From ee0203d452e3e2cf22cef2e34219986c200048d6 Mon Sep 17 00:00:00 2001
From: "Rebecca N. Palmer"
Date: Sun, 22 Jan 2023 11:54:30 +0000
Subject: [PATCH] Import pandas_1.5.3+dfsg.orig.tar.xz

[dgit import orig pandas_1.5.3+dfsg.orig.tar.xz]
---
 .devcontainer.json | 29 +
 .pep8speaks.yml | 4 +
 .pre-commit-config.yaml | 254 +
 AUTHORS.md | 56 +
 CITATION.cff | 10 +
 Dockerfile | 13 +
 LICENSE | 31 +
 LICENSES/DATEUTIL_LICENSE | 54 +
 LICENSES/HAVEN_LICENSE | 21 +
 LICENSES/HAVEN_MIT | 32 +
 LICENSES/KLIB_LICENSE | 23 +
 LICENSES/MUSL_LICENSE | 132 +
 LICENSES/NUMPY_LICENSE | 30 +
 LICENSES/OTHER | 75 +
 LICENSES/PACKAGING_LICENSE | 202 +
 LICENSES/PSF_LICENSE | 279 +
 LICENSES/PYUPGRADE_LICENSE | 19 +
 LICENSES/SAS7BDAT_LICENSE | 19 +
 LICENSES/SCIPY_LICENSE | 31 +
 LICENSES/ULTRAJSON_LICENSE | 34 +
 LICENSES/XARRAY_LICENSE | 195 +
 MANIFEST.in | 60 +
 Makefile | 30 +
 README.md | 172 +
 RELEASE.md | 6 +
 asv_bench/asv.conf.json | 131 +
 asv_bench/benchmarks/__init__.py | 1 +
 asv_bench/benchmarks/algorithms.py | 172 +
 asv_bench/benchmarks/algos/__init__.py | 12 +
 asv_bench/benchmarks/algos/isin.py | 342 +
 asv_bench/benchmarks/arithmetic.py | 511 +
 asv_bench/benchmarks/array.py | 72 +
 asv_bench/benchmarks/attrs_caching.py | 51 +
 asv_bench/benchmarks/boolean.py | 32 +
 asv_bench/benchmarks/categoricals.py | 342 +
 asv_bench/benchmarks/ctors.py | 124 +
 asv_bench/benchmarks/dtypes.py | 127 +
 asv_bench/benchmarks/eval.py | 66 +
 asv_bench/benchmarks/finalize.py | 16 +
 asv_bench/benchmarks/frame_ctor.py | 225 +
 asv_bench/benchmarks/frame_methods.py | 770 +
 asv_bench/benchmarks/gil.py | 341 +
 asv_bench/benchmarks/groupby.py | 952 ++
 asv_bench/benchmarks/hash_functions.py | 91 +
 .../benchmarks/index_cached_properties.py | 75 +
 asv_bench/benchmarks/index_object.py | 242 +
 asv_bench/benchmarks/indexing.py | 503 +
 asv_bench/benchmarks/indexing_engines.py | 102 +
 asv_bench/benchmarks/inference.py | 323 +
 asv_bench/benchmarks/io/__init__.py | 0
 asv_bench/benchmarks/io/csv.py | 573 +
 asv_bench/benchmarks/io/excel.py | 119 +
 asv_bench/benchmarks/io/hdf.py | 138 +
 asv_bench/benchmarks/io/json.py | 312 +
 asv_bench/benchmarks/io/parsers.py | 42 +
 asv_bench/benchmarks/io/pickle.py | 41 +
 asv_bench/benchmarks/io/sas.py | 30 +
 asv_bench/benchmarks/io/sql.py | 177 +
 asv_bench/benchmarks/io/stata.py | 60 +
 asv_bench/benchmarks/io/style.py | 93 +
 asv_bench/benchmarks/join_merge.py | 431 +
 asv_bench/benchmarks/libs.py | 106 +
 asv_bench/benchmarks/multiindex_object.py | 258 +
 asv_bench/benchmarks/package.py | 19 +
 asv_bench/benchmarks/pandas_vb_common.py | 80 +
 asv_bench/benchmarks/period.py | 108 +
 asv_bench/benchmarks/plotting.py | 164 +
 asv_bench/benchmarks/reindex.py | 169 +
 asv_bench/benchmarks/replace.py | 77 +
 asv_bench/benchmarks/reshape.py | 319 +
 asv_bench/benchmarks/rolling.py | 377 +
 asv_bench/benchmarks/series_methods.py | 295 +
 asv_bench/benchmarks/sparse.py | 233 +
 asv_bench/benchmarks/stat_ops.py | 144 +
 asv_bench/benchmarks/strftime.py | 64 +
 asv_bench/benchmarks/strings.py | 304 +
 asv_bench/benchmarks/timedelta.py | 61 +
 asv_bench/benchmarks/timeseries.py | 306 +
 asv_bench/benchmarks/tslibs/__init__.py | 7 +
 asv_bench/benchmarks/tslibs/fields.py | 74 +
 asv_bench/benchmarks/tslibs/normalize.py | 45 +
 asv_bench/benchmarks/tslibs/offsets.py | 90 +
 asv_bench/benchmarks/tslibs/period.py | 141 +
 asv_bench/benchmarks/tslibs/resolution.py | 52 +
 asv_bench/benchmarks/tslibs/timedelta.py | 69 +
 asv_bench/benchmarks/tslibs/timestamp.py | 156 +
 asv_bench/benchmarks/tslibs/tslib.py | 68 +
 asv_bench/benchmarks/tslibs/tz_convert.py | 52 +
ci/code_checks.sh | 94 + ci/condarc.yml | 32 + ci/deps/actions-310-numpydev.yaml | 23 + ci/deps/actions-310.yaml | 55 + ci/deps/actions-38-downstream_compat.yaml | 71 + ci/deps/actions-38-minimum_versions.yaml | 57 + ci/deps/actions-38.yaml | 54 + ci/deps/actions-39.yaml | 55 + ci/deps/actions-pypy-38.yaml | 21 + ci/deps/circle-38-arm64.yaml | 55 + ci/run_tests.sh | 49 + codecov.yml | 18 + doc/.gitignore | 4 + doc/_templates/api_redirect.html | 10 + doc/_templates/autosummary/accessor.rst | 6 + .../autosummary/accessor_attribute.rst | 6 + .../autosummary/accessor_callable.rst | 6 + .../autosummary/accessor_method.rst | 6 + doc/_templates/autosummary/class.rst | 33 + .../autosummary/class_without_autosummary.rst | 6 + doc/_templates/pandas_footer.html | 3 + doc/_templates/sidebar-nav-bs.html | 9 + doc/cheatsheet/Pandas_Cheat_Sheet.pdf | Bin 0 -> 396529 bytes doc/cheatsheet/Pandas_Cheat_Sheet.pptx | Bin 0 -> 116861 bytes doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf | Bin 0 -> 420632 bytes doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx | Bin 0 -> 82563 bytes doc/cheatsheet/README.txt | 8 + doc/data/air_quality_long.csv | 5273 +++++++ doc/data/air_quality_no2.csv | 1036 ++ doc/data/air_quality_no2_long.csv | 2069 +++ doc/data/air_quality_parameters.csv | 8 + doc/data/air_quality_pm25_long.csv | 1111 ++ doc/data/air_quality_stations.csv | 67 + doc/data/baseball.csv | 101 + doc/data/iris.data | 151 + doc/data/tips.csv | 245 + doc/data/titanic.csv | 892 ++ doc/make.py | 377 + doc/redirects.csv | 1415 ++ doc/source/_static/ci.png | Bin 0 -> 555245 bytes doc/source/_static/css/getting_started.css | 263 + doc/source/_static/css/pandas.css | 52 + doc/source/_static/df_repr_truncated.png | Bin 0 -> 8040 bytes doc/source/_static/eval-perf-small.png | Bin 0 -> 25314 bytes doc/source/_static/eval-perf.png | Bin 0 -> 18603 bytes doc/source/_static/index_api.svg | 97 + doc/source/_static/index_contribute.svg | 76 + doc/source/_static/index_getting_started.svg | 66 + doc/source/_static/index_user_guide.svg | 67 + doc/source/_static/legacy_0.10.h5 | Bin 0 -> 238321 bytes doc/source/_static/logo_r.svg | 14 + doc/source/_static/logo_sas.svg | 9 + doc/source/_static/logo_sas_white.svg | 9 + doc/source/_static/logo_sql.svg | 73 + doc/source/_static/logo_stata.svg | 17 + doc/source/_static/new-excel-index.png | Bin 0 -> 11506 bytes doc/source/_static/old-excel-index.png | Bin 0 -> 11570 bytes doc/source/_static/print_df_new.png | Bin 0 -> 77202 bytes doc/source/_static/print_df_old.png | Bin 0 -> 89239 bytes doc/source/_static/query-perf-small.png | Bin 0 -> 21731 bytes doc/source/_static/query-perf.png | Bin 0 -> 20351 bytes doc/source/_static/question_mark_noback.svg | 72 + doc/source/_static/reshaping_melt.png | Bin 0 -> 52900 bytes doc/source/_static/reshaping_pivot.png | Bin 0 -> 52132 bytes doc/source/_static/reshaping_stack.png | Bin 0 -> 54479 bytes doc/source/_static/reshaping_unstack.png | Bin 0 -> 53895 bytes doc/source/_static/reshaping_unstack_0.png | Bin 0 -> 58533 bytes doc/source/_static/reshaping_unstack_1.png | Bin 0 -> 57978 bytes .../_static/schemas/01_table_dataframe.svg | 262 + .../_static/schemas/01_table_series.svg | 127 + .../_static/schemas/01_table_spreadsheet.png | Bin 0 -> 46286 bytes .../_static/schemas/02_io_readwrite.svg | 1401 ++ .../_static/schemas/03_subset_columns.svg | 327 + .../schemas/03_subset_columns_rows.svg | 272 + doc/source/_static/schemas/03_subset_rows.svg | 316 + .../_static/schemas/04_plot_overview.svg | 6443 ++++++++ doc/source/_static/schemas/05_newcolumn_1.svg | 347 + 
doc/source/_static/schemas/05_newcolumn_2.svg | 347 + doc/source/_static/schemas/05_newcolumn_3.svg | 352 + doc/source/_static/schemas/06_aggregate.svg | 211 + doc/source/_static/schemas/06_groupby.svg | 307 + .../_static/schemas/06_groupby_agg_detail.svg | 619 + .../schemas/06_groupby_select_detail.svg | 697 + doc/source/_static/schemas/06_reduction.svg | 222 + doc/source/_static/schemas/06_valuecounts.svg | 269 + doc/source/_static/schemas/07_melt.svg | 315 + doc/source/_static/schemas/07_pivot.svg | 338 + doc/source/_static/schemas/07_pivot_table.svg | 455 + .../_static/schemas/08_concat_column.svg | 465 + doc/source/_static/schemas/08_concat_row.svg | 392 + doc/source/_static/schemas/08_merge_left.svg | 608 + .../_static/spreadsheets/conditional.png | Bin 0 -> 141383 bytes doc/source/_static/spreadsheets/filter.png | Bin 0 -> 244075 bytes doc/source/_static/spreadsheets/find.png | Bin 0 -> 69155 bytes .../_static/spreadsheets/logo_excel.svg | 27 + doc/source/_static/spreadsheets/pivot.png | Bin 0 -> 159941 bytes doc/source/_static/spreadsheets/sort.png | Bin 0 -> 249047 bytes doc/source/_static/spreadsheets/vlookup.png | Bin 0 -> 70189 bytes doc/source/_static/stub | 0 doc/source/_static/style-excel.png | Bin 0 -> 58167 bytes doc/source/_static/style/appmaphead1.png | Bin 0 -> 5023 bytes doc/source/_static/style/appmaphead2.png | Bin 0 -> 7641 bytes doc/source/_static/style/bg_ax0.png | Bin 0 -> 13699 bytes doc/source/_static/style/bg_axNone.png | Bin 0 -> 14203 bytes doc/source/_static/style/bg_axNone_gmap.png | Bin 0 -> 13629 bytes .../_static/style/bg_axNone_lowhigh.png | Bin 0 -> 14041 bytes .../_static/style/bg_axNone_vminvmax.png | Bin 0 -> 13030 bytes doc/source/_static/style/bg_gmap.png | Bin 0 -> 12925 bytes doc/source/_static/style/df_pipe.png | Bin 0 -> 8673 bytes .../_static/style/df_pipe_applydata.png | Bin 0 -> 5981 bytes doc/source/_static/style/df_pipe_applymap.png | Bin 0 -> 5809 bytes doc/source/_static/style/df_pipe_hl.png | Bin 0 -> 5485 bytes doc/source/_static/style/df_pipe_hl2.png | Bin 0 -> 6432 bytes doc/source/_static/style/footer_extended.png | Bin 0 -> 12326 bytes doc/source/_static/style/footer_simple.png | Bin 0 -> 8717 bytes doc/source/_static/style/format_excel_css.png | Bin 0 -> 33222 bytes doc/source/_static/style/hbetw_axNone.png | Bin 0 -> 7309 bytes doc/source/_static/style/hbetw_basic.png | Bin 0 -> 7504 bytes doc/source/_static/style/hbetw_props.png | Bin 0 -> 7776 bytes doc/source/_static/style/hbetw_seq.png | Bin 0 -> 7275 bytes doc/source/_static/style/hq_ax1.png | Bin 0 -> 6092 bytes doc/source/_static/style/hq_axNone.png | Bin 0 -> 6102 bytes doc/source/_static/style/hq_props.png | Bin 0 -> 6241 bytes doc/source/_static/style/latex_1.png | Bin 0 -> 11638 bytes doc/source/_static/style/latex_2.png | Bin 0 -> 14870 bytes doc/source/_static/style/latex_stocks.png | Bin 0 -> 99214 bytes .../_static/style/latex_stocks_html.png | Bin 0 -> 95185 bytes doc/source/_static/style/tg_ax0.png | Bin 0 -> 12972 bytes doc/source/_static/style/tg_axNone.png | Bin 0 -> 13502 bytes doc/source/_static/style/tg_axNone_gmap.png | Bin 0 -> 13772 bytes .../_static/style/tg_axNone_lowhigh.png | Bin 0 -> 13622 bytes .../_static/style/tg_axNone_vminvmax.png | Bin 0 -> 12808 bytes doc/source/_static/style/tg_gmap.png | Bin 0 -> 12591 bytes doc/source/_static/trunc_after.png | Bin 0 -> 29195 bytes doc/source/_static/trunc_before.png | Bin 0 -> 50913 bytes doc/source/_static/whatsnew_assign.png | Bin 0 -> 13462 bytes .../_static/whatsnew_plot_submethods.png | Bin 0 -> 
5579 bytes doc/source/conf.py | 795 + doc/source/development/community.rst | 119 + doc/source/development/contributing.rst | 376 + .../development/contributing_codebase.rst | 928 ++ .../development/contributing_docstring.rst | 1001 ++ .../contributing_documentation.rst | 216 + .../development/contributing_environment.rst | 212 + .../development/debugging_extensions.rst | 93 + doc/source/development/developer.rst | 187 + doc/source/development/extending.rst | 490 + doc/source/development/index.rst | 26 + doc/source/development/internals.rst | 103 + doc/source/development/maintaining.rst | 308 + doc/source/development/policies.rst | 57 + doc/source/development/roadmap.rst | 250 + doc/source/ecosystem.rst | 602 + .../comparison/comparison_with_r.rst | 581 + .../comparison/comparison_with_sas.rst | 584 + .../comparison_with_spreadsheets.rst | 465 + .../comparison/comparison_with_sql.rst | 512 + .../comparison/comparison_with_stata.rst | 501 + .../comparison/includes/case.rst | 10 + .../comparison/includes/column_operations.rst | 11 + .../comparison/includes/column_selection.rst | 22 + .../includes/construct_dataframe.rst | 9 + .../comparison/includes/copies.rst | 23 + .../comparison/includes/extract_substring.rst | 7 + .../comparison/includes/filtering.rst | 16 + .../comparison/includes/find_substring.rst | 8 + .../comparison/includes/groupby.rst | 7 + .../comparison/includes/if_then.rst | 12 + .../comparison/includes/introduction.rst | 9 + .../comparison/includes/length.rst | 8 + .../comparison/includes/limit.rst | 7 + .../comparison/includes/merge.rst | 17 + .../comparison/includes/merge_setup.rst | 8 + .../comparison/includes/missing.rst | 31 + .../comparison/includes/missing_intro.rst | 9 + .../comparison/includes/nth_word.rst | 9 + .../comparison/includes/sorting.rst | 6 + .../comparison/includes/time_date.rst | 22 + .../comparison/includes/transform.rst | 8 + .../getting_started/comparison/index.rst | 16 + doc/source/getting_started/index.rst | 647 + doc/source/getting_started/install.rst | 444 + .../intro_tutorials/01_table_oriented.rst | 222 + .../intro_tutorials/02_read_write.rst | 208 + .../intro_tutorials/03_subset_data.rst | 379 + .../intro_tutorials/04_plotting.rst | 250 + .../intro_tutorials/05_add_columns.rst | 173 + .../06_calculate_statistics.rst | 286 + .../07_reshape_table_layout.rst | 382 + .../intro_tutorials/08_combine_dataframes.rst | 321 + .../intro_tutorials/09_timeseries.rst | 390 + .../intro_tutorials/10_text_data.rst | 248 + .../includes/air_quality_no2.rst | 22 + .../intro_tutorials/includes/titanic.rst | 31 + .../getting_started/intro_tutorials/index.rst | 21 + doc/source/getting_started/overview.rst | 176 + doc/source/getting_started/tutorials.rst | 121 + doc/source/index.rst.template | 134 + doc/source/reference/arrays.rst | 639 + doc/source/reference/extensions.rst | 77 + doc/source/reference/frame.rst | 395 + doc/source/reference/general_functions.rst | 88 + doc/source/reference/groupby.rst | 145 + doc/source/reference/index.rst | 63 + doc/source/reference/indexing.rst | 495 + doc/source/reference/io.rst | 211 + doc/source/reference/offset_frequency.rst | 1537 ++ doc/source/reference/options.rst | 21 + doc/source/reference/plotting.rst | 26 + doc/source/reference/resampling.rst | 66 + doc/source/reference/series.rst | 609 + doc/source/reference/style.rst | 82 + doc/source/reference/testing.rst | 77 + doc/source/reference/window.rst | 110 + doc/source/styled.xlsx | Bin 0 -> 5682 bytes doc/source/user_guide/10min.rst | 840 + doc/source/user_guide/advanced.rst | 
1250 ++ doc/source/user_guide/basics.rst | 2493 +++ doc/source/user_guide/boolean.rst | 107 + doc/source/user_guide/categorical.rst | 1190 ++ doc/source/user_guide/dsintro.rst | 849 + doc/source/user_guide/duplicates.rst | 206 + doc/source/user_guide/enhancingperf.rst | 866 ++ doc/source/user_guide/gotchas.rst | 411 + doc/source/user_guide/groupby.rst | 1620 ++ doc/source/user_guide/index.rst | 88 + doc/source/user_guide/indexing.rst | 1964 +++ doc/source/user_guide/integer_na.rst | 151 + doc/source/user_guide/io.rst | 6601 ++++++++ doc/source/user_guide/merging.rst | 1513 ++ doc/source/user_guide/missing_data.rst | 947 ++ doc/source/user_guide/options.rst | 407 + doc/source/user_guide/reshaping.rst | 935 ++ doc/source/user_guide/scale.rst | 396 + doc/source/user_guide/sparse.rst | 368 + doc/source/user_guide/style.ipynb | 2049 +++ .../templates/html_style_structure.html | 35 + .../templates/html_table_structure.html | 48 + doc/source/user_guide/templates/myhtml.tpl | 5 + doc/source/user_guide/text.rst | 801 + doc/source/user_guide/timedeltas.rst | 491 + doc/source/user_guide/timeseries.rst | 2663 ++++ doc/source/user_guide/visualization.rst | 1837 +++ doc/source/user_guide/window.rst | 649 + doc/source/whatsnew/index.rst | 291 + doc/source/whatsnew/v0.10.0.rst | 549 + doc/source/whatsnew/v0.10.1.rst | 268 + doc/source/whatsnew/v0.11.0.rst | 464 + doc/source/whatsnew/v0.12.0.rst | 535 + doc/source/whatsnew/v0.13.0.rst | 1335 ++ doc/source/whatsnew/v0.13.1.rst | 482 + doc/source/whatsnew/v0.14.0.rst | 1087 ++ doc/source/whatsnew/v0.14.1.rst | 284 + doc/source/whatsnew/v0.15.0.rst | 1242 ++ doc/source/whatsnew/v0.15.1.rst | 319 + doc/source/whatsnew/v0.15.2.rst | 258 + doc/source/whatsnew/v0.16.0.rst | 690 + doc/source/whatsnew/v0.16.1.rst | 480 + doc/source/whatsnew/v0.16.2.rst | 181 + doc/source/whatsnew/v0.17.0.rst | 1177 ++ doc/source/whatsnew/v0.17.1.rst | 213 + doc/source/whatsnew/v0.18.0.rst | 1304 ++ doc/source/whatsnew/v0.18.1.rst | 711 + doc/source/whatsnew/v0.19.0.rst | 1603 ++ doc/source/whatsnew/v0.19.1.rst | 77 + doc/source/whatsnew/v0.19.2.rst | 98 + doc/source/whatsnew/v0.20.0.rst | 1800 +++ doc/source/whatsnew/v0.20.2.rst | 143 + doc/source/whatsnew/v0.20.3.rst | 76 + doc/source/whatsnew/v0.21.0.rst | 1198 ++ doc/source/whatsnew/v0.21.1.rst | 187 + doc/source/whatsnew/v0.22.0.rst | 261 + doc/source/whatsnew/v0.23.0.rst | 1480 ++ doc/source/whatsnew/v0.23.1.rst | 151 + doc/source/whatsnew/v0.23.2.rst | 123 + doc/source/whatsnew/v0.23.3.rst | 16 + doc/source/whatsnew/v0.23.4.rst | 47 + doc/source/whatsnew/v0.24.0.rst | 1936 +++ doc/source/whatsnew/v0.24.1.rst | 94 + doc/source/whatsnew/v0.24.2.rst | 107 + doc/source/whatsnew/v0.25.0.rst | 1277 ++ doc/source/whatsnew/v0.25.1.rst | 119 + doc/source/whatsnew/v0.25.2.rst | 49 + doc/source/whatsnew/v0.25.3.rst | 22 + doc/source/whatsnew/v0.4.x.rst | 69 + doc/source/whatsnew/v0.5.0.rst | 52 + doc/source/whatsnew/v0.6.0.rst | 66 + doc/source/whatsnew/v0.6.1.rst | 58 + doc/source/whatsnew/v0.7.0.rst | 384 + doc/source/whatsnew/v0.7.1.rst | 41 + doc/source/whatsnew/v0.7.2.rst | 38 + doc/source/whatsnew/v0.7.3.rst | 151 + doc/source/whatsnew/v0.8.0.rst | 284 + doc/source/whatsnew/v0.8.1.rst | 47 + doc/source/whatsnew/v0.9.0.rst | 109 + doc/source/whatsnew/v0.9.1.rst | 171 + doc/source/whatsnew/v1.0.0.rst | 1303 ++ doc/source/whatsnew/v1.0.1.rst | 79 + doc/source/whatsnew/v1.0.2.rst | 125 + doc/source/whatsnew/v1.0.3.rst | 29 + doc/source/whatsnew/v1.0.4.rst | 48 + doc/source/whatsnew/v1.0.5.rst | 39 + doc/source/whatsnew/v1.1.0.rst | 
1224 ++ doc/source/whatsnew/v1.1.1.rst | 56 + doc/source/whatsnew/v1.1.2.rst | 64 + doc/source/whatsnew/v1.1.3.rst | 78 + doc/source/whatsnew/v1.1.4.rst | 55 + doc/source/whatsnew/v1.1.5.rst | 56 + doc/source/whatsnew/v1.2.0.rst | 878 ++ doc/source/whatsnew/v1.2.1.rst | 153 + doc/source/whatsnew/v1.2.2.rst | 49 + doc/source/whatsnew/v1.2.3.rst | 32 + doc/source/whatsnew/v1.2.4.rst | 33 + doc/source/whatsnew/v1.2.5.rst | 31 + doc/source/whatsnew/v1.3.0.rst | 1236 ++ doc/source/whatsnew/v1.3.1.rst | 51 + doc/source/whatsnew/v1.3.2.rst | 51 + doc/source/whatsnew/v1.3.3.rst | 57 + doc/source/whatsnew/v1.3.4.rst | 57 + doc/source/whatsnew/v1.3.5.rst | 34 + doc/source/whatsnew/v1.4.0.rst | 1112 ++ doc/source/whatsnew/v1.4.1.rst | 56 + doc/source/whatsnew/v1.4.2.rst | 45 + doc/source/whatsnew/v1.4.3.rst | 72 + doc/source/whatsnew/v1.4.4.rst | 65 + doc/source/whatsnew/v1.5.0.rst | 1294 ++ doc/source/whatsnew/v1.5.1.rst | 122 + doc/source/whatsnew/v1.5.2.rst | 46 + doc/source/whatsnew/v1.5.3.rst | 59 + .../whatsnew/whatsnew_0171_html_table.html | 872 ++ doc/sphinxext/README.rst | 5 + doc/sphinxext/announce.py | 161 + doc/sphinxext/contributors.py | 57 + environment.yml | 132 + pandas/__init__.py | 426 + pandas/_config/__init__.py | 28 + pandas/_config/config.py | 912 ++ pandas/_config/dates.py | 25 + pandas/_config/display.py | 62 + pandas/_config/localization.py | 179 + pandas/_libs/__init__.py | 22 + pandas/_libs/algos.pxd | 22 + pandas/_libs/algos.pyi | 420 + pandas/_libs/algos.pyx | 1522 ++ pandas/_libs/algos_common_helper.pxi.in | 73 + pandas/_libs/algos_take_helper.pxi.in | 222 + pandas/_libs/arrays.pxd | 11 + pandas/_libs/arrays.pyi | 34 + pandas/_libs/arrays.pyx | 183 + pandas/_libs/dtypes.pxd | 36 + pandas/_libs/groupby.pyi | 182 + pandas/_libs/groupby.pyx | 1782 +++ pandas/_libs/hashing.pyi | 9 + pandas/_libs/hashing.pyx | 196 + pandas/_libs/hashtable.pxd | 141 + pandas/_libs/hashtable.pyi | 202 + pandas/_libs/hashtable.pyx | 183 + pandas/_libs/hashtable_class_helper.pxi.in | 1390 ++ pandas/_libs/hashtable_func_helper.pxi.in | 468 + pandas/_libs/index.pyi | 87 + pandas/_libs/index.pyx | 1080 ++ pandas/_libs/index_class_helper.pxi.in | 65 + pandas/_libs/indexing.pyi | 17 + pandas/_libs/indexing.pyx | 28 + pandas/_libs/internals.pyi | 85 + pandas/_libs/internals.pyx | 836 + pandas/_libs/interval.pyi | 174 + pandas/_libs/interval.pyx | 589 + pandas/_libs/intervaltree.pxi.in | 434 + pandas/_libs/join.pyi | 78 + pandas/_libs/join.pyx | 887 ++ pandas/_libs/json.pyi | 23 + pandas/_libs/khash.pxd | 129 + .../_libs/khash_for_primitive_helper.pxi.in | 44 + pandas/_libs/lib.pxd | 6 + pandas/_libs/lib.pyi | 233 + pandas/_libs/lib.pyx | 3147 ++++ pandas/_libs/missing.pxd | 19 + pandas/_libs/missing.pyi | 18 + pandas/_libs/missing.pyx | 507 + pandas/_libs/ops.pyi | 50 + pandas/_libs/ops.pyx | 310 + pandas/_libs/ops_dispatch.pyi | 5 + pandas/_libs/ops_dispatch.pyx | 121 + pandas/_libs/parsers.pyi | 70 + pandas/_libs/parsers.pyx | 2070 +++ pandas/_libs/properties.pyi | 28 + pandas/_libs/properties.pyx | 69 + pandas/_libs/reduction.pyi | 8 + pandas/_libs/reduction.pyx | 33 + pandas/_libs/reshape.pyi | 16 + pandas/_libs/reshape.pyx | 138 + pandas/_libs/sparse.pyi | 47 + pandas/_libs/sparse.pyx | 738 + pandas/_libs/sparse_op_helper.pxi.in | 309 + pandas/_libs/src/headers/portable.h | 16 + pandas/_libs/src/inline_helper.h | 27 + pandas/_libs/src/klib/khash.h | 719 + pandas/_libs/src/klib/khash_python.h | 446 + pandas/_libs/src/parse_helper.h | 100 + pandas/_libs/src/parser/io.c | 107 + 
pandas/_libs/src/parser/io.h | 34 + pandas/_libs/src/parser/tokenizer.c | 2085 +++ pandas/_libs/src/parser/tokenizer.h | 236 + pandas/_libs/src/skiplist.h | 300 + pandas/_libs/src/ujson/lib/ultrajson.h | 316 + pandas/_libs/src/ujson/lib/ultrajsondec.c | 1202 ++ pandas/_libs/src/ujson/lib/ultrajsonenc.c | 1200 ++ pandas/_libs/src/ujson/python/JSONtoObj.c | 601 + .../_libs/src/ujson/python/date_conversions.c | 163 + .../_libs/src/ujson/python/date_conversions.h | 39 + pandas/_libs/src/ujson/python/objToJSON.c | 2122 +++ pandas/_libs/src/ujson/python/ujson.c | 81 + pandas/_libs/src/ujson/python/version.h | 43 + pandas/_libs/testing.pyi | 12 + pandas/_libs/testing.pyx | 212 + pandas/_libs/tslib.pyi | 29 + pandas/_libs/tslib.pyx | 857 + pandas/_libs/tslibs/__init__.py | 81 + pandas/_libs/tslibs/base.pxd | 5 + pandas/_libs/tslibs/base.pyx | 12 + pandas/_libs/tslibs/ccalendar.pxd | 20 + pandas/_libs/tslibs/ccalendar.pyi | 12 + pandas/_libs/tslibs/ccalendar.pyx | 289 + pandas/_libs/tslibs/conversion.pxd | 39 + pandas/_libs/tslibs/conversion.pyi | 14 + pandas/_libs/tslibs/conversion.pyx | 655 + pandas/_libs/tslibs/dtypes.pxd | 103 + pandas/_libs/tslibs/dtypes.pyi | 79 + pandas/_libs/tslibs/dtypes.pyx | 412 + pandas/_libs/tslibs/fields.pyi | 58 + pandas/_libs/tslibs/fields.pyx | 774 + pandas/_libs/tslibs/nattype.pxd | 18 + pandas/_libs/tslibs/nattype.pyi | 129 + pandas/_libs/tslibs/nattype.pyx | 1245 ++ pandas/_libs/tslibs/np_datetime.pxd | 118 + pandas/_libs/tslibs/np_datetime.pyi | 20 + pandas/_libs/tslibs/np_datetime.pyx | 606 + pandas/_libs/tslibs/offsets.pxd | 12 + pandas/_libs/tslibs/offsets.pyi | 281 + pandas/_libs/tslibs/offsets.pyx | 4456 ++++++ pandas/_libs/tslibs/parsing.pxd | 3 + pandas/_libs/tslibs/parsing.pyi | 60 + pandas/_libs/tslibs/parsing.pyx | 1202 ++ pandas/_libs/tslibs/period.pxd | 7 + pandas/_libs/tslibs/period.pyi | 129 + pandas/_libs/tslibs/period.pyx | 2656 ++++ .../_libs/tslibs/src/datetime/np_datetime.c | 1093 ++ .../_libs/tslibs/src/datetime/np_datetime.h | 102 + .../tslibs/src/datetime/np_datetime_strings.c | 969 ++ .../tslibs/src/datetime/np_datetime_strings.h | 93 + pandas/_libs/tslibs/strptime.pyi | 12 + pandas/_libs/tslibs/strptime.pyx | 538 + pandas/_libs/tslibs/timedeltas.pxd | 27 + pandas/_libs/tslibs/timedeltas.pyi | 156 + pandas/_libs/tslibs/timedeltas.pyx | 2042 +++ pandas/_libs/tslibs/timestamps.pxd | 40 + pandas/_libs/tslibs/timestamps.pyi | 224 + pandas/_libs/tslibs/timestamps.pyx | 2420 +++ pandas/_libs/tslibs/timezones.pxd | 23 + pandas/_libs/tslibs/timezones.pyi | 21 + pandas/_libs/tslibs/timezones.pyx | 450 + pandas/_libs/tslibs/tzconversion.pxd | 39 + pandas/_libs/tslibs/tzconversion.pyi | 21 + pandas/_libs/tslibs/tzconversion.pyx | 725 + pandas/_libs/tslibs/util.pxd | 221 + pandas/_libs/tslibs/vectorized.pyi | 46 + pandas/_libs/tslibs/vectorized.pyx | 393 + pandas/_libs/util.pxd | 17 + pandas/_libs/window/__init__.py | 0 pandas/_libs/window/aggregations.pyi | 127 + pandas/_libs/window/aggregations.pyx | 1940 +++ pandas/_libs/window/indexers.pyi | 12 + pandas/_libs/window/indexers.pyx | 149 + pandas/_libs/writers.pyi | 21 + pandas/_libs/writers.pyx | 175 + pandas/_testing/__init__.py | 1161 ++ pandas/_testing/_hypothesis.py | 89 + pandas/_testing/_io.py | 437 + pandas/_testing/_random.py | 36 + pandas/_testing/_warnings.py | 220 + pandas/_testing/asserters.py | 1495 ++ pandas/_testing/compat.py | 23 + pandas/_testing/contexts.py | 242 + pandas/_typing.py | 340 + pandas/_version.py | 560 + pandas/api/__init__.py | 14 + pandas/api/extensions/__init__.py 
| 33 + pandas/api/indexers/__init__.py | 17 + pandas/api/interchange/__init__.py | 8 + pandas/api/types/__init__.py | 23 + pandas/arrays/__init__.py | 36 + pandas/compat/__init__.py | 166 + pandas/compat/_optional.py | 173 + pandas/compat/chainmap.py | 37 + pandas/compat/numpy/__init__.py | 34 + pandas/compat/numpy/function.py | 433 + pandas/compat/pickle_compat.py | 306 + pandas/compat/pyarrow.py | 30 + pandas/conftest.py | 1881 +++ pandas/core/__init__.py | 0 pandas/core/_numba/__init__.py | 0 pandas/core/_numba/executor.py | 59 + pandas/core/_numba/kernels/__init__.py | 6 + pandas/core/_numba/kernels/mean_.py | 150 + pandas/core/_numba/kernels/min_max_.py | 70 + pandas/core/_numba/kernels/shared.py | 25 + pandas/core/_numba/kernels/sum_.py | 138 + pandas/core/_numba/kernels/var_.py | 159 + pandas/core/accessor.py | 298 + pandas/core/algorithms.py | 2002 +++ pandas/core/api.py | 148 + pandas/core/apply.py | 1603 ++ pandas/core/array_algos/__init__.py | 9 + pandas/core/array_algos/masked_reductions.py | 149 + pandas/core/array_algos/putmask.py | 146 + pandas/core/array_algos/quantile.py | 224 + pandas/core/array_algos/replace.py | 161 + pandas/core/array_algos/take.py | 584 + pandas/core/array_algos/transforms.py | 40 + pandas/core/arraylike.py | 533 + pandas/core/arrays/__init__.py | 43 + pandas/core/arrays/_mixins.py | 514 + pandas/core/arrays/_ranges.py | 194 + pandas/core/arrays/arrow/__init__.py | 4 + pandas/core/arrays/arrow/_arrow_utils.py | 61 + pandas/core/arrays/arrow/array.py | 1086 ++ pandas/core/arrays/arrow/dtype.py | 204 + pandas/core/arrays/arrow/extension_types.py | 105 + pandas/core/arrays/base.py | 1863 +++ pandas/core/arrays/boolean.py | 380 + pandas/core/arrays/categorical.py | 3008 ++++ pandas/core/arrays/datetimelike.py | 2286 +++ pandas/core/arrays/datetimes.py | 2586 ++++ pandas/core/arrays/floating.py | 157 + pandas/core/arrays/integer.py | 224 + pandas/core/arrays/interval.py | 1748 +++ pandas/core/arrays/masked.py | 1294 ++ pandas/core/arrays/numeric.py | 272 + pandas/core/arrays/numpy_.py | 435 + pandas/core/arrays/period.py | 1182 ++ pandas/core/arrays/sparse/__init__.py | 21 + pandas/core/arrays/sparse/accessor.py | 396 + pandas/core/arrays/sparse/array.py | 1959 +++ pandas/core/arrays/sparse/dtype.py | 416 + pandas/core/arrays/sparse/scipy_sparse.py | 211 + pandas/core/arrays/string_.py | 578 + pandas/core/arrays/string_arrow.py | 479 + pandas/core/arrays/timedeltas.py | 1021 ++ pandas/core/base.py | 1334 ++ pandas/core/common.py | 710 + pandas/core/computation/__init__.py | 0 pandas/core/computation/align.py | 214 + pandas/core/computation/api.py | 2 + pandas/core/computation/check.py | 12 + pandas/core/computation/common.py | 28 + pandas/core/computation/engines.py | 144 + pandas/core/computation/eval.py | 414 + pandas/core/computation/expr.py | 844 + pandas/core/computation/expressions.py | 286 + pandas/core/computation/ops.py | 619 + pandas/core/computation/parsing.py | 195 + pandas/core/computation/pytables.py | 656 + pandas/core/computation/scope.py | 330 + pandas/core/config_init.py | 991 ++ pandas/core/construction.py | 917 ++ pandas/core/describe.py | 429 + pandas/core/dtypes/__init__.py | 0 pandas/core/dtypes/api.py | 87 + pandas/core/dtypes/astype.py | 418 + pandas/core/dtypes/base.py | 528 + pandas/core/dtypes/cast.py | 2105 +++ pandas/core/dtypes/common.py | 1885 +++ pandas/core/dtypes/concat.py | 365 + pandas/core/dtypes/dtypes.py | 1487 ++ pandas/core/dtypes/generic.py | 166 + pandas/core/dtypes/inference.py | 466 + 
pandas/core/dtypes/missing.py | 783 + pandas/core/flags.py | 115 + pandas/core/frame.py | 12030 ++++++++++++++ pandas/core/generic.py | 12926 ++++++++++++++++ pandas/core/groupby/__init__.py | 15 + pandas/core/groupby/base.py | 166 + pandas/core/groupby/categorical.py | 121 + pandas/core/groupby/generic.py | 1942 +++ pandas/core/groupby/groupby.py | 4437 ++++++ pandas/core/groupby/grouper.py | 997 ++ pandas/core/groupby/indexing.py | 303 + pandas/core/groupby/numba_.py | 181 + pandas/core/groupby/ops.py | 1371 ++ pandas/core/index.py | 37 + pandas/core/indexers/__init__.py | 31 + pandas/core/indexers/objects.py | 398 + pandas/core/indexers/utils.py | 566 + pandas/core/indexes/__init__.py | 0 pandas/core/indexes/accessors.py | 512 + pandas/core/indexes/api.py | 386 + pandas/core/indexes/base.py | 7489 +++++++++ pandas/core/indexes/category.py | 585 + pandas/core/indexes/datetimelike.py | 709 + pandas/core/indexes/datetimes.py | 1251 ++ pandas/core/indexes/extension.py | 192 + pandas/core/indexes/frozen.py | 114 + pandas/core/indexes/interval.py | 1142 ++ pandas/core/indexes/multi.py | 3971 +++++ pandas/core/indexes/numeric.py | 420 + pandas/core/indexes/period.py | 588 + pandas/core/indexes/range.py | 1094 ++ pandas/core/indexes/timedeltas.py | 280 + pandas/core/indexing.py | 2679 ++++ pandas/core/interchange/__init__.py | 0 pandas/core/interchange/buffer.py | 77 + pandas/core/interchange/column.py | 377 + pandas/core/interchange/dataframe.py | 109 + pandas/core/interchange/dataframe_protocol.py | 485 + pandas/core/interchange/from_dataframe.py | 524 + pandas/core/interchange/utils.py | 95 + pandas/core/internals/__init__.py | 59 + pandas/core/internals/api.py | 97 + pandas/core/internals/array_manager.py | 1408 ++ pandas/core/internals/base.py | 226 + pandas/core/internals/blocks.py | 2387 +++ pandas/core/internals/concat.py | 765 + pandas/core/internals/construction.py | 1062 ++ pandas/core/internals/managers.py | 2434 +++ pandas/core/internals/ops.py | 147 + pandas/core/missing.py | 993 ++ pandas/core/nanops.py | 1747 +++ pandas/core/ops/__init__.py | 514 + pandas/core/ops/array_ops.py | 526 + pandas/core/ops/common.py | 142 + pandas/core/ops/dispatch.py | 26 + pandas/core/ops/docstrings.py | 765 + pandas/core/ops/invalid.py | 58 + pandas/core/ops/mask_ops.py | 189 + pandas/core/ops/methods.py | 124 + pandas/core/ops/missing.py | 183 + pandas/core/resample.py | 2265 +++ pandas/core/reshape/__init__.py | 0 pandas/core/reshape/api.py | 41 + pandas/core/reshape/concat.py | 820 + pandas/core/reshape/encoding.py | 520 + pandas/core/reshape/melt.py | 551 + pandas/core/reshape/merge.py | 2501 +++ pandas/core/reshape/pivot.py | 863 ++ pandas/core/reshape/reshape.py | 850 + pandas/core/reshape/tile.py | 647 + pandas/core/reshape/util.py | 82 + pandas/core/roperator.py | 62 + pandas/core/sample.py | 152 + pandas/core/series.py | 6265 ++++++++ pandas/core/shared_docs.py | 893 ++ pandas/core/sorting.py | 734 + pandas/core/sparse/__init__.py | 0 pandas/core/sparse/api.py | 6 + pandas/core/strings/__init__.py | 33 + pandas/core/strings/accessor.py | 3367 ++++ pandas/core/strings/base.py | 248 + pandas/core/strings/object_array.py | 483 + pandas/core/tools/__init__.py | 0 pandas/core/tools/datetimes.py | 1303 ++ pandas/core/tools/numeric.py | 244 + pandas/core/tools/timedeltas.py | 259 + pandas/core/tools/times.py | 149 + pandas/core/util/__init__.py | 0 pandas/core/util/hashing.py | 366 + pandas/core/util/numba_.py | 112 + pandas/core/window/__init__.py | 23 + pandas/core/window/common.py | 207 + 
pandas/core/window/doc.py | 140 + pandas/core/window/ewm.py | 1083 ++ pandas/core/window/expanding.py | 898 ++ pandas/core/window/numba_.py | 350 + pandas/core/window/online.py | 119 + pandas/core/window/rolling.py | 2899 ++++ pandas/errors/__init__.py | 576 + pandas/io/__init__.py | 12 + pandas/io/api.py | 65 + pandas/io/clipboard/__init__.py | 678 + pandas/io/clipboards.py | 156 + pandas/io/common.py | 1183 ++ pandas/io/date_converters.py | 131 + pandas/io/excel/__init__.py | 24 + pandas/io/excel/_base.py | 1781 +++ pandas/io/excel/_odfreader.py | 251 + pandas/io/excel/_odswriter.py | 347 + pandas/io/excel/_openpyxl.py | 639 + pandas/io/excel/_pyxlsb.py | 113 + pandas/io/excel/_util.py | 333 + pandas/io/excel/_xlrd.py | 127 + pandas/io/excel/_xlsxwriter.py | 289 + pandas/io/excel/_xlwt.py | 228 + pandas/io/feather_format.py | 134 + pandas/io/formats/__init__.py | 8 + pandas/io/formats/_color_data.py | 157 + pandas/io/formats/console.py | 94 + pandas/io/formats/css.py | 418 + pandas/io/formats/csvs.py | 321 + pandas/io/formats/excel.py | 960 ++ pandas/io/formats/format.py | 2183 +++ pandas/io/formats/html.py | 634 + pandas/io/formats/info.py | 1116 ++ pandas/io/formats/latex.py | 832 + pandas/io/formats/printing.py | 515 + pandas/io/formats/string.py | 212 + pandas/io/formats/style.py | 4258 +++++ pandas/io/formats/style_render.py | 2346 +++ pandas/io/formats/templates/html.tpl | 16 + pandas/io/formats/templates/html_style.tpl | 26 + pandas/io/formats/templates/html_table.tpl | 63 + pandas/io/formats/templates/latex.tpl | 5 + .../io/formats/templates/latex_longtable.tpl | 82 + pandas/io/formats/templates/latex_table.tpl | 57 + pandas/io/formats/templates/string.tpl | 12 + pandas/io/formats/xml.py | 558 + pandas/io/gbq.py | 230 + pandas/io/html.py | 1222 ++ pandas/io/json/__init__.py | 21 + pandas/io/json/_json.py | 1414 ++ pandas/io/json/_normalize.py | 540 + pandas/io/json/_table_schema.py | 377 + pandas/io/orc.py | 176 + pandas/io/parquet.py | 509 + pandas/io/parsers/__init__.py | 9 + pandas/io/parsers/arrow_parser_wrapper.py | 155 + pandas/io/parsers/base_parser.py | 1335 ++ pandas/io/parsers/c_parser_wrapper.py | 434 + pandas/io/parsers/python_parser.py | 1344 ++ pandas/io/parsers/readers.py | 2248 +++ pandas/io/pickle.py | 216 + pandas/io/pytables.py | 5336 +++++++ pandas/io/sas/__init__.py | 3 + pandas/io/sas/_sas.pyi | 5 + pandas/io/sas/sas.pyx | 436 + pandas/io/sas/sas7bdat.py | 816 + pandas/io/sas/sas_constants.py | 261 + pandas/io/sas/sas_xport.py | 507 + pandas/io/sas/sasreader.py | 175 + pandas/io/spss.py | 53 + pandas/io/sql.py | 2257 +++ pandas/io/stata.py | 3678 +++++ pandas/io/xml.py | 1104 ++ pandas/plotting/__init__.py | 98 + pandas/plotting/_core.py | 1888 +++ pandas/plotting/_matplotlib/__init__.py | 93 + pandas/plotting/_matplotlib/boxplot.py | 539 + pandas/plotting/_matplotlib/compat.py | 22 + pandas/plotting/_matplotlib/converter.py | 1116 ++ pandas/plotting/_matplotlib/core.py | 1856 +++ pandas/plotting/_matplotlib/groupby.py | 139 + pandas/plotting/_matplotlib/hist.py | 531 + pandas/plotting/_matplotlib/misc.py | 484 + pandas/plotting/_matplotlib/style.py | 284 + pandas/plotting/_matplotlib/timeseries.py | 336 + pandas/plotting/_matplotlib/tools.py | 492 + pandas/plotting/_misc.py | 610 + pandas/testing.py | 18 + pandas/tests/__init__.py | 0 pandas/tests/api/__init__.py | 0 pandas/tests/api/test_api.py | 332 + pandas/tests/api/test_types.py | 63 + pandas/tests/apply/__init__.py | 0 pandas/tests/apply/common.py | 10 + pandas/tests/apply/conftest.py | 18 + 
pandas/tests/apply/test_frame_apply.py | 1662 ++ .../apply/test_frame_apply_relabeling.py | 97 + pandas/tests/apply/test_frame_transform.py | 258 + pandas/tests/apply/test_invalid_arg.py | 369 + pandas/tests/apply/test_series_apply.py | 927 ++ .../apply/test_series_apply_relabeling.py | 33 + pandas/tests/apply/test_series_transform.py | 49 + pandas/tests/apply/test_str.py | 304 + pandas/tests/arithmetic/__init__.py | 0 pandas/tests/arithmetic/common.py | 155 + pandas/tests/arithmetic/conftest.py | 232 + pandas/tests/arithmetic/test_array_ops.py | 39 + pandas/tests/arithmetic/test_categorical.py | 25 + pandas/tests/arithmetic/test_datetime64.py | 2433 +++ pandas/tests/arithmetic/test_interval.py | 316 + pandas/tests/arithmetic/test_numeric.py | 1447 ++ pandas/tests/arithmetic/test_object.py | 379 + pandas/tests/arithmetic/test_period.py | 1601 ++ pandas/tests/arithmetic/test_timedelta64.py | 2106 +++ pandas/tests/arrays/__init__.py | 0 pandas/tests/arrays/boolean/__init__.py | 0 .../tests/arrays/boolean/test_arithmetic.py | 129 + pandas/tests/arrays/boolean/test_astype.py | 53 + .../tests/arrays/boolean/test_comparison.py | 60 + .../tests/arrays/boolean/test_construction.py | 326 + pandas/tests/arrays/boolean/test_function.py | 126 + pandas/tests/arrays/boolean/test_indexing.py | 13 + pandas/tests/arrays/boolean/test_logical.py | 254 + pandas/tests/arrays/boolean/test_ops.py | 27 + pandas/tests/arrays/boolean/test_reduction.py | 61 + pandas/tests/arrays/boolean/test_repr.py | 13 + pandas/tests/arrays/categorical/__init__.py | 0 pandas/tests/arrays/categorical/conftest.py | 15 + pandas/tests/arrays/categorical/test_algos.py | 83 + .../arrays/categorical/test_analytics.py | 384 + pandas/tests/arrays/categorical/test_api.py | 579 + .../tests/arrays/categorical/test_astype.py | 99 + .../arrays/categorical/test_constructors.py | 772 + .../tests/arrays/categorical/test_dtypes.py | 136 + .../tests/arrays/categorical/test_indexing.py | 393 + .../tests/arrays/categorical/test_missing.py | 213 + .../arrays/categorical/test_operators.py | 406 + .../tests/arrays/categorical/test_replace.py | 72 + pandas/tests/arrays/categorical/test_repr.py | 533 + .../tests/arrays/categorical/test_sorting.py | 129 + .../tests/arrays/categorical/test_subclass.py | 22 + pandas/tests/arrays/categorical/test_take.py | 95 + .../tests/arrays/categorical/test_warnings.py | 22 + pandas/tests/arrays/datetimes/__init__.py | 0 .../arrays/datetimes/test_constructors.py | 162 + .../tests/arrays/datetimes/test_reductions.py | 176 + pandas/tests/arrays/floating/__init__.py | 0 pandas/tests/arrays/floating/conftest.py | 48 + .../tests/arrays/floating/test_arithmetic.py | 232 + pandas/tests/arrays/floating/test_astype.py | 118 + .../tests/arrays/floating/test_comparison.py | 65 + pandas/tests/arrays/floating/test_concat.py | 21 + .../arrays/floating/test_construction.py | 203 + pandas/tests/arrays/floating/test_function.py | 192 + pandas/tests/arrays/floating/test_repr.py | 48 + pandas/tests/arrays/floating/test_to_numpy.py | 132 + pandas/tests/arrays/integer/__init__.py | 0 pandas/tests/arrays/integer/conftest.py | 68 + .../tests/arrays/integer/test_arithmetic.py | 367 + .../tests/arrays/integer/test_comparison.py | 38 + pandas/tests/arrays/integer/test_concat.py | 69 + .../tests/arrays/integer/test_construction.py | 236 + pandas/tests/arrays/integer/test_dtypes.py | 297 + pandas/tests/arrays/integer/test_function.py | 201 + pandas/tests/arrays/integer/test_indexing.py | 19 + pandas/tests/arrays/integer/test_repr.py | 68 + 
pandas/tests/arrays/interval/__init__.py | 0 pandas/tests/arrays/interval/test_astype.py | 28 + pandas/tests/arrays/interval/test_interval.py | 415 + pandas/tests/arrays/interval/test_ops.py | 93 + pandas/tests/arrays/masked/__init__.py | 0 pandas/tests/arrays/masked/test_arithmetic.py | 248 + .../tests/arrays/masked/test_arrow_compat.py | 195 + pandas/tests/arrays/masked/test_function.py | 51 + pandas/tests/arrays/masked/test_indexing.py | 60 + pandas/tests/arrays/masked_shared.py | 155 + pandas/tests/arrays/numpy_/__init__.py | 0 pandas/tests/arrays/numpy_/test_indexing.py | 41 + pandas/tests/arrays/numpy_/test_numpy.py | 324 + pandas/tests/arrays/period/__init__.py | 0 .../tests/arrays/period/test_arrow_compat.py | 121 + pandas/tests/arrays/period/test_astype.py | 80 + .../tests/arrays/period/test_constructors.py | 123 + pandas/tests/arrays/period/test_reductions.py | 42 + pandas/tests/arrays/sparse/__init__.py | 0 pandas/tests/arrays/sparse/test_accessor.py | 236 + .../tests/arrays/sparse/test_arithmetics.py | 516 + pandas/tests/arrays/sparse/test_array.py | 485 + pandas/tests/arrays/sparse/test_astype.py | 128 + .../arrays/sparse/test_combine_concat.py | 62 + .../tests/arrays/sparse/test_constructors.py | 308 + pandas/tests/arrays/sparse/test_dtype.py | 209 + pandas/tests/arrays/sparse/test_indexing.py | 290 + pandas/tests/arrays/sparse/test_libsparse.py | 553 + pandas/tests/arrays/sparse/test_reductions.py | 308 + pandas/tests/arrays/sparse/test_unary.py | 72 + pandas/tests/arrays/string_/__init__.py | 0 pandas/tests/arrays/string_/test_string.py | 613 + .../tests/arrays/string_/test_string_arrow.py | 199 + pandas/tests/arrays/test_array.py | 393 + pandas/tests/arrays/test_datetimelike.py | 1459 ++ pandas/tests/arrays/test_datetimes.py | 641 + pandas/tests/arrays/test_ndarray_backed.py | 75 + pandas/tests/arrays/test_period.py | 182 + pandas/tests/arrays/test_timedeltas.py | 293 + pandas/tests/arrays/timedeltas/__init__.py | 0 .../arrays/timedeltas/test_constructors.py | 63 + .../arrays/timedeltas/test_reductions.py | 215 + pandas/tests/base/__init__.py | 0 pandas/tests/base/common.py | 9 + pandas/tests/base/test_constructors.py | 173 + pandas/tests/base/test_conversion.py | 552 + pandas/tests/base/test_fillna.py | 60 + pandas/tests/base/test_misc.py | 198 + pandas/tests/base/test_transpose.py | 56 + pandas/tests/base/test_unique.py | 158 + pandas/tests/base/test_value_counts.py | 317 + pandas/tests/computation/__init__.py | 0 pandas/tests/computation/test_compat.py | 32 + pandas/tests/computation/test_eval.py | 1941 +++ pandas/tests/config/__init__.py | 0 pandas/tests/config/test_config.py | 477 + pandas/tests/config/test_localization.py | 142 + pandas/tests/construction/__init__.py | 0 .../tests/construction/test_extract_array.py | 18 + pandas/tests/copy_view/__init__.py | 0 pandas/tests/copy_view/test_indexing.py | 802 + pandas/tests/copy_view/test_internals.py | 95 + pandas/tests/copy_view/test_methods.py | 231 + pandas/tests/copy_view/test_setitem.py | 91 + pandas/tests/copy_view/util.py | 11 + pandas/tests/dtypes/__init__.py | 0 pandas/tests/dtypes/cast/__init__.py | 0 .../dtypes/cast/test_can_hold_element.py | 79 + .../dtypes/cast/test_construct_from_scalar.py | 55 + .../dtypes/cast/test_construct_ndarray.py | 30 + .../dtypes/cast/test_construct_object_arr.py | 20 + pandas/tests/dtypes/cast/test_dict_compat.py | 14 + pandas/tests/dtypes/cast/test_downcast.py | 97 + .../dtypes/cast/test_find_common_type.py | 173 + .../dtypes/cast/test_infer_datetimelike.py | 28 + 
pandas/tests/dtypes/cast/test_infer_dtype.py | 208 + .../dtypes/cast/test_maybe_box_native.py | 40 + pandas/tests/dtypes/cast/test_promote.py | 582 + pandas/tests/dtypes/test_common.py | 803 + pandas/tests/dtypes/test_concat.py | 48 + pandas/tests/dtypes/test_dtypes.py | 1130 ++ pandas/tests/dtypes/test_generic.py | 138 + pandas/tests/dtypes/test_inference.py | 2018 +++ pandas/tests/dtypes/test_missing.py | 856 + pandas/tests/extension/__init__.py | 0 .../extension/array_with_attr/__init__.py | 6 + .../tests/extension/array_with_attr/array.py | 84 + .../array_with_attr/test_array_with_attr.py | 33 + pandas/tests/extension/arrow/__init__.py | 0 pandas/tests/extension/arrow/arrays.py | 197 + pandas/tests/extension/arrow/test_bool.py | 104 + pandas/tests/extension/arrow/test_string.py | 12 + .../tests/extension/arrow/test_timestamp.py | 57 + pandas/tests/extension/base/__init__.py | 71 + pandas/tests/extension/base/base.py | 21 + pandas/tests/extension/base/casting.py | 86 + pandas/tests/extension/base/constructors.py | 142 + pandas/tests/extension/base/dim2.py | 304 + pandas/tests/extension/base/dtype.py | 137 + pandas/tests/extension/base/getitem.py | 488 + pandas/tests/extension/base/groupby.py | 116 + pandas/tests/extension/base/index.py | 20 + pandas/tests/extension/base/interface.py | 127 + pandas/tests/extension/base/io.py | 19 + pandas/tests/extension/base/methods.py | 610 + pandas/tests/extension/base/missing.py | 160 + pandas/tests/extension/base/ops.py | 217 + pandas/tests/extension/base/printing.py | 42 + pandas/tests/extension/base/reduce.py | 69 + pandas/tests/extension/base/reshaping.py | 378 + pandas/tests/extension/base/setitem.py | 443 + pandas/tests/extension/conftest.py | 195 + pandas/tests/extension/date/__init__.py | 6 + pandas/tests/extension/date/array.py | 182 + pandas/tests/extension/decimal/__init__.py | 8 + pandas/tests/extension/decimal/array.py | 288 + .../tests/extension/decimal/test_decimal.py | 481 + pandas/tests/extension/json/__init__.py | 7 + pandas/tests/extension/json/array.py | 244 + pandas/tests/extension/json/test_json.py | 390 + pandas/tests/extension/list/__init__.py | 7 + pandas/tests/extension/list/array.py | 132 + pandas/tests/extension/list/test_list.py | 33 + pandas/tests/extension/test_arrow.py | 1709 ++ pandas/tests/extension/test_boolean.py | 405 + pandas/tests/extension/test_categorical.py | 310 + pandas/tests/extension/test_common.py | 81 + pandas/tests/extension/test_datetime.py | 194 + pandas/tests/extension/test_extension.py | 40 + pandas/tests/extension/test_external_block.py | 40 + pandas/tests/extension/test_floating.py | 215 + pandas/tests/extension/test_integer.py | 236 + pandas/tests/extension/test_interval.py | 188 + pandas/tests/extension/test_numpy.py | 454 + pandas/tests/extension/test_period.py | 191 + pandas/tests/extension/test_sparse.py | 528 + pandas/tests/extension/test_string.py | 393 + pandas/tests/frame/__init__.py | 0 pandas/tests/frame/common.py | 58 + pandas/tests/frame/conftest.py | 284 + pandas/tests/frame/constructors/__init__.py | 0 .../frame/constructors/test_from_dict.py | 199 + .../frame/constructors/test_from_records.py | 466 + pandas/tests/frame/indexing/__init__.py | 0 pandas/tests/frame/indexing/test_coercion.py | 188 + pandas/tests/frame/indexing/test_delitem.py | 60 + pandas/tests/frame/indexing/test_get.py | 27 + pandas/tests/frame/indexing/test_get_value.py | 22 + pandas/tests/frame/indexing/test_getitem.py | 477 + pandas/tests/frame/indexing/test_indexing.py | 1760 +++ 
pandas/tests/frame/indexing/test_insert.py | 106 + pandas/tests/frame/indexing/test_lookup.py | 94 + pandas/tests/frame/indexing/test_mask.py | 162 + pandas/tests/frame/indexing/test_set_value.py | 68 + pandas/tests/frame/indexing/test_setitem.py | 1255 ++ pandas/tests/frame/indexing/test_take.py | 88 + pandas/tests/frame/indexing/test_where.py | 1058 ++ pandas/tests/frame/indexing/test_xs.py | 419 + pandas/tests/frame/methods/__init__.py | 7 + .../frame/methods/test_add_prefix_suffix.py | 20 + pandas/tests/frame/methods/test_align.py | 403 + pandas/tests/frame/methods/test_append.py | 292 + pandas/tests/frame/methods/test_asfreq.py | 198 + pandas/tests/frame/methods/test_asof.py | 195 + pandas/tests/frame/methods/test_assign.py | 84 + pandas/tests/frame/methods/test_astype.py | 783 + pandas/tests/frame/methods/test_at_time.py | 124 + .../tests/frame/methods/test_between_time.py | 289 + pandas/tests/frame/methods/test_clip.py | 178 + pandas/tests/frame/methods/test_combine.py | 47 + .../tests/frame/methods/test_combine_first.py | 528 + pandas/tests/frame/methods/test_compare.py | 254 + pandas/tests/frame/methods/test_convert.py | 59 + .../frame/methods/test_convert_dtypes.py | 43 + pandas/tests/frame/methods/test_copy.py | 63 + pandas/tests/frame/methods/test_count.py | 39 + .../test_count_with_level_deprecated.py | 123 + pandas/tests/frame/methods/test_cov_corr.py | 434 + pandas/tests/frame/methods/test_describe.py | 411 + pandas/tests/frame/methods/test_diff.py | 305 + pandas/tests/frame/methods/test_dot.py | 131 + pandas/tests/frame/methods/test_drop.py | 551 + .../frame/methods/test_drop_duplicates.py | 488 + pandas/tests/frame/methods/test_droplevel.py | 36 + pandas/tests/frame/methods/test_dropna.py | 288 + pandas/tests/frame/methods/test_dtypes.py | 155 + pandas/tests/frame/methods/test_duplicated.py | 113 + pandas/tests/frame/methods/test_equals.py | 82 + pandas/tests/frame/methods/test_explode.py | 277 + pandas/tests/frame/methods/test_fillna.py | 808 + pandas/tests/frame/methods/test_filter.py | 139 + .../frame/methods/test_first_and_last.py | 88 + .../frame/methods/test_first_valid_index.py | 94 + .../frame/methods/test_get_numeric_data.py | 103 + pandas/tests/frame/methods/test_head_tail.py | 57 + .../tests/frame/methods/test_infer_objects.py | 42 + .../tests/frame/methods/test_interpolate.py | 411 + .../methods/test_is_homogeneous_dtype.py | 57 + pandas/tests/frame/methods/test_isin.py | 219 + pandas/tests/frame/methods/test_join.py | 564 + pandas/tests/frame/methods/test_matmul.py | 86 + pandas/tests/frame/methods/test_nlargest.py | 239 + pandas/tests/frame/methods/test_pct_change.py | 120 + pandas/tests/frame/methods/test_pipe.py | 39 + pandas/tests/frame/methods/test_pop.py | 71 + pandas/tests/frame/methods/test_quantile.py | 1020 ++ pandas/tests/frame/methods/test_rank.py | 497 + pandas/tests/frame/methods/test_reindex.py | 1225 ++ .../tests/frame/methods/test_reindex_like.py | 39 + pandas/tests/frame/methods/test_rename.py | 430 + .../tests/frame/methods/test_rename_axis.py | 111 + .../frame/methods/test_reorder_levels.py | 75 + pandas/tests/frame/methods/test_replace.py | 1587 ++ .../tests/frame/methods/test_reset_index.py | 795 + pandas/tests/frame/methods/test_round.py | 218 + pandas/tests/frame/methods/test_sample.py | 365 + .../tests/frame/methods/test_select_dtypes.py | 467 + pandas/tests/frame/methods/test_set_axis.py | 193 + pandas/tests/frame/methods/test_set_index.py | 718 + pandas/tests/frame/methods/test_shift.py | 691 + 
pandas/tests/frame/methods/test_sort_index.py | 927 ++ .../tests/frame/methods/test_sort_values.py | 893 ++ pandas/tests/frame/methods/test_swapaxes.py | 22 + pandas/tests/frame/methods/test_swaplevel.py | 36 + pandas/tests/frame/methods/test_to_csv.py | 1328 ++ pandas/tests/frame/methods/test_to_dict.py | 423 + .../frame/methods/test_to_dict_of_blocks.py | 77 + pandas/tests/frame/methods/test_to_numpy.py | 38 + pandas/tests/frame/methods/test_to_period.py | 85 + pandas/tests/frame/methods/test_to_records.py | 510 + .../tests/frame/methods/test_to_timestamp.py | 153 + pandas/tests/frame/methods/test_transpose.py | 118 + pandas/tests/frame/methods/test_truncate.py | 156 + pandas/tests/frame/methods/test_tz_convert.py | 132 + .../tests/frame/methods/test_tz_localize.py | 67 + pandas/tests/frame/methods/test_update.py | 169 + .../tests/frame/methods/test_value_counts.py | 146 + pandas/tests/frame/methods/test_values.py | 273 + pandas/tests/frame/test_alter_axes.py | 30 + pandas/tests/frame/test_api.py | 389 + pandas/tests/frame/test_arithmetic.py | 2085 +++ pandas/tests/frame/test_block_internals.py | 440 + pandas/tests/frame/test_constructors.py | 3181 ++++ pandas/tests/frame/test_cumulative.py | 81 + pandas/tests/frame/test_iteration.py | 162 + pandas/tests/frame/test_logical_ops.py | 191 + pandas/tests/frame/test_nonunique_indexes.py | 346 + pandas/tests/frame/test_npfuncs.py | 28 + pandas/tests/frame/test_query_eval.py | 1270 ++ pandas/tests/frame/test_reductions.py | 1887 +++ pandas/tests/frame/test_repr_info.py | 365 + pandas/tests/frame/test_stack_unstack.py | 2185 +++ pandas/tests/frame/test_subclass.py | 747 + pandas/tests/frame/test_ufunc.py | 303 + pandas/tests/frame/test_unary.py | 184 + pandas/tests/frame/test_validate.py | 41 + pandas/tests/generic/__init__.py | 0 pandas/tests/generic/test_duplicate_labels.py | 454 + pandas/tests/generic/test_finalize.py | 773 + pandas/tests/generic/test_frame.py | 200 + pandas/tests/generic/test_generic.py | 487 + .../generic/test_label_or_level_utils.py | 349 + pandas/tests/generic/test_series.py | 144 + pandas/tests/generic/test_to_xarray.py | 128 + pandas/tests/groupby/__init__.py | 27 + pandas/tests/groupby/aggregate/__init__.py | 0 .../tests/groupby/aggregate/test_aggregate.py | 1468 ++ pandas/tests/groupby/aggregate/test_cython.py | 401 + pandas/tests/groupby/aggregate/test_numba.py | 241 + pandas/tests/groupby/aggregate/test_other.py | 674 + pandas/tests/groupby/conftest.py | 204 + pandas/tests/groupby/test_allowlist.py | 454 + pandas/tests/groupby/test_any_all.py | 190 + pandas/tests/groupby/test_apply.py | 1372 ++ pandas/tests/groupby/test_apply_mutate.py | 146 + pandas/tests/groupby/test_bin_groupby.py | 69 + pandas/tests/groupby/test_categorical.py | 1847 +++ pandas/tests/groupby/test_counting.py | 377 + pandas/tests/groupby/test_filters.py | 614 + .../tests/groupby/test_frame_value_counts.py | 783 + pandas/tests/groupby/test_function.py | 1603 ++ pandas/tests/groupby/test_groupby.py | 2923 ++++ pandas/tests/groupby/test_groupby_dropna.py | 500 + .../tests/groupby/test_groupby_shift_diff.py | 156 + pandas/tests/groupby/test_groupby_subclass.py | 113 + pandas/tests/groupby/test_grouping.py | 1026 ++ pandas/tests/groupby/test_index_as_string.py | 85 + pandas/tests/groupby/test_indexing.py | 332 + pandas/tests/groupby/test_libgroupby.py | 284 + pandas/tests/groupby/test_min_max.py | 244 + pandas/tests/groupby/test_missing.py | 155 + pandas/tests/groupby/test_nth.py | 843 + pandas/tests/groupby/test_numba.py | 73 + 
pandas/tests/groupby/test_nunique.py | 184 + pandas/tests/groupby/test_pipe.py | 84 + pandas/tests/groupby/test_quantile.py | 380 + pandas/tests/groupby/test_rank.py | 681 + pandas/tests/groupby/test_sample.py | 144 + pandas/tests/groupby/test_size.py | 90 + pandas/tests/groupby/test_timegrouper.py | 928 ++ pandas/tests/groupby/test_value_counts.py | 193 + pandas/tests/groupby/transform/__init__.py | 0 pandas/tests/groupby/transform/test_numba.py | 229 + .../tests/groupby/transform/test_transform.py | 1580 ++ pandas/tests/indexes/__init__.py | 0 pandas/tests/indexes/base_class/__init__.py | 0 .../indexes/base_class/test_constructors.py | 50 + .../tests/indexes/base_class/test_formats.py | 148 + .../tests/indexes/base_class/test_indexing.py | 86 + .../tests/indexes/base_class/test_pickle.py | 11 + .../tests/indexes/base_class/test_reshape.py | 86 + .../tests/indexes/base_class/test_setops.py | 261 + pandas/tests/indexes/base_class/test_where.py | 13 + pandas/tests/indexes/categorical/__init__.py | 0 .../tests/indexes/categorical/test_append.py | 62 + .../tests/indexes/categorical/test_astype.py | 87 + .../indexes/categorical/test_category.py | 408 + .../indexes/categorical/test_constructors.py | 159 + .../tests/indexes/categorical/test_equals.py | 90 + .../tests/indexes/categorical/test_fillna.py | 54 + .../tests/indexes/categorical/test_formats.py | 114 + .../indexes/categorical/test_indexing.py | 426 + pandas/tests/indexes/categorical/test_map.py | 115 + .../tests/indexes/categorical/test_reindex.py | 86 + pandas/tests/indexes/common.py | 902 ++ pandas/tests/indexes/conftest.py | 41 + pandas/tests/indexes/datetimelike.py | 139 + .../tests/indexes/datetimelike_/__init__.py | 0 .../datetimelike_/test_drop_duplicates.py | 80 + .../indexes/datetimelike_/test_equals.py | 182 + .../indexes/datetimelike_/test_indexing.py | 46 + .../datetimelike_/test_is_monotonic.py | 46 + .../tests/indexes/datetimelike_/test_nat.py | 53 + .../indexes/datetimelike_/test_sort_values.py | 316 + .../datetimelike_/test_value_counts.py | 103 + pandas/tests/indexes/datetimes/__init__.py | 0 .../indexes/datetimes/methods/__init__.py | 0 .../indexes/datetimes/methods/test_astype.py | 332 + .../datetimes/methods/test_factorize.py | 107 + .../indexes/datetimes/methods/test_fillna.py | 62 + .../indexes/datetimes/methods/test_insert.py | 267 + .../datetimes/methods/test_isocalendar.py | 20 + .../indexes/datetimes/methods/test_repeat.py | 78 + .../indexes/datetimes/methods/test_shift.py | 163 + .../indexes/datetimes/methods/test_snap.py | 67 + .../datetimes/methods/test_to_frame.py | 31 + .../datetimes/methods/test_to_period.py | 191 + .../datetimes/methods/test_to_series.py | 40 + pandas/tests/indexes/datetimes/test_asof.py | 31 + .../indexes/datetimes/test_constructors.py | 1184 ++ .../indexes/datetimes/test_date_range.py | 1161 ++ .../tests/indexes/datetimes/test_datetime.py | 168 + .../indexes/datetimes/test_datetimelike.py | 39 + pandas/tests/indexes/datetimes/test_delete.py | 138 + .../tests/indexes/datetimes/test_formats.py | 273 + .../tests/indexes/datetimes/test_freq_attr.py | 61 + .../tests/indexes/datetimes/test_indexing.py | 808 + pandas/tests/indexes/datetimes/test_join.py | 152 + pandas/tests/indexes/datetimes/test_map.py | 47 + pandas/tests/indexes/datetimes/test_misc.py | 308 + .../tests/indexes/datetimes/test_npfuncs.py | 13 + pandas/tests/indexes/datetimes/test_ops.py | 85 + .../indexes/datetimes/test_partial_slicing.py | 459 + pandas/tests/indexes/datetimes/test_pickle.py | 45 + 
.../tests/indexes/datetimes/test_reindex.py | 56 + .../indexes/datetimes/test_scalar_compat.py | 363 + pandas/tests/indexes/datetimes/test_setops.py | 607 + .../tests/indexes/datetimes/test_timezones.py | 1220 ++ pandas/tests/indexes/datetimes/test_unique.py | 77 + pandas/tests/indexes/interval/__init__.py | 0 pandas/tests/indexes/interval/test_astype.py | 245 + pandas/tests/indexes/interval/test_base.py | 71 + .../indexes/interval/test_constructors.py | 473 + pandas/tests/indexes/interval/test_equals.py | 36 + pandas/tests/indexes/interval/test_formats.py | 105 + .../tests/indexes/interval/test_indexing.py | 616 + .../tests/indexes/interval/test_interval.py | 918 ++ .../indexes/interval/test_interval_range.py | 355 + .../indexes/interval/test_interval_tree.py | 209 + pandas/tests/indexes/interval/test_join.py | 44 + pandas/tests/indexes/interval/test_pickle.py | 13 + pandas/tests/indexes/interval/test_setops.py | 202 + pandas/tests/indexes/multi/__init__.py | 0 pandas/tests/indexes/multi/conftest.py | 77 + pandas/tests/indexes/multi/test_analytics.py | 260 + pandas/tests/indexes/multi/test_astype.py | 30 + pandas/tests/indexes/multi/test_compat.py | 98 + .../tests/indexes/multi/test_constructors.py | 839 + pandas/tests/indexes/multi/test_conversion.py | 164 + pandas/tests/indexes/multi/test_copy.py | 118 + pandas/tests/indexes/multi/test_drop.py | 193 + pandas/tests/indexes/multi/test_duplicates.py | 339 + .../tests/indexes/multi/test_equivalence.py | 298 + pandas/tests/indexes/multi/test_formats.py | 229 + .../indexes/multi/test_get_level_values.py | 125 + pandas/tests/indexes/multi/test_get_set.py | 498 + pandas/tests/indexes/multi/test_indexing.py | 900 ++ pandas/tests/indexes/multi/test_integrity.py | 280 + pandas/tests/indexes/multi/test_isin.py | 78 + pandas/tests/indexes/multi/test_join.py | 208 + pandas/tests/indexes/multi/test_lexsort.py | 63 + pandas/tests/indexes/multi/test_missing.py | 112 + pandas/tests/indexes/multi/test_monotonic.py | 188 + pandas/tests/indexes/multi/test_names.py | 205 + .../indexes/multi/test_partial_indexing.py | 148 + pandas/tests/indexes/multi/test_pickle.py | 10 + pandas/tests/indexes/multi/test_reindex.py | 161 + pandas/tests/indexes/multi/test_reshape.py | 185 + pandas/tests/indexes/multi/test_setops.py | 576 + pandas/tests/indexes/multi/test_sorting.py | 282 + pandas/tests/indexes/multi/test_take.py | 79 + pandas/tests/indexes/numeric/__init__.py | 0 pandas/tests/indexes/numeric/test_astype.py | 99 + pandas/tests/indexes/numeric/test_indexing.py | 595 + pandas/tests/indexes/numeric/test_join.py | 392 + pandas/tests/indexes/numeric/test_numeric.py | 703 + pandas/tests/indexes/numeric/test_setops.py | 166 + pandas/tests/indexes/object/__init__.py | 0 pandas/tests/indexes/object/test_astype.py | 24 + pandas/tests/indexes/object/test_indexing.py | 203 + pandas/tests/indexes/period/__init__.py | 0 .../tests/indexes/period/methods/__init__.py | 0 .../indexes/period/methods/test_asfreq.py | 130 + .../indexes/period/methods/test_astype.py | 178 + .../indexes/period/methods/test_factorize.py | 52 + .../indexes/period/methods/test_fillna.py | 41 + .../indexes/period/methods/test_insert.py | 18 + .../indexes/period/methods/test_is_full.py | 23 + .../indexes/period/methods/test_repeat.py | 26 + .../indexes/period/methods/test_shift.py | 122 + .../period/methods/test_to_timestamp.py | 132 + .../tests/indexes/period/test_constructors.py | 546 + pandas/tests/indexes/period/test_formats.py | 199 + pandas/tests/indexes/period/test_freq_attr.py | 28 + 
pandas/tests/indexes/period/test_indexing.py | 930 ++ pandas/tests/indexes/period/test_join.py | 58 + pandas/tests/indexes/period/test_monotonic.py | 42 + .../indexes/period/test_partial_slicing.py | 205 + pandas/tests/indexes/period/test_period.py | 353 + .../tests/indexes/period/test_period_range.py | 121 + pandas/tests/indexes/period/test_pickle.py | 26 + .../tests/indexes/period/test_resolution.py | 23 + .../indexes/period/test_scalar_compat.py | 32 + .../tests/indexes/period/test_searchsorted.py | 80 + pandas/tests/indexes/period/test_setops.py | 360 + pandas/tests/indexes/period/test_tools.py | 48 + pandas/tests/indexes/ranges/__init__.py | 0 .../tests/indexes/ranges/test_constructors.py | 167 + pandas/tests/indexes/ranges/test_indexing.py | 93 + pandas/tests/indexes/ranges/test_join.py | 178 + pandas/tests/indexes/ranges/test_range.py | 626 + pandas/tests/indexes/ranges/test_setops.py | 492 + pandas/tests/indexes/test_any_index.py | 195 + pandas/tests/indexes/test_base.py | 1620 ++ pandas/tests/indexes/test_common.py | 514 + pandas/tests/indexes/test_engines.py | 193 + pandas/tests/indexes/test_frozen.py | 113 + pandas/tests/indexes/test_index_new.py | 374 + pandas/tests/indexes/test_indexing.py | 373 + pandas/tests/indexes/test_numpy_compat.py | 193 + pandas/tests/indexes/test_setops.py | 880 ++ pandas/tests/indexes/test_subclass.py | 38 + pandas/tests/indexes/timedeltas/__init__.py | 0 .../indexes/timedeltas/methods/__init__.py | 0 .../indexes/timedeltas/methods/test_astype.py | 131 + .../timedeltas/methods/test_factorize.py | 40 + .../indexes/timedeltas/methods/test_fillna.py | 22 + .../indexes/timedeltas/methods/test_insert.py | 146 + .../indexes/timedeltas/methods/test_repeat.py | 34 + .../indexes/timedeltas/methods/test_shift.py | 77 + .../indexes/timedeltas/test_constructors.py | 279 + .../tests/indexes/timedeltas/test_delete.py | 71 + .../tests/indexes/timedeltas/test_formats.py | 93 + .../indexes/timedeltas/test_freq_attr.py | 61 + .../tests/indexes/timedeltas/test_indexing.py | 360 + pandas/tests/indexes/timedeltas/test_join.py | 53 + pandas/tests/indexes/timedeltas/test_ops.py | 14 + .../tests/indexes/timedeltas/test_pickle.py | 11 + .../indexes/timedeltas/test_scalar_compat.py | 142 + .../indexes/timedeltas/test_searchsorted.py | 28 + .../tests/indexes/timedeltas/test_setops.py | 260 + .../indexes/timedeltas/test_timedelta.py | 145 + .../timedeltas/test_timedelta_range.py | 92 + pandas/tests/indexing/__init__.py | 0 pandas/tests/indexing/common.py | 190 + pandas/tests/indexing/interval/__init__.py | 0 .../tests/indexing/interval/test_interval.py | 175 + .../indexing/interval/test_interval_new.py | 233 + pandas/tests/indexing/multiindex/__init__.py | 0 .../multiindex/test_chaining_and_caching.py | 80 + .../indexing/multiindex/test_datetime.py | 50 + .../tests/indexing/multiindex/test_getitem.py | 394 + pandas/tests/indexing/multiindex/test_iloc.py | 171 + .../indexing/multiindex/test_indexing_slow.py | 97 + pandas/tests/indexing/multiindex/test_loc.py | 962 ++ .../indexing/multiindex/test_multiindex.py | 229 + .../tests/indexing/multiindex/test_partial.py | 258 + .../tests/indexing/multiindex/test_setitem.py | 526 + .../tests/indexing/multiindex/test_slice.py | 805 + .../tests/indexing/multiindex/test_sorted.py | 127 + pandas/tests/indexing/test_at.py | 236 + pandas/tests/indexing/test_categorical.py | 559 + .../indexing/test_chaining_and_caching.py | 595 + pandas/tests/indexing/test_check_indexer.py | 105 + pandas/tests/indexing/test_coercion.py | 945 ++ 
pandas/tests/indexing/test_datetime.py | 170 + pandas/tests/indexing/test_floats.py | 694 + pandas/tests/indexing/test_iat.py | 49 + pandas/tests/indexing/test_iloc.py | 1458 ++ pandas/tests/indexing/test_indexers.py | 61 + pandas/tests/indexing/test_indexing.py | 1114 ++ pandas/tests/indexing/test_loc.py | 3221 ++++ pandas/tests/indexing/test_na_indexing.py | 75 + pandas/tests/indexing/test_partial.py | 672 + pandas/tests/indexing/test_scalar.py | 292 + pandas/tests/interchange/__init__.py | 0 pandas/tests/interchange/conftest.py | 12 + pandas/tests/interchange/test_impl.py | 206 + .../interchange/test_spec_conformance.py | 164 + pandas/tests/interchange/test_utils.py | 40 + pandas/tests/internals/__init__.py | 0 pandas/tests/internals/test_api.py | 55 + pandas/tests/internals/test_internals.py | 1445 ++ pandas/tests/internals/test_managers.py | 72 + pandas/tests/io/__init__.py | 27 + pandas/tests/io/conftest.py | 214 + pandas/tests/io/data/csv/banklist.csv | 507 + pandas/tests/io/data/csv/iris.csv | 151 + pandas/tests/io/data/csv/test1.csv | 8 + pandas/tests/io/data/csv/test1.csv.bz2 | Bin 0 -> 307 bytes pandas/tests/io/data/csv/test1.csv.gz | Bin 0 -> 294 bytes pandas/tests/io/data/csv/test_mmap.csv | 5 + pandas/tests/io/data/csv/tips.csv | 245 + pandas/tests/io/data/csv/tips.csv.bz2 | Bin 0 -> 1316 bytes pandas/tests/io/data/csv/tips.csv.gz | Bin 0 -> 1740 bytes pandas/tests/io/data/excel/blank.ods | Bin 0 -> 2813 bytes pandas/tests/io/data/excel/blank.xls | Bin 0 -> 23040 bytes pandas/tests/io/data/excel/blank.xlsb | Bin 0 -> 8908 bytes pandas/tests/io/data/excel/blank.xlsm | Bin 0 -> 8418 bytes pandas/tests/io/data/excel/blank.xlsx | Bin 0 -> 8379 bytes .../tests/io/data/excel/blank_with_header.ods | Bin 0 -> 2893 bytes .../tests/io/data/excel/blank_with_header.xls | Bin 0 -> 23040 bytes .../io/data/excel/blank_with_header.xlsb | Bin 0 -> 9129 bytes .../io/data/excel/blank_with_header.xlsm | Bin 0 -> 8813 bytes .../io/data/excel/blank_with_header.xlsx | Bin 0 -> 8773 bytes pandas/tests/io/data/excel/chartsheet.xls | Bin 0 -> 43008 bytes pandas/tests/io/data/excel/chartsheet.xlsb | Bin 0 -> 18427 bytes pandas/tests/io/data/excel/chartsheet.xlsm | Bin 0 -> 20093 bytes pandas/tests/io/data/excel/chartsheet.xlsx | Bin 0 -> 20069 bytes pandas/tests/io/data/excel/df_empty.xlsx | Bin 0 -> 5595 bytes pandas/tests/io/data/excel/df_equals.xlsx | Bin 0 -> 5595 bytes pandas/tests/io/data/excel/df_header_oob.xlsx | Bin 0 -> 5605 bytes .../data/excel/df_mangle_dup_col_dtypes.ods | Bin 0 -> 7784 bytes .../data/excel/df_mangle_dup_col_dtypes.xls | Bin 0 -> 6144 bytes .../data/excel/df_mangle_dup_col_dtypes.xlsb | Bin 0 -> 9132 bytes .../data/excel/df_mangle_dup_col_dtypes.xlsm | Bin 0 -> 7236 bytes .../data/excel/df_mangle_dup_col_dtypes.xlsx | Bin 0 -> 5507 bytes .../tests/io/data/excel/dimension_large.xlsx | Bin 0 -> 4895 bytes .../io/data/excel/dimension_missing.xlsx | Bin 0 -> 4875 bytes .../tests/io/data/excel/dimension_small.xlsx | Bin 0 -> 4894 bytes .../io/data/excel/empty_trailing_rows.xlsx | Bin 0 -> 4900 bytes .../io/data/excel/empty_with_blank_row.xlsx | Bin 0 -> 4301 bytes pandas/tests/io/data/excel/gh-35802.ods | Bin 0 -> 12692 bytes pandas/tests/io/data/excel/gh-36122.ods | Bin 0 -> 8974 bytes .../excel/ints_spelled_with_decimals.xlsx | Bin 0 -> 4734 bytes .../io/data/excel/invalid_value_type.ods | Bin 0 -> 8502 bytes .../data/excel/multiindex_no_index_names.xlsx | Bin 0 -> 5723 bytes .../io/data/excel/one_col_blank_line.ods | Bin 0 -> 2882 bytes 
.../io/data/excel/one_col_blank_line.xls | Bin 0 -> 25600 bytes .../io/data/excel/one_col_blank_line.xlsb | Bin 0 -> 7992 bytes .../io/data/excel/one_col_blank_line.xlsm | Bin 0 -> 8650 bytes .../io/data/excel/one_col_blank_line.xlsx | Bin 0 -> 8637 bytes pandas/tests/io/data/excel/test1.ods | Bin 0 -> 4440 bytes pandas/tests/io/data/excel/test1.xls | Bin 0 -> 28672 bytes pandas/tests/io/data/excel/test1.xlsb | Bin 0 -> 11359 bytes pandas/tests/io/data/excel/test1.xlsm | Bin 0 -> 12091 bytes pandas/tests/io/data/excel/test1.xlsx | Bin 0 -> 12074 bytes pandas/tests/io/data/excel/test2.ods | Bin 0 -> 2877 bytes pandas/tests/io/data/excel/test2.xls | Bin 0 -> 5632 bytes pandas/tests/io/data/excel/test2.xlsb | Bin 0 -> 7579 bytes pandas/tests/io/data/excel/test2.xlsm | Bin 0 -> 8086 bytes pandas/tests/io/data/excel/test2.xlsx | Bin 0 -> 8067 bytes pandas/tests/io/data/excel/test3.ods | Bin 0 -> 2889 bytes pandas/tests/io/data/excel/test3.xls | Bin 0 -> 23040 bytes pandas/tests/io/data/excel/test3.xlsb | Bin 0 -> 7553 bytes pandas/tests/io/data/excel/test3.xlsm | Bin 0 -> 8063 bytes pandas/tests/io/data/excel/test3.xlsx | Bin 0 -> 8045 bytes pandas/tests/io/data/excel/test4.ods | Bin 0 -> 2992 bytes pandas/tests/io/data/excel/test4.xls | Bin 0 -> 25600 bytes pandas/tests/io/data/excel/test4.xlsb | Bin 0 -> 7646 bytes pandas/tests/io/data/excel/test4.xlsm | Bin 0 -> 8360 bytes pandas/tests/io/data/excel/test4.xlsx | Bin 0 -> 8344 bytes pandas/tests/io/data/excel/test5.ods | Bin 0 -> 2906 bytes pandas/tests/io/data/excel/test5.xls | Bin 0 -> 20480 bytes pandas/tests/io/data/excel/test5.xlsb | Bin 0 -> 7824 bytes pandas/tests/io/data/excel/test5.xlsm | Bin 0 -> 8642 bytes pandas/tests/io/data/excel/test5.xlsx | Bin 0 -> 8626 bytes .../tests/io/data/excel/test_converters.ods | Bin 0 -> 3287 bytes .../tests/io/data/excel/test_converters.xls | Bin 0 -> 6144 bytes .../tests/io/data/excel/test_converters.xlsb | Bin 0 -> 7810 bytes .../tests/io/data/excel/test_converters.xlsm | Bin 0 -> 8467 bytes .../tests/io/data/excel/test_converters.xlsx | Bin 0 -> 4810 bytes .../tests/io/data/excel/test_datetime_mi.ods | Bin 0 -> 3585 bytes .../tests/io/data/excel/test_datetime_mi.xls | Bin 0 -> 24576 bytes .../tests/io/data/excel/test_datetime_mi.xlsb | Bin 0 -> 7947 bytes .../tests/io/data/excel/test_datetime_mi.xlsm | Bin 0 -> 8700 bytes .../tests/io/data/excel/test_datetime_mi.xlsx | Bin 0 -> 8687 bytes pandas/tests/io/data/excel/test_decimal.ods | Bin 0 -> 4406 bytes pandas/tests/io/data/excel/test_decimal.xls | Bin 0 -> 34304 bytes pandas/tests/io/data/excel/test_decimal.xlsb | Bin 0 -> 16038 bytes pandas/tests/io/data/excel/test_decimal.xlsm | Bin 0 -> 17971 bytes pandas/tests/io/data/excel/test_decimal.xlsx | Bin 0 -> 17955 bytes .../io/data/excel/test_index_name_pre17.ods | Bin 0 -> 3699 bytes .../io/data/excel/test_index_name_pre17.xls | Bin 0 -> 26624 bytes .../io/data/excel/test_index_name_pre17.xlsb | Bin 0 -> 11097 bytes .../io/data/excel/test_index_name_pre17.xlsm | Bin 0 -> 10896 bytes .../io/data/excel/test_index_name_pre17.xlsx | Bin 0 -> 10879 bytes .../tests/io/data/excel/test_multisheet.ods | Bin 0 -> 3797 bytes .../tests/io/data/excel/test_multisheet.xls | Bin 0 -> 24576 bytes .../tests/io/data/excel/test_multisheet.xlsb | Bin 0 -> 10707 bytes .../tests/io/data/excel/test_multisheet.xlsm | Bin 0 -> 11313 bytes .../tests/io/data/excel/test_multisheet.xlsx | Bin 0 -> 11296 bytes pandas/tests/io/data/excel/test_newlines.ods | Bin 0 -> 2261 bytes pandas/tests/io/data/excel/test_spaces.ods | Bin 0 
-> 9263 bytes pandas/tests/io/data/excel/test_spaces.xls | Bin 0 -> 5632 bytes pandas/tests/io/data/excel/test_spaces.xlsb | Bin 0 -> 8036 bytes pandas/tests/io/data/excel/test_spaces.xlsm | Bin 0 -> 4848 bytes pandas/tests/io/data/excel/test_spaces.xlsx | Bin 0 -> 8622 bytes pandas/tests/io/data/excel/test_squeeze.ods | Bin 0 -> 3218 bytes pandas/tests/io/data/excel/test_squeeze.xls | Bin 0 -> 26112 bytes pandas/tests/io/data/excel/test_squeeze.xlsb | Bin 0 -> 8567 bytes pandas/tests/io/data/excel/test_squeeze.xlsm | Bin 0 -> 9122 bytes pandas/tests/io/data/excel/test_squeeze.xlsx | Bin 0 -> 9106 bytes pandas/tests/io/data/excel/test_types.ods | Bin 0 -> 3489 bytes pandas/tests/io/data/excel/test_types.xls | Bin 0 -> 26112 bytes pandas/tests/io/data/excel/test_types.xlsb | Bin 0 -> 8053 bytes pandas/tests/io/data/excel/test_types.xlsm | Bin 0 -> 9042 bytes pandas/tests/io/data/excel/test_types.xlsx | Bin 0 -> 9010 bytes .../tests/io/data/excel/testdateoverflow.ods | Bin 0 -> 3422 bytes .../tests/io/data/excel/testdateoverflow.xls | Bin 0 -> 19456 bytes .../tests/io/data/excel/testdateoverflow.xlsb | Bin 0 -> 9856 bytes .../tests/io/data/excel/testdateoverflow.xlsm | Bin 0 -> 9374 bytes .../tests/io/data/excel/testdateoverflow.xlsx | Bin 0 -> 9351 bytes pandas/tests/io/data/excel/testdtype.ods | Bin 0 -> 3196 bytes pandas/tests/io/data/excel/testdtype.xls | Bin 0 -> 22528 bytes pandas/tests/io/data/excel/testdtype.xlsb | Bin 0 -> 7697 bytes pandas/tests/io/data/excel/testdtype.xlsm | Bin 0 -> 8517 bytes pandas/tests/io/data/excel/testdtype.xlsx | Bin 0 -> 8501 bytes pandas/tests/io/data/excel/testmultiindex.ods | Bin 0 -> 6504 bytes pandas/tests/io/data/excel/testmultiindex.xls | Bin 0 -> 40448 bytes .../tests/io/data/excel/testmultiindex.xlsb | Bin 0 -> 23620 bytes .../tests/io/data/excel/testmultiindex.xlsm | Bin 0 -> 22757 bytes .../tests/io/data/excel/testmultiindex.xlsx | Bin 0 -> 22743 bytes pandas/tests/io/data/excel/testskiprows.ods | Bin 0 -> 3235 bytes pandas/tests/io/data/excel/testskiprows.xls | Bin 0 -> 22528 bytes pandas/tests/io/data/excel/testskiprows.xlsb | Bin 0 -> 7699 bytes pandas/tests/io/data/excel/testskiprows.xlsm | Bin 0 -> 8281 bytes pandas/tests/io/data/excel/testskiprows.xlsx | Bin 0 -> 8258 bytes pandas/tests/io/data/excel/times_1900.ods | Bin 0 -> 3181 bytes pandas/tests/io/data/excel/times_1900.xls | Bin 0 -> 16384 bytes pandas/tests/io/data/excel/times_1900.xlsb | Bin 0 -> 7773 bytes pandas/tests/io/data/excel/times_1900.xlsm | Bin 0 -> 8282 bytes pandas/tests/io/data/excel/times_1900.xlsx | Bin 0 -> 8266 bytes pandas/tests/io/data/excel/times_1904.ods | Bin 0 -> 3215 bytes pandas/tests/io/data/excel/times_1904.xls | Bin 0 -> 16384 bytes pandas/tests/io/data/excel/times_1904.xlsb | Bin 0 -> 7734 bytes pandas/tests/io/data/excel/times_1904.xlsm | Bin 0 -> 8260 bytes pandas/tests/io/data/excel/times_1904.xlsx | Bin 0 -> 8244 bytes .../tests/io/data/excel/trailing_blanks.ods | Bin 0 -> 3060 bytes .../tests/io/data/excel/trailing_blanks.xls | Bin 0 -> 25600 bytes .../tests/io/data/excel/trailing_blanks.xlsb | Bin 0 -> 8086 bytes .../tests/io/data/excel/trailing_blanks.xlsm | Bin 0 -> 8824 bytes .../tests/io/data/excel/trailing_blanks.xlsx | Bin 0 -> 8806 bytes pandas/tests/io/data/excel/writertable.odt | Bin 0 -> 10313 bytes .../io/data/feather/feather-0_3_1.feather | Bin 0 -> 672 bytes .../data/fixed_width/fixed_width_format.txt | 3 + pandas/tests/io/data/gbq_fake_job.txt | 1 + pandas/tests/io/data/html/banklist.html | 4886 ++++++ 
pandas/tests/io/data/html/spam.html | 797 + pandas/tests/io/data/html/valid_markup.html | 62 + .../tests/io/data/html/wikipedia_states.html | 1756 +++ .../io/data/html_encoding/chinese_utf-16.html | Bin 0 -> 824 bytes .../io/data/html_encoding/chinese_utf-32.html | Bin 0 -> 1648 bytes .../io/data/html_encoding/chinese_utf-8.html | 26 + .../io/data/html_encoding/letz_latin1.html | 26 + .../io/data/legacy_hdf/datetimetz_object.h5 | Bin 0 -> 106271 bytes pandas/tests/io/data/legacy_hdf/gh26443.h5 | Bin 0 -> 7168 bytes .../data/legacy_hdf/incompatible_dataset.h5 | Bin 0 -> 4480 bytes .../legacy_table_fixed_datetime_py2.h5 | Bin 0 -> 7104 bytes .../data/legacy_hdf/legacy_table_fixed_py2.h5 | Bin 0 -> 1064200 bytes .../io/data/legacy_hdf/legacy_table_py2.h5 | Bin 0 -> 72279 bytes ...periodindex_0.20.1_x86_64_darwin_2.7.13.h5 | Bin 0 -> 7312 bytes .../io/data/legacy_hdf/pytables_native.h5 | Bin 0 -> 74246 bytes .../io/data/legacy_hdf/pytables_native2.h5 | Bin 0 -> 12336 bytes .../0.20.3/0.20.3_x86_64_darwin_3.5.2.msgpack | Bin 0 -> 118654 bytes .../0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle | Bin 0 -> 127923 bytes .../0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle | Bin 0 -> 127244 bytes .../1.1.0/1.1.0_x86_64_darwin_3.8.5.pickle | Bin 0 -> 127216 bytes .../1.2.4/empty_frame_v1_2_4-GH#42345.pkl | Bin 0 -> 501 bytes .../1.4.2/1.4.2_x86_64_linux_3.9.7.pickle | Bin 0 -> 126123 bytes .../tests/io/data/orc/TestOrcFile.decimal.orc | Bin 0 -> 16337 bytes .../io/data/orc/TestOrcFile.emptyFile.orc | Bin 0 -> 523 bytes .../tests/io/data/orc/TestOrcFile.test1.orc | Bin 0 -> 1711 bytes .../io/data/orc/TestOrcFile.testDate1900.orc | Bin 0 -> 30941 bytes .../io/data/orc/TestOrcFile.testDate2038.orc | Bin 0 -> 95787 bytes .../io/data/orc/TestOrcFile.testSnappy.orc | Bin 0 -> 126370 bytes pandas/tests/io/data/parquet/simple.parquet | Bin 0 -> 2157 bytes .../io/data/pickle/categorical.0.25.0.pickle | Bin 0 -> 578 bytes .../data/pickle/sparseframe-0.20.3.pickle.gz | Bin 0 -> 625 bytes .../data/pickle/sparseseries-0.20.3.pickle.gz | Bin 0 -> 521 bytes pandas/tests/io/data/pickle/test_mi_py27.pkl | Bin 0 -> 1395 bytes pandas/tests/io/data/pickle/test_py27.pkl | Bin 0 -> 943 bytes pandas/tests/io/data/spss/labelled-num-na.sav | Bin 0 -> 535 bytes pandas/tests/io/data/spss/labelled-num.sav | Bin 0 -> 507 bytes pandas/tests/io/data/spss/labelled-str.sav | Bin 0 -> 525 bytes pandas/tests/io/data/spss/umlauts.sav | Bin 0 -> 567 bytes pandas/tests/io/data/stata/S4_EDUC1.dta | Bin 0 -> 2997 bytes .../tests/io/data/stata/stata-compat-105.dta | Bin 0 -> 771 bytes .../tests/io/data/stata/stata-compat-108.dta | Bin 0 -> 1128 bytes .../tests/io/data/stata/stata-compat-111.dta | Bin 0 -> 1514 bytes .../tests/io/data/stata/stata-compat-113.dta | Bin 0 -> 1514 bytes .../tests/io/data/stata/stata-compat-114.dta | Bin 0 -> 1810 bytes .../tests/io/data/stata/stata-compat-118.dta | Bin 0 -> 5798 bytes .../stata/stata-dta-partially-labeled.dta | Bin 0 -> 1390 bytes pandas/tests/io/data/stata/stata10_115.dta | Bin 0 -> 2298 bytes pandas/tests/io/data/stata/stata10_117.dta | Bin 0 -> 2298 bytes pandas/tests/io/data/stata/stata11_115.dta | Bin 0 -> 810 bytes pandas/tests/io/data/stata/stata11_117.dta | Bin 0 -> 1268 bytes pandas/tests/io/data/stata/stata12_117.dta | Bin 0 -> 1285 bytes pandas/tests/io/data/stata/stata13_dates.dta | Bin 0 -> 3386 bytes pandas/tests/io/data/stata/stata14_118.dta | Bin 0 -> 5556 bytes pandas/tests/io/data/stata/stata15.dta | Bin 0 -> 3183 bytes pandas/tests/io/data/stata/stata16_118.dta | Bin 0 -> 4614 bytes 
pandas/tests/io/data/stata/stata1_114.dta | Bin 0 -> 1130 bytes pandas/tests/io/data/stata/stata1_117.dta | Bin 0 -> 1569 bytes pandas/tests/io/data/stata/stata1_119.dta.gz | Bin 0 -> 269559 bytes .../tests/io/data/stata/stata1_encoding.dta | Bin 0 -> 3507 bytes .../io/data/stata/stata1_encoding_118.dta | Bin 0 -> 5587 bytes pandas/tests/io/data/stata/stata2_113.dta | Bin 0 -> 1490 bytes pandas/tests/io/data/stata/stata2_114.dta | Bin 0 -> 1786 bytes pandas/tests/io/data/stata/stata2_115.dta | Bin 0 -> 1786 bytes pandas/tests/io/data/stata/stata2_117.dta | Bin 0 -> 2228 bytes pandas/tests/io/data/stata/stata3.csv | 204 + pandas/tests/io/data/stata/stata3_113.dta | Bin 0 -> 12737 bytes pandas/tests/io/data/stata/stata3_114.dta | Bin 0 -> 13255 bytes pandas/tests/io/data/stata/stata3_115.dta | Bin 0 -> 13255 bytes pandas/tests/io/data/stata/stata3_117.dta | Bin 0 -> 13703 bytes pandas/tests/io/data/stata/stata4_113.dta | Bin 0 -> 1528 bytes pandas/tests/io/data/stata/stata4_114.dta | Bin 0 -> 1713 bytes pandas/tests/io/data/stata/stata4_115.dta | Bin 0 -> 1713 bytes pandas/tests/io/data/stata/stata4_117.dta | Bin 0 -> 2185 bytes pandas/tests/io/data/stata/stata5.csv | 19 + pandas/tests/io/data/stata/stata5_113.dta | Bin 0 -> 4628 bytes pandas/tests/io/data/stata/stata5_114.dta | Bin 0 -> 4924 bytes pandas/tests/io/data/stata/stata5_115.dta | Bin 0 -> 4924 bytes pandas/tests/io/data/stata/stata5_117.dta | Bin 0 -> 5366 bytes pandas/tests/io/data/stata/stata6.csv | 6 + pandas/tests/io/data/stata/stata6_113.dta | Bin 0 -> 2752 bytes pandas/tests/io/data/stata/stata6_114.dta | Bin 0 -> 3048 bytes pandas/tests/io/data/stata/stata6_115.dta | Bin 0 -> 3048 bytes pandas/tests/io/data/stata/stata6_117.dta | Bin 0 -> 3490 bytes pandas/tests/io/data/stata/stata7_111.dta | Bin 0 -> 1024 bytes pandas/tests/io/data/stata/stata7_115.dta | Bin 0 -> 722 bytes pandas/tests/io/data/stata/stata7_117.dta | Bin 0 -> 1159 bytes pandas/tests/io/data/stata/stata8_113.dta | Bin 0 -> 1439 bytes pandas/tests/io/data/stata/stata8_115.dta | Bin 0 -> 1624 bytes pandas/tests/io/data/stata/stata8_117.dta | Bin 0 -> 2063 bytes pandas/tests/io/data/stata/stata9_115.dta | Bin 0 -> 2342 bytes pandas/tests/io/data/stata/stata9_117.dta | Bin 0 -> 2342 bytes pandas/tests/io/data/xml/baby_names.xml | 53 + pandas/tests/io/data/xml/books.xml | 21 + pandas/tests/io/data/xml/cta_rail_lines.kml | 92 + pandas/tests/io/data/xml/doc_ch_utf.xml | 29 + pandas/tests/io/data/xml/flatten_doc.xsl | 18 + pandas/tests/io/data/xml/row_field_output.xsl | 19 + pandas/tests/io/excel/__init__.py | 20 + pandas/tests/io/excel/conftest.py | 67 + pandas/tests/io/excel/test_odf.py | 50 + pandas/tests/io/excel/test_odswriter.py | 68 + pandas/tests/io/excel/test_openpyxl.py | 412 + pandas/tests/io/excel/test_readers.py | 1636 ++ pandas/tests/io/excel/test_style.py | 300 + pandas/tests/io/excel/test_writers.py | 1411 ++ pandas/tests/io/excel/test_xlrd.py | 96 + pandas/tests/io/excel/test_xlsxwriter.py | 94 + pandas/tests/io/excel/test_xlwt.py | 146 + pandas/tests/io/formats/__init__.py | 0 .../data/html/datetime64_hourformatter.html | 18 + .../data/html/datetime64_monthformatter.html | 18 + .../io/formats/data/html/escape_disabled.html | 21 + .../tests/io/formats/data/html/escaped.html | 21 + .../data/html/gh12031_expected_output.html | 22 + .../data/html/gh13828_expected_output.html | 21 + .../data/html/gh14882_expected_output_1.html | 274 + .../data/html/gh14882_expected_output_2.html | 258 + .../data/html/gh14998_expected_output.html | 12 + 
.../data/html/gh15019_expected_output.html | 30 + .../data/html/gh21625_expected_output.html | 14 + .../data/html/gh22270_expected_output.html | 14 + .../data/html/gh22579_expected_output.html | 76 + .../data/html/gh22783_expected_output.html | 27 + .../html/gh22783_named_columns_index.html | 30 + .../data/html/gh40024_expected_output.html | 18 + .../data/html/gh6131_expected_output.html | 46 + .../data/html/gh8452_expected_output.html | 28 + .../html_repr_max_rows_10_min_rows_12.html | 70 + .../html_repr_max_rows_10_min_rows_4.html | 46 + .../html_repr_max_rows_12_min_rows_None.html | 78 + .../html_repr_max_rows_None_min_rows_12.html | 269 + ...l_repr_min_rows_default_no_truncation.html | 105 + .../html_repr_min_rows_default_truncated.html | 70 + .../tests/io/formats/data/html/index_1.html | 30 + .../tests/io/formats/data/html/index_2.html | 26 + .../tests/io/formats/data/html/index_3.html | 36 + .../tests/io/formats/data/html/index_4.html | 33 + .../tests/io/formats/data/html/index_5.html | 40 + .../io/formats/data/html/index_formatter.html | 31 + ...index_named_multi_columns_named_multi.html | 34 + ...ex_named_multi_columns_named_standard.html | 29 + .../html/index_named_multi_columns_none.html | 23 + ...dex_named_multi_columns_unnamed_multi.html | 34 + ..._named_multi_columns_unnamed_standard.html | 29 + ...ex_named_standard_columns_named_multi.html | 30 + ...named_standard_columns_named_standard.html | 26 + .../index_named_standard_columns_none.html | 21 + ..._named_standard_columns_unnamed_multi.html | 30 + ...med_standard_columns_unnamed_standard.html | 26 + .../html/index_none_columns_named_multi.html | 25 + .../index_none_columns_named_standard.html | 21 + .../data/html/index_none_columns_none.html | 12 + .../index_none_columns_unnamed_multi.html | 21 + .../index_none_columns_unnamed_standard.html | 18 + ...dex_unnamed_multi_columns_named_multi.html | 28 + ..._unnamed_multi_columns_named_standard.html | 23 + .../index_unnamed_multi_columns_none.html | 15 + ...x_unnamed_multi_columns_unnamed_multi.html | 28 + ...nnamed_multi_columns_unnamed_standard.html | 23 + ..._unnamed_standard_columns_named_multi.html | 25 + ...named_standard_columns_named_standard.html | 21 + .../index_unnamed_standard_columns_none.html | 14 + ...nnamed_standard_columns_unnamed_multi.html | 25 + ...med_standard_columns_unnamed_standard.html | 21 + .../tests/io/formats/data/html/justify.html | 30 + .../io/formats/data/html/multiindex_1.html | 32 + .../io/formats/data/html/multiindex_2.html | 34 + .../data/html/multiindex_sparsify_1.html | 40 + .../data/html/multiindex_sparsify_2.html | 46 + ...tiindex_sparsify_false_multi_sparse_1.html | 42 + ...tiindex_sparsify_false_multi_sparse_2.html | 48 + .../formats/data/html/render_links_false.html | 24 + .../formats/data/html/render_links_true.html | 24 + ...index_named_multi_columns_named_multi.html | 88 + ...ex_named_multi_columns_named_standard.html | 72 + ...unc_df_index_named_multi_columns_none.html | 62 + ...dex_named_multi_columns_unnamed_multi.html | 88 + ..._named_multi_columns_unnamed_standard.html | 72 + ...ex_named_standard_columns_named_multi.html | 74 + ...named_standard_columns_named_standard.html | 62 + ..._df_index_named_standard_columns_none.html | 54 + ..._named_standard_columns_unnamed_multi.html | 74 + ...med_standard_columns_unnamed_standard.html | 62 + ...unc_df_index_none_columns_named_multi.html | 66 + ..._df_index_none_columns_named_standard.html | 54 + .../trunc_df_index_none_columns_none.html | 39 + 
...c_df_index_none_columns_unnamed_multi.html | 58 + ...f_index_none_columns_unnamed_standard.html | 48 + ...dex_unnamed_multi_columns_named_multi.html | 78 + ..._unnamed_multi_columns_named_standard.html | 62 + ...c_df_index_unnamed_multi_columns_none.html | 50 + ...x_unnamed_multi_columns_unnamed_multi.html | 78 + ...nnamed_multi_columns_unnamed_standard.html | 62 + ..._unnamed_standard_columns_named_multi.html | 66 + ...named_standard_columns_named_standard.html | 54 + ...f_index_unnamed_standard_columns_none.html | 44 + ...nnamed_standard_columns_unnamed_multi.html | 66 + ...med_standard_columns_unnamed_standard.html | 54 + .../tests/io/formats/data/html/truncate.html | 86 + .../formats/data/html/truncate_formatter.html | 36 + .../data/html/truncate_multi_index.html | 101 + .../html/truncate_multi_index_sparse_off.html | 105 + .../tests/io/formats/data/html/unicode_1.html | 50 + .../tests/io/formats/data/html/unicode_2.html | 14 + .../data/html/various_dtypes_formatted.html | 36 + .../io/formats/data/html/with_classes.html | 9 + pandas/tests/io/formats/style/__init__.py | 0 pandas/tests/io/formats/style/test_bar.py | 307 + .../tests/io/formats/style/test_deprecated.py | 170 + .../tests/io/formats/style/test_exceptions.py | 44 + pandas/tests/io/formats/style/test_format.py | 501 + .../tests/io/formats/style/test_highlight.py | 218 + pandas/tests/io/formats/style/test_html.py | 977 ++ .../tests/io/formats/style/test_matplotlib.py | 300 + .../tests/io/formats/style/test_non_unique.py | 140 + pandas/tests/io/formats/style/test_style.py | 1582 ++ .../tests/io/formats/style/test_to_latex.py | 1087 ++ .../tests/io/formats/style/test_to_string.py | 91 + pandas/tests/io/formats/style/test_tooltip.py | 85 + pandas/tests/io/formats/test_console.py | 72 + pandas/tests/io/formats/test_css.py | 289 + .../tests/io/formats/test_eng_formatting.py | 234 + pandas/tests/io/formats/test_format.py | 3408 ++++ pandas/tests/io/formats/test_info.py | 503 + pandas/tests/io/formats/test_printing.py | 200 + pandas/tests/io/formats/test_series_info.py | 179 + pandas/tests/io/formats/test_to_csv.py | 738 + pandas/tests/io/formats/test_to_excel.py | 431 + pandas/tests/io/formats/test_to_html.py | 890 ++ pandas/tests/io/formats/test_to_latex.py | 1530 ++ pandas/tests/io/formats/test_to_markdown.py | 101 + pandas/tests/io/formats/test_to_string.py | 340 + .../tests/io/generate_legacy_storage_files.py | 345 + pandas/tests/io/json/__init__.py | 0 pandas/tests/io/json/conftest.py | 9 + pandas/tests/io/json/data/line_delimited.json | 3 + pandas/tests/io/json/data/teams.csv | 2716 ++++ .../tests/io/json/data/tsframe_iso_v012.json | 1 + pandas/tests/io/json/data/tsframe_v012.json | 1 + .../tests/io/json/data/tsframe_v012.json.zip | Bin 0 -> 436 bytes pandas/tests/io/json/test_compression.py | 122 + .../tests/io/json/test_deprecated_kwargs.py | 31 + .../tests/io/json/test_json_table_schema.py | 855 + .../json/test_json_table_schema_ext_dtype.py | 262 + pandas/tests/io/json/test_normalize.py | 893 ++ pandas/tests/io/json/test_pandas.py | 1920 +++ pandas/tests/io/json/test_readlines.py | 298 + pandas/tests/io/json/test_ujson.py | 1249 ++ pandas/tests/io/parser/__init__.py | 0 pandas/tests/io/parser/common/__init__.py | 0 .../tests/io/parser/common/test_chunksize.py | 282 + .../io/parser/common/test_common_basic.py | 944 ++ .../tests/io/parser/common/test_data_list.py | 87 + pandas/tests/io/parser/common/test_decimal.py | 62 + .../io/parser/common/test_file_buffer_url.py | 430 + pandas/tests/io/parser/common/test_float.py | 65 
+ pandas/tests/io/parser/common/test_index.py | 299 + pandas/tests/io/parser/common/test_inf.py | 68 + pandas/tests/io/parser/common/test_ints.py | 215 + .../tests/io/parser/common/test_iterator.py | 109 + .../io/parser/common/test_read_errors.py | 319 + pandas/tests/io/parser/common/test_verbose.py | 55 + pandas/tests/io/parser/conftest.py | 297 + pandas/tests/io/parser/data/items.jsonl | 2 + pandas/tests/io/parser/data/salaries.csv | 47 + pandas/tests/io/parser/data/salaries.csv.bz2 | Bin 0 -> 283 bytes pandas/tests/io/parser/data/salaries.csv.gz | Bin 0 -> 302 bytes pandas/tests/io/parser/data/salaries.csv.xz | Bin 0 -> 336 bytes pandas/tests/io/parser/data/salaries.csv.zip | Bin 0 -> 445 bytes pandas/tests/io/parser/data/salaries.csv.zst | Bin 0 -> 281 bytes .../tests/io/parser/data/sauron.SHIFT_JIS.csv | 14 + pandas/tests/io/parser/data/sub_char.csv | 2 + pandas/tests/io/parser/data/tar_csv.tar | Bin 0 -> 10240 bytes pandas/tests/io/parser/data/tar_csv.tar.gz | Bin 0 -> 117 bytes pandas/tests/io/parser/data/test2.csv | 6 + pandas/tests/io/parser/data/test_mmap.csv | 4 + .../tests/io/parser/data/unicode_series.csv | 18 + pandas/tests/io/parser/data/utf16_ex.txt | Bin 0 -> 11406 bytes .../tests/io/parser/data/utf16_ex_small.zip | Bin 0 -> 285 bytes .../tests/io/parser/data/utf32_ex_small.zip | Bin 0 -> 251 bytes pandas/tests/io/parser/data/utf8_ex_small.zip | Bin 0 -> 201 bytes pandas/tests/io/parser/dtypes/__init__.py | 0 .../io/parser/dtypes/test_categorical.py | 310 + .../io/parser/dtypes/test_dtypes_basic.py | 387 + pandas/tests/io/parser/dtypes/test_empty.py | 182 + pandas/tests/io/parser/test_c_parser_only.py | 691 + pandas/tests/io/parser/test_comment.py | 168 + pandas/tests/io/parser/test_compression.py | 211 + pandas/tests/io/parser/test_converters.py | 203 + pandas/tests/io/parser/test_dialect.py | 156 + pandas/tests/io/parser/test_encoding.py | 313 + pandas/tests/io/parser/test_header.py | 680 + pandas/tests/io/parser/test_index_col.py | 354 + pandas/tests/io/parser/test_mangle_dupes.py | 176 + pandas/tests/io/parser/test_multi_thread.py | 154 + pandas/tests/io/parser/test_na_values.py | 656 + pandas/tests/io/parser/test_network.py | 322 + pandas/tests/io/parser/test_parse_dates.py | 2039 +++ .../io/parser/test_python_parser_only.py | 490 + pandas/tests/io/parser/test_quoting.py | 167 + pandas/tests/io/parser/test_read_fwf.py | 955 ++ pandas/tests/io/parser/test_skiprows.py | 288 + pandas/tests/io/parser/test_textreader.py | 343 + pandas/tests/io/parser/test_unsupported.py | 206 + pandas/tests/io/parser/usecols/__init__.py | 0 .../io/parser/usecols/test_parse_dates.py | 155 + .../tests/io/parser/usecols/test_strings.py | 97 + .../io/parser/usecols/test_usecols_basic.py | 436 + pandas/tests/io/pytables/__init__.py | 15 + pandas/tests/io/pytables/common.py | 84 + pandas/tests/io/pytables/conftest.py | 19 + pandas/tests/io/pytables/test_append.py | 941 ++ pandas/tests/io/pytables/test_categorical.py | 222 + pandas/tests/io/pytables/test_compat.py | 77 + pandas/tests/io/pytables/test_complex.py | 203 + pandas/tests/io/pytables/test_errors.py | 239 + .../tests/io/pytables/test_file_handling.py | 447 + pandas/tests/io/pytables/test_keys.py | 80 + pandas/tests/io/pytables/test_put.py | 367 + .../io/pytables/test_pytables_missing.py | 14 + pandas/tests/io/pytables/test_read.py | 345 + .../io/pytables/test_retain_attributes.py | 117 + pandas/tests/io/pytables/test_round_trip.py | 550 + pandas/tests/io/pytables/test_select.py | 976 ++ pandas/tests/io/pytables/test_store.py | 1041 ++ 
pandas/tests/io/pytables/test_subclass.py | 50 + pandas/tests/io/pytables/test_time_series.py | 66 + pandas/tests/io/pytables/test_timezones.py | 370 + pandas/tests/io/sas/__init__.py | 0 .../io/sas/data/0x00controlbyte.sas7bdat.bz2 | Bin 0 -> 72816 bytes pandas/tests/io/sas/data/0x40controlbyte.csv | 2 + .../io/sas/data/0x40controlbyte.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/DEMO_G.csv | 9757 ++++++++++++ pandas/tests/io/sas/data/DEMO_G.xpt | Bin 0 -> 3753760 bytes pandas/tests/io/sas/data/DEMO_PUF.cpt | Bin 0 -> 694 bytes pandas/tests/io/sas/data/DRXFCD_G.csv | 7619 +++++++++ pandas/tests/io/sas/data/DRXFCD_G.xpt | Bin 0 -> 2195200 bytes pandas/tests/io/sas/data/SSHSV1_A.csv | 1427 ++ pandas/tests/io/sas/data/SSHSV1_A.xpt | Bin 0 -> 23920 bytes pandas/tests/io/sas/data/airline.csv | 33 + pandas/tests/io/sas/data/airline.sas7bdat | Bin 0 -> 5120 bytes pandas/tests/io/sas/data/airline.sas7bdat.gz | Bin 0 -> 1431 bytes pandas/tests/io/sas/data/cars.sas7bdat | Bin 0 -> 13312 bytes pandas/tests/io/sas/data/corrupt.sas7bdat | Bin 0 -> 292 bytes pandas/tests/io/sas/data/dates_null.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/datetime.csv | 5 + pandas/tests/io/sas/data/datetime.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/load_log.sas7bdat | Bin 0 -> 589824 bytes pandas/tests/io/sas/data/many_columns.csv | 4 + .../tests/io/sas/data/many_columns.sas7bdat | Bin 0 -> 81920 bytes .../tests/io/sas/data/max_sas_date.sas7bdat | Bin 0 -> 393216 bytes pandas/tests/io/sas/data/paxraw_d_short.csv | 101 + pandas/tests/io/sas/data/paxraw_d_short.xpt | Bin 0 -> 6960 bytes pandas/tests/io/sas/data/productsales.csv | 1441 ++ .../tests/io/sas/data/productsales.sas7bdat | Bin 0 -> 148480 bytes pandas/tests/io/sas/data/test1.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test10.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test11.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test12.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test13.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test14.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test15.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test16.sas7bdat | Bin 0 -> 73728 bytes pandas/tests/io/sas/data/test2.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test3.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test4.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test5.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test6.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test7.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test8.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test9.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test_12659.csv | 37 + pandas/tests/io/sas/data/test_12659.sas7bdat | Bin 0 -> 131072 bytes .../io/sas/data/test_meta2_page.sas7bdat | Bin 0 -> 262144 bytes pandas/tests/io/sas/data/test_sas7bdat_1.csv | 11 + pandas/tests/io/sas/data/test_sas7bdat_2.csv | 11 + pandas/tests/io/sas/data/zero_rows.sas7bdat | Bin 0 -> 262144 bytes .../tests/io/sas/data/zero_variables.sas7bdat | Bin 0 -> 149504 bytes pandas/tests/io/sas/test_sas.py | 34 + pandas/tests/io/sas/test_sas7bdat.py | 399 + pandas/tests/io/sas/test_xport.py | 175 + pandas/tests/io/test_clipboard.py | 401 + pandas/tests/io/test_common.py | 606 + pandas/tests/io/test_compression.py | 342 + pandas/tests/io/test_date_converters.py | 43 + pandas/tests/io/test_feather.py | 200 + 
pandas/tests/io/test_fsspec.py | 319 + pandas/tests/io/test_gcs.py | 206 + pandas/tests/io/test_html.py | 1433 ++ pandas/tests/io/test_orc.py | 307 + pandas/tests/io/test_parquet.py | 1194 ++ pandas/tests/io/test_pickle.py | 600 + pandas/tests/io/test_s3.py | 50 + pandas/tests/io/test_spss.py | 76 + pandas/tests/io/test_sql.py | 2981 ++++ pandas/tests/io/test_stata.py | 2186 +++ pandas/tests/io/test_user_agent.py | 379 + pandas/tests/io/xml/__init__.py | 0 pandas/tests/io/xml/test_to_xml.py | 1352 ++ pandas/tests/io/xml/test_xml.py | 1680 ++ pandas/tests/io/xml/test_xml_dtypes.py | 479 + pandas/tests/libs/__init__.py | 0 pandas/tests/libs/test_hashtable.py | 656 + pandas/tests/libs/test_join.py | 390 + pandas/tests/libs/test_lib.py | 226 + pandas/tests/plotting/__init__.py | 0 pandas/tests/plotting/common.py | 600 + pandas/tests/plotting/conftest.py | 35 + pandas/tests/plotting/frame/__init__.py | 0 pandas/tests/plotting/frame/test_frame.py | 2255 +++ .../tests/plotting/frame/test_frame_color.py | 659 + .../plotting/frame/test_frame_groupby.py | 73 + .../tests/plotting/frame/test_frame_legend.py | 204 + .../plotting/frame/test_frame_subplots.py | 674 + .../tests/plotting/frame/test_hist_box_by.py | 383 + pandas/tests/plotting/test_backend.py | 94 + pandas/tests/plotting/test_boxplot_method.py | 643 + pandas/tests/plotting/test_common.py | 42 + pandas/tests/plotting/test_converter.py | 408 + pandas/tests/plotting/test_datetimelike.py | 1525 ++ pandas/tests/plotting/test_groupby.py | 118 + pandas/tests/plotting/test_hist_method.py | 777 + pandas/tests/plotting/test_misc.py | 616 + pandas/tests/plotting/test_series.py | 843 + pandas/tests/plotting/test_style.py | 157 + pandas/tests/reductions/__init__.py | 4 + pandas/tests/reductions/test_reductions.py | 1536 ++ .../tests/reductions/test_stat_reductions.py | 278 + pandas/tests/resample/__init__.py | 0 pandas/tests/resample/conftest.py | 171 + pandas/tests/resample/test_base.py | 261 + pandas/tests/resample/test_datetime_index.py | 1866 +++ pandas/tests/resample/test_deprecated.py | 316 + pandas/tests/resample/test_period_index.py | 878 ++ pandas/tests/resample/test_resample_api.py | 961 ++ .../tests/resample/test_resampler_grouper.py | 499 + pandas/tests/resample/test_time_grouper.py | 372 + pandas/tests/resample/test_timedelta.py | 207 + pandas/tests/reshape/__init__.py | 0 pandas/tests/reshape/concat/__init__.py | 0 pandas/tests/reshape/concat/conftest.py | 7 + pandas/tests/reshape/concat/test_append.py | 378 + .../reshape/concat/test_append_common.py | 775 + .../tests/reshape/concat/test_categorical.py | 256 + pandas/tests/reshape/concat/test_concat.py | 805 + pandas/tests/reshape/concat/test_dataframe.py | 230 + pandas/tests/reshape/concat/test_datetimes.py | 543 + pandas/tests/reshape/concat/test_empty.py | 286 + pandas/tests/reshape/concat/test_index.py | 456 + pandas/tests/reshape/concat/test_invalid.py | 56 + pandas/tests/reshape/concat/test_series.py | 148 + pandas/tests/reshape/concat/test_sort.py | 116 + pandas/tests/reshape/data/cut_data.csv | 1 + pandas/tests/reshape/merge/__init__.py | 0 .../merge/data/allow_exact_matches.csv | 28 + .../allow_exact_matches_and_tolerance.csv | 28 + pandas/tests/reshape/merge/data/asof.csv | 28 + pandas/tests/reshape/merge/data/asof2.csv | 78 + pandas/tests/reshape/merge/data/quotes.csv | 17 + pandas/tests/reshape/merge/data/quotes2.csv | 57 + pandas/tests/reshape/merge/data/tolerance.csv | 28 + pandas/tests/reshape/merge/data/trades.csv | 28 + pandas/tests/reshape/merge/data/trades2.csv | 78 + 
pandas/tests/reshape/merge/test_join.py | 941 ++ pandas/tests/reshape/merge/test_merge.py | 2693 ++++ pandas/tests/reshape/merge/test_merge_asof.py | 1570 ++ .../tests/reshape/merge/test_merge_cross.py | 98 + .../merge/test_merge_index_as_string.py | 189 + .../tests/reshape/merge/test_merge_ordered.py | 208 + pandas/tests/reshape/merge/test_multi.py | 909 ++ pandas/tests/reshape/test_crosstab.py | 826 + pandas/tests/reshape/test_cut.py | 746 + pandas/tests/reshape/test_from_dummies.py | 398 + pandas/tests/reshape/test_get_dummies.py | 655 + pandas/tests/reshape/test_melt.py | 1112 ++ pandas/tests/reshape/test_pivot.py | 2475 +++ pandas/tests/reshape/test_pivot_multilevel.py | 252 + pandas/tests/reshape/test_qcut.py | 302 + .../tests/reshape/test_union_categoricals.py | 363 + pandas/tests/reshape/test_util.py | 79 + pandas/tests/scalar/__init__.py | 0 pandas/tests/scalar/interval/__init__.py | 0 .../tests/scalar/interval/test_arithmetic.py | 63 + pandas/tests/scalar/interval/test_interval.py | 279 + pandas/tests/scalar/interval/test_ops.py | 119 + pandas/tests/scalar/period/__init__.py | 0 pandas/tests/scalar/period/test_asfreq.py | 801 + pandas/tests/scalar/period/test_period.py | 1618 ++ pandas/tests/scalar/test_na_scalar.py | 307 + pandas/tests/scalar/test_nat.py | 728 + pandas/tests/scalar/timedelta/__init__.py | 0 .../tests/scalar/timedelta/test_arithmetic.py | 1118 ++ .../scalar/timedelta/test_constructors.py | 413 + pandas/tests/scalar/timedelta/test_formats.py | 44 + .../tests/scalar/timedelta/test_timedelta.py | 975 ++ pandas/tests/scalar/timestamp/__init__.py | 0 .../tests/scalar/timestamp/test_arithmetic.py | 321 + .../scalar/timestamp/test_comparisons.py | 321 + .../scalar/timestamp/test_constructors.py | 679 + pandas/tests/scalar/timestamp/test_formats.py | 71 + .../tests/scalar/timestamp/test_rendering.py | 107 + .../tests/scalar/timestamp/test_timestamp.py | 1120 ++ .../tests/scalar/timestamp/test_timezones.py | 472 + .../tests/scalar/timestamp/test_unary_ops.py | 559 + pandas/tests/series/__init__.py | 0 pandas/tests/series/accessors/__init__.py | 0 .../series/accessors/test_cat_accessor.py | 297 + .../series/accessors/test_dt_accessor.py | 811 + .../series/accessors/test_sparse_accessor.py | 9 + .../series/accessors/test_str_accessor.py | 25 + pandas/tests/series/indexing/__init__.py | 0 pandas/tests/series/indexing/test_datetime.py | 484 + pandas/tests/series/indexing/test_delitem.py | 73 + pandas/tests/series/indexing/test_get.py | 214 + pandas/tests/series/indexing/test_getitem.py | 720 + pandas/tests/series/indexing/test_indexing.py | 404 + pandas/tests/series/indexing/test_mask.py | 69 + .../tests/series/indexing/test_set_value.py | 45 + pandas/tests/series/indexing/test_setitem.py | 1664 ++ pandas/tests/series/indexing/test_take.py | 33 + pandas/tests/series/indexing/test_where.py | 466 + pandas/tests/series/indexing/test_xs.py | 81 + pandas/tests/series/methods/__init__.py | 7 + pandas/tests/series/methods/test_align.py | 224 + pandas/tests/series/methods/test_append.py | 271 + pandas/tests/series/methods/test_argsort.py | 67 + pandas/tests/series/methods/test_asof.py | 210 + pandas/tests/series/methods/test_astype.py | 621 + pandas/tests/series/methods/test_autocorr.py | 30 + pandas/tests/series/methods/test_between.py | 84 + pandas/tests/series/methods/test_clip.py | 151 + pandas/tests/series/methods/test_combine.py | 17 + .../series/methods/test_combine_first.py | 104 + pandas/tests/series/methods/test_compare.py | 141 + pandas/tests/series/methods/test_convert.py 
| 139 + .../series/methods/test_convert_dtypes.py | 231 + pandas/tests/series/methods/test_copy.py | 90 + pandas/tests/series/methods/test_count.py | 98 + pandas/tests/series/methods/test_cov_corr.py | 176 + pandas/tests/series/methods/test_describe.py | 190 + pandas/tests/series/methods/test_diff.py | 84 + pandas/tests/series/methods/test_drop.py | 112 + .../series/methods/test_drop_duplicates.py | 257 + pandas/tests/series/methods/test_dropna.py | 115 + pandas/tests/series/methods/test_dtypes.py | 8 + .../tests/series/methods/test_duplicated.py | 52 + pandas/tests/series/methods/test_equals.py | 143 + pandas/tests/series/methods/test_explode.py | 144 + pandas/tests/series/methods/test_fillna.py | 997 ++ .../series/methods/test_get_numeric_data.py | 36 + pandas/tests/series/methods/test_head_tail.py | 8 + .../series/methods/test_infer_objects.py | 23 + .../tests/series/methods/test_interpolate.py | 825 + .../tests/series/methods/test_is_monotonic.py | 28 + pandas/tests/series/methods/test_is_unique.py | 41 + pandas/tests/series/methods/test_isin.py | 203 + pandas/tests/series/methods/test_isna.py | 35 + pandas/tests/series/methods/test_item.py | 59 + pandas/tests/series/methods/test_matmul.py | 78 + pandas/tests/series/methods/test_nlargest.py | 245 + pandas/tests/series/methods/test_nunique.py | 24 + .../tests/series/methods/test_pct_change.py | 82 + pandas/tests/series/methods/test_pop.py | 13 + pandas/tests/series/methods/test_quantile.py | 242 + pandas/tests/series/methods/test_rank.py | 483 + pandas/tests/series/methods/test_reindex.py | 425 + .../tests/series/methods/test_reindex_like.py | 41 + pandas/tests/series/methods/test_rename.py | 157 + .../tests/series/methods/test_rename_axis.py | 47 + pandas/tests/series/methods/test_repeat.py | 40 + pandas/tests/series/methods/test_replace.py | 677 + .../tests/series/methods/test_reset_index.py | 208 + pandas/tests/series/methods/test_round.py | 64 + .../tests/series/methods/test_searchsorted.py | 67 + pandas/tests/series/methods/test_set_name.py | 21 + .../tests/series/methods/test_sort_index.py | 334 + .../tests/series/methods/test_sort_values.py | 256 + pandas/tests/series/methods/test_to_csv.py | 181 + pandas/tests/series/methods/test_to_dict.py | 38 + pandas/tests/series/methods/test_to_frame.py | 61 + pandas/tests/series/methods/test_truncate.py | 65 + .../tests/series/methods/test_tz_localize.py | 113 + pandas/tests/series/methods/test_unique.py | 76 + pandas/tests/series/methods/test_unstack.py | 149 + pandas/tests/series/methods/test_update.py | 133 + .../tests/series/methods/test_value_counts.py | 227 + pandas/tests/series/methods/test_values.py | 29 + pandas/tests/series/methods/test_view.py | 58 + pandas/tests/series/test_api.py | 305 + pandas/tests/series/test_arithmetic.py | 894 ++ pandas/tests/series/test_constructors.py | 1999 +++ pandas/tests/series/test_cumulative.py | 131 + pandas/tests/series/test_iteration.py | 33 + pandas/tests/series/test_logical_ops.py | 489 + pandas/tests/series/test_missing.py | 118 + pandas/tests/series/test_npfuncs.py | 21 + pandas/tests/series/test_reductions.py | 143 + pandas/tests/series/test_repr.py | 536 + pandas/tests/series/test_subclass.py | 61 + pandas/tests/series/test_ufunc.py | 455 + pandas/tests/series/test_unary.py | 52 + pandas/tests/series/test_validate.py | 26 + pandas/tests/strings/__init__.py | 0 pandas/tests/strings/conftest.py | 177 + pandas/tests/strings/test_api.py | 154 + pandas/tests/strings/test_case_justify.py | 409 + pandas/tests/strings/test_cat.py | 397 + 
pandas/tests/strings/test_extract.py | 708 + pandas/tests/strings/test_find_replace.py | 1130 ++ pandas/tests/strings/test_get_dummies.py | 53 + pandas/tests/strings/test_split_partition.py | 736 + pandas/tests/strings/test_string_array.py | 102 + pandas/tests/strings/test_strings.py | 847 + pandas/tests/test_aggregation.py | 93 + pandas/tests/test_algos.py | 2381 +++ pandas/tests/test_common.py | 231 + pandas/tests/test_downstream.py | 330 + pandas/tests/test_errors.py | 108 + pandas/tests/test_expressions.py | 408 + pandas/tests/test_flags.py | 48 + pandas/tests/test_multilevel.py | 414 + pandas/tests/test_nanops.py | 1144 ++ pandas/tests/test_optional_dependency.py | 86 + pandas/tests/test_register_accessor.py | 109 + pandas/tests/test_sorting.py | 525 + pandas/tests/test_take.py | 334 + pandas/tests/tools/__init__.py | 0 pandas/tests/tools/test_to_datetime.py | 2859 ++++ pandas/tests/tools/test_to_numeric.py | 801 + pandas/tests/tools/test_to_time.py | 80 + pandas/tests/tools/test_to_timedelta.py | 280 + pandas/tests/tseries/__init__.py | 0 pandas/tests/tseries/frequencies/__init__.py | 0 .../tseries/frequencies/test_freq_code.py | 97 + .../tseries/frequencies/test_frequencies.py | 29 + .../tseries/frequencies/test_inference.py | 524 + pandas/tests/tseries/holiday/__init__.py | 0 pandas/tests/tseries/holiday/test_calendar.py | 116 + pandas/tests/tseries/holiday/test_federal.py | 38 + pandas/tests/tseries/holiday/test_holiday.py | 266 + .../tests/tseries/holiday/test_observance.py | 105 + pandas/tests/tseries/offsets/__init__.py | 0 pandas/tests/tseries/offsets/common.py | 207 + pandas/tests/tseries/offsets/conftest.py | 31 + .../tseries/offsets/data/cday-0.14.1.pickle | Bin 0 -> 492 bytes .../tseries/offsets/test_business_day.py | 236 + .../tseries/offsets/test_business_hour.py | 1372 ++ .../tseries/offsets/test_business_month.py | 224 + .../tseries/offsets/test_business_quarter.py | 315 + .../tseries/offsets/test_business_year.py | 224 + .../offsets/test_custom_business_day.py | 91 + .../offsets/test_custom_business_hour.py | 330 + .../offsets/test_custom_business_month.py | 447 + pandas/tests/tseries/offsets/test_dst.py | 230 + pandas/tests/tseries/offsets/test_easter.py | 36 + pandas/tests/tseries/offsets/test_fiscal.py | 698 + pandas/tests/tseries/offsets/test_index.py | 57 + pandas/tests/tseries/offsets/test_month.py | 696 + pandas/tests/tseries/offsets/test_offsets.py | 1058 ++ .../offsets/test_offsets_properties.py | 60 + pandas/tests/tseries/offsets/test_quarter.py | 300 + pandas/tests/tseries/offsets/test_ticks.py | 393 + pandas/tests/tseries/offsets/test_week.py | 366 + pandas/tests/tseries/offsets/test_year.py | 326 + pandas/tests/tslibs/__init__.py | 0 pandas/tests/tslibs/test_api.py | 63 + pandas/tests/tslibs/test_array_to_datetime.py | 202 + pandas/tests/tslibs/test_ccalendar.py | 63 + pandas/tests/tslibs/test_conversion.py | 161 + pandas/tests/tslibs/test_fields.py | 40 + pandas/tests/tslibs/test_libfrequencies.py | 29 + pandas/tests/tslibs/test_liboffsets.py | 173 + pandas/tests/tslibs/test_np_datetime.py | 222 + pandas/tests/tslibs/test_parse_iso8601.py | 72 + pandas/tests/tslibs/test_parsing.py | 286 + pandas/tests/tslibs/test_period_asfreq.py | 116 + pandas/tests/tslibs/test_resolution.py | 24 + pandas/tests/tslibs/test_timedeltas.py | 137 + pandas/tests/tslibs/test_timezones.py | 165 + pandas/tests/tslibs/test_to_offset.py | 174 + pandas/tests/tslibs/test_tzconversion.py | 23 + pandas/tests/util/__init__.py | 0 pandas/tests/util/conftest.py | 26 + 
pandas/tests/util/test_assert_almost_equal.py | 544 + pandas/tests/util/test_assert_attr_equal.py | 33 + .../util/test_assert_categorical_equal.py | 90 + .../util/test_assert_extension_array_equal.py | 113 + pandas/tests/util/test_assert_frame_equal.py | 367 + pandas/tests/util/test_assert_index_equal.py | 279 + .../util/test_assert_interval_array_equal.py | 81 + .../util/test_assert_numpy_array_equal.py | 223 + .../util/test_assert_produces_warning.py | 212 + pandas/tests/util/test_assert_series_equal.py | 413 + pandas/tests/util/test_deprecate.py | 64 + pandas/tests/util/test_deprecate_kwarg.py | 90 + .../test_deprecate_nonkeyword_arguments.py | 157 + pandas/tests/util/test_doc.py | 90 + pandas/tests/util/test_hashing.py | 401 + pandas/tests/util/test_make_objects.py | 15 + pandas/tests/util/test_numba.py | 12 + pandas/tests/util/test_rewrite_warning.py | 39 + pandas/tests/util/test_safe_import.py | 39 + pandas/tests/util/test_shares_memory.py | 13 + pandas/tests/util/test_show_versions.py | 104 + pandas/tests/util/test_util.py | 83 + pandas/tests/util/test_validate_args.py | 67 + .../util/test_validate_args_and_kwargs.py | 81 + pandas/tests/util/test_validate_inclusive.py | 40 + pandas/tests/util/test_validate_kwargs.py | 66 + pandas/tests/window/__init__.py | 8 + pandas/tests/window/conftest.py | 140 + pandas/tests/window/moments/__init__.py | 0 pandas/tests/window/moments/conftest.py | 72 + .../moments/test_moments_consistency_ewm.py | 249 + .../test_moments_consistency_expanding.py | 144 + .../test_moments_consistency_rolling.py | 244 + pandas/tests/window/test_api.py | 489 + pandas/tests/window/test_apply.py | 326 + pandas/tests/window/test_base_indexer.py | 507 + .../tests/window/test_cython_aggregations.py | 111 + pandas/tests/window/test_dtypes.py | 174 + pandas/tests/window/test_ewm.py | 755 + pandas/tests/window/test_expanding.py | 737 + pandas/tests/window/test_groupby.py | 1259 ++ pandas/tests/window/test_numba.py | 467 + pandas/tests/window/test_online.py | 105 + pandas/tests/window/test_pairwise.py | 439 + pandas/tests/window/test_rolling.py | 1956 +++ pandas/tests/window/test_rolling_functions.py | 533 + pandas/tests/window/test_rolling_quantile.py | 175 + pandas/tests/window/test_rolling_skew_kurt.py | 236 + pandas/tests/window/test_timeseries_window.py | 714 + pandas/tests/window/test_win_type.py | 707 + pandas/tseries/__init__.py | 11 + pandas/tseries/api.py | 8 + pandas/tseries/frequencies.py | 660 + pandas/tseries/holiday.py | 608 + pandas/tseries/offsets.py | 91 + pandas/util/__init__.py | 20 + pandas/util/_decorators.py | 533 + pandas/util/_doctools.py | 195 + pandas/util/_exceptions.py | 94 + pandas/util/_print_versions.py | 160 + pandas/util/_test_decorators.py | 316 + pandas/util/_tester.py | 38 + pandas/util/_validators.py | 542 + pandas/util/testing.py | 14 + pandas/util/version/__init__.py | 579 + pyproject.toml | 179 + pyright_reportGeneralTypeIssues.json | 112 + requirements-dev.txt | 103 + scripts/__init__.py | 0 scripts/generate_pip_deps_from_conda.py | 143 + scripts/list_future_warnings.sh | 46 + scripts/no_bool_in_generic.py | 87 + scripts/pandas_errors_documented.py | 52 + scripts/run_stubtest.py | 85 + scripts/run_vulture.py | 21 + scripts/sync_flake8_versions.py | 145 + scripts/tests/__init__.py | 0 scripts/tests/conftest.py | 6 + scripts/tests/test_no_bool_in_generic.py | 20 + scripts/tests/test_sync_flake8_versions.py | 125 + scripts/tests/test_use_io_common_urlopen.py | 23 + scripts/tests/test_use_pd_array_in_core.py | 26 + 
scripts/tests/test_validate_docstrings.py | 468 + scripts/use_io_common_urlopen.py | 63 + scripts/use_pd_array_in_core.py | 76 + scripts/validate_docstrings.py | 477 + scripts/validate_min_versions_in_sync.py | 88 + scripts/validate_rst_title_capitalization.py | 277 + setup.cfg | 220 + setup.py | 666 + test_fast.bat | 3 + test_fast.sh | 8 + typings/numba.pyi | 41 + versioneer.py | 1913 +++ web/README.md | 12 + web/interactive_terminal/README.md | 35 + web/interactive_terminal/jupyter-lite.json | 13 + .../jupyter_lite_config.json | 7 + web/pandas/_templates/layout.html | 115 + web/pandas/about/citing.md | 127 + web/pandas/about/governance.md | 317 + web/pandas/about/index.md | 86 + web/pandas/about/roadmap.md | 224 + web/pandas/about/sponsors.md | 60 + web/pandas/about/team.md | 75 + web/pandas/community/blog/2019-user-survey.md | 172 + web/pandas/community/blog/asv-pandas-grant.md | 141 + web/pandas/community/blog/extension-arrays.md | 218 + web/pandas/community/blog/index.html | 14 + web/pandas/community/blog/pandas-1.0.md | 31 + web/pandas/community/coc.md | 65 + web/pandas/community/ecosystem.md | 419 + web/pandas/config.yml | 186 + web/pandas/contribute.md | 55 + web/pandas/donate.md | 14 + web/pandas/getting_started.md | 70 + web/pandas/index.html | 126 + .../pdeps/0001-purpose-and-guidelines.md | 128 + web/pandas/static/css/pandas.css | 105 + .../img/blog/2019-user-survey/2019_13_0.png | Bin 0 -> 7549 bytes .../img/blog/2019-user-survey/2019_18_0.png | Bin 0 -> 10459 bytes .../img/blog/2019-user-survey/2019_20_0.png | Bin 0 -> 6926 bytes .../img/blog/2019-user-survey/2019_22_0.png | Bin 0 -> 10205 bytes .../img/blog/2019-user-survey/2019_24_0.png | Bin 0 -> 9829 bytes .../img/blog/2019-user-survey/2019_26_0.png | Bin 0 -> 34497 bytes .../img/blog/2019-user-survey/2019_31_0.png | Bin 0 -> 6053 bytes .../img/blog/2019-user-survey/2019_33_0.png | Bin 0 -> 5547 bytes .../img/blog/2019-user-survey/2019_4_0.png | Bin 0 -> 8545 bytes .../img/blog/2019-user-survey/2019_5_0.png | Bin 0 -> 7388 bytes web/pandas/static/img/favicon.ico | Bin 0 -> 1150 bytes web/pandas/static/img/favicon_white.ico | Bin 0 -> 102199 bytes .../static/img/install/anaconda_prompt.png | Bin 0 -> 1373 bytes .../static/img/install/jupyterlab_home.png | Bin 0 -> 1962 bytes .../img/install/pandas_import_and_version.png | Bin 0 -> 2252 bytes web/pandas/static/img/pandas.svg | 1 + web/pandas/static/img/pandas_mark.svg | 111 + web/pandas/static/img/pandas_mark_white.svg | 111 + web/pandas/static/img/pandas_secondary.svg | 1 + .../static/img/pandas_secondary_white.svg | 1 + web/pandas/static/img/pandas_white.svg | 1 + web/pandas/static/img/partners/bodo.svg | 1 + web/pandas/static/img/partners/czi.svg | 38 + web/pandas/static/img/partners/dfine.svg | 1 + web/pandas/static/img/partners/numfocus.svg | 60 + web/pandas/static/img/partners/nvidia.svg | 56 + .../static/img/partners/quansight_labs.svg | 1 + web/pandas/static/img/partners/tidelift.svg | 33 + web/pandas/static/img/partners/two_sigma.svg | 1 + .../static/img/partners/voltron_data.svg | 52 + web/pandas/static/img/pydata_book.gif | Bin 0 -> 5862 bytes web/pandas/try.md | 21 + web/pandas/versions.json | 37 + web/pandas_web.py | 433 + 2437 files changed, 788406 insertions(+) create mode 100644 .devcontainer.json create mode 100644 .pep8speaks.yml create mode 100644 .pre-commit-config.yaml create mode 100644 AUTHORS.md create mode 100644 CITATION.cff create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 LICENSES/DATEUTIL_LICENSE create mode 100644 
LICENSES/HAVEN_LICENSE create mode 100644 LICENSES/HAVEN_MIT create mode 100644 LICENSES/KLIB_LICENSE create mode 100644 LICENSES/MUSL_LICENSE create mode 100644 LICENSES/NUMPY_LICENSE create mode 100644 LICENSES/OTHER create mode 100644 LICENSES/PACKAGING_LICENSE create mode 100644 LICENSES/PSF_LICENSE create mode 100644 LICENSES/PYUPGRADE_LICENSE create mode 100644 LICENSES/SAS7BDAT_LICENSE create mode 100644 LICENSES/SCIPY_LICENSE create mode 100644 LICENSES/ULTRAJSON_LICENSE create mode 100644 LICENSES/XARRAY_LICENSE create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.md create mode 100644 RELEASE.md create mode 100644 asv_bench/asv.conf.json create mode 100644 asv_bench/benchmarks/__init__.py create mode 100644 asv_bench/benchmarks/algorithms.py create mode 100644 asv_bench/benchmarks/algos/__init__.py create mode 100644 asv_bench/benchmarks/algos/isin.py create mode 100644 asv_bench/benchmarks/arithmetic.py create mode 100644 asv_bench/benchmarks/array.py create mode 100644 asv_bench/benchmarks/attrs_caching.py create mode 100644 asv_bench/benchmarks/boolean.py create mode 100644 asv_bench/benchmarks/categoricals.py create mode 100644 asv_bench/benchmarks/ctors.py create mode 100644 asv_bench/benchmarks/dtypes.py create mode 100644 asv_bench/benchmarks/eval.py create mode 100644 asv_bench/benchmarks/finalize.py create mode 100644 asv_bench/benchmarks/frame_ctor.py create mode 100644 asv_bench/benchmarks/frame_methods.py create mode 100644 asv_bench/benchmarks/gil.py create mode 100644 asv_bench/benchmarks/groupby.py create mode 100644 asv_bench/benchmarks/hash_functions.py create mode 100644 asv_bench/benchmarks/index_cached_properties.py create mode 100644 asv_bench/benchmarks/index_object.py create mode 100644 asv_bench/benchmarks/indexing.py create mode 100644 asv_bench/benchmarks/indexing_engines.py create mode 100644 asv_bench/benchmarks/inference.py create mode 100644 asv_bench/benchmarks/io/__init__.py create mode 100644 asv_bench/benchmarks/io/csv.py create mode 100644 asv_bench/benchmarks/io/excel.py create mode 100644 asv_bench/benchmarks/io/hdf.py create mode 100644 asv_bench/benchmarks/io/json.py create mode 100644 asv_bench/benchmarks/io/parsers.py create mode 100644 asv_bench/benchmarks/io/pickle.py create mode 100644 asv_bench/benchmarks/io/sas.py create mode 100644 asv_bench/benchmarks/io/sql.py create mode 100644 asv_bench/benchmarks/io/stata.py create mode 100644 asv_bench/benchmarks/io/style.py create mode 100644 asv_bench/benchmarks/join_merge.py create mode 100644 asv_bench/benchmarks/libs.py create mode 100644 asv_bench/benchmarks/multiindex_object.py create mode 100644 asv_bench/benchmarks/package.py create mode 100644 asv_bench/benchmarks/pandas_vb_common.py create mode 100644 asv_bench/benchmarks/period.py create mode 100644 asv_bench/benchmarks/plotting.py create mode 100644 asv_bench/benchmarks/reindex.py create mode 100644 asv_bench/benchmarks/replace.py create mode 100644 asv_bench/benchmarks/reshape.py create mode 100644 asv_bench/benchmarks/rolling.py create mode 100644 asv_bench/benchmarks/series_methods.py create mode 100644 asv_bench/benchmarks/sparse.py create mode 100644 asv_bench/benchmarks/stat_ops.py create mode 100644 asv_bench/benchmarks/strftime.py create mode 100644 asv_bench/benchmarks/strings.py create mode 100644 asv_bench/benchmarks/timedelta.py create mode 100644 asv_bench/benchmarks/timeseries.py create mode 100644 asv_bench/benchmarks/tslibs/__init__.py create mode 100644 
asv_bench/benchmarks/tslibs/fields.py create mode 100644 asv_bench/benchmarks/tslibs/normalize.py create mode 100644 asv_bench/benchmarks/tslibs/offsets.py create mode 100644 asv_bench/benchmarks/tslibs/period.py create mode 100644 asv_bench/benchmarks/tslibs/resolution.py create mode 100644 asv_bench/benchmarks/tslibs/timedelta.py create mode 100644 asv_bench/benchmarks/tslibs/timestamp.py create mode 100644 asv_bench/benchmarks/tslibs/tslib.py create mode 100644 asv_bench/benchmarks/tslibs/tz_convert.py create mode 100755 ci/code_checks.sh create mode 100644 ci/condarc.yml create mode 100644 ci/deps/actions-310-numpydev.yaml create mode 100644 ci/deps/actions-310.yaml create mode 100644 ci/deps/actions-38-downstream_compat.yaml create mode 100644 ci/deps/actions-38-minimum_versions.yaml create mode 100644 ci/deps/actions-38.yaml create mode 100644 ci/deps/actions-39.yaml create mode 100644 ci/deps/actions-pypy-38.yaml create mode 100644 ci/deps/circle-38-arm64.yaml create mode 100755 ci/run_tests.sh create mode 100644 codecov.yml create mode 100644 doc/.gitignore create mode 100644 doc/_templates/api_redirect.html create mode 100644 doc/_templates/autosummary/accessor.rst create mode 100644 doc/_templates/autosummary/accessor_attribute.rst create mode 100644 doc/_templates/autosummary/accessor_callable.rst create mode 100644 doc/_templates/autosummary/accessor_method.rst create mode 100644 doc/_templates/autosummary/class.rst create mode 100644 doc/_templates/autosummary/class_without_autosummary.rst create mode 100644 doc/_templates/pandas_footer.html create mode 100644 doc/_templates/sidebar-nav-bs.html create mode 100644 doc/cheatsheet/Pandas_Cheat_Sheet.pdf create mode 100644 doc/cheatsheet/Pandas_Cheat_Sheet.pptx create mode 100644 doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf create mode 100644 doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx create mode 100644 doc/cheatsheet/README.txt create mode 100644 doc/data/air_quality_long.csv create mode 100644 doc/data/air_quality_no2.csv create mode 100644 doc/data/air_quality_no2_long.csv create mode 100644 doc/data/air_quality_parameters.csv create mode 100644 doc/data/air_quality_pm25_long.csv create mode 100644 doc/data/air_quality_stations.csv create mode 100644 doc/data/baseball.csv create mode 100644 doc/data/iris.data create mode 100644 doc/data/tips.csv create mode 100644 doc/data/titanic.csv create mode 100755 doc/make.py create mode 100644 doc/redirects.csv create mode 100644 doc/source/_static/ci.png create mode 100644 doc/source/_static/css/getting_started.css create mode 100644 doc/source/_static/css/pandas.css create mode 100644 doc/source/_static/df_repr_truncated.png create mode 100644 doc/source/_static/eval-perf-small.png create mode 100644 doc/source/_static/eval-perf.png create mode 100644 doc/source/_static/index_api.svg create mode 100644 doc/source/_static/index_contribute.svg create mode 100644 doc/source/_static/index_getting_started.svg create mode 100644 doc/source/_static/index_user_guide.svg create mode 100644 doc/source/_static/legacy_0.10.h5 create mode 100644 doc/source/_static/logo_r.svg create mode 100644 doc/source/_static/logo_sas.svg create mode 100644 doc/source/_static/logo_sas_white.svg create mode 100644 doc/source/_static/logo_sql.svg create mode 100644 doc/source/_static/logo_stata.svg create mode 100644 doc/source/_static/new-excel-index.png create mode 100644 doc/source/_static/old-excel-index.png create mode 100644 doc/source/_static/print_df_new.png create mode 100644 
doc/source/_static/print_df_old.png create mode 100644 doc/source/_static/query-perf-small.png create mode 100644 doc/source/_static/query-perf.png create mode 100644 doc/source/_static/question_mark_noback.svg create mode 100644 doc/source/_static/reshaping_melt.png create mode 100644 doc/source/_static/reshaping_pivot.png create mode 100644 doc/source/_static/reshaping_stack.png create mode 100644 doc/source/_static/reshaping_unstack.png create mode 100644 doc/source/_static/reshaping_unstack_0.png create mode 100644 doc/source/_static/reshaping_unstack_1.png create mode 100644 doc/source/_static/schemas/01_table_dataframe.svg create mode 100644 doc/source/_static/schemas/01_table_series.svg create mode 100644 doc/source/_static/schemas/01_table_spreadsheet.png create mode 100644 doc/source/_static/schemas/02_io_readwrite.svg create mode 100644 doc/source/_static/schemas/03_subset_columns.svg create mode 100644 doc/source/_static/schemas/03_subset_columns_rows.svg create mode 100644 doc/source/_static/schemas/03_subset_rows.svg create mode 100644 doc/source/_static/schemas/04_plot_overview.svg create mode 100644 doc/source/_static/schemas/05_newcolumn_1.svg create mode 100644 doc/source/_static/schemas/05_newcolumn_2.svg create mode 100644 doc/source/_static/schemas/05_newcolumn_3.svg create mode 100644 doc/source/_static/schemas/06_aggregate.svg create mode 100644 doc/source/_static/schemas/06_groupby.svg create mode 100644 doc/source/_static/schemas/06_groupby_agg_detail.svg create mode 100644 doc/source/_static/schemas/06_groupby_select_detail.svg create mode 100644 doc/source/_static/schemas/06_reduction.svg create mode 100644 doc/source/_static/schemas/06_valuecounts.svg create mode 100644 doc/source/_static/schemas/07_melt.svg create mode 100644 doc/source/_static/schemas/07_pivot.svg create mode 100644 doc/source/_static/schemas/07_pivot_table.svg create mode 100644 doc/source/_static/schemas/08_concat_column.svg create mode 100644 doc/source/_static/schemas/08_concat_row.svg create mode 100644 doc/source/_static/schemas/08_merge_left.svg create mode 100644 doc/source/_static/spreadsheets/conditional.png create mode 100644 doc/source/_static/spreadsheets/filter.png create mode 100644 doc/source/_static/spreadsheets/find.png create mode 100644 doc/source/_static/spreadsheets/logo_excel.svg create mode 100644 doc/source/_static/spreadsheets/pivot.png create mode 100644 doc/source/_static/spreadsheets/sort.png create mode 100644 doc/source/_static/spreadsheets/vlookup.png create mode 100644 doc/source/_static/stub create mode 100644 doc/source/_static/style-excel.png create mode 100644 doc/source/_static/style/appmaphead1.png create mode 100644 doc/source/_static/style/appmaphead2.png create mode 100644 doc/source/_static/style/bg_ax0.png create mode 100644 doc/source/_static/style/bg_axNone.png create mode 100644 doc/source/_static/style/bg_axNone_gmap.png create mode 100644 doc/source/_static/style/bg_axNone_lowhigh.png create mode 100644 doc/source/_static/style/bg_axNone_vminvmax.png create mode 100644 doc/source/_static/style/bg_gmap.png create mode 100644 doc/source/_static/style/df_pipe.png create mode 100644 doc/source/_static/style/df_pipe_applydata.png create mode 100644 doc/source/_static/style/df_pipe_applymap.png create mode 100644 doc/source/_static/style/df_pipe_hl.png create mode 100644 doc/source/_static/style/df_pipe_hl2.png create mode 100644 doc/source/_static/style/footer_extended.png create mode 100644 doc/source/_static/style/footer_simple.png create mode 
100644 doc/source/_static/style/format_excel_css.png create mode 100644 doc/source/_static/style/hbetw_axNone.png create mode 100644 doc/source/_static/style/hbetw_basic.png create mode 100644 doc/source/_static/style/hbetw_props.png create mode 100644 doc/source/_static/style/hbetw_seq.png create mode 100644 doc/source/_static/style/hq_ax1.png create mode 100644 doc/source/_static/style/hq_axNone.png create mode 100644 doc/source/_static/style/hq_props.png create mode 100644 doc/source/_static/style/latex_1.png create mode 100644 doc/source/_static/style/latex_2.png create mode 100644 doc/source/_static/style/latex_stocks.png create mode 100644 doc/source/_static/style/latex_stocks_html.png create mode 100644 doc/source/_static/style/tg_ax0.png create mode 100644 doc/source/_static/style/tg_axNone.png create mode 100644 doc/source/_static/style/tg_axNone_gmap.png create mode 100644 doc/source/_static/style/tg_axNone_lowhigh.png create mode 100644 doc/source/_static/style/tg_axNone_vminvmax.png create mode 100644 doc/source/_static/style/tg_gmap.png create mode 100644 doc/source/_static/trunc_after.png create mode 100644 doc/source/_static/trunc_before.png create mode 100644 doc/source/_static/whatsnew_assign.png create mode 100644 doc/source/_static/whatsnew_plot_submethods.png create mode 100644 doc/source/conf.py create mode 100644 doc/source/development/community.rst create mode 100644 doc/source/development/contributing.rst create mode 100644 doc/source/development/contributing_codebase.rst create mode 100644 doc/source/development/contributing_docstring.rst create mode 100644 doc/source/development/contributing_documentation.rst create mode 100644 doc/source/development/contributing_environment.rst create mode 100644 doc/source/development/debugging_extensions.rst create mode 100644 doc/source/development/developer.rst create mode 100644 doc/source/development/extending.rst create mode 100644 doc/source/development/index.rst create mode 100644 doc/source/development/internals.rst create mode 100644 doc/source/development/maintaining.rst create mode 100644 doc/source/development/policies.rst create mode 100644 doc/source/development/roadmap.rst create mode 100644 doc/source/ecosystem.rst create mode 100644 doc/source/getting_started/comparison/comparison_with_r.rst create mode 100644 doc/source/getting_started/comparison/comparison_with_sas.rst create mode 100644 doc/source/getting_started/comparison/comparison_with_spreadsheets.rst create mode 100644 doc/source/getting_started/comparison/comparison_with_sql.rst create mode 100644 doc/source/getting_started/comparison/comparison_with_stata.rst create mode 100644 doc/source/getting_started/comparison/includes/case.rst create mode 100644 doc/source/getting_started/comparison/includes/column_operations.rst create mode 100644 doc/source/getting_started/comparison/includes/column_selection.rst create mode 100644 doc/source/getting_started/comparison/includes/construct_dataframe.rst create mode 100644 doc/source/getting_started/comparison/includes/copies.rst create mode 100644 doc/source/getting_started/comparison/includes/extract_substring.rst create mode 100644 doc/source/getting_started/comparison/includes/filtering.rst create mode 100644 doc/source/getting_started/comparison/includes/find_substring.rst create mode 100644 doc/source/getting_started/comparison/includes/groupby.rst create mode 100644 doc/source/getting_started/comparison/includes/if_then.rst create mode 100644 doc/source/getting_started/comparison/includes/introduction.rst 
create mode 100644 doc/source/getting_started/comparison/includes/length.rst create mode 100644 doc/source/getting_started/comparison/includes/limit.rst create mode 100644 doc/source/getting_started/comparison/includes/merge.rst create mode 100644 doc/source/getting_started/comparison/includes/merge_setup.rst create mode 100644 doc/source/getting_started/comparison/includes/missing.rst create mode 100644 doc/source/getting_started/comparison/includes/missing_intro.rst create mode 100644 doc/source/getting_started/comparison/includes/nth_word.rst create mode 100644 doc/source/getting_started/comparison/includes/sorting.rst create mode 100644 doc/source/getting_started/comparison/includes/time_date.rst create mode 100644 doc/source/getting_started/comparison/includes/transform.rst create mode 100644 doc/source/getting_started/comparison/index.rst create mode 100644 doc/source/getting_started/index.rst create mode 100644 doc/source/getting_started/install.rst create mode 100644 doc/source/getting_started/intro_tutorials/01_table_oriented.rst create mode 100644 doc/source/getting_started/intro_tutorials/02_read_write.rst create mode 100644 doc/source/getting_started/intro_tutorials/03_subset_data.rst create mode 100644 doc/source/getting_started/intro_tutorials/04_plotting.rst create mode 100644 doc/source/getting_started/intro_tutorials/05_add_columns.rst create mode 100644 doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst create mode 100644 doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst create mode 100644 doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst create mode 100644 doc/source/getting_started/intro_tutorials/09_timeseries.rst create mode 100644 doc/source/getting_started/intro_tutorials/10_text_data.rst create mode 100644 doc/source/getting_started/intro_tutorials/includes/air_quality_no2.rst create mode 100644 doc/source/getting_started/intro_tutorials/includes/titanic.rst create mode 100644 doc/source/getting_started/intro_tutorials/index.rst create mode 100644 doc/source/getting_started/overview.rst create mode 100644 doc/source/getting_started/tutorials.rst create mode 100644 doc/source/index.rst.template create mode 100644 doc/source/reference/arrays.rst create mode 100644 doc/source/reference/extensions.rst create mode 100644 doc/source/reference/frame.rst create mode 100644 doc/source/reference/general_functions.rst create mode 100644 doc/source/reference/groupby.rst create mode 100644 doc/source/reference/index.rst create mode 100644 doc/source/reference/indexing.rst create mode 100644 doc/source/reference/io.rst create mode 100644 doc/source/reference/offset_frequency.rst create mode 100644 doc/source/reference/options.rst create mode 100644 doc/source/reference/plotting.rst create mode 100644 doc/source/reference/resampling.rst create mode 100644 doc/source/reference/series.rst create mode 100644 doc/source/reference/style.rst create mode 100644 doc/source/reference/testing.rst create mode 100644 doc/source/reference/window.rst create mode 100644 doc/source/styled.xlsx create mode 100644 doc/source/user_guide/10min.rst create mode 100644 doc/source/user_guide/advanced.rst create mode 100644 doc/source/user_guide/basics.rst create mode 100644 doc/source/user_guide/boolean.rst create mode 100644 doc/source/user_guide/categorical.rst create mode 100644 doc/source/user_guide/dsintro.rst create mode 100644 doc/source/user_guide/duplicates.rst create mode 100644 doc/source/user_guide/enhancingperf.rst create mode 
100644 doc/source/user_guide/gotchas.rst create mode 100644 doc/source/user_guide/groupby.rst create mode 100644 doc/source/user_guide/index.rst create mode 100644 doc/source/user_guide/indexing.rst create mode 100644 doc/source/user_guide/integer_na.rst create mode 100644 doc/source/user_guide/io.rst create mode 100644 doc/source/user_guide/merging.rst create mode 100644 doc/source/user_guide/missing_data.rst create mode 100644 doc/source/user_guide/options.rst create mode 100644 doc/source/user_guide/reshaping.rst create mode 100644 doc/source/user_guide/scale.rst create mode 100644 doc/source/user_guide/sparse.rst create mode 100644 doc/source/user_guide/style.ipynb create mode 100644 doc/source/user_guide/templates/html_style_structure.html create mode 100644 doc/source/user_guide/templates/html_table_structure.html create mode 100644 doc/source/user_guide/templates/myhtml.tpl create mode 100644 doc/source/user_guide/text.rst create mode 100644 doc/source/user_guide/timedeltas.rst create mode 100644 doc/source/user_guide/timeseries.rst create mode 100644 doc/source/user_guide/visualization.rst create mode 100644 doc/source/user_guide/window.rst create mode 100644 doc/source/whatsnew/index.rst create mode 100644 doc/source/whatsnew/v0.10.0.rst create mode 100644 doc/source/whatsnew/v0.10.1.rst create mode 100644 doc/source/whatsnew/v0.11.0.rst create mode 100644 doc/source/whatsnew/v0.12.0.rst create mode 100644 doc/source/whatsnew/v0.13.0.rst create mode 100644 doc/source/whatsnew/v0.13.1.rst create mode 100644 doc/source/whatsnew/v0.14.0.rst create mode 100644 doc/source/whatsnew/v0.14.1.rst create mode 100644 doc/source/whatsnew/v0.15.0.rst create mode 100644 doc/source/whatsnew/v0.15.1.rst create mode 100644 doc/source/whatsnew/v0.15.2.rst create mode 100644 doc/source/whatsnew/v0.16.0.rst create mode 100644 doc/source/whatsnew/v0.16.1.rst create mode 100644 doc/source/whatsnew/v0.16.2.rst create mode 100644 doc/source/whatsnew/v0.17.0.rst create mode 100644 doc/source/whatsnew/v0.17.1.rst create mode 100644 doc/source/whatsnew/v0.18.0.rst create mode 100644 doc/source/whatsnew/v0.18.1.rst create mode 100644 doc/source/whatsnew/v0.19.0.rst create mode 100644 doc/source/whatsnew/v0.19.1.rst create mode 100644 doc/source/whatsnew/v0.19.2.rst create mode 100644 doc/source/whatsnew/v0.20.0.rst create mode 100644 doc/source/whatsnew/v0.20.2.rst create mode 100644 doc/source/whatsnew/v0.20.3.rst create mode 100644 doc/source/whatsnew/v0.21.0.rst create mode 100644 doc/source/whatsnew/v0.21.1.rst create mode 100644 doc/source/whatsnew/v0.22.0.rst create mode 100644 doc/source/whatsnew/v0.23.0.rst create mode 100644 doc/source/whatsnew/v0.23.1.rst create mode 100644 doc/source/whatsnew/v0.23.2.rst create mode 100644 doc/source/whatsnew/v0.23.3.rst create mode 100644 doc/source/whatsnew/v0.23.4.rst create mode 100644 doc/source/whatsnew/v0.24.0.rst create mode 100644 doc/source/whatsnew/v0.24.1.rst create mode 100644 doc/source/whatsnew/v0.24.2.rst create mode 100644 doc/source/whatsnew/v0.25.0.rst create mode 100644 doc/source/whatsnew/v0.25.1.rst create mode 100644 doc/source/whatsnew/v0.25.2.rst create mode 100644 doc/source/whatsnew/v0.25.3.rst create mode 100644 doc/source/whatsnew/v0.4.x.rst create mode 100644 doc/source/whatsnew/v0.5.0.rst create mode 100644 doc/source/whatsnew/v0.6.0.rst create mode 100644 doc/source/whatsnew/v0.6.1.rst create mode 100644 doc/source/whatsnew/v0.7.0.rst create mode 100644 doc/source/whatsnew/v0.7.1.rst create mode 100644 doc/source/whatsnew/v0.7.2.rst 
create mode 100644 doc/source/whatsnew/v0.7.3.rst create mode 100644 doc/source/whatsnew/v0.8.0.rst create mode 100644 doc/source/whatsnew/v0.8.1.rst create mode 100644 doc/source/whatsnew/v0.9.0.rst create mode 100644 doc/source/whatsnew/v0.9.1.rst create mode 100755 doc/source/whatsnew/v1.0.0.rst create mode 100644 doc/source/whatsnew/v1.0.1.rst create mode 100644 doc/source/whatsnew/v1.0.2.rst create mode 100644 doc/source/whatsnew/v1.0.3.rst create mode 100644 doc/source/whatsnew/v1.0.4.rst create mode 100644 doc/source/whatsnew/v1.0.5.rst create mode 100644 doc/source/whatsnew/v1.1.0.rst create mode 100644 doc/source/whatsnew/v1.1.1.rst create mode 100644 doc/source/whatsnew/v1.1.2.rst create mode 100644 doc/source/whatsnew/v1.1.3.rst create mode 100644 doc/source/whatsnew/v1.1.4.rst create mode 100644 doc/source/whatsnew/v1.1.5.rst create mode 100644 doc/source/whatsnew/v1.2.0.rst create mode 100644 doc/source/whatsnew/v1.2.1.rst create mode 100644 doc/source/whatsnew/v1.2.2.rst create mode 100644 doc/source/whatsnew/v1.2.3.rst create mode 100644 doc/source/whatsnew/v1.2.4.rst create mode 100644 doc/source/whatsnew/v1.2.5.rst create mode 100644 doc/source/whatsnew/v1.3.0.rst create mode 100644 doc/source/whatsnew/v1.3.1.rst create mode 100644 doc/source/whatsnew/v1.3.2.rst create mode 100644 doc/source/whatsnew/v1.3.3.rst create mode 100644 doc/source/whatsnew/v1.3.4.rst create mode 100644 doc/source/whatsnew/v1.3.5.rst create mode 100644 doc/source/whatsnew/v1.4.0.rst create mode 100644 doc/source/whatsnew/v1.4.1.rst create mode 100644 doc/source/whatsnew/v1.4.2.rst create mode 100644 doc/source/whatsnew/v1.4.3.rst create mode 100644 doc/source/whatsnew/v1.4.4.rst create mode 100644 doc/source/whatsnew/v1.5.0.rst create mode 100644 doc/source/whatsnew/v1.5.1.rst create mode 100644 doc/source/whatsnew/v1.5.2.rst create mode 100644 doc/source/whatsnew/v1.5.3.rst create mode 100644 doc/source/whatsnew/whatsnew_0171_html_table.html create mode 100644 doc/sphinxext/README.rst create mode 100755 doc/sphinxext/announce.py create mode 100644 doc/sphinxext/contributors.py create mode 100644 environment.yml create mode 100644 pandas/__init__.py create mode 100644 pandas/_config/__init__.py create mode 100644 pandas/_config/config.py create mode 100644 pandas/_config/dates.py create mode 100644 pandas/_config/display.py create mode 100644 pandas/_config/localization.py create mode 100644 pandas/_libs/__init__.py create mode 100644 pandas/_libs/algos.pxd create mode 100644 pandas/_libs/algos.pyi create mode 100644 pandas/_libs/algos.pyx create mode 100644 pandas/_libs/algos_common_helper.pxi.in create mode 100644 pandas/_libs/algos_take_helper.pxi.in create mode 100644 pandas/_libs/arrays.pxd create mode 100644 pandas/_libs/arrays.pyi create mode 100644 pandas/_libs/arrays.pyx create mode 100644 pandas/_libs/dtypes.pxd create mode 100644 pandas/_libs/groupby.pyi create mode 100644 pandas/_libs/groupby.pyx create mode 100644 pandas/_libs/hashing.pyi create mode 100644 pandas/_libs/hashing.pyx create mode 100644 pandas/_libs/hashtable.pxd create mode 100644 pandas/_libs/hashtable.pyi create mode 100644 pandas/_libs/hashtable.pyx create mode 100644 pandas/_libs/hashtable_class_helper.pxi.in create mode 100644 pandas/_libs/hashtable_func_helper.pxi.in create mode 100644 pandas/_libs/index.pyi create mode 100644 pandas/_libs/index.pyx create mode 100644 pandas/_libs/index_class_helper.pxi.in create mode 100644 pandas/_libs/indexing.pyi create mode 100644 pandas/_libs/indexing.pyx create mode 100644 
pandas/_libs/internals.pyi create mode 100644 pandas/_libs/internals.pyx create mode 100644 pandas/_libs/interval.pyi create mode 100644 pandas/_libs/interval.pyx create mode 100644 pandas/_libs/intervaltree.pxi.in create mode 100644 pandas/_libs/join.pyi create mode 100644 pandas/_libs/join.pyx create mode 100644 pandas/_libs/json.pyi create mode 100644 pandas/_libs/khash.pxd create mode 100644 pandas/_libs/khash_for_primitive_helper.pxi.in create mode 100644 pandas/_libs/lib.pxd create mode 100644 pandas/_libs/lib.pyi create mode 100644 pandas/_libs/lib.pyx create mode 100644 pandas/_libs/missing.pxd create mode 100644 pandas/_libs/missing.pyi create mode 100644 pandas/_libs/missing.pyx create mode 100644 pandas/_libs/ops.pyi create mode 100644 pandas/_libs/ops.pyx create mode 100644 pandas/_libs/ops_dispatch.pyi create mode 100644 pandas/_libs/ops_dispatch.pyx create mode 100644 pandas/_libs/parsers.pyi create mode 100644 pandas/_libs/parsers.pyx create mode 100644 pandas/_libs/properties.pyi create mode 100644 pandas/_libs/properties.pyx create mode 100644 pandas/_libs/reduction.pyi create mode 100644 pandas/_libs/reduction.pyx create mode 100644 pandas/_libs/reshape.pyi create mode 100644 pandas/_libs/reshape.pyx create mode 100644 pandas/_libs/sparse.pyi create mode 100644 pandas/_libs/sparse.pyx create mode 100644 pandas/_libs/sparse_op_helper.pxi.in create mode 100644 pandas/_libs/src/headers/portable.h create mode 100644 pandas/_libs/src/inline_helper.h create mode 100644 pandas/_libs/src/klib/khash.h create mode 100644 pandas/_libs/src/klib/khash_python.h create mode 100644 pandas/_libs/src/parse_helper.h create mode 100644 pandas/_libs/src/parser/io.c create mode 100644 pandas/_libs/src/parser/io.h create mode 100644 pandas/_libs/src/parser/tokenizer.c create mode 100644 pandas/_libs/src/parser/tokenizer.h create mode 100644 pandas/_libs/src/skiplist.h create mode 100644 pandas/_libs/src/ujson/lib/ultrajson.h create mode 100644 pandas/_libs/src/ujson/lib/ultrajsondec.c create mode 100644 pandas/_libs/src/ujson/lib/ultrajsonenc.c create mode 100644 pandas/_libs/src/ujson/python/JSONtoObj.c create mode 100644 pandas/_libs/src/ujson/python/date_conversions.c create mode 100644 pandas/_libs/src/ujson/python/date_conversions.h create mode 100644 pandas/_libs/src/ujson/python/objToJSON.c create mode 100644 pandas/_libs/src/ujson/python/ujson.c create mode 100644 pandas/_libs/src/ujson/python/version.h create mode 100644 pandas/_libs/testing.pyi create mode 100644 pandas/_libs/testing.pyx create mode 100644 pandas/_libs/tslib.pyi create mode 100644 pandas/_libs/tslib.pyx create mode 100644 pandas/_libs/tslibs/__init__.py create mode 100644 pandas/_libs/tslibs/base.pxd create mode 100644 pandas/_libs/tslibs/base.pyx create mode 100644 pandas/_libs/tslibs/ccalendar.pxd create mode 100644 pandas/_libs/tslibs/ccalendar.pyi create mode 100644 pandas/_libs/tslibs/ccalendar.pyx create mode 100644 pandas/_libs/tslibs/conversion.pxd create mode 100644 pandas/_libs/tslibs/conversion.pyi create mode 100644 pandas/_libs/tslibs/conversion.pyx create mode 100644 pandas/_libs/tslibs/dtypes.pxd create mode 100644 pandas/_libs/tslibs/dtypes.pyi create mode 100644 pandas/_libs/tslibs/dtypes.pyx create mode 100644 pandas/_libs/tslibs/fields.pyi create mode 100644 pandas/_libs/tslibs/fields.pyx create mode 100644 pandas/_libs/tslibs/nattype.pxd create mode 100644 pandas/_libs/tslibs/nattype.pyi create mode 100644 pandas/_libs/tslibs/nattype.pyx create mode 100644 pandas/_libs/tslibs/np_datetime.pxd create 
mode 100644 pandas/_libs/tslibs/np_datetime.pyi create mode 100644 pandas/_libs/tslibs/np_datetime.pyx create mode 100644 pandas/_libs/tslibs/offsets.pxd create mode 100644 pandas/_libs/tslibs/offsets.pyi create mode 100644 pandas/_libs/tslibs/offsets.pyx create mode 100644 pandas/_libs/tslibs/parsing.pxd create mode 100644 pandas/_libs/tslibs/parsing.pyi create mode 100644 pandas/_libs/tslibs/parsing.pyx create mode 100644 pandas/_libs/tslibs/period.pxd create mode 100644 pandas/_libs/tslibs/period.pyi create mode 100644 pandas/_libs/tslibs/period.pyx create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.c create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.h create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.c create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.h create mode 100644 pandas/_libs/tslibs/strptime.pyi create mode 100644 pandas/_libs/tslibs/strptime.pyx create mode 100644 pandas/_libs/tslibs/timedeltas.pxd create mode 100644 pandas/_libs/tslibs/timedeltas.pyi create mode 100644 pandas/_libs/tslibs/timedeltas.pyx create mode 100644 pandas/_libs/tslibs/timestamps.pxd create mode 100644 pandas/_libs/tslibs/timestamps.pyi create mode 100644 pandas/_libs/tslibs/timestamps.pyx create mode 100644 pandas/_libs/tslibs/timezones.pxd create mode 100644 pandas/_libs/tslibs/timezones.pyi create mode 100644 pandas/_libs/tslibs/timezones.pyx create mode 100644 pandas/_libs/tslibs/tzconversion.pxd create mode 100644 pandas/_libs/tslibs/tzconversion.pyi create mode 100644 pandas/_libs/tslibs/tzconversion.pyx create mode 100644 pandas/_libs/tslibs/util.pxd create mode 100644 pandas/_libs/tslibs/vectorized.pyi create mode 100644 pandas/_libs/tslibs/vectorized.pyx create mode 100644 pandas/_libs/util.pxd create mode 100644 pandas/_libs/window/__init__.py create mode 100644 pandas/_libs/window/aggregations.pyi create mode 100644 pandas/_libs/window/aggregations.pyx create mode 100644 pandas/_libs/window/indexers.pyi create mode 100644 pandas/_libs/window/indexers.pyx create mode 100644 pandas/_libs/writers.pyi create mode 100644 pandas/_libs/writers.pyx create mode 100644 pandas/_testing/__init__.py create mode 100644 pandas/_testing/_hypothesis.py create mode 100644 pandas/_testing/_io.py create mode 100644 pandas/_testing/_random.py create mode 100644 pandas/_testing/_warnings.py create mode 100644 pandas/_testing/asserters.py create mode 100644 pandas/_testing/compat.py create mode 100644 pandas/_testing/contexts.py create mode 100644 pandas/_typing.py create mode 100644 pandas/_version.py create mode 100644 pandas/api/__init__.py create mode 100644 pandas/api/extensions/__init__.py create mode 100644 pandas/api/indexers/__init__.py create mode 100644 pandas/api/interchange/__init__.py create mode 100644 pandas/api/types/__init__.py create mode 100644 pandas/arrays/__init__.py create mode 100644 pandas/compat/__init__.py create mode 100644 pandas/compat/_optional.py create mode 100644 pandas/compat/chainmap.py create mode 100644 pandas/compat/numpy/__init__.py create mode 100644 pandas/compat/numpy/function.py create mode 100644 pandas/compat/pickle_compat.py create mode 100644 pandas/compat/pyarrow.py create mode 100644 pandas/conftest.py create mode 100644 pandas/core/__init__.py create mode 100644 pandas/core/_numba/__init__.py create mode 100644 pandas/core/_numba/executor.py create mode 100644 pandas/core/_numba/kernels/__init__.py create mode 100644 pandas/core/_numba/kernels/mean_.py create mode 100644 
pandas/core/_numba/kernels/min_max_.py create mode 100644 pandas/core/_numba/kernels/shared.py create mode 100644 pandas/core/_numba/kernels/sum_.py create mode 100644 pandas/core/_numba/kernels/var_.py create mode 100644 pandas/core/accessor.py create mode 100644 pandas/core/algorithms.py create mode 100644 pandas/core/api.py create mode 100644 pandas/core/apply.py create mode 100644 pandas/core/array_algos/__init__.py create mode 100644 pandas/core/array_algos/masked_reductions.py create mode 100644 pandas/core/array_algos/putmask.py create mode 100644 pandas/core/array_algos/quantile.py create mode 100644 pandas/core/array_algos/replace.py create mode 100644 pandas/core/array_algos/take.py create mode 100644 pandas/core/array_algos/transforms.py create mode 100644 pandas/core/arraylike.py create mode 100644 pandas/core/arrays/__init__.py create mode 100644 pandas/core/arrays/_mixins.py create mode 100644 pandas/core/arrays/_ranges.py create mode 100644 pandas/core/arrays/arrow/__init__.py create mode 100644 pandas/core/arrays/arrow/_arrow_utils.py create mode 100644 pandas/core/arrays/arrow/array.py create mode 100644 pandas/core/arrays/arrow/dtype.py create mode 100644 pandas/core/arrays/arrow/extension_types.py create mode 100644 pandas/core/arrays/base.py create mode 100644 pandas/core/arrays/boolean.py create mode 100644 pandas/core/arrays/categorical.py create mode 100644 pandas/core/arrays/datetimelike.py create mode 100644 pandas/core/arrays/datetimes.py create mode 100644 pandas/core/arrays/floating.py create mode 100644 pandas/core/arrays/integer.py create mode 100644 pandas/core/arrays/interval.py create mode 100644 pandas/core/arrays/masked.py create mode 100644 pandas/core/arrays/numeric.py create mode 100644 pandas/core/arrays/numpy_.py create mode 100644 pandas/core/arrays/period.py create mode 100644 pandas/core/arrays/sparse/__init__.py create mode 100644 pandas/core/arrays/sparse/accessor.py create mode 100644 pandas/core/arrays/sparse/array.py create mode 100644 pandas/core/arrays/sparse/dtype.py create mode 100644 pandas/core/arrays/sparse/scipy_sparse.py create mode 100644 pandas/core/arrays/string_.py create mode 100644 pandas/core/arrays/string_arrow.py create mode 100644 pandas/core/arrays/timedeltas.py create mode 100644 pandas/core/base.py create mode 100644 pandas/core/common.py create mode 100644 pandas/core/computation/__init__.py create mode 100644 pandas/core/computation/align.py create mode 100644 pandas/core/computation/api.py create mode 100644 pandas/core/computation/check.py create mode 100644 pandas/core/computation/common.py create mode 100644 pandas/core/computation/engines.py create mode 100644 pandas/core/computation/eval.py create mode 100644 pandas/core/computation/expr.py create mode 100644 pandas/core/computation/expressions.py create mode 100644 pandas/core/computation/ops.py create mode 100644 pandas/core/computation/parsing.py create mode 100644 pandas/core/computation/pytables.py create mode 100644 pandas/core/computation/scope.py create mode 100644 pandas/core/config_init.py create mode 100644 pandas/core/construction.py create mode 100644 pandas/core/describe.py create mode 100644 pandas/core/dtypes/__init__.py create mode 100644 pandas/core/dtypes/api.py create mode 100644 pandas/core/dtypes/astype.py create mode 100644 pandas/core/dtypes/base.py create mode 100644 pandas/core/dtypes/cast.py create mode 100644 pandas/core/dtypes/common.py create mode 100644 pandas/core/dtypes/concat.py create mode 100644 pandas/core/dtypes/dtypes.py 
create mode 100644 pandas/core/dtypes/generic.py create mode 100644 pandas/core/dtypes/inference.py create mode 100644 pandas/core/dtypes/missing.py create mode 100644 pandas/core/flags.py create mode 100644 pandas/core/frame.py create mode 100644 pandas/core/generic.py create mode 100644 pandas/core/groupby/__init__.py create mode 100644 pandas/core/groupby/base.py create mode 100644 pandas/core/groupby/categorical.py create mode 100644 pandas/core/groupby/generic.py create mode 100644 pandas/core/groupby/groupby.py create mode 100644 pandas/core/groupby/grouper.py create mode 100644 pandas/core/groupby/indexing.py create mode 100644 pandas/core/groupby/numba_.py create mode 100644 pandas/core/groupby/ops.py create mode 100644 pandas/core/index.py create mode 100644 pandas/core/indexers/__init__.py create mode 100644 pandas/core/indexers/objects.py create mode 100644 pandas/core/indexers/utils.py create mode 100644 pandas/core/indexes/__init__.py create mode 100644 pandas/core/indexes/accessors.py create mode 100644 pandas/core/indexes/api.py create mode 100644 pandas/core/indexes/base.py create mode 100644 pandas/core/indexes/category.py create mode 100644 pandas/core/indexes/datetimelike.py create mode 100644 pandas/core/indexes/datetimes.py create mode 100644 pandas/core/indexes/extension.py create mode 100644 pandas/core/indexes/frozen.py create mode 100644 pandas/core/indexes/interval.py create mode 100644 pandas/core/indexes/multi.py create mode 100644 pandas/core/indexes/numeric.py create mode 100644 pandas/core/indexes/period.py create mode 100644 pandas/core/indexes/range.py create mode 100644 pandas/core/indexes/timedeltas.py create mode 100644 pandas/core/indexing.py create mode 100644 pandas/core/interchange/__init__.py create mode 100644 pandas/core/interchange/buffer.py create mode 100644 pandas/core/interchange/column.py create mode 100644 pandas/core/interchange/dataframe.py create mode 100644 pandas/core/interchange/dataframe_protocol.py create mode 100644 pandas/core/interchange/from_dataframe.py create mode 100644 pandas/core/interchange/utils.py create mode 100644 pandas/core/internals/__init__.py create mode 100644 pandas/core/internals/api.py create mode 100644 pandas/core/internals/array_manager.py create mode 100644 pandas/core/internals/base.py create mode 100644 pandas/core/internals/blocks.py create mode 100644 pandas/core/internals/concat.py create mode 100644 pandas/core/internals/construction.py create mode 100644 pandas/core/internals/managers.py create mode 100644 pandas/core/internals/ops.py create mode 100644 pandas/core/missing.py create mode 100644 pandas/core/nanops.py create mode 100644 pandas/core/ops/__init__.py create mode 100644 pandas/core/ops/array_ops.py create mode 100644 pandas/core/ops/common.py create mode 100644 pandas/core/ops/dispatch.py create mode 100644 pandas/core/ops/docstrings.py create mode 100644 pandas/core/ops/invalid.py create mode 100644 pandas/core/ops/mask_ops.py create mode 100644 pandas/core/ops/methods.py create mode 100644 pandas/core/ops/missing.py create mode 100644 pandas/core/resample.py create mode 100644 pandas/core/reshape/__init__.py create mode 100644 pandas/core/reshape/api.py create mode 100644 pandas/core/reshape/concat.py create mode 100644 pandas/core/reshape/encoding.py create mode 100644 pandas/core/reshape/melt.py create mode 100644 pandas/core/reshape/merge.py create mode 100644 pandas/core/reshape/pivot.py create mode 100644 pandas/core/reshape/reshape.py create mode 100644 pandas/core/reshape/tile.py 
create mode 100644 pandas/core/reshape/util.py create mode 100644 pandas/core/roperator.py create mode 100644 pandas/core/sample.py create mode 100644 pandas/core/series.py create mode 100644 pandas/core/shared_docs.py create mode 100644 pandas/core/sorting.py create mode 100644 pandas/core/sparse/__init__.py create mode 100644 pandas/core/sparse/api.py create mode 100644 pandas/core/strings/__init__.py create mode 100644 pandas/core/strings/accessor.py create mode 100644 pandas/core/strings/base.py create mode 100644 pandas/core/strings/object_array.py create mode 100644 pandas/core/tools/__init__.py create mode 100644 pandas/core/tools/datetimes.py create mode 100644 pandas/core/tools/numeric.py create mode 100644 pandas/core/tools/timedeltas.py create mode 100644 pandas/core/tools/times.py create mode 100644 pandas/core/util/__init__.py create mode 100644 pandas/core/util/hashing.py create mode 100644 pandas/core/util/numba_.py create mode 100644 pandas/core/window/__init__.py create mode 100644 pandas/core/window/common.py create mode 100644 pandas/core/window/doc.py create mode 100644 pandas/core/window/ewm.py create mode 100644 pandas/core/window/expanding.py create mode 100644 pandas/core/window/numba_.py create mode 100644 pandas/core/window/online.py create mode 100644 pandas/core/window/rolling.py create mode 100644 pandas/errors/__init__.py create mode 100644 pandas/io/__init__.py create mode 100644 pandas/io/api.py create mode 100644 pandas/io/clipboard/__init__.py create mode 100644 pandas/io/clipboards.py create mode 100644 pandas/io/common.py create mode 100644 pandas/io/date_converters.py create mode 100644 pandas/io/excel/__init__.py create mode 100644 pandas/io/excel/_base.py create mode 100644 pandas/io/excel/_odfreader.py create mode 100644 pandas/io/excel/_odswriter.py create mode 100644 pandas/io/excel/_openpyxl.py create mode 100644 pandas/io/excel/_pyxlsb.py create mode 100644 pandas/io/excel/_util.py create mode 100644 pandas/io/excel/_xlrd.py create mode 100644 pandas/io/excel/_xlsxwriter.py create mode 100644 pandas/io/excel/_xlwt.py create mode 100644 pandas/io/feather_format.py create mode 100644 pandas/io/formats/__init__.py create mode 100644 pandas/io/formats/_color_data.py create mode 100644 pandas/io/formats/console.py create mode 100644 pandas/io/formats/css.py create mode 100644 pandas/io/formats/csvs.py create mode 100644 pandas/io/formats/excel.py create mode 100644 pandas/io/formats/format.py create mode 100644 pandas/io/formats/html.py create mode 100644 pandas/io/formats/info.py create mode 100644 pandas/io/formats/latex.py create mode 100644 pandas/io/formats/printing.py create mode 100644 pandas/io/formats/string.py create mode 100644 pandas/io/formats/style.py create mode 100644 pandas/io/formats/style_render.py create mode 100644 pandas/io/formats/templates/html.tpl create mode 100644 pandas/io/formats/templates/html_style.tpl create mode 100644 pandas/io/formats/templates/html_table.tpl create mode 100644 pandas/io/formats/templates/latex.tpl create mode 100644 pandas/io/formats/templates/latex_longtable.tpl create mode 100644 pandas/io/formats/templates/latex_table.tpl create mode 100644 pandas/io/formats/templates/string.tpl create mode 100644 pandas/io/formats/xml.py create mode 100644 pandas/io/gbq.py create mode 100644 pandas/io/html.py create mode 100644 pandas/io/json/__init__.py create mode 100644 pandas/io/json/_json.py create mode 100644 pandas/io/json/_normalize.py create mode 100644 pandas/io/json/_table_schema.py create mode 100644 
pandas/io/orc.py create mode 100644 pandas/io/parquet.py create mode 100644 pandas/io/parsers/__init__.py create mode 100644 pandas/io/parsers/arrow_parser_wrapper.py create mode 100644 pandas/io/parsers/base_parser.py create mode 100644 pandas/io/parsers/c_parser_wrapper.py create mode 100644 pandas/io/parsers/python_parser.py create mode 100644 pandas/io/parsers/readers.py create mode 100644 pandas/io/pickle.py create mode 100644 pandas/io/pytables.py create mode 100644 pandas/io/sas/__init__.py create mode 100644 pandas/io/sas/_sas.pyi create mode 100644 pandas/io/sas/sas.pyx create mode 100644 pandas/io/sas/sas7bdat.py create mode 100644 pandas/io/sas/sas_constants.py create mode 100644 pandas/io/sas/sas_xport.py create mode 100644 pandas/io/sas/sasreader.py create mode 100644 pandas/io/spss.py create mode 100644 pandas/io/sql.py create mode 100644 pandas/io/stata.py create mode 100644 pandas/io/xml.py create mode 100644 pandas/plotting/__init__.py create mode 100644 pandas/plotting/_core.py create mode 100644 pandas/plotting/_matplotlib/__init__.py create mode 100644 pandas/plotting/_matplotlib/boxplot.py create mode 100644 pandas/plotting/_matplotlib/compat.py create mode 100644 pandas/plotting/_matplotlib/converter.py create mode 100644 pandas/plotting/_matplotlib/core.py create mode 100644 pandas/plotting/_matplotlib/groupby.py create mode 100644 pandas/plotting/_matplotlib/hist.py create mode 100644 pandas/plotting/_matplotlib/misc.py create mode 100644 pandas/plotting/_matplotlib/style.py create mode 100644 pandas/plotting/_matplotlib/timeseries.py create mode 100644 pandas/plotting/_matplotlib/tools.py create mode 100644 pandas/plotting/_misc.py create mode 100644 pandas/testing.py create mode 100644 pandas/tests/__init__.py create mode 100644 pandas/tests/api/__init__.py create mode 100644 pandas/tests/api/test_api.py create mode 100644 pandas/tests/api/test_types.py create mode 100644 pandas/tests/apply/__init__.py create mode 100644 pandas/tests/apply/common.py create mode 100644 pandas/tests/apply/conftest.py create mode 100644 pandas/tests/apply/test_frame_apply.py create mode 100644 pandas/tests/apply/test_frame_apply_relabeling.py create mode 100644 pandas/tests/apply/test_frame_transform.py create mode 100644 pandas/tests/apply/test_invalid_arg.py create mode 100644 pandas/tests/apply/test_series_apply.py create mode 100644 pandas/tests/apply/test_series_apply_relabeling.py create mode 100644 pandas/tests/apply/test_series_transform.py create mode 100644 pandas/tests/apply/test_str.py create mode 100644 pandas/tests/arithmetic/__init__.py create mode 100644 pandas/tests/arithmetic/common.py create mode 100644 pandas/tests/arithmetic/conftest.py create mode 100644 pandas/tests/arithmetic/test_array_ops.py create mode 100644 pandas/tests/arithmetic/test_categorical.py create mode 100644 pandas/tests/arithmetic/test_datetime64.py create mode 100644 pandas/tests/arithmetic/test_interval.py create mode 100644 pandas/tests/arithmetic/test_numeric.py create mode 100644 pandas/tests/arithmetic/test_object.py create mode 100644 pandas/tests/arithmetic/test_period.py create mode 100644 pandas/tests/arithmetic/test_timedelta64.py create mode 100644 pandas/tests/arrays/__init__.py create mode 100644 pandas/tests/arrays/boolean/__init__.py create mode 100644 pandas/tests/arrays/boolean/test_arithmetic.py create mode 100644 pandas/tests/arrays/boolean/test_astype.py create mode 100644 pandas/tests/arrays/boolean/test_comparison.py create mode 100644 
pandas/tests/arrays/boolean/test_construction.py create mode 100644 pandas/tests/arrays/boolean/test_function.py create mode 100644 pandas/tests/arrays/boolean/test_indexing.py create mode 100644 pandas/tests/arrays/boolean/test_logical.py create mode 100644 pandas/tests/arrays/boolean/test_ops.py create mode 100644 pandas/tests/arrays/boolean/test_reduction.py create mode 100644 pandas/tests/arrays/boolean/test_repr.py create mode 100644 pandas/tests/arrays/categorical/__init__.py create mode 100644 pandas/tests/arrays/categorical/conftest.py create mode 100644 pandas/tests/arrays/categorical/test_algos.py create mode 100644 pandas/tests/arrays/categorical/test_analytics.py create mode 100644 pandas/tests/arrays/categorical/test_api.py create mode 100644 pandas/tests/arrays/categorical/test_astype.py create mode 100644 pandas/tests/arrays/categorical/test_constructors.py create mode 100644 pandas/tests/arrays/categorical/test_dtypes.py create mode 100644 pandas/tests/arrays/categorical/test_indexing.py create mode 100644 pandas/tests/arrays/categorical/test_missing.py create mode 100644 pandas/tests/arrays/categorical/test_operators.py create mode 100644 pandas/tests/arrays/categorical/test_replace.py create mode 100644 pandas/tests/arrays/categorical/test_repr.py create mode 100644 pandas/tests/arrays/categorical/test_sorting.py create mode 100644 pandas/tests/arrays/categorical/test_subclass.py create mode 100644 pandas/tests/arrays/categorical/test_take.py create mode 100644 pandas/tests/arrays/categorical/test_warnings.py create mode 100644 pandas/tests/arrays/datetimes/__init__.py create mode 100644 pandas/tests/arrays/datetimes/test_constructors.py create mode 100644 pandas/tests/arrays/datetimes/test_reductions.py create mode 100644 pandas/tests/arrays/floating/__init__.py create mode 100644 pandas/tests/arrays/floating/conftest.py create mode 100644 pandas/tests/arrays/floating/test_arithmetic.py create mode 100644 pandas/tests/arrays/floating/test_astype.py create mode 100644 pandas/tests/arrays/floating/test_comparison.py create mode 100644 pandas/tests/arrays/floating/test_concat.py create mode 100644 pandas/tests/arrays/floating/test_construction.py create mode 100644 pandas/tests/arrays/floating/test_function.py create mode 100644 pandas/tests/arrays/floating/test_repr.py create mode 100644 pandas/tests/arrays/floating/test_to_numpy.py create mode 100644 pandas/tests/arrays/integer/__init__.py create mode 100644 pandas/tests/arrays/integer/conftest.py create mode 100644 pandas/tests/arrays/integer/test_arithmetic.py create mode 100644 pandas/tests/arrays/integer/test_comparison.py create mode 100644 pandas/tests/arrays/integer/test_concat.py create mode 100644 pandas/tests/arrays/integer/test_construction.py create mode 100644 pandas/tests/arrays/integer/test_dtypes.py create mode 100644 pandas/tests/arrays/integer/test_function.py create mode 100644 pandas/tests/arrays/integer/test_indexing.py create mode 100644 pandas/tests/arrays/integer/test_repr.py create mode 100644 pandas/tests/arrays/interval/__init__.py create mode 100644 pandas/tests/arrays/interval/test_astype.py create mode 100644 pandas/tests/arrays/interval/test_interval.py create mode 100644 pandas/tests/arrays/interval/test_ops.py create mode 100644 pandas/tests/arrays/masked/__init__.py create mode 100644 pandas/tests/arrays/masked/test_arithmetic.py create mode 100644 pandas/tests/arrays/masked/test_arrow_compat.py create mode 100644 pandas/tests/arrays/masked/test_function.py create mode 100644 
pandas/tests/arrays/masked/test_indexing.py create mode 100644 pandas/tests/arrays/masked_shared.py create mode 100644 pandas/tests/arrays/numpy_/__init__.py create mode 100644 pandas/tests/arrays/numpy_/test_indexing.py create mode 100644 pandas/tests/arrays/numpy_/test_numpy.py create mode 100644 pandas/tests/arrays/period/__init__.py create mode 100644 pandas/tests/arrays/period/test_arrow_compat.py create mode 100644 pandas/tests/arrays/period/test_astype.py create mode 100644 pandas/tests/arrays/period/test_constructors.py create mode 100644 pandas/tests/arrays/period/test_reductions.py create mode 100644 pandas/tests/arrays/sparse/__init__.py create mode 100644 pandas/tests/arrays/sparse/test_accessor.py create mode 100644 pandas/tests/arrays/sparse/test_arithmetics.py create mode 100644 pandas/tests/arrays/sparse/test_array.py create mode 100644 pandas/tests/arrays/sparse/test_astype.py create mode 100644 pandas/tests/arrays/sparse/test_combine_concat.py create mode 100644 pandas/tests/arrays/sparse/test_constructors.py create mode 100644 pandas/tests/arrays/sparse/test_dtype.py create mode 100644 pandas/tests/arrays/sparse/test_indexing.py create mode 100644 pandas/tests/arrays/sparse/test_libsparse.py create mode 100644 pandas/tests/arrays/sparse/test_reductions.py create mode 100644 pandas/tests/arrays/sparse/test_unary.py create mode 100644 pandas/tests/arrays/string_/__init__.py create mode 100644 pandas/tests/arrays/string_/test_string.py create mode 100644 pandas/tests/arrays/string_/test_string_arrow.py create mode 100644 pandas/tests/arrays/test_array.py create mode 100644 pandas/tests/arrays/test_datetimelike.py create mode 100644 pandas/tests/arrays/test_datetimes.py create mode 100644 pandas/tests/arrays/test_ndarray_backed.py create mode 100644 pandas/tests/arrays/test_period.py create mode 100644 pandas/tests/arrays/test_timedeltas.py create mode 100644 pandas/tests/arrays/timedeltas/__init__.py create mode 100644 pandas/tests/arrays/timedeltas/test_constructors.py create mode 100644 pandas/tests/arrays/timedeltas/test_reductions.py create mode 100644 pandas/tests/base/__init__.py create mode 100644 pandas/tests/base/common.py create mode 100644 pandas/tests/base/test_constructors.py create mode 100644 pandas/tests/base/test_conversion.py create mode 100644 pandas/tests/base/test_fillna.py create mode 100644 pandas/tests/base/test_misc.py create mode 100644 pandas/tests/base/test_transpose.py create mode 100644 pandas/tests/base/test_unique.py create mode 100644 pandas/tests/base/test_value_counts.py create mode 100644 pandas/tests/computation/__init__.py create mode 100644 pandas/tests/computation/test_compat.py create mode 100644 pandas/tests/computation/test_eval.py create mode 100644 pandas/tests/config/__init__.py create mode 100644 pandas/tests/config/test_config.py create mode 100644 pandas/tests/config/test_localization.py create mode 100644 pandas/tests/construction/__init__.py create mode 100644 pandas/tests/construction/test_extract_array.py create mode 100644 pandas/tests/copy_view/__init__.py create mode 100644 pandas/tests/copy_view/test_indexing.py create mode 100644 pandas/tests/copy_view/test_internals.py create mode 100644 pandas/tests/copy_view/test_methods.py create mode 100644 pandas/tests/copy_view/test_setitem.py create mode 100644 pandas/tests/copy_view/util.py create mode 100644 pandas/tests/dtypes/__init__.py create mode 100644 pandas/tests/dtypes/cast/__init__.py create mode 100644 pandas/tests/dtypes/cast/test_can_hold_element.py create mode 
100644 pandas/tests/dtypes/cast/test_construct_from_scalar.py create mode 100644 pandas/tests/dtypes/cast/test_construct_ndarray.py create mode 100644 pandas/tests/dtypes/cast/test_construct_object_arr.py create mode 100644 pandas/tests/dtypes/cast/test_dict_compat.py create mode 100644 pandas/tests/dtypes/cast/test_downcast.py create mode 100644 pandas/tests/dtypes/cast/test_find_common_type.py create mode 100644 pandas/tests/dtypes/cast/test_infer_datetimelike.py create mode 100644 pandas/tests/dtypes/cast/test_infer_dtype.py create mode 100644 pandas/tests/dtypes/cast/test_maybe_box_native.py create mode 100644 pandas/tests/dtypes/cast/test_promote.py create mode 100644 pandas/tests/dtypes/test_common.py create mode 100644 pandas/tests/dtypes/test_concat.py create mode 100644 pandas/tests/dtypes/test_dtypes.py create mode 100644 pandas/tests/dtypes/test_generic.py create mode 100644 pandas/tests/dtypes/test_inference.py create mode 100644 pandas/tests/dtypes/test_missing.py create mode 100644 pandas/tests/extension/__init__.py create mode 100644 pandas/tests/extension/array_with_attr/__init__.py create mode 100644 pandas/tests/extension/array_with_attr/array.py create mode 100644 pandas/tests/extension/array_with_attr/test_array_with_attr.py create mode 100644 pandas/tests/extension/arrow/__init__.py create mode 100644 pandas/tests/extension/arrow/arrays.py create mode 100644 pandas/tests/extension/arrow/test_bool.py create mode 100644 pandas/tests/extension/arrow/test_string.py create mode 100644 pandas/tests/extension/arrow/test_timestamp.py create mode 100644 pandas/tests/extension/base/__init__.py create mode 100644 pandas/tests/extension/base/base.py create mode 100644 pandas/tests/extension/base/casting.py create mode 100644 pandas/tests/extension/base/constructors.py create mode 100644 pandas/tests/extension/base/dim2.py create mode 100644 pandas/tests/extension/base/dtype.py create mode 100644 pandas/tests/extension/base/getitem.py create mode 100644 pandas/tests/extension/base/groupby.py create mode 100644 pandas/tests/extension/base/index.py create mode 100644 pandas/tests/extension/base/interface.py create mode 100644 pandas/tests/extension/base/io.py create mode 100644 pandas/tests/extension/base/methods.py create mode 100644 pandas/tests/extension/base/missing.py create mode 100644 pandas/tests/extension/base/ops.py create mode 100644 pandas/tests/extension/base/printing.py create mode 100644 pandas/tests/extension/base/reduce.py create mode 100644 pandas/tests/extension/base/reshaping.py create mode 100644 pandas/tests/extension/base/setitem.py create mode 100644 pandas/tests/extension/conftest.py create mode 100644 pandas/tests/extension/date/__init__.py create mode 100644 pandas/tests/extension/date/array.py create mode 100644 pandas/tests/extension/decimal/__init__.py create mode 100644 pandas/tests/extension/decimal/array.py create mode 100644 pandas/tests/extension/decimal/test_decimal.py create mode 100644 pandas/tests/extension/json/__init__.py create mode 100644 pandas/tests/extension/json/array.py create mode 100644 pandas/tests/extension/json/test_json.py create mode 100644 pandas/tests/extension/list/__init__.py create mode 100644 pandas/tests/extension/list/array.py create mode 100644 pandas/tests/extension/list/test_list.py create mode 100644 pandas/tests/extension/test_arrow.py create mode 100644 pandas/tests/extension/test_boolean.py create mode 100644 pandas/tests/extension/test_categorical.py create mode 100644 pandas/tests/extension/test_common.py create 
mode 100644 pandas/tests/extension/test_datetime.py create mode 100644 pandas/tests/extension/test_extension.py create mode 100644 pandas/tests/extension/test_external_block.py create mode 100644 pandas/tests/extension/test_floating.py create mode 100644 pandas/tests/extension/test_integer.py create mode 100644 pandas/tests/extension/test_interval.py create mode 100644 pandas/tests/extension/test_numpy.py create mode 100644 pandas/tests/extension/test_period.py create mode 100644 pandas/tests/extension/test_sparse.py create mode 100644 pandas/tests/extension/test_string.py create mode 100644 pandas/tests/frame/__init__.py create mode 100644 pandas/tests/frame/common.py create mode 100644 pandas/tests/frame/conftest.py create mode 100644 pandas/tests/frame/constructors/__init__.py create mode 100644 pandas/tests/frame/constructors/test_from_dict.py create mode 100644 pandas/tests/frame/constructors/test_from_records.py create mode 100644 pandas/tests/frame/indexing/__init__.py create mode 100644 pandas/tests/frame/indexing/test_coercion.py create mode 100644 pandas/tests/frame/indexing/test_delitem.py create mode 100644 pandas/tests/frame/indexing/test_get.py create mode 100644 pandas/tests/frame/indexing/test_get_value.py create mode 100644 pandas/tests/frame/indexing/test_getitem.py create mode 100644 pandas/tests/frame/indexing/test_indexing.py create mode 100644 pandas/tests/frame/indexing/test_insert.py create mode 100644 pandas/tests/frame/indexing/test_lookup.py create mode 100644 pandas/tests/frame/indexing/test_mask.py create mode 100644 pandas/tests/frame/indexing/test_set_value.py create mode 100644 pandas/tests/frame/indexing/test_setitem.py create mode 100644 pandas/tests/frame/indexing/test_take.py create mode 100644 pandas/tests/frame/indexing/test_where.py create mode 100644 pandas/tests/frame/indexing/test_xs.py create mode 100644 pandas/tests/frame/methods/__init__.py create mode 100644 pandas/tests/frame/methods/test_add_prefix_suffix.py create mode 100644 pandas/tests/frame/methods/test_align.py create mode 100644 pandas/tests/frame/methods/test_append.py create mode 100644 pandas/tests/frame/methods/test_asfreq.py create mode 100644 pandas/tests/frame/methods/test_asof.py create mode 100644 pandas/tests/frame/methods/test_assign.py create mode 100644 pandas/tests/frame/methods/test_astype.py create mode 100644 pandas/tests/frame/methods/test_at_time.py create mode 100644 pandas/tests/frame/methods/test_between_time.py create mode 100644 pandas/tests/frame/methods/test_clip.py create mode 100644 pandas/tests/frame/methods/test_combine.py create mode 100644 pandas/tests/frame/methods/test_combine_first.py create mode 100644 pandas/tests/frame/methods/test_compare.py create mode 100644 pandas/tests/frame/methods/test_convert.py create mode 100644 pandas/tests/frame/methods/test_convert_dtypes.py create mode 100644 pandas/tests/frame/methods/test_copy.py create mode 100644 pandas/tests/frame/methods/test_count.py create mode 100644 pandas/tests/frame/methods/test_count_with_level_deprecated.py create mode 100644 pandas/tests/frame/methods/test_cov_corr.py create mode 100644 pandas/tests/frame/methods/test_describe.py create mode 100644 pandas/tests/frame/methods/test_diff.py create mode 100644 pandas/tests/frame/methods/test_dot.py create mode 100644 pandas/tests/frame/methods/test_drop.py create mode 100644 pandas/tests/frame/methods/test_drop_duplicates.py create mode 100644 pandas/tests/frame/methods/test_droplevel.py create mode 100644 
pandas/tests/frame/methods/test_dropna.py create mode 100644 pandas/tests/frame/methods/test_dtypes.py create mode 100644 pandas/tests/frame/methods/test_duplicated.py create mode 100644 pandas/tests/frame/methods/test_equals.py create mode 100644 pandas/tests/frame/methods/test_explode.py create mode 100644 pandas/tests/frame/methods/test_fillna.py create mode 100644 pandas/tests/frame/methods/test_filter.py create mode 100644 pandas/tests/frame/methods/test_first_and_last.py create mode 100644 pandas/tests/frame/methods/test_first_valid_index.py create mode 100644 pandas/tests/frame/methods/test_get_numeric_data.py create mode 100644 pandas/tests/frame/methods/test_head_tail.py create mode 100644 pandas/tests/frame/methods/test_infer_objects.py create mode 100644 pandas/tests/frame/methods/test_interpolate.py create mode 100644 pandas/tests/frame/methods/test_is_homogeneous_dtype.py create mode 100644 pandas/tests/frame/methods/test_isin.py create mode 100644 pandas/tests/frame/methods/test_join.py create mode 100644 pandas/tests/frame/methods/test_matmul.py create mode 100644 pandas/tests/frame/methods/test_nlargest.py create mode 100644 pandas/tests/frame/methods/test_pct_change.py create mode 100644 pandas/tests/frame/methods/test_pipe.py create mode 100644 pandas/tests/frame/methods/test_pop.py create mode 100644 pandas/tests/frame/methods/test_quantile.py create mode 100644 pandas/tests/frame/methods/test_rank.py create mode 100644 pandas/tests/frame/methods/test_reindex.py create mode 100644 pandas/tests/frame/methods/test_reindex_like.py create mode 100644 pandas/tests/frame/methods/test_rename.py create mode 100644 pandas/tests/frame/methods/test_rename_axis.py create mode 100644 pandas/tests/frame/methods/test_reorder_levels.py create mode 100644 pandas/tests/frame/methods/test_replace.py create mode 100644 pandas/tests/frame/methods/test_reset_index.py create mode 100644 pandas/tests/frame/methods/test_round.py create mode 100644 pandas/tests/frame/methods/test_sample.py create mode 100644 pandas/tests/frame/methods/test_select_dtypes.py create mode 100644 pandas/tests/frame/methods/test_set_axis.py create mode 100644 pandas/tests/frame/methods/test_set_index.py create mode 100644 pandas/tests/frame/methods/test_shift.py create mode 100644 pandas/tests/frame/methods/test_sort_index.py create mode 100644 pandas/tests/frame/methods/test_sort_values.py create mode 100644 pandas/tests/frame/methods/test_swapaxes.py create mode 100644 pandas/tests/frame/methods/test_swaplevel.py create mode 100644 pandas/tests/frame/methods/test_to_csv.py create mode 100644 pandas/tests/frame/methods/test_to_dict.py create mode 100644 pandas/tests/frame/methods/test_to_dict_of_blocks.py create mode 100644 pandas/tests/frame/methods/test_to_numpy.py create mode 100644 pandas/tests/frame/methods/test_to_period.py create mode 100644 pandas/tests/frame/methods/test_to_records.py create mode 100644 pandas/tests/frame/methods/test_to_timestamp.py create mode 100644 pandas/tests/frame/methods/test_transpose.py create mode 100644 pandas/tests/frame/methods/test_truncate.py create mode 100644 pandas/tests/frame/methods/test_tz_convert.py create mode 100644 pandas/tests/frame/methods/test_tz_localize.py create mode 100644 pandas/tests/frame/methods/test_update.py create mode 100644 pandas/tests/frame/methods/test_value_counts.py create mode 100644 pandas/tests/frame/methods/test_values.py create mode 100644 pandas/tests/frame/test_alter_axes.py create mode 100644 pandas/tests/frame/test_api.py create mode 
100644 pandas/tests/frame/test_arithmetic.py create mode 100644 pandas/tests/frame/test_block_internals.py create mode 100644 pandas/tests/frame/test_constructors.py create mode 100644 pandas/tests/frame/test_cumulative.py create mode 100644 pandas/tests/frame/test_iteration.py create mode 100644 pandas/tests/frame/test_logical_ops.py create mode 100644 pandas/tests/frame/test_nonunique_indexes.py create mode 100644 pandas/tests/frame/test_npfuncs.py create mode 100644 pandas/tests/frame/test_query_eval.py create mode 100644 pandas/tests/frame/test_reductions.py create mode 100644 pandas/tests/frame/test_repr_info.py create mode 100644 pandas/tests/frame/test_stack_unstack.py create mode 100644 pandas/tests/frame/test_subclass.py create mode 100644 pandas/tests/frame/test_ufunc.py create mode 100644 pandas/tests/frame/test_unary.py create mode 100644 pandas/tests/frame/test_validate.py create mode 100644 pandas/tests/generic/__init__.py create mode 100644 pandas/tests/generic/test_duplicate_labels.py create mode 100644 pandas/tests/generic/test_finalize.py create mode 100644 pandas/tests/generic/test_frame.py create mode 100644 pandas/tests/generic/test_generic.py create mode 100644 pandas/tests/generic/test_label_or_level_utils.py create mode 100644 pandas/tests/generic/test_series.py create mode 100644 pandas/tests/generic/test_to_xarray.py create mode 100644 pandas/tests/groupby/__init__.py create mode 100644 pandas/tests/groupby/aggregate/__init__.py create mode 100644 pandas/tests/groupby/aggregate/test_aggregate.py create mode 100644 pandas/tests/groupby/aggregate/test_cython.py create mode 100644 pandas/tests/groupby/aggregate/test_numba.py create mode 100644 pandas/tests/groupby/aggregate/test_other.py create mode 100644 pandas/tests/groupby/conftest.py create mode 100644 pandas/tests/groupby/test_allowlist.py create mode 100644 pandas/tests/groupby/test_any_all.py create mode 100644 pandas/tests/groupby/test_apply.py create mode 100644 pandas/tests/groupby/test_apply_mutate.py create mode 100644 pandas/tests/groupby/test_bin_groupby.py create mode 100644 pandas/tests/groupby/test_categorical.py create mode 100644 pandas/tests/groupby/test_counting.py create mode 100644 pandas/tests/groupby/test_filters.py create mode 100644 pandas/tests/groupby/test_frame_value_counts.py create mode 100644 pandas/tests/groupby/test_function.py create mode 100644 pandas/tests/groupby/test_groupby.py create mode 100644 pandas/tests/groupby/test_groupby_dropna.py create mode 100644 pandas/tests/groupby/test_groupby_shift_diff.py create mode 100644 pandas/tests/groupby/test_groupby_subclass.py create mode 100644 pandas/tests/groupby/test_grouping.py create mode 100644 pandas/tests/groupby/test_index_as_string.py create mode 100644 pandas/tests/groupby/test_indexing.py create mode 100644 pandas/tests/groupby/test_libgroupby.py create mode 100644 pandas/tests/groupby/test_min_max.py create mode 100644 pandas/tests/groupby/test_missing.py create mode 100644 pandas/tests/groupby/test_nth.py create mode 100644 pandas/tests/groupby/test_numba.py create mode 100644 pandas/tests/groupby/test_nunique.py create mode 100644 pandas/tests/groupby/test_pipe.py create mode 100644 pandas/tests/groupby/test_quantile.py create mode 100644 pandas/tests/groupby/test_rank.py create mode 100644 pandas/tests/groupby/test_sample.py create mode 100644 pandas/tests/groupby/test_size.py create mode 100644 pandas/tests/groupby/test_timegrouper.py create mode 100644 pandas/tests/groupby/test_value_counts.py create mode 100644 
pandas/tests/groupby/transform/__init__.py create mode 100644 pandas/tests/groupby/transform/test_numba.py create mode 100644 pandas/tests/groupby/transform/test_transform.py create mode 100644 pandas/tests/indexes/__init__.py create mode 100644 pandas/tests/indexes/base_class/__init__.py create mode 100644 pandas/tests/indexes/base_class/test_constructors.py create mode 100644 pandas/tests/indexes/base_class/test_formats.py create mode 100644 pandas/tests/indexes/base_class/test_indexing.py create mode 100644 pandas/tests/indexes/base_class/test_pickle.py create mode 100644 pandas/tests/indexes/base_class/test_reshape.py create mode 100644 pandas/tests/indexes/base_class/test_setops.py create mode 100644 pandas/tests/indexes/base_class/test_where.py create mode 100644 pandas/tests/indexes/categorical/__init__.py create mode 100644 pandas/tests/indexes/categorical/test_append.py create mode 100644 pandas/tests/indexes/categorical/test_astype.py create mode 100644 pandas/tests/indexes/categorical/test_category.py create mode 100644 pandas/tests/indexes/categorical/test_constructors.py create mode 100644 pandas/tests/indexes/categorical/test_equals.py create mode 100644 pandas/tests/indexes/categorical/test_fillna.py create mode 100644 pandas/tests/indexes/categorical/test_formats.py create mode 100644 pandas/tests/indexes/categorical/test_indexing.py create mode 100644 pandas/tests/indexes/categorical/test_map.py create mode 100644 pandas/tests/indexes/categorical/test_reindex.py create mode 100644 pandas/tests/indexes/common.py create mode 100644 pandas/tests/indexes/conftest.py create mode 100644 pandas/tests/indexes/datetimelike.py create mode 100644 pandas/tests/indexes/datetimelike_/__init__.py create mode 100644 pandas/tests/indexes/datetimelike_/test_drop_duplicates.py create mode 100644 pandas/tests/indexes/datetimelike_/test_equals.py create mode 100644 pandas/tests/indexes/datetimelike_/test_indexing.py create mode 100644 pandas/tests/indexes/datetimelike_/test_is_monotonic.py create mode 100644 pandas/tests/indexes/datetimelike_/test_nat.py create mode 100644 pandas/tests/indexes/datetimelike_/test_sort_values.py create mode 100644 pandas/tests/indexes/datetimelike_/test_value_counts.py create mode 100644 pandas/tests/indexes/datetimes/__init__.py create mode 100644 pandas/tests/indexes/datetimes/methods/__init__.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_astype.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_factorize.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_fillna.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_insert.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_isocalendar.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_repeat.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_shift.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_snap.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_to_frame.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_to_period.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_to_series.py create mode 100644 pandas/tests/indexes/datetimes/test_asof.py create mode 100644 pandas/tests/indexes/datetimes/test_constructors.py create mode 100644 pandas/tests/indexes/datetimes/test_date_range.py create mode 100644 pandas/tests/indexes/datetimes/test_datetime.py create mode 100644 pandas/tests/indexes/datetimes/test_datetimelike.py create mode 100644 
pandas/tests/indexes/datetimes/test_delete.py create mode 100644 pandas/tests/indexes/datetimes/test_formats.py create mode 100644 pandas/tests/indexes/datetimes/test_freq_attr.py create mode 100644 pandas/tests/indexes/datetimes/test_indexing.py create mode 100644 pandas/tests/indexes/datetimes/test_join.py create mode 100644 pandas/tests/indexes/datetimes/test_map.py create mode 100644 pandas/tests/indexes/datetimes/test_misc.py create mode 100644 pandas/tests/indexes/datetimes/test_npfuncs.py create mode 100644 pandas/tests/indexes/datetimes/test_ops.py create mode 100644 pandas/tests/indexes/datetimes/test_partial_slicing.py create mode 100644 pandas/tests/indexes/datetimes/test_pickle.py create mode 100644 pandas/tests/indexes/datetimes/test_reindex.py create mode 100644 pandas/tests/indexes/datetimes/test_scalar_compat.py create mode 100644 pandas/tests/indexes/datetimes/test_setops.py create mode 100644 pandas/tests/indexes/datetimes/test_timezones.py create mode 100644 pandas/tests/indexes/datetimes/test_unique.py create mode 100644 pandas/tests/indexes/interval/__init__.py create mode 100644 pandas/tests/indexes/interval/test_astype.py create mode 100644 pandas/tests/indexes/interval/test_base.py create mode 100644 pandas/tests/indexes/interval/test_constructors.py create mode 100644 pandas/tests/indexes/interval/test_equals.py create mode 100644 pandas/tests/indexes/interval/test_formats.py create mode 100644 pandas/tests/indexes/interval/test_indexing.py create mode 100644 pandas/tests/indexes/interval/test_interval.py create mode 100644 pandas/tests/indexes/interval/test_interval_range.py create mode 100644 pandas/tests/indexes/interval/test_interval_tree.py create mode 100644 pandas/tests/indexes/interval/test_join.py create mode 100644 pandas/tests/indexes/interval/test_pickle.py create mode 100644 pandas/tests/indexes/interval/test_setops.py create mode 100644 pandas/tests/indexes/multi/__init__.py create mode 100644 pandas/tests/indexes/multi/conftest.py create mode 100644 pandas/tests/indexes/multi/test_analytics.py create mode 100644 pandas/tests/indexes/multi/test_astype.py create mode 100644 pandas/tests/indexes/multi/test_compat.py create mode 100644 pandas/tests/indexes/multi/test_constructors.py create mode 100644 pandas/tests/indexes/multi/test_conversion.py create mode 100644 pandas/tests/indexes/multi/test_copy.py create mode 100644 pandas/tests/indexes/multi/test_drop.py create mode 100644 pandas/tests/indexes/multi/test_duplicates.py create mode 100644 pandas/tests/indexes/multi/test_equivalence.py create mode 100644 pandas/tests/indexes/multi/test_formats.py create mode 100644 pandas/tests/indexes/multi/test_get_level_values.py create mode 100644 pandas/tests/indexes/multi/test_get_set.py create mode 100644 pandas/tests/indexes/multi/test_indexing.py create mode 100644 pandas/tests/indexes/multi/test_integrity.py create mode 100644 pandas/tests/indexes/multi/test_isin.py create mode 100644 pandas/tests/indexes/multi/test_join.py create mode 100644 pandas/tests/indexes/multi/test_lexsort.py create mode 100644 pandas/tests/indexes/multi/test_missing.py create mode 100644 pandas/tests/indexes/multi/test_monotonic.py create mode 100644 pandas/tests/indexes/multi/test_names.py create mode 100644 pandas/tests/indexes/multi/test_partial_indexing.py create mode 100644 pandas/tests/indexes/multi/test_pickle.py create mode 100644 pandas/tests/indexes/multi/test_reindex.py create mode 100644 pandas/tests/indexes/multi/test_reshape.py create mode 100644 
pandas/tests/indexes/multi/test_setops.py create mode 100644 pandas/tests/indexes/multi/test_sorting.py create mode 100644 pandas/tests/indexes/multi/test_take.py create mode 100644 pandas/tests/indexes/numeric/__init__.py create mode 100644 pandas/tests/indexes/numeric/test_astype.py create mode 100644 pandas/tests/indexes/numeric/test_indexing.py create mode 100644 pandas/tests/indexes/numeric/test_join.py create mode 100644 pandas/tests/indexes/numeric/test_numeric.py create mode 100644 pandas/tests/indexes/numeric/test_setops.py create mode 100644 pandas/tests/indexes/object/__init__.py create mode 100644 pandas/tests/indexes/object/test_astype.py create mode 100644 pandas/tests/indexes/object/test_indexing.py create mode 100644 pandas/tests/indexes/period/__init__.py create mode 100644 pandas/tests/indexes/period/methods/__init__.py create mode 100644 pandas/tests/indexes/period/methods/test_asfreq.py create mode 100644 pandas/tests/indexes/period/methods/test_astype.py create mode 100644 pandas/tests/indexes/period/methods/test_factorize.py create mode 100644 pandas/tests/indexes/period/methods/test_fillna.py create mode 100644 pandas/tests/indexes/period/methods/test_insert.py create mode 100644 pandas/tests/indexes/period/methods/test_is_full.py create mode 100644 pandas/tests/indexes/period/methods/test_repeat.py create mode 100644 pandas/tests/indexes/period/methods/test_shift.py create mode 100644 pandas/tests/indexes/period/methods/test_to_timestamp.py create mode 100644 pandas/tests/indexes/period/test_constructors.py create mode 100644 pandas/tests/indexes/period/test_formats.py create mode 100644 pandas/tests/indexes/period/test_freq_attr.py create mode 100644 pandas/tests/indexes/period/test_indexing.py create mode 100644 pandas/tests/indexes/period/test_join.py create mode 100644 pandas/tests/indexes/period/test_monotonic.py create mode 100644 pandas/tests/indexes/period/test_partial_slicing.py create mode 100644 pandas/tests/indexes/period/test_period.py create mode 100644 pandas/tests/indexes/period/test_period_range.py create mode 100644 pandas/tests/indexes/period/test_pickle.py create mode 100644 pandas/tests/indexes/period/test_resolution.py create mode 100644 pandas/tests/indexes/period/test_scalar_compat.py create mode 100644 pandas/tests/indexes/period/test_searchsorted.py create mode 100644 pandas/tests/indexes/period/test_setops.py create mode 100644 pandas/tests/indexes/period/test_tools.py create mode 100644 pandas/tests/indexes/ranges/__init__.py create mode 100644 pandas/tests/indexes/ranges/test_constructors.py create mode 100644 pandas/tests/indexes/ranges/test_indexing.py create mode 100644 pandas/tests/indexes/ranges/test_join.py create mode 100644 pandas/tests/indexes/ranges/test_range.py create mode 100644 pandas/tests/indexes/ranges/test_setops.py create mode 100644 pandas/tests/indexes/test_any_index.py create mode 100644 pandas/tests/indexes/test_base.py create mode 100644 pandas/tests/indexes/test_common.py create mode 100644 pandas/tests/indexes/test_engines.py create mode 100644 pandas/tests/indexes/test_frozen.py create mode 100644 pandas/tests/indexes/test_index_new.py create mode 100644 pandas/tests/indexes/test_indexing.py create mode 100644 pandas/tests/indexes/test_numpy_compat.py create mode 100644 pandas/tests/indexes/test_setops.py create mode 100644 pandas/tests/indexes/test_subclass.py create mode 100644 pandas/tests/indexes/timedeltas/__init__.py create mode 100644 pandas/tests/indexes/timedeltas/methods/__init__.py create mode 100644 
pandas/tests/indexes/timedeltas/methods/test_astype.py create mode 100644 pandas/tests/indexes/timedeltas/methods/test_factorize.py create mode 100644 pandas/tests/indexes/timedeltas/methods/test_fillna.py create mode 100644 pandas/tests/indexes/timedeltas/methods/test_insert.py create mode 100644 pandas/tests/indexes/timedeltas/methods/test_repeat.py create mode 100644 pandas/tests/indexes/timedeltas/methods/test_shift.py create mode 100644 pandas/tests/indexes/timedeltas/test_constructors.py create mode 100644 pandas/tests/indexes/timedeltas/test_delete.py create mode 100644 pandas/tests/indexes/timedeltas/test_formats.py create mode 100644 pandas/tests/indexes/timedeltas/test_freq_attr.py create mode 100644 pandas/tests/indexes/timedeltas/test_indexing.py create mode 100644 pandas/tests/indexes/timedeltas/test_join.py create mode 100644 pandas/tests/indexes/timedeltas/test_ops.py create mode 100644 pandas/tests/indexes/timedeltas/test_pickle.py create mode 100644 pandas/tests/indexes/timedeltas/test_scalar_compat.py create mode 100644 pandas/tests/indexes/timedeltas/test_searchsorted.py create mode 100644 pandas/tests/indexes/timedeltas/test_setops.py create mode 100644 pandas/tests/indexes/timedeltas/test_timedelta.py create mode 100644 pandas/tests/indexes/timedeltas/test_timedelta_range.py create mode 100644 pandas/tests/indexing/__init__.py create mode 100644 pandas/tests/indexing/common.py create mode 100644 pandas/tests/indexing/interval/__init__.py create mode 100644 pandas/tests/indexing/interval/test_interval.py create mode 100644 pandas/tests/indexing/interval/test_interval_new.py create mode 100644 pandas/tests/indexing/multiindex/__init__.py create mode 100644 pandas/tests/indexing/multiindex/test_chaining_and_caching.py create mode 100644 pandas/tests/indexing/multiindex/test_datetime.py create mode 100644 pandas/tests/indexing/multiindex/test_getitem.py create mode 100644 pandas/tests/indexing/multiindex/test_iloc.py create mode 100644 pandas/tests/indexing/multiindex/test_indexing_slow.py create mode 100644 pandas/tests/indexing/multiindex/test_loc.py create mode 100644 pandas/tests/indexing/multiindex/test_multiindex.py create mode 100644 pandas/tests/indexing/multiindex/test_partial.py create mode 100644 pandas/tests/indexing/multiindex/test_setitem.py create mode 100644 pandas/tests/indexing/multiindex/test_slice.py create mode 100644 pandas/tests/indexing/multiindex/test_sorted.py create mode 100644 pandas/tests/indexing/test_at.py create mode 100644 pandas/tests/indexing/test_categorical.py create mode 100644 pandas/tests/indexing/test_chaining_and_caching.py create mode 100644 pandas/tests/indexing/test_check_indexer.py create mode 100644 pandas/tests/indexing/test_coercion.py create mode 100644 pandas/tests/indexing/test_datetime.py create mode 100644 pandas/tests/indexing/test_floats.py create mode 100644 pandas/tests/indexing/test_iat.py create mode 100644 pandas/tests/indexing/test_iloc.py create mode 100644 pandas/tests/indexing/test_indexers.py create mode 100644 pandas/tests/indexing/test_indexing.py create mode 100644 pandas/tests/indexing/test_loc.py create mode 100644 pandas/tests/indexing/test_na_indexing.py create mode 100644 pandas/tests/indexing/test_partial.py create mode 100644 pandas/tests/indexing/test_scalar.py create mode 100644 pandas/tests/interchange/__init__.py create mode 100644 pandas/tests/interchange/conftest.py create mode 100644 pandas/tests/interchange/test_impl.py create mode 100644 pandas/tests/interchange/test_spec_conformance.py 
create mode 100644 pandas/tests/interchange/test_utils.py create mode 100644 pandas/tests/internals/__init__.py create mode 100644 pandas/tests/internals/test_api.py create mode 100644 pandas/tests/internals/test_internals.py create mode 100644 pandas/tests/internals/test_managers.py create mode 100644 pandas/tests/io/__init__.py create mode 100644 pandas/tests/io/conftest.py create mode 100644 pandas/tests/io/data/csv/banklist.csv create mode 100644 pandas/tests/io/data/csv/iris.csv create mode 100644 pandas/tests/io/data/csv/test1.csv create mode 100644 pandas/tests/io/data/csv/test1.csv.bz2 create mode 100644 pandas/tests/io/data/csv/test1.csv.gz create mode 100644 pandas/tests/io/data/csv/test_mmap.csv create mode 100644 pandas/tests/io/data/csv/tips.csv create mode 100644 pandas/tests/io/data/csv/tips.csv.bz2 create mode 100644 pandas/tests/io/data/csv/tips.csv.gz create mode 100644 pandas/tests/io/data/excel/blank.ods create mode 100644 pandas/tests/io/data/excel/blank.xls create mode 100644 pandas/tests/io/data/excel/blank.xlsb create mode 100644 pandas/tests/io/data/excel/blank.xlsm create mode 100644 pandas/tests/io/data/excel/blank.xlsx create mode 100644 pandas/tests/io/data/excel/blank_with_header.ods create mode 100644 pandas/tests/io/data/excel/blank_with_header.xls create mode 100644 pandas/tests/io/data/excel/blank_with_header.xlsb create mode 100644 pandas/tests/io/data/excel/blank_with_header.xlsm create mode 100644 pandas/tests/io/data/excel/blank_with_header.xlsx create mode 100644 pandas/tests/io/data/excel/chartsheet.xls create mode 100644 pandas/tests/io/data/excel/chartsheet.xlsb create mode 100644 pandas/tests/io/data/excel/chartsheet.xlsm create mode 100644 pandas/tests/io/data/excel/chartsheet.xlsx create mode 100644 pandas/tests/io/data/excel/df_empty.xlsx create mode 100644 pandas/tests/io/data/excel/df_equals.xlsx create mode 100644 pandas/tests/io/data/excel/df_header_oob.xlsx create mode 100644 pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.ods create mode 100644 pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xls create mode 100755 pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsb create mode 100644 pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsm create mode 100644 pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsx create mode 100644 pandas/tests/io/data/excel/dimension_large.xlsx create mode 100644 pandas/tests/io/data/excel/dimension_missing.xlsx create mode 100644 pandas/tests/io/data/excel/dimension_small.xlsx create mode 100644 pandas/tests/io/data/excel/empty_trailing_rows.xlsx create mode 100644 pandas/tests/io/data/excel/empty_with_blank_row.xlsx create mode 100755 pandas/tests/io/data/excel/gh-35802.ods create mode 100755 pandas/tests/io/data/excel/gh-36122.ods create mode 100644 pandas/tests/io/data/excel/ints_spelled_with_decimals.xlsx create mode 100644 pandas/tests/io/data/excel/invalid_value_type.ods create mode 100755 pandas/tests/io/data/excel/multiindex_no_index_names.xlsx create mode 100644 pandas/tests/io/data/excel/one_col_blank_line.ods create mode 100644 pandas/tests/io/data/excel/one_col_blank_line.xls create mode 100644 pandas/tests/io/data/excel/one_col_blank_line.xlsb create mode 100644 pandas/tests/io/data/excel/one_col_blank_line.xlsm create mode 100644 pandas/tests/io/data/excel/one_col_blank_line.xlsx create mode 100644 pandas/tests/io/data/excel/test1.ods create mode 100644 pandas/tests/io/data/excel/test1.xls create mode 100644 pandas/tests/io/data/excel/test1.xlsb create mode 100644 
pandas/tests/io/data/excel/test1.xlsm create mode 100644 pandas/tests/io/data/excel/test1.xlsx create mode 100644 pandas/tests/io/data/excel/test2.ods create mode 100644 pandas/tests/io/data/excel/test2.xls create mode 100644 pandas/tests/io/data/excel/test2.xlsb create mode 100644 pandas/tests/io/data/excel/test2.xlsm create mode 100644 pandas/tests/io/data/excel/test2.xlsx create mode 100644 pandas/tests/io/data/excel/test3.ods create mode 100644 pandas/tests/io/data/excel/test3.xls create mode 100644 pandas/tests/io/data/excel/test3.xlsb create mode 100644 pandas/tests/io/data/excel/test3.xlsm create mode 100644 pandas/tests/io/data/excel/test3.xlsx create mode 100644 pandas/tests/io/data/excel/test4.ods create mode 100644 pandas/tests/io/data/excel/test4.xls create mode 100644 pandas/tests/io/data/excel/test4.xlsb create mode 100644 pandas/tests/io/data/excel/test4.xlsm create mode 100644 pandas/tests/io/data/excel/test4.xlsx create mode 100644 pandas/tests/io/data/excel/test5.ods create mode 100644 pandas/tests/io/data/excel/test5.xls create mode 100644 pandas/tests/io/data/excel/test5.xlsb create mode 100644 pandas/tests/io/data/excel/test5.xlsm create mode 100644 pandas/tests/io/data/excel/test5.xlsx create mode 100644 pandas/tests/io/data/excel/test_converters.ods create mode 100644 pandas/tests/io/data/excel/test_converters.xls create mode 100644 pandas/tests/io/data/excel/test_converters.xlsb create mode 100644 pandas/tests/io/data/excel/test_converters.xlsm create mode 100644 pandas/tests/io/data/excel/test_converters.xlsx create mode 100644 pandas/tests/io/data/excel/test_datetime_mi.ods create mode 100644 pandas/tests/io/data/excel/test_datetime_mi.xls create mode 100644 pandas/tests/io/data/excel/test_datetime_mi.xlsb create mode 100644 pandas/tests/io/data/excel/test_datetime_mi.xlsm create mode 100644 pandas/tests/io/data/excel/test_datetime_mi.xlsx create mode 100644 pandas/tests/io/data/excel/test_decimal.ods create mode 100644 pandas/tests/io/data/excel/test_decimal.xls create mode 100644 pandas/tests/io/data/excel/test_decimal.xlsb create mode 100644 pandas/tests/io/data/excel/test_decimal.xlsm create mode 100644 pandas/tests/io/data/excel/test_decimal.xlsx create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.ods create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.xls create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.xlsb create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.xlsm create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.xlsx create mode 100644 pandas/tests/io/data/excel/test_multisheet.ods create mode 100644 pandas/tests/io/data/excel/test_multisheet.xls create mode 100644 pandas/tests/io/data/excel/test_multisheet.xlsb create mode 100644 pandas/tests/io/data/excel/test_multisheet.xlsm create mode 100644 pandas/tests/io/data/excel/test_multisheet.xlsx create mode 100644 pandas/tests/io/data/excel/test_newlines.ods create mode 100644 pandas/tests/io/data/excel/test_spaces.ods create mode 100644 pandas/tests/io/data/excel/test_spaces.xls create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsb create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsm create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsx create mode 100644 pandas/tests/io/data/excel/test_squeeze.ods create mode 100644 pandas/tests/io/data/excel/test_squeeze.xls create mode 100644 pandas/tests/io/data/excel/test_squeeze.xlsb create mode 100644 pandas/tests/io/data/excel/test_squeeze.xlsm create mode 
100644 pandas/tests/io/data/excel/test_squeeze.xlsx create mode 100644 pandas/tests/io/data/excel/test_types.ods create mode 100644 pandas/tests/io/data/excel/test_types.xls create mode 100644 pandas/tests/io/data/excel/test_types.xlsb create mode 100644 pandas/tests/io/data/excel/test_types.xlsm create mode 100644 pandas/tests/io/data/excel/test_types.xlsx create mode 100644 pandas/tests/io/data/excel/testdateoverflow.ods create mode 100644 pandas/tests/io/data/excel/testdateoverflow.xls create mode 100644 pandas/tests/io/data/excel/testdateoverflow.xlsb create mode 100644 pandas/tests/io/data/excel/testdateoverflow.xlsm create mode 100644 pandas/tests/io/data/excel/testdateoverflow.xlsx create mode 100644 pandas/tests/io/data/excel/testdtype.ods create mode 100644 pandas/tests/io/data/excel/testdtype.xls create mode 100644 pandas/tests/io/data/excel/testdtype.xlsb create mode 100644 pandas/tests/io/data/excel/testdtype.xlsm create mode 100644 pandas/tests/io/data/excel/testdtype.xlsx create mode 100644 pandas/tests/io/data/excel/testmultiindex.ods create mode 100644 pandas/tests/io/data/excel/testmultiindex.xls create mode 100644 pandas/tests/io/data/excel/testmultiindex.xlsb create mode 100644 pandas/tests/io/data/excel/testmultiindex.xlsm create mode 100644 pandas/tests/io/data/excel/testmultiindex.xlsx create mode 100644 pandas/tests/io/data/excel/testskiprows.ods create mode 100644 pandas/tests/io/data/excel/testskiprows.xls create mode 100644 pandas/tests/io/data/excel/testskiprows.xlsb create mode 100644 pandas/tests/io/data/excel/testskiprows.xlsm create mode 100644 pandas/tests/io/data/excel/testskiprows.xlsx create mode 100644 pandas/tests/io/data/excel/times_1900.ods create mode 100644 pandas/tests/io/data/excel/times_1900.xls create mode 100644 pandas/tests/io/data/excel/times_1900.xlsb create mode 100644 pandas/tests/io/data/excel/times_1900.xlsm create mode 100644 pandas/tests/io/data/excel/times_1900.xlsx create mode 100644 pandas/tests/io/data/excel/times_1904.ods create mode 100644 pandas/tests/io/data/excel/times_1904.xls create mode 100644 pandas/tests/io/data/excel/times_1904.xlsb create mode 100644 pandas/tests/io/data/excel/times_1904.xlsm create mode 100644 pandas/tests/io/data/excel/times_1904.xlsx create mode 100644 pandas/tests/io/data/excel/trailing_blanks.ods create mode 100644 pandas/tests/io/data/excel/trailing_blanks.xls create mode 100644 pandas/tests/io/data/excel/trailing_blanks.xlsb create mode 100644 pandas/tests/io/data/excel/trailing_blanks.xlsm create mode 100644 pandas/tests/io/data/excel/trailing_blanks.xlsx create mode 100644 pandas/tests/io/data/excel/writertable.odt create mode 100644 pandas/tests/io/data/feather/feather-0_3_1.feather create mode 100644 pandas/tests/io/data/fixed_width/fixed_width_format.txt create mode 100644 pandas/tests/io/data/gbq_fake_job.txt create mode 100644 pandas/tests/io/data/html/banklist.html create mode 100644 pandas/tests/io/data/html/spam.html create mode 100644 pandas/tests/io/data/html/valid_markup.html create mode 100644 pandas/tests/io/data/html/wikipedia_states.html create mode 100644 pandas/tests/io/data/html_encoding/chinese_utf-16.html create mode 100644 pandas/tests/io/data/html_encoding/chinese_utf-32.html create mode 100644 pandas/tests/io/data/html_encoding/chinese_utf-8.html create mode 100644 pandas/tests/io/data/html_encoding/letz_latin1.html create mode 100644 pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/gh26443.h5 create mode 100644 
pandas/tests/io/data/legacy_hdf/incompatible_dataset.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_fixed_datetime_py2.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/pytables_native.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/pytables_native2.h5 create mode 100644 pandas/tests/io/data/legacy_msgpack/0.20.3/0.20.3_x86_64_darwin_3.5.2.msgpack create mode 100644 pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/1.1.0/1.1.0_x86_64_darwin_3.8.5.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/1.2.4/empty_frame_v1_2_4-GH#42345.pkl create mode 100644 pandas/tests/io/data/legacy_pickle/1.4.2/1.4.2_x86_64_linux_3.9.7.pickle create mode 100644 pandas/tests/io/data/orc/TestOrcFile.decimal.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.emptyFile.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.test1.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.testDate1900.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.testDate2038.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.testSnappy.orc create mode 100644 pandas/tests/io/data/parquet/simple.parquet create mode 100644 pandas/tests/io/data/pickle/categorical.0.25.0.pickle create mode 100644 pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz create mode 100644 pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz create mode 100644 pandas/tests/io/data/pickle/test_mi_py27.pkl create mode 100644 pandas/tests/io/data/pickle/test_py27.pkl create mode 100755 pandas/tests/io/data/spss/labelled-num-na.sav create mode 100755 pandas/tests/io/data/spss/labelled-num.sav create mode 100755 pandas/tests/io/data/spss/labelled-str.sav create mode 100755 pandas/tests/io/data/spss/umlauts.sav create mode 100644 pandas/tests/io/data/stata/S4_EDUC1.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-105.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-108.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-111.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-113.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-114.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-118.dta create mode 100644 pandas/tests/io/data/stata/stata-dta-partially-labeled.dta create mode 100644 pandas/tests/io/data/stata/stata10_115.dta create mode 100644 pandas/tests/io/data/stata/stata10_117.dta create mode 100644 pandas/tests/io/data/stata/stata11_115.dta create mode 100644 pandas/tests/io/data/stata/stata11_117.dta create mode 100644 pandas/tests/io/data/stata/stata12_117.dta create mode 100644 pandas/tests/io/data/stata/stata13_dates.dta create mode 100644 pandas/tests/io/data/stata/stata14_118.dta create mode 100644 pandas/tests/io/data/stata/stata15.dta create mode 100644 pandas/tests/io/data/stata/stata16_118.dta create mode 100644 pandas/tests/io/data/stata/stata1_114.dta create mode 100644 pandas/tests/io/data/stata/stata1_117.dta create mode 100644 pandas/tests/io/data/stata/stata1_119.dta.gz create mode 100644 pandas/tests/io/data/stata/stata1_encoding.dta create mode 100644 
pandas/tests/io/data/stata/stata1_encoding_118.dta create mode 100644 pandas/tests/io/data/stata/stata2_113.dta create mode 100644 pandas/tests/io/data/stata/stata2_114.dta create mode 100644 pandas/tests/io/data/stata/stata2_115.dta create mode 100644 pandas/tests/io/data/stata/stata2_117.dta create mode 100644 pandas/tests/io/data/stata/stata3.csv create mode 100644 pandas/tests/io/data/stata/stata3_113.dta create mode 100644 pandas/tests/io/data/stata/stata3_114.dta create mode 100644 pandas/tests/io/data/stata/stata3_115.dta create mode 100644 pandas/tests/io/data/stata/stata3_117.dta create mode 100644 pandas/tests/io/data/stata/stata4_113.dta create mode 100644 pandas/tests/io/data/stata/stata4_114.dta create mode 100644 pandas/tests/io/data/stata/stata4_115.dta create mode 100644 pandas/tests/io/data/stata/stata4_117.dta create mode 100644 pandas/tests/io/data/stata/stata5.csv create mode 100644 pandas/tests/io/data/stata/stata5_113.dta create mode 100644 pandas/tests/io/data/stata/stata5_114.dta create mode 100644 pandas/tests/io/data/stata/stata5_115.dta create mode 100644 pandas/tests/io/data/stata/stata5_117.dta create mode 100644 pandas/tests/io/data/stata/stata6.csv create mode 100644 pandas/tests/io/data/stata/stata6_113.dta create mode 100644 pandas/tests/io/data/stata/stata6_114.dta create mode 100644 pandas/tests/io/data/stata/stata6_115.dta create mode 100644 pandas/tests/io/data/stata/stata6_117.dta create mode 100644 pandas/tests/io/data/stata/stata7_111.dta create mode 100644 pandas/tests/io/data/stata/stata7_115.dta create mode 100644 pandas/tests/io/data/stata/stata7_117.dta create mode 100644 pandas/tests/io/data/stata/stata8_113.dta create mode 100644 pandas/tests/io/data/stata/stata8_115.dta create mode 100644 pandas/tests/io/data/stata/stata8_117.dta create mode 100644 pandas/tests/io/data/stata/stata9_115.dta create mode 100644 pandas/tests/io/data/stata/stata9_117.dta create mode 100644 pandas/tests/io/data/xml/baby_names.xml create mode 100644 pandas/tests/io/data/xml/books.xml create mode 100644 pandas/tests/io/data/xml/cta_rail_lines.kml create mode 100644 pandas/tests/io/data/xml/doc_ch_utf.xml create mode 100644 pandas/tests/io/data/xml/flatten_doc.xsl create mode 100644 pandas/tests/io/data/xml/row_field_output.xsl create mode 100644 pandas/tests/io/excel/__init__.py create mode 100644 pandas/tests/io/excel/conftest.py create mode 100644 pandas/tests/io/excel/test_odf.py create mode 100644 pandas/tests/io/excel/test_odswriter.py create mode 100644 pandas/tests/io/excel/test_openpyxl.py create mode 100644 pandas/tests/io/excel/test_readers.py create mode 100644 pandas/tests/io/excel/test_style.py create mode 100644 pandas/tests/io/excel/test_writers.py create mode 100644 pandas/tests/io/excel/test_xlrd.py create mode 100644 pandas/tests/io/excel/test_xlsxwriter.py create mode 100644 pandas/tests/io/excel/test_xlwt.py create mode 100644 pandas/tests/io/formats/__init__.py create mode 100644 pandas/tests/io/formats/data/html/datetime64_hourformatter.html create mode 100644 pandas/tests/io/formats/data/html/datetime64_monthformatter.html create mode 100644 pandas/tests/io/formats/data/html/escape_disabled.html create mode 100644 pandas/tests/io/formats/data/html/escaped.html create mode 100644 pandas/tests/io/formats/data/html/gh12031_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh13828_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh14882_expected_output_1.html create mode 100644 
pandas/tests/io/formats/data/html/gh14882_expected_output_2.html create mode 100644 pandas/tests/io/formats/data/html/gh14998_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh15019_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh21625_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh22270_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh22579_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh22783_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh22783_named_columns_index.html create mode 100644 pandas/tests/io/formats/data/html/gh40024_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh6131_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh8452_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html create mode 100644 pandas/tests/io/formats/data/html/index_1.html create mode 100644 pandas/tests/io/formats/data/html/index_2.html create mode 100644 pandas/tests/io/formats/data/html/index_3.html create mode 100644 pandas/tests/io/formats/data/html/index_4.html create mode 100644 pandas/tests/io/formats/data/html/index_5.html create mode 100644 pandas/tests/io/formats/data/html/index_formatter.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_standard.html create mode 100644 
pandas/tests/io/formats/data/html/index_unnamed_multi_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/justify.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_1.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_2.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_sparsify_1.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_sparsify_2.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_1.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_2.html create mode 100644 pandas/tests/io/formats/data/html/render_links_false.html create mode 100644 pandas/tests/io/formats/data/html/render_links_true.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_multi.html create mode 100644 
pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/truncate.html create mode 100644 pandas/tests/io/formats/data/html/truncate_formatter.html create mode 100644 pandas/tests/io/formats/data/html/truncate_multi_index.html create mode 100644 pandas/tests/io/formats/data/html/truncate_multi_index_sparse_off.html create mode 100644 pandas/tests/io/formats/data/html/unicode_1.html create mode 100644 pandas/tests/io/formats/data/html/unicode_2.html create mode 100644 pandas/tests/io/formats/data/html/various_dtypes_formatted.html create mode 100644 pandas/tests/io/formats/data/html/with_classes.html create mode 100644 pandas/tests/io/formats/style/__init__.py create mode 100644 pandas/tests/io/formats/style/test_bar.py create mode 100644 pandas/tests/io/formats/style/test_deprecated.py create mode 100644 pandas/tests/io/formats/style/test_exceptions.py create mode 100644 pandas/tests/io/formats/style/test_format.py create mode 100644 pandas/tests/io/formats/style/test_highlight.py create mode 100644 pandas/tests/io/formats/style/test_html.py create mode 100644 pandas/tests/io/formats/style/test_matplotlib.py create mode 100644 pandas/tests/io/formats/style/test_non_unique.py create mode 100644 pandas/tests/io/formats/style/test_style.py create mode 100644 pandas/tests/io/formats/style/test_to_latex.py create mode 100644 pandas/tests/io/formats/style/test_to_string.py create mode 100644 pandas/tests/io/formats/style/test_tooltip.py create mode 100644 pandas/tests/io/formats/test_console.py create mode 100644 pandas/tests/io/formats/test_css.py create mode 100644 pandas/tests/io/formats/test_eng_formatting.py create mode 100644 pandas/tests/io/formats/test_format.py create mode 100644 pandas/tests/io/formats/test_info.py create mode 100644 pandas/tests/io/formats/test_printing.py create mode 100644 pandas/tests/io/formats/test_series_info.py create mode 100644 pandas/tests/io/formats/test_to_csv.py create mode 100644 pandas/tests/io/formats/test_to_excel.py create mode 100644 pandas/tests/io/formats/test_to_html.py create mode 100644 pandas/tests/io/formats/test_to_latex.py create mode 100644 pandas/tests/io/formats/test_to_markdown.py create mode 100644 pandas/tests/io/formats/test_to_string.py create mode 100644 pandas/tests/io/generate_legacy_storage_files.py create mode 100644 pandas/tests/io/json/__init__.py create mode 100644 pandas/tests/io/json/conftest.py create mode 100644 pandas/tests/io/json/data/line_delimited.json create mode 100644 pandas/tests/io/json/data/teams.csv create mode 100644 pandas/tests/io/json/data/tsframe_iso_v012.json create mode 100644 pandas/tests/io/json/data/tsframe_v012.json create mode 100644 pandas/tests/io/json/data/tsframe_v012.json.zip create mode 100644 pandas/tests/io/json/test_compression.py create mode 100644 pandas/tests/io/json/test_deprecated_kwargs.py create mode 100644 
pandas/tests/io/json/test_json_table_schema.py create mode 100644 pandas/tests/io/json/test_json_table_schema_ext_dtype.py create mode 100644 pandas/tests/io/json/test_normalize.py create mode 100644 pandas/tests/io/json/test_pandas.py create mode 100644 pandas/tests/io/json/test_readlines.py create mode 100644 pandas/tests/io/json/test_ujson.py create mode 100644 pandas/tests/io/parser/__init__.py create mode 100644 pandas/tests/io/parser/common/__init__.py create mode 100644 pandas/tests/io/parser/common/test_chunksize.py create mode 100644 pandas/tests/io/parser/common/test_common_basic.py create mode 100644 pandas/tests/io/parser/common/test_data_list.py create mode 100644 pandas/tests/io/parser/common/test_decimal.py create mode 100644 pandas/tests/io/parser/common/test_file_buffer_url.py create mode 100644 pandas/tests/io/parser/common/test_float.py create mode 100644 pandas/tests/io/parser/common/test_index.py create mode 100644 pandas/tests/io/parser/common/test_inf.py create mode 100644 pandas/tests/io/parser/common/test_ints.py create mode 100644 pandas/tests/io/parser/common/test_iterator.py create mode 100644 pandas/tests/io/parser/common/test_read_errors.py create mode 100644 pandas/tests/io/parser/common/test_verbose.py create mode 100644 pandas/tests/io/parser/conftest.py create mode 100644 pandas/tests/io/parser/data/items.jsonl create mode 100644 pandas/tests/io/parser/data/salaries.csv create mode 100644 pandas/tests/io/parser/data/salaries.csv.bz2 create mode 100644 pandas/tests/io/parser/data/salaries.csv.gz create mode 100644 pandas/tests/io/parser/data/salaries.csv.xz create mode 100644 pandas/tests/io/parser/data/salaries.csv.zip create mode 100644 pandas/tests/io/parser/data/salaries.csv.zst create mode 100644 pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv create mode 100644 pandas/tests/io/parser/data/sub_char.csv create mode 100644 pandas/tests/io/parser/data/tar_csv.tar create mode 100644 pandas/tests/io/parser/data/tar_csv.tar.gz create mode 100644 pandas/tests/io/parser/data/test2.csv create mode 100644 pandas/tests/io/parser/data/test_mmap.csv create mode 100644 pandas/tests/io/parser/data/unicode_series.csv create mode 100644 pandas/tests/io/parser/data/utf16_ex.txt create mode 100644 pandas/tests/io/parser/data/utf16_ex_small.zip create mode 100644 pandas/tests/io/parser/data/utf32_ex_small.zip create mode 100644 pandas/tests/io/parser/data/utf8_ex_small.zip create mode 100644 pandas/tests/io/parser/dtypes/__init__.py create mode 100644 pandas/tests/io/parser/dtypes/test_categorical.py create mode 100644 pandas/tests/io/parser/dtypes/test_dtypes_basic.py create mode 100644 pandas/tests/io/parser/dtypes/test_empty.py create mode 100644 pandas/tests/io/parser/test_c_parser_only.py create mode 100644 pandas/tests/io/parser/test_comment.py create mode 100644 pandas/tests/io/parser/test_compression.py create mode 100644 pandas/tests/io/parser/test_converters.py create mode 100644 pandas/tests/io/parser/test_dialect.py create mode 100644 pandas/tests/io/parser/test_encoding.py create mode 100644 pandas/tests/io/parser/test_header.py create mode 100644 pandas/tests/io/parser/test_index_col.py create mode 100644 pandas/tests/io/parser/test_mangle_dupes.py create mode 100644 pandas/tests/io/parser/test_multi_thread.py create mode 100644 pandas/tests/io/parser/test_na_values.py create mode 100644 pandas/tests/io/parser/test_network.py create mode 100644 pandas/tests/io/parser/test_parse_dates.py create mode 100644 pandas/tests/io/parser/test_python_parser_only.py 
create mode 100644 pandas/tests/io/parser/test_quoting.py create mode 100644 pandas/tests/io/parser/test_read_fwf.py create mode 100644 pandas/tests/io/parser/test_skiprows.py create mode 100644 pandas/tests/io/parser/test_textreader.py create mode 100644 pandas/tests/io/parser/test_unsupported.py create mode 100644 pandas/tests/io/parser/usecols/__init__.py create mode 100644 pandas/tests/io/parser/usecols/test_parse_dates.py create mode 100644 pandas/tests/io/parser/usecols/test_strings.py create mode 100644 pandas/tests/io/parser/usecols/test_usecols_basic.py create mode 100644 pandas/tests/io/pytables/__init__.py create mode 100644 pandas/tests/io/pytables/common.py create mode 100644 pandas/tests/io/pytables/conftest.py create mode 100644 pandas/tests/io/pytables/test_append.py create mode 100644 pandas/tests/io/pytables/test_categorical.py create mode 100644 pandas/tests/io/pytables/test_compat.py create mode 100644 pandas/tests/io/pytables/test_complex.py create mode 100644 pandas/tests/io/pytables/test_errors.py create mode 100644 pandas/tests/io/pytables/test_file_handling.py create mode 100644 pandas/tests/io/pytables/test_keys.py create mode 100644 pandas/tests/io/pytables/test_put.py create mode 100644 pandas/tests/io/pytables/test_pytables_missing.py create mode 100644 pandas/tests/io/pytables/test_read.py create mode 100644 pandas/tests/io/pytables/test_retain_attributes.py create mode 100644 pandas/tests/io/pytables/test_round_trip.py create mode 100644 pandas/tests/io/pytables/test_select.py create mode 100644 pandas/tests/io/pytables/test_store.py create mode 100644 pandas/tests/io/pytables/test_subclass.py create mode 100644 pandas/tests/io/pytables/test_time_series.py create mode 100644 pandas/tests/io/pytables/test_timezones.py create mode 100644 pandas/tests/io/sas/__init__.py create mode 100644 pandas/tests/io/sas/data/0x00controlbyte.sas7bdat.bz2 create mode 100644 pandas/tests/io/sas/data/0x40controlbyte.csv create mode 100644 pandas/tests/io/sas/data/0x40controlbyte.sas7bdat create mode 100644 pandas/tests/io/sas/data/DEMO_G.csv create mode 100644 pandas/tests/io/sas/data/DEMO_G.xpt create mode 100644 pandas/tests/io/sas/data/DEMO_PUF.cpt create mode 100644 pandas/tests/io/sas/data/DRXFCD_G.csv create mode 100644 pandas/tests/io/sas/data/DRXFCD_G.xpt create mode 100644 pandas/tests/io/sas/data/SSHSV1_A.csv create mode 100644 pandas/tests/io/sas/data/SSHSV1_A.xpt create mode 100644 pandas/tests/io/sas/data/airline.csv create mode 100644 pandas/tests/io/sas/data/airline.sas7bdat create mode 100644 pandas/tests/io/sas/data/airline.sas7bdat.gz create mode 100644 pandas/tests/io/sas/data/cars.sas7bdat create mode 100644 pandas/tests/io/sas/data/corrupt.sas7bdat create mode 100644 pandas/tests/io/sas/data/dates_null.sas7bdat create mode 100644 pandas/tests/io/sas/data/datetime.csv create mode 100644 pandas/tests/io/sas/data/datetime.sas7bdat create mode 100644 pandas/tests/io/sas/data/load_log.sas7bdat create mode 100644 pandas/tests/io/sas/data/many_columns.csv create mode 100644 pandas/tests/io/sas/data/many_columns.sas7bdat create mode 100644 pandas/tests/io/sas/data/max_sas_date.sas7bdat create mode 100644 pandas/tests/io/sas/data/paxraw_d_short.csv create mode 100644 pandas/tests/io/sas/data/paxraw_d_short.xpt create mode 100644 pandas/tests/io/sas/data/productsales.csv create mode 100644 pandas/tests/io/sas/data/productsales.sas7bdat create mode 100644 pandas/tests/io/sas/data/test1.sas7bdat create mode 100644 pandas/tests/io/sas/data/test10.sas7bdat create mode 
100644 pandas/tests/io/sas/data/test11.sas7bdat create mode 100644 pandas/tests/io/sas/data/test12.sas7bdat create mode 100644 pandas/tests/io/sas/data/test13.sas7bdat create mode 100644 pandas/tests/io/sas/data/test14.sas7bdat create mode 100644 pandas/tests/io/sas/data/test15.sas7bdat create mode 100644 pandas/tests/io/sas/data/test16.sas7bdat create mode 100644 pandas/tests/io/sas/data/test2.sas7bdat create mode 100644 pandas/tests/io/sas/data/test3.sas7bdat create mode 100644 pandas/tests/io/sas/data/test4.sas7bdat create mode 100644 pandas/tests/io/sas/data/test5.sas7bdat create mode 100644 pandas/tests/io/sas/data/test6.sas7bdat create mode 100644 pandas/tests/io/sas/data/test7.sas7bdat create mode 100644 pandas/tests/io/sas/data/test8.sas7bdat create mode 100644 pandas/tests/io/sas/data/test9.sas7bdat create mode 100644 pandas/tests/io/sas/data/test_12659.csv create mode 100644 pandas/tests/io/sas/data/test_12659.sas7bdat create mode 100644 pandas/tests/io/sas/data/test_meta2_page.sas7bdat create mode 100644 pandas/tests/io/sas/data/test_sas7bdat_1.csv create mode 100644 pandas/tests/io/sas/data/test_sas7bdat_2.csv create mode 100644 pandas/tests/io/sas/data/zero_rows.sas7bdat create mode 100644 pandas/tests/io/sas/data/zero_variables.sas7bdat create mode 100644 pandas/tests/io/sas/test_sas.py create mode 100644 pandas/tests/io/sas/test_sas7bdat.py create mode 100644 pandas/tests/io/sas/test_xport.py create mode 100644 pandas/tests/io/test_clipboard.py create mode 100644 pandas/tests/io/test_common.py create mode 100644 pandas/tests/io/test_compression.py create mode 100644 pandas/tests/io/test_date_converters.py create mode 100644 pandas/tests/io/test_feather.py create mode 100644 pandas/tests/io/test_fsspec.py create mode 100644 pandas/tests/io/test_gcs.py create mode 100644 pandas/tests/io/test_html.py create mode 100644 pandas/tests/io/test_orc.py create mode 100644 pandas/tests/io/test_parquet.py create mode 100644 pandas/tests/io/test_pickle.py create mode 100644 pandas/tests/io/test_s3.py create mode 100644 pandas/tests/io/test_spss.py create mode 100644 pandas/tests/io/test_sql.py create mode 100644 pandas/tests/io/test_stata.py create mode 100644 pandas/tests/io/test_user_agent.py create mode 100644 pandas/tests/io/xml/__init__.py create mode 100644 pandas/tests/io/xml/test_to_xml.py create mode 100644 pandas/tests/io/xml/test_xml.py create mode 100644 pandas/tests/io/xml/test_xml_dtypes.py create mode 100644 pandas/tests/libs/__init__.py create mode 100644 pandas/tests/libs/test_hashtable.py create mode 100644 pandas/tests/libs/test_join.py create mode 100644 pandas/tests/libs/test_lib.py create mode 100644 pandas/tests/plotting/__init__.py create mode 100644 pandas/tests/plotting/common.py create mode 100644 pandas/tests/plotting/conftest.py create mode 100644 pandas/tests/plotting/frame/__init__.py create mode 100644 pandas/tests/plotting/frame/test_frame.py create mode 100644 pandas/tests/plotting/frame/test_frame_color.py create mode 100644 pandas/tests/plotting/frame/test_frame_groupby.py create mode 100644 pandas/tests/plotting/frame/test_frame_legend.py create mode 100644 pandas/tests/plotting/frame/test_frame_subplots.py create mode 100644 pandas/tests/plotting/frame/test_hist_box_by.py create mode 100644 pandas/tests/plotting/test_backend.py create mode 100644 pandas/tests/plotting/test_boxplot_method.py create mode 100644 pandas/tests/plotting/test_common.py create mode 100644 pandas/tests/plotting/test_converter.py create mode 100644 
pandas/tests/plotting/test_datetimelike.py create mode 100644 pandas/tests/plotting/test_groupby.py create mode 100644 pandas/tests/plotting/test_hist_method.py create mode 100644 pandas/tests/plotting/test_misc.py create mode 100644 pandas/tests/plotting/test_series.py create mode 100644 pandas/tests/plotting/test_style.py create mode 100644 pandas/tests/reductions/__init__.py create mode 100644 pandas/tests/reductions/test_reductions.py create mode 100644 pandas/tests/reductions/test_stat_reductions.py create mode 100644 pandas/tests/resample/__init__.py create mode 100644 pandas/tests/resample/conftest.py create mode 100644 pandas/tests/resample/test_base.py create mode 100644 pandas/tests/resample/test_datetime_index.py create mode 100644 pandas/tests/resample/test_deprecated.py create mode 100644 pandas/tests/resample/test_period_index.py create mode 100644 pandas/tests/resample/test_resample_api.py create mode 100644 pandas/tests/resample/test_resampler_grouper.py create mode 100644 pandas/tests/resample/test_time_grouper.py create mode 100644 pandas/tests/resample/test_timedelta.py create mode 100644 pandas/tests/reshape/__init__.py create mode 100644 pandas/tests/reshape/concat/__init__.py create mode 100644 pandas/tests/reshape/concat/conftest.py create mode 100644 pandas/tests/reshape/concat/test_append.py create mode 100644 pandas/tests/reshape/concat/test_append_common.py create mode 100644 pandas/tests/reshape/concat/test_categorical.py create mode 100644 pandas/tests/reshape/concat/test_concat.py create mode 100644 pandas/tests/reshape/concat/test_dataframe.py create mode 100644 pandas/tests/reshape/concat/test_datetimes.py create mode 100644 pandas/tests/reshape/concat/test_empty.py create mode 100644 pandas/tests/reshape/concat/test_index.py create mode 100644 pandas/tests/reshape/concat/test_invalid.py create mode 100644 pandas/tests/reshape/concat/test_series.py create mode 100644 pandas/tests/reshape/concat/test_sort.py create mode 100644 pandas/tests/reshape/data/cut_data.csv create mode 100644 pandas/tests/reshape/merge/__init__.py create mode 100644 pandas/tests/reshape/merge/data/allow_exact_matches.csv create mode 100644 pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv create mode 100644 pandas/tests/reshape/merge/data/asof.csv create mode 100644 pandas/tests/reshape/merge/data/asof2.csv create mode 100644 pandas/tests/reshape/merge/data/quotes.csv create mode 100644 pandas/tests/reshape/merge/data/quotes2.csv create mode 100644 pandas/tests/reshape/merge/data/tolerance.csv create mode 100644 pandas/tests/reshape/merge/data/trades.csv create mode 100644 pandas/tests/reshape/merge/data/trades2.csv create mode 100644 pandas/tests/reshape/merge/test_join.py create mode 100644 pandas/tests/reshape/merge/test_merge.py create mode 100644 pandas/tests/reshape/merge/test_merge_asof.py create mode 100644 pandas/tests/reshape/merge/test_merge_cross.py create mode 100644 pandas/tests/reshape/merge/test_merge_index_as_string.py create mode 100644 pandas/tests/reshape/merge/test_merge_ordered.py create mode 100644 pandas/tests/reshape/merge/test_multi.py create mode 100644 pandas/tests/reshape/test_crosstab.py create mode 100644 pandas/tests/reshape/test_cut.py create mode 100644 pandas/tests/reshape/test_from_dummies.py create mode 100644 pandas/tests/reshape/test_get_dummies.py create mode 100644 pandas/tests/reshape/test_melt.py create mode 100644 pandas/tests/reshape/test_pivot.py create mode 100644 pandas/tests/reshape/test_pivot_multilevel.py create 
mode 100644 pandas/tests/reshape/test_qcut.py create mode 100644 pandas/tests/reshape/test_union_categoricals.py create mode 100644 pandas/tests/reshape/test_util.py create mode 100644 pandas/tests/scalar/__init__.py create mode 100644 pandas/tests/scalar/interval/__init__.py create mode 100644 pandas/tests/scalar/interval/test_arithmetic.py create mode 100644 pandas/tests/scalar/interval/test_interval.py create mode 100644 pandas/tests/scalar/interval/test_ops.py create mode 100644 pandas/tests/scalar/period/__init__.py create mode 100644 pandas/tests/scalar/period/test_asfreq.py create mode 100644 pandas/tests/scalar/period/test_period.py create mode 100644 pandas/tests/scalar/test_na_scalar.py create mode 100644 pandas/tests/scalar/test_nat.py create mode 100644 pandas/tests/scalar/timedelta/__init__.py create mode 100644 pandas/tests/scalar/timedelta/test_arithmetic.py create mode 100644 pandas/tests/scalar/timedelta/test_constructors.py create mode 100644 pandas/tests/scalar/timedelta/test_formats.py create mode 100644 pandas/tests/scalar/timedelta/test_timedelta.py create mode 100644 pandas/tests/scalar/timestamp/__init__.py create mode 100644 pandas/tests/scalar/timestamp/test_arithmetic.py create mode 100644 pandas/tests/scalar/timestamp/test_comparisons.py create mode 100644 pandas/tests/scalar/timestamp/test_constructors.py create mode 100644 pandas/tests/scalar/timestamp/test_formats.py create mode 100644 pandas/tests/scalar/timestamp/test_rendering.py create mode 100644 pandas/tests/scalar/timestamp/test_timestamp.py create mode 100644 pandas/tests/scalar/timestamp/test_timezones.py create mode 100644 pandas/tests/scalar/timestamp/test_unary_ops.py create mode 100644 pandas/tests/series/__init__.py create mode 100644 pandas/tests/series/accessors/__init__.py create mode 100644 pandas/tests/series/accessors/test_cat_accessor.py create mode 100644 pandas/tests/series/accessors/test_dt_accessor.py create mode 100644 pandas/tests/series/accessors/test_sparse_accessor.py create mode 100644 pandas/tests/series/accessors/test_str_accessor.py create mode 100644 pandas/tests/series/indexing/__init__.py create mode 100644 pandas/tests/series/indexing/test_datetime.py create mode 100644 pandas/tests/series/indexing/test_delitem.py create mode 100644 pandas/tests/series/indexing/test_get.py create mode 100644 pandas/tests/series/indexing/test_getitem.py create mode 100644 pandas/tests/series/indexing/test_indexing.py create mode 100644 pandas/tests/series/indexing/test_mask.py create mode 100644 pandas/tests/series/indexing/test_set_value.py create mode 100644 pandas/tests/series/indexing/test_setitem.py create mode 100644 pandas/tests/series/indexing/test_take.py create mode 100644 pandas/tests/series/indexing/test_where.py create mode 100644 pandas/tests/series/indexing/test_xs.py create mode 100644 pandas/tests/series/methods/__init__.py create mode 100644 pandas/tests/series/methods/test_align.py create mode 100644 pandas/tests/series/methods/test_append.py create mode 100644 pandas/tests/series/methods/test_argsort.py create mode 100644 pandas/tests/series/methods/test_asof.py create mode 100644 pandas/tests/series/methods/test_astype.py create mode 100644 pandas/tests/series/methods/test_autocorr.py create mode 100644 pandas/tests/series/methods/test_between.py create mode 100644 pandas/tests/series/methods/test_clip.py create mode 100644 pandas/tests/series/methods/test_combine.py create mode 100644 pandas/tests/series/methods/test_combine_first.py create mode 100644 
pandas/tests/series/methods/test_compare.py create mode 100644 pandas/tests/series/methods/test_convert.py create mode 100644 pandas/tests/series/methods/test_convert_dtypes.py create mode 100644 pandas/tests/series/methods/test_copy.py create mode 100644 pandas/tests/series/methods/test_count.py create mode 100644 pandas/tests/series/methods/test_cov_corr.py create mode 100644 pandas/tests/series/methods/test_describe.py create mode 100644 pandas/tests/series/methods/test_diff.py create mode 100644 pandas/tests/series/methods/test_drop.py create mode 100644 pandas/tests/series/methods/test_drop_duplicates.py create mode 100644 pandas/tests/series/methods/test_dropna.py create mode 100644 pandas/tests/series/methods/test_dtypes.py create mode 100644 pandas/tests/series/methods/test_duplicated.py create mode 100644 pandas/tests/series/methods/test_equals.py create mode 100644 pandas/tests/series/methods/test_explode.py create mode 100644 pandas/tests/series/methods/test_fillna.py create mode 100644 pandas/tests/series/methods/test_get_numeric_data.py create mode 100644 pandas/tests/series/methods/test_head_tail.py create mode 100644 pandas/tests/series/methods/test_infer_objects.py create mode 100644 pandas/tests/series/methods/test_interpolate.py create mode 100644 pandas/tests/series/methods/test_is_monotonic.py create mode 100644 pandas/tests/series/methods/test_is_unique.py create mode 100644 pandas/tests/series/methods/test_isin.py create mode 100644 pandas/tests/series/methods/test_isna.py create mode 100644 pandas/tests/series/methods/test_item.py create mode 100644 pandas/tests/series/methods/test_matmul.py create mode 100644 pandas/tests/series/methods/test_nlargest.py create mode 100644 pandas/tests/series/methods/test_nunique.py create mode 100644 pandas/tests/series/methods/test_pct_change.py create mode 100644 pandas/tests/series/methods/test_pop.py create mode 100644 pandas/tests/series/methods/test_quantile.py create mode 100644 pandas/tests/series/methods/test_rank.py create mode 100644 pandas/tests/series/methods/test_reindex.py create mode 100644 pandas/tests/series/methods/test_reindex_like.py create mode 100644 pandas/tests/series/methods/test_rename.py create mode 100644 pandas/tests/series/methods/test_rename_axis.py create mode 100644 pandas/tests/series/methods/test_repeat.py create mode 100644 pandas/tests/series/methods/test_replace.py create mode 100644 pandas/tests/series/methods/test_reset_index.py create mode 100644 pandas/tests/series/methods/test_round.py create mode 100644 pandas/tests/series/methods/test_searchsorted.py create mode 100644 pandas/tests/series/methods/test_set_name.py create mode 100644 pandas/tests/series/methods/test_sort_index.py create mode 100644 pandas/tests/series/methods/test_sort_values.py create mode 100644 pandas/tests/series/methods/test_to_csv.py create mode 100644 pandas/tests/series/methods/test_to_dict.py create mode 100644 pandas/tests/series/methods/test_to_frame.py create mode 100644 pandas/tests/series/methods/test_truncate.py create mode 100644 pandas/tests/series/methods/test_tz_localize.py create mode 100644 pandas/tests/series/methods/test_unique.py create mode 100644 pandas/tests/series/methods/test_unstack.py create mode 100644 pandas/tests/series/methods/test_update.py create mode 100644 pandas/tests/series/methods/test_value_counts.py create mode 100644 pandas/tests/series/methods/test_values.py create mode 100644 pandas/tests/series/methods/test_view.py create mode 100644 pandas/tests/series/test_api.py create 
mode 100644 pandas/tests/series/test_arithmetic.py create mode 100644 pandas/tests/series/test_constructors.py create mode 100644 pandas/tests/series/test_cumulative.py create mode 100644 pandas/tests/series/test_iteration.py create mode 100644 pandas/tests/series/test_logical_ops.py create mode 100644 pandas/tests/series/test_missing.py create mode 100644 pandas/tests/series/test_npfuncs.py create mode 100644 pandas/tests/series/test_reductions.py create mode 100644 pandas/tests/series/test_repr.py create mode 100644 pandas/tests/series/test_subclass.py create mode 100644 pandas/tests/series/test_ufunc.py create mode 100644 pandas/tests/series/test_unary.py create mode 100644 pandas/tests/series/test_validate.py create mode 100644 pandas/tests/strings/__init__.py create mode 100644 pandas/tests/strings/conftest.py create mode 100644 pandas/tests/strings/test_api.py create mode 100644 pandas/tests/strings/test_case_justify.py create mode 100644 pandas/tests/strings/test_cat.py create mode 100644 pandas/tests/strings/test_extract.py create mode 100644 pandas/tests/strings/test_find_replace.py create mode 100644 pandas/tests/strings/test_get_dummies.py create mode 100644 pandas/tests/strings/test_split_partition.py create mode 100644 pandas/tests/strings/test_string_array.py create mode 100644 pandas/tests/strings/test_strings.py create mode 100644 pandas/tests/test_aggregation.py create mode 100644 pandas/tests/test_algos.py create mode 100644 pandas/tests/test_common.py create mode 100644 pandas/tests/test_downstream.py create mode 100644 pandas/tests/test_errors.py create mode 100644 pandas/tests/test_expressions.py create mode 100644 pandas/tests/test_flags.py create mode 100644 pandas/tests/test_multilevel.py create mode 100644 pandas/tests/test_nanops.py create mode 100644 pandas/tests/test_optional_dependency.py create mode 100644 pandas/tests/test_register_accessor.py create mode 100644 pandas/tests/test_sorting.py create mode 100644 pandas/tests/test_take.py create mode 100644 pandas/tests/tools/__init__.py create mode 100644 pandas/tests/tools/test_to_datetime.py create mode 100644 pandas/tests/tools/test_to_numeric.py create mode 100644 pandas/tests/tools/test_to_time.py create mode 100644 pandas/tests/tools/test_to_timedelta.py create mode 100644 pandas/tests/tseries/__init__.py create mode 100644 pandas/tests/tseries/frequencies/__init__.py create mode 100644 pandas/tests/tseries/frequencies/test_freq_code.py create mode 100644 pandas/tests/tseries/frequencies/test_frequencies.py create mode 100644 pandas/tests/tseries/frequencies/test_inference.py create mode 100644 pandas/tests/tseries/holiday/__init__.py create mode 100644 pandas/tests/tseries/holiday/test_calendar.py create mode 100644 pandas/tests/tseries/holiday/test_federal.py create mode 100644 pandas/tests/tseries/holiday/test_holiday.py create mode 100644 pandas/tests/tseries/holiday/test_observance.py create mode 100644 pandas/tests/tseries/offsets/__init__.py create mode 100644 pandas/tests/tseries/offsets/common.py create mode 100644 pandas/tests/tseries/offsets/conftest.py create mode 100644 pandas/tests/tseries/offsets/data/cday-0.14.1.pickle create mode 100644 pandas/tests/tseries/offsets/test_business_day.py create mode 100644 pandas/tests/tseries/offsets/test_business_hour.py create mode 100644 pandas/tests/tseries/offsets/test_business_month.py create mode 100644 pandas/tests/tseries/offsets/test_business_quarter.py create mode 100644 pandas/tests/tseries/offsets/test_business_year.py create mode 100644 
pandas/tests/tseries/offsets/test_custom_business_day.py create mode 100644 pandas/tests/tseries/offsets/test_custom_business_hour.py create mode 100644 pandas/tests/tseries/offsets/test_custom_business_month.py create mode 100644 pandas/tests/tseries/offsets/test_dst.py create mode 100644 pandas/tests/tseries/offsets/test_easter.py create mode 100644 pandas/tests/tseries/offsets/test_fiscal.py create mode 100644 pandas/tests/tseries/offsets/test_index.py create mode 100644 pandas/tests/tseries/offsets/test_month.py create mode 100644 pandas/tests/tseries/offsets/test_offsets.py create mode 100644 pandas/tests/tseries/offsets/test_offsets_properties.py create mode 100644 pandas/tests/tseries/offsets/test_quarter.py create mode 100644 pandas/tests/tseries/offsets/test_ticks.py create mode 100644 pandas/tests/tseries/offsets/test_week.py create mode 100644 pandas/tests/tseries/offsets/test_year.py create mode 100644 pandas/tests/tslibs/__init__.py create mode 100644 pandas/tests/tslibs/test_api.py create mode 100644 pandas/tests/tslibs/test_array_to_datetime.py create mode 100644 pandas/tests/tslibs/test_ccalendar.py create mode 100644 pandas/tests/tslibs/test_conversion.py create mode 100644 pandas/tests/tslibs/test_fields.py create mode 100644 pandas/tests/tslibs/test_libfrequencies.py create mode 100644 pandas/tests/tslibs/test_liboffsets.py create mode 100644 pandas/tests/tslibs/test_np_datetime.py create mode 100644 pandas/tests/tslibs/test_parse_iso8601.py create mode 100644 pandas/tests/tslibs/test_parsing.py create mode 100644 pandas/tests/tslibs/test_period_asfreq.py create mode 100644 pandas/tests/tslibs/test_resolution.py create mode 100644 pandas/tests/tslibs/test_timedeltas.py create mode 100644 pandas/tests/tslibs/test_timezones.py create mode 100644 pandas/tests/tslibs/test_to_offset.py create mode 100644 pandas/tests/tslibs/test_tzconversion.py create mode 100644 pandas/tests/util/__init__.py create mode 100644 pandas/tests/util/conftest.py create mode 100644 pandas/tests/util/test_assert_almost_equal.py create mode 100644 pandas/tests/util/test_assert_attr_equal.py create mode 100644 pandas/tests/util/test_assert_categorical_equal.py create mode 100644 pandas/tests/util/test_assert_extension_array_equal.py create mode 100644 pandas/tests/util/test_assert_frame_equal.py create mode 100644 pandas/tests/util/test_assert_index_equal.py create mode 100644 pandas/tests/util/test_assert_interval_array_equal.py create mode 100644 pandas/tests/util/test_assert_numpy_array_equal.py create mode 100644 pandas/tests/util/test_assert_produces_warning.py create mode 100644 pandas/tests/util/test_assert_series_equal.py create mode 100644 pandas/tests/util/test_deprecate.py create mode 100644 pandas/tests/util/test_deprecate_kwarg.py create mode 100644 pandas/tests/util/test_deprecate_nonkeyword_arguments.py create mode 100644 pandas/tests/util/test_doc.py create mode 100644 pandas/tests/util/test_hashing.py create mode 100644 pandas/tests/util/test_make_objects.py create mode 100644 pandas/tests/util/test_numba.py create mode 100644 pandas/tests/util/test_rewrite_warning.py create mode 100644 pandas/tests/util/test_safe_import.py create mode 100644 pandas/tests/util/test_shares_memory.py create mode 100644 pandas/tests/util/test_show_versions.py create mode 100644 pandas/tests/util/test_util.py create mode 100644 pandas/tests/util/test_validate_args.py create mode 100644 pandas/tests/util/test_validate_args_and_kwargs.py create mode 100644 pandas/tests/util/test_validate_inclusive.py create 
mode 100644 pandas/tests/util/test_validate_kwargs.py create mode 100644 pandas/tests/window/__init__.py create mode 100644 pandas/tests/window/conftest.py create mode 100644 pandas/tests/window/moments/__init__.py create mode 100644 pandas/tests/window/moments/conftest.py create mode 100644 pandas/tests/window/moments/test_moments_consistency_ewm.py create mode 100644 pandas/tests/window/moments/test_moments_consistency_expanding.py create mode 100644 pandas/tests/window/moments/test_moments_consistency_rolling.py create mode 100644 pandas/tests/window/test_api.py create mode 100644 pandas/tests/window/test_apply.py create mode 100644 pandas/tests/window/test_base_indexer.py create mode 100644 pandas/tests/window/test_cython_aggregations.py create mode 100644 pandas/tests/window/test_dtypes.py create mode 100644 pandas/tests/window/test_ewm.py create mode 100644 pandas/tests/window/test_expanding.py create mode 100644 pandas/tests/window/test_groupby.py create mode 100644 pandas/tests/window/test_numba.py create mode 100644 pandas/tests/window/test_online.py create mode 100644 pandas/tests/window/test_pairwise.py create mode 100644 pandas/tests/window/test_rolling.py create mode 100644 pandas/tests/window/test_rolling_functions.py create mode 100644 pandas/tests/window/test_rolling_quantile.py create mode 100644 pandas/tests/window/test_rolling_skew_kurt.py create mode 100644 pandas/tests/window/test_timeseries_window.py create mode 100644 pandas/tests/window/test_win_type.py create mode 100644 pandas/tseries/__init__.py create mode 100644 pandas/tseries/api.py create mode 100644 pandas/tseries/frequencies.py create mode 100644 pandas/tseries/holiday.py create mode 100644 pandas/tseries/offsets.py create mode 100644 pandas/util/__init__.py create mode 100644 pandas/util/_decorators.py create mode 100644 pandas/util/_doctools.py create mode 100644 pandas/util/_exceptions.py create mode 100644 pandas/util/_print_versions.py create mode 100644 pandas/util/_test_decorators.py create mode 100644 pandas/util/_tester.py create mode 100644 pandas/util/_validators.py create mode 100644 pandas/util/testing.py create mode 100644 pandas/util/version/__init__.py create mode 100644 pyproject.toml create mode 100644 pyright_reportGeneralTypeIssues.json create mode 100644 requirements-dev.txt create mode 100644 scripts/__init__.py create mode 100755 scripts/generate_pip_deps_from_conda.py create mode 100755 scripts/list_future_warnings.sh create mode 100644 scripts/no_bool_in_generic.py create mode 100644 scripts/pandas_errors_documented.py create mode 100644 scripts/run_stubtest.py create mode 100644 scripts/run_vulture.py create mode 100644 scripts/sync_flake8_versions.py create mode 100644 scripts/tests/__init__.py create mode 100644 scripts/tests/conftest.py create mode 100644 scripts/tests/test_no_bool_in_generic.py create mode 100644 scripts/tests/test_sync_flake8_versions.py create mode 100644 scripts/tests/test_use_io_common_urlopen.py create mode 100644 scripts/tests/test_use_pd_array_in_core.py create mode 100644 scripts/tests/test_validate_docstrings.py create mode 100644 scripts/use_io_common_urlopen.py create mode 100644 scripts/use_pd_array_in_core.py create mode 100755 scripts/validate_docstrings.py create mode 100755 scripts/validate_min_versions_in_sync.py create mode 100755 scripts/validate_rst_title_capitalization.py create mode 100644 setup.cfg create mode 100755 setup.py create mode 100644 test_fast.bat create mode 100755 test_fast.sh create mode 100644 typings/numba.pyi create mode 
100644 versioneer.py create mode 100644 web/README.md create mode 100644 web/interactive_terminal/README.md create mode 100644 web/interactive_terminal/jupyter-lite.json create mode 100644 web/interactive_terminal/jupyter_lite_config.json create mode 100644 web/pandas/_templates/layout.html create mode 100644 web/pandas/about/citing.md create mode 100644 web/pandas/about/governance.md create mode 100644 web/pandas/about/index.md create mode 100644 web/pandas/about/roadmap.md create mode 100644 web/pandas/about/sponsors.md create mode 100644 web/pandas/about/team.md create mode 100644 web/pandas/community/blog/2019-user-survey.md create mode 100644 web/pandas/community/blog/asv-pandas-grant.md create mode 100644 web/pandas/community/blog/extension-arrays.md create mode 100644 web/pandas/community/blog/index.html create mode 100644 web/pandas/community/blog/pandas-1.0.md create mode 100644 web/pandas/community/coc.md create mode 100644 web/pandas/community/ecosystem.md create mode 100644 web/pandas/config.yml create mode 100644 web/pandas/contribute.md create mode 100644 web/pandas/donate.md create mode 100644 web/pandas/getting_started.md create mode 100644 web/pandas/index.html create mode 100644 web/pandas/pdeps/0001-purpose-and-guidelines.md create mode 100644 web/pandas/static/css/pandas.css create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_13_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_18_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_20_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_22_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_24_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_26_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_31_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_33_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_4_0.png create mode 100644 web/pandas/static/img/blog/2019-user-survey/2019_5_0.png create mode 100644 web/pandas/static/img/favicon.ico create mode 100644 web/pandas/static/img/favicon_white.ico create mode 100644 web/pandas/static/img/install/anaconda_prompt.png create mode 100644 web/pandas/static/img/install/jupyterlab_home.png create mode 100644 web/pandas/static/img/install/pandas_import_and_version.png create mode 100644 web/pandas/static/img/pandas.svg create mode 100644 web/pandas/static/img/pandas_mark.svg create mode 100644 web/pandas/static/img/pandas_mark_white.svg create mode 100644 web/pandas/static/img/pandas_secondary.svg create mode 100644 web/pandas/static/img/pandas_secondary_white.svg create mode 100644 web/pandas/static/img/pandas_white.svg create mode 100644 web/pandas/static/img/partners/bodo.svg create mode 100644 web/pandas/static/img/partners/czi.svg create mode 100644 web/pandas/static/img/partners/dfine.svg create mode 100644 web/pandas/static/img/partners/numfocus.svg create mode 100644 web/pandas/static/img/partners/nvidia.svg create mode 100644 web/pandas/static/img/partners/quansight_labs.svg create mode 100644 web/pandas/static/img/partners/tidelift.svg create mode 100644 web/pandas/static/img/partners/two_sigma.svg create mode 100644 web/pandas/static/img/partners/voltron_data.svg create mode 100644 web/pandas/static/img/pydata_book.gif create mode 100644 web/pandas/try.md create mode 100644 web/pandas/versions.json create mode 100755 web/pandas_web.py diff --git a/.devcontainer.json 
b/.devcontainer.json
new file mode 100644
index 00000000..7c5d0092
--- /dev/null
+++ b/.devcontainer.json
@@ -0,0 +1,29 @@
+// For format details, see https://aka.ms/vscode-remote/devcontainer.json or the definition README at
+// https://github.com/microsoft/vscode-dev-containers/tree/master/containers/python-3-miniconda
+{
+    "name": "pandas",
+    "context": ".",
+    "dockerFile": "Dockerfile",
+
+    // Use 'settings' to set *default* container specific settings.json values on container create.
+    // You can edit these settings after create using File > Preferences > Settings > Remote.
+    "settings": {
+        "terminal.integrated.shell.linux": "/bin/bash",
+        "python.pythonPath": "/usr/local/bin/python",
+        "python.formatting.provider": "black",
+        "python.linting.enabled": true,
+        "python.linting.flake8Enabled": true,
+        "python.linting.pylintEnabled": false,
+        "python.linting.mypyEnabled": true,
+        "python.testing.pytestEnabled": true,
+        "python.testing.pytestArgs": [
+            "pandas"
+        ]
+    },
+
+    // Add the IDs of extensions you want installed when the container is created in the array below.
+    "extensions": [
+        "ms-python.python",
+        "ms-vscode.cpptools"
+    ]
+}
diff --git a/.pep8speaks.yml b/.pep8speaks.yml
new file mode 100644
index 00000000..5a83727d
--- /dev/null
+++ b/.pep8speaks.yml
@@ -0,0 +1,4 @@
+# File : .pep8speaks.yml
+
+scanner:
+    diff_only: True  # If True, errors caused by only the patch are shown
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..2ca5b5c9
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,254 @@
+minimum_pre_commit_version: 2.15.0
+exclude: ^LICENSES/|\.(html|csv|svg)$
+# reserve "manual" for mypy and pyright
+default_stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
+ci:
+    autofix_prs: false
+repos:
+-   repo: https://github.com/MarcoGorelli/absolufy-imports
+    rev: v0.3.1
+    hooks:
+    -   id: absolufy-imports
+        files: ^pandas/
+-   repo: https://github.com/jendrikseipp/vulture
+    rev: 'v2.5'
+    hooks:
+    -   id: vulture
+        entry: python scripts/run_vulture.py
+        pass_filenames: true
+        require_serial: false
+-   repo: https://github.com/python/black
+    rev: 22.6.0
+    hooks:
+    -   id: black
+-   repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+    -   id: codespell
+        types_or: [python, rst, markdown]
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+    -   id: debug-statements
+    -   id: end-of-file-fixer
+        exclude: \.txt$
+        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
+    -   id: trailing-whitespace
+        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
+-   repo: https://github.com/cpplint/cpplint
+    rev: 1.6.0
+    hooks:
+    -   id: cpplint
+        # We don't lint all C files because we don't want to lint any that are built
+        # from Cython files nor do we want to lint C files that we didn't modify for
+        # this particular codebase (e.g. src/headers, src/klib). However,
+        # we can lint all header files since they aren't "generated" like C files are.
+ exclude: ^pandas/_libs/src/(klib|headers)/ + args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir'] +- repo: https://github.com/PyCQA/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + additional_dependencies: &flake8_dependencies + - flake8==5.0.4 + - flake8-bugbear==22.7.1 + - pandas-dev-flaker==0.5.0 +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort +- repo: https://github.com/asottile/pyupgrade + rev: v2.37.3 + hooks: + - id: pyupgrade + args: [--py38-plus] +- repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.9.0 + hooks: + - id: rst-backticks + - id: rst-directive-colons + types: [text] # overwrite types: [rst] + types_or: [python, rst] + - id: rst-inline-touching-normal + types: [text] # overwrite types: [rst] + types_or: [python, rst] +- repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v0.6.1 + hooks: + - id: sphinx-lint +- repo: https://github.com/asottile/yesqa + rev: v1.3.0 + hooks: + - id: yesqa + additional_dependencies: *flake8_dependencies +- repo: local + hooks: + - id: pyright + # note: assumes python env is setup and activated + name: pyright + entry: pyright + language: node + pass_filenames: false + types: [python] + stages: [manual] + additional_dependencies: &pyright_dependencies + - pyright@1.1.264 + - id: pyright_reportGeneralTypeIssues + # note: assumes python env is setup and activated + name: pyright reportGeneralTypeIssues + entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json + language: node + pass_filenames: false + types: [python] + stages: [manual] + additional_dependencies: *pyright_dependencies + - id: mypy + # note: assumes python env is setup and activated + name: mypy + entry: mypy + language: system + pass_filenames: false + types: [python] + stages: [manual] + - id: stubtest + # note: assumes python env is setup and activated + # note: requires pandas dev to be installed + name: mypy (stubtest) + entry: python + language: system + pass_filenames: false + types: [pyi] + args: [scripts/run_stubtest.py] + stages: [manual] + - id: flake8-rst + name: flake8-rst + description: Run flake8 on code snippets in docstrings or RST files + language: python + entry: flake8-rst + types: [rst] + args: [--filename=*.rst] + additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9] + - id: unwanted-patterns + name: Unwanted patterns + language: pygrep + entry: | + (?x) + # outdated annotation syntax, missing error codes + \#\ type:\ (?!ignore) + |\#\ type:\s?ignore(?!\[) + + # Incorrect code-block / IPython directives + |\.\.\ code-block\ :: + |\.\.\ ipython\ :: + # directive should not have a space before :: + |\.\.\ \w+\ :: + + # Check for deprecated messages without sphinx directive + |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.) 
+ types_or: [python, cython, rst] + - id: cython-casting + name: Check Cython casting is `obj`, not ` obj` + language: pygrep + entry: '[a-zA-Z0-9*]> ' + files: (\.pyx|\.pxi.in)$ + - id: incorrect-backticks + name: Check for backticks incorrectly rendering because of missing spaces + language: pygrep + entry: '[a-zA-Z0-9]\`\`?[a-zA-Z0-9]' + types: [rst] + files: ^doc/source/ + - id: seed-check-asv + name: Check for unnecessary random seeds in asv benchmarks + language: pygrep + entry: 'np\.random\.seed' + files: ^asv_bench/benchmarks + exclude: ^asv_bench/benchmarks/pandas_vb_common\.py + - id: np-testing-array-equal + name: Check for usage of numpy testing or array_equal + language: pygrep + entry: '(numpy|np)(\.testing|\.array_equal)' + files: ^pandas/tests/ + types: [python] + - id: invalid-ea-testing + name: Check for invalid EA testing + language: pygrep + entry: 'tm\.assert_(series|frame)_equal' + files: ^pandas/tests/extension/base + types: [python] + exclude: ^pandas/tests/extension/base/base\.py + - id: pip-to-conda + name: Generate pip dependency from conda + description: This hook checks if the conda environment.yml and requirements-dev.txt are equal + language: python + entry: python scripts/generate_pip_deps_from_conda.py + files: ^(environment.yml|requirements-dev.txt)$ + pass_filenames: false + additional_dependencies: [pyyaml, toml] + - id: sync-flake8-versions + name: Check flake8 version is synced across flake8, yesqa, and environment.yml + language: python + entry: python scripts/sync_flake8_versions.py + files: ^(\.pre-commit-config\.yaml|environment\.yml)$ + pass_filenames: false + additional_dependencies: [pyyaml] + - id: title-capitalization + name: Validate correct capitalization among titles in documentation + entry: python scripts/validate_rst_title_capitalization.py + language: python + types: [rst] + files: ^doc/source/(development|reference)/ + - id: use-pd_array-in-core + name: Import pandas.array as pd_array in core + language: python + entry: python scripts/use_pd_array_in_core.py + files: ^pandas/core/ + exclude: ^pandas/core/api\.py$ + types: [python] + - id: use-io-common-urlopen + name: Use pandas.io.common.urlopen instead of urllib.request.urlopen + language: python + entry: python scripts/use_io_common_urlopen.py + files: ^pandas/ + exclude: ^pandas/tests/ + types: [python] + - id: no-bool-in-core-generic + name: Use bool_t instead of bool in pandas/core/generic.py + entry: python scripts/no_bool_in_generic.py + language: python + files: ^pandas/core/generic\.py$ + - id: pandas-errors-documented + name: Ensure pandas errors are documented in doc/source/reference/testing.rst + entry: python scripts/pandas_errors_documented.py + language: python + files: ^pandas/errors/__init__.py$ + - id: pg8000-not-installed-CI + name: Check for pg8000 not installed on CI for test_pg8000_sqlalchemy_passthrough_error + language: pygrep + entry: 'pg8000' + files: ^ci/deps + types: [yaml] + - id: validate-min-versions-in-sync + name: Check minimum version of dependencies are aligned + entry: python scripts/validate_min_versions_in_sync.py + language: python + files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$ + - id: flake8-pyi + name: flake8-pyi + entry: flake8 --extend-ignore=E301,E302,E305,E701,E704 + types: [pyi] + language: python + additional_dependencies: + - flake8==5.0.4 + - flake8-pyi==22.8.1 + - id: future-annotations + name: import annotations from __future__ + entry: 'from __future__ import annotations' + language: pygrep + args: 
[--negate] + files: ^pandas/ + types: [python] + exclude: | + (?x) + /(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$ + |/tests/ + |/_testing/ diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 00000000..84fcfe05 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,56 @@ +About the Copyright Holders +=========================== + +* Copyright (c) 2008-2011 AQR Capital Management, LLC + + AQR Capital Management began pandas development in 2008. Development was + led by Wes McKinney. AQR released the source under this license in 2009. +* Copyright (c) 2011-2012, Lambda Foundry, Inc. + + Wes is now an employee of Lambda Foundry, and remains the pandas project + lead. +* Copyright (c) 2011-2012, PyData Development Team + + The PyData Development Team is the collection of developers of the PyData + project. This includes all of the PyData sub-projects, including pandas. The + core team that coordinates development on GitHub can be found here: + https://github.com/pydata. + +Full credits for pandas contributors can be found in the documentation. + +Our Copyright Policy +==================== + +PyData uses a shared copyright model. Each contributor maintains copyright +over their contributions to PyData. However, it is important to note that +these contributions are typically only changes to the repositories. Thus, +the PyData source code, in its entirety, is not the copyright of any single +person or institution. Instead, it is the collective copyright of the +entire PyData Development Team. If individual contributors want to maintain +a record of what changes/contributions they have specific copyright on, +they should indicate their copyright in the commit message of the change +when they commit the change to one of the PyData repositories. + +With this in mind, the following banner should be used in any source code +file to indicate the copyright and license terms: + +``` +#----------------------------------------------------------------------------- +# Copyright (c) 2012, PyData Development Team +# All rights reserved. +# +# Distributed under the terms of the BSD Simplified License. +# +# The full license is in the LICENSE file, distributed with this software. +#----------------------------------------------------------------------------- +``` + +Other licenses can be found in the LICENSES directory. + +License +======= + +pandas is distributed under a 3-clause ("Simplified" or "New") BSD +license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have +BSD-compatible licenses, are included. Their licenses follow the pandas +license. diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..0161dfa9 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,10 @@ +cff-version: 1.2.0 +title: 'pandas-dev/pandas: Pandas' +message: 'If you use this software, please cite it as below.' 
+authors:
+  - name: "The pandas development team"
+license: BSD-3-Clause
+license-url: "https://github.com/pandas-dev/pandas/blob/main/LICENSE"
+repository-code: "https://github.com/pandas-dev/pandas"
+type: software
+url: "https://github.com/pandas-dev/pandas"
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..7230dcab
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3.10.8
+WORKDIR /home/pandas
+
+RUN apt-get update && apt-get -y upgrade
+RUN apt-get install -y build-essential
+
+# hdf5 needed for pytables installation
+RUN apt-get install -y libhdf5-dev
+
+RUN python -m pip install --upgrade pip
+RUN python -m pip install \
+    -r https://raw.githubusercontent.com/pandas-dev/pandas/main/requirements-dev.txt
+CMD ["/bin/bash"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..d4e49a14
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,31 @@
+BSD 3-Clause License
+
+Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
+All rights reserved.
+
+Copyright (c) 2011-2022, Open source contributors.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/LICENSES/DATEUTIL_LICENSE b/LICENSES/DATEUTIL_LICENSE
new file mode 100644
index 00000000..6053d35c
--- /dev/null
+++ b/LICENSES/DATEUTIL_LICENSE
@@ -0,0 +1,54 @@
+Copyright 2017- Paul Ganssle
+Copyright 2017- dateutil contributors (see AUTHORS file)
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+ +The above license applies to all contributions after 2017-12-01, as well as +all contributions that have been re-licensed (see AUTHORS file for the list of +contributors who have re-licensed their code). +-------------------------------------------------------------------------------- +dateutil - Extensions to the standard Python datetime module. + +Copyright (c) 2003-2011 - Gustavo Niemeyer +Copyright (c) 2012-2014 - Tomi Pieviläinen +Copyright (c) 2014-2016 - Yaron de Leeuw +Copyright (c) 2015- - Paul Ganssle +Copyright (c) 2015- - dateutil contributors (see AUTHORS file) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The above BSD License Applies to all code, even that also covered by Apache 2.0. diff --git a/LICENSES/HAVEN_LICENSE b/LICENSES/HAVEN_LICENSE new file mode 100644 index 00000000..ce1b07b7 --- /dev/null +++ b/LICENSES/HAVEN_LICENSE @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2019 Hadley Wickham; RStudio; and Evan Miller + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/LICENSES/HAVEN_MIT b/LICENSES/HAVEN_MIT new file mode 100644 index 00000000..b03d0e64 --- /dev/null +++ b/LICENSES/HAVEN_MIT @@ -0,0 +1,32 @@ +Based on http://opensource.org/licenses/MIT + +This is a template. Complete and ship as file LICENSE the following 2 +lines (only) + +YEAR: +COPYRIGHT HOLDER: + +and specify as + +License: MIT + file LICENSE + +Copyright (c) , + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/LICENSES/KLIB_LICENSE b/LICENSES/KLIB_LICENSE new file mode 100644 index 00000000..0a996fae --- /dev/null +++ b/LICENSES/KLIB_LICENSE @@ -0,0 +1,23 @@ +The MIT License + +Copyright (c) 2008- Attractive Chaos + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/LICENSES/MUSL_LICENSE b/LICENSES/MUSL_LICENSE new file mode 100644 index 00000000..a8833d4b --- /dev/null +++ b/LICENSES/MUSL_LICENSE @@ -0,0 +1,132 @@ +musl as a whole is licensed under the following standard MIT license: + +---------------------------------------------------------------------- +Copyright © 2005-2014 Rich Felker, et al. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +---------------------------------------------------------------------- + +Authors/contributors include: + +Anthony G. Basile +Arvid Picciani +Bobby Bingham +Boris Brezillon +Brent Cook +Chris Spiegel +Clément Vasseur +Emil Renner Berthing +Hiltjo Posthuma +Isaac Dunham +Jens Gustedt +Jeremy Huntwork +John Spencer +Justin Cormack +Luca Barbato +Luka Perkov +M Farkas-Dyck (Strake) +Michael Forney +Nicholas J. Kain +orc +Pascal Cuoq +Pierre Carrier +Rich Felker +Richard Pennington +sin +Solar Designer +Stefan Kristiansson +Szabolcs Nagy +Timo Teräs +Valentin Ochs +William Haddon + +Portions of this software are derived from third-party works licensed +under terms compatible with the above MIT license: + +The TRE regular expression implementation (src/regex/reg* and +src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed +under a 2-clause BSD license (license text in the source files). The +included version has been heavily modified by Rich Felker in 2012, in +the interests of size, simplicity, and namespace cleanliness. + +Much of the math library code (src/math/* and src/complex/*) is +Copyright © 1993,2004 Sun Microsystems or +Copyright © 2003-2011 David Schultz or +Copyright © 2003-2009 Steven G. Kargl or +Copyright © 2003-2009 Bruce D. Evans or +Copyright © 2008 Stephen L. Moshier +and labelled as such in comments in the individual source files. All +have been licensed under extremely permissive terms. + +The ARM memcpy code (src/string/armel/memcpy.s) is Copyright © 2008 +The Android Open Source Project and is licensed under a two-clause BSD +license. It was taken from Bionic libc, used on Android. + +The implementation of DES for crypt (src/misc/crypt_des.c) is +Copyright © 1994 David Burren. It is licensed under a BSD license. + +The implementation of blowfish crypt (src/misc/crypt_blowfish.c) was +originally written by Solar Designer and placed into the public +domain. The code also comes with a fallback permissive license for use +in jurisdictions that may not recognize the public domain. + +The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 +Valentin Ochs and is licensed under an MIT-style license. + +The BSD PRNG implementation (src/prng/random.c) and XSI search API +(src/search/*.c) functions are Copyright © 2011 Szabolcs Nagy and +licensed under following terms: "Permission to use, copy, modify, +and/or distribute this code for any purpose with or without fee is +hereby granted. There is no warranty." 
+ +The x86_64 port was written by Nicholas J. Kain. Several files (crt) +were released into the public domain; others are licensed under the +standard MIT license terms at the top of this file. See individual +files for their copyright status. + +The mips and microblaze ports were originally written by Richard +Pennington for use in the ellcc project. The original code was adapted +by Rich Felker for build system and code conventions during upstream +integration. It is licensed under the standard MIT terms. + +The powerpc port was also originally written by Richard Pennington, +and later supplemented and integrated by John Spencer. It is licensed +under the standard MIT terms. + +All other files which have no copyright comments are original works +produced specifically for use as part of this library, written either +by Rich Felker, the main author of the library, or by one or more +contibutors listed above. Details on authorship of individual files +can be found in the git version control history of the project. The +omission of copyright and license comments in each file is in the +interest of source tree size. + +All public header files (include/* and arch/*/bits/*) should be +treated as Public Domain as they intentionally contain no content +which can be covered by copyright. Some source modules may fall in +this category as well. If you believe that a file is so trivial that +it should be in the Public Domain, please contact the authors and +request an explicit statement releasing it from copyright. + +The following files are trivial, believed not to be copyrightable in +the first place, and hereby explicitly released to the Public Domain: + +All public headers: include/*, arch/*/bits/* +Startup files: crt/* diff --git a/LICENSES/NUMPY_LICENSE b/LICENSES/NUMPY_LICENSE new file mode 100644 index 00000000..7e972cff --- /dev/null +++ b/LICENSES/NUMPY_LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2005-2011, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/LICENSES/OTHER b/LICENSES/OTHER new file mode 100644 index 00000000..7446d68e --- /dev/null +++ b/LICENSES/OTHER @@ -0,0 +1,75 @@ +Bottleneck license +------------------ + +Copyright (c) 2010-2012 Archipel Asset Management AB. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +google-api-python-client license +-------------------------------- + +Copyright (C) 2012 Google Inc. +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Pyperclip v1.3 license +---------------------- + +Copyright (c) 2010, Albert Sweigart +All rights reserved. + +BSD-style license: + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the pyperclip nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY Albert Sweigart "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL Albert Sweigart BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/PACKAGING_LICENSE b/LICENSES/PACKAGING_LICENSE new file mode 100644 index 00000000..4216ea1c --- /dev/null +++ b/LICENSES/PACKAGING_LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + +Copyright (c) Donald Stufft and individual contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/PSF_LICENSE b/LICENSES/PSF_LICENSE new file mode 100644 index 00000000..5cdb01e8 --- /dev/null +++ b/LICENSES/PSF_LICENSE @@ -0,0 +1,279 @@ +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations (now Zope +Corporation, see http://www.zope.com). In 2001, the Python Software +Foundation (PSF, see http://www.python.org/psf/) was formed, a +non-profit organization created specifically to own Python-related +Intellectual Property. Zope Corporation is a sponsoring member of +the PSF. + +All Python releases are Open Source (see http://www.opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? 
(1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.2 2.1.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2.1 2.2 2002 PSF yes + 2.2.2 2.2.1 2002 PSF yes + 2.2.3 2.2.2 2003 PSF yes + 2.3 2.2.2 2002-2003 PSF yes + 2.3.1 2.3 2002-2003 PSF yes + 2.3.2 2.3.1 2002-2003 PSF yes + 2.3.3 2.3.2 2002-2003 PSF yes + 2.3.4 2.3.3 2004 PSF yes + 2.3.5 2.3.4 2005 PSF yes + 2.4 2.3 2004 PSF yes + 2.4.1 2.4 2005 PSF yes + 2.4.2 2.4.1 2005 PSF yes + 2.4.3 2.4.2 2006 PSF yes + 2.4.4 2.4.3 2006 PSF yes + 2.5 2.4 2006 PSF yes + 2.5.1 2.5 2007 PSF yes + 2.5.2 2.5.1 2008 PSF yes + 2.5.3 2.5.2 2008 PSF yes + 2.6 2.5 2008 PSF yes + 2.6.1 2.6 2008 PSF yes + 2.6.2 2.6.1 2009 PSF yes + 2.6.3 2.6.2 2009 PSF yes + 2.6.4 2.6.3 2009 PSF yes + 2.6.5 2.6.4 2010 PSF yes + 2.7 2.6 2010 PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 +Python Software Foundation; All Rights Reserved" are retained in Python alone or +in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. 
PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. 
This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. 
By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/LICENSES/PYUPGRADE_LICENSE b/LICENSES/PYUPGRADE_LICENSE new file mode 100644 index 00000000..522fbe20 --- /dev/null +++ b/LICENSES/PYUPGRADE_LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2017 Anthony Sottile + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/LICENSES/SAS7BDAT_LICENSE b/LICENSES/SAS7BDAT_LICENSE new file mode 100644 index 00000000..8fbf1940 --- /dev/null +++ b/LICENSES/SAS7BDAT_LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015 Jared Hobbs + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/LICENSES/SCIPY_LICENSE b/LICENSES/SCIPY_LICENSE new file mode 100644 index 00000000..d887ce5f --- /dev/null +++ b/LICENSES/SCIPY_LICENSE @@ -0,0 +1,31 @@ +Copyright (c) 2001, 2002 Enthought, Inc. +All rights reserved. + +Copyright (c) 2003-2012 SciPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of Enthought nor the names of the SciPy Developers + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. + diff --git a/LICENSES/ULTRAJSON_LICENSE b/LICENSES/ULTRAJSON_LICENSE new file mode 100644 index 00000000..a905fb01 --- /dev/null +++ b/LICENSES/ULTRAJSON_LICENSE @@ -0,0 +1,34 @@ +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/LICENSES/XARRAY_LICENSE b/LICENSES/XARRAY_LICENSE new file mode 100644 index 00000000..6bafeb9d --- /dev/null +++ b/LICENSES/XARRAY_LICENSE @@ -0,0 +1,195 @@ +Copyright 2014-2019, xarray Developers + +-------------------------------------------------------------------------------- + +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. 
+ +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
+ +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..d2b1b8cb --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,60 @@ +include RELEASE.md +include versioneer.py + +graft doc +prune doc/build + +graft LICENSES + +graft pandas + +global-exclude *.bz2 +global-exclude *.csv +global-exclude *.dta +global-exclude *.feather +global-exclude *.tar +global-exclude *.gz +global-exclude *.h5 +global-exclude *.html +global-exclude *.json +global-exclude *.jsonl +global-exclude *.msgpack +global-exclude *.pdf +global-exclude *.pickle +global-exclude *.png +global-exclude *.pptx +global-exclude *.ods +global-exclude *.odt +global-exclude *.orc +global-exclude *.sas7bdat +global-exclude *.sav +global-exclude *.so +global-exclude *.xls +global-exclude *.xlsb +global-exclude *.xlsm +global-exclude *.xlsx +global-exclude *.xpt +global-exclude *.cpt +global-exclude *.xz +global-exclude *.zip +global-exclude *.zst +global-exclude *~ +global-exclude .DS_Store +global-exclude .git* +global-exclude \#* + +global-exclude *.c +global-exclude *.cpp +global-exclude *.h + +global-exclude *.py[ocd] +global-exclude *.pxi + +# GH 39321 +# csv_dir_path fixture checks the existence of the directory +# exclude the whole directory to avoid running related tests in sdist +prune pandas/tests/io/parser/data + +# Selectively re-add *.cxx files that were excluded above +graft pandas/_libs/src +graft pandas/_libs/tslibs/src diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..c0aa685e --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +.PHONY : develop build clean clean_pyc doc lint-diff black test-scripts + +all: develop + +clean: + -python setup.py clean + +clean_pyc: + -find . -name '*.py[co]' -exec rm {} \; + +build: clean_pyc + python setup.py build_ext + +lint-diff: + git diff upstream/main --name-only -- "*.py" | xargs flake8 + +black: + black . + +develop: build + python -m pip install --no-build-isolation -e . + +doc: + -rm -rf doc/build doc/source/generated + cd doc; \ + python make.py clean; \ + python make.py html + +test-scripts: + pytest scripts diff --git a/README.md b/README.md new file mode 100644 index 00000000..aaf63ead --- /dev/null +++ b/README.md @@ -0,0 +1,172 @@ +
+
+
+ +----------------- + +# pandas: powerful Python data analysis toolkit +[![PyPI Latest Release](https://img.shields.io/pypi/v/pandas.svg)](https://pypi.org/project/pandas/) +[![Conda Latest Release](https://anaconda.org/conda-forge/pandas/badges/version.svg)](https://anaconda.org/anaconda/pandas/) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3509134.svg)](https://doi.org/10.5281/zenodo.3509134) +[![Package Status](https://img.shields.io/pypi/status/pandas.svg)](https://pypi.org/project/pandas/) +[![License](https://img.shields.io/pypi/l/pandas.svg)](https://github.com/pandas-dev/pandas/blob/main/LICENSE) +[![Coverage](https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=main)](https://codecov.io/gh/pandas-dev/pandas) +[![Downloads](https://static.pepy.tech/personalized-badge/pandas?period=month&units=international_system&left_color=black&right_color=orange&left_text=PyPI%20downloads%20per%20month)](https://pepy.tech/project/pandas) +[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas) +[![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/) + +## What is it? + +**pandas** is a Python package that provides fast, flexible, and expressive data +structures designed to make working with "relational" or "labeled" data both +easy and intuitive. It aims to be the fundamental high-level building block for +doing practical, **real world** data analysis in Python. Additionally, it has +the broader goal of becoming **the most powerful and flexible open source data +analysis / manipulation tool available in any language**. It is already well on +its way towards this goal. + +## Main Features +Here are just a few of the things that pandas does well: + + - Easy handling of [**missing data**][missing-data] (represented as + `NaN`, `NA`, or `NaT`) in floating point as well as non-floating point data + - Size mutability: columns can be [**inserted and + deleted**][insertion-deletion] from DataFrame and higher dimensional + objects + - Automatic and explicit [**data alignment**][alignment]: objects can + be explicitly aligned to a set of labels, or the user can simply + ignore the labels and let `Series`, `DataFrame`, etc. 
automatically + align the data for you in computations + - Powerful, flexible [**group by**][groupby] functionality to perform + split-apply-combine operations on data sets, for both aggregating + and transforming data + - Make it [**easy to convert**][conversion] ragged, + differently-indexed data in other Python and NumPy data structures + into DataFrame objects + - Intelligent label-based [**slicing**][slicing], [**fancy + indexing**][fancy-indexing], and [**subsetting**][subsetting] of + large data sets + - Intuitive [**merging**][merging] and [**joining**][joining] data + sets + - Flexible [**reshaping**][reshape] and [**pivoting**][pivot-table] of + data sets + - [**Hierarchical**][mi] labeling of axes (possible to have multiple + labels per tick) + - Robust IO tools for loading data from [**flat files**][flat-files] + (CSV and delimited), [**Excel files**][excel], [**databases**][db], + and saving/loading data from the ultrafast [**HDF5 format**][hdfstore] + - [**Time series**][timeseries]-specific functionality: date range + generation and frequency conversion, moving window statistics, + date shifting and lagging + + + [missing-data]: https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html + [insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#column-selection-addition-deletion + [alignment]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html?highlight=alignment#intro-to-data-structures + [groupby]: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#group-by-split-apply-combine + [conversion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#dataframe + [slicing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#slicing-ranges + [fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced + [subsetting]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing + [merging]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#database-style-dataframe-or-named-series-joining-merging + [joining]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#joining-on-index + [reshape]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html + [pivot-table]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html + [mi]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#hierarchical-indexing-multiindex + [flat-files]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#csv-text-files + [excel]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#excel-files + [db]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#sql-queries + [hdfstore]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#hdf5-pytables + [timeseries]: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-series-date-functionality + +## Where to get it +The source code is currently hosted on GitHub at: +https://github.com/pandas-dev/pandas + +Binary installers for the latest released version are available at the [Python +Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/). 
+ +```sh +# conda +conda install pandas +``` + +```sh +# or PyPI +pip install pandas +``` + +## Dependencies +- [NumPy - Adds support for large, multi-dimensional arrays, matrices and high-level mathematical functions to operate on these arrays](https://www.numpy.org) +- [python-dateutil - Provides powerful extensions to the standard datetime module](https://dateutil.readthedocs.io/en/stable/index.html) +- [pytz - Brings the Olson tz database into Python which allows accurate and cross platform timezone calculations](https://github.com/stub42/pytz) + +See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies. + +## Installation from sources +To install pandas from source you need [Cython](https://cython.org/) in addition to the normal +dependencies above. Cython can be installed from PyPI: + +```sh +pip install cython +``` + +In the `pandas` directory (same one where you found this file after +cloning the git repo), execute: + +```sh +python setup.py install +``` + +or for installing in [development mode](https://pip.pypa.io/en/latest/cli/pip_install/#install-editable): + + +```sh +python -m pip install -e . --no-build-isolation --no-use-pep517 +``` + +If you have `make`, you can also use `make develop` to run the same command. + +or alternatively + +```sh +python setup.py develop +``` + +See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html#installing-from-source). + +## License +[BSD 3](LICENSE) + +## Documentation +The official documentation is hosted on PyData.org: https://pandas.pydata.org/pandas-docs/stable + +## Background +Work on ``pandas`` started at [AQR](https://www.aqr.com/) (a quantitative hedge fund) in 2008 and +has been under active development since then. + +## Getting Help + +For usage questions, the best place to go to is [StackOverflow](https://stackoverflow.com/questions/tagged/pandas). +Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata). + +## Discussion and Development +Most development discussions take place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions. + +## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas) + +All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome. + +A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**. + +If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out. 
+ +You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas). + +Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it! + +Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). + +As contributors and maintainers to this project, you are expected to abide by pandas' code of conduct. More information can be found at: [Contributor Code of Conduct](https://github.com/pandas-dev/.github/blob/master/CODE_OF_CONDUCT.md) diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 00000000..344a097a --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,6 @@ +Release Notes +============= + +The list of changes to pandas between each release can be found +[here](https://pandas.pydata.org/pandas-docs/stable/whatsnew/index.html). For full +details, see the commit logs at https://github.com/pandas-dev/pandas. diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json new file mode 100644 index 00000000..b1ea2682 --- /dev/null +++ b/asv_bench/asv.conf.json @@ -0,0 +1,131 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "pandas", + + // The project's homepage + "project_url": "https://pandas.pydata.org/", + + // The URL of the source code repository for the project being + // benchmarked + "repo": "..", + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "conda", + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/pandas-dev/pandas/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + "pythons": ["3.8"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). 
+ "matrix": { + "numpy": [], + "Cython": ["0.29.32"], + "matplotlib": [], + "sqlalchemy": [], + "scipy": [], + "numba": [], + "numexpr": [], + "pytables": [null, ""], // platform dependent, see excludes below + "pyarrow": [], + "tables": [null, ""], + "openpyxl": [], + "xlsxwriter": [], + "xlrd": [], + "xlwt": [], + "odfpy": [], + "jinja2": [], + }, + "conda_channels": ["defaults", "conda-forge"], + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + "exclude": [ + // On conda install pytables, otherwise tables + {"environment_type": "conda", "tables": ""}, + {"environment_type": "conda", "pytables": null}, + {"environment_type": "(?!conda).*", "tables": null}, + {"environment_type": "(?!conda).*", "pytables": ""}, + ], + "include": [], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + // "env_dir": "env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + // "results_dir": "results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + // "html_dir": "html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is + // number of builds to keep, per environment. + "build_cache_size": 8, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. 
+ // + "regressions_first_commits": { + ".*": "0409521665" + }, + "regression_thresholds": { + }, + "build_command": + ["python setup.py build -j4", + "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"], +} diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py new file mode 100644 index 00000000..eada1478 --- /dev/null +++ b/asv_bench/benchmarks/__init__.py @@ -0,0 +1 @@ +"""Pandas benchmarks.""" diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py new file mode 100644 index 00000000..0008a589 --- /dev/null +++ b/asv_bench/benchmarks/algorithms.py @@ -0,0 +1,172 @@ +from importlib import import_module + +import numpy as np + +import pandas as pd + +from .pandas_vb_common import tm + +for imp in ["pandas.util", "pandas.tools.hashing"]: + try: + hashing = import_module(imp) + break + except (ImportError, TypeError, ValueError): + pass + + +class Factorize: + + params = [ + [True, False], + [True, False], + [ + "int", + "uint", + "float", + "object", + "datetime64[ns]", + "datetime64[ns, tz]", + "Int64", + "boolean", + "string[pyarrow]", + ], + ] + param_names = ["unique", "sort", "dtype"] + + def setup(self, unique, sort, dtype): + N = 10**5 + string_index = tm.makeStringIndex(N) + string_arrow = None + if dtype == "string[pyarrow]": + try: + string_arrow = pd.array(string_index, dtype="string[pyarrow]") + except ImportError: + raise NotImplementedError + + data = { + "int": pd.Index(np.arange(N), dtype="int64"), + "uint": pd.Index(np.arange(N), dtype="uint64"), + "float": pd.Index(np.random.randn(N), dtype="float64"), + "object": string_index, + "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns, tz]": pd.date_range( + "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + ), + "Int64": pd.array(np.arange(N), dtype="Int64"), + "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"), + "string[pyarrow]": string_arrow, + }[dtype] + if not unique: + data = data.repeat(5) + self.data = data + + def time_factorize(self, unique, sort, dtype): + pd.factorize(self.data, sort=sort) + + +class Duplicated: + + params = [ + [True, False], + ["first", "last", False], + ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"], + ] + param_names = ["unique", "keep", "dtype"] + + def setup(self, unique, keep, dtype): + N = 10**5 + data = { + "int": pd.Index(np.arange(N), dtype="int64"), + "uint": pd.Index(np.arange(N), dtype="uint64"), + "float": pd.Index(np.random.randn(N), dtype="float64"), + "string": tm.makeStringIndex(N), + "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns, tz]": pd.date_range( + "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + ), + }[dtype] + if not unique: + data = data.repeat(5) + self.idx = data + # cache is_unique + self.idx.is_unique + + def time_duplicated(self, unique, keep, dtype): + self.idx.duplicated(keep=keep) + + +class Hashing: + def setup_cache(self): + N = 10**5 + + df = pd.DataFrame( + { + "strings": pd.Series( + tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N)) + ), + "floats": np.random.randn(N), + "ints": np.arange(N), + "dates": pd.date_range("20110101", freq="s", periods=N), + "timedeltas": pd.timedelta_range("1 day", freq="s", periods=N), + } + ) + df["categories"] = df["strings"].astype("category") + df.iloc[10:20] = np.nan + return df + + def time_frame(self, df): + hashing.hash_pandas_object(df) + + def time_series_int(self, df): + 
hashing.hash_pandas_object(df["ints"]) + + def time_series_string(self, df): + hashing.hash_pandas_object(df["strings"]) + + def time_series_float(self, df): + hashing.hash_pandas_object(df["floats"]) + + def time_series_categorical(self, df): + hashing.hash_pandas_object(df["categories"]) + + def time_series_timedeltas(self, df): + hashing.hash_pandas_object(df["timedeltas"]) + + def time_series_dates(self, df): + hashing.hash_pandas_object(df["dates"]) + + +class Quantile: + params = [ + [0, 0.5, 1], + ["linear", "nearest", "lower", "higher", "midpoint"], + ["float", "int", "uint"], + ] + param_names = ["quantile", "interpolation", "dtype"] + + def setup(self, quantile, interpolation, dtype): + N = 10**5 + data = { + "int": np.arange(N), + "uint": np.arange(N).astype(np.uint64), + "float": np.random.randn(N), + } + self.idx = pd.Series(data[dtype].repeat(5)) + + def time_quantile(self, quantile, interpolation, dtype): + self.idx.quantile(quantile, interpolation=interpolation) + + +class SortIntegerArray: + params = [10**3, 10**5] + + def setup(self, N): + data = np.arange(N, dtype=float) + data[40] = np.nan + self.array = pd.array(data, dtype="Int64") + + def time_argsort(self, N): + self.array.argsort() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/algos/__init__.py b/asv_bench/benchmarks/algos/__init__.py new file mode 100644 index 00000000..97c9ab09 --- /dev/null +++ b/asv_bench/benchmarks/algos/__init__.py @@ -0,0 +1,12 @@ +""" +algos/ directory is intended for individual functions from core.algorithms + +In many cases these algorithms are reachable in multiple ways: + algos.foo(x, y) + Series(x).foo(y) + Index(x).foo(y) + pd.array(x).foo(y) + +In most cases we profile the Series variant directly, trusting the performance +of the others to be highly correlated. 
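+
+For example, isin is benchmarked via Series(x).isin(y) in algos/isin.py, on
+the assumption that the algos / Index / array variants track its performance.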
+""" diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py new file mode 100644 index 00000000..16d90b9d --- /dev/null +++ b/asv_bench/benchmarks/algos/isin.py @@ -0,0 +1,342 @@ +import numpy as np + +from pandas import ( + Categorical, + Index, + NaT, + Series, + date_range, +) + +from ..pandas_vb_common import tm + + +class IsIn: + + params = [ + "int64", + "uint64", + "object", + "Int64", + "boolean", + "bool", + "datetime64[ns]", + "category[object]", + "category[int]", + "str", + "string[python]", + "string[pyarrow]", + ] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10000 + + self.mismatched = [NaT.to_datetime64()] * 2 + + if dtype in ["boolean", "bool"]: + self.series = Series(np.random.randint(0, 2, N)).astype(dtype) + self.values = [True, False] + + elif dtype == "datetime64[ns]": + # Note: values here is much larger than non-dt64ns cases + + # dti has length=115777 + dti = date_range(start="2015-10-26", end="2016-01-01", freq="50s") + self.series = Series(dti) + self.values = self.series._values[::3] + self.mismatched = [1, 2] + + elif dtype in ["category[object]", "category[int]"]: + # Note: sizes are different in this case than others + n = 5 * 10**5 + sample_size = 100 + + arr = list(np.random.randint(0, n // 10, size=n)) + if dtype == "category[object]": + arr = [f"s{i:04d}" for i in arr] + + self.values = np.random.choice(arr, sample_size) + self.series = Series(arr).astype("category") + + elif dtype in ["str", "string[python]", "string[pyarrow]"]: + try: + self.series = Series(tm.makeStringIndex(N), dtype=dtype) + except ImportError: + raise NotImplementedError + self.values = list(self.series[:2]) + + else: + self.series = Series(np.random.randint(1, 10, N)).astype(dtype) + self.values = [1, 2] + + self.cat_values = Categorical(self.values) + + def time_isin(self, dtype): + self.series.isin(self.values) + + def time_isin_categorical(self, dtype): + self.series.isin(self.cat_values) + + def time_isin_empty(self, dtype): + self.series.isin([]) + + def time_isin_mismatched_dtype(self, dtype): + self.series.isin(self.mismatched) + + +class IsinAlmostFullWithRandomInt: + params = [ + [np.float64, np.int64, np.uint64, np.object_], + range(10, 21), + ["inside", "outside"], + ] + param_names = ["dtype", "exponent", "title"] + + def setup(self, dtype, exponent, title): + M = 3 * 2 ** (exponent - 2) + # 0.77-the maximal share of occupied buckets + self.series = Series(np.random.randint(0, M, M)).astype(dtype) + + values = np.random.randint(0, M, M).astype(dtype) + if title == "inside": + self.values = values + elif title == "outside": + self.values = values + M + else: + raise ValueError(title) + + def time_isin(self, dtype, exponent, title): + self.series.isin(self.values) + + +class IsinWithRandomFloat: + params = [ + [np.float64, np.object_], + [ + 1_300, + 2_000, + 7_000, + 8_000, + 70_000, + 80_000, + 750_000, + 900_000, + ], + ["inside", "outside"], + ] + param_names = ["dtype", "size", "title"] + + def setup(self, dtype, size, title): + self.values = np.random.rand(size) + self.series = Series(self.values).astype(dtype) + np.random.shuffle(self.values) + + if title == "outside": + self.values = self.values + 0.1 + + def time_isin(self, dtype, size, title): + self.series.isin(self.values) + + +class IsinWithArangeSorted: + params = [ + [np.float64, np.int64, np.uint64, np.object_], + [ + 1_000, + 2_000, + 8_000, + 100_000, + 1_000_000, + ], + ] + param_names = ["dtype", "size"] + + def setup(self, dtype, size): + self.series = 
Series(np.arange(size)).astype(dtype) + self.values = np.arange(size).astype(dtype) + + def time_isin(self, dtype, size): + self.series.isin(self.values) + + +class IsinWithArange: + params = [ + [np.float64, np.int64, np.uint64, np.object_], + [ + 1_000, + 2_000, + 8_000, + ], + [-2, 0, 2], + ] + param_names = ["dtype", "M", "offset_factor"] + + def setup(self, dtype, M, offset_factor): + offset = int(M * offset_factor) + tmp = Series(np.random.randint(offset, M + offset, 10**6)) + self.series = tmp.astype(dtype) + self.values = np.arange(M).astype(dtype) + + def time_isin(self, dtype, M, offset_factor): + self.series.isin(self.values) + + +class IsInFloat64: + + params = [ + [np.float64, "Float64"], + ["many_different_values", "few_different_values", "only_nans_values"], + ] + param_names = ["dtype", "title"] + + def setup(self, dtype, title): + N_many = 10**5 + N_few = 10**6 + self.series = Series([1, 2], dtype=dtype) + + if title == "many_different_values": + # runtime is dominated by creation of the lookup-table + self.values = np.arange(N_many, dtype=np.float64) + elif title == "few_different_values": + # runtime is dominated by creation of the lookup-table + self.values = np.zeros(N_few, dtype=np.float64) + elif title == "only_nans_values": + # runtime is dominated by creation of the lookup-table + self.values = np.full(N_few, np.nan, dtype=np.float64) + else: + raise ValueError(title) + + def time_isin(self, dtype, title): + self.series.isin(self.values) + + +class IsInForObjects: + """ + A subset of the cartesian product of cases have special motivations: + + "nans" x "nans" + if nan-objects are different objects, + this has the potential to trigger O(n^2) running time + + "short" x "long" + running time dominated by the preprocessing + + "long" x "short" + running time dominated by look-up + + "long" x "long" + no dominating part + + "long_floats" x "long_floats" + because of nans floats are special + no dominating part + + """ + + variants = ["nans", "short", "long", "long_floats"] + + params = [variants, variants] + param_names = ["series_type", "vals_type"] + + def setup(self, series_type, vals_type): + N_many = 10**5 + + if series_type == "nans": + ser_vals = np.full(10**4, np.nan) + elif series_type == "short": + ser_vals = np.arange(2) + elif series_type == "long": + ser_vals = np.arange(N_many) + elif series_type == "long_floats": + ser_vals = np.arange(N_many, dtype=np.float_) + + self.series = Series(ser_vals).astype(object) + + if vals_type == "nans": + values = np.full(10**4, np.nan) + elif vals_type == "short": + values = np.arange(2) + elif vals_type == "long": + values = np.arange(N_many) + elif vals_type == "long_floats": + values = np.arange(N_many, dtype=np.float_) + + self.values = values.astype(object) + + def time_isin(self, series_type, vals_type): + self.series.isin(self.values) + + +class IsInLongSeriesLookUpDominates: + params = [ + ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], + [5, 1000], + ["random_hits", "random_misses", "monotone_hits", "monotone_misses"], + ] + param_names = ["dtype", "MaxNumber", "series_type"] + + def setup(self, dtype, MaxNumber, series_type): + N = 10**7 + + if series_type == "random_hits": + array = np.random.randint(0, MaxNumber, N) + if series_type == "random_misses": + array = np.random.randint(0, MaxNumber, N) + MaxNumber + if series_type == "monotone_hits": + array = np.repeat(np.arange(MaxNumber), N // MaxNumber) + if series_type == "monotone_misses": + array = np.arange(N) + MaxNumber + + 
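+        # Note (added for clarity): the series built below has N = 10**7
+        # elements while `values` has at most MaxNumber (<= 1000) entries, so
+        # per-element look-up on the long series dominates the runtime, as the
+        # class name suggests.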
self.series = Series(array).astype(dtype) + + self.values = np.arange(MaxNumber).astype(dtype.lower()) + + def time_isin(self, dtypes, MaxNumber, series_type): + self.series.isin(self.values) + + +class IsInLongSeriesValuesDominate: + params = [ + ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], + ["random", "monotone"], + ] + param_names = ["dtype", "series_type"] + + def setup(self, dtype, series_type): + N = 10**7 + + if series_type == "random": + vals = np.random.randint(0, 10 * N, N) + if series_type == "monotone": + vals = np.arange(N) + + self.values = vals.astype(dtype.lower()) + M = 10**6 + 1 + self.series = Series(np.arange(M)).astype(dtype) + + def time_isin(self, dtypes, series_type): + self.series.isin(self.values) + + +class IsInWithLongTupples: + def setup(self): + t = tuple(range(1000)) + self.series = Series([t] * 1000) + self.values = [t] + + def time_isin(self): + self.series.isin(self.values) + + +class IsInIndexes: + def setup(self): + self.range_idx = Index(range(1000)) + self.index = Index(list(range(1000))) + self.series = Series(np.random.randint(100_000, size=1000)) + + def time_isin_range_index(self): + self.series.isin(self.range_idx) + + def time_isin_index(self): + self.series.isin(self.index) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py new file mode 100644 index 00000000..496db66c --- /dev/null +++ b/asv_bench/benchmarks/arithmetic.py @@ -0,0 +1,511 @@ +import operator +import warnings + +import numpy as np + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + to_timedelta, +) +import pandas._testing as tm +from pandas.core.algorithms import checked_add_with_arr + +from .pandas_vb_common import numeric_dtypes + +try: + import pandas.core.computation.expressions as expr +except ImportError: + import pandas.computation.expressions as expr +try: + import pandas.tseries.holiday +except ImportError: + pass + + +class IntFrameWithScalar: + params = [ + [np.float64, np.int64], + [2, 3.0, np.int32(4), np.float64(5)], + [ + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.pow, + operator.mod, + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ], + ] + param_names = ["dtype", "scalar", "op"] + + def setup(self, dtype, scalar, op): + arr = np.random.randn(20000, 100) + self.df = DataFrame(arr.astype(dtype)) + + def time_frame_op_with_scalar(self, dtype, scalar, op): + op(self.df, scalar) + + +class OpWithFillValue: + def setup(self): + # GH#31300 + arr = np.arange(10**6) + df = DataFrame({"A": arr}) + ser = df["A"] + + self.df = df + self.ser = ser + + def time_frame_op_with_fill_value_no_nas(self): + self.df.add(self.df, fill_value=4) + + def time_series_op_with_fill_value_no_nas(self): + self.ser.add(self.ser, fill_value=4) + + +class MixedFrameWithSeriesAxis: + params = [ + [ + "eq", + "ne", + "lt", + "le", + "ge", + "gt", + "add", + "sub", + "truediv", + "floordiv", + "mul", + "pow", + ] + ] + param_names = ["opname"] + + def setup(self, opname): + arr = np.arange(10**6).reshape(1000, -1) + df = DataFrame(arr) + df["C"] = 1.0 + self.df = df + self.ser = df[0] + self.row = df.iloc[0] + + def time_frame_op_with_series_axis0(self, opname): + getattr(self.df, opname)(self.ser, axis=0) + + def time_frame_op_with_series_axis1(self, opname): + getattr(operator, opname)(self.df, self.ser) + + +class FrameWithFrameWide: + # Many-columns, mixed dtypes + + params = [ + [ + # GH#32779 
has discussion of which operators are included here + operator.add, + operator.floordiv, + operator.gt, + ], + [ + # (n_rows, n_columns) + (1_000_000, 10), + (100_000, 100), + (10_000, 1000), + (1000, 10_000), + ], + ] + param_names = ["op", "shape"] + + def setup(self, op, shape): + # we choose dtypes so as to make the blocks + # a) not perfectly match between right and left + # b) appreciably bigger than single columns + n_rows, n_cols = shape + + if op is operator.floordiv: + # floordiv is much slower than the other operations -> use less data + n_rows = n_rows // 10 + + # construct dataframe with 2 blocks + arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8") + arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4") + df = pd.concat([DataFrame(arr1), DataFrame(arr2)], axis=1, ignore_index=True) + # should already be the case, but just to be sure + df._consolidate_inplace() + + # TODO: GH#33198 the setting here shouldn't need two steps + arr1 = np.random.randn(n_rows, max(n_cols // 4, 3)).astype("f8") + arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8") + arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8") + df2 = pd.concat( + [DataFrame(arr1), DataFrame(arr2), DataFrame(arr3)], + axis=1, + ignore_index=True, + ) + # should already be the case, but just to be sure + df2._consolidate_inplace() + + self.left = df + self.right = df2 + + def time_op_different_blocks(self, op, shape): + # blocks (and dtypes) are not aligned + op(self.left, self.right) + + def time_op_same_blocks(self, op, shape): + # blocks (and dtypes) are aligned + op(self.left, self.left) + + +class Ops: + + params = [[True, False], ["default", 1]] + param_names = ["use_numexpr", "threads"] + + def setup(self, use_numexpr, threads): + self.df = DataFrame(np.random.randn(20000, 100)) + self.df2 = DataFrame(np.random.randn(20000, 100)) + + if threads != "default": + expr.set_numexpr_threads(threads) + if not use_numexpr: + expr.set_use_numexpr(False) + + def time_frame_add(self, use_numexpr, threads): + self.df + self.df2 + + def time_frame_mult(self, use_numexpr, threads): + self.df * self.df2 + + def time_frame_multi_and(self, use_numexpr, threads): + self.df[(self.df > 0) & (self.df2 > 0)] + + def time_frame_comparison(self, use_numexpr, threads): + self.df > self.df2 + + def teardown(self, use_numexpr, threads): + expr.set_use_numexpr(True) + expr.set_numexpr_threads() + + +class Ops2: + def setup(self): + N = 10**3 + self.df = DataFrame(np.random.randn(N, N)) + self.df2 = DataFrame(np.random.randn(N, N)) + + self.df_int = DataFrame( + np.random.randint( + np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N) + ) + ) + self.df2_int = DataFrame( + np.random.randint( + np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N) + ) + ) + + self.s = Series(np.random.randn(N)) + + # Division + + def time_frame_float_div(self): + self.df // self.df2 + + def time_frame_float_div_by_zero(self): + self.df / 0 + + def time_frame_float_floor_by_zero(self): + self.df // 0 + + def time_frame_int_div_by_zero(self): + self.df_int / 0 + + # Modulo + + def time_frame_int_mod(self): + self.df_int % self.df2_int + + def time_frame_float_mod(self): + self.df % self.df2 + + # Dot product + + def time_frame_dot(self): + self.df.dot(self.df2) + + def time_series_dot(self): + self.s.dot(self.s) + + def time_frame_series_dot(self): + self.df.dot(self.s) + + +class Timeseries: + + params = [None, "US/Eastern"] + param_names = ["tz"] + + def setup(self, tz): + N = 10**6 + halfway = (N // 2) - 1 + self.s = 
Series(date_range("20010101", periods=N, freq="T", tz=tz)) + self.ts = self.s[halfway] + + self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz)) + + def time_series_timestamp_compare(self, tz): + self.s <= self.ts + + def time_timestamp_series_compare(self, tz): + self.ts >= self.s + + def time_timestamp_ops_diff(self, tz): + self.s2.diff() + + def time_timestamp_ops_diff_with_shift(self, tz): + self.s - self.s.shift() + + +class IrregularOps: + def setup(self): + N = 10**5 + idx = date_range(start="1/1/2000", periods=N, freq="s") + s = Series(np.random.randn(N), index=idx) + self.left = s.sample(frac=1) + self.right = s.sample(frac=1) + + def time_add(self): + self.left + self.right + + +class TimedeltaOps: + def setup(self): + self.td = to_timedelta(np.arange(1000000)) + self.ts = Timestamp("2000") + + def time_add_td_ts(self): + self.td + self.ts + + +class CategoricalComparisons: + params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"] + param_names = ["op"] + + def setup(self, op): + N = 10**5 + self.cat = pd.Categorical(list("aabbcd") * N, ordered=True) + + def time_categorical_op(self, op): + getattr(self.cat, op)("b") + + +class IndexArithmetic: + + params = ["float", "int"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10**6 + indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"} + self.index = getattr(tm, indexes[dtype])(N) + + def time_add(self, dtype): + self.index + 2 + + def time_subtract(self, dtype): + self.index - 2 + + def time_multiply(self, dtype): + self.index * 2 + + def time_divide(self, dtype): + self.index / 2 + + def time_modulo(self, dtype): + self.index % 2 + + +class NumericInferOps: + # from GH 7332 + params = numeric_dtypes + param_names = ["dtype"] + + def setup(self, dtype): + N = 5 * 10**5 + self.df = DataFrame( + {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)} + ) + + def time_add(self, dtype): + self.df["A"] + self.df["B"] + + def time_subtract(self, dtype): + self.df["A"] - self.df["B"] + + def time_multiply(self, dtype): + self.df["A"] * self.df["B"] + + def time_divide(self, dtype): + self.df["A"] / self.df["B"] + + def time_modulo(self, dtype): + self.df["A"] % self.df["B"] + + +class DateInferOps: + # from GH 7332 + def setup_cache(self): + N = 5 * 10**5 + df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")}) + df["timedelta"] = df["datetime64"] - df["datetime64"] + return df + + def time_subtract_datetimes(self, df): + df["datetime64"] - df["datetime64"] + + def time_timedelta_plus_datetime(self, df): + df["timedelta"] + df["datetime64"] + + def time_add_timedeltas(self, df): + df["timedelta"] + df["timedelta"] + + +class AddOverflowScalar: + + params = [1, -1, 0] + param_names = ["scalar"] + + def setup(self, scalar): + N = 10**6 + self.arr = np.arange(N) + + def time_add_overflow_scalar(self, scalar): + checked_add_with_arr(self.arr, scalar) + + +class AddOverflowArray: + def setup(self): + N = 10**6 + self.arr = np.arange(N) + self.arr_rev = np.arange(-N, 0) + self.arr_mixed = np.array([1, -1]).repeat(N / 2) + self.arr_nan_1 = np.random.choice([True, False], size=N) + self.arr_nan_2 = np.random.choice([True, False], size=N) + + def time_add_overflow_arr_rev(self): + checked_add_with_arr(self.arr, self.arr_rev) + + def time_add_overflow_arr_mask_nan(self): + checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1) + + def time_add_overflow_b_mask_nan(self): + checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1) + + def 
time_add_overflow_both_arg_nan(self): + checked_add_with_arr( + self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2 + ) + + +hcal = pd.tseries.holiday.USFederalHolidayCalendar() +# These offsets currently raise a NotImplementedError with .apply_index() +non_apply = [ + pd.offsets.Day(), + pd.offsets.BYearEnd(), + pd.offsets.BYearBegin(), + pd.offsets.BQuarterEnd(), + pd.offsets.BQuarterBegin(), + pd.offsets.BMonthEnd(), + pd.offsets.BMonthBegin(), + pd.offsets.CustomBusinessDay(), + pd.offsets.CustomBusinessDay(calendar=hcal), + pd.offsets.CustomBusinessMonthBegin(calendar=hcal), + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), +] +other_offsets = [ + pd.offsets.YearEnd(), + pd.offsets.YearBegin(), + pd.offsets.QuarterEnd(), + pd.offsets.QuarterBegin(), + pd.offsets.MonthEnd(), + pd.offsets.MonthBegin(), + pd.offsets.DateOffset(months=2, days=2), + pd.offsets.BusinessDay(), + pd.offsets.SemiMonthEnd(), + pd.offsets.SemiMonthBegin(), +] +offsets = non_apply + other_offsets + + +class OffsetArrayArithmetic: + + params = offsets + param_names = ["offset"] + + def setup(self, offset): + N = 10000 + rng = date_range(start="1/1/2000", periods=N, freq="T") + self.rng = rng + self.ser = Series(rng) + + def time_add_series_offset(self, offset): + with warnings.catch_warnings(record=True): + self.ser + offset + + def time_add_dti_offset(self, offset): + with warnings.catch_warnings(record=True): + self.rng + offset + + +class ApplyIndex: + params = other_offsets + param_names = ["offset"] + + def setup(self, offset): + N = 10000 + rng = date_range(start="1/1/2000", periods=N, freq="T") + self.rng = rng + + def time_apply_index(self, offset): + self.rng + offset + + +class BinaryOpsMultiIndex: + params = ["sub", "add", "mul", "div"] + param_names = ["func"] + + def setup(self, func): + array = date_range("20200101 00:00", "20200102 0:00", freq="S") + level_0_names = [str(i) for i in range(30)] + + index = pd.MultiIndex.from_product([level_0_names, array]) + column_names = ["col_1", "col_2"] + + self.df = DataFrame( + np.random.rand(len(index), 2), index=index, columns=column_names + ) + + self.arg_df = DataFrame( + np.random.randint(1, 10, (len(level_0_names), 2)), + index=level_0_names, + columns=column_names, + ) + + def time_binary_op_multiindex(self, func): + getattr(self.df, func)(self.arg_df, level=0) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py new file mode 100644 index 00000000..b5820091 --- /dev/null +++ b/asv_bench/benchmarks/array.py @@ -0,0 +1,72 @@ +import numpy as np + +import pandas as pd + +from .pandas_vb_common import tm + + +class BooleanArray: + def setup(self): + self.values_bool = np.array([True, False, True, False]) + self.values_float = np.array([1.0, 0.0, 1.0, 0.0]) + self.values_integer = np.array([1, 0, 1, 0]) + self.values_integer_like = [1, 0, 1, 0] + self.data = np.array([True, False, True, False]) + self.mask = np.array([False, False, True, False]) + + def time_constructor(self): + pd.arrays.BooleanArray(self.data, self.mask) + + def time_from_bool_array(self): + pd.array(self.values_bool, dtype="boolean") + + def time_from_integer_array(self): + pd.array(self.values_integer, dtype="boolean") + + def time_from_integer_like(self): + pd.array(self.values_integer_like, dtype="boolean") + + def time_from_float_array(self): + pd.array(self.values_float, dtype="boolean") + + +class IntegerArray: + def 
setup(self): + self.values_integer = np.array([1, 0, 1, 0]) + self.data = np.array([1, 2, 3, 4], dtype="int64") + self.mask = np.array([False, False, True, False]) + + def time_constructor(self): + pd.arrays.IntegerArray(self.data, self.mask) + + def time_from_integer_array(self): + pd.array(self.values_integer, dtype="Int64") + + +class ArrowStringArray: + + params = [False, True] + param_names = ["multiple_chunks"] + + def setup(self, multiple_chunks): + try: + import pyarrow as pa + except ImportError: + raise NotImplementedError + strings = tm.rands_array(3, 10_000) + if multiple_chunks: + chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)] + self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks)) + else: + self.array = pd.arrays.ArrowStringArray(pa.array(strings)) + + def time_setitem(self, multiple_chunks): + for i in range(200): + self.array[i] = "foo" + + def time_setitem_list(self, multiple_chunks): + indexer = list(range(0, 50)) + list(range(-50, 0)) + self.array[indexer] = ["foo"] * len(indexer) + + def time_setitem_slice(self, multiple_chunks): + self.array[::10] = "foo" diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py new file mode 100644 index 00000000..d4366c42 --- /dev/null +++ b/asv_bench/benchmarks/attrs_caching.py @@ -0,0 +1,51 @@ +import numpy as np + +import pandas as pd +from pandas import DataFrame + +try: + from pandas.core.construction import extract_array +except ImportError: + extract_array = None + + +class DataFrameAttributes: + def setup(self): + self.df = DataFrame(np.random.randn(10, 6)) + self.cur_index = self.df.index + + def time_get_index(self): + self.foo = self.df.index + + def time_set_index(self): + self.df.index = self.cur_index + + +class SeriesArrayAttribute: + + params = [["numeric", "object", "category", "datetime64", "datetime64tz"]] + param_names = ["dtype"] + + def setup(self, dtype): + if dtype == "numeric": + self.series = pd.Series([1, 2, 3]) + elif dtype == "object": + self.series = pd.Series(["a", "b", "c"], dtype=object) + elif dtype == "category": + self.series = pd.Series(["a", "b", "c"], dtype="category") + elif dtype == "datetime64": + self.series = pd.Series(pd.date_range("2013", periods=3)) + elif dtype == "datetime64tz": + self.series = pd.Series(pd.date_range("2013", periods=3, tz="UTC")) + + def time_array(self, dtype): + self.series.array + + def time_extract_array(self, dtype): + extract_array(self.series) + + def time_extract_array_numpy(self, dtype): + extract_array(self.series, extract_numpy=True) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/boolean.py b/asv_bench/benchmarks/boolean.py new file mode 100644 index 00000000..71c422c6 --- /dev/null +++ b/asv_bench/benchmarks/boolean.py @@ -0,0 +1,32 @@ +import numpy as np + +import pandas as pd + + +class TimeLogicalOps: + def setup(self): + N = 10_000 + left, right, lmask, rmask = np.random.randint(0, 2, size=(4, N)).astype("bool") + self.left = pd.arrays.BooleanArray(left, lmask) + self.right = pd.arrays.BooleanArray(right, rmask) + + def time_or_scalar(self): + self.left | True + self.left | False + + def time_or_array(self): + self.left | self.right + + def time_and_scalar(self): + self.left & True + self.left & False + + def time_and_array(self): + self.left & self.right + + def time_xor_scalar(self): + self.left ^ True + self.left ^ False + + def time_xor_array(self): + self.left ^ self.right diff --git a/asv_bench/benchmarks/categoricals.py 
b/asv_bench/benchmarks/categoricals.py new file mode 100644 index 00000000..ff0b3b2f --- /dev/null +++ b/asv_bench/benchmarks/categoricals.py @@ -0,0 +1,342 @@ +import string +import sys +import warnings + +import numpy as np + +import pandas as pd + +from .pandas_vb_common import tm + +try: + from pandas.api.types import union_categoricals +except ImportError: + try: + from pandas.types.concat import union_categoricals + except ImportError: + pass + + +class Constructor: + def setup(self): + N = 10**5 + self.categories = list("abcde") + self.cat_idx = pd.Index(self.categories) + self.values = np.tile(self.categories, N) + self.codes = np.tile(range(len(self.categories)), N) + + self.datetimes = pd.Series( + pd.date_range("1995-01-01 00:00:00", periods=N / 10, freq="s") + ) + self.datetimes_with_nat = self.datetimes.copy() + self.datetimes_with_nat.iloc[-1] = pd.NaT + + self.values_some_nan = list(np.tile(self.categories + [np.nan], N)) + self.values_all_nan = [np.nan] * len(self.values) + self.values_all_int8 = np.ones(N, "int8") + self.categorical = pd.Categorical(self.values, self.categories) + self.series = pd.Series(self.categorical) + self.intervals = pd.interval_range(0, 1, periods=N // 10) + + def time_regular(self): + pd.Categorical(self.values, self.categories) + + def time_fastpath(self): + pd.Categorical(self.codes, self.cat_idx, fastpath=True) + + def time_datetimes(self): + pd.Categorical(self.datetimes) + + def time_interval(self): + pd.Categorical(self.datetimes, categories=self.datetimes) + + def time_datetimes_with_nat(self): + pd.Categorical(self.datetimes_with_nat) + + def time_with_nan(self): + pd.Categorical(self.values_some_nan) + + def time_all_nan(self): + pd.Categorical(self.values_all_nan) + + def time_from_codes_all_int8(self): + pd.Categorical.from_codes(self.values_all_int8, self.categories) + + def time_existing_categorical(self): + pd.Categorical(self.categorical) + + def time_existing_series(self): + pd.Categorical(self.series) + + +class AsType: + def setup(self): + N = 10**5 + + random_pick = np.random.default_rng().choice + + categories = { + "str": list(string.ascii_letters), + "int": np.random.randint(2**16, size=154), + "float": sys.maxsize * np.random.random((38,)), + "timestamp": [ + pd.Timestamp(x, unit="s") for x in np.random.randint(2**18, size=578) + ], + } + + self.df = pd.DataFrame( + {col: random_pick(cats, N) for col, cats in categories.items()} + ) + + for col in ("int", "float", "timestamp"): + self.df[col + "_as_str"] = self.df[col].astype(str) + + for col in self.df.columns: + self.df[col] = self.df[col].astype("category") + + def astype_str(self): + [self.df[col].astype("str") for col in "int float timestamp".split()] + + def astype_int(self): + [self.df[col].astype("int") for col in "int_as_str timestamp".split()] + + def astype_float(self): + [ + self.df[col].astype("float") + for col in "float_as_str int int_as_str timestamp".split() + ] + + def astype_datetime(self): + self.df["float"].astype(pd.DatetimeTZDtype(tz="US/Pacific")) + + +class Concat: + def setup(self): + N = 10**5 + self.s = pd.Series(list("aabbcd") * N).astype("category") + + self.a = pd.Categorical(list("aabbcd") * N) + self.b = pd.Categorical(list("bbcdjk") * N) + + self.idx_a = pd.CategoricalIndex(range(N), range(N)) + self.idx_b = pd.CategoricalIndex(range(N + 1), range(N + 1)) + self.df_a = pd.DataFrame(range(N), columns=["a"], index=self.idx_a) + self.df_b = pd.DataFrame(range(N + 1), columns=["a"], index=self.idx_b) + + def time_concat(self): + 
pd.concat([self.s, self.s]) + + def time_union(self): + union_categoricals([self.a, self.b]) + + def time_append_overlapping_index(self): + self.idx_a.append(self.idx_a) + + def time_append_non_overlapping_index(self): + self.idx_a.append(self.idx_b) + + def time_concat_overlapping_index(self): + pd.concat([self.df_a, self.df_a]) + + def time_concat_non_overlapping_index(self): + pd.concat([self.df_a, self.df_b]) + + +class ValueCounts: + + params = [True, False] + param_names = ["dropna"] + + def setup(self, dropna): + n = 5 * 10**5 + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] + self.ts = pd.Series(arr).astype("category") + + def time_value_counts(self, dropna): + self.ts.value_counts(dropna=dropna) + + +class Repr: + def setup(self): + self.sel = pd.Series(["s1234"]).astype("category") + + def time_rendering(self): + str(self.sel) + + +class SetCategories: + def setup(self): + n = 5 * 10**5 + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] + self.ts = pd.Series(arr).astype("category") + + def time_set_categories(self): + self.ts.cat.set_categories(self.ts.cat.categories[::2]) + + +class RemoveCategories: + def setup(self): + n = 5 * 10**5 + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] + self.ts = pd.Series(arr).astype("category") + + def time_remove_categories(self): + self.ts.cat.remove_categories(self.ts.cat.categories[::2]) + + +class Rank: + def setup(self): + N = 10**5 + ncats = 15 + + self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str) + self.s_str_cat = pd.Series(self.s_str, dtype="category") + with warnings.catch_warnings(record=True): + str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True) + self.s_str_cat_ordered = self.s_str.astype(str_cat_type) + + self.s_int = pd.Series(np.random.randint(0, ncats, size=N)) + self.s_int_cat = pd.Series(self.s_int, dtype="category") + with warnings.catch_warnings(record=True): + int_cat_type = pd.CategoricalDtype(set(self.s_int), ordered=True) + self.s_int_cat_ordered = self.s_int.astype(int_cat_type) + + def time_rank_string(self): + self.s_str.rank() + + def time_rank_string_cat(self): + self.s_str_cat.rank() + + def time_rank_string_cat_ordered(self): + self.s_str_cat_ordered.rank() + + def time_rank_int(self): + self.s_int.rank() + + def time_rank_int_cat(self): + self.s_int_cat.rank() + + def time_rank_int_cat_ordered(self): + self.s_int_cat_ordered.rank() + + +class IsMonotonic: + def setup(self): + N = 1000 + self.c = pd.CategoricalIndex(list("a" * N + "b" * N + "c" * N)) + self.s = pd.Series(self.c) + + def time_categorical_index_is_monotonic_increasing(self): + self.c.is_monotonic_increasing + + def time_categorical_index_is_monotonic_decreasing(self): + self.c.is_monotonic_decreasing + + def time_categorical_series_is_monotonic_increasing(self): + self.s.is_monotonic_increasing + + def time_categorical_series_is_monotonic_decreasing(self): + self.s.is_monotonic_decreasing + + +class Contains: + def setup(self): + N = 10**5 + self.ci = tm.makeCategoricalIndex(N) + self.c = self.ci.values + self.key = self.ci.categories[0] + + def time_categorical_index_contains(self): + self.key in self.ci + + def time_categorical_contains(self): + self.key in self.c + + +class CategoricalSlicing: + + params = ["monotonic_incr", "monotonic_decr", "non_monotonic"] + param_names = ["index"] + + def setup(self, index): + N = 10**6 + categories = ["a", "b", "c"] + values = [0] * N + [1] * N + [2] * N + if index == "monotonic_incr": + self.data = 
pd.Categorical.from_codes(values, categories=categories) + elif index == "monotonic_decr": + self.data = pd.Categorical.from_codes( + list(reversed(values)), categories=categories + ) + elif index == "non_monotonic": + self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories) + else: + raise ValueError(f"Invalid index param: {index}") + + self.scalar = 10000 + self.list = list(range(10000)) + self.cat_scalar = "b" + + def time_getitem_scalar(self, index): + self.data[self.scalar] + + def time_getitem_slice(self, index): + self.data[: self.scalar] + + def time_getitem_list_like(self, index): + self.data[[self.scalar]] + + def time_getitem_list(self, index): + self.data[self.list] + + def time_getitem_bool_array(self, index): + self.data[self.data == self.cat_scalar] + + +class Indexing: + def setup(self): + N = 10**5 + self.index = pd.CategoricalIndex(range(N), range(N)) + self.series = pd.Series(range(N), index=self.index).sort_index() + self.category = self.index[500] + + def time_get_loc(self): + self.index.get_loc(self.category) + + def time_shallow_copy(self): + self.index._view() + + def time_align(self): + pd.DataFrame({"a": self.series, "b": self.series[:500]}) + + def time_intersection(self): + self.index[:750].intersection(self.index[250:]) + + def time_unique(self): + self.index.unique() + + def time_reindex(self): + self.index.reindex(self.index[:500]) + + def time_reindex_missing(self): + self.index.reindex(["a", "b", "c", "d"]) + + def time_sort_values(self): + self.index.sort_values(ascending=False) + + +class SearchSorted: + def setup(self): + N = 10**5 + self.ci = tm.makeCategoricalIndex(N).sort_values() + self.c = self.ci.values + self.key = self.ci.categories[1] + + def time_categorical_index_contains(self): + self.ci.searchsorted(self.key) + + def time_categorical_contains(self): + self.c.searchsorted(self.key) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py new file mode 100644 index 00000000..ef8b16f3 --- /dev/null +++ b/asv_bench/benchmarks/ctors.py @@ -0,0 +1,124 @@ +import numpy as np + +from pandas import ( + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, +) + +from .pandas_vb_common import tm + + +def no_change(arr): + return arr + + +def list_of_str(arr): + return list(arr.astype(str)) + + +def gen_of_str(arr): + return (x for x in arr.astype(str)) + + +def arr_dict(arr): + return dict(zip(range(len(arr)), arr)) + + +def list_of_tuples(arr): + return [(i, -i) for i in arr] + + +def gen_of_tuples(arr): + return ((i, -i) for i in arr) + + +def list_of_lists(arr): + return [[i, -i] for i in arr] + + +def list_of_tuples_with_none(arr): + return [(i, -i) for i in arr][:-1] + [None] + + +def list_of_lists_with_none(arr): + return [[i, -i] for i in arr][:-1] + [None] + + +class SeriesConstructors: + + param_names = ["data_fmt", "with_index", "dtype"] + params = [ + [ + no_change, + list, + list_of_str, + gen_of_str, + arr_dict, + list_of_tuples, + gen_of_tuples, + list_of_lists, + list_of_tuples_with_none, + list_of_lists_with_none, + ], + [False, True], + ["float", "int"], + ] + + # Generators get exhausted on use, so run setup before every call + number = 1 + repeat = (3, 250, 10) + + def setup(self, data_fmt, with_index, dtype): + if data_fmt in (gen_of_str, gen_of_tuples) and with_index: + raise NotImplementedError( + "Series constructors do not support using generators with indexes" + ) + N = 10**4 + if dtype == "float": + arr = 
np.random.randn(N) + else: + arr = np.arange(N) + self.data = data_fmt(arr) + self.index = np.arange(N) if with_index else None + + def time_series_constructor(self, data_fmt, with_index, dtype): + Series(self.data, index=self.index) + + +class SeriesDtypesConstructors: + def setup(self): + N = 10**4 + self.arr = np.random.randn(N) + self.arr_str = np.array(["foo", "bar", "baz"], dtype=object) + self.s = Series( + [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")] + * N + * 10 + ) + + def time_index_from_array_string(self): + Index(self.arr_str) + + def time_index_from_array_floats(self): + Index(self.arr) + + def time_dtindex_from_series(self): + DatetimeIndex(self.s) + + def time_dtindex_from_index_with_series(self): + Index(self.s) + + +class MultiIndexConstructor: + def setup(self): + N = 10**4 + self.iterables = [tm.makeStringIndex(N), range(20)] + + def time_multiindex_from_iterables(self): + MultiIndex.from_product(self.iterables) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py new file mode 100644 index 00000000..55f6be84 --- /dev/null +++ b/asv_bench/benchmarks/dtypes.py @@ -0,0 +1,127 @@ +import string + +import numpy as np + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm +from pandas.api.types import ( + is_extension_array_dtype, + pandas_dtype, +) + +from .pandas_vb_common import ( + datetime_dtypes, + extension_dtypes, + numeric_dtypes, + string_dtypes, +) + +_numpy_dtypes = [ + np.dtype(dtype) for dtype in (numeric_dtypes + datetime_dtypes + string_dtypes) +] +_dtypes = _numpy_dtypes + extension_dtypes + + +class Dtypes: + params = _dtypes + list(map(lambda dt: dt.name, _dtypes)) + param_names = ["dtype"] + + def time_pandas_dtype(self, dtype): + pandas_dtype(dtype) + + +class DtypesInvalid: + param_names = ["dtype"] + params = ["scalar-string", "scalar-int", "list-string", "array-string"] + data_dict = { + "scalar-string": "foo", + "scalar-int": 1, + "list-string": ["foo"] * 1000, + "array-string": np.array(["foo"] * 1000), + } + + def time_pandas_dtype_invalid(self, dtype): + try: + pandas_dtype(self.data_dict[dtype]) + except TypeError: + pass + + +class SelectDtypes: + + try: + params = [ + tm.ALL_INT_NUMPY_DTYPES + + tm.ALL_INT_EA_DTYPES + + tm.FLOAT_NUMPY_DTYPES + + tm.COMPLEX_DTYPES + + tm.DATETIME64_DTYPES + + tm.TIMEDELTA64_DTYPES + + tm.BOOL_DTYPES + ] + except AttributeError: + params = [ + tm.ALL_INT_DTYPES + + tm.ALL_EA_INT_DTYPES + + tm.FLOAT_DTYPES + + tm.COMPLEX_DTYPES + + tm.DATETIME64_DTYPES + + tm.TIMEDELTA64_DTYPES + + tm.BOOL_DTYPES + ] + param_names = ["dtype"] + + def setup(self, dtype): + N, K = 5000, 50 + self.index = tm.makeStringIndex(N) + self.columns = tm.makeStringIndex(K) + + def create_df(data): + return DataFrame(data, index=self.index, columns=self.columns) + + self.df_int = create_df(np.random.randint(low=100, size=(N, K))) + self.df_float = create_df(np.random.randn(N, K)) + self.df_bool = create_df(np.random.choice([True, False], size=(N, K))) + self.df_string = create_df( + np.random.choice(list(string.ascii_letters), size=(N, K)) + ) + + def time_select_dtype_int_include(self, dtype): + self.df_int.select_dtypes(include=dtype) + + def time_select_dtype_int_exclude(self, dtype): + self.df_int.select_dtypes(exclude=dtype) + + def time_select_dtype_float_include(self, dtype): + self.df_float.select_dtypes(include=dtype) + + def time_select_dtype_float_exclude(self, dtype): + 
self.df_float.select_dtypes(exclude=dtype) + + def time_select_dtype_bool_include(self, dtype): + self.df_bool.select_dtypes(include=dtype) + + def time_select_dtype_bool_exclude(self, dtype): + self.df_bool.select_dtypes(exclude=dtype) + + def time_select_dtype_string_include(self, dtype): + self.df_string.select_dtypes(include=dtype) + + def time_select_dtype_string_exclude(self, dtype): + self.df_string.select_dtypes(exclude=dtype) + + +class CheckDtypes: + def setup(self): + self.ext_dtype = pd.Int64Dtype() + self.np_dtype = np.dtype("int64") + + def time_is_extension_array_dtype_true(self): + is_extension_array_dtype(self.ext_dtype) + + def time_is_extension_array_dtype_false(self): + is_extension_array_dtype(self.np_dtype) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py new file mode 100644 index 00000000..b5442531 --- /dev/null +++ b/asv_bench/benchmarks/eval.py @@ -0,0 +1,66 @@ +import numpy as np + +import pandas as pd + +try: + import pandas.core.computation.expressions as expr +except ImportError: + import pandas.computation.expressions as expr + + +class Eval: + + params = [["numexpr", "python"], [1, "all"]] + param_names = ["engine", "threads"] + + def setup(self, engine, threads): + self.df = pd.DataFrame(np.random.randn(20000, 100)) + self.df2 = pd.DataFrame(np.random.randn(20000, 100)) + self.df3 = pd.DataFrame(np.random.randn(20000, 100)) + self.df4 = pd.DataFrame(np.random.randn(20000, 100)) + + if threads == 1: + expr.set_numexpr_threads(1) + + def time_add(self, engine, threads): + pd.eval("self.df + self.df2 + self.df3 + self.df4", engine=engine) + + def time_and(self, engine, threads): + pd.eval( + "(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)", + engine=engine, + ) + + def time_chained_cmp(self, engine, threads): + pd.eval("self.df < self.df2 < self.df3 < self.df4", engine=engine) + + def time_mult(self, engine, threads): + pd.eval("self.df * self.df2 * self.df3 * self.df4", engine=engine) + + def teardown(self, engine, threads): + expr.set_numexpr_threads() + + +class Query: + def setup(self): + N = 10**6 + halfway = (N // 2) - 1 + index = pd.date_range("20010101", periods=N, freq="T") + s = pd.Series(index) + self.ts = s.iloc[halfway] + self.df = pd.DataFrame({"a": np.random.randn(N), "dates": index}, index=index) + data = np.random.randn(N) + self.min_val = data.min() + self.max_val = data.max() + + def time_query_datetime_index(self): + self.df.query("index < @self.ts") + + def time_query_datetime_column(self): + self.df.query("dates < @self.ts") + + def time_query_with_boolean_selection(self): + self.df.query("(a >= @self.min_val) & (a <= @self.max_val)") + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/finalize.py b/asv_bench/benchmarks/finalize.py new file mode 100644 index 00000000..dc06f55c --- /dev/null +++ b/asv_bench/benchmarks/finalize.py @@ -0,0 +1,16 @@ +import pandas as pd + + +class Finalize: + param_names = ["series", "frame"] + params = [pd.Series, pd.DataFrame] + + def setup(self, param): + N = 1000 + obj = param(dtype=float) + for i in range(N): + obj.attrs[i] = i + self.obj = obj + + def time_finalize_micro(self, param): + self.obj.__finalize__(self.obj, method="__finalize__") diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py new file mode 100644 index 00000000..20c0c0ea --- /dev/null +++ b/asv_bench/benchmarks/frame_ctor.py @@ -0,0 +1,225 @@ +import 
numpy as np + +import pandas as pd +from pandas import ( + NA, + Categorical, + DataFrame, + Float64Dtype, + MultiIndex, + Series, + Timestamp, + date_range, +) + +from .pandas_vb_common import tm + +try: + from pandas.tseries.offsets import ( + Hour, + Nano, + ) +except ImportError: + # For compatibility with older versions + from pandas.core.datetools import ( + Hour, + Nano, + ) + + +class FromDicts: + def setup(self): + N, K = 5000, 50 + self.index = tm.makeStringIndex(N) + self.columns = tm.makeStringIndex(K) + frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns) + self.data = frame.to_dict() + self.dict_list = frame.to_dict(orient="records") + self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)} + + # arrays which we won't consolidate + self.dict_of_categoricals = {i: Categorical(np.arange(N)) for i in range(K)} + + def time_list_of_dict(self): + DataFrame(self.dict_list) + + def time_nested_dict(self): + DataFrame(self.data) + + def time_nested_dict_index(self): + DataFrame(self.data, index=self.index) + + def time_nested_dict_columns(self): + DataFrame(self.data, columns=self.columns) + + def time_nested_dict_index_columns(self): + DataFrame(self.data, index=self.index, columns=self.columns) + + def time_nested_dict_int64(self): + # nested dict, integer indexes, regression described in #621 + DataFrame(self.data2) + + def time_dict_of_categoricals(self): + # dict of arrays that we won't consolidate + DataFrame(self.dict_of_categoricals) + + +class FromSeries: + def setup(self): + mi = MultiIndex.from_product([range(100), range(100)]) + self.s = Series(np.random.randn(10000), index=mi) + + def time_mi_series(self): + DataFrame(self.s) + + +class FromDictwithTimestamp: + + params = [Nano(1), Hour(1)] + param_names = ["offset"] + + def setup(self, offset): + N = 10**3 + idx = date_range(Timestamp("1/1/1900"), freq=offset, periods=N) + df = DataFrame(np.random.randn(N, 10), index=idx) + self.d = df.to_dict() + + def time_dict_with_timestamp_offsets(self, offset): + DataFrame(self.d) + + +class FromRecords: + + params = [None, 1000] + param_names = ["nrows"] + + # Generators get exhausted on use, so run setup before every call + number = 1 + repeat = (3, 250, 10) + + def setup(self, nrows): + N = 100000 + self.gen = ((x, (x * 20), (x * 100)) for x in range(N)) + + def time_frame_from_records_generator(self, nrows): + # issue-6700 + self.df = DataFrame.from_records(self.gen, nrows=nrows) + + +class FromNDArray: + def setup(self): + N = 100000 + self.data = np.random.randn(N) + + def time_frame_from_ndarray(self): + self.df = DataFrame(self.data) + + +class FromLists: + + goal_time = 0.2 + + def setup(self): + N = 1000 + M = 100 + self.data = [list(range(M)) for i in range(N)] + + def time_frame_from_lists(self): + self.df = DataFrame(self.data) + + +class FromRange: + + goal_time = 0.2 + + def setup(self): + N = 1_000_000 + self.data = range(N) + + def time_frame_from_range(self): + self.df = DataFrame(self.data) + + +class FromScalar: + def setup(self): + self.nrows = 100_000 + + def time_frame_from_scalar_ea_float64(self): + DataFrame( + 1.0, + index=range(self.nrows), + columns=list("abc"), + dtype=Float64Dtype(), + ) + + def time_frame_from_scalar_ea_float64_na(self): + DataFrame( + NA, + index=range(self.nrows), + columns=list("abc"), + dtype=Float64Dtype(), + ) + + +class FromArrays: + + goal_time = 0.2 + + def setup(self): + N_rows = 1000 + N_cols = 1000 + self.float_arrays = [np.random.randn(N_rows) for _ in range(N_cols)] + 
self.sparse_arrays = [ + pd.arrays.SparseArray(np.random.randint(0, 2, N_rows), dtype="float64") + for _ in range(N_cols) + ] + self.int_arrays = [ + pd.array(np.random.randint(1000, size=N_rows), dtype="Int64") + for _ in range(N_cols) + ] + self.index = pd.Index(range(N_rows)) + self.columns = pd.Index(range(N_cols)) + + def time_frame_from_arrays_float(self): + self.df = DataFrame._from_arrays( + self.float_arrays, + index=self.index, + columns=self.columns, + verify_integrity=False, + ) + + def time_frame_from_arrays_int(self): + self.df = DataFrame._from_arrays( + self.int_arrays, + index=self.index, + columns=self.columns, + verify_integrity=False, + ) + + def time_frame_from_arrays_sparse(self): + self.df = DataFrame._from_arrays( + self.sparse_arrays, + index=self.index, + columns=self.columns, + verify_integrity=False, + ) + + +class From3rdParty: + # GH#44616 + + def setup(self): + try: + import torch + except ImportError: + raise NotImplementedError + + row = 700000 + col = 64 + self.val_tensor = torch.randn(row, col) + + def time_from_torch(self): + DataFrame(self.val_tensor) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py new file mode 100644 index 00000000..a28e20a6 --- /dev/null +++ b/asv_bench/benchmarks/frame_methods.py @@ -0,0 +1,770 @@ +import string +import warnings + +import numpy as np + +from pandas import ( + DataFrame, + MultiIndex, + NaT, + Series, + date_range, + isnull, + period_range, + timedelta_range, +) + +from .pandas_vb_common import tm + + +class GetNumericData: + def setup(self): + self.df = DataFrame(np.random.randn(10000, 25)) + self.df["foo"] = "bar" + self.df["bar"] = "baz" + self.df = self.df._consolidate() + + def time_frame_get_numeric_data(self): + self.df._get_numeric_data() + + +class Lookup: + def setup(self): + self.df = DataFrame(np.random.randn(10000, 8), columns=list("abcdefgh")) + self.df["foo"] = "bar" + self.row_labels = list(self.df.index[::10])[:900] + self.col_labels = list(self.df.columns) * 100 + self.row_labels_all = np.array( + list(self.df.index) * len(self.df.columns), dtype="object" + ) + self.col_labels_all = np.array( + list(self.df.columns) * len(self.df.index), dtype="object" + ) + + def time_frame_fancy_lookup(self): + self.df.lookup(self.row_labels, self.col_labels) + + def time_frame_fancy_lookup_all(self): + self.df.lookup(self.row_labels_all, self.col_labels_all) + + +class Reindex: + def setup(self): + N = 10**3 + self.df = DataFrame(np.random.randn(N * 10, N)) + self.idx = np.arange(4 * N, 7 * N) + self.idx_cols = np.random.randint(0, N, N) + self.df2 = DataFrame( + { + c: { + 0: np.random.randint(0, 2, N).astype(np.bool_), + 1: np.random.randint(0, N, N).astype(np.int16), + 2: np.random.randint(0, N, N).astype(np.int32), + 3: np.random.randint(0, N, N).astype(np.int64), + }[np.random.randint(0, 4)] + for c in range(N) + } + ) + + def time_reindex_axis0(self): + self.df.reindex(self.idx) + + def time_reindex_axis1(self): + self.df.reindex(columns=self.idx_cols) + + def time_reindex_axis1_missing(self): + self.df.reindex(columns=self.idx) + + def time_reindex_both_axes(self): + self.df.reindex(index=self.idx, columns=self.idx_cols) + + def time_reindex_upcast(self): + self.df2.reindex(np.random.permutation(range(1200))) + + +class Rename: + def setup(self): + N = 10**3 + self.df = DataFrame(np.random.randn(N * 10, N)) + self.idx = np.arange(4 * N, 7 * N) + self.dict_idx = {k: k for k in self.idx} + self.df2 = 
DataFrame( + { + c: { + 0: np.random.randint(0, 2, N).astype(np.bool_), + 1: np.random.randint(0, N, N).astype(np.int16), + 2: np.random.randint(0, N, N).astype(np.int32), + 3: np.random.randint(0, N, N).astype(np.int64), + }[np.random.randint(0, 4)] + for c in range(N) + } + ) + + def time_rename_single(self): + self.df.rename({0: 0}) + + def time_rename_axis0(self): + self.df.rename(self.dict_idx) + + def time_rename_axis1(self): + self.df.rename(columns=self.dict_idx) + + def time_rename_both_axes(self): + self.df.rename(index=self.dict_idx, columns=self.dict_idx) + + def time_dict_rename_both_axes(self): + self.df.rename(index=self.dict_idx, columns=self.dict_idx) + + +class Iteration: + # mem_itertuples_* benchmarks are slow + timeout = 120 + + def setup(self): + N = 1000 + self.df = DataFrame(np.random.randn(N * 10, N)) + self.df2 = DataFrame(np.random.randn(N * 50, 10)) + self.df3 = DataFrame( + np.random.randn(N, 5 * N), columns=["C" + str(c) for c in range(N * 5)] + ) + self.df4 = DataFrame(np.random.randn(N * 1000, 10)) + + def time_items(self): + # (monitor no-copying behaviour) + if hasattr(self.df, "_item_cache"): + self.df._item_cache.clear() + for name, col in self.df.items(): + pass + + def time_items_cached(self): + for name, col in self.df.items(): + pass + + def time_iteritems_indexing(self): + for col in self.df3: + self.df3[col] + + def time_itertuples_start(self): + self.df4.itertuples() + + def time_itertuples_read_first(self): + next(self.df4.itertuples()) + + def time_itertuples(self): + for row in self.df4.itertuples(): + pass + + def time_itertuples_to_list(self): + list(self.df4.itertuples()) + + def mem_itertuples_start(self): + return self.df4.itertuples() + + def peakmem_itertuples_start(self): + self.df4.itertuples() + + def mem_itertuples_read_first(self): + return next(self.df4.itertuples()) + + def peakmem_itertuples(self): + for row in self.df4.itertuples(): + pass + + def mem_itertuples_to_list(self): + return list(self.df4.itertuples()) + + def peakmem_itertuples_to_list(self): + list(self.df4.itertuples()) + + def time_itertuples_raw_start(self): + self.df4.itertuples(index=False, name=None) + + def time_itertuples_raw_read_first(self): + next(self.df4.itertuples(index=False, name=None)) + + def time_itertuples_raw_tuples(self): + for row in self.df4.itertuples(index=False, name=None): + pass + + def time_itertuples_raw_tuples_to_list(self): + list(self.df4.itertuples(index=False, name=None)) + + def mem_itertuples_raw_start(self): + return self.df4.itertuples(index=False, name=None) + + def peakmem_itertuples_raw_start(self): + self.df4.itertuples(index=False, name=None) + + def peakmem_itertuples_raw_read_first(self): + next(self.df4.itertuples(index=False, name=None)) + + def peakmem_itertuples_raw(self): + for row in self.df4.itertuples(index=False, name=None): + pass + + def mem_itertuples_raw_to_list(self): + return list(self.df4.itertuples(index=False, name=None)) + + def peakmem_itertuples_raw_to_list(self): + list(self.df4.itertuples(index=False, name=None)) + + def time_iterrows(self): + for row in self.df.iterrows(): + pass + + +class ToString: + def setup(self): + self.df = DataFrame(np.random.randn(100, 10)) + + def time_to_string_floats(self): + self.df.to_string() + + +class ToHTML: + def setup(self): + nrows = 500 + self.df2 = DataFrame(np.random.randn(nrows, 10)) + self.df2[0] = period_range("2000", periods=nrows) + self.df2[1] = range(nrows) + + def time_to_html_mixed(self): + self.df2.to_html() + + +class ToDict: + params = 
[["dict", "list", "series", "split", "records", "index"]] + param_names = ["orient"] + + def setup(self, orient): + data = np.random.randint(0, 1000, size=(10000, 4)) + self.int_df = DataFrame(data) + self.datetimelike_df = self.int_df.astype("timedelta64[ns]") + + def time_to_dict_ints(self, orient): + self.int_df.to_dict(orient=orient) + + def time_to_dict_datetimelike(self, orient): + self.datetimelike_df.to_dict(orient=orient) + + +class ToNumpy: + def setup(self): + N = 10000 + M = 10 + self.df_tall = DataFrame(np.random.randn(N, M)) + self.df_wide = DataFrame(np.random.randn(M, N)) + self.df_mixed_tall = self.df_tall.copy() + self.df_mixed_tall["foo"] = "bar" + self.df_mixed_tall[0] = period_range("2000", periods=N) + self.df_mixed_tall[1] = range(N) + self.df_mixed_wide = self.df_wide.copy() + self.df_mixed_wide["foo"] = "bar" + self.df_mixed_wide[0] = period_range("2000", periods=M) + self.df_mixed_wide[1] = range(M) + + def time_to_numpy_tall(self): + self.df_tall.to_numpy() + + def time_to_numpy_wide(self): + self.df_wide.to_numpy() + + def time_to_numpy_mixed_tall(self): + self.df_mixed_tall.to_numpy() + + def time_to_numpy_mixed_wide(self): + self.df_mixed_wide.to_numpy() + + def time_values_tall(self): + self.df_tall.values + + def time_values_wide(self): + self.df_wide.values + + def time_values_mixed_tall(self): + self.df_mixed_tall.values + + def time_values_mixed_wide(self): + self.df_mixed_wide.values + + +class ToRecords: + def setup(self): + N = 100_000 + data = np.random.randn(N, 2) + mi = MultiIndex.from_arrays( + [ + np.arange(N), + date_range("1970-01-01", periods=N, freq="ms"), + ] + ) + self.df = DataFrame(data) + self.df_mi = DataFrame(data, index=mi) + + def time_to_records(self): + self.df.to_records(index=True) + + def time_to_records_multiindex(self): + self.df_mi.to_records(index=True) + + +class Repr: + def setup(self): + nrows = 10000 + data = np.random.randn(nrows, 10) + arrays = np.tile(np.random.randn(3, nrows // 100), 100) + idx = MultiIndex.from_arrays(arrays) + self.df3 = DataFrame(data, index=idx) + self.df4 = DataFrame(data, index=np.random.randn(nrows)) + self.df_tall = DataFrame(np.random.randn(nrows, 10)) + self.df_wide = DataFrame(np.random.randn(10, nrows)) + + def time_html_repr_trunc_mi(self): + self.df3._repr_html_() + + def time_html_repr_trunc_si(self): + self.df4._repr_html_() + + def time_repr_tall(self): + repr(self.df_tall) + + def time_frame_repr_wide(self): + repr(self.df_wide) + + +class MaskBool: + def setup(self): + data = np.random.randn(1000, 500) + df = DataFrame(data) + df = df.where(df > 0) + self.bools = df > 0 + self.mask = isnull(df) + + def time_frame_mask_bools(self): + self.bools.mask(self.mask) + + def time_frame_mask_floats(self): + self.bools.astype(float).mask(self.mask) + + +class Isnull: + def setup(self): + N = 10**3 + self.df_no_null = DataFrame(np.random.randn(N, N)) + + sample = np.array([np.nan, 1.0]) + data = np.random.choice(sample, (N, N)) + self.df = DataFrame(data) + + sample = np.array(list(string.ascii_letters + string.whitespace)) + data = np.random.choice(sample, (N, N)) + self.df_strings = DataFrame(data) + + sample = np.array( + [ + NaT, + np.nan, + None, + np.datetime64("NaT"), + np.timedelta64("NaT"), + 0, + 1, + 2.0, + "", + "abcd", + ] + ) + data = np.random.choice(sample, (N, N)) + self.df_obj = DataFrame(data) + + def time_isnull_floats_no_null(self): + isnull(self.df_no_null) + + def time_isnull(self): + isnull(self.df) + + def time_isnull_strngs(self): + isnull(self.df_strings) + + def 
time_isnull_obj(self): + isnull(self.df_obj) + + +class Fillna: + + params = ( + [True, False], + ["pad", "bfill"], + [ + "float64", + "float32", + "object", + "Int64", + "Float64", + "datetime64[ns]", + "datetime64[ns, tz]", + "timedelta64[ns]", + ], + ) + param_names = ["inplace", "method", "dtype"] + + def setup(self, inplace, method, dtype): + N, M = 10000, 100 + if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"): + data = { + "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns, tz]": date_range( + "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + ), + "timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"), + } + self.df = DataFrame({f"col_{i}": data[dtype] for i in range(M)}) + self.df[::2] = None + else: + values = np.random.randn(N, M) + values[::2] = np.nan + if dtype == "Int64": + values = values.round() + self.df = DataFrame(values, dtype=dtype) + + def time_frame_fillna(self, inplace, method, dtype): + self.df.fillna(inplace=inplace, method=method) + + +class Dropna: + + params = (["all", "any"], [0, 1]) + param_names = ["how", "axis"] + + def setup(self, how, axis): + self.df = DataFrame(np.random.randn(10000, 1000)) + self.df.iloc[50:1000, 20:50] = np.nan + self.df.iloc[2000:3000] = np.nan + self.df.iloc[:, 60:70] = np.nan + self.df_mixed = self.df.copy() + self.df_mixed["foo"] = "bar" + + def time_dropna(self, how, axis): + self.df.dropna(how=how, axis=axis) + + def time_dropna_axis_mixed_dtypes(self, how, axis): + self.df_mixed.dropna(how=how, axis=axis) + + +class Count: + + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + self.df = DataFrame(np.random.randn(10000, 1000)) + self.df.iloc[50:1000, 20:50] = np.nan + self.df.iloc[2000:3000] = np.nan + self.df.iloc[:, 60:70] = np.nan + self.df_mixed = self.df.copy() + self.df_mixed["foo"] = "bar" + + self.df.index = MultiIndex.from_arrays([self.df.index, self.df.index]) + self.df.columns = MultiIndex.from_arrays([self.df.columns, self.df.columns]) + self.df_mixed.index = MultiIndex.from_arrays( + [self.df_mixed.index, self.df_mixed.index] + ) + self.df_mixed.columns = MultiIndex.from_arrays( + [self.df_mixed.columns, self.df_mixed.columns] + ) + + def time_count_level_multi(self, axis): + self.df.count(axis=axis, level=1) + + def time_count_level_mixed_dtypes_multi(self, axis): + self.df_mixed.count(axis=axis, level=1) + + +class Apply: + def setup(self): + self.df = DataFrame(np.random.randn(1000, 100)) + + self.s = Series(np.arange(1028.0)) + self.df2 = DataFrame({i: self.s for i in range(1028)}) + self.df3 = DataFrame(np.random.randn(1000, 3), columns=list("ABC")) + + def time_apply_user_func(self): + self.df2.apply(lambda x: np.corrcoef(x, self.s)[(0, 1)]) + + def time_apply_axis_1(self): + self.df.apply(lambda x: x + 1, axis=1) + + def time_apply_lambda_mean(self): + self.df.apply(lambda x: x.mean()) + + def time_apply_np_mean(self): + self.df.apply(np.mean) + + def time_apply_pass_thru(self): + self.df.apply(lambda x: x) + + def time_apply_ref_by_name(self): + self.df3.apply(lambda x: x["A"] + x["B"], axis=1) + + +class Dtypes: + def setup(self): + self.df = DataFrame(np.random.randn(1000, 1000)) + + def time_frame_dtypes(self): + self.df.dtypes + + +class Equals: + def setup(self): + N = 10**3 + self.float_df = DataFrame(np.random.randn(N, N)) + self.float_df_nan = self.float_df.copy() + self.float_df_nan.iloc[-1, -1] = np.nan + + self.object_df = DataFrame("foo", index=range(N), columns=range(N)) + self.object_df_nan = 
self.object_df.copy() + self.object_df_nan.iloc[-1, -1] = np.nan + + self.nonunique_cols = self.object_df.copy() + self.nonunique_cols.columns = ["A"] * len(self.nonunique_cols.columns) + self.nonunique_cols_nan = self.nonunique_cols.copy() + self.nonunique_cols_nan.iloc[-1, -1] = np.nan + + def time_frame_float_equal(self): + self.float_df.equals(self.float_df) + + def time_frame_float_unequal(self): + self.float_df.equals(self.float_df_nan) + + def time_frame_nonunique_equal(self): + self.nonunique_cols.equals(self.nonunique_cols) + + def time_frame_nonunique_unequal(self): + self.nonunique_cols.equals(self.nonunique_cols_nan) + + def time_frame_object_equal(self): + self.object_df.equals(self.object_df) + + def time_frame_object_unequal(self): + self.object_df.equals(self.object_df_nan) + + +class Interpolate: + + params = [None, "infer"] + param_names = ["downcast"] + + def setup(self, downcast): + N = 10000 + # this is the worst case, where every column has NaNs. + arr = np.random.randn(N, 100) + # NB: we need to set values in array, not in df.values, otherwise + # the benchmark will be misleading for ArrayManager + arr[::2] = np.nan + + self.df = DataFrame(arr) + + self.df2 = DataFrame( + { + "A": np.arange(0, N), + "B": np.random.randint(0, 100, N), + "C": np.random.randn(N), + "D": np.random.randn(N), + } + ) + self.df2.loc[1::5, "A"] = np.nan + self.df2.loc[1::5, "C"] = np.nan + + def time_interpolate(self, downcast): + self.df.interpolate(downcast=downcast) + + def time_interpolate_some_good(self, downcast): + self.df2.interpolate(downcast=downcast) + + +class Shift: + # frame shift speedup issue-5609 + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + self.df = DataFrame(np.random.rand(10000, 500)) + + def time_shift(self, axis): + self.df.shift(1, axis=axis) + + +class Nunique: + def setup(self): + self.df = DataFrame(np.random.randn(10000, 1000)) + + def time_frame_nunique(self): + self.df.nunique() + + +class SeriesNuniqueWithNan: + def setup(self): + self.ser = Series(100000 * (100 * [np.nan] + list(range(100)))).astype(float) + + def time_series_nunique_nan(self): + self.ser.nunique() + + +class Duplicated: + def setup(self): + n = 1 << 20 + t = date_range("2015-01-01", freq="S", periods=(n // 64)) + xs = np.random.randn(n // 64).round(2) + self.df = DataFrame( + { + "a": np.random.randint(-1 << 8, 1 << 8, n), + "b": np.random.choice(t, n), + "c": np.random.choice(xs, n), + } + ) + self.df2 = DataFrame(np.random.randn(1000, 100).astype(str)).T + + def time_frame_duplicated(self): + self.df.duplicated() + + def time_frame_duplicated_wide(self): + self.df2.duplicated() + + def time_frame_duplicated_subset(self): + self.df.duplicated(subset=["a"]) + + +class XS: + + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + self.N = 10**4 + self.df = DataFrame(np.random.randn(self.N, self.N)) + + def time_frame_xs(self, axis): + self.df.xs(self.N / 2, axis=axis) + + +class SortValues: + + params = [True, False] + param_names = ["ascending"] + + def setup(self, ascending): + self.df = DataFrame(np.random.randn(1000000, 2), columns=list("AB")) + + def time_frame_sort_values(self, ascending): + self.df.sort_values(by="A", ascending=ascending) + + +class SortIndexByColumns: + def setup(self): + N = 10000 + K = 10 + self.df = DataFrame( + { + "key1": tm.makeStringIndex(N).values.repeat(K), + "key2": tm.makeStringIndex(N).values.repeat(K), + "value": np.random.randn(N * K), + } + ) + + def time_frame_sort_values_by_columns(self): + 
self.df.sort_values(by=["key1", "key2"]) + + +class Quantile: + + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + self.df = DataFrame(np.random.randn(1000, 3), columns=list("ABC")) + + def time_frame_quantile(self, axis): + self.df.quantile([0.1, 0.5], axis=axis) + + +class Rank: + param_names = ["dtype"] + params = [ + ["int", "uint", "float", "object"], + ] + + def setup(self, dtype): + self.df = DataFrame( + np.random.randn(10000, 10).astype(dtype), columns=range(10), dtype=dtype + ) + + def time_rank(self, dtype): + self.df.rank() + + +class GetDtypeCounts: + # 2807 + def setup(self): + self.df = DataFrame(np.random.randn(10, 10000)) + + def time_frame_get_dtype_counts(self): + with warnings.catch_warnings(record=True): + self.df.dtypes.value_counts() + + def time_info(self): + self.df.info() + + +class NSort: + + params = ["first", "last", "all"] + param_names = ["keep"] + + def setup(self, keep): + self.df = DataFrame(np.random.randn(100000, 3), columns=list("ABC")) + + def time_nlargest_one_column(self, keep): + self.df.nlargest(100, "A", keep=keep) + + def time_nlargest_two_columns(self, keep): + self.df.nlargest(100, ["A", "B"], keep=keep) + + def time_nsmallest_one_column(self, keep): + self.df.nsmallest(100, "A", keep=keep) + + def time_nsmallest_two_columns(self, keep): + self.df.nsmallest(100, ["A", "B"], keep=keep) + + +class Describe: + def setup(self): + self.df = DataFrame( + { + "a": np.random.randint(0, 100, 10**6), + "b": np.random.randint(0, 100, 10**6), + "c": np.random.randint(0, 100, 10**6), + } + ) + + def time_series_describe(self): + self.df["a"].describe() + + def time_dataframe_describe(self): + self.df.describe() + + +class MemoryUsage: + def setup(self): + self.df = DataFrame(np.random.randn(100000, 2), columns=list("AB")) + self.df2 = self.df.copy() + self.df2["A"] = self.df2["A"].astype("object") + + def time_memory_usage(self): + self.df.memory_usage(deep=True) + + def time_memory_usage_object_dtype(self): + self.df2.memory_usage(deep=True) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py new file mode 100644 index 00000000..31654a5c --- /dev/null +++ b/asv_bench/benchmarks/gil.py @@ -0,0 +1,341 @@ +from functools import wraps +import threading + +import numpy as np + +from pandas import ( + DataFrame, + Series, + date_range, + factorize, + read_csv, +) +from pandas.core.algorithms import take_nd + +from .pandas_vb_common import tm + +try: + from pandas import ( + rolling_kurt, + rolling_max, + rolling_mean, + rolling_median, + rolling_min, + rolling_skew, + rolling_std, + rolling_var, + ) + + have_rolling_methods = True +except ImportError: + have_rolling_methods = False +try: + from pandas._libs import algos +except ImportError: + from pandas import algos + + +from .pandas_vb_common import BaseIO # isort:skip + + +def test_parallel(num_threads=2, kwargs_list=None): + """ + Decorator to run the same function multiple times in parallel. + + Parameters + ---------- + num_threads : int, optional + The number of times the function is run in parallel. + kwargs_list : list of dicts, optional + The list of kwargs to update original + function kwargs on different threads. + + Notes + ----- + This decorator does not pass the return value of the decorated function. 
+ + Original from scikit-image: + + https://github.com/scikit-image/scikit-image/pull/1519 + + """ + assert num_threads > 0 + has_kwargs_list = kwargs_list is not None + if has_kwargs_list: + assert len(kwargs_list) == num_threads + + def wrapper(func): + @wraps(func) + def inner(*args, **kwargs): + if has_kwargs_list: + update_kwargs = lambda i: dict(kwargs, **kwargs_list[i]) + else: + update_kwargs = lambda i: kwargs + threads = [] + for i in range(num_threads): + updated_kwargs = update_kwargs(i) + thread = threading.Thread(target=func, args=args, kwargs=updated_kwargs) + threads.append(thread) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + return inner + + return wrapper + + +class ParallelGroupbyMethods: + + params = ([2, 4, 8], ["count", "last", "max", "mean", "min", "prod", "sum", "var"]) + param_names = ["threads", "method"] + + def setup(self, threads, method): + + N = 10**6 + ngroups = 10**3 + df = DataFrame( + {"key": np.random.randint(0, ngroups, size=N), "data": np.random.randn(N)} + ) + + @test_parallel(num_threads=threads) + def parallel(): + getattr(df.groupby("key")["data"], method)() + + self.parallel = parallel + + def loop(): + getattr(df.groupby("key")["data"], method)() + + self.loop = loop + + def time_parallel(self, threads, method): + self.parallel() + + def time_loop(self, threads, method): + for i in range(threads): + self.loop() + + +class ParallelGroups: + + params = [2, 4, 8] + param_names = ["threads"] + + def setup(self, threads): + + size = 2**22 + ngroups = 10**3 + data = Series(np.random.randint(0, ngroups, size=size)) + + @test_parallel(num_threads=threads) + def get_groups(): + data.groupby(data).groups + + self.get_groups = get_groups + + def time_get_groups(self, threads): + self.get_groups() + + +class ParallelTake1D: + + params = ["int64", "float64"] + param_names = ["dtype"] + + def setup(self, dtype): + + N = 10**6 + df = DataFrame({"col": np.arange(N, dtype=dtype)}) + indexer = np.arange(100, len(df) - 100) + + @test_parallel(num_threads=2) + def parallel_take1d(): + take_nd(df["col"].values, indexer) + + self.parallel_take1d = parallel_take1d + + def time_take1d(self, dtype): + self.parallel_take1d() + + +class ParallelKth: + # This depends exclusively on code in _libs/, could go in libs.py + + number = 1 + repeat = 5 + + def setup(self): + + N = 10**7 + k = 5 * 10**5 + kwargs_list = [{"arr": np.random.randn(N)}, {"arr": np.random.randn(N)}] + + @test_parallel(num_threads=2, kwargs_list=kwargs_list) + def parallel_kth_smallest(arr): + algos.kth_smallest(arr, k) + + self.parallel_kth_smallest = parallel_kth_smallest + + def time_kth_smallest(self): + self.parallel_kth_smallest() + + +class ParallelDatetimeFields: + def setup(self): + + N = 10**6 + self.dti = date_range("1900-01-01", periods=N, freq="T") + self.period = self.dti.to_period("D") + + def time_datetime_field_year(self): + @test_parallel(num_threads=2) + def run(dti): + dti.year + + run(self.dti) + + def time_datetime_field_day(self): + @test_parallel(num_threads=2) + def run(dti): + dti.day + + run(self.dti) + + def time_datetime_field_daysinmonth(self): + @test_parallel(num_threads=2) + def run(dti): + dti.days_in_month + + run(self.dti) + + def time_datetime_field_normalize(self): + @test_parallel(num_threads=2) + def run(dti): + dti.normalize() + + run(self.dti) + + def time_datetime_to_period(self): + @test_parallel(num_threads=2) + def run(dti): + dti.to_period("S") + + run(self.dti) + + def time_period_to_datetime(self): + 
@test_parallel(num_threads=2) + def run(period): + period.to_timestamp() + + run(self.period) + + +class ParallelRolling: + + params = ["median", "mean", "min", "max", "var", "skew", "kurt", "std"] + param_names = ["method"] + + def setup(self, method): + + win = 100 + arr = np.random.rand(100000) + if hasattr(DataFrame, "rolling"): + df = DataFrame(arr).rolling(win) + + @test_parallel(num_threads=2) + def parallel_rolling(): + getattr(df, method)() + + self.parallel_rolling = parallel_rolling + elif have_rolling_methods: + rolling = { + "median": rolling_median, + "mean": rolling_mean, + "min": rolling_min, + "max": rolling_max, + "var": rolling_var, + "skew": rolling_skew, + "kurt": rolling_kurt, + "std": rolling_std, + } + + @test_parallel(num_threads=2) + def parallel_rolling(): + rolling[method](arr, win) + + self.parallel_rolling = parallel_rolling + else: + raise NotImplementedError + + def time_rolling(self, method): + self.parallel_rolling() + + +class ParallelReadCSV(BaseIO): + + number = 1 + repeat = 5 + params = ["float", "object", "datetime"] + param_names = ["dtype"] + + def setup(self, dtype): + + rows = 10000 + cols = 50 + data = { + "float": DataFrame(np.random.randn(rows, cols)), + "datetime": DataFrame( + np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows) + ), + "object": DataFrame( + "foo", index=range(rows), columns=["object%03d" for _ in range(5)] + ), + } + + self.fname = f"__test_{dtype}__.csv" + df = data[dtype] + df.to_csv(self.fname) + + @test_parallel(num_threads=2) + def parallel_read_csv(): + read_csv(self.fname) + + self.parallel_read_csv = parallel_read_csv + + def time_read_csv(self, dtype): + self.parallel_read_csv() + + +class ParallelFactorize: + + number = 1 + repeat = 5 + params = [2, 4, 8] + param_names = ["threads"] + + def setup(self, threads): + + strings = tm.makeStringIndex(100000) + + @test_parallel(num_threads=threads) + def parallel(): + factorize(strings) + + self.parallel = parallel + + def loop(): + factorize(strings) + + self.loop = loop + + def time_parallel(self, threads): + self.parallel() + + def time_loop(self, threads): + for i in range(threads): + self.loop() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py new file mode 100644 index 00000000..2de1f25f --- /dev/null +++ b/asv_bench/benchmarks/groupby.py @@ -0,0 +1,952 @@ +from functools import partial +from itertools import product +from string import ascii_letters + +import numpy as np + +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + period_range, +) + +from .pandas_vb_common import tm + +method_blocklist = { + "object": { + "diff", + "median", + "prod", + "sem", + "cumsum", + "sum", + "cummin", + "mean", + "max", + "skew", + "cumprod", + "cummax", + "pct_change", + "min", + "var", + "mad", + "describe", + "std", + "quantile", + }, + "datetime": { + "median", + "prod", + "sem", + "cumsum", + "sum", + "mean", + "skew", + "cumprod", + "cummax", + "pct_change", + "var", + "mad", + "describe", + "std", + }, +} + + +class ApplyDictReturn: + def setup(self): + self.labels = np.arange(1000).repeat(10) + self.data = Series(np.random.randn(len(self.labels))) + + def time_groupby_apply_dict_return(self): + self.data.groupby(self.labels).apply( + lambda x: {"first": x.values[0], "last": x.values[-1]} + ) + + +class Apply: + + param_names = ["factor"] + params = [4, 5] + + def setup(self, factor): + N = 10**factor + 
# two cases: + # - small groups: small data (N**4) + many labels (2000) -> average group + # size of 5 (-> larger overhead of slicing method) + # - larger groups: larger data (N**5) + fewer labels (20) -> average group + # size of 5000 + labels = np.random.randint(0, 2000 if factor == 4 else 20, size=N) + labels2 = np.random.randint(0, 3, size=N) + df = DataFrame( + { + "key": labels, + "key2": labels2, + "value1": np.random.randn(N), + "value2": ["foo", "bar", "baz", "qux"] * (N // 4), + } + ) + self.df = df + + def time_scalar_function_multi_col(self, factor): + self.df.groupby(["key", "key2"]).apply(lambda x: 1) + + def time_scalar_function_single_col(self, factor): + self.df.groupby("key").apply(lambda x: 1) + + @staticmethod + def df_copy_function(g): + # ensure that the group name is available (see GH #15062) + g.name + return g.copy() + + def time_copy_function_multi_col(self, factor): + self.df.groupby(["key", "key2"]).apply(self.df_copy_function) + + def time_copy_overhead_single_col(self, factor): + self.df.groupby("key").apply(self.df_copy_function) + + +class ApplyNonUniqueUnsortedIndex: + def setup(self): + # GH 46527 + # unsorted and non-unique index + idx = np.arange(100)[::-1] + idx = Index(np.repeat(idx, 200), name="key") + self.df = DataFrame(np.random.randn(len(idx), 10), index=idx) + + def time_groupby_apply_non_unique_unsorted_index(self): + self.df.groupby("key", group_keys=False).apply(lambda x: x) + + +class Groups: + + param_names = ["key"] + params = ["int64_small", "int64_large", "object_small", "object_large"] + + def setup_cache(self): + size = 10**6 + data = { + "int64_small": Series(np.random.randint(0, 100, size=size)), + "int64_large": Series(np.random.randint(0, 10000, size=size)), + "object_small": Series( + tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size)) + ), + "object_large": Series( + tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size)) + ), + } + return data + + def setup(self, data, key): + self.ser = data[key] + + def time_series_groups(self, data, key): + self.ser.groupby(self.ser).groups + + def time_series_indices(self, data, key): + self.ser.groupby(self.ser).indices + + +class GroupManyLabels: + + params = [1, 1000] + param_names = ["ncols"] + + def setup(self, ncols): + N = 1000 + data = np.random.randn(N, ncols) + self.labels = np.random.randint(0, 100, size=N) + self.df = DataFrame(data) + + def time_sum(self, ncols): + self.df.groupby(self.labels).sum() + + +class Nth: + + param_names = ["dtype"] + params = ["float32", "float64", "datetime", "object"] + + def setup(self, dtype): + N = 10**5 + # with datetimes (GH7555) + if dtype == "datetime": + values = date_range("1/1/2011", periods=N, freq="s") + elif dtype == "object": + values = ["foo"] * N + else: + values = np.arange(N).astype(dtype) + + key = np.arange(N) + self.df = DataFrame({"key": key, "values": values}) + self.df.iloc[1, 1] = np.nan # insert missing data + + def time_frame_nth_any(self, dtype): + self.df.groupby("key").nth(0, dropna="any") + + def time_groupby_nth_all(self, dtype): + self.df.groupby("key").nth(0, dropna="all") + + def time_frame_nth(self, dtype): + self.df.groupby("key").nth(0) + + def time_series_nth_any(self, dtype): + self.df["values"].groupby(self.df["key"]).nth(0, dropna="any") + + def time_series_nth_all(self, dtype): + self.df["values"].groupby(self.df["key"]).nth(0, dropna="all") + + def time_series_nth(self, dtype): + self.df["values"].groupby(self.df["key"]).nth(0) + + +class DateAttributes: + def setup(self): + 
rng = date_range("1/1/2000", "12/31/2005", freq="H") + self.year, self.month, self.day = rng.year, rng.month, rng.day + self.ts = Series(np.random.randn(len(rng)), index=rng) + + def time_len_groupby_object(self): + len(self.ts.groupby([self.year, self.month, self.day])) + + +class Int64: + def setup(self): + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 17, 5)) + i = np.random.choice(len(arr), len(arr) * 5) + arr = np.vstack((arr, arr[i])) + i = np.random.permutation(len(arr)) + arr = arr[i] + self.cols = list("abcde") + self.df = DataFrame(arr, columns=self.cols) + self.df["jim"], self.df["joe"] = np.random.randn(2, len(self.df)) * 10 + + def time_overflow(self): + self.df.groupby(self.cols).max() + + +class CountMultiDtype: + def setup_cache(self): + n = 10000 + offsets = np.random.randint(n, size=n).astype("timedelta64[ns]") + dates = np.datetime64("now") + offsets + dates[np.random.rand(n) > 0.5] = np.datetime64("nat") + offsets[np.random.rand(n) > 0.5] = np.timedelta64("nat") + value2 = np.random.randn(n) + value2[np.random.rand(n) > 0.5] = np.nan + obj = np.random.choice(list("ab"), size=n).astype(object) + obj[np.random.randn(n) > 0.5] = np.nan + df = DataFrame( + { + "key1": np.random.randint(0, 500, size=n), + "key2": np.random.randint(0, 100, size=n), + "dates": dates, + "value2": value2, + "value3": np.random.randn(n), + "ints": np.random.randint(0, 1000, size=n), + "obj": obj, + "offsets": offsets, + } + ) + return df + + def time_multi_count(self, df): + df.groupby(["key1", "key2"]).count() + + +class CountMultiInt: + def setup_cache(self): + n = 10000 + df = DataFrame( + { + "key1": np.random.randint(0, 500, size=n), + "key2": np.random.randint(0, 100, size=n), + "ints": np.random.randint(0, 1000, size=n), + "ints2": np.random.randint(0, 1000, size=n), + } + ) + return df + + def time_multi_int_count(self, df): + df.groupby(["key1", "key2"]).count() + + def time_multi_int_nunique(self, df): + df.groupby(["key1", "key2"]).nunique() + + +class AggFunctions: + def setup_cache(self): + N = 10**5 + fac1 = np.array(["A", "B", "C"], dtype="O") + fac2 = np.array(["one", "two"], dtype="O") + df = DataFrame( + { + "key1": fac1.take(np.random.randint(0, 3, size=N)), + "key2": fac2.take(np.random.randint(0, 2, size=N)), + "value1": np.random.randn(N), + "value2": np.random.randn(N), + "value3": np.random.randn(N), + } + ) + return df + + def time_different_str_functions(self, df): + df.groupby(["key1", "key2"]).agg( + {"value1": "mean", "value2": "var", "value3": "sum"} + ) + + def time_different_numpy_functions(self, df): + df.groupby(["key1", "key2"]).agg( + {"value1": np.mean, "value2": np.var, "value3": np.sum} + ) + + def time_different_python_functions_multicol(self, df): + df.groupby(["key1", "key2"]).agg([sum, min, max]) + + def time_different_python_functions_singlecol(self, df): + df.groupby("key1").agg([sum, min, max]) + + +class GroupStrings: + def setup(self): + n = 2 * 10**5 + alpha = list(map("".join, product(ascii_letters, repeat=4))) + data = np.random.choice(alpha, (n // 5, 4), replace=False) + data = np.repeat(data, 5, axis=0) + self.df = DataFrame(data, columns=list("abcd")) + self.df["joe"] = (np.random.randn(len(self.df)) * 10).round(3) + self.df = self.df.sample(frac=1).reset_index(drop=True) + + def time_multi_columns(self): + self.df.groupby(list("abcd")).max() + + +class MultiColumn: + def setup_cache(self): + N = 10**5 + key1 = np.tile(np.arange(100, dtype=object), 1000) + key2 = key1.copy() + np.random.shuffle(key1) + np.random.shuffle(key2) + df = 
DataFrame( + { + "key1": key1, + "key2": key2, + "data1": np.random.randn(N), + "data2": np.random.randn(N), + } + ) + return df + + def time_lambda_sum(self, df): + df.groupby(["key1", "key2"]).agg(lambda x: x.values.sum()) + + def time_cython_sum(self, df): + df.groupby(["key1", "key2"]).sum() + + def time_col_select_lambda_sum(self, df): + df.groupby(["key1", "key2"])["data1"].agg(lambda x: x.values.sum()) + + def time_col_select_numpy_sum(self, df): + df.groupby(["key1", "key2"])["data1"].agg(np.sum) + + +class Size: + def setup(self): + n = 10**5 + offsets = np.random.randint(n, size=n).astype("timedelta64[ns]") + dates = np.datetime64("now") + offsets + self.df = DataFrame( + { + "key1": np.random.randint(0, 500, size=n), + "key2": np.random.randint(0, 100, size=n), + "value1": np.random.randn(n), + "value2": np.random.randn(n), + "value3": np.random.randn(n), + "dates": dates, + } + ) + self.draws = Series(np.random.randn(n)) + labels = Series(["foo", "bar", "baz", "qux"] * (n // 4)) + self.cats = labels.astype("category") + + def time_multi_size(self): + self.df.groupby(["key1", "key2"]).size() + + def time_category_size(self): + self.draws.groupby(self.cats).size() + + +class Shift: + def setup(self): + N = 18 + self.df = DataFrame({"g": ["a", "b"] * 9, "v": list(range(N))}) + + def time_defaults(self): + self.df.groupby("g").shift() + + def time_fill_value(self): + self.df.groupby("g").shift(fill_value=99) + + +class FillNA: + def setup(self): + N = 100 + self.df = DataFrame( + {"group": [1] * N + [2] * N, "value": [np.nan, 1.0] * N} + ).set_index("group") + + def time_df_ffill(self): + self.df.groupby("group").fillna(method="ffill") + + def time_df_bfill(self): + self.df.groupby("group").fillna(method="bfill") + + def time_srs_ffill(self): + self.df.groupby("group")["value"].fillna(method="ffill") + + def time_srs_bfill(self): + self.df.groupby("group")["value"].fillna(method="bfill") + + +class GroupByMethods: + + param_names = ["dtype", "method", "application", "ncols"] + params = [ + ["int", "int16", "float", "object", "datetime", "uint"], + [ + "all", + "any", + "bfill", + "count", + "cumcount", + "cummax", + "cummin", + "cumprod", + "cumsum", + "describe", + "diff", + "ffill", + "first", + "head", + "last", + "mad", + "max", + "min", + "median", + "mean", + "nunique", + "pct_change", + "prod", + "quantile", + "rank", + "sem", + "shift", + "size", + "skew", + "std", + "sum", + "tail", + "unique", + "value_counts", + "var", + ], + ["direct", "transformation"], + [1, 5], + ] + + def setup(self, dtype, method, application, ncols): + if method in method_blocklist.get(dtype, {}): + raise NotImplementedError # skip benchmark + + if ncols != 1 and method in ["value_counts", "unique"]: + # DataFrameGroupBy doesn't have these methods + raise NotImplementedError + + if application == "transformation" and method in [ + "describe", + "head", + "tail", + "unique", + "value_counts", + "size", + ]: + # DataFrameGroupBy doesn't have these methods + raise NotImplementedError + + if method == "describe": + ngroups = 20 + elif method in ["mad", "skew"]: + ngroups = 100 + else: + ngroups = 1000 + size = ngroups * 2 + rng = np.arange(ngroups).reshape(-1, 1) + rng = np.broadcast_to(rng, (len(rng), ncols)) + taker = np.random.randint(0, ngroups, size=size) + values = rng.take(taker, axis=0) + if dtype == "int": + key = np.random.randint(0, size, size=size) + elif dtype in ("int16", "uint"): + key = np.random.randint(0, size, size=size, dtype=dtype) + elif dtype == "float": + key = np.concatenate( 
+ [np.random.random(ngroups) * 0.1, np.random.random(ngroups) * 10.0] + ) + elif dtype == "object": + key = ["foo"] * size + elif dtype == "datetime": + key = date_range("1/1/2011", periods=size, freq="s") + + cols = [f"values{n}" for n in range(ncols)] + df = DataFrame(values, columns=cols) + df["key"] = key + + if len(cols) == 1: + cols = cols[0] + + if application == "transformation": + self.as_group_method = lambda: df.groupby("key")[cols].transform(method) + self.as_field_method = lambda: df.groupby(cols)["key"].transform(method) + else: + self.as_group_method = getattr(df.groupby("key")[cols], method) + self.as_field_method = getattr(df.groupby(cols)["key"], method) + + def time_dtype_as_group(self, dtype, method, application, ncols): + self.as_group_method() + + def time_dtype_as_field(self, dtype, method, application, ncols): + self.as_field_method() + + +class GroupByCythonAgg: + """ + Benchmarks specifically targeting our cython aggregation algorithms + (using a big enough dataframe with simple key, so a large part of the + time is actually spent in the grouped aggregation). + """ + + param_names = ["dtype", "method"] + params = [ + ["float64"], + [ + "sum", + "prod", + "min", + "max", + "mean", + "median", + "var", + "first", + "last", + "any", + "all", + ], + ] + + def setup(self, dtype, method): + N = 1_000_000 + df = DataFrame(np.random.randn(N, 10), columns=list("abcdefghij")) + df["key"] = np.random.randint(0, 100, size=N) + self.df = df + + def time_frame_agg(self, dtype, method): + self.df.groupby("key").agg(method) + + +class Cumulative: + param_names = ["dtype", "method"] + params = [ + ["float64", "int64", "Float64", "Int64"], + ["cummin", "cummax", "cumsum"], + ] + + def setup(self, dtype, method): + N = 500_000 + vals = np.random.randint(-10, 10, (N, 5)) + null_vals = vals.astype(float, copy=True) + null_vals[::2, :] = np.nan + null_vals[::3, :] = np.nan + df = DataFrame(vals, columns=list("abcde"), dtype=dtype) + null_df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype) + keys = np.random.randint(0, 100, size=N) + df["key"] = keys + null_df["key"] = keys + self.df = df + self.null_df = null_df + + def time_frame_transform(self, dtype, method): + self.df.groupby("key").transform(method) + + def time_frame_transform_many_nulls(self, dtype, method): + self.null_df.groupby("key").transform(method) + + +class RankWithTies: + # GH 21237 + param_names = ["dtype", "tie_method"] + params = [ + ["float64", "float32", "int64", "datetime64"], + ["first", "average", "dense", "min", "max"], + ] + + def setup(self, dtype, tie_method): + N = 10**4 + if dtype == "datetime64": + data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype) + else: + data = np.array([1] * N, dtype=dtype) + self.df = DataFrame({"values": data, "key": ["foo"] * N}) + + def time_rank_ties(self, dtype, tie_method): + self.df.groupby("key").rank(method=tie_method) + + +class Float32: + # GH 13335 + def setup(self): + tmp1 = (np.random.random(10000) * 0.1).astype(np.float32) + tmp2 = (np.random.random(10000) * 10.0).astype(np.float32) + tmp = np.concatenate((tmp1, tmp2)) + arr = np.repeat(tmp, 10) + self.df = DataFrame({"a": arr, "b": arr}) + + def time_sum(self): + self.df.groupby(["a"])["b"].sum() + + +class String: + # GH#41596 + param_names = ["dtype", "method"] + params = [ + ["str", "string[python]"], + [ + "sum", + "prod", + "min", + "max", + "mean", + "median", + "var", + "first", + "last", + "any", + "all", + ], + ] + + def setup(self, dtype, method): + cols = list("abcdefghjkl") + self.df = 
DataFrame( + np.random.randint(0, 100, size=(1_000_000, len(cols))), + columns=cols, + dtype=dtype, + ) + + def time_str_func(self, dtype, method): + self.df.groupby("a")[self.df.columns[1:]].agg(method) + + +class Categories: + def setup(self): + N = 10**5 + arr = np.random.random(N) + data = {"a": Categorical(np.random.randint(10000, size=N)), "b": arr} + self.df = DataFrame(data) + data = { + "a": Categorical(np.random.randint(10000, size=N), ordered=True), + "b": arr, + } + self.df_ordered = DataFrame(data) + data = { + "a": Categorical( + np.random.randint(100, size=N), categories=np.arange(10000) + ), + "b": arr, + } + self.df_extra_cat = DataFrame(data) + + def time_groupby_sort(self): + self.df.groupby("a")["b"].count() + + def time_groupby_nosort(self): + self.df.groupby("a", sort=False)["b"].count() + + def time_groupby_ordered_sort(self): + self.df_ordered.groupby("a")["b"].count() + + def time_groupby_ordered_nosort(self): + self.df_ordered.groupby("a", sort=False)["b"].count() + + def time_groupby_extra_cat_sort(self): + self.df_extra_cat.groupby("a")["b"].count() + + def time_groupby_extra_cat_nosort(self): + self.df_extra_cat.groupby("a", sort=False)["b"].count() + + +class Datelike: + # GH 14338 + params = ["period_range", "date_range", "date_range_tz"] + param_names = ["grouper"] + + def setup(self, grouper): + N = 10**4 + rng_map = { + "period_range": period_range, + "date_range": date_range, + "date_range_tz": partial(date_range, tz="US/Central"), + } + self.grouper = rng_map[grouper]("1900-01-01", freq="D", periods=N) + self.df = DataFrame(np.random.randn(10**4, 2)) + + def time_sum(self, grouper): + self.df.groupby(self.grouper).sum() + + +class SumBools: + # GH 2692 + def setup(self): + N = 500 + self.df = DataFrame({"ii": range(N), "bb": [True] * N}) + + def time_groupby_sum_booleans(self): + self.df.groupby("ii").sum() + + +class SumMultiLevel: + # GH 9049 + timeout = 120.0 + + def setup(self): + N = 50 + self.df = DataFrame( + {"A": list(range(N)) * 2, "B": range(N * 2), "C": 1} + ).set_index(["A", "B"]) + + def time_groupby_sum_multiindex(self): + self.df.groupby(level=[0, 1]).sum() + + +class Transform: + def setup(self): + n1 = 400 + n2 = 250 + index = MultiIndex( + levels=[np.arange(n1), tm.makeStringIndex(n2)], + codes=[np.repeat(range(n1), n2).tolist(), list(range(n2)) * n1], + names=["lev1", "lev2"], + ) + arr = np.random.randn(n1 * n2, 3) + arr[::10000, 0] = np.nan + arr[1::10000, 1] = np.nan + arr[2::10000, 2] = np.nan + data = DataFrame(arr, index=index, columns=["col1", "col20", "col3"]) + self.df = data + + n = 1000 + self.df_wide = DataFrame( + np.random.randn(n, n), + index=np.random.choice(range(10), n), + ) + + n = 1_000_000 + self.df_tall = DataFrame( + np.random.randn(n, 3), + index=np.random.randint(0, 5, n), + ) + + n = 20000 + self.df1 = DataFrame( + np.random.randint(1, n, (n, 3)), columns=["jim", "joe", "jolie"] + ) + self.df2 = self.df1.copy() + self.df2["jim"] = self.df2["joe"] + + self.df3 = DataFrame( + np.random.randint(1, (n / 10), (n, 3)), columns=["jim", "joe", "jolie"] + ) + self.df4 = self.df3.copy() + self.df4["jim"] = self.df4["joe"] + + def time_transform_lambda_max(self): + self.df.groupby(level="lev1").transform(lambda x: max(x)) + + def time_transform_ufunc_max(self): + self.df.groupby(level="lev1").transform(np.max) + + def time_transform_lambda_max_tall(self): + self.df_tall.groupby(level=0).transform(lambda x: np.max(x, axis=0)) + + def time_transform_lambda_max_wide(self): + self.df_wide.groupby(level=0).transform(lambda 
x: np.max(x, axis=0)) + + def time_transform_multi_key1(self): + self.df1.groupby(["jim", "joe"])["jolie"].transform("max") + + def time_transform_multi_key2(self): + self.df2.groupby(["jim", "joe"])["jolie"].transform("max") + + def time_transform_multi_key3(self): + self.df3.groupby(["jim", "joe"])["jolie"].transform("max") + + def time_transform_multi_key4(self): + self.df4.groupby(["jim", "joe"])["jolie"].transform("max") + + +class TransformBools: + def setup(self): + N = 120000 + transition_points = np.sort(np.random.choice(np.arange(N), 1400)) + transitions = np.zeros(N, dtype=np.bool_) + transitions[transition_points] = True + self.g = transitions.cumsum() + self.df = DataFrame({"signal": np.random.rand(N)}) + + def time_transform_mean(self): + self.df["signal"].groupby(self.g).transform(np.mean) + + +class TransformNaN: + # GH 12737 + def setup(self): + self.df_nans = DataFrame( + {"key": np.repeat(np.arange(1000), 10), "B": np.nan, "C": np.nan} + ) + self.df_nans.loc[4::10, "B":"C"] = 5 + + def time_first(self): + self.df_nans.groupby("key").transform("first") + + +class TransformEngine: + + param_names = ["parallel"] + params = [[True, False]] + + def setup(self, parallel): + N = 10**3 + data = DataFrame( + {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N}, + columns=[0, 1], + ) + self.parallel = parallel + self.grouper = data.groupby(0) + + def time_series_numba(self, parallel): + def function(values, index): + return values * 5 + + self.grouper[1].transform( + function, engine="numba", engine_kwargs={"parallel": self.parallel} + ) + + def time_series_cython(self, parallel): + def function(values): + return values * 5 + + self.grouper[1].transform(function, engine="cython") + + def time_dataframe_numba(self, parallel): + def function(values, index): + return values * 5 + + self.grouper.transform( + function, engine="numba", engine_kwargs={"parallel": self.parallel} + ) + + def time_dataframe_cython(self, parallel): + def function(values): + return values * 5 + + self.grouper.transform(function, engine="cython") + + +class AggEngine: + + param_names = ["parallel"] + params = [[True, False]] + + def setup(self, parallel): + N = 10**3 + data = DataFrame( + {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N}, + columns=[0, 1], + ) + self.parallel = parallel + self.grouper = data.groupby(0) + + def time_series_numba(self, parallel): + def function(values, index): + total = 0 + for i, value in enumerate(values): + if i % 2: + total += value + 5 + else: + total += value * 2 + return total + + self.grouper[1].agg( + function, engine="numba", engine_kwargs={"parallel": self.parallel} + ) + + def time_series_cython(self, parallel): + def function(values): + total = 0 + for i, value in enumerate(values): + if i % 2: + total += value + 5 + else: + total += value * 2 + return total + + self.grouper[1].agg(function, engine="cython") + + def time_dataframe_numba(self, parallel): + def function(values, index): + total = 0 + for i, value in enumerate(values): + if i % 2: + total += value + 5 + else: + total += value * 2 + return total + + self.grouper.agg( + function, engine="numba", engine_kwargs={"parallel": self.parallel} + ) + + def time_dataframe_cython(self, parallel): + def function(values): + total = 0 + for i, value in enumerate(values): + if i % 2: + total += value + 5 + else: + total += value * 2 + return total + + self.grouper.agg(function, engine="cython") + + +class Sample: + def setup(self): + N = 10**3 + self.df = DataFrame({"a": np.zeros(N)}) + 
self.groups = np.arange(0, N) + self.weights = np.ones(N) + + def time_sample(self): + self.df.groupby(self.groups).sample(n=1) + + def time_sample_weights(self): + self.df.groupby(self.groups).sample(n=1, weights=self.weights) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/hash_functions.py b/asv_bench/benchmarks/hash_functions.py new file mode 100644 index 00000000..da752b90 --- /dev/null +++ b/asv_bench/benchmarks/hash_functions.py @@ -0,0 +1,91 @@ +import numpy as np + +import pandas as pd + + +class UniqueForLargePyObjectInts: + def setup(self): + lst = [x << 32 for x in range(5000)] + self.arr = np.array(lst, dtype=np.object_) + + def time_unique(self): + pd.unique(self.arr) + + +class Float64GroupIndex: + # GH28303 + def setup(self): + self.df = pd.date_range( + start="1/1/2018", end="1/2/2018", periods=10**6 + ).to_frame() + self.group_index = np.round(self.df.index.astype(int) / 10**9) + + def time_groupby(self): + self.df.groupby(self.group_index).last() + + +class UniqueAndFactorizeArange: + params = range(4, 16) + param_names = ["exponent"] + + def setup(self, exponent): + a = np.arange(10**4, dtype="float64") + self.a2 = (a + 10**exponent).repeat(100) + + def time_factorize(self, exponent): + pd.factorize(self.a2) + + def time_unique(self, exponent): + pd.unique(self.a2) + + +class Unique: + params = ["Int64", "Float64"] + param_names = ["dtype"] + + def setup(self, dtype): + self.ser = pd.Series(([1, pd.NA, 2] + list(range(100_000))) * 3, dtype=dtype) + self.ser_unique = pd.Series(list(range(300_000)) + [pd.NA], dtype=dtype) + + def time_unique_with_duplicates(self, exponent): + pd.unique(self.ser) + + def time_unique(self, exponent): + pd.unique(self.ser_unique) + + +class NumericSeriesIndexing: + + params = [ + (pd.Int64Index, pd.UInt64Index, pd.Float64Index), + (10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6), + ] + param_names = ["index_dtype", "N"] + + def setup(self, index, N): + vals = np.array(list(range(55)) + [54] + list(range(55, N - 1))) + indices = index(vals) + self.data = pd.Series(np.arange(N), index=indices) + + def time_loc_slice(self, index, N): + # trigger building of mapping + self.data.loc[:800] + + +class NumericSeriesIndexingShuffled: + + params = [ + (pd.Int64Index, pd.UInt64Index, pd.Float64Index), + (10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6), + ] + param_names = ["index_dtype", "N"] + + def setup(self, index, N): + vals = np.array(list(range(55)) + [54] + list(range(55, N - 1))) + np.random.shuffle(vals) + indices = index(vals) + self.data = pd.Series(np.arange(N), index=indices) + + def time_loc_slice(self, index, N): + # trigger building of mapping + self.data.loc[:800] diff --git a/asv_bench/benchmarks/index_cached_properties.py b/asv_bench/benchmarks/index_cached_properties.py new file mode 100644 index 00000000..1a88bb7e --- /dev/null +++ b/asv_bench/benchmarks/index_cached_properties.py @@ -0,0 +1,75 @@ +import pandas as pd + + +class IndexCache: + number = 1 + repeat = (3, 100, 20) + + params = [ + [ + "CategoricalIndex", + "DatetimeIndex", + "Float64Index", + "IntervalIndex", + "Int64Index", + "MultiIndex", + "PeriodIndex", + "RangeIndex", + "TimedeltaIndex", + "UInt64Index", + ] + ] + param_names = ["index_type"] + + def setup(self, index_type): + N = 10**5 + if index_type == "MultiIndex": + self.idx = pd.MultiIndex.from_product( + [pd.date_range("1/1/2000", freq="T", periods=N // 2), ["a", "b"]] + ) + elif index_type == "DatetimeIndex": + self.idx = pd.date_range("1/1/2000", freq="T", 
periods=N) + elif index_type == "Int64Index": + self.idx = pd.Index(range(N)) + elif index_type == "PeriodIndex": + self.idx = pd.period_range("1/1/2000", freq="T", periods=N) + elif index_type == "RangeIndex": + self.idx = pd.RangeIndex(start=0, stop=N) + elif index_type == "IntervalIndex": + self.idx = pd.IntervalIndex.from_arrays(range(N), range(1, N + 1)) + elif index_type == "TimedeltaIndex": + self.idx = pd.TimedeltaIndex(range(N)) + elif index_type == "Float64Index": + self.idx = pd.Float64Index(range(N)) + elif index_type == "UInt64Index": + self.idx = pd.UInt64Index(range(N)) + elif index_type == "CategoricalIndex": + self.idx = pd.CategoricalIndex(range(N), range(N)) + else: + raise ValueError + assert len(self.idx) == N + self.idx._cache = {} + + def time_values(self, index_type): + self.idx._values + + def time_shape(self, index_type): + self.idx.shape + + def time_is_monotonic_decreasing(self, index_type): + self.idx.is_monotonic_decreasing + + def time_is_monotonic_increasing(self, index_type): + self.idx.is_monotonic_increasing + + def time_is_unique(self, index_type): + self.idx.is_unique + + def time_engine(self, index_type): + self.idx._engine + + def time_inferred_type(self, index_type): + self.idx.inferred_type + + def time_is_all_dates(self, index_type): + self.idx.is_all_dates diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py new file mode 100644 index 00000000..dab33f02 --- /dev/null +++ b/asv_bench/benchmarks/index_object.py @@ -0,0 +1,242 @@ +import gc + +import numpy as np + +from pandas import ( + DatetimeIndex, + Float64Index, + Index, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + date_range, +) + +from .pandas_vb_common import tm + + +class SetOperations: + + params = ( + ["datetime", "date_string", "int", "strings"], + ["intersection", "union", "symmetric_difference"], + ) + param_names = ["dtype", "method"] + + def setup(self, dtype, method): + N = 10**5 + dates_left = date_range("1/1/2000", periods=N, freq="T") + fmt = "%Y-%m-%d %H:%M:%S" + date_str_left = Index(dates_left.strftime(fmt)) + int_left = Index(np.arange(N)) + str_left = tm.makeStringIndex(N) + data = { + "datetime": {"left": dates_left, "right": dates_left[:-1]}, + "date_string": {"left": date_str_left, "right": date_str_left[:-1]}, + "int": {"left": int_left, "right": int_left[:-1]}, + "strings": {"left": str_left, "right": str_left[:-1]}, + } + self.left = data[dtype]["left"] + self.right = data[dtype]["right"] + + def time_operation(self, dtype, method): + getattr(self.left, method)(self.right) + + +class SetDisjoint: + def setup(self): + N = 10**5 + B = N + 20000 + self.datetime_left = DatetimeIndex(range(N)) + self.datetime_right = DatetimeIndex(range(N, B)) + + def time_datetime_difference_disjoint(self): + self.datetime_left.difference(self.datetime_right) + + +class Range: + def setup(self): + self.idx_inc = RangeIndex(start=0, stop=10**6, step=3) + self.idx_dec = RangeIndex(start=10**6, stop=-1, step=-3) + + def time_max(self): + self.idx_inc.max() + + def time_max_trivial(self): + self.idx_dec.max() + + def time_min(self): + self.idx_dec.min() + + def time_min_trivial(self): + self.idx_inc.min() + + def time_get_loc_inc(self): + self.idx_inc.get_loc(900_000) + + def time_get_loc_dec(self): + self.idx_dec.get_loc(100_000) + + def time_iter_inc(self): + for _ in self.idx_inc: + pass + + def time_iter_dec(self): + for _ in self.idx_dec: + pass + + def time_sort_values_asc(self): + self.idx_inc.sort_values() + + def 
time_sort_values_des(self): + self.idx_inc.sort_values(ascending=False) + + +class IndexEquals: + def setup(self): + idx_large_fast = RangeIndex(100_000) + idx_small_slow = date_range(start="1/1/2012", periods=1) + self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow]) + + self.idx_non_object = RangeIndex(1) + + def time_non_object_equals_multiindex(self): + self.idx_non_object.equals(self.mi_large_slow) + + +class IndexAppend: + def setup(self): + + N = 10_000 + self.range_idx = RangeIndex(0, 100) + self.int_idx = self.range_idx.astype(int) + self.obj_idx = self.int_idx.astype(str) + self.range_idxs = [] + self.int_idxs = [] + self.object_idxs = [] + for i in range(1, N): + r_idx = RangeIndex(i * 100, (i + 1) * 100) + self.range_idxs.append(r_idx) + i_idx = r_idx.astype(int) + self.int_idxs.append(i_idx) + o_idx = i_idx.astype(str) + self.object_idxs.append(o_idx) + + def time_append_range_list(self): + self.range_idx.append(self.range_idxs) + + def time_append_int_list(self): + self.int_idx.append(self.int_idxs) + + def time_append_obj_list(self): + self.obj_idx.append(self.object_idxs) + + +class Indexing: + + params = ["String", "Float", "Int"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10**6 + self.idx = getattr(tm, f"make{dtype}Index")(N) + self.array_mask = (np.arange(N) % 3) == 0 + self.series_mask = Series(self.array_mask) + self.sorted = self.idx.sort_values() + half = N // 2 + self.non_unique = self.idx[:half].append(self.idx[:half]) + self.non_unique_sorted = ( + self.sorted[:half].append(self.sorted[:half]).sort_values() + ) + self.key = self.sorted[N // 4] + + def time_boolean_array(self, dtype): + self.idx[self.array_mask] + + def time_boolean_series(self, dtype): + self.idx[self.series_mask] + + def time_get(self, dtype): + self.idx[1] + + def time_slice(self, dtype): + self.idx[:-1] + + def time_slice_step(self, dtype): + self.idx[::2] + + def time_get_loc(self, dtype): + self.idx.get_loc(self.key) + + def time_get_loc_sorted(self, dtype): + self.sorted.get_loc(self.key) + + def time_get_loc_non_unique(self, dtype): + self.non_unique.get_loc(self.key) + + def time_get_loc_non_unique_sorted(self, dtype): + self.non_unique_sorted.get_loc(self.key) + + +class Float64IndexMethod: + # GH 13166 + def setup(self): + N = 100_000 + a = np.arange(N) + self.ind = Float64Index(a * 4.8000000418824129e-08) + + def time_get_loc(self): + self.ind.get_loc(0) + + +class IntervalIndexMethod: + # GH 24813 + params = [10**3, 10**5] + + def setup(self, N): + left = np.append(np.arange(N), np.array(0)) + right = np.append(np.arange(1, N + 1), np.array(1)) + self.intv = IntervalIndex.from_arrays(left, right) + self.intv._engine + + self.intv2 = IntervalIndex.from_arrays(left + 1, right + 1) + self.intv2._engine + + self.left = IntervalIndex.from_breaks(np.arange(N)) + self.right = IntervalIndex.from_breaks(np.arange(N - 3, 2 * N - 3)) + + def time_monotonic_inc(self, N): + self.intv.is_monotonic_increasing + + def time_is_unique(self, N): + self.intv.is_unique + + def time_intersection(self, N): + self.left.intersection(self.right) + + def time_intersection_one_duplicate(self, N): + self.intv.intersection(self.right) + + def time_intersection_both_duplicate(self, N): + self.intv.intersection(self.intv2) + + +class GC: + params = [1, 2, 5] + + def create_use_drop(self): + idx = Index(list(range(1_000_000))) + idx._engine + + def peakmem_gc_instances(self, N): + try: + gc.disable() + + for _ in range(N): + self.create_use_drop() + finally: + gc.enable() + + 
+from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py new file mode 100644 index 00000000..54da7c10 --- /dev/null +++ b/asv_bench/benchmarks/indexing.py @@ -0,0 +1,503 @@ +""" +These benchmarks are for Series and DataFrame indexing methods. For the +lower-level methods directly on Index and subclasses, see index_object.py, +indexing_engine.py, and index_cached.py +""" +import itertools +import string +import warnings + +import numpy as np + +from pandas import ( + CategoricalIndex, + DataFrame, + Float64Index, + Int64Index, + IntervalIndex, + MultiIndex, + Series, + UInt64Index, + concat, + date_range, + option_context, + period_range, +) + +from .pandas_vb_common import tm + + +class NumericSeriesIndexing: + + params = [ + (Int64Index, UInt64Index, Float64Index), + ("unique_monotonic_inc", "nonunique_monotonic_inc"), + ] + param_names = ["index_dtype", "index_structure"] + + def setup(self, index, index_structure): + N = 10**6 + indices = { + "unique_monotonic_inc": index(range(N)), + "nonunique_monotonic_inc": index( + list(range(55)) + [54] + list(range(55, N - 1)) + ), + } + self.data = Series(np.random.rand(N), index=indices[index_structure]) + self.array = np.arange(10000) + self.array_list = self.array.tolist() + + def time_getitem_scalar(self, index, index_structure): + self.data[800000] + + def time_getitem_slice(self, index, index_structure): + self.data[:800000] + + def time_getitem_list_like(self, index, index_structure): + self.data[[800000]] + + def time_getitem_array(self, index, index_structure): + self.data[self.array] + + def time_getitem_lists(self, index, index_structure): + self.data[self.array_list] + + def time_iloc_array(self, index, index_structure): + self.data.iloc[self.array] + + def time_iloc_list_like(self, index, index_structure): + self.data.iloc[[800000]] + + def time_iloc_scalar(self, index, index_structure): + self.data.iloc[800000] + + def time_iloc_slice(self, index, index_structure): + self.data.iloc[:800000] + + def time_loc_array(self, index, index_structure): + self.data.loc[self.array] + + def time_loc_list_like(self, index, index_structure): + self.data.loc[[800000]] + + def time_loc_scalar(self, index, index_structure): + self.data.loc[800000] + + def time_loc_slice(self, index, index_structure): + self.data.loc[:800000] + + +class NonNumericSeriesIndexing: + + params = [ + ("string", "datetime", "period"), + ("unique_monotonic_inc", "nonunique_monotonic_inc", "non_monotonic"), + ] + param_names = ["index_dtype", "index_structure"] + + def setup(self, index, index_structure): + N = 10**6 + if index == "string": + index = tm.makeStringIndex(N) + elif index == "datetime": + index = date_range("1900", periods=N, freq="s") + elif index == "period": + index = period_range("1900", periods=N, freq="s") + index = index.sort_values() + assert index.is_unique and index.is_monotonic_increasing + if index_structure == "nonunique_monotonic_inc": + index = index.insert(item=index[2], loc=2)[:-1] + elif index_structure == "non_monotonic": + index = index[::2].append(index[1::2]) + assert len(index) == N + self.s = Series(np.random.rand(N), index=index) + self.lbl = index[80000] + # warm up index mapping + self.s[self.lbl] + + def time_getitem_label_slice(self, index, index_structure): + self.s[: self.lbl] + + def time_getitem_pos_slice(self, index, index_structure): + self.s[:80000] + + def time_getitem_scalar(self, index, index_structure): + self.s[self.lbl] + + def 
time_getitem_list_like(self, index, index_structure): + self.s[[self.lbl]] + + +class DataFrameStringIndexing: + def setup(self): + index = tm.makeStringIndex(1000) + columns = tm.makeStringIndex(30) + with warnings.catch_warnings(record=True): + self.df = DataFrame(np.random.randn(1000, 30), index=index, columns=columns) + self.idx_scalar = index[100] + self.col_scalar = columns[10] + self.bool_indexer = self.df[self.col_scalar] > 0 + self.bool_obj_indexer = self.bool_indexer.astype(object) + self.boolean_indexer = (self.df[self.col_scalar] > 0).astype("boolean") + + def time_loc(self): + self.df.loc[self.idx_scalar, self.col_scalar] + + def time_at(self): + self.df.at[self.idx_scalar, self.col_scalar] + + def time_at_setitem(self): + self.df.at[self.idx_scalar, self.col_scalar] = 0.0 + + def time_getitem_scalar(self): + self.df[self.col_scalar][self.idx_scalar] + + def time_boolean_rows(self): + self.df[self.bool_indexer] + + def time_boolean_rows_object(self): + self.df[self.bool_obj_indexer] + + def time_boolean_rows_boolean(self): + self.df[self.boolean_indexer] + + +class DataFrameNumericIndexing: + + params = [ + (Int64Index, UInt64Index, Float64Index), + ("unique_monotonic_inc", "nonunique_monotonic_inc"), + ] + param_names = ["index_dtype", "index_structure"] + + def setup(self, index, index_structure): + N = 10**5 + indices = { + "unique_monotonic_inc": index(range(N)), + "nonunique_monotonic_inc": index( + list(range(55)) + [54] + list(range(55, N - 1)) + ), + } + self.idx_dupe = np.array(range(30)) * 99 + self.df = DataFrame(np.random.randn(N, 5), index=indices[index_structure]) + self.df_dup = concat([self.df, 2 * self.df, 3 * self.df]) + self.bool_indexer = [True] * (N // 2) + [False] * (N - N // 2) + + def time_iloc_dups(self, index, index_structure): + self.df_dup.iloc[self.idx_dupe] + + def time_loc_dups(self, index, index_structure): + self.df_dup.loc[self.idx_dupe] + + def time_iloc(self, index, index_structure): + self.df.iloc[:100, 0] + + def time_loc(self, index, index_structure): + self.df.loc[:100, 0] + + def time_bool_indexer(self, index, index_structure): + self.df[self.bool_indexer] + + +class Take: + + params = ["int", "datetime"] + param_names = ["index"] + + def setup(self, index): + N = 100000 + indexes = { + "int": Int64Index(np.arange(N)), + "datetime": date_range("2011-01-01", freq="S", periods=N), + } + index = indexes[index] + self.s = Series(np.random.rand(N), index=index) + self.indexer = np.random.randint(0, N, size=N) + + def time_take(self, index): + self.s.take(self.indexer) + + +class MultiIndexing: + + params = [True, False] + param_names = ["unique_levels"] + + def setup(self, unique_levels): + self.nlevels = 2 + if unique_levels: + mi = MultiIndex.from_arrays([range(1000000)] * self.nlevels) + else: + mi = MultiIndex.from_product([range(1000)] * self.nlevels) + self.df = DataFrame(np.random.randn(len(mi)), index=mi) + + self.tgt_slice = slice(200, 800) + self.tgt_null_slice = slice(None) + self.tgt_list = list(range(0, 1000, 10)) + self.tgt_scalar = 500 + + bool_indexer = np.zeros(len(mi), dtype=np.bool_) + bool_indexer[slice(0, len(mi), 100)] = True + self.tgt_bool_indexer = bool_indexer + + def time_loc_partial_key_slice(self, unique_levels): + self.df.loc[self.tgt_slice, :] + + def time_loc_partial_key_null_slice(self, unique_levels): + self.df.loc[self.tgt_null_slice, :] + + def time_loc_partial_key_list(self, unique_levels): + self.df.loc[self.tgt_list, :] + + def time_loc_partial_key_scalar(self, unique_levels): + 
self.df.loc[self.tgt_scalar, :] + + def time_loc_partial_key_bool_indexer(self, unique_levels): + self.df.loc[self.tgt_bool_indexer, :] + + def time_loc_all_slices(self, unique_levels): + target = tuple([self.tgt_slice] * self.nlevels) + self.df.loc[target, :] + + def time_loc_all_null_slices(self, unique_levels): + target = tuple([self.tgt_null_slice] * self.nlevels) + self.df.loc[target, :] + + def time_loc_all_lists(self, unique_levels): + target = tuple([self.tgt_list] * self.nlevels) + self.df.loc[target, :] + + def time_loc_all_scalars(self, unique_levels): + target = tuple([self.tgt_scalar] * self.nlevels) + self.df.loc[target, :] + + def time_loc_all_bool_indexers(self, unique_levels): + target = tuple([self.tgt_bool_indexer] * self.nlevels) + self.df.loc[target, :] + + def time_loc_slice_plus_null_slice(self, unique_levels): + target = (self.tgt_slice, self.tgt_null_slice) + self.df.loc[target, :] + + def time_loc_null_slice_plus_slice(self, unique_levels): + target = (self.tgt_null_slice, self.tgt_slice) + self.df.loc[target, :] + + def time_xs_level_0(self, unique_levels): + target = self.tgt_scalar + self.df.xs(target, level=0) + + def time_xs_level_1(self, unique_levels): + target = self.tgt_scalar + self.df.xs(target, level=1) + + def time_xs_full_key(self, unique_levels): + target = tuple([self.tgt_scalar] * self.nlevels) + self.df.xs(target) + + +class IntervalIndexing: + def setup_cache(self): + idx = IntervalIndex.from_breaks(np.arange(1000001)) + monotonic = Series(np.arange(1000000), index=idx) + return monotonic + + def time_getitem_scalar(self, monotonic): + monotonic[80000] + + def time_loc_scalar(self, monotonic): + monotonic.loc[80000] + + def time_getitem_list(self, monotonic): + monotonic[80000:] + + def time_loc_list(self, monotonic): + monotonic.loc[80000:] + + +class DatetimeIndexIndexing: + def setup(self): + dti = date_range("2016-01-01", periods=10000, tz="US/Pacific") + dti2 = dti.tz_convert("UTC") + self.dti = dti + self.dti2 = dti2 + + def time_get_indexer_mismatched_tz(self): + # reached via e.g. 
+ # ser = Series(range(len(dti)), index=dti) + # ser[dti2] + self.dti.get_indexer(self.dti2) + + +class SortedAndUnsortedDatetimeIndexLoc: + def setup(self): + dti = date_range("2016-01-01", periods=10000, tz="US/Pacific") + index = np.array(dti) + + unsorted_index = index.copy() + unsorted_index[10] = unsorted_index[20] + + self.df_unsorted = DataFrame(index=unsorted_index, data={"a": 1}) + self.df_sort = DataFrame(index=index, data={"a": 1}) + + def time_loc_unsorted(self): + self.df_unsorted.loc["2016-6-11"] + + def time_loc_sorted(self): + self.df_sort.loc["2016-6-11"] + + +class CategoricalIndexIndexing: + + params = ["monotonic_incr", "monotonic_decr", "non_monotonic"] + param_names = ["index"] + + def setup(self, index): + N = 10**5 + values = list("a" * N + "b" * N + "c" * N) + indices = { + "monotonic_incr": CategoricalIndex(values), + "monotonic_decr": CategoricalIndex(reversed(values)), + "non_monotonic": CategoricalIndex(list("abc" * N)), + } + self.data = indices[index] + self.data_unique = CategoricalIndex( + ["".join(perm) for perm in itertools.permutations(string.printable, 3)] + ) + + self.int_scalar = 10000 + self.int_list = list(range(10000)) + + self.cat_scalar = "b" + self.cat_list = ["a", "c"] + + def time_getitem_scalar(self, index): + self.data[self.int_scalar] + + def time_getitem_slice(self, index): + self.data[: self.int_scalar] + + def time_getitem_list_like(self, index): + self.data[[self.int_scalar]] + + def time_getitem_list(self, index): + self.data[self.int_list] + + def time_getitem_bool_array(self, index): + self.data[self.data == self.cat_scalar] + + def time_get_loc_scalar(self, index): + self.data.get_loc(self.cat_scalar) + + def time_get_indexer_list(self, index): + self.data_unique.get_indexer(self.cat_list) + + +class MethodLookup: + def setup_cache(self): + s = Series() + return s + + def time_lookup_iloc(self, s): + s.iloc + + def time_lookup_loc(self, s): + s.loc + + +class GetItemSingleColumn: + def setup(self): + self.df_string_col = DataFrame(np.random.randn(3000, 1), columns=["A"]) + self.df_int_col = DataFrame(np.random.randn(3000, 1)) + + def time_frame_getitem_single_column_label(self): + self.df_string_col["A"] + + def time_frame_getitem_single_column_int(self): + self.df_int_col[0] + + +class IndexSingleRow: + params = [True, False] + param_names = ["unique_cols"] + + def setup(self, unique_cols): + arr = np.arange(10**7).reshape(-1, 10) + df = DataFrame(arr) + dtypes = ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8", "f8", "f4"] + for i, d in enumerate(dtypes): + df[i] = df[i].astype(d) + + if not unique_cols: + # GH#33032 single-row lookups with non-unique columns were + # 15x slower than with unique columns + df.columns = ["A", "A"] + list(df.columns[2:]) + + self.df = df + + def time_iloc_row(self, unique_cols): + self.df.iloc[10000] + + def time_loc_row(self, unique_cols): + self.df.loc[10000] + + +class AssignTimeseriesIndex: + def setup(self): + N = 100000 + idx = date_range("1/1/2000", periods=N, freq="H") + self.df = DataFrame(np.random.randn(N, 1), columns=["A"], index=idx) + + def time_frame_assign_timeseries_index(self): + self.df["date"] = self.df.index + + +class InsertColumns: + def setup(self): + self.N = 10**3 + self.df = DataFrame(index=range(self.N)) + self.df2 = DataFrame(np.random.randn(self.N, 2)) + + def time_insert(self): + for i in range(100): + self.df.insert(0, i, np.random.randn(self.N), allow_duplicates=True) + + def time_insert_middle(self): + # same as time_insert but inserting to a middle column rather 
than + # front or back (which have fast-paths) + for i in range(100): + self.df2.insert( + 1, "colname", np.random.randn(self.N), allow_duplicates=True + ) + + def time_assign_with_setitem(self): + for i in range(100): + self.df[i] = np.random.randn(self.N) + + def time_assign_list_like_with_setitem(self): + self.df[list(range(100))] = np.random.randn(self.N, 100) + + def time_assign_list_of_columns_concat(self): + df = DataFrame(np.random.randn(self.N, 100)) + concat([self.df, df], axis=1) + + +class ChainIndexing: + + params = [None, "warn"] + param_names = ["mode"] + + def setup(self, mode): + self.N = 1000000 + self.df = DataFrame({"A": np.arange(self.N), "B": "foo"}) + + def time_chained_indexing(self, mode): + df = self.df + N = self.N + with warnings.catch_warnings(record=True): + with option_context("mode.chained_assignment", mode): + df2 = df[df.A > N // 2] + df2["C"] = 1.0 + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py new file mode 100644 index 00000000..0c6cb89f --- /dev/null +++ b/asv_bench/benchmarks/indexing_engines.py @@ -0,0 +1,102 @@ +""" +Benchmarks in this file depend exclusively on code in _libs/ + +If a PR does not edit anything in _libs, it is very unlikely that benchmarks +in this file will be affected. +""" + +import numpy as np + +from pandas._libs import index as libindex + + +def _get_numeric_engines(): + engine_names = [ + ("Int64Engine", np.int64), + ("Int32Engine", np.int32), + ("Int16Engine", np.int16), + ("Int8Engine", np.int8), + ("UInt64Engine", np.uint64), + ("UInt32Engine", np.uint32), + ("UInt16engine", np.uint16), + ("UInt8Engine", np.uint8), + ("Float64Engine", np.float64), + ("Float32Engine", np.float32), + ] + return [ + (getattr(libindex, engine_name), dtype) + for engine_name, dtype in engine_names + if hasattr(libindex, engine_name) + ] + + +class NumericEngineIndexing: + + params = [ + _get_numeric_engines(), + ["monotonic_incr", "monotonic_decr", "non_monotonic"], + [True, False], + [10**5, 2 * 10**6], # 2e6 is above SIZE_CUTOFF + ] + param_names = ["engine_and_dtype", "index_type", "unique", "N"] + + def setup(self, engine_and_dtype, index_type, unique, N): + engine, dtype = engine_and_dtype + + if index_type == "monotonic_incr": + if unique: + arr = np.arange(N * 3, dtype=dtype) + else: + values = list([1] * N + [2] * N + [3] * N) + arr = np.array(values, dtype=dtype) + elif index_type == "monotonic_decr": + if unique: + arr = np.arange(N * 3, dtype=dtype)[::-1] + else: + values = list([1] * N + [2] * N + [3] * N) + arr = np.array(values, dtype=dtype)[::-1] + else: + assert index_type == "non_monotonic" + if unique: + arr = np.empty(N * 3, dtype=dtype) + arr[:N] = np.arange(N * 2, N * 3, dtype=dtype) + arr[N:] = np.arange(N * 2, dtype=dtype) + else: + arr = np.array([1, 2, 3] * N, dtype=dtype) + + self.data = engine(arr) + # code belows avoids populating the mapping etc. while timing. 
+ self.data.get_loc(2) + + self.key_middle = arr[len(arr) // 2] + self.key_early = arr[2] + + def time_get_loc(self, engine_and_dtype, index_type, unique, N): + self.data.get_loc(self.key_early) + + def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N): + # searchsorted performance may be different near the middle of a range + # vs near an endpoint + self.data.get_loc(self.key_middle) + + +class ObjectEngineIndexing: + + params = [("monotonic_incr", "monotonic_decr", "non_monotonic")] + param_names = ["index_type"] + + def setup(self, index_type): + N = 10**5 + values = list("a" * N + "b" * N + "c" * N) + arr = { + "monotonic_incr": np.array(values, dtype=object), + "monotonic_decr": np.array(list(reversed(values)), dtype=object), + "non_monotonic": np.array(list("abc") * N, dtype=object), + }[index_type] + + self.data = libindex.ObjectEngine(arr) + # code belows avoids populating the mapping etc. while timing. + self.data.get_loc("b") + + def time_get_loc(self, index_type): + self.data.get_loc("b") diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py new file mode 100644 index 00000000..0bbb599f --- /dev/null +++ b/asv_bench/benchmarks/inference.py @@ -0,0 +1,323 @@ +""" +The functions benchmarked in this file depend _almost_ exclusively on +_libs, but not in a way that is easy to formalize. + +If a PR does not change anything in pandas/_libs/ or pandas/core/tools/, then +it is likely that these benchmarks will be unaffected. +""" + +import numpy as np + +from pandas import ( + NaT, + Series, + date_range, + to_datetime, + to_numeric, + to_timedelta, +) + +from .pandas_vb_common import ( + lib, + tm, +) + + +class ToNumeric: + + params = ["ignore", "coerce"] + param_names = ["errors"] + + def setup(self, errors): + N = 10000 + self.float = Series(np.random.randn(N)) + self.numstr = self.float.astype("str") + self.str = Series(tm.makeStringIndex(N)) + + def time_from_float(self, errors): + to_numeric(self.float, errors=errors) + + def time_from_numeric_str(self, errors): + to_numeric(self.numstr, errors=errors) + + def time_from_str(self, errors): + to_numeric(self.str, errors=errors) + + +class ToNumericDowncast: + + param_names = ["dtype", "downcast"] + params = [ + [ + "string-float", + "string-int", + "string-nint", + "datetime64", + "int-list", + "int32", + ], + [None, "integer", "signed", "unsigned", "float"], + ] + + N = 500000 + N2 = N // 2 + + data_dict = { + "string-int": ["1"] * N2 + [2] * N2, + "string-nint": ["-1"] * N2 + [2] * N2, + "datetime64": np.repeat( + np.array(["1970-01-01", "1970-01-02"], dtype="datetime64[D]"), N + ), + "string-float": ["1.1"] * N2 + [2] * N2, + "int-list": [1] * N2 + [2] * N2, + "int32": np.repeat(np.int32(1), N), + } + + def setup(self, dtype, downcast): + self.data = self.data_dict[dtype] + + def time_downcast(self, dtype, downcast): + to_numeric(self.data, downcast=downcast) + + +class MaybeConvertNumeric: + # maybe_convert_numeric depends _exclusively_ on _libs, could + # go in benchmarks/libs.py + + def setup_cache(self): + N = 10**6 + arr = np.repeat([2**63], N) + np.arange(N).astype("uint64") + data = arr.astype(object) + data[1::2] = arr[1::2].astype(str) + data[-1] = -1 + return data + + def time_convert(self, data): + lib.maybe_convert_numeric(data, set(), coerce_numeric=False) + + +class MaybeConvertObjects: + # maybe_convert_objects depends _almost_ exclusively on _libs, but + # does have some run-time imports from outside of _libs + + def setup(self): + N = 10**5 + + data = 
list(range(N)) + data[0] = NaT + data = np.array(data) + self.data = data + + def time_maybe_convert_objects(self): + lib.maybe_convert_objects(self.data) + + +class ToDatetimeFromIntsFloats: + def setup(self): + self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64") + self.ts_sec_uint = Series(range(1521080307, 1521685107), dtype="uint64") + self.ts_sec_float = self.ts_sec.astype("float64") + + self.ts_nanosec = 1_000_000 * self.ts_sec + self.ts_nanosec_uint = 1_000_000 * self.ts_sec_uint + self.ts_nanosec_float = self.ts_nanosec.astype("float64") + + # speed of int64, uint64 and float64 paths should be comparable + + def time_nanosec_int64(self): + to_datetime(self.ts_nanosec, unit="ns") + + def time_nanosec_uint64(self): + to_datetime(self.ts_nanosec_uint, unit="ns") + + def time_nanosec_float64(self): + to_datetime(self.ts_nanosec_float, unit="ns") + + def time_sec_uint64(self): + to_datetime(self.ts_sec_uint, unit="s") + + def time_sec_int64(self): + to_datetime(self.ts_sec, unit="s") + + def time_sec_float64(self): + to_datetime(self.ts_sec_float, unit="s") + + +class ToDatetimeYYYYMMDD: + def setup(self): + rng = date_range(start="1/1/2000", periods=10000, freq="D") + self.stringsD = Series(rng.strftime("%Y%m%d")) + + def time_format_YYYYMMDD(self): + to_datetime(self.stringsD, format="%Y%m%d") + + +class ToDatetimeCacheSmallCount: + + params = ([True, False], [50, 500, 5000, 100000]) + param_names = ["cache", "count"] + + def setup(self, cache, count): + rng = date_range(start="1/1/1971", periods=count) + self.unique_date_strings = rng.strftime("%Y-%m-%d").tolist() + + def time_unique_date_strings(self, cache, count): + to_datetime(self.unique_date_strings, cache=cache) + + +class ToDatetimeISO8601: + def setup(self): + rng = date_range(start="1/1/2000", periods=20000, freq="H") + self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist() + self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist() + self.strings_tz_space = [ + x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng + ] + self.strings_zero_tz = [x.strftime("%Y-%m-%d %H:%M:%S") + "Z" for x in rng] + + def time_iso8601(self): + to_datetime(self.strings) + + def time_iso8601_nosep(self): + to_datetime(self.strings_nosep) + + def time_iso8601_format(self): + to_datetime(self.strings, format="%Y-%m-%d %H:%M:%S") + + def time_iso8601_format_no_sep(self): + to_datetime(self.strings_nosep, format="%Y%m%d %H:%M:%S") + + def time_iso8601_tz_spaceformat(self): + to_datetime(self.strings_tz_space) + + def time_iso8601_infer_zero_tz_fromat(self): + # GH 41047 + to_datetime(self.strings_zero_tz, infer_datetime_format=True) + + +class ToDatetimeNONISO8601: + def setup(self): + N = 10000 + half = N // 2 + ts_string_1 = "March 1, 2018 12:00:00+0400" + ts_string_2 = "March 1, 2018 12:00:00+0500" + self.same_offset = [ts_string_1] * N + self.diff_offset = [ts_string_1] * half + [ts_string_2] * half + + def time_same_offset(self): + to_datetime(self.same_offset) + + def time_different_offset(self): + to_datetime(self.diff_offset) + + +class ToDatetimeFormatQuarters: + def setup(self): + self.s = Series(["2Q2005", "2Q05", "2005Q1", "05Q1"] * 10000) + + def time_infer_quarter(self): + to_datetime(self.s) + + +class ToDatetimeFormat: + def setup(self): + N = 100000 + self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N) + self.s2 = self.s.str.replace(":\\S+$", "") + + self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N + self.diff_offset = [ + f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10) + ] * (N // 
10) + + def time_exact(self): + to_datetime(self.s2, format="%d%b%y") + + def time_no_exact(self): + to_datetime(self.s, format="%d%b%y", exact=False) + + def time_same_offset(self): + to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z") + + def time_different_offset(self): + to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z") + + def time_same_offset_to_utc(self): + to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) + + def time_different_offset_to_utc(self): + to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) + + +class ToDatetimeCache: + + params = [True, False] + param_names = ["cache"] + + def setup(self, cache): + N = 10000 + self.unique_numeric_seconds = list(range(N)) + self.dup_numeric_seconds = [1000] * N + self.dup_string_dates = ["2000-02-11"] * N + self.dup_string_with_tz = ["2000-02-11 15:00:00-0800"] * N + + def time_unique_seconds_and_unit(self, cache): + to_datetime(self.unique_numeric_seconds, unit="s", cache=cache) + + def time_dup_seconds_and_unit(self, cache): + to_datetime(self.dup_numeric_seconds, unit="s", cache=cache) + + def time_dup_string_dates(self, cache): + to_datetime(self.dup_string_dates, cache=cache) + + def time_dup_string_dates_and_format(self, cache): + to_datetime(self.dup_string_dates, format="%Y-%m-%d", cache=cache) + + def time_dup_string_tzoffset_dates(self, cache): + to_datetime(self.dup_string_with_tz, cache=cache) + + +# GH 43901 +class ToDatetimeInferDatetimeFormat: + def setup(self): + rng = date_range(start="1/1/2000", periods=100000, freq="H") + self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist() + + def time_infer_datetime_format(self): + to_datetime(self.strings, infer_datetime_format=True) + + +class ToTimedelta: + def setup(self): + self.ints = np.random.randint(0, 60, size=10000) + self.str_days = [] + self.str_seconds = [] + for i in self.ints: + self.str_days.append(f"{i} days") + self.str_seconds.append(f"00:00:{i:02d}") + + def time_convert_int(self): + to_timedelta(self.ints, unit="s") + + def time_convert_string_days(self): + to_timedelta(self.str_days) + + def time_convert_string_seconds(self): + to_timedelta(self.str_seconds) + + +class ToTimedeltaErrors: + + params = ["coerce", "ignore"] + param_names = ["errors"] + + def setup(self, errors): + ints = np.random.randint(0, 60, size=10000) + self.arr = [f"{i} days" for i in ints] + self.arr[-1] = "apple" + + def time_convert(self, errors): + to_timedelta(self.arr, errors=errors) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/__init__.py b/asv_bench/benchmarks/io/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py new file mode 100644 index 00000000..10aef954 --- /dev/null +++ b/asv_bench/benchmarks/io/csv.py @@ -0,0 +1,573 @@ +from io import ( + BytesIO, + StringIO, +) +import random +import string + +import numpy as np + +from pandas import ( + Categorical, + DataFrame, + concat, + date_range, + read_csv, + to_datetime, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) + + +class ToCSV(BaseIO): + + fname = "__test__.csv" + params = ["wide", "long", "mixed"] + param_names = ["kind"] + + def setup(self, kind): + wide_frame = DataFrame(np.random.randn(3000, 30)) + long_frame = DataFrame( + { + "A": np.arange(50000), + "B": np.arange(50000) + 1.0, + "C": np.arange(50000) + 2.0, + "D": np.arange(50000) + 3.0, + } + ) + mixed_frame = DataFrame( + { + "float": 
np.random.randn(5000), + "int": np.random.randn(5000).astype(int), + "bool": (np.arange(5000) % 2) == 0, + "datetime": date_range("2001", freq="s", periods=5000), + "object": ["foo"] * 5000, + } + ) + mixed_frame.loc[30:500, "float"] = np.nan + data = {"wide": wide_frame, "long": long_frame, "mixed": mixed_frame} + self.df = data[kind] + + def time_frame(self, kind): + self.df.to_csv(self.fname) + + +class ToCSVMultiIndexUnusedLevels(BaseIO): + + fname = "__test__.csv" + + def setup(self): + df = DataFrame({"a": np.random.randn(100_000), "b": 1, "c": 1}) + self.df = df.set_index(["a", "b"]) + self.df_unused_levels = self.df.iloc[:10_000] + self.df_single_index = df.set_index(["a"]).iloc[:10_000] + + def time_full_frame(self): + self.df.to_csv(self.fname) + + def time_sliced_frame(self): + self.df_unused_levels.to_csv(self.fname) + + def time_single_index_frame(self): + self.df_single_index.to_csv(self.fname) + + +class ToCSVDatetime(BaseIO): + + fname = "__test__.csv" + + def setup(self): + rng = date_range("1/1/2000", periods=1000) + self.data = DataFrame(rng, index=rng) + + def time_frame_date_formatting(self): + self.data.to_csv(self.fname, date_format="%Y%m%d") + + +class ToCSVDatetimeIndex(BaseIO): + + fname = "__test__.csv" + + def setup(self): + rng = date_range("2000", periods=100_000, freq="S") + self.data = DataFrame({"a": 1}, index=rng) + + def time_frame_date_formatting_index(self): + self.data.to_csv(self.fname, date_format="%Y-%m-%d %H:%M:%S") + + def time_frame_date_no_format_index(self): + self.data.to_csv(self.fname) + + +class ToCSVDatetimeBig(BaseIO): + + fname = "__test__.csv" + timeout = 1500 + params = [1000, 10000, 100000] + param_names = ["obs"] + + def setup(self, obs): + d = "2018-11-29" + dt = "2018-11-26 11:18:27.0" + self.data = DataFrame( + { + "dt": [np.datetime64(dt)] * obs, + "d": [np.datetime64(d)] * obs, + "r": [np.random.uniform()] * obs, + } + ) + + def time_frame(self, obs): + self.data.to_csv(self.fname) + + +class ToCSVIndexes(BaseIO): + + fname = "__test__.csv" + + @staticmethod + def _create_df(rows, cols): + index_cols = { + "index1": np.random.randint(0, rows, rows), + "index2": np.full(rows, 1, dtype=int), + "index3": np.full(rows, 1, dtype=int), + } + data_cols = { + f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols) + } + df = DataFrame({**index_cols, **data_cols}) + return df + + def setup(self): + ROWS = 100000 + COLS = 5 + # For tests using .head(), create an initial dataframe with this many times + # more rows + HEAD_ROW_MULTIPLIER = 10 + + self.df_standard_index = self._create_df(ROWS, COLS) + + self.df_custom_index_then_head = ( + self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS) + .set_index(["index1", "index2", "index3"]) + .head(ROWS) + ) + + self.df_head_then_custom_index = ( + self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS) + .head(ROWS) + .set_index(["index1", "index2", "index3"]) + ) + + def time_standard_index(self): + self.df_standard_index.to_csv(self.fname) + + def time_multiindex(self): + self.df_head_then_custom_index.to_csv(self.fname) + + def time_head_of_multiindex(self): + self.df_custom_index_then_head.to_csv(self.fname) + + +class StringIORewind: + def data(self, stringio_object): + stringio_object.seek(0) + return stringio_object + + +class ReadCSVDInferDatetimeFormat(StringIORewind): + + params = ([True, False], ["custom", "iso8601", "ymd"]) + param_names = ["infer_datetime_format", "format"] + + def setup(self, infer_datetime_format, format): + rng = date_range("1/1/2000", periods=1000) + formats 
= { + "custom": "%m/%d/%Y %H:%M:%S.%f", + "iso8601": "%Y-%m-%d %H:%M:%S", + "ymd": "%Y%m%d", + } + dt_format = formats[format] + self.StringIO_input = StringIO("\n".join(rng.strftime(dt_format).tolist())) + + def time_read_csv(self, infer_datetime_format, format): + read_csv( + self.data(self.StringIO_input), + header=None, + names=["foo"], + parse_dates=["foo"], + infer_datetime_format=infer_datetime_format, + ) + + +class ReadCSVConcatDatetime(StringIORewind): + + iso8601 = "%Y-%m-%d %H:%M:%S" + + def setup(self): + rng = date_range("1/1/2000", periods=50000, freq="S") + self.StringIO_input = StringIO("\n".join(rng.strftime(self.iso8601).tolist())) + + def time_read_csv(self): + read_csv( + self.data(self.StringIO_input), + header=None, + names=["foo"], + parse_dates=["foo"], + infer_datetime_format=False, + ) + + +class ReadCSVConcatDatetimeBadDateValue(StringIORewind): + + params = (["nan", "0", ""],) + param_names = ["bad_date_value"] + + def setup(self, bad_date_value): + self.StringIO_input = StringIO((f"{bad_date_value},\n") * 50000) + + def time_read_csv(self, bad_date_value): + read_csv( + self.data(self.StringIO_input), + header=None, + names=["foo", "bar"], + parse_dates=["foo"], + infer_datetime_format=False, + ) + + +class ReadCSVSkipRows(BaseIO): + + fname = "__test__.csv" + params = ([None, 10000], ["c", "python", "pyarrow"]) + param_names = ["skiprows", "engine"] + + def setup(self, skiprows, engine): + N = 20000 + index = tm.makeStringIndex(N) + df = DataFrame( + { + "float1": np.random.randn(N), + "float2": np.random.randn(N), + "string1": ["foo"] * N, + "bool1": [True] * N, + "int1": np.random.randint(0, N, size=N), + }, + index=index, + ) + df.to_csv(self.fname) + + def time_skipprows(self, skiprows, engine): + read_csv(self.fname, skiprows=skiprows, engine=engine) + + +class ReadUint64Integers(StringIORewind): + def setup(self): + self.na_values = [2**63 + 500] + arr = np.arange(10000).astype("uint64") + 2**63 + self.data1 = StringIO("\n".join(arr.astype(str).tolist())) + arr = arr.astype(object) + arr[500] = -1 + self.data2 = StringIO("\n".join(arr.astype(str).tolist())) + + def time_read_uint64(self): + read_csv(self.data(self.data1), header=None, names=["foo"]) + + def time_read_uint64_neg_values(self): + read_csv(self.data(self.data2), header=None, names=["foo"]) + + def time_read_uint64_na_values(self): + read_csv( + self.data(self.data1), header=None, names=["foo"], na_values=self.na_values + ) + + +class ReadCSVThousands(BaseIO): + + fname = "__test__.csv" + params = ([",", "|"], [None, ","], ["c", "python"]) + param_names = ["sep", "thousands", "engine"] + + def setup(self, sep, thousands, engine): + N = 10000 + K = 8 + data = np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)) + df = DataFrame(data) + if thousands is not None: + fmt = f":{thousands}" + fmt = "{" + fmt + "}" + df = df.applymap(lambda x: fmt.format(x)) + df.to_csv(self.fname, sep=sep) + + def time_thousands(self, sep, thousands, engine): + read_csv(self.fname, sep=sep, thousands=thousands, engine=engine) + + +class ReadCSVComment(StringIORewind): + params = ["c", "python"] + param_names = ["engine"] + + def setup(self, engine): + data = ["A,B,C"] + (["1,2,3 # comment"] * 100000) + self.StringIO_input = StringIO("\n".join(data)) + + def time_comment(self, engine): + read_csv( + self.data(self.StringIO_input), comment="#", header=None, names=list("abc") + ) + + +class ReadCSVFloatPrecision(StringIORewind): + + params = ([",", ";"], [".", "_"], [None, "high", "round_trip"]) + param_names 
= ["sep", "decimal", "float_precision"] + + def setup(self, sep, decimal, float_precision): + floats = [ + "".join([random.choice(string.digits) for _ in range(28)]) + for _ in range(15) + ] + rows = sep.join([f"0{decimal}" + "{}"] * 3) + "\n" + data = rows * 5 + data = data.format(*floats) * 200 # 1000 x 3 strings csv + self.StringIO_input = StringIO(data) + + def time_read_csv(self, sep, decimal, float_precision): + read_csv( + self.data(self.StringIO_input), + sep=sep, + header=None, + names=list("abc"), + float_precision=float_precision, + ) + + def time_read_csv_python_engine(self, sep, decimal, float_precision): + read_csv( + self.data(self.StringIO_input), + sep=sep, + header=None, + engine="python", + float_precision=None, + names=list("abc"), + ) + + +class ReadCSVEngine(StringIORewind): + params = ["c", "python", "pyarrow"] + param_names = ["engine"] + + def setup(self, engine): + data = ["A,B,C,D,E"] + (["1,2,3,4,5"] * 100000) + self.StringIO_input = StringIO("\n".join(data)) + # simulate reading from file + self.BytesIO_input = BytesIO(self.StringIO_input.read().encode("utf-8")) + + def time_read_stringcsv(self, engine): + read_csv(self.data(self.StringIO_input), engine=engine) + + def time_read_bytescsv(self, engine): + read_csv(self.data(self.BytesIO_input), engine=engine) + + +class ReadCSVCategorical(BaseIO): + + fname = "__test__.csv" + params = ["c", "python"] + param_names = ["engine"] + + def setup(self, engine): + N = 100000 + group1 = ["aaaaaaaa", "bbbbbbb", "cccccccc", "dddddddd", "eeeeeeee"] + df = DataFrame(np.random.choice(group1, (N, 3)), columns=list("abc")) + df.to_csv(self.fname, index=False) + + def time_convert_post(self, engine): + read_csv(self.fname, engine=engine).apply(Categorical) + + def time_convert_direct(self, engine): + read_csv(self.fname, engine=engine, dtype="category") + + +class ReadCSVParseDates(StringIORewind): + params = ["c", "python"] + param_names = ["engine"] + + def setup(self, engine): + data = """{},19:00:00,18:56:00,0.8100,2.8100,7.2000,0.0000,280.0000\n + {},20:00:00,19:56:00,0.0100,2.2100,7.2000,0.0000,260.0000\n + {},21:00:00,20:56:00,-0.5900,2.2100,5.7000,0.0000,280.0000\n + {},21:00:00,21:18:00,-0.9900,2.0100,3.6000,0.0000,270.0000\n + {},22:00:00,21:56:00,-0.5900,1.7100,5.1000,0.0000,290.0000\n + """ + two_cols = ["KORD,19990127"] * 5 + data = data.format(*two_cols) + self.StringIO_input = StringIO(data) + + def time_multiple_date(self, engine): + read_csv( + self.data(self.StringIO_input), + engine=engine, + sep=",", + header=None, + names=list(string.digits[:9]), + parse_dates=[[1, 2], [1, 3]], + ) + + def time_baseline(self, engine): + read_csv( + self.data(self.StringIO_input), + engine=engine, + sep=",", + header=None, + parse_dates=[1], + names=list(string.digits[:9]), + ) + + +class ReadCSVCachedParseDates(StringIORewind): + params = ([True, False], ["c", "python"]) + param_names = ["do_cache", "engine"] + + def setup(self, do_cache, engine): + data = ("\n".join([f"10/{year}" for year in range(2000, 2100)]) + "\n") * 10 + self.StringIO_input = StringIO(data) + + def time_read_csv_cached(self, do_cache, engine): + try: + read_csv( + self.data(self.StringIO_input), + engine=engine, + header=None, + parse_dates=[0], + cache_dates=do_cache, + ) + except TypeError: + # cache_dates is a new keyword in 0.25 + pass + + +class ReadCSVMemoryGrowth(BaseIO): + + chunksize = 20 + num_rows = 1000 + fname = "__test__.csv" + params = ["c", "python"] + param_names = ["engine"] + + def setup(self, engine): + with open(self.fname, "w") 
as f: + for i in range(self.num_rows): + f.write(f"{i}\n") + + def mem_parser_chunks(self, engine): + # see gh-24805. + result = read_csv(self.fname, chunksize=self.chunksize, engine=engine) + + for _ in result: + pass + + +class ReadCSVParseSpecialDate(StringIORewind): + params = (["mY", "mdY", "hm"], ["c", "python"]) + param_names = ["value", "engine"] + objects = { + "mY": "01-2019\n10-2019\n02/2000\n", + "mdY": "12/02/2010\n", + "hm": "21:34\n", + } + + def setup(self, value, engine): + count_elem = 10000 + data = self.objects[value] * count_elem + self.StringIO_input = StringIO(data) + + def time_read_special_date(self, value, engine): + read_csv( + self.data(self.StringIO_input), + engine=engine, + sep=",", + header=None, + names=["Date"], + parse_dates=["Date"], + ) + + +class ReadCSVMemMapUTF8: + + fname = "__test__.csv" + number = 5 + + def setup(self): + lines = [] + line_length = 128 + start_char = " " + end_char = "\U00010080" + # This for loop creates a list of 128-char strings + # consisting of consecutive Unicode chars + for lnum in range(ord(start_char), ord(end_char), line_length): + line = "".join([chr(c) for c in range(lnum, lnum + 0x80)]) + "\n" + try: + line.encode("utf-8") + except UnicodeEncodeError: + # Some 16-bit words are not valid Unicode chars and must be skipped + continue + lines.append(line) + df = DataFrame(lines) + df = concat([df for n in range(100)], ignore_index=True) + df.to_csv(self.fname, index=False, header=False, encoding="utf-8") + + def time_read_memmapped_utf8(self): + read_csv(self.fname, header=None, memory_map=True, encoding="utf-8", engine="c") + + +class ParseDateComparison(StringIORewind): + params = ([False, True],) + param_names = ["cache_dates"] + + def setup(self, cache_dates): + count_elem = 10000 + data = "12-02-2010\n" * count_elem + self.StringIO_input = StringIO(data) + + def time_read_csv_dayfirst(self, cache_dates): + try: + read_csv( + self.data(self.StringIO_input), + sep=",", + header=None, + names=["Date"], + parse_dates=["Date"], + cache_dates=cache_dates, + dayfirst=True, + ) + except TypeError: + # cache_dates is a new keyword in 0.25 + pass + + def time_to_datetime_dayfirst(self, cache_dates): + df = read_csv( + self.data(self.StringIO_input), dtype={"date": str}, names=["date"] + ) + to_datetime(df["date"], cache=cache_dates, dayfirst=True) + + def time_to_datetime_format_DD_MM_YYYY(self, cache_dates): + df = read_csv( + self.data(self.StringIO_input), dtype={"date": str}, names=["date"] + ) + to_datetime(df["date"], cache=cache_dates, format="%d-%m-%Y") + + +class ReadCSVIndexCol(StringIORewind): + def setup(self): + count_elem = 100_000 + data = "a,b\n" + "1,2\n" * count_elem + self.StringIO_input = StringIO(data) + + def time_read_csv_index_col(self): + read_csv(self.StringIO_input, index_col="a") + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py new file mode 100644 index 00000000..a88c4374 --- /dev/null +++ b/asv_bench/benchmarks/io/excel.py @@ -0,0 +1,119 @@ +from io import BytesIO + +import numpy as np +from odf.opendocument import OpenDocumentSpreadsheet +from odf.table import ( + Table, + TableCell, + TableRow, +) +from odf.text import P + +from pandas import ( + DataFrame, + ExcelWriter, + date_range, + read_excel, +) + +from ..pandas_vb_common import tm + + +def _generate_dataframe(): + N = 2000 + C = 5 + df = DataFrame( + np.random.randn(N, C), + columns=[f"float{i}" for i in range(C)], + 
index=date_range("20000101", periods=N, freq="H"), + ) + df["object"] = tm.makeStringIndex(N) + return df + + +class WriteExcel: + + params = ["openpyxl", "xlsxwriter", "xlwt"] + param_names = ["engine"] + + def setup(self, engine): + self.df = _generate_dataframe() + + def time_write_excel(self, engine): + bio = BytesIO() + bio.seek(0) + writer = ExcelWriter(bio, engine=engine) + self.df.to_excel(writer, sheet_name="Sheet1") + writer.save() + + +class WriteExcelStyled: + params = ["openpyxl", "xlsxwriter"] + param_names = ["engine"] + + def setup(self, engine): + self.df = _generate_dataframe() + + def time_write_excel_style(self, engine): + bio = BytesIO() + bio.seek(0) + writer = ExcelWriter(bio, engine=engine) + df_style = self.df.style + df_style.applymap(lambda x: "border: red 1px solid;") + df_style.applymap(lambda x: "color: blue") + df_style.applymap(lambda x: "border-color: green black", subset=["float1"]) + df_style.to_excel(writer, sheet_name="Sheet1") + writer.save() + + +class ReadExcel: + + params = ["xlrd", "openpyxl", "odf"] + param_names = ["engine"] + fname_excel = "spreadsheet.xlsx" + fname_excel_xls = "spreadsheet.xls" + fname_odf = "spreadsheet.ods" + + def _create_odf(self): + doc = OpenDocumentSpreadsheet() + table = Table(name="Table1") + for row in self.df.values: + tr = TableRow() + for val in row: + tc = TableCell(valuetype="string") + tc.addElement(P(text=val)) + tr.addElement(tc) + table.addElement(tr) + + doc.spreadsheet.addElement(table) + doc.save(self.fname_odf) + + def setup_cache(self): + self.df = _generate_dataframe() + + self.df.to_excel(self.fname_excel, sheet_name="Sheet1") + self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1") + self._create_odf() + + def time_read_excel(self, engine): + if engine == "xlrd": + fname = self.fname_excel_xls + elif engine == "odf": + fname = self.fname_odf + else: + fname = self.fname_excel + read_excel(fname, engine=engine) + + +class ReadExcelNRows(ReadExcel): + def time_read_excel(self, engine): + if engine == "xlrd": + fname = self.fname_excel_xls + elif engine == "odf": + fname = self.fname_odf + else: + fname = self.fname_excel + read_excel(fname, engine=engine, nrows=10) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py new file mode 100644 index 00000000..4a2c1c87 --- /dev/null +++ b/asv_bench/benchmarks/io/hdf.py @@ -0,0 +1,138 @@ +import numpy as np + +from pandas import ( + DataFrame, + HDFStore, + date_range, + read_hdf, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) + + +class HDFStoreDataFrame(BaseIO): + def setup(self): + N = 25000 + index = tm.makeStringIndex(N) + self.df = DataFrame( + {"float1": np.random.randn(N), "float2": np.random.randn(N)}, index=index + ) + self.df_mixed = DataFrame( + { + "float1": np.random.randn(N), + "float2": np.random.randn(N), + "string1": ["foo"] * N, + "bool1": [True] * N, + "int1": np.random.randint(0, N, size=N), + }, + index=index, + ) + self.df_wide = DataFrame(np.random.randn(N, 100)) + self.start_wide = self.df_wide.index[10000] + self.stop_wide = self.df_wide.index[15000] + self.df2 = DataFrame( + {"float1": np.random.randn(N), "float2": np.random.randn(N)}, + index=date_range("1/1/2000", periods=N), + ) + self.start = self.df2.index[10000] + self.stop = self.df2.index[15000] + self.df_wide2 = DataFrame( + np.random.randn(N, 100), index=date_range("1/1/2000", periods=N) + ) + self.df_dc = DataFrame( + np.random.randn(N, 10), columns=["C%03d" % i for i 
in range(10)] + ) + + self.fname = "__test__.h5" + + self.store = HDFStore(self.fname) + self.store.put("fixed", self.df) + self.store.put("fixed_mixed", self.df_mixed) + self.store.append("table", self.df2) + self.store.append("table_mixed", self.df_mixed) + self.store.append("table_wide", self.df_wide) + self.store.append("table_wide2", self.df_wide2) + + def teardown(self): + self.store.close() + self.remove(self.fname) + + def time_read_store(self): + self.store.get("fixed") + + def time_read_store_mixed(self): + self.store.get("fixed_mixed") + + def time_write_store(self): + self.store.put("fixed_write", self.df) + + def time_write_store_mixed(self): + self.store.put("fixed_mixed_write", self.df_mixed) + + def time_read_store_table_mixed(self): + self.store.select("table_mixed") + + def time_write_store_table_mixed(self): + self.store.append("table_mixed_write", self.df_mixed) + + def time_read_store_table(self): + self.store.select("table") + + def time_write_store_table(self): + self.store.append("table_write", self.df) + + def time_read_store_table_wide(self): + self.store.select("table_wide") + + def time_write_store_table_wide(self): + self.store.append("table_wide_write", self.df_wide) + + def time_write_store_table_dc(self): + self.store.append("table_dc_write", self.df_dc, data_columns=True) + + def time_query_store_table_wide(self): + self.store.select( + "table_wide", where="index > self.start_wide and index < self.stop_wide" + ) + + def time_query_store_table(self): + self.store.select("table", where="index > self.start and index < self.stop") + + def time_store_repr(self): + repr(self.store) + + def time_store_str(self): + str(self.store) + + def time_store_info(self): + self.store.info() + + +class HDF(BaseIO): + + params = ["table", "fixed"] + param_names = ["format"] + + def setup(self, format): + self.fname = "__test__.h5" + N = 100000 + C = 5 + self.df = DataFrame( + np.random.randn(N, C), + columns=[f"float{i}" for i in range(C)], + index=date_range("20000101", periods=N, freq="H"), + ) + self.df["object"] = tm.makeStringIndex(N) + self.df.to_hdf(self.fname, "df", format=format) + + def time_read_hdf(self, format): + read_hdf(self.fname, "df") + + def time_write_hdf(self, format): + self.df.to_hdf(self.fname, "df", format=format) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py new file mode 100644 index 00000000..bb09fe0f --- /dev/null +++ b/asv_bench/benchmarks/io/json.py @@ -0,0 +1,312 @@ +import sys + +import numpy as np + +from pandas import ( + DataFrame, + concat, + date_range, + json_normalize, + read_json, + timedelta_range, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) + + +class ReadJSON(BaseIO): + + fname = "__test__.json" + params = (["split", "index", "records"], ["int", "datetime"]) + param_names = ["orient", "index"] + + def setup(self, orient, index): + N = 100000 + indexes = { + "int": np.arange(N), + "datetime": date_range("20000101", periods=N, freq="H"), + } + df = DataFrame( + np.random.randn(N, 5), + columns=[f"float_{i}" for i in range(5)], + index=indexes[index], + ) + df.to_json(self.fname, orient=orient) + + def time_read_json(self, orient, index): + read_json(self.fname, orient=orient) + + +class ReadJSONLines(BaseIO): + + fname = "__test_lines__.json" + params = ["int", "datetime"] + param_names = ["index"] + + def setup(self, index): + N = 100000 + indexes = { + "int": np.arange(N), + "datetime": date_range("20000101", periods=N, 
freq="H"), + } + df = DataFrame( + np.random.randn(N, 5), + columns=[f"float_{i}" for i in range(5)], + index=indexes[index], + ) + df.to_json(self.fname, orient="records", lines=True) + + def time_read_json_lines(self, index): + read_json(self.fname, orient="records", lines=True) + + def time_read_json_lines_concat(self, index): + concat(read_json(self.fname, orient="records", lines=True, chunksize=25000)) + + def time_read_json_lines_nrows(self, index): + read_json(self.fname, orient="records", lines=True, nrows=25000) + + def peakmem_read_json_lines(self, index): + read_json(self.fname, orient="records", lines=True) + + def peakmem_read_json_lines_concat(self, index): + concat(read_json(self.fname, orient="records", lines=True, chunksize=25000)) + + def peakmem_read_json_lines_nrows(self, index): + read_json(self.fname, orient="records", lines=True, nrows=15000) + + +class NormalizeJSON(BaseIO): + fname = "__test__.json" + params = [ + ["split", "columns", "index", "values", "records"], + ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"], + ] + param_names = ["orient", "frame"] + + def setup(self, orient, frame): + data = { + "hello": ["thisisatest", 999898, "mixed types"], + "nest1": {"nest2": {"nest3": "nest3_value", "nest3_int": 3445}}, + "nest1_list": {"nest2": ["blah", 32423, 546456.876, 92030234]}, + "hello2": "string", + } + self.data = [data for i in range(10000)] + + def time_normalize_json(self, orient, frame): + json_normalize(self.data) + + +class ToJSON(BaseIO): + + fname = "__test__.json" + params = [ + ["split", "columns", "index", "values", "records"], + ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"], + ] + param_names = ["orient", "frame"] + + def setup(self, orient, frame): + N = 10**5 + ncols = 5 + index = date_range("20000101", periods=N, freq="H") + timedeltas = timedelta_range(start=1, periods=N, freq="s") + datetimes = date_range(start=1, periods=N, freq="s") + ints = np.random.randint(100000000, size=N) + longints = sys.maxsize * np.random.randint(100000000, size=N) + floats = np.random.randn(N) + strings = tm.makeStringIndex(N) + self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) + self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td_int_ts = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas, + "int_1": ints, + "int_2": ints, + "ts_1": datetimes, + "ts_2": datetimes, + }, + index=index, + ) + self.df_int_floats = DataFrame( + { + "int_1": ints, + "int_2": ints, + "int_3": ints, + "float_1": floats, + "float_2": floats, + "float_3": floats, + }, + index=index, + ) + self.df_int_float_str = DataFrame( + { + "int_1": ints, + "int_2": ints, + "float_1": floats, + "float_2": floats, + "str_1": strings, + "str_2": strings, + }, + index=index, + ) + + self.df_longint_float_str = DataFrame( + { + "longint_1": longints, + "longint_2": longints, + "float_1": floats, + "float_2": floats, + "str_1": strings, + "str_2": strings, + }, + index=index, + ) + + def time_to_json(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient) + + def peakmem_to_json(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient) + + +class ToJSONWide(ToJSON): + def setup(self, orient, frame): + super().setup(orient, frame) + base_df = getattr(self, frame).copy() + df_wide = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + self.df_wide = df_wide + + def time_to_json_wide(self, orient, frame): + self.df_wide.to_json(self.fname, 
orient=orient) + + def peakmem_to_json_wide(self, orient, frame): + self.df_wide.to_json(self.fname, orient=orient) + + +class ToJSONISO(BaseIO): + fname = "__test__.json" + params = [["split", "columns", "index", "values", "records"]] + param_names = ["orient"] + + def setup(self, orient): + N = 10**5 + index = date_range("20000101", periods=N, freq="H") + timedeltas = timedelta_range(start=1, periods=N, freq="s") + datetimes = date_range(start=1, periods=N, freq="s") + self.df = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas, + "ts_1": datetimes, + "ts_2": datetimes, + }, + index=index, + ) + + def time_iso_format(self, orient): + self.df.to_json(orient=orient, date_format="iso") + + +class ToJSONLines(BaseIO): + + fname = "__test__.json" + + def setup(self): + N = 10**5 + ncols = 5 + index = date_range("20000101", periods=N, freq="H") + timedeltas = timedelta_range(start=1, periods=N, freq="s") + datetimes = date_range(start=1, periods=N, freq="s") + ints = np.random.randint(100000000, size=N) + longints = sys.maxsize * np.random.randint(100000000, size=N) + floats = np.random.randn(N) + strings = tm.makeStringIndex(N) + self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) + self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td_int_ts = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas, + "int_1": ints, + "int_2": ints, + "ts_1": datetimes, + "ts_2": datetimes, + }, + index=index, + ) + self.df_int_floats = DataFrame( + { + "int_1": ints, + "int_2": ints, + "int_3": ints, + "float_1": floats, + "float_2": floats, + "float_3": floats, + }, + index=index, + ) + self.df_int_float_str = DataFrame( + { + "int_1": ints, + "int_2": ints, + "float_1": floats, + "float_2": floats, + "str_1": strings, + "str_2": strings, + }, + index=index, + ) + self.df_longint_float_str = DataFrame( + { + "longint_1": longints, + "longint_2": longints, + "float_1": floats, + "float_2": floats, + "str_1": strings, + "str_2": strings, + }, + index=index, + ) + + def time_floats_with_int_idex_lines(self): + self.df.to_json(self.fname, orient="records", lines=True) + + def time_floats_with_dt_index_lines(self): + self.df_date_idx.to_json(self.fname, orient="records", lines=True) + + def time_delta_int_tstamp_lines(self): + self.df_td_int_ts.to_json(self.fname, orient="records", lines=True) + + def time_float_int_lines(self): + self.df_int_floats.to_json(self.fname, orient="records", lines=True) + + def time_float_int_str_lines(self): + self.df_int_float_str.to_json(self.fname, orient="records", lines=True) + + def time_float_longint_str_lines(self): + self.df_longint_float_str.to_json(self.fname, orient="records", lines=True) + + +class ToJSONMem: + def setup_cache(self): + df = DataFrame([[1]]) + frames = {"int": df, "float": df.astype(float)} + + return frames + + def peakmem_int(self, frames): + df = frames["int"] + for _ in range(100_000): + df.to_json() + + def peakmem_float(self, frames): + df = frames["float"] + for _ in range(100_000): + df.to_json() + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py new file mode 100644 index 00000000..5390056b --- /dev/null +++ b/asv_bench/benchmarks/io/parsers.py @@ -0,0 +1,42 @@ +import numpy as np + +try: + from pandas._libs.tslibs.parsing import ( + _does_string_look_like_datetime, + concat_date_cols, + ) +except ImportError: + # Avoid whole benchmark suite import failure on asv (currently 0.4) + pass + + 
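+# The try/except above matters because asv imports every module in this
+# directory while discovering benchmarks, so a missing or renamed private
+# helper would otherwise abort the whole suite rather than only these
+# benchmarks.  A minimal sketch of the same guard pattern, assuming a None
+# fallback (which this file does not define):
+#
+#     try:
+#         from pandas._libs.tslibs.parsing import concat_date_cols
+#     except ImportError:
+#         concat_date_cols = None  # benchmarks using it then fail individually
+#                                  # instead of breaking benchmark discovery
+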
+class DoesStringLookLikeDatetime: + + params = (["2Q2005", "0.0", "10000"],) + param_names = ["value"] + + def setup(self, value): + self.objects = [value] * 1000000 + + def time_check_datetimes(self, value): + for obj in self.objects: + _does_string_look_like_datetime(obj) + + +class ConcatDateCols: + + params = ([1234567890, "AAAA"], [1, 2]) + param_names = ["value", "dim"] + + def setup(self, value, dim): + count_elem = 10000 + if dim == 1: + self.object = (np.array([value] * count_elem),) + if dim == 2: + self.object = ( + np.array([value] * count_elem), + np.array([value] * count_elem), + ) + + def time_check_concat(self, value, dim): + concat_date_cols(self.object) diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py new file mode 100644 index 00000000..c71cdcdc --- /dev/null +++ b/asv_bench/benchmarks/io/pickle.py @@ -0,0 +1,41 @@ +import numpy as np + +from pandas import ( + DataFrame, + date_range, + read_pickle, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) + + +class Pickle(BaseIO): + def setup(self): + self.fname = "__test__.pkl" + N = 100000 + C = 5 + self.df = DataFrame( + np.random.randn(N, C), + columns=[f"float{i}" for i in range(C)], + index=date_range("20000101", periods=N, freq="H"), + ) + self.df["object"] = tm.makeStringIndex(N) + self.df.to_pickle(self.fname) + + def time_read_pickle(self): + read_pickle(self.fname) + + def time_write_pickle(self): + self.df.to_pickle(self.fname) + + def peakmem_read_pickle(self): + read_pickle(self.fname) + + def peakmem_write_pickle(self): + self.df.to_pickle(self.fname) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/sas.py b/asv_bench/benchmarks/io/sas.py new file mode 100644 index 00000000..369b7964 --- /dev/null +++ b/asv_bench/benchmarks/io/sas.py @@ -0,0 +1,30 @@ +import os + +from pandas import read_sas + + +class SAS: + + params = ["sas7bdat", "xport"] + param_names = ["format"] + + def setup(self, format): + # Read files that are located in 'pandas/tests/io/sas/data' + files = {"sas7bdat": "test1.sas7bdat", "xport": "paxraw_d_short.xpt"} + file = files[format] + paths = [ + os.path.dirname(__file__), + "..", + "..", + "..", + "pandas", + "tests", + "io", + "sas", + "data", + file, + ] + self.f = os.path.join(*paths) + + def time_read_sas(self, format): + read_sas(self.f, format=format) diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py new file mode 100644 index 00000000..fb8b7daf --- /dev/null +++ b/asv_bench/benchmarks/io/sql.py @@ -0,0 +1,177 @@ +import sqlite3 + +import numpy as np +from sqlalchemy import create_engine + +from pandas import ( + DataFrame, + date_range, + read_sql_query, + read_sql_table, +) + +from ..pandas_vb_common import tm + + +class SQL: + + params = ["sqlalchemy", "sqlite"] + param_names = ["connection"] + + def setup(self, connection): + N = 10000 + con = { + "sqlalchemy": create_engine("sqlite:///:memory:"), + "sqlite": sqlite3.connect(":memory:"), + } + self.table_name = "test_type" + self.query_all = f"SELECT * FROM {self.table_name}" + self.con = con[connection] + self.df = DataFrame( + { + "float": np.random.randn(N), + "float_with_nan": np.random.randn(N), + "string": ["foo"] * N, + "bool": [True] * N, + "int": np.random.randint(0, N, size=N), + "datetime": date_range("2000-01-01", periods=N, freq="s"), + }, + index=tm.makeStringIndex(N), + ) + self.df.loc[1000:3000, "float_with_nan"] = np.nan + self.df["date"] = self.df["datetime"].dt.date + self.df["time"] = 
self.df["datetime"].dt.time + self.df["datetime_string"] = self.df["datetime"].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists="replace") + + def time_to_sql_dataframe(self, connection): + self.df.to_sql("test1", self.con, if_exists="replace") + + def time_read_sql_query(self, connection): + read_sql_query(self.query_all, self.con) + + +class WriteSQLDtypes: + + params = ( + ["sqlalchemy", "sqlite"], + [ + "float", + "float_with_nan", + "string", + "bool", + "int", + "date", + "time", + "datetime", + ], + ) + param_names = ["connection", "dtype"] + + def setup(self, connection, dtype): + N = 10000 + con = { + "sqlalchemy": create_engine("sqlite:///:memory:"), + "sqlite": sqlite3.connect(":memory:"), + } + self.table_name = "test_type" + self.query_col = f"SELECT {dtype} FROM {self.table_name}" + self.con = con[connection] + self.df = DataFrame( + { + "float": np.random.randn(N), + "float_with_nan": np.random.randn(N), + "string": ["foo"] * N, + "bool": [True] * N, + "int": np.random.randint(0, N, size=N), + "datetime": date_range("2000-01-01", periods=N, freq="s"), + }, + index=tm.makeStringIndex(N), + ) + self.df.loc[1000:3000, "float_with_nan"] = np.nan + self.df["date"] = self.df["datetime"].dt.date + self.df["time"] = self.df["datetime"].dt.time + self.df["datetime_string"] = self.df["datetime"].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists="replace") + + def time_to_sql_dataframe_column(self, connection, dtype): + self.df[[dtype]].to_sql("test1", self.con, if_exists="replace") + + def time_read_sql_query_select_column(self, connection, dtype): + read_sql_query(self.query_col, self.con) + + +class ReadSQLTable: + def setup(self): + N = 10000 + self.table_name = "test" + self.con = create_engine("sqlite:///:memory:") + self.df = DataFrame( + { + "float": np.random.randn(N), + "float_with_nan": np.random.randn(N), + "string": ["foo"] * N, + "bool": [True] * N, + "int": np.random.randint(0, N, size=N), + "datetime": date_range("2000-01-01", periods=N, freq="s"), + }, + index=tm.makeStringIndex(N), + ) + self.df.loc[1000:3000, "float_with_nan"] = np.nan + self.df["date"] = self.df["datetime"].dt.date + self.df["time"] = self.df["datetime"].dt.time + self.df["datetime_string"] = self.df["datetime"].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists="replace") + + def time_read_sql_table_all(self): + read_sql_table(self.table_name, self.con) + + def time_read_sql_table_parse_dates(self): + read_sql_table( + self.table_name, + self.con, + columns=["datetime_string"], + parse_dates=["datetime_string"], + ) + + +class ReadSQLTableDtypes: + + params = [ + "float", + "float_with_nan", + "string", + "bool", + "int", + "date", + "time", + "datetime", + ] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10000 + self.table_name = "test" + self.con = create_engine("sqlite:///:memory:") + self.df = DataFrame( + { + "float": np.random.randn(N), + "float_with_nan": np.random.randn(N), + "string": ["foo"] * N, + "bool": [True] * N, + "int": np.random.randint(0, N, size=N), + "datetime": date_range("2000-01-01", periods=N, freq="s"), + }, + index=tm.makeStringIndex(N), + ) + self.df.loc[1000:3000, "float_with_nan"] = np.nan + self.df["date"] = self.df["datetime"].dt.date + self.df["time"] = self.df["datetime"].dt.time + self.df["datetime_string"] = self.df["datetime"].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists="replace") + + def time_read_sql_table_column(self, dtype): + read_sql_table(self.table_name, self.con, 
columns=[dtype]) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py new file mode 100644 index 00000000..4ae2745a --- /dev/null +++ b/asv_bench/benchmarks/io/stata.py @@ -0,0 +1,60 @@ +import numpy as np + +from pandas import ( + DataFrame, + date_range, + read_stata, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) + + +class Stata(BaseIO): + + params = ["tc", "td", "tm", "tw", "th", "tq", "ty"] + param_names = ["convert_dates"] + + def setup(self, convert_dates): + self.fname = "__test__.dta" + N = self.N = 100000 + C = self.C = 5 + self.df = DataFrame( + np.random.randn(N, C), + columns=[f"float{i}" for i in range(C)], + index=date_range("20000101", periods=N, freq="H"), + ) + self.df["object"] = tm.makeStringIndex(self.N) + self.df["int8_"] = np.random.randint( + np.iinfo(np.int8).min, np.iinfo(np.int8).max - 27, N + ) + self.df["int16_"] = np.random.randint( + np.iinfo(np.int16).min, np.iinfo(np.int16).max - 27, N + ) + self.df["int32_"] = np.random.randint( + np.iinfo(np.int32).min, np.iinfo(np.int32).max - 27, N + ) + self.df["float32_"] = np.array(np.random.randn(N), dtype=np.float32) + self.convert_dates = {"index": convert_dates} + self.df.to_stata(self.fname, self.convert_dates) + + def time_read_stata(self, convert_dates): + read_stata(self.fname) + + def time_write_stata(self, convert_dates): + self.df.to_stata(self.fname, self.convert_dates) + + +class StataMissing(Stata): + def setup(self, convert_dates): + super().setup(convert_dates) + for i in range(10): + missing_data = np.random.randn(self.N) + missing_data[missing_data < 0] = np.nan + self.df[f"missing_{i}"] = missing_data + self.df.to_stata(self.fname, self.convert_dates) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/style.py b/asv_bench/benchmarks/io/style.py new file mode 100644 index 00000000..f0902c9c --- /dev/null +++ b/asv_bench/benchmarks/io/style.py @@ -0,0 +1,93 @@ +import numpy as np + +from pandas import ( + DataFrame, + IndexSlice, +) + + +class Render: + + params = [[12, 24, 36], [12, 120]] + param_names = ["cols", "rows"] + + def setup(self, cols, rows): + self.df = DataFrame( + np.random.randn(rows, cols), + columns=[f"float_{i+1}" for i in range(cols)], + index=[f"row_{i+1}" for i in range(rows)], + ) + + def time_apply_render(self, cols, rows): + self._style_apply() + self.st._render_html(True, True) + + def peakmem_apply_render(self, cols, rows): + self._style_apply() + self.st._render_html(True, True) + + def time_classes_render(self, cols, rows): + self._style_classes() + self.st._render_html(True, True) + + def peakmem_classes_render(self, cols, rows): + self._style_classes() + self.st._render_html(True, True) + + def time_tooltips_render(self, cols, rows): + self._style_tooltips() + self.st._render_html(True, True) + + def peakmem_tooltips_render(self, cols, rows): + self._style_tooltips() + self.st._render_html(True, True) + + def time_format_render(self, cols, rows): + self._style_format() + self.st._render_html(True, True) + + def peakmem_format_render(self, cols, rows): + self._style_format() + self.st._render_html(True, True) + + def time_apply_format_hide_render(self, cols, rows): + self._style_apply_format_hide() + self.st._render_html(True, True) + + def peakmem_apply_format_hide_render(self, cols, rows): + self._style_apply_format_hide() + self.st._render_html(True, True) + + def _style_apply(self): + def _apply_func(s): + 
return [ + "background-color: lightcyan" if s.name == "row_1" else "" for v in s + ] + + self.st = self.df.style.apply(_apply_func, axis=1) + + def _style_classes(self): + classes = self.df.applymap(lambda v: ("cls-1" if v > 0 else "")) + classes.index, classes.columns = self.df.index, self.df.columns + self.st = self.df.style.set_td_classes(classes) + + def _style_format(self): + ic = int(len(self.df.columns) / 4 * 3) + ir = int(len(self.df.index) / 4 * 3) + # apply a formatting function + # subset is flexible but hinders vectorised solutions + self.st = self.df.style.format( + "{:,.3f}", subset=IndexSlice["row_1":f"row_{ir}", "float_1":f"float_{ic}"] + ) + + def _style_apply_format_hide(self): + self.st = self.df.style.applymap(lambda v: "color: red;") + self.st.format("{:.3f}") + self.st.hide_index(self.st.index[1:]) + self.st.hide_columns(self.st.columns[1:]) + + def _style_tooltips(self): + ttips = DataFrame("abc", index=self.df.index[::2], columns=self.df.columns[::2]) + self.st = self.df.style.set_tooltips(ttips) + self.st.hide_index(self.st.index[12:]) + self.st.hide_columns(self.st.columns[12:]) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py new file mode 100644 index 00000000..e3c6bf9b --- /dev/null +++ b/asv_bench/benchmarks/join_merge.py @@ -0,0 +1,431 @@ +import string + +import numpy as np + +from pandas import ( + DataFrame, + MultiIndex, + Series, + concat, + date_range, + merge, + merge_asof, +) + +from .pandas_vb_common import tm + +try: + from pandas import merge_ordered +except ImportError: + from pandas import ordered_merge as merge_ordered + + +class Append: + def setup(self): + self.df1 = DataFrame(np.random.randn(10000, 4), columns=["A", "B", "C", "D"]) + self.df2 = self.df1.copy() + self.df2.index = np.arange(10000, 20000) + self.mdf1 = self.df1.copy() + self.mdf1["obj1"] = "bar" + self.mdf1["obj2"] = "bar" + self.mdf1["int1"] = 5 + self.mdf1 = self.mdf1._consolidate() + self.mdf2 = self.mdf1.copy() + self.mdf2.index = self.df2.index + + def time_append_homogenous(self): + self.df1.append(self.df2) + + def time_append_mixed(self): + self.mdf1.append(self.mdf2) + + +class Concat: + + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + N = 1000 + s = Series(N, index=tm.makeStringIndex(N)) + self.series = [s[i:-i] for i in range(1, 10)] * 50 + self.small_frames = [DataFrame(np.random.randn(5, 4))] * 1000 + df = DataFrame( + {"A": range(N)}, index=date_range("20130101", periods=N, freq="s") + ) + self.empty_left = [DataFrame(), df] + self.empty_right = [df, DataFrame()] + self.mixed_ndims = [df, df.head(N // 2)] + + def time_concat_series(self, axis): + concat(self.series, axis=axis, sort=False) + + def time_concat_small_frames(self, axis): + concat(self.small_frames, axis=axis) + + def time_concat_empty_right(self, axis): + concat(self.empty_right, axis=axis) + + def time_concat_empty_left(self, axis): + concat(self.empty_left, axis=axis) + + def time_concat_mixed_ndims(self, axis): + concat(self.mixed_ndims, axis=axis) + + +class ConcatDataFrames: + + params = ([0, 1], [True, False]) + param_names = ["axis", "ignore_index"] + + def setup(self, axis, ignore_index): + frame_c = DataFrame(np.zeros((10000, 200), dtype=np.float32, order="C")) + self.frame_c = [frame_c] * 20 + frame_f = DataFrame(np.zeros((10000, 200), dtype=np.float32, order="F")) + self.frame_f = [frame_f] * 20 + + def time_c_ordered(self, axis, ignore_index): + concat(self.frame_c, axis=axis, ignore_index=ignore_index) + + def time_f_ordered(self, 
axis, ignore_index): + concat(self.frame_f, axis=axis, ignore_index=ignore_index) + + +class Join: + + params = [True, False] + param_names = ["sort"] + + def setup(self, sort): + level1 = tm.makeStringIndex(10).values + level2 = tm.makeStringIndex(1000).values + codes1 = np.arange(10).repeat(1000) + codes2 = np.tile(np.arange(1000), 10) + index2 = MultiIndex(levels=[level1, level2], codes=[codes1, codes2]) + self.df_multi = DataFrame( + np.random.randn(len(index2), 4), index=index2, columns=["A", "B", "C", "D"] + ) + + self.key1 = np.tile(level1.take(codes1), 10) + self.key2 = np.tile(level2.take(codes2), 10) + self.df = DataFrame( + { + "data1": np.random.randn(100000), + "data2": np.random.randn(100000), + "key1": self.key1, + "key2": self.key2, + } + ) + + self.df_key1 = DataFrame( + np.random.randn(len(level1), 4), index=level1, columns=["A", "B", "C", "D"] + ) + self.df_key2 = DataFrame( + np.random.randn(len(level2), 4), index=level2, columns=["A", "B", "C", "D"] + ) + + shuf = np.arange(100000) + np.random.shuffle(shuf) + self.df_shuf = self.df.reindex(self.df.index[shuf]) + + def time_join_dataframe_index_multi(self, sort): + self.df.join(self.df_multi, on=["key1", "key2"], sort=sort) + + def time_join_dataframe_index_single_key_bigger(self, sort): + self.df.join(self.df_key2, on="key2", sort=sort) + + def time_join_dataframe_index_single_key_small(self, sort): + self.df.join(self.df_key1, on="key1", sort=sort) + + def time_join_dataframe_index_shuffle_key_bigger_sort(self, sort): + self.df_shuf.join(self.df_key2, on="key2", sort=sort) + + def time_join_dataframes_cross(self, sort): + self.df.loc[:2000].join(self.df_key1, how="cross", sort=sort) + + +class JoinIndex: + def setup(self): + N = 50000 + self.left = DataFrame( + np.random.randint(1, N / 500, (N, 2)), columns=["jim", "joe"] + ) + self.right = DataFrame( + np.random.randint(1, N / 500, (N, 2)), columns=["jolie", "jolia"] + ).set_index("jolie") + + def time_left_outer_join_index(self): + self.left.join(self.right, on="jim") + + +class JoinEmpty: + def setup(self): + N = 100_000 + self.df = DataFrame({"A": np.arange(N)}) + self.df_empty = DataFrame(columns=["B", "C"], dtype="int64") + + def time_inner_join_left_empty(self): + self.df_empty.join(self.df, how="inner") + + def time_inner_join_right_empty(self): + self.df.join(self.df_empty, how="inner") + + +class JoinNonUnique: + # outer join of non-unique + # GH 6329 + def setup(self): + date_index = date_range("01-Jan-2013", "23-Jan-2013", freq="T") + daily_dates = date_index.to_period("D").to_timestamp("S", "S") + self.fracofday = date_index.values - daily_dates.values + self.fracofday = self.fracofday.astype("timedelta64[ns]") + self.fracofday = self.fracofday.astype(np.float64) / 86_400_000_000_000 + self.fracofday = Series(self.fracofday, daily_dates) + index = date_range(date_index.min(), date_index.max(), freq="D") + self.temp = Series(1.0, index)[self.fracofday.index] + + def time_join_non_unique_equal(self): + self.fracofday * self.temp + + +class Merge: + + params = [True, False] + param_names = ["sort"] + + def setup(self, sort): + N = 10000 + indices = tm.makeStringIndex(N).values + indices2 = tm.makeStringIndex(N).values + key = np.tile(indices[:8000], 10) + key2 = np.tile(indices2[:8000], 10) + self.left = DataFrame( + {"key": key, "key2": key2, "value": np.random.randn(80000)} + ) + self.right = DataFrame( + { + "key": indices[2000:], + "key2": indices2[2000:], + "value2": np.random.randn(8000), + } + ) + + self.df = DataFrame( + { + "key1": 
np.tile(np.arange(500).repeat(10), 2), + "key2": np.tile(np.arange(250).repeat(10), 4), + "value": np.random.randn(10000), + } + ) + self.df2 = DataFrame({"key1": np.arange(500), "value2": np.random.randn(500)}) + self.df3 = self.df[:5000] + + def time_merge_2intkey(self, sort): + merge(self.left, self.right, sort=sort) + + def time_merge_dataframe_integer_2key(self, sort): + merge(self.df, self.df3, sort=sort) + + def time_merge_dataframe_integer_key(self, sort): + merge(self.df, self.df2, on="key1", sort=sort) + + def time_merge_dataframe_empty_right(self, sort): + merge(self.left, self.right.iloc[:0], sort=sort) + + def time_merge_dataframe_empty_left(self, sort): + merge(self.left.iloc[:0], self.right, sort=sort) + + def time_merge_dataframes_cross(self, sort): + merge(self.left.loc[:2000], self.right.loc[:2000], how="cross", sort=sort) + + +class I8Merge: + + params = ["inner", "outer", "left", "right"] + param_names = ["how"] + + def setup(self, how): + low, high, n = -1000, 1000, 10**6 + self.left = DataFrame( + np.random.randint(low, high, (n, 7)), columns=list("ABCDEFG") + ) + self.left["left"] = self.left.sum(axis=1) + self.right = self.left.sample(frac=1).rename({"left": "right"}, axis=1) + self.right = self.right.reset_index(drop=True) + self.right["right"] *= -1 + + def time_i8merge(self, how): + merge(self.left, self.right, how=how) + + +class MergeCategoricals: + def setup(self): + self.left_object = DataFrame( + { + "X": np.random.choice(range(0, 10), size=(10000,)), + "Y": np.random.choice(["one", "two", "three"], size=(10000,)), + } + ) + + self.right_object = DataFrame( + { + "X": np.random.choice(range(0, 10), size=(10000,)), + "Z": np.random.choice(["jjj", "kkk", "sss"], size=(10000,)), + } + ) + + self.left_cat = self.left_object.assign( + Y=self.left_object["Y"].astype("category") + ) + self.right_cat = self.right_object.assign( + Z=self.right_object["Z"].astype("category") + ) + + self.left_cat_col = self.left_object.astype({"X": "category"}) + self.right_cat_col = self.right_object.astype({"X": "category"}) + + self.left_cat_idx = self.left_cat_col.set_index("X") + self.right_cat_idx = self.right_cat_col.set_index("X") + + def time_merge_object(self): + merge(self.left_object, self.right_object, on="X") + + def time_merge_cat(self): + merge(self.left_cat, self.right_cat, on="X") + + def time_merge_on_cat_col(self): + merge(self.left_cat_col, self.right_cat_col, on="X") + + def time_merge_on_cat_idx(self): + merge(self.left_cat_idx, self.right_cat_idx, on="X") + + +class MergeOrdered: + def setup(self): + groups = tm.makeStringIndex(10).values + self.left = DataFrame( + { + "group": groups.repeat(5000), + "key": np.tile(np.arange(0, 10000, 2), 10), + "lvalue": np.random.randn(50000), + } + ) + self.right = DataFrame( + {"key": np.arange(10000), "rvalue": np.random.randn(10000)} + ) + + def time_merge_ordered(self): + merge_ordered(self.left, self.right, on="key", left_by="group") + + +class MergeAsof: + params = [["backward", "forward", "nearest"], [None, 5]] + param_names = ["direction", "tolerance"] + + def setup(self, direction, tolerance): + one_count = 200000 + two_count = 1000000 + + df1 = DataFrame( + { + "time": np.random.randint(0, one_count / 20, one_count), + "key": np.random.choice(list(string.ascii_uppercase), one_count), + "key2": np.random.randint(0, 25, one_count), + "value1": np.random.randn(one_count), + } + ) + df2 = DataFrame( + { + "time": np.random.randint(0, two_count / 20, two_count), + "key": np.random.choice(list(string.ascii_uppercase), 
two_count), + "key2": np.random.randint(0, 25, two_count), + "value2": np.random.randn(two_count), + } + ) + + df1 = df1.sort_values("time") + df2 = df2.sort_values("time") + + df1["time32"] = np.int32(df1.time) + df2["time32"] = np.int32(df2.time) + + df1["timeu64"] = np.uint64(df1.time) + df2["timeu64"] = np.uint64(df2.time) + + self.df1a = df1[["time", "value1"]] + self.df2a = df2[["time", "value2"]] + self.df1b = df1[["time", "key", "value1"]] + self.df2b = df2[["time", "key", "value2"]] + self.df1c = df1[["time", "key2", "value1"]] + self.df2c = df2[["time", "key2", "value2"]] + self.df1d = df1[["time32", "value1"]] + self.df2d = df2[["time32", "value2"]] + self.df1e = df1[["time", "key", "key2", "value1"]] + self.df2e = df2[["time", "key", "key2", "value2"]] + self.df1f = df1[["timeu64", "value1"]] + self.df2f = df2[["timeu64", "value2"]] + + def time_on_int(self, direction, tolerance): + merge_asof( + self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance + ) + + def time_on_int32(self, direction, tolerance): + merge_asof( + self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance + ) + + def time_on_uint64(self, direction, tolerance): + merge_asof( + self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance + ) + + def time_by_object(self, direction, tolerance): + merge_asof( + self.df1b, + self.df2b, + on="time", + by="key", + direction=direction, + tolerance=tolerance, + ) + + def time_by_int(self, direction, tolerance): + merge_asof( + self.df1c, + self.df2c, + on="time", + by="key2", + direction=direction, + tolerance=tolerance, + ) + + def time_multiby(self, direction, tolerance): + merge_asof( + self.df1e, + self.df2e, + on="time", + by=["key", "key2"], + direction=direction, + tolerance=tolerance, + ) + + +class Align: + def setup(self): + size = 5 * 10**5 + rng = np.arange(0, 10**13, 10**7) + stamps = np.datetime64("now").view("i8") + rng + idx1 = np.sort(np.random.choice(stamps, size, replace=False)) + idx2 = np.sort(np.random.choice(stamps, size, replace=False)) + self.ts1 = Series(np.random.randn(size), idx1) + self.ts2 = Series(np.random.randn(size), idx2) + + def time_series_align_int64_index(self): + self.ts1 + self.ts2 + + def time_series_align_left_monotonic(self): + self.ts1.align(self.ts2, join="left") + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/libs.py b/asv_bench/benchmarks/libs.py new file mode 100644 index 00000000..f041499c --- /dev/null +++ b/asv_bench/benchmarks/libs.py @@ -0,0 +1,106 @@ +""" +Benchmarks for code in pandas/_libs, excluding pandas/_libs/tslibs, +which has its own directory. + +If a PR does not edit anything in _libs/, then it is unlikely that the +benchmarks will be affected. +""" +import numpy as np + +from pandas._libs.lib import ( + infer_dtype, + is_list_like, + is_scalar, +) + +from pandas import ( + NA, + NaT, +) + +from .pandas_vb_common import ( + lib, + tm, +) + +try: + from pandas.util import cache_readonly +except ImportError: + from pandas.util.decorators import cache_readonly + + +# TODO: share with something in pd._testing? 
+scalars = [ + 0, + 1.0, + 1 + 2j, + True, + "foo", + b"bar", + None, + np.datetime64(123, "ns"), + np.timedelta64(123, "ns"), + NaT, + NA, +] +zero_dims = [np.array("123")] +listlikes = [np.array([1, 2, 3]), {0: 1}, {1, 2, 3}, [1, 2, 3], (1, 2, 3)] + + +class ScalarListLike: + params = scalars + zero_dims + listlikes + + def time_is_list_like(self, param): + is_list_like(param) + + def time_is_scalar(self, param): + is_scalar(param) + + +class FastZip: + def setup(self): + N = 10000 + K = 10 + key1 = tm.makeStringIndex(N).values.repeat(K) + key2 = tm.makeStringIndex(N).values.repeat(K) + col_array = np.vstack([key1, key2, np.random.randn(N * K)]) + col_array2 = col_array.copy() + col_array2[:, :10000] = np.nan + self.col_array_list = list(col_array) + + def time_lib_fast_zip(self): + lib.fast_zip(self.col_array_list) + + +class InferDtype: + param_names = ["dtype"] + data_dict = { + "np-object": np.array([1] * 100000, dtype="O"), + "py-object": [1] * 100000, + "np-null": np.array([1] * 50000 + [np.nan] * 50000), + "py-null": [1] * 50000 + [None] * 50000, + "np-int": np.array([1] * 100000, dtype=int), + "np-floating": np.array([1.0] * 100000, dtype=float), + "empty": [], + "bytes": [b"a"] * 100000, + } + params = list(data_dict.keys()) + + def time_infer_dtype_skipna(self, dtype): + infer_dtype(self.data_dict[dtype], skipna=True) + + def time_infer_dtype(self, dtype): + infer_dtype(self.data_dict[dtype], skipna=False) + + +class CacheReadonly: + def setup(self): + class Foo: + @cache_readonly + def prop(self): + return 5 + + self.obj = Foo() + + def time_cache_readonly(self): + self.obj.prop diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py new file mode 100644 index 00000000..a498c6b2 --- /dev/null +++ b/asv_bench/benchmarks/multiindex_object.py @@ -0,0 +1,258 @@ +import string + +import numpy as np + +from pandas import ( + DataFrame, + MultiIndex, + RangeIndex, + date_range, +) + +from .pandas_vb_common import tm + + +class GetLoc: + def setup(self): + self.mi_large = MultiIndex.from_product( + [np.arange(1000), np.arange(20), list(string.ascii_letters)], + names=["one", "two", "three"], + ) + self.mi_med = MultiIndex.from_product( + [np.arange(1000), np.arange(10), list("A")], names=["one", "two", "three"] + ) + self.mi_small = MultiIndex.from_product( + [np.arange(100), list("A"), list("A")], names=["one", "two", "three"] + ) + + def time_large_get_loc(self): + self.mi_large.get_loc((999, 19, "Z")) + + def time_large_get_loc_warm(self): + for _ in range(1000): + self.mi_large.get_loc((999, 19, "Z")) + + def time_med_get_loc(self): + self.mi_med.get_loc((999, 9, "A")) + + def time_med_get_loc_warm(self): + for _ in range(1000): + self.mi_med.get_loc((999, 9, "A")) + + def time_string_get_loc(self): + self.mi_small.get_loc((99, "A", "A")) + + def time_small_get_loc_warm(self): + for _ in range(1000): + self.mi_small.get_loc((99, "A", "A")) + + +class GetLocs: + def setup(self): + self.mi_large = MultiIndex.from_product( + [np.arange(1000), np.arange(20), list(string.ascii_letters)], + names=["one", "two", "three"], + ) + self.mi_med = MultiIndex.from_product( + [np.arange(1000), np.arange(10), list("A")], names=["one", "two", "three"] + ) + self.mi_small = MultiIndex.from_product( + [np.arange(100), list("A"), list("A")], names=["one", "two", "three"] + ) + + def time_large_get_locs(self): + self.mi_large.get_locs([999, 19, "Z"]) + + def time_med_get_locs(self): + self.mi_med.get_locs([999, 9, "A"]) + + def time_small_get_locs(self): + 
self.mi_small.get_locs([99, "A", "A"]) + + +class Duplicates: + def setup(self): + size = 65536 + arrays = [np.random.randint(0, 8192, size), np.random.randint(0, 1024, size)] + mask = np.random.rand(size) < 0.1 + self.mi_unused_levels = MultiIndex.from_arrays(arrays) + self.mi_unused_levels = self.mi_unused_levels[mask] + + def time_remove_unused_levels(self): + self.mi_unused_levels.remove_unused_levels() + + +class Integer: + def setup(self): + self.mi_int = MultiIndex.from_product( + [np.arange(1000), np.arange(1000)], names=["one", "two"] + ) + self.obj_index = np.array( + [ + (0, 10), + (0, 11), + (0, 12), + (0, 13), + (0, 14), + (0, 15), + (0, 16), + (0, 17), + (0, 18), + (0, 19), + ], + dtype=object, + ) + self.other_mi_many_mismatches = MultiIndex.from_tuples( + [ + (-7, 41), + (-2, 3), + (-0.7, 5), + (0, 0), + (0, 1.5), + (0, 340), + (0, 1001), + (1, -4), + (1, 20), + (1, 1040), + (432, -5), + (432, 17), + (439, 165.5), + (998, -4), + (998, 24065), + (999, 865.2), + (999, 1000), + (1045, -843), + ] + ) + + def time_get_indexer(self): + self.mi_int.get_indexer(self.obj_index) + + def time_get_indexer_and_backfill(self): + self.mi_int.get_indexer(self.other_mi_many_mismatches, method="backfill") + + def time_get_indexer_and_pad(self): + self.mi_int.get_indexer(self.other_mi_many_mismatches, method="pad") + + def time_is_monotonic(self): + self.mi_int.is_monotonic_increasing + + +class Duplicated: + def setup(self): + n, k = 200, 5000 + levels = [np.arange(n), tm.makeStringIndex(n).values, 1000 + np.arange(n)] + codes = [np.random.choice(n, (k * n)) for lev in levels] + self.mi = MultiIndex(levels=levels, codes=codes) + + def time_duplicated(self): + self.mi.duplicated() + + +class Sortlevel: + def setup(self): + n = 1182720 + low, high = -4096, 4096 + arrs = [ + np.repeat(np.random.randint(low, high, (n // k)), k) + for k in [11, 7, 5, 3, 1] + ] + self.mi_int = MultiIndex.from_arrays(arrs)[np.random.permutation(n)] + + a = np.repeat(np.arange(100), 1000) + b = np.tile(np.arange(1000), 100) + self.mi = MultiIndex.from_arrays([a, b]) + self.mi = self.mi.take(np.random.permutation(np.arange(100000))) + + def time_sortlevel_int64(self): + self.mi_int.sortlevel() + + def time_sortlevel_zero(self): + self.mi.sortlevel(0) + + def time_sortlevel_one(self): + self.mi.sortlevel(1) + + +class Values: + def setup_cache(self): + + level1 = range(1000) + level2 = date_range(start="1/1/2012", periods=100) + mi = MultiIndex.from_product([level1, level2]) + return mi + + def time_datetime_level_values_copy(self, mi): + mi.copy().values + + def time_datetime_level_values_sliced(self, mi): + mi[:10].values + + +class CategoricalLevel: + def setup(self): + + self.df = DataFrame( + { + "a": np.arange(1_000_000, dtype=np.int32), + "b": np.arange(1_000_000, dtype=np.int64), + "c": np.arange(1_000_000, dtype=float), + } + ).astype({"a": "category", "b": "category"}) + + def time_categorical_level(self): + self.df.set_index(["a", "b"]) + + +class Equals: + def setup(self): + idx_large_fast = RangeIndex(100000) + idx_small_slow = date_range(start="1/1/2012", periods=1) + self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow]) + + self.idx_non_object = RangeIndex(1) + + def time_equals_non_object_index(self): + self.mi_large_slow.equals(self.idx_non_object) + + +class SetOperations: + + params = [ + ("monotonic", "non_monotonic"), + ("datetime", "int", "string"), + ("intersection", "union", "symmetric_difference"), + ] + param_names = ["index_structure", "dtype", "method"] + + def 
setup(self, index_structure, dtype, method): + N = 10**5 + level1 = range(1000) + + level2 = date_range(start="1/1/2000", periods=N // 1000) + dates_left = MultiIndex.from_product([level1, level2]) + + level2 = range(N // 1000) + int_left = MultiIndex.from_product([level1, level2]) + + level2 = tm.makeStringIndex(N // 1000).values + str_left = MultiIndex.from_product([level1, level2]) + + data = { + "datetime": dates_left, + "int": int_left, + "string": str_left, + } + + if index_structure == "non_monotonic": + data = {k: mi[::-1] for k, mi in data.items()} + + data = {k: {"left": mi, "right": mi[:-1]} for k, mi in data.items()} + self.left = data[dtype]["left"] + self.right = data[dtype]["right"] + + def time_operation(self, index_structure, dtype, method): + getattr(self.left, method)(self.right) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/package.py b/asv_bench/benchmarks/package.py new file mode 100644 index 00000000..34fe4929 --- /dev/null +++ b/asv_bench/benchmarks/package.py @@ -0,0 +1,19 @@ +""" +Benchmarks for pandas at the package-level. +""" +import subprocess +import sys + + +class TimeImport: + def time_import(self): + # on py37+ we the "-X importtime" usage gives us a more precise + # measurement of the import time we actually care about, + # without the subprocess or interpreter overhead + cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"] + p = subprocess.run(cmd, stderr=subprocess.PIPE) + + line = p.stderr.splitlines()[-1] + field = line.split(b"|")[-2].strip() + total = int(field) # microseconds + return total diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py new file mode 100644 index 00000000..d3168bde --- /dev/null +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -0,0 +1,80 @@ +from importlib import import_module +import os + +import numpy as np + +import pandas as pd + +# Compatibility import for lib +for imp in ["pandas._libs.lib", "pandas.lib"]: + try: + lib = import_module(imp) + break + except (ImportError, TypeError, ValueError): + pass + +# Compatibility import for the testing module +try: + import pandas._testing as tm +except ImportError: + import pandas.util.testing as tm # noqa:F401 + + +numeric_dtypes = [ + np.int64, + np.int32, + np.uint32, + np.uint64, + np.float32, + np.float64, + np.int16, + np.int8, + np.uint16, + np.uint8, +] +datetime_dtypes = [np.datetime64, np.timedelta64] +string_dtypes = [object] +try: + extension_dtypes = [ + pd.Int8Dtype, + pd.Int16Dtype, + pd.Int32Dtype, + pd.Int64Dtype, + pd.UInt8Dtype, + pd.UInt16Dtype, + pd.UInt32Dtype, + pd.UInt64Dtype, + pd.CategoricalDtype, + pd.IntervalDtype, + pd.DatetimeTZDtype("ns", "UTC"), + pd.PeriodDtype("D"), + ] +except AttributeError: + extension_dtypes = [] + + +def setup(*args, **kwargs): + # This function just needs to be imported into each benchmark file to + # set up the random seed before each function. 
+ # https://asv.readthedocs.io/en/latest/writing_benchmarks.html + np.random.seed(1234) + + +class BaseIO: + """ + Base class for IO benchmarks + """ + + fname = None + + def remove(self, f): + """Remove created files""" + try: + os.remove(f) # noqa: PDF008 + except OSError: + # On Windows, attempting to remove a file that is in use + # causes an exception to be raised + pass + + def teardown(self, *args, **kwargs): + self.remove(self.fname) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py new file mode 100644 index 00000000..4f81aee6 --- /dev/null +++ b/asv_bench/benchmarks/period.py @@ -0,0 +1,108 @@ +""" +Period benchmarks with non-tslibs dependencies. See +benchmarks.tslibs.period for benchmarks that rely only on tslibs. +""" +from pandas import ( + DataFrame, + Period, + PeriodIndex, + Series, + date_range, + period_range, +) + +from pandas.tseries.frequencies import to_offset + + +class PeriodIndexConstructor: + + params = [["D"], [True, False]] + param_names = ["freq", "is_offset"] + + def setup(self, freq, is_offset): + self.rng = date_range("1985", periods=1000) + self.rng2 = date_range("1985", periods=1000).to_pydatetime() + self.ints = list(range(2000, 3000)) + self.daily_ints = ( + date_range("1/1/2000", periods=1000, freq=freq).strftime("%Y%m%d").map(int) + ) + if is_offset: + self.freq = to_offset(freq) + else: + self.freq = freq + + def time_from_date_range(self, freq, is_offset): + PeriodIndex(self.rng, freq=freq) + + def time_from_pydatetime(self, freq, is_offset): + PeriodIndex(self.rng2, freq=freq) + + def time_from_ints(self, freq, is_offset): + PeriodIndex(self.ints, freq=freq) + + def time_from_ints_daily(self, freq, is_offset): + PeriodIndex(self.daily_ints, freq=freq) + + +class DataFramePeriodColumn: + def setup(self): + self.rng = period_range(start="1/1/1990", freq="S", periods=20000) + self.df = DataFrame(index=range(len(self.rng))) + + def time_setitem_period_column(self): + self.df["col"] = self.rng + + def time_set_index(self): + # GH#21582 limited by comparisons of Period objects + self.df["col2"] = self.rng + self.df.set_index("col2", append=True) + + +class Algorithms: + + params = ["index", "series"] + param_names = ["typ"] + + def setup(self, typ): + data = [ + Period("2011-01", freq="M"), + Period("2011-02", freq="M"), + Period("2011-03", freq="M"), + Period("2011-04", freq="M"), + ] + + if typ == "index": + self.vector = PeriodIndex(data * 1000, freq="M") + elif typ == "series": + self.vector = Series(data * 1000) + + def time_drop_duplicates(self, typ): + self.vector.drop_duplicates() + + def time_value_counts(self, typ): + self.vector.value_counts() + + +class Indexing: + def setup(self): + self.index = period_range(start="1985", periods=1000, freq="D") + self.series = Series(range(1000), index=self.index) + self.period = self.index[500] + + def time_get_loc(self): + self.index.get_loc(self.period) + + def time_shallow_copy(self): + self.index._view() + + def time_series_loc(self): + self.series.loc[self.period] + + def time_align(self): + DataFrame({"a": self.series, "b": self.series[:500]}) + + def time_intersection(self): + self.index[:750].intersection(self.index[250:]) + + def time_unique(self): + self.index.unique() diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py new file mode 100644 index 00000000..789bb8d8 --- /dev/null +++ b/asv_bench/benchmarks/plotting.py @@ -0,0 +1,164 @@ +import contextlib +import importlib.machinery +import importlib.util +import os +import pathlib +import sys 
+import tempfile +from unittest import mock + +import matplotlib +import numpy as np + +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, +) + +try: + from pandas.plotting import andrews_curves +except ImportError: + from pandas.tools.plotting import andrews_curves + +from pandas.plotting._core import _get_plot_backend + +matplotlib.use("Agg") + + +class SeriesPlotting: + params = [["line", "bar", "area", "barh", "hist", "kde", "pie"]] + param_names = ["kind"] + + def setup(self, kind): + if kind in ["bar", "barh", "pie"]: + n = 100 + elif kind in ["kde"]: + n = 10000 + else: + n = 1000000 + + self.s = Series(np.random.randn(n)) + if kind in ["area", "pie"]: + self.s = self.s.abs() + + def time_series_plot(self, kind): + self.s.plot(kind=kind) + + +class FramePlotting: + params = [ + ["line", "bar", "area", "barh", "hist", "kde", "pie", "scatter", "hexbin"] + ] + param_names = ["kind"] + + def setup(self, kind): + if kind in ["bar", "barh", "pie"]: + n = 100 + elif kind in ["kde", "scatter", "hexbin"]: + n = 10000 + else: + n = 1000000 + + self.x = Series(np.random.randn(n)) + self.y = Series(np.random.randn(n)) + if kind in ["area", "pie"]: + self.x = self.x.abs() + self.y = self.y.abs() + self.df = DataFrame({"x": self.x, "y": self.y}) + + def time_frame_plot(self, kind): + self.df.plot(x="x", y="y", kind=kind) + + +class TimeseriesPlotting: + def setup(self): + N = 2000 + M = 5 + idx = date_range("1/1/1975", periods=N) + self.df = DataFrame(np.random.randn(N, M), index=idx) + + idx_irregular = DatetimeIndex( + np.concatenate((idx.values[0:10], idx.values[12:])) + ) + self.df2 = DataFrame( + np.random.randn(len(idx_irregular), M), index=idx_irregular + ) + + def time_plot_regular(self): + self.df.plot() + + def time_plot_regular_compat(self): + self.df.plot(x_compat=True) + + def time_plot_irregular(self): + self.df2.plot() + + def time_plot_table(self): + self.df.plot(table=True) + + +class Misc: + def setup(self): + N = 500 + M = 10 + self.df = DataFrame(np.random.randn(N, M)) + self.df["Name"] = ["A"] * N + + def time_plot_andrews_curves(self): + andrews_curves(self.df, "Name") + + +class BackendLoading: + repeat = 1 + number = 1 + warmup_time = 0 + + def setup(self): + mod = importlib.util.module_from_spec( + importlib.machinery.ModuleSpec("pandas_dummy_backend", None) + ) + mod.plot = lambda *args, **kwargs: 1 + + with contextlib.ExitStack() as stack: + stack.enter_context( + mock.patch.dict(sys.modules, {"pandas_dummy_backend": mod}) + ) + tmp_path = pathlib.Path(stack.enter_context(tempfile.TemporaryDirectory())) + + sys.path.insert(0, os.fsdecode(tmp_path)) + stack.callback(sys.path.remove, os.fsdecode(tmp_path)) + + dist_info = tmp_path / "my_backend-0.0.0.dist-info" + dist_info.mkdir() + (dist_info / "entry_points.txt").write_bytes( + b"[pandas_plotting_backends]\n" + b"my_ep_backend = pandas_dummy_backend\n" + b"my_ep_backend0 = pandas_dummy_backend\n" + b"my_ep_backend1 = pandas_dummy_backend\n" + b"my_ep_backend2 = pandas_dummy_backend\n" + b"my_ep_backend3 = pandas_dummy_backend\n" + b"my_ep_backend4 = pandas_dummy_backend\n" + b"my_ep_backend5 = pandas_dummy_backend\n" + b"my_ep_backend6 = pandas_dummy_backend\n" + b"my_ep_backend7 = pandas_dummy_backend\n" + b"my_ep_backend8 = pandas_dummy_backend\n" + b"my_ep_backend9 = pandas_dummy_backend\n" + ) + self.stack = stack.pop_all() + + def teardown(self): + self.stack.close() + + def time_get_plot_backend(self): + # finds the first my_ep_backend + _get_plot_backend("my_ep_backend") + + def 
time_get_plot_backend_fallback(self): + # iterates through all the my_ep_backend[0-9] before falling back + # to importlib.import_module + _get_plot_backend("pandas_dummy_backend") + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py new file mode 100644 index 00000000..29d2831b --- /dev/null +++ b/asv_bench/benchmarks/reindex.py @@ -0,0 +1,169 @@ +import numpy as np + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, + period_range, +) + +from .pandas_vb_common import tm + + +class Reindex: + def setup(self): + rng = date_range(start="1/1/1970", periods=10000, freq="1min") + self.df = DataFrame(np.random.rand(10000, 10), index=rng, columns=range(10)) + self.df["foo"] = "bar" + self.rng_subset = Index(rng[::2]) + self.df2 = DataFrame( + index=range(10000), data=np.random.rand(10000, 30), columns=range(30) + ) + N = 5000 + K = 200 + level1 = tm.makeStringIndex(N).values.repeat(K) + level2 = np.tile(tm.makeStringIndex(K).values, N) + index = MultiIndex.from_arrays([level1, level2]) + self.s = Series(np.random.randn(N * K), index=index) + self.s_subset = self.s[::2] + self.s_subset_no_cache = self.s[::2].copy() + + mi = MultiIndex.from_product([rng, range(100)]) + self.s2 = Series(np.random.randn(len(mi)), index=mi) + self.s2_subset = self.s2[::2].copy() + + def time_reindex_dates(self): + self.df.reindex(self.rng_subset) + + def time_reindex_columns(self): + self.df2.reindex(columns=self.df.columns[1:5]) + + def time_reindex_multiindex_with_cache(self): + # MultiIndex._values gets cached + self.s.reindex(self.s_subset.index) + + def time_reindex_multiindex_no_cache(self): + # Copy to avoid MultiIndex._values getting cached + self.s.reindex(self.s_subset_no_cache.index.copy()) + + def time_reindex_multiindex_no_cache_dates(self): + # Copy to avoid MultiIndex._values getting cached + self.s2_subset.reindex(self.s2.index.copy()) + + +class ReindexMethod: + + params = [["pad", "backfill"], [date_range, period_range]] + param_names = ["method", "constructor"] + + def setup(self, method, constructor): + N = 100000 + self.idx = constructor("1/1/2000", periods=N, freq="1min") + self.ts = Series(np.random.randn(N), index=self.idx)[::2] + + def time_reindex_method(self, method, constructor): + self.ts.reindex(self.idx, method=method) + + +class Fillna: + + params = ["pad", "backfill"] + param_names = ["method"] + + def setup(self, method): + N = 100000 + self.idx = date_range("1/1/2000", periods=N, freq="1min") + ts = Series(np.random.randn(N), index=self.idx)[::2] + self.ts_reindexed = ts.reindex(self.idx) + self.ts_float32 = self.ts_reindexed.astype("float32") + + def time_reindexed(self, method): + self.ts_reindexed.fillna(method=method) + + def time_float_32(self, method): + self.ts_float32.fillna(method=method) + + +class LevelAlign: + def setup(self): + self.index = MultiIndex( + levels=[np.arange(10), np.arange(100), np.arange(100)], + codes=[ + np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10), + ], + ) + self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) + self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) + + def time_align_level(self): + self.df.align(self.df_level, level=1, copy=False) + + def time_reindex_level(self): + self.df_level.reindex(self.index, level=1) + + +class DropDuplicates: + + params = [True, False] + param_names = ["inplace"] + + def 
setup(self, inplace): + N = 10000 + K = 10 + key1 = tm.makeStringIndex(N).values.repeat(K) + key2 = tm.makeStringIndex(N).values.repeat(K) + self.df = DataFrame( + {"key1": key1, "key2": key2, "value": np.random.randn(N * K)} + ) + self.df_nan = self.df.copy() + self.df_nan.iloc[:10000, :] = np.nan + + self.s = Series(np.random.randint(0, 1000, size=10000)) + self.s_str = Series(np.tile(tm.makeStringIndex(1000).values, 10)) + + N = 1000000 + K = 10000 + key1 = np.random.randint(0, K, size=N) + self.df_int = DataFrame({"key1": key1}) + self.df_bool = DataFrame(np.random.randint(0, 2, size=(K, 10), dtype=bool)) + + def time_frame_drop_dups(self, inplace): + self.df.drop_duplicates(["key1", "key2"], inplace=inplace) + + def time_frame_drop_dups_na(self, inplace): + self.df_nan.drop_duplicates(["key1", "key2"], inplace=inplace) + + def time_series_drop_dups_int(self, inplace): + self.s.drop_duplicates(inplace=inplace) + + def time_series_drop_dups_string(self, inplace): + self.s_str.drop_duplicates(inplace=inplace) + + def time_frame_drop_dups_int(self, inplace): + self.df_int.drop_duplicates(inplace=inplace) + + def time_frame_drop_dups_bool(self, inplace): + self.df_bool.drop_duplicates(inplace=inplace) + + +class Align: + # blog "pandas escaped the zoo" + def setup(self): + n = 50000 + indices = tm.makeStringIndex(n) + subsample_size = 40000 + self.x = Series(np.random.randn(n), indices) + self.y = Series( + np.random.randn(subsample_size), + index=np.random.choice(indices, subsample_size, replace=False), + ) + + def time_align_series_irregular_string(self): + self.x + self.y + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py new file mode 100644 index 00000000..8d4fc024 --- /dev/null +++ b/asv_bench/benchmarks/replace.py @@ -0,0 +1,77 @@ +import numpy as np + +import pandas as pd + + +class FillNa: + + params = [True, False] + param_names = ["inplace"] + + def setup(self, inplace): + N = 10**6 + rng = pd.date_range("1/1/2000", periods=N, freq="min") + data = np.random.randn(N) + data[::2] = np.nan + self.ts = pd.Series(data, index=rng) + + def time_fillna(self, inplace): + self.ts.fillna(0.0, inplace=inplace) + + def time_replace(self, inplace): + self.ts.replace(np.nan, 0.0, inplace=inplace) + + +class ReplaceDict: + + params = [True, False] + param_names = ["inplace"] + + def setup(self, inplace): + N = 10**5 + start_value = 10**5 + self.to_rep = dict(enumerate(np.arange(N) + start_value)) + self.s = pd.Series(np.random.randint(N, size=10**3)) + + def time_replace_series(self, inplace): + self.s.replace(self.to_rep, inplace=inplace) + + +class ReplaceList: + # GH#28099 + + params = [(True, False)] + param_names = ["inplace"] + + def setup(self, inplace): + self.df = pd.DataFrame({"A": 0, "B": 0}, index=range(4 * 10**7)) + + def time_replace_list(self, inplace): + self.df.replace([np.inf, -np.inf], np.nan, inplace=inplace) + + def time_replace_list_one_match(self, inplace): + # the 1 can be held in self._df.blocks[0], while the inf and -inf can't + self.df.replace([np.inf, -np.inf, 1], np.nan, inplace=inplace) + + +class Convert: + + params = (["DataFrame", "Series"], ["Timestamp", "Timedelta"]) + param_names = ["constructor", "replace_data"] + + def setup(self, constructor, replace_data): + N = 10**3 + data = { + "Series": pd.Series(np.random.randint(N, size=N)), + "DataFrame": pd.DataFrame( + {"A": np.random.randint(N, size=N), "B": np.random.randint(N, size=N)} + ), + } + self.to_replace = {i: 
getattr(pd, replace_data) for i in range(N)} + self.data = data[constructor] + + def time_replace(self, constructor, replace_data): + self.data.replace(self.to_replace) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py new file mode 100644 index 00000000..05e12630 --- /dev/null +++ b/asv_bench/benchmarks/reshape.py @@ -0,0 +1,319 @@ +from itertools import product +import string + +import numpy as np + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + date_range, + melt, + wide_to_long, +) +from pandas.api.types import CategoricalDtype + + +class Melt: + def setup(self): + self.df = DataFrame(np.random.randn(10000, 3), columns=["A", "B", "C"]) + self.df["id1"] = np.random.randint(0, 10, 10000) + self.df["id2"] = np.random.randint(100, 1000, 10000) + + def time_melt_dataframe(self): + melt(self.df, id_vars=["id1", "id2"]) + + +class Pivot: + def setup(self): + N = 10000 + index = date_range("1/1/2000", periods=N, freq="h") + data = { + "value": np.random.randn(N * 50), + "variable": np.arange(50).repeat(N), + "date": np.tile(index.values, 50), + } + self.df = DataFrame(data) + + def time_reshape_pivot_time_series(self): + self.df.pivot("date", "variable", "value") + + +class SimpleReshape: + def setup(self): + arrays = [np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)] + index = MultiIndex.from_arrays(arrays) + self.df = DataFrame(np.random.randn(10000, 4), index=index) + self.udf = self.df.unstack(1) + + def time_stack(self): + self.udf.stack() + + def time_unstack(self): + self.df.unstack(1) + + +class ReshapeExtensionDtype: + + params = ["datetime64[ns, US/Pacific]", "Period[s]"] + param_names = ["dtype"] + + def setup(self, dtype): + lev = pd.Index(list("ABCDEFGHIJ")) + ri = pd.Index(range(1000)) + mi = MultiIndex.from_product([lev, ri], names=["foo", "bar"]) + + index = date_range("2016-01-01", periods=10000, freq="s", tz="US/Pacific") + if dtype == "Period[s]": + index = index.tz_localize(None).to_period("s") + + ser = pd.Series(index, index=mi) + df = ser.unstack("bar") + # roundtrips -> df.stack().equals(ser) + + self.ser = ser + self.df = df + + def time_stack(self, dtype): + self.df.stack() + + def time_unstack_fast(self, dtype): + # last level -> doesn't have to make copies + self.ser.unstack("bar") + + def time_unstack_slow(self, dtype): + # first level -> must make copies + self.ser.unstack("foo") + + def time_transpose(self, dtype): + self.df.T + + +class Unstack: + + params = ["int", "category"] + + def setup(self, dtype): + m = 100 + n = 1000 + + levels = np.arange(m) + index = MultiIndex.from_product([levels] * 2) + columns = np.arange(n) + if dtype == "int": + values = np.arange(m * m * n).reshape(m * m, n) + self.df = DataFrame(values, index, columns) + else: + # the category branch is ~20x slower than int. So we + # cut down the size a bit. Now it's only ~3x slower. 
+ n = 50 + columns = columns[:n] + indices = np.random.randint(0, 52, size=(m * m, n)) + values = np.take(list(string.ascii_letters), indices) + values = [pd.Categorical(v) for v in values.T] + + self.df = DataFrame( + {i: cat for i, cat in enumerate(values)}, index, columns + ) + + self.df2 = self.df.iloc[:-1] + + def time_full_product(self, dtype): + self.df.unstack() + + def time_without_last_row(self, dtype): + self.df2.unstack() + + +class SparseIndex: + def setup(self): + NUM_ROWS = 1000 + self.df = DataFrame( + { + "A": np.random.randint(50, size=NUM_ROWS), + "B": np.random.randint(50, size=NUM_ROWS), + "C": np.random.randint(-10, 10, size=NUM_ROWS), + "D": np.random.randint(-10, 10, size=NUM_ROWS), + "E": np.random.randint(10, size=NUM_ROWS), + "F": np.random.randn(NUM_ROWS), + } + ) + self.df = self.df.set_index(["A", "B", "C", "D", "E"]) + + def time_unstack(self): + self.df.unstack() + + +class WideToLong: + def setup(self): + nyrs = 20 + nidvars = 20 + N = 5000 + self.letters = list("ABCD") + yrvars = [ + letter + str(num) + for letter, num in product(self.letters, range(1, nyrs + 1)) + ] + columns = [str(i) for i in range(nidvars)] + yrvars + self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)), columns=columns) + self.df["id"] = self.df.index + + def time_wide_to_long_big(self): + wide_to_long(self.df, self.letters, i="id", j="year") + + +class PivotTable: + def setup(self): + N = 100000 + fac1 = np.array(["A", "B", "C"], dtype="O") + fac2 = np.array(["one", "two"], dtype="O") + ind1 = np.random.randint(0, 3, size=N) + ind2 = np.random.randint(0, 2, size=N) + self.df = DataFrame( + { + "key1": fac1.take(ind1), + "key2": fac2.take(ind2), + "key3": fac2.take(ind2), + "value1": np.random.randn(N), + "value2": np.random.randn(N), + "value3": np.random.randn(N), + } + ) + self.df2 = DataFrame( + {"col1": list("abcde"), "col2": list("fghij"), "col3": [1, 2, 3, 4, 5]} + ) + self.df2.col1 = self.df2.col1.astype("category") + self.df2.col2 = self.df2.col2.astype("category") + + def time_pivot_table(self): + self.df.pivot_table(index="key1", columns=["key2", "key3"]) + + def time_pivot_table_agg(self): + self.df.pivot_table( + index="key1", columns=["key2", "key3"], aggfunc=["sum", "mean"] + ) + + def time_pivot_table_margins(self): + self.df.pivot_table(index="key1", columns=["key2", "key3"], margins=True) + + def time_pivot_table_categorical(self): + self.df2.pivot_table( + index="col1", values="col3", columns="col2", aggfunc=np.sum, fill_value=0 + ) + + def time_pivot_table_categorical_observed(self): + self.df2.pivot_table( + index="col1", + values="col3", + columns="col2", + aggfunc=np.sum, + fill_value=0, + observed=True, + ) + + def time_pivot_table_margins_only_column(self): + self.df.pivot_table(columns=["key2", "key3"], margins=True) + + +class Crosstab: + def setup(self): + N = 100000 + fac1 = np.array(["A", "B", "C"], dtype="O") + fac2 = np.array(["one", "two"], dtype="O") + self.ind1 = np.random.randint(0, 3, size=N) + self.ind2 = np.random.randint(0, 2, size=N) + self.vec1 = fac1.take(self.ind1) + self.vec2 = fac2.take(self.ind2) + + def time_crosstab(self): + pd.crosstab(self.vec1, self.vec2) + + def time_crosstab_values(self): + pd.crosstab(self.vec1, self.vec2, values=self.ind1, aggfunc="sum") + + def time_crosstab_normalize(self): + pd.crosstab(self.vec1, self.vec2, normalize=True) + + def time_crosstab_normalize_margins(self): + pd.crosstab(self.vec1, self.vec2, normalize=True, margins=True) + + +class GetDummies: + def setup(self): + categories = 
list(string.ascii_letters[:12]) + s = pd.Series( + np.random.choice(categories, size=1000000), + dtype=CategoricalDtype(categories), + ) + self.s = s + + def time_get_dummies_1d(self): + pd.get_dummies(self.s, sparse=False) + + def time_get_dummies_1d_sparse(self): + pd.get_dummies(self.s, sparse=True) + + +class Cut: + params = [[4, 10, 1000]] + param_names = ["bins"] + + def setup(self, bins): + N = 10**5 + self.int_series = pd.Series(np.arange(N).repeat(5)) + self.float_series = pd.Series(np.random.randn(N).repeat(5)) + self.timedelta_series = pd.Series( + np.random.randint(N, size=N), dtype="timedelta64[ns]" + ) + self.datetime_series = pd.Series( + np.random.randint(N, size=N), dtype="datetime64[ns]" + ) + self.interval_bins = pd.IntervalIndex.from_breaks(np.linspace(0, N, bins)) + + def time_cut_int(self, bins): + pd.cut(self.int_series, bins) + + def time_cut_float(self, bins): + pd.cut(self.float_series, bins) + + def time_cut_timedelta(self, bins): + pd.cut(self.timedelta_series, bins) + + def time_cut_datetime(self, bins): + pd.cut(self.datetime_series, bins) + + def time_qcut_int(self, bins): + pd.qcut(self.int_series, bins) + + def time_qcut_float(self, bins): + pd.qcut(self.float_series, bins) + + def time_qcut_timedelta(self, bins): + pd.qcut(self.timedelta_series, bins) + + def time_qcut_datetime(self, bins): + pd.qcut(self.datetime_series, bins) + + def time_cut_interval(self, bins): + # GH 27668 + pd.cut(self.int_series, self.interval_bins) + + def peakmem_cut_interval(self, bins): + # GH 27668 + pd.cut(self.int_series, self.interval_bins) + + +class Explode: + param_names = ["n_rows", "max_list_length"] + params = [[100, 1000, 10000], [3, 5, 10]] + + def setup(self, n_rows, max_list_length): + + data = [np.arange(np.random.randint(max_list_length)) for _ in range(n_rows)] + self.series = pd.Series(data) + + def time_explode(self, n_rows, max_list_length): + self.series.explode() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py new file mode 100644 index 00000000..d65a1a39 --- /dev/null +++ b/asv_bench/benchmarks/rolling.py @@ -0,0 +1,377 @@ +import warnings + +import numpy as np + +import pandas as pd + + +class Methods: + + params = ( + ["DataFrame", "Series"], + [("rolling", {"window": 10}), ("rolling", {"window": 1000}), ("expanding", {})], + ["int", "float"], + ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"], + ) + param_names = ["constructor", "window_kwargs", "dtype", "method"] + + def setup(self, constructor, window_kwargs, dtype, method): + N = 10**5 + window, kwargs = window_kwargs + arr = (100 * np.random.random(N)).astype(dtype) + obj = getattr(pd, constructor)(arr) + self.window = getattr(obj, window)(**kwargs) + + def time_method(self, constructor, window_kwargs, dtype, method): + getattr(self.window, method)() + + def peakmem_method(self, constructor, window_kwargs, dtype, method): + getattr(self.window, method)() + + +class Apply: + params = ( + ["DataFrame", "Series"], + [3, 300], + ["int", "float"], + [sum, np.sum, lambda x: np.sum(x) + 5], + [True, False], + ) + param_names = ["constructor", "window", "dtype", "function", "raw"] + + def setup(self, constructor, window, dtype, function, raw): + N = 10**3 + arr = (100 * np.random.random(N)).astype(dtype) + self.roll = getattr(pd, constructor)(arr).rolling(window) + + def time_rolling(self, constructor, window, dtype, function, raw): + self.roll.apply(function, raw=raw) + + +class 
NumbaEngineMethods: + params = ( + ["DataFrame", "Series"], + ["int", "float"], + [("rolling", {"window": 10}), ("expanding", {})], + ["sum", "max", "min", "median", "mean", "var", "std"], + [True, False], + [None, 100], + ) + param_names = [ + "constructor", + "dtype", + "window_kwargs", + "method", + "parallel", + "cols", + ] + + def setup(self, constructor, dtype, window_kwargs, method, parallel, cols): + N = 10**3 + window, kwargs = window_kwargs + shape = (N, cols) if cols is not None and constructor != "Series" else N + arr = (100 * np.random.random(shape)).astype(dtype) + data = getattr(pd, constructor)(arr) + + # Warm the cache + with warnings.catch_warnings(record=True): + # Catch parallel=True not being applicable e.g. 1D data + self.window = getattr(data, window)(**kwargs) + getattr(self.window, method)( + engine="numba", engine_kwargs={"parallel": parallel} + ) + + def test_method(self, constructor, dtype, window_kwargs, method, parallel, cols): + with warnings.catch_warnings(record=True): + getattr(self.window, method)( + engine="numba", engine_kwargs={"parallel": parallel} + ) + + +class NumbaEngineApply: + params = ( + ["DataFrame", "Series"], + ["int", "float"], + [("rolling", {"window": 10}), ("expanding", {})], + [np.sum, lambda x: np.sum(x) + 5], + [True, False], + [None, 100], + ) + param_names = [ + "constructor", + "dtype", + "window_kwargs", + "function", + "parallel", + "cols", + ] + + def setup(self, constructor, dtype, window_kwargs, function, parallel, cols): + N = 10**3 + window, kwargs = window_kwargs + shape = (N, cols) if cols is not None and constructor != "Series" else N + arr = (100 * np.random.random(shape)).astype(dtype) + data = getattr(pd, constructor)(arr) + + # Warm the cache + with warnings.catch_warnings(record=True): + # Catch parallel=True not being applicable e.g. 
1D data + self.window = getattr(data, window)(**kwargs) + self.window.apply( + function, raw=True, engine="numba", engine_kwargs={"parallel": parallel} + ) + + def test_method(self, constructor, dtype, window_kwargs, function, parallel, cols): + with warnings.catch_warnings(record=True): + self.window.apply( + function, raw=True, engine="numba", engine_kwargs={"parallel": parallel} + ) + + +class EWMMethods: + + params = ( + ["DataFrame", "Series"], + [ + ({"halflife": 10}, "mean"), + ({"halflife": 10}, "std"), + ({"halflife": 1000}, "mean"), + ({"halflife": 1000}, "std"), + ( + { + "halflife": "1 Day", + "times": pd.date_range("1900", periods=10**5, freq="23s"), + }, + "mean", + ), + ], + ["int", "float"], + ) + param_names = ["constructor", "kwargs_method", "dtype"] + + def setup(self, constructor, kwargs_method, dtype): + N = 10**5 + kwargs, method = kwargs_method + arr = (100 * np.random.random(N)).astype(dtype) + self.method = method + self.ewm = getattr(pd, constructor)(arr).ewm(**kwargs) + + def time_ewm(self, constructor, kwargs_method, dtype): + getattr(self.ewm, self.method)() + + +class VariableWindowMethods(Methods): + params = ( + ["DataFrame", "Series"], + ["50s", "1h", "1d"], + ["int", "float"], + ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"], + ) + param_names = ["constructor", "window", "dtype", "method"] + + def setup(self, constructor, window, dtype, method): + N = 10**5 + arr = (100 * np.random.random(N)).astype(dtype) + index = pd.date_range("2017-01-01", periods=N, freq="5s") + self.window = getattr(pd, constructor)(arr, index=index).rolling(window) + + +class Pairwise: + + params = ( + [({"window": 10}, "rolling"), ({"window": 1000}, "rolling"), ({}, "expanding")], + ["corr", "cov"], + [True, False], + ) + param_names = ["window_kwargs", "method", "pairwise"] + + def setup(self, kwargs_window, method, pairwise): + N = 10**4 + n_groups = 20 + kwargs, window = kwargs_window + groups = [i for _ in range(N // n_groups) for i in range(n_groups)] + arr = np.random.random(N) + self.df = pd.DataFrame(arr) + self.window = getattr(self.df, window)(**kwargs) + self.window_group = getattr( + pd.DataFrame({"A": groups, "B": arr}).groupby("A"), window + )(**kwargs) + + def time_pairwise(self, kwargs_window, method, pairwise): + getattr(self.window, method)(self.df, pairwise=pairwise) + + def time_groupby(self, kwargs_window, method, pairwise): + getattr(self.window_group, method)(self.df, pairwise=pairwise) + + +class Quantile: + params = ( + ["DataFrame", "Series"], + [10, 1000], + ["int", "float"], + [0, 0.5, 1], + ["linear", "nearest", "lower", "higher", "midpoint"], + ) + param_names = ["constructor", "window", "dtype", "percentile"] + + def setup(self, constructor, window, dtype, percentile, interpolation): + N = 10**5 + arr = np.random.random(N).astype(dtype) + self.roll = getattr(pd, constructor)(arr).rolling(window) + + def time_quantile(self, constructor, window, dtype, percentile, interpolation): + self.roll.quantile(percentile, interpolation=interpolation) + + +class Rank: + params = ( + ["DataFrame", "Series"], + [10, 1000], + ["int", "float"], + [True, False], + [True, False], + ["min", "max", "average"], + ) + param_names = [ + "constructor", + "window", + "dtype", + "percentile", + "ascending", + "method", + ] + + def setup(self, constructor, window, dtype, percentile, ascending, method): + N = 10**5 + arr = np.random.random(N).astype(dtype) + self.roll = getattr(pd, constructor)(arr).rolling(window) + + def time_rank(self, 
constructor, window, dtype, percentile, ascending, method): + self.roll.rank(pct=percentile, ascending=ascending, method=method) + + +class PeakMemFixedWindowMinMax: + + params = ["min", "max"] + + def setup(self, operation): + N = 10**6 + arr = np.random.random(N) + self.roll = pd.Series(arr).rolling(2) + + def peakmem_fixed(self, operation): + for x in range(5): + getattr(self.roll, operation)() + + +class ForwardWindowMethods: + params = ( + ["DataFrame", "Series"], + [10, 1000], + ["int", "float"], + ["median", "mean", "max", "min", "kurt", "sum"], + ) + param_names = ["constructor", "window_size", "dtype", "method"] + + def setup(self, constructor, window_size, dtype, method): + N = 10**5 + arr = np.random.random(N).astype(dtype) + indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=window_size) + self.roll = getattr(pd, constructor)(arr).rolling(window=indexer) + + def time_rolling(self, constructor, window_size, dtype, method): + getattr(self.roll, method)() + + def peakmem_rolling(self, constructor, window_size, dtype, method): + getattr(self.roll, method)() + + +class Groupby: + + params = ( + ["sum", "median", "mean", "max", "min", "kurt", "sum"], + [ + ("rolling", {"window": 2}), + ("rolling", {"window": "30s", "on": "C"}), + ("expanding", {}), + ], + ) + + def setup(self, method, window_kwargs): + N = 1000 + window, kwargs = window_kwargs + df = pd.DataFrame( + { + "A": [str(i) for i in range(N)] * 10, + "B": list(range(N)) * 10, + "C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10), + } + ) + self.groupby_window = getattr(df.groupby("A"), window)(**kwargs) + + def time_method(self, method, window_kwargs): + getattr(self.groupby_window, method)() + + +class GroupbyLargeGroups: + # https://github.com/pandas-dev/pandas/issues/38038 + # specific example where the rolling operation on a larger dataframe + # is relatively cheap (few but large groups), but creation of + # MultiIndex of result can be expensive + + def setup(self): + N = 100000 + self.df = pd.DataFrame({"A": [1, 2] * (N // 2), "B": np.random.randn(N)}) + + def time_rolling_multiindex_creation(self): + self.df.groupby("A").rolling(3).mean() + + +class GroupbyEWM: + + params = ["var", "std", "cov", "corr"] + param_names = ["method"] + + def setup(self, method): + df = pd.DataFrame({"A": range(50), "B": range(50)}) + self.gb_ewm = df.groupby("A").ewm(com=1.0) + + def time_groupby_method(self, method): + getattr(self.gb_ewm, method)() + + +class GroupbyEWMEngine: + + params = ["cython", "numba"] + param_names = ["engine"] + + def setup(self, engine): + df = pd.DataFrame({"A": range(50), "B": range(50)}) + self.gb_ewm = df.groupby("A").ewm(com=1.0) + + def time_groupby_mean(self, engine): + self.gb_ewm.mean(engine=engine) + + +def table_method_func(x): + return np.sum(x, axis=0) + 1 + + +class TableMethod: + + params = ["single", "table"] + param_names = ["method"] + + def setup(self, method): + self.df = pd.DataFrame(np.random.randn(10, 1000)) + + def time_apply(self, method): + self.df.rolling(2, method=method).apply( + table_method_func, raw=True, engine="numba" + ) + + def time_ewm_mean(self, method): + self.df.ewm(1, method=method).mean(engine="numba") + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py new file mode 100644 index 00000000..09c318af --- /dev/null +++ b/asv_bench/benchmarks/series_methods.py @@ -0,0 +1,295 @@ +from datetime import datetime + +import numpy as np + +from 
pandas import ( + Index, + NaT, + Series, + date_range, +) + +from .pandas_vb_common import tm + + +class SeriesConstructor: + def setup(self): + self.idx = date_range( + start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s" + ) + self.data = dict(zip(self.idx, range(len(self.idx)))) + self.array = np.array([1, 2, 3]) + self.idx2 = Index(["a", "b", "c"]) + + def time_constructor_dict(self): + Series(data=self.data, index=self.idx) + + def time_constructor_no_data(self): + Series(data=None, index=self.idx) + + def time_constructor_fastpath(self): + Series(self.array, index=self.idx2, name="name", fastpath=True) + + +class ToFrame: + params = [["int64", "datetime64[ns]", "category", "Int64"], [None, "foo"]] + param_names = ["dtype", "name"] + + def setup(self, dtype, name): + arr = np.arange(10**5) + ser = Series(arr, dtype=dtype) + self.ser = ser + + def time_to_frame(self, dtype, name): + self.ser.to_frame(name) + + +class NSort: + + params = ["first", "last", "all"] + param_names = ["keep"] + + def setup(self, keep): + self.s = Series(np.random.randint(1, 10, 100000)) + + def time_nlargest(self, keep): + self.s.nlargest(3, keep=keep) + + def time_nsmallest(self, keep): + self.s.nsmallest(3, keep=keep) + + +class Dropna: + + params = ["int", "datetime"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10**6 + data = { + "int": np.random.randint(1, 10, N), + "datetime": date_range("2000-01-01", freq="S", periods=N), + } + self.s = Series(data[dtype]) + if dtype == "datetime": + self.s[np.random.randint(1, N, 100)] = NaT + + def time_dropna(self, dtype): + self.s.dropna() + + +class SearchSorted: + + goal_time = 0.2 + params = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float16", + "float32", + "float64", + "str", + ] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10**5 + data = np.array([1] * N + [2] * N + [3] * N).astype(dtype) + self.s = Series(data) + + def time_searchsorted(self, dtype): + key = "2" if dtype == "str" else 2 + self.s.searchsorted(key) + + +class Map: + + params = (["dict", "Series", "lambda"], ["object", "category", "int"]) + param_names = "mapper" + + def setup(self, mapper, dtype): + map_size = 1000 + map_data = Series(map_size - np.arange(map_size), dtype=dtype) + + # construct mapper + if mapper == "Series": + self.map_data = map_data + elif mapper == "dict": + self.map_data = map_data.to_dict() + elif mapper == "lambda": + map_dict = map_data.to_dict() + self.map_data = lambda x: map_dict[x] + else: + raise NotImplementedError + + self.s = Series(np.random.randint(0, map_size, 10000), dtype=dtype) + + def time_map(self, mapper, *args, **kwargs): + self.s.map(self.map_data) + + +class Clip: + params = [50, 1000, 10**5] + param_names = ["n"] + + def setup(self, n): + self.s = Series(np.random.randn(n)) + + def time_clip(self, n): + self.s.clip(0, 1) + + +class ClipDt: + def setup(self): + dr = date_range("20220101", periods=100_000, freq="s", tz="UTC") + self.clipper_dt = dr[0:1_000].repeat(100) + self.s = Series(dr) + + def time_clip(self): + self.s.clip(upper=self.clipper_dt) + + +class ValueCounts: + + params = [[10**3, 10**4, 10**5], ["int", "uint", "float", "object"]] + param_names = ["N", "dtype"] + + def setup(self, N, dtype): + self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype) + + def time_value_counts(self, N, dtype): + self.s.value_counts() + + +class ValueCountsObjectDropNAFalse: + + params = [10**3, 10**4, 10**5] + param_names = ["N"] + + def 
setup(self, N): + self.s = Series(np.random.randint(0, N, size=10 * N)).astype("object") + + def time_value_counts(self, N): + self.s.value_counts(dropna=False) + + +class Mode: + + params = [[10**3, 10**4, 10**5], ["int", "uint", "float", "object"]] + param_names = ["N", "dtype"] + + def setup(self, N, dtype): + self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype) + + def time_mode(self, N, dtype): + self.s.mode() + + +class ModeObjectDropNAFalse: + + params = [10**3, 10**4, 10**5] + param_names = ["N"] + + def setup(self, N): + self.s = Series(np.random.randint(0, N, size=10 * N)).astype("object") + + def time_mode(self, N): + self.s.mode(dropna=False) + + +class Dir: + def setup(self): + self.s = Series(index=tm.makeStringIndex(10000)) + + def time_dir_strings(self): + dir(self.s) + + +class SeriesGetattr: + # https://github.com/pandas-dev/pandas/issues/19764 + def setup(self): + self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=10**6)) + + def time_series_datetimeindex_repr(self): + getattr(self.s, "a", None) + + +class All: + + params = [[10**3, 10**6], ["fast", "slow"], ["bool", "boolean"]] + param_names = ["N", "case", "dtype"] + + def setup(self, N, case, dtype): + val = case != "fast" + self.s = Series([val] * N, dtype=dtype) + + def time_all(self, N, case, dtype): + self.s.all() + + +class Any: + + params = [[10**3, 10**6], ["fast", "slow"], ["bool", "boolean"]] + param_names = ["N", "case", "dtype"] + + def setup(self, N, case, dtype): + val = case == "fast" + self.s = Series([val] * N, dtype=dtype) + + def time_any(self, N, case, dtype): + self.s.any() + + +class NanOps: + + params = [ + [ + "var", + "mean", + "median", + "max", + "min", + "sum", + "std", + "sem", + "argmax", + "skew", + "kurt", + "prod", + ], + [10**3, 10**6], + ["int8", "int32", "int64", "float64", "Int64", "boolean"], + ] + param_names = ["func", "N", "dtype"] + + def setup(self, func, N, dtype): + if func == "argmax" and dtype in {"Int64", "boolean"}: + # Skip argmax for nullable int since this doesn't work yet (GH-24382) + raise NotImplementedError + self.s = Series([1] * N, dtype=dtype) + self.func = getattr(self.s, func) + + def time_func(self, func, N, dtype): + self.func() + + +class Rank: + + param_names = ["dtype"] + params = [ + ["int", "uint", "float", "object"], + ] + + def setup(self, dtype): + self.s = Series(np.random.randint(0, 1000, size=100000), dtype=dtype) + + def time_rank(self, dtype): + self.s.rank() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py new file mode 100644 index 00000000..10390cb4 --- /dev/null +++ b/asv_bench/benchmarks/sparse.py @@ -0,0 +1,233 @@ +import numpy as np +import scipy.sparse + +import pandas as pd +from pandas import ( + MultiIndex, + Series, + date_range, +) +from pandas.arrays import SparseArray + + +def make_array(size, dense_proportion, fill_value, dtype): + dense_size = int(size * dense_proportion) + arr = np.full(size, fill_value, dtype) + indexer = np.random.choice(np.arange(size), dense_size, replace=False) + arr[indexer] = np.random.choice(np.arange(100, dtype=dtype), dense_size) + return arr + + +class SparseSeriesToFrame: + def setup(self): + K = 50 + N = 50001 + rng = date_range("1/1/2000", periods=N, freq="T") + self.series = {} + for i in range(1, K): + data = np.random.randn(N)[:-i] + idx = rng[:-i] + data[100:] = np.nan + self.series[i] = Series(SparseArray(data), index=idx) + + def time_series_to_frame(self): + 
pd.DataFrame(self.series) + + +class SparseArrayConstructor: + + params = ([0.1, 0.01], [0, np.nan], [np.int64, np.float64, object]) + param_names = ["dense_proportion", "fill_value", "dtype"] + + def setup(self, dense_proportion, fill_value, dtype): + N = 10**6 + self.array = make_array(N, dense_proportion, fill_value, dtype) + + def time_sparse_array(self, dense_proportion, fill_value, dtype): + SparseArray(self.array, fill_value=fill_value, dtype=dtype) + + +class SparseDataFrameConstructor: + def setup(self): + N = 1000 + self.sparse = scipy.sparse.rand(N, N, 0.005) + + def time_from_scipy(self): + pd.DataFrame.sparse.from_spmatrix(self.sparse) + + +class FromCoo: + def setup(self): + self.matrix = scipy.sparse.coo_matrix( + ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100) + ) + + def time_sparse_series_from_coo(self): + Series.sparse.from_coo(self.matrix) + + +class ToCoo: + params = [True, False] + param_names = ["sort_labels"] + + def setup(self, sort_labels): + s = Series([np.nan] * 10000) + s[0] = 3.0 + s[100] = -1.0 + s[999] = 12.1 + + s_mult_lvl = s.set_axis(MultiIndex.from_product([range(10)] * 4)) + self.ss_mult_lvl = s_mult_lvl.astype("Sparse") + + s_two_lvl = s.set_axis(MultiIndex.from_product([range(100)] * 2)) + self.ss_two_lvl = s_two_lvl.astype("Sparse") + + def time_sparse_series_to_coo(self, sort_labels): + self.ss_mult_lvl.sparse.to_coo( + row_levels=[0, 1], column_levels=[2, 3], sort_labels=sort_labels + ) + + def time_sparse_series_to_coo_single_level(self, sort_labels): + self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels) + + +class ToCooFrame: + def setup(self): + N = 10000 + k = 10 + arr = np.zeros((N, k), dtype=float) + arr[0, 0] = 3.0 + arr[12, 7] = -1.0 + arr[0, 9] = 11.2 + self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float", fill_value=0.0)) + + def time_to_coo(self): + self.df.sparse.to_coo() + + +class Arithmetic: + + params = ([0.1, 0.01], [0, np.nan]) + param_names = ["dense_proportion", "fill_value"] + + def setup(self, dense_proportion, fill_value): + N = 10**6 + arr1 = make_array(N, dense_proportion, fill_value, np.int64) + self.array1 = SparseArray(arr1, fill_value=fill_value) + arr2 = make_array(N, dense_proportion, fill_value, np.int64) + self.array2 = SparseArray(arr2, fill_value=fill_value) + + def time_make_union(self, dense_proportion, fill_value): + self.array1.sp_index.make_union(self.array2.sp_index) + + def time_intersect(self, dense_proportion, fill_value): + self.array1.sp_index.intersect(self.array2.sp_index) + + def time_add(self, dense_proportion, fill_value): + self.array1 + self.array2 + + def time_divide(self, dense_proportion, fill_value): + self.array1 / self.array2 + + +class ArithmeticBlock: + + params = [np.nan, 0] + param_names = ["fill_value"] + + def setup(self, fill_value): + N = 10**6 + self.arr1 = self.make_block_array( + length=N, num_blocks=1000, block_size=10, fill_value=fill_value + ) + self.arr2 = self.make_block_array( + length=N, num_blocks=1000, block_size=10, fill_value=fill_value + ) + + def make_block_array(self, length, num_blocks, block_size, fill_value): + arr = np.full(length, fill_value) + indices = np.random.choice( + np.arange(0, length, block_size), num_blocks, replace=False + ) + for ind in indices: + arr[ind : ind + block_size] = np.random.randint(0, 100, block_size) + return SparseArray(arr, fill_value=fill_value) + + def time_make_union(self, fill_value): + self.arr1.sp_index.make_union(self.arr2.sp_index) + + def time_intersect(self, fill_value): + 
self.arr2.sp_index.intersect(self.arr2.sp_index) + + def time_addition(self, fill_value): + self.arr1 + self.arr2 + + def time_division(self, fill_value): + self.arr1 / self.arr2 + + +class MinMax: + + params = (["min", "max"], [0.0, np.nan]) + param_names = ["func", "fill_value"] + + def setup(self, func, fill_value): + N = 1_000_000 + arr = make_array(N, 1e-5, fill_value, np.float64) + self.sp_arr = SparseArray(arr, fill_value=fill_value) + + def time_min_max(self, func, fill_value): + getattr(self.sp_arr, func)() + + +class Take: + + params = ([np.array([0]), np.arange(100_000), np.full(100_000, -1)], [True, False]) + param_names = ["indices", "allow_fill"] + + def setup(self, indices, allow_fill): + N = 1_000_000 + fill_value = 0.0 + arr = make_array(N, 1e-5, fill_value, np.float64) + self.sp_arr = SparseArray(arr, fill_value=fill_value) + + def time_take(self, indices, allow_fill): + self.sp_arr.take(indices, allow_fill=allow_fill) + + +class GetItem: + def setup(self): + N = 1_000_000 + d = 1e-5 + arr = make_array(N, d, np.nan, np.float64) + self.sp_arr = SparseArray(arr) + + def time_integer_indexing(self): + self.sp_arr[78] + + def time_slice(self): + self.sp_arr[1:] + + +class GetItemMask: + + params = [True, False, np.nan] + param_names = ["fill_value"] + + def setup(self, fill_value): + N = 1_000_000 + d = 1e-5 + arr = make_array(N, d, np.nan, np.float64) + self.sp_arr = SparseArray(arr) + b_arr = np.full(shape=N, fill_value=fill_value, dtype=np.bool_) + fv_inds = np.unique( + np.random.randint(low=0, high=N - 1, size=int(N * d), dtype=np.int32) + ) + b_arr[fv_inds] = True if pd.isna(fill_value) else not fill_value + self.sp_b_arr = SparseArray(b_arr, dtype=np.bool_, fill_value=fill_value) + + def time_mask(self, fill_value): + self.sp_arr[self.sp_b_arr] + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py new file mode 100644 index 00000000..92a78b7c --- /dev/null +++ b/asv_bench/benchmarks/stat_ops.py @@ -0,0 +1,144 @@ +import numpy as np + +import pandas as pd + +ops = ["mean", "sum", "median", "std", "skew", "kurt", "mad", "prod", "sem", "var"] + + +class FrameOps: + + params = [ops, ["float", "int", "Int64"], [0, 1]] + param_names = ["op", "dtype", "axis"] + + def setup(self, op, dtype, axis): + if op == "mad" and dtype == "Int64": + # GH-33036, GH#33600 + raise NotImplementedError + values = np.random.randn(100000, 4) + if dtype == "Int64": + values = values.astype(int) + df = pd.DataFrame(values).astype(dtype) + self.df_func = getattr(df, op) + + def time_op(self, op, dtype, axis): + self.df_func(axis=axis) + + +class FrameMultiIndexOps: + + params = ([0, 1, [0, 1]], ops) + param_names = ["level", "op"] + + def setup(self, level, op): + levels = [np.arange(10), np.arange(100), np.arange(100)] + codes = [ + np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10), + ] + index = pd.MultiIndex(levels=levels, codes=codes) + df = pd.DataFrame(np.random.randn(len(index), 4), index=index) + self.df_func = getattr(df, op) + + def time_op(self, level, op): + self.df_func(level=level) + + +class SeriesOps: + + params = [ops, ["float", "int"]] + param_names = ["op", "dtype"] + + def setup(self, op, dtype): + s = pd.Series(np.random.randn(100000)).astype(dtype) + self.s_func = getattr(s, op) + + def time_op(self, op, dtype): + self.s_func() + + +class SeriesMultiIndexOps: + + params = ([0, 1, [0, 1]], ops) + param_names = ["level", 
"op"] + + def setup(self, level, op): + levels = [np.arange(10), np.arange(100), np.arange(100)] + codes = [ + np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10), + ] + index = pd.MultiIndex(levels=levels, codes=codes) + s = pd.Series(np.random.randn(len(index)), index=index) + self.s_func = getattr(s, op) + + def time_op(self, level, op): + self.s_func(level=level) + + +class Rank: + + params = [["DataFrame", "Series"], [True, False]] + param_names = ["constructor", "pct"] + + def setup(self, constructor, pct): + values = np.random.randn(10**5) + self.data = getattr(pd, constructor)(values) + + def time_rank(self, constructor, pct): + self.data.rank(pct=pct) + + def time_average_old(self, constructor, pct): + self.data.rank(pct=pct) / len(self.data) + + +class Correlation: + + params = [["spearman", "kendall", "pearson"]] + param_names = ["method"] + + def setup(self, method): + self.df = pd.DataFrame(np.random.randn(500, 15)) + self.df2 = pd.DataFrame(np.random.randn(500, 15)) + self.df_wide = pd.DataFrame(np.random.randn(500, 100)) + self.df_wide_nans = self.df_wide.where(np.random.random((500, 100)) < 0.9) + self.s = pd.Series(np.random.randn(500)) + self.s2 = pd.Series(np.random.randn(500)) + + def time_corr(self, method): + self.df.corr(method=method) + + def time_corr_wide(self, method): + self.df_wide.corr(method=method) + + def time_corr_wide_nans(self, method): + self.df_wide_nans.corr(method=method) + + def peakmem_corr_wide(self, method): + self.df_wide.corr(method=method) + + def time_corr_series(self, method): + self.s.corr(self.s2, method=method) + + def time_corrwith_cols(self, method): + self.df.corrwith(self.df2, method=method) + + def time_corrwith_rows(self, method): + self.df.corrwith(self.df2, axis=1, method=method) + + +class Covariance: + + params = [] + param_names = [] + + def setup(self): + self.s = pd.Series(np.random.randn(100000)) + self.s2 = pd.Series(np.random.randn(100000)) + + def time_cov_series(self): + self.s.cov(self.s2) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/strftime.py b/asv_bench/benchmarks/strftime.py new file mode 100644 index 00000000..ac1b7f65 --- /dev/null +++ b/asv_bench/benchmarks/strftime.py @@ -0,0 +1,64 @@ +import numpy as np + +import pandas as pd +from pandas import offsets + + +class DatetimeStrftime: + timeout = 1500 + params = [1000, 10000] + param_names = ["obs"] + + def setup(self, obs): + d = "2018-11-29" + dt = "2018-11-26 11:18:27.0" + self.data = pd.DataFrame( + { + "dt": [np.datetime64(dt)] * obs, + "d": [np.datetime64(d)] * obs, + "r": [np.random.uniform()] * obs, + } + ) + + def time_frame_date_to_str(self, obs): + self.data["d"].astype(str) + + def time_frame_date_formatting_default(self, obs): + self.data["d"].dt.strftime(date_format="%Y-%m-%d") + + def time_frame_date_formatting_custom(self, obs): + self.data["d"].dt.strftime(date_format="%Y---%m---%d") + + def time_frame_datetime_to_str(self, obs): + self.data["dt"].astype(str) + + def time_frame_datetime_formatting_default_date_only(self, obs): + self.data["dt"].dt.strftime(date_format="%Y-%m-%d") + + def time_frame_datetime_formatting_default(self, obs): + self.data["dt"].dt.strftime(date_format="%Y-%m-%d %H:%M:%S") + + def time_frame_datetime_formatting_default_with_float(self, obs): + self.data["dt"].dt.strftime(date_format="%Y-%m-%d %H:%M:%S.%f") + + def time_frame_datetime_formatting_custom(self, obs): + 
self.data["dt"].dt.strftime(date_format="%Y-%m-%d --- %H:%M:%S") + + +class BusinessHourStrftime: + timeout = 1500 + params = [1000, 10000] + param_names = ["obs"] + + def setup(self, obs): + self.data = pd.DataFrame( + { + "off": [offsets.BusinessHour()] * obs, + } + ) + + def time_frame_offset_str(self, obs): + self.data["off"].apply(str) + + def time_frame_offset_repr(self, obs): + self.data["off"].apply(repr) diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py new file mode 100644 index 00000000..eec722c9 --- /dev/null +++ b/asv_bench/benchmarks/strings.py @@ -0,0 +1,304 @@ +import warnings + +import numpy as np + +from pandas import ( + NA, + Categorical, + DataFrame, + Series, +) +from pandas.arrays import StringArray + +from .pandas_vb_common import tm + + +class Dtypes: + params = ["str", "string[python]", "string[pyarrow]"] + param_names = ["dtype"] + + def setup(self, dtype): + try: + self.s = Series(tm.makeStringIndex(10**5), dtype=dtype) + except ImportError: + raise NotImplementedError + + +class Construction: + + params = ["str", "string"] + param_names = ["dtype"] + + def setup(self, dtype): + self.series_arr = tm.rands_array(nchars=10, size=10**5) + self.frame_arr = self.series_arr.reshape((50_000, 2)).copy() + + # GH37371. Testing construction of string series/frames from ExtensionArrays + self.series_cat_arr = Categorical(self.series_arr) + self.frame_cat_arr = Categorical(self.frame_arr) + + def time_series_construction(self, dtype): + Series(self.series_arr, dtype=dtype) + + def peakmem_series_construction(self, dtype): + Series(self.series_arr, dtype=dtype) + + def time_frame_construction(self, dtype): + DataFrame(self.frame_arr, dtype=dtype) + + def peakmem_frame_construction(self, dtype): + DataFrame(self.frame_arr, dtype=dtype) + + def time_cat_series_construction(self, dtype): + Series(self.series_cat_arr, dtype=dtype) + + def peakmem_cat_series_construction(self, dtype): + Series(self.series_cat_arr, dtype=dtype) + + def time_cat_frame_construction(self, dtype): + DataFrame(self.frame_cat_arr, dtype=dtype) + + def peakmem_cat_frame_construction(self, dtype): + DataFrame(self.frame_cat_arr, dtype=dtype) + + +class Methods(Dtypes): + def time_center(self, dtype): + self.s.str.center(100) + + def time_count(self, dtype): + self.s.str.count("A") + + def time_endswith(self, dtype): + self.s.str.endswith("A") + + def time_extract(self, dtype): + with warnings.catch_warnings(record=True): + self.s.str.extract("(\\w*)A(\\w*)") + + def time_findall(self, dtype): + self.s.str.findall("[A-Z]+") + + def time_find(self, dtype): + self.s.str.find("[A-Z]+") + + def time_rfind(self, dtype): + self.s.str.rfind("[A-Z]+") + + def time_fullmatch(self, dtype): + self.s.str.fullmatch("A") + + def time_get(self, dtype): + self.s.str.get(0) + + def time_len(self, dtype): + self.s.str.len() + + def time_join(self, dtype): + self.s.str.join(" ") + + def time_match(self, dtype): + self.s.str.match("A") + + def time_normalize(self, dtype): + self.s.str.normalize("NFC") + + def time_pad(self, dtype): + self.s.str.pad(100, side="both") + + def time_partition(self, dtype): + self.s.str.partition("A") + + def time_rpartition(self, dtype): + self.s.str.rpartition("A") + + def time_replace(self, dtype): + self.s.str.replace("A", "\x01\x01") + + def time_translate(self, dtype): + self.s.str.translate({"A": "\x01\x01"}) + + def time_slice(self, dtype): + self.s.str.slice(5, 15, 2) + + def time_startswith(self, dtype): + self.s.str.startswith("A") + + def time_strip(self, 
dtype): + self.s.str.strip("A") + + def time_rstrip(self, dtype): + self.s.str.rstrip("A") + + def time_lstrip(self, dtype): + self.s.str.lstrip("A") + + def time_title(self, dtype): + self.s.str.title() + + def time_upper(self, dtype): + self.s.str.upper() + + def time_lower(self, dtype): + self.s.str.lower() + + def time_wrap(self, dtype): + self.s.str.wrap(10) + + def time_zfill(self, dtype): + self.s.str.zfill(10) + + def time_isalnum(self, dtype): + self.s.str.isalnum() + + def time_isalpha(self, dtype): + self.s.str.isalpha() + + def time_isdecimal(self, dtype): + self.s.str.isdecimal() + + def time_isdigit(self, dtype): + self.s.str.isdigit() + + def time_islower(self, dtype): + self.s.str.islower() + + def time_isnumeric(self, dtype): + self.s.str.isnumeric() + + def time_isspace(self, dtype): + self.s.str.isspace() + + def time_istitle(self, dtype): + self.s.str.istitle() + + def time_isupper(self, dtype): + self.s.str.isupper() + + +class Repeat: + + params = ["int", "array"] + param_names = ["repeats"] + + def setup(self, repeats): + N = 10**5 + self.s = Series(tm.makeStringIndex(N)) + repeat = {"int": 1, "array": np.random.randint(1, 3, N)} + self.values = repeat[repeats] + + def time_repeat(self, repeats): + self.s.str.repeat(self.values) + + +class Cat: + + params = ([0, 3], [None, ","], [None, "-"], [0.0, 0.001, 0.15]) + param_names = ["other_cols", "sep", "na_rep", "na_frac"] + + def setup(self, other_cols, sep, na_rep, na_frac): + N = 10**5 + mask_gen = lambda: np.random.choice([True, False], N, p=[1 - na_frac, na_frac]) + self.s = Series(tm.makeStringIndex(N)).where(mask_gen()) + if other_cols == 0: + # str.cat self-concatenates only for others=None + self.others = None + else: + self.others = DataFrame( + {i: tm.makeStringIndex(N).where(mask_gen()) for i in range(other_cols)} + ) + + def time_cat(self, other_cols, sep, na_rep, na_frac): + # before the concatenation (one caller + other_cols columns), the total + # expected fraction of rows containing any NaN is: + # reduce(lambda t, _: t + (1 - t) * na_frac, range(other_cols + 1), 0) + # for other_cols=3 and na_frac=0.15, this works out to ~48% + self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep) + + +class Contains(Dtypes): + + params = (Dtypes.params, [True, False]) + param_names = ["dtype", "regex"] + + def setup(self, dtype, regex): + super().setup(dtype) + + def time_contains(self, dtype, regex): + self.s.str.contains("A", regex=regex) + + +class Split(Dtypes): + + params = (Dtypes.params, [True, False]) + param_names = ["dtype", "expand"] + + def setup(self, dtype, expand): + super().setup(dtype) + self.s = self.s.str.join("--") + + def time_split(self, dtype, expand): + self.s.str.split("--", expand=expand) + + def time_rsplit(self, dtype, expand): + self.s.str.rsplit("--", expand=expand) + + +class Extract(Dtypes): + + params = (Dtypes.params, [True, False]) + param_names = ["dtype", "expand"] + + def setup(self, dtype, expand): + super().setup(dtype) + + def time_extract_single_group(self, dtype, expand): + with warnings.catch_warnings(record=True): + self.s.str.extract("(\\w*)A", expand=expand) + + +class Dummies(Dtypes): + def setup(self, dtype): + super().setup(dtype) + self.s = self.s.str.join("|") + + def time_get_dummies(self, dtype): + self.s.str.get_dummies("|") + + +class Encode: + def setup(self): + self.ser = Series(tm.makeStringIndex()) + + def time_encode_decode(self): + self.ser.str.encode("utf-8").str.decode("utf-8") + + +class Slice: + def setup(self): + self.s = Series(["abcdefg", np.nan] 
* 500000) + + def time_vector_slice(self): + # GH 2602 + self.s.str[:5] + + +class Iter(Dtypes): + def time_iter(self, dtype): + for i in self.s: + pass + + +class StringArrayConstruction: + def setup(self): + self.series_arr = tm.rands_array(nchars=10, size=10**5) + self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)]) + + def time_string_array_construction(self): + StringArray(self.series_arr) + + def time_string_array_with_nan_construction(self): + StringArray(self.series_arr_nan) + + def peakmem_stringarray_construction(self): + StringArray(self.series_arr) diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py new file mode 100644 index 00000000..cb0e4455 --- /dev/null +++ b/asv_bench/benchmarks/timedelta.py @@ -0,0 +1,61 @@ +""" +Timedelta benchmarks with non-tslibs dependencies. See +benchmarks.tslibs.timedelta for benchmarks that rely only on tslibs. +""" + +from pandas import ( + DataFrame, + Series, + timedelta_range, +) + + +class DatetimeAccessor: + def setup_cache(self): + N = 100000 + series = Series(timedelta_range("1 days", periods=N, freq="h")) + return series + + def time_dt_accessor(self, series): + series.dt + + def time_timedelta_days(self, series): + series.dt.days + + def time_timedelta_seconds(self, series): + series.dt.seconds + + def time_timedelta_microseconds(self, series): + series.dt.microseconds + + def time_timedelta_nanoseconds(self, series): + series.dt.nanoseconds + + +class TimedeltaIndexing: + def setup(self): + self.index = timedelta_range(start="1985", periods=1000, freq="D") + self.index2 = timedelta_range(start="1986", periods=1000, freq="D") + self.series = Series(range(1000), index=self.index) + self.timedelta = self.index[500] + + def time_get_loc(self): + self.index.get_loc(self.timedelta) + + def time_shallow_copy(self): + self.index._view() + + def time_series_loc(self): + self.series.loc[self.timedelta] + + def time_align(self): + DataFrame({"a": self.series, "b": self.series[:500]}) + + def time_intersection(self): + self.index.intersection(self.index2) + + def time_union(self): + self.index.union(self.index2) + + def time_unique(self): + self.index.unique() diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py new file mode 100644 index 00000000..9373edad --- /dev/null +++ b/asv_bench/benchmarks/timeseries.py @@ -0,0 +1,306 @@ +from datetime import timedelta + +import dateutil +import numpy as np + +from pandas import ( + DataFrame, + Series, + date_range, + period_range, + timedelta_range, +) + +from pandas.tseries.frequencies import infer_freq + +try: + from pandas.plotting._matplotlib.converter import DatetimeConverter +except ImportError: + from pandas.tseries.converter import DatetimeConverter + + +class DatetimeIndex: + + params = ["dst", "repeated", "tz_aware", "tz_local", "tz_naive"] + param_names = ["index_type"] + + def setup(self, index_type): + N = 100000 + dtidxes = { + "dst": date_range( + start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S" + ), + "repeated": date_range(start="2000", periods=N / 10, freq="s").repeat(10), + "tz_aware": date_range(start="2000", periods=N, freq="s", tz="US/Eastern"), + "tz_local": date_range( + start="2000", periods=N, freq="s", tz=dateutil.tz.tzlocal() + ), + "tz_naive": date_range(start="2000", periods=N, freq="s"), + } + self.index = dtidxes[index_type] + + def time_add_timedelta(self, index_type): + self.index + timedelta(minutes=2) + + def time_normalize(self, index_type): + self.index.normalize() 
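+    # normalize() floors each element to midnight while keeping the timezone,
+    # e.g. DatetimeIndex(["2000-01-01 12:30"]).normalize() -> ["2000-01-01"];
+    # that per-element pass is what the benchmark above times for every index_type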
+ + def time_unique(self, index_type): + self.index.unique() + + def time_to_time(self, index_type): + self.index.time + + def time_get(self, index_type): + self.index[0] + + def time_timeseries_is_month_start(self, index_type): + self.index.is_month_start + + def time_to_date(self, index_type): + self.index.date + + def time_to_pydatetime(self, index_type): + self.index.to_pydatetime() + + def time_is_dates_only(self, index_type): + self.index._is_dates_only + + +class TzLocalize: + + params = [None, "US/Eastern", "UTC", dateutil.tz.tzutc()] + param_names = "tz" + + def setup(self, tz): + dst_rng = date_range( + start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S" + ) + self.index = date_range(start="10/29/2000", end="10/29/2000 00:59:59", freq="S") + self.index = self.index.append(dst_rng) + self.index = self.index.append(dst_rng) + self.index = self.index.append( + date_range(start="10/29/2000 2:00:00", end="10/29/2000 3:00:00", freq="S") + ) + + def time_infer_dst(self, tz): + self.index.tz_localize(tz, ambiguous="infer") + + +class ResetIndex: + + params = [None, "US/Eastern"] + param_names = "tz" + + def setup(self, tz): + idx = date_range(start="1/1/2000", periods=1000, freq="H", tz=tz) + self.df = DataFrame(np.random.randn(1000, 2), index=idx) + + def time_reset_datetimeindex(self, tz): + self.df.reset_index() + + +class InferFreq: + # This depends mostly on code in _libs/, tseries/, and core.algos.unique + params = [None, "D", "B"] + param_names = ["freq"] + + def setup(self, freq): + if freq is None: + self.idx = date_range(start="1/1/1700", freq="D", periods=10000) + self.idx._data._freq = None + else: + self.idx = date_range(start="1/1/1700", freq=freq, periods=10000) + + def time_infer_freq(self, freq): + infer_freq(self.idx) + + +class TimeDatetimeConverter: + def setup(self): + N = 100000 + self.rng = date_range(start="1/1/2000", periods=N, freq="T") + + def time_convert(self): + DatetimeConverter.convert(self.rng, None, None) + + +class Iteration: + + params = [date_range, period_range, timedelta_range] + param_names = ["time_index"] + + def setup(self, time_index): + N = 10**6 + if time_index is timedelta_range: + self.idx = time_index(start=0, freq="T", periods=N) + else: + self.idx = time_index(start="20140101", freq="T", periods=N) + self.exit = 10000 + + def time_iter(self, time_index): + for _ in self.idx: + pass + + def time_iter_preexit(self, time_index): + for i, _ in enumerate(self.idx): + if i > self.exit: + break + + +class ResampleDataFrame: + + params = ["max", "mean", "min"] + param_names = ["method"] + + def setup(self, method): + rng = date_range(start="20130101", periods=100000, freq="50L") + df = DataFrame(np.random.randn(100000, 2), index=rng) + self.resample = getattr(df.resample("1s"), method) + + def time_method(self, method): + self.resample() + + +class ResampleSeries: + + params = (["period", "datetime"], ["5min", "1D"], ["mean", "ohlc"]) + param_names = ["index", "freq", "method"] + + def setup(self, index, freq, method): + indexes = { + "period": period_range(start="1/1/2000", end="1/1/2001", freq="T"), + "datetime": date_range(start="1/1/2000", end="1/1/2001", freq="T"), + } + idx = indexes[index] + ts = Series(np.random.randn(len(idx)), index=idx) + self.resample = getattr(ts.resample(freq), method) + + def time_resample(self, index, freq, method): + self.resample() + + +class ResampleDatetetime64: + # GH 7754 + def setup(self): + rng3 = date_range( + start="2000-01-01 00:00:00", end="2000-01-01 10:00:00", freq="555000U" + ) + 
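+        # "555000U" is 555,000 microseconds (0.555 s), so the "1S" resample below
+        # places one or two datetime64 values in each one-second bin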
self.dt_ts = Series(5, rng3, dtype="datetime64[ns]") + + def time_resample(self): + self.dt_ts.resample("1S").last() + + +class AsOf: + + params = ["DataFrame", "Series"] + param_names = ["constructor"] + + def setup(self, constructor): + N = 10000 + M = 10 + rng = date_range(start="1/1/1990", periods=N, freq="53s") + data = { + "DataFrame": DataFrame(np.random.randn(N, M)), + "Series": Series(np.random.randn(N)), + } + self.ts = data[constructor] + self.ts.index = rng + self.ts2 = self.ts.copy() + self.ts2.iloc[250:5000] = np.nan + self.ts3 = self.ts.copy() + self.ts3.iloc[-5000:] = np.nan + self.dates = date_range(start="1/1/1990", periods=N * 10, freq="5s") + self.date = self.dates[0] + self.date_last = self.dates[-1] + self.date_early = self.date - timedelta(10) + + # test speed of pre-computing NAs. + def time_asof(self, constructor): + self.ts.asof(self.dates) + + # should be roughly the same as above. + def time_asof_nan(self, constructor): + self.ts2.asof(self.dates) + + # test speed of the code path for a scalar index + # without *while* loop + def time_asof_single(self, constructor): + self.ts.asof(self.date) + + # test speed of the code path for a scalar index + # before the start. should be the same as above. + def time_asof_single_early(self, constructor): + self.ts.asof(self.date_early) + + # test the speed of the code path for a scalar index + # with a long *while* loop. should still be much + # faster than pre-computing all the NAs. + def time_asof_nan_single(self, constructor): + self.ts3.asof(self.date_last) + + +class SortIndex: + + params = [True, False] + param_names = ["monotonic"] + + def setup(self, monotonic): + N = 10**5 + idx = date_range(start="1/1/2000", periods=N, freq="s") + self.s = Series(np.random.randn(N), index=idx) + if not monotonic: + self.s = self.s.sample(frac=1) + + def time_sort_index(self, monotonic): + self.s.sort_index() + + def time_get_slice(self, monotonic): + self.s[:10000] + + +class Lookup: + def setup(self): + N = 1500000 + rng = date_range(start="1/1/2000", periods=N, freq="S") + self.ts = Series(1, index=rng) + self.lookup_val = rng[N // 2] + + def time_lookup_and_cleanup(self): + self.ts[self.lookup_val] + self.ts.index._cleanup() + + +class DatetimeAccessor: + + params = [None, "US/Eastern", "UTC", dateutil.tz.tzutc()] + param_names = "tz" + + def setup(self, tz): + N = 100000 + self.series = Series(date_range(start="1/1/2000", periods=N, freq="T", tz=tz)) + + def time_dt_accessor(self, tz): + self.series.dt + + def time_dt_accessor_normalize(self, tz): + self.series.dt.normalize() + + def time_dt_accessor_month_name(self, tz): + self.series.dt.month_name() + + def time_dt_accessor_day_name(self, tz): + self.series.dt.day_name() + + def time_dt_accessor_time(self, tz): + self.series.dt.time + + def time_dt_accessor_date(self, tz): + self.series.dt.date + + def time_dt_accessor_year(self, tz): + self.series.dt.year + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/tslibs/__init__.py b/asv_bench/benchmarks/tslibs/__init__.py new file mode 100644 index 00000000..815cf55b --- /dev/null +++ b/asv_bench/benchmarks/tslibs/__init__.py @@ -0,0 +1,7 @@ +""" +Benchmarks in this directory should depend only on tslibs, tseries.offsets, +and to_offset. + +i.e. any code changes that do not touch those files should not need to +run these benchmarks. 
+""" diff --git a/asv_bench/benchmarks/tslibs/fields.py b/asv_bench/benchmarks/tslibs/fields.py new file mode 100644 index 00000000..23ae7381 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/fields.py @@ -0,0 +1,74 @@ +import numpy as np + +from pandas._libs.tslibs.fields import ( + get_date_field, + get_start_end_field, + get_timedelta_field, +) + +from .tslib import _sizes + + +class TimeGetTimedeltaField: + params = [ + _sizes, + ["days", "seconds", "microseconds", "nanoseconds"], + ] + param_names = ["size", "field"] + + def setup(self, size, field): + arr = np.random.randint(0, 10, size=size, dtype="i8") + self.i8data = arr + + def time_get_timedelta_field(self, size, field): + get_timedelta_field(self.i8data, field) + + +class TimeGetDateField: + params = [ + _sizes, + [ + "Y", + "M", + "D", + "h", + "m", + "s", + "us", + "ns", + "doy", + "dow", + "woy", + "q", + "dim", + "is_leap_year", + ], + ] + param_names = ["size", "field"] + + def setup(self, size, field): + arr = np.random.randint(0, 10, size=size, dtype="i8") + self.i8data = arr + + def time_get_date_field(self, size, field): + get_date_field(self.i8data, field) + + +class TimeGetStartEndField: + params = [ + _sizes, + ["start", "end"], + ["month", "quarter", "year"], + ["B", None, "QS"], + [12, 3, 5], + ] + param_names = ["size", "side", "period", "freqstr", "month_kw"] + + def setup(self, size, side, period, freqstr, month_kw): + arr = np.random.randint(0, 10, size=size, dtype="i8") + self.i8data = arr + + self.attrname = f"is_{period}_{side}" + + def time_get_start_end_field(self, size, side, period, freqstr, month_kw): + get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw) diff --git a/asv_bench/benchmarks/tslibs/normalize.py b/asv_bench/benchmarks/tslibs/normalize.py new file mode 100644 index 00000000..b263ae21 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/normalize.py @@ -0,0 +1,45 @@ +try: + from pandas._libs.tslibs import ( + is_date_array_normalized, + normalize_i8_timestamps, + ) +except ImportError: + from pandas._libs.tslibs.conversion import ( + normalize_i8_timestamps, + is_date_array_normalized, + ) + +import pandas as pd + +from .tslib import ( + _sizes, + _tzs, + tzlocal_obj, +) + + +class Normalize: + params = [ + _sizes, + _tzs, + ] + param_names = ["size", "tz"] + + def setup(self, size, tz): + # use an array that will have is_date_array_normalized give True, + # so we do not short-circuit early. + dti = pd.date_range("2016-01-01", periods=10, tz=tz).repeat(size // 10) + self.i8data = dti.asi8 + + if size == 10**6 and tz is tzlocal_obj: + # tzlocal is cumbersomely slow, so skip to keep runtime in check + raise NotImplementedError + + def time_normalize_i8_timestamps(self, size, tz): + # 10 i.e. NPY_FR_ns + normalize_i8_timestamps(self.i8data, tz, 10) + + def time_is_date_array_normalized(self, size, tz): + # TODO: cases with different levels of short-circuiting + # 10 i.e. NPY_FR_ns + is_date_array_normalized(self.i8data, tz, 10) diff --git a/asv_bench/benchmarks/tslibs/offsets.py b/asv_bench/benchmarks/tslibs/offsets.py new file mode 100644 index 00000000..978a36e4 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/offsets.py @@ -0,0 +1,90 @@ +""" +offsets benchmarks that rely only on tslibs. See benchmarks.offset for +offsets benchmarks that rely on other parts of pandas. 
+""" +from datetime import datetime + +import numpy as np + +from pandas import offsets + +try: + import pandas.tseries.holiday +except ImportError: + pass + +hcal = pandas.tseries.holiday.USFederalHolidayCalendar() +# These offsets currently raise a NotImplementedError with .apply_index() +non_apply = [ + offsets.Day(), + offsets.BYearEnd(), + offsets.BYearBegin(), + offsets.BQuarterEnd(), + offsets.BQuarterBegin(), + offsets.BMonthEnd(), + offsets.BMonthBegin(), + offsets.CustomBusinessDay(), + offsets.CustomBusinessDay(calendar=hcal), + offsets.CustomBusinessMonthBegin(calendar=hcal), + offsets.CustomBusinessMonthEnd(calendar=hcal), + offsets.CustomBusinessMonthEnd(calendar=hcal), +] +other_offsets = [ + offsets.YearEnd(), + offsets.YearBegin(), + offsets.QuarterEnd(), + offsets.QuarterBegin(), + offsets.MonthEnd(), + offsets.MonthBegin(), + offsets.DateOffset(months=2, days=2), + offsets.BusinessDay(), + offsets.SemiMonthEnd(), + offsets.SemiMonthBegin(), +] +offset_objs = non_apply + other_offsets + + +class OnOffset: + + params = offset_objs + param_names = ["offset"] + + def setup(self, offset): + self.dates = [ + datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] + if not (m == 11 and d == 31) + ] + + def time_on_offset(self, offset): + for date in self.dates: + offset.is_on_offset(date) + + +class OffestDatetimeArithmetic: + + params = offset_objs + param_names = ["offset"] + + def setup(self, offset): + self.date = datetime(2011, 1, 1) + self.dt64 = np.datetime64("2011-01-01 09:00Z") + + def time_apply(self, offset): + offset.apply(self.date) + + def time_apply_np_dt64(self, offset): + offset.apply(self.dt64) + + def time_add(self, offset): + self.date + offset + + def time_add_10(self, offset): + self.date + (10 * offset) + + def time_subtract(self, offset): + self.date - offset + + def time_subtract_10(self, offset): + self.date - (10 * offset) diff --git a/asv_bench/benchmarks/tslibs/period.py b/asv_bench/benchmarks/tslibs/period.py new file mode 100644 index 00000000..af101027 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/period.py @@ -0,0 +1,141 @@ +""" +Period benchmarks that rely only on tslibs. See benchmarks.period for +Period benchmarks that rely on other parts of pandas. 
+""" + +import numpy as np + +from pandas._libs.tslibs.period import ( + Period, + periodarr_to_dt64arr, +) + +from pandas.tseries.frequencies import to_offset + +from .tslib import ( + _sizes, + _tzs, + tzlocal_obj, +) + +try: + from pandas._libs.tslibs.vectorized import dt64arr_to_periodarr +except ImportError: + from pandas._libs.tslibs.period import dt64arr_to_periodarr + + +class PeriodProperties: + + params = ( + ["M", "min"], + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "is_leap_year", + "quarter", + "qyear", + "week", + "daysinmonth", + "dayofweek", + "dayofyear", + "start_time", + "end_time", + ], + ) + param_names = ["freq", "attr"] + + def setup(self, freq, attr): + self.per = Period("2012-06-01", freq=freq) + + def time_property(self, freq, attr): + getattr(self.per, attr) + + +class PeriodUnaryMethods: + + params = ["M", "min"] + param_names = ["freq"] + + def setup(self, freq): + self.per = Period("2012-06-01", freq=freq) + + def time_to_timestamp(self, freq): + self.per.to_timestamp() + + def time_now(self, freq): + self.per.now(freq) + + def time_asfreq(self, freq): + self.per.asfreq("A") + + +class PeriodConstructor: + params = [["D"], [True, False]] + param_names = ["freq", "is_offset"] + + def setup(self, freq, is_offset): + if is_offset: + self.freq = to_offset(freq) + else: + self.freq = freq + + def time_period_constructor(self, freq, is_offset): + Period("2012-06-01", freq=freq) + + +_freq_ints = [ + 1000, + 1011, # Annual - November End + 2000, + 2011, # Quarterly - November End + 3000, + 4000, + 4006, # Weekly - Saturday End + 5000, + 6000, + 7000, + 8000, + 9000, + 10000, + 11000, + 12000, +] + + +class TimePeriodArrToDT64Arr: + params = [ + _sizes, + _freq_ints, + ] + param_names = ["size", "freq"] + + def setup(self, size, freq): + arr = np.arange(10, dtype="i8").repeat(size // 10) + self.i8values = arr + + def time_periodarray_to_dt64arr(self, size, freq): + periodarr_to_dt64arr(self.i8values, freq) + + +class TimeDT64ArrToPeriodArr: + params = [ + _sizes, + _freq_ints, + _tzs, + ] + param_names = ["size", "freq", "tz"] + + def setup(self, size, freq, tz): + if size == 10**6 and tz is tzlocal_obj: + # tzlocal is cumbersomely slow, so skip to keep runtime in check + raise NotImplementedError + + arr = np.arange(10, dtype="i8").repeat(size // 10) + self.i8values = arr + + def time_dt64arr_to_periodarr(self, size, freq, tz): + dt64arr_to_periodarr(self.i8values, freq, tz) diff --git a/asv_bench/benchmarks/tslibs/resolution.py b/asv_bench/benchmarks/tslibs/resolution.py new file mode 100644 index 00000000..44f288c7 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/resolution.py @@ -0,0 +1,52 @@ +""" +ipython analogue: + +tr = TimeResolution() +mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],)) +df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"]) + +for unit in tr.params[0]: + for size in tr.params[1]: + for tz in tr.params[2]: + tr.setup(unit, size, tz) + key = (unit, size, str(tz)) + print(key) + + val = %timeit -o tr.time_get_resolution(unit, size, tz) + + df.loc[key] = (val.average, val.stdev) + +""" +import numpy as np + +try: + from pandas._libs.tslibs import get_resolution +except ImportError: + from pandas._libs.tslibs.resolution import get_resolution + +from .tslib import ( + _sizes, + _tzs, + tzlocal_obj, +) + + +class TimeResolution: + params = ( + ["D", "h", "m", "s", "us", "ns"], + _sizes, + _tzs, + ) + param_names = ["unit", "size", "tz"] + + def setup(self, unit, size, tz): + if size 
== 10**6 and tz is tzlocal_obj: + # tzlocal is cumbersomely slow, so skip to keep runtime in check + raise NotImplementedError + + arr = np.random.randint(0, 10, size=size, dtype="i8") + arr = arr.view(f"M8[{unit}]").astype("M8[ns]").view("i8") + self.i8data = arr + + def time_get_resolution(self, unit, size, tz): + get_resolution(self.i8data, tz) diff --git a/asv_bench/benchmarks/tslibs/timedelta.py b/asv_bench/benchmarks/tslibs/timedelta.py new file mode 100644 index 00000000..2daf1861 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/timedelta.py @@ -0,0 +1,69 @@ +""" +Timedelta benchmarks that rely only on tslibs. See benchmarks.timedeltas for +Timedelta benchmarks that rely on other parts of pandas. +""" +import datetime + +import numpy as np + +from pandas import Timedelta + + +class TimedeltaConstructor: + def setup(self): + self.nptimedelta64 = np.timedelta64(3600) + self.dttimedelta = datetime.timedelta(seconds=3600) + self.td = Timedelta(3600, unit="s") + + def time_from_int(self): + Timedelta(123456789) + + def time_from_unit(self): + Timedelta(1, unit="d") + + def time_from_components(self): + Timedelta( + days=1, + hours=2, + minutes=3, + seconds=4, + milliseconds=5, + microseconds=6, + nanoseconds=7, + ) + + def time_from_datetime_timedelta(self): + Timedelta(self.dttimedelta) + + def time_from_np_timedelta(self): + Timedelta(self.nptimedelta64) + + def time_from_string(self): + Timedelta("1 days") + + def time_from_iso_format(self): + Timedelta("P4DT12H30M5S") + + def time_from_missing(self): + Timedelta("nat") + + def time_from_pd_timedelta(self): + Timedelta(self.td) + + +class TimedeltaProperties: + def setup_cache(self): + td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35) + return td + + def time_timedelta_days(self, td): + td.days + + def time_timedelta_seconds(self, td): + td.seconds + + def time_timedelta_microseconds(self, td): + td.microseconds + + def time_timedelta_nanoseconds(self, td): + td.nanoseconds diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py new file mode 100644 index 00000000..eda9bce8 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -0,0 +1,156 @@ +from datetime import datetime + +import numpy as np +import pytz + +from pandas import Timestamp + +from .tslib import _tzs + + +class TimestampConstruction: + def setup(self): + self.npdatetime64 = np.datetime64("2020-01-01 00:00:00") + self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0) + self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC) + self.ts = Timestamp("2020-01-01 00:00:00") + + def time_parse_iso8601_no_tz(self): + Timestamp("2017-08-25 08:16:14") + + def time_parse_iso8601_tz(self): + Timestamp("2017-08-25 08:16:14-0500") + + def time_parse_dateutil(self): + Timestamp("2017/08/25 08:16:14 AM") + + def time_parse_today(self): + Timestamp("today") + + def time_parse_now(self): + Timestamp("now") + + def time_fromordinal(self): + Timestamp.fromordinal(730120) + + def time_fromtimestamp(self): + Timestamp.fromtimestamp(1515448538) + + def time_from_npdatetime64(self): + Timestamp(self.npdatetime64) + + def time_from_datetime_unaware(self): + Timestamp(self.dttime_unaware) + + def time_from_datetime_aware(self): + Timestamp(self.dttime_aware) + + def time_from_pd_timestamp(self): + Timestamp(self.ts) + + +class TimestampProperties: + _freqs = [None, "B"] + params = [_tzs, _freqs] + param_names = ["tz", "freq"] + + def setup(self, tz, freq): + self.ts = Timestamp("2017-08-25 08:16:14", tzinfo=tz, freq=freq) + + 
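+    # asv re-runs setup for every (tz, freq) combination and times each time_*
+    # method below as a single attribute access (or method call) on that Timestamp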
def time_tz(self, tz, freq): + self.ts.tz + + def time_dayofweek(self, tz, freq): + self.ts.dayofweek + + def time_dayofyear(self, tz, freq): + self.ts.dayofyear + + def time_week(self, tz, freq): + self.ts.week + + def time_quarter(self, tz, freq): + self.ts.quarter + + def time_days_in_month(self, tz, freq): + self.ts.days_in_month + + def time_freqstr(self, tz, freq): + self.ts.freqstr + + def time_is_month_start(self, tz, freq): + self.ts.is_month_start + + def time_is_month_end(self, tz, freq): + self.ts.is_month_end + + def time_is_quarter_start(self, tz, freq): + self.ts.is_quarter_start + + def time_is_quarter_end(self, tz, freq): + self.ts.is_quarter_end + + def time_is_year_start(self, tz, freq): + self.ts.is_year_start + + def time_is_year_end(self, tz, freq): + self.ts.is_year_end + + def time_is_leap_year(self, tz, freq): + self.ts.is_leap_year + + def time_microsecond(self, tz, freq): + self.ts.microsecond + + def time_month_name(self, tz, freq): + self.ts.month_name() + + def time_weekday_name(self, tz, freq): + self.ts.day_name() + + +class TimestampOps: + params = _tzs + param_names = ["tz"] + + def setup(self, tz): + self.ts = Timestamp("2017-08-25 08:16:14", tz=tz) + + def time_replace_tz(self, tz): + self.ts.replace(tzinfo=pytz.timezone("US/Eastern")) + + def time_replace_None(self, tz): + self.ts.replace(tzinfo=None) + + def time_to_pydatetime(self, tz): + self.ts.to_pydatetime() + + def time_normalize(self, tz): + self.ts.normalize() + + def time_tz_convert(self, tz): + if self.ts.tz is not None: + self.ts.tz_convert(tz) + + def time_tz_localize(self, tz): + if self.ts.tz is None: + self.ts.tz_localize(tz) + + def time_to_julian_date(self, tz): + self.ts.to_julian_date() + + def time_floor(self, tz): + self.ts.floor("5T") + + def time_ceil(self, tz): + self.ts.ceil("5T") + + +class TimestampAcrossDst: + def setup(self): + dt = datetime(2016, 3, 27, 1) + self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo + self.ts2 = Timestamp(dt) + + def time_replace_across_dst(self): + self.ts2.replace(tzinfo=self.tzinfo) diff --git a/asv_bench/benchmarks/tslibs/tslib.py b/asv_bench/benchmarks/tslibs/tslib.py new file mode 100644 index 00000000..f93ef1ce --- /dev/null +++ b/asv_bench/benchmarks/tslibs/tslib.py @@ -0,0 +1,68 @@ +""" +ipython analogue: + +tr = TimeIntsToPydatetime() +mi = pd.MultiIndex.from_product( + tr.params[:-1] + ([str(x) for x in tr.params[-1]],) +) +df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"]) +for box in tr.params[0]: + for size in tr.params[1]: + for tz in tr.params[2]: + tr.setup(box, size, tz) + key = (box, size, str(tz)) + print(key) + val = %timeit -o tr.time_ints_to_pydatetime(box, size, tz) + df.loc[key] = (val.average, val.stdev) +""" +from datetime import ( + timedelta, + timezone, +) + +from dateutil.tz import ( + gettz, + tzlocal, +) +import numpy as np +import pytz + +try: + from pandas._libs.tslibs import ints_to_pydatetime +except ImportError: + from pandas._libs.tslib import ints_to_pydatetime + +tzlocal_obj = tzlocal() +_tzs = [ + None, + timezone.utc, + timezone(timedelta(minutes=60)), + pytz.timezone("US/Pacific"), + gettz("Asia/Tokyo"), + tzlocal_obj, +] +_sizes = [0, 1, 100, 10**4, 10**6] + + +class TimeIntsToPydatetime: + params = ( + ["time", "date", "datetime", "timestamp"], + _sizes, + _tzs, + ) + param_names = ["box", "size", "tz"] + # TODO: fold? freq? 
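+    # box picks what each i8 (nanoseconds since the epoch) value is boxed into:
+    # "time"/"date"/"datetime" give stdlib objects, "timestamp" gives pd.Timestamp;
+    # e.g. ints_to_pydatetime(np.array([0], dtype="i8"), None, box="date") is
+    # roughly array([datetime.date(1970, 1, 1)], dtype=object)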
+ + def setup(self, box, size, tz): + if box == "date" and tz is not None: + # tz is ignored, so avoid running redundant benchmarks + raise NotImplementedError # skip benchmark + if size == 10**6 and tz is _tzs[-1]: + # This is cumbersomely-slow, so skip to trim runtime + raise NotImplementedError # skip benchmark + + arr = np.random.randint(0, 10, size=size, dtype="i8") + self.i8data = arr + + def time_ints_to_pydatetime(self, box, size, tz): + ints_to_pydatetime(self.i8data, tz, box=box) diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py new file mode 100644 index 00000000..c6b510ef --- /dev/null +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -0,0 +1,52 @@ +import numpy as np +from pytz import UTC + +from pandas._libs.tslibs.tzconversion import tz_localize_to_utc + +from .tslib import ( + _sizes, + _tzs, + tzlocal_obj, +) + +try: + old_sig = False + from pandas._libs.tslibs import tz_convert_from_utc +except ImportError: + try: + old_sig = False + from pandas._libs.tslibs.tzconversion import tz_convert_from_utc + except ImportError: + old_sig = True + from pandas._libs.tslibs.tzconversion import tz_convert as tz_convert_from_utc + + +class TimeTZConvert: + params = [ + _sizes, + [x for x in _tzs if x is not None], + ] + param_names = ["size", "tz"] + + def setup(self, size, tz): + if size == 10**6 and tz is tzlocal_obj: + # tzlocal is cumbersomely slow, so skip to keep runtime in check + raise NotImplementedError + + arr = np.random.randint(0, 10, size=size, dtype="i8") + self.i8data = arr + + def time_tz_convert_from_utc(self, size, tz): + # effectively: + # dti = DatetimeIndex(self.i8data, tz=tz) + # dti.tz_localize(None) + if old_sig: + tz_convert_from_utc(self.i8data, UTC, tz) + else: + tz_convert_from_utc(self.i8data, tz) + + def time_tz_localize_to_utc(self, size, tz): + # effectively: + # dti = DatetimeIndex(self.i8data) + # dti.tz_localize(tz, ambiguous="NaT", nonexistent="NaT") + tz_localize_to_utc(self.i8data, tz, ambiguous="NaT", nonexistent="NaT") diff --git a/ci/code_checks.sh b/ci/code_checks.sh new file mode 100755 index 00000000..113186c7 --- /dev/null +++ b/ci/code_checks.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# +# Run checks related to code quality. +# +# This script is intended for both the CI and to check locally that code standards are +# respected. We run doctests here (currently some files only), and we +# validate formatting error in docstrings. +# +# Usage: +# $ ./ci/code_checks.sh # run all checks +# $ ./ci/code_checks.sh code # checks on imported code +# $ ./ci/code_checks.sh doctests # run doctests +# $ ./ci/code_checks.sh docstrings # validate docstring errors +# $ ./ci/code_checks.sh single-docs # check single-page docs build warning-free + +[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" ]] || \ + { echo "Unknown command $1. Usage: $0 [code|doctests|docstrings]"; exit 9999; } + +BASE_DIR="$(dirname $0)/.." +RET=0 +CHECK=$1 + +function invgrep { + # grep with inverse exist status and formatting for azure-pipelines + # + # This function works exactly as grep, but with opposite exit status: + # - 0 (success) when no patterns are found + # - 1 (fail) when the patterns are found + # + # This is useful for the CI, as we want to fail if one of the patterns + # that we want to avoid is found by grep. + grep -n "$@" | sed "s/^/$INVGREP_PREPEND/" | sed "s/$/$INVGREP_APPEND/" ; EXIT_STATUS=${PIPESTATUS[0]} + return $((! 
$EXIT_STATUS)) +} + +if [[ "$GITHUB_ACTIONS" == "true" ]]; then + INVGREP_PREPEND="##[error]" +fi + +### CODE ### +if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then + + MSG='Check import. No warnings, and blocklist some optional dependencies' ; echo $MSG + python -W error -c " +import sys +import pandas + +blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis', + 'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', + 'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'} + +# GH#28227 for some of these check for top-level modules, while others are +# more specific (e.g. urllib.request) +import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules) +mods = blocklist & import_mods +if mods: + sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods))) + sys.exit(len(mods)) + " + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### DOCTESTS ### +if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then + + MSG='Doctests' ; echo $MSG + # Ignore test_*.py files or else the unit tests will run + python -m pytest --doctest-modules --ignore-glob="**/test_*.py" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Cython Doctests' ; echo $MSG + python -m pytest --doctest-cython pandas/_libs + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### DOCSTRINGS ### +if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then + + MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06 + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### SINGLE-PAGE DOCS ### +if [[ -z "$CHECK" || "$CHECK" == "single-docs" ]]; then + python doc/make.py --warnings-are-errors --single pandas.Series.value_counts + python doc/make.py --warnings-are-errors --single pandas.Series.str.split + python doc/make.py clean +fi + +exit $RET diff --git a/ci/condarc.yml b/ci/condarc.yml new file mode 100644 index 00000000..9d750b71 --- /dev/null +++ b/ci/condarc.yml @@ -0,0 +1,32 @@ +# https://docs.conda.io/projects/conda/en/latest/configuration.html + +# always_yes (NoneType, bool) +# aliases: yes +# Automatically choose the 'yes' option whenever asked to proceed with a +# conda operation, such as when running `conda install`. +# +always_yes: true + +# remote_connect_timeout_secs (float) +# The number seconds conda will wait for your client to establish a +# connection to a remote url resource. +# +remote_connect_timeout_secs: 30.0 + +# remote_max_retries (int) +# The maximum number of retries each HTTP connection should attempt. +# +remote_max_retries: 10 + +# remote_backoff_factor (int) +# The factor determines the time HTTP connection should wait for +# attempt. +# +remote_backoff_factor: 3 + +# remote_read_timeout_secs (float) +# Once conda has connected to a remote resource and sent an HTTP +# request, the read timeout is the number of seconds conda will wait for +# the server to send a response. 
+# +remote_read_timeout_secs: 60.0 diff --git a/ci/deps/actions-310-numpydev.yaml b/ci/deps/actions-310-numpydev.yaml new file mode 100644 index 00000000..ef20c2aa --- /dev/null +++ b/ci/deps/actions-310-numpydev.yaml @@ -0,0 +1,23 @@ +name: pandas-dev +channels: + - defaults +dependencies: + - python=3.10 + + # tools + - pytest>=6.0 + - pytest-cov + - pytest-xdist>=1.31 + - hypothesis>=5.5.3 + - pytest-asyncio>=0.17 + + # pandas dependencies + - python-dateutil + - pytz + - pip + - pip: + - "cython" + - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple" + - "--pre" + - "numpy" + - "scipy" diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml new file mode 100644 index 00000000..deb23d43 --- /dev/null +++ b/ci/deps/actions-310.yaml @@ -0,0 +1,55 @@ +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.10 + + # test dependencies + - cython>=0.29.32 + - pytest>=6.0 + - pytest-cov + - pytest-xdist>=1.31 + - psutil + - pytest-asyncio>=0.17 + - boto3 + + # required dependencies + - python-dateutil + - numpy + - pytz + + # optional dependencies + - beautifulsoup4 + - blosc + - bottleneck + - brotlipy + - fastparquet + - fsspec + - html5lib + - hypothesis + - gcsfs + - jinja2 + - lxml + - matplotlib>=3.6.1 + - numba + - numexpr + - openpyxl + - odfpy + - pandas-gbq + - psycopg2 + - pymysql + - pytables + - pyarrow<10 + - pyreadstat + - python-snappy + - pyxlsb + - s3fs>=2021.08.0 + - scipy + - sqlalchemy<1.4.46 + - tabulate + - tzdata>=2022a + - xarray + - xlrd + - xlsxwriter + - xlwt + - zstandard diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml new file mode 100644 index 00000000..06ffafeb --- /dev/null +++ b/ci/deps/actions-38-downstream_compat.yaml @@ -0,0 +1,71 @@ +# Non-dependencies that pandas utilizes or has compatibility with pandas objects +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.8 + + # test dependencies + - cython>=0.29.32 + - pytest>=6.0 + - pytest-cov + - pytest-xdist>=1.31 + - psutil + - pytest-asyncio>=0.17 + - boto3 + + # required dependencies + - python-dateutil + - numpy + - pytz + + # optional dependencies + - beautifulsoup4 + - blosc + - brotlipy + - bottleneck + - fastparquet + - fsspec + - html5lib + - hypothesis + - gcsfs + - jinja2 + - lxml + - matplotlib>=3.6.1 + - numba + - numexpr + - openpyxl + - odfpy + - pandas-gbq + - psycopg2 + - pyarrow<10 + - pymysql + - pyreadstat + - pytables + - python-snappy + - pyxlsb + - s3fs>=2021.08.0 + - scipy + - sqlalchemy<1.4.46 + - tabulate + - xarray + - xlrd + - xlsxwriter + - xlwt + - zstandard + + # downstream packages + - aiobotocore + - botocore + - cftime + - dask + - ipython + - geopandas-base + - seaborn + - scikit-learn + - statsmodels + - coverage + - pandas-datareader + - pyyaml + - py + - pytorch diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml new file mode 100644 index 00000000..fd23080c --- /dev/null +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -0,0 +1,57 @@ +# Minimum version of required + optional dependencies +# Aligned with getting_started/install.rst and compat/_optional.py +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.8.0 + + # test dependencies + - cython>=0.29.32 + - pytest>=6.0 + - pytest-cov + - pytest-xdist>=1.31 + - psutil + - pytest-asyncio>=0.17 + - boto3 + + # required dependencies + - python-dateutil=2.8.1 + - numpy=1.20.3 + - pytz=2020.1 + + # optional dependencies + - beautifulsoup4=4.9.3 
+  - blosc=1.21.0
+  - bottleneck=1.3.2
+  - brotlipy=0.7.0
+  - fastparquet=0.4.0
+  - fsspec=2021.07.0
+  - html5lib=1.1
+  - hypothesis=6.13.0
+  - gcsfs=2021.07.0
+  - jinja2=3.0.0
+  - lxml=4.6.3
+  - matplotlib=3.3.2
+  - numba=0.53.1
+  - numexpr=2.7.3
+  - odfpy=1.4.1
+  - openpyxl=3.0.7
+  - pandas-gbq=0.15.0
+  - psycopg2=2.8.6
+  - pyarrow=1.0.1
+  - pymysql=1.0.2
+  - pyreadstat=1.1.2
+  - pytables=3.6.1
+  - python-snappy=0.6.0
+  - pyxlsb=1.0.8
+  - s3fs=2021.08.0
+  - scipy=1.7.1
+  - sqlalchemy=1.4.16
+  - tabulate=0.8.9
+  - tzdata=2022a
+  - xarray=0.19.0
+  - xlrd=2.0.1
+  - xlsxwriter=1.4.3
+  - xlwt=1.3.0
+  - zstandard=0.15.2
diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml
new file mode 100644
index 00000000..222da40e
--- /dev/null
+++ b/ci/deps/actions-38.yaml
@@ -0,0 +1,54 @@
+name: pandas-dev
+channels:
+  - conda-forge
+dependencies:
+  - python=3.8
+
+  # test dependencies
+  - cython>=0.29.32
+  - pytest>=6.0
+  - pytest-cov
+  - pytest-xdist>=1.31
+  - psutil
+  - pytest-asyncio>=0.17
+  - boto3
+
+  # required dependencies
+  - python-dateutil
+  - numpy
+  - pytz
+
+  # optional dependencies
+  - beautifulsoup4
+  - blosc
+  - bottleneck
+  - brotlipy
+  - fastparquet
+  - fsspec
+  - html5lib
+  - hypothesis
+  - gcsfs
+  - jinja2
+  - lxml
+  - matplotlib>=3.6.1
+  - numba
+  - numexpr
+  - openpyxl
+  - odfpy
+  - pandas-gbq
+  - psycopg2
+  - pyarrow<10
+  - pymysql
+  - pyreadstat
+  - pytables
+  - python-snappy
+  - pyxlsb
+  - s3fs>=2021.08.0
+  - scipy
+  - sqlalchemy<1.4.46
+  - tabulate
+  - xarray
+  - xlrd
+  - xlsxwriter
+  - xlwt
+  - zstandard
diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
new file mode 100644
index 00000000..1c60e8ad
--- /dev/null
+++ b/ci/deps/actions-39.yaml
@@ -0,0 +1,55 @@
+name: pandas-dev
+channels:
+  - conda-forge
+dependencies:
+  - python=3.9
+
+  # test dependencies
+  - cython>=0.29.32
+  - pytest>=6.0
+  - pytest-cov
+  - pytest-xdist>=1.31
+  - psutil
+  - pytest-asyncio>=0.17
+  - boto3
+
+  # required dependencies
+  - python-dateutil
+  - numpy
+  - pytz
+
+  # optional dependencies
+  - beautifulsoup4
+  - blosc
+  - bottleneck
+  - brotlipy
+  - fastparquet
+  - fsspec
+  - html5lib
+  - hypothesis
+  - gcsfs
+  - jinja2
+  - lxml
+  - matplotlib>=3.6.1
+  - numba
+  - numexpr
+  - openpyxl
+  - odfpy
+  - pandas-gbq
+  - psycopg2
+  - pymysql
+  - pyarrow<10
+  - pyreadstat
+  - pytables
+  - python-snappy
+  - pyxlsb
+  - s3fs>=2021.08.0
+  - scipy
+  - sqlalchemy<1.4.46
+  - tabulate
+  - tzdata>=2022a
+  - xarray
+  - xlrd
+  - xlsxwriter
+  - xlwt
+  - zstandard
diff --git a/ci/deps/actions-pypy-38.yaml b/ci/deps/actions-pypy-38.yaml
new file mode 100644
index 00000000..e06b992a
--- /dev/null
+++ b/ci/deps/actions-pypy-38.yaml
@@ -0,0 +1,21 @@
+name: pandas-dev
+channels:
+  - conda-forge
+dependencies:
+  # TODO: Add the rest of the dependencies in here
+  # once the other plentiful failures/segfaults
+  # with base pandas has been dealt with
+  - python=3.8[build=*_pypy]  # TODO: use this once pypy3.8 is available
+
+  # tools
+  - cython>=0.29.32
+  - pytest>=6.0
+  - pytest-cov
+  - pytest-asyncio
+  - pytest-xdist>=1.31
+  - hypothesis>=5.5.3
+
+  # required
+  - numpy
+  - python-dateutil
+  - pytz
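The environment files above are what the CI jobs install, and they can also be used to reproduce a job locally. The sketch below assumes conda (or mamba) is available and is run from the root of the source tree; the environment name pandas-dev comes from the files themselves, and the editable-install step is only one common way to build pandas in place, not necessarily the exact command the workflows run:

    # Recreate the minimum-versions CI environment (any ci/deps/*.yaml above can be substituted).
    conda env create -f ci/deps/actions-38-minimum_versions.yaml
    conda activate pandas-dev
    # Build the C extensions against the pinned dependencies; exact flags may
    # differ from what the CI workflow itself uses.
    python -m pip install -e . --no-build-isolation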
diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml
new file mode 100644
index 00000000..263521fb
--- /dev/null
+++ b/ci/deps/circle-38-arm64.yaml
@@ -0,0 +1,55 @@
+name: pandas-dev
+channels:
+  - conda-forge
+dependencies:
+  - python=3.8
+
+  # test dependencies
+  - cython>=0.29.32
+  - pytest>=6.0
+  - pytest-cov
+  - pytest-xdist>=1.31
+  - psutil
+  - pytest-asyncio>=0.17
+  - boto3
+
+  # required dependencies
+  - python-dateutil
+  - numpy
+  - pytz
+
+  # optional dependencies
+  - beautifulsoup4
+  - blosc
+  - bottleneck
+  - brotlipy
+  - fastparquet
+  - fsspec
+  - html5lib
+  - hypothesis
+  - gcsfs
+  - jinja2
+  - lxml
+  - matplotlib>=3.6.1
+  - numba
+  - numexpr
+  - openpyxl
+  - odfpy
+  - pandas-gbq
+  - psycopg2
+  - pyarrow<10
+  - pymysql
+  # Not provided on ARM
+  #- pyreadstat
+  - pytables
+  - python-snappy
+  - pyxlsb
+  - s3fs>=2021.08.0
+  - scipy
+  - sqlalchemy<1.4.46
+  - tabulate
+  - xarray
+  - xlrd
+  - xlsxwriter
+  - xlwt
+  - zstandard
diff --git a/ci/run_tests.sh b/ci/run_tests.sh
new file mode 100755
index 00000000..e6de5caf
--- /dev/null
+++ b/ci/run_tests.sh
@@ -0,0 +1,49 @@
+#!/bin/bash -e
+
+# Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set)
+# https://github.com/pytest-dev/pytest/issues/920
+# https://github.com/pytest-dev/pytest/issues/1075
+export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
+
+# May help reproduce flaky CI builds if set in subsequent runs
+echo PYTHONHASHSEED=$PYTHONHASHSEED
+
+if [[ "not network" == *"$PATTERN"* ]]; then
+    export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4;
+fi
+
+if [[ "$COVERAGE" == "true" ]]; then
+    COVERAGE="-s --cov=pandas --cov-report=xml --cov-append"
+else
+    COVERAGE=""  # We need to reset this for COVERAGE="false" case
+fi
+
+# If no X server is found, we use xvfb to emulate it
+if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then
+    export DISPLAY=":0"
+    XVFB="xvfb-run "
+fi
+
+PYTEST_CMD="${XVFB}pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
+
+if [[ "$PATTERN" ]]; then
+    PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
+fi
+
+echo $PYTEST_CMD
+sh -c "$PYTEST_CMD"
+
+if [[ "$PANDAS_DATA_MANAGER" != "array" && "$PYTEST_TARGET" == "pandas" ]]; then
+    # The ArrayManager tests should have already been run by PYTEST_CMD if PANDAS_DATA_MANAGER was already set to array
+    # If we're targeting specific files, e.g. test_downstream.py, don't run.
+    PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE pandas"
+
+    if [[ "$PATTERN" ]]; then
+        PYTEST_AM_CMD="$PYTEST_AM_CMD -m \"$PATTERN and arraymanager\""
+    else
+        PYTEST_AM_CMD="$PYTEST_AM_CMD -m \"arraymanager\""
+    fi
+
+    echo $PYTEST_AM_CMD
+    sh -c "$PYTEST_AM_CMD"
+fi
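ci/run_tests.sh takes no arguments; everything is passed through the environment variables it reads (PYTEST_WORKERS, PYTEST_TARGET, TEST_ARGS, PATTERN, COVERAGE, PANDAS_DATA_MANAGER). A minimal local invocation might look like the following sketch; the variable names come from the script above, while the example values are only illustrative:

    # Run the pandas suite in parallel, skipping slow/network-marked tests (sketch).
    export PYTEST_WORKERS=auto                    # forwarded to pytest-xdist as -n
    export PYTEST_TARGET=pandas                   # test path handed to pytest
    export PATTERN="not slow and not network"     # becomes the -m marker expression
    export COVERAGE=true                          # adds --cov=pandas --cov-report=xml
    ./ci/run_tests.sh

If PANDAS_DATA_MANAGER is not already set to array and PYTEST_TARGET is pandas, the script additionally runs the arraymanager-marked tests a second time with PANDAS_DATA_MANAGER=array, as in the final block above.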

diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 00000000..d893bdbd
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,18 @@
+codecov:
+  branch: main
+  notify:
+    after_n_builds: 10
+comment: false
+
+coverage:
+  status:
+    project:
+      default:
+        target: '82'
+    patch:
+      default:
+        target: '50'
+        informational: true
+
+github_checks:
+  annotations: false
diff --git a/doc/.gitignore b/doc/.gitignore
new file mode 100644
index 00000000..e23892d6
--- /dev/null
+++ b/doc/.gitignore
@@ -0,0 +1,4 @@
+data/
+timeseries.csv
+timeseries.parquet
+timeseries_wide.parquet
diff --git a/doc/_templates/api_redirect.html b/doc/_templates/api_redirect.html
new file mode 100644
index 00000000..c04a8b58
--- /dev/null
+++ b/doc/_templates/api_redirect.html
@@ -0,0 +1,10 @@
+{% set redirect = redirects[pagename.split("/")[-1]] %}
+
+
+
+    This API page has moved
+
+
+    This API page has moved here.
+
+
diff --git a/doc/_templates/autosummary/accessor.rst b/doc/_templates/autosummary/accessor.rst
new file mode 100644
index 00000000..4ba745cd
--- /dev/null
+++ b/doc/_templates/autosummary/accessor.rst
@@ -0,0 +1,6 @@
+{{ fullname }}
+{{ underline }}
+
+.. currentmodule:: {{ module.split('.')[0] }}
+
+.. autoaccessor:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
diff --git a/doc/_templates/autosummary/accessor_attribute.rst b/doc/_templates/autosummary/accessor_attribute.rst
new file mode 100644
index 00000000..b5ad65d6
--- /dev/null
+++ b/doc/_templates/autosummary/accessor_attribute.rst
@@ -0,0 +1,6 @@
+{{ fullname }}
+{{ underline }}
+
+.. currentmodule:: {{ module.split('.')[0] }}
+
+.. autoaccessorattribute:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
diff --git a/doc/_templates/autosummary/accessor_callable.rst b/doc/_templates/autosummary/accessor_callable.rst
new file mode 100644
index 00000000..7a330181
--- /dev/null
+++ b/doc/_templates/autosummary/accessor_callable.rst
@@ -0,0 +1,6 @@
+{{ fullname }}
+{{ underline }}
+
+.. currentmodule:: {{ module.split('.')[0] }}
+
+.. autoaccessorcallable:: {{ (module.split('.')[1:] + [objname]) | join('.') }}.__call__
diff --git a/doc/_templates/autosummary/accessor_method.rst b/doc/_templates/autosummary/accessor_method.rst
new file mode 100644
index 00000000..aefbba6e
--- /dev/null
+++ b/doc/_templates/autosummary/accessor_method.rst
@@ -0,0 +1,6 @@
+{{ fullname }}
+{{ underline }}
+
+.. currentmodule:: {{ module.split('.')[0] }}
+
+.. autoaccessormethod:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
diff --git a/doc/_templates/autosummary/class.rst b/doc/_templates/autosummary/class.rst
new file mode 100644
index 00000000..a9c9bd2b
--- /dev/null
+++ b/doc/_templates/autosummary/class.rst
@@ -0,0 +1,33 @@
+{% extends "!autosummary/class.rst" %}
+
+{% block methods %}
+{% if methods %}
+
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+   {% for item in all_methods %}
+      {%- if not item.startswith('_') or item in ['__call__'] %}
+      {{ name }}.{{ item }}
+      {%- endif -%}
+   {%- endfor %}
+
+{% endif %}
+{% endblock %}
+
+{% block attributes %}
+{% if attributes %}
+
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+   {% for item in all_attributes %}
+      {%- if not item.startswith('_') %}
+      {{ name }}.{{ item }}
+      {%- endif -%}
+   {%- endfor %}
+
+{% endif %}
+{% endblock %}
diff --git a/doc/_templates/autosummary/class_without_autosummary.rst b/doc/_templates/autosummary/class_without_autosummary.rst
new file mode 100644
index 00000000..6676c672
--- /dev/null
+++ b/doc/_templates/autosummary/class_without_autosummary.rst
@@ -0,0 +1,6 @@
+{{ fullname }}
+{{ underline }}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
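The api_redirect.html page and the autosummary templates above are only consumed by the Sphinx documentation build; they have no effect on the test jobs. A local build can be sketched as follows, assuming the documentation dependencies from one of the ci/deps environments are installed; doc/make.py is the build driver shipped in the tree, and the html subcommand is the usual entry point, although the available options may differ between releases:

    # Build the HTML documentation locally (sketch).
    cd doc
    python make.py html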
diff --git a/doc/_templates/pandas_footer.html b/doc/_templates/pandas_footer.html
new file mode 100644
index 00000000..6d8caa4d
--- /dev/null
+++ b/doc/_templates/pandas_footer.html
@@ -0,0 +1,3 @@
+
diff --git a/doc/_templates/sidebar-nav-bs.html b/doc/_templates/sidebar-nav-bs.html
new file mode 100644
index 00000000..8298b665
--- /dev/null
+++ b/doc/_templates/sidebar-nav-bs.html
@@ -0,0 +1,9 @@
+
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pdf b/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..3582e0c0dabf9af69855863c0d13b96fd62f7653
GIT binary patch
literal 396529
[base85-encoded binary patch data for Pandas_Cheat_Sheet.pdf omitted]
z(CH+#<{hq&Vc$l*Pj|lzsH64*?H$ldH^&-9fV;0?5uV%IS z^^c2pm;qpJBEoxb2jU7^r!D^sEDrl<2pZ5DHmdy3SVv|i&==8l_;qZ|DPnF_gDd9o8 z887tt1kK6>A;xjF!tys>15oC;oFWYGA%>|La^%)n##*?GxN@$*lnb-A!ujGue)d-l zTLC@dvOuQU{EO=$j0Sas-0L6APd`D9#ddH0l|`r3Lm;eJ0y0`7Qg*_Eq?;vIF94Af$G49?MzGiNL|NkCvN2XVkZ&Bg0(EAGW)G@+#}3W zf!GK)1e+vh*X-+u&B;{u{q90Nhv(OvDX%V+$s~S9I*-fYq*3?dU!rcPFrO_!&enWy z6vbhF?F(;@Q@KBm&G2K2EM;rD;2xd6A%Ne5rJ*4iQ$T;sqB*awxWS}WdwQ|+A*Aoa8lPX&r*6}6Ua8|`9(+xj zI}!6aIHk;M5I1cuYm6u2H9wv&5bS9YHxX5!eK7NB#zgj{V)3tM^GeT|``a(G-0sT8 zFxd>bw4cwreEH>ELdBH>Z+U{+etvIn<1a-?596yc&Q6}UN2ZPqyzM-vS$Wyfwy8p? z8V19S;4&LNt*iRCUuUlnFf*A{r2{>9 z203Rs*R@zxi{LE{8|-A)C^{We_wCUP*qGRuG?8>r58t75by#K6Tb7>HXviWT|)Q%sTB|f zp8GmxR03Joqs#eD*fOrLW~t`tB{5RbtsXicU+qPn;{7K_p)4+3Jxuf})EYd!Y@!EK z*EfBRGle8Mm!-8_AbjO`2*4WwrOwdlYDD%MOsQ4B^l}eMB?ty4n?UUuQO*lXwv8J< zYu9y(r60x#OvUEM)|@KQrRj z`Rssw$LgqIjKG>WEpw2oNxZ6uS6iG9Q%89;jRsJgUX}p@KC7|r`5CB=;<6UtMZMiU zZy!woPJjj>KuoXj4(<9AGi2Jto?5N*UGI+%JT4hyGL#(v^eM93^naytZ%5K=ind>m59Z4-gVNDS z+cWwG{c8O6DMN>VGVKk{9!T;C;GIHqo3D4u7&VI`)F}H6mhg*5fwnqn==;(w&$G*H z^Kem(Afep@%j-Q$$|>-evZ`Dv_d!~F1!C+&5 zL-c4?gX$!;YD%J-vG?3da)D%@H{sMa!O_HgdUP_EiRiO>SvPX{u5;-8F#M5iwbX;V zS7O6GN$15UTj<3@iKUXIi`nOyfiL}xtssF|Y)z%9q8+eSf%V!*PNm4X=wzDpJ9~Ri zpsdeBaB(!EjOkeM1M;@4onYh1wklreVew^NY<)si)aP;GMb15{1+%eC&|J}n#|!j? z9_Qt7jQ8Av{@N^poQlW`WJ22_XlF5QKP9&{LT9~tLW z0*TZzP+cvXMUQxOTh`JMkhW3qzM~(ropU^A7fi;%vA$fcNrk`N?gi!l}5#&i6eR@SOXJEPd#2a&2Ch5bRh_>UPaF{0hf*ghk=lG2Q~ z8E2^k*EKU^hypIp7yYXluKRKN@ z);2+QVKijYcDNwbt3nm>i}KWE2HZAn6j{2GfMe#L8imzQde`#4=*7U3?xbpZ$WW$Y z`2lqb-SS{{fn5kmf4M%6=C1-BsDb4@<1EVh%JVYNThG|GZO=2dZTrsq-rVHgKVQDwWT#Ej)6>;; zn)bAN@3r5T9t4&k%eVkl6V#y< zyum}_5>iYBb8NLoEq#JL-nxuPR%HgOB-Q8xtz%+R){ejyw7CeVLw&TH!n-Qp)0e!|!CoG^HG~>KHcBF?yt>cS&DB=n!#d zp;cOECa#L`u}S8-gIZfzZLQY|6_8Ycid3$%29aKR)))=*i$-BID-nfYRwu_Tt4<#- z)Wzb`IQO{nYr3-@$JRE%+#q)0Z963KRg|5yW_}E8*WNB|b+FxTrIhyog~(2F!BD}m zW#P{U!|Uc?ZY1r{_y+GzKQASbRQAaqsHYI)r^)mc_q&aQXfxF~7Q8@kShq~7O}e<3 z&u?5g_TH=pgx z*5eJ%ejZ1@xt!O~ll8NeMX8IVP0jXz(ODk{%wy)*F7b@*O-m zDvXbis5pl0Ex&@#)x0l!si!+JL&pF&zi2#tIFD_Ztg_5gW;Zq zXG|vFd9Q$nb3FT0eZH-J8+VY*&}2*~M_s~P+#pe6$Dd7cMGQX|&T)ew!JnIk;ATPC zIPTD9i%%{=pkyT{+xF0^9ejBFYC4V@iPbuF@rCvo1|T1d3OiJwNGdDyZN9#eIAI&x z!9G(u)=CnbIdxeBYNo1O&SKt*2a}(p53j66J7M-5Oc4N4H zlYF^sn>1dfl%<*2IUU-ublb#>{5k4qH|A-jJZP82bmC^h z<4d@U&WN`9U@BoTo-!YM9j3=j`!vx8NpgUd86joBRO>V5QOY;u<+{#UWb8KCH#3vf zW?^#Zo*p`Cq4QDYX;A!(lzC@>@I3%G-`AabMGP7d?H+2KFLi5Sr<-s{$UJ~Z7(Z*6g01VGSwoR=~R-jb8F*&y<#oYb<; zP#+I{CmV^ktCLIf?nn0^y2^gZfl|x_j@qBcBp4?LFBuQU&we7N6gCZ`@x#iKo&=2!j74&C>!uGy1aRxKQIp3Yy!MSP#( zok~xp@|8jOUOt7|@gevZaPQTWcG7Ca+7^ASgY&nr1}inm-gWJ60S-p*%G0&4C=pkeihq`9@J z|Cywo#W5bEbDsCYvuJ>5s%23?dkpaFSlf=Mygu?B6Pw@#%9LrpOj1bWX%+qZ%ke36 zFo^2;(Xrly{3@)$9A!bdN2xUMrzp6T!@K(q+_6&r(pPk8R6}AN?$h4lBC8Vpi>=$( z*&R}JR(@);DlL6?xVrlFVe5t=0;zVkoczckB!}qr9G$%O{k6F0vE?KzO6p8baewN) zm6II!1%Im~Tuh@VlGBA8p@cK8Zru42rCF6c}~o)V%OO^W=vlD1?|aTgrq z!H;f?hwFcg|6u&BoA-ZeCI3$WAj~Wr|4RS}I~)6d3ugVlv-tI#a7JVNdMiHQ{X1UX-yU-z>#amS40 z2jKgUJXSd^!5-{tmkUGQ{$PQp@gRBJ_7Et7I5mG&x8hMy!Ws z-&G!m?NcKelo7{TT9l!dT6&nr_D~5E^TCqgS}cd=g@q?$XS>;VQo+-a7wqou^C;cUE|`PZ*Mx8k)}dKgfzOPc89Qt ze3C0*QY6wR6N1U_h~-oHsD@!M>wp`P^Nu=%W2&6&<<-x9i8X;_5)Z6OVMQ{CS972Z zh11TpdPo$CzZC#SvY=eV<5(c261|0g3anAixe#bZ8iU`0Vqn&YM+!@!-q02k9-VRR zX3tA07?j7e^?@jbXn{_^mveR%f;h2sv?_=3Afm!H=ZyS|HI7frn}L z!7^#^`6dv*M}k|;QX;1GKT9U*YkM?w02x7Rz|ol3dTC=Scu}5Waa8rKNc7{dCVJaY zO#h|XJt|fy=;IxYd$>$zND^z?5P%pWkD0E{EK+P^iN$uxQ>Taw4m>X zc@2xAN=_AM^3UWW?UdqpWIu>y$c7`}XID!w5`c0Oq9TER9y>eIQK*q*4uu6wE7cEeI3hoQYA@-RM0{X_;SU`&SRq<#e~1S%bW16Gerpzj>ALQy^G5O1iT 
zA}P(05HRsUN`>!#=I!%U_Yyb=GY!HVk}nhzxlPdm(8vBb3Jw5d958_3 zhB}0_#1X_qSUeIB_ zfp=CD+)!`mA%#LT#9U4h$?5q&DI$Xk;{Y8T*kG|jiK36WN@Q>x4#S9ue+&Let?XN) zAP^Nu;LiIH)evh(G7$_Y!*t$KMpuVtm>H5>z@;oF@sG6+%0)5_^GGRG29iK96A~CA2PBy0O@U*YYHwTLPhL%}Cv?KLLxvOO! zB?jrr0%{k#48{K+;+UiajUZ65iNB?Q92}%7um+_LKnV~D5!7LZfY^fZpcOKwuyBtu z&QPc~rF<0Zn^FOq!JU8h98#l?RPsW&H|2RT;v3r|oP>ToM4t&D2k63{4H*q#U&a~G zE;eIW@hQ}JJ+Y}eV>}sh8|_f9M0m^;k`91%pE05tgUuas4LKDj6c&;0QRyL8S?O}2ei(Tkw!{V@<~kccH5QyfSGQvFnO21n2&pgbZmLJ?2{v_((htLjC7 zYZUQZG~kFwxA2F*IYz&&QDCUmr2r%^LIISh&S3rm70@yXmjQqXVIrssERLZ&lyWIS z7JwZ#Is?iu4nt2_e4@}J35gc-kPs>>1DysdC?cQ17!)g^B_p_5mV}5VS7Iu15?c0k zAVqjnziSEoE3+Xn1(ei86NwRPmKFNvPc`-D+@EBh%aimUQWH%$Jnftg0iO&dud1hl z`rH5!R62H;|70v3P)( z{1O5}Kz#qA4G?dDb}~4Hl(+y}*WqRHh&s2iLIHJx6Gv<;zLeo%-~{ zNlpQIK;D1`1agGg4{!g_2;!zz#G59;v}Np3PLH(>?j0l}n{ER1TO;t`OWdP1i4jmD z4(Y(jW8-1A^6{vwauS3|F@YS;F~J6LkFbg`5k=r-ImCgYAX2P=EF{7BGbpgcBnQMw z5b^1=0X!kUV`E{*|A~ymncK%bR06>$Ps-&sHVYf3E5t5K;NJs)Y*Boguw|R*32H@P7Pm$gACr{SEtbS>wCTpEcH4 z8I?78<-2MfMz0#Sl-9X)l(|yPqx8&1MZ|8_lBW^Ia#%{f^B~I_#3=R~*Tyo_ z-o0k~?ZBL6gi)-eu!fsuWM6u%%{H29`e9+6R{hDWdNfULmpj9Lgl~nHO9~}ZSI<6e zlDV`xUN_7BF=*=Lis7rRvspFXr`Fg!TY`WWk=~|5DHU;*3BGSsw`-Irvk+%0pnEgx zn$>EZ1SI16x>Y~!ecmIQ^?1-VBV;mUkRLfvgYgV){MPe*$Cfg0cXb-0RP?A+0p)edBQHgCPV^pxvl!= zlk(7mLF)ba{J_F)`m#D?F=)V+yO=`SU3-5%kUVOUuCo2KhI;Y*YWO`^}h*Hn_gs>KjXf=dB|e6w`v1OypjwiDe`qiJpMF+MA3H*gTL70-9EKul62O+ zAejyOd9MqFIeULMY~;g-Qv*<%EZ{GMJSh*Zn&n%v?e1;ofXU)@`do@_*TT1T)k5$J zS>-CcGFE>%xNuURT_XJ_^szJ37}Yar5s#pWd%|EVS9ceXk+G|A!dZEnt>P{hzytp> zX&)*)Yk$h=3U-XsG_i>?YgUbAD2r$$TSo^ol&LX^RB~1oTPV#X$0pp0H!2=#no}Pb zlsXS;h31oGO$G@Xv?5DhhX_K7I$&d;C9<=v?U(?(%3uH@>d)yVgH-l>7_F4!w4+>M zPBo8hw|{wV>Xm+!qM&8wYE9$8gST8OL-b*GSR66YGD$1mS#Jzk&>7W-fz+%?iB>3* zSC|`iwd9H>9q7b7xHA<^g2K-H{(cm7H9b6>KslRD8qDVOT`X;k80T5pzlwvIg>2-M zSohY`$}+E&h#E|{a*~Bg|2KK-Z1cKJniKE1^Ez==8Y#$nA(~l3aCb}xFG?eVm(#*L zuj=VTt>|=|Ouo=6b*_TCT={ad5uH8qIkB4KF)nNAxvo8Z@rCH9h^yBqit^fhb+s8| zTeJ_H&`fAT_}czbYs&4wDK9r>R{A|Rrc@$-Su1fWLTh=BUrYMxC%c($=!i(rR83~Q z=|(FyiFPzoTVIqx?T;vsVa^(EqvYtI<%+fYEF`{6E@W+W^ip*@6CILkCg&a7#FV@g z+7nmbXN zdc+=u1JA6rMaE>vlpZQ&%a$8^z0F}I>MR|5BdH2xp^NJ1jA|TXM_t^+jH-rgN;C7- zg%p@kNc={bE~&n5=2oDFz16xJtz|m*u%1mzmUK;K9rL3!bPW=Rn`pCU)aHD-YF(SQ zPj+!izJ{60sAlZn_SBx{IyY9`tP*RvO-FD4W{KTR9iZpU63{49*_K{SpG(cC;16EK zEC=?HaqgdKsLeDhI?M_bZD(D#sbXAsX&omEm~I;l9Gk{o4V|}T?abixE!K2I{5&=k zQmA9wLp7^zee(r50r43Isb%jS)uEhDhIRHFx5g$QV>fl&EGg*G!47V>fX+b!Z8WiQ zsKE9gA7d&cZPzq{29b#w9gOOPJG)9;g%UjLRwlj}`2PO_upEwsW5CBQm*pGTF$Tq*3aDFGv(<-nzH?4ud9p=q8^( z9*=!7LRApkn@zs0G-s3~y>Srx+s3!cEJn>ccH4FyN`x`jb=+d0>?42J=>~z=7k({J z&3e9Of<3Qmvsx`rw~B5jw-&5N5U>cpL1;=!NP)R>hfWFd?KJ4}nEvAGV-Wn9g5Yn? 
zK`%2oWCb44hF=x?4gb^^(FsVMXWs&!iE6>%+0|~}*XDmrb^`&!7YH15y8vPn5@xd+ zMlcG!fBQE(t%p+vKC8N>cICFWlC}u5bMM}=6|dzY@3mh~44nMPqZ@n0GrfOvP&>O% z(%-=JX)CFBuev(Vo8Noldp9`vh7f4`37q_`JA?QmC~5PXuwm;7eEb8SF6;0KKH)0r z>8W~MzGd?8D*sYCqfnajha>lL)%QsSqx$QQp~Nv1AyVK>YMhtIFx4S&wkDx1c!W2J6b9x9cIa$s)9k^Y7gMG!N|g znQ4z5=ipJy#g1y+J=rO^3t0|`5r#l8_jdAflIQe5N)xNqF?{N-=hI#Vm|qRKdPmpLpxR?VP-xNj?RxPW=M+# zT5I3-I^B{iwZFs>Y^I(edAWdAcz*wU-krTaWtt@i{Oi$khIz^^p9f#dU-fcP=yGJX zR0>Ky4LO@JJCMzjd!H`q$%bRKcS>oKZQaPb;~z2@|CXi4Q+rhuMD^EsE?b@&bXyf> zQw47G?LX#SKzYl#nV&n@T#U0n2>Zh9QqPJrRlWBWHq9-!R0y*0d|ekcTJG7)i_`XB zYkIbK7w}r8rL1$lCj0lRMi9hd4!f^!i{R($^7U}Lczhe+6P^rT^Ywhv?@&XHdvOM3 zWrOHdFN!loyT4`m0X#|lx1n47a^FF?bcY0PlLXF2gMjwIQy}Tp(dX6P<@+q@;mNhv z*U?H4smKl8q$c@R*0#WZev|nd?6D_L#@vgwOHYTMG|@S@oZO(vHL81+Hul)|E@4gL zB!_G2I$hRr^q>RwU<~$vgRZi`c;gNb)`%@@a{77Myogxgi!_2JS4mFH=_NfHOo$t7 z?j;@~Fdn!DBrDdru7bL>+3#}r+kxdJE>2e5{{gPCvm_~Y)J1D7;lHM6MK}uH6#rHN zg3;XA!S{OxT#kl^_s6H}V^?TNE{uQd2(uH0VF!&}^A;snc*g64w%z<%Ix?e(zc;8S z_@2wFtv6fS4LY_T4Ugxqm^$wKDZashMxglbD+YUZ^!D_FZ6*w!*v><=cCD>-gJbZZ zbK2Ig4OHSxLD5ffwWh0K7_pmGtqV9_&0Lh^G`YAQR1!SxePI0=q&YC3?hP&t+Xm7k z7E+hjH=FSO0H%?fR=TCE#BNFO5_Cvk=lcLkn5%GU&vm6L?k~4FVjR>S+wt#aP!x-6;t! z;w2r3>t3;|VC(4R$cKll$*d^UZcK3J8B-$Mm~&|}Pyza^2v$kXLH2TQ;|Nlae`{N; zNk$;i%~oWa<59mRxi;H&^oqcoz);tTfRmNjkcOAkJix<*JojcuhE(%hB%OG*&5nH6 z9L$*U!rL$NsvMUT>*BKd+l?y^-OF^*z0z%_vCSfphR(~FOwpp$SIBM$A$4aEqZad( z5%U^NoQ1N%WE-_ks`X1xMmTw|Vav5*K*nwfPZUjmT)I%G)FPU?QnsO88Q&xePm9GRT>qy7^r>qS>mn%?&%nuI)z>_0W64fJ=;fOnu= z=zcm~#u^!-_=ycwzTflpwnh31O5*IAz8W-N4-|LO{taX@MgY`RS=Zd z$*~FMcFQr7g-;bk6&)oyqGi{E3?J{<29Jj7SPbcL20g|uQiAm(v0SW$>f39Tm4BLH zb`(opaac3;W;$QflyPW%xCG(Xq1_G^G#8)pF~#)YGkKk-BeSX-%Yd*SD&6?AmMC!{ zqGkO$XxOI7GF!|3Sg#{OuPB-=Iv$(K9$;1%0f*J6#Nx4JnU};bvkabI2*zOZHtxT` z%ogODCQWVS&uZidS*D78K^C)6u_dIc{w&cct7ML#?N?ij>sx(9P=^jDPb8r012p7=z7 z0z!EjqPQT3SfWoEZEApD4bHl9mbuiW$obzccze?SofQjTQJ$K3MP|y~|_h zh@O@I86W!7geF$20WTyNU!aRYT78sMP|P5uNnBYQR(9O`OR`O~#=lWBTc;-7TbH}= zBRtQpqekHGLr$LueM(Apw`SGJo9k$$g*KK%5N65so1gR+Av1@Nx9J;XA1^O@rR_AX zh{)w0rtPTZWa)7%!V6jL zl^BbQG||aZX7HY9@>$*kyqYh#H-Rmt{X@)oYdRRi0!&idY?$SVdi{vyxJ; zbIa6d6IraxPyETc%}AT&j8{A<$}@;MUAd}i!LPcsnDWN{Gm<8r)yS*Ocv6-neKpVW z@1AoCofl@GjTI5Ih-B5xsng^&d}iHHk`&+H%_S>UY=1IMDlO1d4d>SkGeDDzT1TEp z^^llOdCnbpt)`QCH1|E1u_(cK%bxXPF7#@9Ol2V`oppU573YH8tLr@Z$GtaR8G*&zAtIlvDveFwa(rcGh$7hUtG^I&9 z<**W);k4NB(>0;`lI#MBRBFcTv#6JOUgDbpPpL@T{s-w_8pg|Z9Aq0^x??wQp6qbT_=cBBuE-U~fBK7L{mv5rhA+tZ?bNw;7e5t6aXE zn8(`usxFn3^t=>tyZVab%q-|!eqb^eYKE|KJ|UWumh>zoPHK*oi7nZ!aWRJRR9NiJ zExpTnW6BGg+~=e}G~-hliZM7#apF9Kov(TlaBN@^X?zBo%;u7pFs>BJ=Jc10QmCTosna82PL1FeL$WkF$5{Wn<_wz!Hl`FHHGq={U2Z)h-$iH>E|IuC?fn zT`AB57Nk*}&eUt4_Xn*#-3*s?-B*9f_nCY~mevN5vJ;ugL6+9wu1}UFPiS;9UcT%+KAfDHd)@fSp2R;;|35pK**KX0 z|2df%|2HQy11IPIS1P_*M<vXzrAr6bvd~0F=*p%X@n?m!2{Oos${Bb2!{x2u1z8Rh&jzuX&BKS8|i4t>oOX|$B>30Oo~ zJ9Dx`Jd>WZ7BAw?;75uuQae@BZyJd>rVq&}q9@W)0!=JwSRbRHds7ujX*wpJkQ-WD zO{j}XOdsk?*DRHg8=Hl~ftP0*x8z?*b;ofU*3(H7%3nOTibAX4Js|zXHJ44IEQ?Bs z(P+ZjiA|Gq3JuYuZhvoSTg=%e41&C(d$v}5QdC2@FhEG zXgq;?p|N$d+`WQG zatHS&6R1~;izSm;uZ5B*tZ<$$druL=7`!kq56WyeFdPKx=*POl_o?!RLlniaKS7Ov zKM)R7MiK{m4BXZCm$?GV9Y`AG+JiO->VRY}s7JaQiJ$ra`<3(~4s}xvlV;PYmC`Qu z2Y_kwQi`RW-C`Sb*EBRNrzmlJXuMZ=kcoCAwTr5EVL*^8;%nnIX(>X|u6@cN${K!UkC;UN!PTosWfE-rrs!)p%UkWk-ff-Z)X zqffC4y!8kJ{e$DuUFX<(G?8Cx2P;%~k`fFUrODG_1~hlua|&Zb`!@TdtWZA%YORp- z5p$?z3}J~9y`eI7g0v_78fo&7MEc>b7|tS-z!V(Cto->=+R@rVVS&9-sZQ&G$^Y`8 z{gnX{CSr9KQJFPQ2N6jh1%Wq#7;W2ch((M5#BezVK;zC7P#zA4=Mrc4!#xaybLjMc z5`)~FIYiiJldGeYI(3~wP&8~cLJH4HK=ouNX4F^2I>)G|xTB6mk)-QeEkcEL0TDtn 
zJk#fb!*<3KC=SPSbr+G^-+^drf+hTn$iPSqX!(->=s!zh&^<%bm!yYkVqOh-4qHtq zr#{qNP)baJ6tTfXsn3+W0a4I~xF|jbs5_tGrH(=mh~e?TjFHnC02<0EjeKgAnxB@z zm{tvWTD_ly`wJyQTnOx-G2Egpg5STOQU71RTLy{hoyr;!0G+z3g9wN(q~x)|vQ~ir zkhC<4I7n6sNqT^ov{)EKYATR7WRhI@t)x(a0u>FRx@KQ3jZnsGii}=Jfp;W{Sxf;8 z1X37)Ib`TRjKe?1=n02xB$SfL{h%I^6R#+5iUIrxThT-K z+kr_+$&}!Xa!g?Xv2QIze}L%(YTX(B3gVcjkQwK%R73t1v=E}OU}lmuB^4thMDZ4o zG#9%?^__AIP_RsxIiW@QD|6a05F}yP4B3P9XV)EV*r}3Yg9j!x0vVYnJQAi{yABo^ ztAJ=LR80Yqp(smq3OSA{3(H$!2m$ji5xWF?qj_*|;;T*yG{R_bcA#hznYXk)1`<2@ zn`LKo7=MjbGBn4UR#5(o!$84izxF`6S#l^O5l}o9(gF!Oi}7#Ffj*4JW@cZrNo%m^ zxOcf=?cwU!62~NhwEOKol@nzB7Y$KexpWCNIzyUBuH&5DF%p_&G9e(G7Q6<%6C+(8 z7T_htmnkEv;>dHLp}kfR!~lUN>94;ozZL2n*Rx(sA8|!0P!6PTvWzT?N%)X$yhrS7@nIYntJxKXo6$QHf9iawy)SUDJNGiTZ3R&Iv zSS3pU_Pr^|)MQqIdZ2L?MH^&jcn&-|12TiBBcM2Yl)oz|P)57DcG@u1)nx}lr#3+M zSSc|NjcJi64OuuzYP~Eb;Z!=Js)D272oe>*QOa9@Xxr)pBR(`X2y7c0C}aebqG9M8 zAyfaDO;w40Ou%gb#eDx1DyOZzV8AvRI?Aby7V_LiaYVG0WG29v3AD7oV(~Pp*vhno zyDTh{i+T45i9x&JTdR%$0_pi~6RVkGs1>{T5QTki9JD_g`8(3|9Cg$b>VbNe2(xxw zpc}$9&4Mf7qZCW+ZVuVluX?+Ds_nk6?#w=rbFo432$9~fv<$_dnvF1$?+U}xU8y&H z?f4oJO1 zZ-LFm%?l=#G`K+6_CZi^1LGz(2Z;QJouF-ltX`<%`#?Qx5wYhqk*5&yyC*^OBe#no z6It6>0NIEj_W+a#0Kqn=DGT($c1ouu=AcCTVHC%mq8@<^M?e>-b7z7z+F5h04i~`` z+u*JcOfAsfs1tDm0ZIvIY|=yK?a|LcRS?XxhT9mD@`bZ28!2`eqAv1{MfngttHMfA;(XHHapI*FTpVVXbPlJssb7dxN+0KrS+OnI#QY?@5MYjZ32 z9;|j_3FT^=*d+J%-C8ii_BC}ys;GyC&6hx&Is-LzO^ zVoIT_?SA~!-mP(AXTAJL|KXKJwhf-wf4R9RUNcv1@U~vrXw#fY;8{Dhttv50^&zn$ zlT0%$)EJ*o)M9iPOLZbynaZVs&p6P36Yk-_R%!T+eH|L<$BMnoTx}HH`h(MuaX!6I zbG3HLXB;-I8PUr?|uP z#qfPRCEW0R*zcYn`=_vg;8dZs@hROYK!@qeVWJyOBs4pDVw;CGqdB-NR3BE3gfH*i z3X?;ZW{2ujFO>UTO%)VQex2HpYg*0{}j zb57@+sjF!mgq2NQ+V|W8{cDzKqUuZTjE{I+Hh!Nl6Fn6?iyKCI{3eG-DaH;D^5_+M zKFmQkx~DYmpVx~WpS#ProlkUMS5nK_mh>FjZ+oAguP5_A(or@xqqwuaah?zDzCWYW zS9+*+*Z?hnj*aQtF^3QE;-yWSGu7wk!)~256-UgrI{5U$DXB zEDK~2QrP{=Bat?jVl9fA<~}k$K9{-9R`+MyG~Y*CJN#d}OHXd__}*QcTfXo8aeF~0 z_%U!6jr~=^TSN!fIjO#GZcd$__D{E0lUW-#X&;@vy&658y&rq_Krw||usn`HHnn}# z7kk%r(!@ZmHR6ng)r?ymV5F+7iDpKeWyNtOa`-cAVNROEtzz5GmqR;OI4WJUJ1R$Z z_gp_l-?mt#tLxi)H)yj@S|@f`cXB~v)sZ>)<3YVyVv@EhV*9eh@RRUd zDkoOd>^xZrZ%Y&sdjpc3TuIw+FSroh)zbTFFAj5b#_2vgz{sD;P0@X52lvhq))C$j zqFKe^w{u>Q174i8@Av}PWBR;qJ%5aVZlKC#n!1H|yK^sErFMBQ0e6J)0_!86h&>S8 z)i}m$&lRI7tan#j5ZqUUR;tfYh>AjLcB=kL>Y&L887q^xsc9Ty*6Co>BZ*Z-yI9AG zEiV?jJtG43ws&{;@C)x2(aDkVHb5ovdOi>n-lD6)tL4$?Y4dB};k&ig`mU#E|GXYw z?;YJAzwb>$czIHZiyHOM6|%4J-obmkmR5cucZ+_FehOp#{D)Kgij(*12YDPh%&dM}S-jSRj~)GuMwSQY2^N8YHS zKJqWG$0YOszndNY%$>Yu-UR>nYS_7Wz@y`LyXTYT+s0C5>xvy88m98&IKy-KrIE@X z{3{2|tQH#@z;|_^Mg zHajrqiS-(@PN0Qqqzd>BTNPP(DqpXMuZ@pJttpG8@D&|L+}>sB%7*mIq)e+;%bG0} zrz_Rz=2S}Rsi-+f;h4pGFD>oI*P8Exs1AP_E z6I&IqEc7Vy3b&bUzsQup)DGnjaKNpK^QZGSL4j7Po-gIQrQIgSv}2uD*cPbuSlMR3 z^OdY4Wbx{iq5g_jE_;TPm<+9@1POH&J^7TDlR-iF6to2P+EP4bt2wrT^&yq&NK-pH zHJjARmNf*{NtMnOmADL63Jps;WWB4@TDG)W&A4t(|2prdvQ*0+F3YUWa;)5K))hOj zY|Z2KE4?b`i~ja)-d^>yQ4vk-W z$CA38%Y}v%I%tbOT%cE7Uf)f&qbCNi{qW5?1gcAB=+>l>Zbump5Nn1x0~W*p?SkPCTK4oRlAR} zW9L&Jf!xvU+WT71Xu#F7K~>`AYf7#*GgV`Q-pb%)W!TZt(oRiUW%a+8RV9;(2P^)o z1<{a`uMV+Ckr+zyAlZu^Nns=6uw|BD9uOm`{G}{bUMzwT?$3^%?;W>sVy-cxohQzX z8aG2v5j;i0xQGU~G2(^xnaV>!BdZuOo3N_U*H8OzVvi#7MDQ3Uni!_b*iqM>bX%+2 z)Y#d0@rAL$>c_(@Nn-d`I>I<~x5=xV2|~#O1UF)GI7zIqHkTLzDfy##g%OAqLKu1s zP%$WeFYb=Teg$HfgHR|0)%Lgoaxd}=3t?~sF)1-|MD6iyi)8LG&*$XCtXr_`2{vL} zc}z=tO*se8+^N7t&#`!=^u7$5eYcfqF>ecSQH6#{Ok#7?^ZO!V4SBA_3rX>YT<4-fsnULnlrsviO3a|@ra~NwjU|`{DeYw5 z-_^M`9L|)7(XpyWQQ}$wq+7j>wSfh=&|J~7m}+&5b~9UprA;z9d_|Iq74ZvjtP!*{ zcV7Tm9DAb5ZyfePjB!dk4o7q>*eaxKV2Grl_aTc5dzy=W zY0GV(JH}y4>OfQU@Nx|~MP5+DhR6VCDljU0O&0;n?c(`8)%C!9S=C>pQJGdI#;9HY 
zrNgbV9(aK!*IsQamQqyx5>TJ3th}8a%m>&4D$);E;fab|$-qB8K5uz{^kzF;v`@>bFeYLx}JzXAFE{@`U>(JKX**|Ovpbxf})liZGQ zq8UmS=05ylB+DMZk2qD|fj44BeeQ6@DATyn*@^qbxf|YvJv!2V4>?PiIk-yxJ3AcQ ztTuO}uADm>`>h6t7hs=WqkilwaL&?bsgdv{libxXhC!58i8U)SUKw>s$OMOx!cMHi z)o3ZHllxksXp`W^=<)mfbb$*V*Rk2JwBSfp^h#0_d%MLMwm2fe^EgO`Ed5U_WzJoe zT+75471`Z?X$JRPH284DJu`Us#F=EA-M^$u2aEu~@6y2QKme`cS$~F1BycCHI z{>;!Pwnk^zW686PdyVthXT}=b^uqwBbXK~FvaD8ET=;WoAYg{Tc${PcgBP|P_4Suo zac){-mk%me<8SL^CDc)pON?h7S#!|lmDRYk+$kq@rP%_NrucWlzhFaVIliZKvCa@{-#uKdvZa(}>y;Ds#0GSWP{R=x$$D{z?AIFxo;h-%gSkX;#xGn0Mbp|R;NIY;U=fe^6 zrVM)<-%zE+3Dm&ggi=L=`|roZeOS5H6?_8hA3re4XW^s-ACQ-*lh~lCM90v5nGFYUJpA)B?cV0mutU&pApKSRg@m++Z+-$!xMe9bd5JEG zrSh7JY;X5~8izX7b1A-kH5aM`Dl>H5z-5jwDb5ZBU1BR{A-qAg(7K!R;Q9azF;LRW z(w}jw>AzPo}Lc=*eUEBrPJ-5fpO4eVHEl~7ncu1jFRj^_ke~Z zWD0T&-TaHUupi%#-^u*(yOwfro*zq@1gEwAHL^jQ5QZS0Sp3bb{UvDa1_6YcQ~4k|V*M%m#PY+o`~S_}8+BlVAR?T2l|Ss^zZO{jThhzyP!|FBDdc z%I_?0TN#d|OJE6+NrN-~mZ%Xg=bT-Fg?bcu2AB*kFgl3fAUvCch&*--6E#;Kbt&RiAFF3ZAes?ej@ z=@&Ok+#yNaYGXyDF9~>@>CtH&v~o^?D5viQkWP?zqod3}&8B6s}{PvHV)|D6j7`A4af#qj8^)TO>6;Jgv#%+`6R+Xv8gV)bN z_-~@*X7Bg2`>1(AT5^ZVVXuJiK;mAvmPMmU}e`#+Ur=V5nm zVLRL1o*#o(S@ONxTaQ8fd7HcCFv_DS8e#kx@=bSX+gqznm?w2A(kYu0@=3>QPVRII zPtzOVoSY^U{}xbM2NwTfvYy*ta3R4Sxo1Nw`*?ia?jLs=WI6^;la}C?+bOxBnf0){ zm&8(b{~Bzo^&ssA1ocG*+m8I~OLe2s$Duk39<@JlH)mo{P;Oj2Z=aIqjh~T$oTP#9 z)s=SDR#wC3#@E)y%H+xK$A(c`)6k^qTTTDbozH_v7wph`-yBs|sN@@$`q9NM=d?4% zS5M>*uu;vH@c-x~WcuGyQJI+8eqDqADJadv!ovK&c?ntB82(Q$p;prW#n)Fr#nCL` z;w4#6R~yW8SUa19pRA-FyMd+$5%J~{79b#+ZoRdsJq z&(7KH?)vnb<9(GiUtvdC-O4)1|Mm*u6_2*s)ya{j{wWibeCkQhlToC1j`*W9^ZIx? zc!)tG)4vtJi)iI3n+yFH?*PFQLH@T6hl(2={KFv{A2yJXAqZ+2HTzAqb`Z@7ik)Rt z`?>S3QHiZ3m<`i~AlRU((!<2(i2)hn zi~l5e@4bJi!*5$5=&xQj2Ut$2u;U_P)04ZH_+<%W6TIO6cO2EW6eWrr+S- zv5mW77a~JrAH+;+6aQ@*HpnL?f;z&h+-PV~GUpKKM5+1JgnN$7osET-Jjl{&twlcj z{cp#vFGuazpNj<192n;fK9>wDrY~nR2&YC*nqas-%TKL~VR(b_?BnnY@ zZu4fQ5O>T6AXl4-<)`jZOk)&+cN0<;J%N7|DujmN^eqR#fInYYy3&9p?wqgVx_O!x zbQeG2kx}{5QE=G6@j^@vdGhEuRQlZbMiuTS$o~g zX}rstJR^)Wf875=wt2D$ex%?ijlO?oLDZP%q*+g#Iov>mE$>2ynrknc6LKrEZiutp zz7i1w?SKxiu}EZ*dwR|eZ4xE}u3~s{xvhmfcC>TFI&FO!Tfg~d3qc7%X)Ge=3a!F0 zX+C@!Gi!}GvoQKOVO#AJkeem7nIRX9nQle7ovzWaRu%7p2tqJ?1q2mAw~|^6KXj!> z6ZS&^8oM4g><&V2MzDPhoC8*#YsKGw>aiYMq_8j%!hUiPebdtA_PvB@sk!TigmjJT z{@k!Cb56;fah;vz?3KrlnzMSV5y@Xl@jeTJXv@$T=&g@BIEPi~4h!|MOr4D4l1B+F z*chXhe24b@mqzCgFKEhl$tAT)kUs_qa&UjdOXs|B9WVlc8BoW45TuX|G0Q0!VV5*6 zW(M^5jF5v0x4$I3-*hp|jzX41fto)W=mMAc8l{tT0pSv^VN}ABw(UE=h$YMlwFgg>s)p-%ns zUTqdxAn#b)z2Kvg*r2pOjOK(nw<_j&4`GDAE1qq*4&M8(QU9(8%M=7L(pJV9=D9EU zV)%&ZtYh>+x^b5jlJUoP$SX1mz!VI5rxh3cr0`E3!*cvhGa6bzeiSC zNfA+^@f{j@rBV3jqraxcYSoZZ)X5xKKu4WN3%TI;iuX2km80r~%qB=a zmk<~9k3UQ}*mse{>t1i~?M~!qVJNrnPTZYSr1G$0v=_JcQ7n$}@c6x?o+t-$V&^u! 
z8oOMt!)EXX_&u!}zi#uDof9-4E2LH394q&>-`Bsx1B9A)r^ZH3Ag*_{3J5YUY_M*S zb+x>0l^dr!p&!Cjtz1)dKjjecytTYEAIM61IrX!<^oOqB9$wxGNl$d*DQ?>c*}pRA zmnks5ty*8_j6GW@kvvRz7WGVjd6t>JnkQ(ZcM!pwKYgkeXF+^oe1OY_(hJtptJA46mPHoe)aPB zem|q3YxKK0JVaT_`|0;{i;f8Mokb%X6O^(8w?(~spM-Y^8%st+EMd?uUxdQD)Fj^+ z&Lp@L38sjM;7oqJ+%C>xBB!cgh=D+nnQ&Rt5qsa^oByHo0R1ua%V+jU{p|bIzmX$SFZJG7`9e%y`Y4sb%E9PFT!c;^| zDA8%8z)$LdpIj^}EigFFml7WCf4rcQC9+Y>x4!2EY!t>rz9HEmQ_|iRBDz0Q&K2=A zxbg}26JGu1c;lXyW)GLsVw@=ij9#fN;9&{;@)J|tvm!YAvw^p8>f#{UBxq4;1OXe^ z375|Gu47w{ALdp?Ha#GmcFypP3F;Z1oM!&S;;9D83%ort}M!rwzbhbrYy#b zuU}r|!)kSSGKJdNNcLOFB7|JnNQ}*wssrjQi4cBhWkN@Ah&mzAPeO^2=kmhhTGHo~ zv9>J`J*CgAPqnoE@vL_*{w6Qf&J%rX%bNJG>B@*%MJS9;Y>4vyQG*-46ZJ z?xH3u+0Kcm`dRT!lTrhbI2H4^YdX4aRvyIhGRrcHohMe4Bi+K~wBJ>?3VkzBeu4!*6Nw~2ZI8bU>%J_g)g!+Uo6 zw6FO@7v7Bju>~tRdM-F|)72RZ3khGX4RgDW8R|PrxeLxs^t~hI>WH|GUb;wVZk%g5 z)Z`eDx{EyAe*=Xsw(q{)xcPJsJ$pSh*<54fbm*&5FKRT`=2tq0P^oIi2WA`-YLQ?T zSPB$7WgV)z6~9zbIK$3;=-CbV_(ZZt;he)4vc{3QHF^dVot60UL+4d4hVm}z+}3;g ze2pWpo-I1Yv@yVZxRZH9GoqJ5lC;kJJnqFHJG4r0sP(F&#cp(+BYU=Gaxz_e%1J(Z$J@g`!N8OIj@X!rhVPS{b0#C}#@bo)adDq>LtWgDnfelK%e%v;L#Sfx z8m{nrnooSmoQv?jQ;37R?E@J@TwckmwCzWpRr(co`h06ynIlKxwr|M~V9L= zW317nykf=KwOK+$r2eP{L&N$=o%OMUz)qk26EPDb`6=(oL5+`Q_5@G!uL|``YuaWf z)@xB8UU#m}YStV3z8@|REcY?HDb&B@q1PGNlcQH(uDUgQ>SSfwDoi;}k{@A2_H8S_ zH-#2-i@I+crIhZ8&t$lqO*8BhIQfB)?z3VjI7=JyN_~_34pCNA>S+BVhk&VYhtgnC z;!i92c24=k%Wb7+xQW-^H{y*$+aMt!^!;Nasvshw@nLl%Dr2^+9mQ%ap9F_iikytp z@Kh3(wxo|(!!Df)3igOzBWl6~C>wZ@W|lE6-a}|-+8KyZtM466I0fHltmkD6ikl}# z#%)o&bmQiVT@5wyDr%(t>7^E8N9KIdmnLwgYGq%9_HifrftuH3kOo#j%dPh6DO7c( z@0RcrT9aj;r7{fq-?R74fSyh|FD(Y$4lqmg`-}7okj*dFeVV?3@&;EZQ<_wNHN@@l zCP|!{4Xc%L^Krym0?!pGhmG_+v?((+gB^!ycm)bE>#ZJ+B9&$VM|Qf^SIs7$@O!}0 zEz7O@ZsFSlvTw1Ph({;P`%{~rKRmhpO{w#liN^gD7T}40g}gXMPp~`(0mS zF$1$;zaU9s-ji=;O_QWqeT~}FD9=`xX&XM@k9UgmPJ67dC0Wu`3~QYv8(}D zTMpMx$l8usV+MSu-`BY@HjkmUj_DdEL6e=41R;|P-ogxovcMCa$JjKI2GfIOoi?+l zILGg@0{lP8)|7|N#si|)l)==G{ zH&Ku;ajp0JH>U5dwr<;hr#fgWKi!{~`xO2py%X!7G1bG_FCO5k*$FL!`YxDkHB(9 zm*Hj8w_1-_o91c)k%%JSr-_L@t4{8_irP59q6_q*7F+U$t8bN)a70lC)L1M>T8!41cbDAgA_`ivEtJIfu|{rMLtj%ot9ab(9vtb1M~>CJzNL+oY;+s3pg%2+ z?e;#`tP_P_GTj!PUhN4Qg|ACCXJl=7`|t{y4QB^uOS@Ge>=r9-FY@s0Ja&6}`&p-W zPJP-l|9wVo?_XiL197#Jx>}(0IHnRrPOxHmWiq=w(q<;8_jUMvrWM-fY;M1i(S72} z>F2dVp@m^gmG5-9JLW=kNEd6E>DBaX-@LNMjIkR1@ZwhQr$>RG0gDaaSNESDV5ywUh%UUx32dyI9(Yyn)ZU?p!isi-olbzzyH3!)Oqbha5?g2>(IAx zprmu>jkC)IVK;{zgAco*@h<;oeeKKpTTXC}dgzxk!_{wrB)&RuX0F7iY03`=aX8;_ zs_BofSFDWTh@>9Zx46N5b^roXBkb`p$w9{|#EdrfKQn){?xG$}gC|{7Pc2dnvd8BB z**LsKsBkXS)*5)>-FDWH)5!ou1QSOI<-q`4#118MQ{H`u(a8C1knI|2fAl8}XkF^( z^HaFtb?NkBU{PjjH$TTLJuBVob8AnW5Q<>*i2>bq;TPq1+!`!7A?>0i{){-|PNUP+ zHcvMX?xsZbujg6uW7-us$?jFf89w3#=T;; zR>nUV?M*hu5j};KCRH+0P?)sdhaI*4_&dw^#om8cQpwYyvI;1mp2%yU=ccXn^N)C8 zj*RfkYAN-$t+;PvxBf-b_t3(drnGvjuE*{5#T;0DRtzqZ)zztPZ;a1EJDeoTi#gq| z9i>j^(Fu&&=pD?hp?}JUT$E$?v&pA~I|rDG+*^C`1%W5USzV3)igZs-z-y>4CcZqb34cy|8XP zHWRXmux>S(Dzq;0uo_6~*I#~EEPwgTVAlo}DH{%`d9_IT#DTp;$ATb7)Vy0w`<&bk z6w)PsgMzNnKfI4P-Rb6-n_MWv%=SY%N1eU zo4Bp%c+h%%+Sz-(M>&zyY!qQ7xEossC{XnHEni@{JY3y~?=ez%*PEiEj9?v|xAz@| z!o%J)``LegYhAa0bGZ&ZT~F*VDao0uFIBvDyuKS)xA(ewIv>!>etr#|H|zA_ec0Uf zbnO56VC%yQ1I-Bv*9mNH=n~)$*{UHk9pG{5e>z`5{dT8K0zD)i?z71YN zf)q7=sTS`oGeW9K;T|q-YLwBMS)Xa#re#JTE=$3}8o}|;OC_Sh&$-rll4F`finEt` zk;;t$VuC-oYdcf8OS9$)!KOS!AHJ7T7o+6|+@g`Whe?$JZ&#OU0LU;l@#E8s`i_RPj=*9j#ZzE3^5wQ{}BaH8|8n z(4?j03-g|pDsvI?;sfpE**~&`@RaAJ*piG;?@nK^%Z?fzq7YEi&i|ziiqF?-!DEz~ zcT|@vE8DU*sVpoyPS&$(=~722Kit{T(^ZN#V6H?ER2xycoQtrIC|;0O7c}?a3byfS z#EQk&A=%P1RZ1>M(IGjc)12ydQkt1*9t z`pd5D7r6+Pgh{1{nBLQR9n?qcVcB$%7SB>k>7yQ-S`DFgd}ar|t*!f{?@aZjH6mSq 
z7`E>5i&9cGn$^d?AF;K_GP9REGUt`V#EaNy^Oh{!1yw0*X?*4#X*1^!uMipRA8fOi z*DQ?ZHjPIW$y`$+ zvemv-(wH+8FM3J!F+7td8-1(MSjAh`_dNiWC>_kT>Lq1WxR7k1l2%8}n)|l^W@OTU zE3&#Jas;4G052MZv*sbpG&hRCjo}CY>}!#Uv}y2+T;N@DMIm5|34H?B$arQRaL@PA}`Kjm^HHV@$8GCN^ZEB&gH1ghK?=mJNwJ7*Mp|_m*ld_!9aJww^ zndej;;slxS-g-_8*gjwq%({p7)~d(8NGqoxkch8iTq)m}U+BYa6nA|(+|_)2f$}SH zzhOz!_Vl>tE5tcz*5>ypOnhhe;)Su=4|hido%TK*M*$cYjgRg2VI6xJ-hor|z84$T z#@-?4^S;(Rfus5=50LN=^oD^mlm~4=RXhj$L8B6m_Xd|okAYRJJrsTCbg9yBzPj))?Zw*^;Z}h8UZXqfRPMn9R4#ZHTJAQpx8fZbD^-VGR4K zHzH-fW#+F-GEPdKBS))Cyd%h;$On!uJ8#vyBn$nbmN2YD3SFY^jh;UG6wL52kQK}@ zn@#=v7AfCc^3IBvZq?g()I4dDH+~)~TT|fN>XzNf-3XR?f0)1i>6xuDiF0d;?!|Fh z?VO!tFNbj|cbGh&c))s=Uw>oWBJcg_Z1~?^rPp2JrV+;^yIsK;RnS# zAG;MGx>Bo*Z;@A>o8CX4Sx4XU|HC}V=@}5=Eg2dkrCvN$Jw(T=MbVMb1)Ip?9Ld#G z<07u09J9rk2Aa(A0zcrh=_?#ml}$l;Qp#Il6*d5VuRwPqpz@ojfK4kUzK0rx3F?Oq zo&rNJN^ibKfntQR&!>5FokY$1ZBH+sLk9t@`f!QN`7O8Z!u>1M7m}OdJwe0uLgX}T zo-h_*d}f%h_=r*&zjzDSCS&kJWg=$_25Nudqee{EGWP@({2b7op^C6Tf!xR8*M;rt z!>t3E;g*5J2x8m-DzQ0YSdPy@n1dg};7T1z{A5hvfJk5vxfsoGo&bzH*yi@8!{?(e zG%4T+hb9;Z1lV9wmMCK)2J&L|2LRFG_HfdaO@ssX;j$?Th5<+oxgRM9m-FG0*yy*> znH{I8;OWVgN;;LoTCmGx)lo=*_M%|}*i-=UG&H4~D1aO@9})amQOrlmi4cm#59~(= z@Ii~?2B^%$z7wVnFzGM#JM|L-u*}5m12BDo{R9AL1n~zBViQUgY8C|`08#u;0H!ApacVQpwC^2mA`Z@=v zc4>;EX{i&$0eP5aG~fy%N@8(95au>HSQL>mT^jHO(~JPT2`?q90B!*KiUHmSJQNTt z7G|o_-$vz5+&de#3ltR#QvpGNRT===kze5`QN@rL5_?!BfrMXk=)1A+lY3afM1)Y{ zY1MFtN{(_!YjP&Y`N*ib0^At@deC&2I4fZ~9Ccz3FPH@Ipd`K-fawJMBR~X2?ntBx zK$s>A%px8DLII>e{+WQVbY>PAlc(?&I9S=F9x6%=u`o@Q!4O%wixM1EJ`5 zcgB7efEWsbKO#4>6Y{8|K2e3_K_F&(?wa#B$QYoHxs8`Z+4*5n^aAfb+evz4QviaI zwCalONE8o0%G?Nd1`HDoqXJFOd?JKV6r%~iln3@>1A5^Q;<8}Wk<$YGVPFqp)M3m} zz|TCe2cIehikvzg0ta&i&5%$VeiG3kI2egakUk@Q@M6fNoV>WYi@}9Lb7CrnAx=q2 zYj-?H_-?JoUiD>1k01S^Wk-orBe+?%S65JUNY|q$APss`>~(xwDQ&=&1V`D zW5i4)L{d@2NUs%Mdv<^2I7r>;c>-`bECULbY&LS3Vw(=VF^XE=1XJEbML|3tQ@HWl z=P{WkFb#V`iv?(NlXr7-7^*GEtR)nLnGM=e(_xY>SyniOOzLwRV zcg2sqR_N~p$G!EF5A$AX*ls(pU(~%OK;ppv-$a57`>EhRMEMkMe&}z0C>jUeF+LGL z!m*s1wJ~N^h^hwCtcw`@RrOmOGp-Bp<0^s{hDhq=o6U~X5`L#|rHp*A#INrUO9G3s2U_S@;-3nE^3 z4j03}H+S3kJPw{uXq$Q?ErSpTimO|zgyDp|4(2^g%?OOV4ZZx_9;^13^$TZ1NvEpxyN_;?I~|LR2tGix+tcFuqG%fDnn!T-;MI42Ju2*j%R#oqGUS2iFU zJ0Az=|Ck^@$wD$nnt!~{x$Zhg1)-omv4c=iQGuX7x5faRccfts=+qk9B?B^^9hp+NsP0vVxevtLqz+4< zL7q{qX}ObtBp)oqK2=q)5%&Td)$)b!ZE?q?P3(>6&b05Z$YUYxTHenZ=saCN=6Je^jM{fNj4Y4ga^xLt zw1#T9kH&tRuyVS`!Od(m7V$6I5V|(qh2L<%06#1NZ#E9iyKkw*ZXB9Nf{q7LO`dhf z><28%V#CUx_NQp`VXNLzqKAlH&xGfVreAD~b;zBhz2FGSf>ArrsSeXSrs>g?gU) zhAwS%(CO);-7{}SmXuXFZi$ria4()dlQUXtFV)>7?D#8qO+p7cesmu=GlfA_sQvge zgYrszh4H(gPN%OOgHvet&bXJOaYT(ecg_fAxQ(%&=hN)aQLW*6qJ;?L%l@1&lL2eZNYov-EJ!mAS(x5-CxV{Ze9{>w{5y5-p~1KiN3)!?8P5p7(4Kx$Lg zTYbo)>Jo6R7>(=U`No6nWIz9U{ut|W3bW`QTG4We0*h0!Mut>ROd(*E30=QyX<5{CwKf>Qs-v>T@zkSj( zP0RHsG(|)u$aE(^6ugddX{#Z~#diJUH>0(Lq-C2y-yj+S!cY`vJebsa6F>GCU^II0 zk@Cd1crF(1HwnYktWiM?(z|5Wp%)DIB-yKmX?I6rI?>ne>1ZGybARYGA?PZ0R*kgN z+7w8@LERQ_$$UyW3;y%g@V;LlLc3!duiE|H<-f$_H+{$k;+HbP^T_NwMLwT*);ihw zSHSnR>tnZ#=WmYA^R)lV=wJ5TIW?}`-!c9-Z~yK1m8wCM&YHOlt&x!Hkf(bHby%WO zf-Kwi=I6IsB<~qaD%ca`c=jU#8$tM6Ol_6#CJLS~lt}2T0=k z@y`#vuu$9@QS>A+JOfk?Q#RX>Aya`9i*zx%hN_yZg~oOb`4y zGIAZV28@$fWI!<>oEY5TII3J+$nS_eI0g{i&aZvWuYs+gSR^L@Lmga>n$KDTgY|N@ z_%;@mw!-=l`_DYJ$tUZ>uRs#G>|B_kX&~*SaYY|W9!p0>NyVm*c*v5a7n&cF`Lr~a?c;y*<@ksI-m9{ zxqyL%ft5s8Pt8Ds7or97UYe#1y1qu(MQ_lXnN64oM=H8s3X2mp^w+Aqy1r3mpiwo6 z(=ubI?pAX-kFHqFz&%ftmF3)~_DR8luV6sYfV2seBFTwIl2!3rXPYT& zV@^xWIO=s}zKlx!*Eda1;)e3MG!667bX#jH%aOkHOw0rYgl3e@^4(_Fh;Ul8*-Xv( zm;^H)1A{EvAgbk5b=^OI<9&!MuY~)Q25%c)>=*^Uff|K1IVLzOP4oLca+?%z*Z!x$rE1CEM22|MQF>kent~83m0lu% 
zE;tgpL_)`OE(d#y<`uRR8M))rTH1%8&bDKqb(-6ORdi%%QeU*d7wlr?v zlk}InnrGm$l%e#_d&(igktM@gQMvD6Y$N_ANLZthZ=|`mporM4RSc5(0wvz zwL-VbIrxeeFLl`uGe62)xH(;s?P~Eh&061S>`~vz{pI3^>(m*tL_r%=>qtWmH48v5 zR+zjZVkWiz1`_(K(bw*7eW4*Dl7@v7IqDuioPnQwxO22E`zn?CH(U54c2<{<_u=&P z>W8)|zIN$Gk5IKJ+S$mbZ#6mU!No!34wmP|J?2}jY)u!tv$_lFG<4gEk=)Sh!*s%t zSkfOfh{%c#i}-P>DD{(rVxx!jvNs}#)unFSLj_l}HPu#GWp+K1T*OK7?>>X641x+v zAG58Juc>#v9hy!Wwy(0y%rmM>^NXSdcsxi0v2uW<4KoJ69o3Cg(1m{#wzQ|qlPKQv zX{;}PdOmC)`u5^FE3$!ziXZEK}hPP)r0}_sI60(N!ZbTmPZ4^Aco9bu8Mk)%HlvxO z8!(I@@NF#hiZ(smE)jCN0Y1;(EWtI%eXbwjX5p2?QX({4vsZ|kz5i>Vxj3g~JVe0) z=qv4a*!x_xwTY|Px0;?i$E#U%82qWRG;S58rPKdJ-u|vPxEDf*5PB|WcSE;U>?p=3 zI`#Eq#9Co(-)WU=KiSOpg@G-#j4=M+TYnV>I*sGqu@*>18p$W9v&M)?+(mlMHtl?{ z$4HJHmv4Fd8FoL3{N3EY^yiY_<@V+?E0E44$rNpUVISvBo##7Ndr%_dIR3L+;Ub=H z1^OiLppo_vUEyjcR>btM+%F@R><$}Baf@b6`uUeRzJr#G*wrtSDH0`}IUR@ouL{#T zK?+&7@6pJaW}Ot0#%I519ts8bgwoQLH8K?Ez>{tBD3>!o$7|3$&dsat@2;U`BfC3f zsYeU3-ysziLrWt@j(QkhJRzJgR6jC(S6_!VS3+^J+{#K3dN8R~&(QhppCQtyT`-We zIJ=a3j%6Ji3;X$Ii+a&yZJ>Vz{jRS_%E^aDXrf<)^d18DV(;r#VI#1vD}jP4rsr7648c|uPZYZngyu?qWU3*jNPs9x z-XCE_UK9aq;8BY-WpWEWM^=*WB|@wuSJ=KQ*--mUvVa%&s-Mz?0FnAS-eXv7*x--o zXLNQFEAc_bCG@Qisg!t^ z1!Hj`_W_QX7edKCGOZ3s8<;UzU3AZh)^6BcR6D6)RzMDpFtxY*J>u3mxPT>yVjE}F zNAZ3b4+qS<9T6_+eGpMgB@FCPc8|Sa(ewy)cEI^f)gk8|bio5Lg&c#sv0ugAqc0e? z@Q(T^A!FcF0PBq1n6^8`>%bYeBd*!l>u7f0NZ zgRnq8LC_&?fsxZ^5tHJ)NUH|Fmcu>(VsV{_o-QzBxwtOKz zbI(Q)3kV%V8X^R4AGtw*cv^SGI1-pLZ+(K0&7HYHh#_`heWYegEgXBg#qyE|d5992 z8xjr1o7+Ty+<C#3rYor zI>P4#PoG2(hqYvaGC+}zL|bYQMRA-hYzQjk8-!Dw+LR$LKt(xCJWVl8GEFv3lu)_E zMG9S<7eIhBh;;(i6Gt;;%KM=rMJPe2R6-N=9ygDLDhX#4K#f!g#ujJG3saFL6sy5P z2J|8I0VI$l06|C(Rbk$eyCH71kt(kPT!aniE1sKB3? z2}p8q9F)KCgyG^vL9LL7z?kpB7}EXmzhuK`2?1>2uYuW^QeiZbm=GjWuu7W*V4X58 zN(?L-7=tN#@!=~Hao~?!s{43l<3J3eWH@*+sxY{K9sn??6=@yJ0$~Lwf^on(NFrFi zNe*MaaWefeiI-lG@8JJ6gr?>0BQ9vSP$2u@DL~H5%j*_02j%N0-Xr8b_(5Sx?-S%P z32#Z>ZDdAq?*n8;X>U>YoQrJ63b@NE#tPg^^esDx{+~xB2+5xlhd(EvEmw%~$g*+C zx(!$xsTva<%ZRQ+^3;fC(U0yqoc|4G3%Gvnp{+x?4aId&y?(%VbW{JwR%D9^A`F>* zpVJWn7Z2aqJFkmX<FiMvN#lrZAn8Qz95 zeuLXKg-BFQ%l8LWP5%l*G{`f@ylWR9qB?U4UctOmOPh z9D@}l*EW&U!7ab>oUzPW89&2p^D{ib&Ud7g8JMqz2;!??KQ;R4o?uNJEbXi0NUg^{ zMtK@kpfr-rhqs~KAk7V_Hs{-m`JmpY&dI1yNc#Y8#O4rH&Sg3=Zj9z8N-|V<%6^zF z7F0=eqPQ|2WtDu-{Tq3^oSifY8l?6Nl--z9<;> zK09@+;Pt022to|BfyR0+0BuY?zyx>EN>)X7Umr5LOYnNoRX{(<2>K+NBEH=+pqy!WdMaEsQfW+EFbu>I0wQ*%rvzh;*XrX+^?r%;G0CCd9Ls`2RFT3eVb#XA z{<<)vHdThC31$QT0?R|_#o@Q8#QE}~KnW`H^pvLR)ufmBid1M$(!hlz>(^lvWkfG z3&8RY^JuJuHLok|(Q-~Cugl*P^TuNiw!}~JRMZFC(wpGu)BDR{Rh8#r`F*|bpaMV} z%c2eDlD_{3AXP=j0<#FG2#Xj=9SIHW3BiSYhR{GHA#f0Pak5^qmNZa|$%i@Y3OJNt z{{Q2lkKdyUFrlIcV2jZJ;G~slmd+&FRNGZW`g*zo9(m^aN~#N9@ord4s`I)c9+43| zd5)af*lziMVLu^Sgm*UCroa4P9y?JOOIfP^UDsb_#Qs@s09&TY(WjhnMsJnB($(nrv@-P9=C$Z? z%mQqsS*TYxa|xG&>|x!4zFsgU6G3h=yrYXj`AYXUqh->kHD;HSnaoT+NZk2;4Yf;J z99yX7L}?DSN#&^vc7lS(X@CnPU*6S)eQ#+mOgo8dRlkXIb%>LPht=-)+_Pf6nbq}? 
zg(*o9oPK>y)8DJQ_m#dJHE8O&UwAbxF%(%}LAUqf4?1rJa)F(F4kupv~`*rs-R0Vrq*+ z4gCOSHW{;+fiUJ@L;1mlp7@xJoYt;Mg#H8pEb+DR<-6bbz* z$a-Y^nle5EWmmDlJyV7ks@Dj=!lvicn6liEQlXZ?O$T6VqpjZ0)uVheSKX8+cO4IJ8!#<#4`tb_-zl#K{Exrc_a}{9 z<&fXo!W|gR?z3W7^;Wl=qMU$>H+9{yj6%=TkdI9H1YvV}2rig2`}Y?@gX_MgIOqAD zJNX{I2AOex9eBwFJ3`$d-R-Z(wuua#_IAg9LED4SUVn@f7+&{itMpR(E_25y&vo#p ziGPc>f!sXn3`ZP#s@=uIs3o9y7k|i`+s)zl_gu9AGYC=7l#(fMz>$_IpxpH1Gjvrb zi3$AL`vUQZ!weJJiogt0t~Houh+PeWeW<|ZCmodUz5F^DB%qkP9_gh&A9o(2?_G-x zMitJ2LN`^HB04C@{SmiJ-QlidP5(ASaK2Kb>{7^#S+o#TV&o++Za3N8?}FVNd)<}- zT%;ee4G>R44KHY%c_#PBmr#`ZgfoX;conCjQEgnch&!8_t-_}2)VQ_wToZ-1UGJ9h$Me~c}FcU1XVImhS*g!tSf>rSXq z7^3GFI#p}&Vd1Bz#=cPNUykKIQ(G^%7L#e^N_f zidE2*<-ykPRu5A1XWq#VQQ7N|L^B7XIZ?t&IAH{uW0q@vC4){nrjxj2RjEm$L#tTam#g zT%X@5t4~c^U(QWlUDn9DAhIsLAE)|LFRUCM)s;OyJmYkQ>|EUtdIvp?e7_Ld^AjCo z8_%*5TEC+9eSs6{L(1$m&MjB2I&Mf{IT+*{ghjwf1FRRT6<5^BEA}R}6hE+w6qykh zeR;hy)_%y;%0FX95-gB(NnsaR#_kW)1)QaBF$GGo37uuIjrW-8l184Z<1g!*^<(2G zn1pB7fm*+Oc39P^!1`Tmk343AaxwarJr;4On9_$(C&om3oCwOlu$oIg_eOKL^40fR418`;{I&fW zFwowu8uiVOHp|g(fPH-Y1tLmQRY>Db#R9FM@wPYPd&ke)= zIA~cxAR^Ld*5}q#3yRtc$=m7itQKA&P^+31gr-r2TMj#h;;n_;s4{)i`-FJuOi#bz z8K%2&?p>h1=n4XDta!9=P$$9JN64$&J12=> zX+|I$=lom~PxR@Px~wjtO2{`)pwbvl`}%p&1^EqUgW*}(hzT!ejnO;hoG?4r;HM@E zvdD&40$LtY`ma8Q257zDZec-&l_8#KW3JqaJR3s|vle;0Y(l)OVINvIwB9gqyRxjG z23S-uZOg}3x=FxIBJio6>Wxy4OZP7tgJG=1Z;<(W5L72i)}r41S|&-E`3b=@I}VeqE`0! zj~rHLU0v#$<0e!jFUSL%mV^BguJU{%G)wT3VvUiSgR7B+jVWr6NiOR8l0fOw!sQGo zje~b)j=l5K?L9;XD~G^FZam9b3^AjXIYN=1vv?A5lU^wY+7!9k58M*(7@?6L#dN3u z%`jhn=ESz=p!o!LZ-2Qq29O4G#8J&=km32Jo&h{?ce{*Jl;KaWkWg(j-oiBq0GjK#+l?=p9uxKPR@LS7!U^P zj^EJ{(Uj3Y$tV7pNk}NKE^A!C-9OOKHD~!kL5HS{bnJFi32!J;96KI%)I}@U8TMv) zLU@;&AZ-ebG*=m=VfwoA;)N?Uh<$eKc-a#Dzb7I z3W?X--JQ_zX85HTn}v@`1K$b=C~q#$h1VcF$GN|y3%ipL-Gx>9?6CU4uo_#=1Tf1> z&;N@GheF~Q8jWoaD-w1Q#g-qWYwPpb8^D>{FQ zcv<8yJg3>Mo2EKWHP-KXv*paZRQ*h+;dxNEna2Y8*w8S8@hh8#!{hdAILUtW{ccJJ zBYCgx5LP&&lcd!JTy!X_%@()5zi|!QQW$ka#I#vxem3MgAhX9%Ps@s%k=jek`{$aS zoom9{Da|VmM@HR7gz|2|fDGM+_55Y0Sobl{g{*=E?+zvV0DN{ze{U(wbKBnaFktcN zp&S?40CQdU1fd%$8~x(gsQ-0zHg&V8x(HZG^$rYoQ{IkI(s$X{Lt1rYR1hP(tvteT zhg6HTlMJWqxxI_CM!9B>GCSx`IrxFkU-MD&YlCf4?5F6F;-vx16X7P5+1}pxQS`g` zQyqzq7z;VkYHl@10=Ad)@mE?A)PJK`lZQ@3O}a=y`kM~>P2b6GqSdGNEtF}T{H>!@ z$@F*ktF?v?TYoH7(6}^-MXK6vY8?to$b*ZXpfx4ydgDk7&$vrmtqZYPXUMGNj)tKL z%bo8CW)P%caC=s9%TIs(g2sBD;s$;+iLF6*zQ5WRitWjESR55>GVB{kaO)r1*AG26 z)e>|m5sZ`whOK|tT?7wDQiI=47A?s3bxFOaHYp@apBsEvdT++a63>5dxeph9k5VQ{br+-uvnn>J6A)D(ld1ZR zj0cd!h;VoPezD$JsrTk3VCfya894REu}+I{jx0bJAxVk+fl(kUT_*KQgUiO!bW4+$$Y-OTxIko!Nu4L`aS5DmD zKf9LaR-&RD%Qe4*hlL$s7M;ot&wbTMbH~PZE~n-?3KL=?3vzIxSk2==Dqz3dsv+`V zFc_-VYYW*){H!`ir#-adWeKKuxmVmyXJw4s;{oXpu?9T!RV*(DEb|^@bss7{PIQD{ z2GNqUrI&7y)8(+~a*NoF#%mbm#f-=*4u>rO)_LA>CWtOCiE72n?y>?`Or*S`_<4NF z3qgG)rdZgKi-SdfXq%1`sg#*fKJNWdwab;)xOicH{PUsn)~3z{R`CJW_Yiy8zZVJm zb!qhaBc&^Spb4REvF=hbZf+eM9QdLelU=6{Q=lh~hdX$PlD>y2N?xw1-{zZOp(-OR z|5Cz-OZBkxP5k$5YuSa6oIr2Ez`f#a{3O4l^g0B!8pF5Nv21|4$;2Lwx{l64`W6~@ z*w*r%@R;V&^!pNFeZ}R{k@)qee8faiJ<6eUh&%EtO~KESF5npRvM4 zMiUN#(P$OXmSn)DBoqnvQZ}Q6xVU3;Y-+;C7f<5gT=tf;NlibY`CZ`dOgdw?_aOM( zK4%jsJpdQ>Ma#k}R_-6#8Y$Lw0}Q(N3L2Bwjfogj=XnhD@hnLjD+1J8u7981%9)=D z)b3^{PL33uKRGQ%g%U*38+*Tj6(|gKRNUr+o*Tl;W&17ph}AZ&r>v;OT4i)#12Cxt zZLWiQBwG{jt5VtwOXt$~J&kvCj?neLHWuwe0sTxa=ePNSUi42rLu~n9egbcV0wV?% zg*$CiQf_sWEZD~y=En6^G8Wd;RrS(m*t>hONtju3UWV85)}uS8rsT`#KX~6K6{<&) z{SN>+K*qlV)ZOLr!|a}MSK_#Hp%^yMxOeC*fhnpz!a2K3yKHgQ;XOM^&v4`yxx=0E$9O(}k^5Cn`s_qWqD4F-IAP%i(eqvx z9v&9Pa?D9)b5E=)eGP-N24ux(qE~_0L?4O)p zZgHDU;xwBhNTKY$Ri9bRcc%jP=XfkmgmT#3^Z7MGSi3VJDE*Ueae$^Q`dCD3hM<+^9oaL$o*G|#hi 
zG|#eSd6b7(vMkw-ouL^VXL822N!p}IlhCF^(utw84Hvj{(Oclby>D?Lv6Hx@l%WAG z&|Y|@w}n1tx>jF#E3O-8wh_l4$JG?2FJzul1%mRzJszYVnC@5x?gPoo6A3 zfCQ-WEF}~|6#ghIeTb*60VDJ7<+>xO8Dqd|3mR!i-fUT<*UUT&F?`e#vbL^)YOB)i*m-`?5w|kF%`&6EE^>@UC%Fsus;-T;FF&q)ZF zI(f;W#zc}jok)x;N!HVu^k@A>rIe`L&5*|S=4jeNlV}@?HW7@qITlT+#Y_~cTPPu6 zlrrKNl)nf{qqt(if1}hgy#58p5W4z~SOdn;Uds+SX^di!_IR8D45OT*+8Mn;=5@On z*9>+#r^*?a%xG{l?xyht0$$hL(2TKcR+TldFZ0)`tPmq}ie>+@K4uUH-FS_W)km*R zsjI4A^$Oa7{FO)>>`7%;_*LYOuuG)M*PCetzlbWYNikzK70XyC{CDUd2${V(63^P@ z#M{{KaLSU7g%dhl+H0^XNK#=n;OQ$b=&dY&P3-mrbRpy^ zuFGApRvtfac{~dvwxDtHRN>lMrFMZoWfAYemRPJVoQv(}&W#75d& zm9MF|O#I=;V`-N%sSWkDeP$}w8qlG=?VAcor`*Hgk8vJb&yIp`MK;Jtd|hkMW7epH zy`pu;Dq9^XkGk@f%9Q|qzy9j~#2+ErkS3(#tO4;|#D0P>hz@O4cR^*ooVbWTBDJOp8=4M%{Zyg+&^Hf;#_}x| zhLp%8jHh)p+qt>hrOqW}HjF=MPpAW>X7K5Vi!51EQ@THFq$n`c$B5CCel-XfU ztiP+Y{=PAv(Q1_F0?;D?ibN|w5o(G)s3v}nR}=4sY9bmxE8h;{W#SAepj3^g*VF=A zSDyZlM=QVN5orDBiytjk{^Beg+`M=1O}CXf%ys(5_qGYf&fW9vhx(7KOU`ZyuRQ_m z3jHYp?j($K@G9Wl7o-lgLybsHGpPJL%b~nZhbqs?wu9NBJR_=m&H->^FV-c#Wu!Un zLTN4nO*s2)c%xV=xFiVhqjC?rLL!$Egj6o6Ttx2!FOgf!UOgq;vsA}f%pN`UI=C>K zygF&6BGr3gG&}|JD+TV}jikxmT^*6gl-Dr$<)(<)tPY)e9zW&6@HTT{WmdReD zv>ubmqm}xdl_8^7EYq>l!oxqj=f0O7E2}M5O{9tnp#$+YhNRCxCFz8qBt1Ee=!28Y z40=mW>hUayu?=&G41Way@L%rEV7+n ztg^>Bf7&i1q^zv+do&|q#5g}LT*AuGzgEQ1*ZDF252@hw(n<+E%P1rS31*9g)tXh6 zrz;kPK_y3oD1*HX#%X{%d)3er#Ii}?>e({{A5R3a%nRq^A~_6uTdkU{*aE`s*%@>N z_+DZ%69vKHxtfKqT%rvAszu4?K}2Xl3e#b2#0Mtbx}4M{i&2q?D-BZpuACBaHEoaR zXxz$ex9(!ACgq6@?}Vys2VA=_KvfG8p+@nkn(vnqp9HD{xQ>>5o;Jt5T--uom7fqT zU5?JH4Y;`Nbf!aLl23BoDqGy~cxTVWN#K9AA7)$dc9n{0Xhwp53nA!{zxdB`8 z;ZW+)2zvFx(dcygR1|NZ;@u(c`+>OX7tWjw#V}NA2)bMmqcqC>SYQLa)*r4)qyYQ6;>&vxpL;N7)ofGG6$s-OeQHiUUoTr9;YuVCK>7Xo61!1MGT9$UU|7 z6fgxyP1W(MJsCBpfbj%43Y0n)82qncZZU+$jooUGej=Wlh#_jIys<#Zi@ z4jusOE?dAF>KP2EjP3UW5Pk$fc=4kHCvLvU6-+BdDQs}$#6|$fF#yO$0La})6_Gap zA}Nq%d!7N|@?WMCY)F}gTbQ{vB}_h+kP=YeskSzK7C>_jP6}blU#>6l^Mb`IjZxIM zguGKH!ur~XtBAe9I9em2v_p=UQgZFh zciful3ToA2io^+uWq#++a;nPRbuhoqd0d5kgU##nO)O=LbohQ|v0^{8#NC?8|I&EV zaQ63J{XHH8xDFyKpFw)Ce!k>h`zo{P0M0 zO@ERVlQ=G>rF5`kB<@UkRDs^r>sI#!(6*rut&Oopt4g7@>+F65WpSBVd)VSi_?-S! 
zep^rR;7CxeHL`NO%V=_HBzlvcwfJ|p2*;F8L9jDu zX3D&s;-_OxT7k^30bTbz;bgOty$X}LFJYBkmB=6>G6{}J7?A$H)e^b#d>y3@p>GKG z{P@84-wz^}AH?Gj;(=Ppab7DqagO39*-7(U(CS(kw5}z9_&X1O<+fYCe7J+LCcSV0 z%a$FEM#~usZHse(n3YDKI`oe>x2AS{`W|dgEgQ~$acot_Y|RV}Vw1Jk1p5#caDOf& zaTd{o%$S{a=_$Q-20NakXuWk(M^?#BP4}3SJ-o{m`LMLden=`@ovDs~jwLJKp-F8f z5pTBA#NV*L5;AK#97(DrbcoeZI4IRs_$S=~4eaevT=@eoW7RYv)&|^ywjr7T_t%cB znWt)u%~v&^%qa~_W->S_NE353vaV??CuzKY&0>sHxI0tr{a!iwCHxB{>&c~?dc8`r z@)s#?YE4H{W(xl{_HP7ZPKM$c8%=y4o5pENDjbI8gD_;qsKIWT{g_(E%p1N^ab*^J zSE->1f>yJ$ICez|OhIabd}YG{U;gjF7Aa))vxo=#EMh|Z&=5&uDro`_L=N}*8{n$m z(>WGlIF5--a*WzOscz(m##kz501)yhX8E2BK}>Dv)$)n)x_E)l*+ZYHn*qnaWkfAq zp+t*AE}EgKX;N-&@!k4#JxU^(;voAm} z8YTKfFUK5IwI{O~Q`FkT0ZjIy@$<5X)FV^rTcqMTgFAa~o$b zz*BMZSKq|%z+Z)V67^NpPpKZH$qmzp&xf?mU@LO064#@D)}#6vCXHT6qiMKPOA0$s z(&=b-a0WHx%)fS__=j8{abY>vh-HbiAHp$^>GFRTtDk!ovRe_{2EhSP_CU)B#|jOMGcAsE_}DZ-!s%fm4J7C^gWY zR8y7eR9_+ZI4u@3p$?Yg_#K*1FcMH^KC&i%cx}An&gsKzmA>wH*S1oMrIoaZvh;7- z*S7og=I~!PcdThP=DV8KMeTA{EN12TwjS@m&4r=c2R+TfuAs)^vdB%G-tMuwZR)`4 zQyX7Xc~Z`nTyq+-pS!NUNg(8YB#3k%PgGH*oXr=p&7gn{VfW{xh?Z(@auQ^`wvrH^ zK?id(u4wLOOIco<&%*7jIkH^S=3&Q`K3iq25Q_6R_z?{vzfZe3*$LscM%BeiosMUY zNP6qXHbjQTn!>gy}`w>`YpUwKy<>`fWtsb*W#mZn5sM1%g} z@N=gMO0G4qgyJS6ZyRN!Od<`ui3E+w}jg}K(e)M6u=Qk%US34O)^;XDB z$!aU)2l}9SJ&`s3B@*bDXr5;2#7VKCwYWCA^RqW+dXId5tH0daq?3}khE;OuLUQXa zQ>vUw54LbJDI+GnYH}MCdZ&raeQ5gNspn60$_+N1!r(Tx#(>-X!()Y8i(Wftr_B6% z5U`Y(Adeu2klUwrPORJmLq|Nb60yu+6H`9lgyuPH0+9eFdJvgFLZ~%I@5`tEv$gBb z$?b*J7r|glNFMFaQR~Zy1$UL?aM8Vd1~2lI2qvr9moImv;18jygXBE<%a{2jTM*lY zaNby{^5YlJy})r*I!Y{U9_>6M>hv5}H7Bkm{%g;v;)V~EU5q8+@g^*cifiE#TbgUX zN-cDHAgZ%?DG{zwvaaNcMAeqXCJ(TsguqFOu4P~iZ~5Gw`&Y0ox!KTs``M$J^*uo) zep9Zieb*z~W`98mg)W88b{Cth{k^kasl7rxgrUY@)*fy1DRgdgYs_Rf2^iSCHrf<& zn%sJY&S~J$I{Eb@S&}s7!riy8NsttyQVIx(E(3%PBlk|N8Op7L2-&%u?%;DE9!w$( zz(LPXlP}Br=@chsuP3|2q%|O&P<=y`oi2m6*R_KGMwIR)D|G9y( zvP703t(Zfpt6C%0b;{Y~>uepcrSzx|fY6+TB@&}3$mW-6Ys&9WJ5{2}D+^#Ub8|3l z+5J9XLZxDjGr_l9vGU40Xp~``lmaSUtPFjn;$H|=Fvr~pR9CK`+ZYx9QMAIPuKctT z(O3oi$XCb%$X&?sY2@(kVSEPLI9&6znV$`R1eR}8i$0nlRhIZAcpO3a_90rwG zL`eu*>4;`+rCSTG?P?7)`8KsT>vgyIw0e(}#564HiuWZJ#Iwfem)mm^fQz3uc89wU zjKyQ??_V=S>HXGBta4y{Kr9uDwH`}2u9P!kZurO!^js`s_3Np0WJOq~^JU$kPLDzl zBVip{Ard+pV&447c`WQ^CRCJ$H!e}OUt zY!1JXUhxkbcYI`>KehF-iNT}oG@pts%sb87niKhuRu$+=n-ZyJhpU!$w-rZ0rrie9 zaYq~ayIKyOP4^WNqmxZ7d&ZIqSH=&mRs>c%4>)NENuzkRTUf1jhT+*=p)@Gqf!&;V zSRKa9;pYj+|MW7nj1Vk=l}3oo1ojm1RRSXjOKe7H_Jz+m2Y65Hb*^am3nFLbm=c#u z4GdbA8o(Q*f6G~_*){Ys;Cz>>^l$vYrt#3_P2*4t`bD*cI4Avo`qeyH61knrP8smK zubD?ljGqs9W`ARD8}IJfJ`h)cz=mUlMAo|gV9(*HBW;~`{PUjupWP9E8{aSy&&P}y z`c5>Q9q)Fj^=h%oY1G+u3b{eAY(M(74;}u-$^M>0Pi=DSxzp1z76Vbpc=a9ZlOT(< zBL}LO=vc%IlYBhrr9g_9YR;RunYoP}_Vc-T!BJuhwPvX#%;1+&vzJnr_%1u@`u=8N zXLV!jnNw=&0Y_Nd!ahkzD8K@YF1^`jVm>G3L&@hDOVZ;>Sn2(0H3?qY=P4gv>Ff8y z%I+^xL4piuq_O7;{eAS4fvU z1-5h$@Z~drE<@_m#g~UrjfAB8Qi^9jAW&!d^@oNfzXlF161_hw>8r_e8O|*_oZ1FB zce&8#VXB;)V8oO+5Y|Q7-SQ`d_`1=)@_JNFv5n-uA$`%>rBX&J1=0H2gkC^P@*H^- zQ6WzupPhc2Cw$1_jQHn8EkV-T67fmfyZ%V20MAa>C}niPZd4` z0Gs}B|Nd>qA!5o?g*!^SOX~_NN?kM+BI0sb>9m$fh;O{;?H>bgKV!`ENM7bK>=GbN z3d>rlDm~(JY*uKM=4)I5+66$M%K)Z*-!9->)*FN-#d?a?if5F8YB81G+7t&sz1tDs zkJ*uZmaqV=jVMzahj6Tq*nEb8YQco16STgukn@-jfDG+ z448{Dg;-^9YFd-%YYTwHjKdoq2h;7AXPz?8<7Tw7rEVnTJ6n(q)A_t>%ma_Yj50dNzo=C%l}m+WQ&9UZm6z*jD03xSfQ#1xP@2M_|`CFlSte(kL~q35jsfuM8Fy-fTH7*lC< zXj@Yt20{ov-vGgaiMu2OgIxL0%23`&D%at&j^oIzFj?~51X@#{clo}{IPZA=6%b7TU6 z7N<<+YzYK14w?EIIi+-PSQqizXfcV=tVp4?Xq)2bI~+&+z?-tmWcHNTlXS@Cj^w-P zYuQUkIueSO>)fWej}Va{1Ufx}#YBU=#f~HQPLGU4jzBw2hsVP=1M=|fc`6JZS^fm& z;~OI|Ur2j9vPTM4t4)z)??E_|w&gb#Zh{vB{SikNrj6c0s#G&*J#9eoYrJOGn!xh! 
z{M=DBa>8rc_!{OWmPjLuF%S*PoS9%CAZZ^)pm2+;}-#P_aijSSh_oPZQxO$09-i+87&p=r^$y0?t+K- zTBmQz?=9?v7dQ2{#|jWYX0*^#YASgP$~u*(Pr9zIDxH{1R{*~G32V7lyBp3)OHakQ zbhyA@BzHnY11YCkRC%>F83Ce{S3vZ-Q<8UnMiTO1Rsh^2=Vkz0-EPyPtv-a>VF{Zjw#K2K&`; z!gl4|by99DkhSPq&s8s1C#ClTGei83*Pd8QP{6r~#Zy4n2bea{QdNd0Y5{ZSFmh{7 zIyM%M+3mDolg+VM`z^5CJ$rZ}4qxccwco}YX?jO~qObv8l=|cDcAjVV7n(~A7Mq(J zb+ZXP8yHWidS3>-;VB?$sZ%dXN_qYsehw&jV`ckybmS$D65*K{m1G=t`I)I>Q~NEKRJbrFb+C0|4ssOakoq7qWY0~o%mOUN|< zl1mT~RjsG}npSR9*J^ftPmv$}mqlt?>5i4YLAP9jL-9?dG&=Qg>ohtl)J9gOrszl7 ztBSsuq}8rivjr;p*xSqKZ9*n*l;^%jvG6bomik^*yB{Bz)9zIYTEq<>8AZP@SXj&f z|9=#DWV*A{Gz109bH0rmWjD(qPcsdh?z|7$T`RjqW&#i2-AE$jyR&d};r81TJD{ak z=Z6X)MOe>e`tLK#eNdxJ6r3eZ=>Y)C8F5vt<9DQ28CsW6+cYeqEG%y7yG^d2mMsO* z`$JC5L0e}mgTU66uPi@5pe7n9c%UZQD50W*-qIbTT)z*(E2aUw2B*#f|6!E23ZK;o zuTjY_*c@%d=yyQ zj=#b`@y!nA-0H?JriH*egAl^+X8q(jZ<Y)oq2YDU)3- zn%tNjifi!A@u;E0Idtb(NGq3lbXG4W!M(kk+ItSINqTbYnw;&Kq+UN9Yun;;Zygvq zwmL#d!HkM%*?Kd#4%FscG%ctTF=G!6*%AtFW|2t9e@{1oK4zBX0T1u zh+K~J!(}9y--?X33?w^8Y=pZT9?Bjm4$hz}a+JG7{Y9hps4?}-)n8vwX$xw@-^gLO zZ!C1+a!t`yrc#Z)N#3QpW1Hsfxxw`HIl6dht^3Y@y}51oSd$Vi+lUx36D~~lwytV6 zd%L?=_-d~T5k*O|IFqvt{K-al0WS3txgSX)duzMGX~3qEK@H#w zQ?{V7ahH5KC(T8Qo_=10w5dV(>xYQJN4$EstSa^REu9~&pZ{T z_FUC!t|6Z__5sxtF$~#z^&Rvh&kDbdI*#Y6^Uol;t zJ2hg_Kg}OgbqH$6Y2aLgtKCNz-nY7Q)#|p6)vMa;ca9$gL>JsA7(=|&+A>gT%L)jg zNAd60_9Jbcrugq&em~NpZ<_mpe2gpNd3ud{rgS zAvzaL`Ufoi$~u;OrNnO@j`58j{C@AH-qmnHHsKl%`M_g3<&aWt-3CD7If13mStk z^KF9T=Be7NO9Fol2<#+eOV+eusevr?4kGZ?tWY$(&acg{D%2?p`C@ZaW zH+dr)n|YxXm`Uks_Cz<X1EAYxXB}k3YP=n5ccPm{>R1r^enRCeAYoFvb=T5erw}BnV)O1*hvA;&)#i znt!^EDJNAF>xbo78d^V>IbG*ke{dbA>kvfy(V1h1zk0Bx?b!#5J$KFAaog7)D0bg9b5GB{(P&_JUmiS10z>;?zij0+fq*eK z_RBh(san76$!qn?4zR-?v|si;KBBQ-b_w{G>z4&&VUw@Bquo*W17m<+m-P(}jm7x2 z_;-|n-lQ=B`(-z$6Dz`6^zz|voGK{nQF~=${q&_@)|_IuzayY2pE`RuyL&XDfUB~v z^$sLg?%;XWX`W|63)VWF>~sirIt7sw?AXzI2k4HFonWK2@kjYKB~!75AA1Ymro@)X z;pID>Ztx@Kohowud<=#oU^m*suW$o{U;)ri9>;L;m|m=u(qicz=>r(jPJ4Nv7m$8uCI zROA#|$AGpZ95}*$j(WcS{BOt^q2rcf; zmKKZ5<}lDCf#HLmsELC0o2NZI`|{%8vM<@qDR8lrVzk29kZ)pdfw2!DZ_J%R6s?~@ z)RzP7gNUNh*E1jqQ*S6UIT%4RjtqwLCkH9o(RO&!nVCO5=yiB1zJ8t1pSO^kD8zLPAz`F+D7U)V z)#TH7JIX7|9p2=o2UZ1#b78fFz;Q7nk#gC=xGU*kxz6F0!<`&zD;*s2Df9+yBy4qS z#YUS+Zt|OKA&12k&aLmt?JWfvl~$q9+Vy6aMyxex5uuqZN#Y{dGIZe%~!JM9UmkDcUI6opZ7OZQ~ATA@&PPZH&Okn%D3pBPzkmP7q5 z8~614-iQvqF&YJrwW-&+J;k+e^_Nd7VCTEB@PBCg68JW%Ywx);ni!3GjeYmhiQukEK9jJBb1*c6-$CeFJ~~64Klv1&Nm26!SD-4z_&mWB@)4X; z^S%@cU3K}l8q6feGh2+|e9+$%6>ScNP;+P*da9SsBnnIVyPev~2U@hgmhKvqcwC?r>43<5 zS~d!5&t^|$g-UhSw_=tz>()`Vp(SWy1Kka9Vn@3d zCwd&+BFdSdy2^1y6j?+uMM$Yr<#^)ha>fwaJDeT~EUU2dgxN2gs9hl!(GXm`w=dKc zM)^K!hNAsZ(Gjs}tI~($or}_rDgvFKn#9SeoJl#Zr)AEQVObr-y#Tqfi3vns)m(B3 z9s*B7GnB@I@`(kR?y3yr(RQqDG~KR85#G4V73Wdf3XzMqckAPbv6^$;CP?qG;9^FO zowuDngKCy4DZZ$LCfby(u5|hNiTCd@?_^|kXBB~1L*Tpw3iv*Ka&|H#&F3eB!ewgJ zuOJi;!r7vD_Go;`0OB-J+o5&97nX}J$Ch-rcXxL;59rX`M|0fP8RZPboh)=G^yr{{#1j6!v3Wjs>no;wBDDRolxMqDG8PL zc&fJ%u1=&(I!&2YRU~wHMP92r9}z#Ql!5!`Jcvhs9ZKCgrCwyqnxcXxM&?duTXw{&+Q!VfHl z%g}YCWwD&)tVp=Bd(c*{?1Lm-QF2wA2W77OtUT!18TE6sulL-yD*Edv+K;vYL`i{x zawc_|%Xd_*0&5J+wyLZl_?c@8?nAH>><;q`@maap)gqD*Cq?9QO3u`xPwg%#m_ofI zW6HRKDP&CrQ>dq*lwFQpHtL?URNkb!JbStB^1;E&b%eA8u|-D>+N&E4O8sc7PPcJ! 
zch8*en(oHN_)<(liy!P(A#pR8ts6zy%-r2JLr^bGMDmc_P~eIhQb9G!8)Mc|Fe z1!0b~EhyIwM=6KXPPr(xZJ_fg`;NNL@zb-Nh?!A@p}t}2Y|IakL+yob;KGOc)?TyN zE0Ya6MRH=`hK9=>b>qFe)G85 zOL_f8e6YY_1#e`5m$NV6dZwyT+O2O_SEHQq17)X!o-C7HXFi6Je7_N1^?4 zk`Xj#hix|eZjHc@Dwc=y@=ttBz5(Yo4|||&KRTR-R3f2|74gmj=hQdq^eUpMN#9b$ z;{|Pj(6=wBU7*i!e0NiE;>Tm~nt<1W4!vp86PDltOApgiDLxsG$EQ`UluJ8Q>O3fQ z8KnY`9y!Y|mkWYij2B^{NMO{B{)|m{XH& zg?GPWNXQn!NY=p-hjIMdgw~!822&D8j1u_MxEx|gODyp?=W$I@5_d7o*?*a$tJBM| zD5IT4WYuU8SrL(W_Uv}9e5Yb~Br_h!J$FJXr?Fcq)meS`?U)q{;IRUevceZ;;WR~j zLqTpTM2K7(^ZPlrh#x8FumTrd;O7MA0%0l?S;bw5tXBQo=$MjB-%1HyY4+bk@+6i6 zS45ef4aTZk5>=HXZczhAQzqHq=2cHTq0%|)Vu?B{U_Sac2H5j8iJVJE9(;_@*puNz z&H}i1>LBZp1fyj^Yv~nM%Mhea6u>d?h{2|#2ujPA-U2bT2HLPXo2j%-uJc;DpBRU< z31F9&E3pHH(P4I}h>|}tY*8iHJ)`0)YE*?Lu9@{` z;mo~+f*=jXP$&#SbyamIkcTOnX#A$_Oz+}I#){KdYRzQnwAPwW$FmL{{Zry3WlS{0 z^OCxB$|ll!s{rDZO-I!EgJw0Mv02VOgKw}4YPA@&!I{m&F#LVuBo@aWFYj8gd=Th) z9tae)f{!0{o8Sx8q4@476BH^=l^~lbwr+m1!HRRVKrT|EkI)5=Pb*G=!DU}J2$y>D z(O=|A@+Qf;!Kvm37btN7{j>Mwg1yHjC&BO;M% zAqFz2lNu|A9L}Jqj;*L|U!AwT&RBvDM{U5Bb4fufM|{{l(wE>wzcmLzUd_>jo?hGEyr37{J8TRQ`Hnc8eO6P&V^vY^@*$wlcOn-u*2BKE z)or4a8(QWL=I!3X`nFV05EVpqhD7EcT=i}#V&xc1*daCg@qbmQU!Tm>EZg1!*Cg)s z0yC{F1zlPg2-#!xlB*#m`Qnwi+zzECh^4Uh!zpOP9vYVfK?)VYcM29v(&-r0orgjv zLwG1;jo#-OW*)Nco+<<@3r@wUF4t9^JjW`Ur!CRcDOUV;Nh&?b`x~Rt_F6B5c2)}2 z&rd#nFxES=aN)MOo)biEofNU_aN;Ak%b9TTjE1%N15O=Ww|gGy-qRb8bZy968e0wS zm>u$x1Ai5~M%l4?EIn>8U^=k~4i|)2$fy2B<_g~v;uEo*juG8X*%>>dq|cqro|;Oh zpA4#qv*mu)Pi1L=*Km?1DX&4W@YEU|uhR}PV$kUfT9^(UyluDDEo-o4?JR|pCrvgj z$*K)bOWGBSdP|ql9E!JaGd#3df`909(}*al ze24GRaV;FkMG-r=2*|Vf)+dNM)EIETNn=9C{w!v8|p5S!vCL(5+ ziA{vfZWHSc`7f~xby;7`&Z_>&@773ssyk~_tJMO>3pj~eqb-5x{G8Jq%6WsGc1?}L z*J?4frqT=QB#Lr>%^%U4oxDG!F-xV5q6k!Ghu&sk4HjfCtj1r(ABU)#!P18#1|-G~ zjDxJhV-6u=(H)Kt`*v7H$Pp!ncnVLam2eAPIFSW^9LlQG0XZZRx98kYpF15Pk)49m zbaP*=i2v&(ccdHZ1HRHzmHTQS_4xvGR^*Y~E`=N)hZuyiSUe}7o%bJb=omP3jCKEq zO+O?L`JHo$WKIsQp*lsy#h4WqhL8@gW=5N8FC^1#?a8T8;U+Z;Z8(}$zmSVYYU`qr z3jOYZ^sB>~A;;E$XHfN8NJSCzgBPGCX2EZe5sI=yh8#PD3MI>gRI1oksU~6;DmF}I ziO@z-#P4CCOXh|!PX6ziq`aNv#DLQov9cQDwcpY*mC`B>p_y&s8hszp$k zZa#RvU}ANL+lM`e^TRFm(d>*586}Q9xbtwK_FK0FjN9#4ml7@ zh9kDDyR!Vjs69K$pZ;{lm)Vf(zQ}5;NoQ>>%WBN$=h2z>xLF7PYs{pwF{{tQ1zP*^ zWo|hL=bylmhXcZtwCOO1?ofC*VDSu9rfiTAs+?4Dfyquy4oTWUR4?2SQY6gToQ=(?eAZ@cS-mkIY-LxPP=$cO8J}Hi4W(G^ra#_Lks5b zT9%0P?CkM3Cv28@ODNPFv-BW|UkyIQUxXt<7GT4uJHsLZnq>>ngqhh)+N~my5hxAm z_f#8!mf4NKX>2@6!*h+mwu@VBu@Ll0%6y?Vx-A)hdSh#0O`}~l0;PJND+ei!jKBpW z%My%W7k-bnvQTm{{%abF0i0q68^c}}@eC)c1@QfV+& zFdDgruv4Y;55>M?9j<#;x0~Ff7*lQXlqQX-8PS~SyZ)k?o@q@XG?t*{6knbztZKCT zI@ZszBoY}XZE{=FKE+AXyJ}F`6N6~$uBks@%ag%j_JANBAY~(5F8+jU@mZ?IR_(RcLw~7qtD0t;yHq$C znYTXgn^WV~z|pEH#uaPu`xD`odCiePVO71mG2w*NrPL%PMQWWPZ>+VuHA?JHcV%px zj@LL{MyrnG1)bGlv70T?_WDFe(xT?HoWpIjXjK}$#$mGB&0?fowhd2!*T{od7E6p_ zes>tHh~CI^?w#Q$Y}^yZo$;q>MV3#?XI)O6J@s~#dy$)A0#=!cil2c#1FxyIR$s_? z+4_P`tJAe1=s}1&tb@0^>|UFjBx$G!oKByXp~(%Geu^x>-H?Y=D)`rKWFP)A7|@Ws zP3EB!_~WDrTI9*Yj9(E6C&Ed=&k)fe%aCVu(zTc@#Wjm#Q5Dz}r^Pk#$9);A$DmGa zZfNPtS{RG0x0qbm1m^VT!4kyq1pKS?(}u=?53H=*%V&`A*YG>w zh{IUTsKM(lg3)oKSM7y5?m1|&dc9ivkah$cQji@*S*ik==x}6pTrL!v9*e`Ha5$`r zdK~qPPQ-IAE%7)_a+a{m6}GU%c9rTff)#_L38xvoRV*b?wNVsF0B?#`Jwzaa(lDj_ zJsz-)(NF}6py96(pTN;}Vk?e$yftDvZAv1Cf%BRS%_cS7+G=V;R<%Hz>W9*;CPIn~ zNh6gBDCOcOi_Ojwg`7E~@cAcK^_FTSP?!MEV}0M1#;G!U&-G;nLRrC&`^#A2DEh(@DOM0O1Csocr* zc@@7+?jqWM0`r02px7ePd{ePS@b0+Cr8%|;-hcm<;*jRu*5ehCm&v!YL7_9=ekP=0wOJv$8~AA6j^K zFm49q&G>IQ=&GVN?ax0?F>qFDEeCWTvplWBDTaRRAjOdILml^LwBrN*h~Eik=*3!) 
zjrWH!wVJg+78v)6jA$x?V+9Q>I)=;)Kg8@J_F`4x!ODze*el2X8ma=GQ?+x!om8R4 z7Zq`G;TtSrj0b&TtA-lJFU1)#;`2pKfWieqtAY#j9UQkf^%Tw+gwiPhI#DQ?ys&i>}!-lmq`-hAm6GTGHwKM(#Xjl<{s|A`N9jNAeZq$uWw z{t;fpJeV23>nKMCrNw%5c>C=iR*bibbWSDkPq*g4uK;U{yFD=*3+x(qE`mB_kXmoO zT%AC!R#5NNf+s7Pfy5nJla`{jM(rm{qm4eE_ccaic|XtlW#8>b6Yqk5R$U>F4f$^P zH{`K><0z+(!O?BS;L!A^W?GiS6q?T%-Ak+`>hZ=bjGb}h;_+H1qjA-SL(19uP`K8m z0h<{u+J1sQ(VIFW9Gb?c`?M~ooVpK&>d+O%BFc)cv4bZ{4f>x*YXwgy^*1 zCW4p zJ@^YoVFbQ=%H-e^I*hHN8czMrwJ>8$bKS@X#-Q_U2YnO%t?EgyY!& z$76uw@nQkQogrN^~w#x>Kf_|xjK9j>qG-?u;-T6 z6TB30M3W-bzw)9LBw- z)DOqAW5NLbqKpPH!hBvdF(}e8)m>r+&s$LS$RO8gFG$*yl0AcBnhn@jwmHk9dqY) zQlvE*bcL-Pp6Gz6{{j5==LR6Ut7u--KL(Z^IRg3~g|a6bj|0yh8iPEwt@J76pffl{ zT#F6D`wj5kAm3ky4MJ(Wir9$%oV>Dv6c7d>`+ITfXq1#fUHUGF6sNQDkY^Ldsn|tG z@i*{S7_-kVLA7kvNC}T8A+e?HjLC18d}cMU0Q6yd4RKdxvmCr!*+4I~PyftphND|D z@vdsMs&%R-%3(Px|B&1hrDK#|3vH~r$>B+`8}d1IzpF{iiF@b=iI2ziGwItJBGq*Y&TsX-z6Ia9z1xV3R5XWx;*e z!WHpn$1p*|6~Xmmjy2>uNVl`6<<2wJr9!E!JHZz?xz(*0O;3N%vDK!k>H z$Q@4EXfLYsg?5MtdW55dc*yRz=;^~MiU5R~L%j`_LSBCgQfLm=eH>!xM@MUe@P*}H z#Qy>pG74J9Jf6H%1p5k1qaYGgYOOwB1XmuW2IcbXC?G1xy;{yG(q29`2_YN+xeZ?II(Tpwr$%wv8@v)C$^p3ytlsZ z{&}mWrnakQdv?30YG<}hY3DY%opOWX90`-h;TtC?n-az^ zNWvwLF8ppYUrA|ko#F1@xH2c{{x0e5?EHgswaMs;;UvLTowK#m+S*4x$f?-Otfi9_ zeh2;Qe6-T?-nok3PIhjsmtnWjO6Fj&&ad8_m7|ceo$>IO4`O1^L2BLO)?HmqDT=s_ z)}R!B-Gf1LV;yqNae)*g zTabl3-(6Lg@K7_hS~;i^joiiZrP!%WXJ6rDKM6Z}lbeMtgP*|l)jK-i zMZiWLjBi~<d@NSd;>cP-Givl0x!4LsB<&&>)Hqmw^ zEJ&vvXos0GapmKpee7zz)d`XMK8AM<4(uYzz#|44}vuzVt9#bWoa{b76x+%c;~d$a?nIFY=+=D<+{z>z znW+JeRK~g-j_efHQ0h}3n)nB>ptCvO;HBCGL@ZWkX64Dz#z|-A`gUw{iL-#i28e;l z+$&wKK@vJDJzHTYUoJmf*op47xycMUzv^gSS#D+#bbBux^T8!(uihr>01IM08v`S6 z6Rpg^m1Zc<2}Y#eNV(x4ez8|Bjo&yz_aH`#FWn8Arr!c1pA{$|pGlrCa)9uBA@;1_ zW;cW`yxMj^4@86_Xz}BBM>3lVMUH8r2uFFEQMie>D=3=ux2wE}m{HMn+vNJAEvtRI zu-bpPI53zR>Aqmt2mB3RWTJA?r07@d51Bm()XajneDXtM%HK;}B-yjzQS%k$s^`?K z^Z93IlS?4krswU2eU)%sQ~Rx{tAJB^XXC`4oniVYm7UA}eOj=??ANNgpZ)#!+jxMu zZd+cj)ufHj_RvHYB*Xgzi~jFj3bei;wNTNN+-4R6{QC*D5f>wb5)ys9__S!1DyGMV)=Hb@rc<+Qe4tT9gY{!Fc`xhR z;n+&rvJ_WbgWU(x?pNt-VjbuFB$*Kwvr{(itZa2tnp@)FHo?ATjJJaW@0z*K?oCO6 z4hj$ZFcZ!=18oE`?L{~- z?d2g;bVI>uu#WhT1WL4by&AA?)3L!qk-&lT4~6fX@f(yu5~Z)vVF;TxUSe7D>NK!H zn6b0>r-eqmI@P_c49lm{RU>01&YiE>pNqdbtFPG3)@3(o(o%tkw^4RAxo)cyPCQ_u zz(w92%zc=j?EEmw_!3#nx>6=!!{MarFA^?i*aQv`I1Fu>cCG)bbMkTmCN+U+C+iDf zfpoH!QYo4?DTx*+TPKI{Ok4bSzRA{BTE|~QT<&hjo!rnrLkYTV zM@%A1WeD`cMjcctg$f3#lJO*N^x$leNh@k5jzcQMq3t&_0)IG0(!gW6<+PuPCOj-p z?Q~CyRVvV_^2lagPN>boM77Otfm8h(p0*7P%v45kk5a|PZ=_#XJsyvC`zi)tS@XyY zMsa*19zhvuDUk@qf{&MN6=D$9Q^s0WbVnlzh3q3(G(Z`Pg~l~pGkm9q&#?V62+IHU1h2l+_EAc>z18&6 zf8J)(S2T-D*luBv&Hnj2T#bF@B+e4mPf!FSDl@** z8{0KcH?>ETQj^W=A)H6-@j6V`3@k5PaqLse%63qvGAzjp?w8Y}1E1^l6iO5!iHg&i z0AWT)<0nzv5?&xv5E^-Z&{TsfIFV$;yLXgQj(oWV>%0zQyGpqBBYDP=`U%rrFj{3| zh%f5st+4HWlOP0YQNVIjeeLuMN4BopE!3^@Sb_vZ2hrNBL}$T+Vo&qQIb zoDVI?yPwASL*8aiv8F(1&CaEYt>wVAgJ-kK^3jBraC;Gf%aq6smyrjhLq`#9Gau-O z*=@au0yfqL?tb+O0>Eeyk}!3pHqy-;EbU&mr`vM6D;o|{uea_}(&_>gYRYWnokly= z%G5h}ZnO!%CYypl6~B)MBn5qa?H?Bj`Ad6Tyd`M_KOPv|Bh8`ic9@P9L5Vwmsz#xez=>QA^@bW1VX4_5 zwS$|duS=V!-y2&yF3RW9)zN0uH1{4zLz>Q*TDrCN@+Z?AtxQOlevndr%k!AW^bxJ* zpF)yKL7m_GY*yKiyHw7JVlTFrk1sy{@Cot5&OPU(ff!F|3q#tzKC^5Ho)NkH0t{UdQIS`m~v5CDk4) z_~%=h0H*uymN&&F#S4aXT$mhJ9k-tuLDXXMYPJ`6uf^mS8+LK;ZJDItXLX z)@-ggwej9aKuX4Ku7)EdTdhu<@x6JnPHU;VqwSYue8zU8;(G&)RHkvYxoZ7yyI~sk z~po`my*6a+LJMo_j^&09=TPhw$h#WX;WDe)ce)Shuo&H;EbZ zB>f)Jc7D)SJU7IF(g09j;4M%ncsum&$*t0DomZ`rFob|hYbItT4V5yook>{Srt_pz zR#D5Kq;2=%>-5~U>GYLYQJsgw0#PaEN{DtM18L08?agS|N{z=+k!0^zgtw37WsY{| zLFh+qS(u$*Bfah9lXZ9NzF!i?7_`yhrUN}HxExsdAP|<2#8n++h58r}NEF7c*Qkz~ 
z0+m9y6PPa*QHqCr!0ofR2mh{=5Au*CERy*IsaX9432UR)5L+im_``oOsJ-FE>JP=h@SSiW)*l7I9-UUz^9&>&)561Or5@v2E; z5wz|Eaf&3JQ@Kgy2-FDNutbTtW$*|Hk(6J!eOUC9odb7iHE)Fz#u8!&X*oXB19&;8 zR(h{dCA8b?**Rv$-D?r1*J-XdV0_C9QpN%V6cp;*C{rqe zklaUfPc@6e!2EI&Fz7-#t>TnQ;)3c-Bp1pQRIN46gGKh_#%?8HZ300&PQ{P7I1|en zbsrCOP3R3a>|Vf{gsa8s<#jE>hW{rn!CE#ekB(;&noD7OjIpAdMABA^DorEpl})TD z`e}hYaoRNPSxkXH{k)%QTp`JF$F_W<;1lz$oeBJ_ux6*cGt8WW5I0Z_WHSOltAG4v zjF@9gXks$+fkiy2*Pk_=>ozP9l@x@?1_4_7*!zFJ2aKU+^iC%c%5)WI_{oEOk63L+q-$T;_Y@vF-*hotBT+Wh%1B}drj*yzrQrmQ2~d2+lDhTD#qZvwrxP zB1_VqP-+AS8XkZ+=ZW!&gXa=SaZdZ^jLICFw56wQu@v03sHkFo1My7K zv!tVSNXE7?jj3|sJ2}mXes(^nb8hy}tpl}{m}i^9u4v;?^$FVU#jw-QYZK3J%_-U7 zkxl6QjEc#T*bp*0u}Ua7o8`?Z9IL#*<+=A+gIck(Jl^r>zTm$soo;BLp6#>aXNq*t zDXz2XehcmGpGjrO5R;-!Mv=UHx{I~-G4Yb`5{VETs7iK0cnHCn(E4PkkWqjYllhL- z9jJYUVy$m#q%?PPzOr(oX~xNz=wH(*vCQECSTVFw5A%er8?S~B0^7B$RJ*91RO56k zST1nkgnoQFd1~##wH|b|9E*he23%3Q--+}0@BC`-Q8ucrhqyYhrNm3wv9H*GPV4lr zSwoTqGkgP&VWDYz8SARDniUp88eoySqYT)_e^P5rwkU+T{ENDDztfufu5eT8DN^jl zOP8zE71@dJM}ps%-ms!6jIZ&|(YlHZ#W#AYzV^V4B}_>R?-A7lhR zi;nrG*691UH~bieuFzQz;+hxP{f}+UitcskA;gH%89UUea)#i?Fywcj^n<@Pv8Kg5ww%AT$6ms{dQDDP-fdVpP=7P5J1RZ)G(>(cyo^Et)uDp39K!% zcaB)4<+dhD8_mFaFb|EpD@nEFB~w$6SGF7Zbb6&7OL*PymLLWb!}9EB`yM4|Z2wRq zP9pqoEkawUZvfVAfv{Mxh2WTg$l(h3iT=qbcQ!)6{?M*LH2Xc@jEc9HcTxF_VCpJ! z+I2loS<IS|Cfw=6M}YASxRNkGHj~*Y{~8hu7{euI*p1Pr$v@m^I9m^Wg%WW$9pSw!x=}I8_AI)029Gpy!n0nY7M0FyRiv{A(~@`; z+TtA~nuqA=Q}*y^lys|IPTY#`brNJ68Ec*@J>tlDHu-sy2BdPR08r;3>&zQ`5zw?##_x@y8-@w5aJwyek7IL+WUu>wJE#yTqxJJV|~g@Kk&&PHOXk#vI|ZwpcfLu_NjFbh#7uJ*L-J zbC7q#k~CNv1EQ6OBLap8C3o~z`KC?ts1%~3rnIbfd6!ncbahmT&>(8;95ZmCP*b_aLTtW1w?Ex9baRnRxy^bkiYZ}fQMh(tmPk=6VOwMO4Ex2Pv;hZE zC8KdjO}%=;M$)pJ&sds>jC8x83mG;V|bwNj4hK^ zCKs+7D<71y!@#}iW5|cQmka9Q3~wM?h$ ziS_MXpX=OJ^3_J{SHg%1CwC16Iy)7Z2Nb#E?ccC$l~bH4r@#lpw;&SCDW_9G-h!EQO1&CG*J6aG&L@?7ivPGOc($4(~B3liYr>9j4p7jQCh;2t_jl zhH)HA2G&yB=~;wV7Un0|xwBj8$;o;xt6DtC+wzWT-*yP6R_3O>LymSu5 zMH}8B-ha!$F1t6=6`zZ|AOZM`@5-EnsV@jK9X?bqpS#a4n{CeRie|cBLVFl*&kG=< z;&ELK#5(!|Iq;k7e{x>W6Xjp0kEd73k78c$CeQYcGdK1^52C?IrIfJ+2v_|FIH8 z2s_scf^$JZS!{2^-4){3Bedod1gcOB7J+9QRMkon+IQK!NC^dq0-+eNtG#(WM2$EttzVDVe@2dix`2m!)|a<>Gdr;>TPt zeUg3BY>(!-`vi%8iG2*ts0h8|$KbB>b;RFjzu(_e4bmT;oqPM(?$Z4SdYISh4Q{;);sx24 z!XA`~bJZK%VDO~IbUGubO824YXM9FdENSJ+(9DWI4V)rFi%ONu;wGw}Z{xMK`(Tjc z$F-u=*i!(Knj@m2FRL5C(Y9!ouna#9w+uaVxwd+RLfhh)u`-nWA`_!#^m7DWy1ClP zyU^LyqEfjs`BPG6`ozf+UaMuj)OpptTRw>hoKk8}fh6QOLPhC^_&edQEkcFMFQDW1 zq*F`$J@7(uF95YiFJ$d8B@>4r~!=dXfmjz8U64@7Cqh8b6b zMmg^_>R9xlT#h20r*e6{N{0_{m(q(@LV4l5u0;Y2n0_I4h71h=OB`eQmAOPqWDzAM z@+)+KgkyXx3sK08Tw{=?;GGw$4%@x6hY7~nRck`kvSqzYg$hj1OT6QnSv5qq0HV88bE=?S3W-kl~gRnSC7H@Q&3vST=Q8-e@XSDAeKr<=xq7}!cNhWVmzK^ zh5GEt<6UFZl2wit87KD)K4V`|HUnc?a;~yb(nC&?J;k{1YktbsH-_=|IPR@ldZDK~ z;o;KnpYwnVNH2$^0L8f-%gQH4_-=ETt{TOi*M!o)u@QdeXXO^=@anEH&9Tj{(76|D z=k{xt3U35qJ`D3*7W0?iO*?LW?w8A@84FDIP%nI$1dV3m|Do= zB0MZ%kDtJoG^b2idY(_lirk!>?&>fJhRJ-6|91U>m!HkZGVKz5+%VIU35HA);8&ki z0xkG@(pa2wHJ!HFX-yO3M7fN|wGlLN&MDEmWRt1Jse+J>j>C7`-Yd>gDA;2LFCBkO zw+{j{Br`qm`h4R6X6QD|qGELzz$*?V-FET()WSoR+Nxp4YPPcLu3e2XrddBvmafC( zvYOO@(H+n_Vo!;lqq zIY$Kyb53h9 z&J0~G4^{$kQ6NKRz|S`2@PhJ+VCb6`Q)|CApcB#o_GxM&Yc-rlqoqYltF28m|5r)+ zTcK#tqP((&@PPHR%`!)!=LwE@TbtrZ6JaJN4%6s~2{v8Q<>$oNy@?&fiN5jVw)AUL4yDcS&K>CMYpN zzYHlm(W#(%nT|3yq@zVGxu!-`Vh!(XC<(vF0^GN(UKq6u{{-9G$rfExON+PnYCVM4 zRaA5bcFbrVuuTelYGzanSxu^TY9`E0+}oX};K0{#eNrEm`Tp&{TvF@`>2rgLYFwNK(>2vI2rq-qx`z z_DncgTw97-K}%F(6r5-#qIr*+%k zslIq1##I1f&ykbBZr`ii@bPV`&8;eKcxE^lRXAPv?4H5l0y?j3{_MEYA*g+2mg9hx zu<~M1WTTs|EAk>sVot>xd|3F0|65KSpH&hRi~;<7$;!$m&zjd%T}%uym3pT=uVB<8ihpaX9g9AN-gUat&*YN 
z=pI{2(y`kf$<1OoGO^Eger=T?eFk;#8CPwrY|4L2RK`Qyp3gtGEZ2PU?_E-d<&-Xu z`;a<#c|6!=MPQ_M!Y3{+uVr?uBeIYlp`idPB3h!098N;e*;CWnYee!pePPdddU&pi zQu&k#zcarrG@F>?Ck>wtYF_IT5K0J*H_FR264~JOM=5h z&5o;Rp#^5yp-IUf%Gsmnclp;ZKQ1g;b1U9slAf5X(Hkg5*-v}+kvl* z0HVc;6_q8zTLrufaZ8oow6#Sa&SCZ#rbGnBJ}B#8TT-MaA}012(5=X;y-S~B%-_GF z5!A`H1<^Kaoh0@_d5t!#3DtS6a_|j*_%P>Z^@0d9$OJmlcxPvIR!zPqc}vr}WWHeP zC@T;;Rk#AO$2?^$Jn-Ua0nquFNDq(rd+=F^w_c#emgDp&UFwxN)~8@?bG>b zRSGnl+m^}_1%=@YV&_7fyqI~ zOv)jY*ut{6)@}dQb2f3h-NylE-VwI!U86!=S{-O5t@z5ii5f+b{-aG8ONOcWgSowD zlV$64Vj0?YSZ*2;`X)4_r@Ud^XT+t4ZoWMDv8;WjGA_px(&SJ+vMT7C|4Eer{s18O zi}-db(yN?9ZU(+(#z*jL8CB>vEs_<9rRBfl-ywvW6=PYrZs)PzU&5albV#sl8Pm;u zu59yHTgbl+_!1_`$W`HMzxHl@=<7vN+8$$9?o1Q&6^=E=v5%uj8$?P6*xiQ9S>(z4*_0N(+(CeD?%8dSH9Fob9 zDtQUeFsd`P|4{yl{CZ_H-+J0ewrsiMC>SxUv0+zkt9WoMn6Fn;72$7bg~Cjw%s4fH87uT zV7}txcb1RxID8ZK2J@LMG&3(Df^eAg)hb~VL|D6&0)7br6Ko%Re$`&}@LRh;kO^_T zB&8|Qau{@|)A0!Fp3=3@QKI@jVakxjr#RE8YuGYP(E}$fyZD$iw(}J9%yorM1<)U9 zA+BNdYlE8c%(}dGqM~imQi5-qd2vzZ>$bMEj}*6TtEd^NFL98a^sy`tr#G>^2(V5}24-;GR`hvqGL4d8ibz zrG9*Yjg@NyFlTE_tt|j#6(R7p>Xf!#;OEN5K22Yh#fFXI$o|8rk@`)?o?6LPd1T;| z)G%af4xbX%0#UPK*fPmx(&g$hbr#&X^^$?$nQe=Kw|lX33cgK%(tZg8-ujJD|2^|; z{XKnzpx#mEd`Um|Q*6gwuew#Wy;aq9EJd)u_rd7Q+()4nFyMlOG` zwxnjMp{1(c^t`% zA(aDW;~*KC;2r{pkJOG~-8GgU!$(8DMAiV3Ls!(_nYuF;oV%`^}b8 z`Kpm#J}6#r5OE+3+>PUdsudG+yQ(fW5{@Nlw#{zs(N{=a$-Qb8_TkT#`zKTY#`Sl?XR~NdJeq4Ck0IY8%Tmy@W1!`ze^yd z#%spso=!je@6kFeJ)V5-`saSPT4w7&PmrSjnc)8o{!hgoe}BMFFUT10W75)$>t~EY zX+cko9AEcVF0CNcn@o!S6)p=45PE@UpchlybJEcQJI!;nU@rb1D6(JH}^&1!GPffvS)oQtHY@Ze1=jUQSVEU_SK z_KlE*jUV>JOv4~4d93SbhFZ`+T=*>|I8{P!p7m(KKrc?=#QCGR#wA8}vf`GH0lc!I zqN5ol!@5RCiyV1PqN5onMHC;n@MhJ=qUS`7VR|A%zRA&au{wL5jn<6g|1Z`kX=mu_ zWTxErgbP1X1)l+6izNUc!vKhqUl?Bqg?Py>qBKFNA`zJ09d<)D3bSSAe{Pz5(tL=h>#2+lJ~lu+3`r5KA1j=xT%j~2`bMF}<-1>$P~ zV21wpjw_vSniy?l2Yuo=Ga+yHr>x{8APu^lYu7~lvv{N<<*d&Zj46?7sDM3Ihwo)TV%TdG<7L`7k z38^`f`JV;pcvotYRhwd;?4|G`^8SbtKE+r#No4&r;zP~CMXdPM-l=N6a_()bBve&c{XZYfG=d2ABc zDLar@*zDppJ*Qww46{gkG}7c!s8qW7MWXo$VXA?rb`i34^3M@&T{0-eYP!ZDn#MV+ z|~3> zR7t|nXJlayr?vGXn~bLftJ2^aAhsnA#8%fF)9yFx*%<6y8soci&-o#B>T!6oSCx|2 zpq~6o;Jt4XC?S%?pue!f1nb3vExW=MFLEr#W{WIlEZwW@0twEDEv>8dv+DdT&S#N% zjv`!sXCfRvHeS_Wj?RG3tfbwyG_W_R#RtAQ{8s!HVAjx!{_|~Ec*6uh5ltDouWUbu zJEK%fph!LhvWK%0754Wa(cW}CB{_$-=fDhpplK)o|1GY;Sp~oW|Er08-_z(r0S%uX z7!<6<@zmgBV^XB~`*$y+c1xVCLd zQ{3=EkHS&jfjQ`seCDu7Cp$KYDv1cMacLTv$DQ78MI+W~;IHCQVPcR6!G}5;^jgC2 z+%dM%7^!N%M)yw-zASD7(sN{zGOXLO8e`Ys12$nkdTH;-$B!tJxQ+TEdGe*YzXk)^ z&oc`*z9!lygcsqn_C~I;<_xWd7n@T9Z&Do@wKm`!{@jKhhGt0K)l>h>NbimL6cV^l zddSMEFU2`E&~XtLHWLmb*ptZK^#4k%Q|0cnHEr`=6GsGQ zjF3dQK`YE~RaQxLM?3M|G$H6s;MV+AWJ4uC^%$D%;27^-}jfUX%2f_dp2w25#KCzr_G-6o#ju55?v^ey&hAlM1%B)i_&cV?RwTRVT zdIk~e16_lB?1=0+#zGSAsK=bEYC!Z6tQvgCm;LZ(!D9-7zSsH5(GJ>{`?{TN4jFC8 z5Jj&?=gP6cxn7e1$}P_Wsqb*8x~aE-o&oWsv3x4oQ>zBkpGK8gWUc*8sg}f56GJ*f z7p#A5x#W6@Q;YrYj}qFQjujZOH*{$sq|*{vdTB$*B%>wI6_UPe8pN~;YAm-PFh}Fa z2Rdn!EdWwACf9!qsB!CKf{I+blHINFzp`^;@F`AfOt+|6ByNi2_A+A#DO)3{akWjP zs|{S(M{j!`({X!(C0ISKnucVQi``bD81&%#^kH{L+kls3Yhw!p#-{ttxrJpQ3Zet3 zc!1SQiB~~O_yJ15>=Yg6=wiyO#&|gXo?YWuqfgAhqrb*x6R}pquo6c*asYm4Z+~pO z=2gKY86P;LEfwN>CG(xJW_on^Hw$rPTtx}W-lfruZ(~GgUE!I>Vj}2DlmRpUxHqMi zi3Y}$IY2s~PyGD5Z~}TErTx-4^Z3;O)AboEyH%IjPsjGWiSwo$H8)!$Cl?pWmH_-g5zB-><`} zl#PS2f>QZ|7o|yh^b}0fHw+WCk0X~=IQJlA(I@jE7f5eb^U}7!xEq`0&>mWUxw8Iy z^Q2GhgL1o|NW>-OR>R#Q;}mxese9wrRX`HpFv=lLU6ZvGhZ;@tfITH$nN*t9ri?PG z_D8`WT(Kdh?^b0^yG3rr{CcxhSj3zJm*p2R4m{1AGU`>C-OYMtc`WF}FeO8tEN(gt z;x6UQ(Nr)i>=5HYHy{}&V@>#}iD!}#;m(oEjhBa54zcf)@q%n>&7GU6r5npSIKTWL z6DwVciNZ{gs;4)-ivf4Vjb`fovUFiaQ8<1mZc3CHSz;8H?@ZN`{u@H=s0nL!={LzF 
z8SFcE`pi#WQim}KhCB_L|OWt7+vnf zNE`FIdv*ll^ziZV!M$7}y~)wRx&}o<^g1cm(LskfNd~;LWKu+j9LPa9i>$>7(v6t1 z<_UQc*BD+j19@WARDzgo!g%e}VWI;?5=3{z5(pBws?#mGc}6BAu(8su97jLgQF_p= zxh*;BEK3HLeZm-Utj5Z$aatkKs)jT6Li_lF3`?3uD1$Ps>@D2V)QD4Y5M5KYbPxwq z%bXWWhQJ@XK>LiL1xL12BNS?iFcbJ>Vbon_@b(4BjE*6Y({%CVD8gysgP{U<|Gl;9 zF&5eRB^p$1;FLyZquUapu?(u4L}}v&v4eSXQxoi>00PW>vaEK3%HxIpIm0QXWMFW* zhW7f~A&eqyBD!4YfAEztqvOscN%xCHICO8NLO$uVb;F0aLwS>8#|N_WQ^ZrSzpS=- zFon)0+qqI9kc&}ntqt7C)v%}4eqhmGay&WOQ-`TdHUqofJgM+>xAdTMMCsbP3XH~u zlTlRzCwOZ(wJ_{Sv<>D@20Y**VOr;7jkJ@K<6u~by%+|ji?l>7s;1ymRAKNKm_~*} zLOmVrc^QhvX#?S!B@AXL_UsURib0tQL? zb7_O?{C8`Hr0@obaZ;kzvFDZ;seBe-jebUZu5p9w3`Ig|^2Pbrn1!guLXdM1S0Nv+ zhD7a9DJH^E>wTW6+kbZw5hB(g?dlteDdv zHK!^9S~oJ~|E!5~Veq8!WavhZQRjttnvjM%h?e;Jhf`<(jLfk6knLGDpnGb-7k&=x zp$;*GznS&3Aad)?`|}c!HpAOFLuKeD;0KgSa7HJ_Q6xcng~1%m!9(E_GoD!?S&?4q zqx@MJfMzvDh-l}i*vuB3kZzPTsD7bTd+>HSGB63xTXW#ciZ%H7=LxUL92Q#3$&(4Q zq_mI1Z?YumG=CYQ7ntXYiK!uKuaGuiU}C0c<*{^o&c8GC?ZO)#K@h14Fa)!&;s5Sx+r8zN!NjyR+%FjB6SdrB*5hCwD*Fa9L? zi{WX?G3tg&UO>=hpdq1B;!l+k$D(M}#*8|42h?mZV|{xEa_Qk(?IVpKjKV@}sqlp9 zhQgmf&%|iT?lG8P-lj8Q?(BBVgGjJq`EWpvBmD3%vWfdR>ws|&#>@9N_u_BU8^3T_9P`u zt%0#}u?X<3!I}gWZ4GvLK}f!5Fqm`lKp4Ffxsy2au#Xr7>!=6yB(*msY)4!Hk4bbw zgh7^>e#-6aE>T@IA@am99VA3l3|wsuCP15dKM%dE88hZz2iQtNO1@J7$zDrcpP<2Y z6#+&b6huQa@WSB`QB@MQgg(Iv>b$JIU4Pg68AM`Z;a;7F{HfK;r(0m(oGb(>?F=Ig zBK0k$WI;K-Ca)REPlHy9DY7-qPeTLlh5B$+gpZB+&Cxk3QAR;@4#_}SdM9rNLFx4= zXZ+h91-97kH(clFelUhnF{(M|-^r&y@oDQhD3yWNt)!mw=7$G~(1JSY&GvdnAyscS zfH&I>fwA^aZ})62r0^9P2l-_rKw!|K?kxuR-&W6^iAjiR?Jd_YTlP|(|9{)=cZVeS zZq@|>STKXeJmkEZAlq}PKp_qCyB2?o3ICTQl{0M$+EPJXd#_B$Ic#lvuSC!}?2n`t zbsqI2DTJLz{YV2qk0F76BJRh5@k^M0^l`b*gr+==CDoveOUi*?g+9>pu>?DDF8%ZW zf)+Gm`e1>qNCT$7g}mmdn+f@W4L`c~& zqj1Vb%D=UzGDe5HKm9)LGiEPX(Ho~qVnoWlOT~jY&`c;BjWd!_Y*6H-<%7a6WKF8# z+gIpT6HOK(I~;$IsiG%^$(K0MZ#}bSrk3r!J6?W%o1@Q8S^~#T0(nl`vrgM}&)j8i zZ0LERild^pOnew|Luq@kx9~3{-jKauvxCaF2QQW%D7m4uH;*s6-nd_2f+2k)hcA@f z@e-r57835*HLSJyk7S{X7ZKxZZX}r;$x_8JWQpcqVmvksza*n5&Mc&dth8|Eg(k^x zEZ)Q8no)xzT?1akXoDKQF13CCUiu~MfV{i|`T6ZWgMHoh3!=UZ2KX`Gl!5s1++0I` zp~iPXeXRQT!Ms@cf3x5C4IHvj+_Zsz+4kGPet`P#z`pnd_|e=%1qi~tl=SzZytDy* z+wKa&d>jXS!`$qEeTDUFztamIbQAbd?p`B&AoL4Dy^I2VJMZQ}emL~^fxNf@e-rNt zLVg_f`$4|sfqZLQ1FK*Lxq-`x)g&c|!>Cb^ntw`vuwm z4RiGp$XJNIFWXR90-v{+#(%%RE;^hC0b(06_3;v&_-uv|x zi&3$DN!o{d?)u0-kl}>;_Fsd4p!(~>eHivLzNY30s>k)g-aG?;1@vD-ehmDlhZE>O z^C5n?1$@(UKy&t&?w78?ZDPFW2Kb@f3@#w+!+do4e-r)8@d#mJY)0i-X53;gkW7ZLKyeODj+KYJm*D1!JQ-|PVWxbGX|CCdTaPz;VzE_^(g z9$B6XC1Oa3BmxWcBcPuU;sd%L@r}~;f1cvUiLfszJ9ll^wWD@0KMTYU<>trRj=MWB zAC&&z5I2GlUli70+u$$a{nwyBZ-9sk;!S>Y4DxNV>j(Dn*uN9)R*JoAx;wd&ztO5gG{M|ZmU-aHwgZ+*0rx)r!Flney?!u`CRJHUrjlb|jBXh(9aczX?2P83>u(#7RsLR738eL>l(Xryxcw&}WF9dtS6&&J zC|im1XjQpC$AmzfW|YY=han6Lm?$;07TMr*q`)|z}iw3O%aiva6wK_Nt zr4$x9J+R*0$HO*r)Xc+Xtc9m#l`2wVv!BP+-0I*g<-84_OoE&h*YH|mOpQ&A5VEAN zAhO_Oj(~blAD@C&l`E+X?kyk)j84Wh;G&5VqQ#)N%aqCOobVzk@F{N+X_u2Z*w0li z%cfUDzu3W!X>`{sm2kxtVX2@T)oKD$efZ-Co}?1y*x6P0-RBwk5cGzbuK7Mg?_$Mgg@tl3^$msX}KfZq!T9lG{3WqIv3LV^MVC9LC+$dY_1#(eY z%H$T_0xur25`T`%ENrbDoInmxev>avX ziNix0PS`XsX4i!6X60Lz>oDBJ4x344?-rT4cnbyqeX) zG%{#Ry*n!vy9@RVJ>nRSDv@kO`OXEj%lLE*GyCVXqQqr5D=Q_eA_W!_Zn`BEX~Qdj zVbq#VxMZu4xOvQ!P=M8eE_U^~Te#Wnb%hiO9_qT**$7*kBQpT;rVb>{7St{d;vq}mv-K3bYM{5QVG?mgOiyzuM(!G!2)MbF*$7npm(m* zpEh(C7M4?t$4nGLW@XK*=P&{w`6fp@LMO^4xUTR;D*~&De$gE}+A~}?Jir%w#ruY+ zKvYP{ptlEXS`~-9m0#*u68;Jf{sYAwq0!&TBPEZALSdQvWmpv-%zMRawEM-Us z&E6(c&O(6~Ro*C$bFCIMw95?vmm~#}Tj`)mWYB<`z&d8Q|52+&^rmK9jwKDygJG#s zO1?AaQdBfao`NHg%6c?`fVXdM`E?AP{0n+B^;~{AGuPOm`6=*FrN3P1biDc(0|hR1 z26M}ZteA?A$~4gUh=w(k)&K(o2nL~%1?9O^dgc41-U 
zh#6C|j3`gClIIeQyc`ZmTiHyrR?*N%(GY~=Q6nnbNjR1VBsBalOJyrr<#Z`Kd3pNv zU@=peGlH?q_V(Zq>dZ`{3nVQ4BCdvX@zEDJ=5P$S(wP){7~=5J0e77(>*cu28n%1Y zg5=Q=51p91{@2VUs#)PX;n4#!P6Ei|+st~?>hj?oSYe3i`p?49?h5Ers4E-GeP&|C z9A*7~OGQ^2i7M0~2!k}m=r<=ufNow1^ixZ#&?PO-$`bJc0O~BzF*dch%&(&21TBm2 z%v&o7afyt4IickRngv=z0>YM0jaf*-$%@tl(l!{??FG;hOBV}^U&tW=#g#vN zXEk?gSc-~Ofp*s491u6{HvwmG@rA|f8~Ns=qwW$n0ma(MfyKDhP({Bxq{Lv@qI6tv zKHyVDp^}l%ceSAh)J~+6a%me+-sCU5ApcQeEe< zxTm&IzA8!LI0L4L8J)Ml9jc*MG_+jaN>ucx2O7&l$uH3C^Bou{nVv?v{y(g}Wl&u~ zv?fY`paFur=iu&6@Bjx7?(Xg$LXd;YAvgqgcR#qhySux?9~?C4t8csu z41dEPb<)PI`&~j( zuiD?>xfCCgF>b}KsY)dsKsgS5Pu>=rV{D_MW+9lqNEB+uEtpA3(!hsnLIpXTO^)IF zY8J@tr&ZUC2CZxGQ#iDXbnBOZ&fb`0Xe$%3_d^M|PItxiK4Oj^0&>eVSt3(ka0z(Y z(V?um0^e5bFoMk7t>Ay@d_(8$_(aIPVI{5;K8GwfJ42xdzQ8IMl>4f*3rhqOUwD-p zkb?5v$X1Cx>;P^6bd&QZDH`YcD?^pR4^R>^Rd7WiT1pjcX?ag%1D|PlBB3$XCm}nql^ZrAX9J!LW8eeLJTyoWUe_WETGXl| zJDen??Sr5`beYB`r>Xw4GR4$#2bh!jt>snrhvW}QMK~VqobL=@ts4wJqR{_TPAR`d zXY7h$e9w+hEa9X#t@{*dHbJ0slY>lb-ZhksLfj|H+^i@P)Le+GSLg&6L6G{Cg|b_l zB5!p_Eu>DSWNMLC*_7P;50P~MwGlFQsG) z3x^WMh~mxPU4?4{OnhOP@_BvP8ptL`ZJM8&l(t5@MoWs~fWxS5*8GJ@+LDK9CzgmR zCz@R3cmy&4Mb&_YS+LD!P9Q&i2k4LG@=^!7rAo_vhoOngdRrrpdqVzpbI)o=6RUem zm(!5Qktd;m#(Uhs{m;sCH^S0h@46Y+WEd7x8owi0X>tM$O16P9^;9YPJ>wmev_x(+5Iq*l^eO7 zbXkn_7M}n9r;n?dp6TAEnm=j z>8dMTWe%0Wp{UHjZ#M(T$1Tdbs}%AWh_z!Q+*kR9{W1UkSpT%%lj?-{__2O^RVuyX zY&_Y;U*@&1pP24?4DO4SPFlK!+xp`{Q;y?WsnTy+RG6jF)iqP?e#Y{Uobr?PYqN=8 zIf>m!4pqRx~^K4hEb;;j|I%--I4k`IIEW}N~SEX zk`Ij@9puv)!qRDcozFI=6B}aDQBw8%=P``VjbeA~4)&QFLDU>+E&{w#RMvj(R+hSn z3bFBJ_9pxOl15r z%i4U!XOW}?y=C_g)d#~Mq5iNmdAwN636#AXr5%JRYHy8t^`Wn)EgS)phU;y=usNBr zbWcls6D&p6{~C(t{6L}b$Rl16l{c#vN%{hZ+M%~m*hj{1y`&&$+2Hf64p7yRzvTu3+o>W3j<><39RYmHtCUL6@@0H2= z2=D!TOjAGq?OP?^lOe(qS6(u_ZmSqz+?^rYqr+Uc!%qz-)@`v)sOi? z%WRPLMj2}LEZVuE0DIgpxUnN zu#QxF;r_UtWI1uQo_|$W*~xK1Q_}D#n-(8Ywch<2RcO^Zg>UOD(@hoeQZk=^7KeAS zpEOnzw@boPWM8V4=wh}b0%n!2cx&cfo9qd!NH2mY@Lwo292Pf9L`h{qFI87gG!jAS|1MP4x`Nav5FTQjTmPpEEYgx@kTQIm!*R4(WF zGO5c^eO2QgFOgSoPMz$W8$#0>$jbqOpT|o_g(tYtkV;E<=gG^~JW{96vkMVpVUAI~ z9e-qYg&z+9-|8gPg_stmSnnEUKbR^Oh8d4W@A%u2{Wb9eBQ;bt+csy3rKeNILwf0T znbSCh+2~)D2diIhrOt>>ot?XI3!Xd1)*n_%o9y0Nt))*SSckZ_TkYFAhuWmh40#1p zjV?G1r5>+~IERH-7R^lB-PWf~(~-_~M%j9Y+r9Sj?OlGHAK6wk+td;A`%T^-O@->c z4WeWt9Jc)CGhUYJ8-hGujnRJ$K=6Cc%o?%P3UuDMcno;#pcc>KF zv0}pB%Ve?fEIg34wLw;%i`1_N#OOCR4BPo9`|@LBtMWnT9nCTIyTTz5Qpv)B!p}dk zVBJ3n!!g6bOTO`A`c?tbZKct%pzm1ru(2u%}BJ ztq&&0Ogcm?PQG;J4jT_M_MebYH)&TqJsk%vUvXzPKg>S=4M8q7f(OzFg4ST ztxMZ9EnUHBxbnH^g#TP&fyt=Qq9p?tM5@%*nQu3yW178T(^NIu6E6_6Ty?U-@{Z<9 zq;6i8dZNRzv}4gwV(V8HSI4_CEN z?Qq|1Et`wA4~uSX-I$eTwRX8GLQD@S8y!GNl)7P1z2#u8XyAbi`#YyM*j~ps@rxzy z-Lw$eP1e4+IZr0v_@z*x8=zZMS#A8(_^Od=tiLL{}WJmC@ zIRI_G=hzXJX@~8z;Nu!JsC}s8Gs3JJhnv=TxIXF&G3m@ns1OVDy?Rn}*FWXkmQ2st zhnv18do*8tkZ^2zd((iWqnbmvP)eiaECF}Fl%(hd>^uqzD$1A8WLs7X!;!Rs zgNY;_tQBeVsaThf6cxNNujGx($VH_vpz8M)&YSh#`_i<^7b&z>?mPJ;^ZV&-WCbf3 zwtmehL7&$wgW812=|OOx>8C9tiRV{cTsD_qi+HSV?lc>8t9zSW@7;@vvT09eK!H=G&^1 zg-F%upVAz&IFT-m-a9FnM7trxf9Y(T1M2=OencH>wmpCL6e6;RaRhXIgb@VeqzLN7 ziQuFm+-)9j-Ln9s^?XSez6X7z#o)ooF+^eqm(P^(>>O3Ro`T7(l1pnBF~F0}dG=RL zcVpM{ew}qjm5j4d#1+Deik5;>xs}Z*-tHP_v}hfcN5SC*J1$-|cG`y+7Bo||K@#xR z+xUDhWAA}5!9PWLIRwlbDp-3bn!_S)^3XS7Yoi`kag}3X9W*ih25Eg&j>wCX`-|Se z?kcqM{z$^0{pH?~d$fH*(pEpm%Vu`BhNV+^D_ ziomX7z|QFrp)1VgyaH4CHrvp=hp^?%>4n2cXQYDl{74%kqfh%IW6inKR&Vf{+G=+D zOOYSlU@iN@fER|V9*fuhlIO|+qq)=5IYJd9zWIaG9LIeWehK4BW+mUUzcNAyGe+|r zXfL57!pT-|jML90nXt0{@+?`tLb5`h&hd!dc>E&)Ug8BDbNepg>Mf^d-FG(c6yY_{ z@>=DU-*W^SlX)O@X632&v!!R4QQA5we-2j&@saS=T&829A5PEs(QKu+H?x$P&Az6( z&S=-DX=E!~>hCae>I~mvZKqx 
z=5O~AhV{N}g~xfP)m<5weuJ6EDJeBH8z$mWm+i8`Xx28kkFE*63zzv)+e6a~6|njX zmd)9^Uq@%}4nFAgGu(XLjuvJi@o|U|+JPs1BuhRF>P{EpcXL7M=e9bAt(DsP&#xz; zHt)4(iPeOEbx*za6Bd)`m-U`$gtuxaRXk;f#D+7aT1R9%m`-MGG`57xtbQh@6_5M6 zH!aJZ@SKNTW>n-xU@Hva)wrkxY(B|Y(UG;U!+ooGQ5UTel(vFhJkCU@c9kOgV<~2$ zz;`jS*jbe~glld_GFgVyv<04^s!O~uShEjZt}N?QxsMX8 zYRWp=w ze?0MEHJ9AYIc@K6;*yX$Cq8igmf?JJHb<-8n~u-?8~4>vxLvBHy$*iSm|or^BSTNs zMv6gzXMeTs!W|}m*TqNpCQTX3g@$54x|*4PV&E>npGtM~oFQ;|ddtbzYnDJ~ge$78 zaj|YwRxY#1c0}q?^E@hAt)9?f$y`#^_caDZ_MB0uohNTtu;Hcl&_6dybMDbrew|if z9<5ezi=}C&Ry+Q66!VYu+hq=+1So+&kBe0D+4kUQrmfIlTruvviC8kkzgoxb^p%&w zbhJM?lDkh|X1R5%wbdGUpA3Dhk0n?pvC960cCH>9({ULJGF=PUXok9OiZ1hY8`F8r zeZEQ6m1>M+7UCRS4YPP*T*hWcWtnJHR<=G9f~#3kqq)B+kbiOM^Z~3H z8;H|x5_{^idBn?#(*V}9W-zT)kJ?6bS3mcwxjp&LAKYt4r<0}-|5Yk#tzj!OJu3}q zy6#2G@0c6B4tb8t)FQD^$hjVEOk%#JnEz@s(QtjFPi6W_tWbLjmE!OkQ*(-+L)tMy zkNIYneDp(p7HD@MRcKr-Wg7kC^3nfw0^S9=_;Xdgjf`0wD*Neak)>Vm8V7dK@#CjpbR#45=8EIr%o@!Qi3El^L6-m@M@oy2+LDlC`!d6ge()~_4goSg zCKjuJ)wCpsshm7$fBKJLxbB#U`yMVjX@kPdS$dYE#n*}A# z6&HY8jF-H;W_7eUDaLNuQ+rtmbtQ?{<2Y`J=k2L$k^R(W<)_4CM-a1Z^T8YZ{SidA z8}-B!_lfL6nY&=+=CCHQOuyReU;(815{a0QE|t{kjNj_@5^1czKigP!;$`c9wgkAJ zqn%zDea&n|Xx=)c12y=Vx09wD&P6&0Bz^^rK%zT}E9jM6>Jhm6`G56q@<`XmqXVITC^4~^z z#6c`0#WJvJ>_r~$IlmeY2SSnX3x1`7)05l6r|HJ z(UaZz@&^JtS3>UV-Qt@Fqd)L^3K35i9=ab0qY!|eXLyF^O-5UR#nXc(twesn>y!$~ zs(*pkGOM}vcz?%U{tpo3zhuysEYw;97Vx7nNM^&Kt@v^=L;qFa`}4@cq1#MAA0 z>H|dosJNHMSeI`9Tov`IIv+W$Utu%&5IVo6J?iezSLP3U7ki*;+a_>H2BSJ;HU9P{)|ymM z3y-9+^R^+~FQ;E8PX6dGrKR;cXS?B7?)BGUzuvXOAYS?gg_LvdrDa*6hoL)(H=UyU z)(S_>Al$C*eLegR$O4()Nrfo>D;{kQY4#>ha_g?YpyTU9e?VCW(V#q&WT(_|50U7= z{QD@f#$?JiB8v3u6l$*Nai9zNpZYHn4i?OkC-AF|?!DlK)a?5b*bY+wqwA1p~jZeN{m=_yLHbJ{*D%p zmEN6wRLs>s)^@~S*}{qEDo|q>>P@tXH1=q|p}M5HQ048+G()fn^&f>>J65iV?YsV> zRB@r2+ef*M<2Oj(of%~wh2PB@6J+~0O|jJML;I)V!ymP>t#i4!;PVdnWSIm^X97~G zn3!ITGt@U=ldp#zDd9QpZrLv@oYS-kvF>QA)JK9zRB~30?}$9K%$U8|a@L*p(Fj#P zNrznB>y@H_+I;PdKvudFv4!49c8LilV=<}J@OMX795t6I58D9~6})UIoD2%3TK%E4 z;7LA6UOm{l{58|I$NwdjOE5D{9_dCmZvKf-jEKOV)(|1Gkd){ilW!4ulJ%CE>hH;Sjn%LigF2UaUNBIzh}B(_Tkk*+IzA z|MBw_37P@{=9|^%cop2;vl!jEcsr&StX??DocC!!iLR{po^@1x_`*ij$zv|D*nnI- z*xYB{_k!2-y1?k6Aoh`KIXE&x`_WFLh-^ZEk z?3rk?iL2RY>f2HRVLNn(m^XfgHQdHw{JuBC2JKlvF030*%ADQ))WaoaShJ1q)*Ay; z@!<64@ZBnk%Q)e@n|ui>!^S+r!D7G3AE{dZ)yDF`*Ye2d)~^{Vy{)#dS97>Q%e@9} zZs)<&ZSmC4Cbkt-YkS)so=qcU6jf0~?PpM@IgMu2cm6pX(X%K>rgkRIE>5O~w*M*G z8(E!X=CVODsF0QZ(_;}G_^B#u^?mP;t&!-LHZy4 zJ<@bz#_hVvzVw|7eC|I}6p1U>%%f4Bw^c@c&dKwJf|d`ig`&8LX~+yHjyFThCA~9$ zNltGr7(4jB7}8K3@?FXn9MY@T3d;w{MWN+U)e5H1(;n+W58m?Ag&s-AXS5_35@KzF z?U))>TKsf|;C+wO%7wY=lkT25~+@m-2qMmv2TPBqRs>U~La}a$Cegf|Kr16q98?LP-Ov5byaW-9s zzG{ts;D0x$9RI%tm5Ym)m+QYqn3ar`i=C5|?SCER6JL0rpS@3a{QD>QiE;oFX}9h; zMXs+}WLSf7aOAb$ak#Sp-?D-;COS2sB8{5vqS}5nYbyT6Zd<_i?g-PHDN(P?(ZjkD zbY3I=K{NEa{(gSlvTnbA;+$c==D9EEyqA{D9&YgCYwTxuOjVpcY%pMkq%o$pi;uz@ zXt;Lp(&nbEyTG270W|fMpv@0&RFc?z97qSjzn?IK0ADL%8%QY zD(9jtI21jm!(si_9DNfY9$xTOg3o-mEX{LT)mKpb{)Q@6leeY5crTHazHIx z8{Jc%I=KUCm#JodzQUr_;*Jm8L6%ZeXJ_b|Ab2zz_-(BjYcqaNy-Z_zTSBnwxuy%!wL^TqA-pnr zw7sd>QMZzrsGbs9$37utP>{2~gpf%}T9h>vPS z*35VBeoQm{G-W8)`@R{=7NnW2wu)cJycY+Lf%j7*(}yi=ua;xRR{6&vaUuoCul;WP3Id`RU%;~~04#cWVLK=>xtjGt%mDH;qA*TVZC9mqk zb;bPpsAfyTfqazQ*6FtTceQS3IivB+)$m`R84NxdPnY8w7L=Cc!q^T!M9p?zxMxXm zuR-P1+@OA|Z8PD|LX*8j{dBXg>G$1v(i1{wi{%-h zSrGP2iP>@}DwxW|!EM0Ig6N%>snD?do^jZrggcmzC&Q{MGF^u|m~~!&|Gj8XF8$UW z0%MVWmSf|<$?|zUL<;&IM)sHHFXF<_d!J?@urQB!sd6()!m0TOvPC-UGWbt6#Z&GR z&}~W8H}FN`{$MHtG**%Y*coW{U!4$!e@IDi;GrbUr}@*`=lN%o*Y0+k+=tKJ2+Ro< z_md8%Ff#LJJ?zzqRMv?~`lpx=_aAY)WF&bqkn7CaS-V^!$CxJzOp8J_px{RWSZfIj 
z6`8g=qNCDukdAT8v&(jhw(Og|5c?-&4aidJ;h_KdxRAWrlE_i+Yar8HV4)~;R(M@Y zK)52)cTYB5hBH{bZ#C%Q)s{j>1eJDNia~efS@gBXhrqJfuu86^8;=8DunnESCBGH~ z8mIC<=ix)~e~KsX_VMeiD1G<9!6WbdtoIG?eDrCEe>-i5J;6HX>pDBkIxEco*5_jy zKf1pOU>3Y+pZ$mUzbpNZ7Zx&&mngzM!$BW@hl@dSf#HKzBhx#2KOkba#A89h+m~Yg zheZP=3B^>Tu-1Hg_P(?7-)Z_k^rtUrk-pVB_Q4T}W+n(zBzvSODyK>N1(&Pqcw5!TCFjVy8IXX>B=e zkoJN0DZhZ%t(!zY&N_k)S|raCut+jTnk1oTafCoSm#0_3jqIREhT-Ye%<}vaH#r)4 zw1wcp9>&xyQ_|?{(zI8sP(-!%vu|D1>3Bf-v)M_>QewmTv@hYEE!k3oU&TwL^$1ZMNf$h9Me+m#o#CS2`AR6G`FTH?InWsyC^U1! z?2K9qzpC^SDN*Bu!BqiEv2jC@dnLA%9n(D&PcRh1%zJ9K0P%KgplCTWl^EIuEF&b& zpyVz&^!y(xV23b0qdyz7F&%wx>En3v-TwP#&7J%Hg!1}qJTWKF zMdDJ7ez|@wxnbka{RVIBvULA2&e|4PDqI}(w#KHyZRW|XAY7D$pETZK{XTk~tb}ovkT*Wt%CLC(GrfvC3F7F@lbSH@mQsOxV7W z-hBaIEaVFbnBC#Kn_5Li@+#X@3lHCN+?FxJWbCG47I~+LS$sL-5Q7Vr>QWnif}TpS z@yd9NFP;7T<|Ng{>&(x99sTQa*Wv@zrI zoG@l*&%m@5L6q$7Nmy?1#8TNJM86?fT3Q$~15+s*IMhgz$6nRy3AR-h_wl#SsAw3-$YU) zZN^!*B}e@ht(Xol0boXDeD@eV#0hCTB+I&vf_Yx{E`G`Q=Q1o3r>wU%X(-w@^`|{6 zq_Mr0V=)m~TZaSYv%PUq$go?A;cYDau;$q;o#`l(YW&7u&dB0Di)$IFlvdTCgtzhP zMd`IZ{JV@~Jm2m3D72$fJ;59&$y=L2)jGMT-_RrtY|-(sK2_I&O6gUBsrjtWNx8#*@W3QwYAbN=tX94Llx6c?uvU(U@tf2 zj=AX6=jF2HCnbNn#McX|z3qGxEG|zgGcVhOIRb%9_EQq;lzG*tO?r0^tKnT;Cymxs z;l9JE2zG)p2ity$5(CfO&O>syXtUYj-PPaSdVc~b?P=&38g(U=)+SrC+@dSvkh(Qr z(3R_k1Ak{^2rH`{R$IRPu-5V78;r}GemHH3))?C9CHeB51x*tdR}C&;r&A~HAHvKsvcgs!W&jhTbP&urtzeD4ripNiwNwUxr$*_LvHbH4FsEn@rEJ5DDTZhsd9D~9Sl!>;f@ z5i<2=aWE%H>@A`?S>*_f%H1(CiD(%i=!A3@+J8c^=FE1|3>vk?jhzdJo2LgjI!PBp ze;is19EpvaYNU$ukm@&nPSw9|Ww~kkP;4b}D%=$gE}& zA*H|fQ3+g@G$Hch$DB`oC*Yr%;lR{^%?aD4)IP< zhBngnzIbMrUgc{@=^I~m>Y&N4)6io1=731Z>ol(A@ay2ZO5bGS67vc|q=G%gyvfOhltb?0tfP1Z8(Y)(?ZI46Fb;O%+(d#y z7VO|RI6UH?*u(7`HR!qtSqeiOd5j#LRnW2pC$+J(c(ZE*H(PYAtBt-q0#QV>JmO5h zH{Xa_76}?XNr6Qgi7^g+JF^TD$od7unf&JnVBf?W~YM z0-i7=6)^)jHavAVR*z~>!|-DsYbQZhsvpT+OCwzg7{1sM>_P~|WfLKiQvEm5HS^$M z8W+#js=w)MKWY)Y_qvJEfA6m+n2` zc=v!BU6~Q&fDm}l&aMYx|1$mXX~IHEn6iRa3U&>;O(w{#rTzEf$Mc;P=_G=}&*~F~duQjI$v019|dy8?Zy&o)VgZG1fDbHMo>-g-4 z`2zRgdSSgm4T%mUg%OsqW3wbx^#Au|BD$Dh!)-$j0=$lqR^(Yi4x7)&hW-av2wx60 z66#Cj+CZ5@*5bByH}Q=XiTg$eFs8I+10YD?kp;QRl&Ts|c83sk8b_`dU5f!6y3}r)QM| z70CdVjH)rPWx({CN?Skx744>4HV_1o0V-zy1YOERs2G)E7=!W3#6cK2U>Y@}TntJu zjdHdk2pVveFrZ@8h`|JKO2EtS!I$}>Y7~kQn1Gin9)$sufcH$7@I*7I z4PpRz(N3y>fPgfa8^!Dvj8*VKv3M}Xp^S@CSC+UrUU0{3c>>L(Hg2!Xo8|- zzR^yKfW!e9;81l6d5{Lc2wX{nUM3F4h?NniK`$3i!pM^OLxWx_9)n>90LmQ5SjZp% z2AIW5Fr>gdr9JjCFU39NfVaFJW|^1L9t9bqpIcG@KgBI?K)cG8HlST;%Nl@6Q>728 z06!F9=+j*WVZ1ADIRk`Lwp0Ljw3qGxA*C&2z#ZLXECwmvB{x7wamyF5rn03ASX0`v z1*|D=$pO|Bw_E{wDqE@mJtd=P3}*VvI1FaG%ikE|^p|NE<8+r17z*@N+Mo`)OE884 zU6m||8tkD1>IZxL1WAECG(hPxwKP>?pb)T!BIs1cMy0Djyaq!LK%%(i2RKtPD#6eO zH>!iY!HwFW7I32qh#%aj3t9#@s)Dq^3#C1yA8WTKQfB2}&#jD3K~J5GPEOCPj80xp zf{ad9&#ugi1ZWuC_!A@xZqxwD$>`+v1j%sFItF0u(^n~hP-IHgMQy$REZrg0ouXSU zQ_j>&71~BCE|X-{3;usJgaTv=U!xVDNwe;RMNj?zEWIPty`x*bQ_j3e{oic(pTBeJ z_Gjr7p>7%7s)2IGPO5MI z!I1gTcJt=SSTA_^G?+ELlLJf{!h*857ntOf1jmMAFtlg#n*8MWw;kx4Dn%Q-b#TY9`mlyv$$_0fk@l!<5i|XM!WHN^}RW`FH*nMnKz_cWQvfqny%*lzw5b*Nqb94xEP zPHeC=?yArF8lSGu=G%FLmOswB!_MS8l;^AS#6L|9Bz0cZ%a2}oB%fYS5;E}06kFS# zY5|@<@TDPoEzcWb95Ci{FFwLe9|J2`+lI(1SWXT1hk?wjZ38#ZAp+w!MEseiUcrZ! z`Kv(x-GPF}n{U%h`P7YPj`{ryk2KSiKs5GK6RXVuVjZv0Ly`Pb;Ia>>i7b=Zk*zR! 
zaQFsl`W1N3YHPaG zJoCywPAWHT4kpwY%_~VNH);NT%(TzoZ`d4h%w*d9=g=r$4v6cx6QpBh)cotvCqEKM z=!2;G@3#F9{=e&_v0K0hYpg4zH0j)f(aSB z_TQGg(`@;0`Cj>XXzpR0%9h~_+Bzic<8qZ zbo1!9`n2{xZH<49yzIS)jogdD$X9CjI6)R?O_XGdG%$~4_9HHw!0kZ)xe+dm3_cIH zhuwWN3jf}ntOINUcKcL$bcC}va?IeWpb~U(EZ|a-Mv4G=+mq1$1+?R#S^Q$ohX#+j ze%S4!5u1oFZ*?@Yq@ISoeneWTCa|s1Q$a}V!+_&&4UWbJn^|2=hYV!ydk2(`x^_0w z(s|7ZsB4^Y2vz7BWDSpcOiX3Wjt**op8<~y)8C1G{vQ4l&;PLFe4cy_yRY3S3?~t0 zVMbvlVH*PuQD2PBX;ctYSX91BrSMMm$frviV*q!oA~Ou*c`$Ts`H6ImpHZIFH@;pl zKllZln{QBWkX=x|2t%#sobiJ@KDU2rheL&QkUk)}zKcJ~ZXj=vLiUwvY%(QA>wS?< zVBS!Dpx661+8|Pp5Sn+CmroZ$7bF*UI|9r6vA#C;f$L$VHEsCGhTbq9#7}iU3Qkow zuno|T^n=gy1|I8iv-E%YkvugGG}W6^?k{A3(rmr?oc`K!FTG(uWXfxO_&|FQ zc`z7Nolb@V>ZK_DE|EkPHk`t0{UW{)h7|~HJ}q2^%S5w)YlXmU(%asLR8+6xw)fR@ z`jpwqKJYEKK?EVI5M4+*gm_cXwjt0S#!ezt85{ptcdXZ7TE_2FhM;e)t6lAOyToWMtZ^KMf~38zx2xA?n&6%1-Q?Z-clRu$ z4uwNS`<&oNuRgES)+*G#(0;<-=BqY=xJcl^DjA1WCC38cO{mvB#{Cqd>`JZ4OI#md z4UkX11|Vggv;ZMkTdvmGf;j6CPPA&iO)1wdv`w^G42$0XPQG4sp6ul1ty!sQ7;zdO zDRP{*D{@rdA8}CM{n=cw4|G*nW}6iR6m$uC$_BLv^luAcUdB5Ot(-BMo`HDt&JeX_ zR=1t{n)|zisS0`oy<|b&KgWK!U;d3fL+6z^6V(xLt#+w)pJJ?rG3L*rHo^96D`PtD zpR+Hcs`e^tDfO~(sb{5uLM{Gt$zze$whAY`i+51 z5SDdW;&<5(f8am|g#{HkQy!bSv@C)g&1G;z<_Y$`_cZlTPDyPSp-X$>3U<8UsHl#P z-Ag*Ged~)XK+^LyUk)6nz9pA@aCw-@U9gjo=s}q0*7RkSj*~ohh1b@yTe+a$6|UEZ z=-M_Sq4yQT?A1SeE_|#4or;NMAExra8v1Rzo$CI*xi$oV?MTK?G zLv{9ngLdQI7z!zFc~h73b-2!^pf_&1L;CWL>s504?X*KueeIg1k0Ko|O7#I)?@?1L zy86YEWoYL`cbjG6Ca={)8KY`~_wVDi~Vq&UvLP&3MC=bQBWZP>%a5^#2_8N^nYarWc~_-L_^RZA8dM% z1_(D~9`eaR_yYSvVdK9xC5Sns03x(;4xxthL-aN%H^esVA@GoAh&QAK!Vg)7e5`p0 zA>8{l5RUhrJtUC>-0T(DGP zT6}VYLi>aUy&Pcs>;K(nAg>T#h?)Vi1G58~1GfXN1GNLZ1HS{I1HA)sC4M7*BVr?Z zWN;rhlPahWXe^>k;-#$Yg@eOEVX1V{o z*A7|1O?rlQ);IX5d=>U4eq(O95^jNc-<6b2tY1u$mQ4|g4E=3S!RO@J;(y4Nu{Wh` z)iVyaB%U~pdGl%XpL14u-lz-ps+gWvTt?=UCVS$Ew!s(^-UqWq|+BAXIu`m<2fVE=H5KRTt+@xkB zyZJP4u<}TT^PzxNbW45gSOKzMgG;Bd^Y%@AxDmm) z5>DO2E|ui^fl-)G>)7G+YANMKy&3w=d8t-ok5mfZlZk>Cc^y94^wg2kk+wYGwu*jh zDxvR|^tzrnVg0)JvN3CPK*t`O%eUrD=Zn1>itm9O<@7VJ89QZmG}!LAeyz;@XhCS(pY9XtrWn5Y-qsxd z*zow;o|BP&!y@>bddG1oC}EHGg=XCNLU8l`jLS#IFYGO>I$~?;gJ4aq5cfdB3#6A- zptNL6lqq(o>Xp9s&t3f6jP(s5D&$c0x&TX}`||ePFyb18BQpQhzTB=ql{Xo~!ugoF zY*^cWQfdmQ6pXT?wvu=1g5F-m%!5N~gJt%_-#dLbmw7|GVHdK_Juc*AeavmrSKRc| zpSUDVzLO)gZ$hJasb%fZ6HHMhE1lc9)d`P@ge2+(Cmcl_D(QA&DEcGRN4-`$d?hw5 z?RX)UxK=uv)(%~;VIR)E>rCMOLMr6mg!t{?`^mpj7>eU=1)-;&tJRDo5zSX2&+B-h zwzBevodIH3i~<@cF8=vz7`c=13{i2+##^;gh}&Fx#gAD&z-Ea zh#=Z3a`Gx#5F#yWXNWM16!Fs<(XDlR(b+ux%|CJ#rgWL+Gbz{k#|30$=gNjrW5Nsjpw;cW%@SJ zm7$YSI2{8PmalH!JmQTQhF{A~il!K^@GOZaBXUYoe)HF}*XKrReR@Q+=W8GU0K^b?Ku>m|5#S7b` zuO}1JKos(>IJ=s>{-6{bk~g1n5}f@5a2yGv=$~<{#VYIKHYq2tW!(aO1|Q`lf$3Br zrD$&q<#>o>aLS0%oOnF50G;@}Savrzv0h|>Cw;8+Mw2{%dUs;n$wqgu{#!VnagF$} z*f&&Zbe;uP<*Sa?mSzBTqhjWUsIRD~NB);FaAz_bj|!am(8fr}B_Ku1T@XPI|J5P0 zo*d)lwBABkir{6s9wD$}0f$Mmrjdd>iT!eAh}mPgd>TkNitBxX{<*o znaDDoz1sK*?I1C@Nw4F0a`|@} zzSkUtHD)RFmpUSe0)MRZXzSO?Au14qOwO0s!|+rKFY%jyHb;zJoFVdETi1J^5Mz70 zO+CH2B};i3izG~JOJu``EzpyfkGH7IQnkdRZCw|$+IIb2ip6BVtk@O&@zL4N`Mp$1 zB!c*sAYRUjcv+M}#Q1oSLc!tuqt*K9Xv>vZkIigLwIV%%y&-#mz-Ug6e3M+sx1W)O z+fro|a&ja~9j&RgB+SRd@MGOjIxI5md~H_C?b)4s?P?iD1>;1pRLOC$>N0&xw8hMo zaphtsWi~pUQ-G45l7>q4O&?>Y#s@KDSEG88voAg^E|qF)=Ng|nbG*?b;-?NnFf9|J zG1h&kj}UFL6vv`)_TYO+Epvc<4&Qyc96ROckCaIZ{L_*;je1krC}Ph9V*AUu9yg zpU|TruW}maWsft()be0{do4A#92-e{zZYJ=e{Xk|ob))H`Y2yr(<#J(XkT}uli^m` zJ#Gr+ca1)EvVUHSY>G~Sig*DV5zKT_M$_^|+SAR3e*S4J6Y4rXz`gFp@J^Mjy4W!8 z>*>(!w!a4UBI;1A4Y&7i;5%Frw50qQJY@wW#yA6Oc(*%g-fXb^X~U#)YB8DG@!^d(c4>6eyp3D# ze;F3Uz+=|(A}nG`P;X#Cc=!>$2-6YZw#iTw9DrH-1*?51#smSOXuwtnMV*;%_bPCd 
z{O-cA!3#qKn(3%7L(>neI1ijMXiM}6X`BRwcHzri#bi}D4YOM~i!Z8Pos^n7(D4<9 z65MH;>n%4zO#~a*-F8wQtZ~7Kxtjn>4`~9u4-xj_!$r!PnfQs4k4akI3oe=z4TaCp zVS>W*s5XgFMB;adJpIPPR{;+h5vjnhiV+K>c+TWxxQgWJftT!_26%80=ylGeoEvds zYB*$5L2H7Ho)80XyiVKrm2AJW=!n?KLkRN`B0?~3#Pg7>y)yR^v8493g&vzu{p4FNrdDHoGkya}X5T^Sl z4Op%&tqbM51pY-&)k!7x!{3}D^Z}}k0C8-N(Zo!ALVjJBy{~Ow1^N*`XNb>3r}Sg% z&anhi=YvlikTMRF;s}&1UHNGLQZz%*W@p1=%qA4b(pJ#N^WO`57j_@_-Um@Ypcv>q7(55DZ-Y(zPQ2}jJs)04ZQOz|%VYY!TVX45(4KHw zfQml6wIkLSR|KuClA?N)utqFjn#&0l- z^gLG)JrFrVdTB2{`3Qazat5OXY-q%?3lDEIU-EP)9{(y5S`G^=^nyzfr@VaulRA?! zlWxtvA@v1njx$g3SY$WgnDK47(6-H97{Hukr_9jJ5rB0+(et|#)uzR4m2!UL4^;b$ z_dOS$5tjvLgKHdv(fPe>BHvHfVzfn6?*2i~D+o`c6GNPeBjA=CMeoGP|aeU`17DmC|tz161hN%AY+veoT#T9Bzl@ z#w%&eV|P9MKLCS3e82qMp$pjnN*m_K@&`}H&LK~IRfuDB1PP)y>kL=4l1 zNe_a^69wa3_?lkys63*RAZ|2a|AG9cu&HHN3t_CTcBE2 zhYfHJ8{ix^z&UJybJzfl3e$2kmpabfV^QGrXL^?!NUu}5z0jH49rOokaUu@`kjuT#1i=o z$=SSCo6pJ-`EDG{Ri_+~m@lQ@V`j5egB`t^wO1p0eG9^F)nKEt}yla9~>CVGA6RhKC54Z|RVs${eGdFjhm3A&el*A3!;kfvJXRQ3qM; zQNUD%G8&DA?RMK`0QHfMOtgfb)B&T-Wvq9_!eLu}d!Wrj5CkjvW!Z0GD_ld~nBObn z4c@wRjf?~3Bl(^7fQ9Y0g5;U=renmb*-tk1JlK2jZ$%OCcsyiqB>ZzfsU6=mmRkAU zmBiB^Hw5AdOh*D6zcfpo(*=Rsh+$t=vp_4v$O@@xg~Hhig|k&wVa(+80jwH32r@{Y z2EI?@yiXGaK1~$(H1K_=KuTi3oInDYhr@90>P8j{=&|A)K5BNN5N{sVh0#>jjVu(< zCDjU~pG)!$>Rj*ruN?XOFYfHmc>_U@w(z5o4ohw$~b!yw5XYU+Xea{b3HUTu^^I)!fumtv1q!J^v zHdI<0Dy>a#TASXqHdGpt$RTzu>9zUg^4vQ6Jc(H7xN>U5*`P|-rz43m%6m&vASMI`XOI;WDZoQpN}=bLK?|Rd${?Ma7r}Xib4m4%jQFS3tiUWi-|!4jB=Lgfp%eA#O13 z*J*&o=+Z1!Oa68EB4UA28+h7!#r20;Y91Uf5bVx>zo$n^w3IKuwkK|7^WR&{wLK2M zNC#WjHTfz=9{c;F51)sG`|Fnnj~v=n*|aQVmcqoV*Z%O%6~lKwwSCVscLDbONMj$x z0ru5l9oUZ$%08h=tY-l<^$_KH#Hf0RdOd`>9^iE{0qZh}RuN(gTB3%rsL@o^Xew$L zi}2wCPn7_YwiCN@crNE?2JD&+4mz~pf^xmtBG4su76|mQsvPV(t$>a}*z<&et@HPhmX{1Ee^?U$7`dpXta!q${UE#mt)T~wy{2}wH9YsLBCA0*q$7<> zM|Li2+qSa7YDzY(sLB66;w3-Tzr(@M`Tk(zDiCM7FU^wUfalBcxT;UuE}fe)3wVE< z24|ZFbDI`r+B8_(W{8ShGM$sGcz-$vG+87aN%L+Q_I1M`?-m633v_Wq)487_YG9;4 z?naXC+2c;_)vCUpFhlXqSDnIRSUpIgqB%|s)Z_Izj>r2&xG&8BTlHeSXm5h4W}91= z7`B5Mpm|teu2~W0>dEAoFe^Y8Gq2_?>Id_#x4sJQ7FI-%WJOOZ=&T8N+LH=8uMVEL z=#WA0q;~3g*Gw&tjF6=V+sY1G11cWmfI(T{^Zbk{;S|?e?Ke zYNR}S7XQfYSjB~Fwji$~IX!d`E6ds*FzrgWxxcsEp`X5i(23FcV{JBTs^ zIR|rK5#l}trS?a5`B1Kjm!6%5^NhB@qJUTP>S>TFyp7;}E-9Rb7$2j(7E}rF7W{a6 zz6>y*Cly>0lB&N%RgNQtOaipDDhPQ9)j&}8f8i@dmTzH2k0hxzbTH1xL44l_)OHe^ zR(>s{S!-$3pI7kK#kmQ!uk+x{4Ojo* zK!;izk=Tmi8jk@r-1CJW<3MXD6_q!pj8oP8lAOVIvX3A z;qxawCMy3n@jhvCB?7?;kD1IrPUB)U;EzZQfroM2O6sj;K2Ol9C-DSShjF1`AOpY__iiH952o5XaA(=^4zpJTX%3c#DGVd_kO4oGGoCS@u z!r8RC0$LOM9a_LCdrc80n9Ms0IQk@$@T8Ix=27Q#;a`4hh~M`Hi4A)G^)`>u9JnvE|Fw z;tz)#;`aPA?#iZoU$VVQ&VR$6Y=QIVnoDnyw^P;F66|V-!Z8_(%@8d)18={O^0s;j zZ)gU$fH1z5crQ>>T|?AV%-~CoGCN?gW^5My0;1~-T#`BAwO>%YBI;mJ#H}^NdrA!h zk3Xh>4vSV}7SuF9Y&C|wopseNAFEf6-O|;4@VJs1?QL}FLHf;b(blnC*L8yxsgb?Q zn@5}DMw-=;U-SlDL66k^*%xlR_2-}K6+A&#IB0Q+tUuzb-}d0xmIo&?zOauLJy5wq z7F+}@fU7Vq>KeI3WZ^;!Va@6F@_EyQ`@C*jA@x(2x_Og)UNKGR+~*bDwuPx)`0{Hd zNOEHIrX} zwt@F&0{2&etu3sTfE%;w15!Z3^sc|dRgd?K6S4Q`d0yhF7uB^6#7ZgZ`n!UbH{Mf> z6Ev8GbO|Xx#;S&iVmNy4bvOpX!f*@){$}!<@NEnS*JwfeA^clF_fQ=GZAKjrBe*RV zr<;Ft!SfOeeyTSv(x)#yoD_c~Nvw7XlNG?F*MG z3d1ssy7LM+CO}75bLGX0`URHu3*UAF(oHv`v-$VlW zDOPYw78khp)vP*Lmi%eZ8-(|+TzZpylS*SbY!`|+KA%~JPfBBP^9->xXIK(`$EgE9 z)du*eSi@E;m=8y*cW4%Zwozvc@Q`rXAjdhw?46a%|lDOrC zyOyuK_l296?O9WAHIR2nZh`fdd7G|!;EF8|Zd+ov;XYtKXi~=O&yQQZjKw8!{dfKN zrd$5Uy(?{gzvS0!3nX9^OJVDfZNof#oZJ!N;FlI*j`GO}bP)Ovgb-A+N~;z=UOvf3 zR6p{;u<>zf^yDvD8?bJ`%H(~h$`^z+`k?F#SXumcj5Q!TgErPtMB~T#hC-YCYmw*U z_?HW9ZG?Cq7@-Y|Xw`2pGJ`OQ#sX{$2Y~+i&@y{p;a>Fc4#2$>?|}Rpt+r_R4d)`b 
z(t6#eh$!|1a7#21711}AUwRAwD!6_etDV2T*A$+Vp>NjhQ0vUd9?`Ea2(-UzRb@>AyV8l+^Y!Aq; zkfaAmoId|8d@s$?q|5+cLBb^*aqchd2Mm>8g8vk}OTu?)&LndA>+*WmWHe|#ocI+$ zK`z)zg0p}zPJ$_JI!bLTtO-C{87C?D?lFbhHoqJ|UuHFc_?2MJ`>m1r`-XW`X9RdC z^5=!yx_B!ix>Y;qCV{VAid}`)?<#DSF+hn+Ieo~OGE`QEY7Ou&5eqGusI+sWH#*_n zE@*}*){@hfY%^R&2lG=zHI42%O)^rz;S5+9B7Y|pj@vvI zJ(+)qU@QTr({Ev-vf{4@%6dFO;c4C(OnA0BBlELn@5Mtr56DWB`!9aB`0n$e04&ST z)e=APmAkk=sNkR80v2h+`jLMMTHx0nc&O?b{AYlQp1Mh{+ySEsxvi9nnrF&!rJpZg zAq@q9S~<6>>$ZzVE7|RiOcgnAO!BRe>pFi_$e)X^jNv$eGor^Xd*NvJi*BpP=96oH z$Or#ui^m~*MY=r{@COO5@8SMXZ(na{?&;F8vSwMxM+Uz(6koGuB#ytss%uy*)H+)) zy-9UYX;d}uf#ZG#vl5NKsrbNeSnD<)Gj9#g;AYK#6a^0|aGR$T^Hx~kHWwYqM=Ef4 zwB7QeV=NJ_0%)K7G+o#+k`ZP zJ|ITnx6okjYWU5GVnEL)A=vhEwLnSet*BsT>17!b;{Mqdh~LIC0VoJs|zi;vCU-4|9jDj%fai- z{h{?XpRKC7HSCCVeCB6|5Bv;R^3%^BT6Xi*>mpTcHoA=HTYKahVAapBTK@&5seRY| znpGbGR?PqMHBNByXPf~oXvC)xz7tT4=D-|-ZvuT)vW&Z<( z2TQV1d`$sI2D0xe+Od}}@5U}u-YsTh4gg6>Mn*ou+R8lcpv^#zm?PC~wc83T0CLf_ z=~Gu$dzbXrxGT$o!W9PQ&$jBm+ykF$S&?>13~)xI-o*X2yd&kxuPn0SXF+eYYg-%4 z)dVhBos0ipmy>uU+>~_YpKzw&_oy$w^d@l;@S_hqq~g*}Sf-=3(OQ!ieh(350u)PXwfDiI9L_H=+rQk~07e+Bb(lQuV;mEb41ivdNb73hFMw{@Mb& zu`s<)s|C~3$q&;D|DaY%Tx{I>x#7&l{v`sVBM6|Kxyr81O_lxi{$$s>v31?$wd?oy zln*VdF`>`(j6Tskl!@gk7+lr(Ai=(<)NYFERMT*MDKh(#+%ow-CDu z5@PO=&66FWP{;UMVrPl63j$t7#uc`3grx(!#io$X^Pka z0w}5tsf8g_6*^h`d{NQxGNm$9G`yBFIHLYgxy)1fFVU1a67Ywu42h=+pa}HVkk=bD zQjAaCGHl|>=j?7H+ERS+Yvg)^Q7ahYSpB88W zcgV)FaF`Fx5cZrX^YxSElht8Z=9^DeV3n^hB4-NLxnx0YTe22S94+p5x$+=+new2N zk+kxl)`Fv_UZaezxG$8HIr3HV48_ZFe<0}sQ2ZYTw?qTJpu~{>N&J~)EkUm*WMRnn z@ZXcH1W_>&w1$en6YtOIc=IBJ3>P0KhdCpB*QmevUA0Y_+zxvO2 zRB2z@p;T1OZ!(WD^SjJT;BwR#QNqbv&7>va_ebpp-5)vbj}&JOyZmtrX~HY=zvp$9 zc-R}V8Farg^Jgf71k`fWOmq3)x42}Zjs!vp|D?l_znz73L!&I?zr}xss);~c=O1;s z@C~q{NSj>Nd=_8>tEShXs%f|Cr5%KH%*eao_m)Lu48|@!@1K-u%OqWx2C28RzkuV1 zGobiUaR!v+FCQQTngmY06mmGhFkMvjN+@wjtG=^XTY?_59$&DjQO(DI)B?8wZguWD*-x>8 z5&vC2Co3ict>jOUrg#_4?|`}8-=1nGIlZHn=kbABxPDX%myT)$xaLtiga18e!eTKqhVvL) zVZs`;Rni7+Z%%`@TGUWjCux`=*ql{#Jcrc^wM65ywK!Ib*Vb0GmCxW{>c14iLm|rh zMpbX~%lrU^r3!nI(FW47-5bUV8%a(lH;iSqeaUIyH#Us9b4JdA*E*h4;Q2!6gmwiB z;dTm~v&#F1Qq{{hzpTLX%Bhka%4k37n3{*saGESxQX=>=OEi(Ey+fg(z^D=14$DYn ztKf0F{HDhH2D`5ttZcdd+dKB#YgS~NH!rW@Szb>u?)H(bwVOY)I{J;fJ0{xwSFCE= z*(~!sP4oP^)~>Ryt!@3gdds?MS1oaaXw3>vv(xJed!>ps2UeeURJJC%hTA)UjUECv z`h{*cR*p4eC(vpG@TP`fowhbtr>)i1X{f`$5$bg__|fThw%&=qe#5ClU1GE$aV@J>D_PyrLQ*&P$HdaG zu;TU~+|YU3M6;#5J)M7Oc(iH!W~eUK0gMCW3)m9uCrJBv&?>#ULGx8@W9GRjxLxlu z+wb38xb6PZ8|rp^g3B3GCfwwF-JdfUd;F0ZoS2e&$v@S=6==P&2QCfJNA&|xeI{qo zui|Fu)7l219Pf9&uBa!F;2BOTQg03UCk38l8lFTC&vH!B52!K}tr%SX0S;Ofo%DhQ zg8-ppoK1aeQ=1>0T++7tp(~Pu9ZO_AO<0WPSksz@y|)B&V@=tS)+7&i^8JhG6phX@ zuO)Z$@f$w%%uS7gDyWo_Fw4qgSn zd%U`S5cAC7=a0u?8?C4C^B4;hJC}29OwR7?Zf#iEKvehV`iX}AhW_r>bLokmZZO20 zVch`cAw&J9eka6@>_r5Hfp4}oqwazj8`JV|SOQDGa^}ogQC(9D>`cxoJG;#d{ssc; z7y1dFL}#C9J*R;4uR|B$mEsjhCju2*1MwJMB-xr#_d@{hF{WnoYQ!lmTp)az`8T3b zP2^LThu^lqNOngws+o(lQ8#wq)3@&CzL4Gm@+ZHQaa4ELwA|duqOy;~a$$4x=u&s2 z64}aFmL1G$wp!C#j%)=j+t@dXESpn4x58E-Ijl8XAKsDZtP2^*y1wPjTR*#b?q#@M z3?J4Z%mW)bB5T*o-C1~t`ricMuj@&)4pf^&ml*T=B7W7zVPs>Q;I#10G7nWwjhVItH$^=o}m}?{wjfuv(s&m2N9^m5W>E#1rzpfvp zxura4)pGx{+OsUe+v!(@5=xYzGjDLsDjT}Z34emX8ihu{3v~A3s&fiB|8jVN6N+{P zlpHT)L91FbxLif`|HJh2MOYxxaG+$S8Eeq(4$CGwe`t~YL#cWD-*D^4us^Q};Ntj_1SOmk)>_}OcXu7l3hzPEK~_L|>D&QK;@~i@`QVA~uVqWC{yKGhgmqPVDz>M@l zen1~lG%L_>noVIX#a==KN-O+odatNGJ~DTRdb_pi%G-vAKRJ|q4QdZjc&)z65%Jhq z9ZQpp$rDSvyT^0By=DugH!^#jmFVONs>YzTF%32EEQCTYMp-kz>mGuG@mYABadUk?Cz3t#^CUUFAy$ z(!2*I$F8mUuSfKF)vI;E-8lop^SX#n|=StNQ| z7d`!_s-B#o&{2jxrxdx@1p7`Ys52~sAknj+reCTo!AzeoG8BCTvTQy12qSu|@G0r; 
zhprsIYc!tTa^J?4x91qEA1WXHW6M6-(F$}9&^>Lz=3H0IS&%t<2Sx^NKf2}mQ-``c zml0fHXZTzvP&ixm=Q?hi1d3-_4M2Acp!*O|{7I}9djp|co~moD+gV3SP$NnK_=ubo ztbltMDEG}SXeGG$XAo7X`P-x_h|u%D$N(X#;(6s0=p511 z%RqOURJ&Wro?hZdY`+x+l;t^q>HkhWaOYI`g)wAv18BNlvUY z%qrl>>gcSZ9g9L)y7dEVT({p+`GuW_2^&A)fR0uGnO?4Mz8$LafFq(PE4ZT1a z&2)rTXrjq3>?^)?8Kf68XVdCp9o%GaQa#&dbUK#;&)4glRlpfL z8fKNkId#`B%ytl@|0wC9>5qrBL(>5tk|fa$z(K$ysWZ-6pyLo3N>A%E)isWmRaN4X zYAASev0=(b50AN)W)cn)i8Err1wTvl_AHNWxnnf`Jv&UQn%g>K%l3D)tX+D!q>6fu zB!TgqwL|qEYRye!hu0)1hG7h>fj9iK3=G+uy+OARvtW;6Zy|dhIkNLnXy@tu8#ne| zF%DZhrJb!w4vGVPZ=iQ4_}P!m5T2ZW$>D?hAMJT$_O7m7M|akf#glzISy}_j5#DjNXRyDW?n+V>67sCW*`gV z{YzDMOKRB$#pw0T$|( z#qFY3Xpo;EK=WhS zcOceaxeoNCccPx(ACuk zi$MNx{zk8N6dHkZhtqxF<-%lTbiWxct;$Q*1LM2OvHF%u-)Mc3-%v~Q4ly=t+I$!m zUg^TAP{eq1yhw|!UTvDM zy$tl9W}^etavPIed3b!sT@wMN2``Y^Gu6)FlVd<2VDm0=3DSKZ$}FFj+1r`n>sUYn z_E@d@u=W^w2Y;k50GToq+i(@>*-gZA)Dc2W_<$YQHsgoVWbN%t&G3`;p^86M?=>*J ze6;33%K!2ewl2xY>-nF*_44a4LW$sJ_@T+V0FLiX)dfRIIsZsjy2emr;NfMrXvmX@F8V9kPFN zN8t!jw>#ocsT`3%<*r6gJbNltY7J{Q`rQhp0@!Dl9aEHf3sLkg;vCY`&70F7g>*ch znokM9tvY%^odRD4A2hXh?M$P2Z*)W9qr8D(MvvF`R(A4p^=dioFVsi*u9`&b#=Hj% z>lKOQ(kk4GNH4L;Erq~PB{0_B<8=X!R-LO0aJf~pra{0`=XJAmJ$2Eo znMB)jeECsPhi)YvLprow)S(W80e-*=y|T!W_N=OfAr>=OJHR_;kQperi`;03-0jbpejyfP^{_)Q{k8jCK}M>F0LZ6Q=pk#$!j&lGa6#XUu1 zp8RxOfTNx`gzO_8w;v7+=1MrUqoXyl5zQvl)%FZ!)?t&eZHe^qbEo;?+I zAU^68MhD;{QavaNTpWu6tpK86umzl+wAb)Hv|F4YS4+f8?YaEt8ZQZo!DN7fB3H

bl%)!k#)4AAu|jUuTlv>p#AIK8T>!%@ zY^@7x2eLOul2)5m7MbJxMkA^vKa}rG&pOYqY_|Da7iL)A8M^8Q_E(R+3S8}JzMDy}rSpAw#Vc-OAM zLqpJq=95V-f>yx>g~3Ok13bQu&_GX``xrL}zIZicZ-khw@%L$%Pgg%)_SJ8z9O7@S z&y|O=e0`SRSnH`pcw-|hYE0L~q8J)Ay=cJdm3}mCEQpCi-{z|eaPYZX>jE5pmahvd zE`YIK&uujYyl9Y)&6-~NyQRIe+Af#&BOs@Gkm^KSq_pjIFIGZ$LBqi_l4IBKp2R9m)mW_>;lqT4gOv)Q4;XkFrtU7e7So^omE zHbBT*D!cfb>a}ubXsBN1duuHeZ7N@btawoZ5Z5Sr$^P5Qw*UgeQEE5U1vpN5s4ldO zWM8Re?}y+YT(6jak(7tV%47YI$^#e{ZwYE0|B2esW)TsuRQy)<4U}Ve6#2uYakvbj z-5&}O-TU`z%X3`>w6q+noq`^Zb=PEx@8SmrxOClJ87H)Lj+di^VII4~#Uh?90Us|z zHkzac%(J`D08XJXf;(Lo;24~?E}&8DA_MU-$(7ZOZCKNqw2_!|%@i|va7~h?a(Fya z_G%S0Emty13(VY_N?3K`Hz|J!!6;0IU-j}_#BEi8gJde!o5*sNeI*Z_StpHy z$={mwc|}5`dPU3}tLLT6u_wstRr1Khu_;QAxg!4+(6Y}F|Ag$*@UU|V=DwbZ?cAvo z^e{qoPCGw#FI2G(-C>;*e8oON@SggSb%)UWjD1l^y<1g+g4b%FU&u%J)zf&AA=Ln$9vb8A&61X1LE6< z=zk!Fi78?i$sr4EHKi-CPN1UHf**d?Ox7x0#Y^wK0~4!7u|fd6zIFlrsi>F+zm{|c z6Iq*f$qb!zmqG#a*(}P!ey5}ZzofeqE|;P!ErXfcQa4Q9&cRE}`B`wP?e)`u#e20JItNr6Qz3QJ9Tbqj*AoUfThD>%{fhk)XkU|I=Vgw(u zSCZg=Qu={hFEp-p{3n9eYTfdN00$qUe=We_`!~KS^pE>*dR6dGOO4P)T&2%67>eAsSZced*oIjV%M||P#op;v~6(7kOjmyjX=Q}G|+y0w! zeAc#Q*UYc|3A^gP+b8x9yXfN~UwC_V>Z4my9?q2UhBB0z3T)UhIJ~$m7b)%N4h;8q zIUKdrhTYM~{Km;Ux29Ff!17x=Z}sFzV$j2Ay ze(bAJYW6o7%r=}c zM6vVhmywM?)kC_LGt=%dY^o%r4Dp&GlE~pFcBeEZtIoh$ zv@nlnc9@+T_Sg0&3SqimeunITqsFvSGWhKQ;_v{-oB}Q{_E`0)c zm4Cx55qXR;c)r+8b~K@t#gc{=%w-XS8O*mfFNU{8K(`3v78RRl;NR33+1A^E(mNN^ zG96O_?You#dFX+>MvtmX4t}#3(niWzda^-W? z8?T;DX)QmNG9(mn|9gKVXP7KuB?83PFludRI&?Hdb7BSk3fv_#dVOsbJ`=`cdX8Em z+=O*KI8~ACRzS)NY9B=PhZ`E7cd%7N!&mA_vCc-Ual#Pi4tUTr0zL|m?)hYAyQX|I zbPu7W049PQUQHWwYH%P4zZ=w{?~^cM$j>$~5u-Y2Dk_SN%+UB8Y5MF+r#n303#sHIWgbM;3B2;K=6rRr~ zrGc(x7uK|`^)-pn!V{+Dcls~75B$}E+`@Furl2Spqn4`@oA!+rcW&?oH zr5W9l+?aJ0HjKq27x3MG0DSC74i0uuWAG<@hmD5OVl?P2dZP;%4I45VH%EHI#=%=2 zoTR!W+Q0j+1G`4U#ze7`Ixcn zWW9RHA2UmoHtajOHK|nVj3ym~w`VMAzDoa(wF*R#h;B1LgTlBo@~jp<>ILwm(G`y; zN+`q~LVsHMAlLC2*SPho-?2DD+y!QcZsKX=n`w)^WFklAN+xnLcj>*C5W8|>Ibja+ zQ;S|vIbrTODn)?KS~uKIWMi9H=j9J(Xq)$n?}~W|_*t7tFXFSy1h*VO`*K7Prd$jG z$}P^4<+Xb-VBF1?y&8Vg@DsdV4q^{`^B74RVQM2dZR9S%6|9S!A)4Xl;k8>~;9=Kn zneiO?4scjlQhsh?JdCQmN5+TC>HccE=4g?YRw(0wh?EKAm6ztI)%u~Q>Vu}9O0_OE*Moc z5%Jj-9CDq-j-f_qIR~dGuE~#;8wtzrF3~ ze>>7!IQETW;I;4XT*C*aK`0Y&6%S7H2gm&6|2gu_Pfcw6`1#ww>o|D5t9sAPh3;MV zOpf2Pr_jA{52W4`%ctod0_wp|$+J?YWT00q%9>h~HML|1)gpb=C=9XUCP~yTi4c8p zzhv7k$tpX&ZkOcMn&PoCIx_r4)Bi0zjD>z28p zeRrYw`80~3A$2ce%MQrv(Q0!4#R}B<#uPAiA(1w(Jb{8*yhaX~!^WXE1e1 zZH-hl8g-a5zGe$LaVJ|WMrV=(FMXDt&Aq=n>nvh zrBW#^nOfhwPp!3w_x6s(475_MR_P%g*UWo$>P0}4D#>EMF*{M5n4b8=#FGJG8aEqv8yAf<2Iu}MM9?1R-oOT-0n06da?e93a>C60F zFsk1a2qSCs8)-c)0(1Wqp>mHi+#8aqXh3qq-j)x7%1yo4wQ$c==k`sVoLUA^x0A&i z`a8z*u2^aFwwY2aF?(`0%nu~23Ytds5W(JRwxg7=#!Iu?wv=L|ev<&C#OAPseP-a~ zyZkOws5cVrj{Abi;Twi}_Eb|^laU76?;{IUA&{hI-lR3;BB_1Ju zk5RSy$)6GXiJgGvBg7*4+WBz8eDZ!6tPL0(hQlNKN6ZF;d1SwA@@``CB=4)ADEIF? zSe|%ucJu7+*~M8pGn<*+-t|KC;Q018%9HmS>JI)P81<;|tkv57$HE2I0!p^MeA$Fo zT1~jbpZx`dzrx@e-6#1IzPf-fG&=z}Gt18UX8~u>wGMW@Ab_itx4$8P>p2W{!NIE@ zLcx?uoby)p0Va*u=WA`sZrklgt>%~Ys{C!~8C_@jpE*p)6k1;#Wox`A4JL>xey1a| zF%ci_bcQ@i7{{rCJ>#uRzPgB{Hy;{yBu$*HbLYKVXYbnD@k@wNE+vS>1b~qLX|c0RiC=*)^1#AXrzBJKQ^IxPEJVt{xE+0O)F z45dYzkQQx4TC^2uaS}?4b~TNXe>PpZ9^yT^x{c2@Q%3bG&~SE%oq$&^qt<*dbX@?) 
z+DbNEU9j13}IS77(9N%5@1HR`aE^(Qj6^1hPiab5r~Hg#~$6 zbgDkb_rdI?nx9P7+&ov8S45`==Fwn!3sB~=m_559b~vE4nL!+4ZmKTK@pu%SfQOND zbwOT{JRq1ZgCjwkwldKrPRHxyuCLxkD-oF<2=Rxj!BNw`T!T+w+tGO5B%tXF{TBeSUV7OqMnj_AOl zJ;}N-SnA;;7UU$M9Q5;@JjKuHll30n1Qj(hxwA#Vp)6mX z$LZk9Fm>PP5f@44z{3zc%TpSf@0rs>LIN_HL^3LNu26p{(k$#kZI}P$*7Tvaqi7SB z*VxWZ$iA;M;!J$eyFAiDaEewKyz%xVC;whrutS~VCjGB^D~-(lO`ZU&SBq3{VTRFS z)ai8Vv4bR~%@vm4Yj1B9yMev=Eb$`Z)Xm37A*UAZj=|*H#uFP^CuCVPdQWs_V^5Uh zqCFdD7~;Uf$s;F^98lLEtlZ5X=SO4Ch57-A1R^^(Hwnv#pBb8L^u1#K<#ARBa1L4| zJioW3^d(L@(ZCO?>q2Ab0}FNG06$kjtel%1g5zHh#0y~Dha^tv7e#pK>*<_ruR7PQ zP%Z3lWd9YrWsFL$u%bM?&(P?-H3e{HKh?rbg)tCoBOOXMa821DUxpO{k&&S8+TFYB zDD8FXa;xwlvx?IGqD7Q%K^E~mwulTRSw#5=(ENjV9r>JZcp4ePgZYE{gY)wT^|WgW zR$*<-!FA4a5!V(t@7VTD^0i?;$0w8i{?2}?f0}UBBfJcn0ITRKiUy#F=Xz*4kqjZ> zK|7Z2LqmCm=2%2O+Yih^e`K17xavZLx5|(euu7g_(*`uEdfq?xufrgd`&(IT^sQrY zn=GbAJvLOq1+l)fEn?F&Ds*O@DxIq_aea&4TIK`zcpBn7(BEpT82XCcq$UQVvKWlY zVsI~(7+Oq-v1k#O{KEwt-}*#P&L?_u;Po985yM}eMw^+7A+k@5`+TU|%$%;qG>oGf zzCEIycQQ#Do$@7Ojqbn8PmQ^7_a?f*^h8jZ|tqfRulnrJT@`Xr!Y ziqwgy;qSduvcUy*AKJ+!;v>EI7ca?))Ub-LmgcfMdNnn64}0%-r9Qazn30Sc$rww< z88R3LA8ZJc;UF1+zZL^zI6(T*r+zZ*Cu0Wk_5c}x@B}KOl@Itqyc~c#i-eUhYTYWWJniuA@EdfD=eG6wFrX!C^3D%rA({u8A( z2V6FeX#7jkWy+OUwYpABXb|% z%O*01`BHiBJEaYP2>q}WDbY{%!PiKJj0VW4pN#s*sF#d-$e5dq%g6*x4h)im1LRU_-%3uhVi5p*EvbR{vwkoFYY;z%uO%&94uT&WMylvINcJK-%^qbx!OGZ@iR0O> zYNR^wxfGd#e@nrY4c5%@x2En$QJcW0wi@yue+X$lf4Nw^0%(po)7;7kZ)Sx5A(oUp zBUJ`BG5p~mVq{HFhkmbE?j`N70YMIX*t{}R%0kxlBC~}8-sp#*eG9qhBY!;i=HgVOlo?k)vjk~r9t;@hI+-UVc?tyEyWIk zpngPOB$620{F?*|o|J>Tibc^*kXdj{w`xp9sUpTpB=xj|H^fj{W}*QyI)4RNOu*c8 z5eGcv837!_#BdsB;(a2FdMWBha;0AR?kiRolz8$p%b#G)a5rU2rZH+2@U!I;@&B?y|n1Xc?6fs79mntzXB25ese~ElHg>~Q( zdBb@ER$5;oca%Ja$Qi%iWj*E3ke!)QhRS5rt~2qazDL!^>0{#hKiaa~h_LEgOYb87 z$Qc2goUxt~hzy(gU53`u;QYAjj1XT^_dP10F~mH8i2m8!`zE+9@@I2#trhr_D!IG? zXJmdLHPP#fPY9!1b-u1>WGL-b>P-5fY5-3_u)lPj zkp!12c4RiCxsa>FtutATA-CD$wR?Iev!B#*el8XZ#{jJaK&vmw%|w*wBX%NMsePT# zk=tQPkobF!hvxk7K&d3gAd7-Befw zPqW&0!Sb#Uv!GU-Y@)sd@xk2rOxo@WvYeigvrdb}$ujvJU8NiQouAeD^5IB18?Pin zc^^x^U0&LpRC6KwkX8pv6KOXC(aXV)y?x8?6%Ou|6(%48hL}hNTq!gpg9i zzbGh8f?IqUWXv@O59J3WYF=eM@ir|fdpZaui;=r&a8IRgDavGW^g*sjgRB99B z$&si=p#Twfx6Y(f+XIh%X8PDfSgtV|)q0a&V=<{^0o(39yKnM@R7Tj-$%Ci5Q*H!y ztOsKUpHn${o+IZ#D3c~1DzQf2VTX#2KgBKOzo2cA^#VpQm$(TJ;^I#U+>$o`1wq>? z&|+dszQPf`D%2Pd;yWDyBWI9j_Y7^kso?aF>?-DF;|c@fsQkfrB_0ORrPiB|hN~Is z-!KP9vbpT^M~2GBrjyYqnPHSN8qA5z@|H}--#r=%mwN)q9+-!%fM-4e^gTjkh`Uft zQU*jt51e-yjjre=a(l@}xXh>ZdR69>9|C#V6OZ_pRA=nRrCqs}#4RbLwJS&ipWb|0 z0O!auK+R>Oiw5WU6ORb~CA;d3U_aj2rVHwK>)uuk)GyAuLI|3eU(^w&dHIwnu`w4d z<^yWAQXfp_`uu0k#KsSel|dl%fNaxPs5@+?WQ5ZZ+t9&j3|h0(?a*sg%p+&YOH&>3 z@d!bDaQ{N+>mAi>v;s&%1Z88yh?h-jyGIm6R%Evm71Rv)f?>Oo{ray|- zFY35) zy%So!PIHG`@3L3{v!^t`K26zj<9S=os$wYSpYRs}0C%2y}`AI(n z%vEZ5|LsIEIbUDGG6I70`YZGGf(ZHg^3Nc$Cx-fb9U+F7as}CG0bIa3b6Oy*EVzVL z%bya^RbE3^=`EcS;B|mV`AHq!av?hkBt!%`aH%Bw{Cu*|z~Z@j4SnyL5_<5hSX)9b zmy7SH{W{fzNgo)#p~u&6QtP7rM>Dl^%<^3>Ib;9p&OW0jX6fu^J>lhpUN0qc#ki2)VE3c_+olVMRh7Qs-*b*wn{? 
zGhs1<38$|=#3vX$`^cqPD05@yz)YT_{;eUc{d^^xUOpqeZ=HWS;^E;f1z@T606nFc zKcPzO5|&<${0VqC0z4uF7jd3F9?nj`V)8@=ZjD-%xmtwuch+y z-x-6`D&4WwX<&50&P*Vf=?paQnX=23a*6^!E_Qgmi4LEapx*m2`8KHq_ZS4$bV7{r z$)B7{XbCut*g)j(yb$#)TApBThxQZQ=Vg4O(bOgA84J;pXOXcyAuvd;#oKKrk9%=$ zvA6w=lY}YF_i`By+@D4%R~f9vNLXQTnc>4|gV}5#l*tCD7doeUy>f*@Yi8XxnxWGL zs(a1TU}@R|q^Xn!E(}ID?g#i|)flNv=e(!n>TI z`s&srT52^Lt_|HrocfxznhG<=QF|07H^;e+D&_~PW$L_4ZE~V=8kPPl-_s~tge$%L z&us)N_^Mnjr{NPTEvBcI-!vKU6BEA&Pux=BzWuzKJ&c}8s&GGypP;Q2{eJa*HSeQ& zZBbL{-++0yOdLU)+8Y9s?YYd55ekZ%D1({TqSl`sS@NGPEDiN0@{5UMwifD&b&lEB z3*bK_ux$kB?Z{HWe^yv|!m3Kft7!4+_fVp686DT??fngSFeFXgJ^s{{?|V^^jx>yA1!;SIC>q9mRK)Jlna zWQC1QyQU}H`ln(`VVi$ZA`q@DgjNlaMjz8ZCA1vTQb%|dYBZl7TC|gpp=R)?z5Ss| zTq1%a6maBscMZwte5BKB9tk}RuwSG-lv+iF>nJ$cNy5Wx&f+sU43Q zmO`J$(L&4KtOmzpz-eYepO?aeYnSWwQeRF?-^NcbR)ewHiK+1;)yQWIkqw!S4RH&6 zo!UmfJ-RrXj@1rVMvu;<5)+53@k)=^-Cap_l)F6(@NNb29qLQqZef&jGhW5-R73Jx zQIumLtT4n;cV>Mu#%gO1pJ;VyU4s{EW1Zr{24|`x zZj%I%(Q|Cj*|le=8SC)rcsU;Kg0W6zX*#Jgdo0WEGH@3wIq;+EP(1;Qa-G@fLnzis zc7Hpafw2yV3c&N5W=&trBh!Z#JD##Hb$>yBoH>C5gCZ(4 zYy<|Jbez58DZ##^@BV^-j>WM-ab;nl)C|6==#8@~RtE>z&;S*j>nZK%cLqjw4?EJS zj9YHti~$D@4+44(hU zQ++wM6f{`9i`HY!dbPJMW5jk{Onq5s{{1?t)UdkYt+C1PgW!%)YQXem0NxbWYFX?Z z(U4gm;1nPl`GL`{S1@u$V~=~5zrIr6D!#Z)DO1YzaIc?@)OX1`c#0y%^8F@JAh$l_ zPx(_?$0c%G$wO#6PQ8}-yC{hm>cl7uNGd``H zW)w2DC*B)L4|fb#iwS>WChzTvIW-LU3nS;kSzjK69^=Ih`i|trv|VE`Xl+)VStkS_+g&YBGx z-_h9P4$b4{qsb=}*hXDJkO-IG`tA==jBoWF{r{wW34B|{x$l{?NjgW;IY;}x+OlL> z@+#Z1ElZZSc#F31CToj4%`N(hipwia%n(9(q#+O*Kpki}pO$F>0<(td8H|M9~4dX=(7%nHSnSN<$!(yN2)N#9xBioenF??pvmD zL*RSoqSh^ULu4_MU}Y}1TVL@p%E<}x8G_MyY&Nf+Audsrg16}mHVsXj#ECm`Id7$O5N9blRYomG(F^3P9Q;Ko zDCHC`zRiSI3%(+g(*%5v68t6N-{5;RV>gKFWL)lYLusil1Yah>x^`+>Eg%| zDadry^QAEzVfD{j7jkbVZR?uf!c5$@EVqXl+Rsev!}a-A@9(<)rHiip{;N)jqmU$J z1o>4$u60|iZoLBE2k@PQLWiz&4DlFA$OMbdXcJ`k5gflB$au3}Z&uU9F&sYtp zET>-8Ogojz%;#4cP0h3UMpG)mS0nFH70yg*=E7oAC#$h$OGGi->-8`c;z?z>pQ- zMKWn_qN+w)@6;ye(G@=a4w74F35?43xu4r z>Mc$Vrys?Cr%=jB91iCfUxaKU*KJ3oobGpkaW@>37l&3|iZ zh?SX_RXetAB!b08I|s;~*JMELu$Z9ULOzJ!fzyKBWO8tT!Z{U7%2mu6+`#FSBra33 znL{}El#GExiq-O%D7k(N{z|FgxbDl&IDnsBaOmI%^Q6IlBv^%8_?&r;^FBfyfP(5d z_GYn7;Bl;Iv?_bd_*_pdJNSci7 z%CA|qh;S!(f#KeMPuvDn&goP#gWkk&9M6Joi&YJ;HAb`6rpqi+DjmX*7;#ZtMa^PL z%%9uWd;cP>AW;0}UtE?{t3?Ho5aTIZesOul;x8$-pg*@rIdhu4oLy3xIfKe3U7(9P zv9|2PV%a;^mA&H{Wp|ZCYm0-?+F<59Rag@Y)YQVVzmMU7S$+q+Ms0&oH-Ht29+*_2 zwTDMx*Iq|qNiR@^3Go^k7GD3+Y5+(0@=HqBLUiyf)ME8|n?@!8x!!BDdi8R(!t8fC z115#S6mUBIW(7EuTa`dOqtqxV8nVj|qVAxTWvxNCJ7i{Aa|pHUmE|koG&v#GuvQxF z27CwR!t{9b0?QY}`p#movi$dRn#2WkGhs!0Y+BI$HA;BZwJQA?861fQD^CM~))v^T z9<5BFFcjErKBGcm^x5nM1_g+q-AD-dg)b|4hN4(b`Ju~RWMWxUk=^b$GmO~}X}UP` z3$TFwCuYU$qO4+#HjIB6#mqRzCg3^b)|SiJDKME#;=MK6d{q`8rW5X>R9drEFfxEV z!I}yz<^m&opR+0)GXDq-A2i7ew8yM2q=~y=J6&3S7krBNjJR5>LfSJ`m&R|p$T+;_ zZi<6ccsYJqbR|MfgzhCckrd(WtLRz6`u6!0>Oq$u^`PH{dT{+U;c}Ir-Yao=Lg*M; z^1G!QusbbEr6mMg=3!XNeXwO%EFjg9PA@-qK?!;wHFsmEg2C^hW>O@GZn1LGrg$r) zysxyeA&ma^wwH!m;UD55#l#`7K-~b_+nR0fc34)nx!ZH)fPb>JDW9{lG-UdbOexN( zD}eSEi%Ub(8W-{iD<*ydpZN+-6QkHTtOLmWmSCvwKsq&wZ^l!Y=y;4yOrndPG5hH3 z0$tfRNR`g6H@h_?jgONmtwV2iX-MkNDo#$yR9Y2%lZsQo2kXSWtDMVK_&M^Gf1>>7 z2o{!aatR~>{sh^r4L(aCs3*~esx-pHz2erYaP*<_Tfrh8#$y_&v|xel)QY#jaxddD;s8XT z_hSHZoGT=bAB9iyNjuQW*nb5!Y~^8RNHz^@XRh@;64z=!jB7O-E%7}C2esR3pV#Xv z^eP1F^78UK_%JN}ZP+gdu@ro-6Pd4r|4rS4d8JX2&=bhnfk2CE2ysfA&^y@^SRA=& ziY_JAv*_9s7riQJsEBHe`Hf7&geXd+1_pne7@ruWfZA@>ShPx_dZ5W-;jR27|24-QT8V4(~RYo_mRYp{v^O>mDM5^#SHZf?xZ*8^x)r3-O z(OB#%K#gtNwvEJjn^A8QbxOxBUKl||vC8u$U6_o!2eEp}dg0KI(f@ffDY4UQW z7uNT?ndfjTbr^F>(&)VT1b=q{B9`FHFQhoq^1Rf4t9me}F1YjFsp3SZl~XTc 
zN`r=FH3lWXXjV>g>c<~P$IORrE3&lHK{UBf^G3mCd%uC^jN)!?ndiWTRrTOsFmun- zEA=q*FQm*g_k4=Z*F%gckoRv~@pKfIlwV*qM&+`aL)$<<{sZz&wS(bnanr8#t00B z@ZqL!ojx}_%9-XT;i7NR;ISH=osW*d<(pX)p?2a27_a9;_TM z)eqhIibYI2ks9H^$DL{uZnZBqdnK zuY%2IPJ^wFE7S}{kg&Sne0mYq@abh2c#Av*Id??j1p-8{FI|uuF0tlkC7Ob5C#l^G zYq*qRN%OO-2n)p(5!Gy@Vpn7rEg`xxfVb4_n+A`^@Fv7}kbf)aLIK+ za6qUD9`WejVpS^k_S?~VBocnTrSm-QvfCVPEBOT|eK_8rrmY?%RZc9EkX!v1?ry#B3teT#Vg-DNu*doRQ5W8(G5vK@{AbbYHRh3+9{K$JQJrGz92DLy9&sRX}9 z5=s~@XH^9r8E4XH%xamD(YuW%sNDhLj{L&W|0xEAP@qVKIC=*Di1>^=X0-a+(Szcb zMQ}k`6XG?(jF&5}{=Bt!40`-_IJz|&6Gs~v9jbs%0WzOlbFa*|zDDRQQ@-{d4|yty zVatD@oKz>a3EPc*0=pSYiM*fbSg3N`U>=ci`%vthjXi+?7e(7Pw)E`yJ=d0-@fVl( ziW*=({YI4GVoJ=>5|tu4we)a1exKsn@{|3!#nW0ej-}z8qe+>AFg6mA6(f*zudGvw^p-eA>-+sY#y zrG|svmYL?j#`c7@YS-j&VMTL5lX(LS)odT%Qc}IOyVcvce@msWZF{VKdTg{JFg&uw zZ*A@A3luOa1yt=gmAN*xYn#8I%%Q~PCbQYWF>!wL&y$=zYwUNhA%Zy$Y!JFsJC z8mP{;&z7e_TlS8Xs%LpvggI+mx3``DP$?Zu%;w#^qmwes~r`#m};Mm zq2kV_Vzb%RHs23Q1zv*|3RU zaNtm9furrfQ1Y{5jsgnm-$tiSUL6{o(n2v#1DsSA7`1`eaEotZ&w<*h2c|247wKj# zTgo+@Ct4h6EY=@z)(_WrAFr&M80swy)w@)gS3oc{(A(|vBx5z^WU8mQplNF)*xTI` z=s3|JOBYKvR68pC{+_NLzqO*g9DgC(=dv`t414=H_7L_k_Gf8@^~|xGwvd}#i^4N(@1vRcL@(-vydMJ&z%^Vfm~3u7Q(84JYmx9 zqh?ML`)+Dvj*(l6;AiCSzwCRs|L}hU3Ty;x0!qjx5Q9gAAnt7sJI@&9rO9h)sgNU#GrFoYe1tuv+m~4XkbQUmB?TWdayShX=Tak^n$kddCbdZt8Hh$!o@L?l zv*1}yQH6{1AY^tWE}{xT+WVU{XtGqacHI_o{>a(eYgW9I!5X5;&z_+PPeAD!rf!Ot z%*-9CuNjROj6U`Lqc{HGk^LQBz15)7nM^vG)q36L^2z)E`(OU$yP)jl{r=9G*w!y6 ztVLF>9L+=kl2q7(5qtlS5AONh;pUrWH#NA6+qP9EI{l;nvTNj)fx@NvK<`3l-xu3s zH!SQ47c_<};8)t>=JLpfkgl$>a=bnG!6T%*eQs-~w_?ZCurG4w*ZytpYXF=ct=xM3 zCqLcVHPhr($OMg`mg}O!hgzpDedE5}b#wnK^YV9YdF|2{8p~>Fnj#@g(^>=t3jfp7 z+_AwBi1kj_HqCD7a1MQ`sqxTgZK!9*k{U6aU`b$n$L@%!qP_*c-4!jiI4cvbiksT( z(Z&tHkK|jy^83V3DIGQpCF&882JL+Vn`0fn9Es2)RkG1vI*J9y2>cW@4-^a;hqAUr zv^J*_ZD)`@#6?Fjlyjuxmnku;WAvA)b$LyzG6%)HDDrMjW_k(9Qw*Ku6jyitEDv9` zIpm?u&0@Q=XUNE9px`BZBZW$KyZ<1krR6HwiDE!Q*l%?3G$_uzU5rzl&1gbb0WpPA zen;R4r`2+I27pX(7)>^gECz-mAm<$>qg_n_{{p97I^zfWV&Zzi#K|&?4v%VpfG{=OzM zVNHVEkueJ6Do@ACcD(=zR5f=m?TZj@H`Rfsw_;Hx)h2oQ=+hv#){i^ zcGon$*;h*Tg{l5GEk2c{4}M(rJ6*fAyRX%S5b|YL?v>`o{^~EWhf%?OO>d^ek}dsj zrq&hx4| zz>)bayyETBmGcfxqv0O*+>Mew;&7KjB^BNUW;^aIHQIR+*p73WrAIu#;>LgBv1R^b zb9>zH=lbydl{2^7V*@<;^=-A9lrgfm-&S$r(LIs*6X0PFPAp#`BV?s$kJ=#VMmoF= z`2fIg1~UW0S#Hdm28J`##2QBcF?f$*F~yCc;5~-3Dfs)x4cFaCG7Ifb**t5{j@z(bi&Q)41gvCMel7NS?EeQrjTGEL}!3r3mnzQzmTam&KI$6Phl| zoA;ziG8s)v9J!`2J3?`q(YuTWm!5qet!N}@Rso7hr2zFkr$%O0h&CsdA%-7hkrpVJ zp&0)zPT>GqACbl~4fV^rVEZ;=2PKWgfX`m2EQB8{n#PYWvMyH?j+SUbQ7ae-Iy}Zh z8CiFR9v2<;k-wrKBAyjX`XbB`7PGLaTpk)_I-7#IM8|#PdnkywChNVQ4boe<0#dTw zL+8l2h*sNZrRIdpuq)Y7uB z^>=u=oMWgB8mk8Ke}_r$GHVrYH%#`mx{DHFi>t^5IhX;eaVoyRV=C&NYpa@`y}s?c z3Tel_{!E5A0^4TKpR+CH;MU{v@Hcr}5c~m|7dt#!Qfxm=a9aPO-bQXO0PtDDQJf{H7+f~)I`NLyCCN760@sPL!R(*N-TgvB|y@h%En45 z4Q8XITq~X?YQ0rzWA@I~+Eec!h$Hm}o}6o4*ix&ML2VieVzz2vt~I&6#ZxkPr29rC zrywag$Id5bJBlokzDWIaX9WZGU=k~1hPovcdl zFlfyj?e@7mjhid0x5hm(-lB()j`qffs{bTX`EgxB1tTi#;J>jAnzUxTZ_`bc%C0;CdqLuyY&DH zp$o|onW)jIx+o>&QOXOcY*JHpZ-H$1T*x=`j;@hBHRd89ie>;WaFZ2U%}a8$#L=yl zEu~qFibk8>fWK2ZcgX22cc@?D44K_{CInt-_Y}S=Q_3kyu9UrsM5NAXv++ce0+~wX zoZ`cKy+q-M87X=UM(iP7a0I3#3abG>J@2pZS17G%y!|Yubfp1yuC9)%PJ{ZhB_oCv zqRP6IuX40FQ-@NYPr>9RDE(rpWP~!TCZe1ldEwRLu;kZ?2w5{Jk}B@3!z!_-_2#E` zMQ8dWI)*}lR0@+jJLA{&hCE$2Y<89w6=-cHryX}F)GVdbW*XcZ3g#Z0tvr9-<8!r~ z-lQvX^AOjYY(`hhY)5>&(Lqq8r4Z*`F1gmGE%0YPLlV_fcOuDsbomPLbIOHPU~Skg zk$VPWX}o1oX|X6P(|GGeOc`FTt)&X!>z>sPC(}TmlK?A?lJm1y)WQJ;6KhfG#gulK zLg`mTLf*AL-y~$^y0G-Jb>Yf#Vcvco+4uGR1D_a=`vh%i?~zmcin^Oh1v#LV3dR@R zSh?+v%|XJF+_<%D&&koEr%kmZ$-?gTxW%2ANHk40I>48QzIa22zcY3BSGNs5`K5by 
zH7L{?!EVu5c)6Na_Z<7i80Ro?(Vh29#wLm=_U%2P^1hwYicE$$3TelQwPa^K z@!jWW+9;&)&U2WN64IdIoMnWa6nAvUEJvrOzO{xlV)sQLSfb;#G~^Vo`r^8LGS!#x7AW8ocuPXbRMu0$CG&9e zQoeH~7ZXYAiAHm?>>NyJ4lbq4!&LLqs*a7W6ZaoAs1Tw@BNF{koz_-~TC=o(hWJVB zz^OgkAKf1d^z3bG7*Dv%cYbbW`o8h7yJ=f}`+S%G=L>tTTd+pA#CGlrdRuq3#wX*> zPoB8t4$wVx`$#Cze?xC_%Ceptrk%0n5hFAim|B^A8Nu+<-)Oz|S*RzD0wL#_9`ArgOUHRZm zPtFJWTFSKyNh{?Ff8$_e>fVtMZfWY?T(9sf8n7p7x*iG*2a(wl&zyb)(7d zPl1;PzxC+7GYt%{)!MDdJC@h-T}Mxiadv~MZsy+p&0jd&HuBi74<0+6DlO~XSyeyP zTqw@vZh(~Z{CZ5-Dq_M`5fd6EOlZ6+CN$gv9gZsi%(K`oxYv+1{s)9(!VE z;AvBAw!d?Clik!H9v!$QmBoa6e~2*Q3rDv;JQuIq^T5{Z5suhz!=dmf8Gv%FUmCFi z+>+n~hrq%QX3}cBqX9TFbvcCfI)sP0Kt$*jE^0Lin1n`YRsfad^~>wQRHgeWa8aBo zAs(d}m27EDretXv3M8OjgB>*{jmf2<-h7IT^ulN?__7Dd(rQ$F_G>s3=xJ zF^2faLVWndaEZC);K&uc3ZkC%kTE{h*H+Lo;_|!Yf=z328w$K;e^)A5vvW@tE&^;9 zM2jb38*I&?#d5qeq1rg;+nDfe+~`XXYADn9K(tsd)C-1c5iNEMmMptmRL7X%W1~ee zcSp5^76)A=%PFydwSxXyXtDbL5iFi;Jow~wP5ZajadMhas}zyJxt8RP7Ef^SNY72M zy<{}2R_qs{VpV^nZmPS25fwd3rmowx&^&VID1?e5_04mAq2pWc-&Lb`I5@S=t}n1U zeNIo~P-XS-3Q+VUhHC;Fs+}GP6gjIwFlvs3rfIWWs%~x6{Kj) zB1P~wh!h$3XpuFHMm~w~AC!faR|Se#AuXLJ|a;+N$Cs8cD zge<2hCLOp!>RchGyUfX*YX!8)AiFzOr@lZxn;DaEz<@(=IIS zQN3fezIHXG!$V&aG(OaJ>`ba5wJE|Q=TkW=V~X2nH#E=phl~0@u{pNYCxXTp($aLA zjNRK&Hg{?+dUp3$=IS(NlS(C61dUaYn;m9na(8FrMBJ%d3mPdrGIckkh3lYPcot&D zDr^Jx!|YxN%g-f@9Nsh834({>U?vWl199X4m?1khJAvKvMeSK+llo6BeKANGCQSPemGXDbAy@on+MwnjU6eCW|5Ye6ZeVO5+~ zEy7Yk-F@tdEG)fivTh<-fMDsZ-<}PX_3wm@wjGYi(~wf_Yf-5lqS9UGFg>L~sMK^- zRBF4M15MqF=MlFM5&q#o$2Ma~J&8pT-%H>9R*Ydz7N;Kevb+LXs#p2ZPE;mcc@kxk@KFTmM-g zpIZj-Q6&FN!~Rp(?fBBZI)B%`)`l^NsSiWIrs@T)I zt0g`WbDlVU>~_$->GqLu@xTo|SuEc_R^4)VE5!2q;+5Mrb+|+?&_Aoo5cqWKod(zX`OH!l_<_6>PKLQ$N6ctBRX8q%*~-Yc*k z@$OQx6OmRqXldf&QDv5=FvkPjWKeG*)jjLlxK8*&G8eZThB!?{zx24gyFyui-jx z`P!JXtbW(M1Dij8eH&D(?*2*}#P?;Ty*sKJrke{{g9AzRU2tr^wjSZvhzP$%MEL5n z+zf>9>NC{rM-jdnBK#SI@YRr?&VGD^|626GlXH6?PetnHpPWa>Z~NP)>sod;dy3kp z>(LQ6-TI^ZyPJ-`c<-%0y01HN{G|sEesR)Yx96cTcns9Uw$9FMiyaitS)c$ z)t+;UR^O&`ww)B3CRq9*Vtc3;&nl78w`nmY{(^Yk53gbK&8_kTuQ588$>`L}-{kd5 ziXa*Jd#u)I)tH=u%p{lG4CTb+tqiWR8wI1FBtI`xNbc~keq#`)2gwSo0h`O=^r;n# z?`=7YZ`FlL109}0=HAE|dY5GGjb7W_Ta{3xq#VfD8-a*@SJv1Yy}Gft<{uk-4X~1O zm3-+=wO&P2j7Ig7maUaKW2iY?xhWo0pujpfDHp0YPDM7~JP@!n9UT2Scuga;2^Nil zhL}|BclP z>r#(Q)okl1=b+eEvTSj3JXSZ{;Bd9fwjDr<9m%NVsj4k;kG42jTDi3~gn}HS2IXsD zgKj+BA8@nP{qk%II4Zi9}*|X0tiscvR2(PHi$xTLm8QWH z3C>VtEqj*?xp|&(?__q13+75SQH0mmfb|3qk$Ty?sZ~i$xwH_l=cdK}FWfCWt*ErDw-7e1ydC5 zt4vNNocG>g@q3-TQD?D$-=px@q(Z64>{l5rcFoxR+aqA}mJ?&8te|69!J_4jYMEdV zT(!NS$q|AgOiu8xHmh7TBxn_x-va`GVDCJniG9m|Aa+A;Q~~8;O0x4paj47Ld36ab zrmX%=IIN|d&P2hx+L^>VS(cyOPI#TE*BGw;O)8sQTkvj5J3}Sj$xDH3o{w$p^{oNc zyDPTeH`cL#u+Ga1!M54^CLB$5L0*p2jGS?oCyO`EwR>@WO><|_)>{UOo}AickJVS{ z9X0(`RU0czU{m+;i5ho&Wd4rM_D|n7KUgMXIo@nS%h?zuQ#W=r@R{(T`U{)KzN{wHgJJ z&Kx4u2Cvy&Bv9aMOYaJThR5$m&d0>hH8!(R`QcNn=xAqdQhr&FG8z6?PO~xW+ewqrtVxDP#CEK~VCBf^>yt-U! 
zbMTdvd4_6#CEvH}76xD2)+R}OXJX{|mY}I> zus#6?!HbhSDmu%Iy0YH-=`AjE%}6piR1RU==2-J^L=V`eJ)Qo-*2#u&--d+K98GkU z)Nbz#7AD7PgPqBGo2{X(7yKdKqc5y-Ix0#^%%RZ?@2f2@v)F5@s%)-WztL7AMjIM{ zE!hoQvI=XHVq~9jLakK0i19oYN25X$(CaOAe+wsnQ&1oa&sg4-t>+r6%)bVhHeC0& zR6+7LDKWXs@@`6YHI6~9=NN0vzpD_Uv{_5Hx~O^ffgQyg8;jI5jgvBlQuv}hWnFta z3vokjvZHYL*kDoFY}m5@&g$t;4tTxw8-wFB3}=9C)Cb$B7q$`dA8pL7 z=&6Qe<-+v|h0^-2FjM&Vb?`8u6s+&2gc+*v?^i*Fzq^PPprIsTmSY& zoe?oUdi#Z2zq}&;u%mf?bN5Kb=q$}|>7Jh!N#hkS-Zl>|q4#`s$(1kMHgD+07jM{g z&xUkY^^T?RnCh(F0Y~!9aG*3nbeJaQ)$PB=M?IoQ*cb_}eJhUyH^s#d6ugCE18P|5 zq3{5y5=1pB`YNoW7C)FkSPfwU6V~6tIjz4-)i$pn(=N%K$)U+YH+36=xOpI9>Dzu^Y|D!ejgYG3BUhkCBjJaGR?beCb!MP zlIs~!R)wc7@R7r(Uc)iqf2LB6I=>8cexjHtBAwqV>-=`C^V40w2?Xf$M%xFUr1R75 zvd#~5{icd~AjLiey zTs(->zKuj&eVF)ge5Io=w)(a;S+Hoyb)y;1WYV#s!^B$*8WU7!#rdt7nOVC^MOl1c z461zy2t=n*H~t?anH;$Y$-_Ej>>{m?GXfq-Lak37N7gu~^@$2ku&uvKr|HNCP~*Q{ zR|;ondnw)eJE-kxDe?i<^lwjUgHK^Uo0=zMVyvfD-@10_q6MvfUWoQ?y=`5%za_$| zX#j;oqw$qyHO<{V%SR=yUov{dY~OcGsiEfnT?uFN{8G!@oCPdG0_TF&q z#fuACmS^+^iIp6@nlUhK>#rWLCo+q7kE92SuBd(e^$SAbjyXunuZE+fXR0^1qIh$= z6mM?nnB%Nk-XUvw$9c7UgEzMaz6?FSi#9+tABYNS@SRid!Jbu#9QieasGm^u4>7Dp zQS^GrX_QqxMa+l&HwX4#kjN1|bxofDrwJnwg1x7A8N4nd{7;*If~4S-+(A+mQJe2* z$s|H*D*BprbNB1iCusV5ZEN(ks&zBf{d$#}rs>sZRrKof`*i30@ag3@t<8J8R&_Ox zl$^<>*RNc0^@8ZO&vmWt_CCL2^{P>GdboMX5}&hmNx3vTXWO>p!fn8O+t>UvHuh%c z&+2sA+InU;l{X9|B7G}MORmehI{M~}ysdx&Rq&2dB+#GDnXC2l|z8n~yKceWW zE=As#$krby=EkEO(TSv;i_Z96AfRb|yJ@rSZB3)F|N7N7MbZN^eurJSt#4PGHfwBe z*A@7uciyU|J8!#$c>jgBua6IwBZ7`pGbkW%I5+>|fgmsyyN42M_RaC9R^760=%Pg} z0q%Y$Y_Hb2IkS`*Zk@S1kAoct zavPxL4tLCrr3MSGU_GXf_9IwG49T)%CNoe>ykzDmp?8b{{ZW2l2p*2mOBDTG`?Vb3 z!lSFltNcO~`Jq2rg%!?a-!Y#V*4MrFsjIF$acOJk-Y5561&>D)^L7s{yKKlGAK5j$ z^s-?;x&My$53X7Az=z+u>%+rqmOS|3J?w2S?k~^z(o;K?V?C~qwi14#Adgc&DOPk= zkzpCv0vZMKBCqX9thFCunF=n%JD>k4;e4!4HS>0cVm($P4(EgGD}ZQ?>d}0#WxI$g z>b9NHN!!lVk1}Fwn{3`mWAn~JWZsD#Q7>vV?@af{J}TdYj67+@$g>a|d16Pa@Xkgf z&)J-_(x(XOL*CiedQjg-%PwdUU2xUZzIGs^hwn_2YMt1=WP8`@n^&bRvkrXjB$+`D z+99J$(rE;b#pJPC4M4y0E0?WLCg-#V0#UzKbelP=$O++)qqOq!{;n%;JGAq4ol#+5 zqxQg7YVgt)&#?BaXz&+ zoe%BoxoBydsHG_b#}r4l&#VXZ?WucdcT5EIts5#B6w8lPTDqfW^h=WgePG-C8@7~9 z9v^2gc`QMvcX~V@uL)F^wV-&uK-`a#%^0jk-slfGGmCc5>bzin8$;6hMRGjfM=BL< z!uW6VB>pqh@!vm=S+Iu|Hnf847U&nB69OKSy|he)sW#L;8wzY-m^f!tpUPx>QES59 zTJ5Q~C!+dZq)q;`Cz$hcFG>=M>Kg_3f&DS>_*+O%s?ff9KMIMKye_9mexNnzAk zg4XJ?1$ws)b*=97s&Jqm@Av79*k^kuNdiz>|3$b~P$K{wPBFKdZpIwP* zgU|kbg1|9bf1<|V%z|RJ7UMl1*em~jQ{}HncQ1rpr8}aT+QUe@8?hVy{c51{SFGe0 zA~%liXjOBzPW^uvqeVWx@Zdc+ZRubHv(fD^I|Vh4(YkM?;6gE4H!i?2THpT7u6^H? 
zW3)Qf^@b*6v|vj}L=rH#CG68Ef5Uak-xQ)!2qS-K3N(=UB4%t8Vofn2cc#gP&YmDs zXn`-2-1!uDt50EI^(oXT@c0hO1rb77cF9ylr}x1QsSp$*$*t(%9U1*RJxO3z4<3_k zV;AW&*sD+I34N-`DPxdu0|r6-1K59wXa|H}uWDpt#!k!z-e6}Pq=Y{Sq`=NnHDDAK z^?c@Y-`#)teVdcn>UXcc0v^B1Ig=f8vWvHMT0A|Q23i+)#%wzBhCBX#WZlvSKD_7l z5ApHfy1Or3++rWO>B;Jse|&X&sCV_n*Aex2Gl*5vfW^HK3VA@t4MHvubb^orgzNxO z4p>1P_l^;;&`mZ5eKMlr zdxl5sixw5%b3^$v=qE5iiw@p#j|fC$3h6!;To4z=fTj*lkTpahO*xsYB@3A7KKTR1 zfjr6Hv8rw=={`|~KcPmyF^N>pW-R%qhssfJ-l3O6uR1oh^1XhV4l9>sls1@oQyZBBgy3?r)2M8lmCos(k3LrX; zW*1Hodrlci9O`Zfb{#qAxC6<1~7`rQ82&Fs}r#o7@NWq3{UG* z7D|^|1`PFCq3R?b(f^m2LQ<`j+86JrG{zKyizRtpc2xpwf;=jZz@m1VT{|ih!^S zM4TXqr_ZnrgsmWA0TDBZm;i(kP)I^F2&q8Q3GiHMl;_fv#Rg9-Xo|HcVt|g1!rr&I z(rIDr^v4x9tRbLbFCdu`5OtV+rd~mzktUg%^wb|&!9b&YY2atHR_CI$|xmd9b6#$YGKU}+4E!5B9$zazgdPvzaHBiyJX+<0)i z(cpGJP8J9P{)&PcPN2Z5iUiLdMWIr17!O3~P3`ad0sw_mP6PIJ1ov$HpF7$`yNzQ- zhfzcUnr?@$Z_7l0<|Nox0yH6BRw6VZUVaf{{S9!W9mZJyukxHM!Z}ti`oU~bkTJqL z$|Q*4hNzIO!Q+2oiQkRM+=Q|EU4kH!iQ&5hETgk25R336H-z0q?;s*D1>o8J6H4Oe)-Nb&5B|KKqF)lS$^Ha|JfidQK;Y zHcqZgc%8B`iNU3rq*GQVN^~P^>7Nj?NAX?iUS(zSdpts6KGMSRTf2x)e{HJ!eIfMP>Gw(H;xTB zIx4&mE|pNk8(cLd@N!8zv0OT@N%VXnZ_V|Hx8(euCX)_DyfLxP1%*!$@iu%D`S_xP z-@I{anGuXeufyWtXx_-9{Vd*WdGk#R&xm&`mDl%$Q>d>7$Qt!M5~m9$sdy^=(k{rPATb!br#XTfF*jRWE7U% zjqhTVl03X?Ev~~({H+>!7p`MaO8Q{Q0bIfAt`Tm6D%=RVoeM#e!D{tS{R@!pMniY3!Yj56i$EJlldfm~1 ztv&GQiVSSU`J7K@!6oEieaH4=gdZF_jt#qGKsx5SjJ`tIu`L^Sl^xsC@hZMFv16M) zU-aH3TB8%CW!Bum8TH8HsCk@XAZj()4XQDXj)v$)ry+NbXh=QU9&6CkJAjG=DBBf$ z!r{}+{^k2_+2K1G3xSaB7RXR1LHYUmgvFM?RIP1wzc(JY`L}V zt!=%vZQI6M+cvj$d;0z7pL1rCNhZ%EnfuN)nR${s*Hz91V?(7>%LRoRqIdH)ChEcG z^sth}54@uN9)w|CcR^R{Hc70wYnSv7hHzG-cg>@2O5O8e8hE7$laZ&gR%g1xImbt^ z%fTt8CcmxC?%%wBv)}QXm9<{mYoxEtbfL@3*6ZQBw#`r8oCQ+SJl(l&J55Q`65gW_ zS(*5}m7*J=0!y`G&&v#e&y2Cn@5LP>ai#u$>iL7lY+(fSmywD>=EGcQ{eo^10TOBdDzV{c`@59#Frfm`MsW!Lk2E1K1-POa@wafVoKEGXZ z5WibB=AN?scsh9){snmhVcDWeyDgkU=!1RWL1rF>+hQ;kd|s83*_pUAv_&EKvV3ai z-I>+?dPei~w`0R*q?{`3m+vn>=E(Opmq}|PZ<>T z!n_~VVMByhlVYB;94Zd##L7PGuL3s1Z$XxilpX#(hN`fJrYMVs|<1CXPZ`~*vMBMQ-xed(R4X=wQxh$##vI! zl;eJ41fkZndGSHc?s4J)WGXcR1V!g15@;r^*jS3YiWW+D3T&t2+dCP!E-cLhFoq`w zN89zc+`}mH0+uN)K`?G9KVXR5TJl_(Y(l_3=ZdnxBy-@bze&b!Tbobdd#%IW&U%dPo$xXVyqp)V7c>BG;K ze~zJf){DU{*i#KJ>A|vul2w8;oeb!D&>nO0D=p;5w=k7^iYxiOzGTlLN|wyY!ttBX z+Gk)^^noUvi>@j{(vsKq#%{{BV$b2tB3V_wW5f;8FR4ZRO~vdgke$RL3UI1Oqj&|` zr5qPC%UM+xld+BqY%0S}<(_F4WQp889ucIx!Tj{*f)Hn9{bd)|i*}&5yZ}^^(l5P< zHY8IwJ6>Fv=rt*o7Ie5|f4(C|iTHY-tc?yAdqt@UI}d&k>o@rR)BYrK_e`^CC{G}! 
z*5YJ%v(1yC=Tx|QJGqwKbA&K#t!o8=AOc5h^}?-F9$lWNJ?2cg5~k9>Y3BN5ZXu^7 zin&liamc32w2VT6!rL;+!t68&C+pVEd^Jey^i=1wrFm+a%R?@o<6 zL>UDMWk|zkwv|*#1ohllV$5m_ai}+j`0{9@dX;@=uQ4YLjY=G}wNuG7)5)~B*kKYy zW*=XxHoBYP1#rS5VUs_X{4539(BoP~lVn}4)_lOmRL@QxN96Z26U-V7*-!{O^`Rg7 z-vk)(4MI*VbHI9qA%l)qG_uvEPj_g#V~_}|`XaJjieX!+nQr_1dMjBwKDywckA4H} z*U6sJ`+M{BVz)g9;B>)L*iEl)ygW|SNCPygs5oMD1(w?yHZClFFRyZHv3QQb8$=u` z_&`*VcYmL;uq3D?tU6}ad0%df*%X87dL21_uj7S+Z zgqj+LFeSk6-p(q!7k**J;M8Ki$$h;g%Ke$xgkL8psbqJDrh?T^vp*ZMo(JLO8j&tW zd7Wk4d8cD|Is+*&52?i^k2);^=;2>uoN4%VxPoZs7#t5mVlE#@kLtL*(P!TH6LSa& z+siQX2USjDG4(3qwcYcmK!V9cUU&s%yM2JRpc*!XPvmgr75J1dP(@H!28{yU>Q{ha;iOipy(KgbBePa69 zm?Sq#Q*@BSmHAS={O2?zdL=UZql6Y#16QGbFWOn8HlZvc#s-f9`Z-c@92R%)B~~SI z$Tw!`Thx4-0)HIgUA2ksxs#X?g&UJdV@?Ibm9sN| zi{7AF7*D5p?B8F#C7$=Mlso5>^+Rz20PB)h>g$SmUsLKpQ(jFI0I|b-FKA($%e zeKyr3ZbNIH%X`h{KgyeDtReiWOsd82C~@biy(`i+*pU3oMYAKSDtqmfIX-Wu5JPlV zaj4Ibd1}!geoaYb zV7QM$U@}7i2|gLANu4TAVLNnmmM<4@5Io8e3kyx%7q{hFZpG$A;cO=i>@9j#(HJ6+awHo|i?JHvWL*n6!8l&Il3n=~Kqbk$VcV z?acI3onJMZDJTCPTHF-J4itD=mz3e#`$yw80vxNfw{@5_+i8Vqh|Y$mE6&&@gV|!nM^JfIc~R34Ojl?cu^C$U)w4@rBkyqH12& zaPgI<$t?oQ5 zQwk|v?`PT5+&v^AYBKVC^L@9S$f3QTcaasgj>~pYd zY7N}u()zf^aa-Ljfd7S2?Ny-dXfv45Dt*!KNUVy=^dzy8XyVT%r;*WCq%%e~_bl$+ zA8-j%cW2uvvvQ-He4m|9Dc6Q08G@o3l^;Pu#QIV0;+eEW=}J7+aJE{OfOVw0vztR7S=O8KL|ait23cWf@jlrz+D|qd>T1Pfaci*zG5`Q5Er>>o5MyTvbShaU=+k zF3#@NJ|s6*>?{x$Ao(^vV_1ghTlYXw{KH1>&oH#JLIK!Bst!d<*Hawwe995pmw!>wM9S779ay^6Kliu$7)juE@0FwgHyr9FNm`cs4Mu;=!8_etjNnYhfGmnZ*nob+D;ZDh9+ z)&8cxUeD6@DC(W1Mi25oRRI+qcCe|-5tE1l#ZtB^Kgz3|wZFRKm~;OiqA{`Dg1o}sDBiW7-XR`7Du4P235a|KQH|h!P#Hu-`r*;|8|d1|UsmEDjB`8M z`%G|2==#v<&RwcEdG}>$L9$$Rg|Fa~2^>v$vr6LZuS)n-hM#6n4_T^RT)tFI1;;`* z7TIqgVRc~7Lu^~r#f{-p4mHNc2r(re6Pl-a-kasNoTjz%x1;$-8#y5j)s~R5&R%w# zCA5>HnLyS@8?K?|~jdzq_vsm^~ zuTxa`RjC;eq?f5zF4Z}6nt?&=Qk;72ikM>ij)A#Mb$ar;v8ku| zcG>fkoYIxP^;y@lX&Mez2n`Q{DzOgbk03H|nwq5L_#ue)v{*ds8BvF~xp z@-k%ysrPeH{~?z`qeqPIn~g7=I4dD6s_^p7Q4114}hVO!AO_31O;abHKZJE9U;uIZ|g-Ewr&2?sJ1&3A^C+q&v>BEf3 zD+Rws83B3~m2H;VuZrD|^ACRj<%?9uEili6G(!C%BFvSlnflm+B3Qd=e9-`_N*n##t$tCcrw{qo2*Rdc0uO;d)Z+zL6 z?Si4ka{onQOiU{C3>-INE{>(NDNm4FvR%O;HincMS%Y{x6%uLYNFKs*qk7Ij;*#@w z`h!yDEv;O$=d(BQy0*;s_H26$;`k`l1iF@l{?^6**3Dqe8@GTgU|$n&;K8kA3q8TV zmzd)%(acqbTkbP_J2nD#C45}fM7g7gc+dV|4}?XVjz8%`f3dB93TEsJy!j0gHG4!N z+~=xKKzUK%Rp;xvMbG|2DxaH}#=VI_jaawZpQFH&&8=4%9yQ=EE~HvCp)fz zTy?wLZ2R=Fm`$ILBVG1axk+UAhOmWr^2DJr7~)-lf>Rnjp^_rQ>YK{VRYnf!w@M`< zUjLL_s!Fq;KBY6IQp$xITl^o@c96MdFnywAJF8f%>4c6y;`$zYl>Lc2wq<{qvI}GM zqC}D^dr~9cxLdajk#Kx|b-XbT@YJJ1GlJ&~ly}bL=vf@g? 
z>`=N!lNEMo=8JQV5AC7`z^C`Vts~lAEUpXHNg!;R+k#PCh%8S%i&e`biqRi6^<;MDhQjrBM5uxT-Hn~t$Ed72mAdb0aw@LFOiQpJr{XCw9;>n zNYve9xrOSuZV@L_s-f@Wgo3()UmV>XFR88MrqEZ#q$Ad&W=j!TaEcmMK`bB3@v)XROK-aYGgxNd1VwYYeYMT6qtwiX7DL*AL zB_JtbbiujQj{r@6;IKZdJheHksJr3uczyZg9^I{hXk#4K=EPnIU1sGWI@Ea>VWxf; zl1xG;-BhJI>}}~tAdM|Ci{XK^^+axK5FLvtfy?K0Faj05Xa2V0his> ztIOPqASrw0{ZUt^A4lSm3h7ZknlREr;uc(7P}Uhkq)llVCTO?xn+G)!KdAm8)%y+| z4pZGnO5CmXU~PEI4?fpkE8)fg${X08izX@#&u5=N3{=ylBGp*<6;tPK;=URP5Q-3I z9xA`?4x@$}ObTME1P?1mVOoT3(`rxs0885CMjO&=ngpBKapVK3TGv4sl2@YHA%tzt zJck7A^8w&g94@0tyuCc3qo zE__~HqhZ+$H+e(QtIZz$D+#EsQVbYKHq0#hLmCDN0_KOZjQgVq?3Hk=ym)2RqCez7O*4ykWpHyz{x}ZVv zVQfdmQdOxain{(txXvq2?XD{o1I7>zihadawPkOj+NB^4<-n@<_S@BAtq^wRQi^zV zl_))FTF&O=!u2$z4!YWE%5D})1^d5Ijg9&{nZ9S8!1$cd(drYeRE^XM-KT-X=I@Si zmoh0dMM0^>md57bDpBG$wRv|?q$@aN#mO8QhLLnek?e3otV8t@g)=D)hlxUb^wKvH zZK5slR);kr+fefBAmc<_yc(gv-{lAPjWvgfq;CB0kcijgyFKN{ZJpSDQ$SsCzg# zrK>+@X84zkCqDF^!}a#O>o`ef9+uwSkYB?qLicoL;SblRWhBvyLn0ouDy227-;7sN z4E17Scw~M0#ht$ao(UmepLw1WqT`+uqRLuSaD*iR(dn_bWc69Uj0Z9u{sdzCJd*&C z&oPdX%wxR@&Shmtr7I*39!E%(&e70y2;bipKW(H~N~WeD=DTp(NduU1aJV>dUZfx8 zmaCgl-^Y0*e5Wv3!=9>4Z))KT5WVQ+;cB6{2fC=Ud2;381F_|4W2+pteooZ?<#IM5V z=MVTnD_2m;5bkv(o5IZHDY`5Hwk!+N2(Phd2eNl>ka(B{?@(p$QK zRM$J-P;|{l;-k+f?LrbC z6lFA7fi3o@ylzq{!5dcV_ZJ6;Ba~rGyL1ejmnxW-CHB_27;c0y@~wRZR-|)TU1Qv1 z%H+R?+JqeN7aF?E+g1}}7VQppJqu~hjim!R%I1@tWI(nZQ(c6|*+s@y^(C&$Mw{1o zOSOq;Pgkop8-3})T zxepjrYSoC(Ov&tJ2lknlAdXC8iO)t#R3m4lV)jz%l`zS>nF@AtVq)@z0qgW|nGqS5 zQdgspDqcO)z7lsU`lZU0K|;>6T7Up$mh#9>auAZj*FQ>rNGG-5eBtoWY&vSo<#9?7 zx{;S$-%OIV_)yg)c{wf6O#T$x+TgcmuDiCVy2wFrllHRZ96-_`jhfa`dOd(va_wx+ zqFtVvVm(N_RGvaq!CKK?cq^D-0y@W7AbrA_hEI;N{?u)-47Kc!tOCtnjv-zIZcT1u zZf!>$?y(H~1DXI9VU8WzM11MS!A7#*HMwg8J?Xl);MuaCz14BDU#eX`r4eU391Zp5 zC7l$Cl`2E#4m4{(Yn(L!)&kC8G!?d11<9fVwn`o#3r-K(;X}t z{nT}|+F{iub)XjFAOhNwo_zrzZZX!#d?S(osZzBDWmz_v3k|&cz!XI+^xK`!xw?#m z5qOq=%fw-@G879di!zqNDIzXCUgfA>TZP%r17{mQU$0|UHx-e8 zuwJiqJ1bSQ(PMN;5|xYQbF%56CNAr)4mC~t+9Ixp;Kalr%7il(p3F>l9pgDM?JR+z zoL>~hcxZ?c{TO(jFOBxpR(k%WdAnHn$@L!e*hN1#B~h*hm)SADEX_&7LZ^z-RCUa+ zmfUE)?9f&dFD5 zTdH&ny}j(N%FCk0(F57hhwK|07cY0)O4lT4T$dNw4~9$GC2nf>c;ufkcu&!8Z*Z3? zN6YonQc~xy%w(qMY=16F86`XPhci=zF%ul2)im;YugCp-4miG{VSkTl``68sHG*jA z3}3t;VxwG^rBag#ZxmdTV)ig~Eg`OHq0skCZG`@wlp#5rkT8cjvLz&^@7N;0ojioj zLYq<;wTf;nEG!Mnc)OPj;wII9mU$5zt2hp^>)j5qVR(p^y?M@<#be+k;P%?Cy1u^8 zUztd-t~5f#$v>kUvb{LHIAc1jW@sgM-)L&|CQV;NbhKES938#90Kr+&lW(wV;(*rR z(kX$!)#WH!+(lJ##tBfAtqzy^oOUATuguqy>f4w`1cx zJOO-{uaWU#$k$Nz#hSG3qJK+>^xhJ=-`?Z%Q#J5!RPQJU#EDf^;L2_%@XqF1$eRwM zYugetTqGYu>zSP?Ek`V?Xsqd&h7K#KD>r&N7hFHP48Y@s9V|K&s%eR8mMH!laa3t* z%6soG9=3&L8K)jt);XRZLu$! 
zw%#*IRrlomFNRw|%%Pu1fgR{BhELV45?G`@1Y3WXitTKd?^$pqbbSuWsm8xRxX!ak zY%yD2xOwh3K$&;JXvGMcaa)d zwHu?k;%(@Rf(;*`x9#|lNCwxb%mZoRFjl2bjf2|D>SQIdm5-2@SyKn+H_lhJgj$!g zj!kfrzKtN3_lffFohKPTRUfD9`Se3cQh^eo`_8M~6ETTB3NSC2kndv7yBizuivP45 z|ChD;1bV={QIr|1azyRh+KJ5#6bjIRX+E{-xlQdZ#2TJsPl``KON=LW`{Xdqsl0n! zpju>c+ds}t!1x9PYi4jm@4kFFAtpK^j(4)Eez9{-cGJw$+?pBGEE5Ac0` z%9*2SJ#8|{2Gfc?LA9J(+sFGSFBbsm>cWm@(VDB@C&L92xzL_xU`)g)Z*;n$emN>7ohsL}-4!^(wF1 z^pN58Iv@1r@s@I*F)-+hHM<bMPu`4D3A^LF8!0khdoHX1`kd zyij+=XznTG%<8_pz1yiH{snPQ|H*!Ac{xs`n@wn+mA~Eg26cCh_>^02uY38t)uVVz z!nA*?xXQj2y=---lkpVeU`4WxUV(bK>dm=iVz4{I%nOgh z4cpe;Hmq$FeP8o=T&(TK@WG&>;RbLg>9CVF#72MA2?Qj4<~ z|Jq=fmdD5ivN~Z#(Ip{ALRh68QHi$Uf8hTe!KWQzz5h|bS`f4(Fz|ka7e`fLJ>Jzi z{H2#JvYtqF%e+R%fYsC~cXVaiLbnHTskT|`3yVOsB$`R*40W&4MDxDhd3OQgcG9pY zrbEuRHS@Q?(`1M*G$l&Nd{ch2+N90d)7o-8c-6Mqf0~d#$&yLRW8AyR;p9tO&5qrj zF`uizoJq zKII>mOF(L;x@)j`9lyDm{(Q8BLPj^y(c4*(4>%4I(pdz)QC$+ytA4%f+W>l>np?^O zcE-)eUn25vGM)`7BP2*;PjaGing{0+Pw|%i8Cxw0Y&hRE{gKGE3Jy{F*QWY1#C#%j zajZ#eGPR7glAe&(?d>&ch@jqN8m&#>DtFh{WwDOH`Q+=fY`~nvtbN+naaJ0nezMzN z`%}s$%rt4TsClCs@-R$O8&W?{nxn`2($~IhGCVwU=CpdgJhsAAXTy+!yOQ>|1=62w zJj?KO!AAm`b9ZA_dz-wqLpCnj_}$VpZDY3&I+o$-6!>JuIb!g8n!nBAPT6b8k!Q|0 z4TmwqK&|k0+_>!dWb0xK)d2Le4CvhOddy!c#8$S`PX|^cU%vX;yodfB*F(Brox2Zh z-BT<&U*mdpZ~$FTP#LErnpOQQ!b4DZH@U8bVMuO*(SnymRm@$tMwZrBFMGq`v&+68 zLTF1LY?ROoV?f8v$Z# zJ=Bl>1WU~=&1fN|TIP*JFBRM_J99Z3C|wlZ1019wK9~}tO_qtOluOcHdT{&VH(!7` zYjNB*UPp(r>vz?H@P_rnwk4mcd3Wrt;6{{D5-ntu@XjZ<;>RuLm z3>{YkAxO^%F)@~4 zWfH>+?nol^bhQ-`Um6$XE48%hMpw$?)qF;q&2yp3A`Z`+GGexrC%>?uSU!zQJ`YF?+nO8p~YkckK!4;D3KBO@&8X;Wa0e5!~MUBi)`F1tp68r z5z#$)Vd1S6+>zvipBNvHLXH_KdW0T`h!+TpDftV1`Wt$-aQrCUZ~xCF{@NngGh}0V zSlEM=^V;P}zd2fr^eh{zTx=Ly-s={hNMGs07hljOUlMubILPne6?g-<^a6T8-_eWE zbtmw|M??zgq5f~+v&hxsWO+$l4}3>V`5YBZ_IA&;LQx}*+1JCu7-E3#!=QW~%YkT9 zNpZ8CJKC{EggCQ>!RGos=t4s!eCioNX{f6( zrkqb%f+mn_N5p4oc2OAls|HQxn2=NdAJ z|H3 zCdGJ99Ct>}>G>t+9+v%zwc_5Y6^4HuLLcD8w9x++4pC(Y3?J+^SBLD9xAp#b{^m;T z4p%8u!{<@7-VP~UDQYo{yk*R#@+S?4&G6^UMGA+ZgW>u?(t-{Fz{m`>*$!Kpt@@m;TIln&qo15<3cB(!tIVh;|)Jz5&u`BXO^lSE(~xkrKREx{f+y zmg@C17{5KB1i60N0*H+miHojiWOTt3ktoJJd1wHGAdOPpBH+a@)BFNJokpLm&qT^T zBhQu_8bPT}$TcK?t6hL~f*-7xu7x6VVugctrLH+@P@c7ZPHV@2k0h=KZ|;S6N+2Y2 zbq|-B+a5B>cWZ^oFo$x2%YAFoB+g%Jyo9$3UvAs2Rr93RMArR1xKK-$@_y@~oxlhv zjeBj*fY5W0X*)+e6I`0ngiSKte{)=(K`-AbLcv}q4_FnMkK{{ui%pHn-5PW}(*i}> zGB*$}+#kBt`JWY?MBko$bwo3^Rus7Xa?f8reU3IZvGn?UP@mnC19v7ugyy>?LU2c~ zsK8#rX~Y>{E_t^mXJ-OUx}vRu&!o3o)MHz;HmTS7UbRMv`Ir zu6{4LC%n6lQf-V&KJOTIPk?6G(l@A(YBkh^Q9<5HXP?(GOU&I5;g-V2paX{)j{2iH zjErn!s8*=o+^JPI(_TmSNrzUCnZWSEsu>xLRm>sBtE3N_u8u@SbDWF5;@Xfz4QgBy z+#+!O^FxS`s5c;$de7>Z8D1_PYTN+sS2(Jrv#8#cBN#x1DrnfoSexci@b{^(iU z#|(J?n_pgs1^NCHl*ZK~9dvAGNtGMgIM17v_G zR6YrQx$6!)_Kr!BmRJPbdNmiVVQ>9&4-KaE0l1uT*@U0xK;?L`@ro7$qsiT?d0`HS z{LW@v7i`NKm-B2QmS#SyJSgQLS6gj5Ed!YsIj8uG|889p8lfMl5ExitS*-JZupn^2 zaG`iK^1F$7eS_?6c30S`e}UPzx^BA8C^<-Gdz zhy4%l2R_W@_a%N?^_wvMVoiW%zyC#yX%E(i+R{cGPyo--X)_Z1t=(_bh|w_;#Ev;1 zg6GX*AR0gOJzfT4Za3LIQrCx$s#Q3EI>g=G<;>`@uK&?Q`-&8gC-Ryp-c;efZ+wZlz(WhuZ1;0&8 zpx&AM<)mHa#G%i`NNTW?K%wwO^Svkb6|b1D3+zUpE?n>ChJS*!&@Zl$GX9}hY+@Jy zvPT(VNi=vujvXYI=vxsU;+;+#e4=`?9N4>#{dbeS&$24$t|vEr zZa4jK*6eFoCU7Z0EMiKh86N{F+YGAs{8YmGHNlO8m04ww#StxlsFH@_k(%Q@<;t;i ztF9Y$eRz!H*Ivx*t4Dl}jI8Jrj2AJOP`&H=AsAv|h>BF95&K5luWFu%uidmhWf*Gg z*a{FP?bYA_{{6TW24^sPyX%`11U~hyf`(Yq04Z2Jt8^fqzH*atLsolgeD6B;IHI({ zay+(Z=9Fmn4Fn<1?%Lw<*E>y2 ze3~%)d2r~lOMVx1+lm7~w#H(VNd{PZYB)1$Kppz?WSUuy_nYP)U*TK>+LLZvzr*pb z${Vi++hq;;c^16t5D3!F6J=99MqBi7hr&u}@*8>O$IVPs8{`{=->LYqD*JdIZMF^g zv3wP{jl}CFcUvzpr*^op#*3Dh=}FMz=@*Ibp%$!9U>e(}ROj$w7BfF0HOsqUh|OWb 
zm11LJ(Dzx0Cg(rM7zm8BpiX96%LxUN^QXa^f_A(l&-$+`MmiO)lf=9iaB?ePUoKv!W z@UnbUiJJ#?=xg9|QswanqdT%&yQ}oe`p@2lL#RfiNUxL6;uRhP_^&ecTT4_&&UuSz z|K>t}k$Rh;vx55xPz0-&q-2D+Z6I_4aJ_!$ryTco+kg{+0>7vO�!oX`j+Awv@e} z#l2|rs|R>#V4;1_-y1bsP|FoE`9{kIUunqWE=NC<)SIb)lg?PPYw7666WF`l+2c<( z=TTFKx_kA`&AxpM&noE9!k%z*96Wo2c+Bl6fZ}bg^O%$e>zM?qH=!~wt&Xpho#)+c z#kSM|MtlH&?AYS4Af%t;UQq>c`-4aJS30GuaWj1l@}MonSSjY9ct7I&vj8Y^D~TpF zJfZ@bgBcUjA(O&X(A8|wYG@NhZuTai8J&s@kf^0Wc(9(h`m%_tS3bpg ziBIZ~(aV2&OQ(I;MnPd?`7pp^c$v`en;Hspr=h0df-a#ExK=@Y_gAdv8Algd; zr0cE@(POpEw>rN_j(&5#a>-G^nf4v(s$Eg~)_s-OW2r)N=cv;oqqbkt1SN3mA0NZ( z7rRuFaxpON4fdO|giM1jK?=h`=hT{Zk@=ri{W6ykFJAA|`}WvO;LP=*i+fP&6QcI! z;28DD>`UCzgEhj0jzRDgKFw)Mn>M9_iU1P;7jB&B8rv#`=@;~U5UKr}B4UjN`7k+y z`uW!@rm%MEhgd90lgt}&Y$Uv!yp~m=+|yygXN=%|bE~eme;|jjRP3#Us})RhngFNV z?b_Bip^Otj!LM^&A44R5ZZ?yhe*f!DqhRg06i~EAd{R(%I}as`R<{r;GgO>n>OP%Rs@oLt0IJ zIQ;g`T$@|!17x1AdXf*F1nxvfENc9h@omWQniVWk`7Lj@QkR{_u`n%qdz&zy4{%X> zfY^OdbxN!*XgkN99nl<#`tLV#u)wVyOdrUsi1~_{N!A74*r4Cdi{oU1+!(8B*xV)+ z5xjFbdd;*x??vR-Jzp}uSjr|9|3W=X(l6T>=LzWozJ%;r&-$$Ay}Yp3zgeT`X;mJ& zZQb~45XR#Bm6HJtnM?V|k$p9CD&5Eh^a38XZ>-S0%bKC6) zZq+q1uNP{8r#OEuJzSFTUzxbpmo|7W;yNTKCx_R{Vmjt_oWd1d>A~F8$C}DpV9Blf zz*JDknE8m{t-u$++Mgfx0kpuU>*$d^jbomPdGB70Sv9)FWOJFrC+Ye(TbfUTH=mCvaCc-WcJLf)D1OL9u_AdRNxC(6eiu!(CKOaE4Fm@jhw>w(Y0l&1`ow9ewB z*^r5Ex0!*IUSQOzVHgEaX_}>5WRrGq7kn7t;+&rmw}1Y0S41gn()q={!?mjP?m9}& zgS7XFUEtr|5=5KSMSD}Gf$^cyVE-vv1ET$`c^Y$m`t!i<;Yrd%p$7_I+O`ox0QKeC z=UvS|w+JY`pc!jGrJ?H_+se?(f39CAtkt-Lt~?8b_V`-qbpv%AZ?HQ!>do1#C&(NW};A;1FjSec#$h9jjrDvV2$PVIN49~SX9k=PV_sC50uR~;4Ty5!1;mv~^ z>&QN9Aans4!RTRyA%z#Bs4h-L{1lzHZ(o;20d=qC0BPM~zum_e==7GtA;oUKxSHLI zhd4{|fJ^QYtzba!u6u^U%BRk03dd>ZvDzGow&1EJ!2<79NuALkRxV-*d2P8$_Gn)T zUq{U_#W&py;XOqwd~BWO>2GvDjDjVsu?-?<1`nM5St}JvGsNePRgsUq4I$G%PB9;Q zR>g+rr0Etkqnn&8kTwA%Z|bjgas=}q z)O)YNaMs95Xyuk>cawfSN$|NKtW1XnKh0r8`JliqU{}^5Oo@Ui#N`50XJDAZe-?M} zWmnB1L6fO$(C!cXzHhC1hMaN9V+^Z67}jR=@aVpQz?`JxPK=*^_GiS&BAg=!7RF+1 z=Tz>+W9*stn)E{;{Y2bjxc+&0u_x6 z_KrVnZ2`g)-~sJLi=D8T=TY*A1x6TnL>jXEPMkO-xZEc+xAJS*$AjdeWxGHUk z%E%JfXS|(9KoLqiLry`d+Dn{Llxk_+${sH(yFB$XXD0Z1WbKF@9eR7v&HpUYO8rJt}7K36o&@o?6U9yfpg z&8=WGX`#EaR}g@K`%)r9t(C8$VE+)D^lRZqWL2-7#BES8qn$Q3?n=vX1D(08Pt~J0 zf{grRk^nP0=1kg0e8Zf1ZV6Ml1+!4`Ryyr*N<98Nm3A{l=-s(D0RytY5AA^(-tu2! zP5c$Q`}jxVX0*V=bANsjR&mEjA!cV`Zhu1qQ8};LpN$ zRKl)e-(Psbid$|G%ar3|R$_T|)bBYMbkw-1$YyvcLirAR!O0GA`5O}!8X7WOE7!{{ z*&DyQmWypF(6Y`--4HVPFLVz(emlFNQ0-4a6#7ppgFiZJaJK1v(J<98u3lJ~UpR-j zx~Sgc$_+>#I|Bc_pR4TcF0i% zNmcbQ`Vjlu{Nn}2_nUJULy7oSc#tDCjc(R~)M%e&inGFTA_<>8=Dy7RgUNU#fzCJ7WkNsbeV``jMuBnTd zt07O!Xwi=p2Nfjy>rrjBfp_1+MR(4`0gbHf8qTi6p4nce3XN@MG?tF~=GNxDT|S5xtD!slfn9~uG;(u->+*2A0#M|wz)4Pw$fwLj>h(<6rGz`3 z{Mxlxx!%RxgRM25CV##1dd9o^b=Pt9_&%a!ZKkn*G*P0X#a>g2mJ$l{e$VLb*xrJ# z8B-%Yw6*jsUF;;7N={D|5Xz8b<^TR@}pLjk;=!lTfo|2lB%>hRGZ? 
z`kw125ncs=w_oDVbFf2T6+}}6q(m3zL_(O-Wb6JT#B3!z|bbP9vXgEyy zS8*~yJ8Us&x-OQyIx9NfVT{t%C+c2(9QC>Z{6ArGp=C8px_V4uUU`|Mbyv=zq$h~X zpYa6GV$x*H>&KTqu}OrbyjG1debLAyZ0xy}3^2{1Y#WF~|3T66#Y` z+8dvAF(M9ZTSc#LBlN&5QNJn8q3KT@V&b&@HLnat5j9S-w6b;q8F3-jfR$#=N#r_z zkE|>lTp*2L>qLwl2?E>*sqdF|9Jrbsxi^MpoXf3s5M2wMTe!?a2+LcrSCmuHiL&%D zg)-W?H4V0*!e4{hl-qO*_3XQlYwZ3$OcvkQN?qyiCzjbdQ^1i=Y^w_JDq|bpwB)h+ z9-5UFd;N6Q_ltdQUeBi+9?|dBs$^CqE~C)++MRWbh=-4p%~e$-7mCR)@nT%SU-H*ST@_;-D*PkkjQELQ&v& ziFMtIz6?k7LDsYX9`3rj#|iiMboLN04?&Lj`)1m|X~EqU7M|zw7M9q5lDoUOu%c?| z-a{chMTK*b8+QRH0x{t){xA+h$jgr=l1)Ph2$NsRUDTe}4hO;XjY z2*uPZ-4M+?Zy&QxT~)EEhu38tG}UF9ifc{n28y{9<2Ecb;xx>2;Ic1p;5?iny{J47 z_2tc-I35bH1CRbfhxqrB1KZksT#TeQuw91M19@Cj4#Rz%2ACeIn{tb)+P3%GVw1#n zM)QotLD7?J*6n4Y5-H0XRH+(>b}1zcB_;ICf7Q7fb1SCStQbxss z*hK-Nb*!SW!q&`Z~pgIV`PQ$smdw?DS&EP6|OWd9{5ku@kKH@{z2Z z=wq_R$ocQel_R5(Sp_53VrI0gDhZg_>Hj2CDCV$BRSSM4IEod~vZ^JpigiTFDw)(# zBw_LVOxVLtUry3TP%az!Bqoi;BcE^;8Ld*FnLr^Ht7cL`5fT}#R3Mx%6X}lipatBf zj#r9|R^7)FyT)$S$KP@p`{hbkt6#p{DGqULq~KExGN4ObN*Wrs2&S+V3xyUO2JF-A zo=EMOCghL&5Uaz+FAG;mU=U;Vf{#JK&yx(LIF3Y7|5iDy^d+%OGLj+)%i()sg=8W{ zMC7;cU#lgvC~6}IzJINhOr~%ZlNGZT7-jp3A{I=s-zbd#GZ&2;#T(Td?wRJ2>JrZW z^F7h~{tRtfWgBzbc$;!tWSi^~=2Gy|Zd+j6pr^e@D8S9%?}Ob(I~Y4yyAKHdFFjiU zO9AiU0dV>s_2Kkk^x^ek^*_DBcEqb@-tb=%T{2vvT(V!9Z_{o|Z7cL(_K*e)2Mqg5 z1W5P`eL(xTsdKmUfPP7K`N3Yup46VX2KNZ=0_FnV7i}Hx493&nmh8hi;yTRpmllCq zT2OPu0#9?mD|`#~Jx2}UJ+?ivJ)1qIJ*EA}8srv|mM_bQy2!d8by3egc*e+d+(iblF6xbH=iDG#?wWzhRS84Pzk_4+IOqg=+b$XFHr_xc_i`8 z^3LVRcu`KC$zpSHi&i(<#Tnl&ODlryC8jRNVok0T>slx*G-k7J$dP2wv;s9l)W;bS#@+Tw$QE|i?welUGpHK*Z4)scVyWn?h7t-tV-PS zDqXj-pwW|95|QDWVRR<6B(9vo$tE4!km8!tZ&;^VyC~8*c5O>)J9s+B$|X5^q4LCZ zkIpGnIAU8C&632>!onia4AYpvp_y?UF%3!X2eL@y?3iu~kN%v5AWc4~;XysyFkZG$ ziaf245>%(uO42)oGJn%i(j8?(*d3Y@cJJ}>k-7Hk8M zBTl`!`5h`ES~T)io@ZR^?~e8#3mJFf z=QrTnG4RgL3}0>+IF#nodsKs4U)^k1FUPtKsEDieBs{v#C0#*OGS1(n-F(IPk+XXy zzlVPse=jpXhh+t3L(mC3QnidkQap2GLBG?msaeiG`?sAPE_jl(;&_L;hQ!mLFd!qFHV`dMIjI@>?$BT_#28^{Azt)(Fg(UV^f>ADPJQc zu{xbkRd3evbu+PLc6I6BqqRp;n;nn}(RG@d3_j=0ye36;Gq@JFgnq+DRhT4B6#Z6F zO!jcD%UXG5>WSNXqO#0Kg$*f+E)ev!oGxYo=rOb5^}3 zJhs(vxmfw|$Vdg*?@>_z>OTa?{%^|C$mFAl&p)NHaBy*i#&#Je85o#Ils_Aa)VaOP z`^FLLyv^|i=sRwdmd^Cq@C}(cMeWWM7}U%x3G5K0H@_IVfU{cq;*$6i zHJoIsk9Vqo#n~+wm1SuX?SNlKY<% z{w8tqGtTTt$`aa|4VjpB=j3FvK0`1@EqIP$SMF|GYp)PG?o^T;_yyt2^tCgTwY7D< zYf46Dpxn<{&bA>8)A8O(Brd`FnnPMbT{^U2L#^YL>Sj<8cWC#fuc(UGbAap0BIe4h zit4@xQC&05%J10WWXUI)Nybq|o{N?mLvb+)ka)jg7pME878jFLW<00!#E26Ua23a! zOhx$rpcD)cQ|n(7wnE48J^+Vs1^NmIAFr7u(5qd!#c`=lR@-Ti5PmT?&N-+b1y zPNTm|31==Qs#I}h@?OOz>!+L1(**q72d2QEo18@Mq{$T0$fSN(xqHWXM#Fmy(*63Omr=8cDC>`ChOLC2z8v~R6gZ)d8c%@EwWr8`+_-Eno$AWV%Q|bK$T=F zoHPq)(VoD@B;-@$RiQFh?emb6P^>@uy-dbDPKp@dC-?eC9{wYyIeGf9ZylDGd;_DT zRs%=hq7#CIxk>m$MR39X{!cB}1atvOljZz6%S0>Xt=l0riR@^pU(yH%Z3i!(1eM=? 
z4oREj2kn`X9!28w5i&mY0;;2w<-*pbNj6l2GqdpdHW%}I1zrPdww)LBq*}FBKCf+j zi>9gSUd)>6Nvx+|C?%?{{VKJZ!ChT9F)mhVkDPGcy^v@6UQ733tO=JNF?k^`^_`^p zo!PUa@A(G~O%op-7A{`&mCfuGAG>jRE-#0C^ChP!$eC_FXwpDmUA1--Y$upD+DcPw zBOPY#P}OZ^`F)lE>j#Y8Emek_MMDnmI&PhRYmAQfsa#BRZn0&G8O$>3+`w6x)hGNm z-aCOYsEyZ?%PnS&=4Ln7PSh1zA6Ff|Sk)jBFbr3O&T?Q7M30A0O*pTH;GG;{o4czd zg^5cG{xv*u&lG;V4KDd|z+l?{umDf!10&QMf~ zKYSMHE64I{*$gFK7d{GgEc(2G{+Gq6=7njc8g^13T34Cm{-hLWwJ zwi;R#_bs8%$oB7)tIE7-#b1(?S`*43#^cIzT4|l@usW(vz0T2vE$&0V-lH-`Yd)6` zvL1pqV<*IwU0hWaoAv9E%likL-_l6g!6z3Q$>g|?oBZiA(PmK#zZ>aLJ+rk7=TG&d zKj`Ok{awg7m8LhIzsl5&ZH;_)LZNO*eEc?LMfM{5vB21=|5=)b#<&7yd^jWI`#=@f zr5Q(cD^rmKTdeGH8obERK{ZDK--j$jvNi1!{E-?I&_AgcQbdx{BcU~$da&%vm>f^4 zJeP{bdg+iJe3gllv0=x`@A)){FO~SqC6Otkj1O3MtebcKukrp*A->*<=WjFL+NO$=+k?fW(H}A|0DUI84 z++$3)xER!j#S0PwJ$d3@m*?hE-zMZT8Im)jqFS7c)Y8sCzNP%Rvg9lhyfKj)s-hO_ z&b54yCVghU3-GY?cEz$WA%A&YUZEILG|6IhW=8s}m#pgZ&s`u)>uMs>E6hS>OPmND zuLqAd2h%WA)~0?F%SL>-Lb7Q1q~#DGB;@|tHiYNPE?gwbnaoKc1&c-uIn^Py^57!J z0?2AnyPN$u$N53o&b(TC<7Cvv-Dofg$dm01nQx|1mQe2P6vD%E?*zQ|uh6{iupX&C zh$TS=W@`D2Z%N-(f7B^>j^VfvOu5z$dGpwz0`@Ide6tiD0-Et<&U_txL!6Gke_+qx z?Nql{trpZ0lX<9b7-vk_CL%3hncJv5@eN(8PA9Pj`r_#PC(22+E&@sQ*S{BpOt>TR z_k`roC@&m4(^XDd?$*1@m(I{NL^9amRI}dDVcVJM!WX)zCl&rkCxTz%3r}}(zDa9L zL(Tr~PS^~YMn)fBuab7Nwa<^lGpOq@ar?tVWYQjA%&uiF=J9LaQ$qBMnYyzx;r)2t zPh@7IPrh}S@#c5NQ7TTII8*TP*oTil?qMF=iqWv>cxWYY3((2u!1Lg! zX4*z}w*AnZI|<;xD)F)1OwT-ZdbFb$L^W_*bjkEsq+K&u4AVLxi;yq4eF)egLl8;e z#3d;4sHc3B!+5~r5;bUMJZ<~qlwmFg>adyZF#QIX!dBppj_NR=Wqp(QM}U^q^-fMf z#wy9Gb-7+SP2y!a``T^!nNXV3JuTRZ09xgm!ByycSd_Z2)hux3_OWp|t$s<3mm{;; z5;_K&Dk9lxmZ=sfp2Kk^SrdQv?m6u+hnB~PACdXq7jLo9U@P>0>HM5M;Q9Q;Q3YT( ztL1DNv|6D=mv&Dyr)rzT(H+s;a6p(G{n%!U={Y`O4vQdd-H+P6o}pg}?~-t_8{+-z z2!~2q4|Doteo1VDn$1&&o$a~f`*lCo$}^$wTj1l*2Bq$rB@_9_yZHv`F6T+LHyDQcR>UoA)ZE;z!U z_u0MZuPFFNDpfH@QPFTx`hLOBntqhN36q8h2aquIz`8p4HDi+>GTgTixV}iR_^F^6 zEvR?I{jivq6oL7ke4^#5zG5+u-9EH3ptF{B#9NOTNK^rvkaPTz;C{`b$wl}ZotC=` zZ;bLdvl45~UMCZ4%hnZU)Taog?-T7<9mvr?tED=Ri#A_yQovm|RhgErCw-iN72k^~ zuBjYTG=@J1wRaKTxjr1LvJK$e?**#;O(eCFm5YD0I6jA(Mf~VpQyUnp3A$o?j+Wx> zpRN~8mWQ7lvqmV)9kcI<7cm>wnm`u>Jfe*hpQiKGXmE=z0!2Djrufdj6sy>VWcvX8V_SM}O5}Q})V>KLi=J@G*_tS+qQ~~g6@#SXCp!rPy zVpr9mo}IYU{*udlYPuTG`<9KfuLZ9L47$Vca6Hc%7i$%m<{%VcLV}&1JHFVrBaCT+ zN@@_IY}$S97;HG(iuwcS3st1!5TBq)cnxu3v?XSx>MG+;D=w z_?n4q1R%Lt-9%Qz#EElqV*ObJ6fdLM`!EVFRQNnPlx=HUx(`i^d z%t_?5o5@+dOK>grsH|)ivEp)<<@7!1azz6?U3T>|YNcTN8fyt*TKYYMpVH3Pisl6K z7jdjToe{8Cmn3FDf0iOQD#u-`#)`FKA7m<#X6EIO3_0oK|Es<84y)o^*S@`=s3=Mi z1;MVsnpI~ZVpPN~f+(n{^rCb@iDC;PN;4RHjbN`}iP(^$V2g@ki5eBrNTLz35{!DD z!6Z9b&wajg?eoX^{@JmaJJ##W+6>%u=3=sUT*osZ zag{&3nsTwnmiBtFz^Qr6%X$xk+SQ#oR?*64=KlLX&5L?(7f@Mm zu%@hWbzJDSVLvr{H~;mMmh$-b8 zqW#*}jq-Hbel2QfVWg?YcDua$`&zh_htv(ryX;n_xYh8G%*pp?=VkpLy*6u9HnyLa z^C*46Z26VI5evGhpWWH`;l<64>evgzZ>U!WPt7g5dv(Q=y!+*rfkO-YS5DYGdgpRG>yd?HMx7fKq+C4CGozpWuj`W>Qa=r8 zKBx7eHVdQc?X3GUCPsBG>eXHMOz&=vBeuJaxVCAd;o)8G^5u`N{IGQB&~1&b%`*8q z^LD-CsW;#LTJte&t@GedKaKFJ={+rTo$HisHIbA0xfy zSvAZ0dq<93{Hn(`!-j1vH|M9fJ00WVI{R?6{Cw=wDxpII(;vE)yj;_LWT;|qiw=|5 z-FdjH@7zNbSv$WCejMffbB)jQ;cpARJ-&DAs9t_Y9yR^`Lj8-CN6dbGz0=Kb`a9QO zer>aLc*D@(z!O%vvxiZXx>-)9u4 zH`i^~?Z-AjrXDSaJ?VV#mh*O{dbET6`T@_IrrVD)PZ}Nd+HLs#Jswkkx^%pKFs(27 z(K?~+@lm1eXCHW(KF0ZQgQv$<9dqATd~@LV;-h6MU#q8=Pfy&r-M!&%yTTRu zZ;w4c`Jh|Vn)erN4J)m>nOer&Z+?8@j`x?&G ztED$r`v>KG9g{!%_Ta~XTf<6Cdft5e-Ks17uJ5?pYi6S!g%4ist8BS#?)?+v_U4yl znMcnZ=#p$!VeoL$iOJiqiO*&vZA&#>bg<1&Biq~^^dP>`%%(m~v&NhgEE?w}zr51p zlCo1kqu~?7&aIOtK3(&xy4poGeV^AwkH{eZZ%yFjy@Db|!+x!N<51P3qy`SGfaku9Bjqn|3Uk=!CV&L~#-`{n8 z@x;pYDZ3}nI$1=lh=FVFDORE#!fC@n>F%U<8G?eAIcZb8DjGD_nVd+ 
zI;3B1c_#9kPD?eT7O%e2s48XXc4K{mUvoN}ZreEBB<`%uh6CF*Z#~9^JNz6}vE}LA z9s?Jg^LuGL;dt|j`+wLs{@~@lcl5`O6%)IyRgT-8IHtS(unq4mV;634 zP`%k7aDLD6A@lTlRQNt{b~n%8KHF`6j_U)pL92OB^$hpdNoa9xMACG_2X5|FL#J$$ zw>tjAo?+pWLR0T<`EsYCc}UKR_!B2>a&q?^cb+q1tlNZpCyxynGi%}X_ImEmg64D$ zAHQUJdDs2@`rTaTel&ak%SLbaP06zRsoVPYk1gIUFR>51GU}T1h{s~FLB*S)3y0U4 zJJ!v$<;RYx4IbtvU0Qa_tYpK4{W;^7;|Nb%fvOmwCnl9weK_q^!@5x z`;P8ytgIfMb;rGb{Dg|3Cv9hYmj9Mwr!jB8zo%@U;*v>aQFG_jyLMVljvs&U?00*1 z?_cFP#GymM(#_W&J=#C`=W|JM`%hhcYCSi6sPiD%j?souGp4(|Z#~6%ewX)=m zW7M^7Q@%@?wmiqhveBxL)Ki&rBa9ks{5W{R=BqttziC+Jf2Ys0X^n2DG)!pt{qW(3 z{Oc$Aq>jFGH16BEx4D@)QM?%r3`tchRI;kr#uso%k4lV+Q?tQ?;^+qwMK#>b_{ z>OY<~E3WbR8x9TV3m@7~>8HouoB4IJ!w*{)^u6?9*|>&Zy<2=twkQ0WS!~phi8)91 z&e;%f>PGqG17U-9-ag{&l5uYLox545G7cX;de(jIil;q;f2}&zsMn_^oll;dG=D|% z+EUXCPhL05OivsZ;p6QQf9K_^f$i`1-<5rR*nau14K9{sjJvrn>#M!h2i}Z#-t%DT z%KPq-{@1_TW_e@lPZMsedugqiHLB0YT~B@tnVp`uGdp!k!xnS%#&_y7YjVUoug9x? z^@vIsV3jz$sAziP)x)YSqoS4-MQ)8-`;(ivc=&+SUpt&XTynVio#pqBUEFVzc3_@m zLFL5pH?Q`7O#K{G$L%sbMqtWASTn}Sc8E7#{) zKG-?PW1;lF>dx~w&Z!e}Ij>9nhfhtKpO1KTxo(jvIjhOP{`VsLK~B}k@^x_z3w%v- z&VFhU-=j_|&4yFvtp;>)(;Ip!D|Pg0PhnK2jd2BHYP4bU)l;V@#RvGWAJp-69}{W0*yEQ~@~|&5e7e6{ z_rutvO&7!jxAminf=@^G@n7Hn2hVjW2hW}jO^xUh(R4mo|!BFGx znHu|ijC|jqxnZ6~F=vZLUdY*a?ELpvZTUvymXE6Du%%nYKATgT)k`6i}bg8Ln{q@+AOhM;Od)l%qTjorda720V(txHJr^B6=Y^-@u=i$rtth!_yJ9p>}*ejVvoc5c+1G6!U zhLm=w**1Et%Tc#mt$rPTdXevkp_{v`TwL6Gv!7D!X0xaDpiTxq_W9bZUF-C+ArHMe zd){l^-ZX8=X75gRy%#R@vouU=zNq#41dmf+n=QV$E;!s$-MaY5d!P8KE2EWi&y8Q^ zj5s^upp8lE%sGA2Ps@7uC|%|5w#9tK(4}@xHF2BEd^(vK-013){pbt1$&_|YQqs~^ zw%qolSE8!kx$M}Ga7#mT<7ThBy!Rb=&il*mMehfmTv9RDJ}j-UEIrmU#i^iP$I&(+ z$*E1^Hw)f=V!e#{vDM#wozis9k|EtqdbBMlDa$c7?bUX`K<`c)7Z+S}FmAnIW~y$uFXZ z7axO|E`^09n@`>@SqVL)AZ}L`D$*XjFT=8j{aU=U8idP7vfcOQ-g0FZ!W9ubTshH zy!uve8b>$s|2W`$-%s7QbehwC?K`uXJEJ|!Zucr5U)cKI;;OMr@;AF~_V3+kPi2Zx7QNL%#w36F{HYiVim$95zBw%xGl zHnj(4;~O4oOF!5OHVRv5L-cRE(NW=kF~0s$4kLqoqr#_$2SgX_vltyd(?4o-cyO5X zM?m3;D2u;-wNR*4_R?1Vm1%D)9l_s@=ns{DD}l%8=lhrRh?uC5TH$`aw*Ddhq5fgf z)9EmU+QanowTljp4)J%WJ$L;>qCfv5Bv{gF7vdcj*w-@F+Rs10J0>LB(&6*rgarTb zkJ@R$=}OQqCM{*wBS+KBazAepjSnG|F< z@bi(mg@=dydljOaZ2wn*zoU1wzk^&R7p!FpYq{1kRx`!T)Jp|K92PjD5PEr)$T* zCIk-uno#(M(JW#nO&9)~iHkyV1pCwX%Q1L*M2Pp7bbaF{UGG{%N5%L{^Ay@hZHQc{ z5dIckJJ0cO^$)Ne6Fke`Li*L#eWHb6p|p@svaodx509olWEQTrL&E~XEd;eRNuoa- zC)y8?sbr#(E}Ucv$NnlMy+)<&|M$x!hyOW(_TUs8673&l>l8xQiH`oh;eP&q&ajkf zrTF(5R_uD!-I;e&-6Xh6&3Xg$|uv zx%K2b$K`K3&YgMx$#1_EzKYxsxoP^o=^LKL%y~Y2)0j-BH>Vz781rFXxvA=7dAr^{ z+^+YEuFkqrGRSU5@93z>+u!SL2)BJUr_o32XV=V@8ou3|+^^aDA3bi(8&)>IyjkNJ zJ)(Cm>8Ceoo^ABO(mBDKiyJ4z=h~XNHf|FcxH-_-(_M_7T9&mYVe%`zlbdYK(yRgu zYu0;;R|}ub_AJ}+F(I?DVU2!ovunq$oNAlf!|Z9v#@LzO5s3xCg?a$A{HYwVs&6@enF56#_ z(={P^U0a)Amz+Z`-Y1@TpBOv$hjj@}ef8Q5Htc$I^M%=lU5_l>UY(@Tb9ZoA{jg+I z-`x=_4$qDmc(KQbr{h-NYj?^pz2!FT(XR9D*4#d*7jwSo>$YLtORd`v{AI-|?_XAJTGps#yK%jz zS@!Dd9+dNK*2|{}PL+nHt5Q4dUTzdJ?!%6(+kpw!FE}ml>fh&r>EpH=w|TtUSTJF! z(b|)4K_7j*Lp7xZOP*e|-7|4x(IK<(L8^UMOrLHZSy@o?K`-<|Pv_&uUgX_gY8^8A zyR#vSQ?us$kiF&U0aMj3P2sW3sl88I_Aa`4y>$Hsqg^|Ncb`^yy3RTI-1@xhnBC1IlRk0tdRm$iCbuvu)LSl;Ay zetJ&F;tbogd>7I5!xi)5a&di1UE_rd#Jb&lWQ!hjUY|5ZX12X-#QL<&V*SF1c9)92 z6dOPBQC=z@;yK?Sr&VTzL6KMQu3lzE^Y*4 zUZhj4PM8+iA-BNuV$sMP1I>eRcfVREG@kje#oe?!FB<$*9(J*CQjYn%vdMSj6RTQv zJY7H5Y-yQ`{a2b7%{LZ}iEX`N&dXP^EiOOw@0~Y1)?k0R>O_+Ji!S%_@5D9>e^~#- z(swUx9P*2+o0var7MFJUMR2DT9ezLJv3QV2(wS=0;V<;(o|eZYR#w%YTjX5b_SA!! 
zzc1=nl{kA`kDF7f?<`sI;zRtr?7LsejZ>2!4r-Jcn|^kP-)q~Rrm4xzoys#}v!5n) zce-mCQ*vsD_i5`SBbO8^wv0E(J?pi+SAmg9ihS_0%t)6LXT3VhecJ3yenHz#;RQJiRZKoR7{%V!vvOQ?3pGrsYxk5XNqd)z&*i1G4kZkJo$ucfIe@D=tzbY*Q4RVJ}dQG{ZDPB%h z8H>`7HJQ?&cL5#b$3eeX#!JDavWM-;=%}hKlMIejOnGhl(&R(3v?DrZX}@xVvB_Re zpEBkdoT-?y(6+#+j84NR()+z6p~gs?{BaQdT5?LtH8uJ`e-5$=EIGB<$LyiwQOmfJ zoE_eWgd{o<-JNKczuCsC0^M6gXZn85s4@C}$*w{AnTuQscT6aiO*K}f6gbuXDQ&NY zw%2OV^`32pNk5GxiS(1fk;M~FTTe9(O>uQf&Wv?A(S7oAIb!AjdDx|K zuWxNkz5SJ^lRTRKnr%0(aF<8JX&F6ke9`96>0=&^r)@S_>l=FLi#~_W)C48m@Msx& zrR6%GHX$k0_hy{FaZ7Kak0K<^I;;M7#WSS7XZtK^lGOJ$-l~7xSMzM?rHX}V74tu2 z*}l!E4)?rb(LS+G`QVzWTs1~z$LisnW>W;O*wQVN1n@;yA0hDQiR<#Nv@ zy<~OU2u)Y{$^#ynj>vcOOxP{xUlZ!zd}w#EAT_7%P2Uy)9nf z+lqt3Tve^!&>4`wF%*V`S? zBX?jI*ZV#uxy8=a2J;>U{+>4FMX$Mq=GE=zmE9lUXSt%t`pA}fQiFZeJA-;g>7ZiE z7?;8z-{tZoqn*i7)Mg^n&o+ss7D`>AbkI@ib7$zY>^83SYu|t^HmyzaQ+5ri%Af`! zwZMn%HrQMnuW~g?PipRTJtNiSeD}#)Y@}RAr)sJDHt~>}X>h94x|7n6Nv(TVyWgmz zRAtO_DI79M>Q!TtTTn#Db$;-#v5E~LLEhF;%fSJe0u)kiH~|G8Es8D zRJ3`R)QGjzWr{PVreBse+S()`m5zXV?vbLjjX8WcK6?6^U}nG{*->LJ=~WjLTYy>IuYr|hD_>Ar%6Qz z)WfQiLGBK3f4}6MLHbrSTu9wvs&ORs7JX?uXT3gmvmzRos24Bxp+DWX9XyuV^KkcX zCcarFbF8xTGB~qi!snBfMB`PGL9R4jwWfjM;~*OPsOx%{W;Kr(tLW+L*fTug9Bq=$ z#)!uYhWbSxlrJ4*IOfYk88kwD9Mt}TZ2X*qD|(;vGI_A;eEW(uK~cLWPpGSMX?%Ee z!tEW2$AVI7g66Ntwj4KLpt3;feBLVMnO}mwQe|~rn`tt^i@;}{-EcyN{)Ek{8-+Es z;L|7Y6%^fSuukfHxwksr@#z(k8k*H%#_1NfIvn@u_AF&mR(m_2md}=)yi)hQw|Vq{ zz-Os*BI~>z+%mdwLB&`5D$?R3TczcBiw*ilcAvFb)1o-NB0f@VSNx#6S?b)1rR9-k zjmjOH=Y6q$et)s)K$+q8GQag-jfu3oblOBTDioRrmW8a3e-+u$$+xpjUK7v7E;&tn zW$ld0TYDybnPXZ`ozJ^o*N0IfirVJ19wTe(?rpcNtdRu1B?P{JvRQW*wvX%ZQTo!B}A6R9! zvA9{R!Krfn6N^?==}kWK^fU4jk5sj|cjo;Lzn=%jEx7zbaLC_YZ8oKBcwF-S>{iKH z#>2A=??33%yU=z==}y;u<}a+~7KK+IZq=vC&F-z{W8S9fW}hBR={>XWnO@&aRPAc_ z$IvQunNsQ$68Wgn8Jy`pIa=Xrlu8Yj+76+HxM)gzAf{cYBTEY?$e3aHh$WL)0oI#8Ufm*(bGMIrT47>YGx(?!UZO zhH+W4n#O_R^$QRCyCii6bBB z5M8B#$xoI+omUzHXuN1L#ZkyGPER6qejbS=f(xmC3?H|q3_d_TK6giuqfmPiipu&c zZKNLR=(HsxvgA}nKm)~X8jQL->0AAAc9ymMJia7-R;bth55WdI|F|C5bJ6#2hr43u zzDrxq8@jI9EumXFMXppAbjBo6X9G>1?+V}C@zEan?m?f2Ahw`0nwQB!uUv=TOTMgR#8il0p&Fb=9-rHL(?)bESma-wsYDQk}XYM5t^iF1V zlc1L}s$xlf>UzadoF6adhrFp#Qnjr5&741^sWi|H+{P9%a4g|aO8p0-T6<2W}46Z zOYIZUC#-NUfseZ0L+`tZvm;ynR8C#5aYVn#g$JdsC)g(izEEx~_$Ick?ZbxlOP{>Z zn_N`y#6thrmiws-#%rqD-77j?-Jr6(e%#`vqjz4jj7z%wqOC)*QFZHPRo3mwr}fTv zb^GP@q)sa~e5$N&+V5fGxTW_Ad-<2=4%<3*Ub*e>OZs_kS$6Dkwb3Whd3VOt+M!kE zW2&N_pr7gKa+pTie_aWsf7RXHZSTdY*Sm)-m+xx#M^H(Q)Ut0AthPC|&rB`J4)Rpm z+@{N=qCw>usU<}f9!eWiql8pfx_pf+DGKtHE|h+&fv2IG~ zqKqysKVRgTt#G`aAziI`|M^4S=S#)U?X9^}58rtmBwQ1vTyA~Frc26v+I6f%m(OE> zt5crT8-k|(nPh0?CAD&?t4SBZ?df`u#sD|vXSPegJG*#fgWi9%=*)H0qG|k?(6+Hk zB3Ng+bV=LY$*@=AlalPSUWU}`Kljs3pZjS&8Z18d)5}3)7t$alk+DkZr)LAV*h^Oo z(sg)kzk8&RE`aGG;Pch8)W?5Ik**26{&g)KR(mqCE2eDOC*LwiW<^)~6&?$1CB)VK zF-%`_)7E8EOjmOR@Po^(xv91L46JtH=>T`?fWcYZ*~`T z;ZL{fP4u1}5+9n~VMg(#TXp*o@V=Uq-Tv+Aj$(uN-zx0BwX*XLH=92v+gt~Dj^?jV z(@an5b>&UW5_83NA0nHl6_ivYmKmp%M_N+H8g*9Em{3lugHd7%ZPb zx~<&QGjW>G_+UhveZ~4Y1_vXXg%!1lH9O+nF|4>lPMhhGM$e1ptX?`h!dANCY~J!= zC;ODaFS<|8`)0@Q`e&!dwyUMykr#G7=UKnhn_N)uM2dTr>AlnKtIak&2tBcM<_nv9 zg^jCQnV0#-Epo1En7e1x*{ivaZI!ue-kPfdS6%*ibN!gy@4vm#p|IJ^6_qhNRQuy9 z`|thj*N>UIM*jAC&e%KW5@tMp+F{r2RhJ*S>v!MeyWiB~l__z0}m{)o) zC+_@-w=LqQl~$Wyd$4H%yh|1vW8R_FooZ zf0YRux&?&JAVqLO3BWZQmtjjLO1_)+m%A8WqnnllCk=wGoaROy9$+pp92*i>l7-H>re}F zRv&7Gn$??HEwK7ki%NDZ)O6YQG;$R$Q?WL#&}jJaX|!zIr7y8)Sbc~JEj#z3T43!( z6nXt>WxPF8b+BBmlJ7&S24$jBcPyddg^hnonOu6r^pEjbDN{1$DP?MQo~dW?eF!p@ zZd+3dS~j+;w1QkWPNk(5sB9DIX3Ea9T2L{)YC&XuK}{fJ`z5I8o_n=iCFr(iwOqscf?7cpr>j>X7uoixW9qhh 
zwSpE&>E=>HV8;i>9JPwbubWG|VCRnxknMvI#g0#{VSH1owftNW_L&Y10Z#W=G#cIx zL>cRYYEjOZtQJ)Yb`C^(Vye3jEp-M~reN)vMqQB|GhO=VjvKU2l#N3gYJR%=(8$zs z-SelRVS|+s2YH!7tE){Ts2P(rf>vPrl?!a#pz2_p*2t-K=$se2`s3OZv}4^k0-SDp z(a_3M-E*&@-7;-TTK1>AJsQH8UZt955C0tNG)jI=qfrU0?b6j6uMd@4uFEKmijO54 zX%N%hFI{4?afpo7@WyXo2l5daekkj0m$8jCf{Flc89n?Qq9I=9z z0Ug9?9tU&?KnKkOcrMUE<1UW_I%w=)G*0qdpo3ON zu{gTc<7Gex-Ou51KnF2^#{nI54b0AA1+v7SEfai464dn9rP|&?nwhzE_x+@6cfDXWOx_bz6feyfP zdgl=20v!U-0eG$eJXg@%nVkc`bGo|;;(!ji568C$bO4?!=>7rA1w2;(o+}hU2jIDa z?tAln03CF%fX4wH3Q!+_=X95rZ;$5KY#)H<3c7E`%lfDXEs#`X(% zF5S=OaiBf`&lM`51Mply_g?uvfDXWO1>LV>xq#;iz;lHf=m0#I=JtFapg!oi0gnSZ z=zblK13CcD6@cdoz;gxQxq_aPu>As_)58f62XxT$557I11Mr;gkn>zHKUV;rOV2gf zegV%FV1BLuJXe7ExdQN90eDUiZTPVOo+|*)rRM=m2biBL0M8YG=L*1c>As%sKIl$A zDC0WlrZ0%&;}ty-0CBv3&{F~s$J>?kOpYHj&;fW(H~V-l-TPwO13Xs(p3}oNkPCDG zo=eY@`F?>8z;h+wIX&#<+XFmT0-h_u{G4v!gYAL(06bR$o+|;*rB{^LIRHFY0-j6H zv3V}255RNj85Pe3^#ORU1Uy#)o+|;*>ES!yFPNVz0ne3y=SskHC77Qp0ner9_Us%0 zo-66TJdXo90MC_x=SskHC77Qp0ne3y=SskH=~)pwX25g0sSn~ny8`obCE&Rd%+Hm8 z=k!pW?-%e~33#pqJXZpqD*?}?wIuAk0G=zs{9Flmt_1UQCEz(N7~uN_JeSt&u;T+f zm)3dkI51uTo=a;#crK_9z;h+wIW2tP+XFn8p1rgE0-h@Y&y|4ZO2Bg^;JFI$Tm|Om zD!_9UJ-^gFW?Iw%%6NUK0MDg06MTETK2%_SEj*4z;hMgxeCnBrDybPdw}OEz;k->8RUZc06eEfav&G9E5LIV;JFI$Tm^Wp0z8-2 z#ISP!=I1KFb7{Q_&jmUF&!shdJQvgl;JFI$oF1U_?E#*v!2DbVc&-9GR{@@@0MAu` z=d=itA0Obk3h-P7c&-9GR{@@@0MAu`=d{=W93Q9;z;hMgxeD-H1$eFkJXZmpOY2`) zdjULG0iM%hQ=SWWPAkbk9B5a7=PJN+TC4%K2Xp|Q)5~GS4qE*N;N=m0!dgZVkF!~@#{^#OP; zt+ir00MFHc=W4)nTG^1)0iM%hY@Q2vt_D0;1D>k^&((nEYQS?f;5n@#<;Mqjt_D1(MX4Yc z)Cb_X8t_~Vc&-LKR|B4_!Tekec&-LKr&X?82jDp^q6cxHT>+k}0ngQd=d@^%?*s5$ z4S22w^K&)exwHMUchtdeJdUZbO4@f0M9jm=NiCs4Va&60MF@# z5`KJu=NiCs4dA&3@LU6Ut^qvP0G?|A&!u+|*f|CBa}D6R2JoC-j0W2S;}zh!2Jl=1 zcup@`^L+rGYXHwRfamnm9@rkx0eCI~p3_T2Yrmjs@^s1Uwf3 z&qcs<5%63DJQuCXiah!vauM0mGP#=KjTEKHH;JM`4!1e)nt_3{T z0-lor5!fEk0eG$j^K&x7;M)T{*8-kv0nfF7=UQ;TU3!-RgXJ9(0jEz@R#@ZE?vHGAgRv%Qx`Z<-c@rugWxJzZMpHmso z!Feu7UL`tpL}fq+=Q+gz9h~PB2XyfHImH1Toacg!^PI|n4$gCm1NFgqPH~_Lsq!FevoIM1mJv@6bYiUaM6 z^PJ*9yW%{jIMA**&&f*)^mER0iUT@0&nXV*;5?@|po8Lsq!Ff(`KnLeJ#Q`0h=Yov$ zoXSAE;ykA~(5^VoDGszN&U150ME(D3+w~v06dpG zv2=MP`FQa%pabw+06Z4}&jrA9GI!09 zihvHla{=&N06Z4}&jrA9$=8&f7r=7?@LT{q7XZ%%z;nslmG2j{E5LIB@LcjWW!nQh z7XZ%%z;glcoD3PkK7bCua{=&N06Z4}&jrA90q|V%PG#o+@Lcl9<8eR-;JE;JF8R%| z?E#(xe>fUSz_<%|E&!ejfae0>x#XwHj}MGjfae0>Iq-+0;ScBlJQo1ZC4X;re1PWy z;JE;JE&!fO-pp(tfam1!4dOt%0z4N0&jrA90q|S^Jf|<0vtt4NaDp809Qeag9M=K- z;V6#R2k?iZINq*+KODsY9f0R#hRO8;p34Ex$#r;JF;k&*gyUa=>%R zYh1Ve$pO#FNgc!i9f0R#%novSKPPu}9tU_X`4_We20WJoo=e{a;JH8t;5j+CgIu5k z@LUdfF8SZHV*xy;k0O9LpabxnT;f44&;fWZ2RxSpo|Azw-v{8i9PnHYcrFJ#m%O~$ zxdJ>V$9fP4+7;lrQ5k4g zfaj8LGCMDT=W@Vv>6;8Z7w7;y2mWw04FKa6;JF;|Tn>0H2RxSpp34Ex<$&j8e$4d( zp34Ex>EjY07xZ($b2;F-9Pk|Y!_hQ~*9Y*26M#P)mGSye0G;bmN`2qal=(Yu@55RN!+6UhU;5mKp1jO<23i!hb3cz#V4@YsJJ^;^w zKOEhD0XhKBfj=DO0v&+oWKPe|0l43;06dqzZKBIT1>m^?@ErKV(XAy=AAsk;AC7W? 
z4#0EzG73Ltz;nsNo@oO-2mWw$I}GRmJO}=8lnZnKo&$e4$_4cScn>q7Q7e^K;-2M~^T0cm@37D2}%);15S}KnLJC@Q0&Zpabw6 z_`}g7SD*v%9QeagE@)SP=fEG1aybWqKODvJ{sH{q=rJ+S0eBAl;V2jA06Yi&aFh#l zfc0>|AC7YQcm@37=y5mbAAsld>0~xe0)M#L?-}Zjrz&v2T?Kd!{NZSOKnLJC@P`wC zKOB_-9f0S+AC7WCeE^;Ve>lnoIsngAU_Bi0hZBH59F>874tNgy;V2jMbHH;I;JFIS z&w)Q2Esp@>6_}p`e>lno`~W;x0iFYYINBbl55RNa4@XNnfDXWO;15T+KnLKt3e3-e zKOAk3>j3_6wB&^MbKnn0alC&3e>jTcI;8LQ>h^u$4@bFt+y(w{v?PX)yTBig;&^?i z!TcQf!%;5K0p{nxAC7W?4lqBbueb8^0-m2s-&1Gj56sVjKO8L;0y@C_oW2GN_5pN& z=jZfMR-OxZ4*cP0*%D}1falWpD%m*zJg1Lcf;gZ9%+G;8oB;gcs0`Ex;JF&`9Qec0 z_CWstJXeGHIq-+0C0?LD0MDiGsjQ*IsngsKOE%(9f0S+AC7W?4lq9l{&2MX z8T1dpbKnn0xuBl|o&$e4$_4ET+;69^lk++PJO}=8w2T|n2jDsIhof9jAAsk;AC7WC zeE^;Ve>hrZ4(bEkZwLNxlnd$u%+ED|=fEG1wg+?oo&$e40rf@w`~c6-fj=DO0uF-txd!lD19%Sn;pi0v-mZW@9K~@R zz#op{czpnWIEv%_9Qeag9Pj7AAC6v|06M_@9QeagF7F?}ACBU9eE@$risS7{1Uv`+ zaP+DN&;fW3{NX4U=m0zi{&18F>I3jx1oLy?4<`VBI4a})9Qeag9MA!H4*cOL7w7;y z2mWyM`V8ke@Q0%~pabw+1oLy?4@cVr?F!7#MZj|r@ErKV(Q81UT>+j0e>lno^#OPe z{NX4U)Cb@>@P`wCKOB_-9f0S+AC7W?4#0EZ4@bE`2jIB~cnr@Keh&QMD3@QC0Dm}& z%+G;89KF^Hbb$Fe@Q0&Zpaaa$fj=DO0v&+oT5!J|_`}hw$Uq0U-wyoYC>Q7e^K;-2 zN4Y=;n4fC_&w)Q2y*>?e0G?|B&$WQ(z#oqG0s1-MIq-+0SGmEs3wRFv;V2i3SAgfh zAC7VX2LaE4KOE( zImH1Toaba$0Ce#AImH1ToaYn=bnyG_6bE#0o>Lsq!RO~>u>f>%o>Lsq!Ff(`KnLeJ z#Q`0Beok>f2j@B2LI53{=M)EY@cB8#0Uey@6bE#0o>Lsq!Ff(r7C;B*ImH1Te11-G zKnLeJ#Q`0h=M)EYaGsO>2GGHIPH{j7pPy44(7}05aX<&>ImH1Toaba20(5YmQykF2 zc}{Ua2fyD=aX<&>ImH1Toabb70(9{CImH1ToaYn=ba0+i9MHk%=M=|v0Dm|+@Q0%^ z&U4@oM{%6zz#op{IM0DU9K~^-1AjQOj^WoOz#op{fDXWO;15T+ygmfLbKnn0xj+Zt zIhp0?J~Na9e>f@wIsngsKOE%(9f0R#Tmg;+=m0z?GXb6pcnk!f1MLd%9QeagF0T)o?6Tvdi6bcEI%q-z;&{8F3vCd` zb z2jICJ@ErKV(e{82z;ij^Iq-+0Lk9H$cnPWR;5qPzqg0p{nx zAC7WCeSrBn@Q0&Z&_4jrfj^ua_`^{d&;fW3{NX4U=m0zi{&18FbO4^q!TekfcnfU8fDXWO;15T+pgsW4fj=DOf_@Ho4*cQB_7e0Dz;oaaN4cPXfcZJ_hofB3 zKfwGP_`}h#%Xtp`;V6#R2k?iZIIaWu!%-Y>SHK^R;&^=ke>g$~&;fW3{NX5jTc*CoIoj^cR#0RC{aG(d;(bfZ~%w?O(qW)Y^ZC;dNth(x-lFa7gh zW%Nc8UBBp-Nq%4dD5Hs*c9UiQ<8nQ zmX}e#{3|Xx$~!p3KT2QE-rjah@GO4|n&HZAUBknpEocuGuC_zN0>Uk%+n2R}I8L-5 zK;I1$l}?WIs)b{J@^qnBANrF$72O%6hgwv#4*zq+Bnt-zeZA??-cix9QT_q?dW21t zQqb4y(q)jdlfK^PLYak3U+@3Z@!5`!@}FVx`LJv~T>S&aM2FI!G!<0q9=cXd7Yu(K G`hNjv6B)t) literal 0 HcmV?d00001 diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pptx b/doc/cheatsheet/Pandas_Cheat_Sheet.pptx new file mode 100644 index 0000000000000000000000000000000000000000..746f50851696457aca0fa1189bd9b96b5f558141 GIT binary patch literal 116861 zcmeF%gO?;tm?-?VHEr9r-92sFwry+Ln6_=(wrx+_wrum6kxfr;dG8%zd-kSnSK9F?vpHK92L#9CqHd94S8(tE&+ zO~8#;Vo&_*N1Q7}5Hm<|0ApK~I-W1SZ8Pul?TJPSlbyf@Qp3!KNksZqngR^d^XcoV zMMH6ZeW=_D|417An)Ke)U!Qin_{0UQ1cpe$u9NN~WH~>@DuIQFh|U`MEnXR13BSmO zdg#_JXs0U$=vVrVd%-?5(Zfpn_SZAosD5|D4N<>nfDUP4#B*XM0C%YDNp%uGE>m%r z)R6p`qDX}6$`) z?R=6P7x{fCI3QoO)8r#B@mibu969+c*CNA2TV>1Em~&EsC-S7uUkVRvmn z+Xi2;+Fwc)*66e$9pXWUi(_;=KDmk$S7-X#$vEaTiHPW$JezM?1!MGeCy$^D42s;s z;qDfNY_0cOumxxn9Tz@P6FhyHE@bRCSX9O-EP#{ZZ5{}(6xe>wDu z-|G^u*WYZF_nXs)X>cSm2JCZKa-f)8(-Sl#x~ z2Kg$X4pyhPu1p|yxmRtQ%FPB3wHco=pN$<%778vwe_l(cBHK)uR>-}EAwuv*y|)MZ zs$jwc|5qzioY#pH0mr_BRA*$#b`4o;*%?-<4`WU${ooS0n5H9W9*^)Vo+hUO^9Z@K zSVL&36j_ro+N{B87)nYGo+Jkzk}^SH2AW2uYUr;_&l|I0jKpf42h7qhX|osC+!<$; zdG8r6(X6A6qA=THboC+OR^*ryMK&jI8GN}a++Wft6o+ocxuG1q7Z-c+#Me2%Lj3@a96O*VP zF&wLzt?}_UCJrbiMHUp84}U&o8k5*7f(3WJj*@L|s?Lc5ZA(|dvp7B~l}TKeMpSt% z=XM|%53(sf?egLkR^>DAlIRD_m%a%mdh)!zo6kjJiK85(nTa{`hmPkT|A-a zR@A-xelSuoQIUS@91+PI{6tSTMEp~!aw4gRSl=Gqe^6j+@KXv-f z1nC(4)NjDpLIc2HQ+_fiV1mFw#6BKDam+t3utC(QlH{C7P<_Osb5{69wQ7R)F>*;D z>`~x?s1YOy8IZy9*rw*}2z6@Ix%W~K`RKb&&#QRfo2kdhA^ucne7}fMllt-2wPH1- 
zw_b-zZAw;N`u3*DJB=oQ9{tWfA$$xBTpQy%#Dm4>Hc|R_$N`k&m7jpeqUbI|2V>_iHwqe zs?y81DG4xHKJSxPAB7V26jXFDZ%_x z;v^bX03o@oVxpR{zK-$hdU)Ob&xWx357QHO;Lcl2nS{rBfef8Yw$Izzqwo;L1bt43 zcC>PjEEV;2xhc|^v(%NvE487nBgVb|Lp-aCd?kEA?7H81tNzhtgjTSfpcV6# z06XD6%ASP-;;s=D#YM*(YEm zQ_rIKE2q%fwc7r5_zL%NNS(RG9X(I2``~BvYU0>Q z5KAzzU(sQ097uQCCHX>u!AckJl4KSQKmSP{k3UD}$c6mVJ=bPIm-IEJgGj1sE;S<0 z>6&+3SwTkxx^wAxAWn)K_mm0Od34`AEJSt zAev$DvxEM}DN=Z997#ANTtkC3iA2+MqZdFkFGaHj>xk-Y8Ay7Fqk6nKY`C_La%EP# zMZ&;Iu?yl8AID3o!N&VVPh_zXc|=)wW7XYr$BiY@uFm{j?7G!_mSo&<%Oow77M|nu zl}CzQ%zM!#u6L?Z%oLThK$d1&*04R_bhwpT*Qu!?*R<+sKt^hu8_iX?!$W~j(#Q>c zCWUSJZaa&b4$I?%Rm}bJhM4?|HdoAkLbeXaX+`S-f(z2A%uP?&E%tFIU?-P8#-l@+ z{~%EBa=;bkZ@$olvbcu`+>qUO8<^#sc4(O4&|LhTa{3K6bXs`#p8Ep8SA+?A*?f2& zIcUOrn;(R)r7;bZa)l6(m$S|)pk{(Okk))m@unP1zM;feG6|FghsPHSxxc0cAI8~K zyJJ1FaUXBneLp{ZGatotevF=V87&{2Bu%dpD9)IUBL7UHx^RP6%Je=%f+O-w>Q51^ z%`mlv_a&~>JBnL?Ptt6cA((BpJmMb*wB#yqIFy;D<`7UXtnz}POWac|M_y{> z^3`O2I(Ak6Qrs60%3SlNq;mhHx;KY|s`+8@YJd zv}ylEL4r=vm5%ShIt6A+h1c=kg)uH&H!aUQYH%SWn&mujcJq^WcLF|kG0v&JOt#-q zdA(BBv(^y$6yqo0p8yOj@HY^5XdlXbPzB3+UdwO<6ch4d**cIc4yF0T+{GnJxI#?1 z$%BK!L)#Yu(GSK1Ox?#;1+05z4On-s=S$V4p%nQwC->&FcBM2UoALZUl!`@j@J=|3 z_(R(?dsEwavW5N&&8_{Lo_hjZv5Vn2x_&LV!njuDXCx=#aUz}JhxR;*n_mviVeGEr z^?^Kdc_qt6n7*;Tq&Inoz%SmG$GP=Va+-GK48xB*p`40mP1Ctd&m7K0jDt(gLl6oi ze8!iN1sRCZUh~BYLj8Gh_P-cvZ3GEiC&=pI>E3Rq>{Ghumm;{I<#4eQ~# zn8U$cEtz%ux&CV!>S_sn1nMf1!(4h|Et~mmGemN#9j4idU`YUYDVRM9+3k{6xNjeU ziS{^JE0p@$&9B9oKw&Hx4l1(^0d&aUmM8QUK>r#)0Q?7cXv`h{<@m{vPRqP?_6MD9 zva>(uyO9oB3l}Bm$SsUg0hyEkh31Z{HDZ}vL7O=#Ekztoc!dv{CT2r)q-Du z@h^CBMylan8({R;=9A#f+)b5Vk_6BKD0O!7hx%SDl$j>9Z-))_%u%n#NnA>%&@*hS zqSV&QS?>2ZTum5*2~he=ccpYeVNogKo;yeb*JacylA4Xs>vL^kA*w2ut14J!i(?hC z$k4WR%1Im{Y`3^@3S9HOseNbg|hh`MC1F2i5kgcn^K{8RQ<~6~{4s z9id~TkxQQZWPz&bwm{x_k5yYRS_HP6iS`KzF=yjhW}lP2ie62PfxGeu59_Tem_JR0 zATt(Yow|UJq}8JHHcm_xjK!hJWX31r<}DfMsM^>1Cr$?vNCDzbk~$&*gm=LO-2#*= zeT@JJpJH<=;P~$gc|ZU|5&R(jN08@5H~{_*eh@Pt{{jrHF@n~Y`7Zel`4?a)ek#Ht z$bXi+7yu(Q!|3%u1H!O^os)pjyQLr?K2HD)=L4gIJV5r{Ujl%M^}+psLFfe#Um?KW z`x$K55vdF`(?tLP63Hhf@&4{rIXzYoLVW(8xfdJs(d(Il(dr6cZ(qja0M7&uH0IS# z63ST71m25#nB4lSwaQxOLf_;LWfHIJSE}dA-MNf=07mZ!kjSL5g8FsbS|5aGiffn9 zgW40lC2`t%G|sk>XkFN8d9i_S0&G3NS(mf8np`66~sOgb8b_3$$JBo(#z8r-IrB&nKWkZUnI_JIxCn zJ*8*l4d@*mfG409w4P3YOHeyHKN6rM2sOg*!d$R#AsI&hR<>Tqw6D@ir#qp20L>g$|d?y9mUs2oOprQ+g9GuE1o2W9$k(UDk9`BrRmt zm~N%MeCvTu@aNZ75Z$z#cGWZiqR^3mdh0?2MtTt6F z{(F>{o-N`V%}`-%t^UfuSCeNE-txJ#IR4SD{o&on=~q{jtl1L5Lr*|^Ax8o`;LtX1 zC3Ij9gGmN=&aiE(RrmD`c(~$rpD}?1oH#HyvRtr^HZr6N@6subtWHhDgLAQKN0i>e zXE=j>z{>)Y4oI%3+LNeDd@O1mkwz+j#(+Zsn8?2OuVjWP9{x1vEV2S3OnSp22-1@iCNb&&accI6O0G_of=nM z&iXN6;&Z{p=xpxYiW^}zqO4fQ&oA<{=l}Rk)_b}YUv+t5DG=Fl4O;8{=oR2UQXE4+S5wMT0fR%XHsjC9#qY;4}KND0|?8lVYv|m&Kg=r3nw1Lud)Xh z`pX?)omej,V>$r^F}g}J49XMRTEi_r^{TQ9?bI<=SlgT* zGz0V&X%HV400Jn`+ZSTC4~h+aP3Pwe_zvBEM~5wKx-?K1SP#DYC|u=(pOQnGjeiiJUstmiicebUDYg$4 zrj@NtxU5Ll5f$Nh&w|+Y$OP$iB7fl2iRG*0RSMaWR7ay5CmbEp11_qq7{tBFxJntx z{xmBng^#M#dTZ_BLD}J_`nK!}!BtL}G0EL+l0_x5i_chW+Fy5cRKF-NgC#6`#Rb8c z+JzC2qia{?)+IRfav7BI%!_Iz&=_l)qIzK>(uFM?pN$)Y{q5Zn!aehsx5!9w>k1y> zzE2NO5$S1rPzJ(4?V28_Al$buaDwXj+6jmkW|DEk;Ej?qXa?3hK^b6Ti)?xbj&Cnh z!5MV!AkHQIaNGe3=I~w;p>Zye9J$a#95=wiK`c{Yl(?ql>3*`;vsDl z(&-y`g^sWwn`BZs@}Dj4FSywzSWI!6LKkiL5d8BI{k<7K`My456v?Oyd>BLL%kvRd z?sQU+X8|E9ek+mGD%c6=#fa-g;V`Wyb1PUZ+E1u1V$QeTosIUw_gIa0XJll0h4sCY zk9gg#tBR(;r=4UtAWv3dg>dO|Z1naK@UP|ysvn@IsPg`CLfFIw&+v9;XpaS%6)Xw^ zA_OCBDhyS&*JWA@=m0mevZ0K(&4OA{MXj#vcP#i#i{gIZ-Vu5GX92^MZEmghB9U+H z#5D@BJ536|0-UTOQPN>8nL8aj(ZIKdHt-HlcI)SfGRK@vVEk94($e^z+>xthxAy0m 
zSXbGDOQVr}sKnr&BH`Ri0;ooZj>r&D*>6-vv$t-dhQm+asXxHqe+#xy=h9jk<3-j@*VcJqY? z!j;a&#*TN+CY8QbDOS?Y0-d;A-UHs6A))-IaVG0C_v6E>bd|5;=XsrwTW-Z7Y z94`&eJ>GA_04NzI7%?dTJ79uX0sY3e`*UGyS7JU!Z=np@DY? z^B-#wl0Tjk--Fu&O+aQ~{_(!3c))`=&nn0SWd`<-M-X$kCy0LY@r(Ea10Z|wv?);%TvET%`&(WNyUpCPJI0gr9}SVorQV$tWwxe;Qf~-R(w=m(w198Yys<@ zHzukpg3(6@!YONo!7Cbu_a^nK>=Ka`qVyN1mosPW%hf6=uxeZl@ZGT66+Z6S*Lh~O zV#nn#AL}64qjQ0d`r@WfG1<(Qni-+O)6%^!Cr;V1{6C<@_N8vSqCTggie&7!UH8tq zv9721#9zaFA1-B6c$Udh2T6{r`HJA#oJP%yIJBhle7H+L@qVda3hVnCmGf}dED(_+ z=7{+b-n^rP;VmN5kiqVMIC*4TzY@(W;tGnc1M14GcqDNxscTN#9zC496m-@s#!dNh zL(0lz#!EgFckf9^xlS&A_p26#Kb`}`47%`t)Bd?5gd!w}kvr7SKK^m1Kp$TyuOq*y!oxYQ#0Do@Q zsc0^xx$06l_9{J`LT(dM#ZaFGRmCv3ZQiBa$`x4>tOn>G0SZojo`1>h@4MN-hR*?3 z{UspJ+0*FU0(N`75s~Um^7L%T1mX)H00RX@PM(v~LH}zWb_e$#Tk<&C%`;E$-l7v%*`g@WoT>3rFYSi=AKvcIHE(tW(IFI0j&RPn%DXHV+Nwo8esG< zINVWI4fR4llYh}QV*`Ohfc(R&_d$cFhyP#X0X6QOORdfs>3lz5l$AK(@sf4-FO3Kf zlY#HU1yOKP`1OGNA@@@x`=`(-0APUs;hF3F1JsY;AG^?x;esXb|E7rB&nr4u&*M_Z z!!uF9B;bGGSpdA6uyRtj0Tcm#p~VlL-Uul3d9^+#j|-(aPlG zxKxMqzP+k%RUI3;u||)JS&-kTS(31LsOhB#(|X@&v8luttQz7bXPi1rg-VL=x6;g7 zwokI&dT&`bYHkMqxU!(7bm2Z`v9-WxOWN!}hvk)Ql)T~92V}K`Rj_M0d7env-B*7R z(u$@P$qlf~&)+<;s=%{>VD!F%>9oRA?N1f}2nLBLD|SRWsB2Y_fBOmNTxHVs=eyfU zYZ|j(NvPsSo+wTCfHuaSqK`=pv$}O?_e8nW4t&DC^#d5up;I|ZU5mTh;O0^&oU)iYU9=lPZ-nKhXN^DpP zZ&~Sy0LvGWjMlxfRo0b!cuMY-+;l7Z>Qb@t;M6!I}UVBof3nTI;jPpe(x#{MT@7RvUFt^v)5VYSE&p=T=vpEbeJ#OF ztN2m)(^jP=9xhEc<8`|_s%AHXgHPcoA(Qh{w)oDRbg;J&})cOU3*js=w9u=puGsRig`w1t-Ub6+V4(y%P`}V;DaacH-zoKVS z0A4o-#{1pXSM0>sMX4gro~j|>}^Woc$! z4-Vnx zDsEY}FM8C2L7~{27=u!Jk{*f}2TEcRo^-}z^-6STy}_o+NNOg+6-q4wYwU(F*`di| z#?H7mGkNQnSV~8}Z=;ibtJXUpAKYEIyy~B7iO8_064!!kDrk39mYHrRhz2j@qQb7c zRlsp^YIYp$k<(_lagWxgi_P+22PNAhChKNc9pBdrG^_-@v9$1@V~%GXPo`934*gQY zeY-F7x#Qm++~PseNKLW+WS1BNF1ln3+$E}L{Yallk5`~ey=?Rc=bnghQo1ZnjB%2^ z+@@(=2;^3voU?BscuQ5fnE&&>O!PQy8wTl_+#a7!AJach%*mnnYj}5uOBa78{)J{ZS2ecXVar9`SWulj)t8>hCyrb zR5`17aOUCuaB?E`r!8_`Bb$VDHQbyJjrZUmYv-U2UPaJ`?ol#FhVyYcML6mQ_29Zw zA}olF+?Fu+;)5nmkk$s0wJLn+{l|Bk=FR>D#0p@4ve2xkdVx-5y*l%aE8;iogLrD0 z@l`*wy>t|R{;VDsc`df`*9rq0>Gg+RhhbEqL7-E8lTF(cqG+_dt5W_1(8RYWJ50C5 z@^aBdPlUrJGIRqPC$@CqQsN=GV4$R1OCM!Wju@m>)oEYtvL7t1MGHyb9|XU=(T{$< z5WH2Wzc5OHKdy*`lg2%^cj9&RcPK zrf(YRr#>;df4774xo$I4h$vCfVPrt<<<^TKe$i&)3@0r*hiD?~Km z9?~k-_;G7bm#nc1m#>RhVpi~|oPh|Y{s_NT(^-_8ua04lvZg=!&nMH?kX-MF$To<; z*V0)y_D;9Z)ysrOh~xS}DE4a#Sr!V5L^!?GqV8drh*h~Nw~bZpx8vfs-{=m=B{OY_ zlhlSE#zs%0X-ez;-%Y42I$;~3Oqbea;?{z0Vk^DMRr1RCOMo}rUo+k~+TZRuo*Z4k zPaQ9hLfYk28$1?=JWLys83p_({yW(Sit+$PKy83;I>c?*T7sD@pEIpvV1T_79yv{; zu`bQ?6mD&x$=f6cSIyUYs}mSQ5yo8r2EgC&=nQmqXlxH57FffpGw5$|*|i*cMStc3 z4J-1O)69myc}N4#Kz%!<%3Yi%xBc?yVxsq&!UrtF`o0I-oiDZ7HZ4T@xZsq^U z-PF#H>OX{8;3L3`+((LeHY+2~;%Q~BwzgSjkC0()lRBt&WP!BsibPUTvbp1?%Sfi! 
zQFQqo=z~ZzM}Q3g-?FkL-6u(}P0ti~x3#5!cR2nc&(#%zyeM-EanHxifzlcUPzF$@ zTTkT1=m?Lv=UnAh-HP*!dy?8dzcsZZ2}J#;etH&Ol?Y!=tE?yYPeABJ`7ZeG?Yxm; z1H&X}NfTEGQ=;fdpdl8sMRabFt*qG8!V+noV)uDso;7pp_c zSlVd>&SNsWr9kSFa@!R*W>ogr!=gj}6@8Y;QFQ`6xdRw&>F4zAeouTCu*>pV8`ufZ zNq}`nU6~eJ*2njiulVcgzPdQ(w&{}ZMJ&Zer}mSA=V}`lM?c0$9GLq`FQA7T?(W%t zc&dbPXSXR76IC9u7>YDG`#9)iM@fUJc&yO3tx(qYAs!9;y;;4h|1oO}%s0+AsR%`l z1;7jZo3DF0*rM7-twjzmt+Z^_tsP_dTV@T6Y4`Y^xbwz%zy19tUQ%~4o2|(ddAQWn z>-M(TOe6^sLfQ zx9F_ElmD5{3|UUO34YmhQ3?>x_{ukIyrO zs1kK+1+N0W{Tg^fic37kU}}pgneTQ7qPpW}Ij1f7_Q%*7I+H{??UjpnG9qPR|9_u2 zlr^=jkF=y|7~hx1g_rvEM}SL`xj;VLO;4pyCCqr{2aP$5i&i&P8tu=7cIkmu$uykv zIMkn=q~!y3*(B{qk{w+s#TR{owh*vEqNDncCerr)ZEPkzW%i7gi7EJlSxqVRZTEth z7q{=M2N`6)H;b4OQWY*22;x|~(H@4l+rn)!ZFz-Nm&H@KHrK^*QGR9=4mRIPvS$%P zV<;?_hcG*Dui-*$No{2x&g?yheXl8p(kV=KVCH`XBFan)iGQ5^WQIig&tZwZ zIeEJLa(3lU&HSRI{E@OUKl6XZHhplImo@VdCVzl@D)l&jd)Ql2mbH;x@R{Raqv9Ma z-cQJ41YrM{*t%PX`>E{b>m=US1Nv>NmQMoAi;OH|FR^&-l|I(~j z&$p4qZC$XGZ(c0)U+))oVz_E#xH%hurxdr(DC8oDu{f!25r-^-XbouQNp7-Rw`TdM z!h%>k18Eoyq8mPdyItj7pQ7Sqw`gl=Y}Ju6L5^=Xk|+}N4T%VohbT@#%0vUz&o(F$ z(kyb94@D4bTG;=oKLBrr@}|He2D$v7eM8X?1tgx1xXMPwv`*nn8K%MRBIhFM_`=og zmkcU;@=q6>Lxr|mMsrhf<74FbLuU#+gKX=|7N)`A-qeV&q*#MFUtLt?kW`JK^Q*Gl z!w}F!J%rZGs>whBp7n6gxDqI%?Bvz86c4#P-GQBUCP*F{kmipn;dPm1lup`sTTyAY z9kG^*&C6MtVrhA_E6@OjJ8}cCfYRv|K@&>BQeizSCPe~5_G(mTc+`N!8+^3YFa+@%(fV8P(OI;7xz@WV`ZB!CcWO*sk)>)R^d*Yh zPw*F9k|uP|jaTV##+b3sk&b+iYs|oHG)mVd`!)r4>T^fR2)t4CA519W1a#M}9ZSBE zy}Ft&)s`+Ad)+@!WWD7qiDrA`aa4{`IYvSUc572)Lyw(f?S8J`5l67J+bD|ho;As< zQetfCR4;D!CDu2>YtxgK6b~!bvCZQyrtaQz%j{cO55ZQI92y&Pnz^vmT2U^QQ--ANd(8FF{&(zKkat#vNVc;gI8#?y;#iMbGw5m?_mmduXFnL8K6#9GR^$E-?*3*_zG~1%0TZ=^Cg zV|9_FS`S3oA{)!%s#@i@WvGPW0%5U+#nIM3Afb?$8UGf8Q2ery^AqFyVG@l*7^g$B zPS4vd>ohE@Lak14$3W|Zx2-@9lC5x_l<+u8jCR?T1QD<7np$8K)@rn8>vL!A*$al` zTbG?C`hMCc`C?IV+55Hh!w0X58iIQ63&?DO%rKOK3lfR-G^bX{IJ{>$B0ddkZ2ZWY z2v4za6bq#0Fe9#%;n*uA_}oiYtarRif_sjeAO(<0aQYrrCJRKyqCO!1@ht(U*3F#Se>^Hloz3GJ z*dW}dVcE}eU1aTE-gM46{dpj5NN)qda4Qww7R8?n3DD%QUVk9;N{jH*pY>D#*TYS4 z{*MJ4Bxs3Zih0ky!c=;3qhSSeQJw{A$2{(A3PTgb_>b?=qLyOxjSuaL+~;lN=Q3$iHtI`ITc!|PgNALT{F=L*)NUvotm<42GE#4m)>rDCb) zaZHA7Xwe;yk}fobOi^UZzc-aJE$fT(-4496In+7A9TF>Lct>6`4zllQ>@{?TjlGaY zkERmn-5RCUySQMUevvrJfeq}EIYM|`z$KZqVLe-;%pf>z0Mr&mwAG0hOltDfne+G0 zlQ@>lv91`wNsxMn1mjAI4^ryCHJRfe+(PzKy5`+rBFIWkMlJ(@Y78&Czo#O6=4XIW z&2d&7RCC6E5!@xq%9Tg(=s$gqj4oI!%sFUwtnWuchqR}A_+>?RTQLr*f8Hhx<>U}v z*G6@%rghRorkL)zsnnsN@yaLGqj-Wwf+_bNc-NraUNc;yU6ta)9Jcb>vS?GfDc&X@ zw@Gh3kQ*la-novST?x-&dOz?SQGxdW@%^Fv&Ot9Opn99U;HeI(s~gyT`KV81Tbjhi zv9MFX+TWSAGjSk2XaIQGmKV5gqbRp@p?3&spor?3frLfCA=WiVwcuAavK0(im;p7& z#yN>_SZ7}2{X=VIrwu-^c)4P1o2Ppih0oJ-Y1P>&iGv?zmGwT#F>O=IB6~Otj8|9Ov5aR^NoB8nmmUy$rA69udfp^g2E3PXqAQB z5Fq`nQ;0r|s_C@8LMdg(bB~b=gzc-Q6y_2QD_NCJmg(Ay>X0f`1Di^zS<0; zAtX@*W_UEtM7DR7cc=I7N=hB4WRle+u~tm;^dhUe(*=&=(~P@C)d+7UsmlocW1Wr5 z1AhWdwWSNuej~jUp!*9G?xok()K72tt!Ksg?wBK6onqf@gz9NSCmxUw{Ul$HdAeH( zP+AuHZ0J$Y!^$6W%|LgKVkMgXFC4@Rl|Ap+h^H!gK5&rERP=nmg4$40vV{aUp(dsY zaxeT_<5VefObFT{JfZJMsksOZY-Er%!&Amna=m{rlA@30%7rZ;6ZSQ?{L8jLw0G(lr%4_*B^L2Kahqdp=`#`4rm0SjO{>uvz4$|ep{oj z>R3$b|G|e{zTNLJPc~=GPZm7bF_#!HP8VjfT*Kbc=*~XM-+;vQKpkY>{6A27LQ0zih@YvNN(fO}myPoja^&VIYx;$}gjHc; zmc+>L2+k#WnTE)rwZYDmjfIA%bB}h%^!mrq`$p9J$GLB$NM^PcuSkDKew*9UXV_4Y zcC}VD-0>o5F(46$F#-j?JRJ}-)O`jz1E?1+B4uzS@uOvNoaxi_5%bJtYbkm0dh+cL z6D^qoqAJ@l;&+2W0c+{TY#BOT&oAw2&CRnV_i~(wY3N&(2d_HLa4$#e1f32EE`N6a z1_AuayI)*?mVf$^bL`%~eB{pClOd2H9Dz<>fVvWQX~CT3VJusswPtOKuEx_ey_GQ1 z>F!CN8EpKSZgOeX5WD1)>QXX0pV6vH{*5KLN%u30TCc9(im~=c%d@U>L2l!T1Xc`0 zPZ&K1Mkohe#4ngZI8=;JWj5>JK!V*8-~u$)y4Nf{Uh*gC>>-4{lqytZ5Ym91x~X>% 
zf`tmK0MQ#Fdu=eT1vdjInas+K62MPJ%qLRdh+p*8vhnr|6+ZUxJGZzye8 zMM$Pe8UPcm8VZ{VglaJz%UB{7W37$B`A6V`^?0uv*?^cK2oc0CfCv_%_~izm6v3Cs zdojZW^N@=VT>QxbSyB)c!fx*!inP&(+IuiT&Q|;US+Ldm{{};xirh@`)k2wMR_u!2 zhcSN^9jFR>4Boy%=S=<#eyu-WCw8&PH@{?t|E^}BS#dz@kIeP?yUT6Z6{@()&CPL} z2?q_l;`R89+{+CjjHm_s1px+)!Kc_FM$p1rBt%FCfc9(g$l@x|i;>XCuIPp6fUkkm z?F)nnYmG9(OE924_ub`yU`l*9ErCrfOuvo?;!rEQkBW2Qx&Ufn7bUk@S^mB;%k~B zUn_7=s^^ndI7YGha!&2LgdRwprD7mbHTGmt%&#{(diXx9(U#v9Y)^}>h-!aGIZYdF z@*IKeN%u(GhD(rSf^aNkqRMK74(J=MgAVv50>nX#5TN+&rTBA7;K||16}k}PN{2bm zV^@X39>nMgR+xV1P)Tr*9_-FZNdyOL4^GqyK@l18y3P=Q<2d{i01~06`?!!pfYI6g z{LAxNDBH%%S`=!jP}&u09j#^Xr*c~ba~ut)EWUplmgfoMbpClgI^Ptp<`Vw8>Y2Y> zsZI`l&yN_uLSArxIzfQ2J}rLi5cF>Lw+baK45;?s-bsXoDlSr7kE`Pb*)-G8S??=G zQWd4YPiNulz-5!db{<#ZA{dro@q3;~fSH+_kid9%pcKt?qzzh&FVF?tOmJ&3YfS`W zR^a93ENjRwe#w!}rhU*$l#s(hNGcjsp#9XGAYV4@9=9V+o&BowZuF>@iQ4LXELbD{ zbWYwc<+0dD9+F0i;Q2NU0G0XG(Cln_ew%_e2P~!9_NGa zS`RDia2IxzU6`nrwLBONe%*a?a!`cfBG9~&WgGN$`?IcKIDW9%r+#+O1_wD-J9)vk zl|CpOrTRSi}@Lgte83NrQL;3h( zx4mb|ESoEnX-(|`ZMeOvgA4A6YlnqTRNFj0Zp0R6Dm2LWxqm|a#KIBe5FK(_`29q+ zLUjPt(XrLKm^$kT>(lGgXw;$exGI`NQIh*Y)nXD5gmV*NrEAv4iCatac|FZ#?eZx_ zlO89SYib*g%6z7^9*FKbe0d~YGkF+MJ?8uUy z|12S*_$s%3Ls=kJ^0=r?iK=6ZrUY_3?6$CW@MNPEnYY(CdXE{e-;4>*H65(#41ciy zCE?o`C9D0rUE+mkNfC2;k?pGI$CJqTMdc9Hc)R=nZ$q=GD8!2X_b@DbsnfD6%%yp0 zJNCR3og`-SekE5599L)+>guS5?mcCPSiAmJhh4mcR=A5p+NOtk(5fMi`W+j-uQ zky)sDL?gte9pSSigvhJEs0WS{ga*umCrIRegH=0IFti0C=^$prE{t{+k>%Sjb!$P~ zhpmO|h{8NgxSqp)KY7=v#!VCeAf&orSt_0}hQQnP)Rp&y>vL6`EL(S>)^QpwQ@=Vg z@u|V$97uia?`}Je=!CKxZG!9Mw{E($+P1XR(D6a(`QTU0tHU!>?pR{Qh|6Z-nTh&j z&Uu3?;q9HXwLV+G*OeaSQy?R++nNX0<=ZJy)s^TOfUuDDNJ$Yv_kpCu%xYGZ`7|j) zfI~U?1Z5oW|ImIVnUT7BAO|z=PFTD2!1JS5wxpN6ay#eYLFMt~H=#Kb`CJug7f!PO z?c0YFYkB<|s``ocS3_GvM!fx)E8&6WI>VC9X8v09tLk*Q_uf9I_TYiGmPpJ3M(6Zu zCTo`ellnAT7)p>C)jOEQI-GcqKqNVMf;}j5E`(ko5DNOEl{>`y;ZEh_Z-Fw2GANOR zypFj7(%fGdi7Cr4n;_+DiMHcM7^_&lwz8)@K%`VJKbm^l&i~%JJ8L$(Q!S30bgbg? 
zH!AK;4&AX;?@hAqa~`cqcP^a3*T2uF`^2eIvs+R~GgTAsrH}T}S-TR4WV`pSUKHi7 z!7yy>`!=U##n_z-h5F&mq012y?L5T-VMC92A|L^Vv2^ZEn|~a4G;w=h>CMM?A9qVQ z-Vt}=FYL(+eKs*&On5e&7jfN1Hr3uV5x%t$qYF6uMDCHm4VL_EZy zRQLzXWHT;C@XVlC;I8D$pPbXc8?(@N1jBHIvGV)wo3|%c{>K}1X~B*VaDw4i1GpBB zms|?a=jXM&)ggrvs}Z&DbNkbyIg)IsCLhK51=et9iA{Vm8u0@mWyZ@@^^Dt<>xqf> z`DC^HkiX_pyRUJrmn|)YFYboyJ;3Q+p2v-FX@`fBGY=M^&<%QC+~0Wc{H59`8UWE% zeM;^e6^_oy>Fainaz!1qO)fqw_62KAI<$Xe-I(~GU0_1EmE(4RXW_kgg#`WZUV%=U z!1lSqK=?kGID*skplF`=&Sl}n04eeHx;A%#`1&gxF@+=NBX|iX+Vn#NxH=p1{z`pT zb5%EA@G;zPUs=T*$!QQ~Fhj8wuz+*9x4E|NNHl6EZ@ticvi(HGzE+OUBiZnuaz$2j z6RGgFB1y(nWt^X1OUdTLZdH2@c{+EljfYN-KEJMPO(IrI7rz+sq~6Tpq+n7;F03_) z5`g&|gJ?RR8wWC^MBLnaAL==?ml7ODVmx~Z4m=E=S56waXTm%B$ULk;H9cZXV$i*_ z=`}rGcOEMo8gf za5hTx(*n<6d}1S8X~iS~41fSH0f6!IA_1X{0`ok)fJq!~I z!gD7AtqSvMI#M?amY!R#VLEpVX9K9PO2!5=Z0NGr=<+tFY&N3_g*?4GPcgrbNN5)+ zlm{#B9OKV+V@fV=U7+mzp!68+zUdyAwsVHxf*mpUm=0=!(@m5Q-oA-wje}c`FEb9> zzmK+-)PV{eK2AxKing{6aQIc5uz?zDL>fMinwntDI)F zm6CsfgzwM(0*xJ$7ZwNLIf5CCl*I&_hZmbso&PoGBr*TzxuMX^Ip=7>l8HQ$zOM+m z15zoFP`T*SYUAuWYRD4C(s0Z%9{&CJ)+Ve`ODCDT2@4)ngLb~e|$=)qE^UX&}J)V`Un4$%2w{RgSVG2 z?vsi0Rvz4OvI(2`(bvCK$;XVc!tWJz85#PRIrZz4T%c)%Hb?aJI~U$&~dWbSy(jHF|o+3Zw^s65kH z*M4ljT7_R@iMRHsWTuT-34KIO=i2-eOK0W zDDF-TkwOFHa zdqWdye`)TOyoNjfw@R^6Av}fFyu0gVj3K-hi6Xf>>2LODze*%}jpHz)GpI?MBOm`5 z=50J-r*ge3UV$pDICs1JC{^uj%b>!qgfF%yBsZ2JS>qv*ExTfC`Av=ox$F?n%(9Pg+~$++FF6_s;u6cKZgu7^oXH@VDm8B5D&OJnFqtc^vIH=oB9{BwBg zEsj7MGpv&o8~6JX~F)6lkUal7$92P4?vP z3rd_;e6VPT*B%cZ#z{?A$)CIJ;7Wi}2wAjx^QV zuB5>o`E!j}SgO-U@MTu0FKjGi-cP)9UPUBVmpo0bl>MV6S>dVNF z@}}cv%ZDBcIC+INd$DjF%Bw=D%nd;`sEw_nR(Iubo0)8I!i=oNxxSFbk;iTabefF*F{96e4Jo(!@U z5p~0x%PQ*+d?kfbleQ@1)HzC42OZ{{;Lawjp>C9{c4*{SGakq*XM#oJcS0^&*f(YF zZ@C}b{6vIjR&J6R`(??pSLQ*rkaWSa%h@Nq7$@#y_6{xKs?lvy$RHN1f43Z~(cQv9 z!fPsYg+9%?L*1lfxMJyAL}wb-_pj%kV&z}ov5`;%7y744ZmGiGGyn6#hcW6S z8k;y{0O*IYFK##&4T6=vrVXqPS0i9*U&E$`Qd{BVj!Rfq^){P_EMvvS7|`ws0xxB! 
zO{JX@uu#r99_I(|CgrWNa#Kz(nJ(hD($KTtikd&95jRU-G{(e_G)S@^3cf#kvM3{Q z#5-`FkAj`Z@>&-f$ggcnT)c>_Yvxc+zeZ&LJ{TPVd)@Dm^AP#v%>2Ho*_)MzAE;g* z(`=!F?g+QoR*-bx()-i7kr&#YMPt7OcdBiW@j%vo$xd<+zA7?=j)GD8tRbR#3Ii)S zbeR&U9)ZDR%CH2;i$$U^tgAtvcGqdXWoR!<@k%OD=_k!;hd6PZh%rZpC#-F^S3li^ zKfwk4`Equo)+6t5HgPsoayOK6CuGym;QxVd#U3=VbZmr0#Ct~nix!jP`$M<7acP6S zCtm4CFpSnYdruffdqr7pcc9`#TUT>Dw3jHJH4(CFISY}wyfTk$$sZBjw!9g_sn@L6 zYihIz4>o3etolO~O*H7P1(zLK#dFp&VI51abTEmKUN5Wm;9v7@`*i}ltx&kN4D*4x zf#1O$;EZ-Ck99E9Gf`jP-KZhTSL_`xTt}#%t=-36PD^aA-6J8dvW49v5nzY3Wgah) zzvrar2B!wN+UfFZ@lB~q!%2oSBC*o?-N*moqV+5F)Ic#!`vWVd#mef&h!lvf3u#Xcl{IAY-W7`IpcErZu|b_MI+8$^DyBSla_*ee@u1jAHrwsgv`u*P7zJ8HnTU# zfRTR`zXt~Wn5FPXTW&ARijOV7jbn_Sl46kbibxNH4U~V4s(YQ^$wVEd5mWE#qpI`~ zhVb%`E`RbE4U*PFUA-vNjn5VNm83+8N84SGg{-?35NZ|d@L=M6_752`k0Z{TVNmj7 z-g&B+m{r9@=#Sr3H-=wR5SfY}ZXXMzZ_!94&{}a8YIQ*Oge1uqa29%ud?A9V%%FrE zh6k)uoLoX9iCdU7E?KgDtrJ{}jb+lkKH59ZsqwsDYEw(}O!d@Q?^iO+6Cai$Mj5?` z_6w0TytJx@@1Vrpd}wmNgJYWPT=#cLs9%(I{3Eb`FaZPm(lBfH$iDHe&jD=Y^Ctd^-xRvDsZk+ zYL~7`kj5|i*to+eVLkk$hbR@vh-@Qx)HCz5@=eoAn66$C z4R!*hhM;-Gu_SfaY=~hRLIT<@t*da|-OWy+cbVElP|<4z$`((+V>7Zoz%uMu6&Idum~K$&T^KHR$cqlQ{vGCBgGHPD zYTDeV!Zi4fY;bfbfu_M0&sX$RR70$b?=MlVQgiG0lI-JKzlBjQm(<u5TB#IPWFpVrF23PJl%FGXMI zsMu*eD>V1lejQLSzQP(YP>EKU6_pk7;~)4Y=<(_DsVi_ji37n7qT5naL4?+=&=7)j zz`USwQmLjJL$@&uoKn{f%Pu)|v)*PgrQ3M9prXkU32IHLX-aBFR{-k-3i7opxu6~# z99r7$L6dg>OTcDKVmg72tk{@kHl5@Dn(qB;*h3D|h3JT%c`-1?J&2@hhEL>@*?6~} zg@}I&=zosCuKTkq+va@I()|Lc5dZ876~yRc0{;rw1WrFP{<^O+zWsvy>o+@w0IieC zEH%8gX-#Ya$;FzE-)S^S=V7rD3W27{e;CaSQeq%A|+kLw%EI-l8XW&YWd~CIoMt-%0hZNY6NI9U`Fv)-U zqlnDQlkCw+7ov>W7@emI|6XO5CHj22x^~Yy?AWQ2h?U0AN+-+0Y-wJxZ{%%0*C(;5 zZH<78e;<9XzD}8H2%nax&idSlUmmro+-aOuIhpxt{cRPLPU7_ooJC)6rs_m9>O?yD z^sox8*q)J7OM|sAbjgyk!rffF&9mO0ETU?(9qYMg+_DTs9XOgMqoSN??SWKIDGWVt z(%G`4xL$nR${%5dBOfYUv&2`CX23~ubu$)AQHfuPSW)RpTgi=}$)nv7KG#I1kQdgx zCOY^U#L${I8NN3Hs0COo;05?%3QSoGASM78oK<{wpLafY1Tf_`$qr+y4H*QO*#U_Z zVo?|daX7qkqe}(>{dZKMR}!#sjb+sii>8dnt% z{U?3Z*4K6v7$me!KoV1bUia8v4g9yN?VfMp?>&e>7(kZTDP31y&a!1!q~_y2F%qtO zLG~c%=s*cqDU33jVwNHZAaMj)e_te!#Arw`ARuww6}}=wki=ei@c3X~Bm{;<&j3rX z0budWKTu>qs7-0qgfrswUirD_>b%~u1uj&7(!aaTV{65kN@eaC(!8$5@(J@96Ar|@OW;R{Wd z$>PTszq7h~S$Qq0)nNjyrCQ$nj#TKE=|ZUnV#CAcyV1n6#Fcwb!16wb-iP87CvNmD zDl~g5O&N^7m~K2@<5jvl-fV>)yJZrjWz4tU16bQQ&wuX4f}rgc z27Y_|6<~TST2dVqj-BS?MDrc>nR##CiYf3$9c<8wR4%};(X;JL?2CBPc`~82|%E|ES=Zgac&V9RN)_JRIcAp;0-Lk$Y!#$(<@FY*U{szgs6p!FM#Z$lXJc2Ic1nlB806tq4Z0rCwP~@Y`y{ z&)LQA=7j;f-M_1pW{s(N7U5~8hg%MBD_kEey%b0^#?pz6iu~4SrK~4*StfX3=YB0=yD0|HCvd-J_E814f z+y)jbg?~1wVFPDu!KLb|stk57Cb%HNTNf)%9<8Gwc)Ey?*^vj+hVrq*1^&e&R^wg7EuXe#e<)zO+8_P64l6c(2-jsQr77 zm7Sw)ShMhkKGVcK{d~xzj_07y0q5jUSoAqD7#oEX9BQ?RHqAQCK|n=|>;P#G<$*Om zm@8Q+W~8rC#95g*f8L-uK|WP%rhNAdNsSyFBHrrcq&!JUx`{hLun$Ia7ltx;DMJ#} zHOFk%1s%ob1c6_efyVM-Rm21rW88CiLB7c95+Vi7O;hi#n11WW@fodJd zot}K;8zfx^&E6E4UE$z_{D6%@; z(Jb^aUo8}H@n&$ae76)2)Tl5+`}q4m_N#MTgf8-F=N0-ck$miJf9RJJ}T%(^t0y8rBg|m#C9o zxIg^i1k!a;nHZA&n8d_4gU3nD8JBz`3z!Zyl8j{@+c$PfFq(*K2uKnC;DE{(hq^`E z7?80M5EnlAT~?n-nb{Aq$eP5VtfSE1*90LSlzH(%f+$I zSXZLDl6P>N|2G3PezQkjtUmmiDY&cySwmPZiI;0QAKMY8I3FWfoz|m6EkhoV&k0I+ z;0ZKjC&$nvTs-ah>+{Cq2vx?3oV2YCYME{;1!`K*?s|Yw>^?;0f(P?z@@};T*)QF= z0>d~C-MB-ldVo-YGoV4lMB8FX4ud25$Wo3HAq2!#XFI*)Hp$Qwfb(<+!=@QK38C&B z#;Cr;m*wxU0J5!G#k_y+A6UM`do)+y%USx5rfu4I6OX-F#8Lt;0EmA+=kQX zNA6+D;i|7gQy1WrEFuQFQIsCrrkH?jdB;SdLe{StMyMR^J)?-6x2cv_)KXTyLSRF4 zKnNis&G$wcYtE0bti*RI%FK2&W*KijpE_2_#_!jOK{miv#(*uV?SDXAYUHm@!g?Nz zO+^L~;Qs^t2vB4wEe|MfH_!-b3kL0R?YW59cM1J60o7he6fGkVXdnm@5Z1CPiVB<= z%9wIH6&uVzaYEdqQ%Tg@!Q7upB~;<<#MqlcunBf$Tr}h7#Mm1{kO|gbT}HvlgJ)sg 
zM2HRcW%>oWf;Iz|283u8YARsI6zH!*`3puENwKs?p0lf5op$duq5g6C*k&UV;kW7B zqUpD5dw`|?ojmOx0eDqJ2}dF$)U9J^i2x_g-YEi9uq(&l#_w&o*Nxa7*lhg3iQhh0 zcP+oyVdt(|;Na$<=Bgmzq!6~$cSQK-x(OF)R+KicbuZ>QSp~`WHGk1x8TI}4D-}-m z+eH8g!Dj{O`yHymYYX8ofPn4)zf=%#eE(7Hd6Qj*{3Cb-8TQ7ERDc=z+?iguY{=ta zU_Jn`T<3zM$=&>|sk7uu35Zt6ZKps>s^eM`_VN7FY~zBjR@Mem@sASkZ2-qV)3p)6 zP0#4=cqYZh_OWW7?f&=3A8rDkm*wnY$uUFXSiq(Y3%L5@b+q@?%JVSWv=UbfW>aOv zr((3DTFS}Ymf?mUm}L@Az{|#AhwQe=^>h_$yXSM@c9#wpM%9pg1V~$=o`Z0`P(pnF z-Q1SYqSc~q(p7Wfo=nxdAbBx$i+hq%2iM#r{OtH%J6J=PZYthnwN^bRz zuok!+QG!5)#m+=0{4!~p+nE%0&`jI5vdz_=rEZyBNlig1?}l$PpcU{sSmV@&X@+ZN zS6%aMzbqbYjFG#ww#86`tr%cGeL?`h{=obmj zoKxJj;~W#SqZ?+ZJ5U=70mxEtsR&}7#C*x(k;7gL!-n34A;M#=d|zf`q3K&HX@l4t z(;|idOq|1avsYrbEu*jzZ0&5yIBfUW8jb^QGUD z9bx%fbO?v87d_Dn2n-H`O=sKtuf{AmfLHD57M&RN9h ztQ_7dGR7rC0ow>TnBN=*33gw_U~hjRFVn0YUzi97h!>C{=kd*jljpM3Zl8O4oW})P zM9pW{97Pg=d2SdicDckSCK4Vku(z>Hh6LQSJ;v`b346ZrAZ!tK!O?cTErsj`4qlSO zJe6oCWo}vTifp{(hily1;Rs~j^l7H{-Pyt-Y~?}ZgQ!y(bH?~JMQRDcB3dIE6bbCB z7$zUl9|1_0+aPaVhp;)nV;Il9(cM!_##icM4KhTu6?d{258!{QF>^0a!RJ(Bh_K&y zrukIvQ%vNvaAH^mP&EsX76c`b(vlQV-Pos;p)loUWTvn_Gb*t2ZDTY{~HLaXQ!Drg=c9AScXeAo^XkM`NJ0*5gEKOzXd-ZvP~`Kt#aPq9zK<4``+2_(lpJhY zW+GJkT5kiZBVwq|#a_|LGePnae$IDh?2=?gXqRTMC7_YXxi%U0YTq=8QO^D(N=x|z zVYx{+?VxscX7dvxV6J*6C+PE0Q-2ud`zpHiQ~rctkXPxEwChHe7Bg6I#af#`KG9vX z_9UV<9CZN0st|g|RJxkfoaP=xxvkD(@_}_WH8cJ zd9Bl-vp~94G1e$@Cro%s+)g;Jl?i)h9?{74NyB7wN6=F}M`iuK#WFCrZd}6V`D~CV z)->d$wg1=+3j5Am=dTYs1Qp&?7yh`3Ik-c!Gh{%VveOlooW#cx8-_xfYjeKR;uE1Q z+c@$3X!O{zssP7CU@ABpnv1AWgm}E{iYMlh`&Z*o2H}-&XJYT*PJ$uXLh;>hy1-7V zZlbtz3FZrX;-C%V?sbbNilL5%wyHrGGU>-geMc75Vw`(6PxphdR0hFH#1@%@c_YQ7 z_XT~jV#60a+_m`Q?0kNaz^&c7>bmStHkSJ^(^{S*T3L)$V~(4oz4m=}52&53T7|?8 zQ-GYvw8M5iP;Z54b8#o4ZXm;@%Z{M31>^Ilw%wob20}B}Ai<{V${iusu*)qGE)Hy9 z;6dve>YxcbGXolEtEkW0x5?afP)o$05O{7hSuLJw&uzd7eY7uh@#PQHS~Or`A+L~- zi0R*OQe0;xh9R9bR52321htEkyh)1}R#cG#PAQX0N)|I!AbZO)@zgu+4Whn8r`pL2 z+^rbzMratLtJrS11=DBrTD9PMO%-R_mW+Z_=S|^>Y)&yoViYW*IA9FZAXQ4L@(l`I zP=|k4ama*O9?GEY-RhnW^BXuc6K}n^{@9^*I|Gu2VpbcgE5~WA4N}YMgujMvv0X}T zQ*__^sXkAOXVIZjCNrC`WHCPLW8PNT8=aMC*Otmn3q! zJ4e;l9nheoZ#6Yc`XTJYtEXx4 zd5ea4D-e&^Y$JHtE`igWR3yXdeKy?pXKZoakx(il^E z%=cnT;(Eqe?c1upD#(43*lEb|S5kK~XW67Pq;%4IS@r;p7d=682lXm}wLKt&s;X=6}Fu&(nZ!#Hr%rLvy26Hk-?6o{IFMr&_xKJKy0!=){*lX-_xrrlYC9==NZR0 zF=TKN?19lZ7-ke>Q4UfX+}^CrGirHt7rvrYzBMd=QX4j81`0ol-rw|HdWfX~h$RMj z9>j7!5Pqj3S=f9M=%t{krz>NlfjR4^3LA%K=_t2M4A-l=O}w{FtrODAhNfAJ_xw(}*8^CQym>Ddd2*e7UN=2V3tIDWSO#g? zx4t(y#B+Snr4@NM>(18s&)UV3L(n?7YRbng`V=)6ZOp>sVyld90(w7`$0iX4k31R) z!&Bs72KQ00!wyzRTz^b18ZUWsX~E}*Hgo*ja!ozNt(~kynv1|I8;6owNRYi%?Lu4L zXK=^w_SsJ}%DK;PL_zNlEA1XU*v4vkns(U3rMLRUhv8L%@U-@9 ziJL@96L_PCtXrDV49QwkLfmB|FX1K|hFmMLQ3z^(4#$6$Fo{o6CP?<^z$BS4qN@9F zVxJGWCjPyDG9jI<>srEYoy}nh8>prKK*Naytdec@Cy%Y95l*5bL8XLQ?S}Hbsp&?lkZYItRpB197Zv_G_ zTp0}fJ3rHLpY#K}F!(;E4f@2Kd;skZX|w$fi455vIX$QT)=1v?EVk;<3=*MabvBq% zw(tmZP~<*yiL=M_JW~48q3Xz*n7#0ZdK66jfhUES7x-EZI!mV=ZH^U&S_d-c?T*4% z{ZG+y&>pgtC{vKSRQ_6YwVn-a|jtg}rGogNC5Dgl?(%!>S0mB`yQmf+YU?3vR=2FRF5R9f2i^i6_GKu9Af>6pU6L(}b+7iE4!W@2- z##X!P-Ln$E%uD4H`zQm(DNrF^g2;J^T{gnmM*2XT5vUwH6w+bRb!*;G*5aT`XJ6yI2K#4_<+qF(%TqwBHI$}nv|W(-`an>| zH{slG1gm#RR~P6O3q54#c@|eE)8OL-HtsIMo2HY;>csrKC#4YWN7WD=4(XA${7#Pw zPI^gqjgu@KaUV;O1J`;WyFmOC5WW%>Xm+`hB)$@z0oDmr_{$6g6M}m%_8GQ=7Z9W8 zWJ>6+2#yvOIS_jZRgqP5xcAxUmMKp03YgVPe z)IdZp=rnFZV&?Q9AqI$fE|JZ2$1o*fRfajPrMQRR;xJ)d;5z_b? 
[GIT binary patch payload omitted: this span is base85 "literal" data encoding embedded binary file contents (the doc/cheatsheet PDF/PPTX cheat-sheet assets added by this import) and contains no reviewable text.]
z8Jj}0NM_VgpCMdIUKOotE?JAhj;XSeYW$Zc+Q+$tUEMlP8U)Zxn_8DDo{gxsZ{oS>q3OaKcBm< z(IW3vQes&;h+yyXFS_`=FFjd*IeDsh?IETbYN3CGb0_?Bq$&wNw$m|a&+1Yg&ZBo? zMI9ckpVfMi+S^$`sUCv_o=zJCRI%2z4h+Cn(LB~4oAvO$@%yqXC@nq+n#S` zJIjeEV5eJK5qs(Fl=W^@h4fnt8hos(QA|n(i&A5tk_yx4RK#8s=&sD?h~zGX`0ZYS zytIASf!D%9Dg#?LwAMQ)S|-x!c|JJWcZR13P**lKI(laHit8Kofc)mwD-T?FS^_)qKvwE#2L51IK$p&Au6 zC&b7DHhR*j{V_y>)Alf;QJGLRNdztOM4Pl3)!Rl0P&oa8rhk^vW%ZLd(q~*A@M@-A-#R zevAngIb+#eo$gIJNQSzv3oql`6NSAqw4NHPnfN4h#CX~564QkI#qNkJb~%>H7$pq0 zDFwYK->qo8ObVw9s>e~KcwAV`DH6RTo^yZ)Q`BU)MD{d3dE%OsPR)R3|Z|nXCY&;3%9%N5H*t zk8mB#`zrd$M>T+!IkPgUo48Q9M=?1H){2%aA`O#mWBm+xgXV;?_Er}8re3@9ATu|H zAy3B@4~vE%#!gr+%2cpBoX`D^_ua?I&yir&=z@KF!V?lwleWp3_4F+v!QV^`tSAlM zRQ%k_Cj%VWJsiTM>^gOO@=3U*MAp%wDQc?7Gx|8^1>xdI%NOP~Qu)8BCMZI5`tcA5JkEU|F6drMj6FEkakX+jI@Yjt zKGa^{pWRXEPJAV6D3kmk2hY!R9zuLKPkU=W*i5)({_&uY{>3^?=7|oqMhnl{@FhJ#S(8+JOV<8C1ZiTc|vEza~etY5k4_ROdithDo=i$G&^8o)nXpIIZ2}T zczSWHao6_FqHb??(U#4}no zS1?3qP$8>UWAVD^R38pQsWTy>f<=PXgLm3CK8#OYe_*3g6Tsv*Dy62wlwSJ3`2-YLoEj~(pmz?5SI(d(xSX3THK5~G~fyj*A!1OTB7wWot;H&nF(+&@J z?oRw&DOH5dr0T&=Of|E0m)B*-kwsrj7Wb*@4Iu_zIG}4BQe|&Yeu^-FJCqBEtAxXe zk>TZ$Q1EzreztYrqdiBls2U?eKWkorQ%{w<`sozM4Ku7cgFTm3Lq@Z@%;8J6T~#19 zGqWhKiF7EY*~_lkGwHWlSWVu4_Drvq~G@zNq4d5F)d8P3u7pm)TiNmXDmM11yS zxF7HssBr^x+GCf=?|kJHZ5sj(s&bItH;@ZWW5Z zQ8dLnpxFpYk?enVUUlh%1Rq;(!~com-kkL=oV2JIx)WXjwgb2PW4-a!xR#4*k{TH) z^x6=`xJiYGL{)?!T(c%)>r2ura`^W&vhAza-Dpz-?>MezV># z&r@vRsk^iKysMD44>iF^WYEx><@B(1sCe5`T?>{PQ!}R`o~u>egZ(9^F~ygpYN%pv z+?v>+I@w6KsQNaBU5OI%{hen)n(g}vv+B9B$x(lI@ul*wsb;yvR>?~A@<;gz?!DYI zC*;c7Du|OaJN^|&8g#guG_hH8=z5{XeclpWnZOEYELnuTq#L2*8UX4smgyN{z1Tc&)Xhmc43u$=7c@>c$Gr8-bmJ2X&m$I9>(O1bvV{Qkt7e9cO^YhH#?QdQ% zDvy*YRaqcc2gsLJCT0X|&diTQPjfCVQ{ZZlv^O@s(W8$YT*9FieZtgW>Hv+eU=rEOE2np;81OdJFrxfvyABl*Q#7z$uIc@ zb}9#pR6ZM)5`Z~c89ai_)f{g;>7TLu6{>2|>v)AbD4l8VC&seS<Z-+n$~oMwvgq;GmiIUNQx3DqVWQJHTp=VWI)|p zsJP@N=_v~Ps-E#9eF2q^Fj3U_@j8>>SoUhcun^QTYzN7KQt=-OE-H+5_FzLZu-0wo zeJ!?ji5>a6`%Pi2XC)?t-fNb0rj9!$*eRWWZQ&@{&X%S%>vDPZUVZ^m!(KT)d^x)j z52M6Ubqu>$PnJ^iS83NS8fHa6X2S0<_-RVZ<8?736dq>vCTFoa-mk5(s1t~;!P>66 z>0BvjIdnRyL(;JzTb*Ai^WQP?PeJ2Br1||kjx&y~wC$tQ-w9ox1{;J79Lqo0fmlPc%4JZa)-&_GC@?3}S zIpm&_mWdJY)B%8&4m37U+n|v9r?~+I!t2vL4fr`||93c_;`aY-!2dU7=>9-P=m{Cn zXmxr1`{*OA22chw=gl%{|*x)Eh~T$)By-4CVBv<+y4sFb2f!P zhW&4eHU85IqW=RY;eT-Y-$V2(r^WvbBBp;JVth_Q@rSNYvj1C%epl}Q5jr3P`ezX_ zGyIzQ|G0=4>Ha{6hW>AK7-;?*bQqu0bNn&lr#bky5&yG_KqN==2P7gONIuC@3R~J5 z8SDN7il`olEsSlQC`JCVLp^QK+6Ap&5Lx|7m?38=Z(;nD$O5#O8J{oxp~t_HU#NmI zT!3_vQ3w3PZO>Q!*7A3X{ZpI2S(#r{;Hm2;Vy63*pyS!(K$qn#g+&EEX1GHd71!SFUK*aQvy9Ge|E7Qo6*gp#tK>I7<2}pvK@!$COS>$gWC4qYSKoH%N z{M`>IU5t#rp)tshIg#*!^1A3h<&UwmvNF>*f1=qx@_;;d^LI%Kw~! 
z|7?dWjI95#1c;=6V~MAte^>%EAOF}PEA#)x4rzY3Lsn*H5ScU30$5p@0HCJ-3rR3O z1N;a4|E>4>4`wL(-!a4All|Wr;ja`me;DCk`DT7Ivwyi1RRG;{>F-APeC2OgemBD3 zb@_j1f^>hS()rT_>3*@>vkCr{0_Pw7pGE%G@c)wuK6mqXOaGGz{yzTyY=Hl3H}(HN z5kBW8`nM7PV}XBe^qBtO#E(zLXlY?%X{K%S4<-H&J3YEzwD^yo0&S;$+e-ZIr~Ym0 z^DO&s%KpPte=x)!+WimsfLzXR+X&|02;#{b{X-C-J;gr+K}*B>+~I$ARE)n91Oq+Y zlcQn=FoBo?)bxKPh=1EW(fz*T7qYX^wKcZ1crr`?NMlQLZDWh4v`w_2cAj_E`XDdz zbpFp9RKf?!nDvw^3zXN1j*j-JMXIMah^$yxo)q}2RZxxp!`@kk#g%P|KEWZlLvTWH zx55+LA!u-ScY?cnaM$1v9D=)hkl^m_&a2R!+sWQi1#A5g-vH0k}7tF-S_Fc7TSXcqy z5m+<~-;cnu`RM+mPbGg1NuI>|w-pKiuzbHE4I9vD2Ue5@z)Z*Xy*d85(4Q^zf1##- zciVrhrvEU=-+^O4_xk@}^F9H`el+jzEVAFbRMt@Idk_Z{hR?#(!dli+OGgi=Fuy~- zXn+6s_lx-~EUdo|H&#|Q8sHEDjxyl513DYbj5PENzn=oz85p?&qgs_GO5D?bKMh5K zr(q9-uKhe$GJYS`KjizRQT``X0jQ`zXN-XH$yX*|d@@=Dj87Bzud48L;a`gNKiOA5 zyRW~r#eb*JzZ7zTZy&GgWvR_W!I3-wpH+`F`pC{}EOAZ608JngZ?3g2f1CIHY0 z`NyR2{p>$ggx}r4pGVviPxFU#-{+qvXY230SJq#=SFxYG*MBV=#`MoF?629dpIlg= zwga`~7m2@r_fHc4jSKr{5&uX0Sk~V%lE3@0f0GXT)sJOm`OS|7Ui=q+EX$ul%qQIG zpQ`KMqs0HYqkhZS{+A<;-*U#Ze?%N?tUpH_bbn;VfT_KI?ym11@t@@Wo9_BM zLI1MTsDZiS-;>SXV~l@2`~bSEKZhUxm2P8X1^nJ^z@Yxm;RkSv`seEZJkk7!@x_hI zO@LV)9a|uNmyQvr#;h~|0!9EE4X~5xnSo9oP?LFOfzG1h_xuj6AOREY+aJW(w@(2K zkOwO-Bl7)4W@u|`$xcgasb#LKWkX~6;X5gq#=_cwmi#-q_GfDBuff0*#r8+X{DovJ zbj&nB=k2>_05%31w*Q`FHbBuF92{tX%My0FMiw+W7G|^#mee}HHelZL`&trd4As`|7 zyYuk9LiBV@zz~WBm|>)0{f_AUR~!3#jVys_IvqV*3v&xgTO%_gC#~;+IpOly$s6T(?4d|JE(&8sC{pYrQ`tqM- z{u?d+3nBkg1^%xtL;RB8VFNJztiXT0TJ&q8n-v%s{aA+ht3)@;pVG5WXzX9A{vD(J z@8Dqn!Mgqj2>TDf^&d#=KTy|yfUto}Ld;K2wpmG%!>8Te1}RD=Pu; z=gBV7zL(NA2HG9q6;H5hpt=6dV*h^1MDW9c(f&8^*uaH2U?~K@H6buT^Zg8{T>n)( z_HU~ywErGHHq$So_m^PrKLBI{taExUfuV9m{4d_2B$S2vyBol2VT zWJ7g`ZC_M?x-*#>MB$9B5KIbX`h=9^&u#Q_Gd=3lI(7DpU>;mtsJVpryvu2uwWl26 zDjs7rtprxANggG4p_rC{T>RJ^5nt2i;Q-(5cbw>-5_w@nA=P;yqT!4DU>Adq@*uZ8#Y-(PR584V$lRhENeb2117zp%^Q+$NZMNnX5WkkJ)YwGKI13Og#tWzO8FpRTaZ-pIuvrWi%eK;%4U1ii>-rH04P zbega#H^bS1QKB7%+N*{OU(8_0Ult+KpVRR(4GG4Q0sY`28J8}a_j&luIg~FSMDB&Z zP#OAeIP*1XUjpE`*LjR+Z7dh`_JNK5Z58a7Zi}M9d`UvS+&4Tzc)>5B{e0JP=MNz1 z+7*pKtGE0ww+=96K)H@O&Dz&DAQ5w>?~Z(22aGIrcpyuAST(Z<81rd`=}8T zo)Asxa=ktpdV}C%YEdp&wo$%xLItf>Q@h#BBOwPjHjS+{d4`@;E#qUNsAQ*)Z0_hM zSinA;a|PFN8{}qpH+Kt3k20qaNcEjnw96*za~B#`LkG%*Y2KzUSh~$Lnr%Ig z*N-c?`edT^FqfoI61mEqIF{R88t&kJ>r~GlV|?F?ktfn@OE=0(`pW6K%$!<-Lhizw zl;bJYb_dtqp<&l*#?m^@P~~;v(4Za%l7yYN_ zj1RV#>(>W9!TW=8C_~jrD81E(JcQrIAsEd|Ex5|4=DW}=5+Pn*UR%Cc>JnLOZ0MmTiDfQeSc zA1+G`@}Ny_K**m$Jpq$A+rfjQ?va*e^ipdB23p667xi%lphe}WUoF@FS&15D`S7z6 z;(EO;xe1BHJDmzDPOa%8m;6}KZ<7OxkC!BT(VqeLQ^NV_E!BKPq0H-*B==d{hAJRd6aiG<3|K617`tzN*v@0$xX9z3 z=aSde7uK`Qa?atWpdOj@4MB=IbO5DDqt86CuzIu3NF?uQf~zraGPjBsRKRt>Cm7|N zF5}F61H8`t1@JnnMFf&qOrBN`jf5I%VH4&v5GgpMl7tM&q`vn;lEWWzU+9;{V%5_z z7nemPkF8VmQX2K4b}OGyM3j&SEh5=+Wl_@aVR^NR#d*6?xA?2D?My`$>`p}*mud(V zlmK6iZn!UJPSI4Fx-n4EoaG3KdtIkWbwZu**=6j2FNvFT^;#7F9@GR&Y50Illv2#B5mX37r`)e$#lKbwOq`F{~gmZU~~y&W$u}+?aCo7<92JK=|0nQIq0wZuR*rt}M$cCRGq?6`#K=umZE%Xj+m*uTao|MUG6*RQ z;xIqI=#RHGhy#dHbS!4t4GR@zRj39t^4(yU39&W;V`6d3uey#Eke{iR^RUa{e46M z;2Uk&OWE*S`QTHTZHUzJl19!FGjO-jp7THo;%A-3SX7 zb35QyXWh#j;_tR_2v)mre?wxBTf0R)7!&r%?wy!J%J14lwUv+1+ey9`04*_2 z5phDR=q%e)Wu>yL*jdr&$Bu~+XO$y^HXQ8a?lo(lN%jCNdMbLSx1?t7Q}-H693t)) zGj7WZPdMpl2~;-jsbYlk77GbCN_1m;zNYD2zDN_bI1X56jf{@YgihY}@P3fp($@NV zyuRD+<;&W*)56DuUS3%VKw=@Vmlz}fwpuIS=FW)vc-r+uT`;oh3tgieto+ptXsRTzkHWaa{h{KU3zF4 zzUr96wbs?aA+-LjM)iwI?Fo9nDHuH?8_TNwZ%SRALsB8IUBp1nxA{yaT){cJ68+0S z#ycJQ$Nk;&)zf({(d&!-3pkUbq>)AXJ5#EM37*(>jplVGT6ef+9IabgU!Bd&;^^l5 zbA6?2v8j+4%N6hv9`~llDgZoP)kAOShw<`Jtc3;B#nt}D8%J*xnf;4#oxnn3d#`*2;L-9?>XmDtgt_oXAE`9RQ)0%IeU-j~6FWQMz 
z4%@8~^0RaCzZHDTM=9^jg@E3n27icG;;uxrD-X%{cCz2AUP&n!w>0M5$(*VOTl6>+ z*R{4${cFvXt;*YlzSg{2^*N_0>xCdPeI9G2SH|_J-6xq{&xFH3(*yG!-R>*jj8(gl zx~q@*UEJfK5F9aL=6pRjtxv6gu`+3DjGC)+yy+m;(YWCxjmlc9qmGYo`XajSOq;to z``WR@>bb7Mn%IRUImt19s?qU#a!8ttscH#?cM%JH*|7dCR2rgeb6%XM&M#JDzbVVs z;}Dk)F|n(OzBGPm1xwh0S6U|%Ikk7_IptDp)1W+F>3^C?iS}Xe0nc%8ioKiC+>||e zN;SX7nKlx=I4+QE_s< zLrDx{=Z13Ni)*yv0rcSF0hB9+p|2cMPuG5}U41-K&=2lZ*noDSG}uWo0p4*9HWZ%} zC-HgVoUQXImowLd$KrFeV*tU9Bi@K(7z968Fc(^olHEqQB>oX*Wf^qLRQL0kVa#{e z51i>ISi}^I_z-g3I&UUUGAeH#=nS(w)jyL`r$w3)e?ep{qSp<%Ih5{ z2qbc%R$XNZ@Rc+tp6VN`rsz3Bd=u@-VBCITPeDZ9?rXNl?`vOqkh0z3& z8clkHfacgS`hNXhLw~onI3=L%ImW)zH`3q1Q z{+|7BmENiGDLl{Oc+`BeN6H65(D6*q>rItA>&xX;Mz;s<5w$rtw}d)gdlb8xZQ-P;t_;Yt@%uuZuHt z45#KAa{wvB;-@toGLe zjiViIrAfi^BQ<(mFS2d;8^mYIBAA>X&&8mZ*>ES8TDf8>0Uz<%$&p3A9mOx?9;@Z% z9&fv1Kk_){^t&yq z2AwD?-2ZIZRJc!P7gB|bS<*eD?_!v~#U-SnyQGmGmz2T2V$3)cum>!B=PSL;{vSsn zw287T>;fHBv0+!*T+ht6@3#F!Uou3Q?R}-61hQ`;3xL8hzw2(?_G#VQyL@!IzjL`g ztF5~Rd8KU8G{@Pd1K7cVl3OVPDQ>Ir4!Pw9ZKN;W$lT5_RDg5$(?Ygsht04&S#B!L z?7y0xr$jpx6V>=wsRayZY6tNjE8@jPEAhD6BsAXxBpP*84KE1#eGO5jP-;)r-0!j- z*RqS#+*27S8m6qPdq`H=sapJ=qnC4y-=yYeA0Nik#I``89>Ji{;z3M2j_U2LxAF zA&}KxEIHCs+dk_Kjm`Db%vO_saP9*kdbM@dE6iP;A)gxgIL1lj?%ab?gVtXs24~78B2#KLiqgq%SMi`w|dgWN(1lU&Pd4Vq9}>K(7ij8ng% zhEd}|agaVLXQk|P;07HH(gc)k7;Q`K+Jr-F_7Yr-HWX@#zSVANqQ|qgb|E*SXffVe zITvtK5x2F**E+O%y@3}J37~NuOHnN({zx0o2|hO5v_*Y28EIkaKaU2Vuf8#k$|gt| zybOdL$VTH5r))0{YjD87&=%<+nw(3)Vjdn-d$#(z!;(w;lDcfQuq~MK?ME=9i#HaQ zkP)HuF|hRVbtpwIm=>Zo#l#S`$S&ykw)HOCnbVqTG23(nY6pxA0*ylWroVwO2{chC z*OkE=bTe4!ki+9?pyhGeYjRdc0#2pK2-dZ}76h!-n!U|~6NbEzJv(Zb?n{uqZFFz0f<5LfXPQ?L0&rZ*&Q1fD*7@r-U z(c@vtv0rIAUax`O-x4(!B0-qwBbVB?nBPfv@AYgJ#Hrd)&OHoksm6~@T z_XAsK&FS0WHV$$eR+=-G?nYeMxQt6RFo&_{295-5IAN(tP%3JBkyRz06}^%=i{L)v zwbG35@FyuOJ&?IGkR4iU8@;6N3^bZej~zf4hnG+&{QLrNvs}!nay;StA(ZE1P>y0a z6il}Y9t6jkpSKlsf5w>=*V|i&Sh-Q z6b+ONVTKZOiUwwCO_t)`E=AahTV_tx;4o(x2r$JKoK{Yh^gW zCdXW z1>)FEYI2L0DYfdG0I?k}5_#9Hu|)5poy1eOvV2VJSEY=S%JPXHp1l~;z%lDMO7+)9 zF>X{J+rh@4%ub}+yK@LXG^c10 z$(kKmNZ1Xrp+#V^PMDe$M5HohvS^I>>KBQX&Xoryki}a^p{!nHhpGqiLAAmoVaKR? z*4a1#{jg&x6#aC_4ESh)&i>SV>|M_7u6qttYN`0K6KNpFkFznc-}2BhF|JoSNzE_A z7E^x}k>gIXeUHnuk8VJnp`)aWKMm~lyXwhLARm)T5p3Uf5T)Czhnl3K+G@fI5}EOi z9PFsRck3!C33>rJ@oYm$%gAr4^R&iRVNJ)1v4bA}gO8z{ zvZaGqDd^I~`!yRcG3P0+R=BtMVpMpmuVn75j!?!&Fu*Ubx5qi$C*3^PCg@g`f@<-Pw8>6DO@*;qGZoAV9r(0?!q=sZkC+`N)Ye#TB-Ewx8Eq3VYOr70 zkWp{+y>?HY=i8o&tiih_vZ3LY_WN z%h7Qk+aLpf#Xy9e%(DQ&Ju6~4b9~Oyu38KoHQNEA_sLa7#|6_CZ{{^^*Sxbdf{ulm z1@EJYpW9aVM3PzCcKF322|2dBeShga8R7vaV}m)V?5TZLwyPOUZtSi)+nS*gQb;WM;$Piy6b6;)~C zQ=PeblOBNsaKQp_XO0yBhvn8_+r;w6b+t=?+d0m+j$WptjDwtnQi!PpXQtN!!}Eud zPP>bvg$p-2XhcB>MohRxC)-GHRkm;Vz8~~}hv#rCxvQis7CqsaHi}#l!nuz2ZO%pS zF>-{McoE2bhMbCwJsw3^EwrM;k1}J4g*TCC`thtXP6J3pMwA6&WkhR-$Bf+?eVatz zW{4Ccqoyb@yBYAZR5p{g_aF!eh8?eb{6-Kh8jJ_3T}RHSH)R(>na1F4kfR!wP)NjB zcx@9M#0$A0(_@K2B3RH0sBk426fP^Z6;77x!}_d9pnmn)tx~gnS`3V8DSmlbhQ6KN z1#LW}2}<3ZL!$ljDBO0p5hg~SR_Lm&5WZvBcKCela2n+~1PKoDy~S8dcKw#7sEU}w z#``RgHQQPCxhxEZgIid$;}ggMD1k2_;oh8q&Mgq($5`IoWG^LYx`@=ey0!sH-= zSEAKnKoxcdLF?oS$k3x^IBjAN}Ey1}&+=t`pw$L^n^jNRTjmR;Ok*_ul$gxYfk&QXm(izvP(vl>V* z!ai~Ai^{3SP@<4TmFL4JB~`IY$XcN*!#;PUcr}P53sG7wJ?AavY7MY8}ZY z7&QM0I~TxOm!OpmfXMeFlWfBFew`yhFN0I4u!>@!A(0rFc_Y`3WU!&c=z`kaKk>e! 
zx0Ry__w$G#Nxg{GWR04gGI3J|6QiC@`A#JSjXwEDoeileQm5P8zTT~+H{j-KyX{y@ zj+l0@bcPV=2I7)mP;H96_NE=CYMVx(GZoa&%f36x+2R3 zDl+YTK{yw<3T+WgnL%Jq)G6u&ol@O&MaR2dr2u0_K`|UQLVf@!7qKj8DC-?H!er$B3o6EUo4Cj zaPTou#SqK93ss155rHy>(kE>dHUbxtTa_bGkIEK<8Vk9DF2xlGcXytX1C?KoCTo`v z4MdmF^s2YPOCBFCFcMr2S-;Nko02Yuq^DcNylEPTgO?y` zoKwh#=~R3$W)II|>(zC8IyZMqPiXi~{z}DV-Ydd5pG)M-tAbEsB^u)bLyaxSRcHBDr>)_15EJnhSxEx<8NgLyo97p#kGmXtZk3^19P;q<>7+k z)g;?=jzL)MadFJ#SJCwqKUVh9%N}rd`?!ryyc+7=$xa}LP@j!go8m|PY(trXNC*@K z$(8C^SmORq%LSg`<9s5HJa2|5^+^lgfq$&{2&I~!ubc5+A$jHU6ZN3PXr(;pxx|nT z2b3DXJhhVCV_^6&(SJ4<0k1KT<+QElkcDRydjE0}?ORb*i!6(k#Dp1a13$Ok>1uA; z_QAQup+zA5m!kXGLzXl=4cA}|_vJ-&gpa|-1kI=idE=ETFE0yUjXJb>34G`-%Y)bu z+XvWpt%T?V6|;(!U|>x2xQGxc&8()bL9ovT&YW0dlbX{=8Ulp}&9@1sV{p?wWRRblNWD6?%Fjme?Td zUg&wDe}-Xk!YE5O=j&=CVGHwBH9^NPaz{aDcU?{~r;cr;sTigRg9$*HgC_6D+>)8z z7SJ9&5~?F#JL$Y8hiMU#)P`|R;c^*}7QW`62CuLz-zX-nMQ-__LDE+dyeXI>Sr-ba zDk!A{d&ykTS9T63uWjO)gAGOU$0)|VK2;)|^lpNW`sr=y3$aJNS2<;-mLEIXY~HPq zF*Rf>SD?;Kl^MIDN~((%Td}Y_7nsUFC*`$tEs@U*ftY?mmDSN_}I_L$R;lT>0$HyRVcDFNwQ%Dfj z<7W(bu(rp3L9KOf1|szV>y5UZaM5@cAAFWSdz2kxTD5yQcR=TI3b?^Rq*AGbWj!X4 z4!`p;c+XkDavQ?gd8V3V5uUDoOy}s+ilkig`OU|zj0xfOEm*sr>0B7= z!$QnMXsqRudy64d#Crk=&`Cf}aGP$VL-XPzLih~hnD9GUDY z$WBMgt+TiXmx@+5=d4g^jVs?HJZXqkLJSXwtrwE#f&wYWO>A&c&`M>ak3c($BvcGh z3dxMQryiM8zN}*L4q+4{T`GSq+S+Q}kSm}yZ`(*Ph*D?*)4P{27=0}nxn{DJU|j+; zkE!{{@6%2bfAlP!r6>MEYV?5&;)J1(bz2z_lW&bZe(Fcp<(}W~eP=Aq%dFKs5>GoA zmgY9IYzEmWHp0*RW|^O#di+CbHP13y;h|)#@OHr4hX7*F_)sOn<#cW?Aw|@g0&j;A zux(l)Nu#!A4F|5KR$2#*yMZYcRHJ&=8fmlwtF7{a^lj?{WqE*hcB+E!D-QqsHY8Tb zPx6G+nV@pZ0hG;A1raD?{Al(MPPE(T`QE#JXT55Ecr!xUu%BEtQ5iJF!!19~4 zak?VPure*g)q3GtACL>_WbsKRBSMAmew3S+;L-7@s$PE14a&Bjbj7*M`=MYs*E~XH zr6>20UgGY?)-=dnM*N)stE_Gn1cW0~BT>6cx9+D^R?1xO*H+5G#3IYEW+bV+UZ?0u z(f3@8c~DYSwp=C;`E`prAIywS&8uAwF5jzPe_Z)?QnyI@elt~_A#8M^ed+nFe;?cs zG=vf%+TFkx;DWWqtw)~$un%7V`!Fjj)BeK64sMYHNJ37Gcz(%E+}o&2bpU(8><1i* z1soiBS28yK-JMR;aRCZingP3pmj_6z+2_)9NY{7=sLLhf}=>(H@ zK%SUEy7Q7H{hemLA5(Oy^{g|6#&}Z}k$MBale!E0X7(iCRQuZ(^O&xcAC=9Nayr5L z?cqqP@Isig6e$R`?u&|RN%s-1)6|VBi6Gn zH9l=Wvtx6jAMt@NBq!uke6V!mjm`b3e@k=t_we8)L+FygS5!p_&vK55-@eKRg8{Xp&mCYwb4BtLuO^i*Q2#Q--l-_aCpMzrK5Rc z-|F`^7G4_mD`vxBi8Mt7!I{8f5jQo+K>+RK0N`zj|FKXU&PvtHh!2n?Xe|)JL%LOk z0ifvYuon2tQ;b9%#o-@<%PtzROL(WWRV==aNiL=<;KLJ|1s2vG)6V*~2My`Li+?PH zDxqFvR{2YfS39J{nU_E=G#tqcmvh%Or^L|0O>S`PYru_{`Nk~ z8#Qc2i^@g6_LoIPK4Y-N2vyHWyiDg;9r$o?19NzG6 zBZXqjd-NFW$%40@$q{B{os<@AHh%2$aOeh@Umxc43V!i@(>V}9n^q=;#y=zL8&agA zgk&R10FSFkLqs)k6C*#GTSQYK$C5>viD=kR6(V+YJuZuy!;D;qx07QBbGX(=C*vYQ zN%RQb-DWoEldOt#L@WFX7@>w2`JehDdzOKBGk!)Zu@|syCWvGr&VgpDBWg%faGb4R z)Wpp#{lr8_I@Nyp0=btiDltL@+r zc*9kc-Y-L(?;NfiPLIcjOPlU}G@7{=xsj5GLF54-{iMY(0Qf>#U*aR@v4RL)y#$IP zha;jq-k>;dxeRO%>OvT{YISFWFz#>j}8=>+OW{DNOL>S zO=!BO>`6)wFlY6#IWqQkcLzR1_HbSLYOVWL!@Xt%UEaHE9b1#oO_F(Oemr;BqK5U= z@+~k0hO^#Z$l9@?fuDED=~{b#(G5<8{~RrvDGfXB_7fE?UD^bqHQQ-|J_$}W9m5XK zG=##}$B72<{?i6reM!Yr7V1<)!~PXz7d!#`X{&Jo1)NFiM4C<=Eh?(Ao663?WL2U= z42R%-~6a8$?5We;K_Iw+Aiu|XjL?#({7 zu=Qj`$Bm1tWXiZ1ROD;aN|aqo;QNEeY$}VktB$%T4j5lQ1B0Wm!EZMwrVQKucmCy1 zr*+nh7HXA5i-iVP$<(avo2GSG)W`w&@7gz2>)5yrcKe&`Ims2W_$$^orjsdIFK4Xi ztNe!RtZiI|CgU(juQIWz6RBCd^_%7#Q53!^?px{|X_Xp}s^Pw@s>s5Y`zmWSCq8ja zlYF@u+BV$+oWUDh@uh(dV@yrSZ%?XqnLUMI3)SC4FmCygy;KTa;8p5yz|(*}huz;+ z+9kiosF4)!S0pdIB`5k9Ztk(TfZkm*A=Kg}$ugs$%e6YwQoq3(x;*=pHa61xai}4q z(xUSuCwU#^v}LItO~2!kwh&>7+IyM!RHgrflq_HNiZs0Qtoch6b7Q)ueV)>_Ggf3f$4W2cYA-b8Ul#_ug4 z$}DVth|X|?4mxeJp$oyy5C6qLdjZ)NW-}qLq-3=d20SojYF#AAFV$qfD1WSKgY48$ zBQ91f!_45+M*(e_%HF-OtdNhr!)X*F{!%KRRA94sY&Q;haqrvi1rs)DS``PSsI>$J zng-JfVywIz?aa`7Nrpz0gUpx>(No6#Akz}R 
zjIM;vQH7|%{aN56cHnA9IWydxsytfmcXq(%{T6lL^>|j&UT&ogU#%aYWbb=1%Qa%J zBznAiB~>22Ys%|u?FyCz!cT zQpCh#@=dh!>$OhsdTR{r?%;!L?0WV>#sD^+=OXYKByB9IqsHR99Xpq> z?hD#4N-@%ohvB1h4XXCMRIk>Uc+=Ml1ydjMjSLe?{1Be=ykr^1G|sw`G$fOp_-e?S zri}!N?&^d!|Ghi0pHXmzIP;OULM7t!#BKYzq}%#oI{0)o7M?}!Mv5T&s>mRjSn~k! z)ARD49%I0ODCHM(mO&_k#p~lSgXWw~G=mrHONqIce2~!rj~`*=iz=rtgA}8Bjn~${ zk_j*4I?{BnGv z+_TT&R)A|xj*ijE3&B!)yGHo!0A`U$QGCpiNOh~ilJgy0L$Vx{#psbbN=(~vC*=nk zM+a)Dz^VxvhlxZ>dDNkCNxnWvFbkfJV0mKtTwRDp+=ZextkZ3eW1hZ1d16)rf&Kwo zsJi2DeQ~8yjwv435A~YSI#bE*Cnq^-YO>(S#kDJ<6=ZzjaoU4KSS356Ov}~FpUSuA zAs6D_9O11>3E_cyUuF#Q>sEjeU&IV+GHm59$5pZQ;c&hN+!|Al&TXx1(Y)E*P9g?M5 zBHbaA8nsGvw32gTI20jzdO`)?JN5x3eP@+;kx2{PdlG6_rlk|pc} zLRru9ZB1%0fKQ9xhcJRoDUP6}mEcRBT~tpKPMvuQj@`K5n5DxRC>ju$p_#Ht;%@b4 zf+^y%`aXC?2$0j7(O$}BbST^UZO;!>dh|2nmbq<3VZR+fyysN`G0CN_A@t<1u$rIh zgaxi%Tjc|EmpTO-oW_Onjg_eS?kQTss|5; z965>ce>i57VS)uN}Usz|IO*^g(U2wap%X>Iq?uySq z2U#TK5xY>JO;Ig?2uz_7ppcgTLiC*0O_WMt;o;;uQS!2(-uZ6H!iHSPY+|~s*ABI# z4JirTsUl@2@3@l3&m8e}+Kc{SpD&;J{5REV(jq(W_|V4--x+@FR{(@+F-pv+d-dQ; z9j&0%H9Gsa-DZ#-GV25oSg5{9@#>RO71s1GG(f~iv$!-YG-%4DY7h0R0a3kA8fU2(!au( zWmC@xcZQvc=u9HETMO=KS!rBXxgzVnpHmduSdn9f(ziBSR1s>JdzS;z6+Cqz&y4&@ zg0S)px+317+$#y9IJ}y@{&^mAj1JraY;?x5*9(>6qb0fI+%RE}4j*lRJ1iSo>n$WEN9e1VYt{A~9?Q|(vme5*ny}saOA9q;q1Y&2Ughx8e7=AD z+u7Qcium^a5^Z^IU(l(Rkl_|jlcfoR4*M$uJMw}8{J7>TxEda22*rtj@Q;M0AdsMT zY@CO4G&RyvbLt{VvqE2xUROR&k%i&g#S zpAWIDuGa(8`b$1zt22;OG(<9Vk8gt%LCM20B%mT?>eM9%pr0KuPjSijTcWgxvt}xR z7s6o}JT^D%?n=CbSUJ2!{Xi*9LKv0;7{3^)Dv&cm7-umWF4FVhf6km!wCHxpG5Z`@ z2->rT>scV*%Bh-*LzBBgQ7$&Ago~t24Ojn}tBz!U=%2)k&W&O9{zttaPT+U-)32%3gz08Cs?Wd;R zo=p}gcR=H(jf!5*kr0--ToK#?NX>Hcms2^#6M(4U)nfEPtYQ(IKfh&sU-}%R<=iul z@TE<;{^{hU|O%$V>TGs0(ah4IXEPGmQ`BmTR$eig}A2@DXPo2&TAMZo$9NwTpQ6YwtS`gH-mbY?P_B}*CS|jwW!=5a^vnsN(_c~5 z^#TajKz&nr7$6m-@Z(3$UoPrbPK|vvfpCw57W0%P| zA)+4GibA$X(cyGzecUV6gATNP*VB6FFxUHdE2#(r|7C_~38bcYdrGz8 zGK^BUv2{$ZGpXQ@N8F=sDgux>qd|2EDIA7*6xDhE+Ip3RM4W4aQpI>8k_| zR^oA=&k;~9%%`qQCAGvrLqs1Y4+a}9aHji7D31QThGw!WTU)ZA4yxeRK+CaZrFd|* zWoDV=;v@oo>7s^xvNfiNO${s<12taM_*JOw2dzu(^e*%EK@|GA%Ijwys|t#nZSCVCUdW4 zfzP|pv}9{x`GwRY;edIxiamG;4S%4Y`!uRqr+2X_pspHxl$~eMtL|F=5hT$im~N1U zWk@q$%PAjkVf_nv(N`x05cc5Ztyh$!JkKgA_ zqu&ILj>L^xjtbZIU_Y3uJu*?j_)or|eu)qKfk67egZRP!p=Y*9qG5a9WxoEp0CLpp z!0@f&83}vVbxN6w&7LpYMFXhH@J#1;bup>oyjw`@zL4R98<{;GQ{e6ENQ0TWDNGEw zbMEZiH(C+mfrC(c*D(t+BU!j**iLzg>^NUIB z&<^x7dIDH}FHJ;*L1<4x79W2oq*p8ac>(CpiS>O#dyqI+grJy1J7iAtl}IZ2&f**rw-WU<@~$&Jn>&$hoEjOVGQ}7QB~q%&Q?+@U#|Wa8iNJm3K;8V4 znttCi|AQ{@Z=eG)Gt<)mi3Q9+)EEm38x4@d1^gALjsGDm?gv%(mp$`8b6ft`e*CZf z_+R_+|E2x-nJ@Hjupf-9tiRb0ApD1cfQb!w_-;bJpZ%v+q6YD=wXZBNeEl?E`{SmI2 z9y^SyW&-U46*vO@B7s{t%D-u{SV7nezI*c;I`A0>i;h>Q7c!kT5%TAc;HYLLh`wSS z0r61nh){vr1A}i?ru{lvr#%8`kEy4pyK2l|8f#1?&?uEO;TGI{{V?Awbm*V?HQBBH zOKQpZe!-0r`m=xhi%(4kSY>8rt_-Dcyk-4Ati1(TmEG1ZyeKK@l5SACJ4Hh2F6r(D z=@LXj1eB0&5R?XKkWlHAZls%qz#`U}xZihwd!KKgxX%Bt>*8Xuc%J!;Ip-K-j&YBB z&V>jW88mTpUTUNCq=!Hj-d=&PD7iX6Tx#(>ZS>gW!h+BQ(%je#Wi+}hwVj-tpi&1^ zR#)?Ey@5bJ(A-Rw>w^QZRQg$U%BO2=R7WTgAbi}H&G3)fW!`(!c%JeQh_X~LWGtCL z;Dn^z;@cZ-U0u`#JTAmpMt2X7?Wjt}nQG;9fl-9Ksw%gk3}F-$6sXhjM5(5;ukl1aZL2A^mIc#z0~*bz1F|uZ6-*q|^bB;}|dmx+>T@) z0@^Tn-_|DK-JUl!HKjVkyG}TF~D}@AZ z=czNZML$rQY$1!AW&T^zYM5D+wGo8PThmw|I+?Q zy}3`&)%5iAJ3fxnGoS22M~OeeL7~tL$>6&gDJd;=JE&v;^5x6N|2O$SCp* z3kxzbGD%aZ74Wm&shhuhc^@0wxY3>QZfX~*fkvK?ot{;E`gGKQ>>*9TTFb4k| zaiSe!<{SHw_VV(9f9t;#3)BZS0O$Vwa>E7=W@ej}uMw!&6s}p=FjN#Ms!nwLtxAWF zudgrI@e4-c=*ZRD+S;+TBzF4et(0%$8UA+I|LP!~s zAxbwUA?)yz&2PZO37cAiO;yFg*7l3%c47cH-mzyeUGP_U*~gETWkAEK{x_PO`JJCkMWv=BEi3D0Z{u>u*~_{ZxP>*xwdot*jhJ5iOE%K3;@ir`LEg=hFUu 
zXOwVj{YMg%RH5I?%j;yGmh7_oH9_?xP|6q*&c5UAgp((fup>_nZXut8jDSfnyiiEv z$=3+X6HDG%&x!Z*gmH0kClyIhRWg&sEih}!i^Wm(w5?-bzdkmElRwUQqxYQsNlhyk z%@beCUskj``w@7wZ!qzyQ_}ZQ$G@iDED6579V#n2wma?9lJ+C2ov|uAJ3Dhn8r!|u zDXUmFeCm=WW*BKuh^1ioquNE~Av{VhJ>_<6aur7AML-J^4N(v&9I% zSc`YZ6!k}QqL_VhkW0a7`|um42yVZRWwWxkw>Jwk8~;cgxX5w(jjro_$>#6(W6Xvh z!44!Dm(#r^KcJ8Jz-42I-@kufb$bw>iX#rIxr(#`XWO)2|5Mwg2VMcSlC{>m?+-iy z&4x_;?H(O3Tco%Q9NHal6M8SFoh9mWvWTKQkYFp`b8wzgK$ zauSVuh%tK57(~3PpY#t`hWy|ZZ#1}~DQjuEP1V>#K8hmg%!VY*42%qn_iOm=0gq8| z4#L(j+7`|=3aku&nJCzf|I)_iEaR@v!)O#Yq3u8_#MsND9Jmxi=Bv2B$Ly~UgIg&# zdY%S*pKDd!^@=p~hvZQT;r7z#XjRY+Y^KHrtk(cV)an?$L zL%6A_pf1FD*(rZDSq$B3cbXHBhlACVn1m!HIa#Gm8(aPc>F(jd#kk=u(0_=oAAbwH z9#Cp0Cnr&#gXe7l!lVq;At5e>A&0y+LnA+H&JKxIEuXYvK7QUn68W+Om(Gz>5JTOD zb3{`0$-E0e9?R;j9WC({$hZ}w_Z z#NUEhcp(HEmw%$m5`P=<__`;a6|dx(fb+a9ckJC{X5{5d+No&XyrS~kN|2|TskI{r z9Qn1eK}<|+ZDmC%?B4DB;ID2-QdWP{EQjl4N5Z6b{!);jOPiOS?blcTSTz1ky(H;f zp{TcDu|?s5i~~Nnt|U ze=DLcV@E0u4$OokFaGZ_mkIvshHvJcp0zbKH47+|u;C2hHDhX2zfjGSx}9{ zm*U?_8Lahn`% zV;+y?%Digvi2&+BvPJ#=3k=S)jiJv0I~Cnd`OCtni70N6{#c4U3QPv>34zHoe!dmt zx}3_9Y%bgUW2hy|Thw3fR(eSmw=-&w1^%gk8%;`31^Rmc;3eIY?W$3SjoaBfsW|X| zD!V*N61?M&O{l*vgXiEX&qGzc242BoHGkP-KE5s-Z>nNzWA}e^W3USY>#KxYGINQ5 zpMN`syUr|mgVn0-7I=?8Yoiv|CC+nTSK4KRe`iSio;&9qw<8!c7ApR&5lu%_=fZ}l6UYpt(0In%Px zv)Zj}`<`z2;| zC-{4O?$*V{wP*Hq(k*!)*$%-VtOlw5TAp!KKbVq5G#4Gl8-3{tZ9njRxB#QykU`Ia z;b6GQS~2rU!*>JBB`9tj#BmA@gS!JBc4`Js@<<2!q*b(^e_fYE_@g9;@z*6;mT5VX z`KdVa@S3bwW1l3=CEhKU22bBM8=n000f#4I@0@S&PIHk}Z?yZ7Zx*cDZWl3=;K)+& zk_)m^v#{G(6_^*4^YlMmCjaL$soNdoAm(52+ooz!d5l8i6@n$pv= zb7LVGGtljxsZ<;Ss14!i%2sgKsan&S{J<-Gu`}JcOD^ePId1AVx6hU;^OFA$GTjY8 z@X=o~y*z}y>RNeAegFP_HlwDA;{STUa!P(MOVA*268c`&KE?=q3$RfUc`>#geg`2sayI^s>>&!n!Lo)lstt-l7lxXGlAC-BRBf=j?4yqHKpx?x*15%p+Do+4q!Ftgk*s-S_$t6nCOc9pAF^ zLuRz>HU#S8;xa{i)M_(Kg&goLQ~;CmAhSY2EFQQaR}ymxCaD2=t*vpdo@gSFauJN zc2u4}mz7eDRlG+<(~z z-S$yPejgkZ54foG_?B)Oddm&udB#6zA<@^@_fn7JNlf5Wr%)Z|C=wHu3^n4(o3KzM zFd$5Da1ArO{)<_B%bQUa_HWkv%yQfgI@=$Ba(7vCly-ODz{JJ zx<(|T^G2=Uyusj<)?a}$ily1wBPMx@aE_jM6auKE6ZNA%iD0`Q)wA~7AJB|9u!e>P zPU99K5QVa`Dl$lh+S%Lh?CmMlT_xWvz|(B9au07kw|v^JkN|o` zBTh<1#R0OyM~^PolT<<49OlgY2zGTPd9_yql1~6upe`sP_V)H-Q;I-0(p|E|d=qYHb0n@o|g)yb4{EuqamB(^0MO#r$y~^dEpA4l16A34n z2lW6FSMM}Oj1j>nww5XCBXO}De*LuN?580;Jw2cFLW2wS&3H$wmdJzF<6iP4fb1k? 
z$jZPcI#u8x8CH=f91KwCU0ht;+1UZksj0A#u|z-t9u^jcMJ_P%4KI)p{Ua8mLhy&R zPtP0OH-|C>T^8@}ly6&_3SJgFDN;9jKKN8plEbMl-H=2m>b>{k_l%9Gh{!!uRIJ8$ zlYsAam?RtkQ+a!RmkQHg@Y-z#b+g7thEEMf#JwLLs3OV{F~?LK!moy<}AsZJ5~7aFT_naf~1u} z{(mgR{yF;m{~j0yUs&)rFf0td5c5BMt=IoYV3?bSM?mln`~SOW{32s76J3w++wX?q z`(f~k&cN$&jwJ&YzD4UuJ)n9rFeIx32lj^#62byOG~ z@)Hz?2X}L{hO-(^wDqgyo_Abo0Xvm16QZ#@6 zdhl&27#?_g?d#!jdU5gMfD>RKIyyRFW$qaU;OOVipE)=H4O z{oV&E(N$4l+&bGH++JF`9f3)5#LGew5xd%hbD=#J!HA1iAsleQg+bgyPD#1Hx#_dq zfk5c!>)V){Yi4Lj&ZMTR>tbuW%;fCsJTyG)lG8C9jE8nZ=_y)OH#IqF*8i;npJ>+uyC_l2 z+ZNmYH$yG4wiR+Br}#eJ_wL=x4*X48{~N%j%9$c{<>g9w=1vtA73aTy|2{tlK#%NT zp4mdZlcAcLnuEhGaH61Re!u#NtVD8X$uf6H2i3u&glg9-00hjI*QOJ|g&PN~_2Uf41KK;B=G7n2x zQLzhWI&H6D`LgSKcJOVZ-tqz&UFl~mN4o`%m;s5|X9GzA5 z(k#lg7uY^FOFA1(iEYti^r^1>9H{&><6_qHKp;H0ey{^ zMt|Pyjb+#zA0H2D5Zx$-y1L)ughHt+O&@Bo;S1`G-zm}aMtmx&A!@fUJ-x>brdN;H z*eqvbCM3QhWA?`hnaPJzk4&thGBX#4goGp~C!d_SyI^GbIx&Yaddl0|&t4r4Yiq4v zpTy{xnW>+nDxfRjI&#v4F`_}ltKPHfBF-b&n$?_N!C=ptp3sG(C6B;u9UNqE;(A07 z)Yv%>$tWn;w((!zr_+1fUQkf*MAPFJiHYmp{(`-O!<9P=1fKe0k| zh<6K8Qc?|b$9!l1YL@R#>IIA z2Fh$Ag92u%t>(`vbw59^s;&lwt2wZ6MV}w7bg8JQ%twfcMc{Yh!xa4d zV7exy@#NpQd26=aF$kKv$BWpzkbnME0f-qirBX4KMvr42WpDaSUr%p+b2DG7%5-LC z2E;1>uok+WnwVJM*Hu7P+!D2w>+kO$HE!`c!!R^(cX!v+BnU=ybV&`F8UXczgQa#= zew7abDSP+Fkz>5fO-)Cpr=!|aKYUMV5ZKH@8_tWMuERZ@as@>FMdHC@5~>>N%ydHT{xDK)s`)fx64a#6ma_)^W%MigR*u`YoS6 z#d-HW1%VJLgNw(2z4WyzPpTN%Ln<;wx!nuHpHA8dV1yTEG$KhFU20vO=mp@!wE5vqgbL8_p8mzt0sAs zPvwimf#8aS^n3L>4_N#Njuh%y^WntwURQOssHbPpu1x~|C2bfZ6zSyj)XK6J!2n_p zzf^UiFnrqdWEI>&!=N{pmX?NxhgX7KG#LQW-OcQ>O1)w$%YbAKeuC>KXapY2qh~1P zp`i94(VM-E0tBXQ^kRj5Y6wDcg`c-i_P`FvBi?3Y#AeE*G*S9we;;q_bWJT)g4KR(bOTkOcws~ks(L}5e zjS`Xs8&ED~DcVzuot>3)1V=T5^EWu19vn@9`UN>TDm!~PhUC^vLrMxs=KfpI)sXr5d6zcnGJDuhLrHd%&+PPT z=xRA7Utj}iyyks)Jt#RiM;JMarw;5_ipaz}20rMLzP`S;j*gcE z(s`!D`RTzzGr}A7U%??%h)v_4Ka&#?)md4^DJuu>s&|ga>1t`YN`mZ8B`xeG@HRco9x>jkW3pgbud7wm5NE$m$l#oJZAa(BUWLvx-r*m9v zLF#ugTN&9s$roW#m6I1iU2be?F{q%lOZcNXAOh>tQ5yF@!p-6;r9Zg7dfT2mpfU&+DrE+I56-Ax^3`|+PJ)+R;`-TH|>!g z+b~#xL8A3Fvsy!6|Mh`9C>QE?EM`k(9?aIcz&wZYWP4x8M2KOyh|GTJ=;-KAsmQ@s zk6&E$CnB1HWL#dXSzZ0U41kMo)(i{zpHtzkD2{<^g3c*z`UqQZt^yh7b{;&O21yZN zes*?tT-+cCHb;`GYHQ~kT(Uz$5e|vS@ycsM&L6U}?!wM?g@uPj<~jG0P;une^iV>= zb!v=Nt!Z=i1HWgK|pcK#ShcT0D1MMZXI=Kk)kKM3FE8eClF zzu*Of>ZPRD4xu0xUitSjJ#*XX7s@bH1&gdcEx?kIaM0*qN;OJN$J78}wVYZPW zOyTM57WE81PrDr!8rIXiv9eNSJxFF`w2+#bT3N|OO-+4$v)U75Ytv|_RtZWJgVPOd z0YBabLJCRt&hzpuYU{8*mH*t>*ofY#RaREs*=d4Ms}m5phWJkQ^h8J^5E*qpS64sW zZ`h1@nE|BxSo*`wXNhg#W19DlI41T7+br7&744KKenflfjoeNk0eiqWy}D`+0_N+t z$;s9>HYus8psF5iqFzr+3r8Oe4a)>r_Kzgy(G3YdsKUrL+CnH&Tmrp~I*#rzr-5Y% zWs!nP`nz|?mH4KTA*@nuVPSz%9i!NGmB%DRM7djEzjE9stHsE9JGj{BTVvDErU|ll z|HDBPE|8GR0vAd5sFEwk9z-gl{RWMqppvfFD}G##e;fDi!+6_vC+`j1nB059lz1xGw0lsXT`kvKaR zerVc+!rw(3#nY{NB-{)LMyuXCm$U-p@0h2~J1T%E*P~TYrr>dV7419Kdf4HpyH0cUiduEi=Ldc79Tbxv zluS5cewB;mLX9F5feKQbd#b>%eLsErw7WNxfkj?F(infwo4)In z17z<4su}kRXWXto1r>zXsIxpC9{$3O>})?@4@>Gw6DfXVur);ZtP#oPJotg%sHk(y zeW`0qpKIWtGkpDYbJ)gaQ=#AzuM_{pGMD8T17(Q4y_iEr76$_6*I%$Ra*N&vh#6G; zeHULNViH;IA%Vs5NNQ`o{M+atew;Bir5&y= zWqlgo*4(1|m2(s#sWWva^F#O%P04{k%qKJVG>x`3Z?6N{aGS*By3bq|(j9oQ zBKDNufI{{|YAQsmVBO;m84(w0$;mP8z0VKn2Zkdr{|(Jo%g%0lSkhrxz3bI*V}L$l z5;g3uB@l&MLTxES?u9T}S?%ZL(LH_7z{KP|klF*;WmYww#WXb0)lJFB7-)y$ONzZX z>gee~L0xm1ovLeVLx^YL;5b;81<|&kfPh5t)@9)c!ku*FAJfv}GUh>g!)#>af&Rww zn915^^NGBCe!vrVnQV!{4_qs{A%2Jj$y`g}S%y41=6&6jsM&OZMyi`5AQncxSR~Fx`iRWftO?&9_}v zS&3=%`Fj^}VNMR_Tg;rtH*m-67yK(GM5Fh3QDVFfwzszd?sGL}_ErKtkUt#1Hp;yXCo5dw1 z$#2p^vSt<*Jia2gs!CiQgyZl@+-QAUYF{&#V(O6UYc@tzr>hFh`nf@IOBo&9_0Ex2TEd)Hnay&O_21$cBC@8Vp6KtOJ#N 
zFrz%ml}msnCrYHPf$HuY6>(+B-G#mi| zZEa^U4OYm4bohjzGn6nu!Q6axB)1=OqFGksRy@xTcEq+*sY_5HCK!0-_!{Pbj!b)6xC=%e8!ok@T zR)10%%gD(Y3~Iv0#)cjqqRxP6rfp@hCgeDMKSUnlBLRO-iORo_g^NYKXzOXt%gb9u zPO@Jw@s+K`ig19?u>rU9*;}wO3G%0c^o2V3`H(eg%?z{6z{r+ZV@fo+hqm^lo=lgR z#O<5u{#i<&Lnfuv9E3`M2w8D+S0!?&oP!2PP*5U37K)9Io(tz@5=aEd1c>+#VmLj4 zZUWHlCUboG$aP)8$-Z(%$T3yGrzUpDb!~jLL?=v|{fp}of>FzhJ8z~jnh|AM7zKkx zy0jbhnNudF_|#RqA8f-MYMnu{*{W9;7dbZPKRi*IVG^_gaN*&1GKPGX-&gzko{8Cj zRr)mnGhhH7@itX}#jU^ZmV>^=;n*ArA^wgSYBMa5DiF7}>#smYlBkG5H*Fk3&-q$R zT}f4yWW&llC5PR^efHw!JmWp<{-w`T;(O)$o#AyQBivI6*6K;PEcbA3>sngGBqVOL z;oXw^HuBo{REUY`C*A!-P6mGdVaW?qstzntQfkV~r7xM0Z&%2! zZ{olI@P%mgeWy5r?n!6}!tH<^v$8hhFEdK@rgD!nE2&WnsyLilb6zK@BVqxS=CrRDRIX93S8iOs{_Y*2vH8LL zy%kjzr0I0voFGE+g_Jx`-q-(t@O|k90A<(Dyt1-pyYZro42pydO^q<{6=Q*qZTfNu zhqFG(z1kL)?=!7EHml(w`i2!ZOI0vzMsvQp}iqJhDOkarKU3=QRp zcWJx~T2oUW7{9rmim9d)K9RTIzsyRYmlSjDC@$8y*Xp_G{c?Yz%jQnPkA9gTsP4SE z0iG{i(0^xl_f@1frPg!0iK(gK)LfaKw9c{F-rh0jOa1w zZ(Ic#JVhtY)F2|FsQyGD;CPV6YhyQ*iRyjG!V<~$(+z?Z67X?wPt>nA{m0G=En{Qj zhE7JV&apxfxcL47AR79lu>m&^`IcJ&v6r}zx(hE-NV-sU9_g^@aO&{rc!~Z>Yi??) z(p!Z1_VvL)v0=}jBaiOk!xvj4myiv6`z9BIN1X1quCIZWW@cv(dreoSs3lKc?#~$?N zy#FE_?Kr$wMRWY^U_BXM)Y{4lW1@fPl%9r0*kr$dVtD4j$uE%!9_>Bx0K2*OG7l&H(cB3vWJy8r}vKyKK2Mr_mfj#21MuR&d%QE`dwSM$cf%(Cbq zSJ?=(`xH2i(I7wG+R)a)@XLy|n0xv1<-|m-@nH|!eIffIYeN6P+s1pDF8ID1IkJ{0$(kBENM zIZRnvQk0|-l>#5>EqsY8KQmvJd|`~FfrW%JH$T6(vJx6O`TaZE-2193?(?&3Q0}wv zG5wa9^Fc#XvpF*_R=v?xcg7}3_ri5=vrVwz#i!~nWk`f7d#=+q)h zI(w`^xB3Jg)I63yj@Ov|Q~YhX=*7i7tgMFGZ}}xzSeiV-T3T9wvyZ&QrfyT%G56uc zxQFo@F!;f0PH!;%NVCXsp@|5kKk!FH*e~`2sm`G^-p#GieBf@wiV;S-TK&(Ld^c@E zw8x9fENw79P^|9mV!B58JxwUf&mZ4^{-{=GdNv|MBcipl((Dw%0U7B2s+3kO#Cr;# z8)pG^hmJVWV~ZgQb(QN5jQIt?lij7ilJG!T}E#g`v&;SKo4{&^)Vo z)$hh8XWTt%i5G7{HC7T~D;R#0czw;$vV=X~7MPCFVUF-tfoNksnceV$fPtAAhcr*F z#}1v3zpnRVo8Q@N-+R{9R-f0}PZAOmo;|xap?xJJ1bvZ-7mA9+&K94OL&H}J*8OIm zLpjZUYKCO>upV>A`7f^r7g5zd=9v2IOxPj32DO%W3Un>aH6h`~)yYgT-@eh&V?Y$n zd>`{^c3>bjTI@GTjF&C^mg8r(H$=50FAs|g-4vBr&UlgZY8`v?J{Fdy?o>ma*H;ca z6bbhN><$^yv`D5sxmRxWvRokEUyrs$w0|TsYo;(t(@b>rN2w0R~aGmg;4h& z3k$6tVbReTGBUe|?VDd3eJ7{ROg!7iT`&X(=6$gq(Cf9P zexGox4o)et#*O0#G;Sg&^2o9E#HVf5yYL9qmfw(BA>;CSvoW*m=%mmfz*lOOZR3k+gY|8>a|6P>=&vgJ zXj_^ZQ)vsIPOme^7(^>-`m(ZlGlp^0@60?sW8>q87n;${&xlmB#BRn_?_rXBm~Ud- zWBO>T0WyK`!QUwfGgGuD-YGe!hA=x)XS%5876r>^#jnuq{s^3O6F{2<-X7pv-VVldrA@OnHWe+`A|a{V7SPpI zzxv#J(h7Gib#+8wHg@ie+B;&QqvJVvo9C63D9Blp68)%zcHz`Lsndh7ws%E)U6bMB zF7NG8kdn$$alA&S(?d|ZdMI3}cZHCJMP3vHJYkhJvjiJk`LA`pckf965F*dOTn;uQ zxXt7g7YE-pP=)T)b#Yj>8hI3tIUCc;@`iq zv9aIi>(8t{IwY5F(9x%weXrvem=ZG0>m0GITaY8IF1 z%S|U@YH532Z2kn)VI1-zW%kFx-0lSea7W~K5G28?AOgMp+uP8-J_AU)_0Yt_N87gv zgwk5LozcmFO+73*1(_}@u*Ai6jctEv>r_`eU}Yuvy{~C!TmLBb5D=1!RVHtbWELnz zTS#BX8UZ4~hr~qpvzV6WV@6X+k7msMPEK06tuUk%g@VGYZq@- z2E-IT7Y1ou!TW@4^=3FYH~{b?ARuroE76%K0z4joRk}1M2hmUeo*I8U6mVVD+kCh) zJtK@oEkQynls`uz#5 z%fPGOZt%X^C<3FgnA`5zSruusnYXko)zmC&at!t38o{I9D@rqxmwGs&ZW) zGBeo|J~^U;gfXlqS?Aj1OU=~?4%?$gEAbC{T|2EUEg=tF1*2gX7W-MwIZ8Aw==U&m z9xyQK?MXFZSO9Twm9MsKk?51+09P^~0D+`}tFcLTBW;?(wmY_25`G-|HHWP3LO7 z5Hw)n_@3X}FXd2a&~u(Vwc~EuU7%Xs*?H!KI)jZHS6G<)HuJ~8fR?7=@!3=cC=4mi zeU1`(sbATFyDabq?H<-xz{L@*_$4+#-Xhl9H51(-JL~IwplsPJ&cMdz*p229;sH@y z42Hh~2ZxUBJ;=8kpK5@7L003peVefS5LYh^?BG&YR|k890?YOG=7V6bk&YhZ=ZLyiCV=dShyv{w z*H{JSF2S;lh|kQln9gN}9@5|aTJd1%u&kTo`4?zKaOr*WB2ax)ERv7LrJJS-4etcC z@Rf0k9+mVW8D$iUdq@*6*dXB~*adYU_Om#JdmlslJd__d#sNf+e z&#dQ#oI*rGQch03a+IxvPH6xa<&cJojQsuk9>f+gO24iz%n@hG1gHQ}2>?=9g&X4> z>(A#KXT{w^+)}q!R?dDfXJiL&6cj+2(9B$q(1nG4LJWv^^Wx&L@En6>S&(CTWFmkg z<`r8-{_q{t-T)^Hpg`y?x7vG3j+ 
z$?S`r6@UC_c4+)OPy5rykNDQO;pMW!%p2eXQ>F&VgzUC7U_zbp-c6K7{hrN?i?pEaD)-TP;5%q&!gWWQ2!QEed zw#nCj{Tj|_RFV!o9hE=rnIPd86l`vEGHA>2fZF1UJX-x8M#GePh~J=@O=Y(#2gfwt5?*}3(f*w5Zqcx}{`mwA(%7(1+Y zY-|nhg+hd>XcuxPn436K3kWXvi^^Y};X#NP;gK3opHk-F)7T%I(|7VlgXs5d(j1x^ zNa5T(Jm^2r0gIn=@IIJFcg0_(3YjfYKNWCUyk%m{W1nk>FQ=He6SMO)2E#*_$5&e` z>%1~0C|&yP7jZ+RmLG!~%`MhS8lx}~%n`C!S$3A%gH5DhO-&-S+4Q+JMuvupd6f7f zcMnk00Z5r<@eq;%l5OI9Bf#F_>^#A}hxnv2a2@#6!}bc`ZWcdk3jpPLtHnJ+k#yOG zgg`gl7B2G3sg@ZpF-cQNSvM`+FwxcKhKf$luDG<+ZuN<0KLLWd#FdgPoisaItu!+0 zJ|Pj&Bi3CtRx@(B_@A_9?_h#YD#ug|4b!Wt3d_siw_LL*DvD|_-;mQy2T6e)PXX7N zAY~@uBx5up4tqfxwoeHFUa;YnmX?;XK9T{q8w1ag^92Y>T6{$ue;>NLQ$PA^XN*AfyEB{R&bS$c+s4VsG2>gwuz$D}%e?Y9J6;s+;0$$tR+iOET? z(~UuKESqOEQ0!W(=~<8lGm_th=R?u)@7}!wG%v*z4q;(ovtt0Ye+iQf){~ciMPe39 z?zYO%12)JqXum)Vr5a&bXz?X5Y;JFF2cd_h(Q5l;M_8ta7ly2XiAj8zldY{($^-2H zutWLg>iiTt;X>0KQJPL+p78+E)CS6}4PfuLr>B_5#{JtcId4$7^Z&iKLd^nd;rE!7 z2+H3A645;-K%odD1arqA^raV}ZlXtu3CauBx7?y4P-ESs4q6rx@tgShb1dmm=m;qw zx=lBp`jC>kgkT5@tAaqxkB1Q#=ossL!1;sVL7dohQk$Da#nbT7GJ&&=ghJ)1gM>&r zTu;^`EYBZ?A%hPS0Wk*C#9w2A z=(R2s=i2Yh*U?ct6jqwmmw?Udo$FM)e0Zp;DyIN8ECar=xNjjqCm0w2FLiF?+4=bRfQ-zqwGG&6b#)?=pr9aMh2CcXaVPhX@bmK%5fPb_ z2uc-?OV_rx0wV4?6iNv62L=jVD9FeU*39V+nAXvdkkyrx2G-XZQ0?ms6k-FVO-)T5 z99S9ADvV4_)_4SJzy{d1He(@@E07vC)Yf*l<(|whEX2Qg14bXve2}Y`X;?x4X(;Wg z=Pnc(3Z*H?wXK^y^q@sr(KvWYBLmnJhlhuln3yvfzYuyXYvuCo01r_j$!T_tuAQd@ze_tPd&sm^r2%H2_aO{9iXU((f$k4 zo+R<&<(TrsYomF{?88%Xl+ftDbqKw-0V(ZEX0|xPLX89=fNgg_c&=r4zhH%IP4XDn zx_VfY;B|!05#ICr>y}ln98Ki=KsVnfBR`Uy89qDna(^b2l2~cNM)u&r_u=8q3a$I0 z^e<`69@P(YX(7p=u9&*I9-WHIO5*za`-9S^EuB^6=F2oG5D{J$9;qg#;lH*dR#i~g zIyfM1{(|r&q&xTaVPp7S-W=VVhq+(1g_SpVCFx+{>aE%R}L-V{x`9)t{sX+Iww&FGB7ktGo;s2%*@Pw6jm!WvS51dv(y@m z>3`^-#QtyK^%PY^lI#J~WKDT*AZ(vc7nKE%=$ znx^iv4i0YTUe!l9E>G=3VB6AX;GLytv}BMAk%hV?M!m3pdlgVtFJ+3^MN8QC4bu~& zm*Ltfmi6^_@n}_9S(NwBef2@$mqxTJ^!Uw-4*dxcLIUFBF;l$GABVtJBeV=!!X1|X z7}w_d`a`#`u=b^j7u9KCFL(QTmlz|%e|srK058*O4FCPRXncEpikpEUNC;TmV;LC` zM^4z+C45GH3VH|X?yrD9O=x4I0iglz{VuXNxk8Pz-@k9CHtCLCj!5u@A2R&s9SZ+? zSlr*I5GmO?J8)`PS$G=K2yyVy@Nw|a2=m9MAm7&uIbOG1KEj)sp@ z8vOO=v>#~;PdgW9PEAg2ji+}%4{bd?T}3!KUCo>=%{(|=rKItQ}`hPA^wSaL};_Cl43cnZd=wc_%%)k2^TX($UV@hTX!} z%FNTl*2>D0!`0IIzxU|RQI!Ae(Z8Om^&fg9Ajr)j$o;2B;GiQ8UjDltaSQxE4E9}z zT;1)QEv!6UoLyW!?VRlV%yPAs~m?JfJ zD-SR*e_DG)!ztqq1^_ht1e{6q@QDZ*CSfifZf-7NUI89Kb}oKqE-q$pnX-%J|KSQX zcNa@93oG}Bw{VCu#13Ksafi4-Jiw1N#1rxvLIY6)?|mRv;Pbn8b`a;kKDP#cxr6_! 
zf-9~6+-U{=;)Mu6`2QBr<^S(y|Ml!F!GBfMKQ^FhZm;R-#Hm8V&&36fXacH5!zaYe z^S?Q*NswQN@89?23>uk4S^v-$r0XR@?tB%(Y5ddn@ENaWu!}Z(n$&7jx20fB+Ds+3 zc>gr-&7e|q>q2H$)(?(;rYFcrjw2)HgvU>x;86#B&wokd>w3DJPL3N(b3vniB%S>= z@$7m>^z5u5Nz-dLDF3aMe|z)!)y}+!O6I9(@s8Fcmb01+UpV!v5>%&a+{v76o`Oyx z(QdcO=%5u;(IVQW&J|{FSMK{yu@)O?i_9jV{X(%9PalL8ZY10mwo56GzukMw7nmx| zTKH;xdz=74(AMc2EefoFb6kM}876@>O9qp`p3S9n+dw44b6FfWvO*QNxBtB5!hjvM znA4@uK!o=iTQ#rUvgDEY=)+pIP!2hmaZjhMj1OO6A#IgFICnmRWO!9~64OqvUEZ_c z-W5DQir8=Te50i72PK`z;l)Yyd4<&EO$42S;?oRj0+D<|t9W*m48%s=on)1EuF@j( zxu_15K}tLIjlz%SL#gde#S3T(4REjwWCK@@E$ANPAe&D`O2y)#3W&_Rg)6It)OjK# zV0mToEWZ`uWF5QL^gvozqXJ*^!xQ&=eoSZogRgG@&LnEPoH!Fs>||ndlDx4ov2EMQ z#I|kQwr$&QY}?7^`?q#$ck8dMI(?hB`{}OR-52M14m+&!C}_r?mKp!M?bH@Wp6w~N z79=80-jw^?1w1+u3w?`&`b2%&;ww;<@k-1`PSczGG^y7&DhIg;`NC8G;0 zv)J(9xTEHkLpJveZI(Ls=`@s16UU=IEOzWs1#(hU_+Y4LxLPqU7V=A&U|o62p^AB> z{Ic7@=WNFwxj`|D64oei9TD%evc+^C6FVmZMz*Qa*nY**^R_t~n})7>BvvQq(hHH; zbm&A~*rTJ@vi36i!~NGMpjt88BgfWre-&+MZhKQ6B!vtPgEzw5LcvMI^qX8yQ%rXv z??nS{aQYx*l+8v$^T@T~W^@C%$=Nt(85aZ@h1jrUefF6QT^W1XN4{%q;q}7s!A&&fxBR_2+>R`;l4xG{5B^kBT8 za>8doDu(x?K;AI}Ai?rkLNzCU=!rcnptl%-ctCd9=wU#Ln3dkd_{LWRJp0#cK;c09 z{6T~(e&g6P5^;jj#wzo5i=p*58gSTgWR1fx+^+w58x#?^&#=m=*@V0u0=8s?_^*Zl zjKGo1=p0Jf?=9kAK#XHzEz%MyNE9&*xgQW(Cp5%LPbeHbXTZUig07bX?2=R!<#!KQ zgVfmdPd7gH=r2G>0Wj_zl1_RSlh!95KPuJkpAh};JU71z+=)@m646Rf?w$tWoa@l% z`jEJQR8(_&-N~g?`wHRb(GuuR4c$poF@q#^q#2?K+37gm_{(IMQ!#;ZkVWE$ti>N1 zE)Y%>x+?irBfU6Ve%eH4G#xZ0&{RI!;P~l;7(mO?;o~sFMl7{lyK>*Qs0!*~IOIsL zU>~v7AHw4Cy;vPR(_mUehoCy6!7+xv_KBFlu=0qcRG%Fc7oyR<@If+`j2Fo0N9AIB z;!1j^^XJ(e_rl!9`lj>A*Xw)y*+6eVe{lRJ>VGm2qJ!X#{)yW&O(~DU{p>1WY_N0m zlMrH>uw&}pXDo?pqVEYMFDOukyodP`WuKD-9;+c%zh`MrH4w@L?EV6pD;OhQ1LVv>0Q4D3Ktr{|K8MRVq4=|jc+Jfe z`CpaDJx_;B;5+jM>>g7+$+eveWeU0bBjp!-gOT8;F?G!t?V4%Sf) zlW@V8NJoj@k~K^!XZ%_rVj2S1*7p-KO^>naJsZu0uj3hj417n7g$_kV#tV~PSRx!! 
z(@j(eAv_i>{xul$M)Z8c?&3kN9Z95JsiQ3{v0t`db6w!KF9leA?_~Lm&E(f`D0r|b(E)~ zPy?8;c$~o$#52;GRL_yDMXN(mfrG4<#X{F4}Sygj2* z|IU3U_mTeDP~ysvE_A{&_g5)<6pj$Qh&}FH!TLqL+(P22MgG~r8wA&P+q2!*cYL7p zXnE%;*+K3j5V-GLvZowB>M6z!n{t9z_O_2euzdWpMZf=9_CgS39M7&o+Pm9FNNVZ+ zIwC@vP&$z{Bv4z?H6YI%TEZLsS3Xe)pGcz50#UuhA49!Qe|Y+*PN?)|{F;zD5qYCS zEeCt2%}j&cLrP#|{Ikj7z!>=osPEz4xd^@cF;nx%zsr$Kk6^O>dk}o@G)kA<_}{25 za8@L@V8WvoaBoM%u53_8n62Ukq$iO9$6L$ph|2;DLx0pw+99QoMeuyb!e2*>b@J>} z@iO|Bf+{4rDj~fg;M#-)J{azq2D}epq5*O}V%7Eoyn31UMHwu_?^nK;mR8_(KAeh{AD!4P!`=EEM~%H%jK% z!9zGujzOQb-(L@Wdj2KSC+KxBaj@+4TKw_oid>L|tVbC8@#9mbIc_^@VIKz@2oF^e1{rV_Lgl=$FVE)#o`#1}V(TpBhlu>MZ&*${|w=dsAR z4g^J@A=-ukUV`1CjrbrT8qw9580O+X-{vA}d1OH@`D0gUcDUW7esDg?pkwH`T)5cX zoSuw+XcTwwVdZx{Ai|+^d1UppwPkl_l~!2Z_|k zzXL2r_~nSrcua3nM*kLvi=k?MOg!Cm26;)i0W7w^z94c5fxgWz8Y|T732q*IY z8ZR2O0}zF#VU*D0P88qf+l$-#!^zu&oe>@puV~(SLQ$RS5iJu@mPbcTo@A~N?$bLr z-6dGxAuSc**&;J7-%mFOFHdYAF7@r&3=KAXBF(dlRz`<5=jZo|>9jRZ_xf%u+!tgp zqNH1b`yja%FFL@mk{$;#N5>5H(n!wa2*ocL6jqC~2|C)JH~pKF*ZxL}n?JK7FzJ_^ z7K59(9U49*Ld2u^FX0X?>>Qje+kDc*qwIl<(MDQ}GDUaw`8d9uywqb;x}Ez5uGCyL zhTL+)3!C=&>Pi~8MR`68u6aqc&p2xT3g6X}!db=49!qy9mvQ0bhVXdCy zKSb)cOdoN$Z%srute>N+$Y?1vt<5Jzf*ceRn^UunOFr(iuv4T5ZPH!w%3ReJ zgW88A4Wtzg7NJA@Y_s{euo;=mQ)+2tShVv#xoCY#({RToD>W0@QPS6dv8q^!BWxz= z28@d9WgX5DLn-5thRseY^_|2__?ehk304l0L9QQEVBA3#D5kEsra^s(79|?%hwzr`Kap3*%f} ziT?9&W69m^cQ~i>r_`qor`Yoea$Akub9OlncpqyF(kjl9%V%2M6ZRvsLyeu|-0DPU zrw;gd|A>Yfvx4-0oS0GMS7#ffR#;(;w^8D4b~LlO*0{HMJC4W!$3hR`fMMMpH)=%` zW@w{4rnw11x5q(m;7eFr5m1qLirJ|8=|Xc%bcsi_xE6nZSldF4Ad8LN;GzgZ zL-|U9K&iXXy2QkRhOPYt>mkA|>HON^^nl*;!&W0x>ywUGYKq*1E7I~^SoK-FoJo2j z=UEI}vDEX`UE>1{YpJ`$CYJG?RUx_7;E&XJ38Uz|^TMqEm!3J3{zOjG8!V!iXs5@4 z)dTK{gXjnUY)N&F$hx$sNMx*>f#Cusdpou(8$0A@MwWs_o*1D*wYKCiCcU6&$}F)2 z1&3w^uw`^DJ~lu7@PRxfDgu4ev5Mw$bCVm$+fmQDxND?yb#x(CMC^V8*O?fKPVav8 zj{m%V-WYt!yO>?htKgjrYdPJpvvo#g%?Roqb^#NOV&~-FB$!F)P>Wcb0K9ftO$qJh(o{;ny!xN56KEkVdbbHV#KIs+UYh4*=^RqT@Rw z`pVrjg^%DNhww9(k5rTkm(!-s6sXJnEToR`_M*$C&d^jgwb=I9OXZ?uMp>P-t5Z&t z;A;!-6ti_z7xSq&s+ZxMmMOA>ycR%WmdZO#7V`J<58~|R#Nkr*` zv$U~e7&|svL|zi)Cu8OP7D@0DZ;nE^?6y~Bdit$@fZJ8&;X@f(vM5t}%r$McznYjP1eM;en92p4H*A$dHVa}>dZJXMcoVNJj*&89+*O;w$TZHR3v6n7 zUwAirp1&>LlbOyY_U6HUwZ~h%Ux@Yw&!HW|)OCl_b8n`{z2apfved}y?Uc|!{3p}Y z6b#i4^@Q>Ah0qjMD2zp9=}=;P8c1p2?r`id6VBA)sG#m}K(RLtSM)8tj-}H^S&2&H zKMofKvn!0H*+$t3U=_mBH@;TV7fc{D?kX~jNX@5&>{PtT`I&E=QWL?!dQG06Vub1t zp&um|QSNp)TjqAYB@85RO%P|YZ2_9WiuYLK;Q`l3Q`T17E1RFJ)04wUNw9utI?#c= zg_!5;yF-~F(vnLuWjMz7+Y1zt^os!sSt2Bu@+Nf4bT#9U3e)Gxx(#Vk^NZ@MKX-Aa z|FB@mkhyKu1MDYfZ2t=`kJev*8!eiyh}NUKlADqZ-n;8fzl$csPsFC+O_irBt+~Oe zhB#kOOBtRBB_S4rE0>!cry=p6Zfm!rk(|Vcf13qBoaM$iMhi$2(i#g=e`P@FomV`e zMO4$$4*_bd=AsFoJ&xs%iIFNUdrd^#<+H#Tx`6FEV%X-~E%U`JKeNUS$CD)G^s3Fd zTPiOUH_=pP!&YZUdp84if!5D~CF)^kWkFP0`cptI`+>zNr<3HdmgHNE#ppW7>u6{UrK48QC7+_c(kQiq5)f=Tn%*D$YIh!UvUu~hvh2rV%ss_(s6?Hw{Bc2!5!JrjlySs!-4~v4z|)m)wm#3E=yV~y0DCPhVPd0D)% zKO5fF@c301)BBNcGiVUZz-!Zz@#x^IZ91DC7M3i+H{&Y-Q$|%nTSXI1+6E0}07Q3} z1Y&y6%i?C%vDjsvr+7E$Uk8EH4s04Ac^$j0*$ZV``@&NWE^a%op&Xn#^hF0~gYikf zR?S!2STq1v&Gj8yQ>Ui^g^Qi>Ris&$1edhhon1xYQcRHLopxKt=Ygd z_;KolH!&kpH1)jrY5j2GBlkYv4ro{TD_>ea+F;m!ONxK!EQ}2`$-~`YdxfZ0J<=Cn zql7Hn^2ZTVL*U3Z@0X;6Amgp&&&Y`Ay|V_FQeT1g{N{wMNIAAzOZNYTrL(%F`z4SO zu1D#psOj)TPK{|ggsTkT<{I#IMgMDRbG}kZi*H;ajeNEGKi)Ihd0h{mnY?oJWjNl? 
zY7Zw$&f^yOZ=*b@i~B~i4lSC(EUre5yrmAAV|NETEt1UOw(Y@^YOgGedd+9N$nItdLY6=@yc(E+rV+dR6o z&Vj9=qdyj<9Ia$Y=)Et<8<`kgrH*j37Iw(J&&$86rmR0p-7XcjROr$Bh_qGLX-L)R z?$!S%p8lrqsAWh+@@qSrwF5jpx3$hgsN7=hiqcoc()Jre9a_{uk~-AwAZ4g6;zY6k zxX5SA>Zq$PEM9Fq=L;x54m~QXonaE<5<@K`RvO@(l@haBmUSgAy=uFTpOD|aik_{5 zE}fMwJ^hv48aMkeXL+BupMNdcc#^ie)uyDS?`Y_9)eNe%@ogxFPm|g?OC2q^R${LG zj!LQ_?1JUJ{=lq3U%s{o>`uBi@O)+`eODy_uVAGbf5Tj%koZDXHllRfi>7l+=$~9c z02pjS4@GgaHFbH4%Ta4(@e&$MsMpzR#|KuZg+vMj;jKVG>Xw6rGi=$x_;yrSSj z-8t~VPHTs;^c>EDuM&Occ$9|G3Eo^uU$sywNUryp*_hWqOTRpSDQXY8|Ms?=p99!B zgYvqdfoPbVl=RhA%mDvdhVdLImUtqRR$hhGbCbuey$-SAkYBfWVU}L-L@r?L9NbrW z{w}RthP0ZFEokaOvR8k0n|5C2{MJ8|W>VlpUUmjwTR;i@ep2Lt)yo`KPg6XdVcGVO zYru7rGxs;kQ?|YFCBT!E#MWOga@&CLZOhEUe3)@$v1QM&LL<*H&YzlZ&*H^pOFqd| zlBe^F)1?jNT2Xa8TAu0oXTT!>`p#g%QX}BC^;Lwom_wk>LUD&Iu=>g42`RD^n%+3C z%JX;rJp6;Rkf92%o}KzL)o0tKdIH^z0uW-i!AfFl#(TKS^3Z|!1ShfOC$3&H&5{~N zS`^#cwa{IENQvT(|xVO+<-&iJE2YJLs(zy)EY9m!!(Jg!n@uTye7es?1K zoVojq8;%KacP;k}Zy+P1O}b&lDf85Q(PXQ!h)AQLX6i+6)^hlMrC2OIUO$MSYI#Z zI_j_l*LtW6t?zv9`hy%~Ji-Ab?>aw+3aCQ3*n|o*?r3WeD0hfr6)1O88f(1_-B!Br zbhdC`X$$?@P%m#EUk@Q;FHgUZ(3k9$BewbcHnE9$VPxI;utupYM+De4FH=UbI)dYT zcunG865D(u`3SVn8@D?c`=8)eKM1=UjLT0PC@CN&eND!c_i6Rf>&^888GH+y#Apol zo34z&a(#=lP#MzfWIA-CGN{tVazJ@NQmKw;ZB1k(wrR4z7)vSt-YsJ^u}Ez&-?t1l zv@w4Cf17WoFy^>SmO4_FrQ@fl#=qAnf|HXQv^5tpGPqQuwW*Q6~VyL2ZjJp@^l@IQKvfOB`YkYT4kW9&?dr%>^h*Cv(70gii&0&<^p~)JWqzP z^x*;p#Pg#;vb*fNkghU>K8}nmk2`d=ODpmz0t>I!=~cc3qhprwa#xqPioWl!^Qird z*Wml&e>=@YjCJVmUFMNeQzs(>sxi7foCAr_b_OFBnso$^&Z=|oxWA`|NagS6#a;Er z;^=1dq%VHDPM>%8U>|~brOQc%V*xTIC!P9SH6 z-)XOCg@LmIXt#AMi`qJPeH^Fbnss+SF60GGz5Sy2G{XHs%XqKjK z%U71EQg46a#B=vf4T}1>O%wL>&eiVXo`}0prfApXvzart5xL&kMLv~{iTXa<4Y8hk zD{aYc$!{rp7k|ok=6|*yD?-w3;bS^?TvQacySdg#U@#d*Zmv$eO3WSza=(UVcB6f) zO_z_K+7=$>-~PY~ZzH&_F&Gu~n9zT&h>6=VfGqv4SKcBEJs^*I33~E(+7$WgfkinBf1Gk)sr8*mMj16dEzg^)FB*6r|n%mPo+@_}^Mk|BDUr-;D&bGqU}EMuOScSpFaV z%0*A92qQ6NZM{g7ArT*p5IyuHx;!c6KkTV?#Fw2&vsvaA#%X%GW#wh%FlqKiPG?Go zX1c=8f8+(l;D~kguzr%LGDuuqPxm$bLJnta0*v6$q{goNc4e$9 z|L}auY)MVTv$NTl(B2k)3UA{jm(E~2;w0Z4XjRitDamVUx^64cQcGa@JWxoMsW82Q~**+D^9iQEogL?U-|gyLW+l+Jwnda)nHd z$@|t-&zhPum*p+6nC5rSCBS1>QIscziGty>y3|)#L_$7WGH;oO&6UBZ@U6PB@~t)^ z`qu$iu%&X9Mm5mk>%uS`SFf>jOtlhC+2B<7QtO1k`mMyZj>~#0k(o*&`N0It_=^9z zl=uEhM}~(glL}q#+(mQT!Ey8j!8Ag}X2EvJ2XRsmaEW9MoW*WtE-0g|P6%X-k;&K6 zW7`+eVE%m};x7#!Yu^w=YzhZ7UkU8hK(AlO!7(6g5_#)myS2PdGQq&~F8rN7q-fgE zEGfr7(Zg?6o=YgqE#YHd#p3L%EW>JQmRca@EryOarMw!E^c=1P;QIjeSvaJ87 zS&3rg(Vt34zuL6Z@bSo8<(pLH@|@pN{jD~9#XB?t4O0C-gX~#5%6e_)*}{TU*7WXn zg8GMWl0NwHu*YlVhAk)8UHY3;ZqP-~7#1TTXrbawpR>uQYe>sS5RKQUZs9H(j3$@U z`^-&24on+~Bim|%-UXs+NUlLq8qfwoLs!z~emEpyi=s71xx<3fss-9>K;a2Zo2sgl z9=mh}9%bZV`-7=a0sZWfwxg2Hn8})abJEf6wOp&`kv{6+|>M^!YWN;*o+(-etx4RQU4NV3dan6&`|9 zG_gtOHYj4}H@0X~&3C~t%cF7nS$%sn-BimX5m)ZCD{{jwf@IaY_w(SoGN1P=WCbPk zYViw#T+6WrRn}$N-piTG2X!U<;Jt@Lp&2hD=?tQ!sHb(Uw1((E3IRuF4M!(SHdjqT zPco+VqrO>IKJV9ir7gmkvwntjn}^9itT%PlXQeocJB8MuXgp-Q)>+lh8zf2Q6O#>! 
z0^vl}pw)a+CZEl#RT}|W3e2*c@vyAWnINKRk;{RQ}so~ zb@#Yg zfEEv-1vC5AVT(9d^vK6ELn7MQd@r0}VHS#5Rj^l_5%L{XxZSSCdEiHo;+p9XCc6ee z4J==t6sH@HXgZ|aTtCh;Igk{eP2q+PEF{cFIQ<`5!}x5`j;2fG^sEf$S{HEx+pcf^ z1|DPIEHLA+y@!Qs*dhx#FB9y21(Y6tDG)s$wF5pyn4pJuBy8;jzEGt60ZruCbTjMq z$n+))DV12nMWBYb>lr=7Y&7fEvBBCA_S5tR=pfW!SOW4ID8w8i+kAu7mme3aK8bz+ zTKPk$VHRhX&}igO41e=q=-?i7gN~qO^N!&XLF#XgKZ1HCtPk$YuBHI>FW1HMNXC@m z^VT3eEXLVhIz8X*e*UF2-C%SeEfy*JeI*EV5D>C5EI;5PKhOksMnEuhHuykmD1K_m z$|V>`lTDnbn>Vw&A+IoF-!a6)b(C>K7xWXMr*bI*2~#i+0ydu@Fu90Koo9G^fsson z=dY*=++tfgNUAZ2gN#JnPHy}rjAj*jQ9gR zYg7L70M1C&^jdIeVpcjVrJ6H4g&fMyVZp2Q^pJMJLMt5Ox7vg;{6W zo^KuslqI5IMnE$fy`*P}Kh<`-g-rJvhBnYomU}y9m40%0_BWT*nw)f25hASu+(--D zZis14irT$Ie&EUr?(20C{01MQpi%f)A@<#|*=rUxm(2WAL@oE^W_1~gLY&1c z3!E~z4VEK}qUaRGWw=rU#28mcmskk?t` zTEfm#F=*w0o)%}N_9*`WytnHMhhs1WgwLE;$XJ|VShtWe3c_g!0<`rzYzziy(b?VI zUxK>f8F@Ae$aN<*x>pHkul&qH(%f&R?*0has-SNEggCl+MvCK&BrpDYKS)k`D3O`5 z^lU35e?{@5sQ8L3TN#$OTwAXBDBrM%{4!O(uy4@Ht1n+dq>LCaHOERjp16j8wi`5F z7akq7SZ(O^rGK0#KaNaZr${f?5P6DvqD0aJSDUZ6T(*2oq9HIP=945!b0j1+P@1PR zu}h?}U1%w5RW_y>O(f;eg)VIwt8&EIbjZ}QV^?4T<*!#8d@@xoCsSExAqHQpYHp>M zwRiF{p4S$(QNm%JQ_$xJhDDrRhB2tl8RV=ssLX0qVvhV0l+tRNs|SYq|{P3EmOl!u2oT5*PjIj$c^{|^1pcu`5DmBy8p<#WB#Nv|MmND44zsRb( z4gd*ls*f6}Gb?P9Bxx5KV7!WK(V~)zl|(6tl6E2eOSVtfvmePo04@pPLh0W4>>o^S ziz!pofe_9(DI2(Oo7KHkMK4EUZmqe?Vly6K14^!vw#RnHk_84+9~Ue$PS%%f89I!@ zk0MqQnMv2PNNcoqbuLg-5;o2>45H*|Y# z>i(kOJ|CQ}v!RVI^=K_kKucfM$-}h2t6$6rrHwZ(A2U2lfp8~7KShOQ?HIAXyyOk*0ybFcPer~j-CC%0 zm;XeRN9aGjiWfcWH1Ivf~AcNHLZ zd$ao38NWY&dgieBbP}xMCF`lwS=^PYy_Ko?vUS=!DN}Cs@Ym#op!mI#r|>;>EUg$4SVWEW(JQtdVy{q?Vx08_*-$&v(OZ}E+r-1R5EO-Zi&hcS>DFE|%srB%= zw1ilitw0aKFZ2P8a@6Vfm)fVw0X%lekDtGwu_qzb@3~e&@80gnNb%}eYFhI-Vm96J zF9Wk%JS4h;NqH~*XM~%QmXZyw1^>oNTA2<4w?eFMRo~6(eoWH@te`r3(7x;gMQ@0$jl1{N@s5Wi8SlEQ|EDvS{kOmAOztN|Nl z;ayul(@c(bPqz$vbKZhZ*u5l6mp^Luz%8^|<@^%|u1jS3kiMi+&SX6Hn6l9$x!N$^ z(E2(i26G$Io@^;E`=RwvIHmE@;$Qm_W&@$3g{@*01yXS5(67(^_9;N@vId={{?pph zEj2z9>`frJ+)m}})s7_-%t8~PswiGP66cpG2iS-MV(XaGL3NdfcMKnwfbB=w!CcCz zz2aD*lDRCXS>o7~6^O#o+jxAAxguA?DA}rSnVg+YrWB>m_uTpDjeilgWbDv1e#(S} z2TIWsTQRtZDl_OiqM$4b)#X#2SYyuGMI9!ZvsahS9ck`BzTs%?fvOL0JH&Ine_6re z;9q}8@T7d4yH(0;40?!O5r2?!H~z)4`(DsQkg`6L_YW`MHAnm@W0OR6+08U4B%zOP zrOq~^`JbfaOpf9y-09G997f}s+pnPy-mRZguNG&VPpo8j!;%BdQhw)*MZ&~b8jt#E z7o#|dSeRsjZ1K8dBtff8fcnkDC^Gj!!>)SmbAl=iTb{E);-=pe?^l?+yCyPi%+dl} z-0><0A;rV5!2|J+bYQ9^%6N*L6Nb92ykaSA{wT=Wdczzv?}9nlH7pTl`I!yO)m|qi z)i&ClZz69zY>!c`E-?>0MEJ({1H2z0nNg>{WctT|Zl=veSNN0Uqj3LUmMSVCbZcE1HW zJMCJqXNDd>`zMgFQkB{V>;Zl`jZVD$-=32)@ zml?zL*iW{rjzEOAU&n)Z{VrX*&}sC2(tiL5|1Kf&?5HPe0KH90L5-E|njhk*X5e=b zD}_kynnvAg>f;XUM@x4QG&W5E#xq3Thu4+6x3l`}>srYM?IPbCkYbu?a+>M=*B*=D z`jfUZK^xD*VTYJ8anG1(>-~$Y$?{$ zMd6K{&VFeFv$edQX7eW!8V3YY9#;zUKOQ-va#jbFOY^5LPS|3iwl7{yKug(zjN?!9 z9PyxLk+@39mw6o=&Sbe)KNl{xl&j0!hfPK!qVi8>xWX2d^&8{v8@_Q;P+WN@^AD1{ z1H~LWBGel0V19!aE?}1e;MW?N)XYcM#>q@FsE#W^8=_QPnLm|z!DNdX>}$y}la^_V zJeSf^Yl6aQWJG4&KF);wonyzT_y;^0kzi41Yrq>VlN~lcMsz%+awjPo=$}z@z zg65Q*Fd=*FUf+3eZ+o`8w?tbX(R^1HHjXXK#aa&FDe@y#6?hTFcplb80DH; z7p-jy{>f_ZLoCklWT-4BvOP4ve9R5Rf?ef!DRBsnqa}B@)Dz!Vb2bBmT~ZH|)aihE zY(7X30QvSI_y&ofMW;{t0*{*5oyIrlZweO#)F=ZQUnC@75gdt)B4UtbP-NW$y(r!_ah71+gawbOT(T5gC_m!iyD*nH)-9W&LV^b4oF57?+%Gxno zn7@V!GETOh7%uxLMw_E>(L7dgu4Wta(#aLv&97JIqAmN)ANAHVZ`6uu&!Zb9tIo&RfVR-X}rT>eJk8Aj$* z3BKdW+}sY9WU!lH6XlVBW?SA$>IQ8`u_e;}GK?Jq`WFl_ZfiA#+eoTK(>CbL5*o zoMp7#p<-#OJWWzPskUDWsSyHJ$#f%l8DoyLmpNj8>9z`Gezu%*OgAoDOCplF_Y&;} za#eM&%`0(77-k?lot#RmEQ0Av;wb#XzB>u7=6YE=iiRh;bPHT;XI5 zZo$FlZ<@y41y)>g51f)b{0DQTDk>(p=1wP|{k^V|M`KWv{K&X{`WbH<@nRO$+--Pq 
z!#Bvq9_l;qDjj*`+0vfH4yreEm?}?MKkefvoxX7*Sz2y2#aukuIgVOf#BOyW^@-qh zlaGN9(O$M|#AGm1x~bt^29&#UDAqf`p@&+t!$^{9_b5h4*!?tzOLlmOdp-KDm|2ty zbYLPV-uR&|Cq9v(3el9rDrFjyXthZ%I<21a2#cw_AyA38%{c!;JY@)T>`2dC(R8;u z+vR<${k_p*Fhcgvz{%shwekqyWluX3;U4H~g+(djbaJb>jMnxj6?1_k_XVY_*;cn0 zSwb0@&E-?P(Z$gJg)z*E=WId@t@Bc@!JBuePyCeK7|T}mlw8FjcG2p1QZ`Ph<#aN) zs25vokV%+c1Nw0yzqlg~x$66~yz|#Cd~uzrsL(J(UG=1R0BEV^O%~W(d>PZZ>4m?k7V!su9NDK~Erb)xzgP|8-?NmG(`HjP;qm zn&$5E+Lu^y%?(U&FV?StSda90k93LTZjS7;FzJ-#clpvN_DHa+AH_Tu6`t1L?l8Cm zv+U_pV?xo9I|YOpKLI43Cvz-SbnQSu#wNugth!TCc26uwQ(P_9*YV$~iWJrM*IW9@ z#dlQG0T>3z)Z3GUQi40$^%vm2t#g~I0Q)lROO#u2-pyPD|5V)Zm8b_lI&5@vOG&q_ zquv?>H3y85!3dGi&rFoz8s|0ChOY^C+*;-Q>@;ww)k%aE@Ao->VGBn>GFv*s%ZbX}c`@8h)9kRu?Bo=BI?x<`I*8f~Y#BcH zAe#QPkVc4kp|$gl?K1NhM$AuirT=Pcl+NYEg9~B4nr_ebozdzO;xOn`N;y9l`Cbyw zM6${QvT&8@3JZ!7;?m^B&BZB8DUCwq#?(&ar^J)(f7GM)CL;ey-0hHf5eY))Hv>VY z^|08{2;Xk-AX6VV$!nZwPjX$BXDKZ-mzP*9NXkf0x-nJ5He1bKVB*mV*2yInhF5Po zZLnN4d--lx?WjD*tj^jq2#otG8uVR-b;Sw=BY91jrzhG0PgOp(B%DlW-xvC1Xg|iY zw@ts0Z%zU4c5trR`bwoWh)Q7>Thp2!V`_|IJtMKtw(lerI{q(%lU(3xCw08O8H=p< zC4lii-KBdr#>FSKug}q^UnRV9cTZjiXW22HK=4UmMCUR&tA8BWcica3CSh+#F~y%% zcguS7HU!@4)jZ^l^t=vz%daKGwGcdJ&nZkrZ(}$(@2wD@ynS4i!|}ToFtrlYjO|rt;-qp3U6kW=;radpQZUeJ=5);Fr8mZ)tFSymSklg}xF$ zyr~9#k?+v@nhSQE-~;WOIUkGo{4(7$Q;=m@2E-A0lw03Wc!6Fn`_#el!MDO2Oumk* zsG;Cj+C@D&FTjT_q`aR!IM}%XwMx-d(a#eP;z~!NcdOr>C@AV_Ju`%d9%-d+1MKBr zrJtxV*Tbig7SniV6cS1@A@sgeV7OV5`rZNkNN)t3^Mj8&=a*u!q}QGyx%lr>pS!oo zWZ|p-YIjd`+1)MFX_Dd(ENe5Y=)>IKf(ISzJiAphOX!;DcKc)qE(!2FPVAm&mr%nH z8N7imBIsD4dLVRuMi5d6V<<%we~nA_fb_~98`Td3**|TT_SFaY-v}vEO9Q?}$c8%a zSG^Uybvke8S-T$8pF{(YXS`YQQMS2*hItjeb3Y?r!F+t!$TNTJSmtS9aH?V`s8(PA zih}D;h&&cV$Qjt%5gE%Lc8l@le!rWeeeF&^d~ka;mb_s)fNF`39F0A7f8_%{Kho>2 z@=$bB=+lp)yT?2}TixsKOhVkFzrr%bawJ$M2WR_(?(FN?^ikc# z_Luw)X3yzgZEwJF*U6I>vQLAQs0-fiSI+CKE$2Iw)AiH#>xwTW?v{^)Pv!;jtonFc z{Iib3`qvbw%ulE-5cjJ9f#|#V&caV^HaQPz{bGH=JI0l9f1PYRD=keWnHw37lB>7Z z#?39IHvliv?aI;l-l}@pX~&~O$3oSV1>0uQhbw%LKC1i?OKPIbFa0By$YO%7R}1qe z|10R$=w5*mvMbqKL0?%X?KcBFmzR*xWMWJ0s2W%H zzC)w@m2r{UWPb*2R{T!O61P>^hq~NTBf-Vs51UopM|-#tiNNk;7;k#cC*V)+)sRQ~ zh`#ZmQs<*hVNz zT+UXRM|g;5t*dauOQnCT|Dxs&4@C;bmJSzl5&vz-x3foBO9xks;K#FC(uPGc1atGZR5+RzI4&}7yF@p(dNaVs^!I?$}7h{3AQl)rMY#8 zmeg)N4|SJiUe|{Mg+5LcUB6J~$35fArnUWT-7CA~<&eESXnVa@qWj(706*=NAuhFjsSBp?+j$I;n3? 
zN|Q$=RM_i1?VN+Bu-N^AY6a$jZpa*Q+Wz^fw-yiS>t*)g_e zd$7Gd4(Qn3=A8(ahEi;CeGD;w>Z@#2*bU3~h79*SP5b~P5lG8$jFUw@VNDbeQ*Us& zn?9`VE@pd)sN1zn>()&FsQaS0(}?dk&|5_XBHqpwA9JY88UXFuJ}T^26JW9QXYWtE zAG58sX`H1)s4oMUTuu}m8!yOZX{Gx5)j6e-ta z%f9JLinRoP%wAJxH}b%P)rPh^PD&Dd;x6(l?^g`(@b4z?`_|c)_;Jy;gDks%izN^X3_Tfkd#y!!x+8kitDO*7`6HdC<%*I`JAs?hT&yML>UN zgZbYwU>F(L7=JVT{=ei-{~HPn%O4iN|Aztt_>Lm+e+NUT)RKxQ8kWP)!zBXWh28)P zlB~<`UR<8)po-cvbZuuxc8-wXk7Hj=eY$kkFS)SN@H1i{_+t9ffIyKT*pmOvuos}f zK=^G34N9lU#vj#6lAyeBoBE-qsuLQ=l^q96Umu-3^v7HND}rh&fU< zy~C&dG_knVR0I9&r^U~xzSZ2jG4@JHm-y|6>P0IQ?ZvG@EWk1i6Bq!{l#&?1@;AB* zNFrRVkHlqfm=kbG(MZ$~?y`anq_`auNVvATh8_G(JNy{wlKdrx1$IwpFL4h`BSUlQ zs;XGeg@JAko++r6r%`yl@3Jyok3AbcwV#aXBG8KFavJ9$A`wvzX%$MzM~#kFpX|Jc z3@dpQdXPwyZAXB%;`7|{IDSgj%rpGUN8nv;8{KxFn3e`J?bad``8C8$Os+tvyMV-D z|7HFB;E%`x$JofNxe&D-!K8#upoSC=ti(TvI9F)3UuW?;_8~?P^2YJ`Xvw&lTq-T3 zaSGa)bTbLF6DZJ9YK#&e-$?Yk}p@LZie%GCWV69SK} zvR0dU)TXwcU%^E0VzAIU)nR9?WdOiO-2$7*1;&QOBps&j62$X(3Dx96OHV)kr=FA} zk~TM3f|LW6S4dNPxGivCi^QDgdY?TEc60lPMI2)#S*P}vev&o!P00 zVi-dBlR~1ujBs*1i6tHNx)N5X^*kfJ=EXFF2?=4)vWC&X%G9K8s3VE0cF4G(BAJ^7 zW{0_rkZyLd#eVk_nZS~5f_6LUqW1#~gx#G;WQo=DGoDSzz4C#s6W}as*Zaiau5(v~ zuGcJo+*IqGHGRS*!XBtY!cmqzZgODa6=@ELDTv{rSxencN=~^XqifWV8&rHlkcB0?ST_X($m^Smw7Y1N>s*k76;w4P|tt2y# zVeOD5BTG>OdrGcEKzGS?m)=#8;twgGmQv2Ovs6bNb#5{!=4X3BqBXzqynHnX&J?XZ5VkC4i+H;SRfYton+|aRj^Ox-0`H`Jw8%aG7g@g zBL*#$pa&yOa>)cGiankLehrhD?~TJuA}iManABStlYnBxMO_I02TX<6iAXYHmnwpZ z9Z0p!!Vb!D3-$&^C`491I3HrVcq)p$AJvA!G_m?4SVFNJQIS{{2AVVxMd(D5Z=onM zgW`92&2&_URbk|Qs(61YNYlvT*o{?^5(uoEB*;&yW`w7oA*t$;>p5>bz+on%LiHxT z`Xni@Sg(9^K=k^AoKQwP(X??P_!yeJoxHw{rFr7y!rw?#>Rpv2rC(v4R6LG?*<1Ts zamd;C#ceU0QTJ(cI*FTb-c|ED!VXhF8O?3wJBUEJ;VjNztw9>a7@Bg4fc9hi{cPVvw*WEg=gX&4CTk1#V{pMgBcM%Jf1)yjL68~o8V3T#gURh9JI4C~z4FMUGk+wIV12rmv0Y<{m%32w*hD4EQA?rro zki9{}&@KXXiC0vK+$Y`bC#XD4TU4`!d0!lB3#YVughm{#EA`E4NI#CeiH#?IdI!l~c@# zF#MY&E0f$J0JK@hgJ$qbOSNsJ8e{-PN29DWW6ScLkO|PInzTvH(#&9y+ZpLgE`j_@ ze~#~&p~?4)X)5pbbqiQ3WSnmPBM2qWQ@fqoG!h){FGl_1{?BSCD}W}Z@xknO&e{g! 
[GIT binary patch payload (base85-encoded data) omitted; no human-readable content]
zuM9*v1x3o2V6Sdw6{yELbd;aRIZww^U>jV@G=qDKvJ%?s;i%LJb-M6bu|d{*biA=D zH_=0C>O@(c77L#(i;V)^(_hc65#gcVg{^H;50ePxeV8`#m`I-W8~n4|1H zsi9O1t}ex3ArTwSk-yYAIFN76Z~VZ@!I_QOL;~a(Gflxck*RFS<+S!S(b!-|INO2t z2!~*w_rdWVCvwCsI7T)HS4AH^=2;vCpobP+!L5v!HU9oA4PwmW^lHq9M%AE=@c zPuGr1tOdgsLst8dGW-smgTtH#J_>$MOW$ACrrq4pvirb-Q>Pj+eQSB!s&j!5)U&q9 zeO`Ozur0m1HPK%P^Ss#_$+mWBM~@~)KDc%eE`&ZxuUi{!kJ(9@@OqM~GO8encCX)K z6*=bqql42MGO5AIPGxY^m1++nogO290G?$1L?>|>_CwojgjGEUwiT63B4U1^Wnbi& zdMvZwKV#jG{TFk32=H5l^Lps0qs9C{xn*A_a;%(D;jjKAc0@F8_tt!)>QWK$o%N_t z@FYpI6q|LS)!sqTY7uT^tv-hnGJAsn_0tAd>qx=X>f{)b`9D@0Pg+D@CeZEn2i%o@ z$a)&_AAHLl@Vh&^#yh-b&TMfI6p;aMle^)VT}g})R}#P0cXT#`wM2@rfpIvctpi)m zwr1f^+_&l+*z&lESdWyMkGKR}R!qg|BR%_5n~xNoo1CPxbXdxns7`3sh@w_J+&Qgn z1zQgnHJ~AA8?#W-ZXo)z6LZtLjKIbD+)ER4Jq9?-3$Gwp7Mvy`{{mxs6F6#(&+jvVt`*d!s-5(v^KU%tGAlQ`jYf-Q2iS%u4 z^XHvs1mWr4E??Sb>23FCeU@BD{-aU%=-O;=RHA?8QSDiGu2k?^MBXLa+$2jn6Wx*2 zn)ZO2=+IIFK}+5nUExxDviVY*k7e9nXz5Y{Nk>b&9Eer+1cM~)OR7;#a%&j3n*0HI zE1Z!Gy?iodLmc!I0u*P7@GB{o^<;KBvez}k%v3|{dKw{5;S*Ww$#U(PYFu68ECgqt z#Kq(4$XlT?;jqIAr>i?!N;CTG!&YaZ+!Zaon+q!W%bK)ymxl7qm7~?`#`!0ciuG;j zftu+q#+Ut2!!?%mMpSYA{XUz2VT z{#)Ia>&p8R9wD}ByxSNThak0@3>@H?QXdyq!f!p1oQ}D)sVaf=s3|lmFlnqw>xpv1 z6Adv^7okSI_h>{MLJYOTu6A}uhf-A{c%orXVQ1Tl@zw>SVHkDk$OT=(oq;1^9h|ct zhP|=FS=%_%zya<>n9R^h;dsvj(scAQy0zJGrh(*n02*CP^fOiS^1VZmI?0FA8?P^I zoEnZK#|~~7**6@&Pl~U~Wmcse=zYVb)Z1&O#+#F4dxzHS-`tcQ-8-Bb>InMVhng~j z?SVk{yH-naF%gK14IVDfwW{P)0%2bB%_ws$0qR zM;mQr^mJp77KaVhcyXbv+&{U>+muPUsshMZRXO5qyJ}_KRxUNC22-&%WGfF%Z_IM` zfTQv*gYb%3I1kUE;R#vP+LGV+L2N5$+n;XE>9#VIJ@7nUQ4L}Yw1}6GkYOcy`SZ!? zh~x}TIcMq-KX0Aa0n02&{(QOq`TL0T(FIOFR0TW_&11$aNKQq9^O4#(w%nOS)NT!{ zCTP$7SaDlT49f`aRG{+Zg%URuO1aFmnYAKZ)79j+!5r*Gzm8|~b)pAca$0NBnncez zaA`3>h?&DbZ~c8M+1h!(r-zA8^M{{Nex#6!dOvfZ5nXv=A-r-i-kLdF){h;Dcivyd z-&Od-IK&3(3x!edH+ZZA{a}#giKQcII^lwlUbrW1tQ#Oj=3@PqcW5heA(5pR6U_%w zo$=OgB4d#)X>TyTm`6Bf z-=BTZq>s{BoE~`ot#7}GS4qy$etYs**<&x-g=1y=epB{AW08bI6{@+)*g#%vKQ6E_ z(Q*mh1IJlj4ThzimuwIOL0r!otA~%nE8*i>#+zVimLVxwbqi*e-aZKu$bS3~V`gX? 
zzI+HdT}6(x*z4qta8ZDeXjUk<^dP45*28s9)m& zQ9IGFJnFAn8SHXydUb)9p1jo)wYe-z%kH)nn_E>DS{x3K%y#vJOX;eztu2QNSb2gm zRv)r#<$FWJ`DReAUc)|x$R9%c*g+%>jAmOHnhy7zvWLxKIJaI=6iiw;Z1GN6W{4?$ zk^r8(*NxUaHH_xk=lT?ZH|T?;gRS{R;o5GYTa*kre3F@>pQLz4#P5$fdFm;K;bgx< z_1jqLev-O}G|N850WA$tvQ(^Ql*wln%;3L7MKnX6q2BDU+rh&oGfTmKG?71~{vGyX z4RI5Wg4e>WYCi91hMg%2VTWyn!)#i$%F&N}If}{AIh9}KpnRz*-%Pb<`r!>VxMmE| zT*Ygyp7~T6hR+joONnQ`kicTg$vNn)uww7+`B+Erm&=OR#tnLJ()1b=ISznUrppD2FRM%9kiRt zn?6KwHoxlCWcF*1ut3tho`7Pe4$M$y$>;QjB$E9C`6eftXc7+RAN&BSIUL^##ERJh zZ?Y!l>3^e6bM)GjtOw)3A34Zodfrt4yzicI?jQQ7e!w6PdPbh zio2e=!7!Z`LYM1Wo^N={U31 zhGm~8NWjnk3B1BgLSrvY#BmH86Zfr`2cfLIil8~pF!3V3MoY6-pKCNpsYERgX`zt{ zo;KP1PN&~yQh?bR_4}euvz7CtLZP&q`DCOb6!D=)&^`vMM85XL`cY4BEpCI_M z;c&A@5IoI@vUBru;5fYlx3F6u?JBv8&@w0^8~!; zexub%wJ+D`qb4{KRiDfPg|)~0zKGq#acV5!Pq;YFmGB2*DhE0cA}0l3$azub8Ag!A zziEM_TM*pIKp^Gec~1(;bf)qem?3^n_=o_Ws|1&ykY7OgqfZD$_&3zn-dST4!DJj3 z)n%_WWd_=G%Ds%m?y)N_9?*9S?wHpTa|!%L${d5O+y^Nm5NW-QEINf09d^l4V+3ko z_FITdT#*{XjWdqmsfqHweBa6(`rSK}&#i;MNQZoaItX51Zi4joVS2Ob5G)Io!xq({ zYB^v*j%}B&#~dawCc0xDL|&cjqTOS&xdfUXyv?U6EURd~V0$j-e$LFBFmyhuF#~Iy zX{b?fRcnL{uGjc?@|Q`3+Vair=S(8r2jzhMHm_!5S(}EUcs@b>820i-lBKR7Cg61j zUeDm`D~Smx0ryZhQaRkFE+UG3rEEkkz;?CGBA%aSBSm!S?r-zX7M7+5oVS*t68Wg$ zin`qqRbVYH`J;?z^VnpS2TaAa#0O4DzM)%%YT`Y##pXs$r0(KfQI9LC3YCW%K7gH~@h7NH zOfm%9=$=JK(4pW2pg@jbq3|yE0p!d*)C-Ukh%=LzQ(5W-*ei6s19A!wpz=D!Gyev4 znhTEtnIUkPFZ#oc1L9mp+l_-tO<$%hj)2n>w$Us(L0jxWr$@8V%v%=8Oq(or3wy{S zagc}(jOiSjZzrFiU;I1De})oZ`EE_2De!wZHTz**3dQ^{bkGQk)=wMpkt0x3qw?2* zQ)G@@ML?@c$BZ@!b2NcZK14GSEzFewOhB{05t@H(-#{O|ctbR2Q!0}-_z(QE*}^g4 zFUepikqEMi7t(1RxfYf?OPZ+b;cDem1c0;C98Hq9!kQBL$t)(}>%dRcD?3RG=Yap@ za`n-q-Tt?JyUk{&zQsu#Lv|*j(L_ATDZcsn`9G1D!_t2X`Er0L!+zaec?|r4`8W~P zHE7XEq0j;frLs_X1DSG$gu97;6cni+Y5X|4cK72LMwWCsY_8e?A2|w-Af3SAcRMC_ zT*&}yz+>~;MXGbW+aKx~Zv${Cp}PDs$z1(xd4}{cHxeP;ayjk!p5*;5;$D6Z+nti1%Wy3g6pvt^WSu8O)g}lwRRjbpm}$xS$Rp^z^0%-L`L)?B+Dd0-m!kRq zx{{S#80Eb33^>!!4*7c?>AP&b9Ul36We+QTx6Ibs!QeB*s?mh?xI&yKOYn^kvib%unC zF(=Uv7r2dlI*2phq`3(3pl=E26vdi&rFcm6#DDM*1QT}`W-NC8d^-k=50Bg3UJnvw!8 zxSlkVnbkQr_{H`oj(-(428t4FyrP zt_u{n+WFxbv-*_ad0Lk#NITs{!wc$DWr6lQ-5^+uew1JB;6A5*NF1^%eq|MYsYW8| zFX37M&b%CZ*Ik13(V#z|F=E8WT2)zcS=o1XVLZ%hVLKke?dUY(X}}MQaP4LjpK=Mc zhMd|!s{$SWge-7FW zlB`-;Uj*3$chA#MTfG6b*R6P#xfx!+j<0XS*Wa8Ez_IrNy#6r0zG6NA#{jy%9nV6m z$akoJr}x5{c7twL=b>pv5O%PyDEbS9@PAn|e`Hh@&K+%edG3X|LcZp$mI?)H_`k|l zhROV~j2>~rXV3}c1uPACa$$8xwc#!@q6PG{E8Wpc{co~)aC=M3U}riWiDi6|e9#in z93F{DtliO_AL`7+!m*4$oDW*Vn%xU6bK0`@nsrfUv?-Sg*#a$n|DU(-0FSG>);;^o zIn&EI(|hlu8I8)Qk7m@ZW;IK)Rjgu5mgKSJN;4Q6Y(jB@Lw}?|LIQysiftKBAcgNj z;66&;O)hZr__&Xd0LddEg$p_DYFx}+Uxg)66oS*hHGNh3qnz%(UN#0BEU%9)n_?s9ACsG(rJV{&9_-C}cS;7dc{u9ZRa^g%Q8g|(_H&1LHAsqZ!X(ELsA6EXAhB$&+&5^sagiPU+y$(r6%vXRsT8KN%(l%zZ`>{?#YU6Ku9AwCo~oF;B4p5qsyoI7q@^s`7zs6`(_UL5 zWVA%9=7Yt3DGzH48KPt3V=-qcl@=W3=|?S6hly5VgxF2&$>kHMo!UWD(qyvBWQ|cJ zZ9gVU2`Ql^^+X>L1|OuQPHCUh*(Vj0jAqG-w3hJslC@JY>kbp$C2ku0M73w@1gc)x zG+ls-eJ8o%ovBw5y&?Tj*cJ|`XYf%=)=*x=@+(2V>P@P_PcrabP3UEFnWpJv0j>p} zkTC(E7yWhgxoURQlOTX0(su0P{OZ-s|=-^YON8wDu3|0p6>lE zUVG!7-r9SH>|UC+84V7txFp&$rhy7y#u(djX&)$%lWpNJC0K%6#echZx$ z33eW+mL8(|1Mo9&^M^a{?z;SkKtc=fL0ka&i220PUGQ#x=pl=Eu> z-LkobaY9?1g}=g<(e6yI{UtkZ{xAc7&A{bZAUTy8*ie%N@)Ma2oWeqDWchcQnV;pH zJG`VQCUHO%#h^a`!fs1?dI9+#0)_^Z2vEtAQO_opoZ zi$;umW`Lq3wn)O(_49o*zrVch!1RVHSGaL=QMx5K_|!R9~{fpe6VQaWncPA zL+fOXMi_V<`(ct+Sq(B zsnTdvH->J3V$#!xtYEF8p}qy<>D?4zwW}*dPr)qWnY$(L?+@qQR5quFe`NZOMX{ z&iMSX{8GwS265_{w1TGEs8%IvH=4S8YAItKs!bhjE%K#}trAJAG3_f765+;)w$hq+ zI`gT{7~S=b*{?8n!jH3Z!a1dxx)2E=;N(x~<>z>R<$svM=*68i?_}_s&0X(g*1Y;6 
z6c*+P3~zFFl~UKz-4a!EH>Etr6S+!xx4tMx-jCF(JKRSR#qM$dA&W?>76SXiZ{bgj zjr+-QgG#Md-R-^^b-UN@%7FsG(v#k{D|*`Zb_Bg` zm-n>q>j;vX9WUH7(0%(0`!ncz+Y8tB-@Pkcwf(mK{=0U=>+Q&oz4$39q=JNtsKS;W zgDgv$PZ6wKmIbw^tb_DsxM1dA!IjRDbyA+Tq9=11{0KRAmkiOIJ69wWauT`J$8#tt zlIK5s5Ce5&#R=gs4$4Qdi7ZAu z%RZkuI3k9J$TGPxLNw@P29Cl(`Y6tj#khu#)aMPFzGaZYlSjCOc0{-c@=dfyqt#_Ki)+GZPz$EPoA{E39Cu;X5oQiV{0gozk; zZsBCi#bin0T!5m8XqKeU=p&44FlGs4f#EbWEFGnW2|jf(O2G#w2ATB{ea~r9zQE-yA#lz2 zYHpwN1ELiC`{`g|uuyKvl1(QGxibrhQxz3-NfuO|%p1^Wf$0Q2mfP>Fsr_AndY{g~ z?s=&HTqbXTMk636jxFtVR^-ZBoBdpU37Z;1G8l+olzTbF%V%Itp;cP zbaQIB+Ag3evyWt)PO-+S@dg*}qy!~nH=-4AaPb4d8?=)sBpQirn2$rrELlG%H=E@} zS+d~_A&)JVmC;`K=bqGTs?7pjP8KasN-nxvQWjgxz|N=|B{7%LY@$*BW#P0G(7iMz zRMDEWwxTr&oN|e;-bn0xXm|J3!zn+j$?v%G!JUD&ntWCagmQ`0U%tL*^9_9wfw^}5 z#`w%_gMlZEWdpUowx*QXl^#jgj8@yh{k`9~qB+=-x%oSrdmj1bE!(OjN;PXUtIdp9 z$tc?oeQ!u*H>%1fZW&FE)Or*Khx*WCGtq+13GN)ARB#Zk9S2dLtMNN^N+>k2S+eC6 zVW8P8s5)gHkd5+X(3jlXCuuW!HkZK(>u}6fv$?WqS8MQ%y))bQTFU#A6FVcGhHVY0(Ujv$N3XpBwDn#;5Dj%* z(UF|&YjrysI)_T?E*~h$@7hsaGO}Tv!_(3`LXK@18}AL&CClwahvvT(YfV?XoXOhe zys?=XT<4e&WTB3T6Sc$y<^Wp(;sBHywi0r{lY#Vt+bt?V43IZuL<}HWDHZrrOnCjf z(+e5c7l{P8{_}=JM!fz-V{6wxwN@mQu1@^ACfZVv)fV&|cw|?ov%WwhrG#>^Bv{>( z7&|r)CCxQ$eeoT43qm?*9stMg(1_d2+eURaAx8ZbI( z!05!lp%cS-(TO34P7Hq+IuX23ka_gr;fJ?|;yWHac zg<{l3wb7{-zAsiv=z5eZNyL~nYPdfme~;mefi=S@Oy`nLb|#r@#DEc!)`?Kr=i`uYfYJV;J>3oq=72iB%wGWo4t) z2*!U-?dxR#^K}|Zs`-RcrxFRIDmgH=3=}Y9TasH^3l+3XLQD0jfxW3sM>pk}>h}$N zKo*NtvbEqPHP+eaZ69z3U1HX%F}w6$k15!isV$wD;eiWDIH5fLHIG|1FXI*ik6XaU z5VuHOg8@qntwRc-5h$69&I?;`KL&L?o#A>yFZTr~*G-gIeqct=()?2--w)d_GzH`AtixWdcg4yD)0GYBg$6 z+Op)j(}Yr^F_t_HDv3Oz0s<1Zo6Z|BE+ZEtq8JXuu0Stya1e$qCo}E}D5}n6@HpDI z62Rbcn=5ea+EmqCu=E8X%w3I#PGqVw8xjl(9~H|)(s0xCy1HFmu|U_=eaVe}3}KQ8 zy=v79HcxYW_QBcmlT+WCtx%hc3I%Is)fQH4vYQ;WQ!UjaDTjPDgrUjA*v)V)Y==_n zM-agj6YGfQxjAZCd?syBk?o@`AhJINCR3m;1&UL^n*ymUS(nzztycL}2{4lYtqD+( z0FeZMyi@n&E&@2=q@%<5DlQ%H8Tc9^E(h}L;-_gT{6Su^7?0C_4i43y1BW8RuSO!n z!*AnL^%xo9yFzqq87hc)#bO2y*Z6Y)ss0iGlJHUh(vP6{-`#cfP}0Y!V;z@2xYO5` z&Qppg5{P6HnZKmH2-P(am}}bm3Z{>5@;_!Mfe@*s0YaqI=2UufwGBMbd*79-A(Bch zQ>Zjb43b!7+o6YfkaW{%#Yn9efuzHaO-JKh6L8YDK%RUYu2kDFmK^JX$1y~T(JF)2?Dn&y5WGKsyG8>6(uWrSNI|b3!y^IF4Oh!k z7#)=oO@KesUxE^iu%N93WgI~+>_nDtN#t6Jb1lWWmLlZ$W&CuCE zdl??cH===MPmy&5vH0pKv}~7`@iV8i{FULaT-NJnaM{^fl%GI~-ccKeFRjge>Dt_v zuI0aUEn*&4icaZE6G=J>0b|1|z{uxad6i4_u_9L{zA!7Yz9N1gBK$uHE8*)*Q4A{^ z{$5y_3*`@!v7zqzv_*t)Z!Y6#-WEf`nkp+nG3v#o(&+RAzonOTS|t0ppG zcf+>&)JW2C^vI#>LEDDw2V&vwE82Nv*)>#BfB8m;EO(}gHg9NlVq`fIu8*4#wyY|5 z6d#=bcC57~>2g$KY`GoErfw*`AH>+Qp0hx##+G$+J|w=OEJ#bj=|!hg6kZ81R|xNA zkcnh>7!Q$Woax039<&-~u8`d4g_WajK&d=IPoIl7l@M=EBD|@D@^t#r@#dT5dmfqH@jxa~vFnjt==xZ&X}qF-qRt&? z8m~lG(s=l1$J=U-ymajF&u(o?A9?wXecu=jR?OTr1h1jWnY&OL=*3ULn^b^s5fw|; zFwH3!wuaT5vQE&*d^-O;5_^b>C*{Z(Rx_8uPms)e{sOizKGH%5-jr&cMuS5qeuvS? 
zX#ph_|4pVbSky)bD>90?usyM0^stmv*bJB3ngtSzx_(lE3BCR1>7c1J3ibzm$5t0(KCF{o$eb;n{%r*N49|CWx*+$l^ zmIzfAz0PhjD1Kf)JJ9BGSLPWlZZoQwMdSt{yc7^!PQI)x8o!R;_YlB-~3nt>QHem1kB zt1;IXLdl#V`{Fp#PGTT=RIU1kNbAs}OkWFm5JuAo|AU)^T1Uk0j@cF8fP@w<+($0l z17<*_%l&pPH27_ru^H7?qd`e_$&^wqq0QwY#zc0`{|ceWvBf_Lj8I3{aupQOS*-_` z&}0RNCd;ybNz3!9-*r0aysgH+alCdBAc+Ss8E5so89anG{w>FVt0mESFr@!nGZiaT1q09EbLYo%r^DV z@hu6^*MD>VRW|B^SvLv7QF_&x<^RI$@Q-xb@9ctd)Q{iFQNaOUaBlen#2qPwiQnA!kP#a!9 z+vFj2rFAWVjn{Su9vRzaOI8-^?4@1B#p?@=U_;xHky2M`VAl;TO<%cgS5I6dQ!yqZ zIw~cVODo0>*30#*G{1Xa=g>9@t5!D{5>!F9|!+OQ5v7Qp8dR^F0DfJ$cE5Op=yYpvRR?U#dQLv)m4Yk!|kbnND z42Os^GQqUp>-Er#1zEycAuXpNEfLmhCH8ZcusBI#OBlJr5?1y!se@{!mefOZQ&yWy z#DhXQ@|tO~={26XIBWfR%wgJGZ&T#8jA@c?dToiet8>*~#3sh+qLtu5&=;R7}yKTSf+s^hb=vJ(X$51TT$lEo_M!wDFG0@qVYNbfC7jw*Uf)zGU5| zgbv7RW?F*2hS926=eo4RRGx0lE8Ef%@zoBMMOtbrt=6i>4)DiRyUthauovd#nWBRW zjK8cPZnl*c7h9cWL4!39=K|@5GuZ}bvY2S(GT5GQ!2ObDj$jxfg}6c;(&@|E3gZf=D$rDsGGih zYj}NiKq(ZGlt@ZT{N?TO)*UTAQeRfv?AvsxClDV!KH50jQ{`hHwwHGn7Pl9h#wYB_ zs$#M{^`-0gZ%oJ;MkZCM6*{w$Qm~5B(L+rN$O^IUeU0m`8!vZM_3ys1Wc*9r9#7@^ z$nd08rH6CW3FoK-&Jl|Es?MDjDS^w%N$S!Px#cW7>HF&%;F6ZJma`dllJ@=eJeY;L zAZFr2AJm_Li(f{uXVL(j7ou}GpR||h4f=_GICY4B}GO5)jMnrycodl>H23D_<(GLhH0ZOt$1`nZ)A78SLKcBA)D|y~h6s_EG?^xAHV_1e#GEq>;yQ>CkDu!#l_S(sHj)IT} zsd*zQ5XmIChSb84xjwl0_(&XV=sP-?uheQKa@MR>=@lX^RA0qiW%-S96GaL1PLPFa z9s&T?qLwWDkt8FXyU=s$`Ybf>0g*1SR zLnLx0n)24~Y;p?1H}nl2Xmfr?8*M3R*c-N#bR|mH7wEwTROZE#8r*$-bKT9?@98ZP z%T+R|LL=9hRYI9URz7}cohh8(yRS3WTx<=P#&78M`l{NIR^JS{q-M33E6sbks(CM0 z;*|NkTHVZPb@N5F`U)>s4g6#1g}t-_s&#jO5rXf`{{>Q(VwL3G3Rb$L*#Ai;7x9W+ zs#8K66Ux2tw*UA(I)X)%`9 zwB=Q9Sr_)#4<-7J#;sNL>%s3*?d@rMQGUeWA79XU$_wLWLw;eUyS1y6*G<*@dhg@x z5OA5RPb!HdTJ6d}?YnNbsO-JGN#lE>m5crDSIha8?yh|oySB>S%j8XpjPK*=a54+Oy6Bw8g2G@W{38uITVr##(l5D0M5pXMyuo zw!T28$!{%doF&U^Z@F$?U!g?FC`>jKx-DlE6=R1Q5NM5U*xT5A81upGk%BEyQ~Rpc z2cykJR!=VKhISzsNVITkM@QzvW#S<7PZCmd7D!L3dc5#*g6`w>^>c4?et#8uI+s!P zAdk58WCnJ)kflL)Zs1Mo;E%4k`t)UG$%8+<_8NFS8SdEEvhj)*SEzGuYyTCkF7oi5 zAANtM@8PrGy65cUBYlsYysVyt$zVn;U{BGjQZebHn+ZR+>u) zyFp&7vRv5BryHkBSu5NnRgA97m%_i9CWR8VqHkyQ<}VM&^i4;0yh7$9$5o5ks*#8o zyIyNI7!*J{blVlT->)Xk?9MQbq{ znOsIAiX0mG?35mRu4HK`k21QYy%z)|r#SDGEG^5d=zTs+vLgHV1!He0t@=f=7JIS8 z^Y1Oi-n>X_T`{|-z@dCmqd~DZgWw@>IOte-3rTGXod|ZJ5IT*+YGKKb;f9it~{6kDfAj$$HPDlhHG%lhMO}NaP9ioZ4nFDL?y}oIyu3kz+-ov1 zosT?$-~FEn0!LQ-L6C*g07X{x;uvA-02Uo z%IQJoZOO@u=zK)|{}@R@KGpO6`;Tp_k}*29&8)L9LL5o)&=6xqkrcOd<4B4JezW)5 zC%8z8s&qcx(t zr^zVVUUCJHoV**#$yWk(@@e(q^>^r$yBB3(B%^7G)CmcaCIqOsUs?i3^gb|dpij!A&ur)IF0CAp%GDTYV6lRDXE#ahgVjS#Lk@+^$smLmFfE;l?jPO ztOA(t%vWDJRN<}Lyyqy9iwc37GX~_C3SO@rcx}LI1s)6Vnt|5@ zP&#S@2x01|QHJg~uKtY(5(fmDd?LixoC@(%9^xw*g!m~B@hQMVS*Vwlb|V^Ol%fA+ zEbo(pSL~x>d7s>h_Gi#5aPoc$9C1GjShNvOo$_>tm@E+GlqK4W^9l7|i98o6`U}4O zDeqWY&g?#%+o-2bXW%P@=pD;?^L$(Ne|SKP;Fotwxquc1!3Bx}D3blNTTws@AJ2j@ z!;6?EickrN*g?pKa_OhDIahuf=n*&Qv9D9_#2MDf=n}Z#C{q3uc`YI1rcj15=gUw{ z)Jp~($Ysgnj+~1U=g9wm4?)%h&V1HCU9Pq!I*XHI%>{C_?`s>w#Yf{8|8&gI@)_H1JX&VgZ;#)EtLI^+tH3cQSnCYOZ4Sug;)~ z*49{z$u54Lw!t0@A|``G8-t34Fi5m9=*Tq&A)#vgcAH43Ix(}#mMI0hr1FQ<@W)7hH!j15=23KJQ74fFJc6@1#;Ts7{PtWAvkvNcWEi>0&OhA zN&esQk{>G(mM^aYuYbiWejhI1!en!!7=|k!f=4m@DEwH4D#Yw(5PJ&w;xMkLfBRdAs&9$KoD zaDhC_cvB_e@n#SLZjFS;n|v$}oavVd&P(xu;6Wa5{uXnHCKLl<8ia5ymarfArJx=O zD<_(gdQc#SQ&+$(`>P5-c|mi*bb%mJ01C1s+=&oMrISETK`h9)fSNhP36)5}!=aT( zORLfF{gt4kvaxcqQsAuwm02>9R_6PFFa4p@DJuCWjG(TVhq}CF1&4@1wt!Q}M~M}# zwQr~t99ga^8WIJ*Tt5;B5Xd9EB;-Dln+K2Ol3cb;UJjM@8AF1xkagm2W)`LG%- zIV-fPyAq8aRkq$=U*%TFlyX9@3)gptn%6YBw2`5ntH68>bHw44X-phz(sSxNWAn4s ze6+z}GRPS-YqIGqM%ta3U*+vsI^I4e?0xc|kKw2B_6t_1z_G%d&<$vXijO=i=_gmK 
zuD#L<70*VV&6Om*K$ZD?dzy(Pep;j5t6+tSljk2sR+Dg&k^=`pTYMIK$fDV+rt@#( zpL}Yz+V3h@Oub8{F&h|*(Vzm4$|<>E)FdBrgM0GA7_0-fk0p3B)M8c^b~GO<>_Gu9 zWf_e2JdM7>daY_7|E$zbF8cl|tFQAsjlRCBs(vk3wU6YVEw-E4-(7t!EkUyXj4!EL zysOjOk`8EOG6LCCr!3{o(O^r2H&k>b11YPH(VM_7sLjb}ZT<~?gzp^h@Wiw0+q?>z zQ>mc(4tptz(b0^jI^>SIXob!M=9vu&+Dtp#kKn*nzZBK>Fr>lVke?%PATG=RP6lhv zfFC}h;(1l{3^;oO)l4KbZC~k(xDKp_-8|4#dXREp73W3XR5O~Vf5?kWKv!QR+N=w7q#Q(t@GOjlM(Saq-EgiBZ2 zQ6lLb_*#Nc$nw7B0*qY z){DRI1>T2@` zLdj_r-j<#`C&|b3A$p``ks=>rCA2x9*4HPOoOPTJWxxE)4_9VYNCe4oChI z6tLQCuykwlC%_2)YQdftvNm>5ur_u*vNi^eoie)Ahq30V8FOPNQ3T&SC0hPPhBpd^ zj%S-2TU%e(kdXm3Z)A*&68EOPoX(dj0}8E~(^)hG{^;IMZn*K^_M;IsE+b`42M^uY z-hTMdL^BRG3^?wCxmyNvw^GPms$gyGpkQqbHeqCK?D|uTRV8K*nHv-Ivy&)B%%6Y8 z-dL)px*94CrS`^F>bz+Wp=3<7iB}W#wYBv)uHp5ZiIxLT+Nb|<`;CVVHo=_XXuyBX z-wxmK$zJ4Q6(>l%dE(GvSd*FaAK+v7oiqAlR%gJRD%JulCzuHrlpiO^8({N*A_F#! zXJ2_%(jQOcmp!q%)qZcPjMc+(gpw{T z=$dzMc_K1&-9XQQ6{+gsYkD$6bq;%Gai)DZZFJTzh7^sT{{-BKe+BA~nS%a!BC8S< z$WQGPl9014wNOx?2V#o^VS&c0z=#H=a2Vd5t#}sk8(Wte!>A`3wfupwrkA?d4_7 zZS$%I1TE5V{u8_rzg^HGM`yIin+saxL{?`C+FTJ3c~H_052_B$D3Uuax!38zTuIu` zsY&WfG)V$!l8qX5pXB<7%D;sc4S|f3zeCVwFK-W8RDCM#JNWZIJ7cx)kSj#P>pNty z<*+6ls{&WaDAA!tezzHPiysBhVEflld6eLn}&mq(B1%C93$4;I_f6~E;V%u*>$vouP5 zdEcgNxFuqT3`)x7N^_vD%9RS3x_ctEW{%Mtz!J4qqsjkGA7vX>wRm1iEv|DZwOW;# zN2XOZj7Dnr&cV+sMXvm;4nN+Z{k3kx! z*cgc29}1DI8@Rs@#YYBYesC!o6#Lgu?EV}~&$z#jD=y3L2Gi;~`I5cPvznn5U%dPRYupYSPS2Zq5ns19XxLWLtv&!zMu+gtpEpGN# z4D9ZichmY>l2WTVmNjT(6eV}2Is!U_E8QDU*Xo%>Z`7=FS4duzaRze4%&uhcvZO!$ zDyzYaB&!3ueGECK5b?ObXr_z(z#!}O<*QlSk*o29yLGg^bzRnN3}n2{m`7);U)|O+oUwK{bS%C%(oh#qHyHdjmSa6h zk2&Pxl=eiZHT_ViBVjjHWW)AYIKVQlV8r2Rjyd>>I&W&e!?g&=yy0@MtIDD>T5S1$ z)I)`a*6=o$j&qvn3drSNNHI60SQv7-I99S$oB;z7nmRo4_wIOGrRVxFFmJrkvAl_tNZF@_Egxefnyyd z?~S^vTEcqVJ+HEHankH*S=-#UrqTWntgFIq4LcaMCFFFrfz`c-$Lc7xM$6G|i%PA6 zoE_8|9O>?GL}dQjn%<~^b5~exx^S>dsl4;CX&<6D9pAkMM)JxwR>J?~3fd%svf^leoEWK6CG7(+0 z=L!lQA3P*&up3M^HK8C5leE!hG}<(TLZMVrgaWzWQ7Y7Of}#y-G}n0kH{^DhgCL|4 z3-bhzgc`ObN)`eK;E}{uo_~w<$g;v-?MA4Lk72iqEfE9XgClP>xbH_s6@+_HiwfL_ zrYc~3H*BZh!%SETd@swnhmOu$ceHI&?NM9wDC^$f z(9ztywzhRgb8VY!wMTO{zdw33C)~@!n~@FEX7q!mrY<4?`5CI*XQ_H%SdJ)Yc4#35 z2jq;=0ja6Qsp%)nTF2V@M(cUBzp!Vd(RzojtkUX^*l7+*Vs90c)?H<7qiy{o4ZIs3 zdqx{t2=^BNdhF%`@zS=G5lsYswuI-t-jC(xB4eG%d(b4jNA(CA zHgRYd#OED35#KU1(;CKP;P8)hhSD%Wgj}wW&*RAtA;0x7YZ!}dz*fkHW%Dr#V=y!3 zf+K(^R)aNTUDyI_2wQ_~#I|AkunEvFj0?67jOUi*GFKmHI1t*tyK=XC?Wk{5(cQ1< z$Fl9Rb~ctWq;dy#kM_5xQtkbtyAR|j`?8h1y?5u{`FmThIoNqHan;u9t=1K*ovU?= z22F!_U6Z^?6^_u6y$83h9&Cz4ng&;IJ-C+&Y*_CJV9^(%FK`7@Z^%%_3yCZL0Cb1$ z@3{jtl+3>`f7t-666@caMWOz@o^&di2uer2bTmuH!gcC`^9zp`UZ+g6&;1uXzwkcd zl~^hj`x^TB*JL~y_n}a}CISB*PR8R&d=dJcwxS69#$wv(!ukHu2BTS}CtVBk;a zwqK`WsR#@}nQF&hz+WN%8J8`#x81DNvXn|=5^3=g{((S?N3p{BGU5D;K$|-t?OMoxf|5q`iAHLXhb@gk zi5bW#t(KEb@n`UxCON?)nl#lt#ZC(}(P+u398AN@rgDW;d~@?G-^e9y-AV@{c#De5 ze{i1FYT5J7U;rvsV3bpl4>(-EGnF|!`PXT^p2mOTGB}WhZhQ)VlYAfM`EjWXF92Ss z?9qqtwE7U>O=C0fCZ zzL>ncb}}d3aGEq1T|kOkb84eAy9WZo#1i5e!M-$F7zobj!U7`|_(+^VbrN`8xM!@s zVU?ZMDG9UMs!_XwF-Lv3IgqsZy6b)Z))m#Zbi}7tDNGu(+R#*0oi+y(c3*FupEy;! 
zywPPtrp8QM8%I$rqpD9?gHEf43#9w1Qv=m@1*2D~Oon!vR6EiEYrtiJ*Q$FV?PQR4 ziv-#|FVND1pOZz&Kf?2$3G3cc+r&JWUmW>7>YMU9G#Z@)GR!Gm(dQT(A&4#qG`^|a z1qt{<3W2MNo+N^0vvn~@2?~zpe@+6mljrTMT-KU@y8%~lcAj^s0SQ#NlCv32PELg{ z+3<7xuQaQ~0ZGYEO(Jg%go4xJZ&FGbj>}Z?yYs(*BO*zXU{0(8j^zcIXAo(;H z!qk`x+yb?uPrR3UudJhU|+&(6umtcgZQ`g-b6!JeoASX(jp32r{p`O zm1J0SGrULCdfIgQW}?8gGjEiO1B_G%hI&q+;c1qq)SSV z`)K-%$HN%Zg7t;`&p2LZVidKTzB0V}o0}V0lf~y3doE`b5u#7)5|7Bzlmp=9tWB?TB1d9Qo!-XE@!ui0AZXPe|7#&6aT@G>FsnFcKS_Zn zz;dWBn;>g}DXqX-@SQIRFNKk(wE!}b$Uo@K+zhh_+#^~2HUlGBm{`1nu_+jv0cL_E zUz^?qYoC+2;|(bzLOz6E533i~=qIZzXTY+P-k1sMZp5r6oFNo)?&6AmwUOUU zD4r7Ry9pta=}V=YcS_ysw z0LAh~7AW^l0ci|WO5k{|D+iIE2|Mc zf}8#j*Xpc{oYZhy^xd%7-^01FiY z#48dBE=p7C=u(tcqF>OU7xu^8#btLA=42o>!%q00l(26O=xjXCyu|wSK!#Ja)nMRh z%E@I60E<9$zc!;m{T;?`wQ#UQVys5i%24>H$czBRxp?qJM|)*zVJJULB4?K}1&P1T z8|Ab`pMS{?709Y&n+rTtkRXD57Bn2pj+IL^@W~NZa+m`O1{KEnrsQ&scgl#gHchE+ z_ijPAlf!-(rZIY_a)ng0bUQ?Y3(GAlmD>mXUF$oRC}`x9o}sMVgSJ>pSJZNcqoT?* zzdw-l=w#DPBOSr~d&TN}-C~f@flP1OpX90hr$&DYtsaWn8CH$8i(Z^hMVhf1DBw<& z+nYHg)p>iQc?u4Uk@ICY_dD)=j^sFV)07+4Uco!MAl-MMj(B>CedS%LA9h5zW+>uk z-K!+v-R`ET+|2!gtzc2s@2Ds>oe+eY*+q0fid(^Fh*_AoM9P}fIq#QgOgh$K)5?2Q zW=Ghbfz@p=LJHZKW6p9W+8yUPf5vNY*?DcJl6=k+)Hp3&i^}3|2LG*C#VW<)_Lx_n zf4WGxH+UTZ6rOZ@d9XR=(b~nS4~`?FhW6u?DJnX4PQtU zD>&XkLaM}}Z^3hGvgpn?S@hvbtRU2Shso&FtMM=4H{dFr!)$iJlNV)xaat@6Ru1ka zj^J_{wp*T&D}a!{a}AJLj^gj&o9A!H;GO`@C>r<;(8 z1r(?e2fV`C8=lds^PMV&Rv&-V#Huvs6jn=_=}xs4j93f?i`9s)9oHCKW+QbF>L#k` zIwO3mAJpP~vVB+-bDd%dk6xgHho~xjIZ{+g-#%;U6P-&7fYQ<@-lwJW7EVtaj;1@y zx!yi+W8AAWQnWHu*A;46+w3wVS9IPEuCsw0Z!@}_Ufq4^!LE8+ZJ*I%g!-q3G8!3A zZGS~@{))|Qx4^n`f+~EmY!j5fZp?Ses#8NzaxSaP5e}1DO_``OD4)@i#Y$j6OO?V1 zIW1)x6e$JywhUiv8E`rr*8DSSMxg{+MIc;NK45tR&jSoMRUJi3JV$k9E*}Tp;Iu3~ z{V8}RFU(VhfE(|Ut-_L6`DD}v^Km*CjgtNffpUJ*Sm2+RB+8ZYP%scEt}|3$xS9xu zp~bbtuBs*bdL!LQms~+nDy2Qr7G1q{Y`4F`rDKgcaFfN7Kf~8KdbalX@nq|UPQRLF zWF&9rObo{?9PVGB)LFoMgE1ceJ&t=Chae3tklzO(zgNJ1ynHek7jhm9lAKG(IY-vc z=KR??7uUCo(=J!sOE&23+S0dVYu2K1CI`0lHq29NDWzI&@~3^x%Q9BNx23gXO|4}Y z7pSkOUFkK!)>73R(&4qWBYhQh<42YzH;*lA4d^HZhnxVa6tRVy>rH-tc3G3ZK5TND z7vx&)&R8?54I2pKOJwUYGiE$V7~pu~+{v6l;)e=>5#*HMV0yM?NfW+Asmh7=Gt{;r92o<{sph6?KJ54D zv-!!)ymJ3QO=DXtCh&n@;Dd6g*~U)twpxJ^Y&Oy#ZBYp&@Z&FIB&ILn22oF`EwDqN zl#ef7FPpvRYS6jxOFQEW3_1;Ls+M3)pk=t;hI_}_y4TlPJYBiIrCYPsy@2tzRY$Tx zj`cT1YFFUz%>T~eC5%d8;;nj>S`{1ERBI^nWtXLE1RlAgt!_hql{3~<=BuwTyHUBC zuoJK&x=N7eZYa;C!Z7&jurOY!Fu%gYA@XMm^`NbOOGYdbA+gAID$AD3s6UD@%8MsdP`` zHKrwItIe2yLrqhPj}`7s(2mp>+&sGaB>n8)E$aYrv73rS(pwRzr|_vs*)J+WMPYD8>k2 z)t84K$2ST4E+OZaLC!D7YO%or-%m1*S#2Oas|{Qv-to(e_gPy!${h8>O`XHl zM!seDs#u#sMJbe|-dW*H_eRXXIb2gaKW1E8-yQ9D=={|_f4YnTUx_Ym^u-4Ec4cqg z(!t8*3I~8fz~vu+ECi0SG?6mdx#?fiDvri86$TUf!~@G>$M688ng#Qr z`pznqX0JCCBG>}eSV%Pji%6}tNVSWE`ixAUwW(sZL=POiX6J#$OTKJ_^;x?x&3}36 z;(d+T=B9?8J%*kI-7Q^xJ)L0F%JqXwn}z}{Ie&A+%7^PbTUr83fx9x5sw}T}r}F!KV#nWHbwub&~vOD(Qd<7Jk%t5=6xI9jDp z%8g|aS9O2Pj5`La8y3cmj`|f%ohz%2BN}(6GLSB#>-u7Sc(CQ>EuC5g1;rE#`|(ig zP>i!V(>;}u#Z8bO_GMyAvwlk`WrOvP>K!}3Pxc|K5e>#oYVarU=U^!`_%j$1*~T<# zLBcYS><^4m85mkSmb&G;7S2jAZX1Vz^AC`>%UJS5OpVc#ay6vJWKNC=g_U+{p(Qxt zBO3nw+lzKP`JvJ7FvnBy&c~@Ec#QfBtZv05BrH<4i85nZC%$+3R_ZSsgb~D95g>Ot z;C1*fXFx2F@W+cG8zDm%;MyzV)fvdYLA8Dk{2W1XM1=B&68!bbP<``CP;-~Xof<<4 zMkTy-F|<+b*#hX8fUyg(eHJEXz|=C2%z)Kh2G<~Pl!OOM@HY{#ZRr9O-Tl`y=FN+Y|pIwOmW_#1&bG+^RvFjc8*Dprd9an;YYyGu%)!kP2xB5N-*Eg6O z4mP~nxD?_{(?HYN=JMv-v(fAiTZ}DN|8K|rEw9XB4*zkq`e$K6!egzEwQ1UFAhx$X z*mkn*@pgOrMEe)ok6aRG+utjo!`9*JSllsu5gzK4b#`~&D&W^$;jXP+C%Yc+`f=Ak zcKz~Vbhkl#y=PDFw!TyUUi9zj-!pG;-Xrtn^Y5I0=K{@wZw!P7u3yM6yl)Y)X#1iM z7k#*R>PY^sD9|SWk!fQmb;dpTtTjQaJXUk8!PdZDv0Y>=~jJf)rYHZ 
zUBj)}vF7Qu3n9*}TeEKCx~JCtbbap#J@WeKS4Y1(wq-+hL-&TiY?$77ZsRK(-`w~% z#Csb*9tY#9as9X*!ap7vuNlvd-!=Z{O~Vi`UbX0|H#f&NKc8C!ac%C|E$o)HTOQl0 z*m@A+qiuV(O>bYY{ZBi#?YMf!p&hqEe09gYJMPpLlEHt?NVA-+28yhn$Bd z4*dz?211$FLV3he@DU4$j|Be(UFrNPxDJ&~V zLt+|>S_;cc;wh}&VwS=R%xJnt3M(T)a5e^yV=(nz_&W+yYO!t@S`TB!uzfJa0{FBs_>3jk zej((>`r(uJ!{_c1-k5`-jY3Y`@P8Xjvmf4Dc!#@~=NOhmU+ospYOqS-{p;Wpw!^e; z_}+E!J?I%D*k&nhFFYHEvFPd^n7>`c@+^Vzn}jmtE}NeX0v+5~D?DEhub|j<0u^SL zQGCuesibbh6R0?i6D7z*M6G)!Z8^}swK>TDA3+bYntUbufu zNHKp-wl!ycBA0rX1+{ZkUTr@XlS33T;AZHVGx!R4m6j z_`e0-@jux+6S%0#{r{gcXZ9Ho#f(byxTL5E2Uk=wklhfF#kHFZv%u&uGtCSFWjZ1n znkAa$ax2knyQO8DmMw!AnR?r^y6TNG>sDH5cB3Nsf1c-@Ss?Rz@9Xye{eG{P=O~fi zcIsT43Y&VV(dj%CsuH(Iwy(mk+3^w7e9mkcyUNCs4NvCCh1 zjL^hD*EdS*$IJJVQsnp}e;lNYqW_!tzZdBqx~7RnB3WfYzz4z1 z{B4~qlj7PLm`x&A^tc`K%`|rY|I}OU<%;`%rsC#66C)Etvuv0&&NG=bDjZZZ4^Af{ zJ052hJ>zJV?HW08QoP0Ex`xJ28I2&~mm2hv9I{YdNZfRlYNI+34T3ZL@-7skZW`xK zYP(3me;@JcY)W3NG&Pj8NX8TJwtu$2E*PcSO|m?eG9dGuQdET2X(#1sm+FHk#VoZA zQcX6~iejY}irSS@J^Wto?lF&ahZsMu-}G1lHpE_1B@lN76J_d=b{{c6#dTu?>vXq3 ztfEzvlac1rg_u=ImRf11*=SCQng3h%=nKoMee?LaWDsGoN6O``H5NpU&MLDf^R*5sI>S45Ank5_j zPZ)m6U86dddd5JnO^pBfs(YR5o(VDC>Vlcp=_>j}VWr+DUNxYP5GtefNF0loWX|hW zJBLZx8>|+=rvbk<|IY4oiMs*cx0{R{zYIP)5Y6+^C;G`BHR98WP1;3*(S5NU&L5S( zzsnKN2-7~H9Y1~a?Q_}JGeT(bD zuKPT^JuHNgDdY_yG#@Tm9us`4@z1_+b&Ks*-Tdsk-)*CB1=enM`}GPwg9WoDxD)w( z7|ZzI&d6JszikeQ5GQ^Ave9@e57}WwtxKscW@)FY4EQp%mWV!#mptyKSz-(5;Lq!> zv2^KFLw9)5%FvyrYfJ}8R8fu<|DhlKyFn#=yA;WoLo_hapJdG-m-(1s4B0z87pcc$ z+wBi4IzEF8tJOr+b8dmkE78^Y0TEswrLXa?@HN`l?j2`@(y!;deah z3Rv%^QEI2s#Vq>mo9}-b0sj3hi%F-Wc}xacGqGD1(4K7c@u;E&7@2`q8hX>vGZZrl zrSYNEQ#0v4l!bZ4bdM8d6yRqr+Ouhm%V2ogCaoJVKNqsdz3I#>x<)#r71EgnRQ5FV z=HPd_G>_boik@P$$#o{Jg5r9)xJ!J8$d>j*QPU#y^8pLHYL!h_^lLT^tpdo;l15V? zJDbWQ`jhoD=yh&D&kRY^6v~K5CNfi@Q4Vd9-eUaD$Gk$iehRf&)Havekb!a0mUOBC zSvgL!Rh&oo%#cQikBELbID=NDPzG64n;^eb{LY8wM1Celi|7uKhda}#rG=DUy2OlF zn?qYct3*$wQfmlR!XORT)9^Plz^;J)6m=;Gk=B*nOgbK%CEAxF{Y<6g@@Px+Mk;L; zQD2eKSjm?K)Q;|}&ZLo;PG_Z1iwgr0l0l~VaG&Bhm4%gQ}&zQF7@yB3;Faba2JQkS`k&vTmXMKw+{ zpJS+TRC;)uqs(gLjSgpxi`>Bz$--!U2>CTWmM<{aoaKC$!ESUI7oc~lqukDCRhryn zouYE9o419^u{d1(6lFqu6DtIZvkYOq;LT~@M&fv<2t zAl5P1-H_t4T6l}0!fLDGtE`@K-d$Phv6*?71M6DtWl#!JJ?0ABX*U6Y%WihL@)q^#R?pWSkVE|X7!HH{fOGTy4V|7BLy|TjW!c4c>Lj|~br^^9v z5N#pdW^+{W<$%juE1U+Shqu~!4`B_>a0_g-V=ae;FSV9Y!Qw(5bF~LITNjw)c*)G6 zZoa}`ui=fAaH*&_!Dk0#mjSl8tZqWoY^dNXon#qEEJKgmdJCp|9584hG0DKg0Ttp} zBzTPF1{V}FyW$GWWtBFAD-c%`{Yy_I@i2A<5D^!QiyM{D1--{*Fqtb1t_8#n>e4`D zmVvmFbQ>Ko-)=R#<8msEF$VWAc#+R^IUJsHkHWRA&EWw$vDrmpl1A}T}{%z^vJMWx4yDqu1b zGs&ECv(4F+DC!boSDa29Vx@UfZY{NXNPP+`g31;L$rhrrggTZlHMpUx!yc#h6T+5BId$H@IuAwU1>%RneiUMJl_GE z2pZ&)4TX*3?ph`Z$}WX6EG(Zm$4!$5b_0PKcOkSv#uUq2Tqt@ZMUaSPu$eFhD|i&Q z^Nvy!J3E15poQ8WXP0gTQOMwSJB(HXi4T**h>~UZ7{q#NwSi0wk=VsrzEG;v?+v3G znP@c>{cyo~w9b<5kZ_BY!i{L}AGTQ$r{dZ~mP_2JumVjYVqq+)9Ru^ zGZkQRpoUUi=9`Tke}Dz!7{PC{(!`r6hPa{BvCtf{;o2P@k}_g3w@PU&Mv64#E+_jz zskuw$8bS%YU1d|mm3hX==13>aEE1fUQ%P5+eQjpGP7xMW9c{8%p($n~%DTQbc zjpb)%7iHxY7x9=;kdj+8o6pPOQ*vkXH)Q9g#q#O1@(a=n3;DbPK6_ezPIfx_vU5{& ziqo=lGx;gFFE*0Ms`syk(mLZ zDSUoPK~Z*UaZXABpI=;%pI4ZU<FAu24$V@g&)!J^jJQnAiIzN$tcK!c!Cpm zFLi!I1evdBzts6Xdy@I8*h`%s8HxG$&5&`>|>^hK*$NS)QH4j%5GDPGlWyCR@oCu?yLG>}~9Pb|qS?*;=-N zy_4O{Ze*Whx3fO>Nw$f7k!@iQv9Gg7*u(4zZYujNSIqvx&6IK6ELk`=N7kF0E9=YM zBpb}llf`m2*%;0xOXEDUsoY{&KDR_RkMqjx+)~+X+&!|@+y>d*+(y}-x!tlyxl^*| zxYM$I+_$o~xHj1_?u_g!?yT&e-1i*Eo#P_7AGm1lM{YRR&W)B&;F9DiTsHhYy(=@W z_}^r<3d}Zu*>*771!k{+*%1+#C!>?ihWRt{#pz$_ZfMu1r&m}P)jAt)KZ%m!vv zU{(ueYrt$X&WFJ4X)t>k%npFr$6)q3n4JN$4z5_H1+!jY77b>Dz-$DVjRvzxV3rDI zg{sqArv{T#(7tBtASvz+R 
zr{gwok)Y5Q%*KOR2ACCtnF-9?U{(ueYr(7$%$@|ZSHSGAVD>SXodmPeB@w8bsf_+Xl5zI2dtO(4kV73s< zR)E>vV7491c7WN7VAcd?2f^$;FgpfjC&8>8%+7O*IXSn4i{iXoEVq%eRqm^}?<`@rm7F#8P5&T{AEGVTX?FYZVAK(1XLC!ZjnC{K~2cFAw*%B)v= z2(xG~ivzP8z^ok1YQSt2m^}h!d%)}sF#Cd;$#Tpbwl{MVn*e50z^o9=oM5&X%o@P# z0WfV&Dcn5n2F`|27q@`(a5sZlHJIHAX4}AQH<%p; zv-iR5?`ZwXosuIrVH~KA$@)zm@x5z8cK#2eX}E_9~bi0<#an>>QZ= zN}ey&D*TCvh>1yCR9mZ7uxeFnLql7AeSI5gD4q3Q9Q97Ml2xnQ>g#Y|M2?X*uNQy3 zT^e3GEiujOy{|4UQO&YyxmP;KG=+5CrdG3RZFBQOI5usf_w3)lef#?Lt5?%S>GI?r zq7H2|)YsE>N*cV$d_+TuTEVE5?YwxXb&Oh9$Jg;wlcy$6$H{wnuTsG(Rc-3(`ub`r z2CLQ+NlH1ZR5*ziPTH>~Q!t6nch74`M``8==GFiK6mx87Tf$~h3HIEIn@dUbGGaxz&!(TWRiYlxbRmyF~**wX*yvj>B?|jg4v*0z=5k5(-wY zcBz%{cudTqnEdwqeCmFRlEOrzq>+X~yEFpj_++sm2>JO9?GX`T$k5=E?nz9hF(EeE zsX4^oqG)Gegj`jtBU z49q%aJ+^Nh)7v}Po65+E3$V+#Ycz~Tqh@;JBwk8kme8otC|Qk~tlW+aYbR!^lcrE4 zQdkM#X4U=Od&;qhmE;BoS9hshHp; zsi;!0D&mJ~M6OoJYSj?BXFmk)-$RDP&AZ-7hvag(XElaadsIqRrLL~4>+pIPY2}Po z5y;16%v7ls5vO=Dp}LDqXh_YMa?-11S#1!8SEXT9`e&HKwDO22T}Zmuzi6E(R_fos zhukS=RZ@o1Yf?i>A|hHztx)(?6Q}@sKP(`2lk`LWskDqrmzJECJlsppo_O99hcTF+ zU#|_Biom1t+agF-wP{(I)?Ywj6w13yPEtt4vMLxx($$OSUzwbY!B&!U;sqwj6|7d7 z)zHv@IK)L(RzsMQ(J9IQ_jpL!vT{W$t6~(Y^I;sTRq!Es%hMimAs!gglb+Tf(pIs9 z4;-zW5>a7#cB5R5)v^_X8b*t34(9U`WHb#o6|2?IWFdLm zK@8VSP7y(+Or{(yD8bsAQuOT-In8k5Bc%yecYvz)2L-)e+3 z0&3vEft>@>I@8k9XfTR?psrBJv}!((`_d>if=?EcB`T2pw7CezS0kRJJs>GriVv|F z$p7TPLe+JF(Q4{FNN-bdj9xA+i}x8liU7IkqGdp(CS^dUWpr9S-rLDJ0B5pyi5E&F zdy{o4Rwrc^tpqwX+Wib7@?=9l($==G5Nkp0igp^{q!viUAlU8@L!40U&y;wR8N>&( zMwH_%@%CrBM?bkWSQ;QIjMeqt4pxoYMcstg){0fs zUpKsZmeq%%^Qv{MI{XD+3ttyFn;?iPb@N63THsh}%R1|%XM;XzDPDiBrpIH&J-gZ4J(+3{+ zx>OQ1DjAzm(|xr9@9lNw|v9t7I~NF_CJC!e2{zaIBux5`Qtl6;J!< zmA{xIdud;nYT|7b`$Ee}mwKXCk$R%nvU*qqmipek2dAagb)u5caj~pm4vA{C#kvxdcV2=4 z8iDoF#2>-+GbUnC^IBz861`Pw@q+i(bd* zb>U1na}7Ncyb0bC-x6%{WS`foS-rNSrKM$mM@w^ab4!O_gO2`;(_6y$Le3KO^k*o; z^Gq{dy?r4^legJ>o}o!{p0qn?@A+WId2t@Sy}#F)EXsaey1m5b^!4}ZWr(;?spNKb zNc+4#oYhCP_HFIkmh^V)N4Ae_2XYP{-oN_w)y?{5J(b<+YxBMB`v~V@oGm!_`I>!A zKFLaIpeUPI*}=3h&Gc+x$hFvN@>0E%EIwarb>DEM@^H0Y&FD3qQNcssgVpzJQZ^}@ zR~T0tS6dES4v+h2%&es9C?QIqZ&8a{l*+{|EpILiQ?p@OvgENZn#u9SG4bupLiMmD z(QyvPq%t~QHi^7MVfmI8P%BLeQ^MOxB_$>8CDNfM<4fSOmPNkBxMOkmC7PSVWNet+ z=VO=vJgFz-(CArBk7KQ03N2ku`U)2iL|aJ~dTmM4hm3u3jL0BQ#2>Z#C6YJ1>iq

+KE(@Bo;=J_gL~zI7Wt?<8b!HsR_p);!a~!*+$dGF;6A6MU_OA;2Bw? zZnjvGqLM6@X8ncljlwA~REZ9KRFqIs;_C>L$-+W{3Op1XD^oxppBMXqMnNDkgcI2u z6m&RD!6P@sD>_l3V#Cw}Er$;uR;ZXTwZ&pNT;Eg0gsYTFy&fw?2{_11A1h}S$~Kn# zTS})lg5;^#FU5IDi9kE)m2{AC+S%5{JH9rtQ|NNEapXZ8RZW4YP#-2on2rq#o6LwP zEKCxU=(8d2Ns1bMNhM&V*9@Q!6`fx4a|+H*?-cr*riJ8Pzk~JK#Ojayao$Ng7PTNe zVJnb!(!>d8`Z0Y#eFzrJK-zSAl?*xQ?8U^}iGkitXiXX#Rm8`KgdKtQwVB1X42^ythL^vnATEPeg zrFMi8(NjBK#>yKDh3P_UNKfBw{k(nQT5{&m|C-7{|NBPrU*An8r!XKyn7sGLxlO%x zt54l`UCE5%(+g+CeASX&+gKY_B-G0H3ANl`8aWv&lSPh(ihG~08hrCqp5dv&dxfxo zw%D2(p^B=>70Z>8vf{#oNTDZbs3WyA4eoNQz0BjVCqxM0q*E2CDlnTW9ClMeKcO$_ z(MDcz!LO4h3=sO0aV|0{I9_C}Fpn(s7%H56erk%)@5-=*al!;)e8Tv#iDSmkK{GL= zDJ*^Z57A67Xvw%Pl1s@;O&BU%E4KRCQ>{+&Ewi-3bUwW>ccPG%o;Y$;TI$%5qf=8R zBwQ;D5)JBmL4yj#@9GJ)?7&dKtb*Zc*&Yn~wX#~4W%hi3a{SqSXOfS;lz7^C|Miw( zpMLjK=ZV)IdGCrvXMUJobFTK)E$xS1U3%cAPvYDo53cTg`11`v_sD*G{apj5#-7-@ z_@PNV=KnM=#xf*oMPkpRTShEv<_13VmFe)*e|)v-x{bHIcB1T|dA-kX`|{QyzYaS07h~t7=Ub+1S(MQRYP#yiygGHZi?EClx z!P4uFNEwGTda#z&fQUkf2GZzoc^`T2HOKCp_wD4&rcK}O9@YAK+2G1Ihu=U$D0+}Q zO6cS5J!nk(#|0Tq?djxS7XI?gh^LyzKGQ=eA`|+{rwKO**^QZv=_^vDue2Clwr*c( zaW1fu-gxQTmG1a}8;KXG6A_egm?F$ls*#rp>|HI-*qxVd{=gObgSVnDH^T8dtV;?kM zxBZ_twj4Zn$G06X4}GERfG={#3oktUH}7qK8#+ElSANyApTF_V!1f;3*1q(k{_a78 zpWFNLf(KtZ-*ZmOb=x-8Uw`jQOM1=fyleLQuq6-7i+FXUWzFUzOTYMgF_RIuAoKVw z6Ld#E&E6Mr?4D)&`fvWaY~OvEu2Ocor6Q*>Zuo)?zrLkBI$1mPwXHk97?d=2V~T4* z>Ys0&+-J^*?>x|OV#g|VzjIUS+49->n}i>J`Xr;G{)*f9IqyA_UO8=fwC7yi=9{0n zxoT=}d)=&l*7?5qZ@#tj`s4iZ2`6ryJn4{c_T4MKzH;E<1$QzXi9)TCMHTxhq>AnT zYGwN^OY^_#q*ZKxC^a3b*y2B=R>TNH#H8;ZI&L!ah1N3q-AXu?e5pQx7PIj}VnV_w z0p}R8m<5}H=MT|O8s~mz{9jh1`c-=dH>=ic_SRg{F{Gr!RUiBFxd%7Y-m15$_fKGWKtOy?Nai z{W?$Gyx`Ch^+Hc;#PqE9&oBqFavoHTJvlq<*G2ao_+rk9DmILA$RTRGm_>v;7P+XZ@yCeGhByd3$r;G~uy_`n>*P=G-4fjJadWnpxwU zbNfCy@LAnQOhRZJ9%by)u5HZJOL>938LlQ4I}{CgkW zUccbp7{{6YJHLK8{mR3oxl5lbx;pc&jSp4$>_c1HhxR{kntx2U>&%v}hB?ERQ;#o_O2n;tJ7oEr69*7E8VZ~ye}{nuahaqpF1 z-npfGaDDlt$6oj54*60&AjkOYeRp3mZS-?9O7cFO`aA@Foee2#C+i#h5s8?p`y92)KNZOyE z`!Rm9>7ntClKks;r#0j?>h5@D@vMKoUUs|T=mQ(~A6R|Jk@=}FZvE+9|9ncQI5j`} z(XZ}Zc;IFA{_~T5*y$dxe0s)PSG~XYhxG?7Be-|(sSrA%cC1Ue^0q|>b9Ti9_g9c zTT{EG?sReWd3JMjzIr(mA!MB1{r5o`d-snl+VW(->Xd}4!+-u{(z3g^8f4E!hwbY4 z@y}1QZw|bnsPiX9^FChZukVk8$Rk3JK*doA9Daq=^9w#*lJXthLoP@Btq>xV8fj0s zf+anSuymtXo0m2SOYijd4u7JyBzeZr4JWUS>=^#3w(y?WpKsr4+-~^8!?QM`W=Ef` zQyU-Jk>j3qP8Atv7V^bSAzQ#c)R@|svf}zne=ZITlmC$wQp)LuP!zHuWC=!b}D5{Ikq8_FT7oJz|dutebnWiLTpHF>T z7Y^0D)N6_pn*-Ww@ou&@O#3+axw>EQr@ue`v%6)boczS6tC1aN%qMgFlh2o6PWOHY zq23Yq$6R zPn*vd4Q1rCtN7Iur2<&@%ms;2vq()+X7|g_sm4%(obLHj_=>2vbQgI2)~}(@SNt}7 zrZrBy&AmQHIEvR5tSVP(l~Da!Y`m(h-Q^zMjHOotXqR>Cf z=c*&m?)7ft%U=gbY@q;&#rY2ua3ywcqJa6&GJqKUe_4vr0Pxiz5r4%Yw-o`3femFe zpoIAfjU$^y6tGeJ+Z785{4G1c8|(lx*un6*+Y6s@JO_|>=)Si|JO+w2i}x+X@-&5{ zy{D{=Z&kA@!ywq2%gvkzUmo>t;iW?tlpkiWmbGH~nD|keJu$mOT%&$Sc9b|@nv2bz z^0eHoNh&%99W5=#P-hfww)E zp7<=O*i!Z`>(q=WyO#L6D&dRBF5GfGC;qYWYKU!jpKVAX7BR8Afy@`&kgPht`bx%{ zN5sG!ej^k&!mIKCYfqn@)w+IV;8Mk@;JN67^4W-B1PWnb>cC(y9$=H2Wr2e zdn%b#%PUHglz-^XA;`WrG`}vUbhsJg>w4L=CY$@te({JW%QeiAKYnyr=rp0mB}dgY ziUk~Y5QN|Fkb)~8PRE=&@+_=8>FxpX;+yU>P9o<<;n*C9=&{3gJ!1N5CXcEt)Fqe~ zx1(K=JMP;TfB}(zTam?qf}9#7ndNv-%TQL@^({s5sn%@aCE` z-l0N1;<-h(Ej1Vpe>dyX3{Q|vIC0ddY4qyJm`A%#(|QZF5q=vV&^P_om1bz-!?qIt4sE8HQ)zBI1h4wOgFPc zwLlvG%GGb6yi@Vu#uOzh@cvg4aoYD*#9Z$!|+>% z*D{=SOO!YO3Jz%WCg$vSV9o?E1`Ky#PV=u}?mzS&01)#wCNByhrUArcfOviDAt9Lo z;uS#skKw>fdlmkh%yRW}xyZOPFCQmdh>QP4q-TH+aAKo zaFc~IYu59#Co+XBtuF0tiFFa#<>hOgp8ERAa$~}hj?B>7vCev>dEdi#$4jMVJ6|qw zm1e-x(hay&p^NM>{UXh1!LbFJ+%Gn{_plg_7LJE4ckYf?3=9fHpV78dMqiMET`pgi 
zS{ag65%o&1u!`~Y^(%f5(0qc0wTMX?u0~SlGjNACRvNq?O}Ko(Pas^oIB5KYv|E0& zor!Z~v#`sNw8-HXODl_eDVb8EPYcsJ=j>cGUfHl^N2##}vv#qT2a51FI5|}<{MsS} zX&Bac%mtqta+9A)`#`ZxrGxq@I`vjdX@$G=^>{J}BAp-vtvstxT|4-oQcL%Z8+oCj zhd%42h}>S)6(_vUU7>nE&`%qC8;FO^Odrh%5!Cy9RzD09G! zEKPua_uL<|Z;$GvND?Iet!JBUox3FZCpJS!OrRjCj0$l6y*%eIE4{AC+3JWzm`#1k z5-4^w>fOuw{dDVjZ~tnqIIVk-CP`f&0|ol9rU~F$o7n^1R*qzY_P&6yzObzIpD)%f z&3j6__0%@*mVz3sF}2Y>oqfq+=1!KeD{HOOXMIWCRV^ZPF7A|1zz1FXm#6Ug)Qk1# z8GY@29{g@eOGfO#32`gQiRr;QB^|F-fU z^F4(pWouz{a4VG_Ty`J2OiP!?M662?4I(@)ObOS+U%HN}d2Md%-)VVFQDP1Xr*||d z89KL@_eT0oh&edZuF0h-XLO z2}u#-k9TlBA{kyDIU1|mreP#R&^v4vfrAAioVkOUi>tG_vHd^N(ZmK3j)jMVm4un( zAFWHmtV_bd%%x94!lp~Y%F0Q?!Nmituy6sJ+1W`r*w}zIR(4$yP8N<&id&b2otyiU z{gdM0A>m|W0oK@o{Hz?TK#G$U*v`uQNdY;yIXFl-**SqV?oWvVD*!U8_3Vj^Jx<+ z50GSI{%mDq`)uX@1&-a&vt;%Ka~{pOIi={tx-7 z=U*haKSRsL%=SMYF5vlJ=s)N6KO8))Kr1-8Kc|d`1L*L7T7mNb90&x&e?P!(U>neu z|E=wRr~mbT2JAnsvM~Rz*#OeOX#bO|Px61jkL`cK?-O#&z#kwR@CX$0X%mq8ukD|G zpJ4!c_!*f`r~h~Lz~28C!~Vtce@XtNfYAOgs6XNS3FQAm_@9aG%=szAHJpY|P?fPu_&+*^rPaXe(>3_~ZzkrDRgyO$C5OM!TexCmaa=>H+3{KXOc5_uynN|VF4Nl^<3PJ0dD`iXJ3CQRnLzmwtV$>^o+a60+EWA5@ier@g3(P?){ z{Lry?^LlsRs?V91#)SL=U&PfBaWRg+^Kr4av9s&^rQ6TXUz^DB;ZYG=Q!{JaaAf~> z{Mzl7^deV~Nk~uc-FKZ~B;qY?GGc4|V*7VZ#DL-WUVXr7WX##wq(d8Vw0|extLtN{ z9b?`3-lW5XP-mwKpYm*Bd6+TO#;JD&& z6rwT|l61wjm$43Choxjn(JiOQ5hPTrJ~Ye!DwNl$;gQ-?60l~3Tj*!wv1duSAa#lf(i8iI{0J<pAg;**uV+&5|JCjd;56qs8WUELk))XYRfDI2}3g+ zR222qSbDVv-WusC8lTx-e447UNpRq0@u$&O5ORF4dO>W1YKw*;yM&#JgtH-36NW8a z)LwUnf=t9N76U9&wr<-AzpDrj+m)C_QmC2zirj#eviN(>+h`_qHtH4R#?z~q@SU9P zZ|bc=Fr9m!M3)H1%jy3iUP%UJta;30Rxc{WiWwW z4s~I?hr?hIw2OXOc|hsS+~iZTifXAQ_mB+zg!Mw}Tx6dvrtq=#=2X^O3ad(x9+-NS zi?M~YcDBbAo;0iqIWEdgIsYb>;Er4Zo;=B&!9g9eU=~c_ee8TbAS{e7pVIY7AFQwrY%1ER$9(e{4&cO%;AIxA8cRqLTb;ECIn+#=B zZPyYRJn*QCY>a2P7!)v$MIwazJY0@>x7ED#Uh9V7H1LVOt~Ld?35_oWH{x672MltFnn1&#l&~WV30F_*PZ#jZBX^jhm0f@FGkn;?lMf8+Hw?91|Q z!`O5D?kGQ54Nn$P_(f~_WMW9BY?_+)^&iWQNF=PJnuo~Qnp1NuV9KYtCso*W(J z2-yy!GgaFHC`@2F`^dHenL;!tCEP}*e8aiB5b6ebhM$So6F^YY@=cV{%-6{eSHRQ@+Go^!z@+gG<|xSj zzY8}a=u6$q*iumN|D)_yLL&o=ClrAD>XZmdf*+^xi!B|QD|g%_QX4Rw!v`sXfh2J||3Nd$)FTs9^AP~6EK5X=f1@4o^itb(pR zpZ^QF0&|Wm2XZj50lzHsb&EhgZ@?F+rLsabZ~L#!%`^AR(U`!_oRg4)2SUSq)4)l{ zc-_sjut9Tzta9#!o4e3{L+h3$Na=xL!09dSAA=hA4B{Sw-);gEZ#%DDRF~7QO>+uY z(7Xpuc@N{ir!som6=w@}=kLy36F#gSmHu0@aT-SoB-=)~DIQ+LzPtfn6)Jt4G%cg{ zZ}g#25H!jz?&PnwJWDw8LTS|$Fz8Z%+!i=JdOF8`??AXP^+9-q%`1vWUy7fe&l^!5 zDZk~T7=g9Qa!xvZ>mP{=-WA){GD$<%%exw{x=L)1QP9{ZW#!F#Cfdo}i%awMDkt`4 z$P>C`yC_<)^!OD(d#!oY?5BO~YvEtN)U!8q;wj&}B2Mfx z0)Gdb zFEAtg8q|AyXXHQIO6MN|cU~aTs-PomM+(C_8;kzP2Wo3<*$VH;pWxhG0ev^*U*#o0 z%scv^E6C@@YhUyD9WgMrNuT>`tJI_E4Rq)vPSvYgNy!y6Tjq!+57RJ=Qw2EPu9Uv3 z{%L=oRk3qL-&2u*d3ZG8?{L|p=TY;H+PbQ}tGz2;^J+0L46*og-VF{laV2#|cDmL? 
z4LqK&AZckni-H@$*;X~L`YBK^n!^1Rgo4`}Els!06RW@CqgGzPOZ^l@cX1DkbPsWU zK6jkR5jW(}3GCkq!J%s~yOKk*Ghvbcqd1xQDq9NPee}QK7~Xv_K_0jmnmw1&#~-Z5 zG2QsgSHB}HS$Z`exV$qW-s2jI%s5@);ty0tSN1fIr^SMbU=e zmG=Yb<2su!2kt}YF)i-9eHKP3^zqfB>L=Q>FS)e?j^Ixbb7H*}sT=d;eV0BWlHWgm zNJ4J_8GHqOS9?Sk)%`7F{vrX<^#3f;s^~^xe z&qrWT4U6ZdC^)TG=#%d29&>4YyXq+F42qZKESX;?&b=ldunzpn8^P&xPt5v#3=SJW94m|xtw;Ex3gh!di z!o@B)eb&n?^GDvhrtV9LyQc9Cd8>QF^|RSikM+spUyGNB#}yZsamQ!om-5G_w3k?}_aCiX zMI7NBm_JYUHiMVek2a&1_>VWkmtN<)2=#B79+B?iU@u!{c$eQebqu#1M9+LWwnWa- zT0IKyiCaAi5ZisY)|3u5L#n$5H{z?`Dt(yNJbT|5*PhaSh}WJ>AAPL$6xUy7hq>0F zXG-#4{2FP`FNL$vE<>#5aUI~-7PkCg?0H8vqBtkN5=$&=c67|#ldo?aLv3z^IJ7SM zL2Rs?Uy5@XUR;WDIl4U-{k-aco?$R(!Q3cpPbk!-Y8Mi23Evo*v7hb`5^Gs*6#}$y zwFyahNL}t7$Ddp(Izz2?t`UY{y>IGAPbs$3&pv|Vtp)!v)RE6uw&K(wy2>$T-JA%V$e&1@(9TfKV99{Zpv`b$n`doci(x&o zWSr7VdA2wVpV-LoVSBXHVMVrlFh?>YntY|X^*qGRpk|x4Fc^L1yxq#sWOFoo&-7xt z&6~AqWYrL9C(^_r*rsQ{KGl#(PIr~TJv*zS9*M~hnmjOXdTn$0M&ll*&t|(Dz zhxDhu;=f~aW|q!o&fd-~V~PqH><4SWjLgr+1Y10Q(|$g5eD}fDY^C~q7l$V&xZ-SY zY?2dtoVO1dTbYy--WE$&mbMLfnQXRGb3E8n&%@cNJTHfEE?apKhw4*VELRVZOpFLj z(M*|5Rs)5Gd5`N!6_mQE9E@2lzQ>TnKWTJleTMo|G;4hA4}Q~c^)&weQN)PvzB*ai zs(!7`Q8`?#)8;*fNo?@EwD!&RsK(Dar}pxf8~(CDr-2@^j$zmPGooxwN5GyVXLrN( zfc%&|Jg0u=$>dpH`$=Ke1FH+0tdSn$bk(HY)OyBhZW~hr_^`R7gC0?c)3dgNoKN5#h~ouO!ct zv96GvAFO|_y}*2%WIe?H{{8W-MxcK88)<^)W8=m1bZ+Tm)v9U-b=}As`6a6Rhtp%~2G@DFnTN?T zU&rS9{cyF=xwrc=TZj9VPPgr9wRXqw#p%bN%n0nOSF0miKEt)=+ngi8yIs-8<_T-u zmu+I=FI#m!BcgoW9YtP}1MzEw8PK_Y+vU)Vo<{v|(;FY>Z;BfqgKsb0EgwEnFaEw) zD?Slh_}yt+?;RJ@_8-p=zndL5y1iaHFE~65J(stx{@&_4uHTn-yq{HvutI& z=N#I9Y(CW3Upc&7G>82zvB&qZAU)Nxr)|ej%a*du%5tvpUp*n@H_^%ErlEtP3MBZd zmW#W6&L07mcEkf(;OPj!+8h_w#-$Mr#rd*Fi9w1sjzEHeFM>pd;S)#zpm_lE!Yc5D z8r=~t(>#ZFMkwG$+2LFIw)#sYGTU@;u3zxOY{J{3V)3wX#u|qd(}vM1igxQ zwI}fa(Ix3Y+#d+t6(D&8dlY&1Awfoo-hzInDR?3o9SIZ)eibDlrpE9HdBy#Moft7~ znM;9jeUu9{{ciLX>IrN=59C=FfV%Ypy&WL>j+x(0on_x0Pca!f};R#PtXFq0FIO8kQ3lWIywTgs7_)?rA9i3 zrwb?(gan4|FLvlKBE`aXuG1}SQw|9iy3GgV4LG0+rhqC}i3uE+V&O~#3+2@4R{lq* zw_@T!DDR5b{8xbAmr-RPX+Q4_@M9VM4Re1_(t>(wlqNrodYVA8aE@~N0e&+QDtLT* zXo&i^cQY%G`tB#hGZ%n3nJt(6PK0nz9I$mk(q|eAowptGcUTkcZ!gk_q3p2_G3g_6 zDAW_gqW}Q>aBZ+46zXny5HWPvKam&(kGcB$DeNRO;5#MYRwEIq0QT10Idmp8XMLX? 
zNSiow0niWU_o-pQ!(ilt+Dqkv`B$EkZDGNcmXmd18CFi@pCf021%*Z=t>mFs(`LYY zcpvQn_~({%vQ{Ap>E56Ph%R5(IamW&DOxe8|UUe5*RBp0Awp*_KfCbn^A^eNE)lY)#9A{uJZSrr1OnOpi!yRK7T|q z!SgXCU?^m%J06vp1+qbN@7Kaio znHEnb7vg8g8Ox0L=CwK*YF%cFDX9hJ@2j3&d#<0z5o*4jrWr^1C4#5HhSHd)v&9?M z7_xCE8X1pi>{X{j^r z3S~(|$nG@}SgDnl?)Og5%fdog68%zlV$$y&iO$(2t%dx4`?nMZ-vnmE71fwLc0=9+ zjT7M05=%A#(5pB9VvMz$#LxiSP`hdG70e9J>v#a`BaGjyf&KtuSnEb-j2sEMmM^r{ z+jB49lwYy?w4M5N*LID??yurBo-vCx9XsX%rQYIM@jzzr)p_);p+XmDv?0Vtt)(^* z^%k3FRQ9P5n|1S5oalzXn?_h-%n*aLW;X`SEciKwbzk(U5Db?-Ksl^4j>#r5YM<#e zF=01GaP;^&#((tXk2pB1#CsvE7rG&WT9YS1Q9=7q#m!TI)`p2D>6dIe#dWFSNzyS= zbgKJUEX8UESd9Wvrm%`#(XVNZgO#y2)ZUYO<4EunO}yGoK6^*g`1>;r;*tLWmr zy}dPBFw`(LTQD>P_M;Gq?od=gV0B?&H6fz7kYEiFq7nne3I?*~hOJntL z;IFB=gps<0iMoWbx`e6vu#x((iTZF59ZcLx%-^^yh;kN(IRnMtnOJv|RtnzTJUnZ` zOL9bY%ORQ6O`baZ%krD6N(poxOYRZHCHNHTDT&ExB1v>%6d8dO1`yIB2GW)mI$$%fvzL0d1=Fh$y6#(GYp-_wY4$r=X72+GW&k_qD z^Vmlo05gp?mjx-QaO4oPV-zVU#L(gZA*sI)LB$?SQr>2DCIZUH$&=X5P^WR;2e+1m z-DQe`LLm2&Z#U0fQB4+ou#b*L^=s6Z{@ib-@z8 zvD*CJ3{mhTcnC$C`-*GTl9uKf{&RF`5mvgOpn9r$(QR{@?$@5KHxH7pgs4X^(e;U5 z*XFuq{VFdhJw<~KcdDI2<_@0?%O51Q@E6U(Jb#&3)Yj@IDilMQqsh_j)pm%hb=wOSXVtNXvqQR(V!!vhU+fU^3}Us?k`7WK3w+t=f^F+JzcQRWL~!81qI4p%3CBc|2C=cLpG~4OvGJ z@u1>F4>t&^vp9{aU=EoA+y*vUOH__h$DF?UpqD;Oa3WhhQT`B|XyW3`9dj358BBeu_VO}tL(`^i{YSkPX0p}DLK9Mdps%l(+c})PG80q17-E3Tar>Jfp0wn zH#kTBLThdF5pj>)(iJr&Zk~@=&X+D@nX?_KG>F|IsyqM}8krONdlN8kr0^mPOypB! z59?bhc;J$getf)OU4MQ%=FAwzDV0RBKc@vuvZUauhxyebSP;^lI9UEFf(RDPR?Jxq z%GP%SqRJ$apbNc6hEVvMvvX{z7vvF~u4IpW~6wGUWm15&pDB`FVfj;ye2 ztBIwZOc7**K#Vg`8}3%w(7TcF6dBW>Xn@LU8l4%(Yf#5QHFL_cmvp%5;Q{=HD$n4TUZ2#+cFtH8j6S&Zc|o&SOeyfJNmqmf(z_H{x!x zYi(Y$I@OF!vv&641@)&J!%bGbVRAuNjIYw(jW z3yEtp&?i0f*23i0Rp+kJ)DWv#&Sa8YRNzlwzixZh%zi%8D zN#fkz!%T=s+t#`qP}P8b*+J(pIo5B>Nfy9^- zZ9^qO7#WuMJNc)oX26#>MbLcx_0jRcq6R8s^As-%GB2jI4&p~*3EREZ67s|_5rzQuE z*mQ1V3>0>>rEh*i_DP~yv`v=_#+VZ{(=dAS@D0M)h%<@0%L$b(411>HZjuB&hRPlXLI7-fD>E|CMqf7wIDgxl()pU zS`g|!MUfYpO|CRo2Sy;2MUN0}EHhg#^=%J$M(erm3k6=JW0f#ZQ zYLwWo-dReQ!S%)SBxt6$K6-qrh>t4rf*s)Q-7e7FAq zf4KKmNMj!y^70dtFmW9oLGg=Z&8#r>^W<+pR*xEq-WOnF;jCZl!i05Ox)hnAE8QRr zr(E@4a+DQ$BTJPgs5oiU>M;;ou8|eoV4@2yzy~-~TNrWjN4WRm5ex6ETM5I!7NndY zKk+crYX$dJ)jtoBIVy3+(;Wfd{KUQkpkq0LA#!)7AS-WcKd$ByM3d+}$91rF+>G(92P!jDs%XC27ic3`WHG{ZR z8$(M(5ONzxfxF)Xi1dV`R+E|{Tzi3J3-=O_4jma?Dcy_IafgEN5Y{&v@o_#h=D|!d zEg(p%tE;M~ejS$=&twj|y@Of6g6mwa>}B8Wa&9XA`&#h}AhL-Uu6WIv#+uV!Y=BaO z?Wuj!zmP_;3Ev#)_R8fAYWM94xLKifKt|7;;lcDt2Aw>*0P-A#tn0+2>N~SN2|cJZ zQ&I}jJz4(OY2lx1ZMY~`cQ?58JIKdW#kYL4tI+gU$cxm%UOU_FLu%qFZV81I(cu>o z5pL)93vk~Quk`5UI)AWJs0QPbiYp^TwdIh2^NHx+morcPefFx^e#AxLBZHD9K++yF z!y7>`f_0<7JH^3Ff(jFuHDM%8fSK4Y`C+pPrHUm9f}cS&U>Z;Gh{Tq&!2BoF4Z(2d zV)9QD3jY2Bc0l;bc`3~zs(p!cYix+6F8Ap4USPOb#Hu*ux)D0SsI98=3PsCUeJt(U z>6bi3HT=r1X3WX;fwih$$WA$By?y&s#EKHeXMWVFn^ogH8o0uN`XC2SMhKVd9VX+J zX!60=^^y1po|Ti{=)7M)Z^snQ_}w8ys?;~m)XPILX!Xyzl_@c{O^`-(q6fW0KpD~W z<}1@{sul^fh)nPMGeU4s}0P(O+ zJVdmSVz1#$Yl{5yDz39|p`|v>Uu68r^^Rj$n@3daM6t!ukVEss91A-{4vR8|r>38)wWX6^=ka;H4Ps3rPs7Tpp4gJ z%$sVy`V%^^pnRC-Cz3I5EVm!0SAgNJ!3;Sds z%;?qlkgZ+y=A%)9cBG7wxX;GZ7l>uV+tu&C*NIAgxpqjHDN4*)BK^#@)s*`)Uc#V> zyJF7t9?A%Qqf_#y4Yo>rC8%f}k1*)zgyGMIh!&}>a?fkU4zmc<)eq+`@JcAp5CvuD z2xvJsIXqTNXVJ8TQskRLK1^DIuMQu)?{V);-WV1z@j^F#37x@dCj={IId(Y=u50j?ssK^b+8-rSF(vhiwb77p zE*=_4f|YU%>|{jLN`^Wc%i}OzNhcE)2}bZMKUa)?I~EGmLTUJ$B)pB4Bazd)>Ri>Q zg|fqF0mP#&)pxW8rPicZTGtj!D%_F9e7_)HO3@-)tCSU2(4fZu_(dMRrBNl!?(jQy z|DZ{j0yFf9mm9yO1H^Z$V>{q`?b7|p%0Z&k>Go~baO2Dj(YqP)X=sQ>-W>^S1AkBf0xzT=g9 zW}!7p$4q5zrv23_ERBAdA)t*S#~N}BSA_JO%|JvE@MSX)Zd8GQxM_D!yHjR2=ZSG1 
zW*_18yWc#B{&!QK?BKNO5N~r;FCE%ATbqR?3hkKa;n8oZrGVnOg@v8~y*e?iRlf=v zFT5Xt=osY8_auxd9Ezx4Fr=4E<`E1*3Ly;KvAf7{YG|~ds+unJtpe!JNxa<+Sdeo4 zpUoTI*sE9&absegEXUUjjj+eYlFEwzieQe@svl^{(J_V!R2Dw(QpQq)5K*9{+V9~J zlKC5;dCtdHLZ)Q4q(Km~^g&{XLvHRu)C@QDsVE>)@`N)rLr$s;)m|=p#j6*q>|PP9 z)nk%o5zWJr6<=lz` zKSI`qgWw|#qaDl%!4&n%f)*gP%`LUBzG6%pPO;z|2dZ3V!Iq6|;ge&ZVlm$8h#j_w z)`2NFd_%@0sxm4ajX8(W4DzKh=>RQz$ZanSWI}*`^2c|`$*^!-cR2%Q^{2!t+b?-B zEbpM*x7cO)=ff{e2@9lb0s)CDEMEzI5O+DB??yxy(>vb5N9rb8HL6~Gg8)OYy$j#i zt)q1FcR>Nd| z!4)A~J-uev%)rOH0F;fzuy-0Pz6%v#Qjd1hhjh)w)XJn5!K{{NaLP4*Wk-{jkS#nh z(pI`tWA`V(@gUGNZvbA_5;$cU;f`PPi`S8%tgZ!58>$#R;g(c?t1Q3gjixA}Z9^>mjKunKN3@)I(^c&F(=Uc=0Cc&C>) zh1crjS!lzAXb2m=Pp5KN4w8)Rc+l$6EK}O8L#s)wj~>Tgu$9{cHtCIO8fl$&wx;{w} zdC{hpvawQzBs*kQPDAo7)A*iwC{`~il|p`2UAw{z%ZXsQXQpOVWZ-X;aUat67 z$v1mqvGIOIYcr}Tk6kl!%T#wmtPGE%C50Da`{4|1^phB8w^9;1LvuotG@D~IF=?X?XITz z?zctm|GNIz#Y$hvN>6Wla#?YDd3tHnv`~pBEoH;NFbhUOY75#(mdIUi7C$LM0&0RF zWoJ696^OEDjAFqYi;4p=z`R0m?qVTfRQo0Lf@LH(BNzKRKtmlqXnMbVqN=+3uh8yL z%;RO(mAFnJy?3EAlIV{xe5!j#38Hpy(g~1<)4_P=n(L>%RH2v-gYkuqUnDc?s$BQ} z6(AFYAgR?);SMiuBHaNGcXf5k9_z{YYr@~WqbbOne)Ddr$}fwwZ*V7{cLi+wL--p! zW1hX^{jQi45-u^ea9?_D;>W4wRAboqWpN`i=Fzo4Dk@r%fZ(!UGc~GG42Z<4FrV}Y z(l35W+-!gnyyG|OA=FDL5L_XnQk^Mc#+>_u5Hxil&6`qi$ zbh$+E6**93lv58mDT-2>L&y*lVgg>+AF#PUvYjACF3K&}Pn5s*w9F2QZWhCRRObkwb&qX{-W8>Mg$oEO&?P=Rq?I<-&zg?&Uy6pHvX4e7$tdqCT2 z3FUk482CF>@0oz%h%J>t%1D~(M7Y^B45pvGN9&>}OpwZ7kmUS2O;rD9!ARJ2d36~k z?=sP8{@ff!jafY|Wm1^%ZkBY_W$v41*Dxx!cLBNfsq%^% z9DgXoL-YV|yK#V&N$w-sqkik$K5x5CnH*E0DP_(WYdxl-A@nLny^7j}qicd^bovzk zSKHLuYo&|Z8qzDDb&RelOEdi$UParAo%DE^8Z2_Ggu>DL z&V!vtVi^bc>|Oi8KX$z{1O8zCu6BKZy!2Uz28D_@#f)rVU*V&Tp>0Ge7GG2szzoAV zh7820wHH$XBV_L#!YQtGN=QfOaFK*ZI7c|O~QT(0yN=ogPp%KBQ0NtwkF@xbCuH7pH?q1 z1~72XMxiS!^lO+3f-?$WB@4E3h(p{C0zxJ~0$3Qc_YPOy7^c)c3EfSnQcmVvI>}^m z6f4`gn@&NI$1R+}P(4w*&AAlmEV9suWNjz8H{M^w%?AwA9^j#a@CcUfePvwbHB~OL z7_;w7AHf&&K^W;SGl#eta13HhhEceWdq8erKvUDWI^fAT3Kg4Rxl9y6q?|5e-GY2L zi{FM1=FB7WZHMNE4XlCpxqkhuzTizg!!6nCB=(<)IQLc)bO^IBgkQA&woJlk@|!>c zn7n+2*m)ay3({v>`c2!}JZ&k@mwX#++NlX!^bG~#8&2q=tp+F-yVdc(ag7+tpUVJ zh@7&i2M`+Bw%mxU&*a&HCnP&!nW%2xp7THlwUpRL+e{+7aWAHD5QT1MB8no2+S6XE zrL~AfF@2xJYct{O7&IQgyM+X<6>s)Q5Q5)b$h^l4EJsziygdbhxzFQLna07p6q;9+=$aXnW)F`1A1~ zIeQjV8x6#Yg+#6DDC`qVF;(v)rdj88rz4eXHK&MXY|dRu9TMvZ7kADpB3G%Cd`#^Q zNb~c4!edm;$UP=d$v29@M)_v^Y2heZ;--zGrbY*vAuCF)iM;SXafOv!6e9W;spBFg zULd;0IVb&%53gSy;}|}opD+~PVnR_VoDL18Um*4_X}P92Z0YOJaC{W@tru}y#yXLD@_nx`?tlc;uS~T)*#UzeO~~I} zv}1oZoR5u_OjsPxATq($3+q;p>`+*7Z@#eP<=?kH|6Ih`a{HS#;@#TdEjjGAtsK#4 z##0Jgr@7|fwL?P6V`8LuR44K!WcBG+!v}%j@PH3@|3jOBywmQ2r?KKKnMn2QeYO8}@O zKjbx2-*jT*NxBJ&_y82Ra#mSQxt(|%&a0oUJC^$@7ko=o=!MY|7EC|Pk+#sUp(R1g;yfA)5Uc-Ma5>sTK`d&h(XAA zLon{LiKy9KBjUXLgAkpy5KAlkz;W{^udT<3(?R}Hm9FL2o#3QI%u>_`M^SwEHE>%p zj?~wQ0o-RnoLTCCpRo49n|jpGY}f0ejL3o?b}Hi=sYg@a0OZVpkU78^*!Hr za5zqznj~x&x>ymnx~lYu&S9n!l8VuFv5~?X&(Od`@I|&4I}bX!Hf6DyNm0+)zrDe5 zog;O6i`1~zMN1XRc2~!!WZ_RmXDZ8l0#AnU)SxLm$yU#xpLv6!8NV^5w7(r76oHom z;E24l%@_2F&Mld_ApqIE$mftXlVHShUV+ z#$*`Q7>|U^q8;_Y8h07;oAbzW)Xsq=hX|gT0R7ER5f&m#!rk+P))CH{4zk)a<;KL` zm22Uy=k#-Fn?S#5?V9bIr1pJKJ3j#|0qIf8N#>gxWneP#+;3{$@({77mg_G@$Z~~f zF-y@^6n|m`7k8%Le|PPwx5UsBwPmjZ_HYj#gm!Z+SOVre`Xa+j`E>mVXJ5sdz)kx- zB5E3m{A!43GCd~Cp^5kBB_pkV&7Jfg!E_zceeJAq{`ht=a@TBtKfqf2vpFn%ezv!; zzV~&=&y$N7Z_Sp>x?(Rdke@Bc+vdrS5mt;zU!FySZ|XAM1VqYn(to(?@x1ha*7&=# zeas~Bj1!;#x(;J%Z`_;i>_d;N7Jn!GfT&MSxRI%9K{O8C3V$o;$E>J}7Ng}FnuL0l ziYf?ZJk2zrl2!c?1q-uw{ChTT7JAda$k99D8+~Er;UNyajWT=t~(iNWL_y=c{{>;=iI$G$mC9!ZYK2qx(Xq19t zKX5S*m;rc0Dx6=BdLmXtoWL$@Pe%xGvWIPxsBD|JVR8>`tH8;(3Hu88Lg!!%!N__; 
zXe*7y^6CIA5UQby27dR8j@ka*k0{-L?Q^CSo&d)urF=Qu4_zSsH6;c(%td6le=yJU8M6E*tY)pDuK7X3z+odl*CW?&JgSMKdD<0cKmo@B{Fc2QO_J|$A-V_{oCa3sF%(_VL zg&NMO!5=v*^`=y`s+}A-3HRX&gj0>KRcisHFi&!UDF||f5Yw5F?DG4lse1Zldr96Z zdtu{zFKSWLl*;~B8}V$t1pz;T7a`2(de3e%)e5o5^xm50)f5P*VU#@Z6 z;No%0JN`nVyk||9?zq6*uX2?h^QhhP<7S{i-a5$;@lpRC%J}pPWdnBSO=AI`&7FwF zP`WSOdU77zqncWWg4&yGo;}E5h61H7U#KAVmYgFhz3~nV`g zM2DAbG(0$2N%g>>I@FKA(-6h;S(w9SS7DM|hnLO9@^2;{+;GQ^&}J$rb@=cc=7z zb+tsyYH8rwvl_T{n7}hVj*6#`ouB{Z74)oi;Pxlt%x{OUELm0({mO*seofua!Zck7 z%Il2*igSI+7CjhnukahG-3P=vjobiZPY@a0`B8SAi$q;H&Ytcv!;qsYHpkjEGw10X z1yi{)MF;X#NV@ZuE=KhlW}47wE}sX9T4sOw`esA{%3CK{sL<10HUT6AubyVQL#LSM zvB!ma!qShZfRIf%e$%}Lb(^YIy$<116mF658Z8nt$mg4z9O@# zV1|XtxC#u(Xo?q=YI9)X1s~N~g|e2~V~q`mv!8X9zYm4u-ur`6QUrWzcOc8mL!;@e z9^DDSdMv@82TESJD~eckJPXmdbHv3lV!v*E#pN_l#-w=nb428?h&Ztf0DE}DLNM!{ zbdg<$^FYLRi&|qRG%qYy_E@+rebWxuMtQrZrh{i(N4PcOYS{;ul8taB~A%UX=dWEFDsg& z=4zC){)A^G`c07zbboEx-9FyM^_J(xPs@^WHTX!Es@8WmyY9!&xPPu9|4QhhtnFV5 zCzENrGcZsfca|C$<%-OlI+j8V5&kW!bUlT~A6;##`n#+)l_}U>ulg70-jVy)Gik#> zWBiSb&53?brwJ1kkOC}pf*p&B6ukID30HO4*4+i)H1S=Du2ly&s0LAMUFO^`X0aC9 z_qd99REI@g?GF?|vFy$q+WxAx>^j=Qm*$cB^Wv19o*JiBy6V$SuBEnPxCY%w{7X}C zL9%R{Cu{yIdW7hm_jNw}G+dnbdNJrSxDuMB!}?aR zj9Rg*SW(s>i52FnphDDj3m|T@r_bU@VFmnO082o$zt1W?w1D7iPEfj|0}B+|u>ATQ zSK?~0Q@h*@x5}M~J)bM{WkD0QqE^*vTGMD5Q%2Y5YctyH%3vk-d{LHU3N)$b=eUm9 zu{y<0sZ-`_e4TIc?Mk=OtISkv@P7|-({L~+5J=24O;HC(;0c@c%eO;=z%xcO z@cyz8_UgsGkL_JpPzxhzv6PvWyV%t|#gy_|9fJd3R=-Yltfm}6&Zc#%l}iair)FzsELlbwiVQzgD9p5d7d5z>u6xPnhQ}jq@dq(xFS- zw3~5zv(vMXFINLp#^tJC^P7IlpYpq42ntEu6iG#?Qfh868c;|Q1Rc~Vdo{0KX;s>d zZbNfBpM})N_QK02bgtuvYO?O^xtV$Q7&_o5#j7Z=s8`KimO5|%_V(JP%k*(8eEZ;o zpDgmq`|mVWtGy5WvW{-M4WS$_6I2a?itocdIPyVWz8mTEEU%HAA#?yA#E~CxCU?&k z=0XvUmctz(#JAN#IbBJ!t=;T2dsEX>v&}(s4wh;$y_9yq0#B{2>|UXj_Xb&xmUD%C zp)ymQY0NZdIx|x<{nQ{eO3kOnsio8oyb}jBD}XiJX)o{*KoN;#Gr3oQZb`kF?UOCQ zzlEl$H$-4Nx>cI+6MDR3UDpk)=uTZBs+JDD^n`E{^wAHeCHavz znP%ACqX^+CtuPy15?v6c{KXVP$2B9mosP{t`%jR^p992Hk1N@$MlmeGGSK4JVMWsH*do4z5gcm_^9t!QT4=Ms@fg5N62$Me^OI@k$WCENIN;BP@(LYHUrRc z;Z&H)q_U|(u9&N7b*&||1vWm%7XvYia;PpgL|Vv?Q`qKf_1E&$MNM6X-Md5y>e6od z`hJ?U*O&2AygYw|mk%!U@=^WdPF}vybWZW|!b$Uz&Di`bSd-%7a-x*cbNTe-zC;}l zQWO<}!QZIhJkb!w)ui3II+~>ASl#@JBxDO>+UBJ(uO4vMRG|@#ytx!ov^&Qclp4ME zXovV~+ISk(+ZM`Bq1vJ)@ux}~8l*sq)m^|#Kjr=uqH2a!(AHu<%oZAjsdl#=<6kaN zg2^fPaj z-}fNnPY%S#zu~?E$ZyQ;i}Qg-+oO~~sr9Go-TJs6^Ct&`E181G!t$Y~os4 zz$+KG*6m%z8*6zn8=llu9r^z0)2+(hC2}>JgWN8^Ec&srQT^xwR+aHWyI|&KJU}kN*dc}nsd~i)^mGaZ`$me z3dK6GZaj@}7=_tJjwuh-#N8(8T^&wbkt89VS|R@E0lIf@ZRkCtD+vz12IqwZSqQQs z(fJ!aA|Vkb-%>GJ+qu42M#Y^)`{Xgbd^m-SA`-vE%g=mL{_WqbwcD|d@0(z^q*fYd z=5rQxyycXmIG07kj=V5s@M>*-zKC2GLu#;E?qizjMF7O8PrebAA$;h*DoHg&9H1r6 zsVfUnSm+*tDS1eICe6!l{~Z=fL#C_!Inyf<*91n~=a#wSvBpNZWftc!Ju668uZ=sn zi~D$HxIAPU8=-MC*UGm#(^J#kQm;glGR0t zxKz(1Q98;Nb47+qGgQ-*z-)$MDwM5NYSkKrGgMvlv{28~vy7>gOO<$;t6>DQ!pU6Y znbA_}c1qTWWd>%OSZuheOCR`zhvi@n<9P@L)Nv#I(DQp0EilcxsRDw#62lHz5^FpD z%|kxA?`BVYKv3@Y(dQk69K$dpgaDy(Fkgr1AJ4%|%P`j=x5%w?x5e`X%#@)kUCr0> z6p)Qa3nL)dD=^G6V6rCh1y z>bY*K*YaRL8%yiUJXu@a&BrD=2dHkLglMzfW>lTu5>Q4W4{tomS*xLlW!pD+-p$C!G;bjfp;Bnn~`&6&^2|4KIa@|xx{=N5n zTKW@S{)!*>@ns=z{}trzM6Z2XP-%0TxY?LSVHuGki8iJoT12Ah1_*jcp+s7GS#R2= z;toHvV1pKc1VPBPq>9H2!tN{7?B!%AC{ssTE#l?(zrN05j(PSah^fy)%&9}v zIg)s$ukspCmF>J=sKz|GRcyze8E^(~XohZ7n$>2b&}OKS?o;+z5Q?lq>|y(ot--eZ zkSqh<9P4PDSe|}>&g%-Z{&c-gcN-*8Pw)&;s$Ya>FrInHKQVIW_Is$u%gs|Q*9V53 zyZ#R>@3A+(nR0*c>`x$WKFjUr&f^Yq$GKN=4>6n0jECd7u?X(fPzq|nD3}HNh;zg( zcm>)ebFg+^?Sk6j+Ofj%!pXv`3IU@=GlSW|j$J! 
zKja*Cj+}q|{1eWr9NPruM2jx8gRmS_g0|k#HR^`!IM?#h4r+qIn5w6VGfq5`bsDp3 z?{>!w#~}kclSp+A1fm?fneZ(7{uc8MyreAk`w{6aEv3`q?1C=pp+6o^PY(uzJkO_S z8KEoi>8u!8yqMQ_E~{!_9OkO zoRrXt)y#^zdEC zD@J+$3jjjhRG<)dLo-tJ)0Y4n{TyPj3b|*@t#Hj4eLA#a4y#T4C&)Q{Zf~JtgYkKEsJ8{X{70wZRwhGzoA0+xO1Q)Gz77fjj8_Mnp$(K~&4TeeDb}Nh` zJLMU|-h+@!714FPb!kfQuj=L8$gX6vIGye7BeE&+#sh&fEaCiI8oOao&k+%RINkNg zzL~`apwS5>97AVBu6??SRC?(BwM zhyk^pS?X7Bs#FH$@@!2~(+nTJ$h`o3-eyQ5LHX=_1?*eO;fa%WG*$}sQhti+yHxLU zSbvqKvVtF7b%QmY(GMz!;;y6OG4TAWr~U{=j`Y zdL0_THC@+LJFLz1xE;yKKNtr+*U<5&d?i-hJ7I^N{f9ikw}h4eMz?A0ivaHv zVS!jP71(jRVJA6RnF!32Dl;uezROsY(mh{wGL=#u(& zj?(jW`uSNVsw~R9{9muW!bE+_NekfT|DAh=W@hm9Ja;hmaoX`9>9m=`)V+|}I3LCw zhcX{}<_{uQh6XW%(?q~xGEaTmfLy#;7^7J>OlS>Np2pjN;$U?3!UDbmUv}@k@B9G% zQzs`8>B4q<(Y@%%U8wiWcm9N;S!yQ)cKROJNoML}TrqMc46LrTwAfC3Q7D>Ct68VV zZ|SH!*N63%`M7hgN`uF*U*p)0;qcvfw@ds+?=t*qTu5vEuAagvdrXL=N_&J*Qk7S~ z`W>0ja`?&_-8$;)kzF-BWP7=tvc&6>EAiJZ#&tOdaQI6MhlhZ4I*I1wa1I|UB|<2~ zl@VSNBnmUBECH0TF{zAiKX%7PJ-?kt#j1p&{_)L+gIn&Bkf5Zi_-1^^9=Vl1Q7UVP z(!i~TwX+3W_h*1<3cz(cvG(+2 z0k|L$EYV6?X{%VH-3X0ZyVNPg^$ZT!pvM>ZYOB_27CW?_vFb^1Tb9y90xx_*2$kQ! z2)ALfpbzn6*s^%F(CZaEJ8-cZxpnu@DLlq_{3cYC_&|08`Z+Bqq@M(5Tzy3$3aY)) z(>D6I_uUuUJ_~dC9}v4^pN=LS3REML44L&I*~B3p@d>sq1r}Wg zs+cA8M&0)P>4gRAy!U5F+G}!;KchR}{|?268Q4?Lb6=s=L(XTP7?gAt zDW2{bp6NM2D&knB03S0oOQWCa7Z?luW`BtGt1#O20dfFL7bq{u`1tKjw zSc1D@u~15M+zB9^Fr!4<7*nLatv8&4_sQ8`mIANZEs07#Lt80wxy+sUNX$B-bb%Q< zH9gb}Nx4MXl}0vBJF}uF^=ncC8#+-gAalGtO*Yg+#Kk+O(y5?5P(;XE!;^S-sAH8Z zd%6+nK}grj2Tt+=a3de`$_N;0DcYTx%)`~H@C`_}uuOgBBecJ& z>8g9rJ?D4M`TfpkL)Zsr4bAL!x6_5gkRV-8sKi1|)X6#pr|3-L)3^!|saCD`djr~A zX|==B{_}0V>XM~%((4X$oAGH0g%pHHt9bB5;qdUKKYww5A6=t7d4 z2V)`h@!%QG9gjH!WfA^zCuYgSU!#%Ou#uX*$h~SJ_T7_GsmvW;#cQJCHTgf-e-+7! ztjD%u9|-Hc%Lzbk;03@i7&brkg^U3;TV|8oQ!7Bp9yO4fg$QLm+@z41Nj|%$3bSU4}Pmke!TtrxBMNrSw?NeaMYA}(S()|-0_dDrkftc zVK0hZ&F2gwtLquVi2VCsVE_InC!L{ZVcf)yKGp`<&raF3+4^jAwlyQ(vOv0i*gzoB zb`?)aDk&we6qM#lYh|=DUNPY?O&0UT`gCKuwYt3u^}+;(D;PXsxOg^IzzABz%QIQ_ z#d+aO>c*c+jo|y+j-DTt{-M6U9dN!<`_c<6`;e#YID-U=Q|uf3foa)2zJ@a?Ra{W4 zn9BlZC6j5r=hha!%_YqM-W+%2BVn+sBdy=WjxB+BBfwdnbP*9Q5P$7j$L<;7d{h^& z7vw~2EEijg?Z%E{m!nBdtF_fmy$jXvHz9o{)IuAoPqlu%583g&zuJeKpW^TtgVtE3 zTy3v(=m^f`*9y!M(N)h*5q&Pqk55W-&sPYFZZ*mSL+Z;vl>35VPZR18aBzPj66D_>M zS&Y+x8{uGo+Qk`gG3vfF3$4PCAMqqfXmqjVw{aFU9!*$3O465a_jKdl{!xfsdwwJ< z>K&&rI+KR9DT>oi^V?a;iMy#@Lbj!(JhvT&=NIs2{nuZ27J^_Krj~GKiac;vFl2)g zAbnr+c>Ku}VO=s6O;C{Ez;99F@E=_a_7ftnYS(l#A@hWvi(@3r!Ei!`=r?Gwd7=r9 znAofa%92g(Ocm|z+Rn&A%?I552Vg?)a<6IoY3nYRd%+vg{EBy@7|&h2o((ZDV9B>) z@mLC|xSMPJvJabutR)6aolN@tg6Z zbtUg;H0Z;7BJT=Z1Nep?TnMy>!hQZ-xlN?VTZM=5<5yQ!Hd_cyS5cr((Ls|hAE1$W zcQ_o97z#Xrw=i}{V2`*R+mFpc%(J?=v9?&--rU*T-3$@0(@L}&t!8U=XKiP9vA2Nt zr??K#6YI4GKFi~3b@03rpc>}?XAkyCL1ZbZgc{GXje?K)2Pw`5mAR~{Ym#dDwj(}T z@8*UAN0;)fVTwUXHsw+- zBZ|T3k?e|6!ZA@?lr*v2L4F)>ylt~GyOp16uRKW&1}8PEw{Y%UjU9!EV#$F`Co@T? zbK)I)vzRC)i1~O+v9m_$6sVLliEM(@4~X6fwlHG%p5ctPS7C(OI{A1@KHn#g3n&W6cKnWTl)}klE?PlvV#dpkrhXAm14e>uhwhzm3%utNDLFuD}ds0v(chGQII^>2R6%zuzKX5 z?pR3N2K5zr_D&ya>`6-8RyASwh+!cI`{*YW&zZcyn7mwBEC{gK+mo#e2Oj4dahq{m zBq4luEmX*U=+_vHr!xkc;g8x!Y08XxUQnBR3vYvkarxcQuab2Kwi3p7I`oKv76465 zLjU)mEkcqu+qFw}*>2e@Hr|#ubMZVWLYo+`z?2&g1(~9Kj`jO??uNL#^Q~b%KaA7-8cLuIQMR}MSr^QO?JHd)oXZRK z=Bm&2G%gM)I`>&ezCC;xZV*GbLMlj8GYMS=7i| ze=;hRCTSXG?Enw{f8sa?`o-DOXQZ;tY%ALv^+&U8=rc&}S*zB-4wvv+#%oTI);NU! 
[GIT binary patch data (base85-encoded) for the binary assets in this import — cheatsheet PDFs/PPTX files and doc/source/_static images — omitted; not human-readable.]
z2!DAJ+Ru{J`%Zjt<=|O1zl-o)Msjn+D<@tBB6xAX$z-<;5dMA@a6;Pv%%mkrgoxBS z+Ldzlj@q{P3K(mJSp$5M^*D*WH`0JxWWpwyOW%3Nr8lmr9VEPnV$8a;ZHCW1C6F{F^6Q2LfrSsY*6-N7x$3~DxKe8LYxen& z2sjk)kpq57Cj@dW0gojRuy{xrG6!GNR;?`Rz36dW%*?f%(*m=@oNXpeq}fbmwxJo} zrDZZ&>#mk5F2<)dUlW0=MWaJ(av9?CQ{L}rqAIpT#{A#C=mc}YRa;C5@Azs`p(w=H zXX%6@8%d5-|Lo6?B-hqdT1y^pX}-PQ?WteaylZ=XXOU$|cm19w-}LD-e9NC&5$RrB z(lONwA6WEQcRoppsCVT`Yp6buQ5i@)WY4YgHa6SLCzU#dFstQ`+O@mA(m-81o?l)7 z1OmCVr1jYY`=o>`tn{=#sD1Z7pqia)U&VlJH)x^1JF&Y zP3|FsDlw5#f2&&Tzj&2j!WQB~&a$IuCSLHAenYLsI)|d?VgVoZi1?>IM7$9d#f_-B zdeh5G&t0+bEX0_C4Pi$~K}c_nwnVb)!#4PfwkLXu zp#!EaKl-b?<_F)&oPJx4$?OO(d1z@?UDz(u800taob7cKG-1otg)G;1Y-~}=$>J3P z0wD?TqPD-E7A$71B5M{AwK<7okB!rH{>qo0;05n5nXd_P>^|~df&c3>$ak_&Eg)Os z1z|81Y>rnM``~`y0zr6ymYRXi>_VH8It)Jt6`%-yPHUAl6-7mnRG&ba{kg4;{(_c{ z3li6uPbgv^V+e_~`iC1~(NC&CkJn#uhSSS3T)qrTq#*U<#pt9wFlUOldegtR-STWt zfwz24j<2E674H7kijIBV(d7Kbp_Gy=YsbRYj^x57%3Be%-L@#Rz989CHk;^AENxj< zKIiUvS^k-K)J|DCvpmUCT)(iWYGZ3&M&pVp#qCX{WjbFT?JX^@tn?HHGykT~&acRr z)~N9mq@_=ZR1Inh^=lT3cG7<03sK9%2#^p{qca?`(7DYRN~^2MY! z3I0YTTHHQC3!^8wM*woL?yC91fBBEL;q+F(o6vB0;&VJU!aZI~8phIMau;lS;+4fe z?_2ZuBEOgj-CeO@@2cXeO)o9!cwti&lZdr-Wm{rVQCM#b%_Kf}^xTt63Js^y8g8pI zn;n@8A6dTmiCatZcYgknBflx9t5d2mCd|2ees-LV33l=PwCbSClwuU<09GCcqGO-a zCPYs!nv6r^%yDY!^<6q1yf8gW2cYpMYnN!f-kgkC<%1g5s?Ov&Ns+boG3((uE`h(f!be3J_ z=rFrX1v&71pREvJa_RV3dQmzmC6&?z(iKwDF7-<{NlB^H?7QIV3||;$oby-R8t>Tu z80Vba>+4XtPtECwb4GB32;!XS=Eo-q;MpY)FV4zZu&Zs!!);kv3wMoiPQLFc6FH|; znrgnF>~vj-lTLQN-w5lx0jQIQ=2~Pds}l$XTamYp_DYbK{sQw&$nniDIqulOa>rg| z21~BPHIE0fCy>W|u#x$o5$<~nkw@aWeQ$g)GV?6D%NEQXQUC3rh-HljO)9k6k_L#% z{fKiVAsYCKg+wgS-i}FNueK9#>wPR?96HIRVDO#X*I|M}C4q@xuxeqx+2E_lnzlM` zQ`3nxt&goJ-u3V%8zJofE;6sVDKoj)pO}~ZXk&~2rX91^ezmgIj9K8lqePnZIhB>cvwG7_|GE zM|~#~vz1lU0U zepOUN1h-9X-?Jq2ZBk@9*Xwefr^t6ZmbpO(~UfH{fe!HgU{tAsf zt?brE7WGenAJoqs-Bt94_BT@kHk{ z<`PGA-WPOy{mvNE+0I(=2GDf`gy8{TMog!lmb!ERQyiCa8g^3@pplzOQZ`PbBbfr5 z6o6EFp-iW<%j`Ly+m^{HzmQ_71yage3Bvr_3)&uEo?o@`nMI3U*j&pvJGz%IV#$g=ID;rS#{9nXrk2y$Eo&;A zc3ZUTkp(MWT1S`M|Ce)HR&?JXQ5zLqkG5yyRfYkglYEidLx_lbk)^LkN4_A|l545^ z(UJT3Bj?y7_?-LsBVVIdJ=CM<$OHV56X=|M)DCpyLGB1N5=+T7?2%pkIiI3d+u0*O z<&RuOt(LGyc8}e?libAK>!Gn$Yf-EJ+uoNzwRK&K-cj>Bqwxv>k`PEj0?A+o0XATa zalpny;@V~qLyQ44ggAC>$FcL8*0^?4lfjAO48KX-B&`A4cu3L=tI6w&^WJ)WuS1%o z)9PybSJS5H@9_J|?ce9zD}jgK-~ZlUziV}wy^xNM?m2s(z4zIBpL5UMLNzDEnlDiw z)3335sCih#6(PIn-4aC4BjPo`gqkF-c@))v?B>COy+k!H?jBvip8N{hJvy)L^qpt7 zE!{oJzs`?Kuk+*dOBH>`HmttCr=p_o^Bpbsrz?JH@h80QM9^yXC%vvj&^F(HvcD8X ziv5qld*>sgtGC1rmc*8Z<~vFamL&SgE8?f1QIk{(Y7+ zE)p*Cy%d~&zoE8+vcNu++484diqt%KU*k^g${PgF36R0 z^tMIEjotRHkI)_SYj$YzO%ic&{(szAfsV&M{J@e(mYO^6kFW6SbVX~cH?2rE_%)I0 z>h!Yw%H`Ds%@b{*`nvQ6`=<|43`@UgP?GW-t3wfo*B~LkF+Zs*xgGiDAA;-; z18oi>2=R+0f1Z4KPNvnOU~N8Sa?0}j7SATNagSRDG3E@d;Xo^I?#7KXQ-A+B|eSHQ(RRTU0cB+R~f8{7HGA3v08Gg)mdfc za!hi$*^$S8&Y7R1mq`o`zr&X=GdKl}2!cj5<7lmFR@f)FGNX&;BaJZHBD&lem#**` zSocd^or+oM*6cBvtLTOWPTi7K+U&}|>?V#>%-(@K>ES@b)}41%9DbsqBWhxu&91s# z>!M|!+Sk;$dvlzBu6&u<8?q?fZiif+Q}pA$<6RYQ*>i^6IB15-RYMm>yLgVCe(g^` zp&jU-lwDr<{w0{Z5#CJirdv=Bml>l{H&0$p$y2OSF=XV3VWP&KbJbRpbCtA3>_T zpMME>k|G|ZaS_ij;2F9J9`R0~u=nIATgL!TIl^-X!t)Z{0(jm6JZAwsR%5hJh%Ioc=$?X0XFG`!TLBW?I2Dhj9)1Eu4J=kmSv>@0dH* zj&0lOv2E`dJGPA-?bzt?%#LmD*tTt3Z@+u)ch5QZ-5c@ZMMrmMW@Tn&R@FbEqq-`8 z^L9aBkkI_{{3JO|O{jn{;v;qF3Uo=tJf7mcpGKaeIoZG_#mBA>9>2xYD$)%=l$>Cn z4H&#pnvtbhc3BQNbzWA9hYFulUtlzaly+lvlIIDtzJ#w$VY1S&${WaL&`MW;Q_0o? 
z7HnwQpUO9M!m#I?Zy#zN%~uc9H%2#PR-d@x$pcGYjmS~4 zU?Dv$#0yy_6T+1LKpG?)2OzaF%1~2FNw>jf(ji4z(xGC>gZ=(F%Q?WR(D-_nM?L@Q z>m+|dt&r@*8@5UFU}&7yMc3Dt(Ol}E;C)OlgTrUj+~qBlsBzzAo8SJiD><37f@i?huh>1ANZ!g|M=dK9_WhZ3HI(f!Evw65fUl4@4=BKLSsYGVZQpxunhskTf zLt)qJ(C@~Es>}&a2nR4Ez(rzdBak?iC*R+IVZH+6sZyMuq$gyb(8`TJq44DV`{=h) z6C^FBw2pL&6GveI!z0B)HcT*Dp&zM;Exe-q=)uh0q9Axr^}NfXzNZnCPra064SjuY zKjKheYNcx7nSw#UW!%k(*3u|BGbxX$p8~r(26AcXU_}{Mevq^g5a&l>2fwU4c*CKu zY{H%+h11y$i$BHRgDDswj(8e1D$1=Sxci891dSavfa)IPFV5hW?7^m+$^K`D}SuCr~hrVI|J5OHwzY7igK=M1_Wz=t*Sbuq!PYph{*FUBHs%p#!n${ zDU6F1NR~GUPn@u96VWF;&zaQtJi^qUz%}M$CqqB# z?=3mI51vx-S|Mw6=)_Ph^zye_Dh8(3bBTTeccds(v(<^>GO$E7+l?ppXwqdz9}TF2#I zMLfeg>gjvjey+p=yecz9NLiHZW;@~`exKvTiDH(!Ef`i)BNy*++}2BbMXZPnl@7Py zQL2}MYZfxy2Uyh)FrD9Bz1~-zck2 zU~afrR;N#J_S1_C$Yhu zd2U0>_keplftnpUa3j;ra;-vubjpkepV@(?uXLs5$YjcaZ%@pfFgZVP!Ib+)IXe$q zh&iPN#J69fCm5QkYzDYy(rBqY&WvK$)+Xn_@Jm-{be4f-2(Jr}kt-DGIO5ttj7l!F zqo;ea-KY(!3qdvU1hXfR3+wC3oGqnn&7S7A>o+fO;rs%`U1FEU8ht##Y)u)wOnQ3q zZkr-=et#ph1<#UbgwY`}AruV6Iuj!j5Gu|}p%|&v ztVkQvau?r%W<-@C=n7{v8}2nfw~sFmu6`&AGW#5lCUS8}$ldkE{VF~lrA5t-; zPTCY)pDW)OqNp)gq;OdEU`&}9)hdmwq0*#22o_LgalT&o>X6)1`5+@Gn{ zbyNasA%V@-IiIzf{l!NX+QrP6ZP|B2;~Xj5rMm?k6;xUkH4PFAoZ$PL)v0K-&#%LN zG^(w6&u``>5z#;sOQ2wj3$H=+!NO;7O9R?KUdS>=x4M;hab2ML#K@Ofs{+dh%N{P+ z46AF&hYL+{g@QsEmbWQh_*Ke9*&G_13>*II-;omFuvJBtrbNf{Oz8*fxcQTOopAa& zzE&IfRgUMZc5N(l#QG2RZB4i|Fty(o9uT4|-#1)@ ze>?|Y{Gz=HU7^tKv8Q;3OhdiAdyjhI>5w8JrV~9cSBF=JS|%#(&-*!Cz;j#=s8B2= zvRLLdXO>5EP8MgDq$Fi7aOBd32J}lDPURm-p^K!;JyEyF#YKlecV5lNm`mb**0t$1 z^_M=X{QxQ{`|Z_VMw51}mWp~v58?e>!)YXNhf*qp9fLDvE7OPSq!#XbAkiPkuAfDZ zd?QJ_k7PF?LsNTUnLu`){BxNKxX3qvSstD8y?lhfZrWfl3%F6iFyx|Uqb;Ak^|^h~ z%k?a_)#H`>DFIR!9G;y}jZRlNRi!h{7U63dnM;;3U8S^>6IUJ(2QQtEwF46V7VR+e zxRIoypudHsZ*uyK{@f5U3A{cEs57d*VaP+?PdVuznnu;`s~3;>v&rd;z$xZouI0Xa z`z<_n`$_U_E^TO_!&>?>io!Fn^CaBqv{lG0^W{?fJt3BTvyM1#QSi80=Lar{p0(3i z@5rfMxP6)7e9cLwEJcs7rJZ!?Oxc_jhOmOp`0bpYljD57Xr!;Zc%>DXU?);@^jAD> z0A8}2=ucJ>+Jt@P47YCSSjxqfOA<@p?GOzEgi7r4U&{jmWy%C4T)l1yv4oc8%h@|$ z!ddaA=0M&;U8JPtWC|UzLmqIk*_awlCeSs=N0q@5u<_w{<2kbfc; zZgmL<>ZKA5#-42x24CMyx#d8phDv6t4KpTm?9`J=g+yg79O(lE5Q5rpi`Mh*by?Pb zuIsWXQ%MV}M_hOTn|qtM-+JL~9y{^f0`SXpYv}5uK3VkcuUjuV^+-0@a7i4)XX+9W zw#SK8O(x5=vy6kLGwaF_W)tsF4JrLQ{GGN>uhYfK2W%Et4GyRhHUyNK5WzTK zop>Yy`n6FJE%XiX^|DFAM)X#jMl@_REY^uB1%pi9A#=AgxFp^d>gvJhQ}?QwRH&yn zh*GcNX_7<-s>{*o-N})^oM!}?GQdZ6`_xINeW6(J4Me5DT(Yef=t+=?)u0$E%5SrZ zi=E0#d6;JV;UD)hyV-ueKDqX&zFN2K6-$Uv6BDB4i2cdY5NZI8`hpWNnb#@aDIo}! zm=>ON=v**eLOJz?HLZL3Zu}Snt^*duXzuv#N}%`;{1G`9j?fD2Htv^K|f$aq{myLvm9ns*l?NPGig=X0+JHs7*n=ep)SJ36?<8v-T zA;LSvXTjF;`8mo#l+8#e;0^{*UH%_Xzmm2gzFVw2UiEC9@LG?iuR~UBydN~IVbKfw zB|5~hvw~UoiD1+gpd1d)FEEA5`&&F9afG=S&#+rvs)ZVUCm6s}IA_8g(CF=^r-vF% z8En|TwUXh29Q7(0>KAWQ#n7BzZXsdwd3bZusFrM3xfAm>6LO%JhFYiaLB~oNXB@HN zVd^7&LoHx!o*7T{_oO=90gc5)953lv|9KTMB(xUq<=|H24G=C=S0so;PfQ z)j}-yrDK|4a*}!{YV&#GT7pna_G)k~UQan>5!=Jf%DbG2ickg9pDm3%OOfBZO=2~{l>?+2M)PkyVKqzhdaXz(8g%NT zeKSJ!j zm%}E$I`4PRvvNkXW|Dr!e{(_br! 
z*ng5f4;3Y2_W9TonT2RTU0EvEkJ0xHGF%yG0Aluj&3ja(yr(>SMZMVqt z!9CTle8<_n+fS#)$>u`FZP{p_QT$}KBoFcxKTVRC!7Zd z7E=|%e#g7BCRfA)H{~V+SSmXFP#vpf5!aXZw?6Oh!CdJK)q-%nKJp}C`we%dLcKfN ziaYjS)X(6rSV*x`a7r29cWaCpC*P0Um_Wk8N}8T4FW&<{sT)bnQY8re2b(5XZb+4L zy+gUc#P@Q4YQX#{2+cDrC?JU`dpm7T?siX~iCI!3^7aSY%mJ#0~QmN{zGq-p5j zy&1*aRvMZ=%8I*G}D{%woEb#*TAqip&cdigPO&mBGZX)m1rZ zYo?N8I;S2qk-f`8usZtH5uRiNlk9_|p|4yil^fnKE>BxFQ!OM3q&^pnZic2iVuAAj zyDt%^rdxpg3Ea@swZ(q6f2^!MX}#2oV94&M^7(F?F~;+U?nyetZzQV){WM!RJOSM# z+bhD(PZVd&nudN%Ith+6eT3|A(l3Syk;;x?rqaa=rK&ta-k`E9v?=Bs$k(o`@x%sQ zw!<5c?C___in*nAYuYwko=6FctzR;gedBtL?E@HAG#IenP)EIkUJjEDkv+)Mrj0?n z>QOAT5!oPx2^BZy-ke&VRm>6yrgvTBdsaf?^+jTtPDkPwi!vt*pvG;^BufUrHt<$0 zVS{UITQ}DY>sg-epY*y>0YVAJY%R229UrQGR=&@DCcJ@pv1Y%~HsY!;i|0(!aIO|{O{-(rcKof8Q7$$dy zCL>QdiJ?lQ!GNPi9M_UlvEWVwauJ}gOWNm{_6DNjX;PF>-Blcfdd3j=T%F7Zl4w*! zIt=Y+G?t8I{xbgjQ3^V{vXJ2uobc}Z>HHX^Z1D)vU^6t6xj|4*G z{id6u_QW%MxI{|zJdi0*;vf6apP^qT@sT=!TB7D9olH>}=_)PJ7^99JF;^?MN;u(m z33%a-)?j@y@UOC__d_}PD7q?%nRD3yi9MO;>=s9fVF3{8uc5mPh z(JeeH17erD!7RgchQ58=1(RaypCf#AUh_o%i;kDhIGI1$PCK%^a6$SsK1){~N$8@_=iX}*pMt*ihRAc-+rS&z za~A^>R_0CJ*PHi@ca;yzPqdH2H}m(DH@m0J>(91NkxvCXL{Mrdzuap~x`4^I_3l-e zKy;z6hF(`(p>;iijDT&$?H~|?Hpr$e%(XzBt4A_IuuABz?aZs6Gaf z4V)dKg0cI#9RiAY@WLnh*?2Ess308j_b7tDP3_@rd+47&Aq1(elh%}OoQD~WdzSi_ zCU^0KjN2In&_2a1eUI&rX$!mPtQp#xY>Lj-A(Fz$TG2j#g0_F^N!}c}f!nu0cjd82 z9bHkX7QtW-hnuE{qh3auhE}8HgqtQ;$I;Z#5ucf4YbK+*nZL%NgdAynx7U)lH)+sR zN5!?rv{6+D<#+m~CFd9VrbuTUF5IZ`q~`V%S@TJgECrx2`pqK76o3CMjlw2AQPT{j z;g6jeY?4WWX0nh^>>sB|%f2STY44Sq$!I*o3^nCsfOxO|+8<0Z`pBbJH*&+)-?T)? zOsEp5hlj*`;Y?9f<3OJmi8BdI+$QR`G-&D-9Q9XW)%!n@c zia6xrSJsqsUxX~fVfr{5eiMN}--C-1P9tqbZ!1{3EA+kUU9Es?EbL;I3<@8C>aqG8 z@NWCzJ^qnh1V1W$gC2Q?PMzj_SM+j9f4Mq@&82a2;cUMQp3L&M1W`fmlp2L|;9FEy z2^;1A4JCV@$_0p#1Q06upX#8S>(qlFtc!vyLIK5)ikeY&@dil0LVpEqNAZ{XVw8L9 z+1g9;jzV=g(0qfLuXEew=xTb(^ zBQ)9Ucxn}1@_?;-PIoPO0kI^#6MGr%K&Aw$M0y;>4<@837t&OXHV1@w*k0DaQnVG< z$c}>k$v`skY6xV%for!CHLN1ie1sr(QA;|c00v5Y}OEd7d?*n zgWp+N>9*)cc3ONPz%|V&m~&dU?+^Gg#wPrUo2`(q?!xQ##5eLef|}9&Pvd-nS~KJ~ z`(_>RY;WcsZA)HA7p0868G|)@t#x&uW~4)gU2&EBGR`2;qF+b322yHhOZ<$=`4w3s zBm|=9`wV6}CamjeqW4+oI^ptBN6_3mWw(;USEATh-*ai9YuKMRPfHB1Lq&(_9buSf}`T_&C;hC+3l2 zW4^??#vpw`3TavC5w>;Ct@87Z!j+3Hl(>|B3_cAD8wiEw9c&I@u1vrsJU^4VJi}Y> zL3niBeLp7WzmG;#h{`>=*v=x&5&=K1v40r-1IQ+~eq}frdUy1dXfJ--g6&p}`6bUZ zk9Ue2bI)@|clYS(%Kh`{Nm7iAg_|3DBkh>>%e%yYTkMy|XxEs#%NXa_M?|FP*oc%o zM?3Syqi#g!brKh2G*Z$a@M8i*J>&xpBn5k3UNdcm;66Xw|*Hx2sKh|3_=$NS|Q8Goyjp*LRZ=}Po66W3H00l=pR%Xsa|qG&3T z34ML2JZ^eBJL2M+1knL|1rx}D4FN8)X7VZI_K7+|5eE|53Qg%LM5c8T76#?!6c9j^ zvat+VPp6N_|3+?gz0VIW;v+e>n-9J&`4$PIcxz)Y$_xz#?r+N1fp$D{1Y&rOosEZs zcX8H|?3D%fYbM`@{~0$c zGuz(*v$FkH9IR~r$mee*_J1bL{*TcA%Aft8`t1LeHY?{p9diBK5V3OoyHoCeCh(Ux z?*H<}{_l$H-2dqPUo-h*z`^tn?|+fZ|3Y&7*UsyzsoaR;0x4tw3^Gx!Bu*^|2{O!dFuM_&y@;^bvuY-`7&r_=GYQKH)y@J$ z3K7i(Z{AKbt?f}W(FKX*<|V&N8Lu3Af7++l$LA+c`(s6!&M9}XJpDRSK{yo#QU+K) z%^*Vl7l6t7qNE&-iprY#-SRI!Q7hsdgjofvOwRYD7EGmS4ADyHqMVv;jp?pMY*A7y zv=)VI_RFUPZ%!;ez+788)rQksg!y=DtM4H;Rb z3=;68Gu$oBXC^S@)Xmn9;F>cWj*#m*@i;o@f^a$KsP{2~g`bk0A#-tio2^~NW$2RE z@*r>^+wCDM_`}g>SHYIg@<3ygenrfge?`XKZ-e?4i;Me?YoqJ;^I;Z;lm5bo(<3VW zv`}JlA8Dax-g-tdhJqaa9eDpBiXVsH9=)OO_x*!WzvZ@1+(%=F$YC)DlQ?h1iTCjj zyE$~P1x7Cs+ZF zE7zarvdKG)#Jv%g;AMrLtgpC&lz%@4Mh4glNf^@1WJ3Wc=ogt%n*1pPguAu7#XE6( zPWSPfoFr4}r|tA+c7d~T3WC`eyLhOq{e%=paH?eSlnYK{qCq&Z1_D_{kKuLOHS z#FOnZSn?4tr98$vu0KExTWWGd&LZnXa$N`!E)2k!rs|(1T&}qq;Ske{JA|FNqB0O} zOy5wj?^qhA6!v;{3ldi5be2N>kmw}xb?8^^UyeAQqoP~{rv%MI<8FZ`Ef@GW8YJlGrrXUt7>1Rj<$Uqk~G42h-Z zY1=H3&mcwSZble8LB%T70O#rxGv`*%eaM#Im2JB|;dm1A#n 
zshdP*GGE7DC8D!_?oEFjYm#bQaK4718irb0w^uB}^M^5;XYqmz1z>_8h*!p@e>^ff z@*-@xgzdu^?0x|^|M21f4)!%(L#f|7lite4DTw-h(N|DlH8!wah~o#N@ALbLeZhXS zYQD$WGgMXGz<;O{tmQfhza)P5)Qr#ByIR1oE^!odkm{LtD7mEJ#_F@xs*(C~koqKm zX4cidfa%94b_e`!l)HqPvg=CP8Fmli!~4tBW~CEuoX4@~Ncj~Ap~Y(ve$@XgE!6k@ z9Lpk}`mqR`5W9o5ZvyEmi!(#O!1@FxY_pzJfT_Ln^yA9=$Z}_SKp|xDvnubz)a#hW zNnY3t@x@Gd0?zfY-S#fYy4&Sy&D!n&`K&x&y1U06Tl-Jd4Tfx7FrWi04k zg}Yz3Z^+>YwfLip_#+mGOaQrfGF}yjq0W2vf>Qtkr(=tzN$x!++pZmyZCrER$>NkF zJcv06w@_hJ*Z@v-xjbsd%%xL-pC=w&5LaB1zT02~f@RljZB56boF7t{Jh8#4RU$uj zGpgkwXok)%N@(XCf9NJC6UY4*docf+64BSG$1bQiA1<+w{MjGp3mwmP0ePt_1qdvL zI<5NNL5Nkzh_CzLXN)*q_YCcmCDv#$^6;0xq>;cT`h4v)thsu(J*CI+K2l%>zqz;E z@zxd_WgPl=<`}I0lxA`rwPs#WKh(4L6Iy2Z?2^D)-Dd8NXw1Ym`>M0Uz1~0MkVXI6 z-!ss8nCg=Hnn^(ZdZkZ{@YdcV5F`L~xQuZ+SlY0MPfX*{s1;l_Vh z$PMIsm_F7NKN!64YVTLI#u}G1Pz(4grYpRwh#K0n8dY8$@A1+SaIXsF7wnyT6hOWM)Z5WW-aU8f@LAYDVERtPye28-6NC#lzv=<=TNtZ)z zNMscQPQw(`a^cd;l0WWtG9Kc`wyk$21&+exh_rX0^0DF?j$upg(CNQIVx z#xGTY?ZLk5dXe#~PQVAoOOZUgFTR%$yTL(4@7n=?8xK~uw}wsCB-pFzy2d@of*$+Y zUDBekKC62ySeKPBqIyM*n|_-IeW3guLByiNYJo`S3f8p`_yR}RJHqRLdm3)sJNpYR z%PXLxDD0GP;K;HdO+$0AzKZjasWppc_|n_O2u`(jh$siwOfp6w*ZDd^}n46kEK!Dyq_>C;=up)lx<0MkQe zyPbnRyN@pEr3-elLfI2p)<<$ZuIJ($tQ4Rozg0UHEs{+l$TiXnP$gW?{q)0{#Eg0P@xlva@kX`W4e>{#2_^OQ&9 zc6kgjV(;Q$pL7*6#~H78?;7h4{odHyW!W(3^hdf~vB05srTC@d4ZCjyS1(@OKCt-- zaR{-c=NtJWF=r}w1bnNBmUa>k_!G3Qr|Y8a9+U4Ub|ap0diV%W_^*#w?-K4vvwxz? zNi`qskmH(gL+39buw|@`#lbP9ba@DP1uBz<*~#(JWype_isiWdFr|06Xh3Q5FO>$F*1S!If5zy=gFR3tpak?S?+JomLA)t&*E75N9Wl0%y?F5Wu2xs$mpoY7x#gs9;E@>wy(>E_)brm-U^B3%gN1p z8!0!Q=sz7Sa|ZWiqI{oy+_PEC(p%zfZ<)Dzl86F5e;JsvyIyq$>iM?*H%wiF^NlYi`+hxe&g3G4eQO)5Pj>BEIIE40u zkOW=5ITAFHy6Lw@vqs}pPZn4a;uX_-#J28$c;j!v&m=F z_PugnsqNmE?o20}{o&1jTI24S*PFxaT_dfL2ixG=#HM!kHPo(Ha&QNfkF5!uU)hHl z(*Vf3Mn-g}Ji5oJ)9G@NY-5}omNB?it-`#UG*?@RRGvzO!h#)VS`HkG;%wA~yRQ#v zt8!3O%o2w?{`ULJx7q9#LXE2)b)gA7Po&Am`E6dp#@Ew*T`ca^ZVC~*VH`{zz?pas z^>)y`2dXt|juwflI{6_5zi;L|hK;b1FLh_+VO_u4KqF81_fsEVwh=$*erVwjyWCk} zraK~w)!3kY+!x&QPXai^z|VR@=wFL|$av*`q<=63&Ifz;*7k|GGzG;8A-=!X$RO|V zN_2kiU3$8|FWEL_r+MT|SdZyCZFJ+5PN2W;@`hFm&c7053(^lHKkI{9r|i^eDb4YO zb!2rqWzm)YIm^F`00no4jT1hW6}h^_6BN19U2u9CnU{M2OY*4h5jeyTf)9ZKc22px z>HV$ylsPA5UCA5wbEh}cC&7LEu#R{krux%f zfaO(d!|RIAL@+8SkVAuFGrWN-;tHtRe`NR( zws$?LwLa@B#NIz%tjg?v$(tmu`{cXRoAkmrun5Hg3zgXm0-g)F|m+@BI zdwnwDb&TzE^LsgS3Z&nJ-n4!QEBD$std0GWvu$NkRobO<2G==MKTQj#CeF})aaP|i zk#T9;a9RGdqqMV%cb%wbuFqUbps)M!)Jarx;7mqi75Ytnh}mtll|SCmbEEScp#|=C zTSn74xZh;2`MrbqhTuwEUBd?D?aIhwU6Z6>y~6H&#Nrm}^lVOz+y{BqE#gl?cC&{7 zACsIiKfju#p|DGT0?yh@BO6$I+lZS;Z}q3%rG3D3!w0^oId)ZJN{(y`ZWR9ez+(Zl z|9+1rhZo03NBBpK|0Uv6`QYQFccZ4^=sL+H_D0Gh%PY!RVR?49!Cv+{K){Dm-|R$| znWIeh$!As}`7`NT&I_Vz=CK5@W}j@BFfPdW?VaHI`3=9BVenDUso%AJf;WT390yO7 zbD_kb_}Y$zq-%Nm57>C@A{M^i%wC(2$)FcBL6?Bv6?aaTo51|uYvfyu^1Vvfv`$&q z31oS{K|`xhh05q$K1_v*LK*hS<%uP`rknJsL#7yAp8-YITro}-$8qROo6m?UX3%I90{kIV(2ZN=?uu; zl9We%y1FvMhD-j%Tn6S0+n%1hgtEk;4^Iq*q5wh>SXn;bA@-h>Dq#s1yZU<7#sF)a z?oVf^hT{BEMi<8ohl>KXL-zC|UCBX3gb~CZixbOQC;DiSPQcy|azb~OXd+zGI!lx% zM4Cc?c@>QsLpyB&(3yZ9YNBt|)){^INQ;A95iLuOlgEn+vNS8v(O29DJgY-h1{7y| z?eea`_Go`Iu2%Aspo#TG@dt{WB38WENXw$u<=a{2<=TO+Fn(!@Q-xF8ZNzP49a4p# z3@nfkK*(K}6+K}&VLBn0d0)oqQ(>i6!M=o(=bKkrmYA)!siSZH%ZcTKz@DQ7damMx zl&`4=^#gI^m%I{gIhjk|N&$h!A@RPi<|zEsM@Ds?%Jliqfhi# zzyjsb=dbX`z{OzVBQdq1-%};nB*Iah>(e-XgltlO@}altUE&E&(Re!b>CxlilMXuv!(9xRS|Hf{cS%{Q$COD~suN*cR8$mMQ7ZRlzYQptC&NPUh3zB>n`$bC zvdMS2@k7>y-9Cz*omRgZvMeOJl<5!aw znW|XM?0Jd_ziLY@rI;$1mc|WG&Rib`wZ7hr86x?KnO0B5j)=!ZvgHO|c*kxo-hl&e zKlBU;QfDlbzuoW)B13zGqcCs#aLFFwdWK)`A2=E$H$qt8ko5E$pORhVTW*-ap<^vT zS3vpRNm|(v?JiR5H_}1*lGzmwb(RnR)OJE-4|9O< 
zFFYtswIhU6)xof1cQT&#@l%xT#m*wG&!KdR%4ItmGQuJxaXb~KJcncdyUx@UHT&-$ zW#V0vyT7cgY#cBZtXGg>*v5a^r20TCZWA)+4;!d(4*hE5sJy_tTwYVo3h5LLroBE^-L} zef+i|2b$OaQc;2C?Clvzl7)#Sufo>RAJ3-1H4aFpw*<`^Ahe*e=GcQb&^gZNX??DG zS(Zb%=c}Uv_xvr0q59E-UjG)vNkvORow}97&6avZIC5;7sZVjtoJj!xOA#Bc*C=dm zZYwMykbOQ*BLcs*S=u0SH>zrM{bXG&2c9N5 zcZG)mH$(87C%+AcE^%!X%s_sqy9&6Y>M@H=Bo!NeYXF$bbALDl)*|#Ne9Lqz_x(yT zzc9{)AedDO&pPCqqJxi0YgcS`YAQ7BvrOvFG-p|90y<-!MY(=3dPZ=z!sG7z9TZ`~ z_t`s~r53Ikh^m6IV8MELGb=LpahV;XmK(GLw7_eACE^bUt{Tco2{&IGpAKjJ!Q|90 z7OQBoqOov!qLy$%Jnrm)uYrCtQ)&}0R2r;}Ap_4?y%qMwE^;;Q__7m((=d~a`)F}H zgvbndgNP*4bJxKI12;nnL~a@%Pnd`5pIR+SIe7%W`q)cM!)>ska`$rI(E`!nBRzjN z$jL1X%gH$mcj*c3VWi+0ku_@KX7C3&YL_1z%#~#l*Ohyf4*mI0v<`p4ClxaG=n&!M zxX+2%FzxA2Wk&*ODw>004AJMd(uG!dZ%TKB@#;zmpWx!!O0hE>{5B}^or+(`T{zdO zd`Z4Bpz0v2AQ}3#m7*^m?v)_T`)c8wSF^2Pri_QIWZAbMnL<}*kZd3fr!xoGB33%( zqv>H>+#=HDe}0~#E=OKx!r7bBPLD}Dg5VuosV+F~wL-KEY_%X%6NQ^vIM(>e&nxT{ z&j)A9+$$!>Lsw75C1mWX{Nm7})TU&69EPpe#Ih58iyL3?PK47ewms9+4w+K{hvz+1 z&ngPt%iN)v;9kawXDzmHJ--&KvogmVFUitpt7xoI5cw`I;t<5%Fqyc5fu?#7(cl$$ zBYVUSGzvUj%3E2Hu$tdlNwKQ97GrN%wNkAXuP9N&s!hf&)mU1h~%PxW@RH#4l+l?re^VD$^<9{KntN@Gj{`@ho6VVr*tk?teA2K z;s6~0#vx@P12EB;IONVzQT5am>&!U zi%;zwJS7AK1r!3_LtbF-F=$XuNau7)QWR?xC(ZtBWydKZ0Rg}ZzPNW1st{LrAM$#cBN-rLumIMJ?2!Q=2hNA2UggLb7#l2r{vveb1H=bh!gkQ!#Z0XN z>|i^n??R_e0adW?Do1L-b3heL2i;x#6bBG1q!F%z=&pUr2`~xAPkL8Br2ZYl|_@5lw_1 zb-nZv9B?#*0j*WlqHyZRK{?zMIbbv-8I~Ty3bj@9hy+Luz=G2wy5j_PahMZ;zeG+w z10q8v(0DYC@PMDF+GNg+Q(6G{kO_1ikt1gyB;Yng9*sxMB72G&Knb5k=A1VL3vkL{ zrSLz18OP+&w@99n1!4ncGk9g8<8>2>x0~Wb&BlWNbRhVrp<|FiG$T7&s*8Q(k;(6l|JD zR6t>X1ROpwo1%rkVyU7Z&==qnA|3n!bB}pKu1YtjQX*LrrU+DY1KI+@;9W3ikVmji zNcONs&_*zBVE1A0DH7@Dw1#Nsq)PG?lNE~;LlvQc4}kOFnUI-aK0qme8ZZoy0z|au z*ebySt^p2!8UQoEmRM(q+ys`I7>>(O9A*jW4eSj30Zb6w%Rk4TFo4kiGT_qRF2K&e zDq#K3?ZHkkSK>GFE66MEE9@)AEtM@aL&_`WE0inZE6^)}YH)WXcPw{kcU*UPcQki! 
zcYJq*C(0|DYJ7J{cWigqC8%~78%TYSZ0Kz8Y{+cTY^ZFoY+3AtTZk*JtFKp7$WKQo zd`%!o;7FiIU>O0>{&GU__Ja0=_KfyO)hHHF93aA5wp$Ea@>_^o4|`JFrva<}F5MN~ z^xcB~837soxB*?@)&aQw?A`a>Cj~9si=c~Oiy#`{8lW0rzx-+aM*~LvWdf4A;kM#` z*EuFr66DlpFsR=THjS%Ot@(Z)|tw-ZbVu+sO~5^1?%shZF$Cz+APM11Ot+C*k*6%}1tWn0M_ zT_o!FuIr!5w3go6L&=)vYeR;*_rG&nsZtI4CcBqldr~9q0Xa-Hwn54%b9RdA&rgOp zn{>xdkfSLo4^weHXX&kGiz^MwBBv5iSWaBXQ$}HZ!F2bxSeXK*zT4=E#DdR2I4pZ1 z0eE?duMRYC1~*EdO}tm1E70j#f;lxHIRJ|ck&s&**0IIZNkZSj8c0+<5bL2n981qsKxt%ORc?`f%jIgV(Zv;Cj+5#z-`v#&qKN5j1GFXVPLA3 zJm;acHBn)Rbv2d}RB_8;E2BMUTT@#MtjiYWx5YQyb)Jx-DZCf!y^J{uZShe{V&w_p zKjjZVZd#($k1v&~@-vQ6-{nqew4NrUKGpmoEv$H77NHL2!ShzKu=EJ-hq8D#InR4 z;SX<8c&B-Ht#`$O1U8thguh(Ovp42W*vlY^^i8ujEnF*jM(7uvZ!I3Vcm{10G#!2S z4BIHgzZ38Q!CjKdnK7q`-E+si6cBo$ydyu)t0H;dO4pAV8EgnXT|#`VRn3%EJBGn1 z^A(EVQBErLr_rc0@;EFZKOsq1FrW39>Q$~tfOMvCrykE&0Y;EdO~PsCo4P~R*FrgV z<$QqRSDP%JP7CI`pcG`}Ch{>_ZnmiJVf4~+zdM4UvQCy03d}^k=CVoI-`^g)8 zjkpk}-<)c?)5Fbb$C@7L8FeVVJcfRW^DXMQgAb{8ULL9Vri!;iY?{_2I8BgcoHJ}p zt-+P^Lj#sZi9AibqA`;Z3fN9sxD=Nd8Ry1gk3LzJ{I}%AI~u zXxT<}0O=fzOFfNviC}N0HZZhj*ZzcxU7Nj3>%^U1M9}$p^+e!_Tz45U@}5PobcirOrNYl|KotbS#J`$G7pp9cK0A&KP_31KXA zX#oYz0ENTm=80^;Dn2inaF#q~%RKg4S$cS2V>-HIfVS?B^1V~O>^^0$ZDe#w9zNFw zwF^DJR4;Y}VD0;1drE_h;A2;+K(%`cjqkUu?wxOw49_y7qGWjI>oTRfg)955s;}sy zgr|Qh(6KI^EH)MIA8r{X%TLfAExJ|rE47i=Kc@K7Ea2SVd>9)v3Jeo^+3H>N>zUu< zs;6!M?R@&XGka$%%T7^S%d)C;#HamTJ1?-YpR<(LH#&INH+-DV>wVfP++O7g^}IhA zwIDlqU5=*7jn%HhBVHL#m*`iWOdiYo>Sp<}+I~7_ht^ky*3XoNaLPBh1XN1Z)Hpaq zT&VI6GBoOyHfF_#cLmq870yKPse1!tyZ8GzF5iVH3eeH4`wNzBSF^UHaN`rX$bgw6 z)W6Eh%L~fMvLNHLx$V3CLCc!=A_TxAWnK!B3W!^b9n>APhv%phWM=LRF7-FOKC-O5 z9?K@bFET9bySccuywNo2nNbJZoa?Ew9M)OGtBhQ@YpIp}4xFP__*JQ(O;}nv89XvC zG8yc8HtFmoGHT$foYmw;Ip>#no2F+*sf77u--a~e=`Jy`;9$tlqn!!>ZQAuMXmibBEXW|$3zo!5}4elxn=`){QjD9 zH#|h=QrK)rMa`h5xHTzL!&^UTm`|+c zEVn}TB^e!t%fNZeK%yALb2^8ShNJPQ#VX0_Aq$T3?W6TwG|T=;g7zCRu%jbJLYk^P z-RmB5t6~Jddp7~<`$;AKA^B`#`dgY>uv8*?ey;&NWX*P5lh&N{}S@VNz=9 z6Lr|dQKdhDr~tyU&Ps@2=EJ54Huy?slE7 z`pXdJ4Zx>?U^FA=5RP2824qmH(QGn~q!$`1FZ3J&0wgVRE2xf@zn zaZ$3ZT!TXItP-{oV7Fk3%iyP8%3@vA)LTJi0jY>NC-K%vSF){lWx`h@P@@T1sv)sj z_SQS6)}Je4RyCLWv~``|;6*iD%)N2WD>TBr==b)m1s57AJ-z7ge}Oa9oke&P=+q7dq8arb^qaH@+1OgwAkVE^<6| z-;AHHw>tF#wkk>a0#b8ART;kZqF zMO!r2SeS42qr&~#i|aoWvZuY-$QoZxzn4=&=|4^$EF`oNlw@(KQlCweHchTn$HJkh!LN+QU}{!;QEawwQzF z`7N-*)@MoomN;Wl$OAvd^TnJV&gdAc~?bo%z;c;R#ms1g^w%O~N?NJXujQziY%f_KW|6HI_2Z@r-HDOUN8 zm{ob1huRmnjQn)V63N%EUu=lt2}BHvP+4df)Q5$F+E-$^#T290_DpsshHRaGKyVD7 zT zyE)i+#=!BhYM_E*?ymP}kN!cTXc00?k(Xm@W)$SKzxW;_=S$;2ykF%B#ZVP0=dI)Z z5~Z?OGNaZvFuC7J#4b6nl@Vj-(nPYjhR=5>yQb%Z^(1@Dc!IFEF7{nOo@Cuk>W{~vL{&+~rB4zhxifuB;?9Cs7N-BhPDu5%uq_%>$cm7y zXW(6OH?X<8DLCh>sA*YJMzFYlv5=Z}86G4c?J)J+7tS28{(1_H^$A_SvZH8SlT@Vp z>2mC>W`#VoZOfM$~u8*zIVnP>CT%|#w1)WLN#rdp~_!V zVQ_pQ5GUSO<90+~Ju3!dZ6+!uDG-S8dPP!NKA3YZ8j1W-%wA7pM*ypc!GH zF-^`m+H}O+ix7Ck(E>UtCnTiFNS!$2d|Le8<@>}dT}KO% z%B6f zThCag{xt0>TD)ALtEi6ld})5Z9lv~jQvgS+N1dYAE<1ju3p(~;k-Ra)(Dgp& z6(w<3jn&sy%SM0ixU;wIE+rJmYOCQToVxoW(F#q~)5Yg@8|o(Ibok?4)#s?u?NK>m zHJ^tkS-GXR*BH>z^KJjbFEO#+^Q)j(c2<$+b}wTY$?C9*F<4Lp3!2=+9JF4M$coNJ z%aYlC_O>B}P8ESo;+>^nRKyLLnr^NtC&cw>Mn~k5Djg<*msw3lA%-#4%P{a_b?4e_G<5yL6e}ickUtFeU>EQIb`IC9T z0_H#_THLTW5_82LYw7abl~gbH(){h-hhtGSAgj)z+RP0 zP<_~LtC&(2VrZ;lCwlT)pu>&X;?e8Ot=3wnCuNcNbu~G6h8b_{+e=mH1$rGUS5Gpr zS7n2HK_s;p)gmUI%Dr7$;F(PwEP-*0u|4ZR_#6Vqnt=)PLXne;?PkfRWcYsMu8cU2~yl{PPpk- zHVh3$KJ`?+7q97F^!h8mcCQY$9mDC8eR~t{{iFX;&8U2t;Ys2#th#VHqJfMb4j z`oh8~(%T^{$%z-K&Gy^hBVb_QCSX2xL1+me?lm(YyWdv7lo(zW!;{kBY19?Xve|lv zbQV_96|u|v%3I+Nd))dqaektU7jKkt+({n8hP)?u%$T-gTF5?f@shoj0F;SIpQY&s?lBPA&dd+$p 
z?O+wEaA2Zr(Ha5X?W=w|nw7RDM_s(c1;q3W6de6#(+)hV^E&92H zJ7i?Wyips+5JhG?Y^^SS-NY{!t$wvJX|CHA3^`EQucmAG= zc!Ix2$*Z*#5o$On7Hd;f%V95TR|Q1bc}P=2nz7YY+0q&J@*G=DM~1HiSEH0+lR~h zs&~Q05-#b&LWn&8im)Bk*S0mv9JfM%&0}6|ysl421gm#PKA3$9yKVcIM4q$QAX0MU zs)YJMuXA1}Ix}83xS#h;WMc4-ac9W{nHIrmZ)A6vd>#v{QB+6F?5>r)CrR8YMKTkO z@;h}~8Mu{o=3$rTmdVa_Jqa&1{FwbyVKfqAw|l; zXw+QD<8{qP0Ow6bopABju~OU1=3ad02Qk^T$h2wri>vpe_BFK?7vaob-x>d0Zn*3! zJg~Wl()eGTKGWs#V}F(_o3UidR-tiCH4T;~oEX!U3&DQNRn$xFkdjta+*b)22t8_? z-qY)n9Al!o7T3$KN`Ju5?QPUc0$ zD&KylO@2N_zdJgq`*@F0+y!&2!CNX_TA;y*0$#EJQw3S;8)iN-CYLJmL7QVMcnv|B zCWJ7C#3lZ>>B{+KlX%1Hl}qGve*LwK5~qs)_s1qqymq|9#`r}TB;Nm;NcUF`?d6z` zKiwTsn6#I|zr0jeWB%aeWe)h~{AI zTM_0WkS@sW#CJfQiuQ~>Dj+=XRGh#m?|PY7n~QbjQ45>r1LnX~;H;jzFcBqQHaF%> zmndC;UM>PC8l_%>wP*Yx&|#U#C?FBO5T~d7ecGq3Y4H_J>7*?uwq0@!>=V2|HVP?T z(57lIQLB)4?HT<9h4q2^9Ep|K<$Nv2B5bq^x=ZgS2`?05Eps6rv#8ISOL}rSR_*>H zUEUoR2Gx9@JCQ79dNMxKC0A3Vo4)N%tm&Tl8A%vNooz0Jg%m58uXNT_r907}$#Zpx zLYJ8BlmjR~n9Tw4i9dKQ4)B!4?re$+V<)^C1p5{LDohihOw+Gp^h)r^GP`_e_YJAQ zn(V(nNIY?pmVci!ADr0Qr1qBls|r%r0w$;Zz#8F(7S^Wy3fV%>3sI+FKLWi9A16GQ z$1P_4YjcOKs=Q^wm|%-DuN&9ARS>LTS&$;GzDlK>g#yvTb;XkOu>>`wI4+Y}XWTZw zqr#$#BtfDuBAhl-H+KFCvXKIc>Vqss>$yN<@&u5z0Ci_2*QTkn3RR_4qi zav3q3vzPcqW4r;WRJG?}tUF2@Ob6H#!4Ws!3^=;vDPPg5NEuT>a4$K|p)2Gd7r=68 zcK3%f`4&j|6*4>|tV-D;d49Thy%I|X%xj6ir4!mhHqOd7)q!(BWaeT}*Q3KB%hp{a zQ#_n|)Rde9fzODf^Ec(WTYdN;ts5QdnI-3kThH_-PPKCvM-@mb>R7 zRBt#&ubm33dt14F4KwiN+z^^q_~#>cwXe%Dvzhs5x_985`V^dm(!d3iDzGLFDp$XV zxnx>?uPLD92#PWVQc4fyVLlnX+3s_I)WPDuz?z<}4~8`-Hx~2#*XIUKsGk%SGHG+7 zuNGEl$9(Q`nle?2Qm02$6cU#q;-fZte)eCEF{#}{A2;+|y>jQ*sG~ik-01SR4cBn( z{3LvXrexZQf~ScBDp%ZQWWU0SWt@nWX{Y0FHar%UCL7zIvf_flYrAdrTq_Z^TD0pY zC72H6b=voHt2kUZDfQb35p&!)uEsAR+vdLADjub>(W4VcW;lwyswhy%udPz+dF{yNLWGX}-HE_8- zUF#s;vewjw)78YDUO@{LXVb#Enpv6bn4`O!M4XLoB4(&ky@fmXo;j+g_ z!TFnULrm*}NMJE)&J>FpV6zi45An-Z*vB%bi<1cO4Gstugl0xDVm0XP*;2X4#$V>D zZKJUIe1$VaY-4HR*OQZJC0=9#k3Zj4%ScIb_n_jPA0^Ekg|UD!9Z)qo_XxP9PUq9# zl+i^V^Ttk@%;il`zB-^jwLXH8g*(r_(2-~n^7i6I@MX(V^98zPwOllNcAp8bAoyiD z?$Gl{;Uat^HO1Qt&O0*;JbM=!s?l$*f|wc$dS|!D1_G>ee51mI4J`Hg)q6l%aF@ z_kl|DTL~BNJMSrgTC#h~_bFTeQV^fp@3%qy3gHfl{iiy`zbhgR5 zKA0XlJjLk^^;&}mr)})^xdmR@4J50X>fn0^HJTbn$>K55^f~LRA>A%f82!AR1eZKy zg_sMu4FqXbaUTFOmdt+CC4|$R`z>z?VlkJ_FHd2uxF`u2dKUCnz}jn zq`PKVxqRNFx&dW|+R4&n@!@yuM77%xw`Wfcm+ay|eF(Ld(ZoeJ>wu1E^)lk85XoVU zE>d@_f;&|vR%Lh(T40YZ$j^!ZCz1=gpL|nuw;+@;H`bk*@u6b-9+KdBs&BS1Na$#n8*5a%yW0 ztrW`$T6JJpYeWZs@7SNw;C%~RXLGvlcAT{d8VuL)wXLy0<5Tk9#rv{@^scH*GPOJZfIOy$cl$!DbQ9@ph> zq_)U&Bxuj*@8zjyY=7Zcpo$17%kuI1!c236U5*fX|8#}D9GHK4XKj)WLIwsG*@Yy$ z+5a@EXcy3$n#pG1bA7^|eQpjQ*VpTK&6hgTdjlV<`fsj4arAjfpTr}lHqQ-C!{aF7 zGfvNczh#_NBpx1Hc`Gs7apJ1$rKqbOFoq)4h{x7auqx~@b`%xa9`1ImcBCCj(Oi^Y z(a_gCTdRJ z{5JOkBE+XRckN5Z!Xk^pAq+)#B2Z5e?(Fce!!sr>=45JHtv$Uo+4ID2z;TR8$513mr8({9NT2SKGhY|OS z(QUVAE`jBgP{vHIvr$npBJL5!1q1R0Y7~#jxLtiHDl%N8m6V085_V_$J%v<@Qz~_9 zdF(Cl*Hr(Av&r!Md)~AG+t9?9zIt|#@XUBh>@$glD4BvO2HE=j(cQzcy-?iXXsx*V zJLWNZUN6CZ>_$V@9!;_cjmv4!MyASfcz`Yv9KU>zIxHqq!y<$L<-cy{&o zd4%JI&`1AJ#=Lrehy7jY*pZV>?O=n2_%z80Nsa*T4!rf`(}k(vN{N^ z$nkE7x}{7YdCPrEi@uk9hhDk8FxIDcn|QUN%~8|zdChCUU9mwr)Dg=w_%V1Kvv=8R z5Fl&GDy&~y1JfCSS$)C)!v&(MIFIEofnvm@;>J_)=xUFbS$XwnZNuce#vr+wuYIwL zrWFi}NAx(wCrI2RJRoMX48}t?xqWMXzbaZ>jBbi*)#5D9-vM#7y=GP%jSAMZWLDpJ z(GzNAskB2%`ba36p0kvB@90e|YmOYA`Ej_;BH1&|!q!-P2KzAnE?Ma<<1Irxa+-Yo*Zm@4wL(ja z$K_eZuMj@10QOh)+LdhZ7GpTWT)Hzx)BvXqg+;jf_&&BOGQGk$Y^#(>eXx$L*{s&q zKJj8NINB}fgVmk~%M7Jlx@P*3^B^#eD#DfZkGoJEkbxN$>9t_cU^bWiqS;^@dWtaeiQt{OBe zX4hI?jD>y%l=vPC=Gu#y?)5fK3mJGUovG+mOo)u=Rg2s8wziSedPj#aUKD7go>-6c zPmPfUajB}*@o|-f1X4lNCK_sP4H4Wq+IjE@YHep3y`x#Ag%~-B=~f2GYAPefnzK^l 
zT~s+K=|^aNCpu;4as8zWQjH5Hls7TnyELXq(4M?O{~^A*PaYgC%`3XINg6C1`s>LmdU{x8a9C1AKbKhw z1KPRZxsdGE#b-OqG)=u|?D2^&mg*jRle(Jd!H0#uFEI6uh)p?S=qGv2{Kfde(0XNM zUdVj49y#Gm3g^N&YcmtA@piERy9hRV{TB&~$}Lre@reoEsk%z3#7M0OBTw{;rj0@w zfM39pE~sp5pQVCwmTE=6Yws^dab}pPoVwLUQz5|isFYga!U~GvOsJVnw^sHKg*53(r4L1_&Ke4Z%Y@XM z#I)tm$_sQ7tU9un4HUbDb>gY~+`fofDd)NZ94fy71T?v@Q+$+!slio!vuT&PAJsu5KeF@3J&59bkYtr zK|4ZCIDm3~5ZLFq;Ked7cf^?&Z|ivv`H#E2;d?)N-Q|@SUi*E{Yp8zimwsQ&58~~g zqr8Tu2w%wten$yMiH^`aU}GR(IxEE8H1G1yj|kBrpNNm7CqKfjqVsQlca+`iUbH%) z%ic1sn!vB~+(|Imv8!EK;4T)BjByQnKRjQW0M`S*U)-L^-{63qt)L&V!alyAX29X}!1oX6YkPVxB-w>_tO^dWH!us=JDp&aPd#Z2txu>4Yw>IDpSUY#i8l) zkpm&Zmmh3)?! z=-PoA`Gvuu*A_6z4$KtCFwkF*uAA6j1@gy{tIKSbm0e7gY{&X>|5}s z;UQ5F&-A#g_C67!ZjhUrY&?6gC0qpKPTTqbc?7ZjFv1&FVBrq|Ij(?@q&yP^xdLA3 z!m)M+0rEk#uql{=IsyaVGz~NDJlU=mKE4V4@5Zu12LWth-o$>l!wTNN zF#*|Oj`%3f#Pg)O41}xV#~n;U3*13`NmJ3#x!|DPvQR51RodEY+}+dKa3NqHYm}qq z_%UoLYtt4x7{Lm}b<_OYel@8)?WV$g5eD4#^!t219_^dvuJIv1W!7e<_z0%!+*c=w z0mtv92mR6P6ji=FIxdaY{`N%ojFeSsVeFWDE8K37&rH7+u~2ZNZEZ&%HO?65HR zmOheAH={yiBjRr4&s)%)2{}_w@ZYU3rY(0yLZ62z01YOa=_r}# z#Dy=TjS(-gP;hdh1-<3_g$3r=Xqelp#BSn&ugc#{$aQqN5rc-oHxpwo`zw*9?~z{Q zlCOUSd<{&ktsN&058Z8;JDg6YN1yu#5!Gop+pWBiQdj0F9)xS9K{FW38}WWWEC?Zjpj$WWINud2Dk3V`PEa zja%cxK0LUYi1zE}1a9_)kFD9|9(3rvHN` z#`@n6{r_oV?0=Y9{ufP*g`I{U1~#mD=#ha-E^-pei$=KF-jcEgaieR0Yge5 zrNFHD*OB&(f`T%xi4z3H1&e@{V`HpXZdH3Vr_dz=NZXVvl{Yk7m*KUvDyx0nL}fT&C)RAix>cmZaFk=LCN2c;M*EB5dH>t*z1X zHQltA{^~aXa*_0!Ikt&~x$%X}!x@rMVs*Q+xbd?e1g*=5?sKgwq@91?_VSY%zKf#x z(+PrE;&l~TuN(bDiM#RcMt`|%V(`TV)Yz}Hp+z)xVlC{`V$|7dt7Ysa_s+YKxw`%L z{@1^>z#&@_eR#_n3bNhtJu9zNrHo^Bf+^e)zb z!ve~(85sSZ*-6j)Z0sJ-6|#x07XlS<8lKNir!;Py&dXP@5wje3(cFSeSRxXGf6xl% zKT!P&Up6CVcx?Jy5N!QHNkvACEuH$bWd>ODMC$nva6qRg_a0zz%D?s`t(VH}o;XiO0{$FXXQ1o9S!$Rni(;PTb&E_23>#I+cq-c%rR<-Y ziWfmjJTO+hH@Uoak*HwPeEe&OC5VDF<1dwT{L1)Vb12puDDZ2!IUy})%-dU7CXA;j z=dRn&y`uKTrut6q@YYRyMSfi;PvLP@RCNcp{b;2~5)c2c_)$i(3kc4=%EAY`8;LXjc+`dnb|hXS+NCQFPJlT?a?l(Q2#&# z{rTnT`gCo(yV~j^Wjbc+%4RB))t24qt7F*SeCk{U_K8}JMz5vGc5GjynPo{O89ERZ z`uLymr>-ie*>hDvf57Kv)+OaPO)b}uX>%IPJ4v=`p{hgy&>PG5IvGdC0*cDfD>^-$ zzKb6yW;|q6Nm*3Y`WT}^dwAqnB55GSns0=097*6oLV!vR_yg4a5mqmbPs=(zJoE)t zzqp?2m0eOHahC41g$i74THXGG0et)LmIAFOT}e6C2sYzs<)@0xdw@5_ zJa!y20qWG#3k+mCG^lJ?JIj#}(W$icdRdYVvPJYVTaZ-+53e5ai2y1-bwzoE<(s-A z`2r?geCRzsewXtq)RfFGqsm?uM^knxk-CLEWhQoa0!wJ&htH+CuEuTW!ZDjUZ$B3q z=G_rMN)1*ne^^ChwXer55C72@ac}NqI0cSO%M*`236sO7%qm{#FBc)$Io{mZGsiJ| zgp7KmtsR7t4w$JN4A4m6XodRlW^Oz?hamNdZ}$oJ7qZ@1S{0eZ`^Lrr+^roN^uxlJ zH#ydC{Z5=@$BYauP7KlY-(O2hhpjAo=5<)%`oi_irskfA<6;@JVY0^|A@)t9JIFG3 zY6(D$#x1|ID+jtF=V>z(QG-j+iq)FRN;1w!S72CG5(BJh{DfX1lA^kKnU$V#=>De9 zjy{HQR@y5#TK*oWzEZGDEJZzN)PeKzp3^7psGY;NwKA?eY!!Xb9Xa#%7Tt!n;S%Ab zo1|GWy*R8pjmKKEBowL{3hi~oV^w!(cTcW1KjP+~aVMY)X{11k%3y|5ExN22s**6& zsYLVgj^>rbFH8`dlgP$hlEIjWh;qm_&xuuzE=m{laFo${0tF+7tx!c#7Uk;BA$kaA zWO$yfKhnmrOK5O7A(9(3V2`XW(25c4#(8di!~Vh$0=)lejDE|gy_sM7E{476_ij$6 z&|oSTy((6Y41mu(K&^)|s$<;^r>Cx^-+#J0-~U%G9L)TZt_1*=3F8_xD7->IWy>n* zETU>)nBNPhe)YzlX~gjrffU-wD4||2X>wnGHY4_0CtHCN4;X!9f`Qpp3O5sA`@*IcgaI7$3!GV=4!LjPU4g;H%Sr2+D4aA{}4`F`_-RHqif_UzB$Ofp3va)Q(SzY4^hp&thhX{JZ~c^mbMP zTJWMjG}`~*fm~9#AxPo$x(}oOqe#@& zaZi^Md7{IfnbJQsld}`<2(5lT=u%|vix#m2JgC2_meor_iuiis)t{n)BqNBJgpxQ< znD%Di8QawopdR_k5pK=R`OGGY4PrfSZ$`XRyG%jzoGQk`!Cb-_DxAzh>*)^aczBAR551-jslpymBo@j+L5eH@yvf4P(mt={>kBA!8mtQ zXMh!AknG2Xmo0618q(J{ z;%>BI;uncyJN#$lmsRF(EAX`m3n^iTG)AQ+u0Iyyn~1<6uyBKu;n%gPnOH|;-jt|V z{k*y0FN6h~F?MC&5S^gsV>$0GpQNOWqXF2rxW#kGOZ&--coq*Ix7T3}jiFsL&dQY7 zzqtbgzX23G_01Gf11Gy7TKzzNp1|MY*j^sCW;fF|U3Rn;6Ws%%4~wr3=&gjAA4s^N 
zp}I_7O)w2=53n-79)oQDc*xUX4-UA2Y-3ksr7&gOCK$TKPo{G21F$#^Apcp#@ROt^ zO@UIrrn7DU3t=~{k#l^SdHDyqcq=bjtFUY0qwjHrBXR*N%&9~~Pr4f{b=ZFfC8$3j zIolx=jt_WFC^*QdYv67BL0+7P(_^>@!S(VaWwZFs4BF)}@tiKi9>SAK4D4&vPl9b5 zIKTt0)k}UEpaZ7WO*#631o*-qhZs+Y9)}rE2rmgv30LtZvSMdt>Y_QF5v`+5XoOb< zXE7(rgipgvlnJfL%*oA(%}LGk;ieV|1!skAG9g%p=dBP}5waxe{uMO^hy~L0X#tp^ zgaIs|l@M4E=a8yMRbjj6rXp4-m&lNyt@X6om{@kjJz7jgY2@lSK?t5E11&Fac+vBA~&bV9e^g zF7Wcb7K_ECTOsNa zzEU#y3Ena?#3Fv81EfIt$l~!8u0$%j`ER)ya*5qjc0)mD5I+4$dgX3;40&a4Q2=g` zF~eEzbdYkxS*Q?l2;St!+OfCn03*;HGWVoi#DGvpyaHb_h>-9%0ssi;3-YFzeHwx2 zAK|ZpqKVeBXPE%IxX~3zI7IL0UEP3M#a^$nfvv$$D1Z*A8`7qLJr}?Rv=wO+$%&cR zJAL;!tq^RlB&`s2Z#bcr-~ytCv@K*;3RDeQ8`Vh|p-0?ESHehFz(`lt z9uwAw(xoj(KaPZ+EIyDy3}O!m))f)fl@I_%;=vp3i0ASH4Dbzbf_z52BxMiXwGL=B zgyT*O=3ySN#qvZ4$YvP$JD-uC7QT6wf4~Lc1zdq{fo?!_K(r)KYMJ-Q2E?QpvG$t= zJgbj;K-koypmkQ)CpjT%8d6s2F)_A_+i8T{L|3|zVof)u7BA3Ewnbs739lQAn@vj~j`Lv)MCRZ*l&)oOzY zR){IcG{nb>0i1?(gzD0D6e&?JCGt{4EQ3{KWf5y=gv!I#)=G3yYyA*_v}(kBB2%r< zI)8oN$>9PQJb-q9V?aY1t-m~0;}P*>_}VyvDe`2*+Nv;T7qM+2{36fEQF=ya^gal%7 zQv1ZB$UX@uP6$PDIWCzbY5hW9Rbc49>3zaPq7|5i%dimdX)eTqijy6X0%Ccjvj}Ap ziXsU}3?jgPfdY)ej40S5+<_;+)wf3+aXkD6V*Etj>D9lZj(BMN8koI)!j5#LB46NB zRByP{w}S1m92-^a|H5RlOlc=<2#d z_f9l?1NRXgjW4Cga5r*wT~S9EDL;bSQek|<_rx^xhF@TVC-vN+M;bJIgZD5g`1)_W zDL3gYp1G_p9=VJ%55DMkWt2M~B=F6F%y2Mm!FcL@{6;?G0yV+lkM`eGZQGoLo9%UBL1!dx%W4|gntTohkFXiYwdCK zvujdgVO#(3{6~EV@O8hu8|coN)EwO9=q9teikW?upUJ<h1jfsE>>xk& z*m{T&1IsBpafnT_mUt)$_efFBw=}7!<9XU#q*f%2{P1YSa6UO&(bS&l)OG5FU?C*UWBEa{RNAHLyQjXpnL5ep8SyIi`&v{px7!if?z-+I4|BhcO+ z{b^>%=>Kj{TcPi|Y#=O^hr~>lD#WZDU^r{pV5GuM6@6ji#5jD!DEuXY^;azQeA4bbFRR_Q5OF}TATmi5=o6a0onRcRIw0z9sdr95DQd|}_!*Vpp(`PH@5D`4n9 z0U0&s_V`vca`#;zysioxLS(-Gc^XOo!r!@I$_$dz`ulTxxZM>0bJOsQ=08_S{>A6Y zWBr$RHTI3D4W=e!WY=sK=7iV8MC#>m1c-V^H|N%(-@mq11% zBch@=!;Dp|a=Y5mES-|PW0;LG-7Zi!)pM02RfPi?#2RbE8HL z9d7@o;Fs6H=af0S=xrv6Y)Hg zdRqC=Edp#QuB&i7bs(N|p@9cz#z!ilDx#$c}=_uov)*hleiqSdc zE34>+kvPvUu{d8~-N2PSnOBz2-@eqKL2La)?&#m3Glef3a~G2*JfCafGgAk6JHb8C zUat6_)(!ZsR5ph*4{f>oHl6bKKsCc2$nP-sE)lUqMF&PQkLZoz8qNt~Xu3Sp92PH! zx+H9KW{EuR#3$0WJWGwpXB&JCN3yR_DV?`t%X^EOS?^TW_NqOc!oI~(ia}^142DexPQk!JozETIBpw><0@$4D{12G zKO)~U=LpX9!S89j(hW~9ba8{s7u8A!U6%SqIn;y%>4#(Riyge%V(VQ350VpSkMm$y)%?#;TZC{3hVweZdiHNxq5GDk;Iv5P;8rBVLT zEWQ44GNYY8tC63IavG(bLc);_imB#t9Q>juR-Vv`VfA@HBKVaNjFQU9| zRu{-qEJ8bPMeB$fD(DO>pqT%UI2hQGh^Y5j)|$M!ena3kB{~K!<(o*4VURrBp*TQ}WjFFX z*g=lXL}6cpR^!;9xeIRo0Bf=5P$KO`G21I>cyd~k0u5fQ@S{0b@-DaK(4hTRukV+*sTXqguOSXdV9xQ+aOCtZp$jNhf~O+-75`Qn6g7QhArZ{`t*fnm818 z;r7LTlanOvl|V_$_7y04hBu=e%Q2R-r{A032E(5m&{~DuV1TTZO+1>7FYRh#oFPNk zD3N+sQ_b?Avt`g!$@ZeEaI7PGgg|P#zT#?lD7C(X4?fuwBI8>#IKX+X6v-er+oGN;>UQ;4v*WUC6 zP0IpCxsU6@rst)wp&iY!M7DJ_L1EG3Eu~S!dJ@w1v{6JTuRd4KvF0C|#?klU6tG818ut z!z}$fZO^Ms8WV7AdD;d<|^-pZoZ9u$E47e`wQ1AoQYyrbO|a zMUfM4n|s!eb4r!liHF=IlHTa3!^IH|8n%p)T{)KW&~5;cxp9v-Ogl6HR_WFv2u1+C zvGt_h&u7k677jk&JP3R^jt3<3=Axn1Dt&XWMj{~XKtZ51!;+hFx$;DQOkyG6+FH z8Wg0vr3EAvBt#GeB+uaI^OSp?-|sxM6yj#p_QE zLCbF+XQaN7DVE@qU}`lw@C*ec-T<6=h2lDt6O8s2nq^WmfCjGjM?Q$#h@>_7U> zf4M`>o4OWLuKkvkC>bg-VRPY98_p&6>$gWmGI^6nR&SdKp6}=%mF$+_aQ90PNMbkq z_9{l>(_r;-6lW$E8&kjC7~ThJ?aiZ>>Z<3n3kn|Ho>lufmU@Ns!tg55IMg-r{BU??|Y=_#O%A(FjSbsoUjl-QW!lfqh0>x(U$w8tZfVPCt=jX-#>qP z@3hRV6FlocdG$`>LVBRMMiYtqS~l0FgwNW#KQ@`}WXcnil}_KKJmK>1xdl~Sio?Mj zqQ_3F9-jjV(vlT=_wQs>XJiW3x;_3-hH$McdYR+>4FlnW_(|gRna$^bdHcs|;JW;4 zMDl4>^xgxz$P%j+au2g=KoPH%5)tlSpe$-CkjImYPP!zOLK~8XImD%>jZMJkR`{{CzS9SSds=>~6~?=K8b z7c4hr62z9?@%m;8e2e9^7g?=QsV>N+80l@PE14d0ckxck_v6W@&i5aInOUsXFMqc9 z&^BtE{nVH`zwh9*2AkCC4EndIuPU^ zw8{lByb38)4q7R1P;n5(OS9}7ka-ZOJDeyc%P@T`a<%BmGM_1MyUuGQ_SVp9s6b=? 
zA#aLSJJ?w2!IB!G0fnP+2;7cA|_9fX_4zt?)j~_nI zns7JNt&h4a9uBkc6RG-$W!%VF_$C-&?Mb-Fmm=c{?1r5cgCrJ0s6y z#__`=mx$`X`-VS;NgF9cw4Rg;IeshO7_yvskCJIOBH1MMFw{r+KYOaE!4;EZg zAuF4y_d0@=4tEU%d>1w>MlvfxDw6x13)j9+yf(a>HukZ4Ymp|pu4W+XdIf_Tztv{Z z!A0L&ZR4;H(QT> z+Czk_WTMq9kbLlt(|Me%sH2^`{Ou;r2L!Z@2yL=1q|lAm<%L+SEro~hWPLlf%}J8M z4_gjdo$fOR)1Mrh700(vO4NNX3@;j{Zs%{ftp1=|-n2-cT2QQb+4fpNX7347CCQNG zT^>y5(z0J*+FXg|dhxw=q)`WHofhQ~)rZ**hvSDFAD(JmO^E&Ggiy&`o6zqQiJms? z>FyWMA&}D&Jebcby9`sPiaaz9`JTlmdQa!|-gR2?l@N@fJ(z@B%fTW1dhu5hBS~e^ z&Q^gpeLa$U{HgV{O5)OlN++MHH%SvVXl3zejThVR&!#3OC0S_W6BQEVG4CLq3Nwnv zzVwMOXSzn+O?iPT9l@W|sM0fgtXC|!CjOGYt6bYkfbGLIRZhpL}X{mGWZe zJX7Qk-JW5nh5A3lz5D*Y=nXw0&-Ygp;fk91%DU!;NSDud#W+c6uKJKkyl?E;@K?>G z7S&|_I`$%8Z*4S7jVzFp`NK?iH*bQx4mYKNZpmuEn03iRbER8(E|}_X*&80W?8KDo z$y#Of1ed|QrTlJs)E#I)XT^m)}4=H;;AEz=p~8{IPhk zba_6*&!Be`LtH424`@`J_G`l9V@f$YJZ!^n)T?D(RO*EvqNCD_&lSX>#|zDqn5WP{Sb{(TR0 zKQa8r#y@la@gY!Sxluu#zI?90B=F8vDXx{Vm1^&QriV+zfPPkX1^3tvjUqBs2c zMDkU0@j;@b-NQhGXH})@Up1_06xHWm4$0A1I2F0-@y!Z|Cu0l@1?WGE$Bm z8GVCSj1{qSGfWxNZcRCC9RKH%8x{S-dygcqHa`-{8DcGdaxl7YbU%wHF#9;yRr3mG za#4fEv+hBugzQLG2@<6M*0du<x=Wj0s(wexP#c=%-a@b&Ev zQIA=5Vl)u0Q^pkXMQ!h=oMTX>5$bU6)<=8uwOD=~yg34HO6J@fzHfa?o%{%W3|3ZE z?`sTrI}m=_`Nbq8Nz~ILo%x_wQFMGL(c#?Dy-->eHhpz(lXqjfR~Uxha5p$#&G-BwYtrOI;l}vaHf(EGjNT=9mhJ|O-uFk;5WQ1vFJAqW zv_DaG+i9^oyEjr zo%ojc!E%Dm)y0$5JCz$U0|Me)u^&3cJk+oR%!q2jc4cO2a(pS`smXC^-1t+r0-_Ix?E#x5Qj*w&2N zqBOZ~Jk4CU$fkmCdu3s#hJA-3YdMtj^kHVD`1VhUTag*wp9VYbL~Yln&*U4cWyjBA z9~)$A(dH`F6fdp2xlRP^U;0?o<~LZ2jJAymcYNZsjeMOXf7@yqc{e<*@&@l* zu#bG*rD$&r`<1kxJB@mG2&GfJwhg*e=DQ!MziiXLJnZXzWWRj5ZQO#$^}~tn%G=V~ zR8d=siC`ONHAeB-RnyA&P9LJ0%zEN)omy3oe6y6FiOr0BNT0HxT$5jajkvCyEhKLEKhMxQqoyOsi#r(Zn8M`i5 zze~KAyCEpUwP(}lig&4OEIHrElK^I_e z)hP4!tSCjzkHjmB991-A)=GR?PEM?=%YJRpn{)k#2EOuaO8y$kOY#!GLKpeHga_G) z@0OfcijVGYF0b7F#Ke<38Lb$_DMD1urBWXC1*5^}i=XXhFN1v7>LDS;^62anI+_>m zQKv?}a~NY9!vYzh#3Ijw81EaL$7T+_dL9FxnEoL>GXrkF{B6wh!-|rc+Bc+BwH`@F zk!{dC^6)5TC)XG+vD4LVzgtSNaXOQJ9&`O|!`!dE`Z=#S1G&vU(VZV~H@2Gv9dN_R zs>_VSx-L1-e<|m@M?4@m1tY3-2%Q&b=`7{n_Ux#?)LTdzbjR zj|je@!vocICaurG;!?Lbti{#heQceT*B)K}h%NrGZZo!aEylcxqSNU+*+cIf{r}`LaRlzO<0OZSvMr zRPPI)m7r<-M&6kJ;0IIpPWm;5nxxkyC>`xlcTL7(f=J4*0|XzQ*-T5)5IuU~i-4^; z6^M{&^?O|DiB?rFfQvX3q$`XH{20|Sh?I$}O~7$5@Y>6DnkaM((^e?Zw|RT$p3GXV zDkgst;W?@ma>;$AU_3@O0t@3HYG`9vfjVe@eUxxa`+J$l4!$aGC7F{{4fg;RrG<6J zFGxYu_(g}r_aEzQ*4vU@<@a>uk=idE5_29M_M(#a1j#ksjaysyVeSbPh4jv{1S z2t_F1Z4u;&nYPrzPdp`~XLe2$YhH3$4Ytirz}eJsli>sVCi~D;+}Q$ZA#<6FUm`Ia zK8auJzi)f5Yc5~?8^f2hw3o|I@DH9TZ*!hM6?yt>#(kCV{Bg8DF8hfye~&cRd6K?M(k!2!PQ9>o55Iji zvifa~nm*_G#0&2^pNFVA(_sg+UiavfF&e8c^(gh2FuW!*i=CSk|J;Y*CaP0P-_|bl zDG#qLm-&;IWh_?{U$Bh{jgl0wlVT^PjI1)4yxc2%GjT6J?Sgs)2}-Q3`fa`SJL&`Z zn3rYNoo2|SlUH|%i`7*#>HSf%Mx59{99LZG}Lg6`>&oI?; z@t(1ZP(+b#hpFW{Gv|=vF+5|IV<*0d{ZQv4K5nRdgCw(T+bx@mp`2N_iq4KvG&Z|Ap2)bY-7y9H1?GRTs8_r%Asa&`Y%VVr3!iFI{{R`JgQ%2!q*H@4?QNx({Acit5cZ_m`LT(r(a7o#;HhCiSaT?yh9`trSvS}hIEF9#sE&Bvv&I{$zqY#E8$_MM`0O!hYO~j8j*mg8XU<`YdGeYC zpO{3&D$2`mU->F}<=mde=PnBlyev^vXTodwBsooaf|o)egX+&|E*en4EW!M6&@STvp-SBnOvfo66V%H$E|$ z+^@w;#8`$iy*W*02GWjeTSFK)lnoulB56T`SchDic$ba7rs!e5l(2P)l|s#PVrty5 zr{|CkmU?(2iG{4!`~K$CQh_G6=%oi(eGxO}FF(;=y%_o+<)UJtD)+aNI}fvih+Z{c z4tX%-El0)@sc}0CzJ!^SU~zktp4(1CDfF?oeA6q%sZ>~{jPuz3eO39ME4k61B!eHw zKITvpeU!u&l#$cTV8@FTS=*IxubkfI-8v{g>PNF^Q%O1TnpKBBPB z_pOx12Og8O)H32F_V=oou2^Ijo4MzwZXhrjpA?kfY=Gy{j!8O1FUiczC~Wv7rXK%JSQjGb zMvn&47-KEx60>*W?fedfpwO%v&bh>LWAHb(Gj&gsG(DQI9}5EVnz4SK9Mq@P)oy0mR*ihARNckrdG=VMjtg!tr=I7T_@)l%_rsXWvuY+7`_ za-=a>aH>%}4*kfOoX_+Uli{@nhSe1e5)ZAXuQ|>|3R06T^0(HykP47JccUjyr&1uw 
z4VUC4CA6+1ekLT6+z|sk$?d+9i8BekC8d-J^9{2vF{+GGu~uaT*9hCpZ~b7`V$Q@n z_!9H5D@9d==^EaJ9ZE&jxmEfr7;3yEk{4}N-xpjvuS=RpBZ;LX&oB0PK#5+`#GMzb znd-trHG1-}{@jQ*DYZ#ruJC?S*Ox(l>^FL!B3dSc@K8}b^Y@2)&-oLh$dUAfu~(=f zA3tw9xA}oRkU>!kGo4m>NfK#k&Q9ijK^?>D$^7lYG*V(t=EsupjFQiuu*(aPWwxK! znPg81uM?ss`-=I4ectF|@_;$LX(&IHfq?au;3xKuE2Bap5%Qtz_98fUZd0b?4l%8r zN2IV{;g7$mX?9bCz@y^gxtmk4Yqk`8&otzjo7!)(u45+jHROgq|2eEquecmcb}2Id ziB+IP(|$nEHrPti zT378H-&i^f?%2bMJ@v<-?nwJ4=NEt-;J`0!bo9E^In9aOxU^)RMyB!ER#mp@J5F_V z?x5z5K(6{!Dm-jQxw}=!l*UH>KchGu>h;dIsZ*Pwu=hf!T+th+BK1rWRk2GbV!2s=q{&!F;S{~fJ#wP?PImnNw|B_ zlCrr^k{@g4oEw9=?E@65#k8z8Xny|w{T8*vXBgu{)}&MHeKJ2|jb(?@*A^6U^?tAi z_-7Pg#A<0-PT||x1|Kytz6g(&jeL}?{(D`si$ z*ZG&v>zqi>^Zs6LVj0Ea7nY&JAg+#R zbrVHI`-JPfb;`>Tvl>6+EE-GFd$9MKiI+aE(7)O`_vt>3$HRr<&E2&THX}~UY{jRV zFapLO3*J7pXb82 zQ0Kd#Qrgz;+?C|)lw)FTbFEW7FDi$FGxdo*MmkgGg`^Ai&u%a#;B{e{T+iv896pHd z>@}C2#Gsm}s9dF9iDH%{ii=`|53ev=l#5FWs-rpgV)ZM zcvqR0Sd;FNT3-BsOTIqH$#w_mwDfi*6-ZTw8J6?7?!xokKyEc*-(1_5)QlZYuIQLu9Pw5E%8+Om%Z1n4`4_^%2xLff&BUim7PZOuaMwhZYg2eM6Szr%m zQJU>;rkU6Wels4visrDCPqMJa>PI3HR^wW)z1b2R-WNm%a=YrE{Q zD&eKmFN)ZI*+{k&-(R@1n?0eYmsH(IE!=jHFw>D)zm|-gNck!xMl8dHS1NOZ>7Z0h zwcwp_1K(_+G()Yr_$o6?b@ueDT!Xh`qd&7}pYoc23#|!0T-Fu#@iH)t=#HHZnYC(g za-SRC7@5RhW;{o@&+dIm@ioEgT=Fn$T8AC7McVl4&#R0%6Y6d~g+DfsOs3MK51KgS z&Yh2Aqr6``Iw9C(y8X!cyy8f;iNX7qjsBi-SBN;SztBEMEjGN z57&9N^~7HtUZ0~f+0u^NOtXSTm9?XT;dWiz0o69>R%rB%cbh*&$j$F(mr`M*dO0Rf zyQl2P)$+Y}MaN^S2PLS8n4f(P{R%2QMsLKftxJ4SpDdoGC>dOsX>iFGUhE#YXgXf$ zwwj}6-C)z$deiPj``z&@64Bm{)X`FXH%gWdiDfP?rf1*ze6+9g!+KNbDfOdjU+g+f z!e&3Ms}yP3&zAI$$1;v3>-3flLI~&)r&H8&Ey~@;nlz5z?7j?mMm=~ORe|G0^pc;h zY<;=wek_9m+muBc;ec}4hY1~Xp`??@C2KfSg^+ous{xL0*FgdAMAj%zQGHmft|`qG zc!rqE!bAVCLy650{EIpd+c($E%R=@PL|k`t_at8EZagg+SG+-fxM0*xRG*kKc|=!nXM2NYp#Q=S z`SNF-K+4DaH}5F=?YT;8x>!u^l6A5rddE@9Pzy;MejN26#GZ6`)9K)~PJ{33{c^eA zxBhr1wIdrxZaphMM*Neh+w|B1S;PI7KzFx%51YErlGzf+pA0^9$)>-Q;rBnC^>L0} z-R^tg1O=d$nOPOD79T7qzVx5h%eG1&GSD`GUc$Q5c@#lWE^)l?sBXIbMXXW8Ub%3} z>t*RP`<<|RdlfT#;>m&O1_4&{E3yeA@hXCS8iq9w@-ltHZJtS&@Nd1zG~RVtTTHrc z<}~2vVPe7;@#d8i+3s}U!X(^x<57GiD(Q1;;WeAPG&mW_X4RXYucb41TunZ0Q%T!1 zonTT(h?uf}f`f!>W;$MATFtBAm!&qn&ZMcrW=PZcP{SMk{{TzStcD?qkYwOC_QJ}XNK(cj+%2! 
z*IPT^x$b3E*x#*pLbR3y48fx3c*%8&c6{FYsP>AkjQw&ec{BM@zM2o0-al?6dWtXe z&ic=-6&V$+jq{Qr9yZZpfpx<_mcwQk-i2$aO?dZTe0k}?3oGG7Kig6v_+j-kj~@(s zWuIJ5Y!dCX$@8_}_GHn_92pZ;8QEQ73cNZY;OF?0Q!ZijvvUM=!{GJGft-a8xCvoo z4$v+9h_&!j-eB>5(bG3RvSZFKR9+PCYU>xvlT438G`}SqR=t})z3HrR#n{_-Ehb#4PJ1K6wH)qg;drzyz946HE!&UMuYa{miIZ~ecxYbKD;V|8^oH}V<+bgthrKC3 zeARbFIbmOZc3d+$uFM%**ymr-Rmf^oEMp2tajls=X*TSLdV8##VJ6Y=A`2;kDmxiX zGCulV^Qv7U_h|D#meNY$eZh2z33GC+bRT8)Dv#LWVP!d7M`T?Zv;NaZB8o!lZ^Bx~ zLoOZP!xlW_LXQzu_EZEUm$PQ3+j)E(#)>OKj~;w>!k_p-SOe$O(VC=9IL56T%CZn5 zF~f+caylL1u%Ea|v{`--{DIU}YyG($lhu(%XRAYnpxyqK<$JA-$eU)h*>h`xbU})h zzx=U=x~OZltvcRE{A@~B7C&8FyCOHY`){Nm1Etq6-7BwN{po;6kUJ zqFg(%*pWBOcr|Nq^aCjGwWd_iFO2tyoXF2MZf0SV3#T89OOKXJkru=R+8E1SY2o&` z^R)FT;aK>nS`=oZzu}BwTmwN z(7*IkGQ#NH=}uk|7n|$z7o`NuW1n2EsP_`E?JB3%gt(UsepizljuV=3xnSO1_$KY@ zD0$5*@qlL^34HC}cNl&cT*tutt zS%{yTgLG@6w@*%fM(BS)F;@s)aQOaFAcnnma7!2Pi0+8&gWpiA1kRHNfU&WJImt7{<4h`F~TE6ziL$s%DydZ+ORgO54e!V z#V?rEm4_{LnjBK;r)Iyg*F3fzecinsOPl32H^6Nh8~3h8>Mq0V7doF!kAa4t0&nCT zRg(BzcUa9ou+q?osV=H+jeff5GSU68W7>&MY1-56vtHsLVaKy>W&d#Vv>4N5W!kIc zrd4%iE(#j*z2)ePWK6!I~YB)SLo;9fiC6LG1gBnzo6N@P$JNw02zQ(cr@sn}lQeZGnk?t^y=CTL5&kvz zyKWLyi0HdO*fF5K9krSsFzVVaTiZlW!8@cfoP@bkjHLF2gXY!GN;gb@+ z(g)vXe;p-zcUm-Kh3C!D_3AZ-%v(k0f6JJ)7uhOwI#>{=P|7->dC6dJvn>%{#fX#5 z1M9uqckqnyy{R$citpo4b zkH%dhA1L4Yx3q^azl$L#;EPkZ8$)R{v1c(kupV$fRxMa%cTF#VFTUcYgvZR$NYlpA zeuk!dO`WB$9Z$usR!qS6le|M6{D_x26QAEit^3Q^=d09D&eFmIJ90Vn^GOM6NrzNL z7VhF!4DI@4TD26HKcY>KXs4T^-RfSqe=++dy!hwm0ErCU$FhFy4>G@fLDYUQ;!GL% z@TPVm??gGQ(`a*)>tMS-CqIAkP@c>3tD~z1L6-T_fx@fkS99~Wrw!5wA6&m{MxBP>v!#*Q1a54cj? zbGzTV3B5m#s7c zx7T9SHAN?_V?5mshp)cpR~lKyIP?=iO-S?eY+&(NfFwCw} zNHbiyyn_7;zBiXbrRO@t{(b0;CPP*swo?wx2UXGcu9A!Q!!Y_x!jfx9_YU1SmMlIe zeKe_BTtiIoktTbN4|u9x{}%Y{$b)QfvBgrdLuvb7NpwBJ(v$C( zRSeCft7N#jYc%yBSCmgr(?2uaOhLr=P9og;k(|Z1t4H6{=6h_cTorqF@!1|dz@X`m z8hK2Wy?bF6Auix*wEvcjPfC0L@WQX6!Zdnr?sNgR7&5o_m40hf(=bmy8SFum?5Xdt zW6K~6=-1ep@3FKpr!BKJ7v?;ToRRMCo+Tsry*96UH4p)QK7JQ@?Y{(nAg|LZ84)xb z%{t&stbFQeJwNorTV$^JHZzUBp^sfhZ=Mfd%gpH!|MyYaY_{R%bf<~B%IvoXE%gl@ zRfQrwewE#Bt&i~u`|k=ks1W&NP}cmgdfT{dvV;{O=+N-)=P=p&qP1;(_Jldb^eO?L zp~wkyT#(e5K8d_qZaHdNv;XEn=-A}55)AC%q&wZLO0jodJNBhJvQ=cs7$*2!w|?m? 
zy>s4E?M{VQptt7xs$M>kti59WFRn{9nwt5vD)Z`&aR?nzuAqm~LQ2+M&ZSemOAfvV zoeLQI?}{zSE?=P*RqQj}(NDq|EwwbZI+dIc(@;Yz zTdALWlRqyL-jmgzFJ17jRiAy8OLI6}6JKEKoASuv@Jq0hQe1z^AO&@%8e(&_@sNMz zncOVa`>q2b=j-CGn^ksHS5A)J=O?$pQx3l`UKq33+jZ^pmxMW9^lPm4_#Ard(H_0X z+6lKF&6|G_4ez8{c|R<%%st&G%P4ofM@uj5b9n$0#e7W@_I|7oH~E0k)*P>aQNXoH z4*|9GKw>#@YBqskrt~%1`g6Q*^;pf_bi78XJ=fA6u!W2+C%M10u;%@?IO=g(K>^y8 z*T>ka!mvfu6}!v$Ydo#9)+r=Tdym((_twHc374HVe;5A`r}QxRv>$oX=_Oo!JzBJ!5gG< zD%twU%q^~K9jLgOTOgQ=u`*u*V@M2m)Me64}(Hx!_+pf$0=zceZ z(`nAyI;zV@laMzEi56xZl{QG!2Y zJ{Jo_46fkNon*#aKu&px^p@^<#M*nGst@59oFA|C15t3>&vv-1KOO1E5Fl9!vx@C+ z5T!w6O>X+V2=-MXIYyY`-t*;EItrdF=bDiuegR)gUp}MMY7=Vp z=Ins=R#ndghb=Y94=Y^$)SFebEW(I{iWhtuCw^kb5xe27%|9#m3Or%8oMXAY>Mj~b z^=`WAoO}}5Z$&Lf=~6o#t)SnG_X%emUG4AU@)!=1%4_YizF8Q(M9fm+zM3z06|SEEl1@3{rO)ac!9K{&F)ipBG+n9?nYNM3+m7H%`Slisb=W3yTPg%q4o}HPnIirNQ zsJDo>gR=wF486C5y`!6mw-}>|xs$1d2=w_Zn~#zHw~4!*7$brg#v{lNV-&}Oo+V}} zq9!HtcOhs?jM3WN-C2Z>&&$h;*9*bxSIfnR}R7xQZ*;n7KN+Ia#{5l+dd>d0DusI@vg$ZCE>;Kl9zZL#l4zPdbX8)A!?BQzvTeP_upM|}JgN38J8`MSE?=G5~iMZRi+gphL z9=DtJ?tgu-w>i@(VsGMTCC2E@V{Tz-;$iR3DE_aG)2q5#xItB!oDI=my|lLx|Leo= zYWV(D_5Y;?X!rk9#D9#jw}~?(fd97u$VlXttA&ZXldFc4ll}ksF#lOl^q(0PQYP*e z;&6VLFb_YR2L{vN7ZyPXh`GOY5^4~1~Z}%?L0VSpX z@3C=k_{Z=lDTzoqnR%S;cR49>4-Xr25os7S)1;+QJp6*vQanh06rAVg&0B&z2)HCn zNLpG-N|;~rx6J=6dnOdhk#lr&H*qwx_`j*`-^v2S)BivC!~1{G2L1mu@?VYe{|wjv z4A*}(0{@lq|IDuc8Lt0o1pX`I|CwF?-EjSdeQ1RaEf)W|#25cph+yFe#RXna0Pw#H zMTGt>6p^xVbGA3}fz~$~XRBCxcUKRKvu9~Sl{lH&65zpv1%&_2{CDJ{qh?{rr*3o4 zg8uAVK5cz^7(J36Zb;9k=H%oKZSd2p@yR(_I?)T9H3fZ^(ifFRp`;N4(lBW$enAu* z$$z$7`uEk6`2W8J)PRhQy}N}gpNu`Uz?8Bub27L1XUHTV#Q&cU0Yy!Fq@X;gf)>90 zV#m(CnR81`PCBQqPMI$XG~k604Ht)RDTR`hJkuZLstj z91ynJOIz6+5Jp(`b#+G234He{hna~l!jF87H#o2q=I#q#NJ_;gy2R~!*yLJA#N9a3 zgr`nWB!r>dl8D7s96f@gCGKLJ$Wca2ORMA25j@7)-jDUlNUXF&zShRLAKQ7MNpEDa zBe?3T)7LtjaouYZmx``m-=x-Y!uK{_pKd+aEe#g4cRp+-vICA+m?lZBkAC zs0{8;YS+g~Zxkv0T!oc?SJb&f&)akUno8H3oy7~HaOc^ZwO;c_(-$1Q-RFNDU$~oc zZSYdP_e|HEb6?6f#TWg30ngS=rWnHp0$Iw6j!ua;on2q=zO|X8{4jQW|-tUG{<+YKfP^+6z_dTH6(p8LJ=EpjC3T8gs5f-J~S}jpJ z7+hv#MNw5{{?5LLW|vX6Br z{nfw~KI!2M{K3(B3+LL^oT_8B2= zF0=!k*tNvT^J)g+*-Ll^ads{&B`ssM7bMyTxA2dyOf1}+3AzqFVZ80@KAB?&Pc2!^ z!bB~J*Mj4b<+foD0$cp|c>*HlmI9720^6{>iHR0WMDI&1(e2~Ywwvv9(RPyT zU(IS$_ofc)l-Ry>x2a4 zI87ig_RI{E#RJy}o+8EiJ7GUdOw&^GY~{?aq_VS0*%(kKLze$YY}3xl$r-{^*jXRp z-htlBl>3Ckq9yqfYZR`fTJjc4NTey0)XQHf+_!I?^mGO=@Pw9}oGVv%1MwO9Qcgjo> zUp4l(4_7L~73i2KC2NT4FT?)rZq@DMlNSz6tu{hw%CLUBSVQnPVo?LhmUGp2Hz9OP)7-9p$B zO+?#FN0j-C$fNUC^Vc#WE1U&>ds)qrsX-{03odWFMeQ~1`TSZu?Imi1`0NF}qic;# zd@eKrVOO1*99v$yARL>EYY7IIlxKqHmUw4k=jLf=q7S$9j|c;sj=eGe;bup;fgMc$ zbhCv0t0%GfTga2Rzq^I1!Ux#0yS)X5pSW*m2I zQv~v!aNLEb6tc9)nXf&8rl^ge!~1w5ps8P{*eq^L+;*u=EwGD{s{RwNrOA$cft&Zl97(H%t?}lGY5vggM8c$N&PnUQl9PEvFZ{l zG{|QI71FH>|HsxP)=FHX=a9VL-$5oX+zh^_dfcQ=?#Oj{v7-Y|S!8)6_lf^aY~2_B z2eHKBX*0`pzI~MXOUB%m?c9_^OJ_=E(qI?E7!Q~de-*DsSBX2s=p~N?3@Vji(0{I(xt2XrLx~4@Pas^Wk z?pTXbDy^395Zp{Pi()iOMYLMeAh?yjx>t<{l55%?|{P5@6O7;nNn>Hs2aL&l)YsPmi=(vn-w;fxE zI$}Zf+WioSY;Bi?uFZ9B3lTz&=SRf0Ae9l6Js+`;q1@apM3BGOxqo?}Qx_uJyn9B3 zn+xGUM_BE}$~7@wq({~)hyke37MOB6up1A?R!Mn*dF42F0hSm%jyW1Xo*2Iq;U}@8r3*2cSN8v z?{b6{*g4{D`}F;bt@VAutk#Nsin}e*-WY>RsQM`FGl!e@#&8be&gFej^SxAEWA+d?x9`r>TDJt|O#To&?0)4!9#BxN>e zYwZa%*KEwDQVMg^|4JYSv`wMSwOzHGf*jEHY>PVY$zO>E__efr(>S)YtqqgHkP2J2ay&>`esgog&k@_EXM1OY#rNB5;g1}Suw~euRKos7`tQUf?&>cd5@YTb>&#h7s(CFVrx-6a zLg-MrWMU8#A_u$7+Qznxt9U z_*y8I%eCK57qOsoE#!p@X+ThdGoy0Twh%Vk(&0WWZ?o<{{H~2kh(@BhU5IF9@mfY~ zv@-*5)2~{R%#H z*5Wiaf2^h0Td48IZ0!8DMZeieweD*%YZ2Jx+!6f8#uNPTj##hPeL(Z0zP3e&15h16__O@wHqEF`o_nQc0uI@eK 
z!BA)#?)bePBWc$_7ARa$uP5b$)>K!XxAFW^JUu&x|GjdfQ4o(X@j3r*Yq7UyCeVC2 z&%L1Vd!gl`AWqWI#^a(uI`ZNSG@XwJ|E~J?GLWR^6M`#vlpm!?aJDc!Rv;MZ_yILG zqt^P@QtxcFc(mUx;BpB4x1gXsoJh*&ptwEUCe}{YGwpzT#$yT+rWx%6*K}MDf^$8u>MS(HQ+jop!3?NSo67lw z24y`>iA4Tw-Xo0?{7chV2&cFO)4Xj@9}DA&g~fvH5`UlhYX6RyUlJ_*%}{6zg~)F1 z(1jC+e?`mQt`?RAc>GXYEXWU~fB!)dv;Yc)K+&Il_xB$Rx*mA;NALL0JeVN#PK-bD z;0WkV8GqynprDBSKk{G#2pAM;|CuKUMdE+tA>l~0Je2UCZNLPC5NLML)xkge2NQ&& z{?q{zM8eVfCn$j47eOI@^s+(-v^*hxel#6WQRsCd|EWy~Fa|BXa5i| z^l?VOh0t^$Pzba!MF;lSAq>zV4A3DA&;juXtzGC?hn@$}Aq>zV4A3DA&;iZyKihyo zghtOp*Ky`==yvEjVEpJh&W;=CWzluOAYP)|q3eM019U*gNwl&sfDRZy2lTcs^s)dQ zFn|siKnJw;L9Y*>13EsV*+It_^gMtLh;Qh203FbJ2;B~#17bJ29Y6=f3UoVw4g^34 zblgQNivZ|=Sc`54&;cqfL0b-JEG?SbO-=+ z2mo{l0CWfdbO-=+KHU zCJfL4;5igr1KI%S0Pq|R;Q3ih^5^)$0X&EEqt7cSk^+=P*8xRR06TOYaDH?hP;`Y} z7Qk~jfag%e1t<&90mT()V*ube9KdrZ3Imh{=m78>4&XTyt)bTk;5i%)&;j5%91hqY z0G`94m;`MM06d37=Qq&o06d2ROn@C=e*kz6ht9vCw*k-r#ggcD038Ux{s8bCx-kR2 zJ^;_*0G`7EJcmQ)RnYne;5i(?b2xzKZ~)KY0G`7EJck2#4hQfY4&XT)z;iehhoWf% z@cir?8M+-{e*kz62k`tXjz_Bxz;if&=Wqbe;Q*e)0X&BTcn*i|@Imhjz&`*yhXZ&H zooqp`4?53))&_v*aOj*Hx*fnj06d37=N{0@0(1a)4hQfY4&XT)z;if&=V#}P(E0+1 z&*1=`!vQ>p1LAWyfah=k&*1=`!vQ>p19%Px@Ei`{IUK-qI3PZU19*-A@Eif)IRe0Q z1c2uV0MF0PpZ>W&5CEPd0P#5jz;gtE=g`SR^fmxIhkh6YutWC`1c2uV0M8Kso+AJ} zKRZW@HUO(RBcLjsWl+0pK|T5T7Fe zJVyX{jsV2x2msF!fcP8%;5h=oa|D3r2msF!0G>l{XGb3^0M8Kso+AJ}M*w(^0Pq|E z;5h=oa|D3r2msHa6Yl6b06a$kc#Z(@90A}t0uY}g06a$kc#Z(@90A}t0>EJaLYiMZ496)@I0Pq|E;5h;ipCbS~ zM*w(^0Pq|E;5h=oa|D3r2msF!0G^*+|3Di90MDVTMd)?_o+AJ}M*w(^0Pq|E;5h=o za|D3r&;>3)|IpW0&=oE;yR&Ole~tkXz;h&k=STq0pO)^AA^|){0(g!D@Ei%? zIdoMHy6@=STq0kpP|}0X#iXfz;hHJK1Ts~jso!f?2b*eJ_F)& z6oBU_0MF0vuS2U3z;hIU=O_TrQ2?H!06a$lc#Z<_90lMx3czy|fafRx&rtxLpWVZU zHUx5oIGaGo!V4J;i4BSfA)h{P%UNXZ`-3dA8;Wy!`rNAO=T>*ubHVcqJioy63p~HT z^9ww`!1D_{zrgbgJioy63p~HT^ZUKX`s~5;3p~Hy|Lfdd4_@H;1)g8v`30U|;Q0lf zU*P!#o?qbk1)g8v`30U|;Q0lf-|uJF=Leo&;CX%~8M){W@ce$~^YQ=Y%}uoRI%B~qJfGnC1kWed=My}Sq@<JfGlsHYFh! zb%5s+JfGnC1kWdUKEd+|o=@<6g69)FpWyif&nMRB6Fi^b`2x?g(W^e^1)eYPd|`dQ z!1IOm`2x=uc)q~%dmqr_7+>J|0?!wCzQFSZo-goxf#>&LvOd4QUM=u^f#(Z6U*P!y z&$DT-&jmbR;Q755s_FpG7kIwF^C*D&c;NX0&lh;U!1D#3FYtVU=L9^97zS@O**i3p`)o`2x=uc)q~%1)eYPe1Yc+JYV4X0?)Hqy82^-=NmlV;Q0p6 zH+a6m^Lu~y<2c>m`3BE7c)r2&4W4iCe1qp3Jm28?2G2KmzOg>v;Q0p6H+a6m^9`PF z@O*>k8$93O`3BE7c%BXS-M`@Z2G4K5K#dLXe1qq=BcOBpy1T*i4W4iCe1qp3Jm27X zqyv3^;Q0p6H+a6m^9`PF@O*>k8$93O`3BE7c)r2&jrI8k&o_9!!SfBCZ}5DB=NmlV z;Q0p6H`eDHJm27X1QIk8$3VY`2o)lcz(e11D+r7{D9{X zQdDgRJU`(10nZP3e!%kxR{H;f=LbAL;Q0a14|smS^V@InIQ|ZJe!%kso*(f1faeE1 zKj8TR&kuNh!1Dv1AMpHkz0_v_o*(f1fakYMrH|M1{D9{NJU`(10nZP3e!%kso*(f1 zfaeE1Kj8TR&kuMWiBNx5;Q0a14|smS^8=nA@ce-1w;!qc5$^!>bje@ zsv~VRKhjp?Ds9ytX{+mQ+E7RHe7tL(rww&9&l86_n&*i_9nJH^p^oNxv_0sL=6T{! 
[GIT binary patch data for the doc/cheatsheet binary files (cheat sheet PDF/PPTX) omitted]

literal 0
HcmV?d00001

diff --git a/doc/cheatsheet/README.txt b/doc/cheatsheet/README.txt
new file mode 100644
index 00000000..c57da38b
--- /dev/null
+++ b/doc/cheatsheet/README.txt
@@ -0,0 +1,8 @@
+The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013.
+To create the PDF version, within Powerpoint, simply do a "Save As"
+and pick "PDF" as the format.
+
+This cheat sheet was inspired by the RStudio Data Wrangling Cheatsheet[1], written by Irv Lustig, Princeton Consultants[2].
+
+[1]: https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf
+[2]: https://www.princetonoptimization.com/
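Note (illustrative sketch, not part of the patch): the diff below adds doc/data/air_quality_long.csv, a long-format table with one air-quality measurement per row (city, country, date.utc, location, parameter, value, unit). Assuming the imported tree is on disk and using only standard pandas calls (read_csv, pivot_table), such a file could be loaded and reshaped to a wide layout roughly as follows; the file path and the pivot arguments are illustrative assumptions, not something this patch defines.

    # Sketch only: load the long-format air quality data and pivot it wide.
    import pandas as pd

    # One measurement per row; "date.utc" holds the timestamp.
    air_quality = pd.read_csv(
        "doc/data/air_quality_long.csv", parse_dates=["date.utc"]
    )

    # One column per (city, parameter) pair, averaging duplicate timestamps.
    wide = air_quality.pivot_table(
        values="value",
        index="date.utc",
        columns=["city", "parameter"],
        aggfunc="mean",
    )
    print(wide.head())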
diff --git a/doc/data/air_quality_long.csv b/doc/data/air_quality_long.csv
new file mode 100644
index 00000000..6225d65d
--- /dev/null
+++ b/doc/data/air_quality_long.csv
@@ -0,0 +1,5273 @@
+city,country,date.utc,location,parameter,value,unit
+Antwerpen,BE,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0,µg/m³
+Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5,µg/m³
+Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,pm25,18.5,µg/m³
+Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0,µg/m³
+Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5,µg/m³
+Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,pm25,7.5,µg/m³
+Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,pm25,7.0,µg/m³
+Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,pm25,7.0,µg/m³
+Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³
+Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,pm25,15.0,µg/m³
+Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,pm25,11.0,µg/m³
+Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,pm25,12.0,µg/m³
+Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,pm25,3.0,µg/m³
+Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,pm25,16.0,µg/m³
+Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,pm25,3.5,µg/m³
+Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,pm25,8.5,µg/m³
+Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,pm25,6.0,µg/m³
+Antwerpen,BE,2019-06-08 01:00:00+00:00,BETR801,pm25,6.5,µg/m³
+Antwerpen,BE,2019-06-06 01:00:00+00:00,BETR801,pm25,6.5,µg/m³
+Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,pm25,11.0,µg/m³
+Antwerpen,BE,2019-06-04 01:00:00+00:00,BETR801,pm25,10.5,µg/m³
+Antwerpen,BE,2019-06-03 01:00:00+00:00,BETR801,pm25,12.5,µg/m³
+Antwerpen,BE,2019-06-02 01:00:00+00:00,BETR801,pm25,19.0,µg/m³
+Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,pm25,9.0,µg/m³
+Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,pm25,6.0,µg/m³
+Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,pm25,5.0,µg/m³
+Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,pm25,5.5,µg/m³
+Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,pm25,7.0,µg/m³
+Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,pm25,7.5,µg/m³
+Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,pm25,26.5,µg/m³
+Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,pm25,10.0,µg/m³
+Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,pm25,13.0,µg/m³
+Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,pm25,7.5,µg/m³
+Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,pm25,15.5,µg/m³
+Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,pm25,20.5,µg/m³
+Antwerpen,BE,2019-05-20 17:00:00+00:00,BETR801,pm25,18.5,µg/m³
+Antwerpen,BE,2019-05-20 16:00:00+00:00,BETR801,pm25,17.0,µg/m³
+Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,pm25,18.5,µg/m³
+Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,pm25,14.5,µg/m³
+Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,pm25,17.0,µg/m³
+Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,pm25,17.5,µg/m³
+Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,pm25,13.5,µg/m³
+Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,pm25,10.5,µg/m³
+Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,pm25,13.5,µg/m³
+Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,pm25,19.5,µg/m³
+Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,pm25,23.5,µg/m³
+Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,pm25,22.0,µg/m³
+Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,pm25,25.0,µg/m³
+Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,pm25,24.5,µg/m³
+Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,pm25,15.0,µg/m³
+Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,pm25,18.5,µg/m³
+Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,pm25,28.0,µg/m³
+Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,pm25,35.5,µg/m³
+Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,pm25,40.0,µg/m³
+Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,pm25,43.5,µg/m³
+Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,pm25,35.0,µg/m³
+Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,pm25,34.0,µg/m³
+Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,pm25,36.5,µg/m³
+Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,pm25,44.0,µg/m³
+Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,pm25,43.5,µg/m³
+Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,pm25,46.0,µg/m³
+Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,pm25,43.0,µg/m³
+Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,pm25,41.0,µg/m³
+Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,pm25,41.5,µg/m³
+Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,pm25,42.5,µg/m³
+Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,pm25,51.5,µg/m³
+Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,pm25,56.0,µg/m³
+Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,pm25,58.5,µg/m³
+Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,pm25,60.0,µg/m³
+Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,pm25,56.5,µg/m³
+Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,pm25,52.5,µg/m³
+Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,pm25,51.5,µg/m³
+Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,pm25,52.0,µg/m³
+Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,pm25,49.5,µg/m³
+Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,pm25,45.5,µg/m³
+Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,pm25,42.0,µg/m³
+Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,pm25,40.5,µg/m³
+Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,pm25,41.0,µg/m³
+Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,pm25,36.5,µg/m³
+Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,pm25,37.0,µg/m³
+Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,pm25,24.0,µg/m³
+Antwerpen,BE,2019-05-17 01:00:00+00:00,BETR801,pm25,13.5,µg/m³
+Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,pm25,11.0,µg/m³
+Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,pm25,12.5,µg/m³
+Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,pm25,13.0,µg/m³
+Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,pm25,4.0,µg/m³
+Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,pm25,4.0,µg/m³
+Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,pm25,5.5,µg/m³
+Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,pm25,5.0,µg/m³
+Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,pm25,6.0,µg/m³
+Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,pm25,6.0,µg/m³
+Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,pm25,19.5,µg/m³
+Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,pm25,17.0,µg/m³
+Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,pm25,13.5,µg/m³
+Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,pm25,11.5,µg/m³
+Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,pm25,3.5,µg/m³
+Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,pm25,4.5,µg/m³
+Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,pm25,14.0,µg/m³
+Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,pm25,14.5,µg/m³
+Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,pm25,14.0,µg/m³
+Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5,µg/m³
+Antwerpen,BE,2019-05-06 02:00:00+00:00,BETR801,pm25,10.5,µg/m³
+Antwerpen,BE,2019-05-06 01:00:00+00:00,BETR801,pm25,10.0,µg/m³
+Antwerpen,BE,2019-05-05 02:00:00+00:00,BETR801,pm25,3.0,µg/m³
+Antwerpen,BE,2019-05-05 01:00:00+00:00,BETR801,pm25,5.0,µg/m³
+Antwerpen,BE,2019-05-04 02:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-05-04 01:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-03 02:00:00+00:00,BETR801,pm25,9.5,µg/m³ +Antwerpen,BE,2019-05-03 01:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-05-02 02:00:00+00:00,BETR801,pm25,45.5,µg/m³ +Antwerpen,BE,2019-05-02 01:00:00+00:00,BETR801,pm25,46.0,µg/m³ +Antwerpen,BE,2019-05-01 02:00:00+00:00,BETR801,pm25,28.5,µg/m³ +Antwerpen,BE,2019-05-01 01:00:00+00:00,BETR801,pm25,34.5,µg/m³ +Antwerpen,BE,2019-04-30 02:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-04-30 01:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-04-29 02:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-04-29 01:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-04-28 02:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-04-28 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-04-27 02:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-04-27 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-04-26 02:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-04-26 01:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-04-25 02:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-04-25 01:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-04-24 02:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-04-24 01:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-04-23 02:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-23 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-22 02:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-04-22 01:00:00+00:00,BETR801,pm25,32.5,µg/m³ +Antwerpen,BE,2019-04-21 02:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-21 01:00:00+00:00,BETR801,pm25,27.5,µg/m³ +Antwerpen,BE,2019-04-20 02:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-20 01:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-19 01:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-18 02:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-18 01:00:00+00:00,BETR801,pm25,25.0,µg/m³ +Antwerpen,BE,2019-04-17 03:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-17 02:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-04-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³ +Antwerpen,BE,2019-04-16 02:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-16 01:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 15:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 14:00:00+00:00,BETR801,pm25,25.5,µg/m³ +Antwerpen,BE,2019-04-15 13:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 12:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 11:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-15 10:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-15 09:00:00+00:00,BETR801,pm25,21.5,µg/m³ +Antwerpen,BE,2019-04-15 08:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 07:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 06:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-15 05:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-15 04:00:00+00:00,BETR801,pm25,23.5,µg/m³ +Antwerpen,BE,2019-04-15 03:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-15 02:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-15 01:00:00+00:00,BETR801,pm25,25.5,µg/m³ +Antwerpen,BE,2019-04-12 02:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-04-12 01:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-04-11 
02:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-04-11 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-10 02:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-10 01:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-09 13:00:00+00:00,BETR801,pm25,38.0,µg/m³ +Antwerpen,BE,2019-04-09 12:00:00+00:00,BETR801,pm25,41.5,µg/m³ +Antwerpen,BE,2019-04-09 11:00:00+00:00,BETR801,pm25,45.0,µg/m³ +Antwerpen,BE,2019-04-09 10:00:00+00:00,BETR801,pm25,44.5,µg/m³ +Antwerpen,BE,2019-04-09 09:00:00+00:00,BETR801,pm25,43.0,µg/m³ +Antwerpen,BE,2019-04-09 08:00:00+00:00,BETR801,pm25,44.0,µg/m³ +Antwerpen,BE,2019-04-09 07:00:00+00:00,BETR801,pm25,46.5,µg/m³ +Antwerpen,BE,2019-04-09 06:00:00+00:00,BETR801,pm25,52.5,µg/m³ +Antwerpen,BE,2019-04-09 05:00:00+00:00,BETR801,pm25,68.0,µg/m³ +Antwerpen,BE,2019-04-09 04:00:00+00:00,BETR801,pm25,83.5,µg/m³ +Antwerpen,BE,2019-04-09 03:00:00+00:00,BETR801,pm25,99.0,µg/m³ +Antwerpen,BE,2019-04-09 02:00:00+00:00,BETR801,pm25,91.5,µg/m³ +Antwerpen,BE,2019-04-09 01:00:00+00:00,BETR801,pm25,76.0,µg/m³ +London,GB,2019-06-21 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-19 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-18 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-17 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-15 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ 
+London,GB,2019-06-13 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ 
+London,GB,2019-06-11 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-08 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 
15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 
13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,pm25,5.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 
11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 
09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 
06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 
04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-05-23 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London 
Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 04:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 00:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London 
Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-18 00:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,pm25,12.0,µg/m³ 
+London,GB,2019-05-16 21:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 
19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 
17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London 
Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ 
+London,GB,2019-05-08 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-07 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 03:00:00+00:00,London 
Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 
01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 01:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-02 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 16:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-02 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-02 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-02 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-02 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-02 11:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-02 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 07:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 06:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 05:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 04:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 03:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 02:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 01:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 00:00:00+00:00,London 
Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 22:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 21:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 20:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 19:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-01 18:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-01 17:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-01 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 15:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 07:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 06:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 05:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 03:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-01 00:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-30 23:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 22:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 21:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 20:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 19:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 18:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 17:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 16:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 15:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 14:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 13:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 12:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 11:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 10:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 08:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 07:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 06:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 04:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 03:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-30 02:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-30 01:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-30 00:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-29 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-29 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-29 21:00:00+00:00,London 
Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-29 20:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-29 19:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-29 18:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 17:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 16:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 11:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-29 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-29 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-29 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-29 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-29 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-29 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-29 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-29 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-29 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-28 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-28 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-28 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-28 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-04-28 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-04-28 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 
23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-25 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-25 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-25 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-25 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-25 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-25 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-25 03:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-25 02:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-25 00:00:00+00:00,London Westminster,pm25,21.0,µg/m³ +London,GB,2019-04-24 23:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-24 22:00:00+00:00,London Westminster,pm25,23.0,µg/m³ +London,GB,2019-04-24 21:00:00+00:00,London Westminster,pm25,24.0,µg/m³ 
+London,GB,2019-04-24 20:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-24 19:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-24 18:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-24 17:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-24 16:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-24 15:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-24 14:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-24 13:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-24 12:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-24 11:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-24 10:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-24 09:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-24 08:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-24 07:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-24 06:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-24 05:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-24 04:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-24 03:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-24 02:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-24 00:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-23 23:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-23 22:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-23 21:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-23 20:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-23 19:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-23 18:00:00+00:00,London Westminster,pm25,33.0,µg/m³ +London,GB,2019-04-23 17:00:00+00:00,London Westminster,pm25,33.0,µg/m³ +London,GB,2019-04-23 16:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-23 15:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-23 14:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-23 13:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-23 12:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-23 11:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-23 10:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-23 09:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-23 08:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-23 07:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-23 06:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-23 05:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-23 04:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-23 03:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-23 02:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-23 01:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-23 00:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-22 23:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-22 22:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-22 21:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-22 20:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-22 19:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-22 
18:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-22 17:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-22 16:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 15:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 14:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 13:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 12:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 11:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 10:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 09:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-22 08:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-22 07:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-22 06:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-22 05:00:00+00:00,London Westminster,pm25,33.0,µg/m³ +London,GB,2019-04-22 04:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-22 03:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-22 02:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-22 01:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-22 00:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-21 23:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-21 22:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-21 21:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 20:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 19:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 18:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-21 17:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 16:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 15:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 14:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 13:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 12:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 11:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 10:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 09:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 08:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 07:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 06:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 05:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 04:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 03:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 02:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 01:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 00:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-20 23:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-20 22:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 21:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 20:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 19:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 18:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 17:00:00+00:00,London 
Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 16:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 15:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 14:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 13:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 12:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 11:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 10:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 09:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 08:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 07:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 06:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 05:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 04:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 03:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 02:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 01:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 00:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 23:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 22:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 21:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 20:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 19:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 18:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 17:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 16:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-19 15:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-19 14:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 13:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 12:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 11:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 10:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 09:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 08:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-19 07:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-19 06:00:00+00:00,London Westminster,pm25,31.0,µg/m³ +London,GB,2019-04-19 05:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-19 04:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-19 03:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-19 02:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-19 00:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-18 23:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-18 22:00:00+00:00,London Westminster,pm25,47.0,µg/m³ +London,GB,2019-04-18 21:00:00+00:00,London Westminster,pm25,49.0,µg/m³ +London,GB,2019-04-18 20:00:00+00:00,London Westminster,pm25,50.0,µg/m³ +London,GB,2019-04-18 19:00:00+00:00,London Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 18:00:00+00:00,London Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 17:00:00+00:00,London Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 16:00:00+00:00,London Westminster,pm25,52.0,µg/m³ +London,GB,2019-04-18 15:00:00+00:00,London 
Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 14:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 13:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 12:00:00+00:00,London Westminster,pm25,54.0,µg/m³ +London,GB,2019-04-18 11:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 10:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 09:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 08:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 07:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 06:00:00+00:00,London Westminster,pm25,54.0,µg/m³ +London,GB,2019-04-18 05:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 04:00:00+00:00,London Westminster,pm25,52.0,µg/m³ +London,GB,2019-04-18 03:00:00+00:00,London Westminster,pm25,50.0,µg/m³ +London,GB,2019-04-18 02:00:00+00:00,London Westminster,pm25,48.0,µg/m³ +London,GB,2019-04-18 01:00:00+00:00,London Westminster,pm25,46.0,µg/m³ +London,GB,2019-04-18 00:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-17 23:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 22:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-17 21:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 20:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 19:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 18:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 17:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 16:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-17 15:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 14:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 13:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 12:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 11:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 10:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 09:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 08:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 07:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 06:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 05:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-17 04:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 03:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 02:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-17 00:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 23:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 22:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 21:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 20:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 19:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 18:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 17:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 15:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 14:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 13:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 12:00:00+00:00,London 
Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-16 11:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-16 10:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-16 09:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-16 08:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-16 07:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-16 06:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-16 05:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-16 04:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-16 03:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-16 02:00:00+00:00,London Westminster,pm25,31.0,µg/m³ +London,GB,2019-04-16 00:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 23:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 22:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 21:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 20:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 19:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 18:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 17:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 16:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 15:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-15 14:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-15 13:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-15 12:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-15 11:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-15 10:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-15 09:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-15 08:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-15 07:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-15 06:00:00+00:00,London Westminster,pm25,23.0,µg/m³ +London,GB,2019-04-15 05:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-15 04:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-15 03:00:00+00:00,London Westminster,pm25,21.0,µg/m³ +London,GB,2019-04-15 02:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-15 01:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-15 00:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-14 23:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-14 22:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-14 21:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-14 20:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-14 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-14 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 16:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 11:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 10:00:00+00:00,London 
Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 09:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-14 01:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 00:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-13 23:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 14:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 13:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 08:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 04:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 03:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 00:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 23:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 16:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 15:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 14:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 12:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 10:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 09:00:00+00:00,London 
Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 05:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 02:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-10 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-10 22:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-10 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-10 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-10 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-10 13:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-10 12:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-10 11:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-10 10:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-10 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-10 08:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-10 07:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-10 06:00:00+00:00,London Westminster,pm25,21.0,µg/m³ +London,GB,2019-04-10 05:00:00+00:00,London Westminster,pm25,22.0,µg/m³ 
+London,GB,2019-04-10 04:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-10 03:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-10 02:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-10 01:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-10 00:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-09 23:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-09 22:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-09 21:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-09 20:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-09 19:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-09 18:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-09 17:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-09 16:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-09 15:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-09 14:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-09 13:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-09 12:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 11:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 10:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 09:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 08:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 07:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 06:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-09 05:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-09 04:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 03:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 02:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-06-20 19:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-20 18:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-20 17:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-20 16:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-20 15:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-20 14:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-20 13:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-19 10:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-06-19 09:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-06-18 22:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-06-18 21:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-18 20:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-18 19:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-06-18 08:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-06-18 07:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-06-18 06:00:00+00:00,FR04014,no2,51.4,µg/m³ +Paris,FR,2019-06-18 05:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-06-18 04:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-18 03:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-06-18 02:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-18 01:00:00+00:00,FR04014,no2,60.1,µg/m³ +Paris,FR,2019-06-18 00:00:00+00:00,FR04014,no2,66.2,µg/m³ 
+Paris,FR,2019-06-17 23:00:00+00:00,FR04014,no2,73.3,µg/m³ +Paris,FR,2019-06-17 22:00:00+00:00,FR04014,no2,51.0,µg/m³ +Paris,FR,2019-06-17 21:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-17 20:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-06-17 19:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 18:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-17 17:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-06-17 16:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-06-17 15:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-17 14:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-17 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-17 12:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-06-17 11:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 10:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-17 09:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-06-17 08:00:00+00:00,FR04014,no2,51.6,µg/m³ +Paris,FR,2019-06-17 07:00:00+00:00,FR04014,no2,54.4,µg/m³ +Paris,FR,2019-06-17 06:00:00+00:00,FR04014,no2,52.3,µg/m³ +Paris,FR,2019-06-17 05:00:00+00:00,FR04014,no2,44.8,µg/m³ +Paris,FR,2019-06-17 04:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-06-17 03:00:00+00:00,FR04014,no2,49.1,µg/m³ +Paris,FR,2019-06-17 02:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-06-17 01:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-17 00:00:00+00:00,FR04014,no2,69.3,µg/m³ +Paris,FR,2019-06-16 23:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-06-16 22:00:00+00:00,FR04014,no2,56.6,µg/m³ +Paris,FR,2019-06-16 21:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-16 20:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-16 18:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-06-16 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-16 16:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-16 15:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-06-16 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 12:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 11:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-06-16 10:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 09:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-16 08:00:00+00:00,FR04014,no2,9.9,µg/m³ +Paris,FR,2019-06-16 07:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-16 06:00:00+00:00,FR04014,no2,11.6,µg/m³ +Paris,FR,2019-06-16 05:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-16 04:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-16 03:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 02:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-16 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-06-16 00:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-15 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-15 22:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-15 21:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-06-15 20:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-15 19:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-15 18:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 17:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 16:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-15 15:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-06-15 14:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-15 13:00:00+00:00,FR04014,no2,9.0,µg/m³ +Paris,FR,2019-06-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-15 
11:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 10:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-06-15 09:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 08:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-06-15 07:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-15 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-15 02:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-06-15 01:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-15 00:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-14 23:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-14 22:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-14 21:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-06-14 20:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-14 19:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-14 18:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-14 17:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-14 16:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-06-14 15:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-14 14:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-14 13:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-14 12:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-06-14 11:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-14 10:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-06-14 09:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-06-14 08:00:00+00:00,FR04014,no2,34.3,µg/m³ +Paris,FR,2019-06-14 07:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-14 06:00:00+00:00,FR04014,no2,64.3,µg/m³ +Paris,FR,2019-06-14 05:00:00+00:00,FR04014,no2,49.3,µg/m³ +Paris,FR,2019-06-14 04:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-14 03:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-06-14 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-06-14 01:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-14 00:00:00+00:00,FR04014,no2,74.2,µg/m³ +Paris,FR,2019-06-13 23:00:00+00:00,FR04014,no2,78.3,µg/m³ +Paris,FR,2019-06-13 22:00:00+00:00,FR04014,no2,77.9,µg/m³ +Paris,FR,2019-06-13 21:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-13 20:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-06-13 19:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-13 18:00:00+00:00,FR04014,no2,24.0,µg/m³ +Paris,FR,2019-06-13 17:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-13 16:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-13 15:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-13 14:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-13 13:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-06-13 12:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-13 11:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-06-13 10:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-13 09:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-13 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-13 07:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-13 06:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-13 05:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-06-13 04:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-13 03:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-06-13 02:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-13 01:00:00+00:00,FR04014,no2,18.7,µg/m³ +Paris,FR,2019-06-13 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-12 23:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-06-12 22:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-06-12 21:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-12 
20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-06-12 19:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-12 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-12 17:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-06-12 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-06-12 15:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-06-12 14:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-06-12 13:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-12 12:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-12 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 09:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-12 08:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-12 07:00:00+00:00,FR04014,no2,44.4,µg/m³ +Paris,FR,2019-06-12 06:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-06-12 05:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-12 04:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-06-12 03:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-06-12 02:00:00+00:00,FR04014,no2,34.7,µg/m³ +Paris,FR,2019-06-12 01:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-12 00:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-11 23:00:00+00:00,FR04014,no2,41.5,µg/m³ +Paris,FR,2019-06-11 22:00:00+00:00,FR04014,no2,59.4,µg/m³ +Paris,FR,2019-06-11 21:00:00+00:00,FR04014,no2,54.1,µg/m³ +Paris,FR,2019-06-11 20:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-11 19:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-11 18:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-11 17:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-11 16:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-11 15:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-06-11 14:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-11 13:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-11 12:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-06-11 11:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-06-11 10:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-11 09:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-11 08:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-11 07:00:00+00:00,FR04014,no2,58.0,µg/m³ +Paris,FR,2019-06-11 06:00:00+00:00,FR04014,no2,55.4,µg/m³ +Paris,FR,2019-06-11 05:00:00+00:00,FR04014,no2,58.7,µg/m³ +Paris,FR,2019-06-11 04:00:00+00:00,FR04014,no2,52.7,µg/m³ +Paris,FR,2019-06-11 03:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-06-11 02:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-11 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-11 00:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-10 23:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-10 22:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-10 21:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-06-10 20:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-10 19:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-06-10 18:00:00+00:00,FR04014,no2,18.4,µg/m³ +Paris,FR,2019-06-10 17:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-10 16:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-10 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 14:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-06-10 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-10 12:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-10 10:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-10 09:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-06-10 
08:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-10 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-10 06:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-10 05:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-06-10 04:00:00+00:00,FR04014,no2,13.7,µg/m³ +Paris,FR,2019-06-10 03:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-10 02:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-10 01:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-10 00:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-06-09 23:00:00+00:00,FR04014,no2,39.9,µg/m³ +Paris,FR,2019-06-09 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-06-09 21:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-06-09 20:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-06-09 19:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-06-09 18:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-09 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-09 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-09 15:00:00+00:00,FR04014,no2,7.2,µg/m³ +Paris,FR,2019-06-09 14:00:00+00:00,FR04014,no2,7.9,µg/m³ +Paris,FR,2019-06-09 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-09 12:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 11:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 10:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-09 09:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-09 08:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-09 07:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-09 06:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-09 05:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-06-09 04:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-06-09 03:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-09 02:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-06-09 01:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-06-09 00:00:00+00:00,FR04014,no2,55.9,µg/m³ +Paris,FR,2019-06-08 23:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-06-08 22:00:00+00:00,FR04014,no2,34.8,µg/m³ +Paris,FR,2019-06-08 21:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-08 18:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-06-08 17:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-06-08 16:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 14:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 13:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-08 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-08 11:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-08 10:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 08:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-08 07:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-08 06:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-08 05:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 04:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-08 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-08 02:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-08 01:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-08 00:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-06-07 23:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-07 22:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-06-07 21:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-06-07 20:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-07 19:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-06-07 
18:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-07 17:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 15:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-07 14:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-07 13:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-07 12:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-07 11:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-07 10:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-06-07 08:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-07 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-07 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-06 14:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-06 13:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-06 12:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-06 11:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-06-06 10:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-06-06 09:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-06-06 08:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-06-06 07:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-06-06 06:00:00+00:00,FR04014,no2,40.5,µg/m³ +Paris,FR,2019-06-06 05:00:00+00:00,FR04014,no2,40.3,µg/m³ +Paris,FR,2019-06-06 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-06-06 03:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-06-06 02:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-06 01:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-06 00:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-06-05 23:00:00+00:00,FR04014,no2,31.8,µg/m³ +Paris,FR,2019-06-05 22:00:00+00:00,FR04014,no2,30.3,µg/m³ +Paris,FR,2019-06-05 21:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-06-05 20:00:00+00:00,FR04014,no2,37.5,µg/m³ +Paris,FR,2019-06-05 19:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-06-05 18:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-06-05 17:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-06-05 16:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-05 15:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-05 14:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-05 13:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-06-05 12:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-06-05 11:00:00+00:00,FR04014,no2,59.0,µg/m³ +Paris,FR,2019-06-05 10:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-06-05 09:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-06-05 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-05 07:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-05 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-05 05:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-05 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-05 03:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-06-05 02:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-06-05 01:00:00+00:00,FR04014,no2,10.8,µg/m³ +Paris,FR,2019-06-05 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-04 23:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-04 22:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-06-04 21:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 20:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-04 19:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-04 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-06-04 17:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-04 16:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 
15:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-06-04 14:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-04 13:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-06-04 12:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-06-04 11:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-04 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-04 09:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-06-04 08:00:00+00:00,FR04014,no2,50.8,µg/m³ +Paris,FR,2019-06-04 07:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-04 06:00:00+00:00,FR04014,no2,47.7,µg/m³ +Paris,FR,2019-06-04 05:00:00+00:00,FR04014,no2,36.5,µg/m³ +Paris,FR,2019-06-04 04:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-04 03:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-06-04 02:00:00+00:00,FR04014,no2,35.0,µg/m³ +Paris,FR,2019-06-04 01:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-04 00:00:00+00:00,FR04014,no2,52.4,µg/m³ +Paris,FR,2019-06-03 23:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-03 22:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-06-03 21:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-06-03 20:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-06-03 19:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-03 18:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-03 17:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-06-03 16:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-03 15:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-03 14:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-03 13:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-03 12:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-03 11:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-03 10:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-03 09:00:00+00:00,FR04014,no2,46.0,µg/m³ +Paris,FR,2019-06-03 08:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-03 07:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-06-03 06:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-06-03 05:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-03 04:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-03 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-03 02:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-03 01:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-03 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-02 23:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-02 22:00:00+00:00,FR04014,no2,27.6,µg/m³ +Paris,FR,2019-06-02 21:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-02 20:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-02 19:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-02 18:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-02 17:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 16:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 15:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-06-02 14:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-02 13:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-02 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-02 11:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-02 10:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 09:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-06-02 08:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-02 07:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 06:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-02 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-02 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-02 
03:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-02 02:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-02 01:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-02 00:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-06-01 23:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-01 22:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-06-01 21:00:00+00:00,FR04014,no2,49.4,µg/m³ +Paris,FR,2019-06-01 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-01 19:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-01 18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-06-01 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 16:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 15:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 14:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-06-01 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 12:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-01 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-01 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-01 09:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-01 08:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-06-01 07:00:00+00:00,FR04014,no2,46.4,µg/m³ +Paris,FR,2019-06-01 06:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-01 02:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-01 01:00:00+00:00,FR04014,no2,74.8,µg/m³ +Paris,FR,2019-06-01 00:00:00+00:00,FR04014,no2,84.7,µg/m³ +Paris,FR,2019-05-31 23:00:00+00:00,FR04014,no2,81.7,µg/m³ +Paris,FR,2019-05-31 22:00:00+00:00,FR04014,no2,68.0,µg/m³ +Paris,FR,2019-05-31 21:00:00+00:00,FR04014,no2,60.2,µg/m³ +Paris,FR,2019-05-31 20:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-31 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-31 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-31 17:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-31 16:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-31 15:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 14:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 13:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-31 12:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-31 11:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-31 10:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-31 09:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-31 08:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-31 07:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-31 06:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-31 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-31 04:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-31 03:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-31 02:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-31 01:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-31 00:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-05-30 23:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-30 22:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-30 21:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-05-30 20:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-30 19:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-30 18:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-30 17:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-30 16:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-30 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-30 14:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 13:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-30 
12:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-05-30 11:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-30 09:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-30 08:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-05-30 07:00:00+00:00,FR04014,no2,18.3,µg/m³ +Paris,FR,2019-05-30 06:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-30 05:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-30 04:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-05-30 03:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-30 02:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-30 01:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-05-30 00:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-05-29 23:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-29 22:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-29 21:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-29 20:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-29 19:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-29 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-29 17:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-29 16:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-29 15:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-29 14:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-29 13:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-29 12:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-29 11:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-05-29 10:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-05-29 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-29 08:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-05-29 07:00:00+00:00,FR04014,no2,50.5,µg/m³ +Paris,FR,2019-05-29 06:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-29 05:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-05-29 04:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-29 03:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-29 02:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-29 01:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-29 00:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-05-28 23:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-28 22:00:00+00:00,FR04014,no2,20.2,µg/m³ +Paris,FR,2019-05-28 21:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-28 20:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-28 19:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-28 18:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-05-28 17:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-28 16:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-28 15:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-28 14:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-28 13:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-28 12:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-28 11:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-28 10:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-28 09:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-28 08:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-28 07:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-05-28 06:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-28 05:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-28 04:00:00+00:00,FR04014,no2,8.9,µg/m³ +Paris,FR,2019-05-28 03:00:00+00:00,FR04014,no2,6.1,µg/m³ +Paris,FR,2019-05-28 02:00:00+00:00,FR04014,no2,6.4,µg/m³ +Paris,FR,2019-05-28 01:00:00+00:00,FR04014,no2,8.2,µg/m³ +Paris,FR,2019-05-28 
00:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-27 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-05-27 22:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-27 21:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-27 20:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-27 19:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-27 18:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-05-27 17:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-27 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-05-27 15:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-05-27 14:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-27 13:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-27 12:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-27 11:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-05-27 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-27 09:00:00+00:00,FR04014,no2,31.4,µg/m³ +Paris,FR,2019-05-27 08:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-27 07:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-27 06:00:00+00:00,FR04014,no2,29.1,µg/m³ +Paris,FR,2019-05-27 05:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-27 04:00:00+00:00,FR04014,no2,6.5,µg/m³ +Paris,FR,2019-05-27 03:00:00+00:00,FR04014,no2,4.8,µg/m³ +Paris,FR,2019-05-27 02:00:00+00:00,FR04014,no2,5.9,µg/m³ +Paris,FR,2019-05-27 01:00:00+00:00,FR04014,no2,7.1,µg/m³ +Paris,FR,2019-05-27 00:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-05-26 23:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-26 22:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-05-26 21:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-26 20:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-26 19:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-26 18:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-26 17:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-26 16:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-05-26 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-26 14:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-26 13:00:00+00:00,FR04014,no2,12.5,µg/m³ +Paris,FR,2019-05-26 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-05-26 11:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-26 10:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-05-26 09:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-26 08:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-26 07:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-05-26 06:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-26 05:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-26 04:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-26 03:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-26 02:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-05-26 01:00:00+00:00,FR04014,no2,49.8,µg/m³ +Paris,FR,2019-05-26 00:00:00+00:00,FR04014,no2,67.0,µg/m³ +Paris,FR,2019-05-25 23:00:00+00:00,FR04014,no2,70.2,µg/m³ +Paris,FR,2019-05-25 22:00:00+00:00,FR04014,no2,63.9,µg/m³ +Paris,FR,2019-05-25 21:00:00+00:00,FR04014,no2,39.5,µg/m³ +Paris,FR,2019-05-25 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-25 19:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-25 18:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-25 17:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-25 16:00:00+00:00,FR04014,no2,31.9,µg/m³ +Paris,FR,2019-05-25 15:00:00+00:00,FR04014,no2,30.0,µg/m³ +Paris,FR,2019-05-25 14:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-05-25 13:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-25 
12:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-05-25 11:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-25 10:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-05-25 09:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-05-25 08:00:00+00:00,FR04014,no2,44.5,µg/m³ +Paris,FR,2019-05-25 07:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-05-25 06:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-25 02:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-25 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-25 00:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-05-24 23:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-24 22:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-05-24 21:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-05-24 20:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-24 19:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-24 18:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-24 17:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-24 16:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-05-24 15:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-24 14:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-24 13:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-24 12:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-24 11:00:00+00:00,FR04014,no2,40.6,µg/m³ +Paris,FR,2019-05-24 10:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-05-24 09:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-05-24 08:00:00+00:00,FR04014,no2,45.9,µg/m³ +Paris,FR,2019-05-24 07:00:00+00:00,FR04014,no2,54.8,µg/m³ +Paris,FR,2019-05-24 06:00:00+00:00,FR04014,no2,40.7,µg/m³ +Paris,FR,2019-05-24 05:00:00+00:00,FR04014,no2,35.9,µg/m³ +Paris,FR,2019-05-24 04:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-05-24 03:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-05-24 02:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-24 01:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-24 00:00:00+00:00,FR04014,no2,32.8,µg/m³ +Paris,FR,2019-05-23 23:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-23 22:00:00+00:00,FR04014,no2,61.9,µg/m³ +Paris,FR,2019-05-23 21:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-05-23 20:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-05-23 19:00:00+00:00,FR04014,no2,28.0,µg/m³ +Paris,FR,2019-05-23 18:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-05-23 17:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-23 16:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-23 15:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-23 14:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-23 13:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-05-23 12:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-05-23 11:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-05-23 10:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-05-23 09:00:00+00:00,FR04014,no2,79.4,µg/m³ +Paris,FR,2019-05-23 08:00:00+00:00,FR04014,no2,97.0,µg/m³ +Paris,FR,2019-05-23 07:00:00+00:00,FR04014,no2,91.8,µg/m³ +Paris,FR,2019-05-23 06:00:00+00:00,FR04014,no2,79.6,µg/m³ +Paris,FR,2019-05-23 05:00:00+00:00,FR04014,no2,68.7,µg/m³ +Paris,FR,2019-05-23 04:00:00+00:00,FR04014,no2,71.9,µg/m³ +Paris,FR,2019-05-23 03:00:00+00:00,FR04014,no2,76.8,µg/m³ +Paris,FR,2019-05-23 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-05-23 01:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-05-23 00:00:00+00:00,FR04014,no2,53.3,µg/m³ +Paris,FR,2019-05-22 23:00:00+00:00,FR04014,no2,62.1,µg/m³ +Paris,FR,2019-05-22 22:00:00+00:00,FR04014,no2,29.8,µg/m³ +Paris,FR,2019-05-22 
21:00:00+00:00,FR04014,no2,37.7,µg/m³ +Paris,FR,2019-05-22 20:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-05-22 19:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-22 18:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-05-22 17:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-05-22 16:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-05-22 15:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-22 14:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-05-22 13:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-05-22 12:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-05-22 11:00:00+00:00,FR04014,no2,42.6,µg/m³ +Paris,FR,2019-05-22 10:00:00+00:00,FR04014,no2,57.8,µg/m³ +Paris,FR,2019-05-22 09:00:00+00:00,FR04014,no2,63.1,µg/m³ +Paris,FR,2019-05-22 08:00:00+00:00,FR04014,no2,70.8,µg/m³ +Paris,FR,2019-05-22 07:00:00+00:00,FR04014,no2,75.4,µg/m³ +Paris,FR,2019-05-22 06:00:00+00:00,FR04014,no2,75.7,µg/m³ +Paris,FR,2019-05-22 05:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-05-22 04:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-05-22 03:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-22 02:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-22 01:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-22 00:00:00+00:00,FR04014,no2,27.1,µg/m³ +Paris,FR,2019-05-21 23:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-21 22:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-21 21:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-05-21 20:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-05-21 19:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-05-21 18:00:00+00:00,FR04014,no2,54.3,µg/m³ +Paris,FR,2019-05-21 17:00:00+00:00,FR04014,no2,75.0,µg/m³ +Paris,FR,2019-05-21 16:00:00+00:00,FR04014,no2,42.3,µg/m³ +Paris,FR,2019-05-21 15:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-21 14:00:00+00:00,FR04014,no2,47.8,µg/m³ +Paris,FR,2019-05-21 13:00:00+00:00,FR04014,no2,49.7,µg/m³ +Paris,FR,2019-05-21 12:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-05-21 11:00:00+00:00,FR04014,no2,25.5,µg/m³ +Paris,FR,2019-05-21 10:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-21 09:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-21 08:00:00+00:00,FR04014,no2,54.2,µg/m³ +Paris,FR,2019-05-21 07:00:00+00:00,FR04014,no2,56.0,µg/m³ +Paris,FR,2019-05-21 06:00:00+00:00,FR04014,no2,62.6,µg/m³ +Paris,FR,2019-05-21 05:00:00+00:00,FR04014,no2,38.0,µg/m³ +Paris,FR,2019-05-21 04:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-21 03:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-21 02:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-21 01:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-21 00:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-20 23:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-20 22:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-05-20 21:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-20 20:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-20 19:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-05-20 18:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-20 17:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-20 16:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-05-20 15:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-20 14:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-05-20 13:00:00+00:00,FR04014,no2,23.7,µg/m³ +Paris,FR,2019-05-20 12:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-20 11:00:00+00:00,FR04014,no2,35.4,µg/m³ +Paris,FR,2019-05-20 10:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-05-20 
09:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-05-20 08:00:00+00:00,FR04014,no2,46.1,µg/m³ +Paris,FR,2019-05-20 07:00:00+00:00,FR04014,no2,46.9,µg/m³ +Paris,FR,2019-05-20 06:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-20 05:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-20 04:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-20 03:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-05-20 02:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-05-20 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-20 00:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-05-19 23:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-19 22:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-19 21:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-19 20:00:00+00:00,FR04014,no2,35.6,µg/m³ +Paris,FR,2019-05-19 19:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-05-19 18:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-05-19 17:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-05-19 16:00:00+00:00,FR04014,no2,32.5,µg/m³ +Paris,FR,2019-05-19 15:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-19 14:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-19 13:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-05-19 12:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-19 11:00:00+00:00,FR04014,no2,32.6,µg/m³ +Paris,FR,2019-05-19 10:00:00+00:00,FR04014,no2,31.0,µg/m³ +Paris,FR,2019-05-19 09:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-05-19 08:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-19 07:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-05-19 06:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-19 05:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-05-19 04:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-05-19 03:00:00+00:00,FR04014,no2,36.4,µg/m³ +Paris,FR,2019-05-19 02:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-05-19 01:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-05-19 00:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-05-18 23:00:00+00:00,FR04014,no2,50.2,µg/m³ +Paris,FR,2019-05-18 22:00:00+00:00,FR04014,no2,62.5,µg/m³ +Paris,FR,2019-05-18 21:00:00+00:00,FR04014,no2,59.3,µg/m³ +Paris,FR,2019-05-18 20:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-18 19:00:00+00:00,FR04014,no2,67.5,µg/m³ +Paris,FR,2019-05-18 18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-05-18 17:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-18 16:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-05-18 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-18 14:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-05-18 13:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-05-18 12:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-18 11:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-18 10:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-18 09:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-05-18 08:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-18 07:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-05-18 06:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-18 05:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-18 04:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-18 03:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-18 02:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-18 01:00:00+00:00,FR04014,no2,37.4,µg/m³ +Paris,FR,2019-05-18 00:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-05-17 23:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-05-17 22:00:00+00:00,FR04014,no2,28.2,µg/m³ +Paris,FR,2019-05-17 
21:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-17 20:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-05-17 19:00:00+00:00,FR04014,no2,24.7,µg/m³ +Paris,FR,2019-05-17 18:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-05-17 17:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-17 16:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-05-17 15:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-17 14:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-17 13:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-05-17 12:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-17 11:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-05-17 10:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-05-17 09:00:00+00:00,FR04014,no2,60.5,µg/m³ +Paris,FR,2019-05-17 08:00:00+00:00,FR04014,no2,57.5,µg/m³ +Paris,FR,2019-05-17 07:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-05-17 06:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-17 05:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-05-17 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-17 03:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-05-17 02:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-17 01:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-17 00:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-16 23:00:00+00:00,FR04014,no2,43.7,µg/m³ +Paris,FR,2019-05-16 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-05-16 21:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-16 20:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-05-16 18:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-05-16 17:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-16 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-16 15:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-05-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-05-16 13:00:00+00:00,FR04014,no2,8.5,µg/m³ +Paris,FR,2019-05-16 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-05-16 11:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-05-16 10:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-16 09:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-16 08:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-05-16 07:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-05-16 05:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-05-16 04:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-16 03:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-16 02:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-16 01:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-16 00:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-05-15 23:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-05-15 22:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-15 21:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-05-15 20:00:00+00:00,FR04014,no2,30.1,µg/m³ +Paris,FR,2019-05-15 19:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-15 18:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-15 17:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-15 16:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-15 15:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-15 14:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-05-15 13:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-05-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-15 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 09:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 
08:00:00+00:00,FR04014,no2,25.7,µg/m³ +Paris,FR,2019-05-15 07:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-15 06:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-15 05:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-15 04:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-05-15 03:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-15 02:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-15 01:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-15 00:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-14 23:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-14 22:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-05-14 21:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-14 20:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-14 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-14 18:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-14 17:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-14 16:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-14 15:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-05-14 14:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-05-14 13:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-14 12:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-05-14 11:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-05-14 10:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-14 09:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-14 08:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-14 07:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-05-14 06:00:00+00:00,FR04014,no2,46.1,µg/m³ +Paris,FR,2019-05-14 05:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-14 04:00:00+00:00,FR04014,no2,31.6,µg/m³ +Paris,FR,2019-05-14 03:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-14 02:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-14 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-14 00:00:00+00:00,FR04014,no2,20.9,µg/m³ +Paris,FR,2019-05-13 23:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-13 22:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-05-13 21:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-13 20:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-05-13 19:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-13 18:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-13 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-13 16:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-05-13 15:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-13 14:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-05-13 13:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-05-13 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-05-13 11:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-05-13 10:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-13 09:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-13 08:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-13 07:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-05-13 06:00:00+00:00,FR04014,no2,45.2,µg/m³ +Paris,FR,2019-05-13 05:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-13 04:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-05-13 03:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-05-13 02:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-13 01:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-05-13 00:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-12 23:00:00+00:00,FR04014,no2,32.5,µg/m³ +Paris,FR,2019-05-12 22:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-12 21:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-12 
20:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-12 19:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-12 18:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-12 17:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-05-12 16:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-12 15:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-05-12 14:00:00+00:00,FR04014,no2,9.1,µg/m³ +Paris,FR,2019-05-12 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-05-12 12:00:00+00:00,FR04014,no2,10.9,µg/m³ +Paris,FR,2019-05-12 11:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-05-12 10:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-05-12 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-12 08:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-05-12 07:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-05-12 06:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-12 05:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-12 04:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-05-12 03:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-05-12 02:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-12 01:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-12 00:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-11 23:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-05-11 22:00:00+00:00,FR04014,no2,27.7,µg/m³ +Paris,FR,2019-05-11 21:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-05-11 20:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-05-11 19:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-11 18:00:00+00:00,FR04014,no2,33.1,µg/m³ +Paris,FR,2019-05-11 17:00:00+00:00,FR04014,no2,32.0,µg/m³ +Paris,FR,2019-05-11 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-11 15:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-05-11 14:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-11 13:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-05-11 12:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-05-11 11:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-11 10:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-05-11 09:00:00+00:00,FR04014,no2,35.7,µg/m³ +Paris,FR,2019-05-11 08:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-11 07:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-11 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-05-11 02:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-11 01:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-11 00:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-10 23:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-10 22:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-05-10 21:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-10 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-10 19:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-05-10 18:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-10 17:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-10 16:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-05-10 15:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-10 14:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-10 13:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-05-10 12:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-10 11:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-10 10:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-05-10 09:00:00+00:00,FR04014,no2,53.4,µg/m³ +Paris,FR,2019-05-10 08:00:00+00:00,FR04014,no2,60.7,µg/m³ +Paris,FR,2019-05-10 07:00:00+00:00,FR04014,no2,57.3,µg/m³ +Paris,FR,2019-05-10 06:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-10 
05:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-10 04:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-10 03:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-05-10 02:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-05-10 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-10 00:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-09 23:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-09 22:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-05-09 21:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-09 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-05-09 19:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-09 18:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-09 17:00:00+00:00,FR04014,no2,29.9,µg/m³ +Paris,FR,2019-05-09 16:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-09 15:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-09 14:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-09 13:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-05-09 12:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-05-09 11:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-09 10:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-05-09 09:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-05-09 08:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-09 07:00:00+00:00,FR04014,no2,49.0,µg/m³ +Paris,FR,2019-05-09 06:00:00+00:00,FR04014,no2,50.7,µg/m³ +Paris,FR,2019-05-09 05:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-09 04:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-09 03:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-05-09 02:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-05-09 01:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-09 00:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-05-08 23:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-08 22:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-08 21:00:00+00:00,FR04014,no2,48.9,µg/m³ +Paris,FR,2019-05-08 20:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-08 19:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-05-08 18:00:00+00:00,FR04014,no2,27.8,µg/m³ +Paris,FR,2019-05-08 17:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-08 16:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-08 15:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-08 14:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-08 13:00:00+00:00,FR04014,no2,14.3,µg/m³ +Paris,FR,2019-05-08 12:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-08 11:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-05-08 10:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-08 09:00:00+00:00,FR04014,no2,19.7,µg/m³ +Paris,FR,2019-05-08 08:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-05-08 07:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-08 06:00:00+00:00,FR04014,no2,21.7,µg/m³ +Paris,FR,2019-05-08 05:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-05-08 04:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-08 03:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-08 02:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-08 01:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-08 00:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-07 23:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-05-07 22:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-05-07 21:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-05-07 20:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-07 19:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-05-07 18:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-05-07 
17:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-07 16:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-07 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-07 14:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-07 13:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-07 12:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-07 11:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-07 10:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-07 08:00:00+00:00,FR04014,no2,56.0,µg/m³ +Paris,FR,2019-05-07 07:00:00+00:00,FR04014,no2,67.9,µg/m³ +Paris,FR,2019-05-07 06:00:00+00:00,FR04014,no2,77.7,µg/m³ +Paris,FR,2019-05-07 05:00:00+00:00,FR04014,no2,72.4,µg/m³ +Paris,FR,2019-05-07 04:00:00+00:00,FR04014,no2,61.9,µg/m³ +Paris,FR,2019-05-07 03:00:00+00:00,FR04014,no2,50.4,µg/m³ +Paris,FR,2019-05-07 02:00:00+00:00,FR04014,no2,27.7,µg/m³ +Paris,FR,2019-05-07 01:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-07 00:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-05-06 23:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-05-06 22:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-06 21:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-06 20:00:00+00:00,FR04014,no2,35.9,µg/m³ +Paris,FR,2019-05-06 19:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-05-06 18:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-06 17:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-05-06 16:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-05-06 15:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-05-06 14:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-06 13:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-06 12:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-05-06 11:00:00+00:00,FR04014,no2,44.3,µg/m³ +Paris,FR,2019-05-06 10:00:00+00:00,FR04014,no2,42.4,µg/m³ +Paris,FR,2019-05-06 09:00:00+00:00,FR04014,no2,44.2,µg/m³ +Paris,FR,2019-05-06 08:00:00+00:00,FR04014,no2,52.5,µg/m³ +Paris,FR,2019-05-06 07:00:00+00:00,FR04014,no2,68.9,µg/m³ +Paris,FR,2019-05-06 06:00:00+00:00,FR04014,no2,62.4,µg/m³ +Paris,FR,2019-05-06 05:00:00+00:00,FR04014,no2,56.7,µg/m³ +Paris,FR,2019-05-06 04:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-05-06 03:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-06 02:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-05-06 01:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-05-06 00:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-05-05 23:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-05-05 22:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-05-05 21:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-05-05 20:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-05 19:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-05 18:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-05 17:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-05 16:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-05 15:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-05 14:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-05-05 13:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-05 12:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-05 11:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-05-05 10:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-05 09:00:00+00:00,FR04014,no2,11.6,µg/m³ +Paris,FR,2019-05-05 08:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-05 07:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-05 06:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-05-05 
05:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-05-05 04:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-05 03:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-05 02:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-05-05 01:00:00+00:00,FR04014,no2,25.7,µg/m³ +Paris,FR,2019-05-05 00:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-04 23:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-05-04 22:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-04 21:00:00+00:00,FR04014,no2,27.1,µg/m³ +Paris,FR,2019-05-04 20:00:00+00:00,FR04014,no2,33.1,µg/m³ +Paris,FR,2019-05-04 19:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-05-04 18:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-05-04 17:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-04 16:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-04 15:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-04 14:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-05-04 13:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-04 12:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-05-04 11:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-04 10:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-05-04 09:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-04 08:00:00+00:00,FR04014,no2,22.5,µg/m³ +Paris,FR,2019-05-04 07:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-04 06:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-04 05:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-04 04:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-05-04 03:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-04 02:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-04 01:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-04 00:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-05-03 23:00:00+00:00,FR04014,no2,31.3,µg/m³ +Paris,FR,2019-05-03 22:00:00+00:00,FR04014,no2,43.2,µg/m³ +Paris,FR,2019-05-03 21:00:00+00:00,FR04014,no2,31.8,µg/m³ +Paris,FR,2019-05-03 20:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-03 19:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-03 18:00:00+00:00,FR04014,no2,59.6,µg/m³ +Paris,FR,2019-05-03 17:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-03 16:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-05-03 15:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-05-03 14:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-05-03 13:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-05-03 12:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-03 11:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-05-03 10:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-03 09:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-05-03 08:00:00+00:00,FR04014,no2,46.4,µg/m³ +Paris,FR,2019-05-03 07:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-03 06:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-05-03 05:00:00+00:00,FR04014,no2,32.8,µg/m³ +Paris,FR,2019-05-03 04:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-03 03:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-05-03 02:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-03 01:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-03 00:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-02 23:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-05-02 22:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-02 21:00:00+00:00,FR04014,no2,31.0,µg/m³ +Paris,FR,2019-05-02 20:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-05-02 19:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-05-02 18:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-02 
17:00:00+00:00,FR04014,no2,29.9,µg/m³ +Paris,FR,2019-05-02 16:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-05-02 15:00:00+00:00,FR04014,no2,41.4,µg/m³ +Paris,FR,2019-05-02 14:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-05-02 13:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-02 12:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-02 11:00:00+00:00,FR04014,no2,32.6,µg/m³ +Paris,FR,2019-05-02 10:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-05-02 09:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-02 08:00:00+00:00,FR04014,no2,55.5,µg/m³ +Paris,FR,2019-05-02 07:00:00+00:00,FR04014,no2,51.0,µg/m³ +Paris,FR,2019-05-02 06:00:00+00:00,FR04014,no2,49.4,µg/m³ +Paris,FR,2019-05-02 05:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-05-02 04:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-02 03:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-02 02:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-02 01:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-02 00:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-01 23:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-01 22:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-01 21:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-01 20:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-01 19:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-01 18:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-05-01 17:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-01 16:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-05-01 15:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-01 14:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-01 13:00:00+00:00,FR04014,no2,22.5,µg/m³ +Paris,FR,2019-05-01 12:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-01 11:00:00+00:00,FR04014,no2,28.2,µg/m³ +Paris,FR,2019-05-01 10:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-05-01 09:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-05-01 08:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-05-01 07:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-01 06:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-01 05:00:00+00:00,FR04014,no2,28.5,µg/m³ +Paris,FR,2019-05-01 04:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-05-01 03:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-05-01 02:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-01 01:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-01 00:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-04-30 23:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-04-30 22:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-04-30 21:00:00+00:00,FR04014,no2,42.8,µg/m³ +Paris,FR,2019-04-30 20:00:00+00:00,FR04014,no2,39.6,µg/m³ +Paris,FR,2019-04-30 19:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-04-30 18:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-04-30 17:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-04-30 16:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-04-30 15:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-04-30 14:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-04-30 13:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-04-30 12:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-04-30 11:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-04-30 10:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-04-30 09:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-04-30 08:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-04-30 07:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-04-30 06:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-04-30 
05:00:00+00:00,FR04014,no2,37.3,µg/m³ +Paris,FR,2019-04-30 04:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-04-30 03:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-04-30 02:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-04-30 01:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-04-30 00:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-04-29 23:00:00+00:00,FR04014,no2,34.3,µg/m³ +Paris,FR,2019-04-29 22:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-04-29 21:00:00+00:00,FR04014,no2,31.6,µg/m³ +Paris,FR,2019-04-29 20:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-04-29 19:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-04-29 18:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-04-29 17:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-04-29 16:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-04-29 15:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-04-29 14:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-04-29 13:00:00+00:00,FR04014,no2,14.3,µg/m³ +Paris,FR,2019-04-29 12:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-04-29 11:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-04-29 10:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-29 09:00:00+00:00,FR04014,no2,28.5,µg/m³ +Paris,FR,2019-04-29 08:00:00+00:00,FR04014,no2,39.1,µg/m³ +Paris,FR,2019-04-29 07:00:00+00:00,FR04014,no2,45.4,µg/m³ +Paris,FR,2019-04-29 06:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-04-29 05:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-04-29 04:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-04-29 03:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-04-29 02:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-04-29 01:00:00+00:00,FR04014,no2,25.5,µg/m³ +Paris,FR,2019-04-29 00:00:00+00:00,FR04014,no2,26.2,µg/m³ +Paris,FR,2019-04-28 23:00:00+00:00,FR04014,no2,29.8,µg/m³ +Paris,FR,2019-04-28 22:00:00+00:00,FR04014,no2,27.1,µg/m³ +Paris,FR,2019-04-28 21:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-04-28 20:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-04-28 19:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-04-28 18:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-04-28 17:00:00+00:00,FR04014,no2,23.7,µg/m³ +Paris,FR,2019-04-28 16:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-04-28 15:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-04-28 14:00:00+00:00,FR04014,no2,18.4,µg/m³ +Paris,FR,2019-04-28 13:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-04-28 12:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-04-28 11:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-04-28 10:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-04-28 09:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-04-28 08:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-04-28 07:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-04-28 06:00:00+00:00,FR04014,no2,13.6,µg/m³ +Paris,FR,2019-04-28 05:00:00+00:00,FR04014,no2,12.7,µg/m³ +Paris,FR,2019-04-28 04:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-04-28 03:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-04-28 02:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-04-28 01:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-04-28 00:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-04-27 23:00:00+00:00,FR04014,no2,18.7,µg/m³ +Paris,FR,2019-04-27 22:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-04-27 21:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-04-27 20:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-04-27 19:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-04-27 18:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-04-27 
17:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-04-27 16:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-27 15:00:00+00:00,FR04014,no2,13.7,µg/m³ +Paris,FR,2019-04-27 14:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-04-27 13:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-04-27 12:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-04-27 11:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-04-27 10:00:00+00:00,FR04014,no2,10.9,µg/m³ +Paris,FR,2019-04-27 09:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-04-27 08:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-04-27 07:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-04-27 06:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-04-27 05:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-04-27 04:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-04-27 03:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-04-27 02:00:00+00:00,FR04014,no2,8.6,µg/m³ +Paris,FR,2019-04-27 01:00:00+00:00,FR04014,no2,9.3,µg/m³ +Paris,FR,2019-04-27 00:00:00+00:00,FR04014,no2,10.8,µg/m³ +Paris,FR,2019-04-26 23:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-04-26 22:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-04-26 21:00:00+00:00,FR04014,no2,34.8,µg/m³ +Paris,FR,2019-04-26 20:00:00+00:00,FR04014,no2,38.7,µg/m³ +Paris,FR,2019-04-26 19:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-04-26 18:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-04-26 17:00:00+00:00,FR04014,no2,20.2,µg/m³ +Paris,FR,2019-04-26 16:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-26 15:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-04-26 14:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-26 13:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-04-26 12:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-04-26 11:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-04-26 10:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-04-26 09:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-04-26 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-04-26 07:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-04-26 06:00:00+00:00,FR04014,no2,61.8,µg/m³ +Paris,FR,2019-04-26 05:00:00+00:00,FR04014,no2,70.9,µg/m³ +Paris,FR,2019-04-26 04:00:00+00:00,FR04014,no2,58.3,µg/m³ +Paris,FR,2019-04-26 03:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-04-26 02:00:00+00:00,FR04014,no2,27.8,µg/m³ +Paris,FR,2019-04-26 01:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-04-26 00:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-04-25 23:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-04-25 22:00:00+00:00,FR04014,no2,31.0,µg/m³ +Paris,FR,2019-04-25 21:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-04-25 20:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-04-25 19:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-04-25 18:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-04-25 17:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-04-25 16:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-04-25 15:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-04-25 14:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-04-25 13:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-04-25 12:00:00+00:00,FR04014,no2,29.1,µg/m³ +Paris,FR,2019-04-25 11:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-04-25 10:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-04-25 09:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-04-25 08:00:00+00:00,FR04014,no2,37.6,µg/m³ +Paris,FR,2019-04-25 07:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-04-25 06:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-04-25 
05:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-04-25 04:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-04-25 03:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-04-25 02:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-04-25 01:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-04-25 00:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-04-24 23:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-04-24 22:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-04-24 21:00:00+00:00,FR04014,no2,40.3,µg/m³ +Paris,FR,2019-04-24 20:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-04-24 19:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-04-24 18:00:00+00:00,FR04014,no2,22.5,µg/m³ +Paris,FR,2019-04-24 17:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-04-24 16:00:00+00:00,FR04014,no2,31.3,µg/m³ +Paris,FR,2019-04-24 15:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-04-24 14:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-04-24 13:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-04-24 12:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-04-24 11:00:00+00:00,FR04014,no2,22.4,µg/m³ +Paris,FR,2019-04-24 10:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-04-24 09:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-04-24 08:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-04-24 07:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-04-24 06:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-04-24 05:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-24 04:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-04-24 03:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-04-24 02:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-04-24 01:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-04-24 00:00:00+00:00,FR04014,no2,43.8,µg/m³ +Paris,FR,2019-04-23 23:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-04-23 22:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-04-23 21:00:00+00:00,FR04014,no2,41.2,µg/m³ +Paris,FR,2019-04-23 20:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-04-23 19:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-04-23 18:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-04-23 17:00:00+00:00,FR04014,no2,35.7,µg/m³ +Paris,FR,2019-04-23 16:00:00+00:00,FR04014,no2,52.9,µg/m³ +Paris,FR,2019-04-23 15:00:00+00:00,FR04014,no2,44.5,µg/m³ +Paris,FR,2019-04-23 14:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-04-23 13:00:00+00:00,FR04014,no2,53.2,µg/m³ +Paris,FR,2019-04-23 12:00:00+00:00,FR04014,no2,54.1,µg/m³ +Paris,FR,2019-04-23 11:00:00+00:00,FR04014,no2,51.8,µg/m³ +Paris,FR,2019-04-23 10:00:00+00:00,FR04014,no2,47.9,µg/m³ +Paris,FR,2019-04-23 09:00:00+00:00,FR04014,no2,51.9,µg/m³ +Paris,FR,2019-04-23 08:00:00+00:00,FR04014,no2,60.7,µg/m³ +Paris,FR,2019-04-23 07:00:00+00:00,FR04014,no2,86.0,µg/m³ +Paris,FR,2019-04-23 06:00:00+00:00,FR04014,no2,74.7,µg/m³ +Paris,FR,2019-04-23 05:00:00+00:00,FR04014,no2,49.2,µg/m³ +Paris,FR,2019-04-23 04:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-04-23 03:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-04-23 02:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-04-23 01:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-23 00:00:00+00:00,FR04014,no2,35.7,µg/m³ +Paris,FR,2019-04-22 23:00:00+00:00,FR04014,no2,45.6,µg/m³ +Paris,FR,2019-04-22 22:00:00+00:00,FR04014,no2,44.5,µg/m³ +Paris,FR,2019-04-22 21:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-04-22 20:00:00+00:00,FR04014,no2,31.4,µg/m³ +Paris,FR,2019-04-22 19:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-04-22 18:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-04-22 
17:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-04-22 16:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-04-22 15:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-04-22 14:00:00+00:00,FR04014,no2,8.9,µg/m³ +Paris,FR,2019-04-22 13:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-04-22 12:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-04-22 11:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-22 10:00:00+00:00,FR04014,no2,43.5,µg/m³ +Paris,FR,2019-04-22 09:00:00+00:00,FR04014,no2,44.4,µg/m³ +Paris,FR,2019-04-22 08:00:00+00:00,FR04014,no2,63.7,µg/m³ +Paris,FR,2019-04-22 07:00:00+00:00,FR04014,no2,51.4,µg/m³ +Paris,FR,2019-04-22 06:00:00+00:00,FR04014,no2,65.7,µg/m³ +Paris,FR,2019-04-22 05:00:00+00:00,FR04014,no2,69.8,µg/m³ +Paris,FR,2019-04-22 04:00:00+00:00,FR04014,no2,80.2,µg/m³ +Paris,FR,2019-04-22 03:00:00+00:00,FR04014,no2,87.9,µg/m³ +Paris,FR,2019-04-22 02:00:00+00:00,FR04014,no2,88.7,µg/m³ +Paris,FR,2019-04-22 01:00:00+00:00,FR04014,no2,99.0,µg/m³ +Paris,FR,2019-04-22 00:00:00+00:00,FR04014,no2,116.4,µg/m³ +Paris,FR,2019-04-21 23:00:00+00:00,FR04014,no2,105.2,µg/m³ +Paris,FR,2019-04-21 22:00:00+00:00,FR04014,no2,117.2,µg/m³ +Paris,FR,2019-04-21 21:00:00+00:00,FR04014,no2,101.1,µg/m³ +Paris,FR,2019-04-21 20:00:00+00:00,FR04014,no2,75.6,µg/m³ +Paris,FR,2019-04-21 19:00:00+00:00,FR04014,no2,45.6,µg/m³ +Paris,FR,2019-04-21 18:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-04-21 17:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-04-21 16:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-04-21 15:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-04-21 14:00:00+00:00,FR04014,no2,9.3,µg/m³ +Paris,FR,2019-04-21 13:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-04-21 12:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-04-21 11:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-04-21 10:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-04-21 09:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-04-21 08:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-04-21 07:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-04-21 06:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-04-21 05:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-04-21 04:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-04-21 03:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-21 02:00:00+00:00,FR04014,no2,28.7,µg/m³ +Paris,FR,2019-04-21 01:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-04-21 00:00:00+00:00,FR04014,no2,40.5,µg/m³ +Paris,FR,2019-04-20 23:00:00+00:00,FR04014,no2,49.2,µg/m³ +Paris,FR,2019-04-20 22:00:00+00:00,FR04014,no2,52.8,µg/m³ +Paris,FR,2019-04-20 21:00:00+00:00,FR04014,no2,52.9,µg/m³ +Paris,FR,2019-04-20 20:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-04-20 19:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-04-20 18:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-04-20 17:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-04-20 16:00:00+00:00,FR04014,no2,12.7,µg/m³ +Paris,FR,2019-04-20 15:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-04-20 14:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-04-20 13:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-04-20 12:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-04-20 11:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-04-20 10:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-04-20 09:00:00+00:00,FR04014,no2,44.0,µg/m³ +Paris,FR,2019-04-20 08:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-04-20 07:00:00+00:00,FR04014,no2,64.5,µg/m³ +Paris,FR,2019-04-20 06:00:00+00:00,FR04014,no2,67.1,µg/m³ +Paris,FR,2019-04-20 
05:00:00+00:00,FR04014,no2,45.9,µg/m³ +Paris,FR,2019-04-20 04:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-04-20 03:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-04-20 02:00:00+00:00,FR04014,no2,12.7,µg/m³ +Paris,FR,2019-04-20 01:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-04-20 00:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-04-19 23:00:00+00:00,FR04014,no2,70.2,µg/m³ +Paris,FR,2019-04-19 22:00:00+00:00,FR04014,no2,90.4,µg/m³ +Paris,FR,2019-04-19 21:00:00+00:00,FR04014,no2,96.9,µg/m³ +Paris,FR,2019-04-19 20:00:00+00:00,FR04014,no2,78.4,µg/m³ +Paris,FR,2019-04-19 19:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-04-19 18:00:00+00:00,FR04014,no2,20.2,µg/m³ +Paris,FR,2019-04-19 17:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-04-19 16:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-04-19 15:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-04-19 14:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-04-19 13:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-04-19 12:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-04-19 11:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-04-19 10:00:00+00:00,FR04014,no2,51.3,µg/m³ +Paris,FR,2019-04-19 09:00:00+00:00,FR04014,no2,56.3,µg/m³ +Paris,FR,2019-04-19 08:00:00+00:00,FR04014,no2,61.4,µg/m³ +Paris,FR,2019-04-19 07:00:00+00:00,FR04014,no2,86.5,µg/m³ +Paris,FR,2019-04-19 06:00:00+00:00,FR04014,no2,89.3,µg/m³ +Paris,FR,2019-04-19 05:00:00+00:00,FR04014,no2,58.1,µg/m³ +Paris,FR,2019-04-19 04:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-04-19 03:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-04-19 02:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-04-19 01:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-04-19 00:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-04-18 23:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-04-18 22:00:00+00:00,FR04014,no2,41.2,µg/m³ +Paris,FR,2019-04-18 21:00:00+00:00,FR04014,no2,52.7,µg/m³ +Paris,FR,2019-04-18 20:00:00+00:00,FR04014,no2,43.8,µg/m³ +Paris,FR,2019-04-18 19:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-04-18 18:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-04-18 17:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-04-18 16:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-04-18 15:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-04-18 14:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-04-18 13:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-04-18 12:00:00+00:00,FR04014,no2,12.7,µg/m³ +Paris,FR,2019-04-18 11:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-04-18 10:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-04-18 09:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-04-18 08:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-04-18 07:00:00+00:00,FR04014,no2,43.8,µg/m³ +Paris,FR,2019-04-18 06:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-04-18 05:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-04-18 04:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-04-18 03:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-04-18 02:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-04-18 01:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-04-18 00:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-04-17 23:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-04-17 22:00:00+00:00,FR04014,no2,24.7,µg/m³ +Paris,FR,2019-04-17 21:00:00+00:00,FR04014,no2,37.3,µg/m³ +Paris,FR,2019-04-17 20:00:00+00:00,FR04014,no2,41.2,µg/m³ +Paris,FR,2019-04-17 19:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-04-17 18:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-04-17 
17:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-04-17 16:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-04-17 15:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-04-17 14:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-04-17 13:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-04-17 12:00:00+00:00,FR04014,no2,15.8,µg/m³ +Paris,FR,2019-04-17 11:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-04-17 10:00:00+00:00,FR04014,no2,46.9,µg/m³ +Paris,FR,2019-04-17 09:00:00+00:00,FR04014,no2,69.3,µg/m³ +Paris,FR,2019-04-17 08:00:00+00:00,FR04014,no2,72.7,µg/m³ +Paris,FR,2019-04-17 07:00:00+00:00,FR04014,no2,70.4,µg/m³ +Paris,FR,2019-04-17 06:00:00+00:00,FR04014,no2,72.9,µg/m³ +Paris,FR,2019-04-17 05:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-04-17 04:00:00+00:00,FR04014,no2,65.5,µg/m³ +Paris,FR,2019-04-17 03:00:00+00:00,FR04014,no2,62.5,µg/m³ +Paris,FR,2019-04-17 02:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-04-17 01:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-04-17 00:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-04-16 23:00:00+00:00,FR04014,no2,34.4,µg/m³ +Paris,FR,2019-04-16 22:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-04-16 21:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-04-16 20:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-04-16 19:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-04-16 18:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-04-16 17:00:00+00:00,FR04014,no2,44.0,µg/m³ +Paris,FR,2019-04-16 16:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-04-16 15:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-04-16 14:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-04-16 13:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-04-16 12:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-04-16 11:00:00+00:00,FR04014,no2,38.8,µg/m³ +Paris,FR,2019-04-16 10:00:00+00:00,FR04014,no2,47.1,µg/m³ +Paris,FR,2019-04-16 09:00:00+00:00,FR04014,no2,57.5,µg/m³ +Paris,FR,2019-04-16 08:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-04-16 07:00:00+00:00,FR04014,no2,72.0,µg/m³ +Paris,FR,2019-04-16 06:00:00+00:00,FR04014,no2,79.0,µg/m³ +Paris,FR,2019-04-16 05:00:00+00:00,FR04014,no2,76.9,µg/m³ +Paris,FR,2019-04-16 04:00:00+00:00,FR04014,no2,60.1,µg/m³ +Paris,FR,2019-04-16 03:00:00+00:00,FR04014,no2,34.6,µg/m³ +Paris,FR,2019-04-16 02:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-04-16 01:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-04-16 00:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-04-15 23:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-04-15 22:00:00+00:00,FR04014,no2,29.9,µg/m³ +Paris,FR,2019-04-15 21:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-04-15 20:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-04-15 19:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-04-15 18:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-04-15 17:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-04-15 16:00:00+00:00,FR04014,no2,14.3,µg/m³ +Paris,FR,2019-04-15 15:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-04-15 14:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-04-15 13:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-04-15 12:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-04-15 11:00:00+00:00,FR04014,no2,13.6,µg/m³ +Paris,FR,2019-04-15 10:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-04-15 09:00:00+00:00,FR04014,no2,28.0,µg/m³ +Paris,FR,2019-04-15 08:00:00+00:00,FR04014,no2,53.9,µg/m³ +Paris,FR,2019-04-15 07:00:00+00:00,FR04014,no2,61.2,µg/m³ +Paris,FR,2019-04-15 06:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-04-15 
05:00:00+00:00,FR04014,no2,52.9,µg/m³ +Paris,FR,2019-04-15 04:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-04-15 03:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-04-15 02:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-15 01:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-04-15 00:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-04-14 23:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-04-14 22:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-04-14 21:00:00+00:00,FR04014,no2,34.4,µg/m³ +Paris,FR,2019-04-14 20:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-04-14 19:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-04-14 18:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-04-14 17:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-04-14 16:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-04-14 15:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-04-14 14:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-04-14 13:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-04-14 12:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-04-14 11:00:00+00:00,FR04014,no2,19.7,µg/m³ +Paris,FR,2019-04-14 10:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-04-14 09:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-04-14 08:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-04-14 07:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-04-14 06:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-04-14 05:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-04-14 04:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-04-14 03:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-04-14 02:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-04-14 01:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-04-14 00:00:00+00:00,FR04014,no2,41.1,µg/m³ +Paris,FR,2019-04-13 23:00:00+00:00,FR04014,no2,47.8,µg/m³ +Paris,FR,2019-04-13 22:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-04-13 21:00:00+00:00,FR04014,no2,43.8,µg/m³ +Paris,FR,2019-04-13 20:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-04-13 19:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-13 18:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-04-13 17:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-04-13 16:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-04-13 15:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-04-13 14:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-04-13 13:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-04-13 12:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-04-13 11:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-04-13 10:00:00+00:00,FR04014,no2,18.3,µg/m³ +Paris,FR,2019-04-13 09:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-04-13 08:00:00+00:00,FR04014,no2,35.2,µg/m³ +Paris,FR,2019-04-13 07:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-04-13 06:00:00+00:00,FR04014,no2,44.3,µg/m³ +Paris,FR,2019-04-13 05:00:00+00:00,FR04014,no2,38.7,µg/m³ +Paris,FR,2019-04-13 04:00:00+00:00,FR04014,no2,31.9,µg/m³ +Paris,FR,2019-04-13 03:00:00+00:00,FR04014,no2,35.2,µg/m³ +Paris,FR,2019-04-13 02:00:00+00:00,FR04014,no2,38.9,µg/m³ +Paris,FR,2019-04-13 01:00:00+00:00,FR04014,no2,38.9,µg/m³ +Paris,FR,2019-04-13 00:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-04-12 23:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-04-12 22:00:00+00:00,FR04014,no2,42.4,µg/m³ +Paris,FR,2019-04-12 21:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-04-12 20:00:00+00:00,FR04014,no2,32.8,µg/m³ +Paris,FR,2019-04-12 19:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-12 18:00:00+00:00,FR04014,no2,26.2,µg/m³ +Paris,FR,2019-04-12 
17:00:00+00:00,FR04014,no2,25.9,µg/m³ +Paris,FR,2019-04-12 16:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-04-12 15:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-04-12 14:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-04-12 13:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-04-12 12:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-12 11:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-04-12 10:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-04-12 09:00:00+00:00,FR04014,no2,36.5,µg/m³ +Paris,FR,2019-04-12 08:00:00+00:00,FR04014,no2,44.3,µg/m³ +Paris,FR,2019-04-12 07:00:00+00:00,FR04014,no2,48.3,µg/m³ +Paris,FR,2019-04-12 06:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-04-12 05:00:00+00:00,FR04014,no2,39.0,µg/m³ +Paris,FR,2019-04-12 04:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-04-12 03:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-04-12 02:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-04-12 01:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-04-12 00:00:00+00:00,FR04014,no2,25.7,µg/m³ +Paris,FR,2019-04-11 23:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-04-11 22:00:00+00:00,FR04014,no2,42.6,µg/m³ +Paris,FR,2019-04-11 21:00:00+00:00,FR04014,no2,40.7,µg/m³ +Paris,FR,2019-04-11 20:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-04-11 19:00:00+00:00,FR04014,no2,31.4,µg/m³ +Paris,FR,2019-04-11 18:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-04-11 17:00:00+00:00,FR04014,no2,20.9,µg/m³ +Paris,FR,2019-04-11 16:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-04-11 15:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-04-11 14:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-04-11 13:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-04-11 12:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-04-11 11:00:00+00:00,FR04014,no2,25.4,µg/m³ +Paris,FR,2019-04-11 10:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-04-11 09:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-04-11 08:00:00+00:00,FR04014,no2,43.2,µg/m³ +Paris,FR,2019-04-11 07:00:00+00:00,FR04014,no2,44.3,µg/m³ +Paris,FR,2019-04-11 06:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-04-11 05:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-04-11 04:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-04-11 03:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-04-11 02:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-04-11 01:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-04-11 00:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-04-10 23:00:00+00:00,FR04014,no2,31.3,µg/m³ +Paris,FR,2019-04-10 22:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-04-10 21:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-04-10 20:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-04-10 19:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-04-10 18:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-04-10 17:00:00+00:00,FR04014,no2,46.0,µg/m³ +Paris,FR,2019-04-10 16:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-04-10 15:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-04-10 14:00:00+00:00,FR04014,no2,26.2,µg/m³ +Paris,FR,2019-04-10 13:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-10 12:00:00+00:00,FR04014,no2,31.8,µg/m³ +Paris,FR,2019-04-10 11:00:00+00:00,FR04014,no2,34.4,µg/m³ +Paris,FR,2019-04-10 10:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-04-10 09:00:00+00:00,FR04014,no2,41.1,µg/m³ +Paris,FR,2019-04-10 08:00:00+00:00,FR04014,no2,45.2,µg/m³ +Paris,FR,2019-04-10 07:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-04-10 06:00:00+00:00,FR04014,no2,40.6,µg/m³ +Paris,FR,2019-04-10 
05:00:00+00:00,FR04014,no2,26.2,µg/m³ +Paris,FR,2019-04-10 04:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-04-10 03:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-04-10 02:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-10 01:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-04-10 00:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-04-09 23:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-09 22:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-04-09 21:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-04-09 20:00:00+00:00,FR04014,no2,39.9,µg/m³ +Paris,FR,2019-04-09 19:00:00+00:00,FR04014,no2,48.7,µg/m³ +Paris,FR,2019-04-09 18:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-04-09 17:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-04-09 16:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-04-09 15:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-04-09 14:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-04-09 13:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-04-09 12:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-04-09 11:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-04-09 10:00:00+00:00,FR04014,no2,67.1,µg/m³ +Paris,FR,2019-04-09 09:00:00+00:00,FR04014,no2,66.5,µg/m³ +Paris,FR,2019-04-09 08:00:00+00:00,FR04014,no2,69.5,µg/m³ +Paris,FR,2019-04-09 07:00:00+00:00,FR04014,no2,68.0,µg/m³ +Paris,FR,2019-04-09 06:00:00+00:00,FR04014,no2,66.9,µg/m³ +Paris,FR,2019-04-09 05:00:00+00:00,FR04014,no2,59.5,µg/m³ +Paris,FR,2019-04-09 04:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-04-09 03:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-04-09 02:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-04-09 01:00:00+00:00,FR04014,no2,24.4,µg/m³ +Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,no2,41.0,µg/m³ +Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,no2,43.5,µg/m³ +Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,no2,42.5,µg/m³ +Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,no2,39.5,µg/m³ +Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,no2,36.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,no2,42.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,no2,42.5,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,no2,28.5,µg/m³ +Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,no2,7.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,no2,10.0,µg/m³ +Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,no2,52.5,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,no2,7.5,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,no2,11.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,no2,53.0,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,no2,29.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,no2,74.5,µg/m³ +Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,no2,60.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-20 
15:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,no2,24.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,no2,38.0,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-20 00:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 23:00:00+00:00,BETR801,no2,16.5,µg/m³ +Antwerpen,BE,2019-05-19 22:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,no2,12.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,no2,39.0,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,no2,41.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,no2,28.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-11 
02:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,no2,26.5,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,no2,50.5,µg/m³ +Antwerpen,BE,2019-05-06 02:00:00+00:00,BETR801,no2,27.0,µg/m³ +Antwerpen,BE,2019-05-06 01:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-05-05 02:00:00+00:00,BETR801,no2,13.0,µg/m³ +Antwerpen,BE,2019-05-05 01:00:00+00:00,BETR801,no2,18.0,µg/m³ +Antwerpen,BE,2019-05-04 02:00:00+00:00,BETR801,no2,9.5,µg/m³ +Antwerpen,BE,2019-05-04 01:00:00+00:00,BETR801,no2,8.5,µg/m³ +Antwerpen,BE,2019-05-03 02:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-03 01:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-05-02 02:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-05-02 01:00:00+00:00,BETR801,no2,31.0,µg/m³ +Antwerpen,BE,2019-05-01 02:00:00+00:00,BETR801,no2,12.0,µg/m³ +Antwerpen,BE,2019-05-01 01:00:00+00:00,BETR801,no2,12.5,µg/m³ +Antwerpen,BE,2019-04-30 02:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-04-30 01:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-29 02:00:00+00:00,BETR801,no2,52.5,µg/m³ +Antwerpen,BE,2019-04-29 01:00:00+00:00,BETR801,no2,72.5,µg/m³ +Antwerpen,BE,2019-04-28 02:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-04-28 01:00:00+00:00,BETR801,no2,8.5,µg/m³ +Antwerpen,BE,2019-04-27 02:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-04-27 01:00:00+00:00,BETR801,no2,22.0,µg/m³ +Antwerpen,BE,2019-04-26 02:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-26 01:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-04-25 02:00:00+00:00,BETR801,no2,12.0,µg/m³ +Antwerpen,BE,2019-04-25 01:00:00+00:00,BETR801,no2,13.0,µg/m³ +Antwerpen,BE,2019-04-22 01:00:00+00:00,BETR801,no2,24.5,µg/m³ +Antwerpen,BE,2019-04-21 02:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-21 01:00:00+00:00,BETR801,no2,18.0,µg/m³ +Antwerpen,BE,2019-04-19 01:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-18 02:00:00+00:00,BETR801,no2,35.0,µg/m³ +Antwerpen,BE,2019-04-17 03:00:00+00:00,BETR801,no2,38.5,µg/m³ +Antwerpen,BE,2019-04-17 02:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-04-17 01:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-04-16 02:00:00+00:00,BETR801,no2,21.5,µg/m³ +Antwerpen,BE,2019-04-16 01:00:00+00:00,BETR801,no2,27.5,µg/m³ +Antwerpen,BE,2019-04-15 15:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-04-15 14:00:00+00:00,BETR801,no2,28.0,µg/m³ +Antwerpen,BE,2019-04-15 13:00:00+00:00,BETR801,no2,31.0,µg/m³ +Antwerpen,BE,2019-04-15 12:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-04-15 11:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-15 10:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-15 09:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-04-15 08:00:00+00:00,BETR801,no2,43.5,µg/m³ +Antwerpen,BE,2019-04-15 07:00:00+00:00,BETR801,no2,54.0,µg/m³ +Antwerpen,BE,2019-04-15 06:00:00+00:00,BETR801,no2,64.0,µg/m³ +Antwerpen,BE,2019-04-15 05:00:00+00:00,BETR801,no2,63.0,µg/m³ +Antwerpen,BE,2019-04-15 04:00:00+00:00,BETR801,no2,49.0,µg/m³ +Antwerpen,BE,2019-04-15 
03:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-04-15 02:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-04-15 01:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-04-12 02:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-04-12 01:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-11 02:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-04-11 01:00:00+00:00,BETR801,no2,13.5,µg/m³ +Antwerpen,BE,2019-04-10 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-04-10 01:00:00+00:00,BETR801,no2,13.5,µg/m³ +Antwerpen,BE,2019-04-09 13:00:00+00:00,BETR801,no2,27.5,µg/m³ +Antwerpen,BE,2019-04-09 12:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-04-09 11:00:00+00:00,BETR801,no2,28.5,µg/m³ +Antwerpen,BE,2019-04-09 10:00:00+00:00,BETR801,no2,33.5,µg/m³ +Antwerpen,BE,2019-04-09 09:00:00+00:00,BETR801,no2,35.0,µg/m³ +Antwerpen,BE,2019-04-09 08:00:00+00:00,BETR801,no2,39.0,µg/m³ +Antwerpen,BE,2019-04-09 07:00:00+00:00,BETR801,no2,38.5,µg/m³ +Antwerpen,BE,2019-04-09 06:00:00+00:00,BETR801,no2,50.0,µg/m³ +Antwerpen,BE,2019-04-09 05:00:00+00:00,BETR801,no2,46.5,µg/m³ +Antwerpen,BE,2019-04-09 04:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-04-09 03:00:00+00:00,BETR801,no2,54.5,µg/m³ +Antwerpen,BE,2019-04-09 02:00:00+00:00,BETR801,no2,53.5,µg/m³ +Antwerpen,BE,2019-04-09 01:00:00+00:00,BETR801,no2,22.5,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London 
Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London 
Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London 
Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London 
Westminster,no2,13.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,no2,24.0,µg/m³ 
+London,GB,2019-06-07 07:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 
04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 
02:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-01 
00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London 
Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London 
Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London 
Westminster,no2,46.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London 
Westminster,no2,34.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London 
Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London 
Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 04:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 00:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London 
Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London 
Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London 
Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London 
Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,no2,65.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,no2,97.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London 
Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-08 08:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-08 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-08 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-07 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-06 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-06 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-06 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-06 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 19:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 18:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 17:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-06 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-06 13:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-06 11:00:00+00:00,London 
Westminster,no2,23.0,µg/m³ +London,GB,2019-05-06 10:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-06 09:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-06 06:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 05:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-06 04:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-06 03:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 01:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-06 00:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-05 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-05 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-05 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-05 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-05 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-05 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-05 17:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-05 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-05 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-05 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-05 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-05 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-05 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-05 10:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-05 08:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-05 07:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 06:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 05:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-05 04:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-05 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-05 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-04 23:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-04 22:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-04 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-04 20:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-04 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-04 18:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-04 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-04 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-04 15:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-04 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-04 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 09:00:00+00:00,London 
Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 08:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 07:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-04 06:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-04 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-04 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-04 03:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-04 02:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-04 01:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-04 00:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-03 23:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-03 22:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-03 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-03 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-03 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-03 18:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-03 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-03 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 15:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-03 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-03 13:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-03 12:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-03 11:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-05-03 10:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-03 09:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-03 08:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-03 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-03 06:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-03 05:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 04:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 03:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 02:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-03 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-02 23:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-02 22:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-02 21:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-02 20:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-02 19:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-02 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-02 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-02 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-02 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-02 14:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-02 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-02 12:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-02 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-02 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-02 09:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-02 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-02 07:00:00+00:00,London 
Westminster,no2,33.0,µg/m³ +London,GB,2019-05-02 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-02 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-02 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-02 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-02 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-02 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-02 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-01 23:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-01 22:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-01 21:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 20:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-01 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-01 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-01 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-01 15:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-01 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 13:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-01 12:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-01 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 10:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-01 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-01 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-01 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-01 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-01 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-01 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-01 00:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-30 23:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 22:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 21:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-30 20:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-04-30 19:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-30 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-30 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-30 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-30 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-30 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-30 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-30 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-30 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-30 10:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-30 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-30 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-30 07:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-30 06:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-30 05:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 04:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 03:00:00+00:00,London 
Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 02:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 01:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-30 00:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-29 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-29 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-29 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-29 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-29 19:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-29 18:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-29 17:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-29 16:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-29 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-29 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-29 13:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-29 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-29 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-29 10:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-29 09:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-29 08:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-29 07:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-29 06:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-29 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-29 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-29 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-29 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-29 01:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-29 00:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-28 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-28 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-28 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-28 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-28 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-28 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-28 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-28 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 15:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 14:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-28 13:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-28 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-04-27 13:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-04-27 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-27 11:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-27 10:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-27 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-04-27 08:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-04-27 07:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-04-27 06:00:00+00:00,London 
Westminster,no2,14.0,µg/m³ +London,GB,2019-04-27 05:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-04-27 04:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-04-27 03:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-04-27 02:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-04-27 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-04-26 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-04-26 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-04-26 21:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-04-26 20:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-26 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-26 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-26 17:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-04-26 16:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-26 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-26 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-26 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-26 12:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-26 11:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-04-26 10:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-26 09:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-26 08:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-26 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-26 06:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-26 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-26 04:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-26 03:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-26 02:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-26 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-26 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-25 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-25 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-25 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-25 18:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-25 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-25 16:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-25 15:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-25 13:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-04-25 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-25 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 10:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 09:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 08:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-25 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-25 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-25 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 03:00:00+00:00,London 
Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 00:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-24 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-24 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-24 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-24 20:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-24 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-24 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-24 17:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-24 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-24 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-24 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-24 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-24 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-24 11:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-24 10:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-24 09:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-24 08:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-24 07:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-24 06:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-24 05:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-24 04:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-24 03:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-24 02:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-24 00:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-23 23:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-23 22:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-23 21:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-23 20:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-23 19:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-23 18:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-23 17:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-04-23 16:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-23 15:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-23 14:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-23 13:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-23 12:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-23 11:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-23 10:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-23 09:00:00+00:00,London Westminster,no2,61.0,µg/m³ +London,GB,2019-04-23 08:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-23 07:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-04-23 06:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-23 05:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-23 04:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-23 03:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-23 02:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-23 01:00:00+00:00,London Westminster,no2,75.0,µg/m³ +London,GB,2019-04-23 00:00:00+00:00,London Westminster,no2,75.0,µg/m³ +London,GB,2019-04-22 23:00:00+00:00,London 
Westminster,no2,84.0,µg/m³ +London,GB,2019-04-22 22:00:00+00:00,London Westminster,no2,84.0,µg/m³ +London,GB,2019-04-22 21:00:00+00:00,London Westminster,no2,73.0,µg/m³ +London,GB,2019-04-22 20:00:00+00:00,London Westminster,no2,66.0,µg/m³ +London,GB,2019-04-22 19:00:00+00:00,London Westminster,no2,66.0,µg/m³ +London,GB,2019-04-22 18:00:00+00:00,London Westminster,no2,64.0,µg/m³ +London,GB,2019-04-22 17:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-22 16:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-22 15:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-22 14:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-22 13:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-22 12:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-22 11:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-22 10:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-22 09:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-22 08:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-22 07:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-22 06:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-22 05:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-22 04:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-22 03:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-22 02:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-22 01:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-22 00:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 23:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 22:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 21:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-21 20:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-21 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-21 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-21 17:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-21 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-21 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-21 14:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-21 13:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-21 12:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-21 11:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 10:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 09:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-04-21 08:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-21 07:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-21 06:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-21 05:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-21 04:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-21 03:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-21 02:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-21 01:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-21 00:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-20 23:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-20 22:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-20 21:00:00+00:00,London 
Westminster,no2,50.0,µg/m³ +London,GB,2019-04-20 20:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-20 19:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-20 18:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-20 17:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-20 16:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-20 15:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-20 14:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-20 13:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-20 12:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-20 11:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-20 10:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-20 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-20 08:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-20 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-20 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-20 05:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-20 04:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-20 03:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-20 02:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-20 01:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-20 00:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-19 23:00:00+00:00,London Westminster,no2,77.0,µg/m³ +London,GB,2019-04-19 22:00:00+00:00,London Westminster,no2,77.0,µg/m³ +London,GB,2019-04-19 21:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-19 20:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-04-19 19:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-19 18:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-19 17:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-19 16:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-19 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-19 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-19 13:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-19 12:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-19 11:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-19 10:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-19 09:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-19 08:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-19 07:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-19 06:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-19 05:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-04-19 04:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-04-19 03:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-19 02:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-19 00:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-04-18 23:00:00+00:00,London Westminster,no2,61.0,µg/m³ +London,GB,2019-04-18 22:00:00+00:00,London Westminster,no2,61.0,µg/m³ +London,GB,2019-04-18 21:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-18 20:00:00+00:00,London Westminster,no2,69.0,µg/m³ +London,GB,2019-04-18 19:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-18 18:00:00+00:00,London 
Westminster,no2,63.0,µg/m³ +London,GB,2019-04-18 17:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-18 16:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-18 15:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-18 14:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 13:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-18 12:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-18 11:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-18 10:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-18 09:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-18 08:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 07:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 06:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-18 05:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-18 04:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-18 03:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 02:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 01:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 00:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 23:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-17 22:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-17 21:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-17 20:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-17 19:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-17 18:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-17 17:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-17 16:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-17 15:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-17 14:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-17 13:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 12:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-17 11:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-17 10:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-17 09:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 08:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-17 07:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-17 06:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-17 05:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 04:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 03:00:00+00:00,London Westminster,no2,72.0,µg/m³ +London,GB,2019-04-17 02:00:00+00:00,London Westminster,no2,72.0,µg/m³ +London,GB,2019-04-17 00:00:00+00:00,London Westminster,no2,71.0,µg/m³ +London,GB,2019-04-16 23:00:00+00:00,London Westminster,no2,81.0,µg/m³ +London,GB,2019-04-16 22:00:00+00:00,London Westminster,no2,81.0,µg/m³ +London,GB,2019-04-16 21:00:00+00:00,London Westminster,no2,84.0,µg/m³ +London,GB,2019-04-16 20:00:00+00:00,London Westminster,no2,83.0,µg/m³ +London,GB,2019-04-16 19:00:00+00:00,London Westminster,no2,76.0,µg/m³ +London,GB,2019-04-16 18:00:00+00:00,London Westminster,no2,70.0,µg/m³ +London,GB,2019-04-16 17:00:00+00:00,London Westminster,no2,65.0,µg/m³ +London,GB,2019-04-16 15:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-16 14:00:00+00:00,London 
Westminster,no2,57.0,µg/m³ +London,GB,2019-04-16 13:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-16 12:00:00+00:00,London Westminster,no2,75.0,µg/m³ +London,GB,2019-04-16 11:00:00+00:00,London Westminster,no2,79.0,µg/m³ +London,GB,2019-04-16 10:00:00+00:00,London Westminster,no2,70.0,µg/m³ +London,GB,2019-04-16 09:00:00+00:00,London Westminster,no2,66.0,µg/m³ +London,GB,2019-04-16 08:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-16 07:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-16 06:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-16 05:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-16 04:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-16 03:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-16 02:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-16 00:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-15 23:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-15 22:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-15 21:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-15 20:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-15 19:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-15 18:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-15 17:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-15 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-15 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-15 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-15 13:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-15 12:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-15 11:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-15 10:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-15 09:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-15 08:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-15 07:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-15 06:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-15 05:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-15 04:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-15 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-15 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-15 01:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-15 00:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-14 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-14 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-14 21:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-14 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-14 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-14 18:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-14 17:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-14 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-14 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-14 14:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-14 13:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-14 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-14 11:00:00+00:00,London 
Westminster,no2,24.0,µg/m³ +London,GB,2019-04-14 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-14 09:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-14 08:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-14 07:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-14 06:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-14 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-14 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-14 03:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-14 02:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-14 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-14 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-13 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-13 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-13 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-13 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 19:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-13 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-13 17:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-13 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-13 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 14:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-13 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-13 10:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-13 09:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-13 08:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-13 07:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-13 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-13 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-13 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-13 01:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 00:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-12 23:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-12 22:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-12 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-12 20:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-12 19:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-12 18:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-12 17:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-12 16:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-12 15:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-12 14:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-12 13:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-12 12:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-12 11:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-12 10:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-12 09:00:00+00:00,London 
Westminster,no2,46.0,µg/m³ +London,GB,2019-04-12 08:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-12 07:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-12 06:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-12 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-12 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-12 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-12 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-11 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-11 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-11 21:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-11 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-11 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-11 18:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-11 17:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-11 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-11 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-11 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-11 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-11 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-11 10:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-11 09:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-11 08:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-11 07:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-11 06:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-11 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 03:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 02:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-10 23:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-10 22:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-10 21:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-10 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-10 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-10 18:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-10 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-10 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-10 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-10 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-10 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-10 12:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-10 11:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-10 10:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-10 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-10 08:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-10 07:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-10 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-10 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-10 04:00:00+00:00,London 
Westminster,no2,24.0,µg/m³ +London,GB,2019-04-10 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-10 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-10 01:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-10 00:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-09 23:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-09 22:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-09 21:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-09 20:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-09 19:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 18:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-09 17:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-09 16:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-09 15:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-09 14:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-04-09 13:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-09 12:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-09 11:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-09 10:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-09 09:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-09 08:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-09 07:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-09 06:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 05:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 04:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 03:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-09 02:00:00+00:00,London Westminster,no2,67.0,µg/m³ diff --git a/doc/data/air_quality_no2.csv b/doc/data/air_quality_no2.csv new file mode 100644 index 00000000..7fa879f7 --- /dev/null +++ b/doc/data/air_quality_no2.csv @@ -0,0 +1,1036 @@ +datetime,station_antwerp,station_paris,station_london +2019-05-07 02:00:00,,,23.0 +2019-05-07 03:00:00,50.5,25.0,19.0 +2019-05-07 04:00:00,45.0,27.7,19.0 +2019-05-07 05:00:00,,50.4,16.0 +2019-05-07 06:00:00,,61.9, +2019-05-07 07:00:00,,72.4,26.0 +2019-05-07 08:00:00,,77.7,32.0 +2019-05-07 09:00:00,,67.9,32.0 +2019-05-07 10:00:00,,56.0,28.0 +2019-05-07 11:00:00,,34.5,21.0 +2019-05-07 12:00:00,,20.1,21.0 +2019-05-07 13:00:00,,13.0,18.0 +2019-05-07 14:00:00,,10.6,20.0 +2019-05-07 15:00:00,,13.2,18.0 +2019-05-07 16:00:00,,11.0,20.0 +2019-05-07 17:00:00,,11.7,20.0 +2019-05-07 18:00:00,,18.2,21.0 +2019-05-07 19:00:00,,22.3,20.0 +2019-05-07 20:00:00,,21.4,20.0 +2019-05-07 21:00:00,,26.8,24.0 +2019-05-07 22:00:00,,36.2,24.0 +2019-05-07 23:00:00,,33.9, +2019-05-08 00:00:00,,35.8,24.0 +2019-05-08 01:00:00,,34.0,19.0 +2019-05-08 02:00:00,,22.1,19.0 +2019-05-08 03:00:00,23.0,19.6,20.0 +2019-05-08 04:00:00,20.5,15.3,20.0 +2019-05-08 05:00:00,,13.5,19.0 +2019-05-08 06:00:00,,15.5,19.0 +2019-05-08 07:00:00,,19.3,29.0 +2019-05-08 08:00:00,,21.7,34.0 +2019-05-08 09:00:00,,19.5,36.0 +2019-05-08 10:00:00,,17.0,33.0 +2019-05-08 11:00:00,,19.7,28.0 +2019-05-08 12:00:00,,33.4,27.0 +2019-05-08 13:00:00,,21.4,26.0 +2019-05-08 14:00:00,,15.1,26.0 +2019-05-08 15:00:00,,14.3,24.0 +2019-05-08 16:00:00,,25.3,27.0 +2019-05-08 17:00:00,,26.0,28.0 +2019-05-08 18:00:00,,38.6,31.0 +2019-05-08 19:00:00,,29.3,40.0 +2019-05-08 20:00:00,,27.8,25.0 +2019-05-08 21:00:00,,41.3,29.0 +2019-05-08 
22:00:00,,38.3,26.0 +2019-05-08 23:00:00,,48.9, +2019-05-09 00:00:00,,32.2,25.0 +2019-05-09 01:00:00,,25.2,30.0 +2019-05-09 02:00:00,,14.7, +2019-05-09 03:00:00,20.0,10.6,31.0 +2019-05-09 04:00:00,20.5,10.0,31.0 +2019-05-09 05:00:00,,10.4,33.0 +2019-05-09 06:00:00,,15.3,33.0 +2019-05-09 07:00:00,,34.5,33.0 +2019-05-09 08:00:00,,50.7,33.0 +2019-05-09 09:00:00,,49.0,35.0 +2019-05-09 10:00:00,,32.2,36.0 +2019-05-09 11:00:00,,32.3,28.0 +2019-05-09 12:00:00,,43.1,27.0 +2019-05-09 13:00:00,,34.2,30.0 +2019-05-09 14:00:00,,35.1,27.0 +2019-05-09 15:00:00,,21.3,34.0 +2019-05-09 16:00:00,,24.6,97.0 +2019-05-09 17:00:00,,23.9,67.0 +2019-05-09 18:00:00,,27.0,60.0 +2019-05-09 19:00:00,,29.9,58.0 +2019-05-09 20:00:00,,24.4,62.0 +2019-05-09 21:00:00,,23.8,59.0 +2019-05-09 22:00:00,,29.2,65.0 +2019-05-09 23:00:00,,34.5,59.0 +2019-05-10 00:00:00,,29.7,59.0 +2019-05-10 01:00:00,,26.7,52.0 +2019-05-10 02:00:00,,22.7,52.0 +2019-05-10 03:00:00,10.5,19.1,41.0 +2019-05-10 04:00:00,11.5,14.1,41.0 +2019-05-10 05:00:00,,15.0,40.0 +2019-05-10 06:00:00,,20.5,40.0 +2019-05-10 07:00:00,,37.8,39.0 +2019-05-10 08:00:00,,47.4,36.0 +2019-05-10 09:00:00,,57.3,39.0 +2019-05-10 10:00:00,,60.7,34.0 +2019-05-10 11:00:00,,53.4,31.0 +2019-05-10 12:00:00,,35.1,29.0 +2019-05-10 13:00:00,,23.2,28.0 +2019-05-10 14:00:00,,25.3,26.0 +2019-05-10 15:00:00,,22.0,25.0 +2019-05-10 16:00:00,,29.3,25.0 +2019-05-10 17:00:00,,29.6,24.0 +2019-05-10 18:00:00,,30.8,26.0 +2019-05-10 19:00:00,,37.8,26.0 +2019-05-10 20:00:00,,33.4,29.0 +2019-05-10 21:00:00,,39.3,29.0 +2019-05-10 22:00:00,,43.6,29.0 +2019-05-10 23:00:00,,37.0,31.0 +2019-05-11 00:00:00,,28.1,31.0 +2019-05-11 01:00:00,,26.0,27.0 +2019-05-11 02:00:00,,24.8,27.0 +2019-05-11 03:00:00,26.5,15.5,32.0 +2019-05-11 04:00:00,21.0,14.9,32.0 +2019-05-11 05:00:00,,,35.0 +2019-05-11 06:00:00,,,35.0 +2019-05-11 07:00:00,,,30.0 +2019-05-11 08:00:00,,28.9,30.0 +2019-05-11 09:00:00,,29.0,27.0 +2019-05-11 10:00:00,,32.1,30.0 +2019-05-11 11:00:00,,35.7, +2019-05-11 12:00:00,,36.8, +2019-05-11 13:00:00,,33.2, +2019-05-11 14:00:00,,30.2, +2019-05-11 15:00:00,,30.8, +2019-05-11 16:00:00,,17.8,28.0 +2019-05-11 17:00:00,,18.0,26.0 +2019-05-11 18:00:00,,19.5,28.0 +2019-05-11 19:00:00,,32.0,31.0 +2019-05-11 20:00:00,,33.1,33.0 +2019-05-11 21:00:00,,31.2,33.0 +2019-05-11 22:00:00,,24.2,34.0 +2019-05-11 23:00:00,,21.1,37.0 +2019-05-12 00:00:00,,27.7,37.0 +2019-05-12 01:00:00,,26.4,35.0 +2019-05-12 02:00:00,,22.8,35.0 +2019-05-12 03:00:00,17.5,19.2,38.0 +2019-05-12 04:00:00,20.0,17.2,38.0 +2019-05-12 05:00:00,,16.0,36.0 +2019-05-12 06:00:00,,16.2,36.0 +2019-05-12 07:00:00,,19.2,38.0 +2019-05-12 08:00:00,,20.1,44.0 +2019-05-12 09:00:00,,15.9,32.0 +2019-05-12 10:00:00,,14.6,26.0 +2019-05-12 11:00:00,,11.7,26.0 +2019-05-12 12:00:00,,11.4,21.0 +2019-05-12 13:00:00,,11.4,20.0 +2019-05-12 14:00:00,,10.9,19.0 +2019-05-12 15:00:00,,8.7,21.0 +2019-05-12 16:00:00,,9.1,22.0 +2019-05-12 17:00:00,,9.6,23.0 +2019-05-12 18:00:00,,11.7,24.0 +2019-05-12 19:00:00,,13.9,22.0 +2019-05-12 20:00:00,,18.2,22.0 +2019-05-12 21:00:00,,19.5,22.0 +2019-05-12 22:00:00,,24.1,21.0 +2019-05-12 23:00:00,,34.2,22.0 +2019-05-13 00:00:00,,46.5,22.0 +2019-05-13 01:00:00,,32.5,22.0 +2019-05-13 02:00:00,,25.0,22.0 +2019-05-13 03:00:00,14.5,18.9,24.0 +2019-05-13 04:00:00,14.5,18.5,24.0 +2019-05-13 05:00:00,,18.9,33.0 +2019-05-13 06:00:00,,25.1,33.0 +2019-05-13 07:00:00,,38.3,39.0 +2019-05-13 08:00:00,,45.2,39.0 +2019-05-13 09:00:00,,41.0,31.0 +2019-05-13 10:00:00,,32.1,29.0 +2019-05-13 11:00:00,,20.6,27.0 +2019-05-13 12:00:00,,12.8,26.0 +2019-05-13 
13:00:00,,9.6,24.0 +2019-05-13 14:00:00,,9.2,25.0 +2019-05-13 15:00:00,,10.1,26.0 +2019-05-13 16:00:00,,10.7,28.0 +2019-05-13 17:00:00,,10.6,29.0 +2019-05-13 18:00:00,,12.1,30.0 +2019-05-13 19:00:00,,13.0,30.0 +2019-05-13 20:00:00,,15.5,31.0 +2019-05-13 21:00:00,,23.9,31.0 +2019-05-13 22:00:00,,28.3,31.0 +2019-05-13 23:00:00,,30.4,31.0 +2019-05-14 00:00:00,,27.3,31.0 +2019-05-14 01:00:00,,22.8,23.0 +2019-05-14 02:00:00,,20.9,23.0 +2019-05-14 03:00:00,14.5,19.1,26.0 +2019-05-14 04:00:00,11.5,19.0,26.0 +2019-05-14 05:00:00,,22.1,30.0 +2019-05-14 06:00:00,,31.6,30.0 +2019-05-14 07:00:00,,38.6,33.0 +2019-05-14 08:00:00,,46.1,34.0 +2019-05-14 09:00:00,,41.3,33.0 +2019-05-14 10:00:00,,28.8,30.0 +2019-05-14 11:00:00,,19.0,31.0 +2019-05-14 12:00:00,,12.9,27.0 +2019-05-14 13:00:00,,11.3,25.0 +2019-05-14 14:00:00,,10.2,25.0 +2019-05-14 15:00:00,,11.0,25.0 +2019-05-14 16:00:00,,15.2,29.0 +2019-05-14 17:00:00,,13.4,32.0 +2019-05-14 18:00:00,,15.3,33.0 +2019-05-14 19:00:00,,17.7,30.0 +2019-05-14 20:00:00,,17.9,28.0 +2019-05-14 21:00:00,,23.3,27.0 +2019-05-14 22:00:00,,28.4,25.0 +2019-05-14 23:00:00,,29.0,26.0 +2019-05-15 00:00:00,,30.9,26.0 +2019-05-15 01:00:00,,24.3,22.0 +2019-05-15 02:00:00,,18.8, +2019-05-15 03:00:00,25.5,17.2,22.0 +2019-05-15 04:00:00,22.5,16.8,22.0 +2019-05-15 05:00:00,,17.9,25.0 +2019-05-15 06:00:00,,28.9,25.0 +2019-05-15 07:00:00,,46.5,33.0 +2019-05-15 08:00:00,,48.1,33.0 +2019-05-15 09:00:00,,32.1,34.0 +2019-05-15 10:00:00,,25.7,35.0 +2019-05-15 11:00:00,,0.0,36.0 +2019-05-15 12:00:00,,0.0,35.0 +2019-05-15 13:00:00,,0.0,30.0 +2019-05-15 14:00:00,,9.4,31.0 +2019-05-15 15:00:00,,10.0,30.0 +2019-05-15 16:00:00,,11.9,38.0 +2019-05-15 17:00:00,,12.9,38.0 +2019-05-15 18:00:00,,12.2,33.0 +2019-05-15 19:00:00,,12.9,35.0 +2019-05-15 20:00:00,,16.5,33.0 +2019-05-15 21:00:00,,20.3,31.0 +2019-05-15 22:00:00,,30.1,32.0 +2019-05-15 23:00:00,,36.0,33.0 +2019-05-16 00:00:00,,44.1,33.0 +2019-05-16 01:00:00,,30.9,33.0 +2019-05-16 02:00:00,,27.4,33.0 +2019-05-16 03:00:00,28.0,26.0,28.0 +2019-05-16 04:00:00,,26.7,28.0 +2019-05-16 05:00:00,,27.9,26.0 +2019-05-16 06:00:00,,37.0,26.0 +2019-05-16 07:00:00,,52.6,33.0 +2019-05-16 08:00:00,,,34.0 +2019-05-16 09:00:00,,40.0,33.0 +2019-05-16 10:00:00,,39.4,32.0 +2019-05-16 11:00:00,,29.5,31.0 +2019-05-16 12:00:00,,13.5,33.0 +2019-05-16 13:00:00,,10.5,30.0 +2019-05-16 14:00:00,,9.2,27.0 +2019-05-16 15:00:00,,8.5,27.0 +2019-05-16 16:00:00,,8.1,26.0 +2019-05-16 17:00:00,,10.1,29.0 +2019-05-16 18:00:00,,10.3,30.0 +2019-05-16 19:00:00,,13.5,25.0 +2019-05-16 20:00:00,,15.9,27.0 +2019-05-16 21:00:00,,14.4,26.0 +2019-05-16 22:00:00,,24.8,25.0 +2019-05-16 23:00:00,,24.3,25.0 +2019-05-17 00:00:00,,37.1,25.0 +2019-05-17 01:00:00,,43.7,23.0 +2019-05-17 02:00:00,,46.3,23.0 +2019-05-17 03:00:00,,26.1,21.0 +2019-05-17 04:00:00,,24.6,21.0 +2019-05-17 05:00:00,,26.6,21.0 +2019-05-17 06:00:00,,28.4,21.0 +2019-05-17 07:00:00,,34.0,25.0 +2019-05-17 08:00:00,,46.3,27.0 +2019-05-17 09:00:00,,55.0,27.0 +2019-05-17 10:00:00,,57.5,29.0 +2019-05-17 11:00:00,,60.5,30.0 +2019-05-17 12:00:00,,51.5,30.0 +2019-05-17 13:00:00,,43.1,30.0 +2019-05-17 14:00:00,,46.5,29.0 +2019-05-17 15:00:00,,37.9,31.0 +2019-05-17 16:00:00,,27.0,32.0 +2019-05-17 17:00:00,,22.2,30.0 +2019-05-17 18:00:00,,20.7,29.0 +2019-05-17 19:00:00,,27.9,31.0 +2019-05-17 20:00:00,,33.6,36.0 +2019-05-17 21:00:00,,24.7,36.0 +2019-05-17 22:00:00,,23.5,36.0 +2019-05-17 23:00:00,,24.3,35.0 +2019-05-18 00:00:00,,28.2,35.0 +2019-05-18 01:00:00,,34.1,31.0 +2019-05-18 02:00:00,,31.5,31.0 +2019-05-18 03:00:00,41.5,37.4,31.0 
+2019-05-18 04:00:00,,29.0,31.0 +2019-05-18 05:00:00,,16.1,29.0 +2019-05-18 06:00:00,,16.6,29.0 +2019-05-18 07:00:00,,20.1,27.0 +2019-05-18 08:00:00,,22.1,29.0 +2019-05-18 09:00:00,,27.4,35.0 +2019-05-18 10:00:00,,20.4,32.0 +2019-05-18 11:00:00,,21.1,35.0 +2019-05-18 12:00:00,,24.1,34.0 +2019-05-18 13:00:00,,17.5,38.0 +2019-05-18 14:00:00,,12.9,29.0 +2019-05-18 15:00:00,,10.5,27.0 +2019-05-18 16:00:00,,11.8,28.0 +2019-05-18 17:00:00,,13.0,30.0 +2019-05-18 18:00:00,,14.6,42.0 +2019-05-18 19:00:00,,12.8,42.0 +2019-05-18 20:00:00,35.5,14.5,36.0 +2019-05-18 21:00:00,35.5,67.5,35.0 +2019-05-18 22:00:00,40.0,36.2,41.0 +2019-05-18 23:00:00,39.0,59.3,46.0 +2019-05-19 00:00:00,34.5,62.5,46.0 +2019-05-19 01:00:00,29.5,50.2,49.0 +2019-05-19 02:00:00,23.5,49.6,49.0 +2019-05-19 03:00:00,22.5,34.9,49.0 +2019-05-19 04:00:00,19.0,38.1,49.0 +2019-05-19 05:00:00,19.0,36.4,49.0 +2019-05-19 06:00:00,21.0,39.4,49.0 +2019-05-19 07:00:00,26.0,40.9,38.0 +2019-05-19 08:00:00,30.5,31.1,36.0 +2019-05-19 09:00:00,30.0,32.4,33.0 +2019-05-19 10:00:00,23.5,31.7,30.0 +2019-05-19 11:00:00,16.0,33.0,27.0 +2019-05-19 12:00:00,17.5,31.0,28.0 +2019-05-19 13:00:00,17.0,32.6,25.0 +2019-05-19 14:00:00,16.0,27.9,27.0 +2019-05-19 15:00:00,14.5,21.0,31.0 +2019-05-19 16:00:00,23.0,23.8,29.0 +2019-05-19 17:00:00,33.0,31.7,28.0 +2019-05-19 18:00:00,17.5,32.5,27.0 +2019-05-19 19:00:00,18.5,33.9,29.0 +2019-05-19 20:00:00,15.5,32.7,30.0 +2019-05-19 21:00:00,26.0,51.2,32.0 +2019-05-19 22:00:00,15.0,35.6,32.0 +2019-05-19 23:00:00,12.5,23.2,32.0 +2019-05-20 00:00:00,18.5,22.2,32.0 +2019-05-20 01:00:00,16.5,18.8,28.0 +2019-05-20 02:00:00,26.0,16.4,28.0 +2019-05-20 03:00:00,17.0,12.8,32.0 +2019-05-20 04:00:00,10.5,12.1,32.0 +2019-05-20 05:00:00,9.0,12.6,26.0 +2019-05-20 06:00:00,14.0,14.9,26.0 +2019-05-20 07:00:00,20.0,25.2,31.0 +2019-05-20 08:00:00,26.0,40.1,31.0 +2019-05-20 09:00:00,38.0,46.9,29.0 +2019-05-20 10:00:00,40.0,46.1,29.0 +2019-05-20 11:00:00,30.5,45.5,28.0 +2019-05-20 12:00:00,25.0,43.9,28.0 +2019-05-20 13:00:00,25.0,35.4,28.0 +2019-05-20 14:00:00,34.5,23.8,29.0 +2019-05-20 15:00:00,32.0,23.7,32.0 +2019-05-20 16:00:00,24.5,27.5,32.0 +2019-05-20 17:00:00,25.5,26.5,29.0 +2019-05-20 18:00:00,,32.4,30.0 +2019-05-20 19:00:00,,24.6,33.0 +2019-05-20 20:00:00,,32.2,32.0 +2019-05-20 21:00:00,,21.3,32.0 +2019-05-20 22:00:00,,21.6,34.0 +2019-05-20 23:00:00,,20.3,47.0 +2019-05-21 00:00:00,,20.7,47.0 +2019-05-21 01:00:00,,19.6,35.0 +2019-05-21 02:00:00,,16.9,35.0 +2019-05-21 03:00:00,15.5,16.3,26.0 +2019-05-21 04:00:00,,17.7,26.0 +2019-05-21 05:00:00,,17.9,23.0 +2019-05-21 06:00:00,,18.5,23.0 +2019-05-21 07:00:00,,38.0,30.0 +2019-05-21 08:00:00,,62.6,27.0 +2019-05-21 09:00:00,,56.0,28.0 +2019-05-21 10:00:00,,54.2,29.0 +2019-05-21 11:00:00,,48.1,29.0 +2019-05-21 12:00:00,,30.4,26.0 +2019-05-21 13:00:00,,25.5,26.0 +2019-05-21 14:00:00,,30.5,28.0 +2019-05-21 15:00:00,,49.7,33.0 +2019-05-21 16:00:00,,47.8,34.0 +2019-05-21 17:00:00,,36.6,34.0 +2019-05-21 18:00:00,,42.3,37.0 +2019-05-21 19:00:00,,75.0,35.0 +2019-05-21 20:00:00,,54.3,40.0 +2019-05-21 21:00:00,,50.0,38.0 +2019-05-21 22:00:00,,40.8,33.0 +2019-05-21 23:00:00,,43.0,33.0 +2019-05-22 00:00:00,,33.2,33.0 +2019-05-22 01:00:00,,29.5,30.0 +2019-05-22 02:00:00,,27.1,30.0 +2019-05-22 03:00:00,20.5,27.9,27.0 +2019-05-22 04:00:00,,19.2,27.0 +2019-05-22 05:00:00,,25.2,21.0 +2019-05-22 06:00:00,,33.7,21.0 +2019-05-22 07:00:00,,45.1,28.0 +2019-05-22 08:00:00,,75.7,29.0 +2019-05-22 09:00:00,,75.4,31.0 +2019-05-22 10:00:00,,70.8,31.0 +2019-05-22 11:00:00,,63.1,31.0 +2019-05-22 12:00:00,,57.8,28.0 
+2019-05-22 13:00:00,,42.6,25.0 +2019-05-22 14:00:00,,42.2,25.0 +2019-05-22 15:00:00,,38.5,28.0 +2019-05-22 16:00:00,,40.0,30.0 +2019-05-22 17:00:00,,33.2,32.0 +2019-05-22 18:00:00,,34.9,34.0 +2019-05-22 19:00:00,,36.1,34.0 +2019-05-22 20:00:00,,34.1,33.0 +2019-05-22 21:00:00,,36.2,33.0 +2019-05-22 22:00:00,,44.9,31.0 +2019-05-22 23:00:00,,37.7,32.0 +2019-05-23 00:00:00,,29.8,32.0 +2019-05-23 01:00:00,,62.1,23.0 +2019-05-23 02:00:00,,53.3,23.0 +2019-05-23 03:00:00,60.5,53.1,20.0 +2019-05-23 04:00:00,,66.6,20.0 +2019-05-23 05:00:00,,76.8,19.0 +2019-05-23 06:00:00,,71.9,19.0 +2019-05-23 07:00:00,,68.7,24.0 +2019-05-23 08:00:00,,79.6,26.0 +2019-05-23 09:00:00,,91.8,25.0 +2019-05-23 10:00:00,,97.0,23.0 +2019-05-23 11:00:00,,79.4,25.0 +2019-05-23 12:00:00,,28.3,24.0 +2019-05-23 13:00:00,,17.0,25.0 +2019-05-23 14:00:00,,16.4,28.0 +2019-05-23 15:00:00,,21.2,34.0 +2019-05-23 16:00:00,,17.2,38.0 +2019-05-23 17:00:00,,17.5,53.0 +2019-05-23 18:00:00,,17.8,60.0 +2019-05-23 19:00:00,,22.7,54.0 +2019-05-23 20:00:00,,23.5,51.0 +2019-05-23 21:00:00,,28.0,45.0 +2019-05-23 22:00:00,,33.8,44.0 +2019-05-23 23:00:00,,47.0,39.0 +2019-05-24 00:00:00,,61.9,39.0 +2019-05-24 01:00:00,,23.2,31.0 +2019-05-24 02:00:00,,32.8, +2019-05-24 03:00:00,74.5,28.8,31.0 +2019-05-24 04:00:00,,28.4,31.0 +2019-05-24 05:00:00,,19.4,23.0 +2019-05-24 06:00:00,,28.1,23.0 +2019-05-24 07:00:00,,35.9,29.0 +2019-05-24 08:00:00,,40.7,28.0 +2019-05-24 09:00:00,,54.8,26.0 +2019-05-24 10:00:00,,45.9,24.0 +2019-05-24 11:00:00,,37.9,23.0 +2019-05-24 12:00:00,,28.6,26.0 +2019-05-24 13:00:00,,40.6,29.0 +2019-05-24 14:00:00,,29.3,33.0 +2019-05-24 15:00:00,,24.3,39.0 +2019-05-24 16:00:00,,20.5,40.0 +2019-05-24 17:00:00,,22.7,43.0 +2019-05-24 18:00:00,,27.3,46.0 +2019-05-24 19:00:00,,25.2,46.0 +2019-05-24 20:00:00,,23.3,44.0 +2019-05-24 21:00:00,,21.9,42.0 +2019-05-24 22:00:00,,31.7,38.0 +2019-05-24 23:00:00,,18.1,39.0 +2019-05-25 00:00:00,,18.0,39.0 +2019-05-25 01:00:00,,16.5,32.0 +2019-05-25 02:00:00,,17.4,32.0 +2019-05-25 03:00:00,29.0,12.8,25.0 +2019-05-25 04:00:00,,20.3,25.0 +2019-05-25 05:00:00,,,21.0 +2019-05-25 06:00:00,,,21.0 +2019-05-25 07:00:00,,,22.0 +2019-05-25 08:00:00,,36.9,22.0 +2019-05-25 09:00:00,,42.1,23.0 +2019-05-25 10:00:00,,44.5,23.0 +2019-05-25 11:00:00,,33.6,21.0 +2019-05-25 12:00:00,,26.3,23.0 +2019-05-25 13:00:00,,19.5,24.0 +2019-05-25 14:00:00,,18.6,26.0 +2019-05-25 15:00:00,,26.1,31.0 +2019-05-25 16:00:00,,23.6,37.0 +2019-05-25 17:00:00,,30.0,42.0 +2019-05-25 18:00:00,,31.9,46.0 +2019-05-25 19:00:00,,20.6,47.0 +2019-05-25 20:00:00,,30.4,47.0 +2019-05-25 21:00:00,,22.1,44.0 +2019-05-25 22:00:00,,43.6,41.0 +2019-05-25 23:00:00,,39.5,36.0 +2019-05-26 00:00:00,,63.9,36.0 +2019-05-26 01:00:00,,70.2,32.0 +2019-05-26 02:00:00,,67.0,32.0 +2019-05-26 03:00:00,53.0,49.8,26.0 +2019-05-26 04:00:00,,23.4,26.0 +2019-05-26 05:00:00,,22.9,20.0 +2019-05-26 06:00:00,,22.3,20.0 +2019-05-26 07:00:00,,16.8,17.0 +2019-05-26 08:00:00,,15.1,17.0 +2019-05-26 09:00:00,,13.4,15.0 +2019-05-26 10:00:00,,11.0,15.0 +2019-05-26 11:00:00,,10.3,16.0 +2019-05-26 12:00:00,,11.3,17.0 +2019-05-26 13:00:00,,13.3,21.0 +2019-05-26 14:00:00,,11.5,24.0 +2019-05-26 15:00:00,,12.5,25.0 +2019-05-26 16:00:00,,15.3,26.0 +2019-05-26 17:00:00,,11.7,27.0 +2019-05-26 18:00:00,,17.1,26.0 +2019-05-26 19:00:00,,17.3,28.0 +2019-05-26 20:00:00,,22.8,26.0 +2019-05-26 21:00:00,,17.8,25.0 +2019-05-26 22:00:00,,16.6,27.0 +2019-05-26 23:00:00,,16.1,26.0 +2019-05-27 00:00:00,,15.2,26.0 +2019-05-27 01:00:00,,10.3,26.0 +2019-05-27 02:00:00,,9.5,26.0 +2019-05-27 03:00:00,10.5,7.1,24.0 
+2019-05-27 04:00:00,,5.9,24.0 +2019-05-27 05:00:00,,4.8,19.0 +2019-05-27 06:00:00,,6.5,19.0 +2019-05-27 07:00:00,,20.3,18.0 +2019-05-27 08:00:00,,29.1,18.0 +2019-05-27 09:00:00,,29.5,18.0 +2019-05-27 10:00:00,,34.2,18.0 +2019-05-27 11:00:00,,31.4,16.0 +2019-05-27 12:00:00,,23.3,17.0 +2019-05-27 13:00:00,,19.3,17.0 +2019-05-27 14:00:00,,17.3,20.0 +2019-05-27 15:00:00,,17.5,20.0 +2019-05-27 16:00:00,,17.3,22.0 +2019-05-27 17:00:00,,25.6,22.0 +2019-05-27 18:00:00,,23.6,22.0 +2019-05-27 19:00:00,,22.9,22.0 +2019-05-27 20:00:00,,25.6,22.0 +2019-05-27 21:00:00,,22.1,23.0 +2019-05-27 22:00:00,,22.3,20.0 +2019-05-27 23:00:00,,18.8,19.0 +2019-05-28 00:00:00,,19.9,19.0 +2019-05-28 01:00:00,,22.6,16.0 +2019-05-28 02:00:00,,15.4,16.0 +2019-05-28 03:00:00,11.0,8.2,16.0 +2019-05-28 04:00:00,,6.4,16.0 +2019-05-28 05:00:00,,6.1,15.0 +2019-05-28 06:00:00,,8.9,15.0 +2019-05-28 07:00:00,,19.9,19.0 +2019-05-28 08:00:00,,28.8,20.0 +2019-05-28 09:00:00,,33.8,20.0 +2019-05-28 10:00:00,,31.2,20.0 +2019-05-28 11:00:00,,24.3,21.0 +2019-05-28 12:00:00,,21.6,21.0 +2019-05-28 13:00:00,,20.5,28.0 +2019-05-28 14:00:00,,24.8,27.0 +2019-05-28 15:00:00,,18.5,29.0 +2019-05-28 16:00:00,,18.8,30.0 +2019-05-28 17:00:00,,25.0,27.0 +2019-05-28 18:00:00,,26.5,25.0 +2019-05-28 19:00:00,,20.8,29.0 +2019-05-28 20:00:00,,16.2,29.0 +2019-05-28 21:00:00,,18.5,29.0 +2019-05-28 22:00:00,,20.4,31.0 +2019-05-28 23:00:00,,20.4, +2019-05-29 00:00:00,,20.2,25.0 +2019-05-29 01:00:00,,25.3,26.0 +2019-05-29 02:00:00,,23.4,26.0 +2019-05-29 03:00:00,21.0,21.6,23.0 +2019-05-29 04:00:00,,19.0,23.0 +2019-05-29 05:00:00,,20.3,21.0 +2019-05-29 06:00:00,,24.1,21.0 +2019-05-29 07:00:00,,36.7,24.0 +2019-05-29 08:00:00,,46.5,22.0 +2019-05-29 09:00:00,,50.5,21.0 +2019-05-29 10:00:00,,45.7,18.0 +2019-05-29 11:00:00,,34.5,18.0 +2019-05-29 12:00:00,,30.7,18.0 +2019-05-29 13:00:00,,22.0,20.0 +2019-05-29 14:00:00,,13.2,13.0 +2019-05-29 15:00:00,,17.8,15.0 +2019-05-29 16:00:00,,0.0,5.0 +2019-05-29 17:00:00,,0.0,3.0 +2019-05-29 18:00:00,,20.1,5.0 +2019-05-29 19:00:00,,22.9,5.0 +2019-05-29 20:00:00,,25.3,5.0 +2019-05-29 21:00:00,,24.1,6.0 +2019-05-29 22:00:00,,20.8,6.0 +2019-05-29 23:00:00,,16.9,5.0 +2019-05-30 00:00:00,,19.0,5.0 +2019-05-30 01:00:00,,19.9,1.0 +2019-05-30 02:00:00,,19.4,1.0 +2019-05-30 03:00:00,7.5,12.4,0.0 +2019-05-30 04:00:00,,9.4,0.0 +2019-05-30 05:00:00,,10.6,0.0 +2019-05-30 06:00:00,,10.4,0.0 +2019-05-30 07:00:00,,12.2,0.0 +2019-05-30 08:00:00,,13.3,2.0 +2019-05-30 09:00:00,,18.3,3.0 +2019-05-30 10:00:00,,16.7,5.0 +2019-05-30 11:00:00,,15.1,9.0 +2019-05-30 12:00:00,,13.8,13.0 +2019-05-30 13:00:00,,14.9,17.0 +2019-05-30 14:00:00,,14.2,20.0 +2019-05-30 15:00:00,,16.1,22.0 +2019-05-30 16:00:00,,14.9,22.0 +2019-05-30 17:00:00,,13.0,27.0 +2019-05-30 18:00:00,,12.8,30.0 +2019-05-30 19:00:00,,20.4,28.0 +2019-05-30 20:00:00,,22.1,28.0 +2019-05-30 21:00:00,,22.9,27.0 +2019-05-30 22:00:00,,21.9,27.0 +2019-05-30 23:00:00,,26.9,23.0 +2019-05-31 00:00:00,,27.0,23.0 +2019-05-31 01:00:00,,29.6,18.0 +2019-05-31 02:00:00,,27.2,18.0 +2019-05-31 03:00:00,9.0,36.9,12.0 +2019-05-31 04:00:00,,44.1,12.0 +2019-05-31 05:00:00,,40.1,9.0 +2019-05-31 06:00:00,,31.1,9.0 +2019-05-31 07:00:00,,37.2,8.0 +2019-05-31 08:00:00,,38.6,9.0 +2019-05-31 09:00:00,,47.4,8.0 +2019-05-31 10:00:00,,36.6,37.0 +2019-05-31 11:00:00,,19.6,15.0 +2019-05-31 12:00:00,,17.2,16.0 +2019-05-31 13:00:00,,15.1,18.0 +2019-05-31 14:00:00,,13.3,21.0 +2019-05-31 15:00:00,,13.8,21.0 +2019-05-31 16:00:00,,15.4,24.0 +2019-05-31 17:00:00,,15.4,26.0 +2019-05-31 18:00:00,,16.3,26.0 +2019-05-31 
19:00:00,,20.5,29.0 +2019-05-31 20:00:00,,25.2,33.0 +2019-05-31 21:00:00,,23.3,33.0 +2019-05-31 22:00:00,,37.0,31.0 +2019-05-31 23:00:00,,60.2,26.0 +2019-06-01 00:00:00,,68.0,26.0 +2019-06-01 01:00:00,,81.7,22.0 +2019-06-01 02:00:00,,84.7,22.0 +2019-06-01 03:00:00,52.5,74.8,16.0 +2019-06-01 04:00:00,,68.1,16.0 +2019-06-01 05:00:00,,,11.0 +2019-06-01 06:00:00,,,11.0 +2019-06-01 07:00:00,,,4.0 +2019-06-01 08:00:00,,44.6,2.0 +2019-06-01 09:00:00,,46.4,8.0 +2019-06-01 10:00:00,,33.3,9.0 +2019-06-01 11:00:00,,23.9,12.0 +2019-06-01 12:00:00,,13.8,19.0 +2019-06-01 13:00:00,,12.2,28.0 +2019-06-01 14:00:00,,10.4,33.0 +2019-06-01 15:00:00,,10.2,36.0 +2019-06-01 16:00:00,,10.0,33.0 +2019-06-01 17:00:00,,10.2,31.0 +2019-06-01 18:00:00,,11.8,32.0 +2019-06-01 19:00:00,,11.8,36.0 +2019-06-01 20:00:00,,14.5,38.0 +2019-06-01 21:00:00,,24.6,41.0 +2019-06-01 22:00:00,,43.6,44.0 +2019-06-01 23:00:00,,49.4,52.0 +2019-06-02 00:00:00,,48.1,52.0 +2019-06-02 01:00:00,,32.7,44.0 +2019-06-02 02:00:00,,38.1,44.0 +2019-06-02 03:00:00,,38.2,43.0 +2019-06-02 04:00:00,,39.2,43.0 +2019-06-02 05:00:00,,23.2,37.0 +2019-06-02 06:00:00,,24.5,37.0 +2019-06-02 07:00:00,,37.2,32.0 +2019-06-02 08:00:00,,24.1,32.0 +2019-06-02 09:00:00,,18.1,30.0 +2019-06-02 10:00:00,,19.5,32.0 +2019-06-02 11:00:00,,21.0,35.0 +2019-06-02 12:00:00,,18.1,36.0 +2019-06-02 13:00:00,,13.1,35.0 +2019-06-02 14:00:00,,11.5,34.0 +2019-06-02 15:00:00,,13.0,36.0 +2019-06-02 16:00:00,,15.0,33.0 +2019-06-02 17:00:00,,13.9,32.0 +2019-06-02 18:00:00,,14.4,32.0 +2019-06-02 19:00:00,,14.4,34.0 +2019-06-02 20:00:00,,15.6,34.0 +2019-06-02 21:00:00,,25.8,32.0 +2019-06-02 22:00:00,,40.9,28.0 +2019-06-02 23:00:00,,36.9,27.0 +2019-06-03 00:00:00,,27.6,27.0 +2019-06-03 01:00:00,,17.9,21.0 +2019-06-03 02:00:00,,15.7,21.0 +2019-06-03 03:00:00,,11.8,11.0 +2019-06-03 04:00:00,,11.7,11.0 +2019-06-03 05:00:00,,9.8,3.0 +2019-06-03 06:00:00,,11.4,3.0 +2019-06-03 07:00:00,,29.0,5.0 +2019-06-03 08:00:00,,44.1,6.0 +2019-06-03 09:00:00,,50.0,7.0 +2019-06-03 10:00:00,,43.9,5.0 +2019-06-03 11:00:00,,46.0,11.0 +2019-06-03 12:00:00,,31.7,16.0 +2019-06-03 13:00:00,,27.5,14.0 +2019-06-03 14:00:00,,22.1,15.0 +2019-06-03 15:00:00,,25.8,17.0 +2019-06-03 16:00:00,,23.2,21.0 +2019-06-03 17:00:00,,24.8,22.0 +2019-06-03 18:00:00,,25.3,24.0 +2019-06-03 19:00:00,,24.4,24.0 +2019-06-03 20:00:00,,23.1,23.0 +2019-06-03 21:00:00,,28.9,20.0 +2019-06-03 22:00:00,,33.0,20.0 +2019-06-03 23:00:00,,31.1,17.0 +2019-06-04 00:00:00,,30.5,17.0 +2019-06-04 01:00:00,,44.6,12.0 +2019-06-04 02:00:00,,52.4,12.0 +2019-06-04 03:00:00,,43.9,8.0 +2019-06-04 04:00:00,,35.0,8.0 +2019-06-04 05:00:00,,41.6,5.0 +2019-06-04 06:00:00,,28.8,5.0 +2019-06-04 07:00:00,,36.5,14.0 +2019-06-04 08:00:00,,47.7,18.0 +2019-06-04 09:00:00,,53.5,22.0 +2019-06-04 10:00:00,,50.8,35.0 +2019-06-04 11:00:00,,38.5,31.0 +2019-06-04 12:00:00,,23.3,32.0 +2019-06-04 13:00:00,,19.6,35.0 +2019-06-04 14:00:00,,17.7,37.0 +2019-06-04 15:00:00,,17.4,36.0 +2019-06-04 16:00:00,,18.1,38.0 +2019-06-04 17:00:00,,21.5,38.0 +2019-06-04 18:00:00,,26.3,40.0 +2019-06-04 19:00:00,,23.4,29.0 +2019-06-04 20:00:00,,25.2,20.0 +2019-06-04 21:00:00,,17.0,18.0 +2019-06-04 22:00:00,,16.9,17.0 +2019-06-04 23:00:00,,26.3,17.0 +2019-06-05 00:00:00,,33.5,17.0 +2019-06-05 01:00:00,,17.8,13.0 +2019-06-05 02:00:00,,15.7,13.0 +2019-06-05 03:00:00,15.0,10.8,4.0 +2019-06-05 04:00:00,,12.4,4.0 +2019-06-05 05:00:00,,16.2,6.0 +2019-06-05 06:00:00,,24.5,6.0 +2019-06-05 07:00:00,,39.2,2.0 +2019-06-05 08:00:00,,35.8,1.0 +2019-06-05 09:00:00,,36.9,0.0 +2019-06-05 10:00:00,,35.3,0.0 
+2019-06-05 11:00:00,,36.8,5.0 +2019-06-05 12:00:00,,42.1,7.0 +2019-06-05 13:00:00,,59.0,9.0 +2019-06-05 14:00:00,,47.2,14.0 +2019-06-05 15:00:00,,33.6,20.0 +2019-06-05 16:00:00,,38.3,20.0 +2019-06-05 17:00:00,,53.5,19.0 +2019-06-05 18:00:00,,37.9,19.0 +2019-06-05 19:00:00,,48.8,19.0 +2019-06-05 20:00:00,,40.8,19.0 +2019-06-05 21:00:00,,37.8,19.0 +2019-06-05 22:00:00,,37.5,19.0 +2019-06-05 23:00:00,,33.7,17.0 +2019-06-06 00:00:00,,30.3,17.0 +2019-06-06 01:00:00,,31.8,8.0 +2019-06-06 02:00:00,,23.8, +2019-06-06 03:00:00,,18.0,4.0 +2019-06-06 04:00:00,,15.2,4.0 +2019-06-06 05:00:00,,19.2,0.0 +2019-06-06 06:00:00,,28.4,0.0 +2019-06-06 07:00:00,,40.3,1.0 +2019-06-06 08:00:00,,40.5,3.0 +2019-06-06 09:00:00,,43.1,0.0 +2019-06-06 10:00:00,,36.0,1.0 +2019-06-06 11:00:00,,26.0,7.0 +2019-06-06 12:00:00,,21.2,7.0 +2019-06-06 13:00:00,,16.4,12.0 +2019-06-06 14:00:00,,16.5,10.0 +2019-06-06 15:00:00,,16.0,11.0 +2019-06-06 16:00:00,,15.1,16.0 +2019-06-06 17:00:00,,,22.0 +2019-06-06 18:00:00,,,24.0 +2019-06-06 19:00:00,,,24.0 +2019-06-06 20:00:00,,,24.0 +2019-06-06 21:00:00,,,22.0 +2019-06-06 22:00:00,,,24.0 +2019-06-06 23:00:00,,,21.0 +2019-06-07 00:00:00,,,21.0 +2019-06-07 01:00:00,,,23.0 +2019-06-07 02:00:00,,,23.0 +2019-06-07 03:00:00,,,27.0 +2019-06-07 04:00:00,,,27.0 +2019-06-07 05:00:00,,,23.0 +2019-06-07 06:00:00,,,23.0 +2019-06-07 07:00:00,,,25.0 +2019-06-07 08:00:00,,28.9,23.0 +2019-06-07 09:00:00,,23.0,24.0 +2019-06-07 10:00:00,,29.3,25.0 +2019-06-07 11:00:00,,34.5,23.0 +2019-06-07 12:00:00,,32.1,25.0 +2019-06-07 13:00:00,,26.7,27.0 +2019-06-07 14:00:00,,17.8,20.0 +2019-06-07 15:00:00,,15.0,15.0 +2019-06-07 16:00:00,,13.1,15.0 +2019-06-07 17:00:00,,15.6,21.0 +2019-06-07 18:00:00,,19.5,24.0 +2019-06-07 19:00:00,,19.5,27.0 +2019-06-07 20:00:00,,19.1,35.0 +2019-06-07 21:00:00,,19.9,36.0 +2019-06-07 22:00:00,,19.4,35.0 +2019-06-07 23:00:00,,16.3, +2019-06-08 00:00:00,,14.7,33.0 +2019-06-08 01:00:00,,14.4,28.0 +2019-06-08 02:00:00,,11.3, +2019-06-08 03:00:00,,9.6,7.0 +2019-06-08 04:00:00,,8.4,7.0 +2019-06-08 05:00:00,,9.8,3.0 +2019-06-08 06:00:00,,10.7,3.0 +2019-06-08 07:00:00,,14.1,2.0 +2019-06-08 08:00:00,,13.8,3.0 +2019-06-08 09:00:00,,14.0,4.0 +2019-06-08 10:00:00,,13.0,2.0 +2019-06-08 11:00:00,,11.7,3.0 +2019-06-08 12:00:00,,10.3,4.0 +2019-06-08 13:00:00,,10.4,8.0 +2019-06-08 14:00:00,,9.2,10.0 +2019-06-08 15:00:00,,11.1,13.0 +2019-06-08 16:00:00,,10.3,17.0 +2019-06-08 17:00:00,,11.7,19.0 +2019-06-08 18:00:00,,14.1,20.0 +2019-06-08 19:00:00,,14.8,20.0 +2019-06-08 20:00:00,,22.0,19.0 +2019-06-08 21:00:00,,,17.0 +2019-06-08 22:00:00,,,16.0 +2019-06-08 23:00:00,,36.7, +2019-06-09 00:00:00,,34.8,20.0 +2019-06-09 01:00:00,,47.0,10.0 +2019-06-09 02:00:00,,55.9,10.0 +2019-06-09 03:00:00,10.0,41.0,7.0 +2019-06-09 04:00:00,,51.2,7.0 +2019-06-09 05:00:00,,51.5,1.0 +2019-06-09 06:00:00,,43.0,1.0 +2019-06-09 07:00:00,,42.2,5.0 +2019-06-09 08:00:00,,36.7,1.0 +2019-06-09 09:00:00,,32.7,0.0 +2019-06-09 10:00:00,,30.2,0.0 +2019-06-09 11:00:00,,25.0,2.0 +2019-06-09 12:00:00,,16.6,5.0 +2019-06-09 13:00:00,,14.6,8.0 +2019-06-09 14:00:00,,14.6,13.0 +2019-06-09 15:00:00,,10.2,17.0 +2019-06-09 16:00:00,,7.9,19.0 +2019-06-09 17:00:00,,7.2,24.0 +2019-06-09 18:00:00,,10.3,26.0 +2019-06-09 19:00:00,,13.0,20.0 +2019-06-09 20:00:00,,19.5,21.0 +2019-06-09 21:00:00,,30.6,21.0 +2019-06-09 22:00:00,,33.2,22.0 +2019-06-09 23:00:00,,30.9, +2019-06-10 00:00:00,,37.1,24.0 +2019-06-10 01:00:00,,39.9,21.0 +2019-06-10 02:00:00,,28.1,21.0 +2019-06-10 03:00:00,18.5,19.3,25.0 +2019-06-10 04:00:00,,17.8,25.0 +2019-06-10 
05:00:00,,18.0,24.0 +2019-06-10 06:00:00,,13.7,24.0 +2019-06-10 07:00:00,,21.3,24.0 +2019-06-10 08:00:00,,26.7,22.0 +2019-06-10 09:00:00,,23.0,27.0 +2019-06-10 10:00:00,,16.9,34.0 +2019-06-10 11:00:00,,18.5,45.0 +2019-06-10 12:00:00,,14.1,41.0 +2019-06-10 13:00:00,,12.2,45.0 +2019-06-10 14:00:00,,11.7,51.0 +2019-06-10 15:00:00,,9.6,40.0 +2019-06-10 16:00:00,,9.5,40.0 +2019-06-10 17:00:00,,11.7,31.0 +2019-06-10 18:00:00,,15.1,28.0 +2019-06-10 19:00:00,,19.1,26.0 +2019-06-10 20:00:00,,18.4,25.0 +2019-06-10 21:00:00,,22.3,26.0 +2019-06-10 22:00:00,,22.6,24.0 +2019-06-10 23:00:00,,23.5,23.0 +2019-06-11 00:00:00,,24.8,23.0 +2019-06-11 01:00:00,,24.1,15.0 +2019-06-11 02:00:00,,19.6,15.0 +2019-06-11 03:00:00,7.5,19.1,16.0 +2019-06-11 04:00:00,,29.6,16.0 +2019-06-11 05:00:00,,32.3,13.0 +2019-06-11 06:00:00,,52.7,13.0 +2019-06-11 07:00:00,,58.7,17.0 +2019-06-11 08:00:00,,55.4,18.0 +2019-06-11 09:00:00,,58.0,21.0 +2019-06-11 10:00:00,,43.6,23.0 +2019-06-11 11:00:00,,31.7,22.0 +2019-06-11 12:00:00,,22.1,22.0 +2019-06-11 13:00:00,,17.3,23.0 +2019-06-11 14:00:00,,12.6,26.0 +2019-06-11 15:00:00,,13.1,35.0 +2019-06-11 16:00:00,,16.6,31.0 +2019-06-11 17:00:00,,19.8,31.0 +2019-06-11 18:00:00,,22.6,30.0 +2019-06-11 19:00:00,,35.5,31.0 +2019-06-11 20:00:00,,44.6,30.0 +2019-06-11 21:00:00,,36.1,22.0 +2019-06-11 22:00:00,,42.7,22.0 +2019-06-11 23:00:00,,54.1,20.0 +2019-06-12 00:00:00,,59.4,20.0 +2019-06-12 01:00:00,,41.5,15.0 +2019-06-12 02:00:00,,37.2, +2019-06-12 03:00:00,21.0,41.9, +2019-06-12 04:00:00,,34.7,11.0 +2019-06-12 05:00:00,,36.3,9.0 +2019-06-12 06:00:00,,44.9,9.0 +2019-06-12 07:00:00,,42.7,12.0 +2019-06-12 08:00:00,,38.4,17.0 +2019-06-12 09:00:00,,44.4,20.0 +2019-06-12 10:00:00,,35.5,22.0 +2019-06-12 11:00:00,,26.7,25.0 +2019-06-12 12:00:00,,0.0,35.0 +2019-06-12 13:00:00,,0.0,33.0 +2019-06-12 14:00:00,,15.4,33.0 +2019-06-12 15:00:00,,17.9,35.0 +2019-06-12 16:00:00,,20.3,42.0 +2019-06-12 17:00:00,,16.8,45.0 +2019-06-12 18:00:00,,23.6,43.0 +2019-06-12 19:00:00,,24.2,45.0 +2019-06-12 20:00:00,,25.3,33.0 +2019-06-12 21:00:00,,23.4,41.0 +2019-06-12 22:00:00,,29.2,43.0 +2019-06-12 23:00:00,,29.3, +2019-06-13 00:00:00,,25.6,35.0 +2019-06-13 01:00:00,,26.9,29.0 +2019-06-13 02:00:00,,20.0, +2019-06-13 03:00:00,28.5,18.7,26.0 +2019-06-13 04:00:00,,18.0,26.0 +2019-06-13 05:00:00,,18.8,16.0 +2019-06-13 06:00:00,,24.6,16.0 +2019-06-13 07:00:00,,37.0,19.0 +2019-06-13 08:00:00,,39.8,21.0 +2019-06-13 09:00:00,,40.9,19.0 +2019-06-13 10:00:00,,35.3,16.0 +2019-06-13 11:00:00,,30.2,18.0 +2019-06-13 12:00:00,,24.5,19.0 +2019-06-13 13:00:00,,22.7,19.0 +2019-06-13 14:00:00,,17.9,16.0 +2019-06-13 15:00:00,,18.2,15.0 +2019-06-13 16:00:00,,19.4,13.0 +2019-06-13 17:00:00,,28.8,11.0 +2019-06-13 18:00:00,,36.1,15.0 +2019-06-13 19:00:00,,38.2,14.0 +2019-06-13 20:00:00,,24.0,13.0 +2019-06-13 21:00:00,,27.5,14.0 +2019-06-13 22:00:00,,31.5,15.0 +2019-06-13 23:00:00,,58.8,15.0 +2019-06-14 00:00:00,,77.9,15.0 +2019-06-14 01:00:00,,78.3,13.0 +2019-06-14 02:00:00,,74.2, +2019-06-14 03:00:00,,68.1,8.0 +2019-06-14 04:00:00,,66.6,8.0 +2019-06-14 05:00:00,,48.5,6.0 +2019-06-14 06:00:00,,37.9,6.0 +2019-06-14 07:00:00,,49.3,13.0 +2019-06-14 08:00:00,,64.3,11.0 +2019-06-14 09:00:00,,51.5,11.0 +2019-06-14 10:00:00,,34.3,14.0 +2019-06-14 11:00:00,36.5,27.9,13.0 +2019-06-14 12:00:00,,25.1,13.0 +2019-06-14 13:00:00,,21.8,15.0 +2019-06-14 14:00:00,,17.1,16.0 +2019-06-14 15:00:00,,15.4,22.0 +2019-06-14 16:00:00,,14.2,25.0 +2019-06-14 17:00:00,,15.2,25.0 +2019-06-14 18:00:00,,18.9,26.0 +2019-06-14 19:00:00,,16.6,27.0 +2019-06-14 
20:00:00,,19.0,26.0 +2019-06-14 21:00:00,,25.0,26.0 +2019-06-14 22:00:00,,41.9,25.0 +2019-06-14 23:00:00,,55.0,26.0 +2019-06-15 00:00:00,,35.3,26.0 +2019-06-15 01:00:00,,32.1,26.0 +2019-06-15 02:00:00,,29.6, +2019-06-15 03:00:00,17.5,29.0, +2019-06-15 04:00:00,,33.9, +2019-06-15 05:00:00,,,10.0 +2019-06-15 06:00:00,,,10.0 +2019-06-15 07:00:00,,,13.0 +2019-06-15 08:00:00,,35.8,13.0 +2019-06-15 09:00:00,,24.1,8.0 +2019-06-15 10:00:00,,17.6,8.0 +2019-06-15 11:00:00,,14.0,12.0 +2019-06-15 12:00:00,,12.1,14.0 +2019-06-15 13:00:00,,11.1,13.0 +2019-06-15 14:00:00,,9.4,18.0 +2019-06-15 15:00:00,,9.0,17.0 +2019-06-15 16:00:00,,9.6,18.0 +2019-06-15 17:00:00,,10.5,18.0 +2019-06-15 18:00:00,,10.7,20.0 +2019-06-15 19:00:00,,11.1,22.0 +2019-06-15 20:00:00,,14.0,22.0 +2019-06-15 21:00:00,,14.2,21.0 +2019-06-15 22:00:00,,15.2,20.0 +2019-06-15 23:00:00,,17.2,19.0 +2019-06-16 00:00:00,,20.1,19.0 +2019-06-16 01:00:00,,22.6,15.0 +2019-06-16 02:00:00,,16.5,15.0 +2019-06-16 03:00:00,42.5,12.8,12.0 +2019-06-16 04:00:00,,11.4,12.0 +2019-06-16 05:00:00,,11.2,10.0 +2019-06-16 06:00:00,,11.7,10.0 +2019-06-16 07:00:00,,14.0,8.0 +2019-06-16 08:00:00,,11.6,5.0 +2019-06-16 09:00:00,,10.2,4.0 +2019-06-16 10:00:00,,9.9,5.0 +2019-06-16 11:00:00,,9.4,6.0 +2019-06-16 12:00:00,,8.7,6.0 +2019-06-16 13:00:00,,12.9,10.0 +2019-06-16 14:00:00,,11.2,16.0 +2019-06-16 15:00:00,,8.7,23.0 +2019-06-16 16:00:00,,8.1,26.0 +2019-06-16 17:00:00,,8.4,29.0 +2019-06-16 18:00:00,,9.2,29.0 +2019-06-16 19:00:00,,11.8,28.0 +2019-06-16 20:00:00,,12.3,28.0 +2019-06-16 21:00:00,,14.4,27.0 +2019-06-16 22:00:00,,23.3,25.0 +2019-06-16 23:00:00,,42.7, +2019-06-17 00:00:00,,56.6,23.0 +2019-06-17 01:00:00,,67.3,17.0 +2019-06-17 02:00:00,,69.3,17.0 +2019-06-17 03:00:00,42.0,58.8,14.0 +2019-06-17 04:00:00,35.5,53.1,14.0 +2019-06-17 05:00:00,36.0,49.1,11.0 +2019-06-17 06:00:00,39.5,45.7,11.0 +2019-06-17 07:00:00,42.5,44.8,12.0 +2019-06-17 08:00:00,43.5,52.3,13.0 +2019-06-17 09:00:00,45.0,54.4,13.0 +2019-06-17 10:00:00,41.0,51.6,11.0 +2019-06-17 11:00:00,,30.4,11.0 +2019-06-17 12:00:00,,16.0,11.0 +2019-06-17 13:00:00,,15.2, +2019-06-17 14:00:00,,10.1, +2019-06-17 15:00:00,,9.6, +2019-06-17 16:00:00,,11.5, +2019-06-17 17:00:00,,13.1, +2019-06-17 18:00:00,,11.9, +2019-06-17 19:00:00,,14.9, +2019-06-17 20:00:00,,15.4, +2019-06-17 21:00:00,,15.2, +2019-06-17 22:00:00,,20.5, +2019-06-17 23:00:00,,38.3, +2019-06-18 00:00:00,,51.0, +2019-06-18 01:00:00,,73.3, +2019-06-18 02:00:00,,66.2, +2019-06-18 03:00:00,,60.1, +2019-06-18 04:00:00,,39.8, +2019-06-18 05:00:00,,45.5, +2019-06-18 06:00:00,,26.5, +2019-06-18 07:00:00,,33.8, +2019-06-18 08:00:00,,51.4, +2019-06-18 09:00:00,,52.6, +2019-06-18 10:00:00,,49.6, +2019-06-18 21:00:00,,15.3, +2019-06-18 22:00:00,,17.0, +2019-06-18 23:00:00,,23.1, +2019-06-19 00:00:00,,39.3, +2019-06-19 11:00:00,,27.3, +2019-06-19 12:00:00,,26.6, +2019-06-20 15:00:00,,19.4, +2019-06-20 16:00:00,,20.1, +2019-06-20 17:00:00,,19.3, +2019-06-20 18:00:00,,19.0, +2019-06-20 19:00:00,,23.2, +2019-06-20 20:00:00,,23.9, +2019-06-20 21:00:00,,25.3, +2019-06-20 22:00:00,,21.4, +2019-06-20 23:00:00,,24.9, +2019-06-21 00:00:00,,26.5, +2019-06-21 01:00:00,,21.8, +2019-06-21 02:00:00,,20.0, diff --git a/doc/data/air_quality_no2_long.csv b/doc/data/air_quality_no2_long.csv new file mode 100644 index 00000000..5d959370 --- /dev/null +++ b/doc/data/air_quality_no2_long.csv @@ -0,0 +1,2069 @@ +city,country,date.utc,location,parameter,value,unit +Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³ 
+Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-06-20 19:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-20 18:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-20 17:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-20 16:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-20 15:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-20 14:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-20 13:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-19 10:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-06-19 09:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-06-18 22:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-06-18 21:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-18 20:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-18 19:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-06-18 08:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-06-18 07:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-06-18 06:00:00+00:00,FR04014,no2,51.4,µg/m³ +Paris,FR,2019-06-18 05:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-06-18 04:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-18 03:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-06-18 02:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-18 01:00:00+00:00,FR04014,no2,60.1,µg/m³ +Paris,FR,2019-06-18 00:00:00+00:00,FR04014,no2,66.2,µg/m³ +Paris,FR,2019-06-17 23:00:00+00:00,FR04014,no2,73.3,µg/m³ +Paris,FR,2019-06-17 22:00:00+00:00,FR04014,no2,51.0,µg/m³ +Paris,FR,2019-06-17 21:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-17 20:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-06-17 19:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 18:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-17 17:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-06-17 16:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-06-17 15:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-17 14:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-17 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-17 12:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-06-17 11:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 10:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-17 09:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-06-17 08:00:00+00:00,FR04014,no2,51.6,µg/m³ +Paris,FR,2019-06-17 07:00:00+00:00,FR04014,no2,54.4,µg/m³ +Paris,FR,2019-06-17 06:00:00+00:00,FR04014,no2,52.3,µg/m³ +Paris,FR,2019-06-17 05:00:00+00:00,FR04014,no2,44.8,µg/m³ +Paris,FR,2019-06-17 04:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-06-17 03:00:00+00:00,FR04014,no2,49.1,µg/m³ +Paris,FR,2019-06-17 02:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-06-17 01:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-17 00:00:00+00:00,FR04014,no2,69.3,µg/m³ +Paris,FR,2019-06-16 23:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-06-16 22:00:00+00:00,FR04014,no2,56.6,µg/m³ +Paris,FR,2019-06-16 21:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-16 20:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-16 18:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-06-16 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-16 16:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-16 15:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-06-16 13:00:00+00:00,FR04014,no2,8.7,µg/m³ 
+Paris,FR,2019-06-16 12:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 11:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-06-16 10:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 09:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-16 08:00:00+00:00,FR04014,no2,9.9,µg/m³ +Paris,FR,2019-06-16 07:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-16 06:00:00+00:00,FR04014,no2,11.6,µg/m³ +Paris,FR,2019-06-16 05:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-16 04:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-16 03:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 02:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-16 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-06-16 00:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-15 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-15 22:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-15 21:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-06-15 20:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-15 19:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-15 18:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 17:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 16:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-15 15:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-06-15 14:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-15 13:00:00+00:00,FR04014,no2,9.0,µg/m³ +Paris,FR,2019-06-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-15 11:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 10:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-06-15 09:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 08:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-06-15 07:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-15 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-15 02:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-06-15 01:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-15 00:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-14 23:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-14 22:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-14 21:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-06-14 20:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-14 19:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-14 18:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-14 17:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-14 16:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-06-14 15:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-14 14:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-14 13:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-14 12:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-06-14 11:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-14 10:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-06-14 09:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-06-14 08:00:00+00:00,FR04014,no2,34.3,µg/m³ +Paris,FR,2019-06-14 07:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-14 06:00:00+00:00,FR04014,no2,64.3,µg/m³ +Paris,FR,2019-06-14 05:00:00+00:00,FR04014,no2,49.3,µg/m³ +Paris,FR,2019-06-14 04:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-14 03:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-06-14 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-06-14 01:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-14 00:00:00+00:00,FR04014,no2,74.2,µg/m³ +Paris,FR,2019-06-13 23:00:00+00:00,FR04014,no2,78.3,µg/m³ +Paris,FR,2019-06-13 22:00:00+00:00,FR04014,no2,77.9,µg/m³ +Paris,FR,2019-06-13 
21:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-13 20:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-06-13 19:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-13 18:00:00+00:00,FR04014,no2,24.0,µg/m³ +Paris,FR,2019-06-13 17:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-13 16:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-13 15:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-13 14:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-13 13:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-06-13 12:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-13 11:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-06-13 10:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-13 09:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-13 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-13 07:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-13 06:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-13 05:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-06-13 04:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-13 03:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-06-13 02:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-13 01:00:00+00:00,FR04014,no2,18.7,µg/m³ +Paris,FR,2019-06-13 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-12 23:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-06-12 22:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-06-12 21:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-12 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-06-12 19:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-12 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-12 17:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-06-12 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-06-12 15:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-06-12 14:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-06-12 13:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-12 12:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-12 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 09:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-12 08:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-12 07:00:00+00:00,FR04014,no2,44.4,µg/m³ +Paris,FR,2019-06-12 06:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-06-12 05:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-12 04:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-06-12 03:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-06-12 02:00:00+00:00,FR04014,no2,34.7,µg/m³ +Paris,FR,2019-06-12 01:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-12 00:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-11 23:00:00+00:00,FR04014,no2,41.5,µg/m³ +Paris,FR,2019-06-11 22:00:00+00:00,FR04014,no2,59.4,µg/m³ +Paris,FR,2019-06-11 21:00:00+00:00,FR04014,no2,54.1,µg/m³ +Paris,FR,2019-06-11 20:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-11 19:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-11 18:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-11 17:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-11 16:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-11 15:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-06-11 14:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-11 13:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-11 12:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-06-11 11:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-06-11 10:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-11 
09:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-11 08:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-11 07:00:00+00:00,FR04014,no2,58.0,µg/m³ +Paris,FR,2019-06-11 06:00:00+00:00,FR04014,no2,55.4,µg/m³ +Paris,FR,2019-06-11 05:00:00+00:00,FR04014,no2,58.7,µg/m³ +Paris,FR,2019-06-11 04:00:00+00:00,FR04014,no2,52.7,µg/m³ +Paris,FR,2019-06-11 03:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-06-11 02:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-11 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-11 00:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-10 23:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-10 22:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-10 21:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-06-10 20:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-10 19:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-06-10 18:00:00+00:00,FR04014,no2,18.4,µg/m³ +Paris,FR,2019-06-10 17:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-10 16:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-10 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 14:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-06-10 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-10 12:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-10 10:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-10 09:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-06-10 08:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-10 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-10 06:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-10 05:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-06-10 04:00:00+00:00,FR04014,no2,13.7,µg/m³ +Paris,FR,2019-06-10 03:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-10 02:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-10 01:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-10 00:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-06-09 23:00:00+00:00,FR04014,no2,39.9,µg/m³ +Paris,FR,2019-06-09 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-06-09 21:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-06-09 20:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-06-09 19:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-06-09 18:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-09 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-09 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-09 15:00:00+00:00,FR04014,no2,7.2,µg/m³ +Paris,FR,2019-06-09 14:00:00+00:00,FR04014,no2,7.9,µg/m³ +Paris,FR,2019-06-09 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-09 12:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 11:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 10:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-09 09:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-09 08:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-09 07:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-09 06:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-09 05:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-06-09 04:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-06-09 03:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-09 02:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-06-09 01:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-06-09 00:00:00+00:00,FR04014,no2,55.9,µg/m³ +Paris,FR,2019-06-08 23:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-06-08 22:00:00+00:00,FR04014,no2,34.8,µg/m³ +Paris,FR,2019-06-08 
21:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-08 18:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-06-08 17:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-06-08 16:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 14:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 13:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-08 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-08 11:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-08 10:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 08:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-08 07:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-08 06:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-08 05:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 04:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-08 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-08 02:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-08 01:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-08 00:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-06-07 23:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-07 22:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-06-07 21:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-06-07 20:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-07 19:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-06-07 18:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-07 17:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 15:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-07 14:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-07 13:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-07 12:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-07 11:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-07 10:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-06-07 08:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-07 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-07 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-06 14:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-06 13:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-06 12:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-06 11:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-06-06 10:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-06-06 09:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-06-06 08:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-06-06 07:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-06-06 06:00:00+00:00,FR04014,no2,40.5,µg/m³ +Paris,FR,2019-06-06 05:00:00+00:00,FR04014,no2,40.3,µg/m³ +Paris,FR,2019-06-06 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-06-06 03:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-06-06 02:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-06 01:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-06 00:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-06-05 23:00:00+00:00,FR04014,no2,31.8,µg/m³ +Paris,FR,2019-06-05 22:00:00+00:00,FR04014,no2,30.3,µg/m³ +Paris,FR,2019-06-05 21:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-06-05 20:00:00+00:00,FR04014,no2,37.5,µg/m³ +Paris,FR,2019-06-05 19:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-06-05 18:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-06-05 17:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-06-05 
16:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-05 15:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-05 14:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-05 13:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-06-05 12:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-06-05 11:00:00+00:00,FR04014,no2,59.0,µg/m³ +Paris,FR,2019-06-05 10:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-06-05 09:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-06-05 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-05 07:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-05 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-05 05:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-05 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-05 03:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-06-05 02:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-06-05 01:00:00+00:00,FR04014,no2,10.8,µg/m³ +Paris,FR,2019-06-05 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-04 23:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-04 22:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-06-04 21:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 20:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-04 19:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-04 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-06-04 17:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-04 16:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 15:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-06-04 14:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-04 13:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-06-04 12:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-06-04 11:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-04 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-04 09:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-06-04 08:00:00+00:00,FR04014,no2,50.8,µg/m³ +Paris,FR,2019-06-04 07:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-04 06:00:00+00:00,FR04014,no2,47.7,µg/m³ +Paris,FR,2019-06-04 05:00:00+00:00,FR04014,no2,36.5,µg/m³ +Paris,FR,2019-06-04 04:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-04 03:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-06-04 02:00:00+00:00,FR04014,no2,35.0,µg/m³ +Paris,FR,2019-06-04 01:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-04 00:00:00+00:00,FR04014,no2,52.4,µg/m³ +Paris,FR,2019-06-03 23:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-03 22:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-06-03 21:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-06-03 20:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-06-03 19:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-03 18:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-03 17:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-06-03 16:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-03 15:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-03 14:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-03 13:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-03 12:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-03 11:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-03 10:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-03 09:00:00+00:00,FR04014,no2,46.0,µg/m³ +Paris,FR,2019-06-03 08:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-03 07:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-06-03 06:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-06-03 05:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-03 
04:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-03 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-03 02:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-03 01:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-03 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-02 23:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-02 22:00:00+00:00,FR04014,no2,27.6,µg/m³ +Paris,FR,2019-06-02 21:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-02 20:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-02 19:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-02 18:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-02 17:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 16:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 15:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-06-02 14:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-02 13:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-02 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-02 11:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-02 10:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 09:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-06-02 08:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-02 07:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 06:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-02 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-02 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-02 03:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-02 02:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-02 01:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-02 00:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-06-01 23:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-01 22:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-06-01 21:00:00+00:00,FR04014,no2,49.4,µg/m³ +Paris,FR,2019-06-01 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-01 19:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-01 18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-06-01 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 16:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 15:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 14:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-06-01 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 12:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-01 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-01 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-01 09:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-01 08:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-06-01 07:00:00+00:00,FR04014,no2,46.4,µg/m³ +Paris,FR,2019-06-01 06:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-01 02:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-01 01:00:00+00:00,FR04014,no2,74.8,µg/m³ +Paris,FR,2019-06-01 00:00:00+00:00,FR04014,no2,84.7,µg/m³ +Paris,FR,2019-05-31 23:00:00+00:00,FR04014,no2,81.7,µg/m³ +Paris,FR,2019-05-31 22:00:00+00:00,FR04014,no2,68.0,µg/m³ +Paris,FR,2019-05-31 21:00:00+00:00,FR04014,no2,60.2,µg/m³ +Paris,FR,2019-05-31 20:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-31 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-31 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-31 17:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-31 16:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-31 15:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 14:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 
13:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-31 12:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-31 11:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-31 10:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-31 09:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-31 08:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-31 07:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-31 06:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-31 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-31 04:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-31 03:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-31 02:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-31 01:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-31 00:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-05-30 23:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-30 22:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-30 21:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-05-30 20:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-30 19:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-30 18:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-30 17:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-30 16:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-30 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-30 14:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 13:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-30 12:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-05-30 11:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-30 09:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-30 08:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-05-30 07:00:00+00:00,FR04014,no2,18.3,µg/m³ +Paris,FR,2019-05-30 06:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-30 05:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-30 04:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-05-30 03:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-30 02:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-30 01:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-05-30 00:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-05-29 23:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-29 22:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-29 21:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-29 20:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-29 19:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-29 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-29 17:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-29 16:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-29 15:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-29 14:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-29 13:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-29 12:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-29 11:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-05-29 10:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-05-29 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-29 08:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-05-29 07:00:00+00:00,FR04014,no2,50.5,µg/m³ +Paris,FR,2019-05-29 06:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-29 05:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-05-29 04:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-29 03:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-29 02:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-29 
01:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-29 00:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-05-28 23:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-28 22:00:00+00:00,FR04014,no2,20.2,µg/m³ +Paris,FR,2019-05-28 21:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-28 20:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-28 19:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-28 18:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-05-28 17:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-28 16:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-28 15:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-28 14:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-28 13:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-28 12:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-28 11:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-28 10:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-28 09:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-28 08:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-28 07:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-05-28 06:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-28 05:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-28 04:00:00+00:00,FR04014,no2,8.9,µg/m³ +Paris,FR,2019-05-28 03:00:00+00:00,FR04014,no2,6.1,µg/m³ +Paris,FR,2019-05-28 02:00:00+00:00,FR04014,no2,6.4,µg/m³ +Paris,FR,2019-05-28 01:00:00+00:00,FR04014,no2,8.2,µg/m³ +Paris,FR,2019-05-28 00:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-27 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-05-27 22:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-27 21:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-27 20:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-27 19:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-27 18:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-05-27 17:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-27 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-05-27 15:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-05-27 14:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-27 13:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-27 12:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-27 11:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-05-27 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-27 09:00:00+00:00,FR04014,no2,31.4,µg/m³ +Paris,FR,2019-05-27 08:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-27 07:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-27 06:00:00+00:00,FR04014,no2,29.1,µg/m³ +Paris,FR,2019-05-27 05:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-27 04:00:00+00:00,FR04014,no2,6.5,µg/m³ +Paris,FR,2019-05-27 03:00:00+00:00,FR04014,no2,4.8,µg/m³ +Paris,FR,2019-05-27 02:00:00+00:00,FR04014,no2,5.9,µg/m³ +Paris,FR,2019-05-27 01:00:00+00:00,FR04014,no2,7.1,µg/m³ +Paris,FR,2019-05-27 00:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-05-26 23:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-26 22:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-05-26 21:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-26 20:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-26 19:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-26 18:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-26 17:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-26 16:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-05-26 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-26 14:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-26 
13:00:00+00:00,FR04014,no2,12.5,µg/m³ +Paris,FR,2019-05-26 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-05-26 11:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-26 10:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-05-26 09:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-26 08:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-26 07:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-05-26 06:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-26 05:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-26 04:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-26 03:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-26 02:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-05-26 01:00:00+00:00,FR04014,no2,49.8,µg/m³ +Paris,FR,2019-05-26 00:00:00+00:00,FR04014,no2,67.0,µg/m³ +Paris,FR,2019-05-25 23:00:00+00:00,FR04014,no2,70.2,µg/m³ +Paris,FR,2019-05-25 22:00:00+00:00,FR04014,no2,63.9,µg/m³ +Paris,FR,2019-05-25 21:00:00+00:00,FR04014,no2,39.5,µg/m³ +Paris,FR,2019-05-25 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-25 19:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-25 18:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-25 17:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-25 16:00:00+00:00,FR04014,no2,31.9,µg/m³ +Paris,FR,2019-05-25 15:00:00+00:00,FR04014,no2,30.0,µg/m³ +Paris,FR,2019-05-25 14:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-05-25 13:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-25 12:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-05-25 11:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-25 10:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-05-25 09:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-05-25 08:00:00+00:00,FR04014,no2,44.5,µg/m³ +Paris,FR,2019-05-25 07:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-05-25 06:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-25 02:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-25 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-25 00:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-05-24 23:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-24 22:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-05-24 21:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-05-24 20:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-24 19:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-24 18:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-24 17:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-24 16:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-05-24 15:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-24 14:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-24 13:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-24 12:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-24 11:00:00+00:00,FR04014,no2,40.6,µg/m³ +Paris,FR,2019-05-24 10:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-05-24 09:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-05-24 08:00:00+00:00,FR04014,no2,45.9,µg/m³ +Paris,FR,2019-05-24 07:00:00+00:00,FR04014,no2,54.8,µg/m³ +Paris,FR,2019-05-24 06:00:00+00:00,FR04014,no2,40.7,µg/m³ +Paris,FR,2019-05-24 05:00:00+00:00,FR04014,no2,35.9,µg/m³ +Paris,FR,2019-05-24 04:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-05-24 03:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-05-24 02:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-24 01:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-24 00:00:00+00:00,FR04014,no2,32.8,µg/m³ +Paris,FR,2019-05-23 23:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-23 
22:00:00+00:00,FR04014,no2,61.9,µg/m³ +Paris,FR,2019-05-23 21:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-05-23 20:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-05-23 19:00:00+00:00,FR04014,no2,28.0,µg/m³ +Paris,FR,2019-05-23 18:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-05-23 17:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-23 16:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-23 15:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-23 14:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-23 13:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-05-23 12:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-05-23 11:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-05-23 10:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-05-23 09:00:00+00:00,FR04014,no2,79.4,µg/m³ +Paris,FR,2019-05-23 08:00:00+00:00,FR04014,no2,97.0,µg/m³ +Paris,FR,2019-05-23 07:00:00+00:00,FR04014,no2,91.8,µg/m³ +Paris,FR,2019-05-23 06:00:00+00:00,FR04014,no2,79.6,µg/m³ +Paris,FR,2019-05-23 05:00:00+00:00,FR04014,no2,68.7,µg/m³ +Paris,FR,2019-05-23 04:00:00+00:00,FR04014,no2,71.9,µg/m³ +Paris,FR,2019-05-23 03:00:00+00:00,FR04014,no2,76.8,µg/m³ +Paris,FR,2019-05-23 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-05-23 01:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-05-23 00:00:00+00:00,FR04014,no2,53.3,µg/m³ +Paris,FR,2019-05-22 23:00:00+00:00,FR04014,no2,62.1,µg/m³ +Paris,FR,2019-05-22 22:00:00+00:00,FR04014,no2,29.8,µg/m³ +Paris,FR,2019-05-22 21:00:00+00:00,FR04014,no2,37.7,µg/m³ +Paris,FR,2019-05-22 20:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-05-22 19:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-22 18:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-05-22 17:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-05-22 16:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-05-22 15:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-22 14:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-05-22 13:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-05-22 12:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-05-22 11:00:00+00:00,FR04014,no2,42.6,µg/m³ +Paris,FR,2019-05-22 10:00:00+00:00,FR04014,no2,57.8,µg/m³ +Paris,FR,2019-05-22 09:00:00+00:00,FR04014,no2,63.1,µg/m³ +Paris,FR,2019-05-22 08:00:00+00:00,FR04014,no2,70.8,µg/m³ +Paris,FR,2019-05-22 07:00:00+00:00,FR04014,no2,75.4,µg/m³ +Paris,FR,2019-05-22 06:00:00+00:00,FR04014,no2,75.7,µg/m³ +Paris,FR,2019-05-22 05:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-05-22 04:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-05-22 03:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-22 02:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-22 01:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-22 00:00:00+00:00,FR04014,no2,27.1,µg/m³ +Paris,FR,2019-05-21 23:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-21 22:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-21 21:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-05-21 20:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-05-21 19:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-05-21 18:00:00+00:00,FR04014,no2,54.3,µg/m³ +Paris,FR,2019-05-21 17:00:00+00:00,FR04014,no2,75.0,µg/m³ +Paris,FR,2019-05-21 16:00:00+00:00,FR04014,no2,42.3,µg/m³ +Paris,FR,2019-05-21 15:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-21 14:00:00+00:00,FR04014,no2,47.8,µg/m³ +Paris,FR,2019-05-21 13:00:00+00:00,FR04014,no2,49.7,µg/m³ +Paris,FR,2019-05-21 12:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-05-21 11:00:00+00:00,FR04014,no2,25.5,µg/m³ +Paris,FR,2019-05-21 
10:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-21 09:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-21 08:00:00+00:00,FR04014,no2,54.2,µg/m³ +Paris,FR,2019-05-21 07:00:00+00:00,FR04014,no2,56.0,µg/m³ +Paris,FR,2019-05-21 06:00:00+00:00,FR04014,no2,62.6,µg/m³ +Paris,FR,2019-05-21 05:00:00+00:00,FR04014,no2,38.0,µg/m³ +Paris,FR,2019-05-21 04:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-21 03:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-21 02:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-21 01:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-21 00:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-20 23:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-20 22:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-05-20 21:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-20 20:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-20 19:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-05-20 18:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-20 17:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-20 16:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-05-20 15:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-20 14:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-05-20 13:00:00+00:00,FR04014,no2,23.7,µg/m³ +Paris,FR,2019-05-20 12:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-20 11:00:00+00:00,FR04014,no2,35.4,µg/m³ +Paris,FR,2019-05-20 10:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-05-20 09:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-05-20 08:00:00+00:00,FR04014,no2,46.1,µg/m³ +Paris,FR,2019-05-20 07:00:00+00:00,FR04014,no2,46.9,µg/m³ +Paris,FR,2019-05-20 06:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-20 05:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-20 04:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-20 03:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-05-20 02:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-05-20 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-20 00:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-05-19 23:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-19 22:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-19 21:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-19 20:00:00+00:00,FR04014,no2,35.6,µg/m³ +Paris,FR,2019-05-19 19:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-05-19 18:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-05-19 17:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-05-19 16:00:00+00:00,FR04014,no2,32.5,µg/m³ +Paris,FR,2019-05-19 15:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-19 14:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-19 13:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-05-19 12:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-19 11:00:00+00:00,FR04014,no2,32.6,µg/m³ +Paris,FR,2019-05-19 10:00:00+00:00,FR04014,no2,31.0,µg/m³ +Paris,FR,2019-05-19 09:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-05-19 08:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-19 07:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-05-19 06:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-19 05:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-05-19 04:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-05-19 03:00:00+00:00,FR04014,no2,36.4,µg/m³ +Paris,FR,2019-05-19 02:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-05-19 01:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-05-19 00:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-05-18 23:00:00+00:00,FR04014,no2,50.2,µg/m³ +Paris,FR,2019-05-18 
22:00:00+00:00,FR04014,no2,62.5,µg/m³ +Paris,FR,2019-05-18 21:00:00+00:00,FR04014,no2,59.3,µg/m³ +Paris,FR,2019-05-18 20:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-18 19:00:00+00:00,FR04014,no2,67.5,µg/m³ +Paris,FR,2019-05-18 18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-05-18 17:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-18 16:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-05-18 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-18 14:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-05-18 13:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-05-18 12:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-18 11:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-18 10:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-18 09:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-05-18 08:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-18 07:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-05-18 06:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-18 05:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-18 04:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-18 03:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-18 02:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-18 01:00:00+00:00,FR04014,no2,37.4,µg/m³ +Paris,FR,2019-05-18 00:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-05-17 23:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-05-17 22:00:00+00:00,FR04014,no2,28.2,µg/m³ +Paris,FR,2019-05-17 21:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-17 20:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-05-17 19:00:00+00:00,FR04014,no2,24.7,µg/m³ +Paris,FR,2019-05-17 18:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-05-17 17:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-17 16:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-05-17 15:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-17 14:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-17 13:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-05-17 12:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-17 11:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-05-17 10:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-05-17 09:00:00+00:00,FR04014,no2,60.5,µg/m³ +Paris,FR,2019-05-17 08:00:00+00:00,FR04014,no2,57.5,µg/m³ +Paris,FR,2019-05-17 07:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-05-17 06:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-17 05:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-05-17 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-17 03:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-05-17 02:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-17 01:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-17 00:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-16 23:00:00+00:00,FR04014,no2,43.7,µg/m³ +Paris,FR,2019-05-16 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-05-16 21:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-16 20:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-05-16 18:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-05-16 17:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-16 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-16 15:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-05-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-05-16 13:00:00+00:00,FR04014,no2,8.5,µg/m³ +Paris,FR,2019-05-16 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-05-16 11:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-05-16 
10:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-16 09:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-16 08:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-05-16 07:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-05-16 05:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-05-16 04:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-16 03:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-16 02:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-16 01:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-16 00:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-05-15 23:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-05-15 22:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-15 21:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-05-15 20:00:00+00:00,FR04014,no2,30.1,µg/m³ +Paris,FR,2019-05-15 19:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-15 18:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-15 17:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-15 16:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-15 15:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-15 14:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-05-15 13:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-05-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-15 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 09:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 08:00:00+00:00,FR04014,no2,25.7,µg/m³ +Paris,FR,2019-05-15 07:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-15 06:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-15 05:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-15 04:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-05-15 03:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-15 02:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-15 01:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-15 00:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-14 23:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-14 22:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-05-14 21:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-14 20:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-14 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-14 18:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-14 17:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-14 16:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-14 15:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-05-14 14:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-05-14 13:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-14 12:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-05-14 11:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-05-14 10:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-14 09:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-14 08:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-14 07:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-05-14 06:00:00+00:00,FR04014,no2,46.1,µg/m³ +Paris,FR,2019-05-14 05:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-14 04:00:00+00:00,FR04014,no2,31.6,µg/m³ +Paris,FR,2019-05-14 03:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-14 02:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-14 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-14 00:00:00+00:00,FR04014,no2,20.9,µg/m³ +Paris,FR,2019-05-13 23:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-13 22:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-05-13 
21:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-13 20:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-05-13 19:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-13 18:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-13 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-13 16:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-05-13 15:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-13 14:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-05-13 13:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-05-13 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-05-13 11:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-05-13 10:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-13 09:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-13 08:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-13 07:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-05-13 06:00:00+00:00,FR04014,no2,45.2,µg/m³ +Paris,FR,2019-05-13 05:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-13 04:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-05-13 03:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-05-13 02:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-13 01:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-05-13 00:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-12 23:00:00+00:00,FR04014,no2,32.5,µg/m³ +Paris,FR,2019-05-12 22:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-12 21:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-12 20:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-12 19:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-12 18:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-12 17:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-05-12 16:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-12 15:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-05-12 14:00:00+00:00,FR04014,no2,9.1,µg/m³ +Paris,FR,2019-05-12 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-05-12 12:00:00+00:00,FR04014,no2,10.9,µg/m³ +Paris,FR,2019-05-12 11:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-05-12 10:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-05-12 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-12 08:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-05-12 07:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-05-12 06:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-12 05:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-12 04:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-05-12 03:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-05-12 02:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-12 01:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-12 00:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-11 23:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-05-11 22:00:00+00:00,FR04014,no2,27.7,µg/m³ +Paris,FR,2019-05-11 21:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-05-11 20:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-05-11 19:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-11 18:00:00+00:00,FR04014,no2,33.1,µg/m³ +Paris,FR,2019-05-11 17:00:00+00:00,FR04014,no2,32.0,µg/m³ +Paris,FR,2019-05-11 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-11 15:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-05-11 14:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-11 13:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-05-11 12:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-05-11 11:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-11 10:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-05-11 
09:00:00+00:00,FR04014,no2,35.7,µg/m³ +Paris,FR,2019-05-11 08:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-11 07:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-11 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-05-11 02:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-11 01:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-11 00:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-10 23:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-10 22:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-05-10 21:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-10 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-10 19:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-05-10 18:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-10 17:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-10 16:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-05-10 15:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-10 14:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-10 13:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-05-10 12:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-10 11:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-10 10:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-05-10 09:00:00+00:00,FR04014,no2,53.4,µg/m³ +Paris,FR,2019-05-10 08:00:00+00:00,FR04014,no2,60.7,µg/m³ +Paris,FR,2019-05-10 07:00:00+00:00,FR04014,no2,57.3,µg/m³ +Paris,FR,2019-05-10 06:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-10 05:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-10 04:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-10 03:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-05-10 02:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-05-10 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-10 00:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-09 23:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-09 22:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-05-09 21:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-09 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-05-09 19:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-09 18:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-09 17:00:00+00:00,FR04014,no2,29.9,µg/m³ +Paris,FR,2019-05-09 16:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-09 15:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-09 14:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-09 13:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-05-09 12:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-05-09 11:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-09 10:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-05-09 09:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-05-09 08:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-09 07:00:00+00:00,FR04014,no2,49.0,µg/m³ +Paris,FR,2019-05-09 06:00:00+00:00,FR04014,no2,50.7,µg/m³ +Paris,FR,2019-05-09 05:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-09 04:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-09 03:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-05-09 02:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-05-09 01:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-09 00:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-05-08 23:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-08 22:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-08 21:00:00+00:00,FR04014,no2,48.9,µg/m³ +Paris,FR,2019-05-08 20:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-08 19:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-05-08 
18:00:00+00:00,FR04014,no2,27.8,µg/m³ +Paris,FR,2019-05-08 17:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-08 16:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-08 15:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-08 14:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-08 13:00:00+00:00,FR04014,no2,14.3,µg/m³ +Paris,FR,2019-05-08 12:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-08 11:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-05-08 10:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-08 09:00:00+00:00,FR04014,no2,19.7,µg/m³ +Paris,FR,2019-05-08 08:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-05-08 07:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-08 06:00:00+00:00,FR04014,no2,21.7,µg/m³ +Paris,FR,2019-05-08 05:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-05-08 04:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-08 03:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-08 02:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-08 01:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-08 00:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-07 23:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-05-07 22:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-05-07 21:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-05-07 20:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-07 19:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-05-07 18:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-05-07 17:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-07 16:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-07 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-07 14:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-07 13:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-07 12:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-07 11:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-07 10:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-07 08:00:00+00:00,FR04014,no2,56.0,µg/m³ +Paris,FR,2019-05-07 07:00:00+00:00,FR04014,no2,67.9,µg/m³ +Paris,FR,2019-05-07 06:00:00+00:00,FR04014,no2,77.7,µg/m³ +Paris,FR,2019-05-07 05:00:00+00:00,FR04014,no2,72.4,µg/m³ +Paris,FR,2019-05-07 04:00:00+00:00,FR04014,no2,61.9,µg/m³ +Paris,FR,2019-05-07 03:00:00+00:00,FR04014,no2,50.4,µg/m³ +Paris,FR,2019-05-07 02:00:00+00:00,FR04014,no2,27.7,µg/m³ +Paris,FR,2019-05-07 01:00:00+00:00,FR04014,no2,25.0,µg/m³ +Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,no2,41.0,µg/m³ +Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,no2,43.5,µg/m³ +Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,no2,42.5,µg/m³ +Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,no2,39.5,µg/m³ +Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,no2,36.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,no2,42.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,no2,42.5,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,no2,28.5,µg/m³ +Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,no2,7.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,no2,10.0,µg/m³ +Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-06-01 
01:00:00+00:00,BETR801,no2,52.5,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,no2,7.5,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,no2,11.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,no2,53.0,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,no2,29.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,no2,74.5,µg/m³ +Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,no2,60.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,no2,24.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,no2,38.0,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-20 00:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 23:00:00+00:00,BETR801,no2,16.5,µg/m³ +Antwerpen,BE,2019-05-19 22:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,no2,12.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,no2,39.0,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-18 
19:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,no2,41.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,no2,28.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,no2,26.5,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,no2,50.5,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London 
Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London 
Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London 
Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London 
Westminster,no2,13.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,no2,24.0,µg/m³ 
+London,GB,2019-06-07 07:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 
04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 
02:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-01 
00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London 
Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London 
Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London 
Westminster,no2,46.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London 
Westminster,no2,34.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London 
Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London 
Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 04:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 00:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London 
Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London 
Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London 
Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London 
Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,no2,65.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,no2,97.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London 
Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-08 08:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-08 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-08 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-07 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ diff --git a/doc/data/air_quality_parameters.csv b/doc/data/air_quality_parameters.csv new file mode 100644 index 00000000..915f6300 --- /dev/null +++ b/doc/data/air_quality_parameters.csv @@ -0,0 +1,8 @@ +id,description,name +bc,Black Carbon,BC +co,Carbon Monoxide,CO +no2,Nitrogen Dioxide,NO2 +o3,Ozone,O3 +pm10,Particulate matter less than 10 micrometers in diameter,PM10 +pm25,Particulate matter less than 2.5 micrometers in diameter,PM2.5 +so2,Sulfur Dioxide,SO2 diff --git a/doc/data/air_quality_pm25_long.csv b/doc/data/air_quality_pm25_long.csv new file mode 100644 index 00000000..f74053c2 --- /dev/null +++ b/doc/data/air_quality_pm25_long.csv @@ -0,0 +1,1111 @@ +city,country,date.utc,location,parameter,value,unit +Antwerpen,BE,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0,µg/m³ +Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-17 
07:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,pm25,12.0,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-06-08 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-06 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-04 01:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-06-03 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-06-02 01:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,pm25,15.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,pm25,20.5,µg/m³ +Antwerpen,BE,2019-05-20 17:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 16:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,pm25,17.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,pm25,23.5,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,pm25,25.0,µg/m³ +Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,pm25,28.0,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,pm25,35.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,pm25,40.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,pm25,35.0,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,pm25,34.0,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,pm25,36.5,µg/m³ 
+Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,pm25,44.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,pm25,46.0,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,pm25,43.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,pm25,41.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,pm25,42.5,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,pm25,56.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,pm25,58.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,pm25,60.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,pm25,56.5,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,pm25,52.5,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,pm25,52.0,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,pm25,49.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,pm25,45.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,pm25,42.0,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,pm25,40.5,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,pm25,37.0,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-05-17 01:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,pm25,11.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +London,GB,2019-06-21 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-19 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 
12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-18 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-06-16 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-06-14 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-12 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-09 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London 
Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 
15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-03 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-06-01 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,pm25,5.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ 
+London,GB,2019-05-30 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-05-28 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-05-26 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,pm25,19.0,µg/m³ 
+London,GB,2019-05-19 22:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 04:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 00:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-18 00:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 
21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London 
Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 
17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,pm25,13.0,µg/m³ 
+London,GB,2019-05-11 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 
09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-07 04:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ diff --git a/doc/data/air_quality_stations.csv b/doc/data/air_quality_stations.csv new file mode 100644 index 00000000..9ab1a377 --- /dev/null +++ b/doc/data/air_quality_stations.csv @@ -0,0 +1,67 @@ +location,coordinates.latitude,coordinates.longitude +BELAL01,51.23619,4.38522 +BELHB23,51.1703,4.341 +BELLD01,51.10998,5.00486 +BELLD02,51.12038,5.02155 +BELR833,51.32766,4.36226 +BELSA04,51.31393,4.40387 +BELWZ02,51.1928,5.22153 +BETM802,51.26099,4.4244 +BETN016,51.23365,5.16398 +BETR801,51.20966,4.43182 +BETR802,51.20952,4.43179 +BETR803,51.22863,4.42845 +BETR805,51.20823,4.42156 +BETR811,51.2521,4.49136 +BETR815,51.2147,4.33221 +BETR817,51.17713,4.41795 +BETR820,51.32042,4.44481 +BETR822,51.26429,4.34128 +BETR831,51.3488,4.33971 +BETR834,51.092,4.3801 +BETR891,51.25581,4.38536 +BETR893,51.28138,4.38577 +BETR894,51.2835,4.3495 +BETR897,51.25011,4.3421 +FR04004,48.89167,2.34667 +FR04012,48.82778,2.3275 +FR04014,48.83724,2.3939 +FR04014,48.83722,2.3939 +FR04031,48.86887,2.31194 +FR04031,48.86889,2.31194 +FR04037,48.82861,2.36028 +FR04060,48.8572,2.2933 +FR04071,48.8564,2.33528 +FR04071,48.85639,2.33528 +FR04118,48.87027,2.3325 +FR04118,48.87029,2.3325 +FR04131,48.87333,2.33028 +FR04135,48.83795,2.40806 +FR04135,48.83796,2.40806 +FR04141,48.85278,2.36056 +FR04141,48.85279,2.36056 +FR04143,48.859,2.351 +FR04143,48.85944,2.35111 +FR04179,48.83038,2.26989 +FR04329,48.8386,2.41279 +FR04329,48.83862,2.41278 +Camden Kerbside,51.54421,-0.17527 +Ealing Horn Lane,51.51895,-0.26562 +Haringey Roadside,51.5993,-0.06822 +London Bexley,51.46603,0.18481 +London Bloomsbury,51.52229,-0.12589 +London Eltham,51.45258,0.07077 +London Haringey Priory Park South,51.58413,-0.12525 +London Harlington,51.48879,-0.44161 +London Harrow Stanmore,51.61733,-0.29878 +London Hillingdon,51.49633,-0.46086 +London Marylebone Road,51.52253,-0.15461 +London N. 
Kensington,51.52105,-0.21349 +London Teddington,51.42099,-0.33965 +London Teddington Bushy Park,51.42529,-0.34561 +London Westminster,51.49467,-0.13193 +Southend-on-Sea,51.5442,0.67841 +Southwark A2 Old Kent Road,51.4805,-0.05955 +Thurrock,51.47707,0.31797 +Tower Hamlets Roadside,51.52253,-0.04216 +Groton Fort Griswold,41.3536,-72.0789 diff --git a/doc/data/baseball.csv b/doc/data/baseball.csv new file mode 100644 index 00000000..aadbaced --- /dev/null +++ b/doc/data/baseball.csv @@ -0,0 +1,101 @@ +id,player,year,stint,team,lg,g,ab,r,h,X2b,X3b,hr,rbi,sb,cs,bb,so,ibb,hbp,sh,sf,gidp +88641,womacto01,2006,2,CHN,NL,19,50,6,14,1,0,1,2.0,1.0,1.0,4,4.0,0.0,0.0,3.0,0.0,0.0 +88643,schilcu01,2006,1,BOS,AL,31,2,0,1,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +88645,myersmi01,2006,1,NYA,AL,62,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +88649,helliri01,2006,1,MIL,NL,20,3,0,0,0,0,0,0.0,0.0,0.0,0,2.0,0.0,0.0,0.0,0.0,0.0 +88650,johnsra05,2006,1,NYA,AL,33,6,0,1,0,0,0,0.0,0.0,0.0,0,4.0,0.0,0.0,0.0,0.0,0.0 +88652,finlest01,2006,1,SFN,NL,139,426,66,105,21,12,6,40.0,7.0,0.0,46,55.0,2.0,2.0,3.0,4.0,6.0 +88653,gonzalu01,2006,1,ARI,NL,153,586,93,159,52,2,15,73.0,0.0,1.0,69,58.0,10.0,7.0,0.0,6.0,14.0 +88662,seleaa01,2006,1,LAN,NL,28,26,2,5,1,0,0,0.0,0.0,0.0,1,7.0,0.0,0.0,6.0,0.0,1.0 +89177,francju01,2007,2,ATL,NL,15,40,1,10,3,0,0,8.0,0.0,0.0,4,10.0,1.0,0.0,0.0,1.0,1.0 +89178,francju01,2007,1,NYN,NL,40,50,7,10,0,0,1,8.0,2.0,1.0,10,13.0,0.0,0.0,0.0,1.0,1.0 +89330,zaungr01,2007,1,TOR,AL,110,331,43,80,24,1,10,52.0,0.0,0.0,51,55.0,8.0,2.0,1.0,6.0,9.0 +89333,witasja01,2007,1,TBA,AL,3,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89334,williwo02,2007,1,HOU,NL,33,59,3,6,0,0,1,2.0,0.0,0.0,0,25.0,0.0,0.0,5.0,0.0,1.0 +89335,wickmbo01,2007,2,ARI,NL,8,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89336,wickmbo01,2007,1,ATL,NL,47,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89337,whitero02,2007,1,MIN,AL,38,109,8,19,4,0,4,20.0,0.0,0.0,6,19.0,0.0,3.0,0.0,1.0,2.0 +89338,whiteri01,2007,1,HOU,NL,20,1,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89339,wellsda01,2007,2,LAN,NL,7,15,2,4,1,0,0,1.0,0.0,0.0,0,6.0,0.0,0.0,0.0,0.0,0.0 +89340,wellsda01,2007,1,SDN,NL,22,38,1,4,0,0,0,0.0,0.0,0.0,0,12.0,0.0,0.0,4.0,0.0,0.0 +89341,weathda01,2007,1,CIN,NL,67,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89343,walketo04,2007,1,OAK,AL,18,48,5,13,1,0,0,4.0,0.0,0.0,2,4.0,0.0,0.0,0.0,2.0,2.0 +89345,wakefti01,2007,1,BOS,AL,1,2,0,0,0,0,0,0.0,0.0,0.0,0,2.0,0.0,0.0,0.0,0.0,0.0 +89347,vizquom01,2007,1,SFN,NL,145,513,54,126,18,3,4,51.0,14.0,6.0,44,48.0,6.0,1.0,14.0,3.0,14.0 +89348,villoro01,2007,1,NYA,AL,6,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89352,valenjo03,2007,1,NYN,NL,51,166,18,40,11,1,3,18.0,2.0,1.0,15,28.0,4.0,0.0,1.0,1.0,5.0 +89354,trachst01,2007,2,CHN,NL,4,7,0,1,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89355,trachst01,2007,1,BAL,AL,3,5,0,0,0,0,0,0.0,0.0,0.0,0,3.0,0.0,0.0,0.0,0.0,0.0 +89359,timlimi01,2007,1,BOS,AL,4,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89360,thomeji01,2007,1,CHA,AL,130,432,79,119,19,0,35,96.0,0.0,1.0,95,134.0,11.0,6.0,0.0,3.0,10.0 +89361,thomafr04,2007,1,TOR,AL,155,531,63,147,30,0,26,95.0,0.0,0.0,81,94.0,3.0,7.0,0.0,5.0,14.0 +89363,tavarju01,2007,1,BOS,AL,2,4,0,1,0,0,0,0.0,0.0,0.0,1,3.0,0.0,0.0,0.0,0.0,0.0 +89365,sweenma01,2007,2,LAN,NL,30,33,2,9,1,0,0,3.0,0.0,0.0,1,11.0,0.0,0.0,0.0,0.0,0.0 +89366,sweenma01,2007,1,SFN,NL,76,90,18,23,8,0,2,10.0,2.0,0.0,13,18.0,0.0,3.0,1.0,0.0,0.0 +89367,suppaje01,2007,1,MIL,NL,33,61,4,8,0,0,0,2.0,0.0,0.0,3,16.0,0.0,0.0,11.0,0.0,2.0 
+89368,stinnke01,2007,1,SLN,NL,26,82,7,13,3,0,1,5.0,0.0,0.0,5,22.0,2.0,0.0,0.0,0.0,2.0 +89370,stantmi02,2007,1,CIN,NL,67,2,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89371,stairma01,2007,1,TOR,AL,125,357,58,103,28,1,21,64.0,2.0,1.0,44,66.0,5.0,2.0,0.0,2.0,7.0 +89372,sprinru01,2007,1,SLN,NL,72,1,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89374,sosasa01,2007,1,TEX,AL,114,412,53,104,24,1,21,92.0,0.0,0.0,34,112.0,3.0,3.0,0.0,5.0,11.0 +89375,smoltjo01,2007,1,ATL,NL,30,54,1,5,1,0,0,2.0,0.0,0.0,1,19.0,0.0,0.0,13.0,0.0,0.0 +89378,sheffga01,2007,1,DET,AL,133,494,107,131,20,1,25,75.0,22.0,5.0,84,71.0,2.0,9.0,0.0,6.0,10.0 +89381,seleaa01,2007,1,NYN,NL,31,4,0,0,0,0,0,0.0,0.0,0.0,1,1.0,0.0,0.0,1.0,0.0,0.0 +89382,seaneru01,2007,1,LAN,NL,68,1,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89383,schmija01,2007,1,LAN,NL,6,7,1,1,0,0,1,1.0,0.0,0.0,0,4.0,0.0,0.0,1.0,0.0,0.0 +89384,schilcu01,2007,1,BOS,AL,1,2,0,1,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89385,sandere02,2007,1,KCA,AL,24,73,12,23,7,0,2,11.0,0.0,1.0,11,15.0,0.0,1.0,0.0,0.0,2.0 +89388,rogerke01,2007,1,DET,AL,1,2,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89389,rodriiv01,2007,1,DET,AL,129,502,50,141,31,3,11,63.0,2.0,2.0,9,96.0,1.0,1.0,1.0,2.0,16.0 +89396,ramirma02,2007,1,BOS,AL,133,483,84,143,33,1,20,88.0,0.0,0.0,71,92.0,13.0,7.0,0.0,8.0,21.0 +89398,piazzmi01,2007,1,OAK,AL,83,309,33,85,17,1,8,44.0,0.0,0.0,18,61.0,0.0,0.0,0.0,2.0,9.0 +89400,perezne01,2007,1,DET,AL,33,64,5,11,3,0,1,6.0,0.0,0.0,4,8.0,0.0,0.0,3.0,0.0,2.0 +89402,parkch01,2007,1,NYN,NL,1,1,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89406,oliveda02,2007,1,LAA,AL,5,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89410,myersmi01,2007,1,NYA,AL,6,1,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89411,mussimi01,2007,1,NYA,AL,2,2,0,0,0,0,0,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0 +89412,moyerja01,2007,1,PHI,NL,33,73,4,9,2,0,0,2.0,0.0,0.0,2,26.0,0.0,0.0,8.0,0.0,1.0 +89420,mesajo01,2007,1,PHI,NL,38,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89421,martipe02,2007,1,NYN,NL,5,9,1,1,1,0,0,0.0,0.0,0.0,0,6.0,0.0,0.0,2.0,0.0,0.0 +89425,maddugr01,2007,1,SDN,NL,33,62,2,9,2,0,0,0.0,1.0,0.0,1,19.0,0.0,0.0,9.0,0.0,2.0 +89426,mabryjo01,2007,1,COL,NL,28,34,4,4,1,0,1,5.0,0.0,0.0,5,10.0,0.0,0.0,0.0,0.0,1.0 +89429,loftoke01,2007,2,CLE,AL,52,173,24,49,9,3,0,15.0,2.0,3.0,17,23.0,0.0,0.0,4.0,2.0,1.0 +89430,loftoke01,2007,1,TEX,AL,84,317,62,96,16,3,7,23.0,21.0,4.0,39,28.0,1.0,2.0,2.0,3.0,5.0 +89431,loaizes01,2007,1,LAN,NL,5,7,0,1,0,0,0,2.0,0.0,0.0,0,2.0,0.0,0.0,2.0,0.0,1.0 +89438,kleskry01,2007,1,SFN,NL,116,362,51,94,27,3,6,44.0,5.0,1.0,46,68.0,2.0,1.0,1.0,1.0,14.0 +89439,kentje01,2007,1,LAN,NL,136,494,78,149,36,1,20,79.0,1.0,3.0,57,61.0,4.0,5.0,0.0,6.0,17.0 +89442,jonesto02,2007,1,DET,AL,5,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89445,johnsra05,2007,1,ARI,NL,10,15,0,1,0,0,0,0.0,0.0,0.0,1,7.0,0.0,0.0,2.0,0.0,0.0 +89450,hoffmtr01,2007,1,SDN,NL,60,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89451,hernaro01,2007,2,LAN,NL,22,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89452,hernaro01,2007,1,CLE,AL,2,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89460,guarded01,2007,1,CIN,NL,15,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89462,griffke02,2007,1,CIN,NL,144,528,78,146,24,1,30,93.0,6.0,1.0,85,99.0,14.0,1.0,0.0,9.0,14.0 +89463,greensh01,2007,1,NYN,NL,130,446,62,130,30,1,10,46.0,11.0,1.0,37,62.0,4.0,5.0,1.0,1.0,14.0 +89464,graffto01,2007,1,MIL,NL,86,231,34,55,8,0,9,30.0,0.0,1.0,24,44.0,6.0,3.0,0.0,2.0,7.0 
+89465,gordoto01,2007,1,PHI,NL,44,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89466,gonzalu01,2007,1,LAN,NL,139,464,70,129,23,2,15,68.0,6.0,2.0,56,56.0,4.0,4.0,0.0,2.0,11.0 +89467,gomezch02,2007,2,CLE,AL,19,53,4,15,2,0,0,5.0,0.0,0.0,0,6.0,0.0,0.0,1.0,1.0,1.0 +89468,gomezch02,2007,1,BAL,AL,73,169,17,51,10,1,1,16.0,1.0,2.0,10,20.0,1.0,0.0,5.0,1.0,5.0 +89469,glavito02,2007,1,NYN,NL,33,56,3,12,1,0,0,4.0,0.0,0.0,6,5.0,0.0,0.0,12.0,1.0,0.0 +89473,floydcl01,2007,1,CHN,NL,108,282,40,80,10,1,9,45.0,0.0,0.0,35,47.0,5.0,5.0,0.0,0.0,6.0 +89474,finlest01,2007,1,COL,NL,43,94,9,17,3,0,1,2.0,0.0,0.0,8,4.0,1.0,0.0,0.0,0.0,2.0 +89480,embreal01,2007,1,OAK,AL,4,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89481,edmonji01,2007,1,SLN,NL,117,365,39,92,15,2,12,53.0,0.0,2.0,41,75.0,2.0,0.0,2.0,3.0,9.0 +89482,easleda01,2007,1,NYN,NL,76,193,24,54,6,0,10,26.0,0.0,1.0,19,35.0,1.0,5.0,0.0,1.0,2.0 +89489,delgaca01,2007,1,NYN,NL,139,538,71,139,30,0,24,87.0,4.0,0.0,52,118.0,8.0,11.0,0.0,6.0,12.0 +89493,cormirh01,2007,1,CIN,NL,6,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89494,coninje01,2007,2,NYN,NL,21,41,2,8,2,0,0,5.0,0.0,0.0,7,8.0,2.0,0.0,1.0,1.0,1.0 +89495,coninje01,2007,1,CIN,NL,80,215,23,57,11,1,6,32.0,4.0,0.0,20,28.0,0.0,0.0,1.0,6.0,4.0 +89497,clemero02,2007,1,NYA,AL,2,2,0,1,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89498,claytro01,2007,2,BOS,AL,8,6,1,0,0,0,0,0.0,0.0,0.0,0,3.0,0.0,0.0,0.0,0.0,2.0 +89499,claytro01,2007,1,TOR,AL,69,189,23,48,14,0,1,12.0,2.0,1.0,14,50.0,0.0,1.0,3.0,3.0,8.0 +89501,cirilje01,2007,2,ARI,NL,28,40,6,8,4,0,0,6.0,0.0,0.0,4,6.0,0.0,0.0,0.0,0.0,1.0 +89502,cirilje01,2007,1,MIN,AL,50,153,18,40,9,2,2,21.0,2.0,0.0,15,13.0,0.0,1.0,3.0,2.0,9.0 +89521,bondsba01,2007,1,SFN,NL,126,340,75,94,14,0,28,66.0,5.0,0.0,132,54.0,43.0,3.0,0.0,2.0,13.0 +89523,biggicr01,2007,1,HOU,NL,141,517,68,130,31,3,10,50.0,4.0,3.0,23,112.0,0.0,3.0,7.0,5.0,5.0 +89525,benitar01,2007,2,FLO,NL,34,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89526,benitar01,2007,1,SFN,NL,19,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89530,ausmubr01,2007,1,HOU,NL,117,349,38,82,16,3,3,25.0,6.0,1.0,37,74.0,3.0,6.0,4.0,1.0,11.0 +89533,aloumo01,2007,1,NYN,NL,87,328,51,112,19,1,13,49.0,3.0,0.0,27,30.0,5.0,2.0,0.0,3.0,13.0 +89534,alomasa02,2007,1,NYN,NL,8,22,1,3,1,0,0,0.0,0.0,0.0,0,3.0,0.0,0.0,0.0,0.0,0.0 diff --git a/doc/data/iris.data b/doc/data/iris.data new file mode 100644 index 00000000..026e214e --- /dev/null +++ b/doc/data/iris.data @@ -0,0 +1,151 @@ +SepalLength,SepalWidth,PetalLength,PetalWidth,Name +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa 
+5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica 
+6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica diff --git a/doc/data/tips.csv b/doc/data/tips.csv new file mode 100644 index 00000000..856a65a6 --- /dev/null +++ b/doc/data/tips.csv @@ -0,0 +1,245 @@ +total_bill,tip,sex,smoker,day,time,size +16.99,1.01,Female,No,Sun,Dinner,2 +10.34,1.66,Male,No,Sun,Dinner,3 +21.01,3.5,Male,No,Sun,Dinner,3 +23.68,3.31,Male,No,Sun,Dinner,2 +24.59,3.61,Female,No,Sun,Dinner,4 +25.29,4.71,Male,No,Sun,Dinner,4 +8.77,2.0,Male,No,Sun,Dinner,2 +26.88,3.12,Male,No,Sun,Dinner,4 +15.04,1.96,Male,No,Sun,Dinner,2 +14.78,3.23,Male,No,Sun,Dinner,2 +10.27,1.71,Male,No,Sun,Dinner,2 +35.26,5.0,Female,No,Sun,Dinner,4 +15.42,1.57,Male,No,Sun,Dinner,2 +18.43,3.0,Male,No,Sun,Dinner,4 +14.83,3.02,Female,No,Sun,Dinner,2 +21.58,3.92,Male,No,Sun,Dinner,2 +10.33,1.67,Female,No,Sun,Dinner,3 +16.29,3.71,Male,No,Sun,Dinner,3 +16.97,3.5,Female,No,Sun,Dinner,3 +20.65,3.35,Male,No,Sat,Dinner,3 +17.92,4.08,Male,No,Sat,Dinner,2 +20.29,2.75,Female,No,Sat,Dinner,2 +15.77,2.23,Female,No,Sat,Dinner,2 +39.42,7.58,Male,No,Sat,Dinner,4 +19.82,3.18,Male,No,Sat,Dinner,2 +17.81,2.34,Male,No,Sat,Dinner,4 +13.37,2.0,Male,No,Sat,Dinner,2 +12.69,2.0,Male,No,Sat,Dinner,2 +21.7,4.3,Male,No,Sat,Dinner,2 +19.65,3.0,Female,No,Sat,Dinner,2 +9.55,1.45,Male,No,Sat,Dinner,2 +18.35,2.5,Male,No,Sat,Dinner,4 +15.06,3.0,Female,No,Sat,Dinner,2 +20.69,2.45,Female,No,Sat,Dinner,4 +17.78,3.27,Male,No,Sat,Dinner,2 +24.06,3.6,Male,No,Sat,Dinner,3 +16.31,2.0,Male,No,Sat,Dinner,3 +16.93,3.07,Female,No,Sat,Dinner,3 +18.69,2.31,Male,No,Sat,Dinner,3 +31.27,5.0,Male,No,Sat,Dinner,3 +16.04,2.24,Male,No,Sat,Dinner,3 +17.46,2.54,Male,No,Sun,Dinner,2 +13.94,3.06,Male,No,Sun,Dinner,2 +9.68,1.32,Male,No,Sun,Dinner,2 +30.4,5.6,Male,No,Sun,Dinner,4 +18.29,3.0,Male,No,Sun,Dinner,2 +22.23,5.0,Male,No,Sun,Dinner,2 +32.4,6.0,Male,No,Sun,Dinner,4 +28.55,2.05,Male,No,Sun,Dinner,3 +18.04,3.0,Male,No,Sun,Dinner,2 +12.54,2.5,Male,No,Sun,Dinner,2 +10.29,2.6,Female,No,Sun,Dinner,2 +34.81,5.2,Female,No,Sun,Dinner,4 +9.94,1.56,Male,No,Sun,Dinner,2 +25.56,4.34,Male,No,Sun,Dinner,4 +19.49,3.51,Male,No,Sun,Dinner,2 +38.01,3.0,Male,Yes,Sat,Dinner,4 +26.41,1.5,Female,No,Sat,Dinner,2 +11.24,1.76,Male,Yes,Sat,Dinner,2 +48.27,6.73,Male,No,Sat,Dinner,4 +20.29,3.21,Male,Yes,Sat,Dinner,2 +13.81,2.0,Male,Yes,Sat,Dinner,2 +11.02,1.98,Male,Yes,Sat,Dinner,2 +18.29,3.76,Male,Yes,Sat,Dinner,4 +17.59,2.64,Male,No,Sat,Dinner,3 +20.08,3.15,Male,No,Sat,Dinner,3 +16.45,2.47,Female,No,Sat,Dinner,2 +3.07,1.0,Female,Yes,Sat,Dinner,1 +20.23,2.01,Male,No,Sat,Dinner,2 +15.01,2.09,Male,Yes,Sat,Dinner,2 +12.02,1.97,Male,No,Sat,Dinner,2 +17.07,3.0,Female,No,Sat,Dinner,3 +26.86,3.14,Female,Yes,Sat,Dinner,2 +25.28,5.0,Female,Yes,Sat,Dinner,2 +14.73,2.2,Female,No,Sat,Dinner,2 +10.51,1.25,Male,No,Sat,Dinner,2 +17.92,3.08,Male,Yes,Sat,Dinner,2 +27.2,4.0,Male,No,Thur,Lunch,4 +22.76,3.0,Male,No,Thur,Lunch,2 +17.29,2.71,Male,No,Thur,Lunch,2 +19.44,3.0,Male,Yes,Thur,Lunch,2 +16.66,3.4,Male,No,Thur,Lunch,2 +10.07,1.83,Female,No,Thur,Lunch,1 +32.68,5.0,Male,Yes,Thur,Lunch,2 +15.98,2.03,Male,No,Thur,Lunch,2 +34.83,5.17,Female,No,Thur,Lunch,4 +13.03,2.0,Male,No,Thur,Lunch,2 +18.28,4.0,Male,No,Thur,Lunch,2 +24.71,5.85,Male,No,Thur,Lunch,2 +21.16,3.0,Male,No,Thur,Lunch,2 +28.97,3.0,Male,Yes,Fri,Dinner,2 +22.49,3.5,Male,No,Fri,Dinner,2 +5.75,1.0,Female,Yes,Fri,Dinner,2 +16.32,4.3,Female,Yes,Fri,Dinner,2 +22.75,3.25,Female,No,Fri,Dinner,2 +40.17,4.73,Male,Yes,Fri,Dinner,4 
+27.28,4.0,Male,Yes,Fri,Dinner,2 +12.03,1.5,Male,Yes,Fri,Dinner,2 +21.01,3.0,Male,Yes,Fri,Dinner,2 +12.46,1.5,Male,No,Fri,Dinner,2 +11.35,2.5,Female,Yes,Fri,Dinner,2 +15.38,3.0,Female,Yes,Fri,Dinner,2 +44.3,2.5,Female,Yes,Sat,Dinner,3 +22.42,3.48,Female,Yes,Sat,Dinner,2 +20.92,4.08,Female,No,Sat,Dinner,2 +15.36,1.64,Male,Yes,Sat,Dinner,2 +20.49,4.06,Male,Yes,Sat,Dinner,2 +25.21,4.29,Male,Yes,Sat,Dinner,2 +18.24,3.76,Male,No,Sat,Dinner,2 +14.31,4.0,Female,Yes,Sat,Dinner,2 +14.0,3.0,Male,No,Sat,Dinner,2 +7.25,1.0,Female,No,Sat,Dinner,1 +38.07,4.0,Male,No,Sun,Dinner,3 +23.95,2.55,Male,No,Sun,Dinner,2 +25.71,4.0,Female,No,Sun,Dinner,3 +17.31,3.5,Female,No,Sun,Dinner,2 +29.93,5.07,Male,No,Sun,Dinner,4 +10.65,1.5,Female,No,Thur,Lunch,2 +12.43,1.8,Female,No,Thur,Lunch,2 +24.08,2.92,Female,No,Thur,Lunch,4 +11.69,2.31,Male,No,Thur,Lunch,2 +13.42,1.68,Female,No,Thur,Lunch,2 +14.26,2.5,Male,No,Thur,Lunch,2 +15.95,2.0,Male,No,Thur,Lunch,2 +12.48,2.52,Female,No,Thur,Lunch,2 +29.8,4.2,Female,No,Thur,Lunch,6 +8.52,1.48,Male,No,Thur,Lunch,2 +14.52,2.0,Female,No,Thur,Lunch,2 +11.38,2.0,Female,No,Thur,Lunch,2 +22.82,2.18,Male,No,Thur,Lunch,3 +19.08,1.5,Male,No,Thur,Lunch,2 +20.27,2.83,Female,No,Thur,Lunch,2 +11.17,1.5,Female,No,Thur,Lunch,2 +12.26,2.0,Female,No,Thur,Lunch,2 +18.26,3.25,Female,No,Thur,Lunch,2 +8.51,1.25,Female,No,Thur,Lunch,2 +10.33,2.0,Female,No,Thur,Lunch,2 +14.15,2.0,Female,No,Thur,Lunch,2 +16.0,2.0,Male,Yes,Thur,Lunch,2 +13.16,2.75,Female,No,Thur,Lunch,2 +17.47,3.5,Female,No,Thur,Lunch,2 +34.3,6.7,Male,No,Thur,Lunch,6 +41.19,5.0,Male,No,Thur,Lunch,5 +27.05,5.0,Female,No,Thur,Lunch,6 +16.43,2.3,Female,No,Thur,Lunch,2 +8.35,1.5,Female,No,Thur,Lunch,2 +18.64,1.36,Female,No,Thur,Lunch,3 +11.87,1.63,Female,No,Thur,Lunch,2 +9.78,1.73,Male,No,Thur,Lunch,2 +7.51,2.0,Male,No,Thur,Lunch,2 +14.07,2.5,Male,No,Sun,Dinner,2 +13.13,2.0,Male,No,Sun,Dinner,2 +17.26,2.74,Male,No,Sun,Dinner,3 +24.55,2.0,Male,No,Sun,Dinner,4 +19.77,2.0,Male,No,Sun,Dinner,4 +29.85,5.14,Female,No,Sun,Dinner,5 +48.17,5.0,Male,No,Sun,Dinner,6 +25.0,3.75,Female,No,Sun,Dinner,4 +13.39,2.61,Female,No,Sun,Dinner,2 +16.49,2.0,Male,No,Sun,Dinner,4 +21.5,3.5,Male,No,Sun,Dinner,4 +12.66,2.5,Male,No,Sun,Dinner,2 +16.21,2.0,Female,No,Sun,Dinner,3 +13.81,2.0,Male,No,Sun,Dinner,2 +17.51,3.0,Female,Yes,Sun,Dinner,2 +24.52,3.48,Male,No,Sun,Dinner,3 +20.76,2.24,Male,No,Sun,Dinner,2 +31.71,4.5,Male,No,Sun,Dinner,4 +10.59,1.61,Female,Yes,Sat,Dinner,2 +10.63,2.0,Female,Yes,Sat,Dinner,2 +50.81,10.0,Male,Yes,Sat,Dinner,3 +15.81,3.16,Male,Yes,Sat,Dinner,2 +7.25,5.15,Male,Yes,Sun,Dinner,2 +31.85,3.18,Male,Yes,Sun,Dinner,2 +16.82,4.0,Male,Yes,Sun,Dinner,2 +32.9,3.11,Male,Yes,Sun,Dinner,2 +17.89,2.0,Male,Yes,Sun,Dinner,2 +14.48,2.0,Male,Yes,Sun,Dinner,2 +9.6,4.0,Female,Yes,Sun,Dinner,2 +34.63,3.55,Male,Yes,Sun,Dinner,2 +34.65,3.68,Male,Yes,Sun,Dinner,4 +23.33,5.65,Male,Yes,Sun,Dinner,2 +45.35,3.5,Male,Yes,Sun,Dinner,3 +23.17,6.5,Male,Yes,Sun,Dinner,4 +40.55,3.0,Male,Yes,Sun,Dinner,2 +20.69,5.0,Male,No,Sun,Dinner,5 +20.9,3.5,Female,Yes,Sun,Dinner,3 +30.46,2.0,Male,Yes,Sun,Dinner,5 +18.15,3.5,Female,Yes,Sun,Dinner,3 +23.1,4.0,Male,Yes,Sun,Dinner,3 +15.69,1.5,Male,Yes,Sun,Dinner,2 +19.81,4.19,Female,Yes,Thur,Lunch,2 +28.44,2.56,Male,Yes,Thur,Lunch,2 +15.48,2.02,Male,Yes,Thur,Lunch,2 +16.58,4.0,Male,Yes,Thur,Lunch,2 +7.56,1.44,Male,No,Thur,Lunch,2 +10.34,2.0,Male,Yes,Thur,Lunch,2 +43.11,5.0,Female,Yes,Thur,Lunch,4 +13.0,2.0,Female,Yes,Thur,Lunch,2 +13.51,2.0,Male,Yes,Thur,Lunch,2 +18.71,4.0,Male,Yes,Thur,Lunch,3 +12.74,2.01,Female,Yes,Thur,Lunch,2 
+13.0,2.0,Female,Yes,Thur,Lunch,2 +16.4,2.5,Female,Yes,Thur,Lunch,2 +20.53,4.0,Male,Yes,Thur,Lunch,4 +16.47,3.23,Female,Yes,Thur,Lunch,3 +26.59,3.41,Male,Yes,Sat,Dinner,3 +38.73,3.0,Male,Yes,Sat,Dinner,4 +24.27,2.03,Male,Yes,Sat,Dinner,2 +12.76,2.23,Female,Yes,Sat,Dinner,2 +30.06,2.0,Male,Yes,Sat,Dinner,3 +25.89,5.16,Male,Yes,Sat,Dinner,4 +48.33,9.0,Male,No,Sat,Dinner,4 +13.27,2.5,Female,Yes,Sat,Dinner,2 +28.17,6.5,Female,Yes,Sat,Dinner,3 +12.9,1.1,Female,Yes,Sat,Dinner,2 +28.15,3.0,Male,Yes,Sat,Dinner,5 +11.59,1.5,Male,Yes,Sat,Dinner,2 +7.74,1.44,Male,Yes,Sat,Dinner,2 +30.14,3.09,Female,Yes,Sat,Dinner,4 +12.16,2.2,Male,Yes,Fri,Lunch,2 +13.42,3.48,Female,Yes,Fri,Lunch,2 +8.58,1.92,Male,Yes,Fri,Lunch,1 +15.98,3.0,Female,No,Fri,Lunch,3 +13.42,1.58,Male,Yes,Fri,Lunch,2 +16.27,2.5,Female,Yes,Fri,Lunch,2 +10.09,2.0,Female,Yes,Fri,Lunch,2 +20.45,3.0,Male,No,Sat,Dinner,4 +13.28,2.72,Male,No,Sat,Dinner,2 +22.12,2.88,Female,Yes,Sat,Dinner,2 +24.01,2.0,Male,Yes,Sat,Dinner,4 +15.69,3.0,Male,Yes,Sat,Dinner,3 +11.61,3.39,Male,No,Sat,Dinner,2 +10.77,1.47,Male,No,Sat,Dinner,2 +15.53,3.0,Male,Yes,Sat,Dinner,2 +10.07,1.25,Male,No,Sat,Dinner,2 +12.6,1.0,Male,Yes,Sat,Dinner,2 +32.83,1.17,Male,Yes,Sat,Dinner,2 +35.83,4.67,Female,No,Sat,Dinner,3 +29.03,5.92,Male,No,Sat,Dinner,3 +27.18,2.0,Female,Yes,Sat,Dinner,2 +22.67,2.0,Male,Yes,Sat,Dinner,2 +17.82,1.75,Male,No,Sat,Dinner,2 +18.78,3.0,Female,No,Thur,Dinner,2 diff --git a/doc/data/titanic.csv b/doc/data/titanic.csv new file mode 100644 index 00000000..5cc466e9 --- /dev/null +++ b/doc/data/titanic.csv @@ -0,0 +1,892 @@ +PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked +1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S +2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C +3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S +4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S +5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S +6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q +7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S +8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S +9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S +10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C +11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S +12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S +13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,,S +14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,,S +15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,,S +16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,,S +17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,,Q +18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,,S +19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,,S +20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C +21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,,S +22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,D56,S +23,1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,,Q +24,1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,A6,S +25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,,S +26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,,S +27,0,3,"Emir, Mr. 
Farred Chehab",male,,0,0,2631,7.225,,C +28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,C23 C25 C27,S +29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q +30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S +31,0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,,C +32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C +33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q +34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,,S +35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,,C +36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,,S +37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C +38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,,S +39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,,S +40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,,C +41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,,S +42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,,S +43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C +44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,,C +45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,,Q +46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S +47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q +48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q +49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C +50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,,S +51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,,S +52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,,S +53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,D33,C +54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,,S +55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,B30,C +56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S +57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,,S +58,0,3,"Novel, Mr. Mansouer",male,28.5,0,0,2697,7.2292,,C +59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,,S +60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,,S +61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,,C +62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,B28, +63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,C83,S +64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,,S +65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C +66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C +67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,F33,S +68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,,S +69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,,S +70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,,S +71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,,S +72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,,S +73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,,S +74,0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,,C +75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,,S +76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,F G73,S +77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S +78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S +79,1,2,"Caldwell, Master. 
Alden Gates",male,0.83,0,2,248738,29,,S +80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,,S +81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,,S +82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,,S +83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q +84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,,S +85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,,S +86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,,S +87,0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,,S +88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S +89,1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,C23 C25 C27,S +90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,,S +91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,,S +92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,,S +93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,E31,S +94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,,S +95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,,S +96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S +97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,A5,C +98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,D10 D12,C +99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,,S +100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,,S +101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,,S +102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S +103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,D26,S +104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,,S +105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,,S +106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,,S +107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,,S +108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S +109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,,S +110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q +111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,C110,S +112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C +113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,,S +114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,,S +115,0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,,C +116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,,S +117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q +118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,,S +119,0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,B58 B60,C +120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,,S +121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,,S +122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S +123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,,C +124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,E101,S +125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,D26,S +126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,,C +127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q +128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,,S +129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C +130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,,S +131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,,C +132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 
3101307,7.05,,S +133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,,S +134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,,S +135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,,S +136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,,C +137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,D47,S +138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,C123,S +139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,,S +140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,B86,C +141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C +142,1,3,"Nysten, Miss. Anna Sofia",female,22,0,0,347081,7.75,,S +143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,,S +144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,,Q +145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,,S +146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,,S +147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,,S +148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,,S +149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,F2,S +150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,,S +151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,,S +152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,C2,S +153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,,S +154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S +155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S +156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,,C +157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,,Q +158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,,S +159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S +160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S +161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,,S +162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,,S +163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,,S +164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,,S +165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,,S +166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,,S +167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,E33,S +168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,,S +169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S +170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,,S +171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,B19,S +172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,,Q +173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,,S +174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,,S +175,0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,A7,C +176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,,S +177,0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,,S +178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C49,C +179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,,S +180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,,S +181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S +182,0,2,"Pernot, Mr. 
Rene",male,,0,0,SC/PARIS 2131,15.05,,C +183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,,S +184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,F4,S +185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,,S +186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,A32,S +187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,,Q +188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,,S +189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,,Q +190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,,S +191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,,S +192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,,S +193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,,S +194,1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,F2,S +195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,B4,C +196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,B80,C +197,0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,,Q +198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,,S +199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q +200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,,S +201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,,S +202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S +203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,,S +204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C +205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,,S +206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,G6,S +207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,,S +208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,,C +209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,,Q +210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,A31,C +211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,,S +212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,,S +213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,,S +214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,,S +215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q +216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,D36,C +217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,,S +218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,,S +219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,D15,C +220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,,S +221,1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,,S +222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,,S +223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,,S +224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S +225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,C93,S +226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,,S +227,1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,,S +228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S +229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,,S +230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,,S +231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,C83,S +232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,,S +233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,,S +234,1,3,"Asplund, Miss. 
Lillian Gertrud",female,5,4,2,347077,31.3875,,S +235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,,S +236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S +237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,,S +238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,,S +239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,,S +240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,,S +241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C +242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q +243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,,S +244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,,S +245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,,C +246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,C78,Q +247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,,S +248,1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,,S +249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,D35,S +250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,,S +251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S +252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,G6,S +253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,C87,S +254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,,S +255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,,S +256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,,C +257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C +258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,B77,S +259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,,C +260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,,S +261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q +262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,,S +263,0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,E67,S +264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,B94,S +265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q +266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,,S +267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,,S +268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,,S +269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,C125,S +270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,C99,S +271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,,S +272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,,S +273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,,S +274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C118,C +275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,,Q +276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S +277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,,S +278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,,S +279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,,Q +280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,,S +281,0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,,Q +282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,,S +283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,,S +284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,,S +285,0,1,"Smith, Mr. 
Richard William",male,,0,0,113056,26,A19,S +286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,,C +287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,,S +288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,,S +289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,,S +290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,,Q +291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,,S +292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,B49,C +293,0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,D,C +294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,,S +295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,,S +296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,,C +297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C +298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,C22 C26,S +299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S +300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,B58 B60,C +301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,,Q +302,1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,,Q +303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,,S +304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q +305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,,S +306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S +307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C +308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C65,C +309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,,C +310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,E36,C +311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C54,C +312,1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C +313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,,S +314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,,S +315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,,S +316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,,S +317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,,S +318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,,S +319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,C7,S +320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,E34,C +321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,,S +322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,,S +323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,,Q +324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,,S +325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S +326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C32,C +327,0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,,S +328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,D,S +329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,,S +330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,B18,C +331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q +332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S +333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,C91,S +334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,,S +335,1,1,"Frauenthal, Mrs. 
Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,,S +336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S +337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,C2,S +338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,E40,C +339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,,S +340,0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,T,S +341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,F2,S +342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,C23 C25 C27,S +343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,,S +344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,,S +345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,,S +346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,F33,S +347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,,S +348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S +349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,,S +350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,,S +351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,,S +352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,C128,S +353,0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,,C +354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,,S +355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C +356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,,S +357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,E33,S +358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,,S +359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q +360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,,Q +361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,,S +362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,,C +363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,,C +364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,,S +365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q +366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,,S +367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,D37,C +368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C +369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q +370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,B35,C +371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,E50,C +372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,,S +373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,,S +374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,,C +375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,,S +376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C +377,1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,,S +378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C82,C +379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,,C +380,0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,,S +381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,,C +382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,,C +383,0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,,S +384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,,S +385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S +386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 
14879,73.5,,S +387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,,S +388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,,S +389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q +390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,,C +391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,B96 B98,S +392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,,S +393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,,S +394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,D36,C +395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,G6,S +396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,,S +397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,,S +398,0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,,S +399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,,S +400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,,S +401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,,S +402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,,S +403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,,S +404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,,S +405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,,S +406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,,S +407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51,0,0,347064,7.75,,S +408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,,S +409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,,S +410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S +411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S +412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q +413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,C78,Q +414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,,S +415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,,S +416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,,S +417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,,S +418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,,S +419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,,S +420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,,S +421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C +422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,,Q +423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,,S +424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,,S +425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,,S +426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S +427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,,S +428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,,S +429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q +430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,E10,S +431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,C52,S +432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S +433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,,S +434,0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,,S +435,0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,E44,S +436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,B96 B98,S +437,0,3,"Ford, Miss. 
Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,,S +438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,,S +439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,C23 C25 C27,S +440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,,S +441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,,S +442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,,S +443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,,S +444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,,S +445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S +446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,A34,S +447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,,S +448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,,S +449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,,C +450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,C104,S +451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,,S +452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S +453,0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C111,C +454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C92,C +455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S +456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,,C +457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,E38,S +458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S +459,1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,,S +460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q +461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,E12,S +462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,,S +463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,E63,S +464,0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,,S +465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S +466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,,S +467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,,S +468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,,S +469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q +470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C +471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S +472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,,S +473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,,S +474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,D,C +475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,,S +476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,A14,S +477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,,S +478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,,S +479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,,S +480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,,S +481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,,S +482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,,S +483,0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,,S +484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,,S +485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,B49,C +486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S +487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,C93,S +488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,B37,C +489,0,3,"Somerton, Mr. 
Francis William",male,30,0,0,A.5. 18509,8.05,,S +490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,,S +491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,,S +492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,,S +493,0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,C30,S +494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,,C +495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,,S +496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C +497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,D20,C +498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S +499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,C22 C26,S +500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,,S +501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,,S +502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,,Q +503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q +504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,,S +505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,B79,S +506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C65,C +507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,,S +508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,,S +509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,,S +510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,,S +511,1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,,Q +512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S +513,1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,E25,S +514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,,C +515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,,S +516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,D46,S +517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,F33,S +518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q +519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,,S +520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,,S +521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,B73,S +522,0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,,S +523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C +524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,B18,C +525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C +526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q +527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,,S +528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S +529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,,S +530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,,S +531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,,S +532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C +533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,,C +534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C +535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,,S +536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,,S +537,0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,B38,S +538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,,C +539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S +540,1,1,"Frolicher, Miss. 
Hedwig Margaritha",female,22,0,2,13568,49.5,B39,C +541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,B22,S +542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,,S +543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,,S +544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,,S +545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C86,C +546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,,S +547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,,S +548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C +549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,,S +550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,,S +551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C70,C +552,0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,,S +553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q +554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,,C +555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,,S +556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,,S +557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,A16,C +558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C +559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,E67,S +560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,,S +561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q +562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,,S +563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,,S +564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S +565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S +566,0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,,S +567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,,S +568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,,S +569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C +570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,,S +571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,,S +572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S +573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,E25,S +574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q +575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,,S +576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,,S +577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,,S +578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,E44,S +579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C +580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,,S +581,1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,,S +582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C68,C +583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,,S +584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,A10,C +585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C +586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,E68,S +587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,,S +588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,B41,C +589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,,S +590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S +591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 
3101273,7.125,,S +592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,D20,C +593,0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,,S +594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,,Q +595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,,S +596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,,S +597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,,S +598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,,S +599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C +600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,A20,C +601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,,S +602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S +603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S +604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,,S +605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,,C +606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,,S +607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,,S +608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,,S +609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,,C +610,1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,C125,S +611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,,S +612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S +613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,,Q +614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q +615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,,S +616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,,S +617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,,S +618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,,S +619,1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,F4,S +620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,,S +621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,,C +622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,D19,S +623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,,C +624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,,S +625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,,S +626,0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,D50,S +627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,,Q +628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,D9,S +629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,,S +630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q +631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,A23,S +632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,,S +633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,B50,C +634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,,S +635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,,S +636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,,S +637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,,S +638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,,S +639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,,S +640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,,S +641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,,S +642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,B35,C +643,0,3,"Skoog, Miss. 
Margit Elizabeth",female,2,3,2,347088,27.9,,S +644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S +645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C +646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,D33,C +647,0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,,S +648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,A26,C +649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S +650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,,S +651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S +652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,,S +653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,,S +654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,,Q +655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,,Q +656,0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,,S +657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S +658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,,Q +659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,,S +660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,D48,C +661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,,S +662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,,C +663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,E58,S +664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,,S +665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,,S +666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,,S +667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,,S +668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S +669,0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,,S +670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,C126,S +671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,,S +672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,B71,S +673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,,S +674,1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,,S +675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,,S +676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,,S +677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S +678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,,S +679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,,S +680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,B51 B53 B55,C +681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q +682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,D49,C +683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,,S +684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,,S +685,0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,,S +686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,,C +687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,,S +688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,,S +689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,,S +690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,B5,S +691,1,1,"Dick, Mr. Albert Adrian",male,31,1,0,17474,57,B20,S +692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,,C +693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,,S +694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,,C +695,0,1,"Weir, Col. 
John",male,60,0,0,113800,26.55,,S +696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,,S +697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,,S +698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q +699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C68,C +700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,F G63,S +701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C62 C64,C +702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,E24,S +703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,,C +704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,,Q +705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,,S +706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,,S +707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,,S +708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,E24,S +709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,,S +710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C +711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C90,C +712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S +713,1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,C126,S +714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,,S +715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,,S +716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,F G73,S +717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C45,C +718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,E101,S +719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q +720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,,S +721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,,S +722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,,S +723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,,S +724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,,S +725,1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,E8,S +726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,,S +727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,,S +728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q +729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,,S +730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,,S +731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S +732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,,C +733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,,S +734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,,S +735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,,S +736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S +737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,,S +738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,B101,C +739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S +740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S +741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,D45,S +742,0,1,"Cavendish, Mr. Tyrell William",male,36,1,0,19877,78.85,C46,S +743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C +744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,,S +745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 
3101288,7.925,,S +746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,B22,S +747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,,S +748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,,S +749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,D30,S +750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,,Q +751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,,S +752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,E121,S +753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,,S +754,0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,,S +755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,,S +756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S +757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,,S +758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,,S +759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,,S +760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,B77,S +761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S +762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,,S +763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,,C +764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,B96 B98,S +765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,,S +766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,D11,S +767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C +768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q +769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q +770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,,S +771,0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,,S +772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,,S +773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,E77,S +774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C +775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,,S +776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,,S +777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q +778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,,S +779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q +780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,B3,S +781,1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,,C +782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,B20,S +783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,D6,S +784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S +785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,,S +786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,,S +787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,,S +788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,,Q +789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,,S +790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,B82 B84,C +791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q +792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,,S +793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S +794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C +795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,,S +796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,,S +797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,D17,S +798,1,3,"Osman, Mrs. 
Mara",female,31,0,0,349244,8.6833,,S +799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,,C +800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,,S +801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,,S +802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,,S +803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,B96 B98,S +804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C +805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,,S +806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,,S +807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,A36,S +808,0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,,S +809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,,S +810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,E8,S +811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,,S +812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,,S +813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,,S +814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,,S +815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,,S +816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,B102,S +817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,,S +818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,,C +819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,,S +820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,,S +821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,B69,S +822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,,S +823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,,S +824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,E121,S +825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,,S +826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q +827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S +828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,,C +829,1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,,Q +830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,B28, +831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,,C +832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S +833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C +834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,,S +835,0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,,S +836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,E49,C +837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,,S +838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S +839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,,S +840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C +841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,,S +842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,,S +843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,,C +844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C +845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,,S +846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,,S +847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S +848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,,C +849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,,S +850,1,1,"Goldenberg, Mrs. 
Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C +851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,,S +852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,,S +853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,,C +854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,D28,S +855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,,S +856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,,S +857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,,S +858,1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,E17,S +859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,,C +860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C +861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,,S +862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,,S +863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,D17,S +864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S +865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,,S +866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,,S +867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,,C +868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,A24,S +869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S +870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,,S +871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,,S +872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,D35,S +873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,B51 B53 B55,S +874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,,S +875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,,C +876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,,C +877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,,S +878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,,S +879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S +880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C50,C +881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,,S +882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,,S +883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,,S +884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,,S +885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,,S +886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,,Q +887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,,S +888,1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,B42,S +889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S +890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C148,C +891,0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,,Q diff --git a/doc/make.py b/doc/make.py new file mode 100755 index 00000000..c758c7fc --- /dev/null +++ b/doc/make.py @@ -0,0 +1,377 @@ +#!/usr/bin/env python3 +""" +Python script for building documentation. + +To build the docs you must have all optional dependencies for pandas +installed. See the installation instructions for a list of these. 
+ +Usage +----- + $ python make.py clean + $ python make.py html + $ python make.py latex +""" +import argparse +import csv +import importlib +import os +import shutil +import subprocess +import sys +import webbrowser + +import docutils +import docutils.parsers.rst + +DOC_PATH = os.path.dirname(os.path.abspath(__file__)) +SOURCE_PATH = os.path.join(DOC_PATH, "source") +BUILD_PATH = os.path.join(DOC_PATH, "build") +REDIRECTS_FILE = os.path.join(DOC_PATH, "redirects.csv") + + +class DocBuilder: + """ + Class to wrap the different commands of this script. + + All public methods of this class can be called as parameters of the + script. + """ + + def __init__( + self, + num_jobs="auto", + include_api=True, + whatsnew=False, + single_doc=None, + verbosity=0, + warnings_are_errors=False, + ) -> None: + self.num_jobs = num_jobs + self.include_api = include_api + self.whatsnew = whatsnew + self.verbosity = verbosity + self.warnings_are_errors = warnings_are_errors + + if single_doc: + single_doc = self._process_single_doc(single_doc) + os.environ["SPHINX_PATTERN"] = single_doc + elif not include_api: + os.environ["SPHINX_PATTERN"] = "-api" + elif whatsnew: + os.environ["SPHINX_PATTERN"] = "whatsnew" + + self.single_doc_html = None + if single_doc and single_doc.endswith(".rst"): + self.single_doc_html = os.path.splitext(single_doc)[0] + ".html" + elif single_doc: + self.single_doc_html = f"reference/api/pandas.{single_doc}.html" + + def _process_single_doc(self, single_doc): + """ + Make sure the provided value for --single is a path to an existing + .rst/.ipynb file, or a pandas object that can be imported. + + For example, categorial.rst or pandas.DataFrame.head. For the latter, + return the corresponding file path + (e.g. reference/api/pandas.DataFrame.head.rst). + """ + base_name, extension = os.path.splitext(single_doc) + if extension in (".rst", ".ipynb"): + if os.path.exists(os.path.join(SOURCE_PATH, single_doc)): + return single_doc + else: + raise FileNotFoundError(f"File {single_doc} not found") + + elif single_doc.startswith("pandas."): + try: + obj = pandas # noqa: F821 + for name in single_doc.split("."): + obj = getattr(obj, name) + except AttributeError as err: + raise ImportError(f"Could not import {single_doc}") from err + else: + return single_doc[len("pandas.") :] + else: + raise ValueError( + f"--single={single_doc} not understood. " + "Value should be a valid path to a .rst or .ipynb file, " + "or a valid pandas object " + "(e.g. categorical.rst or pandas.DataFrame.head)" + ) + + @staticmethod + def _run_os(*args): + """ + Execute a command as a OS terminal. + + Parameters + ---------- + *args : list of str + Command and parameters to be executed + + Examples + -------- + >>> DocBuilder()._run_os('python', '--version') + """ + subprocess.check_call(args, stdout=sys.stdout, stderr=sys.stderr) + + def _sphinx_build(self, kind: str): + """ + Call sphinx to build documentation. + + Attribute `num_jobs` from the class is used. 
+ + Parameters + ---------- + kind : {'html', 'latex'} + + Examples + -------- + >>> DocBuilder(num_jobs=4)._sphinx_build('html') + """ + if kind not in ("html", "latex"): + raise ValueError(f"kind must be html or latex, not {kind}") + + cmd = ["sphinx-build", "-b", kind] + if self.num_jobs: + cmd += ["-j", self.num_jobs] + if self.warnings_are_errors: + cmd += ["-W", "--keep-going"] + if self.verbosity: + cmd.append(f"-{'v' * self.verbosity}") + cmd += [ + "-d", + os.path.join(BUILD_PATH, "doctrees"), + SOURCE_PATH, + os.path.join(BUILD_PATH, kind), + ] + return subprocess.call(cmd) + + def _open_browser(self, single_doc_html): + """ + Open a browser tab showing single + """ + url = os.path.join("file://", DOC_PATH, "build", "html", single_doc_html) + webbrowser.open(url, new=2) + + def _get_page_title(self, page): + """ + Open the rst file `page` and extract its title. + """ + fname = os.path.join(SOURCE_PATH, f"{page}.rst") + option_parser = docutils.frontend.OptionParser( + components=(docutils.parsers.rst.Parser,) + ) + doc = docutils.utils.new_document("", option_parser.get_default_values()) + with open(fname) as f: + data = f.read() + + parser = docutils.parsers.rst.Parser() + # do not generate any warning when parsing the rst + with open(os.devnull, "a") as f: + doc.reporter.stream = f + parser.parse(data, doc) + + section = next( + node for node in doc.children if isinstance(node, docutils.nodes.section) + ) + title = next( + node for node in section.children if isinstance(node, docutils.nodes.title) + ) + + return title.astext() + + def _add_redirects(self): + """ + Create in the build directory an html file with a redirect, + for every row in REDIRECTS_FILE. + """ + with open(REDIRECTS_FILE) as mapping_fd: + reader = csv.reader(mapping_fd) + for row in reader: + if not row or row[0].strip().startswith("#"): + continue + + html_path = os.path.join(BUILD_PATH, "html") + path = os.path.join(html_path, *row[0].split("/")) + ".html" + + if not self.include_api and ( + os.path.join(html_path, "reference") in path + or os.path.join(html_path, "generated") in path + ): + continue + + try: + title = self._get_page_title(row[1]) + except Exception: + # the file can be an ipynb and not an rst, or docutils + # may not be able to read the rst because it has some + # sphinx specific stuff + title = "this page" + + with open(path, "w") as moved_page_fd: + html = f"""\ + + + + + +

+    <p>
+        The page has been moved to <a href="{row[1]}.html">{title}</a>
+    </p>
+ +""" + + moved_page_fd.write(html) + + def html(self): + """ + Build HTML documentation. + """ + ret_code = self._sphinx_build("html") + zip_fname = os.path.join(BUILD_PATH, "html", "pandas.zip") + if os.path.exists(zip_fname): + os.remove(zip_fname) + + if ret_code == 0: + if self.single_doc_html is not None: + self._open_browser(self.single_doc_html) + else: + self._add_redirects() + if self.whatsnew: + self._open_browser(os.path.join("whatsnew", "index.html")) + + return ret_code + + def latex(self, force=False): + """ + Build PDF documentation. + """ + if sys.platform == "win32": + sys.stderr.write("latex build has not been tested on windows\n") + else: + ret_code = self._sphinx_build("latex") + os.chdir(os.path.join(BUILD_PATH, "latex")) + if force: + for i in range(3): + self._run_os("pdflatex", "-interaction=nonstopmode", "pandas.tex") + raise SystemExit( + "You should check the file " + '"build/latex/pandas.pdf" for problems.' + ) + else: + self._run_os("make") + return ret_code + + def latex_forced(self): + """ + Build PDF documentation with retries to find missing references. + """ + return self.latex(force=True) + + @staticmethod + def clean(): + """ + Clean documentation generated files. + """ + shutil.rmtree(BUILD_PATH, ignore_errors=True) + shutil.rmtree(os.path.join(SOURCE_PATH, "reference", "api"), ignore_errors=True) + + def zip_html(self): + """ + Compress HTML documentation into a zip file. + """ + zip_fname = os.path.join(BUILD_PATH, "html", "pandas.zip") + if os.path.exists(zip_fname): + os.remove(zip_fname) + dirname = os.path.join(BUILD_PATH, "html") + fnames = os.listdir(dirname) + os.chdir(dirname) + self._run_os("zip", zip_fname, "-r", "-q", *fnames) + + +def main(): + cmds = [method for method in dir(DocBuilder) if not method.startswith("_")] + + joined = ",".join(cmds) + argparser = argparse.ArgumentParser( + description="pandas documentation builder", epilog=f"Commands: {joined}" + ) + + joined = ", ".join(cmds) + argparser.add_argument( + "command", nargs="?", default="html", help=f"command to run: {joined}" + ) + argparser.add_argument( + "--num-jobs", default="auto", help="number of jobs used by sphinx-build" + ) + argparser.add_argument( + "--no-api", default=False, help="omit api and autosummary", action="store_true" + ) + argparser.add_argument( + "--whatsnew", + default=False, + help="only build whatsnew (and api for links)", + action="store_true", + ) + argparser.add_argument( + "--single", + metavar="FILENAME", + type=str, + default=None, + help=( + "filename (relative to the 'source' folder) of section or method name to " + "compile, e.g. 'development/contributing.rst', " + "'ecosystem.rst', 'pandas.DataFrame.join'" + ), + ) + argparser.add_argument( + "--python-path", type=str, default=os.path.dirname(DOC_PATH), help="path" + ) + argparser.add_argument( + "-v", + action="count", + dest="verbosity", + default=0, + help=( + "increase verbosity (can be repeated), " + "passed to the sphinx build command" + ), + ) + argparser.add_argument( + "--warnings-are-errors", + "-W", + action="store_true", + help="fail if warnings are raised", + ) + args = argparser.parse_args() + + if args.command not in cmds: + joined = ", ".join(cmds) + raise ValueError(f"Unknown command {args.command}. Available options: {joined}") + + # Below we update both os.environ and sys.path. The former is used by + # external libraries (namely Sphinx) to compile this module and resolve + # the import of `python_path` correctly. 
The latter is used to resolve + # the import within the module, injecting it into the global namespace + os.environ["PYTHONPATH"] = args.python_path + sys.path.insert(0, args.python_path) + globals()["pandas"] = importlib.import_module("pandas") + + # Set the matplotlib backend to the non-interactive Agg backend for all + # child processes. + os.environ["MPLBACKEND"] = "module://matplotlib.backends.backend_agg" + + builder = DocBuilder( + args.num_jobs, + not args.no_api, + args.whatsnew, + args.single, + args.verbosity, + args.warnings_are_errors, + ) + return getattr(builder, args.command)() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/doc/redirects.csv b/doc/redirects.csv new file mode 100644 index 00000000..fda09d76 --- /dev/null +++ b/doc/redirects.csv @@ -0,0 +1,1415 @@ +# This file should contain all the redirects in the documentation +# in the format `,` + +# whatsnew +whatsnew,whatsnew/index +release,whatsnew/index + +# getting started +install,getting_started/install +comparison_with_r,getting_started/comparison/comparison_with_r +comparison_with_sql,getting_started/comparison/comparison_with_sql +comparison_with_sas,getting_started/comparison/comparison_with_sas +comparison_with_stata,getting_started/comparison/comparison_with_stata +overview,getting_started/overview +tutorials,getting_started/tutorials + +# user guide +advanced,user_guide/advanced +categorical,user_guide/categorical +computation,user_guide/computation +cookbook,user_guide/cookbook +enhancingperf,user_guide/enhancingperf +gotchas,user_guide/gotchas +groupby,user_guide/groupby +indexing,user_guide/indexing +integer_na,user_guide/integer_na +io,user_guide/io +merging,user_guide/merging +missing_data,user_guide/missing_data +options,user_guide/options +reshaping,user_guide/reshaping +sparse,user_guide/sparse +style,user_guide/style +text,user_guide/text +timedeltas,user_guide/timedeltas +timeseries,user_guide/timeseries +visualization,user_guide/visualization +10min,user_guide/10min +basics,user_guide/basics +dsintro,user_guide/dsintro + +# development +contributing,development/contributing +contributing_docstring,development/contributing_docstring +developer,development/developer +extending,development/extending +internals,development/internals +development/meeting,community + +# api moved function +reference/api/pandas.io.json.json_normalize,pandas.json_normalize + +# rename due to refactors +reference/api/pandas.core.window.Rolling,pandas.core.window.rolling.Rolling +reference/api/pandas.core.window.Rolling.aggregate,pandas.core.window.rolling.Rolling.aggregate +reference/api/pandas.core.window.Rolling.apply,pandas.core.window.rolling.Rolling.apply +reference/api/pandas.core.window.Rolling.corr,pandas.core.window.rolling.Rolling.corr +reference/api/pandas.core.window.Rolling.count,pandas.core.window.rolling.Rolling.count +reference/api/pandas.core.window.Rolling.cov,pandas.core.window.rolling.Rolling.cov +reference/api/pandas.core.window.Rolling.kurt,pandas.core.window.rolling.Rolling.kurt +reference/api/pandas.core.window.Rolling.max,pandas.core.window.rolling.Rolling.max +reference/api/pandas.core.window.Rolling.mean,pandas.core.window.rolling.Rolling.mean +reference/api/pandas.core.window.Rolling.median,pandas.core.window.rolling.Rolling.median +reference/api/pandas.core.window.Rolling.min,pandas.core.window.rolling.Rolling.min +reference/api/pandas.core.window.Rolling.quantile,pandas.core.window.rolling.Rolling.quantile 
+reference/api/pandas.core.window.Rolling.skew,pandas.core.window.rolling.Rolling.skew +reference/api/pandas.core.window.Rolling.std,pandas.core.window.rolling.Rolling.std +reference/api/pandas.core.window.Rolling.sum,pandas.core.window.rolling.Rolling.sum +reference/api/pandas.core.window.Rolling.var,pandas.core.window.rolling.Rolling.var + +# api url change (generated -> reference/api rename) +api,reference/index +generated/pandas.api.extensions.ExtensionArray.argsort,../reference/api/pandas.api.extensions.ExtensionArray.argsort +generated/pandas.api.extensions.ExtensionArray.astype,../reference/api/pandas.api.extensions.ExtensionArray.astype +generated/pandas.api.extensions.ExtensionArray.copy,../reference/api/pandas.api.extensions.ExtensionArray.copy +generated/pandas.api.extensions.ExtensionArray.dropna,../reference/api/pandas.api.extensions.ExtensionArray.dropna +generated/pandas.api.extensions.ExtensionArray.dtype,../reference/api/pandas.api.extensions.ExtensionArray.dtype +generated/pandas.api.extensions.ExtensionArray.factorize,../reference/api/pandas.api.extensions.ExtensionArray.factorize +generated/pandas.api.extensions.ExtensionArray.fillna,../reference/api/pandas.api.extensions.ExtensionArray.fillna +generated/pandas.api.extensions.ExtensionArray,../reference/api/pandas.api.extensions.ExtensionArray +generated/pandas.api.extensions.ExtensionArray.isna,../reference/api/pandas.api.extensions.ExtensionArray.isna +generated/pandas.api.extensions.ExtensionArray.nbytes,../reference/api/pandas.api.extensions.ExtensionArray.nbytes +generated/pandas.api.extensions.ExtensionArray.ndim,../reference/api/pandas.api.extensions.ExtensionArray.ndim +generated/pandas.api.extensions.ExtensionArray.shape,../reference/api/pandas.api.extensions.ExtensionArray.shape +generated/pandas.api.extensions.ExtensionArray.take,../reference/api/pandas.api.extensions.ExtensionArray.take +generated/pandas.api.extensions.ExtensionArray.unique,../reference/api/pandas.api.extensions.ExtensionArray.unique +generated/pandas.api.extensions.ExtensionDtype.construct_array_type,../reference/api/pandas.api.extensions.ExtensionDtype.construct_array_type +generated/pandas.api.extensions.ExtensionDtype.construct_from_string,../reference/api/pandas.api.extensions.ExtensionDtype.construct_from_string +generated/pandas.api.extensions.ExtensionDtype,../reference/api/pandas.api.extensions.ExtensionDtype +generated/pandas.api.extensions.ExtensionDtype.is_dtype,../reference/api/pandas.api.extensions.ExtensionDtype.is_dtype +generated/pandas.api.extensions.ExtensionDtype.kind,../reference/api/pandas.api.extensions.ExtensionDtype.kind +generated/pandas.api.extensions.ExtensionDtype.name,../reference/api/pandas.api.extensions.ExtensionDtype.name +generated/pandas.api.extensions.ExtensionDtype.names,../reference/api/pandas.api.extensions.ExtensionDtype.names +generated/pandas.api.extensions.ExtensionDtype.na_value,../reference/api/pandas.api.extensions.ExtensionDtype.na_value +generated/pandas.api.extensions.ExtensionDtype.type,../reference/api/pandas.api.extensions.ExtensionDtype.type +generated/pandas.api.extensions.register_dataframe_accessor,../reference/api/pandas.api.extensions.register_dataframe_accessor +generated/pandas.api.extensions.register_extension_dtype,../reference/api/pandas.api.extensions.register_extension_dtype +generated/pandas.api.extensions.register_index_accessor,../reference/api/pandas.api.extensions.register_index_accessor 
+generated/pandas.api.extensions.register_series_accessor,../reference/api/pandas.api.extensions.register_series_accessor +generated/pandas.api.types.infer_dtype,../reference/api/pandas.api.types.infer_dtype +generated/pandas.api.types.is_bool_dtype,../reference/api/pandas.api.types.is_bool_dtype +generated/pandas.api.types.is_bool,../reference/api/pandas.api.types.is_bool +generated/pandas.api.types.is_categorical_dtype,../reference/api/pandas.api.types.is_categorical_dtype +generated/pandas.api.types.is_categorical,../reference/api/pandas.api.types.is_categorical +generated/pandas.api.types.is_complex_dtype,../reference/api/pandas.api.types.is_complex_dtype +generated/pandas.api.types.is_complex,../reference/api/pandas.api.types.is_complex +generated/pandas.api.types.is_datetime64_any_dtype,../reference/api/pandas.api.types.is_datetime64_any_dtype +generated/pandas.api.types.is_datetime64_dtype,../reference/api/pandas.api.types.is_datetime64_dtype +generated/pandas.api.types.is_datetime64_ns_dtype,../reference/api/pandas.api.types.is_datetime64_ns_dtype +generated/pandas.api.types.is_datetime64tz_dtype,../reference/api/pandas.api.types.is_datetime64tz_dtype +generated/pandas.api.types.is_datetimetz,../reference/api/pandas.api.types.is_datetimetz +generated/pandas.api.types.is_dict_like,../reference/api/pandas.api.types.is_dict_like +generated/pandas.api.types.is_extension_array_dtype,../reference/api/pandas.api.types.is_extension_array_dtype +generated/pandas.api.types.is_extension_type,../reference/api/pandas.api.types.is_extension_type +generated/pandas.api.types.is_file_like,../reference/api/pandas.api.types.is_file_like +generated/pandas.api.types.is_float_dtype,../reference/api/pandas.api.types.is_float_dtype +generated/pandas.api.types.is_float,../reference/api/pandas.api.types.is_float +generated/pandas.api.types.is_hashable,../reference/api/pandas.api.types.is_hashable +generated/pandas.api.types.is_int64_dtype,../reference/api/pandas.api.types.is_int64_dtype +generated/pandas.api.types.is_integer_dtype,../reference/api/pandas.api.types.is_integer_dtype +generated/pandas.api.types.is_integer,../reference/api/pandas.api.types.is_integer +generated/pandas.api.types.is_interval_dtype,../reference/api/pandas.api.types.is_interval_dtype +generated/pandas.api.types.is_interval,../reference/api/pandas.api.types.is_interval +generated/pandas.api.types.is_iterator,../reference/api/pandas.api.types.is_iterator +generated/pandas.api.types.is_list_like,../reference/api/pandas.api.types.is_list_like +generated/pandas.api.types.is_named_tuple,../reference/api/pandas.api.types.is_named_tuple +generated/pandas.api.types.is_number,../reference/api/pandas.api.types.is_number +generated/pandas.api.types.is_numeric_dtype,../reference/api/pandas.api.types.is_numeric_dtype +generated/pandas.api.types.is_object_dtype,../reference/api/pandas.api.types.is_object_dtype +generated/pandas.api.types.is_period_dtype,../reference/api/pandas.api.types.is_period_dtype +generated/pandas.api.types.is_period,../reference/api/pandas.api.types.is_period +generated/pandas.api.types.is_re_compilable,../reference/api/pandas.api.types.is_re_compilable +generated/pandas.api.types.is_re,../reference/api/pandas.api.types.is_re +generated/pandas.api.types.is_scalar,../reference/api/pandas.api.types.is_scalar +generated/pandas.api.types.is_signed_integer_dtype,../reference/api/pandas.api.types.is_signed_integer_dtype +generated/pandas.api.types.is_sparse,../reference/api/pandas.api.types.is_sparse 
+generated/pandas.api.types.is_string_dtype,../reference/api/pandas.api.types.is_string_dtype +generated/pandas.api.types.is_timedelta64_dtype,../reference/api/pandas.api.types.is_timedelta64_dtype +generated/pandas.api.types.is_timedelta64_ns_dtype,../reference/api/pandas.api.types.is_timedelta64_ns_dtype +generated/pandas.api.types.is_unsigned_integer_dtype,../reference/api/pandas.api.types.is_unsigned_integer_dtype +generated/pandas.api.types.pandas_dtype,../reference/api/pandas.api.types.pandas_dtype +generated/pandas.api.types.union_categoricals,../reference/api/pandas.api.types.union_categoricals +generated/pandas.bdate_range,../reference/api/pandas.bdate_range +generated/pandas.Categorical.__array__,../reference/api/pandas.Categorical.__array__ +generated/pandas.Categorical.categories,../reference/api/pandas.Categorical.categories +generated/pandas.Categorical.codes,../reference/api/pandas.Categorical.codes +generated/pandas.CategoricalDtype.categories,../reference/api/pandas.CategoricalDtype.categories +generated/pandas.Categorical.dtype,../reference/api/pandas.Categorical.dtype +generated/pandas.CategoricalDtype,../reference/api/pandas.CategoricalDtype +generated/pandas.CategoricalDtype.ordered,../reference/api/pandas.CategoricalDtype.ordered +generated/pandas.Categorical.from_codes,../reference/api/pandas.Categorical.from_codes +generated/pandas.Categorical,../reference/api/pandas.Categorical +generated/pandas.CategoricalIndex.add_categories,../reference/api/pandas.CategoricalIndex.add_categories +generated/pandas.CategoricalIndex.as_ordered,../reference/api/pandas.CategoricalIndex.as_ordered +generated/pandas.CategoricalIndex.as_unordered,../reference/api/pandas.CategoricalIndex.as_unordered +generated/pandas.CategoricalIndex.categories,../reference/api/pandas.CategoricalIndex.categories +generated/pandas.CategoricalIndex.codes,../reference/api/pandas.CategoricalIndex.codes +generated/pandas.CategoricalIndex.equals,../reference/api/pandas.CategoricalIndex.equals +generated/pandas.CategoricalIndex,../reference/api/pandas.CategoricalIndex +generated/pandas.CategoricalIndex.map,../reference/api/pandas.CategoricalIndex.map +generated/pandas.CategoricalIndex.ordered,../reference/api/pandas.CategoricalIndex.ordered +generated/pandas.CategoricalIndex.remove_categories,../reference/api/pandas.CategoricalIndex.remove_categories +generated/pandas.CategoricalIndex.remove_unused_categories,../reference/api/pandas.CategoricalIndex.remove_unused_categories +generated/pandas.CategoricalIndex.rename_categories,../reference/api/pandas.CategoricalIndex.rename_categories +generated/pandas.CategoricalIndex.reorder_categories,../reference/api/pandas.CategoricalIndex.reorder_categories +generated/pandas.CategoricalIndex.set_categories,../reference/api/pandas.CategoricalIndex.set_categories +generated/pandas.Categorical.ordered,../reference/api/pandas.Categorical.ordered +generated/pandas.concat,../reference/api/pandas.concat +generated/pandas.core.groupby.DataFrameGroupBy.all,../reference/api/pandas.core.groupby.DataFrameGroupBy.all +generated/pandas.core.groupby.DataFrameGroupBy.any,../reference/api/pandas.core.groupby.DataFrameGroupBy.any +generated/pandas.core.groupby.DataFrameGroupBy.bfill,../reference/api/pandas.core.groupby.DataFrameGroupBy.bfill +generated/pandas.core.groupby.DataFrameGroupBy.boxplot,../reference/api/pandas.core.groupby.DataFrameGroupBy.boxplot +generated/pandas.core.groupby.DataFrameGroupBy.corr,../reference/api/pandas.core.groupby.DataFrameGroupBy.corr 
+generated/pandas.core.groupby.DataFrameGroupBy.corrwith,../reference/api/pandas.core.groupby.DataFrameGroupBy.corrwith +generated/pandas.core.groupby.DataFrameGroupBy.count,../reference/api/pandas.core.groupby.DataFrameGroupBy.count +generated/pandas.core.groupby.DataFrameGroupBy.cov,../reference/api/pandas.core.groupby.DataFrameGroupBy.cov +generated/pandas.core.groupby.DataFrameGroupBy.cummax,../reference/api/pandas.core.groupby.DataFrameGroupBy.cummax +generated/pandas.core.groupby.DataFrameGroupBy.cummin,../reference/api/pandas.core.groupby.DataFrameGroupBy.cummin +generated/pandas.core.groupby.DataFrameGroupBy.cumprod,../reference/api/pandas.core.groupby.DataFrameGroupBy.cumprod +generated/pandas.core.groupby.DataFrameGroupBy.cumsum,../reference/api/pandas.core.groupby.DataFrameGroupBy.cumsum +generated/pandas.core.groupby.DataFrameGroupBy.describe,../reference/api/pandas.core.groupby.DataFrameGroupBy.describe +generated/pandas.core.groupby.DataFrameGroupBy.diff,../reference/api/pandas.core.groupby.DataFrameGroupBy.diff +generated/pandas.core.groupby.DataFrameGroupBy.ffill,../reference/api/pandas.core.groupby.DataFrameGroupBy.ffill +generated/pandas.core.groupby.DataFrameGroupBy.fillna,../reference/api/pandas.core.groupby.DataFrameGroupBy.fillna +generated/pandas.core.groupby.DataFrameGroupBy.filter,../reference/api/pandas.core.groupby.DataFrameGroupBy.filter +generated/pandas.core.groupby.DataFrameGroupBy.hist,../reference/api/pandas.core.groupby.DataFrameGroupBy.hist +generated/pandas.core.groupby.DataFrameGroupBy.idxmax,../reference/api/pandas.core.groupby.DataFrameGroupBy.idxmax +generated/pandas.core.groupby.DataFrameGroupBy.idxmin,../reference/api/pandas.core.groupby.DataFrameGroupBy.idxmin +generated/pandas.core.groupby.DataFrameGroupBy.mad,../reference/api/pandas.core.groupby.DataFrameGroupBy.mad +generated/pandas.core.groupby.DataFrameGroupBy.pct_change,../reference/api/pandas.core.groupby.DataFrameGroupBy.pct_change +generated/pandas.core.groupby.DataFrameGroupBy.plot,../reference/api/pandas.core.groupby.DataFrameGroupBy.plot +generated/pandas.core.groupby.DataFrameGroupBy.quantile,../reference/api/pandas.core.groupby.DataFrameGroupBy.quantile +generated/pandas.core.groupby.DataFrameGroupBy.rank,../reference/api/pandas.core.groupby.DataFrameGroupBy.rank +generated/pandas.core.groupby.DataFrameGroupBy.resample,../reference/api/pandas.core.groupby.DataFrameGroupBy.resample +generated/pandas.core.groupby.DataFrameGroupBy.shift,../reference/api/pandas.core.groupby.DataFrameGroupBy.shift +generated/pandas.core.groupby.DataFrameGroupBy.size,../reference/api/pandas.core.groupby.DataFrameGroupBy.size +generated/pandas.core.groupby.DataFrameGroupBy.skew,../reference/api/pandas.core.groupby.DataFrameGroupBy.skew +generated/pandas.core.groupby.DataFrameGroupBy.take,../reference/api/pandas.core.groupby.DataFrameGroupBy.take +generated/pandas.core.groupby.DataFrameGroupBy.tshift,../reference/api/pandas.core.groupby.DataFrameGroupBy.tshift +generated/pandas.core.groupby.GroupBy.agg,../reference/api/pandas.core.groupby.GroupBy.agg +generated/pandas.core.groupby.GroupBy.aggregate,../reference/api/pandas.core.groupby.GroupBy.aggregate +generated/pandas.core.groupby.GroupBy.all,../reference/api/pandas.core.groupby.GroupBy.all +generated/pandas.core.groupby.GroupBy.any,../reference/api/pandas.core.groupby.GroupBy.any +generated/pandas.core.groupby.GroupBy.apply,../reference/api/pandas.core.groupby.GroupBy.apply 
+generated/pandas.core.groupby.GroupBy.bfill,../reference/api/pandas.core.groupby.GroupBy.bfill +generated/pandas.core.groupby.GroupBy.count,../reference/api/pandas.core.groupby.GroupBy.count +generated/pandas.core.groupby.GroupBy.cumcount,../reference/api/pandas.core.groupby.GroupBy.cumcount +generated/pandas.core.groupby.GroupBy.ffill,../reference/api/pandas.core.groupby.GroupBy.ffill +generated/pandas.core.groupby.GroupBy.first,../reference/api/pandas.core.groupby.GroupBy.first +generated/pandas.core.groupby.GroupBy.get_group,../reference/api/pandas.core.groupby.GroupBy.get_group +generated/pandas.core.groupby.GroupBy.groups,../reference/api/pandas.core.groupby.GroupBy.groups +generated/pandas.core.groupby.GroupBy.head,../reference/api/pandas.core.groupby.GroupBy.head +generated/pandas.core.groupby.GroupBy.indices,../reference/api/pandas.core.groupby.GroupBy.indices +generated/pandas.core.groupby.GroupBy.__iter__,../reference/api/pandas.core.groupby.GroupBy.__iter__ +generated/pandas.core.groupby.GroupBy.last,../reference/api/pandas.core.groupby.GroupBy.last +generated/pandas.core.groupby.GroupBy.max,../reference/api/pandas.core.groupby.GroupBy.max +generated/pandas.core.groupby.GroupBy.mean,../reference/api/pandas.core.groupby.GroupBy.mean +generated/pandas.core.groupby.GroupBy.median,../reference/api/pandas.core.groupby.GroupBy.median +generated/pandas.core.groupby.GroupBy.min,../reference/api/pandas.core.groupby.GroupBy.min +generated/pandas.core.groupby.GroupBy.ngroup,../reference/api/pandas.core.groupby.GroupBy.ngroup +generated/pandas.core.groupby.GroupBy.nth,../reference/api/pandas.core.groupby.GroupBy.nth +generated/pandas.core.groupby.GroupBy.ohlc,../reference/api/pandas.core.groupby.GroupBy.ohlc +generated/pandas.core.groupby.GroupBy.pct_change,../reference/api/pandas.core.groupby.GroupBy.pct_change +generated/pandas.core.groupby.GroupBy.pipe,../reference/api/pandas.core.groupby.GroupBy.pipe +generated/pandas.core.groupby.GroupBy.prod,../reference/api/pandas.core.groupby.GroupBy.prod +generated/pandas.core.groupby.GroupBy.rank,../reference/api/pandas.core.groupby.GroupBy.rank +generated/pandas.core.groupby.GroupBy.sem,../reference/api/pandas.core.groupby.GroupBy.sem +generated/pandas.core.groupby.GroupBy.size,../reference/api/pandas.core.groupby.GroupBy.size +generated/pandas.core.groupby.GroupBy.std,../reference/api/pandas.core.groupby.GroupBy.std +generated/pandas.core.groupby.GroupBy.sum,../reference/api/pandas.core.groupby.GroupBy.sum +generated/pandas.core.groupby.GroupBy.tail,../reference/api/pandas.core.groupby.GroupBy.tail +generated/pandas.core.groupby.GroupBy.transform,../reference/api/pandas.core.groupby.GroupBy.transform +generated/pandas.core.groupby.GroupBy.var,../reference/api/pandas.core.groupby.GroupBy.var +generated/pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing,../reference/api/pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing +generated/pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing,../reference/api/pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing +generated/pandas.core.groupby.SeriesGroupBy.nlargest,../reference/api/pandas.core.groupby.SeriesGroupBy.nlargest +generated/pandas.core.groupby.SeriesGroupBy.nsmallest,../reference/api/pandas.core.groupby.SeriesGroupBy.nsmallest +generated/pandas.core.groupby.SeriesGroupBy.nunique,../reference/api/pandas.core.groupby.SeriesGroupBy.nunique +generated/pandas.core.groupby.SeriesGroupBy.unique,../reference/api/pandas.core.groupby.SeriesGroupBy.unique 
+generated/pandas.core.groupby.SeriesGroupBy.value_counts,../reference/api/pandas.core.groupby.SeriesGroupBy.value_counts +generated/pandas.core.resample.Resampler.aggregate,../reference/api/pandas.core.resample.Resampler.aggregate +generated/pandas.core.resample.Resampler.apply,../reference/api/pandas.core.resample.Resampler.apply +generated/pandas.core.resample.Resampler.asfreq,../reference/api/pandas.core.resample.Resampler.asfreq +generated/pandas.core.resample.Resampler.backfill,../reference/api/pandas.core.resample.Resampler.backfill +generated/pandas.core.resample.Resampler.bfill,../reference/api/pandas.core.resample.Resampler.bfill +generated/pandas.core.resample.Resampler.count,../reference/api/pandas.core.resample.Resampler.count +generated/pandas.core.resample.Resampler.ffill,../reference/api/pandas.core.resample.Resampler.ffill +generated/pandas.core.resample.Resampler.fillna,../reference/api/pandas.core.resample.Resampler.fillna +generated/pandas.core.resample.Resampler.first,../reference/api/pandas.core.resample.Resampler.first +generated/pandas.core.resample.Resampler.get_group,../reference/api/pandas.core.resample.Resampler.get_group +generated/pandas.core.resample.Resampler.groups,../reference/api/pandas.core.resample.Resampler.groups +generated/pandas.core.resample.Resampler.indices,../reference/api/pandas.core.resample.Resampler.indices +generated/pandas.core.resample.Resampler.interpolate,../reference/api/pandas.core.resample.Resampler.interpolate +generated/pandas.core.resample.Resampler.__iter__,../reference/api/pandas.core.resample.Resampler.__iter__ +generated/pandas.core.resample.Resampler.last,../reference/api/pandas.core.resample.Resampler.last +generated/pandas.core.resample.Resampler.max,../reference/api/pandas.core.resample.Resampler.max +generated/pandas.core.resample.Resampler.mean,../reference/api/pandas.core.resample.Resampler.mean +generated/pandas.core.resample.Resampler.median,../reference/api/pandas.core.resample.Resampler.median +generated/pandas.core.resample.Resampler.min,../reference/api/pandas.core.resample.Resampler.min +generated/pandas.core.resample.Resampler.nearest,../reference/api/pandas.core.resample.Resampler.nearest +generated/pandas.core.resample.Resampler.nunique,../reference/api/pandas.core.resample.Resampler.nunique +generated/pandas.core.resample.Resampler.ohlc,../reference/api/pandas.core.resample.Resampler.ohlc +generated/pandas.core.resample.Resampler.pad,../reference/api/pandas.core.resample.Resampler.pad +generated/pandas.core.resample.Resampler.pipe,../reference/api/pandas.core.resample.Resampler.pipe +generated/pandas.core.resample.Resampler.prod,../reference/api/pandas.core.resample.Resampler.prod +generated/pandas.core.resample.Resampler.quantile,../reference/api/pandas.core.resample.Resampler.quantile +generated/pandas.core.resample.Resampler.sem,../reference/api/pandas.core.resample.Resampler.sem +generated/pandas.core.resample.Resampler.size,../reference/api/pandas.core.resample.Resampler.size +generated/pandas.core.resample.Resampler.std,../reference/api/pandas.core.resample.Resampler.std +generated/pandas.core.resample.Resampler.sum,../reference/api/pandas.core.resample.Resampler.sum +generated/pandas.core.resample.Resampler.transform,../reference/api/pandas.core.resample.Resampler.transform +generated/pandas.core.resample.Resampler.var,../reference/api/pandas.core.resample.Resampler.var +generated/pandas.core.window.ExponentialMovingWindow.corr,../reference/api/pandas.core.window.ExponentialMovingWindow.corr 
+generated/pandas.core.window.ExponentialMovingWindow.cov,../reference/api/pandas.core.window.ExponentialMovingWindow.cov +generated/pandas.core.window.ExponentialMovingWindow.mean,../reference/api/pandas.core.window.ExponentialMovingWindow.mean +generated/pandas.core.window.ExponentialMovingWindow.std,../reference/api/pandas.core.window.ExponentialMovingWindow.std +generated/pandas.core.window.ExponentialMovingWindow.var,../reference/api/pandas.core.window.ExponentialMovingWindow.var +generated/pandas.core.window.Expanding.aggregate,../reference/api/pandas.core.window.Expanding.aggregate +generated/pandas.core.window.Expanding.apply,../reference/api/pandas.core.window.Expanding.apply +generated/pandas.core.window.Expanding.corr,../reference/api/pandas.core.window.Expanding.corr +generated/pandas.core.window.Expanding.count,../reference/api/pandas.core.window.Expanding.count +generated/pandas.core.window.Expanding.cov,../reference/api/pandas.core.window.Expanding.cov +generated/pandas.core.window.Expanding.kurt,../reference/api/pandas.core.window.Expanding.kurt +generated/pandas.core.window.Expanding.max,../reference/api/pandas.core.window.Expanding.max +generated/pandas.core.window.Expanding.mean,../reference/api/pandas.core.window.Expanding.mean +generated/pandas.core.window.Expanding.median,../reference/api/pandas.core.window.Expanding.median +generated/pandas.core.window.Expanding.min,../reference/api/pandas.core.window.Expanding.min +generated/pandas.core.window.Expanding.quantile,../reference/api/pandas.core.window.Expanding.quantile +generated/pandas.core.window.Expanding.skew,../reference/api/pandas.core.window.Expanding.skew +generated/pandas.core.window.Expanding.std,../reference/api/pandas.core.window.Expanding.std +generated/pandas.core.window.Expanding.sum,../reference/api/pandas.core.window.Expanding.sum +generated/pandas.core.window.Expanding.var,../reference/api/pandas.core.window.Expanding.var +generated/pandas.core.window.Rolling.aggregate,../reference/api/pandas.core.window.rolling.Rolling.aggregate +generated/pandas.core.window.Rolling.apply,../reference/api/pandas.core.window.rolling.Rolling.apply +generated/pandas.core.window.Rolling.corr,../reference/api/pandas.core.window.rolling.Rolling.corr +generated/pandas.core.window.Rolling.count,../reference/api/pandas.core.window.rolling.Rolling.count +generated/pandas.core.window.Rolling.cov,../reference/api/pandas.core.window.rolling.Rolling.cov +generated/pandas.core.window.Rolling.kurt,../reference/api/pandas.core.window.rolling.Rolling.kurt +generated/pandas.core.window.Rolling.max,../reference/api/pandas.core.window.rolling.Rolling.max +generated/pandas.core.window.Rolling.mean,../reference/api/pandas.core.window.rolling.Rolling.mean +generated/pandas.core.window.Rolling.median,../reference/api/pandas.core.window.rolling.Rolling.median +generated/pandas.core.window.Rolling.min,../reference/api/pandas.core.window.rolling.Rolling.min +generated/pandas.core.window.Rolling.quantile,../reference/api/pandas.core.window.rolling.Rolling.quantile +generated/pandas.core.window.Rolling.skew,../reference/api/pandas.core.window.rolling.Rolling.skew +generated/pandas.core.window.Rolling.std,../reference/api/pandas.core.window.rolling.Rolling.std +generated/pandas.core.window.Rolling.sum,../reference/api/pandas.core.window.rolling.Rolling.sum +generated/pandas.core.window.Rolling.var,../reference/api/pandas.core.window.rolling.Rolling.var +generated/pandas.core.window.Window.mean,../reference/api/pandas.core.window.Window.mean 
+generated/pandas.core.window.Window.sum,../reference/api/pandas.core.window.Window.sum +generated/pandas.crosstab,../reference/api/pandas.crosstab +generated/pandas.cut,../reference/api/pandas.cut +generated/pandas.DataFrame.abs,../reference/api/pandas.DataFrame.abs +generated/pandas.DataFrame.add,../reference/api/pandas.DataFrame.add +generated/pandas.DataFrame.add_prefix,../reference/api/pandas.DataFrame.add_prefix +generated/pandas.DataFrame.add_suffix,../reference/api/pandas.DataFrame.add_suffix +generated/pandas.DataFrame.agg,../reference/api/pandas.DataFrame.agg +generated/pandas.DataFrame.aggregate,../reference/api/pandas.DataFrame.aggregate +generated/pandas.DataFrame.align,../reference/api/pandas.DataFrame.align +generated/pandas.DataFrame.all,../reference/api/pandas.DataFrame.all +generated/pandas.DataFrame.any,../reference/api/pandas.DataFrame.any +generated/pandas.DataFrame.append,../reference/api/pandas.DataFrame.append +generated/pandas.DataFrame.apply,../reference/api/pandas.DataFrame.apply +generated/pandas.DataFrame.applymap,../reference/api/pandas.DataFrame.applymap +generated/pandas.DataFrame.as_blocks,../reference/api/pandas.DataFrame.as_blocks +generated/pandas.DataFrame.asfreq,../reference/api/pandas.DataFrame.asfreq +generated/pandas.DataFrame.as_matrix,../reference/api/pandas.DataFrame.as_matrix +generated/pandas.DataFrame.asof,../reference/api/pandas.DataFrame.asof +generated/pandas.DataFrame.assign,../reference/api/pandas.DataFrame.assign +generated/pandas.DataFrame.astype,../reference/api/pandas.DataFrame.astype +generated/pandas.DataFrame.at,../reference/api/pandas.DataFrame.at +generated/pandas.DataFrame.at_time,../reference/api/pandas.DataFrame.at_time +generated/pandas.DataFrame.axes,../reference/api/pandas.DataFrame.axes +generated/pandas.DataFrame.between_time,../reference/api/pandas.DataFrame.between_time +generated/pandas.DataFrame.bfill,../reference/api/pandas.DataFrame.bfill +generated/pandas.DataFrame.blocks,../reference/api/pandas.DataFrame.blocks +generated/pandas.DataFrame.bool,../reference/api/pandas.DataFrame.bool +generated/pandas.DataFrame.boxplot,../reference/api/pandas.DataFrame.boxplot +generated/pandas.DataFrame.clip,../reference/api/pandas.DataFrame.clip +generated/pandas.DataFrame.clip_lower,../reference/api/pandas.DataFrame.clip_lower +generated/pandas.DataFrame.clip_upper,../reference/api/pandas.DataFrame.clip_upper +generated/pandas.DataFrame.columns,../reference/api/pandas.DataFrame.columns +generated/pandas.DataFrame.combine_first,../reference/api/pandas.DataFrame.combine_first +generated/pandas.DataFrame.combine,../reference/api/pandas.DataFrame.combine +generated/pandas.DataFrame.convert_objects,../reference/api/pandas.DataFrame.convert_objects +generated/pandas.DataFrame.copy,../reference/api/pandas.DataFrame.copy +generated/pandas.DataFrame.corr,../reference/api/pandas.DataFrame.corr +generated/pandas.DataFrame.corrwith,../reference/api/pandas.DataFrame.corrwith +generated/pandas.DataFrame.count,../reference/api/pandas.DataFrame.count +generated/pandas.DataFrame.cov,../reference/api/pandas.DataFrame.cov +generated/pandas.DataFrame.cummax,../reference/api/pandas.DataFrame.cummax +generated/pandas.DataFrame.cummin,../reference/api/pandas.DataFrame.cummin +generated/pandas.DataFrame.cumprod,../reference/api/pandas.DataFrame.cumprod +generated/pandas.DataFrame.cumsum,../reference/api/pandas.DataFrame.cumsum +generated/pandas.DataFrame.describe,../reference/api/pandas.DataFrame.describe 
+generated/pandas.DataFrame.diff,../reference/api/pandas.DataFrame.diff +generated/pandas.DataFrame.div,../reference/api/pandas.DataFrame.div +generated/pandas.DataFrame.divide,../reference/api/pandas.DataFrame.divide +generated/pandas.DataFrame.dot,../reference/api/pandas.DataFrame.dot +generated/pandas.DataFrame.drop_duplicates,../reference/api/pandas.DataFrame.drop_duplicates +generated/pandas.DataFrame.drop,../reference/api/pandas.DataFrame.drop +generated/pandas.DataFrame.droplevel,../reference/api/pandas.DataFrame.droplevel +generated/pandas.DataFrame.dropna,../reference/api/pandas.DataFrame.dropna +generated/pandas.DataFrame.dtypes,../reference/api/pandas.DataFrame.dtypes +generated/pandas.DataFrame.duplicated,../reference/api/pandas.DataFrame.duplicated +generated/pandas.DataFrame.empty,../reference/api/pandas.DataFrame.empty +generated/pandas.DataFrame.eq,../reference/api/pandas.DataFrame.eq +generated/pandas.DataFrame.equals,../reference/api/pandas.DataFrame.equals +generated/pandas.DataFrame.eval,../reference/api/pandas.DataFrame.eval +generated/pandas.DataFrame.ewm,../reference/api/pandas.DataFrame.ewm +generated/pandas.DataFrame.expanding,../reference/api/pandas.DataFrame.expanding +generated/pandas.DataFrame.ffill,../reference/api/pandas.DataFrame.ffill +generated/pandas.DataFrame.fillna,../reference/api/pandas.DataFrame.fillna +generated/pandas.DataFrame.filter,../reference/api/pandas.DataFrame.filter +generated/pandas.DataFrame.first,../reference/api/pandas.DataFrame.first +generated/pandas.DataFrame.first_valid_index,../reference/api/pandas.DataFrame.first_valid_index +generated/pandas.DataFrame.floordiv,../reference/api/pandas.DataFrame.floordiv +generated/pandas.DataFrame.from_csv,../reference/api/pandas.DataFrame.from_csv +generated/pandas.DataFrame.from_dict,../reference/api/pandas.DataFrame.from_dict +generated/pandas.DataFrame.from_items,../reference/api/pandas.DataFrame.from_items +generated/pandas.DataFrame.from_records,../reference/api/pandas.DataFrame.from_records +generated/pandas.DataFrame.ge,../reference/api/pandas.DataFrame.ge +generated/pandas.DataFrame.get,../reference/api/pandas.DataFrame.get +generated/pandas.DataFrame.get_value,../reference/api/pandas.DataFrame.get_value +generated/pandas.DataFrame.groupby,../reference/api/pandas.DataFrame.groupby +generated/pandas.DataFrame.gt,../reference/api/pandas.DataFrame.gt +generated/pandas.DataFrame.head,../reference/api/pandas.DataFrame.head +generated/pandas.DataFrame.hist,../reference/api/pandas.DataFrame.hist +generated/pandas.DataFrame,../reference/api/pandas.DataFrame +generated/pandas.DataFrame.iat,../reference/api/pandas.DataFrame.iat +generated/pandas.DataFrame.idxmax,../reference/api/pandas.DataFrame.idxmax +generated/pandas.DataFrame.idxmin,../reference/api/pandas.DataFrame.idxmin +generated/pandas.DataFrame.iloc,../reference/api/pandas.DataFrame.iloc +generated/pandas.DataFrame.index,../reference/api/pandas.DataFrame.index +generated/pandas.DataFrame.infer_objects,../reference/api/pandas.DataFrame.infer_objects +generated/pandas.DataFrame.info,../reference/api/pandas.DataFrame.info +generated/pandas.DataFrame.insert,../reference/api/pandas.DataFrame.insert +generated/pandas.DataFrame.interpolate,../reference/api/pandas.DataFrame.interpolate +generated/pandas.DataFrame.is_copy,../reference/api/pandas.DataFrame.is_copy +generated/pandas.DataFrame.isin,../reference/api/pandas.DataFrame.isin +generated/pandas.DataFrame.isna,../reference/api/pandas.DataFrame.isna 
+generated/pandas.DataFrame.isnull,../reference/api/pandas.DataFrame.isnull +generated/pandas.DataFrame.items,../reference/api/pandas.DataFrame.items +generated/pandas.DataFrame.__iter__,../reference/api/pandas.DataFrame.__iter__ +generated/pandas.DataFrame.iteritems,../reference/api/pandas.DataFrame.iteritems +generated/pandas.DataFrame.iterrows,../reference/api/pandas.DataFrame.iterrows +generated/pandas.DataFrame.itertuples,../reference/api/pandas.DataFrame.itertuples +generated/pandas.DataFrame.ix,../reference/api/pandas.DataFrame.ix +generated/pandas.DataFrame.join,../reference/api/pandas.DataFrame.join +generated/pandas.DataFrame.keys,../reference/api/pandas.DataFrame.keys +generated/pandas.DataFrame.kurt,../reference/api/pandas.DataFrame.kurt +generated/pandas.DataFrame.kurtosis,../reference/api/pandas.DataFrame.kurtosis +generated/pandas.DataFrame.last,../reference/api/pandas.DataFrame.last +generated/pandas.DataFrame.last_valid_index,../reference/api/pandas.DataFrame.last_valid_index +generated/pandas.DataFrame.le,../reference/api/pandas.DataFrame.le +generated/pandas.DataFrame.loc,../reference/api/pandas.DataFrame.loc +generated/pandas.DataFrame.lookup,../reference/api/pandas.DataFrame.lookup +generated/pandas.DataFrame.lt,../reference/api/pandas.DataFrame.lt +generated/pandas.DataFrame.mad,../reference/api/pandas.DataFrame.mad +generated/pandas.DataFrame.mask,../reference/api/pandas.DataFrame.mask +generated/pandas.DataFrame.max,../reference/api/pandas.DataFrame.max +generated/pandas.DataFrame.mean,../reference/api/pandas.DataFrame.mean +generated/pandas.DataFrame.median,../reference/api/pandas.DataFrame.median +generated/pandas.DataFrame.melt,../reference/api/pandas.DataFrame.melt +generated/pandas.DataFrame.memory_usage,../reference/api/pandas.DataFrame.memory_usage +generated/pandas.DataFrame.merge,../reference/api/pandas.DataFrame.merge +generated/pandas.DataFrame.min,../reference/api/pandas.DataFrame.min +generated/pandas.DataFrame.mode,../reference/api/pandas.DataFrame.mode +generated/pandas.DataFrame.mod,../reference/api/pandas.DataFrame.mod +generated/pandas.DataFrame.mul,../reference/api/pandas.DataFrame.mul +generated/pandas.DataFrame.multiply,../reference/api/pandas.DataFrame.multiply +generated/pandas.DataFrame.ndim,../reference/api/pandas.DataFrame.ndim +generated/pandas.DataFrame.ne,../reference/api/pandas.DataFrame.ne +generated/pandas.DataFrame.nlargest,../reference/api/pandas.DataFrame.nlargest +generated/pandas.DataFrame.notna,../reference/api/pandas.DataFrame.notna +generated/pandas.DataFrame.notnull,../reference/api/pandas.DataFrame.notnull +generated/pandas.DataFrame.nsmallest,../reference/api/pandas.DataFrame.nsmallest +generated/pandas.DataFrame.nunique,../reference/api/pandas.DataFrame.nunique +generated/pandas.DataFrame.pct_change,../reference/api/pandas.DataFrame.pct_change +generated/pandas.DataFrame.pipe,../reference/api/pandas.DataFrame.pipe +generated/pandas.DataFrame.pivot,../reference/api/pandas.DataFrame.pivot +generated/pandas.DataFrame.pivot_table,../reference/api/pandas.DataFrame.pivot_table +generated/pandas.DataFrame.plot.barh,../reference/api/pandas.DataFrame.plot.barh +generated/pandas.DataFrame.plot.bar,../reference/api/pandas.DataFrame.plot.bar +generated/pandas.DataFrame.plot.box,../reference/api/pandas.DataFrame.plot.box +generated/pandas.DataFrame.plot.density,../reference/api/pandas.DataFrame.plot.density +generated/pandas.DataFrame.plot.hexbin,../reference/api/pandas.DataFrame.plot.hexbin 
+generated/pandas.DataFrame.plot.hist,../reference/api/pandas.DataFrame.plot.hist +generated/pandas.DataFrame.plot,../reference/api/pandas.DataFrame.plot +generated/pandas.DataFrame.plot.kde,../reference/api/pandas.DataFrame.plot.kde +generated/pandas.DataFrame.plot.line,../reference/api/pandas.DataFrame.plot.line +generated/pandas.DataFrame.plot.pie,../reference/api/pandas.DataFrame.plot.pie +generated/pandas.DataFrame.plot.scatter,../reference/api/pandas.DataFrame.plot.scatter +generated/pandas.DataFrame.pop,../reference/api/pandas.DataFrame.pop +generated/pandas.DataFrame.pow,../reference/api/pandas.DataFrame.pow +generated/pandas.DataFrame.prod,../reference/api/pandas.DataFrame.prod +generated/pandas.DataFrame.product,../reference/api/pandas.DataFrame.product +generated/pandas.DataFrame.quantile,../reference/api/pandas.DataFrame.quantile +generated/pandas.DataFrame.query,../reference/api/pandas.DataFrame.query +generated/pandas.DataFrame.radd,../reference/api/pandas.DataFrame.radd +generated/pandas.DataFrame.rank,../reference/api/pandas.DataFrame.rank +generated/pandas.DataFrame.rdiv,../reference/api/pandas.DataFrame.rdiv +generated/pandas.DataFrame.reindex_axis,../reference/api/pandas.DataFrame.reindex_axis +generated/pandas.DataFrame.reindex,../reference/api/pandas.DataFrame.reindex +generated/pandas.DataFrame.reindex_like,../reference/api/pandas.DataFrame.reindex_like +generated/pandas.DataFrame.rename_axis,../reference/api/pandas.DataFrame.rename_axis +generated/pandas.DataFrame.rename,../reference/api/pandas.DataFrame.rename +generated/pandas.DataFrame.reorder_levels,../reference/api/pandas.DataFrame.reorder_levels +generated/pandas.DataFrame.replace,../reference/api/pandas.DataFrame.replace +generated/pandas.DataFrame.resample,../reference/api/pandas.DataFrame.resample +generated/pandas.DataFrame.reset_index,../reference/api/pandas.DataFrame.reset_index +generated/pandas.DataFrame.rfloordiv,../reference/api/pandas.DataFrame.rfloordiv +generated/pandas.DataFrame.rmod,../reference/api/pandas.DataFrame.rmod +generated/pandas.DataFrame.rmul,../reference/api/pandas.DataFrame.rmul +generated/pandas.DataFrame.rolling,../reference/api/pandas.DataFrame.rolling +generated/pandas.DataFrame.round,../reference/api/pandas.DataFrame.round +generated/pandas.DataFrame.rpow,../reference/api/pandas.DataFrame.rpow +generated/pandas.DataFrame.rsub,../reference/api/pandas.DataFrame.rsub +generated/pandas.DataFrame.rtruediv,../reference/api/pandas.DataFrame.rtruediv +generated/pandas.DataFrame.sample,../reference/api/pandas.DataFrame.sample +generated/pandas.DataFrame.select_dtypes,../reference/api/pandas.DataFrame.select_dtypes +generated/pandas.DataFrame.select,../reference/api/pandas.DataFrame.select +generated/pandas.DataFrame.sem,../reference/api/pandas.DataFrame.sem +generated/pandas.DataFrame.set_axis,../reference/api/pandas.DataFrame.set_axis +generated/pandas.DataFrame.set_index,../reference/api/pandas.DataFrame.set_index +generated/pandas.DataFrame.set_value,../reference/api/pandas.DataFrame.set_value +generated/pandas.DataFrame.shape,../reference/api/pandas.DataFrame.shape +generated/pandas.DataFrame.shift,../reference/api/pandas.DataFrame.shift +generated/pandas.DataFrame.size,../reference/api/pandas.DataFrame.size +generated/pandas.DataFrame.skew,../reference/api/pandas.DataFrame.skew +generated/pandas.DataFrame.slice_shift,../reference/api/pandas.DataFrame.slice_shift +generated/pandas.DataFrame.sort_index,../reference/api/pandas.DataFrame.sort_index 
+generated/pandas.DataFrame.sort_values,../reference/api/pandas.DataFrame.sort_values +generated/pandas.DataFrame.squeeze,../reference/api/pandas.DataFrame.squeeze +generated/pandas.DataFrame.stack,../reference/api/pandas.DataFrame.stack +generated/pandas.DataFrame.std,../reference/api/pandas.DataFrame.std +generated/pandas.DataFrame.style,../reference/api/pandas.DataFrame.style +generated/pandas.DataFrame.sub,../reference/api/pandas.DataFrame.sub +generated/pandas.DataFrame.subtract,../reference/api/pandas.DataFrame.subtract +generated/pandas.DataFrame.sum,../reference/api/pandas.DataFrame.sum +generated/pandas.DataFrame.swapaxes,../reference/api/pandas.DataFrame.swapaxes +generated/pandas.DataFrame.swaplevel,../reference/api/pandas.DataFrame.swaplevel +generated/pandas.DataFrame.tail,../reference/api/pandas.DataFrame.tail +generated/pandas.DataFrame.take,../reference/api/pandas.DataFrame.take +generated/pandas.DataFrame.T,../reference/api/pandas.DataFrame.T +generated/pandas.DataFrame.timetuple,../reference/api/pandas.DataFrame.timetuple +generated/pandas.DataFrame.to_clipboard,../reference/api/pandas.DataFrame.to_clipboard +generated/pandas.DataFrame.to_csv,../reference/api/pandas.DataFrame.to_csv +generated/pandas.DataFrame.to_dict,../reference/api/pandas.DataFrame.to_dict +generated/pandas.DataFrame.to_excel,../reference/api/pandas.DataFrame.to_excel +generated/pandas.DataFrame.to_feather,../reference/api/pandas.DataFrame.to_feather +generated/pandas.DataFrame.to_gbq,../reference/api/pandas.DataFrame.to_gbq +generated/pandas.DataFrame.to_hdf,../reference/api/pandas.DataFrame.to_hdf +generated/pandas.DataFrame.to,../reference/api/pandas.DataFrame.to +generated/pandas.DataFrame.to_json,../reference/api/pandas.DataFrame.to_json +generated/pandas.DataFrame.to_latex,../reference/api/pandas.DataFrame.to_latex +generated/pandas.DataFrame.to_numpy,../reference/api/pandas.DataFrame.to_numpy +generated/pandas.DataFrame.to_panel,../reference/api/pandas.DataFrame.to_panel +generated/pandas.DataFrame.to_parquet,../reference/api/pandas.DataFrame.to_parquet +generated/pandas.DataFrame.to_period,../reference/api/pandas.DataFrame.to_period +generated/pandas.DataFrame.to_pickle,../reference/api/pandas.DataFrame.to_pickle +generated/pandas.DataFrame.to_records,../reference/api/pandas.DataFrame.to_records +generated/pandas.DataFrame.to_sql,../reference/api/pandas.DataFrame.to_sql +generated/pandas.DataFrame.to_stata,../reference/api/pandas.DataFrame.to_stata +generated/pandas.DataFrame.to_string,../reference/api/pandas.DataFrame.to_string +generated/pandas.DataFrame.to_timestamp,../reference/api/pandas.DataFrame.to_timestamp +generated/pandas.DataFrame.to_xarray,../reference/api/pandas.DataFrame.to_xarray +generated/pandas.DataFrame.transform,../reference/api/pandas.DataFrame.transform +generated/pandas.DataFrame.transpose,../reference/api/pandas.DataFrame.transpose +generated/pandas.DataFrame.truediv,../reference/api/pandas.DataFrame.truediv +generated/pandas.DataFrame.truncate,../reference/api/pandas.DataFrame.truncate +generated/pandas.DataFrame.tshift,../reference/api/pandas.DataFrame.tshift +generated/pandas.DataFrame.tz_convert,../reference/api/pandas.DataFrame.tz_convert +generated/pandas.DataFrame.tz_localize,../reference/api/pandas.DataFrame.tz_localize +generated/pandas.DataFrame.unstack,../reference/api/pandas.DataFrame.unstack +generated/pandas.DataFrame.update,../reference/api/pandas.DataFrame.update +generated/pandas.DataFrame.values,../reference/api/pandas.DataFrame.values 
+generated/pandas.DataFrame.var,../reference/api/pandas.DataFrame.var +generated/pandas.DataFrame.where,../reference/api/pandas.DataFrame.where +generated/pandas.DataFrame.xs,../reference/api/pandas.DataFrame.xs +generated/pandas.date_range,../reference/api/pandas.date_range +generated/pandas.DatetimeIndex.ceil,../reference/api/pandas.DatetimeIndex.ceil +generated/pandas.DatetimeIndex.date,../reference/api/pandas.DatetimeIndex.date +generated/pandas.DatetimeIndex.day,../reference/api/pandas.DatetimeIndex.day +generated/pandas.DatetimeIndex.day_name,../reference/api/pandas.DatetimeIndex.day_name +generated/pandas.DatetimeIndex.dayofweek,../reference/api/pandas.DatetimeIndex.dayofweek +generated/pandas.DatetimeIndex.day_of_week,../reference/api/pandas.DatetimeIndex.day_of_week +generated/pandas.DatetimeIndex.dayofyear,../reference/api/pandas.DatetimeIndex.dayofyear +generated/pandas.DatetimeIndex.day_of_year,../reference/api/pandas.DatetimeIndex.day_of_year +generated/pandas.DatetimeIndex.floor,../reference/api/pandas.DatetimeIndex.floor +generated/pandas.DatetimeIndex.freq,../reference/api/pandas.DatetimeIndex.freq +generated/pandas.DatetimeIndex.freqstr,../reference/api/pandas.DatetimeIndex.freqstr +generated/pandas.DatetimeIndex.hour,../reference/api/pandas.DatetimeIndex.hour +generated/pandas.DatetimeIndex,../reference/api/pandas.DatetimeIndex +generated/pandas.DatetimeIndex.indexer_at_time,../reference/api/pandas.DatetimeIndex.indexer_at_time +generated/pandas.DatetimeIndex.indexer_between_time,../reference/api/pandas.DatetimeIndex.indexer_between_time +generated/pandas.DatetimeIndex.inferred_freq,../reference/api/pandas.DatetimeIndex.inferred_freq +generated/pandas.DatetimeIndex.is_leap_year,../reference/api/pandas.DatetimeIndex.is_leap_year +generated/pandas.DatetimeIndex.is_month_end,../reference/api/pandas.DatetimeIndex.is_month_end +generated/pandas.DatetimeIndex.is_month_start,../reference/api/pandas.DatetimeIndex.is_month_start +generated/pandas.DatetimeIndex.is_quarter_end,../reference/api/pandas.DatetimeIndex.is_quarter_end +generated/pandas.DatetimeIndex.is_quarter_start,../reference/api/pandas.DatetimeIndex.is_quarter_start +generated/pandas.DatetimeIndex.is_year_end,../reference/api/pandas.DatetimeIndex.is_year_end +generated/pandas.DatetimeIndex.is_year_start,../reference/api/pandas.DatetimeIndex.is_year_start +generated/pandas.DatetimeIndex.microsecond,../reference/api/pandas.DatetimeIndex.microsecond +generated/pandas.DatetimeIndex.minute,../reference/api/pandas.DatetimeIndex.minute +generated/pandas.DatetimeIndex.month,../reference/api/pandas.DatetimeIndex.month +generated/pandas.DatetimeIndex.month_name,../reference/api/pandas.DatetimeIndex.month_name +generated/pandas.DatetimeIndex.nanosecond,../reference/api/pandas.DatetimeIndex.nanosecond +generated/pandas.DatetimeIndex.normalize,../reference/api/pandas.DatetimeIndex.normalize +generated/pandas.DatetimeIndex.quarter,../reference/api/pandas.DatetimeIndex.quarter +generated/pandas.DatetimeIndex.round,../reference/api/pandas.DatetimeIndex.round +generated/pandas.DatetimeIndex.second,../reference/api/pandas.DatetimeIndex.second +generated/pandas.DatetimeIndex.snap,../reference/api/pandas.DatetimeIndex.snap +generated/pandas.DatetimeIndex.strftime,../reference/api/pandas.DatetimeIndex.strftime +generated/pandas.DatetimeIndex.time,../reference/api/pandas.DatetimeIndex.time +generated/pandas.DatetimeIndex.timetz,../reference/api/pandas.DatetimeIndex.timetz 
+generated/pandas.DatetimeIndex.to_frame,../reference/api/pandas.DatetimeIndex.to_frame +generated/pandas.DatetimeIndex.to_perioddelta,../reference/api/pandas.DatetimeIndex.to_perioddelta +generated/pandas.DatetimeIndex.to_period,../reference/api/pandas.DatetimeIndex.to_period +generated/pandas.DatetimeIndex.to_pydatetime,../reference/api/pandas.DatetimeIndex.to_pydatetime +generated/pandas.DatetimeIndex.to_series,../reference/api/pandas.DatetimeIndex.to_series +generated/pandas.DatetimeIndex.tz_convert,../reference/api/pandas.DatetimeIndex.tz_convert +generated/pandas.DatetimeIndex.tz,../reference/api/pandas.DatetimeIndex.tz +generated/pandas.DatetimeIndex.tz_localize,../reference/api/pandas.DatetimeIndex.tz_localize +generated/pandas.DatetimeIndex.weekday,../reference/api/pandas.DatetimeIndex.weekday +generated/pandas.DatetimeIndex.week,../reference/api/pandas.DatetimeIndex.week +generated/pandas.DatetimeIndex.weekofyear,../reference/api/pandas.DatetimeIndex.weekofyear +generated/pandas.DatetimeIndex.year,../reference/api/pandas.DatetimeIndex.year +generated/pandas.DatetimeTZDtype.base,../reference/api/pandas.DatetimeTZDtype.base +generated/pandas.DatetimeTZDtype.construct_array_type,../reference/api/pandas.DatetimeTZDtype.construct_array_type +generated/pandas.DatetimeTZDtype.construct_from_string,../reference/api/pandas.DatetimeTZDtype.construct_from_string +generated/pandas.DatetimeTZDtype,../reference/api/pandas.DatetimeTZDtype +generated/pandas.DatetimeTZDtype.isbuiltin,../reference/api/pandas.DatetimeTZDtype.isbuiltin +generated/pandas.DatetimeTZDtype.is_dtype,../reference/api/pandas.DatetimeTZDtype.is_dtype +generated/pandas.DatetimeTZDtype.isnative,../reference/api/pandas.DatetimeTZDtype.isnative +generated/pandas.DatetimeTZDtype.itemsize,../reference/api/pandas.DatetimeTZDtype.itemsize +generated/pandas.DatetimeTZDtype.kind,../reference/api/pandas.DatetimeTZDtype.kind +generated/pandas.DatetimeTZDtype.name,../reference/api/pandas.DatetimeTZDtype.name +generated/pandas.DatetimeTZDtype.names,../reference/api/pandas.DatetimeTZDtype.names +generated/pandas.DatetimeTZDtype.na_value,../reference/api/pandas.DatetimeTZDtype.na_value +generated/pandas.DatetimeTZDtype.num,../reference/api/pandas.DatetimeTZDtype.num +generated/pandas.DatetimeTZDtype.reset_cache,../reference/api/pandas.DatetimeTZDtype.reset_cache +generated/pandas.DatetimeTZDtype.shape,../reference/api/pandas.DatetimeTZDtype.shape +generated/pandas.DatetimeTZDtype.str,../reference/api/pandas.DatetimeTZDtype.str +generated/pandas.DatetimeTZDtype.subdtype,../reference/api/pandas.DatetimeTZDtype.subdtype +generated/pandas.DatetimeTZDtype.tz,../reference/api/pandas.DatetimeTZDtype.tz +generated/pandas.DatetimeTZDtype.unit,../reference/api/pandas.DatetimeTZDtype.unit +generated/pandas.describe_option,../reference/api/pandas.describe_option +generated/pandas.errors.DtypeWarning,../reference/api/pandas.errors.DtypeWarning +generated/pandas.errors.EmptyDataError,../reference/api/pandas.errors.EmptyDataError +generated/pandas.errors.OutOfBoundsDatetime,../reference/api/pandas.errors.OutOfBoundsDatetime +generated/pandas.errors.ParserError,../reference/api/pandas.errors.ParserError +generated/pandas.errors.ParserWarning,../reference/api/pandas.errors.ParserWarning +generated/pandas.errors.PerformanceWarning,../reference/api/pandas.errors.PerformanceWarning +generated/pandas.errors.UnsortedIndexError,../reference/api/pandas.errors.UnsortedIndexError 
+generated/pandas.errors.UnsupportedFunctionCall,../reference/api/pandas.errors.UnsupportedFunctionCall +generated/pandas.eval,../reference/api/pandas.eval +generated/pandas.ExcelFile.parse,../reference/api/pandas.ExcelFile.parse +generated/pandas.ExcelWriter,../reference/api/pandas.ExcelWriter +generated/pandas.factorize,../reference/api/pandas.factorize +generated/pandas.Float64Index,../reference/api/pandas.Float64Index +generated/pandas.get_dummies,../reference/api/pandas.get_dummies +generated/pandas.get_option,../reference/api/pandas.get_option +generated/pandas.Grouper,../reference/api/pandas.Grouper +generated/pandas.HDFStore.append,../reference/api/pandas.HDFStore.append +generated/pandas.HDFStore.get,../reference/api/pandas.HDFStore.get +generated/pandas.HDFStore.groups,../reference/api/pandas.HDFStore.groups +generated/pandas.HDFStore.info,../reference/api/pandas.HDFStore.info +generated/pandas.HDFStore.keys,../reference/api/pandas.HDFStore.keys +generated/pandas.HDFStore.put,../reference/api/pandas.HDFStore.put +generated/pandas.HDFStore.select,../reference/api/pandas.HDFStore.select +generated/pandas.HDFStore.walk,../reference/api/pandas.HDFStore.walk +generated/pandas.Index.all,../reference/api/pandas.Index.all +generated/pandas.Index.any,../reference/api/pandas.Index.any +generated/pandas.Index.append,../reference/api/pandas.Index.append +generated/pandas.Index.argmax,../reference/api/pandas.Index.argmax +generated/pandas.Index.argmin,../reference/api/pandas.Index.argmin +generated/pandas.Index.argsort,../reference/api/pandas.Index.argsort +generated/pandas.Index.array,../reference/api/pandas.Index.array +generated/pandas.Index.asi8,../reference/api/pandas.Index.asi8 +generated/pandas.Index.asof,../reference/api/pandas.Index.asof +generated/pandas.Index.asof_locs,../reference/api/pandas.Index.asof_locs +generated/pandas.Index.astype,../reference/api/pandas.Index.astype +generated/pandas.Index.copy,../reference/api/pandas.Index.copy +generated/pandas.Index.data,../reference/api/pandas.Index.data +generated/pandas.Index.delete,../reference/api/pandas.Index.delete +generated/pandas.Index.difference,../reference/api/pandas.Index.difference +generated/pandas.Index.drop_duplicates,../reference/api/pandas.Index.drop_duplicates +generated/pandas.Index.drop,../reference/api/pandas.Index.drop +generated/pandas.Index.droplevel,../reference/api/pandas.Index.droplevel +generated/pandas.Index.dropna,../reference/api/pandas.Index.dropna +generated/pandas.Index.dtype,../reference/api/pandas.Index.dtype +generated/pandas.Index.duplicated,../reference/api/pandas.Index.duplicated +generated/pandas.Index.empty,../reference/api/pandas.Index.empty +generated/pandas.Index.equals,../reference/api/pandas.Index.equals +generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize +generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna +generated/pandas.Index.format,../reference/api/pandas.Index.format +generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for +generated/pandas.Index.get_indexer,../reference/api/pandas.Index.get_indexer +generated/pandas.Index.get_indexer_non_unique,../reference/api/pandas.Index.get_indexer_non_unique +generated/pandas.Index.get_level_values,../reference/api/pandas.Index.get_level_values +generated/pandas.Index.get_loc,../reference/api/pandas.Index.get_loc +generated/pandas.Index.get_slice_bound,../reference/api/pandas.Index.get_slice_bound +generated/pandas.Index.get_value,../reference/api/pandas.Index.get_value 
+generated/pandas.Index.groupby,../reference/api/pandas.Index.groupby +generated/pandas.Index.has_duplicates,../reference/api/pandas.Index.has_duplicates +generated/pandas.Index.hasnans,../reference/api/pandas.Index.hasnans +generated/pandas.Index.holds_integer,../reference/api/pandas.Index.holds_integer +generated/pandas.Index,../reference/api/pandas.Index +generated/pandas.Index.identical,../reference/api/pandas.Index.identical +generated/pandas.Index.inferred_type,../reference/api/pandas.Index.inferred_type +generated/pandas.Index.insert,../reference/api/pandas.Index.insert +generated/pandas.Index.intersection,../reference/api/pandas.Index.intersection +generated/pandas.Index.is_all_dates,../reference/api/pandas.Index.is_all_dates +generated/pandas.Index.is_boolean,../reference/api/pandas.Index.is_boolean +generated/pandas.Index.is_categorical,../reference/api/pandas.Index.is_categorical +generated/pandas.Index.is_floating,../reference/api/pandas.Index.is_floating +generated/pandas.Index.is_,../reference/api/pandas.Index.is_ +generated/pandas.Index.isin,../reference/api/pandas.Index.isin +generated/pandas.Index.is_integer,../reference/api/pandas.Index.is_integer +generated/pandas.Index.is_interval,../reference/api/pandas.Index.is_interval +generated/pandas.Index.is_lexsorted_for_tuple,../reference/api/pandas.Index.is_lexsorted_for_tuple +generated/pandas.Index.is_mixed,../reference/api/pandas.Index.is_mixed +generated/pandas.Index.is_monotonic_decreasing,../reference/api/pandas.Index.is_monotonic_decreasing +generated/pandas.Index.is_monotonic,../reference/api/pandas.Index.is_monotonic +generated/pandas.Index.is_monotonic_increasing,../reference/api/pandas.Index.is_monotonic_increasing +generated/pandas.Index.isna,../reference/api/pandas.Index.isna +generated/pandas.Index.isnull,../reference/api/pandas.Index.isnull +generated/pandas.Index.is_numeric,../reference/api/pandas.Index.is_numeric +generated/pandas.Index.is_object,../reference/api/pandas.Index.is_object +generated/pandas.Index.is_type_compatible,../reference/api/pandas.Index.is_type_compatible +generated/pandas.Index.is_unique,../reference/api/pandas.Index.is_unique +generated/pandas.Index.item,../reference/api/pandas.Index.item +generated/pandas.Index.join,../reference/api/pandas.Index.join +generated/pandas.Index.map,../reference/api/pandas.Index.map +generated/pandas.Index.max,../reference/api/pandas.Index.max +generated/pandas.Index.memory_usage,../reference/api/pandas.Index.memory_usage +generated/pandas.Index.min,../reference/api/pandas.Index.min +generated/pandas.Index.name,../reference/api/pandas.Index.name +generated/pandas.Index.names,../reference/api/pandas.Index.names +generated/pandas.Index.nbytes,../reference/api/pandas.Index.nbytes +generated/pandas.Index.ndim,../reference/api/pandas.Index.ndim +generated/pandas.Index.nlevels,../reference/api/pandas.Index.nlevels +generated/pandas.Index.notna,../reference/api/pandas.Index.notna +generated/pandas.Index.notnull,../reference/api/pandas.Index.notnull +generated/pandas.Index.nunique,../reference/api/pandas.Index.nunique +generated/pandas.Index.putmask,../reference/api/pandas.Index.putmask +generated/pandas.Index.ravel,../reference/api/pandas.Index.ravel +generated/pandas.Index.reindex,../reference/api/pandas.Index.reindex +generated/pandas.Index.rename,../reference/api/pandas.Index.rename +generated/pandas.Index.repeat,../reference/api/pandas.Index.repeat +generated/pandas.Index.searchsorted,../reference/api/pandas.Index.searchsorted 
+generated/pandas.Index.set_names,../reference/api/pandas.Index.set_names +generated/pandas.Index.set_value,../reference/api/pandas.Index.set_value +generated/pandas.Index.shape,../reference/api/pandas.Index.shape +generated/pandas.Index.shift,../reference/api/pandas.Index.shift +generated/pandas.Index.size,../reference/api/pandas.Index.size +generated/pandas.IndexSlice,../reference/api/pandas.IndexSlice +generated/pandas.Index.slice_indexer,../reference/api/pandas.Index.slice_indexer +generated/pandas.Index.slice_locs,../reference/api/pandas.Index.slice_locs +generated/pandas.Index.sort,../reference/api/pandas.Index.sort +generated/pandas.Index.sortlevel,../reference/api/pandas.Index.sortlevel +generated/pandas.Index.sort_values,../reference/api/pandas.Index.sort_values +generated/pandas.Index.str,../reference/api/pandas.Index.str +generated/pandas.Index.summary,../reference/api/pandas.Index.summary +generated/pandas.Index.symmetric_difference,../reference/api/pandas.Index.symmetric_difference +generated/pandas.Index.take,../reference/api/pandas.Index.take +generated/pandas.Index.T,../reference/api/pandas.Index.T +generated/pandas.Index.to_flat_index,../reference/api/pandas.Index.to_flat_index +generated/pandas.Index.to_frame,../reference/api/pandas.Index.to_frame +generated/pandas.Index.to_list,../reference/api/pandas.Index.to_list +generated/pandas.Index.tolist,../reference/api/pandas.Index.tolist +generated/pandas.Index.to_native_types,../reference/api/pandas.Index.to_native_types +generated/pandas.Index.to_numpy,../reference/api/pandas.Index.to_numpy +generated/pandas.Index.to_series,../reference/api/pandas.Index.to_series +generated/pandas.Index.transpose,../reference/api/pandas.Index.transpose +generated/pandas.Index.union,../reference/api/pandas.Index.union +generated/pandas.Index.unique,../reference/api/pandas.Index.unique +generated/pandas.Index.value_counts,../reference/api/pandas.Index.value_counts +generated/pandas.Index.values,../reference/api/pandas.Index.values +generated/pandas.Index.view,../reference/api/pandas.Index.view +generated/pandas.Index.where,../reference/api/pandas.Index.where +generated/pandas.infer_freq,../reference/api/pandas.infer_freq +generated/pandas.Interval.closed,../reference/api/pandas.Interval.closed +generated/pandas.Interval.closed_left,../reference/api/pandas.Interval.closed_left +generated/pandas.Interval.closed_right,../reference/api/pandas.Interval.closed_right +generated/pandas.Interval,../reference/api/pandas.Interval +generated/pandas.IntervalIndex.closed,../reference/api/pandas.IntervalIndex.closed +generated/pandas.IntervalIndex.contains,../reference/api/pandas.IntervalIndex.contains +generated/pandas.IntervalIndex.from_arrays,../reference/api/pandas.IntervalIndex.from_arrays +generated/pandas.IntervalIndex.from_breaks,../reference/api/pandas.IntervalIndex.from_breaks +generated/pandas.IntervalIndex.from_tuples,../reference/api/pandas.IntervalIndex.from_tuples +generated/pandas.IntervalIndex.get_indexer,../reference/api/pandas.IntervalIndex.get_indexer +generated/pandas.IntervalIndex.get_loc,../reference/api/pandas.IntervalIndex.get_loc +generated/pandas.IntervalIndex,../reference/api/pandas.IntervalIndex +generated/pandas.IntervalIndex.is_non_overlapping_monotonic,../reference/api/pandas.IntervalIndex.is_non_overlapping_monotonic +generated/pandas.IntervalIndex.is_overlapping,../reference/api/pandas.IntervalIndex.is_overlapping +generated/pandas.IntervalIndex.left,../reference/api/pandas.IntervalIndex.left 
+generated/pandas.IntervalIndex.length,../reference/api/pandas.IntervalIndex.length +generated/pandas.IntervalIndex.mid,../reference/api/pandas.IntervalIndex.mid +generated/pandas.IntervalIndex.overlaps,../reference/api/pandas.IntervalIndex.overlaps +generated/pandas.IntervalIndex.right,../reference/api/pandas.IntervalIndex.right +generated/pandas.IntervalIndex.set_closed,../reference/api/pandas.IntervalIndex.set_closed +generated/pandas.IntervalIndex.to_tuples,../reference/api/pandas.IntervalIndex.to_tuples +generated/pandas.IntervalIndex.values,../reference/api/pandas.IntervalIndex.values +generated/pandas.Interval.left,../reference/api/pandas.Interval.left +generated/pandas.Interval.length,../reference/api/pandas.Interval.length +generated/pandas.Interval.mid,../reference/api/pandas.Interval.mid +generated/pandas.Interval.open_left,../reference/api/pandas.Interval.open_left +generated/pandas.Interval.open_right,../reference/api/pandas.Interval.open_right +generated/pandas.Interval.overlaps,../reference/api/pandas.Interval.overlaps +generated/pandas.interval_range,../reference/api/pandas.interval_range +generated/pandas.Interval.right,../reference/api/pandas.Interval.right +generated/pandas.io.formats.style.Styler.apply,../reference/api/pandas.io.formats.style.Styler.apply +generated/pandas.io.formats.style.Styler.applymap,../reference/api/pandas.io.formats.style.Styler.applymap +generated/pandas.io.formats.style.Styler.background_gradient,../reference/api/pandas.io.formats.style.Styler.background_gradient +generated/pandas.io.formats.style.Styler.bar,../reference/api/pandas.io.formats.style.Styler.bar +generated/pandas.io.formats.style.Styler.clear,../reference/api/pandas.io.formats.style.Styler.clear +generated/pandas.io.formats.style.Styler.env,../reference/api/pandas.io.formats.style.Styler.env +generated/pandas.io.formats.style.Styler.export,../reference/api/pandas.io.formats.style.Styler.export +generated/pandas.io.formats.style.Styler.format,../reference/api/pandas.io.formats.style.Styler.format +generated/pandas.io.formats.style.Styler.from_custom_template,../reference/api/pandas.io.formats.style.Styler.from_custom_template +generated/pandas.io.formats.style.Styler.hide_columns,../reference/api/pandas.io.formats.style.Styler.hide_columns +generated/pandas.io.formats.style.Styler.hide_index,../reference/api/pandas.io.formats.style.Styler.hide_index +generated/pandas.io.formats.style.Styler.highlight_max,../reference/api/pandas.io.formats.style.Styler.highlight_max +generated/pandas.io.formats.style.Styler.highlight_min,../reference/api/pandas.io.formats.style.Styler.highlight_min +generated/pandas.io.formats.style.Styler.highlight_null,../reference/api/pandas.io.formats.style.Styler.highlight_null +generated/pandas.io.formats.style.Styler,../reference/api/pandas.io.formats.style.Styler +generated/pandas.io.formats.style.Styler.loader,../reference/api/pandas.io.formats.style.Styler.loader +generated/pandas.io.formats.style.Styler.pipe,../reference/api/pandas.io.formats.style.Styler.pipe +generated/pandas.io.formats.style.Styler.render,../reference/api/pandas.io.formats.style.Styler.render +generated/pandas.io.formats.style.Styler.set_caption,../reference/api/pandas.io.formats.style.Styler.set_caption +generated/pandas.io.formats.style.Styler.set_precision,../reference/api/pandas.io.formats.style.Styler.set_precision +generated/pandas.io.formats.style.Styler.set_properties,../reference/api/pandas.io.formats.style.Styler.set_properties 
+generated/pandas.io.formats.style.Styler.set_table_attributes,../reference/api/pandas.io.formats.style.Styler.set_table_attributes +generated/pandas.io.formats.style.Styler.set_table_styles,../reference/api/pandas.io.formats.style.Styler.set_table_styles +generated/pandas.io.formats.style.Styler.set_uuid,../reference/api/pandas.io.formats.style.Styler.set_uuid +generated/pandas.io.formats.style.Styler.template,../reference/api/pandas.io.formats.style.Styler.template +generated/pandas.io.formats.style.Styler.to_excel,../reference/api/pandas.io.formats.style.Styler.to_excel +generated/pandas.io.formats.style.Styler.use,../reference/api/pandas.io.formats.style.Styler.use +generated/pandas.io.formats.style.Styler.where,../reference/api/pandas.io.formats.style.Styler.where +generated/pandas.io.json.build_table_schema,../reference/api/pandas.io.json.build_table_schema +generated/pandas.io.json.json_normalize,../reference/api/pandas.json_normalize +generated/pandas.io.stata.StataReader.data_label,../reference/api/pandas.io.stata.StataReader.data_label +generated/pandas.io.stata.StataReader.value_labels,../reference/api/pandas.io.stata.StataReader.value_labels +generated/pandas.io.stata.StataReader.variable_labels,../reference/api/pandas.io.stata.StataReader.variable_labels +generated/pandas.io.stata.StataWriter.write_file,../reference/api/pandas.io.stata.StataWriter.write_file +generated/pandas.isna,../reference/api/pandas.isna +generated/pandas.isnull,../reference/api/pandas.isnull +generated/pandas.melt,../reference/api/pandas.melt +generated/pandas.merge_asof,../reference/api/pandas.merge_asof +generated/pandas.merge,../reference/api/pandas.merge +generated/pandas.merge_ordered,../reference/api/pandas.merge_ordered +generated/pandas.MultiIndex.codes,../reference/api/pandas.MultiIndex.codes +generated/pandas.MultiIndex.droplevel,../reference/api/pandas.MultiIndex.droplevel +generated/pandas.MultiIndex.from_arrays,../reference/api/pandas.MultiIndex.from_arrays +generated/pandas.MultiIndex.from_frame,../reference/api/pandas.MultiIndex.from_frame +generated/pandas.MultiIndex.from_product,../reference/api/pandas.MultiIndex.from_product +generated/pandas.MultiIndex.from_tuples,../reference/api/pandas.MultiIndex.from_tuples +generated/pandas.MultiIndex.get_indexer,../reference/api/pandas.MultiIndex.get_indexer +generated/pandas.MultiIndex.get_level_values,../reference/api/pandas.MultiIndex.get_level_values +generated/pandas.MultiIndex.get_loc,../reference/api/pandas.MultiIndex.get_loc +generated/pandas.MultiIndex.get_loc_level,../reference/api/pandas.MultiIndex.get_loc_level +generated/pandas.MultiIndex,../reference/api/pandas.MultiIndex +generated/pandas.MultiIndex.is_lexsorted,../reference/api/pandas.MultiIndex.is_lexsorted +generated/pandas.MultiIndex.levels,../reference/api/pandas.MultiIndex.levels +generated/pandas.MultiIndex.levshape,../reference/api/pandas.MultiIndex.levshape +generated/pandas.MultiIndex.names,../reference/api/pandas.MultiIndex.names +generated/pandas.MultiIndex.nlevels,../reference/api/pandas.MultiIndex.nlevels +generated/pandas.MultiIndex.remove_unused_levels,../reference/api/pandas.MultiIndex.remove_unused_levels +generated/pandas.MultiIndex.reorder_levels,../reference/api/pandas.MultiIndex.reorder_levels +generated/pandas.MultiIndex.set_codes,../reference/api/pandas.MultiIndex.set_codes +generated/pandas.MultiIndex.set_levels,../reference/api/pandas.MultiIndex.set_levels +generated/pandas.MultiIndex.sortlevel,../reference/api/pandas.MultiIndex.sortlevel 
+generated/pandas.MultiIndex.swaplevel,../reference/api/pandas.MultiIndex.swaplevel +generated/pandas.MultiIndex.to_flat_index,../reference/api/pandas.MultiIndex.to_flat_index +generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame +generated/pandas.notna,../reference/api/pandas.notna +generated/pandas.notnull,../reference/api/pandas.notnull +generated/pandas.option_context,../reference/api/pandas.option_context +generated/pandas.Period.asfreq,../reference/api/pandas.Period.asfreq +generated/pandas.Period.day,../reference/api/pandas.Period.day +generated/pandas.Period.dayofweek,../reference/api/pandas.Period.dayofweek +generated/pandas.Period.day_of_week,../reference/api/pandas.Period.day_of_week +generated/pandas.Period.dayofyear,../reference/api/pandas.Period.dayofyear +generated/pandas.Period.day_of_year,../reference/api/pandas.Period.day_of_year +generated/pandas.Period.days_in_month,../reference/api/pandas.Period.days_in_month +generated/pandas.Period.daysinmonth,../reference/api/pandas.Period.daysinmonth +generated/pandas.Period.end_time,../reference/api/pandas.Period.end_time +generated/pandas.Period.freq,../reference/api/pandas.Period.freq +generated/pandas.Period.freqstr,../reference/api/pandas.Period.freqstr +generated/pandas.Period.hour,../reference/api/pandas.Period.hour +generated/pandas.Period,../reference/api/pandas.Period +generated/pandas.PeriodIndex.asfreq,../reference/api/pandas.PeriodIndex.asfreq +generated/pandas.PeriodIndex.day,../reference/api/pandas.PeriodIndex.day +generated/pandas.PeriodIndex.dayofweek,../reference/api/pandas.PeriodIndex.dayofweek +generated/pandas.PeriodIndex.day_of_week,../reference/api/pandas.PeriodIndex.day_of_week +generated/pandas.PeriodIndex.dayofyear,../reference/api/pandas.PeriodIndex.dayofyear +generated/pandas.PeriodIndex.day_of_year,../reference/api/pandas.PeriodIndex.day_of_year +generated/pandas.PeriodIndex.days_in_month,../reference/api/pandas.PeriodIndex.days_in_month +generated/pandas.PeriodIndex.daysinmonth,../reference/api/pandas.PeriodIndex.daysinmonth +generated/pandas.PeriodIndex.end_time,../reference/api/pandas.PeriodIndex.end_time +generated/pandas.PeriodIndex.freq,../reference/api/pandas.PeriodIndex.freq +generated/pandas.PeriodIndex.freqstr,../reference/api/pandas.PeriodIndex.freqstr +generated/pandas.PeriodIndex.hour,../reference/api/pandas.PeriodIndex.hour +generated/pandas.PeriodIndex,../reference/api/pandas.PeriodIndex +generated/pandas.PeriodIndex.is_leap_year,../reference/api/pandas.PeriodIndex.is_leap_year +generated/pandas.PeriodIndex.minute,../reference/api/pandas.PeriodIndex.minute +generated/pandas.PeriodIndex.month,../reference/api/pandas.PeriodIndex.month +generated/pandas.PeriodIndex.quarter,../reference/api/pandas.PeriodIndex.quarter +generated/pandas.PeriodIndex.qyear,../reference/api/pandas.PeriodIndex.qyear +generated/pandas.PeriodIndex.second,../reference/api/pandas.PeriodIndex.second +generated/pandas.PeriodIndex.start_time,../reference/api/pandas.PeriodIndex.start_time +generated/pandas.PeriodIndex.strftime,../reference/api/pandas.PeriodIndex.strftime +generated/pandas.PeriodIndex.to_timestamp,../reference/api/pandas.PeriodIndex.to_timestamp +generated/pandas.PeriodIndex.weekday,../reference/api/pandas.PeriodIndex.weekday +generated/pandas.PeriodIndex.week,../reference/api/pandas.PeriodIndex.week +generated/pandas.PeriodIndex.weekofyear,../reference/api/pandas.PeriodIndex.weekofyear +generated/pandas.PeriodIndex.year,../reference/api/pandas.PeriodIndex.year 
+generated/pandas.Period.is_leap_year,../reference/api/pandas.Period.is_leap_year +generated/pandas.Period.minute,../reference/api/pandas.Period.minute +generated/pandas.Period.month,../reference/api/pandas.Period.month +generated/pandas.Period.now,../reference/api/pandas.Period.now +generated/pandas.Period.ordinal,../reference/api/pandas.Period.ordinal +generated/pandas.Period.quarter,../reference/api/pandas.Period.quarter +generated/pandas.Period.qyear,../reference/api/pandas.Period.qyear +generated/pandas.period_range,../reference/api/pandas.period_range +generated/pandas.Period.second,../reference/api/pandas.Period.second +generated/pandas.Period.start_time,../reference/api/pandas.Period.start_time +generated/pandas.Period.strftime,../reference/api/pandas.Period.strftime +generated/pandas.Period.to_timestamp,../reference/api/pandas.Period.to_timestamp +generated/pandas.Period.weekday,../reference/api/pandas.Period.weekday +generated/pandas.Period.week,../reference/api/pandas.Period.week +generated/pandas.Period.weekofyear,../reference/api/pandas.Period.weekofyear +generated/pandas.Period.year,../reference/api/pandas.Period.year +generated/pandas.pivot,../reference/api/pandas.pivot +generated/pandas.pivot_table,../reference/api/pandas.pivot_table +generated/pandas.plotting.andrews_curves,../reference/api/pandas.plotting.andrews_curves +generated/pandas.plotting.bootstrap_plot,../reference/api/pandas.plotting.bootstrap_plot +generated/pandas.plotting.deregister_matplotlib_converters,../reference/api/pandas.plotting.deregister_matplotlib_converters +generated/pandas.plotting.lag_plot,../reference/api/pandas.plotting.lag_plot +generated/pandas.plotting.parallel_coordinates,../reference/api/pandas.plotting.parallel_coordinates +generated/pandas.plotting.radviz,../reference/api/pandas.plotting.radviz +generated/pandas.plotting.register_matplotlib_converters,../reference/api/pandas.plotting.register_matplotlib_converters +generated/pandas.plotting.scatter_matrix,../reference/api/pandas.plotting.scatter_matrix +generated/pandas.qcut,../reference/api/pandas.qcut +generated/pandas.RangeIndex.from_range,../reference/api/pandas.RangeIndex.from_range +generated/pandas.RangeIndex,../reference/api/pandas.RangeIndex +generated/pandas.read_clipboard,../reference/api/pandas.read_clipboard +generated/pandas.read_csv,../reference/api/pandas.read_csv +generated/pandas.read_excel,../reference/api/pandas.read_excel +generated/pandas.read_feather,../reference/api/pandas.read_feather +generated/pandas.read_fwf,../reference/api/pandas.read_fwf +generated/pandas.read_gbq,../reference/api/pandas.read_gbq +generated/pandas.read_hdf,../reference/api/pandas.read_hdf +generated/pandas.read,../reference/api/pandas.read +generated/pandas.read_json,../reference/api/pandas.read_json +generated/pandas.read_parquet,../reference/api/pandas.read_parquet +generated/pandas.read_pickle,../reference/api/pandas.read_pickle +generated/pandas.read_sas,../reference/api/pandas.read_sas +generated/pandas.read_sql,../reference/api/pandas.read_sql +generated/pandas.read_sql_query,../reference/api/pandas.read_sql_query +generated/pandas.read_sql_table,../reference/api/pandas.read_sql_table +generated/pandas.read_stata,../reference/api/pandas.read_stata +generated/pandas.read_table,../reference/api/pandas.read_table +generated/pandas.reset_option,../reference/api/pandas.reset_option +generated/pandas.Series.abs,../reference/api/pandas.Series.abs +generated/pandas.Series.add,../reference/api/pandas.Series.add 
+generated/pandas.Series.add_prefix,../reference/api/pandas.Series.add_prefix +generated/pandas.Series.add_suffix,../reference/api/pandas.Series.add_suffix +generated/pandas.Series.agg,../reference/api/pandas.Series.agg +generated/pandas.Series.aggregate,../reference/api/pandas.Series.aggregate +generated/pandas.Series.align,../reference/api/pandas.Series.align +generated/pandas.Series.all,../reference/api/pandas.Series.all +generated/pandas.Series.any,../reference/api/pandas.Series.any +generated/pandas.Series.append,../reference/api/pandas.Series.append +generated/pandas.Series.apply,../reference/api/pandas.Series.apply +generated/pandas.Series.argmax,../reference/api/pandas.Series.argmax +generated/pandas.Series.argmin,../reference/api/pandas.Series.argmin +generated/pandas.Series.argsort,../reference/api/pandas.Series.argsort +generated/pandas.Series.__array__,../reference/api/pandas.Series.__array__ +generated/pandas.Series.array,../reference/api/pandas.Series.array +generated/pandas.Series.as_blocks,../reference/api/pandas.Series.as_blocks +generated/pandas.Series.asfreq,../reference/api/pandas.Series.asfreq +generated/pandas.Series.as_matrix,../reference/api/pandas.Series.as_matrix +generated/pandas.Series.asobject,../reference/api/pandas.Series.asobject +generated/pandas.Series.asof,../reference/api/pandas.Series.asof +generated/pandas.Series.astype,../reference/api/pandas.Series.astype +generated/pandas.Series.at,../reference/api/pandas.Series.at +generated/pandas.Series.at_time,../reference/api/pandas.Series.at_time +generated/pandas.Series.autocorr,../reference/api/pandas.Series.autocorr +generated/pandas.Series.axes,../reference/api/pandas.Series.axes +generated/pandas.Series.between,../reference/api/pandas.Series.between +generated/pandas.Series.between_time,../reference/api/pandas.Series.between_time +generated/pandas.Series.bfill,../reference/api/pandas.Series.bfill +generated/pandas.Series.blocks,../reference/api/pandas.Series.blocks +generated/pandas.Series.bool,../reference/api/pandas.Series.bool +generated/pandas.Series.cat.add_categories,../reference/api/pandas.Series.cat.add_categories +generated/pandas.Series.cat.as_ordered,../reference/api/pandas.Series.cat.as_ordered +generated/pandas.Series.cat.as_unordered,../reference/api/pandas.Series.cat.as_unordered +generated/pandas.Series.cat.categories,../reference/api/pandas.Series.cat.categories +generated/pandas.Series.cat.codes,../reference/api/pandas.Series.cat.codes +generated/pandas.Series.cat,../reference/api/pandas.Series.cat +generated/pandas.Series.cat.ordered,../reference/api/pandas.Series.cat.ordered +generated/pandas.Series.cat.remove_categories,../reference/api/pandas.Series.cat.remove_categories +generated/pandas.Series.cat.remove_unused_categories,../reference/api/pandas.Series.cat.remove_unused_categories +generated/pandas.Series.cat.rename_categories,../reference/api/pandas.Series.cat.rename_categories +generated/pandas.Series.cat.reorder_categories,../reference/api/pandas.Series.cat.reorder_categories +generated/pandas.Series.cat.set_categories,../reference/api/pandas.Series.cat.set_categories +generated/pandas.Series.clip,../reference/api/pandas.Series.clip +generated/pandas.Series.clip_lower,../reference/api/pandas.Series.clip_lower +generated/pandas.Series.clip_upper,../reference/api/pandas.Series.clip_upper +generated/pandas.Series.combine_first,../reference/api/pandas.Series.combine_first +generated/pandas.Series.combine,../reference/api/pandas.Series.combine 
+generated/pandas.Series.compress,../reference/api/pandas.Series.compress +generated/pandas.Series.convert_objects,../reference/api/pandas.Series.convert_objects +generated/pandas.Series.copy,../reference/api/pandas.Series.copy +generated/pandas.Series.corr,../reference/api/pandas.Series.corr +generated/pandas.Series.count,../reference/api/pandas.Series.count +generated/pandas.Series.cov,../reference/api/pandas.Series.cov +generated/pandas.Series.cummax,../reference/api/pandas.Series.cummax +generated/pandas.Series.cummin,../reference/api/pandas.Series.cummin +generated/pandas.Series.cumprod,../reference/api/pandas.Series.cumprod +generated/pandas.Series.cumsum,../reference/api/pandas.Series.cumsum +generated/pandas.Series.data,../reference/api/pandas.Series.data +generated/pandas.Series.describe,../reference/api/pandas.Series.describe +generated/pandas.Series.diff,../reference/api/pandas.Series.diff +generated/pandas.Series.div,../reference/api/pandas.Series.div +generated/pandas.Series.divide,../reference/api/pandas.Series.divide +generated/pandas.Series.divmod,../reference/api/pandas.Series.divmod +generated/pandas.Series.dot,../reference/api/pandas.Series.dot +generated/pandas.Series.drop_duplicates,../reference/api/pandas.Series.drop_duplicates +generated/pandas.Series.drop,../reference/api/pandas.Series.drop +generated/pandas.Series.droplevel,../reference/api/pandas.Series.droplevel +generated/pandas.Series.dropna,../reference/api/pandas.Series.dropna +generated/pandas.Series.dt.ceil,../reference/api/pandas.Series.dt.ceil +generated/pandas.Series.dt.components,../reference/api/pandas.Series.dt.components +generated/pandas.Series.dt.date,../reference/api/pandas.Series.dt.date +generated/pandas.Series.dt.day,../reference/api/pandas.Series.dt.day +generated/pandas.Series.dt.day_name,../reference/api/pandas.Series.dt.day_name +generated/pandas.Series.dt.dayofweek,../reference/api/pandas.Series.dt.dayofweek +generated/pandas.Series.dt.day_of_week,../reference/api/pandas.Series.dt.day_of_week +generated/pandas.Series.dt.dayofyear,../reference/api/pandas.Series.dt.dayofyear +generated/pandas.Series.dt.day_of_year,../reference/api/pandas.Series.dt.day_of_year +generated/pandas.Series.dt.days,../reference/api/pandas.Series.dt.days +generated/pandas.Series.dt.days_in_month,../reference/api/pandas.Series.dt.days_in_month +generated/pandas.Series.dt.daysinmonth,../reference/api/pandas.Series.dt.daysinmonth +generated/pandas.Series.dt.end_time,../reference/api/pandas.Series.dt.end_time +generated/pandas.Series.dt.floor,../reference/api/pandas.Series.dt.floor +generated/pandas.Series.dt.freq,../reference/api/pandas.Series.dt.freq +generated/pandas.Series.dt.hour,../reference/api/pandas.Series.dt.hour +generated/pandas.Series.dt,../reference/api/pandas.Series.dt +generated/pandas.Series.dt.is_leap_year,../reference/api/pandas.Series.dt.is_leap_year +generated/pandas.Series.dt.is_month_end,../reference/api/pandas.Series.dt.is_month_end +generated/pandas.Series.dt.is_month_start,../reference/api/pandas.Series.dt.is_month_start +generated/pandas.Series.dt.is_quarter_end,../reference/api/pandas.Series.dt.is_quarter_end +generated/pandas.Series.dt.is_quarter_start,../reference/api/pandas.Series.dt.is_quarter_start +generated/pandas.Series.dt.is_year_end,../reference/api/pandas.Series.dt.is_year_end +generated/pandas.Series.dt.is_year_start,../reference/api/pandas.Series.dt.is_year_start +generated/pandas.Series.dt.microsecond,../reference/api/pandas.Series.dt.microsecond 
+generated/pandas.Series.dt.microseconds,../reference/api/pandas.Series.dt.microseconds +generated/pandas.Series.dt.minute,../reference/api/pandas.Series.dt.minute +generated/pandas.Series.dt.month,../reference/api/pandas.Series.dt.month +generated/pandas.Series.dt.month_name,../reference/api/pandas.Series.dt.month_name +generated/pandas.Series.dt.nanosecond,../reference/api/pandas.Series.dt.nanosecond +generated/pandas.Series.dt.nanoseconds,../reference/api/pandas.Series.dt.nanoseconds +generated/pandas.Series.dt.normalize,../reference/api/pandas.Series.dt.normalize +generated/pandas.Series.dt.quarter,../reference/api/pandas.Series.dt.quarter +generated/pandas.Series.dt.qyear,../reference/api/pandas.Series.dt.qyear +generated/pandas.Series.dt.round,../reference/api/pandas.Series.dt.round +generated/pandas.Series.dt.second,../reference/api/pandas.Series.dt.second +generated/pandas.Series.dt.seconds,../reference/api/pandas.Series.dt.seconds +generated/pandas.Series.dt.start_time,../reference/api/pandas.Series.dt.start_time +generated/pandas.Series.dt.strftime,../reference/api/pandas.Series.dt.strftime +generated/pandas.Series.dt.time,../reference/api/pandas.Series.dt.time +generated/pandas.Series.dt.timetz,../reference/api/pandas.Series.dt.timetz +generated/pandas.Series.dt.to_period,../reference/api/pandas.Series.dt.to_period +generated/pandas.Series.dt.to_pydatetime,../reference/api/pandas.Series.dt.to_pydatetime +generated/pandas.Series.dt.to_pytimedelta,../reference/api/pandas.Series.dt.to_pytimedelta +generated/pandas.Series.dt.total_seconds,../reference/api/pandas.Series.dt.total_seconds +generated/pandas.Series.dt.tz_convert,../reference/api/pandas.Series.dt.tz_convert +generated/pandas.Series.dt.tz,../reference/api/pandas.Series.dt.tz +generated/pandas.Series.dt.tz_localize,../reference/api/pandas.Series.dt.tz_localize +generated/pandas.Series.dt.weekday,../reference/api/pandas.Series.dt.weekday +generated/pandas.Series.dt.week,../reference/api/pandas.Series.dt.week +generated/pandas.Series.dt.weekofyear,../reference/api/pandas.Series.dt.weekofyear +generated/pandas.Series.dt.year,../reference/api/pandas.Series.dt.year +generated/pandas.Series.dtype,../reference/api/pandas.Series.dtype +generated/pandas.Series.dtypes,../reference/api/pandas.Series.dtypes +generated/pandas.Series.duplicated,../reference/api/pandas.Series.duplicated +generated/pandas.Series.empty,../reference/api/pandas.Series.empty +generated/pandas.Series.eq,../reference/api/pandas.Series.eq +generated/pandas.Series.equals,../reference/api/pandas.Series.equals +generated/pandas.Series.ewm,../reference/api/pandas.Series.ewm +generated/pandas.Series.expanding,../reference/api/pandas.Series.expanding +generated/pandas.Series.factorize,../reference/api/pandas.Series.factorize +generated/pandas.Series.ffill,../reference/api/pandas.Series.ffill +generated/pandas.Series.fillna,../reference/api/pandas.Series.fillna +generated/pandas.Series.filter,../reference/api/pandas.Series.filter +generated/pandas.Series.first,../reference/api/pandas.Series.first +generated/pandas.Series.first_valid_index,../reference/api/pandas.Series.first_valid_index +generated/pandas.Series.floordiv,../reference/api/pandas.Series.floordiv +generated/pandas.Series.from_array,../reference/api/pandas.Series.from_array +generated/pandas.Series.from_csv,../reference/api/pandas.Series.from_csv +generated/pandas.Series.ge,../reference/api/pandas.Series.ge +generated/pandas.Series.get,../reference/api/pandas.Series.get 
+generated/pandas.Series.get_value,../reference/api/pandas.Series.get_value +generated/pandas.Series.groupby,../reference/api/pandas.Series.groupby +generated/pandas.Series.gt,../reference/api/pandas.Series.gt +generated/pandas.Series.hasnans,../reference/api/pandas.Series.hasnans +generated/pandas.Series.head,../reference/api/pandas.Series.head +generated/pandas.Series.hist,../reference/api/pandas.Series.hist +generated/pandas.Series,../reference/api/pandas.Series +generated/pandas.Series.iat,../reference/api/pandas.Series.iat +generated/pandas.Series.idxmax,../reference/api/pandas.Series.idxmax +generated/pandas.Series.idxmin,../reference/api/pandas.Series.idxmin +generated/pandas.Series.iloc,../reference/api/pandas.Series.iloc +generated/pandas.Series.imag,../reference/api/pandas.Series.imag +generated/pandas.Series.index,../reference/api/pandas.Series.index +generated/pandas.Series.infer_objects,../reference/api/pandas.Series.infer_objects +generated/pandas.Series.interpolate,../reference/api/pandas.Series.interpolate +generated/pandas.Series.is_copy,../reference/api/pandas.Series.is_copy +generated/pandas.Series.isin,../reference/api/pandas.Series.isin +generated/pandas.Series.is_monotonic_decreasing,../reference/api/pandas.Series.is_monotonic_decreasing +generated/pandas.Series.is_monotonic,../reference/api/pandas.Series.is_monotonic +generated/pandas.Series.is_monotonic_increasing,../reference/api/pandas.Series.is_monotonic_increasing +generated/pandas.Series.isna,../reference/api/pandas.Series.isna +generated/pandas.Series.isnull,../reference/api/pandas.Series.isnull +generated/pandas.Series.is_unique,../reference/api/pandas.Series.is_unique +generated/pandas.Series.item,../reference/api/pandas.Series.item +generated/pandas.Series.items,../reference/api/pandas.Series.items +generated/pandas.Series.__iter__,../reference/api/pandas.Series.__iter__ +generated/pandas.Series.iteritems,../reference/api/pandas.Series.iteritems +generated/pandas.Series.ix,../reference/api/pandas.Series.ix +generated/pandas.Series.keys,../reference/api/pandas.Series.keys +generated/pandas.Series.kurt,../reference/api/pandas.Series.kurt +generated/pandas.Series.kurtosis,../reference/api/pandas.Series.kurtosis +generated/pandas.Series.last,../reference/api/pandas.Series.last +generated/pandas.Series.last_valid_index,../reference/api/pandas.Series.last_valid_index +generated/pandas.Series.le,../reference/api/pandas.Series.le +generated/pandas.Series.loc,../reference/api/pandas.Series.loc +generated/pandas.Series.lt,../reference/api/pandas.Series.lt +generated/pandas.Series.mad,../reference/api/pandas.Series.mad +generated/pandas.Series.map,../reference/api/pandas.Series.map +generated/pandas.Series.mask,../reference/api/pandas.Series.mask +generated/pandas.Series.max,../reference/api/pandas.Series.max +generated/pandas.Series.mean,../reference/api/pandas.Series.mean +generated/pandas.Series.median,../reference/api/pandas.Series.median +generated/pandas.Series.memory_usage,../reference/api/pandas.Series.memory_usage +generated/pandas.Series.min,../reference/api/pandas.Series.min +generated/pandas.Series.mode,../reference/api/pandas.Series.mode +generated/pandas.Series.mod,../reference/api/pandas.Series.mod +generated/pandas.Series.mul,../reference/api/pandas.Series.mul +generated/pandas.Series.multiply,../reference/api/pandas.Series.multiply +generated/pandas.Series.name,../reference/api/pandas.Series.name +generated/pandas.Series.nbytes,../reference/api/pandas.Series.nbytes 
+generated/pandas.Series.ndim,../reference/api/pandas.Series.ndim +generated/pandas.Series.ne,../reference/api/pandas.Series.ne +generated/pandas.Series.nlargest,../reference/api/pandas.Series.nlargest +generated/pandas.Series.nonzero,../reference/api/pandas.Series.nonzero +generated/pandas.Series.notna,../reference/api/pandas.Series.notna +generated/pandas.Series.notnull,../reference/api/pandas.Series.notnull +generated/pandas.Series.nsmallest,../reference/api/pandas.Series.nsmallest +generated/pandas.Series.nunique,../reference/api/pandas.Series.nunique +generated/pandas.Series.pct_change,../reference/api/pandas.Series.pct_change +generated/pandas.Series.pipe,../reference/api/pandas.Series.pipe +generated/pandas.Series.plot.area,../reference/api/pandas.Series.plot.area +generated/pandas.Series.plot.barh,../reference/api/pandas.Series.plot.barh +generated/pandas.Series.plot.bar,../reference/api/pandas.Series.plot.bar +generated/pandas.Series.plot.box,../reference/api/pandas.Series.plot.box +generated/pandas.Series.plot.density,../reference/api/pandas.Series.plot.density +generated/pandas.Series.plot.hist,../reference/api/pandas.Series.plot.hist +generated/pandas.Series.plot,../reference/api/pandas.Series.plot +generated/pandas.Series.plot.kde,../reference/api/pandas.Series.plot.kde +generated/pandas.Series.plot.line,../reference/api/pandas.Series.plot.line +generated/pandas.Series.plot.pie,../reference/api/pandas.Series.plot.pie +generated/pandas.Series.pop,../reference/api/pandas.Series.pop +generated/pandas.Series.pow,../reference/api/pandas.Series.pow +generated/pandas.Series.prod,../reference/api/pandas.Series.prod +generated/pandas.Series.product,../reference/api/pandas.Series.product +generated/pandas.Series.ptp,../reference/api/pandas.Series.ptp +generated/pandas.Series.quantile,../reference/api/pandas.Series.quantile +generated/pandas.Series.radd,../reference/api/pandas.Series.radd +generated/pandas.Series.rank,../reference/api/pandas.Series.rank +generated/pandas.Series.ravel,../reference/api/pandas.Series.ravel +generated/pandas.Series.rdiv,../reference/api/pandas.Series.rdiv +generated/pandas.Series.rdivmod,../reference/api/pandas.Series.rdivmod +generated/pandas.Series.real,../reference/api/pandas.Series.real +generated/pandas.Series.reindex_axis,../reference/api/pandas.Series.reindex_axis +generated/pandas.Series.reindex,../reference/api/pandas.Series.reindex +generated/pandas.Series.reindex_like,../reference/api/pandas.Series.reindex_like +generated/pandas.Series.rename_axis,../reference/api/pandas.Series.rename_axis +generated/pandas.Series.rename,../reference/api/pandas.Series.rename +generated/pandas.Series.reorder_levels,../reference/api/pandas.Series.reorder_levels +generated/pandas.Series.repeat,../reference/api/pandas.Series.repeat +generated/pandas.Series.replace,../reference/api/pandas.Series.replace +generated/pandas.Series.resample,../reference/api/pandas.Series.resample +generated/pandas.Series.reset_index,../reference/api/pandas.Series.reset_index +generated/pandas.Series.rfloordiv,../reference/api/pandas.Series.rfloordiv +generated/pandas.Series.rmod,../reference/api/pandas.Series.rmod +generated/pandas.Series.rmul,../reference/api/pandas.Series.rmul +generated/pandas.Series.rolling,../reference/api/pandas.Series.rolling +generated/pandas.Series.round,../reference/api/pandas.Series.round +generated/pandas.Series.rpow,../reference/api/pandas.Series.rpow +generated/pandas.Series.rsub,../reference/api/pandas.Series.rsub 
+generated/pandas.Series.rtruediv,../reference/api/pandas.Series.rtruediv +generated/pandas.Series.sample,../reference/api/pandas.Series.sample +generated/pandas.Series.searchsorted,../reference/api/pandas.Series.searchsorted +generated/pandas.Series.select,../reference/api/pandas.Series.select +generated/pandas.Series.sem,../reference/api/pandas.Series.sem +generated/pandas.Series.set_axis,../reference/api/pandas.Series.set_axis +generated/pandas.Series.set_value,../reference/api/pandas.Series.set_value +generated/pandas.Series.shape,../reference/api/pandas.Series.shape +generated/pandas.Series.shift,../reference/api/pandas.Series.shift +generated/pandas.Series.size,../reference/api/pandas.Series.size +generated/pandas.Series.skew,../reference/api/pandas.Series.skew +generated/pandas.Series.slice_shift,../reference/api/pandas.Series.slice_shift +generated/pandas.Series.sort_index,../reference/api/pandas.Series.sort_index +generated/pandas.Series.sort_values,../reference/api/pandas.Series.sort_values +generated/pandas.Series.sparse.density,../reference/api/pandas.Series.sparse.density +generated/pandas.Series.sparse.fill_value,../reference/api/pandas.Series.sparse.fill_value +generated/pandas.Series.sparse.from_coo,../reference/api/pandas.Series.sparse.from_coo +generated/pandas.Series.sparse.npoints,../reference/api/pandas.Series.sparse.npoints +generated/pandas.Series.sparse.sp_values,../reference/api/pandas.Series.sparse.sp_values +generated/pandas.Series.sparse.to_coo,../reference/api/pandas.Series.sparse.to_coo +generated/pandas.Series.squeeze,../reference/api/pandas.Series.squeeze +generated/pandas.Series.std,../reference/api/pandas.Series.std +generated/pandas.Series.str.capitalize,../reference/api/pandas.Series.str.capitalize +generated/pandas.Series.str.cat,../reference/api/pandas.Series.str.cat +generated/pandas.Series.str.center,../reference/api/pandas.Series.str.center +generated/pandas.Series.str.contains,../reference/api/pandas.Series.str.contains +generated/pandas.Series.str.count,../reference/api/pandas.Series.str.count +generated/pandas.Series.str.decode,../reference/api/pandas.Series.str.decode +generated/pandas.Series.str.encode,../reference/api/pandas.Series.str.encode +generated/pandas.Series.str.endswith,../reference/api/pandas.Series.str.endswith +generated/pandas.Series.str.extractall,../reference/api/pandas.Series.str.extractall +generated/pandas.Series.str.extract,../reference/api/pandas.Series.str.extract +generated/pandas.Series.str.findall,../reference/api/pandas.Series.str.findall +generated/pandas.Series.str.find,../reference/api/pandas.Series.str.find +generated/pandas.Series.str.fullmatch,../reference/api/pandas.Series.str.fullmatch +generated/pandas.Series.str.get_dummies,../reference/api/pandas.Series.str.get_dummies +generated/pandas.Series.str.get,../reference/api/pandas.Series.str.get +generated/pandas.Series.str,../reference/api/pandas.Series.str +generated/pandas.Series.str.index,../reference/api/pandas.Series.str.index +generated/pandas.Series.str.isalnum,../reference/api/pandas.Series.str.isalnum +generated/pandas.Series.str.isalpha,../reference/api/pandas.Series.str.isalpha +generated/pandas.Series.str.isdecimal,../reference/api/pandas.Series.str.isdecimal +generated/pandas.Series.str.isdigit,../reference/api/pandas.Series.str.isdigit +generated/pandas.Series.str.islower,../reference/api/pandas.Series.str.islower +generated/pandas.Series.str.isnumeric,../reference/api/pandas.Series.str.isnumeric 
+generated/pandas.Series.str.isspace,../reference/api/pandas.Series.str.isspace +generated/pandas.Series.str.istitle,../reference/api/pandas.Series.str.istitle +generated/pandas.Series.str.isupper,../reference/api/pandas.Series.str.isupper +generated/pandas.Series.str.join,../reference/api/pandas.Series.str.join +generated/pandas.Series.str.len,../reference/api/pandas.Series.str.len +generated/pandas.Series.str.ljust,../reference/api/pandas.Series.str.ljust +generated/pandas.Series.str.lower,../reference/api/pandas.Series.str.lower +generated/pandas.Series.str.lstrip,../reference/api/pandas.Series.str.lstrip +generated/pandas.Series.str.match,../reference/api/pandas.Series.str.match +generated/pandas.Series.str.normalize,../reference/api/pandas.Series.str.normalize +generated/pandas.Series.str.pad,../reference/api/pandas.Series.str.pad +generated/pandas.Series.str.partition,../reference/api/pandas.Series.str.partition +generated/pandas.Series.str.repeat,../reference/api/pandas.Series.str.repeat +generated/pandas.Series.str.replace,../reference/api/pandas.Series.str.replace +generated/pandas.Series.str.rfind,../reference/api/pandas.Series.str.rfind +generated/pandas.Series.str.rindex,../reference/api/pandas.Series.str.rindex +generated/pandas.Series.str.rjust,../reference/api/pandas.Series.str.rjust +generated/pandas.Series.str.rpartition,../reference/api/pandas.Series.str.rpartition +generated/pandas.Series.str.rsplit,../reference/api/pandas.Series.str.rsplit +generated/pandas.Series.str.rstrip,../reference/api/pandas.Series.str.rstrip +generated/pandas.Series.str.slice,../reference/api/pandas.Series.str.slice +generated/pandas.Series.str.slice_replace,../reference/api/pandas.Series.str.slice_replace +generated/pandas.Series.str.split,../reference/api/pandas.Series.str.split +generated/pandas.Series.str.startswith,../reference/api/pandas.Series.str.startswith +generated/pandas.Series.str.strip,../reference/api/pandas.Series.str.strip +generated/pandas.Series.str.swapcase,../reference/api/pandas.Series.str.swapcase +generated/pandas.Series.str.title,../reference/api/pandas.Series.str.title +generated/pandas.Series.str.translate,../reference/api/pandas.Series.str.translate +generated/pandas.Series.str.upper,../reference/api/pandas.Series.str.upper +generated/pandas.Series.str.wrap,../reference/api/pandas.Series.str.wrap +generated/pandas.Series.str.zfill,../reference/api/pandas.Series.str.zfill +generated/pandas.Series.sub,../reference/api/pandas.Series.sub +generated/pandas.Series.subtract,../reference/api/pandas.Series.subtract +generated/pandas.Series.sum,../reference/api/pandas.Series.sum +generated/pandas.Series.swapaxes,../reference/api/pandas.Series.swapaxes +generated/pandas.Series.swaplevel,../reference/api/pandas.Series.swaplevel +generated/pandas.Series.tail,../reference/api/pandas.Series.tail +generated/pandas.Series.take,../reference/api/pandas.Series.take +generated/pandas.Series.T,../reference/api/pandas.Series.T +generated/pandas.Series.timetuple,../reference/api/pandas.Series.timetuple +generated/pandas.Series.to_clipboard,../reference/api/pandas.Series.to_clipboard +generated/pandas.Series.to_csv,../reference/api/pandas.Series.to_csv +generated/pandas.Series.to_dict,../reference/api/pandas.Series.to_dict +generated/pandas.Series.to_excel,../reference/api/pandas.Series.to_excel +generated/pandas.Series.to_frame,../reference/api/pandas.Series.to_frame +generated/pandas.Series.to_hdf,../reference/api/pandas.Series.to_hdf 
+generated/pandas.Series.to_json,../reference/api/pandas.Series.to_json +generated/pandas.Series.to_latex,../reference/api/pandas.Series.to_latex +generated/pandas.Series.to_list,../reference/api/pandas.Series.to_list +generated/pandas.Series.tolist,../reference/api/pandas.Series.tolist +generated/pandas.Series.to_numpy,../reference/api/pandas.Series.to_numpy +generated/pandas.Series.to_period,../reference/api/pandas.Series.to_period +generated/pandas.Series.to_pickle,../reference/api/pandas.Series.to_pickle +generated/pandas.Series.to_sql,../reference/api/pandas.Series.to_sql +generated/pandas.Series.to_string,../reference/api/pandas.Series.to_string +generated/pandas.Series.to_timestamp,../reference/api/pandas.Series.to_timestamp +generated/pandas.Series.to_xarray,../reference/api/pandas.Series.to_xarray +generated/pandas.Series.transform,../reference/api/pandas.Series.transform +generated/pandas.Series.transpose,../reference/api/pandas.Series.transpose +generated/pandas.Series.truediv,../reference/api/pandas.Series.truediv +generated/pandas.Series.truncate,../reference/api/pandas.Series.truncate +generated/pandas.Series.tshift,../reference/api/pandas.Series.tshift +generated/pandas.Series.tz_convert,../reference/api/pandas.Series.tz_convert +generated/pandas.Series.tz_localize,../reference/api/pandas.Series.tz_localize +generated/pandas.Series.unique,../reference/api/pandas.Series.unique +generated/pandas.Series.unstack,../reference/api/pandas.Series.unstack +generated/pandas.Series.update,../reference/api/pandas.Series.update +generated/pandas.Series.valid,../reference/api/pandas.Series.valid +generated/pandas.Series.value_counts,../reference/api/pandas.Series.value_counts +generated/pandas.Series.values,../reference/api/pandas.Series.values +generated/pandas.Series.var,../reference/api/pandas.Series.var +generated/pandas.Series.view,../reference/api/pandas.Series.view +generated/pandas.Series.where,../reference/api/pandas.Series.where +generated/pandas.Series.xs,../reference/api/pandas.Series.xs +generated/pandas.set_option,../reference/api/pandas.set_option +generated/pandas.SparseDataFrame.to_coo,../reference/api/pandas.SparseDataFrame.to_coo +generated/pandas.SparseSeries.from_coo,../reference/api/pandas.SparseSeries.from_coo +generated/pandas.SparseSeries.to_coo,../reference/api/pandas.SparseSeries.to_coo +generated/pandas.test,../reference/api/pandas.test +generated/pandas.testing.assert_frame_equal,../reference/api/pandas.testing.assert_frame_equal +generated/pandas.testing.assert_index_equal,../reference/api/pandas.testing.assert_index_equal +generated/pandas.testing.assert_series_equal,../reference/api/pandas.testing.assert_series_equal +generated/pandas.Timedelta.asm8,../reference/api/pandas.Timedelta.asm8 +generated/pandas.Timedelta.ceil,../reference/api/pandas.Timedelta.ceil +generated/pandas.Timedelta.components,../reference/api/pandas.Timedelta.components +generated/pandas.Timedelta.days,../reference/api/pandas.Timedelta.days +generated/pandas.Timedelta.delta,../reference/api/pandas.Timedelta.delta +generated/pandas.Timedelta.floor,../reference/api/pandas.Timedelta.floor +generated/pandas.Timedelta.freq,../reference/api/pandas.Timedelta.freq +generated/pandas.Timedelta,../reference/api/pandas.Timedelta +generated/pandas.TimedeltaIndex.ceil,../reference/api/pandas.TimedeltaIndex.ceil +generated/pandas.TimedeltaIndex.components,../reference/api/pandas.TimedeltaIndex.components +generated/pandas.TimedeltaIndex.days,../reference/api/pandas.TimedeltaIndex.days 
+generated/pandas.TimedeltaIndex.floor,../reference/api/pandas.TimedeltaIndex.floor +generated/pandas.TimedeltaIndex,../reference/api/pandas.TimedeltaIndex +generated/pandas.TimedeltaIndex.inferred_freq,../reference/api/pandas.TimedeltaIndex.inferred_freq +generated/pandas.TimedeltaIndex.microseconds,../reference/api/pandas.TimedeltaIndex.microseconds +generated/pandas.TimedeltaIndex.nanoseconds,../reference/api/pandas.TimedeltaIndex.nanoseconds +generated/pandas.TimedeltaIndex.round,../reference/api/pandas.TimedeltaIndex.round +generated/pandas.TimedeltaIndex.seconds,../reference/api/pandas.TimedeltaIndex.seconds +generated/pandas.TimedeltaIndex.to_frame,../reference/api/pandas.TimedeltaIndex.to_frame +generated/pandas.TimedeltaIndex.to_pytimedelta,../reference/api/pandas.TimedeltaIndex.to_pytimedelta +generated/pandas.TimedeltaIndex.to_series,../reference/api/pandas.TimedeltaIndex.to_series +generated/pandas.Timedelta.isoformat,../reference/api/pandas.Timedelta.isoformat +generated/pandas.Timedelta.is_populated,../reference/api/pandas.Timedelta.is_populated +generated/pandas.Timedelta.max,../reference/api/pandas.Timedelta.max +generated/pandas.Timedelta.microseconds,../reference/api/pandas.Timedelta.microseconds +generated/pandas.Timedelta.min,../reference/api/pandas.Timedelta.min +generated/pandas.Timedelta.nanoseconds,../reference/api/pandas.Timedelta.nanoseconds +generated/pandas.timedelta_range,../reference/api/pandas.timedelta_range +generated/pandas.Timedelta.resolution,../reference/api/pandas.Timedelta.resolution +generated/pandas.Timedelta.round,../reference/api/pandas.Timedelta.round +generated/pandas.Timedelta.seconds,../reference/api/pandas.Timedelta.seconds +generated/pandas.Timedelta.to_pytimedelta,../reference/api/pandas.Timedelta.to_pytimedelta +generated/pandas.Timedelta.total_seconds,../reference/api/pandas.Timedelta.total_seconds +generated/pandas.Timedelta.to_timedelta64,../reference/api/pandas.Timedelta.to_timedelta64 +generated/pandas.Timedelta.value,../reference/api/pandas.Timedelta.value +generated/pandas.Timedelta.view,../reference/api/pandas.Timedelta.view +generated/pandas.Timestamp.asm8,../reference/api/pandas.Timestamp.asm8 +generated/pandas.Timestamp.astimezone,../reference/api/pandas.Timestamp.astimezone +generated/pandas.Timestamp.ceil,../reference/api/pandas.Timestamp.ceil +generated/pandas.Timestamp.combine,../reference/api/pandas.Timestamp.combine +generated/pandas.Timestamp.ctime,../reference/api/pandas.Timestamp.ctime +generated/pandas.Timestamp.date,../reference/api/pandas.Timestamp.date +generated/pandas.Timestamp.day,../reference/api/pandas.Timestamp.day +generated/pandas.Timestamp.day_name,../reference/api/pandas.Timestamp.day_name +generated/pandas.Timestamp.dayofweek,../reference/api/pandas.Timestamp.dayofweek +generated/pandas.Timestamp.day_of_week,../reference/api/pandas.Timestamp.day_of_week +generated/pandas.Timestamp.dayofyear,../reference/api/pandas.Timestamp.dayofyear +generated/pandas.Timestamp.day_of_year,../reference/api/pandas.Timestamp.day_of_year +generated/pandas.Timestamp.days_in_month,../reference/api/pandas.Timestamp.days_in_month +generated/pandas.Timestamp.daysinmonth,../reference/api/pandas.Timestamp.daysinmonth +generated/pandas.Timestamp.dst,../reference/api/pandas.Timestamp.dst +generated/pandas.Timestamp.floor,../reference/api/pandas.Timestamp.floor +generated/pandas.Timestamp.fold,../reference/api/pandas.Timestamp.fold +generated/pandas.Timestamp.freq,../reference/api/pandas.Timestamp.freq 
+generated/pandas.Timestamp.freqstr,../reference/api/pandas.Timestamp.freqstr +generated/pandas.Timestamp.fromisoformat,../reference/api/pandas.Timestamp.fromisoformat +generated/pandas.Timestamp.fromordinal,../reference/api/pandas.Timestamp.fromordinal +generated/pandas.Timestamp.fromtimestamp,../reference/api/pandas.Timestamp.fromtimestamp +generated/pandas.Timestamp.hour,../reference/api/pandas.Timestamp.hour +generated/pandas.Timestamp,../reference/api/pandas.Timestamp +generated/pandas.Timestamp.is_leap_year,../reference/api/pandas.Timestamp.is_leap_year +generated/pandas.Timestamp.is_month_end,../reference/api/pandas.Timestamp.is_month_end +generated/pandas.Timestamp.is_month_start,../reference/api/pandas.Timestamp.is_month_start +generated/pandas.Timestamp.isocalendar,../reference/api/pandas.Timestamp.isocalendar +generated/pandas.Timestamp.isoformat,../reference/api/pandas.Timestamp.isoformat +generated/pandas.Timestamp.isoweekday,../reference/api/pandas.Timestamp.isoweekday +generated/pandas.Timestamp.is_quarter_end,../reference/api/pandas.Timestamp.is_quarter_end +generated/pandas.Timestamp.is_quarter_start,../reference/api/pandas.Timestamp.is_quarter_start +generated/pandas.Timestamp.is_year_end,../reference/api/pandas.Timestamp.is_year_end +generated/pandas.Timestamp.is_year_start,../reference/api/pandas.Timestamp.is_year_start +generated/pandas.Timestamp.max,../reference/api/pandas.Timestamp.max +generated/pandas.Timestamp.microsecond,../reference/api/pandas.Timestamp.microsecond +generated/pandas.Timestamp.min,../reference/api/pandas.Timestamp.min +generated/pandas.Timestamp.minute,../reference/api/pandas.Timestamp.minute +generated/pandas.Timestamp.month,../reference/api/pandas.Timestamp.month +generated/pandas.Timestamp.month_name,../reference/api/pandas.Timestamp.month_name +generated/pandas.Timestamp.nanosecond,../reference/api/pandas.Timestamp.nanosecond +generated/pandas.Timestamp.normalize,../reference/api/pandas.Timestamp.normalize +generated/pandas.Timestamp.now,../reference/api/pandas.Timestamp.now +generated/pandas.Timestamp.quarter,../reference/api/pandas.Timestamp.quarter +generated/pandas.Timestamp.replace,../reference/api/pandas.Timestamp.replace +generated/pandas.Timestamp.resolution,../reference/api/pandas.Timestamp.resolution +generated/pandas.Timestamp.round,../reference/api/pandas.Timestamp.round +generated/pandas.Timestamp.second,../reference/api/pandas.Timestamp.second +generated/pandas.Timestamp.strftime,../reference/api/pandas.Timestamp.strftime +generated/pandas.Timestamp.strptime,../reference/api/pandas.Timestamp.strptime +generated/pandas.Timestamp.time,../reference/api/pandas.Timestamp.time +generated/pandas.Timestamp.timestamp,../reference/api/pandas.Timestamp.timestamp +generated/pandas.Timestamp.timetuple,../reference/api/pandas.Timestamp.timetuple +generated/pandas.Timestamp.timetz,../reference/api/pandas.Timestamp.timetz +generated/pandas.Timestamp.to_datetime64,../reference/api/pandas.Timestamp.to_datetime64 +generated/pandas.Timestamp.today,../reference/api/pandas.Timestamp.today +generated/pandas.Timestamp.to_julian_date,../reference/api/pandas.Timestamp.to_julian_date +generated/pandas.Timestamp.toordinal,../reference/api/pandas.Timestamp.toordinal +generated/pandas.Timestamp.to_period,../reference/api/pandas.Timestamp.to_period +generated/pandas.Timestamp.to_pydatetime,../reference/api/pandas.Timestamp.to_pydatetime +generated/pandas.Timestamp.tz_convert,../reference/api/pandas.Timestamp.tz_convert 
+generated/pandas.Timestamp.tz,../reference/api/pandas.Timestamp.tz +generated/pandas.Timestamp.tzinfo,../reference/api/pandas.Timestamp.tzinfo +generated/pandas.Timestamp.tz_localize,../reference/api/pandas.Timestamp.tz_localize +generated/pandas.Timestamp.tzname,../reference/api/pandas.Timestamp.tzname +generated/pandas.Timestamp.utcfromtimestamp,../reference/api/pandas.Timestamp.utcfromtimestamp +generated/pandas.Timestamp.utcnow,../reference/api/pandas.Timestamp.utcnow +generated/pandas.Timestamp.utcoffset,../reference/api/pandas.Timestamp.utcoffset +generated/pandas.Timestamp.utctimetuple,../reference/api/pandas.Timestamp.utctimetuple +generated/pandas.Timestamp.value,../reference/api/pandas.Timestamp.value +generated/pandas.Timestamp.weekday,../reference/api/pandas.Timestamp.weekday +generated/pandas.Timestamp.weekday_name,../reference/api/pandas.Timestamp.weekday_name +generated/pandas.Timestamp.week,../reference/api/pandas.Timestamp.week +generated/pandas.Timestamp.weekofyear,../reference/api/pandas.Timestamp.weekofyear +generated/pandas.Timestamp.year,../reference/api/pandas.Timestamp.year +generated/pandas.to_datetime,../reference/api/pandas.to_datetime +generated/pandas.to_numeric,../reference/api/pandas.to_numeric +generated/pandas.to_timedelta,../reference/api/pandas.to_timedelta +generated/pandas.tseries.frequencies.to_offset,../reference/api/pandas.tseries.frequencies.to_offset +generated/pandas.unique,../reference/api/pandas.unique +generated/pandas.util.hash_array,../reference/api/pandas.util.hash_array +generated/pandas.util.hash_pandas_object,../reference/api/pandas.util.hash_pandas_object +generated/pandas.wide_to_long,../reference/api/pandas.wide_to_long + +# Cached searches +reference/api/pandas.DataFrame.from_csv,pandas.read_csv diff --git a/doc/source/_static/ci.png b/doc/source/_static/ci.png new file mode 100644 index 0000000000000000000000000000000000000000..4754dc2945db530fafc0406346383dbbcf2bbdb1 GIT binary patch literal 555245
[binary image data for doc/source/_static/ci.png omitted]
zO%wbq4ZHThYoDw?do?4wuk4cc;pq*!o?Q^(Ge5x@&C;Y}bJ8_`>v_u}n9E93t*r*q zg->h!J|1%8Lp_btoqd%Y`;Ti}a^cYT9rYCefh0!BXKO8K1!I#32e43Ultoe$McUrO zJ(Jc+drnAQ2h9%MK+ZcM@Kt z{FU)%j?u)x-LO$W19J67#7$;uu0_Wdz9-@#U!)Vi(iQ+ABJtRyb9@oCXKW*;$Turd zS7+Jd3BIJSv0->SbBf-uCipYP?7zo23OuD%02DFT zY1tU!|FbCLu|2n!MfljglgC65Og9+tO8b7to3SBpgIPbuY}=CLE2JP3Zq)8HG9DS8-)TtWK3=nv?z9kj& zERDBrG{EM~I18SG*@x>1F^S1HmERV>$ITD^!RKx>XsC(@EMIJSW)?C!Nd`$_pVqlm z)BwKDKv9Y-wU~3Q%x2hEUX}rZu!TpwGE_Dy^UBYu@ceo&=mZ_+NMrIi+oWVwvM+4=&@`D*V+b# zRlN5R&#C%exM?iM?CUD&fim2J(q7%6?#_;TravLvr}jLRWaGLAfA$fC0FNQb@mV;O z{Oawy=Y(Ukhm_gidFS-sgTE!7f>EA$l#X050fG6W7Yr5KG!|@(1M(@*yZ7JK{e)b` zdR0@PBG(=ZQ3lI1jFz^khw_i6i?9~QUCsCnT8T?7BuTZZH%qHF?^bMmgX;5bu}H&T zn@q5u-YIN0X;ajB-X~sF9vd(8Ntc33pQWHHmg!!NPo>gf+_e#$+OKyp{Z2YTfwCn( z(#PT{kpN5+t7GhJG3(H^Amck+JnUs72(WT5n=>raf4YhoEJ^HaFAtaJ@uFsB+4YpB zgk1%kJ(eX|hg*3U4qICQPOgS)yVc8A9Bn1B)-Zir%c8T?Jt&f%L2Y?RAZo{vhvSol zx!=v4p_{Vz)SPg#?8AczUUCb4)r`kpM#JbsKd49ERTB_lnIYcz*;iU%`6dE11kX5E zs>JO34r3S@fIVjc*?vlocN~XAJ=NsTirD^or_H%Kx-b?n7yg7)tf6n#rhKRhndwWYa<^8U=@T;6tGK74P6e>@m2|E2 zE@_dhO?9_Dhc>*?QMnUsUd!dNV*iQ)YgIOl`slvuMa~r%3LU|76BB6501_rc09JWr z$vd3K)zM{P^2JAb^17kr`SGZe3&d!_?Tnf|cEO=LE&8MMeyCCG$F}3pABuni^e71PRF0uWQSQzgqw0^O9tWQ$=x%F3ruoEkH{!ZAL z=RGzg(7jGiaom}BmUIS{8G?;((dt}kVFBRJh{$prXRiBWMqf&A0v7KT@Iy&!#xk}k zr;^XvjPghsb0IG3VTc6fz7FFDEaD8MAg>D(5BSUM48#uDxN8&=f5`g@HuXuTwLdm z*H?Yys(y@gIDv0$_*)OiPA%U~5b2fIwSoOmubA~RYjuL&%&n>Ey0>%B&NPF8<3chb z=3{1Mg5Man1M~2_1tTDa-IG4rFN^W3+Jjc15_7I{B9t}VQ-ad;V<5ATMo=YV+8>^- zr_n>1vCP;wH!Lrv>IgOpG!Q8jo>u*>a-j5p_*19*xDsT3UjZ!d5NNEh|K}fW1pOLj zC+!Es#w@mIKBpC2=9OCG&Mp*RDA`J<)Qx^BOYi(mrfMjigx*=c9-Yo|+io`d=3~(? zgi>jwEi=tj2=1qqt8wpBjjSSBcK1^huFw%krGKw(XW4M-23R0-k&^~ylAgE(T-5m6 z4;uUAqdoQ$?LFij@SYiE4{Qtb z{8OhnjEO^zP%&`O{O-dT=f>^YZc6jge3wTKM#rn4Mkcd0K5S=<=$c1B314pN1S-sH zxC=%kXo5W#guFyVc2k?T#TK5ORu^B->H^16+DV_|+Risg`N48j@cHxtUQFaRo>n+C z9J*;(=|*X%v<014?mJ}7a??P(KU929T~5Hko$F&IIGH>OC;*%*F&9s$?}=s+voWf*F<0Fo;l;Swa;d2l>Zr#0$NmU4V4ztI4-{3$}9KWx9r-VgV`_N zVf@$zqWpWG4v+6`ezHK@T&I2-do*ZW&PRTl&yGB;X2Vob2ZnA67ApWNH?0i~of+nw z&Q$De$DEYD-{NpNZ0Y6`cj-MjL8jNot{Q(9BzsHv^5b`y)Vk1l{|fZdqDU+7W6xuv z=n%f1A@!P~K!meFxftkU>Z&DljQXJm7NpMUBhDH(l9&4bnD4EkE=;iXAIHv6lH zMyriIoby2?)van%&GNpra>Bhzn3p88~%>S+6d z@wn;PsZk4XI4DBDi$2WQl5^v#Y$X~g*92Mca8Lu?5HtxvTL?LjkpZw+qO~rd;jm`| z^aVfycB=6-h97${(-l+Nd0`d35%_mtsjc!9;rA{Psj~>}yCBMx2u%FT^#uJ-)5q36 zPp@Y{eS&TbY>@JLzuO4>V}Jc^O}x0CYW0I3(1zMQO@D$EEYXbFm-q-D+EAq*6qm;$ zYBFUSbIyc^I_u#T$&n%PXSZ66gSH$VHdMI;9khKq9f84Vr5WX7@=qcDF(L~or|U>R zm-OVC(Vss`<>g;QjB?$2&*q@I%8XzmSe-~gQ>tqlzR*i~nV{2>ZWgOCt()Fv1{gKD z=TJ2byf_@Hxx&r2p;~;3Zso+6KUr}8nVo9Q1g(6Hrz)QQeU68}vNKIyL`%Q!{7=t9odd(28OVWY4K;Ob*Q4t z1+nie&2I?!*Miu4jEK3kSO2yRhE`Ev=YCxj;p|sbv|icW z)IN)TiFPYWeSdc94_4Djwz5LsjYsRd4_qq@J-f96$X;x?w=-^x5e&^=@g(b;Yd9b{ z*^N_Gi{9X>bKK{c`ks+9AXUtPzqg-sL;sVZ9|czRaEt&GQ|(}AY&syu z_vogx_%Yq09CuTKokq_+ye?sSpdR9r#^ZG)(S%62sBNyXi{*P`x`%1$c*IJB+PF`J z@0yYzGh20nXTWBfBF|ld6H&dJB35T|M6v}tz1DB1;aaz4Sw>Lr9$szS>U_tdZgO1h zeaR7Ef6lQ2aDpC_y8dM@d;Z3wEVHMPC5e|x0_LZ&dILk2=!3ZXzq=>8Kj^({ccT-e zei%euDFN6axrTsC1?#sY;~nq*AUxY*Mxy-r>2)E)6sRqd>(cG%L&ptp7#BxTIMsM} zv83_?FRtGuRMP~zp3M3aRN@CLzm`(iu5{5j7*~h0=w5dH6H@Dyh}uvbfM7yT*A|5` zArD2kj+KtYhe4EAnx7dz_OT6_uIS{u1iPghG*!aRuWjohav5ydCxj?lO3yatNLa1u zlft85JSA&1e)>aWW%Z)&A3_ycw|63euYh+n7Cp^;G&DuOW)}w&r5JLI_~lrHuTYIY z#A?J$#jEyx^uFhAtsOZF3;kSjANo5Z*Q70*dTD)fOgb9?Nh@M?I2env`J_iNa)6DW zitHoFwY-tv{!7cm3Oy0yNzOY}76I@IV~274lmv`|+si=Q4;uCpopmg9^OG&)Fk@h+ zy?>`Y$qW0p%HllaGbGVul~2X6r{o69JU%!7(0jV@~kFi3}Cc=^=P0-D1 zjHP;NPisJc0HgmGLgK#aV*_GfW6?X5hYD$RPQl75L$0n7&o0j{FN=cJ_ud_0D0 
zoi-a%gDiU1A^c(_px65Rdck;kqt{eGm$u&n`CEYEKr5-kQCyVG$PD=hmqfdWwz%&2 zMVN6`&ySC8*tin?oFqwlMZscc_4dvUe(Y1=h4gx|v!Mdo|Czo!8@ycteywu*?N_%; zhpBgd8(b57-uZycmkq%OCuOv8$oT>D z7d1513Jen=L-W4*SF%XxU$?)I3|a;9v(YB~&s$>~xXVd3($)B`1O2DEnE|IA$JR?T z#><2E*UQ9{ zkUj0c9>i}{?U0859c0|uP@|$*L*)s)lX^WG*Dd?pa{K^Gk(XWFca}Z!^*SgI5Rlda zZIJ#neSlR5W5k#5#hU^_Qd}wA2oq`||6T%{- zOl7Uip~-AikNb4XtMpogwMr_!f`}76{vVy_Aly&v^yb2;hi4L}= zc}zU!Mx_(aSa4>*#dT-Yi2|S-;M@IOZVf7fi$&YZ;-ucs-;%IHpOXDBV3uW+QxPq6 ztM;lseTSB^HWNwdkI=35^u#j*HYR)S^&y{UoP69!qKCHkVESHMn+1R#+oic!)pZ71 zCX&}?(qr*SzD3O78KpqENQsk10j66d9Sgu4^9f9y2{Jy<3VHfn z``o1Uux9wRKL^P+uI+zxJeRZyjrEL0_iueM`{Jskgr4&BF0oKxW3WB%Uu3{QAsc#qbPw|LeFT3RkE0AXAKGd_9V0 zn%@~%m0yHv91e)kSG4|^tvShs<+C>_p(=iZU*J8gt0g9Ct#HLWd}$3EDLk<=VOv35 zfP@L?zRQYS3hVJ=Ve8bA!a_I@V3hiW^VD2&Se<1T8-x0V)3noe!vhV{P-P14a0&GA zQ0&OmP9kHQXV22!V}&h$>o#31-e`z@tnWy5BFu?qzR|umwF0@rhD_+zt_{}$gyBBo z{>Uy7efOmh>k-+2yFBsI88%QWw4JV|R|GWtNjP*1x`v4>!t+^X+;v$o%<64p8~&$v;}K{CY%BX2ztmUl3Z&YLZ5)1c@WzvAAPhO z{sUKXQ7*5});)U^@XNnf4F0}+>M3pijMzAO6xb(%(9?x0eFP66eM?);yD|h)acy6S zW6$NyVx2I<4Q8$y2*`$Ix0`nodp9hjMlOWp`{v8IPRljfj~(diWtjq|Z+ja<#ICY@ zvj8`(z&c^>ACdF(V+FxKkaX5SoRg$9h&AP0z%Ez#Wi>dtepjorC-Ad=E>a}US+98* z(f9c-qUx>;$cpx>Cz%8XU5FnO8f z67k%dg>y&h{rYz03Ul_=)dzas_g2|3kj|V)=@xCSBVbM6 zE2eOVD2{yldh(T|1PW>J{#?e}#|E?gnReLtB$V`vNldR9Y8zh{{VRa4Djt0DsXVW9 z|B+3O%3FNrzbyc>v4+RMH$ZVS5~aGVo* zqR6}Yp>8Moi*-Rvgt3={kkiHNU|Q?J=+n(Sb@8u>ul+7Vcw`iYpR~8COH+4`xVNA& zl2|4>&(XWoL!8XV-)oT#eUDRLb#HCG$2rZJu}UNBBh945x8aNyz?&tS~ENg%G0bspvJ=DJsXPj$4KHIsqPB*uFq;{$oJQ^2fu z^MFkRdvk4OGcH0rtm^S1V3nEhZ0SMBZlfK&iW1M3dX!)3o6!xxN`%jQT0K~huW`fi zw^K=${|*97hKfepRoo|zGbzN74};#hldGC&vIV;Se&M7Et>!uXve4FHOi1-#hzHHT zz}zMQ1UAR1_CvRO;x~AgwK8GwTDVC$P10%B0ao+UYgP3I*bxvA_s{~&e2})o3ztWA zzxFc8tT}~+Rv6|9_*30$LkUT=Itv@7Q@RL}#}tUCZbkr<=iOIq7jtI52YMnA7n_4V ztnTvMd-w{_8_R&G6T%1JJRM`qyPq#-Ort^%#a5rZ$X_p)2MLC5NXoHUGB9iRa?#o~ z#3(N~=x+( zx)Jx?ZR>e`a+@n<7zv6m|S7fum!=4X9pV}n|k@ZJ5({g(!Kl~P_*Yn}vD|9L3E z8j6#Sl{RW&V+V@eX1(5s$n4I#TTV}M;$+ZhyY&8)Y_0TVPIS`zW&|$I&uqLOq$jWbaeVd_8UnmzY0h;vu`=_(1$0dFC-iJauIVG5 zt{Q4R-%nJ_^~k6puaL=dSXsEs{U}cq`V5k}+f#F9_+gC53Zu@pozd0jsuiDoJ;Xe( z{fdBfm*J@zPN9Lvcy`OsV$riHP|@hGc-37xzOZ%&=Y z6KXC^k@o6h{{M%Y^OmZ{EIRkA82Pjz88e&o)&JcI{_}qRJ(|Dn1+A==$A4F2{ynJw z>a+E4N&Ock^=4bVg$UT28O*v%J6QsUj77+cToq#VV0ogCvy)iL(%T()wW3)_`E3o} zB)67wY!nWT?c%o5KbrcUcBxlv2#yjz=htP_yV6;%VN};badEbXwI2Vo6ETl3PZrU? 
zJbaL?6gK0V>Vmev*%IOTBU#|r@F4e#CTt^*BQ>+V1tnx*RvOa(a1K|PT z4s^|M`Rx(Iv7ri~RPjgIgC+?CA1A*x6_m<_C8jEQ@Jhx*8oT_#5qeK*~P()#>kFCo6q3%<; zy4Q|Z_)n}=Od*5#+bLF=pF%Ue?8^%xl>dkT`dOEc{o~cl%QY_t=qdjyqj!OZ17}>WE~}i*QaXGrL;tI$l1rmfDH$IQ!L6RqS8nJ&d|V- zTamG93_CuVJ3{tkjh0tBQIp>(E;}Amg^Yb!Icpn&AS8UM47yB+<21TvVZSO2g^A$F zdQ%yN7=`=$|Mge0zDM9;$E+O zdz)G>_Rr?zLh3*DPRQxMuZHCYrKwCbH}J+=CQ?AH{EueM-lX8wQEJ`#mqPN73GWnp zmkIo%X}wTkb1h%!vtx;E?B!iP)VBIZbGXT1?D`8k(*=zQrWtS!>lK6lY|-=F*%;$UhwS39ameT(>wlg{fDyJ`bbw&m|10 zc)ajl`#0wBw@Uc%AbfbAtFb3-jxS_YSM(OzF(_Yus=+-L0{wjDc+SN@g{de1 zep&r=B`<4b0au>@4N)V}MpiF8V|7Jgas4;~W}Jr|g6zC*lUrc8m7l@We4u~6Q!8A& zoa+|q%Ams25an(+!RT!_!Luu9nv`KL_q zt)kBMhFl3$E2V~`@i(^cEHvwF7GlyvDBuv zPET_S9)glPRPmb&J&EmnEoR@zX|8dt6AAAwt3FGcSH2QvQAc%^+qxj~B{#kD`shp; zj4C>SoWqd^VQ2UJ$?a{$&oRqc0#hxu4{mthtV)hVeDx7fM0bb)ZAUOqIGr-)RN@KZ zIJK#96!zjDkEG7%kK$#6OGuBPpV?m>$j>hra9wktfqCG=gK z)Wv#e9k=s2WT$Thd3)o60fMN;C057*}_yY(A9Imn&=;}9uV}tQBmrP(6_PZ{E$Wou#4n)NU>6`MexG5eVPtR zcg6wwunZ+lm(ed`01zG*HihKS$M2TixHCc)q)}FG8t~e`GVJigV3ea}mZC>ssPzXnTGItY*)FXvcRb=FgFVE?MoM|X zuGVZaK%C#?#z0W-PQ=gHeA09jqLaMrtX%>%v32J8B$mR2N*?@{wHjh1UQ2K}SAnIz)%>G1P#HTFm=*5ibP-zJ>yG z%WEi_EFyHnpVTjy68_ayio*25l!X|xagUk%E_A#BFa-isAWZzdyYkUkKOf$i61UMq zXpTyz%;C=O@_4%N-TF#6m%L_^GBRBR0cwd*pUV84=pOwh+%x3BIg)b z_SOSO_(eFjuH2^G8_B`i8kVVJr5IMJCK z4Mtt^EmJ1G!5giQXH8q-qX5P-Be|TxXSu{`v)Ej98G5sJjL3`GDvPc@w{c_n>~q$R z7j6NQ7&9RZMaF8vEXC!AMOoEMs5F16i_-b2EYo}n|Fo(P_XYfY_H*R6auquy4Ncp; zx272h#}5$BxyOe?J!2mQhH@!PXnW-+wl+WGd@r40Z&H!|IwbW$FqkB6Y9v9&q-qy( zdPhY`WkR|C;>^ScrR5&Q%%b|$!8Xe~lO#kg^x4w=7Bda)j=qOeylW!}=`NO@l^BR@ zPrBEalTYkT$|Vj7U+4pl_!fX+M)!-1(^0R*LfzcYw%7zXSzK) zNrr~kbc{PVNg!lN?!#@q!!e?5KA-bWy<|5sFX$9vI}^=4cMHTJYdC#I<>yWji>Jz6X?yBU~fPF=Ir2}D>H7`_xdODiExXiu)BX_@rXkh4+ zu%{r&M0v`PH{3K=NZTm<@fi$0>OQ6{qYAmWRyExcX}ng=vi? zbF4kcZRum7#L-o}#k410Om^kpG)MoYeER<>{na0`C98b;_cVPGx7H?TLuR!rrL&~X z+lzxw$yK(uUnaTwEyEp-!{)`vDN_1wvb<2&LA&j9MGcdFSDNxXQ{k!y2~37=x^N+Z z#bb!_c*c)#v9@|iFS1GXu-_Z!Pbh`I6j%U~r&9>B6J6^7vGb1@0=K)YEX?hXH`3jl zT{j->al4euKKqzk$&VeB0?e9@PntcR5Q%Sz7-Oauwi94+l&;;PmtQd4N=)yi=Jqgvx`R*w< za+9Ph0Uh!K%H~gx27UL61O~-S5CFq%?iX^jHALq5iOzo5E(&VKndmyZB=*gp4_Su= z$CSG8+pEVBM7}N_4uS%#qA`&5;eE+I-91K?&jcd-k9>gEQz2K1Ob-r^H7ys{bbqWY zZI*951hcwg-MzP<39Qtmi~Tl?9!jIm3ZSL80Xkyt6t&9?PV!^k%%+l<^nckPh0gGw zF5b8wzwV;ppt^{c2#ca3LUrde=~=Bh*n|;ECDv9Ou2!Dyc(`{7J>)~lsVKc!>CA^* z_usOBt|6kaMiUen;_qa}WigO`PD-LrolN})19I|ss%O70YH8qAeCEdrgy6r|S*J9{ z#g_v+_&=9

!bXoZE5{17GN6Y@thqR}awoRwl zFcEp*%9EtE#4Xw8AC4b|ZGt;52u9hsgS&s<9) zX1Lo;ofpz7$cGJ|-I(qUa~7PMM9I z(vC&atONye^fWoZBMIzghs|E2mGGTdaW{~rhm}x*VRPUUU+HP=83c5ey>MrY44uSx z1+pQS->izdt>MUmpf_U<9f?Wj+)~DTRD@yX+K*!M^){01M%G&r%4q2W(0seYo3!n~Myr&!@N5|LmD=tXVct^Scgar<5U?{~ZBT`w}+343(U;h(~GJIXYCty?#$ z_Yn09?S3>y&XeqdvPz`BMWghNYuMphMT(|h_VV8%YNG_z&jy8Olll;J@oj(>CC3G5doqe(#)= z&eij_$&22VF|lys#C%v`ZZXy@X42A<^HL_OaFuygbq_YcWhmD4O~hZ{TNmmZqx2Y^;2nZqs*n?$qKHC#eX_`Zn>-UDXa9gSXI~ z3{MgH3#$sZ{(;BYCcS-+rS5ybjE(F#Q^mOsuY|ud{G{`QXAqepgN zMRdgr(W9m^3Zhnq_nC>E`R8_d4)wF|0@~;H3K>bU8!JGeG%MiJurZ+hW z+PaB}28FDckM&6zZ?;P`*Qlm`LxM$eB2KV$T=PlA?~^tLqN^r1l?d#~jL>eX);B-v zc(>gjHQl9*Q_vdW5GFn07%EEr_J{gr#mlo*v0vz)n?C#>!lLtE@O*VJ(^4|zs{4F8_Cf37SDqR725^PytXv0`#BPM_3#R) z=rK#14@#VkA1C>FFk?+vuu`#sa{1D34kT^8FuxT?L@-g&DV)9ozy0xSK$$YiH=MI; zxnz&GjZzMCFZNp9nMaxh;1t?J*&`xw~f zDCbrvETjS=vm64W_yJYmABz+iN%d1cSdfs94`Do8aHm0ih~Pm-^=b+7rBt$r5|s_T z?pnQM_M2YRtb5qHtgsjn9|!57ce@F#av6O2t-;43R&YT{$EMEh;Eg=>{tdmE?u5=f zV+K7!amz=Nn4bdWrY)pYLnSPQyXkweF*J&38HoZVnu=*(s_pufI)+V`kpdlOKgUlHdy&N?K-sjOgyM|Ztj`s{SlxFj<)&v|XJA1QoCrwFx?lu*4a zPA5ks76QvTd*^st?C2R-iVERr-qvs0PNg|Zwdh(cNq?QtcDM45kP(Rcty)3|_qnJW z^09BVe6HPo-~hZ`cvG0E2*lrGE7Q>pE*~YG$8W*in|JGtknZG6P-fPU9Fph> zS2N<93DL3Md(oO`%t1*oUkU#x5d>Re=bpBT`Iv0Kf%xFq*uf*{X!zcf?RpkSbqow2 zWYoiBwe?)Q9rMaZX*+B0o%Tl^(G)AM@CwfHFg5&gJL9q41X8&tQr{1Xc;6j(6kI+m zOQn{3$;w3tIl1n$)TJ{X&6%L!%OyhZbh}xsqQ;MRdy?C6#!fcsUa`3T7Vb3~Yqmti zN{((-g$ve~2FREf!76NK%-8~8X^4H<&&H^S5@MR-R6s##pPggPgGz^RSzG<;I=Eilb}61cIak1=+y`f0e|#F?Q;jqu;)2OE5P5 z92lD0Nb3GM{y#DC`~*pQ?tmmcf=NMyiI?>+Y2(PLQ+2F=29$z+RT~XMs4@?YQJW zW+6UDluf&XzMIj_h_MPO95Jotl{$k1g}z1o52W!I&`1`RtkwKSRml?g;qoVB8}NRt zk0d4`t~m(elnkN|!{v8`wgF;2KJOJ}{dESIS!~>!Fy~Q~Vc!GoN=Xz=T*+%U^jd&X zz}mztfahZ7dS8V}QA3!{n-ukBd9Cvq+eMrV`q&JLy}KEwDk&pJNc+=Je7ALjr-syt znM;0Zpl-`U**PDLQOk6+(G&b)XmDdxk@I8=6^fCAs+{(6BC}Zq}Xg zd@Z%iH;KwAsdRQY;FvSY8MLJ+Km?)K!9GT)YkoX{9W%Y6Iy?Y0#NDUi3+k?o89VF9 zVftn9;n!AS8aW3d$YTIWWZHP2Pz-=cKt=D6%mC#$dYx|F3(J=gWsz&g8%|VE;=n)E zHGd=M<4-{%60Jo@hry>-nntK>U)j^UQOIABL-*F;AVTnK3v>?R@U^}DDe4q9$oV%5 z(hhyz%(EQeCBTshYf9i$ef~;%K7R;OXEti|39Q5Z$vP;^8nmqV%&|mpVo@k)G0VTP z<>e*y4iD(7c>my%FkS!&g<^xmGLpv6fA}h~CEIC?lUNacTb0eOFd{eo-=XjcU%QP> z%=g<+tqYH@xB_k6)34bIeU&z*#$UBXx%m7Lgl2BeKH9orG#%6OEi_3N8P0eLn^>pW zE|n0FS(t~{cS|%ED{e2txnGwwk>{iL@pSF=-j7d= z-X63Cu))7btexcryH!xD6X_iALCcWU_kcn>r=}F+@dS_Gsl2F2%^T{cds_x6a)-h! 
z^w4GY)D)fKWtuqa6g7E`eqok^ldoa42x6lr_cGkDS)26f&m2}3T}MQVwuM=kwVQ&F z)+(g@FG)hc6;I*r@wuk*GgS%H9XgkJ(_N_loB7as6h_p1bx`NSIuVAE zT{k)x{LWW zurprdkgrBW7{Xgi>1hI%qWkW1BkJ&n{CkE(xzv9VMU|ED+cAM8Q=8A_?p=X?lR3%E z!6}>vBpBGY!xgJ-?`@LXjvFp#WwyMRkFcqfS64OsUU$>|wva6kk{8*NMt}ZnTeFen zxT%xb{zLTS)2LKMfDi-*>tgpq)2&OP5maDhNv&e{5 zpWv#K#GP>l{U)Xz)G28BY7_i&J*!fnRm4;ltr)f0`iT@ke=lF^h*5v&es0%P{jkAj zaG8tGQmy#OV#~saia-4)r%WO466Mdt(07Qk7(izh@aCfPUoH_`MzV@_Ob>>4OjvUo zg>`Hl*Dn$W4vs#XcGq_-qw}0VATUR)kV5~{>Doyk%RjLmxyqFMQyvU7^iDlK zKN}meZqC4F@0xfNET(TRIb8u$2AE1;Yp$x$G^@(SVx6JSqkAzFMhe1k*RXSXpV24A zS|Y&*Z2%0}ZE$Y%LQk*0yOa~mF{sL&LIl%Q=zLhrn}^#kKn_rz103N&{dyDL@?eyz z_6fl>@W{C0oTVNX6-po)-K0tp7s-AK4_5 z>$M$L#7@y2D&va8am0SDHRl+L4v?z!`P|7uhMvmP!)+D4H9bit>=zFApVNGTht5 z&+>0gU%!S}-BIOO!x+eXg4l_G<@o*nc}qDfFV|4POMjS~JKLAnb%hT@b^NS2537Dw zUnY5BC>U@-2$#=#B(tKDvFD;`o|Xx5?0SuxMIcq}E$bI@#KE~|%sb$yRNmdIUHOOA z3%$cQ9@hCisRxG0OK)5khtyCwt#O1U+Hg_+zGN*^Uxrj8iFiXUsm7>~m7~=887h)1 zhE0gTFsd1qvdS;SyEQ^pZKbX5>!-jjfDY@9r0TGsWYbk?sCBWFsig1el^J5tTN9Bv z-K&{$Y{6xjdwroO-&fFKqSgf5pmn+9e>Bq5#P4t#Ag)d^!xPtUw`=%%-=!ZM9I!%&U)ke`s1pt;JsQr(z56oaR=e?Cbu^m- z$!C9#2iMwf-O7C{C|k|RWGdYik%ddMS#_y z>7f@X0qKGuO*&EOf*`#UCA5GLN+=0Q_U!2Ue&2iMJJ-ydx#rBAxvux`-fORC?Y&od z)_vd4vs!#m9vd|Sg&nNDNJJ_J@u0Hi^WJ{17CNTKWRNSdQKDy~1Pi@9tL|evH0Lb0 zNrh{Afjj7b7F}4~k2fZK)fVi1H7#8+d97tJMn2Ksl7L2##kn*k7WR=#7o76pTtQb` ztjSr*ijS4{O#M*G5PhI=&2_4KzKIE{>T_01xw1i|WjQ!>G4Q>XPtf+Oxq00yJU&-e zIrK5aC~}GcajH(MKr^^?F(v}CtaCMRK`U~AilK1Sk!q8K4H8rjnevd=MaajZJ&q!q zal05q5h}p)N+8Li=)0YwxIvV7tD4+XfV8mDrSNnyEL+(}GH5pgmADW4PJ>F!Zuwcg zd4A<~Il_>2uybMCvuk0C$dOpjh!3+i;AqthAe#D$%Qt(k)vOs*R9DfKH*64a@ogoU z3Oh;IGzGD)Zm7AJ)ViV1Ro5~9u7|Tgy;-ZZ4=NBcYJjA|2Q^v`%msgFzUq37M9u_K z4Tyvu=9Kq!^J}|)zD=n%Ui##i9yO$dHb?G3jP^SkG}ef)ZWx#(GJC1V%dI)&fRZcz znPPo9tMf-~KWq>->0`GeNFf14HEFfUt*tg_W{Qog+0V?)(Zu1&Wpey;G)CeJ&Tqlh zO6W21I?P8zAVkZX3 zW01f4zPf+PXlcp9Rl92_FA4AEN={(C)GF{SX=y0ic;Ap=(9Yh(CeZ828e<6!78Q28 z_cm1!u>^Hj;5(+_j;813xm)v)^}6_|15J=cu!)9Vgoyc^^z@St$nm*6$A}l)hQ>&s zE`4{iB}d^A3FT#C?_q4+CEb`Yw+r8MpQ|n)x}otfUcjh7P7Up$Sq9jVAUlSA$Rrl$ zp8MQm5a!dY3Jja$t;V>X7k-SMWCSnau|nbM2Lp^QuQKvRr%%jWLlz)rc0&ayjo z$h)0%_%##Oi;Vf#NM?$2ZWBii`nhj;DeW+=J8{;t>Sa}mci2gB)nOu-Sy^S{s;)?= z5u=@ssAnqYtOhJmEm}SP$hoc~JlS7BpCnU$G%o9}>j=Gd*iKVXz74`XC4jxnF&(Rr zcsSuV2gTO5;5x*$(D;hW9#T>&O1#X%vVjMiaKSkRGxx>h;bkTFmmLg`e$AUP?c@(Y z_7`!fJCARryE{rw9r0TqvncQqs7T*{ZGOgOpt6PFbc-gHpQ#7*Aow5t?}b~PuqH?n zS4)n&Am{$+f`iZNX_SyizW%u zVv0NLYlS8)wmX!>{sp}i_18$)^yPdJN6F|qVVR<)OZ#tQY%sca4i>PaU~6nTGXRS0 zgj^17DhxOj*Vu9X@$m0l$uNEBVMm4UfXtT0S}GEUsaZ(SeHv9GahQx7M`nvF&2dlD zZ~6Y5`Z_c~)(~!N*I_=hTyZbUB=Hm&bL%G2Qz$&l1igpOr86SaEwL&vCy6 zRq9Q6h)20iB-`$1N?mgjaB(1qZWIz9yiw<5Fc-b zBX<@#A`}JO+6z6|XC-_W1`&m)T=}^+t%H(8Y6V}9^~OQY`Qs`P1&x?;9lxYC53sp2 z^w8~t4}7uO<8x2*Rt9|gI~~t7+Ieg*)DBtI3iYsRfAmsOoYx;(Cdlp*u{gvV_)&oJlK+~^-DctQ(QRbhs|V zuicwCohE2v`t{n0#XzUaM;-67U-rsrcBc#eMSJY-vLxg(WIgr9QROH3#Q%15ZjIki z19uL`PckzJ8~q-)6Mv0ck|CNA+)=P_YpE4$?LJ37pt>&~dUF)tr-_5csDe3G41>9u za(PG5q*rvo>wl~a0#fvYfDZKN3$>S#ci{UR(nmf@p1hMa5TFJO|37KS@FiI$Sy}); zJ%V;bMKCnmYDwHH+P-+OyOw9kezin2%>PPD>EcsaC3F7@db^a`m6Fht#De;Yi+96E zv;MmF-00%~*Gsr##`peOQiKJmlPP)QI=^AN&AiN2jroE^rm9?%*Al(vxDn;!k$@FW z{-rSr$R$fWSsxywr8_q#^;QML-+IP9qT}Z`uVv~j+iJfL&U$3Y_s#SCQ&(k&k_Y!D zgA-0%UVjCf<45}Tz<=S^dt;+;S%WqFvsTZ$Lk^325>4_{6tXoj5~U3J3_%Oso+#3O z0h8;i^#r$?x!rLK8LNJd@H<`7;A)wHDZk!x93Pf>2#_-i1MwU>OOh^EYu9(|BDCq$ji<8O>=D*I*6AIlZRK_-@XoXfHj6DT`U#GhRD|R92Bq&`AUin z)!j1h-*q|+c+KY)%VVIVAbzi|r3OH8pXpxT=8OBPzG zJ~69h)~=oipJve(;wACEb*Is02_g8ig02yRLj_bpDx64e2pJ9{P2<+FfWwbNnB43) zpT*JfIG9x6VZLKkdUr=fPFK^moi$#x+f8pugGaU?NNQ|!fP}Ugs;S6Si`_Wv+C?Qw 
z7uC(oKf2emXkw`j-yzkk93t%zHSBfcH69q~Hdmr23F>(S=;TMmXwz(6%#rtnYH>tw z%V#$aPq6@(FtHPMu?qZKSsz+AINTa#16ShmhOTTRnrAgX8}L2la?fCPZQV1Xc!SvZ z+%5ryF0Fg+J4Jf$*~L%44I@pXbdmVk$%pI(-@cD7ihVo9(pltDEN%#F%k=2+(=*Ty zj(%9WP+Fu)?`Pw;n&8P=z08&xT(>+^4jRtNjn@AEpc=7>fY+r$7WI4 za^Z~Yx@t@f2AN&aGMW4Z$3_w!iSm+9GJIL;L^RL_Cp~h)jy~wCI?n`qNs}9Mph=D* zisyZP_39{XLO0YaxHFRGMJo6Nz^kxg9uMOKG|2CVb>%gj%NWB4Y{E4R!9&NmpL@-R z)$U9RY#UM*_-lpKu}sl7_#UeM83gRrYT*Z(M4@9VvL|xTyZdz|%Y&^fyIGDI1|~1= z1e;ftg*m+}I!(@%RGQ(>xsGWl`PxJqYCP&}ZDPptxI$9xFy(l_3L}pemP)Mk#gD`o zZS$*Kh8&3>#e#?TtHOa}5l|ITlR^*Q%m>j2D31X}N7VjMC%=pB*7b$y`rSTQVh+&L znQdw4>K;HKoI)gaz3A6Ie0>og;W;`wUU=70bwjd?GnSWk>okvu{k@GQAOv@gppV=U zuh#+(MBc+EZ`o<%W%&1}(4rWjn%tp1r2L9RwM~xv>57((S5i?MPtj_f*l4VQ|6v9y zZqIL=yd7)Zv|BpN;WJxezV9&%Bf?x#2)h-^@W$otgVGfs;aqnIm zaC-r9IgG&)xk(stqN$~rCQS!WQp1G*B9u{B*ZQbV%6` z-Goe+&P++#huiYiSnNZgWcJy+c>hL`9UcX<>-zZyA3EQZ76tAFloFNrcY15t^}7OI z1sUV!Vfp7)D)Vg!zq)Q`Jv%Ky!m%QCihw0rzdkJKYY!HNodx8re81Js+tYxqb!BF34)+;sXX97#7Va{6ZwONV7 z!WuHc(C1PDu17VNkY5&~bQ8`tCsnts4b=aN^lTSY;v1h=rD>Mm^4jdm^7L7dY26{= zOr~yW0O*6AtPBg+t)ZIXHS3HN>o?Ll{++1Nace>NXbX6F+!eBA*Rx(x#K)p0PkgzS z($}Vo`i&B@N~G~pX+yU_Nt8qhjfaMPOuj0d{IGFvYOSD8`kCOSm`XjF0Q;KtVH~wN zz{w%n1vd^F6|qk7&ehyl<@)|)-yYSvYj4PXB~zu(+J40s&^tzTjkQc|>)aitTZUch zw1Yv{$xKkSlg!@YQ5D=+`zzV8)9k!tCVUb^lM~le|{RTg4*00MU=i~t| z8JE>rzKnE{@~aWjjW|0C)mnk02OElHy54gFqNyonPaM;nChu&22MN>Z6)&;Yg?YyU z?L>z>dI%E-@90qN$lWLn{sq+GMiX=k8z-j8k=`p>9i1?(V?O)tmt5m{K92h3Z2w_o zG5m@KQl=3+ai(EEQOM=ed)Mn;TvkyDkb;9P%IHf0lI06-X1|;S2B)oU(xg#F_^1xrx&Kff3J-v?M@yjaxo|o*qpRMQ*X8#XD`HkqKuxpCoFPa@77+2`}(hf8amydWwH&Rb!pi)wkCNSRbXJmv`Jsz$F8S+S2TKMQyeKNp<1_ zbY6$!yLKb+SAlNG+3uTY<32zF=p=INh+le<2D2mO&IT8qP;`h4$IC3C@OZq;7(}YZ zmZTmkJch`8Xh&jS8!&|p-3$lc99qlq)O>_*3{gogY-U_DjHbE3=*>b)9g$d7UX!cp zEVNOrnPwv$hp0y+Y8u)L& z{lA~+?_M+)(qg#kYzB5W{F=PLx52hw5~r+w^*eqKUVn66cTTN#&-_RFV`&&mV<%^W5 zm|&-`0_XH1v|{+kF2@7%t17zqFaExBe&06Y;Lx;Y5F68DmwSu67Wr`x^>;YV6><7v136I)5ISiY%x*v9F>anc=TDsHA># ze*T5s_nYmAFK5xniyj3&3tM+1D8(bgDHg)XtR(SH$Zp6r-Y1rHE!T>lREz&owDswy z-Os3$sg*GZvSYT^65FV(Fpx;*(NsF+8YL=#yrq`aW`yD>x#ya|g6Kr3+& zm8H~LaQ-M*jVT4j`ZL$Q=KGG|;REQWOjn{j`ePi&v8qUN6t z;rYC-GqB0l_xD6R7yv%C6hx{MUzd+50%B2~7w$%5i3BGs2W%s@$5QKM#iwa}hd-h< zy+PK(@z^LfBFCDCt+d8ofXZXI%>BJoNLVguPg9HaTjGO*g?OGX8rsoN{)=ZK zjvjX{&rcShV1I@S5L$%7HMNP$c=$KydaLn{*ds;Bu>&8D#LMr7UTa)=xOr}HSUIEV ziq)x`@i(B!m%y9Xdo zIoe&Jhy>s0iQR(W2M94ssRrK4QxcO8{``cl&f5zpQ`i`65pzu-r#lUX-q8#oe`&lv z2H`x)f6~I$>*j0aR|iBc1ng?8Es1FjZVGdl8nOIc+7#A75E-*c1G9=^uE=E`1+$7i z4Byo(gMNqGlD68dVR|k15lI?GZ=@d)ZlQqb&WH8{oXK33Dj0R_&ZJ@<(q~W{*NJBS zhC8>Onr+f?FWI13nEsf;o5KDH6NT-|5_l{v7hTr9yKCOmwnR;G zX`P!L=-#!-Kt^X?-5Dxl6{g=l{m@$_EMMX^@hl!0VIK{SISKN+viW3#)=h(6kxR&M z4&nSQTheTV{U1P;aEd>8b^4;bkcJVn!-wtN|1xW0X zFGn3;btALLXYn!=*HF0BuRzked3ZbW(uG}>FsV7>0QB%%noceJyWz=fQ)~9AFwQ)Z zJCP-E&;Fp{w1}o0>Byy7_L9SqIcsy{k*({mG?G^FVK}TEy6x5n&6EtVzDXbQ>)?WFo{mK2?u}-lKbyyR z*o_AV&B4u7lfSFB{ay9nu<_F`o4!YVX{yjpv@yaR?UV}t9!@`B{@gjXP5-cKobAHh z$cXn{KlT3}p>3zymQO5jb;njnNWDADVgYZx^Sz@?LH_Wt;ywHJ-o~psm*=x+2UOUF+wWhz9egVX`P|gTfp(wFHVZvwt$d3 zs!5L7H7o;GB?jel38%);mNi{xcPhR$*Zkt++;QteN&sUG0+bPHM}FU>)`vVB;;sDC z3+}Pvr&p|Mt&^#5bwy^kC%J13b?cU@Ip%3jkAB= zg~0hGly3>5%_ITbG6~10c7^KYJ$e{UR?C9nwX+mnz)?vXX-TXGMGa}dE*P@b*sF~y zyR*Q{&G+z^@!fo*#zi?->H2=bxF{YIuhJFMcXA>Hs_T|NwhMP2W$WWzW4?K>I)C+< zaW=h)wYj*ch*}0Re+o10AYSn;A!k6?M3W3B8iw+0 zBMXih(4}yDWV5N>@z5#oJmWPp=FpB(`TQX2z(;=Ii0){SaDhb*`eTkJP<`v^#8NJ> z)jGU#FesIUO>>TwjR0;CBD7m4QC!Fws_{6?0IN4CR$te>Cvtu@q>xz1@S`qo`jM5m}4$~H#Y~n?QITtWAmv9)3 zmFv7^6b*w8F&4AIl!T`qF>+7}Q6`U3x_pyOZGYVj$<{SCRqc2fQA;(47dR+(NL6+= 
zwd>QQjLf?-an_#&*F_r!4$AFlFR7Ea_~=3+iRiqyR@9XbVk97xj&f)Arr!xfP2GYL zx+zv7=`6krE_GaZ8xD{3W^aFWy)%F^jT`^(8npnXYh*0$hW=k@)OkztBwR*72saF( zbRHbAM!RP(zcyvrem%M#Qa3oYe5c}T^_SPRdDgNfY6V-q!3~U$L*H$eZC1wB3~)Vs zIk(}t!5whrAr|7+&^Y55=*dzJNz z^R2r|^A&@UrG}~>^jA$FVP+a`GyoJp;w)hN{5`E(ga;exe2)~JD!EsvbTwP9TT0k^BmLKL*?BCrE=$%5Om!bQ^xl@(1$tU` zJMM|Lw&a3f+S3LLsBWZcIn0*)`9tKNnTy2<+L)n@t^78zC4eEi)4qeo>P zqWjy}nr} z6Z6Bw)nJY)cl%LauH$vmkLWbC;`HB9xpsGE-Cg!#oS90x&c>rGT|!^a!{Ku;Yh3xM z)4vt{`;GqXNB?gQkp+i0a&|UTk1rG;7!@k#Tx|{R_6yOc$O}ApS)^OaYxU#XU+>$F51U82o;)&SzILpDR=`{FeZk7BBiv2j-l&6R=kn@CVu$i94QsYr z?b%q53{_ebCw*=&FRDE|%XsABbn$TVwX})dnv}O3$AUfAYj6Fd{*SkdL>^tOTyqpv zmpOg}Gv$imZgz2#B{qi%{9i%&Y z^J9#h)qh&i8n-_4k0v;Ccs2gRN_oq9;s0#Hr1YxiKde~!i{AOqCRDkMVE$pHanJnU z7yb8-{aU}8 zMzZUZ&BydSq$W(aDgHOZZdf-|`YbQt@c3q7aqZN*)@RZcM}|-Hj6u&vWqMo^rVz42 zHzJ4@uTC(WlKU@JfSXOoE?B$tSzA(G`qA{aV^2y6UnwR;j`u$Kg|@3F@*Nu7>8KF` zJv@4++rf@uanlePITh&R;lmRRXy#v~Z%t{QH#{K^ItyEoHbn;|`fKLN+vIaN$@^ir zOT{q`2=DU!g7WKcV)Nt$b^?S`MdvG2#R!m?bT)wVrJS$SR4{E~FWsgF=-oqhATjZ2 zpyW%k{Gq8%Y9%>*_jOfXILJZc$*H za&LhttcxH&hT0xg3jo+rfRW~hU#76|zNuitw1$}@SQR1(Ji5v*VekAfPKcP_-(Uml z;Yz$~sxR@~Kew4luaO?7E(PPs8M?upmP?&HeU|EagC-Try2^!f$0bK{o@LnP2<$uD ze`}?c_u~Gm3wKA8bvGBOXt0VhlDz1n`8ZhKSPF7vZmQ{27d|0hp6 z-eK&!75ddBGUUxnUPy#DPt55T>`!hr=u6y6^q0CiD3*U)`Kl_NaGenKlV@;hfvGSl z;ggSxl^3NuZt8CgegbW1ocK(8y?CaOt++?=LYf2XQ1Sn0^Z#wc-#_x-FY*7cFQMz7 zv{nd;V@@hQn!M9cJ>;8IIwkT%EX~Cs@Ft&?M~N&F${ znB5DUf0tz18kg0vou?Yc*xJc)O>fcu2T5`?pg0o5e>Jn?J_o)vNK6pvb=0%Z#ScDG zr#(5~H27O+(CefEy*d+abfa*%l46_F3SXm%-Xg$niG zN&~@d_`-1no25nsv5kpM@`jjsto`@k&F1re@MJPkez!UBVe<+lodNol1r1HzX>gxe_T-vYw1B7vBddxEd@^p)*6);eUS0Bd~(bPJHh!B2wdOcN+f!c z(4hABu=I?d*>|85{ipw*~hPi2EG;ysl=S1J2LgE;LF zF~_3dwq38rtkq)Y@hRq@ZjiY}@cG<=dj$XQR{t;8pu>YMn~k*mB@j~$sHwPz%VuRr zbkY05U3*86__V-|P3l(ir(TC>Gj(>Y>*H_4+1J+?Qkzb)OZ}kIsGlI^S*~fZ(oZ|- zr~eqxJ!};i(S+G6e{iiF3n-~4Gr{J}eH7bsj-npO1(tfhrOo$q`j+~ez$1X7@l+n# z3!A4xH@W49e}s;_tu2&({zu`mtP!qE74(Vm;zRoj0deLvVMu~g0OO`?7ZQFr?azOH)6_9jNW9~eLEISKCmbl1pY9Ms@i8r zc_Ow9ZL+Ic=U%+VI)dTw8o27?^s0Oa@&4q|4O)YdvPnc01 z@nSN~@G+V*s~K>Tgq%3T9wL`^Y4wEf#~+R14HlA~$z*5slfeZahn^^Bi`I+TZj8}d z6+kVRj@>VLUt=C;f2KStt?9a|31;Bl%Oj>wMBblYn0&FuyX5-q`&~lyIi4daQ=pxH ztNZuc{o7IgzZw;;(mSmzkY^I@llgEy?Aet2l;;u!>k< zWXEy&O*yef2zS{;DPPY`c^5qc#yYjb4A2!(ysYve2Aa(llawpac|+rHEb|y_bqhj* zQ=yMt`i?nYM-7HmG5(SBbwiURSNsu#aH2>y!pW067qyFUmCkSi>cH#93tN?LDnO8;qUDqKU z$C2qj%>rrn{}l`!`0OD|Y9x9N4Z4QG5mRp)_dHBJ#i@JT5RBLfW_J;qvBOx2zV6)` zobvYq#r*1qTxvi3qXTz~RmwI-{#@%wtO^8KO1Nl;6P z30(E9!Kwvz1*zO3az8RVGk)?MSAi;@A+L`L>Ym$j z4ue4j^#OOsu)BO)Vol&=T0X28JR+$DnCqdc2QrzG(}Y7wK*|5m|)xe)UJrRNL11*g?jK`q?`}Z#t1qn=+U! 
zNi&pp3@IjAU&j1LlF(S0%gA@UN|jr6>)gn8JJj8Q1Z_TDFEGyfH@NJ%%sD zy>mV$+@l#7N@4fA(qZrLx6ay@qO)#Vy4ZdQ2j@V+Z8t^TRNBFQnfI=Je+z-};!A)j zaHn+PNx&Udj~_X!a*ZiQ?{nE;@gSQD$0K#FrEoQ(-01X#K~3@e>WRNpL6pDT9bW64 z=upbh^p|#ce#az$964aKg)jZ@sfV-s@H8^*J&AI<113^5B;;IL(lJ-#+Q|;*wf&}G zIqqF&g<@WOI|9G*?4kTjHdLo!Xy{Rm;pYzyrfIbGM}$481>ze~_yeXPjM0^O2t~_B zcjCgmZw^e9uux=*yB#@w8Luhe!R`~s+c zgzpwzX)J+s5G;)K%AkMghC2z8V+hyxpoSPvAk{hR8PQ}&hkCPSo3$ree~Ysy`HpVZk)&(&N!G#GWlgx3fS zq8~>gCBN1rA~b(%k2_jSdC5xWU2y(+lEuLklOf^I6E`|xv!y!NmA%$C>7>xp|3#cT z!xfUv4j%dboh9M7n)u2_2lLi`WjJ|UBS%n9v2X>%nIfXj#41xskT2_1lFK|?FW629$;Kt2n>EEyl?z+rmyQuG_O5_j4li(}m}oq3 znF1OvguN>GK@o*eF`&v(%h9niJCI41HNKSegZgSO_1+#b93-+Oa&*PprAdVkLqSCKMI^|4U?4{*JMRE3uXD=xkT2CW>9jdNEVV`Sx&I(5 zY)ZZl!#~_+Ml(cosn{L~$q;cYIQ2)MOVcI@r)84XA=~X3HepAabQ}S7`OtW|?XVo6gm?pg&ZVUleX3fYzWliYi6a;0>{bD*fq<$v zRunut-jXg!uNH)<9VOjH>eb*A)rTpt}B8{56~oD zIzI^9prv$CH8MqV1nQu_PX&+BToeE57qax)6n2~6mL!CaPmRk8jj1GipIJX3ehz#I zwZAj_;!rz)99Q4IERH#|g^y}uL?Hju4G>X+bu3qa;5P!fym$E8aK`VrXsPCk;BmwJ z6L*!iyXdUlrfII7c=*oEhNBBAksf&2`fI#8cIL9UZv1UJQ;QE$uQbl3O!#rmon7C_ z;%n=&aV;r0L&VS_CoCV&TK0iQ&lkzruthyp}BZV5-(#5|oC;9eu9o7@rjzBGN~ zS~0Y)Lrwl|AbYgz1QW0Vn}Hm3Td5@YL?gSQi&VA?HvP1x}AUXsQcp0 zH*>JO=tJ>0?DL0p2V~J!SQHlJ;CmJY?c9f-nLivSiTkGZ)ecMN$_vb1WHM~48j#8$ zK|#U=DRNt*G^xL~XJ7f*PzN3UyFumHd7%wMy@jC`@mDtX4I)>kQt#hxUNR&u-Y!3j z8u-qY$P9az0_c3*kms2t3#&{mKx-G*I_S;2Z%^JPhiLmz-y+NDiSuQIU&|&L1+xYc z3m8R7^$PgAPZjgSL#_e?kE&DEYY^DeiD|UpvAS6?r_fzRTP&T%`Z!3(J$icRd_~Ht zLeo|kJe{k6;XqRJae*0*RmzCfM)e&S{(uA60EO?ofI8^@!n>6F06Fz%_u^*Q))1VG zktYGj8tA1z(~P?=l(l6tY0D8r;`qbffMCEHgYNrE@(lXp>2?+2`zN1Nu@mB^2a16*~M zcVscU>x==8jbqgsLwVKp;@??$+(~(W@qYMLMW{v>1fdNl7ByUcO=Wf*^C)N$TCN1f z@<2Ci!0vUjl047-XX1clp9d$!pqsfrK%L?RaN=;c$@$ELg0=5Mw@9opp$BB~Rv;P+ zLmy(B9ip*-SFW1wOLx4amzhD-?Pmhw73f1jtGgaoo?ny=-bh>Em?{LBoNpn!bx$V+ zl}Ede#pWT>lHyXUbZ%uyK<=nZ`-}VEf&6%Q46?A)RPK33gk!ttlG{143(R3_i+od> zrqA-PU-7)BXkj6U_q9vd=`E5b^R6qdc2>CFSUoQFqV!AM_|~zko-w|96(vhzM3GNV z$!m6Bd3JFP&O0ilJ$GYo3C8R#!-&3;&juriOdghv#H!w51{ki*bIi=XKv%qhfA-m- zi{rzkD-B|q{7YhFzjZ;qw77fbj*B%vsW}QwoVOxtcEp_pTJY(beEpr%%pAokuom;Cg zg<+UqK!>_0;U*sYcu2KVe_@&}1?3kfWHTc4YAkX*79wP#x{+s3p4R`fO2y!P9=w4; zpY}98icL$wre#i(pak@hAZa_Y17exgI`Y6Sc+$ADouX0xVHo?Y*rfj)6SzS3uXHGM z6~9Y4r&Ol0`?Y~rIA(yVFYGGE9g3E>tAs>yuuFh!?wFBdv|UrCQjyFLKUMIcxP&U= z^&i^x8El**s;3c^c`PQ2`un)8R=e*;)GAe4K(6NRIUvxwx6>K6Y+g!V5;Vw+6GIZW zJxAg-3$X#qO@2Hxm)Io9U(jdFJ7K_Xk+ym{axYA#1*4Do&_=JjR)JJ<(d*ZdwBKIh zC&V~eK8am}kdU6eY`~5iu{W)kiV`6Kk-2WcycG0X1hY@2#0&aaSvG5i=^oqdiDBN? 
zd>;E#oU9p!gYe%`E(|fR@SAv>XXTkv$aaFEM>N#qxh**Y|N8JKBH9}Y zgdV1maUW8Lv83170F||spIN%DjQsHR=sm0MYn%dP7K}ls`ienMMZltnz7s zVU-lEEGNLMY5G;qWTMuZBrr4O}foRCd3gjK5Gj<$OK&jH|KJ4;mAUG9@Df{|%Y?MRJ zEvlYhxRdLxm0}LU2=PB+5n(}3dBXHOm|;^kdQoC>5?ni5a4;K&teUN;;F+s>#kVTh zn-)K;8IHe{(OuAaa!7nre3lMLtJW)hRxkpoqCi;58-5KX{={wON!HSOJX1AtvxJ@2 zj6k-J%~#BR>Vg@R!zi8w8*B)%T`$FiHdJ8(Ptk)>;0Xo$r{>}K*UOfX?FwZ`ThPw} zUE*(-D#;(<@*jzj1JDbRzdsT3`|rSI3LNpb53>Aa3!jKhTQI{O2o3i(PTcB0j<>t9 zAF=mAvmufyy{+AL7&sEOad((up{ME5Bhh}&Y}pc2fc4@fJt zBVCH>^R)!p35oaY8bo#jDePqc8O$rPg_l$Uor6^=2!R$T_G=Y!X(3a=ILXeLW=Zq_CcP7w*Zsj4pE4c-jD{uiYXeXWl)x`9p zU+DEo(OhaJR>i6110#hS{2q*e``Mkf@)kK4aP@E;J4yP8KAEgh<`I0Uv5{HZFz~A} z#=R7q1Hnjqg8DExfsxoqV#+`DZ8F@c8%kV;5fIIT%t zu?GJ44SRACu&R6ILe?zfB5q>HoW{G0RR~{?O)|)y1Wq1F)OZV0C5b_2N#fgJe4VUd za0JM&<$L4yUXoftJryPInY456-f5>WTLD5x51L6|A*H)ht{`BwEf0^BBS z#TiF{k&kvHSs4`IzCSppy8NdIO6uZsweL)8SiwHRAysOtu2@fvz5%z{lDvf6tilnq z6ja&>b2;6HA)D#6};JQm&hmDK?i;G^6wrx*n8svly zR8}kZTV3)qg#*ez*aK8dPk4Py=;QRY*7FMe}<_WCT!G|at087Lz zBZ*;iKz$A2f}yT_^G2;*7s5JSKkfXipay{4XmlDDhi!?pgyZD)C(tgA5gn;K5c-{&=#W)~2^R`d2NXnuyPyup%>p^55dBDmeCKD^Z6EgvO$%=R;m|O%p?uO@2Zu1$0+7qX@x#q zVViL=Bxd*(hk~m3NjQdQ94RSQe|a{ji_3BHQL6R;%Gvmx+a4ahS{XW1y`Swy4>#-8JB7{M@dJlar~x{8!tu+r8f0Xm`)oi9x-2xOum@yQY&5!pbtY zyA500qHIpkSXeU}hZ~@<14}2Qz2cuNehc&F`Nk9M)?hs`jlvQGgTU1zCm{8c+hkA z8gvRGM;^EG@S3_GYQI>I=HbGdYe#Di=W8bp^`OZ2rdCHSzNL}@aB1RM-+LvKpW)gZ zFMeOCaU2*z0(|<`{w|m+Ghs{PyRaXhX@(t(NW!G~RAjz5V}%xj!X|giD|cMM>^$a# z`11!-uG8;_Md~JA5VoFUoQ6(#91_)0TV003%?9F6GQfl^nv+WYWeJF~BeR+8blf$rqVD9+{ApAEPFqzc@$y;^A?ZlPA zW^8v`gq=kvN*v~E-uUA!a;ux^kcNH&5oG5n@rAU0xQ~({_Y-H+F)cI!alj9QPJkV$ z7(?j$YE&n+&3_SqDPjYrKA?Kb0YlIkui2Ax>Q5zhWo&1->>Z2bvr>#)*JM!SU#~&a z4+$cE;j!P_Vzr=$c=5#+*QRx8j2%S&S!)a|LNKcasP262Id{d1oY-HnrAeN2T(%lY z9=GH}yVY6w=tIcgL%c~oGw!U7VCtC$a??7&kI*s`TJwPeHgiFP&rWfvnUn^Q6?cFh zL705tQZ*t~N!EAPYZz{TzgX}y!cziJT!UCUWd(v?Vj$7P%HOInN?PBy(>!qZ?$A`? 
zxVCuU*3PgTF5FA|;uyMqSsk#rg~rBCZ-6JN?L^>yguf$<0$laLy9X5y( zSrfA3_*HbgDA^=4Ny!`cAPW!Ksjz_|h_l8+*a320&7qBjOS<=@qyHB;Y8Rml-$CNA zCix;2jl#3f*47;tlLZRA#gyGwT zWC|gOz|;WXFh$fv2Rj+!%AFtv6;s4ovGyJYk$O`MY1Ti!3*_L|kd<(+$HE|REOZZv zn`H(t#s>N0vipayXI&3RF`zu!R^mvAHfp~+bv90e=6_THT?mGN66*^L-EivbI}Ob^ zAo{lT9ixLYEE_s4jz&{{u1)w%lexBk#uG6pSL8EV)t~f z+z{Yokkw)rAtMfPm6$kHdZs5f#Bnp9XM|A<(qQfVzGnlyW$Hpjz)ph8Sd+mQSjJWB z@b+)wkCOzBx9*(kim4H5pmVuFjFIksTC)!E8tED$Lk>O#w2*;)7)&2jFp@DrbI~Kz zg8}zy-CXLvNn@@?_RdF%o7cjI^>K}LlXHX>p4S)kIHKfD?tarI+Egyy^W6W2YY^7gMh-k!kZIeq-wdg%yMql% z7=|Ndkc;mz^SWZm`^amvBmxpCr@!ioh8Cf*eUbw@SQMh%I4BH>M-30_h?EykITH}a zWl}jl28xwGdapgGZt-ayK>02gD55IsYJp({@$&oYW?;zrM_|IPdMIfbiV(ZF{5gpp zSjlzl;`*3MJbc&>gDE0x8@uF7oW5sVlt~EMDIFdb7O7ApiR{P0Jjf{bqS>L*amz~{ zBjrtlOX$y#sx>6KziBAT$Rw(Dy1eOo;i7WZmVMw|Y5@&8!qYw&5Q_(vGTc(2M}8`G zt|Q~iXN5}@V#=+|U|=HDKFVGlbV*y3Z_os82IC54wqCK6XWi^Yz7@?O?tJHCC_)zo z%92RiBKhoV#`*4W+bsC3_nbt_(x^q;d(#*W68yz=SQiRR)|}|tjCmXZ@J3xpkk0R# zg}PNQ7qb3uG<|1O6aV+L9RUFm5RnpT_rIp^7z`)c=`{oK2AXYNd=m1yh3ApOtrz;77g&Q=}@F#djMHa*l`;Z>ZK~TRe@+{!fDAksmABc|Qxmledh~d5V)8`K| z0r90HV`q2=Rz3DNln*zB)jmOzBj2(+l^}P*r8Db@E=?+n9(4_s@VBt#OUIAx8q7?w zGuJ|ZrO#}?!}?&-3Y*B3G#kQJI}<+oTEv}|alq#S4?V3od)8B2T6PZ){@;*WgwM6& zf8+PMq)lWnC^yR@aMfN`KeeUj*2f8!AbuvIHCi>c=(@iNcZZz<>99V81hYhfLyw;} z6)8I=lxWNbt1g_hV0;@wc1NF)>xe%|X8Tg27?c;BF$VDnJ}bf;9_(%3eRwWACIO8f z_WCNp15TCT@v|nugn_VdvQ=hg{b0rMGY3|>I|)l{pyKDR zd{fXu6sd;(&wA@e?jKC*6G|uk<~)(qhi542d&v{u%7aTi8@4t@UUdV+t` zCYqj$v{`uL(NY0DNElk?UIOoFsTYNo=f^^8f8Sy) znuHvR)(-Os#kz)M?948vx6#sG4n#A!FZ)1h5Ot^WbHRxjK_MBSb_ z9>hU$E~7#DgXcS#i^<7zP$9N`)ZOE|(5kaf;=p#LukyMwt^C{RkL|AxzX2;)@)52V zR>?^usIF9xKHsxySjOwLRGYPop77ZJ@-QsYB2US^1(p2I^Hfgt^bK;`VZ&G9$? z>(U;nVTG6My2z)Ywr4Z>>GDChfp;G(y!TATE-$dQgNT96nK?v|U9s=jyXbZAg^SUP zvi57z?-sEu!Il^O&ZVVa8JJZT-NzHX&DXt#!OD=2hllM6;z!S-h0apBr+r|TuC1ot zTfQi_rV{u>^M7*ciEUd$6>@>&m}I)pBny`gQuD=v%QQ3G_jPL`DST{&IpUycQMw}` z->F;qaKg5eymQd)Rs425npDdC`a8$}Bd`)Lx}f&N>1|66)X54?Zo89X19qJruN6!- z2_erK3Cwud$(2c_u>3~?gABW;Kqy^8i6AY3B9!Z)My!zZV-1Ty`b*LoDO!1&9V?g4 z5%lTs!RD)RijION_mE zUCzI6ZWpq!AqL&`nR|mL^rL{C4wQ~`qFP&fNQ>`kC5B=Su)Y+!JS{Za%T`~sYqene z`Y3BRr~pVV^VxOPK9PeP`&45-6+l=fwvqgp<={b2mZPwGB1e9i%iYXEr?*#tE+QsJjjoi*Ak*%q zP}}+m=HreWcsT&sp??`dG&Fb954!QUVL-p@Y@B!o&l7*uMDB2y)$4Rlkb=3oH6_-P zj+GY2pzR=3aqwa1Q*%s{8Un+hr5=E|d;2uw|qPg~e`gy2C ztdQ)l$1o{cKLR9R8$EqVEBTVBhxXVb+puR6w042hY>1t0%7#i})waT+!#FGS^n&Dw z@EFjT-;a7|C=CjG;HFOAFN%qCh3=Q!#`_O)Gb#4^BJ}eF#FJkc5 z&@=m5t60cmIldD(vN3w|Rj2$@2+?J)B}cX`2uzUwP`G2C1OJ%Ro`vZQfai$6K)TNX z#~%PVaT$%kh8-~kQbNi2pd$btI*k}T%2ik9fMk$1YB6{rO|`mRud0xpmHJ7DYFHuD z9M=Vs0D?#?hp0(ojuJ=h|2jv{l^i%J7?_`iJ>3GdG_8_l4I_#xv6uMkQGJP zjXc0X0wA0Y&OR=`$7&+>c?H*Erk;z}+LA^(LE7+@Mr2(-PoIAOgs zQAJJgjL{Szq=r>pIASQAjR4o93dgAptFx-SCdfe}xe_xe>J5Ts!9#GpJoa6b%@=~p zapM2T-XY)=44CE*v+$D9G;o<1NF*?yPBM59BW}L@pGfW0hxm&a<2Z3~S@m?`LgoTo zskaAU<^JREIQa%iip>99-(Te0HBs){329k}B^EJ20!^h)pAaK0Nt)tZdfM-}w>XnvlG8DxuXbt$k81^pn10*p~BwHhD#e?6L0A9IpmjI8yVSO>8#% z3>*iXwg(F`XVLgpC&}gQZ$ze8Q>*A4telhL zh$ty)hXf6Sfv&xio-Ok&>=T(wyAMw~fa1NXdF9=v{SX3vE9~DiOqF6ECXsW({r7$O zF9dp5?RUnsc>>|f%)#!vijZ+!6872CcL#msuncKY_RC@0y@@eV!~m;Xs>4{Ob=WN= zVL)Q~Ft|N0$pTWAthq6$l_XS?xshmH39Lnf(!xUiX`Pm{XUvu=PlusQEw$7H8k_av zv5-Z#rc!Jtnz)C65ELZ-?a{D72+Y%JmM1oA!-Zz#{M-wqwQtkfgzwyA_@vYU0K35K zAe!K(=0R;3L)jdU{LYsF_TNLxqW`6Y^{L9A&j-LU014yxo2RP5K>F+tNS_GwRfPy@ zfWv~sNCu@c7La4=ph-6EpRp*yg$h#L68D7o?keaHDGN?S*7ux@8XhF-K$>yrj1pFl zb}6}&)n4cRw5Pv4UjKp_6d@m`+5D*mLEti{6&Q=4RQ?sPI1joLb?AGO*!ah-38@5+ zZO}3CAf6#aPMqe~2EbZ^O)Ehzt%t3s>G=ArPCmI6pN>+noW^&JsZJbHIp=m0fYJ;E z7%`x>cp*m*j#2ic;uq;2-)jhW 
zmo=L15eF78P+)~Ykq*dVVhCmnMga=pHCJN2)`Y;B`_J1?x1uUJx0;DARqzDydzglb z>K>}SGBrVNBG4TT`ab7f-fmf$fo|LZw;rJZ>los=FHN(TJ4A3jZo{!bw}aqKAV6mH z4NL#WI4t{Nmz`U-(hHAU4NM$Q7W7GLB0340BJO2BlZ`o3;qK}g)0v@3hZEWD+a{RB zDwony`KUpKVKChi@UoS7zBrA1-pR%8y{lZz5ylOYxfrS^Gah%pV_%YG7mLh;Bnr07 z4?anj2TH8&nVqKxP?>|+2&yAg_A+iK=yE+#`Y$?>j3mXzn|Gf$1BD>@B%2B%8$N># zaipb5RJ_1fkqj3glbz&2f7pc5Jr961*h-3Az-~Zh$C7=Y%BAFj;yW++%8L}4=V*9S z*-=C!1R$n*9IC4c)Nq}RGVLk`RKX$0t~!T_6pdfXKdO$G`V;xdGA7GQS;u4DhaAPi zbt5C?MPD%)#Xct_bPIL`BXnU~tiMMT)I?9hAp=X|z~#=FL`g50q!O5gUOIKdP#Eg= z$-^sw!6RP7Y&EbyBX}fjuB(`MesM}o9(6o`j4<%H*-ba&cj7S=x@xMfEmie{->`}2 z@Zb@;X%@$P!Fb3&sPb>w+Bhp1Fd=^q4k>J6W#t(N+f@Lj3Hj5x7$XrNb2q2Su z9}Ons?MpL;iRz87Ag-u`^<#Ti^A zu#qbqs5(XNjHHdwc(W{h+GGu_*L=3=E!5Lx=ntzz+363 zJjG{$39TmG!*d|h9dII~?`J{n;ZeqJX#w1>RYV6Yv@DY?zmU%-HIfBzT-XkDMfcq% zXywatr$%3{l185oIEa%^v};Qk@nJM`BokK@6;rC+u)KjH=Q1;iB*T0f{nN}kx5L+Y zdhI$!%DCC;>JRa+y|}>{6z(M(JIjq5+7DJ_1HDJnf8~H!0M@3%?*m9ypbd97{}}2T z6RV?yH*Iyw=Tc1_@_l@bXbHj@yq~Gu_(PZSQ=d;YnzmZ@#`w-eo|jomEqcu}azTYJ zT?kvc3D=p)K4DM2`H2}(lJ@7P#cA0I*lj?aMcTJ$-EL6a%_y4g%nOt4Q{r&Y>V1Xi z0DGI72Procgf<<$nmK{nms`!(+q1rx{p5XqJxjtP0g0Rbh5puE?GS!#iVNY4v#f9u z?pxmH5>kp5$p~PHdPv)U>fp?z&fS;{%xLonn$+qIBm(NFzuC=627)=gJwFoG4Y&Dm zrIWycOu)yv0^G}xEUP4eKb}UboqP(tx74!#)K{GT1_z(BP2Jm`UFz`38DL(najELo zdN^gzr$K>1^Y0;w1K*H1UUvV6bY016vMPvnh)`?K)Lv>z5BRq4I`u7ChNO0X+_0sV z;0STmYFKPOuY{cL2&ESEDh-Y2NF<|8f%kglPvRenCGAU{e$QHiOdc-N+uo}*WBL{m zMBJv?n`?qsgty6$LeRLmAMf;En>2iCNY$^vZ2*Ut=qS}ow3YQ%|76%;^m>z_(~qx- zw^)1z*-D!{mXjY_E7-3P@U)Xd0ivWn%PI?Y=Y?ka*`yy@MV8cgctH;L63!G(%+08+2{(pMUh1l0pW)PO2M+RY5hl#MF(NZQXyD2?B+&yR7r`QDv=_OeZ)$MQm&yy4#;-50Fi>TlChc0Y599I8;b z^NtVEfdR?*-+8VZJiOMc{Cgl8b}(I}A=nXot{25tMP|mwa+!nKn3*F-=ruh8v@Zt4C1M2028lA$0dQnWD^LUETT3^Zy?8th9q?vQIg&pkd(LH zJ1)(X+b?`}9GeA9@x=lbt#@`_iMab_$}*qfm3J-c?OFtsU7cs~uKU_`VUiez)fsSQ z?|Yxlef)bE2(TRtQxUO$E5tOCmh$7T(VGzZJ!DNV%>CZ_>BsvY!}zMmqi4ag1bpW1 z@|y)(3N8>4V%Ab3iM0{^GW;~Ly5-79X>D<=+Ihv2Y@aNWm^haK=@4Kj8SDBS0sX1o zL`aO)cqI0Cvh15#f+Q*6aOF2H?$S%E!-vAB%NY2s40#p9I3}b>&HP88$Fjq}W6ZSq z#s^LipnBw~UH#G`HTH$m0@VmO`6J%DhsIYbS{AspTvqy}Eoye0Dr8Ev<3PCV8A*Ea zUs#=qI^VCv5qJj4qf?HHb1-f$+gzrRzE!gU({%7xAq?1vDH=GnNK>`9Cln2P>nvAY zC{<`jgy=Nt3G;d#3+%E$uxwLLCg97#Skyp)2AC4LUV+{V!X@=lNm4k{pfnF%Y$5(RZuHHOaUS z*(d2kw3UH=aL1t$75$A9g_vNxmSkf|1Qxpkzv@(Q>qt2jegz=Tf%%_Z#dA_zJDscq z{E4wR637T>7yY~wafXx!c0=FI+XgZ=12+!AAr9-OI-8Sl1D)oJTeHY6;ttpgXbEDt z5B1zq#h2&q_!+4W^h$9-dsxhXT!s z8GSXQ&qux=7;awt3f_JC@4Ni>#<#6l8&$LkCAX%4D02Uz?%;@ql8lzF+(~(Ma8E5J zz?N5jeR7KJKJ@L@jLkiR>^J|WAk>wv6cchnth5%%ivf>e`^?)u0=z~p+YpWS%po(b zY>6+yfy3Zfz)}iWWOFXVBx+!v4IgoxM=&l}>TW`!9%`#UN@U90ob}Xl9p;3S#3Uo| z&iqfx>^>Ip)EJ{I4^J|!gAeZU_h>2Ju8Oc>(R z!o^s0XRb0MQ-e{U==^c>tZ39=F_Ff!NqLEW1#ze!&xGH2)^rgbA;i2~51kA=C zQBr77^!kR4@J(mWtc{Ib*o(m0I5w$)uE)If^#trMFkL}2`xE@h=Ip~hDQbhV$Xhgi z$IN4vFK5HPw)|7E$SpVg6|S!yQu^lzviaq=NZ^QGM%ewJV`hF6J%b9PuV$Qp+Wm+9 zMF{m(`T=D9&bx14kUZlijk7=7&(EU_`@0I6Mh&y>F}{x*MiiS70?HwZtu z=ckS>78w1Jd4m+Q#s5-xzPny2FsoM2>HD3ps-qL^GAs+Qh%lkB^8p!PzU9x}k0g7E zo&o{V8~?-kjluu**_N2$1DPhZ>5^@0&C7%Cs+XRtW*p+a)h;wn$LOUNwTBXJw*-pv z-xT)UUynIt%n&eNn*cQb*1x(bRPNq2aEp&egDS(%#b6ZmrF)kh&|r;^5<}hH4;nnH zQ}p7Pozkx{Rxp3B{1Jv3RA_HCt}7XJsp_$>PoOs0XkxA4$at=ReDPRo2lQMPGkxJl zyKutMN5d!=d|unYl;h^IXBnp<@D-_1bY$$mIb=QLESN#EkZV=j6X8~ungz>w{dG91 z^U$eT(OK8<=O#eY{k;U@)E-?AIYkcrmoZ(Y}DEG~;=Z@{0vwRa8D$>ja> z0Qxj+Kq-5!$g=9Nl0ZJmSW6+|lATx(oA~VrAaCLGP_PQaqIK%*LF*7`u5!4IlHs=V zc8h3V%=8H4U+VA-d%0i;8_*zojWjq6I6_Npi5IFZOyb{~`r;WDx0Wt6Ou0Os%T9Lb zu4xuv+t(H=^QjYWVLw- zp4h|{>ePjO+Ixi<9}{popO>PVB4O1R5s)|yzh*HB;+(pjw+IcYF`*e`JCi76!1-QdSLx2-Tq}$ 
zt(x%as!HN2Ru)pmZ}{THeJHJf*{R?35%Stew9QvSlCz8b74)QwG{(_ zZg(f*f3#1pa6~ASu<#LZc5`+MPatmx;;+d=iG#;<#g_&~s(i#pTyQ>aXQ_crBnmLN zn6W{yh@SxLM$)9)8DIW|P2OGCIhi%}7>=TbuVx3!vyo6tgIJXfj~xhOvs>`hHVlxo zvsS`yqe@3TT1C(QyMp!8m*&*!v6G!m@{cNusN=AeQGhX?Rnjw@@w2H3J14(gAJrSP z!4BNl#aKX=5gHYf=DUZ!O5Nj)`+%!Bfd`;D%I1Dhdb0Gj1-L<(*#l~|dO)c39F-DLdN+nIK$Gt{$0RFiw6xati(iVHs+fA_qcW#y)`=Jvlwm2q-N zU$c8~xq(x&TzpJ1 zJ!y-q)eq+zu>AA1QH^*a45Gly}KnIO+RqC)50YKmxr-DJMGd3@!^;#nGA zzjr@WHg5h*8IBvz(6(uGqjQl=H6avzHR!o~$p9Wx_IhNBx2(NgX(k+fM|<(#?=$oZ z0bM@BU)#}OgSZDNg2Cd=igL7Dvsy9GX2GylZ$VQs9xQ2FZe*8!MPJXcCN@h%0puhh=d z0Vcd|$Z$k@UC36p7^2M9QIzClb~QZ$f4Y^y4+ox?8&svo^YNF=F$^T8jb*X9Jeb6p z;scVe03%H0MY?nDlA;7yYz^VIh|QjI#JUz`zg^$i(_0lb^250*f}wOE`H$J126}(R zJSJ$wO7!1`&rLl> z50~5Qdbb4+FLYbgRVbslD-pNS^Q427`1C)U*<6oex%sXsp2fdjMZQ*dYoE%;dYtG;wU8T8V;N|PA&i8({g0}zbSJ78^IE_o| zSXN!@iDgHSm~-TVHa2@p5ljvW;y|E`#lbI+m5q85JdaD&VE1EkbF0}jW1@XUx6^^- zHOqF_Gy&6^TZ(HU{(tTo#j*4k8IsB}%{p#X?!9yA`MXvfwz+f_TMexPnI*$MeT+K+ zjww`Xq#osPg}0d|Os2cZd=kDzDZ@XT64cQ0yK4%vIYcMIsQS9n(D%&j$4d^^5fw_8 zW789+ibqXui`;a$V)ACfEli3boBfPJd*~e&aVn}((7StFWxsh08?{y^a7=iP`$+1H ziqu8VaTNjfh5CZ)>CyQ%GBiQRdsaVKs>t)t+?OX&3nK8iMdx|c1H7be|Fz9O_sE=#amVh&+U2k zg-t}p)7g7E{QRAU`pQuHM>@a??e0MM{dNxGD=UJ(ym-V2g@t{VUPH8)v-0vF{m?-7 zf%K?;P?zTs^8}39;<`?+s&GQ($a)FuS%P_RO^kaC@74Yy<6#;SCDd_9(`e1)qRx@{#(QUs zO3_}=IKHKW;(qRDaz2P)CH2;LxtAbZ%Ganf6K-V90DRJlc9F4Z#LZLWixSN*4lWqx ziD)kj#SsM#lmzc0P*;E>&LlhmeDEYF;<73bpp=D3Y6YA>PMq}rFjb3&MC`x;cs`_R zrTr;>0SJE=b~{#OAhXDu_Btu^xb(OBtby{T{Qf=;UH<#ohs&8Az_Mx?IuaU|id^GJ zM*=^0Am-jBLB^93kYJ4xh_h@!e+-AleP89bMH%_i*oC@`!|Oc?yt3oLt|fG~Zz-Df z?IkQe``IjIzSAd_RCWm1rdv!^a%(Y~zh?^3SP4ccO8Ks*3oQF7o`*opz0!s*VtlL4 zd9pXgDL%H{tPx(+lq@FZGN!!MDeEuU^ZUGw{BFQVhRMn2UCui|ragSfoqz1x;awmVidF_6L>?i4NggklVkhlRsiNsVXC-VJDl?YUq*k=)2% z_RsZ&of|T@?WY2Z&i|Z#MEGcBC8Sss=2^P<=18*+Upyr*o^uVv3gI%Y%WsKdTQ=zaoy8%vhBvGFJ?$+Lj? z?#hG_^p->Iot?k&;aNCPKR9ITuCbPyN~t3rx)WVo^iGu|^cy56z0~|niw3#aIMU%Sm!rJK^+X(8x_uHT5<4 zFUV-u_dsBa-WHLQ|AGYTHeVR}ZF@G5B4YnOOFlG4{;pgsAt3y0=HHAZA27XExbsJa zu*xgKhSA6p(P*0z=Xx+wCaZHNlnY?dbsKCPA89HcO;>5+kV~G#Ge2+hKkTBIW@dtn zu3F9CiMq+Bs?IyKJ(0&riPW?*O1$7yN(kS2+bAd_ZyyHdNv&dZNsM1NJ~6oP6buTIk`{PGOEnl5J@Nl|vPGyxR! 
zDv$heo)&yRU+a9`CZT_}!C~8#w`eVWwgS9%@_5}&GhgC}1vV$k0Sqj;VJB6O zm|oz)RD|3c*pDMLt3npFUs_A!zPscMIT-0Sy0XM6-I(ZAV6d&#bMA*TjO-3*7J(n5n(O>FCdj9{kL{mTE!dhbm!-iD|6Wi_I%Z2 zMq7H`OAXTI5$o_L70(h2FRM!`j5J$)KNj;@$x;&+r24NqEqsB08dGR4`uuu!0*tV{ z!%ER=OAMs<`I}=>eo#lij;dIfHY^`q?k=+X4?#UjQ3D(ct?krXuHIqF7*kH>SGUS42O zRf>IZ+e@Abdtn-0ffx1d7H}`lqg%r=nG}&$OdfNv>qw4NlZJl}WmkLaoprf1e(Hr0 z=Hy?JaD;cYdMxpNYVC@#jOire#gX1LbQW@gtbq3scFHEMJ2X3$wTX%zk`i>PX3tJrOmW9X@}l?J~532!lnRf6F& zeh`-!)wq9NP?yO`OzRhR=`S1ag`-A65VOegDV5jKbzv#*s5VCnKLlFEtxR=Z*C#!k zsS?Z*DdEPN<85L8#N7*Ur@j5oVq+})5DU_r%Z+kXz2~?8rtz(YZLFfH`6tNo5f$0y zzlX^3@l0rMNwjRTT((xiDx?5+P@H7VSMv+gCe-^1uE*0%{HY(GybyYndHj^b zE@QPUoyQvbvFOfH%Y7l8EB%i?)`lMsCO3doEC`tTSH&j^t`#;se+KSqlO`i=ylKR7cP+6KZq~1|wecRurX-grunMH__$i#cqKhuJ zq{Ond6X;g4FvwLB#ir1I0EyhBZp^|R+kjiMK-Wdp(3sh5e z6wOH&yB^(=69#>({qLAqsMI-E>^F}KLxOh=(kv}bN)!1*owK~ZwM;y%6H2j)il2orB(G@XEn@O^0M2ba`gx0JUdT)4(j$OrWB z6y|PNWKr(M$c#3JVvOJAt1$dQe*1lipB4X33%V8begw13rXKL*r?+8OZwgJ1>WHDs zRRLHaP>1V4LP#S77oWc$&RKK(+XLBs%EB<8FgXngnJ;Bi25m%MU+139*%_ajEOZFJ z!=|NJ8Ykt>fbAPTH48Sm$dlLU&!O+CQrNZsIw}hxNf29wKX5#^em3A)kMCf6ZS}8N z{Xwo76Wl2KgX0n#ExfElJ^e9Q&zhUu$vbVuLe z@#TiZgdk!LO*{4=pGcY2Rv2<%{dW9cxBO9suPiwZ8reh5UjFYqZ+)~qj@$4sa9~$# zel76ybSE@oKsxg|sqk+{{u4h{ko&h5N|aET#f8iUlCxf7{nOfYE_kXG%nE3V6(TT(#Uv@%;DeiNk)l{0hWc}_*JlmXz zesUg(O;L4%g;h(M{0KC*ekOG|U8CvP%T2vBy3(2;8U3i-l$qCbFMn16+$o^i$mV&y z{QhT8Zt8v$%~sQ1oX0}_JCj!|*1FC{l2PaThswb9kfC3<)P58yC-RZ#kPxly(Bqa^ zplSj@CxpN5?Dl7#KpC7t9w<-^y7%`e7=#9$J!3izoiA$cAChmG;qTv9HzXJ;}z=t4Kv0|a^A z3Dq7OmVEbvI*i+2CYVz_zbe2k1^Qz%$O++8(q5CDNdq{R+f=edv}3ND!aC!ETe|Wf zE>^_h%p~_~q~&T~x4%E0)!rexEjA`UoGE{wChoR4oPm-czsi62eYVF^cC4(|)8H!5 zk|D!jEMM5H^Ytw;)_U~RyV*yT4=LkVp& z&$T|-lg7{F;!hAHzQ@14$*IcsDq@PoAmTp!D`%k2UR6=0**m3yM=kfE&3e>Jn&j!b z;~}IBVfTpFzOU`;J!_2|if4U}p>dJ@lk#>^!KM?sBbJ7Fqg$Q{(i6lC)sOo|8&#ZP zT;1^>PlPCzT6IV{4eo-b1&od{iOq>>_a~st1xu$VfbJ5-Ks@XR#)lN}@9%x8m63&k zhRhpQ>wwO3{lICjhqy@nZ+QmrhIZ#?$*MkNjf>$R&5Kmsn^Z%q5kxb56M@cs`aadv zc+5Bd`0RW4TvO%9+_*>*zC0%SZm7M%U*i8b^D=a^77?n~i~FB1$2Z;H`;zN9e3QQ~d*;WG z(o-=OJXs@Vd0A|NLKOQaR2VY?7^%}5=UXxUsfi%bJ6gAjv9z;)pC{=Kv^Pj{_D&w7!;J=fSuV}$ckHe(hS$*NO!EUpkDr8{ zO!Dzi!mT?@PACt>DpX|s38*rgluEO*%!(hypLVw3epuChyRDP@B#B9I=&euPq34tL z&<%4%5Ew@ormn-%d(#6>{+X^^C(V_~3Gy(sNS0-``yXWUVP>?JuP+wng|ZjI@zAMS&sPg1FZ$8rf}kg zFc)O*e-d89W$>vqwMA*)tj;}#eWB5n!9EB?P1lJo)xZmgcEZ7}A>3vM&y7E~^CH?+ zLO0HNd*5;lkBdKVJH zuMDj%01xe{VK+AdzK>%3RXFgZ9>sEQWm&)069|@WEOY&ZY!YHNR47tY-L8^W|wV!r)cd*nw zYfM^MnUj%avG;OiW*AG-bEs~_h}vAtt%6$L|7i8t>ZLl~jw-gRHc@>~0c_#YM1e=d z8jB((dOTCn;lcwKUWbYYLE~m6;7Y=Ck4}90K)ML$#u3QAgxdwb>sa~RiH%opDyLOS zY$I`O?Y_DA8xd_MV`hm7e($sFuLpg?omUMjoi3UNAXDMpBCI1)HU)%V7Bx4X ztFD!xg@%yxK09lw^+n0xE)-R4G1YtFy+Xk2Bmgv0d1nvYoqNT;OD@DH^JpXY1QvR= z{pm0;KkZoL^c7)}*q$Y4o85lAos%L2d1ZEUn7?WujmENahDNZLK2pav(3<4;naOqQ zXWCR_YOV~8n)oA1N_XytZx6O|zt=W+zB8h|2hL38apO~=XlmVLh89p}}T6n{WyFYLgFJbfd?(lU(Kl&YDw`Rj?Mq;=ag3izL>z!(JTBf*PP$lHbbK{bB1^CcQ-nTJ~0#>`qrfsCDYyK|;tbV^;6pUDx z5El4)g-Cxftkp)cA70bt$a;`ey>HD=@n1_j8g*T#%{_EnIr-E1E?@E52-9--1}pOz~}$*w0zu;cbLvXNZ*1CtrgL9|ec7NTwIjakK91(g>z1 zaF~D8SG?@svkYOUU@WWCKbfsz=u^X{t}?P0b6ja!hE0FS6QV1d7wK%5gD(y?sM$de zmam93ym!ipn=Tq(oq)>*o(>&VcbBAdLW%&g-dX3`F~_qhHlQ;!xSZPR1nxV3(n`AP zIit!{e4P_& z-j)3cC&#Tx8Imhoa#&9tPAY~T54%NWECcWvmc=z^xcL$0HCt{Fb|%fc%$`@`K9wNg~%L_G2A`! 
zT;O*bdI-@UdxatX6>JX;-GRIz&~Hs507-%>_;*2%7C5iVA;a8z^Af$;G`cjrR|vbI z|J?8))k3>T2BEZg3z1fyMgdEB@MMzQM(F{9;^=B^Ap7spH_hJh_hxtl+Rh(dVUeM; zH)o?nA=RwCml*#t?1FtaWhhy1YF+I@R$FKo`MyqX*)D`=nF0#*rYD#zpOLxy<7=I4 zfA?`H&$XcG?E~+lyB3{S;97~AC$l+!i>dI1)Kiv1<~EIepRnO()s>ogPg=NCPAcDg zy)URk5CsF*jvAjO@M;e^M6FB72_V2BV9E+;A?gCP1uq8vWQqPIh~Dm?%uxE4>^{RO z4~RScp$ep1^Oq{sQ#6|$+O3)^y6L3obfww**A2r;Do}J-HQ|yn9g?dRdg&{=?jY%r zkbDFIB0TH_%v-*P4*GYWyh`EP1w!O>EH zvW_jO{{8Hg)PfsKoP|i=FM?8hN+h&(EINgI^c~pj(8}kY`4>>ssAcO8cTcKk?8;B6 zS&kzq5yBO&3~T%Os~c~dx$hzyeUE#>a8G~sZ_3Bp6XrAN!*IiZMo#0%Sj%a>VdUkf z5tk~fXwf8Nyv%hJ=`hJRDjkMgfrJBB@zi7R zu8ZEZlgVYsH53}KL?W+kfRX_4DS?Gd;NmB-{pAv%w;beVgm3dcY(|vGnthYsBG&c) zh1n0%e;{?6JKn4yd(?g92>^BRtCjbD0B)x=S%zAW^+?b}hx4mxLQE#`=}lel4R^^q z${+1^s_iaF{Y_8SC%>uoJbIRNHD-9hM!j$XFFv1v;Ah4TEN!7Ulf1CLo7+n zXWF+QUKZ}XG0A5E3F@CUdS$6o#*sZ~DBSd?Cs2iZDkDXAyle-+3bDO?Pnhc9eC&ML z@i|41shu6WmGn_E^FZbJMG%kZ;BrRBatm8wOm&^N}ImBkuRx%vDX{+y?Uuh=$20bZVNoA$(pDF605NHeXoE&oMUlF zMq!?;o|O1o0(pwfC&C=ak4L{FHt1wWJbt;;LSlgJTNTjRsQyj{W{4+A;z($v$%w~| zO{f@=mVOb)%96Rl$^0N?6>$;o7bsZqAcpaIToAwplul+wWPaAy*TnesCaVqHT!cg5CmF_b)?_h} zul>$sBJhScQR!XaX4eB5ha$W2?3pAt7V979?3^qR1mtkZ_}R7oqBOX>d0SVS3rlID z76$+d!(APrvaM07M>%U?7EsTD&>^q$2ZVmr)KpQ&iT8nnv~E_;nAUo?Ig8^AAyb3a z0Oft~6=WU7tT&CdAO*+vM=?F8Nq24O&A|qi{drI0Y{Ew3Rpq-3Q%I)r^9^BK!&Sb` zkz_Tco`ui1#e3ww^YL7bJ%Kx|T~o>BrT;0SP_Sox_D>8^pvx?${8b>lpIn1Fb_@r9 z9dtt;Qq5a*Ts|>i{KYb#0=~jFOn_0L@+U!#@dEgxUb^2sAYeelv`EZc?cT$f^ykKp&{iCB&cs47&6R6W zjNnDze}63a6uxd{yhNszL*7;{1+xELLogWpu(ygI(X062-1lNb;ce39c#i4kC~JW$ z{OcFG)wdbt;AU^+5{zl%=KpI6!AlHZ?=SMc<|ZAmcRM3OiL#t2@aPCRrL^iwU+I2l zF<|!zt2pBQ(q@*lq%;a--aK1nN?{Q5z%pk^s>0mwIENvGm(G2ouavP*RE;l|Iqa%y z*C>^{Y3qyeZ^R9pgu9*1O{Km`qYrxsM}+J5D$~;R`uD%t=Cqq%)!?SR3XLkRbq4GY z;WuDEtYp2`o7byemyewP*qHLDc&4>_QNW;_axkxKq;BA8*0P>0d$5#s-&E7v`Acq? zsRo2|4f4;I<=vaGElUusNPJtye<7>h=pU3iRZ;do8TD%p# zz2dE|k!Y){f0{5+DKn>cE25eNjD4OF1yFwr1XCA(L|3{k*;SMZlKd>QamlxM_`;*> zGhJ`aO@0~>xBRXtTVV_5RB}FSzghnex;feP+QpFsB0M}bx#ShOUgJU$O*5#!N%j-Y zylT)p^XY{tK~^f9hPL5bX1()6aqA)tDAlq8g}ISyw@WgIoOKlZh5?Q@mPJ&0$=+k; z2|tk7>tQ$a@_60w+lm6$hR2pxdBG9_{aRs;Y+^VIt9OzQeTb!0e1Z^fmRf(+nTUF1 zd@XjEo8Q3Y_y5s!mSIgcaNAdqE>S{S=|;MQsfd6yBHbY%-OVTgX&7CT6r{VE(v5_m zAdK$LF}7XL?&o>mW8e2-yN>H$=lMGgxH$k7yMW%G50gjUAu2@jpg zs!>1v*$+Tx3t=8S4C}@o6K#a0dbJ@Dn>0{1*5Ut@J{+2pjH$e~B5@zUCoA225xs9K z;{L>J02;lm9GW$Zby4F_r(h4suAHG3!v6bJm-`eqKfYE_@^gyXDW7OEoGv)dymQ|i z$yfN!=2I;qj)VRrOoSw6#kb=yP~&~rbR=tE&IoeX*?9+;EwiYC&Wy1qPlm_Vsmij= z&H^XhnQzV@N6)Y-qz~Mj5Dq|j@u^vvxd#Hy-^S!eFuQSVjA?sn0W^-xu8w*`joK1f z9$ydN4dHWh+6Bu~IQLpAGV#{-hA&Xd=HyjqLTmJYg)GnplS|Ym-A5GO-v1u8mmOI2w@1MO!&Aw+ z0+8+uzjQkhzJY(M25cI}d~W)O0^~?eK|jDN*HtBLo|!liRR8aCnnlEZost|zztZAK&J zNGDE3BGT%7RlLniu5=0nQAkz(-al>nr(XrN<@s8eC{9JmYV$fjdd7ZJMf&_g0OyS3 z2f)1+rz2>`$j!IG`i}&88h-)@V(-AY3foJ|f4rbM^1Symh<6r%p}NsEfwb?N@~MGr z!fuDcINZ5MH;xC0SJ#zmX$W4K(t`MVAxg<8Iy-3_Fj{pYy>{I9BX-O1C_;|?FSowX zc)gvvXMZkbEhFY`bUgcYe+3is@@BQ@`Ty3WdRs~z|CD*z$6=)*{XDNx2w%RbvNx?c z-J$7YDD~o0-F&F+;|+|hGW1nPpE~=PgRqKXL?Mfd=!dl|D%+Cb&4%@ct+5HIrx4kQ z@_toPjupr*u)XA3`gS5978<o6PA`>Ot98GT(S26eO7L~lBEQi{O}UBXD39Md&bhZGh9iL5;@JuZt!U?^o5LW1NItKs!GcNe$_vV6LJny zqGnXzt7Zr(@WYsh;j4C2DKEil2I`3nvTt=tiw<6F%R0(xFkK@? 
zz)8&A78A|7j?-2iNBh+FgQFP4`)^|wixK5A-fQIT9H#yvx4TK)02zZs+DG;H&#r&Z zwU{x41MCL2{G77(#&Msu?5S18lsJvje2Vz=yTi{@uw)l%x^!N;IF}T-7}4e`oh@*6o~2hZMlPyf)cdPyThJ&(&xvOp)2pMD~##kmrwO{H(VcOx8CK zRKK%0pxjh3FzY`1_~Hu&XIn%f;#^;A_IRaJ zCNmzzARmhg+D{ADyquK%bk$;WpZbs5i?`DW0;*4yntC{bVv;ia_D=p$jw0S|sY}O5 zxt%`c=R7DZxRqq0fjI1*Pxpc2pkl4g3zP?v{KW2<8Y4A9uQR?Du5mCPkp=2EhLZ49 z-Wk=6pnf4r*!R<^{Bi==YlQu3&i_qJGt8H6Aw6+nC(;hj}n+*yU?h~yd#Ku4z*YaZYsu^8h=^2&8ugGhr zk`MIrpFeP$gc~87DWA?c|I;~p|IDcS(`V%&s5|%Hi||`dYfoI$@3|>^q7Q3KG=HS? zZNG<^4O*oV21yMIz!8t97YM#)G2UcfS3KtDq_fu9Q~EH7DjC=usxxw$O)bzO%_RE# zJj@^`x+&9G^5h*ox4ZFE@_J6Hx0|4)x{x=ho}O&c&k3_jF-$8RrgrT~Ptt;-=wu#F zDWxiMw@)+B`lFl+c0%IDLKB%n+S-=X; z7b~^Yir6-Bci?me_udPSEP6ZYL-@7S8uDj~#~b8k)oc@KK?kWdAO#pL04V^ETi~58 z-PStc_u-->8&qGB4VznF@cvWpGPD71GzbuYn1AazNPm4hv;yGXnD;TqqKOPyCGS+Qup}y!mihEN(qVbMy@k`p=QhJdc^lV-ZlS8?8wf&k*>av zMOUmVAd-*qWN{-8hOeODK3dac!vo8K@+W5z=TgGOqpHG1@2YmSw|4i81u>?@@dNC8 zBG~__F-(nEgS{ek(ChBwzae5PwH{U`J_X?~_!$_2xIp2Ez0977?pLLk=3!Uj4eP zCU-iV?x+;&-R&XrQ+FrC&VpOjaIjYKiDHO~S7f~4SHIGcj~BfK7dJwyP(sHM4s)UT zmbB9#9%7Y)UadE8!m(!4=LYX`l9CVg+8edhW~^pZP+v7w2kZ)AvL+t{4Lj7_GXy`pejRufL-pUGaswag&1UU; zX8liyR*)BSlq)f!Dg?6$!1pj}EyG)+StZQetRt2V+KtI@IBa=^zKW3BgIykQjF{g; zxo^Xpcp#L~Jk&LN8fSYKt{VZ|oJ%e4JHXfb=u$0919=|TY04G$=RhS-|jHIQ-5x^=Y_+EI=Dl8rRo@@LbK{Elx;q288_UF}U^5em}917)1s0PP>ZR!%_ z)k70A&o_NZcTi;CFLe8N_G9>tF6XsNM^+s{4Ft8b!u?2kvhNXD?xhFVbj1x7D=_v1}$%+$_@Y1P%6mp+WR zWT$%ZW~41$F3?L!6pWgZ!ARCDC-*5OJ!ResNeH}Ma)q^D?o4HB_Tp1`73fd%unvL8 zzzfNRgn&N);(jncl729guz7i-u!{LMg>A2>qq5VT!YeRYuw zd%OW+clt^-tmOEz!&ot1#Ymxun|56|k&%c_dsQCUXfyRpk}@LY+;v?m+*RXdSt`~3 z15u61Ud``yA=8?0ZM7noQey6dIqkFD9dCjk*4UIfpdz^$MNAaN_fbiB#Ig|~v~N=8 z-I@AbP-Rw2IR17g9X5>6MB&9zLE>mvk&l2_T0YKTWF#+llaFP{QQMRG7mPA3-zs-w zdBF#gPN7x}`NqE2{6iv35#XQUiZh*>=Ce$f!ONL$TgZT*mjQ$~BU_tuBeStE?=c!3 zhFI;?qLIRplRSTB*TMC!UY2-i1*#Bv$YM=?aocxoBAyuM5_A8|pd_^yoTHCW85dPzQZ+5I-C0_fc7K|Yp^63X*+P?c055>4t zF+pE~H)!6fmnu?56(oYIqW&6lm-^)J~A~w7BN4~iIyyG}G<;T_q+eOR-Ge4(7 z8U$`NOQ)r|ldzAl`rdLiWL~eUY}9%P9tR%<(04b#uZ{wo%Kj4Y8NMeF1~beJNR=g0 zf$lVEkC%LpGZJ!JD+Z)wQ`n4YDoh_wDUze@H1abGV$frAbv@R?d%s-ILY&@5d`E6JXk&iTWC!E=oUTY2-d|sz)l5;H2f}Qt%jVJ z+zytYj2)@H9wnJJOV(8S>3Q$*xw?6To9jQ;#-6Ni3lYexIWjD_WE5`by%ifswOgzI zEy`vr@2>;OX$mXL0+=oULqyW#9X3V=V~lL>5jmVb1OG$roM#64Y=T|_un;swJS=5C z6ct91WC%`ID7eC^bbfMAU2xrQn*s@Mq)80fZ&)(yJ^W+&0*}Q4|DpByWKN*RWL%-x z33FI|6%*&*FRlg=O~JW1fuE!AfJLP5kpy)DyrSg8NWv4y7Tj+gb%%4Tz9&MR)S`0^ zk(%j7E_I~M_C8k{*8cpSjCgXst_#&T1*6Mln!geP{VV%EP@;r!?L|Af{Mc!VIeV>3 zw3r@9 z>arTscTB&MzU{$L)R4s=m!|j6@YNGv2pYcDcF~ZdbFvn?$|U%BrtnDP-aL(C7?19n zr!f90;Ea*H>!_FCi7I{K;Fs80$*c36uxonXVAsu}ArzjoLrNHlyMT<`B~Kxiz)!U9 zUz>i+hzR?v^iuzcOk35E`+Pu|HUj({or*}WP=h0D=(mc*>oDuuif z0drz*j^8@xCmX($3|fE>@(buADK{q*YR?j{)$WA(h^8S8c;OP3+wo7@Gdn^mFYZ<0_|2Zf8zCPx=ekJeG zX`zNJL04?J>Ci}gcH=6!Th=;7_z$w^+`fNq(WCNtAlG%_@6W%wEC`yGy#DK9uk?4G zg+paa0tbynJ%owu>vKfaFO5EnGPXfvJ8TIRp#b-*@0;W43r}(6`G9zmk)*wqTSAnJNwaBo1|Kp{Q4G`ke^VKhy|8c8*v)Pv((at@U= ze})ZuW#Zl1P0jF}J_l1;6<$ReD~k84Z_u;a0%XfUpR4dnvV&~tT^ znk7g86G71Pdg7`7J(6rMw|obO;WXjmcm`3UBE`7nQo1a0yPf}F!PxtGInHm z5Pl9NEjj<#7nShNZ~Td{4V~3oH*&u+U;=jtrkd%};IYNK^=$9tZ+C9m=SL}*(PRvq zvv!mH($Cjik9S13`jSApEy;T8=Ix)Fy><`#3E!A$4x7+%Pk(_0QYfjA_ipqd?aspe zAF;hv1EFvUNYk)B@-W()wH3iQ-mh(#6_{pg?Cyifi0~q-GHZ3;P2fI1Po-|P;7}qu zvgap~X1-mv)!8yr>$B(P*#U65*Jwky%W5$L3*6B|v?m?UXfi(pe;3pGC5=!4bgDSM zJc1QpWle~A?Gk271V@eZiBk`%*yod=uw}w4{auMAyuj2A_sImEzQN<(@88*}S`USW zu!q5)Sf3aH*nwLj2^uQSv8We$FfUlw(|DPwXrlVxF1wOYza}616aN;IO9)Atg=Q6(VF9-KeA?uxhYM|w$D_E*Zkm@($j-t5%A{j_ocUSBZl zam20ao!1R6=(k&7egh35!=_&bFB)7CY#5h+7KOs0!%Js7Y_=S@#rxVt%mbX?K% 
zq8Wi?2{&ou;tm#!;Bv=^hrv;{8aJoq2NheAz5Nx4N4v?H-Pk!1zl#!|$$AT7FXLkV zD0lIuTpJ3>wC+}0ubVx&4;Gt7PE4AP0k3cg`bsxw@gF9t(pc7ThUl*`N2Y%NSlvMk ziKgt>?ptRBUKY*QBmPc_Yza^5x)j$Ltg+6(z^>JHXCP;cBbUFM#R0`#O@2ZU>I?J$ ztbA8fPAfR_EJ!R-jeZNCm2-SEwnJ?1}1zBp52QSQ{6&?0aL)gO=FS{2Td z9Ia?Sdb;BA2DLAtyCtQ}85H{jlGpkJ6nOl?5Js7s8S1*bDt|6wb^W#o%VB~M7ipJeqOm!)=+n^=ZNK!RVYpf@oU8~N$D zbc6Sj#!+9Su!3;SE(u+N#?qCL2oidUdbLRaljdiuuaETFicGE7^X4WVA!KaBoIY_! z)xz7t5asz)AZreD~@EJ4blH_-NW07ZN6^TTzSluz=pZ=1^PIM-U)bUjE zhKvDFZ0t-(L=+$9^{t2M7V)3U&AC09Hb{AAE~Yop$zR%#;NQG5#} zCmiC0?xbaW3kOYIYXD{p>+e#cJsPT_wX&4HuiVf6T5Q( zg$}Wqa!V#$om{QA!3W^ZNx7%RFg`Ryp2;#9qDK+Z+wMn412KKwT+k6L@NS@l0_ax} zwOLr{eneKUGn7QicCQJC>}gEb(yoD8Du8=;6OP4vWHk|O*-aH>GHZoEFah3JSxloU ztk)7c%uv+dz6PNHzq#eI7YdQEJFkIpMrR{LzcRM*pPhk1S-J>MK~KS9x3mV>L#68~ z=5ZTyETP(?7n0p!_gcQs7R^=A#LJmX%8Z zllDZw&{C6Jg_maYw3(4ED z@vo)8Sl>V;qQ|3F>h}t>=0eVYyjSj+b#1*fD*7yl{>n55(&-DcW$@#Bqq9(bZKui4 ze|O+;0^Mpq&kOhtG`}Yq!vuUq*!kDI_*SqFKAP)ofH8Ap3=8@LmF*5nYMa9AzZ&lYPq|0#IB-2Tz0^F0>eFukefk0`ZqqvW=5bGftrGUwQOPaj~r#16W<9^tKzpK8qTq#kFM>jsb= z-`ne%PBPmHr~3g?0QpMhVQ!n)m6LyjT3d6Ks%C1CGfI8cAMYVQP~`laRBR{`ybyx+ z#{qnMkyXM~Q(0cVc$>8XCLMac1lv0f(nJqX!$fJ_kk=l9gNe%&RJrdx=5RU%y=xCT{dw21^oD^UGvBVT$C5bHE9YSrYf%~@%QRSvV`O*GEvpJb!X@Ux{y6NKNp>Vh z+}vnR#DC^NkU@X+2d0&J@I$Ms5#h++A zkEDeF%#@cQoZ2Hjz&H}iN-qhYQK;7Q>``iz(LN}R_#7yT}v$9 zz?AC@mTqHsSGq=br-01lk80RhexsQWU`-RL)6hiEz9yUDEM}J6z&>^J7j5mG0*yQ4 zpE+0-49xe5^lNO_M$`S_y#vlyIo|FcKe4Nwwv6WpD_9MMrQ)MN?)#Ctm3og|j0=Cb ze{wd0>Dk+k$=>e7wEw1X33JYhDpDj?Ju#Cj^t1gZmZF|ZDck5B69-eesnLqfNI5_x z=L;yg^tm#UP>*xrT0X<7Kb%USF3b$#E`otQi^ZnJx&7|YL~W_f6iy1D8b!HE%v~wt zd*tFc($(H5tR(+j-EA)VA>-bR?(GpHgk6QTI4aswPKDd|Y#O{ZDvtQY_{p&N=q9+? zc;~rLIV$fyNBK(lQ!pBNm0>loDng64%Nvk5k||%GS^dhl5Sz%S--t;QB#2YxcK!o> zM?F^Yp=4e$u)|*tGfTt-wif>T>a18fu#z^Z5Q^v?S+`@JdSCjvh%w!uTHTGW!myEZ zQjIMRO1;#2!tXd=-K;2+*FWJA zm4u{Q^yY-*y22}WrR|Aaf-%WKHydxy!#{;gRqqP3-UK$fvqJd*7KJ@w_g<;CtAx_# zg^+fQMW4_Hn(fHDrV0wXHcysEBRdhs&Apso7EOFfd*C6Rq_>~&2X2H1plk$^ zud*<@wc1KO(w73f_$uMPE8%xrbmt;fEm^~*Q}Vg&)`KCjR+o(4K1Yk3(@?N>cHrf% zZymC##@nCUu&6E4sz>o>$CS+(1n^y@7kj^56$?#v)E(laZR5D90Ry9VV0+-F2l0*O zIjG4v@Lwxj0?n4PsC|C@Y~4Ae@L2UP(2I9rjjOIwFq)LSjL{7#boufcuU@6JjYDe; zEbmS4PDdasHk;!t#lPf0MirSrY(5gHGTRh?b8?)&^p46-juZW_ZKlGQjUW8y3#Vc=g%9OWPz=Uo~*QO%2NT(x(pF#d#)A)*yT8`>&b$v=R zILQj*6$SXfPA?Z4ms(ErWIC)xU;fiK%V3hd8E&9sNAx0h3&b{P zV-GS|x$o15@3ximth;6FC%HO2%<_D81oDWQLFO$POUxgp#3AB{mrI*HC!4mg^4&Yp(}sBZn*u-P~rr~OUls4s;r-o>ern7Os_13(m)n4p;lNP zmx9sS-mpbz}F)8CZAc0DIli6Ubri06=HbevEAj?57JL?!|>!B^H4NFgj}w&SPJv zVj74Rw3v`m$V2Ra*_sDu64U%o0fSV)Xxfpuj3VMlu3NppuprT9sH=!c5?il|q zbFL^#H#X?xbgu0r?*9sNZ|!zcy8*Y3U6dP%8+8`ACzlIhZCI?Tx{4u)dij=H%nw6Y z1tX{6W-lo59Br4_3A@u=#at{LgY+x1oUlyx_gh`H%DD}$Z)JcULvE~+qcXojTX|i$uA_!R3hw{=OJ15GhwyBZJYZcTy zj8uSp? 
[GIT binary patch literal data (base85-encoded binary file payloads); no human-readable content]
zp1*5J-4hh#(Wmtp^m-*|)L|(7NvTV+E(6aT;}-QcsedAz`h2*HWuJ4tQ8})w!#b5G zk?;x+0dFDfd3HfD?PW70y|PLtk&vZ-aN+62qJ%NXc;8`ur2I}KSM3rvDN1olN{n59 z&`sbmwz!d(g|3ZEEBSp~^s`b2Q!nQa6gz`pyZee5*-{u7I~u)YW~8PBXDs7WOz+Y=f4NSfWoAT^shW?V5dp28Zed9&I>QthW@qg)GYhFZf7zqafe7;Ub_HjZH2OLn16hF z8l9X8X^c4Iqo%#1L7Kn11mcrn1d@{0sD#6TpE_a=3(wmgul6T|?=E|G`p6gJoZY@_ zq+Yb$Lft5IQ;KP_m!7|D^+M5Dn_B*={O$(6+PMXTkzEf6Fja24`8y?yk(5Z(a-)-R zrCHasYCd?}w6WYu?K$LhwPR3~9xMYadb~8QrPd{yR5KPKWKZbC-vyIlnK!E1owR3B z{9dZU@5G?6_oGJ71Ms6C=%kMLaAr&=gt+sh?qDiUGv+?9-5j$B$iV9N+`mfZA=a@}|C z6x@tZ3*iP41Hck)+v46r#4Y$}rb3X%rmVuX!dsn6L>+Gqc6n6rJW}GdkYM{^k{jKO z9!=+rV8$xDOU5&^5H`p+2~3#Zk-_#d>xpz2mtJ10?*puwe*W16v9OGFeA77k1W6Y^ z!m3|ubbLO?y#dq%J+xWEcpzksTeck^%()G6c-JD3$|ttCC-CLZ)DqUKhfs|rKs`B! z4fGdjj=PvKoYGe;lR_CJAh~yVg3e}h9Di*hV%}u&6Fh}rjX=>$xmP2v=Y^PowYe}) zJVBBuB)b;t0Cu`jRvrJTJGZ|(*1;NG$L1<*%U_#ys8hhq-C+8zDPJCQ}`|I{&JJ;dy$8z25fZ#7A=&tknuoG-Odj9@8!481w# zPb|7nLVZ726mJI{531=YjrCeevd94D$18}0&!Tn&>KGlaq%xA6Wo8?$w#}-D<)r~> zF-Bo~Zxo4+z2JCS9J&Xv7oDAU$Pki!Bif%4=F}j2w+-V5jKAcv=9Oll(7Sl1T(=9H z3<-W5CcPu92qWJOxY(b_=Ca?xx%9kuxh8v+2%K%%{_IhFxcU3l8hVyCQlj=Xv(iY5 z&vi~cnP#e)92^B)IP&Zx@0u=A4_8jp53`Ma_dto-)oT%YL4OmiG=ZEQl-WJ-S0Qw7 z3-#!~b4~v#mFmi%LYvpksNDZz84DB4s?VPH3v*Y0?>QPB`)oN}2r6p$K*(qlJ2ObZ zYphUn4I4bHMsNS&G#1%3*HRzEu%QpJoinOZikf|j?Fs}{1QlP0l zoA>8DZ}isrN}GWsZk=@xTJQ_i`@F}^f}3-*c;KY>$^|h7(9YO?VHR=vcUTDKkU%e5 zxtd~_P)K`d3VbjXs2)@+Aj6+-a2OfLcltyP#3Yc7wX#NJ(v4<|sc9GA_`aPj6gl#a z_90gq)PxII7Jpt!@aqOX-zt^Jr4v|8q2KuI)y1@p{Z^q|xCPnihVTQ~DZT=$Su$}h zb7HKq)AsR<|9Do6o&ULa<`a&ucH=F?x*cNQK(}J3_3kI}Q~wq25$I27#1uvGAXW?v z1uK`~f-8+~i76m1yUq|LpjY|KK;{&jEo5twkrvv2E*<&qKERDD@Q=wQU9(E}H8kWg z{uG}zlD(Aa$bpUnhoSjvnfuqz+CVHJ<>>ALhw^sm=X|iRO)pL^Qzo|S?MXdKqrAOkaYt?T<1hVl$wpDV*z39zT5J;WB?l)YRL+yoF5f$zv9Kg zrxHMZA4we(Zin6CG#?ad722S~Un~}$A51ImJb8kgXlHycJW0W#WNCx_w^WC3;Z8J3 znMUzR%NZCW_|_WR+X_6CNNd27irvKPT4)=w68BIPk%mJ^RpedS-zE}*RnA?2-iG#; zL4f>csTH0CFrZqX{Y`VOuQ`MR()vvAt0Hodj6Tu;b*K!nhUlPVB20D^?@CJzGE6Bi z?a0#4H&PpjFjlJ2Y;<*k2}d;eN%Q)rzy*h`z`RKczcp)16V^sWhsjkTrL+oUZB<&t)#vX6_#GC;;iDoQZuf?l0H10owIX^FoiBy%{UvFBt)D+YmjW4mRif=rAm4K`Q= z4c;kCzmO>W(+omqa$!FhDhvXjMAd(O{^W=IkE_O_<&3SW9?wBiT-Dfh+7n_M2h+l+ z?>_@~{qw2x-**C{uwTB`&gSC{zrUID7eF0O=Vv?ed`z$t`uxnmk%=7%5+Og`=&{t# zf5D^GFFxb^-qgWPh=?4RD@(k5F=v6uh0Oy5r#oNyFL_**QJg5I3YnB9qbjqSsIgF2 z-Q4tI(GC)10S2$?Y=wb#uS#9cJx2t@T~|ef9uu%cY>%)yr+fsa3uJjl3|KTIP#@ct z#+V4!P=i6%KGe^@7%Wo~O;^HRZFQcs*~TG925q!5o@FKFh+_->e6E6Og5NsIuO#%o zr(X>=sCaKNS7s2?@ch4T!{IwTY_U!c>itpO75)4{$bC6D(UAtq1^AqBCnt~6@Ri;v zn=s1+3Y2nr`rCq#S36*o{Jr#F<3suJ&wu1UqnMc=^YrhCLFiT6r5``^=Jad$!%bDk z=tAXOU$rykQ-+X3SPb3wIvXhWM%PW|*_t1WMGiCRnL+%E4}EzysCm=R7=Xue z?SvbAHspGw6yoCWQq@)sbkhnw1DAfMMY%PfeWK#|1!bL={2(!%I{LA^>kOfT>iBdn zd+E|lmug?DWk>d!gS?XR3GhU5_akh7!fbJX=+L26l=jZdEBl#;c2;lP*EhJ-I zpBc)-!B81*Xnv>wKepw4PofwjH?Ebm8<&(vc5Q=&;pj=5FZ=ynNb^Yh0sD~MJq(?F z4NZ})^qZFw+5TYce>lpW%(%oE6%9YMD7HM0S_N!B@U|^f84s3$d)g|W@==oR>VCg( zY1q`~&hPwZy7b(*vpuuFzvV^0%lhji!PqpMlQf8^j0Wx?DEOk0N#TP@`j(Q`09Np9Rk{eVz%y}9g;=DCuTLPtz zY?*f##v-ZJd`iQ5$_oe~zGsnTKLrV98?B6Me^)7*2V($l5J#l$E?=a?r2<+#H~C(b zs1N7uDxr9GhN5PVzY$_QCid8vh^bICgj@wT2A?Y~{@7a{r}Ep;{@?twT&li&V1-WL zvxaT$^*4Vezi&c3Px>_F?bQF|$AhDq5VLavRg?Ch;)C-;)2mZY%Kavqa37>t!*mH04GALXG&6>!mG!?t;732H#ZE|91KO>>Zl`i&#K= zhSba6)p~XHBqlCm5Q3t;!H*QxP=B$~@{V@B%ap`TP!Ark3J6GjeYjy}D+HtBKDD|Q zCk_1;IooVW?9RP?9n-o-!C~IG-^|-jTSL9>+qbOprA;~Jey)3Etq~t<$Dg8T<-+HF zVN52Ol+NE0 z%l+jz8Z)jSJ1bQl9H!`GY06WW*H1MCw(F#372GQWu1YV44 zcNtW0@1bTb772zIiEcZM0Nt)9wG_UMiZHLuk`-7a_f=m+xTh)Ui-hFSZBF&E1Q{j! 
zhr%`>o8OEkdX9`j#gUT~PR_K>Q#Nj5=L84!f!F0*&R|V0(!8l*e67DDSkT>q)bJ0ttq-0r@}r!zq_#ZD(3jUkNuH~aQvsVDVY>6hnBz|TZh zGjwvaka?4QszBRZVS>1NDsvTFH^-CCO4s&EZ{tYVkm5i%|MT_nhVgi&P}!J3n7Teh z!kk+_7prc}LyRESsi9O+O5&uca76`zg8&su^0*J)$7Lq$CGTxEewWBs`X4 zdDpA2@njQF)}6uG1|aM55ZR#cixtWY0VcqwpV>TLF8J&Bdflyk5SA8UF~d~2VxD4; zsw#F60{%pdI9jWGn>j;A5ff&4*oR)kTS%BBUZR;hl|0{TFdkMS%SPAO)r^aZ&hT%o&W6}0z1D-%Iaqub}11FtpBGV!NN!GhMLpLz$657=IAh6`!LZX?f!wfkYN$&gp-i(h!ZhiB@m4l z(-Dl&?nh^TG4cjq7NH>lm?Oy15d#4P7UbGcmk-E1jivx9E7G-LEcvhgZQby=-IFqveaVS6z;%5H6x^Ted5?&G&dSc!!4tyJAZwomGv%%!9dU(ZkcZQNVJKi~u z*bg_#gvAZ%&%1SBsTy|?(}tJ8v|53tzblPSX$-IVwXHI@f5K=Q9Y|qJc`xX;=V$tVj^NQkWbn_BX?*O8s>RX2Qa6R4lCC%|n5@ zg7@2n#%|c5xWh-Od8=kD@BWLlPg^&X?frC(gai@jd1@Pl1(g>F?Z73zcy?O z6GzH@k1kcmxq;g`kUZ*Y)ArefsIyuMr51NG>;8^G#(x&9r{lh} zzH8)i8ecH?KN@9^n=wIVd94?*9F)9oq*B?XZ#t+fT4DR-6TBX1UgIA!mQU=#A(MS( zm5PTSfo>RrV}~cuZw<6S-8hkQ$n$+rMpVcbX(j-dZ>L<0g*C$P0pIm--j;so5OiGz zUPkdeG{OKN_!bIGoq@@s$5hV1b9WUC65N(%nmf}#P)f5nMYeLX##zSstrd>8beUG= zkkPGVR}fJW^aYvK7xpXQOa6l}N0t84IB=YymHWSu3|svCu1O3T*&g3TpDi3nE6yfmEcC^Wn)DmSGY&nm-f^q9F%lf4J<-` zRadU)1Qxtj@m1Ggm*N}NlQCRDy=nHRSIQ!U$y&n5#AWaCt=oufbWI`pQ_Zm*!oua9 z-&UIMX;ofXI8eth$pwq^D3@$O{g7^5Jvjs9Q;Pjjsrcjr@8NBlD~1o#B))30BYaB0YZM(RC!ME$C=L|l%MNii8urWP)krYZW=h5Z52-w@q$jKPh3lQskIeaa0f8bqi>35aMmf}^7nsY{yi(3je7kN~<>~T@ zsUd87xh8I9xHu{Tj|95^(FmKb*3)8j2A*?#EfyVXU+b6l!;_m@=}ni079<^Tcpo%Z z{#H61{-H`NpHooT(!B-;+}w2!gi8r?T3=Vu7-`^sYukaMs_NLj5pC6}IiT3X62TGc zhhpwSBHy**FW+#-%}Smu#BgsYcISOyf5X`llKq8?1%L?-PtGu#Y&s+Qo1Jvy@JT0S(aS!6YF=t)1p&#Ycx!-H*n~T=niSJ_vRzw zW8qf#aGRFOzO^+LumiBaS0I(}p_8VK9_y#jwLtM3wX-#cnQ@R+>`GGG$n9XqmP4_N zd32dGOovjTX2pKLn?s22Zt{H6$if)Oz-8EC-7!<6RvllmFZ2n`oA!;K#{&KZ@{JcK zbuA{uP(>V~DR#r(O+LaB55DPE#HtExO3ltC2b1A51ZK=wwEPw?6%&r_cq&60QGHPQ z_xJqQ$u;d&V+#XzZWjh<#a*{_uvD`$CC)a8CAv&21Ln6KdSSLx)SNZN{I0}8>=!|l zTc1=TC6?i(?S1oc13~BW1oojO#F~vhVqq`bXH&paLM;t@Hl5!({ve%s)0h z)C;bpZw{S{JRb#9$GZ^0_%DtQT@Z64_fYI^utj3rAP%m7>d}@af3B95yP@7 z4XACpf1mumPT-Zssc(6>{>$#9<7o2>gbfZm!h!IGX-p)!BJDS`1pNwKH&%&O24>Pp z^HTL6pvaxX%cnTPHi%m>;0eGu+dY;CWqW@w=1#}`vj&cpW`hLXyNZ&fUi-ma^gm#V zt|MsWyp!NcSa*fLsCDNu`7aDouqkvv{q5CYJb={J>kBue4b4lsYt4L9B(?B9CNPN@ zF8BQEzG#g&=MdtbY{;YZVFvP~queZ*5Oy|~AY4&qx#A7@NIfY9!En4(P8pDr|3Z{! 
z(f{Inu?k=6^NM!>Qu6G4=RDQ|p(`59a*)&SOjYQG*iJb3l6eNsRVN?oOONi%FKs2gFk zhR@~O_ZqkvnuF9d!0hZNG2F+zS&rzL`Yb_9rTPEJj3Awy#Yp6Yuu)rts$)2Yxt{m1 z>+y9TBX=@`WTvwogNG}cCO@P54=xIXEQ5Whg}Gz!uFameW|}v-oD@qWg2=DRcI*`~ zOfKaJW47_(%cz3bi&fQqlN>Ux%L)4v-Ws_ZTZ-Mi;kkik(f@F)Qj zWWCN~Fe*Hh&9Uf_`R0q_V|?GAK6?V;3{5DDbu=85)Dlq_5##oS?IZfmad*`t(DI*| zGFA+$+s_le57+e1M~cr}Ra-VKurnlZ|Nik>{*PDUwf$6|mKtge{!jXn3@7f|a@_o$ zY6FLGN2icQCYFj6KS3TL9A_7?&B1bNrzkT4TR7?QkO`KyGw@`EEGq_h zP6z{r5J0s88@0d#JpBFvnXtKY3##vbOoskXu?`JF$%bB(Q~!l$Ovfz}D~p&behR8t zn*!md+I%Gn4>}fraM%aj)w^`|$$wV4MMkLocMB6-7ER0P7^knxZyP1}w#JoTs^|Xq zt0F=2_+?M98~tio%n~a{-Tp^T>^}qA^e7>l9^TjYC~ok#&i~*YTMh4BF>DHwnAJAD z^3>D0Bk{PfWb110wyT@-!VEKXz4EnfG(9rGbS{%4MCg1%p8WqDl6Of27rs>jT~9&Edqr>G-bZ$4Sz;-2*_M=JdY zX(5nHL#xx+S}KJ-d^QAeHx+kUwY+E)(>}U4$3r#=Mp%2lz|Xz2EIGYN{iyKeHLsyq zutmtEywq&%(OdiaPjrN|dk_QiFT-fV*yywII(;^q{JW*h;>@>9q5KwUwwL4a5*7Q$ z2r0;M4Q%YjV1TvdZMy*Mj>}L2kv(l=%0M$bER&P)^pSR#HE}?F&JgVVU*+fq^sm&x z6oodm1j9^4QU5i*?#TMim!r)i;s3s>Tt&PJx#s+~TmC}w+iz5muRMOT7O|~@w(;Ay zMK$Zz2%jYZ&-$bxji!v?(Y~llxn>1xsre7}eO*_uClKY=$y!sKA2^^6Aq3p#X+ZL9 zX~P5nQ{o(P<=bQoOT<3!8aFHYQi%(9Uo7ufsCun&d{7eOo4Z#5@o$#199xXWdtYC zg~~-v_%a#XsoB~7hfx%#I+9LWQ}wJ7h$r+>vA-5l)hE#`HT^u?q#<|3svdY=pe{QDytY8Rop%`Lqr<%!f5nx(GX{UO`ox{U=f z(yl%emifI1(Ru{m{BC5u%}?SP}bHsw_cEbGK;wt6-Pu zkU+_*^&5Q?}_3Vj|gk@&4HhOfr5IuLRnwy)0P7 z8oI^UGw{L;g+KA`9n`KrS~Jvm?Rw`*KH~8Kr#Zy%Ao`9~6N-s#UN4usT`Iz7 zv_RpyUmd%3#zPg={P@5otedrZQTMv4HQt5ko+=`D!OeG79{iqfVH(48A#7GIq5HM` zt8K(4uJHnY3#lN0rjscdHrL@Vog$$tx|>z|hTj`AB=@y{=$1(QLp%w>Jz)2UN{v_< zi>t`M*bcD`e=TH>y!WP(YR*TEI6z9DHD^A=v`We1_7qcbw<8V*l`)AJTrUG8V>=S; zvK9u+rUt_5#_5^#4Lj`|{Z=P~H}1@iv5KW9gpAq=CQnpOFyTCU93=C?AHluIyJs(A zBS$bD7ykrCMyb`P0)8pwzc&tGg>0@h7NpTyGl-FhSEZWv9RU}Vv6=ja11+rzo zQYFkcn0?%pdBa*74l?kZ9Bu>K(C?Y-{e8mWK3O&}Z(!}Y}%A9mY<)p{{j z-f`o69S8qD6|$nvb)=xfP`s$uLDm?igs%?}9SC(yV}?QGFjT|Q5eykATXclq2g}EUgxe$!>29sC!Xv;#`^a@h*aXofX9~Q zedxBv5cu7JvN1Il*9$b6aSEpX&-bi9M#c&hbhbJ;+47R`5d>7-?d@laEVB$+bO zT+M+y{!C&&@15w7QrIL#4gRPhX2{s9<^YR1jbq$%rM>1ig<%!)wuYzrF|8b1O?^OA zy^gYLeD`1p7slV-FHi_gQ zgF_y`-{*6cnfMVt1l;6I@;Lo_#uO1H;fA7@fR-z6@QI|79!lH{T~L~CCEHYKeps^> zk-6xxL`+UUCpoYx66@h(F&G{KL z6}nWH9HL_VF_(iyJ}haVBIK7p1(9R!!#WS)MDrnJwU@}_OQ<_IVkdA`w;k)?cp%Yd;?&SZ2XX(!1xlPLQd zJ{tN2mZM0V@T!i}_G1#WsOFR!>&SKyBSD_TnYE-OE2GDp5VM6xNLq<{)=Lq_iHHYL zfpP(x&ypf=@;DkUz1f6wfOSiRIIdDxoGX(dBkH2A-%_yUo66mlLxI0;M`M;JL8+O` zV_3gZNB_vCePD~yGK}e5{ zUjAB1*~1;%1{Jrl&;QPInq2I;?0T7G?=@x-#*}yO>}%dV-jm+>-|^kuzYq^9)6G%& z5a13b4pUv+vdx;PzVk4So*9;NNaupD#Qcv|Vis*o}F#4p$z72np$0SDt0I&!8TwC5F{ zh<_eRPJa1@uKVH6kG>7E%=zi9?o-tk$$7ZU!GA==u7npLX(`9=%BcLS{Sk#M;sH;Oglkv8IS zmnN2=7D`%#fu1(}_HoX6KYrprVoY!B`SeF?P1z~AD#5z?wXQN*M zLZsu*Si89|M#nJBicWlGwxVY@*Uuw> z2y(yZL8-ev6Ga4{tiI;^T32*Feu5XCN#XZkkRzo!oWfXF#H$36^!PyWn0A%|7X%7r zL-ES3h>NG=0kj8NsO6+D(?bR^Wv`f#s}j)ApnSXeaDE)l%@A4mD@zLA1pErHP+4C_ zhr^%#*6Qs$z`XdNcy%TAuyO1SbVcmu5^a}tHKb}u2&I}pmRU0E%Z6Gm+ORBhCv`RW zyVQ*?YG0W%Xdqm}8~EroWU>a{TsIdnWBOSJoXU#q6R`xWvEqc_=Ph;k&M}zB-Ub!w{rfz@x8 zE%cj2b;a|B_PORoGid7J-_hpNJUx-INQqpsi>_NG5v^or#0pBB4KkS$>aAJP@h-`3 z?glC^-1D6Jt=i86iaYSu7ki45?$m*##Du9abMbP1gZjr{Bu;XXvq_s@9(v&Lu+3vSWs$sKsI5JyW0 zpB}kz94U>6X={F&0-Cavh!N?b5EIdYRGYUc=e9}Y(vf_xRNu_0Vl(va`0Om6gD{I@ z>*d!)!#mY6`=9|w?m6(_eVvKUk_jhCIluAvy16)eSPDAU3Q}zJ^}i2175RX6U*<_< zM0hhv)>^6CJ2S)^I+Yz7!Hx^p1L$srzRL5*+_^}&$1;}nEKD#=h@|OYw6gvqC$8$h zlO?qc61|U%)*0MWcBqqJ#6YQUadd8q6NO@n8xci3H`%MJIWLQxshJns(07NzMl}|C zd*bywgC_hfo*7D<1#pmUFB6uaXZ*m*#WI0{?V<=v{STY(bPkrye@UKSR9?J#~eoPmO$t8(j@NVJjU##DH=Mm=pu*H$g6IV04y>nB#$NW(+j~ z8Z<=|g2lZuN)v-e9?!cLM-&6E{52}ofbiLDH2<^c^VVIhZ*-ov+)AhF3&tZ!Si#e< 
z>1!sS(G^6j28zidnX`@Zt+sc(7AH6-C|pGsmOhTUfpcT0qb%?bVCh?uxqnhH3MN+F(uFR?!BTXgp)~SrQ7jj{^u-$T>!c8&KiJ4~6m*b+)U>zFfe_)RHUu~QYFOp|c=x&e<@o`q~jA$bVOhchl= zpo{wcH8!4ZO%PRzd2CDM{3b-0{lo(KZ%5?SI%`A>XnWoqnL`UKtlaqgYHX?YJJ_SO z>0^kz;xl@GJcG@xDr#zpBz(x%qT-T1;3)6%yXp*>>u|+YqSA!0G`pEV3kFJfmrcEj zNRRuLUvRJ&U9_TIrke9K(ONrkR<;p;30N=6qo1< zesQibX$(KdU@C|PjQgV!uG&Wywj+tZTB;f^7p;Xe6tBK=)bRZo5BZr${s1+Ef1P)6 zv$_)g30z|_ZIvhypg{%*J>@q`yP`0p!Z}>6%`a;Bc%GU_D?F-Gw>KX^g9?2^ET4^u zFY^U~wh7MRn379{U)3an?0wdDTrK-;$xP{8%fBMb66_i?vXW29!dk)ROf0L(60ru! zW7BWeC>75?PG@np5f@@T@KUMfhnA2c@q8q+yey$+h%uwSNzn(q@q@>sy1lY3W)A}D z^?J-^4p@XGW(Gq^N7C3qF7jK zAXU%Fw`*9ZQ(jV`{OfJ$|16C!_lDP_Kx;rR!k>$bJvun8!SP`UGE-7%*y3avbFQNa zqquuhYl;JU;fL_qPJkHKxBf6~!TNikf3gD#*ps;KtnN9c| zrJ4mJ3CUrJAf1CwSH#hUMewCEIg{1Lj{t_!@p$i2v4_vkPtjvWTn)7seLjn|RkQLU z=9Tt6t1^=b7ZedY;D6(iZcCjCwKK!qe7?0t4c*j($_i`fzQ`$C8y^a+3DJy^U3l4G z294tUn|t)R5PbY9X(+Xk55L`hxW2th&3_q6^Z^x<42l56ccCDZjKDvTB5E$v!yfJI zz5i~}|9-Aqg)&G&m8mJS-3e7CdL(`KZ}w^aU^e+Qo1eqWrMtG}$-&Fxwh9TK?El6} z*C#Hv4p)s>#lTa9^sQS4`Cynpx{}3L9Zx0uNKT~HRO?CTD|f9Wo!ToXI?PMqr2bSc z%zQ_I#XMdjY<2zLXbU~cc4Sit)e5p_4lzz2O5{s?kSCtSga2Wj(R$E&XgPKBnj60o z;qtgEEVq&cOh>aZ%B`5!(;BHKCs2NuxzC0kzv?_FcpMe;lUD;XRv9VhOzHfQ|I};S zjhMN8J3K{po7r=%!66qexGXn_`Jfj!T^M2#_&1cA#I?XyKwSB`etoNds+Ko0#gMZK#v>g*`}@l{$@y?wtXOBP1m|hNa%>e4cZ^%9 zR#I38mQceY9Lwj9Pu%{gTgI%xFaY~mG*W8P`Kcgt#>YVIMf2gVmuH0TK)9LmwW&iM z-oojvCUA-HwU7Z9^KKmUGfJxzCe%;MWrd*To`y>CK13OfC%x$ATf!CsOGj*7aYh_H+yTlXe8b&)tT4i83@d#AN1TFPGETM`HQBpR2(R|5vk<(z9nkf@`z$2Hs$?nQwYo6r(8vi0 zL;t3ae}x5X*I8mG5AhVt%GWgf(i4KwKnQJEcZEDbNIUjP>d0(tlUPlGL+=Lh9I-a=dhnAkljy_+h0}nDhcju2NNtH)J+N49&g}fnQgdNs zGKxHJ`QTS@0%`&B?aW;l?=J-TmuMFmMO)*p66lV8*(Y%S}3i)p)o-ehqJnB?`=ai)?i|7IO+*pVLLGv<5cq z%)H&_H(A0~nJiV&$VC&A_`Sa%>)%&GD&D!27Oyq=q;5W$a;HRs*I4YfdPN$U{7py7oD4FY2f?k@Sgqz?fS*HT|aP`f6 zNpi1NW7Te?0pis!Sf!ddAwJc6icuZ#chL@8%BTu_LlHOP+rIM+T8o&3B2hwOj&NQ` zfYpPL!fRvEh-icufwNsl5SP3#QZEl`)^y*^UF5K0qz&YVltG=()X#wLes5Bu1LbrW z?r&M;F^Ugq_Ms(+rO&j0R!7+WX9+L2dyh{$by{~~=;pzYH@C#~y|X2f2ojY4EQ=r^ zRh2+aIby^hR?F@sI=MmVKT{lGm_2g)zOY;=ujTj$^7eXujJ?^1^HpTRa)|i=f)u5y zo@{(VWia%KlN0ZDQGnr`?e1d7sqy`anA&Qt|39?qq+2xujoyNL9L2=$g{bdu91uQhffrSZ$H%m)5oQ|af6Ed4CJ4b+dbOAi^3b}n<{Se!>$fhTRRp9u6+ zo5*m+UuVd?shgvR{OREj6GkPFJqq>U58#N9UH*y{YB9;+j^n~JUJ!)S`%N%s{HUPq zb=NI;k@}*%WHh!xUPR3w*N9{w{68qFRfAt4;#q9J==YB6XTNW0!U&AFs+#dQ{a0m zSYHEI5{aUfK$$Pi7IUp5&Qn3tbh;cNRz<;&w)jRy)4JFIh8Tfq^vw$-8((`xK!-Uh~s!*5>-s zcKTo~@X`923lset#%L|)8%j$Z5;C!0&gcskWAv{&gA*h$^*iZJUT$+$TYg56FVpbtwafpI27?v6X5O(Wqw>>b*vI9pE6>t)bu}9E69p!Pn0=kSMJhcllGT*i5f-Al zsT5o<-r)~;>ip)pdfb3(?COv0zku79eI3>?IpCPP4CR%%v$=0PsV~pnssNvnstYUq4p}D)i)*ko9nEZCUJ6Z zd;;E&yj2n*v}cug#W#T5>Vd-o3Dg=7!(R1&G39J1HmSEmL2+!WKC*H`1m0Q_-rk~9 zoMuZKQ)DhSyLNo+24DKc9+3Wxo-7c|(<(`J5h+v*k1w~+llH#Ok_hRFIxu_}{aEZ3 z{q`W{>JO;Co=~ zRbRP$5@@fuGSXhB)bz>uNplKXMV}l+*p~B|@m=HdPIXd7r3sAPH|;medcWA^^*o1j zZx5%^NM;nP{{0`G&cZ9I_Y3!gfOM#Ih`{g#=?(!ArIGFq=}wUtx;qDuk_IVh1cs6> z>5`!l5Ey!ZVa}c3y=&dSVAh#+-uLYN?EQS6n+x*;S3c%8o2snrC*@~72%=z&e}vD1 zg!om&b^$cL0&?#dQ+4G?ja0WNsBv04INCfZl>lUPIP*PCS;dhK_@D^v-7?RSg!!Dh)-(G)u(J~00M zs|q=msO{PFp~>B2Hxb9bHT7Ky&u*1^wYb2P&5AUPjxm%r)=5@2`J~#TR4m){N+p3XwVt)!6_zmlCQz!VH-P_E6 zSV*61t|HBE^i8AysorES?g@QQc4Yst0}OUO!r4(Lj+BeO8Th`&D8)RjePPFIlERVB zF3#bRD&AaPDYciCj7s?+{7-d-o1wG)JXf$6_zEQr1^wj1Eez*qxGWkqoUxm=d3s8Z z8jqBg{)842&XN$?%j$ilWx%}O=R#xOHnkO!vazhWfOm!&vl;sy=T;A}fXj_&5HHSC z#l)~+1j?97pU}MDwQ{uE9K2R*4fCJR_9>W<7$8T*cX;MV&6Ft+%jfE4hYJa8bIbIst=PM${!`MePI01;+m-N`l5SqEep6aGAEgm+(8WOv92E zmMz&l9*{(7zW(|+(O>vv%c#Iao-qEiF~Uu+rW#~0FmO8y+dP{mf}bF6M=MlH^K!GT 
z$0KA3&z~)X$!9+2DYVpst4<2x_MS6th2an=>P?U%0U{EM_fNooV^YZ!r+@!Qla;g| zWsiRWsO9yfq9qjm$l{4Uz86Ma)It|RY#g?m(I3D5qtBXkzW6oEhISQO<%`B`g@miM zIUid2a~^8P#y_x0#;V5-imkMxqg2A&obo z*LHcn3^u>wGL3>cp9WKbn-%Y-x2G?-)h5%0Nw1(`ksdInAD=#Im-YxVEpg!I2~+_~t%o|2 zgMWe=uUgdiS%Ju5*igWhq=r!KuR@4mtpw@gq-3ycg$mWQToJ*usl2wa0<=?PD!^EJ zPSdzs|A1s>h~#PJJu&0yI)BdF$ao$i%2=xYvp?Vl`UsGr76g78FA0hrxjxq4t=-=Y zYS(A(swMQQ*}9uv%u$z!2j5C#@9}-77z7K{5iu+)++CW%VRz#^CZeI?f&W#$?aB5I zs@>yEn+HRyb^wTn?FJb07@jpr=X58;vGy&$V;bWBS=|byf6;5>kDCy%9AUM#$@>PC z|K&=%X?QRckMy$V{)f#dva3`r9ShHap1BMg77xs%T6;_%);nT>cQ=Ed9B!)v3NsL- zcdWRM7^a`9eE7;998Sd(HndCi8lt@s%Ap^Ck&Gk!Iqw$#JZZE`tAz7Ox;;B@U~SsI z&iqq~aucfFU8yJO?m~NxYj!M8(r>1wT5EGdv*fFN?4!_hh2XP%^$qG1ZP6&39SckDVD>#exLx zn{5s-nsU48feb&R^~gW7zosu>e>LpqPz?7<45p;V|pa62QP|AF&>&_@|waJpL%N}k%{$WQH;q;%aVd%l5q6x%po`JnC%({OS_yQ9mOD zNEo1?10J*R!{t2B(^+B~A;9mSU`oyV9-4G=Sfzn+$kw73pZHx!l)4kPcAbNj#HVPb zQ(H-46P$Eii?J6uvB2hS@(A8BoI6hDbnk^iewyXMAfn~l28|CA+>CLciw~gm!P&I4{G_0Hk z4ba@O{TM?OjmCGk8#^SDwMKY=5@0JtESK7mkg0|zGW;XQs&)fiCui`abOk{P8G0hL zoIre6v6-x;Ho8~&h76vS8WKVcif@kXT-otlbuybNvC-1_k`ZX7o(|y?9Iomj^bRmVI}-Zz+)@=D^=bf z=vW!9ACo{QWAxuuI}=JW;b(K)&PQ$^d*D=PPr0uYzwhki`~YNFHnw1j(!aD%io&|2 zJNiQh95bZzxl{^LT(m|F*)TY}`)}2ov=|wAdd(iHa20Bt&I1>+;nx2k=b+rhSRAbF=y z!Acc8zIf8m*DOD-*|#f(26JJ1{IHo06-}?v9WN&nTUZdCoowf`?=VSACHvyObMJcP z_M6D4@%p#}K52Vl_8ceOUhi5uQ}dEhwqWYHYhxnxAqG*$}k?(is>(c}mP3DBzj(PdqiuI}~5p zUR9)0Ut#>J-|k<)8E3sPX_Xf=7#Cd<5*^T{U$evmkFm@k)-QS5UiA%F+}t~V8`!fidfC-iqa9~ihS zELN#p1O6*&LbN%k6|+2O!x0103JOTw<1Sy9`9w_12Caj1|+*`HJTWLG?84$_+@+WVJ7k;HBP!znX?1qtG-o7F=0iH zX43YbeQryXxz)l6xO;>%kmf75KjoS8RiiifQDldcN>m$9n$YKh-ZBd#1US z*7)9^eXG%j7GpEF1L>%$f{&r%Kf>leH{~)0|G1pifOzIh(e+cc+mQX`jeJ{(04H7J zwp`cFVZWlW_<;-FxG}%Ev*RmXnN*?#UG7gUO8zH~%Hq~Hy9qsb3Lc#4bL>Ru+UJ_~ zF84n8$_?*G&j#Y?UMBQ^&DmUADH1<}uzp>NqYUF%=xZgoZ-xiloW?(dF~7$_cTGcS ziN{NvkE+|o=eJ4F(m$7*;wpeqR|&@WY7qeDX&g4s!D{Vi{jBiW{nSY$IaTvyFaq1{?elkD1 zeaO%8-#7?5>u%Kunrtt)^|zH2w6mS!vYQgOQJt0F+p}=4;nbgGrwxAiI5fnXuIz3h+>4}C}F3RdqNw@B4QicfQjj7!zk%?6a z3`6m@>|>6WU_-Y&$MSB~-0tJbz7?;FY8t=81*Y{6y`V`=nshy@G#W8P;M|!8Kqq<5 zZP>hv{W<(MeyoxxdMWChoo|O~@M|+en_2wd& z*u$om1DN|ZXtmaV`pK#B3ZNlT%32MZL7F?f34l8Q;8dHKJD@wN;q|h#e71q2GTC&> z_P4F1zP@f5?e{*yT%7P>qiCLJH z6>vX*BF_QPxf15O9h}wzIXV%X`Yrsim{#}iw=dPtZOw6fS@lCOKVvTbN7s6qZuc`- z{rxL`0Y#ff21%5yUzetUtZwx&_^xOy)4J=knWC!Rc%7lgcY*ap0H^RI%d*LLwA=;H zMeIp5eELLY{ux~VYP2jnD<^nVLIBkE7r}(jF}cd?Shf#V>>q7~K$68b1! 
znN1?ahrH*3h~X>ft?g7wR_!V-+@B2bPh0onGVs~@ak;UCKRig4jsz&9-u; zKdU`}*5^mkZB)O-usZ=9+7-%ZCQX_$$j}2r49_=jsu=*z9~hS*T;Cv;Kn1O~@?yj= z?8hgjF_r5g>9lA6S(K~!k(ZJR9R2*mz=^Ci4o(?wvc+uTY1zfIhG-SJA{ipp=#`^Y z!Dmu;7IznlUMr~SuT}DH@Lo($V^;xIB2;v81m{wZ*p=2yibzoq=yWkLy4roaG{@kP zW5E~P_bSfqQmoGR!D_8=;-#&6Rp#>sL7`7$#+ZROGRh-TSb0{^N7QR8Ov=7KAV?8q z6a7SuE3kzh-$72^Iv}_qfoNDoh*&!pW7&vK;yK=$+veKUMW&4fmfwrwe$O5K_RPp; zqT*WF9!aF2)UiCBslIt8Hfq!z@y8J5x1*NZwhC#spcQe(7}}j9FZ%2ShprLPrSBEa z2j)oW>S$49J-WSe@$U02paaWm=@J$nf*-H-716CD@gc9kzu(x=W541YAW7o8RCu;G zVL0*g^>0J5H1-@J;X(I(dap_PTj=e98ZWa?)x$q9BaQ)M5w}$OhMIwxg~A#FkgQ=) zg*V9p-YkF$U?vtXV%jJ zQk+CHf!`Cnj8HP~6PiqQ(m{Is)_G7+lJTq%1>>N7$DSR#@`47aHvWh#9`rOK+g_ie6gAIm=mBx@Kv zuaz;Medif(os}T8w*t|UR*#!-I%mhRQDIRz|)07zH&7@IUJgf zn*$Yz{*kXrB0FT+N$^X#Ffbyqn1uih^*@C+SjHzir#YC9VXh8Scz(?cmynxqr%~e9-82@IkhWepU-98O}2{+Fh7 zQ}aDhX;;{7BhX>}$xhNsIJHdHglnaa@XM2W4)Yq35z@wGlv}^=)ntbIu!5h7DzzZx zgegKl{84E2tn|^~i%{5mgjdmmn){*c)Vlacmai3+%9=03CmxGus(FjGMXu>oDfBZV zPR~}&3Ysuv&x^doZE)E@n8OJ&gKdxMbB+twmOrqNuYM_yo&Ahq;ah%7x7Om|@=YKY zM-De%^Yur>@LqUX{NA<-&*DAgRgNd|5L+p??TUp_Okj(UnM&1G1{n7 zM$b!8bl%A{-Yg}^!L1L>7ego9buFFZXiP-j$r7QygUghsL&ULs+Tpej2P3I0m`EwH z5j*qUH@^n|bawNI%6`dCSWg{LVBtOvGSLHfYH69Iyz_{OF;b1XJ|dR2|}7{SG@;~7${ocUrNC0ueoV?^EeYqBgO zq!yl9nKi4BCy+@4sQ2gY=&(Qnrx2?T1V;f@@PX?JRkzyg?V|om$a?Ovt%F8=YwhXi zu}siD*Nplp-b)XXsy1&2{zb!I7?u8iIxeXu7HHV4@5f6WO1>D;MFW2f9s=Q83I-MR zWXhWkMmR2;`T?gL9Z-xiSN?5o6e)xBe}#b;Ib5OM=YBA^4+hWQb6{kiE~I{Rp`guB zxs`6b;jU?}J5TLKF>N{}~ZohG8R>)~dxNUt;B`$?d0u$mq~_2s~28G!ZmR@zqA^Z5Pn5jW_4f+)7exylPqzkLBHn~03y_Ft&vd(WCcRI_f{4G#)^V5ix%-95w2iFbI%tN(Je*(3m$$GwzN6|Ybm#k_YL z_K;@@N+wg=890bL=I)WZX+>39$!$wS(8ijBnW-UsBR*jK4mRS5P$MJ)Pn&R}hY^TAcFZxz=O0sk6@El52% zbLmbJ1gZvi&7A=T$>3B9(o6!2-=|Bl+GBIbyy>Dc@{a13i^Rk*R3Oc&Qra&1BnU9 z!JC=hHpcsUf9d}1J^r#zDAOr)AF+u9Es&}5{LrLtAXV!Cuv zmEkFdX4G2oeJ!gq!v!TBLs?c~KzZ8kRM5{3GR(7mZsT`BaOc|B3*Sjfa&?bC$MdKc z{CqC`YGk}!v;MPedpI!XYf>3vWOqP775KLLjWN#ErpPk4a@n;T=Hc1xLYJa zul?!OIn;&DnfS*9WpMEWVSkk6r%__#B&v&VPw5x__Q(We^@~pE@R;QER?34^wwT~5*l8k7`kF=Nm~lB|yH z6@1Gg7gpTBxiw38{;p#7bJ0(FnR2cp?sQNWM?SOR@jduwt4mp+cq=Ih`2lal{Q_te zl+f+SHbgVO#kK$#^L>8QY0~fG%%o};%dF~S6kfBSvSrgEL{hhk_rT9o2Z29< zB~j7G6j9UuUng#PdklD+iZ&p=oUvd$z-9UFC;1I(8D$!9t;O+SzmVJ5*MyoGTY`}S zQd5Ro+Pw_&&gbhHd!r>G`sOV9mhJ=kJVWyb6MF zzq|g(EC--it_jbtl{ zV37AU6RBjmY3KDUNPDD+|5kBIDX#zfMz+Yl!>=vCW`w{ z*BxOg{vuf&iXO(HwHa0*C7L5Y8Fn){w)TsIghFU;ELL_L@j_I}rCwBkec zSDT#ixT8xYk3OLYaB%xMcz1+tXV=}D>|9B{#ukGUB43Cj#9~%wetMO+MAp`bz+vum2F4-H(6d9 zA6e4gdvCO4X0YQ9O;ct3(op&4oSL^~Gu}rc5(SL&LPuYBy&=3+T$$DCnFz;383Iw0 zW|0H|d?2qsrH?GF!1b|=&SK?Yspeia&DPfD^w6Fb3xBN2o_*72maO9c&jQFc@$5fW zxWCoEH|Ihw-=JUq>v#s>{}n*K8HEw3!C7-TzR9QFJFsM#I-^%pbA`{)mTBm3n9L1B zrqvi^5JgyRLMNAb9J^SUo_!LQJIAnUIku5+W+anmx+H_w2|QE>-hRDkQZ+%; zWF@A3<>=+zMR_$UtGem}TOTuDOG@?YC_k&W)UAtMDD;fXYh_ByVFEyIypN_Na-X;k z4m@w|QnE7Z<~s|LuffiM)72-bd8UNXz;KPPkVia1YT?coIGV{J{0^<6%vzgbq?*53 znDDn1!r=QDKh5d>O8;>qOXjeOJKHKbd3Qn{340HJ@#2(>p4UfufLlZClWM=DnJCdj zF16qJyF2j|YMm~hsL7rKyxUk{@>y$_1MwO#*yq#<5|kZ?>=l6 zQlkboVw+{WSQ?5R&y(!6%T-e^_QnNu^6BbHcY1?GNQCkQt4)9}4>AuXKIlQb=?qYS zThgMmBZi0jA?^ywuW`7@FiSsmeMoGa5=r0$tl@&*Rn1zw;v>7ZU!hGHRe1eX3*a@Z zZvNMsX!ozb)Tc(jtlv6Wf=m-0`^!GN0W}8UdJ?7OIrzS*@<=`N>1>)qFe|J&j?j@V z7Oeb4x5i9&OE4>3F2E%^nSK#fNPD!cHL%ajK+2g=g7O#$@!UCE{#~N+>mk7zc|#Ye z?l7^(#3k#ET}Y80^gzNly4CU|x`O6qt#9=X)F&p^7z58f9FOR&f5`)^4;!y|IKM5r z=m+N>PKv84Rj>KoDEmc!e3V|JVi^+;&d}6%jNpLEk6d^Qu~UtY>>s~I3~Vw^sT6$f z3TqBrpw|<>Z$ke_75x1NT};I%S)7k4U5n&L-`@h|(+z1IBU|5^9bSGCP;j}mk7@i#f+k(Q0md1(Yp%KIV;*?CJhoJ%yWZQf@(c|U0Xxv;j0d&#o~W@Qz->>Faeq;_*j 
zKhHm@*J6yY*g6X{YW=1$y>F8IHq)sTm=mEPZ9Poj)UP&C<1q%WbAB4jl^)k)B;J>H z37`Nh@1b;fOU*HZJ?{9Wff@ez2;yQ|_pP+DRlsVY`A1hNz$;p$vqur8=QBPAneXf! zqxeU(k$THR%o14L8t=5uM}VrST^&iFIb-?IWz&uH%#m`!Wo-^&H3rU@Beh7X_48|} zDm{ha<1+F-H82Nv^fL&dXcQ3`#aG~0?jK`iuHQaAk=gLqm+oTDl7G&30n`0yUk7CK z5XX)to#YU*vN7l2I>;{;o=gjvr@kOGJ1$~0k9)z(21P{~dyk}uNM`aoYz+^bV$!sd z^lX=rpOAgngg%@K@mWedc?6SK*bw)Lkx>8ghz(6}lIZ87NJlDnqUc24Wje7{lC?ag z5w&d=0GR?M(CW^|;P@#ZlT*&~sj8^F!ubM?Zk=iP{cAxi=0TOb*1^&0maM zvs5ldPgDh8 zwO-hj!@XKKr+U`pZhRqDZ87hU=h<^{|)fQj?yvNK(di^931!#6=?F&j~3rz&b)b73=cJ*Kir|*1Jyy z;s_Uuf#ctv`&R0rmGktHuHS|#68~oob$SGA>u!(_>Hq!~Sthz2h5qcqEvpO;9V#6M z4L&q!pp(J63^@}=iEd9|>^Mc7`V0gCK+vr$=Y?Fy)602070P3_NWKEJl`CPKI^G8< zF4+j;FZ}%oQ_>xfj6*d|Q3`&xrp3^E-zOOXWcOYf;DRU^PB4=g#Z7ji&zXYySj`Fo zk@(BEE>X`xa?<|ATl{?aHz$~vgp8zU^nAU~>7eu8+_at&8ew&vZ<+X?cziIKD?nLm z^t#rDHOsH~)4v~-@gamQ{8i!_+nywlESmF7BF7@IXoAvv8{q8b3zZ3~hFhS^4JTU~N73bx<5)#|(cTbXR~Xh| zKg={22=y0arDqZyO)b4Z%kLuE2pw_;sUH61?J&jm#~k-t*JYz69N9NIer9pVMm(%p z`*>58(A!;u+Xmq0d7Jg{M-xwuT9-al#ic6yjK&(^xEPu^nk>EOd5$Ld63mDrn>0;+ ziWz^q+Csvrb-lhDPOcwr7S=pY^Hsu-PaVY$JH4H2sYv>FWQf3j8~8Pj$J);|_U;LE zI&BAxyRJ?}pEUc1mkyRC^`)%G{{b^%GdgMYlS7;NSE|$qYu_Bhx^(fD&5=0Ag3EFL z)+t@EV^_%@*^$KNt{Ia>3sqXB{d;v`1YSs1z7a~^x!VF$^f}(o@0>xv8h7jr6T6?j z>Z{w4N=Z`1yS{uVm9?%+i-fTlXlpt0Y*X9-?vDc}XZP&KXMd+7L}5c8I?IdzBi^vl zr!9c$*=mlv)f)KZ;JEB8gxM$}?Dh}KLP1rS*k60am2N-q0OW~Rje_V*o(5ozKt{%Y z>Njrx9*w30Lc<6d#byW=3N!x^$#1m5g6i++bnE_FQs;5$_$aE*e$eSC=tAuj`^YY3 zg`lO5I@I_mgkI|77J6`JHu?i2xE&>*`TE~C>|l12x1GofE&`bgMOtYb_Al2otjAC! z#qXUmgVc8yI=_ao5d<+bzvg9#3j4a=uXepyd{zlfh3rOZ9W?m5&JwiN0x2eK>qNSB zKB}!T$3y0@P`RS9Yp+UAwGt7KDOzvnxa@@l+EvdMGS>U05dFq%c0Ppx#Ib7B_Y^IplOS#~97U!zMxu$;a z0G2#bt6z~yu2wI3a-I!6rONe7UE@uy<25NeBSC#z04t|k%2dKs_IR=nlx_M0gdXeH z?~L!VIL$OelkX2GOb%zSfo&h~yUZFTmDtfW4t|GWFiJv)DNmL2t*}!Qd-0gAmoDGE zJ3OaiVEnXuxODKfxx>VF8lH?>36o^M^kQk6+c)Q6+WO%SvSgj1{*$@$l&jC9aeQ%p z>{$n8pd8*mnHwx6ZZa$XRxa_`j*qIyT{c*DVnijZfjSwYu2vZw`y`_K4H6m!*4>N4 zJuP@`#Wc}OdnWzLE#BFeAb1W3q0n^6mr930@wA(*SBNL+-ho0bTo5lEo5lSi5|8Ti zUu5~t`JaxtaA4>gv%txmg9JLDblHWi)Urw}4;p&Bd0RkyU0W#kD?8Ghi;6lYX58w- z`e7gN^Z0C2+iG?TsIK`@qb#z>VK3Nq7D|w#n*AxQWH@PXQ-*%LywNQ6hBWnQYrPA= zZQRyhr*lJ1PvE?_mnrPzZcODK3v@U0`%|tK5-sIwSnq8%p%S;ZOJ~!QYp#g57{ooP zt81f18^~F(5H#)(VJ5r+j?X1jIQOyDCd~W}jyQDb+~%3Xui*{*gC|hJX!>6&Sp=KV z>znr-TlaLrnFBJT-wt88*>wP$9`MJ)^#!NN`>kzEx^%J6Ppk!ek;0Ly;v2ajBIzhn zhF5o&vI~X%%DAJ9^wQydBX0d}^JJ}K?oS)>CdR^W2wyWok|@Yci+$&Xa9#D8WXc`; z^y-iq;x!SQFO{FJ-uT_n8a_*ci`We6eHN;OhO2*P4v2hK2N=7lXHDgsYlG$OHR^kG z>PgBmFQbR+p^uy`jti;v(3SV3885U5g912^*eM@(iZ_!A!Sae-%0<^WULB5!2bNIp z#91ar4fROkel9U*A2|D4;%aME3Ziouk!r>;XGS#nF<~mqm#;r}^!{3hdDwHO7vpoq zy+YPuo?6~ZDPv8|K@@;7|K;XeHW@Kf_2a)`iS%q)`%R6vedx6TblEl$2L|it$*z(D0{*k>Jc(<=E_R~eN_uF#{ zw(y&ZL7P~s4{G!ov2YHDCX587MPLJ~fggWVe%AceQl+nCQNs_j0>hzXx_CJ`o&usS zKjy~YBwAu5s0a`1T}s8H29(ZTCs{Olui>_D$O)`yCGcXPIpMQMf>JQmd-_kR0%5gK zO+P0K)67R@f2Y~^x_(ESen*JhE~ypzIC7>-OFFfaa%Mt-*R;itdV$TaDrML}=9QXK zA!pR-5%tld&HOZBXZM8;;P+t<-V1gq@NXc@c^v%FQYK8>b2Z?nCaIK+*-DTST z&fn<)3sv7R9Y!aC021e;?%8+4OqXYWntnd#(K0xn9+<-n#~p7dW(m8CZha1;-ge&4 z6{zIYCoFX2hZP86PIz#sZW&~YNBpMe%;kP8d-BMAlaTw;%0V~-$KX}^V0s6{#R zASJ7c7-{^n3_Q%x5gN^8b9a&a?Pyx)1ijoqkv=H1REc9jdX77KV6yCY;9;;9aHjK^)*j+sP^jQ8G4Oqy39@kNAGWtu8Yp7`7bG zumNr(p~zJ$0jCK>$t^~6ADvt@PH;wb*IgEX1J@6g)vJ}C#6bO5*)>V{h8*_iC5@;n z@-w%W$d#e8sszHvteSV6S#!XX?;Pziz^W3YNA!m~(*1n5>P;ae-hS^j^({_svGCjw zIB;4F8(dS~?c?1s|M4FGa1rN{;7vP<3tx>uvTtIm?HH^hy8hizn*mJHQ-TgrV2B#q zd~mKAsxeisj$(iBa!3|9hx&(U#2Q2foUHj}aLgGj*;pu-rjlm!3@t}p^<>53yeQ^e zI<=UI+!6(s;p;%Hc*0iUE-4|d>!npwA3!*zVRZ8YUqeyxfuB99EVjNqMfZ*91n!zE 
zio>(7cRJZ?CMs%Q8&Nj@Y_(#wt36G*JlYkvH>aa@JSk@awhKi-gfF=39Pq!>>2ydx z?Lvz&>dh#>qj~)7%W2vP^Y6(6&_gEQx}g564-xMoeVGsOUqn%{su!Op(@Vd~Hw!Rr zacg+2at%KJcBgTfqqV>d^Cxvl$kXiqA4JBC@k`|sp5LW9x&_y?LK#eNn@k?@!GDY< z$`-?`!-kU>2uHZ+b>8|O{imZl(d7#?8tamSFF2OuaRK$;By5L_tA!i{MO^{6vvq{L zGd7{`Uk~~1tBQb4Z2YNazyDUb&qD)8EoAU!$TJSm;6oqq7f_Oaq)uy)7f)r{IX6w%4>(Mvr%BzxBP7 zW29oJ=3pKKzERcu`r_!g+pU4Y4|_zO$O`V;4MTsl;SsWaQF(&$&&( zyad!eww)|9*RJ}$w-@N3aFE;#d*$)NbC1&>+p!&AMskt8)_9YGWLU)N!h`xMe6MWY zL*Qzm=Z~bNs#&S~2xL9{X6Hk-o}yoifPtFCjm%BWM~lA&2tuTk zMO3+(P+NJRvxPM%O67(0xLg}Gh+!xRul5Nt%HV80%70sq?_c@!S3(1g4+&BlbtXkC zPyMa|9IA|5=&GLAfB5*l?6Q6)j}F$J1NwA*-jRt)l{msYv#7kD2Q#y?MLLI_GSw;y ziTaNFCICR^x6RfG>a?wCk$8!qWqw)<)gQaRUdEb4QlzUUpv$nUm=o|ICL2f`6-BzW z5)s=(f1?vEYn5iGk$Ifip73G~sHA!8qWVg!Lc~I~np0m6{5+gJM8*d3pCP_(JJ;N! z&8D=e;rQ_;%iH6AE`ylTee)LnmFoQM!i3Qu79Z15;s|28Zq9(46HU4?^c`&s^kDzT ze<%KrOwQ_upMHqIHM_w+zgyoc-92SQ_ix*58w~5&-!gRU>nPP{QD6Zio=bm3r5zVC zeZf4PTqZE|5-x=b0^$?EGN`xOGt5Lmii&{LW24ylmFVv~yEBxv)^m#VuRt|gIv8N@ zg{az~9&C1`tN!?T@d3}fPOJ_wtA4=|PW;>KIfQg6gNT~UCZ)Rv0gIuyO@La$;ZLyf z-U9>P-USiIC+|5YaW=<}r@-kKIw63Fsv@JK{+wtz!D|Y7srSe+semlnT2QgglKAAI z6i5E=-fm&(!Qz=GVSmS$u*A_G3)M977+wKT9??qmxx%0+Aziw|rrSl&aoP2eIp!x9 zl2L^^K-5i!%R{c!{-*3qiC36W;6XAw>Z;8>z24@m&)PF+abBQ`ZE;#cX021eYV*~# zU6fq_%I(TuiUGj>&Z)mPiTyWF6Z#%20ot+c@N96102~cVkEz=LnZa4*S4b=soEvqY zwHr`xoxy*qL1MCN>#Bqb0S|-^+D=3=n_q2m(z^&c*u?m!&)~lFe-v@bh{-(BK1=5hzsTGoT^~Fo zNotw!k6@i_ZmDu{|5zX0wyYFi3JKvsBF<^%Ewnl##Oyv1OP6T&^LeT8Spu-`ylRv9 z9O_C!HtXtb4J|KiqZs-v^%kvZDSbH_|2Un&5yC@>jjqS@=t7HWHwTPDJXS$iUbzc~ z`tk}^58P#pIgM0g{xsa>tupqk^g_jQiW5F&g-f2^1vM!zgl>~p?mp|K5XK``^9w}dZ_-sEG642SMc#J>1t}>ueea>yu#aQlpwi?N%hsaobsjZ&x0#-(m zOVi9l!)a(w0sVt^BTy2Z3$=CYBg+v^j!Tz`uc5sEChB4DT_rKf{Vw`qCwUDEA#5I~ zd9+yfq8Iv@B&kLnIHF+@09r)B-0m0?l<1e2&qV{Nbvib3vKLZ9n4OQ?eXlvW(F>b8 z>L$%kSV5X1SjQ{fdCz(E-XGVKmaVCzR9aZC$sM9D>CMR>yQ%!5aQ>Vrs&E-8H@Xd~7mDs2h-?+f6(*tY;ESML3W`dt zAs(bglss{>`=kmQ*gE>ifwHK2NgdnFvr^ZkC5^gzw%mm@8y{{Ql7I!N2cMkPp}vA& z_tk-NB5?C?rO(xcTS%XC*;~RXdqSsg*acBQKw(o9VD-ygh zq^UM>0rYk2I^5()xN7=5Ixf)N`XgHUjKIDJ$x^R@w@Z~SjIv#GBYt^Nr2q6iP5Z`X zN%{g%{rKPI<9}YQuWj}!YAm290mL4cq--b-rK3fr=9ZEB3v;t@{Y=|%R4q}c&iZ3ah(;Npj66+?&J5B(m@uE z#^~9UIFgE6%OK~9i|+Nu*ilc2{?#*Sm295$dy7|M2rll0VB6@iIUj24j3jsX3NW0& zSd6fC?LaqjKZIL_i5VEZEl_C@|D|Y)FoJfYj_b3x24FKj@m9|e zc9!z*CY{ctqlUzgZBB%^?*5xAH(60H_I6lT%J>ZSWv6 zk5b|+-x=ixH|%S)81g;ac=n0)37^JB5S}kty=`jvJ)Lp9RLLqV5lG1483;rfYk+T{ zUzbdXAL^`kx4qj}oQ{E)JntS8P>x6B$*oG*xP!Im%;UmH-w=jCEYRP(2LF6<727VF z*wH8tH7vyCei%Q#aZoAvCxBo`7^T2O)>&Lh8>GR?5hOhWjp6m=8nmd$fxqgAz=Yww zmo2rUT^gh;We~Lbgb2Ft)!QA>B*X1pk3jj+G?PB>0;H7^g-@c#vrF7iF$TE9ygi7n z`Krfn!Ox^~R_nWy#>vY0FaukdYTAY213PYiXchNX9`VX9jGLXq=ZjJ(QIo(!p)yy{ z&C1aepYJ+{`2J-ye;K#7+&CcN*L*z#@c8GP5yDy`Ep9(aL|hHQ{nK8<3<3mKk(XzU zEW`X8W7}!-A2-J>Zu%&G$p&0J<+bU)+cySHY=1ubnZCFBb*U(0kQzckP0FH>-1izv z?CRQc2jU15zxZ|F1o32vaK#eF^cEDe`d4+*ZSW2Dj`iIbOXayoI#SnF< zExxSyyW?;g1FEIcG9)GkfB9C=rqc3q7eO|SpsgzJnkG89Avp0G86e+;XF=I@W~O+f zOL7lLC_Y{v%Di{Vq0O~))Cn@G>jqVrbw0ocUO4(qZ=t6w418JShJTn0%-fuolx}gZ zx=4*APb0rvs?3?bnx}wE^E^B@29UnnF5ADFTY#+7VXu-&R-Qen_US07Y-h_ zQYVMY)T``PPL3C2_v{L53zbG6mcesnDufO&?)I}cX8@AgO%rTTQ?~iySd6N zjT4`@pk+*4`a`Vg-mZ4k=}+eyp4Eaus=41=KewR%f)@&c2pll7NIS`hlF)m_>aM&W zBL)oglvPCnTDr3)mC(j1lHhOw7G%bm|9! 
zKfYDS&Ip6)8L(3-Mz)bk&xf8WXH4Jw8+ z@j^6nbvxe)iX}xVkLrsMEUfT!CUYXeCB+VKsX!6fRURLsB;IHv ze?VQ3ziPqwWiio-qs-rZn^^=JdX)dhuoqfCNCV$epZVQ>4d%g&6amk;VGEbZ+Vu-EUba&{31RmZpkG zi$8-*;u?RUN980iiXL=kC3q)|#@1f)~Ckw&@^5r&c$kW>(dZe-|Tn7Q{IKi}W~t;JfGS}P*nxqkBxbgD7BXVw3bSfFiZjIuCytxA3f5$E5&y-r)`X^KF5e;SMq zyjYP!z?*T!Ce}zOY=XU#a)3Ma{(R~%)6=fEGNo&Odv9L8oIV${(w6qRt?p6wYH{a6 zOWZYUJe4ORuBp9J>J}bBN{XqJ!(vMEx~=^Q@~I2R8B{G`N84oUfWIkc(IBF?xkbct2~TT*>=p{V1w7x9ssU`efY* zdc!)Wl{9V_Mcb#3zV3a`0714$unYhGf95s}d+ho)swv_konaat8@ zJ=A-XA+|#>Dsd4ll{psy_F@GTC9AaT9<2&^B<6`Dx?eZgs9;H-0VybjaR!QU>(Nv>HR6bmUC*P-SN?m2b;FH;sx_u zn5x0k3Q^6icb(6F z1a0|mCn=(t`HoeHlYcz2xsY|4xF;UTKifkw1XpHt=&EsNnOr*#8DzP;)l2 z1!WYcHk&7O8YywyOxQu#@{B$wGYKSMzMeb=62$T44jZ_ar$_k(zE9B*a4O!gCT{LC#Yz?@S9!S;ZYPafCLs7 zikS$l_Gh1xF@1&Zz%}&n$$vH6C(qtrqAZn%!{4nuNMxcZp7OjQS%e;f1nrfo8qu?L81Zy&jsUdy0 zey`bSl);$A&GQA>?g|@p2G?iS2KqhoYAnU1YO5*s&A-T;XwbWvYll;LWEph0#8b}J z#3?JwdDWxO30&=przLcIWI*b-mFXiB{TY%!_5 zR$pJztT?^EgK?C~apdu8QH3}DytStZoj*Rpa0KZRJrZ#r%lIBk*IK*O@{}H%eLw&7 zWvBXTiMj>s6+_YJ8LnZgDUu8!F2lx!AB2i}HPp9_!p-a`pRID{=VYN_z+B`S7=zb* ze|XO_e<@<7)TXam6CbYE6O~FJ3T3qRCy%d`ML!XFA~W#1(cJrVHDEu=4-1vRG}|fo zx@K|)-yHQYc6Mntq*Zo%YaU#lQZ?g)d`KZKKs>U|CMzWS^n*CYl&O3=j^5NDXp_Ob zQvG=r;EF4s=2AW1U_Qrd{=x{>fsFtzJ+AorfGth&ZqtXCb{~T)a4BQyJr3;5EH}#B zG9*IU*_2x3tytnK+-TSgk<^4^x)SIB%vt+F`Ei|eal^4!(J7L~dAaenC^v?}Ux;WY zyad-pBqQFw9HnRAj!9rwP{{~JJ*3BYVo3`#y{eQi)2egU;DPlaC!6MMo6~2RBB{4` zU~Rwvh#`vWeV3QxfUWPkFF5Pz@#BPwZI*3Q1E&6*1Xu8V+E^U%H#|XSN(Utv0h=#G z4jJ;~Z)6N^-hRw%Ru}z~GqCl*d?i_Y3uDW|cdFGt$hZ3SPll|3m^n&kweZkL zW-KA6{V_&su291OA0+r##zTp=yb~nLE0Zl9F~GNP#Fm<5#mT{$s8Bt2k z8i<(w{Py>xdJF{{#*`rqQa%cWV{K(nxjeuMDC)?Hi z@I}_#2cMt>D97F>H%kLb813sooIW8wYY+}YP0m1!VD}c@F(Rq!1#@8+ z2T)OsTvv)(@N=iQAq$X&={vO!eI}YAD$``!ZCHd8_qrUby)Lsz`u>CszbSmurCPlV9F#-j)20f|y!#q{qp++${ID=u zFCL8Yf$DyL?|Qd3C)n|3%yxe=p`mTRd8&FaU-YZd-8IRd{q5RL3->!wxJ>?K8fQE5 zaj{oj!1z2x=)vbNC^>@2TopH+L%P4|cU?MTyH}rS$ukYG8w!|r6Q}k5#Y%?}R5`Qg zO!6g@2^-Rd6h2*c1enOfC7GhW>n#4ZX!ujrGw|_lMs=3-yV6n>p?CQjsG1V)2gCFY z6HZt$9(&rU#&k1`T#v2ZWWnPI=21sv64U30_FAhC&WX&{vm1c%-WI<#l$O=!xgRB3 z)Z#xpD^sHPF&0ig&s-SHK&<>Ybml z@BQCYPWOHYxN;Z29J(kh+vu04Z#Xp4>BZ6MBpAb26F$eB!JP_f1@^m9mg_&Ze#XhX zdU@~!Rj~+ERZ+?c>}F}%S2hMx?H#0q!8zk|RMc=<){ljF68NR~XELH{zcF!lW!_cr zm{z}phZ9r13zcRjHf`QmScczgs(avEQpw})WNvKMM!F9VUZUzMQ*hgp)~>>qM|V@C zM8z>qB!qeB7Dwpf6~?!OHWW$*jM7h(JwzUTl`5RAex~Ifl`ndK4IUH`n_SxG<}+0| zCh0Jsf|maI5l4(kg_^A>Tb9=xM78kWE~66fnRS=oB~ z8S62bD^>}!WY4ZmR)2&S#8bX&(-bhq_*18*=ax?iUfi@7MFEL${X$QW-&v-9fjD-P z+j7;8A0KnM_v^V(*Crae1Q+I`a#CB1Go1T$u^-h=62$oG;-Dkhgq76n3*sdQ^Te^y zc~RO_)yW$TZ3@-5#3586{e!rBT1~terpPZH-N6+360QAoxLYa9M`uYIx*&O2jKoI| z4+l59hA_pmtah7F{ZO*Dq{l%uj(k*0GnxqAGzY_*pTe-ll#3et%$H$}8%SJt7v=tJ zop2KLsSZEl$rk$DI)!zv>ec-|*jh|QPa0Zt^;Ep$UYVsTlIG!9J)A7RgSnmKRY9N% zkE7^LTt9S7E&Z!Qh%6z2!{(@wbnv~iUMge6Zj)N8!)%tsj+95St3voy5~^Z!_XkNJ z6Y;M+o};cycNRs@cM-2vUjJ&f0r2BfKN+jE^SndUJ%1)_-A1vNA`8iUSTa=go`)aI zNy)pILM&hY<`4gr!*V281rFBR*4Oe-iGHA4Dy#|?PT;769u`&6 zi%14;?N*d}*=v(M&7%dLd2|uA#x1vCN#iK+j~;y%le`_lNxx*lfpDSQ8QN zjh=aO-1p5$hdfJ<-+}G+^pq)mQ6(bR&DP=)u6MTeVDRNS{o^j?^8{5h)~gn^dF=sz zlpda04p;Y9l@wK?dUFYMnE!=+6#w|rXNqZ2@0XU1IfXNO(3XKuTYpqSpZxXxSm474 z%kz%y($Poh_~~8jU`>>6?BY4o^+a<|hw8(P4)aGY#D%-`9TLdxiEKfG+}5c)9q`3u zmiytG-m&{z$GcT#)8ubbUe$2`Pno8}@%<3BZXrZe5p%*qSp@GKuCp4~Te|Pnb+cyP z6TmfVXtKYfW=BQdDM}Mr!J(10(X@ak{1f#b=_&H6;o&g$HnF3?k0dXZWW78c>`Zvy zeX9Bdx>qXREzK;|#~!`AJ1LQD_!^tk_=MUn120F`P4u-k+i`%@?5Sancw+sY-Ktdn zKCBTX_NZJ@^bk|&42c?ycwB3txe7aYPbzY6XQkfiyivaZUAOjxWts8i-zE3(HbN;T z{+qCguSCp4YGKDiVJ0FC8qF_K$l5OV)_I~oJ(tuTnGEI7?8kf7@^9)HsQ# 
zVKqxwPqX$C0#kh!{yb3pb7$+LJZ4Pk-q9eWdUd7QOVHr$v$Av!K!>~!W&S>!kg#^z zyI(Wj^wYv}mP;#S=~!!bZL!rwPuOzc;ZlVJwvaY5{3e{cgjW?jCr;#Nzo?u@gDUR>+_?fHl!lDX}u zU8Bge-E)L$`7cSGO#MLkGirWxDYW>IR^9aef+B75DuER@d_Lr(#w; zY0}YEB%Ibi|7>0%IqZ+#le74MIEK&xhi^l4J?5oP(j=|B zrxUk|HO7!sY^mX%tKa8gM5g;x9LhOAO6@z%ZA;MJhsZiW|9bT#$zdQ%hYXad8S%f@ z-wM69!!;om(zbeif6&b0TIi1HxQg0+!npttT>FNz{w1gD5sbHnE#Eb;$T_}z(jnQv zG#fUwm;!r|@rnIPUn!-{q2iPSaeV z^HrI#4kGSl?tDd3h4=->enm>H-+Z!Q>P`*d3qSJ@>}{N=?&_Zm5U_0G z=N*5mgHzjE-}n7ne{wNxy}6*I^?}Dq_fNLUebce>hODuMdkT)Tzn{xP@$6~b`Y_Gb z1-gr-BLa}%4s*@%t*e9wc9NMLbWxX^)(0PMhQl71=4DHFYI<*ejKP}$PoJ69F_1qO zz-f+8Z;f0a&Sq4cm0siqHB;^n8ed!I$VMpMoU3mgGsY^}MP5LL2;++z&eYdGLDcqVOC52K?=6(f|KOu!hZga%A_*tS_mVmsCHE?_~(+9KCR_zAR z`zGwuJ`WE)y5C*;r?>pVyP5OHrZLz7p&~Buoc)lnTrWT2LNGk4%`Sxa^fteH(;o~x z*K;T5?$octMXfV#(_Etnt5ilbWE{w^q#EBx<|Y?R)lk> zw4HcqI~)+~&pEtBC7N2HEg<{|M_nApsI|UY@^cN>9bHdhXp5*E9sm3~4koGTd>#7* ztbf1wgMNg=GHR2qA7qeUCdbtUMEU)8lGoC3>|0mNBLA!@BLE$vGm< zR}hOxYSi$FeOfop!}R>+^rs0t!+I_oOp<3|U z6i%;_OS`ZH^n0M-mgu5y z3HsaZ-r=(MDTZoAS4Zk7zr#sKsN3nMEte|N-o zAe|vgwQrOa_$WudF7yegf|h^sz$9C$Ux_VgZ+OFFUF=lZZ$v(ZbfwwjIO_Jysg37Q zP8daIM4PJjVS}ZYhHMAQPMr3G*bw-sXB)jVUSd4+-nTFaOm0mK4ZSvhjq>s~&F-+hCaY%#S4|N2_$ux)CLyKRC_*>NXZ} z3AIa_NN9S0HWZOnuzg*41pA=eSFk>W$ex530C~^Q&D%W>e*tXF@lS@FMv`@hrxfPS zFWt4hO{gT?UeyRT>c^GA;FthYU#yq9o0-*3v-)_?rt`x23+YS9`?0ZOsA~FQX#WtO z))qOxfk?9!Mlc>exW6y#e$ygmWM&gy@vd7;f=jb7gpQQ?`Kh!S)MUtYkp z@F9(Xv~nskQC)uInd#e*DXiAY0+BuKcJFj5u7u(?TR3hL%uV%~WvE^Gc!UDb3OIMF=ItG7wp{h?!M!+@Ru=+s{(OZ@BtyCK z#6Ndx>yk!*k@&0vNb_m!RGEGXQ=4T-rhh3S<1%jwZ9CMgL_WhCDP#ETrW=_ktA6X% zA~-Tsm`uzmL=yA9Z}iQqjbc{897X3ZW2Q@FKQ%FfxinX?z5bWaXHfIQdhv4_`u1i z`a~%fyn7DTx5Mi}gaQb_dO?MwujFPS`QKAW(%-H|ux4o$rcl_BVX0yc85#oTs?Dh# zt?r}Yt|O|I=A9cJ&q2NjaK3hFL|WVY$8*2KR>^>qXS$H9VA%z0onArqZRYKx9MEYZ z$l5BxG%g$(I8V`W;=M?%@+q367VlHnQ?QZ=Lt6qqlk>>%PBJ6ef|=oZ+O?;ESGVN{ zvflWbgA?dxN?cm+c5P%*b;cX*C30Tz269ZK$g&~s^0Z@3t@T@timV_diBd`!>nY z__`U6`Y1S z^^JqRx0UaiU{+~~BG0u46HUBM=UlvQZGa|cyxqOW4(db!ns$6wj3crVyTis!+5^cI z!RN?HX0R1n9dn_J19ib^BieoJ`#%d7$^9Qn3H*NFE2VPnqAhegh^@-we)RV!sHOm2 z-`4QDYyS~enNQI9HLbB}@a^oBqT+lP6b)fsA=*0VSi_rLuDn0}D=rRio=(DhC_{es zKTzwQF7EgWwHKJRI8X1>h!wj3vw?4z(kkRB`q#6~x`S9zt|~IqhlvqYU4)`9Ff>5d zC1bJ?6w601lU?-%^ZE$z{L!M2KX#EszjClzxVQKA?7gn;=UmU>)A?LFW5Eq-c8#4v zySw-WPXIPFtP~GaLal#N7L%ti6mVJi2+g_S5fYjQdaUCdftpoubUalUXa!Q=@2H20 zdu|;XU4PMtRbu|B`Nlrpg1D0PVdgM58u9+si^5e6E)%Bh33g?Zg1Per(X^`%TBB;u zH$(2O-m0YOzWmcUF@heO1L{{m*M-98tIT|{B4#11B69FvC&_@0kmsKsckNkQz^ItT zwIeCmCJu@iF;qWC*<&4VzzH+Ea4ICVo!!k~P05+^Z>SG&MML|oY>YMiq9(KsJSRFr zZQjl&RlBQ-MZD)vO@dkod=kDJ+eivb3S!{ZH~|BV zjUp4ryKO~wL1!c*sS53Fqp%{t;^D!uZavMkk{3>s3F*9 zJiY;t5Eq`fkaa(!}O~P^L{FZ$(9U|U5*l(-dCT5&YO~fBC==vHs8iJi+Uf1o4xv93{bhx^IBh!n{B~Br$)`T)$`S*R(iW`zH zj4VJtO&H?sHOKDnj<~w>x=5vAVGpAe@{eyH9iu>3Y~~XzA|$Zs%^v2hF*xF3FzfT( z`V$yZQ#h(*(Yu(&uzO-5*lMJWnpLH-ObLzpFi+5O+5i~T0C{SLEdX#Fp~89mDB6Ige+E!RJHB}wQ zcIXJttGM?05N=Jk61ME2-+-lvh_rhC`=!|Rrs8sa96KflPuV)wWb+kULYGPzFHJv( ziG-VJ$j)P2@(}WT&_P1=QG_D1;T)>y+;UQFbLu9Pmp#;Bid ziu}Cx68#dh&Yj+7NJcNjMNBn~6t!z)pZ~$R*>Z`SoUsJ^iKJQjvaHOL8V}XCsMK`&qHg^Ksx82Tv;zH=Fp-Ff}C-5djOvh0^uya?l=y4dvZ!5j#=|{3N>jvN+nL;hWP09}h*B86!YVU7?=O!tJi-K7o z@97mh5G&`35^8;z$iVzG4k28iMrF}NmP9xLc7@%B<^XxM z0BBSUb-x8XKb!2AV@9Gbuc2`8^coI3K`%|hKF&GWV)wUUa{F2#AnB>~Z>IA`AU;zi zAZq8n^ngJ0D%<-klA?v^fl{2&e0VG1FZD1bF-x3pUhstDYX{S1F5atwes!Juc74fD z3WxoWTUNh42ra|p==gV|N!9Fvc@Vw1fnIRZkD7}5X{Ef8YJ2Y_Mhd2Q%^glz|FRh7 zJPb5C6OK&`pqro{pZkobJgdMzL-~=?{;0pYoU~3^DnZBAL<<&(>M2sOI1qkZj5_*L zTir64@X|mH0~yNw=7YP{7k%2K4)~pZ&uKe#$uF~yO7u?FS3mca+qp%eS*!3xKB7xzp!R$G@0s!-^5RN*{9R7*i7VEvz 
zTc;EgZe-4K5)YyL*0905r$FK*b|GhHFR9ItsA3f5hFLUkAM%kfFuB|e38yHr3NsES z?N;^5gTQtbgJu?;B60nn^x^{7!lL(jSDKm!_VH)`)f8{>Zm3{Vy@+mRIg?=ohP)H3 zSP_w@7vV0qF*%2#*uKPP52(TV71)ei?#9K^YyKFp=BZfbC8DrQHaYqV0RuVOxS$bT zpMq5f9aXeV3rK*i?+Cp_c5~h`7ljtFKk%!!bO%2b`4F(nE_WL`7qGRk-h71if&c-O z*Edu0OkCnf_exj=8mt8Gz!bZb`vGQc8v+GiwfFSbLsRm=?_UqMt>FN2KM84@ldFe8 zY%E%EE1E_TpafX1amk>TuU8NroZ7u9o zd$!&ip5yCD5DlbOS^i?Uxy5TQxwcZ)T)ivMYt9$84?P}|DE^JlPtSgx^Mvt*%({RV zps7&xRX6e(L;TQ)a&+i;&r7Jz4f~Tmr(|Uk?=o+=%DZlV)Uk=5kK4*!t1ruo2x+Ot zXy1oRVd_F}S_S&m*&vhqDLuT{pYlO+5L80YZj`dL#2;0!fbhs&6tTzIT{jq-V_``~ ze;DuICWBXdW{ztEIz45be_k%>da;Y`PM4*ssNK}t)J-oLM5z4K*3Fe&COw8b?lV>j zb;eRMe8!OvhhLjjj~>CXjHqniq@H@qD`A$KY|w6OFo+8XGf*QCi`m|W!GTpYq8`Mc z7wSO?s?YkD*M)R{q0yXh(3CH$yYHLyLp%7H8YG_X7t9%cg1a98XTN8%sD@xS7p(t1 z3PW(D7Q}y#?bjepdr`p&?B*t0n9ttsDQIdZ`RWH-5TV$IR!>?FQ2~@e+TZyx0t{td z+%11~xH=Irq*w3FhI+T9J~fmZ5SGyKsb2Otbh!v(gt_nc8%04W`=Z)xhvlO8S>;5i z^#gZ~56%ql3N5L4lvzYq{Z03}b4P$09*DXLiJgDc1C|>gwErl$XRZ$=7Nr*-Q!m%O zB)MhF^*QM3WV$QRuj3-eg`sCvEsD=4N19AP7~`?rWY%P@!Ip|ESQ#yqYQ!;0x*t3_ zqUSalO^;h39^}D=8y~ONdL@1=q|N2GwqFs+CR!uPoZwMeP27hO5X$ zspEDOrieX1O)7+8G5i0sO)(ClpVYgYbM^3XWh#H|SKtsy2smj)_cWdO^T{^k0mZRh zUYRI>87hQbokA(dVZg|1EjV~~Z3G7~^+!;)a0wkbaWOstUOnMv&*%V6qNOClD$4|z zscCNg`FbTzk38);pIC5z_%yxFrj4D}^ez3g!YG&ps{Zt;fsJ@}mA9GV+A7k6F{3C_ zy|XG!SfYS}Hcd9v@bl;VPku@CAj#`^&A}r3TuG19DmYGp%D$xU_9&gy*O7W4M{ZjxP?JWpMpKM$~j z+wOnxIG&P%77kF8l;d_D=LX;y^h`YziRiJROzgr`ObQaP80N8zMN^V&6w` z%Ouw3-tJ{#DMJr2sBof!L1EjotwHDke64bnfl^U?l1sKmwto8NG)WLE|*TdAHGQT6P%wlW&e7pUm%e2-TWC6Xf#L)iNQ%CU_OPWDgS8pdz0-J(He zcLLlweMlr_EPxMm`s|9hEw-2h6}VssqT33wq>FGI>4$cqG}{A)0I{gcGqG375`+`~ za?h(PL_G|Ho~(ySkYUh>BiQc;EBLEo2QUmkw@&Jie!cs;3tSgT+|t4Th(Z|Z6e9Ik z5qo?#G-Lf=K>FuE$;q##uAEB5Y6eOdal_{bUh`y~NzqcHHe%imXy67~g_2JX zRY6JOt(VvKLe%~m_xhl94N5_4Ve*f@uBxzu445QV8IwJ@sd*v?j-t6yaoDe@mRPv; z62*sV-y~gx?CL)bk~|$YI-wsavj#&?T<2aza{dw@&|djpZU7znqcaH6q|(ghls*17 zW34uc!L0v#bGdBsuw0?<)^A7%Iw<=@Abp=i%rTmVxBZQ6;RzKXol&P`%&8Oo8qt{h zF&)P7D>D8u>I?0q7?z#B-}O4ISVxSdcIiEXvamZmVylGyM)%vbpS;Q2srU2+|Dv@_ zJ5=pVj`3YZNT%2zMNeQC_zrN%z+*<=l|ZV2vhB?USwdBudff zCpHif)U0E5^!uTJVIIlvLN!e4qcG7&9cz!KF>~gdU-s)y59{qm&fPwOmTQFpE~n_@ z?f;>clc_N80CZ?1Xx%-+9uYN>73`MQ* zFu+9S81T0Rwzy8Yo+D`qzjdn*A(6|ojD!E)z#E6h(^y7v0oxBUfBv#C9Jvzn#@c+r zqp3Q4&t&y|Awi?mPh9>0#o?^G0*T@t`Tc^AzH&?y9f;bd$L&_9PPtLG2(I5G`IV|MSAR9^HkJ}hc*W>j6n4A`P_OJs$v$e3j zf5cPt|GcEsEunAtB=7m7sTX&)p-@okJDiy-1WR<^mNa55tdu~^G7x=U(W~)3UPR}c z(~2q|V+hWIE+HY{=4D|bbQnblzo5(yw}cbKYvH>g|6GpM~hD3Dv^dJlQ=IZL!?gvUv1aE3@@ z+hLYDD^D>f3Fo&ksMG%+uQO)L^>%LOKP>ToYYqVqCn2g7)PBQZ&NGGZmDzYy!*&{# z%#YioFhE@<2f!F?qOeyLQ1E;k0bmfP&~7IyVWowA~8;Upb^JSeKJ2 zP$SkXvwQ!I1+e{$I(BmA5svv^(nw7AsSwuaiNpR}DwOYEGyI31MpSxbWMRJqf9o#c z1RMZ@;NBy+xGV;kc}Qdu-EjI3Y{||p^~SRX}T$4k+8go;1X4y%e#`I41M z_^^+YQ&~_?>EpUP?`Xu&u?J7{=UU&L`tN$}Z5ThFz#H@b@CpltYSz3D9 zNa>WCb@odax7U;T6b>p*&ma|b>i>#Yt~lWosUh`J%J~5F`mR_x=>$Elcy(HD z4MUwX@jJ3>AonM2Bx>b<^h-|b!GW#!w(Lk84ySo)4+mA-k=I6Cp)LJ5uzE#+4#g!r z*mfukPh&Qww}uUM_HV_Z`_6|nIHSOMTo7x1fiU2b7{Lzp1cgf!%U9j$np>D^Pi%jm zbjyBPol21OPdHPi%CMUCce%nhG%1jn?LWWkV^~6Rl0E&AoL@hqxT%9>5JA%csGHOW zw|tbVDT4;|+Mit$(v@+wo6 z`aK1Uo}}R|CNY7kV^Z3mBl!YN2Ygvx?%=&6BhffC*s+D+PFA9T#zT@gsQ)E0 zlq8C7jm`R>&0pbShS3A5bc>v*3OHACY32*YA999*gL>F-VmW4A6SwzxewfJ9Q)=Ya zHf$!q64IOb&xytTU0Hzh6~vOLiKV|dS-SO1!A|wPJgouZ|KN0ESu^jqdgz@)F69** zLO~(-#U@&M`TBOh{lj{3*mu~zdgwUPIBdjLbpU5>KYE|4OVE6slH2$X3F@d?8j%iC zDNy8D!));K;pbWoaJuN}o^<8XOhzkB>)>ce9H~bF3 zYrAFedZ5~cv&+F=fr*KZkC3L)e`dy(srW&Q*sHf}68PbqIK@UNI`jy3-Gp@u0zp8?u_{x@%!o#dz;aNqvGQ(}o z|3k<)L%eHdT1<+r1`9+j^x}l=WYt50}65n^p3OF)n0U<9tOF0(P(OflplssN+h=H+=*z@lx5q 
zA^QLP$p8D{cZS)SgkY}5p31|DMa+H6LIWu5r{73k59FLVVBjwYO9jLg`M&c+#<8T| zgMQv=t+qL3Jt2vzmV@Z`CGc4#iwd;8MIw7&#CQyFY26WK8twTSshBkwye9@ZLwoer zcYetzZXBu&rkn`1SspH?p!}gGC+8Jjx&AaXtXt~<9LOU1A z^B+n#E;$=08mx!P`@h3g=g?P~>i-pz!m$qoZ{ft;cn=)^VA13<%F4@SW;zyFB0u1=K zxAG|cwny06*H+j&;x&Wq0G@&Sa+Tlfy8;gIk6q?(x_@TFIEzc_-Yluox9w)hCgfh* zM*a<9!PQ^o&GKj*2Xv78#~$VMQ5^pe6z_NBS(Itt4>Asp|Eo&>%T26bG=7W|f&*v! zIPl;Z*fiYTcOWHii~+`u;GZizCu)Vi*CSwPtYDHf@5B)l3(Wirx;R}!Ts6_X_G2^d z)5*3`OdeFm%G!L=VK__w4H!DnACmUDpE)iGO4ZGfVJn+4#+TP~)nOgk;djZF2*GkD z(uDXmdX|j$L!JJsJaFC5K6^LdAXy{IuzPEKsC0R+>Vpypr@FG+NO~$_qCzf-6QcI_(H-mC0!@z8A>FYaqsTl2hZef(3Djp~&)p+Nt zyLS(`=xBW z^4*nax;jfCPh;c#&Z*a{;!7cM+q4H0;dLigpcRs`i7 zOk--6U5(gntUEhtH3q)`KEab1`FgSBRN@O>8^6)Y_1<%*kc}ju5wN>s=b3WKEvxaR zu`9Z^ssW1ASEL4*Ft$h{03Q|xUxf^6BE?SiP=c6T6v1(3gT|WC0OEgVtB=XYMu86C z=E)KHFNm9d$R*$zLM;CN7ex29{fW)>%jK*4l6@-I`LgzF$G_6o-vo(TBW)D7j)bs@ zHrpeiUtc6hsr6h6ZbPif1=vzWx|8}ZrZ{a|*v=l0N}g$GVok@PemZ*=pMTbBc_Y3o zIktb@Q)8I33rL8)odvlnfl2(VfgVs_Fo9C+bw0gIi7137^9nVR2}OrFT-HJnW2$%T z6^I|==r&Mrt)YeyY=7 z{WCi4m)-0=RP_cPkf!<$cSJ}r+zOu!HrP}8rb*G@7V!WRiVCL)^OQa&UZZY{G90jO z;`8PL27sYQ7cf!qD+&1JuX)FWOgh8ks5FjGmEK2~?=6T|O4g^#tJ*)F%$t>!__eU> zJWZ`Q_&(94sMPoVQN`+Ap6wUbS2T))@RZ5 zX}^N>1G4T9fj|A+n;O^Bpb0arP6i@Je>*ph;r|`zqsRCdO7I=57n*Qu1%dQH$>COp zJF#eZmZtej++uq}fqPXeLioq9HKX~6=_m`nT=&nvok|v;R>%Nn=nJ!!ovn_7tT+B1 z2J_8E2jHez-TtE(nOv>iA1A?kdYfaeGT5%Fx~=D@W-0df_dL(WUTOyaI5rhH)q0Yf z#yicf%hHxJnSwi9kt?g+n2}-Y%(?4`l+T(bnvm1F=(MZHF4i*U+HHTI=emIp#Ep&4iC|H05B+a*V%-kmEQ+LS1|qg0;um!d%50;gnHi znosynI8YFb7)p&?#s3C|2x#Ga2hVli=qFUek-@mBNW_=+{o3Yai-03|4!c!$|>M;&JaTpli7A zvGnZt3nDZ!;8TXHzY&b){8UaM#dq;bh~X&a5q-v(*2Z=fwuN_QlMm;nu-aQo;+Dh%n61iJ`M z#s8Ufllv3cPY7}N;!Z~I0I&u_!d1vul5SMk(XUDMSg#l%TlZc2t8F6xE;0QNbWS#$uCD`YwaY;MZBY z)b66)uEMO}dIYd9)+T9H(k>20=X9x|$0gq*9iBH)Jj9wv5+5*j<$qrCVH4lH{1V+7 zAv1U`)VlN~v0Z`WG?SUXNN^OvmTCos24CYQj`@_CtWyz0tzxMNda_)Ok`N$5$E=F_1m=se_t_&N^!KH&zITnu!;h0> zTBv{xUOP)42g79ruwm7OShiH4`TTOSg5xfM*x)95NV&6Ul)@f+ZSB6s#PZ~Ya{)Tf zeZ$C+(-|5N`?mM-+<)iRBJ%atzmq$tXIG1Q+{Jfbzb1lF0HIn}j2~X1;03KHtb?=A*3CQh;=;;aRZS8DP#N{)<2lHBj|1K|$Q z)S0;GrU(J;+h=cKc|*+DUnnV{ztI-6um$ndNmfw>OzK$TPHj(2{M`T|o@jDR3C8{; zYQ$Oq%)vlV#ZFWLFz-69Lg}j|-oDAg`+iv%w-dRc$Pt79EL<-K&u@bHU&~&uGd?9lB5mdKGA{+7=BXBjkIYFm(CrsdKX%5tc=~38APmZ|~ z8p~F$J1?z*y#_3=X}{x6gEKKd&k|)LpQ9`GEM5ceujj$$z@oD`=Md<(Q0uB*s{E72 zhy7=4QU=(E<l}7kcu(yiol~Q2(RD>x-kbV3J*%Ug=hE4-kJkS$QA+7Rw zymd0nf)O$Ri32zWB7cG<61gV6Ux{`%fjo=)2krPXxD3kX>bMxr~cy}C>j@5{h_`n zn}w2gkMBpI^X*q$Mr(s-{3Q@U^yr6ke)@cGy@wLz(qvGgK^pp~B$E=QuL_ahg9l$% zkB@BD4GOUZcvkexKl;K(Yx%5Vpjv3VuvX0?x>bw*z+?#``&L~n?C3aEhA0r{>3U{L z8PXNp%!|n3W!+bI^B}BE2%dC(S$EIP;0*(v^9PY;lEOy=wVug;HNroyi_4{DtLj+!E6s_}&_soE^0t zBEYvws}q7VbLYTH^?K(lnlKILqR2b5Y$cEoORVuv!!j`aXPTB#CBO@G^B(XM zQ=5GNUfK51L-%f8>vr2dc9l6SC{fLArf|p*GFo`N0loS~4b6J^q)=v)h}%zEUSsIx zdndw)GMSW0e_e&Pup&}N)w~UCZizL-n zsN>`oCsxSY9BU1i8^0qenVHJT;kWX)%6ov@N`T)?%YF(pGCgsJ(>tA-gYsNOdai*|A?k0~~BFF$mNQ^dRJR>8-R%irFCE15e zJhsS&IALB~^Dn%1(2TQl+lU%?h7(`Zc0=W!Z0b&4Y8~zPe6MH=G64obAahkWpgL%Jb-$ z8ZjYlX}bBV#(yNW)sNjawu>jbbApT+Cx_?UM@F8i+2K#0ZSHKS+b9;kq5`i!CkB7pw1m#>c1f1GQs z_@+L^9aHhZwZOrtj-v$dgJ>`=7khX|0_qX6UDsEci|u7wLIDENocNj zK2Y2E*<3ww-JA?O{@iU~@@8heZ86#ky86L$@sB7;ycmxf<&D!m}l4(Lqhv1hwe zPzT?dX%3$?SBo&gNY_&%w?CCNw(@7@PAKRk_ zk^DO(VhJSUw1{0)4_7vXNhE~vf5-gzd;i<(Iq*y%?8DO!SoNa6pO^NA-A4R7B3~Uz zrvWaXJ3>!r=NK5eeN7+>*P~S}%EY(M(Od-+4+s&>!gQZeEKP%L$4)sp&KaS>Lh)}G zJ0xQQxg%9L=b=Q?>~*bfCpBs-!FdWXGs_I)#cq;3hnh1F0{Oge{Nz@Dr75_avwzec z@YP!e*+omEDafUjly29J9~qjRHD|>EE9#F+Yo{ zD4z8{cxz@*J6Br&UCmRS@JTj@SV>=w9<8p&Moo285PeCpM9-L<#w|AK^lt*JC>Jo<&Zd0Q3`RV1;VyU6Aw3fPfubaf2YK!Z7 
ztTO$q_w{(8@qv2I^6K+Ia>Q4BSCK!aX2(=#QH3ee<{*|tEDD(Z_5RF$V*;s}kr;>d z19_aeZ3`&)V-c>kmj5jXz`B7m8vA%|ib6^z5Ujm>Sz=>7HulA|rL^>wkx~UF-|HLP z@4YF93#0RI%^zR3=S;H%;*kcLh8>urO$JaJ5`g-*!3xt2KEuVH>0_8{Rv5-dkDYmuYU~eXz~SeYvGfAiMuSL`Vi|i!+(dQ{ZZP z)71oGX^J)R+vtz=f8v>p$KK!0sEhMOa`O@#koD(L)w~bP5R|&^JAylfPT2Q^g~JQi zy~0y;p8o-j7>9j|A0ssK-MJZOb+|pSc?@lChS#)9#z%VMx7FeN(pVXV2=b3Z`6SD~ zykmHW8v7THz~g0n5FKIkYgu*4HYrl*vZExvLF-8-c_>tBG*m7!)9qMCeSOfSwGcOb zt)DC*@2JjH?t^y{3wJArPyCmP;s2z1Cloy(h;jU%N1_q<{Om;*%HgE>7RlP}Ynni^ zF)VKpgsKm350mH#w4ON|6F>4x;CV8;$~5>yn4<(T8-P*=oUb8U4@w|6Hs`J@HU_Qu z!MFeD_lq6jO%M0?8KiH<*r+X^S#iBnTsAk_!0g!a^(Q z_;N)r7}aigQO!5stKLX|yJ&g$N8^B)xrjJ_j?4stS1m(M>2n*dO0%Wa0UT8UGzDEm z6^!~x>VH8k7RShZ7D1EXDow-g5W&7ehLM#&Vi%x)HO?JRTKaPTP5i{rz7)*N5w9Jh zIG+3lqFfE9PbB^=$Iz?ozj^Fl>!!!o4`o%8>{-(_{Dspq`zHBQW_oDd2hSRU@p?PG z+!z;y^6uU(6`n|cLr|@{>Rqc{mErpvFUM)!w^Bt~X*k6mB;+@;b+g<}cx20VCyb&S zOFhi66_FjqVVqE*QlE9{{^wvNxhcgy4udx1c+|Ny^o$gO9ZqNw>&ytN6Z=1v<-_nV zBqoqMM4QaF#>=_eHHS^UBItW!)}MUzN>AT&d9xiE4~} zRq$=gIfg4Re+^6#JqbtE5}vmSK=$PP!t!|!wj1!I>+EcEiIUc~5MdUneUHPrrq>lx z$gMpFF@C?2km?2$fu!=Pi>^Yl|!PT<5HkgJTK0-Uf!M%!mAkp`n%eDT5>dU3GrK*oFgQOqkpXn?n;M6$`?U6z*MfF*%(snTnoA3a8BEk z39jCY9u_F}3u_rU<{^Y=6r3EMQN-vygxi2^&4*Kw5%4EJ6M^RL(1aL=fM3o48h73l zYwOY8Bz#APz}N0k;&Aqf%0)s7!M6p5r~(NJD&`E8NN>PIfCbbi z_FND*2gG+(Hn1xbg@>(j`ZGnc6vTeqmOHe2AzoG|QH+m91jN;&>7cM^^6@9JF+;rz z8N%4Os9E<}hI?{>r5pQ~p*Uwd!yx)o39h3oJ=eNH@S|nIBq8AG(q-+}eM1>C=viOp zmq@gxs{V1&5a^_DK`G^-WEN-Aq|)G%CvwE>%fg$46Pq)elK5cGv>AaIoPd3n4c4~D zVfGrAB*ULs5&?y*Q-^p&wr(;6F#_p#XKf!b&IKmeMK0qqkVT24gk{S^@8WIg&1aRO8{DBVDKVUj4zoTpkcn z6mBKdSF}0*4*a#RUJNoGo+Bu(b{t=SoaREG5zwadPI;t$Du+niMvlF92TU03WnDM! ztI6DTk96mE%VRNSTHKdYN~UbQNbN5t9}{sqQU0T|U(0M6c%BGvx!{V1-`_~9(th}X z(Co>l|EI{mt#w3gPFXosi5;+UlEVDm?X&Zfe|jvhc(?ff6gi=S2nhod2o)pcr-pMM zA--rP6KwUfbQ?LEZULS%I#&{Vy`=k$zg-sSt1wkjh5nW3En*b{NHWiw|3QW$Re0B& zl#O4-(g>G(;B%e%b1(SWP#$~5Z&Q47Vd(Doh>&$-v#_W2bjS2eS7G|lFzdn~u@{CD`|=0=03R^*SKLv4yt zp0_mZM0ixEEQO;x#CD8diH%j{x8T} zTbwvAI3R9_JB%V&+^AN94HsBa4cp{Td8r6K=Gqax+OJ8*(*JD!9Hv_g-a5&QQzb=Dtn1sg7sNln?_*Ev_Sp^J8lC)lS!r ze)TeuP}_JoAw^&I_^Cp)kCj$?Dcp~V>e#CeJ1U19k`)&_h0xYdbF+HOA+Owr*Ac~e zN*@@d+fT2pn4_TC3(MBI`{6cwCfU;gEa`+Da!OJEPjSontn9wstvxhh2p#MC+Ip(6 zE%#fH#9z(*kwmCC;6Djc&w(SS3haEFPi|QdtP9A#`SrW|7V3_?0%p+l7vG>?t%T;! 
z38JICQrdgzM#slLt`J*iOwjPe&=R_@4>zn)O!#7KA(BeVAP+00%x(a?n%8CATgEFZ zt|tb2&pJKUvF|%)j;H(HE@VI(xkhO&p+u$U-c@5KZ`uJ!9Uz5_e*t3a(oXI~-U4%L z03}jWQ!^cPHN^(XBT`Z}v~~KK!mAdX9G{PQoQ>5THU%}y2|>&&q>{}2`MFyl^xVFP zPMzIfJUYIxAhs%*&N5d zoB81lmM5Hk1;cO`^@Zzur`Vd%i^ytUv!MI7<{@l@eoq#{`U!}MHv@xrTD+Ivb)Sl= zaSzUfuMWJs#=uJo4Q|A=2oj8<+GCjTxewBr}!|)>E`oN*y14 zJ1V$X9`)m(q#=63l7@)^8wQo;KkskJeY{lCPTIM|Q6HvBjoit z#xBr?W4y-yX3QWph5z>6q#EXc6y0AKpFI2V?TwjWKVLojMN*5_vut7ewD{BD&$B*1o@B3LsL*TvSC?xrw=iR!Z?CDj{omQmgj40Q z&D;ah)ET&k&jK(1DwAg-l{8LLHmAge(Ro)B10ck!6na2gP6*?8OV0~ok&H9O+cWGZEC&>p(Oqk`4-bi_ zvkpw@O~mr^kSx@}9G_myAvrpq(dd?}yk#sj*X|`*o}`b#{8OKr+v%7E+6ZmmjnL6s z%tQ0O&4^2l6#e#Cp;S-dtKNCI_Nrmb+AL>kEzd|)RsmIQG}EfqtCWyvUwKl@Q7@#u zb)Fk@6+b-GEz?i&g?dosUM5TNKJt}TR*VSma8Hv9Z-1wZA9nE#R`-{a&a5yP+lnVi z^5{im@foZe?u_I&ymLc^B_@Alb^+HE4@X8Y6e@FB-r|Gs3q%cb#x8KC(_f^1^r;w` zM4&3drb;23+9T%3YtrO0eO(3IzCO-@{A2J2AOieGxt?h9od6e`6m?odNimp0Lz|()93qa zK2progLr5lAFQ#eeKdiWViY5Xnf`{gZ1Fcf=n+5R+#(9;o#u?R^c^r4Pzu5a1?_My zvMsmCkO#7F5fP53VfmN$u?!rPxmEb`Z+#2B76&8~MW6m-4|^qi(-jJPjS;#1 zrS#Vf9P;bB@Wwt@kmT)fk-siAlob!}$rDNHM@TVy3Da5`2CyJKi7;Opb1+B1JtMvR zZB@*0QW{kf)l3gVx|*Ei$l4JD=A6gC3sKcLSnMTyxmw_o=+sP`8EOcGX~=-yhiqCY zuM~g7-8_Al^H4i$@+kuyThJu(BjI8?{W@VHW{nh6y#~9}i9-*hu}yXyuefdwG9Ka_ zhN4W5k6vOoGVr4Y4RLQoSApGM9=+LWF*c5o8Z-7Kq8i3PySvpHMcq&;=8r9u7VWp% zQ(1mCXm&CHN$K!nqXv&yFpoQ-ej{}~?wBngg|Q%~rVE5NrOprb;t<60HxN$yh3mn1 zO7f(4sg@@OC)b1jLvbW_u{!^^GS;=#%EXdbw?xi8{IER>wvB1*_}9$c8jtw72P3@8 zi33~7YkwTBKAFC+_7yw4#Pf(D9%V=mw=iy4&~{h)+?t24UM27-;U>tgRRNobQV>gr zy-li*n@@NQVBe?sc7|1Kt>;@@OCNtJ>SFgdENU}0<=C-jAQ378*LHeuStk9Owtimr z#NeT|!NDHDN7x{o){@`(jYj8(nq1Rk*51E?tCe-nCNBa3jZ$W+7Jg1CW7S*bDhz_mfrk(4 zF{0N5=mn(aBM1}y01yW(p_->pK_nh;r?7?8j|`?_xNr;))}v(;8QTKo60a_7LM8enBj~BMH-qMNN_Kv)Sz`hlU>4@;?vC{ z5ZgsmxL-c+r!D>|p1Wrp@?OyO!Rg*%sKsV? zVmBxH6yW;?E7(%RhgNsGUD{MAu||KUVq! 
z_r9KY2)7~h{x9o7fqxf2?G{X7UGcWVYQ8M=UpncQOv-Phw@CUM5*5p zgasS-=lYLK!<1TKpsyd_Es-JTD@?T~%H-Xwr`kgxK4*_&9xTryl~FHkitbW`hdgsI zU;26!<*!Fy$&^Q|y~?!nz8GAm9#M~uBbBi+$2MGA?jbeRG}GlIdG}VXeI5?Q#edIfrRt`PIVl%J?**=+RARistEW+S@(;q_93EH*0BQUC@448 zpYSKI{P1K$D>XvXMWYhDF z$3Q#NY7Wg^h6#zVpjiF4p~_^K>J6AoH!9hp`Cqhc_3y{BCg9y|k{?ezqU)WDlB)G!!BjVWu3T0a9#6qro1w_m3c^ z1ln82*+nw+3{uluCpwWQp@5ScDjtHjS zE&gbxd|Fns=v5NQIM{$k?^~JJZ{0y)`)9V@HM37)qYEl`uA!XC&C*-M!tZb103cK^ z&{Qk<5k+8Ilc2c)kdwG$ZZsJI7Y{9Ktu`dY6^JBb+{&(@9hxVBL9v^)hS#H*c^WAO zII=TP_0CHoM&I37FrCoZih1Q?RhlC0LTb$9$wWHI!;wF);3}aX@rovi2K|zO(>=&` zMLj!ponqsA{o(-}eOb>hVgLi-za*4FHeEXGm&OL{-#&98%zJ-sczOaB?f1-VTnzo3 z+q{E|hXws5ZvpAEa6a=u=ARX^w8Q){*hTC2@WtuPNgu@D|D=>1!-atbZzvkw=|ul5 zaJ8E*dF^4CD}mizC#tlFr0DI^t*nBj60UqEzm9Uw%uG%oYj>c#DFFn~iF+`0KuM&w z{xlz3O8SMlZI)PNL-ZJGuSCq~34xut$*#a9i{wx<2801d56o*1HJ-gdCdWn1)EdJ}NqgW!I{(nEkuppLt1ymZZ-UZtv|5Uv3PRExT z5o7vrmaZLLm>-klhD57W&-*1kJ)H*SJe&tv=qpD{W&WGF;G4Ql=Opqhbj!&Pp|<-U zK@GhPj0>WO&6>^{JqEbkN&3egym6(`nj_xN-V=dr+1q@{|MV+f@T#r^@N)~^XKL|wD1maWMAVQ~)FY}!@BqUT zCRFZ{>nS`>9H>D$q>|D}n2NY8<4SnfZ`2Mlu^WiOB$FXce@sSMmEasCoo1w2QGb}l zF9+V{C&?3VOnzv%>Y9JR?mv;I2>7g?g8VkBd@Q4t)AyhQ91r0Tj~f#UxQgI1vTtK+ zWR!2pZvd&ubKbhQuz95XZ1Q|U3J5kd57;1kHCO&Rd#L~CxTonI+%q2&z-h7V)!~mL zS-L&!(h8mwe$JizfHw++i=ArsL-AvQd?4VvPZ41Z+JNPcN!^4_GpUU4-T zI)UAD7a7_}IzfHxHjf+4U<9 zOOZ}mFuqOHi8ulzqu5K4yxAg7 zcXNwWibrUQ@I@%|Jm(yAzHOSTzWhdo)5FBfzMFpl*;%2mWm$yX&-PdduD<+cP!oGR zCFAfk6%$;69*J)F1&3l5q=SOsZjO*I*}qN?RXP?>*^)pOhEnhA7=GVZ;uo%qH;g~Q zMJ~*z!D}}Jg;1uAc&s8qg}@7DfLXZv!UMSJG+11}AF~9Sa(`U%&-|)bXAh-Q^VYpw zyBtA23IZrLTo=OBwrWM+P(Jn88n!DsZT(yr!ljB!_A*e>YdC`QAFHUjsbD!eD+I*s zdA)&VVY#q9u@J-vSz_miSxu>kF{rC22MT)g=|gPjtupk+cZn4;<-ck@WS1N$iynSz zqFPR8=>K(7Zdh`3z??@(D)3cUO;AH2()Hzz#F`{CJfq`ve4H(5c|uTup3H%XYMa_9 z8IVO)L2NbX%@m6CN4jCRa!LP+!UsnR9j#EJX!%TK2C)tDi2dAZm|^I^TeVK~Q`J<5 zd1e1vIpdedu=Nf0lWAVfk*5C9ChWz|;=QuMfARJzwQc&1 zJBepHAH=G?q}Ct*tcX;xpkTegE|S(FgU+|keGS!=J}%R%GN$2@?Rp``pcG(zle#SV zqc(9bFYqt@-ck+h8ao8Ep+n-7{TxU=uxN71ltldM_0<&+$=U4Ktou}~gO~G1-Ceah zaFTfw@;<{UFqV-xM95=Pz`gLjh#w~8}sTE50z)x4=<@Q`VIKlKk)|ANoqv*T+z3Wyv4K3kxEA6{lu82P-+ zr$z1q7($MEd32N5Q64!QDC9hq0cH;V48cYK`tSAZbR0e@7;FB+8#=+4LjbGw8`!Z3?1m>GuSd>JZz926N;e03x>71v~ z(iNH?FOR|Q|CG7)a2?-tx{bAV%Gy3Uv42vr59lE!jexGO#DY}?fODkf3g#HKZ>3k0 z`Hm)0T5-*l?Kc(s2iVDjM-=@XBn9j0s#1LA|6 z=tV+gMWnxEJTEuUfjjt&L3%-5KLcE==dQDQ1%4l+P9_5gdN*GUrbk-U-R%mEoXl9> z7qs)=00XAls;PB*NI-K&xg3qA!J#QI-;V%ZWMrjfMa7^NpKaV)@(;eL{ z7!noa<#B-L$aXy7VB5Tga4Tl`m%Zehis(edgofN53XVx}yXQgG_kzgDJRzFjdpQ4O z&cC#X(ugkmMah<5EfPkd{uzPpB_bz${I?llQKW_QA>mmMP4(}VkrxR-gY_#riyZT3 zTGh6@L`Hst6?KP)MrE`g-Mb(NXA0?)hpv9fDeWaHdh<;$_U(pgs1@caU;hOfs9qKQ z^%S+SO5fH{r@FtKw+MiOkzX5nVpRU*9S#pVWGut+knK^;;O1kn+pqh6XOn}ekrQGE z&*??H5EiQdNLGBdK>w*1AvdLJNBdDqTNEhr>kbhd{J|C#(KZCkb@|Sh5s^Yy<4HL= zfNeDFWJiC>r%OwUrJOnTV$xU74#rbw)r)WR-7-l_mVOrkA4%PebYU*`6$F@Y4PJ(z=1{aCNMLDS0L%U554-$~ zrOb;Bh1ySFU-auw2L)Vy4(y2h%ugK#w%jGWayh2@DH?p{#*NQ@ap(k$j{}n2Ln5N@ zWI7s`*Ds!Seh39tylaHjPgL#mGM2SbM?8TasQmn%CPs0H3Hsq_sB^R)oB@X)VHIbv zdNgR!72(}-_yApn)%H)U^UMccZH@_R^e=38q)K7{a#X$|lv037k^D860eR$br~r1R zglSmq_&KdBm`>O#b}S#j>-qhDp#rGs-RfeP-7?7`2HXyn>L?8gsmP+%RQKaF4wydD z^p4-)t*uCWxchfR=L!PJsVicE8gvJr4NG2aXGid{{@wLCjjw~G@(>>7o#$^XnKhW{ zKXyHnMPGG=>0BkcNL+P0ORTH>!apa6j-);vO$1}gj)oiquN2wUQc|>g2~>=w=hhv& zHxlz89|*8>iz;gD#IAF_@k;UcWUJx7qr8qWoD;TZ30;8gs=dcj-%^m-6N0q{Zq5FO zT`gDd*Pj%#zwXl{2^{|uzLD^u`=AT(M*Hst>w?zfaRXUO0Trl9bNpwy2$F+0DA*;& zG}C{_%$HNt{&`=HQN;jGo%<-q)o$TLiupa1RhL<-hh6d-1hZxE65_zIK(!g{b=DbDj^zP&3g7GqtM&yPh*gTOoUZp2ZiCs0QR&?4yM|ITKe$>@wtSG-@eeYoUu`_FlQ_}~WlAj* 
[git binary patch data (base85-encoded) for binary files in the import omitted — not human-readable]
z+d2AEXgB?@qh9@n!&0?|-Nt=KA=(@TvsyLb)K~o0nn$e9idUf3B|ESQyG)=(gJi1G0_zDa6rU$3;fn zN_l_>D9P<#v(amk2HJ8G5RgP{%%xqLvOVI<{oH1zY$7W#g^4G3$#`K~b87aUTh9k2 zx{#gqDkhgyv}L5aG<{jSFf3(k$wXQwS#4`JLn0Q*4lr;*$OVzV&>ScMLdgm zonZa8o{RSgGl?eNm1f`gPzqjPx+u@58{WqVh6Jy&Vs&8!2BD512kdV&N~gmFQE2Of z!q6%6aoLt5?>Ikpa~X*-qD1#NdVBoM=Xv(Th|?LMq(9Deot+{i=1(cxn&~&eXEe<5 zI&sk=uZFUnz>eD22gzvJzS)ij@a!P;8io$ipe(@wr<WL}vg$$KuFOyYQdrr2l?g@JQwU z2zdjzo-@&AQOaqU#@I^?6t!``LH2fqe(<#c?gniP!(Q!4J-i^#KT*IEzlepE!o_aO zW|xSULZs2*l-{HgWSC-|7J+=k#zs8bA^UpsWq-8?P8&6&QPD7*O}rx(!$x^41~SCP zhGE9R)*&FNW(COAgY-OY+#yYBEz6G>V>#gC;)TT=Qbv=aQYoXryMp7EU4%E4hwSXc z_4o;XPc%Ok^LOdw{y2tvjCYolN4$=vAv`d!!z-AVW%cG$qgfi>)`Qeuxn4)%j-bgh z9hx#%DMfk=d7&fbchd7b!JI(+GM`skH+^mVz}yi`si@wo4$KoMrTMOkD#x0zQ7uLU zP6oXuJQN{yJ3oUfl4k@e{9-2!h=Zo6D>JoB<;l&;&}4CNcj3%+7jBZacULbZ^KNc{oHWFY-Mp7op3?teU70n+X$mxkUXUgC0c#mfQ~E%ngeYVijX3qJ;f zGfdNV;^$9%^+dUD>pyYjeOUv={L>4Aj|gU9L=BN6;B|1k)^|NC`aSAi5h7utQ(*-Q z57gAyWbHG5@I!B-hCTMu8c@&~*hiGv3`|dxqzQ1luDcEcT6e#!WhTJ5-X$Apncz=ox^J}K z%Nx;pQqNo-|4=8`(8gx2PHuUTG`_#J#P2xSw>t_I2){_pc}G-YLKJ9du!11_$?uuX z{2Gq?{E-edA}S5?rp594^at-eTBp{n*klE&TP3=0X73Zm-)risWfA=9x0Q6SP!>;^ z#!|aIRc%&+2Sx9K&|nUl6i7|~$CNCZV--#nHFWF0*t~2ww6VPxuz+#y(3dmZYe;z|R(G)}xQx!>adax_-1Pbhs+zwM!GgoG{{j~pF* zRLQoQh-Q9DyqA|OH{bZXMs9Q7J`rIKDlc*FiQz2a8}b{v)oLUylyFeZr>338*{UEZ zGy+)*SOLxxFLPN{`y*X(nrPS@XUhSy_Qm;)(Y_)H5+~WMWtAXe7cL*l!Xycds2Ahv zb>blD zKn-*ruznSK{>1jK^C&_s5n(l@;WKPjK>bXsD4Q3H+zY~_)UEKK(`r!iA}m*lsD8X( zvjQSL&S<;Dxk+Wyx#+@ggt#&mhHb3EVsY9>S?vxY46}%;oI(@Bzgz1FOgUAUclPh40Lu zD$kA|RU^gU4(3IB&=1`DBFAw}R!0aZJ-tzvmn_}=+?G>r%~$<%QAHm(;fiw-yZ*g~UZj9PG{@9jjv1#6k9xie2o%vw>$#j7aauc4FV`>w4t@e99D#Q?spa zG!rg)&W69oxCAc{crTGuSV!Yn8i6MSkN=K+kVbb%x_>=GV0UcJ zCzjl@z;eZV-2>vY%?KNE3SL+wyAHYu%qYMB1v(xv75H>Vf`o4qV0%sR_lGTEdZ{PS zc?KDv*z^F@;-Zy20K^F-qy&snt|V5lloO3sbq8&YynlnS^@eR$jEsBX%V9+vx(19t zM2Pj>KDK002jFeyygFP+m!YC3pgg~(b&b5oBtkq2mqS)SZX4n-^^{shkh}_uhoOB* zMW3a7R)}orD$*Ob%k-TWG2~2Ty3AQvKzhslsXMuFze5_qQCiH({I zBg}2s+5u0KET~Z;zti)XTk>e43uf>tJ2+^AR%f{XGsA1mD1wrObr^kR5aSKl2jU5_ zs5%#e9!7M^U;3A$`!;&JmxE88k|$5$uRB|zAjKOX!9X-Ed9lkGQJIro{2RO^7O?Q! 
z;UwtwS*^4sJkb1GjhSfqn$RY<53tqk{}F6|^QVue%{6cV4SDgzlB``Q2E9Pb=V5N= zOv1e457ZyD+T=1_4aA`J>9SZH+a4(!W|(QPLq4z1rhk8$R!4YL@9b)6^r%ncE?HPK z@e&U+Qe5ym&*JSfkb1&Q3GB9?_)NK*ioVCqu=n;kH~NpgYJ?yD{Wxx>u;wU^_b)^| zp2iH3iN9u9otS{^Cd*=R;5tlQ8re-R4*f+aP*xdCyz-VDkhJO-O93-+hvPbJ@U(b7 z+WC3XaaT}1@)Pl0R1e|um$qv0;f88u7x8KXZKY>QZmwl~XMhV{>=`8} zLG?wFD%#2Vr`1;84*wc~j{t{OO*#8TED;n&72W_e*TZn?JD#Xh&XbeF33kPUN4oojIJ*-(6AOJ8(>ZMLd);0yIYt*$tMDCFtwqPFK_ap!y`oRCuR z?H#C;{X|D$EETsUau*s{f%slg1EnK*X|H_qga)*5OxOaES1aH4=raSI2}AKs+4o^L z)|LY6ZnT<*_EFyx-&8}H7mIQp+Eb6#SLu|0`21PKP#Ctl344_;;>WpITYm#oiu~=o z4+cB$St_*E2f3AnYeHOGmf^?35wr5uXsdj9Ox(vj1PC?IxkG&uzFbp`8WKtTZX^ zS?v3pS`x8G^`LW&)Q|Z8l9znm>d$*ZGavjPmuYVtdoIs}3qokM=aRMv@LP580%WfO z?`B{>_jNjJAv4ORpw#jcZMKSi@jTtON~u{MtTenOE~B%2c+YP+hw|j!4M6(h8V`dh z-V>JhI&Mf7d>35eLN;a{M408tn%LBYuFH>A-j#picSwk5_Eohd(uV>ka*|A(c7rg} zE4P0kq~|(!3EcJ5V|P-iLa@WoW4Ao&M>;A)EA@{u17GJd5ihK2WsNuGpsW}ySTj_0 zV<_yW+&{0PJd`haVmw5?poV@x!svZ=?gdpM=Gn6m*VU8ON8Xz|NHyZY?@_X!OP7`#fW>qikSCNjClQV>zM|fRI@JGv zY{R+mc}s2|j4%uDm^X)@;oeJy4H?>R0`zgj_~lwgSLrv@I3ZplPPaSp zI7amEr`{w}I6(@L7`2I&N(lFF}{9kQJrecspP1C{C$yg?R)LQ`d-d;gT`NZ45| z?pWd@*U_)EhIrV@YHDp{9_(k4;k&;mcdRz8-jcv zZ*~(Nd|QYiTuF2zlwWx{{>ImXxO|SD^Vp&j{5zV|r7&N*&C8Js5V#_WH2d!9$ZjUs zWWmRTJkbe(-D@_iEN_kT(2Lo(h)G1nE$?l8-L$EI<)agnyL{}gnm6a`8Tx-fq5H~&GOD5}NtJ&uZIuSA=~dnDEcrZTlFI<7x86+Hvk%V> z&hxl<^>JSMN}<23LtM_tRSRE|*>rUa=i4Oo#2qg~lN}}C=og3|-tLL-)5RTge);^G z^MtPUNsIU`rk7N|4wz9XfS8C$5Yh5r3XRM?|KiJt<^J3hkR%tnDQAI-Cv4)8Rpbs*6BhjJ5(2H@JpkRZrh^J zuaRs`Xt8UC*woE=7HVsyJoK((`R2SfNZ$Os;<}WekV+`FLNcFtv8`$widFEkX89c4 z%(bb|2oS%U4r!H3uToaN?^vlJX}ai6;G)#}{-&3yc+nu6!%Zwb;KiK#__tMD*}8m= zdu}bf_luJkb017T3Lj~eD7)gH9xjvLD1nqX6D+*{BFtaV+S=9(eW1jZnAMcOQ5TcZ z{jL%^U+nM_NQixU{gEZ~bE=ehH~2t0g$D2SfjPMjIuH5jQiD(LmEI|zc(YsT$j6t; zRtAd+E;v8dY$l;Pz(lA072kmE)!z_O?iX1Dt)H7HS3JSDfEA*me3ebX)OO}Sgm~I4 z!s0LY%G~&!r-h#fidBaq_&cI|hr$_#O|fzc_+K=n)I#^Gf^yeXhT{9v&M7jYEDdd5 zpDTeYwzF3@coXq{KCi>3zN!^eSiDI`h-G-sjhjcIErGMTgJ}=Pv4gpm$^|Q)+K5Zw z*SIdN7XFAg13T`VcgkJdyV>bCwGgRKN;Cz%V?Sdt9mEPLw>o6@|M}BmCTf+p63pjM z`8Aqo^(*PKP{vAi717jUQsU}$NW%N9??<0`Gh@!HyiLn3~c@yumq3Q^-l zw1?3hF<)>^^TjK+>LwDKU%UK!Xx_4z#|CEnlTdaN9U!v7_NsF8A7U0^olUEOaxi$J zYUCq-LHyNTnQQk8dBU5TPMd#0zM<{U_!{B)s0?D0+h(d%wNk@qm$QE4Zdz2>c6Hl% z-p!xlOi4z%_W>=$nM@F~2(@RLafB^&c2LwPby3dZbRfj#K1iT(8X%5iKzrGl-)z z(LbiB7V(MVdwx%2HM_CqgN4C`2V{s?8v&;i8FW%?Bj$Fbr&+hX6_)yUKlDPxP~9h@ zlTbP3Jao-FaA48KL}V5S6)^Q>eJj&Ghpt<5at0o#8VOveGfeC{?HBwTTBF+KOvaS& zY1qQln>DM))l%2Eg!X3)jqp5FuWOox1B~gyUEeAzMQ(|lR&N@rl1QU~u=zgDAM|1D z@hm~^dl|#qiDkUYdNGVKOiO6B+cAjitVbGi$n?zcq!L5bh|GXki2pn(%0kZ*yKQxW zIjc0A`J_f!BJmkxgQfw*WiOs3>Gc>#%KP4B4LY2lp2DcyN!Bnz?`N@4GWMzUTH#H- zQVMW~`eML}_?5;Eb)YUyGB6u>>M#MiNJti#4TFqD`S%lwNw^Mh8}+x$xkUW~0}7?D zqkh@g%4z;Iereaaqwfrpye^gD-TaN%sY@qvhCN#n=MWu6yP+v^$W};?@Z%8Dzl#*~ z`ek^jW@UIooeCPNix@?Q#hLD;y-4kpFkSPNA*0E?ANS*$G$u<65ol=~-#V+?jrVh` zmhq9pp(kFRj&>hcIR6sn@361KQr}3?&#Q+c${ZDO;yFbECdCv7KQZNcN|gQ9bduv0Yz5xZH*9Wk=CD$zTpXAs zhRt&KrKSop!v@)TVK31hNB;)w+U23`t{C_<@kBWOG$7yX5Om7LayM($|WJp6an?1rI*6l!%Py^s#;;^5_0(6@Qfl3zHkx!xbbq#hn6Dz_tj_5*UpA+Hv|O!L zvU9THyHn?(^g)sGtgbLtjSzpo_i z4oH$b;>j#v-EQ}S0~Ji!b>DJ>{w%Ft`a0=X(Kod7nGD9W5AS$6!>${5ykF8n|lRyFw&NakKaMF~` z=%FAJDZH{(J}L#>2V)e$#C8LZdj~?gf5#U?RfhpXCNyg6V(y~^hnOI@Z}hKv37K;|RG{{<{% z&H^-*fN`F%E@0%qq?ze{_4vJN1X~5HKSb9PwRqyjr{IYRT^JmL?HBzEGNsO>|58t{ z0M$86JH`1Q%`tPOpotg2a|gT(AnW<%eSN}d0J!OT<;q=WSRuzow$kNp!Pes9T>09i zF1S2Owcr|Iw%$M;LqkNQnk<2LrYqscPgP4Uak=fAd1*Q_@$=e_ff1#b<<#wrU~F10Bjl|61Vc-SWY#Z4B~aN~Z*mIUoMYBx&XZdsc=LV7BKEu#_~w!8Z( 
zCGZOCrOLD|Jgmp2(FPK!c0_PT=pFQWZl@p6Dm0cLetCnN>f|R-Plb0N&{wP-dF*EpZ8(mqV%K3#DNm;}{%#+una#9CvJCp0FCy}fb5v9w zwCAjWpHd5ISpsSorL0-sHG=oBSQlhRd$BuzUEECrXLqapj)dU96}?a$8>(xzWYyMQ zd&5a=+@7)!OKCLQAcbQWC)UFrJHO$D1Y1H@(h#8F!+%caTiQ$h>at%*68XULPRx;H zg(JzC1hW4x_~UnWEbDlpwA@rVCw|OVe`NRhKhN-w7C#NxLb`zH2kt>|O_S!!Ba|Z7 zB?snsU@ur%;W1Dy&-*F-n8TIM)%siZV311rw0oyWfu`n~y9CTUnnX+0K!ofOjmz@` z(f67O2{XfZVLIsF$Wn>% z(NEGqCPWADPV}%PwtOWBTK2l06&vQI$xMTptic^4hR4<*PXf{>| z8{e^U2K+pV6F*^q-Tz>`Xm#sHG%mU*b#gH;dQh%{mnBOXnBpNi6@9RmWqNf5DI>sS z?gTnfKJH<AC!sIj-$o6MbG+7$8qhSVr%oTwSfaU)x|KzYtEs-Zz(||TG!J4&^ z4{mv#(n4tFt#nCctg~b#;4uti<#TA+N!`g{QY+QGC+>&r@$~`2-c=!oj7sK<@e@^N z+7wQzTG`e4C~yzSk#4%hyR^iOyKS3_`jHouH$Hv7l1WqHt<{qu?EW*Z-)Lw^uk?11 zdhc}yEuqh&;*b{VkpqPS!C_5srFh#-)SqnXDhKc)q)CA4x^mTYI(d1Fb$}cr=4^6K z$~pX#P+g$uRKq;0cbm9*WZ`Z-MitvqLCCR@z2}Arn-gVBf8{BYKjty{9+(Y9))qHL zVFF2ZCwj_NUFZ{HOg!HQg}ufBaUy4QXl<1{uuvvDHh7ci7?Dn2`;Fw#TeqvSFN+cp z7~aL~6btB=!oV!{?uIZ^ENj-oF_D}fns)jv3lk0FfKKUA=dCZKsXQ)AcP)ovKRN;- zbB9hJivgd((7;}vsZJ9R$5FhrNA`=m{_lQ+Jm9~awVE{_y<26wo%Z1wu_8!QX%jZ5 zg1mN!ji|Mu@08fjtHKefMkrpj-5I12mX8|UNpprtUHiSim52JiQ#Uh@p0!hVNA$tR zrTVIP-x9m7NcjhF=&o}&|4d;A(azVmbqtW{OoTr#_p#-7i(}>-b-XjMAg?#|4vWep zSyVfb(;`YbGDVx~$g5+NHH-V`LzITVtzTg%w=6H1w3(E*;879IR&RgKLrWGtpQn#} zMZfCp*5*y**dB&(?}=nA%IEi-nUlgZEib{Y1V+SP1`m>oAFmaf!|Ic$Bbuv=j-+;r zXvA<5GSXe24On<9z%5&WPDznmsctWZDg(t)BuJcl20T(I5-?nW^?0yg$Be4K%;f~l zR3bVLtrnyeD+WS;(F1HXo?dR8)~*v#jhMSBcy`Y-hxiaWfbSOv3@)yKDJZ+0>DT#f z_x1}&=mL0T&lFL{X})SeiX6^-o(>+S9$$C_=K;-nB3_*?+L4=EsKlrw*bT3cpQU{xk5y#?l6ZX#ZMnQZ4;3+7U$W&;syvk2=F-fNr1%m^geVoeuTZ z`SFEIH0-8Waif=@YAQl;ue5AkNQEqD!PpjO$@?^aQQDG*BCk@Zd%xyHAUfsA(U{Pn z9XF+{*&{vAH*#Bfs=l>)Ki?Nf;kAI(_(r_22!qkCL7VwZRU5JVF^&)KLuPoS{;X~r zQBfv1nB>|0byF$c5sz~>w~_)zvv{5%mq(B8{q`iyB~+PJ1=!*lCYG&12fc&)1Kp7ZoeB_CidQnK5TQmgolSIenLSZ z3-QL|>4)* z7g=c<@c_)#3Q-AP7A`u;GV^5eMn0zx^Q%mtr$E=WS|YAt+s{25g1Di+x--;KGA5Nupvw=9b5PvM8~6d3vKEEq4~iC&ida5=A=kxeYftP33C zVXIvfwc&ET&03f~fi??Sy4qjWWkx_Lx&J3+5c5P0=g>#hy8k6}bg2F_pQ`(Xs7{RW zEXaaSvj_WxtGR1UJ~5bvhH@RYrKgme3w1#K@sWR~gR{MHGeB^Q5IQlT!V5%Q_i+~$ z>LCqAm1o& z<1zzJgfj)YiovlU(-+PEm=1(ZJUtW$iEucYUa8x9mY~H)_31KhC!*dSEO!9Et{9d( zegMvZ*foVstfB7Uj!m%&d(5PO!&97dXTUgLYw&LsD6EQKe}O=r&S(j{+k-6Ch+_sr z9U+7+t_e16SnC}!MQP_cS+iU@4xpxcCS0M)G!)PG$1yyKmT8H_^NLFnKN#h80rA z^NeSpeT9AyK*OFPxr5x*i5?z>)GMM!X&{OFj||Rk8Jm;Wnyb2*iVBA03RZopoBv?= zqg%@rwXd$6ZpfXAvA!%ABOx#FRQuEA=o)Mi)&xf{J6jjRCQqRl2aRFL!8Bqv^9M+l z!!Zs!si5Q+^i{jZnN1@|HOz;Ue*+@$X;!+P9RdCYmwiuV@W&z!Wr|i9Z6)iokG|MlqgHeuzqk)8vEMd=0IkF~>=fejs{P>uCOb$hAo5KKmV_r7K9%O?qcGi8ScPs@qjo_Xb~x z;PtPl|N0OyXnpZf6zo+wlx$rM^iqZ8AOtOYVPY zm@EeLod#&g@-l7Yaa(B;kVR+;5Y?kuvB1sH!Wl-tzf++ynZNdpq{eS0V;!aSe0ToM zn$ORG%S#Jn5ve5C2MGyAL@Oqi?aZNjHMxd&3_&o0DW3_zILDk7sr;ltcGWIO{ z{LwaNd4|O%fh5Yj+-Mqu+eY9RX$EMo)rjUVZH#7rS>B(vq1T-LOg~US_SxepaD4Go zef>qIyaqkZ-pkqWQme1uhpwRFx03+`xinfAosVq(XCRrs6Oz1=&{-vpE>cy!C!>L^ z8ojufT=Tzh4kpmLU~K_p5GomXKrQ#*Cn3)H4ZTmVL$tWyi0e5Mzes%zzs*NC@uMg; zjGYWFj_B~x$u#;l5F-E9xdqZ}kU}lNvbuFV#Wc_Y)`qDXj)#?RWJIUex4;s!Q_1m> z-w#429&;CG%KDLSu^r-toUYiYUe5uWV}df@kVZO%Ry9G?KUet1-7h+-|2Q_ z0c)ku&GV=d(l&PWm{g9Sr=~Z<3?ivVY+f#6$!PwjzB&&4-W5Y4h{_q;gmU{6?i4FV zU_JDmOmAMDS*2SFCS)F5n*px!#x2+xriBF7i@p@Tf7pyYqHFWh<~?8)cN0*^Uu;Qz zpY{{70Ly~%!n!S7v@+-tvVm z|LV4=8i4iA%bo*Yu8FrgfHZ36FuDe6p9P9774g~SLB{UC#>eCN7W7)G)*uMq_r1Hs zR&#MAs$mt5TV&64+Da`?N4ohsjk1=AA9{V9(52Tjp&g8M$9s{P-lC<7>*1COCrMbm z8D9Esi1629!HZoln$`rDa9!P_01k0WTyOuUOdJZ9lMN_OP8lk)57k;|5$;%Uv3#va z6uibb)%Wz3dS!S!D|Y+>x9zX&tBwZ)_r2qAyw9CJx@bi_#uxd(0=FFWC?V2*Z@ilB zb^%bLf}-8A_g;L9LvwIB#=YBT8)iE{atu298hUgB{oLn66#V-f%KGMg6zcWvb&)|WpEURQ!D~j2za0a{M_;$>H-lHwpV;~f*sa&v 
z?w>bDKj~LHYq(tdWzv7}%Pa6%Ewg5UPh>erN&nmbziJ`qLmv@2H>~4}+@`=`C@o2b zcgpx`f?PTh`*^Lavgh1TShY)v;)iS}=4;FFzER zY>`G@g_ch}aUl{W`d#@0@OjLAaYt{()~?yh3ivF+;Y(4UNL~I- znyq;^VxUjjeIhyAL6!g7e9br%oZ}mE;EtiwDqwbs-l`xk2vRNPofyVU7dVkH6h(@o zjMoWK37a6w+nld}IVgH-dPJZKyZckvkiV~%y*{HiPMq9x{2fwTB)7zM#~{;D$?El# zhtn6#S%%fzcr;N>Deux^<#_M5l`M)_mJ%Ct8rG_pe8$t!T+Zn{a6ca(`bu^Zkg65o z;9EA4nZ*AGyRwcd@KIbaSG+1t#sng}TL^1vCUrAgi+KO|lu;l5pqeNS+sM+}H5TGH ztA54st*@Df#4wWY#?!%9`^LIM*WI&)KGECLNkgB*zN6JRB`GZ_oDhrG3>ku}i7wjO zK=+~ScE6MaYFkPG1?zgFR_@gO=dea=ub`~K4h3Nn)-)%3&AWkA*#E*XVy*E`{;AuL zG67p&IN@@bM{WvRBTFEOdLRIX+N9DhL9hKoc+;a6=Y?+vW= z>MWR5>}C!>`#2mI4FeDJ=KWT_PxB6>9m6skM-+9>L@emPte)&wPqwGylQ3u|Q1h%* zJ(A+|VLqwj=YU*ZFA_k8GFOGlcGyA&-o%;W%Hjtu%)nm&RsEEH59m*g&RHxbfowz! zG5v&HTlubjEmYdWQWUv%W%C_<$~>)Qr?}r#3;Wqew(VC|ymeS?xgqU|EytbFaRIS} zc}DAvFx+gce@2KrH`|8%ZgojooIjkNZ(M)QH4i(Fq7}B()RWs(?Jt5wAm8lCFV<)! z*8Pqk7?``>Axk}41_!h2o6Nigy9%>#FfnWQ;z++?7@a=_H-U;@zB}@?II)qb<9-~! z`wnO=xO6`{-{*$qzJDZo{PMTC3lYrAsg;NKhvGavAA@w2jNkY6!y0?7&u(x}}#gR*rQrKqL}R*xySJ zN?zEI9+zNiV`^g0YU%rM8Ou`A6h^eAt5gmC#tR{pw`so5?+TXKoTzNfqU z1(!xX%SE$GS}wm%6?h**Bi6FcOuAz*51+l$kAD1!yl#iL8;IM0(G`sm?K_#9HJ zB)8(s2wS#(DV(OaMpe5(vD@_pW%DHMo-f!fUSUG-;9W0JRFhpq{~niA$j7z$)4Z8> zu6Hxb{0|`i64rfnYP4MCz;$)L_eXcDD4?C|Wu|0Bb*X2u*xi!5jE`?7`J&9PKPSV} zl&(~7%DB>}01U4~)dfwe!uzEzkIjbWpNi?R1+9oLFddL>%r}}*xO$_5fWS*&s1A-O z(=uM!ZuN7Tqd0a#U6C32=USE7_Y(UqM3}Ca$hwfwtsUA*1Jx{s>Km>c zl+H6Nqm@Paxl|5&sVk7vH0Km%t(trKa*LzyRQN^RDk&k}qudl|6$*G|)zLryaB zNnRA+X`QGJb(h<~lHci0?stD)364_Q-PIq2KXv~x_!;s=)CP9^^||EZ)c5ACMrVIQ zAk~)DUs!y8+BLwwYbV}PB=cemKGAyk^(vsJDH+}0ariVar5nh(0$WSNgwvpx+3#^) zxwAd1@*BB}&bX}7*v0R+F4w-+iaDYy3l!f_91NZrfoY-6v-Fj6!~^E*j^~wD&obs@ z=Hr!A{&dXmzpz;S+&VwR_Br>3GU~JEC*=09QEsSeOm5PUs5;)ghg70ZEgBUgq+DVJVD)53|9W-mo|afb~b3J@k7V< zn<852`g!bg3D39B@sUXkqJ2ZbG2##24sd*@$TrrmPHCzIF~V-O9!U~Z_@o;8vVPiZQg=(yq`TLk-%4_emuo~ zb%5pRA3yGc^-2IB42B;p38k7vzdF zcQkKi{Cq{ve~tbG-W36BRs*a)x#fJ^{z=!Odh-XAL1O6mk4?_% zId89bqWu;BkpDm{1Fu9!+Z$Dw>;sh zd+y?T1jgF#QMz5H;5HYt9^D>?PPiaQ`SlXe574Q&Ee}e{#BUuX;A?Iaw*4fUCRD^y zC92NI-T@_OWjK#pNYb&rTGWfN*|jxO${aOh47urKyRE`W1G}Yg$o^kcX?UxkrX@i3 z<5j$7jI(@x^eW@MOYf+|n)kG^_we2^y)mi69a0Kp+4Ns&-Bav}yx3IWlH3xVpzB-a zd|jni)E7d=?|yMy38Rwb)hcqdP2<0VyNi^HJj8;d8t%#`U+MnvnB}p#I@a9^)?moW z>?OH)L=&2`@zI8%25d2^ol+u4!4TEuEAN*Bzzv*VWqJW-X^(U+ch=J7)X=HM#{sSe z5IxS=vABY5d3nxjFg;Paa(sK+ZPsL!B4{PXXH+wYaf-1-x|MhPt1~j{HxQ5s+pd2X zovI~>NBXtX@AS@S#0sE!rH?xCdrG>pG~CEudhZ$+*fp$3_TL(N^h}hf^fovQp-3L6 zefasoqo!?^6kw@05rMuoi@;sE{flCx5ULHR-|~(oly4Pb&C0bdV}09y$5_MX&yclV zLcrz^3;J7@Wt*>Ry>1aeTA*xPY;=aLyTULA(T9S~v+_n^z2jS9=4-A%&^l6ML}8mW z^yaj9>%33-a1X`Y>%_&h^a3yJXWSg${@XSd4KQr=d)X;GF*s`k&{w?t_~r0SA%VcI+Pjb^*LuQW>${S`{OPFWj>tB2{-yp=;?GF{8sksP=dF~ z?6L9ayQ)Z@`!5Wvg&%$7Z~TNp*GnutW-H2o%|1l$_yv;VRhYds(Q2FXM$sQaE?4{5 zLhCKUGWHcRjP)vBwj1n!GGf=NgmvsoT+oWNoJ^scPuGj!=x3>j^KvZ3tblHDrp*WP z`^caRv#!?p+9*FA=_*+2%h0}Rrpx&qib89c?}@4du}>=hoI5zt4^&AXm~Ft7Gyf?b z9FAn<0AZW2%3J>V|G)A|$hNH;^Hx)#c^GK_PD4RRCLcDnj^;!;$)a-fYInyq>p? 
zL~?ZmYnA|x5~mIA|3jUrX6hwKjZtTM`>3)F(X#kk#)=7mAi8HLEjUQvx38NdxAUf9 zt+oad@2Zi$LJj+LOT@Thf`5Ues+(#q=ub%TiH5bZfz#=w$1=)ktNQ3(3{| zIIN!$2GNgkXfUMdXSoO^PemlI4guTO0IU=6kDU&ev~;@7n3k#~-z~>mdO)!RPsp0j zien4G>FPri9sT?cjxKk4PS6~UjOU82>~#IGb}scB4X#K>j46jYPFn?hx}-RX%#IVj z@>>Jy9>av9o_w`hkDuK7@q<-kCJU%_pL?)fOBQubmutm%3iaXVTi^>t9#-k@sV<3> z1wYk8g9{b<=HKSLE0cVAe|y2!k^l4bYopx-+;T>iPseM+S=B{))2|+~d{}>tzrOOT za=CRD88TD#*=hAH-P;@dA&of6uKdR?TyIogEai>%-v2;C>-zZhv$vLG3$xE>Mwf1) zm6wY`b?+(0|G5KR=(^!|=Jj7_2m8cFiUq1qzy(bM#5-LHIKN?+JPewzvh;t7AV=o{ zPd~|oubasopBOudp7?CQVjfYbm1LhmJzvR|I( zo$t^4@BO=z*_qAG?%aFsbFO2l1?Wd6bemRdD2MW2+}oWizdMvJp21iR_9E!Q5_k1| zOLD9+iNP3N*_?1KHw+qoSa8be8dEHBwpkFLx;@2^tP;aI6yxeotcqgrYj&NON{SKx znD3j6$@^2;mjSqh6(6*D+>{|~7Sy0>vvHV7-D9yUzrJ5nCeoJPn-e!Z$4Qw`dBc7y zN5C(Gv{DUWGnXG$zug5xh}GD<^!R@%i2PA*(0khct&-`aAlFZ8(#T+87ucf#!-V6# zxyKnjzj$^(a&J?FA%n5n&igWoa49##$XJ=~x>4DFB_%_6Gdy12 zH-xkiO&{o&ya+McxVsn!-7u_t)g>dOOcoN;R!@1_O4@3dR+aQR7#38cdHj3@2W)e> z4BkBzUAL5}3ypVYxw}I8v_{v99-%`j!hF)HrP}8vJZF#Y6%NIjfaEE}2Y!Ed9IzxQbM+MI zlXU<<<_L!D^G@KmGcp)h*QG5OE+A_2?-~JHfJq@zC?G82|J_D58eh7g5JNAHzc3po z5uV-rUCtn(Le_Cxt$nNWsj$~E!7&neJ9r08Y^8{_DoVXCg8oc2I<`q zLv)9B-yNIxPbc)t?@br`T5JRLCeUc)gXY7+!i#6Wh}$3KMa})ea1^4tY3s{_!&(O95$DD9 zL@GhN*|Z^cpS&xFiO_+sO87cWfc*ewT>jmAk{~;$M}rCIQhyG&W_K9(2=%-Z_iADM z4-(GTBRTs?T(#*4zuZ$qd! zQ4Gi@T*O`GAz;ixZ4VP5PZ$zXL>F7VlW9R>QlD<`oCb4kA%7BsE7S|o9ub^vJOE%m z$9I}HBh)4W^y#Iq)z%*2yROi=Y5#1*rF_J^jaQlwW`Ra!)}{JVo{9$*$w14_UOVHn z=m%T#D9YJ@gLWR8kOKrSbdGc!0i+Wc6^;eCQw&lWd=fasaH>AP@i5$RmR7GGqN@Ff z^N9E>I zGYNP!jWz|vZq&+Ge0!SA2~q+L0}nih66bBEr9H;}nKl1>7u3HXOFmV7EBZEa~8ai%EvPCNep-dPcpK*t6vdr%>u$cPN3<8dE;f9J5X`Hes9AEM|c-q65aEu z;1V+sVkcNmo{tCGmeqS9Bhi%d{I1{|S=z?9HqDOAE~~G<*UEuxA$o*7xLY@0#`W}! zFR-jT<6i~4MdDV8Ea(XOG$?W*`ihKzzn|XYR(Ud2YC?eswnCqaTuEc0xS^^64+QMz znIU*naciuSWmN}QmDX)N+f2y$L6=4t$ubkIx99#|jT=Y+6!5E>%_o)E?b*7@sm8+p0CfJ_5TTIG0oS7`$4F;NgiWTgw}P21lM=WgHO38rjCk<}JA5;P=HhrEm!OV?QiEzcpLT0LJJ&V{UPCw&q5$yG z2;VvTV&Kpv>E~1q0AF!NF{3Z#t|ElHCH*9MQo!kidAAm%WGOOpO#s2_DAwz07q4C- z9CiPFwH8YYF(lEuKJoobL3zbmUy66;*8Bg2AxNLriAGMp3FIObN=ZJVzWt?%_`Bzw zzr+p7BN-GhBK7a}R5VK{-K&NO zm_4KV-!7c>?O^C8^ZUx!^d3gH+XM26u$No!`k+FD6<3W8iju-rs+OZ1i>|7GSlBh7 zp#dOH+AE)h^i!UIOp?tCzsvqImooT~7`K?vr+ME0gv;C5kv}1V+gChX8IN%db8?vIpkINqCaf03x!m>zbcdUu<)nuLL*=NCa= zX&^a|PL9egp?LZZwFZ;(9vzhq8<>OMF71i$wctGvlHiFml_Ic-nSBnvR83ztzE~-xu%NVNVKwKzo;W>vUm6i$itD+S>M;ZwGE=N(KZ(gnhce})wdM+#*KN` z5bt-(-^4F+gaSha<`kK_?wF^ygp1ui1UN+ zVDUPGcgE2ErI06_2V5+^DUM>&s!|TG#A( z7tn8dWVL>-C|SW>Vuihz=igvnzJ)`%-AsW+raFFt&Ct=V#5V+<5FiKkWpG;JLbeZy zk!;^2xigm6_#H1J@rp9nlp?n*q@%P@&S+OW4OshLxrp8N;H`$*G=J|o<2b5<`=Cv{ znx4_G$mu6BTx+lm7y`T`z5rh&uf<4p@KFmgyT93r%+oq$#K?R|e`qYUCV0#X2Z}eV z((KDr(yQKlZ%zDiYgT=58SG8rOAaDAMgcFq$vRZ7n7xx(NDnf^3^;>gn4*s|hO2 zl@-zx^Va%(#SU!D!36Oz&ngr@sCAcgtJ}zhsHpuwpD>q95wB2pw#pTG~;D=~85~n}sRJbe@?DSkX ziHMpoP#8N$KDv4>@`l7ePAwxFpuEmosdImo+iU*qY*GKH9u}YuR?sXyzvzh{X!fYK za%Y5DB{N2JY9<5FC^^fJ0`b!D-GL6h_tBoab3DjQv*MHSxPKh|=q1i8L{IiaE8_M; zwWxrfXs8E2GOR;a3{3NmF&?uy1xhuZbC4%lT5E?bmCU5BOwiV(GbymuJkp zO0!)TYWxmi@oap#+}QF}moHWyoB2~t0sHeAspmfSw$E%Ii0sWbq`G{7#w@1j-Y+M1 zU)PGO12oLS11R0tsbjTiIi+vX?Lr95&J@6U>9&-h0Y7&#(eT>!X9#t#3vdCU4y)L+ z<9jI2M?t{J$GMCetE_%-gG)pXKJD>R!19gF@{>9_Si2vCw{~L}o%F&M!tYE7M1I+( zenss1<`7>zjNtAHoF-GFI3Z7YLm0`XkiE`Vw0{23^!3&0?l&bO<*5g=1O9l=^Q+%a zNj7jJqL*R+QK46q*jV%ot&f&BD(QE$!mG6bzE^V<2a1;*ttJDu;n<67!1>sjFpOf? 
zyP@^k*TX1+@X@0I$aeSwPY0XlpBzbKQIs{7Lzsae2!{z_PS-F)kAXwMst8Tifo(&W z=Irmz=$huzbK#>Zljx(ik4d}N@xJ2a6m@1{-k0@x$Uhb&Jn#3uJ^3}WM;ZJcZU@FJ zM9aEnF}+q~dqdLxwN4})l}5Mv>GMN72+s*!P0v#*m(KlqG%to#+LQX$nqFnfQjji8 zG~gn0ULPYKoq6)-eIS+3>;%g)v|L0IS_4D}cwjP=8XlUokc$hYrs}@|>j9S0E=9*& zDUREhXNYMOvYvOkG5TD13nqV+=QLah*cW7eWhM4V5WBOXW`x9cBI@S2QHwB(sXbzw z&J%)qU%ym_7w%{F-Yc>CO~##axdoL*0tIafPG7gFDZaPG9W+8Zo<*ln1ijz`%Bm;b zm%_Y$4W&LbfAM86te&xtCQO5+O7mq&en-z@%&^Q^|DyHs6Rdv|ZXNOKiHy|6O#N@$ zXeE74L{fa7;ANu-J|$kg+pNF9Onesn8(O>rqswqgw9Kf?_+sOeY)?W86$IuX{4S zyd>`sxmBXZVQ!uGLv&{}LoSgh7(UH(J&vOaC1Z|7RJe6Kt_*isxlE4|_9Djw)B}G% z_+l*wh7{(du84XhWqiB&7Ac3dx zi|L-vGujZ|00^9Yv030O%g?AgM!do7E-lcj((62tR^k6_Xb7$6-^z;2O}UY9wW-d} zJqW9wma|O}V*546L}uQn?E8Y@dd)8$u(B+?hxsId6&C6dk_wnpBGzG${KHL z_#hVzaR5EWc_qI~c}?UAIpr4{_3~P6Tut%G*&MzvRVgegmXX1or(k>v|*i_$<(rHU;{*3Vh~eg8$O8* zWeenaZx|noc9agDDl^KE<1zVB1H*u&JEUS~ovo_UeUi+%j}GKXpl1#Fe%rJ1_$BPz&c|E?cd`t{jNT$l3+JI)T+ z$dzxhIu5MJL#RG@T^J_13GERE1541#N#w%gr3TKaf<0fb zxa8n2rZpYbz=^^#_sWQrxVtn%ks;^OvGRFcp2f`3IEX=k|*w&8yQU4!d|D8G~hLp=jDzQHgM$ zIN2CwXeaCR_^^U$=Uby0Q%QhLdN!oB{ytBA)I#O?09m9 zG`aKMNngq3#!YqO%J&iO|J?lPucs8bQ4J=7EC^z6niThg4)y8|0@%f+nYc+Kf#kmR z`~A&|y6GIUfpvjTJXntnwhnT|HJiW=khVifbnC$(UOrxiJK6! z_XsfoRD3~U=JkyC3552RK{Y<@6dGn^Qktl%nLx;AC_?@`u{f@B-2C~yuzd3F2$Bw= zb__@(iGZ>sKu-5m+dwa$F_0o7E~Dxx`{B9VqAneH2L0db)6d$+eb^zxTEozHxSDJ_ zWA_U@<901G6qzd>7B!@bIPOy88FN< zJW`nzg4tyDty!nq0~7)OlI#ccFO3?P@Kc-6Wm3!^>wx3H1z_OZp~YJ_omc*9{I8wt zw~oIxo2TjF404?)w%GC6%GQ^tXPD{xLmBtW5XG+Rvv?7bZs)%^<^HOIAMXLbf}0$!oaH!A@W)@k>8zza z!zQN^03X-uJCAT3cito&hqVe$UP7N*+~t(>Y+8jds@={DIOq5Roi2AV{E!{ES_ArV zes?`QI%O#%%5Op1WTAfO?4P8udY-WB2^k|qD5d66EGCM6w}|16pw4^ThzfAZ)6bL8 zyvYXA9F^m7lLp#PX*=kmk9x+wJBe~n?XJBNB4{2!&p+^igf92ypZeD1YZM*77;tCk zGIYS}sO+Xv2Ygh(F;ni3njW{gOB=3Ds#r+y#=BDX4r6C0NL-eXdqtMztYTG zvI8EC`d#Iq+N1K+;8V$LY7&fEAT?n8PiLm-k-uq#-W$pbQ@SF^NNtp|T5y*KY36;~E2JHT+h$Y}a}i zOOo1?_BW8LeLg7#%S|2B?vJ=UUif2;R{{>8*O!7w#hD)*1x1=X1CFtK1-PI7HSdLZ zlpGF|*)23yX~_7X$29s$Jz8UV;7G1)D61zm2+cqHMUE;&GrbL)J42EFjP0}iNE)Xa zw@`+qmU#%6?w?C39%1!|?~fatPZ$5{L>XJgCM~CC&l~*7 z3`&OK+C#?=JVN};>IN7rXbSfjap-TOo()q2NN6qIN^@N|AG!%$zk`6P6Lb#%Egauj z#5jZ#_=>T-ob>ob1cH2CC)Ns35?9!1fDjZUbzBL^hc91Kr8$xE6YBN3aoHO9#9PbT z4J;%sQ*_;1Yd=)gdIN~#sx7>9^01!cc7EQUoQ_UScR-K?eynu?Vh@al9_r!+Eb$?_ z7}xKsXE3t!MPpgZx(x2u(IsUdyaVBfm%9a`>P+Dq@P^M&>1U%YpMk!=h4Iq~h_8Qt z<`Hb5Nkr3T{a}axicL&Fzd%E8E5J)eq^4|AxCaOJ`*Yt83cvU%p~<^xruQqu=c&KK zeo{q^71PSKMacne7RBv(0Q=311Bc98A3>zAPSPhP1d7$!8iC$a{W3DClyUNDbQ7$= zJ7zU7wk0`2b{0+W0UoF-R7Q2eH6W8yZtw|5cE?t3#6zz;g^>Xup(9ytE^a0An<`2= zxeoWAFFdBu8tA2uUfV3O5~_SaA#v1Zx8)XkFP-)sre{J-9Y{CG5}l`)_q|2Q!ai2T zkv6vL&68j0!i|(-7(CYH0Bc>5noI)<6zGQLkWbMxN zE$;ZQIcEHq!-FNCaoY2ijRUD4>x5nmQ^5<+MSzaVmsKaj-NV7>(7oZmCQ4XFBWfN8_DQQ2FddW^_h2oI3#wac#pEUQYjKVM*l!rQ53Fajb||i&ZBHjj3$>Mg zWSUaFH`ib%7+2XURD|aX!qhx=o;Dk@{&PUtZD?dB#sjWPxB|+=ZDkfZ09|l>Zriym zrZ379*GSKG*K0dFY$217BBY%>?NsUGbyt`NkRfifEdp<Ir_sigX0XJL%Cg1Ln&L+h= zQvk@P?*+IU7-dc!Atr-RlbhSJrt_EXe+lS3C~IWqu>}F$jcL*`sduJR)+MSTv?9T; zlGvdaivf*)e;F(Dxr}DZxCblCaxb77TxMQZwr|{Tr^7+e;LdDDDo#F%6(R0R?;{m9 z7kxq?g)K(Vm4Om>R&9!klU{OX5eQbl`&u)%b-%@djJ9yC`{!}ZW4k$h)KxVvR2E;Q z3iH~fw)&lX-)J+l?K#g;CB7twGB*I_b@AZmLypgoLo;92jTE#=ec!lqrM_A=IiYq` zmQJlHpS@DROLwK2c;jav4iND98~yklbo@F~I+O zhIhfOk5g~1(UPCjR_g99_BkNhB-$0GM~yL8k|R7_<*L*+ZINtu(7P8Q@7(L$v-ort z2z4Zd@z#UTR3L`I{@c*McaH?ge1^tuYQq`b$O%^S8wkMI|BSD^RdNywsRPzh;!{N5 zrH*u0E(;J^C5sglp_{b=H}8(x{-Z#?E>L!c;Rh+097a`y+q(_DF!;Llu{j=4VHaYc`e&8zL~m%fx}2M}Pkc+%a< zJA`M-s{M|!i?{Inq19B}t?NT+J^O11*7>rm?khO^`)b_YFaG{fgf{Nj7Eb)&_f}{| zyW^3>bP

1V}YE2cYj5C|Tn2eL+X@865W?z3|o@zJ+lbR7-4KJd0^r;=gm@2RuS z##^$2a~d-UJ&Shc&fE0L=lZ+u;`jL@>0br*aw#!E@*;7K%Z)1k9xj;j?N=+8^?dDQ zt4xHuQQ#d0TbGlf{s{%bSNr)TX0%k~VkNYv=ZD1X?j1l6C9`djRG$9yprLnES0mEi z-Hpc|KihtW=iOxt=~rLX-GctWlbz^ZQ|Z9)1(G9`vyLkp^1vjuwY1i$5IiM)(m}PT zBl@mNwbd~zoGhQi(u^In_Vlm#M2iaZx>|deZs!5UjeNhh(8^WuP6~L2=szjnL;ENp z`h2PP4j{hNA2l!aO?(>c*s7J5&BAh&%mdE5#0d1HhMs~i1ltyB*#m1lXWeBIk@{=W zLX9DWAWADVSN8X3geDDTQMoU!S?mBV(h53VAy67w|kDC7g4KbU3&Lmi&e#M}j_!b4ab^g)P>2 z-eYR=iuvPv2lcWK<~q(4Xo_JAGQL{}U0w58e+PmNe3yPVW4Zo?MUR0Ijpo}Q#W9o^ z9!7k7M}}RC$x~ZB?u#vmuM`}qZ09^9l_5+<4`rCrzFk<9=VUN9+bZAKq9a<3nN2bcP z6L92ERU0Mzrswld@BT&ev>z&x)8IJOCIUcASJa@|g|;sJ{D-+#PrRLvUl{K{6&zi| zl{j_v9NmiJa^AkfbtGja6L|2O%3^qRh%xTlq@+lam6QRNj4-CEz)NN+uo`a_ADUQ# zabqKLLceY~et3(`3s_{86j(CDrUTu1W+s{O-x|U>Wmcd@eY$tlV|| zXm7OMp-2t+tR+BZdF|vA=%_X*XRd+NvZph(Kaf$UEr5?)_s8Np9GGN!pM=XisF~a6 zfxHVov6g0uQi``T+yb2HFne|0Q1w?p!;ZZ;*`aR(M0urRmiOU;nI2k z;ad-7$XUb#+zQ-p-}u^l=sFFlp{P+Rhvn$s6u$kwcy+c_0t`)v^4zEHRY$|D;uZA2 zSwv-!Ol|@)@z*^|C0?p z(M^Jh1|x5K$s#6Jg11CkTMNum!Y$PTA8-X@oGKn~o;)c&brdlwPL!K3uumPhv>iq4 ztW7-gI8^5^pLkWSc+-3)m#~MvWE??4!PslPS)etqdISsk;Ot}J=UHc zRyC9qa~1p$B5quTc{BnQ^m!=2=w13CAZ=}^t^BExP;iHm>>|E>6;@@XB2#zwOx5}t zwBqUHlVDOGEsN{miu6wDof_rixjP(*k7|ao_r1?J_Kxo|Jv&`q+vQ#WSTFDym4g-LfZfOCWqy3Q|lXv=v ztz;3PfY}(+MA`inZtK5*;PPAL{R)fejDKs6=d2f^5++?lM{-Uol$d14kUZ=DVx7j} z{q?@*y2UzuIX#wq428cye>_-ogSt&!XcNNUzr`Z`q7V!vboThO3`?()xA{Pl+H|Tz zRf~qZ{PnA$FKPU54(<6 z;CcPYeKF?sCK+4+>W@5`)k5%>yvSe9i!DRMm(li$`+6_4ZYnB4{_y7-CI*jZ)91xAn>v|)8cjW6Y0PbAjAb}eqk zvahktTTT0N2(q8rz9z-48Ay#sm~+(IPfeIN%6(xNxpSZ1CzkUL{JW*{UB7)|xp=++ zY~>A$+{IM+?D2G#%OLIH^LzlzjGSm(GS*wWH8ogo|2HYdT+^$|!^YES62}KsqO_+B z=5@kNV9*6eM7`?dZ>dHuzcXRa#FK6J8_1#~^4;mYjE9`?cYMEKk$*SG5=pD4a#Mqz zRcwZCPsRRng-OwUUuPk2t#-jOBSO4f6m(*KO=h>PXAhA-uw|QD;U}Q2eYh}XWKNTm z^6&r#bjV%H^xd_P;RN6JR?Q2H z%&Bu}f28vI?RAgT2o$AxmCaUseum{bcUlbpu*TMY0$EMLmf9)AhdJ=7kBzp4D_)f= z{vavhNSaGc1Q}+YyBomCh=6O7Qt^3`Twm6-UQNC5yb!+x^6{c5o5+UGZftig7~V88 zw6+|+XINcun)q$ZFOzR?DF4fI`>*&RPp@rair{*uNBdFSljkf@7g-yqs~WpIe5giZ z5bd~d&oXT0z;F2`o`Ku%ZEr>RRDL@Xe6J$xPQWTSWv!luxqdWDwlU>>E3ty@la!4& z72?)s?q`nBEY^`_>TYMHexv~2c680{6X+XnoD(uWejxnxJLcoe6EuTpk}QEMr4ZOS zO;B?)Lz(`g8Thm4BTSDi)QaDCZ$9XXIx{wAjBBPJ1#9}yJhbdk$_?J|`+E6=C>ghe zZxxTuh&r#LYb6|Mf~hz%Rwdi`51Ocud01X!8TZ90BPFr!k0W#Pc;m=DMm$39LtF-$NdSQ=0I|fLV7VayHCc&i*t?*ND=S^tfzQfrrQl zrDT_bi~4C+Td|K>*K5K}P8<*wCBffP^k{Y*IhM8x_AWVRe4+yU3 z@qTadr_Zhbv9Qn_G>EYc;Pu~yeSuNIkfyRkPC(tEDcl_w;wY$Vd#u$** zlb}Sa0bk>kQxgQ0`W8N5CbLt*8qZYl@88(}gU0NT`PJ#p%HKMcZBtaj9VN{V=*JVj z&glvuV!6b#`3IoFE9l0Ies_~3C0Z_FKAY+%DCH{ z_P+z&hYI&0Y(4L=3Dd`B7!eNW+r8jiY|puoXi7`^$m^NmM}uD&f10T1M5e@oaWY6Z zVQ~nVVm5pgv$}JNd0#=v&}J;<*!$558mbhrHf*4q)U)!yl433>OqWON+i^!n+w-d< z><>ZBml^Qv2sQ)X!Ors8+N6{s)aLWeo2iqQK~K0^A(qKrpf9;StC8_Ds26CBFL|%B zDRL58NP2X@ETloBsoNOVU8T**6l9Q5u?|*gNAAY&EOAitVoW?iu6SyXX|&H)cyQCF zu@pMo^9`LdU|_wmTg6kB@63BLoFU(x{}yZcX%frPZ}4Ym)%tU-4#aTK=z%yAks~VJ zaYZ?O4|Z>#XtH`pY^5x_vnh#ar}8(Uwy+10qOQqKjz-wbp&zXQ76Frc9oBumuVjUc zeXelu*G=h7-(&_SE-m4C82~BNzjW^jz#d!xJ3=CR2pI~1M|!mKufNqt0HPK!0nmK@ zz~leV0&oGYT1TKmMpYRt<{zwlH~Iyy;?!l)5~_F{!Y#`TIu_?;)++wJ%x(RfC||-a zof{#x1Klh;>&d~GzqjI}iU>X`?NaU&-}{~@9L2C4AMVhRM)O30*-~x&UnS}H(GIA# zbv?1%Taa<}4K6t2_UaUKCyO_v_<0My-^99@T^_{$N6qIOxPe6!HEH@=jobJlI-g0_&={S0ygK#Q-e5Sh~kt$kXVwh;f zFlU&Dg-QaR8pOULfwER1Xgvo(Sh3mEON&yx_>pQg`X-a>z^})-{RWEdYR##hmNWsQ zMx|#TFhpE$QgXtBo#?duq8OoIX1^>VMCgAQiZVSkF~w9*Chi66sAAxt!lnSB_sci* zYARv3sP{vxr$2E8@m6$RJelu&zHx^EPObW*A+~gDrK%G;KcsAP%}3+zA;-31SyWE) zsK}+!GSXtNYLOcLtA^BHvEGILK-o6wGwzgfkFE8O zOonR0X>)asV%nV2l;BXv_!2xf(9F5nhTV0A5=fA}y4Q>z1=nrigi^ptseZ;24dz_j 
zd|n#8(1T9UQZWzm*!x|tQ{%vZ#Hb!ePV}Nfh6Mv$KnS#2Vvq!H;(bxVuG(cEh$_-6 zD3(HrI@YV5;l5rMHjo6_`$Dn(NuaQkcZD8pZ)a|M|2o^7=Q8h167 zs9MX2-#iB$*p+{QS@))zy@r-UIgzXc7&%xQ)@?dD_pvSu`M~idtYab%S;!&t#nYF1 z_TwisjBPpdf#vtM)z}0b3hKehuT!K9%=*!L#U~d(G{p>iBn9C`AomQlgXCWc@>ie^ zwulZj8A6-^yP3 zkB8vx)aBvYTg&%iHCke>(P4(c?U&a}e+dhnUqN9X;(22Di86nJ++n~$7}}c@VK-pb zd||<@33j{dywViuVO5Q8#;9km+wC7Y7(!hME0qNbuDQf1LuwWBn2MoVN@CS8y@#Px zXhH9FK1u4(bqto?Gk!$s-}vKv@sH8A$|_e}Yc4JzA-}cbrGI`4jcD!40Y!HNKA|7W zmkhYCLrU_5=(##cvonG2M)(V|VBI<+=|E|kbc-} z-&fpJm-Vqc(mFQx;CqCBQ!I19v4hL=)<-T}Qi@+#!V5sp02dgZ0lv9Ze5IC^uD#~E zo2k{1*DetgKQOi^`W)-o8&i1SiR@reGMU zTpI0WL5#uszvx>Po4GPaH=K61g?`pV4hi6xsjvn6x~SavPoW2kOzI4 z^uWBvtYvAzXu8s}0E3}l6x-`MsU({{n*%BhLLQO@v3NL5DNm z#BRJek zbvpdcW{-U5vOj+{c7V&%~Q;vt~-@_u-M)m%x&(btc6xx6nL%m;| z*|(56$@o3It!owyByLk#`FJqP>zK4Mk;!}7B@-Ub`$Q_cKJ0LdchTyWRF^Xm3E_)E zd0dMR79z|aEt&+Efeo;*ENuHJN)Jt)Veyy5124jxkp)OcK>G8Drmmrb?b~OFo4mxR zLS7IdB9=WU;TSOfw>W*+R$Fi`-AyNO-BauMqq$Mw(4kK#zwgmR-?nFmmn-rozC4iQ z4}24R5XJ29F#A*eTAi%dE#Sm;EiTF``IJxo{m)w>W^ay8gb$H#xI+osfcBVebHJ^R zD%wj;-@#!j3C3z*2=(#A*<3=Ew)!^Vu;(Dfr@6T?k7{8E3A0=;-p4<- zAZHkP78KB5nVqMXoT*+yP>vkp9yUsa&L(QgU2&5PmZui+IB)*pignw)7oyYa{}Xxx z*hWI(bf$R2jcYHulUmAsZdvqxJ-@WQ_mx8JzIO(Iz8=KzS{rAH$c-q-R67xQ1vvd% zcHDejV(;8}P+2ZhcdO`8KSW7Dc@sK_lpp|h=Lyz;@cd{~Jzu*HYdt}lAWYFucR37u zN6_4c3PEo_Um$V+F|#IK^b1>iDf|uSDZk!b_(|&oah%3gv`;^}qyo1ti46FmRSsBl zLXEEDUVj*Yw@~AxSJKIr<%U(It znPM$1m?~dr8`MPNebd{(yTgZf+N~PQV`wH;R@>SD*|aoO1|atV+0l;rf4b%gzDmv1 zf>!wkYez41SW<0>r$9|$>7EZ}-V%AUH9_c_>Y;g$xi#&o8JUM)vj`4HNpvjDj5p32 zzfK5q@~mheCiQ&_g|F}3|5{rl7@IWm?J4Q2zwCZnMHz(-(jd_n;!`6yd{*JRbLKBV3$$aH$V&4(8as&RjM@$ZRH^`1$sxlDGodFx~nc^ z=;sAHl~ew?yx0fX&+!H4jJ%+iI^gzPTKvd3bo@sj$eFJ8C*_Ut#=N-qLw?qlrTUqh zH>iDjli<(y0K`|@{!UVS9sV5-y(&}HQCt(KfTt@gpgdhaK@eH0{;8r%I?!{OK-1r_ zX?RDL91YT!?7Cah{6B(lE~h9N75((%jv|HgQsE~VE=GC&e~fPuNb%S~V?f5>sOn9J zRy^cuyjhfYC&@-mXlo`xoiIatq-^Vpa~glJNc!}{^(9YXWxM4Rn}M^vE)N%2ob6zD zmWJ=X2=g;b#-0y%g_!}jsLkklhoSp;1s?D4t`+VpZJs07$S8u#?2qQhF00vxbNAjd z$e|_H2MzN3LJgOjp4Xbx+4B=;&)l$yJ5RiK4Sv(b3kb{}DN?&y#IsYN*OilS=y>)0 z&Dd+p*MeC!jxCeXZ_MYPK6H!S^U?rHas?6eu2Uq)#AjrMOLiqc#Ggfz6NrK>;wvBF zr~3^1!iRLo3(DtOK%Zv%me`8;hhasIb?PF$P2fKv#_@XlneY3Iw9Qk z@9M%KebfGn7|sH>bBGVIsvJUOFkKJtMIIGffF@jNyrp*;(C-ssQ3nOO3MzPysXG&i zV54zDsR1Nq+-I=_WBR21wf$dEH>$4>W!dgu-*&p~@K5iKJLvin755t!EVxAMOD$VVFyszFv{*R z-OI@qK_9IaX}%PU?fZK6WLIa-LHEH%9kuB) zag-qTMr2oHmVyTr{+WVg61bik%mo#zY|2(Gn7$X|3|d2esC0kwzw@;(mUA8b*(yThGqd=x3=8DRr$H&O$zd8duV z5i~G*^7RBI&6ve*x7kb-6OE0V@ux7|7$_e`uRctDD%10@1o8SRS`e>B&-IBIO`TMH z9sgRwF>?BBvnC}KaPQ13l3f4tNdl8~_kqJz-XSBm90?GiD)HoIF?;FkaA3f4o?b-t z*vD*YNZ<>Jv{eI9pXHO;W7s-Oc}yRr68GZjo66HF^n{8h{G>qC{1zZnjo{64^+wom96EI^F$hZ>}@YXqwncR{p(-v1{tQI$r za0~p+J?Nh4)8rd>lo$X)tLL8l4MftR!6nmGLIAZg@Dg+DwiE6=Ky~P!UbtXo4`ln` zbqxCzJb{lftYqdpMV9iss&==`37euQbCgKG5y?qVkAMBG_;pCdEy)colh({0aplew z`2g=Nr8sr(B<9FC?r=?=c`qC+!SBHWZioP(s+fJf*IXQZtA~Q0mXZc=o<<5)$o3mj z3)MobwVQuP<=erL^dDCn_x_oSEhnoSuc_PIIREIj`lE&c?@%WGHuUWTCbNwT!_#sa zqCRE7#22>Q(bSeg8|F;Uiy3SdXjKLxVjyWMni*LNobv2!Pf=M?=ggNMM;AV7srCze zG*VmsO+)w6SKHzaB7t+lQanE57zhoA4M-!M^>HyQ5AWLLM+Y8F*kD$7^S#<${=AJ7 zGjcz&_2c@9S0CBCJ|+>!tH3|I`StLq$$xLg#2eSzbCjzZ>&Wql@If*EQgGvGJd(yy zoM7J9H=z3PofuoQWpK?cF=84;whaXwv(yIjlkUt79p4wzuHJT$*cS@PxC~eLw3RV; z$m7BJUZ1SVFN;AV2E?An{AWcfp82O6u=I~+5X$2=AVGD$TRxMj64Ut>;&`S0M=P^% z{Y*-+p?vsu1z0I0CdRN0q?*^df`JN=AC$q&q81L(U*rwGPK;#={CL_s23X+s2ng0m zm%0;Y4ACV~&%me90JP+nRL3XunufV!ux{J7~%@L^>xR9l4FOF{|w zysuD%m?jw9r5CZ=sE=-pvSaBA-1k`;oW_U7%gi+R0 zL$eAa2q7E;<9RI9Oh_1zX3&1*)adj#dUS6{@uXchALh)r(4Bk#+LRwZS$&rO;}Pp+ 
z;cy#1mvBdx^ZB*YV9M?4^~JXR@R5V)F}0Jdyz32Zj1YZJV2t|ob81)fk{6qTGGE}f zPIlF!LT*Yr%4@Ts0YMh0Zyg^5buvRy`KnY83^2oizT?*NxIQ(?KPNqFDzv#djwU6= zS=VVd4?aO0FD0HJ2YWQSA5v-$y#6IpN{^dg5w#&G;bMpurQB<7p^P2F8<0#w&C!o< zcePddxm^!mUy(s?>B$~Gdb&|}YL~qIJZQItUhSDFthDAHx=Xypc zFj&n2`wWI683rga`@uehuwdQX@-eNy)5H zlYMT>-yRt`4^Tnkw%p$QJY6ZV#j!oK(AUZRQ}Lr357m@faShx`R?(Pj$6gHVtS?RY ze+D{3gVu{CiVBFB#}pxD1Bdbre!w_Rz58IQm5=-u9~m!HMS} zDg^-IYjaHJzF{T14Z)P;+4BqDcC#ySOWNEa#cz}w zkZ*N`G?mJra@l~DYgz{uA>N~YU!~TAew3VkoUWs(yX_$ICm?GGL}12&12?zJRd6j)CVn{* z>%VfcTn7j52^fF5bsM$?y{eugyQ6*e*scg#CH8GG9tsA2iOhuG(E6Yy9@Nra0Md7z z{AtYfRz!cZxuocWmwhmD_DY&7>aVB*pfrQLsf<+yi~m2G&MF|Pw(Y`nmr~Lo-5mll zlr#dO@DdV&q=2NL14AQ7cZal;(mjNNbc5s!T_QsdFf;p~@8Cb&XM3MK&%M`L_qx34 zFyw6*<IWW0xN(L&`Yiwpv8j!b?x<_YGLC{wb703iFe6$D9(9aVtdZU$j2UOXL$r zCIw&N84S&MIeb(ec5t4#5-hC zu0nhz-}9Ey-I*6Y!w_juerbjE6AWJx+u=XROVd4cPTyH(bJR$ewmtLGme)Qk zqh@KeHfc`?rSh~(r_;}sUIo;>EzXDRc2ScK$_J_vK3*bBmVM!&*Yv}XCoaq;@W18I zD6bpa7?TS9223C8Ub?-|cT&`GRD@A}g(~tF)zRH^7GD!ht`hSwV5W4sGt!CWa4O>R zu>>*O3P72w-WM4(;4)^$|1}Qe?Z}DlK|)%6TENtzwnH(cAcqX|Ws~1_`;gW!M8iq`UXk}?YJaZ_|^RFp8r&C(2zt6hjviE+E!BZ6?1?>k~#7*!YtRnO6O- zMFcwg)!7eZ%m6Iz%2&Z4Iu&#F19U(xI`$G#%0>qALt8{h%6kwIrlrg>RaiHHNJ8+y zlUddcRsjdKNX8gMy=AfQ@WSFf2+w-`y@!ZU$$dOa$xm%B9Q8lQb18XGTKLLrG64|Iq#j zaq}GdqQS03_V|z77igk)HnTGU;oe(Y!Gw7nbS%UVL1}1jh=(UHyPHcHK0+l*`pmvq zppbyC57ck-u4TY$sk2)7T!)HLWiDYlvp1CdSUFf8QF#oe9FvTUlDzmUefzQ|xdL;@ zoq2{rTm$@H;4r`HnQT__1YH{y2}&{qET2$f_wG>d1d=O|(D#Y%w#f3J*(dp-gO3Z; zri>0(m{lCD>p(;fVF`RAfJ}uRFVqPd?N5!R@)Z=#AV7yOHcT}&``znJd9PVj@)W?q z8~ux8vmq?tDhcE@^YPAa_!(X8k~K@r8?iS6FG$>_WV$Viu0g$`sFd%;s>lwGu~=EU zEAF=pT=bn0KE_Xu2C9-K?lzvQ?^e=70Afs35o<}-R6f@%E#vii{_1fgKM@8Puych>| zZD`##hcqExz^B`y6XVD?#SjG~1E6#Q;ZPAIEX3^(zB_)8|1|vqjM*01OaeXsR!q== z=RoM5vkpOCPz(Av-y5AjsD9Lmxw|-pV9(!wvxb6X3A+D4LiDf*5Qx0;F zuRwOpx$Gm;$Eo`rC*oPF1)eA3Cn=KW?Lvj8`3v;_8VBF6iqaFF zsEGJ`c=~&}KGZH>2laFIdwOP$>$U{H-Uz6&YQA1Dax}Coy4)Jx|8QfL4CeGRhon-~ z0h#Ui=<{)ey3@l-M~i?<}v)< z5g*cD?rzXue*;b-BejNWz>|=EkH*_VJf$A@1lGTifQ{y@0KrHl(M(1+_>cf1{-H9G zUq6RIKAiS3hjer$gf&}GFN=@#TuqnN!e!;}!4Gn`($%`v6OF7lg~ak%D(@etBURuG`(Y#6EDx`df$Id-RCr2{)+s;r}7dZpJZ^f$9?=K#y^ilTE+g2Yz^uEBW;hvVIECjeX3cAF zp>AEeW{sP)`imCi+{t`P{Z;WRfy(DaRNd9yKSxWiAsEzcx#fZoV)H!Q%zPv0Q16y3U~81Iv)d{8VV>U&(wSpbzyBwVDBF@K)O1 z_gjzT$_hhtI^=~1c;+|0W7#Xr%80N8u!1l1xSUFbhjqKl55MCL(t+H@yGb0&boBjIz>)g)n{0^7K-r@cFvwug5Rl7U#!lsGO1FC;8j!nF{!c-X732v+*gc z!xzJj@>9VsME1$&`DxxOmrQ*uDuXz%Re%9N-CpR*xrY-n zw{~!1L}1)M1;B)Jrc|0>r7uj)Q1o9EoKut_t*47<)9Ye->hbX?b||q9>Pu?W+NQQ7 zC|+Mh^ynneQ5K$^zK|w>-2EyWVy0dihKDLnV`>()5xxfGUgmk>UVkcaYjC z3D1`HO!D2%e+Aq+KnPuOxYr4CPMHcYVM($qj{H+2{C?CALW53X1BuCWj2Lg9=tljYkwhntPy z(WFQiMff>CSGFU&mvO>~Jxw}yFTEkThC?c%LESxmPnBu37w;mn;Z5P_v zeA~CFWno$$>X>QplBtk4V$B6&8rgamjk28SPU=!f)t$G5i(g&3s4Yds4aC3 zSs0&K%D6vb!1j32wrCqw0Z=vcM^2)^QtKYRI=D17KQ{@zw{G3bSVn`~Uqk(XskK&) z^weLbctN{yX9_XOv(H6kPNpt{Kavg{&I_*Vvd0Lfv(!t1+G~gNWlDoUcN6J5jCxA< z9J!YjPmIh~s_Z>RZgyw|>{n`44H9&m@ktlvzZyN)h~KNIWMjuDef-<*_b>Vh!P_G6 z3Vrmpv@VrFz>4iz$;MjTO0tNl=cob6+glU>ngAnU19Sx!d98HN76#vYb^(tFn`-iftE+Vtm+y$}soU{;>nT0kVgQ3M z)C;Jm|KA*mVJfLl6+-O>FIn#?O)KjLo3j~96#e@{*vm?jMzXlF{WKokv8?=xhqeKt zXncR7!6i>;|2z1e6txPN_e1-YEh!x(aC@1A?XAtAAo>BQSCiJ=RDLBtCC(*-IsSwo zQeSd3o9`2M0sj0Z^Ys5g6k=RVEH-W)=%y)Cg-B~e(uEH3r0MPIi=o}N)*p}GL62fC zq;BVEB@RE1y!`4f$<2X`a{ddI|2D+K&6$llR5oZ}pT9{GtZULQe5c&x%v@WE!?&3w zPSMXJ!`Gtt)3)QIK6Ctv_$rn4BKRNi5n#SYaPG88M`KozPT)-{E(FH!tERNYMWvXZ z`Mdjt7q!0)cXd16^lr2-~V&s+ZD_A_qY# zU@|_#=XI2G;$>1zpAI?X0|=$Yajq%8&o%>eWCod~43IEh72Y+P`(4H{t)q+tqgUVw_y!sK|auGH}7JQ(yW60)Vq|7 zeK}C1a)RH3_BxzWY@Ma%m01(XoU33;&~?kC50CxtTn5{O?=M_!HbK%Q0yZ0E{QI!n 
z#%HreaH+9D5Zv0MCp#A~uoM6fyEH1`%P2D_RKNbaUMU!9bkd?oV<6SAM??jBw<3bA zDT^+uc*Z=YDGuHX-Y-aeu~{d7<~U?K^M==({I0n~{DM2`Rs?)kV%h3fQZcu0*Tk@w zsq6~!9g-P|8dM^OT?27b=6YS%Y%PxmY%kBiBV-9ei({DbisG=x=c<|GanLB;cE;lM zyWwinR9Sb1Uy$F)xQMG9d>ZbTUm7F~S|6$ahB*pqh2V4VX{dm~rHvXDtDE}8cjJ=% z*T}z0cx@6C0Ym_{dPTpyt)>GBRU*r6j3WEW;E`FvLFRdvs60Hz9=$&lwqF&6Emgdi-9aQG6I31*ODv36+La z(4PCn>`#X87ZOkI5LPsM%rS>MC$gC`q>x=8&uCvX@&r;XXQ!#Cjm<;~z~?wj=n={j zXRheq#qU#crRW2xDrQ~wrV@b)x^Gyq?jr@=-g^A%UvftARKii;ek3jDJyf3`VI6gj zs=mrI(tzm0Rda)80~B`qSHv(P$5B&jmK@USn&TwYae^Ao59|M!A6kGW|+n+Fv<{rm46B>P20qin_qU&(AXGKqINolj+U+Ka!{-jlclecG$7jl$VK zzJigU%UA3+muk%j@$#I*4sz z`3OlF;q19=A48|nCj=W2lIp&x6oyhSF67xH3Knn}Jk@-}*nbIv^R|lM5XZ9ORh%j>9m&h)Y_dbukY07puS|p2ykQcG4 zv-5~ciHYy^@lhU?_#u#Om$pE{Og2wqPbU))Zi5_7RF_yDd|+T!7ym_TN~;9B_RQ5y zwTXjHS*&}VP)Dpxq#VTrsd=xqnS5i=w z3s&)FO7;VGiUu76;6vm&qP|BIWV|zHoc|| zAPrWkWqzdg1e_=r1DIiF2SC`|ev}?h6R(U3-Y7%(7jBgBj%;Ce!&yw_d)PbLD0|do z#4VH%Om%Ld8%O+$0~To%w%<~-1czR!xmNb(q@;8VP8UgFf5OEo0fmzR=N=+RrF{O` zuq~gV^Z{=KgW+-4`t~kl;B>>86FkR8OmY_;(c$0{tu+jA4A5~0b3P>6x7(efcBxJs9OH%bmW6dQaX0`#5Vl=)Gvr<% znkQ!r%|MpCT0Wa{tCm!v9kAU}?G^}ZWn*=*3PiR7d9C3M`pw)*D>I~cFZP^*-@Su| zAvD@Qa6okVr)8hT?Nof!EOwXiiV=G%3qtevT3pjd6*1&CYptgH|Dq_eobR4~HdhGp z`qlrSe3dNatwVqORj}I|Ws|R(mJ4`p24c4_i-xO)9F_SYp}?M+!}%P&a7$*aL-B2# z|CjJXB15Z@36{3^w+KlWMmfz+*Sr&mx>Niw>{0pVPVe<+eIK2dib@qNhOYcjXAIX# zwpKP>^a|_Y>;~gC`dCUVtsn-_yX&w2uQxyUsVxC3<5tpzQF!DZavt4ph#s`jqv7v2 zx)C?A^M7DDUntwB!~L_+3`XKwlxkuzZN{o&+j(e^+nVF;*>KT3#}NTCyBT@I_W8>z zM4tX#rAFSPhW^YCQs?NH`){aQ_L$HgA{)-%_cR3bP&Mk(9WE*A0@3PR2%~%jQjYBT z|DsNLv#Wv$E8Kce9D2Y3JTG@yYLDa+S`HOtq*oa(*@a~Gyaw(Ox()=>qUivI2l(zD zwVwnkh{-3FlaRl?pgtU63cslTfbN*PS}KD0U!xoHAycnAd4H{XEnzAfewulyN|35x z#h4x(%4hVBc)O8j+rK+YfZ0aSX%CNow1YUAlil*T>-G~R^4VzCH+w~}MB5CW*e#uJ zJU`xo$KKHx3@!NW1;(v>2?Ru5aduqB*h!@WT40}-PW4+kq`uOcI&nXLY#s@=^5{OY zY6Sw8qoCf+Gue*M9Y!(6@%~R^Pk9!{wJ0*&^(*B-rT@E#RSC^;D08t_KX{3itp;iOjP0NR}ySJMuBQ@`Y z-zRLw@DDsbC?h0D^W1}@m`?;xCm3G-NTcyE^HOt+(-fwZ>>-_b`Ou@>g!b!DWCH+@ zgUubJ7-c9#yaTDoKUw^M7xx>Wi&Mtxs2GKziYWq>Un25iWOk{2E$RB(3@d&7pHDdw zv77`4#I1{>bf+_|+k?Wwd14r+URhf9{&~T|KnB@4FPToaJ{%CIx~LPijW#}qK7!~2 zUuBp2lm%lCXdaUOEJ8nG;k!&NpDY%S=Lw=g%MKvZ4T|_A4Kika799e|OJ1WhX<3N# z6PnLyKiRGUQ;Qq@8p~T=-pF8zta=;s?|0iq*HE%pPU{(r`ZLiy`K*8U=GOQ_=A}(}`uYP2E-)eInrNIJrYLKI+ew4uiXnvbnnQ)czEmju~$)B&C-h(H4M91HE@BwN&H0mMt*vquh}NnYph8H0df00r4e#rqmO1)wL^iwq-RB5{Xy>9Zcm>}+X`~DM z(-JJ`!j;S;V}OH7x^krXgi{4$g}E4LyM#6Fnlq_+x!fV%LufnDZ6LEL%d z-MD-70KWr8xaz0j$#nrzSd|2ME>%qLOusGkfkF>RfUY(R=p(iMbO^NH&@3;=R z6xb!z3@dz<--BABsfcUPpt%?~4YGGFj-G@@OY|Qb;qa>>;2e4oaqz@JzwI$0x(9(z zNrkZWLmYlEmgO_~9P&pXJDy5qBFwX&kY*NGH;F)m=NShZyvU0|uq+ab*TBqMLrIvB zi}LRdkX;|KF3H|WHWQyTTSQ$8xj%IXS>UF0?d}j=|;)hBr&M0HagX9&pj|aS!^kAoJc3EbLx7a z!x;RtbAz~w^0~fw_Mb8{p{1fd2@ZYHxdX(9;|~W$sHM7f z0aNFY-$!zcXR1aWWT-6jy$FO?RGo03IzY)H-WZ)c!HTZJip%@ z{8;U@u+qS4dNK_jT6;yCCH#m%%%*}l9?VWDT~3x(;$OUZ9$S`3;mc(sILccjqO{b0aUIPIi?kk>fbX z*Y}q(MRVMVveC~?4o=N>4k1HmiZIOQ8HX0Vh2G%#nckMG3ngYG$7G?nFq`47$sn=L zmZo*yvPD(SgwK)7&GQdW{EsNEquc-D&i%>+RC4cQ!JS9s&%9a1=zc~K(nh4I=@$+5 zL8B$tKIiU+nvyJPyP4I|WdhK_PEY`f?pH#{|58Eia^nO1m=DqzAa4@XeQAy-km0Zg zn)s?rFO6wm}Nm#100DU#i&z zcrVe8>0pq)OE94P>J&v!#yqjn;gd2iu}fAXny>WtBf~71~3h( z7j<#&K_q-i(2$1t?PfWw;*M!Qqn9ex~CVej64f!H{!Pgs;G{sq>W2Y|H zkeSUX~;DhF;H?zE=SBOO1XE<&`>qxHM!wh6u0TBo0DVBHZw1yxkiP;N}m6~N;tBa3(&RObi8~W$- zvs~K!WRCGL$#X%&=Pg`FuCBX~-u(d?Srcmb3Q)ihKrfrFq~7yUWvUJv-(eS$ zihdd<>$pym|NIHl2im>gr^$Lej6EjN`vb@qZwBrt$nr|smZ%T?7-s55Cw((eRXjE% z5*;CGupTUVg`41vjf{*guvik`W2UZw2D*7;hZn1~`e=a}zqfeEq#u{iu!y81*<<+P zrCe0LfVcWGVu6`wyW&%uXCndegUBM4U`vZ$zu2GQ$>DEx+wkwH^p@o}-Xmez3uS`l 
z=g=hs;Z36=70MNN8l2@&#n$Z#Im3>}7RP0FQt}4f4*)Zdm2E6*kW5L^(_P#W8JlY$ zU0@IY@NbQ6ptxKND`vrplS$|sPxRR?j1Ij`Ki;0d)l%5$t7I@-UV`xmk-^Af6)>Lb z-C=>v>LyWeCWoa}0N*=V!FN&Hg-lvgz00H5@e>@k&SDI=SI|Aw;!aqK3XHdh>$Cwj z>Ojto|NA3Zx@kp)U`5K%n-hljMi5gg-afV;_3u?isG3DV9|g~LSbb2|CL=VfQMosG zIO~|#x{lLDXg_S>VrYpJG_82~QP`F%;EK_`?5(@C0?v4Rj?`~q@g5#p{^zrbLguxc zY+UV^TT`v3BpmW`R0g3?4I)A<(|UUQ(S?DjGsK{j!x3FywlE`<64cE=} zX}LHZcO6Qv$->kJIpAsKFg>E~@Kt-12;1p+RbwtJn#Q9mLf+?l({E5TOM1xp!<8el zk|Bhe6|dKZs=_*Du|qh>vtz({)m%hlRZ{yuaUJpUfm07hIwWwSaHHzgw zu4(RN68Rsb9qCRB1ft8~H)G<^7swi*-s*cZow$71@@-BWoi z+fD#j5_^W{(|cEIUe@!hNTL)qtx{vE+@)y}i^$u8K&H8vB)s%x$h-$hg7l7H+!y}& zUJ^DeguwyC zda)h*0dygQ6&{xe;1Ew@*RoRa;qRbmjHM9;Vh~FTwnAcKo+w%}j$#TX9fyH9T$PEx zvuk2!0YwC( zs09~!rrkg7!IutF{E@gi+q<9nL|0vbtl+C>**;N8x;VV3UK)eh7!O73vLjem-heqK zPZ8gDsg3OPwKaiGcJ_8eXg*FK2;KZlD=DY=O68oyu;azp`Q!nVTt%9gB3$m#wZ^tC zwW`w}Tv5l|;WC6rszafq;+?5d!T>e>)>j2XakF|&!$4^uwSL9%e?x1)zh!QGB77pL zoyn`JW*4f9Dk`^D01~;#;q6fTWb&iG+m_%Xz;}OAKgkAvo|-4ms1Wk78={09cRw`h zvsGUWQk;qnl;pWZJ?sge)MXV zK1kq`oD26B9tBG{^}s$~uT5ztS<$E66p0MOrjCu<{lClPv#t1I#?Lm=%SKWM@>@4O z^dTO!HJT*Euq{|Ey^Q=!A~8pqu0?5w>me+(qL2&al-7-&YBV|1=Ho%wa*P)peg}-x zJ@JczC~N{%NymUA>5kX@Q3Yi8&+4J$Z4W=4Q+t!fB0l*alF%+PeT|3Q1+Vvb6%ACz z&i-*xUV`N`?N>SRaZ$3&Yp3$-R|It`;3;Lx@Hk)i_A>636L5~V& z3ZiHzfFWiA9pabQ*qnzWP}gWLAlfaQ2vGp{*_`9oS_SvFx$=A{jU|{d@?F|vPo2Z*1x-r~H(M~WnAFA3dAsP6+d@sA zLSQy=eHcDhbV;%S2z!Gu(vF--f=%ytV9S_c4ggDG!5V4%ML_y(R)zJK!F1t$=;$*` zW7!A)-vwZ*>rO_;q_Cl}2W@DxPG;DqI{4Us(RNnXG?KRL?KuqoZCf$+{K?0?+@0|1 zc|i~M_(ni#K?{nZbLot-C_C65aWqDct0ADs!~AA!hE7elsnNCEN%z0D_bV=&=0{!H z0UTkENw?(V#|6C=DlgF6jM+YH5Y^+t**mq7glh}zC@s7Ml_#$Z6Er9Ou;+KQ*pPgC zI)2OYqH~U_j{9|7dt;e-B4ia)Il(}}H`6P5%b7hNa2}DA520Y6%o;i-ek_9NE9k;a zlT0409!o$tB9uEvAlLniZ=p}G2(*Jjg4mxf18b#qV5p>oc!Dxb??&h9iI^4iLkLw;&?aa zy+dN3(rQ-?6PlnNRA`T|HA2(_ef$C40YR#!dQtnujIi9)`dhOKHPglgeGkkF=0f@g8#=`CU{=fkrjY%I+?x&;0tePwn14BR5a-kP=~_|))5V(dT315VOj&1Ropj=G-Dm8t@BmIw`sYRr=JQ1$$=V@)) zw)>f^Y);#wiCCKz`>!NpVGAXq$T@MSi(Z8R(`K4?C;QhwokK?X;jIZXegjyPy0Gj` zS^N4fvcVJ@1_svo>b`I+)Vzw)(tWKT;O9o#*X=a+?X266SsW9ZeeftML zEyF9l;Xp!<0PM+-x~V~&w|cKr$)^cm@a+==O0FRfSuer2zw{I;6P-tKaC1|IU(`m< zsB7l%(7b=aIVy359y$L>ds#pi3VJi&rH~G&$~t%X{o_g6AK4y&_2Od{d!3drkC2w} zvEmOb*^l*!OzZaT`8?vs(Im>+TU^Qe2M(!WXgO--VIPVWFNryA|Zm_}LD5dD@I#Mj3ey?PRXMEnBY0P(As(_i{i;^#fp?)v zT|1yk^_Mgxmh>V2)>??2OtiNI)z7zz8%)qEG}$kd*7hXqu=y6jf=`!W#cY(gC`s;^ zz8g9uIhP&#t1^w%z0~cJ7z3^tjIdd~Tu6Vid9cAZyV>OTWRc{y8mozfdKm&=@hI_^ z!Lty%CH7!{lqq=qBB~RWG{Jiage22dja_# zK9gnhVOcOeu5wq;-1A_kBR-n{(#D;i7lkn#kdUO;kC?)fgy>-}kKd(iAzrM(9>$0GjZMd?l6p78}yg6Vk(Xz!0PRXq1vGHUy8o)C)l(twtbXPb3S|LZct@Z zCqcIo5N)ha(%sn)6PkD+^#?;mTx97!QS9wUc#La!e0{L?fG=>HrPy*sd9nbiY6kAL z+{_X)qfr0iV*G5mm?D^3ZH}1FZdl!o>XbjB?%9P6_bCVQ84=tgUs2)(lw%B>UTR+-$}i*^we+c4?Q|Bw#|UQ=2m|SVYE=M#nCFkG$c07Ya|{qQjW8 z$p5|@*%8fEBgQxLbg|Y zI?lRtx9w*yg#3>1QZHWarj$LYcTprfS>}A-wq1m0{3VI`L`?2@}M2hfdP z9Vi(G3)8*`cKNG9iOV-w0QVC7*=<|@T6|8T?EyxK&3;tYO0F)3BN9S);m0^R7djO@ z{J8`T?;_kzk2?E6R^h#DLqwLSwfXg#dM?#7yUgQhA&#k5eEKS{$B`xxILBL_n&tz2iQc`ZU6LJJVn0MEqyQ95A4ZSaC>%LrY zpvPXg=LYF#gl4)2{nz_`2=)qbjHMek)Kj(BPuG4Uq6RAg7P!?*OT4~K>oTNMUm`ZH zhRc>&B{Ed(s_H>O-`hecd;6XMpyVe_!l7TNfKu(mLC{{HhgtuAVWyOL#;kM5pOrdb z%z#O{FVa)nV~*Wri-mSG#d0s9@$DK~{tWc~fOFiQI^Z~0wZDv;0=0L4`gWkqRj)$i zz3X(HuM)|%q`WzBBZqIfd$6=6HK6QKkR4n0mK{87Dn0m=1*T(rETK`qGixlJ38u@hO zdlBo0$5cf%U`E7j(RK|0-=txOC0CN|WQ@{}3WH_0PptJynEr(Ibdig9E}sn-XaZr} z^VGcdsr#?7Yox)<^_(tJxnY@nAN#w5zY?6a7&559jH-T3!d`v<@H=VDoab-VkL}I2 zSyr6PPbc(2*vLz+wlEeg{3q-w+eDiZ5^6!E!l$nxaY#xO9s3&kb-+eBm!Qx1o3n&>r;Sq)?kmtY0{o(U5sK-ty?H{ME`9=b4 
zRs255I>S$Jd$jp_IRY^Sd-tUmC^ulxR*3*~MgM-E9i7-Oo7;iWg`CXZ=$4v4z5nEh zvR@)}mGFUv#Y`oXvj6H$J$|?W3K_7t{29^NYUoLBTK`S5`caRMM7j9SfcnztwAMrGBf!$$m#1hQ} z2>l(gdhr|?n4WczVKEk2JG)ihS`o`v`Go^}SDsa(-$he9HKe9ow3w!2&R$C7XnFHs zekfn=+glTn@H%KHjPyFRf5cZGabR44=ALj2<@Hj+uvsPBl%WQjl!wpABx*=46^sXW zg_klgH?zQMtCu{pBLh*dC{P}8`Y2b=ztSoOaDpi4V}5Cf$VBZuUDdFfJa>FdQX5Av zir4rvwE=3=5H9Tnn6{fYP|5V+#H{4fzXnngu?vW?$u76BJrlQ*uS|3N8Z{5q#R%?> zYph#3S)*Z`! zhn0_}zGI~VUKJ|oMSSxmZGl1 zlN%F+)!78SUe&(w)eWm8|6S%iv_mg#5hmkKS^&(JoF;1c4t)V#fHez05nxUvfU`bx zwClr1Lb`aA8;_rT*?LcnCMr~-w69Ckd_UXF6qp#UOK|tHPMy+PnYNc+@w|<4ezu(2 z;0p+yyT>&Zmb^bzq-N>A4V$fwyfh*&(xQ|3^YKLf`0ob3Whb4%V=z%+DSW=d^uGPk z%H-L3hO|vLszLo#JY)2)Y3t(^N|i@!qfR5K&*B)xzvcdS8Pr0e?0g)2$cwNB4f@(( zoS#PjyUuHQ5TS|~XbX~+jRYvZlekjWk(PSs_v#7K7qjVk{GJyyJy2Ndbcg|;k39T< z`BcIwbN}sCz{&$bkk_&pH%yk1;^9_u{qqDjb%-VQeTlt7xP{H?btXhy7mjz475=~ zVVN#h_Ixf~*WFk|jNmO6~mpY?G49K%umFRzXqq4H(6t{}TPZDI6!0 z)ml&f2o}95xZU32s7kATfc5aH^<(6)rye}-;NTGCw8pbr^X3^jxj6kFRZ8zW|FTy0 zDDGM=R2Km#GiO8=#{;4Y+@#CK5IXy4pE;aAdj{C2{n5f$)Zf_cf*@R{2i z!@MU}a%coMTuW~+o~fL8qFpfzZ6v*XiPi^Aj%$#UHDm?xS|W8*d9#LnpGN2RqkajI zLPQDELPsve5NX+LmY*vMa4{?=BzGjhW(-H_oxgh(&VhtXmv&MC&v4?KhKMtBvfTCV zQ2S8A10yvUQgE)>=I&pOO*vkqsS7QR$9g9l0`$VREFaSGe7>HlP7#G|XB=wVrhomV zs`__a_hluMog1d{rX8q$)f1;oN&A)VZg%_{Jl;bwAHR#zcc;H&;TS9?9FuzxLg}K< zDE88LPKFJ+dS=8d>$*UN=Y;GWCo-*PtNJna9|3Ow4n%&GPZF;rfSc^zo1BeN=i30| z_mZxY%7e2?uMsm3m^#fLB-9JNz3gk^$7|PMom#Mdx|}7>@|n8$e+>%rDwIhTdr7X zls5r!QL_Efe!*izM3Qne*LV}k^^YBSX5I1r!9S+|SVzi^VR5f)kN&d-N zv`ubtFP5`^Ojl_MZ$<2ns{Yo#_;>dHFfOZgZyeAR>tB0%IMss_1rT|KCHoOYmajQz zR&o4h)Td9=_E8pz3Aum02oEQ|`If0g2YwEoss*(IhuQ(Mw1yLM!t10&lQm| z_=mJGl)Re`M3+EVkYcSe)yp+D9GMlWyWMT1f-5XU`qHk0B6o}xCa!yvy1*QK%q-r0zql z4|vm7mn(W^`7(OB6;O_qrKK8GB#XFi|1JU5k4*<@zg-vKPk&xFTAtPxMy_FJo#S2M zjdS^766Db)hr32XZx9`oq(!rYz~%ih6C*;r4&Q0a2qgcba6ch){|?%78&TBrqZ{=mDF$JlqN^tCAZzMntYykvRJOL*wq?FuMOZY$q&A){hqDQWsCrfNEnE0MDk+=^v zSEowL2OFe5=bNmV8IhX*HXflY_cxpqK0%i8{!ia8Q6bb_JK8{0-qfnhFJ@>bk0$@S z*xgx;EtHGub883EostU0-*VFRQabs>eI-47WJML<$I!S;7RH$i_g9==r8?yI`h0Wk_JBvDiD|_^H!?Rph479Vk+C`8O2;s zY_WEO4GoQGVhRvPCoH5ve6z%YP4k!*PQlbZ)q1|3^u|SA^@|&F{=7Vg-TXEu9xz;g z!1;lSRcf*T--9%r8#qNSxE!wBCixq$CVAY7)ErLa>1G!L z4;z%*Sd>46DHAtQ(?l=TMTBTfdaMTM(7R{|8AE*0YIl}`}_=T$p ztl!~ee*UW^g9z&VF=Rd0YtVVm zlz}#=sABKEaW$ev%&N3H8%j2)={j+=qZqJj_0Sw^WgH{wQxZ3f2YKEkoeRLsv0gx?~;QT&&<+pZ{a;y`!QE)_p+~ zBqv2tG6<5CoIz=ll7oPvfCR}oCqL7JfC42pmx2`wTt&_H+7VNavy z-gD=^dGF0zv*xY4X3jsmL)EUT{q3qR{_3ko&@4w0DO2&j!IA(R=#BQRyh2>Rl_W1D zP6d3HxyglZq7R&_eMTI})ubvR^fNdzf+G=cfm+{&7~maFe!E?Yo6KB;f?n|oN4rW2 zQ8IyBNky=W2NY)X-PTOhwcNJzE3c+RnalWJ?0BpjHF?4ImrVz%d$kkr4}`2ju1if1 z@{D(wVlmXLH(zp%X}q*#-XTlS6af%dMirI`sa%hW62Z=ud8iFK$0v|fRyp=?t1I24 z@ML`LV!-FEN}mG{bDE2{e>e@ORk!9DH$l0YN2xYnsd&9dnVk`E1&mh}3ifx*@^_w}X*n&R zF|M!&WI|?)(Tw)OsxYt!eo{G9yH`$HZXe5_1@;%ytuWk3bK9e~t_r}nc|t=E0^}p1 z>)YT?e6#CUgG(i?z%W?X0C}qhmJ8+|b2aGv@Z5SwA62Hy3%|CMNP9K8Q9WE$X9Fr7 ze4$BtV%Ec?ivAO@f=&ujS@%F~D`dq=rMSIz#}!sSKC{qGlMU;$9#tH)D_ZcmesGzq zN7)~1^fF))AMplCc>BF2Vc@;RYhPUNdHqn+aC+|atCPdNt8zW}j+0@+dlaF)msydg z19N~>^<5_|BB;GrlIb)>r!r9VSgdtT6+GxEfMaEM^luAVfP5@#6=^0uZ z-ixyY2e0m+nVG#0(b`k@~O`|0fFUtN~c;xh=XX0twnV9Elx8en!wnJ5vf zz1}fUBrq#tW=Aq6t3D6hx604E&T@l4vm{xD6)=;3a%NT)G`~vJM!F_3p$&MVa%4r2 zL~qEzR5`1P*fpdj1iy}i_~Q@JlN^LS#t~0v7qn4``lt6*kf8W2^TJVDlPsY3L|wO| z$|?mzS98?!*3~+&FAn`7iEpb!gU?aaicBO>p2$?3v)v3%eU*-Gw67je`rPX0%3;p3&VjVlSIm4O z4eyBD)L76ks{8hv{wy0Jka@QNQ;%G?5tkuJ^~Y8Vm~0AI^0zb>Tg@y0ug;d4cBIp< zOVCInblXR6p$0CE#8cS1#?_=;yl4u?!y)EiGY_{a6f`4%+SGRExmoSFuVSv$^zd^Z zD_T$c3vpFt-)!Ic$r>5p^Z@6b-{LbSA$;p+!8*}eC=5I*H#TscBqW=Cs}1%rRY%?kFNo^j@$nwj 
z3YdA$@TN0_4+ud4v{;6Lq&?1Pk;ni(k++(oR|t@D8Cz$Fd1KHnSCGe3=X*qN*!V7H zr_gH+T>9)0CY$+vE_nBl%DMYM?+o}lzU#gI-3jeZ>aO2JwXC@9Gv|pGr27G{1=>`} zjVKh6ckWV>iL5`s5jisbUhZU>{&b01vHjL^yR|C$6N|V$aE6Fw^g^L>xbbU zhxNvlMUqZ)N+Zxmnm_g9L_P^+OQoAmVXaCP3epV<4eeg>#y^$d%pA)y(k1tr<|O2o zp|4OW6%+<0v&D1~B#sOMS0f3&P8HncM)p4aT%N*a=P!m9gF0$&l^r(q&@6}&cPdk|1Vv^JMexRz2E3Y(@?hcz>6Rf5XJ+h43*7YiQH2WFih1ecX z-H~umF5a*z&hOd~pktv>l*QS{Kz&%b*qhMz;3<+z!c_H{)JvwOm{iO4%) zn$F)39M#U2@D6NU;}%7mamoj<$vh#t^@b-)HdTfp9y2v^I+kt*guFqtL_%8a2#yxt zbeJT#gtA1Wkb{((l{$aP;f&*2nKpzJJF8*AHz`)%nl_%;6IM9uQLQMW;&G~QWE>q@ z;uR5X*!}$em#^IT@nN497eOrB)XK~1qJEztkW1ORoEzwNPk*9iZ^kt)QFV2C;HgBp zGWWh%$x})09-kcBZwGJb6Wzt$hw7(3uQh8+n;+?-6q}}_dxR|pV^KiaPLut}1xNPxNzIS=^=TRnMLWJU zKeYKGep7k{4=BuYoY|DfHNy*|inmR}K3)&`!SnhHnUR;HTcjCKp({qg{1K$$lf3xI zAM5anlZxccmt)T~lzlXB-t-gES9Y0CY(WF7I;`~YC=ofrc^{N5(oWGeMC9S4*B2?CEBdney`7EpIzXIr_BGQ9K1O9$*8fub`igoBV>t?~+hWn?KtgW8!SC70iN{>kIdh$3s1!tJECM zh(}6&S9sU9pJy8zc7JyfwIQSri~UlTbz1ubeQC@UT;#?fRs%g0qfk@X(GNQyr1Kf| zaPdd2YraZeSq^vv5vtjxR(JwkrCJ^D+B-Tak{d= z_4=onYm)DvgdF_&Wgp-E@<2|5w~~K(m_!~(SN?#Pf`Y0|pj?#O0-01E25pS_PdhLD z3Yz0*fF7E$rl>~5xxts4nrwOW*v~jXAy@1vUOKOvN>%j$TeD|6XJ)8`?3So3EplE+ zh}N$;$&6cfa2{l!tAHn#aOy^dtZdNwOBh&kqW9YmH>hr3GZTk$V7xYixvPN*KQ@x$ z$`Emu_>Vpvd-4^?V}_yiHQ;Dey5=Fr=wa$05uM*5NOENKT@Vx5$-)}MOETwTh$|&c z3G10ot>Mg@pwV;zZaWKstvz~cu@#aQd;>qpBy^vR- zjo@~&Ux$QM?tEoiwRXA6cSKJd;v|e)$3;bowa2Sakz1Vk_{d++U{HM4AwPme(hE1t zg^~K3W-8BD-93P=kQbiKImLEMpayh=Nm3V;X0<$k`p*_hBG-uCPOzD2x7Ivm^QDBy*{cxv?Sv9J`iMij6olVdk>%p^F4Oax@xsf~&> ztZV8IkkldKKS}!?F0}$?Jy(TWw0-XCNI-LUS&ykx_UlhXZz*mRcl?4)@nj9~L_Dxq zW`1;wPx8Ae5)7%u8dk{Nkg;IB=3kxlj1ISl;=`F4B)?j}x&4Ez)JCCNY^T|5v|X1% z8(5rj<-?|Y7S%W5Pr9P_6!(Umunk8whHmte!0fXN%-W&{H`UiwCY9Ds1;0`ec~&)I z+enySRzI%4d?wI>;_GFvBo*e}i!kus5qCVc#zV-%a5b+@ zV2he;^m;IQXV7xW;;|zt$?avKfpXBb>*A?=+LW>ERGyhzGF~bB%?gmGTc(2M=F{4KyH!%O(bG| zx0_J^P*;2>N!cg0yI4Yn`1{OiCq9^%YrjSg5wA_kcmdn~4Qc#h!|ylWQ9s!JWlx*| z!zao6V^+@6kkbsl-q+$gp9U20ee3X2=yk9Nj$-Z+XO6;EWpIh|fcuKi!|1VlHx=lfP1i)@ze(`M3i(-TaeI7BHvQo}`Evm* zwjsspq;KhExLtEks9f0G2qnU(A^g$V_9DH+M|}$~6-CrQ(FLwyL&kHv*5D?lY6M&zCquZVFk&_Pti$He2%-`rNI=J`0qAZLBv% z^@wk84~3#RX^;4E!1X?k_pBFlYpaQb4d2`&1*cA>8Gpiap-3KJ@d8+e^{LR_QJ}cI zCi$kmgBLF2&XZPU-WnY~WT{SkhWu$J6WF9ufTEB`m+byLVA^qdB-HT&U1WY2tQDmW znjG9S_N`3A6{q>bSA;)$!WNpvE~S4K#}?n*NwAgJ z{C*{Ei24LQG%VLE4iSR;V;KfF=&wS;A=$ECKYsN_owZpP(@)ufhq0X{Ws&O3S$U!UjS?T{pUMZ!ilS*k&k-_J4LG$08~yDq-9`6(0Mb}SoMq%@q! 
zLVQ$DNKQkDSNlAcy0nvFgi9^@7(fYWmi0#Q2U{%n6n02hO#M_S1?l>;eJVUlFq65( zKnqFXxs|8-6##o;->*l4nN{F8GTI-aCl|NFFl&>7amB;M)b-1KNk6|r8R);6Uvx*7 zYEnQoJq;7ZyvuhvS_~XR-r7K7!o7YBbQd407vc!I-7&qCUE^lTT|X_Wi*;MTQV;0m zdT#E$SF|zJ13Wn=d6lsPN&w+_o}u9lFu9b<>`I2|QjPCxYwUE!;%f%=Y5p}bWvanJ zLS8~QlN)ELke{iklKN4nQ`94QYk)~_z|u8heBnmYUZ)wV%gXB~l3TRxB}Jc#gnO6G zsoo%@6{+77a5%T_8GryR!vHW9c6Yw+xvpY#2lwUei#3exWQFTf?6$BmROVrRW=A_R}tC=HTBilQ=aSw zIAFkuUK4g*qW;7$iL>oPHrHb~=i(iD)jILmtV1vxb0>zDwI?4SDdRRb{B(=NqG~;x zei;uZ6=t2fD-&f8&>-k^JJ~@&SMUS%6{0y`N%n2=9|Q3z&TQox`QJ~f$MgsrUI~%d zffJT?Q6I`}&H5kSw#?=?HA%9U7ce51ih}tHCJjH38(23>e``?kJy^)#Yq;J{30W6N zDeqxd=$N>tcIuA>lZsY8(@kBQn@S|BR+v74UdKzBKamajQL=xb;uY)l>(u+c1UnGU zxT2t^Je!$O;`r>Qu5)+2wFiAaKH^}y^oVS`%SZIJJ_1=;AgAk&)xQgFXER0joicPG)eWj23mX4+rUoaxVX zX;i!VC2MUN2n-t72PU3%6}kjHZ#VAGQ;A$d!#*vrcpr`VX_6V$S6<;Jro=x(&=C&T z9 z<3~pvCEJp?LYGyzro)c;PtXU-)jt(PC_fezf;Tvro!(ILD(`D7i@taMN*|49MHT@x)}vkVU}6g9(MVj3R#7Sy0b@=QnG z$^BC6U2eynbh47OcsfK0C_=HzPLhk~=B5Lub|#5O8Xd6mv%buH+C|knk#a|21*fu1 z`+oeHD1Hi8JRCPsc1m|Cxt-JA{q_6!@nSDilJ|*kM8V?313cIGdK8(hX;;v$5p=-h z6zH!mzuU1iQl!RtJvtfLqVit9 ztKzJaNtVX2fH|TYa-W5%P<-3|i2mzKc zku{?OUidwM7=Qwg@#~zPv7^adw^z}Yk&)yTB2b2de}h0g{PeW-08?9({VN$L+zyMz z&^599xcSdNota++qcQqmU=IrZX}${w3;7q{ke)p$?C-#<&>7@=e>dZ(!?#G4vq&rEfcF*_VQV>NR4j>sc!?UL3B{;G!D-Zo>s@aC+whV+zBA;3L)76Z}l;u-3NtGf;P0}F+{5t(xM-k zE+Fa-ZUsg2z7eL=}BFjaM9Tk=Ht;fNyUAep+ze9gzYvlEdg%Akv zO@!y;9=oEr?8i7hFm`zl+mxrsd%HiTxZh^B{G&*R4@2KYV6`kO>=$zHVeU zbOx^neV4&tEWyA!9J=ohl%~E0@H_jUamrnth7Vgu1do`G&b7pu(5%E6`;#w5de>qO1YGhT6i=lRH)xtx{1t@qGbz%3Ll8$K_t_N!0s* z?&CXhyl_fbfCqpiZr-(9Fh*YmMRpUE>)du#IMdQ8k&a|WPKB}d8z+cunCsA;HJ#Zr zvhxtLEbiQCj|5tyN?ZI@lrPheW}}l5$iDci_4>^q|ek9aScSPl@QNhJYsOwQVhtw>!rdA1%0vm&MZ|G z0ozByK=FjhKiIWVPjEBt)`Re8yKvAS0?*+-LV?p&5Edx{;CAZ+0mEwZ7mf6IZ0{-< z1M|S6lfd}T*XaQ)o!Z*lNHBf}0vrYF&5r|j(AqgdQ<)?=-5CM7M^z!;qp*{x%*|kR zr^{8|9EaL2+ekK&cQ#fwxriLcQL?QT^edFsM`$+QdE{E2__3Iyj(qRTYNn#nO~L^- z!L|U4b%}re>1%1K-@W-#X2gg{{d7@pr77>~%Jj)P+YutsV5=}^p)rzV)U5JWKh);T zL%~y38s1L*;d6?*Av{H8@|uCE=@Ii;TyX1|*)V1^B8B&nv#w=>Yef8$G|E18i2)|2 zAgXSj$@~3aIiiz1BG>l{&nEB#Ct3go^2A=*9(*lLXyh7JsT9@Qv3|8!z}7$a`K`Wp`QtJfSs!8z8ot8N8i zno-g3=Q61&{DEP&O43_xg&nWrL*q2>J!lIWYyHA_Xly;wficfnHG1axFehwv=;~`d z>6>@>`WA#<+UQ;;OUAEB5X5i#*}HgIgeTSK|2nt<&TeQl4LCLz#-YwBEdf=lWzmA?1Um?v%9{5~z>Ub}7%s z`@7H*jt8n=PBS|&8eT&}&v%0iDjmQ;&kV8XRZ9SS0<6q~vzL)b%hUZXy?SdT2#@by z1^=Nmfk7MyN8hXX2O}6=1s`G`@R9t^<_|rX1h5GG{dA82JdE{tU1uH)9IQeK(>PCE z%oXMK_*y<>mo8XzA3$PEmTJe-SZ!DnhC9=t)^6dpeJNj6pOn@iF6%bSXva?~Z=oZU z9aI%Fjd9c?Ly`P#Zv6yCa81zjMs-Sq#Kpn&sC95d$ zmN=2>twn{+omritRAOigZMk3q4cM&q)vw@=&!j1L4{c(f;n)@*dcCZK71nMJGi}*s`-w z>IDdkN1|6b06Z2p3D`eAo`=4)1b&~X<0eU+c-+shV|*F)oKgMx z=csSB63N_)eOt7)@!7xnf!Sk^V^v~3da^A} zN^+V#arFYda^p3-)Uv6zAJT)YU58@;!ZENv?A}K03J-A~h&?4|P8GW}10F8lE$x5l zuyt1ZB!Fb~IIHr@DKN1m@~h;^G796%Y2Zy}$4)cwgLk@pG`(R;55C zK>dnFbJl^|(Za9aZ8=>0eTaU9BZ5?24(eKhYt35rNVQ7c3(#mp%%=VdY7UAh&b5@D z30WH3Z_{iTK2_jZK<{Xt++FPIeK;Y);skZGM=tRkG&v;;nB*~zmlU+wB=_s3++i{1 z_(th8FjZfhX?EwvNrOst0alu>N=@#Cu$_4Cw~QMBEThUh{bV2E3HZ)atngqWJ1zZ6 z&`q57RiU5$<$GlvRP&niudihRHz+>SAnS93a#K@241PPkn2jz~F1zi3^EDW6EiQ0GwD`Bm80Ju>waJ&l5Q|_g) zxAOE1bv#%F;gI?OZub0t!dDLf+Y168Omh5|ew}k`>GQ>M$`&!XvE^R8a#{)U6_zezR zMkR1%@<`B*qwC0uiBe9bGraW=?$nLh&O9X^&9u_mFw$*W>{QZd3Qi*HW_6qrG3rx0 zkT_V1p)-BU&NB=t0J>K_hE1U6#!L>O_?tANWt7QFs_>4f+~0TcSDIOM#bpi_O$rYV za~3hai4*pF_~vFaj;6-(x$dqI8s6v4AqmEJoAq&cnd6%Ts5G5gwv00bfF}UlJy1nJ zAQg=uus9S#Bs!e%fe|6B1-*aUS)IL&&rbi@Y2l@lHoKkp51 z;AgDTN*81#sl?!nCL|oTzhgo=F&DM6M!jKKS+LxWfPDevM_4C(SJ0Z&Oh+Vtn(1^Fwxnpyyv+w13ZS9B`J{D(Aa5E-V7R1{*)lNol*Y 
zVK>@^Kl|!6ZZ{5Y1tY@}a^AWw06y4pG#?DQ6o4N+%-GKF0*aVOkwQU07Lp?fu%?UO zUA~S?>H>x^e=ouK{x+&*1YJkO4Mkuo<{36Of*g&F!-pzD0K%bd!T6j>XpqJj42s5D zqp)Gnk#vqIk^D)jRJUr0I+QEk&0S?AQ=%z8-_a_ZHnQSLR4?^yThE!MD^=N+oheDG zGm+^^zWJ*Q#e-rN7Q>I#&?`^$FY+;%m)K&nyr-DR3|H(&6xz%yGP0U9=I@7)=ZX*s z>5Uh%BxEs#h$Pk#FQI-!ewR*Q^-JxGWXo7B*z`U*Gh9m9ooux`^1pTcR>%NU^$5Jj zv7M$}*Hvlm|D|?oN536evj;%+&g|u|?YkqjZDS$0d5hvS)l>7@-zVXDqvEEqU0aVo zyyH3fp!yY`GY?tW-&;(Fos*2ztm`D(ij)b7GVi4$&Y%&rO1mr z^XVdD+$#p`q#e=Yi|uvvgUQe?9+COsFGus?@8p`F+)vqU(EXjBdMF^21)fhc)vl%y zU*bS&?xrsQ6@wbHo(a5z5LU1cem&$Z!ea%zi8EON&pb&+uA6m+ak8ggu~j{!;hjUFF1sK>j$eT zQ(5I>HzvpfEjCfU64A`-ca|Z?a$5~#(5DD~G;F^l&Iaq2|CsI3?VXl*1liq?oRsR4 z`kr;YCkPRfeww+>A{Ov7rZ!@FsOV>&SpS)13B~ew%O~SlpPPZ)S$j#d1+N~wa5{yu zg4^XbQ8@^@Bx*TeBvOF8W!raIEx!W}ctUo!*(K0%b84!p7MPMAZipFw5pi@>FR1dskcp&vBU*CiA zN3FLLf3OIgB@EwEvnm3xG;x{zdcR5+JEJtEsAdbo9C>RMxtr*)QX%9eS~WJ22D*t3 zQ=y(+?+!~7#9Mo~9Pq{u6PO0%G;EfXRHj%k-sfkJ z8|k2gqtX+LQGCXE)l(1?1GdL%gQBNKFIhuAUzxZMVVVe%!*8NeHa)S}@IZ2@ielQ! zwN+gGW@+=^!n2)?J&wMSlg=q93=TyUr+afpeH`9+%#i%T0Tvq9CEZ$3#yp*KS33N{ z$;8BZz4tPtZaud#*`o5Z_@^F>4WZ&pvhcaKATilg=?HyK=)y*Quui-LlKxYkygnxL$2qPo=J_d#eO{=HCblKZ;Hk)3#>;ie^PgJd-(h-LpvkG%W$V zYE|RV%$EuJE>2NG=zGl`kNL-F+U>4rd~EXR8OI_QuDs$2TG{j(<@xiBIcRv(tuu|I zuHlV$nw*4(ydZ;Ebad& z-rzOMj8t}}Qoz07hgLi)e$uXV-Ok%h_7iW*@#YmKOK9M*-B3Z;{uN1u?vZOWC0if8 z+CArj{jz|i6fi{d^N^%%bK;?dqDXCLFq7m|sar~((}A+{bOriXBhG;^g)?xI3G}2i zs0;lGVtG%E)547P=7Kv|!QR;6L%u-{8I>E7Yh=`YslRn1=nUf?=(-))0_T%=@`OHu zG&qLq?(l>pinGOWZjO|YVevP30WhZ%bLO%p~_O`98z-L&^f*$W;SdZrkHpIoW8Y^%EXerx>7x6FHQZb@`Y7i5Ah zI=1ybI=Y+*n!LbCA?oX)ZLL?$9=iDFeY_@mm1zM#un6Ie6Hmv1&R4A*e#U^1rDnQS z_RTg*JL@ghH^_-X;wqCIu!-&Gu-av0CKHeYwy`+~j|r&c)lF=ia*Evz{=CJ7S8v~N zbHi413H*H=lk~nf?7^EZU<6Ztei31&Ji?Ez>U{33@K^XssQRAk9pyG|Fb{@T?H)-V zPG!mT{i=-CBM)x z_q9lRb0OU3vJcY!ryy?S4L#96tF_5+s8OYk4mCw~k9<)-Wfi(1L}-DLh?6m_MIqJS zo(TdU4`#e$NAg_U>?_NWYw!Emj+f`1dE6%tEp26N;hJP*jmzwmQ8n2?K8_@M;_UM9 zU)BCakbljP|B`DVXS{3w6N`vWc-ag*$}qsHQjjV&Fiw~?d;?uYq5#QF{)vTg8du_m zuaa@9;VBrMvbbw-qmF> z=;DGwa>3_{T}*UbE_g9hfd8ED?`}NG5vSS2#gz1I74P0Fqvc;oKDP|to?n!U@2`Nr zC0BC7L4mHpmp+OOQzgd1IMkiId}5T!0~R8FG4iJWj=Lcp`h0@Zz;6Zdbr?+md3hlJgF*TyXoVIR&%3*NC_`| z3{wA=`2Q_&1!u>&f@|}Lly9o)Rt;ikScsOGNUA1Yt~BDD*c)*z{ufDJQ%hB{!)08F zTpj2u1`In9vgBzeGnMJB*nURchCMbL`i= z!2W9ypDs%;)NxsYaDpbJV?VXm*|g!mRurrHpV(dH_M;D56+8V>O!@x9GY^h0 z)-~d9I_-kpIlF{bvLDy&>EHeFn)`OYi$?w!bkpN`;wNndZsYz6TATET@pHq1o>R8cf{$Og)>!k_rmb{U? 
zLiRnFrk6}0KWvF$zZz{O7GS0(Jy_az211r|_A6)({2cxBTN%XcsVy z0sXof1nfQ_m8shNt8&$O@T<{F@_cAk;9G;mq2`^k&tcGbbD=bNOllB%^O zAq>Gy#3$*;@`2K>S=OIsf}{795BCxXTgZ6GkOhb~mwn}Y@VPbXjZJ9sW#KwQm(3^0 z`BDo#a1YfWyihCz30fDHJ5HLC;`6U6v~hI$!aRb{*wCK<FLG{svoH@gxzM8eYbDU&{!tS7-GD>MLc)j(a8+j_|kPQ1=Z4!Q+*d!LgbtIY{pF ziO2W7Lb)>Wsl8#-F%^Wd{o-V<>j*0Cf@bO^eML6)=VGVoWLTP4zjAcFQ*26+Rr7Wd9spo z3jz0;b^uE%q*BH%%h50oVU5C@fWgOKOa~iI?%SW}|Dz3p#&*7zNllYt1HTd9lfXc1 zt$1#Y|A$uI?$uBBX!t+SS-|rXr(DlqjUa6V9%dimB@#Po{f=KHom_^;YHGbX@1_eYH$SC+`40!7fCTG&r%pyFPqln!Hu!=#{YOe*ysk-W7DC4Lh{$V?HO>~wouud z!!eJp)AxATN@h)z8j7+jk8TBXZ2Q>Wqm&|YHC5@Ois<7w2VUaa3}4$HXZPIW3IVuF zkj;oTDocX!VB_8A+5^>t$CO$DSHL?b83(Y70fQ^>{gtCL*hzU95eHL^-4Ka&6Nwb$ z&A4rWhw1GSGpCElNDN$2AP9f%dZ_N#BDmBEZiZg!hYqBdrL`T^*PkxxkQ#WFmZbCk z6^9^S$fAVW6OmzjOxU2|bE4N~0#2nA{~*Rs#TJLy1`F>DHt?BW`Tm`)5c^g)Omh^TL50j}V=*V9QR&A4fWh4WZRlYjBXsFiPTgN=935W$T^5L-1ITPSUxhk5b7 zHJ;q01w*-nNPq$eyw)uQvz#CzCGT9m5qcql8jU9SO{bH*@#AP2ILPzE?Jbv|s#ZawaMDQirD|d|;@7y!Ed-SMo++02IhBy!r zY>(-DwrJ_*=-6Ck+9Z*T0}VpkgL4S}T8{m$zVFe6C7qV!tns&#zffFDIb$6tX9YTg z?f-biMB)W4nBuRmIBjULSbn0pHWL^G+#7QEI~y4uT&t%(iwjt6e@>k6qELldWDugP z_zzV;v?|bU%Cc*A;2MjgH+G$xlP%eP=rCX^-y1uA?(r3^dhze&b5w+e+W?f$)XXJg zjQANv@5a(wF5%5Php+>Eokhs{ykHIM&M|S?amLOEJ@}qbc2QWpgm2Z=D@(EcIpTP? zQD~Q;_VvK&u3I>}ulDPy&qS(Z?C>6D_-XxMy$H8Y>$+LM7?`lKfP>8S!AtR6$;6fp zKaB-=TQA4Roa?TCG}MF8^D>419c7b}84HLHCT3CUC-K~cdJ}|Eg9()*NS}kRMN~1aLI?a+=&T)e|_8qs!HguGw`V|Y} z!(hwLSPH+2xYGrEWvyRjK5E|JmCDC`uWd_l(UcT|`Ezy2;rx|u=(C0rOkND63y6F5 z+*jp_3JqTITe1E?9sVM-xRjguaqRtdQ1kTJf&} zm0+X$kII-xM_l9&zoZNhz}lY1$m^tN)+O~(Ozty^+j8rd$!EXk8$B)3a_sl`R}j&% zSEPIP{22w z{+}22?{=*4F@I`f=NDS6ey`9q=KkK`RHOaZ32*}}KDE&OHK`;wtpku>@ z#zcmuSkZD4yC;rotp!{fu5PRn?(1{4OLCT)?|Iv zqDBB+UuDOX1(YVA1{h0*+ zsVO-;DnD9n3$@Wyq)Xaiogn6ME;t#o!Cn-G`)ju}U zsg)jvgm+wi7oigLIIuJ36O6PmUN3+`hJIz=8kS^4n~{^yo4jXvrty(!B_KC@QPxN0lNiF)RNifZK!mssz2@=@i&ySKaSFtc}U zF{%HRZsFGGu1x*iCCWq?_>%V`BqaSdgQ}BsI}M0YJNNynq5s$g*Dr~+6Y?*(_Wu0lZ|I6WGp)89TnE)w#WTsp{Xr0^p5(k8Wpq?u{>v z)X~x-hy}b}5Ri(X`|#*5LsM{iGkk6)b^$*QOsl)F!wUC3z)#b z|6DYYPXg*O9(Qz(JaqIV@Cm``)leq8yDMpnk!EO%28JcaL(}X}pQ7|Sn6|YZ+|E4l zDUp@vwzcYIB8yEkaem8SD}sRRC4HE8q#MPXFi2#x9dqtJys(LypApOMW(7*Uw*H|M z0?pHnh38B;y~D@%;y8xxm%v6Am--60#miPw^Unz9L-@oP5*POq>Tlx6q~g1P_N5i@kUseg_50Wm70-4dD6nMGp|w7OymsHH+Q7c_ zr|;*v3o<2_!+Qm2!&!76MO?mOoBAu1rP0UaOAs50Om-D1H_s67mkTnsu(~RQJh{8% zBbo;q$+}e$$oiiI1c+Q=t`V!+qiJKQ8(hlLm9*_#3IUJu=9N8*E1gAE!d(s7JXQVo ztK{n=)*cvrv5F*FD3M=_{{8)tp9NWOLRnQWIYpzUUk}YfNA<6+|HUB?yAMq~dj#{N z|L$1D9G}dYV@&ovIj^&7Vwbc1UjsUSN&lkI|7;rn|C&M)b_B<~QqBT#`#It0NkKdKCTnue?{J$vtdGAbXkNQ>m z?&0H=HtLOwO7i}6>HoavcZB>ifz;Qk`W{XnxQH~a*T-mO>9D?dp*>4;LFrUEukN3{ zmBh`fmg?}*-uqa(6HUa4DbO<+vHdYMYnJU!PBim*x6qK6Atrxi##fan`ISNWZ98$V zpF2XjG9*MhKTZt(*~yS%#q56=f$!mqihp)u%Q3g}A4c#Y@SgEMJ87i*<@+B-@T`kn z@SmM5kokT14unPKW70OlID?H~UkBfQ^~rH3FBmyxNo8D?_%;N8o=UUY zLD^=yyo>cz@?KcCx42LychmJw&?u_y7J#a=3{Jn@Snc*%Qn%%|^(fZGuo1WX?i9xS z*zp;`@5SabH6Q%WY7}4obeIs>Ogd8`#O7-0-9LyLI)cJTcLvj<$8YyEKWNC&I6ZuK zZu7T9{RV%%)(Gqj(W}!rCXej=6SR8nEGleG-bnI`Vg>u!_GnygaeQRClw%U=l#q-B zrNzl_Ydt9cS`Cz9$B3Aa25<_hr8YzXi0j0^ss+psET z#sucLW8eDmRXslpw#anNxqsEllzR^4WB+bCX(3*bGr&CD=8@zsiHONKC|kF*pKkRE z8&2moy9hkVD)++e5)oxFam8A54h>zGvO6U*yt~1I8GHM=fLX@!%{`a&My+$F;kn9B zKzmJrARB#DE793=EVUBF|Q z)l0_%JhejaZXwV)@+RHDFiWcS)5)vE?UOnDy(NB1rsPHa6*H8fR?+0)OIL~C-Yp7y z-Jbm5(*GEqDgG!VGEFfm9w4uL9DP)j^;}7_>36T1xyJ2_|AyoA{vwRQZEf|rgjfjU z_e!^BRxM5Ix=!C5?Sina*wX=*5#6@$clJD69zDJdZXJMRVCq6vj|yIbDr#QBW8V1z zuFFTU7`G?vjT5RhEA+!xi3f5>dOiiIGIRW&p`nt3SQ5!~fSF{y$?= z|22SrQQ*HC1x9DONBr17ic$1wJe9r_@QBoMtLjv#_EI&?G#?35w%N0AM+b1N4A0<` 
zFZEJ$FT}g+@QIu5W6f4Oc-wqcs8~?$_Nj0#;8U;IhTS~k$Y`oltj>M)Wnt^gtZx4C zIG-NXkGaep))Nu#_LiaBD&H+g=4h3EWmnfel76{XW``hiw~&50#b}pG=HBPJkhVP1 zAoi^h_fUS%LqyW4Y2Qrv(zGxJWV$KpRapBdpuz4B@X_O&7q4kEvcZgg@7(9_Fhk8a zu(((jF7y}Xk|(=J%eMIPn{ceZ9DbW%Rl*y}pKw${WP69uF&J$&^*>H(m?bsMZB7>` zR~hHx*55ew#xwPo+Pe~Cl&yc`3=Ulg*=mANq7jd4vru`==2pTn?S6k7;%+_Trx8KQ zQ!_uM{&|~ipXl1eFJD&zKA3aEh%{LGF%LlXj$Nbl-EiVrwb8&CE6;3<) zrw>R4dx0Uv{(ClcSH7D2jf=wz5hK#Oigm7-2B={5#53JDZE6xp#XH+ApaJyYZT6OU zeUoV61ak(kwdZ@CiC>bBhw840ao(-B!%F}Q_(otjbB&BtriefH%g#3fidrcVly(7< zKnPF#aamiQLz~wg7)Le9vn?G`Gt=kK{|&7WJV&-V2gDI956>OlW7(`3Lg%39K#S48 z&}JWOx>zqAwY%Cw+3MN%Y?B3HtYCg(MV9lopx}gk=6QBu#`0K(_f@smvKtXgy;kkm?`Lk+a`&?k2XpFC$=tYmFxvU!8-P{|%rVqkenq0p-io!CKyP zH1Izl&#VN@_=e*u0WbYVi;Z~DwT{+t{jH#Ue$_p*i_Kfg2^PZ&L8IhYP$<|&ufD5*Mr2T+8F=hp#MA2>C^vjz@q=BP-p^9 z8=0R04zfwngFq`&+x~#T>HY$uDHMY5mD${oq}N&#mA85HkkoMeA>x)qh?!pXsO`Hu z#ERCDIm-mp$C&R%!#(E_5}@@HQmRn#*?!#RkN&6WARz$RRG*d#K9mydn+FRE^-j&) zT)Dt24pbni7t|V`J-#J(5f6+PBxWvc8=g9Y!V=7X#YEIN#6rg zO%!sre&-*_m|HUUf3WwSVNG=J zzpsjd3W9|qf?%OZ6%>&g6s3q%rI$z(kP?dY5)c6q0jZ%^X;MP(5T*C3bP`$!NC_lB z2qc*kpXc|#&N+MU>zuvM%X95_bIsh7%&fJNnYHfE_j^C*_3Ie=bFp(0QcA<_ddjlz z3!Fw(YOTO6+Z=s@OC;RnPNBt!mA`7iz=Ep6>gG=P3U5d9sSIvvGVz6FR^;_ZFch?ji@gz6swh(w?b6O5%h)!b``I62<;EL^p18mXey_-cTcPb zL^)+pKl)g;?J|+<=UdsE{@bwaAv)?m zzt~Ta9D{cO)+1vd#>R6WS)V>-dBw2#fxRu&%DJSxxhnnrq#BwYLy7s7r$dbDwt3AB zU0fRb{I2MO+Hqp}hfX>JXeXzaXktD==($^fF~48M#+YOjyZZnb(qiJXwl;E)$#^r^ zMUp)-x%K~od}Bucm#+1H@|OO0*#Ebo@W0#bf4AHJ^Pk24XoNVfdGI5>Ev`OH3=~Uf zqEXi~O71jyzjT>%#DI62xcc!~p-s*k%qi<4p?t~XqTc@j6%Je0$^h`?6eH=4t!o4I zVlttuG(JADk7NYMP5MI@#f1NSxFe=|RVDD2?x)%X;$e@+uA8vaIK>dtU2KASc|h{1 zjDv}$sHa5&N8wHel(*HGgiBaN>@Qs?QrE{Z_F@3(&sFXjhs26Q9QB*NKzj}loYeHK zEr3;NDf-K@#`xo9hG0&V{71SNjSTpLxrNiSZ=W<*-`Gl>xRIW4IkeN>`S%GaXZ$y@ z3rT)&UHZ$uYr8|P!<}KQhGr%`KScQCFR*QUA5E(?j96Fr_iY}|z~~&Uvwk?tA0M-x zQ2z#F-1`-;CB+|}WAIADzW)U5Eb=$d@8k>O_}Yh>`$mXj_vy`4n=Q&;DE-r{i9I+* zM?UaZiN(@;4?a1~^(l(;?(&V%2aw1E|LDh||Tf3qBU-ZRlQA#73i^--}RE93thb`(hl#%C)7C7IRTbR*n3B^_D7;zQHS(oey% z3kL+->lL!rwSFDjQu+&}V zvbj7GGW}oddeWXG)BG(eqw^If{)%;2)*QDiL8qH265gBTdMW$UgY&O52rW4+kgo!# z`&#gDX#@$A0;omCsiUvMej{WOVYiE~QmnEXYw}P}@?WSeL0|uTX=cZxcLT)XBUpr0 zePQ-;4X>}|$5z#)Vxt@B0%L=gc+`vfIX23+N;g`_suycJpyeT^dCnp?F7sUfzr*nQ zJd`0Rl&*6VQCUp_V}J15K&YhDSL|t7iv^9Y{C0Zq;&YhkYYI9K&}kztU=wXYn<+MM zd2{VabHD8$N*7dr{hy%x;>*U+VN6(>^VKt1z#oTFbA2-#mZzzg5`vy-{I}l<{7d+k zgg*W+mR~A?a8BopZb~5PkD`wYgw$~fJ!w;~3teSe`K3Q;npY|Tb5~FMYHaiYv!)-NpFd@6RVGn?7*K8^#mR18@9uBluHG!wfBLafcOiW_aZw&pk3Mc5L`vhe zwt=bvF);*X8!}+*P^!a)`-%{7D6LFQm#kkfDcd;KU<1mw6}K3F z#{Nvz_6j%naKUz@isHLTEn4Cr{fr{aMw@WKS!lAq>JnO>Nl!7ifY0Qn0K}B$jqHDf z48Xl*=o$iKqCknzN;*Md1JS%EJ9=wFY5YAO?}jm3P1m~r*sDeta5xLsBi*Q0RWVRs zjT9Xt-O@y~pbjNk0pNhN1h2K4+{;FyOY7EBABv8U^SUtZW;1()f;5Pz)PS6ZQSOR) zK&@o|_Y$1xI8{ke+-1sH=DcRKaa7JDNbli`u%HoG{#SnSI57R%oc5VGCszlL>=xHc zb`c!TM>In{q;0O(Vhj7_l}W=rFIO(1O@4(neXj}H`d7QL^Y6)C=%Xn!XzTpSE}0Ix zX6QFmYm8gn^7bOgL%rT^3%BaHE&f;FCU#hOgnMe7|9N@Ezk?fnFeP{7dWJpZY+hfI z(&|V9?T>#|8d3=(MJ(VNOMm6=Y_^?K$)+wq7$O(g>= zuuSbX_dYLcR~<_Cv)MF6!TTT~e{wPHJHeK-_ux9lbLjYQQCSZMEx7CXy_Wai<%gl$ zIb-@fs%1)_&3KZX2F&q?B2323CqsS4yr1tp`}M?aPg*#Zr!=^4k7MSY{(XVS7dmX4 zw}uj=xQJ`G2(3&3v3$|H_&`5I5ho>VN{m42S;n@a2Kv1aC*)Kh#jSVK+|0ZQ)n;ZU z-u8HNX|agbBW4x)qqvaJeHUq}K-{?2S4OQSGnI+n?O6A(h+vPUY9!1)8QbwLGey(q z?9lFcO>Y#VtAxrc9yP5`6Osz$d~DNG`p57d%zGiPZJ6VLo1TI18q+QNl!@T?TVq%0 zdFgzV&;OT1Wc5JBSGdsHaA^;AF|^YCrAkn;w;|osq2@1uf{6qRa_2qo;s%aN-4+g} zZY!ZHl*~;I=e!{Hva&lZLI?2!2c2cxJKs!Y`gpNWs1H^=Qg!M9aw$SfnA;!7^_)%m zWK7OjCvA^1{IdGE{;$R)xF%b9Jfh<2Hnt|;HbMSy7S@mdCi802+%^N30SOFcjh-s< 
zfcukfuBYO>KgLBy=Tu_vEll^QFr2YMDtlZn`C|Ev;mTWSC>LzbPz^k#wStS!nxCDk z03qA<4xVjt>2*B*K6MhU6_9G^AN69!3&ob$vECL^lc)gwcvbqwhjz{!<~}ks%t>6MaBgiH@gw5%jyHe%?f21tWmd7 z#K}H1g@6F7Yd|yK5dPKFGz`=rMu%}!$w@;^^%~7L?Y1qJN2HD%w&jG#a?iRm;fIQAu-7@JY8mLxrC560K#z?Wb$t zxZuG{IMY{>6(*-w1U5YkL1)7Pp^Jqn4$2nC*7ctRJq(5@H)J*Utp^$yFW32 z;!2$rnz+c6$?D8iW?fEjH`~KQATOKJE~_&HmkVMg7&NWY1{5Ec+v$m{%-wpkaXV=K zsV}z+Uh-^~@XK+-noq2JPNrcK@Ax~o#esU!=KOTU$T;w00-o8hz%O#7=Tm*Y<9Xgi z^VEcVmBXbB)fP6jQs1{#gZ4MVzfP2RxEn2)w*4W(RY?nv0)1y}kqS1y8%thDmL-K< zKCLT9Q~zL)Ix6ie*9y{{lb%15ZZqEK&jc)ei)0{3tNi^Oi2p0%eni&o(wS+87`CLl zF^?I-9j>aJw~xM9PWcV*ubad2Or0U3tm8HMZM8_xvR@FN@DAS}AmXpcye)#U9Nv73 z=}2m{!ODXTqrF#hN_+Xxbcv5UoW8(~-XGlFUART@l-+t=-uCB)0~9NJEN?SaxMOKM zJ=}ELy?m7jQE}S%12(goItfrgG;baL;!i_P4fsaRYL8qS`LWI5L&kn3(oHRurvtn_ z6CakxB1_5m6Z`o;-X!_`uT!JT{Yc9h&lCPcu+AKMFB~xbGPhIy%hP8Ck2-*7>4`S% zK3%FBo}6g&d(k)&u%q!+_&_PB;ZL-WkOXBRqbG|+9)S98&JCyl+aLj}ROs-+USy1j zth_y4AMlU>iYeS%Pj#@9%SG5PLz2KpNH197q=T_o?4-!Wa#lHw&$f%6VfV@b&pGT% zD>Pi7t**-|CXlVRzOW{napB6IPQdPmrP=@T0+5XiTMT(dlH%Mez@=OROx(J+p>TPE zfC^4dzKa;%^xP=Wf8(WqfJ%-$rkptoTklOZtLI)*9WIP!_548l6=?1t&Z+5*4maiK z3{xBD7e%ybH4c|hsBqVxax^K`rc3pcrW=ei%JMJ(d!70#!K( zCo6=yP*i+HG_wbcHa00xufJaA@gO+89Hsee@ z|29=NO5Jz8@${XASIcu8@wLIKS1Fws0Aveo^S9fe+t^z&eV6LrW!EvGLOZXzar>63 zaO2SMeHC-xRf%Vdg`w^gpvGE+Yl}#>fll zawj<3wt2K%i|FbB|JbIf;v)~aO$entJz%);{j*d6hOkE9j;Go#e#jyz7((`ZIvK z{R*;y0nmR8RJJtWnT_X-h7VK{=5B!hYzS?5oaL}jzDCV-o{HVMNA-z!>hQ|J0}I$d zgF0f#Hh(k%B7@t-Bwa^*?!$xDy4e3cZhGJ`Rb! z@=or9(K3A{D8jscHteqe?#wDvu7U=A{4c8BK`a+^>&Dg3`%S!W)~_3XnWwgH|28PSciWp>hE#}XRfv5&dFRW#;u>fUtN`R@$}VZ< zVDcX+q!Fpa`;b^=3*(w)sQgos~;13UcTup_XLAj z*BW$UEf_J8l*$+loT5&DWxJ;{bg;Pc+f~mpQ>q1}V=E5l#3+y^@9nNYnq}enx;|`} zv1QeO+lH4aRPDqo7CxRvpLIM4 zy*9Zr>o+1la3vgZdlekto2W7Uj+Czs)kYGLR{a}=8RnU4Og>CLWQYC`?juEcT26W} z!)$>xaAC}Z{LGt1>}BBb12-~}1g`1XL2I;(9;~pLOT_xm(ejsNn>wAH$)q))`)P z!*YK9DLi&wIZE4(iMt02^_r*!-FsZ;4l~JlU{{goO!bZgkOWv#>tTRvq~Z5w<#+E#-_-4 zHdw?|7EmX<^%PP##{J@#i14EoQg0zu#L1+`gPnfpNAW^#Z=mBM#jc3PKq0;pSKd30 zu(hK0xWAA-BiB}yhqqlUfbLc1J*v%X9l5VEx)a*L5tHk4%+tul>425@wx|kxMYD%p zKN=VS%x)`;^vJ37mvNwX4UNC9h4#%^QCfx45eGCiViE%HY5AT|duiqb^_%&{C7$&u@ zbe1+0>tM;e=e0YEG zEPjIX{r<)+>UQmaza3El&3|-RhwYoY@;`AbVe|-hM`#1Qn1mn=Y|L3-lZ! 
zx*nSR>Toua>+*7xnqZWKaqI+V%`FyTle*Hui~;&Sp@tk$RRiJF^tJ3fwg9WE;43PS zpy9)Ao{PDns?%!!-o}4_yHqm;uk3TM5|5vu(1O}l@r{cwX|9}&`}5h^3>?&c z8n$yw4T)B``ONHVdD1`IT(?l;{dVF@O20*$|15LwfTCrVqijaxmzTNQcfH>qNba*b zwo>R(N|2X0OZ;}_?ejdp`+7XZMP9ali!AVW?AT+n;$^hC-D-w}4hj3`U43%v=DmNK z5Pts`{A(fTQ|{Gt=t)7CuWyk+m}_dtqNWISP?QM#6Qf1hGuJZ~PxVSIJ_eQa*nJsC_~WHb^|~UVZRQv|%+$%_y1k5V7&0N%#C2>1Eyc zyN~}@+Vy{??f?HBL-Y7pBSW6v%>F?Z0WM7Z3(`s}{N2e~31 z6WKfEn}-nqVXp1_*6ye6&Di*gTeO3}grmc*F^oSbvFd!^aY-tn$B}&GLOD!|e(qH1 z&q6ZM;>We~I60zZg;#wFw2gW7ByYd&k+S;{{mqEapzy$V?Q<7sKT2f`@^+l~EBBZq z{+N&UUC8PU`SL62*Y~26zXeTaOk|&q{4MNNc7zL~4am5C4E8=H%8wo%%nnYJftt-P z!8?9Y-Z7b(cI zL#jWz*xbc z*YIPxy*(02@^fndM&T;~Pv{}Mv)nHhDN4|uF_ANB`9&$;@7w)&IkapiD%in~uN*UY zVM&?@Rg-xHZb%wCRHn`_u+Zfx^ZGN2Urpy%j5DtN?Of)||90+lF5;X9RIxMQBf|<* z@mK%slc?l^h!{S(VwtGF;%jp8vQ>0KaM;zx-DI!v5AVrNIaMJr;tWJ2Pq>rlnDOrxQ@e0%?>wksNj;`RbWO_%xk{d7Fsj^{yEb_8?wxY^!3%#S*#9L5pMTqT zq`)S+7auQQ{Li=u2B}g0(O73tHP#V#gIDACy|+mA7H;fibGysYO+6QX&~6AF)>)hy z*%F`L7r(!PvLJ#LFMWhW?yIQwluvT=15#qmjV+rdSb;*Yn{VsxW7epctk0`mOq7ef z$QrdtR1{C>P;H8F$E5M}4RHQyIQ=n4+;F)TTmH_Zq_YXhXVdwWIilk>+T2~g?BMVW zrq46xXPN>aHQv6Ohat0|KaON=uE7s*h~O?(?&>8alsAva>X^az*sK$j!ahg5$|yv_ zT+4&9O9v7z%xJ42?S|VXg@Z^-=*pM?$C-B}BK^mkWuR054keqQ$HC@LJvM&NyhTph z#A`fSTdb)~{xTv>AmOU%c*6r+h!t)Tkp}#`w84=I3=`e6jv5z z7_S_131XSe0C@gRTNhfk^A|$Np=Hs751^*Qzzh*>UVK)tqW z(c!j~BGNL3Ex~;R1UG#@#*4OKcAwCY5=?Wfah-9bE~3Z~%nOqurhoyke50vP7t8XB zBjxuBmjPGf(%t^rDN1yCA5(uZ3J8?Oq~rUWHp@&N&F;q9^xUi$3dg37+!UybBOj`|*3w`;CZIaGwK0<>lt%~4@~oWl_8y5F(J{R5SrF)v z+dYobXbx+u+O3%wDXJ6|PJ2~KWZoQO-lQx*dUM_;-oV3PX8OZ2o270@BB&6cb_qTW z3{!DZb&8%{P>z``a#Y43{vnU@7xzr!KKQUmx zFWD!BeE;AeKC0-ZlW8Vt)vW*f{v^^cigGGYWqVB>OnE1rc06ZeV5(n^*r9FOVG`Wb z1vw0x zyEjKL_yKjO5rv=*7G;|-*fcvH7_hMhLl(jls7Cb&lB9eP*lQtt)`x%rpkShL<8r@H zAAAK^;gZTK$A^VNh1@pSZ4 z_Gz*BnWcz1jX~gC+z330Uaa+Fpx5r_s>xKUq2JsWTqAo_BF6+IYXKD<;;EfsN7zs&JzC{_KL!_MtGAr3kJ0H4S&bC)3g z^QMHk3Bo`qoB1;<)Z4pOSzH&&K|Sx116qa>xPev2)U07%9801lu z$pC1s5t(M6e$qdsnM4HQ)B}wIsWVP=4s`F$AFs_M1oGe>IyT1p<8A|QRK}I5iAN*J zIgm$1P{r5kJJ0d^SHiiK4eSd1R}vsFZpu3UIAzBJu4j`O*j_i6*fX4mnl89#rIE@F zSvm9__Q8yClHF6ipnTh(Fs5yYY41Fh#7o@7^WQ@qa3*p9YJ8qNkwr?_c{Ln=2<>7q zNW8?g=aPC?N|0mxF>YPtkyuGtUFyxq7_0+Xa!3q()DE*RmyYSK7$|}aCmc+k(5JX( z*3Fe$1e(r_faHcuv+)b-|<9MH#zLjRQ#G-bbg^*I~(bsO}? zK`}PNxSUM(u-PoD7sPWl>&<*vBS}sfwoXd&k*@eMI7f~oxpr5Sh0sOJhK)avqkXUf zcLY`jZ^wV_UDNgr>v{8*O3V^TLr@CW@TfG!qFlxKew?D^L-zlH<;Y)r)``$EuRE%F zOFT`HOLbGTg;HgQw3IlgW;GPewLr@7_)%u0~M@d1FB2mk;akJ5cm*egJiRig_^q#Sa%b1hzqkLm+f`de;elZN-BP{8^U^&PesB(}O;KY~Q3j1Zft39N&hGR}1U zLGTjshVT7aVBj#{cvi=~^<;}sg}dn+l)pxGo>nq9qvp4`J!d?(Yo`+L#`wAuNkCw1 zn2~-+!SdR867IW}mN{%O)$}P#7Y&%-v6*_E<`< zIUKY+f_iK2gYrY_aP=a>BU*Gh6@W4Tcj3VQ;VwquAOaW61gI0SBLRt`(ZkPkX~BE7$xx9$KH`&!*eBoLuM+;g;A zjbWEh+@fw^Ser~VD?L0{iqC-*TC-R+`==xk#$5!qdZ}Wwtu-Jb1o*ax+v|9w`JUA< z96I}00rp!%{M*H-G-Rws_CQ1V+&0N0{DN;bb`IT3yjOr%;=43n9Nm?QiesE&j;0g4q|xRh7u z5cDuk16Gkh#5cE6vOC4+$)OpMFCl~}Q*tz1kQT{yg!Vs{L#yM-h z9eVTEf%|vONfS7go37q7j@26d^j;{~%atjhw}Vl{Z-;p159pV~y0IB>wEEA_nWJJy7BADv;#x2}Hf3z%CU(3LX zG6DF-3wQQ~YF|6BmFejjZFp)k2526Q?%~4uTCUfhL>}fH&l0mvt{H#%=5+fYgdk$d zGsC$Fc-ktu4$G}H+eArDCb-RwPZ{DRpHpJ;>nHRJ_w0ZWJGo`6+^C6yi##H?BD+2Pt@ZagLBaOzsGfc@8|OEVzTkV$ z06-urF$UwBa`b6L=Q74U>*T4=00?kTLo^+wf6A~Er#L}~(3*xxF}HUb)!W4SY>*Pl z@dt7Fh(xDzvQ^7Jl%kS|&3FN=;VTrC)evT{O93G`$-`a=bn??f6QaEkyVzq9NC4f? 
zP!_*X*8674!EQczo6?Ty;|4E8vgIoe115$3^_(Q?v$X|S+Vi46R*JMj;x&(z^1>VM z+U-u7z!LLNUXIcf8tzG@;=&K4>i%sk@5#1_Ir&e>22=j*nVH37_1fAqzm({PBhjR} zTsvrsVH@>5wO9VK*lJV;urmTx5IMzt2kIa4wu=Z%-YZOiwK<;t{r&W00ffo2 z@j>qF_b9XJn!Upu0>qJkFxG=dx8^vF4O{a4caoaj6NJ?zvoWv={PJ5?ax?|K`XDum3IIa z_ATEptDetara)ha@e9sTE$O*5%O-wh-fs7ec*OiRr7tMal>N$*^_`r1v5gj{bq{vPLjXDdpK0(t?RK(95{ zzY&r}dOLATkYS&Ie;*uhvYZuUs$616f*s&uZqOjQesEwhI3-)EA!kG|kNn^wQ%cC8 zmuwGeXGu!{?$hq@_&QmOqsLajji>EOvRqrvMSNTiggP+=_k!LcS5`@D<22V4fL#yg z>Q2xWbqyv>c>j0dfnpZ%OX1juX|4WHTw3CRh#tMDeF6!Q$i)gQxC8oJ&z6q2p3}z{ zW**BA02Xkl(+mXBx=n&=LXujyfgpH>4hD>B_KE)*xAC!`b!3BaOBhYMF`9gGwk_)w|-oy!@}(KVtP=zY}B#vn!$K zJnoanGe?T+U?(VW#H5md0Rh-f>dG;qBz<=bI)Ptf0sE?3)*X5=h&1ipi9}_de2_>`4@9{q#@FQ3PQ)v9GeKP< zTK1Mk03k5h*vMtH#+^xYy!`Tn}wZwtcrNq8mdt&?W2pj^TA_h65#YbT)PK9S2f`5CZMpCOG6lqJl&544F%XPX`fQDj!?G(L|I{pU=8Smdlx6w<9J zl)C6Pb^$X|c^UQY?%Ypm4v8@^w5}0MTK<3=mr-ySoWUbHu4NfUo3vDvHhr#=#5cjN z2i>vlZxfUM%F0|90*-nGi0B<1v2iyKp;v%6;-^#Cz5m-HJW7MeCT&Q--Ap z>9|si9I6}aNpaaIcB2$dcq6C}qgs;ti7cU%yO)SxMxC(Yz}PWm3`5m8Y_-z;(CZqbZ85ZD_5)tNu6U!8`T{<#qo~;QVW07$F7wyi z>|3oNL1$Q-f{KdX-+K}Nz5A0+L~cb*KiPett0~$Z#`FNv8K*S)s zJxeiQwstqOJN$vKfhQkDjJU!*lrF86NXz)Ctyl@m_1X7{}C@W%=G_}$=f zHU12N?T<_>Ibuq|JBal7a2HH9h2j(w!@#GR`u;Vg5uw%DBelPujTiGLd}TFLm=M|e z@cjB+Ez^QZqr-MfcivJ>V&#*N?x7)MfZsVK4497ycKI3l_V`ysg>TDzZE^_ti0;*5 z!E$f>#YV}3SK*#-=Z_*(Z~9QmjQ8%yIhZn^oCK{%ycxaxfDSPh{RoieE6jBJ&JObA zNl5o(Py?GahV9E*#jLi)asn=7FeCk7qt?jBS`53RGXKZO6+9^Vjr&HX$_?o9CJFYC zOG3<}v!<%k=r}!V$uyew)3`eK>yKQY&zM1l2l~0)hcEo@7yO#c$2&ZX*dh&-Bep0j zUb;J(M!(q61Of$-FBo$#Mf`r4q|0HfkTs?QjLRRMrGHQ^k{}+K!BZ_H{YF$9obpy%8USR>cSV4@b3LD9SWqz zA(Yrz-ZMlt?;FWwm+x9bX=61@r^l^N5TTSl{!B+M;i(Z*j~%^oSh*R9G37H$j64N{ z0Yt94CrUv6yG7UCd} z2ngmWd-q~-QS_B+zfe`O^`-teXHoLAYxW=a15RI+o9C8Z*7;Ui+6J61Z@Q=?o)QdI z|2d2Zu1WrQH6r{Zv1Gz2J5-SDzUUl#5#H(&p7bq?!`ZRET_T!Qt3O6S-&3j5?JUvc z;vP}!2E3()Av_&HGuW~z&_)Y(5tUCna0U@o;gSl#ez5EnLD(>Hu`_Or;Z?>c$k8<}5hn>7BHVa!hcwnk9-fofmkrA4;=oH>XlW&9JZoEvckFvX~Sj)`JqmPJAqEy{*7fcC8us z!Y;|MLzlV4mJsA8`m|YQa-ro9f_OX zE;ntQop^rk!h_5o6jR{|ierx+Eu)iW)?R|o`{|W9Uh+8ai&Xnpz4gmW9ajwphrmAw zS3Yn+jf0(hXilu|Ghv3)AY3193UT17XRi28X2Azv8pK$-c(P?E*q!`?(%|0{!VlgG z2l9ka2o=csQssSVAQ2ei&`(2UoU*yH&YFjdA%yXL7}sjKaZ^9KLCfqRV85F2@`8@r zjph&R4{TG$R&q!`pb~A-&;VdHEYj&x8dwF@nP?Y%@3i`%lKrvr(|*J_%sfbxh}e0z zl}O)Ua2%a1xjB`lUuJI~ad{MUGGDEMRL6gE(4eo;!hyfJJb6Huv`4yshpWac>L&Ay zdB9$?)oF!K}>-D4x+a(Fz8fg}xQC@DB?JPi8cK?se zXIo<)-GeqUlF4fPDe``Y#Dl_{?3_9u{S|~_TD76HcU7$Kz@CD%GS%~CP$hjtgA7C^SC3z! zQyw>HQA+=`KM5^!6$7FPu;*k7d?rPzR2fa+5KE9p?4O&+v*ixIngy_RJjGcVD#eNR zfM!|U2fIB=hNO4jJfYZ&_sR0*&va5YZya9PQzGJNzV8q`_x9!Uo^Y#yt8Sc}@f1l0 z9MR`j#1?rJ=mWIR+>i}=D>*zK{$`7_zaJzEw3B(7DHUqg-hhjgnvs}uWM!=L{Ts5` z)Sw;E<(Hno(w1t9)(ipcNwy(y*Si)d-`nI3%jGETW-Ky_sy-1r8@~XRmepOGfXqW> z9rM^SI~tz4RHgYp?fiUleGM#XF22#-Ew#8=De_@~_DYhUHB2>__u^HfVqP2jQx-Z& z6i7TG`Ilu-4_j(7=i6qH(00uSNdO~JY___0FwlW@y(dC#m}M{KjA-=q)t{NSFTQ9` z?pA8MIwf;Pg{AvZ)djDkTbac3E=~adhE!8!MB_@6tm+^wDyPQi9&^H3uG1D{v8w<9 zu@bFTm~4HsN?sXD1sT+8B1-2Ec)(q4Gs-& z^vFf6(w>uy%oO>K)xLO@S<-PD#04`a@nCmJg%`dL6G`VMbTvK|(7%zA(J*&G!~3{! 
zDJZi_fC1)Nm`?JO=GmUigAtEbV#fGQ;>VsgFt21pUez8gP*PEvs(KNLgUnZ)p6^|V zIw_~z^5S*+K~7`36nLHRJdii8kYq3Zgy6!EfZz;@+pbWi=%=S0ECM{h1YWtMB86bt(?;3a@4;t)$gejK z{oa5$Ptl16UUPh*cdx@rWPU^Ep5k!=2m>NwmWT@)H_`^)Quilm*^K; zPo>-ZJiKzr+7}o}CNj^jHTs-!d{KI9(g;Q8o8o{o3stz8=cK z=`zA5EI)>@&2jt#jJ{biC1T|BnGW(fCQHz#ya~wGwVWqPIV~T@uXrQ?;puTth%wA3 zix+NQBqEAkZo5nDXwO~X?P!(vjST&A!litaMVNPNws?yjI>@?yA#!Y;KMc+QVs zd)VK3vI-yUKN~As-~FuS**OGwr3+1E%N9IE|EWNN-rPGqiU4zOMIPw%g#M4u z?M8ljgeK6vBQq(SSqgle{o~TwS%LdLJ)(pacm<)d{QQ}HFGMK${EKH{6&m+OtIlRW zMZZgi(S5t1;H~9$&2aMpk9L=(E=&AJSMnZm(PA0~X<4G(3Y~^E8d#RUip}4QaoV13 zd%lx9p~3~xhE64TP(CGvNe#qJGokN znS^uP-DbtuRJvovpx-3(5s*k;V_3eHx17Senyqjdz<#OT1v#@EQ60rswMC=jvN+)-0gVr7qZM*P78Y zsB>wP@I&mX$B{sR4M)@ay2mn<^p&zVH~mEp!EI0`u47JT1pFtLdrPryUeZ#JKohV_ zQ>F72!@kq%d&ILLg^(iZ3=Y_qeBiRH?Rowu zZlE=B>iYdrOPv1Jr6p=LIBxVn5^H_4fobdrXPA(EpyJ+WcDOuHI)LgRnQetICJCup z=fCbBKh=IG16q1HsrhX{!WngDo+@3fzdLh<1DYxKDzrt-cSU3W-LL-7{!&skZe;&N zHa%o1J%Z2RW9%6^)qf69Csu!FS1U|STNLgc`a542|8w4}e-Bfr580}d5rA-*b!alz zPn6PYv&NgM9KE%6MPvex6nI_6D zQ}1k7pIahA(*sY+Dms5gBx+ucou_8Ul_nVQh~KJXE~kK zM4=Z`BU`_?y$GLF>+OA}m1jjG-d(#=Q8Y*RWDy$V3K3|X>)hOMDX$TmL(bde*A+WH5d2m zVd7RjX|C}FhAF3iyp{R!p@@FoGd9fQK{uRu^RyK379_ne@BivbLi6R(LSfJe$^F-l zi0jdx24G?i-b%MlztKw zDP%5g*5pA~p07e2d$&ILVB~9E>F-gtIKD6avWNE|nQ~VG6zz?E7g*W8aY?w)FCMw} zGk?ir>K5DC@eAuJmp2(7ypF@&N(}Gx>AG=uIqTw@=glVN{jt^Mb0ZOneDpwYGjHKY z@`e11lQM@OB=>aD6PgB-ymRKy02RZ|1;eveLj^Yhb+#u){(1_m$(S|K`vG(exC(cR z)6Et)F~vdh0mchk$XYgoh#8_6HCeqVTHjt29+v0a?0Y4EwKQ00O8+sZ{&51BMc{|B zf&#OZM@!iX&kGp5CH|^Nh2Pq;I>pZ1k9aQJYOq&)WpNVny@U45x>m|VR%c^%KhAL? zodT`hXaoCk%Um7$2vud@%DHN6tvfaxZcm?IyDIAqQZVGCDbX)Zc=c8Y*;rf#8N&(0V z(msNW>1L?!7En$}oOoo1W|8;T)dffv)b;#xPf85OC4=&M{r$)B1#%Y3%bJgTfH!2k z!qjNurc>?aGUEiO@mODtf}>YYPuuu3-NVg&AN@S@3i+P4OHx-x;vME1#n5Ndo@A%rSYfY~IMNjcS8}joYS8GYJyKcs7Bvg8ISvae45EstE3# z^R=I)=a$PV?2gYL4enK1i!;LvjJnWuWwI~H1^!q=BA&RzW%!cTs`FjKLJu9IYQTr% zr>Bw6nB$XZT^(I^eC&U2A9pu4Uhc1y^TDeIIGN1Ldu_4$0m20Xze?a5Io$oID{eZB z2?dgG7Hkp=ct#Oe8cqAuEe>JKi}uv`0IO5Un~H>R%9u0R?1sniNEqWaH%(YVH|+wm4k1`@4-iO#4H`UXaJS$N13?CYyITnE!QB}& zxV!7%ZiCM3{C4lXyZ@^Fv{kq6m-}g|p6==C?y2|DbIuz=A>hL>{F%=_nxEY$ggFAI z(7UQa*M7O-{Fievj`kDGextJCaM%mwQLVq>v(qHK<^0UV;{(}G-TRAt{M@v5C94DS z4gZ!e`=!jgfmzkLbg_7Re3s)e5nY=r-X3Dq48*4ZxyNbT;V-@C*(Ktpx;pZi+RG^g z?9+mcOj)mBg_bFPny{sZeNt8O;NxhBiKt?mpmjuj4Ek2O0qBxbsSmrwd_Oxw$|UKJ z3gY08{cKsHytOdKl)z{HD`}; zjB$XRA=nS!{QGy|A>N{gOce^4r^yV3z2XE9x}!JyYsdiq&4ENsM&FLR1#uE{K)d z(31`~$Cr%9WK^~;8(cvPelpr(|0w=loz_-NSGyDoH*AbZ^Sp|b zGtKw)Q>^wK`KlQkOapqH+6|raWr1Up45uTnW={xB@TALUM@toIYU&cwZ@&5RgUsx# zR>*u`^@L_shMaU$WEL|e?&KH&#(kyGM`^Ou`F0G#n>?B7Y$k)mPIgxePS=x9su{tu zpBy{)XeOC}Btcgi?R%*b$2y*u%7AxjThVAV4lh$|5t`vwPpIeMNwz6E9Fci8Jjh0F z)rD#ZC8!np4$oE&L!j&!uGKDC)dBmeC((n1N6Ab6G-Y2o6egNe^X3dxpO$p1ARYjR z-8&UJ=L-cmvevxM_`!nxsm_S1XRWfbfK+tX>tu?Klw24Te0$HoKvw;nUpttc62qVS z`YZWoa^SVNj@inWD1P<6(ce64jkJni|D5&6@VQfdB+U9e(E_*gcFouB*8SQ5Y-|p zp!vi*L5~JP?jKH98dL7j*>rrhC|*oRdvp{`GsE z+74l#MyMxi^Gw@{cmG0Xv3EWd2Ck#X)lskER}BbldAjp(%v~TwgMD8<@%_B<{5kyf zGe^YSx`=m}MV+pqF2OHmtoyBi7p0EY2H82)AK1|4ZOMXG+>E|;P*wNU?RF%$N$=&G z$yii7E`2!fH?DdrxCC3n&_A%h%ZEpQC(7$ z5snmSDj?U)wC<(rUaIh@x*w4o!DsI7l%#oB}146`Y?paW(3NVURe&9 zW-5MRVPwonZ=Kjiy8ko{t}+LzKPPN6*rOg6y<#;cea5+lGqW&VlX0+}u)>93SDmb4 zV$-6&7~*$bA(MtAeS#Qm&DUCPFHTM0?mklbSB+--S#s&t(yQd_TJV{EPD15c#qHk> z8OvalN%0nf{*;G*Tcv3-3+*_Llud!ZbLG=dB<1`W*&NBPa`Ce{R)n3?*ZQ6XztzQ{ zc~IBi5~BV3(clqV~#6?4HwB`J8h~+ zBDmIi#mvM`G5!IgQg7qoI(zg}UXpnK|E&c8dcxpR2lH+CT~q0U{cOMX>Z(5g@apv% zn+I+-*Q6Yt&vW`2@H!a$xj0$R0;Q!}CyCEUJ~^(lneN>KL;8O(H`me!3{{W66}5~K 
zV7GYka7+!H*EU-EfD`ipZpHHep?miMCB6i|hj)id+bT$WWj7ZtL$f`Z-K?D#xJLEo z(5X8M7umPf{E0C1l+gb|?EI<1y;2f4ANxB8XQLzxQv?lK+I+4qC?}=dHUEevuiu*U zglGu!Fm3!kr`YjNlGj;N_)JsuEhm_+d_pA~*h$h(kwVp4+-w=HRb%=^y;+QI)!kDh ziEts}dvKhF3B$pPVSz_1P_fZ&1<&6Kz7o>n$+lb#DAhWM1mkqQ3Lz@ia`!L8KZ39+bsdw@D}!{5^JfT*s2SHm zgmTh=?|>-!s<;8CB8hKkL*Ol@V_?PsMs=FTV1R1r0?{oeo_}RPA<0w#Ag$DYF^o!(8&57P%r+bDj`+^Oq*`r1;8A1D2!AFI z73U~qoiM{{3?ZW6)ll>ZVApCYhxiShjwjo`CbOS$aU0-byU2TxNsx{p=IwVPyTDyA z!iUa1j8T}1XLj<{2$)es(ySxPIMFw=7F{K7!Lpi}otD&cjrL=fJ!$l9so#dh63~7< zz9Vq}<@#tGbFr`K7kQknj3TCp2?C3I^Y$xB`AlQ&&tKs1UHum91n;DMYVo(s!a@gX-T29*8!iQsTXy1Zj#{LX2!3sWiZ=k2>WmiJt!H4N2VxEfx%ihlR; zoR|_IZPyS7OU^FQ3Pywu!PxCUM=gaYi}>_{md|k~DGdkh8{q@Go6~PAO{5je>F$(< zWG#o!$BUg6$3$^c_z<|zNosplqUZFF(R31SVMnfDTUU%NR$O1?ZSe^VoQo_=ZWd6h zk^#=~CrI{p$p?JDUDfN^KgA-xHP1@T43;b91Ne4#=WryTf}c%iZ`)nQIavHzL{wwZ z%+QDRky8vorS`?p_u;i$tRDJki_U0j3fm9aZsQfb$`gX1IO$}a53}c>4HU?u`7I{L zMi`-ufVfynU`T}@iO+l9AVnE?UP81+y+N?LY|#E6-;||hKgOItvCgoDZ{*&=H0Z&i z8{x5J7`R8=EQh;BrH`k3sLs|7dZkCJyF^-NX?^#hPv=Dq?2vWo5k=WAzK=7?_`jim zdnB+|XYjG#^ZdhGp7P}Z#XkkTY|pM$JKo8{R~@?K;2Lx4mKWf1!K)&T(uA5z=6+C@ z@6qkUWfn@~AZBTC&Sx-$RQKkstFprQ25thA@^gvRa9@qXhl0;{#+0w$vojT_dJmYR z>ngUsq++!<{T{wBzdxZB4o?d0A-nn^qz`HjydECmt~;&y#r&o!GhCFtxpEPk;%Gpx zu0=beMv=_}dy!~t8k(20Sc_2T6Q+aAQYs3c=Nwl|vF1|3p}* zu^0GvOx9gbME){Gv>RuK5_1^@icQmJv006|Z%&@C|8tpZZucv5mhiDsZKehZ>_r?A zA$U6rOXJD2!vDBD;@2}vKdD?8e$JHbEAZWd^ zWv?ak{w+eSVRHQ%Y4ZcT`bX1!9BM#`qo7aCC1zGbLamBsyf<^;m%Q70#_`NJzk%)Q zq)~NhD-Rea=tp`wa^%p4JQ#M1tJv{e?;sn5<~vQ2<(L2L)r5x_`aRGHy$#*6=3tV9 z$$`l@G)TME`UN|Ai>wpB7CB~T9i-kx3xB{Ji5+mBSgxn=Ag6_|7_0J|-shvx|63c= z>oL7jGHQUn#P>7A-*pY=@syAI1+fEG-e+0K{4@H&c-8*nujQCck;IaVi(~F|Q_z%3 zJ#MwmE4$d0^aiBj2&!?XOyyHz!{kUGKp1 zS$2Qv33!tg6q1Q2iN2a?;985R@N_Tqf@T7}EEzd2o-aNK9PX%y`RG|aI66;=?7`0F ze+I+}R)1CP$G*NZZVgDamj$~Ed50FnfcEiDl?L!s6yF~NaJ@=^@h=$QDkScs?NOp$ zXy0v^s*w62+dfFu0KJEMr4kPuf?<1POB$w(p?`W;L1u{JL%g+eTu!%1% z)wukykv8gAB^0$;us8NNN9N?kijXK1B~BF2;dMb-MXoe#8u#(Is3U$je%`+yd6dyp zXDZ21ZL}S2e~8@8;)MX#V3MhIO(pLktrjV-i7=yD01COhX)&ELz3e&@2KM=rj;=TO4s3$m@_U!j|L|auKprY6#!fAsAf7!yW z=Q&GLk^w8V;aNg{-~Lj72?4Es2=)}#B~!neNoMC@DppKw4A({veKh+jMPpG-bGENB z+Q4)eyJa{UE9EcssD4p{WY8;*e2^TNyi*Xq6D$gS&5kKf`lq5fbgET0>b~g)$%C$d zTY}Cch^#d2B*qyHC3N4Mr3EOqWFOdQx&WtUarnJH^R`kGTsPS2{KYOF^Y3KBm527y zcMiROf1I|%Q5rZ~?&@`nCKy%5pXf(YsyGbHiz}PAUFv*>Bx>}xsT$ox#)dwc-oIJ3 z1xA;ubrd;TY_kp+iVf>Et@q@aLV!Vi`a-JGe1U;Y5bPA`*RT`W}HJ1iOSfbh7#@E&t+f*Llv~S%XBkys z8|xrJ6`ogDM1T5s0SYuf_}pxEVmx?1q~v}c^84pgF3R`%G3K>2rDVO8k0ndYo0x=% zIXK{JHqq{fUA}mtpe^tyJPkL0^f&(e<5r3*&0boZv5ZWXz~Qh#P#{+8&zuoKYQqa! 
zT5MJHZ_EL5C={Q-%suUJEPMLu}1+BkO9l!LhTbn}D}v+{vdq=NgJvF>@n zHOVcWIA9hV%d0HRL-m0jel+(tE&dyiTo9sD_Y1O4X$X_+XOKHvs?*Z{*MUAF*BQkZ z4UkPsV2sRg&X@U}s0{0vjJ-9ISEFffvq9GHE7d>lb%`N$dtLJw1J^RtvldjL+s@%w z@AN_~pLNk=|I+LNsT>T{9tV>-og0g`0OL4|LZBNUnuy>hr>-Zz*MrWl_KjR%O?FeG zt01FKD>!ZvB<$uvH&3_TNb}M_Hocn0(lq@JkIZT@!AxVGrFharEd1@9xz|jXXmCeny3bp$+)Ym%Y`m8RPBk!X|X-vnfPBi_v(LE6b@C zHDH}ytHQVXF%iR=Hwf?-A|Ltw_efk(n9nwZ6;0Y&Y`n)nJilmcl$6CS2!k3pUty1d z|8sD=`G>3#vKz@NRpl?v9f9RGTfJRTWdeeMBfj3v0|3}9{SZ0wOLuqRm2xA|JRGxJ zxT>ut%)YllvZ=?dW=JKJG*Ly-CTF^o?o3Q!0Caq(4JcbcnHz_51(+Hg14Mt8r+@VN z4T5WwCas$$+}8-f;}+cWUW)s72jk7U*?$Ih?`6iR8)qs!PR!i8#ESL+E_LqiDl7Ir zH`C8Ps`*6W>neJ5;FBhH?ZaHz&eIAC)9wF^wHfPe1iyGq=FmB*4!)#!R|3}HJy1v1 zrED2vkmyo_^RDcZQR(vO9(VPs8my`Fvh~Z&!+w#=t!|18ZKcum-_=sNLdptdIfP)T z_n3$i?s~0~P#tLmG48Pz7-jXE-&Uf(fV2}wt5V)*IBrWMhQ{|fUa|)I`6^4e5-iI_ zs{dMkMzVTS6E>jokotTv&_K+_q|*n*-;k`aOgE7oG;+Fl!+sHKwd8s8Vw6;*5)qG- zK}k>JTyQpG_k`EPohUO5n)JskbR13Q(xQmi3G{>0bxtdPf}zY~Vom%JSR&@u)KE|} zZ7e>yJClqYVq~e-INH#`bUFHALHxc|)6mO4*?Ob2EEND_SWml>QdN=8bG;m2QDL_EUXON`1YCB1&f=hNR=hAZeQf(Ty_bR)>6<47(>8Bf%0AOzRNOhHh z(`&P7l}n`7kOnvg-!?1%hRS=p1?a?*Ybb_1gOMVK*sXZdu~L4qyS>D|jXEe^0co>q z0;cY2%*}4=ZYlvvZ&c{M3~;;-W4%_UMW9pExh5LpGz2~7shDqsk`C;KA*{u-V@EG# zIZ1t)MsTLbhjE0Gcj@Q8rU@yAc@X@$xS!!16X#)}*XdM>>2cmAAVN;BLoPq0=sZwG zvFKHANNf<}rlLtmZAJW79#+!C9!~w1yP*EvHK3e=+#}5iysO<`@*0=&lQ>lY&i3SzKPN$Z8LnjmV;qnIT~1s#Y^sh zSwf{gfAa&79GGSAeav}HxQgQ7=URr~-2i`~bJfY(sC<2)=@JQ#b^_7Mb8x3W{O4bI zAe7SBT=Np@>D@Ahov%gV*VCvu+1YoJe;w( z#Q$u}^lI91bF2%s$FLVa#>ScDC5MyB+Dv&m{aA_2st8 z5$*iGRi1#LN=(R$tA)XaIS0Ovf0yxB#oWsT86Iw(s8~uk-;*98rvtFV?_SqhLMqjO zS-g~YQw0a1fL(vE+^s#D7=#d1rxlp0?HeVtO;IJ1(*OM1Benp5n(C4s-zx_xXtOc# zPgdkMyGVCG-cpW)ypH~A_Ba1&(yyOtTfuW`O2w} zypLcXbpkckB7dlXS>+1)jfQ>1?T^&2V;SX<42k`~R|J3%Wc?P!1&a7tJ#N!u*LwCR zr*=oU#CP@Lv&$!Qm@w_EFW6SHA6U5dL#mtgTPK_P5sABi-86>)+-sm<*a!pn?PLn&j;3}C?4f95c-SrI9&M)* z{lln0+fkN2_1e#})x#{&f8f0nRTD_TlFt^&eSScWf1w?IPs(P#G1HkJj;NZls6|aR zqF`;_7g=9HhK84a4`(5wD~_kaN2$LUU8u`5_$GwaIhw%%wo}g4^|&C=bTT{Ck49ex z^!T)4g5Hb={8LHD{YG<(WPc+@iL1^E#<#t9jj0VMVabyhaKa7}IxoR(nqLsAr0d^= za<|W>RdqfN|8bpPnuhMg3gJ0~+ivCK$qOmT4raTmm3y*&d_JD7sW?QlJ$DPRo0>a3 zt^%Ju;6tfeWgLQIO=W+*AN9o+HC`lSg%^(b!@4H`p9(Jw8zh=;bgEQxf9(ykbCYFR z@q-?%i+e}9BptK^kYLQp)cEYqPlhC9hi<7=oVlfi6TR%{erAPelM_G#eunynW z@aU?&@~oGFqr5s7%H z0fQ&}cs&8{_2WEY}X45uLE`SXx3n$^`f7||0@3wLe!9D370>@U~K9bJmezNYOoCwB5GFwhFZnVZ8T5E+?RohgqT zG~g}oq}D>Q#|wm4iSpA2Um7gp#DM3}Zc~?KBEELd(lnoUq>0NP5qPw2Q#EFbb818n zSb7>JYxe+uI0x)F)EQWc>RWIb*3OOt{k4NbbK5p+PYV*iMa<1cCFWQ{&i)45@W|2j z2(>x;d-=5D?QTZGMJ*4fArA0>?X!2Q*Yys`v-x>NNUKpg81^p~uOf%yU^3NCTP96c z6P{vl1YzOvR-baZT)5Ta*dzt~%sXvG=Ei;=JkXKusr`62j zP8r$HkQ|Jqzl4e@jATK#`vxcd&|B957EQ(fz9N`VPlcVK-^PNDkjmXxdf31o+}fdCgSVWB_0TLr%z2MHKSaB9qG z#vZ7{wuebVdnNE`b(`!1`E>*z+dNK#-{+dx!B+x>n3UZr`L7MU^H1M37d-1V*iZv) znR{w)z`trAELk#7@uGNd3bWNqmV3$H7h8yNd@nu0>aCaCQL<)1muknB)f3qUZM2=I z{cc!_DV#Wvy#m=oY-T#mQpw>8&&`5}6`=EGPz&tWQSDcf_Bya|qm@&iu6J7-F)Lt2 zWE~GO3uWz9$#&J6bHnqd(~WN$DYO*{!#?1-HmO;#%o$7yG1)7$IFaeHsy5f0c2qxC zpyL>GV@zfXpkHy2unacP^&K8~fAB|43fiiQv*E>iqKwr+E)4`!)-Ukq*lZK=7Rt&f zvfukPS=x~SFOVl7)Z=UdY^EyA@?IhEazW^Q19Z(~sh({31v1h?v$xr4U@J0iGlGyZ zZk;Bnur=m6`&kwoOHICL9!W~TRZH03UywiH3>jb=qOH^i2-2?6!kx7$@m8Y2%{3tq zChJ>x)(jGQ5?_InYOyQ*ZY$7(NlzRemoTY^{oI#Sp`;G~A8$dH6$x;k8% zn#}io+ub18w%@uACGL~8v~;`k&ZzF=Ob#}YzuiysO!NQ2gA2F#1HHtXCO>`KfI-OOUo_aRvIV~Uv=>x_*$EXfW2;$as1^IR{-tJ zVqABT=GY1^R=5cA`@G%zI?h)ya5r41RRGX6p_ybjm5*p3y7u-yiahxUed}clbM>L5 z-~D*@W9;i++j!`E8_(oQ1{| z4^$D&gsv#cXkR=xdJII9T2esl^@WkB{pP>mWfz7sFrufS^clse3`atj8y~vsR4gPz z;26KkTL62t6x2Sw9!+X9^h&G0;O5^H$oEvfI0*RN;D%qnWnC(Ai1s2zt|o0ZhVb7% 
zwO_|pzx-Zvc7tArMA_2m*j?*9#_QK!#hP!aFFi2h@U_8ApR%*570nTJ8({I8SnU&c zk?hYQr!yy%mN-(@HaOydViaO#yawXIopvNhdxs*6Gj;}EG@Mbu@Q0I~fL(%Ve}1Oc z!YT^MKZma;muYz1n@C>fm59g7za!VELqfMbAuvT3XOd-eXdi^stC&gO)u6Lz@1QMO zpjyRgta`?eSm*wWI5ERfss8t>dIV#VwzT>3ed>%vG|AEcY1fZ|0(PzKz9aN>+EOJ% z4)P6$U(=jM4=Gz|U*IDEb_Hw-^Arv^5Al547269L;I2;l;Uu?r?>^g4?7bRhO zM@G>h~_2n+vlOtmTL+X z@r)eX?BD7~)F;@)>^ctJ$R8z~g4<_401R3%x~acieRv+c@xDE!X;Uv{MP%YiJHyEZ zC24=K`x*2KZteM3dA?j-%7~aD_(+da#rqvaOKU*5I>M(BSH8~W*lk{>3zqI_|FXAy zT}ngqEBPY$Csm zWj51~0A-ILgA4Fv=VHq9c#Q{cqgL`zJ1_L_=7Ns^~pYikLo`59GcEhk& zSm4Gaum9j&+{kvp`e0-TKHHUSJK0u}M{4sLt3q;*vwGgJ72b|JlC@JUj&79|^Ezy| zoP8lhLlGtZrekF+EUyAHXszuoUC3#mU>1lQ8Zks#vC{pUV4lY+Y*T?7mal_iyP+=8 zlu@}pGcnGOkNegxJOO1mySLk~oAO%dry8;QcU>vW0O-tyq^*6VpB@6LbvY2S7@{%~ z-%Xumw%mA*#`yRXM{&hKOoSntt0np|qE$Oj+TNQ73K1D+k$?*mvp`E+l*r zK*hZ5@bQ)>tkXw42)Xgvwl{p7UlJRv_ddI?~mB!@+`Fovh?|?uY6>sBx*`Nz8Bi?~5 zCj<|}R_i`Jnr?tkW_$LtLi7fYIZRJVr)%Al;WU2t1T;Rx(UYs`yH;ESp|)S)7Mx^el>jsoHex0)*8)68_p~-g~?YrvME(+Yl!}m!65mH^+F>8 z_}MAv)XX)!E2#a-Y9#7Z=A9#b@H?iZoPhSHRC)DPUqSnj=28z@Z4vlXd;N)5i=)oU zn=2_Xu;1h1c_o#U3d_-Q-De+N;fgXIIuR3|7OO2nI4u7;6pxx=SVb$^iqJ|)FZaqm zpu|Go&Dc&YgqH;jcBl@cH~I8Ri4u3>932b3nip4 z5#B5h8CzX4p#FMs);-Xd)doQ?lzBcB=$iX+MIqflg-3;M$jesii~t%zY%h2(EgSSe zfz+6276Uj#doqzDR(=1$VjPJeU=C=Xz&w5xpm#_d_t~61J5PZ=r+5~gSCP&Xude@kK=Q`?YqlnZB}lw zqj=gHhE|_(v(SJ7P(F#Ebqwh7p5@+DA-IIc%FyiMET10Yj?%Cj`WwG)nyOodXcHWb0cBw$ps$^qttaBK%-Serzv$S8Jl-lr# z6k^Ktyv0R5B1yzkeyZfri+jV4#VgLsX(I**=Baq|W^(W29ZStGS|SFLPlmfLx+OL5 zpB7Pw%bih9YLP$wABKS(<7gt|RNNeKHRl~^uHxXg?6t9+V%=|eP{Xc=h z{}=x^e**!347vVjvX(!jHsyn&jfg1p2dT{t2{$2vlH27A=d5{AXm3C8W3i z|Be2?_(neu)Njn=fhu-4vAc&JwIW0R;r#mg%iNr&H2AH|SU~}Yyk(z;Y%Rm^Qxen1 zZ@F(tw39}|O5@DslAx?gR`22!D9mthayadT*BXV{BAJx~pZv7Vd}J3{sG&O(RZ&Qc-V=+*>}NPj+eH?HY}CiQLVin06ZPh% z{Xyw32(hV7d&lp?QkxQ5U|PHI0v*aU?xVpWvzhrJ3UP+t4*=4Nvw!1#`YlWx#ZZR| zR9;_dE}eV7l*_}O0&u+A9h0ort~Q7Qe$-IJ?(-Pq$b$cAx%*wFvO%hXtWsq{)V%ea zSYP^#PbECLicDtwPPhe*WppibJ+YZ%%SD_xKc<}k(`P9-z|-hOC(}o2N*NiI1FPG< zALr9nQA$JeQbhS}A8^lk-ZHSd(Gkg=$G%|T1d%76KF5h1V#3`T=0@PA`IAI!H7gLb zMN*?6nt#99b&7tmQ2Oq$G==`~im$z*kmauX}iDow!p*LR>0ZLwb#C$DwFatqwp1*h3n&N&;38Z zMlatvTa}VJ`gGJM#Yd5!TXBzGx>R+27`oVVL4_&|6y=!~BfDaH&Vz7v?rtU4)rQ)l zW?(@>9ir*c(XXm8bRekyXkHt`f`sA8fk6NFcV2DKM7;c~_3NQtA}vYNZ}=(V6h8n! 
z9Af;w_^?%-_J=J(ZoY;zG5;$b5zH*ipM+hSEK=o+Ug_~FK1{nxA-q|$DAIxo)T*)T zTQtp=b91mZAPo0$JD3m%!Sh<2{kH`o-xZZ#z5M$A9cUPyoSG6L94=#1e|5A7G0dnD z;UyY~6+qy+-N`kn&8lg}HesXkac~;$9++aXv1ek0np7*X48OEO^1w^$J9U(kG+k)>E2%CG z9@aP2%ufX6go!u3?T-D7f9Fc<&so1_3G2VYIk)hH26gA{k*|XuAp1rL4TW_=pNvkz zyWv*cz&223|88b=8rpa$jh_{p1TuLgc+h4#CQ;ya`R`njeJIzPrGd_PzXW(U>f=@~r7cA%Vv_+++n+$3>%;De zBy_a~(nKf7<89MMdM$)qr_}*|I8)}Gvm5)j!EOc__-ETU^8IvGc&vdZm=hIhaP^PY z$<3JMNf!sqs*j7j&EvM{#OM2uV<-V`D(%Rttdwmdox~r z>bO3Z7JJ!%*5RV7h6`UiJo6FCD`?*Rh12VASC^Ktflb)h_xq0u&X6Nkv)G<2Wk)nY zSd&(nLkNByuSz}5MYsOG12nw6z(6LO#w3YTUH6gxKR5n&N150e^3y^zlWqA=pK1J& z{NR|Kw&MCW2NM*5q=U|?jSIZV`gC4sIn>jVmI{`MVQd+l0Kn~RX8#`RpRG_xpkL+3 zzJ8O0kB`XQrB52ra{?lP*B;!O)*U{<$(Jz9^d zazR__zb8LSeEZ+Kd3Cy>1eWvvobf(2p%&ry|ED3?M#ET@m%d0Mfl+n6_+zfK6c?k# ze=%Usm2~Bqe^m@-P{?aXcsBIT7m9rTZ&B`N8@7+5v( zQ6dv4ct{dB!mmUNUq#cqu(JLagL=tVbVT4WWn<7Gg!sx6y_MNjQ~NK5VwGs@{}Se6 z1`GdBCYFfBkp< z;u=mRQ4tM7QL-Fs3bPOn-}oCa3+VQ?tWy7btH*`Uc!X}2?%fImK4QYcl$~#?wo>c* z74L0uR6ZnRks9LyO9dzIvMT;uV5WRfWG~p4JB?zc!d65TSo>;$_IM~Dd^WU!g@bV< zBqiZVE2D;TfJLxqzGi0NFgrWKE=xpl`cq2`-!ucI2EGk${xBp^(@#_^vpcUhi;*f^;IxWnYwu;w{hMB7Ax3FQDh3HqO^-(3x zShiN~^3K_InYxg#24T;pwVfddsY*W=|Cs&U*jW4M=t0J=M;06~A~Vmb{AT# zx@JS#e2L+@*<~~qv7nhwV`72ak(o?sElyCs*O1zQ9xU>aea}Ykf!iQVh^gvdu8x>S zA}GK@rhiz2y17QeXJky;smX}b6``Ixn0h~A=sCp!IKW!GY1nz`7(j!5ROmvG(LXfeZVcCkNw3}}N|3R_cC3G`f=s=z@ zu%_w7I!BsvO}T}uswa?=_O}!k!*>qIpx`Ty{DvfRKnyCz-vkib$I5&ON!^e-ckC(Bj%%HP4cl%fx{+oEfRhZK`Q+n|8fb*D?-giTmH8pBM(+$0JjGN| z80X21#7t;;T_faJYWWdD1J2rtM~~ckDWr)zBz7gwWwEhLNg$j-bT7@`ES=@a(~IM{ z-fas75xCC|2}%Dy;vAu`Z_ZaR$y(gEX+9#C_^i51h7#p`+Qt{Mo_O;eHIodJO-!!X zopG@>c)`ShN4!L^6y4j#ULk*G<;;L1tC%slz zy-R}f<2jPGJbHQd~g^(;4NZFEYg9nSEdAUIp;0DZUk zrO;T@X>>WQv3|&Rd zp#Iu%P>POLrHUfP74wwi)^LN0>ea8PMDWk_YATD<*-dZeSJ^mk8lremzeov+XTRw; z7aZ(f_%E(7I~lMi;Q@Onq*Xv+r|{E7uO5m1=urT(%$$q(Fvqv-zQzE=WppMYa;86%eVlYr+anTWMeK4KZZdM=Q@d`L_|&H!$7?zFR5p6Av=dh=L$9XH zM7*E&hKDL*=i89N$m2uPQQ(~AGlp*2v+a%g5As5aV$gIr7|~)bR(0qF{e->T%)~aE z)v|SFVM=fL|4}81G#uBYRqA`?lwTJMaxlr@ zfo5kq%Dfa-YoJV5ny?Op_j(;6SGZ9VKQ<_1yABc(y9mdX`pDTMN&O z>e{Ca{eTbv`;qP4h(L*v@xuNPZ9OYYgWLv@oI8r_Wg_HU?Dl5}n_W7VbrjhQ8KCdf zTlFn!h9@5e4#PTbOK$Y|L)o=-6$KCH5dqmi=sQn3LQV0Gv|nBjrq|>eLBbq5?)0!w zLq)W;;W${pjl0dJ0t9zETO41XdP_hc z{r)^n19lrD7nxTmK#Ef-siCM#`Y(#y;jhM?%=vPJ zH6H-8=i(<5^%$=Q9M6@8$BYoqVo z`rzO47<)J!+|j`>wv8n>O=?cb(T*3$4}8MHgpl6@_W!t!4wH=hnX4=WJI_!SQQR3| zWP;M?v&Z2Gb;eD}G}Wts6phgf6}(qmnh_~W&z=Kk@OGM&z1VTM!hdL0BZ)GPGMc5M zwsWKoz(3FZx*K|riNA5>;ax^>o8dKiMmF0Ye(=H_&{2?cbs7gFFZ?W&Mlm;|<_G$* zqxEKTPyB7XJLIU&a(?vizWMJPZ1b+s;%pzwzw_R>ecCOn#w*Cb9phzZ3X~3lpF|MJe~i&+HPbAKIL# zl5~^x5%E+{+)KUtqV$a?x$=K^nxaNa5V2$TO={fozxvKypMT68(*#)&rEdI|$#)9N z=@Xs7l$_7^kaMGl3>wIzYszO`Bf=o$!BV@LlRBL(Z^hd(t7J>mZpk}YpHu{B_ta%Qdec!u4jxadY6y@Dj zf7$)ZeMd5UVQ=vhdf;$o?U#za{jk(6S@fy1)(Gs-8$XcEFXr*_%&APbfsmKM^QJ2c z8_?c{eIvJ9$%H%h3PINV6kPY=toxm_lAvbJs+FT)y(5vTgc+bKueM9+%+{}7NuP~r zF4?wLrU1U~4H8hwXy7&S@%nG(DrPx~(zLI{D75L^QZ?t}ornGoDHI81PN*GX_E zxFkq$cMoA8xVyW%J2T9=lYHOz*1Nk^d++YnR^7Vw`)8`AyH7t)Kc`RkWBqBV9b|O- zj=-LNlEf^H(RMlBx0rBH9?5)@ElHE(A`%6MABZCazdn3dk*tdrF8vZvc*&ov@KU>b zYdHz*8|ZcGnHp(lExZS(hs1S9a}eoWfL@7~S(i66YfSInta=CZdVqgYAiW#Shucs$ zvjVIjtOjQ>jNmd_gJ);CO$r?b0!O>qn+i?u(YkFl<(L3e*m}h;;rLARuV)bC@jtmF z+r?;Jgqq`kw`CMx0bx*qfD10GfIU&-I>4$~wfJ;8rN$knvSCQm;gQqR^Mf^6wzoA! 
zRqEc)IgH;tqqdq_w@-GHd;1{C_EUXmr*Y8;RcFrO%WF`RcS30$|lB!_vhSwq_0T0sDNI>$$kOoNnJ5#i1UMYoMFB9KKAlMU3~^t_6%Pp=H!b z%LHRnzQdzaK!Q-o$2ait~Sfo3b>s#rJdOVbkdN6o{4*@{psI&wO&`N*|?I+I^oQI*%=WZ-W*4k zDTn*Oo|qfYRI`Sr=}>p(EGCsQYm^;w%(xY`rgEmit)~55_>kFdsR3;jj?H*eQCKp- zFA&sju4dm@xJ%eA=hQ6i>1r9A(Yxm>y&VM@ylW{Zh#~WU#HDe_wj!A@xO-hl2=CYe zgF%i5;MEW?9141k?qo3&!eP1>L2|Vyv9$mLPlHY=$<9I=p>7#7FuVOcmo2UaN0Uz= z!b0XLtc%d>^K$%>=Scsy`4A`rb5_cC1NKqVv+j6e@E8=3uLIct5_N171(+bwFi1+$ z`wy|F80aBX#mP04m<|BspfY%nMU(Mp16|(YM&nLVj?_+ORe%0XN$1aev?U=K5YLtQ zejhjIJ&}W*36Yx}?e0E|X94h9wp%W)SQ4%x5eAymPn)Q zfF=ef-LKPlR3}VId;jd}CuYto6NvOd6F)&a6~j8< z6vmNMh*OozEv}UO_z3($@$b^+u))RGsn$g5rS`u;voGP!rvJtQ_}^eS|1XEoKl9U( z^%3%emQqa;=cwqhFFpA!uA>PI{!W=gB@@~iwO8>*Jux>Lv~WrM z+W&?B+SR;!avahCP^fZ*v-d-X!}PZnMUQ2ojmZOrqd5+7aE{?qK5mvtKRSnHdh36t)MHo|JMc~S74b4C7WmWx33 zb2c(YNR&0WA;225GxJ&EgYL`S?X8sCbHda``^6Bp9zZ4oJuPrWrC4V=YD;Ay316nC z_EW#+>p?i=o!ql;1fy z<$OVR7*edxDtB$0=0mjCPOy4qGar*Qd^4Ddg1xp>($t!?)V))GJVAi|qu5&S#F^5( z#$s`--UN>?s4p8$Nk#DTqIzqwe)$r{zA5IqlU2vNFy(SMrd;A6$U3dUSo!!Kd5eu0 z13Pm(`+nwWh#UMcuah)fWF%B|M-u|Rjx-nXYI=zV(f z+ogCml9wx0%g%s+>48|a<0WZBO~BZL^ADSn+8VkJDU8pGxhXQoO-%6vta?q?mUV&} z@1u_{>Y$CIJO(-y!@nA!)r2h7AMU)|Y`N*b+xvA-KhncGZ}mi3c~)?xYi_44LDbx{;yT_Lpb=2>6W1K4iyRo_nHAlwU>kpCe1C9qZl?sG8&v8jjs!%1 z2CkGA`tA%sI7glQ*8Z2oLjcD*{fhn!<7bM*TW~4Uf~F`(5?ef@#;CXBi{P~fu~QwV z*$Zh=i4RJLg0X|iRE@PhTnFQIMTT+bR_Xur^-n?L`E4~-)=5fRmpklpVYo2pVf2Me zj;H6`u;*Or3nkw`_kG$hyN}2f%}cgiVZ+FC?Y}&vQie6T69Z`)$Me;DQAirvFsvkX9G^>hiNT-cFT5Za&td-BN!uL z=%6oIqPvR30=P$+{dfYjBie5CSHii-tHDs^^cI56CFe_7ospjCTon9@<>mIhaLSK@ z{$3Eb<0S$zp#EWa@7u9@|2mQvXymh#bw9^vAT@7A;fI(IAvMRw3HTaC9*#U$d#7u`GU6YOhl+?3aRg&6dEh=p zXEtEnY{%iwK$o8g8G>4s|0JF2d)bQT>qX<+uX#`qRc_|RewYwM|5*{_|2x{|p$pa3 zS6rn;;mZwjP8>>*doEP8Ks`4MnW2@?n{+Ir1zClA`LxrGH=B^f#(C&gV_*}Cm$0R2+GYo>UgK45|oPmV-0Bo?9Au-y3Gz*~)|LMFVu z-@t+R2x|SE;kIn>7y=pTINhD)TtUn>eWHyDLLQ3v=5d#@J#b)_-YH21)N@Mf8nZIQ|7K!pwu~9e{OdkG`WUYxb8h2UUmyKIAy#yLp8VTta5hClEjk1 zj8;O(=otN3o;^wiMm6Bs-G5eNYFV(EanXfgmFtS8mOA2vzV~8|39L#sjgY)vgBD*ZIr6~N$`$<8_4dmW2xa0H#b=lKcbf!|fSMT&+dAVC4UFAz8y zWy!{Pm}_%iSB}|*AbfLhSq8ep7d`GbIf@)Z6mo?p_nChU)jj)W-Q1*l;=byESOWD?(8C`%;t^qx{>3pt`yR5oOh3b zZ@mg{XYX6+1xLp@iw`D@V`q(Bmxq$96Ezk_mh-M3i!}0G z3G}$`-~M9Fk{HgWz;TgfJ%VpvDYWXsY}dq!bQ`{svi&6IwyUHn4XGrd%yW(QG_E{2 zmx;x|YoYZO8Cdq%AHi|bX~Cdx6MUzU=~qfJZnj2NfLGx;^D`@ zaL?10^NGXs5e%Ae3NpImA_c7qY!CCh6!>(1EW|0Jq?DZlJkGaQ@7RBP-KVMrcU!F@ zZ{G>w_`SToIjK-bww5u_T$0eS=mNhAD&fkg&@{{5Vqvr*ZV#6woh@6|aHAQ3{p7p2qIXfIhKRcdYNA`Xn5&KU3 z%(Jn>nEbb&w(f$mf_AL}L((*1lbAt&m3x6S2Ysowq)RPWM5p4tr+9x=gT_fnoOSfLHv@Jp*1aSxrXn|;70)w>{;kBvIpUO|+b#BLb z(De-2XHCo1P9%u#Mrowq7W6jN|0Q!p6rNdx+-ZRP*U#3|A9G611?yZk(bPM0Xv*`P z-zDYwBwf0dj(_9iab9-0%J-Rmwk#gt05P7+)Qb*ql`C%#G0_rK!Tl{=JcW^aHuoXv z4_$0-#w{e>g0d@S^QVMT<>p9MFex0yid>oR`q$dx{?!bgah*6q``rx5O1jzrl8Ex+2GCp|z_u!gl@f6xdnB5iu1I%&ud#eN zDUTTTLb5tk%@U5T$n0E2xhzhyIFa>NPNO5L z<+0AFmvuZ;VwNk;vqF`1#{=aMx?n%V=W|d+URP^dT%w;}xV5sITK~YsXY8s&$pkZvE6*C^GGg?kK++mzW%H&a?`>e2 zxg1}*%!E;@+U3Vb9fS|`Tk2kgyBPMVC*5Vhhe4%ps5`PLDpBboLHotu&Boy49N8m$ z40$EvDsX8es#OL)L_Xk8f2|MnqlPV+RCmI<;@1ml2BBQQ7@U*y&%=0+-T8!z3*~+= zld4AL$Ns61Js%!;5$)NbK>UAT<4*#P z)@gy{K>*5s65F`u1w13#6w-RavGxaa8s|j4ibi@3F;jy2WM;peaeuQkBnq;^W%=@mvAytb|0Z-r8==*+L6ApwX29CGG`! 
z0U+=usf*I@8omxbyB6EfL^QThi*N^blICVWy>oXFNnPMnhy~)c zc~2CVaAJqDu?29Sw1(~VKv?0Fr*jz}q6(Kkru6Ej_sH~f|$?(D=`|7(B+MMXvkmOeRpgi`hTd^P(+ z8a9HG#{-s6DTq$j1a&OX4`+!`vb3X!{%;En`;S69X!0Kf8%dYeTD$1BCF3F6lPS@# zWi99`hOl|tzWOEPi(k&(F(;`3z9INb7|!gZ-JIo!BXh#8eq;MQ4a=T!Nbih?{-UyDIh}kWxx+$Id@p^gy^Zv~h%ib&_8$mP0n_|5& zrQXz7pUaZV<}*JEv`qs(v<9HOS@CUJKZJVvE1$Qy zT=AK-6}Gedc#=2IHW}IXbwtd~U9K0Msj*LHuTGn1cb%G2_+gKrume9~a0DoCwn!kc z&Zq1y^YZx*q3Jp1YCD$P(~R@bJGxe|Cw%uSCZQ3_Y&Y$1eCbojZa2~u)!w>xqSG0( zg$W^Bve%TCMvXf5O+X&Z?mvGd zuwOd%J4cgk9gG~f_6-xi*Lxs8Oql}v?li-*d&?{YF;UIMk{Zq+6-KxnfsXL}q~uHm zToM~EezUpU4~F&XjxyX-4ZN+ABmRj)(u3u6{ZErHWD|IE@9TdxL2i8G_vjqd_xBo4 zzVF^r3qf+WvE`luJu5L6aN4prh#qllMM50*9FNuH-*v)Ua+aBXb1UGycf2Yb<4o%U>j zCq6S{bRK9(RuSV`6Hc~bEQ^kIx@ z*l0U6>PHUI1$72b_RNLPW4bcVMM)tB-x>0hgzz)}k<8q`Btr@r!nS}1{;xw2&eILX zra|IKzQy;T7rqE%oFIHmT=p6as!w31E{S0`QBU=Vk!<+PcLtBGdCN)cZwy|{w$;N# zY@__{u2AUFh2gs$xZ6!pYoR9k-IvgLo}MS|6ltLQZ^W1$7+GW|8?=blNJ;efOyIVd zEA_phd84NvHy)>5IQKBT`a1!OD#a@v(T|Xz`=xK`aym7$zduGYZ8F^8E&y3T8fvX_ z7mz|W!`#+8hB`80l&>A59#iQ4bvr31A*GF&2DV4xVtEE3MQVJ#)+eg%)cn$KlmAtj zbCZ3fJe~KC8Ow-hMMd5zDCRHr`23%7pc;wo2Yf(Y=!@k! zRMEu>YSl^nV$Obsbleff%Zd;8`7ghIU{)3Ed6@TBT5b)y=7*HrU2`(#ApC0_bbImucH9>i#b$_i;asxYEUttq zSQ|xI9+sx#`a`vzH*#;HU@&2YQiqL#iZSHbI?)xRDw^47;jrwh0S(YD(pgzdp`uAX z>pGD%jVmo;{Z|jSfD_~;4ZR3NLwn=L^2tLe2EQZd?|N>5*vya2&@s)6pRDBMD39L) zdm?0pGm;5Lsq<1k?jqv;to3nFn;e)T;aj;JhW%`3fb}@{0v?kN`m1b94TG<)4Ax9M z5xPag8A2Y+8V#PBW=nnCKRLt{R2P_J;iu2xL#sF9tXE!Jv;5nH^P>pCkM~ zD!mev+N6$A=NY&<&UNi-91kAqJ$2eI)!DO_W?yTG}iz+y+uEo zXj6AKSS$oypDmBv`mha`SfPsYmKrN9iQgONSF{yyU%|AtyLX8R$wEJ?yF29^M@ zKoiDQ;;Qzgm{}t`_Tg=O7vzr>mwOxh{wvZ81+^tUv1xnRAf5w@Zlv!QwQd6TP8i<% zw0v$w^4uqbxuNQbkQ%1^yRp4`f!x|CDQMp`tt1~oiHQ(V-;SX6LDxZ)bQGg7X5#1E z5!QmCoPmS+&Vc+iQZbRg20!w12JSOj02xEv*APvjlH?KzxLc?+Bg6GGNSId8+X9U; zwk#J$uFDPSF(|t;7g}M@N7epK{YU@Az0p* zp(jz7zuGc@S^op9l(ar;ed<`j>YWaARkc_^eYDB~ms3GcC6-VR>D@@|C`aE6*smB%tJ$$f zYYxh(`z0K-qplIHlSlIhpg$=WBVRHJfIdIqDW_8y$dHket;mT#i5$tMIIW96pP7-| z%^T&}rTW0WeW&ZF?5jcJZ9@X|<*4}n+8hhlJl?5^Q7b8ULe}62zSYpTjnhmzG@>Za zQG@B`)9|IV@ggkr?Hvv&M>iSfJTCRBe&;<#`q8E-c`M~D#GI>AQ!*dP(){N65ba4b z8=Sr1@@$&Vo!-&g!z_vUl{fW8fn+QY2aMme6F=dk>I&x^u}sW}rNa4`eXJbq{_{)R zfex4k*cMXyEkLVj0dMt!TVeCAil_|9q^(+4u@>TDmDg5m_Cz(t?W@T>CHFy%}6CM-2| zC%+oXTtPU1>(&RW6$io}?&eQ`-{Mbq5=ZMt)+vR8>KgaUGr~4DsF8;ig2`fM-Ka&R zhLmY;PrFl3Rb&)&o9v`WTl#GYEmuzKX|CL~ZOr&<&R5lbO0a^kt7K#!>^p@QTap46 zi2Of>9~(t{)Qe5Z64_lVW5y{>8G(OqgZad)Tv6r|N>7=ldL?n?Bq1f*=eZ>>%h3uqtE&=j ze}_ljjJ-!ZxjLtOeP4P385@vx-k+}@*#|?JlJHkareC0Hz7OhmAF>}}uOcs|=7>yV z3O|XaWTH)Cyz}mhjfpVrw7_{nLziG2P!6J}LDL9jFxF;K^)-dWy(1enNQ=rONYsi6 zl=a>?496&>jnjmo33GZsqtY9TV}G_0{RVN4kiJhUT<;QmjGQ0*AF5(rw?^YWVa;$U zmFs5{zR3NObNkol$?tX`cwS92z%zUnLa0l%Ba8Yk&#?2IF{kL~3|b4s?)}ow4(h!I zTE7;qum9*5lKG17I3L|;#r}!)5Thp3`bxQrp%3ATfJvpopp1yTo8oc$ZzK=lEMG>> zw5I!N4%S4GRsU?x`A*$KoFLiX-sddQ=52p_MFJvko-54tbk^G8#%WK8fWK5Zy^NW( znk{f_$_vPP$VWm`NK=ZTM1aK{Ovu$*5hZtpGeOu62z52k*0u|*Q|BNU+^1#n8O`Aj z5x@paamVg>N!%`)GTW3h#74v6iw$st_ci>hc>FW1Mpj}hf-?dGo>WWwu^99xlnXbc zH}to?xX3GE2}Vq6$0RLaz;qGk%Zek!#19K}>43Sb#<>U|iP_IIqT@Hif^m&Hv|*Odkuam`EMJWO#bo*s-XDzIXa#6AEg+)dnK7c7zKP1|D0C!$K7MP=W+KZ8#sbH zm449X0GvxJ+Dl@eW+`Ll>DHli`-IcwbD%p8l=l;G;)x^yksb~_eq)N`O59~Bkppv= zaNu-8zXos68kc<@mXncANggSUn9%}KqRBKrUQO#X;vLox`A#?lPUf_9UP89ar|$;7 zLj_S#61yPyEo1QRkx)B&+ZVe^eUU`W7waaCLARhtsJXgU7VxG|SwBfvdyW6a)*$FRRQIxyO5Hb6B<0zP<^__d<%%3}$z?e@WD*nT7=GPS^#=!P1 zIB{a_A@-r5AS7k6qGD)>t;pn_6ag6b(O-<9V~;R{4RkbW`tqf?L_!mt{Km>-2S|I` ztKQDnPS&Tyu-s~*^qdml&=pTwad7T|=sRcLI-OtfSp8x=teZ-o@V$MGMkyn%orH^J 
zr0Su`Vx9G3d0$P2(l%t9NOwi53e4?`_bVuTm3TfMO+4~T2*+2@wrr>W1QN%t;VbUtP_4(jkf+L(CeuWUzhh?3p~ z{V6;p>&+So_%j?U$?6C)EBhtDkH4=C|A}UIR}NAjtp?-(%hPa3(xE>z=hjRn0uQN? zF2wxpj0l|5e4XF5xhjOvTtTuzMS4*Rb^TUmtMv3l$RC zKbFNhkuQSRmkT#82X_Bo4^^b-VM6vHeYk=bGF z(257HFG0*fc=bv-^7zSY9|7WJ5J4Q&1z^{#`I>%r z@m7kgF>#yxK_Mxx(Z$JwcD|J;)0aKGFHqwU{u_EaLg0?sx(Z#Xix6#Y?Z7%aDD~lp zfme}MHQmjTK`{4s?$Ln;j$oH8|H0yWsg;ln0R)lZyiQq*sl07)T_>@2*cI|pV`+Bi zUDM3Gz5h_nhw-j1IKfK>_-8Pn>-)pM5JDF~ID7Xf%m{>Ae|)iBdYVnX^zL(S#ucc06rI9T^T^HXppKK< zx*~%4n~f8~YX?G{w)W6XdjLUZk~zz;T?Lf@47OIg0da67EcI%}MbffytnUXOG{^_U z-fFKmXWQgarNSmNBB?W4WHyKNEjg8m0F3ywFr#C?DPnB(qFrKBaPevT#Q1t zva^)v3-~7dMKi8fhxDKpd9Z32sEZNho_3A>c%oOs#M}&Ns0IOxJan6w{k=lgQsM<4 z%iVTFwOE*NaXP8cI5-E|Gct8qX4(16vFOhVUnAq0Hx8P0If>z9mBoiVTL3oHDd{rzg>PNm_aX zi%m8fb9MI_7K2+>A6ndl)rYw~bw1$tOEwijAFV@1e+OQS72A!*4GW6mbt&dFI&I4U z`;(=ztf{3^kR?y%ImhbA0@t)T@%7x)?q}L^FiM{@l4lJkj?R^S3Wq-AE?UQmhSYni zV));bUa)_;H?$F>gq-oExviQ|?Tr`zw2RYS!bL+yAl~6jlHhbc#_t%=lmm#YoW2(f zg)k&Cgvicp(}-U$`Q+Hk)9~1myP{2&>f~!`b%pK?grG+KwzOH9xA{r%(%}TrVx7~k zYYOkWV~^xMzup}kFT6e(4)`TeeCKQjokW{foyZSSihS^NZCL~m-Ip~|&Y;RF0Kdpb zu-BeK0RQMQA8bL)e2uZ5{qh`7mpwwZ_k6&z^S<6i`L_OVwUQiL0UJs5qk4@W)_f9b zW@7lVNM3IuhXF%&Lj`2lA(zQaA#LtX1LjR@sbsH;GwVvV$2zw&o@LKC$-zTK}8d%3k$)}doU0t{0%BP|l zgf^pN9>x5i&*wt8&k>NvUw?{;3KQ#4+^N#kWzYZo7Qhw+8exUNT3{!?xF{P{Sm#qi z5>H2&btFW5n}|5&7N2rVNRL&bFG6k@WC3?*#bbwe+%QHJEF|BBo}pFpq#k%_RK$(n z*H0)h_l9?=iQ!K^H9WrHLNqha*no`S4ojg#@z1si;u?({;NUMdthAAMeKO;3$JAmf zOXCyB^&|=sKIn%2ER}r}sG>XPnATL=%1e2oM1mKkC$WPmI}0lE>D6r)ns!~aliQQ( zhhLIi15d$%lI)P%w~=?IY7K~+NzrHe;wgGMLyYRi?32zB$^A#3SZTj6C57M*6E!~c zVl&$BRXGyPl?>+9(Bj4&DKSISPOmo`PPtC9M7Z|5DRV;?ezSb{q^!(TIGl5KU5+a- zf_+TdyP<^bZaX7d`#UD{sW4_5$1`N+lDc*a1GpXT{MU)IK~#lWxrm59mX(#4o2=XPhnvo+Yb(QOgkx~;|0?v!#I*m z=7Wis-iEQ=l*3SwYq`u@O@mRB%b*$`B7Je|C@w_UsYciDwLyqL-FK+J-6Wt+FU4ui zcnAE#2IVPdOY{rQ*kuTPEh%T72BxckY3QXqgA9o@|P)O=cfsO=LuX}->uRsmOa>s_(nvU>wKSda3^bsTis7fhixDI@QMGm@CC8ZNo)Jt zV+nn?&2kFhJn1Id{T>8gq4qSoASPLatx@QLecuorDf1r)1;mZrfU8cAy}Pr+qgUp+ z5FKPgth@luHl>6=NCof^G_Ixs@lD9CKI zeQvKJh?DgML~VnvzYGMaZ-IYhFtN0&pDO+?5XAGo$tR;vaLU=4xX}%IiSPR`ypOZ3 zo3cX~`>XiQkv+vLZ}`w12u$4HN})x;h%VyC?78j-Hs>xqDPgBrm9paB5gUm8 z%`I{|5Y_JR*xq!RWLnJEgE2?ZIpc)LLf64HCQ;RwmfC03Ov8^T=Q6FG!xjF}D>q zi9?@bmXew7cBRAkH3mcMP0k?^8=g1Im;36+x^iIOQvz;XR}xYOZrrbp?d5A?i@CUQ zplM$o<;pp zDc~aqp8C!re`=S>HryEve`Q!g=PzrGIJF6o$w@|wMoWn| z)$2i#79*85r9H!71NHG1N;c)ea7mU(1U+nM8Uh7g-*vY~KXo|#`n1p@E)V2^lb8oA zK6T2Hf2W(FAc!dNv(X=I50w_a>EtWV0_(y!^M-TKDSYECr;Fr&y3L0sYUQ;|;3W_t zf%mh{jg|TTu?kZa(2TC>f%*`J4=FH-sCPQo!xoTIizMV<(w&GP$KW6U+EL$yW7Dr~}EX7Py8;Z$NUH=cZdRxv#JrVJ3uZ(C&8B)9+ z^i}x(R{W&wL3eXQNeQV^rElsD?cKXrw%P293X|b%C%4r~-PsvRme8gpEn>agH&*3h zATr*?H!E{i6 zgiiYILg;|xbF^SDJybdCD{a(!bg0PTZ72fL9@4ntu4c)M$0l0i~2IRim zcdyQIv&Ykt-Fh<>oMt}v+uYvjavO=g&z?1WYD0OnA7X_r4V023v1|sIn7r0%;yRG+ zDoi~;hD;)6QQ1Tc&txrU>`9sssfJa_<=FJf4ZytJIKljn!=Gwh%duTha6hn5e*WB3 zTUx;O1w9>v74}B|OzP^Aq~h+hdkFYR=^jE6-(if_*s-zJg0IJah26?z+%j%cwfa~} zjzX(p>ue1!LPO`U+8#!suET!?DLyY>35hs`zC~aQRt4*hNAhTAcqT|m0p-`wreP?+ zJPo>pdLnN?y1v`TeqYxeUBt^#h=39?k0B#wEyWSd&vhEtf+;_~oUEmsOhF(n5!4>n~W2CJ;dTZ<_N3cW5jW9Qig>v*B7bjlatA>{9a9$EY=*iBQ*eHL;v5AgI-8d0DGz zN_#qfYy!VOOh@1O(VZUai^O{te%5s1ZCPV23K3_0yWP#_+?R51F*mVW71;Am$3{Vt z()X)!uY}^!onqR%NZ%U+rqX7Wr$svG%XslWh@zp7#i86Ajry8DL z6YK0qL#$BK287FpU=NGW6FHmY*HqI%yD2i0-RmY8|dNH=S>Y3#L7n3d2|! 
zD>GfR#lD_0QUocLUt3bRu!RdmO4SP_UC!x>Jnre`zbP*Jg23v|Y^vBsEgPHt64;;b zAXf|G7lYU51&#)^F0JadH_s#!ra{GD2w4hpPF1%?YqXvg-uGP=i0C13q;`BiUggVy z->-kFHu#k~;TXt&^2NfM0GZJL&Nm>TxW83>pWxJR1|P4V_P{A6S~+OGUZ$F#!S6#J zkk|Q~)d)Dg21Rsi7~ujz&l#(=+h?|uTThL zfmYO>B}z3=dz`&{OL9XO8|_@$qv5%s5?h26>{`_~v#>#ub=sXz8>O=GvFkGia+3q6 zTVw+M_<$B?;VdTeSk*JKyHjupY63B=x%M*W7@D8TZKYDZnUwDtGD^jU|A^WQvR6aG zg=V4=bbPzCf**DtUDOu|^{=AzF#o`R{zP`op&SJSv?X9AIFEl3b)gb8K?XtsS8ETD#45kd!;Tv#r;2 z#pJ2|GQt%8$l3rLtjz@;UG&n@u8QeI41dvM8=iKZ-AoKb*^!!!CBQ#`1-J!llM=x#jNG<0-&37N zX4ofzz;JsT=-uO={u5d1#WgGi1L@q~T14+%POutq=kzS2OoT!XP4PhxUmgcsT}_d} zZ>)BArF%bs%`Pw>((6$X(sJ+mkhK;2nGTngIzPt^-u-npy65Hrr8_Ue_IK{GEg3?2 z5;14rbllecHQ8Hp4OE`Yl9H?|2UE2NCv%xFl~$egHQr)4H6Wa}swnBhnR$o$oD8O7 z2mD*~2(@bLapXTVZ*MOrs9O>yIC*OAGTniP0L%RE_h`i5@J6>NIUXl=>sl@lI~Cf; z*MVH_oKxR=gq09(vySx-BBSL?(UjIG|H^vj?$S1V$!B|e=u6c zP%9Z>7Q+%HR&y~@mta)=!Q{2`h)F#e&nzc~^nX<5E&Ljqz`QTEiDjTv%4BL1|CXSr zDp(=YFp$qdg0xWNBhUY2nTA&P_Zf6{Qd7gZ$;>bJLz%8NI@@L0iJvs3@mkWP^3!~T z3B%a3QZ>0f_Ct!{{Y@JBKH+7*3}_~pcF)3xvZt4baYElX`J)6Pjjmp!hg!y&JoNgl~ip|WJCp0={G zf0Jz`<4f;WVRwv9_S;3&Kn9aTWbjBh^P3%1R=>+3S2qgWS2|b5oo0WaJkcy(T{6@| z<8wDlOspUO#sXNXA0*H%2g<%>MoQL& z-kppTBhN;7L35=!x3ZngU$C!si7(fwQuD(;t51|jfBzAncDE8?9RHS+*QDzq|8-*d zPA)LeO1#3c-v!PNsx2U^OBC{fs=K$@r50|sd0vNWIrlDijSFNnO>PS!+*@jG^rNHZ zEX3!GsyAilGO)f8Eptbuds`J1CYyIk)LP^}BO5J*)x5tpe8&*{`vApG3a+*ruyrj!&0S z7<#pY#SaWQTwHcEW>!sQ#_gAcy_Kw$NU>IM;_hxjvW8)rsjm&&!UEbZnm#9U)XPyIKSf+8k5~#CNq`%YK(3vl)rNX+y3=TgyVH8b>#QLYK53V0AMDXMz&n+2 z0L*ZjaX2Mry=3sSbocZ9Ro}gPPOL;kb6UFB=|`HZ{jrcA{=03G>6sA03*IesIuN$w z0J;LLT(Q|gF#C7YtW!Vxyr%pTcIlNzg0%JahC|V{}z)g73c|Pri zVt1a4OqzXr6|)Eor8uk`e_k_RCSWXR=DgJG;e1p1E7t;cR7-Z)xUazFac0ba%GGi- zS95dsQxoEDj6kzJzQ?BeBJi=PeLNi^))BgddcF8<3I?sMtt$C`K6!rf@fW|#ZjlJ; zVr^@E4qw!S=-DluZn&re1pEd<;a!LOtf>I(d|!+5J%YdS*h9tkr%hd6dll=Or?!#A zQ7c}nHf{@|ls;D}*97x+OU-=n+Ltbij-1K8n3f)`Nnl9aQ3FhPpx$B*tLS6(LyVM! 
z-I?s?c}U~|OU4+*ZtO=**Lz4WO$93Ofw^QP0O2`(f3y^N=WagHoy@mZfoc&h|1P30 zu_}XGeNT8bzSW9zuoPXIbUm72ON~TKSmbA`pI#J=kp@s&uObO%A1D(A~ z<@Jc=#JqLvXOj*7ZT8J}1}$EordaO&jSJJ9C*`lOTNVP--!a8|c zyQ&-N7V?YPwmsfqogX;uLQrWeZOPveYcKF;A>mQ{Y@6>D?r?vqzKOmcz8-jqSV(w2 zW2U(0b#xmdu4aVHO5 z<2`jSHAm>NEfKca`=6Z8jE-)W@#ap#WxLjvXtCY<&ck3TLZCn)!+74%W?e+eVl>L9Vm`LDA7Wo4kQ5sfBW_t#jY>T*n~KX_EczRP%YX3-2|)O6!3YjW-LDdaXAD#va=nllPxxR?qH zQDrRGjNjnVb+UQbbJ7&Xz%}PFfh=c{Is5+JOAaT{ zBRj~IX_Fv4R*-&#LRIWXWs+pte~>Yc@V&p}nYeEL@_RL4qt(}J4f_x`Op7EzS8&79 z^Li1xwKV5^h&N$M^H;Wc^UYZ?7rbl!(Xw+~FdPD-&)8r$vRW=F+Lp z8l%&rz*foPtbvK8Zq=kqT|>v6keG`zE4#3%ge2^au=vf|Cz)2tH^Pdvmmrw4hjOmX zpqkppK8neEn&yc#dUlNkD?$z0B&|9FRO{D$?2~K9|Bbx&ifStAqJ_nVNE47EEm8yp zsnV+=2nd3p6zNq|dap^OOA&-fQF;@BNbgl3bR_f+p@$YaNg(a!yLa4we6Rl<<38QT zoMh~?&&fG^uD#Y=b6TA=%vc4ki?nhwB$bJkWp)e%ax=PcB^`OUdD{^q!3b~B+yPbH zM`vJ$ph35t&uPz)S-z#MSNvW5keWx9m5PT=s!z5)Yf0kpCwQq=He84AzAU?NmBPBY z!un$9$<-L^4K1dxq^89OteMY6j@9JBrZ}NJ@YwzTtPl_6Vg#?La-vF@%!1GJq^z*CVWw}oR<2X^QY#jqsuw=j%c&|v zla46`O;%ndTX9$_@61P&3Z$>^iCL)1a`a3M<1X)Rt;mgJsjU?3nLU2hg}Kl}Io`TqWpO$D*`?ybwYA(_uje z^O@~xc3x>uR0}nJ!!I*@8~61%Wb%fR#NXiP+b)%}e3`5+PtH>rk1QN%x5Ii#B2~$U zp{LWiH(ST6V!KoPkn|my&nc}GQn&s&&2#aN-3&DaOS^=I7*f{oj-?Q=8S=H5&xuDT z_aNEtT#S*b9}c>g&C8z>=-eIz7%Tq^6GQKkD2z=wQE**|UZvHn@Q2D z8MA3~jtl<5MMduUPk`6tT;xwkM0ZeK@xN&|wi0#LC(f{yvS@p5g84%ecBd!A3N9~+ zcHb$}jhd?Do%qv{;sMjDE%yDF4n`Dj*EYR^V+32-wMA1(AJPn(hDyfhby-Yym#k#Y zAp#0See5s=>cbLfD;Meh8Mi0=##W$B+N0BywP-G^!QUcePWT~O<_xXbc1lOvexlGM zMS!b_@H4*d?l%1cW;I5gCN;oMbL{s%7H2*sHK2dqa5huB?IcapMq~-Pn3AoOY!ImX zIFYPE!V9Ys*=?lr1q9v!*?g?mg5UaWpxm2}FDFkM`HUW;qnA8tCgF&o{rHX5J@>E2 zUJKmy;GMrUC7a}`wwsp1Efmr!;Kz%_kZ4`%g97ep;v7&fzm5U1FL6VA=Tx*&J^5{U zx-f%}(8sz2uHU40z}59>W zs4}lflsj%cH`abW=0;3PX_K+&m`vzW62P4uJOd{m^0n0K5p3Dd>i-}Jda)om9W7isIqXMp-)vH{h#Vh8|fKdgZ1EzV@<%z zG;4W_5AEcnzzo)WS8*-`zKOhH^=cQDf$dK5C5P=H^g;IaUtyYJ_6J9U6Vrbr;FUw6 z^~Ds)8?rPlYIr5vE_DUMB1P>TVa7-bkv%YoxXRp}`FRc5PhLmmC$9YMEPK@i#1p(f z=6E+8sxEY3@iiqzLn(P&AxH_0yJ~Tez&FM4!YSJfrtg_&oBw&v&dJUqo;humfTB56%qvB`$YykwdR?<$x15lLO|gLK$o6hMm(I&t!)> z0nr8L9SNS?6RbRLElys()iu6kZ_5cjef-3Qrp99Wu)U(&OiDHu>8(cp_tamLKla@) zp@{@Ye2H%hG&OakPf|T-Mgkfsl^&g;WNCW>?5>S`;mUyCC&1?pL1(LyGDTrfWmTKU zSx;6Z`4?+&4sJwGX0AJelq*dciO(7-6ML0sFIbU_*qLDQAwZx~i?u{jip?-rNJnW5 z@`UNblJB%nn@0+ZQQenNJtJziXHhH<0`+p0e|_n)tIJ-0J1G3WKdZU_`68Ec604z| z&GaGhQGVrzY486A+wDKh48?|4RSyTeucF9*0?WUkg#QNHwmROm{~E3<{~g@(zd^OZ z`2PgE|38dgHe>kj;cUmC$p4=0sJSrje}@8(f1Y#x_iTflHDLb@`5HCdd{jr#886YXuw#qsbHbG86itCBik7>C)<8&ic{g;vxl=iQDEUmAdb(19> zv}2wH9gPX94d`uB9kM>UEqI1H zs-_#qs<8(TDw^YiSKg+WJQ71A?IVZpL)TVWo$z~S)VQn%px@@jGSZzD8%@Q4)s{2@ zWq$i(W2wJGli?Ud#UXzqCB>-&Gkg~C;|cyx-w_@O?a>V)UQ@WFyJ^p67q$f zZ*Jgc>(tISosr_r)O8qD&|sDk2KG=nmetUIrRT0NO~rSqWgK=R)nrH3q|NWi{oW1VPN)?vr7FQdwdEj zA_T`}Y&TgzO}SFU0qU{h5dLfYVV=#gDer&-66R1$Krdet=A4eG*j*2b&u=+&$|$$y zI}VeY-+HYuc%w$oiO_Ll;MX&)*TE?MI2bUfbY)? 
z=w8yvPH$gFf$#w!(Lw7*+n4IEX@4Fyn`iGbLR^xq8QsGjHe~ihH2-8w6*+QD=Fua=^c7Gc|LgsYFF3fu zJIr}^{+F=+fE!%6Rl(+PL|;+C8o-8W96ckI)+mwx)q-HExiy*fUTmj+Zn1gV&N z-Qw`ENF|QfJ^B+`cu)oyDa2W&>jJ<}Ok57?wcj*v_fLnNek9M|zz>S;0HN3?y%V=; z2YwEXHJJVhLd1zG&3Kq+;8d}#`hV~4F9HMlP zkR^ydIgt&_WjHq7xlN=nfOc=DP&mO|ha-3Su=3^Uj>MZCNhssyvyDNU zqIlKG&MH!|evU@Xd-$(t2A%BS3+8)Zrz}u4^l~#ojT-h^4}EIvhwuIh?|3@=w^K}| z#jX9(+JF$Fes;)~{9O5W?@sb-5EFz^nl|^-8dI4T8Eaq?&>`t77j(oo_#TJh#GzM^ zw&NaiH?es)6HjkL?}D$3{X9;VSHwm?4)$4KC~H{YeRra~FMNnRo zC4&M)9?uL-A6v;^dd*9Rn1S2;ZOHM}K99_9%nu2F{axS3f9lLMFS*51c#6|S)c34wiTY$49!?@N~(xsj|tAN>xO{kJY%d{yy#pRyC&(i1*+&oMJHL=s|>;=AxM82{Aa)zA3g z(KxY=XBCO;5724dgb)21*DAB@fYzX4#Y5h}h$)Q^7O45U511_f!t+W-x8=nfQtVWV zngCL9JjpDeQ@Ijs{i)`q@R%dc9}#!>?LB76wb@M%w_hb(zvFUWb)@lpq$MfjX&4t# zwE!MNq!h+`k+ksJ;Y{4Y&%oqJE=q^I`(d8pQffLlnow{q*Vg20R&9xA&mGzPX1di~ z=g%e%vvqN5#)*CqGwy?D$mYE0Ah@^wMh4%Gq7#&<1^5H>~x>&igaJQS{{?jVL zv}K8XpWm4TDB>L3aYv_v_+s~h351=(d}qoMWxtdF-I_YgA1Zp;($+COV$@DrHDwQ& zOBuhet;;LpK>OUTy5;+lTgGDvD%>IoM?7X@Jfa_(Y+@iY86K1>*1nIt$MG4`q^##R z11Sn2G$4MJ;d4V94qeCc?`6ku#E|qATU7rYfneS|_OW3?Ce)UOqXrqSlntKhYKW)AV-^zIBMcnhgj z)!5k1%Vj+r%-N)nomlkmVV@`A8O-DT5mG=-PqalBczI{$pk9%buq5r_%JW^WY1&MR z%ky(XF%TOg=vb%x>rPhi{bbqX8+gxFM6WPX_1-{93Y@zZB zlA|phT!>vS?mFZlYAsWxxtgZ!ElQ1yfO@Id9u=$yFsH;Fsl(kH zY8zhkt^ zt(czVPF0&;!Mi8#*ILws$M5iy_VGw}-Yl;m)otU^g8+l}!{rw3AxnclmgaY!6MHwD z0>DK)-x!*`0LK;UVh*4i?Gi?Dq8tI`{P+J5yeq>tpUg?tg}Jp5*tQB_jn)K!PSM{ZT6Y3d8wrkRIdofVAHljTID}U;e)S<57*cqjZ2w-e5jU* z)nIgQ=_RUCmf7z*b(D9|X|E!!11<4jDdSL=@TvPUM3wWi)9MH8r_Cs=lfoTy=2Y}I zuPR?L#)jgx8)6Fz6lKu?(qGXea^`yMtpzL9sfA#Duc8e?Idt_YGpm)#b+P-ZNq^WV ztrv5eyfWjb6!54ax`5RoVF4ii^VvQ{+Wm`@8@;cdTQGqrJ`|PXDX@A>ePFjYhfIk@ zw2JY|537ew#LT>b5@RW-jwj0IFeY{^LgW8iYpVezT6Xsvfe%659z zPaQ5atn9jM7ugbYr)uFPEJmZ(vP1Y|mc1O6G0#G}6-d{7<=aoPp8Uje@qkw0acp!) z&Qg-Gvn3)>xAZTzX^T3Dh|to@Uz*8T43*Ico8bO5aI(cy0y=t-^Ovr^Qezx-Mldsz zARtfQ)si#-1uyv(>n*Zxa*odr7`#JdwC8Ig{=U57gPV1mtq)90DX7j_3yyEW;9duC z9se7^;VO7gDMmSb9XE3tUjNbb&#qIjGK`x3*ZpY%r3X%I*ED20e6y2MHlx9tYHepc zDg#_D(4Jtq!%{Q8ge=!{h8l`_=LsWDf5Nvf{EG`znSItDmGOSuu^aFBR*O=`dsl&} zk_(q))|^M8h5d&hh_QkSeGQ0s1J3WSQrR;Sd${EDDiQvGOw*-%jyBU=5B*NxK(?!7 z!B0uTJM~W*bzh~VM$A8NC|X({E%}T2bJH%xfembNq=DaFZ{~1MIc~UV9DGC_0=hnq z%(*ya9sQVIj9BHc83uT~2av)V3s>Vy&=(w#kVbb}Zi@KeBEx)~?-cuNEAU){08fVf zoJXbv_MOSyKzt}GtT%n;>q)9Hh!lovM|94tJYyu%$7m;b+kfb;c}%2-IN2t;)Dam{KD?Z-hf!#oC4?>cax0QI2V6 z#Vo=yO!s6n*1M%=meecqmN>!a?Rsj^>zXpix(kGtP-zzp&h5^gUVp$J9A&A>=n(1h zM{b34MlTg};4Rb6C947Al;kwWSLf7{7K??;+vyOUR!_}c>KRaYq$`Z%U3p&S(m=}> zjA|D~g~JM!1IXXO6|ZIn;0=CgRVWI2UJep|BAe@HE=d~vq0xzr%$uWDuu~#3d+Rb> zC~8SA*(&J9483Du#z?zhj%g!{%Z{9jpsM%QDfCi^EiKD!YbAYo!GQGj{z|+v$0=$?u`q}U_kx40YdzHoN zGmcxZtFCxD3Z=T|r`h)8*DF?Lz5J6pI~-J&av+)P_WrJZ{)H3SevYWtvj5pxjJahV z4l6YvmOH(IuC<|FEKhwN;*GKsjCFnye;4dps{-PjI%Qie11>9^XHqDQ2z~Rg`XSmG ztDrJV>JG?WjdqQD%}PoSh4_ci8C>_H^_oENtQF8wfe4BuPSF2f~HaI0E`#)nyUWa`R2mLJE{;I4`a%|9d>rwM3BKftn}VR(A&wF; zA++-zdNt*6C@#gLnMu_mnBQFD!UV6p(ynOs$dfeBAgkR*-tXK{T~wTBwZpYuURo%1 zoMN570&@E4&m>706%;34Ya^JSC9NRuc>q4jUTABKc_#jE;>y;<7MA9(-hw;1ue*g( zm10w)k#bM(261=XJfx$mz9OK32i}$88s=pC-I$h`bkp5^L^VvI;;^K#c=DO05UnfO zu|{C%jz!aL`x|>;=NzCL(z@x`-C5Y4LTK?o(hzICu3rr(2mdc|9!Ca!HO(Dt(whF! 
z(~tCzOl(J;9`}%ZNZJ@{Oh|IJD$x`UP$rWUb2_5^?{y~KA&I;rIrcc*HCA3tlABM> zeai&z47juPc|X_0=mz{tlGZ*|B5d%%!>P9|x7$jE0$#+c+vx}J8;d?G`898}ZDiE> z#EzdaxQiTrQ@xa)Dpxb!9F}%(Fumj}ESD4!`!1-o_PtcD>_5Ma`yJ*4)8a=*XxeQu ze*aWz8EIw2ti<<%w@OfnQ8+F4?HiQPS!wCYJqgAbJPlqlQb+)E$yT{w`W$8e#RYaK$MvzTIy9P4tKPaDQ7 zkud)DrEq3QR+XJ0iiF~Z-VNNBxF|z@3Mu>4!<+;m8zeX5O#phDu`69K?lXk~$@V6sbg{f%S zr655Ev6m!wP6c(4Eh>{?Ke1b^LBCJNaac8g1Uz~Lb1nL0H73C(rZ03HX7A*p5p+bv zf=2CNs#Y=m3s35MKK-Ju5^=AL`1B;-1Q23PW>X!F2OG_?z%fPF^EVUaSEZm`nW65p zt}mAo72Zk((s-z)FEmmb6Y%R*f*&0DA8#nyy3mDh-U-P`?pUuEHdZ(iB75ZM=;|f2 z&NZi${X{w(vud%N?hZ0&>7HQ6Jd0mL9+Exze&v~1K{b>oj|Wi5LU}=G9qH!rWvmP6 z1iVej-;Q2}KFR_II6snf8@HHeSSYn~H{fS6@89kB6WN{UW`&9xReja#YYUGU=2h_O z+wvJ)=3!{)dGwnep}T<^=%wjHS0n=rDiq49Q_9NIVaDa65<3pirUMB)s^IoK1Hyze z0>+wE zhCCv9=eZ{FXMQpIQN}|RV6BSfRjfb6seH|mOf)Nh9nIbGLF8Qz5APou+Q5@PAF($# zT}s1s?JAT_7>_?Mbo>go!dlJaPnb=8W0~{J)GKho)j(IRkqK|vE|5veb-i>t;%Wxyyn z*>?9CA=+gj{_+5T%S7$wZAOwG`M0lZv~+n(BsX}q(@O*vxB?frOtZSEn4qIvK{t$7 zS9|J&9Hft5-dRR}o5Q2baO2Y3Telq@ef8$9r+lggF6lu~7$uekN%v~oPU0PYfV|)F zkRz^uDaJ|br{^4~+HSmU!{3vRO6MqWX!H5Ew^h+~aL03n04}2=ng-xQK@65Gz4eAz zUe0ONU|2uGHH->f;cRt5MraXN00^3Uu>eHw;^!LNnkJuX579$U_x(A*w?7m+f?~PI zKPi9|1ZR_P)al2iy6JC+!(_HgaHi(D(!ERO6JT36-F(IC)O%?(?LUcx9QqRDEHj^l zFJ-`v;PsYokRza$m8XAC(Y4Cb`k0K|2hN^p(U8ZjcFCy2@IMxqWEekYk;`l8nOk+} zD_w=T$F*sYf1F^ySvMVp{io31G#XhHqiHm1!3f;bdEdQr6Xz)`3$J_3Zh4Q!Y*MbP zc{>YHEkKz#f(%dexEj7aI8+7$rYn$L?+okR}zgkS&B2si&XoliIl|Ao){sc^v7j_x&24tQ@l=jQLq zit{;V&)EL9WM0=gNxf!?-CpZ735EV`ctc--e7|f3^3E_o9RC;%xuB9!M zYo4?jYmI!o$Z%^5&jz6#`M*=u%X{+c`7A88pZtO?#d*3e^w*_)B4{`CZ$xbVhn8J; zv|c_cRNRXIr!arn*3ly0M{TE-z%4~3c~nHaeA zZ&IO*{)g%;6VGJ)=vVMjsw$S!*(oP6rRJgkN;_^v(_|r*pf4veK<;&rJ68|!x*~jW zLMd2IY2_C=t9ts@Wphwi;0eyHhhI>jAsJSfN*kkX@f<1&jhl*`cR)3$i99){K zMUJ?JuH7a&38H%v<=S85=OmsUUu&V8qu)PR1$?n3o_L;mEDai&KX7qb|N6(mv4`J4 zOIHnm-~{c?Aq(9btW@t--OzTwTQ@@z2r` zCCxH%$E4`IE-mq64l??GdrFSm4PK~tq=(T%1G3tS9N%s8CIwL8Pv+r(Pi^r@yo!?# z*z__vJ-|P|PtZA&t=P|g+TpR# zuqxV44jt1Fecp&Nq8vI$c#_$c->zX&SZ|57o(LJA+w}1BJ7rIEAg)++&*XZ4wT@UK zztF3{U-DPo932$F7QK!P>8m7Z{ALQ7H{AW9OL}?c9uV) zKqqRY=vM!PzH%s8CBXHguQmTIIfGpD_K&2dilCKffY0oh^0(IKVI|b{*0AZTM~;c7G_QDqt15zCbhaA16KL!T6K`}=cYbPrWROP z8i^#)&d_zC!-mK%*udvb=;!0ilwgwtlkidzZC$;eTBwD(?m-3+RaDpyAX$EK=4dE4 zA%MB;?E^u~S8Zt5XSiOkOvCuKOex}Hv&L_9FATra>!H1pd(S2J%Cvm0J1Whp?rl}< z^H*PPplR}kTuF4mFe2FA^)2T)fJ^xFdxoZPU)ZZ4T z_^MMRMBEckRdV$OhKt_6a-Mkdio- zlaytxvt_zvADTppRb$YR0t@Y>qbEcVBK#~-Gdn`3ikf08oj>a>!?$%R(l^^c{85)H z&(6!5SR1)xlUceU5KX0R?NuTFKQ%#r2-m7^-_r^Cz;tRkp!@1dhg=6h>}S(3b6S=d z6+m`847hA}d0d>GvV9q)su+YyXv`u!Mi z_FTr?Tnpw^mmu++vgO;tBlyAMzm^t%1PTSMe<$?h2cEdIs7^m8T+x;wo_M(4ok{O$ zO%%s(9@0lp0%8a`W!Ft$gzWS9U?+?1#Mk9eTk06jO6Iak^ zeH8B`0b0|W*h$y@a*(wzKD3pOvla0fN*L0}%U`%t3h|wo-x1`t7uJflF*l>S!nOEq2^VRFRjo|WIFrPWIDjlkV{05Z*=Cvd*1w z8@aorxc>!tPe-Z?K|OdHMWn%u>Z@UPnj@3aYtPZN`i;5k25RdGV0xyi^zF87&M_>p z&ud?70i~AXG0qJvAi3S`Lry{@B&Z0XI2$`d)!gS(^TcczY~wgR-9Z8RGyN?N=+FJo zE2?GrW3cX-iBm6ut)lN#PBBAQ*SM_v#^TB7qr8xNgDJl5;UY}+_+@{N88Ofz-=)8( zF7_}&e94E)!{$A7#3)EOgT(X zyAIk0UE5^;sKZ!xg;>9EnEHAGii~GUpgOff_>{V2UjL`<`GRxRw1wtUUq6B{b+;n7 znQqcw1YOm(Bio?Gh0KWw78jJVyu4D@?BBQRMsWoWd^Ph7yS7QLu&5rGaITC`HHF=O zVxA%d$!kxg=M0oiC>t@~yIF>UhsF=P7>(;1@3WFj;DG550y^K(tdwD;?rB7mUH}cz z5$~@2!2$Eri^M?hpz;WV@FIy|5Yyq&ke3flXsVx^u;2X1=~@JMwMl4@k+|2AyZ7~e z7PR*xz$n~R6KBR{2p$TraV%so?DFmbaq__gmnB9TuDv*?l%-;VbZ6Ca?FdPX-+Hvs zv_^V_Qw^FA{*!h1$X||6u^Z*T%puxzCZzP*&&GHC*Q%!J=EDy*axZRA9_1%@i*%6Q zG%}WnT;SYs1%J9>iu2S`MF?HZbr-s`ws_&0aGwVJdog@R{6XZxti@HD&-psgthH zGCCE*`3a5S2-YpP_>&He-Hp2qTH@|*yFo=cs4bd6-*Q-KY08RotJ50*d1(|!&;mhH 
z0-f)(DVQKmv*HD)!xE<#_B|UPt6AX`N`kEsgKq@UOlq^Dtr1!1Z-8aH;=~?fe+~NmsK1W*Y~Do5Z@|lnhaxO(3;9;9&+nS<+Bvs7+mXh{_@m! z)I7gs%)pirCK;5;riUL|ud628&IIu48ofwvPpDWBmXCccv$6l?IN6Iu?YdP~-Ylkr zgaD|2m^&3?;S}_6>LS|r<#@@UsewW+BDhpnfmN*Bn0O_?_zm!sgQq@eZ=+|F`4*rr zZ6w*l>ZJN!o&{_7%tbiEctT9E4$h+I@-YbV<9(NV*io~We30FB-Xf0=&W^!z8m>ya zw=3l&nxrF)9(?M5{6eW`G0_tof!>HXRz|{%f)#Vn@%wdhHgR$v`;AgEGZlOrBD>!s zG9Os@Eo4-)uKIY){-&-H8qX@z(=Crt21N74mlpmcB;FZ3=dd`Y-2D5gr?6giFcpE~ z!4tD)i>p*5VzgJxt*DX9*q;{c1b#90P-(@aY6KYl;}^AmR+Gi%?X~fNpak>)J?@|L z>P@I!2yn6Z4!;XD70<7$i>8s~VpV+_usuiLjV>nYOlrC*f@l4fmKk<|u2Jz%MLZoJ zI$JpP{OG>8#+tQ{ePa|0Wl{;?%M6|<8`t~gLwGE!Xl%C}sl6Ex$@R?S)$UxsQRvQC z8=p&P+hbh$1V@?L#E|Z^CoX)wOXYbErO_*J{v85>t4yK2q1VN0NypB5@O!PFR=kv& zO-Ca|S+98Ka~yKDXq$5uX?#b{qk6a(!YrP>pUAqEe4_AB;pkSpV7l%qOh8c1-b;?} z>zyxAXBO)jWl^EQ2lGcH-zgY>CtqcLaWsmx_esJItHm6#rSSw6n?sw_uK(}hzWY#B z{KruSO1_Yt{M>w?ITu~F^f3TujX7BAYG6qAS@)fxq860{6#g*;(W>{iN2RClN5#a} z2EkXKdg*r;6MKN~j(P7X9g=kzO9^sB1H`>)*r$uomcz$_xe!;S31P8L2M#o&OG=}i z6^5_Xq5ns^)Go7I>jlgHSG03e{}Zz4Bu1rmxI9_$c|0M}j@SQpCW71(ON;1okeDdW z0kJ3afrc`Vpi0c%nP;j8`X9nMCh!+Hp&U;l;-+g0&wrpl_gG1&8{Wu za_D@bSEaJ2a2-x7Yv09yO2*_o(PNKD{IbOeRDdaPhg?Sn`zaxS;$uVFdIIOn-MNYC z>E7CYc`v@m=_R*CC>!ByI{tQ!6S19R6xjPQX#Z|P7Vgv5ql)Lgp6=Tdkza#tDrkRp zJWgho;AIUD_8b;otu8B1YE!&Oit8ZVHl^dbWvy_93Bsu(E)nBVNWiFazE{|Mp8iVK z!g}NQj;p7}yb7hA7jtp-DLOsf0&LlhtuY|Q(L{rAd}L3&kjI@mGs#;gDT$)!5qAQFkB3opSvB83zxJ-z%rR!YI!okWgU# z1^(n5nd1=J;iN*8bU{LEdbDRE_)=Vw_M^o|7HggyG{qnuYntK^tgeTzYCD&KM-u%z zg0~N>XT!a|LDSF7>Anoo>DyStnDow>Ag!vs=owbR*NqpqHV3uUUd`~lMboCU@$EXL z!}el;@J;AaiLkME)KBjFu5@J>2G4NejlCWDcxIZw`Q3|(C9FTie80;FzRZVh#24`< zZ?;J^Op2Tfw<_D$6Vz^bLfPJrlM8L-e4NiXk}jGP}c*9S!(3_*UAK zayll5$Eol4jaJqEM72A~e@<9!KY=sZNN9G%Z4TGU`iQ8656aCi$*<_s>@p14-P$?6 zNi4as zJGQ{e;g@r_5oAkatsOEm%1Q2??d-NU=mNY=5lrzpr-=ps7*SS^@@}IN(*t|1|MRL(TXUp)o<0!RAQvcRc=-_e$+o}zp40B8TGI=W zMEG0VUb$qhh?oH*@poqWiR%w0V`lwhfvb10?l-Vi*W6`5v8=)$?Yy3D>DR%V8`H+h zOtm{JYSz`&s^Mu_{)<2{bZav}tiSgYYt5VOh9NrJKXt}wdti3R@4i^l5jU05u)N7x z+2_uonMsfBy<0my6}?Wq8*5dw(RwEOrbSafF1^1S$p-Us-XA=SOL>whwM->!h`qW4 z>%27<&CI}RAC}R9l07yekQXv)!5m+!fU_#WKb9Al=rXmD%Szi%r2ciiE^dhZok8Cy z56}9;VqPD+WHHP%ouXZi0K%+?j;^|tQR9ReQbh+AEio>)+TdU9zT?}!3!^p3egIWE zN)kVQvR@R@MxEA0{Mu(BBe3WJH&1_a~tYuGiXbSsf$z3RFKY(Yuv7QX}zGURQN>65V;2)M+wwXe=4s|7R&bK4KKMz?ip+`f6oJEKOv0~SF%`X zR6QIYy89ELX|@~WeCcr_&fN3xdH1@5gB>G;>i9WjR(d?z(3xuWb2nw=j5Ux8=e!fp zHP1X#Sip~dG{0WTR-SN-)QP8;CO_bH%U_5+zNY6F3+9`bVt^z8feT3tO+;Xks=UNK z6aBKy&nc>GNO8YKUpQgb8}sVn8uKc!v36bAZxQiFfWZYIl2+)Dsx#@Cedu;IJ&BV7Aeo0v3q-PsUVS;!i7)1j8-<$8! 
zINKo9mWQU^z!qUKlNlfp;3^d2(SADoF6HGtPkQBe<|-r&BnYFn@Gcxl9W1FGj|hXn zK++iv71B#qONbqkw*=K>3x@LtFzVqs?(?T6Q7Em!T_IIrt1V8wP%Ic3Z3sAGC_vgmVo$XWlWDcyP!atwZH}w3e zTE;4nu$C9VavW)Jqr1_{^W1lwBqt$Ht@=VuaUR5K88iI|7hHVOhkaVARe7-l2?M~_WTeLq%z|$3#Iq^+*P(Un~+^ z?j6pY@YK#hL;*bl@RKg$7f_UD>y)Onrz93wmMhdX>pl9vwE%`>NJ1(eKl7aEmEgnt zW!izR=X-*SE$aA-Wo#vumpZz$kJD`G*E{u-fGcWY?Fpj;-ZnjwCRd*n#b~?9j{?Zs zlg~9;b&yLWlc_{tb56QN?;X5NESpM;N22ztj~WVs$9K?o0U-56q5K~;|IQZxc@_qI z-8STX=Lmn6Ui@Vz>elOfV0hD!qClP5U#s%;&yF0(`kNu5g@Aie=gro(DN469eaG*g zkuIaD#wz>~!CjJD7Vo5TU`%yjja=>LhwQ{0fu^!>j9FN1+q zW=1$frX13^;}Pa6h`x*snO)PMJ3No3e+c)(_ph@Fk^-CpHj?@tOtvq=HCQ+J;?@&Y z?(&RHP^!<^r6`!_o@_MPB?BnyUhaU-)BY+S?0~tzfvyD4j>{4M6CCsAz(S1MG$4r^ z0pr)^5($axvf9{r3o#_)V7%RVgYE}1SU&wPu&gVJU=gaidB&^H@%PK`ET!JRJt`Ik z0Fv3c(V@P+puUh1J}3AsVO*giSO?2ON5)-Q=dWa#H7rpu!?HxS^-q)t6y^J!u=S*p z-El-#&E?>6N?A%JZzuJ*3uf0y%?%Omt4d-gJ|~6$YYuUwy=BJs$kW#Ti2=w<4b+B2 ztO)Uc4nA_q`=1js*P+C#?HFIb*mWo^h;%nF)sT@Wxmxl-@kpN%m|4L+N9&?)tfpTe z05ugU2)v8T6nnQ%QY2#XEWGuR+;H-Kcq5bq!exjv;MP!0vR1XegMjVl$321mR1K2{ zCVMdce}$DZ6q*s3qaJw(%0Fs31l|Vjfo*7M6;l;+gTL$QPCKtrZ^ubU477*z4)m?x zpfEAw`7Vt1=JZiA?zep?XOp9Rbaj94Tk+X(->h&Msr|EG1b|?p6XCtY-@h!MUTOZZ zXLGR>wrDp}boqgF3o0H$yg)Lzx%aZkO#V&6vI^jCW(EAb8*AH=O8KXP)y|T9!Aub7 zVdXVUFG$UrNtb5x^0t~7FaG1Xp>XP`y`b?;U#*8GmpQg4=*`lrCChs1odKN~2`epC z77%dH)%I*#;u{l_Ly@2fE9i1Z(AH`HK`-k}hFdEGx@hs`*}tFa@!d#guqKUyyi%eO z%^nZA;9CadBbTM~Eu?$2ZwtJW4o};e@IT}h?F6YQqMGQ@W*J}ZG%iHZR^Mqfm57NM zm~rgcu)DPxJ8HW7CUf5)T>c%t6*{;fG_BNAHkP(?q4FxID7JI(`lN6$1fH9(BGsM)E z9l8oV&d{gPuG8<$r%!Dj&h&MguM6HEMh>s$6OOAzb;eYlHG|12==6V*@2tupZ-%Sl$Wsys{)L$62Ng&PPBrD)!&5k z{G{P1_pcs}`d=>|4$C`ZJaG4hu6!?lqZ5D=YC%?Eo_BcXP{d6^OjDW_jEgU;cjD_J zw#ZbVt9YU&q^nIoo|+FtdLMwlNPI04A(zquw^A9F;j>PWfyV`#E=LdSFf`;3Qv zRlF@4hNcg@rsmcyw(q<9p8k;}W`U=vu&;S?ZYh(cV;IzJ#<4&OKsnut%yov_oeRmVwjObmYjZ+*G8$B_DvbyLDUzcEmr#?HuUuR zKsIQtig8f7v`%+8v2#NZBeox-4bc&Q2ZPZ|>-7?oba8fFp3=H27imF3)smqxDKGcI z$W^|~<+eJJ-?SxHkMtriKhd!6EEe5M01@;2jvdAJc!jbPe-x8InQlpkmscSw0e$qK$~y zxEPU$o?R(z4n822L04LJ1uBFlk~dK#GA+Ni=2q8b{w2UIl2FnxdlwU9zp0WWuztFS zsncFh^6r+4_%^P%KQa}>W!h6~##V{blYiT|2zqdvlFO5`t6}XqWoG|Dx(yb+i+I`( z-zh!G`jDX?@dnO$wfzZa>?lfDv;n%k+__uVM)$XUgiWj-f$qcJf3N&!TnSCyupE4C z!>9X|8tc|y5+>?yNx{DTJ|w5n4AawLPhN{;CEyOi(>b##%DlvScP&U9WA-!^A)8Y) zn}aDfWV5~)?M!m}X4V}m(1jzFcHlf!pIi+JpShv%3}B+`WoPL7GumVKUAj2%tV*2A znAti?ylgR8rf8eQdZ#`M&Z5-hn7RVnpfH@d9stM5 zVOT!;a@Yxl(^ewCgNyu}nm7M`N$v;`<-fSl`6o=W3Lj<+^1 zajMHa!1tpJrV>q9JHxH!3{V`sS>x^6=W2LwJJq5OV3^}<_Zb|cRcB%qpD0mzC#}k@`$9c?pbEH_^K-|25 zKjEKsyfh1rG=fV!%XnDS@pa1}+dJ0L3U7_KUIy&S@=StlT-@qeP6Bm1kPoi z7QG}16DrQg+vvqxwuKC!zaI9wuO35F$CT{6M$&}W^~ny&6!dFtqRVkKmVxR10w2z6 zxBd_k$?CnX7%+@&`HNXk%s+<9YsQzlR5t~+pPUfIgRjCr@3f+>Cbuc1q&-I4UmGOA ze>l^9dT!hWSm_~=oE8uzvw$QVfAznL$F`cMQ^0*WB1RR=W~@U*0mj)HO=j;yM@A7j z$k!bmp_6@l-o1q%ZN|ei#6(;2@TK{bc?R-8wfCv0yA1*D+;xf z$L9jOh|!!kI_!(=&cnWmNxvBkuCmD8hGgRICG^G)SYZwEPE$knsV3@5#S_N(vH>Rg5+*dI8%t z=k!*u1@mGiSv+)^GBSp7L;P(R?NcgevRh()P|dD;Y3@71Y5^oJW({DwkM9L(u97D{R4DIXa_!IG=(Gmt{w6KFi}?hm~M*G`ZA9)my1M0^F$097>T)!WX-4w9@W$nkNCPh1YnKBmWiIFcx9dRRDmplm zZF&|XQH5}?Y3jBzzhtJ}^7!ew`RU~Mzl88|Hm36Dw@iNRCA^b-T=!H=5s*FHm;S|) zSy)@1C?-R197bz3-`nvjF+{Qz+bcL*?A`3KP%>0BTK~TO$s*a9Wp#j6rHfxi;6G=g;JD2UY1N{57! 
z{USC<8}LO^O7p9tQa3bi>-$J0v|lgRS6zO))ZcddPLjQ0&!Jh;H3!XMh>B45^$gl( z3d}gD>*E2>EFVUm;Q92?g6@6&yr!$$T1uYz&x|addGTT{(hI61DQgf3^Md<+ktHj4 zlb#rSt6#3)VVW|uTozn9S@E@pTa)>;`T58%rec4X+~^Bzn($&bNsbnP1e{y7ZTO?U zMMW%9n(rI5rPjvD>x35^AP`cEYspk{3i2MN&u-PRFBI5(o;4srt{N21Xlwq$!rvJ# zmA-Af{SJy4TnV#vxKG~&zO69ngv^9FxeJ~1TH-lRPrkSp+5cGJlDZLyWcccC!TBFgd?`pF`fiH(ZZ94Th}7jMt`&kqqni{23+}-`cK&-4m}X>vjRW5FLnB*>R~|$waZhuF(A* zUo;Cpdt^@&%eY|c-oSo${3yiUE9Gw={_q;}+S2Z;{nSMf#BUn7dlp%0c6-nBlulXK zgTrit(_~|=N75~7Np9~*->o}~F6(^0pMm;%K32fBv+^0MmhDc*R=*4P3v)kM($-mR_kp@N#^~xis80 zP-A9_z@?jdT1IY(nfu&wuLhD&w&3|C&ob&s?6(*MSk4n?%T!EG8%?g8Xtqa+Cu7(c z;RkKYp`?mj#4)9j7MzRkZHx*26x@qGJR=zA`ES&04OMyn$v9ZR0?OXYAqF|u3<7*KkVkNn&9m24JI*dH_(Vtzt-NY;X#!yW|Mr(3j5`puL&%0rkpQwn-|inL`4ygt+Qby~2?l{uKCee+`0`oqjMt59#)V zt|^v4CEN3-^387IssaTcR;p-nXhr)|i_sN`b7iUM1-mGC!FqR2X1%k%EgOFsX`8SC zihLqXWoK(}7W}j5{>`)Kb_oZ@V_wPR<)%rFC0(ApzIC8_w`d5e%^iY7Uva*A?Hw0+ z#acq@vGrzOxYJeHXSN}n?XLC7&5}wLnep~fy;?)nPwf7y4y-_Onp~QC%LgPsRv`VW z+?Jocd<7_hDWkEcG2tA zOb!Qswruatb>#gBLE0s7v1H1scV5aOk}dU_TcyNYbe0yQjR@I3D-UI|LkUFK-lvXz z=5BN`LC)~_p*5ywYRTy22^`Lw8;4xUBM1YtCQ#673;>Pe&`bnPA#iC4%%Y(uz#HY) zk*u;L3-a;3`KkXqa`@1b#Vg!IcPpU>zJmgG9n9GB{9Rhm)E4vk>cFkk8q>|3VVln< z3Z2@HI?Wt)4Q7=DFI2-Vjdy`&I?qVP2 e1;bNFu<*SZ-y~0e&IcUG=bWCAZr*8|fd2r3dk41w literal 0 HcmV?d00001 diff --git a/doc/source/_static/css/getting_started.css b/doc/source/_static/css/getting_started.css new file mode 100644 index 00000000..2a348e5b --- /dev/null +++ b/doc/source/_static/css/getting_started.css @@ -0,0 +1,263 @@ +/* Getting started pages */ + +/* data intro */ +.gs-data { + font-size: 0.9rem; +} + +.gs-data-title { + align-items: center; + font-size: 0.9rem; +} + +.gs-data-title .badge { + margin: 10px; + padding: 5px; +} + +.gs-data .badge { + cursor: pointer; + padding: 10px; + border: none; + text-align: left; + outline: none; + font-size: 12px; +} + +.gs-data .btn { + background-color: grey; + border: none; +} + +/* note/alert properties */ + +.alert-heading { + font-size: 1.2rem; +} + +/* callout properties */ +.gs-callout { + padding: 20px; + margin: 20px 0; + border: 1px solid #eee; + border-left-width: 5px; + border-radius: 3px; +} +.gs-callout h4 { + margin-top: 0; + margin-bottom: 5px; +} +.gs-callout p:last-child { + margin-bottom: 0; +} +.gs-callout code { + border-radius: 3px; +} +.gs-callout+.gs-callout { + margin-top: -5px; +} +.gs-callout-remember { + border-left-color: #f0ad4e; + align-items: center; + font-size: 1.2rem; +} +.gs-callout-remember h4 { + color: #f0ad4e; +} + +/* reference to user guide */ +.gs-torefguide { + align-items: center; + font-size: 0.9rem; +} + +.gs-torefguide .badge { + background-color: #130654; + margin: 10px 10px 10px 0px; + padding: 5px; +} + +.gs-torefguide a { + margin-left: 5px; + color: #130654; + border-bottom: 1px solid #FFCA00f3; + box-shadow: 0px -10px 0px #FFCA00f3 inset; +} + +.gs-torefguide p { + margin-top: 1rem; +} + +.gs-torefguide a:hover { + margin-left: 5px; + color: grey; + text-decoration: none; + border-bottom: 1px solid #b2ff80f3; + box-shadow: 0px -10px 0px #b2ff80f3 inset; +} + +/* question-task environment */ + +ul.task-bullet, ol.custom-bullet{ + list-style:none; + padding-left: 0; + margin-top: 2em; +} + +ul.task-bullet > li:before { + content:""; + height:2em; + width:2em; + display:block; + float:left; + margin-left:-2em; + background-position:center; + background-repeat:no-repeat; + background-color: #130654; + border-radius: 50%; + background-size:100%; + background-image:url('../question_mark_noback.svg'); + } + +ul.task-bullet > 
+ul.task-bullet > li {
+    border-left: 1px solid #130654;
+    padding-left:1em;
+}
+
+ul.task-bullet > li > p:first-child {
+    font-size: 1.1rem;
+    padding-left: 0.75rem;
+}
+
+/* Getting started index page */
+
+.comparison-card {
+    background:#FFF;
+    border-radius:0;
+    padding: 30px 10px 10px 10px;
+    margin: 10px 0px;
+}
+
+.comparison-card p.card-text {
+    margin: 0px;
+}
+
+.comparison-card .card-img-top {
+    margin: 10px;
+    margin-bottom: 20px;
+    height: 72px;
+}
+
+.comparison-card-excel .card-img-top, .comparison-card-stata .card-img-top, .comparison-card-sas .card-img-top {
+    height: 52px;
+}
+
+.comparison-card .card-footer {
+    border: none;
+    background-color: transparent;
+}
+
+.install-block {
+    padding-bottom: 30px;
+}
+
+.install-card .card-header {
+    border: none;
+    background-color: transparent;
+    padding: 1rem 1rem 0rem 1rem;
+}
+
+.install-card .card-header p.card-text {
+    font-size: 1.1rem;
+    font-weight: bold;
+}
+
+.install-card .card-footer {
+    border: none;
+    background-color: transparent;
+}
+
+.install-card pre {
+    margin: 0 1em 1em 1em;
+}
+
+.custom-button {
+    background-color:#DCDCDC;
+    border: none;
+    color: #484848;
+    text-align: center;
+    text-decoration: none;
+    display: inline-block;
+    font-size: 0.9rem;
+    border-radius: 0.5rem;
+    max-width: 120px;
+    padding: 0.5rem 0rem;
+}
+
+.custom-button a {
+    color: #484848;
+}
+
+.custom-button p {
+    margin-top: 0;
+    margin-bottom: 0rem;
+    color: #484848;
+}
+
+/* intro to tutorial collapsed cards */
+
+.tutorial-accordion {
+    margin-top: 20px;
+    margin-bottom: 20px;
+}
+
+.tutorial-card .card-header.card-link .btn {
+    margin-right: 12px;
+}
+
+.tutorial-card .card-header.card-link .btn:after {
+    content: "-";
+}
+
+.tutorial-card .card-header.card-link.collapsed .btn:after {
+    content: "+";
+}
+
+.tutorial-card-header-1 {
+    justify-content: space-between;
+    align-items: center;
+}
+
+.tutorial-card-header-2 {
+    justify-content: flex-start;
+    align-items: center;
+    font-size: 1.3rem;
+}
+
+.tutorial-card .card-header {
+    cursor: pointer;
+    background-color: white;
+}
+
+.tutorial-card .card-body {
+    background-color: #F0F0F0;
+}
+
+.tutorial-card .badge {
+    background-color: #130654;
+    margin: 10px 10px 10px 10px;
+    padding: 5px;
+}
+
+.tutorial-card .gs-badge-link p {
+    margin: 0px;
+}
+
+.tutorial-card .gs-badge-link a {
+    color: white;
+    text-decoration: none;
+}
+
+.tutorial-card .badge:hover {
+    background-color: grey;
+}
diff --git a/doc/source/_static/css/pandas.css b/doc/source/_static/css/pandas.css
new file mode 100644
index 00000000..a08be330
--- /dev/null
+++ b/doc/source/_static/css/pandas.css
@@ -0,0 +1,52 @@
+/* Override some aspects of the pydata-sphinx-theme */
+
+:root {
+    /* Use softer blue from bootstrap's default info color */
+    --pst-color-info: 23, 162, 184;
+}
+
+table {
+    width: auto; /* Override fit-content which breaks Styler user guide ipynb */
+}
+
+/* Main index page overview cards */
+
+.intro-card {
+    background: #fff;
+    border-radius: 0;
+    padding: 30px 10px 20px 10px;
+    margin: 10px 0px;
+}
+
+.intro-card p.card-text {
+    margin: 0px;
+}
+
+.intro-card .card-img-top {
+    margin: 10px;
+    height: 52px;
+}
+
+.intro-card .card-header {
+    border: none;
+    background-color: transparent;
+    color: #150458 !important;
+    font-size: var(--pst-font-size-h5);
+    font-weight: bold;
+    padding: 2.5rem 0rem 0.5rem 0rem;
+}
+
+.intro-card .card-footer {
+    border: none;
+    background-color: transparent;
+}
+
+.intro-card .card-footer p.card-text{
+    max-width: 220px;
+    margin-left: auto;
+    margin-right: auto;
+}
+
+.card, .card img {
+    background-color: transparent !important;
+}
diff --git a/doc/source/_static/df_repr_truncated.png b/doc/source/_static/df_repr_truncated.png
new file mode 100644
index 0000000000000000000000000000000000000000..8f602703587613cea6ab354ad2eabbb103ba72f4
GIT binary patch
literal 8040
z4nF=nvw9X||HeOWjz(Lgpyn@|zjNIU=4G{U6fN~~XP{EjB|x4eg9=(EFtd-wTJa#w zz#TOfg<$$e5Ph4DH@@8Rm6!!kM(mPlodc~cF7U0ieG^ep9sBx$?U@VDg=d-8nX9}m zefk?{yGhlsXR=y529{@%9>o_SrtQ7e)>aO0Sb5m#$$wBwoDOebyA0%PoFV;IbNbQu z!h0rEu(z|Kp_^1GM z+&mH(Q}2<>AE%{Jbi2{KaDqSIwh0*d@kux&g?H4(x?VTjmIfFkI41jOE3R=}@Hhnd zqVIPA>C5}<;8)l^a}x$ctON$^Ttonj(f0Hw)!kY*_zQ@g$Lr=d)`|H>EC*w++kb}E z8ggS#rK*o5ahz|5Q!Qhy2i&}FZXZG&{OBcJKO|+P!+fYOeLsFFv>+5aIJNkjUp=4? zE%MP3v16tDg?ewGw1|(`|H{^kyVKLX+j$TNVP<;Q1|g_M@3xxJQLoimCd;^>Zmx7p2|pyr0X<_WPBAl{@?UUs!eGefjOWFD%=c zER9WE5Sy4thWER0K1_tHtpsx~NK8GNb03oTc<}`>AIDDe<4R0gLzcR!|6i&md4W)D zltE>>LkCJfWHNrW=(`;qvh$0^DVLkLt*aI#cm~`J6r^}qZ{NN>P64pM9$ZIK&Z7Pm z`%3?R?E5@VOy6EHNj`@yaxzr3(rX*YLXf8>_5{&g=Z{T$8HBBz+vY*c+8}-`yF4!{ z7NKEaJ6G=DL~K_Fe#OPE%!|Kvv5oN&lR}z$;fBZuDQdwT+j?_%>us9 z7iAzuG2A@_sft(K#&XNRCkTHhZQYM3&4&0{R;rei@FmYeWm zF!7q33in$nrjZ$t&Y}IMf$q*LU*6C%UnNxfUiY&F4opaF`($X_;x}J=4k4VTD-!;4 zda>&oK-$(<&g$SNW=TdJ+B=o|E@T~dz%Ik zYk1vb6$DQUnKDPDpl-o0)K6M3ZXLd1FmBuIog92lLoY^_2wxa1eHkDi6M=-(t@C06D#Z< zyd&tYB~SmEPj>7&#O0x-c@cL=)7#)^MyE~gL@4aRqv6IQt6ox{P$miwdg$*eT!=3H=jz?90rVgoysCJL(bMynu z)H!UZ%dSGMctTK+pZ`4$!QRWHN<(($yv!h08NhXDQJI27>Yj<#uJqWvqoq^ftP&90 z=uz`*Mr(q>!rTp2<9E3HIaG8)hvLn0w$r9xVrrpEVDRPjxx5ZbWBzXwIrGK=!_g&F z0#HCepa0o(+ST-wGzWwLhscPHttfNU>cwrdQBKaxOP#ikpI6L08!;Tx)=+WC#9IzZ zDr-8jP&|IDM9lX^@YP7&TnhV&T&y^J=zY*dk-a-}l}L&AO=A@|{ztpe!u zYh!0^xc^ed!uZ}Ts46jF_YFV)on(B9Eg-G8+{3xKYTI1|_w+5oI-LDBYkp}VTfHv1 z2g!!x+A=^z1~=!oEG_}0TDeI(uGEkK>YhP=ZZcC?3zM`6tpfm}^DlqEr+|!3`ge2} zUP}{jY!L=a3*ExmFH;(B@jZ{pbu-DdjC7?&y>idvLBfAicNouQ0n4D!>}y<|m}jDI zXE{kQZx|1l6OB!um|4>`k|4q--c2r_Wrp}m=kd> zl?HoM*cGMS4+hF$%>jS96eqoqsSs?ioyx*ox$?zmOp{ch=ZWj|lbDn3%Z#3j`^&Os`&BV@iM4A%V)W2%D08fx8Qe2LA%3bB+P_r=E z@}7@&y9d3L1^W;iIEmT%c9z5#42P37M1J`kQ3$i#l8%gIejp)1!_;2?ZQ}OEbLx$W^0ZG}kRHwH#@lGNuG2v83*A;?tO(-3|wZ5Uipc6hP!w+c?q{pRdly zLmc)F36{^`n-Ve(biRX zJGXg|HCKOcx*GJDpd7q0r(Gkm{BWMMSDudKO^DE{My*No?R=6s1zR+K?QCPIclAZYJoB~>60sCe+rga{9QbKJ1q4Sv8n zOT1G>1Yh2WCgI@!NDi`}oFNbl!hGF?b`9(K5{Ke5}t(1DcA#KuQ!DA)^eA|-g z`i}v2xe~Tj(7MT_DX;;sUk1N=t%%w=y&HrM2!`cb9UF*i+RgMjasJ}q$3Nyg>WrA5 z6xe0JMu1AUE@TS{0xzX$<;4h4;e*)eQ~km3;fiJx$70p;d#AIa=Gu=0kOiWDHN-Elk?63(HyJ3UZ@g^rH&t|plKP8Jk-EPje z`v|Ih{P^MH$Dp;fHSyJ+S(sm+j{f{2AR}{MgEBEUr>3WuWxDSnfAT*%vT3~-6@2>? z(CNTB9MVUiUT?P?IY8!xYE?(1U2j9EqHV)DlAr+ApqMEXF+40^;>@T~PV)CAXx$f_ zKxhVCraLs`?4YDVD@j8`gPNKe#?jGnjStg4@ zor#c;Fn(wo;(v8G7qrQTvWVb025wxH0saXN4W;7Z!U0#l+LZYA?VFKc6drT6_09G1 zBD>p(&bcqJhT_%4#8tt~)z+^>^||BK4sw!#_52h)r=O?w^VOy(;G{6Iv5j3>I61LR zO--5bHjb6ydqQ69orEc&QKq$BxsuNBOh4eb;i#sD`#M-Tc5^RRD#}7#9Zx|?sW2Fm zh=@pT#)>;XX|lUpBJJC^cSw|6jEqt3cLybk4eQYvNh-}w+j1}OYOr0DMxs<@Nttq6 z@fg_mD%bDGd?jJw=_!!GYft9$a6YiV)a3kzGPu95Z-U=>XZ-foy~IiTl$0`f^gHj^ zhHo&Ymb&_^z299+du3%jxb*Yq`k6XQOUtB9KCO=*H&s_P%5^NyJwD1=f} z@U0IL;kTH)>+;03f^Xlx-T$4;X#2ESV~znXx&20#M5mNVaw)6;U2QQz71a#RkY{Lj zG~+#gY#vdJG!tjUJYANz(T9f8(r9o`u&H8dcu!%lv4H_?UHeyL=JzWaV)7ePH=JB_ zEG)4Fg@uAg7(F$W6aqce^51#Tq@<*Z%FCtYITwK#T9unapYp6SS zv$N_ZW@bjuPml8;$e+%_L?M{4Ntjgq*`a^GeG_?(kH>6r-EwLQ8Dww80Qo6yd~y<6 zBAh@>tz>3!bhLlbzJnP)37kVzlk+Y*U;IHN4~gMn*XfB@nz82|FtT=x`fwE;`lwD; zS{gFM{jjV-66y^DgM_qnD81s_eJ^KcPDpqnvW?*@d>LwnP;*1*Zi3I#R@^Va+}Oxh z#l+36mOcpr?M-te_>!D^=hv8v=7lL?BhtI%G1i_+R!>PuscuK@rep79{7sq}!c?$? 
zo_~r>D-VH$<-MNT3yKK40V#<6D0SziLZ|7KSVcC-P6l)3x_GYpkU39qJ9EMluh8IN z3O+u9kdP3bv9QokqwZiNKIfh2_vO4zRV%*twj?6n1UWf5xo4<#rv($~)c9)e-$yM^ z;PeO@9EuV2$bJ6Iqvw4t1@c4Wp}71dT63BXjsJ1jmog6XMF+SZkCzNmTVEgPh={$| zU>_!(C$6skRa0G8*EZR0_}ig^UFmX*+h&H_N>wvFf%od}@HgrOi#XtmP<mIm z0h;+-05Ddj^G;@gW$VtA6KS&O^}I3O#|(wkf=3YbpIRFV_Un>Vw3L+%+_X-`%6StK z5-5G`-u@f5X$u24=Hld>+PRl2W;4C*!`H9Gv+vf$ijs2aPLrd*a9p09t`)YSAz zdHMVr9mT^ZPQxQlGk*0+JC`l~@V?&IS|Oa%r{Rjs%*13~hyI;q1!Unsu*ZucsXAB! z)YV&lkH0iK!1I3oWN<8thyZnNdUkfBHv*fiI+h)?RZA>M?zPQN6U%3j;oAU2A9LdsuBPm(6nXOm6xn-w{aMAZZqc zF&U#Gj-mUL7@w@p?z^~56(|J#v_4H&FUdcgEyEsD)6keo(f9rHx}K9D`iZ@1aQMQ^ zcCmh>{4i!+*Zr8Ow7rIt<`vSmD#LMR zqFg@Swm=3t1l-=HAk~bGpnUh#@44kxfQAN^nVFfaHgp|TPY{}2cz7g1E}v(Jbf%6w zEne>BgJtFAgH;)DWw_PU)F6;DbwSk&LSWkI%{pkRWVtf3ErE@9cn_ElK+)4!S*sHFQs&N?un@eY=%Zafu3`0XhvbwGkIFlcp zuqG~Scz1}=F}|@OBA!A1?zWg2FQ!;Q`~QaKT0aw=TD)L~U`(a&>@<9_*f-N;WsN`M zl2l4E?g*kM5V7M8{V_QX%!AoH36CX)3{nkPnErnvL-%Ob_FHHV4-Y(36g;L*6xP-? z$|wTV(=Z~`?A5HlwS< zG-?7!qFg*Y8z2$&24RSVx3&s_`w|SLup&T6_?;w6OC~Iob#$V$o*#B!YE7lduU9m$ zU$5VuZ>F*v!$a0FUbC`dc-@@z^~Vrb*)H--&&^SDa(=Wi2Rm^GXo%fk) zEx_CWZREAJkp=_=l>1NZO`~9AvUZ<6Q2Uz^IMKs} zb(yn-#aKgPHkO4^9RK21)UQ#aQMXBeqt;rzs5ao9B;0v~JJHOWjhLoJAP& zVy#FDjM;YAwYZ8{uU;9c;Q^D}-FcRNs(}aYT2xvpsj7+-85LCp$VS*PmIRyYfojX{ z;o+g}${Y=cRoz04Eo#4PfT%ib^Udi7dy_G9W34G!?H~~?#fqfEUU*AfS!s={t83%a z-C>o(x&$l&0=1|p`NYHo1lDaznXPVWVZn<1%Y+3N#8WCr0nRMiWKW|n`}yI!yF6Y6 zY=cQM0&{hY21>X}v81Kt8ysXfkO~1$p-7NTFJD}30!ePrgri86frgfriihX({GCN? z$&y^pJK)r_rJtzD$OT*w0b@wzccz8BSnqZ%2m=#rGXH%F`aj=-&(6-y=Ua(Nk=q4u z5fKrY7)%@lZv(z&L7x3sVUiFa@!wMdOK+RP%3c;R7)vTx>3XOR@z<-6QB*|dv0K`E zDOSiRD9mL3AZfX8CCDtz7EVZ;n!Y}1+9hN9R1FI@KVN5!|K-b<-h}?i{PzhdDPhgc z0^d(cfSo}+z;c?8N-8U3(&wX};)7FOec0ihZ*rys0gcC^6aKMK(Gb8OP2dOwyL%O+ z@IAyit69(;){F*h5XhjbC57AbZ)=%pJ;9iU=H`VBO{#=!P%#;je-}4D)_qh_F`6il zr*&4t?}u%1JC$~E;i|X!6WHc;^OE54IE&;1Y$U>y?+;$w9gqX{3O(k2o}mmM)S`Bp2yHCAd z!IE`#t()mt+A>r5ze^x#ePU*2Y#5m*;?P&-w;--g8aL5zTr84a;ot;=N*uviW`hNe z<;M@2lD{D+_#1l@d3dHiaZrXhQ*)1X0KP{h80*{AIRgTgmy^?V!i^eQnC+^z_IyV& z{)ZB0xKuw@w8b}UbAQ0ilJa}adD%=I<%ae9E6R$xum$L zNDS1Udi&KA%Z~)86~UQh2+zCb{_p#D=aV_ROhw=~4(sB~u`2468ImPRX0(#_T?AzF zC!yw&f0>M1)qP$BpD^F%)j)7DI5gDbJSH5bq36-ZpsBB;V{=u)14?WGd3F~S6|JZr!;MZ7AsXeCcuX;evK54M-b6|U+jt|H zEZzE6T*14Q&LPX{=#ad4*za1cvgG@p9nD~4CohiAq**D(q+er(<|04}kV~mF)tfhO zK9dImci%fXsb5S?3BK9mYxa4ef^53^xY7@N#|td`djt_ z&$tqLfP4$INk2(pIVqCQj}e8n22u1{qB&oM7=-6Tx{041uzRdmUTs-fdK4IoMDR1}uV245Jc?B#0Yz|gv+u|OLjW7i35@LO52BZrJ%#H?lgi!n7k+Lo{}2&}`;WnEd4`DqLDJo&Nnk3fzX zJJOP|ws^YE+94V>W(8h+0yH4U{3}l$UM)WUY^IUX%!XVUf$Amcex`qKL0_LN5zkIJq8X-cXs-S zFx@zB_d`rju=CmWV}(0xSfH27RCaHV_J0CYEArhh1TiHHh`){>%HXA>1F;)~vtqyO zVqN4+FcXS2_GpI1|Fc`b>OW|Q9l;ziJj9cA1&YXDncOzv|0!8|Fvmxb$KljpodXCo zYT=Hricy#Bv!FjUp|dGOJy$_L6s!h4?Jc*+PiB&xwKTx2`cUIUr}*DLI#Ybi6}C=A z`JZLLYv~~Fg}7>Or5%b((1q_JQm)xGVK3!<55U}STcO1LlHpAz!Q@9a}=1FdhXh-5dnXbbBD=9)+OoW2@FjPcz zm39hj2;w2_*A=u>b(54VNXNe6c-qtnZ#_A{NA}M!mqm_PHE|e?r32#fxx8}!?MMhc zALnw@tex8Ir zui1`c+*~cVoxWWwC(CGB16zRa`@caHV#A6jBS36b?&n6T>er*;O$R?qUI13qp_8*S z79pYK%;;!_V3k1^Gz|?6g`glYq|)QUOvi266`(muAQUE}?c6BJ%D$jUip&y_(EwZ% zxm|;)DlIQZ2mBB~9M=C2RET%O)4&SgRZ~+F0Em_Ihr#Otw{ff3`iWAE>54eATZU^- z=A7YwGS@GAvCh7e!wGAjatPiBv3inUirH1@mT-3IrU=;FklVfN2aXc<-@kt^)r!Yv z-XCz|y!Q5$4RIyJ{Tp#H#IEP3)92jQRuaIa2hzB$<3Z&M4u<98;xYi7_;$qJ7r?{Y zPKju+;g<@Nno95j>#S!>yTH!-?&)Z0jYiVA-H)mU#%E{YA%nxik}4{%UP@Gb;Xh{l z9PfQ3`tPaC9f#u=vXgSbHmMAAwaTOc!d_Th#3CTzTl*{_A#rxo@l5I;f5pejI`;Dj zY_AjGvED;H_jNcX-DcHN=SABF!-MH!Aj`a91he5($`h?daIOHprtC@AFY=qb^HN-FS}~XT@X+ng=;5hZ~fH*|5aW zES-NpQFaM7h{81`KNP1iPfXG0oHQFa{X70WKfV1r!~NwoySAb2n{%F2e|M~d_rexr 
z@KBDp1;nOYOibABtFPH{qlkM`+lSlf7_?R*#yQm1A0VjzaJY-X({g_+t~k(9e6~qR$l$Ynv!e7sqx@7!4G- zTmtA1s|`@UYdT+(-s)>fqSUDQ@r_~ zeSM#@pE`#t*x{-f85l5eabG``6c>vFEe8ZS#89KRw3NwasSyhVn~a`bW*-x~7-Ii- zXAloBhuAbuP#%Vd5Aigt;r-$bBO8AI`Gh8ceJ#_I&_wUlMN7*8^I=FSpkBwNSy;=9 z=wUSzTbdifCP=u|z8d5pQqJKa*(psCEzTYe_!HC#G)g`9UVTr}5aP+r|Oo0jk&BCIaEVH5=LE80xhH@WZ6ji23Kw zANP$2I!t2Xs50#at?8FTIelCx=5TnIVGSiYkYy|Yd%Lavl z_oOEWpp0G7z%f6VB)8T9#5M=KhgfsS$I-GO?9Ru#cbFX$RWs!^m83|zki`Utt(*yq z`1p8#-c|39>9*SpclWK9Q*h&P*FiM8XdKqg&uu=BnA3C8D_T=L4x7@r*vD| z%_w|+EbyFFJT|$ZAwzVyQI;ym;E+)3%l$qe`QBrTl}v2odhs{PWI9`}trE`12BY>9 z1p66d>?QQ9x!FOfWn9$q_l#j|=eiqM2|4^jjY}0b5tsP9ge!^=e*WQhnRhMgUbx6| zla5+-naM`V`c>Q@fkTJatntP=v%4#*WU*>)Z7mMaau|}4K?5RQ*2d2u_CP@3X8bLN zc)rS~+7_ceWG3zU;T<1>=p3<@mH_gh7zClcuzRuD$I%*TG|mUZn%dV5d|CCq$(XS) zP0TPZ3od6B?Xb1QP_CS}DVeX#Eb>|Lb#(a0APV@wQB(!~UbjjVlL5?MNk`o4{Ez0V zV6+?T;2?Wfo?Ba62AlmcigT_mEW*O10FMG`4&o1kfX;Sq&=|Kc)yC-t8UHdSma>HQ z3U6IJFBD@z6Gina5&||3g)^S}G#U0*B#nd8^C!%NEC#3`D3?|1?7|M1wB*iz@Hrc> zA(gnc%0=dA*RNVR;Dwi-!hos*#0uD|!_F`vXy7%qbD~yI z1GOP0CWa->?-Jp$s=K<=0P=V$*kct$S0%6Q6s9qvLq+N25N8AZ$>wX|*7sN0InwT= zR0ubF7ZiUZGTFyb&aI%>W02(m^KF4PDk)bfcA1X_0-l5J%STRaj8^K}Z=ImW+L%J67)PsYHdnrI zN}M4Xz41l2R4*Y_76ip_;P3X(@xn*IqN*ThknWzID!XL?ARhATIP^Q}l}w@Xb~=)5 zIUTCaEU#t-E)YB;%BG=w>Tk1n+QJ-nM5=AX>?)b-F(+7}_VxAUId-ON)eHMd9Tozk zFFJUtg)$n$IHo%((?sG*98rOg7sK%OSH3dN6J`6-#zm zbh80xVXH9Q`OHn%v_&2eyV|ZPYu!+odh}kXs;VkHJK7Azu<&ri^Dcc|UFQC`Bs0eq zq>!TG;;P6*APe^kJ&8wKlMm(!Li~)Gwl1Cntx6LGV_htiFf7*ZH`w2G^aRba=|s%M zib2*AWXQyU@C8&6!2iz9dN87P(MkCHL4VNkcUfL@>?l+q3X_n~lI>%5Qta&q!~w^( zm^URdjO7P;BRYCM8|10<-m9|8h=W&A;wTu<%evWN82%W+>N0wi$!b-ikwmX{ zNN^aRLDF(?AYgXpT=k=lL@;7SU}^cCx5K6bBr$R^@=XyZv-huOagM0_`pJY5-zpE^13RB=HHLjLEn)gkN1128Su;O*8 zIBLp&@8mW6S21`=czvVt$s6B5{#ZO}1;4N@R@Fj0tM;|AO+$MX1bl4I2w$XM>RZ~- zARVt72ZwhEcfTlkwRc1;-ed1YPalY}oAm6)$@60a3 zsueJg8^-IMW8_-}i$d0zJ+?2*Zs`|flYA$Bkpp5Z?0G9ZK8oIuwr2%2SU3R|D9B(P zZxC#V`#e=>%M0CUObF-~9j3%epwM8&L9eyV&WcO_fOx!*x1<4v0POn>`-^&+p>yXB zKWJ-5Ei7n#-=8Lot_tKl$8a9BE%B70B=102nAvw)T!MQ1iy~&L10~`tBHN2IM%3%T z$TAe7rl}eJ<;yEoRn>W+tfZvitu2!u1=W8DqN@UGUha`(VK7?PIXB?X!wu>SMR7yR zFO@m5H+A!l@hzqy2xW$-2ygM?A0E70Z&tiOYL-=0#A^uWK5odF+6Rr2my}7u=Kuj* zMYGBXZqY5B{fxh-U*ROVZDqR= z19#z|7{AmW<~HI7-h2r!@?!*rcL?WK&JjN4!Lw>mq6EBVALZfAbT>Z*5%hG&2kzDP z_SF8urs$P=K}P@wm3W>k`2o>@>+Q=D3i@FpE`o!;Mr99(x+a8_hmf6+`ov#uGui$k z=rXOTw6d~tSoOJCUlXUqbp;vVWwZmdjSWEGi=LU;b*2n77tF)K88qg*MOEyB>gweg zYLm&BGr}-l5g8KHX%G*`>orT!U%Ac!^#|admsIQB#CF<}oecpBUC&`fN9@0g-ObJb zn8(^qgJ`l)BH*O*-_dorwfP#0U?2ZpP}c2QSs02KEd5GYs)g;%%4a5IY;=J&N((Se z^GMFjL>Ei#*4EWM{;ADQpclso;?beudyIt0Jc89Q=eA{$B8ul$rNi3k>5RB<=%M2` ze_5JWj;X$&>jz%Y;~;Brj~PT0=)6RGul|hSmk$|-BQA%Ad%NUN{i=*6;zY6r@nOtH zsB|pavV67+TqHv7m>l0w5irOkjdIUSHHuaF+|ROmEZeVvhzG0^B9Y@SmoEYOT{N#E zrd;o>3xE88)hfdS*FfUahk4o!QK9fbysX=R2z+rQ$O}0Ka0l>K$L0}I!OsSNszJK!OHWT1pK*05udGzYNw%CS zFa&)G#l>F+shhuV!{AcJAR94Wky-YT$GgVE1u|Gbl*FbONQT?-B-wxEU9*Lfr^JYX zV>!P&taw|jswF=$@*n^GvYZekeAh4kf7+F$I>xaFR&`oo$6MRm>sNF72oPYsm!6#B zpI-)-eKY46Rm>8d%f7s~q+`f8|JIntn^43W*T*8{JTgJbY83fqw0iY|jufL1D8d7O z<(o**CVndA+^_f_Wbw`&=x)j%J_LYH4r7-gTY4AiIR;93I-LA>mR1pMx>A)J*kpZ} zUE<)ma}6ZUJ_6%dN~A$Bp#-07t5Gp$JeDbpez@l{doTVzU*-FD?7>2{(tomQZgVq{ zXB!!aB+%pRjl^ZFw49=)^XxXb>{2+#zz)WYa;1Dgun$lx{pynJui!-KDoQ|fwpN_b~Z)a0~Q7yKo4NJD3hnBtT8vPIh2#_ z(IjEyT|XahtNG=fOOdEMim;j}^wmuK%!9wb#~j-2UCwpQ{qT)4@v@6ZVjh;~CNr61 z9={Qjx4!PXshX%`iBL*5~l?>(JP+#XTMP({`I_17&=F|FuvKSby*MXepl^^ zZKP#ZSJ{hB_@EFU?CyqDR1QTY8*G-$MZbd@?c?_-R)GQsBO2(&VW9Vh&yp zJ?Y;r0tPj+tE3&i`)I=tryjQ76Xq!3i^aUz!NP%dI##gjpzHCXoR0Eg<>uxF-AR9m 
z95FCkH90lahu{a=EgPGgeLhkoAf^=aRdPV00*(X5f%-|^mewz!2SHny2bvHLwJz@R z((OgQhEU9{z@9vX>6)e;-~0HW0?Lhy;Hcq0w8E49VNm(*S4WkM&b|1FCW_s!pW=0! zcKzKqGsMyqddw8maI_S8L6j|@(c9oW*)Sktv$ha7-Lw+<8l-KHGrt8$dI;F?bnxdAHqi{yyUR(g)XIoNtA zw(1Ad=zw=y{OWQ*E`GnhFY2=!jBVLoyE}SqJD%zka*;NaLd~B;Jh4T1J|}rhbeb63 zcO(Dd0C?Vt=SqZiZ!tSu! z)XJ@$l3bsH4;Jw&OqWx9yfOvf^sN?4D>|Anp&KC;3vPURI!ROpLSkZKLSkZ2XQ!CZ z)%3^mR@|UiH*EcqUCcn=aey?_0*U{02DYttfyGZMJYZHb{qlr!+WuS9w;sPIy zAhFf&Bu+ztcG(OUfO^_*3jGDBf0L6cLT8;YvhUybEW4rX;{la0<|P+{Mz?00XI+Wi zfPp(&Cctpc;fhcW?}W;cBPG<7aaN*rgOOh8>j+DQmJ|-g<4}ql%uBk&{Uwx(Uhgu6 zRsjeHv==CuN1P6ffcE$@5UHs-2paAJ=gJQ~l0gO7t3P4S5lK&@;vGebERS)!^n2w)M9VX&-tM{M!JDFzrTBOOUI={IHZuy$INYChoc>oKL+Cci zAr80l_L|;($xciiDzUV>yo%{F81g?MNM;dV|b1kYOF&` z0fuloUmBps1LzYm`6|WADk_0sLQd$R(rZe9jUcmg>O9W)>e%9R0}6tY3xkDY(AIbC zE{gu^Lx5~7GOW^@q~$02yAu+yr(E^r!-M9c(=M69ncKZuqD^lkGig~zg&(LFv_qKP zI=ZODjYrxE>FLo3WbP2qh^Bw@1`6V6AH!`uTj|qJn1_uqAUeFP(a;-qshvd)i-Z-B zxP8~<4Ubt{wi=}d@YkX++Uzu}@*wxQW9yX0@CI)hE<0<2O z`|lkac^EL~h;(&on1+btz~4CsH^c61IVKiqb+q9D1e+^^z&ymLm?Pn z?B1Y={_x@!*MYm`fz+^5bg=`M5u4{1pA40)T9g-x`uy50B~k)5dOwl!`1#@~>4?HX zwaZKPGq7M|PBFNINElWvQcz0b0M?|s-gP^04h1`=`>t`Ae5gF0nuiNA<~JK2OBR{9 z>+1egBQ4}=Ow-SfMVQzqF9t@ks(_5{dm~`ZCXJFA?dN@Da<-&K6Z-!ANa4fnRAJ2C z-uj2~>CGl3;>z=WS=omrMkCbI9#c_bKt|0*N$&Ju!CXMs$8!t~*s-u~HDsjB_c4Lf zO77m?CC$O2zLSW$DLGk;gn4KwNP`dmEzi=O{MD~7nBHWe5tee+-^@O$!rkvMQSHkD*6j(LAVSr!h9 ze~u+wHZeA0(7@J zzNs?QeL5*Bf5T#2kpYHoC-lC_hXSKbEcY4AcVnN&vB1PBSuH>y?PTbgf>b&?aU+%= zUrvL?qQy~sF@_H1;M`jar`z-?lgGXnWQWbN-%+F4vQC{of?$^unA~LUwXDB zzb|QQB#?i9Q(V(b#2Sv7Ah8piwJ>NLqcm=GEYagM>##y)`tfUgZF8f9J7%6$HrVFUkKirDtvZ*6n7GtYB!{sW$qrPc6cv43PL%xS$il z#E*#~9H^&5EvsCS750%|QTJ;IV#g-&JDquDHuk$cx$0fT^N+jjuQk^GP18%lBQ##E zbPnFPTeYC_o#~>iJXgVy3b%DrUhO*UCSzA#Mk+^NY-i70HnTrIJYeGS+>*a4@3bzI z?d|W^YAgU&rlO*fgktaG!9kYla6QgQ{AB`|e~5cIU}`{$W6B<_j6>KUkFjm*x_G09 zC*&6F-O9>SD6{!37k;`}aKf_~K?!$uvuWBw9?h{MJQ`NO%bnz|3is4^4z7U7Z>+s9X5YB3KuFsYXyve{*SU2pP9)L zDe=D$C=clisRN!4Kq?q|1l;mviBr&%)6>1u6nXvumkFYmczaz!5Xh$ns3liRyN1KM zie$dqw~UarCw^q64kwH9Z|hG?ivnow^+jYeWfF3?*${G}6rvDKUOV~iKJ?-gi!h**Ay}pHFUOk8=vd~T-^HG912PNSv958>`?tA!MM=B4EmSQ&D z{!{`aI#4O1^@sd7MO5U@*Ojblz14$Tey@l@R?$vEgeeS9GQw6(*Bqa{)GSW9I#a0K zA4hK~S;;PYd(;*!JIO_DJNI|cA^yWzJy~|K(hsmV;o%L);Q@uWx%FuFsye|ci}%B8 zhz`a!Y`$Ol3_BCiTE)dex4Jp-gC`i!QFlLBwN$PL5xEf3KkYXE(j?gYhg|3QbZ}b~R_<4Ibt9c(YGmard zO|K&Ower8x2tf+WIovt7`2f?d@kyzUT*2E}FHF>1C#+0Ay$&<(Q{oTzeW2z&+`XYz z5%Q-*27_s}X;gxE{|4sdl)n2feN-q?bK9unNFFG}$r4(<7khyUXlaTFb!}O1MeB3l ztr~x*cYd(ZIoLVYStq)KAH$ZG1&R2TD7lcAGtL*#GTY22BlCNxg~F9Ptka>+c&U9; zAy@9#iF$LS6eb+LBFxBs&HTtI3Iu<$-YiNF!?}AR$uSZ9gc+5*-uPWW&45aRnIXi zA;CK~C_=a*7h$%!%4YSk$Bk$8eEek%T9J~8&d#{ukgxX)tP9~~T)*G@ZFU@=m+*p0s7>>B!XRsbt`tBa1anxJ1I(Yn+ba{IwQZ%GyP1Y4$&f|}{B*F_XLp-qV;*2m_@48+VY4+g#)w=LwY{6O+ z^@BGenKmCBzs%UWg^!@8dnEAXzULqE{ec#zWUh6tL}#N7Be!RT>QNuu4E!kQ1#l~;za%a%%0kRfaF?l<>+q6PjJMr;&#R*}e11T5-UPijjl zOm_cTVT?EMD=AFbKO~DHC@qo!Rz(EqTt06+2*scS5~ERn0FyW7RyAyV_?vmmKM`S~ zKgBUHTJLo^%cIbHxtgqJxuW~Qn!Wi0Wh%a-lAj2*7uf~{K(pdy_4J_lAGSFlH?}YO z-}qVd1)OSU^M03QUrFg9)%aa?^jlf>L3+o<+MLj5bAx>lu526e$A>967^QX2;U2RK zioK(EDz^@>y#4T-oj1&T>mhYG4f^btux{JBL4kjp}22$1xrVqSt=x^**``}RE zHrZu?oGI8HMt%EOm$){B#SU{EYrJc)Ry=1yb^@r_9v%0!-(oaRP21`tUm)NYj&o{% zKkQq%^v)j~(@Xu45d*H|e5@Nzw9~Xr#|$g!ppoB?1Pu+5nn|NX8C>VIYE|)sg6-U# zQxbPd%CAV*Qw;i@iry%|QJxaTc%|Js|792z# z{j!HS<e?x?(hGc0bgL)7wWYksIW0a zs=q0ly8!W1k7osSU?BME2SUivpG07NMPQ#qNT9tDAACYM{K(h6!;}y%Q#_*wVB1Lu zYcYx4BXh0LR!SSg&1&|SNT4P+B`O&lp=sqFbgp=!=|gEKz)b%@t>u&s$ljRBa*jE1 zC@7d46g_V!;4?R-;l#y$Jdv<)78He}qj#EO2fK?aSkwK~7JxrKu$*<)`OCs 
zIKSb3I?Wy)3kzbEaVTv^+a2DBhVAT5r6^TQ3x5}oxtiFMh`PV{Z;(2!@i)3OIr`&# zV9;uV(a3N_Bm3+Vrrwk#=9K8YlUUruYwf}l$=}oKm8xs<@553RT+D)}yM4Y2Fl*|_ zs;QO{*7}2bX}RufcgVF?;gPjbJlWB;LoFhJzIA{T@9~BLMqW@wGhf{JJH?=eJW1RZ z{XoDokOc<*GH$t_-s8Dk;P^2%`7=l5cA@>9I~mYx-2La!gB5sn(ak=Z(gT8LEs}5z zr(4yKa$firE?Yw>!RUCHt+$aKYfIM1=CPHKQdzF*CaLmiZ6q=Z9jo$xZ#W?URu0m~Z=Z#2(ZBO$bjuj5c`es5B{=#8toP?ZxZc=#E7G zG#!~gmEktSUGJJZ*?4Bd1Up75j@Z>!gRA*DgoHW8b9|;JFRF0wAO{E>dt8XmZiKJ48PD#}W=*JffMY$vXA)2UUi{{1^!~dg}w<1-R zk2!Q5z)>qH8x`DGrTac^B|pY-y5Xidt{Ur+a^M(Mfr+ud^>CKr=-e5mL}-IQ4q&r= zIu7)%2<&Z0;-P?9@Oy_6_W6~Gw^cQlfgkyQH4iwPEhS=7^$MFniKJ4T_xup^lSe}W z@z_sIsT0VbXbNcXKyME^7?g^S5_sRGYf|Omxntra2Wh_t6q%13B7sCQY!jLh6t&4B0cF~giC~ms}fjV3h?PqR|{H? z`5R^A)T_;k(zQjg8fB6bWep#&{wdPis~E$U$6$)p|If+Y zy~`xE)yrN@uU845mEYD6=zDN#fwD0-+YW9Ul7)+qVELMkrUJL1;&xDwW-6hREOt5f4|ap+q6T6~HNi94Uw{hx^()}gpi4z6E4aPA&FA~jsBvUJ>2`|b zMIFxq!&+d6k%Cm7Ufk6q2+hB^0+&3o{W;mFwP#5*5&E%jB$#Ni`7H4d8In)&N4Jd( zd$^P&*6s5{^6M@HoUQ&oijSL22VlaHzFd%hZY9Q#Xf6^pST3K8ZWKAoBXyd5C&yOt zf6oF8=~SvbrCfAl*(~u_{t%BcMmt77B(Ep%f#viQH11uigI4)}j@SIYTVE?W5nGrz zyrKveqrHeV@$-?PV92oL4fn92>2Az^W71l2dq#u~<;Zpa8_!3EvYzb>=ULlHZ`j6D zW|=Pa=SOI9N(#5qs-LGrSVFA^91k%cgG2?&`sd8=pD8;(M*3wQJ(4|e@c(}O>ogGB zyYlH^#g(o_iJn2D9d;XCw$1 z!oZ-aV*dLv@WJspeWi5VkC8vF-uKS8wp>?djgOlK(xA8ATsk=?g<$0S!=R*~$A9kV zNb6VNZgfKQ9CEm#d7I$>mQKo2Lp=m>ad9;(4FXV@>J@6*iLK~9BXXThQ>~&?AcixG z-i3%jxnS=&#(VO{N;IRj?zLc$o?1KVdqv-D_Hr#Z+mYqRe5DQndi2x9h}1dXVN1i> z*bQU~TYP7At@8LKE_F8HrOHU*TmR-i-o9-JXqf-tuzbXpLYqYsOmT7GAp00BTBT&Y zU=tgud6v%p3F(%F7LZMkDtx`IHT zkJF;0gBi58zuO@hSO9-^4%Hw~vwDq0>CWcy3VLJ%Ba6uG>Tn3C&l^N(->|+wHLa$b z2KHjyj2)BC96kJnzFcn0qDIN&lorrqqyUextDAy}rJWtMmr*tiJw0>~>0n|A2MLx}>$@_^g9EDEQB;2gkjFHjPzDc2WLA2$jb`u-@E zhL8A{xl!ef<)k1QY(t4IZ>44^A&_?+FeXly{tI8@Zu;y;3MNnZb7~h@r4~&)7je7k z!yuZx+Uxb5t_QUS!_k<&**eoVYpZ4vM4y03C(pIQtZi&cL#*WrrzokYBycc4+PuwC zzSN#uX=*pR1b}a=eP0=6tZZMl~)s58wIE2&Y}TLV_O=Hwx){VSPDG;UV2k&RRhV zi|Ku1V`D!voRZ_4gCLFTzuzE|ik>CSzJGOjD`N>m@BalwS2KZrK=ilgRE6l-qpz%& zK&OJL*wY15yd%uBYr7=NKyj4r?cP1D&cXWm?}(_|p-9*XsjJxuty*GiQI<5aP>=J( zSTZv540YpoMl|Ko@#|A*oZUYBfr7ie6ul8Q_VkS88pss_*a)H3!-Gd>@lqu-21_6zj|CDafwnRXzsYFHwf9V8_5cGmUv@%IB zjrw1w_4w%I1nc62#NJ{cP9eo%K+WkabY())Elzniq{sF4eB(7qX`_F`Y5UT_d*zaY z<=>Z)5u0cE?9$3*HhrZ?Z7|YuZ$_XIzJ%=)5YoozP%9brvr^JTebwy(-Hh)u8>BEd< zZ82>^N;CMeo^R^dkbQF2WR{ZIpoli(f~@nPy9EC|p~1ObO?@Xo@wvRXc&nLe-JtRr z4?G70lvkk9K|?^%U>?0vczs&*e|2`&aZz>M9-kqGW<~*rhEZA?1qq1(BqgLCkWylh z&Y^n<1%U@iVWbpP5D<{=6p&Ivq(wmK?zo%heee6ZpZnjve1@9gaQ4}It-aRSd;Pvs zoaYgOAT(|akW3M>kmZqXGWfIQ?nbhXr-}SvL3)(b|H(Tvjgvt_K>uW@`ax~4yjx?# z1fR{l7vo7&g{_@21YfX{JS0gL^v&DATE5Ygu z=}bg-=y_U>C2#9LDT{Oa>5vMv!rQq2_3>zANZLJiW@vsy91Yu@`+?VJUJf*uC5ul! 
z=1Pfq!MC0CGf7vz^Ta)kRlIs!5p1meyk!Kb&eeOWjM0EVthdw?N=f}wsnbT(Xs$5v z)gu=vM2$GLa9?IHpVy=n^<5$BS{1Yc*JLoU)@dQtS9VAU-;|m>KmG9aoRq_}UI?rc zw7w2Y)g8eIM1SqSj;7I5oRT1T_hb#%`91YD>C-6zi<2A8sfsV%Y-;6l4*T9+gb)(b zzL0lr_{Ldzd_+oyJp1Apn{f1K%O*$g$&B%Oy?q#u9aPqFLDI^4enceU@37~V+|JquFuaHx(8uTHRDWOI-+kyO*zz#@Vhe02NJN_QLKyZjSVa2EAC@a- z5ke}PV8hDiBI??gbbhOwAWLN2Ut{5Nd@c_`QM;FG9@5!25(MVDS&&5-Cse78IdQQs z3i7X9i55~pF#L))@!|%{DLLg)Y&x$Bz_?a3fixCzG+m54t!=WWwuGa;V)x)c!ne zqiV8G&ra$Ck}Rn3>%6U%@OVIe@)K-AO$b>2Pvb(C^}i(Iu+D^cj$es)l* z?Yc}zaEaqm&PPHb8FvC-9`Onyjn}U&$8jnWq7hTqU$}}p<_&gMpvFHaB8TpZbl5)0 zT<4nT6kK_=KfM0H?t|kuR(9+Urz*p2ftD0_!qx7iH_thDVpX`vZZY15t9W1N+)Cj$ zuC(c9)My%zl>O_#yl*ll>Bx5-pFZ;?50ZHAZ|JX*8!~($(P(IZyPea#g@q5&;$iun zWs&u{=A0NB`P}X7uJlIoy2WQlhoku3E$a=p`m-Tp)7fU(v8dnv8@C!wYUq(+%yI$u zTbUY%q`%*ytX^5xc*JvMR?-(n$UQLo{i#?_uJ3DYZFjzCwqWGxC)Ujycbn_xWw>k& zh`-NF-hBt;)x)6#s*L!jX|2aAN0!6akMpX`pT_02`K%E#EQK@soBOdD-J8^>uZd>8 z_Uh*|KXMn{pGC{I}Cq%E#=E;a8gC{GgGZoKbP3!^Lw!w3O+6v!M|* zCiUw=5$=m9t^8M>ZAxp7&w1NcNDAV^V<)4W5ntw}y>ns(>iUzdn&{&}6^|NmHpXkU zFQ%5{;ysXIhYJt$;stosgd^w-6AD`L4LaWb^&pztKYo5HegGv()de#98{V^Wepjmz7d=1Sx*h#y7-sk5yvH{EHPiYZeA;cU5#nsJzC?)utaZT`vgT~$Q*xqm z7GiJenoAl3WM6KW#9d$mqX}JJwbutwi6>da4Fo=Z)FbFXh4l`*SZ%sTK~Jb&LR^<4 zy6Yin7O13JnGF#QZ*@Q_d+a1dG1=2Ansr>yWSR3h!t1QB-mEy%#SnA=^Vuf-<)KiI z-C~XP1@ejDMLtuhV0*T?Oc|=iL*6SZhURfKzWkiLrqB?Q@Yw-b!t;!!WYNle#-dJg z>=UdUU2=k{GSnItF`&|lK|b2Apa_S~fOd=ZAi8L!LAjO=Q=s^?$*}A`-E?{6&y!0_aBA z;5EBJ*AoNb-w(2aZZn(;6(CNrZFZYHX@q@pqk5a{94dnhsxbaE=}#AH$GwYlfA)-S zXw&p~=vW2?| znn}$W+G=AYzca$!T%2h-?T%K(W-o>SH)B#G z!$|Ii;Sw?xp&Wz{e2MjIH+Pmo<(i<=7mPj^S2AVJogMQEjnF&EFkw`=b`>Yi8b+Kz zsp9~d%OngAU}>2-C->R$nq5slq>2%Q*8MRo-LrsXh)D!7p;brqydZybkO5u^Livhp z7TmU+!Nlpb*NEoRef=f(8U!iP?7_*Ld(CC-V)SFbjg&?u8#UbFc=>FgP?$6|!B+pR zG}zHtQG0|oV@vTvD*l_q-v)^TaKEIySh_pyo;+z;pQ?jGLggWSeSLOzc4_VtAH4wm zLfOjd24ps4=9G((iRqDOIs|h6{(U<(-O>b$- zCut23kM21MO?hk0G`+v`hxzBbX2PIN33HOT-H?kMWVP_3$eK|JuG0}(TmtFUTheDj zDY59Y$dgm#Au$vb6kSy8TR+qjiPRFWI&-@}1Z9_og||CyFuV~MwCpQO?|arIN%H!j z#jGTx&YbxC6o#O@2V_$@7Tfc*Qx|9Lh=+v#my{=WR%>vHVta}aR7ulC!Bs0D9_MFS8%MIY%XV7TR|0O-=E)h2vkI;9UmpzbFjE7v$juebZWCj$ zpX0Hk!%sUKQ}|3o!XjL{M-V?MUhpjI=w{f=iA3{(f~T?@S=!b;;rZvyx5q*kxV2Q3 zNm28s}(?DaPc>RNyIp`M*~a4@{zl#hzeA^gHT;NzL?yiI4Mk4P#m)C=+`!qn$=P{Z z2VOyg8{jet$9wR8$-^Nd^aLBj)0xhlYlr0nJSDe3T~HcUvo8dqa0$XAg1H0Yr|E{9 zM&<;?4XnPE`NPg9V*{ab-XeEQvBcjo=hUmC0|bmzhlNos{?yQ}Y5gDN%Z$M99{eCd zlQP5*s6GS7lAmoa4h=hdZw}1jS@MVrWQl^;88$6WX(1$lh!xcK^+BnZh3XSFQWOU( zIs3cWGd&aW-ho#Nn^Apv1BE6PMD%__6^ricj-K-!8ZFM|6xbEKC9$UmJBs6sC*(DB zNrp3K>=dI7{d$sv?Cr^ZcLATp!A}PjHXCO^baYVKyy7W zZdE7wLuh)qZ$)~3SQlO27wJKLJ{hlwxiTP2B`Z#%QjI?`Aew0UP;Yw>$WZjFr@}h} zfzA&DxeU?T49T>6c+)P^EW9C)uB|Kd8(UNm8v}IX#B9~y-$H4#uI?VP%jGSBMhpHI z4v}boD^pd87z-4XS6h0=Y66= zNAu}I)T%!BI+ElPl1}ek2lL@oe`Pud_GBh3Fo+^8vW(y=diW`UYAyb5-f0iXbnFK> z3H&+Nu9yYnWL2tEdUPnA1f?_rx|Bg~@6qojD8u0_UMFW=eRg>B_jvc7ghKg(gQdrk z&D&HQLevYSLTl}=zniAzc;RN&H;te_uWw!)Nv8e5mikMCFAV9^p{%hvkVI$hs1~^- z#A%1NO0T*c+5HYD`LGjtQk2pfIMc*uUm~_C(PO{qfKrF&=__y)?lmcKLVCp}xgDzy zy<>|_`Z^g3r^!T|z|iX4K(jh66!s)S(>nrzesI~ZADc>u9~#2=`C&H&yFI;u3B3VR zGKU)4!h%gn7dNmJKJFN4ScrF@Q1WuFVlsF6y}n`9754}TL=ehu$M}mRvelGi z3p8o^n*JGo5nzJ+#eXeLlPj&y7Wgms2)16zIAw0j$MWM};BiUa$j^UbOT!NWzISk3 zeekgqF&wRGsOJRP@{!Rk`D7vhgTZEZj`D}ab*Gw}Apo~oFbq&-hXIY%Cu#hBRY64J z-p*e)xWROf9)Z3F<>v2;N=scQAy-R2aHx@0S>0m1Cs zv3*a)9>(6haiEgiT}G)8-9U-`?-=mnM_k*SJW$||7Mn>gG+z5%0%c-I-#)RW>7*ww zlNs|9jJ@*txf_^-c6P*q5Bme)ow_C_FVg=Cy=4?QfE{;wIecrU38}t*qQp zeI^-jC~O{dwD1M?*%2Uy40tv@hZk}wPCF21T>on4$kNO!vQc5Kw+=Su(p{Q%d5csaoTuO{Hvhj zKbqdCHTOrc^3Jhh$WBs-+)=l-Xl?Mwt58TtpRJ;i5mHIF9Sj3&(BY#u>p;i@xUu0K 
zkJC%ne7q7l=7v<*p8qIdCl$`51MU;HiAOYG}-m<9~FjGFGt*@#|_4fA0tkK_Kx$BqYGF>lepEY4>Ju9Fo=0-*Z z_5CT~{gG5m)e~!rnI6+qQ}|@7*vYKkg`a>F)#J^R(O5MX?o&^A z-FBH&!HLEy6DP+XE91My4hwX&Jd0a+|BV?PmLtAuM6%reG1&}6aOS`bn^Qw ze@!_{06ppBp7J&S6;CM?DFKMy0LX}X_Os}i{|{dghbHI4ToX?9);A;edL~FAvcIHa zQ_VytsHH%DjICn<=AovTaNmk@Lt@%uX?(X~WMxk{T@wJ7!O|<*!=t=xV`-=5 zk7w)3p6$rNsHHGZQ%o*t$ykg$yKteb206Q$PH(8YRO0>C+xTB*VyH|rQPJCOmCT+X zC7W_KGH0TM`81gC9voo$=tFgSh|9-|PI0}hPZ-MP{8zSobRIq|tdEmP7|0(;9FLu9TOkv>)N4?zX9H!siX;k@)a~8gT&vjb=RX3FyG;R&6(f|vgyJ;w2 zIAYrY@N)!EfGH$|NJ&crVl1I9ZTV0ChoENOYh{$T1h2&%d+$bt$y(8Ip2U2Te&U&F;q3m{-B)zLeC|KY@C= z1&F@59NGFKu5$H?`uX_{PES)qXq8meL5tou(}(jsQ!jh%n%^MY@2f?EVE2T%eY-Dl z|Gg7HGHar_VmiW3*6ZNl){IvE--k#>X%#2}?CvVIZkv98>ykn-RqIvy7r%NsnDG7m zrS~^x;E+qUH8AKF$*8uukeLk?5C~WUHWfTQJ+n(n;J}E%c3J7TqsZB1%UxWALUh)j z14x-NOA=nIz}+@`#Qu?ZMjpWoh;YfwYrx_G&qj~+wp<+l=08cwuA#KYyl9-WGC=< zEzVW*fC?YV^QJjXK8y3jKlfx|xo~F7&@UiUUw4pBzBz340z6(sp$yqr$s8xV*QO8gL#4jXdeSN+9 zM7t|lV-6xR9vD?{dYQm#8ZbP+_eK*z#7Pfq63i#}h2iKX!a$nxPW_ zIcKqTz&djC!$B?b(8vfe*xJ^mopZgDRCD%jJ=}v;Z+++J5^)2}ijs-kJ;er%lL0jNcn8Cf3fxG8KVOEJ%^g! zk1BT3o>t#qbf$AdLYYNbc=ZiIz)-j?dL4c~d1xP%=N5VzGv`}Q4NG@M9c?j~BOWG)rf;>Pv zy5Ih$50*`U6wnik{W+MU2t2RU_r1jaIQFr`edKy%EO=TQ{np2_2w@6h-f3A zT^$-6%q|Ls5+dCoC5?2Rd*1K+ z&;OtQoU_(h=Uv8&GtV>kbI0D-zOHLeq`IoyLrgME2n6y_L0(!D0zrxgzuD1I!N2U) zt#^XIpw2J_Ep+go54w2-_!+}d{*^NXf@^~Kjr3K#;57t715uEc)bhyKTk!TtJY5jK zo7((?B>$7{eGYQ|w{w?Zq6bz=h4$5jgdND=C0{SVar7QHJ2ZxC3(UKVCpaA%UHTOS zaillSXsO57YAx9Q!Fg+45X8T5^o1p-iGgMG#rv$km$wNUisPTdKMV06Y_5%={n%`L zYj{x1GSxJ-*)O_4UbD*Ve}7=XVDWBhW&wMIbz$-90^$g<9{*YxFpV zv&5o){?z=CoV*II?ob!T#AuWqRq1y^)=OfbCZ_H2&H#mBUZ#F<&d z`-~ljQaGfxmLDQvYs;SHxj|ZD*hG}=a~xSNMNcS!5#kZ8S=gOGCr9x;+wbc2?pz)2 zt5>hA?d@fim80i29o?kfPRKhEEKN6iK704>9UdhmN3z)E??h%+*3L?^KE@j~87RwGoRr@vlU+t_sNFElZ^u;Lxv?z=JkT_jwQB-1r;`US3{CL`6v%7*K#qnQF1tH#ejD8m{-7-(315jlG-jpfPFq9fm3x82X>+qhev?=Q$JDt68{I8)Yw8?1rV z`$a^cEJ#auvR#nps`iDc)9{=6RxZ2;IC}QVc5;aA|)hHs4)q#Nlob_Qpw`gv^IlH=< z%sB4ekt{Khjnn8;zZ$(C;Df$??O$n8%zuv+)SjN6ZrFGRW_;RrF~gaPoO}=q6H_)i zk-PVNZ%$z2G`=0Iz4Ek&!w`7?vtAEp=VzPqZ=D}Q1E(e@XZPwHmYxrjtjWbuJsw>2 zyS5u9A#_;PE~DrovjVehTx}UMUJf?naKrkBRf|TJi2LxKvmRmpg8PzPor9+4`|)GM z*d_5iyuF)rEmcgZ{VsABTCGO1)}y&wJ&sJDe>`f<=I%{QOr%_NP!TONx;})Osyy!! 
z@5+7u{=J8r+u7GCDP%OhPlVZN0uEGfY;1BT-f+9`8$5dSXt`vV`MuYEVj6nS65Z#OuW(1()z9L zceeY=pvg_e)_k$WTcE1Cy3%^=N#!O1gr1S{2^$+$O-;?#)>ehh2dX!s2M!JneINO3 z98b11cm)LLxViBI@~Mi7S_!rq2P*Kj(iuc5Cmvg$^l3*txi*qa)Ntw6lK zL(9wNqi;1eD~W2)J7%h_G&D7N_RSidqru^iZ6cnut6SJeaE^5ucX7K}nVp-{c<~~* zzFx3kj?~Nzqc8KkuD14xxHvUfi5{?FkA5erAy?3=_ohyx@+FK6U0wF|_Quw$`S>&& zzNZU}AwWM$17|6an~^`Kwx%Zc>sNG#meb_voMtfT>_*KV^A8KN1beMB7A{(RhjU8= z7It@S!wZsfWfgJCn#IABLmYqi<8uf8nw&Ja zBM5)b$0jGkoHvJQSy&X<2fu$OXmZ&Ar|zW;m^ssl0bNn2=mf6bjK%I|&*7C4fw}2v z-S9Vb4$Vt?F&4gWkl<76ZcnD3`(C*~NKRi5T%AY`&rS z>M?3>{Cv2QDoZe|*vEGm+@`y`TcT83WC5JC@a^$}wVTVoRGw`$WIG@tcA`=H1c2G& zK2koJEmd>Qd#PdAxaC)SCN}Z=E0& ztW?{1xIs`A%-(=~Nf29~qbmiv+0Zt>&hYEKUXgG5`_}7fVy7Prb*$KI>Xtr#rf33PKPljX~ieM;5|837Qum-`_<7~!CZb2?@EJ93TI9oifxmmP-{jG6`9J*6S zgY#wu*qgk3e351x6M9Iz&XZrlSkwfD_7lc?oNm83=9o zsi>$hKYdDUT=4m`MER6d`IPrFQCLL!+5Q3>|FZroCa>oEyX#vLQm2>gjPXZ`Dk?SU zjGqb%F(6+tqv9b+NlBp~@4!LGg|N5OpY6^B*Dd+YjU?dfsJwHcktzr)D=O+t=Fq<( z#-=q_E^j`5ci{_CZkb`zMxS*=Q_vlTIdhF(41-c~gzpRnZHULnXAfi~cCpG&pFYJW zCvP0612{qtwLk`uBYwc#R3zNhyqM4P>%<##b90F&34+b%zXuWpUH^PAOw(eOF)~{G zS{*_FL$4&Rz%bvMj(ztI^YiD=Gz<(fKA9OA(HR*OTQ0)kW&WClj}-HNZaY1D_ADVS zZQDiP#)j?c`dZB6ao|&7;aPjnX?4jdN#K%ky&Gc3k?C{N!Y!lhe~n5$x=D zadEfEKOAsGq=*ix+ksoX$ z`Jba-p!MPN3(V|n<{_Y1BE&%y%lVQ`V$dQxGZBHupxld#n0~)?mZ|;C*|4$Dn%3;g zv(x{1h1Q-w`2O;I`QV0Vi)WVK$L@JZb06)H0>$MIs(-*<5cd5s@{k2uz!(2%ObZRT zjUOc0DP=_X8ax-_*{1kCsj<(|gSXDkJyp@s*d@A^$l`ZLabTCwCSo0Iql!C`G=D)R z`Kgj(VIEjI10{OAH}1h47>`k0Ru(3kSE)o3U8yj&o`&bY`cVz8-GXc|F^BnB0M_sQ zXm>-aty>xaIFU&lnlSw?be6MmUiC4HQtU3e!p}t^MEEOp)7JD}5jG(Il)S1Qot};; z$nwJcyE)Paf*#0YP{?$R4YB2qkL!o6AoAf2CUbOESq!Qh8R5jp(oyxjbB?X5;sM(S zK3Vcgx7Ido!41O9#?~cvJ|Fz5+R|kEXTG4vVa~!Ccr*}d@g^oFR{e%!rTWdgexPvO zuNbEKU)B2_*9$%Fy!XWK^u@j6^Mrh(ipZ>-L`jRb!TeTNXLfgUDK9TC0fQkQA0Im& zt;+1q)?l%)uz0M55<8BFoiG0oSn8&9pEo(r^1VJY>3R3q<9LASsE61%=ksT1wbckJ zyy2*>u8xkD7D>O(UIy$=zq`NLbo~6<2KaBM$_&8>X%M&G%yeI^9(mpsh=LZMo*o6@ z6EY;3)9~HamNi#zh3k&CE;>OXq3HKaH}4mUvm4;KTkw zq8Yw?3CmePb7t=8rnQx;l*36%NwGTjx;$Qwh>YxzqxKDX%%+VPr4Q@Zva;Sx_eG(N zA9EiayB*C;A&|_>%!`}9Bbiz5U11Lq8-$lECN(~cy6@GeC-l=tBjUBu^lg)-Y|VbP z`3W~RSU1Ge%*=rvQr>K%ZWBuy_4NGwlexCT8AA0g+XP_e=V}%rwjM}wYajqIb8t|e zdPH;6)6+-S-~P;3QXR2T;jy!`%j=cxH}B9d@ARpEB+cWDs$4^ig1QHVyKW9JCK0)?+I?U)%-g_9fhPzR(HNTLuX&d zZ~m;8qNseo>EL;Wuq!4xzis>VL0jAdETZTO;$Nf3)b|&C;3R?%M9a}EjE|2GpcHY- z$;oNozN4cIkB7I57Y8>qKeBECD{q1m5>AbxbQgAyoDLyFYNyq0=roT$>h&PYXxW~oc#>*O_)Ft)@&x`9G%S!8!4ICJ)*T}vlLqmrD zXOb@%&uwj`dfPzZ)k3A1Pe2gaEr%b58Y3I5l)`BOim^MK(D(koP@!4NpLj!?IOEUElNL?TFWA%Fy*&7|jl%VM^tCiPZLHvCd&9Hj z+SpoJI}QOx36Be>iqGlbv4|h|B6E|);819Xa9Q2cqfMc{C;I!A!lI%zOWU2#(-|`e_zt zaPH1*maU$DdyI*k`j%nZ7dJ>HbE9n;84p}8-@j)0?1F{wqH+n+t2!6vDInSJjcx1p z={^Vx&Rap-ZDDS{yxb9t2;BdXhi9rRtZi){onQ6}v<3#F;m)>{my}>vTmE<}IE+Yl z0Oel(-OK~U9Hhc#ij&>2@ez0t21petY^f13|2vM& z_w(@HooODEJYa~&>m!i@Q5uIrFn}q`1DSz=0py4G3o%*T{2Mv+y(C6n+YG$Cyl`;F z`+xihRm|@MAO(@<;h^~T*sbh41fgq|ArG8fP*cJ8=NmvOk+HYu1b3v#5@%p|(9qD} z@u#q0bZiU>(mygH_vQ^NzwI;)6%(j`jr}eej67Bz*e*0aW=+BbNT)igys?q2$^D?$ z_k2N4X@lS}iz;>L^5KxkJ)z*eKxM|l9S6DyuEfO|4;PuJC~HyYaZu;^)11=crJc>l zOrqFT3uh)5>|r60*v$n@O#f?&KH{hx5R$XCEvcW@vb1E>Voj2oL1Yp{2=`eU@xOBh zNDwx^3BZ61Kt71n4l*PN#fWDW^gO0_+nWvj@Zk|CT2_bBp1EztMMhSpH+X{c10Jkz zAdv|`ag1Rh)hyA-@dCA|!S(l}J$%`n26vc_GFEIKjB#n}AiLysS5GTwq%gX%W*e`Z zCeSdtUz7|^=ruS8TSN;{Sb>}A);s1J&Wwh99 zo<m?FRY}jgC-97cAx+U%az$ZQrqqxv}JVqDW)JHTEVS z&ULb}Uj}d#q76+}sE-1IjgBG zPyxS63_FM90D;>sZRRLc{d08P0A(MNlCPl=(SwJtX+T_=zlsw$5t|IN1u+O!<4<1ZcnxTQJA1z7IVoC|Sg0~m3x+tg0LK8-OY`(8 z?mwjLU}8_w~C+89-g|@bFZ4pWBPxU96c#`(KS`BYuJ;%btpPpGA9Sn!jDq 
zaY>#);>(`Kq@$xXJV$Zi_<@QQcWCUGi1(vc1p1?w+y@X_+#t|UJooYFb~T#kBNaaw z@KAxYUtf3FuyzzLLA|`~5LxYTlt{U4n>_KjQBJD*hyd14a&g)Ait+i)xs1Mvt!N0E z07?|k*7kT5A`}M^2)G4NUq8H%qn^f0gSbt-tw?HskJH~_aY`e~Qt0DhA&&*>jwQdx zTefv}^73Jahh}h$0q}Y^cwLf%kr5TdmGxf=cRw`^Pt{^d9slrFyu=OmwIWT$ zOD#rG)kZM?NOA7z?MvR|&0h$%sCiUISFzD|1cG~1&W#;uqd}!f4|ns|bgbBY2_KvaR!LiJTSkW<-T>*+ z=j`&?bzL4GKO-lykkGBhx9wGK>z1KF=o3(=l1SfJhg&QyikB4w%;PW|E->~49vDUt zfhD*Z5f^vvPC~_ijyz;p4f1=#k%559&t>v?TU2qwo#|ox$0)T|CKdUwel@X5Ja9oC z+jjX{ULJ118E#Y_u2~o&z3Sr_uUZf+n-{3e7;IE7r>KZYKq%IfTv^T16}(`cIcv(E z>$MOVcE3{|yhovecml3X1$NFt2>B92*@Hl`P=_r%S!@x`)ad5L94D7f;N#B+JuVzj+e`hGf9$ zKHXMUhBGrWv&x^Qypbq~LA%!1q_TWKjdGvUS(#~eO9vMyxOytMS{0y5)0G%d`;2d}3lSsIf})YQF2_62N-fe^?$q3tkhN4cB_`LPSm-YZ_|GOGf$qvu5G= zTS))+@15&|DH=LDXzjJLl(gM#vbLN3-;wlNV`qheblVy63binUJK#E&ykxXVij-%3 zWHHPvEckPxAbVzc?+Z2G91qg*Hxj~{qX3e!9K8Mg`8G^{oVH{}dXFMe0sv4z-T@A$ zi6^*Y`P-FZ-WUxXos>^zdO8-M!@hiB3};FJcx{#=6cd6OicBtGFYV&u((k$u9TAc0 zVwl($TsG$oLeCfX#enT@{G5^6`WMsl=KcK71N%S77>Tt#%f0ZtHl_RkuxVsuq}#+x z02-fJ4L>4`sELv0Op(e}Uq0C$mse9GK2K512MWhqL3>NfjR>|WR~MJ)#wRQ0kBSyv zZ8wUI{2V?)J6vmXwcXTBLdz-)kwiziIdsPD84>xdL(k66E)B9jK!YDYeys4lc5~7P z818*yVgi`&?rv$oHEF$k8TnEh!2J9_Y%rgY`;jH~JqqHHIn1ukjt}R73r#r6tGcJk zLDZ#@&%1SPKB>v(UZjvpOzpAU6fN>(nR$rxactvF+CvkUmMfiAq?!E|C9k2TJZU*o zJ4leX4H;Qb^fYr1_o-!%*tM=+=b_Mwef)~@rzgga-BzN>62IQ?{rU4JXUsaoUQIA+Ud!0k#F__|uidZSc{U~_Su(!9j z)mf9`2iHyA_4CHiTa4lW(~|e;6Edh7bz8jn0N=<+Axhir?lH%gy5vEM!0z&U6ts3Q zJDRy#J1KB%fRNB9=9CMH7YhppGIH{xK@e6JUU)yK2@Jt$+&@~&2)H`0EIr4-l7Bx zj^;zxSPkWJDPyhCdk6;l>z5Wt_VA%|0vIyd{crlAA{~fSxNfigJ<+sOdWZR#UvxAS zGmh_*fPlbS!)BJ0tSle=nZi}9K$4+G4#LDvxL=ws`3c)0pL^tGg??E37A+T?>)ZH`T4n@K6Sn*coO1) zA+Ce=_ug~4#*cK`l);`X9dNWE?1mn=l)}FYU*N+;qsbpG zhbwjL+*z}IkPTcX3RpxaCjKwY65L%svZF1gqf2wtOG7#WJCqY0dU)^WzoLnEiA(Ss4Xk_ z(rRuuu`j3x&+m*NgkdT7w`6aRnHmi^wdE~mAGJ%`1FnP38T~Ab=}_y>4B}(yk1p$xPwLObB|H6y-4XO z%UFQ%(j+N48BpF0H_sCXLVo`I8JY1Bk4Lq+fPCXv2hQn7WNCAp%JZ20fE#@-JMXvT zJbx}zyyxGIm7_jt1at;_ky17tw_wa#{lyEE7+ER1Cm|P*`zd1+zuS8xscd0;#lXW` zJNPrptEXH%EXp2|EFdDS^~L4Y))IkO{&(X?Fvt@a9sd+uk@tN+j(P7v32c9%;|pBTfZMvjf)=9++2md9H&HVU}R zTtfb+W*fS)0v950-#2;0}rE zV*(z@<~p>St~ZD0amb8$cEQr9ZU=f5P07Is#4SPaUi72HyQnA)W81B|RE|l566*By6d~%zQV2asc9^a}+8oISXtZIlZ@$f69HFgb zW#Qy&My{@|;-A-)wUueHayu-F0-B`Wl`;g1JltTB8M}Fhb#rPw`yR~-C`X zW6TH92xHG(4CzvBIwmGGAU%|c0$2=81%RQLt}yAUk1LpXvuC74PXII@$?_?YQi2zS zS{{2fQxFj3DW107RFUgQM%+8?iJLq>6Fw|&n{|-CWemA)lY97^9`3;&n5xW3&%`9M zkPL_eLMGLE7tvB}Hf@<#1Liyh7~LOjdC49^a&mK#K$JleGQ^~%TS;jPx%41)Jo|PV z2G$;ck7<>>5k9$ya!>Zap-d6t0<~;eJ-v@zwV-NB(V+)~>A#qo2JPa|0rMbOZh!%& z9PlTALho=j>S?2kD(oh(&Jd;iIriNcTEL9$TuwcH=b!LE871pX9D;a|jkL>+Vs5X` z%@E`jUot>dhK7_t#;ydZGbNTONzsIhWcNGZQ~(a?YJJ-Ymq{(F8^C&s#WtrkS!p@Bu3%>+)X<0s^zOdCYasLd z-E{ri`rKtotMlS)XPMH@R485e;kfoXtjd&fn)g7f?dK+?u+P!2KV#w!9~2qDuWx## z)0vYC1CI&+3qQodh-X6{3E~ErGgvO(oE&(>N0_1G1#v?|%I21q!eUl(a%i9=cyi;k z01zrjRvt2Gw^tMo_YIJMfBn_gFycRwmUD?^$n9_s$o!%qg|C&y! 
zc34EJWyMwYYf)Rx84+=T8X^}K6~SJ-NR$%Vq=@EDnUnO1A&}oXTz5^sKelq5ki5qK zEl>dSSiXik-*+)_>yCjL2zn!ix5MmW6I}b)Oj(&GB!mEwR9+z)oy+N)%)8=I`BCUr zMBJ&x=WJF&HI0qqfDWg(lYs}hF^=K7_Y!w3m0>qqYDQr{*B}#9WqcUNfY)FU3o1gd zKaL?#hQ%eTG|OA}gXu4?vKf&EeILPQ>u$&?XsI?Tq3@HE!#;i_W7n0)vrw>JHTFgN zTkeNsK*sZMWNJg|qQk9Xo0pC+%?E5gpqEs<9%mFi>L!#|P;hD-0hw3KRTzehx=C>p zcM#ZiOEx-X!veH2{Uj{6b5_0VU}ZfjC7S&Fe6BQQV9OzlA{!ep@0rsF$s$E!t)RAZ z$4ctLgmyZE9%R}GTyP43WD-U;rXatPJ<)NKp7IagXsxEO{UKY?&jRoU-qImR-UH^p zB2;0>2m8qWJ5$L6xTT-WXl-xQDn9hwVMXIn|J6m-U`+yVVH7L7Wg2SZL_IN{!E{gv zK5Qw4Yj7jpBMxq35}liy8wiHmx><8OPxcwZabEd(5rv zsz;c})5orevMf~N+<;)#x`qsvvCNTT;#d7>)W?)9gPX9EE)=dHaQ&1EhTI=QAP?X$ zz(F7}tG|57w{bUFIE8_s)m%POrMTSWFoXI4IoU1tG%i5pbQq=<0?orYx&2q0KI|@(7RrQNY5V*>@j9LV~~rQ%2rDw$DSLgo|#E zZ%TDK#P9mGnbG0y3uwuIjv?~x$IbLhx z_9?&dUE>D9Y88OhfcyBWRitrVsk(Xhr?_18#3iIY+W2Q@iLkZ6`wWH85~`fjbtz*3 z60mvR73e2*W}1}oH#YQ#j)`Mc&AZ_T$n)&U)pKp3^&E3!!-bi_@sX*Hh!Rg$TRWKv z44tli1{=WBWrsGPLeBCJHID%fWCshMh#1$PX`_t>BMpk&_)z-4s}Td!zQ;;RN(m_` z0%g7fzdMA`?(Rs34w{h+L>=bvNMENl{n>vBU%|h_ijP-d%pvT*yS=Ui3>qZhf65Qs zf4sr=a@-gq1vTb70(tb6ANN5B=x|TMPk6%d8gKF*4>0ro$aLle>FUS|N=jV-l!`u% zqAb<6IoErVkOMpgUM+sz{KSa^&|VUAo-`fPCxPMqrwl`nE!t-Q0bt1ls|TV3mUXBE ztl0vF@wuX6^o5fk2yZx%5{!v4A0kl)6HL-=|KdgsKZ742jw&Y(F+FQNFQ+d#A zziZt;ke5ftoQA&t@Bvz*XQyMyMUtUy%*V^y4(F067)OGfc!`11EdKrbpl<1Fj3Wf? z`JzB|CW++{rtN$^{@B=9a+WA9{gWriz~6U=VMVJ7v07N_8>f2Ob3Mxfl(Q}K^N>gV1^-Gs= zptz)@`+E}G>kmG7s|$>&#@G2~BSkW#c-rfMhT?`bM_ecFYu4mXz0HkR$IaS_U>ni+&?mgRvcke z$SB>AlhAgvDAIHv)O$jjVGr)^H1t440j{2kcUn~EU zgLbdyzUp(*BE^E&$o(n@el9w_bYQ`QR`8{>n+f}jK1jgh;r<{IVVtTq6_i2=z4GFC zpuvrI9Mx{Gx4hJjPP-pNCh&_jd+6p<0zs)zwK_b5azU|kH=gr(F{LB0a$uFdbx&jRlE zNiLckkTyDAPZbQ(ieB;R!m9j(=|Ysz!PR^>f7{x9F7Oa^`OOs!Jv|#5_3q3V9)cH_ z1BHpk))~~&$A-n(g_@WkycEfQ@lx@CM1xLCB?UaM*av%+>}MxUW@k}i3mKRzxg2nT zp(A5Qa5We;W3)4lZgFu*FftMiKb7qQ9lXxsXS)Q9kyeq$99DnFrnHL42-sNF(Tv8G zRJN?81PcA;G8n)fe0~Ck&3tjNSXo|nbp=y$oj9$&C8nV5W&%}2iqnq*u9P;@r6yK4 z;gHifNK}fZ>tkBlAOdt!fkn3;JP!w$nEn~@OmTa1IZC=PfG>q|_$3<+TdW74`A!~zm_)xyooc$vUbU#RVX``5;I@pxc;1be)_+KX|6 zr?em_D5gUCA0M2xGm}J^)x_A`lf%Uipl` z`|$AKg{EPihZrgis_3L(u$5Xmw|6864GqECB8GC#JU#U44l5Rqpn^Ftglkx+QLV`2 z?uSx)4O{KQD(i}x^!DU=nV8=p@%S3Z7#cULo~gLhu+7Uh1Ub~+T8+HhdI~hw`yjdy zXei+HgfnqKc><`I(VbHSCnKLRQi{+xKp6!Q^uH4Y_y%A>Us8MZ<-OA7z9opbA(o^4 zTm_sv#5FF!Pz_8>{1L_$w95b94_GnKAaI^EbpwNFpok0ZTJe$rGil}9bw!8-0OfA` z^O3~HK4S$(s62~HxOXcUhlJrEb5@G(h6yd(@CW73Z}e@U({{)oxe#;jeKfjq1_h=eE-R?lC)d^ufZ5mg7n@@Q_uZT^CJvp^@IR%a)*I>qj+ z!$5!iR1ntx86S|WtVx}4uIVJw7+LxJpBhUA+QpUT0|e$47D5Yt$G;VTr{H^;a;`dj ztv3dE7-~z(h`YVS?@7Xp_rfpzpGI1+3lsfpMpnKZZh;5+mRd+lLxEe}m-5Tfm}_{l zdc!`_v+&9~YRZM%tsN9gk4qmAmw}ZxN4t3MJ2q<9*9lEf{cC7w$SWz4J7xd77+9DD zs#R<2^MW~K%C=9zmwP&a)mFBl%8=_bF*w`bQHG@4;?f^0z91{qyX}3%rF)3&vXIL# zUFO|BWU~7#ui6R{wusc^FXD{`-y)T;D)&4zo^vxRC!G`zba+3ges(+8LS_X>Sin#= z{b2?C2H=t2VRX;R@|^HNxc_agi=N(duUZByMw>U-fC->-!ow^)X_z;fU|wSZ|Bj+ z=3Eyol_S@Y0Z8$(m=PZdP+ysoe0tJFlTHv;{)42Yafxnoqr+(XK$3%lLsBEO=l+~v zC>$5b|9)PZgb3Ab3bW!dZjeIneH5W>+*uY382&O0-IoGL_ry@Pwx}fZ-P@$zR`@NA zJqxJaM{+s$79d5(j+YIiW!0>pN49~bnyO$1rP6%*GF_|m1R8=7`vONsS{mMqeNY<& zJt0J|?3*hKM4*s7vH;vmi#rgJaqV)GkQC&1$d|Xaw&lFf+m}_Ak{>l*@_lnj9i0@T z**81>IUd*!dG+~p<7tE%RO-HF{$?+mj6MN5geKiPmOvhF8F*xtbOQVxDYo`1?NT|z zEB4XaOO>pDO7Xl-yF$$oIFWSNm5QpQE;YREA_!TJ?9kss)kr`#{Srp!rAUJK%koX? 
zx{cyRJQEVqSnvQ`Px_Dn*&W0nxDOn1lZV@`){NnTy)DV-`q9D{2Z$OhFK&owXsJdQ z9Ksf-(OPnG8#|;;Zgs%NSdFw#qd5hxTRLQ;gFDYKMRXkG6@&e*H&9zrk8=Va(56o-~bf05Wz7KxNKCV zfBy^V@{5z4XD7L(b1hI>F8I|qM`m;U_`xSMDYEb|UgU8G{9rZoWGFs9zneqXg$GNS z;SQ;@_$rwq4N{2vw~m{+lz@?YQ??IF5R-92_`uSkF`WMM=}G#SY1KFJt#B=lS8Z*R z697S#3 zIlLvIOxrDq`L+5Yq*ATq?N9RS(ftiVD5P73Tp3S_Aoz4?haNuyJr9aM#2k1*djAIt z_=;+%$AMI}ay{2h+uM`u)+B9`DshlMU1;JK*>U^`Ur2Z1 zQEb1H*3*P#8H{cKIR0H5|kI@qpKfWr)egC_`0SA zxGu^`b;3l2bl=Oi&7mxlu&d#{eI1gDXNk=iMH+1JqLW23gh!F)IcOv(zo)wxTY~IK z$@Z$INa@~7EhqDmjdMpnOa*sDb-0oul*pbraxEwQ(s2;uYnke0ckO9(a+;qidsB#@6op8SIeV@SNt= z%pI5OPEN$d?jyy`-}KSJ;K2$V$G>S(WN0*~&@jBf^3wR}3C1|l5W+CJ(JpG(l3CclgQ`1CkXPBjgZ8G#T3r~Q#=eMwyIZLfU;M}2pdOW2&(Z6Ec(@!!cbCVY zMd~09#Q%B-;Xgzu!WK5|=3_wdl|R*=BUV1M!`4A-bKrYblqf7Fm}W$RVp5lUMS6HZ7H4G!hY@D&85(@NrA64uk;gd}Jh1`y z>Ds$WHHxEo)7`GeFR05tvcGWfE}GI9`oPt#iq#kW^>ut`I){R3k;UTY`q#lrYFOdy z=mnFE-BaN95a0p3>@N>+cCGEa*rBmyMxW^jn5@L4Tu4ruoozyM?D-YW*6a5z^$HZ- z;$@(fyRl85zK? znN#^Jv;MS4lVh#Mk z?3#FJC58giw=N+1JfmN!kL_i_BRFIZtdBrw=b})*M&Ql^UbiGOQf%gg%cu4Dy5za~ zP3(qRbKdM??`NB?$9l?h^796(isqKbyQH%yoP(WQgG@#7b?>;i9(`@0jYe{}H|A@KIZ4%c#+gq_a)k zf^PWaz8?t_S1m0J9+7UjlSL-}8e4E6YAZ{- z8k?UDF2mpz?pA%+Rk9^NPa^g}Kvr(GcT2wC0zV>~3};)=i>aqirK`rqFmGQ9opu=N zf;{m!~3LN9W0$Cu(qld=?Ys-A*za%0VvM(Yc3X3Gp(TQoOX?;(&&* zuF-3n0*Vu;ut6OFssZ~05wgDnWQR+w^sW2&PjXU*+b`GS+PR#m%6t~zu>}V&xeyxs z8GjQ-Pw{uP;E9kQ>qd`SkMb+%D#J=Dk?KoD;KU#T)5lnSK){wbAgfHXw&2by{ztxd zJ=zN2WM58@T4L~*Aa(`UGEcxDhgqvXa|17Oozc^tDJ$h*dYD9gJ05;77lT3j2QJGr zU%n^URV*h^NpuWV0CuBg^Q1syhO6HG_g1slanWQ`eoo_^PEuOXjb7D210tkXpYz#v zMia&nSELR7i)JCgMs4B&vQVLxaIUZ1Gl9j{F<-^{08ghETmts2Wr-nCc6ehipdsNZ zv7DvR+|tq^i@%N$5cpBKi0J%v-8rBl{?q)~@jB@P+jW+xCqaQ~W);Fl>aiS%c6j59 zC5q>Hn8r=VYlZn`2ZtlqV_|P^57@4cW{*(LMz#>>n3eh8=vYCrcmk+>aW+f2zzz(o zWaj5Jz+F!v;x;s>^C;EG|JKz%e3=+A2(9w#7=gvZKj0=9PzWX#md4AM4*;DFDjD0Q zR>OI14UNFxzs-OD{vF+L;88zr#6t>v0{?mS+}wE$sP=$wL0(xI3i(`6FlWcc&Q5XO z3c4ahpTdhP5ZxX8|LV;pIIGUS-L<;Id7=(1b$v^;7Y_{Gt#iTSEH>Ns3c34tGJ@_9 z+NdJLMgLo)uDRw_}qqqvJOX zow?!-Z#h840!(6%0N?|ld-`-FpXCT8R0Jy;5L?S&2taChDep50Q3vLc;0_wjaJ-UW zN5k8;(lP6925N-8RkFVWA)Vo7#4vYwfe?M``Pf~#)scjD1H z{aPk5wQp9W&mOvz87RGYLHK=euxARiY=A>1>AsN1!Nva`m=ft27<%S%J(G&t>R6K$ zGn7<0Q`Xi~jL*EH7nlFIPEAbA*pc5g@yyi!y*Fuwu?9w;hj0C$t}h4k2HfM*P449v zjfn24pfb$Ci3hzvlmkYxX3FHLhAR;q-7>?xF^POlp^`#Ec7xmdkD%GtZmw1`Uo#i5 zYQ@^6$1rJXYNB!674 z!DR{Pkyd6T1e825sesB;9he{ybv<#e*N5V^H6TFN`&{w>8WKXYl`Y!0TKM2R10#(I z&%eZx1bx1R=wH2AG;C~x=_{U~8Tq}`jQl@q4H!(xQf(>l3uxeFWvaXKc*@}8*53-z z6GUD-1O5asarKTXK~G_)%sORr*1VrUXA5ZO?TC8X8Ug(0qUZC@iH)0?Z1>a#k9jb< zlc+xl_%F9^4A)rml1T#_jm0sGB!)I&?KiNR6GYT$3u1#MtUKj{0QUt_g=i#n&Dd)4K`(enbq$S`3X-&% ztIqG{Jh@d>1Q1}0ApNjsSmrYc91EDCFdsX>gQzp_7I6iK7Eg(Zz5=Xofdca=FcAYM z4^qXAstqrh)7;_G(b%21vb4O)muW!1fWD$Kz0}N1j=T4diHYY*9R+7wJ1m>0O2@Yk zy9w2%TXa~HRws0fPvxe&AC>_D=G5!guU`_m>Jr5hrV~Xk6Ytz9Do#7AYP}hNI&86u z4W;cYJ+Xm6(D@MG3y=i7b|D_BV2k3z(2v;vb@KmDUHKSkh05r(<~%%PG1HB%#S6=$ z*>@)e&(pHP8-Z<59?!%CE4$q7% zS@Q}H4J2n}#U`=o2=9ty$yAppU0o6IeM$^a*VFss!hzXKLSp6@!OVs5B@uHPw8#0K z&<&*vI1Jr}moEYHn$y+EYYJf(bVxU8c%%a`VuNEtDNIrEfr_Ve1 zk@u#u>IooAb)vaJzvb4cSJORQJL7M>DoudJUhJGho3Ft&=c}sj+;bJ+;+ZTrj=Q?@ zQl#-SHKYP((*!J+=`o3eTHz7<#~R@I9D(j21CqI1a1&^){lmjyiuo{E+1{@{JLcB;pbs{t-Q=8;NgkpMlgT@>!@eJ)HlFk zFu$+hH2y_pbwnG8L*Kq>8SHU^j&9IBE7?%{P5|_Of2yt~R1?4D1y17~O@TQH!{4#9 z5)n(>y}(+C0{TWqxwguHk|$L>foRMIb{Np|JpJu8p`M-|qK6C!zToMic35=50m;kH z2gc$DfWriqOF0dVgnGkLZG2|E`OPGb1b?N)#=?SeeSRS4 z>njGbDWdTxD~pqg2`ns#GKh@;r0s531`z#?e2#kX?}N!>WHSVvklA#~y2BolcbkDs z7&$>PX<>yFeJxvQH^*PF4u)WgOg7R`dd<(zZ`u*`z}nhc!qoH$FaeB>js}2;TDc|# 
z#LiUZu;+GwK7AMEe0eQPj2@<~Kd}C`;AJfx#xI%uEsPLoG(a{0eSpoZ;vv%x4Eaq{ zn4z7yLo;jlhx|c^8}&uiys^A^J~<_osNv;x_4OWnDvo%vE2)Y5l=)M3_HNK}$mVCa z)Jh2=27tRn*%ZCb`@pFr34}c0F$u6^nFb$HS(2M8X**qqW6s9L7OYnq2D+3DIQxL? ze(RTr1?`#>1sYXCLc+?Zh|RZ`s4X=`%hID@x|u=G12`Or8}BT&^vuv>4^Lw104Kw? z%c^kzohuvF74fRBO{_lQ@58KTlb`vUTfLhBw4nGix%neK9Hh(@;O+$5Ywfu^@E-zU z5;$y74eA|{aK&%9ghfPp$_fCAL7dH{HunP~uwg*3SQ*Wg!r5a5^N=WOYWhj))Dm0^ z?wWBo6tFwy&)zhH#$XrY!d-1U<1cTJqet-GU>Ipf>$A3w4# zI%fNxy(;j3+4M8!=anC@%DQ*e8AUNQnd^;Ms8FnlxdW@=*}>A&=T3-r1j}ktz*vLz z1bS*!Qvz77$`?Vd4F*Yy3~szMiMTIK5&{x|O!(~^FCxMTXAb9*J6Ks2EbIV8(&S2I zwGbSx!}4ol0&IH_lV9HKr~$OfnG$83^3=HZsoxP+N)}m3%qQRnQ5`vm$x@mG&B4z4 z#Wx6yQ^dL>zX{qsL89X(1zXt!gaJ1NRsjK>jO<8d)e|V}W_X5B7JQcn*sdpDErzb@ z!1DvpVh%!{9?aD)ru^bVav&tQiL(y6+dwmj=+E(Sr^{mt#Eu4_XZ3{w=w|`WU@%3| zz>}FF?oS1RgS@J$TVa_oYtj>t^1$PR46$0Nnjwe;6x<^>5K?e5RX2Y&5_t#&jlasc*zAUA5hCX1WG0~$Ahvaeqy?Cdx`Yq9`;V9%m* z8KPGQmMa1Ls)#SyP-h1AAsd~0vBiW7v%NfZ=I#(%UtyA+`G9qn&kYSToEU;^iSqOa zllSx;lJ2Zg*xB8aL|m0kRm6AX{6hWz`>LG(ukY7ETy!rULht=@NgfBZ>Ovq2GOE&L IFnG}a0B-H1S^xk5 literal 0 HcmV?d00001 diff --git a/doc/source/_static/index_api.svg b/doc/source/_static/index_api.svg new file mode 100644 index 00000000..69f7ba1d --- /dev/null +++ b/doc/source/_static/index_api.svg @@ -0,0 +1,97 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/index_contribute.svg b/doc/source/_static/index_contribute.svg new file mode 100644 index 00000000..de3d9023 --- /dev/null +++ b/doc/source/_static/index_contribute.svg @@ -0,0 +1,76 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + diff --git a/doc/source/_static/index_getting_started.svg b/doc/source/_static/index_getting_started.svg new file mode 100644 index 00000000..2d36622c --- /dev/null +++ b/doc/source/_static/index_getting_started.svg @@ -0,0 +1,66 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/doc/source/_static/index_user_guide.svg b/doc/source/_static/index_user_guide.svg new file mode 100644 index 00000000..bd170535 --- /dev/null +++ b/doc/source/_static/index_user_guide.svg @@ -0,0 +1,67 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/doc/source/_static/legacy_0.10.h5 b/doc/source/_static/legacy_0.10.h5 new file mode 100644 index 0000000000000000000000000000000000000000..b1439ef16361abbc0756fbf7d344fd65d8a1a473 GIT binary patch literal 238321 zcmeEP1zZ(P*WW9Mh@u#X2_kkN4d&3&T_)1fDF_yJcOWK;ofw$lV1pvkCDIMjDWKo% z-Q9bENA}+5jqmsV-q%&`Y|MXW&Y3f3&YUy5yGKi1Q&3>60ETb;{1`7L#N4HRevw=K zB_8G}>X^E|n%rTK>25N;-jCQ}V0;*UZ8Ujah`j$q;&xa$dEU%KLjyxOsryqYc^Ql4 zBlf8KQ0M=o{~H|8(oiwNDUBe>WgejY%+16JO0ISX6UXa_1RIP^XYS#0$dCS+X`AUG zLBmF%KU^*;xn*WTKiYl{?kf0?D*mH}4I|~Xw_N|-`}WSSrl(?Rips<5(I5R$^;lwJ zXpYmy&X3ocAG6RfG1WFSVCKiW4wYY8QVJd6`IVR{1NAgKztK`N6;(YAQ)^8_6MYpk z>%QZevD$y#LUMe1*g z6V4v?4(rf?t(&KvtCY3BjoVrWU-q#x>U{ViX0y)OR~n^M2K*S8cuM|-@JwhkH84~s z`Z#gS&r76z{ZhEq6Z^_RoTudYup@mwiq6p+>GMq|FXxP;&o`R9ypTga`n&x7tCYTz zBKc=Twi{a?ai>QIV}xG9^$Miidf2!-;9SS^ch}PflK5F%PgC^q zqkWX0Sy5jM*R$w9+LW2U6kn*6jq6X~?48~5jD6uqb=|Ezyfl8y%f`dr#@E^}(2H43 zW8Ej2_yqj zd!qjYnK_%1w6nEp5oPbWOrnLGuh0v`NAQ&GC~*V$Egdi}8U9XxFnf8T+pnh%%h zOhc)x(`be1DBaq2dpn*c?NLd=(~0BK#b=<@^R|l@o~~FueZ@?a-l~tfiKjod8n@%A z_Q=vDvru~KO3+C>-LX_@>ui*!R>mjdsl)~wu{kL95o~eC(^u#BM&apQ6Rqo#DD5iB zX~9#ywcZ+YQF?}t?>L@ThMX>(D|qOb%@(b_89*zvzoFW&c(_Fto=%wF2vY}K)qPd> z5{x#MD2lgjfb+g@D_$i&L^%unH}$4 z0n(&$ecJCA0zRXbyOM1U@bpu^x8+zp*r&>$n}7NPc%{!X(rRfbs2On6r8A%w)@c+L z#qX{M(=Ke-F>hfB=!>sw^t|SoSgHmo>KLr?@X=J)mDMuRRB~6a@K$kJE90VPr6Rk` 
z&);&bgR8cgbCBjb8!aCeoHRnm`+GsZJPf21jSPIAz?(=zW?L^9+{byx@;89<8 zn2L#s%2HHkHa*K9>5+>@#w(Vlh8law9r=;PpxpTOHQ0sT>Rva|E_3baPE6#?{P=jq|= z=d;$%&(lZ1OIl#M9bsKd`Vm&Nq^F~!uY;ejfa+RbXAcKoUv-;60WTQ=J8Ns(wa#vS z&K}m*0-m-m4t9P5Ua|rPekNXW0@LliOnxC{G+YR3c zIVJWnthdf&_hW8#~{Ef>1$=8fynWd>eT=0{`{w({XQ6(>p% zEy8VFF>xx9pBT-k*8G$HuW+FE98!xUJa$q~4*aX*ic~yo_e7$c>tuQC@s?XVTZ(yu zKk+bvkN?T>uGq1^ThYCgl={E@wqjF6+=zrEvOyXAZ5=y^=^N(`GW)AM}q zqlx5+$-{eJ>3K{t4;wwB=lR}8{N#z4DZQ`sJZ6n|r2nMn)qmd+TfhBvUfO;Bf&5?p zmHti2kI;9&;xs=X`~;L=hw2}c51ZZJrq3i=F@|zEFb{k8H+Fk)=g+13`SxMN-NT6F zZ}(3UhVhpPwtbpW16C*bCo7nB!4+01#Y;EmgUaof`JZh02(?#4YDJhQLiahvS`UW( zgsE>#RD-}>IKEz1cF3nw9lX_WEj<>Auxq zTe-LGpn3JcYLb}zFR^qecxim|5Saofqat!6{Co^Js4Zb!|Dh5VI%^I2wxbRj?(@x^ z99IHd7mK-l-CPbV*6@{wDZK)h>;~4`&CCJ3?-Fm^^vMA>Z?8V?e4Gzb4xL(Gd9@9` zo*2SAcuE7<`ouduKDHj-UdhODvB`mLzB4|2tFMQ`mpiAO`_u|o6c@iR=P3fpm(o<` zWmiK5$*;CI9ExDfhr@e=Q{thT$R`(%qg^m|#t6-E8J(bB>K%AKq7EK7I`u`r+-6{M zE?Ib3OdO~iza%d%D-|5Oco(W{`3enkopgrRCBy3ZEoq6r8la`u#jYD6)sR2j#?rYW z3JU(}7r1j$88Ck59QAZS3fTTZ-)>*wFKF6mX*FOU7V}8B|F4-zMR42d#wU?&Mev3C z!hvqiHDJ)*(vF0PG@vud@_FT@LU7UWQSgbRF4$PR+(EP+$;Fw_l2q3itfQ; zVo%)^(d*0wr$2n%niW|Jz0V}i{;bso-_4!lxBpc%thng8eD%F1aO(bY5Ao-v@WL{Xz;}(QhNe%TlZma1fsVPeo2#XX)EXlPcU>PJ zO+OjOKqYe@KWR%D4HZ3U&v|}Uy6U!m8gAakn%?g6{*G45*1G%I2dXXechPrK^j7o? zG}6&m(DN`^=dGmVp9Oi$iz`)nY*o!jj$r8=I{ zOSISMYI&&WnYh}j>#ft+Jz`GniTzhVC*RKrYgTzdal0-VfOJSr*3z=|$%I?-KtQB}&ZFRnJqzX(8J>ER2umQL}*6UE?&jIQK z(?yea*8*J&y<&%N%^+CvRX=rrV(1(9Khh0qhazL&ZTD8G1IqmK8=XsIz}kjziPO2w zaOHacy1ApSfJfKml_eh)fUAyr23-eU!v{&0Pp1#gfWBKgVr`>4pxhi8&xp^7;N!wK zUq%clfp}*Z=e{ zZq`n5dR|6(G0`rRKcGh^`E&4J&jHH6yjqbU)t>CNz4bn4edxu!0lwx5mrHUTkEa{4 zjEA|7X$i_fIrXNy;R}|~G9c4w%x5H~3HYIvNjB59WzZRS8y8O>Yy6Q7{Io27S{6U; zj8-MFPRj|{pan#OvFl{HgkRLd)5Dr5+h!g9909(5D)_JEqSZ)ls65kYZqjIR5-I`h z$)G)SQ?w_G_T=$Bv^WaCu0ZbNITZ1ITTf3WP>DLgugt>_&^iemftEWF2lxfF>dD5> zOB#_UiRw#@8+|_yf%{1*K6#+$EUc-8Y3Tev?gysICEAC1OZHP7NQSWd+B^h{crYnu=zS_$+<>c4)&pZ` zALj+`!)J>zIzStKuKT(lwmdU0WB39q9E%s(gKXp^#uxwKjyimq7JeO1@eS{;L>2ja zV$eftxGxyT;GRG{59Q$d-I{_PsGec|*eA&LG#ep{ErV64(Fy{yh$` z)&Y_WO4blU#d;q8pSRnP5TlKOq@1}1T3bU;-CD}pKt*2zKaa}@B{%pn)t}hvkT;Ec9vwQinJ@DQ2W{*CBNd7%ujypW+!@tMNbB9O$ z`1g1P?(nEjv&Wn6^h;T5YU?q3BHnH|?DiTCbPYzS-9?c1ke9h{O{yN zhK2uo<;t>f-Sy?~%T>ltqxuLU-AdJ4j)ni7?j-UvVXU9u(=X4u&p$@L0t??a{S@E$ zP#f@L1}27Q2!8>KHMP3=uk!=;x*y6u7h=vU z*mG_IjN3`w{VCM*W9;X_D0_*-v+RF1zWcm{T`q+q{7$6ZFJ)~{yxIaU7dK%jyB7Bt z;mtJ|Gr|*eo_H?_3rCWQ%`VY;6AZsE>5_`gDH| z!_m#t#t$)}kNQYctjFtvFA<~YApJT_?-#Q&R82^?8R3=dK5ytzADQm;!TllBeUaZx z7GDB4n0)H87k^~B*GH@yj;s&qc4O8E?X$Vl2VZ2n*Mq;?{fPFJk?W%#vK;C`It-ch zK>OY6LHVmVJ>RN;YJ*{%{PtEB%xp;J|7`z@9O%6sU5I3Ym2ppQP_ARDpK0~H{I`#& zc)@5x0;)uo*L(iRIbN`uc>}t50lsvWsyDNr6Y&DJitiaTU}65AL1L-)r~Lfg>))e}vgmC@B!;r3C=5;$?#id)0%4Zu0B!lCf4^H? 
zAb2@(X(O_$$TvK&aS?XJ#eDL{z2gG0X2kVBzkiPdz2gGwNI2!s`{&~Vl>Oe*zuY}8 z@b~@uH1Pq}C_xa?-A~JoOZ1En{C)pEm+^r<{rg!IBwtxR0^V75a{QsPvp6&1d$LMGK`TM4y z;v08`wzK|Se)_aN{Pa`9d$uI5jPZ@7kcq=TzkfLJhXa2&@P`9`IPix9|3w^N z+lznv?Rc{?&}*CUc>_7$<-*@?#gv9#{_P(AcHD7ASy`lin{q=j9m|v(in(l+azl}h zceXv=qh5df?#x!Ad)Fx8ckk<+jYiJQX2aS@{{kofe1toZ|Brv3;KTpgZ(siTwlzID z@$d4_KXoL^$t26`J#XafpVwmEfX+WZ2Ic6Xr|`Fb;!EJsr!`2|e7E>y1bM%{{qQ^f zdO!4x0I^PAWg5P4968_7FY>yhUv=g$8E|8&LNegS^v+WOQ>J&G3Yaq2@#6>hI&$Vh zell*zT*ptw4bd`wv{)al-zVb-l%Jw+|9f};6!U#8-5l_}^)dPV11`Q}K$Y9~dVR|O ziZiyG|D9Qd|2_Y^7*iIlNP+yT|GkcReJTn%t08_-?`gt$hx=&oH3B_+*2q1BpTm9R zxGNgJE$Nb`U2~7G;m0u((I1W>-6PoV{82s1L%uxxfm9qzwJYT#@9u-@fkFPeMI?rk zU%kx(Wkj15Nklilx{(iYDH&Z-BX-$-_23}l%&9*7>RQZI4HW86$}I+Q+!A+hcI?*90m{`9}^59iXK-lspDOMiNw{%|h+>8L-k^>ugp+5Ygp z{kfFB7DV@;){V3M;QyFEo9z$(o&F%&m)Xp$pYH9)t_RyM-d&gc9{)SJVf(}Trk}!b z=g+3(y|-QR@A7B2^x@C$xI}4Lf`zye#5a~ABo6=l{^7tM4*cQ39}fKCz#k6$7jb}X z?{$A~0XOT;DSM}fpOuUC-@o<$N_AgX{2Tu-<<4OGe<^nc)9FFEGwA)lT==Cae@~Bf z={@|?zgc(vd$$ZCfX&w){Q2XD=GqVa`|p;=lIAMWTAbI3$7~}yEB}8{pZ!A$_Z;ZU z4;{R@=RIuEw{P){bf5jx-@c;mZbR=LttoPbQM8 z*J|=U|LTNV*RD){KY2CDA8zNT|M+`G1erV^O%g<{gXPDa;b`+B`#Yz2)9&3we(FBN z(VyS{6%O?7=gK7EYe@Z1@uB;7squ5J!h=RNV~aBwIaN7%vK$N#32{P}mi--`S1HlUCh>-mgC{9!44BL`$w9Em}_kqM6u zu(#gN2lRe#B%8lI>E$fvRDSN>W6Irk8~T%9x5KcQ5H=0J+ashNy>BY?cURfx2q{P3 z9Y4!H_q#oQ^5nW*zq`slN9o_hz3g-UNqcO4^`HKJPWpcJkA9byT0cbf4^|wyUq5D# z;ru<$iOl9?kR5}HA5-xXs=u)NUH(3z{gv@$2&}Dlzhk!xcXU&B=tQy~gpqgdeIHK! z)Y|Q$;k>sEplwgUT~*tB*w#<>{cF!QP_#Yc+UG+h@WqOxVaFZXK#a_aZSEt!!ew0x zEVk)IgAv7BR_CWxLGv7K-ltg|@Y2logZ^K(!e_%}3*-~qp-|V{;M}5Iz_X=3TIX#F z3~!QE7%FrZ%yZuDBxh0yZ+mQVu2(65ftU7)SA}K(&FSe616)(UM|shqk&?0Ct55aZeCv_H>|W#_UYH37$WI&g)YNyAx92lg$oUk4g&rl1loL*P4Bd8FRmll9^=h|UrmgJY&mWkl5VtcVg zNg{mR3WDBSSAc@>#i3>!ia>5hyhOOjdlFn(Hg11F3QM)sr{3z+W5MZ=N0f9oQF%BxpaW1Dit^>5Ax-z_oSh`}}iqf#J?3 z**91#d^jX%hv|b_IH=^!7v-PLz%j_!@1SWr$mUJDZB+9WR0kfNqw*^SI7qlC-`W!k zIs(^rm941)l2^(_^e&e}=@q92j#^#|w)~37s+R8p*K1}4je1cHhu$rWD!W++vu(F| zoa6Zh&sDt1A26^2P99&n?|VxwoG3W;OYNg7$Y_+`W%aV3^14g&g&nW;Q@$Vbi&2o* z0bkYLif}>iy7zB8`2OZ4!I&Y@%P;1iONXBVTrARdJc8vrhhC5?ZinJImxn0$R6()H zyyA6!*AE8>H^EQ3YM=Qg zRKpjGjY89&G{CzBvx{aby@rjm%6-Ehr9jc(56Y#VKZ7fiv#Z+D%m>=OHC?D$soac^O21ci@~Eo&6n)06JTglm1xM_HaMvM=*@%X2Z7{rbzSL# zuV8_@UeWz|EpSK8Q-{2?0vJ5pW^=)g0?4=i#X(2(oe;s(sUf@Nf51b7Ewl8p>)`t6PK;pKJ-~DfLroDIhYl}BXgA(oGxhtuiMWCobV`vhQ8kt6#Tw}mr{yneqJpF zDvZhMqw|X4p^d@gwbiO&amCIZ!@noM8Ot*zN)zirqiWqo-w6dEqHgEpct#z3>0MMS zZS(^nA>e#Rs`+8c^R+1oduRB4cwnC(*h+2mwgpo zej8*rMBaVia2cA{kJZ_u*9_)1deztEmBOLIN%hj(KEdR*iWh4oGof~>FW>ob(ZHbQ z(F60J4CTUdL!Hb1d9YRHd<=g;G8|A*ENVWW98TyD1%}9d2HTH`WB1=Tz|;w)7o_4! 
z;Y2Id^ViS42MJG1<)sZO!F}sX;=GZ$(C?ex26f>&C_76^xW1+Wu8#g5^*X8!td1J~ zJosG$7zxvWzg{66rlMG$|EwH@9Ltd#i~3pO(X&QJPGtiRhec|$+c0GV>DAg_f(oGA z%YrXE=3+7ZX6br~eD8$ok9DaBoT~$E=B-PIZ%u`Z&2y^#HF%X@-Fx>^leZqagdQ?` zG5aSR^JVaciGERV;+L;RAUhd+E$@G5#@jOJyLqh0I-L%%>D#3mO|fRsAO{mcT@>1zh^K}?Pnw0`!uX@zI+o{p~!nleM=}fbvb>-8SQo;|3T=yh*bkz zUvbj)MO_n|{oM7sz|>M0`XKtQ)SE)k*mm#C*}Ph4Fis$O?lbyhThVG1EIQo{bg+y2=w@Z_T%on{?S@RVMQ!-GRGWRq@F=h-3v_pR<|5Uqz- zqZ&L{94my)Q7R=Tw&uX7ev?&iNR^=Waa!xwm$Il0T;4nV#-oHX9hXN*1^jcU%gb@kO8uw z7EeG>4a|1j;UksZ3U4{>u>aDY4~Ov?Shp@H0@GAXSCnka25Ze9S@C`-1YV&rzYZ_^ z3?A&8TrNB&8uE_1qBcvv5!CHoKUpfS5a#;l2Grlcl#6YghE}@N0o55>7XR8(4SjvQ zOz-Wf2M1-R$`|LQ!`y*qW>37B2*S(;Xv{j&0{m+}jE*h44Q7?YeU-kYVE%wB7YxNP z<%-3#%QwbWfJq_+b*(q+Kw++GwM%Rf6xhE|TwCTje5#L?#cof6y!DCujh7Tc>9r># zcOR*M^FJ;6;lsERFFtg=n;O3DjaP!J*7GeTraL6j&*(VM# zV)jZm2#@ot0FgSfu_G*!fV%f~Q!x2EXi#`^I{91;G?MRk`l0rBSiLw-Ml7xg?mYbI zV%zaHC^L88vu`3{;FS0IEC?1jqq;u@o=npa)_GhH!P3TxORx<-x-k0OkS&Gd2@ zFl?_?-jo8E_&hgUG&c`OcdU@|h)9AH-uIu^bw3Ua96E2PeNi)1w~ARNR@??3Ze3lQ ztH4mMdA&!dwK5%E^_8`an|l#Hm6jU+zOxSOS=#S`fNwp_%u(p#o81P)M6HAm&if2j zm%m&1aprgEnfSn|(5n-e`-*Jy2j78tko>I1&@MRA^SLHw+6Xfr34HjK)&y}xrss;&|^6a@XeDP0EMz1;X|8~Xm!_^wm7 z0&*>&LEZGi54A>Exj6pznANRt&4^0rA1}*5lF-zV^Q|FRZ=-B5aAy{X9xf|>PwqZg zudG`+Y%8XG;oJt%_hALdesztnF8cx2pE%>%b-D%)IrQtvORzaQij5I?l=W9Mb2bb|@_!ccKLx zsgUAdaJmJu*K-YGt}FBNvGMTr^7N$~=zXm#Bd6yG^Y`Y-?EIYGQ-l`Fq3!Q{mj(AD z^fmuy&u4hasWrVHfn85xKEr7q!p{GnJ}>3YpTVYwyZIbDKez2l_=ot(3FlwuvD|&< zo|?C^+jj)nzI9Cbp?)~cS2?#YaX$un%Q{YB_wU>%Fy)RY$jBRSM42qB|RM-eI5LK1ytAiI(s+I&|>|t#!fF3$vwp} z<1EkN_78v>f2pS$(E1s)w5JDJmBEPC(VQTh-vBB!-Y;Ds)&_Qp-<>u0ODp68t%hM$ z9y2zbF|3DD>4x{JPB*}~!9F6dpVf1NR%b9KMDF~x<7zsvXlm1U5h(yO3nf;~Zu~%x zmf}00G+9uE!H9d(Ja5mFHn9K7?A)p@OgY&h{O#Bqzv$^E(5e_FJ-ooV>6PuDO(+5jQKSAWu0^( zoOSfY++f=b;AK(8SEwFMU-sq@@m);1E93SB%pQ{iZjNr*K6XI`P&YUby=hAcec1$B zm4QuOtMkKg;yZBZqo8)PR3)@i7v4~Xy!^E7O`s9o@6EEV6qja!_LY0x;tw;F6PBF3 zy)C$ao0=oK7sL;p`T0!-eE2wM$7<1EzHQz1w>B~--{x}WMjbT5IIm1@uA1jXc z0TtIXp?t7G;;K`H^koxhM7KY`#n4Wt5)kEa7pqyB4gJHjOJv6X;s%ZAHu1Xqd0Rw2 z7_a~=YkZA-S@xD=bw}rMgGO?0^y$>dZ>I{tg5*hW9o!SZ<6{fnFY5Y9kDl;cXYv%B z?)#l_k2Vkd3FXbdxgR;70Ie@9kKQ{hh8r}Z`$drRTnV#xVAMQu?Q>TXVa3wHms2H5 zxk0O8jNBya)Lfl`?Y5g8S--y?3~txmoNdwu z72eq0($s_WXaNs?p{a;&2LGP`DJeC1Qi#3=X zdb$ZT(rY7pOMhK9S`U9Nyd;p|(gt<%*H1p4z*X654D2~?{57#tO+fnWiRfn{17bSb zjh38H=fHEKY(%$E!v52`=Uc$Fn-Ao4UlziJiD`L1ORDMVzJ6Hx4zs<@Grko)I@$RSPS8~nx)xwvmp z0bCPX9|<&D=+T-Fm+4HyI`_uv- z*p$mki+#Rv>eQj<%eEp!`jxXkUAToPm{JwQRxG5XSxq7qI=69=V!9~f{ zNr}6k!{p)mZ@q02>FFlQM(sT&$~;UgAsYr+`dmEowidd9a|S=AbFjF!w%y*&q-zM^pY zniFUw=Zi$%WQ4jj!0JOuy2{6|z%WMA=Z!l#aE(ACx|2lS+(AKcXgO-R`&IFL=xgE- z8Sy-Yp65w>R8nxd8Sy0}ALndr0M&jUzs+nc02zyRJ&3Wn!VMbHEwg}cmrPX(7_78e z=I#3e_&6&0w$JinZqSHs-5-I%(u1ntE?0h;nxXkHCTCzlJBo$T_7bAz$ZmMA?XL->R_H;hRdp z;MphI!`D>8QMU&D7;>M3J}1yfuMNHE+4#Vy76zK#+8J=E4qTsS_dWi3EPacuSUr8k zOkA(Y_?7gZGmlR>`P;ITrM_9PD@bZ)o7F>Z(1`BTj!m*28{R|z5cAi2-133Y^^Z|& zPV>ajN-9w{ve!;5nYWfztRUdT|(T!pA8zRSBf2@U~v8PtvBajuMud} z-cfrJY+qEjz?Gv8Td#Rv2@C>5wFppjlH5PI=etECXQeu}*6fo9T8!o`opW+&03wMUjNnT6V$@lDTt z#r&ULaQ((Y`3YT(;LxXjrH1=BXhi~z=)V1O9egCrP;NauK*u({9{hT!oV_=WD>Rbx zyqXz1%-f5=bHn|i%U{)lRR>R9HnVS}=NeHqYHuY*^frEx6sRc#kI6-rfSjr->8_(3 zG}EaoK_?O246HV5iXddvgX@Ct48?;!!Q{2sJu07@veCRmd;S1;*rEc6 zt*{FXSyKoPjnA4I+t@~5_Ku}WTW2%rK496XwDlu6D&q-+HYdZCd(9_Go{gX{n?R%S zdwJ@^>m>$C5M(Q{bVDhZYRW zE|~FIGQ>S5lN&VB=LP9kO|)M%z{B+`NLyd6jysW21k@>*MkfHcFpF?{~Y_JiVgWGEn z*S?JXvZx7MSUBc}ooOMME-hUx#&|_fH-ScUUv_z=Zd{1Pq<*|!si|56vM;_!Y^=M( z4I1_DueBGVO0zzI#)w=KUbI%PTsz_9^!O9ppiz5&7c~*jSIULL4KbHC-f4vA((2Z{ zY2?sDeFR(FnRM%{db>k;X9?Ic>{$Mc_&1<*QTjg1$N}^tFo8yLUd2;%dWCfo7|b}F 
zV>jjpEGT%gGD@x$(2m~(8nyS!FCR4w?xw;`SyMJ$(5r%jPQ6|lsD^P9uW7^^f1@Ed)GaPqDb>*Ko>^u*i@R9IX4hCR*1eaXD9$vyzR9_zn(q zJeo5`stTl4jW6TT7|80qxQ~EpSJJIGvp}=-m+!U)k9$akR|okAAF%l zcNOKdFzKGIcC96HOfksnT)y$_h;-03h+oOPlR?V$u3_3xjJ z^FHmd?f{bDqP>gP2k6{ayO+MLhvGqg6IsF;*o`W-de8&;p4D6}e0dhCTji@xzxTeR2gL}o?ppjk^U9fQeNT(Q3zI=>y4ORyY9$AYBw_l?{ zqY4-JwN*&q_PKw)c%tj(dbs$G#K(6(zJlrR`vpz7-^>jf-NyFF5aEv&`S5fEcGz}P zCTyKM;nCD{bef4(Hlq9P`Q~fU!#{wahW4(Ln$5tNarn4^$3AY#M(rIq*RB>DQVcGX ztY?IKyaHP#Mfq**=y;CGX2nB?he*^_l*WO<1zo}8_tk^*5l>3}vl%pFC<~4Bn!XVb zk2{tEJBosz-P+O$EW&gKKW(7%fV0p@&JT*2<~w$lfHL*m(lfH<@MFOTk*lV3^D!10 z(QOzOaJIoR6O8-VdDWsd4`f?Uo@7{0HxeN<84tx6VvWYxP4nyFtwp@q(hZeBRN0_Y zK!^j~tazyXy;*bb`!<0OD}?lvPS(McV(*4lQ@W@Ft8CQXDj`p!r@2*uoR7_ei{l!> zz8TZK=Ga8hw>MEXlJgS*5h^JU-hqB_^ZdattD(w}lF{;RbUsZ~HWd$LoOiU$+L&Aj z!^8(*>GHwN5*dG=L`W{PIXw=?Ljj}>ZAJqXp@qS(lN8bZ7I&;em zLpkuAjE6E9eE!0G>p#_jyKOfD8bSn=g-S(!Ovs_r$f#@rjr6&l#<6`$-Zh~AbNL{( zxE82AVgBC3Tj}gI78;FX=Z3dVG%#YsJOlAQ{^3`khgJ3=Uq?Ef%0i>|j(TOeDKzsn zsIOhf(>S0E1|*z0o@d9QY$_g#mF?LrqIx6>I%imJ5kAlfM~LR`b`7VSL8G!+@lcm= ztF2#PE0}-VM1s$_9*&i7&A+{kF51FEqxMeSZ;|AxTLm3Pj1tSamj_}igRB|f=zLr( zG@@H(#^KbcsuD0sP+`^$DptOO){0HL3re4;O#6%XCGDC}nbm`JE}BPGwtt_&VueJ*3k5;{K_zhWhwy{^wb3c5cVLFg$`=V~bfZ5i zn-veO$=@y))%5~Of8SP!4Q>WQkK37+2ei<)7=cE9rn8|V-+Zqx0hU9c-Kp6Pp#93? zs}oF{=xa{KLm7-u2U0fa&TIg81I$+@C>4T+*>!h;c@>R>vfx;bpLNVrzz2`Z$Vrm(9C(s%1LwT zn&HDXnVMs3D#7gS)1^hj=w>x6G#bC9_iAm4(D(t&G?&1W-*TYD&tFF>rqZp3VxbY; zHK+T%@PnhESeJx#d`unW4SMYSup@(>ZZaN# zbAv{e*gI_F(Nz#Vo#(93G+d|=o)NgPZ0sUBPZ0tUWg|HcJKR5A$Ug^`y-nC+7r+}c z@YJ`eexh-ZwttiHPzL52{~^7#G!@)dFj}-|QZbxvl^t_L{4;&c$#^KnC_QQS#dAeI ztO=R1;E`@7Fqpsl++7zAy@VAH9pg87-JD2-KI_efXHjP1#7V1*=}T zy1uvwgzO3*`n0PCxJQgjv>hMM4I0fu!$;Jfce+~xC)^!=#ARdy7%um|3PVyu%QY$< ziVZP+$*3Ch5KK*ceS^oh4u(6Vy{J0Op_dS7G=7I%%{scxuNpWWiX6LO@qm~ydLIwX z*iEg_V#A!_!8h#?s@Ivi+E7KM?Pt^ULif*R6G=GtM_?0sHy=>I5}_4 zI=v3KJTE7B$P*`eG%FtZ*dv2)Wa(SzH;n&s(7|_bg}XxBTFo!?TqEP57{h33ou9qJ zYj{vsaJ(xYrd-$fC5lIv!`y%s53P7KP-t{{0jQbscxK4>N~mevIBJp$hgl7QM&q~j zo%p@Gz88ZlMpNaMq9=d!$dlJQUmV?<(1+S!M-KqqX$fP<6DL1g|tUtJ>( zGYbNZ^x9r)HDAW4Pw=bQ=e2>uGC||hhcxRbn!7HEo3~Df!&f9|ICQA1&@cF`JRlPBp3NGd%dR&o%bD~$#^KnI8$8HIjK4w zrk(NAJ-@60@E^RlZ`wsVp9c$#B&9*LgD(uOXy{diicwU zYo6~^{8l{`=xQP z*Qj_XgO@S!s8?YVYy*pL?XBqqhjoi9Z>Foz%Q=BYbf?K*cd(t<4!jF)Khv961%3Co zb!?31FfU=nLnoiuexoAiD;&Ii_w`$^OJLQq&Tu)uTyDxndTrCyM)^ms-{3;GS<_Va zdY>B4)dlXt&CvI|LB{tW4z>eo73e2?+SKrlgN04T zLoo(#G{5nu-K8)#N#l-E zCvW61hLZ763=^9$&dTpg32Znf2WlJ9`w9{w9p<}nu&~K^D8?9ct?b4Q$4dCr?aiqV z7m`3%x#^UETsr?b;u?WQ4FVt*PhG^#m) zMms}iYJHfKnE>y{hOC&KR|mHl4E7qYNcXG*3ytJlpLg<{XXBp0*Xx$=-*WaUFs%I9 zx$6{%`8O*b`uOw>rP{Md8$NKqsdzIJtSDZg7-3sRFXs|OJQRKFNYd+Ja_t*9C&#<^ zd~poi>}Xusu!w_|OQ6ww%=X+(N0G@bz-ejc_LA>tL`uJX=<#w6BQSwR^O8lGX}=a( z6oTWsr}C+fX@cRW^44?~a*!4Rjr6%$xc$M!C>9%L$?tBL*$fo>PaW}a4~O|T6%WPC zqHc-HPG|&5l}ATd6ug3?bj{{We1_4tHyIDbFbH%c6Zu==`O%dlijW^U_xZV$mvtPp z0~rrxU@3yG>lxQ8(VTzs8OfQI@Zf=%pk!gXr!-v>2(Vk4`pEMmu`JdnI{PFGj0e8+QjN&ZJwXiCT<|hoUtHHyVv5JV^yz*PKqfwif`C zfW|ZaOE}C0S@FRyBcTI>#0qm#qhqWvA_BTR3<`S@BS*^n?X7zLtP>!_M!O z-B|?`CrN_lDb@5`Bhbh`zrJNx{7K0hP-l}wpvU-1AZePnS9s2M`t~N!sJ-iE4%wP$ zk_alFCnxLADuI*Eg<0rZaD_&4zM>?|PItj)Fud^m9p%;suxS5~XwOxV^fhP2LqE3i zR}Bv>gwLj|21Vne;O6BCbyh+t^ff2rp%@mv=hbe{rL8a_Z1?1@v<|51BN{r{or6v# z(1`B6yiVJWjcfwNiv$9&qu*ip5_75CIJ&qXk`@Au=xz^Hd|n+_4jlG4ZP`1l1X``w zS5{`iA;Lw)LmB6C7G2=oSP3lVD>CM*wSaA>;m%E*v?41WD)spDvSlxGz~pgyYfUBy z!~{kh`xr0rou21pJQTw^RXZd#zrKgtee?zo8}ec4nz6pM^W z?-zPK9wY|cSDWEp3gh-TYJU!+vs+NvgS2m`8*Js6%QTJ5+i)4?lHX4b^Q5OvvkneKF#XQn4k1qqvD~AFw9d#RWw=Us?5fiG3zy1#FUgWCpbdt{VkI<}m=(q>D9`WLbX 
z%&me=Efdui%4NYP`hm@j5h?UMC(wv)vw^Qp9n>uYS=U_-Wh$kDNX^+(-X+mJU5bjN z;-QSiI&v?6J~|7H9L5GJrtWpw^C#^u$mePDTNpUVa zH{^qvN&0J6Jg2jI5SomKqGwx_B5Gx{vq3}jx={4;OE_oP)JaDVa*$L4jpk!l8%vM8 z&QAid4+o`&<`saS7w24;4|z||H3E(Lcg&+lsmEO(0{pmq2y#oHt4MnP81 z(R?gLp!Hng=u+4)|4R9qhDH#wRW!_?Kiz6agr?%5C`Ogwx${;g4D%JcI%0qyG(LAI zMEL{NKyI zT=;ynv||+Q>KInd(fB=Wn7c^ysdjKA$zJF7t@DgB<&OA8ebWyt79XRdQ?g1aTQtyJ#J16PF%mlog6 zhO}cS84qP(Q*t^UTGU(0ux^Wx#dxq-F|Rp?@|sKp30ve&kH z6#kIOsK2JhYhqhe_P87I<^U{4VdbR%o~=>GWP34w6d7 zLovp+8NBkPt7G6(TVun=Vuj$=%ZHbeuG9GyQO(JCC!E`e!R?Sg+YYl7}8}Oh5-d!@`plEOk9CbFSAtRYXi?QON7xFKZ9QyDE z4B0A^aCGN87`OSwCW(*N=p~gE4=p#Db}>n=9qjKao+`I05B6_tcV96jlN&UW^E#>5 zvUA)Uq0K10HD*&1fqua1O}<8SaUw)F6%S<$2@y0%I`IJrS!)hPb~+d{-~^A$nJ4r- zXT?K5?X}y!KCu|SXc^ii)E{|>ekCb+iMDWqMs|zDzF6%8AF7~n=!wT&1ubBXRkYv1 z5p*+2RC885bZ?+u>CAKK0Dd>xo357uO^2@vI`ZQtH)W&ods9MQf?r}IxT^WWTWnJy zcrP_idEKHmdNeB@nlrLC=ego95G~oif#S@F(&8Cc7y8RfPkBfyya<0*( zzPRH{BaA#3kgM7l3kOW9nVYuo5Ixlf*U;>TgT=1axw!=%CfcKq_;mLtNz;+RnCjSK-xJJf9 zF|6P4F|$0zHp3|s`R!s9^I^i%{kh|oR?*j-KqI;X)>NL*UiA(>6K=eERQVztZ7^)@ zg&QyEYtD*?3Tf{@ZolU}m@P9fd*VSAo#zPrc;pe#GBb8;qy7Inw z=dd5}dqH}*RSXB)fsBWuH}1%XA5QRm38rcvD+yc17h_h=`)cBLy7dmIY$_hgfblCI z#ecsBi{1oZVECqhSL*WemUHUqxkkl9u@UVpdFf*5Fy`3!le09k;F+V2mN5(He44DX zk$t{skn5V#V-@h0N0|(ErxNgeTX(JXDTnc!iia}ZHJvUiUt0{#@~anb_?!%mCSb2a ze{mSUS@F=H!@o4kpf_qAF+!1tT^~WD;kom=j z;42N0JdITa^wL7cLoqDYT(M9ssR6bPGyb@-b{AZ5obRBm);D^dlkrdncH~p$Sd$S= zV9X3b$N44Ya9e43)~yK~xJJc8vD_hrJpL*HT?6T?UX2nBaE8kdG5tRf5ON%xo{w#n;=SS~+rCLId zrsAR4z4=mFk2kf!AcMU7x`)2Q83Jd23StlFxkkl98AJI3olY)BtJhP#c=Ri>LE-gf z{Y~c4tuaCEO`wt8GV48m?w-N*&|fWk)25jZfX3~sLj)&quzJXND1+fKbj%YJyjT8a zZuErzav@NC7Fkh%z6e086^W>#3Wh1%oe=kYcQ%Z+&kR%lUJJ+QB?eh}aOj}~8rdxe z#BYWcZE1m{wKVLW%zq0E=WoCFf9>6AT#Z}UKk&UvnUk5!awtPIDZ{avG^bHWNku9t zB~6ksLuMlLkSVi_8M4Z(NOMZ_sChRh(zBfHoLA?z&#UM2|NNidE3d9~t?PTOweI`2 zlEJ~<`x^FF2jj-5TL*Jj??0Pus=U4nW*=}$CQ#5;yg=(8Gex4acm)8a-9hbO8 z_h%*ezV{LQzTf}chnCb%EkE8rhe-8yjFG)`pMJlp@Uc_-cXyJkWB=TTesKRD6}|O4 zjotdHM#Vpy8mZi@67v!9=JD%3l*hX~v{7a%d-{Lc?z}!@Z5ovu+G~U9```VH$v?T- z_xFQ_j!&CU@HH+gq;0(3+(1{<)J&Vv6!-gn|8pOzxYH%5uLDnG%Pc=>@xu)?cTPYA zpZy&AZ&%B&`%oV5aAJU&Oa3XkPBv`+*;D~Ft-Q5&iQa$8&HA0C7k|;esgABU?o#cp z*hGSQw%ZyV7xC8f>pqmnugg4jCLy|zhDLl;Jj;InT<_6Syxv|f{Qb=Tz7OR;?mvHV z?Y<)Fn#z6@J0XLV-H!5}RQ|i4Ut{LK?n8Mz_wdX$mX{mpyjdx0?h^rBkrY=hyY>2i z$<69ss36b#F}{{gmv2>>EF&N`#og|HnBGEuYyRs#l*c>MwR*$Dr~)b(t2B7v#dsQ@ z6zYD=Uc@^8`#zLs6{4r!*zFUk)HZ8+eDo6aTdkq5Rd(a|GynTOl>N(f@8#4}Kvi!_ z9+ypMqN_)*i;E2v@m2HdK9u!)NQTmCN%ns#`E)t=Mo>!{txw*3(YyM;ENOB}3e^#8SlR@asO5&u_bzZJTc$PizOKO0K zhxSxZ_tB5fq}^U7>h+sgOm~=F36tt8Vu${^4`shetSC<<{7xm+PMi99mu?f?v!J(( zjMDG^T@71nf8U4lznzU48K>Dq%8LJp?ntO1ZQEbgE$S`e&Ewa7D37OLcafiUF z{F3_oJ8cg?&`Rlo+jDyq5w$%xCau_&^k1UcxfZ=n&f)2>5)xY;U@$xV4Rt1OpU5>Q z{(iOmb06wHL~3rWTnzcVLp-J7csZ4hR+$&0nDYDFzwSeMyd6C}GsCh>S8nZF}U;fL?*>}koC&?oQZY?BG zL4VhM_LB;N#@8#xKIc;VfA2T=Vnz^v00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZaf&Y5~g+6T+vb{C= z{G%hC@`q(5k$rY2Hg6mwAm;5UL4LW7g#XJ#?AM>po}P{l-aMW#P>au3+i_&<6aRCh z-eZr1*`|7uGeXL_>3$jgeV{gSWsp3lPz z6{k0+&~tNoJX~oVOXrVmmDKDS$eD4P;$9l}6X;z&g)3Jh(Ikq?~e_iqrzY0k_+bc zC9}Tg(8w+iY-~=R=TsGDWPOy*?lmiOODk>D{V>FRS~VH{aJr^T-$D_AI()vivC+`$ zE|p~Jpoxc;ITsS=>;qk#@(MVoI^wb6xIccb)wX%5Wa{&T78qdwM;iGE4lfY8Q6WZ$?dq5r(Cl8GCqilOE63D!)+7aL|+W z{MgosVa5+{@HAFvY)G4JRU;yh_0h3&Xjn?#dve25>B{O;Su|^TcmjQ$AR>^h)kdEu 
z8#8)+p&|1&d>pm)8*To)DJ$BwNJJppk2~Ax(^#=)`t0naRCA3S!V7C}nxs-nxVF`z z&XY@i*2tUc)24WQ)GK;-{>Otim0hWmQsLmL&!PfZAI-I1N;fTQqz&iBsg86lChoK6 ze*JN&oO7zejI72~KPs6Lxtl7u%rX|F=hDl|+k$%*)rpuYTdRDlhr3?8wv!7wc9&nv z7ZKN-_ZDBaa&N21{famKsxd)5`h$MbO4?t4*L6&!&zIk(~D{1zP%0ZHWHkNl`xR4Rpl`?XLL`ekOlLeDleMzm1qsQ;uc%{ofAhG z$ojY^V)O%3-F&)yzFhyD@2TX-;A9!?@Jvpe)-kWYu^RdONTT~YhSYX#IB9Qg5*EtQ7^sIV1U+>^ixu(y=D{RSujpw;fIblZD$12|RciEbiw6E>V z@11>8sAkAKsiGBWoHYsqb@;p~CdcI5FNtZio4twN7#>ZDgG5p{d+v7V-nB8Dz-r`G zYHf2moR>&uK5XzfdZCsI?DG92!;&~N3IkamJqPz18(WZ1YGWkl41bhO9j`a<^h)K% z5eBkzHRPa}MdpZVI=|jnb>`&;^6*7VrPHN+PE}zbJ9FAQ4;o(F-9(4GuX7shdx9>w zcQW_3lYn!o$}5IUm-~6<@Q29v{d09KH9bDUGvjS3k<5HlyWu?dDJKkM&qu$N^)p7_ zDXc99qk**7QmJg3n|W2{W@Z^@#?5Wlx3U`fd^zodh z`SYIJ?mgQ(iQT$|Hhcbv;>;)vWczW>*YADyc5kMA{}@#0E6dXutb9mYDy3dTAbVfk z=O(Ec$a_U>#&2s(l=whr1xf5!Ggu&Es%))3daat^;vY_z551EvA*iOa#!Swcs#DAv zIC@yd82O)T)lBw@l=Z$6dSsqnl!j9&>Hnx;lX3rY5rJAfo(j**LDj#LZho^j=~zky z705~_ZVcvrA`3IJK8m{xtm*vrhI%bKljpoLg(fTV!uGAo5|NSBxFMaixtHhBOPd<^ z>)K_};6A>e-Oi?Q26|q1Tg__Z^Th(UNp5-EM#!HkP{m*AgH9Y#W`zAEY6co)380Dk39$c35s$BVj97M?y=SY~v235p{`25gD7rG`Jq+ z!i?%C?pKc#}!h zG$Fpv9OeG?oKqDBvi%rztXRqXRxGg#zol!KUP5~qZd%#>k~rt{Q5eW-EOS07e&I?z zIe4vIEnKyPtV$5Cdv~2LB9OfUoZ6^9_4tBjk`SNLxWvwv6u8FFZpY&|r+Tga=~Y%E zkGG=v?xTWY@z^1AR@^_eyn&v#iGM8dxQ=qE3Io}GOxNuBZsN>B8g7=?_m=aAs`ZZ2tAH8U=@TTfUBZOFtJ6a@v!eOcy$jP_KW*{k|#;WNWo=U$w{2 z`_+?8)^!VlFK3YPp~;iZUak_6k)1ho8t-%L$ZF!f@K&8pk2-pxRqczTv8buC{ivE{ z)_dLgR60OB?9iV5C3LsgT>I~rpK)e17*M7+>1Q9~x1Q7QWmG{=Y+JN8Ix>N#O!5i# zmN`kd`X~%!XO8jQh`FC+TWPpnp0e!RFc;Z^e z??l7n@Hwpm+%*a_vS&x11rlwB&l^e7YV{jpFG}c;e$wJvK~0=fJ$f9aw^~FXtI;T4 z(O$4TkB$p0eNvxONxltf^&~&ui3ntUe5V|oY~j~L>rX}R`<_%qH@)U5hsdW=u6Beh zQr|rJ=d{5iiN3F8uqS%(%aFgn?|Wda2%;tbeGHJTn`;MyVr* z$cI^X>vy1vbE?8XwjX20yna`=w4AQ#J$do6DXG+T)~)v;L*hjQvi&GHXk%l4tb~l{ zR`G4{cL9;PpxAfpe2xMz@dv1r;-GdDzf2q{-i-kMrg0Z_KE=fP;43rIW7&YbR^HEA8QOZWShP>52 zpO&naNj#I<^Ur&8XA}mq8pmr7%(-HCk?iyQR2OIXie7neCT>z4_h+CmkoEDSUyRhY zfL+vV;K0#Q$Fk^y<#LPa+PN2iFp#ZP@2fE<7az^18)n9w*r=63-CF&(_bSSuTx-?4 zcgrePBaiPL_hs^kh#I=n&Chsw=tp9j!Jm`q%KeQj3}nv^DXV7nS0?2&;a#@b^RNce zf98Ztm6nSlGP36I;G)T)iEraX1hN_xw&?cRKDC-I?|1Hw z)$=cOT-nvRIUBjZJK|1UIm;r3Io}hqj&U6ukFE^MD;9{`!h3{q*S3xUM363=EmB?gER|C+u(}z0k^piD`6n(W1dxB>afLCbgW_bci&`h(4kYB z&$=IY$%!KjWNX#+m~7?BUEip*_Hh}L2m#p^CoMLkphQF<>tofBC)4}SswH_f-#6}U z%%j6gpFHR@G?z2bc&*pmsXzDQXW1vd?zSEDnR9brmFj%5-${D(9)5Suw?|!bd* zhSq`K+llo+_w&6huLELZ1 z!a&x?%eAp)#+TQUH68vu$q2(l(mv z@AbIn_%Bo{!_r`vCil@H%*f81zRT^EV}jx+Q9M>=vG)$;C6A20cRiCcV`cd9qMv=_ zS?#cYGe<9j5y)z+$nV#1Fu#f%?RBH9&$D`BoO0{W=Ml-2 z>m(Ou{Q2w%ReMqKDV@#;te9yYf0ZWJNM@%!$`g^1eOqa(6lXYZ7SP1uW+UzQRg$h< ziSmJH?oZwSdpC8c7y=N000bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$## zAOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00RFj0xoKcmRV^n_gU@F&M)@_|P=sW_zt( zqxLTKe;_lx|9%0-5Nn*wHI*C`6<4{kSrjS6Oi5YYO#809`E8_i2lch7H0b&`!7^qb12MPDoA4nfnf|+ljf#%3 zm!*mMGDkLxsB6`_i{I?06S{96#vhYI7Jk2EqT6zVMp~!;IQ*c6V-}7x=KGqs`7JQ9 zVneK#eD}U$S|dGJXI2q$r-q!|E#Y^4{};-CkR@&HU(7Ltg;9X+>KQIdrdq5c51Oat ztqgia)^?k;Yn(w5@$t2CIBF42SKbg$aeLmtQOLAqPMXV1&AjziGmB+qo(HZktfKRe z$Y0rYE|vPlcr8{Nl0q9gqF#R6|B7Q4CSLBA>bh#~?iOqoS7skt7k9da>{;dFqBb#_ zm`~W=`><>Q4VbgC>U4WK#}H-#TGLktdIg#ovmti6kNK!oQBFq9TIq4XtCmcDZnU!f zP!m0$D!=J;)mM%}bOZh8t1A1f)b(UTbc=bS|17PO*qcVlPCd|7!$M$E^Y(55k=i!O 
zX^lb?#}Lz|&+ze3@iAYc#)b$882Of5Y#@geW;-^iHIf2_`) zGY{}GF`KTbqsN*$|9(G{%Q6kLVCCbldKqP8LC}u`gTZf!gM3+zO=bbdEF4xlu5wvu zK37|r&EnMC$h1!nUlNt0_j7ma6p{`_Tm4(J1@w6POQl(6r5r>mWM;1~K{+7%^*#eFs9JHKuSL+z5Xgl!P7`7Abr0!G@ zfyKMw6SsEI){v`9o}8?s{sp5`jz8ta{`(#=)iXCSSmvN)Y0gIY8d4wB&##JZu`|on z+F3(-j~%dA-zJm}IN`P6?CwJH??VKv)^bvwZth~O$Y$}et$6b3UF{@z!2o}Y_&lmL zB6COD>q7ccO@B9o^hT_Ui50Lc04})6J!&%^Vhv zy59Qc&RWXGR&0p2IeT?|A{*$6TaV7PH>6RO^=`H~ON+_lOKx`sAFDWq(4Xd}s%+%% z@8Q7;8RiijX<%PQO+9+gEE(2G0vEL0nL0L=o~-N8&K#1@F@&D`0v9cp!0FxwY>1aF zHgmU5`a(m7gie@Z`i^vVkN)wtwL*T+=xh!PH7y4(O+DXLW;$$vf40u(Rg!&= z$W$4ag^VvHzN;GEdW}e=A0m!gdWJM}3}Lj;#AemBWe$$ZnMK%Bx%DrGrP27sy+3;k zKGC}hi}Eab))OPgfZn>T>E&xef + + + + + + + + + + + + + diff --git a/doc/source/_static/logo_sas.svg b/doc/source/_static/logo_sas.svg new file mode 100644 index 00000000..d14fa105 --- /dev/null +++ b/doc/source/_static/logo_sas.svg @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/doc/source/_static/logo_sas_white.svg b/doc/source/_static/logo_sas_white.svg new file mode 100644 index 00000000..8f023fa8 --- /dev/null +++ b/doc/source/_static/logo_sas_white.svg @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/doc/source/_static/logo_sql.svg b/doc/source/_static/logo_sql.svg new file mode 100644 index 00000000..38b3b2c7 --- /dev/null +++ b/doc/source/_static/logo_sql.svg @@ -0,0 +1,73 @@ + + + + + + + + image/svg+xml + + + + + + + + + SQL + diff --git a/doc/source/_static/logo_stata.svg b/doc/source/_static/logo_stata.svg new file mode 100644 index 00000000..a6e3f1d2 --- /dev/null +++ b/doc/source/_static/logo_stata.svg @@ -0,0 +1,17 @@ + + + + + stata-logo-blue + + + + + + + + \ No newline at end of file diff --git a/doc/source/_static/new-excel-index.png b/doc/source/_static/new-excel-index.png new file mode 100644 index 0000000000000000000000000000000000000000..479237c3712d25a790f1e1f8e5635c62465bae1f GIT binary patch literal 11506 zcmc(lbySqy-|mS)7*Le%kQf9+x=TR1TN>$3=@=M`76b%_l%Am!L6DRdB&9p08-||Y z4Ej9J`}@7?ecyA|Ie(l#h*|4iv+w)f`}^6~_qukZmWJXTd}@3Q42(O6k!FfdEe z|99YFp?|*tY0sknV0!B)%3_rF({7+Y0PJPdWiT*4y(PGMhJ*eL^iVSL#=szKyZOQF zaxb>QzpT9PkhGu$#(xo>HcS5*{kDrO(tIV{T!?5loGTS(Wk9m1H`)eg6q~BM+@q`p0 zvyV@r&VvUrggQWE?lt^;LUJ3jkPL!a-UCz!3;|v+`WJlFB)PhrzkZZwiSsrT^#ROP zQH#11XyInm?698Uk^ zqq|7wfDXj`9N_xkE%^KT#s)X|Jo;a+c6WgcVoIMaV5dn)hMY{=^98ES1sr8xpZEFM z9EPchgC$biX}G0{w`S9V!BTuR0+{p{u#g6YT?85)B{7kLMU+%+>Uy7R>YJCJGJ%E&kA zmb+nLsaaW4=PM*sl8h7B7NPCe7HV%htBwp){8usi>ZQjU@YmMet2C#3$xiJ-YzPKt zNF6qoRY>5)+45W4WB+`LQ)a&w$2E&b2P4aAuef0I>-%GQ+T99cy%1B(O9z2X3yzs2sFgO2zcA2#1`?u2w0vZBd^&t;PV&nKMx|OKDZMW_E1`K<3KLF z!sQ29MOr=o&J}_+%l|{sM)SI2i|EA`c6Lpg>h8c3jm>#?NFyO$MAz`}*0R8C8W%x%4xv6=Td-uV@E$)A{b;Rjt%zy-8#Y-}{1&Ls}X z4N2wV?x5dwws8c{fzUvs^oaCJ(ezaG454PgiH$2L#zb=yHiWxi%MN<9`QhO`@O99? 
zE)Ku+1vm5;A~rT^nvrGyx;EZ~@>c$E4?Uv!*URUNfT&Hw=6Bj3A&S!1gU#;^O|L_s z3+Q#F`mdKYpZrR(K<&7LFPEaZrLM1zno)>~Z1f7tzP?)1ltvyWYD%3CXQPe}x){J` zi*LYJ%M9pc_~LXPe0>*u))utacv?S?Bt#JtN<&Z=Z{wph?*Uq@y|S|b_Y=@J z+Hf)zlC{hs`==5ng9Tcu+oL_W(8YqTA%CJrpyLWwbWL%$cDzJYqibwQzBcvdhT*^_ z1lE_!spuLT=s9xS9QSg|B4}IqdZwQvFP?_Cj=_d}8}Lh#ZS$-5uxpt_dhAsY84+RKcgQ7~N`NMuJ| zN%8Vx%fIQUZb1NfEVdHFdlc6z4nEJ$d49~3`RRq|#pt3i%~x(V#2!G7^NK@IMymUH z{5jas9DKlJ=HWfsr|Z!2YrOe{T$(ONA}2%ijqY5JXb_Bza{#(-*d(l_rf~ZDwYkZZKP}IKPO3%oG1YFTGE%(7 zVix3}F+tC5L1%S@N+m^*Fh>pP@l+H|v8Cj{I;Wrvm*uPD-qhMAnol#j2*8d~ev8fb zW4(Mo&BEi&>bhag8^_zHOZlkGg$fsvs*3JSGH5xfKOo&cmyJE^ejZHAhQ;3ao}GMJj(Sr|vPc-QB*|yPX0^uY#V;z^#tujAl{y zCW%CI+M9_3zXJ2Vu#xc2!e0}`ALrXN0(C(5{#c?K#WEG6S!X7jGtn5l|#zSv_a}i=STieYU*>)#wg~ly-lv<|%L}8ti9nQU|laCzA|}P4{5)wtl*C zZRI0kBwKyPL0N_y!Zd^0J?4b(V^?$b{83SJ(QBE)x?62-wd1$8KzPQs(UsyE2l3kt zDb}6%^NKz9SCdGCy6Ki(nYd%HqGM=zWC`W&5KKi_cgG4kTRhb$tn58FBO>phGpM#s z$SQ^3`xp?ZpV}XPFnpmsS2$>pxn_o_KRYBXf5ArfK15rf$UW=97TrMfaeYzGvKVI9 zglYl7;#u!G%`FtQ;Jz{+4VbQQHrJOv!ea(oqcc{3{Y=V1-2$M)Q1US`BgM6p=4ZF= z(wZXKH^w~jVoIW6I;k5^!TfXwM zA8(2>U(9OkzNRNKBJO?55hkN>XWjh7+WxlPz{by)v>W295mg#0mOoby`1nPeO(G_0 z+iNyX^VSFKbRE?F#YKwdJ5q|)MO;+nl_#UhiwBg=1F#G?3|EfwI!+3Sg!V(%EIy3` z6j6K)LjY}fhVWf^;ax(uWd$hdmtcS6?7EL>85E6cqc69k8u;*U^Eo~CtG=%^N`HHe zZC}jFO@LJqo=Onqoml`+@Kx% z(pnYXG>!1j{ExrdoHN_Da1T|Cb!zXMU`24|ePcpTb&8foHXGq26`eOx8f7}dp%4#L zqDGbHbVNK~I3LSLP1lmVBXwcsD_p&`(57FMBsSAqITNCs9!J~9g^6TBM&yT=l` z`=OZYRI*}-n-Brxcx7uCW6U=a>J42Gckcl+KiIy~dMYS0Rq~28&tHZRk5bs6O3L+_ z@Pumrhq*|SJSJD@S8W`gp6>;;s&fo!BHBjP(}pLe+OYimnUi_Q(f-NtegL#Qfu}lf5 zBtdN42VUsiJr{Uu2ta|}mxSEUCyQ)bCz(cXt~WaVHSKpU-y87xIb0o=O9w7cz$;H zZx?vAnK8#0(|D>#i{+DXjlxEG?aBg*U3w5Pq3QD&V>YF4>!J>El5!yFB#J6N&vZ!i zGA{i?NHy%gVIvlPW2EI&W~<*XrdDd2Y<5%wFP(Sqsu+vXMgR}g&Cb)RuQP`CehE{W zz^f?=03wH18E!sSfh4r+;pke{e9_N*_udTQZ*;nx?^lUFeSw%P_kUv3EiMhmxJ4Bk zF4}WGk=EiFO!tGNv_+6yt3HvouLGe+!VBwpNqFbAYfg3TVrB-gYCWUq*jvnkLu;HV zYN#_mEx`r!B-2~2IeQ+mQHpAOz#2wAeL1{pGybw$xC~ebCZ#?17BBt{j?Pf3|c&)0Gc~go1T{wp+^S4|~&gA0F`GQEE)e^RL^Eacg84qcD zdtEjJcAQ>~fpzI&tJpy0vNE#qitL?8nNOZEro+%PU!46-qpkpTp76f?FBLN^1xB3i z%Akhl`9KvufMy5cgEAEQ!gj>dIJ?Y6F=_Zo?F}|JzsUd;ma1r9XHsnwZ+?F@{LWb~ zr@N;EZ|iweBugcfDH^ao7^`Q;5A1sCx|xBx15?tE?p8=2FY{8l{&1B%>ssQGy6eJ2Ku2$iR|BWrzn9d6q!{>%Ia$&!rCBqZBvJ4HdYAMk9 z(fhMq(5dte=Q#bTGsMwitUhX|h_CCNN32j66*651C@etZ$M8!T8FcPiC@`&`+@{u` z7Sk7qHzrgBd~ZQ`H)PFp7;tcVGf#2WD$jPRt@e{u+nbbz9NAx=&4$bna(Z(cXkv6b zkgCa@AIJAZB+Eo|6c$o;$PM6qNm67j_@I&^I`>3xOm-;$bo|T!#>df+6LW7|IY6bE zbF}OiwNHZvD;nCXr(2w#DSi=r`|#k2CUv0bkBaxn%=|^bRWK1C?%EH+GTj=bT_69& zzy0aZMb)e6g{%SJ4()^tNy+Tr#B#{+B0LvLP_8Tqd?^AO=H#Qz;|sVXes{VXttGdS zq^HlZ2N--NiR0kchRm2NjL^LZfw`5%g<1LLnn_#FxiYzi3b}6VDFU0ylYai#NRa^v zwpjlG*9`vuH*g8d1d|+aF~H$->uZgf`=Gy>g_y*D?(72~Esw&=i!s%633a!_sbBBq z`2M6i%RH1Wv-F}d8&W&(6`7g)dakarNS*W(FtTNxh^?V;W(?^|d;& z{8mx&iucVMOB?N=Zx(xVEUD9g17;gH2gpat=opO~&^lzy+gBjT0JkL(u`b@Px zc8K%f(O;VEB>y%3mnrU+uqa2_j;}du8*5!TDmFM5pts8@6I^gAlo$bBxfy0^6Kd&I zlt*hM`-RDyUAZ9x`nNO*>PXYjWpN$2jhf=)%4a_xN(Ygb+)v|8_$*NalqCpW{`JF~ zq2g_;V~Rw))uT=@zW!BQYM$U zwiN&8x)B^W&k*Eg_jbNHBI-!`XDqJE<;HBu6I(r*d2^)4oq1s|L%Bdjp96wm6Atg3 zRt068!Wr%a=Wp7PymUQq7W#LjslC`521+nh=KmR&da!Z(d;e`D>%)pIssW8qT9%Ab z{hqv(THO&G)ou>aF$?==`ANWwB_&qBw#fRed7r^b&j{;fAvb`App3EO+XBPrZ}PlL zJRK9jJhfDgXFB~(kipBd7XVE`88IMvuiWQM`w0Mnp+fkL0xb6hz{GHMMeU61KW(on zzEOF!qm$k*n9yH%HWFqE9FfP6@jVlJ^wyENcLGvU`D`!;SM%RcB1y8rqla(FB?<@*ywcWe zyTB_yi132q{dF7WzBcx^z(Cb;gJscp25*PPvf;0wC33C>zHwn$q5LJCLtQuIz8Nw< zyyfewvd!Ne)!3 z+7T#{pO~#p0KMX3Fh3f2w0Q9xYu5NbsWEa>_)jYbTiMbzHs?>0I_r7Cc<2<+qBK?LFtfZ 
z99Ct@s;mT_YM^|EP%x5)FwV*IA+R&H{Qy+FJWBjN=Yt8stGt^D9quji7VqVe{>?0k(x8_z0VK@o>*3})CHR*V+H9%_mvb} zNup&%d;*;5MU7S?ZN@k7>Lhs4}$|Fd}dF$toa=RK4N3qt!dciJ@}1DFmF zw3E2N)U(ce(}%*Op$pzxFVz_t4y32X+m|`rOHc|ey4@c(b+SUhDI~t(%J!My%T3BFK8PVj(-s-qNVZ5}c z70fFvwx~bSdXfWc*l7O13f*xiNJ)S`AB;Z}!_w==_Br^6W$N?1xeL949cVSFHFT7R zZy7(t%j3D1fj>#0y4LETPC~!s5-5m$5L(@qQd`n4x1~awp31!{JNk=UI-)4@gYQArAg2m%5*>ms-HIGbq!ISddat zfBM3Q2VKYon;E6V*7tWcw-ldkoIhwuLpupnIJcQ%2&Y1Wq9W3?I^INixrvw>FTi4z4=`}_p#elebVbdEh z-Q1;NOCj)3Xq#U93%b+%!t_Kxys zSNb#&$|ma?ES$0WCU0-N34p>_h4mGHqa{KJB%)p?1k;>jeE6=S5F+SYJ$U|u#IsIB zZw@RjN20s9bIi|}BxycsJ7kDf8@+j95D+MA$cl!c+pMi&a<>KltQmMbB2UP_owuRp2x4amkG1layRIcUA0)cME3ZEI+_-K}#+)^|J|Iw0~EzU>*D zWQqLpWbt3&rmIC4QRzm5^nhp@kIy!?PwR|+)_qh4Ke$%k8gl&gVr2Un*XjsAoo13$+s-fR#p_50^!5&dnX{aX7)FaBwHc|@G)U07 zb+~A=!4sVB6xITzy;1t+&#tacC1zWbJvB($>CgDU8>?>OH?UKjvPHuV_&4kv*CK(E z42gy!OqDB{ztv(~_}4E3z?MU0+k^LM&Ph~>t(c~6{F%W-@T-8zk{$r71oITMuG&-X zx}5mSSBuR1Hbx2-oL`cSQ{?X6hVQJUTgsZ2){vx+?ge~F$8uWE04ZHlrv}cWEMn@_ z<25o7hg$SB+0%wFU}bo5^epJG=K;_6b;?xd%Zw_`Uw7b^tJJoTVjtSdU5@pYXf49n zcy8E)?kl1bJWS_a$pFX#*C^IwJ=;~*C7V8boLD?mQb5l7&8Fx!Vm5NPV zs?>teW|qs@{yPf2;^3aW%m&Mv_uP+P{c+T~zBO=AB!A%GE8nv8G~$*UUw>^6uM=*- zs~)-iUQ?=^qdm4XmPhk*mnC|A_>;F% zkJoIRXd?ik>&1>LPe>gBU|#si*jzU!!Jv%FRr^B=pjVYI{>GlTR8MJ^kDVS%kuue9 zY47H`Nc`C*Rrp%Q`OwW_z*|Q^6 z9txdeB&T!i!M*PGJw@Eo$OLmc4i%KGm#sxCZ@BdcG95t|FzcErfsZgBy+Rvhi7Ea! z83Q^t`%hN?e_*j+sO2m!VGVQU1tXptV<>Sj;bRhXfPh7zt?*VEoe3x-htOO^$7Se9 z;lJ?d{!w-}iQFF(&KqhOI`6TxujmN#BYj_U4imnfwJKddi}*_O`gyQdCupK|^FDW~RxQ#XrK1F_oq0+p!P>2K^4Ot`iHyv6V446}-I4I+t9x zrDZ5boi3Ov;5c5MKq9YMdF`|lW@(d|8Q_0JKqEHHsax515>8vxeWDFR*qT(FH#dc;9rxm zc+S&p#{AJUvdozdZ(A&nq~wrOBBULJ!aRCo6Fnr+Op{*2~M zJKHL~+nWB4I>AK_!ohANGf2yj!fC*U_KoXCbl}@0tA6hY_ClzoCh~V^K#lEP%q5Gu zsQPGAsV)C-of>lt*G9j#heW?ekhqOoR&g^6JL*RrElidRcyC_F%vxd`K(Ia%HTtY9 zH0D5aG+AXN%f8g?Qcee;~6yu95WOsAde^q#PCjS%{Yno64 zBmAV2BFF2Y39F{LXYBVzXb#6vSw}q4*ny_+XVx#Ro_(w0-l()WYM>G|f&BeE{X3+B zi~Q*e4tJlYGr~uv^FJ$j8^4T={iCm|g%qrV{dKh0#5sOr&*7dL(|N637M{Zo`5QN` zU4fW$#3GM?J#>qBPi5AN$N9B!Q`)qA+E<0FnKB@#BGwo(F_TvwN?_f}o73;Jd-dud z4^;ZepxR{QwWbTCP4c5Fq&D(GO{)ei?@s=acY}xn@)_eM3^FN6I0oouuYZtDuS4}| z>1j`n+>jAX3+O&b>VqtPwdWCfTpzl4}IJv zl`|d3{3ka3nbJ$W!w07nyYgP5sTUY5(9QDa41Gq?86 z_Pvm5ujQxq|8e93KHKz6M!$fsQzpYCg^gWHt3B6EBCo(VaX6 znaSL4n&W&!T7yVAhj4L!3CtzzomU&{*w+ko%reNFMzrMK_mnBlx{_?{Y1}v4nbHZe z)9O2_qbiLV^%p^3`F1c*gnktq4%Zv3Dc?_o=3H1Mgx}REPG@u=TdXAex5N|`i|ren z5rr8{T9w^~qz3-u`lguysy6DBPYF6$M<{XML^4E}f~K?gJiYQmY^@%NhwC zLYr|pQr+}MSnrFmlg9Xt^IN^5xPlJ7nXoTPa`9b9I!L>>3%v>gvw8~6!j;vvz%gSe zMJLxQpVV>!r?v1Y=D*43(+qm@X{RlqZ1$$9J^0N&ZGsDX_g))0uG(^8i@y6QlGN>j zme+UIYcM6FQvBekLf?FN>nvM!0eFu^KlB@Cf@G&| zKo-p_kf6+E;=-IogfNuR?Tt_pE}k3S-_hli1c6*vmmtG`icRl4{chu%s~LQNR|krT zLXSywN2G42Ts0mzkcb|XywQ;4p;33E*l2&@mjWx5Pl2LocaWL6_}3T&lT|5jC4}VV z&9IEfyoEGwV_8@wKYf(j48`A3#|XVg>|WjqhZ{-2?~XET*GVoO4(55<1^O~nn`Os> zooRLxJftTK#eIuZHQmuHF6}Gt)A+12fp;9C;?9~}96bjEa_Ebw?JBp-6_$=Pl$6f? 
z=)Ns1Q}iHSH2^mv+GV4#=YYL(ZIV-<2xx_^)~AWonrL%}JRc0OZ@^63|Cs=K(0xzh z-uN4kBb4wHK56M&%Xj3R@<0D15l)X!E7iq6`#X=l7V}!HTA?O$fUhutTuzB~i97RK zAI^oA#u+E)>F~gbt3PoCrhptI)YOeWvCq7W3q7_@Pf28JQy=7l!*JHaX7wC$*Ubjt zgGnH~5Y6kWn#(_#Lo zfw2>8dR<QrS%0``lC1(U=MjYP0IAszt?!Rl-`}EUdo2t3i2gEbgg0vf3=dWbq zJ0|l3y}RsrPZI(kZ<+8CLY>1ZTUUXGDr^pUVz~@9^qR_s!a!T(_Zl>Db*K% zCpC|ay7Z*r53#59Y_C2MLWs-LHAbj!iUh!dVQxceW)2!8fE)ztNhr6Du*_7;3fK#*e;Jkr!96Jv`AT6AzAb_A#As zgDVYoKr=ZCI(AhM(MXz%#m6Ftp`VBO5^?)=!cujG+xc3wi zo5H#$xCGEd{P2f}#{b7eyz1Z%J=m?^LtU*&Ur!E%p4^<lJqoHOe6+U0CH+bmz2k*-K_oghc?N+*3c z6tgD&gy?k~actrjhrzeT60tCEP7L$z&a8NL_5EwqK(m$eWrcg|S@ShP&`k5yfTlmJ z!x54*nprU*gS-GYTQ-2EyYeJ;kdp%j?@#b9|{Hpkd6J@AWYnRB*C_vz1X?stdJ5le$m+-axzt zFFHs8{0+=1Nm2EkwsZ5K13|Wvi0|+I_yISNq7!%iCM(20Jr%#C%s%U0eos$l3NOZ) zs_4-tHsi&u)7+%BxkC85HhOM;)LoCn(fSllTrARDgs@Tzmuc&C!xI5QBx7fta2d#D9-_l)9~y!c)p0Ym5~U4{tX5?J3hX~3^Q368>C5KLZ4H{P?pzt JQZ8!=`CmRDiiQ9H literal 0 HcmV?d00001 diff --git a/doc/source/_static/old-excel-index.png b/doc/source/_static/old-excel-index.png new file mode 100644 index 0000000000000000000000000000000000000000..5281367a5ad9d6a16da7a6eba5696b9fa949b502 GIT binary patch literal 11570 zcmb_?bzD?mzb+t1N{1kb0y@&2GJtd=oeD@dN;AL+N_R<0*U(6VI)ro#A)p}L9Rm!_ z-T15bJ?GqW?>(RU2Qb6T+Iz3HzUx^}tkCDGa(D1(@X*lE?kLDhYoMW_!-3a}I2ge1 zRK-X-;19ZshMXi?`5^5&@CMWBnaVRXw5n+Q3sWrMJ+8C7z6%-}Vf)PwdY4m?IT{*U zmxA;&Ezg(RAE(Rd>SK2=S&%ee2?s^H(MVUkcM!`GE=heUpu6bU@+M4Nq?ov*=H{5V z7}%}*!L(DKn4le*TnA{7;xL&{VQlWz5dJhfhcO~qY0Aw-H33} zyu;<9T)1bzA9ZndGY0)VQLZl^euju(FHDbm1@7&v(}sActx~i#7rH$7AABhbA~L8B zT$8rz=Q|IzuGi_{|H&KLpJ4d)7397!Z^IGN|0wIFgFbQrsnglYJ{ijI}@bO z1;Yhrnq#EtDrfm%_bnO^=%CkW)UdG)eO(Ig4nkh`m>z-;X)3h;h-SFE^YW`tPwo_> zd!V6XX8WMH@yF~gUX5$&S&fKcW4NOne=gY$;`#3;Y0Xx{BIqyQCXI*hgZM&zQ8+gE z5C_A+uWLJvYG~3H8(@9E`ToH`N`u{ zf75LLO@|@h#&Ps_s4R*$*O*p}*Y>H^ugqAW*)gSnt6H@i#%CW7P?sPUtx793XI(qB z`_;h)fzKm?QRoO&rrBWZUE2fg3P!)V8_GZV*} zn+Mzg_qQaq{y3M445qFQnD zSP177yS_jo#Ll-y)x>>I50;Uu2=UXUcq>GP?*%X>zK6rXKo487=|z3d$iEu+?S4UJ zZN59aUKNT$_Bt<3WW+=PH+%`%?k@Ba`|~-*kvqav@S7#%vcI#w)#Y^aEJnbBKi{7M zdL&K-fPmk{F|c?L9nFQ+^nZM{ad0Qp>0tMC8*>J{%5!jhvE5Z6Fe{hn*Aj9hy?%Smv&Bb?-4ADp_n zSz(4vHz5|hUP$HaITmzH)sVmQZtvQoBR6sGGEsR zb6ma?Y@end2bv{u&h6z7#egHm#CLu%0%fp+8!^>?_fpAq47uY)M$im)o!|c&?T~49 z6il_(O&2i&)j*2EAVHu91i{?0*Q76wK0{_Oa~;*jd}NJ$O@3?>ZG(7=NS;~Vr&AO{ zLPo)M?dpHrem6s~HESx@@x1$3c2j$lhB_b_;6*BDa%&xLL|L3?73alO1;xj;wP z>Su_v1(%4cPmmAD#k;xC&~on?uv5Up`og9y!IRo|lkMzZy%nn0GM&U4J|cBj!H3s4 zoomK9B^e%wG?SL?Dud-p-ImHJ!~lqUtwgB1KNo!uh8@Ir9XqYbCHoYk$IF#c?q)KR zkWPrIhO5Sgt1?=BMDwcAf1131ndir@2s7~s9DzBZDBi$pD2JT8-Swmkhcm>}HLb;M zY1m=mZ1v4ozugQJ_HM@CH3D4puNnc0=g%7X3>o}gBUVK7UyP5YwmZMvIv3dc{drQ! zW(Mb8yf8*|MrB#32cppvl^(vC*;ng^07bt-d=qFI7q^10?o5w1uPOsqE-!=NiTEK# z^rv}UdroSt&geKQ#65|d3pa9?hIU{L7iZD4c{of^l{B70=Uzi05a)h}a}m8zMVb`% z{prV78y^8&S43@Y_vDtGPHMWV4bwD6_9)mpD$zx>KY0wHiNE4Y zy}5nsEYfI4GUyS|2dDhqqj99~dBP6N+pMyS+C#>?3IZdyCGC;b&fbY@1107Kx=ez| z7l6^Jn7J9}Qus`z%khQd67{quN(qebt3+e3)t(T4m;OO+an__c;dfK?-3;(aiO0>- zXRWA@f^YX5jZI{jl7B zpFXNj6;B#fa`@~QHoZuFj*(I?kB>vm(r4SDE}{K*uX`7OUNc_RXY}_bW%K&klBZF$ zh~sxrKE9{7=jg#ZjEZ<1BI=1I#pCXV5b8_+u8{ptzn8lwru3&rtM>o%q^=5u?G~gQ z4jhBg>e_Rsz8QbH*>m?Rz`H~M002e8lMOjrrI`lJVF*XL5MI@;UZOXFfDvto5^Yie zrgU&(cN%#$H|+w900+PuF8?Tja1S*r@jtEr@Qa_(Ky3h}3s8dN#Xr8PI{`*Moqp;! z7B>v;5LZWBb9|ijirV6)felfAD*zlclPWYi>Kh zvH|S0a!8|%49^JH7@N0q3Uhh0^6~v~YRLi_-EZFTx`ubBoA_wmCJ-%_Kz*hMetO-fznQVmDF*RVdQ%NqxPSUA@S)S)Xe`jBY^wPOpSBtw zqyAS1mDb=j7IW>T6m_7}SWV$~o)w>&Ncv&ya<9m0y5@z!9z0Fb4->()5X2*yp03t~ z(*^ns7YkBY|I{d;J5$jS99xdr0O>0ba|DMDYC%W~H>ozCFmvSSE3%H5pHgL9FWa9? 
zx*TE4xuf!`FyyO8eG|x0P3)m)C|{wBP?8^`3XhVQY1!+I?UqW?BuvZh zIJAZSCzX3ew=aeH^3=%QEk#7ZaMTi1AX=K3@e~E`HEKQ2QR{-a39?eorcY&WN?wPK z?Q}S#^-*92p}3}axjeUG=m%|8ES75kf-e|R*7`q3`wPyK3@q&8U2qU9Uot%$7W!g&$)Dsyj?BMC)SEn{Jd4=zXyI4mBUKQ!Mz z4lnj<~&_4y%D&5PsrZD(&nY2M=Ip3f5sm`_1k4)-*%8331VSyHN$#<+d+Zq8d zXpKlqjY~GEJXx}V`DkJSTt9pPi*>W`Z>P6C9=a~3%j+i_1DnWiz|QG9$4pGM5yG3a3s;*E@pn=qyj zD_(@?h8&Z!lL`k8hfJfYo29p@mW-|w9L5ySndZ#xl$B!pNc7%-3VPQ%Ld=rg@U0E_ zIh2*n^nNYHSTEBSw|}grCptrGqPyA!meSKE&{jIC^IoU^-8gW1zkr$lcKdS7#a(>1 z%;m6^jW;;XugLX0zvCCoPlsne&^y8gz$vl9qDD*-`ywY+AVILo^W5PPHB6l9T`U`A zrw$A6FTS>-IufdNx!?wde41LC&OH7*fhr9zbBtGY-}2M+<#OcMqYFPVTlp6I{9|M9 z4(=D^{9;#OBRk;)t{ny>TVseDO zdtZ5`r(#wh-)e2wwOv1o=KV12ncbvj_H&u!d3RNR_Xc_KvYI>wOLGB9>7yrZ-4)LiF9ULhRciA`21bS$Ly3n8R;BD3Tu$t&5=_y%x0R zZ12^wZ1`AR1;anO6{J(_y2$5iO^e-Og8@w7SRHRe-#+!dC)=X;0WQQ8%v|T|;wv+dd zSf6N~+!WY0ajb`@)@^0c4Xn}UV#0Ym_II*m$jLM#@TB^1>XPM{^0VcWe$4nvk1LNe zp>$6vVWbSAj1j~W@?Noin#KBke7&A2VU0%cl-Y95ew_q%wBS-=u#NR{zj!58ls{w) zYy(fN)c(ashOi9(4Qdmu?7#y1!XR~gV$vmR(bApkcOaZ+C;xbmV}oRiNtLsYE27PO zJ3Mrzq~axRKADCLtcFY?sFplynxNjrAdP`z#7T+Yjvv#!n#AFZ8EK1xA+bl}uH4DZ zHp7_9(%q?-fDN2zAd^P85N#qC`s=-5@y|!&hH4q2r*m?Zo%$PH?1xPI-R5kp!tb=t_-cU)?ui@_ zgkB*tueYqkA&ZfMerKbZS4$xA(?aOE_#XY|w;w}WMqO!FX?|nHEQ?zRNP(=Xh`=b zta?mqer6u?-rVFcd)6L2Fu?WgP#=%)X7C2XFc_nirR}}xUq5q>O_7NSD+ZTc@Q4^J zpC?ee&$^sP=7fzmRd%VEn=TIrC3aSsBXKSZ-c^P^LDr5tZ4J?H^u}N;>XjNC7gQIL z+(U;pLK5d#npax)o_`~cR8JEN_rF2qxo^Ps-HpcR zsrKp#j?QK4)92i(MV?Qry(Uk&i-`5#ejPSD-xds4Y!a803sTZWD*)8b1BM9aS0PuE z*{9F*_#@6EyK}Cd`^~$ggqiyXy|r%7Q}8?J*OB2fV=y~CYg)Z`jR)R`&&>T~IG)HL z`Nd7Ac6V=OU-UP=bsj(4%s}uP5c1jm#P~s_84=z4)?bc)E%P&XJfX^5oUi3_SjN;T z6&{I5*UHpvGqZF;K;@fogAi97o8=~w`$8|Ti={^LDRbNW7>BdP$4b;;$?jzmiiOBJ z4Xj*67690(ug@oDE62-`v=)lqqgXL<&;I7a4-gk}*LnD22lWwd%rb~3IHovu;vpN^FPvf{L>wm<*) z53b_5QaugOihRdROIG_rUk8~h^)ShuStZFWv$LQLh7Kb&$jOaKq%}IM#6)&Vs#P{HS?y;%zBIWWw+;LYPL4Hj7fupRqno)cu#@nv2uPIue z?_FQ^SJF=pD3WqAo$Z1vHb8gK`?KsesjIfK=oLT6XtzniNg5j<<6R@A6c5b86pM@` ztj1V(Gp-NlosgdkhQY5P2T#^MFQik>>zzgN^{}13=RY4}EKp+{Dy?ODE5wV(CR+Rq zN_b~|RQjEgi~#|=a>%jKfbnA;bpL^RxQC`RX;wnQCt}rbXu?~&rx%`=4%kK{O-BNr z<4y$e;Z21G`V}3?r{Bx{-((EW>yCqGuMp6c-Jk35f$LYe^)DQ01^WdI3IzdW}n%dN-vsZB5gdmmbu+o7>Da0(~5VFVV&LgyK+H`-c`r{HzWcRLv- zX==$o5x3__PR@628as1oRhy{}IWg6Fs8Mr%sKmT8V8B(miLJq*3?~{d^frcI(iP@K~yfsCgELZG*E! 
z!$irpPQUhjrbT?5B!-0z4BoWU7}x|60p0q4#oRn-`S2I?jDkg>Rp%vL)~l3M7yli= zvBfNB1QhgycoOmmg8D4}f2o-N5!Zd%?ts7INA7uYf|X_;vBNx*kUAT0`?aukR&Qv} z)P4|E7kN4tfIgTYS^q$}FD9wWL?y9Vf*%OfmpFrPl*i=I zvKJ(o-s+Uh98b;ayz=LG26c^~y#E1aKDdRKYyJHxbr$?lEu~3;t=G7$xjH<>_?584 z)mljY0lH2G4=Bp31V>rUxBPoP8+_)asNcyNZuEry$>7I9!g|`<3!Mu$dNyMF-S~@4 zJ0_fzhkvII0h-fx|Q)Q?}}!G0bmMV*;~R1*G^e);I?p7CI7( z5)oTYsUs`y0yN&lbsL({N0%iUx^zS138|F0FczV1w|xCK7Sm*DtBYMkr}t1}VogKf z{cmgiBWSI>tv5107h6VHnPZZPK6^~O&RR`6Q)aPHVgbpjg$;`p{hLixew0rTh_)` zP?n3_yk8RaSKL7LW%+hSF$b|1?@%9C5MiVG06_fmHy|?mvbBcZVUU@b2R6ceVD@#F za(J{JJs*S#Av(C?s`G!6LFmho5v#q9&U_5>QlV>2-U44rHXqrcL!VLRqZQ&+OT@CRO=6 z_RNAg>5A?c=G)Pcy>DaXzJ$19jyU?+0ouOa)meJILH( z0r;eEuC~P~|ASBbUr{;0js^mZ`SV@)^$w7jTpmw3eLY>-tbku_lvI4ZIOya2da{5- zh+hLinAqjvXcDKf81(8QUQNvFcPL?(NE`l-6da=mU9SgQ;m4oFB~XCHjWH1`S04)4(Whr$AT#v;9o76Di~L7-bh)?5 z$~zijj&v#_b8_XMEokcc8(!a=oKhNCW_DNS05qTdeL~%B?dP;1l^plBof*<=N}IjU zlk&r?Kt{IOM`;zN9ej>Io_xxIQ#>Z-_>5&rqCT!_xc=@1UFwh08e{h>4OqN%&qfnp|su0;jkX(iHsx5ZE4GW$u%&t zO7pCvL83#dl~`s)v?jt`2wm)~-S%zA6n$J>e%Kk`%b4QsuL0KfKBY0H(jteP%&m+p z{VV9bd!;7x?Wti>w{n$JbvZwY(9_dG@hl<`dgy%x+nx_f-XL{`x|Sb2yW-F)%`1^ zDrC0Z(`xPKJHRB_6e4H}&%!tU;$w{oZ+R=M0k@zf!opao4uE#%l|BI?I<{2-45pM5&W)J6%l zWR7#c;)Y~KM(5#}*F?X~R8zeDACa|_8mF>GwCDE#@B)gy%6;Aadx;?VdzQZ9exouB zYtE=<9(ND55*#JM1j2(0eipXp3hbYji4Xc&$q>+=WD=T_2 zT0q5jegDHa0aScko!St)?s2H-4Zobc{g^{Tp)$d!Q@8}DN{Bo1Jt1fs>-ABkt`q5~ zj&`YxGNY-^c;}-_B}JS>MHQ}^S}RQ_=Gu=22*Rh=OSLt2?7mZ{(lz@u%~l@B(q_2t`f{m|0f=Dkw=zCLS3}9C*<=7vljW061)&dR^XlxC*0s9T+I8-|X$Y`34k%k(8hl#rTr+axPGocEYoxc?K z842#_c^SS)Ae%W}eHO9NJzdA^;_s8szMiJGCh8QJ=!y*B+tWOK`9=0Bf*6bGg-+QA zzbw(QL}v=<@M*Xt$>FDft&fw%5uQ(J8|rqL#rlgD>jM*}$p{nvvNCB;{(?d67YD7< z^YuQPfq#ZI-mQF}6}S^j272wO(=r9#AvYK!louDajT4(TBrM6tPikl*)=zOBjE1QuSji(&yq|1C6P_-fB?tLl{Tz| z(1Jhjlg&FtG7kru&?w!CM*-?yDeh%&RjED)I0S)c2rqQYLMFVjO%l zkzqI6%;~BoiY*fQ@Af5rZjnzn8gs-Pz%5TsiUWK1OTaHaSG2Aa>2~h(=?A_PNoBg}jBL0^0fbI?JNPH19jrT0Lr= z&91xWS}HK;Y&{9O9q>^bZV^I_xW+2VahJ>IjJKs^mxWX2n&Ghv<3Y- z!mnq5k7v4KhKE?soeKk}X7vkNpxF$IR}jT$2<;eHCe*SFjoOs%n!vH?n}$&Nx=&I% zG^bV;Bd1eY=Wa}~NLcaS=0?U4*RpPU$!xRXG+8|e5lqNEXhWjukHa{^2R72XPxNAidXy>b_@-MC_>lA@m zCpNu3TqT|71?y()$+h_L%fW3 z;(RR4ppU_i?I{EYmg08=DZh)k&5L+;fY_3Vstxl9>F*_?Ge)2Fti}i^6(oS4hJ*#F zCvUjqj`bSQ=sH5nD_OKHI~Ygvt26aTKuYhiZ1`VTtc>k2W=mwlW54sHosl)9mdbiE zGttlGud2anA1Df$J);@c{X zwo@H@E-F;?Cc)t$sX8s|2|bJK9A^DSu`DvC&mVY)Si}97UrOH(^5?A9+k8`_QTDL1 zoRTvslgqi7S|sR|^%s~MOaqqaq77x$%@J!EFDTV=_a3zsWnP+WDm?k3xCws++sLR? 
znBmSuA8d=+nkTNK#E2>ziIXF|*^ekbnAy+7ekJ5qwgm(L?oDg`IBj{Q=q zSu0q}piS;5277dA2Vy?Br4za|oY{L|^u%>wx;mMf?Qbw-`X|c?F0wvJUICCW;BUjx z@30gDAJ#8>h(+)IzMLd~A!o`s^ftk?_k^%~V4;F0OK$l6F|doy8&1{QDwqWpQeysO#Dx7XAMY%r`)qug-BA z0|1ZZkD&O4J&r~#(e2H(Zz41ki$JJTZG#zLEn!69vz4f^R&vE;~v~h$BT7Jr%Dp z1kdoxWdc){hAE|;xPct3_j%6~?bs1LDsUczg-API*Azp-hM9$EW8=`+haK{q$ZM!EvFE`Z1;62%&2JraC-Ozb5Wy#my{*w!td%>`l zX$R9!PnHd}V2wC;6lW6%!q1&EUX*~2T7USV=n|vTKxrr$G9JW6A+~q_{6}$yMN8HWqO6* zS*j~5b8Fu|j8u43rKlbXgftru}JerXK2k5%6kT~28?3vV6w?}Ssjs>=PYw8EE z>1=B~8F1GuenGCkEk@c~m_J^O_K@xAcp6WBuSv>}{zmUBw_y`JW@*(gfy*lES`p0r zG--k5nHK_pSoY53yRHe%$O#=QG D4m)qq literal 0 HcmV?d00001 diff --git a/doc/source/_static/print_df_new.png b/doc/source/_static/print_df_new.png new file mode 100644 index 0000000000000000000000000000000000000000..767d7d3f0ef06a3e3e9c3840d7bc0425685aceb3 GIT binary patch literal 77202 zcmce+WmsIx(y)yaAV`8kkl^m_f#5I@+zA$32X}XOcM0z965JgIcX!v1?0w#|ch2+w zn`?f|tkqW4)m7DZcaSVV6cHW=9t;c&QC#e+92gj+Fc=tw<2#tQCq-ytHeg`z^~OR% zvf@HQgt9i4Ka9-`z`(?U;^JXdP!}-zPNkfE1oq?vExpmaz$Q)iCch{MLXgCwph5)y z45TSrQXlT3lCQX&MnU{B8sAfLIg*& z#4|Eby~H~LS51?Ygo=OQbp{J|rjH?+_#Vv3nbsMD8#_oSpV+K(_w4SSO!?vPP8G+y zQvpXEXXjp)`p;&uXEN5&uNcf_JCXcg5K(yrCt$;gIqzPdse6|r1QVO3FMFEd^}d!T*FB3vAsN_Y z#Fs&mRbQh;7eSepF@Sg*H`|{)_W2tc?MPV6-ahH|C2=kgD(Pf0(Yq${UTxAf*ZrGP z;?=U0e)Who{1Cl9A0V(sWDTxG0L*6M3seP!eaA-?NNtqFqy{cv6IR6gPMpv5Nh#tE zLI7E!v;o5u`doEYl|^s{H+(~n+s8-d-S=``V~+ID#|i~f35;#c+m;Y& zfe_OS$}bb^2+T!*^Cwh5=Qk^eR0!!zL=7nA4#aVU6K_!g2>%XK2Ke4i5~wd6fe;gf ziedkn2 z&vFV>doZiTG@O%Q;)9&(0wM@QqO-K|IA zNt6{UyGX%|2D$A8FY+rpT2^q{S>ZzwnxbEkXl-_rZ2h;7)9(&YaYFL5_Klcph!-*O zynf^;XU*l*XAez?PK50NuFS8%oY9^Ug*t!gNiiZ}k&)rVgbwtg=}IupBvvO5B{nC1 z9_GLZQS8FeJ+5e*$1;Prr?scP0jmQCg&%h#>GoN1q*9Hd?fN40R$3)B2rlt0Sua5^ z(JrB!5U2ajb*Znno>w~Z@eybT^ZWBhwEKB@Wsu0iJwU+o_x*8)!f)~weOc^9PNE{ahh%SqU5zFYm&_}I;pk|TQu9=+mqXMgLpB`WVdqo1_h0kMu(Pc&Dykgz(GY*kn_(6wd?MKMt>!l0xW0`8we`Y3q8#!BHU-R5?^R+`TG#{?(&?4{js5gsvdsmj-ecV!>$zjz4qyprTJh+lM^SSVWu8lob z;v3P#k}zqusVabgRFYBKUB_L5G*Y@nI+_Hk1jDkXvQKj+bC?#r7L*o!%PS4)4G7C) zO)`!gjwP2yw_lwe95I}390y#49phYhuV>EzH)WUj*Olk0hf^mZ=hwH(`=GJu?^Q{) z!{7M1+wI)bUi4oXUTt0t!O6h8JZA+s#_6q;*ER{C3EgGo8Q%SXIfZxssEbuawUv-J zpZ8gkI-U=?_WcOrF?^0+>3ePrHvAL(Hr}UizTbQ~vWex`nCPn5Y={P#i%29swo*82 zUWH-RVrfYMY3SIP>bUI9lIE&-hx|9pHsCi;XxTN|obPMVO{gc8Qq?J&qiZj0dC2&v zUR0hil^BYHS>rpT<}(-uZ@!-zwC^<7*aQ7h(>_Xm)F$C1jU#E5vX@wi;~BPFkGBzZ z#BZ@#0imtN4#oyeuT3XU7uNi&@wat#c6CP3PR|%T9UdQ6Nt+;NKu3coDgLB%tjbDt zg{c%mIAQY*m=7^|XB!gMvl;|jm0C?z#rH$>M}`({eR6w~OV~g%t*O?Qj&g`{@7=RT3TcE zF?-N4tiyMQGlwP6e$}N>kcK#5V6Ch}Gpu>PImF&$zS?H2cJfzVoJ)|)FBeh+aD$P@ ztDEEb+q_hD_xi5ZY@POp7urX1BrBvPu07|9Ho2UthKtLH7J^fP3(jd316f69EEkT$ zvrFPA(I$7@Ck6FVB~6XzT7PX~P8{c&>e;foo5Y98V>_#~28%n#rSlckbn@iGkppB_ z3RbiXDDq$A+RvX}FI(kb3b+8wfVqqb{_0nnW3z9>(+&C7H9;T}<8b2>({s~(BRezR z$B=t^R`W%3^j)+ubAA~fZ7cT!!BM|T`!A3EzqUer@Tj@*HG|u13xLLkV+OB7%H@vQ zjnmt+cI!n`Gj*m&b82PGW$eoptrPR+Q*JlPDcWN<^EWIjHO&i4rmLXE`rOke+5~k> zOP7^4n*gWX-sQ|@w_Bo<<v!I*YgTk}%jAd^- ztC$(-E({NBQ-&X}3rjz~>vQwoExi!*80DCxwo&U(H{pVQa#8r# z$Va_b+#Ghp7u)A*7ZzvRz0$Me>E_Z?#nm}im8UE{8vUbv$A`PEuM?s(5u*Hze0FZ( z4^O&Gj|eA{3(A}2rK?Zvyy+4d$sa3xOOlkp{N8g=!g)Hdg30QFWnL3KcU3L8JBf^& zMeqx)@t>Q?@Di(onP`9qejVpW_Ut|e+arz-oJZ*)ID!9eYPx?H@KtKG;tYK#Xv;ckeP~+_}qpeR2t28=2 ztPBPQo@T6|Vyhx8#ieIyPN$=9scS&zXm0hE;(~$kIC8!HG&iu-A#^l1vjB2A@)G@3 zg6r+~@5l5+gnt#WHRUBzk(MPCva~TEWTj)FV<6&#CnO}~vC;p*CHGb2Kh@v<@e&!? 
z+FEhZ(>pjg&^a*ES=t!VGjeir(lap8GcnP=m7oPWS=j10(pmtC|8C^}we!^gsApqr zWov9{LHN5}9bHR1TV5if-yQw;^Y=In9F70!$pZKvvEBrv|NVxZk&c1>ziq!&<@x=T zOV-%Yz)a<W&qZDJhF8HBbLH$3IuU^TXMy8;&N>xqH_At>v)S z!ottc$cV;t=UR!n^ndCl9L5Xg10mk2SI;@a%cf4K=q9Gv!@tJvi4vIBagO5p30cpU z#Jz=XKBBiFLL?)cT-x^!5d;ZEAVi5}IdR;21G^WF9M$#+f-3QY_)^Dw#r*!5m5wM< zD-!Pn;tA2TLOkRz0&ZcIZBMiJxq*;>&Y4y0yJ=+R=D*fO-BW^6FcG-EYMgl}(W{(+%OQ{Iy)-lVX1so%RyQhKSM|k->}fWs_DI zM>(jysr?#SK*x75SyxxhhvPPDUor{!cco1CcO+D6frhqr`_**jGgzqWLia`T?liRh zhg|WCHRaN{c#fK&!?s+SKUI4mm|6Bk#C&lsKa8)hscz0~rpc9t=HQUa-*K@|Z%B5t zw){G%f|W<@dC~u~;=hIWawfE@f$(yC4C~{=S0~T4Ge6y8{;@Ejs+2aj}|q#|I4c zKlA-G&!gaq+4&%=rMZE=h zz6i5`nhu=WCR{lf3#D<%`5A}8ue#dmo`p>@#kJMbys-*ft0pcTkFWnv^-7dLcB{Z& ze9_Y8c037U=ENhlb;m^aw$pe6(5~hW!Lwx1e5rJ)so=tszp@>CKcR?R;uV(e{jsuf z$i14NU?vttCIhDYVlEegazlhqZT)lZZB0)S19X*$#?~ZPw0#L}=!2H@NU$qs-E5EBlYwipp&803}QooBG9% za@#7$v8`MGYVXZS5`OYh@$t5@2V96THPfl3^_kbhk;?;ZtCYE1@FPlF&3l~P=oUB6 z@djqa4HJZmssVW73Ohmz#To%<#i1aMg@ZN)P9<&w`o=$9t}K);B23o;=K%45iZVJ! zLrJ*RX60kmTw_`cO)5XB!yxg!bybA8l6J_op6&a__P3Ll8r@i z{oGJ4lYB{)*o_1xrNaSvbnE}jIFi>Q-4$^ya8fv?GCe|q%hgt4{2M+6FjC)=z%or^ z`mB=)V6PoNeQ(H9H;QjI`JfWox5=DZ8nZ+fqM*!$6gX)E1g1Qldc3AAW;=ysS=2X^ z4ye$H|9jQH1re`gsG=wL*S2Ta8xQ+~!h#ifSy@>zvQM9^zxRkEOI%fH?kXUYhmaVi z@h&{p9&Q#DuYE2P`4*~ojXgRlZjrEmZDjxqpdcqFcNjTft)7~kgt>QfzoUL;DEc_a z0FU$y0sIe}*$nWc9)NbJ!aYG_Z_zrfZ*{(HIvCFqf@XnS;g$3HAS0%&5I_w&ZWtA> zuP2!uv$9$CAF5nj0CCPB}VJ}4J=gJ*BkStQ(khHhB`}0 z{@vN{o^?qhcf`Bby8~8_W?MnW)A_zgZMESs@Dd6Z;>FudVKJlP1fzg~G_KmjOgMi~D{Vdo|1dO@&8?9nWdg<1M0=^NR4eNz{8AW>j*A|Iz%rtdnrboJ?*9L<*~>+b|bZTB#}7wA71TcX?u zDa)xfo*zH&*G#RMjLr!j6&9J%SKLy6e|7jtrD4e?Wip9pWDP%lAVrc_AH0mMdP2P# zA`IKsRpotJQQ3o|JvUs273l&$YT{0&mj7#)zHIc@Us2o27Y?hC!HA>zr(e^BSZ@i(_#WJ+#YTM7I&?x( zzgNWR7=nQ$yzzEJ*h_6u&6vO{o!HH=*IVEdY{I6cj8+g!!UHNnhC{mpGJea6y~fSf zs+Ds={y8g#>E9;$?xWSj>iVcA*QFqk0PiWY4c@40dug_8%0Oc$*Q`Wsq$wt>s&?02 z+up6mIom#%$X;+zg`0;?AjmGwEp^ z${3iO@(-9QCg$~pYTV`lvik>IuN_)5B*9`4Y)$HvA35fOT)8Yma8CT9a!s%cN%)TN zb2{t`SZ^5*HSYz|^vi`?Vyzq7Y@Vga6xm2=C3NrZK)+`|Z$p_?wJ}g~QjP0x8@fBn&J9xZHt3~Bl58+jS~uU_r^2jm1OP$W3Q-4 z0&N^5;&!$3&FRpHWFz~p7QT{wR|(l%d*J#$LM<1+08|o8>H9;mIC7E%1?8N72i#%jeUV1)(ASeTMGmpLJr=4^a>Ig27Un3c@m;D0+47Kr75l|c6WwVIpmg0ueAoy3<7yU$*- zHSLwj%^bl4SR4;z@3%0nU}c`+!n(bT2lb{R96=$ie@72rly_z2Lb19b=9A)lTpvJN{Oc4&dKE}Z>Y_wGn-WE9qD zF+EHP#=uCkVlIsz`&~~O_Iup9tm5@_)bN2<`&L%5J#)CwBChIqAgx)hs&jD1N$vB!TXmJ{(-LYov!+2T#>dU?> zC*`u^`ebks!tv_cDHt}t#i8e z8h{&M-|Lb7h{smTi95hQ-0fYvRK<<&e8avs!v-?z+jNZXtsE&aSl?~G#~a19@@7>6 zF7e#-zhWdxZwLIX+pk0@8rr5JE&T>gG>x+~^Zo`!t&^Zb0NWs&j+3TCa5kbZxq*y* zl9mG%wQBB?cpsB;jSOQLZOWPnXpt+6{6qEf+W1%hTr24iT_ek7@I07o&Lh$xug z%lilmucWuA9c;b(jGUu)IeXSHD7BE00q8o-aEJMI$ls?Y!{?cVw#IWY&D>FD;OLqD zjBPtAkP0G2^>Z1jsQ$V1l%iDU!Oy47(fvOA#nlvM#qC9h$MI20%42g^j20B+;_*oG zpb6x5XD%VHqkVlB!OI5iaJw)H%OZsn}TL zJk1XQuC18j$w_8NDXM&aj(FM<01QZhgE<2#1gsRnJVty4S(Iv#c`)dURIaqi?Ps)U ztU))sOCi7aMn+~(M_(q8Gg`cAkcb(b=Lf_JJm&m0tnv6fpM&i^n`(!=+JDK~J{^A2xO?QbmV4Y`SWF8*2=hp0D=KGsU{4rY z(Iwyte_<6#`_U@3lN^M4zMfXTAy7zwx9^AS@i^B$%ty74oqh5QljFw-3qnH6(o=^E zmSVUN`YmlF7R1hCvPPkXlj4`iB4C+*luezUwn=Acf~sf0{Ls$UKp8MVc#k zsA}%h0zvz6Abe96*OZ{T>Ku`N%WO!H2)4GCGk%@ZWhcGjyC(RhCS7l4LpX)lI3M$z z`%SjLH~Qxv*V<_h&Xk?bH-p_Mmids`pKY}Qi&W7>1$zGhhUeW?x2^Nj=ouU_x{IXXn(nucTmHaGzy9x+NwdZxg=LOY&d?mt)5N@GClVNwp}Z= z+MDbiPq-)9yWTK$nQ8bS1f-&;EGEq{gB8lEq0|GOubL+kHu9}d!lZc=QgpuzEuXOO zi!9YMKID^A{l_!_gcTPobDIwbvxRx^-YNxyCeX#8YJz61fq4LZu|0q(uh>pY`%9FY z0FPtA7Kt*`=yjEeWkNu4HBZo~Xhdx2(hml@7A)?aS6Etm3+C0YhOZa1&&S+Z(Pr~( z4brHi#%jR3MeT4r?iJyX3)VM@kF-~sR`=kv1BP@W-{yTpE?G~Zc!-hyRkMV073E>O 
z)OP68Da!SgUH8w~gtz4~!oE}?Ob7M0XAb<@AwV^$W~lngHL*IgR3>yD&@5?n8iTxX zEzq5@bb|sop7*u*OeZW~C7!=VdB+}PA0w!LwT?Q6IJn>+u_Z9NDnDJKM`oXRO|n%Z zh7eoYzAH|7RTH(&2&&=Qe;>-ZjmqOZM?d`SivN)%+LGmQC+$9xk62pP_V1kmN*I_& zOud=|a8w+cMTxei3}fA_7*tp_SzI>=m{G(BlwH9M%=qfsVT82CgwX-1sFUQ&hK>x% zihXu72VEy8_l#<7?g#Tq0^>DckqP&k4Xy^~X29GmqmsC`v#K2EO#^;nLo=L{oVqt4 zw0S*_OEf84+ooK~*&8cGIxd4j1`My&~`hJ0xs@X~m;cx3V7OTYP9k zSKK6TF<0DVQ9N{&xum)QV7Xk@~da%8hjO2@C!)y!E*={Bdn+(H(Nx4#7l-w2NP3(}6Ck zGjSdUsI)R0rwx%Lzm4&qHhcxr-f^@VeNGi}n-kUUD^4S_C>U%iT_&gx%dH2^HKq4w zie*%|2{ZxXjydbUF)7*?Rny#T27cdKm-rt}iWu+d_KbJH(6Tz@;k#VM>dtmxw^ zavR$I_}s$zN_ZXwVLH;IoL%d1lNcpFNX*@2Z{C`RGP+f>U`~-!uI#X(1PMQb>adZE zvMF?kX1CjwlwDIGyu2U0tZ{8I zbhL=6mr<#yp%M?1C(+R#EgUd9r&js;zI=dXyCF_ZT>zQeCA~#vJG=zW!nMI7zn9Ql zgvyfgG$JH`u5?o=ywz2m#E-GKFinecYVLtQkeIJP4(ZytV`Ox+E)4glD=S`ZZd_qu z;UC|7z@xb4!y(C$f*lirqus(r?05^odgzqdB*yFva`ZKn4GakABKp`;BXBNMe27@j zaFE1M#QSd6ubMRIi{BKv4-E?Q7x12IZ&^_~G%cvDkEU!ZD%&lQw==*r9X(c<9FkBf;E7Mm=CVUEDuJ;W zQypOp87kIe;klgY;Wr5QOf7?UZZw^TAAM`6|Xz1nou8a+F% z(x+oZj~V-x^92^Ofd;S`SY$8qGmH+k(IUXebc(tsmFt}o;}6CBjgfOzS;Hd0XhdPJ zh?TqL_)}4Not{Y~6V`v7M}S)CoEx!1TAOE7A^Y0p`nr8uq6xjySuY)!v|F?1ULQlmyPow6`t&o3A?J1Wl`R$$g2Mg0gii*==(F8pFANeWQVN;tP$B zvdJbOA;ZJuj1O=B1lj&D+mRun6){^_N~4+PF?skSDvWJJts|x+)v1yX1*K8!uo@bA z;6%#SY}Si~Ix?FMcBTf&NvYu65qPC2HU&|^)llpvzX^t6_KJneyUcKZ1vEV+2d5pG zOSV}{+7e_&)+BPrjJ{w00^$@GP!=8Y^VgD{Yc}$@ONrS+Gn@%z&GI3IBv94J2N^rW;OSgb?lZCDW3IBd*rz(cF(;GE z$#xy?VVY@dG*v@u6Re&%Q#Qi=c}T9~th9U6^3_Wz7W>X|jwu?c$1OqX*IE1^(RVB| zdBcgZW@gGk3Ix5y;XVU&%MC-={Ha1ErTFx|-g=G+QsWn8Ug=pE#mr7F) zp5ZBprrMHk-yF5@hSTH_WUW{m669&C{ivEW(7$EQ-KDFA8@F|-H)3e>kt{7Hj1jNI zo?0n>O=ie#D`G4Sp}mCxmRS8F5970^z95~MXKGQc>MwlIodDB%@d z#{IQv0JX>Yf|S|HIHYFn_8W(jl7>W(WYB=$j>C_-Uw-!Oe0lfM%W4KA<-Wbi`&+{L z|Mg7*dY@66V9GcTJzgJI-`z?@ji{B@I;E9jRGTpNG}ek$;VcXd##PDN@kZoM9e2eY zPV+Uyd>nO2wZRXSaDKCo`k_XP1j@HC?X0Nm-Zc}aQHvnbG03$IT2<&|R< zyEE9(Jv;6)<7L>q8uS#<1H{tOaaD)qs6gnxJ2RQ>DXQIVcGY0VkETaUWZEOqYiS{T z+B-vcMYC||I2a@avd#D@CdTVj+xKUR8+l`r6@GUNeY9He{t}*-F8Ol9df#^(w9sOn zI`Va&tR+dIZ7H#Y&#m0QJM{Btqy5DHDOKf~F3ndMfyrO(n!O|n{tfz~aC!Q&nlX=D zIG?Xt@%xQVRT0ytGlyU1J{iAacKHJ3pC!+q3dRyQzur?(6Ph@qzSyo8HV`8Mc1$fq2YS|3-HM?Y1_7XYJbOl(fYk>PUe=78P00{uP@^%CWAAXoAmZ~QheKKK zFO2Sc7H;Y;gzyy_Fzz-UJSsD#s>n3Maaog!bIjHw+d#%DDrn9pyBbDP1diyO7mG68Z;pGz1}-$G4o2t%}13u zYm8o`2AurHLc~`ePKFFrK<;u@j6CDd;Q(%S9FG17@x117T2KcpwZt%3XA@JAtjZUz zbVx??WnRcl*!+%p1uty(Zr7x)QQo+cu4_XBJ^gAbv||om2gMdKOZEMD#0yoE`&?8b ztfLqhK|bb&m~QmD-CGzK2eDs%r7^?joSMp=s-y-fe5=|oD9|YZ5D7g<_CkE}k)zSw z^m-2t0oKk1>kU8cXk=bw$IWd9dKz9Ru~Z~9*uDvf?ske^>Me)WblVx1v9>$0IQ)~? 
z2yBvenEF7GJ|?FuQzcOyaykq;2qsvRlZ{28h^zYw8tK*^&BSwX2o-QfOws?Z>hG?Po8x!vu6_*Zj>fx9GE* zK2nwM0KfNga_76$8j0hak!eUE3ZtM-Y{tJ-U* z7sk5M`(eA~>0kJE^(JPK&u%RZH&0UtdY_^&MXv_PJGKM?3ZwL4^OB%IX-6&+hqq(8 z>V1g%m(BZs?+XN^A!-}k!j)xAW(?>rgCh)XH0*{i^qBAp-%v$?z#hsmQ}f&AbM)qo z*i8Ya9Qq+r(swEH)tlji5k~H|WjSe25Rt2RLWwI{kW-no6slBx-)41&fC7PHO-haT`@(q#+EJ=B71L`@WwwWkS2AFKV4y6M8qg zyUahmC&@9S3yZP%u-RE>k>rixPjHWRm3RlSseDOD4FBWaUPN-DO`1s~5FGE%>~ zDlag4JbDK8vsta{`*PFahQ-Jta&mIg zTU0_K40gfEAcx>cdd8_yhn|{u*Mzf+%~{SSo-!@kTJv6!snuL?sIvTe^H51zVmnH$ zI*jYc9$-VdnJS;_e3n#r@Qc6pth8&{63``5&1uz*3mj)VC2e8LZKwPe-l$9WbNMs% zt3dHtQiv9_Rl^lZ3nk|)?p~_1?_x^DLu8I%S@TTD- zfh#|(hk8})LpvNK1w}BH?6~nZaL4BU{NwSkRR@!+0F3hmI*1q&l5HF?>L9ElVDI$A zG7#qEp>FHrYYEuR+l-v9TN?;*=POF@>Bz-73ljQz^5^$mI4T zDwW_XQ;b&_Xo%4iRunsR@A{-5PZXQ~o@Za=y;?mQ0eveHL5$s8$)}4Xi)+dCMgQZm zbwsX`SRd0-EW8rru9 z@!_~oQurv~ud=CO@4BKfqq>{AZ-*wHQSXTt_PE|a%zk6*@U>Nd_i5}Q6WyVm)<+=4 zgg>C>p5eWRHFbuqyS7U!*($Q*UTI22AYVrY!;z529OK1gMqict!g7P%b^;nrYQ%2u z3tun$FsVDnlR_fxWPGBd)4(mNhW1980(+WJ7$cWbPD~YC+SRn!iw> z96!m?L>4~mm#|)8I&;R*$ELW^V{wRQ%XCn3r%G+B7F9mIkIfWN{Na|gk{>BLShCpd zz`WroFEDNeC=dbJ7`dbCVn+$0y>22P&C?=rsT#JX3Y#Hlo%PWwTPw(%m$rPml>LrP ztEw?LQ7wv*KHEUy@pS9E8yOv?{{r##lHB>9Tx0#C<6)^POXI=y>Emg8#LY2?n4TV4 z{EEj;SQxr>yMBI&bi>)iq^ME8hm(Mcf{K!I-990@n%CCKx2L{nyr}=ZK8G?aEcMNs zN;wmV@p6$46c;Gx3~rd_u6v&Sq_e13Dt^Ez`>H=zYVbGc8JLd60S2y{mPM~I>#G#q z$efk7vaBUH4zeCS`PVnWtyv2R^qgk{r|PPh9>nWWVZ0*`pxI%eu7op9fCDvU83Zt` zgr2V;fKZkrDF?<3E-Z;E_Dlg^9oR3EG+}Tb-ol$EndYzn@2rE|gyK9_NelfSXCdT* zF$ZHYg16|soiUAP>AaW&nWxs-g7d14wRiZmy^V0D1ta zMa`xvh)mwOAl2ZZ^SM|PUvt#p?W_29n1;Rk0GM!5NI*jXU;S*xXN$5g&6O2q9zYSl z16O;ls57)q<@lz`V|!OhF(x>agpmO1sPr_!U9rS}V?cX|`}_O;fdR%-`@$Ci)|%Gn z%YY54b7s+z6!!MegPSy>TIyUI3$(Hl+f)#;MfU*RE?0q7V;M6dB!8?Mi>j;#i9>K# zsaei|YB^}nPO)y6=L;zUKnecayK!b&C)%9V6U<+dy3>^6dz0Rq>f)cF0wvR};Jy(3 z=k((Z)U5@gjL*#$nT>H`ePcpTv`UMaq--Rub+3} z0_r4MAg7P7t4`v#5570PC5LpIP5(!SCNcu0wVb=Tb7}hVihD8!1&aYD#B*15QM9I6 zrKgz`>7Vye6%AZ0ZVRp?M(v(uZT^Srd7}Kp1$Rq3u0e9RC_8$CDpX1G&BRoawz3KY zsPcTA%CJK44)}7t!dMLBFVR7j5h-^KPCy;qMTe@DyyC&zAvpVYxB+)i>WxgYSP*n zDCF9G$g-8_wF4B5a+1fXv))tw*LeP3Spw$oe%jT#wT`qMF}0S}PdzHy%>B7$Z==1{*N_m5P#R!1|1&JCJ&33M-U}z)k~|~8$k>6 zfIs>kU#pN09ADKrYbNv^$Q=ENyFF3fk!H@$JgBA69$k+sMVSs3R8`HzwX9s`V59k^ zXpmqvg%3HWJue)tKcM{2?K%{QTJrDO4xqUm`_>hCk~9m4oK^J-%#azUaCK4e$tDhjc8X2<*$?JIA@#s zda#DG`7=()_z$*krSah{;?#d^d_gQLHb?0iSQvhruQW5W6VtnjLsU5j@*c@&P8}=2 z*%iK>TLTk^7#6pE$;@c|rx>k_z%Yh2n?^BM{r}L*WlP#2^!f1rM=!(oyVdy!+81v* zZYU!LFxR}|+j}Vo27bJfBe_D18S;jK;^{<OxBBP`D>abDR#Jam`a|J;@hr6ve-E^4WIZ zk+pX=)U+{By@rS)xq3?=X;3uESB)2{!U98sLglKt!uxP@_&ctTOV@^RMkel@z!2&@ zLL=%en7DVr6r=w*FL_2~;HQw0VmHw&wYf2&KfE?-?bX8_QBJ!K((svcVo3I}iqwtb zUfe^lP8jlGlLiqMS1meYqaO|3#d_p4s3?Ta}FDYwbvRy3 zd5KM7XBg$q8myVdxL#6Vr8yVAqv7zxfp_c9i`ZNkzkDUZZ@9FW^<`~;ZvKmG$qX~8ebcohhv0Kfe^`&!4xi;!%=Ongg{<5L zcva2;mUgNmao}cLadlLTb})DQ&&>PIY0TzmhLd3$DUQxCQf<6mNOi;|4{`a_xzUuV_b&up4!sBQ}D=X@8#d1*4DY9XK#o z^zl^VG2TaGV=XWZY9);Y^zCBkg>l1)z=dU$g`5i|s{zLf8a>v9DXl3fO9i9WyY8y1 zMmVARNTC1alA+%1nXW+zPGE{UtDX`?KP5i;KB<5xC%^bpGp~t7cGnhvzJ;|v6HJTK zsdhkAav@u5wI+M$A#q*CQUu>*)KO5a)`HThZJAO*euYxE#Q)D=-&j6*tyn z=+bhgjQ(Bz-hL8OPij@&+Pf05obD;@J~hT+jAp4J5#m2EumNeTQ}dM197gP@vBVbg zMixYXRMr)!Rlk|6i4!({1iB`Y}xC3;^y6%$x5}GkwGWQIxr;nI)C()t2Xeenm5X;#r zCw;s3FjU3w&FIRBco~W;aV}*2utO;B2zgq$o7Rg;x`tH zLW8Ah-mIHS@0H)>3zer%SihIa=>378?0^pamAR69*YIx2uY9AvHK60nNeIL}``(O_ zy`rF}BM{$$CCh0<$QMU*3=%j!z%{xZn)k6(Z9H0b*;5(A6rD50t8eZA!ml zD8n7c^^xH3KE{=-Se5kZibs}OwljQsV|=%n)wc*z3k^`sMqVqkpOj&)_14IRcGh<2 zTBun^7w>J-LQEIyer+J0>M8E1Vz~?|o*7A*`5=@|sw}1^dYwbc>zGk>NVS9WBaTbk 
zT*@O~-h2%y{~~y(%l2JAKl#$dR1%tQ@36B@`WFj-H0+K8#C@uj4HZoMgB9lTgo9`P zN_7rw0(rvba;a8H=JL;bcgzcbn9ywpZDbbYl*xefL~D@OtrAb@ZYy-|+WC>dhgODe zF+e56s?ZzQyY=H}?UvP;ZYrGRkel$ET{cejF*3Dc9!c{SaEA>5wWuL(*^RO6GW<;< zZNuM5NL<%q3{?MfFWswGh_I7ecpU?Ged~R)U5#u|vTkk2!A~}rzAnHwlE8=5?1g>j zx2vhOfwQgOIEB-Q!=b!`EOmxNV4~IqsCYvxDlai6+@(t>tq?vnPD57K2H^KB3%?3S zg!VC1&lr3Lk9Vq^6w~Cvccbv4<4xDlH_(T6NYTZEYu>ygW)(k{wIC4&|90Nbvo{eU}V^mi<0Y}|Ma+2Yo z@l%Hin)gXNm^0rq#giU6jsGNNw>K&!1R`)!Vo+QLml{W2GQambYk8Lz5P=Rm;kKY6 zcz)@!@Y*}LvNcCE>o9>Y)R3(`;3~d;-A^*3?;SH!wbshI#BYS>j_7dJgpnQ{aekZE z-lO=MB=acCNYX|I>0js-LB~;@2m_#L{>U<-)eN&`i1N9dOPyREGTpOD|IOc*rn( za&czJ>_Eg+&U{fm9D3EM(*I8N2;zy80Y3DL(^?a&Oq1V9pIpnc5ZsH%7(V#GUXtE3 zj_qE0c=``WlT{W(!=9!>p>$BqK~Q|}=cBA;8hpP6e%=f=xAWKW!XXtYzT3~KgL+&o zaTY@%p9|q4jiH-!xk6}KVAx6#hILHQp(O@5g3^7W|DCpRdyTVJXsRDNXJg<8UC18> z{q*-5%T&Bh?AP<_wy#`N1NTf$KZRX-CToudNwBvt!kS-db(so1#B_HY+k%g^%=>eX zsGSwxe6T5Y`+$^yFX&`~wCvq?3fEt+R9)lv+Hgf$jZ9*o=X$aqUm(GMgLc9`^rR8W zo9ApKwN0z*E?Fc0D>C#Y(4fEl&fi`= zjT6U44+hn1Nf;zU?`AmSG4J{QNZf(DL%#`V2+~1_pZ*cXN4hg^I zn;fXhwbL|6bOa%b5r^%^TVs3Jr|in z-p%4uggfq5Xvh;S3|?qfydC_vW9?_dpUdcQvDjaDDeTUqCU~7#BP4J=)FSrQ^VB2E z#r%(HNY9Mu{++>@;x+8i~5IRj$rR_-Sc~a&J65qGMVRb3zs;@}xGdaFkXxw-4eDDbXy#Zat@GD9h3v#Fl7LPy^$$#N# zF}l$2=V|iHzYgD;mt=kNwp-(}%B?Kmz{}7q|r8w*V6ku_5 zZ1G_1G}v#gSkV1_UA6M)ziper!p|6rNA^i`3QQ6m)}n|J$Eh)o2}Xcl{{=*Juzbtu ztY{=MQglJlWn#ttk4gF8A8(8+G$uC5k-20a)xRI!KhcMF%wIXb^)4cu|9?6Lm518O z!M>M!mWh%8@V_#}H-)0X+}vN=$JGmD|Cg=xFYSE~#;+V|7UJH>|2*jLJ4FNaJ|boc zi>mv-44+>=vcP_!mPV1d`21h3>Tr}64$RxS>KUm1k9_*$UwcWSy!{(yw^G}G^lE=& zY7Uv07VIJu+V<^mf`FCS%l17z}+LWRiEFLyRWV3k{bXwjxp*@px;Vlt=B4(jUO}7-^EjUl{c>q zxg}XO#orJ>vvQA#V+-Yjrt#*6!Bs{8=`v=k8jJ%)sYSXQ;S_5dD~&OlZNEpmGdMtbURiD2B6Y z;Qu!SflmCsdp@FeFsDfmIn*%qMSbiz+^?!Ca#4L^5O2e&8M@y=*4JGudY!Qii%HP; zRH?1?Q+ARb%L|Xzb@8R4hB7fI#cK|mp|5}j`Miw9_o%(>;dDKWz-9II+Su1!#s1~~ zLATr4V}%oeR|4xy^T$%v_-LHH>v>QFL?6}1poSsax@MQX>2VZH3b#|Ia?_xLGS>9~DVB9vs-La9FEr9*ZU!s)J zswmHSYvlSOPoK?$q9T6X&$MR~wHSkXm*O}>f^J}IC@p)F92amo(y{HHVzigcFkfv5 zL&7$}M2Cj=o^$1HL%-T+e~7zIHIRf1&YeK+zOCO`FsADxxy!)Zn{CT*KLhuHs4ie^ zqWXY4-_+F@24@!m$30Vss46Bi1gy_Xq+UN#$@yDsK=p;8GK1@VRWg^Tx|P0Cp?`V} ziGn{nPDRTz+?hUv^)kYIeYFaVdazTTk=+@=XJ%mibGkIscdD+ydqio^fbsLcKk<)H zu!M4o*n||`=SL+vNS*v@xt01V9jeNk&K(Tsh@Au2i?%2xyR4%gKbPf9oNM)~YcbIWk zFw5+y{3~CLhyAsjH_@&F>hax`M3uIXD5PUrw547Z#TjTjUZcu~0=n@lPvt~n61;>e z&q!%Yef_h4m{l+lYh=1!S(KMlwY6P^{sU@Yogv-op^mVG$Ec4@5jVvNC?Z^G% z>2lw_0_?WSs(mgbTW#g-)=V*w?s}$1)CbVmc+3SX+*z5;kP=JthZ%0w^IV}1e|qBi zxJl>V>zbLY|AD5QO?Wu9?`nB5#8D`YOsEE;7< z?bXUy8@>h$Ud9MVR=`d7S?_3<@}+9%>y_8LXFraNE*`xn-2gE$qI1L~GS3J5yW6gy zyDe1#dMcoXjnEa>r;~>_1+q=Q`)%P4l>60?@axNSMekFvzmi2>JbDJ+a1RX8qv25J zR~y|6^=*!gKk>e5v8H}!2op@}kn`PM;KKF$2A^kcGk%rOz1zx;0s`lnny7Rf`kwLZ zl>ApJZ8_4R;>69&n=wPcVT>Ivy$)1nj$Js)VA?f5{b)N}At)$Sz><>D&)4;bCC#kQ z4T0M=IBQfi*OOHKbwCKn1DiEXjV}&2Vh>NcJWZyJj%ygnBQ#XL!fd4W-K^$RbA%B7 zw%<7I=VSy-Du*LHhe-+V!Tq=>{#AiB0-hUIAP1FtYQom3p`NV;-*qfO7? 
[... base85-encoded GIT binary patch data for doc/source/_static/print_df_new.png omitted ...]
literal 0
HcmV?d00001

diff --git a/doc/source/_static/print_df_old.png b/doc/source/_static/print_df_old.png
new file mode 100644
index 0000000000000000000000000000000000000000..5f458722f1269a91cfe38c1e5d16d26697e23b22
GIT binary patch
literal 89239
[... base85-encoded GIT binary patch data for doc/source/_static/print_df_old.png omitted ...]
zvp0=cyJclM$*nG%G;^vsH6g#3qg*7>N6(V&A}87L>IP#>V8oDhX}vE?^W-y?Z02JI zGR_5zN)ucl4OS9gP*3n2>b*hY zMd=s}38d{U(~C-GZN;ZAj~P6e^`=@q@2g8`UyslY9dt|_XXn~mpWxR*hi}mLqhpQN z99Rm81S1jSZ5UgB&IJG{M+5p&azx*)K0F8_Zuk|%D{V52HonLlWfI(OkH~0C7?%`s z5YFG;QS&xe+mUcz^<+;U3AF)9qwo6fw3va%T>Y} zGWZmb1ADW_PT!h0QD;O=h-2O?r9Q{QtpzS6l&=>B1x)M5?dtXqWA8uU3zyUS$)!HN za%C`_%x`NvhJpJn3TQ-r-9YsA19tg~Ts}fF6B`%dPOlsg7l|1-%U2nS^c7-}V9aQX z6`)P{_!i!Rz)t#IZ>mE~OcBcH$cNh{fy?dKR#n9=WZkx*I2#DsMaCQr@er2#g@6Fv?$3T(U{U4O zt^Dt{MpuGwJ$_0Xujo0tiSs*8pcsA|{CVLi>q${*L^!cX6x4fF@zyU{fJBE5=8jeuQ8-Z5Y47myYfeeI^ENDZ1gc zC=}#UW9AHk`4T0Dq1L`gIYCx%9nfOZ%gjQGcrB z7>25$)_@`#6?~3Cs;1u*c4^SfcIOc3?eM=H2I&nTIg5njNQcBi44{CGw1TaAkSS+R zt^A;W7kzO&jrh&1pWOFHPj6AT;E3*cI$)(d-4uk|_C)SRwf{^`!|s@!8?_1kc2E!- zq-*rNf59TPkN>#Vn}>x!KAXyQ9+{~1Yr)ygR5&F{&;XqYs2~HEz~r#-EvM1CXBmy9 z$-p&+C(OdA20esu@rhY0@_q|G%QE#1zA>v#j&s^(?qvY)3(VsQO--g|dBIY$BU-`k z3TiBj{^~=|7%FfK`sq3bn>=sR=SFBf*zpdB!d`vz^3kGIltI4ww(fYk7xsrw{IP`r zWO)6LXkw9j`F~4@nGhaRn-udECsq*nn+_;z{ZmfO@BwtTeBXWGa(aCu^=r*FWALlb zH({SoNpw~udgMal*Mdb3Xupe(6FRo>F3L8jg!=2ho9Y>4zv4t^i~gRxj`sVNtIJ+B zg+`H_#9O;dDxsT9b3U{DAM0podMruPtCtqg0PoJ&7;t^p^aW1X&SMjK-edtm=BPY_ zAm4&~!@BqE5ok@?nZWlGQcUZoW#G+`0$C({EdI_3{00JORU?B4p^@+!9dt!xDt>w~ zrkAD3U9L)}>MdQ~SeC&j3;fe43U;}9bP*%R&ZqXHV+0FL@1 z3(4JrlHcA5`ZU~=X59+QRDaW!i#N^#*X5EeE48|PRhh>*KzAtWj(&O?rG$2)n}^&2 z4kjR(ek~y`Xbc!EF(Q5SYaF+3hdIfFPnCDBcYtOSmtpnkCl}FL_L)2-D^kwegBAer zo}sF(l}KQ}(;l-TN5|oGK<5$R^6T<8^f2Of_7t3mI~>JpbFOF&pY^|s_^OAM-#{>P z^2{g8#Xa%o@i_N)4>D`Cpx*1mUk-689*#c~U%f)g%e6|fu#A2W;j`}4>|YDG^MPkg z3#B$yhg-Ymp&{s|n)O_$eUA zH~Mf2Zc2K>uvtC8F?*G5dJADi1Z=v0odA>{Dckz(xR+-82)pX4>z#XZD9wJgtT46b za?5q;sHN70IE7YhCjNQoOLq%LVDyeT9%Xi0%K4Yen!|lrdz@wY7lHPN?4EQ!`=6W( z>E9cHoc)|yo3`EVX7)xW*Z$Q9kUZk@XXRsR7%%@#-T$SV))Q6|>jbh2xHRyI`qhVF zq^Yp$P4G2#RYFriUIxf2cE$Y$jsKsm${#)l9ZcvkyCNI@*9fpB(}8)?P{5?drqn&@ zo-R0@$a8%AdlQ-T_+jsMXzb8f(ALhTR6vdP@pt466cPx4KOve8UAhFU@VY>(Ya%=) zU`>XK90AlF=(A9fpOl6}(@(S2XC7wIt-;r$B0hZ(dv1jyw6%Fxik9a`l+YNnr;#b_ zmd}9$KhJ-VUwLQ|7k{)|+eF!8ZDN_&3t9PUV7JTSmPJ0`;2C_Tej$NB!G%ez)A&~@ zR^>mPxK6}SK${6G_<<*ckYSkill7Tk`P_q30n_)j&^}0})!?|Bp zlalu`wWj=ry7+pbrxweEsoW!sK8AvT@U1UZAT`&DMilS;7KK@F6fE(!3DfE#mR-ptv^4-b`J_NXveb) zXUJUPF&t+O4Ngsz9v+Dp+d|~CWpe(7y(iFJ!mU69dE3@RE)$bVKjJ_vol^iloeGqZ z4<>2h|6tx5OQD9IZO-~3DrwLHvZFwXF4M?-KF<+^J9WaY`3+sEx5|oxWiqaQZi|3* z+?DHGp2-rqK;5(~^NW_Yhk3!>>7V+Toste`+G8bBj$88*?{Msr@%9U>eb^TbFc0*X zFv>;nY+-Ik{Z{uz$poNDanMC{z~Ox^9^?mJk)L88gUQ&aIHDSrdaL+G=Yd_vWqCe< zjvBj=5c>GmUQ*UHT;NS5AEtgv`>xYoG4aZw-77dk|M(R)U@@9;z=CdKn>psv4=7Qs z3R_z!EFt=BW!w%iWLbH#A*~i)8Z+Ty`7EG%#{u{O9n(Ki(gz?p4)82?*JSb(ogs3ut8*MI0$bLm0tQ8c$X=ZVAC9hV zocF7tr7EQhl;W{vRXnDAS>J0AH9Vr5Me+_+7}y$(gwKQMvYZQjGd^+$!tfe}jGu9B zrL_BidsVjGxhmdwwn^)&UFSaL^-FYOKXc*xRqa;lz7$HnJNgxsX!AXJT|?(5dzPDs zjJq^0|0YCAQS0}dcPni+USo&eoTr17zqHXJY9^qk&;h$fTCE)Scu9(Dozr!w175vB zY~2c_-m%Dn5VJ2~rL~yAqK!XDryuefD?}_)4dC-RV(oe6ylnDkk7$+J;T2PPFU3Dz zK_)dhpb5_cF!;Tt*f!tRiK|^t9HK~8vZMd&azc`}g8@T>-fAQeZ=DqHt7dm?7olfA zbl*6f41jqY6C$AGJgkEzMgpcM#Wyuh;?XeG>B>d5vj%`zbibSdZQ*G44TVwK&>L3p zV3u@Mi`1_zbUGWX^SS;B?MkR)R)%!Y;0(~6ITv$I@&rljc>YD$$kz(`BoOMIz=2y6!9imyGub`R9A{!noDHtbS8J)*>)JA- z?}u%WFOok2t{dJdM6dS!g7M@WNpDWHyWTe|6(h1poT$>SHpjEJa((DYh){)uXBt4& z8u15aML!sS+1V70(4vSNE*`P@>oo7sm?t@$?qid+;n+?JpQn4AJw%kUs?^uKe3UaX z!2JEAsW~-ZVmZM0y+|n!di?-!6ZHsqVaQv~6G2Q;xB0+I9vDz=NkKN8vlX`g7v?#{mS048%hl#IXF zd#qUugpR}9hXh}#m+CCJr(sh*xX(6-70t4zRFd+B*%(r;P^~f2h z_$41Tq|;!I1BcQDmkfS%u=ZaUC3)A`1s0B6NRF<)T4^3VkeH*so>yd^@I?i7$g%Y& z=%!n2P_s;~B81*+wqE`=pDJFg7RDw#m!UX;xnaCWQEhBfvXpNtdEkY$FV{dutak|H zV%#h9=MoxeeMclRLCZ3H_YEJQ8IA77Ipv{*DQTF`t^nKyj^zyg#Ksi4oCq?5jzNg2 
z-_pTJ?$R;xDaHa&>nehfrJr)~$vcDoT#gJW6IlEOTQWEbmUzz$905TC&ZhHOw`&nw zo3fY)F}|FYI#zM6Zr>RnN{34O#7S&GqzA8qY)cFT^+51z$EUwZFw0;@J;AjiH;KH; z|EijU>~O@_${P$F7Pz0KSL}p(7H2-L_*!fXxdp zioizJ|DTM&$`<{qZd+>{2k-NTw=0dHpZ;!#^LScIF=c=jUuS}^R&HZ3TEbZv4JU#i1NB_I>tL+MM#~pXt6fXH8c-QvUX0>zlAr%Su zd;ZC0W$-1x!j$up03IF6`<0N_*n;oSfd6ejuwkCt{1Gff`)f|Gg-p&Ny%QKA$4KRR zC>zK^W%$483ef)R3WWdu9Q6N7S0K3dqdo3S+pKFT)#$*0WHdW7)#_`)YvUciG4-}= z-h3<8L{1sm27*-9w9)v;!Be}J_1R~n{J)mWd<}xoeX|FD2Fr?*C{*j6h1Se41#?q$ zYb}~>?F?fQeZ6PvS)lLAWh8vhayjruj9vKkcBSu(8u=iwQCl0R@hh_%pl4AM9MRrN z__{d^c{a|+GPl3{rbPL8EKj!HaP=wN(NWNo$Na;WC^?|o60%2;23U)dl|AB&(y`Xq zC7WEoSf(fHeRxr;hf~s{g*R-V(r761$q{QaA;(#>EM)hByLmY)=8HF@ZsqL`Zs*y& zUe};Sfy?MWpLq>}?QbhW4}xc6=X`Pk%hY$7rq#@tW?7Z99qX2U*;@3zc&z`nBD&X8 zSA-|vci6#V3=h7lh)GdAww+XgG8!-cyfcTya?U>ZtHBGiWMAjPyzh&w;DK3o(RUP+ zOUp~lRS<_8|h9EVFE94pO2sU6Q3 zQU}hoi3!ZHQ7#q|dz(HW4@69H=CIL#_FBn1S=?c;p>!M+wgeArp|8B&;i|Oh-Eu84 z-;`zmzt4N;JS*?i;emx$&?a@VBT2HysInY5T@9#W@2A^eu4RA2)A;3EMD#uCzI2a} zH^1Rz1H8(6h<7MLvRh(r*hf%Ri2}Eyv|T?l=Cjp{pK7z?c@$}ZhxVc%f?gIB9Eo2D z`g|D-nIcmw38ACk2;qD; zi!~%u$XoW#smH&jln6RhYy2~EL*jPXkG0OD=z4r@1I~7Q>tf?C%Wi@&Y<(^s(+8f6 zU9;td2kQb(m!jV93k_)Sj?9Lw$0Aj3j(|oQ4c`A-h}$SDTv02eFi^ASi~~8*%+}kkrWs&F(>w1OyA(6eMcK{4&By`shumr05?q zhXKV{3^tw4znCEWtc{A9fa<--zI;0IkCyMh#qYn-^tAh5dTsA#H{#>IP_yyTMLf5d zJ|qb^kfchT3GrY1w6IgIM}VGM(zl+qV4#rT$uI&;)Fr`EV%fyNek&X1{O$n>3DWh6 zSfRYlI{QngW_^ureGQi47wLmmo)AOH+3|p`h5S7ueJy0gb7L%&#r#Qz@!qOUWj#*> zNagE#RMLZPiPY6jot`)%qD>P6&VA*@k@Fd&dg0YyBDZ((qxs?}H#Vfj(Gh;$@C1S? zAky3Sv1o$HXeN+@sM_022U6e|w{!Qts~ev!@UX<9{P#4&b@#1YGHmTcLKnDkQbHE0 zEL+Mt78ejA=PICj3DB3C=(&rGQV3_6f#;tZv-jKIW;eeh;z!`|7Jv z$X}Uzn{;!6&cg9i@litXn6Kt6()}63=HRAyXqYZgQ@Cp7N}4F$mmuUk`-s;c#8DeB zrypxOjj>Q%p_t+wLHu@t#M-$TG+WETyS@0j_Xh%NHTB+eb&@T!qzmA0XXeZ2zb5W@ zJ8jutVSufL>BG_evE5|(BV={2i?&(nwEbwMO}JXS2T#^AJfOlEq0g!Z^-RWo7f>4T-+-7a4Q`>i{acy`5~fR$pO?V>pg8RMT-DbQ=MQb&r!j|SN)$`NrBi1RO}RqWW>+ei#a-n^x~7hIq_+7y~zMn zt#4(qZfv7b=Lh^|(DpAM2MEgQ0TKJ60^+#o?;{wu~%84nJ(_dy0zg750xPab87zYQVzQL-tofkz&4@mhizd<&K zJp`8j3C2D?YfCH{zqBdwdOyQawMYF|=p>(eq{qbgo)y?@khQ**Q!~nzD>LidMC0fW zOZ=<(zTvo7|JVU|7nqtxC>D7MDLWQeS^#yGsR8v&?63)gR zcKc=eTo+iEMRB0#vFe$(PXPEnpgvS*vS*f90Kiai^YV?lx56UM{$9+nZzKqVW%@Am zU{^f+&+4>Hff86|cAR1i#>Vt&wdZDSMK#m5XSZO6W(sLjm4gS8i%7Ky68geqUQpS-A}9WcfaGi&xfCbH8pH3T1-1Xp;U#NCwScbGNAAVWur@< z$e*peveOSc+_lup!b>kGn|wD9N15cE_8qn};z7}zk{YK__eQ)q!BKqVq z8SLq(%8r%#-xk1UeU975pg;f1opn;axD$E$#~5isgrhQJbnaF|eb4j1e^4T{X^4V( z-q8y!f0XfoHc_zJDH~jV_*z&>%5_p1=mI;S61s#B{exUCV6g9T-eK_8XtbaLwu_Kl zc}_5oZEr(jIJ9+2LF%|6i=XtnmlWq;_FGuGiSFcUbQV%Bou&|mwV(Hye{4pqK#43T zPzu_455+q!GTfp-8+#~LQd2EFhj#sk4Nub1?M}s^d|E?p(J<=`?s%Y?8ee4KyJ48O z+EHzY-U>$eKUjOqur{|XTDX)J+F~sQf)pw4?%Lu-i(9bb?rx>DP&CEe-Q6X)LvVL@ zCurcNd!M~e?|HuG`*nX3R`Rab=2~NpG3OwuW|q}MOYn*?#%D*PO)^gr|4UlSFDgqL zdy4@S?5?X45|4WjMFPw@{A_{3onQn{6jV$e9!+#8f9X+D1g3Sxkf=+S#n%Z-mq~rv zv9WNW3v}{1jCsc47@M}Ja&lF0FHGkdSb7WSZJZ*~%rW7(Vf1B-4^PHSUV1i3N9`n~UFoN{v0- z2#-TLbSCt^UCa|^60>P##D!}|W_EmJW2+x>nP*CCmEPU#jXH?$gGZ(Ibb_y7c>IU=k9s4u z#VsK9Ey=6YBcGPi2^Y7UhaDFlK-E$7w>6*7%YEfn{YN<4F7=hf>aEX&BW_^r%Z-!w zg!z`!0q>X8yo){@0xQl?sGIHz_xR4fUxGyd4)qW6qC!@0lT3g{-hiD)o=1-v7vITU zXAC8j&9eoXOxzzwY?0vNv?RET6v?wCk*D_@*aW3JJ=+r#f?miT{On!w5^OP|sRX%? 
z)^E`emo%k^jYS0b*ES_&YUYWS@mL$?n*3{v7d;L86c7YrkJFut<^fbp?)n?$3{q!> zw?QE|`{g|6;rQ;JOv8-|%>kUe^THr{_AvuLCyA!+zSx@lp7d)PIwfTHq;B_+?x;^7 zz0PnvtH~V`1x9R6Misj{M{c5gC6E=I58fIhvkn)eRq-<18-^LjnwiMjJiN=#y2s44 zijw%|j$vhMU!K3Fw+IxQw)%XyEiG)dFr6(vkEK(KuNClhlqaM4mp59GZob_ZbSBG~ zbFd`;5lOSkD{N4v&}LdYJG}XwNB%JP%nHXGdOQ>pLN1d6ZJEUE@d<%+F)A%f$a&zI zE)nH|@*c?`jH7oBc1PvDF}Zm_PpzQ2BgrfFeGa{eh=s%OSUGy__}O$`Cr~wEMoUJ| zz#0nyQ+)ZI{Csd^aDeyu=QPVhctn`QFw~I`P zGt#b!=VZ1DZV%Uk%1fOOAyYm%mrt~9j2bf_6V`;`Jkz}xAph=f~uR$`%=%x%ll zCKqUNWcB9)RG|hJp57##$JdUy92f1*qFbMXZ%7>w8}%+iZjUf;w~bnAnJ3mc@Dmz$ z1TxRXO?yokQ9qYS2;6y{uDNwgkDBG-Bh5_&S3R%Rq39rq-+R78mNQ66G48qC+oXr- z6NoZYBwEL>(#iWS=v9#Ispt~k+TbrCIbx>^yzLtSq+q8&ziL@ZGzj$dqdv;(5tLSm z1m4}nz+ZNP{?d85L)_xWKbiN_Tbz+CVC_!YJx`jydM%kQDM=M^;U+6EP-@ECR8%C&dq3%-8;HWzbOs|CcU!@qAz72GI! zjJ|u<`#`kv>yl`>4c%Elcvx`kO5N8Dgf?@;QbSP#ajkK*3lV%Z>f!C}6WicJr(>FX z_26H0{MvZR=!8#N9JR8KM8Syk@ZFLRZEBsVN4bMO4%uZYiy`BQf`L~6_mRt&oOh&M z^m=j2(!P1JBf^vgsjwVbxY65tndPFeY(}oKRI<(G8==xUUWG;Wmtdmn5o{#IWLcS2((;k zu%=obFJBJ#@1XQYbs`Q=d3W9T6|8mX(&jy7(`7v_PA#-?HGiA@?y9$$)t;y}+A8qy zhtEi!Ynz6g3cU~9{xFUi!7eCg<&DB9Mv~G-8Oc7yx$$}9VmQYrMsw)xwCioaFD?D5 z*@s!&3lQk>(`o4)l(-)$!OBQk@UfB|K>EVBg<-@Sio;FB1i zk9<1XmG(1H7b=u%Jse;OM1q?HPQ#SY{$t;K(6a%1_(rk*k3a;Cs<}4 z>K9psiS)XB-gPM(*xf?kjol<0?8gQ?Z1eH%3uantDSXEh$TFJHM8uWgqq=TO7oi8> zm!0Wp&j!nKm5{1}dq4E(fU6|~7E2`d0@88z#Vm)i>-vpN44LQ-pcO)_16&Kk@W5p( zv$fD?R5@xqoqCk{0&oWtE9O<#u-wmFkR%Ru1nsT~^juiJ*~J^j2&?H_7$4$*-p0Kt zR?M-kR5x}6$s&o$#E0L?YvZg6vn^?|M8BDjF@3aiK7h4AZqe~fD;6u~7)@E;Dpj5H zk;j5Xl(n}~SAnKZ^85Q#Iy#ZnW9hP;-{mCsh3>AbBNxN+{KH#>2%~#$9+}5r+r)>B zK&G{Hjc-TMjBj5pgF9|lvy$^N#nvBI%Csx?Bb{MnP(^cgG*LiEppSf_Zj+tWw@0
  • B!aiA=hVv&Hw2lLL&rj75#n$Vq+Ew@}@-k{wTKC4)sM|o3cDIF@ zo)&B73!+9dkj}5;xY3w)q`mfC0>!)?lrwHZ|6UfkX{tFl6s!%3C#J*rKv#p}CBukZ zna;)?oA~|pQBrc4`!PS`;;+r@!~Fmdgq@~pIQqb*?nS1@0NqEDXmac2NfHG7@8y$a zgnp`gl8l1_f1Iwl3Lu}QsVV-jYq8ZUWzD^9#c%ST@l;&mmdo{!x+?vkIcDS67_LT%J#g8jktS%OL%IPo{IicUiZ$ zW~|=-ywP1mK$GU+;K=A&uvz!K!~}kdO&p46+UB%FOBr<2=X^CLg}7z|$m{IZtllRl zcjYKn9ip{Xh(CAUm|DbiT#7WFXDOw7_9O+GqkBT~2WT!Gjq`f8|G~5*yW7P$+=yX{z8Vx~*n6_U% z{BEOTc(}OVvu@v2?QG41Ne_YJgdwA=~+AkpR~jo5S`k1B(V;V+#e6N2eybPn$G zKJ==R+juXa^w%Ok?IAShch~oZSQW#!4Jp+EK7`FYMt?)z&{~a0X0v*w!0GNaRG!w) z*!${bnnKg4b&xWGqhknOxcr+2F0{Rf1^Ik}2Qk#n37!#TbA#`^ssnNFNXs`FK zpBQiJv1wR(Rh?^@o+{C<=xQ&*JVtC5asc1J;8ZbK<>~Ex-n4xP-Bq*Hl}S%19oqVT zYbLR1Eq9Fp(IkiFZ_(u9WLitE$HjldZkxPL z)36XQLqWstRQX*CZGW-nW-`#3o)_e@42z|`nN}5TaLx)R3nb**_YpZ)XQ$VbXo$Rm zm)DyvIRzK!3Xm_e3&(GvHWYT`jr!Ky8D<0fXoeT1Xm@J(AD__N3Ai0LafL}ulA7+$t_`poZPnDw1yZtWKC|0T)0?2j~4)SL@vg~x?fu7Us zW2+3(4H3yeg-ZgU>-3yqP)q8A%r}C%IuK>+#$>Y%){y?`;gednH@_kSTy=o97B&JD zD{`VkSt5K7Q<7}ST|__#+3soi2q}5N)le5|9M#N*;p)>YA%zKZ6<`%yAEb=6vR!la z>9V^KLvou@%{8)~yZ;2~d=)J&&Zg1g0-4CoCZVFA5n{3Sc&|m%V)hJPu%Y4m6`HXd z&yBWxG-7abfu7TRjRbkYgyE&_BGFWj>) zz?DxGyZ7^?PdP71_hWWVh%>KjTb2H4GpZ*G|9H6XX8^^(XjdvA{?loBorHPI6X^Ac zw^zcbZJ3hEHQ;eFO_;}Opvy{(V_oi(?dh3PDKeX?%{%A8M}=mhT~HZ7!{hI4S$NPC z`-ncj??)1Ao{Wt0cDDdwo!db!Xf-Um&!~C&d|4Jh#yw_Ht;!9sajNVZJ7+4q-lc22 z?V-J@cZi)8PXrS4s}_FFhsLzuU|*3KXi?HoShiKNdD{B3sI_?BC!THLzDG$>P9Z6; zTW#<5eRi8esU?Wm-z|~8h==3Mg`bA%bi2RJ|GK&nXgG;gi#z*A%S5it3PEgNP=mGW zq|Ea&mo6d=85f-^Ry-Etu0#TZY^+kK&0gZ`{_30NbF$5{xY$*XKgka`++<8^s$adg z?412AzN5odjAI3%k0+b6GV7xLV#tldtLLXos61|z@HN~ue>@eC5v9xcd|!k{3zvfW z6=k1x8Po#`IHh3wM$`AS@p#)Bq2WkRJgE*2b#EI%34_eN;^tYgMqek;kfFN!R#Ef0 zpD2~pOT{~-?4RT+`$;mlmTj?XGN?))6Mp*y5JOzs1h@2_%x>ngNAMnbO9 zuXiu6AojP?0Ay0XMzwMgBOZ$qQL#-6c-Nb9gIGWqN7A=&SkQUf|4&d~P!zZ-H|aecPUXQ!svGin%yl0P|H$^egnFMeArH%0wLa+v?g(?fn; zS(w2<&eMoVITPffezz}w?DV44#b@>s-jbd~qDE+?xxL=QqcfSFmKM6J&Y<}pFZbeS zm5H|e!thd@0P_eE0?HMOHA5zQGW&_YCox&Yo8a3kvP3q?jBb_s_wKyA z$4BT|$*eM1p1)V}j>hA~x?=&sLsb4A|J_+{RR)sL|NaZO#Ub+kSN)<7iK{mrT%l^s zH>AD9nmYoawHd1SO#qUg=;a01;_P3C^)48c=7I&Zs76P9_}4f}>44yMIyTp+_J;=e z?Qqc10P%{9B>G<-<9>9_cS*dG+mYqJ#tl^4`LYDrRWSeAC-=MW^0_r_-_EL$maw6G z@!|_%R#@jzDmfH?yE;CfeDvdm#s;pmM-a))4d5FrNDx_ zm+uD4OV;Xj3rYh1l>Y+O5~c9PawU3Tu_R`xzu=z$c|Y$`5x@*AQ-zHb|1v`iG0+Zc zo9u%2uh&^9?s{gDVps(q7USWlrP~7bQ4C`&d1&T-bXBdSKm+Yh{ffivzn#zB$ijFN zVqZ|GlGNe0r$)vF@2(m>-pX9r`}Stqdg*Zd?mN-^KqK4=?-Iqob}o53scXQ z$KB5*9U3scsxisK%3Z#@*}AFT{De~P`)e8A4JtcNS>|7Z0=|&`}PdFfkbIA5P+acG?~QNncbVp-lX*J13%OOaB+S6R^XJ zaXa7pR)+vLfP-&aHqA*L6xz%)k7+S5X?`hYj-5@UwDqss$wsVm8qr zhFON?j0RAa|Ud5MQoqr@*_hZu;vfdUtG^9Ug3sI$uZd z@^djedq;0S(zl@J1w$#1Cn}x-xSS!xp*`SCH=&qNRT&<3{o?hRucPxX9+wQQ51E-De=d9=VYLE^2s~f zS2WEM-}=!0waJsh(z+PCudp%G8AOgUXEB4&^u^Kv zZu4b$T$YhcEY(q;tB^+=DJhsJUsGi26PF*5w9L4KY4eSZ^k_yHWI;RXXfr)_a5=|E zQ*87+n*HfX0j|0t=vMS@mdx}amOrO(%RY+0+^9SY&%P~IIDe680Yccy6MXvaoEW~! 
z%_Uc<3Jk-63u{>uM2MHCGiBV=icCA^kTQ=XiDb+&0vnL_QD)8YypQYvCN>t?F&ndbY z$sCZBHEVrd5oF8TB@DUIVrkt&vEsJahvOv|dEN?I#lnynqnpdcuF!NL=@oN$nnGN) zf-w1OnEqD zLw*3fc@`dm{Fo$f>^5KQqnpA?UUMdX|j9_!bXFctzj5;=StRltdB zjYIoM@>e&9Y1_%|6-1NVso??OTgh@rDKX$zF#{Wv&NS}gr@XzX-MOJA?=Iem;eF!j zDPNPzMjrxw1sX8>9*D+LVQ|kJin&eT=*_%$BL|+ogpEk)z!gDKq>_*n8~&b$QKrGE zO1GV+sEJ;*APv{mwc?c{*3F zX+00k+i5|%qV70rUJSO)v%a(RJm$sW?^rqWV(-n{eg37vF<{I=-lrm(i&4QgA^_kT==1^Qs)^W4il|^evA` z5z~8$lW3o*flpBZ#S|qWnyp_&2ED0n*W3$4LiKMpb(L*ID2{ZIa61eNSm|6c@UGeM z5$zos92Gd3R9~^zy~)QHVTU^Fpt8}8H_vyS8}(W!f}ADl+$Vk*&?*!#8t16PHxf3e zyPv(Q;Ac2R%MKI!yafggDUFb5ID10e$A2^xqncLMJ3P~0sZr8B36`{%)6IVSX5}av zh!?qYmGWHJ%h#PQ!Vt|av!o>xex0}%*Eo(8le*9+Cn zWCx$Cji>(jIuWQIPm z8DHQu!Ql%^sc+NOYcn`1)jP9%N5el64~aRc2wOEk+xtAiDw-W{pxDdXu4>pnEPwbZ{d#J< z7TSdoGWgYX^f`3gA{+zw?gT_h_30a`r^nKo=uCv zug#Kre4T5T3nzB?5_ioXjNQpjG?B|?UlZ%GK+hIOJHmFv6?Kn9II2uZuc`Y za?+Qeb7uXZDKP_Yht-xKU1!7RqH6PH#wU0wcnj+7GSM+b{=U6aN_MiL6JgY@l7gPG zLwvn!n0Qcq^F!*LAiAgs`&aO`vYF=|37gF;9xy>c&Z3Fe3gZ%zKbBT`ykT+9OR_l9pgGxU!o(9{S3+9{xSK@u%wRM22_F0fJzafmn$5 z3dj+GgwK%#NIA1uSzAV;Ejw^uq>$i2gF7U+6P&_BfDqg%+}$-u;RJW5(BN9QOIMO} z&pr45`=NXEOOMf)hZ?n4E!k_XDc}6&B3Ma53Jd)eIvgAvmW;HxG8`OYEF2sHF)9kI zG9?E)QwwVoI5^FKIQJ*2`0~{*Bf}_~@zh%>G3%Joj>Ck0 z)LRwCO2HJO61Yl8s!F0t?IO?E$-mN36!^kN8Il!d_)?I$qTYC4f*-G;4PL{8f;xJ7 z_f8tlmy_6VuS;@zdzJ8+;XbUg8|zT&>6;N~=r16NAfm3o(MD2Vi%acl>LerP+_^to zRtbLDrr2m$&ntb@dVFMdYVjTuf)klWy|@UwKohg4gfn+Zmv+E}OY|0J!6n6O2qt(s zC@qPK;F~+!1rJGd#PhH-0x=VN=m3!R5Jg1EqI%)saCr|-e_SJxxqOY1CiePnr}QQ( zDcDdxT_lv7HL z4oc0$b)iUl*DHZaMFg=8Cj`}FkUup|PV`;5u(y+CM4F(y`Va|?f1vRuerdH|kuAK1 zR~j@(Xov!b>?w0&iYmGJtot?5&x~sS4e9mLy01e09CSaryq|c)P+Gh0XaDRFV;*wd zH-2&hx@*tB%fi}!*uHu~m_+t6m`l}2IRiYwqo``88<|c0IT-aY0=tqEVj+-(Uto4E z74+JwsZR^HJ>zf273pc+e zG_q+O!U{v#6v5d>)W;3cT_Qt3^3P~#i050MaS0TCI>`^1P?vw>P>(3H z!uk15h}}j z37d-LnusMr(=V;QKEEC4eDZ<#o31hL*<&P#Tv6EADOtDUF?5yGBQYWoeh|qNmov~E z`E~m(GoyfZn&{5D5BnDK3q#CG)9;-8c3qoiq{KCB9okPzL#9QpC>O>C&0WZX)kP$; z{HNdk$one&81rG@!1i%)qxBYIe4oBH7lqdy{7rzV`4K1yEuU^J-M;U6eNCZQ?PJw3 zcjWw?sO8CJ+krjCN&vL2)SlJT3hy!O%cV}#Q9dt(*_QlkYKAaB2Z2eR$@GI1G=~^v zo!Q6#W$Ty3A>>Sgryyx;bkxw*DNz?=Vf`3;(n7sv2aYNRIO{_w1k&RQDfJ@y=zSA0 zXov2*C>^SD@kogwgd(Cx0LRqu*!yJ>&Jcp!Hf1JMx&GaT5O^90?`J*}Vc&ogB48bP^Y_3*}hkZ6p%alR}ZCNO)uHxwkBl1o&_PVwI!% zbcp@_=$SD})T$VI0k$HS86@U#K;$s7iBaBd0(8lzN}+Ttv25>N<$fL)1!eBP_fUc_ zpi_Oufai?ljGz|K_{CCkQ4Ao7mGwiZx`6s+n<^XU;1k&nh6)mEXgMBg&rrJCiQ`;j=^RL&8JELt5nL$T$J71IPmyCEiHzOQ1;*ciVh3 z!%wGEr?;RFpqrvArSqc4qd%n|d0+QlDECgJ;ah%Z$O`w-#g(Dws|TbOlH~x>cAxyL zte^SHxmB6yS?_ZBMV$k7$*0~-q5GwgDU4En$NPvTh06V<>5Ik}=;Y*N$K?EE*(Cd< zRDo3?R3$?Bk%|L1_bZLK!AJDU&{;W8$-B2=GL+)kVr!x?;wo<)Q#a{{UNcjdvXzoB z2fGN{5FTL7f0(D{N8#7vXKZ9^6mxQR0ywFhy#amzA_5I=}zKy^4CL}Cj$d~y`sc&h(0kc4oUdm!1AnhLC zgW&e{6Urd|ARH-{9`5j&6gq`Z3Od8a!__I^6j|K`T}j<2-CW(vhRyGwhS~=IhLG=e zj*6#jN9HF;$N0w!tDserpHr(?8-R95pTI~@pV=V6fJWa;PkldY2ed7Hc)9IS9ga1F zVue+SYLuLfwU^SI{JX|c)`}JRJM!n5BAIJ%7vBnhk@%AO1qZzzRWZ2YTjE!$U|oE| zr@1eto`pRteda|T_*~^F#?xZlJp!p<#&1u)B?Jq{iOA5&AjzmoWA1nCD=$88DYl%c zpb}M<;!rrrTK|q;9owMVs2Va!U?FOuv4gNS3CgItl>jxL>InV~gMCXMcuiGCRf0pqNd>H_Oi@pXP3}$RyM*jtK)h@(b5L4QWU`fw9-&Zy$F zrnp+CZl-pm-e-+$7k6)TReEjvP;>99Q)anv)M8L;-aY-)h`=R*EJ0l(r8wKFb}u^F zPp1;{#8u*w=`s^Q;qmO^%&o%X3vl5i=a|Y}|FXl4&wa|hG^#{eEr;3;59$DQCk3Rh z301euw@9>1c)#)v@J_$uzQefBN8myTN0xhHi5!nKjJ$)Kim8ptgDpgSN7==YLt{)i zNnb-AN-@Z|FZssHP1?;2$yNMqmq?DdvHH4ZthVZ>Nx`{LU1BT=Xs~VA2XTF8ppk?T zPFHx3#gDk|`%u@=)-;g>+_ceTy0pZYEBZdR9ZwGaIobvz*5#!d@Y=1@h1y}zg$EQW z{ZzV7?mE&k7D$ej{v!HVW_Fq(U1`>^eJdC(YG{?=j5m z5e2*&NPd~*kes>MwRs)i7GJ5JFzrW_$ICxI2}!bjw^(AaWyRXm?Zo)V^Wd_15GEkw 
z8hy*A{vb<4_c!++a2*hnJ8=4s!mlYcub78Y~>umtdu6bf3pmz;fX z(5S563H}+ACzRW=ncuIdbE|d>L2=d|S{HKjDu*s2nm@Pf(ZCil4bVrI#G>lz1RJs`|3sHNJg zTuW7RU$aX8l^(W^{(aPK%)wT`^SHP}1BF(h(jVok(+VXarS5|62QHT6vS3s7;bez$F#{KhI{iCg^pMgdFWfa5xqSJK1(d&l>ibG zckt}nkbI0>pRR&l)nNopvDuq9v2P+AhEB{+H&3xpSCG{Lbias+cZe@`J9Rg75m1rr zGsIy=jYsCid(%#kB{P^&j?g}8tp@&trzSE{*uY^pFG92A-*r3RY8v47;eUeAp7H`dI@@N-Yy|<*NDYUhy+>zq=#u4-asL|um!+H= z`@%z)wMAtUmyaSM8JMN{!Mov%H z{Us%-POnFW>ER$fYMh@<_oRnApMb`vvn4az!JpFaCkh@o5zEt5z>y@AL>z=XZiOfB zwz-eb`;!Z!66~O1XY(@(a`D9)x z-%f4st>4WBe6b^0E`7lq|2aXxo8)e)s;srLa?MBUC6;1#4_nNvJ9?rr8mY9J!b{3; z6I~NR8D>V(#-8m=J<-EA@p@5|A=T;0sXd^2-||!+aYGqa>?#Gj{`Ol{TX5n$qpA&2)tiM(ij?#Sfi6eulK(O}butg;&L6P|I zi!={!@7zE)Kd^d+!s}qfje(@|8>|-^x@Cd-1C8Ew$1W2hr{)8ydzU;{G_p-y%;%7K z*I4MuqS#{c_IAtiq}GJm#ftf*Uyux7U5Y)z5eT_WOh4T7xedKDeT^jN3B3n{SCP~aqj=3(mlDJ|Vnn^z;B?xQ<+;|&R zI?$WU9S8MBY@{DB%Ct^upjqAQO$a7b8B+GNz{&Eu<_`^cGluCJWMBKgbgu*0p^nB<81+sWztm zNnyvABfYk`#Ib6MFXyA?oUZ3+UVM1^D_Jk{H2ZRjpb(u=R9Y{ zm?c>BSWuz^ZgsXa_CgM4UL$TI4%WJg>YU2`%Ki%Vdi@%U3am=JN)J1Oor<2Qq2mCR zmg>sWK-n0pg5(T{&Sf+p-Xk9HO)x-^2<+6XAM>pF8f-vuE1i)Z)#~7DnIVHq|Ay%c ze$jJDBA#d7LKIBO><-MkLVUdQ{DJhF^7}h)mfcpbBGB@mVLg+Iej7s=lMV$pKrbGK zV{Z$2$1NvhIXE~dvA>%TzQ!Nd;Pa=Ge@BDyvUKV-FMR2BH)ZJldg zaHZXMycVR0AFrO)tbdVlXVN`bIK?>`*Q|D87B z{XAK4!aY*}|K{5p4uLV=>YDFWJC6sCt2`L$Xud`k(t%0HzWy)ZkW;18_`a^Ey1|tc z!dGY@(|z#qr8q;7wz-2|!;|M@y-@Bfv`3%xhpf=Q zd=etP*^AcqRx*+nALqG9LPpT26#fh9ZyZt**$k0_c8Gm@KUav3>}sCgyaXapM)dt? z?kxHsTUjg}c>eV*C@V19zY0zfI za&B{Cvdwd1vuUvdj4q5H`jq>N+WGtIB2xXqVV(Xl(bcJp;W~b|0wdh|954|5%BNl= z*f3+%_CwZDVz2XVzgDgF1_MtgPbKgAg@uPetknGDT-;T(>uS1Cw;-jkVgX4nzKfjpdPw^7L>(Dk)UMz;2 z(|T<&CFt{-x4{fUYeX#P_GDNBWJ(2~;?-z474Tg(}TPLr^z4v=l>#Dxx*f}$0_bw!G;EFWJM@SoVbN=CcN_*{QYd#w_*KNo@QRp%0wCQu{_l`bZ z$*Q8Ox}r{hk=aF}psq{|jMzkvZf(g7$v4Zh`(ST;HTWGC`JFy%MPEOaMPkI*)2kzM zk-TVn{WRrs3Z-uD!mwb8V7VLPmFKkyl5ZPEd&pZ*#~}r}Gz%}~G@~~$U-t<$XU@gD zp5Cio%YmosH|vhi2l2J=31ao>U5mMj)eDK073Ycbqq66U&c&qkXEaz9Tjw+8j`GIj z*K}wAd25?=7@uUNiV>%oc^q9nU!-9_56>RHV`(oNI{Xe$AfW{`HKRxqFVzaowTODx zJ^X-Y@!Jt~6)O+}cg|DJQpRL0N$&la{4v%gMI)&2Q#Kp|X*FQq(0wHZd{6Tr;xMht zLvKdGx>F%1w7U)ID3@JE8mPiM{o?q7YhZtjwc}K=g@BnZ2B0iYWfh4;+M%3 zO|()JW|#@Q^?(TKa^!U6)&}2zjMB(_`qD!MBcx9dKo_Z5La?$x=mQ`AKC5K|985P3cuLGvE9 zs}v4DD}$Y)0dMOi{H~w7D5;pWm`5iwMVdrIRQ|{4weM?Ec%)RziyBcsBX1bN924vz zTPvQ!rOjeN_ord#v*@G**95GjBXqd}T54sg$Llpd)T(OU3LAHZ^482!i=DFc=aL#Z z<%B$WTpG2T^$j)E3$I#KTt+s8%1-YP0}hh`C=r1kcsDt>tA5v8D*lL_$mQbt8qL@D z(}rA^i)){f(1Q9nq4~o7-sm8H&U<@PQ|wdFX~>|)y?Z|LIPWraI^-@yRyHVsFJaHS z;Ni`+(MJBn$ma9S=O^z@`*&Fn3_z_w_26Qc;vS9L8M^73nP<~wW%A{xrA;LSWy0D9 zkIc7_%gf{C2%pEL$i*Ybo651Kokw#Y_lD9nM2c+$U&6)ND;7++GF;#D&tYLxMPXs* z)zVe$2yKg_2X+EFW?0N{8_COVZEMrq!#+Mn*Q`TpJ+Z!Ud3$hh@TnH6nogSXa{NYi zHY^{E?LL~YxY^jlN(yjrf^Ph1Bd%*o?PuLh`Y7r3-1CPu9XA-iEb-RDh z`1>1~8Uj*m;YTCd|5yy}*Q6-_o%Eka@c+C_Be~T)D)Ii(y)QX!p@f|4`*YQG2UtWr zvHfw3K{<$FlE0M79>drHgoN^^CmPy5q+YSkdhX|3iKzH+3Ll_+O=q^jF@Jxyva`C- zVj?e^TK2_=)e8iT49&`q4l8Z&5Z}us55=2HDCzgG%+24rp1>r6vq6og{7dz57Z6nP z5W(5#qNs($i{kKOd%7wm&*j&>?L%WRVsqy zd=5T4E6aGMOfPGM>W^tEGqy367seS0|9bS3eX%`}AMsk)dm*0F?Z~iUziPnve0S<( zGuf_xvQPy8>5rwSluzP{dP*sov)vN(DshLY7U&TYM#S5{1or4|&{5aaOpImJP$+BI zeATd%mz<+ktRBs*Q|AUb8<%D6`}Umiqm53JOX*ha^z{YA?y@<&>8Ou3{p&DNveI~) z?dBlpqSA7TVGit3pCaHoY|5(FSTk9wqp>?#NU7K4Qj3ii7c+Y^k*_##3vFeUO<<}0 zu{Tp*-!lqnDCpcsFkrdAIjDromyO ze?x1_OIMFYvm(Q4xW!kT`+Va$ zqvzUJX9J;<3V)5JuI+Kj?|NwM@iRp(jzO);kaPZ8cA>os7Od3+Dvk<2 z-l??Qoyd+Yb?1!3)NUhALqBSJ+gwo{OluAXp69#`K%W-z&?J5WP16{`rq_MRYnyVm zJ0&XzXL0Gd9woJYP`?~62ja-BpsJ$$wXZ9%$VzwHt}ong3jll8vi z&2#gZ%Y2%%$i1EI9R85u}7{s$0u*~Rt97w#FcXaE1uF>22_`uj1h;_OjGmcyuQ`h6k 
zTh(M4Q7t6>Tb0@waZiJZn{9GTem}yK8J6YO#B@-psySk%nZ+~+jaR>3?(liM-_lA| zirnn8|In`6MChv4*lasl5Vg%Vvrd)D)gyyJz=EGz%aeK5{)Dva)k!fPaU9)yip+f% zFv=`{4iC2s;*u1p*JSk`Oe?$36oC5GztIJ53JevryX#v#|K)G(5%_QO^+OVsIAZ$U zcy^cy6j&3@a%VnYh88Wg=W4|J>Xzt(8}pauABY89-1K|&*kWG0tAofJ#>?~sM)qp- z-3}YJ)_3yL>$)wWCq1M-Ou$8|-)2pAh5Rgyn(PXqfbsMf*13}wq5Ueu1XJYxYAugV zC*{`%Q=zOoHawdn>2|z3HjG7T#WeS~mjtPyKbB(@TC)AJgj^06=%b>d(inQcX6uD2 z`GSz2EQxb&fMVO`W62Q*ZC1hKF1!mD7KMg+&fUf2{gN%h->Ult8I{iew0WH2m#0}l za-x~)Qig>mBb(`42AWtBpSw?luz(^iEWqp_nd(9g%Vz6gF4Y6z0YYxiatqq@Vx`93P-JI_MIt`pr_cw=< zW7KH3(xMeMYcQu%Ia0Se0x&`paGmbYX9SA(yM(k||7`pS16K*TfBKL!6pzjPtd;NpEcOjkHuyDMB1vl3O~e!oSb*Ne zGtt9Lu1VKr&V&cm#WZTlN}#rfyHd-1qKYIo;8!S*`~=f_LbH)5n6UhOalZqJrUT14c>IDJfwr2z*zK0e&1Ek-vEzeik(5W4A&5iSg%kxz1~3MJxY8S_vFa^$sl z6qmO!EX|Z$>`pN*dU#J{5aRzvPLXo(2>r%yqp)D+h^hup{9+ftbq4EORsU1E*9f`< z=CGP(8ZpJ19~W>p=mgDH;*bcML(O*9kFmd`IHLg)SPhn{WoQS-s+6N^Tt@_EAGXqc zo-OLxG5%c42AP{yf7%|+{E1WuWL~TI&>217Z98-7N>2|9xLKTF{+T7qNbjvwSDH7J z2f439ynY)9hd%5i)<)pDe>)?6!>gm!G=6W`abNs6KVGbQi&)H9-mDx>EMR)NHIi;{ z_TzFV^%ajb-MmVj-Es>OPN}F_tM|Pi4- zPOwS0reDB!%0mfwxeqi;UXz_rKY%9s!>0oQDB`XOVcc3U1TxtlQU~ zu=mW72zjwotWsN2FFJ-6l)`1VBzXKl02`b%|juMj|kPl_0il0`9o>y#&b2I=!s z?b?$;>7t%-ka9A&6%7pg#ZyX!f4!(+)~=BbCjsmh_QS9lDWmzR_4kSlyoNUb4}O_& z{h36i`6`nT)iT|=YWLfV17#Ta7_ou|cwV2(waJ)C9E@iA6Mrs*H_Uw(XMKOB-30?v z5QxK@TWU28+Vsn6g`Dh|yEif(4pUP6etncg6W(x>AtJ zt%j(ZIx9(g2;#9xT0EcDu{&ql`vyZSoMUiGhpFhC$T1 zwG!P1<+BtwobE84t&iQ``uq@|?C<_7W8e+ha?V#s-5;!Nd}~%Y)m8=b%4r8ozmsMp zKVOvCztD8_Gffz_9XJ!*iHPh2E_huovT$n2XLY}zSME}YN29|VvNC;hzCOjD=*!W5 zN5d8p$6Lssywr*`@Xp#FEl`h!OzuQ;^VI-S2s^-C=CJ&HW-W0wyYeIn*4-f$D^F_t ze8nH9ad5x5@{w{e#moRt=|^&pmIZ=h0$LT|dLWK*i{tXviRZ6lL_)xH z=T86j`z8Hyke%La!$Q1Ski?JF>nRVrgQpYv+{7OVU0*0cJzI21_#AQ;yl)RGX%>Q^ zZv4%sF#V5bH~P_4&)4V7pE&|ctc*Lppk;wS@lQ^BM5j|>uo04Y?6ZRa9(CX?W6$f_ z8Reh}7TxbkH8UK!GMCq|H{QQz-kT~m^FUu?6f3W9h9NPN!-a-p8-*$UQ-43t>y3mo zonZ5Fhj&4TdAn`dTi9F6^IBs6dP0g2F*eTG#+^Lhn+d@toyw33iruB+gsI9& za^SABAO8ZUprEb`bjc=YW;arvcvq|PW4ET5IcMXzWs$R9y>05YTqCgQ`9wCYhMser z+2`b%f^6*aDWmO5uC|HQ27dhSwu_C&*aPw*M#{f)OpzmWSWJO7ZzFh=}a`xAR{j`L8tos~brFwUmDX=41LYV(SBfkWCvs1ABBV`SwxV8Zni05*gYgg z$ZLL!JB`ES8(_NK|B0736ZRkK-?tp;YX2;*#I9ej*{$;aq0)RT3!iue#v(eviV*$B zYdv@itOhNfC$mNoeak+NsfHvxSLHC$@-dCbGXAX}tVs`+k(10dyE~#^HlOXw#xq6r z#n8&Cs!rw-#EYX9Yn0dZeEa%Unu0d@H!jAN*7Y43?(({`*@MxS2d=5mmRpv^;yR-q z?Av>zt>IL9{);E1o@6(v8-7nnr0rX$OLb)ZQLxrwJf8v^4$&Pf=#Kl0U~$W4wgqAl zt%tC-L?S-H&aqeg!zq#BCLwsbpiH|9JMYHcmsP`%rf*Mnhoa^48oB4K!fAfcnvm;73eGS zzwpd+zMZHc#}E94RaRVK3*GdXnK`ryqyRkB&pbr6Y3?5)7uE!N^e}oay-d;iz zZV#JA=!M$14i}qb(*)huOY4{3{o05MY$L)W7_}S-^Aod8u6FcF8NOcreNmJ_T-LKn z%|Y(lL?#FGwFherTWQs{H-YOVO}j`2I`<%5=<9u2ZkT(zMU9GhLc&ezcqVBb1O5#_+~eRIv4> z3%4$~t%YTY$^4`tZh_><6G0w+c83gPi29>4tx+7m`emzalx=4Dv$6)`H`R_uEA8W1 z5*QKH)%GyegN)d3)e0>;O~_|0OFZ1{u{PYD3>=t!_lCN`GL7cyIg|U!t0@hGI2d}K zhrYbg%ZD8ZhxKfQru+GAr1zuwOj)X4d8c$Ll>esLVuD7|oA?E!+(5WcwJ_S;{YEca zy;PgS^U6LrJp5w)RDTpIrR|dWHh{|Ya6vgs9L+;M8>Jzbnz$bbv8k7Vkt#-0?Q=a1 zG*t?z{KG{^>9u7rMt_aJA1UC@RZRV{^c6RTvtQF;qb!UIG+7?yVw(tQ+!{)zy~%Th z>4#3*AczyfEALflzxtL?*t^-ws93WyYq3l;!*$$B!pUu<6Gxaytw=S>22wW%cYd>1 zo>1QUFz*fLwgrXMmxq!7w9CL3+8!y^b5(sXE|H$p=dSWsN+w`cvhkczT^g1GAVrcU@mWb09>#woI^`HY!sr zXda3QW8lt=$3#YDU}WWs)HZ0iz#%Ty`dQDVf%kPcN}=|0rBM&R)=#pm?Hn$}|n2J5UdELE&FO9jnMnRxTn>WRViLk@r!!F?2nL^=WO^T$$Nu-}Cv@aAkM?VKi~dM^#QOFzP|#|Hjf_8A$Y z@od38|4YlB^>WK%ypg1dyIDkSi3up`edY8H#)OqstIm!q8Z9Ve)C55&0gXoQhWLj7 zNQ6(F4|YOTJ=11*v-41Snpr0h+^k2v=71@F{D9ndJS)AAta2h&5g)u06`6=2cXGbu zw68s*<%MG^hJNg87PA{n3SU#a)O!6b?70duH;)m&FFo{j!JHJdF1 
ztmw6%c2WC~<#s-$0a-1f!L0#*TOy>DOEmO&eXb#KzoASD%h$^FG{8V-5T{uR3}VMr0J>rS%om$7%r^rG#SF#An=+QB~RN7 zvbptYi8ZSLnFSip=^Yh{Z3liYn(^c_)nyxO14%v)M! zC!d$~q^Gm*u40}F%&Y=3WDR{=4De|U_MA#Nz4Fek< z?ET)K0bD0y2JnC}pb|aPJ;b-^(bK!!b!}sITi2o} zrRJ^3&zr&sT^Mf`-?r{;dtxRv@2w8P6)(F+C!YmfbcYZ?e3i?=Tw>OE3u?58^WQ%i zeTJ;rj#>$xdxT#sL?cMSvS|BZHR)+pGs1~E9p`v9@JoF&k_q1at7yC{>a)!ik;Ess3TIV!N!(#(|J;lf0a%sJ#NcO>`dr6i=a|bwV7>o$IMV)%oXCTZAY}-jLI1Ql;!^{ z?Xl5pW;4@gdp4dTE3aLraK`JnDJO8JYF02sn(ulr@BDsD#%pGbm_{+pn)YgfaOYeg zV)$7(-t7*~&Zii>@9rbdhE{k~GXhrgFeJ$J7ULPSL|hMKjL8 zZIT{G4BoMpSNVK9eger@VN?1AwNJOzqzDmZ|Jzuo3D!M#!?C%Uo+)Wb!` zLc%kF;LFQrX0%&P;UL4&rKN-H2%)M@m(+u$ksCe9nDB*n3_Gsu#Gi}n9Vw9$eXrNZ z$zfP(^Q~QidyJ#kNgpjsD&bV~r^dl^IG0UUPG^_qbMDslhCw%P0hPJOhfCoZ9O1ha zj`oiw&6^2t1Z0cc*PK8G1!v{E#5_25GQU-iFNXO?UR+Ii-*1;rHYO_%xfuu(_Yprh=Fi>2r98p}ouq#)4?8Q1ZXX^Yy%Ta*H zECDHf$Yy@um4%anxn0ouND=;?`|}syWwdP~O;{=MfGDJ;eNPNg037yJ!6;Q}oL7Bu zm91N}FxK0JhBjj|dI*U>ZN%vQ!7wnl@mZsdv;$(l0JM4H;pL&{@SY>Aa)k#* zo`=;%4sak#%Q(&cY>vSZ_s8?}Dn1K5lzF;_n>xS>|H;qW50bM=45(%W$LDAzT7pvXTk{sl6ZIpb7)S_2KKBc2As}-Dyb%F8E4CmVncV*WWeo# zXZJe^-m2Lv<==qC0)KAQ{7w;lU`6oz%LOIQe^BAD%(47e^UawO-+wmyAXnYkO%)I# zc+(s$zvElJ016l5Pl^7UNGDrE+Rhw&;}QO!`iE2l2Z)9Dx+(B%M4v2Y)RJt8FAKul z@K5zQqklTVDEIGcibn?U!2him;%JaNS>AomiF7B{R%;j~{csmuXv_RqpoBWvQR@)a z3%dzcXcB^J&Mg0{(*8S^BEWKZ4z>}Z+TWMe{w@q~=)!P0p*MQLZ`=6$12+*C=l$H5 z3b=on%-_wCtG{Z+|FhnOjW`=u^f3t4wUfWoa561~MFCeCbs>)5mr!74XO5A>kfB=j zX=543rUBF3Yvkx5`m*1e(0+YXXg&Bksv}`9^qOdtw7f#m$HSXT&c`wQ>mP&7K+oaN zz??);vHR;_zH`BQJALO8zQ7jrdl%G*qMuvHj*vh%y6?BXs2A;f@+@1TFE-Q@gfx6-;CDgP8OY$Ey>sI$HfkS@I{3%T#&Bq7lh%;^0l(8)L-5-s|m z_}Ayn3YC%w0x1Ho=(oZpz6BP1*l?tc44HbMI=bl0HtyT^$q~PPb~4RceycK7fboyF z5v9b9gg0?gt+Bo{PZm$*0LSb^cF;oO<Kq8~yC`g;{@7#DrzIGKZ>cJRZlEr5qK;h+yJ-nwPLhgY zoTp9}!boB@_o9H8EMai}1}c*Jphb9QQG##;5(vc^XGw@L6@_=ACSrvE)EY zJ91rwpo<6Bs-4!MD!W^6&eyfyRjpLzZ4YK-sy4n}KQ)|xvOKT~4$3v1c%@vYO$ZV` zz?rMCj+wO5lG1jvX31qtIAKx0u!dO8pT=10RLj3pXQC&D?%-Mpu>R4pKPuS9ivUlc zba>@m{<;Bog}$#2ttm$&wV5YlfpgCn99U)8d8d3BAFXaZgLAunmR9}j`6qpBrZ-GL zfN|w?z(F1H@-Grz7)P{L3Ut9P4kXJlg(W7#))M*ueiSiEpzH^ z`QVLZxS+6ifoj19w$NoDH0FbzeuZdij9pyG3xGmI`(n;*28&BAN`7Y}m`Z^6(5b5L z;#^=Is`c3Wc?ZJ=6d;RV1@TT696<_N>1`Vmu*RONq9N6ey4!=m&3mU9TS`dE)*3#+d-{8=T`shL7Y*gB= zma|?<09+4MH;TzW!=ir1A8JtU3L>=c!Mq=wevjCLLl7GM!9bYOq$vK^twPYzCRNkH zKFIOpFayVitq}Ybr^Ti9%>99cUbQ*nH3uN5eDN|cKQ&$GvH^>Rn+#m*L%Vanu&7)U z3bDE)!n5%nD!JxLob+Tavtxx4WX)E%u{4^Wedd;xyv`V_;bJp?V7&VKn#x9@I`cT^#Z8+0k6lySS70sZj)_%;y0< z9zw$LdBhBHSewGqu zo*%rQbXp$p%Gu$T-><^w^aOu+iS*n(+3N)$QV>oAGytdkKu0k1dJZx^r4c>V^ z`5~{Epgc;HI4xG!-9aY14oeRDb7=EbUr}8t&?YzLEU5=7lw@m=2Jo8JVkU~Wi#^dV zx8g3{nadj>eLVNuswEkb!P1#=eR1IEuYPVVQV7Tq;Mos-uB2X~M?PH}*WHg_4R5mE zK-UCKEA;@F>~Lo&j9Up7^A<7i-^KmG;D3dk7CcmtFNf<~3IYAJdr?t|hik}wx{q|$ zNW=bMrN<%`<*3Lk;9mfUrQF$>QZL7BIRUe#Aje`Vr#!*I)L*}9q_^dA~r3^Y(iRT<5b6fQKStkWefder;s3z0hOcgST-J%+WMjDCG$GGt(7 za!tx;yfk_3Fp24RZ?a%55F9=nyPGo>vV?aq7s^_CYX&x}9iHE%0uL+GPLEACi>c7- z(VMDI$6;v5c1~mM^W2+lbf~3mDC=JfGU92CW;QB}H^Qht8sfTQ8_K_s4IDxy1KYI} z=W0$s%g>q@~2}x5v{>@=cUre6qP(rxveI9L~&B6V0+pyD5=&Ufu@K~bbV;qy7{7!kBki*jO;Di7P& z-cY#JyxHRrpbuB5n}EpWWXm8bfQ2d@!0#?lu(f9((fs*AxJ#&`zl1!%6hUYOQF$VmqB(rsE6Zo#eER5g+m=Ehhw5^ z4PG|B_&jHAw_Zg%Bi-`KegBK#Fo5R@W34LP3r5dVSX@G0-=J3`N)T+gZ^Y4 zU|`3^Ylk4PUhop9a{1yU$S^h7hwF54t6&_6MngH663;0oGI0=?VVH{q~bdRhFgF^>eV z1PrlLt$NE|M#hsgZ$_T&LG zWwgwkv04iEYGC7?52*fNVP=J)mr&#YHbJ-Obq?Cq($hq(1P8viXh5TD(*aS2p3l2W z|HRuCgA^VhQ;^+1fvuPml0wr@LFeyilp@%1h9|R76f*Sb^AK*Bo>$9x+R~;>{p)4;i@%7ozjKfkG-5U#sp=9L4I;I$gL=EPjLpJjmFY$C&Ng zHIgA%YBleXd>rLYd`})NDu`le+xeX5q-<*~hHNU4j&58r1m*4KZOnz+^Dt6#Q~x^Z 
zkLiJ&OZ6emQ6Bu9-zrRr;~a;%6J7AaUkSaJF2qIUj=QjRe28Z67ceWsFi4fCQFMtz z*ZN_H^0lxlDsp0%$;wezyIKD#?p@qG$PCvH)_Z~{Wy*L|C|-_;V8%*KxY&;8XPKAI z#0}RAX7&0=KPnw7WDZCqplriine;(;ApD;k1vaN^-JD|4y=BpDsQ!2-qrx2_QFVLN zRy_dAD_7>Ue$hy`peJfWp4xOfKGM-etP^miyR zL=-EG%|x>07v2wM84m0O+a**Pcxj}=rv?;T>O9T<3U8RG8Gf>l76=)PP`xE?47-0! z890PHKA^vROzcklP{!ZwYGd`E$qf+yXJB8m#n`Y0UF(Sb_pJV#bM`GZe&t9hZ>B84 z9g%SO%axti!qHiz{kfqW!ioIVkq#}5yq{k&-vEhx5v>@RU_*8HPq0v|-?znF- z*{QR+c-{3)KtI?0wQQWx6VAAyiDG$1xY^LQA^O($+KgXS>6S+{akvMP`_rc@g8*GbeIJke!cDfu`hfe#>qfX+-A`@s zw}b_c|5(in=+~^L53j%lg2?0Z2Q#=UY`hH-UVT-w1Sm$#){bU`F6tqRBR8qMhLVmO zSdsM8!imB?rVhGfk!+6(U$M_w^;M55aZx9{qjEUt-|B5YjeK8u8pGs%PG$!@g9HV_ zb}6f;&haq)Vkb0>r(7s*5vLRP0|CQnp10!?8thpMevYH`SehqSt^!1%&ag`5sZ5Pi zd;4vh{3LK42HdJG`hILAF#})T8Bb(Vi2ZePKrG_{+nUsYzc%pao0q>Oh!u#H|CM6L z|1R2OqWK3V_-DKJLXezD#_zU&*Y!_6ZwA5cXd=QC{=G=g@0=mX%|`g?f9IV#5VQjL zp5mYCy?@P6Og|_PgAQNV&|n+POp!q&Legm8CQWVzcSZV=e*9g5Ha6IFhU~R`+!wT* zvq?F^^ST%gTGjw84*0WWRj=~ig~ZY71!)AfWy z>5_R9tuh^JCcmFt2MMP*Pq{wDox6PkmN}#BCH%(8BqB7{ z6pkdDf%~)#^U0DzwMH*JYv&PWwGv+qWkZ9xR=?WeSKtaeomPE7$W~m58*75CL#Gxp zE-MKm87mh4T$P5|yp2aT@nYC8Ten%S_4$42+eTun)s*rSmeU^AQnRNZY_(EpS$@ym z%|iUQNdG+OKr$3qMKLeCjhq^v6(JyU@;F*_xws!$hYc!EmYqoC zMoo?Rhf6i164j=YL^_~F1)b|jGcSkX7_}-Z+18`kla9nAVy#7TKq(Ryr4ni5K??J= z)Q|R?y0(ix{o=*4D1)NA7|*g+tE&Bifb9pmTG{}Hv4Gp!#B#MEoe>u-Xl#}XC5u5S zj`yDj$mAuXiEhIjTn@)J9`bftgiFTL=1KV(k z6Z}9TZMm<*SejoOJ|xQO1p^`45&2s8tW6MlBe3nHGr90QAin(IF~QkU zm=ebDfhl=>pZdH|HY0kX##HOZ@0K?Ys&EMQQq=GK3mTnrjsQIw>Va)AFn+6aV5A)Z zXpQabf47$ba5_=)a`{QZ0lG7<9^91r4n68Li-#sPn1r=AC%sARg0}UFlXtkAl0piZ zrj1n(f^-CJBAXFu@HXoJQ$9hjCK54h6e-B{V9_)1LnkSGeezdUQUVneic#-{`{;hS z{Z^KXMCXE6PKTD0&8P#|Y=FT1s!&Hknd^_|cw07;%rtk+ zZ^0mn$zZ|``2wNySAA<$|30W>ET$rZ4CU81t=QDkw{ZM5w_3_qM^YL7pP zhPUE$%(euxOpkURC=v((vf-FN4~Vns0B4SBogr!B7vj2?9>XDUg$(c9G-(g`zb+|& z&Xz{vDT{pO05$G32{Oi4&~Xwn%dBoI#mjfX8Sj~`Shv;?-j}b#{p05$mS2p8nEtww zwkHJ`hw^4?94)}x!g0hfw_uY+Lqh4ua07~AC9f5YW{hN!79oY7_b?*uW;!7*L>oHE zhUav6t!1r9cDv#A)7+83-soy)3yFMUDHCK$5%orDtPHapwf(6$_7&Dy{Kvw|SA<5< z@%;`CR4|VA6rtcW2X3`I=#?G=QD50%SXuTM)>Rx}T2FhcM~5Ve871^<3bviuRI*<2 z0w5RmpEH>Y8!?@Q??_Rfr>%UG?j0oXUfF|U2Fyqan*-o1=PaCZbBdV)79Ws}S3?`lh?3zF(u zIx_ChdSGt=z3QOz>llTu9G$Bpr&S!oMrT$z4Cgi!+L2|=3;%?4qFKyl+U#gB(1NdQ zTgR3~$H3(HR;}`rpy(6D%YA4ve^9gCT$9f6(X@Jv`IZ2Bd|5Nf=PQe212!e(87oSS6E*{08mSyX z=1|SinxXrIs;=oQpT&6tmmQ!bCx5qD_I=2(LWTMk$ktKbb4Rk_BEY(C$#eDvapZnG z3wP?U84GM%MSn2IT9PvMe1akaB-)Fcv|Pf=68zx^FdarT_fa0acB#e-`iYY&c!J@z zeeuD99*w1+F! 
zxXkZ=ONFhPU1k?!H zCq5Y{R>$;Lp)7f(Z2|+IAeA(?C(U0ho@Yf!SLHe%0P2n1G1t4kLM?*}{ll1bb^fLY z@9tFuf5)Ujs5ovPQ`bJlVQt9|=ykRoZ{({AL4RS4{hcxp9# z1n>~`hEBH%w_^~4`X|XBlLZVUR$f!S;EFu~aPy`sJNkUM?(K`o;d{;6!6AOU>)*QM zyWtjQX;G>So0p$wz9F+nwLj&RlStP1^Ay->K1us4o?f12uj-q|(n#BtdeCIi=s*}H zEwyq`g1@drNr;#e;%{a!dV3Q2&x)$CytH><<~YuftufiK@cQOR2Fw!F8bmRPd57}q z5u8e!YRA_0HPOy!Dp|u%j5-@?dmU ztJ!H+Xn|eYMD``qwUY5{%$?b4TWTE2|ITAER-bLyV|gv1M%Wvw08&P%TP==j4@$w6 zRyi1~f;5Xo$H|{bil{BmUa5Ukg`)S>fdcCB4?C=i*f&~{Dppvf*xhnLBKILn5Ki3z z^koirKxGn{$eqrW-7=+=9Ddxwh>hF>qO0ZY82Hf%}K1stD6%m z^aoqD=WFdNt;dvW>_>R$b*c+o+lnO0j@lsS{3;|{yB$rAf3`_+F=KFcJ7}|(Rjw1k zTYd;{L_D{TaIu$U^(2WBA6lsVe`B{A@>>1pXp6v^N#8!71B|;(#t&Xc)U((%9$h=c zlzE%y8_WbmmkV2en3l-Hm*?>$KARdJ4bb7}w2idW9$CrqPc=q z(u&o_A_@w(@ckG^_N7~>W54RJ$>Up;TyHcQH z!FWbJby+51haI%cQP|WwTPIw4ucMF^zVq_pYM}blFGj?JPx|LKEF!MFz6k&S)um9&8MMJzi{>J-3n zcT&|`Xj~D5JIL6|bL3ug3Q=E(;N*r*gf`(kyW*F+(3}C8)ET^v32$Pq56lq+Y}(NKq^Eh=X-WG+tSvG=C(qpw8CBiY%;Ds?PXR_ zgp9nL>yclUFyua^pBY?kwdRFi6J1y&aQ@PNHt?A5-dv3|6`uA&#RrAky6b8zvoKFM zJpI_GIfkqKYnZ`|NhO)cWi{Fi&wZ;ZatZ@eM8NHkL+e$=)r9TbvU{_1f(-}A#DjIW zVR~`X*DR4Kvw6V5c76}pYdydJ5OAPZnYiQA#OjN`(&V&Yo#!j$0EvkB`f%< zp`1{B5gZ0g&Nwh7-prge{Z*O)Vli!qP`}OOUw3By}8U3bab$O?+xdcy0_nFYo-!PORbfyKk>~TX0p|@wpqHS#JZwQ>tc$fa*mtk(5Efp!nWB1T}4~(hCNV!fZv)|dB2NV&&*|()ZJ39hA^>; z4aQzHnxH9MBCf60XE%HEYK}fTF+wkv!Q)oT14CsUZ?ta-7er(GQ6=H%0o2cT1*v}3 zGm?7-YY^WA<;%nXt5P0mIRnyhKa_*VXm(gK&hx^jA$(b4Q_E+w(c|N&sm0jCEFTrF zCjIlTt+NMtSs#q0R%eeB7#*$Y`2psQ<>Q7!5UOl`P0fdlI}1XTiP%;iR*baT=N5IZ zJdz;Tb?sbGkP7b}w@(iX40C;P-EX?Pm0mV%O1u-0rLDXI2<&ZE&oy2TWbvM@?BpZF z2(&I3YL21Q#tP>6yr({KW1#N_Dsxg&^7iGS{{JD}`Z4}+u zh_~kv;0R3PlnI;?;wvo%$6hFn$BoBxJ8Bkl7NnbbOCKs!gg%NevJqi+ik0P9~Gbil8PkK61jr!fdjzPYlq!+$aIK`bcbpQwRRg^<64pNL|7R zCkrikQxy8{iI2*q;&xI5JzcCud+576AZH&#qb?~j+7-BGP+i$M8Rk2h6LFOzU64oP zZErTq;IYs@VU#j2>+D*SwPHa*qDbVle;Jl^pixuF&35O1q0n0Pbbryu%8|P)=C!&1 z=4&IMGqKcA6R_WWjcqC0(XZE^}q74krfHK6{Ec_vlmeBgCSlH z6XNA)%W%TQejqC}JI#mRkdh-{L@4ki{Cc=85=_1Sr4_egz|r8}?R}B6Qx-SYsl&vG zC(O7th*LLm=$)~bRc2%H*4M6WX|6hZbGUoap(^{SccI>N$AR#E7zO0C>QksQ`6JBS zTu%UL_kpGsf7rr@GDWyq8Z!OD2w5dEm$?TV6LBZOe7C5g%}^uyFU64ke|D+kIhL%gCEIJmqstFbn8h z!d1;jWtHCc-@weu@#>^r5OLBL^o7dTrAXnIr#?Xki|O;n!yBJUC6D)*Kxu^Nr4mNm z%q{_zPMvG3T)3%^8_0V$pBf>3X4J2<3^E4CO)+aM42G%jM|rX(WY+2krB81cPk>F1 zG(%pTuaXn1q32P|5~L|@GIGcLvX;->j(t?5A(Ad%?(?^%waoJ9VB(^c#t9(4i8u4u zVbroqS~aum{P_7MMuN%4r?-VIxJOkg?fvH4f=s^A5hx_G;Ulq1?4o9lfcdGjb?gk4~wXij>Aze`rPt|VUp(v`l z6e@On1MC+m#282dr|l+7(i0h+&&ny3vH@e^siH>f3G%SU36q6`jczANEd8>T!M?m3 zGV^ujrKqodYJql!QVc`UN;VQZDw=jF9c*s>+nY)YAjBSv?oJt-Pxq>JG}SnI81^mLg+&!~WY(F}|ET`P@GuOXAwS-K70_>~@#j{u^cmju*&Li&zAjSnyDx9s7Ig@{p-VL@lW4R34M_v)Itj51 z_%B}Ef9qRFz|&+&@uSixxAUqA7y0nox>eqW&5~4>8V%K!t2p}|53$uu8`GXIQDAbh z?hlc8CN4NaNbEyfpD`=fO=>3fBB<^qgm=-UgS3b6r-9JSpU<5>$d-HhL~s`m*Wm+7dsQfm7>4suE^A z2CVC6+gYC<1EU5qW?SoD0FeH@FBNXP__ZTYS34xK2|#U358zH?dy1oHp)R60M57yg zS};KcyZuCB>U3WKq2MEun`i4C7$6MGJL%TfQZ)Wvm1)(nC^VBwJg#!# zpZS<8+t}3vRss30N51D$oJOCbVVR|V3$ze%AQ`6^zxot*5Y96rnC6l?R7=_^`e$V78~j)ipfyB zAf!3JuS6To?GMY6&Sf z4-gTY`rUg;jGPJ%uro>xbGuBN!tFp1e^fO7yeoqsyONM^Cp?|vH$3z)Q0xX^{(EB; zN;*Ha_YLzFev*rkVh!nN9%m(&9!@fQ%T-HY27O$V z!u0SeC*Q11g#-7TaL_Te)*$H2M#Z|D0FpwwI#pJ5ugOaY70amC5ZeJ>Vde*H4qOjX zsRoxKKcq;a^V~@rhku-IUFMZixA_2l`%pSGjexy03crfD>+xdA&otv*G5yVB(lci} zDmxu~WX+WOMu?2UIx;C81B`{et=LQLpQCXL#yL!>zyp(O>0#M12L(1O) z%3}@_XA7kX30XqV_l|fSB~}6>*J%xf`$ykS9tekw%S0X{x?ohIRf^SBp1c42!!MA= zF@zOO>o~XS2Eo zaCi#klQ@A}dUtJwlcO8dzKz4(WJo2jVDSC7zHO4BzOdRz4pvDjGlAWRDN)1LUEXZU zt604@@#5I48HXWNkh(9H_bn`P>bxF8x{kgZkJfXKI0)TG*EI#`_SE#7`nFGZK)j#g&v=Pz}&#(8ErY0L 
z_k>xk@kYMJ1w6iq^Cp`d>zm0jBmep|J|a!H`GLpn=)I3s~9kPbkbiCjW z7|U9wvegu^2)2&5Z^gw^K+rSCg1E z)?1=A0Uk|>Oj>=3i~0hW-U>D5VBzESy7fA(kZZ_CF( zE?vjJ-aEiBJ7{?lBJT9JOkG;?iOme-qZd^sV1`r}$0@ZV$c^fo)L6}M-Iy%-@KnNo zCQ>041*p&jT!b6U9sqd{&sUBzwnBUa2O%6`Ho(J9monf-3w?W6el$E?>u&4luoO)CvPF-q_AzygH=miXL}5n%~p0Z8>C%< zonify!)nunBs+e5ZJSBo>L`Qt#E)SD(+S!hvg+=F(GQ+dp`#qTBb5c+xJO3~Bc)GC zl``nH5LUw%%CgBe;uFP|T$rof#)`+NcwDedyb(ITI?Eb~0x6%TT5{zay|R#?;*8@G zt>193P{A6m)k49SHj#V&*Qp9mE^WJK`y8E)nDbm`dl#a7GLqzCuom}q6G{a5~`9kOGuFXX!DIm~+DDacw9ZZK}7&qi-U?*wl!}{hopQP3j@ALJ$!gj{FxxHg7 zM1t_P9Zw5F63yGKpglcM3%qhaSlCjlu(5{#mqm8>VtFHBT z{0wqt`^rGNk4z~{F9~s(qjWnc>?;EFR{R4hmVSPzNZaB*j2|j|@q|5Kw}Rs1+CFlj zAi@MGGz4`be-ewzQzF6ilg#194~IlnYZO7}I|iWJ0i7D(tq@j3{4HZCU1Xu#O|$0n zm1?ynl~n!Nj)uc|9|acMhjfLhAU9>pjq|awF@UOR^%Z2_4lZP^rDw!a=j?xo+`z zRY+~vjwCEis!6%h{^s#+(iL;x(p!Vg3qo+!EP^*5zvoqwSVLKwd4G%6u)Ur>Xvauz zb0xGRJ}Lt|gr9$MHT{~N7X}uzJwgeCAVhb$KQ$Pb@ACHddFl*>My^%4OqmnVJ}}B> z*vZm!hK=8PQ7$^B1Np30Ta(FKb1;eC3gysS&$L*xzDekM&a=7NR2Y~>dHua|&33%- ziCIOQ0hXw)?FW3|0!p*jrDy|emNu1Xdl%hiVt)0T)RN>khl4JpS+Ka zGl*}4OPC;x38E;&@d&MKkFHc~OdZ~#SRRT{z_7Sxi372>v8QRvxdaVAIi84g<{)R}0_fFEVjLw}YHvrb~s?+XGkc?E-L=KsoJ;qV zwqyXudZ34l$l1CuSd}t9>}O|h1puD?O*s2v8?z@-}PtC z2cP;e#<^~w9vYN(x4)r%XFGwNbkN9t=;3_ys(@CqJKtQi=^B>eaExrKA+Y)!^-lXv zTn<1n!yFH@Pa=7JIrL3+)(Gtz?|Sv#J_K?bYmp)&7obqMXIqjI&Z_g-K+eQpHq~|A z4;g)s&P#TLd8pVoh%T>k-m`3?%XS4gyGaD&9gUxKgq;ptc?69^=>xTlCo5^4&ozb8 zc`Cv_4}A0mZ@^C?+0FUB+K{g2hE2GQ-}-#QX=o5U+0FZt=39lx;`vd!q0()4iXBxv0w^`U2v zOL5I|Hp?{?1a*VbeqT6qhx<(33Bgx;Vf|!&FVQoxB-QQleU4c_9^c}nSvP95=P5b# zL@lr8PY;)WTVGY$3OUyOxOS5LVK>9%+g*9EXz><}GL!VkaZJfpm1qh%^*Zu|`Z}hU z9rnaJrAqseQxE%0hm$&NEI_Q?7C#sb2l5)+>Dar%Tsj@-J>6zgVlNo*2=#hLshC{N zJv2_44e07RR=IxiNsG+`h%Uj4?{$A~z41C&!!O>z)C`=$i|;74EGB}M>p?$wSB4Ky z;GS^>A}T1fB$#Pd;ei%z<(4cbI&MZ*emnIL0BjF8caJhA8`M`Ejm$$F@Q-g)uemL~ z=0+Fit|Z$3`LH3Cs_M$@bcJ7UDj`gij!{tG*tRS<_@id;~w>8y?9N6AgAG z?IbI5PVt!4UX$ftKb9QM;*I3!6dOh#x13biF5(kiO|lMjaVRboyOvL)R@7(g)s{c{ zoe6TjW1~lyI`BPt%bjbuc-(T#1mLXsyi+@*kpZzfM}vw zK7jYo9ljg%xC%2DXuX^KC}+D7C&h6ql;10U^$=ogGb9k={7mnfi?g3fkmWzP90B|M zY%)Q+o~_JwNsuZ#M=zA(GQBbQ80x{?1I#9w!0I9MBMm0=mKJ(%o!>$3UZM8^_3Y5R zCz<43wuimZyYl1U)Xz?CnH^6B`f-yHLTs1JOqs$ue(Z{5)98lv`@b};KL2_ZGwQ3n zH2>>_6h77$534t(8PFk{w!nixN|LWP`gme<##gqqvwGg;e3lOw`Z3=KXs(0y@5r{A z{ed5&p!;E-6+=40qUA}`>?x@~`w?m~=Lkhcrgb9wQ@>VhV562B_i41zgE#IwnCu2N zC`(q+I)8J7!%T)^gO~szWX#Z6iIrvrWI+rW4>Hesk}nN6u3j%3+%h(zcZ4Zj+CwTnN}b6R~8|4ddmuh)hU&`6LXMGifWf6Z{>PBiioHzd)*r> zCHYFi1?!0B0m-tAZhV;!NnTBVK1;acR?BZ&MI3J?o2xby-l9VBvAM$kRE=wQba=Mh z$~jeM&Y_(oYRswHr4Os7&UyNy&1ed-RnI>@fHgyYBtlF1vCvS6wRE-BG{@xBaY2{= zqfz_kjM+*8Ncr95!#W+O*^{i_#A;#U!I3xWjAbbp3atFy<;1t>R(1f@k4>jg*KNRB zR^WovqE1u`gs0tUz`Gm0Bw(Ska))}9ki<&nW6seY_{x=*o7>>Ni31~QarBK!idR$7 zs&6F8Jf35RK<@>uS=biE&#xNtzNc3i#%ww)OVlE=)6&bR-33g&oNqn)6oVpe`NM#t^Vmkll2BRL1eH0{m;ttENET4 zxwC%4vz`H=!zDlXfJXs|VVR@St^MQ@hc*cv29X27xshcUlo=8{Kl1l+bhnZ%VKRD` z#fxiquZ{R8%R)qVhBq7yocRPzz0VWeX7n6Ev01{ zK0z)k{Q(HfIs|r4_f(~)#f1-Fyvw}yQk2174*$yg+YNZsWhp)B-6J3pTsK}xY z^q4Wl65!hooDTEn8a7;hoU`ij2Xxohw2pg^CS9k#O`^_`i_dShR0he?rkTw`(I8tH zkQ*qF(Cp+IQa>6di_$22enUAtWV)*z3RFADk&BwCjL#6<9C`;-LUnk(Q=Z65gPhtL zc&JQoq_EAnw!I%-JSK$XE#2dW#nG0tysH^p(j2Fn7>ZN z@0NPVbH4L6>DuIRXs%_Q5%XeEJbIQqVd+ORT?FNzAEWqMuyW(C#+)2=^$?BT8^-<2 zzPrlyJR9J-8e}bVU%(BjbQ#{CG43QE&K6k`W`{_n6p9_KD8IhhmXai0Em5MBu__mR zkGFpnpj7Syk0#dshVx~ME&Jy3@WbUan} zRd(#PwcU@E+X|}Za{CBV2^&m8f+u>eSn6SkRElYL+$G-Lrsq!`IKhtwnoSkHN7M=$&IfOoYqh z1LgRz+HhEEGv8OwK{I-*tw7I%@OsBJhVG8Xjz#)IWu~o>1&*?Dck2xpm&H(aNl+Au z-(B4M0{_OoVb>Qv5pTJzjqlH-6~vT~)C!TGjG2dl`VB*SZy07BvmD**k4X4%-*yP} 
zEs~SjmJ`ptYY+D@>-HIX^F-9Ry_5sT=*$RLN2#(N2*^7pI?@lpou)dwwqb11J<{KA zNf5Ou;?UGg&{!coUXOtN+wcerOFJgo4=Q=QPooz}*XoEdV9sCW2;Qs7oZRm!+g$zOUKYQl+vp;W- zk=b%LWrCi!(tms`N7WTrBWWk}`MaNz1a@9Dj%ZMh>;|rHtG&I~n>b-diQ8b!?Z;_ZvZ`MQ+^|MXF++ zAB)F^K_F~CS1wrWuS|W>2HiJD-+lkyi}4I}n}qeuNdKTZxnmHw(LU&835p^wa9S@g zD`4*fP*E@l$@E+#y7?>iTREkkd}Zc0cRU?m855~h|E@+9C0} z-+~pUwNutV#+WTWH^L{;9rj)YBBHi#!XynX(a>L8UpJon0r&4gk0FDOw8QBHAmgZ# zb89r3+N3whr$IsXD}_F$hoF}Ljzfi#r`0-GT1HZudObR|)BL0Z<$5=Zoj{-}WZb>t z-*(}GXDnttTHfEHPEHoIj8MbVOTD|DyX5gMWi}sXtt-P!M~p@{KIpw+_j;sOfZF6I zUy*>!u+iT4e~l&39TxJ$iA;mPR&*pQ|14|(1O+>Cd^bp04@ql zLmFs}6Ui3a;pw{Q57JZ|IfT|W_HpK7p^#Hm0-*|#*mZFJTH+s@w}rR` zYsK;r+rs~8>5vPc51Rgu3osdwzve-%R&AUzpSiL%+e@)+m`zDd440C>THoALf#iU> zUJ1IXr%w2u?@G-YX3e5;pH@qPo!s5KzL}H{SKmsNaGt2(g@pwj zlGYE*#|t$^Dq$xoO0I?p?XOr;62#Werng*avT0hoAl}--XbZrPT()4?{%Y)p`oFF_ zY_zsDFPbmWdaHfYo*H=T;BBRk5b1qN%yjLGz*m@LSa`US5I=dkt1kbuBg%0;OOld0 zg)dh>kwcm?#aMVc_D!4Z&|TTi2W~(i$7iy6-%M#6Q-kS0o_)q`oCs2&dPZags~H4M zgnR8a-N<(aX3=OB^f)%2PI;2b*UWN0yTDvRZbb&$7!vSqxz!6_?}{9*)QK}=_@5F^ zUDsL_m0*0MwH+qVU=J0*Y_@bp!YnV&di2X^F2Ms-1$%gDv z-8E?0XY5x3xPMz9S1(2EzIw;5!q+0z(0Kd6mK|WlNEId%<5=`H;}MpKp}H5U@KZ&Q z#+QFTayo<{=Aq8s!nFWYOWj8xF#)>?yjna~mD|>ARegQWDbnPx!}V~#&hoymg*l~u z=7DrDgH5}LWsm#zN#HqovTxw3Sd>&w=a=G@nVg9}m-p7M=9-}b9$MgQ@$EmghBlfI z<0IoEirmxK*#S;G4zD%p9Q8!O@Y|{HjE5YHoYsDtA1*&q82X!L$qvoDH#w9j?$b2+ z6XgAN+=1|L*`7z(7etR$Uo0y9>Y>0ZV7>)c;#?31C#Tb|O_O@xoe8gK~>4Gm3}T?E6|PK3J4E|m-Jzosymp03e)*!Ahs#jcD&nv&4bB%Pn| zjr1ZocfOxB5u}C|AHq^lV4n>~h`tBT#&mVB2#7NuG!53=4Of}{+XT6xM;1I}>;sku zDtIVttXFLj_pf=Sheq2?EKhP@jXs|9Zf-zCUBaKd(bBuSfrLAc<8bkS426~YgVK!( zBY|LvR-V!}L(0eCw45?=@M{d?QIF2>H|E|Y_x7dull@zE#9bPg<<-cBm!LSPwNu@Wm z%VAD>iF}ywzH{^4B%RCh1Mg;GsGQ^QOB?>ADSmLG^IT$s) zByx*n`94|HU?TjQWua;~WF2L0VIkLc=LM7A1;JzEE%~NtmBLrtCRb{#%2Aydui8+` zLyK1q9}f%xB0oR&F4_p|MBdwkg$0AO--1;uNfdIN%ZEXkF7R<{V9W|(@D~_80{g~~ zxbTU|Kf~ML|L!4`QD{)i<|4hU6QH7C^l+9B5dz7P-)a(EIT|6%GEDalR79=q7NF39 z9dydC61odS22ib5G$YscL*~Be&9ytjZDtJxTG+pHDn8MpA37~QFdEge>?k_FaO#^~ zeF&ScMEzt-Et8M(8wzWH^wtQ>Nl_Kil4B)@>1+GYM_Bn-@D=UGw_{7`jpYo~RtG-O zYv$Kj_1?lkUCt+oj|p8^bjGitO&TSQaM(Q!ExsOqNM?Rphjp>0b-Y#+-aNm+LnBIM?ie#4#~B zUS*Duv}3S{uTgqOPD7zjQgk9pueEtWfuZ1sf?}!sqduZLRiuQZ&1Sl%{O8Lae$uR) z?55HmhPv>v;&}JI!s8WDtfXmmu(d{{k$5AnM{fJ8#GZ$HWNZ)f9Zs%wj0HFjU&Y4Z zhHpDZFM6#8w6>25@{HKQ1%bGTWq2^#eLrB%!dhm-(KMYP?z;Em$Hi1|H{MjSf#Sn0 zV*u1l=Thme|Cd=YZk^}IqY!I%&XV9hqe0XBADCy*cCx(FMCU4Js^}B5;6DC%Md)=O zryh~h6O|Bn)WgYnTKGdT22JU^QgnW1+0d1qnxm0E2nI^esHQ$C6)5`u0CWoIq@2i* zadf)MAjjklLo+2@nSya2>ks@k6NFege-FrzNoH+vwlvs$KEG&eW}NWzTla3^)5?tT zen<6U63`CEq}o--RRn$9cwu?U1sR@o8aXX!9b}*+&Jo>fmrE$Y|7X4a3vNR)hvB3BaE|;8$;;iy0hX=!Fsg%nq zzpGf4KEn5L$XRf3RcS~f0tUdBV5LyqKi((q5bbfs9_rneDKzgbNqB>M%|t5NQ(P`|gz1{2%GkiqeWXwmy!R z>k`DMS(Vi2p#Khz%8}jyTta=*80O~}OT8GkmLL_Ln6VG^TM=Q<=JdvxQ09ltoiAGs z^vu5Q-7-sMEqx+x?UX4^(^Kv1%mSe-*CTeX3d6Av$9JF4bmvR~5@_-AEx*_DiK`@Du` zJXy)YG0Kl+o-@37@3@st<6F1{4m!KXVYy3yYvq>) zqs0!DcCE9Mz`$xpV#CYOP8w3o0q?2r#k!ibt`gmv4OQ1!gbb;PSsQ|@f>@j)`kxw> z5g0<`yPqHkek~v@Q=mx?6=nr@tg}76lb-8}_vY{USVwD&=|Pv(e*g0}iKNP9Vi}3=@r+LL zl_7@L4hQ|A(5q#?wQ2W{6W~42i=};@{;Xltk9InNQ*&%op*dQMOZ)XWLDus1nQ3v+ z8)Rr&cR{hE^@*9BhJUu!ro&rhn8+?=8s5w!4IJs9m{T79S$aeTP!lvSX-GQJy8- z;o|{smdjuS-~)X90CyG9v<2c`7NIuWdI@v>yQnByT2?K92dyKaeMjuykBuQ3F8fNx|k^CtQhb)y3 zfE|G2kH6mQxv`^H1Pg#Qq0IkJdv6(5RoAr*%O<6}OX+TDX;45?xwUiC`0iiNpO;@8dmnqPHOCxtjB%dx9CPI)fYq$SN%#z4Xu#Sz&vQK0lAebCV*_?1|+uXwK5N*P5*z$pwly00o)g?lq|hjTI<< zakAsz*+#D6Lm9xGF)2Z%KNimc-+CT$^W>L#eSaI!qVX{6cr^@-F=H%0{yA>zo}fLdJ1a_2+Q(TBhl@ zYo_Ar!&n_)_l(t$&$4BhRf(Ob3_Eu2;5f6F=puSp5a{Ap$H`|D 
zxX-HkqHfHA*amL2P#4l#YMW)Hd$WEQF5mLJrQl*XTIxexOv>Q{Sx2E%7b6N1>>DDh zENAGRTF0S6``v_DOUpf>!g6+YeI$%?jbdip?rycpB0ai@n#%rLb9st%C%0i4{l-Xx z^&3Dna}=DX*6d?cOOK+pGC7}2MliqpRZKo;G0q{~%H6w*P_Niyg!AZSzpx|J-Zm{U z`e3$WItt~TU#A#)&v2H-&Oo!TZ!z1O97{sl-v*{P6a(RN&?~maW(72nJnu-kM!6W& zAuAk{%+?N#By^kn_Tq7BXW=~#o=2ouGP~&S@6S-UD(AkTbQzSaEUD1&!W{dT(I>hh9K2*6-M6d-D9nk2%|Q z`A;Kmr$1whd<7HfXn4@P_0j?2NM2AR4z&Yo&s!;+HcO@-yK+Hx2IPI7hh&BAOQrVd z-~rf9U!S;8r^`cwVI*$rb>DuntNqqb>3e;-7@0;H)r@Dm67l<+DaV>Mh7GS3+Eji! zT86hj9P=W&DZoev`nVu0ym8FStSocD^xbO+th-KEg{FzYD%pHPozN&icID-g7aG$p zvQy-CQJSv8V&LK-ilp;W?Y+u1^l37o%^TEXs#Q}K-1L3Q>x{t>1yx)h0z0w( z*k&b(0d9;n9nPkt=hfhA)}$&hN}phVd#N4D{>jz?AqRCAZmuEGAFLs&3Tm&9{g__> zJA+T`j4h`arjb=w7lI|`bnhOY_ESl*=VR8$I`8WTPG^c7=Fd*sg<=me!5f&mP;TGG zU{=b4DR99;fsoD-7qi0yun+kca>;SAB}}(-=2oda4OO94#!Ioum&p~Xx_5g z|75&h(@u*85I@+`_~QTh1>^#bc{5T2YVlZ~}3v-$W`;#u6R*CcRUI6ZY#H!6u^^A@ro~1 zes0+8HjLWA8!m3YzkJ?h@q^U-*CTAf%X++z3a@)EKXu3v54gGWL1z#@$3W2HmAEwP zpTHyPbAU&0C)zEe*Hisr`TJRd+<9^@R}ZDvi$gm+2YV)wLe%B+Jh^i)5@B0>GylMo zpSNMxfO&jiUd+a=-&j^}qB3AM61Mt(N1}4F*vk z(qEjrOpazHo(ReLD7-c=?5qoZ-a@Q9#cF~&<(Va4m9>ap#hOhfI6bGP+8O7$y&e1Z zQ%&MOBl_ljZ&%4Y14|PgIYFF~A3i|w>vd2lVp!@Jd~&KZ%mP*pA4zYQ;(YS%X~w{} zaJzjKIDx9d@Y|xt0d=7UEiGR(4g}ww|5|hJ5`UU7B9lMoDo?dUHSh zzbX-cutXs7Q_B>64Ucfj$!cV%2x&%)NW4#q=ggGYBP}|cnj%zI{~%!`-L$_@V(%~q zObP2jLgL9f2iR%|dCUC+))4kX<-&0sB+^rb*TGBsxTW+yymQ~*wY5#v7QbHproKKj zD5CM0OP@8(HP?L)&{Wp>c}qh_3Iej^1N zVYyNn-G-dGsN)Pu0zSXtGj`j$L1tJk@$)&{ac-{*Ib1^Okv~6aU!6 zv7a-2ze?s8F()# zjgqJX$c$}8FFT2JfyagrXL+^(FTBUPKjCQFzUH}on__R;yJ`*7E6;V#G|vT@X-t^4 z=x`^VmPAcBc>0KV(dl+yq^Ww+ryp8ui>5$M7mv{hY(%p3O!~LP&YEJM)Y&1N6pzl& zX5bX?Yy|ckPvu=34{WPWg`)RVsVt=-D$`_0RFOjU)$NTt4YUQ~}p(X<#RK!^d9$Yr9)&l@0$1}{g& z!mb!8T=kBqN*4(zC62OmIYlf4VLJDDt4W;F!|Zb#i`#--lYa* zYy0~!gnN3wgZCGfmAlEW5|cibx4LI{Be@3c!*U>!ChE(h~*~BZB7w z8g%aJoN=2X+{6?zb5{235O>?KST<@0qi$%3weJ zi4+%5A0&$aWj-o$vPgwWBk7D&7RMb@+}3Dsh6QyrS-Ff-+z?w@Zljv=sTG=PR;~KA z?|xNk`B;>&UUS$;#j*6o^wpGS#=~0ND~vcVZYF|(eBb)P&w9UvRFei)%~YAGEVsko z>F8+ZdwJ7TT3$Sx6O~V^zzlCXBcB#-z`}ZiM$zxWu95h)%K(Utgkh;Q{%+6orQ(|? 
zDB0K1-~z^zj%5x-0(4|)A}XMA!zVT$^oO<91&?s9%2y=wd&VwW8YJ?M@r|$P4Ilc9 zJl7M;f4#_Ria>dTZpisT6fK<*V47q`g%AYu7^-Ud1SuKLt*H|B8kmDXxc5^2LP{V0 z{|QoN9{l%^vhNO3zPp|pV|Z&V(P46dC^u>k5XbXY`)f^yF6&-PkJUTF3?zEC9pQpt z+!BCbHjMJzYNQWSzGq0-7oosg1yC)Osf3E+VM?dZ27VK1lDKMzeNy9wO~kNoA1^fg z{Wke+-jdE}m3AXti%pw_>yDXKqDH^z%q?R*gQ&1oI@m_*~S0t*N<4kE8mB*B znFwn)(0ItPH~W&1L-Lf#P4dOVoUfYs2+R4)txuYcdYFAcweZXL1iwf77QQ#);iAGz z+{owK-+v`}Q=zpoRqvW%zO?g1F-UCdd!xVSF-rXS0FEaoLH_8b)%&bLs`L$Bt!sAx zkG4Q6q{+UN>vlDzVUk#V?)xoEBGik&cTTvih86Ns5nxyI)O7kAV^XsrD+N2QifEi4 z!6*$vmu4SxUIyfvmy@rtmaBZRgws$_73 z4y-?h(|yxIHRijv%fRd8rbjdL>FmKeessl06#nc6hOMbeUD(=D;0%GhshMil-QtB$ zr#%%sy^_68!u*UhV&@AKY`ubUnWOwvf5Rd0SDihwi}Fnp*TrZK>CWz068E^p#U$S6 zP42$^uTGBav%F$z(M!uwzc(wVD{S48iKSkyYuFt?DEwEnk4XsQg3BY+iqV?B+rcRG zlZxihTf1)K;FBZr(<_D-*G@rN#0Vr`_6)iu$TJ%D=@}3zB^_n8wbCoufztaCfI{3D z{^ID>JEq*#1^Xw|#wO8&l;22-jft@UOq;O0grON zx`|}!q%~iy;@Uo=sC+1zOxvKr&MNe#CQP6bwk4RhK*+4*;+23a(awvH4}`TG9}{$s z&EBsDHLl?*wf!DdT$TL-T{qi{Fi{3qeluNjXG`bsL#gdI?}S8 zXW{Q~fu6|lR#c{DtjCResV&!J6e4)AuffkAE`JcInJE6qixSl3Sef!aLE4`GPe|J~ zzu$$j+uGmc0ychRHI6oeFc&VuThxYeXwWf+XOK_Hd9^8}R(vO+LTiI({Wo0waAu@R z%gjY(rSt}mN#?p=EZVa{-I}n){wJY_5~V+3|&%HeuB)cit)^0 zUAQyXz{nXZTc(FFyZuB)>q`bH2Q@Z>aGxrnD6l=5o{wt?eiz}yIO|n=Im)3!(MzLw z#ej%-)s36S_Uh6&t;aQ>*%1lQWVXy^2MJVJ-{$*s{sf~R4F3a+mKaDs+fOLmLVTTU zHLIZ2YDS{Cto2RjwwgtwDcHhPKINE$JV5|ZPAL>_{{o|mkit3=b(Tb=^RvROw+n_g z@1@Z^P_vrwOSpIRNMS6t?!p>hI_A!A;R3(~Z|N~+*?a04G&C^2w%EPL{|!C*HQcgE zkrQlkLHujkkyw@PKC)1qid1KvhD>^q!^4dBe3<#0S(^)!{c4g6Hhe_V|C4*1p5 z)11D55I0H(=f|Ng>gmjyag3X=5I z4_Z_(MN>%Pz$sIGbj1tgm$gHZRWw&ADywcGDCp>s%EI%L+DUcr!}sFa8S&$%LTPy@9gQN^s<~oa`*&%tc4Km zSdu`?tUx*7U2Z(t#XN8UEwHg$+|?`C0p4cYQowc_EL|k9w75Z4gZBn+XS9Lcx(90R zzBlMO(fHa=oBV2?D{p&TiKVwb`M=XzRK$B{Jv(Hn+7VtZLepQ4&ZC`H#PRivT(M78^Xs%0x5dBWxLgt6Q7S9Doao?O+|?a$b3c(8jB9d-8W#5$?@%B9B9$+Dms7r zk~EP}SV7+T5`%F=07clI?meI(!0o(cL@bsB>4o>!A6!vu>9+YY19AG8fc0X!3 zn{KqVHy(INqiVclZyQit7e*d9`Pmz$0xE}*a#Y&m@PXkCg-xEZip$6Pu+lq**5CB!CBYjF+b5;7IO-4#G zSW25W88@C^IH}t^3uFv+{?R_QE)%Xs(stK7^}#@L+wzi#6VQtrPLWd{SckBt2}rN^ zebSJY9@}YD%ZE z>r$-d4lw{q!wy!rmVz$@YSi;-rQcY=$k`AHtCtx zolvF`QDKS-}`>3#`!b&M!KsByPOIf*)xfv0UmFb&I)aJCBBf zq93F+T27Svx@-+eqe7N~3r0O7B8%NuRp!7dt@ZJiC~a|7AhYoI@8->|mr zIX-A(BasgfL|SQ&G6gSc(f$oZG52m&#-BU9YUU)nMD+!$a!~j^Sy;m#yA- zwnVSvtm-Bg`cA@;J~UoeP2N$3*m7NYtqVJ`Cs4ty-1Bd zp1>-mWuWMShoT27>ze$xts&bXCrh$7APjWG<-&}ky(gkg2|el=TG2^Q!f!Gy6IQakdqIN!INUQq zjnij&nVb2-hz$^&A<)sef3{f)-aoB;U!hqw(k2)aH(B2Bm)=%F0O_cE_~jyUx5W57 z>jieLUnBbfl4R=DeE|<{I0M$n(690w>4eeU@W#f`gH@M;XC?#uM;n7r0MX>d8rAv) zRJhA3E{U1Vjl-A%oZ!#A>y!mun+zn%#&)?~n)^lx zA+d;iY(#IixvbxMt>>2H(-z}&p{vn!%=qZZeAZ>D`!I5)&{>OF+fo_Y=s`Jr$pZ!s^qWwf54#EX8QJ(b z$f5WHo##03QK_1G8M^)`r2Cc=wKU42Z&(#P{J}_Gw}8V}{FA%C?In$lz+tAIBpY&94t| zxK1L6kj$Wmp}#q^+cuhRN)3D8;Y4rGzglGy2rKJQuQV3_x603O1aT?}dAfu%wsmwM zY`dc=_oh9r1cZ{_AxGRPKmz9mKq8=(%vnundaxs!b}2*fq{oWWb-XwnNcDEtLqw#L zm0Byu#QPL?>MHZv%Lv51Nw8aCr-kXg%R8u9DXUf)dJD9M{7uwy`{>KF?f4O%suTb# zvAonif_brat2%9}?U`RQ4>VN*JNwwAV}_&-q*tsCIY&Z9oRA=V)C6)b+Tv zS$e8Q#nI5JC%d&&N^ud|3QxS&FZD_otBP< zVTd`*Nc1&+Uz_Z&)*&FzTcLd3EFfu7ne}V1-Ha&sM5>u<6tSZ$LL|QlU>K2|BwPxU zn!HV(O~Sa9w++yI#rt-H7g9|G67#Z8aBl=`u`t4SHAAv$ES|!(5vpl2ucqr>wSU-X9YL!iI~S|$fXwh!DN4^t^F+o0)n8DiqE4Nggl8XYk%Y0s$=uopIQBF;v6rt zgo+0^Q_o(y(FlqDXp}+qP=_gLv5Id*Fha2%y)!isO#$F z%lkw?&%y^hSF}IgkdXo>)L=o>f3y`5H}P4XmnVSqV=f?NG}n=Kt3#p|6kT;4Kam>U zd~6OOuT=)JIBV~Q6nVl$oNn7M6(j9QK?%ZgU}#Z`z(eG%hz#zKo2<>ZX$S0KJ#urNx2i>p552Z`j344lD*D^TRA7S$p<*H;jN;`V| z@2+i+X2!1$)KnUPQ<-k?YPaopG~)=9{Wap6?)WJYgaM5guSOubgfe#HLr5O%&bV&QHq>64t~!kZ z$6gfM?hU~}q+M*}r;=0soD;tC`9YF3xq28#ioj(Vzc0V0y5 
zmtU^qjeH`YU#&(3+Cy0m!@>sJ;|FCcXn`{6T8b#P-}n%_l)#x`E?p42!hi@Mtu^c% zq#NtV@!ic-Y#`0HN7x=fHwFrI?!Em$SA({)VQ&ws8?rNZvUyeE-RlG$fjgy$Fk15p>aQs8|>MxUHqfdttEFomx$=RpfogauNYXmp&6o&?lv%)A_qlZgjhG`6585`o_?~@&QsSw)U1LztngNl3|2G4JV_^xc{OD7yv!|Y?R2s=W}P+#kR12 zH{M_*9|KjJLBp7v!{Rb92;Q!LYs9o_P)~{-X!F=J0`?%GE1~^&?UBI(Tw>B3W6U2! zi}kZliTywNl=j!5QRlS$Xeiwe*r7#e6US>>DZ-qHO~*nbV@wZ}UFP?9N2oh(f{fY{ ze`Bf)ZyKOQ>}QEHUku40^*sn70j1QVUhVUiT7?&jWT&J@0eiLfk~8hbH{E*4xg>9& zYSp1KHyfN$91)&j&0>bPO-@cG?0f`vlYhsN=cV^3|-xPFjm7_ z3mwL?oW`rwL^qaSl^`s+s=y_c419eQAAEDK>9>Q4!f+7)blNIf*QTl{+unvHP|Gta zK=h}xJ-|CpzT#)qrU5aE(YilRW8Bx7mZ|uu%gf*1)T8MQV@d>D;R2L)7jQq;o{}E4 z-uAbS#T%qf2k{=-OiCg6#qH5I-jP??Z0~4do>1>4|5t~!}Lr1&pH~Q5z8R?Fz65Z_;mqEz*tWS zwUGV;aEQV{aY}2cRP5N?+mqDVh2LHNFo`$O{nGFhA|Cug*|8?Nx*4CVA4L#Y#7*7V0= zexe`X4&EwMXf5Wy7vHa9ft?rTa#5X+>li&sVi@iHcY*Ja?3ZQ19Haw3#Pi?bqijA> z9ljZ}lrBCdE4Nt%x`?^#{9Nl${$HO1KY+=W;-O>n|8u>cBR#(#14z0pykWJ$i>h8c zt9NlO7@tpohqSx|55T=ZnN&KW*3q6&`s?=*RcJ|Ns$Qw>k!k| z{G&?z`ENU*CetEi`LlQM%bMOG1Ry*kAhZ6_DX79rGa6wsv&n zbgguxKa;vjd|nA2RaeM+KK`Mr6aTZx!7;ZaMJ?KZLci?+<3%Txr3J_`)pB8`FiLqp ze~>6(B05C&cuSe5>-#Gm*P`hQs{5E7`ZFO=b^Wz9*B|%K@PIpO)PCH5rO!fPmgWaq zeW-L4(4t%16|(7S`kGlKi&HIBZ!^fd&~y;CxOcAe;Ypj7lcn$LQF(kN+7!hv?IkAj zD!FcDpdPu*$6;#Q_V6(b3mXMO>F!Kwnxd{wYNVEAmOu;|1e1JX1ZK}C3Te;1lG?mo z#DA`^op%WfxAX-d;?r&^vM`}zq|{zmMYkg;3Sd}yS%*w(MHcMheg93be7&y(j?H{e z`Gk;MMP;=Q8aS&n{w-c9)*LaRJ|LNr%;;BYAGI6cW(by|}M2Gt<*r z;0Sfr+uQiwo?WWv-AHf%dCIDYWRcaKB$u6vPDC9bF8v1K-8fY#`if z35Z0KZVy*ub4nS=52dk&%aq#hg9m}x{ORCH5Sk&_`U`$%8 zXvGnD`e)C1d;3xY>#*e9pJT)1Y4%g;oiP%Zf9fx^C~SFkFacj}lfCo}$0*Moj5mT) z@^)wzYaBKCOBMCQE8ZDb-&OZ7EK1{%hkMKHJoJje&JC!S_*J$yhp*n*C`!gVRbqMa z!N)fTOQ}4mTVN-#gB=4KtBsIuz3l3(rEI1j@Wc+CSYqfJL32Y1Xt7_J(V3GefMn^l zgu~C(Iv|d#w^JR(!+mUj0iSs=&2v%Ty1QQ>?ayf|yuZnU)sXXH25Z(vF1-1n>8vAv|b3)8=`^EWmv~-kBRG~T|1Q#e9FNu zX6#%MT@p8(Z7rP3Hyt)KpM|YPJJ(SyW(sQAX{8AU^2t^!s-woPd2w>>C7Lf0%JXOmAMP9lyMJ_4s(drj%Wp zIkym^Kprhjqc7W@&mU?0aT;|5ceuDFSlON_aL^uy^BEChZf6kT}!y zQLe)*%t{gX(fYbP4H^SPr^iI|vlB4^XYimwk=CLK+qv+AY| zu_~-%xba<}w~j*WLzya5_ScgAcz1SKp@r!X-r&HE3z3-ecG0{K(0VbVUsz#@-F5*1CuKy{pYhTs*S+@k-su8_~+D z{!Qz7CoJ1B(#@BSYwELFLWf4&L&jX+mhlu_-t^buFsv;f?P%S9z_etmzD2eBs4hs9 z!>q1~Rv$jvOR*;FWUt4+@4zMegf(xhpD0%f;%4=X^oqRDd}2rUi#}Y_s4ehzLS21q zlv}#;s%}@ImCsDE9@`7I&u>~jePI8YPec=eun;>}XnN3M`~f#ZlojUW!&fO4JEGY} zy-4h&uo|$_q1@`H?q_zVL&Z^OLV~97iu|V>oyXc=7)juxzn6SiHN8x0svyiFzRXN>O8&cEH6bPJcju$xLtEVI` z_Y{xoe6XNP3`#3zBya5Vl;OTC)BT|}_bDfsKQ*^D@hVw;WCV1+DG31SDEp9WW3d16 zN#4*jR`m^Ti$cLZQf|3o#+FUqtv&ZfAX5!HQOrvGmaZekpx`3#igRk2x>$oCcr-)m z(-LNKntmo4gJ-^>d{!&ct{4SbA|2UP#n#m}feM-YQK=P0fLl=13m?CH=feF+|CsG~ zL)A7SIg7CPM-?DSbbrsChnKlo!y%ddJqVXRRrChj9bc?Vt|tyd6w;+Ew}SNHJECKx zCsV4$#0S~ox3C%5X;tDFz5IF4jAA3N=~P>Xm21To@DIvVWd=UA_#TB(upT-@w` zxcCH`vk4w_K9n`*{{DVu6sgiaBGbED>zd(kGTrrWuw?^ciVo+s!&l9n!~X{d|Cq zyQ~CaZ`KI*Zqoz8^%YPFtkS89ou9;7wo`KTo4aB1llf=8C$rv>SE-EjMM+>>IY!FO z?_q`7TUuIC!Zu7T)KWDh;dU8#gO68jRE>0=&(rTfW~Q@I zsBL!SaXn$k7E~%;?KgWl4u-@`JFgxQ#4<}q^Q`3x;pmj)hhfb z%JJxdwA#9V-l$!JbqbWk2f%S1iPds`tLj!O)I0K%L7nP8d~J{KN8in~)#Ap`-GscfEYstp0-lVRwe{D#4$93rx{;fIARU zU4bC_bt+rjVI_-Bzmf65hO7W9OPF71f={N~cqU?uTHB0#qP3+$*P#HAYAGM23p%@! 
literal 0
HcmV?d00001

diff --git a/doc/source/_static/reshaping_unstack_0.png b/doc/source/_static/reshaping_unstack_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..eceddf73eea9e5d3bd0efca118b8f9f75010ee4f
GIT binary patch
literal 58533
[base85-encoded PNG data omitted]

literal 0
HcmV?d00001

diff --git a/doc/source/_static/reshaping_unstack_1.png b/doc/source/_static/reshaping_unstack_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..ab0ae3796dcc174c6a173d64cef214b6d3f17ba2
GIT binary patch
literal 57978
z?r=s`rF@J@(aII@DVooG@goTo*p98}%^Q`?1Un4gvQ2o%g%T&(V?=iJrs(sYaJ@@ZI(XBF($fqHr1Vi`RJb zD!G*Ar=Fxt%3CW6f%Pt!SQ2C88)ISA6Q2yFf8;{ z-;kgUt?Kdz%7-QmewjyE3+%ETOTMuPfb3AS#9LPXH3eyQ|Y^L zKzS*AM`7H%jv^sZ3ovO*!P@ECNh_sq}!mEaRO2JLW_iN-}P`DFeXY4 znm8|v-L9CjJ5$qLI=-s#zbQt=Ye++8y-Y4)@|xjoc&2WBx$T@iwnS^hd)qoM%9DVM6@e^Ur}>s&-e#feke~H701XbQPUzjE*_-^yLdtS^Z0#IW#~gi(#4wmUbL9Ah*ciqSv}m>Tj;zUf#b$-u8|Rk-PP@MM6%CAnV@haJLjzOh5qsA<7;&zU~*dQGdnd7i%S0ZT8+GBMALMi%8#igd46sH`f~ znN+G;$;M}VG%oa@MFJ`3!5ihc8VphlAng6FcIoh4wtDO;)D5{lKoFgmv+Lf=crdy- zg6GWO8b^(%ghd|_#5}UH^=u<3%G-mLKqB4v1UymVM7Z2?&E2B^nj*E-a;g*WRR36w zY6q#Rw)*udO8>L(<%UB6-lfihWuBfG+sPvmTykD~$WheXBHssPkQ-4uq=fdZg`eky zz8nv-EgXeDYs+5#$RA72$wb_Rm7yt-YG|-1!@?UoWWE)b$ zw?`i7gYh21l`)a0HGW{G+ClDIqtkW0fIC-OaVXI+Zv%(bZox>a@6zQX&^Bjk#rk~8 zwSgHSs>ftqJ9}a|cNW;_hKe-a|3u`CCnH1ksAY3g2}|;miZx;c^1TtYlB}@pXD#z3 zh@!h=dEV9kIZ83~sTK!-w-cdvtX{!paZU38UWYQ!Gy*CG6L*gCWPkb`x;++R>x^2r+>*2z2 za##77*Mh@H8vXh=9Q9_JWq|L_*lTcLH71&U3Zx7O#f zRD=3gkX9)oF|h;V^B%CD4!ypiDGHCtZ7z3v_%tA89B(YE8}>k}EX8Gv4LZt4v$Vn_ zf;?9^5+xJ6bCOa;F0A}{=%>%_H|Ih4tlwYsa;@2A)?mKO@FqZ7|3WD(Bh{-)XOS2i zs@t_<@z(l2?_+^|eqIlk4cx=YsXX~)i|$_uN!<=Uk=1Ww-?6MmlQt?-EZvO)UXmP{)u!^CO7M3eTTl$?nrds86E^z@^8VHLtaEpFq;HGgk;56M4 zd1{ytgQ%zPhC5`zvqvBqU81OAe02bpHOIqvcIzK`BDYV zmQFU!T~hV#hzAClC7YX^sx;$AV2Z331t9-F;H4kpcc1!$NDIEScXMX3VFL60i(m(8Z>E9|zpG4h&P}DG;0n6DIuxOMXAqV*sME_V-2& zKLg!g5Aq~|esMUta;1J^7e8BSQiIM+TpLpJe+|3=+JMu!!BiuSyz|gJz)`5z08Otp zO|w7H-Mj9v4Y7lLtKm%C*N2YJnHOfq)TH%$YJhYCdrH6wAMzZaMm)`Wa1eB%Yj@C; zb7Ve0>G}v0OFcGse`XOm84d867+2r)xfm|3BR>F3QcA8xQPHbOWM>nfN@VpX+yEQ|4)UgX1(>kb zCkpAGq5a&~gbZFWHx*JxCLsNQvDC6_b+Xrh{YMu!YZ(q!&ymm}Tsz#D=(y_4?D+xZd|<8#!5H#0DBdOr~5%LBzqi?2j#i! zzl>N?qPfj>V05;b4+?PEJ$%;K?=g!|)1Fm-{d=KROqh_x0v@nJt~O;Cj{+!v(HOj;#c}6dqQv2iPyc&Vp8>CPma%O~ z{^zK60^nB|YR~rHqXO1c%@0r!7N$mHKY1y?mf1TB*f&Cg_{kr-{I}+SU{3IHV*}tk zivQ>!3X~7#bnmIrDyfN*zb%Vkt!$}q+^L9x*q&61KlVxXR6n}FLxlaxiD)grL zZ(Vk!U>qYXtZqC?%z*lPTZ)K%n(H0LjsB&{{%;960%g5jO(Ir%Q=+GonTtB+HS&c< zatWoHdA#NDzu;NO5mdX+swr>Jw)xU)gez~2)?=mO&(Z24VPo&nPSYvB>8gw%LZvTS zs&wFSdV+7$$Td+!U_s9Py?O*hQ5^Vtxj`Y&`8TD~BseglZC|(Q`Cwq&4fmGbFTl2k zTdL|A&oK!HQiXXesK!wg;~^TRhegQ;xh%w#PgLx>@=R#Xc?!VC!C#WI68$=l0P|-C zI<8g)CoI;K+-<8&kd zL|{}}I`n&3P3O==CGf{ls_!W}!Pdpyts{fEmCyu>)!k&cM2)Cp9{z$?O#1D~<+jew z-+XjUG_9HpzVPLBK` z3v^DB9=5*9j35(fwt}7R zp{0Na&zhSWJ^{SA%{^M)JId(Sa6`lF2LQHKY(4yfA)XsHnsg7DpZ|LaN1pxeOo6}X z>+j!s&s|TH{+VmYolE#jv8oH~Z7wdR&zS_F0HYj!Xh5!;aQ2>|SX+%6`NIoxJ$~}F zhr^kpn)q6q4525fk+v8iLf?JbLd09TSkJuji3OU~Hhbcr9}Q%H%)e@Ej&RtWvB3at z$b`y=Sv_i*OZ1A2yR8J_Ip3^zxv!aIH2V~n$TAfMsa7>zI+`kQ6NXn{%~(Uqq)|uf;yzQSdVH6SoWqVGU||~6lZ!_ zF=oGZU+9j>BQ6@nD34;7!e$VOi8tqXK}Ti6vTl_0Go*MV7cySQj_cZEfwxwk$PHS@oZ^41Ts!9XYn} zy_r^7I-HC)ovh}rIVKC*bR^u2h(THSs9I#z=4E%1rU(Sq5-qV9AMZXzFKj4MjN56^ zYJkvFF1&7?jQwnm4yv`fn+{+)a2Kw-$q@y zG)s1xZj_}1Ek-!T%;Kv(5J{*hP>fp?VSG4w}7apQWkRzK-m64!2QCZ9~<#V4G_61evW2 z?A*yiOB0Zf5nKnC>E~GsFtCzTAB%8Zg&HT}sxH zl~j7!zYI>G_eTOy4kt5|5nM9OJDsMNn%=YTyd_yX;^QP9uml493*$Y+Y4=tU&vsMhr|zRLI0<$tUt2H;rI3DXX=aSz&iDPKN&bZ5iD|0 zAfw{H*`N_kok4Y#V5hxi13;Lj+8e~o9giBhf86tNrJC?-41%z=s3f#hPOHbQ;V$0O zvQv;`-LY$tsR8jIW-8+)-BsR+MC*ygxx+YB=H=`=nWnr5Rhn+W0GU=_6L{KZOM8pf zE764>fl{g8-Ka2Zbn9;0#3%T9GS2p?S@OLM{_%%hP|4nWYuRrsI)|Y9*1FR!UHbwLOVI@bCTuh)_`KkSgfa9D5C8dAenl6kbv&zOrdef+IDNB@*%N zThE>TVzCRVb&QkvFwdsg<8HSE7OwW~t_jnm_sQHf+}kXVnP(GZRGxfcPC1E2Ywr*jVlG~>9m4K;1!}{Iv$@K~yijZt zo5JuMW_OA(!ZG-d+!+UBjg>&6pMl%qCj;_T6DmYEfSX+7npzbdv!&}RfzDLic%Jny<&<^9H|`f#s(gI=et z^2=Lwre&>{fe&w(9G+Gp$Jpns=!QmwDR6-cD#pe164{kc59QBr4tqd-2Gx1r6SNB7 z_OvFleV6P6J@A9MTg5mUc_W1a5@$7mt()zM~bxY^G)HnpmP85 
zqNOnhJZuTvUwaie1@>aG)VPd_^;FkiHVxVI@Mp6ha~=qMAnEhqq8fL#Um0u0sj9iP zGIGxh&>pzHPzD!YpqG6^J9L%Q<~v&3Go8(hu*NZo*coP4Eu4PUsFm!$?P0aQlS*%E zuTsm6x28oJtqGD7C&9_Jj`jopGuv0MTS83~heLh^ET&_$ela5L?RDpfQJ`|biq3XIJ{%O?2*a+pV?DV>XuV9ry zN__D0KTpUJws|X5%a8N5$ws%h=%Qq9j5k5l3HtM&A)NNK+W#A;-N$A3W$%+UJ27K0 zOfV%TQCz5yh;Um_(|||E-mDjvEYStBSFcxGT%q;MulV5soISGXZ6OH}>%R1S#&_Gp zydd{M?qkPXc0(r19g+nVLHFb>fa0zUzZ0cx==iA#Y+cIcZ+)c(8@&Ox2dC%}Cn)Qx zkKJT5iYHjH+``O$9j)i*tDQfH``+d?7ayv*$lEn(!1r88*p-^$Dq@3r&Eo<8)qBs+ ziH_^xyM7%@P0SYiON--04kzP_C=WD6wHm+TcY7vpm8?f-KF{Dw%M>W7$LC(nx#nKQ zBCqKI^D^I6qbB2)c9oecylD`~aDkR3yhxXp5bkj`fN8k7X1&4OlXwYh0QggCHHOa* z*cRn~dyAkJ=)^_`^`)pBeM~a1nGksi;__k#Oqg>9P(BnhU9Ups325Xr4b6QV9O=y4 zA~voXK~+Dn&uT|6@wsQjP&DO2R(&-B;cqs4`oGxl!aRP@Pv!o@hEGm1`$>jJ<_$@5 zK(_pzk3Kh#jssIPllP>b!Krgmk;SEIGi8cuKVdP&_782Rc$UN6Ni z)?c)+>PuRPX!*MS;5GdS^Gb1PCatL0ze+*)@ihP9$KTwQfTsa|{J#4eti{3HOPZ}`Bn-)r$MqW2;c*uC`yxXisec+;b=|D^$JBc-w+keyJ+yBq>cpm8QI=gCTEry*;CCwk@ZHp>JmI8zZ!bmNWbJv9CJxz1;9@n9zM+J-Un0U6gA!OCr7ty%hyFElIUD-h$# zXvihPtcKan0>K)22;DNjHM$gC`h|5dWj;$p)}P?_K8kBW0W@VDt;0Qyjc5(PS+Ob}qd*M4Ft#aAk4y7||K8Mzy96$LpyIY7Ycj46 zaMtPbJug(;XRQkl86OlDAVJ+S>RlSY_d?kRh&{6xSDnYJHT2*)iEB+z*6W(Hl4}xx z1pCE;TIW{@z|S@EZi7%|CcYT8{em0smj^i>f?#aqx&9^dH{9QPm0C)WnzBf zmwJ2vr0}J5HReCU>pJQF|8QOGWAF6vdGuu%Wm&o2;^ z?h9IrzB~X5cF}@7^lw$Q1Z+3!2wj{=;Y3AaIZQ}(wR~*i$VAgVrY&xkOTO8Bg(nP0 zERj!DxOCnXxoF(H^m|U;#&9lc3Z=3gzKQ_l|3o+EYIhFd$tNFglBVx1=6V<1I(kDB z-}+ZlhmeSc+;9qe?+D&G^*$SJZ;+yTDYqR8B=^7N0o>X7@yX2Yc#}WyXBfan8Icqz zj(wD~{@_+e5W3d)ot7c_ zbfx>Kp1K>$(5Q04$iin-(+BlnW(6ZeI)?EjohZ3TdOP)J^6S%CEts?OGn;TC=Ch7E z|0LFppn{^xwP-|tOnY`XDN0*_PVt#i^rPFJ$)MiMRVLVvnn0EO8hyan_Vg10ecXea zTB5VW9ul?vTV}~ef~JRGqC}?CEvOsolB%uAeJXG3acF7H{8vw>@}RO8H&oW&N>4P; z4`FLWIi`b1q3#4=E<=f+Vc}q|!l7%?zcwzfT*t`-U}Y<}h;*6GtfODB5d%ojuu0)F zTggX+BWw9f9-tYJ4;$iV>UK~A+a6`iW4u&z86WQ*?__N0I2D`MwmGFNWA4wt2{K%| zAmB0+qEUCucx|eFUDwa+X48Ef`QbzGGPG|u3SNudu_4Q9!^qrXioMq&Lqkm-Ib5D9 zgyrh~_53*_1ti9=>h~>tDaI45J0F8`o~bRn?O^8wQm|Kn!QUFD>v10OrC&ev0-_D}8m zE9Zu|0NqlCYvf5o4tq-0ZCsU<-e>3f^wsQGDaHJJWPO^n5gx#@^6|RE<*{96BPC<@ zcKEEAWkbHAQ&VN3&e@^D1bBl1xK-{aJl`<#pDRqjRgIblC3>d4lOA=eW_sv;K>j2= z@7Yj(NRjgqC^H~sVXzF3Zp12!7!U7_=w#Z~XJYyW4B>#_6NfvZ{^Fz%SJ>sFJD;_Y zus`&%HjnNlu?Rw{B}6UMU{F#lXIM0^17mU1OeGhD%`M|4*MnpN7X+kEIM{McJ)vPz z8LIl^!kd!UkiU228$EP!*d-U3v0;TEiC7{U^a*0(lyutqD|$i&Orh)DR6V7%uc|;= z1F!$NIS@AdlG7hAmNx6Jc@_7mcP1r!JJOt~1)<5D?svs5@L0U-7v)!n0loI!M|7xQ^Tc@D}l3k_(+PQ?4JG zbEDc{^c{qOOXHu>>xy4hV$k5B&)$+z7JY!yPiZcsnP$p&LCUdPs48~P!PZha-U_@l zm>b695E1AmC^_+*>3`IZ9k{$KuiY4dB8KuS;C*vj@154l9#@M26{VlZzNdyET`Jad0k%d4-0hlt`0a%P3zDlSE}`Viz=)Sfe3`n=RTG1I{2?te&;;P>b1^4J6EI!su7PDF4YY` zw|0vl%f#K<@MXN*Q#w`lGDO}Cb17|Fsw&p=;mp{|WkTD_VmjG^XKhL_js98j9Qhjf zB;Z@Nnc3vyVco7d=}!UW&(_0C>rVCh}!4?aS9FB{d0>00*;`)E?hm%(B)l!FrNFOk33 zNeZ-7%S>*LzU3hlzL*n`uswMOeb~3`7DLEZw81HKWy-5xEn^>99hn45A>8EiB9u$F z&-BeAB4A6W)JeKyS5RbcMOm>SrD z^+m#}b_Ncxp-eiCSPs^OLPf_!N9Z+P1mR<)l}*L^xAMt0Bw9YXTXQOhxg$j=iNcLi z0+sE;tqxFl#ZT50!-JJE-v5Xg-*)8`y?KGY-D0TbvmaPH;mnfVBtt?x(r*U|7C#~9 zsX08Ft>pr}_Gyr3*6B-NuVPq(pI`Zv%ArXD{V98H^A_6t7imDZ2_Owns>y_ffDGry z@^&`KrU%zf=f9P7fw@ffYD~GTD0fXhKNh?wI2;NS&+QLIfkR&YK?2kB$CrL_c6snH zaP4+v7^rF16Q5Dj_fb2j^MW`$qI#wc4rjZKfnW5cP)+E!z9e9LgC(li5%7EMH?meK zYs*_&16$jcsO(V0^2^2jpw!gfN9q&e*3wl;;#|l z0+~wf`?|fDV6vDWl*2WU!B4}6ZosCjeTlFZFDKiy9zLcv;;ECY$1PEoT(!xV@_0k{XL>hg>YAUc(}Fcf`D)oZcoZQWZbR5WH1>j?*6AsJ5c&{c5i9^2Pgj|l`Y_>FFkfPn_i4j7 z7778}pUeKs=YIgF1Oy2H4gELg;oeUNB>-v4*n~)e|2_ahhqv&*o$sG9NO}(tqIJu3 zqW<@RcQd6h%r6=J)zm9vNY0kZ5dME2e062&GERX;vKtHxY51hnAYR`-7L*>P@h-&J zoE^WjM+v3Q9%-PlRvfu-P`}}1FJJP-3;MF-!d^mex>Rfb-8J;zQAWBWP~kV?hLTux 
z&9CAWwuZpfcKobY#zk4@E63j)euYFW7r7U(@82s#sWojG>d`c4ROrH9o=LwaiMTI$ zfi#&2am0H$2~cE!I&FZi@7WV8i0OkdznbXIKaNVh-{t=4$SC$hasayE_YR4@(0?l9 z+#7EG0&RXK#J^x|9d6a!E%Xcuf1n3Gd7Nyk;gXxMa>F-w?e1I3{#N$#LYUp~oWiLt zqJZyBW1dPZg77J{`H015hC;1Fs`uD?-wIz4L5lk)W0UbCWak*$SJ_PV)U^x4r?PCKW6-D$08l-(f`ak7wXT8#Vf| z!=D7bF{A*bhp1C4_-s2V?X_x?^SS2{a;Bus+j_XsO^&ACTs#2By(iv_9=&rTUk?p5 zE_rW#F|=V&BfCCeQB-o>D8>%dW{-{GYF|b?UwcbYWU_>@7;&IoWxvaucX@0jaMw3} zc?47mKmmknCM%z*Q78!9znIVs;QgKWP2HbdF7~rakm`zU*#X}sm2^8qi|a+IRRJ_` z-jgf_8EUkW`U=X|FB2?QfaE;ec!s)1wP|5Km9cG3{wZDM5IFtdy2}rIV%5r6{LU@+ zhCcUv#5<)PhHYrrc4P08>DWj9f*S|qaw4t{MmrO>KP-GE^GnX=fAjEx>_&abj6tVp z-9@3q2c{~vcu)I21lwFS^~nx0F_dcp#NQ6jm@1Hw{E5bg^u3e#Agc#t#j8Aa&{JJA zEGF3&Cm*pG_yB-MPt}`Pbq<1o2us#JHhPox^7l|T6Ae+uTF<)~6Hyt=Y%Wh3r9u6J z`m%F6xQ=3YmB^-Jx34qh@Z@fSiss3mQqJH^*L-|-bP7)CYOO7p)5j1C>RTdR_(pCF$yzu%H!;!=R>{wtyVGV~sNVzU|Q=9rMU2QuXfv(ajkV1)6yU(v}Jyc@1El z2F{ai`)A+LMl=7Z%^;&zEQbBbmZ|IEB1;Xyl7N9>(6@gfqYq~d2YPO&@P)9Vlfh0iK*-9mrsC@USuk=a8 z)?8(tsDcMEadPtDS4A5mualZh#1eZkZshzDk!v1z%QR~&PJ804xlVs*I42U=j1qk( z)^z4?)+UW?Y$Y!jX1gs>3>fl%`Y0<>OzoYiu79xjQXIWB#!@T7BgXjT;AI^t?BwK8 zAE`}FGc+KD(>jCHkygtcWb#z76|p#MVg2*1ou{dUw*moy8XsTo^k$_gv$*dGq=0IB zu0n%gIqU5aV2yIzaTQk=6ubL99{1!K^47L8dE(hRcQd7wsXjW@CaVR zLHrTw_M?K#*UAY*zr8n1KuB zn=~~>Q-RIyo9QKS5o{9?0m~x5gr_0XT=r4&iUbuMLi|IKy<=9V{sAL({uequ4NoCU6 zmhV_8i+pg0i05rEn^ay%QfF?N@2wnwfVVE3!5WH@tgaw6;9O1^sneNL9a3Rg>WQnwJv=Ua)V4@W(n; zhcAKs;7EzkJIDQo9#TMxJyStPUq&NIk1`LKnv#EcvgzIg5O8&=opn5;x<=L4gMe;w za+)5XB3HBis;v8YtgMiWH1oKo!!qwHJbxQhJr%&l_kQJ7!~gxKdnaNFsKw6dbu!5L z-PyfY5FZ2h3Z>@gxBmJw%CA57YH7&-8i&D zT^~lhAvHr*#Y4ztqlo}hQD?oLjCg0ed5p%xJGu1H$&qDY@ZCH^Z2se7&Afjz#AKdE zCPTMUuX?q$!DRL2`k1WAgJ2tlIW$5%YOl#JQEwip7K&vkzNo#!#R_l|#na$~BZTl#X)*&a;P zAgBplZ|^l{(X|mXi&$9oL-+@f0V5EYTGmr<=XLi2%O=j&ZMM1WaRJ zg||kc369X^F*iq^F3y&!KrZuqk6HcT#}0(^)hig6la<@co!A<{@G5A=p8SD_R~T5T zoOgk1THwx4VAt+~JP8jWCmdSboea5fS;t8WJ6 zHF(2gLpV@LzyvS0JZl-X1@JthdcjhEz}KUPJTA}C7>v5yoLL(W&FV#g<2+Gu6nYqH z^u0!@Pz9+!Bt6>4NQa;$pF4Xk?4i%c3%2VmC7P-7oD`c{N^1yiYNGX{Qi*X4^{+#ni8o)TpIOO1k}xJz8S{}i0RsR% zc%g4y;g1QpVG)txPe}l)rUX%C@sqsnU*4K}=RVOys2y$j(toK(J&V2+-?2C<4pOulDgATyoWbuM< zSi4=gpQS7vN~YK3XW`GaHa~V3jsp^4d%+iHHa;*COiKp`)d-Qij+gKXtxarbt@A>_ zH67WCS;*TvmUgEe_!{NeQCrYi{oj>#)yDF*DaW0pwO-qr1?7YJ)a!v6vEN8|J7^|+ zPgVuab&sxIQ#4%MU>8>Wkyi6nRk_q%Y*VwyAIIH_`< z`+S4tDO7vS7BGFAUY!q*u{^LB5N#|6pv)>jZbORf;LIt`sVUFIo2$1>06>Fs4@j%eSy zNMB^H@`yw~lLvi*PFC$XM(e1~m$B)i2b`K!+WiX;P(wWUW%|qhc0}Gv$pm-`CME*3 zHV1EOeRFK8CFD+d7hyN&r>nqq2+Z(e=^633Uz`|U=2jWxL>t-WM9=fXF-UqJ1t&DH z@>a%d^t#?(wLqfNq8(SE2K^m27Zg_3Fs>6fWfO9zZ8ilvfD5Hv3wuxoN?ON!4|t6{Du`tA)_VO@sPj zAHf3uvE*tAKCANA0sVt6 z5I>ymd1E8QttblW{Jgg851i4X?hQaE)sAXhzh?CllOP0&hjXF=^jZHix%g6Gat_uc zv432|`^kyJZcA(Q^^9CeqDzmb8E+ZF#F8fdPEEUU~xSqUEv z7CQX6kqRJ=d0_gt(wki})L7-_=&nKofj9}&ZGC!5h5m~W+|H}ooZc+uh3`$p>knOF z;qqjFN#^G@t84#flI@s4770~(^$&yU$LyQHkd(N(Uun(1_74Er0ht-X|JnoGWB0F( zje*?5QUY({_@BkX7Xbu_RVm@eB|LxiP5+SQK<=Z&!!% z8(^m1t{T)}LtgY2LxJrJ_4eQj0jz!z1F(8}>N{puKeSM68~<> z8`=$!RySm(zuSj&1u}y`U@GBj{~2q#w+=9C$RGL;BBr^pQ{-=uvaG!yD!XV`x|C+Wpv=NYVRX2Wa{67y+nJx(a-a?2E z?!yK+{S54n+5h_h80R(W|IGLQ8<5{$_qNcU(v@e`Ycc7SUjDx4PT->()KrgGP@K>Q;v{jNPj+ zu{}Zcqc<*GFqxofv6HdRY9x-GWLTe))P{mItZsfVnFkzmgvd;SJGdH6_moA=Ho=w` zO|>z|Us|>h8!m2K)RmZidIg0^@ySrkwZ~<3tfsB2=dAVw-vprjj=V4!NFjSug1mob zt_~OCEaX*)+aDw$-$V#E!B~;3IomGx2vqiMM{sdhgH;i{^MtQrEAapDdaqADQKDzU(eVLrRBe>8(Tcjuf=YE&&{tb$}db-iD95zJL} z(yzlOAdp`N|h-focvNQ4Mvpq0MPvCLh@%?#{mG2r_Ar zx9|`f!7W6{6rr$QcYe1gDcWkY>8QzEfP4nO>6b>G`dP>18R864w~`taVrP z&8!j(E43o#wI}iW=ffvilYvB~Z=>lyP3bw`RMepy#u*Qoum<{Ow>y(TimFSkCJGB5%0E^@vH 
z2oX1a@RlG~q@WitkC8(KVUyrx9$I=0n3)=d4t}xNxRe~`klk*T+kPt7bD=$cuyq+V zX1BZ3pqi+EmFn)fyJpCDaguz*Pqr1AYC->!L@sOC{?eJ1_o$}O zt8S>K9M(z-V!D(geTdN4vBI{vuH(bbf%;gGHS;FKv#9ceRQLHXl0}%YL!~iviSQ7W zxYU|H$MkcmFW?VdZHg5u$1VA3J6xXvpGcgEeLmWZU6CJ-ZO|e)zLpXvJZaF-*>YWt zs(SH=gAsRT{^)`%K+<<_T~b6L&1>jv!D z`~u9Xv$OQKuZ$z-hL@vubl0Q&oI};eb&mYr9&q7VKXw>{ovXPCHtwV>Vjy2bgGMAD zV1c?+z8np>DLD{BBrjqB*;dOGS1|KAl}qZSzi5{ko3L*-T23s*D$O-y5H}e$urtsIT|A|@Unh{;Y)98LG!`W#~ z=`XK!4)F>P(B4RySBpp>ok;dR>YF3EvM}D+aAPN_7%JX~oe4C2%ZoM=w>ng^(Q>L7 zS!#4OXC-bWTivzB24#5aaKn3?!-S->5KVC=COW;gT$n|9d+2L!F2=*3uw z1$qDltD@`uwtOTW`2Oc5CMf+aG;G{SY27kE+@C-^s+O$o~nvfv9j8(G`now6xk&oODZMtYKXfpX`__YLeTs!EC%L##`Hce6f7o4 zF9tZM(yupErDe}ypg7UV0)pYxsD>QzhOgv4^cv`wf=I`<%!Q*%__qnZoI= z#Amc2*?m=(8P}xhP{5v2nSGx1h3lO|(7TeeRKb)I-xC=3ena7}=MH^_2j%>Ed1Yif zWo)zNzMmK?*T!_GV%FG!o*($MBM%g+oPE*Fw-!Z$#mtuVo-g6b3Wz~&WTfeMi(7tL z*8?Ay$jIqrs>8K|7Sy4b<>=uL8G9lUyE{ zwmB9cB4Rl{MjrV((dkrrR=gRSmKpH! zSzmFzDvhgC($wBB=4-x@%HLJaQ8!ZMvroJuq4r{DRAlb!s_1v@&Y9~lJm-(L?`?QH z=(G$G1xz(}sLRn!D^R1xY#=rw`KzP{UQpw!BLfO5m}cUueK(8`l~Uc!+w({)W9LCu zces(g3N`TzB*ooAlHUw79ghZwwxU~2zZzPZAF#KD_uzRM*4u3Khl=*4mc5`%a`rFC zO7>uSTxuLGz<6KQ+EeOV_wH<33N+p-ZJ!umUoIf^<*5f|$joKgfVP31q82o4r8!#!YIkR>SC2R4V*V zUrNAOHN=s7gjCBZUS3X6oGfB`>}LC+T~HjsU}-%_xUQ(Kn8(6m@-Wv!|Hgf)b#5g_ zu`%q0WtA;P&$jB8gE%@>+qg~*SE;8(>2Vz``C8hG&ht5Tiq^@k5@Ak@!h$SIqQj8e z*Y5>64w|FfsVO<@pYrc+vs!QiL^P{QNy+E@N%@YYK~Y14ecOWPK@fd2zysONvE$s) zFyIC)=utR(FC0~7=^%Jy!*SJi^nU5{%XFkja~nE(+ZrMAwbW!zcNRJDh}Y^_tc&>%JIVmUwt4sn(z0&ufT%I$;ztT4TnRHV>c134L+cAQPbDSun9*S9jP=OK(xg zyd0zL?2m#?yAewxjp`yYA+<(aqTlh^q^C_9XE4v9t(G_` zIBaC|_JitMK?V)uGR( zms<r_1A`fTQC9{ont+=mb+-~r^3PbY! zW*tMiC=+-a^w?Sw*EpKBYhC+dh&S5G*1yLgMOSs$q~@O>r=9cfRFbK8dD4bJq{G$~ zX`payEcH0IoL3&%ryEdnhxQiosR)o|hG>I(l>p;2g?E_$Y~@_)?>__2oDng#YCff8 zQ$nmSq`@CQ=1rk|r6ssAVnlv70cmIEG01i!+$kgW@Bj7o{sf1}L^3O&g#N3$AAQ=s zd(fQ8KNY0$^#f4{(^;XOYz*ekN~*IfSp hgZ}@4mG-%VxfyY~aHbYKegJ&E5R(%vc&_LB{{UCzYRdos literal 0 HcmV?d00001 diff --git a/doc/source/_static/schemas/01_table_dataframe.svg b/doc/source/_static/schemas/01_table_dataframe.svg new file mode 100644 index 00000000..9bd1c217 --- /dev/null +++ b/doc/source/_static/schemas/01_table_dataframe.svg @@ -0,0 +1,262 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + DataFrame + + + row + + + diff --git a/doc/source/_static/schemas/01_table_series.svg b/doc/source/_static/schemas/01_table_series.svg new file mode 100644 index 00000000..d52c882f --- /dev/null +++ b/doc/source/_static/schemas/01_table_series.svg @@ -0,0 +1,127 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + Series + + + diff --git a/doc/source/_static/schemas/01_table_spreadsheet.png b/doc/source/_static/schemas/01_table_spreadsheet.png new file mode 100644 index 0000000000000000000000000000000000000000..b3cf5a0245b9cc955106b95bb718c039fa1dac74 GIT binary patch literal 46286 zcmXuK19T8{4*R+t_$Fw$1l`zQ6xl=bo7}-RE}C zoUX2V>Z$5*1vzm9SR7aY01zZ4M3evk+z0@`%%QQ2hGCQh#U4#t48nX{9Vv4dg2EDQh;0g@tuDsF3MSMw(myHd4xoP$-IHtKK-+uKZB<*q6lG;$8&}^sRBPJ zIZbw+7+vhG9s&!Mb~41L>S529dZfb|f(fXpJY3D!A6kXHACE3QLOs`T>d=d7W;hg{6J^Ty6$P-$fi8DQb z<}&r?T1K~}1B2~f3*lRB>|yeoOI2?9kXUrO=h~^Gf?AD9CigzyZV&9$`I@cp%te8? 
literal 0
HcmV?d00001

diff --git a/doc/source/_static/schemas/02_io_readwrite.svg b/doc/source/_static/schemas/02_io_readwrite.svg
new file mode 100644
index 00000000..a99a6d73
--- /dev/null
+++ b/doc/source/_static/schemas/02_io_readwrite.svg
@@ -0,0 +1,1401 @@
[... SVG markup omitted; surviving text labels: "read_*", "to_*", "CSV", "XLS", "PARQUET", "HTML", "<>", "HDF5", "JSON", "{}", "GBQ", "SQL", "..." ...]
diff --git a/doc/source/_static/schemas/03_subset_columns.svg b/doc/source/_static/schemas/03_subset_columns.svg
new file mode 100644
index 00000000..5495d3f6
--- /dev/null
+++ b/doc/source/_static/schemas/03_subset_columns.svg
@@ -0,0 +1,327 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/03_subset_columns_rows.svg b/doc/source/_static/schemas/03_subset_columns_rows.svg
new file mode 100644
index 00000000..5ea9d609
--- /dev/null
+++ b/doc/source/_static/schemas/03_subset_columns_rows.svg
@@ -0,0 +1,272 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/03_subset_rows.svg b/doc/source/_static/schemas/03_subset_rows.svg
new file mode 100644
index 00000000..41fe07d7
--- /dev/null
+++ b/doc/source/_static/schemas/03_subset_rows.svg
@@ -0,0 +1,316 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/04_plot_overview.svg b/doc/source/_static/schemas/04_plot_overview.svg
new file mode 100644
index 00000000..44ae5b6a
--- /dev/null
+++ b/doc/source/_static/schemas/04_plot_overview.svg
@@ -0,0 +1,6443 @@
[... SVG markup omitted; surviving text labels: ".plot.*", "..." ...]

diff --git a/doc/source/_static/schemas/05_newcolumn_1.svg b/doc/source/_static/schemas/05_newcolumn_1.svg
new file mode 100644
index 00000000..c158aa93
--- /dev/null
+++ b/doc/source/_static/schemas/05_newcolumn_1.svg
@@ -0,0 +1,347 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/05_newcolumn_2.svg b/doc/source/_static/schemas/05_newcolumn_2.svg
new file mode 100644
index 00000000..8bd5ad9a
--- /dev/null
+++ b/doc/source/_static/schemas/05_newcolumn_2.svg
@@ -0,0 +1,347 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/05_newcolumn_3.svg b/doc/source/_static/schemas/05_newcolumn_3.svg
new file mode 100644
index 00000000..45272d8c
--- /dev/null
+++ b/doc/source/_static/schemas/05_newcolumn_3.svg
@@ -0,0 +1,352 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/06_aggregate.svg b/doc/source/_static/schemas/06_aggregate.svg
new file mode 100644
index 00000000..14428fed
--- /dev/null
+++ b/doc/source/_static/schemas/06_aggregate.svg
@@ -0,0 +1,211 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/06_groupby.svg b/doc/source/_static/schemas/06_groupby.svg
new file mode 100644
index 00000000..ca4d32be
--- /dev/null
+++ b/doc/source/_static/schemas/06_groupby.svg
@@ -0,0 +1,307 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/06_groupby_agg_detail.svg b/doc/source/_static/schemas/06_groupby_agg_detail.svg
new file mode 100644
index 00000000..23a78d3e
--- /dev/null
+++ b/doc/source/_static/schemas/06_groupby_agg_detail.svg
@@ -0,0 +1,619 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/06_groupby_select_detail.svg b/doc/source/_static/schemas/06_groupby_select_detail.svg
new file mode 100644
index 00000000..589c3add
--- /dev/null
+++ b/doc/source/_static/schemas/06_groupby_select_detail.svg
@@ -0,0 +1,697 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/06_reduction.svg b/doc/source/_static/schemas/06_reduction.svg
new file mode 100644
index 00000000..6ee808b9
--- /dev/null
+++ b/doc/source/_static/schemas/06_reduction.svg
@@ -0,0 +1,222 @@
[... SVG markup omitted ...]

diff --git a/doc/source/_static/schemas/06_valuecounts.svg b/doc/source/_static/schemas/06_valuecounts.svg
new file
mode 100644 index 00000000..6d7439b4 --- /dev/null +++ b/doc/source/_static/schemas/06_valuecounts.svg @@ -0,0 +1,269 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + 3 + + 2 + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/07_melt.svg b/doc/source/_static/schemas/07_melt.svg new file mode 100644 index 00000000..c4551b48 --- /dev/null +++ b/doc/source/_static/schemas/07_melt.svg @@ -0,0 +1,315 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/07_pivot.svg b/doc/source/_static/schemas/07_pivot.svg new file mode 100644 index 00000000..14b61c5f --- /dev/null +++ b/doc/source/_static/schemas/07_pivot.svg @@ -0,0 +1,338 @@ + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/07_pivot_table.svg b/doc/source/_static/schemas/07_pivot_table.svg new file mode 100644 index 00000000..81ddb8b7 --- /dev/null +++ b/doc/source/_static/schemas/07_pivot_table.svg @@ -0,0 +1,455 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/08_concat_column.svg b/doc/source/_static/schemas/08_concat_column.svg new file mode 100644 index 00000000..8c3e92a3 --- /dev/null +++ b/doc/source/_static/schemas/08_concat_column.svg @@ -0,0 +1,465 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/08_concat_row.svg b/doc/source/_static/schemas/08_concat_row.svg new file mode 100644 index 00000000..116afc8f --- /dev/null +++ b/doc/source/_static/schemas/08_concat_row.svg @@ -0,0 +1,392 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/08_merge_left.svg b/doc/source/_static/schemas/08_merge_left.svg new file mode 100644 index 00000000..d06fcf23 --- /dev/null +++ b/doc/source/_static/schemas/08_merge_left.svg @@ -0,0 +1,608 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + key + + + + + + + + + + + + + + + + + + + + + + key + + + + + + + + + + + + + + + + + + + + + + + + + + + key + + + + + key + + + + + + + + + + + + + + + + + + + key + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/spreadsheets/conditional.png b/doc/source/_static/spreadsheets/conditional.png new file mode 100644 index 0000000000000000000000000000000000000000..d518ff19dc7606fd55be29c775a38a58a93f9f2d GIT binary patch literal 141383 zcmaI71z23ovM7qXyL)gLECGTHL4rF32m}cpT!ICd5Zv7@IKeG=7~I|6-GT=OX6BK9 z?|sgB=iPVL`ubbFx~f{MyGy#mHPqyBv8b>R5D;(`6=by#5Kw{;5D<4U(4JG&>25|4 z5OC~lWn?rIWn>sMT%D|J?JW@y6vC5J(e)D*iNF5gQdCf|3D77ET1SaDE)1Gv!cZW! 
zZfO23`_0Ue6oPyU0MW-kIkg7S@mdw~PH;VI3#aw*^!^5)%2HTzNHuAhMeXo~a_lyt)4emTg1 zUs`Rd`n2YRr0C&5X-h%rcRPxT3*vbMyoZ&5085;i&BY|GzS*Kq5SfX5w4M_CdOXaA z4JJK}uSLC|y#(*5$lHf{Id3j{C?)N6=}un!N;_G+w{#a|DbFsh0hLGwwt5|YTU@$g zP!pJTzE2kJeOD<84|8Gu;@OW4+-x1LVBy+=f?0L%K1`)=AOFhnD5v8Yi-V|e`*qff z5(MoO61)v&Hhi)a!1KlXXEAXeHm)wL_2(s)DDXFVa=-LVd(^^W2-Z!=#mwvP-wh1NMSX_ zOl@58+<_U1jqH@aR~$+3$2%!Ax|LN4+6Q+t@x&+Li4WkNBo&n}Dv51JilVWo!WHy> zFMxm~wCfeqH$A(Wk0>39rG;`otzVDA4?sUw&d$-U&B9h#FwaNK&MXrI(|)$s8_L(?wplqGfGs$?nX0qRO$$tb?S>FU~h_Ydod5cqt76(zY3m- zQ0nqRM81yhQ(KlOqAsES5c47ngw*1&_(de_D;Q!*zlR^&;0IH=0N4c5iU(u91%gA? zdaOX>ox%)=%HJf4;xF&wtV4caLfUDKSmL6aT^A##2U6{kAUj0bvxUp~t7$J(@ za`pS5QPN`<5!aHwpfEO;1%)_J6lP2@JfmYL!$PP)^4`O6;$sCC++ zSW;0WPmA-s?|W>E7yshAlv;N#NYZ5Dj*p2&k6K{!`7UPyGVswdIDEj1BZPJ0MwC!w zFroDstQjhyW60Qti#3@;uNxboU)wC1slTXPdDe{(4M!4l(N!^+SuUb4*8CTyGt8Xm zp?jJAZnMV89js~+EwL=IhYGzeFHt?w{ar`oblWIL$R}d*BUL`dq@1ELMI2zXcwNx3 z)0v0_IM?ms^z_dseb6O-suojQN3kM;S{uS2TSDA#)G*5IcC>@0^n+f0_1Nkxz?7)c zw|6imL&5G|?H=(_%EErA^Fk-B+fh-6oiJXHQ-p8YCrQl4{k_IzKETyHgU*9jS&cBo zn>6I@Zdr?j1fsY0>r9dK>`MWu+X-31nnQDT;yhGiKZoyi1P#I(5e7$;o^BRc!ifyh3e;N6{OMf?nB|5EB zpwFskec|ipx5VU>1NU-0D9)baWM^?F{%p2#N1!$TAabT1E%XLo{u((mSjCzgpLoD& zpD(b|ez4H+W8P}MAwXzalxzS#>p^l3mVD1Wf85iy0XVMhj|0(#jtrTIQc%wqW*RCgxMpM84<#erEGqL{Uv8Gnfw zIs4if&A+ex(=hRLohNG#n}JO)lk5WE2)qn$Pp)HGYr8215jalb%JLSXyO2KkI73Wc znJqb4;mqmPXvWNVJ>*w8)L_dRGB@65g%_VbyHf>bI(z6IE$~50oNlbiOrWdDj{3#F z_nx6vuishF7(1b;9b>>A;!FQ*^s&e05-UqEn-A>(S$;mDAXuA z_9;=zb7A)U;$=Yu?ur@1@Gn8n)SeocSSp~_VpN?)V};6Bb21QsiBa3xq&oxX8u2eh5v+evn~lC6^vpuseV zE(h?$K46b)Q(bl|JKw#D?sTG>*lm56)NDd|dNQpPBU5^|4kLO=LTzTg!uWvQc&-m$ zO+HTRE~^H247bB5ZJ*boN({GBhXWaTA4Gi6nyyI-wH??$09wabC*O;}8 zx&gr%OzYl@h9A{vh8~61A2c(ERNL1Gu%8I5y+7JNz(!@~9NQo)ZFKS9v!WqJ{p3P> z-hRCw5&RBOC7UO#na-1W!4po@`mXm8F{w0^zBLEq`^*=}1Y1}o*bQGGXL$Z75i7h) zNIz8De+UL3E80y~q*u}<5V8?{6@}=lE@8vp*NX*A+T9J!Q8hhQ>X5tce=!L#T+*nh zTAxtbVd_PGz4&zM$Tmvv!>%}j-2t>a^LG8inoQk@0OR`n%EG`4Ev`+4A%Uwi=OOAZ z&mq^k5~AJs^x1ekq6227C50G>=pnvS{+R;**G{y}55S1vraut-X}l^!45=m3?z{gy zT~AAYXGX>Je8Uc^1<8u@@H4$(_JE-?b^DIO2Vm=>OCNp;SfNe?L0-LwzEBY|N~l&( zUKd|!@CC1ZGtJdrz&T+rHgYc$9LpqfhFaeZwXwUA$O`!a5H%>p$oDznXgAKfN<&U< ztPo)C`<)uXZm;G#q66}3`5x3$SX_DnmVqeOpDIwox4AFL=;JqC2a;^Ve4Q>oe*!x@ zeX2`1=hHdf4-St$Xg#-XQ-4soS>v7Df7=%0R~J~!hCIU@ zIgMLpg=1u{(VNbG1HW?RlYA)qMI6ma<}%-9NaTbuZCtb7axQ*DP1Fdd7Bl?`*rE^7#uVdB~KV!VvffsR%aQ zc)>R?wJMhp^$(E4E@}B@Q3ZQ*)8QIt;3=XUf+HWh`$*>3OpuX@wQd6z0LC6HY$^4u zxet%rlgDiQ&Pus@1Hp@_oPKG~H`}xPOT=$hQe`2TeC@fW);pfu9myw)ICV1IkIUJQ z!L1psO9Ltq#tMX)p26y5d>gT1pV>{Y_-?&S8pSz`&4bAIDC?Xih}L;MAze@u5tZ53 zvPK^_?+j^DLQEd?6-W@>_W`GXwa4_B3p}jZmdhji@?%U<`2sn;`OWOl&ASI|s&A>^ zO&MpluK?`S7}daLvDjG8#8U-*DnLBT3$RVBrsxm~2@P&Y;-}>?8o%I&y8c8#jf#hAQU>*K893ud`}2+wj948}PT&{{eH79VXJq z{Y^x3W<6w*Y!Cp+CL60d_vOaJ1F}~m)Q;_bX=p@B{69=;UpZ#r3~WKy?I1D}G$}93 zV5|3?>WV6vaG0o4+8kb+0^oXNAx4?|D%K{e_u-#+MQ}fP0i#?Eam|=h}%-j$aAna$Dw8 z;_9s~ODfnIZ?BgkIw1QEluNC+GjIm_$9lp$etB%_3wbml{1F3O%C zdYKhld&BHnD>~Me_k2(b(=5~Vdqffj@^qKN3hL?Ad-d2N!zn>Z+lnhOBjXW1E zCO5;o+kMEs#84er@YX;=%Fi!((zdFN@bXp(aNSHT#jqvnek)mKv={cdrRjoC0n?KTC@Azy)-3kD#Z#<9GPJPKuhM z)t<5N`ynOpp3brO)ROW9a!2rl0v7T*PG)Co&5 zu25uRJv$Ee5x45lc^WzXZ7oU7i6?jBELtg6^dN_yaSm(S(q?sidhA$dwdUE+mG~gq z*RM3{L|Xh4-zYWv*QQKjLlQ^jg5U46ci&e_I)HGq^ zmpZVBenquKh!Q&#q3sh;{ieI7z(NQe{i}rwdAlb4%sZDKFHra_v*I}@48&cRM#$FB zrL+{Z65o`Wc-zhPQ)g}vIfr)nykC3-%tyc>a_Gn(xLA?mJUaE&LWHvlv znnK#}vccMnm|G6Jk4%Wej{1PZ0m1os9pAh4no5ZI$oy0uf)6fF2~&+Wg3^J~C)Gmg zulHN}1M}wcfb#JkD5%3 z&2$ax?WP7<64N#or&TUqJC3 z(B(=CZQITQ)!0i?zHS2yWr+T)0(1ZIhG`CUSCMQp`0|_fiyv2T#eaK&6yme2G2*2_ 
zr2EW(;c>qQ7#{VOk7}=(h!;&fee19q0NzN5Vt4ucu=9>whuJ)?CqRMC3MPLALT_xk z_LB7+N{G!Sx1Gt{+A4xM@P1FvK?Kg`%_hHgpq|0-Yg0c(Y2GcgQ$|AYv7ODf2EH?We0+=7(s%D zv7Ey2jo26*W)-`0ke^dK9>mmx!=$Y}kNwr@DoGD?u|Pn>CC&^l}y;+P*XdK^NEjy)J4{QT2(hv%(3Ppp$R~I?t_Y3c0m3x zx{3TQ4lngWp*(6bW*$h^Y-&FwT40*|_H92s@-}ZBQN&X(HS`j066N<#6$c^LtD3Bu zcu!!cgKwr7y?BUA4su*^F+v*Co5Yxt2_(^dv0QqC>5qx;ntMIGJp|!NyvCd?u=AaK zpz{7Q6dQ|x_{jXO=NF&zsDtDy+ZBTc1YdP7dew^S=QhO!>_lg2BMXO-gn|cjR@Ylb zo?U|4P1r#ITm}*UOf_qj)Nd0ItZac)=X52jte6mrT|^)G3IaUlXyQTLt++Ru1mBbF zbL{mKBBBnEc)cS1AVJ1AdE%4tgxMY%BBuMHOWf`;l{li=aco{ZmY)`E;cZJUWHlfF z$(tZF(U>i9`;(`tN85jZ#T{)g34!y(atG{Y(b_E-2V&VNZ@zw-rrs)SEPLHMM%e2$ zCF^DJcpFiou?AVrgU*<{$SYW9evPu3>UW|u>}qA|tl$k8Lfstlc2BFc@eeDcD`-BG z2`vi3g1+fx7j~~eMTHv3X;`+?2&|211cb3Zmz+b5YfzBczdXYn-Z>1cD{NiYoF-gf zcl+!tWk*Vs(QvN_2ltp9;CiFORo7_9+(mmLkcO?u`sW()1U^xX_TE(&;G#1tb?;1K zAujq$pnU1aa}UCL2%e-!8VB)U^lR`k zt7k^4aqEaY_Cdtk$_&U{%5D-rZCTxhr<66Fq#sJ2=hW@HK0vmAZ>=usdBe!OGH3M0 z$BuT2kOmtr0sCouZ4T;0m4nIA8cY%M>XVGFrhH(acQ-JmcehMWY-IQ|yAa8FwHf*`P?~nG9%~%KU6P-qt3BI;(Ua+on=T{# zI}3aKMh8AxD?iZw0qx{T{srw|PT+fjpW}|6#4udMV!Varb8^u(TS`;ucWJ#%uPJ$t zjlB)X2`)-jYRdez?N-}_7jw#q#CPqlOCU^Tb~du3WB&CJ#np?Q-i%tNDEp;Tm?G@S zFq@_MuVU{HbVF?h;$LYOI(<4Zl*|2soV@IJB;0jc*S=cSQ(QC2L)kqCdJVsq-*;5e z;i<}VpVGk{_c>p|3)Iqfj*N1BLrHxI+9CaBHQyhlzMD22f*5KJ@~RY3IQQ}hY3imy zx{TPq)V69z71`4∓>({=v+Yq_`}Wvv4LHHGyl&eAfqUykzBPotyGwgZ#73kcX#l zFa>{_JfT@Mh$E*uV#2=CMSqDO5&``Ngl;i<960v|iC9xj%HXd{`k-aPuoNnG1}`Ib zdUoBBGvyyK#+n{W$IC*<{+3H(3-2>y)k42$XH)yA3a;sm5Yn^{3%torQgfu5C>B(f z%ceZ*oPb*#$4oq}N7sD)%&rZQC0^s`S}S79A~@VkyO91(OUVO3v^ zB1TC&HC`&M|EXI~F|@P{s`N7yB%7R55BAkoBjC(K;z!m^Yn$;!?!*Jfa*V9cqLivM z3Xe5g!KCx>SB-$+$zh}&kJ?9Fk?f{A_A4T8m)ztqIU97KO$=sh2Uis<4XQP1T#{Z% z!6ZLKQeP&iOBQu3EDaZYp~iC;_r* z8kA#GtaE?nE#xqA5^jHb;na|I^j_RZdFBhZ=a&*O6~2td>uWTB1Ugp9&-~y2d;su# zdy2aFluhQKY|?9T_j(v5529x-0e6z=H3fZ;(`t{Vj-QYj77;y@h|@9>%}}%TiZ926 z=(dlV{FQ#?o$yfZXpt0XWo<0F7yLbIkPg0}Q|F_R`5Lo3deb-$a!bP7w7KC_iVx;w zQbsrT_1gl3ql6(Q;cza^>O3bV){eE0M))Z!kE|23d6|1_F7XSP!`WnwdQ-Nx|JD_v z9QhZ-Bld>(C_X_p0#f1e2jQW4?I~0)N%aW%{%k`40jY!IAP{u89LYJsDOC+M*~ObK zrUVEo>(0{I85Fpl%>3s28zYNw8Kl}rts#(C1k-E^VX7Fprx*86ipK~>Q#gB^`Nkbe zq(hg=h}#-E*DNusxUS}}(B{4-zP7#ydN->Z<4SY?!NoB_f-;|>2eN-_RrQHu%WM$y zfDW9RN{D_rQXhuSYcAa80fnAv+oOVRvY=s&<{1rdUAAE>50?MKHPVTS+9wlOYm2Ul zALR5A2u!39Bbp@w^U<0h@5H(bxK`%;I(phqt`nwS1RAq=LdFOCsXEpI?9N^7C~ z&KVj0^6depcd8K{c@TFTJ>E>@I>sH!XeEB%#yMV*(WcL)E9PiX4-Y$&3DRZWm?~3H zc+?6}T3Yngnvyf9bKF_&DQ7|HONv8J2vu)iXQEgKLcb5G-(GQzz3T;-%->^4=8#C? 
zYTSwgaZ&-;4waF}SI#jB8`_F22Jf&D*q(Te{04c7NODGZUzoKi+3tG0bZEj|<*g>n zJfZu<+^%rw zV&rr)F}+<_01YaObg*nOgW`9)c$yb$1P+bxh1bVL-pNLrVN%xbsJT5)V0Qu1@tWKp z$mKbczwcG=0E)+dg~Ic=22-z~RS=UDKtR?12Lyy&F9-mV5B#XCCDk8S>fukCaC_(7 zH~nor)F=$f1S*dn#<3PLC>10Cy(@5H{r$C*7p5XfO3n!~zKlR+!AM|UL0_0HY&?>s zH6#uBViR%;@WMbS2${m5_0`7ZO#8=f8d>=gLD}zSC7?24rw}4ohou9esNMqAs9iA zl5*}yTj}r4_Sx&Ew!g!*TuP(l& zUiJ-W_XY2e%q3g(EdQHD>wjD>_FwZJ$R>BW;RNWK5IiH2DqA?u8ZD*4l0b!V|8cjP z_z-{8dk|@CRr5u(ZvVy@WUMM>3{0O$)Ll{Cs$Ia;lk6{b1@JjdVkWRk{M2k!D5`t8 zO8sI}=I8N$poi!zP$gw2P#_T1*Y>w40~kPDcdBQI4jVdVxvxdhWMP3@bi?}g_|eGV zKkQpD$V8DLu(PZJyS|pTWtv5|9pFCdFa`fRqbDo{{g&jcS2dGQOQmF<7QK;JgG1Yw zhP&1!(-dg*&Z%-!tMwvcEz$6a#VwFqM0&_SomCgPS1~Q{k*$%%u$zX|aVP&|{{bb+ zZ@{bcyaoNjLj>0%FLZmddf(_mG!-;_Gb$(0D7~SEz+7-x6XU}&BJguWp~$xPVuWiP zj;Lji09DN@z<*xTo(Z(8oeti2Qqsdg2zyMgy%4O?zcWEEr7doNDX1*NcVi&VWt~)F zs_)x$>6X~-H?-Lf9;Pal8(?oG{sd6+0oMr;v>-q-w5s67IMSljp%qX5d^bu}mth*t z$kdp{qX|^(R44*F01{`s0aIokP~Lu%T6c44{$2=1?q<7G(Zu@3=siGu-j^GV5mL9!{AdstmPo#NmjxMHN} z>ZA@K!v!bYm}(GS2wrwwVx7lMyGRT(B5nza2 z1;ZG(C{)Dda?DalimOEyg?GGV02-jQ=KjgJdCST&B*Bf^HlPmtcQ1#e6+Ye`F zkc7j3Fp=F}z>g>ug?U7UBVi`(!K8291X@zH&Wu6=Yk;Ng2}J6q;&vPZ7wZtzsq+hQ z0YL;BP1T3p_d`JGD4$O8VfjlCW(RiWZou}Ge9cKZ7Rb3f4i2{u8(Z{&QD+r2mxk4* z3cn>X9sp)&w5Os7@3iK!rK1f}J@sKi^nMURIkKANwnChE379p?B~hJg(!@?w0RS{@ zmCj!SFyx^WH;yNQ-_dX7hfIKT{jvaX=^71v{aw+xGNeNR$^)MdvVVzx1*`7}^Dhtw z2nJ9AeTSmpiJG!XhLXo^vk(qA6SH=Mv#G(w9%6y+0i@Np2(#Nu81w8E7>1-^!l$_j zL=^i@K<^O@QdS%YHElt+IianD7_k{6HbQ%r!-0}TR>bq9Ewkr3@TDEyS?cFvZagz| z<0fl#Z5AXrVIsqD>twkeoh*$&0^IlPedf z*Z=a*IDyUss2do9+t{fS84a^^9z34}z)(m2)HQXZArSWP57rY#*8v1mi~=CK>U<1j z!4IVrFuVUL)**9L%qmvO{@}$IfYdFdIi(|FC4AE_^o!#Lym_!8cdUTQNt;e@0O3a- z+62JW;%Z*R6Q&yu6WfOz)g32Oa-^1Sz(~|v;fzI`m0N`)Y_<#L%pB}^9S&*zi{>#k zZD{8#G`0Jfyi57(Fhpf130?3Df04|6WbT^(&hiwaM>H^q6QxhUWQuybNzJ(G0*44c zPI3Ry(20rVkgm@#Jw5rAgJFXwl@3!U$;A-jkFEA~NU}W|AV8I94ex0KN*okbDLXgz z1uM=63j$`sXZ>3h-qY*`%!1Ygh2EdZU!czC_4sZ4{&n!X8{yJwr9nU^pQmbpo~FNA z{X&!~5ZCUW$sC&AW006JRgW9zD)}n1vj#*9(TvaLjRSezHF@lqML&81zfiOYEa{Iw zXJ6Xqcw~7R2Nm{4J{IJvcVJ2qK|q+scck;)0`m%t^D8J$HsFp$a7M`>6X}Qk1;IZQ zlfMS^=Dmh{Gqn2;*L->;=RoAeC}w&63LhE10U^Ud;#1It%pW}dQi2$EhJ$36Uo&uo z7#N0ezk=9L=6~NnF+jBh)Bl`CIQsDkhrWt~8YP`t_cv~49Rzz;7vX1YUHlp$)6t1_ zl!b&9bJ-^lXz!w1TU6kk$_$_HoY8dJFyg(V5oKT$wJp{{_y2|~$ASZEnXkM8f0LdK zY4G_sL_Mf6V7XF~L;^-ooqhu<*b<|RyJJ+vQak9RavG2&9vJcq@ZxvQU^wfp_7H1V z6qE9^rzaOh*a=xGG;T?Q4WAj%5B*42Q?!U7wGLH%`6X z!h;8F4Y!$|q{u6_4Y-`qt6m|=3m~CS2qaGlTA4LmdvMmG|Ap7-1Z3j21iwFm*QvXo z&_D*fNIgNFPLXP9G1D>cjkW&(JKbRi7<9&e0XwFE|15oe5>UhZs+7~Fkix=Ru=N$B zSFI0PF5LtuY)#&LChaacmDbH382A;SOV1`+jdQ3~JwI44oay?4x`BRoXh`ovwi~5! 
z>dt;$b#CNUu@whXB-9#%Z-Ag#%h|;VFSX}{iZIIr<|%?y*Ij`eTgc76TgUJ)Q@Z+Q zoZ#050Al3}hnD6%R=2#kL4XZs^RRvxdLlYJ^{YwURlxlxkOQ!;J6og}xT>6iLy=&Na7%wlX-l&gwuVVz(FO3^Bp=1q`lm<((q8mP=JTv%$J;UA<|*q*x$L|Vi_#HBVOM)am?cE9b{@}&zG+>Zo03Wki>frW}taxI|<^dq_pByV1YN|vbu z;sSJYzu?vRzlNd*Q2m#>h73~oDzo&;j7_aOr}UPWBsf!Y0KKhMjm)@nAERYHB$+8) zilnjB`@>#w$nnd@TgXf%XPA5sfkCMPXdU{yTJwT4) zdv?1B(kgB6I~|U1-lC}osOGL zwtjo*^8GyP0NKO8pqSti`<~H_9f3fciAHP%Y@Z$NQ8rxM!V(X~c?dOj1M0DXB|=uS z_w_GXO_`_sSr(|F*1674-KkLsc8l?UmL52oDXzU4Jx|fu7*}CIBx2!J+OV$r$5kWS zkX<+oG46PktYaBgun1@v&HO*OKubG4to|0^s!4aS3)#>+07X+K#13WAi^4`|jGBq9 z1E|-DXvfkWA~cK z7hnqFhKlsJtSd_7hPiS8%Q`>42K)Lb62Z>|4I^V4MeInom;8t_vw+MK>*p143dRc2W1h+>7s2glXFk!`-3Oca zr{OOfL!idqfzUY{o?2ErkS&PdA>*rQOFQ6B4?@I0!~^)MT~xlvwbbW^YUp_R-eE6X4&f($tm7nfIUKFYNuP=hO!an>&R&Lj`H3$_YDg-L@e&`#FQkwAt*ol>?P=9g-BFecH1SPX4uJ)U@^AkNP4qrR@`kI|l&_qC&rR@7|?A$KG zsDfl&rtA@Tby&>v1a?#|T?wfRMg`si(r{b)Bcc>S8UE0)_J(A}%IpX}LEXDVF)mMg6LE%)c_wJR7Qe zv~WzncEVA#BI)N$pocX-^if8YW>r}Q1-SIWH?2dyQ z^iL>7B_y#=n2?lqnMQFwb_-5dBw?}SYK@S|cxwNq_R9&#Z_&|7;lWNPAhLBp%e4;6 zqKTWabrHmOOVZFBJckW8aR;zu(e)8FA&8bLarcj5p7Z1)!od}k4HQqOI2lWYB56 z8O4n~j|NI)I_Do%3!gqe0V=lesS@9rid{X?rZ-B9rzZk$4aIV%*JnI_sYFqk_?WUA?W~rM09HLX|+X>^;9js zQj@g6=`l`K%d&_uIA#qM^uuWN!F~ccD%b&s91D-yE^CJ0pvNJ>I3pUYWJq^p8~UY< z)v6Tg!u=HOXbkx2XCc5fRnq-lL%OekSYTM24`t30@&{ir0*1AGBwPQxgb>(d&6JU& z1wEbYuWFLvQm%st_yhC^BUcD=!(I+?uU}+EH4#z67g0Tyy?C|W@aPO6Ty+>0ez*2{ z&mPr%AL7n&HE3h6^0u%&ukoPQc8(FX{PqLWVop|R%II7NJt5Y)(h%hJ%S>GYtu2S- zcN-!^KUv`7WQMCV9e;%51Nr_IOxlz6pYxV1U;6XeEGT&-xPr}Cj5is!4x5rYNWL!p zm3-~op7BBDCDf8ss(Id7!Xt0qtUzQM#P0rwXJMka7J z;kL8&cpP=KOELggsdDya=noac+|QV8|A5fSkkoBWB#J`E8=hSml~LoSrC-qU(?E)J zmp$ji&&H;Zkn2Qe#wG}-0j$EA&C&a5kTY=g-&vJ!({+Nn_n-WORcYyG%v8jChQdN) z3?VahHUUQ2gF(`>0~w_kgjF-_)DtDnHnU5Uxp!g6&Z>+N%7~qvfOUL=&2#mjUmt;N zhsdOC4BmsGX-DR(AicqcJ7sKiC$Q^AyFQ+|BU<5cjGE%}#}poN>6}c=c)sX55FUfI3?wO&Buka3n2Ed8hBU2*3Pz zOheZv*&-k$ViiK8a5uK@2xiR%{bvqMl06|v7h8nG1Z(4G)oHg$?3aHrD!ISE+oFQV zTMcYU5ygueL+9j&#PNa5UuGUC(GYLFnoDU4TZn`RZ0?mOg2?)ZT_zPAqjnyjHoXur zepgY1%F=Wrj zl5Veqwucjy!n7z|*o(5D!s_90efQ}oaA{^fdahBSlOmrk zlBnz9btU!3=>3#>QhesQ2cey*lWKrPfmjDc#~>lrBG07_=^`S0SA9M0(Cx$>pn+qb zRbf7gD~asUIKL9ao{jw+Ppl!XrvAN#sMuyNo>F8g6(%j)ic6Mua38)@>dzLihgok~ zu^}EaqC{3a>ZQX+@#kCwC!4jOlHa1JU$(~)F;PCXQ8%`Tv0^j9z1;xIqDpd72sR!H zq-lZE6U=GvrG4{#i74^?Q-b_=2+54$dsZN(J~uOi>}jD23B20PG%+Vb&Z#qfWqE$W ziURZG*bi_YPivk0jWBW77|~^(r-N}m@o$toPY2!G`20JS`$|AzDPKT`!E}kJXctim zbS9(DFeFNl)XTcW`^nmfJ_8wDiLAnb$aIQoR2Y8@URtUakTo;7(L_q@b7sXgN-yeO zm0M42V9`H9*fc*P1nxU3L)u62 zBBI<+p=;VEI6QD*k*lr#L$gc(%$oE<$8;F;PU$~XG0R_kQ0JMtTp^00)kW0v=~V3E z`H>p0Rq|cooq=Jq0P!h#6sRAqOaDdD8+tjRYP4%2QfH<2TO>&a1&RcMQwkifL51uMo?ieS!(yxzAgBBZU(4)?p<@YBsYUm#?;zGUiYX%#{ z;A;Z!X%uAOXvIKWO@RWb!a@4!(^Ku8#^Jrh{@{*Kx7r?1?B?(J^4_s&)f zyrR-2kfbSG~P5}tLoX#@j0dh!D6X?s84 zfRaU+(3TNPlOaMW6Q{Z~JSA|`+z8%4cY$Pi?xp=H`oAi>%xmkY)>XO3>a9~w6}(9n z>#Dx)y+yokA!x|Vk=E*gul=UVS@G^+)9`7Qu+S7F2M4S<>&Qqcf8ltD6qUC9KPCD0*Os_}$GlD(4~H8aKJw?baq$ZRqyE=QI+ zLgMU5@p12ptEKTDhf%4-vdF|U#N+F8(v*dUQb8mmb8_QB+n|=CIrx1a$^Ab$OJS)CWTM(k_ib?Io)o zuPa<&RG;g6py{864UC(tSUxFF<95^8Jl41PJhpJE?_5Oj}f{{|fdt^kCz zrmq5wJJYFCK%z|-o%d?_QCh~if~*F%IETS|6A-VVb{W2E|8_Qjz|@>WvHHx+LZ@Fi z0)4U_sPW3}9%WUXw=+Ao?Ysod&qhdj^N&(mjCVd0^wq|GaaxMU7G*H!@&dTVcncV- zr&^pmm|ZF%Pl7uV~J)v(5YSi4m{dbIm6nDFtT!*cIxb64Hdk6T;?m z_s$q}q^C-XV3}NW_(_{ZydRj1D!cd22>sKJlk)Y)IPIBJ;7m=*X;4WG^~@;-f#x+8 zB4!P{_r}UYF&CcSWgdEzx@$zQRkfU@t8Xmsl$Yi0DfUBJK~Y#ngD?9;-KFL5C`SkRQZ#Tj8b&~Cq-Mhp{G|9^bFWk8kPw!ba7ghh9ENViCLcQ+`lfPjEV z3({TEDGd_RAuTPTbSwmeYU)<+efOSy}HzC+t9l zy?kx1MzU}is|qcWYCCcKS5jt4XI`g`rv>>bQ7l{97ef_(QI@R+K9g)}=kqzWxE8=u 
z#RJ#-#z{_)TRWcV`ZHBv!m&+j(XQ`HADXn4U_?6wnl`z;EC29aIy{B)mlw%DgMSw| zV2~3isnhqHyKgTFvnrJK6Ry8n5&`gw?I<3SRSItH&|K9i#Yk2`f;LnQP+{=lt#KvTF zX85&dynvwcOFNp2{pL%IEbQ@{t+V7P@*U2&(*&b!s*^($Y zz^?g$H}+bg8%~PEU;&GHjmLxhKR-x;S|}AZr3sJgT3<}nHavV>um2}IxeRUzx%;H; ziw{{9=D1~OA&h2`$Z0Iu+M)MdK5|CK`yH+i_~{#rRt)J985)#$MSHtA@%`&V|NS>) zD!#tUT%$a&Dwgf_0YHjrOiwO_!-}0S$_R5iaVTjt(-Ujtdni5}a50XuMh9#5kR7n9 zc@MS|mbTN!PYw#5ZxT%F1IA1lO1&C@ z1~XG6S8Y1~^p~~}!!co`i-A1=p#m=n2jsfEtf*}d)_RGmulf(+@wRM_-%oyuT@A_j z-^b>kzb0@NuY36`Nb^YbuPXJ3l9r+cm0nu;8%UAzHyI`@bA_=(BtANCzk4{)U7#-L8rf?Y=RGj1F9;aQg%BwtZ z7fzNa4tyw?ptkK=TI3gg7`Mu5zKQyFIDMx?hehB3v-qvSVEf}xKV4(pBe2%KoXQy$ zyhZ+WBKiNjWd7%{(7^_|?=eFhnQljJ>2^Pkv1)#_EuahVPw^@( zVaD=Sw~gwd71HU{T`yA{<}9XRyGxy-Og9`|lk7Akl_~AEDoFPKzc&Bzi(=$xsYGe8 zR8{Tn{z#~S*N!R$RkJ;IEzJRb>-{hVMvVci>_l5~xo z!^MN!t1;Ysw|)#x9SwgFKc+nFg+|?FLe7q zkQEoH--IRn)aoIzUvQyPo^!J{J0=O0>?4{g!|W$oxAtHA7`tsHw0(|ez7hDv=Kr~$ z{&{ea7)AQgp^Zqs|6p~vy4^K;RXZZMD@uuE1n=!+8OH(Z^85dM zmw=nTTM6cNB)#sCk6(kUwOP%pxtYkrR6E`r@0hIOBM8rls(%$!oLP0yn{Pz=B@X@{U&J0h&HuOFCxvAw zv1_ZRy2i>Rl;kMRuP-wG*O01E*HGazuZl^$|KrHAC|ZaKghU0Apjm9Z(F@9~t6@ox zY(Gm8^vAQ@&`o_hs(PRjZfYR0I;_iO$@KR@A*Dh-e)m_7=i^ggd$fI4S*y?l_7Cw~ zvZVt5{s(WNBVY45|48$3zqhf?F?CSgdEubjL$=fm<7&Aok7$9&l*TQVA#K{8GE(a6 zTFSFO4^oD=!!@E!@OF^FvH)xuIpRRFw*bhLB+lAC`~{Bs(a9PesHdv(8z+|$p7R{YI20roS zlu+FRxO;_ojDYz?3qWCbF1v0oyBq;}u{$*OhWQoZ>|lLycpvlvm9K3}5#K>fXO0tC zT)pE0E4Se|;C*e{*_7L(%iFgr{7X4Y+D;J28Jd)vf@+2NFb&d-Lq%0; zRp-Ty@{d)($UHj*D2cb+WAMwI%P%RGe}y@M7|IFeZ6!p{z6I9*zr5%tG91NKFq{~aI0apSDF0dhSM=cOl3Hi*WP zCi8$0dCSYR?i<*>0-ktM&vC&SnjS-ZklbQwz0UdV6~a=$;z!3ZuuV=Ksu;iv)wSiM zQ@DaDQjAj#Yt>TN)6NvO*)crj=5X_pOab{-2dN+!hNUiIXn%ol)K5?&Xm{+n|BKjg zqH<{br0`MlL@3GQ^d*p=rEEtqm2!|8O1Ep7a13_v(*^6P$_79yaWac3oxBBN4LnYP zmPaM+lkL45NDFlFYLneu`DH7(jUF8Gy~GZ5ZVaFJ9!)#|Mu;*e1jcI!m6K%W_FCp1 z`#N!w1u2B2Ejre#PhOPCCwrBl1_!`$3&6RTf82IAsvATg8C3|snvzDPN`|@|jWt{O zk7OWg3uaS*h>Qg|w4{;_qR!`4gefGVmo;?-ct1-orLf>Q^{gu!#EzL!G6??TGn}o17Q#H;`VTzDO3m+L_yl z3s@IG8NKav9`>9fM1Gp>s-RBVN4x{YSH1c&|0dQYlZWY;9Oq%myNnVH*m&O)?Wh$^ z;3wJzp>fLOGj>?gA)Ek)FV0-BQ+gmED;mL@@Ch8vpA6$a#ojGGd=W726mlted$9ZP z$;%9B7&4K+?OpW#7woqF#jGVy-MYYJCJvY%I9}6TX7e?AavnNdX?SZzZtjVL#P3L& z;CC_g_A`XKnMm)0zj!J&75e3@yAZX+iH);OI{yT{y;b{4ch9=W7BC&9W&5zF~mZoJgEh+fh-RVNpWRSuTK%JdVrnE_Rf3m0G%{Q$G-ZYX%(PhksPQ+}_cJ)N+Ewii^G_N; zm?4wtqrI;<D^1^fSuu6O#A> zzkD!X$p;2T!R=;P<@QIn%#DMfZGS)Kis>n9)nD;uNlShXvecJ+ zJVR4v*2xa?467#v19{Of@JA~O=zIs>e>EopUYha-?0stF^=M>bH1qPPAg$@YZdZbpeEF@zb+Qd~;NOGrI^6i@Gzsp7)w>eP=p+@9q2&Ut|2874Zgg zNYkB+))d0yyy9rCK;J}~Z$Ci$UJ0&@mswvm3+71d3N-e-!Y5A46z%wa*7RaWZh|FH zl~X{R{pm`;ZINqq6JF3=xTp;fTD>~7F3R;d3Z}WDee{FIBqGEzYXImvEG#m6xzD5< zs4lSa7T|9pZMaB6uyOg-1VQ5nAzKk#!MUT8b>z_!jI{Y_dX|; zJBQZNyr3C~N*um8D8G5_K3WVZ@xsC{Yb>I2#^@V$;;l@~J!t}6mFDE}$Cb8pn2hMu z{QA8BH+Rl=U{UZ3ki|vHs$@8p=!TD?ob~-M(PM+fJuqq{*b%dp}O^UH^_l@8E) z=%GQ(Zj(%9MD#L{<1uqPy&*Ex;AE#>B=;7k!;+zo`>26J^D6*k{C)BVPpJ~2hO~wp z&Uc+wn;8?X#*3SaR*qk-2=%Z|{BgGZCJJK7xT8*)m zpP~hiGp(cY)+n|_T+@Tb?`wmmkiqPl1-8aTD6|Q&kA{%KN6ELv*zPp#czQ$;SSyp4 z^f{-l!W&Y zxRG)s%9=VLedl~1N@${UH5=MsG0sxdnE`%82sgGm7qK+`Cz6pqeV zKCL78-JPT%Olw3EEZB7y*XIu~xLWc~KPP;iHin5TQR}riZ&qB1uJUz<;PGJlS0+;#>^TV1 zW{vw@Le*wIHf^b}I^l^8sF%COW84Jz#JsD*mLQ}F(hpCY^RjL%s)+*ht5bU<{9A1cppq!6E_I?n&$JyJNN_udAp*eZLKD36DW)q&VrBMCd8?ytq|(k-iDED;kg^zlBb9 z(#8n9m`uy{uJ;V_$lNL`j2o^Q#9xG)5bstzvJ549(?NW&W@Ua?-^U5f`bUPwxn;^V zI9vt*n5lOjo7} zk4@7NBOA2cqUshE;lGw-BPbr)ED$$=-5DbV_R{?4VNde0o`86}9$@r9ff5n(JkKXp`} z$|74u<(sBlqIJI|;Xk)V{ol97A(CwcP6!XGWeHq*6Er&CaCtYcwbAE3d2}-JOLe_W z+hiv66CHtqLGU^bUCQzS*iM#ed&E7*M>_p$go%J{o}+G>fwMgif@Amq9OIxEEM>pv 
zLGr)g7~q6K%gJH9Aacqw=p@Y_j9`)bQ1VeqYTagF3^I3`mbab!7Xxjcj8O$cCw z8CA%`!L_r^omP1XLa!Y&HMSLR`Inm zJ-*EXg_Nu`{$?Gb);1KaC~oPy9wv+%7lAR~YUeSjl39iY^DPE24S*_ zH+1b;LatWB7?!_5#&qsg|MOCG-L8>g>I!e$zCW&*j0*~+IMSyh?4}=zI#gnfZK7ZG z+3gpUt1GiN3GtFTd26hQ!{!fBCjyl6vXL{-=A0q>4s$2-pAe^ve)usFa@FS~lXrgX z`S!L;DMnuJfSYW)P`mTC-Of)h{-E8AmBT7rL@vQ-`^ArPT zM_8rRk0Qb|ZQkeu71``z?F`;HFD1)g5zko$ZZGczil5FiIDIORz_fV@^Lub&9rP)2 zq%wl5`Kuc%8vEM3gng=!K9QwRi_$#$8)6h!1YhsA(mY>~kR_(&1mK&KnI`R?+7&!b z-YJFM%n`vvCa8zmW~=;0h8-R{24e}7?Mt5({Kfh4nhx1g%rBVhN z@UP7KOe>8PahN-8GGcS)m(!fGw>y}ISzKB|mR+X`w(ECHy0*eMfc;bYI62F<4o3J; zO2D;wXH1MA6-se40lt{(d+r|}kURBp=?xkSNWE6hzIettB%m~@2VJ%tIXj9U^;htF zuEp(lCerP@aY@I--YuDO`hv?=rcE(KfQ?zVjv&H2qv3Mie;FIBSUe6rGr!{wz3okS zgPU~EA4u`L@#`a9ZCBCR50&i-&xCmT-)j-Ez6BMzfIk(vn_5}mw!|(|j-ODFqCkxe zGl&e7`Z_y4tV~AAOOQhKLCvg^0GVD%z*OmrGJ7H_5;F5|3q5j~7cAIzI(fG*u>vO= z!hKS93_4GS9SIeEuk7BV;{auh0_wsVoHxNPtVS>x1u4WL$rDdqticO0@omC^V-b-H zJiWs-NqA&$H9{{}%J8Jxv@@mc_=2iCL;FY9m96@P9u5A*eK#?xesSfGDrVAC+IB_Z zgy{Me^ZQ55xZ_4)SBn!Udj=g=oj191{JU=xakr+$-eslpK91@8NsUcp!HX`8jIlxa z*pl1~=3qJy6|5Y>uARUTFtIsH#6;RM8((9!Y*Nb@m$H{Gama?#(-+K69h#CNoes)N z-;B)2#BEiHOA$PY_G!315k~hNQr#;Bu1Mp#3R~2zCj2BhDmLnTgUW4;z160LUDb;A zcw3u*+!Nu16YDTj9B^4Q z+m_^CO90#Uca>1P)u5?VIa~7A90UD#N&237UdX(bhwXaF#29;k0iVux=>W}(!`c>| zM+$jUM@7#1hT+6{_xe5!bI9z0WvMbtA6h*}<}=<7)0ZxtOY(V4z!M|S)$f3ef2}PP zEO}T~ZW60wM);LyVzv+Fj~O2o|6|BBRnl^ewM5qplUY-DAJ3vKr2%=q-XGMe7=3k4 z9D0i6WYXzrD!!3=61yTa7-1rm(MC^Pd_e48MI*1*5Ia7;v&QqlbPHbLCt94MXpo5_ zELAqqE<7`YmsT;sW``1a$pNSEQ}YYYITH=Fo%?6f6zT+*1}MMreI^Z}kv~@)5!^Ao zzx!Oe9d(c;!SF2&Mr`Iim7I8`+e4S$U2-Ph2kr&lvm5$Pt1+d23e*NsrP71$)gk?X= zs?{t&b9cc-7E-04R)Q9e`wbesA{*SSb5nRU2e8=8gsmZlcs0K(*#uZVQXt(k6vJ#0 z6wACsCoz1fM9Cyp-M{{^Td!52D5NDNTZVJ6o1 z3`S8S=)e%5`VPhY`DNhJ;Fy1p`kl0&4!-6tyljbW`|Wb*vfmN~DsF}~y^YwxQzGmod8#M}}!FcMpWRV^G(Ur>UV-Qj4%_;l2?_{;+z{w5@ zw7+ZBPJ41VErVwr2Fb&n2zn*hneH;Fr6+VsgSp2iPI1#}x!Cdzi$wR|+1h93vCxG}v4T0g^YKzY;S=-!KvEGVIu^$IFxa{6ti z7=9L-c%RhJPi$J`aSNt^iX9=3vgN*XVV%(@P&re@Uxdm>2eSQw{&&eY@z#akh5dE_ zuNw8!m~jJ;`;?idO%PAy@LWD>>*eQo4;&LZd;^jfPqMgPku755r5=r*5uvRHHi$&K zc2b+dj9}7tMt@`V9_oj){fev4qhDK$_TX{~ey(?=LC0vr!)=*2IZq`9%LV%kOH}JV zQ45fo_oi}d@U8&8rsUbR_;z^dH<4rXK##waJIc*#vkT)y*f_V?vp+ODD)QZ0G513Y zO=8x@4z+d<)VqOZ#Qg6KbYYzGma3e5zm+V4*`?KD1~q+{W=L^oAD<2dcc2^8kM~*V z;B-yOOPpH^*ZTDfs&GrRb;#zi#Jn?fqs+6&+XYEWDvR)Fd!F9)(^^eM)TsU|7VqUjuUwmFWbt?xh+#!6UTbGEw#&JG2Gd z3f#Ty#j4YB=gzOj^-Uhrk4@x*m1*=+sV!NOd?NfQR`tkOtIQ4@+Ybib#SRjD0wGS$ zE^DNv8PbR+1@3Ar4T5GTx|EqO?6N9>W8A(+MQA@fL>eB*q@Kw?I-5F)aEf`IoBy?r z5MCZH{5>7<+XILr$vzI8(l*SE`*1k$ae;jinFr<;4XCXFv0D#C=K}!BT+mX@ORW$` zI+fTF^cTJ+OLIjqGuyrgS3{(D=h4Zs=Vq0^)E{;QiTFd5+-07o#m779`H;AI!7awR zD!pXK+c0#n!RN*;@|hr&{o;8pKZtnBijk6}gNm&SzIK1xV%kS0dG{AUS43xGbn%ZZ zreRGODKnHYWB%JW{DF{yx+_9v!_5)>mZfES9$g8#p-qn7=0NRG7E-o~Hz5!DThd8l z;h*SqULIP}O z0zk)x&)}1Lfd*N8&p5u!%UqA(@+K8Efk#J-S!235c1Q`R=V)=(gAhvPwJDOCV2|U5tU6qHKLc+nxA2&2&j8P;~PC zqQL@!!=ag4<$1F(jJz>i;X_R6le3cYhr2`wI@64DWMx2S?PJ@MucjGow8K?Tp z>SD{#zgF#>=&2_UXfsU3xF~uK<~11mGf5q|^K5E=*`&(nG$kmbR#}SvVc=``aOLOH zs@7*9jrkWj(#+eT zDt4)nW=g4;HG=eM*MIJ#k5eMMhTjb*R?8ORwSFejL&vXQTM-$C+xHVK$9=)<=l2~F zn8L1d!mr};ApF+k17!;~AyrEJp+aRAzn45U1z5sMPiOLIc_h$HqL8x+ zEaVURf31zNePO9&EqO;`I2!+~*l>A`b(ZxzJ9XRtAVmK{9sp+`bp*F~`~+AXRJ*?G zX1qwiC1sYzNJ*`cmq}fVm9vcYN_ijCoUW;c(>2T$HrQ@u#-Y0n_!+NFilf?<{{$%g z`E(0-pldcCQxtMGiyrI6eJ{F4$I&>3{+qF;nDH`{SNd|TMsa7!3bv7;MiKkJzR$n^ zpa7mY{(5Fyiew^u)P|C^8X0_yJVCY*IGU*)Q3*^>Rjyz~U5VS#$faV^!y=)7krj!B z2ZbGGnuO;cmKM1UQDho4+as6ei(vKZG9NPUg0{0};L%uDw^q^Ze>vXZATdI_2}Vch zA%?Lm#?1`$>4a~#N_=H-jp}_U3Ai;HOIR5gl!Th8EO)7;jy%9rH4g_M;OBz 
z2&oTUsbaO*yzIuq5izAYoVQm#Cf7vIK-2I)pI-$jc+8-W5I^HDGW?QnX&}{D1@}?^ zeL!gD;^S*7>!IrypPbQa>Km(6pMlmq_ok#}Pe^$Il`{;m*AO#9<{bVg+=mw3#g{Z< zaJpWlRs0xpCy-@m&yto}_Y2$h>G*>()B@V(@IWs$@rokA%;Zr=1Su?1JCMuZ-`ReT zSEDo}lYS*HI6yQN)tv6Rk#tAVGBNG=ia!2*^yYy3@W}tosNBo92Ord~)Iy(Ta~Q2? zi5_oEP(rhcj~S^rr^4PmL~*2L(i63vqT$LbSzP~L?jmq-ctWK1l6ldz8TIH)g3{o` zf0ovT4ZIrm5lQ9;Zba{o-69RMy&?(vz0QWcz$C_hJE9LnkRM{vy_QPQA$(7W^u|KL znfGxI>Dr0zvi##vcJAW)W|nRhJH;+$*TciE>Hqn;0r5Au$PX!G%v4?nTikn-#nsaY zHl>9RfiZ6v*GqG`F}aN-SA|F1elDr9Ya9IYk5nzeu{bn zQn5pVYW}rY*R%CTFDsGaGzxEkd~+Qx0FvV0FaCe~A;EV22^bDzth8s;(-zBHRXn3d ziZNUJrTDnNpQkw9do{c--rU}$@NJ6{AkQQY=l}ah0m$jh(EQcI^C+NAw1DXm{?R7N zFQ=ETxmf$0_fr5}qF02j(Ws{PKtsQm`QNvR)GeJ<*DKIGvHc?c+pGy_b0|i*C2?lJX3i~XFxH#r(aQmZ5<@WjDa3|KvCsi zfFf`Nvj0l^8@-fLmrZ8DunM(MqH4hY{{Fvs8XbAB?@&BY*snJp5B#srQYWL&0rQgB zJnfVWwR2Y-4>vw7fSz=E$DY&kk?#NopY)@g_l9+6dmMufRI$LF8}PU@RmAWCIXv}U z*L%<3Y09u1q&Bw~Ti$^Ge_Qvz;2OuF6-?Pe54Nh$PY%gpxutLYMHT_#psUZUo|#PL z+J_gcwU;}SD1dVC^#|THqfDdm)11aViMq!(aX)BaUb7$q%f$mA^G~Y1$DU1kz67GV zpKsAynZ#xD&N?iv76Db0QWzjj8vwTRwvYnSL6vtI;;4MWrAohx?!{I7&D1Q@{d@!N zk$G=q#+H2P9TD#yNw2_eB;#C`G+>oG{Zhw#Zf<)O`O0 zsXxe{k5+6ME}!)D7haHfLyceXRdRQs+0_Bsm+y*GhoMl@-ADUAhM9ekR>huYQOybR z?IdGcPJ%xSV7J}CzX*CKv`f8!g&WD%47-W`lizWCqffGzN{NbTE8^&eq-3&f`wdsSBwT4gMd9;Cz z(!-3b1r(}3WkWPiT!ZSCwWtNqMKktRrMa9RtP^Zs#d_COY2(%>#nAox7XVmPquGzO zL80ZzPW@Kf@=@;ZqR4Bhl|1$pd}P}dj0<#PQ)pZ5xYifB08A&^f?uG7C2KVg0P@(t zH`Kw)A_faNCv!ihlrYPNt{*aV4iWr#67U;OinIX;d2E2;%k32E&lm+Ny#+0Y zU5HCkGLh`1SLKQWWznLPLKcJWMyvNVPrmvP_!b9%(bbKr4{$EyhtQ}pAtfBAtu;#lYJj(vTDHv)$M1Np$$nG*rH3x2H3k;#H{ zg2+##6}7mgGa4e>MUEhlOkL(ZWG7je>pX45e7Of6GQw@ZDrBlN6}LHW9(O05{>tLM zAISXr0s^plk10W{*oqBg@TmuA$i6+$#|4?Qhm_n4l0Zu$<=F>Z$^saAUp}VH@PpE# z>}SNk`_AYa+ExzNsaIwlLoy>Yy^-NNI8!l^-GGbhKFau~3*c(K$SJyeiQk&eWq|Cd z4OIJRwQm~rU#D16x+XA*JJyFcou3Z4PQ-+c$M`neS=GBB*NVRlMfZnbgf|K&5GAwm zS6i6CGsXdNC`^hx%?Ugqd58Zzg+inR!Ba?Y2?U&#snghlMihTScWe+*=!=mP-@7A90vcrqgrxNUyBY57h!!%|``cVB+c>wHw3R>kj$XRpO4QHd(Zb?Q&{B zrrU2rZDnO1kBe;4jC%5v-DUYwOo>BTaQ<0*jUJ{Klp!ksAs@*4v`8R`4T2JfT8jkYlw4@SBc;Ob@){{6N@* zL3Gd{cs~fQ8#3zc;GuMYNENjvXK3;-bG&9qH1k==yVkCWjrE2p66ASpSy>p;Yx@Bi zT7T|y;?B#ZeT_RcMzsJtvweHk$6N=#oj7O9n9Aq3+0{}?#c);DQJEuM`wMg2bHf}SF@dPF`IJPvh7)7ogG%>_aek*wB z3J5<&0|PD_9{xk6p``S~0DKgpBfXZ%foH{>CE6`ZwbB+h=YP@gel$=g%~ql&B#ju2RcIR05Hv2s6M;D$|A}wCk@~pf?jIBH?N0lUGm@v1y)~WV~K;YZ&;v(DYyZd~nhi6X; zb5OXOrqr%u@9aM`Mcba*j-KEvq*0tQ?n}?~=t+D-rcY4S@w1J6J7sqDksK|_vf8KLSabenhS!H-32Oz>U1yg-LC1f8B`DGVLry&5EyT}FaKDXU zlysh`=K1PJo3yt1SL@lxL#41#*RlOWbMsVx?{$F_Jme2K)Eud$<`Vf_Rb+b`Is1N3 z*FHV;(OJBjE|t(%9XxVMIdl?Wy#}qkWj&pp1PpTBOuse#%rK9)&jZxN&RrLZ#k>av zL*Kruhi?#MnD`Ebl5^VTE*g{-gK_9CgbsrK2$(s{M%V>i9>q|*E|rOIpw#R7pXd3> zAMGi{#r-|+UQ68_d$2?aJ+utzL=za8AnF1PXuI`2gUlC z+(!?t4gA@xV|wemqw2}s{&Ly&$QYM5f09+GR8RV zi)G@NS6H843-V;1Yjm9Au0(%5{gw>IIA6^BYR|+Ey4Om6o7g~or}l|tNO42=N@63r zvXPnW=vgs!Bj&C>IY_w5&rmx&-)*@v{@(F~XL$6XiQpAI2-jVUnw#iEK@e@ks*@SK zJ*S%pkJ?UtYu2Ci*#CpASR&M>#|Q+m0`Cpz5DH|L@WiQNui2H0*=CV)y zLfGiF#m-Q5*hJdS3!1UBm)EOkmjsXMH_{X3mw_~w__+Ip4gNeT!=D~N%OC`{am>>S zGuI#YP2`f(a9sEtX=^u~!l^ul<}Xk!7m+Vj`TVnyDXu8?t{)EhTNIo()t;Cv8caOy z%hjqs1{|Z{<#@P6vcd6|;RIZ|*A7hJD;d{JhNH}ZdE!~e)bBrdkte_98#Q4KQ=1D` zNZ)pgV&$t3;0%*&Pqt-aQX*S{PD=?t+h%+P zy70E{Auna`m)*?IUjcOeYst=#9@<2`qh-zwb{sJ8d?Q7{jxbwU}jrMlkOqPsOQG&IbPUKWI@Zq^q@I6dd` z)Q?C;Vz>2h+CW)0EJGfXII6GcAc7%I>0@?T(2gZDt2yhR-9^!7CB=b07ISZPkJnL~ z9IaagtPuh+Ca=h9sZeB;EO)Sx;dn664!#upn>y%eo*hV2oAc1LDk;g0Y@ z91iJXp<7kMG__59VAAHmo9SDkM>A;Fb#E`f{xy03(q~V`2iEQ z!c_b%$O=?{;c;lQ8sSvpDE$gV6i$-90l`$3{&%FciDH|m_0L88j^WcV8BF5)UK8K6 
zxcY(lKa^F>;Qoq;R@>;z*QvaE;soFH5FT3FwJ$Kn@A-k;S0RH!p^?l3DKj+wy0^65 zyk&JCg0)ZrH4}vl$X3yVh(iQ(=uzb4t!jc?UK44Z&c6k;O0Q8un5-RsZR=+k`X2q{ zI2nDVRS;%>Q#w%lTEksh=Rw$MqCru-#KqPz<`89;dbZ9S+v#~zzsXXZ$W9sKyw+P8 z*fq&zrJ<2!D2HKU)xqhcY?~EP$!_>7ScGrdDS6U`*9B>hhQ>z%IxThvM|PKL_NoS* zD1{MtepZARe?W~qQtb};df_fsNdtmz{pkc3?`0SN;vQw7o!aS&VjfUU2PNq`^wv?Bz#~F$aL!EE)Rv@ zP(&ww3+1`86>|;6v!@LU$?&Ru(_cOdmcChaay%g++TLZ@!=}!)qsc;7(Fe6Rf&I(8 z3P9#lnir8?L?}58vHxL&ukNi4bmyZea}!oBRpQTvD;m_T}euenfK zOVv@+5KpQXBls7oQ#_)+J>0JgtclX%QomewP3x#)jU@Gx=in~xUP^M@HIG3Bcst+Q!iwWyC`RcSbSQo6pD`>;|P zN={~7pdvB7Q%bOXeg_L#Iayy7Vd*R+@`&$_?M!`AUy%n;>H?06)=X-s``bB`#Zu)@ z-|z;7z_t+UqemK;h3Y7ARu^)5;}J6(jADSO)@TO{6Cl6~MClQ^K-aJ_vQ5#~p|i`f zxGXZJyBk?>iY_CLpdzDJ)25dRx>Qb3^zYXbX2^;&$q zpx#4Q-P_FclAGw#=zf>;`!;K>qs#a^7wuiyj(zq|9hKS9Q<4qEn+bj!a^FhF%C-oA zaap}rAz2^tuQs@SQ{~E5Y;EIb+WD+T@|8^D0of4%=P9vjV=}!{)S<{(%YFjJy}GQ4 z-aF`$ciHdG)*|O<&*55((8%4=1-hZzt(pH#oo(%JuY)HT(BGf>m3rM)wTJgNw9fY2 zvBoC8FBcj*5^=t;VhkG?B=QvJfUzf-5q)|nh92*KfLo%>XNgG~ zrHG}=R51z-@Td&%_N&#TR3G-lf>BqlgYp-~k0JzP#gTzIL=llHqfR=sGqi8?0LBY9 zr^UP`ZszRMf5xpF zaS6ul4{~ITp*=?U`$E-~`Qh?+7t`sA$A-%skm$*mm&N`HW9`%%eEs?>}``$^~ z9u(?=M-Gc>>4kQ)hW{6lm(`yK;qK^xwVC^bKg>_5iM`!pJ>ne=_&)dE5ySYwF|Lw6 za(d)Zmg3Wp)HTH)E;YILW;~P;zbP5bh#yZY+sj%zr2pf60xpB3WQBI^=rU4f>aSKA zw(aEs#%7}lAp|LTSM$po7v|4#ExLyUNYH+n2n*jw$IH;-2Ay06HmXIt-3kSsILkW~`IQ%S zvD!&DypcqVut_S1pi9y`(&eHSWh6aPR=(1X|AU(dx&Yo9ms=3aH z`9csO+{G3}Ze!=3fr5&ZE4ui2!FkNV$=IB9PXzmO&WDm5nLC$a1s)9#Q<}2gUtT&F z{Aj;K3#`rFZ-~D8qFISjub@{2cZ-&B?HBsgH+mjwPE_hAkW1~u4426^WP}e4u90Q1@j=VDWjBcTa`LDi}OJe8awM z^@#8}FjUkmDeQzg)>`>mO!u+QqWDV8i@v+q14Y$F#s-?R0wi^&UzG`|WTXnJRppOr z+{!=V->U>Q$ws#wZ>WxhXcEx)*dtU6o{K!nReWJF2C%`b=mI*Mo}EV2rp$LV`D288 z-|}vTL9`ZfPZ?1v^5w?cxPVe8s1a!Wm>q2*J)8KRyWwBVm`y6z9syOeU5i)n1s*o6 z2^1#HTr-oR29ni8Tv5KdD$_khQ$FkVYwP$s81!I0Z)6(~!3pM;d|34Lm{I7{XDm_Y z>7!SqF2v&2Q_FAXSMr~YW5J+8fDMDT!09;4bUxP-%X5D3)-D<>9}Q6@c#KEq1LEJq zAK7tVczzc(KkFfxYzh335h`tcASYt}y@A2ECLpKh zD~QxO`WB3%LHbTop7R{owKY&$e0dL&!q6AzpfSQ-kYl5X@&ONf>Pz!K&KJp5c0~AC zqff|$`@$!y70GLo70U)$A@4*_>7F9s)@1)-7~A6WTc~4@j`V3X|30b0<~ELZ7o?De z>Z~J>TbUg8kEpG;&xuh6CZ=ui)`78>Bx?yt@wI2ha}r5nmHF0>o>+T^2za0Gt3Eu% zGSc(gz#>F0xNUITh000Tmya3lvL=Nbf{@9z0T-dmNK>6IF!O zXlXT}?)_(^%>Y?0D*SHz%BH7Ex%qIQpNFF&jrUPZ{Ed}yl&;D9eu8~vKM&vVZh%G! zso6zn6-TA}G1!n{(S1tXN zJPBt66bl>8eB9>++#`2PsmHlC)1cRrT%>lc3Qd_Z15BG&pj?Yu@_Gyvb@m3x;ys)R3KTHyrT*JfXOHbp`vfss~ zMozc*MjOH>la!!KV>}U$%9eZ(XPbU`Su3|>%@RVprj$vyRUBtbDIla(ZhGQ4iMSPTNDwCH{4ISvWLcvH;D|(p9qKD-`o`#a$EfQ_??l!1+}-=@07D zu|lt+oBDSKzxbclSMCzSHCoy3y zqJM%(?d0GJ6q60?$x&?IS+Ka?tbIuVo^q%U(1EOBq(woz$W*+~pc7a!XBPnxeZD zTMrmqTWJw;6U)?lAyX2yNcl;a>!QZ{uk@CE_BYwSIzPv=&@NPJz_|zkdVCRk^d+Ak zRrVEjGe#AE;Hkr7U>KT%3`0{Zf`8zt?mXJO8=<&HlPcc_`=QIGACk3GZ+d& zk9xgO@-RvhUv902p%CfaVYP~RYT6`j=~c9!nYriQLsqZ7mtvOjXPD)0i8S~nabd~o zaYB31`gJ}YQCrhm8H%PsV(A=;z0LabH61aWQVoKbG;tpyX^ zWBnX^Q46#Ep(m3);XD+gj$&_p2CgcG-Ml=ZxGR>Fzrs7XAo=t%7QWSL42! 
z^M_4`tM-b9V5@+styg-I^5^7VQc|{S{wqaw{5y6y`W(Bi%W%&!fj5(SXW6c^(AN>< zX?2i{b$0DujDHjceI*1Br{2uH2T0KLf2++Rn`xFj5D#td2>^NtywuVVtkyg#-9aOo zMZf5A=CsopI@5`%QOYJ8B3v^g^okic^@z+Knc25))@ABc=ghIEdBOtkcPaG2 z>XsSZ_z&P*HB)`u_+J>E_o7{6hl86_E~CA1>fTCJfE2IA)wzgo_N6qA$<^z=!FRsk zsLvb3<8WWb9lTF%tX9}(c*&wVM*%ZaDa9-MV`J+2r;KQFMeuK^YReNwKIWNO6;SxQ zVBvS#J&DBchpnm6AdIWPsmS98-ce20B|N5dHImU{RRqj+wRlB~wZ?JXu-u7q#XrPF ziR4bD=sh4=hXQ9-u;|U zO8j80n>*$l;~LjLLFdDT)N>tBU9FMpU;G=f2@oA+iq(?#eWWCa_W~$@GSXa`m~x0e z3Jk0_I;g(P7@JpsNlU`~lJcO%S&BswT;GgXZTtKAaPiy{ZCT}JWfU|6pOXI_S>+DV zv$X+_A=$1o2fH#aNecWM^m%BUo%l0c$swvZK@^=s#s#1W+&3eS zxZqY@8mP|Iv;s2pNYK275;psyK_mC+O zdny$3E7e};D^a>5Cqtt6`ftiEQ0CFw-lU~ejZk`D+80uC0Ltp|FK-DwCTI$un*+nl zKA5{d=we$YyE;gXFpgXz8^FH3V_e+Nz0Vp}p8>QL)5Jkn?f?3+d;PvXNm6fqwS-^d zdZh$RXtkawo0bC7twtvq+f%aaqyV+!P4B#LuWs_mk|@~ozcYF;EJ#EzMadjl2Ou>O zO5;@Ci7^sK#+NbK>(bA=-fle=IHK42u=6C@&eib!|7G$?3F>an^C@)Yk8~PRQ`p81 zWQ~3PozN{0N*9Dbx~X~O(68K1%a=qa@Hgi)?3IYZu8HOd1R`zB8u@F?=nLTE(WWlI zB7f=>DR^v$F8(byCiXt@0|8F&78rEJow{83yHkLj0ihR2ftpLPE2PG$nou|<@!i5^ z8T|P|7zglAR6XSW#9DO@DSsNB{kubx5{o4@_!$)r@hSG_1Y{M16jy2D-f*}6=I!7r znW20>J3}I%ZOT9tT37bir2PDU_@B^y^$~LE^+H7w^K||2YeQkyCc(;**fgO6mG?^w zRL_sZ$qJP;!20U%Xie}M(IoXe+7o;NtLc40*TlV$4HIZvFAO~*7*TBd@JE@8o)2JQ zssv*8U;P7Ftq+IuJOP@)Y4S&qVl_jrg;I zaUTl26?NFi4C88L)wzQqU4?37;NB`HXtUHDk?pz_mwd6Qb~k|={fn|F+T$!59RX&! zcLv<3DX3VRaem=DF_LAf5#t0p_vy+{j8*Ngxd2Y3@9?R$y?@AgC!g;QuuvxPbPAt! zK#IEY_O80wT0pdCqy7hQgX=%w%)G@r}2;aBOJl&;4qyoVpcmJ016&+b2b6VKOnB zC3t}5gKsTG*&{QT|ogNre&=h65NNl_)NUBl6B9HVv03j&Eo z<7Ut;n*@yB18Y-(dkRQjs!LSWe5wUJrF8(7c@88tTeaX%QESr@wa0|;y!}wbLK1@S zhVY*y0COF{LbQi& z>ni$QkO8>%$06hQq+oi>-2S0_$BWsw?0>kdQ+EJLanz7+6FY8ciLB-W6;X&Sp=Uo( z)1-oEGtK$FP@Fo(56yWtS+Ja6qWGf@p;cRe3F=uvxBD(Y$;!o>MHBH6mnSd}ATqY% zB8RNM@aoCe`u#4FR=h5t@gd`Fo;v1N-)i*_u8$@DJYWsMU_`QUk_Ig=a5DdKG4(R| z{k{ai*=JSITPfILWn$~PfPrFYME18p+YkXM$7e~qA_398=3n7xEaI7 zyQ^I-`Zt(27=xssoGL9VJD-O|iH;KZ!a>ViaFhI+?wA%$nA1;~V?eADtQGTM)4DUy z++2Y9rmN^md1q+#9-Jp?%(pPN^T{H63S&eGlwf<3Z~2tHhqo=A6k; zTXhXj5)*)D8zSY&HrOv?U z*D5n<1_jGi|8uX{_4(tEVU5QzvIQe@5z6<8i3)FO?toRYLmckK8O-Hm-tZpeCXaD~ zc&E)S0EX2)_c{tUAN>lGGQ#pz{@_qxNEZ}Eb!z6*d%h@Q55#eTd}}H@ zE?~?7K}>}6sis+}HfhYGzYWbEQX+o86aPg$E#)dsUfrV{ZD8V>W%S*8=b&OA8jEu_du~n)*{QQ!7V5a3W z#>>qZ4tssA=xtz{v0a!v9#-&Tdw2^7E~ROR3!u)vGy)AO*4jQ|y;e3%sngKt2Ik&O z$uGJey*{dPU*4TuB__r|8@HcVZJakrZ&2nhHplh_#S%gD(Hx9ulO&{WdD^O)n$sX% zTvAX8Y@l!46B8*U)BiUgQ5iBi^@`c8+y%6ZnhzLBx>gh^e#0`faEGshBA+4Q7ySgz zq>tlJVK}|C8feIh>{X{r*Yz?o93JD@f@RenPWFYEGuSrKrAM&iv2`wjIL1?REibIl zbeqHg>+$B{fb%&eZeuTtuQit8Jh zek|R0Mh&8Kjh6s#NQTLYr2R46^&!hcvmEac=gvL`L@`q_l~6!M=oc7Z zsZNBN2%G-jX;8WNs5#`THH>$35kcGVgXR|K7T2Wdl5te#=MclNrny3OAVjNeYTsfu z0O)2aOao|q;5x`VsU86dh%&RQ%Op2Et_K+E-qyryPUb)}`l5N!orKSMT4jJo)AsHJ z+lE$HQC$ct$3Mfjtp8Hue6-$jf6atRECbls=vwfXs~Y}HvC{W%pcB!GL7cx=#>3wK z0iDMAHvfQ5p-r8qmA%eVEY8N?RK&-o^BF38+d_n%<)ps8*^1A5YTVUMSzx!6r>pM( z0~P!HiM?AQBc5J*eg8e$w2Br!`iMd~tjmKXzQ>OE_W0}Xr&ro{yQ4*i>|jW@P>4AGx${4ai}8m$`YJcuN9`i#VV1v0G##E3ILmeMgZ8< zXEOpr;xxZ4+2U>yQHMr1rr2s{#987O?!LWtP{+Rd@-|#g=2=M9R=Eon*S(Lz@I!P! 
zF2!Pgw!CQxnhtSg8GWe!A}fV%Yim(bvcMjU1OSX_iT*wT(B;iR_5vxg2f}?ve=w#m zba$-H-@imgDGWCyO&^DR2|c_i$R(=cH}nS_Qw;fMC3k07ECsPDi{NtIK=xu0{Z?^y zF6mz+Q)f0HnM%Ke0FvoLAomk|)!E6pDN;nx29TLPr@~*KuIvWrwX!8@%lA{3Zx`)` z_hh|V=CFyfLe1N(zT*_-O@v<*M+zt*thE=;USU)duQb=*}8k|B3VW) zFzH~pKQFsFN_lqnh%e2<-a)2QNPsGP{hU&lrILR*MVIlk#8^*Z3NmepIc5|WA@d{R z@`QxyvM@K_?rq>OuI;807+)Byml6@r;CuKH-wjLx)rQ$@Drp||abN!r#l&G-7E;Lu zd&txQgCjXJG5Uv*I;QqAfr>_e%T~!tt7|Y)XMPtZIpuJA1kd3NQUSXPI#;z;u2kjo zYlsW`_YDZ5$Yc$0bq*k~6fl*7w?pv}O*nTzhGf^Lx;zw(w}|O==+8v%V`K}++B%^8 z<6Z=532cpTMCP)(1>z!b2J2P48q2`?u~C{4BJT&Q@MS(LQ85|m%_%i^gd9e0Tv{wL zEQHVDSR}zXPZRR1@!-iYvK46#k#QDHJ>Q3~@hX1jQUZ^A@r(4r%l8p*x4t3P{2lwIh#FM`~GODf4HRNU|#Ex?uR) zmuRsGS;3}1#5d?75m_8U!`*%oC3BR}TBbuKW7!E*bPk1BpC^VA-->2T-3_uCUYV1e zouVQTnBcLhm*jbfGo}NVP5c5SDF#l8M_~PN^HrG#QQ&O(7ucgMuCCv&QA>j41g!?Q zciIgR`kDj`Haslalt;6pMAOFq<3oz_c!*@1R0a2>X@~rl z*}47I%dG^X?CqPAFJGYl5F;H%aega1rXn$Hgpo|xvL8Ms9_zO)z$8#c_3p4K0(+}+ zSn_7}&D|^+!quHUN@%D3Tpa`OBgwA*@gvP)hjg&h29|gl7NczRjhuk_e);%YShd$3 zvAT3Hi7dpP_<-!e!P^IUrSXu(7XP=XoYs-XmV}S!SiE|5Vt4GoMx`;na_TpMFL;_( zn#l3k@LM$!{RJcKxU&I=y-|y^^_2jt%|;AyJl3`(;`^7K{e2(Ju8v^py7?9G{6wTi zm5h*__q&2wT*0)19*SxD91NtTp)c!kuRkprnO)15)LQ~e&kToH#X#Zw8&-F80_1pI z|2Hk`Y96fFC8PoJh>0pm#XWsECvRR(sA%-JijiFp;sH%k;uvWFzuaMXUw5Gt8bgds z-RohU3lI(4G&DKmlz&Bm9$f9-zr%w=U!PQhnn~G%yk{q5k^Qsho+k)y(2Ckh^e`x< zI{k5jzvGBH*Qh1!LFNh0-nf|m$g9nGR&!O)af(vOiY2`tk0MF;nnKn0KhBj%iMVU4 zwMhwwTb>2u5{mv&m#`2`xKe4VY#sM3j`u2X;8_{U$0lMO= zDyzT+SOn8<-@hWl^!^7b(uuT%Nqc^g3uE>@4Ll+a5O(JXr0A3+sZ|ySSA)to403)1 z^HKFrMH78^zOsqqPlCWui4j%gMsEJTP-Udrd4>fer$r&0mv2753LhO8IvhXSLEQkeLB*ov}9BV)X zHbUb#-1dU#>xG$}Dv=~Z@ra6c3JI{c*%5m~N`FWBSCz6u^v2&>ER^WgiKs=7+XX28 zct#|x#)BB0=ga{~n6YtzkwTS15j~RD(yD2f&tL*e(n`2n86{=xH9iFT5C?gGU!Q@` z5z!5#o%3l;f5cy*2gr*A4V8`a?(ErE1gNmn*%}z8dJx#jplKe*;c6!|9~yj0o41|w z$XErhdBeN8J!ytGQOq2*nJ~zX)+8ohxP?W#et^x-q)|vW`X_K6GgNFd9K2s!4?Tag zdF}+HAaNmx8fQr3iZNm>>cnqW!7k?1@fh7%YHq!UB0025^(oeOsi~5b&eUC#-Ci=s z#;MJLK+Bz(O$A<2VuVT*^on}jj@09^DFXQVS_jdQ&{$;Yrv1z6=yYoWC=t$%?CWlV z`|xE|2!+ zFd+bUB>Pz+y3+&83#-J|A{GS1XhFwXLE@D``DOQtjHfz!KLumo%##R7_MALYY9XO1 z`igPU!wV;Q*leDd-sx$}2eguzY)O#85ieC$vc}tax7$+A-Uy5~2LW^#W%57M)?55_Z z{^EHg^p2w>M0pEv?4=fe6&Q1z>Q6qVdu2tq(ML-!*vQp#H%o|^XN~eoavT}ixW&*4 z*MJdS!d~kM?Q66YN&&|s!lpDet_!Rns)F@9qqk`v*%#?3Y5m8bXC^dgn&yRxaXwA} z+8g^0h8St|%9?7GlLX6aDtPKDCSCgfia!Aq4B-kkV_A~}*1et$7X20GTI_u||| z!~Fi&bHy{|Juv%G-Vaa)GV-VC3_m^dRNETnjr>m6^K=h`Ql0G-1HN8}<$Gi!$%283 zW|&on_#y^6J6h0R*wpYDwG!P6NJ8=QFRE@;`EP=VBARw<^-q)l;B)l_%rr?bITtG< zs*82Q03QSx;B&+ObFg@w$LWDDcxdoyxR_6{OAIW*aWc*Sh4%oBsUuzZI~_wGA%RC} zCTf}pcsoj4(N0V#x(P^Xf<%$8I93ygt$)S{NnifdbOVi~X9J2%pjVMaRwSBlB9>u&($eAK*(Bf;BH>#6}@%psQoEV!dI(k0#BSW_P2Rg5v}selrDZm=(_$JqIE*KEWQ>FYV+qk)RfLfM%TGn z0B2>0o~psN)4jkr5G za2zNTDEpGf=LO{1VY}1lh!VV=`x2Q%Tf-lDpx=2?4m{RSb)`0t2G;SAUWnMTtWPD? 
z(3?#JytQdZ@1gOCCEnPI_UO<5z7~4)7H9!lfN^k%Dpc|uIt808CH@z}gF8S$Q-(i7 z<$yV9+!!Ye>Idd&8Fdd$U_*3485&14%!5xxZN|Zcl&Y`xWD+JDfdoRN%hct zAoP&E;0J6BI&YBnO};%VNu-{Nn3gG{MqL0BvXiO%EC^VRIifT*7~*7WeBCHxRzd=* z2R}2#)n#?scOgEM99Ph^W0=5r3Klf>y(+pJq+G>WSy4hus3Pi4+Oq)}QGbx_g(kXX zhLZcTuP&Lz92R4Ur|mdVRzV^l0il2xjMA?Vy=q~8t@{6NyJ+$G0-@M5FH>j|)Yoenqa zNKnY4idzwHJl=*mkH&(lPy5%Y1_~xr2R(X0eL-o_$!G}))z|1v8iQ;M)tcO&sdJ}t z8Jw!46Q#*bCxGmk$JjU|s-Dl6_}S(h+6=_=miwWvS$s6bflgl4 zT9y(=MwRIt$a)#h&{SkOeDh3*Zf@YK|w7e2hHEzUqN(OK4qyPLtu0#Pi)a?)yaWJo5-R`t(g8nMG zVXc)=Au{3PZSC(RAhLu-eU<)+`ug}8m)xR0Sq$PoP9mZur0LS5$|px3P_t9aU4J#s zw#KhWq7_4fCIaCwpDe)A;!FHyzawN^v(a!aO`E3iQSR~(a<&y}ct8>K z?CS@R(8aWn8OnDS&5QuCox_N%F0R0w^hKuw#;EErs?E+-Kh&Ukwfs!G1-iw{nuqZH zE}h@5KYTj<@Rv}idpJ!z?;inQvoLv)HE!hpAusyF-!xl!Ebj4hoak(KB+7m>{;9qy z%7|;A6eo_v0i6hgRkEi8%PRhQN>t!F%0)u_F~_CGxo zEhWo+LZR^^W^he=0)Idp3FokpNluhrf?a=1ffU3T0s$kH293#`-2h~qG`81i?b;&)oW5Eg&^{=*17%{Gij|L-TWAeG* z1_+tiOn$&Z8YbXK(f=#*>&m`f7E18=+GYzqi-6gLnDkVVrM$i0(1rV-{vuJ}FRE}c zV!xT}E7(7|aZ@e26UtF?&q?AF-1E&v9^K47Fr1)OpI<)ge8;c~G8ol1il66@9(pDU z#nwlDbJ_8!F&KG{LNGewtnLosm?B#kb#pB%`=O2dVOnKBXW4NCzbWxRI{(<9(|_+k z#ht(?qA0kPi)dws3>2Is4#evYo`)CnaQ^^(4C11|ves&5Pe3ZlCKqQo$TR}Q^wW`* zcp~iOQxHQIDWgyv!P-4bL9W(A*usi~{LXH4y`ND)zTTjdL6&cX?)XiAsM^+Q7{2!> zf6J*=%I@wri>T?9{Qi?R5Py2|$bvl8*LA0Cx-L*5Jtc)Xpj)EAROP)2HM>6j72e7@ zxo(#5NQ5fl4L-V=zXmRu95=B%&qo~Jr-I6+Ksm%@_!I&y4+s(ORK1=}`F2+$?qeE3 z%haV>vd1f-t2nd5@qVRLRHIdO$MD;oa1IOzEC2q#j=>qRua{)b#6{!$71#UcX`bH3 zSAh=rmDQ{vcG3Ye_Ww@WNDon|iQu->5CsSu+Xu*UQwsz>wf#UHO3rmm-!T&EfkK^4 zKz7pbcccyJ-;g$+%K>TAK~qd1i#Ykzu*@fb1^d&Giigq#_CxmdP`8ABAl+#{#vi5T zODc73kpYSg9Fu<&cKo|;L{s!n?3XjF74eVYFO%-GcWAPbIIM*c7<;maaY{ewrD6q9 zEZ@8XrNF9ndt9XGn*c_1FZzMC=vh$`QPA_>mLMy{RfwqjyEj4x?~JX_yA6x6-~<8R zM&#JJn1WVw_9c*DgkcHolb7jr*-(NnWDaQRly8J{dK3_n)Rp>iRKKec6@uQuY?v*l z=O$y(lV@=w-HC#K(r5ntP5jfD!VwKu>j(NfOt)zY#-D{nCS|>q^NfTiwBwXrSE*8le395i25 z>;8nTwd#+OIc8dAU)_E*e+=$+`QL$g|2jX$Q4sJT4Sq>(e`lOxA*Ims9d|s0dZPZv zi~hF{%pUrtDg9K%PW@Z|kc6HcR&~($ssQio(%*3})|7g_idxk#V*yU{Z$JEBA0Xkw zqXrDYSp*8ooPV$ri}e4GIdR-9GApm|INBI&N?d{Mk{9fF4`eia6$Eb`8vzSZTmooe zV@VQwF~aoT)&QTY5C4IMIR3=Tunxvr)erAh_AV}AZI(|}Ny6Hi6gQP}wkAAJjIR~y>e$u3e`|9;ILkP*VT+Cb zB$z7a;NWmvny%-E>~YTy5^q&n9vjX=Kn7ND-$kLiE>=;oxA5vc5E-=qC(U?MANDss<+p&&eo9J1bM=S)c0gtA3U#|n^f*Q-d4$Cy4*g>`0=dsp21~F{&xA< z53=nm9hcf`gE=lzEakRh6s%?tfw(M!%Dh>WVt~>FXpXile)m>D)C|gl5yuabKUw7EX( zK6z^p`lQ{Cue=D%K9(P_|KyG2p+)e%Jx%7_h+uDr63KU{sA}{xgJstp*lQ-A;ao?# z{cu(@hwQbVe6Aee_>HUG=IjJ|{AIh#ZB6c=7C%`#pzn`_e!TLm6CZLDi3Ts4V+}1i zb*;7j)g$wh9^y#E|K3sl*Op)K2PZE6m)(&O906h*E=1+aM+1Ylt<+V5{gD^DnBwV{ zkY*sE8n>hdzloQXsKQkgIH!wuSGu~QpBFFR0bYZ(%79FDuAkSo)4*8W!;q?b&>dU~ z&ak8;#$1~63eXWL#@7P@BGE0jcB|dK_Bx2!u(h=9xxHRcFz4v%JxIL{RYoJ@<339- zX2o&^?{Gb=3pB`Kc_!giAkOTWSzid&Fn!xE3dkP#+%M3ca5DFV(NycOz=Jdk4;Lr_ z?##!SeGfm+h5Z+s8#CF3UzP{kN$D=gT)l6mTDSy=*RNLGe2MQC?YQ=m z^?${3Wjm!mt8;D$;}k##_*1T^VI>mBGteYwY69#_8^9;#Sy>HOZYCtjTZ*&ZkH8|| zY&Igfp8d4>!h67nkCGCXn&L_hs1<>fN^_i7+_~pJ#*#N}XGtHxH<|)amRtwb5MX33 zK=9jmE(i|auK=7^cGQwlj=e$X@4$FW0SwQF_y zp(W@D`>a_NW0?fAQGP-K)VSimyoP4U2cIToxJWnstljOK4Zkf9UbG(DU;5dUeb6g# zdpFAcnQwy=p~5Z?8xz(*v;oqy2zU(;g4TWpt5f{!C74TKfEtuP>|zo*q%jB&qPG>; z;%YLatOom?Rw!SKPKwL}n6ya`4Fnc&J4&#+nm%J+%J;fNA~hWn))XbMA$F+$I4+{uf#vLs13X`YIXQfgCC&z zf@7E_o|{8ZCb#`kX3g4s2gesYa)vWOQc!84;W)D|bl@dp;nU)bNdQDSz)**G09*5O z2k=NoWgbUo!a2AmF6sA(bCuqN%-Q&7-VY+KwLg^r+dL8KA|whiJ-8-B6Fnh*jNKQ9(_3407cnU@d1WL1#lC z!*&sK9uL!Xtm)TJNsUrsN}R%EGN{%h?}c=uljZ>i_mwIz(&hYJVGLc`9zjv?g$aundT3?IZ&oW)T_8PM7oe3hygQT3VK3|diia{b z>Gb{LGg(Aqly&tckxksQF=svc-O{E<{YNKbJf?eGUz#J~{PD1G^l9fQmjQ)#^yERN 
z#W=XDtZna6G#2Pb)I1(ZuX_9~sptr*2W2Rm=_6XBIe-sH8o&I(WvNm|(t@w+QL}Ou zZUU!?pn*WySg&7y1@IrIe2K`{XLH6rJxMg?YyDW_dKT#R$azgxw*JFL2P z8x8TohAN_OoIik>GGaeDAN?5o1>VcEBg#q}j1B`~+L{8P%)q>pCq+&uPs19i#9}`5 zNP7`n51w)0Hpu@iiu5~xr9_v&!P{wwgpvqZ3B%B$cJeXFg)xvR_^ayl5wlp&g$dDD zE`$hk?g)Q!vBlg2C~N8>fbDLLUBisNImd8i?7D9by}NS!aVll%e!0 zs6x2qTSQn=Li2G(h#ETJ9onnmeQ)zoNn5_*cpNXHZ5H5^5cktuZweTiK};NVN!+0rV~NqzoDY{AKDgEF$A1R1!daN* zZDwhDeV)Lcb7J2FYG)!f)k;?1&4*zM7GA zEmeFQ@Qw6_NN@(D<|X={*WnJ*A=WvY%^=GUFOPtc=qnvU>|wjIyhUTT)w?#-GoB}S zD3LZq`x3YO49|9dXWnfTWjicfEbs-vLG->Le08!$?)`>bz6Q-4&Jp$}StKqK&P|Gb zMaURv`^}^9u#qD&7;f--=XW-tNlI?nA{bsUr=vSvtB)Twl1oc;=a53dY80$K@gInK zHA&6C;$wF$UCJ3#g6aH}A^>H%)#=3Ru38)xWEom@03H@|wplk2rp}A?BhDN!);g03 z*1O`V+IGT@Wj|7WID+NIpdXL15pixKo_@sJM!3heHl7qd;~11SuGo(kd&aFY0Xo&O zoM17VaECS~Qn~Z`QR6cMTg>->e=gH5I4Z|NutE+RhcixBf0@uknh;+_w1Trhw_;#E zWzQ|z&}f(JpIrC?@t8_+^oYi3gqKXnpP=X0I4JG@xV_Q$<$D_=WJ&7&jtUR^cP13G zKUGT?GHoWMNSt+)QR}ADHxFq6=Vex?o?c`o2^RUH1%?ozVpZ4O zRTh5_NrY@TGy{W-Z%+4huuQ;p?ne!ClF|$&H79+Cp@7)0tB1icGU|ul2vi>V#=qiu z+BiJjj?fs zmHt9qX^YRVSol=#J>yK-6lF9dV zNRDew`4`cam(1HpHty==$+&;79oeO&e z@+RAcER-iF(ba+&nMaRj@!(Z=D6gU&B5!{Dxoii{#TM6ZA{WR-b9|v@XPq@WQDPlxgQ5BOv4tIA$XKwE!Lq%U#xcAl9%<2=v%<;Sn! z81AgwUCDj_`d6IiRomA4o5jHz1N*yMwdSt-%a!Kf_GNoUL};U#B{E$EItlkUbHns& zuk|p_0NMfRr|%gh#1T>o9GIT=S^j=a zp~=T1Zv{34QTsNy4B2;h=m-%_hAsh^FVkGD_!)|f;|5%|$8+sjG(|K7RU@swat4(# zWPw$0zm}=FOPX9ED-dBGcvjapOil*CGLa-Aec{sF%rJtA8;6EGm=AKWt@|`*MqP`k zeOA?Pom|r_-|tL{f>W*Dql~=J)KeDHbRhln%iwEH3JS)af)+04ic3WYc0xu;{Q zj^-qkXp^r{8?!J8+4=$(2e~2i`yI9!bkj$cMEu!13s8f}qb1(4!4a3x2wC!)`8y2i zsk~6Sz4a^W)~a-yG80~wuDiSPYXh^oLn-r5q+D1rk&h$P9cq!&FsV2&t0SV7%pqgB zc5u0lh0PT?J2X$G_j-pFb8p;EaM#W4BSVF<1w7ZMI>u{Z>2_N6vlicQ2WFnsC)k@u zU>w&35=bS8%%uza!-)k`9RAog-YJO0RAxB0e|03LCE5>2EI z+oMi(qc^^<76l_)kjPLX5)6Hk!at8Urr&@3^b|Mpoto;D1yxQBE}xTw@rz3}|71^v zGT*!-Gcm*k2R@BqSBjtWk9YKK4akVcgDb`LUSfW?q3=M9<1L-%;n|AEu|sPUaWP2az%$-!ZE>8 za+xRf*X0k|ZSRR`z_LDc!__tF2eS%2(KqUI49XPdufG&iR&Ek0eN-VqXO#8*j!O=4 zj9&=W#cG)R*Q^AwRh0dO_{J)k5zvN;Q%#6;L-1a6TT3St!C!#ZTQflC<)uIE=Rm4M zd?@WgSf_!qSS8uTVkN%SgGsfNh9sS8RdlIVS;~GQD18JDs*rMz7w+O`CMP^ znnZG(wU@nanmijY$Tz&B}8p(-xZ{?VtqU zwWWh_4O1=8)P87ct38nJ;KwRgsKhdhJ~Dc;0pCfN!s2uBeOoUJl%EC^%@+x11&wC( zz-$l0VvOgfaX#xHp|92urzlm5?^z9QTNmE3);XQCnVV#88ye3EuGW6&{|dy^dFT1z z~X!fnid&QC?lmx$DZJ69V_RY>;{`cgC2w)3)R7Gv!~{Ynmb0n{B<@chn1 zJeWW1LxrGyKONe8394WNXE)dVYbYQMJ4kd0XXY(MW2zRV?Ct}Ve&Hx&Mt5bf(X+Qr4oo~etXK&dAC0s>3jnm#f_h1Zfio{QTkNCS*)w>4NZf6KyJV7P&pviRWnfzJJ67!60+ zF4d+4qgc-gjY}i@WK->aj4;WU2;Dd+nbb8iq3P^?IGl4P6z;^cx0 zW6g+q^v={Z7aU5;vo0F6H)QXKF@1puXCnJJUiIC8LI#7|lKo)+BLC!MgG-91!ooGO z`lWTKkb5K@BPk}uVXnf&MN}_CffJ(FD05Q=muR;Ebss_eGJ(Wtv=AMcRYU1wJ$u`` zbH3rq*6k4`L$k4LX5(_^#xMnTt%R8&C>(q}I&h&s9L!N2%wwxbU+W(WkPOGY!*z(?Is0{?m88jwQQn zr69d4CpST2gi+0$d&_s~)C8odKXlWFYChU~n9NY!m`abe0uk7W0sVPZuW?RcV|DxX zDk3HdL|_)o*POe1t@AFNI>(fri}{h?3D_Ajb6Ru)fTKwUL<=<%V)^yga7&Ir1E!#MHlL_2v5;u~)xffUKwM z#b^BYEB#^zt#1R<7aJigmonJ<{sO)aAAtCghar`D8Y{rzpNUJBRQ%(4a&!CTQ2Gz| z*D`L0oNS0Y$iAA%V| z1FVQ)4UZ6WO70D}Q!y=xashjb+6 zD=g7AL|@?<-l6M2YicV;C2z#KbaIT114 zp_3mPgkK*a^e;SU*u2!oM3Yyiy@zPWbAFus6?C3iIUD}0)hr{c)5p2HtR_M!zG?3I z6{$LW(M^F5Yo!(HXt2=JeM<5lNiDTmE~?EArVak>mwot683)oe$0D7Pd$#3g<3j|I zs7|_ftEZYvCNq3Z3FlN_T;WpTHxvi_G{U7k)Les6BMG&Z0<_;m2$k+rmFIls8`ne$ z7&zeQOsB*~IAUi*lo9tqeyS;yORJa*RO+b6{zIr=_~LfjFHyeEuA}*KAf_bX;^Y~5 z`t(;3U^k^Cmgh$v!C+v|D~UzCFq8eM1emWM)EBL4A< zE=raU6pAD6A9>&Ya)oOVo>5nts|_)pR);uB_?a3(2oOMWjHvVwa^=jJdj9TZq4`FI3jGMB%bNRHz}m7@7L{jfVIdG0 zv0I}rPZo(ct2DmNYTkSElP}#Z*1J8YVq(JSO~{7s$$gFSmF0TqlSqZQp=<|(1yBvy 
z-L>#W^F0&mUeI^X)AcsiaxdvJoy>Ysdp4f6J*zuEL$-5}T5lRz!Cy~zb*k3b5PaoM zyRVV|+IqO&l(cehhf=Xjo+44*!h1qmgw)vTCzX$|5qlPXcZAbm(edRV#B?Ai>PUQT ztVI+ahcEr%%UqTrRNt5@G2e~Lw*{!j(V@zogC{@ZJUsEP4gZe=rwHf!Ig`hUDBpf+ zlgH)P-YtoE^s2ZG+r4@&8dYYENUBk51@{rgRJbVt-W9HH$*Vqo`(x71Zx6EFXG{+! z@+F4`Zi0l;sVIgk8hSYcJ8V$v&J>%Xi?kU`_H(uMsuo2m`KR~u&6C44+|BUkb{iEf zF`cf>Dhz^zmZ=nXZrB?s16UrSJbZ@T6W~IK^6s#>kkXUw)lF!;uG<#F+e?^_qwvYA z#{==tm?J0CEdTbcbT>x{{QzhFVD1w{=@_cquq7xBvdQFnpFek@a*Atk`(rwgQo4n? zv6-CyDSOj;_P?JeLiyAb#B4>ZGusNaojc^ieg;n{pZ7_lX!Y#K==qOE*Gs=vPvK0J zm<2k5maur@r1$~l%T0#tmjC|a{<>IxLL6zi<+uHO#1ROL6UW8vhc9B1Cj;Dif14XK zSZ~vg${jPmF&P?!^)UbI#ia+~U_h9BtbEJ(m0-88<}fz~{3KioF8P%FdIxFZSVOqe!p0nAkU8a0Xuf*URr8p9~Nx292bZ>nCr752?Da*Mj`;F6#m~ z!W1@ej$dmGvwWc38bo*F9BjNPuP;*`RR`6_zyEPi1VeO4By#MkIC9)Ggk+{%Spu>m zRJR~=5z1EtSxoVk{i7;rn=jV%-DFHZ{N1|*w-+IHgiKN2FhmXnIyd>|0On2E0lEz- zg;o}8%`}z>KJx@7S@VQs%R*$ZJ^l3JuNOp54u3TTWWj^CVAE>@%A5{hJ;j(Ey7iy8 zpOTjdr))Om$?vC04lxvkOp%qE@yRj}nJz8^3oeXYQ;@n`!)8mA@|T)cyms&Iu5Gz) ze^0f6t_s_;;NSfhf#!YbEc6<5iSs0u-iyTWS(hLf6_#hkd%p*!`fO$kWs_^mURces zoP*NkrRkWFBbXg#Wb|e`f{7V)L2Av_@E8~f_yuOylc#2ejTR1hY&&CuFHb0095$uy zKY3lOJ>nYUm}&+_hf#T8C%jB4+`Iyl>`R~r`jZxDADofzctSiGQtNaAf3{$b@2e_@8 zaREQ{4;x;Uvf7$bnAri;yB0h_LDa?%nbyujtr#%EM9CI=yIAm7*D(YS@OK$|e*bhHV{ZbpHdl?02hvymG`HZd&R~l`{gvbzn{W1d-|V#-uKn9j&3OANX(-1-2k7Fe z1FJbc$`BOQZ=&~#>VRL>YKTw+wvjD33Vahl5oP`h1mxHclDSuaKIplDu%hN&jKVhN zF7XTwS($Q*gxi?4*8lX?FNJpyP?=U$!OVWsxCrW{^bM@9{-`d{;`iwDu4d4HnsW(( zV1YyC*3^T-=(-;O(txKrK$#%0ws>F}XeBx^l;v0hwSZAW`%4!8GT+Ik4O$TPGTUUU zdkzkS>I+>4c2p)*U$F9Cfi)jHNtu8@qmz^wC=qRHhiTt|HiGZ2upwx#xWuHWYMDhRJl#NY;6e6+>!z_SD=l&WnXHj{VXdH2=aUX5n{N?gg~dP( zBkX$(;%OwT479)a;LE$vCrNgYnmRL}1#tRp{ku+r6vr1-B1bd9)V-H|V1F#E)$dJw zDe3hTkrF9vj9FyA$Qe(v%7cYf9OLPNQh(^nPkgKr(n;P}QSy}%&mFpDvi z8#wnnYK;ZgUOgtl?*osnpI^X+NkTBo@Ti7)_{zUgtgUn8xHzIzCfDc5<1(6C?Dm-F zzZpq?SvM_zf7ZN;P(_$$m)JWp19ltxDSJ-^ZSZ1sfA(;UP~XYm7qNOTQg;`){ZDV7 z5_R$i0AQw2jbi+Yv3kf<`OK+%gE7ygd7O4O7vbMW|w_W=slaZcJ$d&FAG1ENV^G7O+FS#Ve|Co@NlGg0l|O z>3HuZoF8RL6UxDcJ>0=k66c(`ne5CDZ~|@u_$MMt3#P1j%WM~tF$_d}uGQ_|Iq4B# zwSu?ruG(SHe&?(AW?vK++T!FaXCp(qfrx$;ga5Z;mtRJ^KL}6ix31~OI}*`i`}TpC z3<2y%O?KpK818}Ju?jHzaXwer>RM3ZnSsZYiufw{7MC^k1l#vo zUGX#IZ%$)5AJ>^>@y~pyM`T(OP7-WyD~OSBOBv!n9-fbPm@3_i9K(-p`K~*a2P`@v0|9`vz7Ic)^d+ zQi;K}n@)N$Y)Wwg_8ga$P^m8y-?=#_Sqxv)zaiprG^=11c5er4z%uBpZ0t&PKm~*o z-hr!b!0P0CIAKaL6{5iX&AUN%(sjeXYKPMK`d2o*EP!Fe_QL?<%#WLV*5mhCVwk&1k(|EA~qop^|3pL&^NYB zw{!wVc(sCt!ddioE1R6<9CSQSRSr7A(1?zf*w*lqu90|MK2bGuM@;=YmdwjV4EV+~ z7K)cZMA@;BFQWs)4Nzzh{$FEf9Tipk_ItoVDPe~0ZlqJ1p`-+qkZu7fQ6v>)6-@t-Bd*A!Muj_MtzVLNG4(RG_Gm9wY z;;MHTFnI$QhZjL^+QI_w~xHZ)RV1q(fTw57uB-UFD ze1Fu(DCWn~4C71FLN`a#k~IkYYpFtHgq@dsLBYt}(kxsXTuJCWQi9zJhj*0l6Odfc zn%uyHNhp+3jSUMm2yA-e)C2^Dg_D~~&e6@#*)c)=`A0KM0iEFYU|Vw=Y&u=zWRCaR z+n*-zN2P^&-2rBVul1L#XuQ$&txNsB@x-0>T6x5=|GlXp4UatlC(11a5|CI+&wL zKUXl5^xNp{<{Ut}V|N9+R~#vRlL0M$p+$w&>>bCu(QT#8u!S#=t)h16=GBmc03#7} zlhYNoH`S7|dWE;lA~V^v!{{bEaSMISd@oHNRn>g{#LXfeo47fmK_yK73v;U!I)VIj zGp|zhwF!Q^?aeC*{4k>{Z-dvl=X+*8pwI0`N0n5=*yhVCVmmyQvj2=cDGB5X50O};hZCQ}Y5ZE>GAk%XNt-GKA4>QY8clG^W=M-EV^v6K^(GzU z?-W!XJOVq+qI!hGQ47}$GpP;j?X^pBsZMt^6S6&=2{c@%W~X4Bn)Z(F|%(fIY~KlCMSDBwD<}(P47Gd zI?~qJOIhFI5NGYz0g_7WQW^Qd-p2bdxyDhrlj&-_#oeQDu&Uca@ahl`(mTaM@;hri zW00;ZDnxhd=SiL{ED(*w6K*se^A0qyH7UZ2j8s)rN#2o5w<8@dlUAeLKy<3T>lNc3 za0>LKEQVCa!xcwIZNI7JxwQ6kmZ&Mi`vrXZ{Jm>kVDR@bi(1jU?Ym%Sgq4 zmeOoIPVd`c;SX}6iWt0zU_wdGwHAPebzL7HkUp98+3MtOnu9sZTc+`-CB*m4TC@26 zH*(>MLIWuInBi~17SYcHY#4XwOcrxNx%aD^R+78*D$mM${TP?lrVTlNi#`p@hXsWtIJ?)P5MfgwU`~%) 
zLu0ZV=Uy$p2z(e+W>j8h^jjxy`vKPMc#8kI&hh(=h5ZYHcaPEIH?%GzlLW$7#sT8>!$bXWVZXQ3BJa8DD4VPMPJTli2@z4IruW$;P)0`OPNY4 zJ?(-s3BSK8=a_w^ZPE~c+4?7gK0}04W+1L{;lMszX zCr4wJTsP{H_;nKmU4;mfm5}%_E)COOy7T)!U%4I((nX1#1wfWY`OpIuhA4#)>z4gw z5fjwWv(qGdn!V$5Pb*Ew=_vOzN6mgsGRJ@m-a2RAC?wHUs-Vkq5g~<{hwd9%UY}Qt ztytTDPuz?m{79c_oYu6Ho9R1oo-Ob=8tHVE&oIkG#X?Dibrg=V% zhH-E77)ksA46EPKBMEEiesaCFQ_#VVBC>x==@|$kwbLy+>RobLOksiIo=bL_QVJZ5 z1w3@kWIW*1bfrY(T+Z~w=CrXN^QVuXQwaU#4uRs0Hiuc6exLcvJ*#IQ{wCmdc6>jF z2iWMmP{%tiTE|d?n`kQ06(~(z^KB^uw7=xc&m5{ex_H1u!FOUqoj>7-F zgnziA63|Rk$rs*cgnUuT$!|b+lu7BUu2T|yi}tDfV3KmC0|>YotYm?#|Aa1uQ72G# z?@pAbeOwFxPzcGqqX@pCu_N1XhUC+c4%Q^irRo8*%y*tw(H0vxy~Vevb7{995U&2y zuMauyqkK5yafkkm3-VbuvdBdPlFzG07~{1qp)xB8c9?f}tqjHTtG?H zn6Ne@;xQxxU(LE}8yvVl&S#`X54`2?>!~_q2{6Dni6422i#-K6(T|hNuK=oiAY3hA z&$aU@k@+;>{lqQcrHI!H33$G@&~}~Z#nY+csQyAR_ybCUJN17MCA+5}ee^+eR##d$ zM%W0fL7~)A17DoyGjikf+b>XZc-IG&jy!OHa#lCi{f8+L#AB#p|A2~4W#AApNLn{U z{xJ4^(`2bcnr~XrEib=p-`$TI;jdc*3)kJVvV9pVnu7X?)R%6lU!e{}keh_GlyA!= z*3xa3=m_dcHPF}i3XUP879P3#~|^7k0P)a=^3DI`*2sILfqHfUPIDZnH5sM zV2Y5k>JmCpJmJm7B&9MqH2F*!pwj(~cD7dpS|qHjqlah36Mg`__r42@mnqg}wU8dk z{tti>UIQ8(PI@wssrC?gg`S${%r}Pd;BT>KIg0QE%M5vbhB6c(x6}s+IQD53_!B*$ zBz`Wu4^5dUUc5x&N%(}1wd$M<%^aGA69EQgmz0EHZhj@M?S?IhDv_f8HDePRlK&YB zy#ktg@QJsUts(a|3wPxUgSUxdF3+rW z9DPz-Jc0I`DF08q{PeBlJq|Z1P7VvvW{w$h!**UHV#VAa&~Uen+6#+Pjr+C3N8gWxNWI$u zFgt(m^*96*LP*;-gy-YY9?5^>Hm>s`2tJvq;;A(B5HfF4HR&#b1#Ecx)PL=hHOPCv zt~kor`bcud3+LNNy3LlZAa9aL^UNnz+9(VoxoyL#G8gKgvyStlEebdHBDo1$?id!6 zIwnV_q9R;uWnhM)zJV!nI6@nJpD~K3qRod^=Y}@hOt@%iZ0(&Oj3kMp$tmp$ank?~qL)J}nx%E!GgrGSc=r0%`~pOZWa%N4n(fE29m59xr7FE3nizi(ja*^9p{tk<)`~gZ$aR*PMqrNkQ(GC_jHWT(>O(j znW*&7p`8j$l6N41gm7lUYJn%F5J|RM34L2Dl}t6bp<-6Pf6KnaRS%tU?BT@4x6eZT z_fyLo{;B-Bf+A6EgV4)<1OEGUR?JDtlGhpD(W)i74Ot59ok$Nnn>v$Y4ILlb7hGzZ z4R$zL67`KMVDQ0ZvQ81hDw}hsk-KWfwQ=VY_3B}*eoOZPCKG$-LbKk^TXa$3r`0SU zwE-G<`8d?M-;@^?uC3S3;4-fyF+p!~tX^u3xF2MIb94Vc#LGc(Z3}kos*px`;*M(m zlPA#d60n#CH0yZM)GfeVC$+~gY<2mhd-Li=MSkssVnl(W4uW!PyKd5aM?WQKb>^gy zQJQ=@5ZcFR*_c!Q4F zr(d|hEI+5j7Cs(62SvTb7%U3z7eHir;u0uQ;raf6j(CO=pFbcfCE%?<2k81kv(2j3{YLVQFR6Z*O81pSh{huAJFS`oQ27&e+QSPsd50 zezi5L2mEbB#?Bd2X~GR@0x4qIq`5M5Ss0J_tS0f7uI_GCeCQu?-QZEmeg5;SKfaiA zyX>7l@(I_XgaBF1V9#{46NC4S3b@hcd7T&Rju2DC=OB&Tw`lY_7<%csf$F-bO{0j7 zT9>;U`+3AbF=zBzL<9?66c#__50s$}z0>}UW%{I$J|}YG9XU?Ti=5D(74x8OPC_vd zM6*%mXDM$gA3;xe8kpYle4WTMh)P^%@~L2pl4oiKAKq!S`sj6$;71o27`nOD zwDPD(2uGm~n$oeN{YqPHpwKHt-(Y_lI^qm!`h*ke1`P|1n`PzQ@O*R$yJU6j&hBdTjpn5?@d&c#2U`^MDQXuw5_XXX^-inR<66|Qk7@W^KI$dv%GY~k z0kFmu%sgHHfcJ*d)>n}?I-eZc|X2x}jHd8cXz29TOm%sDGUyIi@It0gF8TO@ynV@Zk= z<-XtadaA#pRU!l6!k?=^O?)BIM(c+B8T~?HHOkfpS zUr1BYY^Z(3q*&T_X_Z|eq^4*_#&38;Uc9x@)W){ zoh=0oJp_ERkEO{C4(gB+>JUvMOjY6NiKn`?$MTXP$&`B<1?Y4ULURD?(s9fwq#(D_ zt+2Dfxp92{Czzp;NdwgX`z}8Un<4eD=sHvq9SF9LW#DeM%%g@nPwmPv2eb? z{z5pusW87ecgQ+(^B0GoD6!M}(0TEY5P~(f|AugCy8>wz=>)zmlCl{5?g*A@7dcu0 z45y4Vc6l&Ud~7Q?du)S})PU(*Wo7@t7M=6fQ_(|V7ygIF&uMN1-nSA}#MIz?q;{@| zd4Jtda=ZtbBeSB7XaOA*32Jg;(QjwmZ0(mYhUY0vG@Z4- zAIf0}1T01`o#&KQ?A155_EM4VV~H5d%Bpw(ilD+~Z+A4Qwanlzf{V^NU*SYGPc)!! 
zIUX9tbgZzEvNmRFVj@Rk9?{sHF4vI0mRI|^1sc9iLaY;>Qtg*8btN^=A21AjsOsOK z4O+^K9;cW$**671m-17fw=#j|xDJUe(v9bbBa`!oq<$~6J=rVrYNi$M=w-8cGzDFi zaH#Pzqi@sWQ*@8On2D(oabwaonm0r5C1mg4ym|Ip#quAGF4)P37(jhr`Fd$Cc3IQA zu7zh!V-uH5gHLdOZ}OHB61{Hn&CEGul%OalhOt_1eEoGqT^qW>MnH5OV#$huPK`{hiBcIh-A z&Cz1z>D8f;*D@WzT42sG0-eE#lJ>i6l6*bi(M#oeSA}}KT@-Gk9aBX^7gP2Qm#-qNd zQ$9pXK~gF{!iKW29@F$6#Hl3lF^U$<<{eTdY9pOm3Ga)Mz5DtRD(k^I0rTC^x+!io zqPi@-oa=jexZ-58U|jJyCYj$la@zciO2qly!~c-of2+>lNL&$q^10B!{8>1)s24+3 zir9rkdJoqU;GqScf9n#{p4ngBXm8JKyJ3k%S)`>PBHQ9n=zN4q3=|XBshexYJ)cvw zMMYuuEa04fo4r4Csd}=}t9AI?cV0O$Ic16teG`?i6c%=-m!m(h%u1y#IuYOMVDFH1 zX2iMC2}ANV68B_@t>hrum5tUyzZ=Z_LwACkNXvf>Hzxi%YqFBflkN+;v`-gWz9fAM zaV+5*ceK_vNtm)-pWs2^rLJn0*1Q-_w51)ShRZC^(q*-z;IH7%>GoC_dPBph#K*y8 zkP~)a?;O>adEW>o`ucsf!$NDRjE7Z_J3O0^tfPzGk!k+|2GEiE~-y-kZKDO2l|DwTu+mJqN7Z(r#uh ztDtZ=6+6d9$67`)G3DC*B&ogsQO`S}gGn2`~w$ zh}Oc$Y~Zmhbf`PX6s?VURBU9bQat_jsrT%s*dMCLlzy+Zzn|v_VRJBB(61m|`>zJf zkA#{N1OXX;PN-oI!Emtr%G+G zs-G@%nl_58uQPmt8C2*LB=Twk_s-uR5&c}nG3s?lHs3!4(l4P+@$ZfO_|8k<+vCrH zH7}4AzRUqGQj8c+1_8-j?A1%#&{TV_pvJJrDu2^);}uO_tWOWKX?Xy_|9?5;mNvaq zom@@|Z$bY*e<;>FvKSt>&1GUO9W@5eXs8q-$cZ7oQ!y}_85vt~!hjS(!F<-&^7{Yh z^WkIo&`7WX{kFjOmj3JZ7GS6F-^6Mk0PbA4FO;PKP9^*j6{m}EJh_Ef4$&$WaD=Sd zzCYF~(oR~_Ncf+{q)MxC`>bH#0;r~Ml@?8cPv%7b;~&})(L&<^L0+n3jCja830m}6dk}h^0$9z`s9yPGyJg8Gkq`tX2 z=Rtqh@5x9`5O8C3s;&BCy17S*J&50@)0wPuFG{}qH1i&9yoI^9gc%ToY=M-v0<@ED z7Pz>Q1d@; z*Kyn5vZLJ4`C9a8yGvblz~?RohdY`BzxfPMa&NwyD9|hU_)fbcH(Z158_~$mFO0I+ zX_uWdqGfXKj_K5fIQ4h|)yXW7R%~oFUHT&9C`N(ZW(g#uuD@^iplF22fF z1J1QogTEzt{*Z_(sbrIQAd^US6LX62Q_aqSP`TBJl@Cx+!}<)R)@7~YOv=iAzr&|= zjMAaXx_e!A0pMjLCUs)D6yM&`?njPhvs(e1t#oMOu-6^QkTgVQI=0V{WI)$_lu9S} z1M6x^Uv{KWFQ-9yj%I^fgy$^8 zJbXz;7ges$+au|7JnP=p6{0do=z0lSzCVIzgAh?KD5`ppHTF`aNmcQg>Qa>9*#B;$ zArgtbKaHgE0* z`;39LD-IqtPlh0FHQueWQ7(e!mUh-}Qy=oqK#I>&6$nNq@B`HB#HT5cEzI8WuN7l9_>gj0{8?N7Tz zJkB5l@NOhMCAffJ&<2`IxRuYNrONX?X)|E_&cxi0mWDW@jf*4AoK=&phd@(>F7j9& zK0~SD%68OPcCQ1hnR4;KCE=Cetc!N_cH`M9;}*#H#@|0P19>kEvuMsYhkBy5)G+v` z6z+=UH+ySFRyt!~VoGCSK780ah04Aqj(yAI&?pw!dd}#=lLEWene44f*Bamb*{3>1&Jq&eQR=L*G`+YE*^k6 z@sW@|;S3fPHZ$m3HA5MyI?>b7GM4|eSP}8Mp!fl$<=SF(`JVKZygh62GlT1ET1D-e zMCgt@z@Rqh9C$@i-UD=pfJr2`t5W@vzd%R5HEmk570k`B;%Ln9ZpcbAz zE}@o3Ks&+{d|+wrwF{iBJlJy-jPd8_ zxU~{^TZ=nQTNn`JwDN1rFoXZM=P%1h9FYm4HW8a#Q(K@3WbpM|j?a`SN;}bI81iT! 
zfmDMWKMLR*RyPz%*;c}@v{A@9;iBv!qB}fa8}{HMsre9yqybUTUfqPf*IfGQ2M9s^ z?%?Zp+GiU4gO|LQ9WyR71#K^7B1~n%zk%VOw`j&;)kemh%Q_)Rwa6BS`&?9842D@> zs08&3$>=iN%h`;!`0t5bf|bCG(fjjrvoRDY4X|!_moiStmB>d?=fYK!dz%rU2S;8L zuXW(%{bE>oZ`XwuG>t7>Fc5WJHAR2j#E188=Mm*s{e@uMGFhSCg+^349vVKQvy0*g z`@w+ANnk#%2MI~zuM$1|GSRpiiCBFtH~n@ViteZ}cceK2rOctst?zR=_V}NDsi3t0 zxD{mjF{;m=Mc{q>rNa*v5=3HquH`cAd&Q)Y9l8KM1)Cl#Z;D{{50~G74vuWTEcpV>4PloXpnzXcN6I;JOd)>W>2_`;@kry`iUK7Xe9x| zGSe%aUjn|MIZIcuz$IIB2I!A3JzX=UQvoAKh?4^(11SIjG;4kUH4xj!JElea)q+k+uUR42V#p)oF zGB9$~eEqfM-Zp;q0eOQ97#NBy2$_P#zx&*+oYKpOs2p|u*GvmWO*7`z zB_kz_`U5xJv$TFU#W~x!UqKOaMyb~(Ehgo zaARZ%)Jnbd75AsjifPVV7iVli2SDb%{k=YJcie~|ayCHH+~K3-6P}`Rh;5ZzXDJeC z&9L1-#S;@LVVNMZcN{~An?NRhn@P6qjeVf+r~|63wr7A+^v7eq1i7E)ugUz4AZho; z<2n8)yMCWFy2-At8j+SW3rpMNPgD08YGzKAXJVBNJ8+bh(8iljn75fb=T4jM+$uoe9%|lZG5t-qqT7bMX8E>w^-%{cW!H-!r{16we<;{(6`fx% zF=x5&=jWwiD^}@I2)o&STbKcM)R);ki+mbh+qT`CGJZMwSek4sbPn-$l2=1D?rZ7q z?5rps>M+ml<`|iFzrGLp#p>!nPgmcwnw11ej(uK7q0zw-=Le7ffaB-`j5EQ|1N=|< z^m7k4l;jKpLF~+zfYcx=Chsr}`PyH4q>$D0%lcP_A9+?NXk5pfChFV`o1d)N-ohxZ z-CzTFa`rECNzC01Lq|u_>=}(e(}D1!ruhZtx8^ed1|S5 zbn|p(2Z(!l+hZ6aUrH?yNWqc4^bxm5GJPDFBb}8Ls1~~eK?4TNvmXx*`r@Ri{eV!Q zaU)FJyfowwgNhdXdeSw6Tbuv7HYn`_@!@NeG4Q(cRuxRPt(?5b;;3wuV|}2Lf!h@E zYMTPgf4cX_go3;7xZ+}v`~C#^_54QtV4j}`QV+ffU}!6^gLn3Y2_(kS3;`>ulwjKv ze$Ly0{2i8dgwxh8a#Atv1F0iUdkzG$EVL=kr2)$#vG|IR-HZ$o8mrCf91#@F;oYmL zKBNy`;(Lyvy)y$E!~c4jMKfPhiC{y$$~XLmP}Wqh($@yDWyYnI^`CNo@mTqpcWIhX zQ2OOQHgdK)Kl`fVYogOTIb+`x?;Dx&Om)Nvw=X7rA@|;lgY;u`5HnOX$x+=rf4YB0 z?3pEjowNE#EUABVA}~P=P72Z zEOR&wv#^?Tb>Zh`U-xC@JIO_MlMMnTeB6BBuNMv+ju)DbmpUVGJkO_|6coM0MHH89 z2KJzFjQ?_rQxNLcWX-7&N!Oxpy<5Vj_>}36#bwYlB}*(NedpF1XZCYJ{QQNdW6At^ zVq!L0WKFr!@0l}An;uLw`B~6kzAw-MrsB#XMTyK`qoDb~)V+g5kAci-ND{N=9ysbi z6(1P~cDR!Zu|Le6@F>ajfSwuJH^kfOm94&cDg@Od#QDL);BQhSPwmM7_hGYOVRvdg zZrDbws+a2#nkGIbYAAf;UW1lxgYrL<@fqMjm<6eH6w7o=pxq2x`r#>I+1fD32!9Q) z067p(9R_3+m_sten=x{wje3IxPT}D!bcm=Og*BG@R8p+0!P@E|Pif{}6`Q#^lOW$C zIN!e4)X~k>vaeuZl}d3|((Nw&e^c6vUmdVDLP|VJH8+Y((7tC6M@^cPH_ank1sM;p zh?UU1$?S9Y_wF!|*+F=Cb{+DYIPqvR$+$(tHp^a;6Wj@kO_YugOD4E)#&Y$I{J^~G zm3A;cyaA?Drl(7e8^s{mAjb!-=ge2;%jyOq;Pe~^!7=GxI^jFpYEcYoPd0t_N(a+K zg%7fru8q*aS?T2?ReWWL)4ewG?Lh0V-NpH-c}y0~&vB(w*lycs0{ITFeBAgWVRpsc z`N`0nK!q+yx`&5Y$mqVZJ;S*p)#3rI^R|975*Sn^7l8audw!hfK`@)PsS=mA=s(2r z?>FV+kpJ}az1ig3yz3*oY3fQI&77*e>)jl$ z=nsKp@1$GRN3Y3*-Bl~osq$|Dbkkn+dRq+Z}N zqSIyvJk#LF&5m0%5VlC=MhK@ktry=E(Jf>swYnGAicD^Hs5B0qkj2-VvVR#(Ctrl3 z!WYfxXR}1ObbQ(uBo#x+d?6z{js$s_URyIYk#mY$(_Y_;$WG>Uo~QW-;gAJ;i})lj zgm4>f)X6>ZwVx6ky7TDIF5qH@@j5c-dCOx~DFxUrP;n(>D8BmM%u#2Jq;h+#kvQxY zO;a*X+mv#fB%Wl;kNbRZ*q{yRBbQ`u!TUhv=(QJ5*>~EJ+D9F?zSj{<&tuz6-+5Zm z68Zy01-a^rp~^|uK9LB{*CSJNi-kdd+28_z8>AcxE50ZnsT_O9!#3H zmo;Swn#u}B_1z^!!}t~$Ie|I%$FddTG^KJ_!{TE-K#a#qe`e6b6_R}?u3i^4;9<%E z6?>9tm)QS^gnxsdpki|gzx zD<;tkte5&KF%A-S8!s|d8buhm*y0&TJaVQ`v!PAeM~|o2L#PE&H0kGV?8Vl8c9r2@ zxSs?y*4Sr(imrEbb!?_*IASR(A6oaW%;=+GM&WBtqzlpficm}fVBew7H<4v!-C%t6 zu6Rw$v=i8O@PK`XpCVn_v{x$gGMg``w|eNmX1d>hU{Vd-4F!CvD;s#-zC!H3DI?l@ ze;hY1Dkyg4I&lR)_Gy5+p#IDVRp?%P830}ISB*n`s<;RY z&0sR}S3W6ztdDS;LhfN<73Tlk@rKK9enprGVTp}xes|^&0mo?XM!O@+k4`!vxx0Rmwe?lH^P_KvN|6KWmkQUDCmxk z4TzFff{0~TNzP|Si8+BDcvsN&bSn}@!&lQZ)3!=*9OvEOvaWRuwxl}1R--qv)ned> z`b20^|IaWHjd|^7G(P*b+1zaTR}W+#h+In=^G+2wB7=GA)PE$@ARp&$99F5zEFC4$ zd^+FkFFRKCK3KwrVDe!YrxN|U@SF^Bc#g-z8`9Kj8NKldL<$euEOU{|9$7iWiS#u- zr+YVlz7oLT?gCiby2GRJFA#wI^~5pPUHm$$0|#d$ncAk2)8IZ#6h9E~9PSNXhuE0q zFlWAf$zhK_Zyyl7yt6eAcAl9RSFg8vyT|M3-V)TCC^jbQiaoB`6p;!MnH%3|U}$0+ z)V_mG`69nYLD*~_%}=}LuVGrYtip!J4WXBzwn4?EDDJYGmZRQ60-Zzo=!--=+7{#m z0tw0dM=vT!ui+MWg0nPK+Cb0nStU1=SHAg0RF 
z-9J>D6K!+d9Yf_K(rv@JHl@DKEQ>37+D8<0I&vvOt7b0)(O4OA_?1_f9oKp^WfT>6 z6?iqd#P&2KcAO>P_)ZKgapkQsC?i&(opK2vc| zQkN*4XIRI^5T-qHPyVN$IjTVi0~uSnWCQwW+W>TIb^toF`@L@@q|Kj>?5CROz88QT zfFmop@Ab>=%U?}dyX-o3Umgrx{`z_uR3iy4F`iP$0@^PxzvPE2se2p!K)RZf)cx(Ynv&4Up(hLF&%acAERCx?v z?1-dbJ^9+s?bw7YDI3XU>75W7M$8tR7WM|&lB5)ER=G+T&vd%3^gQTXbVXNf^Hl_& zwKlLE`F=Bqb(0x-h9E|R zG3EhShg=j>IRX=tuN`T0vK+l3LRtm6Ha}pbcxbdSN8;_)voJ@O?2Ou=Hg%h_YN^Rt za{`lz&CnQ_9d53nQ-vHs@amS+7p?132!cj|w~o z)xo1wpvMl4_qgVzIKBFfIe!RiQ{(R4AM3Tkv^`vk83eibiEWi&Km zdeBILq8De*oY4}+`k(JOyd#7(_7e`2$IF_2|CWFRt*#t#bV6H*RxBM|WSqnUS5zl2 z3_rG&E)qHp9Fq_e1O=)~Bv$%)_Gf7cNJeV{{DUPs#{pDC?;uMOxS~w=hNbsRyw(|I zXQ!;wsdS4}`8MbPBG_;?9OX-_g+}|m!Qee=5Gyda47{5JRSxDC@6sj$GhZn1^5uit z?U^C!)yyiUvb=R7brC|f9v>yB1k3C?Zj$VUd09TF3E@{5MuZXDjDsbXK}6epq#vZ^ zJ51BP1&1@Ce)hNyb$!h~0DfYiB6hLbZ zTw>)#TPIs+i$$cs9!J~A%sKn=;a~?_5y$3rpa?9Ec+*!eK{#D(nGNEt`3ycCv73TT zUodR~h=glF`BQF7w1$_o0qh7IH-V1B>c*)vVhz4+eZI z--?9?!n+nY_x#wn>zX{lSuRjqKZnhwh~tawNa*7w2=&LIBWUUXsxu3$yWa;u`^lrF z;jx;UCfwarpl$F5Z?c)U$#!4vld#_d?+&%dD_+r2FM~9`Ha^G6|Ger-P8aqosIlu} zvH4=LfpA*)=STGb5Zl3=rcphn*CzaUwIBOZs@D%fyuGVuT6AD4Q1{Vhl;^(kEL+^f zXRMte^PKjZs$$PtX*~|d_1d!TobKG~2#9Na6-1th_SXZ`w%vOr7VZiYlU8A@I21u* zKx>S1$o2e+UMyDRL1lOaF-bY`+LYu%jNCf70`C^u+~VMg+v?)tAg@S55MyzULpMM* z&%-dxXdhzWfW;i14Mv6ahfwx(6VH`+ycFfv-|kv2B%v*c5gP*Wn&JJL*>W}P#UHa` zxa7o_0;zyT&ioyaI@_xZOE$uMqtoTW=i}^%{0NW-8Yp>%ghDhF{M8ZHXy;kJy28Bph<23Ml~fdMkI?D_cfJiO!KAHM{R8h3vUT zxJqkM;E|mEI=)RinfJoe@)d1;ee#s-wfu<~@aDiZ3Z#s%)0jG?%^|*JZdpr`??Pkp zngtI6GIKtX+Un1sGiv11pHMpjH(&>1ueUj8Emz#$p)l&kBcVMo*> z1CF7g5xK)$Kd&a#by%8wntI6U@zBRhgAzH>Yl}!e969#E4P56Md06M2uGqA`$#KTPTj zQ5)=U7mv)Z#_gEAhYh3Z;Zi_QJHw1W!dk935H&S$xubAr&~yfi3A-!tk1Tim^v#Ta zPhVQ^{^xK|sI&TZJomouVB#xzt{E&rW$Y61^9krDR~G%q1QV<4bM{+IE1V7=eloia zhTYY8mTRH-%H?`tuZhd_g@>XXCi#@LbzgxxM*W*wh9nE0Uj3r#?RT2zpKq+oT6{7$ zcVW7B)?V(NsN4b@D#(h5*~}xf7jQ+!{|?|{_6gkZ8t?|x_LKH{py+YJ(up+&EKTis z=MlV<HU$&y-} zPE{SZmr^5mzn0b7exA_u58w`j1wdjw+KmM`+M+r@*euw&fO`Y8Ao(jLR4fcS!&-ou$qq>cUV`NvtMCeRWb9PV>BCueP};3C9RyhJ0GPM)+j; zNo*APL8~3qcAa zG@4_ zyNm`0D1`Yv-9|OY|8;DwCcQd3$wI&)UGBlyt({VvGz_H_H!hw8EBq@^!}(}K?^xpy zfzXZk4dO;}OA@F~%b>p8dH~1nT|=nf!vxkb^t-tv4?0HHjZ}}`1Y={MT}Db;0c^tW z&`=YPSBVmrpq=FKpu-TwQjH;+Twu6YCekG-S^hSKNo`J3*DiHzQd1!7-z70C!{55mc6#eH#LyEphw__1?Z8cXuh?-c!-vi zNVRnWLJ-IwLsKn0!#Ouq3x&*f>*aY`U(#P;>PEp%$s89vr|f-Tba_JPSMqKUPd!m% z!pvUWwpj3Xj|DuwOtcT*Z!RaBbL5AgSStA{Z@&8YM8tnyrul0*%s@e~;?*woCm$)l z1Y4&s@f>-I0{}JCS(Lc_I#(@VeDH-k7BZ0y>_`20S8$Fx{Gss_OIv4}Q9R#RvLT3k@#w=nBF0W*+-EEd4l}KG&FN!I)5cgGU?|Q;_%jdNa!6 zL1y|i0U=HDf^x)SVs=Hz@QWQMw*BC_?eWrZ$+#6~D;s1Qqw^`P&nk0T?2Zqbzrj1V z*AX^4unu~J1WPDq%x!-UYm}CTPAhteGWXQ138N)SlU`wdROE%gKu(vajJiNB^bU`k z?fc0${H}ZpAN-c{bnPpg&?xm5gCnyYS9btY%;I$)ww3d+d8=LU9f^u8d14z}=WK4a z58d4gRdn5>oDa3I7$;4hUS#K?Fg*K`s9`0XQ$`c2N*|&sG{zLdNR@*3jFdurpCO#! 
zq2sXzn&;=`&^(w=N~DX3xQsPZ`%IRX!MB>PxOeVuh#$8e3N5qproSCo$b@_zH4~xX zaa`%?>0I+99OTjvz}_Y)7NTG$P47Q4G27uAIC#ev!Evjgh|}0Y4RKznYy6;_Bfi%#r=hTLyy0#o3X~U{|(E!5m5K!nttDI7&UIRSr0i|zrw;a zF)KWe=2fzA&hB!_B!=kx4C1!y%Nb73Wv@4A$&O2s+7@%7EzU3-05<=KB^q`L(1Ml8 z$gnaInDH95YV0t2U^78HIQ@g!m1+%8{dmLzHbd|*uw3Ea3yaxOw=Uof%e*Hoh{-c_ zlgRRU&lb8Dzo%Q~;(Wu)@j`EPK3%OCg~lDDHMGgl%5T{{wwuhZBIBecmIlIY+ndG% zTs?YUR0T3xMlv^YOPV^z{)`7MQph&fKdBeATWM;xsm5x{258tn#&fSs*_AVwad%x- zc4)G39S@4UTmU{-wIwK zp+0V4z&o_9d2-)5unzCnFKPqVZxb_j4{Z)4b6vwa`mSs&lE10@adEF2sBgp+oQy>p zNHr#|ob|8hU|lFORF>k79cpnFl=Ag4r}e)@(Zsau(9Qm08)u~lH2VF=G-m%gUrA{K z*ieshdvB@9{agF^xjrbQu{ngxV;T_$EL**j;0jiL%%G7J#wjiiTt0wf}Km0zfZcjgtV^4PvK zJ22;i!IY-VS+=>69XiZ8t(v&+{PJur?fnh?KQ~yKl+k)rN^IKTZCCrsy~4dG)7smJ z;-`b2f+i-WpC*njzmMXXJGLEdJoH&Ne>Ab&$XjW2sOGsE_I+?!`+&t$$>mo)Ud7KI zy<2o*F88=K2HHRz@APcRl$gx>YcHl%&o9i1M?+f!M{i_WR{|sCr)BOtj(~kOIeaNU z#%JYyVS;xzbX~H2|09-Jj{L9QXuTX!o`KtX0i?O)H2V2Xli@nB3k%`AW-^!#Q@gt(~WgQ3B>8|@GSwRrn28M+Le6(M{ zsi~xSL!{h@%bQr3G_Ab)u#DQj6IE^? z!$I#{JoZ8h+@wym*c4Bf#cEC%=!$$u(pQ#69+tic@rD+cT*;HRJrhWy<4=mlf~R8R z{SFu6FNW7uuj73hG|8ZgUUE2a5p`X}vVEf;tkfIfCU71V+B| zXj5p0rtY2BG`t~*ef{T~f0H<7Qg^QI>qfg5?r|(EPnp4zLH2`+&ab~rsd$6eTtsnb z&A|B!bENY{FeU352Xh$wyGOW_IWe!GNzbu^31k?Ue7|$Ya=tSUy@O4UL)SxuG#$Zl zP~ag_a(w*2(Kcf?_~^qR{p%1Ltoy6cp6tx^2JD$X+F17^*K~@k2QUv>9|B~v{K?csV7RtG9Rmy=4$dU9r@yb{Vi3k1ufDrdM z>U?^{Z-W?VNR$6kj_fhFA;PZe4oEzf--TI$M86mgB-H%OYgDb!w3h9*P=QAywLeXK z7R1foi|F2FJHeS^vyH>B9&;q0T2+?r;I0Im)Uf5^XO7s%{*WnAd`XRgK zkASu(2A%Y_t;hB)w-;S*^!rPqzcNu#3Cm!KMBZ`xdcfEDEuDpJ72d08FNg;BEs{IS zP4mTuMpx!{ow7dvX8j#+t{vsi;@&UxY9`vwO^Oe+cUHRD{a~t+Q>JyfLnh=8?&eCw zJUJ~e8_?k`kbau-s+y^TA1ciLIdIITwt1L*h^^j$wXm?KSV2XAk$RsFda;%tqN3oa zEo39^vjp{}&3qpWif#DJSaAH%{c_ZNh#D!7Y@dAz;$>Yb@I`awunyfXI!j^Q7av{MqVs zkVS3xrOQXvuiNRX^EVKZO(<-t=MSNQx$aCzX&8XCr-~|nsDd@_ad}yM&R9mphD(kQV^-FJ%Tv);B|Jc zb&stLCfA*ctAbR7q}xiUTkYVN6dY_=YOifY3VRcA=7c_Y#C9dJH@)MLcr%N61ICR9 zN<1SA#^L1O4YKTh{88E7{K7k2bXk`%kr>>kPVo(A|MpQY2aPA?Ow^}U_l?}c3fZ5U zl?|FZWjt3#Uf%h5N28a&Ce3(}F0%zHX&A}eNPYHUpQYeiRHaogV8Xh3|oWDE8rt09=#4&HM^yOJvSo7X8 zICezpf2ck1%YDf3Em)txEr=E2tXH7qNM$pBOL(OrmG*Z?F9)X?avW-^Y~QZIELFf zA-z!FK|1;Qx)IGwj0Z8~_unaptHiVp-f&Bw%)Cse=Vl%A!zC8>sz*dQ z9`l}GpNuFUa}jF~5}Lrsz13^cQQC+QPv-7K&MS0hKj2_lUAahymFWagNDysmDzUh7 zxps#%BfozY^uPa$m)YdN)jlLQqg1oLnzJ1h91LY@*Mj} zj8TTEsD8vZ22de(%h)~>`AucDU4l&wq5d#=!Bjd-v4BSiA02KS()%VI!p23TQHUm6 zFO+u&hJI)O1IR_N+_sxs${BU6{94)bN6{LCs^})yc2*ZZi^7NVc7tjyG{fuVN9{{y za$@<$RiGTz@CU%q+Xp|e39j?JSl6FtYf?ukb?U_kYbn+lt+T8UygnrK8x1x0YnStF z6Q8yEr9&C$v$^&4F&@7B)M~Z?AP5>)!77|$P9h)6DX$|+NM78>rp1)4KJNqDR#G-i z$Bj%&RGLSd-1{1m=xzCizb_}dnd5&3^r~C6X-Qc|6q>2fCqN`u7^%iWU`aJeL5A>1 zZYGAoe5_laJeqx)o(m66^jL&CXG9ib($qC@rq?&=g>J<${#~#zXYkMGJdM87h=2MB z&2f)&`jr!VYM+7Wgprr@-52rmFP6RiXC@w=kP|L!RULLOX*CacqUH0J6Yo~Nqaqd9 zd4u|^B;F5J#GsTtMYZ^FJ^s!xoe~X>19w<0o0d5a%4OIB#(?UZ0s((%2# zOOx?*#&BHgSHce($X%qm^IN?r9%mQmTjqP1goc=`B#0*!P>0}zK0_aTM_VZ{p+=qx zl3T5F&N*rrO3|ON7A-uMLaQL}4oJVub~iw(I%od#n%BQT-g5YKYwB+>9fTG$;O-9+{^=N}rVJ8s$aLHtkZ2ti&O312WpO6tMkN z)loLroew+~=D^`#8QPFIyUWOGs z)C0wF#ip0OJ|!iRCk@BB2`WfjEp_^qj}>IdEu*IF31^5mkQX0>vTIL_zu=*N|GzS3 zYvBQu44U;fq56*maE=9w9sfko5=ic53j$6%4T$7?D{M0slo{msjTtRkX+tw?`NN`G9iR7;8tsvu)*_3p^pGkYB2 z_W>0B5V5^ICZ+S=(}Fm6%A$6EkFb+vsUqXyrb<=^a`E!?J$#Zz{1E_e#l3(&DU}2&Rds^u0;HhotGtocn|8jd({H{Xhq`j_ohbsB z`94r^HnIhsEgi05Dw>W0ZpiBy@3Xnj*Ja>%#o0n*J^beuYK7?n9I2kz=Gy<$bQ}Op zKnp~z>9E-ob>=(?u#@Y5?Hv>aT0 zBP`xPXRwpR;5YW~QRdH+ml){+&k8K+I4+0Gi8t)O!~T2+Z=(Ysa&jh?%EL7FJxiTj zJr~KtP{TNCC`cG`lqP$;u5c^iP|1 zU7h%u?Z}rtn|p-15E6*4YZW2%Y+`^egl3?E36nu?T}5;hRmkSC_F{T+IO7g2Gb467 
zVwB{eLlO#Sq{7DYR23j?D9XM*yq0bW5LnC4pxQ}H!#Y+ESAnx_Kq-cr?BvA0{7(aA zU>B~H1_gh_0ulaTd-9EByDbk~J&p~v^F@uz_Lxy?X!hl`M=;li%qQ)}zR!3hK~?o7 zkL2U;VbBEpz`OTQ*H{e+F`u(C;>#%IrNn@8g=p2N$Tl`WL8=H@dM-Rz5(9e%AqjyT zj)@!rt3qFc-7N1`QP0bAm0mwhB+yRF<_t{VZXlQTPPf)f_83KstF++dY`d>_)9K91 zKaL>2_q_w02LJd^{@haWcif0>Y|YJ!Ujoa|mi=~kO*0Sr8(izo#GkvCDwVeanRy-j ztA9Lcz!=AvmLW65|Lm?cfm+<#>}NY~GL|H*t+QA$Z$I6`gExmfA{p2Wz(Gqr&S1Fl z38K2P=0-;3QcIL_b_Cq^h#}p)b~7QneZN@3TquvIe*(gs(`tXbJ0gk=3`!hQF-QN` zQ71-$?73 z0@g_D@sdy`-W&l}jPaKr*dYm(p81xA!%^o|@2`sr2dTEB zIzM74E$Yf)WyjfU=wqw$?%XpOe9&{4+ zBPq%Q`2tRQnFWDK$wEw>4&0~XYv^Eok5&mf!IzD6SSQ0@s+OB(`mN}B^Jfg0!uJ3l z`2dt1r4P^Sjlt&2d;YJLl*ba;0o&Yv@cG0=-{~XYmXZPHymsxQuZH7}G(CUrhrmm! zfLpP51I@JTbisa+InfH%9*Iv$it5*rO0YsDDZ&Y;;>vcI3H*I;iOSSSkr1U1yVG@+ z8Dnyr-4TVA+Du7_oxZf%uo_C=pU;`7CG;WRKvq{sVcUG?2e9&P@J^fI_nYW})mXIklWe2O z5HJ(`19RO84Y-_k(Up4~Vdl>SO1haEmt|g14gCEm)B)H}I~~M|p0n3Dt$ZPZhvSy2 zz$0*@2=T3V3R=+UIU}X zaEGZh`(gHw)^Mxm$=u0^=)tR&ZF)YJEfsg*_3r8QTpbZQi4g>g;FpSv7Otyc92n1W zo3Y*xxQFfVLeEi>jUP02xZ{YN8&$OW+y`$MsM-487ez01fWGAK_nCZCx@*Q }n zzDO-9WZH6#9pY(bKs8x^_Su7{>Nfg-F>rN)a7xM`2`yWWgh)u2T$MB_9X-SIU(an) zWSgP)dQmwFpR2&qcP4VPq2qp8$anSmy85Ua@~=uoesGR~SXsD0wt^MN^*Vtxmi~F= z2@q;sL8ihvnurY}j_Pc3sWTYUCd^)91Qk#;~Zb!O&jlo zo0WCSQ~1&whhlvo!hUyy_`52~$R=obC^R|Z+4sz8iwm3diUv0mxC?SU_z71qmY>$( zIlaZ=_gO`KC!7LZZ~v8GFRB=Rhy20h{S_}B9$9+3ubRHw2(Q;za}QJqaI0)01wbL; z956;g;)9X>6MeKm9L2rOavsT3)`y1k_V+Rzltp;go)q55Zclc-*6@0<; zB$-DpMX@NYm@CBZGooG}@D_xE(Wy6l&JU`c z>7(^!G6iH_v#40F8p&Ygf{Dg*2z@-I|UwC96Se#*XX^sDzx4z@rID7Ar~n zoMtQfJ#(!=2D}q~*)lU1vq?P!ds_E1fQ1#^-FEvi^F&)yMi&gizmu+GqMSHN@uRQj z0BORa&HWhGG^Om2w5C8grk{~ zo70Mnt#vanq{2989c~}%^RagVCo6d+S)tTkGb72<{iuoP0ENlnV;x>35Y(@7Li%*m zZ^f^!t-amix|^1qC*CGTGwhz8`4%)d?W2jF2bx0PWQgM1XZK^WOa1UH@jH3F*ds~w z--DkN`R*Pgx?t38n9KFC0mttea1$CgC35I0Z#$V!oP9R%$WJAMRkwY-*!bR%J*0PR z;M*`Kn7)X~s^6l1^-f&=dZOQc?&AUbaoHtZd#O%M!RH(g6Eb^;jU+SkFNA?0?A1-~ zI`MoT1Td8sTtc&V0b^ml+PFJum~N3&AK}6ThFW(AWU$5i&iSPrfC54zFoj?lFs7H~ zHB#sM_rr1dCBm@o+v^#1Wwu>xynZ=o_7kfVzbmjAP_ToH2O3dX7w#TW=>vR(W< zFgUudm6`ne0tyowMK9ONtsPg_R-!v4^iBJ#WL;1B(Ap*BDIgl}J zqCN9BMUf(Un^2dwYabweHOsYwprlWlSo#aZnM53vC%A$dfR7$CWKIGuTT2KGZck0D zm;#L3DQW7VSYe9YY~uqp(+l?K(Z}K?qZ_(KG-+)Dy`AX2!DCQ%bBQb;h!g?dMb9!` zB?j=IDXoA7%YJ3+G5oNgK=C!N?6d;jvjq;2bH78g*pnp1YbW=8C#2?vEjIklqWj6% z{jSVaT{)|YhVM>*j&J}u(}fn{$?_*m?;Lg^7ZmydC)vqx&k_n*>)a}@SLOqae~%xG zgm?EiurJBb$sWwg{f*L*wx!&lh^n(gp&Idd?Y?Dmj{UxyrSI8Jhd;UB79C@HUj67{ zl<4r&ASwL>_IB>F(3k|$3NQ5PJ&~)nd9bP7ukOZ{U&wC0eHgY(LLTDvYll#Z*!k#@ zPMBI`A*o>c`yqFqf_Twetx;TV6rvfU;0H;=<^yjpru~24B+E)o>M;Ka$PK9JU{lJ4 zdt_JfU#-bvb4R#ukI}_wK0Yq@BA#JvuxpyvRHT#sNcZAbG8SOtJ5jxP^sJRO$@#uu z9Yd67W~eiUwTmz<_qEHxf^g>5ayTXZY9@6|lpI5WJu?f-;~MSUf2X~#Q~-i*{Yk75 za+*c06ImSv*OQrUbG1JThi zoQRg#cA~?){evhjF9lmmrs_?=(%qN*nXN}irc7Mt@06PtqGTzAAWx0IDYO|~7DPO{ zQv+!zbHaQOsE1yym9A+n`FqJs2}CD9fA|0>8C{GU;I!ZriEbX_QQPwKg`sb1@)-cj zD5a?}JmT_tXStc8rZm`VWB&JO14@a}jpPThyVH{FLdnDDr5FJeKbthf5Y3a&gh;|P z+%FNjX-Rx(UzUBf58lhTbNOVR~0{{%r|2McZUz?ZEVoqFJ(`Rfxp z_2Tb>c&%)PM>SgruixhRhTx;#_2%}{wPM0D28kF0r`-Rx8DgeT70p~Nm*zUI#eW|*^k zP3zh|?(2wxzG1S+K1Cd!!HG*0^@S83gU=aSBvnPFU*Z$Q|V$s3sCj*)kUuhlQ zY*ZW=<)zU1nnzk|l&$aWz&#v=7z1^U)Qf|YnTsE&I?c)=Oef=5Lvs>18Xxkmi*oq( z)=4HyQOfF=sc{9ZS*yFP0%{-%b#F1PVUc>5mFj&&1F+A}#GMy6uGWj%0{q?U1Cv=iVGN9J)?a?y&U zOy-AEkP=y&L{UZ83^cEK)rcjhK;iKx=*QQ(Fd7Lee5B8EOTBo~LP@#9Lvr7;cnt># zJoq!VSY2N5s%D-)K57G|Ut=-~_<5Reu6|zFQwz9 z%2>@!u>c{Q^;c2UJCIWynp^yBu>9hyDRSK$xs%Mt@^7>CgF0p?Wh6*@ubE7WB{hA` zMftNuOHvUl`TgEUhFD7Ix@vPCtFa~=0%kmW0QRH!-Bew@uZVlvVjXCZPOP`xHt}GH zY>e6oiK%Q<7tb_O-%U`0Quf}%>pxmm$@U>2_Qd_gq$Zs5$eeYn8zaY7yDNw_3AD-F 
zj{n#(L~@|C1zDcJ7g0*w!A4isPRJk9VkL{=79+F3JItHCfaoxTd{EegpJf2=IJRUt zBq!b^JW-=w`!0XZl&C7G&+v)NLB`(=%}7Iqta9dT-T8~o$L&2jBB!3jPa0py9J zuLYIf5{>Mcj)J{#OGoyKwM+lsXa4ND{CGp)D*c$0W6_$$$|3N9p+BCIvhE{*6jAfX z=GrriTWUKD&peEbnr;*|o+bEF4g&?f$=olWSls`2LGmUL?`@?VEUva6w+d#wCu74& z_4=l1temrhH|pa?R`KmsWAB31bssoMQHv_NCGt@}y0Sk=yAY)goGC$A_U_SGuVfJ- z`HV1*4Fryb%zXxiYEy0&U)@Brr?~^`>4p>~GhKaKQ@|zkx&bwA6&&S)o?ZYqEsO2P zprii2O`k_}!F&{L1I)Hh*b?dn@)(37R#gfT&>d;l9pq%6$HOjq_}5@P)*dk0Vn^VONOwlbjKaE9C>`6f7GC5!Z}2x9A27vmHP1 zI~3G(2%Ta%0}L~4N58L+0GB+@|EZVH2ON!0zJ6n}4575dZ}=RDphP7m=5}mmVaY># zF$z6fbY_L=nO|#!{`tNF`@{pdFeW*huA-?CKoH3qsq%0Kg(Q^)bCQ6bc_bOO6qfkz;nMIPZCpb>d zo>n*DP6qDI5~I~x2RH7>yTl+kBwFvX-c@W5Gsn?kBVh9fCT-8W8L}P3mn>(5IQYl5 z!Q;s}WBzNUzKPvkqDmw*t4V!#SB1831oJ;zKndH+ft70>(yU=q>;LYS8r+1ORCdFQ zpPi17_NiNHMbA(H5$>#e^N!M?l*TrMO6CV`FFv&Us?oguvV6S) z@e@ri$3{jZqPERjYV>;W;PoeOTeTj=cciQ8^D3ktg*o=i(9a@Iq)#n8+pGoFullV- zR3@^@*HVz@?4+LR6h2YinDHqDlfrHoO#T2b7X%%?^)KRI3wdbyQ?}>y32xzN%29qx z-5^d->7aNvdwXhcvxyB8f#K{q=_4mqJ^6?PiD9}jenbQ;G-6<|8=VI2YzGXNT?E<| zyghYHVZr-*Z~%bQX6)spXc%#OMII$+o{|3&y1A#9hM_$Snii86ybBJI_>{XLu1F8zI`|d#oRCSOV3wE z>WyK=UIyh?>u2Z+TAuSCUtINKJnZZGedS*s3 zZ}X&I3zy~*amLr4Nc@|oPJ02!0HTMC7)~6sH+lhzIFTuckOTmWT%xu3R;F17FH!0u zLF(>yR=$gjgsRv-#dISgZ(&EQwINUA@dpR6Cu6^fHuK`rOp9I&DhDa7^*bR2-|&zi z=~3oF8}C5ZE|zq57gV$c?r$3erhmwmH{j9G)co$%hnbij9qt`w*{TvHJyQ2=d#FU&0~TjA2@*KW$tZGqAt_Qtej)6r`cOCskv z$9Ki^XxMtoxt{bj4|+rHS6u(>`Mx|5VhiLvKhn{r_U@+9A%*jD4A1hkbC4r67y1UE ze^i+xR)tbTgJvIbv!zQuE$dmOaB6o-Yx!~+{35hW#d{F3Z3SyOBH+>Vo(M)70c#$y zgv7=Luidp3igI;9U>djz&ghxx$^~Pgi zxe$<)^*NbQWBE!IX0pr99`3G7()$g)^g-si?8oTnY35aUx<0de`N)fCjz;fA`|dM_ z!|WGco2qsM%>Dm_U;hzZaF7;$Bs~Zh4!tKULvh1}=9VQh%;?$UC0lfK2R)w)~(3@YNAv*Z)s_n zr)SaF>4clj?dzMTud3Qk-=iRnkDM#nfQ4Zo!-6Vu-_&%u2A2eNuG`Q6U19!q**9+H zIuOzmlu{%KwA&7$ehO^bROrunLm24^QpOIG!UmGNs5v$)DLds36sk>FhvgJtXSN*O z@N6Gt7KYSI7|%kgae=g6?}}o?9c-$R_3Mk1?vug;+-8wp+%r_yulRZ|3}oKjW08vc zpDNHRSO%ms;%zYhK2D#Ll|?5w2wbvGn2y;~u#` ztdP#Gzp@hJn5|~jy|}Mmrgvsq@mWI%Zx@FkX2C7rC@AU3Q&m+}Y9gO?7!8%b;60k? zq+4eYir?jpUX$5i66Y^fjt5H;L};!|mR`T&{mu)E@Mj^*Kjw;^orTy08UB0bN>Y8Y z)SDvyoJJi&Ky**cD`$#6YTe2$)zFuE_?4#PZCYfNg3Ht#R>@)0Rx!Kzlr5;8HBOG~ z2>NVm#B*xcw%_v2k0oj>2i_;5oZR7ryR^aXa)lFiB(Gi1FljhIgmT1-((IvYLJ~)g zfg()Zj|(YsP>~@uPC2<>CB@5>ZYnE$4E5;A^LwzS)z!a6{6ik~htt3PAb*8df-uk= z#Y+;RNT{4hXLt&>hO8dMgE2n<2kFZz!s#w5PImBltk}_INb=QHxQiU`m|7+~)8*X@ zrKOHNVM0swfz!z!%apOxWKqvhQHn4hw_U}aZhl4;~k=ogeh zc6>^xPwf$Wkn<01`%OFg8(W8;Me0SiDm*F6d-)Mdc%9u)hLjHk#QDd?1OH~FPbKsl znYjL6KsO_Lpyv7mBmZx(A{N&u+o%2&T)^faRws2QoZSuC@zMbq${%uhk4NA>PDtTf zu&Q~B@owlY$ML$091+y>um_vr`npci&!mX;zr{58dVnBZ{f=rHL1mE{8m7T{$np5A z8E}CYwqE#I!>38WKk%#W<2IZuntL5hM>^)}TTq?oP<-=TPZ5qdk|ds>?%_ zzWSLrZ+xNtFD4?)(2`oQNVRB-;J`f!7{n;fNm?aEB(riQe(23MVYy ziAq)?<*D4R9f>u6#DFR!vqs{69z;p8O47jVY7Oyu0{-367@@C8-|;Sk?s1r@R~t9S zh?4||6ztf1=~qt+$s(7ixD==)&~?^Wl?7j==+orO%3WZ5U8HVovl2m1+rID9L`sPvuGk$e-i2lLj9k)av=T=?>7+6>^`;M z6RsLb7zpRV6(ePMI2#SSfXIcn3ZTAzMDhDH*%NrZr;;eNpe`! 
z@^m>r=E@ic22+yB*kGS|5|n|^fd9>y;rr4?M!x@bVM6W#fWBI7hne%>ttz8IQGe@1 z#Zs(4le*?cX`3h7*WdQv5zlA~Uq3e|wtEO;KzW+~+m!fQSV~+9z9aS#=SqZA7e8%C zQO4UsFL)ul&qT0#o{AC!^cpd5zP>%SV4J_dO8wv077Y@aZFS(P706AWLn5Cm>~Y6x zF)Nz)){L6%G8rhL{1cdzdwM4{2q{Q~yXQU>zINwffwKeZVE7t(l%X6(FP&uQ<46S$ zrcx1F5>+R7+55~H%8b4Lbq(_&VG(eLYM!@@O9(4SIEjD?u>43>yJ^V_2uA-|@Wqx3tXW{d^_5k=d|H(G*Gm{?T zs{e7vzCGBl6IgEU>>3)BzCCQq^# zXm(5J^0Dl74MCr6wA=lz=lsQiij14#@h7AI=xt)bRG&5_r*xf|@#J*e2;C?gr+riK z`q@`|JK*T}ntH+6Y4;I0^t6aOQP1ovBUoa`p6h*hQ@=+0K8ELFAR(EVL0wMaN?#Q% zRI1ryK)&ilLziYE;f5j!I7xuThB5O?KtoKzWOww1>W_$zM83pw^yQz(9PQJ2Sz$hI zj)^k%-{B@u2>oqUUu8iDJ$XlKDNYzd0)1y6clGj|IVG`Fk>#IM9t-d@xI|{l@yDnq z1gV;P@GpI{DgxOaMwT%~jxk&~hQZFjG%e-yVHw4`qo2%_axxUy8Yn-o>$ijiTi3LX zu90rHOq@OYKsgZ!jc2Mm==j~fJ6UDuE2)DZCH%c=A^@#J=!iT0Rx{*=E?mux_=XcL zk6b*c6P=MS`!0~zP2-i zOwhVL=en!;$9Mrz6jVav>JK1?S8L#+Y$F1 zuVlfouC7}8U9>l68$!o5{%HavGtBV|?i}17gOP*k6q`q4PkNjQhV(R6ygs5B7*Rn8 z;L{RJo$?~~tty(TCf5}crP(Uh9?Q!r!bhF=I^gQcKY)pGya;`Z^vT8(6|%V(zvvp3 z@a(|PHI}e09`8obs)WmwvQT1)w?9iGz%QU4e&y`^!+YW0cL*V z8)S2s&AH=BpN!8D)JL%>_K7#ldaT;;c^ta$7bUk&+SJ2o4gtmfV_RNeL8z%&)1VQ#5CZHQ@=7cNeN6%z@<1H;M3^HkBjU*jRm@k>7P_PrsZJD0 zQW%s#HE+d@Ht)ZlC{?ga;NoP*J%4|Jr0FX#m zU`}{tyNCkIKy%>meb+nlbyH3L1Kr=JHC7-JE+dOFYs*_d2$?)UWAPH1gPwq=waaF& zt&8ezh3g+hIbU>BHlDqm@2_tS*@%Sfx0RvPuTsZejLt`u zugqI`uk~wImUWmAS^^d0TDywx`MY?h6$&U@D2k}#Aj6ltIB{VR%@PZ4)ws{+xtI(l zqOoxSm1SLhRw}iG-%B6F5<7{4LBBzSI;zqc@UgxH1YaAuQ}{T*o6YKvw+cH>SdcWEOLN~lSmw?5vFT}TNVn;H z1kwNfcuo~G5 zeMnF2pG^Nf(U#F8{VdzjtywkEeQ+gb?J;J!uNEgf37iBWH}r+&A}T)*4A*ly&G%E( z+D{gHi%UAOAzJGK?v-v5;f)VW4853YQYvOLIP>S)u3mL@3vazy--olP4R0=o*A2Fu zV81NbzlY;9BAD^cX|TH5lg0sv*vOkny)aZ93doAnu~QDekPi8AhH0E5*#A1Qxj_`; zOtygS1f5UNRZB}k&3L*s=UojbBRJ%c%zXeIw|er;Zd{Si6*A3eK)VJQ1Mt4anax56 z2P9XSQW?CvcN_`gvlYeBQHsGaLrs_<1CCVp8!vuFHOtB4xdZXR_Am8CaS2d}^|hjI zSl}+Mfok=S`y4&Iv$l^H{as(FyvfL_N@wV}_+q~^@M`DeoikFJ@n1IysE1o`{Ri`S zhsGP+rKd!v6we^kifv2TVcA}{KewL$xIOPTZ^)L96xgd?k7}8&d#RjHE_muaSSxy+ zUwINSKHcWMVn*p%)aCD&k< zk21lv$MnJ%ZsJ)4W#SiyLkFWTm@HFTT3LN}eBYXghKY$$xor_^BYZUEb#$l?NY_NK z&huA8M^j%IWM+HmW7x8)l&~p)XRGKWIExFcWF7nL@Vp}VZyJ(*)AGJi7?rVi%5Vio zi9WKAN(w5OBTy{GH|V{Q8PY#nD4jZMRXAgwVj2u;=rBZ!-_9M8Z-i5(T_`oPQ?^Qd z!4CwoSk)H3DHoNmyXOEjoHi$+07a6}!H7pO{pNxjZ`}Ne%_&F~S(v;_?vE&4Lr9*k zNSLCDF0@&sZAGOG$| zxypg1cD8>%F&oz1OVLX~?3F3w+Ib$_HvnQd{(ty->!>Qb_4`|pk`j>a?iP_v3)0;n zNF&{iba%IabO_Qd-K~UDDz%Z2?)F_lLza5#6F(Z|jJj^>#tCfQYpZJc%rvD7ilRHJ>^T3gFKrvk9P7r#FLw@q#N-8>bl+gkzv`jbEHu)7@KaykEq>3_ z!FcjHPF#|mD55K%2%~tvSnc|3w|CQ$ie*qSM{r&toBv7p9Ka%QD|*VMb@G?E`an@e zhNGl>!ppOi?FSuj@Y2K|XhO}r@+cvRa2$@(6RegK&o=O2yMl16PmPqII|gI=LcnD+ zp4H4=@2p;MyR;y%!vnOL1SJitUl>PB%C)0F)|1t0HSOOW4|~CD{$r{4K&YK1}nRb6y|5?H_)o2i;G8I@?-MIr?_4^vGHH zMFq3xTz+;kN4$k9ApF%%D-lh-+-BQ(dj_*d+rjLBjf-~9;QwZP@jcduZC z%+AK3+hq}qJp$HG6iUVix;)zsdeHLH6F!l-%~6q9M-30o2SHI1Q>Vk~!tw)@`3vzF z-tu=-olu?jk<7-8MmTpNiwtyT$qvV@2M>UIw6r*-&zt!A)p+Ql4K_z(q|X-sP9@vZ z*!yU+q%Z5MmKC>9`wzr+2)3pRevfFRYos-d-+PyD5ER$WVRo$o8Z7xU2A>5Dt@^dS z-vCyHM&B1EVmk>^z8mKsq6H;5YUR?Bn=aPV<`=`sCM#d6xz^5BCu)$v*=b`hX5HLu zh&-IunbkJIfN?Ox>dxRT7;M5>E8%5h!4C|z_<>W`DG%c%1q^486 z>&X#u-oGsbUpeGcHegjvk86V4v?Xc00 zJmt{9_lwi{RacieP6pQzglqJvXkQ}ixCT%U>6?&^o;wx9@T#U{0S+?IcHtk^UjBHn zk%%pa-Ucc?D3eczt3&(|);MA{8$Wl|uME=Vg(LAK}1q98i`6|IR0uU9^huWD9zbqsu4*oyn00@k%8+wH}Gnh!v4 zCSJm(dTxDbXr^`LgYzefR+aV~37>H{)P^7=|7imk9R*SRC}GVr{6-y{lHmd4ar7PY zdGp+2?$sH~YV5I{hrH|ilKI|8de9o3&$7gfSy_B^6SovAM(n6^v zw({Wf@;;JG+Q)V!bG+Ioa^>nv;3Qu$Q)@-n%=&`sy*K&NZJutpr<9@9@aE!hxQ*`! 
zz$qb(T!Apkd301{|4mVj66@&d-_uvr2Rq!B*@ksq4IIBG8QqO*CF-pwQ~NE}RSGtU zlO;+i``(?Wi^8U9S3wlC#pAvgMlDO$6?WI(J5I=4jA>nt$F(j(0*SvX5Bs`l*4AQ$ zcZ#pRDUk=SPzt)bx?SrGQ$|j1x1G7J;oiROgpBYo`GTy2`7g9g$;#4GG>laJL4k+b z{WOWorA|ldj5w;HYofx0;zOD~Q_%7(K4igFt!t|wexT+1XLSwOXw0^B*e+i@izbKF zvg`r#fHN!izz$N!F)Shct=@7P{-N5>HKt0y6s;5hVm|ywK)Jy6Sf!BU$P<&~(5Om2 zPC)-_9|Eo!tazW0XjM>PR~@Mu6F_2jk+Y;oT~n%5rLW*CBr8Sq_46{LBsgGEBKT-+sh(;p^I9XFdEa>W0|DH_uCWKMpxW zHah613Fpg+CavcC1k$DIboN8!u8G6cr@$0Syrjw^HH)E7wm6l8Z47u$X9B&z#;^1xJ*Jx9>0>;zOf#Pr&_t2 z6r%SWQ@;QW&`P9DAL$YtSZZT;VUoH7N7 zlu2X2e_EPycOJqYyvu(ZJQTOfaj}vVqm>v0vD0JuMgNq`NydaBXUJ)9{f<)O&DCl7 zS*YLYV%dF6?Y#;9@`Wm-7eDTH?7qe#+FJC(jghv zpgD!{p#ck?wjp&IZZNT(_X+c`n5cJL zN{o$lqRkMkB%b3*VKlBRmF0};1ZCoK05o_YGQ#T}jf7;&0_DSg5a_#6wX$)1yxIJ$ zvK5^Al@9P{)43=vCu&+3nk~ROy+XXjpSFPF0#14dRV^YJIA=r6u#SBleuet{{v_LE8W6}vX(6M`&r1diP)z4_%(pT z9Ae!M$HZFC^bHvhHtq}Vk7>`@)QwxUOEas#q8G+TH!-tXm~$FsyJgMF$(T#@+!-ES zD&7xGLd3vjI`bwj)p_*0Jy$g9ng27aD_Z#DLX)Nuf^M#kX z`gb2kE2lcW`*~k|3hfN@hNPlGEKh@yd!_g@D2z{UbnM%EvPDEh)uYb7xrew@cDfS0 zoLPudOkp4!SImC*t#Pr^VdHJ*SXjtt72Q`H7P0{EMW0QC>5`Vy!}4YrOZe{1ck7oo zWb{wjrj3!oT?%}MIW!(pg|aT$gTBVc2YtE;UH)~0rSy`z^3gB$K!Z>0q>`j+ol0B! zAk0#2W5dQT@-iiD595RLrt;Zf{~`u z`=VuFNCN~N8BEn9?2loY*N$#^k+2JIO*d~>vToc;c`a~ko|CbNKTb(8*P4|R3<=f~ z-@D)O6H4@)og;GSMZ$Z1kqKu1rsb2tL88HmYj!gXR87qrK#lyE3_$j0 zhVC0H>DZ3nyA-el?|SEdZv;uKg)!DCv9_B?J=2M7hN_Z#_|s&pnT~FgQuFkrN9}~H$Vf`PeQ`rDP zS&R`cqz~BDOpVFJNrlM{-c?>CrwSfH;w5sjw8hrTFunWe$~*j$K0p4eIDrdCZV?sM zqWNh|))1>QDN6DEi*W^9f2!3P*@<@(S)M7LezldejP- zC7`O-fx1jiZ-Tz9ubAMwP`@FuW&uQhAooaTU=2iu@z91P@kOGL zGto+tYfzX;WN~j}y+aosrizOjeULy$UQ~2p@$t8i@h6@MK3$M%#Q=3uaT@g?@?0OD z_k)eUv!5R7_vlwpLfVj0v~aNM#}ZELQ!@?d=b4B75qy_a_|X2-p#wUU12>C&Uoo*E zw2I_je4Hmn`HK0?#lmcIntXMwB6Y9E-@UPfgo@Jq4zBkr!RoiI8)OQ^9dJDHRoK1g z@irc&I5Uw$LTU=aX=H>8%WC*y`lujp`!wSCUK`ft?*RWdw2`DayB=HOAH(|}$sB^U ze4O9nyn%EbLvT5#|AGzw8nZ`u+TUM)(OM31e=0 z)YYD2F*6?sJ}G{g-|>|9zFfYT1l14{w2gg zJPFkXFiDyj!K2t@BhgJJYLzciYDrU-{3O87O-0^`cZJHptZ-c~ks+k`YN#a3d-Xv5 zy>vs>arHm9$=^a4sN|kg0gI6KT3E;XYVTd-F7G&Rxptd};}1s<;->>gGt6_mu6*hCA*>VMA2z{3~T z(LpM4*u(K%8~=v!@x=q8ejD(9ao{b2mRkSg$^0#J#ghf#jcPT6o}(pLKk0jr${vP; zvtdI*HeKrHxJn3pU+g4yu}d1bo20L7TQDxfiHNoMdM#F4?q#(vANaj25k*2!md1+B z4i_w1Kge?lAG{pfjUyXv7DkQa9?mB(NQdS%(X!}XCz|~au)r7iLuA2Q;N!^J)~m}m zPcVN!C03vkx-{XrtTA#T&xmX+wJh2cE-Dk((ekrY%&DP3hRQH!86w*6+OZ1{(f^+x z56*^Z-UYl_N(WV_);Rw46Bf!lTq!GhT}yOiNsfU)ER!1 z2%A|Y)lZIy``^l#f6Unhq)9cHVF~KW@*}!ARd4NVcku(PBHEjv=V{}=zQLcWQIb;_ z@L$gCXkHlrp4ObU*RT>1?Q8i)qBAl?^y5GieL)QGbae5-q(8_xSCCX35(C8! 
zQp$V~6a?B?5 zzxiJ^T}*=T?ceGBw;kr`B2;?n+{v`A6RQMBAzOZRNve|~n{sC)d+C#YOi6f@zY zjH$^ww*nd5?lBG41gEKRjD(MRCMP`;+F}vwcVv&C3BvMh3vvEo0I!qu=rPoEOylSp~oKx~xllO2|#6&b%4 zj_W8L$!^~yl`oX$uf$+yLO+#%&$Eopc*O*2c&t@EUnkxNRJh51nw1Qh(Z=qi1g+#O zM^;h_GW3;B*=+O>Jf3la)=|!s2H;X1W;1DKpscjz>N^c?&LQ1CurLAfGSs7}1oM9% zn?!K>6Qje0A5-ccH0`hf+Hw=4nt_5rRmVc{P+&BSXfZ-lJwv*E?nQA}t1{3+)_q+u`^Db8q_ac>#G8;jkZPy;; zzD;iVv0WcOm^=^AZ%d~?XF0MPbP6+YDz_HKSA%zG|L)zun_gXuPFOC14e`TjaP9a^ z)hPMedKF5?Cj*v(nRuU>rPozs<8KZ2T3yTCe6BSi_6E4}-&Ncwyp?|}Neh5j$Y5Z~c*9lGU+aTx|^aA|ir*OYvs@=c?> z>#@jm1pTpU>>ZyQ&^MkD4y!=z5ea~k9~t0|MBKQGvS`D&FUq7y!&U*R-x-uLI6COV zxZvPImgd4w2nn*_z!dNC%|wUGDa#Rh9m@1xa6?b4vY^t(M=AKKl_7ks&lju-PH~<( zL8Z^Z8;U>=ELqN7K(nTM@e@{jk`i^?BE!eEr@wc>44mRwQIY&vFCHd4e}J~x94q@?FQ zmOsY0pIWI~1K&TMFLvlm)06e}&vu=+n$4pe2HRezBfFd(N3tyfr!&01RvN~{pP|E1 zdT)hWe$JeGzt^9p^f&PsBy(>DTZRcAP+mSP*<$HFZ${t_uu-kBz()p!6h5o=GOPmg zsppaDIY`q4Q`>D~@XmGp?i2wsP1Jl;q%%7xP5A($cJ<~{Ij)1^klCfGv=6{5%b6hr zcm_`af7p~?4GLoZt_;~Os>S*ENem(4Ljn2Ei26}wE>{sr_mCm^g%RcC+z!l+D9!r7 zwiHSyL%0X4UFkhdlo#tyipB@*kWGsVMOB(K3|Ao+g%%OY1?3XW9g1@pKH#IsueN0? zzOfckc}W&P8elz#*`E_3&OhV_9l?D|0KR%*!+E9kMVK-+TT=M8>dGZc?#dIAvR#0u%3-4>Rwx|&M}3z2R6;5@T=tIH zIZjA(gdMTD(>FXu`>bZ%$|o8HowzU5s>H#^X4kipeAvGT;N0e%Fb( z*BB+vp!0e!=k=arwt#zOe=o{sSe(w>CGd?0xFdBRnm!qZ;Sn0P@e;e{z^_n^tbmoe zz37=R4K*zLZ3Ri68I}SWwvoZw7imFceGM@w7;}*@&7j$uHpz>T2ZEIxbPMFe^h04W zb|j(mNwAkD*`qO~dUKzyR> za8TMirn%DhNa^JEYq0VSXttq@)?Q79LCi4B#Q6=Gz5_ z+zfgzh5HPE0gTgYh{P|e?TvlMJPd0$+?n(`A>p2ak4e1~C~?IoS26A~D&&Iq1o}K==n(WxY*6yv&ygf&Cpf@b~2RhdwXtadJZGe&@_Und)2g0S zbYR_l3N$f(Pkr#!dAkTi^KTCrvartJ%%D8qy89CFV4xE}{V4^R|84~7CAv6fwXJhS zec0Xb8bLS24&;b$W&_ym&J1q7HZw(<{C+XJpUsbcWh(LB=roX1uq5NF9w7<*Y`6KT zG2pl;4QD#jIGMep*%kbX+$zonR=6oRjukC_ruVOwGs1gw45R!2`Q5!%x82Me7Ka~m zjrXLmy}(N($?SPLPLvuP7hQ~{Sw7&euzc-<)j$mypS&YPPMTDORAwy#!Z_GC z<(`01Q_jOwgFctJ^g9q0q)*D|uMdPM7L^=( zPZm)^oKV1xkt`YKIT5V{ZCHFfH<_%Oy{$+U*bB!!oG5}u#$nrFFKwwRW?aCqLDacF z7jfV7YU;vlJx(HDV3jeDU%2=W$}1{d_!8&wA1s2xi6HbnrC08Kw8)Rz2ipHz*otOJ9Uu$d>zp+xv(C7#$e+u8~c*&wu zN>dEj`XW>jCf!zbTfuMxrR$w{r&R|Scu{aQoJzu8;Cq^+W|HA%9jYhX7s(=(kn2Io zL@tK=kZP6`vCw4)WrS-zQ#vDpLimVI&T@M!or}gZ5!fUZ2Uak`jyIscz}>r_vMX{H z)?-!LgFCBNkj8nt^3ZRHg**f6oUTs5I5DUU@uP?30Bh{!@x}*0t*n@My6yNf>K`cx zBLawFjj20qHq~>ANih-z{iwBtOkwJhS#6TeoZ!KJLfbMa`DzocS;d&R!og5uF1^0C z;5+avL=L)MRMui4C+!zL&(s`k%Bml{9}qovh-Xh)tAIpfCdgxRV5ew>N`{NgC+39P ztrjU28R>MGfSh`OCO5tmm#uO^6p*U7UESXY3arAKMFBa1R ztZk|Cny5I=WWjkJ<*PV=V{6BvEYox)s|g` z1^CUx-d{E9W%>s8j=i{wF~X+rW^s;`xecjHR7NC6q-Bv=mp_wmi!m{ZCF9eYgVbkT z6D19Q1^Er|DYSmr?+{?!yI+MLn2#}>`!f5!Am~4$LK({uTC{IEwJsXR(9Kq_*qfic=#{T^Y01RGEeUYh_k?wHKOf>fbJq3xPcksdRfVBhi z^hfFwUDSG+yKi8Zs``sehTAd$Kl0u*F$ciWLJi`m@Z!{+Od?&JnaRQ&b@Uj1$i!JA z1b8>Lb zCzvEDmgE{{*pNVYAgLxX0uN_`+!7!*U3Q_tiK*Z@Wn6#au-wJDI+jZ3?Jh?aE|Euw z*tu2w<}1*d64b4py=rlHD_1>NF9oJ#QG5h|ixxVM^Shx*=3Z~*9+eMe{ruCkPB~sP z{}vVvJJnUNRqJWW09u2+wwZ3vCk@!-EC3u2q;;HxFU^V;E&zd;4InZ9?22S1ay2}y z2>j>tlio|_WGT#h)()GZjkvKeOesS}Qk+18i>^I7eKaV`yO|*KcCf0?&s*Svl;gwm z5z0TDHYSJ`tf97%=X0R`{==LGLZ9r;elU($V3H5o8~Sfy$~gGV2;nZqMUz6NYFb(Q zer!5kv-0){r^%<8ykK2YsZkh!jh>tmsr-B&i8JWFO`)t)7X=ANxT4NRajzc-`Zpgb ztG5<=dFIA@W1|0*Num$2CBG=%g?T#pvat$=z=*D=eG>7r!tF+>V81%#zP*cnJb@zfowUS$msa?{ zGlD91d>!L8n&+H#p~+&bw@1zF=2MgFDDKi{BAa|`(Y$*h1ULgXaei%I?fvao`soer z<~!w0Oi`KpPO1Q~s0ovjFa2vD7N#!mR5JI-+CEp^VG@N>%Q(;bR5xHe94rh!*;PwK zR0-xE1Z57~VUuI2@-^J>$RG#PEh`(vCmUJ6%}6HjMpbi2EOz^Gq}R}UvWLWe#vb{zPdgG8Vn6cYcYFcS|{IyEnXWj71oj1}| zt(d@cep{#G@7_Zh9pre%&HYqj-_s?2D~k8tX+(151pmbAy#;ICu*R#CvLmz`@1OZZ zas0?=x`goZG-j=YeYi8_K;qk2yn#hN4{D7!K{t|^;9WPRiG)fiD4|1)GaPt5Z$<)V 
zxLs5V)K43_v4NAk>S->7((@nD7OcarsxrFID-4j)rIyY2pu2 z>kn2aYo}I0(b80T2%@2bCMAGIAh6O)VrfH>dsJqUUr3_W64kNp1z|IdG`N>|Xl`I- z@j-cxn(|Af3oIDbGPw^~-dc;1=3`oFPMSU|$}P3ZQakh)iOLU8eeoL?yBzueE~^rcLALu+5CB58`vfE+ zL2f(8gE|T;`MRL8(D(M;Ej&}iB!(;jE8{VmOJS(?#(%=eWDyAV=X;+!>UhH%*z?Dq znuDP5umn4wTw}(v07I-x50=R`E6mK*3W^7*+wfLM1n^h76KGB2_=sLqXYe0;#xzm&K;azf0&WRS{fvN$Mt5uh_B)3SxJn&>Z7x ztgPvcubwQu606`F4==fr$7ctyjI~~Bwd6VumV$iJ3;>4YMMe8n8m*$XXCnD{GKuEq z3%$2@GYkbgk+7ep&+LMi&I7xcv)QfuD7=ZHdKIsZ~ttF)lvqf9Kx zAaV}VAaHy=qRGfG-EBK|I)^5e?aNOHe~2+4<30?q;Q!?~Sailu%I6T`&W=N(QF5%) z>PgM%dvoDnvc03t#6_}_RWm}}_cWak54azF9k};ff3p92x&N}^9e3*mC>*1+;mU`{ zN4yHWbrj^P?a#@K&K&4#WaNvhz#6qEl4La@*9?o%YUwF!3q-H{W$BY&0~5?37QO`KYcYuIJj!O$tWapc>5cY2hVzdr|)*GbBc~(N>qxfNdpJ zy9}jJz`aFLa$2Ro4hZ`nV2g=eQJxz`q6voATHHl3w&KRypYnpH-Ba6E(hdoZtH)7d zZLVHz2y&*Rq}T;hKwzYkRc51+ub2Wm@sshufL|^EppQLIi(BxIO6eS)DEiZ&q9pcE z1s4L#43+?*c6hZ^XAH^Se_NqdKxdA?eb$)dMWRn zwe_vM7$vgV#G0?iwzpN^nO6dy&#wYACNo@%n{?~oou@T75&k+c;Y7Q0Gbf}9Yf6Yi ze`?2eSnkP|!ok$z?xvr7MPobCRV+k*t3wFkF#`RcE^f|Nmu)(Sui3mapk~`p^-;C) zOF-uEHuF6(Z#Y^8xn~QuXmb#IhB4N)S(b3dDd2=Db0E&IJfc*$vPJFYY!LPVg6`ON zL&hgs4FqS_hQAQBgomN|LVHK+^(8Hwuw$9(SM1L#`-`t@G=DJXbP-~XAcU+wBpTeB zm5W6}1@7`7?-;(s5~stY!KaPUxLo|hatK*o4a22R5SGAD-@~)Hi)6R4p=sJ_pveo( z{@|iNU-F4@_n5Q;{HlxsNc3+U*j%$MIB#=?+;P6%SN7)MGzqv4V46OZEA_6GW!v~Z zdj2(S0jVH8I{oMzR-5fB6x4%H%F^(%97Zt>6_pb={NCo7r(}$jd_^D9Zv#S*(MECdx4F6YI_^JVv!iOnSMk>%fC4>V3tq`7s;2%0mt-@($`N(*)LAEs>l9)2>l%8{{!2r4KK)bprk`1|DKZ&_C1mHbCTX?2A=Nc)(TyLgK1tkN|^paZFky1+RRc!xGM7%Hxkg+N7C&xLcs|NgTkBr(biLx3gS-& zqC}AT6Ft7D`DNnbK~{)1x=)jHl*rddq<9_2uNl;HptQJVEkxgFNB>WW{`fzIM7j=}FHvM-R7#FsUpdzEpuO zN+?I2Dqp31$c`Sd8^xFqUc_tAa1JTf6EV-u6cX)vXE1@yTfR9rc-ne&x8hj>$M9Xo$~+$Y$s&6J)pJ zLjhI_%afTVR&WH@px#1dXpJqaE?XA<)&G%_MTLPh4X}&fV9h3t#7VZ2(ivG&t5-7* zGAc^ojFuYgzVXjl;=Lo4{S)X8pvI1W6uve{l}*inaq(i36#tvt>DSVK^^VQo^q&+n?E4BR20u6Z&ImP3weYO`x(DTa>v~Y{ebojJ zGo=8$6aGs4V;NFpED7;&nlcp$CZ=sVJeBdGqVLfv1J8)bF;V&nngZm%Cr79kD_{Qi zl>L2)G?luySf+lBwqr)Q*qIP5Jp_GgKaiK zR5U59z$VGVU_M3Kbu32}-u6Q8VgI*$wC1VxrlIiv$G-zwFrxf-DqJMjH|jb|k?ljG zdtILWkzT;2vCxJGKB6GZi7MEEef@?BlS5Edn=iOVO}ga>MaFsPfy4wr3ll&GF^e>x z)M=RB=V3WD2Z?*Tvz_@pfZBE4Q6e@d5mS;se{lT-Fku!dL9D_kq*d%E$-fo&(0jUK z#ft_rv=85)EuslT-DL@49#I~dTyvZrsxmhIF$P77i;hTOQd>eEF;YM>`H~8$Ah}ND z56T$6Hw(L}E9!BtNUaQ)CEsW(I{HO8KE;JU-uUPaRBu?q2|d>64Xy;ZzU-?^u{80${!ZO5E-27ihCL*!Ps$*hhKrpV6!7OPR!ti&gdj;w%i%<^?Tjl;Fw>+_S7ZQZy>PhT`( zc>n?78?fL8sSRd`0H~iGC^~$9EQLG{4j(!sBM&6=X+RLw?^!zuR-PBhu zI@d#)6{~p}CAYB9qcEM3fWniJ@&ZDcg$%(-meJwD&~&;?-DeQFlyTrJ?e*zWmv4U( z^st}7*CRR;GXR()DBG%6iZG^)JsZ@$p?vx{ za+fF+;)Az=ofad(_<{Y$hCDN4*IfC-723Olu-#jDOr}9qr+-D8TeJ*bLOXj^BAid( zrzINJXu!c`1-!9q9TvJ+4r(Cfw_BF4x0uYApC>x5w?G+qMdfmPI{N{(f2a8g<7*dq z095^~6SpIK9OMV#Igm@glnveworYLW(ZcKsFevxziX{^f_su$w>^v5z%W~j z7F@{0fS@Kli%WiJO}q=V{WKT$mGzu%xOIh0(@&KXaO4I5%Qi?SUiI44Ph=&Xivuhs zeLq5u53q%L;7?f6!LI3>+vv!z)hLcuzq{kNI-_fWXdG>veBT}QNu@B4a$y1D-R8Ae z$D04<=_SSQU8B80?JlwA8sE=1PAJ>{VIiYuHb` z9nzg4>EL~6*#fWs;gnp1RK~V$)nq~g z5;1xJ*n-o6*7`sgbk0%o%4rgWgSLUH3($v263ApO%G@~|J-AwS-kSiBCKIb#>wuzW zLI6A^29bg!fV$0ninqPkWKY;F2%OU|LF0@#xyXGabd;#HXyPZ#P{8GUGhCZW_^f+b zUY3X{S*!J z`|ed`9n9e1(~$SNV9w2L&Iq~5VWlVQ^Tn4dkRW^@Sf3Upd=5)9k(l9>FTMrpE)7K}ZZosNi z8|}Aa6vlR*{VLz$Y-fSY#pkD)%@Fq+I`|4o9T1iP31%Ep_zoY|5C?J~k9APO55{Bn zi$yS1e*z;|ybJqUD@-bM;2eGpp57!(dc}3v4y}%UP5w<%&?RUuVK~+`Mp(yTp-zq> zl@?10h>>Ji2~bvS!w!N{G8eKVx4~>MZ=HT=FAIRg{WmZ)if2(Q#Nmm_+3 zodLb&9E8(~>U0+VGEyvD5TL9hclpn*x?OZ*y$`pGYImNDvuQUJL}thBFW7BaB9XsBrvc?7ID35>LE~f2|ruy@V z(9V~hspreErhrTl@Rm>V=s@wiN1Qq!xtoD97>cn%)!4}x{2jb6gJW_g&lCx-8HoTD 
zO!c{{q9LsWu0Zxdxv*gJp%En`BZYY^ORR_>tTZN`Vze-Jn}zmc=_U5PUqAATt)eFJ zZJK(VJw}*HpzT=$eCa6WRBvJBIz*6!re`L_(8v;XXix_|YD>QSB$ zneov*RcYbV%&~636LuBj;qf@24h$@v@j`s?v{*CZ-UO+xiYSPAr~F*%^i!G5%NYeH z!ca|&82vWz*A20|(CMO!O)3iP7FvA3bevhg$g}v%Aag=b{xZuwrK&tcJ#nyR)EgS& z6-Mr5(fsS6%#Km}DCxVe-YZ9#m_GMY)A==6M(?+^rwz?u>6{w+0vk%K#Kb3^tGUGY zyHw(#!HB)-otFPtidhv9s#m~HWh0zS)`kO;mHEBz5{$2fa&P()H@2nX_Lj*#Y3W+c zKN7po8I?gaJ>Olf&&!pkDOIL4*&(ccm&5zx`Y_A@sXdU4|Q0=~!yp7i|OY% z#-GT28a+P$5$J*y)I3?dB=@g3%A_#JHT`V%ojE5d81W!!eMP3mgV=4{MJp0WX7N*i zA%iv{;yI>ZGD&wab(*q}hQ1Aw+yIv@hLz6m`tCJwP2Oi|PYa1oR^F#e@sOO>Dxtdt zB{8iqG{~ZT=QrDq%O4?Gk%zyZ8`w-QhX^;Xv(<=OFusa;50ZJ1!0|zDYsAImJ(pIm zal91S=lOlJbYfKZixrX9`!{IM}5`X>a8S)bccGMPC(k{Tx z;cw)fEaMm^^d7@sefz-Q6>Ba868<(b_s5_d5`K(N|@G>!^ zMLe=PN$ev}YD)IwE{*IyDXI<9kW(k- z#gS0PCL6vczX#^sJpiL}ft}T#AW~$mxDv5G?DLf}EHA?3|9h`{K?pymI%im0=D-RR zS)MbJ_1gYExfg93&rJ`WoDnMtyieMaAgUIdt}I7ulmBVjY^+7H=V8&<|0M^ zE)a#u^cL(0H~cw`|Y27sV#h;%f3%DsCBG&Lm&sh@uz-wHx5MTI@4l;G41TozFG%U9v#S{Szvvr8B8K#WgbNSwu}H--SRO$Nqe~g|@lA1V*E#$gN#~_m ze2(B&z%Yt7f(5y=snw|Q7SAPzzwtR9J2;v-R*$2SGY%tjbAZ{DJUT#!M0&!{;75um z6xG7lJCDzSal#Z@Bma9>03J(2OB%`#5$9Be@$NK@nMcwlgGNomp}jwPiBfj8B%Y^; zjpO)+V(1Qha}Xa?uJijv+}WknuyjV%nXCG9JM8RP*3)%kLeN%j;2-6- z$tG6C3b#eK7Y_ks68$pvQ&t2nH{YQ-wUw@{R>AX?op3 z)1J7`zUxEW2$_vUcNJ$r6`4IvQX?~=>kSVP=oPazVO=TgFu$W~zl}g&UDHY^$q&TH zb~i^(9qc!8Ixj*v>?QtnFgyT-C6Jif-Rh;G-b(5?a=DA@rap1T>nJr0Z&0_){LTBA z8`uhG@kG1Ay!Kt-sOGuMF;I2 z2T$LdSEPTJGp&te6~7d4-B!2vb@zIG)zx#CDcJeLu$ad5KbY)*q8QPbxB9{DbR~($ z3E~m$^c#=N)y(=%_19h!3m=#?A;#kPNycn1YfZ?V>QS!-Kk{{18`Qcg$m?pDt#k!* z-nJ84gT8oK>a;4hq1_Y!QzlJE5%d~bV`-X%XcOTMvSmkp%M3>3;XF%M8-5p412KnN1CZ8luJ?h54&lN)^@CDWzB=ZIMMK(8gm!%mro6R7pUeEDXzuKE&dnVbhN zMU9t&VNMglIDq^vHYyKh7C=uhdw!qdU2Tkw2RK)Q=JtGLPC*tMcRKjLRNnExdv3UW zuko*=>UUU$PW+)Y0+t-r7sSqIFe!!8X!QwAS2QW)ZmoXIkhBu4>S;Ok{7 zL^_-0b$M~TMN6zk84cVxsFSv<;uYNPhi*564%gkQ_+RJ0ezer-lr6!k zdpIxzySs|J=2Ypi401CNGB|<@EE+}s3_V&L7jhESA`JD;J(8;zr3$;YzO(vg-}@;a zOS{d&u{zuMI^vvWUw7UHWM)WVbP5u0)D`;~(yjWSQYt^f&}0qMWVYH>56+n>(e8;Mq&SiwLEiSk*p!C4ce)fP@r{uJ3uN zPBXcwPLEDaALc*XGP4z8>whE=(g#s~&d7|LkR=M%7$K_4!86!0^m*Y~3WF?SUHt$K4h+YConOT8b6TWM(^uz8Rzh&o^b_#VK`tJsR7(H*d#9m)B?dhkpEpMD&(3{8-CWMFglD zd7oBHPpoPlzO1}VK%dTjVy$`6Ygm&5&|o}7l!Q5-UYPG{R$&OmopLWqQ@gQpcByc4 z_pKi!gepa%nDw`h7S{7br*fOBD78lN34Io%aNzVw8A|6TzhGrFTlm8fwW`pzmqR4GrAj3FKd>79!~h~#?1Cy$HzX} zVW|xB79&-1CEUENP77Hu z&5kUwQG~gOG>5GV$18xcN@kEO)e{4E{E2Nn)ee{?1nhyH3P!;$_(B_rPK^wggCcj> zlCqm7VOerh6_l^RVEn_CJ)x*-V9W8pEykj1)Lqvayf1mLqulvvbrtqsAC(pq$PQ2- z{>^InCqIxymIzX$qJ1lv93QG&Dv!ix8xhxhl3*%J&dsQx@`KGHduNo)M4D3^9d=^I(Gru zh|l1G_74~Pw!?u_eA3a+m}Vz;_+I*#raCQu6;GhF;H8u?NnOoF!~oGE-ksB|EshCR zQp5`@DN-pP!grn;um)qP?ISpdr{2p74=CyX$JklMMY;7~Ul9cX>1OB}LRzG|LApU2 z5k!Fj=@99zp`=?%N>aKTMN$Fj7U__B_UO@b-}nE`^Xi)O}ezqQtPMF*KD z^?eefE(BxP>{L|?7*vy8+qU12ks?wqiosv$G~x}>&0Q<{i9(HnKIh#_V6R8SI`pDpD8o~{a?`o`TX}AtZhwlh`q!z7J zX1K78%VfdA|AVj#qs4A1_yf*69TnXfxEgI7VkWVyPkFEYYyZPV`>MSS=OfULq^0iv ze6}-=Ry`H^**0jS3Xy}ZcJMl zg;Kv}L1jBuOldWn(ri>7&i-rD>L>rD2T!ApP2}2gOsl7sB0_BCYZg6ics{2ASd3LG z@8SnY5T)v3VX`;qX=AzNNpJgSQ;#(&?C0gaO@wCC3nz}PWrq9l2|=*?@4t{V?w82A zRs$$^a8metyaCgJX!4u31>Ika<$89C%}Q9i$s}I7a^FAtwkF=(xiHS)u!0wOUD%N2 zDR1<4WoZnp+eni!*US6S{1If%S!pk~T@}5)2#-`;T{Fm)uRv3zaSvNZW7i`vJLZJA`7JOE3>RPq>S|VuL1XrV=pu62>7slJisNCyIE>lMD7HXc;<(wH+;C}aMLc8Yrl2rNZ z>w{XlL-Ei6rHoe+^$%$-TXLJ(H67+-3^U~VjvB9wv45z*7gaOVxo&Bk-W}J7$UjX; z6)TEdTaj3xLnzz6tFo_J&UanbB^O6;trPC^pT5cC+H=g`P|hAUI)ZD<0qS(F+6#Za zqE4-Rf&UThmSx496Zk3$srEx0dqMj){a28zEC%hp;?4OFMtn=nSX6OUq7X z=j!tOq4~23I5lX!CuFjI=5JnrrRAUN03GKp1kq^+Cg{EtU&>fw_G{TlT8DjlJdqj^ z-{oA~{qg(Z{pDJqfac@~85;yF#si%F;^wg8y 
zzkW?9&P_Nv8fpO+O~gJmR%n_89EVO8Ubn9qCA29Ccc2P`ZQ{u0pFU9X<8tlONf8f5 zs8+vK#9#kemd=D6>y+O;sIkEK{Rd(<3NQntK#By0R;WNCMd8o=){l$4A`X>ue_1t_ zFk?Ad_Uo=1VXaE?Z>z+gzfJ5Ws;fps)S;?5usFO=5X`T>0dhIu9hD%0R8dvHM;%Ft z($B2i6PXFoh}XE&0+Hv>g2&0e;X{!$8~0$s*X&Mx@gGtUf&kEt4Gp`j?!4kgsp1%n z7yy4)CDp5?Aaj(38=J*MZkvzcId8Fz`sb;!+V%Y3QYLmea<4?=7Tw=dPZZjVnEq5kkT$>vn$%|em= zJ{^7sOxMn`GvB59{MU(5FhQQKTl+Os&F5u zFvz2)i7=P{k>`Z75=-xvtEE%iL*vKW8-zuHdOJkv|9oTJ_jT{^#6aeXz?s1RKPwEF z|5O-gBCGQ9xvMj)^ZvxmH&S$!+O_Y!OV4&&=B+0a*B)*xqY+bc-KA>cbA|u;yl4`c ze&3ucKOIlv2E9kSbuZ_4*F2ymq6sBj+#?n>wui=stZ)>L`MK(IH^jj~}!!Q)_2@-FVzPq4Z%uwxUytbj8nX zQI(y+K`xTP0Ux)KFJxC7%F6%7zug<~pkL(0Ywx?8l~NIjKr}ZMr{zE07C)}i#GV)N z0yOX!AR~+wY07xbH9yzGo*|*EG=g(-4TkS7=H1~Aw@@&hn%s|=a~u90Jq3#Sg1M~` zCaso5kZH!=)r4TG%K_H_YuuC(Gi@hfj!_QzU|$@|%)E%4$;(Ofg)>K$%G+kLNC5tPAQvi&}Q3yJyXpMKk?)IV8yaC1jD6rn7$LNyA92r+^A*-b6FL)nUy=&IVHWjXui70~EHS2AFZ)eox{t}mUA{M7L`Fy<0dj%Zz z$x<1xD3#a1Nb&m);Ef}I{O&^BPX_I;kBqNZnt`J1eny#Q|_ek zqsFI!(DR|Ts@E7_Or-qpCEmxEU;0avK`d10WTADog{S1o5QB(M=KYNR87JI;^2^#d z^WE}(nm{!dU*c@Tc!P$@<8=>`)embK8Eb5f_P#!Q1@4qqBk$}J4b^>0w%=RFx7BZd ztz|g0Z@xS>{3giLBM3PFr3r<5Eyg~NH{LZ(tbVb$-Ex~~{^Non+g6VNC76m{i5P9Y z`0$+m z9yc$21Uy%isU{ZYti}#q1XpL?bHc!Zz4K8L>)s=?LnHT%yagpa zSoBzuJtQK-jeWmZ{}ghVkajc7aR8R2Nx?!3hdwRtJSeH}dwxXvz2To%fj-Fj1Wg$q zL8R^C#4A5RgNSL+Eh}5*3)|}?O_+G>O(F!B!UJ}e<9rS~;x_}UFiAC+Jo0U3ldYUF z&5YgXkB;=8m$Y1&Jv5W5{;{IJ9D%hWP&*>xnPoJO$xj#0@9mj$@lBwO@R>Gp61zOs zDs>P97eo9(bK4y95y9mbKIOP~HE+ibPn;mlTBxE(RH||QHlE@5v{B|t&vpql}HXG+fg~nap z~!S1`Mg-ZFu3i#-Qv@8^bJLa06%|=tBb87?HOM0$NiGY)imqFCBKh0iZ)oZh6 zrt(Epqg`{MF?)jPUP9Q7CUgW;pc}7KH_ormek#d~_Fe%^8pnS0qcVa>@?jJ#PZ8!L zPhuo=&J*s>LIYmM(~PMoLUzq(Pd>PCs4+*U`s(sqpCVH)o#%z4X6bUqJyKh(Y%peR6 zuZtHs)?gf9Sn%=$+~NduQC7U*b3F|~3=S47x;s@(BEk5CPUr_>h+=5#@Mk=nSpv#` z_q@mP@waXF%O}f=3E1E*eNI99&#lqc32fwqd*0K}i%AFGie{ z9GuQG93uK2M6KxX6hHLgVXAYe^WG36ag9ZhUyH`dDj(gty|Nr1oGLYr@~mGo@h66{ z%tFN|6n{96cbtrrYr9|?`=b}FEweh=9=EEJ0U}y`OO~xkgt7vhd^@7ke&{2{X01VU z`k<3!RGQv;c|9P_;wNfTJR%GRQwEE%m;KWP%L=+woz)s6SRcJ9axb`Z$aMZa=BiR4 zUla>^_u%$iEqa9s2VN$;2r~8Qi}b@Xh$>gOW9T+qqTK&WG`;WiKv(8|n#YUT-K<+! ztl!aJ&g$uwC#t*>OGPL8?qKJ4er|Yzfwod2>SRoMOv2H&St5 zyO4-CzF(eJHlM+V#&LIkFmkcZA>KQ08WT*>m;ub6{L(P)-A~izB>t5pVcRv z-~+odf6PxfEEF4r{HgPPG_PgR?lOwMRMYNciW^lY@r;#3sLqM4L#md0UEdio2(5XM z>#(BR^AYj;gbxwP+*S=>bN54>b(o%#1NI3N_DDL^JWqs;Tjnhnq`Nbhb9~zeI?2=4 zW-y_u*HGrC9~89wnh%f@1vjHC|DJFnq%*P4!eB5OLauqvl(zvBH!>QXQVOjyl6;LT+`NLR8LEnT2 zoeu8FjecuIv>a6Q?cmj+l!B-7rtc2K=K&^TeFyjxc4eKwiW@W+ka4lhXlgtHD$%Ee zfdj2ig7$*5TBn`U-~Pr?Ji3r7FZOU2!eze>VWOAtI?5JtI8v^NWoT6BI0p^GXIL6h z)Xbn**i3!wbL@2NP+*(PP}q%=g8H|GU(7}f0(18bm!?*7fZ7iFmtn5W)_%7KT_d?a z0guNIzQ6l^-+YP8ZWZ?Sxy8r1Q9pAnbvBk-wgJuEymrC0T~=V~`HG1AwePqclj_64 zmz}}4y``rIOUpAy;7sw}is)-=A3iz8a4Dy|W;1V>x%H5bdw0pDD8`dCO~elrMRWw- zQ&j$`)PkfrqtVv^9hEF~f^=Itq$L$ZyEY$)cOzuqnl2Mg8p2ppwJck>^$RWDxtfF# zuaAi=iwcT;y`PpF)bwG{F4;Gpr*$o%8f^6W_sQ1V7N&9Tkqht)D#kf7tyMp%*tlnE zVY-0o3CI^afEa;E0IVbkt)9asxWt{qO{M^NtF%WSgVv&sgY@qhJYy-Pu2TmD?e&(v zobKuyP1}7%uD=gw;KmTcqXmS9zAKvh=b??K<}J2-+r0ahTTJ9NOiYt!KZozQjH#tv z-q|Rmi0Ht&{zPssb{r3(^ceI=Q0#5dm`&m&pRr7Q=~UJ<(e}evXCv{+k8L9#uXJO6 zO+&&O8RXN-wz&XeJ8a>^jbbjYL^U3(VTd%DztG3*o#|%7U}{C(-ZNDK6xZe67$Cd8 zfPo72dtqoXQIh_geb~nLTienzz9+XLuh0~zE7K&&iQlCJ^+YR1V(LoEIjPIg*g#dI z?b3dQ6bu&s_4*mRmP@3T%|7b9N6Vma4*(CoH5lb1svqWx_~P6HKRw!>?5D99)nFB? 
z9@jPqSmXVyFy;>K>AOu2%{qfQxM;kcnItF6WjRKw|DNSP;-j(}z2}=!-V;OxQ`R@O zyb^QL?vx;9P}SFm@1qygo9?X7c(OlGm{lRTUEWK!TBt>iezUEtU@%~^-8tD4i zS=1TXns2^p7zH_8F7G7uRqC36-UsPU1=LRIM z^2a^o^F*)qqWTodZK48tr>a~4vA{HH0I=Wr;RExz+YdA+6o_9%+rg=i)aOVocVOH*|I0~3;`906x~jX zS>w^uBO>RlO?>jtMkKkCdh}Qc+(nX?xgNs5Yw$phfx?d7M83PKf5sbEQUhn~@-t_o zQL*Otgpw~hGuG4l6Pv8~1O9ehTBQmMS>TYa;vlFzXxK>!2`(RF|yjIbIcd15f- zp5Rt|$@b?PrV-xsbTV6K$)SuC*o1~cC<~G#Nd%)CQ(LaD&Q-5~nM@Yt2RCm8tr z2UDaTBHEi4I)Ya*xRH5{s!6AXmT0s~o5B9~VspdqF=)O+G{*ULgys|xlx~=gdzTgc z33|SwR>edv&$IY!su;8Ss{E@x+tFPA6Cbsxj<22(ThxtbtClk#vkDQk!@|8Lt%J*f z@}0cojn~uZ7W>N$bCF3wOnN7C29rni8~HW5vf(PvLaGL@pV!V3DkqlhA3`c|+}OT= zy?m_MDsTR0fK!j%CL$^mL;nV-LnMUDZU9>Kxj4XyX)R9eW2_QJ*QXrWAbhe~bL<+O zSgwViE9+UVRQwSz=!&h)9I4P)VtaP#E&6T;;OB6?K^&_mfgE!Nog>| z-KToxy*Ry+%wn;k8}ijOh`e2B8?j+ERW9FOiY}E!)Wby|0nY-Rv{3CH%EMD)>KNs9 z!cwXMB(bHQC>5)UzsT&5Pm!6KHNx2T+5F2Z@XaRsoCiQEtzbd7i>jzRLoG+()Qzz) zUC}1hzCG${MQ0-_@MzNL!^HD)@qRss!26ts!-A)`UlE=q?$ca%)pnQF>1;75mmD?` zkY9Q>jRll8*hGA_B>Le%TFs2Ci@gRQ9E!t;X5zyWXEVOoL3$hv=D=YIaDU}|*eoh; zFV}$F4QGeQw}Vrm2p`#Qg;pOpuFCLGV{j18m`u?*rW3m}fg?b71Q}5{KSa)5hZ7ay zsN-;Y0M5(4PjJ<9FZBEZxOp zr~?)rK?jAxC{_$t_v)A5V&uUfH5%)t6W$2vdthE{crc+$YaE{oo-_miP2MUDgOsP2 z-S8wCF{<(N{_LorQb73NGjnA!h%Niay$RFt{P$2KO+9~(QqfYL;bdZtljueC73M_I zJF(a#W2kHm*wo!dYK#lXvJvf~rCTA5>)DzrPZ;>YR=RhDTE3KY>s#R!9rH!vH_P^z zkQVbE{T66DlIRGVM&U}e1+wl(iQ(I8ea*1xU}<)q3mo(Bl>=;!2s9H%JK&tKiI$zb zbJt2n=~D>XyHF&&-Cpm1+CEbL!2OYQUOE=p8}1(}j@NW5rl1(N{jA7E;_UlwZ2dRds0_{Pzc4Pv~IdX}iS zSNU21OaYhqmy+-2pWxnICx>+Yf`#X8^Aw8#c4`6eaOJW}ab2w6R*tzB zGjt=`@}B}y9@3k(?9zl>J zm-$ENm-d-~M<1LDkDA^b`VSwlNPpqIb@yp06j#vYOqnc*lR+DWT*R~P$M-oZ&UR?e znT7Ia!1DXS3v5nx3R5T!2+*S2p~zEF0nT`LkwZh9oj3~|7~7%K{KxX!;vS6n*eGbX zyZkusOVinEMsi7TX8Bu;4V+@4S%MMWxUUT? zm2@v~{+=9xf5m~~T+x}7H(rjH)UwlZt?w>!!T0W^_GMP1J4My_vxu{Gta9-R?9g6W9QloCN#uC0hMWX$v>Dftr);z0TI z6gVLqleIG}MflHeNP>w|%Lg!8mPn;MB>a41y-Uiaq8F=yjUjyvEVNwP9q8#wM7@U# zEbq+!Ie&v-=^!AJPeDCTJG6cASKsNi@cfqI@1I_zaB4V&^)8Wf?_77+UwMm5=jVJ< z>^s_cOcB)ivUFE`7cEL=sErkclob_jrG|FY*1?DytFu zWx=3h{m9r5a&aai{#5_XdSu%Eg#wCUBujPWSXz1+5(ha<89CN7-;4$fVgKB1d^Xdn zOcI=@nA>hI4pZaZtp``S>iz96Dvy$?+F5A{Gr^-TeCS&AT!fC(Y!Qrv(GG^EiSVqV zaR^HMhdk}IM*@k@sH}J_PALy3^sLfW?3JgN04y>J^W2(inQXBGS`4M76aYk$!Fmii z@GAYyOR2}1zl<2_5>ItWn{#ph!5ncZje_k=O6R>e3esX|vi+#ieq5mh zxRw=2+4tc|5Vg@xEi9u^n7|Tj5&B^KkzEEX2Z(s=>EqsfXp#x_K9J}gl|m~zJ+$PY z-o~EpXhNE$TV3*uX>cuk#|mjsdha%MuHLch<_*RM+Aat>h8pIZstBbfr@}DD5XUzg zek2{wu=tmwIhNDeHxcaj*NMB5-QaabSR+jR;;+XSPIhx{e9EM;#We$OO;}dsn2;L9 znS@f#O_nLjks9-U;Uw&XO^4nW#CLxG6xjlKfRoN&RiVO+2FnPz%b}c1xl>dHMhxoB z7OgqQs;-^dij$_ElBnEg=BO zC^;%c*sXudgD6OqRil^j#TjI&uu$4(&sf%jWsLf`@_AG`|;k4Znesk4EETzwD(2?7yX-4nV&gM@JroLMGRa7=|embow{n*I0!Bl%H8K=3U~0K@G9_tyj;c@R6QEqX?lG(V<(Hqij z)n@eXx``_kvA+9(zUZiCu=gN3QF-tgXkCAUe)GFu@*KrM)fZBEEBnV#GOzCvZK@0b zQUcoA4x|*p4P#kKN5C8-MvrH-SLn4_*#KqndQrWiEl%huqlj`7B7PjP@`rzbP?8fO zfWPnxmvT4xJjyGN;?cCllzAv#Y@o%ry`2Ih`T*6(>v+vnuHHS`I+L3(*9qFA2WL|gp+4f#OyEKBucTE*#gU!P8%R(}3meUf{a;01 z>c%{a`_*X6~4rPx885(ewC%+MOj5EUhM!6#H{OLHRd?&{VoalJdFmE&`LH8c*d z28DIfuc*DBN2oHZ%Z1`8OlL-qq4!k}=t)qsAQG+ZCVl4b#gUx;I<3o4DVXm#9Wpev(V5-bFM{r1OliW?s>g&@m)pQKF1{reM_n2jWu9=eWWUf76^S*GCrkJSAd zdtFuU&iTMsk0}Q&t{pU+>7-vvmYn2`AU0=Y^E8?uRV{lYrnOh95}Pq_zqip1kX;&M zt~kS0q(XXJjrqRFBxPr*xA zn)MgfEmITO%+enNV8m7+u__S2v{6M1;ZdfsmO~5UgXQ)DvI7>oC-w?w)UvW3 z@LsI8M=#g^epQ5O-bv3@2r ze@EM-2S?Bw-Zh-VAAa?dEy_xr2K`aq_ zIl!^-{2Lsl%KP z+zxdM9fA$Al%=q5KE{(|`g(d>VANUhVZZ{LhitYNN=>%kdux3Ai>=)Ecj^BWvDtS27sK}GYWoO2d^{78=A(48JQl;={^T8Y@ 
zE{Huqb98aC4C=lui{~!HNJxmwQ+FvtU)n3z%sLx91Rdgcpz6QIUsxyze+A{`oJM~RVc_xgdj+Pg)6RexV>tD6`IC8}OO-3C<@G12`6ZNY^{1O-{qu&-TiRQI zUMyQXz!pZnM`W}<|E?}O$Dppf-KPnm(vS8%_VKbg)z5d%e^hKwD>J3Dp)~b=qOo?) zec&;W%DsCJOJdnffd=maz?T}`4T#>a?hD&w#8#NtD7h3OyN~D!>_XslwHZLtpvL)& zjfetDEG<)4F1s<5c5$6l4wJ!}9LA+)(OJk1+654wYGUYe^A@EiNiLp_K^;WY0eXV z(5JjK%x9h@c(zP=InX?uM8@;B`{fU#tzn@m$GRdNqrRq>t!Ha4u<>z%7hMF;X4!q} zdxcLkUm6fz@x50XzCO#izB^1WvjAii%0(ar*|jl(2HUL$(B)NXa%C6}w4n5*UKvDq zM*TgQgfi}_Z_y)tkw=!!X4mPAm)U@_K~?pkOz@a~xwt3{{=!b+`t~X_9<~P(bF6`N zUi@p+P1}ktLL;4$^vR<#+3cEL=2C}pF0Qa!rXPtpY1g>08v9|t_>R@`8B01oxF6_ zVWD-2OIGwadG}H>khNsZF!W&FkBDhAsKJtw@aW=zy#3-)5__Y)xb6&N;nNsxAchkG z^}Wnv-WO#CEFgyOO{WcshAF!dk?@a@Cc0EdYE7Fa35!}83i!PSe{2b+EKmFbj6tW~ zLJW*6S@&Siy>j9)1b^JUD0l2IZ~N6POx5v^p9_yyTIJ*!lnAW}k@HnDC<0lyEt1{h z9yd7MMe#9!@cDN^iol#&ikA|yzVIK;J>Pw*a#yvqG=`83Kzw-s(tF257i}bYVu6`J zfizrPzN5xcqB{A3WZ=olbk)cXekJdmYXz9>NsBy}v@{Rbe|B+w*>XL}tb7?jWF!~3 zSNRmfu^TN*eT#tRL-k&!sQtF8KxdvUxLO^-G&C7}LZkd>T0yGHG<L&yRF|Y2QI@vqI)NPIh?s`5|#E`Ex-j7D^)o^g(Z5SYCntUpTrnZ zF5CUNRrUcfCEb|T`z%xioDX^vFf@eEr6<+w6K4-(37o6R>Hp=;f}V(J9T-K$r}9z@ zN1Xn^xXW%2im#v``$Bap-+#eoC11z5l#KAhus|~TV2Yhlx7P*dL9NYYYSHa0q<$|# zhO%m!)@%we{CVVRLV{Kh^kHVw5sZQvX*R6y!@>Ig_F|&5zynVZM_s}YHkIPsl3sEx-Bo~o4k4viB~hxJ#LZ{W;daj>SJ7e%2ptVLKq<~khB zxhIU?)o&@ih*(~4>vWhtxv~@Ry4dS6y;*g5VYmUB>kM{NfyDCyv=M}$=HCUIl2+4y zW=_5gpmX$Y$~{4VQT<^uSKsKj2IB?E{q4&R>qm`W)%7F;gW!(~Vd=fy$ z%Q|iVT7SbXY~J%)6&X&|-sT77ncAgSrvmLY41i=&CIsUT-~sJWNJRY# z!eEBiSC{Cq1~yHv{c9x{yk=uRL%p(9hba{98tvB?Kv|eL*;0PO@tE(i3qgux>(lR@ z0s%L}9X9p?+e~yQ^wo}<#@wyyHb*VA3a&lyPbO9|gKA}v+VExkTFr6K1VkGLn1b37 zQ(eA@v*DB1=xlG-3|9$kfBR_>nj;BL>qIPh_XibT(jV;GV2O9HTpJU*5a=z&ts?Hn ziKLiq6W*7zp?Nxz{$QuP<3e|er(yneR#E(ciWCD@jsa!fL(*-Itab+u8% zCmO;+&H)uZeOTK`U?%|Zx+T%x8G|oL_S7<59SR8WI0Dj`)NWAfds(V_Gv`uXr&JrX zeQ+TQ)ozpuAO(CtB(!(ARSrPyyMjopJW0FV*afP6#`r+5FzZ8khUXu(nVbOgW399s zZHg~R$PrAitz^8NR7He4LmR*V!trej7@S$aB^e~28@sKIDLZGl3n+R>uGKM%7JW){x3Mvzx5e=KKG)9f!*$)G?283#6n zJn%YhN6=le1cY%8ZSW=XCRn7LpRI`Z^@@iRsH7Ie;KH!+`+fZr{eD6j@ky+iSWiQ^ zT_WQxrGN3uv)N&~Bya9IDDRR1(?dfUT1xiIK5g9{-Y3JU$*dP(Uq?Na_udi6Bgko8 z2d=>PB5zsI6ek<8g&LC$vUJouGwEKGYfL>e$zE$(4lK?7 zlPgh0LTc1GdJ*xRUCPv0P3EmD1MvKom-k|YuVj8!-dgquP*2N7sK&ESpI_fGBM~^C z^C-Oi3b0%&JQhP5x?Y)ptyP2D15k*vl2})E?||FmSm|EYGpdhVUv47_mGcn|TD@K$ zVvX08S_)xIM;!<2>GXUnAx-g^dqD4tA)y(vBe5bP+%$=1Cv*phD1VASHky*dW_y7< zlqwdHXZNdS;4=Mhu%}CtK^CP)eUf?&6g;4U+F-oG zbJQr;m>wLK8UzeSZ^{bnFn2)f2!`a_!UFcEyT!_u0WL3|4IC^et=XD>m7NFf0|nlA z1iM7B+2r`vI3dG&k`j8bmX6N3S8k9$?;Q-{d1Vkri~U*^`q$!bj`Oy(Fy#%MUS%<= zWs2zt!b%mSj!Zdh{WwxV0X67%_}E5z>W{7~+BW!F+qkJa(eg)xMZoc!H)FdsD+xKW z9rbffRR>ltx?o>vkTSfE^N-AV<*%@AQ?;|9B<_ib>h6nP6N1^<+6aDCso1E~R z-s;R(a(J(eOLFHEVcAOk@-V1hMkONlNoLo`+4>obEku3JZ?^F1)=~*VIm@Pa-X|PW z1yF-A7zihDOrC={@4eLN)OMs!WWscv^vCI!wjH($Sv~NzROtX7pSq8{|7Mb)j7U?; zQ^t@^H_<-lILAVsW1XHz@{SLYj@Y6Ob6#4SPcbSISAc%YACCwnX9XQx%6y$BZ#TZQ zi2*xAP&@nSa^yNUb(~3K_2<10RSx>z2EQny-f~Wf%chZKSSV(|1tLEIlI@9|AspYV zL0TBegGkV=H-iRD^c_Vp`68%PXI+gFx8EgYo8mg%CnxJORYx8 zLPaGO(uYI;$na<$DAfjWLmL3~hX=WFOz&^e+QT*OZBkAtnTd*Qf9h(wkHMRN0^48i z>tpV*&H6mzgD+p8B6`Vl*d8|m*#(YnnzXKeyG*KRUfm107w9!UEojnt)Ry6rW6L@h z?};CODJ_y4?QWWaOOyBUts40Z<#iJ5PN7fxUD1ZUC6>!is$af5n{@AJdGAcBP?eex zd4nvuiZo{(AkVbND#ZEGkHlC?-^Xr^7wZIPfQlW==F5|BqT7mn-ia8rDSQrQy34W$ zY`~HZV*V2bK>|Pd5xnKlD$hr5;LTihd{~9s+mU-jbIC?DID8=9vz3do#e}E!#_IP`0dESBiy()JxL){Z}ubX`(Z6CX>OX`9N0d1_44fspy7 zw>IH8yf*l>wi-q(uh|=*<8g&Di}vT=iQN1u1cobJ$z)!cMu4}HSvQM?NLsL#nH|_G zL@(d8+!+PRZsy`0Q(zsqbaYm8b=3;4>UU=UMQ8E^H74F`QX%4;1)<1o~GokXPj3J%x0?=?a5mX?Kv4l0>kj>#{ zt1fIkArz!_h;YVq;QQ6L>G*p%;aIfTk(=OvmkeiW=a@lMThk%hu4>!SSl;$Z{8`sb 
z#6d+++lS^IYn*qr^a2xZ+CCcamhFAoD_4+j^3X|<`!+s@zUZd>Nu0WmM?cvcu;f*0 zU-~d11hu{N+wtRP0Jf4vkUj`QsqJIly9@Iv&|yU4`~Fn$FAzwdpN&z0@lc$YZ1w^1 zm8s^BUxK)pu#AyI_y2B!?~+r;Q#M|gPBR$2AgbL;GbR`w%UZv$0Z}RUOkY*~-2@lg z_2UynIK>bKZI{FMZ4}0vtGa1NR72!Hg!t@!&r#u|C|v~n-hbKja2r0~{xHuI5$Od+ zrn~k2Zi4Ur!anzhPtSObx3ry4C zpJNrT?w+nwf~atHpZkouD+hyWcHqfx0+1?mjyeD7G!!gIO+`)!5meO7Hek-5y?oR1 z41L`HLz*{Z?*I2R?+u2%14~YKT_I1QAMtGE&2gd38E2IQDgGU^cFM8LVqZexTD3x5 z!_q!KjmO@#u%))Vc73ECTf(41iGIb+G1CZ-w`r3_2uHh8B@LpP->)E*60}*XlmEn6 z|CfY&;B(^pUlh0}O&w=L0tLYamK^0Lc+guS3uwxe~3ZKaw}n=du%Y`37ua zH?AKH+Piw?eKr?}cd$BudClDkmcam|^<4@0c8tyFj7)41yxj?CN@K=qq8dA%y1`1X z=g1qrnQR_BK=Tp^e?FU(L$6Ez3c3;JqtHKTFp_j@FVbUBJKs0tnKt93-xfh?$UcAQ zXEmd!1*C?MQ)T@j%KSvuyI-U+tG){ca_{+QO|q%rFEqS`aku3{yx9POS-)N zyiy60{cI?ly}bOC`B#R>06QV?uMyi@+!X-JGrZgZ?Daym_|Z~XCt={^u=(-RhcK%R z81id!N5KRhWqJ zGdZKnQQKHh&F)lHABT6W?2>Z;cl~gsK1vB6w1uQ;moaMnu*<6I+Dg*qGV;DrrQ_8| zC4YJ~WMt+mmE7mv8dq6J8w`Jc2Lf2?+a7@jDRQ=Qo>+Fn+!mW5bY7j9f1Nb`9X?8Y zalx2g;KXTQ?8!n@TCNZ`(y#O8aVG(ItMq)VdxaFT?T<`Nedk{9gXNWgL~S6nL~ezvHKMW08H;l46H*_d z{x6~0r0m%f)~T>G7@wDwKoT%~oE;155@sUTH%m&(jqMUU}%e(D#Ry2{i>5d2|dNO^)NSpJr*tZj1ghbjrDI(2IJHiJ@ zDOiQjR`l;6?V-TdsS9qRS`@{P2B0U-!TW}JIN13|lEpnLWLEO%zp^#(?m)WCx&Guk zIvm^1)4GbI_uj9_UNXFpP{vzx$!;E4?1#zYStkt&r8JK-{YnTam=>3#GM&K}reyKV zy-8R{L|P>OX8!9Px3^^E8_*>?g8%0r?U8BnuZ0;7!boVvh=S`WN~8<$Mh+`FJ%)Hj zl-YC4=gPXAsp+`WpAo=jMYc#HE^=Q#KEh#9JDVG;FxP$_P7!wRi;6wBid%ysy^Z6E0pt~Q)SNNhxN@hQn$uqVvFKrBatN4b(yW2~f4`%Ho5A5orEa_?R< z955XsVe=WzXT{vK@+~g7=wx7FVIOwBKQ1X7Lguge>UcFp(K;o&j@!b#;SA*)b4#2x z<27uPbtn=HvTGb?a1Icr#uI~WW3lgJ11S(_nLM+#x8r>uN7v_fjo#K@AZ_+%&95+w zw$#EpuWuKfCfh@UA^Sf1+~{#6c6P48qZrSUy6=3i_)n+>$p}KbTmGec7j7n@9A&X~ zFboN=ZRS-?7BDuF$4J8$}wFO)lmhb`RV3*sN|CTGXT{cn@Al|KLX5kB zJE|uEFm%62kB&mMNY1LyZA9MsE<7ftld9^_MHS}hP*^=m*O}HTi(UDZg}EECUV#gp ztkR>Gn(qcCa0j|hn7;oOj(c8Jy#>RR$X0Lp0b{^TLd@;c<;-R)K>wh8QCQ%A;LiQ( zJ}x!oE}4E7RyhbJ`#0rS=rcvSWaaNvbx0y!`W$KG#?ftQiUg{6SPr||Xi|>pj*vW< zlhHb-FNGClu=F@}29pY7Z#OMF{w8r2ON1}PpmzzF)j9L6n?g>Xe-_oHm<&uB(*L?7DoOHmp_s7DM=uiQz>vJmP zr;gvnsgs!9sxT{GfhkacCnSO6alqqw87c1NFSJO;$z;zk1ZH3MXsKz$i6AK;e1j+* zbcaQf0B33U`!`Sb{Ha%~liXs-s#kEnLh8Tv@K=~n$GPzrn~N}o9k9z$(uIU@E?v!J untg~jeQk$xyizFQBx&uIGVqBXfl;&6F*XX(Rj}vK9$n278s%!%A^!)0XPc7% literal 0 HcmV?d00001 diff --git a/doc/source/_static/spreadsheets/logo_excel.svg b/doc/source/_static/spreadsheets/logo_excel.svg new file mode 100644 index 00000000..ffb25108 --- /dev/null +++ b/doc/source/_static/spreadsheets/logo_excel.svg @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/source/_static/spreadsheets/pivot.png b/doc/source/_static/spreadsheets/pivot.png new file mode 100644 index 0000000000000000000000000000000000000000..beacc90bc313ec81b72036aafd7bc8c3c95a9688 GIT binary patch literal 159941 zcmb5V1y~%*)-H@CxI;*A3GNnr2#_ElxCe*eHn>ZILvTxQcMa|Y2n3nn5`qn`gEKgP zv-dvxobT-KzxTQG^e|0#RdrP@f7j}8Wknfm3{ngP1O#k3*;lFv2#;V02uQtXkAW+G z_}R}85U^~lq@vVZh4aB%%iE#hbk>e;ZQ!Hd(pvu9t^K64TvXNwUz z;VJ!Cm(pAV@rnD#e`KW=dFhS*{xj|&{Eo>3k~feL!WSr6&rU#ljKNTL;1rwLC5;uAqXjYPP|a#`P(7E`Rh(BRTD9MZ+_d z=kNP*W@XpWBW~=d8PR&(V*HRiYXhW zs(fTa9HujX?1%D|n;Y2%irL+*Vbh<9**g$!E#4mgaTH}rFP(NEQtS7s=EpYK;6;qD zMde&7VkAn;0cmRMR3q_3y^JNty^&5$bgLSd%R?`*i~_m3P@HCYzCDjce#NeTrhk}X z{TvDWf#^LlB3Q-dv3Wj=Oz&YTT0~b?EXvClf}H9?EC|8M_h!mvEiD0T>y|lA(i|Qb zLR7B;MK5$K)9EQcB_n0NjM}0aYpy;r>hP>)?z?aIdwH$U^D*^ly)+0fu0+g84vR4a zKQ9dbmCQT}Za4DbQxEJ95DNJB+sD&Kkj%rxQE87?O^TL`^O;O6$#=2qOrtorGQ~D= zm%i*^k*x_#-0#Ra?px0+mKnMp!Gj#NJ-9yYJ*u~LU5qH@@;|(7B=&(j6z~7Af1hY8 
z0d)VS?H$f=cEC-CD$aZu_>Jkku9%|B1$G&QU*hdE1R_1!;=_y zn4ER+RVlnGNsHrr-timdsIgb?uZCzzzWDXdOX6+#pEN4xX$`_l$plO*yma>;#US&w z7m?sTa2mLln z%|WzSNjrCXu#*b#fAK9+LY{ zhx648QUtH+(6MdDoj6Kivks;EC1l*UG-zob#d)dl1^5$_$1$AYREkZ{<|V&NOM8Kr zOoW3|luVO^PQEFVVKl=Be@^G&;9$IG)x!5$JbaXib}drt0c%%)Ab0&^yo;QmMhSk2T{p~EIX0bOv40J5^u)u%!y;ha}M zm9e6OKMeu?L94dZq_YFS>c0XoaKjgCKvH-j6-a<}ri?(TK!XR0NrEbo(0IHOR4RJ# zfNubqu&*$FGB#{;14oeNO?qwhV1ZuA1+1!jcj^c3ic|vkM-QQ|g9A9J9on~mdeW5u z=O*9)A+b1T%Wx`NC5m&QO38Kb#I(Z1t`B5lzgl25-p9a1^}IScO32mKjZK$%qpZHJDeYRWSbsSuy>{NOWFmKPAuHb8Z9GQ* zSq%#TLS7BLjGg{X>v5WwnWqE@nkY7OO)-J^?f~i;&IHaaE6Ry*2UZj4=UlRE4Tm9a z?CXoYFzjnAXvyP*QV{c)S^|xkLMAMejZq++N%&rfO+N&?rPIZ1DITyaM+yK01v?YM zL7fb^#cSC56K|=O3lDR$rKk4mkLJWDfw+GTRLypsGS_KAKX`q?TRdK*5(l`@Hn$Jtvp;`(e!iG+aS{jm z($b#3ZA#4fiy&9sx0z9hvjt3=XC^1I*18j5a+=ZL8a9%djl5=a%u_p0UY5!c_`{gs zVm>tdb}<6D5I(eKs*qjqYXH?@8+c#gzQB$BEftr+d=8M*x#VNn4s^f|w$7k(SQ9$p zSB)pb58N>|mtznPtkAfs`7r+~FFC(H392PoI^AS``HeH3<5?l>L4Jc~HYK zLET?p`C2c3&4HB=C6~`@L{~vQHx;7eJdQ9C;={5Y2FcN0;~n>ZTy-=cHP)e8aTxza zpnMvqeb|wPVZ*LeYp(&3wtZUC!jz7Yfb{^W$Ig$Oq5YY!sY_*S zXvDiv*)6YY;>F1G<)C<-H!qNLuG3(QjOd@a4Dk1rGXFZa&obl~aR`)knT@x5eT=Gw za?=OEZh<&_`TP-cy&Y&@7kAMq9w;4EUCtjOA3o53$D{wt<8)&I0uzn}k3br|E&WX% z{mEy*Ju>sw2VjU{<{2v;3OFF-me*&1zv>ifJV0}sGvkK7dJo-e79VaTaC4zx`>r(I z+BoAO1=g8lApI}UC!&&|lcuJaDRak6*#ipM(zFwn^eYy~k6*)Y+99HcCA&~K-PZH7qoD8_<LB@3-3Xa$QpP;_qdy*YtAtMizuOHYMV=L6u?Q zMXL43qJ!W%B$7VR0aT``bM=L{rqmgMJH6G8 z@GcS6AI?RmY`nJKpM&KNCU3x1^oe5m@&%_eZjBhdwlt&ZZWE%?n`a95lv%uQ>i7R^Nb`mk@=jjcWybaf63X z`lBYKabrsv&;^C>1wM7J#cS{yaX#KB@fK|4f0ozncVxt`o}w%s_k)=G!jcT` zSwM7PpCa&b8%TPWA$NdhiI+a#CVr?>=rbo`^4W}BoxRvpDP*o>4qQ9X)?{1#z=<2) z#ho!?SVIHU;fE;=!{ybElbFJaD+?G zi{;(;&bcCF**y`gSO22ExV*~EEm39m);O37TZ=Z&Yjt;ya-1iZDYk{gJa(>s&rMs9 zSuYLctCfrJZW615-HhI0qcDsg#Z1xLFdDTxmhKjHcMUx3GO4cxHHJgyE!na!N>koQ z`pz5cjD@XR9GaeI;hDtI;v?n}P`#Y_#Mu}Hc@Mo=9Bdp@m+aE@Yo{=Au?eS_>Z4Hexe*l(91{aHoEa zsR4GJbFiIhw6+%dm}6@4X%7qb$B~M+Js>sGB<<9c9s{=oU*2timGBv=QPSS~X1QvM zF55l}HoZI9LbcRNjN?N`JH03O9R;6cn{3c8l#6?BMBG^SFI4k!o^%=E!{uwfqyOhX zx&6+WcyIpuR>+)=N!SL+0g7_X)qY*y_~6e>!IQI@L}3{ zF_n1d#WT_jn$P6QWy6sLL!~caWG?NVSo4zWuE`G>A!rR~LRM~YwKr7#UF5FNriWlQ z$HrkZFP_MO$3n;-5hLL5@dWr0Q#$Yi$l6*`_m9=ET-@E@hwE?c7Vw1hsDPz5Jr2G9 z-7U_xCccyx{sX7CsgVHoT`-(Kh~-_t%pH`^a4*c|02igX+$g{Lt+j0D>SjhQc;z&z z^2cxX93R0cxn57DL0D0*xMJZ@@8EvoK&&O^+Uqo|=2VhJ=JYS(^h#PYo$^f2p_#Z<|qBuiRm)uq38UA{^E zo7eAir^=t|C3Ii$vQK0GmG-TvCok8vlpA8ZBih#~T%j4Q;7j5TJ$!pK54$k?h95g?RW;$}c~sNpvbg3H{#!>4UHHV6XZ&8p5w{n54c%gARzd<@5a` zzP?d_ib2pfQI3Lan3YVzJL>mb^eGThSa|Nn*~4GOrbyX(pm5REzpTibL^XaRiLQ0& z049p5^EoFQw@ysdqZqIB(ZGb}EF|Xfmr15P7m^x}FzEVMsm~8f{jRZSPMl&{skp~G z6&PpPvIfRmK0_xL36nM^{K2*mtSW+$7cmC6oxMQ>zeMTmDN8=K;Zup;J5i3lqT&8~?@)A*UGq7JF{Y`nAE1K)*wS_mLn z;Yn(E^}VfCQ@oZJ48L@aOro#issd|$FTdJ5+~DJTtJxz)JwPvOuZW5c{N{Q4oHO^d znMdi5ij3{FAK0M$V$bqsQR4;gkvE5K4~O4}MFNyy6yHK^8P^h?e~?^Wa+<$}-t2LN zSsFXi>ce_4Li|#NUN3hJbztpK&_enG!$}QetGJ4p=v7GXPcF;(r>7*SjJ9IQGQI(j z6(PN+E$8WRh=x%RsoP#Gy<+VOLhljnq?VLWJ06$$9uXCJW7xII1* z#$1{yX7%b4uof-Aw{C2*T7)vj|3tLf@J-U{N*RT7yz>Q4yO32Dp}z*r<}yBrvQ+{QnLpcT%thq;-bkd<~@USX|n zDQa}A4~0rrlr}?=~hTu_iWy6J|gpodF!C7Xzbk89LxtUYVLl=oD|ocGVX zaQ`(g*%`Qj;QY&1Z&OCv71?=I9JFXh%i(frK^^j`D$NMcA4Rkl#9PsI$-{jH6Zeum zC9G4D-XdaV+uHQ|U8};g(sqGld2naIW1Z~6n7pb2jY0X!M~)eTX=55C4jj><++~}h z1Z_<%R9mmQSGS6(U6g~zv~V+KX0tPg8G>Gsk>hV(tJ@GqD`_oT7JLuLuRvVfc?(Vw z$cDv6rQ@|MnqqeZ`PHQl`9F1Y;D#wA8_64ePNj&?gdrPy=CeBh?#IbU-y-$B^ZZIGO=@s6&iFQ{d?~O8UuB9e{EY**4pM>&6w+n|i z{j(Ry{Nf0skDF+dUivee)L5LQJsAY%|z;)Hg;)**|8)jVPw$Vx{YQlDaXp|g z%}uFBw|WVIH`k7FaW`}-CF+k!y9H<74GWVBiXLpUHUWmepD9MbXou0%2BtMwuEk>= 
zpLP}YIV0d4Th{xjZ$OCZM-m0#oX7_^lU^X@pj-OhtM>g`i)o_ldnrG~@;->))e);# z!n1)bL8(4$vw`lk`jO2-zt1YZk$12MF^rC&HIl#dhW!M*o=ozUQcut)c#~IY_d=nO z;}|ce^5#GU`TF3VXSIdi=AD116{~ze^HjP&Xd6x=ae;sE%t`7E6*v$_@KTayl$jPqn)*PiS!( zHVA&dw%ai#$~MD|P0Jo6Uz!jtFIN@#>8z`w*6%rwhv=ygnLP+8SUb&9Hb+DLyt-x+p`c=104*Gbv!et3k-hZ#k z8Z<#FGfr`q<2bEau9KgvTC<)y^fApl5GShl6;Vm(MXVoD(7k2=s9CnDlV2{UlZ!2#xq>Ug(_RU>5H$IBE%0e9B_fMG3d2~p#%ALwL7X!d-p=6Vgz8ExAt^v&CxzL6!spbz@=v4_FD|(ADOGQ0(7reDa8@498GOnn z!__=nhA-s7VL#@p(lUc5PNbPvdIc2+KxYy&!A(R5W!{u^Q5eZN3$JaLy+ldF7zgR; z^aP9?2&ho3I{3YSq*1OR;5u*<$m)E`961aIqxAs)!B3arjlhm~ja-`^yv9cdIJ->bopN@o#*g zOi0mY@7>(u=!MZMj5|r7V z%qH30eNw;c_y;|Qlf9m5`goZ_$5qC0jlQYoDN6+)XdrZ8%MY=jqmIi%t*ffWsJ-XF zFyaQ!c@4L7f{TWU^R>w?{0!RT+s1;d=-t9Gzlvz*Dem5T875}4vc(zc6*8kQv!hVJ z#KG}iHyOC8i;d%)@BsJZ1Yad<%gLR}Xb0KQ#Ux|bFJs=1Gq5c1!AsvAudtQKf_C!TrO%j9c^`PBWde< zb!u4lb-7(EE)G}erafCjaj~MAS_T1roJ`Klewy{il;wdz4i6?huI{f%ucwSdW)GX_ zCkj8gc}`NDKrsTu8E#A$zPnp(VsmJUJ*{9ocyL-cd1-Qa@n~J^x&A#K?oe1c|H`Y2 z6OM8#^GI@+GIWT#F-(?RV**_;W_qD&OV%Z9%les2E^3SAhlRq`wWBXV_hog?)7V^H z>AbNsGwoNm;R}#caM;6c z4(~JmbQgdIq&Z;)=9cRclR;h54Y23=A#0|TB zW07NU0mq#4CN>@fEII#DcL?;T+W@}iJr+VDgvpr7Z)oH2@p_<#I?J`zndma9qxIvW z)aVTn=rBwNEF)fcW_Npd9COm+;hW}ko|hJ+KD9*_2ZQv*g{jvM-uTphqbmGT2(sOm z&`s7e`*SI!3XX~cKQQcWGJD7hg#KdbEw7|^D~vHF(5V54_0wIb>0GMHU(qeNJr_9$ zZFWzpQD`4hocht`B=Ue(BvaJZTqhin9W5<kn`>o*Rp4AvC{Bu*owz!@tbc}wAX`pH+l^o5tM{HQKnjn=!>}tRC!Vj?2ql)O zCswbp_@|_{RlM(-1H7MRicYZ zm7T9Ie(qif8b&9+@IEyutTA5>IvuFWq9^4eASnb8LItR@SS3w5dM*GIM4406tAXBf zdL3t8G;#hT(;ed^=Lz7GW1CY z(Wc}lwd=56k28KBJ1FV7e-$)H#i7r1?e};ob10ekxnueXRlJ#-$8F2E_4qiw7P^5N zY-8?Yn}KB%4^vhy#5=%KmQ@UErO;aG<5Tml+DhLlb-eNk&9WFv9kg}@5!*q?TV6cb zjOsmiT`wIC${`(8`dm$J%1JG9A`fc1YI!d&EDe?Fk7Ungf(%y%+OgHcTp?_Ia^dE> z>o5$^MqPl|)k(hvJGmLSlCLsmlUERj_)mtUQ`K}-A2plZo4-_R8>?mOnIA1mtrw0j zD(tNa5yc)#Z9OWNndu#NBX7jnmR8}KOY$;eprR3=dEn1fj!{xgDUm#ZJLhdBy>v?} zokep>r8ij|Wtno%27tb}H}8!PLF~)~IdD6D<)y=gF~&Y>)xSnMmVLpL9~i>Ch6_H3 zRW0rmubFg>pWIsuP21&n?xcwc;HTb%0FZc1M0tZO#RwFCKxo4l1dn<)R!o{Rt_}WNKXeWLEpO_L8>EAY?7Qr-twU3JC2X zy=^Hc4QpuxKEih&X8~Muu2hng{ywIcv1?+qy&qVF+X=bz4)RqC)3Qyc>;*_92Y2p7>_VKeLGx3eZjm+<0e@4)xFZ&QU9%cY884qJ9#*YQvg2^)IAxtZ&S= zjqF&c=z}W0vaFt@shG8Yxhbk`r#iYeAyJyL1pc6|#{P~W@rq}uf3tX*!55fU`G$|( zQpe-`?`iwixs$VUk2aUBErjH$t#`Z@V2m4@Y=?-p-s>XgehsaWx4om;OW37HXVL1P zjLBpyyawf4SN&P~bZy4>f23yP#>-AorwgfP~`dR1|-jW1U9{nzFYM&`-{WJeFM=eFCsHFCeS4 zO>Rqo6BCzjw1X_$N|P4I%Mq;y`Mg^%N~EbHcENK)CM*e;@x6{?Rm#ut*wK3{Z3gO} z7^q)qfnT3w;=`E^(ZN&&nmYNtym-9Cuc%$oSmz0J1U;gFxc$~1t~GUx7=5xz-^5sU6f?RX*#s(TJPwNhj#lmdcrlQm`GHrCGZy<{i2K4~At^P_!bh#AsJm>=q z?Rh$9wyf9(SY?-P8rP^R*(IhwuDeNDU(2to+g4*)Ei{_07iW z*j$^v!oJ!ff6BZgFBihT0m)maPPU?B9UqEX*{0FInxnTyMP^TmYWyp)Wjj6co{p7^ydFi~H{#ZRS_TH^Y6N8ldRKdu6~a z?)qpLC@CcJc|*Gfml26Aa!b;UNyTcSstM)bGm||&MKhn7An>9G>4Eo=-s(l&pvz zYE$q^Jy%bSwYcp+w@P`*JS>d9t77OYu0Dk~m*XXaR2@kcT?FpU1Z4c}OQh=kbjLgk z5;gwf2|qn+1fxzU7Y6aD24i@pJhx_|IEfb`YKV<g| zyn$o-WjAQ>OFeQ+6nE_j173Vcz%3y!U8kr|x0uYm%ZR@xT>yc49`Xx-#WO>SxPuIUB@TTPmp*XFR74?Y@kJ{j`tqUa}T& zxu32^d@v-x1`oM6Yx_3@!KXsle z_6c3SXUWMaCwAuXoWb?e1B37B#3U8Ub&B1ED_+rl3WtXy#MONg^8OS8?%NXz0mrkE ziDW}+Up3ZLQBp)|jWDHm!e@|mzMs<)f_=BkuD?u#_>tYwp&$mHk00`UzF$Ns)ZYnv z3S;V=hIE7fOsg-g696+Aprl6F>2N*8{fV1&gfHdA>k=YC7Yk1cE8M(%j%dkR_fArL z%y=%VQKD@z*&|gQRTNX6RHyczXEg0;0#gI$ZEYJct>aCbri;XLe!J_^`6!Xd+{MuO z`S2r8X^9D!oT0m((xYNuc9~aqb(iGTet)B9m;Tw+go6`9Kpq}ORs9sm)qsW~98X93 zANngkCJBWRJ(Lon0^h~DPk8n;_;QS7FUSo(!|tAXY`_?;_;~vf5;pTCoAS>YOvDw7 z#yl&0m1-LkdN@D2cz2)d5olm9A6KQE0*0nb<%tJV5Ev^#-mVVywTIM&o#pz(U*6|} zjLZOwjEBVR+E}v>-;qmw`X-4*gRW;v^1i=p#rsaU3to2`|-`nhNjSm>>%8sItxf 
zQ(*sHL%M$xe07SDN6i05oVPja^H}RyOXgK+UvP3hxyAfW@RK7LCk=Ka<$3yCwlDqf%86tTw-4OgCFWC|9%z&!SIg<-B<*M>%unw7v?T) zQ1X9aT>>RB9)}*8FVl9ugM`tWUO+Jsf00mJrFdJVVGS07W`G_uQz%#nidQt-@2)*O z{i(CW>92)A@r2D%@ZW{tcZ9?Pdizu0y-DEgR#Zg7NCeLFiIa9 zzGzXXfnmGy?yObruF%E%mQT)|e%$|%4)@_xjKsgwD;(`Biq*V-iVjEEoiboAoIyqt z=_3796g{P5d*0vvKpPAHc#65`VR^6bjnMG;u)o&$i!G7+|JiqPguE~K<$54Y5Hu@` zu@Hyf7l2Dvf8Zua4>n86qVW2A@Ij10&u{I2f^{}Nu4Ih(>w}6tvr&KY$3$ENZ8Eg7 z)4EU*;y7@Ys;`OZC-mDpr9u3fG6j6 zBpILhR#Zr~C(K4vZ)vFkJkYZ1Akp&rf)&qDFSPO$Yg-y5O z*mo&MG z7;FP5fRa)kd8U$gg_3yz5Dp3y50)?2{JM<;ZCm(V{8=Y}A_12KK3AB`7}+8o%n`)3|g#&H*$`cOr;% z9&To8w3mLD7D2_k8H@fO*4{d*stnnxLf0q=&Ix|E@ue8`~qCE(5Vl=n+YdSd{VB%Hsbhtii2Q{(yoJx2Z34faqc+$GxVx(TQ~tpD*#^F&J3{akG-p^dTvO zTJ#(4@q1*6VUU;TBr~LseAT7S*W&-RCw!4WHib{zm6CTFLo&&~*b@VQQdRRKCsM_& z@IHjiU6q3QhwpRQl0;+p>=w+NruT~`NH|w^qqo|s&9%=jL*`~`QgBFUeEi^FD{-}EYM>AlZck0_z6d>ae2fyMXTBCH;>W?UEH&yVfhd$O?l2pq zWxxW^IEY(g+GaPJ3*4-kpln6Dzf~sA_iIahfRLO5=cH2wokmU!9Y=cUNb@~T!fOXe zZEFz_RVz)@iWCN05TxNjmyzUvUX6N%OkfJm?tigFD%ng&!jAEDP%y{3BhzUmA~PO? z*l_*F$DA*1m4;ze-cJPhlGvX=!HEkvdzJ0tW>iF;+{$ZMey1-AjT`G&;tt?l^nj#W z6Fag|%q7#GG4y;Gx|OF3NxzLFQS%#N9R+B6H3qzluKpALo0P(m7pLlStvG4!D37hF3Gvp&TYW$Yzr@w}wvs6KMRI#hT z3*4DL_m(Q+a9%I48jncfIii%Mse{(sYF3Y-7aY2Z_-fcqg4uXqYFZtxDktAJ#>N== z5W<3#6pH$ydNRKB$saE9x)Hl6XB!N~3h>x1;eYSLaY-ygVpo^E? zL4MG}>NOdpk-4BpfA;c>vOZ~V{1-1IG#Cpqfw&C0T5jF6*6(P-iZ72&6Cbbu;uk9~<_r5{2D_b`Z# zoI|?9O!sJ#y(Qr${MXh4MfCiuq?4Hyqid2?@G*sNU7W4On~R1R z|4(seQa5RP&x?;%4&F+-gtcO&rWGH!c_Wn5Lkp`2E6f=lEVX^tPH&qr?Pl%!oi8m(Zxn&> zdu+ACVEQL_SLn(C@{)2;S;dP4Zg(U*Z9#L!6Lg7#N5+3Cj#aX02*Zd_dO92{T~4I1 zSgf)b{Oq1nYf7eAUR=5w4vH`({~7EJwf;8y4o-*b45%z&t18U?DThK@1DW1A1qiln z=qU}~J^xmQ1ka#(>N>u??T1ss9_!Q-8%w$DlD86 z+#Zm{$-4EfstX4hK=&S#>xJywmA1w5oh2(lE-kW39;AzWW)8_5e_>433Uw5#Hq0+R0u@R3Ho<)m>5(a@dux zH}rH!Z^H9`SKp=;xsnOuV5-1=Tjq&3EO`cZvk#3{6<}NE7=iukbY`S<^bYcU@+{mO zo2)t~TxK*7x>jdFV9ZU8IxH~XZ(oKF8pl2$W-OAgDEZz5u7`|+_>Zpe*Ntlnx`%6z z718eBjQK*qv5-aXdoTLb1^$Spc5%sfl?M($ictmpMjZx^vW0wJM8aZGLPAGh>-prt z4}Z;WKI$-3PONs>mlcA2N&B@}vY?|M+#`s)*))Wz7J<$^YHGUqIMT;tW<%-x6Z;Vr@%g2an`9xmS+X^pwXI+KF>%` z5vY@hq!;14??q#OkkTxegPOL{T9%C&uzKt3=50lwJeMzft|L4nJ?Y<5M?spVg9NTS zwq4q{C*bDdSAxyarQ2)zo0oYcXelnsEm;MX<_`d4kW6+gd2c56-DqdVIzh3sD=K)> z_7@m#DO=YS$5DtiIyi7&I^9ZeIhFm2%PoiXqhn}M^B;k~_5d6rxY>n0U>luZ4gDrM zu_YRWjrYNKOg6KR@bRq{$1FRnAqKm1Z|(h}%|Sb^PTNgAV0_EQCRJ)Bdq|M7_VTB) zttUj0BR-zs;N!FEB5P$GV#1AE7lO1~e=XkzupKLl%b#exC%LT4DWM!$KUCr-v1sxqU+CCryl21bSe7nh zgu3AycJg@YxU>v4D%M1>&OZ*3r8gW`07<(U%-YWI$)S3Ie5;%8W!-!_^BV;ElwmZ4 z7EV1?1{Irjrdiwsfu%505gBGcQb;lKD#cc+wEq23AV%J$8)lLfGP6%_$h^In#4y}FwyC~%2PjYQMA{V}L4Ee4#wM1z=R0NnqtoT7vY-%TFxy`fgSB5q7 zp~CJ`;?icXkR%9+;7slg8GsgTr+yMioh=JT$M(MdE@nLhkoJ5_EF0U7u%vGk9~%-> z?FrT>o`9U1OY

    aCHobQhHx@$6AUWZ$Yo-lAq`_OKmrr@^m6xX(eBmQ&^?iyR`R zWfq4%3^|c@Z0Jx|H|Npg0$xT-9#15i$++MR!bg*@>H&U|&+Gzex?VgC&50?Q`6jiH=S zS~PJy&5P>Nwi=e2Xkp;P9DER11y|p!BSEoOG<4$kS-nv2{UZK^C8YoM z6M+u#PZ$5TO-HrNd`u(wlRuVy!3zb0{p~cP2sj^Zgq-N{(JSRBbP}YDh^6i3O>@M3 zgBFC2oT+E4dl{bN7ekPaPiF`bzZRT4@{?NJi+WU_fFa9 zLVXH;Ln|@`^wK;oYb~tkevU4%x}dkGaI|~4T%{!Xd&aOsX76BIAPwaXglPpW9U~lj zNN3vs*`fOKG}nsosJ04bfb*E1Q)=)L84|NN84-Ty`hYRsW_|D+1+?xBQ=}hU7^jfS zkO~N(mE78eEcqqgRO^IN8E#(3SNZl|--6(ycC#8RBw{y{qwgKUC&NvD9Tt=9*1Oh9 z;-46LJv5XUq6_43Z3}u<=fft-cH$eHBqS4TRwHaJSILSif`nEh;8Y>Ju9KCt0;hiz zxMJ+k=SLkYK0GrIxR@I`4SQbA=kMk7`tmeOMt>^oFErL9+_zYs(6$_O{lVgGOD53k zNE8(AJNIMa?@@#XzZOxcZL+mg%6&Uu%lJWzB)Q=>_;tGlH;r}_U=X=*dFciWTMBsc z_%2NZ5u=ZRKDGO{0_~e66&pT4ls!r^JZ-FYIjW`YVgPuef+L@w%S*|5aouRJ1P z`%11^AuK%fl9R!&Q|^Q@VS|lXe{n=ro?&a=dgce-qA{?nEg-Ng zw_S2+0Ho$$>3)n;!t7toRuEtD1{yUix|_Xf$kZG8L?>GrC7D59ZjAD4jpVe`#riOj zNZ*`g6!$~;RwQtBZEzPT*Ou)hJt{3<$t4z;a~|Rgs1?RpWlb32=&85+vkqnqzyt>@ z^3E6V04iLxBe>RPID?qiAdOf>x+VJ9z5^fqd(IUC>!dw9ryjLF0~4o{20<2G$IFQ3 zLj&(7LdnP%q1xYl9;a}U{Wmz3KZ?DvU-fINF*qn4$m0DHGel6k8s^due?m;NUcHWN zwfarobN+PXDr4U9#w~0W*jdI!)PwEoqN!ywqkvDRN>*2?cYf+=(Co)-+9aoIKS>q4 zp_#jW{|NPL*gMnbyci4y=4fdYPs2ZN-#wc~+;R$WnJsst(x<%=Uh5Z? zrrXi+CmZ-^%JC*4X74ycDT+ds&kF=XdVli6Fm3z#isF8w?@yu*;WKe@T!**h(?N9_ zP}z;=2;~j_*b=#*aOUHo2sJ#`o&_FSo^=PdRws|B2F&RPowUNMV2St~qExXJA_0z8 z%cY6TwWI65W%Bo~Yl$54Vxgt0#WsBZsUR*h)=Dx2@so{#V`QWUcfR&n_-@{pA1W+_ z&=6EKi6gu|UZ^?Ey=B$>+yUUg$l%480&%6n0yiSv1DLxBr6j+nNfnE86RoJ0?(Bkp zi`D#iYg|TLCT4ec;&26qfJ|;GhK~1&SLlXjCczR*VGyPx%+~8qGvIAyAD95xZa6eW?g3UmJa@f(Lz#d0!D^qnG(;qOzR5GyY3#tCH>c z&*GWRd{yqZ2ay12Q%xz9^PTwlo@bwQC_~hsk-^1$DAyZMek4X?sUc~7cjSoxc@})S zDi&^sib{qy|I7%W3EZS1Hn#SUB1$a$L0?S53s<~#{akCD@DCw+qAoWBphJEWB%Rx5 z^l7a@;6bW=BLVVl51)2-(;L~F0#8#q|D6R;B$?QWue@*1z~7{(4cMoXs&6L8QWvGJ zDT@pH3{x&f_yKOwW68|38a3TQg!`0u@=mhwsz};|ctn>kqNJoqv#DB)J(^#B-An5G zi_FQsh~$X8C`G&6j&7(1OsI6^<-J#LfVjK2EYr6jAkS(qBqP4aCC|F#p}$MvvHRMM zjSCo?lPHmBM*RN_LQ3mMV#9uGs;9*8=DvNTdUgmP-_MS{EnfosoPtJ{SdEs14sU*5 zNiy9e4!%20;#iNSh+?2L%rc^XtpQ zv-FW8Ue<=hL7|+V)*MGKRMx=Gar(brDHqZiaNe@n{X|U=`u-1JfS)Fi$mIWEnf8B7 z>C}I82X>O5@c*LtztAHL#UUyW2uCFmTpgL{@fGqTLY7E-EcZM@ z$SM~^kpriCi#>(M`qjocyqh-0(*JB;p#?GGC;obP0g<~?O4z`ysmI+t!f0fSEzuFe z)s?s|+MYvxpSHe9eiH?3h@uXqPZ$hA%0OB2f0Z&Yc}4DuS+$@+$~lB+;#abRsqVv2 zt)q1UbSF-p(6aRb$z!R7>78TcUBB)k!t~euDRTKo*|Nha4Dt{BOSAEpCt=vU4`ENw z5m0$7N}@ZZEpgf0d7+XWG3tR*~6x%LPf~7eE>j+L}Z!o|9e)nuAoRb%jS!NSJmh+~O1RfLq zo>kCpgi#iX%?jAD_eAD+g&8=YUBL;z`=a6uB$pd#9IDblxS8lG>jl6J76z$#q#_PM z5sXWBv2zM@l0y~*o-piI!x+j#Rz>iKD@<+CoOZKly;uLQ59SPVzA(=#I9>RoRPej% zHsB393j;A|M5#>Qmr1>W)Y~J<@)Za!b~XCO9+!^bWvX5Ys0QHK$cf+O3uPc z>14bMTqs5ejx+8kHS4bpFA^yT*>5nIx+-rXJfxv@y%tNxuPd~>Fud?uR(E* zaorR_6>8a&;qO7NS8n5WrREcKHLu*r+PO(bopT6>3$5-n)^xy#ogu%&9$k3w4lg0- z_|;vCn-9*P2wUAkTAv}ArWCm38iaynP4^BMws}U>6xtbiNNI}X{vguo>N--cY=qui zseRT>@RF2tpNPRIdnI8fUPk(PCa6V@>+~@4YNt#(!|-N-2@X0X-~!R&Caq86KmG)I zO`eIk3sjcQP?hMEM%mK>5rd3UkXG>_T5{yI)1EG;QD$Kj5_Ki3kt3~=O%;TowM=$? zztSc;)4hr!v9q6t?XpKrM1^QQcZi0_K{EyQCEz-!Fi;4MqFU%3HtG*BCKVV zY6wdbq`8WujY!WCAmxWw+IE3pY7l}+H+AzY=T!*0J8K{t3k$n@J>KGH#;RXyQT4*x z6_5ygY_eQJ0+8U8@9WAH`pQUavV>JNqh2u}LHVxDQKd8lOok400M)CGRZOE-_kJO zhwYMz$Y#1dfy{Ex3q~%%JVOzR%X7DNr;6ZO=hhs}K^e?rX1@wAeYS?Vex#y1D?L zdje8WZETluAxd$id3DBz*?Yv_I6~25X-G6^6R6jQM}K4+X^g;HsS-^EGNx9EYq^DR zDvr^RsH26*re)3wgWE@=MKKMNH113!6E@JAjIG;G(E2kH=`FbUA&fj8;X~Ye;l?X? z!fYNUN=$`VwHAsQy@Bw&a=zdhBir{fYF~x7z4N-4)Gp^=(U&-b38Q7DIR}AQH;!@Y zc)eFu_Zgvjaqo6)gfH<7La6pASx{0N$2!8QbmwNdV?;U9&P94r5MRf#F}iD^<|LqY zoZlw4?qwa|#+Jt{ahV$LIom3bjY=Lur4Y+kQ5tDIE5n2)a;81YOSp{0cICD~3q)|! 
zu^&jmWfM1^6)AQx1!-1w#}x0-s^Ju|`L5Tj5ri{nLq0oTVSK21u5{qdAz zqYMpyF*&~W=jkcZ()$=e)!snT0cSO}%Tjms1%wT_AaL8ymFQtX0urc%raBht9N{}} zU+I?LKLX+A6_zpbOf&}GlM=EFn>q6srE1Gs~7W*f3;H;snUSoBRxA%p0UB1B=O@ z08F>fB+odK889*$2a&%=NA6|;^}{`j9Q7x!C;ODsy)uC*O33nwVmN6ynhj%cEfr)A zXHNkRTzmL%?X>UaH-|BUV~b3J=5gTKRcnA1cM`W26f>)xTuRHura+o3gKT0IIe$*` z;_B>31v2o`BuDb>wQnh_xpQ%fS1T9XLZM=V(@uvZoz@bnd}?AHf2q;C3RC<0e0^Tv zIl^|ZN5>(~G^Q1|MlwF`Ro@zSZ7T7HkwVm$(9D`#zYGy-eQ;}75NI3y=xy8Qz&$6N zWeeDP(d2K-_h_+*FUwC{t=YfZumkTb?k};nLR{kj z5x{W^>L0}1(g4J0;#LwsvceS#rVY_C3gA46Ef8eeb}7l>doht#}^k zOq%Qk+`?8h4!2uWPxJ_j@Dz37J0;e|&@437MgSbxQ$k zk=)ogbzgqGD`CcmJ zBZt+Fa#`g+Y4uto#@(_p!9_QdLJsXnSri#_lD|jK{eb1N}1zgAG<0MXlOgPfF`fqob zQ{_{v4Bw=FMAKk+VZ}{b4b4pV?;E6ZJ=wIcdfV__((wiijHZ4(hdNQpQtfXyCuqQiqLq(;eTaoA~0?f zean)Loq?jqNQ{`-{!xkl0sFPG+$w*@C7->H00RgC)^rQE=PHK>Ge`3WCO=n4VXa~9 z*As{iD*#j}JoX=m*CR(+>xklE?QRm#W4q%S8~vHHMD1yJ(Q|1(89!}>PS-l3BB<}Q z-^O2-OoGjAd=_09T7L=6=MBOM;VJCFHR6pDi@=(#Os1!&A?N8udK@~cb=gNQs8vcd z&9J|l@wV3;R%WX4Nh;>fdzADWlU5hb+kYWnP#@2jbo54swFXwvo(Qt)hljtta^j)` z=M%qN`Yl?_m#Bys{M1=q;VwsP1b}pV*<*^))bNk5x5byci5HjM0$~iMbef>@bpF!i zAJLEHBpipjd8In1a)kVS>~ zpII8_b{PqqYmtEp_tDtU3M#nSpX1-qCH>JT5KrnSMl{K+~yCCbB5l&>nzAqo(NC z-+!S={ zq1?({cDA`c7!HOfG#U0PJeuQwI1X03ydHPS^{16eb;PX}B5b^A7`J61O)Tnb?0MB< z1wS;+=F{h%cxmZe0GOM8VKJx~Sr8UPBl+vD8G&dARciKVT2j4mlW}j-okydrzfUPY z_HtQ2Q0+X0?IxQx$#mJb#a|zHJb5uhG1%+MjMoF>c1+8vfk&~}xzOedgJ)Y|Z$D*_ zVMn`(&P4M6=%p&3QufL6o{dKHeR6s~%p0LKjwPR_vW8v&cL3?!rJXl=!gp;4$tGaz zPv}}QmVa2oK0@8ep~dmRY5KO;u`Ac>yi*@`&{4c}JsFWJ#kvp4R}3R3ROBTyA6=D) zZ=-5Wzwp>(r$74^CHc>mob+m@;x$&=I19D&^W`CB>`p_vuU{^+kQ(^yPRgW|b7{Lx z-e$7T-$k%bb#$Vk)TRx7=v+HtmELbIgBD0%`#EnVqoR6=CzB&xrpoub)rhPD>M~LV)qW&8@+kOg zSLlqZS|cmZ0>c^i44ohR_Rn;WNz33KiQ3jLR;$(=C8!H;xpqy{&zJ*Nh8;HeV3mHU0rizt*68w737B1B_YBD?!p2(ocfp_EkumEqmcP*Q-e2R&FweBumiM||ld`k(gE<9%%(ck@h`KBhP zg_fHx{9I8hlzS?|Fm7i0%V1t(Ey?gUbK{=c%`O&W?y08}CifpZDDL#es(2afAP!0l z#{|d~guF|95TarZ3EUm!*oV62ILsew@B1QAyB|%m-ftyN8h+Z*PiO_gME{p? 
zRHHmh(c4!=+Lg_=-Yb1P=@Z`L8RFSR-?&B65|&tXoq8%$kn#?UI{|b(RCi5qtxl4* zohjVZ#okRyk+*-%*rQwa$g1n%$#ZmC-g1R$EPVa?%n8nrOmdixib-{C#_R*=5miQv(H>KcT@#T_7rd* z%3XIJZ|(MHUQ_I)S!q>7i$)@&H@7JX423tz@f4NKI(|vri!+@qmFr1nzr)c`juC9Q z*I+x5T5AqU<8ox|iQLWPD0C!-DK_aBL3s6!2%Lr+%1=Zl0C*v?-k`~S$U!UUfg>ze zit7DR7WgWZ2mL#7>rV6W4nOPfmFQ=gnkBb59-ib9UMekaZj7)>7XIb%NS{-oWOSSO z{nnmkXy^qj>!Eujy}8w}xYkcA2jAie5JFC+9V<|64|eD7sR#IgfsZ;lVSaW??P}mH zlK%RP*Z#!C8VkoU_n5Fj7VF_jXOn9(<`N7|nA^@}{~WDyzR6H@@P;&}{U*B~8T6xbxRR^Qnyes6Li& zKeZ?C5fs?&eX7#@grUSCN2{`vz%kr~dS>a)p=X*A;G>-%yWeG%VqZTY=WAH!y{ z`M0+ub?QdL5tHQ+`uPC~0)-wCS?<`GQnrwJW`pg*ordKTD>*b?LI$zyg$Wz!Of3i{kp1=X3{MGz7r-c#`}+mMfoZd`uM_YNTe3x%*=a z=&1XC+?*6{u51KtbweN?yldo2PcIo)7jQs8g2{idI_4) z`o~Z2@}b2hO@g*~DRldtWxr;fk_p}}*kJDTus7u2IYz3uktQBw9X!*+MH7c+P-7qC z*z4=LZ&3GJ0KF>y0kp;hVzzV30RypLxz4AX&R)I$0BBz9Zc_iBq}^3!`Y&;N=!!`u zvoNm^i}O-KOaqhO@hxMf&##k`-6BR&A8b(EA+)}H%7B*lMB(kdya?X~y!a<6HPl)i zCGks_p(VOcYbvArLd%49fP_cH{^tipNx8qYTQ3Y=(C&Od-R8b#ZOJKw=3Qs_%yEYP zeo?q@i>@mLDgA?|!)@NYe#dxqubiHZo$4% zo6WI``=U+@hO8kzWr`Y7eaA(3SH3(j9l-)g$#LjB!l{GpUy-xZequA9ZDuFf<3+wg zCO7;TKkE;JCZf=e zvx88gQ)LU8FS(XN>n9T$POwC;uiyL6l3)lj@ftTWnx|^ho~{$3JrSiK5PxVPGnN>&Ybh@ z15%NW+_7F+CSfB2ts1*ZsUv_^P>DH*rCkDb}OaQi&LC}Y+rnG zl8Y{g+sr0Oe?e(#`Vh<=)OXON^*N-2ypA)`M7Jd3GbL3oPc74gOO@~lCvUA=UpMe6 zY+%B2P@E}y5*bQ%*2Gp@n&1NQr3_Mo(D$0u9cPIh%3y}>W%_7U*)v(GA;r^p{Ohd zBO%5l%~q10)I#u~6>fYtf|~GxJv$TRrtehGNOIg^*z|~xHrXBzp@5} z)vtHbwne&!5B22)Hn%F1av4MpnuX4+o6v4zsv=lc#6W6IB3OEN0h}T#8qRH z<6ZveSm?=_EO6Jw80sj!-w!cWCB19xFd&c1)G%AEP_Q1bH!)&ssNzUePtHuj_c;Bm zE8Mrvq(L&1JttOq!uVw4lhcyf7oKU!nfL|c+G+PC3@@X`Wfe{0zqSNFCp95}WShvUu=$`)c%nEe9*@nyS%81U-+>{`H45|xWX4i7- zgodDqa0*qGPuhEvO_e4MuZBL38#?@F4b(T^{uouqidvPzXcZnX)#sT75AR!=wN|&g zaN9k*bs+8~UVetrdVUW00IjXCy_^xdwzTYg{8E?L4>9#TEszyo6Z0#0FI!JE(%X~* zW~Bmc_GcVcDd&~g2iVQ}r^5`qb4%W^Z8ODBsbQyOXsQB9h0hKeSw$yfYPY*uPcivN z@TEulN#ZgGOac-+MQJhCH}|@y18j`3R&7mZoJps#ml(~?pO)Hv#(Q8jDD@OM$`)$f z`{Q`Ie7cIIiHbty3lVQ)`2w5Wh_GI!ULAeuWNUVaPrOKoqpZC;wqk@t>f%>=E~N$P zYn_`Vv~`7N>d(?#9|m9!)q>01OZ#A`G5GQ%?btaKbwg7KVWS$YwU3}jEWG+xc6Cv! z8+EAEyLP-k!?`ty_BZunGVI< zw6uCOVjL|s#GWP)mnCc&NlV(u6O=(6|+cX`~RaFZ8gaji-HiZQ07e z#)7O3W0U;FE(TI_OjVwh5?VfNRCm6$iu(;-DN0sPFg?t7;R;XEx_Da-zXt>(tlxme z$a1~aJ(mTS*-#iSMMD)Q^}oN{HV?P$!$-w_*IAS(Lze!4{Y@G*n*>n9_yejdF9fO z2xV0&@7Fghk-LB&mRn&t#%n%^sGTT|?rZeLTM3(^mv`bRT5H^=r56W10G05)YA13B zy9XIo1vZ^_{-(n14yzK8J??HaW@^+N-<^J07pGEf0`ioC z7LE*c+=ds=v#|7~bdc=zub0sUUcq1#M0`fA^P+ee5o&$lGyT}RR4+lrzO)I9R{3yL zG@klD+z9q~(9r$NhImSAAxgFyqZl!b5F4`K>-6()I9^OJ;Sn!^5h*aQfQQ$Qod{_w zhzkj#)C$&NT2p>47XEU=3QYVg9pkiqU&`mLeVXKpmmz<$;d+c!I1w?{Z*r3A37$#v z)tHpmt@j6sXBo`*-*AYfe7cv_;K-;+M0RDj6f*imQAqa%pR+T&!nDhS9sF?FD~h9{ ziGJ*2t_^spAr=u1-?})mKP&FA6>^tAI6V!4A=#E#4`(z^@M|pQ?mLAu=W&_q>}e6@ z%zXR172}MuR+mU62@OS!W}d%L5x`|`4j`6_mSi}iJGqj%{pYX&wTfP5*u*EEHn5#O7B!*dztX#DN7uR7+SToYkX$=`ly<@?l;-x9 z^94f_?BaZW7FAk%0^>vlQ(kE-@qIv22@Nki&H7A##cc#n%dU~e^aZBi4+VMgE3yG+ z*BG8K}ACWA>qk0S9bwnrkfR#V&k}Ok#Wg;yttM^<VrD^rXtk+NPdqib_D+o0zAzLvIan%+Yz%iw5xTDRB(JQcR7+K}RwN4g`&2_@a< zSP&JR(w~3q5!tsS1iEiq%-FQ&iSTx6r>$zrc(U9CyFGU5lJojnTE|jNtsfX@>O|S| z`$N9kmAW)8eX11|Dr^IC_EL^>y5jNct@>P8^K5TI|1=wkoEig3U-tizP}e$!eXRXjBVIT?{Eu zBnoXf2{Xxi${ra7zjboT`KESecF?UN3H+Gg|!*#*|_4!;! 
zTh<}n$MWXxY)pi&#B&1bg&ql~fB}v-)0>5_VgsYmRiXyXR`I%ucN%w7_X%mnx3#~H zda*59^|_imP*>c|>n<#vV!X4YXZ{EeZ=x#P*!qFC@iZCf18>ZQjzp_mmh=dh;$-Q( zEP@B6uQCl7O9h{x=M>gwV7|%8ig;uMF}rYdP22rRaW$}AJuvD_Sx$Q<3-1-9**?uE zG#`~ap(^j2!m<7W>uUSR^Qa#A6pAKB`A51aMc(I27d3W2Jm1;+ZN1f-CaS2m`x?c0>_tuHZx^l-7VKl8A znWEXe+;Tw`|7W_*BDy-9=OFK)(PLNjFE>bcJ+|qjJUoMKu75nF9IPOWJXa`Okz4{8 zoQk7s<5z{?eunu3ixB!mi}^*5?v{C@WtD|ArEKv#c8#|gO&^EPx9A)Tq1-1);N25i zwf&^M)&&MI-Tf;PA*Y&1LeZ@%n_wDzE5NSCoTwPrVU$yG5w+LNao_Pp*S)BBrHv=a zca26MmUf0k;RN^q?I(Iv4EtD3o_2&oF) z?*W$s64>lf{*Qg*jM!^9%&lI^)x?O=uV@+=-3g@$sg!W`jK=W9xtKioV6_s+?&nll zF&lca(uwZ*rPm%$DrO%ybimFvu9W$}1?#c=>F}UIbF|IT^fB!j^5qr`smw25p33R{GWn~g_!)_qZvXIj zK7^B{H`555a35i~j2%^A!X|sDi+H;?hy%@jF;RK$6fj)W=012~MJ#vf<~d zVT^(`se!H$E)6LVXgEoo`*EA3(qT`W{qm}ziBqLCdc|npp7k_=XU+yQnpfjE=B;TP zN8M+q)WG|_Y-=lbPBJo-e7{~_tctT3=-8y5*tLx&?Iqk!{Uc{Gvh~akEBUTPvpfT( zm(GxITubv0ml%IhEUY&dv-a`g9O}4qnohLOqj#&9zuYlZS>Jbq+N8%x_G7uLlEU>C%z*7vU%%kYTh=gLF;c4!;s#eJ8Y|j%n0S7NH;4o; zcrl}7sAM8$=?F=K#|_qngtoz7E~xy@B5CmRq&^HO(*F!8&)nXytGh0@W(RvxhL|!m zFy#O8wGZ90iE~`{ufAxY*mtUkA82&itr`BbcyTU@H;JfW_SYOL;&0+WQLKJF zO}|XIub=<@&RRjmZ@1D>P*V;qzwGmmI?5#3=Q|bnndccH+s~{JzxyEtr~L{&%NG_C zZ_3d9N2d4o2^=y4zL?EY*aSa8vuXZzUxmLs(AUHQ!n`%-Pj8>zMlyzq{@ZV_yFI;5 ztv6bcdiGI=^j_(7;C~a|>w;Q!;@ovJ3_WUs$mq!}kOwleGf<_AFA0F?v#zlyZ59NC z&*{+$JU4cKlieN5cQ*#K&dIYnQi6=?|1Qs-ov!n?0(I2?Q2&hChn_eG$ZVu}!VC(C zo@1Ak{Z)@5qwwCcBs~H-^$DXS`v#CxgQRS7>8k<}!P_NG2$cdKe%*;b*Yf`BBAbz? zs|Xq;L`7n7x^8J9mQ_~LGzE~Z3a+y^|2HWx9)~Cx5#@c}A+C58Cx|b4L3>?5UlmRa z@iFx&Vpb|A5xgY+w*G(~{I@$tPZx+*P(=6r07dk%mlg>9|LtM!31`#{LZAMXV8b)~|E(7{yx6=DeIFr$72&U~Qg>HJ!eZWr zMUll;lXnhMF+wjh?mR@Ep#Q_`RcC2Im>Mz3zu1W6>JZzUV}=l_ApKN^f7Y;n!;_aw z@Oyud!>`=MuW&|&Q)=W7<9)Q%fIkKh)xHvP>jNy__&$9Mw_eUM=>ZB+vNx4SrGHhh z$$l#r`A;@h-QBKh0dVVo6X5-`kdXc9sLU48coaQjiJgI#2KD=kQ~_g18pK}mRWcJ$ zpF4%d6jL6D#j9)|v2_2ne>v*f4~u^k%Zf%m`F%(x88iGo*(QxL_^nJb$d-37UHIMR zho(ZkCj&~cKKe#b4w#gTg7QxH>Oakl`SG1qEGaRQ%Hzgcf8rYdo7J)ZsM(*jfD1$P zG~6x4()g+r%By?*?jQxpp99$tux$(`{NQ}@4b^;HG0V^fG zh7$kh@ANnZ5J3Qr=p@`Kwl(7G>+ri=IUykl|Nmb?QV^axY;F&w;-2uq0|+<@fhy5S zm%fGsUEvf6P7wW}L|hKL#IK!iBDdqt*Ol zVxB8j4hPvUlvVZke$hk(8>p-O zn|sf0zy`Yfe^55pyQ;x0b#=UuWDKk!;-URl8rIGEyS?#9?!R-N2A@0Rr^cm0rCwNfaFtW)bglT4GBwFzwQ4=U%f`78shB;Fe~xbo+yKL zK;}x(i}LfTNUrevRCzOhn4( zg`jhf1A=oImVXxa{eksZ@HLOCthlc~e&{V%iEdwigEItxSyaVpZi@Y*TJQ6M6%&S} z2tKwdc+;tpee39R?w0fmEO#}amFz-Na;U@yb6HW;#(Ido=mU#0x*?ugJRIX+WbhzT zusj?o$5UnA9?AeUc@s)M!k7OO`-@C?O_YK>Di&8EZjb6- z$7@qDcu$z38)OZ3!K3_IE&3l#zSR3#6=WCjBLv%Y0Lz;Y6(paN_sDb*d{*QZl(X1x zR%Go9N{o77_5mlDMWFt$cfnV%hd_jq*I~~sfuVF2dZG$*vHH9#vGN;3lXPUFYrmjy zp@R?oBU|FgA3#CbeCj4*$QTjlH;j7t`}}3AHK^Er<>%^QS0AX;aUW$3O0@ma}pY)m*wv>6$F-Dzp{cU(;`G~yw z&*0(nX+!u_)URF2roi3a?^zolNirp+;6w>umyc7tS$;orySu`78O{B~(D=`RO2LBGKMoU1Ewhd~ z1X*!4SV+ff!6?@t-?R6tXEhIEM5WVan6&MwMbA&AlQBrfX28oQgr2GbqpiankjI6> zLIK-sK0i1}B+&}b@4bDqP`sFX0lDP&mz51kSYZRnu8M0XflF`rM)23NMIH>Kem`z} z@;yJ(rw=)t7N70#60|q>o;|5IBr@YMbcY1l*t7Jqf__K_6{}`f^&<6U*wnAc;uNBf zF^oP%eF^ss-$69^Cp@8D)D!H|-e$f171*_s|b!3^s(cYn2~Hn0<1Lu zEtta22U)UHe^vO$|8lfY=r-Ya@!rI9><<>7Trlols$V`5O#wcKL@CkdbzHau$c-eM zK84+QmjF_j;e9ZEx`86YrHLOeyc`Kzi)WlxYeDPDt%q1eg{K*@JcvI8G;DeU!ZyO> zZjGhs6BK%V0tme^YoeoA->qsULF*uJqnmK$$^7B;L|joDLwW91?%cv*9!=v40lMqS zPq=TYkzcx!%vr7;US5(k)|1TRbnEHa0rLRy~C9mhX@_`NiIgX-c8 z4Si7+`cr+m(~S(-5WhF@mPg*_cU%mstW5hc39q1ftxG>{qIu~{&+z@M4-S((4eR?+5C%Rh zbR;>|Q)|+8_{yQa#vkt6yrl_TAWpcRQ+luqz{dTz75=*$G7}J>lW?QXuVfJxn#NR* z2y$b*re7R>jjcHT`KggphN09Yhl!ao7X(#M4{*0C>nmXM`1S#^!cEpU_BQ;#zH-JH zoLNfwrD>!Gg#)_Z$|9sp+$A2&RLtp12`r|c(uuz}Se&6B^ISgex`=JzuOVgQJS?d%dG^03d+Vqu!*6X^5v5eRTckuv1f&I|q(r1!1f)xH7&@gyK}tZR 
zr8|digb`^Nx@+heV2JnT_r2$Q=dAVp^R2~Nx>!7U?`Pk8U;EnEc6-ga$Q~kXWjkJ^ z#M$=ZFXQ2_`|%;qG=#Q#$`Wk7QUhA|AV>;vTcXy4L0Wx90)j?JxntuUITEy?0oARJ=B^rbjk)n2rbX)LCP zZEF~Fg4b0K7#hXr7}Tz9+RZTNV$T)`)l!0(BNw5EA`{2$lSfa~lYm#@*;LShdp}C8Ri*}|aZc6N0y=3nvwc~R2jBNWtI{Zo zK-rB9qD3Tk4bjORgBI&xTN>=B^d19Q(jAr;L^#(+p3C*XFt%$Gw{UXRKG&8Utqr<1 zsXq4_c6aL-+jM&Hkp;`}N+soaK|6=M&%?T)Ik_+oZ4PEYU*-o=4ceqO?YTzqs4d`to$ajQ$^VY_u<3FB^^>41I+ zS}&l2P?b7jXSj;+T#kNb+Iq{L%7!SOA!o1VD=-O@6e0U3pd~5WG=3{zBkQ znJeI{N6z^4$+DbgZ^J#3&WbyECGaQaD58Fb(YNM2k*}Zb_>(M9JbYx8=JuVT+;PYp zn``N>j!ju|v+s|N%B7vDO4Vl$N=zhJ8ZmY1IpYPy)AXa4dCna6*K4VLTa_pIDHfH+ z+5*0BHbEr)bU(>7kYprBr_}iT=%fvYabtKxGf!p3y%4`19M$o>x8r`e}=QLkSF}52ZmT09CNxGH6>mbgY~iK`5!JTf-J}cE(R`c`8R(uq~_!1=q5`UC;Gn724qL=^he`s z_C(*>eeNL9j`Ae_ndYq#p-zWkmxQ)`KP6TCoYrmMc}Xaxq5*#c)dZ>n+$TV3FK^!` zz$z9{;$z9u&0+GD-d)VxhfKWvFZDP`EwW-%m=lNda*N_v6!hULNe6a>NDU;C7>%m3 zG{M`{_VO&z3@ZM^*)uK~s8yfl^y5fWae@MK#JbhF@eW&DImJrZZVzM}_;Q7bgq*Bv zIr0A5i*ziCOmFI*O=wrO(h1+z?o8hA;SjWb7-s zXQVH;<^m^h?UX>WquhT=Z-oRc>wjUJ4TrcowihvxHUXHpl$dV+(u&aZ9B8WL@X46e z^HydoN#bEYA_`#`Bl<5bfDcu$t7a{Nqc?SEXUh0Kt%ruENvZ81{C}6ZywhHN*rv58 z{wThMzwmxz-7Y%4k>1+XFeKzQzo*n%mQdv#3$NCS_C0(V!up&pzL16LAZZwW_@@~3{ z1B;4Y4ijQoAEafx+sSH!zs3{Mpz+hd*>g^l#bnUJRT(Qm=4X5e1Y#>x|ohqEhLb|$ydLUc7 zlXud5ZpKM#Ulli}Bm9AX#Ocy-b&}v^O95?JzQ(C{q`8WE=kwQSGFDy>VN^w#DPHbN z=2m*bT;mke!xVEnemOFNs5;8F>b$mhqrbHZw>VV90>v0ZckrkTK2DaIp^}rnV&kj$ zyX%`S6!8$*vV19(cRwQbA?gk4DQq8@DQqz z5ETc=ep9V!8Ld~bV|_qv!UTc9C}LvzkzHTtJWOX zX*0)?aRVx)BcgkM2Kn|r!P-qSx3mh*EWfHMlNfgG6gvx;x<%&5FE1yt0~`7a0yW6Z zgu4ry-jX2=YV53MUo`o?uwX$1kzdfY*HZZ~$^(7ZMnOpDwB^O!svB>gx%dL^A z->yQbe*eQ>n^xf!NfFDvMMk(7QQM8^D(l5WLg)kZchFy~0W8Dvfl$SI_1=u!)6Ogh zk4Zq(SOH;l^#xOpbe&F?dbJ7E!{Ff2Z8*e!rbZP*vIxrkpH5RkKe_`nVS5Q^nNY!o zk~wMN5S!q_1?nlD=P5^suP@`VqZDC4Q6f}(aHY@0R%$(AbD1H9po*S!)QVtGdr;TV5WVui)&aV8+ixedUTzWlGvF z{jQD}3om`!B^)MC%L@%}ZHmSh{=_Gs*d)Ea3X9|Rqz+hcfVU{sDakT%5{6q7?Y|=i z0VRbrdwImTozRf!=RE#>NfNlPS>X@tukZ7mNs+3&@zG7!O3~pza__SJ(J#>{O=ob6 zmD8JL*_&nAJ-B}Y;>WExlqb)WEBu~KHk7_6-&r$pVw~r_SDLW1O&Y(()zOI=T`**X zr<5D}Hx7g6&p;=_a^I;6i)YSDuOyQGtZ3Q(+6=LacE6PlMwr(v*Nrx#qS zS7~{k9u#V(VPuF|XbW-^y9|cJ2T)RaM%Z9g3E-Y0yR4m?J@)1Tla-8bH_*zjX7PMJ zH4dgqpgywGTm~`^Ixm+Rsv$@Ovt|ZQ&%fZybD0Ii*ex0c?`WraxmB3|XNUvP)Xv zuA$zHO=X<(@xJTh_5L%3Ol~Sn=PJ90b@ev?=B44zgt{0via|vlW1V+^v_h#jVZw^J zSiX>nMHf0b%FlIJ48(KJZx6>4neWrB2p-N!{9}-L-uaut(t+`$xqW_Y9n>2 z&gwxln5;QY!E(l5whe-V31?>_Jup{gBZb@C_E`((?C z3=G|-E6G>Ug?U%6+DvuU*Cr)@@L0Y8K!BL#P~FH3PBx7+!WTY}y&4RB+AlHVb_O6O zfMnaE45o_9h<1vlBzBU0V08a>3N=#8xLsmsnbo4j_p(U3EW8rO@EVB4Wm{QIE!V4K z)sx^pMT}_0NNuUKOXMz?PCvlH-sLfBc!6I@fVO-U#1bItk4wnUa9el<=mN^@LGaM- z83#o0F|4Pe#g{GlA&~H^{cXbQE4Bquepo;K&V(f+bV;qp+!0I3>*aB?cQ*wA*o(@K z-&M>giA8%{1BW*&WoJZvyOrx+nSCfoy1s<3f4?OCZ~y1%o5F7%dgV$I-b0+8G1}V~ zPlD70D%aXJo#BfG=@O7^hZg9Y*JrI0a2>@UXy+o`Kc<`5d1#$xG~9K&UOu|{&y&VW zJzTgl>Zeba!Rn%(ay_a%1BJ*CvQJ@&)i)m$NJ|MY&CzU!S1)kl;!+!kJm?sBh*l#KoN1^7HTVPE-uMmO={gX88*v02NkQ+Jq z)K~0k(+NvZe;`!7G%IFf)J|5#nsr9S&5;O6C#f&K*pka=#E05V486&~Lt)7OG3AGp z6d(TOVO`qx6m*&UtRrye6W!I(lRM~lCnPobPxJlbVAOFt>CuU*QnR6wLOKk z=j4ZLT;nrGRwPL0JN&{gPAto0TIxa|*;<w#4@Qimp*!2$nz&fNZ=fwxheFe==AC__r>q@Rm7Ptq90YECax0_Mr}oR&Dfo)r!3^B{Va^o5gdG@;>eXa@!ictc<)6-$CrM|0$LFW{ zR-{C0E%}9$1%49peTGmxC$wZ;puWGnu|%W2F<*l)hST32_<;8xeeArdWnm&?k;X}` zVDyfY1CYp?CQMOUuMLrsRXO5k24no(uTOQ4u_Z|dW$JC-!P1*QY}rj!zD%YMA zo>$S1q?XLdXq&EWc~e}fwFbGGacqeLea9;C{*OT{rYX5eYuoY{*lsiDqda>akj}Jq zfneCKoQNgeZINM29KP642H?S-$duw@%D5t<#u`H~?Yjbu?oApPH$X2Xfr4wgX{ORW zlJL(j$x2*)+1Qq>w%0;p86=AAe7EIWbI8YyJJ(4h=XUj}4F~Jogb}jkLtS80Nbx{u 
zD)Y{=Wb%Ln;mP#GB~Y0rTmj?M%cdjKK#}fk*}fna#S9b(Rvi0|miujp>fBCUXBLD+@2H;V_=j5jrHH30W1$5F5qK;^ykO{>V$*q@ds?Y z4@yk=g+HFyI1@z04^^b>e6!#}$eOzII;0dk#U^YWNJl;SK!ViZ)h?8W5bxjVo@4Ow zO6t-(FgF>bAQE(G)1M^wa=JX#ST}x-D?AR&lTIoAx_hrWgBSYq>IUeNgL!?552CzN zm+1%a^mDfD=JFe93{eT6&;n}0Tn<~?mZ{&v%)byMdh7pIy9g*&kqUWTSR9pB@No>$ zQTYaOJCJ|n0Ns`KHz3r1J|5Joq-QZVNaGH9O`od2`xYBXsW}KI&Vz~%*g!bJO|dw4 z{11f21U`g$Z?{+43B*1IFsw6e07d((w>kaZ)RcbDEpleo9=%U?0j9mEJafP5*_m@x zm&ZXEAN2fFSY5ERInf59jVIV`Z*JB*_fuY>Mph^f?Jw_03hts2@DX5Bdt`>{iM;~5nEMHZDx=XJ#4*iFO5b5H- z&3`Qs>SaQY$7lH3BtMS%w%{AoDL}#AP_uj$`ho>vo+f|v4~v&+alS<@O|)SUE`Ahc zu3@^5t#gm0H|(Y9B7^3=qk_8lBTE*gC+R3;8YL zM-OO)R2rWN`f@B<>K5w@YW}G1!o|JdXivnGuW$szPtNJ&6I!X>n2i0_Dn8EF87v{X zaB_(a=ZgTq>n!NmR*9dp?2i<#K8=fYPmz1Ge`h-U8dOjleTNq~;&G_)9)lQ?1(z*c z5JWIVjbYK4M1UX7<+ff$cwCpcS=^)nWJnnI9M5}BMzsF@Lp41%G9yr7ei-+_`hYG} zV;z9RM`Yic`ehkb-Q_dzA0Z$K*0HGqRx!q&BkFYu`u87I@}A;lG6)opL`PLmWFkK5 zK5CIj$2=xt3*ZRGym+c+OVyStQ>b11)Deem&Nfmy{jX`ffrSWYpW$|%<|>YGg{YU! zAF0rk?gHH*7NUCoaPmh!xnM($oQ;$nZI_rfH}RqD0U_YhCDy%PGPK7d!xxAA>DUNt z4n$)_u+xudCqlEiD27SU@P0C*roye_-AeYlsa&o#{C<#SFR-2vySfJxqNkHHVOrBwTdB5}H zoOIB=vxcCqYDu6|*2+^MzOTG@{{Rd*Zu71P9;P)hX&KwQZ#vzHOSv7hH_m)>^Oy zT>2(hcS85(V8wA5n`pU!0j6LTi%HlkCW+|)y<4ZoEnsxf@d?Wu7l#yj+V-9$Ip3F; z&DdB3=&c@VpRIKRKczLed|gk7x&GkaOj*Gob*am$cGc3yi`n%Y)gR03Kr?`7h`zWS zANglykDJi=PuhLg&In*nYOuDNQvipZ$3-ZJW7rukTeY*b{@?_vX|b$y+JD-pNN_V=lC?t(4Ocyf10CIq3UGJt^6oE~Gqgh+{HuC{ znofVT7rT-LJ{MPLz5BotZ*i4Hz6hl7$cM2+o6~yK>t#GRmV&Ksnva^$TL4DPvr!J;n`Q6I<|glnn)7JHC0<3SEJg{H6D*lK`|}vV{5J zt}2LR=yWR|w1J91P^I}$`wf&Beg)E&pz!dxIp+8mBL)Weluo65HZSL8?%vBZg4&mi~Pg)2k z1OMEZch?gHugO&&%}cjH_a6VbV5w73+LW!go-{0bW4GL``pn~*Cb$3@_kTXx#(vJ} znD-q@Ol|+yKA;E;)x!`7BVQB`Za}s;69qOKS40%J;IpcyFJ6H_jL1y>4H$a4c_GLK8}^v%xb>?Px!3eF+Myd6_b3#x4p@jnoMJ;#K?P6{=I=_T zV_0e|VLkS{+I)DEW6Lc0@k^`&rk9J7fxbVT7;nF$NEW=guH}PZJaZ+mNCOULS%%;< z=3(L&`*7V^Bs)o`{-5iTA_eel$-2b{lj9!0aRIL`(J=8aQ1RyqE;a>n9iEYKgX#6% z##;{L>mpgi%x`aSU$fjMVfr6GiaY_!v?ZLIk)@y_`Bb;;rHT3F`HAwmz8Ap0uySOX z@E4muZS22R^v7O$2^QoU6bkZgpotfTvOCk@p52Yw@5KSr`0t;WImADmOsqHb_-}rB zO7hcbFEW%J9RFSh9IwuZzLNN8xH?+mSLXL9o%w&<3b-vr?11xXVXn>Xr$Ra+th3Zj zFG!dIitygl!D+}zzuv-oM*=X4O8}a^-5xJ4%(m=l)W=63AQ@|7u)ziX`v+giza7eK zUCDFvsJ$`Z^VoU8PY%p$l^CSYrMl&hv~pf2^xJwx+2%SiE)SWF{P&?Z|M}SpUL_^f zi^7|C74Q@nTpo+$8+e#^Pi`*XpTMP=Fnvg+qqZp7oz{{2zn?w;+$}GklePjDcz=3C zKb>m zSi$D<+`Kyn0#mt2{lC|p<~BZBY1`SD7rYjJaGNIgVVy!>Usl{4B(gL&@AUuoplgs? 
z$K&_quDhO)A)&2&DgWyMNff~YVu__Cs$e}Pf?$y$&FAIpnD8E_gZROBwLhU;d-ve& zZOU4}9T@<1ah<<0xlS1pNY+TozN&)xfd;qj5Ab`C0aUg7A4yldY(orO>ToPEqc3Vx zs#i^H?-35&r+mAbsWnz7ize*<`J}S`?J(vx?i*X|3>WC35 zgfFq_8O>c^)yh+FUuc!drMh^P$EnX1(NJpqy1pt`F*@RXTYPT$RvF4u0mJb>Xb&ME zTYm5%jNaqxZ!O`d{E)e>rI5{ktSxETV0*zs6Sk*R;L(MJ+~w(`K6@b{m+n4f4Ya~J77 z=-DX7@4EJtsgyy%ULB2rops7rXXQmnmPc$io+%)ZG+e*V1W+iR0yhe#88VCQ{7g59 zFIlAoL-i<6p4Pqr&0(}tU>!zsPq=~(Zv2!2Sj`qdnIMx_QlJlgK+86gn-EbChPp-C z0q#t?)*PU29RlRsGhHB#mgDvX04$HWJF*8nQVX33Gq=kX7`}MSdCU2x2o%bk5V@oX zN=ft7?>rHF0?oi*j}&R=K`gU&v75kAFgkILH0W0O;n78g&&X+7t0F2d!*68oQ%;xq znM1`7-+vm~moNvK$Gg1E2vMHSAsrK+C+K+rMae6t_aR&EB}NK=b1i$Se>?G{hBI2J zeUrZ9-KVhEqQfk*bH1czwE8Q?2-uUb+mEn)%zhBMcY|HNA9x(Pbtk9x2>X>#TE{V& zX3PgkuUQM}$4#e!BP+IoTFyR(qqi zN=s)Xf<9g~)jzwKn_2BzvAO3#P<)Fqg5bno@X^V`2XW&{k@j`e<5ZX*vbP9tn6 zG#_Y2&B^S0eLS6gwzjDe2+tQ=sPx#TC+y{Mb9c+=-#q;m%hnI`m|xo<(DYv%*~%Vq zC7D+B#kF8T(mpDUf~d9d@gGbgjUCl5lf}0Y1iQoIzHew1ZVTk9#(N&Z zTON5kjq_L9M#wausjMs8(&l%y9r2eZqb4TE`0pv)-89DN(pgoz5KR3aHJ;-()}SA=<_IS~p6Q zmto&t>wIID4ec$`BX|3G?*WY>h<%P74e!4XDU0=QD*~jxyptIZ)XtO(&~ELQvbjZa zj~);X%|+>NHe8MeH*TDqOFP?zIwFangjMa-&pc41+NfCe&OyCR5CsapPz1yJCIOm&mf1NGLnyqHnGX zyX)J?+yGv$?k1@_t6(_K-DmE9zUd=uMo({vV45CQctvw#C9Dp;n`*!GZ=UG?7Avvg zpdGSIz*{eaw_2+eu1i{k*OB!XR;r`TspvtM|=bbuDdoEhzkA^PNsZmfj&1>d=s6gdn zpL*nR%cj29*1;clnw6_?jc=6c9$zo`a+U~fA!1_2?$w5_vx70fr3l5iWO!dNZ~AEx zLderGXo=%0x{0N1{Pjb#7VncMClwB5;>WE<Qg?XEwtIf>HvL=zhGl!f{(<<3OoYmQxI8+>&729?sy zMxXGS2^66P+Psf9n3WTGPGCH^dxYZq{x>y%f0b71X85HH2Hu9!+)9(0OF8OH*s7RjdN^mi0ASK`7=CpQs5`B zCk=r7Y_azWkh}W9@I$k%MfnB*2J7{h?R@|#_&tc~M5KfQ5pv@6Ek(J=;F!h#>>?5_>GGVEWq+3i%8c!(k>#^>*qv(k+v+JceV-(% zo;_!xT1VGS@?fXX#c9L^Ei@i+ODusKAv|S^6SjdrQ&=({vQNW&y^=Js`cw8HeCrA2 z??ie9;n{(>n!ej%z0Ta~Uj_T4#}ox+W+cBhA0g+ccp%3ieeLN*>O=^H5Ty zk(l~=sS*I)vEt`1sLxJyxp*>~w3)Awe^SK^Vx$_%#q=2g)au$Nwm2)iQV|JvA01f= z*38~CQz(do3OvCbj};eDdni?rtmZdHPAQ6HrlOCjCSj_YmLgR|?551Qcuj17l8h2< zzO12L9f@V%G%vkgc-Ae^xH`Z|@Ki_cg~#gwS^CRxYMLIpnYUX=eRWHC3g@1>k1}i_ zpJx_=O2|sB-v7;3_@XgQhR%9;i)_j})n3AiGnyKm-fCHaSJy3|F4E0BZN8>{M8W=h znAcB>$YzI^>9E`xz0P3Y6+S3|vg4p4w#irwaAmx(YA|^~Ki@^5^iW{(!T=C64Nrg++~-V*HlP zdkBoVzD;L3Ip(!NZ$hPToPe?rNNuYxn3TIceDe3%PbDZAYW5c9?M5y7DgQo$xNfdm z+HYwPUs+dY&~ zA5`(Ai#>kx>n69-;8r83ec|hfi#ge57qlHe-oZD-o#QzVX@+99G%jAmrdk2E+2z+Y zdQcO)zNm~<&e(2vv9;xL^1x*(VM3c|b^Y?W!o7m9d*Yro=f9BbY) zK)svjyifVZotwpSo+3Jo`Vmu7{SkTToJJ)`i--!U5X529CH>OeU3yXFy;QCkN%WGK z!;XN95n64$;Cr$9Hwc3#Lew3e?(dJ)Mt=MKAygKpfbn^%67TBKJo5crim^QzZd1Fsx9bRU)buJXB@yv_%`c$klEayVo6A4fvKH&iZvq4^hr|YMA-;EwdsF$n&O@ta#5RPgw{b ze8byo;(36YR%kFD=~?`}$*@|p6krxqtX*%iqQ`zU+Xw7)XkdAXooEc5hB-(dVMAv5@tFzh=g8be3#_bN)jA-{Rz>LJD*o{=(}2aNEcxhc z2pZ}A!8eh|s0u=#J>%5zU`*8MT0pOA=&rE%eQ-iGEI>H@0+3_9wZYW9j1#o2q0q~t z)V^=4@YR88F}ETv=Y`uiF9f<*ILe$D6AI~62m&2>NbfzQ%csDI8Gk70 zAgJg~KuvcmaCp;PQ|0U!x|y?eC&`>uu17w_MMeE8G0%rxkajs5%1dv1f_uIlqWK^zQ< zm3XrEPR6pfRHqkXqlPOP{_yYomY#L-kWC&J<~u8Mi%t1)y7}&e#flw5GJk1zkHoEJcXudQ%Qy}HTl~?WZ8AsESdvGIlK0fH= z@tayacofut&lU`gRMQ6Q_+lsA{KKf`@}QY?Ei&<^lf~Fx4Wz=R<7DE~_e5hV`yA-O z7-2RT4X)QS>Fmn)OZ5$H57c9Cwzhn$^=U{go2om45zK;iij{}qo|W||FD%DCX_Tx} z51h%#lsz}#4bfBlPJReoa@>-!6D$tP%3fCP+%nfVZhIMk29b2 zqdtN|={@OWZ|?rR7$4-_GXy4Z9J9Kfv60vjIz??|!gR;61x~kBj9C@6^jNC>p#$I! 
zxsV&|~;!drqR^hv%xVt6l!-y*_@SZ`ouq0@?23qhR)i^rz7-1F?0v8A5Fe< zNfVoaROQ=@BQNbsG$>>D``^Cvsp+x>NbxKEOyj?SA0GR|&pxrv-Bd}Y$;pjFnN8{O zH{ihH@^%WuYi|hQTvvPI`%DeUI{4j%w)yfY>u4Erdyk_uQBX>=5$Lxn^y7bsvlsJAV ztECa~Ym-aV^Wu9;%d)Q0t9=?0SpajN{r6#H#xeKWGWv`A4=4H$sgzeO26VHXDOWLZ zTbP{@U1RF&Caqr?PK=LvDO#!<*}M;`3Ge>;jpsxd5+b7NpW!z7Hn;54=6eyl>sk7~ zok~(Vc0(Lw;@yvg(-wgSd^G%9BD;@ze(?24j;Uxm6H1 zFkHYS5v~D%#HT2~R-14j77f8P)qmMaq(Ir#Y6VO}tI9ylDgZQ^%g^}&^bX>W2R*;d ziw(x)@=U#tJPdGNqr36YzwV6>jX4y|T`$}@19a``#wSi~(?py6-rSaFJCS>e%a?@Xytbm8XcknWqEpr#57FenM#kL|<@SEDi#f4ljt`m)JBuKh zcCX_(AVmhPxVd&)q57;$z5xg&=b}@`FyCW|U8?t?m}nj{wPv~&DcQSvw2Jcc>G>Py za%jB@-iZqq3$bc1D#TxjahyURYDhdMnIXiX{}UX^cYrLOk*b;@|Dr_bq2yHlKG0cf zWi9e(YO)UUyl{TKpF2M_?#isvV}kS*7_-2uF;xc{UWycTkJ~n_#cG=-uKY{sl?}`;mTOY`+A;xE87fv7SHvB6}>FuX|rKhMBni5;USQ^R`@6tPx6? z>f&Mx*XDyAXHKGDAQ9(LDQcRL;*Xd3b2`K^?%Cz__x{^B^iRZgsw$5g%floem^Bz- zl&h}?=lx8c+;K#1QZdg}qHooo{wvAlIN-Xj5}58shzQVUO=#@peOvF9r?oFS@K`nfUck7T)8)xKew{+`X7yiW9~}9znK%x_srdIdm3gQYs-C|C(ml$SppSm zT~wgwiWNWj`X@tJNAk=mu;~a@7(+Zvbh>bE3C=U-vnaFHuRNnKK)g_Txl==FEi5OM zf0xX$Au}}#x!$e%MLwGFud_1+a@Qg1uWmSMqs6Wxdcs4CpjKi{F=n`nYSwlS5-+kb zL~ISs>d0v&fJ-j!pTq=KQg5ay&zbn0>BWdt`K0_h7~C^v8d0tzOrazoPn+K2>)Eg| zq%~Yx2nmod{VbWgWkt%@Vc7{?C4kD>`EPvTH0@SAVnJ1ZU39&i5_aYG(08(`u}Y&E z_o{Oi*$Y%&*%da9%ZQK6Z%N4Lvagv{OUX}szG2W*6!FM3n`<1eyya!Tz0s#@#-yw}BJbKyK_2akMB5x)`n8;!w z{=580>HvE0EV0(UU-+C;wRJL7=1gsbT1zvB#wT*;lEn;BTuNmfS^}4rcUMUl(bt zBDn|VYnIwv4im5A25s$_aTt7(WSRP(0|5g`!#(ly_@+wk?_6st+G!ksu+)(1Iwx1HF;^&b-EWk|40#^xW`X zn$j5?h-GnOK)>Bp`1LM}9#0~1Hz+3h@!XjzPjNz_4gJ=Nefrg3lD|I{-; zSLbhCHqo3}3D>!CzoF)mLXtpYZPR7yEGC&e*U;`@mVay0xx(tsI)VO=fpyo2i~GPt zGya2$dlpb18=H>P;<*sMp2*Vh^+@s#LJ+i{&f&c+h0s4R>?m~f%bnnPHgPmYnbGfM z6@Sg6&@}(_IY@)s^@9vvR$$c8<1xoNiPv9vnb z8uPws(jQ+%Z4ArqKf86?c}+Y9F1r2HdbR;U&%HzioOwU)%chdWv zj}*z?bkxvc9}Qvd62qB)N+q4nqjJ63GA>vba*{ZI@rwJR!J}b^d}!)&(!A<4$$3xD zdS`Vxr?pO4Qrko(Rd1dD(P=2dYV$z!0BNn1bJTPQuO~*Pe&2B(WbnnqNKlV<$|-le zJnVAS=*fonQ{+_h$n4fp<_5jNYa?mnZnEO?-};;o+zT6v4ce)VBX;Ak7 zYvW5Bzs>hXiWFj0tL_?&nsg4%J5zk0ud_AbzxiZDM`T5JHV+*pl)@rQ^O??Z2Dd33 z8_+vkX(e^?!ouI2GSrPbmM2Uodn5YQb}EVgh)nKYvM5+dc+c*x_3;R$K7-W&iFXqb zC&oP5^w^dQjhce`v;;Z=F-G^765Er_LBm4>TGU8eKBUEtwd-ul4TicfS0R_&bhyO2 zaRS2;#U?NP#D=U+H2H!w;&~S4visn2`$hz^Z7#7>NY)g`9WTW zXK%_~3;gcO>b05~0{L)W{nN;Zni@||eFYfR{Zy!AJ5&hu-pGF%&t@5!_!RZ5JT@^$ z7n3>VTuhhOerhO+zarg7jJu5C*;uC*cy!cfE6UO<(Yxa2n00|Hn@HG(F%|8l8E(s^ zHo8li%z2wP!OpztQ%>Tg7DXy8wNIO6W$@Ac8Ge`N4=u(2Q@p7o0YN0AfMpbe5urMv zjwQ|GF3NJ8^RoE;4ws zK+gUQ^leIbn^?WMX1?BE?67DHysJ$eghMzUIhFDp0SdunQlJtG^R7$`d_n8ht58?V zt7uSdqg4;ehXhM#t~96XExb39n!Nb zR&<~o=O>xx;YdD4aU0h>H9+}!KW3+}H7dVQ--XZM+9TySc2~`B8=S`0;MDTHzd-O_ z*=N!@znMxT`)58{oapxnb9~GZ3_(X>HKr0^-2A%^6n3rTswL0v*Kf~hoWIanyaM3D z_jwBLQNm9IV@s~4&LB_Kv+!uzv1yZO!3Avz{GtmD$TT_uvi)aPRW!Sia*i%|BY99> z%4&%*Z~o;(N_V#jSga_;uAF`o*}3UIu?m+im)(CG?1Ee4lLedvSr5xik)gB>AATJR zO*A>>-SOa%8cfbPlJ)n+T=spR$@|m}mGn~0GNDHQINorN5ILST9$S{*^syk^?n={< zsf9fn49kH3l{|v|n^{H>z}k)A_OhWf-@<2v#f^U<+}E9~OAOVETiWi47DF->Uag6m zcW4xdwpvjW7Js^)RcbiLkvtX3ZBE6IP>o@(Ky=ZFI>PfeB~P@jO3VMWh_g>&m&3nznmxUbQEPl zVxikF-=xFh`rs&ohW<>ajA6wjP%>u{!Uhc|fQ8|d-G2yJ z@63R{r9RHQA4^o_^S|T>ZU+1vZ$P5jg;ytv^fW{7b9yA&@P=VPn$vYdPGLlMSt~@- zu$uTBgdoWgPa&usPPeU5PDS1vQt-7IKMrHmmBeSIEk8$$#Ck;OqDYajU^mw`Cp6!? 
zvnF2)pEE@b*P-S?t-3JV*h+PY@h3~&zOq$>Q1p@|5p8KRij6C6Dnh?5Y=5Twax+(S z_Fh#NJtC`$8Mj05ZXm{N+_6)_$KJd<7&@%wA@QOnQW==jA}Rfqy{bPn26_T5sP0f> zds4{V8LX+#u%<|Gy|~NM^v;+X-DLj#9!XB6ktVKh)Jvl74Pl5=qf2>)04Djc;jUuS zt;)vi*X)S(ZPGnAc^W-RIdB$xK;ZT*dd+6-}Ogb|*phMs-oC=Oe|2a$|Fg1dkI zZwNl2+YyKNM~wJK^`6M@lqQ%ru}`bk15yiv(1qr5ZE}O(%#AB!1|Ld#oL(gJX`iLf zL*~2J_~N_N1ll@dR&m<)w;2a-HTsFlfA4;ET(p_{@Yk z?pNVIds8w#9{!sD6+j9?2@+o|o6uw#>0Rw#evk;ig|@vA5?%S5_3mP1ycO$Dx;MIG zPer@udx;C(3r~Oq0a^wVQH_(7)exQ6LI4gnPbv({(AD&Vg!~Fit5_#!$S;`n@XRR% z{82-|M?3^<-rsvE38HvQWe!<-6ShFR>hPcPD$nO#9?z!!4|E{JbSCCd0>sPk8}*)9 zQli~Pw{1&RjVG_r*`!E~mRZ3EF;Z7H-|ez4CNLpBA&@9Gsp7H4aoG?UgSh9U0{{9G zgrC@%vdH$Ri^gAEG&1?FZ^^S?6v$bE$hNfhoJ5aWmcJN>MPqd{of1wW1fh>{I}CDM zGPH4I0z&b`FBNf$O+1-H|DMV!H3AcS51L1I8{JjO6A zHELq+Qe0CAI>*aPK@j3(gGF(_8b^R+zH_my30ZQI4RaYA(c~vcsg>@4(nXT~y6})Z z)wbDtJMk`OBEmnz0dnfnj|CeFmUy^r{$uo4fyC>3^Cv%lc%RqDylA;oq+nLsb9LCC zj^jX$nyEy%#W4E)c~f(@ z#$>V-n^)dBXu7OtPnP9x-p{IXerL(ch~6yCyZTM(4z|!Yi;@PAHmimsQ>ipz-;46&c2TdW99 zZg7}e4irIim@*;Fcr+7wPXQC9dK@Y7rml8mLGnN3l%SWB?XdbMPhSyu2m?@>$n*Ck zQ2#k$y!Z#X>nZO)kP2$WA6rfUMwmKY27Z~)c0-N=4iO+J+e=kI!6N~X9CXkWuP~q? zNHOT%1N>=7axf$+ylaDv^a4?&yUU)AI?6YNzWC^t@s!=>SEtT-sF3@9J6*{}3kLhwL>+*FT1Zn0^k?JSf|A zrzKNZWN4?v=7aQVE7q>JUF}`hzDp6B8vBeQ^`B)KTa?7Ldv+I%1y|e?=AJeBZerZ1 z%;Ay#si*84Ww&@7PsE_rk}eJ#2%s^95pG@5B0p-~N#sh>^Y`Vy zO+Z^C-Vh=9R*1G%nynNX+2ynNs~fxVC9Vf)*mE96*nQ`R4$l!+)3Yq)y98PdAs7sU zw|5geaQnAyX zJ3M&!z)|&L`U(4NjLCo40FEsO6*RMD{Q`|pZG4g>&cj5HUCtyUlIMH>SR+@*eJ2du zP>s6`ZGmF|gUUem=k#}IR~glX?ahy8dy=-?ZYTF^G1FKLSGMNw$G_)siT z&Yu?5s$Dd?9^B0X{Fy}F_YZ2{dPtdrL#6}W%(%#c>8DcsqB>C|r?+rW{saClmJiX97?lXl$jC3te z>|yj`a-Vr1AvUE!2UpS7b#GJw{}?qj5Dl&4?AB-rIb5rY#Y={a;!DxM+vl z^nolC5zpJJG0Z|mU?kc-bht_*Rg}9KoBu4M4;f&VJG+1qH=!04Kja*={J+MDYLYlR z&3(8yg6waD{mq5EI}ZCcm!Ldd%ipyrS;e~{aszN;wX#Y7kVmLrZArPPr7o+C9HWNJ zvX1R;D`fZFwj?pF5<#%;)=x;fG^h04PE@}kvxzv(PisFs-cBWL`OWtk-M!t|?+X7v z%)JFvR$aR`stA%2A|>4t5~6gcgrI^FB1lUpAT1Ko-6>^&h#(yg(wzd*t(1av$fM-B z7rfuz-~XTZ_a5gzW1KMxrR8R{R?T=XRa;{&Zu#Tys(R;{GB?7ML|C2cVS}c z_lu)d@tmTT1S|fxEzf!^ztD!38lTtrcGg9tvygLRta7Wr9HZhcoBoK!YyH8ZP?5Ys zq{Kia{UZ zX5?F}GhDOTmGRrxUty)E{$}?sd?EM_QTwYK*7K?5w4B_9VV;lOx{o5sjvy(jqh(&(kNxd93rTo5H82#-9pLI;c+F8uj@?QFhn0QM&ptHNL$ZfkH4>Gf_<+cq-UR>air-J*KDre!b))YqQLECmenynPO!a}c?e>D# z5u?~zDNNFPe54t0EGTY1J|fyAYGgD&H+slB=co^(&b#MbXm-EOFZ#+6N$q{hwR`#~ z_UeNSwhhY&@90@d84C<@dO`f-a!U`t1HOd^4k{B4E>51`4hj469qHU2Csprm;Sam< zd91%%H|zRT?woh_QOf9KyZ(=cb=@%A`-Z(nOqe_JnnbF@QgUNv!~ z>nE?ihd0a?3?(EBy#J+;waUF3+VEM)_uUdP;c&?p8~((SA;BA1yyqv^tiE&3{CT%4 z(6KXIO=%}TPB{70-0s)t+=H%;?DVX0@mp6fi?%1|H6Qi&*fWLt^1q{xPkFj%o+Uoh zEQq0qKqkD{T?&jy#<__}-Gbvp+CR3`yX|#xl74k&<6T5NsTP$~j)LWCzGsv*I?tlK>pd?&Cw-3n7X2CK>iG zq`n_v8omPHdjA!#N1B$X$N|A;Wr;6rXi6CQxEe7>k2cv;^Dqxqv8*qNi@*3VcjQkY zUr@W9fwwG`bg=v6b8nUeEn3xAI_{rZX-coehikQBTa^^`c;M@7eWJjBZ4z*9&EM56 zG!ZAZ?nr-wdgMr!wl1T!X7i3eMcA$evA?_6g|%4d9+wt}M(ao=sX{VPj7KN^B7afH za47+`V0T^1ak}N;hiJ#O$uSiRinUMe*Tq&RDiY-g?LRx;j1|w;9K%6h*{Jj4j!lWP zHNX)b3()s>id%O69h0B0-{lfI?)7dJ?bF|H%gCh+2chTV6nqU|{65$A zQz$L%ZK_R|3-3-!WVp-fi`went*&)lg|mm|Mp$O+bXnH-=1b-l&{9D)m}U07Z+fzG zME5G~Ni*-vo$F^U>M`5X*Tb#Uy!e0B8>vWn(i=IJm-uoWfb_iHHFC=gJT1wxv%P5t z&FCb_hhheqPu*Dr8PL&jtT6^Uzur;NhKT<71Q7JBtpY!Vzn%=*A zzd(=GWy-VU_0=`+M8@S$b+z>rk{IT+j;qtE>+keYGW;_MFI~DsI){`QW%G#+AIH|L zYDgSK?bPK24r!r3{6Z=|`vKEzyfu8|Ds$5fi$ARMtHw49@kiL2)~T5waj~TSl-A@v zUh~49yI|>TVXTIMl1#oY^bb06%kHm+eWS^c(|*=wBFa7lev4V$_^Nn(e3rH-0R0cK@gsnFNaH z^-mU;zK_!Rj0@-g1e8e;#n6eamKFbL5A0=Kie1-zPvYI&K@Y1siHZ`6SZ8`5Nq0v( zD7|{Ud$GCo4!`1OLfU>qK8H@;RHrq6A1bK_Pin#qWAHY|Uhr_48q0sZ7_6b{0UrrByzsjY*OUHUQETW1w@*-SN$3_EMS 
zW^_L3e*f!@+4dLfq`Zi$beFtdiK&_&3(HyMU~!_gj42ORnCae0Gj5pd6+LitxD@l_ z4E|L9o^_V<`kALo7~~9xcPh7&3WNv8jo2+;AAG1fGYx7AoyPyR1k zp@vs@Y_E@hbf@QFnTCY~;1k4!lRQ1XH==v5XiAO0_-98z28Yzs2i2Im?{sV`hXiX0 z_)UzKV!oNS?$8f97m<(j9p9*!n%1*!SpUq>-1Hj{)l`eVt2qQykJ#9CSGGQPHQ%UL z{~B315FWR_Y9q(vc1Ke39sZAA(6xGRbcW*g+*h)eyUT0!2P`MF(8QxIeZAk20PX$J zM%rhJLmnrWT=w5_*6(0Ypu3ENYGAI!dW{*$?mIVV)g2l9n%2DfWZGus64rM*Tr_v6 z*8>)@p=s9Dnh^sdVrp#LYR6>3-|h>NR>3x!4q{`F$$6fJ<35(Wi{CO`%53NkY3D~_ zy{%zOnsF(`Qf6XK^!2KDUIb#{R=(izjq4SMzWpf=mJ?yQu)u(3h!t8aFJ8lXUxT#W zuF3p)`e!M7>=*hkdU#zbZ&kBq&-}fJ?h}ql3;*EYlS!S|$f(REAuedL#zDrD3NSk_ zVHyU6U~ZADaV4};{`({dM9ao((lfP5?vYQ}p(*N>n2 zc6hGZ0C_Xachyop4}QY((v{h1Q* z#-ep5!6?j&8pxo-vxQEr*`>c{px5YJz}NrrrPA3GV6z0kHd5q5~Gi)L& zbc~;~6}z-{IJQCXZv$Ira9LoiZ$I<5a6uw89aDlC<_$0~rdDPcL;g{&c7gh*hgE`O zJ1_Fn8*7Lt6%lLwA3yM^w^168vbSSkD9Km@rhHbaf-bPUY(`eun z@4*JoE5KPJ1CW(alm_AYdNSC)!=ft}2amYZZ#HHGvy=e}*IqpvmHKLDmNRn^0NL)M zh(@03Tn2y22w6FS#V{llh*OaPaO^v`)|xP!XW6I6heF!SmzmJo!;G?u+A+e zNIDr787q8LiRW~?XYetZV=~dIm%PmhV0?;%pZk8=fw|1z%uRoU;R{=HggDFn4>hB4 zO=guCIGj~xBt6U9$YE^G6t1JB0P}mK+OCg`$3>vf>%u4Facs|#jSEX>u?~uXlQ`Y_ z(t*O8ikgJ`?3eSoZ|I-Ssm_dFP(?;82;lSlXS(&^^L&tVcCgp;T0D_;wVji(Q3>T? zVpceVI7eM)9oRsl=&<)RyBgQ^b*)gRXV6qeouCM3K#80|Q*s%c0VX@FG&`5(|A&u? ztjbN*;$%heXKj$#G?>=HWWU6~?!SMgFNyU!vV8;>DDgjD0s&TO=uvS1EVC~pIxk~O zyzmH8hp?DVhL-8u#gl^Jn#8~@E|@2L^cyY`6>hTusx91cXYp(C(cZFZfk92CUYS)o zMH#?9{Y9WAfCW1WhFIEv4ds4vgo&5*!5mGc9^pBH9GD^{qaPyl-~Z5FJZXc2jO0_3 ztC1SQNBr5n@Zi2WAl~&bMYykI2=C%e%p*_?S$6AdZgikjE}erjZ0S?AVa8=i0f99* z6xQ?|*Nh&Z2IMHU6v0~~4uG57UOw@3S@$Q5ZJV`*QKkP}i9QVkM4IWy_9XrH`=X2M zX--~@&7yW$4qi;@jF+PI5~iFnVssP=y^M!wZNi8E(1-&r!nsG+;i@1brjbtQLZojq z1l}yELXW28uY=LQlXXuH-}ycy%k%)+P(g` zuRfZd7lUCKnadSD>K5pYGyn2Rq~FgCZkF0BZZ7SU;q`d@fwoY%Mh1rs!>*(Nrs)u@ z%DvBiIzfFpyxxQ73PZFE`Cw4n$}C2a(OEg9#x*osY8QGAEo^Xh*8R(?8#U^|ug*p} zlZ@Yar)YVMZl8n>!6kr&vk)^DxfMM@o^UO-}0bg!K`G7dIjB#oui1T@qeQ<<|TCw}I#c`ylVaG0p?txZ6B`K>@2 zO1hGv<9Lob`XxZm#x;7Jm-}uBFV%s$WO`(<8eVFHn7LnVx-#V1{_d#?q2xvdBxE-7AwyXW|viZy{%; zd(Nm4_9Lye_`&EE3U~wl&2GXmDgI?*!8QbzISwG5&;ke;zz;+^s(nYrK{{}`CzZ;C zKUHb?pgNN!+{+25t~KA$f>x6SKT`E5L<(o$3Bx91-AhQmdNR-6<+}u*=zRGUl;hY|gjmISK!_Qi9;hF!Ot+W}tc4+bW zGoXST(8{IvDLWk8>S8vddxKkt;sdC%VJza0Ia0NyU9u6 z*-4I=DbWd_ey*mgNOc_L?C?A~SIls#cKrGIAYUoH0=`P*!^A=vcp(q69%E{IDF9V; zLm-S!s_)4lfY?eSTtA2|#|7*yuJ_GF*4gJ%~;} zUBq*-@46sSuE=#qv|cH2E`V~^KXGhQ&;5I7&U8bjERL=FI}6BNy>B zi5?_rDX&J}2Iz(yz>J(eMntmS`|R^N=uWs>=jo9Hz?BF-(lv0XllhE%D7_A-Z3)+--BRNe}4MC^TS`TzBywEPxN-v=uZ#vd?~st9;@xqJMd` zf;ZNj27WdIHU{1~QdI1`*HdaaP^>3h4!XV{@bssAoo6I96P*|PWC%qUU#oR&iE5m< z->K}#{r(XgJjRs_W;RC5d&ok;+xZpQdll8@qnztIdBWJIH$#LHD;7fg<^Xurs3R6q z@cNFJ8JN}lH8@^o?GbW;F71=uw|}fkJ`Rta0nj=8?e#h`3GZ>Kcm&Qy0#`n&@p9I> zB>fM5|9w8o;ynG6g=+JtfhmNA{>FYVhd@4kVdW`W$w2L3PDDqXN`olZcM(LtAkr|B zZd3)*K!9Y*-@KSi2{5l}!9tKb5S!CL)Q-$u{2I(*hUU87@}TO0JE&f2M*MbPOl(l8{} zH7O|i!V<-eILp81xU*!7ADg+AU0N%_Y%z^KaW+^sbRIXr&BhOSMYbRVhk#v;VQO}K z>CCm<_LS3In10`g0W0Tx35THK-NOm0>e-1$U-^vno;&^E34Ft62E=RDAx_F;xdxls z-dFsc7#1q$MA7S^&AA$xyJZg(U96w(FX zR^dOr0Tyq;72z2B8f$vt4mb*m%86!yaKSH=cRcPiI+kyW&k9gQBvo7F|1KOuqT;4i z4-|y5XPXv%PDEAaQE^N=h$^#OmyaB1rVAwImL>38vPyqN-n0=B;!D2)l2%maW)@TS^6$28m0uhuXrf!@N z<70Vnm;%&KU025eqnEIcSV&gqOq4DbJk0=k+K&BtZ(3uG6m@zq7$7?0ckCUP-@?+u zV|cm<5f7X@=4)KjQE&j4>V?G9+lX>*>W7oVlY2(>lfD4*hUXr zm!~B9dhp25*Ic8`*TDg?{Jx=OgV^u^vh)i^$F9akrrkIvXi9C}8>2Mv97Ol@;*pYq za@S%4Jtskt{d@h!|AzP))Msj%tvVjM_Oa{RGbf7TOk5t5EGP<`nc~Gh_$oO$DrK3K zjGKkUeNq1?3L9Z=2r*tWAc_K0fD4i*piNK0yt&y@$?qJK``$*5GKeEz#K5PkYzk|P zeq+|a2oe{7BjEFPREk8}Ecy+C{tuzXl%oy)V zEaF_fsW)bfSA;+pK#=SSVHG0vAN?}HI)JWqIE{ESN-X&h5hOsY17bu#m@E;$1iwsp 
zwa}=^biCAZkLhXU0{Ivqy$u*d?au?v5J#l=RuI`#sikJ+cOU$`xhsI|qa+h=W)7E+ zC}?c!(P!{;(ppOlM>*NSjgM8Dj2M-&UnVJU3q4tBjrO_#66CGH3AEJ&j!J@zAo(UB%Ysb2{nV6uWM$VmvR%Ik*)pgPG$yj3X-AQxlm{($6JfLf>`>5$7t3c#gDUt zmwvsiy*+MOT@`sqx_)G|+<7X01K{RkhD3vTFEv0+-6SkM82ppEujoImNz=Jp% zv&e`?!)O#m5fr#pAn9WYj{Q5k^?=T~7&b&N#;i4C>eH-VKbxkHk$4l(q1l1E=U?d1 z5Sr+TgGBR#Z)F%((<&G%U5rEv|8lhFOJM?(T|M;*@TR9e)qprL%j@{4c(avy*(3rL zS)HOI6Lu2AE4Gitoq==LE8U?1&1&AWEVz#8iYO>c52U2AS9Nsa}{wdYObo!`p<$+h8SF{?k=v zR!R!bO@&s!b{H3r)qL<>{bz*OT04hEc`y`6R6~~Do`kDcMvJ;nG52m*<3aZAa_U3I zXT@H4GRaQ!CpLRV(oogN6W-H_7_)01O4Q-c`pD7E&Cf{&*%<;=~N>VVQ@ zt))hJ7EuC<-VqAqQQ+=-7wX=71e(8o5ichCP?NYjj|G9Jg9W%<#w&rmo)#YH<#?a8 zc(A|oNLq_Nn*01iqp`zVA=v{jh#e7V|St=574Rqa+BvVOYGxg zb4c`41xXpxd0COH7TFq`dhF#o$)<_e7%{W$MV9f$F`_<}sXh75tC#Oip&%{p@F`lHtiyo%qw7 zLJdkh#6;k}AW1QB8N87p29H$zg+GP{V^k5SUiqM(s=>%&Rdae{T(6c6uT1Bj_5>;y&&0;$HodwD_)qvOPV2RXNz`t0;(;4brDGl(hgS3*;0*}({473o;jr($c1L-8k8ZGEdd<->?zGb1iN)$VO2A7dE0Lge-^i-bX)m zYp_+Fd5-p&xOMyGlN(UP;PjO8?-ze;_f<_Dj~ z`#*i!ILIj61C{#jL5@=5L~F5RBs)-vi4lo}-0B>TSpNHW7h7g)0^pL7+Y%Kmz+3zk)r_FndyuRz497}WAMLp+h-*8tBfo`r2H8{1@NWn;=?tv13~pI z+5VzDGfigYSrbqX?9;?0%9rWFe(2oXO7%F3le7vKAagN-i>4@S8P2E2R*t8BEKM>YzVA3kB^@Oc8(P(1OBV@sbcf| zMQ-{H>x8HJpY+ORCvKL9{YxzWC7-ekxBsbo^5lRU!x-1jga|Ifb2x!_7TuTOmn~)p zSs^l%RQiut1vd8Ie^`}Ze>$8vc!Mx6%D^9)J||NsV*skvX$^5d4Z`uU4Wt%!jaBS_ zwN*=;?EfE21mI#~i`V5r$lT6XOMDVR;3VWRi^BOWjuy{vAm?X7rJ%f$fn>74KSoeu z1D}X+N5c6LBj@LHZ5c^5AOO^eQqFn7$+J)m)%!}9Ij<*3?a)i3(0>V@Gw^k$5(V5a zty%2V6Aee3KN?3}5$dYDsK~^rq8qS@V^D#}pk;p#dD;)+|6wy0%EVi5XrdzRf3m!` zga@igYbWfm_N%LYc31r|3+0?Om4D+W$+FW3MY;+U;m|^hMn=sq-XsUpL9!zR;JlD6 z$vcrzYX&t+Ram2rGd}eKL?upVgU1;6+0AJif4H7jZ_z8Bib#em733+&g*XM0aSV{% z=EHzzLG+`0VsD0}llT%q#}uuQ3wA=>`wQb@iQKM6x2WHjO(gvko?=&L+=8czvU>wi z;*%wYlhH%DwrpiqLoraYJN9z_dG@M8bj>Dy$Q2wP%6f0prjJ2*cx(hH&L60J5)ume z+fk1b$goJvqF*zG_~0r)`jM>Z*(F`9h=a4r7)&wp5%n?()BC;JI!`p}(gzE2v(5K% z9b5+#Q&3N{iuZ~@SQD7`ipGK^mUE5uZ9Zl5> z+ZGj%>8n27H@kis2HOV8MYKI^HZh%~>TB)0SZ7 z3*-8qxLp=HHC1mDZ9Q@E#~iQ_35&_>@+N{An_`^6uO*Rcenv6e``jncT%Kq;^#cQKSXBlO%@vYA`Gq{ zZ@uUZB%oqKNO)~Gf%&6dj!1%qTIeZKYSfn47{rp=TV!4u{*>|pYIViHlBpUsYbhI)E2#4+Qn@+;;B10N2dz}%6<-^*rRq^)+b z^)BvC&Xxz4hUqj=#BTxjc&2FH7JAHr{x8rvX93E)@ zG$T!P2Ql8A40Yn_9?5;18ZMZijQFT&aWX4DAFJ`}VS|m5wNgoxTRv>tQW*!gL$=3$Xmv88&uyk?>Wc-G7kk96VAW52Au#`+*ivgwDMD* z|EM8fYv`se=AsS&u_=7iDs_>rRss;kYO`+u6On{YeZ$&RRTpqzr9BhwJ*;Vy6V;D& z50f@R3y(l0)D7O4CR$C6RR0Z)|u)c$8kb4F@vb8JqFc zWT`(3cNp7#MFl&!4KD!9jwJsg<^x@3rRy+iAOCmzXKu+FAme4yGfjU4HkSD>TT^Z4 zuK7~~P-oB( zu}=3?2ES31yDT{)l}Y)NoZ&e$6y3K`K{KY>HtWSgU1$@2hjuqBZ<6*Z|I6|*m~N`-g?~{ z^Yn+uM}^`Nna~Q$wd!s4>?^I0tC!}~p{n_b=2<(VIyoH!9fWmO^a~{YZ7I|4>k=Z{ z-2$=2Q*$a>fcGOTIu|Aov-gtrv4<;zH+BLbwat#+3m_K2i-v8THvK@;eW{)(2I~Df zKYG%Z?Y=0WG$?F|G;VBW_WrlTTxDk3`Y-MN66PoVU}7>5R0nmx0t6?^ieC0_do**J z366&^Rq2{v^1%l~_HKRuM?>$Fh%AP=<%NUnMHyF`ca{cz=kFZ=wYe!!0%%S&1rj9! zfQP~p=E>$%5r-J_N~Y2l;BQZGlx)0@RN!#q5Jrj>P+ZDwm2wz@q@c%_ms|hnq$i_4 zr|V1Bygfh=Y<{M>7~CZb(s?8I?fc9OD6Y7Ta9L7z^*HFP5zY~JCg%B>lbx>8sUg(E zN~X5rcVmu@h;46ob@`K|b#Lo0X( zfIEFM|FQfmsVp?P1ZSWKLak++pQN(l@>f}!C)x-AKgW71(|tN*9x8^qO439*kX6vm01ON1hEHhIg2Sb#V!XJDfTr)+oRImGt0 zHYBvX2;UhnE`PeIPklJ}^g+L*^F(0Hgbe6H5Rz|m1GsugQz4YyqB4|ni`cd=EULq6 zM-1GTjFmHpqz(%8_{fuq2NC?~28_}*-F2Nu7x6Vcw>sDn0&gczfUP9uudw*I3KsDe zaR9b`CVJF_NGWqf!0|!px$7!~;f7#~VzWQi<#P||}0r&t4_~j+G>0)&^GP z7BnrmTFlFRw;i@=(_?TQ4cwGx55rnZ+MX^G=nr@JOn69@W!GDzs`=C64bt6~^2$Dv zovm*-k{9d~UmiO+^Bl6=K^mJ~y(@mO! 
[GIT binary patch data elided]
literal 0
HcmV?d00001

diff --git a/doc/source/_static/spreadsheets/sort.png b/doc/source/_static/spreadsheets/sort.png
new file mode 100644
index 0000000000000000000000000000000000000000..253f2f3bfb9ba28a46a7295e690d3c9340a2396c
GIT binary patch
literal 249047
[GIT binary patch data for doc/source/_static/spreadsheets/sort.png elided]
z@b$;)MN2FKjyN3l&AD*TspO!>ez)i$kImNWbb7f+MCtVOv69vZcAZuP^b~;}setVRD0g4wXR4S27P; zprgu3=VrRw7sGVk(&Ul*qbwk~k(dIx(Fwr434bM7K_p%iKW$t71t_2{L4X33wd`V~2 z=vj3h>>;Co5F#TgHbwoD0^p2#*7vi4-2aSDR{kEG;(6_}c2~W`ICjAb$`3sX`Ie*``F8hCC&AIZWhi z#0mOdWJR*v8(bGh^Q9Rla!vSwZ+^yI+gmM4!Wz5|tdxPM{bzrck_1uY8?TC8Bi>BM zExI3>;iKKt(TM#oC;Y8nbmE^U#+F4qXIp(3bY##=|{CgZ7} z{t#&hS4s7#aPKMEd~xip15BE0R*z8QpH)7OokW0F>ZkL0k?xBU@`L)oIQIjgkVwTa zJ<&@9X8G2{?^YW%vDm{1uZQTgTro|f_jAD#fHsB&N!Z@TFwe3X&i$$Gm%jspGAaG# z8t->^$j=S)6|+5gCs6etMSN-{6kVzb9ECs$Tc3q;fIbKY0JLpN7T66>37!J(Zp(B+ zuR&Pj^ltxYPNdj$Lb}FXocD#&Ef7pPZ9)#iyl~;a;K_LMQ-pPiu8u^d!QR+*pFD)0 z(mmaLxf(_>wmsZCG(^;7`4>J}d`=U5^}Vp4-sJuRI7~%UJSjshkN*Ji!8QNqh4oCm zvDjQBcw!Fzhu?hhLFbbR@uCY)OV45{05wi&=OhWt@N+W}s8fJFU`F&Q#hBjXN9sII zC=)%Q9qO0zGm~aGfGHh^9Xshzn}#O`_8~E>5}`l&yPR)Jr!M~JS+&r1d?@~G7AKvx zLVmY2So58Q*ZO9~%`JG<{<(WetpD}RZGL?C>zlJ9zPX(dXXaj)WQ#Zv8H$Z~7-C>j zA_k`C^i@xk1P@ZqnMc2yl-2qCOg*y?K+?csOhRJm-9PG)d@VyW~LXyj@t8B%%D!+Uchilj(bLW&? zzoeYQKM2q8IoH>3@~%sBUk)t?S;&cH1zTf-Zpcc<9<00b<>?wJf0)TYR`t}2=iI7- zTN;e~Y94+I^pj3eU+{Zj|%b#N8+~zP55ajma=7$8LkVo$R z!{2E;KUWE0%z4G!%)$_6I+wj!?Vk%ipag-gb;kdvlg`hABzw8g;Xc>w=jQ?32@@+R ziymNcjR$Rd69Dby0Ev`Sa_L@*Sm$8f?E^g|1cwtRLLMRjL!t66t$aD91}a6+QEB(z zJYf<~#fZ{qu_DWoKsX|cauVB!7t4Mw*X$AX{T2^#(Y<-!&7?^I))ch z#q3~t4CPG9N)rt4>IphKB6#YN>%y!s^UOun+w}K z?_<*=a1vJ>K4}6IM&kp-@~RQU7pv5*#wmA*2<~3w->%cS9Rn%~P!xDO*{TVMog<(% zB@M@6fQtQ0$ZeOnWa!a5lvYH0PV2J@Gqlm;WZenmZ`H$dG!pm+yHv!9(rz0(PFCB{ zIV1e|HEGHLpsD}u-FpBV`%rPha94;C8itwv#s!(*13Bziv^sy02;A$voItiaOWE!0 z%M#RUv{YRxENy6^o@2ZTmTy2#IQWJRvMGw-2sn?9eOv&9Bbq=~S*y#DjcT5)$kS?m zfjzC~ZaX6i4J&8sOWFtgid9hToy`2jG*^h?I_jT)X8`$g& zgQFtOFCBzZ_T*PJxKj0QAo10f1h^Yiy-1Y!kmfq6?*t0o{l3?GxtXY9SQgz;+U0?{ z0-2uM%xBxP8VW-2a(K+CBe}mSmcL4KH-GRoEiLXmh(<-w{1J_ybd8zQq-zsjxfI`p zFrg7J>i@MbaTKg^Bqur0q(W)5J?N{_=Yz~?$FmPdQcC@Zpr`KxB?^oeG>(HSP}Rx4 z`~AMHH!UnKJ)L(RnxL8q+|nZpkD0DzYg#^iMb?R{r&Taa6VLbN7%Sz`WpCB8N^ZMWYjPAFQ4_&zVtV@RNer_H zSK`g>;1Oi3$Ab=#Kav2Q9D1^~5aw*W)pdN;OzE#8Ml=vtCFb|b+Xu}V6f&JaXg~?C z2W8d{0P-*VmQcykP0oIH@YKim&Gd)C1P!x+%?(B&V>6G2x&bcDeUN;XWd}|s1H%;3Q8+231Uscn9W(wysy$2MQbWMWra-7z4 zX|Sdd`>Xi&AbK?jt-t6ouvUFpJj(ygrNKKHFQT%`~g0;77VrJ@o)-PbZir*<|HjZv3lT$UXS_H+r`Vmo2?~;`l z%4F)@xXlz&AuGQC>`S{XgiA!Emm+afZftzBY8teZVkZ`IQ-C6dIKJykJ4+G=J(HT@l(Z-cMpY_?ivDw zyJ(*n!_5OKfN*EiE0u_>Cx5qC4f2;le20#_NSwNx#A$kHTG8?7%nubnM$zJ%eAr== zP1@1#-E@?<1QTb!IX^%~eUVsLK890ppFcO3{Y0D8fzS8A-$-J&%Q%b44d-zer3EMT z3${?x$n?LrR8nEq-rlQJlUq($W z0o0vdGf=~?>vUy9hDr3{Fny*{9&3O+QBJoYm~-G>Z?R%779+`RFbeK30X6=SLTyI% z4DtowKC^3hQe4yefQ;>Yb;oOQV1p0E0OK1&ipIdTP9|rPHAtQ1grObb>*w_$VK6F9 zA4_to2u9$h@yYl__|CeH$FS2vx5tDuje`?y5i96-dbB4A%8F*MJwfpBGfi39PV>Gm zmNS~?-BA=TCB^fdduW2k>qj{~7QD|I$~*<%T&{6L6>@2WX_he;b_DgMn(#P`Cx?0o zM`13%W8{p)Ad*|aMEP-;iiR4Q6xHAS0S}-?Xd{Bi_vC+rl!6N1qTxLWl^@Fw0*!$C zsfgHzv}yEd05t|aHe;;cS%HcE@4b7-$7nb#;FMA>7VnG`&&tg7KwmbHjMOlu?sRL6 z(HrbDWAl7R5>B8P1WFsh|JXWiYRqSoDfAC_p7>BG+@CpYtU*k**2=JY8x;6URt zy?y#4NtTC5^IfN^0(c#&%qoX^J=Wq>7af8)`xP;dX)YtQX_{!vovx*iodb6a9Xko! 
z?0d+I1M?0BWWQWe5SU;%%8yyN_qHvFxA43^9E{Jr>|`bQ?BCaxRzUVm$iMp@=) ze%?_mauTHGLCP;hdbu#IXt9Wj9~HlG;rukyMJ(Zrm_?c;2qwE|Gavd?z@8R8u~HQ0 zdo(r^C1S`hZEKjWYKh@H)0^L)cj{;V+#*dg!iY=Wvl1=-Dv6Cr_4VC=IkNX%)X>e~ zOC$690|>2~g;Mu99>nlMWH1;GPrnzcSOxyPxjY%% zi{xZ04Y;BMbFV@QywP}qqrwgwyD)P?StLjK(t!f|h_FcXY54rQhj+#1KP;FvrY|R!& zOjabMP7KQs*-mO=T@Ely-4x$Q{tlw@fnYosidj^YEDH#@PZWFO>;8L=7>2eQ{9ETW zR_$Hqns`JSR#D-xmvTLQ>sk~_#JM7bFqz3P&ItsGCtIl$2db1uoCm(eetrePA9FZn z2m2v~^Yy*_d2zt>`zM#f3WZW!7|>!>%M#QwsN%8=w7w?l3qUK5|4NO$MsYGV?+B~e zF(i89>chE)4!*1!veH&}(s@H(lXIc@75l+S+@yaXgn+81v}Xk&PBJ?SBeNnbAKBnh z&vGyty`CgzOFvNa+5eJ!Q6#pZ3eG_yZ$F+Yq7xLV8luEi%D~yEAOu~^-UB@JpK1QF zH9W-4EMxkFty4aN^NEcvguWob7E-9vM>Jk8W)e-EjJO|Z2dchV{#5KtpcU<9wXAfH zla;kXOUPDgg+ZKomHxg6cHd=T_AGOPmPGQ+PmhiY)`A=kcNUpRR z0}??krL}>*+Kmu#u+!y|HXfh67!TGkdn`VA7jh9T+&%k9 z5$bFbae;G|UIm{NIR*y-Z;{xSiO(Q+mkSTC2fT67nX`+}_x%&l8>RKWEFTLh=9x|e z+}Gble zX;rPh2uO_B)tb^WL434A4p^$1FZY3gPm}5gRD#n!?XfQdu7VtI(y3|EvFc43pZ?r0 zc4iprkF(C)v}ydzUr$=pl)G6vPrOmn~+M+v((8KNN;u6NK*ZOJ_I3) zz6;vN_@Bkwa7DJFh6x6chZvfaX!5b(0jyXiMd~rHm-ELNI+C3FN!Z9llXNm)ya`22 z&=T8?KNA#b>J5TUm~KK$^lV@z4)77H-t-U=X0KK7Ed>C1y8*GFlO?89;1O!l7gy9* z4aV+E_fspLPR_YD9myJPXf`4CX6cU!zIL`l;oHL9UkVnTSbYqaDq@f*2sA@9j>H5O z+p4_P-4zcERzke<_y;Th?8g;+cq}xe@1d$iXy3oS!D_H*FxiZlIhGXI?Lb{j_YpsL z+6-6aWCxq_;3wO0vp~{cTi=~J4oI~myhoNfi)mmlxwvJy=pJpp=GM&1z@tffCl_W4 zoN*Uw7ILj}f8(}1{G?N8VHsrnkRZc_A_$1*Ub{rHJ8?B^MXWpnauFf_I; z%w6X#gYM!A8c~f5B7z9;d3Q^vZHZ6HW1iY&9CDHx$yi!1s-2ymbilAv1|*Uv`w<%hI?>^JeHedh|rT162C60rGHuIUp~n zUbLX>109@K9C0M?hNJ88X`!z(BlfY-pP4sL&__Bhet0(#=qYp(?yfC*ahU638#qz7UqTKTG>zx&x|E`YxAi8y9ke+}90v>@FWE5;1D^bW73e zFP_{yGhXcX#2~1oarx&WT{_hn9Bj>{TK&Jz=|q%~({(f**n zBg-3}<;|vStf_~=XWH`i5cE39ND`nOvU()?F?8x=5lq(q~-Cn3wf7E60a`?e#|SmMWR6swrv5{A&OP4Zj@76sDv~vIE`A+ zXs9#`djC9>_^o3w{45$PHX?ZWZk0Z2v;lgi-jUH}c&O-I!SqvYdsEIuy=?9Cf4zEq46kN6&o2|*5PNN$k-OoVn_59K73c>KDfVf&#(tkeuM+! z<$C7g_cr>wG*mkiAkcO^;o-%ipCA9+oCOWih;DSn&v$Pi_hIPa03@LZ1r{G>5T6Tv64|| z+LXRtLBtM@z7DkzYcYE-^7IrbZ-0?*mwBEC>HZlj)0Y*9CPf;VOr>>xIXpab%u`#c zUjN=AlZv2^#`yh9!J}oX-i@zy{hTGzp0i-No6QK>mxtg-OO)M9L7{Y~E|6)zLJlNF zLpfkQ+KE9FgPyc>c9zQT(#-rU7K;+O?=IAgIAX7$msB1z|JE)-mbCER*M6 zs{ddQWF?f}IUIL9aXFXGdmA7Ff%tbJ?jR?vC{&dbA5>zfz4RYvLV2*G(MIpe?t?0l zeFyplI`{uW+FOT3*|vM5f*?aEJ#=?VNar9eAt2o$Al)D(-61iAv>+%UB^}aIk`hwV zAl&-1P0*#71G#ms$Qah|_AhcNud&>oNzQ%mWz_oO9o^I1GX;p+;~ zKlxT@;z3URv^ae6Ih1~siHI&n3jIm^8hVG=QhAIZ=NF)nGhcCnD$-ou^q=GAnN9O; zNOU(7`37kFR;rRce@bp2b{>53x*nNK>}3ruadO`$3(%lS3&KQsip&NÑw=iFSD zqQUg&gmHepfg&wnTnoQQs|jaMTKL)U+~h{X#QQSCBraTJSgs9?nwhwioNx1Gukk|SA+d=F%DW%DCx-}J8iod^OX=3Z3Q{`@IkK*MZTmg=~e5kr%Xsx z`>e$;g6r0ltI)A0#knHsSA&|z6(PSe%*Dm!oaUqOS?q8ZWTvQ+o=>uZp`I_sirpXTmyd^ z%Wol3#^F`T$6?Ce0Z1<6#1rEO3Pt40*L_t5NvfntR$3t|N6{-aQIM1xagzB|oMN&- zbSZKRPA_3Rq zVo{I^_U`1sKY3d+8UGb@jgpf>rkJ?&Py0!S*zkR{v;^G<3~U~?ALXidvP@r}Jq)A@ z&5hN~JSW@c?ejFXiy@>mm-vFuUSRb#(}o^%<;j&9 zrmgkh`V0fp>8<8vcpZho1WlsoS&b(?y|>KKoHz?K2GYk5Dl1}TsOm5J88n?n(U3aL zutO>_Dvi`=VNN{jfeBB(qS@L`dEnh57J&kdnxX_e>|cw3@?my;7kiMBA`S2)vu8?E z_1<80AL!YZS+1a7si3eCLPs0;PW)`im%i|q*|Y5Hvn;ajhG1~E5i31Ba~?}{7FzB? 
zH>VblxlW=|HNT<9{ImU;2dGmXWW*8VT-0-XVFE@fm3olK&W@=8yVp<%nQF%(uL``^ z-ET2BGQrS2x$>)J)J5gqxk0#WykuNLe8pNC(W_(Eous?AtB2Es0m;&$i^J05*9O6& zUpt}joP9y3&!2ZrcTiZa>0k_&*pKVzP!uI|CjzlBgl^8pxfnwU<+`2-ffCgghtB9Z zC`F$?h+8zhw<@o@qrrX4IL%%zk;r2+Mo;F&OD)%smJARCHWQ;gMbUeE7rPt2I(;}3 zB$lz5NO+goI%>96ohxGWLzDBt=YT4U&7He9W7yk>{waiYsPa^XK^^VP8}*9H>d&n8zl?=RRFbe_Z0dmwIIM>ds!?C5Pug z5j|@?Rv|~;#6XJKtjK-`5Yu-6zo^+UYY)rCc`q#5S+4vM$5gtQPc4}5eHVRz4Q1^f zoCT+#;ZOoB1uflx5YS$$Lz2J87n&Mr{se`Xe{>~^pIxgL^GW33>4;~`K?kO+784bx zmPiaXah+t^!&*}s^hsuf(ypp_S76o8_CYOivodYo#Ch2?SITo(=s7Yhv=Xx7*h5^d zDqA$jePvQBE+Hu}*cvcGRShr&#Y&ER^ps9xo-A`3OhVGT{)IrFYia&bw_#&lxmk^? zu{ZQb>8;7fywh9KffirSck1zyy;&_T!SK zIGdr({t7=9Mv|s<7t1;FO_Kz?X|JjLWx|dS^BaELyu@0j{A*m~ZaTHaK&+BvZpApKYd+?&Iyd;-{ffE;|)_X8UWcrMKM0FG#}_C#t_bzra& z+f@GiP{L1~FsNj+A46y$^mMQfM5E~!8A-SJbQ@?}fDy{3oO)6ZPda4+{5`XWDdJEH(Y&2+>@m@EPv7O~* z@HO&W&*xfGejRzKu!bMt;&^^~U_Rg;_1f&GkjUo7TN;r(VE(Oj!7Ka_^sXXC^ei2^ztnY;8s`i3h|a!# z271co6f>en3*-t8%1K&U~n_*W{4N=hpfB8!fERyG=0x6J@E(H$gTq-jZFT znS0gW=Z3={Odc}rCe$7r>Zf#7K{g$GHTRhHjqZR-Qyk-FldCI+XSygG(M6v?c zLAdZ`?HtrHQ&VB`un~tXBBzr>^RvUj=XQANqy(+z*p-EwF)Y^sh18Z><2>aV?9XfB z!v(L2FXzo(qciS^&uIO{&m9DcDUSTJmz2ufPznewVozHO)75M~Jz=BMgeWeS8;Xo` zz4eqGdGwY|)^SigXkpg9E7{erCWSE1jCe~m3g@8b=lyj3P8e#S8V$vv3l1%x9h1Tr zr9_@&=`Wza_uHmdO4K>DHO~XEYquIV`q}zN% zxd02Ph3Ky}MvbBs-i2`N9kw43-P~`Ry zXT;}l#k7nK9jtsoNHLWyxq-VDO#=3xb^WCH$C2&Nn)lFkWXu%$*T}NuO@tU-k=q5M z0>@HCF%{ssscoue;5Y@*DI@?#W2XbROF;hm1w!5(;z15G z?)^JHjgHFU?5V~>m#5Yli$i@69GmST^I)B!YNkpgOwySU)9vMr+}9dZ&amzQhGXVV z?u|1YR&p+b`jO>fEK)({$KqqEIgMqIlnF7s`2@tfl|-yO@a2rl;(jT`_+U!&tR+vboJ+jp*CN_A@;ItF+-|D0P`3?T_-Slvf zfd{BU&9u@VSW>Gwf!uxMu1FxVb4O(oyJ#zYy4hBhnvGOhW2Eo1h!}%>dYimuV>;cq zusF$9L7L@N^sR!mYcEO=&3-%iQ@WYjt_@fks?}JiG#zYgZb5-orYd{R@c()Q|M>zL z63FivHQ|Pgnp)4ofS?97XzZ*n_saFEdJ6=64nKf z5DXhjOUub0Sxz+AJfSXjEtFfyfdn+MeqDWdX9}v6OaoQJm(;rASDSAidi^kAQ*Ge` z%>th|;e68n`6~YNJrKtp38WxhO-jH(=o8P$<|ti_{VYG2L*b&E#pBS5Gex5THLOlz zBTw3xWG`Dt7cw6`UtEE1p-ssXO#iNun2o#{?uzc2-2-EXMHTdEMwlTZzi=E&$;TsR z64xi0$$co-6qu-Zx(0kmj*_nZAB?q#>W8>w$A@ABW~f{ausr%g#Fmy14lwnThi+_t zw|Kxe&JCqW02P;Va8~4)` z3F_IoW*llUH4L?wUI`P4lxMjB0tfv){mrDdqK9VO-?*QqtXPM6nI*jW@8&hld>AJ4^~h5@&or*YE&O zA+hQzR$CANmOM1M)z`^e#{zDU$>2u|WAGb(>`DI*3rY_Iv7+B;Dp%&7;IP);z`%+| zYVzT#Q$Mr{%O*Be%HGN+l!D1Ds!AxLFkR6nI@^ z{MSa=d~<=$J6%|7xDj}&p3C&G({)SBYIF3hXd4*3S;XL7jOu6Qgx3uZx)^7k(% zw~%_o;nL&%?T5{9CgpSs&Nh&sVsd5X0JSCPIvv>2fV)jmRm)Hfp|w=9Yf3643ZnpI zivC`#*ZN@kWW@Yhs(|%TCRSea)WE!Y&#+APM$$U4pAW2rU|2%gbxcC~D`+}j=u~)R z)!4}t7py64ivQ!Ykw!cl;l=*XoO*Jl`AnChhQ+v ztB2mXKM{#X%E9Az*wl;RFwIg>&;??>NUeUvC=Nmum zCWZZ)jwH;^H7n-(?J;N3mM5U~8+i{5Y$#4pG53IVku^`bR)zR788nyXB$YLx(hVQK zA3&pF_S0~>WPo*3rvCbe{enbOL4fnuj&a$XskX8PlEtl*K<2HZX!9unkx1nCLG}LW z=DNki^jArYD4Ej-9Oq}iW;)B!IKV|(sh{K}UDS+EQ^24AW!32YFCha%Bys;stP~nT z>5lw;UWYHm|E(AN`xU~076?cm9k}QpLLG60Rq{nPnw@k*8o?O)#eBMV4}{HCkE>`k zGvwdMHv9(7d??V_g{Wi+hf<2Yv>>T5YwIY=@h3$%mg(>sJ$6Yi*duX_Lwv4)bFKp+ zNoaHjp^l~Hu9MH=?hO`1U&u3gIlSzZ1$+ z=dP#K?_n+2iHD>li{Z@2{nG^|_2&X(dZKn!5E_NgESg`yfMIzlSzPZh!>;k6j|i8Q z#c={b$<-hBkRhZPlH3{<)>@zWboav!n&q+XAuj?g9GEW%m~?q^BV1D*O`ds4BCVBadV9sd;0?Op=%)aBQf|(64-BKJFJ1!> zW$~zAsz6e--mZ4$`BI`1XJyZ(P2t4xJtf1%93j_tkL=8fy;c|&`|uVM*STH=$knbM z&x66Q`m>CY1f>CjO#1z5aBRy9BPzd>r^I^Pd_ElIum1x~{Qb}UYcbNiJo}KLM+n5` zl!(Jh*u<@8Gw3px%wsu(si&37N9d&cT`Dv$aY5D)XxgC6yWc-yw*5N&*r}y&PdJ^! 
zZ=a!?CHa85?Q+Pm1Dz>TZ6*3gnC<3B-VzY?R=CM0iJeW;o$SrelJ)@wBIUa$>3Bdk zype!)Vzfdpga%yFm~FSaj-8-`#)kvE#;FTeF2ufehOxAMIcf>S`8+^y91R3-zXQ>t z6_jr$RnHM%L~k?DV+jJu%@EgjT@Tibl=+$O*=<@+)?AUlgSd^+*8y0um#}_$yJx7{ z>jAD{Sp*>Uf0SbX<3lu};6bHu0mujD+d%*MKD;jkiyN4g@n6$SPVK~Eugb&02)|&4 ztbmAA7vRbX_dA;*&9>KqW)mk@P8u=|Ni~Lav2eI3Zu44Eio%GMlMqrpL5p3 z-Uv`&iZm?Stm13SgkZ??NkzhC@*EdiJvKydn6kZSuQ{lZEA-mG5d|*y#*$gZ((C7D ziy-vG-3EEUyO2W(ghZ*NRv$`BvE(S+lM7Q)Q^S`!!)N{9pHiDqKe8@eCZn0jo!Be_^bD|B4Vj1mI?I0X5W# zf}nwj?7$sB=$bK3teqC*cUmkCSgA@q86&?Fe-*%RK zF3sw29@JyIx|jnk8WvNQTVl6H!`07{PL8@cv->fE9OORI?4Pn1H0%d} zm#o3XQ-L7bqa`beYhA(hH1kqiOExGV<6sY8E;muLl~x3y76t&%{;n7B$?)@ppA;nP z4M*ZK$-Ps}afnSv!2Mp7`f=V!72-?NZ6%M6ymnA9)^LW;j}wN0c-5i1%4rEkrwNkJ z(^|5#s4W9P#kmu>>GAikEJP9$_NS^chNOT1?eI7X5z@eAfCu&CuxZftY|WziFnv_{54BNUlQa6%v!beeR)mS31srgVw^S7ck6-1vLlSlL8n-L z!Hu2&*HLO)h_;b;p{%(0rbGQw*a*m}BHi*=dm%1*P7}DFi zAA;^aZ58d+m@|b=d{n%BnY3VzWmPU!#*gtw9PWTft`?C1virN`b%`$E z;12;o_5Fz^^8jYcsQ_0w>X%f&DN7gt1Jlfnfv0KT@#<{R;T)Jgu+&55So3PP!LmY4 zXuGtU{j)01$K;n^AH4J1H-o`uDUtonT)4I1TCxThW-4mhM`l+M z*l<61wSBC6-rs$5gvJ0Qfgrh{vV5Y{V&+r(a-Rr77&nkVz{Mm*EtYwao0)2*OB~RJMrwAe<=(8p~*m{NdTUC^J1K2 zN(F%Lvi(ndw;;ghtI$RWXf%f%+@d*ZAw-65uf;~@M0#IX-dr5QeK)c^?*)x)^aGfl z+@bB(L#gMxMV9+yA4k>n;dG^WCqksSg|M-`7UE-G1;ERFAQ3XXm}>iF*_*&IW#AgS zJ5@v(i|9pGiz3p%CZw*3&G+R~|p)n0{CxBQ;piS-pEecqVAzBPhAw3~)&h zS&lIR==AKd+duWUxogzmB4&6ICjD*VLq!j%Z3q|}VA+R1#q@T?ql3O7{wp!f{rL*r zPXMkVjU}bMYlf_ATlT2~Xj&|>B^Kw6fe?EMsM;#oY3%T}!qYhq4o$uaLfpnjVo(5HW?M#{U?U6^JF-pg+famhr(!vZ4cdH&kRl-Fqr(G$ zN+vw;E*hs6JogA#DC-L)fJ32OISb0BlXa(I1XyEi<=@!XrZ-&lYxCR4@H))ZR{+Wg zZh*ZUap!y?q2U1(0ZeQ~2)uyclJalu&^6y72Yq3p&(4-g6@4UOe_~uHAY%!nyQwGs z>HGjE%V#r40f3Mj7ydxVVoR(z{{oQFdu21bgdtm-uf^68w;B7^ZLmJ)oGT3iS>mxw zd8YOLG)&^+M=((21lVGsIJa%RdsDSTN?6HrIiB;&V)brUH^_8^i&42F42gI2r%=Ne#M40vuzj-!OP!rS<70OpD?cnuE9WKO^DZ*J5R4EsIA6h`!uoh|7D{Ez zMQ zz8`=0oz|aSGx0HoYNUA`Oe$wr*yd!&cZ7uwwCBVGTQ|I5ma-GjH(B-tgXo{Gh&W%* zSzwVOb!bB9fzQk0URb`WOq?pqB=2 z+W&O4G9Wf6D#c*|OA_*714y5;7d9Wu6g8!is@oB^JQJI*dnXMa_;|v3c64A78#I^SpfYgB^=&1d0ZzeA5UykVb`#OuK%V~|M#teEaXA3?clqoX%c;zo#|!n%*x*tAuXNqf*88coo(`zVW;_Wz1dQ<%9s zy890ZNFX#s@;f>qyQWSlh%ve@9pH+Z`sM74B1N9+${O00iXl>c$<@ z&PC(X0yIxWuQh(7a;-^&Y8ln^`gF>8)|37JA6PGoJqV9aVpJ(TsF9b>+40`dr@&<$ zYU38@cHCS=1C5{>O<8Fv`|OC2EVTx6@8tIDuLE~jV5HX(Nenwl*`g|N%ba77rhxDc z0TNFu`=t#5II4YF8=3xTpY#0L=MMEQFD$7ac!TPk!@qtz+4(rM{y}=xW=595QkrPp z^Vb6(;iK7aCh7_TkDL=j-XMDRLR`U!o;X2ZswD+^362UYErP(tDT7?&DR2d5LhQyz74|XgIJD$b@L@2b*V5X+Xa^xC1brk~oX!x>d$_=gRVD zq8x?W5D*eR29RRrh?DvnKwD~i4547-$EK4Nl?u2>fR96zN3>qK-az%3uO>jX1tYa= zyD z6{3edPy7=_^D?Qfg=T2|pR}ebt$ULMfe>Kku&DO`JMjKrFybHJT`C7lp#zPAHw2^6 zR6^Ej$DvinOEojat~F?k>ue{bA{o~SFJ$)MmW21Y&dM1vE1w%Fw?31M^+AC*#c$H= zVSZf@E?{1#NR#9DHLwDBq6mZ8Zrt9_^}`ra;$;N>iP6$SLG+DYce>PAqN7hM)j-e4 z6(de!nuEQil6^tz?};7u-+*_)%O!o!%TJ(|nl5`Otlt$s6HXxIg`Vj~GZdfoFfXP^ zV{!Yn3W>Nwp1OGPTwI^hTX3SRXpD;e6ZQ`eL}33B_y+!`2p-FJ>T3jUHbw#Za)AP8 zJ7D%W>~_7@XL<8wuh0tlqAbO)jnAw^(toD|z-*QRM5r;@u%VEmA~^>0sh!!BX7`{f zc&dk#3A{LC2?umQ!|APoQP05TCu*%5QIOn<<+?tI&Xnb(ewN`8n0yPcyehQ(f5hTB zAb+sDz}`4AGJsTldIiXd0mx&F<=!2Q$#EfB)hDg`r8O=vhPUBg6Ickyowji8sz7s# z8S3GWg4jZdoeH>2QuU~3xc!n*U^e&SfMoK>MW51o^>D)+Y8pPfU8Pz&C_V%)3zBSg zD(F9^g!zHJ5hT#d{jItm3AP?bLxP#Pd@*tdbd zu0jxve`M1Cw`RuDWo>l?h$%^-tUzqRHR0)3oy45L8nK-!1P_gp%9pT#eU8Llba4b6 zaP9nZhHbe`*S*;3GKJ(CbL?~cPCVAUdOm& zx{v>_I}g->#3Cj!`w&H-9pZsAKGlwCC6-FX(p}bf>za_V+OaXx3Q-)e8CPC^Gxopp zvweYen~eQ0YG=R;Gl|;t7l4PLl6xSN2xm@SyPLP%Xj%Q_J)`mCV)nDmlmvbh>w3qQ zQ}}CUz)pMksAP=P_o-rdEA+|16I8_{uzQW-k5(%U>z#m2vlCnTfB3HczO4HT`eyI= zn4v`o7z8MoK=5Pb9@ 
z81QCWunlA!e8_)XL<{M>R(hn|n$3ra`xcRo3dcmBhuh)4Mz0VYeysv6>XcDt5{D)8 zKsXN$>p#QR7&Hf9afk5^i}Lyk1^5dd5`?xaRV?R(*>am_b{V6m4 zq&Q@ArsTzoXPZDA2*3?#_+u%eLA3b_d?sJVvs>c%s_mFRKAeenc}KCaukdMrko1q! zCJPcvQWflyJ={tq9Y%ltKIe^UM(S}D4l9rh+5)IUK(_eHTB_8zNx)}!P?_6S9CPgA zw*VAw31Nnwo%USxD~sja^%*+_Zp1%(wE!0>nFIJ2VG*u4WWC%EL;&d30YJWto>{|u zpT?Cq5j%E-7%dE-fL@Vb3;T!2kOH{YDXk^*`Ap5TCU(qc`zZZM%-|u96V~0mglflz| z6d!OyXW+IJ`ta3Kx>Ythh%hgsQv2|)>oAvuEs z$KKWg;JE^zw%;N-G3{jIM8#B0+Au+=j6*T_SFMtVoWLBz0JKdn6yL$tk{7CQf3!Sg zu%OrQ&F+{hcvfQBF)BxO`Zh7vbI#v~*z=mgv}e{a6phNOLV?)<;}d16pV#jfW*NGr za#@2n566E$K8>JA1?V@x2tJOmq;P^30tsbg3Nh{hNnKDS^6d}d4IiK1I(#%r6Ud)g zbRnx!cLoTcyZ*zlqqO}iMgQTKJ@sQYb(#u)(fLdDH%9|+vWdsKxMdw<1RItgM{lS5 zJ&`gX#AUsg@~Z$HiL^>U%W;Bv6`^4-7*AjczYyQz^QzTJt57A0yQH=Q({~(Q2a>`c zLOj9Dvgaf$*GbP)jS^5%vJ$FH?E zxVb`J%>x3-HF`@2h_>`_4!ZG-2jk&IXFim6YXWWi>6ZcP^H(!cThPw)afRMvWrrej?JHpK3F2Gp1^B)4@qI&rFVM~>Pzju-ysXirN zzcHnUVX1{7%AHsfh0v4$gq-aD5&EV^)W{tGoAT0${@VZjYnYVbR6!15PQmJWT~)<1 zo4B5Zaqau$DOH_Hm0E(|SbN7n#ZiJEf?nu%*|F^V{j=m_6=z%XgDsn+8a!6u=5Z7R zf_+}E-rmar;~O5y?9!SeMbjRA-i{6w+jGc#qtev9CUY9{`xn7qQd-AKC-=d_!TIE`{m*06v zj@fUXn-Y_P78HjbMjd&lGK84P=Q^I3w0Z)zCGT^2G=N%pL96j3TSB;%$d;e;{SiP# zNz*saBMN%}*~lePI#p{XFOvr5qUe~PKI#D2x#maLi8{PPvWRf>@*6o{|2}b7kvSY* zY)SN+q?}*$iRI_~*1tR5>0!A-bSBXZaT1(8kIkJQ%OXe=pLl9Z-e^{$P%!vB{8;YM z&VG}zD$r9I;5ZE=|Gte!y+b0#WR85I^cw^caqRK788YAaoc>JFQ)d8zij`OIho3x3 zHBL|`KbGo^yAW)TB&da?1&dLQjmQlEQJoEGz^}#M*8xi)=w~Pq6O3E50<6qUk|^W* zBqr7J)9Cx8d|!}5zv(fnu9xaocNqV^nmPdaUf(eoqeeDOF8t4%7DE9gvhn^IZ)7_g zCqX82Ua%Kv)2d@9hPOf>5)<;ekMhjW4zXj?ZGAf5HMiYqu)$&A^Bn^Rf`1kJ)@std zypD6Aed@JI;lpG7F<&$sXL`1yo9DhgIq89TU8Fcc`yhV~^LL|w$|P-TQMQNP1%TQ# z-wwt*BVeg8iQPJ5Yr3=OyWksZ$vFSf$4N!TKQtW24*zO-q*00_TReo3-u_A-)N98{86xGz6wrQ7tHVoC6 z1EOzNDO0}&*T;Z!FiLZ_MM=H4gfIDhHVfuhQDxVlJ{E;1Bqf|JbRj(tzc7j+ee~gJ z%B>u^vlqP>4bufKf94DdD30GbXn9iHV=;xb^8*;ca%G;q(nDv!!fR zgtg8YG9r=E>%S9SuY;_fc+O0MKg6M3N)=sX7bAn;J^x-2J@AC?)vujUBBL*fjC0)P zrpEI77MP9w`)|9~RN=~8?-M-`vgb|s=&V#SwS&`-|1er_+wk+uMhdU76*oZW73*~YxkFlxedl~Yz=Xly>5cAb3dasJV4qbe&O0L?2LthE( z%N^JIM{`l3&y;ak@1kK18V#!?R7u#-YXXivYee8)T4>nSw~zjW;0iu!EpEd|Qe>XX>)Q%35O@bBEO{oJb++(_{xI$K)lN|$Lawp7lGD<5+h1rY z_K))Hn`OLPQ*PMw91Umd%A=7C^hOzXk;BPd^@^m{%2HsBUKWb_?` z;)Y-haSS9x0&oCVKSQWvntXx&cF<3%)u&H+<@_!ty1&Hd?%r<=Yi@Ib5Ck0@);6r4 zs3_lNrbVLgYD4}*Zj6QKJTa%sA!)_D$85R`nnP=v)mSYnEy>Ge1*S`ml;%*ibUvZR z$<4}_UKdL*s@}n?oj@Bf*nW|n1scJc+RMi^$yDOL_47lM`i>L`YF4&Mi^EtKTqu3C zYHz8ctAF`cj_jPzv+w1jV&3Y^daU;(T!tfDfK)gpNMdb);*$#eT?q%@(6G)x zrpS2nRJSF`hG}kQGJPIQcE!?j*qNuv??Xt|_gXqR= zRd4YGU1rK~1kc#1lKf}kK_ug8k>feF|OS4WnA|8iZX*#UVLz-RrhIg!>EMteg3oHto%1E62A`PW#POLP9zj%pUT^o z?um$k*hFZxbwOKKeoP8qeP8|l;(2;u{?wEfLdtmuc}DV}a#&vP-D-Z)vqts^#ngJe z-$(ai$lB|Zb?hOS0>pPWlI~&)KNLN@z~|@A5_C4B_waYJ8bCP%r}2X)UL;B*Xguk9 zR`c%581qL5Zx(N!S870kn_UJmmN`_Jf2LkIY}V}A+cy<7rbo#v&ski@l?6Doet>5Y z1zjPM^Hjt)r!-%G^4j=jnW)wz%621(~ZvWLwd~5tr`$YcQJeNt%hdW6v;H|LB0Q z*X+aO;xV7Xu8RKQW$YW%e!Jq&#Zm)AQ}4u7KX7jZ?T{IW3u4U$m{eWh?GLB(GTmHF zwLQq`vfmJV&?w+P1#~msk;5hUkU)6IXNOylYLyrv8F8NO%Hq-ZZmO(-kM+$@%XV)X zzheZx3z2*_@r)x%Sl&New_*j((y)j$7Rt@y>Qmskzfk*wwr*}rROJt)J$5q9&Ke|$ z3rv?beAVa3<42 zDI}2+ehmh)j54NpsNHS=qn;`;2>Ybi1vojX_VVo-R{(kfQ$-+vn4WojO2lV%!xI0o zZoy~4q`9es^KT+sr~M+_ci+x8C-s#d*Zw7WwJN2hD02&hP_Fud<1u89=X1g{8!#IE z(~o$Vy_FaP9;J>I_mX2b?{hKn=V^>WwmiS!zdE;ANO)#1-^M}wmWzH6?2g{ zBFZo2T)Rdnjz)``=II9p%hfNh53`8LW*Z4(>?ia^{Fcs5+a&ggR3uhT><9_Mn8|>C zv8BSm;PmL@rgk$x=-PzJ!agATub3^=+e9YF+=#}q8MeiZ8rCF%;r`-T`s6OyDVE_A~c zXYWf63sCwUKJuHAkVtuTfwya@L+}A-#@5nI*-6CCs-rw4&<{ILQX4Hu=Ap~a)ZB!n( zw>D}hH!ZKcoCJ(RITQT2@~KYmuFO?%TY$pqKCte>rcHv4miFp@YyuhMpq}G_${DcN 
z77XkGQ(8J|V5Q-V2>z_)y1NZwf4^3Ic{Wvl_CBCV>QG6IO(OA?uItn7Wue`unsOYq z6?8s9c%IhVgC!uda?49~aW*YdMh}sqpePOSjpcyF-;|NbkH~fmId>hN=|Kv}kf6lU zXDc%WocoM};|Ra-3O|giKp8l~o@9!SLW^<*q(rI3R!r$r)dD4@Bx2Nh1|rqC<;SkW zQ=CMfFj|A~&MJktYgf)5SRoBMA)^Up+QY?9HYTNC-{MO~f_nJ(Y+4h2BM9es{#5dN z<{2EM9HhoZi|&z>kj{lG=y4*~q#Mh!m7|VwsKkHs z38@|yp8d{WzDA8+IhfBCJotq5hqd&{(igC3pp{J6*ZYuoJf>GNHgC_$|8x$LZ1KLcFEVmE2=b3m@x=ME>ZLBY43u#@YO&9- zn%q{x40iXbMrB{C>0X?g0gpA6vhPc?>4$pRSAx^q7ak+jkQ5f#;tMm@BVsG*cg^5h z2sld0kwv`@GzFT+x)*cUD~x;hweJChL(XH+vW=?k#S!#K{iR4n=Xn8@22`L`rBA4y(p24nBvX%{J$}!xpsiI6aMse56nSH)LU&jw{ zF=pr#Uo-%LxB^>V&={dv;6`{5t@yp|kV)e9Q)9niw^7j;5Clkg?AQ6k4qWY{Mc(*d z&{xaSQ1B_C{PH+lMCOhr7I{eJS{3YkWA z*1V2>Gsb{~yQL?(iAdHt)<1^|622*J$Sq!7t^!7cJ3mA;xeerI_Cw9x=PgqWp9i}q z4?LlL>au~y%4a~zcD^%6ReoRI{GrNUCf6PJBl`yQenFeW(iL|aEu3;C@!nEJ`mp{V zfx#SHWdUP~#jl78FxdK?(0f^jVo2m3CE{8w3;fW+^pvW3N~EQf*u(qQaCFZlG{upb z^SR7Xq4CL5RL~46CgDwi#_xsA57uCq)p2QeZ5Jyo^zo{2$L6sTlV-g{@>c^)OM$(3 zm(b?L+r7Fa5Ud-``Gp_yyLp?Ft)na9Sv5p(iE|h=VC?q#JU+ZEe{8k6`PWQKX)VWH zP?82?$L)!mHB)IZAOHn-|HCIE-|m_>$gG!8opUz?N$ znnKis-d%XZQH*&;C$r{!`4t#Yf-6nl18Sk1O?ydwybcrRnRKv z6^~Hyicepq>h}t2Pi<&bo3}0kQ4%?EQ`2e5t^Z@sr_Q?(4f-tW=XCphDbuAmCcgbT zR{+2k7+^E5?@hIcd9kumkKGXTNbbFv)D|6yMub{knHk4GQ&5VIxGBvSFm@St!-}TI zh{TZ-={Ti??LHd^yTL*fRH%!~B0C|oWb&Eu<$u=JJc8t-(c%fgfX}HE*JqO(@-~|E z$nMbLFqPB2%iMljRtiaJB$LiiOjazW-=|=_AB}53QF&FA{MiRZ+(kgXbkO|KQ*qN2 z>?q5SU9PvbNGRth7wg?qR8p;ClZxW7Zn6>qi=XC8uO1RJ{g*#^Y(~xtN$?#O75(}2 zI7{@uL}Xe*QF&%K9HUeOB=~!65~5JTIS)G zxAiz01>bRSbo<^olKI^P9GCgI1KDb8@7@xT{w=L-dulZLWBz^o4w1E1Sj#>g#S}T6 znJCT6PG7~O-#tIN6MPZ|apDOI0z5t4G7&*eAe=x18Ra)4F+bZby6|G7yKR#t6sb}t z;*g5dmL!P)?@EdNH}RA&V|x7o6qL!`|X!*>5_&$sc|5_IcxqvQ`P z>!`gPjx=%x0=)*GBk_BrfYy&M#O>q@rK=3L=Fcf{xL%nVI-wT65c=!==TA3ztUYoD zF8k@nA?sUr?e5idN%S$T^u`b0E9qqjY!Z`{ItPvk(z{kMJr-NuDSi0iUbhRk+q{fU^>jQ`nx2P3{GUNP^4x4nl42>R( zS6T-L|d71E_pG&GhR^Cx9MP(^j2c! z$hVDZhAThop4_WY9-UVYiAuIxKx(x>$1VOIw(ZAikKi_<1V{D=>HdL%>J{srHAtm~ zudn1yhkEehgZB}fZ5TSm^$h$QR=&-uF)3-PY4L%$YZY_L*-C_ax1e{bJNJ0=LZI;% zg_2lmKOzuS>eZsBftB>6blfE4O{*%9ZnF=PuAH~j+Onxa_)t-FLcV$>ai?zwW6TR+ zE?CK_KD2*3@If8gPb6$CT~vf%+iMq70i_bMm>FO8p*38 z1TcomGc86ED8&TK!3uqy8dx~Q#O!C^$rlTT0TwNr<(0&(o6&_0{Z)4QSxmR1;KAp+ zv1+ggyMl`(Zs5?U|?iFvW)g8LphdWzud5@jCFHZ!c0;#Z%? zPzqrzh=^thEOyM>Vxc_PY>1jJ0G@Ywnm;z}RYkvVn%^Z7pRI&Q&iH|shoY6B81jlv z(vIm$pCG>ebu^!oj;3-9J6XWoaJ2vFIfyW3^Z$&57Q&PhRbZichtbgc^`*2#BXcvs z?3}E`tl-YHRk)MgIg2ok8kL| zR*!p3%&PMSBtk~ysgn>zAv()&{S_|?(y3AHLo?AnLDc_f|wCR6sh1?hcXe?rxAS>6RE$x>LGKx=TtrrIGHE z?igm?FTeYK-gBOF&U61Ke>t;f@3q%nYh9nK0?;RNMze1&d|KsQ6Z|S1`s7_d11MQ? 
z+6x@f)_-=jB6X{OA$^_y6NkG0mW)Zj7!o>b8D;nNM@qM}O*a6v7^<#m%u&Z7fE7&quHBbBP=Ytue9JlmgQMXmc*1E6_j@dQFKNWbVq2sti}f=G6C^Q)LrM)TheWGtDK zIvla8A-p5Rq{bJ_SgT4%wgvi(iPUm49!x`F-U6dc9e4JQJ>Wt!;Q$t^yeB=!*H&@D zSv(D&N#t~WK$?1Pr>l$zTjQ?xu3j0RT+dcf6e>2wgvS7ytH@{irmq9JNgU0DwwfVzQMR6Z4vX(R7GQ3SI!D2vYf})RyWnJF*8B3dO-Miy*M>ZeTLY+f{)g`z5$PcZ3>xTExwZnT} zDGiA6V;C>{{b@LCF|l|7qk(NY{aVxE0I4Fi2{{Ma=4a~DCz?oA($J+|X-xXPmhl~B z>7Q~L3`U%OJ)E=*%gw-yiHXXOPenvu&Wo1ulbxD3jM+xC*yct05R>-l(BaGgbiujK z--ePJb^H4>yNg95 z+N|$fo-%SV$7$!Z$Aav0zP*cKYwr5Gh*;Qs6*c{blWae!a#et_WNf@A=q^;w;l`v9!{97zaR9Ai_g#SIXB-sN-V@i$H;L0QeBx z6XILW>bn-vY=#BkAH@B{*>?@v9^Q`(4`Y+7Bj$D)yFEJbHPbu{p-T zorBi;u{!9Jk*wejJPHpW)$dCbrlAIOYIz3`Z55GV`|!V9yfT|{oNWXK)i01AVZ@7sw(bE58D0<0z+=!xv7~t$#PYK zMwn4sJpmz*DTD*3^$*`cGok2403rVA{aLdJwWg&ad=Evnd)2$I!EL8y2P)fu-k^Fx z#AUhpDUV@}7fnJMeAKcB(0--J*24u?2{dYqDF=tGRw>~{PcvR}DXtuG!EAE9LqwD9UVk>rX$=jdt)PrBZ~5$D2VR%{rZFa>6ZogYjn4 z$|mZdxj7oYao$@#@(c?JdOKtB&tq})5?^|8-!r@?t@dJ~9c%vTP;Y9!|9y$tJYWiJ zi_Ikc*RFqKi#ie&aN_!C=6u45mv7gwqp>j}#ey#S8buT5B=giS0;^y09VK93k|lii zbhZ%o&eb=wmymoj_Nq@`X|-uW}g`d38cBE{KSooYsY!Fhm1Nxk8RAb zX6|VdLTpm8oB7QF(5jp3gIPOGn6%2(5;esF>{PTEqd}qhsq~TVj%EW31&ujZ47(-_ zQD&95L@j&6?xSRs*k`0btt5Ml>oQ68`79Tbe|}mBqYl0|l$he`$R^5vljv+BxO-ai z)G^T4e)$6o)6PLY5Rub=p1$Bj_j3``>|G`hj_X|g`uESBpGi6A{m-k{>+R0`Uf7MM zx#Lsu^#DYmq*|*JxC*sX4A-NG7mxru#OO@Zmm~k;>1gZ7m;27!B8mj7!!@G|Ik*e? zbr{13a|%3#qQ*^ha_p_^(M+Htv_B8dB!8~yC4-{z=JQo9<^`??PB8%lp5imUMh+MF zyxy47y=MTAQiw|s*0-74_mPa5bI+ru!*7ywzt=;h$#VlIGe^-bXuA=0?e%$3ZP?7#&+z>KNk7ckH^uHpAJ>rGbg zinu;grxNBV#){442Wayt=-1%1bq|eiw+GV$FHDsr9!<|;G4*E8quD08A5!z9LeC*zlpgPx42hP`-=F{F z;4)^#e)$S%@b~h~iR&|sP6*M@uR@7`qxY;Y1#b#oWoH?eAQG#rT+e4~jH?`$`voAF z7H}1?FnR9GYKkh>p zzt)}Tw|-o@0ix5{$Yv5MEFY$=RK2y!=52nI*G}beZ)GNsNa|PQagjYRr=*SY-hE?` zFdiM3`MeF;AC7pjO*4i9_00f^TgH(U1a{5Jl@jk7`kyXem449Q?FB#@1RgsCSZx`xAq zVi)(hSAqZDe>)Fp7j#_q$O`k>yUhl2-SeiM2}w*h`vnL00(K<-qKFqLt%ecUG;4ip zjJ{mTC$|7J82eJFF*YW4;8drQiG*>D4{)Z{JBtzs4$cee7BURWif05-%V%i4RGsQt zzFcVH0n|%V+u?07-$>L_ucB~SdzeC17~I!&_7!jkx4t3?lfEHH!yH60R$M2JR7YyQ z4NxOZH6qOwb(nh93FeTg45UZ2FPI5d+Gzf_nxsS#Km>X6Fu!_b8U05RxL>ocwjFC3 z9y4OX*f65IU%j!IkX@;l1`-aIfFKhx%snZir|UVcwUNK&ZPD_W(gVRSkR$IW&CnGw z@|f`~;~RgKeaZs)_J?f+8ovbZ0@zn6TpSZPRtH_KR$_dNB~z6l*4o9x!c<(kUV@$w zj;T`BVh5$jwb~sGT|688NWo!nFIFCQ;_9MKsNfg+C9j6Dk4v~nL*W{F;lGPM35KKb zoauER_xRkdywO9i(=e|PUnUQG&aUf}1uj%<+lTr$O7%<|8()Z23J10 z{Y`0r^8!2zZLja<4Sk5dRyLS7bLkN5RxUPT?EllZ$iaW<{^*}B;ycePRwz4P^68_Q ztm}8+s@_nEeM?ihwa!H68PF%vy`tIS%V5)j{)Vni^6v`6eG+wP0|x)WeHRdKIAl3E2ig5Ayyq zPwBbEss>R%7 zF?|W{$6VQhnKkCvb&AUCRjfNr(vYzFHIh{6MPrEh8E)R1fahuJKH(2VV;O6^j44zq zrSL_U0z98m+kkufa$J#MoTym~0Yt^DFm6jW%EC*svffLjtl()(k_jGLo`5$8X+$uuR^22yc=L-!#{C(L7eEj5NiZL%Bs?nZFPW}ktCakaqS(yKa)vh+@0Fy z$6wIJ%g|R00dntgtHBhi3d3$1(Tw!0H40&+m&i zIO%R&Bf!=pIkUy$VO-7%`=xLINeebW)1XgLGV=a9S~IZFLtZ(-oe&TVKxkZ$VNpNO zqm+b6GtnRx>;j*sGU-1mgcmRQh8&wPw9?mZ(LK0y44^P~#2&+1KK>&16^)&%xPSU2 zD}3NAY`bluyFs)TanD*LZw?t$ z6ND6(c4p&hmXHK**!#$cvSm)Un)!dAi)D<;TY1cbM!u7qUr$FNf*rSHsxu-%LRN?@ z9lum|z_YCrrpL9leI`FBE~l%r0a>bd8NlutPZuY9R><{u_?_cCcE}&5l*$sj;*LZp zy^O;|p`Wdebs!z@2~;i(UYDH<^xT+3tn&@ZxqNowQRlvE)(70~Y_Zs=jLYLKBGwIF z{CzR_1_xmtm^fzd!>?@1$R*DPNnX?}3L(5Fw1hI(AbW5;^!CNSFar(qNyRVcWNFau zDFT&CG7eJ#g|ce4FcThBYFUU{ffxC0>s<3zgb*=vKwX=`fHdaCWgJt1^0(7hLmUO( zp_wCpi*3_2L|$i*6q0OODrzya7T$L}w48m>%KesSlv3853fSrbeQgX4=1VUjSkM_0D@j-U z-$Ye~p$8^~JTXD3?3h`M)A)Jak7++oYDX$Jev6`3)X-sQ8m8!frKLtC&+o&bmrCgI z5o$h57wLRkI)145&s1^FNBCv_!4$*IMv{C6ef;6%!1sJKI@|o_KFiIY`WJwaE0oob zKF49o8&AvZ6a9`3k=-~j*41FSqSIxLesCB6uiOw~Y%EE<2s8zW2hGV34hz*r5sYHS zd_G@~bV1GmDs_5SC0p)Xz#D@cn8tG(?E 
zo}P^b2HnTY5w!aznnNEla&`5v_!yY7dh$rPqp7*>AN|ezqDqZH6V}GQf?BOz$`QVEwZRL)@sza%*hAv)lK4BHzM`5xRD;JpI5cRJs_X@CA}5b9=K}P@>?Vb8(^05cJ@10M~w!<}VI|uN#?KDZkggX%LVpP|H{Cey?)zF^t%3YIh z13En~B^lJZK8ObCk@XF{O%xsI4)GB{qutJDHa85C8P+8`iK$*Lk*%gW7m6>BHc`?^ zufBK}tT(ks{39T~Bj1s6iy|r;j6>@oan>+S!%J(jp=jKEt~tgvb%* z=#et4y#g-*-L+yz@@FMyW*CJh^h340@8I6oFVZ1zNs=W!-+r|I(ZVf_42(<5d|;wF zr7=|A6)E_v5%ZxLcjNmR0|}n8G1_*A(%Go-_$}-Y@2yrv*;OQSm+(nwN+WQ@5$$QQxXC*sR#cjZkG)R%W1rvPY?5MB@3s3M_SB z8ty}93`qHaLRaGQw0g}CdL%@9|KR`VkcHFn)9%#3E8C*e>$G9jqN<@oHe}0k5degR zfKwI~E^9zg1E|IcyYIqkbkG)CHs|dov+w!H)zaE(6S->&o`1D^(OSTQIets#AKX}@ zEws=DDpjwV?2b{E0*Nj}Tz`K5E{)=mZ^YL-bjOAg@|UBgWiNP)Dq;as>jAa*-ZX`@ zIBmMjwg`xzELi<%$WDVnD|tQlC#K`-lMcaF)hgExLAad`8WcXm<;~aooiObNJH-r9 z?`7dfAFt`Zjub~9yd+M$_@4CK7bhxsMdwZ7BfC(=UpS>IpD&+|>P%(EV1V~oe|DlT z=%QQ3)3rQt8;S7aqV}v0UzOmC z{mIghC+n9%l#G=~#Dy&9=&A_wiL6ZAEJkAh&s6Y!I*iD%7kyUrgvHk$`tW`0bA6-} zDW-N$G>^buP`#OmI0cFsN2A$j;#O~RPI3Jqg;-*d-q%^SgX)eK3f@(@AF4g;($POZ zP!uWmH12I5@V7Flw56k-9!B}0v%o#C`AhQ8@R9%RkFHyhoRgs8ylC;U>-ckO(Rg6K znmW6F>Mn9fGzL`_?uWZ)NF`oJ8vPUPMkN zQY#t}%U9d)`)Kzybx&nGxMYTN-EYI!(UAhOHp}~jpL77RyDS4H;BcROK|*sS%v?`U_ler+ZC^riNH|J?uW zO9&|n<%|a*-_?AR(Y-O+3i(FU%^azSOFACo9Y@`Jjw)Kd&b2GKtm^yPbam$4%Qz4+ z=gvGZAHAiPq!>ygPpAUFLDjX7G^DB%gv2v!0X~)#;DK<66#7_u70~|3PgN@2Qa@Pb z9?RtA0t~NR#ak3*^55fOP2b9Ae5&!n(i-`Ht?a))A!HD7=DQNmBN=9X_sI`em_!~e zj!GX1d+ILnI-*{(1m1jg2Jz+KJ6~KH(jG*tP22C{n7mQ~6rpd^o3DnpWWiIo?H7OE zcb?GdnCs9+k%*Ca;iC8g{!8rn;^ zFn{#A7c`z}7qSTQ-v`m4h{yAsVv?@PMNStKXe`ToWNN?vG&`E0(Xm^M9&rSSKh98c zO{6zKV!JxbI=1q1h0Jwh?h*(p1zSkQ9rP z3R(E~t}DO~_c!U>6Yu}t_W!kg{C|S}`4C@8)%v3daBNCg;Kap{NIP+T;;*&#(wiY8 zWE#x@v5Zt|H@>o73;Fv(PPM^&y`a#y29IG3xq3knE`K!hth#AbxgRe? zwGvlVeUTVL{=+Zu8~|<6OL%i@OH7p0Sd2vQIV_c?PxUe{B08m@=5fzjUdTv+dI~9LbOL1VXQ%Q$TI*rvNUeNn)BXJjMA9mm+G_U4P$5$N3PV@*FkF|0Z^MxX>{H41 zBVU=0bh*xfQ3ZYU?3n-Y`1_&n^FQxTBhw)eHk1@G3CP?Nm=+--S0fmmZW};kCMF~+i$3JkgJi;kqJ|Px^%rFYNIanu^-2;eGlig_XUJHFuk)-u4-vUr`DA~POcc{M#lHU# zKd#uG7l|cp`vC026Gh0)2H2c^@*dD5hWxq&bmMeVrUR~v6UEl zrB!1rj(Y>hX?jvQ%~FQ{NJ_lJQ;Aa5oOik?pw>sk+QD#oi;4`O**3oXsmm>AurXKPD9w$J+Ij{dix|u+(~zlI%3GAa6@D;R!>%)z95;;Tk=t8XpW@uuT0E_nZ<&_6>#*gOc!FwT3@#A3MVSz z+>_~JRvoG|4PqfCHQ3Bk6+uLbD~&}8D>eJ~{?oy^@r6HBC}G*b=`bsT1kebRNTH!q z-$mbV7*;|PL%<9Fb^D@|lSw8s8xr?NPLjux(~(n*z3bY?m?9JG8FbbG1oWZ9>Zm)t z;?0(rBS5^KuxJG3MnZ~A1yojKdplJKdZ2ZlFMd3n#utTfRZR}EA|(^ z%?%JtMbn3eu{u7Tfo@ON)`Dc1YY&KTi;e5a#I!_T=ks6v7ELz|g_59?7GU8R^$7GL zq3`@KKxav;sT!W#15^Pq0uMA}mf4f#uU1-zvPjfApyskVcQ^g@+k%}^_AuHi#>)e) zgRO~91w-F!`Na&+vPhT1I?4oX&!R?wuFSA`39k})B_@-%T%?{iK{DUj%jSgz)(%S2g)5YYQxsyZi)tGOM&&@#s!p&*= z4QP>>s1_!05bF49iWU5k<*9E1p%9n~=JA=oe>#J-V$c!E@}{Z!3TK=tcL*$maQJI= zbID0{gkp(6ddYP8M0P@!!fh;B;f&v?Gk#+bF#~Ll;-W|+Uc_zWYd%?lhbP4`_ozsi ze3Ah45FU?CVn-7O9jxOBHf4J`SBNUOs^>7eB;xPli7Sa(*G@A0apu6*TrSWlEb^^$ zyTj!c4Q#~h9<(sPImG#6&N8F+z^UHW*(F1T`9VN{1`%h={_*mum^va`aEJR~uj*fm zvT>8Llns1)*alS7_dUl$^i{?(Y;?utwzDQ^Uv`Aq2qvm2)~s8*0K@QQ^xH>*7Eivp zlGV&`RpPnc6CJr;l3xP8H#75BZh<)q;6$KIMSMDtDG|z92TA;n%R)->>rLk&^~*KR z;W(p2qbpbfVjBfwlps22UphPX3C~AF%4b_z^I9|mEk?qWi8|U+P^1W=Q)e#EWAVT( zuR@h*-S|hJT&-g)W&NgjzEINej>9DN`=3Z|`3T0(*{q^6FEar)T)R&W{S z1e<<3^`PF}^?yRWXk37)EJ`~uB>jdKAJLCNn@T$gv!F~E^iL0fSC-LAkNgS}Rz+@4 z-j$SVBC>!fz{RKR6)#6a1=Zmc;_&g4_a4Lqf)I{#0^ctdChP_PqA`KcK;Z2c5_cr? z`iObT$l}rHGaA-XLKFd4(esn7B@(!tqKNrhht8z4O-PLjT8N{RfUFmtJkeQz5XDJM z!jOO9a4^9-b1_9}+E;xQm93AiS#h?gw$xZ8f+4u-uObzDDVHkd@` z*wizx#F^*)R7%*NIuAJ;f|os-QnK~FP(2I{kV*X7VP;4~IlxV&-C_qU&v<6VP6D}yDZT!Z@@^NC+;I)yrq%`m_6tK_joZ6fYn@cnbWo? 
z1X>$KfbM_$)+@ReVF4?V!NlWWLNuZ{s#%|U=3eAH9uARUPPHQW z@7QLpD@X4|%%HUs(}k!I#rg)`rdBIEvNS6H8xO0bH{f_}yMFXu*yy+Ik@(;NY;w6_ zDGVo?2|T>xk2z=lQE!N%x#rs}glD59m!JhlBbda#* zhepRB!wfjXvIkV*8M69HYyxOU@810(5K$|Y8eG1PJuc;W*DKTlz?q$jsX)OE7%r=c zjh^B~1_4aJD`5W7O~n(qF;xNRIq%LfjUzBlRtS_&eI)6|*xuT92fQ}&Hg!6A)|J;~ z08ArQYiU5$zRzJZS1x7T%Vx@kn;Rfh5*ssELMrx&_`Wxejy8~3gxj$YE8Mj713k~j z1$MHMri$DP`EN&>niX;&U!6HQq!*4#Tpf1d^g`qu?h?lMWs$a@iNkfdn$Pq2cxtXb z|4Bd#Bes3tBCwOzpS&d|neycT+Z6=jv%C}k{7DUvt8 zo@g1Wk{%x$UH2T>Qn)0))M+=v;^IP=L~dm~y_;dLiBG_NKJi>9uqBugY+`j9M%VyT zoPvPA*xZX;H&Ch7o_;=$@7s_C@zj@7zT)l~#JG7SAdDbFy#;Wg>3-*v8zBjfhP`Urr}4>@Ut1^*na#Wx9vhT=?c&xi|ZT2llV;n zlvTBPwj$X-P2>{A@;ptpk$cKeVcP-2XrEMvRWiXLSuWHp7zjDKh4mev(=8~W(>Jy za+Uhlik6AeDg3;P4(sl&f9+KJ)3;#w(%T7TTD)GtCm1^;FhO@8+Tvk z-44+MfkfZjN6_R?YoYiWN7%^I#Fuv`;3e2+Sj3ZJJ8n-?HRV``hYQa^Nw;ae-1FPT zQ14-&&}zBakyWHnrr^a3t0-U`GENb{lSkHUz!H z8QjjBL2a@ndOp{)!dY(#J~9w_9n~D%?`6k4xhE<8!IM!GxHoMDSQVnjz)YkZfIx`8 z>j_4g2H@_*=hKRU{LcmV%rc!?Q|Z?2cv-R-t05rWnIv>1+Z3D5P?s!GL&Cfw5i-3~ zlgPpqL;P?xDV#_pHdw4DCS$rAB~enZXv6%@1#5=k3ums=K)dUK`-D$2>jtVBM!keI z*C0d3G-=a|1Q6X}Z!a;O4dz~sviy6tanHjl2#xj3aKkVvMaIkhUhLdh*D`hRxEzx)$hSL zjSqCV(*Pg`FKEK=<*O-xGo4Q2fVK%B8@+m~0E;A-VcKHgg&7?k?Q&3lHl6__0;Ra`t#p|j3i;baOIb*~F z%htBF4wXW~WGyr|q0ljtPFy?&@+s)wDIxv@njqN2dUwy9kMId{{Wx%H*d(|N4c{oY z1NZ-({IC0;{RD1tEUMADFhPGGV==BzQH7T7;duk2>A!t6{2B<`xg$p?1h;&V>$ugW z2CAH?7ZIFl+jae@k=eMLL1?gQL~Mz2k_(*tz??mFx|iY;e~yrI;aFMBh7+%VVUI{CvGG>pRB9t&uZ7GGQdq+{1sfVG>J!=&5Dpzks%9w%_pyfp>X%VrtK zdlC&9KF2Hr?9YQ<48^yv`h`d(SnPoGKr0y$>&om1xBW`INQsF}JSfaXEJJOi0p!_> z#AUafI#;QmUZj}5O5sq_WH)6kvQrP%O49JHr-jPl>yhHc;8GD~)@R73)k?f+Ecng% zt}b-r$i%veFn+wGUqColNkX*h+|jYaPp>+o7@sWdR+Kp+X04a4Bbf#Iho;tc0Xlq) zZkgy69vONMwOM6f-`I=E?p((6g(bSzu5~`=46~LZ%|L)?dIWJ05%4#M_Ak|%?z&6l zGw%VnTjvQZrLq1odtE6DuTN8T>I8hjGe&d80-Fztdz$WCS-_sJS5F7h%SfdSlPBQn z=9pLT|J04j0QY(`s9p-Yx-_0P?i8J_g6%uM>O>_Po!mP&KoR?TvMcWfa0*b1#cMMO zLe7@~rkW~VY=lu~Nde{`mT&>+B1fT^&p7yL0OE}Gw{$d%pI2g8$l%#5B#dJ!iKaAK zNxAQcoIl!MC@~Zb-BU!sbLp`((gEnm)d-G~i`)T`q#rTqrt~;Yb6)-jQqmTacGjHz z9gkt?=#2SM+AlDD=J>c)V!yZsW4qMcUzFWM0+Mc2IhQ>ZHS)U($ju}6Na7OTGm08aD7aIuu+ccuF z^hT*e%WY_O!g}J=fp#tu*hiiEvS)M6Ct%ngYwZ+vR zBR8m65EhChM2ZTo9S$83pi%=ufLB4@$p<eG~r zqHyNE3P*GArsBTF2pdLV3+~(JW;O*}K_bKtfHhx{0=X?1SdrM2&nVq*hf_ zR9Nj7pbuhRAvS>Sm*2*cZKF$yC8JSyrd1w(C>D@P25Kk;R;oq80*|xGI`Sou``56{ zj^(%CUYj(M;O2gKDMrqmw(gypcQ@L6g|@abX?E4Xki;j3I=;s{zm+ci z+;irB+`$EC56{0+d}e!`PA~-O`k2o_u=!4V<=?sMdEWA3$v9W&Bj^O&0%WT|)GTcH z43j;*7Ce4T=r|BO5iDQ)0_i?~PFBDDyIxZy9EeZ|uyMG2hUG4U2SXy6I?Utf3uvb; zu@&#H1Le&oG=A%g1UJY$+s*)qZARAD!%~;7mu7LfE5ap~lU}jSz|7ec5KlYxl~Q?7 z^rKAUK1_S?1KjMe22HfLiP=&gE>asK`LN$lSz@Bo>>Dk_k{}O>8n%`tpeYNSANtZI zbYK@|bDNk`U%dXwBModf^6{rC62(n>(*RawJIhl zd65XP{OI;De*6QK#7miCsvVxj?b(-)Im^FQ%^Vrt?{0Bb2L5aF#jQV|TVL4S)8GGf z%SgH;$XElrGHMW0|~S5h$N* z*y~*kP}Y1yR`s=-L83tN1;l2(Sewkr;O{=PCLr%&Dm9^kH`4BiNzaq)`VQT%&oyh@ zhJchQ4QvE_EmVO!5{aJAaxC7lT(}0BxsaLxVp*VWU@4bB+mL}y2aId02IvWkS%#`< zH(0B0H#9W3vY8(Y4<^0e8sEz$5J4GJjSdydCDa!iP$TAfel7s{tx?*f0Qs*6Bpa0dS$tlCMdFX^@S5!%q^CIz z-xvDuv~|m@sND(P>U&;OAWu4*a(Ns%o-ls15U{lNRTP2h-;eHg$~2$StSOlHWB00P z$$FJ}5OT*3fLKQmChK%eQWHi6a|muiIIeL7OgCYuFJ8c4RmQFA|3#TU%6#Le-Zwj7 zQ7U`-^B4|W34>n4IHpFDAyrGM#w0ASe<8`-9y4lzp#PhGO-^rs-!(ge*n!mP&mmvf z+IQa56`!Npwxa->=2wq)>AV&I3ftLYx4yhkHf6U3Zpw(uWz79{x^1ogZydSgZd6@ZaSv6(GX??5g$>|D(R0P!b)Rg_apOUntsA#lN>*O1$30bBuA z0MNMQJb+z{`=L@ObNHty1K?Egp8)KmYE9<0BQ|$HA7H063a}G50fr3^mvJwm*Bk)E zp0Be|q+WD>h48kz^&&^3LdR~s%fAvBL1kiQo&a!^+>5HYBCoj|H^thH}N|av$`tRN*I&nNG;@ z1m0=wMNlzv59Qb{HAnVz`3t5I_1|@nk|AfTtBEr@y}x3gQ14A7PsIC>BXz3;v*jJG 
znf%1A(W6e{i$bb+mGZtB$R7wrqH-FMas)6Q|R~6&V=g_7{}e3=~Gg=I+OU@&JXyOO} z-y6N2wGHUM6?l)g4`+X*FdI5DB{S;oM)TijVerh9DF1qQVqVjrk+)p5zeXuQcM#{(k5su})4-Dk9if0J4=ye3Z}P zMGz)DmEOQ>mUEkn5O2a?bq;zB)=5Zet(kL-hJTW42%i!ggbIKC+4`kW*}$gw0dtFT z9VZJeEuZ|Bay1??cnKm)Ek@p++F;PbYKwdku6QFBbG2+RNR#O0OFdgleTD&{zADa* zjqJo+lfFZ}`h3K>(K&QWeGgr9klF=(%&gVMD4${vHH9XP3MSmU*FvS{4AD)lGFY|2 zRDVxRVck@3hoBMEL5Qg9ru3^|d|r>GE}D^Tt<~3bYE>h>~dWX9L9Sa9B6r zez~I6#-jE;4kytUXH&x^vE`;PYDg)=B>|GP-gC`(7vOE&0+@OxZDWl@6u!MCbv<63 z)dySx+_LU}J@?PP|3>(mYCT)FGXhtWg|0n&w*FktK=KIiGo31smRJV(l4I0_{_BBC z@Byejai9FoGRW_TFoIYK1J#iyxc9ij$g!X8Z=5XLc-ak1TX>mm`~g!nDv}yh*_q|D zF?pXa?TEp}0_nc2`ZeB!PjCodjBPjR&NAMiBE8AuVsKudi0*QT(L?eUP01ZpGVypf zh_HnO<=?qC%r6as^8Lgmjz1rWmA;FSBK!ABQeDRUx8+O2W8{U{L3`R^LVXT5 z>G!lUITrv-wH1f~Rlje_wQ9&HS%mQ4Op+jS1J@slPjSv0t#26)JxJz1h4BE6NxRKz z`#pm8kUJpn!X151e)YUPYj-2GMDk~AX5 zXz&|Iw>_?Csj<|{#TjRSwfZkZ!0l!0iv1|fVuv;9c?WU};ztK_Kv75+GqAwjDq@|J zd>l-;VGpDYS}0nzDX!>0EeL)>`Ik3?TX};G&4!7IJ@YKEur16ZF2t@wlHH*#Yo8UgrJ1lKf~V0C`I@BN7~S2}JLC zA0YZrz4Qizf{~%5ze!lS0U%38Mus<22U-Ap&o1Bg9&rz0CFzYr_5&#m(rw^6g>Q^g zGZpEx4Qr!zNs!k~PR3K>Pay!-ns_zT#7IA`z2{71hgdNM1tuStZ#)?#=`h&hkXywo zEs-gg|01YhJQYWt6|Eg9D-a(Dq%*i@t*AZecSk`^k#7gvP#S3MI@qiPl$aiHYAo%<$k}?Q?c4e!Gv|mtyAAI+dS~rIP2dItXMZ2_8M1#jT_V3 za9=j(hfcsvz5mlt#E4j_&b!MvciZobNV4?U7i@noJL&6p zUo*^1>u;|`V-BOf!^vx8d1|oH>04R-S!#>_2D0{a!FLPQl&zq9?y)#+qb%Dp6o%6sL zC#*6#bBCvX+%4*_nhSg_E9Da6kus%Ry#Zz71{=`6Bc-2_mQ&lEhl zlUsxU4f+?~e1sl@xxYP|nGS`p_5M1dWN#LH2WxKE&VV1P{3AboWh;NEO?IAo^_kGi z;SSl}9{0-Byj6T>xJ~R~+hMbq6aRSD*yv_|$2FoK<1hGE@B4Kp&E2q2p<2h`ceriC zTOrzyUq6R%c=JvxS}hu)L5i+tvX=61yxT^vaamMdbFv6O3VQ1hrhVDlAOn68Fv^r+ z_H=qP)8j1b2fmJZ%-c1qsaje}i1{^r>&7Nl>-Fd@4~Ew>hW5x}Z-hr{jR+@xBOe5nD}Ep7dnahc0H@NA~-wq?9fk z&O{K@zSpdMZu|;E|4T3=Wh{AV!kWq`96!4!Ot3{Zo#|ATypsWd5V@CZ*iBP$9z>ke zFH@qP9Y?)bGWvCDKaf#&0^Ca~I^2Jh1dgMvphPP)Ly&w=pYvEt;>(Su5q^6WY4L96 zksJ20YD`YnI=OmBJd=l37#>glm*G_RIi6lTHhsG0QdPIoj) zfrIV1*Qw10$Y2K_PD*}Sw=7Gj*C(tmp;naZB<%Ie8y=$_MczA_h3$7XpS|KB2z#J? z_rzJD$;vqoRdPN?c{7@WhhPjGlg9qj)APqUH_eRS3%?g_2EW|SS=uI2UxK1E*$Y2Z zBvoA(yUt*5!hrGGx!cC?K|L=biy7P}WSuDqziH$RFLX=%G-HT3?<|F+AXA*pvKMZ5 zB0+<@*a%@-MTE6B?=%}eT*8H7(wV{M?o;Oum(RBL1=ebVqn7|bqv}0g>ronZGNGIv z&dVRoy3FI!=N@$1TLPY&R^(qfNtw^C#V&E-NHMf#^qrTs$|A+KTT*MKMR?EdAMBEDYi3b8}$C(GVbs7sNQ@pJX%Y#;~m!rp|@8Nd7E7^vuO5a8hw1%RvKz+YMd^{ zdtY}aynptPxv4HaABJ~yZK(F@Uhvk$*Aj?~s@+*;kFv9@C*<%K+ z|8R#v&eRSzJo=BQwPbS$Cq^fpuNUtRauXDPWb(Am&3f3V;GCux!2&@Yc7^jBm`}WJ zm7?%8!eZ(W+93jTx=mMgpa@PV`jqnzirO$DEIklLu~l`{Z)oje3-?s~jdrFL>jLIB z)?me8ggj^X5sZePTroV-bni6T_1!KY2-ocT$@Wimt(W4O!7j-1+Wmxkh*7K6F8{9c zuvg+FQgE&?W%+M5v!f>eCRTdSU6a3IB-CC-aqVfz`M?^<>xr*_0sggp@Ka~QG}!3a ze!jf6|8+906J;(qcCL1|Q)~;~c*qGyx)hNMWHtu2)~uqTFQYo~TbqSf%00mGkvsJ^ zs`P?9i4Xts7ERhFx0xB^Y`Tn!HTD5(+ux_qMv(ac7ZZdlDL}mAv~l;<#$yxrE+~Bw z3dQ;8Zfi=^rZTc#`BZe*pOW+R=vJ}|T}|>_|2p;xr$tj>n$H$x1N0gbQ+Xe@Ix+4x z;a$RQ$$bEe8WSw7Qpfo6w^y^}%RyDa*_#{74IQq^Z4DWRKMkj9>NPG`hAMYv+|WhN zs>26Ydt(u!rF8Uwl&Xh52IXH5=?(2xb14{*6tl;snY%iFO@;f@Drqpx1_-Z-nP%Tr z@Q4cyG_$KF7wc<-=rewGt`gVvGHOxnI82PXNR6R=Ja}y5Y=Iabktg;vNd1_hQJIWR$H;&ZRd`jpf;Z_fJ&ked0 zU>IIndzs`2F-RtB{er$%X@htoP>2G}+k+WV*24~T{dNQCG$BBjK9Lvo|kP&tcENG z|fO8g*Fb2s%IAbB3w!ReOD9Do;-g zS;b2xG8wDQciFNRmHmKZ$lkx!zi9Zg%o$PraK3EcV0!nU2}4smVqF-tnru@-WT#s! zsSqY_Z|$@Ts-?ZtgBWHx?LT8zd(VD#pUtmZhHqh~p??4$4r|!O9G7WRU-`OKetfa> zbtK-x)bXK;L{MET;bYkkp#=P(mD9`NeEXs{bIgR=c2N|<88FU++em=IrdtsPYoM$6iqI(A{?9Z=Fn&qe} zxOUW>!_L;y+I-CVc1DJsVc**qqwqSX#tbhaMrOakgq=8nquw9OH1mz7{tJssUHus? 
zjQ~`{taPF%5@tTR=4ucLV3$9|jejdV z5zhd__^#_FPZBNFDI@1`!EtB;PRWak+dAjvIz%-FTfDou?0RJuE-Y9_N4tLG70z!Z zJqT(ibg_Zm$W!&5vrI*-rJKTp>pl#5V^`9e{HZmQa6lB5iTA*f#R|ei zGXtOtjEypS4jAwR8&dZ`8}TDc_YTf|Ag45m%f%aE#M?~~Y=hlQ5}^$q17Dhm+m;EC zr|8?Hx^X_)jtg>$zL6b`50FEi%#--!wp*GGDmVc1mOFZc;=N`x@G38aRu zdx<3^`He1ur8lE(%UW?NZEUOBnX`Z6Y<&%cY>=ZRg?}#ee4<+n9h|IV*wgnNQ^G3@ zJVklo4L|+<^)KeXH|a%m>2)%+IOe)&9p8iS;Yx*PcHw<^LgZm0$TNE_=KZ1F-oYw!MtoY6E;*BNt-TfsOj&;9J+H1W^!$%^6v%hv3 zx?@IyPD;H4a^NbdbJ8TNJ4MGO&^lLKTUO4&LoQW$Bc2SFIu%oAz~9>Siyt&lO_jTu ze1>~_xNKC!r@ezwP@c1$ob0&c$Jy>)+| z9G7%@oG*4|dJ(eP6{L*>n!Qv|=Ji990J+9xFXGa>UF0PS1XVn+ZNhdJ7e|SyyO7yO z=+d!mvte@#h7w@70O_PygUQ|9mv@_CxlNMHy)SD)INMDHoAxM9at^}DA}s#8rP=Zi z;=hNdbE+e(2o8UIy02sK`DrYlT0Ev+L$?$j6HVZsJzUHuaQ_wS4fIdQtTVr2ELNQg zbDffQ-sZ;+=#t9Q|3}kRwnfpfZ7C@cB&AWhr9omzQMx-6q@_DoN~F6RN$GBuMoLOL zmhR34mYsL+=Q!S9FrVg}YtGEIB^zH%7lR6ySVIOZY%U9oRBH31B0HP96;x?zr1sFg%af8o!434t93E**se7 zB&#aAGV;eZe7RPXQf}Lua^o<*#Bqn1S6t~W+UyY&zBczahTSnIbDP5*k?fFOir?iT zGn3jE(-jWo{+iw#H-elHdf)!-FB8HmxZXs8I30|ic5u)LI6^2?^-7&ULPGNnS6N4r z|D>>bpIYb`SN0hHqHQFG*^SVJn(ubfsfpAtr`_gzc6YcF2XDo#j!l zsY&tPEfi-g7t1w0dLs<#oze^O$nXdHcIT1C06!*IU*vCzlfZPI4o2dpYDVi{|Mwp0 ztvACVruwvX;zh#-Qrt0scXDQZP=l?uJuIh3q;q3%b1 z%e)(BtNCr^Mm&mKT668y0n(!qAjFeE=c~@MR~E35elS%)cd7R9Z|!&RbFmlz=8c+* zSa5@HJ$Gy;)h5oWN)f%lZyCSjk;$P9#C^Fp$VM}4gd3AX1D*tOmbHZ*9QE1@?Ek>U z?#S+j!Ktl9qfgtB5(NO|Y}s*>?z^R63p2lI+U>sz;k8yfc4M)bT_fYh44i2TuxgFk zpyehx?dV~)sz$%+i9~kl6|X0{+@ncnOW>Efx8K+fnfuH0tt|+{b?V&5o0|bEA9MXa z_i5q3>T#ySgR4m*b}K*6Qd&vM{qbKbKaR9UY>N1f`ac)vv-xWvqzvwE=CRYUSc2E^ zum{L#JT7L52S5zk{v>?H!cA^?9P!HB+Uw>}1}KlQx@N&>H*&f3FZlv;x>e5syD8{* znsFs-v+L3ebDlRGl9lRp7qrW6B8Iiv#4)P zd%%kZ*K`-nyrjxyhDDXNd%MC$i3OKCR4DZ9EXKahSo~tUh&{j=n$cLrMl=rgv%6Y} zu}?0s#Aw#zqDD9-G-pL#nf?u*XiD9E#ksAMWXmf=@zkdvBs1h?T&mguWFB-7e17ZTl^r? 
zH=cp%Ws_c^K}haOo@kspO;IRybb@2nY7j2q@NYre;m4u%uoG7c`87WGf{y!&x;c&= z+$O^{>e0LL(fNU*S%qiO8nop&VPD?MXZPz%s><)qabIqCFip zu?^}L(F(XxJw9F~+*CPlePu$F*5Yg$IlaGv?W0Ccjde)3ya4k!Na_^dE(d7D2y zeCVF`s5-hQY_|<4%P}kU5-RH)lOC9mvQeb6go`zR{$Ywa&M!?+m)n>-iTP=XS=;gh zm)8kg@d8G;7n=KLSI+E4_7?*x1x3{T^7DwgwWq7f(3j0_D3;^L9P{^{0!BwaY^zgL z6TdDBDDX7we){+~&6f2jPhbS3;f#f23+g==53MGR*!z@y0H_Cvnw!Xul0T^`e;U`4 z-zv`_BT77z9}j(6p$bs?^jWv|p{bH-U=HV0VoFM23e8L06pFbrUX`?d9Z73JVne-Q z_%-xO6YD8-FaN3gXYS(>KYTx*8u^s_NL=ve*xlYTrOrzJklrgsFKrI~RqPXiBJ2U) za@IGzcKRHr1RiyRT%7Ppj~M5)i>vWP1bP4knse6B1Q8sWLYE7%4O4mm)oF5O-i2NW zxg3vZJ$rbEIGU|mf4SPKGeW&UagiZ3#L@Va`)*ed12CsuVp>hNaMD@hL{9YOW@a|2 z^(-+QOAT?VFMxLO0~Hnw1xJYm^^*t#703w`HTri4o`pnG@?wcGGY@^o$j9y6wI+B_#d_Zi$g07c0ZhJHQaTvu|nIo*8^sm zA!2^Sl3fk5LIVMbp$b(TT2=P;a-Gb$$%@a1r3VOWCY>ec<&TK7g=W9r1uridsH3}D zB(DDpUXZ;Vi=6kef?qdQEoW=4zO{cD7W?r##J~LasRSpfp-`Fht)ETXNW0cKP##S% z^5Wbxv3s|y++ax@jw)0c$s~EsNxgKo^2eEd{G0b*X;AB}YH8-*0<>o`kK_JvMrrX2 zdiM_{$yGyBc*cT6HbrWeZ_`~(*vF~Oq|o6I56u|+wYZ~>N6Qv4fLUo`x;MF-Vqo1h zKc5QGS_1i+@b;vZ1)(7-O+19~CcY0X(Vjy@9zcP#K0Y(&|50}haW_3P{-1KBS)%Ku zsyUs|Oi~4~8HxhJ5RaylD^ir{Hai(NmdHg1QQYf4Gt63e!t~%9il5F3@RwtA2%&-v7G|z4YuTzbw73KJy=GMpw8!&eJQI z9W-nSYe7m}(@~50Q2E*U1w#1??M7Qp(pXs7AnB=sS+lV`cDzYPS7RqI%^bOUqdHc1 z+Z~MjXzYBb)kk$%OzYA3c@Z{ReHM;k&RDvQ1w6ys0yRK&Sxg)MFnZnJoAM&Mv0q2u zX$jxStKQHoB2+?P=s@7u2;NiYD*OB(hOuiZ|2Z@{MRZT1=m3=Z{lzl)=XiplL3i=f=j;k^HtO4#+YBPuQIgCZ(vAZn)~zC= zO#=MrJr8@_D^KqaxD}yp7uhH?tD7X7W*rqFlV&a07Wb#}wKHzHLuYrR?){!$%x&%z z14e`Z>@5K5x!pGCBHpBSN_E@n7qo`ce{$p(ZN>t>>3;7bwcZ-bt7lLh-TP+g^U2j% z7G4)H+ZY}u2|HgE=kn??*Sh-LdAFWAT&(rw(fy>-a$J`T z>P+^s1*MoR<@B>w5^9FKf^;abx@^@R;6k)8viq(pc_X_q!`$c*_ASzx*bzjzq*sxU zhQD6G;RlY)02{3~#)}RN5_PW2Q|{#X-mUx$tsCq%m} zLfCX)n&N-Ut^Rh2sYm6>V)itvc_nG&FtG9HSQI9v)1!O zyPLB_(&QdkteUL9ySBwoQ7&dN2Xr@Tty92!ES=8e1_!YSPu~Gv&KCeb8&SYI#+*rn z8e0S)+gefK_1Je;(sB+Zq{XeGIt$QGDcLl~=)tcou$8SX_=!7km)(@~ylY5L)=O@Z z_U7`wGUDN)MQOd{08;pryH3l-Ci)<{ljl>Qt{}VK&g&9ORys#Z&UZsngP1h7{V2(` zu(V(6#kM5FiyBs!+*tSsGBi|s_)pX2H2CAwWvT4CGtkBRI1aBRqY7bhimcfitEW@i z$axxmA`-8cdVl<*>i*QIn?Br=tc7& z34A}M^}p7!*vjt{*WapY;&1&^rYZ$qRk|iZT%pP)J1U+2Z4YFl^!Si6!8ZtZd-i!*`O(*( z7pB%cT#3Iw$YP*m(U&Jaqs2{QuD0Fb>kq&U0&3AIv4p!x8kh7QH7VJ7U|n9T??Q?K z^X0%U5Z2-gOfDZPV^gp}2UayVxJ?f=Ritg)S8G}Ix&C_1h6*B;IC!^h$a08(-_Jx` zVI#^9fgCC~K3d$}2Z%pP7I6{6uFdm6@6a4@dVw8-kQ`UxOz$7vdZrIEDAj)xqVMH1|C)`m0 ze|RhW^6XG3Xm4~z4jnCeE^+4+(|+k$^)GZ)?$yb`x-O2ukw#T(A`RemeZK}B25xdU z@Q}5*u<_w%6&4pnP!J*RN0Rhgx1v*hRoY)+Dd5n682Hqqyl$9#PaF5@fnko8u@Hg| z$j#7p<>4b0Q6q9rPZrH@F97Sd98XzsLsUs)34zu73j+##c) zF4=7BEDt*GhQ@8{&%4(URST-9!ylrd{a$7q>CB%BY&H}!X3Dq=sN6&#FklsARTa@A~+U0 z`VLsuLf}7j&g(*j^GzQ+PIpTz+xI{w1qH0}+mA&x7tLW6oHl&EROkOTg{v>p+qISr_#`i63a4 zM3i7G2M=M#t)uGESi9{#8y>15y7osB#T}H^pin^VcE^A$i=NC_a44#C%kba|&Oqh@ z?zGR)A60KHcmR(3b}q{!@fB!|0!Ta&S9+=!>iW+HHug%e_n1a9w)@vK#|vIdhaqxW z>WV62aQ7hXM(<2gJoFtr^_f6E`ik`mpZX@#b0r6zCT7H%p%#fNaEqmoe3!k?We-%5T=(VuKg_6+@0KbNs7 z`vqz`Xlkr~cv5_JUQlGWO18CgAth)34ZbM+BKVaP`Ie)d%RL6ghgk9xgQjl>NN13R zZ}%~IJ=qv73@?aCZNM9;m@RNKnVRLJ{z%ZOdPji%MhoXHS-325E;9d_U;VHN;WZp+Ql3~%vA^y9~^=nnk zERK7xw(5L*Sbqy0?7gFTmz%I2cF4osT8x3RSb%wUuYkMyB+>Cd=4pqu+9yC*$)M>c-aWkx) zE8Hx*g!y=zRobQ|XVJiuVC5zES&mZ$V%`=M@qv_Ugz&sNOh>AC^Z`BX1A!bYLQ{tV z$ovHm!HW7oQkj&)a${4z^G$NJZkN5xiYt9*dC_T%C>sW0&rR%YY9+Y8m{D3Ac^8jR z939}3ra!lJ#Wsjpt1lK+EAE|S&v4yiw?1Na3DZ_ChM}8DSWXyJ4e?;<)U8Ak{&+DM zA-6MTT&GfnX zLvw1pcsyN|;d^WA6*AaTM~MC~0}v?I1TE`2d{KD?E>lld&tYVY%(ak^PBgU z@BDnnc@Q0{#;v|1O>A2@XAG<#P(p19_n&02dCIor3AK#r1}fEpdv|GD^jfc|jB;7a z;;cYM`x0?-MlxHh_R7wSAgi`!$mf{_wnLiEj?h*t`m1*XA#E~TsAQamdmc8X 
ztH!<>^r`gaqSJk69B!VS{PV;DY`VWcUohAaX-X$Q&NL<_(JOKl%w-yGM2~~@poM!j z0b8_1$)D|`3Ko6~7|@q@K;ZdR?H3J$&vR+#C}}_Dby~Ng>kx`%1$-25CA!s7HT|us z((aluYHt4qeigu0R(7GNxvqe%wcBDPVe~b2W)X)5+};ox>w}OvK(qLCRc6%t1-tI- z<)$I#yz^`=2PxdOsN%_7>0>8y2 z2vxCqzJ6r4;Qs!|p#A-9nR3eGjCe`q z{6hQwRq1Pkgxt%JK)Uty*BS(++uPfPKZOVckTY*9ZRnf$(y7SHT7;A>5a!J+hFjA@ zHuBc-Xa0YHKH_LgRm>9L(`f>;^dL`Cjr*ZH8M_fD?_(l0QTgchcN^p2Js+hXE{GPX zOlyE^k^e}{LN%;A4Z~FfYT-dGGMLRE(7ZTeO`0qWDlc`4s2Dd~Yt@WwkzbP&1&va3 zKw8nb&76}MJ<*O1;S&cEa&NM>#@3I900wG}yn>ihI$;oAkp5Z}` zZ{Zh>(?+^GAXa7VhP{vTif8~M|AY1Uhb7ug9`xTdLiWiOY8~3ju$Jaj)9pQAY7ovo z!*qTXP&oeB(D2OD9EFu`&$e%u$Uf-14#GgFbaU+_)BGHq(l}0Aox}2;$u0nX6w{bX zzmtg52fFjGx{mkUgyEk@E(6`TNC!u7v~nLf)!$Z5YO9gX-@J>TiER59L?*zr?^XTx zY3GT+Vrb!_-d8U;eXt+Qk+VTRa5Pw~6{>%VV0w?gg&&xY?{PRvn|C=(b_V!vGZl7) zh;k5AgZ*N#UH^2$?3hefX|0dVI)jRhOCp&Ug2-Tu}geGdOS z0d`1wOs~R(jW@|Nd}nGAf|{a}^f}s>yV}3D3%*6Y|8wV7y}q`4S)M>Fd%0b3RYD9z zUqN1ZDm*LHLu-Lc=Mlmt)a-lTVExE_kwSv+@XcCl9i%IJN*Q=lD>yiz9rb*gaY)c@ zZxl&aomjz*-;N&}?+jvddAFkTTnT{df-u58QQ^XX@*vdF7j92q`M~^DKutc-;lSKF zf<8R$TmP2#))nJv;#q2Yqu_z!1mHO@24fvh^~T0cb?{X7G-XqfO9N=hcG;$$-wZ^g zOTzor0;)C%yJp$vKaanvcoNt}eKNn9k__0?wL5glUUB+6b0rDZ%LCI%G1k6JKYANy z^u~g4A0py#TZ2PHJa`!DLQ4WO{c;tX|9~I@#rWh?1rciJ`Nrtlh-G`f-ZHFBt5Jg7 zSy4W=7~K1G3#!oEaXg-J!G|jo)fgJUqT>hLvuWj3UNq~DH6d&abz9RdtuQoQ>IrH` z*j9iwWP-%Uyzjy@i&#_bc%6Es=q6Rb;5C4Pn56v2RWTI;>;awqt8^%A;hrD+BmD}k z*vmn2F>-%6j>LCCl)za^A-bHH_PY zoabf|Lw+A20+fYdC!rAouQqje$fF5e3DYX(+n_?LNk^OTJz4BH2OboSoXqNHq1e3G(HFl3Rygr`^Ud&@O@FbLyMWxy5%7<5${hDSK;>%4}?t z(v6pUKVP~vv%s0*WCmB@3&cbr@Et%(PB>x~xT`<*&YwzvgRRXN$bUv^>veE6qDe4t zNM^3?XXjz(9uek4R zlt=LEXe#3=o~p={)lB2Hg$jZ$48>pxyE=n*2Is~%e>i;I;+2|wWto=e*<$)eT>>pw zwadgJNt}y}ST0bjIGvnP;H%B$zcJprkGX2@Rk?;enNL4I0Y(GeSMf@`DyzXG+^3z@ zpWmz-e$mz>(abvzM>WF4O)fo^Uwb#8oK@=+e4EResv_QaAY&7}rnmn9DF|;phstHH z_&0>({x(5;3-^`{WHh`4KpPM&K)oh!di^fe?< zs^{_RH4_bjy?r&3FN z1o#j#1r*SYkC9o9neM$zA>g`3V(-y!zkNezeU{wNwzZoEPL@}AkG!q7La2UN&NU5! zcYJ}-uJX`QVf1O@Lb!yL;49A2I+?OjPMqB;xGxn+Wf)x}1jA#gPnfKTO^xt@8DgtX4eyD8! 
zf)>39v`w4netK}WFK*YqJn^IY17$5*!>iHP*)H7I{pn3 zVn~`}WujYn+qNP(+_NP~GGkbryzH0Ybmw|L?AkMHSDnRU5+S8g(KDOcqb2uDE-R+G zpZRCCY}`XILSne$c*egx%I7X4kun{F8?R;eBHdx%YAu6v4Ka3aYRgq5am=XLWo3~s z{kU}=X$#&5o6>exD4eZy{ib{?G1BSTrFY7eNb+$VqWY1bjyd8R+iUhW^=*;XAGu${ z4OQ$mRSu1YE{%S9dCN5FC=_7<{VVk?I_ja$MJ=BX-s={bVcZRLH`P&;ty{7^d+Gt_ zRg>F+k_h~ynU~n z&D4a6gc$PkC(&9Bz9mP;-&w?^dUMy~r5k0QtvYq75vpJWb>}GEy89F3x%gW`xO~I2 zFCj_N3ew;o*)H!9gD=;(uJzKXMqSl>ZhvAc#NPc{zl;<&4`DmTzq0Bjr0zqwwBWl4 zINkiA9ypZaW?3#2@CCyH&BePHRl@jbE zQ`fy3x-vN8rHd%R=Ea9&ayuZy$jLZ1W3Xf-Ye=Pr24OsYb2^h>d|$?ZZ+Sj1UeGAw%QG(H?e_+ojB5C-X_#>0F*m_~l^|9!(<^iR-;n*oXN;O$PS_)QTI?Z~m z_Z|JOZP?jcGF1P+N$q5e=rnhVd4wg9o>YajH(%(> z>`Yq=Gm|_%SqYsmk9N+KMBB}@L>QkCak61^gzuWysC~&_1J$VwQ-QBRTz}r%n=0>U zHGOYwEZ2=2t;IQwZ6CH*Ns8(EQunAX(RKjlmyJ!8{Mo;YoB1w9K??lFODw5P%iOM7 zaq9N=nxr(!${Ne(j==?^pw@lknN9j(0Oo+2w?2=SFfALI?fgf|lr|sSWUB08?YPUk z{pwolpD8-U=muC#A4zdP1R7sOtb?(iDBB8hC;2vbL4fL3gb6aj_7~_y zu2J6dXcdgNQ!lfSuDaV-AmCz*o~F)-SYAP2vJ#q*51p!*2>*gi#-rI7LK+AI=W!fe z88_JAK!Ucp*Qhu^ARyVWBon1;%9s}s!WH)$@)HQo0|xF@&09C#+nyPa)?|t>Ua3$L z5JW9ac-}B=7TEk%dUS5T2{C(_aMb!La&_Frz26&#*vvUBXTf?D!tnugTmRg!j)Q+3 zF1L4f_2f0vUr@?*Cv9lv7zJzJy1u}swbQQNx3%e*oo*53$S~g*@Qgxd>z6C=-9$}Z zw~Q4_(w~IWM=_87$}I_g{Ov@)ge9ikbBOnSMC9X^%^)i44mK{5hu(OZpzZWMP+&1E zBK10SjwW|Fd-hwa8Q#RTZITlx%^V&`iF8sbi;;p5+WDD6=z`blh`u7`2N>G~sM9`gL_uFB_5 z2bhm?{otvNGI?>(X&B|>x!@q-$YF^XuGYdM_0`U6SLC3ukNZuoj}jP5>tj~pg6M7* zJ@y-1cmn7&y>TRIm}qvwt^MVc3~$!^bq!xucu3y$gv8^wmSifT7vBV5EQA^NQ-THq zopXtETxrnXetCAz0T10Hkhlw)v@qWj>yx%XHX5BGd03RaXAQZ_szH737?@ILuza z;E|VT5tsKYFUe9A!SOSrh5lU+9;#!-mn?)J1U{=XwPy6lv~eS2yNg z>e^StP1vn`$}3P(9f^9H+xvR55m@}Yxtafr7CG@eAn|*F1E$0@I=BR_a9B!x&_x0! zA!&+MG>*Q;_gV^)l|R1Ai=7A42@hMdiJIqUNts3TcjuI|El<7M9if8X*5*U9c+VME zlRb!oz`TJIenht~U^)cu>fRj3BXvkd)FK#2YD{==hzp9n1uLhJ9#%_&0i6|mzi){i z?SI;P&(Zima4DQFzr3J#iGP#IVX~E{3oYO=~bCKOY3WDf@+8&coKcBF2J*A6w8ZJ}7t4SvIs|nxU zw{7o%zI;P*_+e9`TIa{47A{C`njndB_x9g#&~gR=|Fm0jJE7@3hy5783igi*YE2|Q z%|c%TTRZ>duC460A=x+MD{{3_68950cST*`C^yC0H)ojkIh$G9&S7UdhSCe#r`bIH zsFAN_*|^b^?-YaqtgSPx0Z}RT%#br+ZHb_w9IrEW=d?V5LzJ`{HsGG}oqTiwc6a7v zFSeO5xJjc{q($#W)BeFVqZNyM`XZY_jmQ2zz3i;o^PZ$$trqoH)8RAV5f*d1xudT3 zkVEw1FLw4(n@LL}D|*+np@iy%MNi;Zg1X(P`fF=qBadIOneMhP$j7{jf6Rj(G0 zv?UqmAE_f`r};4XC16PZM0WjzvBM%kkh@1TuyXH%I)ZWk2#Nu2qas)BG8^}n^}@fs zeIg?R^h8dw6yofK{=W;raFx1NUfJ^!#eK|}(0wp@mnK8EBAI0HR6~-N5ks4@#{>!< z9~J_}sy0{V(Vm`<&l9d>yDVQ5KWUQk3ur-2h)VYf74a0=s#Pj+PMhi(wpUoGh5A7M zxUD0m0CkO}G?pT~E(o$J;2q85Sl9AbE-!?05HYmjK3U#o z%4SmgLyrNimZ7+93Jx%|LfY>6VOgkckmf(D4`M+Hcn}D+p@rg~C5qCf3-HoO28aX& z(!P>w{3g}3&}1`r;(5Nq3kj#KBM=S-B3D*bC^GgfF*`ptLR2H69p0nwGrEPYYwXKr ziH{vAVWxWjTvK0)=vcMU<2SR3%97maybfE6k{bU0UyfA+Wp5!Aum3U5PN0tgWu58mtLKSc`)iHxM5Z^U2|vx)^l zY7x+tElA6iBU?N4r2v)lS45@2fe<0h{d3Y4WHd~-$^AIC&H$XbV3PNTP%oS-!2G0S zX&7$WDNn}qh)-DrZ2Ex>g?<;@Sjp+PR`S`k!uw=C#*BmS5#Q){%s6%fvdTncSuY?+ z!qDgRe$NP5P-BdAw__`GyAx#5vAn_ff-_VzL@5#0+EF&&1D48IMLyVf7YpQ!p=tGsDcFW?uvB74R`)AVYs zb$iKWQp8|LM^kVw`hNs17MxznXd%b5VXjDo=wzH`Ffd@U(Wc zgJksAWop2XVhem4m*M3U?#1n|6BQ?eFKSX9B*cbY{*_!QrXs&?g>*ZiXY+IGs*|V} zA8pp;1{MoRj?mgHOIOl1ZjU(buR1jX{67hS#D|bta@{`0OJciupK$ED9`vCNF|>eg zP={~J31K!_sf0)$PCs{H=O{PrR(}nbAO2D5R$5&A zd&NhG93}$-<=+$Zu-J$nck!yW?+?FXzv;>@#6uMqRUv)Xq&EK)=n^fku8sXINgB{i zU=R27@jkItZSGfap8jrPha!^tCv@B-Nm0W;rp=}<^SquPLr%4&5|Z$@N`tqrDNH zO$yORQnFCT50tFw*U&CG?Riya6kE$9v`UnyX^ft*Pk}(v?c`W@zN37l9K;eF43D+-*zQG#+i>X&D*-k#{H|?&2#nGWA|0yDz~q89WmA zVo1SVPaG4#1_-L$^PuWpRLAf-myqVE zl>58=1UIn|S`pr^in{ZAoQUrsx0D3F6T+3o4=(Vs)yBQHIYn%Mi1!+F4cfErpRZst zK(@YCk2ikayEfm2m-}C_ozC%P4$l&JX;n~SV|P$0Z={@+ypeeCRHl^zLsB;rsv{&- 
zW@VIq#6evtpWX10@2mzXTOl*lbo*g=z?$y$x|ME9=t$n}Eu0-&t41xnIrjpHsK9AT zd>1q|chNx`9penS_Ba!axp(?URSD%Eic_qTtCKcn%HPWw<9VU4@526G4n~lx=It!^ zGl^G67K||tikbClBZ)LM{}3_3qd)R0(4=y=>r?-C2BaV7z$d!o;+(Mu4M;A6xU8Sq zwwYV3*6W5HR{6|+)cZVpa$4_16=^lfyg+83EoQ z+A7U)yqdLe_Br$r>P}tGi*U=1QuiJ+&lx@!3tKufbM^W5-}t%t_!wtI`v0;x-$<#S zPlza;YVKI3joec3s&YX{eU!>Np5+Go;`E^2Ad>Lm^W$d_H?2T0Nc<(#KiqhzY^*3g zi15^#!L~}0@sX}IdC$6wTe^6ZfSm$bhkAyfZ-X5zC?B^$Qg#(*v#HLo{3rS|yQ9OA zrSPLCAC-nQrlHa^hs1=qaDO+TopmZp#p3?`q)-F~n_Yin0op^Zz=GGfJsNzjuHd2m z0pqBwNdtCu^Ggun78PQPL8bUBIRvVMaS$e~~H<$TA z`G;eudj2!j62rPDY(IkPq;-(MXGIyXaS^|!oS|NxYZCgU-w`eafX9C(<~48dvN`!{ zfZ~-cI(wmVDCjod{^Y6dfpvnE?U){!Oq?zZFW0ZP9JzXhY|8i&9;8bjT za8seF8kf@}lqstC$i!Mw!Sfkc$p7n`&t|2P4ahcs&9fA-i?yM`Wa8KdAkH}sP z4KB1$dR8oTVP+7x1&%CJZ4y0-s@JeM@Ax0H02_Y#@nTV*p~LOS9I9f-@hJCm4Q=(6 z$rE!Aq%SBWoD4hF)G~i_vCCf=?lvZYwlo$c_zB}{%h;apb?%$mS}wnN19GK~u$$r9 zt1r`*56ft4(mN*U)c|YVHl6TIGtOr{ugh~5QdoPwGa2$dISdZ1s)nOIvay+>gB>~3 zXp%oroFN&MXAR#-zg0XTK_UXXo@?TS?WD{!O695#=u;6`4ek$B_i_D4=oyOo`ZE0GoOn~bSGzHFw}QPoIB#$*uTHrM={H?4(5#keE&M^P8Ks|vnr2F z?0DqmUDpa8QP|eUbYHaF$T*HukUHttsg*IN>IC$0k^EwOPWxrp9LEe7u5CBho`XN0 z(D;vcnZL$8ZxE0H+m$^lG${=NUtuG^as?{{GCMHC?MwEEe2ommoimP$woghlCqL0_ zAxn|U7#`i}vrmqHvZ2ZNzMPjamF0yi>m3}p$#%7B>UF{@$jJw%P1+X^A}{dlFeSzg z=+T9N>?Ye^TTmR1^C6!_C-Ob;*G6V7sLFLBvSA0&qbeDlP1XY*)^t)h$7kx<*)0Th zrA9Fu665HAv2CE!#fq{k&%uT^i#M}_mQH+ymDKfFnXIX7W=<9gK=5|)o@DF6n{3xJ zH_x?rys`JmM^zbo@Z8-OAIM+DztXk8v+w-CH$vIWmV$mk`bXx$*syi}UY=4EEPv9b zBo_a5&b4QRGk27GJdikv1{q<6X8#4Iu-+q{%&#a;5DG4QO`nH-n$JiG+e#%)7@m1= zrBL-PmY{!Vdba4JDU1Ve&bv?5whqqaz=bQ5} zoEQCwrKQqppOIYxu47QMu}3ArY6~_pMalIMuM&BFr}JAy+~VS>kA~plS?1##Ouee$f70M5EM)wYv zY@QmqWjcZ&%XESPwF9S9uiJ@PqhMR_h#;gNE8ZI!wk?F!(LrGo$ww8<{mdU*ZbaSD zR<0NnrPl2-cc^s%trvs7;hZH6Jg|PXR-=pkKf$3Re6*Y#q=2_ULtQ4iaG~db{9{Dy zUsBp4-FpO_t2x8>l!802@2DHfkakO={_dWA=aRinZSiUv+wv2ZpGF=~ZsEP4>=)&E z>f@c`tB115M-*ha!VuPw%K_|us_BY`9Ui^vlDH2D71f1?UK1)~q&Ok8O;4TjCp5Kb zjebxa2fV zN6UHk%F@4AGF+q6AbI7D=zL9@kDpi?(8=o(o`iRe!u1*8>_`Q$Fo!{ip_gk+;1ms$ zWY55nQl4KLT#(HmqKxQnWc!fspz|9AuR#f(vpDYXd<_NZPbS>HFX$G&JIMsa1xqsi z7ju*8ka|Xl{&qVO2)*-GG0_mVB5Nz z$mwHMN3WGEzS<VvF*vliW}h=Y#5N7TT`|nvf3s8nDPs5K9!#oB^5P}$ zMzf8Pwlkeel8nLqG>%+efi!VpXAbJOhwk!kg7^uKIO~rI7$)c8a?(!HU8?BcjJP<} z*Uxey>X8?^&B_}!b*@Vc*yG2%%b!6Gu6ywx^9pe7$j(JBARfA@`ZyA0l0T;gaYAq6 zEdPc0#%uybIOH508KL8jG;1t%j-llH)~xSY3A@ z6UU*a@N3|)X?EMxkdpN9#5n@E*vl+o!8c69sjIp~u1Yy~sNU-|gb`wN&KGE(bv@h@ zdfw0nTuoYy@Y<)V-!&px&bXK^DK3a*)wFr*h(_zsSunQ?t;~+EeiAW1!n?ZVIu!>_ zrJc|{VO{KE!-^D|+5{p|hxwXzTZf?8j@PPU7)B;)%Sof$*_p-cNFgRN|1FK|ThnSK zBz;Za03rudUP6=}6bFIgSnxhKK!*KWi}&G9^{E;{-35U#0jtGwJ0JBn$-+lKaFWJXA6NrfJy#O$xutt+NKcw9mWTWyekmD^h%)Zh&$vYZCyO z!iqZi^Qy(Dtcc)SXeyTA!iDDhZunjGY| zUV~B*U0pTgP^4BY&oC(^^%IGvf+JV7+$zZ#_fG?C0+^M)MFi%5CQ=O-J4?Ri_dXZ7 zDYQ(oK}|+k%A5Kzp$n?y4OGYj-T{8d(w(NnM&Yv%Sad4Q@?>@i&p_uq|4@s9Yt*Dc zj9y4on`7nzm3f`b#WYuSM^d?wUo%N_DX5m@30{(SkGXXaX)VTS(6#n-L7T`?rEd9{ zDZ3TEvpsR8PM}U3!Te8?gV9nYk~j>lFWBp`wjme6-^k?yj%VAk;ry=6O1*cU{^DaviZ00f)bY&rr)ot)(PDf? 
zG&ts+cYo&RkO9&$MtGI>@v*dAsf5Z&snAUGL-6h*$_JF3rbwuPfDBj278kY%3`m(6 zohr717~okW<}-Dqprv&>MBUH>IWauOD{ud+=F1t_iRU-E*w%sSh7yvtV5vlc!_Vf* zge>P1bBM>ZGF4?TjbL%iw45z8Ljl`NGWfOqk|vb65v93YzYrxWI9$7;fNg~;Gc-LF z)>A7+L#ZL(?XYy5vZ2V?c{XqmUzOd<^W1DwZyu^Cr{k!K9YlbfaoBze+!xj@xH$)R z@`ZtB^vA#POd^_xffMm`*73g;Lx%nQZj^8VR!uDOa{*nwFrF*GQ-&};U^Y7J`!d3L wh08~rPMFDUCr(jF%ZypdGVY)=mjy|0IwpJbIU4(D4g7QQy!E+4vpdiJ59ev3FaQ7m literal 0 HcmV?d00001 diff --git a/doc/source/_static/spreadsheets/vlookup.png b/doc/source/_static/spreadsheets/vlookup.png new file mode 100644 index 0000000000000000000000000000000000000000..e96da01da1eeb0ca26f261c0834555603c8c875e GIT binary patch literal 70189 zcmaI71yq|&_Bf1{pn>8N+)8n1ai_(t(BcxHSg`=X-P__8+*+g*ic5jw?hXNpLveTi z!n^z5{q4SczMS)9CiBePduQ&j0(Hv1z{}KPq+U}2u)loaC3O!@@z`ms zU1*-t-RK_@zu9%3ns?j7M(RxCATV!JM0$_u+z}ffetH@FO4t`k5()XcKaQ1c)T@q& zj~GwB<^O5kX{VZ?oK)F4(s{Ul@Jqx>Vp&E)V#-NxON3OY?@M+&fMNv!NV(M2lpI4( zBTt?ryz8_Id?o?fs8XP&*hs+a0yBGmVeok~h2}45QQ2vY6chPIL@X&jw2SU0$Chmk zEBM*h<^Yzk;Xhxz(biLRb!S%DWoAHUq{xzV*#q)dN&{8Roocb&mX+gcq8=rfPjAr( z-Y|M6}!sk>F35*A+yG=oI)txiLxL3fOe86#iKb8d&#WI zle9-lZ=5E?0{yJ{1ex6<9i(+ibi#Q5Mw5LgD&ZjU`MJ3&kG^PQ=?ex~`;8Nwl#Y{5uyl^Ag>E-#uV!Y(uTFvo zwvyxAJ?srKvmUC{B7PeutTHSrC6LoyY?!0>?Zy3mb~Ot+T~0Tmq-$=6xsH2`1OU#ggKIG%g_tqUp6dLMYM! zNv$prN>LL>{~c~a8ySR6r9 z(w*w8Z!sq&aYu$ncs5@^pKb^2OZkk@o|0VSFmRJ=g}9h#x|0m}r$8`3xWi%NORpi~ z21pLUY)g9X62DQhLb=*&O~`KvsAZA9_^(mNKy_*CI_M-Nsm2CLkMSms=+ zEopmt-ZkQ5%%JuWZfL~IE-J#{PpuH{aBenkO76tu#$=KK-2oM48VigQT&31+@Q*Tk z9TI-(v{1tcjt-can_0B!u!(~ipo|70m@Yhovk{2h#5$Gx|$S=t3(=IYJe^q~$PvoFT&lFY>J0>Y4v>2RA&8ptiyVoPvW77Mf7hT0O3!8PWCbc=35HGm<5W0X$ z7b{wHx!HNyU3CmvAumPmA{`zML zRBZpccW{QIdoN~=W`Am*eE;Tm)-ZqCV49&s!(#hL(B7L>x|3WlX0Q8A))U@CQ?E%+ zu_ONDKb{BYzFk$|e7*}l;~V((N1v$ch|Bsb&WpW6nOl^*&$sn=?{3X*+3sinWazn1 zUZCSV5q`pnyNl6=9)gyLc2)t^1!vnwkX#8j8kL_%GW)=wc5A-f@A@#Vday*F{qW$iQ7yURpciY|Tl-AR>^CviK29IL=% zQTM!GmW9-Xd4$ln{#!`7pDxT`sC8x0whY@1rW+{#-gSx@RYsF^L z#3rGKwlzlQal6#9iQm*2_19{wcD~R#esr%ziyKI)TL1QCaFn>P?o`CuxWv-_-F|mx zc{m=|8zXuHC#GRu2cLz?TmRcEm?_K&=E}*ZQ{$<-==A3}m3ZWH0&xagq!vjrNQe06 zhl3Ae+Yyu6)9?~j!xLY--?Qf^^MUW4+$}!wo!0rL^Ne%XaHGk7`Xc$@$)a1Sn@+RO zT`j%+XrAfZ%JB*pr648e9r!|~6VHrNQ^3{1X`#xQ&%P3Jl0i@!*O=Frca?Xi-&RwV zFxvmQZ^6Ov=b34p*;R>MuHBej#pjXFHG5ri0uB!vy`{el*EJ^%erPr}dAa+AsAA_ImHdaFJ;T zv9^L2R61xf{C;K!`khtvaE;uIYNs-#@^Wx{uoKWKwbe>0!Sdj}q27}-)E?2UBBvbn zmL`vi<3oVnv0kX&)sKpr;F?Uc)^6ds`MKzO)06(pRrKy6)o&SaXdJX+X$@X@tJzNV zB}%(sT=l|@bJrWfd_VA`(s|f53tPx-*}LV6&v2kblT?%w)V%3>1ZV2Xo9Z7*(k~G- z6!M(h+8(vv*oWD0PS2J!7&#oI%sU^4PDhRl+}UR~ch@gbmfabav3au7aLl;f8|16{(7(vo>_h&{ zUC~4DUim?{-T7vt8-R*l+MIl0x|FxMzFE|^nCMsPYj=O$L$ly#K>nGA!kxjF@7%Rr z2oQG@mzC=x0fL7B|o!i z_zd9>q(GqHOJe_8uaQhaWTaUaB>59&uWd}J_(fzww0lCe^Laui^z0H+|IIw4V528s zFQ(b9FGq!&wUTJW)eWfdRw?m&CWY@ockkINNM}<;DJZ=xfk;hg{-sQ2W?x;*%#3$N z-qRqB#0t#xNB65gMPey%tgkh{e}0Ika(?9}v?^iAof*JzxpB*a5nK>Ls31K@)G?6IkSUO!AZo~nM;w{*Uv*jJXGo}jwWA;*g;^t^{j-lMqI~?sAfCrD z|0q%8LXiN7Uj&H9Jsai!^+ttfqyE1-vMu5rl9Z;5f&!w{G;=aHw|BO3fGPuah7b*y z5P4l^BqS21#|K$KgYggnf6Dr;4pc`)S=h|Mj??75gQ+>EyB*{a4w9(5FrsQ_4mF{5 zx3jf(7Iqh-|Eq^EqW;*-MNj)z7pRRGy^e}Ht&D?{IW0dYHzzl}I2J7}t*Fy`3t^2{ zvi}^8_$5Yf1%*O{xwzci+&JC%I31iUxp;(xgt)kQxp;Xw5Is1YJ?x<-?i}{c4F3S~ z|KPkbcQ$jfhCr}en2nwUDcK*i|k9})fQ^A9@B-L3zP$=>;&(?U#;>+ua24<|R* zzhEPViaxdqt6RI9+v>iuwnMBL0*5#s{|nK-`v2|CzcK!wkvjh#DJ1wGBmd8v{~r0y z+1yFS!482ED*kVF{S)o~efiIUqFj$N|3547k7fR=6|v6ZSfX72;+Z&>9m7gCf{h^S zS88t&C1R8P^{GL8KSMl^CF1!+CLNy@f`ssf6kbWabw}RK0Nh%<_BgOi%BwsD(#s;V z8hv^q#V-G57rjo$6lb&A@C8~BElGzW5jKI+7c!=<&x~gAYPeum&M1YmbRGUHY7Se^kuz2X;=t5)SnKFL5A|T zLOrb|R$DXU`T9So;6}^G4S2ng;g&fF 
zR-4o8FLNu%a6O)g_FiM)`x!CuUo`(U5n57Y=Q8J~X}q4!Wd*wdDRqCFK6f<+dumbJ zgq}m_H2oph!!eimFlblrBYXSb;RUE79G(VEK^D1WA$a@=`vAe;CYp?kHqdI6Jv;;2 zeR2){q4RfqIqqJC}8%{VFuX$0*jVw2(?@$1?d3fZTcrnyEP! zepDNJZskRBecNj|X!_wHE$v(;^I~@azelD8v@7~6V)j3ffH@vn1y<3ibISAT*I!uS z5ILLBoDZe6Pqxbwg3rW|2hoi@j-)1XHTw^IF#-NUaB^_u=TpQ^Exs9YYg(NYJNl#s z#}u6&mkl!2SVgYz_?VZYucJK&3y3IV|5Xr&YN6aIZba%As+QelBuE3Y={yNBE1 zho9qW67yVm-Bqu{N$RE>_g(f|@q8h$ygy*eR`U)j27j5mK?mP)L#JM*{qIF3$7ddQ zXVVw@)R|WUmh}Mf!vHFqT~fEfm(!QQRBpe^CBB*ufOcV$Zq%>NPh>#>B zzSCxQ`AsxHwyM%BWfMLz>U|O)RaePD0mI&Z`1Tj?_ZEL4rEkjMiA^kqZ84^MANSg4 z`J5klv|pM5&$-oKru^qQU`1>=Rk{~}-swFTO&5*(was^SU-{2XDrp)bD}^zE=bwq$ zekW=hZ6(!6Y)awv>8$|qi?mB!n;-5D=EW|4uevUHSk^+f#$>%o6q(Ng%%?6Eyh>@* zduJ%PRdL{MKD*`J!m6#73YT&c*Ke9ZptR%}@r`t6Zy)!Guhi>6iQB`R)DOUO^<~4u zwuhbO`+lGMlPovHx)q9S<;HuiHy(9S&c7x#T&oq9Utyp=!nlz0JC!4#x=uV_zFB-| zddLvlHNkV9vv1fNajALJl7d~k;As;LtsLZc4x@6_;Sy&R2wD+%w#eNlhIyu*R0Pbb zKeaC8s!zv$dEZWRKZ4+uN0hUYd$j(e{NzQf2WNXJp}nx}z+p9J`m>?ll|pWNz)h6M zKE&^`oyL>u&H4Nv!I>3<)vBjtZxnbNgf>F*4% zpt)TFG-e1=;h;ydMfA5?s0Rn+olWTvbHzGn`ueir(DV2+2C>qInVofn-QM@R z-|+M7P6?pL^n1AQJ7_u#;gX*)&U(1r5Ye)!!mM5NZIDRwKf#2=y~p1}#&h@glMR%K zFGj4{;N8K)U9%YGL6F45iG%|^jqXCM9QD>SV=_c6(YE;%^ZlmFAn8vzSE!7n~H6Y<4tXFO>kx_J)r(mm-lz6U&-O z;_^PDp3aWrwz#wCVbT+sUB7MIvsu4ZOhqf)&Jld=%~X`|hUspdXC_&t#N9g*%g zKio7^MNR8kW=t}E;fe;5RpR5!8v0(X`>y-rkGSsE@09Ed4SPh1Y-R^B3VCuQ##cP7 zgmbxqLxMW}mnjzx(H**rjLx24G%Vf??bXcL@5|6!t55JNAY_X1R{zg#YOkMs!=jpT zcZmViGvDzomVTZn!|PiflFfz`=~G%;9lHWxhi-lsZu?pHTUnxu2;ps;b$i(@u^@Ju zgGU_ap*_ooo>jKNB5|egL#B5LYxqfqzC+`gs8F2xMw$Hm&l2PN<0PMrtanNe2>wer zbg_sZVK!fX8_JissgyWD1-e|7pKgd9c;-fp?$B%_RU#x1S!Mp$NMJMoHJHq~VuqGqY9ZQC6CL+a+6{e~TRQ2#FZ&ozor{dh>nEX$D3*QHMwQ_sDL#c>Jyx8 zzkTPdaG)essUa@1Og1?}*`*z&%>H&1fv-I4PPYZE!z(N;DmT5mg$-OpHE;YAmf6?M zx3t;I9z?56>AD2^USwW`8{e)hd=GiFJnf5q)KKJM`qX!=gDZD0UUPZIX1$z6=!M@B zx9t+AGkFXHTBdA#MS`}xU!POu)=+T^ZwVH#i!pPf_KCX*w+lKASUhgALu1w@H`%2V zYX{$xw7bUJEk)IgJN7KUK^vph{)nNZ=G*oCx?!J^Zx`lZ^eQ$0Wp{8pT}tOyLu6DC zj&%}H{Bmi*mReRl+VMHW+l+734n)a{gIWNM7**;{B3DRne>iKtPl`D$!729HQZjZL z*zP@99_dHu$5AngIt3wiX@L@(sP>MNY*7=i!#+YSi)FnMTX7>ohm~wsaZ9*JJo61Y zo}6j_fR4^YzP3`^H$zl~Dsi`^Hg73$Gm~j{Q&k`~h$kQ7G&><<(OD>iwhc=+IWAQ5 z=@U`IIrhFD&jNOy&!0>JK-=7hOoYD8<&TH4zNe><>o8%7S@`VTEF1dV{dVcSqUDX? zo|Ub?1Sw1KrzZYAk)l|#V8$|uRARL%m8v{&>t_n|4eM#L7jz%JXC;6{kxynBhaeaL z9UbcvSnaRQ_~xTX2dJ8R8=+}2%mQ&JIq(r?`xE(bd0u+}<-lff1)j(s?(sr+wO%S? z>T1IGk%$O(D;URSI0<*=EU{$o#(|z4)?rm;PK$=sg z`^u;NF(t0E*A`n&^V>lX)?>}gW1@@M>82V$JTzt&+5%od64F&{>gQ~@T%Rt_%Y$Zb z7MFu!S|sJ+*1L+T1HQhi17i=HauUb&E-4DTp6DW>G;`YSvb@ecENf_NTffZu*^G`p zd(44m1^Evv4l;;Qpn=vqkYjUL=54)X2KYe$6$vMUeKCwMn#EjG@;d6i zWRzgDq(cH%%Op-u^q0j#;)g1ngxKiUezX^G8oE30C}G-v(KI&OOUy3}-&hGw2l-5( zUU}~<-VQDb!h*vBd^T~i`>;X`u5#G5*vgnfUBi)S9Cu|X8UzsLjVr?CnFkgWeoaLK zP>w)ak`rO%U-9LQZY$)7QC zYpso~<>xx@1gUmQefDgk^4!p1Ei2zTrGJX1s~$An?9VauZ}}h1nVun4mf+~{uuWEZ zN(wJLoAZ0PyQ}T1J5U5xn4X745oUOu^KD06KLgqBuHNjXcc^3&DplTglG2s5Rr(-F24H9_$c#s>)oPboJ=>to6lrYx%Mg8e%7yOPBJSpWU;Ol##IMnNS{0EoHTn+LpJreT#wX1O zJ_pW=mt6w4-zm{8Oq=Nj%^VRfYH~EfXFK81#b=FQ0i!Y_q?ip4AQcEwO5B8R(gJ@D zIPKh8N3Z5qfMCEnAbSA2E_&g*3zcQG&(DKIru}0a-63jW_kp1i1=aDQD*+Xi2dNuviOA7#@q6a5Ct(TeEodND zp~GBOrF1vCH7>21}1KV|443zuz? zBH7T|Oxvad7`@4_L6*BW=kn5}5vrWXx?N-Op%}(5zEUB){Bf8znuG8mpvzo8xah-d zuxexYe@*YbOP+>GV5s)=k9HqKuP>rC=5?{i-mmpc?+Q*H+t-eFyc8x8K$}I_B(7e? z`JbKQK(_uiP8DV?szJ_?d>?}%+Iu1tHfcNii1J6bz+Fs#4ESuijEbzWpk=zM@-1(` zi(oRiTvraO;L6hJ8jHk?8%c1euCXv1LW3#1KlncI5mRNT8XJ%ng*&8`)`DXNS1^Z# zMKJ^fIVL~FmT~uV-yO+M2NK-8T&I0uVUePu{N#MSd^(=ClHRaa%EC%wI8M`9s3&~! 
z<+-0D(d&l0Lk*%P|H>)9Rv?OazzPJL6kX9SuzNh9NozcFw4zHLXJe2aJG;|QTX3vbzdQIvx*;dtIi=x#aM zHAb>?kT?!AbnGPd<^ni<;ePZ@IDh4xy*;ywWyKyC# zZRc>P_VkT*CkZW_WL~%vS}Cm|b>t8KT<@7S$M(BfZcR%aT0wZyBN2$hIC!aSx1TEn zNBR^$IQAm_TYgKph%1I3;qbQ;GFD5HBp`Nx?zCZ%Im`}W)Hu7F4ZWl?0}WVXdxs}Q z5d{RfNQcD@XDwNAJ1Z2Z^bHa?gEV10+rwFqdQLAW0M}3u?xg5KNqOokYtaEo#^3>j z%+R3|VU+v02vITWj)k!cFHE$*#qfaqqCSoZ`^o*|6|lb3*OwH!vUPYehY*nKa2`sz zW$>!xC{I^x7c(q)T-UrP)cbymC)z{Vp-Zf)vbQ`SDzA=r2NQj_lqph`$L@(L$i)yO zmF_r$J;a`e^PFdHxL6`=Vk;=$6o_5f0T!OiiyFsXu(Z2s0rms%yJ z72SfJ6=3(0pGT0Sgb_uih6x`iyqVn;l%PnJUjyW~d#2B$L+6iM@jUbbXUmB|6mZ4k zPD|Gj{Q_MyJGPRbO{S=)efi5So$)3ZFR&D@;kNEZUm&X_hGngJhOf6x9i&Gm5yW~t z4{hXly%uIE)AJqcxJ`BRxd$1l9)qn` zuMi|VUDt3(4vVj2G)hzr1`auJpUF}`*mi_cu6qEgMvBW%6UN%8r@ z*B^n)#~u{Q39=P;3o(N;t6R2@=QtYZEGPkzoJNIkcdFR&Eb{`{J8{nB0j#d_+Vb6v zpo(@McRlW-dVEf%uGW>R?UJ=@&0V~+Zi|&R@BOeXurt}>u;0D2A6a@jd|1P`u)(DJ zcuUQ%KFekLJP(vMGHPyv6vZA@3DU>|XbPf5-&GgunWXunfyC1n!X$2&8)w*h$AQ>< zwZ*hKDx*U$MF;qK!<8^BdwrzOSU@^p51t`GI#C0K?T^FX?t?4Wg-a()KJ0+QIm+2k z>zvM!gIFuF$Y=2-eA5J1dE414+QhCAG2>LP4hHJcZc<@*&5S&4Eb=VHK?@QfgGM__s8|>Nave6jFLh#@^yj<0HhdJ4W7_`46HEsu3c4 z>7vr|bH#B8hPmLT$TQU*VjsI%w@r^|?veaj-ZJSROM+SAzKtf%3EZ;apx|N8smfcE zNEcrAc|;&{`!S$QomM!M@)kfT_jw49YhEXtp>@|~GooX&D??V!(4tgEKQ;ZmQCx&) zclr+hkjFR6xP9FZ194U;3_B~zmuP2ULqt?sC_n)|rClzwE+9+dy7j;@&sRNydPtel zX?uT#IPBtS+mP~4(Ob2LQ#CKXfXu@t2_Y(O-S5~%>=!`u&V3nxzU_`8@Vdj-cbFYJ zX*jIYqDlIu-nA@t&>wd%>VAIt@FZd_yBgxbTInr_DJ)-*x+oNt2y~LCXhua(#VDQt zmxUrTs6N3%&9XLaQQ&%FS<2L(ZsxTu6Bgny$OrHCR~@L6B^|z)TfDEy+A>9Ao~ogC znRs&_5ur{4@GGy%%9gNXR)U*V&gB6$NrC*RDFY)J1II&>0AlpQ$EZ}t|8-9@>z#iVTf$(%X(O?f14n>^mz7{w$JI?}C z!`TNDtHz3;>VWzFl8kbxT}-N)L;ac@)Yew*`jTuVVR<`Y@PK090D6@3{QaLYoE4&& zE`rymP$oc1&Kb9Bdi@YrOx7$&8BT=|F!PBRsla3c_IE4 zWxiEbR0Tn^0j%=$c&P|(Fn~=G@?mYlq9%*`MxG^e@d?nBOgfB7nN*Hjc~uy49jymCPNvTPx@1lzfW(J5r6WV!$ zi^w}L@f`846c2G-#`t-c+SvI5##e=2i<4|`)2&;W6SMT5crlmp2D;${B)+g!G4aQc z!v(M#T+{gGNBLaXI$+AUmr?Bro@q-zGOSHo&)5k^C zwQ<_Hm<6^==3*$0t7LnHz0ghWTk$ZYcgnOy=e(-RDyj(*aP5RWSL#dj=CwOzGXAl=$TmsPRn9%6N>)26gq}OQ<#`9 zor?P~POW!R;_>*_I3FwB9qk1|MW%mY4`a%+1y32Oz9G>AF$!GpGi3MQ3#x>Yf66i57qv$m{ zwan3qIbU~O)!0$^AlU5G=DvxO)U(m}I_-59N6lwCi|{;hl+YG$M4&Y@x;Ichg%OYz znmS`-K>}@wwW!muRpbBK8NdI2ytUN14ZaxqRnjt&+?EQ$4FR^1qp9%WI$GSgH=zkH zuRNQW8KP4g5LBQY8qiJ3!3OA`E7X>U4|9ypOw;}D{(KT1FSikveGwY^3t_|f?yP(Z z<6F=9bts-5fFqKCFi4YXvW93Yot>QOjGWld%dZ1u4lv-F8~De#7Ab~iT+QC-s%utv zaSPCOsQI}rLGrwXPy^5nT7`umi(b7T&9L3h0bM@9b5*aYH{YWda5a}D3rpiSfQ2iP>w2_iM*tBrP#+-j z8!&`K8pS=Q48dwmLndpZ1-ky!PC+FVdZxI_iTa7@63f+=YUySJ4P6C7;UO>;QtwGt zBjEDO#PgC>z*iHetYhIFotirrfH6+%uTTqXAy?hcPpUqH)?A8?(r!`Wb~a>|OMiux z{48*^`deH+dCxMEzn^nxzPqhs%~Aa?@0|$H&|3$a(FfU{M@;&#P_lT%WvBzyzQ};E zp-?lvX+a-Et(TMGIho z@AA#rHW(;JzQJxl&sj4P%0t$o%vY(J!_Z#L=V8kh)B3%B5@XgRhi^c`^y-$N;6wi| zn`?PevRG$ujn}Ezk&oZ|8IdBYu#{0 zEr#V*9`%RcovoamNld@lr-RHkeKlW=>k9ye`6}Tip$26rWJwV zE4&E>T`^)`;O9M@<($a2;Y81_@1!Vp%^+s&N1l8ZT@IDzUgMQ04r$mCidVh>bjujxYX#>Bd!jAKPsgQGai@-?6fzts0JNUVY}Oq zOZUJi2p60NZ4#wrOjXvhD+RjhW!S>NynJxxGQ7l;dZCVBwWqZJAO9>O&D5DmI$c(^%jH?GoIzDTO6P37!QJlM5pM9`{I;NeJfu z^~W^^e&DqnLhAXn7=4LoZDbjLX1SQAzhDa>jt&I>MY`P=$H}Ga$Aq;Q^Y^oFvS-z1<-$fGd#Q?K~Fd_f5MZ3uLLK zMc=#N!;#->vcmVK3V?$~<-Qb;CO4PLn zJ~=kms_(5suoQ}y%0Q zCBapEg_gRGr`KntetUS;Un(UxBBYpvu@QHS7|!1aRi9ig3zmbb89)BkDaeg9*mlsS zyZuw$6EV1;NUe=fXd@6BY!DIi5Nl=W0OO~5_C@pafcns&KIPkC-*di;y92*wmTkCM zv>NJ54ZbP#+Mwt>Mac+ixjGY0nP;O`zw-1USExtHvJpZspvlF21c1t1AVs} z8>IWIdrjgFykr2dKs-lGL>wSRH!Qj)D8-9=E!P$X>~}y!YDh8!zp#&5#yq*>-K5GK zG6=iLz6=dQg17RG!NYxW!vLuH+c~yM7|mwNbI&XHZTn3oBhk<$Qby4$^DWs<>CP+0 zzWl}iM15mLNKujBaYJ;qwkgXrCqz1GQ>psk7Dg`kwX_5Cp$hlcI9ak;SzIMYZgeL< 
zxmIiNG5M=?0Yg&wED^9wx;rOtf-CSIElaIR;xP<>h1eDPtuGAdVIaq3{l&g$2Ny~S_53{Q1fM;I25Ec~?jw6VI1Dw3` z{?IE`gIe|XIw39Ed88A;bL238AY%|O^u>&}76Ef_s|Cy$_C|ww8<~p1$6sB?Xv;su z`r7C~tc!K2QgcP#+VhjtV+GL9H4*`v5B0TU`CiJPBrxG<<`KUjgwBY%G!wn8oDmouk69jfUc)l zwMAudq32qlH}0lI$nr@k9{ZRosIOFjnw!bxo^(^0wJv4tb|^MKoP6u})=m6TH-bmj zz?yC?d?d-$z6V~5b2nO0!u_Bnr7Lm+Yr@luk+f|Ss{(chIXS_(!X4Mmx1FkZ)BK@X zJZ4}N7S0JuY834xPTs#F-Y#dQc3z`TR1@@1G11G}_C%mK-oedtWE^2}j;=w${Kq-C z&9R)nmXYm$6=)z*k}b6n2IuCZ!Rag7MetRog+SCIIu*rBUrqdKZ6ULec@}ZY1RdLw z4?||b0`OJ8NL8h&%w*>RqIz|jE<$mvnU|@wdJpTmi_#BWW<2I%tCmqSHTR#wA&cT(y+TiQ zVPa;K>n+xnNTC4~GZN2T`(Nf6Z=Sw1_);PvgJu_R#!%6Aki~bR1W4g^B~Dhb=mZRF zaV;}Os`9aXA#5Z6R#Z~om*v+&U^j14U=Mfn?o^6Lb@D1_b#OfbqdJ!FU^|pvbHHz$h385D zG3_s|GuLqb<8m1-8i*RW8VglJbQ(TQKBX~Z{1}x9LTFe!bqZu$mN%8QO2Qp_OjHUm zq)`()LWDpbm$Ky7uz33ZkMO#I(o9D$j1-H?Mi{FQamyEdqL`xF_dG*f_ys%Q$4*St z_>{&xwBmhN=1c`Vf{02jeWR?BV%t1FPL(XF%%CR|xZ+2Xuh`b{XH>33_aNuTSF8|8qkO z@1?a)Ta;7QGNfSRfd|Te#O0nk(tR|31|HAfRdNJJ$Q#A0%rgE!^MnZY8eGKfdwF~c z3rIy6E@)(zVZ+Km#b}}G9=mFAl9FPLi1$KMw>E<_nykx>=UCTs_(I|(!AQsj{w;;I z1MD5bH<{oLKjy(!YR4lLLu8bJ?K0!kmR&2sxhyB!R~(fe(_vgie2f`v&BW=p=}Qnw z+{$k&pwQEJ$gmdhib&_@uAqf}Mmri!B?M&AL%kJJ^^Q5ToqK$Cs@-IEByso6Bx#Y~ z7E2c+-o|%)=v>ktek{Z;!+6XCd}TIp5pkU9xn0IT&cULT4-<}L#c=?w5lM4$;T3(Xmy-W_u=<0d4 zf)SimI9e&V0_V1R#-iqYsqPMb`z(>rC9|dZ=Vug4#*k;RIN^35Se9mNu33F$iT0r zYG2XL6`VhC0o=;bQpoKrex5qL%Ax6=1noLzH$kAsLPF=|&Zz>6xJmdTPcs9UKqL7r zk_`PKg-Ou&x*GjBoB-=1-FztkZ5q@yk;Tg`w!6vgyu1q~;{?(y`U3Q`E8^viXA1I0 zuEPusDBi?+51*`xx+Y$n;`^JaM=n@WWJgFWenyjS-iQ&Aplw%%xsqYF2 z6w=W^0p@~enXSF2|pmf|IfbK}(Vw<;XrbY6W-Wxq4p>)4^uEz;k@ zw2bRZNtpGOJDqj(ysy zSN>nbBg=qIA2UWxX;e~7{(zH~Er0(F1Q=tD9mGgW7ya&l@)1g@znA=)4_-oO_MFd)gVhpUCfI_TmJ*Bh!1x<9+@76@GcW zi3`IwpiixqK(YPlyN|S{F74a0>QvYEKkRkpH;eG4I<`Y6BeR81=k?~v?B{i$7xQkz z<7HNkFVh+=jGP{Vcv8hhp&i^+*I##nJ01tc)MuAILfaIkb1b6N63FG;ESkhEqO4by z|BZl!TZn!A!Nu&Si`gh5t5ZlVDDuNVP;RLag?Ol%aR>(e<50B8oP()HyiHh7Ulk)@ zVtkib{P76HhnW7O&U{(@)3;$w`bCthrIg{EzYg0niLV3jS%P<#Bl7`C#)=M?r>i+T zpO?)p)d9uxo23B6T6DM1;(q9uUBuLm42zuYzA0*gNI?#uJYGBwvk)6yYxVpHqF>K zhVpi0iy^co&Dh(4XMEFBfJauKpyi(#Ze4)GB>sdqIp_F&Q4_wz_si?bRc<~xVv zmmApG8DXp&S6W1J?XPr(&yh69)#CRpvJD^!Xx&#+C2;yaXy9p=7znk*bCbwqQ=;+1 z!Ke~~AFPB;?Oe(4uMbidDu=f6X=f>3}jw-Ew zBjMVo;CN?ERmt9hllFJ=WybDA6E^CXH`)vB&bx-ckvJ*jV;|HGBAL|$ov z@y@s-hlF|bAGOYEZgEKAX>m5cvv)z?RsCg1rQSP6 zhTtE(E7J04`~&Wesi=BGSGM4}bLE-53ZIyPX+D_oDqflKw!h$zxD^Y91})fJ>8UCj zvo=o^tm^spQ4eN)wz*1eQplWUv2kk#$Sru|81uJ?@`D$C(SJNe% zDMjd>`^LH?#}^>mH}pW)UV@_$q*zKL8C3b{)52SIk_%SPjMA%ysGVc0t66H5tUj*9 zf`9%jJdX7{XuP=IsDIxZ=W%9+?_nDMhf3A}idY5ybyD)fejn{)Q+}6}sp>|<{(jb- z@9!?O*1`>{$5_G!t#A>4ZfI#Op+KosouuB5nai}gFIhEwFlvBJyLeRpD%so9%d8P# z;oPG;Sg#*>giuu3P;~m(Zlt|l96w&&$m8Mr=dkH~WVZPp6C*OIt6;y*C9&kprtV*a zDy`t0;IU4zNAyZ*er&#<2*2xe=UL02qTSu zns0sMbS0x(?!{p|ZuKv;2tLCQ*qer?uTwYk#B!i3FxJmNwh9U4 zyO2-rQPMyf$k@H9T2*#ZrE& z!E9&4&RJ!ES$J$k{@31dKC|&)_5GpNt$8bKd?GO8%*8iNT*ABb)q}$xBLXGBW!juc z&Aa8z^g;eLsPg4&1bq<(cX~#j5y$U{sCj7mHt*c(s_t0h7J)_WXvcdn%k1DedQ;Ik zcY68gvGbaV(r4qjqE%L3#71@Ao4nQYE%`hZ4%peZs6>#M%kZd5oIwj*TyHW&6h$E6<$a?=45h)%T zPE7iuhFEan%oRl8QRq(TvD~BnSv=ABgIb~Y4+(7}M7{KE$NjTJV(EI`%0Gq^cx|eW zVU#C zK`ygoxbC#>Se8RV+nDo?9kw_O)|TWrV)IH(lD83c5e+eZZrrcbiJ14M=2hN#Lf`cD zKk9nTQ3aQ2$L*fq)(H-r%buQQU~?Tp2f%m~sW=}PDiNY^Q!f)%x|(;bc-2rNhk&89 z7xqA{YFAQnOi!A^I$!R+ysjwUIAX@@G^Ti#cx>W>Ik+N94VQcR zDsJZ5NA6$0d;Md3Yc}NrnK`-$v`hxwDm&@LN}ZH@9f9Kd`u1?%!R$iG`Z|mysmaD` zuBTyxT0FdJs&%Nk*5;c3{LkjDo>A1klvP7bt&K-eLLo;q$euaN%&OlK8Y-ED0r^ll z0Cr=ojVvzlD>dx5(F`R>{Yy+K%*-B!p7%`?UD_JJ9c&rotfW11bstKLB{+TpcV^E{ zwxx~I0y0YuH`^9QU?uAz6=9BxkCKj3Hbo{yE|}AzOlB4FzbG;^WTUmW 
zy@-ES_~E;*@}LPzNp3H1Pud8_T1w*tNa5>PW^^VpRGfRkgAOU9=|-NQj8V^e)nUI- z>B`37MmF19&fX+9*2c))HW_Xu2`MMH{r?Di@35wtuWeKjK_8mfP^2SGno5%rKtYry z3P^{5fb`x=C?dp$D7^>)r3eTFrMF1$y@lR;lK_E00%x|r^1au4zH^;F)FkdbduH~` ztb46>ueDkYd39kgavt$sRgqk4yF;^J_4eM{t!sQ+Qfdl>N+CDi?zSm8^Pb%!K%tHq z?3m#w^$nNAnlb-8>g7OtZ_ldC7!9&GKK+3WIN1n1X*AVH4T@Y6-k9-K@D`5vCnpjY z(bit-{eneOG5j9DPK>TKEG-}~gcE*`tZ83wt_YoUrZ7I>R z$tchn01doV0{E&Ihqt4T`~0Ev4yU>z2KLo1k}&g*W%T}o_^G7Y*beN)?N6sqSKQ~y zyJ<}@&{07x0!a&9uSzJEVIIbL)Cfg@LoX2=77Huhe19c@EI~At++@B&Mwav78$k?l zOYXvzYQcqwe`qci6WQ9QR&mSzodqje1Rx#2hWVFdGG+k>BDfabPBq7`JK5Nf23M!g z9IIx(ZAbUy8j zLEeqJ+posmxaO}>Bi>=w{N2OukLB)#7L8{uLkj`S*N#_m!CuzxLZ6*aBD|Ro)xm`0 z7$ySn&nJRyf$6@A!T8Xr3(2w!Ol!Q^PoAvh{xvZTXKQk#3EsRgZEWzo*5w}da6g^u z%CSjH|1MY-Km>vZhPVsScjS)a>pp_&XA)!78#uOTox6zAV#X++~@Q`VQnkv{BJak}PK8j6;Pc7$ z7ZkqoXc9!u%~+u&#;uP(sNV?4p6_Xc4c~tNDIZ43e=n{6_oc!yx^1kZ%_{%YeSOGu z*RuA)wM%5r#~vO%^o#$;cqq_4V?G0?gsKQrL04{t{t#lsRaZ|3jCt+vNxt$66G02vG5tngqQzec~^(F4DZ+M#7Vf2MOC zoq5|E7e9v6R>&gvbo(z3?zkwN>$eq{)}1M&JNDHEslQ#jZ?suX%V{kimM8o?jN1?G(D?3(qxzmc3dAlpLznV)@Z z==bi*1dr*!;e$%crkC5ZxG5K2ud7=Hmz|8C?z%23it9eG@k;`< z{QJFEe*LoZdm&X<^lXjFqj^)oZ?nuilxM_yPMzr1yIajQYw;V~R6F#5TabInF!R*E za{;iB1vz9~xtrzBdUDV_!4+pTtM55#!?RyfS}lmIzBK9*i-{uBeqb%{L+lrjyyWyktSe18bB1k3K4ji4W`^1rc zyWg7O)G8FBoBZVpssU$pCG~7!0rL6#mY{F6;G8mEygjLXEWYR54cm7IQ6~?0U2`^* z+NM126NAjf3Yc<{%Z%0(m3EvA?__8dr2vb60z_K-K9nNNM@=XIy0OW)<0!lHqvC&p zuCL!`TK8nUdUY^6D`hpm3fI>3zySo|Zh}a;Y@M^zHB|-AYRNjH>DxeX?9WrG65|}k z0ds?!9!pi-q{9k1(OU*tjCW3E`6rh=#~kC7D)hS=YPt3Nu=!VuZ0J6_|2|Dwwj_5# z2q@NyqbE!9{zhYn1yNm zK+*JDNGCPN{fKkc#lB(p9u^Rg%-zLKPY>SoJ zXL^cNK2dxDW8S!7?Ri7DeO*-{A5otvEU57T&e$bbf~~7weO`e3b@Jt)!hYoLez=o= z{e=HcXUfL!Gj#64KCP<0xl%8FW)>0)T^}1L`(Sf^TV0D4(ARAkDF%TdI+H6Y0gHv3 zi_%Uz18dI@U|YS&N(3t!Irie4`ot9pLAMj!FRBTRg0hY!%+~iy<7R%$Jr8j9kw0Gc zc>X@!9C=7LI%j>NiuzfEwD0$A$vLhbdxB{pWblLR&rKJ#Om?|n@&Itshbnj2AZK>#=IccB!LNc7L%`! 
z@~MDQEuPO@!kJJxh@;AGKM)!JHmQ( zxn#-g#5fi(J1o9rk>BU~krO*AGJai3{DE}GVZ=x6BR0<#v4<$AmxH^JLc@B549;?J z7D!LFb0~NRfH2>3%uyv~$AQx(!gt`?I6s`Af?|&1vANI!8`isEY;+!iw3fEM$y3Ed zZl8eZ%3Bg5*k5|b*qwoH1vz2o$bFH$?ShlT0zZ!z7kmuh{X2|`$La0g)CLr6=y)b3 z^Edc?j?G}{#y1;QbJL4k_aD5Ht80(Bt-L_ju|5%teGhc?zBY}|7us??&`GqCRA=Ax z{K+{@v-4sfSQF&pOxD&DSqbfN!d^Vd?vc8CKdfNw6K>tG%{BN*MFi$>u)ymEru81B84L+OHgRL>K*@`e(pG1txB!ti=C@U?(6?Ou$1OZG9(G4cxgXe0 z%6Gw1;v)93^rd}i#D%0aX*kE&trLo~BVJj3fLpi?9%}8fZ{0p9CYp^Y5b6`^&DQ!C z`VZE?*Kd1E%T#OZXJD_H0+x#d`s$wE5^e;Da9LrjY%5ak2l6XVQL{Am>jg5&`48ZL zRQP`CItR)w=@VoJVJ_cI#HbUYj1A**r2f(CNnSNO03ZU{I^Wk4x{+x;$%y!5^J>K@8n{IMa1uhFprYM{0cy{`)b=WqhZAE zH`WcY9Cl;f{X0iD^TKJ<7F4b+?O>{1(S@$4yN>}OSAI|9}XO=7nHdJLFbdmDw8D8GF8(dh6vux6|N z^y@HZnKQxvj3XRH$~mmtMWUeLjez0FeNgplk=HQh9gKs1s{A_?nM~my5wqBV*6?jr zMj6-cm6K7RbVb#~2VIl_QKTlGq2Ql|xBAz{aPKE+PpwRaQK`ZWz@gYR^b zd`Ct3mjiFf>850MzdgG0@rnYLB@92w0J3KHOGb7ES^M40Tr=dazJbe7GV z=J@92n}UFGF+Un+VlsUjP_7XeLpLA4qqYYV?lpxdl?LZCjDpU8`V2{0 zx*ad)i^->9gAx(@8e}da`uQ(+1Sap zrLvDQxX1egS+avsP<8_sC^^jfR)p-K^?q41?8z_Bi>zLR;sE3C_fsC}xj<_EA!Tot z#r~19rZMzHr}-rHr&Uy z=Ulxf!FxqagjjIAUOD~{bNp6#i!#OQtF%RTbBEdp@_CQnrpZd_t=AvJm-{dnp*761 zb%iY{(G!ceCM{D*M`mBIzoa44i;Al&Nr1rBDAB@bZJ2tF7lTcA;0ssvdyLe4wLHs6 zMU_n~#1->l1|F?qLAay12KC*?amw*NunwdX$Xm>SZ>MnCvgs zam|wL^3pVgX}6qp$L_)p>KMjBguc3yI3=Zh@u1G}G05?=%)l=dR11;QDjx+mFA)emzs7FROG#pAT{8Rb7>n(!vN^ zD_tLrel%JyE)r(HQbEMEBj{xhYUvIyd1iFn1iGN|IR~Kjjc_44YH2;fXKUJV4eOmT ztWjzG>9VW4x4TM_tcN|wFy-DOfI!q)#a!RKjRs1n89l) zEnf=ENDGv{;>CFMm4H5$S<|eS=z1?vO8NZK6lQBZN&wRECix1yDMEWL$sR*4e!vkZ z!!bGL-}Ri^YLQ;=0V~QweA=BA5z}BLi}Z1(bv)fcyhhCk zX`T+VOKl}{a?|UN!oFR%ee^SOok@=@R`OC>%gZS~Z~ItET+k76#|NuD>0=}w>M0N> z*%4)VexH+VBMss__bm!#$(DSUSQFi)+t&X9>&QK?Nk^ddoCy9%=i3uPF`DzqsDT7)!&!@SL*woHaxOazdZx_exB4vlSK zg(Cr(1P9hfO~@M9e2_8IP@DO&)rr<*Q{QYG$``fO{_=8i0%QK`cp2p#@`S+Gas2n~ z6Pl(@1glzd@7kkhlxhUX=3Tz+CkOyxXyk%k7yJ5Bv*M5|6*fOb`ed$kobrmaclNQDMD~CMUEWfCU zb6ctI36lec79vHjELDIGaC^*}uHhEix-*>2QCRXkV8M!BRo#|YK<+)&!Hf3#g@~M_ zLk5>sNTdq_BrzYMP9BD{wL&XUN?XX6<=;-Lv#HXEDG)+slzs?6syb^eEV(IHuK!eh zXP-{+SXj%se=YrXRf~#sw=PN9B#~QsnYl+2{d&h0xTZd>1*)C;1cepW z28$JQjm+sT9fr;!zpkLFdZHQb}N5= z#O$G6Kp$}@eOwe=H7?2<;tBdX_D?0J4*|YI;mnrqf_Gikz zONl0B5CDEQNJ$?yrsTu?Vbj_yZ+cbos6PrA!w;_-=I0^nLS)L9RC+8t$K?iNJF77&Vwmk~Ar(3{xqIW~06rtr-X`$+9?&cNQW zWynE$e_h0w{S28a)j192$1B4>-5@{0aI5s(2N5Pe^!fFP(HHNFk3z*{*SB7*T&Mgx zGw>zgaK#bitTRw|sCt5tdDCcrujwnQ zNCX+-V*I9#CYJ(X3rM#l z8XETzIPNv4`6tq|_x2lLEQ60qZ1-&UGf@8hh~-z^;e$^)7ytFUDrk@sKG~gp<;oqy z(H1&_>x#l87NKHtOO9KFN01wJ%#LXi6j^{N0-#|iQI?x zSIwei)}Jg$#N?@)ePZqYDfvEG)l--JAZ#{%-)~KW}gm@-G&%@6!V} zi=0k?del24ZOKqvBnntJ9xef=zMLfr__~T~;{8{ix=J0Wn&2WQoj#FHex#Ox01c+B zcb8I*#+NGU>EvWGO2pw~-_D!kZ4$o#&pv2l%HsV!>QMU=KBMg&I$j~DcL-d&>`qC34iSsWBUvk2E_rl>T{`6m!OrkrRRa~>B>&HiVpo-( z1r)Mp!f_2XJuKPC8EQ{8C9On2<4{0IIeq%qOrY$vcuz7H28T6sY=c4s*^3HFDU;NI z>$lQ|uH+o!E+gZ$D0;?g?ZC%ovSfE1`s?@_Y^wSgO1!0iod7Z$g%rov+-q;Weurlr zb8=ks`JskD!)~ZM-;sMjOFIH`3`k5i)i_%`295m7(*?O6w16)7mx2R)dPoz-CxWWY ztN{O)GhhxH1J0iHFT4;XY;)&uTof7z1s|qGF=gC~J&=IAk%M>pEJ7)bPfzSWo_p6m zzSd1jHS}Dg34fjBR5Xx`=#U-{DH$pM>=-Me`=hLLSi!}9UV@CyKHqU80n_=ru<1ciA`lzHB6CaN?R^Wqgg@zKXkyxCfgy5tYPv&ELkRIb5U>~8 z6p)eVR_;#Bma@RoO|1J-n{>{ZZrQm2;t{-JN*FVmmtWHQ*!y?vSY`dbY!6A_qwU_4 zl2BiAXSDt$W#muM`GG+9g}%7a;j@81U=}Z(D>q7@%!_#^n5lrHa^Me=B*-Q;;UvuB z4O2d`G>4%Fb;i8M6L-~YP4rmo)%L7hSIiea4mMvi-dt^p|DbK)=;OMgby(T5zqN3B ztbQCf-{2s%01d-&N`tfNb{BaUhH>U*qm%!~0QI z-hD^Az(%YW(8yocJW>VBn2p4mfn&|%NZeCE`VLf$%ZFXaR~-7WGl|%O#Y7>(4XdmR z$2?a+x*@k5L*2JmN_A;H2Z`^^IWcdC$eJGWmbh}w*I-l#&3f(y6;Mk;?ZO-EduRt< zK!n5OCD(uD2Oq#dZ23zEj<22E<}n?O%@s862vD};RIo0;=sMQ@6nP-{1FQejLOEWn 
z=m_c;QU|T#To%cZndQ}zU_o17dv(N9jC_=18a_XGszJxFH1Kn2i~}HA7?Q_M=?T&o znAj%?6klI2iN9{Ve{Q_X3!ZDbVYZJ=Y`~v;?1ybsd51R$?*maWLB5aCcr+`+`|zv5 z(I`l#_|;K+_2#my z#{sYjKsl-}4v^&v2&#KFK7|M2656!lsvM=0o|BQ+{EpxI#s?X%?6fR^a}hT6r6{@v zaENp-$?bhYue?mkYS61;zb#7ch}no;^{Vk~Y8sWZJ4x^STGtBvt@*67g)?+S>t5awJbg@jsKn%`yGXFD>6&pv5H3c}aA z2jf%=_A+McV+;0{R^RtdIg>n0*2Tvhvl^|M&2e0UhI&*KpF3kr_b?cRAM72Cs(^RM zg+su8O!g-^l;Cgd^BKoFoufVzZ0J;8Zu@BY>0&98YcKHA&-vB%jP7C^EUp+<4!Lk^ zw5wDJeJqasPwCu%zc{NccLynm(VjwY+$bTsM5BDMB`~8m3&Zgejd?`3YT0}DG7jg| z^;WBHIDTI>ivb>Xg(_fXxD5ZZ5x9T1?)v(t`}}Mvcl7`}Vcg$AAz!9*s`yxw0QhQz zzkD@7VjG4B=D|5W0=IHMe(hgwWi7~Y?Ij3rZyziGZe@qScaYe~2Sl&(Loq~{ zH>BgI^xF{>OKPME{>&Z}(rM;*r&;MQuT2EFV|I$E^8mRJ@EHJvUTW(n8m|e*pl(F~ zz&Z>NK^C!))-n!PsNUX>w>kRzJU|8oPk|0?F-#_4DFDJ6aDnf|wQ0mq5`LtA!run1 zs&0Um+J|wwJIX7=fF-!7(F-_oQ-H20tPAL(xFQl57AtN_PZF#4ZQDdCf)+t5o>0g& zYY;51x74kr$#<0zH=n|vrk%oYk+@<2Ozu?gHl2XMT^vt%XZSn75>9GXM`iO0>Nkm#%+y{w;?)*@yQ_<)K4_8{(mel{oG{WAgF|9$X8)?adJs;zW3NEa0NhWaVVCD#c4?VE~b z%`Xc5c%cLiqcnW|S_JDr!!Q|7W|58?Tcxjnr~)sLMvyN5zi0~dnBxk|!KsJODE%(@ zqgn|tS5JN1)>?)=J#-~H4911~_?Chl%gn9)wceLB8Q^!}$0Kxw& z=K(ZOC%j>H?@w|hwA6H$;@bEZx{CoDp$H)*YwH)5wpC#lM>yT=uytdZp1I;jVOftW z*K68mA7o=d$M!mhmeaSr>e=kp?$f>4cBjX$irR% z_8=$BotMEEjW6&~ow$brM`8UBB%a8#Pl))sS?qIc$D9;8!cxL_!kbcG=B}ni7=A+j zQ&HyFWV6$hDn4o&B<%4_HYU$Er>HdGsF}b^cL|--!4-0XOrSJ1l34S=%OVcH)!Us+ zvp>KSrTgXeW}4px6(K#C+X@-2+N>x}%Ry|q4w)@wL0(#Mks*%&mG0WuXCcb&=z2b{ zpsR-Ev==L4CEn0T${3H06<2-vS(ITc9kSuyxB)Q@iv{~r-T(f_{r17w0y{sqd1ctR zYp%m+q8*TQOXsFd-NOFi_gyCq%hktGd3J5d>EdrM1OB06%HLjunjD)&3v>ON_i427 z5){t-)?O4}J)kgJzv0z_3VPcbRV1{rsS=BAiZR_dnJ+rY#-o=^+e4`2`WH|y_p;U~ z1zeFHf4f4HJI+yR=T{la<0)=jV5|cKY?DR_t0{1YgCB>rH-&H&5be=8x=>ju4u8nt zlVQ&fQkb%pR2`MJgN^^;M=$||rDz36P{m=QoH-t8-{`7Ki;_dFnYF~A3 z8A-jw)Jk^w;%hud8&hPf%j17o9U@2uB^QQ(&5&40+H@QC#{otN}BJfy}?;)077=*{<>9N45fg zGQAtvQC`SeH3EdYshdtCV!FU8m*459KQG=I!W{z)srjIp&F>2_tNaV7CS1Qp~W zM)hideyFQb;LvjjA^{5K65)UV2eeibzG~n}q*|CW?PqDC@KyY{7`Iz8{FGIO&nIJd zROo+nFn=#&KBmvkuSgcK)Yr-uhY8tbLkv+~G_;;vZ}4au#CNTJVD+~{@EroaHt3x! z(BXtLAkxh@V*GLNAmc*-yTLC=gxZe#EI3h>(UPE2mp|izG8Aq=&Kt zM>CzJ%fBS!^OV1YFFT0$iO~iEZlxPEE*hBQKM!}?i;>YQHBUdyteApL_jwCEI>3KI)G{a@}KbbPS05h}eRScIcs)Scqns0iLcB_!ux?zYLg&cx%7^E`)4A3QLsB06yyW zej*gB0`3cNC>WYjWTm}V2l4S^->80FuWhQ$ry4yqv%p9N2BY+sNFLJ!p(4AZzh=$? z(Ue@_IF_Tj3CbwkmFAX``(FjDNL4?B2;HD)pV2 z;h{fxG%;SC%jPd!bGUQ-iq-9Jo>xuGpet2ejg1lrm5%W;*N8bIpOX)b;O4k}v2G89TN zRq(!MuhXd^D_MbRV-kw%3%;&6Z3z$DqK+MXCzE~7pZMnz_n_#n_Wu*}w+YHv*ysN9 zH)?MHishrZ9V>GcibSMZ&-`!Qkrq(j1Zaa|3{Tz$?$5rj3R0kj38;Re|36`l^84qA zP^#w#pjyG!W@wV4LMj)T4xJ&Hr3GjkJx%|(82KL9=tm4Wx<rrbiO$bYa11tF~;LPL0R0o)ouAw6J0>-i=ON*VI6kjc4+YLB&M)$Tc_8P_-`?4PHrNU%iGhVgC&c4c_MbcKD5tI+jS z8Y6Ai;CctBc|c#AwMYt?pwIzQ|KDMiN+I&A4`mto)!Kh>n(jFd*#*T+fq5ej1_52t z^VAe$A%f_U!T-!;JTEd_d{>_oF2?*Sp(Y!6`yc`2413AB^asn-&zzOPD3N5 z=1yq4uK0U(hFaF0|0js3bFAJQvUBj{3$%Nv1da6} z)Xu{?PzuNTEzmVV`Hzv`=c4hdDg;XL@oQGJ(9n+57=8_tgF-gtQZLSKo(dMT8@hMA z^crx?np?MokQ&zAGCOKs<1Wq01IfVYw)uwh<8q^xLQWggrr~ zrD~`Y$SC@HI2au{^z75w6O3dE3SiJZsmYIPhYD5#1wdQKISFTw{k$SvViF6b?{@@+ zaPXw00G0QhWMr*)=Y_TyHzEemScoT1h@Bg5#8N0Cc7xs2s-3QK;D&a|XH;As@!suyBvGiB0(vyS_Gbrm;{WmF zDs`|62YAI_mLp6`_w0sV90P8P!zPGX#TgYpKo6rnADf7|-1Y)BC_3{@s(p9qT0}oy z0h6`dP#&8LFdV`p(_e-D`^&FOVpV;ohK->;@U<^iswIQy(FFIN33VH6*_4ri*l>e| zf72L=vVp}Yfi{R`@3{_MsdR|zEpmO#OUF%4RzgcvjoJ^5{V<6tvs)?I*GHJi0H$(! 
z>zy$);)TvdeLSncb}1O5%L#{enfc9gvaSRW=UA7}yS*tcP1iRM<|4K-o{L~NTR0=u z|GpUm*^TC0ILUUuHzHsl5T~)EOIvcZn86(9ibhDWDw@^jn=TY0vJ9A4I|f)eRyDHJwnoh|+G^qcY3(*m5lm z*dSDK2BCqELJ`w*OX~JA^etvMKH1VaQ25Qr`tgrkqyOF|>U_hY=vntGh{$H_)+tt9 zR%3?W|N36Yb?R`al*r55CB^x5kcD?2s`N1P1r)QXK-k}*^eL#uyF!Fj(tReTI+y8S z%8D$I4c-xOI@jm8F3t^$OhW}2CK?&w>q6DLnsPyZdXTaMf6Jyo9^eTbLx({gEs%-( zZ~Y07JLc4W9YYQJkvXj=-9MFg$bVjVT;1~P_4Pvb+ic+NQ<6IdqdQ+GFHlp=JvaxzE&uK&TS4-VPwh@-LNhs`aeRF6jDShN&kU~w)%Zz=D zE84F103a}H&=k7`Q@cw7&{7e+@OWd-mc8yV zb?^&tMqgF9H%Ruf5U#cVdwJBi>DiC#Q2bO6uoI=keTCx*bJW9n6;WXN4j6bCiG z`Op+Qmc7-TD&G{*lWw`gqCB_T9v#9Wh1Ec;YDI~&?|^JcTfg`|fH<13X8U2kr=c?= zL{>`ne4wz0@9cq zNTdD>HL?FzfN3M@iBxuTd-x&)Wm4Shdy7V_pg2YsX({k{$*6eCdQ;uWR|{^^|D#9} zi*PnwxJQb1qugn6sin$l;bFJtl! z!z(~enLtzl=ka?BLN3&~1XmZd1lBa9+;+=5!l^S+46-0Q2nPb&*UG{Q$YCJH0V3fj z6d-Y_mg91r%m)alj(WSaXNH)U8<$96Y4)t0QHHVs32KV4oKPiZagS|SGWR;(EUEzpN`WbkHSL)go7I2GjvY?HXzI+5zb_<^db~l zJ=KwkJjv~TzlTc$X6-TGyY-+if2(d?V}hp2xwXmWT+yT7t7d*bu5x7T*)1*}(AWds zDgu!T3azwKZMzsITs)UPS`T6l+|ZXE8K5r86!VkZ^aB*K4oi}|S z!jxDJ2xR!`bNOYn?@T;9YMb~Nj2#NFEL0#o`e~u40vjuMrVRY6%QZ3rH2!9Lc9#xc zOqhe~VAkWK!^eYhCWkr%!Sp2uOB*G&K^8o%?-f{K=0Y_Jl8cI1}tF)1g^0&dIFQN=8PM z58%=7whTYs3E0Jv9BnS;aLsOl9A<$Un3|cRQNQ&<>D>HKeQpW;kCEypX5_F9&bTnU4r8i> z8iE*j_Na4P(Dppj8yzvP>J#aWq9_Z3ojpa;2jKQ7Z>~SLfe3L%R5@^n16;-ha9Q8E zbfvcbiy!JP@Is45GOS;~?COyipz-aG1Jc;g+NIvCQSZ6%C83&wS(||9mwuFB@hFUi zOq_1vuDj54t_VS2SjFr9iVnElQoaK?;kfxM!dDv}5ET;s1uD)H(ArlJu$Nm7e5jQX z43sj|9<(LL6nHqX5x&d(F^oD^l{5#+-Y~U~)ua6ht{B$0QO&st`?92NpDs6FpFe*K zUU@=?)<*iZ81|=OK%CwsW+wZAZZfx`nKcyJW}_F#q{ch3(?l1}IcjZ8>`zJy0ceyA#xIYj~cO zREZ+3ThWg3FO4Dogs|@3)ekV);PM)LujMtYb_7XTqo0F0fkf% zATB*A7PS9KYz*jx1VWC60_Np}$8hoysF`*v0KNzq)0g^dT}2>sR6hHeR-RE{Nui7x&s`j*#%B7!TlssD}4d z1OIsWgx6HH7o$&f>qSVx5P^`PRKLY5>R?~c{~>b`e#xPNgcu%&1vPenwMbSK7hbuk z-=-X^Hx2wxHk#nK(5O-8I6L2*g*;kkPu}DO0iCVqBBZW}UZizIW>!l{qyxJ#pF3=zTKH+u%r1xzbT3_uT zgGMmJXLuc&IvNPHmjXL`LWtmF?0?n7YjRNQ2ZEwhFHfJ&MgIB|o%aEn8%UKaTgFY1 zN(`>o;4c}UBTj4y?Yq*!)>|FsZ$SFfzsK|8R4`PAjE!4p9p zlV{|slkJnW6qml(AHxF(TQEhyl)kne@t?bI)1MotZsDoBC-R~es^#@6ut7&2+z3R= zX0^R)gGjI_u9xkTf$yFFq4oZ@h@M)Vv5-OPm)5P${1^^u<;4uvw|dk5a(tM>f|tbN z6Y}*7grc;LwHuR}R68R>KY(cqCAw#2+3V0{E$ItdsOGT<&1Xr&EPwglgLT`o8z$x; ztGnG>W$mTs?b&sa?js;tg?m4QmH1oGu%shT{}*>>4|^lXATvQLy*>@$=K|JV04}_c zjA{c#&{1TNGEi62zdBPFP@_{JL$1l(CS#)nb2z8xch5n?-fUD_a1XtoAoQ}}{8aS; zM<8szr=B=?+;No*Dr*OJo$-=Ro`G2GD`9XZ0wCe(IBe{-cL;KzwVPk7)XYVi2a>;( z4-Fbr$fXGTO#*3`5OP<4d0TDF2*~cW^6wN4C;$z)szOBPJ;+@nAm{cML_wRMvv?uJ zZ_@=jW0^%pI$m4?mXsCF_-mFrbUcgCI*QoA8adoHoVfyWEb+^~6tLZx`2hQKUU0K$ zpnj6Y{v}$U+flWFr${Nh(D>M(jZ5a=R@K;vWyOE@y$b6#X2 zC|<>`&M3-W6|q%=im!4{^C;#OBQN3DUXFjT$wnen8#-%Kwhaug1cE#duNm#{US8y zL?y99a$mxD^j>}|M(%9?@|9zaGRsm^g@C&2@9V>v-R{Z%QLoLsx+Is%I3r|!GL$B* zJd(e)2~|y#ZFarPHFtSC@BkV3Cx_eeZJDI_H*egIhaPObxnb2zj^*1seHhb~zDz5| z3)Zgayuf=j8t4usJG!MgtGxhO?)oPQvA)=`5E`~@g{~hQG?k+t;KxPUAJRpD5dJc}4lDLD{fPcG)*yvPJBN ze^vruI&??dXkTnZ^Y<4gsOp`Mg*gYa2d_EU-nl>JdmkiErud2N+De+P!p(u8R8eC# zdPQyb73K%xN;LP`QZKwXl~MXSpSMqeQa|zRh4n+^d?{+%tI5Y}Y7Ai+w?8={*}CcA zX0-|1@2J{I)@o{M`MwOqE1k1~Xy!wawK9R##kq>FrsY`A*oVK4) zJRax%Io*YlgKvzN_xs0x=6saXl`NfPK0*Ms9tQ@pwHBThTlYSw1{sO1kvs$0By-F~b@ zUC^kk=gW@lmN16+wXeT_Fj35IfUvVP$QnpYyQTd-)u7lm!@1XKI6t{uA~6}%70!nF z;o}W`nz>YCm&rq2BlmCIwRls3I4b#~*^rQxk&!Uxy?#$}Hv*`6L;1#b4Q~QOy#JWG z2}s8Y8Ea-|XS;rX%zMwOi^+{CB3UuE-uq8|aRNpXOvv%^v7=1KA&L2SS6ft-Y?Oej zwNZl$9rEj0Eavzy*PzO~@DKo`=e4iSihVEqB}`SnzcH4D-<-(($Sj>1b2wUTm+QN? 
zYJ=9yRJ+cXDSJY6^H}a-e~2G%i{a0u=TwTf`{_2{Lccd+h2neRb^1|#7>!oLXX$q- zXE@5vP*9&^452AFA?|4Gb<3S7cGy`^kMIKacS}W_FdT3{^r-Po;WwUxQK(f0&1{U7 zTKVYzUI7JccYQ{qE&SJ`6i)RRrpll8*qW>s*yoT7rD7P`@6q86B7qc(xkwh@$8uDA z;&wvo^FZ@Gq8781#iW1jG9z> zst@KF>XJZ;$HnUuG>qx@OSBsr&af$^K*)Cp@e8~l&6`4oJjmnE_24CHiaCsGx6`}E zMn--;)o5v2@}p8Lro`@cd+&)Kw~LVxqh&c|q{E=V6*$15a;r zJ@5w`DE} zJUKRyE5f|w4MH65+B{6TwY#91p^`mQkIcQur4ny=$5z0g=&_mg{pN3*JSpCFMx|bL zjp}$JOz-g3-$4&H**IwDZ{6e!Pr|W+rV;C>6 zgCm9qICx)5QdgX9IVG!kSCA>lW%UZ9n5}kDrQj(#F9U(`r{RoZMPxh+6XH^|Z>H}i zy*#6u9sZV<;+mA~u;rpgK5`V7e5$96Pvj2ARohCL;YOUmaCHdhsq3i%ulGFGv4yVR zV+9Q}(R~ksp3hrPR{8kGqF*$=^A&#v?;s9mX|N9dv{ZF?X7#l3XZTv_DWf6(B}seP zd5YfwAi$;Q>;;u@#{X> z*GQQg$}`Mle(74s$W10Gxf{oHagx35%-x96Ua)?g6vLfq4mn0Hv8-c$BjtYMv#uZ826=hE`les|dPXJ$)G`S<_1l6$A@v^fp=(;DyX zuahWI8%@15m*s+zrF7jIOOK54Q`pb5+$0XdH2kaL(MRN|Q|CIA74K`er}@7+6%;;u zY2~glUxMki3uO0RoiftPm-bxeby(ILz;BF!*kKex2l1zt(xA6F$EvxVY#sfA1?HgpSC~#-F{tqy8!RDS27bAY)_s zvu{c{K0Aw})uV>8OhKKdEf)VMB0N?Ho^@#cLyzg$Ie+gVJ6mkjQ?i#l3zDBj^e?Jy zJ`9))IC7925Sld6CFq^T18DKhN1jtg?kXaMo^bV|t~Xyuj1M3qyGqUq7EY*n)=JN` zhtSD>=eZHDHZ930ZvP#9>&1!&xvT54uw<3bPG-vEi;WcRGD{Y?RbVY1AU&Ip8bKUM z+%&nH0FD;nnljHF9!ie0*e7zkabIp8dOx1Lal`IS4LAI>{rz)k7ua3rbAn%XeaYrl zi%+f2>^U-c`Q+fm?(d8*uFKCJyx3d3S8%Fi-?*+;9p@h~#a+Sl&x!fFT5cbjAw}sF z+W&{Pw|=W~i@JsZDd|mj3rIIA-5?>|ozf-UpmaAPAtjQV2I)ppk#^IebhqHQw&!`? z?|T1$_os6%F896fwf0(TjycAdbCI~Nt2!Y8xcoLspg=@N7#)SiRdQeOt*6Q*B1*qK zCY|g06ugfv8@s<$T3;1R-H{Tn=U}cTUp=TpfVbtta}T*H(=L-&WoGFa(YP-9g!UnP z!Bj8ad^>#MRoP*j#|r{xtr*cr*to1wK$IfH+NkqBolUnP=czS9Jg5d9>Y!tHlzB9%(+x)+m+CCK|N zH=+4?D7t9BL#<)6GsdI0Dxl`KyqAy)!&W-tzNct)sGUx&CEp(v2zzPT}xHA(=C+qb*88?6;PSz81sB zcoviUXbqjS3V~a3`dtMU0$vgaFXO9FM}+t4AF{FgpJX4A$MmAihq$tT!*g2qi>YXt zLmF>u2X%mm;~!f#L_;%CM#A)_S&-)mxAHAv24s331ZD&;LNSwBxKxXR>}F>%eb}iX!E!WIUp!mTIy|v`|^cje@l1W-~|} zJx$Q{$M-yz5v?-R^!i%^v6}9qvLXgBE9$8s;jgrjP;n;ArmH*S7FS8z!}>{oj=;-C zwL3jRhuO-{#^?}USnNl6YF`=V%3fn)%qF}A+aFU?o8O!)S0!4)Fnw%R>6mh^6?wz` zmtwP3tniB!Q<=z(EL$(bT<_HTQ05W5)?Zr{umjyGuluuhMFusuub_l@9-d%T)1@986zO?VV#PI;pqj8H|Y14h6N3CpAY)=fT2cIAd`0ww%0twBy71vTk%cAFZvhca@cPM%+lr%gfoI5 z`@Z*HoMfALc{!wxh?L1zkgK2ua8#pNU2MPLlPMm>&4d!KFNgkQm4UIJ)JughO2kl! 
zD}}LIxKRyEAVOfBfp@5S&fzmy3V#5@M|hGFiAytEpi|kQ9D}f_pfMZ*{9%0eM=zFAYf?~=M1pFF*^247}2mn zdo-N6xFTaAkA$i(keH-eXGcBi-?=DT;W1NTCf9c#3TcqE(?2(&PfqRotD|Y_+@P#KyeVKcePND3#;241UCih~ zB_0$YVWHBC_V|W?J`RWM2{JG9h(ib!oAb!dP3pZL?=YPRjw?j|Yc#T1Dj|Xy12GRP z}u%fB){Wbc!RmmAN;__r;sHElR;Bcq{ za%L$)43>#C~;3j1iyaibjJx%krIT( zBb(J2wQ5`(E*ZvTaX8m8-dgU^KSV_*4#)-oI`D`#@%&YJSXDFo4uL_C6a+|oo4?GlVQXIJaF zr8cb1In@oDD|4>*lOvQb`)pvJQGJbxYg9|gaziY55{5nBdm$B))ymV4Q?Fj>ffa=* z*T1{qK!L`#G4XB^5n_}9tqZmG5Cp#BZHk)iz;50cqjN`r3^1ChxXEBkWHXU*Nj(Nya61aqs z>F7{d8zD4EYKgo98r$g}frR21o_>$+jj+{FiV)n8i}*mVfcZlk26sM$*WFzn7@jx+ zj;D?k1}?2$e#us)brQ!5FFTI?+E4Im-*a!4!{^~B`0r2ny-Mvaoi~FN0LaeOn0AHe z`k0u%BMN)rAbJDdt9^jju(Gfn@e!&|@Waig=4lNjZ5T7v&Y1)~8f)YrTNvdRUe6hA zU6eYmE`cCi;eBX$4m-)OT*k;&1m$cNex#XOYbqMz+nJw(o+DRDZa<4tRToQuy<;1+ zSSt6?2g#3)>>_CH!|C$?GG<>)js~@XGeA5F`A|Q(5^3Ll@4$Mh43w&6pv-QIo{jM> z1B7F8g4L-ia?)R*thzgs!-~N+FZ*fuqh#%8`JU|#y2|R)Fla#lCY@TfRL>a*CcX~U zt);ZfD&R5DxHl(@Ur<2H*5n5lO0dBe9n_TN!C@?SI*qJcLSX9O(PYs zSN@gSJvJIP`?W*WN!XjmsjtL#!jML)*o57wGYqdDK>GtN?Ve?DzZv~#aU8VW1DCo0 zw47za+~Xs2LCw;KaWdSb8UvXE8!04-LgC{RfozaP?G%?NU2%msWh9b5PJ7huXs;2 zS$czYjzy}`0J>Fj0II==q@IoQ|In)_6&d=XtJ1Q$0LTIkebxkXw&y$W%J>+5KXq{= z`WB%0<>vZGiWywU=V}BuCcvDw1}R)9l@ANzDw`JV4>wi=&VPURqL<%fA9n+Say{>U zsVB#Le(*!9P#+eb7E4)*U?snYDWPK){+3ai-B)J$!!yu#DHFx1?80guYK|zVaR~0m z)>BvZSr2`YZS5o!O<6`sUh`v%`(?&4ZSo2_uh0;shs+a)yAIXVzNSZ|ScK%E_V?}| zp5RKn3DCo;{at*t1{`xlpFIF^DwfP;3Enwgnah&-Dod`XG(|;diN9$~-zIy2{8=#! z8m98#cfmBmLhl(vX@J?JN2*?2{&XJeQkImIa1Dd)?M1R=ipbC@vDXTirM8aezdrCO zFxWk}P&5R~-Et|6t7SEe^CpW)?+UHt+39W9qV#u?aX&N7?I@WnY{ocjXqlx7Z8_?`S=bOZ^Bs zft3{og7!xmMpI48NBqwn+4Ky2*C1u3W_#Dr;~H`9kT<>kLeA^vfsb&-GI-@6JL+}c zWvwU&Aq*ox0p7XT`N7AHYdmz`;oYVM&`=J#u3J2j&GW2?>?>#4s2_g<@U8ElR6Fv-5CwGyU27JG! zdA3pWgJ+s&!76p({9$P5`lW5sTyq`S;bXT0woP|3%6pulcJP!zQ+cO9xOLgUQs?v{ zQ|L5VT)9Xtp7(*AR+@}Jg+F;*7<7n3`wG`WHriIu=Im5gTjdj}2Tc>H851kSVK%(| zCD<4ar_m$kJezntAGwY@;#nC7P5-)32mf)qGnAecd$_jT@E6rC{@N$YJYr7ms!1Ia zjSxslIL8HkkgOwPgPG_R=|R;_w4xaSQq%7%S_~_CuZJut{jUz4Y(BWj<-ih!tXqTt zbRb*FZ7+;sZ2f!lD?h$w{-?;PdnKUZ-ivLp&~VDHmXgPSPC{n1r)Y_X*R$l>hyAnA%oIeFCb~wJ`FK0uAf;@J zn5Sd9rh(dg;Ve5q&$1rBIZcw>;>LCPm=fu?`7Ke{3_Rlb2HG)SClPzrwK;SHjkfZ- zNq2T}nJvBVa=oVU7iK6UN}Dlk_ZliNymti^c;>({1Nlw(ya7W{1@c5V5kru;Gf&i? 
zTpTY{mWpi^1hdxxLa<#I$^6+kiBIUxfZe^{!LCl>=Xx1dWd5_^*bNWNN7&w%+CkG~(8??GqZumAg2&h_=C zn3P<36bhlaaGVz`68<|SnSzO+qmOG9bG&qcCUUbI$3(DV3-Q-Y4Gvh{W-l&|o>B&^ zSo`_dcm+D?XL%g`7q$C!qo~$jqiN`tK7jrW?%8ccBj$%D(Y(`g$fsrS-Q|h&aiwq8 zyh>nz;oDR9c?jovB&B=QbFjA$$T7sNI!_)wXjKj+1Y_S8>mbXx!p%cXEm423y8bq@ zSH${Mxm4@zXye3Qh{?y1n%jW(Dlu|5h6SYJP^Fo93L>&a*N^V=sNWh#VDw4yD279k zRRkpW)J-@JRZpoyRg?QLP$-lxp&t_$bp}wL$GX&sJw-x#5L6XB?p<`KSvn2nl4R7g zo%}KeqWM@a8HsWUx}F2YIz;Vx+)$}zKHhMg=e&>Q_4l*!F}}$Ucco>N+Ydxf!#QMV z6EVCnb4+i`&;9lMAj2pKx%81#wWz`*E)9&z*FPay0Kc%OD!DVzrj(uXjq|5+s1ohV z6)gH%mr;5x3$xJ=u|~=UR4kTOqL+o`617@v{x~w{S+@MtT&TIpJzuIcYhHoU_CvNt z>|x={ZZvZqcYe^V-GG<&Mf!VAp$vx%1hbCPAO)iRH}gas=_BUfmtWg9$!|gVR&v~z zD3P^#oN=miHOc|-*is?NnFQgj(Tv0pN4UvzG1+^+f{k#4HS(QbF4a%c^uB~Z&vzC^ zTTr!{dP|BT_xakIYPC4#ujWux3QRAPq9~Hx_}7*ibV-aYpE176#7N|#mr>3>F0gSy zM`De{4pqneOn{i)_X2TxA6rsl3kn)7(*exD7`e1e@f>zHx{^0KS6vk*>b~jUG(x2F zM>kj7!An*GqJ~U}2>$b$5DBmx_C+yM?u4Q5A-@6E8u#T7r!PQfL6ViDtu{RrbnkEL zTwBzI{*`Bti)*kgH^Zd&)B5kzYRHR;5=QdNK@(}=ytD-vUS!L4P3qv;_LAVE)#&cq zU!tx))ar6tH!S#Uqm~<#f>(aSi{y4+ z*J!2`!k18|lF2%s`{zr#H++N5twV{Tgb+aGANORx9 zzsvE5tmU+9d#cIj({k{~Dk%w~eI2^03oU8dYKEE&`TVCy@$kRN(S6=xL5h)3$BWv_ z3>TQ6`Ua0TpX_^xWzNgM2d(7`bWjXA=BzWbwj0E&8C+DI49e-%qQT3j`}|J-H@9y0 z?od=iS`8G#q;Ez4{u0bkfYJ0)iW)q0_^dnUR4o9EjPpTwmY5ZMmlJyb#b{`3i{nfQ zhL=;fy*(Heix`p}d{iW+#ET2PEeY+pB6dq#lHd(b+Z~)fgQpgQH#fxz`=B0I-J^WmavGzG)xj8Et7Tb*G){{kpJmZHRI*Lkx|;olA#Mv&|JL}n7sC42kS zr`_Yg-v!54eFXmFG+>3c1hq=_Ow_56=(wOt0n=_~wpEJan4V?)Or4yaY>dPw29SHu zWB~;VX0Xfj!L>I+ka^GL3}iuqz}u2{@j4sW1I8MDs2Zl%mHMi{ujp%{HdMlN`8inj)jU5#pb3XRXN-%?+;s_R?w)sLERRebTt1R`8y+hJz4! zN^`-**F`a>EyJuojWKqi6mC?$kpj{S2WrSuEc41qt^6P{B7I1M&zB&wJ=HJ9bd#fJ zWja9y#`z6hGXaAaqn_y%|n&5!5^B=Er4wu^GGj0H!&jImu6b4$R zZnqtERw(cRteXWQ#L!!Ddg%x(9?*|AjO@94v9=E*fpZ5M)te-J$J&8Soy`+~b-6)^ zQ0U%xgezj^ceziNJ=>wLB$l=dGKe`C$be9vXhb2d0DrE_UOoVSn_l}osy&=xTko%y z07$yS0l!`Vc-c`sN_e(V4|1r*Gtla%(qerR+K0mj>=n6zf^$Y5*N;V$<*u90>G{1kfn*jE!(-)|T zbYFy+eWG2{&MlA;$AGBp%%Llo`u7M#C4sX^@h+oX_#sA#3cJXer_2;2``1)@cjyhr908|9=}TvfAFcFHE{S^ax)qHU|HUJ zXfd|*Nwu=4j|juI4f%-)`ZEL%%2AIzl68CVmjS;>EaPyccCQYxGU zc#OluOYOS}xl+&cXnv*dHb68E=mLF#coBA`E`qsaOdN2AzmJwk38Q-8lsV~TWL?RQ zW_r)ETf3}@{4!pmYU2YVbe9TkX5>}HUg{;7yw91IMqNgkD0jDeHO<7d&GhW9;i%Z| zE<3GYb6=wuK&$IUSZCKUY5(ZVn)o~Mft)x8ps4v(oY*i#WAFktAi=2p)%b7+>Jpqp z^=ge{G@$y+wcxHIGWBaR+K=qxK(W)Q^}D~A8~^rEdc3@%+8#g-9YdHBSBDk+f++ul zy<=d4amvuMcszOsmg;NZd*i*s%5;GR8k1O57V7bf-=Aw{3iz9-^h~7S?)?({>1YX0 zaYv6dlsK&Y&VQT3L9j#BP~hW24j)3TB50T~@@Qkj6~K=?a06^d|EMJUAe?Jye5h9r zx%#c)Lp%N!3lw*3XZ5MrOggATWj`}Bq{^^WLZvcKM_@<&o}0!JQxq{01b7M+W@iVs zVVPpMM`duyU5T@`KLGd!Gb-!}+*QmBW3`Z9AceATd?=2X;lml}1yfseU$xB_=L@1w zWe;(zHpkC5YrA0PCiPGd*&yLIs4$?@$9l?Q^OerUNa*AFfTxX@<~ZBuv`xKa^Q`5z zR9pgH@%=MwA=o!8YLRCI{CZtrABk14K-hODgpV4wWS-8$ThI1zkS+pm?3mcyzU5@a zy=7n}ixHrF>`2%eS+vqZQSg%n4QY51y(vmV14?KuO@d3`vN%qAipTC%jpRA?_S7M-nLy~;FWaJpD+f0KZaP_t;?0FCT0a4C?HUa}N5Ss$ z`Tu8@X5h2*2CTm|`Uyu%1C*R@8DLs~UaU^VH13CCazf{TT-ap1LTV#bLlnx((Cueg z>4Jwr6_cxkjA7)rQ!);6Fm>%7SJ27Y`ZDRxCk4POYka`aAU`P*tDJ8+_?Y(Tv0UF# z-ir7kjAg*w4^#(@16I|=r9SV27p8d!6CORmW8~=LdOgQJo#1N z5vx01>k0W3q6DINJBuC(?trgUddEui-mr1|g%RX6G13Lg{PEi%ViR?|6`ENawx>uj z-7Dz)ejifbI!rw)`~HW&qEqLLY*3)ubYh=AXxaDIf@hl6+q#gj@d8W(&=POl?o+PK zMqa8(%9M2C z)u8tYSQdXSG!N*A0S`vG=#YC8pkSFhOarUY(~uCow{f=K3e>3g8|b?I@N8C&dAYaT zH$yTBp?~^itA65{YR}-i55Gt8IJ+w=ywKE?(>@j#MJa~xP2rct7YOJ*UGRGRjzt-( z!1c!ZfRBf4onSg43EpcqHm5HaJ~Ga%7>^O{h6vX?D02;AMPYW)?gSyw{7;y%3c^;8v07 zKA4@Q#UO_+L)KQeFb5mL(?i&Vy6|D_W3@6b$&Qh;|?A@bT^U^@&K>rV%OC=1O09r6A~*S zx@s%)6d1SuM?3}%rC6S}}Z8P)gr^oJtSev|~?3POkwE?R!yZAcj`i2{< 
zE6(Jiy2v9D2*jrBU22U^A7M^0^K9?SDik;6m~m@LyofTPvDY9xI7VQRS;Xc8J7ZN2 z*Pi>QEDMZ2lL%YD@f^cEiw5He;Nu*Yb$WJUne7l&~f&H!5dV@R2h@d zY*p9XPpteN^4vUZ`|crIppRax4LxzTuzTEJR|Xs#WM1zf78V(bhD~Q0@WbFWa$SN) zq0-$iXRs0AFxs@@ChlMQg%qek=mrx78^JK=4{(+WIA*Wo)2Pg@N&>Xn>?!J*zt>8y zCWjo=pxkebE^C;f@wT=Fu$T_`T-{ceTuBr>LPm8l7zOf78g*W6yy10hR{(x zfF+66wB@2zW`RZ7r&NOF6kivySsVtVxokE_?od(eyJTajY$@S`T9{hZ+MkLj>x;kD zKm9md`$2XVqlj^^bvN>ceNh0i=PRxZ9iDZqOiU4l>eDDn@Ypmd_8A0tiw32IVapvh zxq#}#c-(LTQa$w{!v#2^cxdOKDr>&cFt8xa99cnAFs{cmtU$v&LKc`9+ zMg&wDP59{XxDHdatfP@h_Vt^67t|yH=pW{FxE-?_ag~lImv}iz~7d$RbtBCg$bF4h>>|lT5 z&V0DL_3DfbNmhz>^cnN#`-`~nIymZo9sF;RI4qX#0V%ABSRi=GYf2a^k1s^NR!swW zBmv=$bAvTJon+L7@FV+ZF<}f(jH`e<(W(6o%vO)x8zg%ky^onmFF7LC=`(=Ig;_6+ z4yoY8&{ob3nBp6W?H1;qIX@qAZac7cpk6u68Saz`m}6!qy2XahkLwUG%Y2u zFz?!Xvi@01mJn;xyP;<4l8oOc%A0r>OU6whMC5b9Ocp-*dfBj!s*0!c`{4<{kPJg4 zhi^chNAcOLO^F`;1IXe=7eo%l54;~F8LAonGQZ1viZm`=0KpJck}&bxc{0kVB+f`{ zd)6|WLKHe7Q;^}Ns)0$tgH!%4UuW)D(|@V7Kxh1u(sv|04i*=^)Z1o1{w3>nY(NJ|UK!!i_F?y%2 znFvS<+ho&kjV{w|XdIYKXbG(@pV?Y4wSQ_1(N^hUif^knIF-?d-cZrbu?qo%-s|Gg z&P(C20y)TEw8x?jXh;{V^D_80h1o8c%ldTzpQ)U_zaR>2XGrq;kjgLSIQ@);1MF$8 zKh%cfwyY>^i-N|c0VnVD0k}Pg50;`< zcZ5x|g*b%Pmva%Y*FTlB;5z4}fA=YFCk4xqOnioDbM2cAjs3%XmFJZ`cObKU*4Tf0 z2xsx~>UA>9Ht?J>5B|1*L`*1vy+O)RX%JbK(A@f!!4Y+O&5xsTU?GfnA})hvyNcQB zZ#IOosH6dX8i|lK9Id9;D@6XzaGcrm3y?NGnS=)dj{Q9lY-cmb zh(uvkaUMwY(RWY7YwP6pur~_lw&V~7vdcyats=j7$KL@voDf|0J&y7$uUsK6w_Y+x z35(z3yi378n5n6?x_+pnu_<2s6PCgrwwIux1vkTDs zpJN|);k7`%xi36A0th4}`H$=nwcWQ)JbhKzg8wAT${Tg4fM~<8VOX(hgx+bPRr{5q+wHe8^o}dI5B}T4FtV-q9n051auUl1#4mOcoAW z(~g%?gNw93b=6V#oN6!Y0;k|IxH|h!&x5YN55|{0gZAUZR*rw)p)kb2hPTuww@mcb z##0iVO6QeCu(3(GyFUP(%PxhOY5aJt?*Jcxd=nKa-I*k9g<7GR){LCWJ4|4rB4e4z z(4Ogo){@Hwm?f`Mg{i`C^)$YX50ZCOl6yy%M)D@vFKY-xw`r4#&FcG~+SB;A*SS8n zMZ;VVe(T-#8mAfXuiy01t;l|T>$TtL^Ba`ZzXnGroQTu-qy)HFdrZMc#o%=4i38#? 
z>Jwq?cxutbg4dSmN;%9bt?%U^EJ#Q#BOm$Ces-sSc|WCWz@E^E`pcqKL&<3QhS#$iaJ?+&zia&C^3{+>Vh|7?=2@|O8D2h`EJ=L}u81&pRw+TB zjs)EE#oDJILlA{5K*nb}#CP31w*UwM19ROZPNB9*}BDI+gCSc~-l_Ko7%J1Qs5omJ%0mxj@7d85u8IVWE;Y z=ztg5ad%*qEf51db~Uc}copC^2jGOX4yM=1a$}Rie;PzV2>hR-IF<`OjfAT)eVK*q zAkmdDgaCHE3dL~FtodT;^%nSMxF!)JL5~D>gN2+*0Xjbdcr6NqLSi_D|4s@#xrx~E zvFjj@b56C?$B*&1;|IW>UCYDv=S z^gLY5nTme>BsqcnSQO5(4Qi=r$KGwyFLcY>o)KfEYy;AHTb zo2wA5|Fh7hlNLOQW2N*~6v z^TGl2GLa@y*D6kY#Dh!*^Y1wvazHE^2yyUCMrZtLE~E`*a05Lu1vGBOAhvV~zH;&N|ZbDj0h>L?tjJG$Cs5 zNoa!-{y>EYt{<;fBsMq+79LRu`5N?%aLgbW9HKP5}?&^25u@Wh(vxc49 zy2#}~#!wM>bFN3ydiXCh6{6mhhBCN>N;^ORFoS%FB$2On4VZ~KKt z__)|C-glD0$AzWUIA#DJ7ZH!j3f6z)vMH#xo{fi(%Q%!6z7{2j-m|X$F3MmiIGuMN zZtO)zZZJ(ikacOK1Ug@abTyG2Es*!J3HQNKRa98cGf3KZ{HYk7A0w2p$4N`{fb|z( zXoWxpVukOYlOcGNwcO72HmSc}kIey9y-RT0Quh4qR;K9~oE`$15a$^Ppg-L~GfF`| z&@tydD32IrukFWA-U2`}e;bJPyh1S7a9>;@ za&m8mI=?t;F+c%%vJhFm0>eyJE%TM+y;Ixet5$l zU(k|c3=r3Lz*5CoKf+p(1)BIz#sR*}_%u!ZBVxY5oY4E8jSYywiR^P{BGO;@`;Ua}_XYgUU&AGp_dq`$QwCa5#K~`D0_7yV zO>nw9p9O+{Z&Uucl()nGN)YtLSG5pROezgb5&YAUa_3G7{BWBMqde+3@a>t{m3?)i zJCKj-%;?m*m62zvl-UK(g>!(btsT_uiZ279gfd&#g%!yT;`7g47o#9g=_Zr8lK7|8 zCfl0Wr9TQHAan~3< zjT@R@dM0xQ4jf0o1z5gH#D4!cc3x!lz}tezw?*MjbU&q5+NB+se5jAepCi{)rfGA6 zmN{FWW-c0IC+ogilG=7mpdl&VyTh(srxcklGK{_ST@pnvkEwx36TmOm`dbgITc{<) zHs9eZXMaFGH~nJE=xKR(lH;OqtO_8-mz-@1&vv*q*;+n6xXfxRT^DC+_?rLl*>@QN z-%bt6<82E2cF+pqnR`3%XjS-jeq>PC%ex+FX$I}8kS!sfCv{gl;BBTXHcAs;y!t72 zS`8XlL_3LqJ{N`MwO1*d-=@pWie!|EHWGl&Rq@Mk%OZOUEk*(#Nii%ZO}u6@5YyxM z?u27>8E76m#$F57%NT5xt0#~xApxL-`E+V?dJ+INM|d)S4Rgde@M1=Eh1>~^7x-!$ zhK61RRltdH*;aVVpkpjqc;9*Wq7ZY*H1>LYo$WsIv;zLD?ZG zF_rfzKZATiLvHi1f(}X>b*pO+xDcvh&%<}KJv|^pk-G2Mo@WrA!fxh|INX~FN1(PY zXPO3O=SMtcrg|rlLg}StNoljAX_M!>ZRSWHSm#d(jGU3K$Ub!4CfHz^RY^^BIIl5_tf<$ol522KxH)?kU%gemQsWa$ABZuhNC(623l>w zvn90Nw&q6@c&?5j48PRl*z#(rfGtnE?b*e&W_Um+n@?tOCxz1STA>LYQ_rU5UYoMF z`Yt>dEie+%d<$mwg0pd2BO?=({P^ICo+m*q5<4q=x|y#rK*to7p8H^M?U1|hvs>q_ zo-=5ACl*Hh5&2f`n<+(Oo9ueldHpRZ6NRcj4PQFFP82(=ljM7J6K`Z`KbsPx53Y+5>sYXni|OVvb2gg4jf##J7hGVvu_yBW4VM+`9X_)qe&;L zjH$}_BL3HFkq#EVz{Gho|!WDoM{^0Db zr@D$s_{}w$%6=JT9%)PZmivS2JxG=O;$j1@jtdTeHC0+xjI*vGF`&@wPHCeZ`15&B zf!w{!V8{66tGNvd#hzhSRx2sEcM-|8Z}P$bq5zMAr5wz!c1RtEhu-17$^y-1geFZt zS^QuJ+{@Zf?OY2?jg#hM%N7|pM=jNwyQ&8=p3v~0-a4eUr5)7_;1Ie72YMX0&s@-9 zhW!LhDDaQh^3Y$k;77-ba=uJC0;}CC^{2-RJl_d=E+FjDxU zHIC4=*sj43jqq2=UkgWz8Z;AcIyz%C*wsGCvr22t$}RG_UBk6vde6dea&M!35Su^vafgQBCQKV4Zhu z=tjasmgvX6)O4$k2BK!lsu%q;5dAcde|A$b#T@@xl@6;*^~y$ZUap*6V<3=cDLoC9 zs(ouT$~C*o-HOq4*W=pQnP2m%}e*J}JvzNl*a94~?Rb&RFbQm!@`vRar zbdY1N5s6}RXc~ElGk}xr$#v4|;u4DikY=4)Sq>|;L)Bhvg^T`B=JRFw%!v)e#MO6= zutqh8y;L)bSoS)FZX}vFHqz>2Q$eusv4_W0{XjW_G{D`%-ZUi3Pe@?Di)&#UVW=~J z$VMRbfIJ~#bRrgyudbO1z=#93A$&haW_V;FN1=GA_4yj&3$BE4vQ2F`#E}6*7ltaD z)R=VayYw0lk~pKKj-0kdW2zEQ5O^Bbgav}8fT#h}7AnD@p-or-vb&NJ@UI6ZC9b+C zrZojpm6)yg{>nu}zit2`nLdPN9Zk!~y5EA4XiSLhj^ni+Gs-yKZ?aTk zV0kQRKA%!TJFr}5P?ptw9dv)8JGs(XS-3E+1FiTf%Xe)j7g)8eX`=RhOMUV4Pyj>H zG}k9>=*^|VpF|*(aj(Sd9J)t>BYQhxjSx%GjN5{!|2xoSiQ^3Q@^jOtSPR^?EccUY zbq%EPEEVSZ$exIDteG=kKnet-reL(bMU8#QOs4 zkNs_umV>ZElJV9H`!>X;FYe89-R}?N{02t4H}&6Nh>e%w>N?j#N4+BwIULefqk#_L z$QnAn{u_ARq4P0YG9wJVEiL0p%e<`o7c4PhhSNOpMlxHYa6g0tO1^d45KHzq{{whT zsg~_ITYRLWg^sGv*4{1yc<*A|Q~SH_p1RsUB^YGz6(cX}cl)Xg zeO48Odu7PIkA_7Ogd2|GXZg&%S>f11K3 zc}I%aF#Ymy;mg5*&@r3T8cF-n`u4`X7Y2!Pj+?D49mMo=_hM{bn|(l~XtVLpI%QDRl^UH6`^27*8@fJ$Y%~ISmE$#^ zNt7Ln(;fFT7S)Ss={&t3IIrT`wuDi;K(ju(Ob*X?k^kU|ywi^XVyx5eZ+Tz{X@sHd z=!}n@Gk9~dBlS6NeBLJNReuCPb&-FjY1KVk-pUh^J_-3J4?4}*vpO$+5bopNsPiRP zrluxRgwwu=V6pcKbnahhDPaa6PW9Vz1C2S8EF%=1w8bHMr0koOI$_bK$`T7~ 
z8mz+}GyT@+f_1NR(yW8#CmPPo)C+$NPmMvDkK`@bFpaD+hc}7U zV1s9CUr6_S#r`2#)_G;;5%1>3w;o~!RYsfGU@FEay7YohZi%wJ6MgvCjahU|GAaik zKNS_>X!kj%{`x&T221rqzfwUEtjpnX02@~AMMVGuOQf;Kxw72p{58Yp~JAbeic?3^T|XY5yxU|*hluVVPa~Q9iyzR zaT)lPvP%qdhL&OkonyJMq^DFf=pvhwRLVt^?s~23EaQ*yn$yivCje2S!)mDKhWcB~#D@Hi7`mQ1+sQn2{rwKfa8#5Vc#1}A3`X4$V}YR2 zuSnb)#WJ}3vJdQn*!4!DfDCo&BIxXY2c~L~S@7LKv)3oG&N*ddf;O4|e=D>X4`>?= zav!=2tl`QUtpf+Uov2m?QjDa+pr+p9-a_~tU?VUVTS1oPZ8x~u6CVBxq<{{kdM1rc zHJAhs*mKuNuhi2RBe8X%(br7Xc-QlAM1ZG&4`a7(62^TL3w$1vK_Z2)kf`4rGbP%m7A z2%Q3N-pFYyXl99e5L;)#cf(viSjodYAEgFnfIp^imB4e1l|_tK|7Fz#6x*Cbv8TJ;4C;dpK2EJ~d;JH9 zbc-=4GlIn6}f={BREM8s16P2*L*&LpJ_ejT~J{*xH9mF}_f!Xbc! z6pq-*R_=$cLkHrzR5EUU7bV8gATzyA?ecCqVNfAAh42TZId0Uvdz^Qp#2a@+?^JWY z8a}u($m&DvSqI!f6yh;OzQETOPxGetny+ZkRMS#b?8a)Nt(P>|0cit`pD#!PbK@w- zDRv$htKz#RX7ZZ9h_PNUIFPT*EYBXb#Zh$C^8|(PZg{f~;s%*_iHe*r&5r{ovEz^L zouJtQ?3%`t?<->}=}I_lH){KzK%Dp$$b19P`EyQ|z8*aPYqQ{8vic#l<;YyN5U1;y z;WI^W$`G6;8#K+sTX=6XY;Gm{FwPiMH)1|}Tet0i3=jX$+Ao5DX=69jI^ehAV%W(? zn&II~{c}@zU%msE*FDO7<5oi{LF`GF0tya?3UJbdfMScnY>fYhvgMx;vy8ytqu0Vs z=8oHF&H``??o3qxL(EA$)u50W>i8y12yecYVheN}472@keYiPjI!FpT%luAeYS%Q^ zSu*=T+*%N{A}gxE%-oq-bkYozgSf}{K2oP;qu-nR|2Z!!LFb@BQ)0UShqe=JrEQRq z3Uxl2$0FnH-Lva1>tCTm%hgM;G0oF0GlnVU~rIg=S}&UJ0{Y1KhdnAoHa1j@@t*LfN#678tSuf35~7~HUUvW_|+fd zgaJGSIN+;!2JczYuUSKv1KtPsAy&svBg93Vwdv_OOst1QKXbm=txxDLQ!tt8BfBQ865yAeP2HdI=S;*t?ni9V56wJaOMb zg*iWAQQf*-J}gbKke@8ju1*kYt@XN?z@|!@Qa}@@&r@gP6U1FlDR8af(DVNe#6^RV zEGTe8+|Z?Tr+2dMtMiS3f&!xk3J{PTg@P}pC$B4m|2`1K2wLW)>ll2sWZPuf1+77- zR^VRJk8PQJn?nEX4Kltl*ZT>{Gq&Sq8}6F+ z)0mzTG$Ha}>AP)RgA!Czw`OONmao?izbk*@aqf&4R$koDi0sk7%K=x`4PEEPM7ys% zK8vS-ILF}my1q+b@cbGtEqB!X2r+%gtDxF50>UEQMd@5`@(VYVcle`=8h0m>?NX=y zt*#ThoI!@gwOn{a7yAl=`A}J>xAnkOKR;3Jbewnwc0D`fp}dhnB|BcV4__(+qi&2$ z*9$i3Hu_3h(Y*}Ai=0qEWfTHp4Y=EoC<6mg`}NC+mnU|HkDSL-?Z zgYe;7Vy3SjL0!u+vC^<*%4!B4zLPs9Tx)n|=Aa!7G#)0Y;>QLdA(x{Z2>KNYG7?+d z5PFqwNdHL<#O=scg@UDM4 z2&MZsw<=|PSb{?mFH9Zib4%(qO4-^#z#^tCxOc~tm}SKWf`;;21%d_yuwsBFgu*3R zQAPz)(5eoZ@sH@m@<0na*RvZLWWdYlY9W{V;Nt>g=%_*Grvy?I1EHP1U%r8Cr7Z*OiVA;FcdH`#r)H+1gr-lfbg-+;cx|I2s%QORoo{vt zbWdo})rm=ftA(-4GEQ*sE(?+X-7yHq@})=cYOK6hHaGR&RSKYv2Pl7V@$(71ED673 zpHe`ZXrHnJ%en*;Xo%jSGXIJpzW$R8Z?;Hq9lv<)*{C(pPzX*-&3Fu&Jc(4!sR5t2 zmWAt;oi~?FZJ=^#syWl4s)HT99c0|rcI8b#qyxXV7W%*azVqcC$V)msa;*_sJqi7a zK);BQh{gk2sSKB}yb95~=T)&S#gpm;Y3WFyqRA`t|7+~6!>ZcWux~{`LP}CPq*Fmc zBqbyTRC0kd(xG%ncSuMIA`&7EQqtY6h=BA0l?Ew6eV^$*=X~dT>w5P;``Yg1T62vx z=9ptV_x-z{hot~mYG7VxA}%s6!V)LVN=HyG3^plNx|`O=b$9b;UxD9~{D z=;e`iQDEkgNbVV;0vC}y^AksvRU4Z%X;bbB2!T@=R8Vk1zX>c!7{$ZPeGLV3@<+CG zAhhAR!5r{H6x5p->y2DFHF<$M5W>O_s4L|w?V zUTQJ3e+56#hfv_h?0BafG^tkxwJ@!K9Bsk#K^S}uSEaQ`(&JmRP;oUc!)Hcw}r@dh4ifvKYS%d!Tua#*|n z_4U(>SE5x6Hh)9?*c4hp7uYC4(BrTzqUrEiZ#ADf>@Nfn>05jUyO-eO!E0e@AX)vj zyqtVbXlvO8tScR0n|Z`IwF+{O037&5kemX9MH+-wc_!Vk+kInqXq5c3vZjn)GVi>+ zPC0n+6cJqQv*ZFIb0WH){muCUL=QMT1DZ#8LS4KBVb})|eq>t>svN5b)|03voF+}~ zIcu3=UmQIMhVWHzs>G^R!DpoMHy^jZ0PVmM3Mg@^K`%5Z zhPGMo3$IpxGAg2)+HVIF-wknM+JVCa2*jQHmoCQJviVR@3Aq*Qx zvoBNKp_U+E1^A5odn6YXIHfD(M=PeGS1bjP{>k4yR~5`w_CRF%r%SY=qn8*Y8<(0< zgCJO}B81Y!U8$y7yjZ>O1Jli3$Cs!M?=F#TFzc3DvnO6v%@-iIO1E*MbFBmx3JdQLYS9y{xZB?VVybH7~tEtsa61Yr1!S$$FO8GVCfNHVc9>uBd;VzotF9 zCQEc{M>=KK@P0-0DiYf%wirx0*c;EX+1Zp7F^<@m^=?PcCc+*Zv}@Zf2-6m4OFlJ= zz80$IcY@}fcgCJAuCrLYcbtHLP_K*&zU1RuEFgws(DqW27(6@Q0!iRbWy$!L`nwzu zC;77%Nl3?LrFpB^=C>5Vjxiuaih=|N2Mdco4f=?4Pf?lP2;O}aPE9o_KCPg0-#rlS9RKWoR6JBr-oV z!ZSj(=K3F?&Nfs}04!u15{6590U`Y=g9rh|*SI6=Emds%7{LWaxJ%w?J>&OD$Thxq z(bYt(Cy-u#(Es(+vI$dg4;ek<$41tx_9U@jw;`F%x9^q0P{KO|dcqnh-F1dOi3 
zbytR^5mvHjT+lvMIWJ+59sPpiPmqLHjPW(2Om(`RRAGd?v@umGvR~X=Z>J6CO1I1uun6y!A1Jbma zE@@dk^5gkuRM`@+(A}yD+7wE@k)ivC6N6RyoJa&Mw|h^vvJH^|Z` z%`QHO7g0H_+*9&3L2_IwP%7d_)3*FG&b#(0xRhDf7WY8G$}Ng<6Kjhg0BqT z_Tev2zQ!c}J`z|-ll-9K);bth1*Fb) zf_&YeGLR}yq1TyJa1y376>yRM5yB<%^?}t^Z?6ed3d+p6P& zhapt9*Twh-6u z)}otc&)URn=Tbd`b>n&#sB2#eoX2wrs>4}uFUy5TayP=cS%N@*4Z+DNMS*0|e57gT z+~o~1Memd7T!ZGn!cohffeseTNxaSc<)F|m;v+jUYg8ALrS8d&>5b0YA%u*oPY6M> zdf_VZ9#T1q$;EAblQQ#s^-eC;hOFmsPpQ810Rh%>G+U;|tYAnrpAX_rR>9gWTzZ)% z&p0v5^)rBKcm;-U(gTO43xE)uDjhSG5J{}oys9jE;qgi=Ar-w|*^s&N6^>FC{@XLv zF`J6urym+;HuVaQkd9RS5E0@4h6Q1WbkG+~Gvda`yX_xg$M%k?v26$MG44Q#s*7XY zVyNZLLfh8z+cj`XR$;yzp%P~xBrtnF5NFtd+i+v@QTi15D z^Zc$i6@EkTN5km>j|oG|Wb)c#pZ7j}0RN#9vAiLtMtFgsd2195FJ=_F#^(!fgRsH; z%-_GrJcnZ;#|3=ZpIrsO`d<97OL>8-9k*E50xtx+9m}Wtiq%E}8o^iw;W_$f`*q#x zRzVsl+&!H3x72NV-CONPS{L(KIjNTCK+`Yz&;l*^RwBokvdd-Yy-nZ?9Hm?*?C$aU zMbkgPluliJVl$|Ku()pR_a}|bc99+u#m^&yi_t0BkFF4CN3VXmrU9>H)FgCL(t058&azY(5G7n!b%0}kq zHxlW#RhHW441F{rI?S|}-{97672^q$KExznzR=h~D6Ts|FH@vj8gTjr^?1kv950{o zL>xrtU^OpUC)rx8+ma^{e10ABw)JU3BCEy+gu&%>mq(Zm4kRa~(i6`A-R-}$ROgU~dZX*Zy5 zIFM#}>F;Ri)P=KiPTBPZq0gCW1RsEd1ITq2>1;MhflfZ+w>q$h#ok%@h_n9VS`V$+ zb0#GusA@V*I@zO~Mw)DKHB3c3xH2ZhC(R=Hnny)BtIi9onP0>z=ZcFDEbJft9fD~I zHkB!D64M&mfol}Tde>Wh*%YRj0weI^`2{SnKR+msppACnzD|bb0$f+2f^GC6>ZNSU z)d~!a*|d5q;jH<~$^6r8zLwol&39p=K70xwP3RW^CiD5J52qa8Hj& zNrI60yjsmpU1hfbk`DR%?{mGkj*HW=#8K+$KA2ox99s51b)aQTZ%Qr~&n|iy7KudG zG<>wuo~7;ss1TJHCAuzT0ofAFrvk=*PZ>{HG|B>h!CvK)!h!ql3os-NK~EX03j;e9 zK~7Cwm4g>?+rNb0Sj1%N45X4TRS{3L1sUx;=Z@DahtGm~d!%&)>%z7+jHbfI6(jl& zClCA5zv!PNxh0`FY;)Sz-<$t>Ai9&X4+r5vji z?*nzRfJvqW{YMeyBjarxrcuhcJ0Wd}xZ6BQ3YxK8B5IOp0#-+h$ye1ymAh^Ya)I<#q5?I3tZ_~Rlx1J9=l$Od2=PG=RI9= zB2L8|<EGoEQAHjv0QdBn zT!r)S3^rfl%~_W9W-!3HdE;}}_fm_Xwz17$)-lTG!|l>*KUssm7@)v4<-?!%%ifH;FD`I&QQVV9A{S`*>y4xO!Lq2h_ZMk6zOum-g=K@Hl6AgF@?>g zDv!bi>qEc}r#O~i6OrR>Co1*JA>M(bKF+kNg@s!iZ{XBQjg3DV-+@ zR}+H8`PL1XP=n66Zl= z|9MQinm70eEgOtpK3>613wU-O*QeVR21+pEa9E7O{_|UQ^)`ynlq#YpsGNJ z#Qz=_1N<6H(Pdrx48ALu#_DWl3hyjKvPBZsd69F*bwi79!WP%|8kE=hNQYmpQV-B4 z8;38LdAtrc)#BZ&Yd9)jPY7P>;3>kKEywz^&A_+(4!7t==UkXw;r+Y5f4FZR5`UDu zY_%v@lYk*%jGcx{JgKXKBADY(WF=O);NjT2g=nYmUif?J)^^yIvz|}nZnVl7yuHLl zpJ#lJFkWUc$i8OEfm^li=`uFoRSM_&)nhQqVy5&~V}8PSp!j+fb_W`y)tq7q*)TM} z2ou}wj`}5Az(u1+vf$pjeO`8~`#rV1!_HKlq#)PBVwIeGw8o7Mp%F6(CfUtU54-tyxpnd5^N?|_{>93wmD>>(?R01_IB`}qd@&A| z2}tG@={RuObRZcJ3Cz5hy5n9LUuS_P^@Bo&F+q)hATc7u-;q9`1}4z zEs0_-4y>%X@!eB`!cI~EGrY~EH~C?uj&U9RAudIT&ZHL7%gF5uH{V9x*AOYAq{S`6 zA#J1LG^N{MI80(9X6~e>KC~aynkT!b8eb1vNY%u$i3z)RV+xa-c$CgKAs-C&8ILq| z4L_-|>7}{GRvF{-ZN0a?shP|kOm4?!T@g$A25a3j6(w>2lj#%Jw?!eBAvt2@n=7TP zDQog#-L5P0)-P*V0LWKvQwUfvz#v(^-B`5u#S5)Y@`xTI`ioOU>FSuwEt*C=rpCdm zljj)3@J`1cf)nLoA03LFe6OX%tGJkO%1lec(7p?y4kiZzZs6%U$7U= z?j8&D{hpfbxuH5495&(n3r@pn$eBp8{jz_pJP9zH{H6xgYA1hzK3j%sqdPH-;{q)Y zM4U{1>zB|1>Zf>)pFhGj=yHr=wo3Ftxr1P7WR&7kewka=1I^*|5wv`FLp1A^d)I58 zMA-LE#a{Xjg$q~qxM@SVJt6$stCD$su~52NV2-uLxUbf>T5mhnUQH$-8j5=)HF-$# zLje`gZvD6J7wuZmRU*7jZFfTKpSK~lxzz85Q@k$YY}!rg?#7dN>B+G%sWewVZ=dTy z7jr$7K4(nzG6s_GjDpBaFe6?shpD7@`>L&zXI6D{@#a8(!Avsq%ZA8GaXlp59J_79 zsf4J^+ikxKi~U0F8w=7L~<&$Jz?R382 zaq`jC^{sDHt&*yciLuvLd5-fo*c{Q|zW?o3qQ-1N~uiK38K8XtCwSD*6@_Qk^BQBdO}8oU3pBR3+iKfWa}W%G0nc zT>ItBC{@lf;{ns8eO0u2I9jVTVbIslwsTR5xp@AoovhwoJ?R&%Snp02f{`_+ms?(n z4c}f5p8__L!qs2J15bc^-=D<6y&DoZq zPq3EI1EJ2s$Z%~kEnJJFTdt;VNm4LLvxOUFubD1Kl5+icYAHFaolL%XepO7{mS<9y z=T~bcyUppDtJCjo5l6>ZPqYfv#8Kq_Xr!_1_7mW_{_CzjF`ydloR zwL>~x>At&yUb$0kw={`@Ua3N9k*M#5^?_%c{bJx18iLrXhw+)!X&Ym)7qN}#&e(K| 
z(n6>5qtHqkBte;7t}E8%V?vSAO-MqTU169(fyxhD-4`oF+w2vR_1}J=;E?s3&?rFuRO`=pKkV;Okah!F+XlD9v5q{Wkmpg)^mPyj9NHA2x)H~h2U3tQTFIdHU8U%fMPXcI)jO!R z2X#|y1QUk#kzh9n4vU(N-b{P+ilvMKb+uPx8JCntwN+Ym;SD*rgg?{=e_j^KV}1sj zAC|axUpk@}p)jecvW1poy6U)!g~BGrQ$$|3xUCnWUpP}cd{a#lYz`N!`=Di+{;qmK z!82w7&Kra^-x5+%CId6`Ujc9VSBODU3aNt-!6J3MnoK_XCI}%qJ&G?8$>{_M-=i^Y z<70R+{ZC*ZtXkRCh}KeqmbK;PhS#PX4B0mPBV7CybYif?)yQ`1@P0J^71jQVa6&|& z=rnrXu<46zl>$}i7ZHV5KB6(5`wu2rR1W|74}J&<mZD*YM&4 z=6@3MntE;g;Gz7FiiCw+3-25KA`MJCV$~3tfBtxZ0R$Y2YU4ja@j$x&>uu}hk+

[... GIT binary patch data (base85-encoded PNG payload) omitted ...]

literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/appmaphead2.png b/doc/source/_static/style/appmaphead2.png
new file mode 100644
index 0000000000000000000000000000000000000000..9adde619083782a62984ed6e55b24ed9fcaf14bb
GIT binary patch
literal 7641
[... binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/bg_ax0.png b/doc/source/_static/style/bg_ax0.png
new file mode 100644
index 0000000000000000000000000000000000000000..1767d34136a02482983199f701bde9d21eba35f8
GIT binary patch
literal 13699
[... binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/bg_axNone.png b/doc/source/_static/style/bg_axNone.png
new file mode 100644
index 0000000000000000000000000000000000000000..8882c6f689773e9cf8a52bc479fb03b64f00e56e
GIT binary patch
literal 14203
[... binary data omitted ...]
z?SX+|XH6QMSm&dMZyCq_o^XmaFymU-)Z*Zx>~~B;@5o0`%~Oy%A%mX}k$l)^7ZAhB zqSelp&m=e^%jV7}t;)*7!D}4T*|-UkoR&fQqd(z{4uBjqQk{VXQ=Y?9Xa8toViHfC zF=2X0$-+77S+p#zfcEJA6-hG2Xbe0?T!N~9eu0VPt>E_1b213x^*Y+FV?fKqK z`X~>3+Vm~RAbTRT0|r3m7_U_z7|6rBn!-M!3&$gvlSdFGh8g4V>>V-!379hp;z2N+ zOMqK2Dhu?tej*-lJAX5&~qb@4XCF9noJPpehe>U3sRUtQH&_Kaq+4|EHjWSajr!21JJ*s11cC8@XjPe z6k%Lq{DpoMD0JcRV(zmuD*SGsU2uqEo<*!yXg=WnA*w96=@Dth_Nx$W!TLsqJQxO$ zt~+i#_|w5p{gh6WegqD|%>667>o+uSVEIy@7_gG!l|?^PxM;XFNa*m=g3t=NN^VsE z6>=KnRp3jpUD0@wJre#BrgIt`g!W-=(!M1_vkY#0KI}f+T^RbY4x&$mj>V(1>L>L$ za8j_QL5)K!EJ-*>7K~Kb>OU$+I82$09ZacMt{b%(5UWF5M|@068e~@yS2tIOR;lc; zJy7`L^G0@^Zh63S!I@A-ht2m~oFkn^osXTxJ771U1z_uAxJP~WO8yAEdHAvR5(Od& zhXuwIP|BdRz<7j-faNsgtB6#PxuH0qP{A}y$QC2b1>?vmkU5a2qu@tpL?K5x%2Fir zjiw`8VP7MleuZlddHG8uJ`?q{WEg0%V~nj+b5(Evho9 z5vdAAsRDiga7^qP8BNSh7)``Y>W5+XboZF{ssYV_Sb%cuHcB9^XyR9=yACRuFjml;P!x@qd<^!6$ z7#@8s`PA3IrZa>{H<<+~$tjx^sJ>g|DP;ZD$x_9ODOWU|RkMO@QExE@QtO!OV6SU_`5Y`Z(KcTNt(XMuq*i9akm$a7j zl}tCu-=hWi$~7eSsx{0*m6^)<%PmaMO&Co09g0t2rx{kwYUFArYs6?wS1MGgX%cF7 zR!LSJYZfe+pD^8H_`vo=3YC?Y9@SE5@K-I@b5+-s5S91p_sG5%L8%T&k&Bkdl<1kt zscSA6x|Udk9tkprq{n?LUKXu2sKvD>yo9;T1J(j(+z#Au_}2Ik`HW|tE!wXJzfB)D8yC)jLQ@Yu_7 zUvV9|9XJEbi>=Y@n`TN^4yr55tYfUxPW{GkEeI_fe?Ls=9GM@rGtgk=W9cyf z84Q@&wXymx#+vxkHRJ;)^U^od4Qpy@A-=_ZOIgldZd^WGCg+;x66N}3U$<&vr*GfB z>H!RBX>OWq;WvN!*^}ho{jLG zb5XPV+oR4S;H>yK`M_o4Fs-_>y1#YJePn9dK1DBse`0e|aHeNle1iOU$>M&}&o1iG z<^k7<$$8sQtqksw^%dWOCm8Xp0h2}6a=#jXuD|!s$^PPg^8UoHr?0SYd~e2q#GmgU z$Y3pCUZAzXB#EdU!Z*f=N-93H%=8WPA>UGKz0mBix>-B8!|v#*U=tGNSACv@JW3 zJIHT6o_j1@hOcj5My<{zmE<;aTE6kuU*ov|Tp&Bi`7b@c`6YbhzxsbN#p*|T_)nQ0 z>rZk|F;9PX=d`{8ku8K%-&oqIjgCb5`g!2L|PqWAK!}IIrb>@nPa)-YrERmX#kqMoJ=CzBy zCmH@ZmA*>wjQ1h5uUxES5Q-2?cfXL;^wPXm<`Q1gq}Mps3as2+`mqeP^0=t8vb03M zgw4^9I2bX$&pHaN)KOq%%V8q`l(3x;hE;-77W(9UJl<9B{n@3wp6eh`;$GgJ1tg${d_KuP!TqiV-7{2BIc|DHKrB0;=C~Foh z3$h0;^PckZ-uNBfMPt8VH#56_Ccc%trCn9C2l9Qcabxmw{?Wbhxth9azjd_TV)R`R zB;04vHOow)}vAzYEtjli)NF`aRX72?eqO+K)Z3v2n56*~zRa)9eX)B4;>zco!jg z|0?@i;GxnE>eJ>EL`WC%mXIHr<%?i*tu?gWv=tTjOdai+jLjTP%$dCGoxXq;1Vq4# z@5{6|cQYpOvbS?^z5#zm7ALrA2YM3rzevqJCmb}B{M58FE29-8#5al<5vqtS8oS5 zV=qPrSMvXI@_+nDn7f*~SUb5{J35g3^J{G4=h5xJKzl8$K|Hl8n=JQ{R^`FwO)e?deVE&(#7lO0Ejy(nep*xk85Y_Mkz0`;O zt_i^VTwmyjf)l{TYIcHU!-kMYR76lLXwQHzpP}R&WL)_^h~O7CQ-)A0eGrp{ak2p2 zi9rQt4pn(b`CUXcRRWchXlI17<+YdhnCL1#W3oq#?e0~7I?K_b_wSJ~`{!L=XGRtj z9o$YZc|Yn7%pw|C!lGJFw{AR$bPzcSNxy6qGQ^i%_ziW3LZkZTZh{w#08~lL55Wn7 z2dp&)gt|yKyg+OEOX`GHWrIMIMnkI#=2<%S)R^w9L7rRC;P0-j&}@q1o@$KV`jI-XOFTIH=wWrk&ahE!Z|C zSO;10Q+~9_4{m7i=Jm3q$r>v8<69_u)G4eeSE_Ezw;8UXQ{5j|IRaQUZWpM<{Dwym z=JL1Ib|**Hp`{ySr(Xgfnx?KCDEFjQalN})JlyfDSdqdE)h!|Xl=in}8epIw`ALad zP-)aD6`)=(QztB4O6f>hl`t>JN5J=+HLCU)G{yXK=@@@$msO3ajqCIxQnb5Q(5L_y z;4~Ta(dZV(FW?PYG+I;5X#bkG)#U;6%5RJKj;XOS>*)EF`Wbaq6@6hwjph*xj5HPO za_N}3y3p?E+^wI&t*hPCViw3D@zHPHhdC3pfsgctv8ML2*+!uzW#6Mroqog=Y3Fun zL@_Hee#4|u({PTquG)iR|E+l}izC~C6p!v^oza95$>z?lUT15&(A5qCZH0NAmxa{8=xcyJu#FY(IU1UWoEo#>>rzy4BERTwo{5~)SMl9EKS8)kuZWz8%GQeV8TD(A4M{AwMZXRnMqmK&}h>)4n79`M3!#vWu;(Ewy zLUJ&oAHB?mMnvV+4T?tslTetIe3q9_G|0ZQJGXRPnvJV1Qx#Y>RcK?@UeQ+uV$x^X zyJ+e2GjAbdG%g&+ngFLTguaq11%j0(#loR%VG#<+n_^fg%9<4kqz3nRryH;+YMcw7q+4XRwzj z<%Sl@y&ct!R1Up!-ST7^87H2)pHb*ms=0Ea%5qWyA{2s*&2-V$)_!1!3RITh$yB-w z| zNKKo=q3(}6SJn81^Eg0U*S_#Lq0Qry&zCMz7wyqQR)x@WU@TshC)Qo1_G|xfXRrmw z7vONdZ65x)zJVclxf~^zB25u?7kvj9XJ^yEB*d%?&`hbu;9jcd;h@##A!bGz>#!rG z@^R*{idnjZ-K-6mqEkYp>iixVR>rqhs!=)Y(ZmO_RJGSE0cNS=O>0<@sI0)b&%8_1 z9nWgA_!lLsX%Ew!PLfWM&Ii$ePb=36A@H&{QAWnDV1kDD(gh9Zy|Ka zyFUZcOwA!?xi4MRzf4Urd9T6&5O1keVV) z82(IZXfSbaPlyEN@NaDHAz(kZzUY!}8vl<(Hzhcmp!8@dwh^P0?fW?|r87u3K 
zLI(io{`KE5GR*p>19*3Y8Vw%d^JO8kr|gk20!34zN$N4B@&h**EJruL@eUr3gIq2a zal1FN)@^I3z!;;l^ibZu!97Uq1znO)%(h{QZz#M}5}FBarL&esdrvFbR~Nd;Qy{k< zrNUEOtP&zTt#>f0P3w~pB89AU4EGf+KGn*qCYgOVMMaXMs4Y|Nv4Y}bQ^9Kk%+u@0 ztmOp-Az?QdJ@P!aAq-){j@s<>%gVI-1|bC(hFcq5EAS-1iN75e7^xW=8ngUXPAzd! zX>#Gn(vcBJ4O=W=0;vNf@hp%+#^)=7xhPJ_F>kdoYW37mmdjGtdlt9`KU8+k8Q~psc<^&Pz*OL zz_LNq(TZ%4WxMgG8*hYVtxCB1d_-_aAgw4USaJc)?O9Ej;_~C*X9MYS!q{RK<{2Qp z7hyVd_7Gvprv5Hqg{(YklQf}akU4t%kk3|8 zIo(3uU!TbzrjQsWhwND9wWxtoYn3yi;%1zDgsYTFK$4#pFz=q$y(gWZ|44mvPTcGX z(!i^@9Ux>RC#}UZSCrtBeg6w~(7Z=O3H!czLJcM;gi2+%_wi$5eC$PRSz>`Z&E_7c z)VTwUQ+uLw+O0!SN@&b|k@0IwnNPfS2)JQpTVZG%b`sWspT-?GP5{8ZTj5f^Y@p)PK9hBF z7+~n^4Ag#Exj1)x@!9HHSl3qMOsag9)y}PpCfaNYjBm4D|L*Bz)&G0G!}=}$Kd2|5 z4e^{k8w$u-8Tq2O6Vk7j2N{Lv5XK3Dhyg4&jEG^Yo9m1GM3BF5&nM`H=8Fe!j8Ivg zDdE?tLZJV{heq@e_kHl}Bt`4(j_Xwpbz1PCE3(A4fU}v)Mo+_f;kYH+N>}wBL+rD% zl@*#m!s>d#ne{2u3i5xjNKQB~^#z<-5InF;EBVKlM=*=1|Lmmr9SbLjGy1PFoY~IY zdALHH!GO8vgq@7#&FhtWDp!)@xUlnp5d`CrsBp{5WJNyg6Yk7*^KR&84-E_;ud~aC zTclfD4fUFaV{z(u+rSe&kLlrPnLa^$G}|?4d+4)pA*dYRokTbaYn;D1aahRn*)wuG zm_L;D5%vGxeP{K))keVIg1h!lGp*QHLK4bbfXg?y&uWJ1^jy@jk$12%o}ye~YxFZ> zZ*g;sV08rSnRKG2t;8Gstv1OIYUd)6egRBfvTL(zrncw4{0U9LXH_*c6N1$j{1LSq zAOVAPgWn4S&8%g*e753K@SIh9$AX-rs^Wux;VBWaU zH=-oQ%_Cf>=JL)*?yO|t>7N3Y(3YPcVJ{G_exzq8?-5@Uir9F=GvqTz!k;4IaKQ*m z^#)fm5-*XfDwfF)9h_`F(@}N*LZolDc5T#xDr6@>;XmdT@IDrx>dpu&<)4N@bpz_w zHCC5XE{snk8+AQhXefI?$@GoFd$?C2^-tr|Q61L0*j8Ja@c6edM@`1_tINGM7 zw99V3M3z4Z2Fu|8;vBP}Z)y=NF6hP}|LTcC2Yq$EvO_w0cL()RF4I4v@#DPK3G;o+ zIQYAIhWgTf-~)vhmn|z+roC1t{Eg}tnE)F>-MDI^uhalr;BW>tkICWEZ9@rWVAk=52?C7MX2Q7Y zM;FCz!WR>?Ki|{RR&je|YNhwxi)d1!v=(=FP_$3uH1T|&5XeTPN$3Pr82E+Y*4yi9 zS}Z4%Y{(AIVz^5EhPuRMCi!D&4Gp32)`}>{CmhrE?WVnRDUi|xb*+PsMk?{aX!n<= zXnB@uSk*lJLt$-^c5_p(->fEkX1K1zZMNz&Z~ritEL?C?4p!yC4^2v1bm{WpJP zm4Y26<O=3)A9qj_z`VTax;4bfK=#SaXkH>5fG*v@?0=` zcV@ruVR1^7CO=P6@_*4tIml4dFs%G3wPWOFX$FR<9RzrQ5eGaXVE6}zZi744yyLop z6LHT{yvFJ-s!%x4irbHuz0|tcpM*yp0Qy@)wR@jU{W_>|uPF^fkqFnl$shJeTea$k zwa%>z&S-tF>tLhD{)K zTDG9EVxw>&o3I}dkMp968iVyfh8*-gZeJTqiWL)<;%?5pH5J6S0PFX`y%4->vxasuZ=J9cgHBbd%mDMLTf8C{ON=ss9ksq zJTx5k=vzZ8`O`NTIm3t);}3VD@>hgYV1r?_KKtnsbq3e|t2(5ZZX6>|#k&$f zVIgO~Nh(>SOYSjae5G5M5Q$V~87XI&9{#;kp$GeP2 z@av4IGmT9n`%p%yt3DTRAS&OCyr;soD`G3` zIAKXLGo7W~6A8r>|y*;7-xY!eJ_^QGHV{$uLsrs~yQ=Pj!T>HLAE4RD=V=tIaYwJF%|ke&Gi#;Ihv( zZ`>Dg{IQO*0kVnPfaz!O(s~?qd%6sUn%xIx{Y0!+A|K?yWTaf%ddoZMzr$}o23Y?(T1Ls9-k5&pxE^S_pyU|9**ts4@dIfY1YoXwq1=5pi48|mA3)ColZ1|Iuk@%P~Hn;{eG>#S|ki91QNF05R{n+mb>@m*Opgr-c~dY(ni1yyfwdAE(n;)h^4 zX3!}M(w}}*v}+sH$pIX+zX}kK=+uv!H@^`fj{Dlg;D-Zzm^x@?s5_q-(h%Cdc3A1| zWixu@M~(JhGYhS`Y-_mV8hS35%X;?o$OFITmhC?LdA+4QTI>pm?HE_;>cjQaFKK>L zkyfU%klON9Q<^_K+wh}Rtrsqbg`Ig!UODD1$((#>E}as*P-xV0k}k91QWx@V4s7tm z_ZJ`r+oa4E)CO-D{JTkYAn%v0aCky)k0-yG{jyJru1&T?;zc&4f>KmAuZSH8rFV*? zgHmS2w*lS31i@W#K?4bx_}mWP+Zh97YF_uJ<04BbmuyE~rKh#(`^8gV-?0kfHJXOlhS0Rm%li7RUnDdx!S$Sjt-dGDD?K7Er@ar5__YpopnKhdD7_PLjg~o^yW~PKWOb z*kIJIUK~VQH(hZvZ0RcPAM<=7DcgYj^s}U}mvfMFP%w7AfH@_#d!a1n8aO6po=p_=>Cy<%2kLi*4WG%kB`o-42eLUR`5}xkHvV34N?r#6k@!wi@ zek2LEOzw@F)fn6e7-)CuS%&u19|^?lO99LQ^*y@-EFYR z>%)IYU*}*uCqI^>x8=HCiCHGiB?-^*C%Fh=r;sHiD|vQ5rp6s6V_OVMsTarG3OhZt z<98?^EXwflid!K2YC84exhv^!k{Lh$85K4{rfmfw!$&IYg2UH+ULW9kf_LBbBs z00A9Ax@(6E zCK%ILlf=&z#}N8`LN6Kam}nm*D>ppscr)=OnB6ZgPo*YXwG*{Bb?KmW$G0UX1&NX! 
za;otUW^rjULoPeziaNzPC&DDZmeEL;eND`PN!?Z#9d32&8; z3!&~^+WFJ4f7#{zr3TGX`a_{0{Am;vzT^kNUvf>gDQ9ZT>bUBpD=uTGbJn(*TX8(I z78FD>#PBV8jqwE7F%fB=@eLGR)rh-M@!fC9NZWN++A4>-NeB*DMoYdI)nl1k;A79I zLZYlu*xh9Pso$&i#`_*n3=VzzxI(d7Eq8Ez;&SdBb#ns-hv$UcBv8&3<3?+Q;Kp=a z#vAYq@H^WE{Q|i;)VBtLQ?wfBvXGvv7jcwrdu}s-?=OmI^Urs4HuE6{?xr%v;>vL`aH7HQIgCaw&5%{qS1_zAB~7TsloteVwE6*;x-TXEwVk&*Jxo| zci--e=>CO;(8)2Ldh;}GFg4faLh#=4+04o}_uT?ns&19e532VUvE}t9AcIw}@gswM zFiBVx9k9=Tq+mhq{2j2;mX57$>7}vp4zIg_s#M=*DIydSF!*{)+#z}{McBP@Q>BHT z`CHuu-J5Q(f}UR{XL{$xoL*MW3=60opk6WE6?igM5Dt|7D8=Kw9o2|CBMXpq)%3P$skd=mr6sKu)deesC7~6vQ=f)l=l85!9 zgkQV@zNa4( z&`K|0GtXY8UM2jQjr7>oYb`f)Ve2$Kp2*lU`j`fXRF2bw1F|zok@o~;XuSSyk@0V| zA@+wh>I~f)KFodkqGG||Qf>~hOLAIcKoG6x730NLz5+c^rRgL5%G`` zcbByJZbzTITQUJn2*Vj)OJhgB!a8a;ywif1!W$0pg6EqmsMy4lYEMkxai@9wnNmK_ zDt=Kx^7vwXk3%ZQmME`S=5j>9zG$Xhp|J<5e`-bSDSd#A0Xtk;xw!12sCeOUA4S>) z)@+(ksIwsLv~NKvh)ZitH$(1Z4TSmprBm0;G9^I2KH;v|0{ljFL8v>#(v`MBZE;@W zuW-7;Rn1E%TSizVD+ZMO>#vC7bwi^Aj zpf&W5A|k#hV$x~9>>owY|Dy;w5f{#X;9>p`JP^1IV*a56+CT7sp!olj7~Plx_&ray z>%ALz!7x`&ggMWRqj1-Hh}v}RZj&wsebzlLBkt-?TIY72zWj>RlJ^^q+VsG8fD#11 z#C^q`Up7Cx9|(eVzfeh~))SOFh+L)bUy!=*wV*wpT;&{Q5earKureb({g&L_rsuWu zO9+(S2~L^aor>MrStYo-y0W@D3F^POq^Y&>gsL_!{jUqc(e@s>$yzo>bh6@ayKyhkoo*=E1J=hnOV4qrN!vZ zNN_h@HyW&o&Kwnj|2gYUd21#AeSJ*u>Ip-_BR(I<5!4Ne*XSv*NgxU58 zQ6p@02hiVim-T0^5${*)Ie1CWTHAJ}8*_Fm<^mz%f!9)h9?qY;{9*C~+^d77)V;qh zJ&g)6A*h8e!*1SoR|B63+8CjaARR#Wu@$YtNhkXN$$w)TN$hA1P7@REi$~|)RCj(w zVWTWrz(0OE!Bwxn!IxlPVZ^L`VbIilUKWK$%mec)DH$s|>Es|Qhb#QZ3*v~VHM#J@ z3n#6c+4u{q9!irZbk>^3aZzeYIevO2LY>07%C>O>kFu@=>klm85GbLIW&#zK$SXyR(R7gAHwo|qdK5| zmk-Xul1pE^mdV^ROs=u4#29LT4gWh9se1o_TMr(}mFr4*PlNJ3kn+znY2+Bi6$0VR z0#5BBz`(E<#b0h_XV@NRt+5fcOK2GfTR9*6JE3Z}Cj{(hJqV*uNdoxIX_GroqH72Y zU6xy$xRVzCWSp1HlXcIZgI_-zX&dq9#h`{NVF^iYrj>8j+A(o~M75Oy2avdowzi#Z z_9J?l{--^zQF`mRbFmRc*514uuS_E!i*qCab76g}1Dhq_0w?=h&veVR_Tj5DQYEx$ zn>~H(T)jj=<#9Q4S*E_bS&x)16JNJ}<}z`4KAuNOiEPMvtKEj_@i*M!4IYj>l5EZ| zYMisZzL=IQFhf2(XW6qrVsz=bw$w6c*j1AI^%4tyj|a z6E3*;5mR#%RCrXt?$`hfT|rEUTJq1nXp%l|r-&Y4-dc#{Pd|vim4qJ@+q73yUbe*F z^6x~M4p+wHQI|{^eEbbbyews#+eCUJsf3rhd-Ve`q<$7ww7^vN1c+)B(m6bhM{hlgr>fZ--ni2Nh&hILU>}~4mb6so+=$n&-;99gtTao$ z;^>%dq15(8Z_oXWs{*cIZBqUrrbX|=6fb&odEKfW(w3HzdXw~s6}yUujT|lOWjV0= z<+qFJUvV=|l!D>kAU=7w*I~W>e5AWAr|n7C=}3{Uw`DhrCbWEYiU@88$mR|pI518pgA$UJclJ%GsUKD1~u==fT|(Z4Vg;|Jp7!bVpIv{vk9a3g+c38MwX5vOPBV2YPe zHnyE>h{w27Vmqw~^Um5y3GeH|BVDJ)W4|m*SC@Lcd_Rp8eCKqpDw6nWXxI|MthlYU zl$m+nJrWJ{fT2pkQqM%$9oc}G*PO*!q`Cva%LP4oH4CXZvInf%5=#nAjh}ASo@r~| zDO7M0Tw9wxJ4>guUS1F4d&?3Cw}sY+Y#0W$oZ)8k&D)u|2I$*?-GW=I&Uj4v(4>Cn zc4Q=`MEPowV!rPFaLV3uqmD>uo;4BBz7seL^{hiRzl7AW_72v;gNI~K-qd({N$SVg zof2=dmo1N=Ta4a8eP-!_8_f6EKOJ}Ml%RUYu&a4dT*GuXahQ}6&Im7u_73F6?HIVL zTP@6rBNs9A>=^3kgDVA|6jEDTzmFQwOO73ZLF_p8NSas!i}>H15c}{+i*g-{gI1^q z#reMSzu~6c%V-$_5F1*%n|iApe^Zd&8uuoM-PO5V;NisI$F`%Q8O4w(XsphHyP8Ch zk&#rmOi6lkqTjtp8L21;^R#)zz)cT1aLSqo5C!|=Kq31KHm9vg+Iqs*ThNp}WI|?X z&kdn7xy2}!&%^e-ybNv6VE!6`C~oN5Zke;)6RI)+6j$C{U&XNV7fUoHb*fxuX)||+ z%vEoXJkd(wZ?E$v2TBM?p6h&oew>{8UWZ`@Hf(Z{=+C6t!u z(eF-J3Ce(l0P48(9@!6Q=q+8KQ?7jaCzEVmYE%OHURv0?L}K0YMO z=KKLgfK`VQsexaeMQZA3^{fSCm1aZ-`kdhrO!C7=Nd|6B;n=kcH6ql)U;8~=#C*F| zGv_vheYxY@$?nrQ1fna3CvL9F+ss_MLF6LY5x zBwv4%pF`Qwl|GnNB{segguh!N!dHnp2@O&#;U%hnlOp(eh^gWsGrJ4QRcV#8E_N88 zs~v;Z=g3r6cA$DI-!tk6z2j&|W2Ur|{7r-Y3X6dqRvMnbRiUy&JN^Tmx=jIF`@=u4 z*N0xIN~^6DnRNe-oD zSI}Q&`yW+rOM^HHOBbg9vnL7rtB+w8J^w}PpHTbPXHYB5ArJznH6|Z#&c5>q{UT1KtPGU%K9)+U;kOP#yKD$aK%<)V#+dN zV#LbM4(3+2W*{Ka5y>gg>gqC>p=X^_5eqY>md;a&6vLEqP%2h!`BtLy?CiAWW{wb2 z7^tYqlBVHsv9kE;;uzpTKjDr*anu)_n2Bj=U*n6@R|VR=A9Gx`U#EP5_M5-8T&Ivh 
zUNDoUl>#V0qz}#D0z36i6I5ma8xTK1a7T!d*mQ7wQuBy~ey;4jUOoC@k4m1G%=%RM ze|{J$BogHIf`Pz^a>r)(%1S&V>}At;5u<}(5JlKjP999Vp!Usy6^R7f9H%$*WZ&Tf zd%(AyMT0g`J_tbg(?>BJL5LMdoV+ljf)+@~Dj-YvIa9-G+4^ED#VPT2t6=a-N=hd0 zoPu5VKUBl+fEi8&8S<1;1MoI4<{s|$P(mTCAD^v9gg2O;}@s7%9xDb3=lv3=At zGK&40HfDTCO2_`wxnNOL2JP0-9!5O{kA8T^rZ4gELL*Y}_Kd-#!cRsg&d!O>V=HG8 zB>tK-a1Z<4vgupgJ*#O|N7v;z6KKEfzjVaUK{xc+(C9 zH{^E_sumE&ptBz!8&FTZ)M+40Js2Ly<|HtEA{gOtqhi0~F-<|z#W>^14nSW-dX+IS z;GKvG$%8pX`SN_qP-sJ9MO|m6mH7amZE%R9?)fa1XkOsHfhx>6DWS(|t8U^$YY7_bsz<@wRdoYY+E#I(4{Khg3y z3vZPN%VgEbeuFPYwMF1cbc*|mo6M@S6W9eeOL-Ue&(H&Sz1Y0C+A#E^>_wjP914bK z)J|%!;Ur;Ae%AFfGbdmpnKMvgsYRC$v70a&*_%)@U)5>TBUS`84SAUq*2=6Pu57OK zuTa`zxuNjIW)JN;-g1Lyfit2E518#aJB2w8I~_ZTwZLvb^TXCeat(X$72fc_x%sek z5&9zu1^Y+lQb?mT!ng$sgJss{C<~X70#NKxC}A4JWeSjH1F&V~N$tr}Q1BvB!;!-s zWXKbFM^hRD2Po_)lH(Nvz~8H~k@YX)HVzV9*aRSY%^Mhz-OZKL?ph{VsNI41p& zwN0T;vP{KJNJ}Y_&5(hTEs?!UsZ8yleUXQwDrPt&8B7^V;H7j`vCfJtuvXqIqyK|s z{p|tGRTP)5ifr<0U{k4rB%4eE6l4?)@|1e!+4AWVI_WC7ktGTyGpd%5jcScXYg9UB zI#{b7tNQgZ^&!>3W;n2@?K-3eW#vUY`|_- zsu~0ut-mFHA8X{!o1HM;V|c-Kh6xsz6dhGjs`LF`s^P4tE+j1J((jac&xca!mn0J@ zlrGdYkyX=}H*_g94>}TH2uz95ELak$GN{6_Be;OM$X=^jn+6;Juz6Q`5qXgTG;NA) zr)|k?!24P+POnj~um`AnBVW09@psmDJgC}$WYFcE+TJ{pp_!$43vDZIV{uMrmN8aq zW?Z%soL3wNE_)6?vjQtLyZY&(<%5dyVyj52s8odD%n>J?m`A9uqin^Ttcy`KWieY7C6@+GtX3|o|Qr*(w5*g6v7vsvkK+RuO9%#0h9V{(e!MOF5MS=&uWj2-~ zC-2Og%tqSoEgl~79{SJn!QSFV1C~WH2PGqCDsU9JPT8C+6>YVh^ zv_s6t=xrV%EVwV&0v~$HZ_2bl&Dd#At{M;c>y>wuubkA1l83pK(vy*bw3y~wE^g-9 zRMH;Bo!6ZD+)^>(E#hQ^0YA%hw&DzC4o{;qpp9sO`+eI>F@ z|8kB2RckD;+M)B~ul`xCxwrXeBWPzpQ@(fM+K?UIFlVbRZyU}UKCsn0YA7X(0bRGV zdC8v4UT*90%x(T6WPST0d}TJFFsp&X;*GE73fFni8M2j(@4{WvC+;KX)%TMzN*rL&{6|26Az5mOe^94q$fX?t5$ zoN0{C%l*e>K*ivABpnhOG8}Sj%5!of!5n|%)6DVQz}&i7wVA@9>|y(u1yTbtGJ%ue zoOZrmg5k}n)McD|tQUb@`9d|lV5mTf>$!}EhsKo>r_iDXo%*qsfBEiW^b*wa8u>l!AByNYWVGJMOqc|DHEqWVQO zsHBm*B*5ms#B<8SbM13@7lHML)xhNP8UI%JmV8;k=Fj`N%Jq$helCQ7{k(Ymhu~k~PrT$$@0}zt5Q>@EIR?0iOf#xPLQsK$Z6qgjX~{-cVwkA)FY3 zM@PI3ph9@TSNcQ`J{zMU;O|1dn~HN73rLVa3)f(U+xyd~g6X8t1BEGuuZe>lqmikDu^FR>o#PkK zf`IUQ@O~BT%m7Bj9(K0&F1#KBr2nzt{VM;fW+Em2j|sp=fK*#SnOMxh*^HQjk)4s5 zR1l7sn3&($)SOpUT=IY1zpeyGEdc;WUM41YcXvj2Hbw_$3nmsG9v&uURwh@QdTf3>{IRvu=y+TvDrX7(;$eF(C0bMgPj{{PkTe|!92 zo;v^6lZWkpJN~bR|8eAJ`ZxanHJ|@3)_-chR!b0$pXvXsydc~ftN;cG2z7;wxQMz3 z=!Gw$p1Q{0Pb^TJKCq94$deSy2+c^bsy0*O7PHujBW$}r8RLoHzvId_2M-oJyJ(Ga z&P@P0?0WTI*t#f67}PP}7OcbNZzu-mK)7^-B|^Ebu6UlN2rdy^j684*&aC_WU>qU1!7ieOfo@+r$uR1_YjKE{>s|N&vL4hEl&|6`%8F+x zK4>fmmY}swkEy6mI&7H+-8Ea`G8$p(xH`05nJnzq@$Bl?{mzHdNqR1RVgW%mao_v z=jL7`4Ut-z;S%tI>cRB&{JC-&HjZzpK-vt2r{=Y9*`?LGOO~ULg=l&WCpEhDyFf*& zGM%YhX{-L8j8FAM-K3R9Td@h#TDy4GG?!q?lJY8+&EWm7m{v5Q)CQEkrS;!rE0rcL zLXa;=NsvQ z24w)ZEA~rU5jkH*R*c_I$+@I^%|k2lelWw-)_7~wM)S=WBxdWl@JBJt#aZyK)s@b& zE{WoV^GwOx>J$&T<|H=&3v-bh-z)spRssVJxQz^c6c1sDIP8)s zGS`QQ;PJ)=o2%O<){fBg7i!K88I-3!7sur2ki#Ui8!$8m3i8TgFxt{Ptw$qz&~Kni zJv0V(oG4-^(jPXV$^<4LCPA3u@0)0P1^tadGu=g3F|3(ukvw3dP2Sa^En%tj(^I4?TIm&J%P>l%#+Y-Hk6W4p4~5`;k3`19mGb)CxVoyn zY?_egiZYBDm(49>YBE<%5-S_Ranzc4yxfv_Q-WSEM;GEN_YfHC!LQ}1D2+dSL_bSf z)HF+@o+)|=OF|=xi#(*_x#T7;wmWZ&KY^as`fHb|sZzP?Y+G<@Mee73EaAn3w9`psyL=JIY)a{u2{mPuCeV!0vRcuW4L)1PrtE{#YMx`KP<3Ts?b|79EXuu{4E6jFCW{D$ z6;|fNNnH)?N7&O*b9r2tT+E#wZVnUF;b7-tl|B#6NXNZEmG4{y%}f0F zE!QfQ$ewI#aJh2@qWQJqDL+-Y)2YO|6?omUS9&lJ?b6_>ib{0=B?X7gkz<*XXb{H; zQAG;qs8p3QimXJVfWVvIFVte00U<8sbE5L2SZaw&l2OGA{5dTGNBinlB`vM4ntfWA zNp_MV@k673Q8F=Qr)Q3Y`u~g{?hQDrR)cQ`7kZo)0lqJl8Mb= z$IdA<)*2rY$1d30MNC{JywOMwIP>JNiMJqE(8!UC`st1$6}2P5HmsQS%UniEL>~eV zuPWE4@R@jCQhKc{7NY7~H>(o4`oNDP-+3-bQMCSPA5#-oafLBW;${eFuM=r0VXIA8 
zj^kj7qt-bIdbGsE(_JdU!3-eOa|m0N2an}OQJRqjCjIirvNP%y9SCTrDz3rC=(kh% z8HhE}k-@Zx`MO|Z8}ndV=WKK{2Q2LwL?rPaxx(T&s!J_=_dCI9a^IT7OkXLVs%-$Z--5tJ659&fLVa49-L)NLl{ZD z)?5H6i?oorsNWhq3@lOH1yWPYs-nip#tch!GU!>L>yN@k0a;#RNKgC07U}X_ zt(b1<+7D|~BudlDLX9t03Bsnf50Iwk4VVx(h3RewVYw?MJBX#APILmGKr{OMQgZC-Odl{EXLM}#fGB7b2Hl10Y-kLP} zPH0Qsi2!pJ4EezUe_qnXsz12lrPY$mXdp+uD2y*`<8v9JQTKF&RHCwB3miKh5+VscU0>oA6483j= z-&NXW2xD3Hs*D&GY_>2vLB!(IvE)ETPdKlpgHxs@w*xwqf|MnxIP_3)?4vl59)fpf zUkOyz*eNll7^om^Hct5I`HjXhQ0lhXY1*DN5)ySG@V4Ug@W07%q*&KC7F9U$-U)kg z10-xba5yz_!<}M7J$W&rMp|ME)TpVzO}u6+ zK8$PzW2~R4m+N6y^(?&>relh6GV9c6o{@_;-X$>>tas+ak{c!ZRCUK(Tu_EJV>Y4l z6XaXwdA-FGMLQdJjd2@Fh?Rp2atu>U_2w~jAvuA&Q89T~2;u42f$w6CYj`z za+|N`lnc@BgBA$t!U?gLaI>~Syq#GT?m%l8V7uw4g zxF%Y+8$6DVM)Zpnru;+yLAEt$P`~4OLGC*PZZLdMyf1320}ZC)0mBb?6!{6C``DZQ z)c0k$LfjJ)he*sn`c-a32GjJ9>6_rV?)guMtdO6#? z!CAagM4{Z?wdngHXenE2{?RynAH;vuND%my5edNfu6})?;!a;!%@-p|<%yx}^p%SJ zIL>zdGSPd!=J*#L00Wk-^&EeeaclEuNsbqSVT(rUC)tcS3_x(#gg5Sitz+l#IP7PR z-}NorU~Gr8Q_rC%wBt_29wy=zz=y7b@~UZLQK$vv|%c z){!%xIO>wcyMO8bPDqPaHH}i#`R4m$(2^ap(y4e$xtDMAQRX?9qzm3DEHq+uMyT*n z`Y;km_8lmrK^VYf<3}m^+YZ^=1%j@htF2=hlQiemc7UB<#YS5hg(Jia(9|btW{Kf_ zvdN&VF4?eNC{iG4t}&TEEGs@nQI`)UKB!WZBkpO{@$Vpy_`V7JGI)WJv;Ty@d?#R& zfc|yp7klVZLP{>Ewy3N%6{(W;k&RTm9Jd3px{d39)h#Y!6Fx_FbO|h<6kBnBvX=Xr zKM<7I82!Z*$NAWSa8M|b(&UP7R{PnpUmtk4=<4w@RZh*)6^(rP2J_T?U!@H!Jh9_= z{OfiZM&kqKXvM^CMYrQc2D7-2wO|odheCVD9XyA;TX)9LvIJG|7g%R{Jb= zZ*fR&l~hJV{Haut2{BqOa;Q?=dcOaM|Hz-MZ!&6k=dmK)*0;8nJdOLwAc3U9)u z)dbK)YTv-9=*3U{b5N?_?G^FD&sP!_j;dxln+ew5aYiVrhRXqvTbP8p*fdj2{q@k& zJ9;%C$94iUXPlW-5}RhWA2UR`0`6Qs_Om}WI&@|Tc|-N)ym*5R7W)D?H7BF!IkujW zH-t7q?ad@x3`ogZafxR4x{5QD;?vwdE!p(^|Kbx$qV?DtpmY$55YPzQe=3r;p%T76 zsTzY=d-J4kUE>%TX~eSA_{f(c69>pnZ`6ZDYA(4f`-Kccj1y+^S9e6t9w>} z57#DOhanK>BfKHI=>5#Mr{9>FckjMKElEJ+zm3SU3Z;idJHV-^S`LI4Sz!ZNkf}d0 z-uuZ|*v{x_$KY=wpF}r+d(1u2r9wv$#%TP0YgTD4mO_+>*Tj6o(6zUG3{#0Eu6hgg zZnL3GQn%Up+$bpleTisuU6S#S97vwa7}2Wi)3s<4$d$q}qfP^O#;EDDS_><&rIW)O z*PDquwbcbVa|pq88NopD^GfLPI*W35@MUzR`cksLh@7*em zGEzo%m*3TWli*EVecNwJci4Zb>4xo&H}jZ0-<)rjQPN@? zoJ9PHoQN%;)3)?&G-l%GnhsjmpPs#;fm{seI(eMr)P84EUv=9^iIyug>pAK%MHOjlQ_3+TPS~b$Q%xT7meoiI?*>a-_9>Qb*EEDVm3}+^0D#?>s3|} z72U(Q?c$AoKo=D$fZz$a%5&JRgW-JX&ukGi+km7u46FUvTFB-)BGQiuHVRV_jC|oJWd(1_?W=BAE;J{7F~#gk4Fz|8;Of@@5r{_>nPke4_+m zvgu-UUGjUY?ZqPu;qz@D|;mc)nryz586bmBg=5yC*}Wm z!F-@x4Z~Fb>&mHLteZd&KQuJfXp#flZkR_lrNinhK#XltI)w^3YmMfXn*&_+l&4UJ zh-sIP_dlGV_4yR5<6%KBx4I%`3T@0Fnk>pmLRrvd9G{3@zbCVR98dUbIBDWbVv~xC z9AmnWU9a)ItfmZ&?$sDePx{)U#3Af@5n(uHW?3`AAtE77P!TtLdWYoTYL`_W06|y( zC#v|gFJ+7`*?!T&2CZoD{uT=o*=Eqf(J-0}T2R)VrL3x{73_zVSDQOVCr7_Vg*O)L zFRGj%0eX|>2uy7|Bjfq@M#hKS-*{3F}8 z^wJ?ed`!Vy9|pUfJ1Q4Eo0?=#c}ILaHRyS`6G7^l4)yc?*XlaJ zXG6~1uS$lYC9R{m1bXZ(L0d`8y&kI+P@gt>5EB2=VaWN#Kq-^N*v7J=A19|<&#cctTIP>N71nl_D$Om z<>68?aXO;~hsp-? 
z)_KDjFCNTe8zS}=fjdJ4CdvvQt7$uB1OlOcbTcurs;+Ct%2kM<9hAy`D0;Wsfycdo z#06jC=VvtKu-OYJyMzR_K}G96%k7!Mu?J|X`SF2E+;{8SmEuz_xTx}o1Wzs>nT3KyO&yQQIx(L*d-yrN~94HJpGXaGhX!yTx@G7q*+Y0#b)z$Pt2N+56TE5q;U5< zVrC>vem&XKR-CV4HMk$m#5RcxWA-3EdaotKxgQ!fD^ce@93g>dbCp`Kr|>0bRZ=S9 z%u>)|-pDAc5Ms0^2owCPkw($XhVuDgT2^LvMDz~RU7qo?4fNxApN}n`#fwbO&?w|` zs2drnn5E4XrIQ0(vt0L{u?-7B@*zCAMNtmFEc9%b!jWvUpM_6Ci%3K~u7CA-F_ z)NP6V%`D}!*^M|%Ex%;ey6fLd_A4)$$}S}`3#LkR-H)43eunJCUnj^#;ng*@>)4-L zyq(GK&ZisM#ay3G>^lbon^6U0?-yFfsYBxW-34u>aTz^$pC+6yxxw^d0?c7^zqIZt z+JB7t#mar9NlGXdab%7Sq&*?ile`T+diVi~{{+!&pFnPJ{D91Fpo~8DOX-LEdoTcJ zh)RPdcV`0!+(-K?ulEuhITxr~7r)InliR1sfIlT)#|ckV3?};-agcC?yI*?i8v2*_ z8oJ#U)>nT&zIdGo9)tfMt~XMB0dynPS_jh?Soi#+ZA@y9;4j#=RQgBw5}6-Sos|Wjqor`RjhwTVy8LP_&aaj{ObDn!R8a&EI_V z<~Gmp)BRHAIT4g6S1fZ!1z9D?od3yeubD^W;r_LBzZt*0+F`0YuXWotb71w}3+a3) zgTlvIQ$b`zR4i&TcwAh86`M%>G zHog;1p=QZlTeOONT+l4^?0(JsZenI29eSaPE*Avw%{RRX)YE#@x7Tpj_k^0=ee3tN ztrMS@tv1MV1=0T6f~C44V(_uILm~@{#e61VhFD)|w0tS8>$U zNvTyrHg1T4KvA95C6bh^CO+rca>38&#KS8+BrL4C=o7XN4e+t>5(G2DBI6F8Z3!TL z>*P0C4Kvfx8KUu$M-nV%0cOuN)17!&N{+UA7nhD7+hHCiJ*FM+Ev5HAPpE;c^-E@9 zQW}Y;xNYAEM7WO5l$)WC!pE_jShH8mqG_S5aRf&Bt?fu?4L#spAy>;d6AA2zgrY3q zS~HZrLZZj~qj~Eq&qy;n&UrxXfnXX@1|pNd)iRIRtTs=C(BUA(60U;u(YLXA`qGQH zjCjFtb`YN74xYn<$!i;0vy|Nel)ecey1?n0(s+&?3hMThDC&2#qTKS}E(e;FcIY22 z;uJVmTw(R}&q$fEML|^YAC63>Rksr1HOR6do|xcw&MJ$B$o@W_S?h z6G=gYd~bKat5?Vax?xkarfuJ9wb8;FFgr)Hn5j5VFB1$)AHO4&Kh_@Tm1nQUKGGVd zQMTEfl|B`AM3g=u-l2gap2{oU<|mQ=Eg*U>Gm0m|elS>}4|Gy0s0^1ka^V*#MWA&i zV*7fdiwo7}8%u~J_hAl=hspS-s8m-Urq!Vod^&Y!59cfHgf{>MNAE6xaY4^dilB4aHoaR8yY=Rx%htsvnERz;RA>XxjLWO&YVeeKX zhW?OGsC{th3F{DHKdo8T1+HY3uQz>j3Z;)Lhz6gKIDr72zCTddH*yeK+s50)&(~XR zvsSA)KAm7clil(p5Qm&Tnp3K{{!@WU2_|T#@HC_)LpJM`5~*{{8HJS`ge7JR(yB7+ zjt5S6wN9@kb^~S_Xpuj~4AwajB^|>EwBhwet5Ml|ic3Isz10#tn~~x7*oGddIUxqJ zvYJ%AyAGH|f5#CzRb3qH1DA70N@c-J!H;f(td`B4D$&{5j8Q(iu!eoYhfq^ltj%Pz&T%Ct zALQTbEgKj})bD?uy(jHF$fGo!f=<@U+)mfU9%Rih zQ<}S?QTIPG_1eF(5;tfobFPkc9d~9=OZg5%PZ;M1?D}CvYM+T3k+ix`fwGFHit1eP zh7cj@hPp{J;~I?p*6(Dmg!VV&*|<%5hU~77bok1r-5QfemE61^NC+SBLedPtC8`QhGyW$xh8S*iDaz5+v`yaSWDuoT+Qq9|GmEF50g%{VX091LGLLH;BzN@{`Dl9pmB1$4KS3z zF*%O-$^cCf1+vH1L&VslW6pDNv}me{$7D*jEC* z2;;bl_)4}z9^Z%`#KHLJ^K?)}gyikk3_^>&*&LAuY8 zrE|;wSk0$z$hspVe%OU@wMHASfyKJ6Cbq;{y)$w$$~|YoYhE%PmdiCe4j(UIMj3m6 z7W$oP{;ahD&qe-@X0(3{C;$5PHc;zC?|h05xUOw7aOZuLxbs4YQ!1ydq7o1(CmAgr zDc)1xAERNj&R+`Ox>Sn?rp67M^Z%y`!L8HzDi>$wN$WmoEPwjJEDMGWhdJcE9$)nd zaHYZlT%TC1FZF+!Xwl8904N;nu#i9R72a-}O;&jJtW5pEHj-K}x|KN%KfZPSy=&+G z)j}9%peWhv@7*(b!3sCM5j!e>t@nb>=Z_x4rUiC016?s2q<74nT5USzw*k7V7W?(; zo$zC%fA8=n?HBOdyy)=mJlZSbTJaL5$@@Z;c85PfpI4?v9HEG|4=?!D#xad1&8Ic= zy?jgE^g(j`K_+bbCr0GVs8@udCoJD>5u^2Z9hpH%m~ohPZ!m#>M%008-O(*;w*1l~ zv(a)!C}93(GmY};i~}n-heOx58-VRHc|RjmC4L?ak!7qy9*!zn>KJNx%702)5+jU)KfPd_MN zL6oqTJ5k}$BZV?(-gg{}_D~fY!_myZNlgjx5)+grZ(DnQzdxs8fdg8#E?#IK6}F89 zAci&TflSNAJ7+{-PD($%#YSg^z=NV+%guRo#lE4#IlI(y1?nAJLXmMVjNW_ynplnF zV(^hau1mgnI^+rWw_;hkh$|H*R5W0BP<_4lu4coOu((p^VT?WhijnV*Z7%pt{`XEP z46%i0;S;WkswzzXHAFB}CG>pFZ^c8Bt2xUohH~NES@p_j@&Hk=J;yn2am`5)Bc53o z15wkHJMO{zNi|(mZ#}xXxY7_*-_(QtKWm_sZ;DY3>RmYou?I%;%zMiDWie(QE#KNY z8w1LZAKr)ZJQOHp$K+~xPi)BX$i2R_CT0ef$U_$cTB(x;zM{v)zZwX@SP7Nt^;jxf^QXpjTF^5aVhf(tW@|oa*}iZx-+vB zfXp*v;ThgoW)0S6yph!hRS!<1s8c(?eA7C;8T>x4nqZcnl4G?Haj9a=*OSWrlH;se z{9&t@XdYE@TKt{UaQcAH9=)nRra8ziVv4;P-jF~}t{m-eFOSjy{Wp^XyQ0q;RJ8wjet;`Qo56clM=djCGt+bD#^@P`2jujhubx_AGavl z%`pG@S_@&jbJ|(PCbwe;T(Y{KTq7WEf$K{fGP6~SKm7pQ|1R}5`9QWn4Xvp&kz?KR z!oxmOAYciQ!ylN&7NJz*z-A%K5vRJ*(TG+mtGeobfjC>P8s2qbFA4HT=OAtMb@SKQ z7CdXk$uGby@ZQmJC{PsR3_N{?1SXUr5Gg)}8=~qLGgl3hvLBR`PurKW?j~Rc=r)V@ 
zkmbsoK?%lumnm~x0V^0-bE1sRy0GtB;)GR<6mye!K(PEZxCl&}1My#|m{6m3P?Og=GregU zm_Pc4I%KC5kEa0qx3lc`8=p0qdwf>RoAu+Qox0N5^h)vLa1wA9=5WD$_#e``F z8n0Cr#iQDr;o@lQ=+bRCJFPsp)kTo7K~#}_h4u$fdbv%YTqo3;Y%%y}FVvZIg@tP> zEg4Mt`pEv0=>|oJb>lK zY+4}>lVFFUbHhcQ9<^$oK`4KHs2xe*4KXts${2NX9+04PGLE)a@1e3d$hnDqq%C-U zA#{`Qp8L@g@(8K_aB9i0v|I_0b~=lK%%mhC;h%FZHE#PA7E*}D>^5)du`L)M$uL0J z;5g@i%5`PaO!oIIf>0^ZM@!jo%@+um>$BN+IH;mdL_jHu-| zTK*NXd8ccTS*?L7krnU{?&K7DSR4hVKLW^SgN2v`H=G~Is;0_Y{h9Y9iAn&&ZWpcw4l zdNuEbOW9Iryu2s22APS`-*1C5EergDA%3IYL0?az#k}3tKod6Z*YZKa{lmZiQuSWd zWPdu-4G|hPa)}@nTw>sOp~eCcL8{ddS&TXW6L=g zQoHAM)O&AS3qfaAeW!y8iy_{n#%r3=sAZ=*}_EDN4H--wXS_nN;okZvk?HfA@I@yY@kE=)*#8rOR(b*3OJ+7;`V|k{KPQM z#{3HE5d6Lx{w%O1>zrGts7J*Xe!G@4Yd><9G)D^?SzmZZ~Zs@_fVavR#-U z%=l0-cTQA?0_R)Ojj~RdIIrxbm!>{%mmw@-9`zBzq?%; zA=s|eR#y;Z>0ro16adR&eBJp`I4nx1UZEd?RKLi_yce{DPC*uX>=5}VXFH>xrIi8m zm1XHfb%;Y5U#m}kAv{z00#kCSPznov!H7MshHARxEWc1wLPNwuD-ST_(xE7AI>=n~ zjsCi|C=C9)$;VuhtPd20Lcd+Y3NNOE&3R|w6}26zFaQ0yW821&>tAj3)Szmq`aA;3 zca$28Y(;p;Ly<l32gLrv^VnSQDNv~$ ei-?|2P)I1gPE+VVQvW_8m61>suNM6g`2PU@q81DQ literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/bg_axNone_lowhigh.png b/doc/source/_static/style/bg_axNone_lowhigh.png new file mode 100644 index 0000000000000000000000000000000000000000..c37a707e73692e1cbbe6379fd853111c4ce2a869 GIT binary patch literal 14041 zcma*NV|1iZmj)W!Nq20s;*Qg?ZQHhO+qP}nwrx9|4sZIKxihoY{d4O_t%J=|>%9Bf z&s$-#(jxFMSTI09K=5Lsg7QE>z{KBUZ77KEZ_^9=U?3ovA~OL2Sup_t0$F<-V>3%5 zARy7Oq-01%MX{g37hTg~i?fEN_S5mCBV-Z~a%Sy$X8a4x%v8okwqPP?$jGw7hM_Rg z;y8+eXrKYUFvq}{ii>vi1XNUSu|;WX+#Mdz*$z8z(_SssTjeTF(}+NSelsM z9vQ*-cWIu*$;~-#g82erjS?U*sbPAiazTg6`P!2W%mI z;sSA{jiK2B5l9i*x&Mp^SR^E_059fZO$n)G>W{7zB*WgTg2FB?E}nX@3v@d8ln;4m zNw>>Smn0Jzgtho*;trsylZX2O($hxql!putAYZHgsnhhtHB`JJL1Uv69^mM?)#s4#nQZ-#7 z73Y{J{Z}?&d0G|*R^6!D=B=;Lv?#)z*0>$&5O}|?+zd2`%p8^i(`P*$oj~G@KHXD7 z8pe6oqDf&Xq;qFS2*ork>d^y}meA8*O1}K}7c_tz7csRUGb<{)rG!C%;9J7rBec3< zFZib~v})Sj1>*^}v&2CSx-ENHnj`w*@ZbU_S8^TU*5=|SobTVamRpaV`R*<1P-iR3 zLaCJWNRv@~7 z3tga1h(CQ4sX%~UG*?7pLa2T|v{0Baf%4d&hCpcotg*z0z<>GrWYN%I?FjHl0$KSv zb3IFusDh&Ton~ibIURx9Vc_{)@)%4}+(Etk<>)b!gOl{ER>7M6v~+dY(6qrFb{*NU zr~Ur)lG>7b;#m6udRO)~ZYkbDvW0=spoIj=@*-tfDSjyuP+=wcqU5p`+{+G?iYpSA zgDyq1hhYhI33>?{%qcSCS_L+XcoYoG(l~OsGr9k2N7IV1=KGUtlRq-6a9V=_BMfce zTQ@*YABTZpOiPBY5Lq_NY(S@HZ9qnUQ>RJ;UlGtW>~2s{E4B*1y0tp6N@j`fjKmq8 zHN0ng&jy+aN{2KuXtZx{7h*eNcVZ{d3cU%*1zi*VYs6!};EwCv*^{9g&j*1w&?h{H zR1~QZ$~o{iNJee8?C(+{MeyjX{4se$uX=3uU>cGe!j;&LQI|0rO0;m+p)?e!vBJx|ML9ZoJUO0F zxuM7*41LQwT74sZT73cinnCD&^?kbiilK&~h#{GX9V8!0zS!Ah+XP*4%VdfK(-e%j z)Z{|(bTJ6=V)5(b%9Kv3zmhQIMYKnRL&@WD9Ar*%=9%I7=CWI*H0KEBKb}yW__3&~ zh^M{-n@-^&+yZcu5|cJal4%-eNv2JzrO9E17fTt;%A0~WDm3b?ldBo2p|82FY1K#7 z3)C~&hucrv-`Ts_OWC&^@r*4TaUUie{TXW+=T-boeM(&^t6E_zxEJ3qA!H`xA(X72 zy-zvhAzmBbEnhniQDh+QCB86DHLgAGc_c87o}^PgtC*=2uNba4T_#yBuY{}ARxVV2 zqLi~>bV~P#<__Hz!c$aScw9xM$XUKz!&*^YfLGkD)g|_k2O&2gOw3mxTA*PduAsD_ z<4|B6aLi5XpB$x}zsy&qU4?0ddj)lswO+M8<9O(Z!Lh~x&w=Pj*)H9F)}GYf@>uK6 z>OSTk@&xgy=OytW_`&#r4N>ct1iZ3a+m}l?Ji8ogqH4ybFUV@oFwSUBkHu7s`G#rp z%bLa8DBldls(z+$<*=fx$Sm9}>CB6Xk_o@z${5$!X7XuD?bztJg@)p1_D>C(bsB8| zlj_f&%h7tyWJL*|iLB(UWSz>&Dlp|J<%H$*<+|mgWn$KORzB7atLjyKOD(IGRp)i@ z#)kTdMoyzYC0%h|^BvHpF0iT4ull<>lTJ9~qE=Xlri!T||*D6^K<&R>*VcRcc|%cltrUMX#so z((39e>8kZ28R#LXH)vWYc33=mAqOM3S=wx>>FDa{^!O+qlLV@d$dR0HVX^1d&)rRY0qNh0Ni_ueImWUq>@iSzn20F|d-YP2LvW zQf<@q`Koo1W9(u4)d<|>*Occ`us&>sJ;K^%$cS5iy*cNsFq{)x2y?Y%Q_< zeBr!s6|}K)6}mbXSCHAjV)D*ebAx3+WDnj(%z5Ra>>2Z!{pR&W7oip6>@{U@qBZer z3NZcEnbGvNj%duA_)gzKt|#N;r_>kMwc6F$HSm`Cy!2E2r!ji;&7Ykec|pK9wY!V% zlwZZrL^w493L*?*bn;75Bknv`C3NOq 
z`2OIr1BMYu>DC-m3ube!bwP`9UTA3;8J;ip=aW5!?vi$yjZAB<0;l56wDrstkfutT zG;=9ST&oCkY|B4pH`lc{rk_JEDe2Np)AbFECa}}UGe?;joH<_J?GZaq_aGPS&;(J8Eu>wlcbU zo`8?}wgtjMhGG)&ls=39Xe^X2ovtzbsp}pgjJb(v7}j~uzI!_f%Op=EACghZS>|T) zS!O?DXTSA4dI&>*M{fW)e8s*OyeC~(F!^wNt^NAJ&T^-I>wZ0T-EwbZu}$l-!i~%P zaq4ycwwBh3c#m(+yY6}Ufp&LxcDX*&Izy`S*zM@~^zo`o=VTo{gGS#Y28_0E%MSu% zV-A!nNVpC>H#L|9`}&f>2b=zh?e{eZi}jkJ1}u93_j^?t=p8A_9?Xt5aBS2=8zP7U zbhV%V@T)Nr4E7;N-B6H4pJ#HaO9=vK1-KWzZ*%ipLs)-y zuxau^_Q7M(Itlgn^+zVMQHB73#Q4->QYHko9fp`A(urEhp#tPIJ5k;rOqaeveA+Mx z2&qYh$`Pu*u6&AMok+Pv)cJ4R58WLT*Y$vUSn5Qko}Ky|E~bY`rZ1P{uj$DSt_6N6 zU@luQHV_;?GT~e!)Rmcv6R3Z45RlABSCQTCaXR?-Fn!2sSNPw0X5{$-q#{*fQkn1h ztD!g0y&@`}$^;2^dAl40gh~SZ@ygV`B)k>y5qpv}U>ol204Tjiz%TGL~b6@l*oqC3m9Jv3R+|<+RZi_Vj7zMo!p1 z`!bvO5TYI#x2NyO;UrjbpUtS5m&y{w5@^IY4!8bc&SnfLR3^57LPpGqvu(NtVou`u zCItBBY0ouB-~Xv|044jVykU2<4WbvmCs^>yp$?X=OLHRN<}L{NQ$`IeX(q6 zXV%EVz%SuRR47;8nO26Z_skHpzOwC*#cg1g#O-F3EKOKLZ)euTm9AfeO|d3zfm~{-C1n=qciv9(xF%;e`|CM zs^eKoJz9vk6sJPXsl`G^GZ5tIkCT*Fs7&!iCy0D(22B;10D*1-IW4!NSogV?-%c+n zl>0#X0ij;QJ>}0uH?@Uad6GwVcDlnjOJ;NC7Pl@q7rIc&CKeo;WsK6el?}v_Rff1h z5Uv<;rl<3wPekx(!NRk@W1xOs75U|;7b>gBdmP191fU1P7!ItXG;66%4EJ;EDN0u_ zJNI7}ifVcPO%!d4XGwr!id~lG!ANC-pO_ynyA8xQ9;?@?`Vxr@VRbrxogeB|k$0ZKG@sPp%G=J&yTg zKxn}{8dbwmoWkI%@#T z)C{QD!K87Fx_2CN7x&EIMAZ(K<}_E7l{Ir(%(%~1?y~bmZacx3No^utqRCFzxZlJI z^-Dp5_m;tAG4kb3fBdhNo}J;Lr^1Tw8TJV(jYNt#g3o>&s^jD#Xp50Zfok=Fk_2}a z3hfi5qZCI+hcD9Xfh>(*q!%H=BmBJIABx6IAs!r5!*$A*m-6U;Hci$kD(lxpWF|1M z%CfXAU()AP!}T|JI-?{CP^j!=)k%$&k*pfg>6cS2X7FczK`N`?Z=qfqXU5WOEI{flPTj3qi~pu63@^6LA@c|duP{SvxRJfmfo0j>CU-Xsx{&d!nOCT zLuXa0l&CEYsHkT(S{al(b6|0GFUFIw_T*F2L}J-z%4g z?7l1*EqyIUSWj7Og9F|-1JJv?Lq&)`bXW{xAxGcIt(u-qOH&SSn1I?*{L8<|-gGAw zi_|njT?a>xZv*#epie!ilPT>JqQ4^t$J?Ql{AyFYD@^^V^w$RqTxFAA%mPlF|MND? zihtp8_qZmQ-yVL1tt_D%@zZ$`U?9V9ejRmBXiao0A=G!b>>wbsw;VVBtnbD#GE3J`gV$}yB*`1_QDoSH8nJ*W^rKJNKJt(O$25iVDI zuw53zAD?w}s-D$cx5>i-qGWOo^?u|~_5&vbUAyqLOraJb0{f}LUhOyAy!XWK_i-bn z;6VbJE`P>R^4Uy~MdA62^J|fnG1X^nMtg4w0aJY5cdyWwW;u$(eZ$a%>|zc8%i!4k zi&QG5p%U^iJ7n{`6Twfp5S7-&D6_0GW?zui4@!D{oIJ8HUgLcziS_GMd_STZAXv2i zf!{pg+^eck!TcB>`jH<=&D-DuDe}QyOm}nJaE6yf-5oLe?ydFrk)o5sc_g?@E+7mG zEQqW7hO2GQBA#{R+S-^=roq%v@%prwX@1{#D>!S6K;Sl*e~`AQuf>7InCBOwhM6T` z-4iMqjs!c!$wt`izz#K(gBF(}Qn@Yx>r^CFOF zI!O%_MwnA~MGW}@{-r(Jp$_`tt_S0t9Tf?F~l8`3kvZ>B6K&ooS+5 z9lON%$hqpr1%E<(XwYA08*p2>hJW~%tBL!# zQPnJz@YKkQ#WC`zcZoib70l`2u9#7f#z##mI|MlIkV?Cv@>?az=dIy0txD*>oc^HT zrs@VpbxgAu?YeJx7F%HrVkt^lM)f%@r~of}2k>Oe_MVbp;^ zcKccN!B=;LIv+?^Yd5w%SKPHvuxgbzD`53-Ga1FTiqTl>XNZ5Lcd?eNyhd?ON-ZrS zr5*D?XRN}{8K^$b%sG2yWFR8YrmC?luzC+6{|CZB0C$@#GPgdcvxDFO6N9l~VgmOU z-~dPeULfclD>bt{X|n&ucEV7$jEo?GBHu!O#eBc-(dj3-!4bd3fJA!Lp)h(l6G#fe z!(lAedY{e->5mq4p7ar?k0f}u@G^(7nii{BbYe01hy>ZGie>j0zm;~oiF?ORy9K7H z55IAwB;og^MWfOj{tju=FN7! 
zsJLQi7Bgs?v90!Xvo6@pXwzu0#}E7SIe0k*uQ~$`FaNjwvFP9y^Wy~2+F_B4q)}0g z3j|fxJH5KgM_|=inS>exbU~h~xxbC*+4JX(&6}fK5e48odRM$;xd-3r!o{Sr)2EiWsYwI&|nnwIaV>K^^d8sDX z>sM?wAp_`o`JR%i$#3Mp^z!RQYeTS0(!+H)B7Y>H3Bo zFVH49_reH~vnA>Xy?j25mNpMeejQM<&Bwnz|5mI8WLxgQ6u%Nw01Pw9JuR(?_AfL& zr9HU?w{h`I14x0ZPnDH|LqAOH8pVT6z_ON;HG+N1vT+LSn zn(99qjcJ+tO=3h$a4w1XwyPOAE>)Nq|ya z)U&~_%*Z-Zm(7a!(V$FV?pCYntc-@@EO>eu#;s4rk9#?nmPqd7gRJJ0sQ+osUZCJFWD62MGn5uJpu;G>ks#@M(h%3{EG35uz<2_8tCZrL+{Gm#kGxP z-7!VLA?L{exgmJ>OnOWikEQ7^>r)0h_~B>xzy}}SIzkD0+80X6%u^X_DM-RIMH>IB z_&U3A^zj(M*p6vQ5`4hmtn^T*{lkE3ieupng+PrjQl$iEKnlc$-k%EVTeX9T_%g#2 z3{EsuN99 z%K6mOLEM=^(gU8sEFY6LUUUaybmj(vp>6g?aBDyo_8r~KZEJ-@y6l&eFO-b#=V=({ z%`w(`1Sh=rmXK;Dl>TFsl-E>?c>C{Y!<-4;qTW(0h>UGF?F{}ZM}Z# zu@#%(mD^BX+LqV5zJEnl^oC*wDDeBrwL{hfw7{_ams4^BJ@F3S-O`Byb5&q=vD2d0 z0@T=lPyv89h)TlA&A}uGRCf$WrbkR}hlal{5Fx3~k=XFDL*o&%<@73ZlRlvk{-*3E zg=vHAMQTY=#z>)AI-G9FWK2}vl#h4Vp6`iIG|`=Ussv4WOk>B6bbS`=j)N2B;@9+# zl%OZWI}RzZFS5bN|Jq!)Z@{vldce)y3Hcf#2Mrzw6V@KO$=aTByXyB_l~h-d!$|~7+*;^P8*o~L*|LKi z@i`V4pvxyM9+@uBv=sh2TR|E2B2K4w{(t{<4Ue--41MU4Lxh zhQZa{Pt7_NNV5a;QR~=fMKZ*z8w!vC;M?~>0mKC%L90IUo2NXj z{WG@{shfLVCho2%<@C0{miJU7g;ZE2#^0bv z9z?AdVEw%6<7^2uWVLJwu-pHd}-^f zGsQ;gm;yns;H%z7uwCt7t+e0XYNLb|>%6ctwo_Ms!QPcw=E=YYHkwj#{@B`3Q`45g zDKtLuuN(F;4GLOMhV+*&B0V8#E`76x5t%?-NC9i^$vH~`J5hIPV99-OTY ze=ZqU0ss>W6Sr2Nh!NHh|E==*y=M$KhSd&XiU@(WKsjnAy-nx{v)>^V-8n(j94~P# zqPMuqN(elP@X$NUuEU@@F8{lc)HDHJgD=w`>Guf1ASkKca0B;lM#H4O08Us#2eFYZ zp-=ZQ+Evr>WGd5uoojV-y*j6n@FfkNOqNNOUk2S6QU&X^nRWLk*##*}DKM0{@r=6> zR=;FnGMs-otUoxqCo;RI$EG}TaJ(>m4%5{dM(beX#L;t*}w~{Q7jUA1^ zlj5fVb-o9RK%Zvht+2>x>aQr$zyXyinFR>e+k?!!)ygWb7j@63Ys^Rw0D?$B_F!rf zjUF12GY|2ufmYjcXnPMS?)^@u+k&Ckk1p*j1`@&4yxC^PX51~|j1>g32iEl1D%#a< zxGXKwwzEE}0#8ftd~W#Uu)yBKM+P1p@eIu!n;#!9^3DM4^9sp?8FxBC^?A#2h+j2k z(osumq~kTQncOrM{!K?LEOiyZtw?$@ZW$h2Ru!hKx&{WfqDXM2RBhfD*i%7J9p5jv z`FgZJE{=T%4hFa$rTo_n3u$TLCWE7`tLhH{^i3>bbmMR@E z65@nBge!v>r3I&6g&hvqKzvxiXvhk9H3ZtazJcuT2Vvg?xrc7vO^)bZ&;TIv0?1?K zhhTfK^k4rd)IT44%XVOR7~g1@o4zIeS0eFjCvxojoWVx(rHKodDz@K-%i?l%zS{Xr zXZc?>uZG6yc42sjJe~0eSy*5mrgCs-^nVnrl;5}I;@?>-W8X-6k^kEzt!S$+aWAqd z%(rP4@bgaTe~|g-w@-7ve(-;RDg1-^bc<&_j}znLfnP5*)Yviy-!d z)uNQHf4R-tRZl_+`=^x(k$rNfu&!fL!x1!1ve5p8+O{xdvQQS+W}Uk$xuYK5tF@B8 z$2#RDC80_qihV*8TL+Z(x0zxXwC^W8GOig#xqe0Ni zU#ySH{&jnS2Q2d8%`rL>mwNKE!<5{n)RtS!R?$`z_K!Q>xT_vKR1dSwve<_Yz$!GM znW1=ZMyMJqc`ue?lS?*_|>31pu` z6qxIE{l(mA;p?ix`$l0T;{0=%%g-YeX_Xc~{_UjNx@Jua4+HO&lTxDlSJ!)>{S}_D zHjQouTenAaj&djpVwDE5FSW=XfEfR9n6P8hdg`AF!giQ=28R-ykbQwY@euo3f(u;V z)<$|u_)jPkGsi>EHY{58MkF4Y&a}Y#bY$W~z#BWBjOn}vSha9)8_AI_;&Fl9O&`d+ zHvcwe%mO0VSt*eSWt(ipUXESCn{ICvHk(8OOy8H$Ml{B6nHihznUMNJETVkUqFKMXmo6K(?_{zANpKVJp z=k9iD2)@)wyNy4?G}ii8#>VZV4&(+n7kX8_qN((ikbZ9AYX!Oi^RRgLe~F9yVBif8 z+h!J+-7mweTtC&>(@+|JHxKETxzN&CpcvUpARJ*#+#3CDddoZPY)mpIC*DdRIJayxsnWJJ{(-DO5Cb7J29vC<}5HE-JxJbqyWp;gp z!~&Kq8l3$l1YGr&-XsUd;8fNuFNtHl3ncMnj+>h9ieCry-I}GHb)SphziN4f79u#_ z6<@Wf1d8oqskH^Z7)jC2Xr#IvQ;Hd5;C6A*#FhM@6rmR4KHycOLzj`o%RXn!)n`Kn zs<2`G=U7^eB||CQ;j9p|w)bWR&MZ5TZCSjiCG~VVdst=r*mKBc?cSRAZG(~?G40Cg zbXcOM0=E=tX6>h($h-F=i8leE!-0>+j9PP|hL6RB({q~LT=;Wvq=1texHq8Pk9x6q zdW)lfiw$(B=-!lEZCI$1_E&X{+i>3|_7vyAD(BPNd2otYep^1*MK173h1qr>I+~5?|Olw}; z-r;ASDzX)<<=&g+tW5d?WRuk$`Mnbb(omTaOqwSFnXaN(!|Z3;AnS@O48Qd)tU<@B zN*Wv845^lDwLD4yUtLlY)*4tag<1uMs&U}aT)7p(;?$zC;%*( z%P;%tieOn>z6C!*?vx#!CU%e(YDX|TEC>_+c| z3Uv^vHVJ3HsjEslgHw?qQCY!>ZSpg474^!x{#uJQtAesWXv!6Rc?`Dp*??o2-K!#U zVoRtdM^lK(M8^x64wcLgdl^ODavn;Fs{R)DoIU+fwyEJF-1=0eltV=g9c#E+LR)` z#JZ6{3$` z0ju88=QVV{?N^P7M$|4~q2#=A+`djfbG!nT?0>$q&t$Mf>>jhH+VJ;L;C@=q$5 
z36HUpd!q)mn0UHMXk|U$Luq>)B25cTEh5Nae6CfQ9I|$v_fI4AnbC~Qc0m{;mly>| zfm6{@GSD!BSyv!rl$8Omk^1N@0cAxgF9Kf(JMoRwPq=!y;6*F93}uqP_NtZaAlyW| zAj^Erj+xU5yTv;+WBceTjbm1FJ0fTVdMrXz40jZGC{b>&d%^H3ukE?0C=gtIXU#C7!JUeSSp&mYua#?HMt#JqHzV z#(kuI(-U((r=bDI-6*bp1cQW&_2p;IE0Y+8i&sp<)oN0!5b#mt^7msqAUDMs>-y7G z;nZ4`^jDv*+bbVAQ)5&SGy8}NSIXprjsDj*Up<=EmIAb1cp3w zzle%A;H+(v+;2ItyQGQ}A9k|86&Cqnh9-%#f*7Eqm9J(TaWgwrp0M^{Y%o1JBi!2mREWDG31-28_n4Ok16nom1!XQ53}a> zO!;2CTghGZx9^MGn_CvkhDu_!~bZKJQTY)BFnQ6N0+) z=-qJT3|*_X=Imha4Re#mDnoZBA%QA+w{&r9Q!RtR`Z{wI+6FQne>E8{|HOqAlBDl@ zi`7n}fq=*z6Z7u z2Ed&Z6;PWdWvvY>Ej!N9iLomr^1-Hix$pQw^z(1z&1lO*IxWCo>0}o*@Nud4HF3xG z6N=$*Nb+*M9=c=Rf#SgWa;7~8i`ISkRXO?j!ExH;0kzwXfzhd`*(XN%SQTsDJITDuQ2tVfss z65vXGLZ->pv1evCZ}tMV#8k|Vr&MTy*Tc-OJAcmXne2?BVX!@NQd89{l+|F{6{#X$ zd%cUnb#PNnZrPo39Ea@I;e2)hK2iZ`#=ZdS*v^uCQv|%Z6baGXTtq5L_KbbSQpQ+d z%Eb9oSi`O`^*m7)(C|bmg%;}Q3F*-{QG(CaDNJN0f->wrySA?tu)9=MTq~K`lA645 zf2Wnc{ONE4?TE|vbHZhkoQRR-Kmk+stk&;ib|!)+TqU;A0henvL8E*dOX1p$#nhB{ zFeX1arvwzY#b^&Zozw|`mq?dEbWZ|%qx)1wqUCHM>Q`Nb-JUsSHzjHXoBJbTI&hP} zf(G?BI(9mox%tW_*>Tp%FmUw1=U;_&y> z6d2VoF{c?g^^T_93^BN%z6ip(_T@+Kx_tmg5wUSR@|0P3jQ>OL?@PP}oU;?^T9V)4 zR3ubZSAHt`{n=OWsWO|GvW*kneKa9;?~!vwhkAf9@$18_{+7m;Dnyl(r5 z9i>hJxOs<@jAeh_Jp{=cF31U?{vhI%!S749{&g47bc7{uNkLM37Eto`5pA{eNmn}l z#TuUcwy-OcPd<>d-i5W+1J? zY^m-Mh^b{d3_F4v5>M28+L}rB7GMqD42jQjRiF3!;73Q$94$%NZB7#xgs)$U z_z#8=o_>3qgGA3An9nYj-B!6RDJ-~#QP-T|6Sr9sf;)s`c_MW=WgxOP2G&Mxx6g$H)m0?i?dw90dM2vk?LxjO`C~K`EbPgYKMa{p?Nmq3 zNrI83xa}_a{3V)|;`%{PVMxP>zL33mib69cF|?Uom{b0QJ3uiSaI8w_%1sQ-blBx+ z)5JRKogikOT)q-WDY@8H65vV_=OT0YXW6j4+pn#T+%~dR3~s5_P9WE)$*U8SkWmqT_MC?5Tb{1R2Eil0~%Ty0+5k^JvkY( z-C!1$L(9I_UtrsjFtsK%Bb1cBhE2HN7OVihV+y{TbILM7w8B9bDNiFBIrURE-zZ12 z^q@W{-M`FHflg3S4cZou2*uAk4PHcoenUh*_rip^yLc@k{5gD8UeZ`oQ?vjrE7Kdl zkRbSP;Q2WvU8^}R>R__cWuwLe#qumw6t_QuvHFERi6B|8#k8lNtr9sj=wCXDiG#hO zG7+JtadSA9MVFY>Q>l=fxPke@WafZyc!D=3y zr4?CSVafAUT(2MhN?vVd6QM1E|+qsQcQHLLo>pg>*Oh;D9vQo7r7JCN|}=l)6xCenoG zv3f_r!EyEPj`WGu{g7@tWv;zaS=SyS->$ZKvz2G!fb|K(_dnIh&7w%+TTZe-QO#na zNMoUvT~N!tDwb^E5=9;LJR#_ZIYZIB%PWMUqT}+w&f8J`R`uQ5? 
z=?RAwPp)Gy;Y(bVkh6}-@cC(DbH~X9@*xUo2nCDQJd2-mY-}{9CiYW@d`75bud35oM9paRt;>g)LbG#zvbPxt4ANK5wY{)8LtZ8uTMSM z1pJ&H5FnVJTrpWaQligryIFLdM5sV$gyB}@6Z=z6$i1^5g+f7AM`?B4S$B9%-Jn~J zKLa)g zQLCp*|HeBcN$1QaE=|kA!l@b7SiklYos>Yh(HV6>9RTmuSD1nZk(mt2Zg=xJh7k-w*Jr#>4|E$_2p#ZP*6m0@oCx zYzAToJktkShj{9t`VGX`jpl}EN(|L2gcb%fB3v4WVGNWe%n?Vr5Bwt3Baem#>p(<6 z7R2$BH`nVg5>0T-Pv_|=d0qf;D-8Tk*F0u(6c14E00k!Ol#pZtn`N*@e;s{2E;LZi?xg=yH@Q8z7oM#@WB1a|+BMZXNVXU-8nmczNnVsZ2NkC>5e-hVA4)Dq!L9tj zUnynMQqYCS)^Hrr4iRq=qZws3e4C&~anFLjX*vLp2df8XE1FKEt}$#K6DD&qzv2uBo+y=!r~L+Hq^h?nBphFwSOsL>)g2GUSuM+(pL<`oze z2^0jv6b7OOuneth=nYK_=?#SqtNNjLHFp_y%LeKOA_wFmw~&0Ph2o}D>=X5+tW&5G z%~P@Bf2R~mrAtCc6-!;Fl&7}SyvV>%7SSIN52TF7^H4Y|SY}4#Tgq?zr8`BiM1MeW z{)t0dK|1lB*kq~z@dhJ5IVpLa428C7mQ32XMw$XnM6s;Vw4yn9y;8lwDy4>r2Ih*} zicW2Gt#B=?V}#?R+GE;sdG#`Tk)4EIX;BMN zPtg>^>|N>sPpRsJPQ~h3h$15?Z>hOano-?RuLI#x%w)aNY2{3n1my_j$r71TMHPIN zmQvBuBbA&vlVgT^G!N*GP=TW2!ovy*W!}=oDvq+s0)paBoes&5JP3t8F;bxdi2^Mn zDJ7LTJ*NWGz(aoefRt#}{6(P(-3n|Q{0pdytksIuDZoAei)V!go(B;?-TJHbq&2y< z>Au>7!(+rF^a0}Dz+3u5Cx+j--aC$M$Ox=RZP=v#gd6dPH35T^9 z`wiQU)0W-GB;NwXrgo}uX}_$b$Rffb`NW%*nw7Bb!W7@sZv0_F&>1sH|(zm4YF05^d1{k>FdGNkISe)YlqLAa_} zjRxC=g$0;0CDj}q($0{ULjWBG2T7yABP(m1#zi|zo8E&OxK`wXD%9`r;BL{G>WBB_R^fL;j)naBI5&W>wu4c$Yxqo5J!tOPb>B+UQ>%_cz*z!z#L1rDh**kC56^`S8BX|ob?}e+XSL|o@oA(z(q)w=d_k_`r z&KTze6;paz}ed-&^M60)`ZZDQ3*o)7F-v2;(TN zhpYaCf7!rT1T6vzA`D_o%5!o({w!bp)AZ47|LmGcrHSl;)Ir;*8A2T*BEEybta_ey zyxz@;_+_kXj0e6=$$TZ9KnQ<|^SPvoo640Ohv0$=t@4qYZ^_O=)FQ;v|TLe_G|%GHv+^oQvDjRx_7C z8p`d`EM=|nZ6Ymkt)DEeE~~H1KL?&u(|Gi9b{(k=6L(GMs7Xaf*kd_ zJ*8j1VUjX#o@&g@`HnozCXZ#OGCsE6XLs$|7u}U^EIe9qENND0>DZmrom+h_+%6sZ zlzpb)E^@9ySVQtaq#|bHI^wk2&wU&oXhmr)*+H%o;3434vZAoEc!YW%t!yNWvq)eh5kW~r1xCn z$7lOE_C9@ENoz;ECA1V=^*aARyE!>IU!7{6BGv8)R8juh<(=0G1bGVG}f z5zGU+-1~F?t3C=0_AXe{ScKhBV0@xO1p;UZxEr%)ef@l=rG-&NqRtigSW0jD;4Vb; z{!Mb6@1evR;>+?2NI(<(7M~Z9=^MdjS}3an)MaIOjO=U}42A^tj4Ukl-xl;v?zR8} zH+owq(*GLd{~1Tb#L39f!X98@XG`>NTmwTpX8=D5$-hAV^ZTznP24R0ZzNl%|DM+O z1R4Kn7?~NE82@MNZ&$v5y*%<3ZYI|1A{I6#woczM1X#E@`2J)6|I+;5i2ut|9c(Z7d8m<;*7dtF~K7iQuN3L$v`^ zNJvmneS0HV)tU-QM&On1iZ2|0ouaRwOT?5E&KTLZ?KiGRq#QZwayg?fV>tiRaLqgA z;Pwh(5e(sOdw5r%xZA;t(h3R!j<-WqZ8QxIM@g=^P57%}Dl{8v(em^;%6^#8`W4@KBZS6`7VS zj{uDIG%d3yo5j!ZnXppY(&H)*{-O=P7vPaN2&saNt1pUR@%DJ^vo;lCngC{8W)9V+XU;_!PNj^pO!{ z4m`kh(#X`E#JxpeH5C}Yy>rxXXi2Ln3)Nt&Y6>{+{G~>#{#b6D!ojI=nmC5ZNgG{+ zTyd>kQRPY55t)|#D#NB~{-lykyOUyat4Pqx9OEWTI8?i|U%Sq(;WB=9aNx#RU*t%` zoN6q=a+u>p!pT3UzCp`5W@KYqLl$TuRG%~yXFaIEhFK)}QcuEK#)Mw0ZN$u~_wYko zE9-qG=qyeIeP z3Kv;=kuH`@ND$XZwY^Tl((iTS^5x$$ z7xyyQ_My?(kFT#Y*yH^RWP88I4}Sl?TI|#fFmw}JYA9njk#rn9aNwJu^R>;S<2(}V zH?>aothqtsH6eSshhXWK;^!Yz>FH$hH%c9(F{e4CJU-d^IhZ*)8yIpwJe5evv&3E& z7F8a~#i8k%Ko)&gI7JdZPB-RKmGow%*=-VOI&i4j=w9hBni%!RQm&?+_&vL51-@Q@ zI8{@rlz*H>Z0uDZYT>9|ybYJ2%AtCrySD|;H!WumYa20o;=q^&7Ynradf5-aPmQsW zTN9xG7qP=wG+s}ymG1Q#WNpghD3T5Ec#%}$QzKw1magPyW7iu)>m#M7W55_qG*x9_ zok5Mq#U%>NjtIRNq@ricfgt^JCnS+*14)!jczesZ)rC^9_Gp(O^h}fZ0su! 
zfaB3~KVg*}_hCHN#n&}r)pt=v%de=WF@moM9m(Ib(}m!3EV{a)(q22RMtrZI`1l4A%RjMKpuTrPvP0+q=^F^Xym zU>O@7ukHvDWt}5mJA;)4%l>sR63En~JvvgxjzeXdAuxh#Qzd`+)51`k7hAVTP5>D@ z@nPRvqCD+|OZI)Qy+e^vqm(k$<64?as!DSmiwEEMhx=Vw!?>~tYPhb$9Lm1r;EiiL zX2eeXgVCv&pDO5V5P6^$Wtq)^o3?Hq&C{&yI)DF+EnuhUk}SMM!sjEBV8XfJM#(mT z!0>hEmWC}osywkhLdICW&^VFb^UETo=ccZs_=aN^W^@6I0|L@xd$`K`w%JHcu0~|7 zBZ44nn1$t}hqA+GK@cLyDNU?dbG9L+WDs7ox-H6=T2w?H697Hx>^jTM1dS~=2U+B1 z`pi{0t+GhFgHY7jvag5i7I8U;ztQF*p2>{4LR(BST@1N8_MvH`cW7CqnBDm+jk~2I zM_Z!HXSAUbG=Qu$9deG&D8G(f>6J)%+%)v{mFRh8>|BSwm99GErU zw>#qYVGap{8LbRq{^u9lpEB~wcwpk5_HiMMY!#G|d4IjRRPH96(NyVF5K$F52N6;L z;U<`;dqO?qWYU;;+<^%0W7E76gw=snUQlc(7HO ze`ZcLyN!TYPB`ZwZBZ+7{pPee&?>=i@$ME*ic{d(9T=8k0~LqFfbH`!_f9;R2)r{Z zlj`)YruF)RPau-U8atFcLrS4FeDD%uly`e{&8c9A9bthStBGLPXXKHog2qp8I&iq` zG^WIfrswS({_JUa8E|N{1=FBlAVZdi{`$Mr z{$+LqQ5FJmX7pI@U0qE|_B+A`7De_TYiy9h8;Hmhy~w%|FhbEcQaCadIdO0G;lU10 zbD;XokA^643m5gug_eyAxzR93mEVKSPgdv=s#Mc&``2^?U>$c^HL~-HgW5%c-DR>3P$Yse zc;4u`KPIqfg|TtBzqw#G$f-{E4>TCqHw^#uU)hKxK;JV1vxbHeTR+SExowSl;>uiY z4NIq-8(m$r&}WmeN~p0^#Cl%^^suqmW3Scb+HG`W#pxjo*mXd~+2uW=;AeYvl8F_! z)1{cdCK!AOcRlr>4-B<@S;kx8i0V{rJUBwmH??M-*LYki`S-`uLkg`7Hl z)EisXLRl7FuN_ej7F-@tJ!8p0Q0F>4MjQWL`rGoo<&0FZCMY?(C|eaC@D4z zr(G8mU)pLskV*zVenpp-C0X&O<^8=d%6TWZa8KRg{-<73PBw4lGI3wu7KZSsku&r% zhNTG(p`sbeS(?#-{2wR-^NRJmzIi4vnhE@VCffmiOneM}Vdej1Pq3h!8~(w3$Zsgu z4GwJJrVWIw@lSq&GXD&Q-g5kJ{I?AY#s)uiFqY~)le2vi@FUc0s7>BB_B=UX&K6NL zIHeRSOpRVe9ZcaH9A1-y^j<%s==5lPH{?ZsU=jTPP;iFs)CHe6U-~G-zx| zMzobMariOT6TxUQ{Iq7oC4<2nHRdC6@cj`FLRNa zaMo{DmdvOhyg{*G;uu63m->Gy)e zV#8;f&v=`vWt%d0j2ZJ~YAa8oDvWk`4TvzGd{|!|hrCfyQ-hp~w!agRV<-F1SC*Hm zpX$(m&mJ(CoBO$@{u=6j{Bu3quRP8QJSCg%CvZKq`ofrd#7NPO_C?(JP7Y-!`8`uR zJP^x@QP+cRE<<2PwW^-O_d1Cv7?(2B%ymfCt1YOzfyiYqUlal__X_1dMaQ-v9BJ}_ zqCK&Bd_IVl6PUH-?Ttq&QvO1nh3832UA*AAx7zxu7}mS+ zx^XXsqKhKX`VntE)$*jYr|FOfVe`8ijQqQ9HQnPQ1)W)DW&j4Sv*e-9O%qtiG}xI@rY%J+1i#$AVr+5S?6vVx1X#YvAe}t(bIe7}F{7CREFM{--dg5XMmh zamglr2cGMM$dSME6Pv9|x-&jmuczO$fVPhLW)W3}I)8|NsLO12f|GhQ7I}v^8TLT) z|C)t4i>q&a&D0&D8*XRB(}G0L&2LRt)d4kMUvAfYD^!Q0X%2M*E#v9Fg!>%JL&nU@G^%bYf{P_$SmGQmF zS!@E6Cd`!GmpOm`xsW=5KRd9b7ga;At?6Ivh_7*CXjSu#OmgU(IS77=Yw#+(!>DWR zWYSp`UW>i`RnFvdO~cgD)G-!i)HGiNS za)e10e1M>h?HVl)fwV%%l^hS2mPOqY*}H`sr*NBm8tkQOs%R^5AJ zjZum#bVyrj<$w|5rbW@~5u8+AVPPKDlA|!Ds4(@ZkLQW~QRSK3npq(&D22=^!61o= z4QHYS(9z0j2j*Rv+1{RA<>$4Z&ER@IlF&B;CLl_2lP_*vi_UXo--+@TNq+ z7+#ZRsrT3G&O(t9QscMLWxRuSySIqobXcV>gpGYvIGKl^ zLOp20$mboIl}S7GRG+5>zX9l3!w&hF_n9pNS9lXC&QmPQ1LXHM;< ztZbLG^J1IB=RSp(R}Cv5?GqWJjW!puUgW3(4IXb@Cl&r}EvBjF>I)C4F^ zQYB(mBQDm3;4qR0vc~oSKAu5IM{IGZY42=`{E2~c*m;uWg}sN`2s$*p6_S6HEm7+h z`%}bX^vUy|IIUH*=(rL%}t50EdQ5RSxG+!qA| zXBeg7RUtC&PZ~$wIn0o8rzXGJU6ueG8M<0RPp`CSxii{|eCW9o&%?toh)1f-?7-jKLY8^GNWDVC?<^=TYTzg0n!S=t5eeL|Uq)+`ETIzZ}RUy^A zfsXi#iU3S}oPs8+3?<);GrvBhM)w?6god|fRtRWc56LA4f6D_#Ud)s4O zsVAu-Md_KW2D{=+wq@L*vLc7;)oUtCOX$j^$T~Bq=gDigMPKO!%ZMEd>NL@=iO?Sf zUVQDIS)+4>Fa0Q)U?3p=TG;+#xY=6RXn7g`fTz|!;7Q*Z(DvQP{f9_ta-kCbKNQ+u zy91kRI<6ZgT^N`k{)b}vH=N?>83(d`c7t^tb~qZ|!W{Kwe>wp%9a5oeIpQpP@mUjHT`BQdkR1INZI(BTKnLVNyeN!y5 za#eB)(>|bgjf_gc-V1+FEV8TA9v;>Aoeg-v?DSD4s0B~!a~?9h@0EymEbQy^4-8@j z&!W=vrA~{@Zd;kbR@1~103A6)W>+hn18s9~QS#)SS>18k05l1gY+SAYUT&mU0V=}) zurgOP61NEiS3Zop297#uRBvzdWq&K&`oN_++W(MHKsjem_S`uF*88MwdLnlzgq=_xGsWYkhxR7`d@ z%QfGr>x2mouBYLvX5saMgyG?L(2iVi?n9RA-cxKp5M=2NSchuX)-Qvl7$h0OJF<6x zE9w4#bF=GJ>T}KF5TF4)t$3CIV|uYOIj=5`xtP&L7S(SEk{oEQY#kG*-bPm9UfUS^ zwO2}OQ!2bsM91Vja<`Be%AC;exQPm7V6lO!NF`40gvRve9b@W(K{bhBQ1-i2o{b^R ziq?)bdxm5*R}eUt73%df)#?fdID1zT$z)u>`(6VRSVsJ2F|2BIWl{8yO-V5n{%f@G 
zH?cSD%K{||%S%))3^Zdop_RC=fZV^@c*`sbRX%|SczvR8uhnY|s?1-dHpFhoUvA(V@k*#Di2I(V*aq6)ei_E6z4>D>c7# zVQNz}vFJ}H^U;KK5{||{CIVrVtcB(mGCc!hS0LJSH}@cXlhapCN(=uY(w-PYTi?`? zuu6l;V5T>x(|Si&)@ay#Bou^gsGs}s5q$=`P*CEbIK&EVyRHcz+@wIJreQ-RO7E_k z4V*pJy(HnawWGy$GMom-f<}?LRPYT(=WIWTp{Bu!Uo7c=UC4>a%R=`OoCKsjy!vDY zj=IBSsJh|OxArzsPDp&FbJ(x`jL7Ol`Zz|X-$8X`db)PN!mVK9j?kCo|I&`xmEE6f z*|P5X#9R^E|e^n_~tVaEmeX@`Gh=oI&0>{YuTc_Cii#VvC z4a+7bYc>;2^E4)*x7rOZ{$HLv%&NU>c&w&I#OrUQb z#^e}iGXodj-Ktf>dUEcGdmz;pcZW2Xx$N|(NgF4zogRB^hZ7t;x2pR%w8^&ztj35O zxd;^B9hf>%%Jr_9KddP3_$n__>%O|xv$ZB4^F&RQ0VRaw4{gkyN03)B6WbWOj=GB9 zMDPV|Y>cX6gzif5SC6E?M|GD(V;>_g0`HL}00$qT@IPJ2XJpGg#CM9l2yy~r=G5JM z97*^q;7QzRvG;>JTi{kVo-Uok z$l96|b3ex-V>pb?=|7`|+b6V){1hk}%jZ0+`kKqV<~W{tvR!+|37B|WBn~ole0e0} zb21@c?(@!SEzfw`ry1D_34kOJF5}WQfr3FCOz<6E0#G^>Mwuo-Kb6Lyz3%f3#-dG$ zUbNQO#?TL?b3U-s%et%MeOi_{vCoJcRZZCfL=AL*i3@w~Ho^NUSU6-&b|M%R3~Xt1 zyx0|Sp|+1WY{?0iK}Yi}eYyvCMzna=d&#;gSFJP-#6jeenoU(T1|3?nlI(^s1mI+8dc|D?O zl<#slhw?X4034)=`FPxL~x>#P&kL|;sKd`bh&U~OPzGH)d&|A43`}%6pVIT_}reI0^g!cD4f~x<$ zvy#RNd-48F2Bk6_A8#Rq%sW?4bD(s}sg@IPW0-Irr^H2k!iUs2!0QyS6*?Uq>Gp+B z(AwMXp=cN0^|;47mZ&~s>&@XCTg?`!nq!C;YdU|kH&j|qI(lV7o98=dJ|*z@iSqXI zCQN#~q`_U#o<3Y5w?^N`0#jn#RW5-=EX{`D4D9_vvZ{xaObWN(I~;Mtvc@%grH`4woMDN*z1C{`pFU z@8DJ5)et2)1i3JpJ7(VxG^K)?yJDIyhOgnY9&JOR@vxOaV;OqyS0`BA3kpINM98Dg zA&LnJl0z9H(bER_ux3Jd31W_zkcunVOpHD!90D4r6nngo{-rp$79NZkP;KG8GDpDV z4=P%&GF7!eQ|^&n_cwa<$@f_F>LPA(lnO&ngo2LtA|Ir*Q7Ia(D$5j3{-j_ru@S&z zpXyg_QNY2t@^K#BpP;)+Q|>vV<@YEzEpNzH;)s3MB0bZEwmWN{n@%J3o=;1SL8V~2 zK?=F!du0qe3BzK`gcECM>DnFtX*X4po!o^g9s*Z^&#UnZth$sLS}ISo-`Xy6r?#Vn z#<8oXosTx;bxBvIyfic`@644KdZvU~jn$xMsWr#*h#G+LF_y1?9Lw&DZdAqR?}z?7 z%Ew7#!nuw8#5c1BwKTP%B2YRUZ;cyKE#5nV;vpnc7T)WbQYVFiimH>CTKBRlRqxrE zAK%n7Wk(r9?P?@u+RMAB&3C0(+L>2;{=2=s5%EnmmW>Uc7p4+DM3d32B}k>Q;BjBo z&X-Nq>$Uk6g!SHioc^4UT)4sy`pum8&65D!X)ppg)8ut@fQ`Pmp{{E2ymXuo%&|uq z;08xd$Nny7g@V`YsJqTaWMK;ot&NmVA)jtS0j6P;&))I$Q_o@d<0M zx39i0#bP=W*^$@OJj} zX}jh8hp&G{Xk~87=@D6!zhH+dxOjUQ6$1x(Fu}3Nllc2eB36*MdTL@gq5xR_ zdkLwnwPe7&8pJfmA09Y2^5k)f5%(gPE#xZS^Dl(4RQjPwrNX(!ryji~^v}xgdjtHk zr3ThGP&%YH6!E^xjiAt|lzD5{l5QtsPR&;%N6f)d$a0q%KDvah>yghk3}6XgE_n5Z zLGDvHg?yN?j_hB@vA9g5TqqVH+lQx(E6;9bZsy+vYIXbVI zth;8DC{aa1W9xHBxk=i-xh_0+h##ro=^XQ!eT5aSR3=rtKeTr}rU(B#Zchh9U-M$v zU;T8$Eb-i>4rh44t%pd~FJzppjw_$`&QB;FL6qS6hWnr92aA*J8d>9qe&QYlu8cgr zUm_0K7^J`7V(MR=O;qG`>7$}ek8%E(X9?~-*C=`!MqXaaJ~GU{G5`>oNBY*xgH0X! 
z<;y1RR99y=nQ!ThZU)bW%@=P$4J@yJ0d!CgZp~l0*2A=NI`x762Vw>`@ zLn^EU?tHavilq5RbMz@c-r!je+(8vnau0V;Xf`T%Tm%%sIhm#y_;h6-zoT_rYk)Fs zmk42@5ri@7vgAr#KzH-bN!`OVCyyRZrAGAd0 zbTvKvaYh>1?8Jl+h5Px{4a*d@8MAt5uW1!jGXtk1EG7nR7x?&bvsr(#jJFnAXcgFW zD3(}%blvcuvS0%t59<V)9It}dZe0B)k~Fn#Y7m2UNr%Z@`-nv z@fITG=jD^sMVjp6^HHH=*u#%wnA_4LY( zua+~L>@M?O-xj4}_HWJ*dFm^S35z<3XTE8UiOJQM&wc@^E&%{LeupC+ZOkI-+Hg&2 zls|F#^)~rPiHDnlUWmJ=F=Uz8KQ=@3@sKZ(0WEh}K-I~0c+!LtZ#Z5C*;@S*8uExY zbqAtHh;=k*wGnGk_-wjHk8Xnbs2kYg1Qy+ z)LmOe8dDC-yy1aeCw_wV8&6rLv0`Vt;2Thvh zIfv;o2WE8FRF+)9$>|Vs?y4xkO-28WjT_+wvnA;hj$j{(807UjwbY{jn{PMia{;om z)fuxsPix?o9+7MA%Stkxl$vl>WhZRF?M;pBy^2^U{O|Tv?g^CJhf3Lr4h#Yauz&YO z%r||}^ah3W3JazQ^bMvofdDf_Kek=Ozwy(WHyG3hI0(I;-=SIsMw}>s`uhNIodPNh meagnMAUGYw9~avH3!FyZ`@})gzV+X2KS|MFB9%Y&1O5*r7Xdo} literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/bg_gmap.png b/doc/source/_static/style/bg_gmap.png new file mode 100644 index 0000000000000000000000000000000000000000..039ff6b78958ec0d20add9a0663cd275ca7270f1 GIT binary patch literal 12925 zcmajGb980Fvp*W!$;7r#Y)ow1wv!V~Y)-6+GqG*kwrz8g318-W@4LUd-dpRv^T%Gb ztE;NJYo9)!&#CSmp(rnb2!{&?0s?|4B`K;50s>0e& zM2Hog9n7t4%|JjTBa%~~RaK=hL(e;>A{J&$EuE(lD26Fzpp>lIep?C8v$4^dn>j*A zV4$KZikpVR#Yp3;iei8V`NJK7;;1e-F%i?!zQz@%t@5{f{mFLOex34Zw%;t%aGgR1 z`HPu2tq?!~B6(;A7uczL8m}}9+<@>0!5tw+V%5U&PR%72^k3O~z5e5iJt}@tH0xdI z_xWKUmq3uy3kCuw%oUT>D=qenu$M*OMT`!DK@?$AF?lfUg4#C+Rv;8?bDUP+lXZ{Z z+ylPtEF83n@<9N?mo|#w2tq7J;^c`L6|_J?Rt{Oj$B`OV!`c^9Axeq2TM2_#R8%y1 z?-cC1|Dhaq-<%Nm*JJHg4^R*2ay$*pwvFt=^au`XFSVq0%%gn8GZs3hPH5 zBcn*tv@zpjVjA{Y=YmB+3A9^Bdl>Z;Jo@21tDe~7UmBsjw`UB15+50zC>sYlx2=px zkmzgTzyqwdX%FOwKdffj?K#Uao}0{mHI^fHL>dt5U}#_-hcBg;WOHL-1Hu3AYx9lQ z_FUH{UAUVaP0ALeza1gk0sY{w5pGLAFp$T0W!Zg1XSP3Jj&A-G7^VyZGxx{{#9&Uu zhz9|1&c46^R3>Qk9zrf~R%v$9t;m;a}t<7A&hXiQIWDZOjD3F5so;r1JJ)hy^0tZ z@J_^pbD6}Cl!mcyZio8J3HaJ9K_utHxXrACcfl5p`DWSF=M49Mqhu#I(4{{%E-z z`FDzgCDN*7W#EfZZ4tO)ouWRXCbOz+1a`qq5?=ZJGxR_nPgYOPHVnNed!eUXhrHn# zm6K|0IB{4L|Jr^grg&^5a|TK*m7k?UY$l9G_9m1}*R`7Th~+_zL!Ku2HBu{xE1N6* zE0nfaZYaDlSwp*ycU<6^;EX8417>^9PGOG2PRC9nEwCHVe6ZD#oWovw`L}#;Zr;pY zgnmeZ!G4iB6p|T_k;#Qjyw0`&Sl@s*vH3WH4nco`=#^$vQJK&suS_g#HZ4 z`r9L#t1vEICE4WHz@}0KNHzid6l4_jvXr{!S+Z#pT4_qSkwtPQGs>2b4Jr*rYgAfh zT3D+dt9o_Obs}}F&XLYj&bQ7#oaLOG4+Tc&5BU!g51&Sx#{^Zs)1A;&C~B5FitZ-# z$%t8rd5NVMXYbJrdP&zLbSc-&K^2-v`$*4^(T?elc^`_5VI>=s&8TLoC8$QKPL;}* zDXS5vwU&vM9joQco1HK|V0glIh6xlF6&zJks`8dCRdbYA2*rI|AtcP7bg?S zm(15OkycTgH*m=}4?5y!2uz7q&s!3z)UU*`Be;aQ%v!5ln+6^Lv3XW`5P6V+G;Q*2 zr)|k?%?~x69G;_|VUJJ`Mm{p{qVFv4cu+L~$)L+SHNCkcLo-Wp7MfOE#-bd~%wsIp zOt`E?IIlPkoc8R#W_ea`a_=a{ZtJGB6mf1LmqrrY}W$bA)b#DuCs!p z0bm)>^P*yR!mY~9_bl%?;lO$0FuA<6yr*&1b!c+RE>S0ycWiT7`KkHK;Xcq#KuzS zT7# zLa3h8YP4TmT!brEQP0sM>k4f>0@71*kTwbYVP%ce8qJI`8Au+MtKHCq)&i_dU6B4U z?GW)cdYgv`3+@ZHz=xjloifc+F?QOMslsbcdgU4AEhV+0f%Ht!fx9i?lOFvvuSAlHP zznra4)f&@W<bYA7X>0bQrF zX~~|5V2-chY36utU~b*4%1rK1`mlY>0;wJunZQY4 zPV={Jyut0M#8s?&j3xGn>huXCQhv1?bo$9fMU+M1R&n2kkKMPvR zi;HxNSZqCreW9cK%)`*~EjgApY}R~hqBdiKu<~#U0-xM}j(1hMirW;{Gwu2EU5h%> z)-spD8Y>*qtmSM8?4qpkY@e*IuWGI>KL(#u)8!kd>gri6;HOZh4>L1(b9{W;qP8FJ zz>fPop3<*gvB&^hXIiuKexpxw$>Z6nfIn>y+1-1Nh4*Efi+^l5mbI&O^c+s>FKj** z@0O2z%Rf@^mN?g-Y@vCeQjxRqoN?P6=iiSGb$;qBJ3wy`;v?a8v7)iEc!v2LuWlx_ zS6`cLW^{Hxf*uKNiA01A#wHP}eH1ltTI2pV!9X0WL2^Y&)<9<`2a@4mo->5t(?9S6J_q1&Uox~n755Rouc(8(p+q}FI57l| zj(F)qh46r{^a&q)HvEKuzYozi6=gRTn3(KTg92Fw?ZN8Z*tpnjZ3U=F*1LnANE^%? z-iL}kyh=^*J(k)+eOiBl2xvpz5%3~2eKFWfD^*RPrkpH~iGv-Zk*R~R8KZ}t;}@X? 
 [... base85-encoded image data omitted ...]
literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/df_pipe.png b/doc/source/_static/style/df_pipe.png
new file mode 100644
index 0000000000000000000000000000000000000000..071a481ad5acc154ffa90b340f1c24cad2bee958
GIT binary patch
literal 8673
 [... base85-encoded image data omitted ...]
literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/df_pipe_applymap.png b/doc/source/_static/style/df_pipe_applymap.png
new file mode 100644
index 0000000000000000000000000000000000000000..cd493c78452ef03ad1954eee2b72b45565b45f66
GIT binary patch
literal 5809
 [... base85-encoded image data omitted ...]
literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/df_pipe_hl.png b/doc/source/_static/style/df_pipe_hl.png
new file mode 100644
index 0000000000000000000000000000000000000000..2238a55ab1ce3e5bb9b3ff8a19b841547ee034d5
GIT binary patch
literal 5485
 [... base85-encoded image data omitted ...]
literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/footer_extended.png b/doc/source/_static/style/footer_extended.png
new file mode 100644
index 0000000000000000000000000000000000000000..3699d61ad4346e84748334a8fd5e5eb556364154
GIT binary patch
literal 12326
 [... base85-encoded image data omitted ...]
literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/footer_simple.png b/doc/source/_static/style/footer_simple.png
new file mode 100644
index 0000000000000000000000000000000000000000..56dc3c09cc700209a5d2922304a3dc48d48581d0
GIT binary patch
literal 8717
 [... base85-encoded image data omitted ...]
literal 0
HcmV?d00001

diff --git a/doc/source/_static/style/format_excel_css.png b/doc/source/_static/style/format_excel_css.png
new file mode 100644
index 0000000000000000000000000000000000000000..0bd4662c3f2d029d194ffbf94770068e0ace3f6c
GIT binary patch
literal 33222
 [... base85-encoded image data omitted ...]
zpK^%_VaP|66Rb1;PWOS(8ZGRIEK->_NCpT00&G(nTCp1vij}0ngpM(ZBn}X&@)rS} zo8=2+%a%u?IRDvrN%>!n+a4v-M@8*a6|TU&l0y#ue6RB_yVaNSD*}O;y_JbvDXZyp zA-?gn;JCxomCf$@eQJ!@So&Y`&_n5!uN)${4)b0z`G;?q(gqu9`M>GUZcRhp^hQ+M z$S=6&|E5KgPEBdpMrrfA{~&lpz;j?-^3U}ec+{wC);D8F3@@`S47SJb1I=H6XW?ih zXlUEWAfWhWwBm6c(E~^ZAWxv<(CySqV+%UBU|Dz0mFK6w{=i4mnP2>7)i1Th0ZF@Y zY(WX+q$}p%7J;ydV9bn`ad(gxA{rS+b+l?pS~ZreHYg!>&?mmD(}@5=Zz_P!(m-D5 z#DDIy&!(-m+A1x4^;O}vI+77>rt7b}KJB~TzT(%L{7KNd*SGTPy>J{_Mxd6|y?AkY z?j!HRN2$8f@Dt8VqdtS1Tu*i5 zdaAM0__W#o`B9JhXIEB7{vN*%G=D)njSceboKgr+NdVvpvxr5V3d7AVs0JaWHYGtf(8c)=h+X5=m^l3M!K~5+rm^;8;Vndc(;*tboe3G>BrH9lKUULFLyu|DL`N^y+Olr-dgUm!F3l zG;-8y*8lvUUXvDm_vDm@Y>-CZd2ecPsVAF);$6QSg5J=7kKYHHKMyYzu{D(syPeb@ zn`YG$kDRECo(4L~<{h;v>pXF>$b)ytNTZ3LjRy)V;lJ0Z2<=6R2t@5veKLenVc{V3 zeDN=6)HEB73VH$3tl^&B%4>jsXZ^>+q$~VB5XR%#iFj(@mFIN>bR?ck50eTV_oV;l zKEHoz&i-8*^2vkKCMTVeM>e+uq_@Vu$L|B_y>RXW>`AOWO{y##YAiBDRFUzsL76B4 z5;5s9F~^5s+ViKD05D}a6+>6sR{pI^xOgv|kmKNdqo{Mou;M-GfuCjJF zAjJ{bOl3;U>8eFbGF_>djk(3i%;fn1tC_VZ4KX~$OcM1(se42`#j;jdNu#Q2ed4dLGgP8O zx4hKG^h5o}L>BrlVR;n1GG-~^Xw=;ea64BZ0R!i4@?*Rm2t~y z+~CoJt}t*(4jdY{fB%vX{IQ{ycF57;y6K2r6p5*sx&YGyq!21Q62*~|VS@n*lB1Hp z+L6A|B87w7K5ufX=&~*+SZ4ebLJmv?B54S8$R&dchCbp<{_JNRIX7a%meiqcfPb6* z=|GIucpJ3FKidg)MF*nE;TA^|KkpCyYgNFpC=yH5xNwwX$?Q%rIT%+<){aHmmfg_p zd;Ykm1*-*b8n+Be9WkfSvvBG<5hw)l;P~t$y9`B87iNTXhN2FRGYJ2*aX@DwBHB=V z=9mm(nU(X$M~0_QQ=47!8yfbLDcX~-b%cYXDm!EZuNU{KKB{wv)HXva#3Dyt!6PsS z_`7~#z5M+QQ@o5l54exND*X`pTbK2(pLD}6#`+J|n2c;(FDrVfX`O$}GQoca9SHPx-EBAP zE@)Iqy{d}JRWRjnki&cHQV~Qq?DA+Z9@#U^4)7eqXkpb6T45@O!}`F#GB`f|?>50) zKb4=)!qIle@aGI2#$OSLW&wM)q|s*9GIA6ex9nYm1tt$b4`m*Kq2JFEG~X_;-2|p*H^cBg0&m z(U9!F>@OoS(NQ7LjdWXyoBcl_gP z24nr3qNbKgIURKc;JT{+N*+uI>SulY8_TeSqmk;qid&_YRlv4=6NDCE$T>hj29L<1 zGvOY3Y`KQ-q0*`ZM;>Nq_XQZ?cunMVkW+6^wma4pe=3-XbjhDzbs)XzGZBGsMLKc! z-B^o1_8O_k)(f=?_21^mR(Pgl^zxAsyW4}m*yW}#Y3v_HMpDi3MS@WEJI15v=(*l>d-ERtQP&@qInL1$i0hmsMX zvXEoJ^(J?rGqEY?3+%7?rGtcXkx$8AxIVp`Gp95IkHWLPmT(mdwse-x)cMrCu5M^=Tc4mD5 O0000?8L4yw=KuAJ>Ai-UOyA2j}a3{D+(BK4n zlimGy_pev=PSw5lNPpdZy3Xn9?|y)&E8*i%;{X5v{8umKUn9RTQ=-NHGQ(r#b|kRD zWvS$i*9althWzWO28;#pa?2Vg6mGN^t2yquh!z5%?gtSF>Qe0E6fQUDpl2glOk%~22Ht_fhCOqkIzi$-4f9Q9Y8#JfXod5Qe$-YBmekrijny{dV#obQbeU-S9Ga7E6rLN z7EM7x!SI=TnAgU`>xi?46!)AIRaT{LT*n(5Umq6?srO9}kBiZd&c*CqJ)%NHY-~8K z?{;=lR~j~jOo`9dfk2AnL6cqk4AS?@<{yU;ZXZpogeRBY2nn-vxW!qlCqupjo5y@aINlvGvuIkrcJPA=4c}eiw7~xLZgWhGPIf@O` z7^l_}e5mH(k&7R-;JNskO!*5wWt&%o>D}5A@oWT_bn8seSmEM^T_)%5nv7RdoS9Qz zNSIXANyYM={O#B7b8G{v4)li*Y`x^)`vN;O-YOdv6mFs)l07Lld%Gv8#1pF+mlmfM z@j`BH8&3RJCfb%bBfXv36PM9Loasq6xq80!i`oSP0T=hLRX6aVLYF`{?+_L;EAH;G zGeSHN&>e)o3B!Q~dBTYKFm*fVU!W4Mq5@@}MFPL0;64r21_2y96)W*KLjx))XaV_I zKw8wgRkRoA1~P1o0G@aIW`ISEtIlWN0K6S!zJxZ6SY0w?kvIc#B{AeyfMhx080Jls z8<|cB85yoSi2iAqu&h{CKoJqg`)FCOu~CSaCrUF8zN}9+za5DmYG9})A5~&_g1Pe? 
zT0P9z%=87B3A)Fs=L?z<*i{FM8%qGK3yim8c5UJ0*&Q%L5rqs}L9RIa6GZr#h&G6W zIw6E4OE~uw(o>|Y&0K;y{jvE2wE|o|P~LJ}TZqm%tX|1K_s1BQC)iKWPo$a5_@j%= zRhDZ`-{&0 zn!}%4n)@-w>O|p9Bo>|CyXJQK0yPblhp4aHdL8N>;nwHA<1W{Ty@)A}T@fYH=f9r& zTl~&DfWM7Cm_RBlI4YCnB~dMwcbGIVwK4-DUBu)`U-zVr^(Qi!)0?jCx#oOz%sEf!9`n)ik2vTu+Y~XJcCWeFj|F$)bdN zl1bDC@(!}6GCza{1=r@X>YE>J#7EKO?VxKYJn%je!=>`6q#f?2&Z9nzw5 z93V?YT)zk$o?MW4$x?HcF%Kgh8%dI6T;i2rVP>gOWi_%%S4|$$Pu8T4Dp0c=du@ka zt5s`0&!%s!Pw~_Dr*ZY?YPo7bXcTk=`WyNdss?S?k{p=alGyyZbv4j{kkXds+~q8V z=zVvSUyJKfQLtC=S4gzTSZD9?SFVg}dtEt!k#DISs62__K$su`w&V~L38p1u+G#p* z+ELmg#i}K*b?9`ON)$?VbTTKccX`gq{IKB>lKBOB+hwfUVkI*b!r#kt=?mJ7;jiwq zF*JWDGRx$?%r&%B*3y|Y^~kk(w=KaPn)q2aXGW&Xq>Rd$?hxxReZFjd)N|9568sa4 z4<__vZ&q*KYfflxIIr{*_8ahvxWG6!4^+9AzZbZt!Kj2Kpve;`r zkQavXBLp1ys09nCZmC>FT%HG6=h%}tSC8h+ZhkM$w~w+<*b5Y77i6e8w4t+c9l99S z-?rXv;Ce=$L2k%3&t<|Zs7Ky@&|fWJGBv8*v!^f)9Ha2oN#>iCP}1sVb(wM3QI1z(_)IhAfPl*_~!gNE>JE&C6M8E z?-u(Gd}kia@Ob|~2&@JAqLjfvF~p5hC+0Eg5Wg{A&PvWu-S{$J5-0Lj{zj27>T1?J z$<4qbM?0C_d%h(;b3LC%Utd*U<7?G0e|ht6yS!JM1^xS)IGT$^^>#*{zMfv^=gMCa zFm=_t^?EmZ@hT?t>s+R%r*Xb(>1G-;w}m%tdvdV~Gu2C83JS*P52Qs~b|>_yRW0ga z>hsQx957v4waNvU-%X-Lgms14(qfJTjacPqS-7vOl+!fC-+~9kika+LMfsXo-}109 z<+DG@q)we5`MS<><~N}|F_Vvfihtd7H}2VTj=HA>G?nqfh_B+v<<_Dm1_#O-ulaOU zarKo}ml8Vun9VR@Yl?0tcZEOvG2YL#@wa)bMS;WWvi)=Cd!1?egqxhe%~bQW4Nd+Z zdlS>RNe$ukGcL?7D$AGq-jj##7gi1<=f-1m(`ugE-icKlQ$u^8=uOOGhd#OiUmh}U z10Q)l8b^2s4qNUR4~h))jy$%e*4@q%+DOIU@inlSYXrk|I%DB;@K*Sb+qBDRa%FNG zis<93m6g}>ya-M|AG2ZD_nyHhP684_9Kz_t>x5dm3Gv#iv7L$Ti3RI&Yqc%qtrmnW zK@A}xox9|OUba!J>F+(Iqc1+uess>oQ{`Ne;Sz~n2d{K|b&fTJrKWW_wRheG7q3l! zn!%X8oYJ42p5~mU5bD713Ln_u@55AY%(Qb9auA=FcSJ~GtK(=$K8jxMtZB6sHft=T zxrpa_6|^SLr_BQEN?ns3)ST#?KRVDjUD+QWRUX?t^js&UsMn2D*9h3+ju4M-rKO5x z1_m{MT)8*}?sWNHr5xQ-F!L_|(jT7;9=MuF7|clGy=*?uXkT~BKPy?9zH}6xHK;H& zcHOHvaD13Po!t)l{*XvBBQlTSgbBt-BFvzHQa8Iz-fwRielnbO#ayJPC7@{&BoP$w ziwNBLxfI`0acsSm3U9wa*_K(B`w-FdC7xdAq2S7JvS@nuC;wGdTOZ?><1aP6rgs^? zZ+AYVvBk6XXy|0lNC*bci0+Aso&;>2eW19bsNwZ^jJeCbOE~&27z}>=De^@0`EP?0 zzoX%!hErF^Wp4jj2|A(s-N0YBKa*PtPZ=Df<^vAy$$szc9n6n5jPhr1JS|6Hfa;HEs#z#hACssE(vqd+$Bq~~-2 zcSN6|Xztu$1O5Ic81KQTb6v8VkF}rBaL?WwSjj)PkQ^F@>tFz8Q939(7Z(rKnwofZ zUe@@a>?)g%ZJmWHoZr4062B;R!gzFe1V|d7pVEmD@&N!SY4+NBo_cDkU`tnL9&;;K z3u_)9^(ygoiYJU)Uvu25TEeo;|TUOoX{ z0Re8L1h>aq7f*9vZWj;ce=7NZdgQG=ETQ&pp7yRTpuc*}EnK}kC77808v1wpr=Qlo z_W!Zu;_a=!TQH6GFp;2;=KQkyd+L7_!`L~2yb7>%V_(e z92#SLs?AbA5X-$Tj?(GY8Z^THoP8QspZ8>tNK5lGNL=QVLRq4B8IO*fzBasp+(4%+ zr--E+@`KYrc}Om@zSoZ)bFr^gWzlo+K)@jB)EIz8$!8Xl?zDT>LtzT} z5sq(vdGNDf*PQcdhwzX7W5=1&?CWdasL04He#_pmiuMO(hTuDXbE0T^31t?QL?Ko2 zkXg9D4}F+x`+efW^#-BexFt} zgl%SELF>(SDq^+sohad!prBw=DPI~1IeG7m^L(wQfPld1yxXPO^~qXkgkE%fSQr`$ zJNql*@HfKF^Lc2Pn4G|P#8Qh-!rYvm_37pS;ztBwf`-(6ytUjL6f+hPky-t!w_9&2 zOopBF^Yh)8SArjh1O0Cg8xnYon@>b(S6X~D^xK0Z7MvgMZqnLaS0q`GO(ThzS@BuX zm(7yTs)ht1o~i?}v9YaWMPo;cO@a&sg@u!QlZ7`p#ae{~foH=aPr*YgRYqYE5nn0E zMmSAcU-Wc$pG@c0Zy?yp8w=D@<0#1xBO?XrVm=d}X+-Rfm*Jkj*A?a~%~2&D@6MiL zgeZQYR|DqN+Kn@%2s=}MW{_Hb5^JOc_$1W>+NpyjJ9LJkiyI=L(Kf>=3c#mMf^&@g z{IO_yKNW$`gj|=!|LhfJ?%p1^v!Q64yf&_jwFmX@mkqHdUy>K=MX1 zsW%H`jwq5q;nK^&Ofgd-9~mbTElmxO^*htI4stW+DBbT9bGu?8dff|4f*V81YZu<8h z9&6PC6BKQX8$U*ziZ9^)})w+Wth1mKiqij8cW3)m!!vB-Mqdr95}! 
zun=7nCZHGZcOL!5|E`^o4jfO{U1K|X>hq#5SMf9AD`AA@H9OBBMGZp(%TZ(_{z6;E z3-{2`lx zb?nQn2BLEj`3B>)mc!726@;*>CzaBx_xvZ#7~UFqaQFQ!FHSNU^jZy!1L=_4ldgHntkYQ~PCI5(Lm%t4TJmx+3y!geO7+ufpoD6#xOmtqqFh3d2VA}dkR&-+?f{-K`1!@0WJ1!}Trc!913j#2#lDQDU- z8aE?Xw5Bs%?5d~dO&p2edqF^M>YLp2^HMfXdEUWzf~%!lGFdp zLfOcWxuL@T;(ELwW@v~;yT;}%__nZVVFwY9o%Q_tIy$~& zQaIj%EG{{HfpPJDk?-$5a2KD~udowSmm_1bkWCTLYY>D6utrjB0{6Qs7BLIx>Itsr zGISOMbRct45i+Z%eTD-wlmP&|MeZZwvzvArk_1;w5D32eGL4jg`S%o11W5o& zl~#YWAzOC+52lC-p4WEMR6-WMPn}m1`?V^Z*D@ri$1-h~vo%mN~+TD z)9Pnj3aP;d!|FM@N@eu*522T7hgEb7dP{2-hD2|yXUmQEhLQ!(4{An9ynp$2vDYSS zd^lFTkLS89jndzXEj+@OFt$ha5tNakpMx7)1Z4Qy6BRw@!(V_dO?$MR+LK`gpwh;gZT=aT`$la*XQkrM7Gxa)vuuQx>u_a`7ioUqZD@O-PQN8qOHivc4 z?ccmIZi!H^Q8*Qt3L23Bi)bfwjWs?eB@5RS&{M)i!=P#86LN)R@qLQx1RH?(<~rVI zzr{s+Zyt{~GY*7xV({P%-;!YwA)YWAU65cEePva%(4F)cmFTk~g+t3X?e{jLpY1rb zj7&65`yB-#AS3B}5xNY8G9hpjrlM*dM(u@4!9FuWd>Y|TKYGA9<&1dZh&4KFLEDi$ zm!)RnuDYgFt7khgYaHnd#`h4@a463Lp+w*TJ^9`fS^8(zhV@S7(F{3n{X)+w5VG3i z_09{>Z+a34(Ov7v{TBBPMGS^-y=GD^gB+m&+*`D0>X4Bn9x;TIG53aEzQ;SnAzr%S zVXj!F0phKh82|=-7}%sM%|(FX4kI$?UxPYAEIlG4zYREt9Bu7JO_RQc(C4IVj1q4d zm3{F+FTy@n2T(4l3B-H!yCsYqmIoE_%84c+p^|^0%;7xO!I9^IM~kBH`o$3~xehT0 zEZ!(U&*9Zd@D647xAcslKlVyJ`DeH}X^u3|#F4VH{Udjocn{k@+0zR1Y=C11O@stL zy5o}=(cbwlm6z+*6h^eipbqt?8rNuc5M(fN3pK5+*mue=<*3mVz4+R_JNbHpQ16P~ z6CJ4}a;>fVilSpxovRr;qjE>ybLBwykQa?Pe9;miWtqub%Shv*1QQzxk@q7dZB3ZvK&n zX1-?;-W|2ppTsd1s08KOqW9jpE0(NI*Q;fx`PW5ckR0`aPU1iG(;=Gr;w!07sc^4r zEq4WVPdya4h^}t{gPx;6*r$ZM)=$WD$zYazqz{Uzipxx@@>gHAb8OCB$nN*jXsPQ+ z!H!xyS@4+y!r*FAM$zb@Od^|?boJ6^u3wpwEMZyw_)UQk+(dH=dHq}0WLT#kp*+iG z>d3NUdvzXJVQ?@8q!-4iqIofI4W6S)tuR-noG(a|09J%yLY0|juSpwYk9FD}m!oE2k89x@C7fW%{`R+rEfAR@|b zstj3{78XoizotGk(LUkS&BG>ToztNc^X7UV9u7_Z_KgzbBOa#_4w@a7RorpgZS?zh zbFhv$Dk>`PsKz)=NW4cab_tbqNQ6EpDLgE!kqJ>wF#q*KmP>qq*n3X_WL5d&jZZ*8 zD^{UlBCMQL}oRAD}0n|t$&l!}U-4%-9=jZfRbfkR2CM-01?8138*>q1^O%p;`a z?b&v7ssTV2GR#M3@l`p2L#JO0rFUaQ406Kr^LdZ+7xMH-;~oTg(`LH9}I^qRcdq#+cVIpkg8RQqKy!nsKbhpWIb6N8(_#%x6;X0YEV}a zxkWSdb8T%6ulG)N!*QR&6HievF$I~uPunmIA9BV|aVH2L^purxE{<0+P5ky)WFI@* z+T1t7eKnQdmPco;q>8xOcngPw%#r}m^pww~vl7_rXtRJsPsqs?=g`9%qPu&09TxaE z^*%$RJ6*=IP#9OAlOzHUf*~?%(Iq5(mk4NxdCKE69NRmfg;^&T~S4gg%hPkz&mem_rt=KT#}^8O_Rc z@fh8Wn+t!<9`Y#3wS798v#aqwH1NKhdazUhCCZjX>(MX!Zs0cu+z4d;hy!_Kzq~<{ zHr(j_e0bhd014?D+B^g-EGH7O8MkEY|D;y|95tS*%nRp9$Cz5qGgQ(=!2bth;~^>t#Ofp-P?Wooab1buG4)Z)l}tiu_&+r006F{f{e!V=>Pn_$3T1j%7@y20sydz z?4+gD6s4s>YHrTfc8*p6fI?(SD!P`IB5BxR+gRkxgr%+9SR&&9lQNpRU2~qD)HDyz zU)EMGDDotPglck@5m>QGR9Z45$iV?vyGSp!W?VTyfBl7sFUnXHZSlU&{;-A^^MylJ zOLg7H@Bw$EN#o!^Mu5VO6;@E&`(Fv_lh75E006}g5FWSwOP{n{ka)ntCgSAUk9<(> zck!f8h5yr|iAo}MP8Se>B_$A>)ukkRi?f-<(hed9kkCdtl#gzYe<19h0v1Y!IP7KA zb!J^q!8?)H+@yk62_C5dA{m1uE&z}UovRn=r{EbnhBDM*5x%tWYVPjXav3JdjS39P z;^N}b3)c|$tw)XU3;1W({Lk;0hc?e#=#%c}T z+xxNx-{vtPyx0T+$ufqG_wBNYLw}lm8X0o=WNgVdy{gN@$JptdK-`#(qn`CnZl4}| zDoFNfi(5pBG~=z_XXSLNUHZ@b*>t5DS>zNoKlE2l17yb(@Xm~eT#5ToyG_-{F@fMo z3Qg|ES~fQ66ap@HFT>hGb=a& zcL?~Y_u5qZDszN~)7#Xar~ytiMB6NV=|2Q*{egh12aR`IxNbbxKo^eyMiNWb-iZr* z91zeIgu5Mx<>m(sB;-KX>7)@r#$QJUO1_BzmLXxk3eo}r9J=JHakqnfs>!GTMY%vK z59?3?%CW$kPDU3-A1X*7d*}Sd^68svE(o<0&03VI1=phWL1eB3! 
z5%>|#imwud22<;UdKsMgGOB}LcK4#kKEj`hSl84oYh>~ys8&-(_oWrv#hVK?^7T6(;9*QTgmu| zRF@=ECCAh^Nw#U^3F)bYN}m~k-HQHa|OfbvtwgvVsxwQqKpiTb?=zoTW7t?7}3vAr-&+6v6#@XMXlGY zH(PqGZ>3MR2wOC&jj5Hc<#vm58*@8z`{<_P2Hz1IoZb=LPTIK{gb#^pNigp-m#gWO zxyWoJb}P%;$$HDCnrCmm?ekWuPHfkxo{+0s4joN23lwo4K(%(if`qT<5c3MMV#(zzcAAF`St3qT)O^=xp*5rI{|YU zJ~z%GE_)6N?&6n-m(KhUUO%gRJ0hpr@xuA-vTsFpQFbZ6e7WCp)7BkXQ(HTaT#f4Q zTJ6GF-jHUK8nP_07_)Qhk#_w4Q7fFPrR+bPmAaa0QeIwxq7$Q&H1~O~W^QMWfp3aW zlCQ<7a>3lu$O*pSvE)}@S36uUY<2UsEy1_ex9y>BM`(B}O_!C{Q_a&Jc z=y6}*pm4VrS_<|0UA{5mQR(4#kiVC>?Y6R$QueK^^ZTNE|LB-gl3|+g(CV=0c-xxv z5W`5p%vQqJ=GOytT_1*qryPQH)A;+B=e=`oFr+iO%x5@@eae0L?k;Z=eWiVreQ6QD z5SaHu_h$aIPY;jyz*PuM7aY8J1m<~702T+JM$lUxBKawKi#Y~&B2sAVb? z+x9JSu$A0@;8^7_)AMYE!QR1)Zr_D?%b4}r7AkHR|2R~~QeP=)uze4OLESGemEuy+ zbyR!x`nUUWs;2b6LuO}ZvC1@ca*P<-!8~wYrG3oqr%}Q~x~ekaNSz9vVZ=9f!U@Fwq1 z{i&I(#D;ARa}WlI^3Ur-kLjb(<+Y=Tg~^11%sO71d*P}R3b#Ht)FuYuBTpTlxW{aS z?-Sc6qi_%3QHwpJVg6C}v8UEA-w{jr*5b+c9Prm>VE;hvu7tLQw$`>DMCSD@sS>F* zS?tNp+M0$8`w+92r|D>5S>JFJGaeB>7Jh8%ZAv}$lt}%}#NJfz)Us8jmCBCNPRo!D zUL8I@wX4{aUf%lzle1s)$8nyqUer$CW-3|4!bDTue=BOkv`@f%;_wJj9EQX(Rk6TgPlDHb8=CE3o59PO z5Rn4+;?|6%%z5DVa_0{T6H>ff+ za{g8K+u?Ece16xj>@k&cj(-Ww5nTu^4L_UGjiT9Q`eAp+Fxqh58GVI@3Xig#n~0ms zE8KT)aW%Q6>cndGOIybk(yruB>B#WDxMUjb$Ko5q>5|#~Mb4X=_5r%MleoHmll$y5 z#9n0P>*UvcVC|eaQEvY^!C!)cr#?Fuk!1H|b?hIW;_nOYQ;y5H{e_+u`Cka~o*A5a z9giNv&z&89vU<;pQu93Q`yL<`Gg|S_Y3;?Ae11QWoc;RsduhCJoYCa69qM!S@W+(R z9TGcE!qK6KM6&531q3+T19D~PmXIb#dsDFg+2z#g#Q=@Q1fCTBs26LbufRQ++fF_C=_Hr3|mR*vM#`HX2|asgta0W##uqQxm(k zLY*hlzLLqr&PAB)B|>pTZ!aFvT$}{GqZFy zw_<}ixjY|e0RR!0(DT&E3Tg&|IXOZ;2*E_@{~;mtJpX%{ogVZL5vYSGy`G91NZQ%W z3dGCC!^S}`h6MtFMBFT`g*0U3{sn)gMComzP!}O~c27@FHcxIgXEz&mPC-FIb`CCf zE-uz*3DyrEAy6|IE93*iKb8DHJu+4wEZpo|pmxp>(BFE^%$?n#qV)8C8~XS2Pd}|- zcK@*i`S7o>o&#k6o5Rk@#=-t?-Oo^wzgLCS>|j=odNOuSR*(ED`p9XI>1esL`_t03c{rl#$ee zAsrdnK}q^Z+sn;3%fUz{>j0pAXLTq#h9nL?onus3HHHWQP#x!odeF&7lEv+SAaTGI zfS&4F9)M&;sJ1vSp=T-IixF5&P6smF*IgSutofzi>|Jal#Qn=#l}nUY`$lKg(7nuS zw*`)^81$nU_${y-)otcFS0B0I4Et0DqvY(vhY!D&zB^nF(E5{@rtIb9p^w}ur?TYb z%YM@6i+b_xeUn>qe7uBi<@>W1CdkiLMfbzGBB@X;!3Sy)*u=>>o#}dKpla=8&batf z=D~awc={_CUqO6EDykIw{>*Q-RC}gC9^Zbd5VNSTZ~`p;AVceSODB0JA4`>I-WPQz z&2rHja`>}NnAv|UQ}FU)TzvXR0)xKwH;tluCf&hC?J^y$3Ikpt=sp3fVMDaB_YrTY zahs5KsaE9iS_jz7%gZEqzb&Lmcs=;}?((4i2v1*BmLd zdEcZG5fd}!x!CoD;cCEOjjxyrpf|@xZ`s&X|LhmU6{6$QDA4-fa}LXy^!28=xw%D0 zxzCt1xiMZSfx&yV;!pQU1J}D>oRy4PJa)!id{2Ig+KqnZEr(aN|A`vDIbLP1(5va5 zOv{rFSF*)F*OB-0M5Hgew5fdIvr1zJ@1z-fMY7}5ioLTL%iMUO-s0i(S=5^^GVt8n z5nEkd{Z(|ARVX%D@xp?hF)J6XvvEy#enEjEd2nrAo!vkzHFT>Vcd5<>->4OqFjr}$ z3>Le4h1oY9LCB!+PW3atGmXk;o=CYknxhy62m=#o2iBGltdn~*OE|4}?s&bs z8Mw0v5p$M94>_pu;5qWiNaEKEdh=B#du}F=-|eQuLSE+|EyL?3yAC@8ky+$nNcKNl z1;J72>EslAj^vnN$?gvm&-Dk(zoj=^ZLk=K^+9{Cteig#*%92U& zR-Cw-zxr`Z?0x%QPj&igi|5HM-gBwG^erkk@I&p&3cU*tnAB|J{DpylSdQnQZsrYTksnb(CWP))En6J0-EqPzN6Rq9f;JRn$VU%AomafOol&UZ)J?AceVo-NBy(qwqP zCv7)0W*|L%t__s(!soP|EuZfU6IaI3hz?ci8i5z(PI&*!*4cn7jSJ*(?N^(5!DuA8 zR7uwvHraU7J=Eu}1SER|RXfrSe!-(!v;?sk6AQHWQLeYbb}vKfIJtu` zF4O8&A}!yhe(7w+Tj+n=A+TeRNi&^u_)F7m#+c+Insq z6&<@n}uFn3CKJSE*S%_eJ3KOy7eez*t7M;? 
zBEB)I(s)$5@6E@zhd4@qCdxRVWq`yxf<>7MM&T+;e~bx6}J zk-It#Gy4yxvhuiY$_Tk1kYGk!Nec*jMqec(2hTM)8TQw-boU|2AequL0dh3~ z>e*txpHWI>@x9~RA*;>uU&r4U1R_&lVrXF=+*JL69$fNvA=n+2b>>vql=pRXL2=4c zr)HzJJEg=xdGhpG|GhD4V`#)jz%8ntc&}i|Kvj{vXOxRi&t54|N07r2s3F_tbwzKw zN?;e9F_$ZetgC{Pf+Q0up6fk>3}k>&;xxd{Hb=;+kN0x?eb-!wBnuafWXapU3we z&R_dQ@G||lt*N<`$@)ez-%eU#rXJnD3KP@W-K#T&94R>{5p*Mi|m_HFA2rxmE;DMBF+Thf z#X5cHTjp>IzLIxk9fE^~$asE%pw5AflY!NZj3crUOWGn=2nT~wElZ^#A9|AGWFGiZ=m$?!EBk}3~nur@1;5s zhm+X-pim;*vWZCok355;xDWgI@X{Qr}}kgI!F<_*L7R@wu}Ia+QBV8<<`(s=u=wIkyJ4mpQ}#6;D> zK@MaKnV2o{p&7;CAaOgNXhKLj4985QSvZ1xDxrV|P1OBBD=IGTbs_ad%B2LHZtFCG zg(9&vEHS*|1=^{0Y0b3+7`X-Ghsf!lA5#yHh!gQ%6n8+SESjdsFaTuu4v_4Su%%4n z`2uP6xz7_@?8p0R;A?ue&g@8hZE#-8p3Z0vf*|*Yw$>faRctAuZ?jGGw&%kp@EM^k zj*A>&5OxEgUi~FO5L`x2-F~eyb$<)_Q8TV9=frBC4|FKOOXrTbJ=@YJ<1$AF;dd@3 zOw$6uohu2n<1qNn8|kXb9#sS($g{PoXK-1qXwMr}qOR(2HG9wBRLBjE z|KKHJweap8dc2$lzwD?Af%KfFgBFM+59h1q`;0$~qg!B1aVJ3)dDJkyg|CIq;!z=? zRJQ=$&oz6&c{KYCfybYXUyM_B>SNm)g_K9Gv}B?gQ&)v+ohkf?=6=~A+g8G?vh_ub zo*E}`0o9mRJQZoRY{n&LKmCgV?WDsB1(>lR^kiO^XaTfIMwAYDO)S4l72K2CgWU%7bT zRQvL^4$W~(_^_}ud7+=3qu7I{#L34dizJJ(5&b5iOSR}+ zV_%Oi+H@k1$QV-PGh7^X738#K=!`twR71`;+zcJVw!1T2o1^1|qwe*}m8Mf6j;5 z7f;hMtO?>r4fNr3HYQ7`QIX%|E_OX$2>u}wz%6@uesu~6&;idmbv7Ywb<0G?#8|y) zrAdBnJHbd2RL^aFPiqaJ1qzT5fg6F>cn|jnNpTyAqT|@r081$8lQHd`p$MG>*F#m~1TTdBqgyMg(Z zN;!aToaMj^W*y6|PUL_9M((5)t50Z3WXQoQr>8DI-QrJZVZ0-5CW_e2SzizVj}H*# zKPQ{84fk$=_hp)CvNP8?;L~6-pOh+i+t>1CjIqwt^yK8;NzsS}MMcF8)KCFz@b=`b zZA2~>Rr9GDO5^&>j8-X|+c{cL!2G9(2-6;EV=XNbY51xNX@M^g+M<4mV^I4hySYzZ zDAYpIZ#II<5q1hu>!^)4k$Po`hZ9D-#|q#i^b0LtCKsO=`OzGMi3Btt%T>+I3Otl& zGe+?;9iHH7mxtr;Mi4|sN57I8HD!&X1Lc?$}3>35S=b}(Y&(g|=Gu_=&?5ikYR z5l)z9vW>pl5i1^f6QdtPE=o|Vy4A*I(6=nM9|U=N9oFFV3p{E#V$BJg=;ljE6IZ*t zzOr8J?;$hxIg@Snx#MyUSBR~!2^5%Usq~BO#d2Jz&P(zmd=WHApfHtmyJ(^h=TzO^ zoL6dhbf0r-^jeYpNWTMarn&L4TnU@HK>md*d9G{n9p{9-iOZyDa+~X_EOle{iucCH z9U0-fZ`;Y|x4$rTs^GXqsiVKoQyoJq)W9S1ykWz%NriA7x58stapQVjbDSHCRzdr` qrd9X3{CPLX)iEb+$3g8oPXOy>lw12dhV{SyD=EsV%2Z032K^7$byz(B literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/hbetw_props.png b/doc/source/_static/style/hbetw_props.png new file mode 100644 index 0000000000000000000000000000000000000000..56bbe8479d564d34bbe896855f16f6d04aa4432d GIT binary patch literal 7776 zcma)h1yCH{((fW!EI7esaR{ue+;GSNBYWijp)sDlsYm06>?Okx+Ym2fW_8$iUZUj?Ra6006bn zN?cq;R$LsU;_P53J~X1TcD zo0~ZzO5ysoodHXkN^n(Wuh zv|J}K0eAQbQ_2Cf0GT~A)WD99M{%k@U~7o}0ODZ~2B!{@cSj5j8$M^5tg?M@bRCkPjSM-gFDF|j-4g55gULIT4m#gY#2q2k2*#fm~zjqYjJVYc00u3HfO(8%~Fs$q(Yks zl)U=MDIkWQ#;BbxpGvwKGhRRyS-B)GV7 z`E2D)f+QXi`Y%y*O}pPb`=e;5o&DxGAaRrbSxw-`7m)@d*d6SjB@#&fPPx7|xAxZm z?xE?g*XB&;I#alt9b@vw8-F`;oL%OD)L}kLKX|~^lbXU$bZ4$>ctwQR!nCtYE3O@|46%}2~{TsU_jwgaopeh?ta%hr~-4bGB zfc_^#UOa<0E?Y2OlF5LZZdylLZ&LdJaQEW&>R*OO_-rXSJQPXs(r;f>xEXk$p!dW{ z{y4eZ`R6JFC2~;eGKBf4)(B$B4hbI#lOIqnGP~eLX|MdgX=WJ2lhc!@6;D6PUi2o{ zA#Z3}{ji!4RSLz#|9c-BTO1*VISU2M=;c8TgPtb&HL^?S zOY2L0OLVpbZdm*=S%ce-=e!7+2&`B`{boDPPGOEiP6tln%_wU~0w~pyJVRbP`DX%; zZr<#jvWoep)-)q*$f+s@OppiV-zapIQ>WP-dD_%Ej2WVfPUSM!ZPwfixV& zk%HrIbE>RrMmL=7lMZDRQ`ipEYS zJ0^URvrT45uuLJ0OHD42OP2-870LZcu1INrf2V*-U&yjYIgmUW2cdISwa$#pvsPIz zVg7|-{q73KRg9Ral6vCRvB?x6%5|_HEj4YO0^LXREQPdjoitVA$Rb6PX*J6?_3HIT zEA%>MIt0re%lfs^wc@p$&XLZO&S%b_ofVy%_Jl@e_XKwn_HIU+MunjwOovPrD%$0a z65H{;@{(4PUXsbi**lB_UUD_@ooY2Rz(Nx_AGz7l_oD`*-h1Ms1WATv)6h(fcxWVa zvQ(i=O@mCMrA)HyKqF_??2z>m&l9C1OsKG^V84-$;=#p<9v@ggKm>jK{w;)<+P)TG*c7lA8wNkk<1>1!YLY5)u5KI_j zt5WMxYf@{|WsN7d=ZI(674Xu?NB&9TiQ|a`SQC&0x42c)lS?@`y%1}mZN+OW!R^dG z%3;k$%vnVAK;*z<|H0QR&kDz`cB)`;x4g8_D$*+H$cK}WlcMg#oXp%|{AxmH-)z5$ 
znE^i=Uypf(*#OL`jo)=VT+5#fmG>LVN?uPktf;6&)Qr|lSV&*^zOc7I%{{{{%H3vH zwPb9oZ`ZWsw&Gh~S36eEZ+27M5$99u)A3Zd2O0aBqQyetuHya)Lh4HBTEIUDc|PR* zRj}U=D}#9+S8R{FRk`{8$~%bPbza*`DlaYXZdi65oS3vr&`aSTT^|#i>ev(?r5?|p z`x#fusl_X5FgcS2 zSFfXS!JgV)e&hPL+w4im>gGxK(vP_O%(@R2kNnlA#LffGZ(6ANPuw-VzdmO__`I-2 z>4&-bOqd+#kMT@^Ctun#8Xi_K&4m*m*_!B$l>GuUdg3~kI@&w>9x|`z@#XN%31Uuf zHaFEIz@to_?w=+C$_K_GnJ{oLQ88nZZCbrZ5@XN+9dYWUSTDa{(A3SL=Zd@)PY@IVjuG#lm3)*`axcndEW7TIMe;>`Dfx! z)47Aq28-9CAQ{)wq0g^}<+OIpa|&zW74PFGytAXDm3pJpCb|AS=j z@?T-S1_=I#17>Gs1OK<}S5<+3b|ETO9%i=M5>|F*_Aaj&LhKxT0{@c#KREw`_#c`& z|D(yr_MeLX!T2vl0q{Tm|3^IkPS(G6U$Z5IDggfP%nPBiw`fiQ0N9hV5~5HKxD$O8 z7*wNC@PiqKslSNs7r1CSOOZFev8qHuOz_x79|H5xspL&j8V$QOg-53;GDehNLUzPnVOGmF03f)^3FzsZDFB>gdSox56lcdbX}1utSyUyP%%@uR^~DNzN-iw#2u( zpDI-+O;ehL*Ki? zdfR0VtMOF!nYmK+7L1drEdC+8ke!0#W7{eUZrfDY{iPKWx*ukda?Yf4_?}EUn^7Q_ z*(jbYgTZ?ShFBb2Tv|A}(yA(@(TRy9eAbT)%eLHBBk{E3B(hvmr2XvLM2Ir z=r$z(hfEL=G8d+fAThMnaJn`@L9g(ajao^t+)i4AX+bYNtPOm=D>ui>eSu95o8%m`q39&HbuI^!YwX&n715(QY8n~?c3VrXhtmPU zRz>mgIFS(%+xJE7>#$ixTeI`?^VIA8i42s5qj@dE!Z>Pa4hH?BAh0=SMTX{^`unTj zLqbutr@5GHd8Z7utoEIE=C8-Xh)vAUSa$0o;E=&I84#&g^9c?Ckj<9K$s0B${37s8 zJk`oMV5ow*!|P_hdSqlI_-DTK*IHROyW)1K6ZTzVVP65B)1~j!>#mFXZ&9JSoy5v)a_GFH`&8OftX`}X zudS3i7=VD3dOZg28s98d%oxNlR#nCJ3sZlXQ&;A@?K8sjmFs>CxiZn+i3zuAzP>uh z@!P+6xIRgkcHo`eaerrzM(=twQ;4CnB`~JsoD1Rb=MZNIc5oaHcAuzIkZLBrZpN6T z9nZ(t6hT)uBS|$bVG>4UVV2hB{%D3GA#DUFs$E~ic~uq=dxvO1JFOh7j{rgL z6++x55J3{X9ReNW9D#W`LVJ&(f=1|s|oTrX;|bn9i7Dj?Vo-NBj4##qVRik+GyG;%`!OX z0z!iTLH^CckABf2(ug6}8d#pJf!v2j#W~l6^!^mwSngpeSDY4;j;Y-KxsN^hjI}a_ z^H4qqlBB)Lg7XoZ((SN1``ro)xl-@@hqa;|;qwvAkAD{EQr|Vozk}W#o72Cj(;6(# z?^EQK(Y}%3cKt?GCIZ6j)iU&vjKXKh`#JPA0U6x^59v^Y>K#26HP36Mjp@6=YYO{> zfv5tHjC>?ka{%so6r&oIdgDr=7@SvTaGywg{UW8B-{l7j|7@mKJ={gaZ$- zUO2X*>GIxKEnWqOW)mAPP!B2vd;UPc6c7m?=$oV*-&(bAo0z5bou^#F1xt{8Cf3%b zuvLnPj5KaVmhO7^)7G$@O9@=^+@X@(T+K2jKN3$SC#rBf3QRpZ(PJ0RAcU0`MD845 z&|i=d(Tu;FZ@*w!_H7d@QnGveWg`+D@&#&9xJ2Y0XC{JA#7tMFqamZ#5l#9dwO3b@ z<>gC7*GE(QP)iN*F}wLDya{Z_2TS1u4AFfd? z)r`jzYi!>V=?>N2RXp4xCwwS?h!4M|YwPJ(;M(*RTpm4+>riw3nN|`ZkR9Hy%iuJR z*eFmVLTKqyO)?y7x%u6cU`gaG>W_)PCU{kp+h5!43=NTE!trq=xYF9^{_@)gn}zAa zANhj!7`5KMK9M?h?UW%k`&yA73~>-hA1<*t3=Y1~a2w=EB=hsDPPBGBo=0^KMzbK) zdGOVeL&O(c%uLmWQP3z!X{qx2gi3a*M(JZ2duZ~z>5(O|RhxpdWT{qCp5AB)A!!r{AzCz=i-7 z-pWRsG0n$z?BllScMraA!6~Ixnr0{*oSdKOF&d?fW{1(h?cKg`++Qs9YX%fpmp;R; zk9EY>o|J{3ByQGRzIsP~{r*P2C1}KH<}1RHHM++0lX_<_ENY3+^Do}EJsfA1`IAtf z1J;W-tA@ghzoO&#Z8o%Fk8Mh%u;Fh|;ilr=Rmc52IK*Tn!o z#VY)8U6mwOh-WK|+EgFo;vy?U#eK?Z4%8wKy?n5{Fa zr;_=0aFSTAh-4LF2HR#q7%E)(snrzg@;M`6Xdw?LXq))=pURV$HqSM81iap~eloG) zsL;ox)w3s{=rDg3=i5kNBFK@oThyo!HyRQB5ELG)idy>xf*e?}B?GDs0^vw84s-kh zA=d^{2ehxQi_#!hj+ImbVe+|^Le%l;7=huW+;$-gqW?c+nFqYtR1FVcgyyxfD~$rM z%rZp2GA*2b^TZZRcwBRA0-#7NB|lAJl(F*4(VhAE&eYwgK$|cEYOP^eiimN^tatSD zf1U`^W^`3&3|%^Ao&||Wu;*{1u8jGTSl6Unr6*(e_T3UM6VC-Fh?xK!VZ)X z4zPB{e)*9yzTOK6e^FP)ScSLiKV^(kCQ6EK5W?p$NXt^b8j38PuW#eqeVoDlEmKzE z>a4bmdGKbES|b_ofKkIIVLHCcgsobjQ^N@KICCp|X-T#6iVTPAwLw!>>1{T^yBhjE z%s>@{`^-GkTozhW*HVxV%q3qF;N=z(oeaeK|zsMqPMDE$hI8HzzK(^$g-pee^V9ix2p$ZNqiItsLV@#%Z|_SQI9Gx?`mHT%3IZp=8s!qHtf|OklPZY zOf&Y5D8`7!h8Ny+Q4|X<2+tPV6$??X4_m|#Nfhy|T8-qKD3&d{G{C266UF;PF2-Nz zh^lPbzmrlUBwkky*N4wiHvgos8iL%;P(-Wo8}xrCTQsOMm~f(~tx`RH@kh{iwnb>` z>dJi;ye4cCmLwUu*vUs z3^ENulKHD0$fDOj`Tl$2Z6u1u9k|bWh=jiqFn zK5rDqe<_hsJ@t&teN8kZBm@x_L&_K5p(^z4io`p_&wG-n-J}S;pzP)OOAZHol*-e! 
zy#eq`X{5dw+__f5oXOGFg$>-*q4X>lu+0z?C zb{H2I*QeGIjqB=Ci(}gDby#e}?xS`BpemvQeU305Ow3~&^=)N(-36i;j7BWWHZQMk+9p$?Yf;82DvQEgX$|i(j4Zsjr09K($mi{ZiDA$7UwojSyc7KpysTqr0aHo$ z9eD-cP!YSNnRgHRvqYC_zvS0#?^nn5CV^BMNG-v!ZW1t3+X@#{oVe4T;f(4^OIPL;wch5X^KUv-^Vb1+?k R>p$PwWF?g(s>D78{x6GavOE9) literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/hbetw_seq.png b/doc/source/_static/style/hbetw_seq.png new file mode 100644 index 0000000000000000000000000000000000000000..0fc3108a7968c04a13f858d246a1aa87a03f68eb GIT binary patch literal 7275 zcmZX21ymf}((T|9JOl_XK?8#f4DJ>jg1Zx32OAh95L^@7J-E9CcXxLU1PJatzI*Tc z|NGYKwYvLsRqd)ewa!`lREH`nN@JoEqXPf{Oqov-pI_hJueS~=^6L|N#1RhwpyykO ziz~~Bi&H8)J6KrRngalzLgN!q)YN1Mf{xn8LT4t-ES<-`)ArNJA*)z5XIqI)b8<3S zm^&g$ZM|Z|uaJ#19^F#w}_LFNm z(l1CGIuO>K#R67wp2+|L$%FWg07?ZaCn!O9zzh{l2~vRoI5D^q)D=}KK}WJ#hDuUU zP%wJo6zIDB{5kldA;l>tMV?N&=Z(#sg@?O6vhcU2=a-eJ7rP?Hu3lbF9C~{6)`0!} z)b)lPPD9*_EqHjsGDzs)NiJ~ zN?vV)1jGoE8MRX6l1TTcQ@Aszij&ich^q#)SI+z-$39{G)gN+t+l$m?q%w{MuQW-l z3VN;v0>$ITO@LQFl8Jt|&3wr#L~(0v31%33^LF(OK4W-<&ya${N0SsU%8pP5GjOX>G7-o7 z;bnny&y{-%Wz}ek5oW`iLy0BZBw!MzlWLq~c7gTMUb)>9%n&{(2+G}zuODtNdXwdl z(?6klP(g$)g=Xqk)y>8hON3>?LPw|?QPjt23N*GirDHp-(qhIe38?FXn&wu@EMP9I zE_5%@*%G?p@JFThZ91OwAfzDxar%4Ax161V9s8a3ox~f_R!{`cD#Ez?y|!}y3Ou@b zv$vD`VhIQOhGo)z!l^}d3lxD*t;|psDWrzr*yGTl)=0?YU`_fH$;wmP(1D_+jAQWITnz zyyNT{72s!b6`>H7-iTfz6Wb~l6LS+56LFJ@9<(i;E#Ov3Z%uD_uTuCrjxVEV%tV6Y z4_2}7}bbgnAaX<<3m%BzLU zzp<>}UE#Tk5q~VB8GTJ`EK!JRl~s_IhPFnYPR}A;K6yktS%o;PK*4n4vn5ikYOV1S zy|%eF;iAW)esyHEcs0m5%z4cDud}DKf^);J(BSm0;LeZTo56-5VKtGD2Omq7wMrZ% zHotetNm@yINhX+NY%%tF$yR=E|6DnRoNp=%lbs%78ZsF2-W4Arj5jQvP)k$)t`?>? zRwQ5iS)EM1saUdjUp;f$`~Y}~4@GMW7RoQk+bg3}<1e180GE{Kk{7h=x5+$ZBdc^v z(TL`L%GEWMRaKuhbjh^{*b`*=nh>dxGbdVRQ1;%A><{Xn^rf<;amWsYh;NY(lMfrh z*sR!m*c{*7a9IfjLkFS3SIC#fFu5m*Cypl)S9UkzBXuQm&ExOmdp_X# zowwHmDTY9gOE*W{%H4c^=j?yqabDStFDWYNs9SXH8y&Ozp_|A*v^p#}-nK42L^F~* zvmN`hnZCcK(`9IQ$|gWFk-Kks-YfG4RXn-VWQM)KyA%e#yS)7l6Ze*bQ9K+zpgr*TQ?imHAV~;5G`M8OJCpLnl~2R(|}d5m)AcXG_q^-pCzDTuqlJv;!-& zu@pUcVq0Z1)^cz3@R0Y=zm*U4k}&SE%#+zE7&uZvS6L~nx734pKwK{`WuxO!G!%QZ z`gZy-DyFpS>}O|Z(Mwb{GWBWNgPQgr%yeMtdZB9&C`NlQEy}bfzF(ngMGHlnbz$t7 z`r52j+}rqZ8ZkJqEAR^`%9ziXS&piS)0SL0NkiNN-ynYxwG|yNTN9lpkd`{1@m(fy z>eAScE!qp{l-kr>KIS>*ZPVixl^B-~CZ@mX-c`liUs_#? zZPPuUVL;y$)llxx_I$5@lxg8*@lp%d=3kfXmAlkuN74^&vgK=jzeL*5 zx2=B8p2l8o?fS@V`p>uJ^*&L1Zr<4_NRKLmd(q}6?SEJTZX=1)nH?j zISEZ^W$VQn{ymgFiFS{IlGzo_lrswj2`0E6%cy&(pDKZcXVpKd?Q8lLZO%r_AX2-$U`u8fZcSo)V|98`d20FGdz+Y|ST|N(!|~Y~ohIQ??_T;qh;LTM0a7H5Qb@OG$uPw*?i{jPUYa8&qPKB<%!(q*_ z&GYQ}{GLzAa{|d6_Y$%#3LkPJb_R(vakJy})84Lbgzmfp$_hCt7D+n@55xfthV3t| z#pic|&}kstu>SjUlq20eHlTbVhVy z2^<{oGC=;uhp^Bkw)0XOf%xX)n~s?Tmx<8GXq!4RU>>f6uybYQc(bXARsB1>fuVA2BO6<-bfIHiFbz3d)q?4$kJ3TtH4B z8?_KRB_*YRvzZ0oX9=nQz+b-vsVyN8M?O|ocXxN7I|%6D{DqaBmzS57jf0hggXPtN z#l_PeV(h_U??Ur$C;!)vgt?2Uvy~&n%E6xUAHT*X4z3VEYU+Oi{m=F9Jk33<{x_1n z%YUZzIziTdG_34EHrD^~{R$QMN6M#c~7*hfA2(DP~S<>mBcjqC9A zNmUUq^!PIPuSgF?LB5;#B7zT6YhGU9`NhTIQk_lx1Hum6FSvHd;pec3h`ho=hUS;& z2S!1`^m2nXYtNbr!(WPMXlQEXdQBX@!B}L~dcyb-MC>^c_6rxCC=|*+=oA?E`IlMU zL}c=WIm>kF!tQTQl~{CYvr0>u@9&Q)thRbW5rZiNp>a%Vg+|Hm56qtKuS|o-Q@Nwt zA5Qp*g?$8$N_Far&7rejm+S51CMPE$w`ayW?cThC7b772w{OK=f6rv|yPqiM%f>hI zSah844wsaq_(m$E#vksDrhW(L6zjLtL+;MaC1>LpRV%gQ>TMUP+b+iVKW7TT@wH$N~631wWo`eJ^%oK_0MaA56=A{%xKp_L@&w2X<1jio}Pmm}e}k1N)! 
z0ES{QthU2+Wfc_S*Ss$j4o_BFrgRA2T47{=DdwYAQnq7|;94LGr+4oOZTQrA8N7ZdTpYQf*XQa>jq!W}54?2@H zbn9&gr<+#Hy{?Z36Q!d_v!{P5xr{qZWWkN5^CwDd-+~GrdwL}B0FDL{5{UVEd1r+& zySL|2v2k&667T>f28OJ3e)p-0U(d3Ju&a0atsb#+6^2TfB;XI5w$QuXL>b5@ok}A( zg;a2)f$x*6p_((;XRK zmod`&t#DS$!SA#m_-I+f*KP-#JK_^~^$ULj^}^r#^`QQw5)XtA7?UQ&OPGlp5z7KXgUg#7NpX&@j(T*niGgGYo@dVXwsW#hbyDv5g_tOYY zt;b)J6H+M%|2*t`kls@X1UXRN*Il`@7)|M|-=y=d^nLNEhh6NA+OnQs-4C2|ks9R? zo-Wn|N0#a~0FQIchZFvKZTCes;U>XVCKP$SM3QhfVu1Cpe$Uddw@tDBdK8QQsJZ7& z!+LczzrUC(9Zk;cwL26asQqPcBzdOI%l*V}>^z$=7r!P!8?9QW!G6>W!*!X8f(+YE z`a2AlkWiY-W^N}6#W&O{{q*Q6c9(`!5xO18O=NrgsmiIV?@zp@0Tod+?g#o~z1hRR z{S^DrT-Gy`dKCO_5kJ-UoqzK|D~#u-m4ruoO-)T1d3lrh0UR;WQTGSA(Npzy%MA4N z!E7mPS2>^!-5}x5@l5Im8D1yG>RNL#&cW3&Fo46and z<5xf2&-AqzU62p0uxj=1l9DKunL1vqxm$bP+3}I+V2$Mlhnq#61J8r(;15+FDkBh3 zuquz;*I|jm!Z#}}t)BcYdlk5M-DGGQ% zQVrzkzd4c=Hw&z1i;HRyoFi$GRDad5Kw&$Y9SB=gxSlxcfAB)dfb{GPl%B?qPPdXppcxM^)F<918-oRpuS*>!rcZ__zcvNX`UeW+n#N7 zKJuI`e#ta9Xen`S-#Lse1Ja-d`RvaaHbX)?%)7Uhl}l?6^mTmVp0;rvIf(>741p#% zSfjF8AK!e1YrEYF2WGl#zvuKY$l}iAcaJJ+1FS|!-$KpaOu@M4xxK_^&Uq1V)P54|}V72t?CdtH!hu~UUUf>#c! z&AkYiX+Aho9WaFG^`@tc$HKY%$Da!6jE1J10n@YxQk+H|0Qj3X^imVKDPTJ$!izgH z39(0ir0o?{)ZWss$Qq?uv95kyEA22Z)q#7c*>EZ8h_mHGwD9wRi!Q7Pb&YHwgJ{)g z45(8Z(*AfZ2d}T2E^w!!ukqHgiE_aEQzS9hmb>Q08775c+G2_|JAed}oX3&!OMslc z#`y=5bdy&SxE!E~R{p7&eo!+ml&e!`ot$sbK#GGe9Z4*Cv+7Q9F*o_Q6VXlawd})u zZtr;KN4+S?D>rz)Q(^hHCRCt)T*?%yP&5s;FT_ z?i^hQeOt9xVi@hORYn;O&HCUHth!v7@bfyC&#+xXtK@tT`7Y=&DkD(;_szQRIJ%Qq zcLZiCvoQXeC|qBJAD5sq+HI24(k#t}GXi@&a&>1l*>wv$_JK}!b{Fz;Qf@$gy?sYO zuhBA=DS0Iv_uj9V5<3KP=a~1P1>}{a7qRksOpgIkixHz;n`CzOU-Q6ZqcEb(Z*ez* z&tc>Uv)HVF1J0aP06$O1h8kaam`qY8GA)>9V1 zTk6_ispYg)Zhp1NCGnZdlO#SoO#-`5{wIwzYYX$fTtUeG5UC)WXAm}pqES+#9BOE& za?-Qn#40HGC)smFr!J+&pWhy zH~Qzz%j&nOsl?cj;G^bB!X~4guSub(F(I2+&IlsX?R@^8An2v#Uf@IO7LsnqE`lJb zH$=kF=W14ScCV@px?||XYyTOxI^c_7vAAU z&kL;46c@ke7PqVeOT0RzsC>KBVD$R%)U{1!k!=tvQkDQs0`GvoIMO>}CpFOtwHm7q zQY2O~PHRmyYs_~+4uhQL6E&UBqC_WyKj>>BIcg*w_{~f?;ZPcR7yvq$cyTFrACP<`mu|1%83@O&sLSF~Rk&YsQ=^4MKvooU!!ZE(YB}w|5ak1*MxfA&!fv8sTkd zSbB&gB`ANls@&n_-u0JBl&WJL{_$=fNpq zPNu5~Z^8N0zd^zIeVdYwle02w3`G~Et$m?y)7)bJbQpJy3%{gcb8U&h+ams$)#uk6 z9creCHd*S9r8qWQ1+R4V=gwCUUWB;cTt$*m^hHZ6{A5g>Qe{EZON1F?G?rfV7(}m7fs2Vc z?`XY|OX%QFgS5wlxLLVBcw_G#bdbO3haxDkM{J2;UoISAkb?RDD3OphbU1e@d>FFo z2QLb38k7n>!xw8ERT$rQV2m3yDoY$bxg^003Yf-cF(y6}H0adF-_Jr<^tWSce_9S9 zAtA{dBb%c5$KYy?>7S$WG%jt3ixX-6xOIird?!yiZ-3gCOslojVf#h63sm|!1I&le z=Ystr1&k)pt?v?rRiERT!y=1B0jr_;@*0Qprox}{pgD{I%^1Z5eA2SA=&Hr5RpDH4 z(zG}f1VFt3<5b+{H+de2bs(BS&fVDeqk2y%@91r!$A% z#O$rfw^dg|oRP`Ex zqIyD-_7)cx<>$Y_gYX!JvPkJ0!Wp+M2l_1$qA{mE`wC~(ww0zWB~Q353JE|ONPIAp zQlqa4)rOtEr!ja(9i9!Je`QDwOvZ`okn%cza6OufM9BbcALJ(*YfikBs&*k_;jpAeSFZOCt>n4RuN|9sF{(MN#Tr(Ix7zln0!S3lFdJJcdT{ zCb`F^(2nNhhffze=y)e@D{36wn9*M=QL1mD;^#2Z)6snv6N8slm7*7cUnc00s}e6fhzz1aGhXT=kN@!AKgTp`nSx zGU`DkH6`Z!kO;OTl|O`Sf7=d|%-b^|IK+N}+M*oIsB)Uebz(3}w~oDbLx#4Fy;|QC zM{%S6_((}1FVB@~gXlVQ=l#dR^|O-)X%+4=?NDoXG?Yfn)$43DbN}cDu^&jeW(w*u zfCDuP#ru)M7>?kfi@H(#Ic;_UGK-R&l{VK`DCMgr46}&I=7AOubh_E94<(m`FDea-Bmu3ZbSQjzj z!Wv`L-7a!8|CVq0?Sf@+KNsn@V|<}k_Qa-OcCG6{)=Nz7`Wf?jurw+vp#^U=uiy)P zv+d|ck<({UX%9YPvLi(I#9p8g*rB>ChkHQ)8eOUy;7;fK^UFm>Qcapo4JR>6ws#klR6S1>X_Kt{&6t+QjEnEY5 zdm&s2Eh@?lbMU}^N=ZhhuVxV>fZ)HfZ#;hisbk`1PjE7+j`x*=v_5i{DrjJM44p^NL+ z2j{^w!)p6w(}6W1_jlGhDU1a@AOKiNGBK}5Q}yz}dLC~VGX;S1QLJ&} zL^0BNJFBTX?}VYX8*2@r6tPNr#|V(g8m9CBFzY<_^rwoCn0?Gzg;ORYo)KLy(wkVV z!cPCa2A{sHtZe+mGt&FVoniDzYqn=mwl@3IK0@a!n4hm3u6$J6-Tg}9y=w(mZ@-i% zDF+9*BVv0yXRUQZ)SB#M9R#Az8nyZf&!>p`Y8yW`;t_9UFOFV)E-KE}?Ve2WA^(9v 
zp0?Ug7Q&fu)w3TWGD=igT&CGtnG9Pj*%JAWE3@)w=o*I1Rt`f|C)A0LEJr*k`f++~ z3?>Ocded~zMD7~-`IXZqq5Nm5Sv0$yvoA}^@q9WyMsrROQf!`xSgM{~aVZwvTv7@c z$guLLh>BB4xoFu%s9dM^of4SacjMfJ5}0Nk?Fnzw`)K{BqxO)B%|cRd4D_LCWl#-| zS660N9)w<9w;l$p&2+8u#Q3;!p}yjTx-yY(@b+g8NjZjq0B5&`+CPX8qURtFpHMbR zd%nJ@6OsqaAWvqZjW94G7#T(;h-cExB#A}x4GW~m83U>U5hooc5_kc!>3ktm~UWXscl;?GmONQl3PzeGEw zWxSi>#as8H!Kr6I>uA7g1a_efp9KYzX^3Hb?9|T^Dh5RD_-)cPTGv`6R7~osn6F}C}%;Ex{qe85l@HFXlk># zv7$)nV>4n%V%#;Jq(FvI&Eb7)u59T^x+%3u1L}{44ef&(>7`!rJ~?>2ko0lLci5ea zGETfdi(F^8WWR9MfZveGKrY6hKcSxn>e9dmb%63gm7#Tg1ncJO{OeWyP5trxdhu(d zAzX?{Qz(yA8%-A!XR2ccO>!oxL^E3hSF=p>H>x_LgZoMw%u&j>`M4i7k_=(@HgL*~ zD{|6bE$7`Oc6xY5?yW?}Q^Pur+1Nye+~ZXNSvFR-CT;c?usrRoF|#ZKy0|hOyD39Q zoaSfEw#yu54rbI#eoK~(uN##cMG$d_3B(cNFN6-FbyIE_y(znqx_L3&IwEhR!1I%* zTHmzFL*;u)ua+uYH9!>w&0pv0574Yn=`yUJ!7a7Z4AMl8aF1Ax1a2yiP^Vj0P8sDI zrx?WuJ5exc{(!^cn7{ZIITj%B}D%J#KwiI&k4zeb3v+15@)!FHPZ+ zP!(~6(1@^;Ae~4V?KQ2tgqv8fLlKjQxr?TK(=Cu+KF{S#nKh!9VGSUhe%|orCtgEYQuuWc@q|Rs0H_UI&vx?7% zD~f+~tzCq=Sh}_@`YZ=GH#LqnOFLY=>r4)64C=gX+Jub$$av28$XDOj2EyP?<6R;> z0J-}qxm&W;hpa^U?^k~x^QrX--Ywcr*+8sprdL%|b-!Qo9vGi+O|{679$6igo$Opw z9$_6Tp8b*huAO7BsmE(%bjCU2d4|NmmxX|W3w-6Q9_Xx4SzvXL_|@rUN|17(R?wsC zooj*{$c=5tqx;)C5>PY94_Fh%oJ96P{?Il_SKoh1fTx}(+$62WkKBc-L+FD8)ZX#24I7g@)pMzNI#_a>8a+M zbUYK?KGT7Xfd_RnX7AnR=H|dv&rAv|S-W1fZ6SHt#aUY9&P7C$%!YFl?fTLObsAPo z@yrAkC-zy+?K_kMZEw)n(UHB8FB$MAf+y^YouGqkn^ z#1Eiy`6w(pTjt$Z-L$@*@A;sAMSWTO6|*>x68PpW z|2pWNKi)FhCurPm+j3N5Twvn9Bj^3~G6_sR?MASb!&WaO%(y4HbFs6d^YeA?`5cud z6^uIZ;9_mfP(@&b$KTgxJglmJG>(UuoCHjgh`LN~W}J~}zL?sc>6`iDQ0t(xsk!-a zB}Ai)8Dgou|Lw_$K-@x>n5A^2)uEMS1UQ82cF?=!whE8S@xzKM$$+ zK9?|$yKp{hwlFuxGe<4jP1O5p_=nISp6-VNM`uwdnPnB{5qSb#u%6t#)cN-JXI<~w z^}gh~$rO8+b!08)E`Z)wyJtD+xG=iLJJGvbzz=@cA2{CiUuIB*jEEe4HR+t!w>AOV8 zMTGsMgSMAe(>~T6IIQM$emVngDSlOsjqZPw#$(2fKp1N0k8X{o8p`F-&GBzK@XZPmUQ-|HlJ` zMH9dc>1zbGCtu|7P=FO~6a;IrS83zEIRTsSB+A@gMGPu8Hr{EB3s?YlQ}?W_?0;`- z6EIe9@&*3Xw4T~Ld8K-KtuZEZR^fts?{p84Gsih*lqL}b0D!r0BU7ZQjyA;3-Id?g z-W}?|@8{})8E63j89xYybag=5GW)r@xOqYRWLf^D0m0CJvjtd~|D}R-mSr*3(Pvh6 zM>sHx@r&{cvdDp%nVDq}_ArQ{irRnRn3XJxBNFKW5fJe8_2u^!;de*86cCbt#)q7ckeJNB^#32p z|2+N!Y4$&)xcEPT{}B8KC?oLq{{IorzmxSZE+$)YU>Sk`W?l~bJ)fTpxSe?S%;)DEUp&Xwg7%SroD{326LTs8_BYa*Y%JAZ zgs(cz&b_RgUL38A>13ngodo0G1$;XBI!W(0=5a7N)+|s>9`wvixo#c= z`DWbx4pECjEv*%Y(-jfa$)t$ldW;=znUG6*85+5f!08)gY5vA6{Slpqxn3TAU;g0v<1%=yD@fBgMM7$j+o~p0BZR z=Cf!?^Px$2$ZA1IK=9oM6S!)X<;NCOW+vxqVmQqiakO~lxU>&(1X1J;kJt3I`W=CW??+Jpt*o+|2b*!0N>W ze$QN+*UR>xYw_5nSDGkZjiOY8>+@fwo327#LDwh@N{wE^?TL_A{Tr7f3zQUJ=6r6H z_tPjDS=lz_@*&e>mKa-G+qASa6`gF+LMlGw!hIQjA7j9`};%ie(WzbQBqQFd@~QJ zvZ|IBwdn?!e+rhh?hLrN<9i0wW7o^q)+>-J#U$&Qk`hSzcD+A7Qrvv68K-;$I%Y`1 zB*WZ*tv{YE=A{Bb{_HOP#Js9Yp9TG_cyX}yj!g?SwL4d(QSCchXD!cdG6rX__v#x6 zk1(x$A$5Fyz7@K^*uXmIDJRt)Ox4$$vNCK)*N z^3(R7;P4Hca}UDo{+#Ye(Ptk07<7i-ij>HSiKU;#OuyxocUi1wpjY{JY2`2&18#_= z7j@K6%n!rDbE>ocWYSW~40NG07FS@>=S0*0>Sy(7y!l8#|Je84WP$vNBQm{3Qa1B9 zlfv_swKhT&Pe2j=A`G_)AKSn^oK}04LiHOOQych2U$I3p+d1O8U)F|%8r_@ofPVSz zv2WZOXYpjN57t{qOgwxou*rBJ+B62Ahu8W9s=@>r5A5E}+2Er8Kg(~YHeDh2`G3xT zjlMfpGmrgjV{L8yO7r(p6DE@77FYVca`X4xjBHNLJVsQXd|AL>ZcmpUIBZ z4(N+ZOoT~BVP;v|cKks;CXmpd~<}OcK z=Ao~7muOrf!@gE<@K-~K*Dc&e2c*j&<0nYO2uVAVE>?TJ-VMH~6iW^)C}thYlg>y< zO-Ea5t*K{}KPv0;_Cf6$I<;&Q9^YZWz^c}*mx%L!-6bO1b06v&L z@S-0(r-LTS%xrCF?NdoPYfgB?t;?|mmF8=0ZLM@}<03e^%;3cfhN$RhFIFSc9fdhn z6_rDr<}k9|r6vvnQnC22AAR2l_5%Zom6DT_BXvWw%KXoESERxcpR-*mDOls@0hz@D zrcbCJafzLY={5DtZ5ulNdJ;CDdXixz7zU}b{v;>Ckog=)XEM;S`&x`neF#fE%6XyI zQpOTPgwhnk#r%_z z{G^sY?1aoOddX&F!)g?7_LpK_n3!FK%cF$m>}q<`EDDuycyi|-9enM~!3O2MbJ_>i 
zC)(ZOyfQ&i@xM@C3^cAXdF@$~baHX=?(X(_w6cC=dv8w_2rT`uqO1@XaE|{jElsrQ z28A=jO2O%&&5>z7nJ4|S;xqgL)BhYOAq)fnVPf3+2E)R^H;>SmqW??c_WNf%^L8Yc zz+-O6VG*fE)1o$`q(`WPQD*xu#en;OXownHJXyi`$HfY?ZcJ8vT$)pb4RjFCqc%~h zzZv_m-XPa;XQs3?VRSUIfQ*Zat3wz1#HQ0Ih~fdj@$K&6IhDfVx+R5`^2xlzUO&M1i8h&s9XIqYlwT}vZ6b>qIXF|LTdOLPq5vT8_ zZ8VqCIWjesM@L7sDv48fE{;NHv#+i*Ve^%{K#|N!CNJAykxYID24nP?O{M=>)YZ66 zyN8F7&glD`{x`O%`}<)o(tAyX6AW46xj)=61-!}!72g+&fPa?yZDNm$US)xOwi3zw zr_kF;5YuBOIBpa;$8x2f(YkSwa~w;RB(i}ZB*R%o1ko%0KI52EraVqW{_v?I-l;-? z!JnOX58{5e>0GWalnCGve`O_^tO#^n@_%OaUa~5UJ^4?3>Rl{_51cmdXgo)v-h|?k zmgqt~lJgs#f5R+C3N;IDk%Fw_y1qF@GHnq-RTp>vn)_Xl+lJpLMwfXe2C>?$0&Fh=7apDR zszFuac+x_->z}ySSx@7XYK;~=*1|zERVdn44^3-g%Fa%oroPg7zPiCB;`FxTuM_nW zH&#|tmJ+t86JS`6HZZ{G=|g6eDF;x#pA)Zrl5>-UJ_^lYt~Pc=%1D6Tn&g*rEM z`95JvtxWBSnh_iGqJ+z0H>a$z*&(Y0ub{SNJJR+@%4f9T~lJ~_c!{G(}&J?t# zpL3g7nl=kc(j@sZz4zuz2`JbFJILx)T$m2gO6h=5wO;EvG0i$WXEB$a9Q%4K^pmfe z&kDs|mk%FAC_wGkZ)uB%{AZop_(3o!sg{R0xNJ#lzDJa+oy1~SUTj<^h{6+a3h5Vr ztBcSa*Ts5U#r-SPM{qBRkInPfxlL*s+*J`i{vj}auA54M~@D*Qv4md7)Qwh4lERs18gcZ)VOs< literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/hq_axNone.png b/doc/source/_static/style/hq_axNone.png new file mode 100644 index 0000000000000000000000000000000000000000..40a33b194e640de84ed8cdf6e54e039c4cbf7cc3 GIT binary patch literal 6102 zcmZX11yCGY)Ar)9K+xbWi?dj839yi0VQ~wv$O4Ny1h+tt;2tDsa3@HDTX5Gv2(H0h zKkj|s|Gu~CpQ@QTbGo0Fr@E*5ghSuF!Ua+Q0RRB5!fS~7)7$0gmcvAUdcK3rYXbnl z0)(tAR6$mj4(jY+fv|-G0I$Q7k})(j6-Yw&T1LXB#>}mpN8*|LKuYLph~^xG%%p$- zy9L}4^%XGz0aV`n129^VN)tki5*z^BL?YLma^j(5XTOatNSl}V=6#XnvV1$@+hD&` zqT@D#2e>9l7*!2q2E5*Y1A|)L?!>8$yDp*z04RRYJ>%CS_euFoClxTadV6@`N7gUD zUpVel?*I5;tP)S1-GK}M$_Phib|}hS;jCtIw$c#+h-t%Z%7)iRT?jfSkn^QOY_`&B z+cQt88ro5oon?ZT@E@oF;%WWFjsQ9pdM7WE$lxh@#!|FGalw?ZD*n#sG6;xrr5uy8 zu&{9W)G5Sm?Lj^4v?1LoH(eR@svFzp+QQSr9$o5v)5GIp^rKxdTW61m06q%~@O$vq z)~Dr$bpd07(^X_-(zHRNZA2E)`(GxJLj#VHM&^Q(OF9CA%7XbN~u(v4Cz8y^d)JTWE9mu^cIf-woF;(7h$-6{}#qNf=#q>%5Nxle$6JGdv`?)RugCBfCvZ@ ziP$Qc1w(EVy3erm&D+r)06aF#7I6Zu zZyS!hmnT}6I6k=Bu_gaP3$UXhT<7db{UKuIj|@1!S65!cbr!flc61M5CN}5l9y`Uu zp+k0}!(9&qI{Uc>67XPXx6=rt;QdBHmS+8cT#AIv6r@Q9u<4Mm!d(yYsUoEU6nsXe zLaG0aDvYKt&C&?q4&H+S7SS&|SW^Muc4ALF3wq2>Y2pvSe%X>(5_3SBtY9qTI?}at z2b7o?+lh{bDMV04?6Xf1KKuJ<8Mm=fsF*8KGZ0tCBZt?D&y@a8`l(|whQ_C$~&)XjMG)ri{DGAnbM3(-VDTZSgp&6TN+&7>t&W#r7(v( zw=~x^2eKt~#}|vv>|Jp@5k|>C;l}UlhOat1g*o;)Z8^y{Vl85bV^u~7^?9%69gE+& z`|!5X_&<{h@sG%6evMy;=^pYD`BPOE^kor)E51EG2(uQVko#;rkW5jT!JaW0pE5k< z1KtM*1*Uk>{^Yu#Ze}~?q}Vs{6|udqY5Ud9eQPL1LOGcZ>1SiV{qX2_U?YwY>`5b3 z>CfNKnNs6cr%{vqpw<)BLuP7Q&1DKVbmYqCOR*QD~jjJ z*8HY>T~f#8>Gaa?f{9iCz_$gf#oRs^c?#@7PSNr7l9)6e8%% zJHThnL&0B2eoO8kWdFhso{J!~s~OFoT`w&zKtv#tc6|BS_-SkZSWsIy44n__ZNfJj zI9W-uNDMd^IE}#kx+HD;KWfC1HI@7aGn1E+jmyf)QMEs6C(NYJRL^Y8FbYlxN(+9o ztC%yjHMDD(b6@bQtF0NV6N6uVX^Hc#@ol-U-4GpIOVQz?^?-W7M5)}!-15bGMIW|> zck?&9T}xcO_RCg=+$-Guc5}Dl*PRzPl1ht9+w14udWT2s5)4wr29^dTMq8F;2N;L) zrq<%VG_&;8cDM`-PS^zNqzLu?ob}GW#FS0zFrDHp^eOWdygs{%_m%Zg@}<4qxy8B@ zy)*HreY}6bL#{*iL@E!YizR53Ix>lU1N9mMb5wBzX(yI@655h{=WP@Up{N1nORoDC z*;q+$-}5Z-nCN;mdU`5*8eS=fctcFOt@0Jt3xDjX0o4|ZzFNI?^>lSRJ5!8F!q9%x zt=qfai&HtFS8qQ(Jq;|?(9SkwYz=MNbmasIGJKW1;OCFk>(7Wb>rU!Zsb18@&;!qn z>@!@Lf0y+!xtl}{3+W88q{0~S8!^w-Fm+m0s-SF0ycO*iD`r4|M0lD&@3@&63fP`! zQ+!$&Nmyk*^_tL}m?^+L!M$p_8+ZM7hO(o9Y%J}Do>(=*? 
zJJK-B-FMh*%WzO=7(DX${Zswz0-l9b;vG)|i;1d#pjJm*%UsL%maf~3i)j)?5)0Dk z!^`DmbqIKX!^;CU99Y^j7{T$35D$nKoqUy4M?E24cR98-(LM1KUIACxP~7-7VEL>T z50BbOazZ!fZJhD(&Z~nMk7zGyyW*(|PRUS-WVd|8Kg> z*^4Q?+39JHX;Oi9+|JPcHQqjqH;vg=HUie-3lN(DDXcd@Rmn$@i>(!n)-TPfKQrvb z^V|x*r!8d6BG;EWq*<%jQrksZQ`%l44iBmhtsZ)=QqtelkJQxiSz?b6jBaFn63h1W zYmQt#KSAE=^t?-csn zJcA{XrAJjOdq#rae@0|SMC8b4<20P~jhQVTx&_r%9LJNpZxjib!QXRWS2=l3@-Za4er zQDUAp1tj8CM;T;*gEinYgnj{Oe7HLa`{wGCGzhMfm`V-}w`idQW|7)SI~EuBSDKo@TCZz8 zkhT?#$2Lww<<4#uhQ!Z{ZP6dC9|4m3XeZQScsu|AQU*d(*Hu?VS=7wIj@!iC!4%Hz zY3KMf&;kJBo}y1nJGiR}ou{3xy^E-)1jFANqEG9;w!sW^f2+9KNHFNCKirKXDDrRMzXbmUii7`(|6lq1hpfMMpV*QFii7`QUJ~f|syYq;z=kM5q%}Q}{$ya- z5;*ykC0$mHMI{AVm zU}%#N+zw#jb7i7;?T|vxI!kVYuTdwP!%Fit@VxD9tDBRJNP3;#o*p$%PhrDG$Bx*$ zLX}UAG)6{70$yiUimzWoXKF2mDT2gp(y@U24y_7qCK^PS08 zgV|DTp#eTuN75XI&2=*+S{#2)w?-!xD&IDBqUWmphBKu`l5m;MKjExR&Bv@~%8Ior zjSLq|ipsX<^~ZDN=A%6he<7F^6URWRS)6uZrBPnN!5`TrPv&?yg5=?Y$pum5{OX}N zo8i8L5JJSzM!2S zdZ`|o@eK_^LYU9Ve}2a_)zu{%&JampU|=Y+o-Sbu)%(dgaoOa&k&A?a?)c!gH={5p zGAL})iRyK_iK|rv4Ln%-_G&asB5!U^*ODQ8`5ntI=2Kj+&5gm7o%`EMs;ITT7!4Q< z#+Pza?#25EP2o1Qx<1*svtDk!Z%BLV!)Mx!>2-BPKb$Ro;` zn8`>&Le(7*05~~6-w3c>Y~&qx;uCV*?R<}Y{hp@DqzgT$m9`!&Z|aHX#vEABuN+wn z8j)x!5r)~%XLiSZOpL({kn!K%>aQNf2Sp>m`)vnV^NoZ!Xo8ry)X zq*Ok^9!vQYczR-11nd?cy*J8DW*}Vc|MZlu#3ZmVTpY%rQI>^3n{31|YVnez7IR-W ze8;F?MV=wzVj<7mVE5Akf>v(amgN=_5;FF1e~Y^__FlWvXqfWg#@plV7nM)F83cl7 zO39oGVB^+Ab&e%|B(e@q$I8>q;RH*O#}af$Tk5$qdok+e%ff@WwP@;bD!|3$M(gKb{$t#A>EYug~OnalNHQKXltRI60JC*rwT@~b58Kq6FNhioRt z%l=kw9zrbV28ZQMBfsNSId;vWSYPdwL*5A-a=sAMM@8h`WHVdax}a^8C7lvYHj4mP z#ZgdXjzJWaR*B}y%K1cr5{^68c6yC@|H(&5DXBK5Yz5p}N7FPuIANL&5-taSrB`vg zd=vIyn~8z|(((PbFJ2b6zLbjU9{ed7~arqi0>mkHnQC@w3;chuz^s zQEyjyarB}ztW({lf)^1XjBB=1eYAsQySzFkKX8G-ntT0SzrC4qCQMhFl4|sJ@#t&P z7G*&b?ygl10fEFO?H?OI;(|pz4me9YO(Z!F+SeF5+PDn9ntM|N=X~gw_@x8QoO`<@ zSyZ#?)K;2){`uL!|NhylPM9qY%%mXxV7W~-0i?3}Zs&Y&R>0%n?LZ1RI|Aqj#a!eJ z?UwMqI0^!uHi!|Dz@O6EqKUb7xw|?R-yhgc27ZzG;(PA}u z+~5|bMsA-bPNpSO6%7ab`dGMr+TS!Q4W|`e1}Wgw1tH>JcH(fPKK0=`5>cAzxiS$N zf4?db(fkIDhGdLmOxF5D{KMX%pJB^1zng06Z)>zkROu;|s-R-;PE;L^R1jvaeb95| zB_dFxeuscGz8jG^h4PVTKy_7cw2DsV&N#=~E>IieHuj0SEz)gqs*(Kks{ZoqiYE6z z`dUsSZC=ChlM@=1cfl;<7t}(yI{{qyPsrQjZkm@nDvBVQ|O{IAnwWNI|tpL9$$sG|N^@EwHDUyviC)I!gOZcGyvm9EzJ8vLpJU&nR% z6*|L%A&i4^F1eHw2TKiqxo z->akN^wS%@z7LWn{5IOx66VFQ<~1v*CUF-B9rH_*mzdJ_lnU*aJ>z!fA>)LG?4++Q6CNJJQvcM(QuKhUyTf zFFFVn&aqSAOiVr?+D}>Gmo@zoFJf77yC|}qiApZ0D#A5xKnBMV=!& zZ#TtMEhx^{f5dst7zKWJ8=VvInKk`puMJHvTUJgk{`=R{r=i21f%j;ZgQ-7awH`@4iK3CG%vfg~k$`gx(^_eTlR)9p|O z^%aKbbELAZoSU;x0xkGN1evGwJCw01Sv1nLc@p}o*>bdZJS-G7$JjM~p=ggLUTT$$ aUQ3Cv6jm7Udw2YGx>1mO1F4XK1^pk84RLD# literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/hq_props.png b/doc/source/_static/style/hq_props.png new file mode 100644 index 0000000000000000000000000000000000000000..1f117490966907b6f8c7ecd215a3453fb09a5496 GIT binary patch literal 6241 zcmZX21ymc|)^?DhgYWyf-?$F}0axTn!|FjS0EKlcykL~!b^>e^F^?GtpzdQJ6ws&gP0eAD44hfLIXL&H z?3dds9Q7>?c(^xFO{B|iM+e|ZzlzOlSCYMax|+$|%0LPrqmQ)zHn=wIM%*!mo-Y+@ zznNCumU%*3--fXSmkwDVx~Bt(rS+4!02ovmUA@VpLnauROP&;p38jWt2z11LgR;`B zl;Y4778VYkxQ4p_zSj&tsZV#!O;=%+@4~mgvi9m;~DXA;_^yd!)Wrq|9kBkOfNxPqPn8Jo} z(bY$(wFK^~Kp>gqVRO(~QX1tBYQiS}E4D{Vb2$4DKI!_2fRXIk6^B&r?Ijr)CdSMK z6%-;BaZt7hf!-u_o#MW=YT zU@Zs_?vDr}=Ec@+d;SUoxQu}=#U6oPf`B z$U&#Ys9nZ<_2jJ-TLS77}XY}a}2GM_sQ!V;YL8b1-ylu$c&<$ zq%Lxtb9+a$w#q5-lUZ2&x zBe7c#U%uAo0fdsF0a4j33Pd$H9-*(%KUHLDy#C6BAaWvN#i@oW<`RwuQ7Wl0IWZ%N zXd+W1fDz7$FA^dB$eQ3T7Dtwpc(uf`_#OrNeoafiDjJdZ+%FCor{kOZJo}wF$fAV0 z(@0eN^Y=baz(AVMVG$bB9VEb1QQma~bpUF5K0(tDx1A?&|L7ZuRITq5ux5 
z_z|Q_lBtpdl0C^bl``QYGG8fO5lg91X&?D5wT1Ia1&^(OXPvPdIgkKhb%(vnh{}DZ zvGA39hw$CAGZJ@cYOYe|!NTIW|MD(}44M7Q+g%Ci1 z2#zMTrtPMbrux$gZz1n~@9;CMQ!_v1JLnz%9Sv4RPzu`gaz%R%W6#J`yp5jSD|4t2 zoNs{t9WS*&A=M3)v#`@kf2&+O6343H{OPrlq5``pyOeD|0S*EB>OaAe-Bqtce*8{-!$a?>;b{t?b*FUC7wv)E zZ@t>d-{?XVBfR&%t^D*T^YGuv-Ar7A&#$MH6qU5q&bs#u4ml2tK{16{TEy)Zp z|IC~Co$#fJt+%?}ZQ#e4eaM?s;hta9KG_#IGHLDR6MTif-~5EGPA?PvWPFwV=x?@f zaBm^EW&!jMclSW_8gwtT(jbO-;s(h>vv@TP?-4Lp1y`_ca;X=I19=NygK#Ky6>Gl4 zn%`G@TdA!(-UVJWJI=5_*@in_0#|#hc2(_Wb9`YZu%+)e?T~#ilsZYLv^otfT*|Cc7HnRQ&Vqq%a zc$Q85X>KTKmF2{HOnYpqfZ&+mvhj8l(R_-rt%YtP<&KqH$(hHaMNJIxlQvrM?5O1K z{Z{o2Xx}-VWz5zXTVLjky1zEs&9?TjeyBl11=W7`$(!qOr0ErEbbvHb&C%93`b762 zGk8c1P<2yI%udRS=er)`f5Lt({fU?vO~}irerare5+EKh7I~RcLpLT?b1||x);0Fas?17tU1_~}z=p6I z2&8kB7}NV~m|$|WEx#Y<8S72wSTs?_E%9C)>At6^E z`g}rvdUBF$l0vYJpyPf2Z@yk^wT5h4d%<^Nb5Q#MNnABNb%_U&^UW2l)-O%!zcQS} z^4tqs(&jRz(QCgsr@d2kpmU6VN8@l|cd%b^V0+(vnVPOvJ5*K8Z-YNXJiMOqNi^Hf zzbSg@>==Es!|Nh_|AvAYytt!3Iv&t}F_!WpD;0d+bei>j)urI1cwzF~UTFGlxq*@M zcJ-e9{p9iVhJVRDl4eSH4$A=>f|Ux)qJdL4xs2a!tQ*7_Ogm%GKc^+6X%!$5;P(#q z+nilUZZ1EtTKI(eeulOowI~xA-W`|xT<5;1$>L?8h2Ff05_mHiyDh(+o7uD$;4%F>$vNM>JF=tg?Y+6-hG7|#p0k=dS z;FzvFq5XY6##mtxjF}GUwTGG*O#G9ux0cYC<`O>#Q94+FX|y(q_WAj}mBvP}jzYC3 z+Ln^Z$ok29+0z@vpJHc44p^;jVIZw;6a#0&n9??4G4Owwm%xjEcO?V>;14T8rL?`!{utuB z#ORaXLkIFWNu-QO1bCmK>6?;mRji&5)!@|qaah|Ccu(OPS$0co#0^f z8rxBHVdq6eYbNb6K5TeEa46% zG&F4LS==veM_x0@CnJ$cr3Up#EJ9j#eQxq@)O@2`I5374!>H_)>#sV;jEnAQ3K9d{ z{L4$PBIQ)jGwK0jMe=@42M2EY@OIPqbYW*usa|DAR=%eDFK8YgXJta~Idp@&#_<<9 zBO@cu)2C2FL)wpMwj_K;DJj;2>Ft*MjV&>nV883r_Jw9o*cUZA!$$ZM?yAvB3;D~N z8!oM2*xBwh@M3=f!@!7$ni@~2airQtz55m(T5gQ$2t>oEu%8B1qy+y`h|QW4+ko20 z<@(9yV`5?+Mly6*s4EZI&(&~Dl^I17(m`_EHit}q6p2T!dPAX@h3eTu+ZTU+efg*! z>oQwqHL{JcU1}9SK0c1m&W?K|rLU6Vb=~BL4`h*#e90*);(AK!>XPqFmTdMsPmVOd z!I%3$9Y!bWacDnNVKzYd=j+CBcH9^BZ1W#T9t9<()t*-GGfvx>A=VPQ=5fwarN+ia zQ)O{#VJAbm)cE+eyoEG}I9Vo9h2^8)y)zB2%$k~-ZRY|pv?3(_{?E07Xu{|y8%}nn zN~4#TP4^>0uz^S_6~(Zaw2X|O;HWkXY{I}b9S`mr&QksA_v%EDARxU&MmgE$P-ap# zZOUBGlaJd(m6rX9RU!S~^s8UgeaBU}3_HEQyD=pEboX6MTkq2=SMxN&pJpv(YnZm+ zEJ7QLo*0fK0q;}W?e!n2=P#U!#?3VP+vQ|X9BF?0xn^uy_l@C%uXIpbXWq~K7N1A= zX6jAST`=w(%o1$N)C2;z@*yzyT%cYX_WK;U$Bv+{2n0U;JTEMiYZ7$Tc3E;t-dSxoBJhAzR2`I1D3Ey^J0>sK6RxtL|= z_U=YBZ(JI3@*c);`B++4+0fx5_E;$TepgqQ%WucUmPT{#{E#9X^7eRZQ+8p(d)M{o zC&38b24-v};x@aH94Xqm<&l(I9v{EA>HgSZk-M;&0OpvRax`5ElOleJy8*2cu;g~r zQ*1|;>Sx#Avjd%WB>YiSsXb@z3|yWD`{YT+SJzsUQpvepNoxnnU0wT&G%2KG<(jsMw#~(Q8d*y z!yuo+e5PmAIk-TnzJ{T|?tx4Oln}cakGw_e4YN|>Lua<#jHSz)1phINYfVh|4`GFM zZZ=oowzJn~+%t<6b)+CL{OQW%q2YuGz zi}^sGSv*JB4O1T1tOqf5o#zw5xZtxZ8U~}aD$Ll}JcJw$}^W@{;6GE9vxDCoCGvh&uu4aV14zGHd zN4~E(=VZb5L#zEvY7C9Sj@XC<<(eq19Mava{iO<%nmaRscbti;BVEo4L7@gCpr zw7=~v7*wpAJ^nbhdrR(M`|8WO;FtbpKDpo^`W`rIvT4Dr!-?FMLDX6H3&pY4hZZ6IL^2KfT zMZm!#s;+nFq1rPn3SSx4oiw`8;3~rxCvK+bQFh90mU1d{@nZXInksRU9ogIC!YV)T za7$BUI_KoS9#CmCMt8l~8XdLEogKTstW$=u4NLYlPUN17y4qt`%a-8bbBODzl0145 z^s|X%eTtYPxicJPo2o~`9C^IO%LeyIR6J&v2l}85?LPjbOng?#PO@#~YHiJm%?9I# zylGS0U;`hGFA*}l0Fl46ISal&bl=jX@_<(8&EV^Gbbh4F=|en-;MBa(eEKjXH99wr zLqAD1$>tsc;=#Yp+vEBETEyp^TQ6Oui6-l##cDo`;tE!%fB#?A>_xAo?Q^ZPiucYKZII15v!|;!5>q+>E%9oUW zn8*-?7WKt9utv;_q7idp3oKfXgg#;369^lL$reqZq$RrdA7Q;GoKrV&qe7x*YCd~q zgg^-DL6nWzeiE|`KU2k(`-(#zbPIoqr}V~}ODTstDnoCZEX4ttc47>7fwf=$>7&0O zFy^@?cOWpEA*MQvaC|}dz=6(@Xg|1*@j-V_*4?G|J+1S#Z=3fTaA{zfNG{aFadjl* zmZ#uXzF)j)1`GxhTPPF@Khc%PhI~OR=JrK)8PAfX2t8v;#L9Krp$tSdf(+Z449KL1 zS-U<5lW3Xr|x>UNNh*pf2ki-IB@DW}U#dYjyL zmq8O41yyya1utrA&DoAebywr)WuF@Kpi@r<1!xms))pTm3ckILNp<`D=NDEw!1YWS 
zcVQ}ryhGkY`qBgv#FWHjkWaJ|%g<2fFq^3Eg#Lk;dXX-SWJ~i}b)ZT?5~kM(XBn<74db&$KvgvPhhK(2paYmyn}mwEpXyJfCJ>Xind`YWU@zCxta@ z$hob>Icc)-O*!{nGj+53ZqMgWH4s>o%C%o4JIR7^*6CM)Jp|jdv0P+dn{R4-O4MKgVV8QTsSw9bVjpmpc9>Q8uupFAew_Fy?lX>go5`rT z)t6t>KjKboH@s_{0@F=cUE0dNe!EDsf??{juR%wpTBEaMjwor>PhTH{^i%#S#3a$9 zy2@HRYakUDX^1NT5AQ|NN54b~s!c{3^S6jLKX$L0y|*HA2MVTI%v^Zv-Y+jvFLocA X*r<(xYQDg~H(H9aYS1!i)8PLDRAaj^ literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/latex_1.png b/doc/source/_static/style/latex_1.png new file mode 100644 index 0000000000000000000000000000000000000000..8b901878a0ec9ecf67f415ff391ddfbcfb087394 GIT binary patch literal 11638 zcmZvC1yr2966m6fySo&3hXodQFH$I_xVsc6Zbgf`7bp}h4ut|m7Fk@1yL)lBxBc&Z z_ug~f+nlp!@?|D7nM^X7Nuo5>6)@4r&;S4crjnxUYXATa2L|h+AjAHH_kIQd0BFUw zGBTPC9I4K4S8f_s)`x`R%z5#jzSnu~s}H08%}&eZ zZ$6A-1MY~D$25W&0E*j|Xd&(I_7k-xJeClH0AvF+SbPSge(7ImB!cEP9!{*_-k6Z)H$z5=O4OfWs7hOaxWQkIc?y$XfA zq@-lz!Zqx}=JV@_i{>oX!Yoxr1qizRot3wj6SBmomgkqHxEIGVmhN5=ejFwyw6@UQ z-R#xoEq-I%iw$^q;>@ArIJChLzg9#M_ZnmOF~vVV6b!=BsZ#vb!If^irFi zrp@9y)6rW^k?yo+ZL1AzqxWc&gf zg9hG}26HPI%`Lzq7?&HU*GVOefW3wQFU1@QUjc{yDny3{VBaNQgSi#rS3^tzDELl>SwxUJpmLiBmgUXaA5Lo~+WdsxGx8Uxi zx-%>iARj&-p;iK;7$>Ql zFV2Pi5@CVS`3#CU4Je*rlJMCzd76@?bRIZPIE*OuvPy+m6Tu|Ps&r2DX*lFj z>5?5D0@KLu!J0()WZuMQZ443;#NDk#P1`C$zH(KL@;^&MXAcg1wn| zYS6Do1;4d8UsGv;Bei-z_mY@7)N+_vnsJ!Pm{mhiH{Nb=ZdCNv_r~;U#H`{3vPdP2 zr@17XC_AJvC)=cxBxa<2RnAgERxVNglUAAD#(JlU##GF)P1~C`m7@^PIU-6Wl8 zozXJY^4GeQx-I2$<-5B1Q-;1b9I*i(to(hn) zdgSS)iWG|s&6Txvr;Oc;tU`ChIYQE6^$KUCs@_+TI#M2^9Oo`nEsS|=d60nSL6{(H z50+N-*8SGh*5=C^A3-0ePsA1SrK!Kllk5}k6FG8Ca4OvFT20p%+P?9b1Z#a;VKZ4l zH=aRWJ8m+*64D1!XCWtn0LwyKJjc4Rud`bfWyQA9wyFF6d@Ov_^~Y9}R?fp$BL+K` zJI(COM0rGp>!ri1Tx25gWin5B%#`zC@BcqPVhUub%%R}O0?W;0_ z^utBJHxs|LGWFMYxepFa+K0YL7wTJ__07LQk;&{b`^{70SLrW!cX^xSFXN};PyMj} zfcgk}G!3MFd3wf%Z-DoPs|uz`!2Ka{YMP*~=`+s7R>Kyemr~`8=RnlP^Ft_%td8+3 zc+0=k-bQNgiF=vbRNw1|x3{Xd(XDEjudFG==Bv_H$-to&n$}Wjlg&F1Z;ua`m&);} zKs|Mce&1FfM)jmYqto>CG+KqWUcM21M|jJQ2Rox6T@(14k1xRhniFRZN$ppwUD5{{ zaLtV#(Op}#$@rN*P9a8wb%$9~07nBxEef^GTsKs{lQ*Y4fS{sfbheBl+%1eBIT`4R zSqSsVvKK~^HyAE_CUqufiZRbIZ(ANGJbqpx>}$gtOMO62sbwwV&?dtL`AZwEdv({c z_gB_cV%zu3=DlZXiEIAu-2QxTbeM1DYxU9q*B;zh;9IoN=Sbc!*x~?cC0(FsZt;!j zOUvQFH*9a3aiVupS-C#+oI3urxOyBpH<4JBQ!ilsC|Z3&=GN@!7t@pLnl0A4v%-@OG9S+^(=?AL+$hWcQw-Z~CwI7r~RY%hRzldD#u4{LEYt>lH zaS|)~P|}vUkTVP4Sm~T;r{+NE7-L87aASM&r{=`wx%W0bOTBTluAbK#eH3?WJ11K- z-#?%=X7%bEez)8EChN}wF+JDHfx*O7AoON3bto^L>$>$a@7IP)@kROa^tHXS<@&@U!8pGjNHD0*kzZ508)6 zC&GVsemUi5^@-(jcKfd@xE-k#nW%`~_!KJL=aL)4snY4ad7hivj(*zsllb~Rv$v-{ ze@6Z^pF7*HaQM!OQ}RFU`5!#YXSQLVQ`<=__#HhFobB%)EsXsbV=%t#@bJ5Ox;Nqc z;1oAT!2L@Jj$p$@8Xn+m2lygOy8t&a0!c-`zs;6H&w3^geu1Eq-DexXX>MXj&glUj zabn#NT{*&_1HSK(KYX8a^YUUwnFNAuC`89v*4eMFz~mbrN?iEgh}5v~cMv z)_cM2DI1S(UxdqDJ}3=~U6nZ?zu3J1z;BVxDMhilVJFxeTOEB5eKl2(xw9jusfDwd zC8xKe3+zA(0El^mV4$O=hbfJ>ql1$>$XlH59}Ex-{`;AWj^-Z}4|{PseKk!Q8D}?3 z8UapzPHs9d8VwDNn45(a=(Vi;zsX@WaXK3h4;K&@mzS3prxzcmvzs*+kBEo}7dI~# zFE0lSgTwu!lZUA{hm$+~zdHH9{m5Fno4eV%c-T5S(fsY#)Xe#Vhd3SGUq=7={Yy?u zZ`=QJa&rIouwVn^`rE?A!^zF{pT1$FVt>DaG;O^t9rR^w9W9;QVLZURLcC)C;Qv1@ z|K<2Ulm`E$6c+ej!vAUbH=!8UU-|!MJpWSGKVM{YUd)G$2~eE&xEYr6en* z;|+J5i<+U++t^>BVEM*^kN}B5zzzpa4i|Gk2G<}ZGR*oNHx4EvcJElw$wd#t9bOS_ zRG*RbtI{tJX0QMn+_u~!q9uBv1`YvXXyH*zrTgk#Zb5j{{?FlK%U0h5Klg7xojIBh zbf(MP)pehoyYYGBD1d*9nadY;d9rkfYbUHoimV|<9-_n{#39teKt+d{maxAS^`IZ% zHQ_a3oNO2fvC381gH`^b!rJ~nsG8gqCMvIMomC1SeQ*9$RVviZGF7@$P!JVlblX-Q zFE(eNpSwAB*46PoiCvD%#FUXS=*}$v^vX8+>AC*}wD%6}k9l%ArEw#hU{fah^StAB1N~&leXNSzQg`rGa^_7Z0=73VoL&hRgp0wl 
z8&S!|e&!V(YEQ2-$`uZwFEok43Z^g+Ii8cQx;&nB>uV`CH+x9=-cftbneIHSzI0d->M?G(ClX&lDj%R40fjdMooV7CR5!^VlPI_vB^E9r zwDDWdBkezQ=n+xM`AaYl_&xV)`>E`A#`3EvYn)d*VACYB)3oTs*H7X8`EGIb*=iy3 zX`xt!7&G0VM>QX8bf;o|b+T%F>f6S>xWL3<3KYxa0W( z8Q{hJVfAiUKSp0s9B7c=W?XW_>i+fNOjTn(2D#vHv+8BA$5Sx)dOd36R%Ene&7$jpk>6P_2z|-_W|=HJ7GyO{LV3h5EGV9v zO`4j?Yy<)=Oej!0*4$7BKDyR_IMm}9;~NtzQkMlGgoeRs+J*JO{W?<;hpkEEmG1a7 z4#Scv))3#zy@?Xn&Ay#U6)pkT3KgJNh|__K1&|3j<)Ha==lT8-xtKGLw*|w5I)yui zhMkRyo@}M`qhQ}7C*y)~?z>I`UwmagPNqhI)4ofHok8$fNemlo3h<{YsyoWxh8w>$ zg6V%f-!7j7ykSzw`Q5T-%&;x1wuryYSnmTfR$UdzNbb!dy2*hIP7FUiyFEk?o3s2< zlUtaXIn7iVT4%7}NQ?VijvHXujsYYMMGhpDN4M6yeE+t<&S(@(Gi^;qGk7xZN+dSJQc+#4&)DkN>?xP~|ek3~w71c9t#cBTx z^zT%@)!1l;S#q6vz!Nd1V5vm3abM;8c98XlYFv0(L#b!eY_qJnQ57Z#h7!YXhvB^5 z7-D<+sZtbtoxv0T6W?+)I)}H7c9t`h21Tw0AxA6iz9t|m^~g~x$G}H`4R=ZuT(@eB zo%%OshLG9gg~q9SBli)3aMGFhwWmerUsZ35V$Ff%tP=2Va;ia#eCB+S5LEjjs#B$( zyGi{GgkuW!b~BZkE$4%5BSrCGyFHu<=desZ7~ZXDi_8z6?-6&m;UeX*zqs zjtm!OH}*aln#^=KxhnB|hHVu72R3IDeNB8W#kP^)5T(z^SI-Y874k{% z81G3OF_lrbI&UG9{-5G#g9`V-l=cJOM){0@VHaAzO*ZCX>pp2Y|YGw zl?#^~F-^L`eQRl}KSF-q8SJ(EVYW*pQ2h8NBEoP&J_7ziN z=Zk%t?^~Z0Tigt8LjT~@ed4^DAoUN{;I+lM> zB>)Qfb*%#PQA;v3UixEQ~jW|8@<0G=o0sH`=AzX>9c~HhMe}2aNaQel2@S zd`Mw=^Wj@PXrUi&KOqle1L2mIat{CMcQRaoX`z7k#^dFmmR+iNew>@H+P-Fkd(O|r zB^4NocXX8Un{}j7eiX;M05JmU5MX=~v?Kh3di~vs6ql`ivqoMa&@J zdgi-P_Peb?w~|A1ploG#Y|V4VQ0uHPwc>U|`VP7sBcBW_kMAqdrwrU4;Nvn*@Qz>_CC78GnEHa;VTh)52MzjCE=bHH=G*^GCpPsrYx$k0< z9+ckpYPOL+Vi*;cq7@S#FqIIm&~aZf(- zxTyA&hetGwX$%nCJ{tb4{I$nj6~w7Ph+_0<5P$J~Q?AV+O}>V{-+^t^wZ;e=vzQaf z7`n3kQ1XQaVn5d1%^B6EKYd3%z6A=^+ct9=dbn=XPDAH*-Ddy#MqxBsm$~zyJ~nlT z+kIn+Htj{2rJbj+n^}gNn_>cj8O5bCYUoUi12 zkKMS+uHD_|)4<&eYP1owc`2U@pUtOy)ei0KOo&0_xj{%Jm<`79o;MboNSeSbf(rOq z_#I@lr+y49s$7>8r@bd;dc!Qr8hAHkY$sYne_0sAIZ64;bB#$zQ`7GrdJ=FWx?D2a zCB-e?Ib;H4{TVP%c|<%q5h84AdWV zMWZ7J&J?Hw4LOfSVNeKFXl0X)-Jl;5Bs-{>-s`ee378 zh|~+bm-Mt^bz<&D*Z`0$K44u-COD_u}dJKV2}>4Nj4gqeqXSGY)|r`+DeOc?GM(Db(78J9r#0E-z(r zfxhd9pM5o1jEROml`KcOcYf$njP|MoUHyYC~OjB8GzyP^9{U@6{tc zs>HkQpR+ts$rj$cy~)pfXPLuurfpZga`vYE6}K9rsk%w`bU~g`l0MeyXDc|0p)!zf zzYS;zeHHbon46G8F4#x~qUt7tg}ISS(Q2G*C_EZ`PukxgDXvWSigDMZPZg$F*F#8tcy}hb< z7aisDxHp^JEfsqLf^{?v&13s=iQnm)zT~kwaxA_mr$)7pzZ(0px%36-tlPXiv)a>@ z47_|xj(%bBt-K6WsC-XW^2ywy-*0E26Z^C1F!-JLONmpX>RstopTkHz8J=O#+O4@A@8bPLQFctwe0*J#Zf+6bRVsQ zw}?l1g8xdqaJqGF@$^6>vJiMEZ!TM*_^8Q`#h%7{H!F#V@{CQox83Z(z49Ixt9eL- zoj=-K7Qr@ydxdY2e~I2-dxBm5$oCg+k=_P_dg5iFe};i*Y(zL;we;Gn?}^@+X4uE9 zwKPl`QqyglaiYJ~cWwpWMip)B0N7|Kh7>L&kNZfD1FD^a?v-uL{NeXv>in zBEmkNoCrp};5dGoDBouQcYkYw^rN~q-M;R?le+BrP~tZSO72w3W*UqFNKqD>uMuYo zyB3-;Y?Xok@QADdGFXEiHn2%~Eh6m@O!D;$+gsR+W8xXn61efB3Bg~xgy3I;vEKf2 z2@3wVc&Ha!GKuiKdH9qTojrw(=h!a_-xdymUwS#z<{^x%Vr{5~0u_FbLyAG;a7WQG$CAU`LHTPUz{VUe zLzjuN$evFNb`na?qB>yKcadW?Z8iCyAo#;Tid4O1L>*LXrV2isV{zYW2cxp}W@A4| zM{eJYD+!_?^92fgc!&@q*`sy9`Vnz7K2aLix(I{(8+RUsarUgiZw?d}B{43NG&$Zi zXU{cq*FljcxFV*5az*&Z(gIP?MYY0aVJ+Zqd zDTd{jbBrB5Dn)fD66*zqQZiF?tfG8>S4?bpt=%-Mkr5nrs5nCNl3R?=@H`jbH9q&V zhcE~`Qkf?4A`Xkf{EJKQDIp;~uZACV(R_^_p#kS(h@^~(Ns}fxC`8IQThKA<&#Dbu z^-oEYAToKe$9Ga^^Fr`#)YT1ZqNfR$sFIPikMsfSu04M@Xy9`)=?Z(2y^={~Log63 zDWUv3o=Yw!qj}N$B)S?n6_#6iXdxiS|5-M^m67%t8>D7Z| zHe^G>VKRju3dE$~bL)fiW-o8EO{!-i zaOz{njzT7ol-ohT75MHKr1Q;e*G#hAP7w#>WZWV@n{eN1M??lpde`wukN*u-KOF2} zu`4Ohh@oQcfVPQ&K^|O)V>`!e8Rd~v;rfcAlQ)?Um++NXa4}LJ*>(7MxYnYm(c{*2 zwQHos!`o&ZFfSfWj#nuf9cgYE>z-b5_m<+7$*&{_70y6)PcpND=~PTvar-wpnUN_{ zf9Px7J$8mOeaMJ)>bR9-$S339(8VI}dE_l&f0zIFe=Ow;Lgvk!j<(6lHM72A+-_r!h%Ay(xA)<46ta{^VK6{MW8#%Maw2d0oOV$MMG&s; zv0cET1nE!}qWd_t-Y#9pP@v0Tp-WSu!+f()cBBOg>c@%ZOac3}JXxSvh4we~65FW~ 
zv<%2r)Lj59$jHhQ_btap&BLKStul`j=>t5>!O~?@uq2AOIpibHIYc4y3tVH>ILrs* z0TVH53QG6%KJ}I6q<#BNqr1E`o?^5tsafWkd0tB%%M|p}GSDVxs$U+O6AZ`S0}Dh7 znap}7Z+oo;uVe^eaxVuid!SOH=xDL46e6fp-Za86LDSbL_35cre+M*g5ME!x)(aCkbI+_H5#4XjpGWGW8d2~(WX z>~Tq=*6D;>(HM8A1>XP$bm+}VNGQgfXTRrS!Q00eE{ZkUNS!HdMzHn)>O$k@XOpI_ z6nE1Fnc3NmyX&P=K28+MmCNY|!=X%;Y881{ei{>T-^pOg^OjbV3ag~C8vvZZNkc971w=;5S+ig~FfuT=Hxt<)IkNaM6| zpa8w_7UA$cS@vz7pJ$W_1!OFek_C9eFOaChk2!RUlU<+nIlu6_G00Qt9cK^dTh^DL zI|AeSH+o|*>kF5G(!LjV*vJSd#sOy$nB-PX4)d?06le5mBoz_pNV%${M>OOqRWSR7 zg8G8EE0p#1JLX9uPNTf&pU&a~b&PSbw1U#~8#j`9y4lojJ50LZDS;?folPLmqtb5R zDylTnx@udQ_BU1uSIB#pE~MyiXQU5;gE>`V=i!RsX~8ulB5q%~T6MjP5gVb|bQ9Mn z7ML2^ojj72cC_Q3?#1sH<-FS%e5I)HGJY0ds?IBFRZE>46%d&{uGwEn0=E`a{$XZ z;`<}*UG7h%8lU2fVE#f5x!H)}(tANZS<2+KjP8&Qb&(vHxC9tU+Z4riYUh-FlK;bP zMB9Ku6m)cVikQec@ixtozIush)f7|#CBUXLm7{wt0lgIi{_Mv(0HtQdU)iUBCAs@Cr%!b#YL0F618 z#g^>I?S3t<$;WPJJK9Va@bML41r6cN(#w%MfGXmysP5&r*+A`pD#>Wn5|o+z2ueNKq4nBP}Z4RL@}Y zu2I*#9|Z?`?&}T(MGcoM$yuyQN_}a7iYi>Hoz+`vlUJLGy%Q5fBHL9peJmGN3ad9Cxg^C`Zq>v(PItHaH z+TXqQBY9d*YS+kM@Ewc8%;vmm9_jCEK63z1-;s{Xgv7`(lt(Lumzk{}3eTg4!!vP+ zXlvo48b^qy4trWgj)s`*RLYK`t|Cc}=KR`caE?x|7ozGmugM^JGjkK@m*N4hwbsr& z^d~#>>S zez7nQL^&D)VTDFk$_uT|E%mVf%WNEp-`DZ5E zGG;`2ZWT#w_>$N`KAb=8H}Ay}IuotG?nM)_4R6Lve8)iKFD|*4DD{GQu?pIZ;nDM=7wr_osDkTEbB5_lVyO!HaRSY)+5n=THXmmW|c8v>-Z#@FfQb< z=F}Dr_IPI3Lb3jDS-LHbBNwoJUcJB_o)qA^Kw3|kPYqNSNr>E+2zc}sFYtxnD(sPv z@>zZECg<${kndh{c9&@44w|9kfF$4x{2#7U{YoY~iO$P`;~9*q8DCzf7CpB6-UwbD zplH{0gq^hrRjGFB@({_JfKlliQruBQBLR<)9J$Qjk^$Fx^iJtl$L`cpIr= zNJJ-L;VXp66&9gj2RrB_dF(hsn-j&VEi(hX%~AVP&<5$1b4tg~Dy$(78HM{eP;P}T z2PCYdlh35;d+-qeup;b~Gdx}(GcuUUMp)h%#}J0K!@g;8G*=69LL-Ff8-Th|MNLP# z03<_|{_42cw9`lKtaFU``#4K|I%JY@CahWYxiQIdGx)sE>&|XPK`miy1ddEmAJi?8 z61X)R>~({dxcv4k6xG7K@Eb~5D%|>R43z#1;UW@`Y0$ln8-8yJCRV_8eLjmMj^tq4 z-a`hx`|hYNCM5;#B)k|zydljx-(OGQ!+CEGhecQ&d(Bv#)eBPy9cd!Q2h4obgSpON zgsWazMRR^URVtixJ6?vO224_3*%#_Wbc;>@crfDioQ8gqX%qQc3zVWsGKD(#vu4 z>i0SJ3caey(}TzKL_qC(Q*aB-SH@Ch8^SqvMIFvBhjTZlE@8 z>~sUeSlDzr7ivMt?s!vl!C!1R=_w0p-d@`e94Kz^@>nA>s6`Qe{$oF~qSCQooKxcg zpcP0J!&Sc8lD>8zf;(?(D_lFjZ%rX5;j9pBP2^yP8L$OZ_)_G-p74SEExa@UpTPZ7 zN9_0-M?jkxCV)0z5j?ha8cO{rnIGXf(d=x@XFK`o(-Q>J6%~CKKaVh>AjxbFZ#9Wj zfFWhxpCeyh5?K4j6o+8VLOokhE#1l>`e>pk;v?)DZv+yBKWz|Z5l)K{d072xh5P6A z;q2IOzJ&3#@WwiN3+#TTqU?V9K_3K22DZm_vMz7$`Q)O5o7qz&O z8y|Nn12k%B%Rl0jE4@ogYPiXszo064c0mYO0Gem2PnG;y&lA*qZ@W3NB zKbN{Dr^lWzU% z29Ff!3#bVcd~=p3!kU8=F0BSE&EG4sHzni#J`IQ?J*xAfV79~(hI>6&b$GBNoL~-v wlgyES9#I07Ghq(}Qrr$(TSc(8G7w&{yXmr_OX^Zie_x*{$*IeJmo^FcKVKd2&;S4c literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/latex_2.png b/doc/source/_static/style/latex_2.png new file mode 100644 index 0000000000000000000000000000000000000000..7d6baa681575ebcc610655bbb67564be0e04e987 GIT binary patch literal 14870 zcmZYl1yEeUwl)k8g9QkZ;O_43?(PuWT?Tg<0zpD>cMU;;yF-Gz+hD;N2m}aj-{hS8 z-}hGiv#X}M_wLoJSIe?~Vl>p`(NT#|0RRBHqJoST001WdD+j(og8gp~A zjT?eI&bxOSa@NtP@k%5*GC1(zVAKORLY)P74hklw$HcE$tD>EOH~F4hk5j>|t{av5 z-cy)>d%Tos^)On1!oCga$8O`}B+Xf`bp$YgXoLcT+kh}AqmV)zyt4CneiK44DtB5o z8&nLJAPc8pUw~P^eP62jInqFHllfA(n~oWkl6+|A_xCLr46p`VDbe zS=r>3dxZDyi&oTCYqooFwhEp60GiXit)H(elK7|gm)G_9SC+utp5SaPAOnqn2fn&4 zbH6mOu}P;+Te4lJWD!8S7wk&Qk$t*4qZp>pu=lUHO=Pd{86}IK?r_*OMW~r&c=)gd zot3S^Wgb%o{-PLK_aVN3Q9fi{oNyfy`zY_$;=2jPWO?E54Gqi_ilo<5Zmch?zXjhv zw*Cp+n(NtMj`ne3Oy5KVyO6!#V;RgG5ws5l09SVR`zg!*q-p8Oid=kLdLLdoz z^EQG2qM;;x8-OkR#2m1WblcC631IKT@x!#GeDgyRCmMBBx-t>Z8jvN;mq@(_cQ4tm zfrEqQPC-T!!6zkL7*vkM^eJA-duCcg*bA-$6B z6LT{G98*NkZ7%`hsj%BVS~uDt64x;HzU5z#KMYU6d^tEA6j|wtqAwbJ4E#D2Ohjqm 
z_l0~Vml}iRN;=e)@Qbk>F+{T6GQl!dvpPJaE)gyAfh9j@SiFP+xC8h*a7`$;>p!rdSA{nZ1z45TDXyTe zY^?lTp>xLf!4i(o8~WvTDFB}f&xSQTV6)@l9_2Rde&{aUhO&+QgLe>iZb@x_GGSf?yvK5h(%9PI1zh`tY-K(I|e`VdL984ce5~B0gbjFLRg;Fe2I&TFkIx=c9v2?|9;zO#`(mT>`=Wa(`?sU5W8yjz%ty@MH9l0i z$^1(Gp)Bhl8z`G@nZLt07^qa2+@n=DhxFA-DOhQKjA_htENEYP3_s1Raz-atH(BSi z&Qyg;rIs$KZhNI{<)LoDyv-5YUz`Ax?kKUZWu*tzbUMP7OSODeH6>(aJtp0X&qYX@ zKjo+;OB70stdz8M=gmAzY{L&kSwE)7=@l?dTrWk&x=54RR~=O z(~Ge~t>d^Ot)umCT>xLeXh768(qD^U))*T#g(>++~E1gdl!b-VmE&hxaZG)1}LMRTWt?hCVB~uZFL=5Q#T|cd77@ z(94kkwDe%WtI{jr^!u*~pBkSKXz^k4p2zxrT2)0=U-PQ>(Bza$icyB}*v7c%boZ9@ z81+QS!fw*H4*KE7e$TP-Ij3;_4E`a=a$v#j8|kcm%LUG|pzpza_kZt_gQbI%gUKI{ zA5or!o-9JiU!PwvflWX^xau&9#CL7te=HK!Gy-PWnd_K8>ZMluy?4gz;%wuOAZnm1 z7269gce0l}dgj>Ru=wEH=I5v4XL6?!5h!CZU|*`ZS2l8@iK@9?-ePa;<>%%7_peey z8nT|+z=xr|p|`bj2Fb}x` z=a?NvBPJ%PyV%@^BI6{pi(~n-1mE}oQkRN_8Wyog(RA-qMO{DLb9FxPMO|i{L;cW- zUyEOskd|*243-xcnHTYS`p|zwj_z^}BdfI)*gNq!imb^vjftbEp{k3$3f>(4((d`z zp$^G)6)Ev9>&jZoT?RIP2W2^`I+MD@IubkII-H-?o!h?*-eqK~HBU7(a@nCxy_??8 z%@HmL4(W*9y1oP+{_wlaK6}KcX5WMw%+807-p-|s=V!3rbo|Zl-EsSRRk^WvTsKXKG-+rq`B+;^8h{Ik7(1;##4B#|&-%70L z0iLkpJP_PjBSuF8O_4qc!LR&~+IwyKf`E4Q$Sf`_>gq}T-%kF2{m9sOT6s9Qc{zYwDgO0qVF~i~5~ZU0$LN25 z{~f1|pTqy-hve)!~f+YxbHLTw!4zqA!ib-hXkI{T4Xztu+j3G_qc`C|N zTT}b?Dv`oBWM15B7X6I&4jn-gONOdZg^Q7!tNaZY1M27Uk@CLIZZSl}L(-eg!%gmW z-?Mhz(e}gj?kY$9YQyX5X1!{|PHyZ9rV5Qh*kg{$Sht3AQ{naBDuretz}O-}DoiS@ z;;RH)TLtObT>ZZq(Crg4Dk3UkSvnALyUfR4dH7!qwGWq!0fzx6(-0L0oU>NMO3DnY z`KeO&-!c68fyQ1srf=lZTseRI{cmAQAhQk0Pw{EdTjT5L(3eI-&q>~@MSU|-QOU;u z$jbR__}?(dR}cH0#!PME=e09d+w95abHr^!hMR=Yr;>-~^H8(yo95c;B}1ob=&U@s zD)>(%e;1|doL#(vJ%XMw#{>3nP?ODf3jpI*r{_id89^;O2yi0z92?3$k zRc5*;)vcz@)}zTfx8wYF7e75;pAN+*Kl zH1PMCyxva{&)lbaEo%IG@v2bHIjV1xR=1$`{dDNM=XCYUWk#;Yc0Y;8I31Jw2_66$33Xm~we@Y1vfo9uaRb+P+BD82Z-;`LsNdYSu~$w;LC zLEBwPoLuaMY0|c#9H~|4(=K`K_amA*H1;lz=a)WYu|)7EYYXpMWl$glQYm`7-gwjV zuz3;h`!_8a3Vq%<56LKPg@Agyg-+{!LK}EY;p4|x&3Z&sEeT&AmV4>IlkbU-KS#6< zlLgWoS5!1eKDv!iM(KPj2nn#-+Im=O)f-P|t?GGwKG#Pkag(^7^6hD?Kw&l^xBbqy z6?6r)B7gjF{d6_`4H1x6{zv${%d?}Vw$^{L>f0E{Gzos_cbU{n^=qgmvy6WhaADka z@%KJ4`*YtNAl^_`=*J&AU<^D35FNU=_4;zlt3NnVeC!%I7PLh<*I@4PC1_1CX0 zQwLcOPEUdU(*8UIyrJc-rTPpSfHPdfE-1e4R~M>-#|45EG6@Y)1wFc`f%H8h4XbH* z4V|U40Suo-U47jJ>1~;3+QD)ww#YlDns&O1ACG69AmH4Y-5q+90_}nhG_BwQ;9Uc?YS)gTS})6cBCwGc&XVbyhikdtDfng_-i zd5gXKYt5-P7`OXcSvV)To+`~GV^Am{g^k;OVbSUPS>0F!emzAW%5!a% zU5gJU_V7}q0)sVQT6>>e9*|W0ExoF;0ecy}Pd3T69pp~CQldJ9bNT21N0JJAwwQ%s z+eN3h)7~1tRXR%Ig(@FlTUa~KV~mlJy<)O4FhvYN+ZkJ`M1iyo<nu>pr&k|* z;hs`}?!k0n_9>S{1R5OpC|yUv+q*NxSQO9)*?PJ8{4m^yN%Q!dKiAOs_$|yFANc{G1+w7iMqc0`7)UP z8G%3cot^moPx6<4Aeq2mXAo_Bffkq0LD7<6TUh842w4Ph>cj?DVaca(09JguL zsu)G7B+NRupCie#zSzx!qSMGK`TOnf;RR6!JWt+N{DGMjWu}VB$fqDS;svA8gRo1T zCYUB+QB0P=>VqpgJ=?i&JcjREGIMNym{BWuE|6^QtcEUzy3B|dN>6rfe?*%*hnIj_makzI`NR(&MnpVbcTs7`-gN0doxV)?Va5r{GyF!Myzrr2ML=ktT(T* zjVa`NP7Kel>j zxrPL$IizdGRI$Kr9vPznI$@p_mXDCq&#rqnM?T#GS8M`CdK19+%A-X*<6axT`2-9L zo&S&|clE(*-T=Y|Gl@+AhgvZpd6QPb1yB4~pex962lIY$c0q3!%-613vi)C)gzK3y z68EXMf##ny;VAw9R0wmypPUFptH9m~*6>^x9-`jzXpuCy&K4dW z-%RK@!Ek;q6&08@fu{MVM+kdsN3%D%d;zU#p0yQm&tvETy=F%ApTv=VZ_#8h1NcWv z8b3pB!pWZ~6e|E3O|n8RkRp@8N!A2*96ww{Lw>@BxUx6?wZ)L)Hi^MvF0H|ebeg1O zR7AQ@KpwyWHkzBZW7>AEc2l!j1l#{OPRR^?rqI7JdDmqNO#SX?=6fz{qK=hK#_TW; zhyjOyA5j6cx>0s)Q2pLmyU3&azup$5M&VsQYlJ3~3(LPo(J*HUiOPskhe3m(4hyDw zT`9k4Ian1*6!l5qFaY?mR+WnWd0(F-!j?=HKFx&94SkY?lOMU8yQuOk9b4*=e!O?C zo=5cgO*(CeJ2jx@D8R=%bvVtBe(>n;xj%4U=r0&6w5-W^k6a8e$9BHV*?O`Sf@hY? 
z=Wd$%DYTbg@)q(?8>`1PBYmi$;4C6P3IJ{R&kO1-W_tHHaBNLGo%;QZgY^Q}hP0D0 zUh);d;aRh|sy{1;K;_nw#xsn)@rWu{w+?h>n4GZ_{DhmqS%nt>p=#gln{SgkVq z@;Zq;8T)xHltd~1otA{m7InM;m7=FFBfW(Cohb#NC{@PAU1VZaKS_+_A!9jM zQ)QicU(iB&4fr{CHjTGJ_QfsQ%nPF+7o1^^1jUb$HAqqMAD)x39ff-@&=zY5JT=xn zFQ@4Y;lWCi>lD?0lu4_nXj|F{egEx-S5|QJ`Ruk!vGBg_$b_bC0&zd^6bGPJT=~YI z#x+j>1apvFgmzJz04aZDOqYjEkrxIW9-;cZ*`=cBUh28L*8ry3Xw{%Y>)M_NC_;3p z%(k0NmxR12_T3OMNJWA&azl|mFLey~hO%JgbxP>uyN=JOGMs4lmQ*Gk_Ovi6xqGWIAM0gytHBTxEi8(BOP=ocUlpF_6Iqq&l= zlHPG^%pi}8%aGcwYOb7=YJ#uxgL^UbAj__-EdZI!&%A?8+`&<&A_nQlW}s$VW{A|c zz}HWOIpChwR~ieu!Df7{k`m0i<%F|^zasv6ypv)7W=&_om5rhIFb?>GJ@{`4c{PF; zmedAXlV@>_kGjU|VVk>-HXw6R@m@tEIa*PLsidZ*J5-XiFK#boi#Q6WZz(uf$91l<88s?xZ6R%YjOX*$-uiIwte$?{fw*eIS+DZ1pO-6f_7kc(rf1v?&SOG8NBd^M9<`f zgo1ht6aZTVlEQ@8&04onXZi9yW*h=A@Kn)QDk!A~j5u^drUk{vWv1sNWTJaadOEe; z)bB?cB$HL*E>aEXlcxQeT@M*$PVnv7gt@jABwKto<*ubf{wfTP^rM*I4kQ&3P=<+* zBIR83vRstih_uVXjpV$u{dQVhzOtp&)s<>6UGthw#YwMfCT8f}&&AHI=d-kIzMR8- zU{&(LCPEaloGK`oIp?i^#Q5z8Msw*tW`u61RBJ>51!g+V84GE68Hw1xh%*W<;Ay&# zTRIxo0FrZ6iuw_GOHJ~}G^e2Bu1$^Z?(=pHeLh=AaO$#h_~j!l0L9y&vkAAZ0=sSh z23@Iqbq1WL8p_oMu{K989de5Dxw#=HRkSqtI`une^2(z?k9Lw(ZVkjDuL$yD_?67X zpL!ls43OYXQVfuYM}_*f3{JMx)9|K5HoXxzNapIp(DGrMt{AV{co0QxZmW6eaH`5a z*e&#hYwJj2TmDZ(`4Kp%YRwc`?^XOyN5;ccBD7qq1{W0vRRs|iVX6*tq^se;Y73g? zu6oNIL}SfTVY(`CNdBWpGOfKUOs-2G{Y7pGTzUAk8^kGv)Zkgs`p`!r*U_CJSCTtg zecYRT!M2HIj)KN&@Vh+Mpw#7i_ab(EOyvD`uEK&75`%WHy8=wy&=y+vFXJ`46b|+A zXk-WCBg63t1*CRQ6Ty*>qt3^deIf(%#MZ_zD@>si%N6v6t$i;U8f*4s>4CMMG1yx* zi5muikJ1@-d4T%`p3JVaB=KYK`tw7^iVXn|$;Z^VI8S96c7Sj3nb+RGktsd5~N3ItzwfuDe|F(HHphSBuh}K_NCZ z*=T4((Ea5eLEKjES)D@Rq4CkjZ+T&~*Q#1f0RA7dZb9XYrug3`nW=*?kK>lRuOdXQ zDvTC3a?359DdTY&rEU70^|bF-vy7)J7>5WQlC7l%%}cc6p_~qHGaor2pd{{G2`t12 zL#85iB!OT#_16@Qzl7D#31u}!f{Ho#q4RBpgn&%G2k&`&=?6gD_dpnk;DCGA}$d&@w#4QjDHP)jig7A`x` zQqR^qtp}cWFDA*8Wrf6=*#5W&P`stf&roz|5Q=SeUTvTug@x12?BQ9|3Ut&Ch~Zh^ zH|{yJ3G8Ud9J6&ZgHf+6<$m}1O+D=&5lZ`bo5q%;DDEDm&PYa)`g}+h;40rA%2YXZ z2fPg~H$IZ`I>aGRprDmCDAohD(Y6p8nnQ?Y(K?GeFi_wAnI}5N?Ln=bbP{ z5|X2^P7u8w)ciIPMH*_kd>pTTAQt({Xs^TLZ6lIlGy!=F)jmh=iZMaF5)?wC>&nJC z6#>-86n@i5qDwHsRvLk4A2mcr>VVgmg5^P#R3s`PMeai#i!A&NREh0?FuA(7@?|EU zrXR-~&cf3Qd2-kCy&C*8S6p__?CHNq<*&Kag}4|tI)V7jCG(4%6B=->2z&FC41RZy z>|tpV&Nf46^S}eqTz;~45VOYN@1yawoPD^xY-2}O;;9IovTqkxGU9012he-Mpc^3{ zUS`rVju=&QJ`4$&;IAGihZA%^;)jtw9tT{(LpBNQGF5^z(=|!2q8-Xu(P-iLBSU!j z7-z-?N@@-f20U2!0We{T6qiYrCoL&!Ux>;16q_XR(Ex_~?)BW6q~`cs{~7uPI?*Ft zx0WXB%xR|zoC?v#(`Q$y5~J@Ifj&rQWblCW`$kucLi(PweU}z~w5~xr*g^V1%NzHS zSwk!VNY|xLsHXx7U@u?CvUM<2e>}B4RV6r4Hha)oyS{qSsFe17=_s@Oz+4lfCUu-1 z0KHV7=l#ora!oASnIg+DFF5}uFJPEe23s;dFaGYF=`LHnn(Xp~P zX|_mx>bN|@T!U)4F5w*IxuF_%cklxgm<>*mZRvXau%G|)^gO&xU$OdzhO)rjd753S2LV(eZT5l zbXv)zC0L*oj89U6X-BA610l z2gF75Xtf>Bc;#MS=F03%h4-~u*P63fCza#GJerH&*~Xxkl@~D@F$?Gr85(%8xc(w$vvys=7&KfhWt)& z@Gyy|O7gEJU@2TiWCQssWJEbb0%BQdB8DZ9V>7oB0N zyV9DsFHRlHdRrVKkd0}?S7ETzpufes8TG-rxU_8dL4mH_7H`!Qv6)mWGJG4`XMLkJ zl}+us%isb9x7@v<;tmxX%hjm1Ln_iN(Ov%QNa@}_n3 ze(pZ>`~q@CjOAST?D62MB=Dqt<;U!cka{&2n&7hckfH7!jFSA2x%(4ST!eJhJocT| z2yu1!WJx?buYC@_r=UokZZ07Y9-hE#O!r9EbCYoMz7?o?msUhhcqFQ((|>|a*@W>LY$%Hy`+7FOys==Xcik0-EI*T%8v($=*)=A^k~@FxxU>tB06d|NbN?h zlg(+3iHk{GtK=VUHj{aN#Vd=wTU`~bsUJ#SRnMK%>t z9N$J?>*HYn*Q0moplZ6#$rwC@ZF!-s&MLJwB&TOYbg!a#$4mBLaaU5&okcsg%tuuL2 z$w_u!SXtQ|E}H@crO_&!rbQ^JP}aZH?e>~Y_qni znhvHoLgZ~Uwzfh(^3ko#>&s+*%U|7_C##>zYg8aGUG;Evy4XK+4hnvq(54tXQv50F*xma*JbnU$1c(kdoO( ztbof+@x534{OF+QF$qMFRmK{~h`q}5^Y#I)vDRjDLpCsz3d$e2x1rR|h| z2lQ$Bx^n8pB89p__6EUC!bNXv(N**IHG29+vrcRht#47h!>I3}xcQ=nhy9}xSx*AzPotamnT8|(Gp@4pl*T=7lLc!g{p$Q0` 
z2(DigY2yljlyG`QJ1qpKpAnV?PVi>V2d&1@}3Mo)Y}TKYrW*=Nm*{N#Kk z7aE~m&A@t?dSIKyF^{2X+6sC6T8MwT2bjDg^;xkve)Fv)oqyd;8puJB-am7pM4zYd zXR0O^t7nwXox~}+i9&aweIQ)At9qx)vG;?u^o$!w1xX zmur>8YSPifiMJY^-RfB7j%ApcEwH{WD8PqdnQFK5mEwnrzVL9j|E4VNV#I?5@9?&> z6_AF5w=I3Rp0$9Y?DBdjc)X}JpmsOmJ*I2ON+PRjmzY!pr_jSu$-IEU#1~HCib(`V zmSdI8v%PG4&SH35|#P`M~-ypDUHAqlZWye$_%M*#a759hR(pPa^ z<^{J8>2BvRh~6A&<*f6(2YQeLQsb*3X-w#*9S*m80*%?V8k=h6%S_@}l(L2xX-~Am z#?V9psQVI^U|nw^FZ%pGmJeHUA}NKPw|xD++@AxsCWI9##wZX5?DFyPOJW9 zHzXYwe6vcgJ3&4hi3zsgtJ9RBh0Se5zHcoXdD>UDmYlSnfpFsKw#gPfVqvM$m0zZFBexF|-p^8$h2j%agG*}V?`xvhis-Xy)4 zAT0}nMaNlSVXPy_XS-a)M?ewv%aSEp%OJ?AICIO-EiNJtN<5S}3>3~D9KF-xh9jN- z2-4#J5c;37lH&~FX+`w(PLAhw}QTF1`fLBei6 zy8(IVkTLdwz=!d)Aq>4T?en%TU(I;ddxr|5`wI{VYeTMDO3N15L%SG7yFIT~@9;K- zOgZ(tTdI0m9$jvm>{cFCgPE4;BZ1QGF>VuG{{wn?vDpR)_^5h#eC(4vh9QD@T`(bR zWU|@HZ8Z5yL0`cvp`@LHeLkjiPYgoB5~Rhv6QVMyt&%2W1@cIKmN>suf_zWE_;1-n z*k-rWJ?V7G&l8WIsAxZ5zTSUwq$@5e^##2!K|5KEP5I(GmJ-IyjUmtSQ4EmUGqxa; z7>zt%`OcPjW$96LW?h39!g!#!Gye(EeFckA3yYvRnOh&+u^UT4=Gopht9d&N3X~!< z8w|?aYDCYle!RaNXIF>JU;tA1LSgqjx_$nJDb+XrnD20t38HqgQmCDMPc^+TqGivL z{y6()0~4>yLhg4}kGZ#ExtsV0s}647(C;S<2(FD&t& z0nT2y67uRNcW%HpwsI$!CWwTRAe6l`H@Rz+O!U^RU z=o_9f-v8rt#$+YNT+70(g^~eF+%iJ@{-u{oxsbg{aRm=Dt&*FLTI~_V$nQtw=ltcF zh~0u-AGZqPKdUe&0PpWH-Zk$$0n#+6g@us|1<@Wq2zZkb4Be>0zjf56gw(p;cK!~XACm}vKoRa`Y4fLGKa>bTC|ubW?Mdtmkd21Wqc> zF+|bq!t4JAvyA3Ug5F3_H6v1ZZ({f1T^!nm;MAHn{hG;n*iXPFSR{qBH;UZ5Da$CO zeZhXK%-uj@D4^mrxVGQese7eFqE+IDf$;f0P+M1p{oA zDhgOR&JHFB;v@?yC}Cqsj?l!U@VCu3mqbI~MvM6tTKP%;4Y!?vVu0oNDqzC6;8J*HkY~PA?#Bq#b5{52{gCTO zzfCagFy)x=S%v$X=)QQ(%Hr#`u6rm$#RE}%$$XOBOEH9ruUY$?4}PsUN27mKbLP|0 zSp?+ZZuu2w-&-7|{ot2%oSdFUkM@7m2U}kcue|GKS=fpz>A4*?nz&zV-q>Q5@R@0N zhWhlDjtx;Bf6jwEU7|OE2}L-_73~iQwPVml&E=J__cMqIQ<4Qw^5deIYuczjiC`P- zH5NQOGa)}ks!HGY!=a@_Gs&DbHA)HXILR&B62m)!<-d6kOwrqITljDuzGsU`n#YLK zMQ)=K$qT*R@FlNA|M@;7cmH>BJ}eZ3kBBCk!BiVa##$0Ob?L=i~ z7t$Ex?Spa6J6z&4s*ZRXfy00GVFA0+_S%o*zM#0r4O>pk00H%#cU&Z1A-Sr zp6kC(Ocs+jDZ)Mch7^JH*SIWn@}+Dn>sG=U2G>;EZktyVrggy3`0*y$7$t)TAhf5A z-%^9F*81!UhL$yrB+56hK<>J#(f*j~s2ZXyriaQdr0_E=p;gqh{>SX_*u%_Do}smT zv%P)yucY$wns*EeF5RQSp0yCt0g(4SQv8^$#wn}#@+-{rw`)09USXoRX{+WKc)2FQ zt+OyRj5WTs2{+oWW=H>rQ(?fdFY2qZ8A+emUux3s3!qogLFi#|0H8+1CE|N1VoSr2 zzu4S3DKl7-?>BKhy>b-oCw@3d+l||Mt~%tEhB-C&b3VmcR{10g8Lw%8iZP<11 zdqi*!_B$8Aa%HiWdhBN*MQOwT^f{v%7V`4F8nA4ld2a&a0Rzj-Flan&kmMhr!)l1Hpqsf&@a4;BLV!5ZnfLcikpe{&#op z`|bU>{Cen~?yj!tQ>TtR=b0!~WjS;dViYJSD0KO^GVh_F;Nqd6pk0v=fHSfmoF$>4 z&`hkQrB&snr72aN9WAWw%%Py(Mx|&Yf|9fe+gAE%jp3;&X%6TH=>uc2wBVpKf=fba z6F5v1k*FPPN`u{Ekm94YWLfGyK$pT|@3TZN2t~u~;5VdambipwOePq>E$jj%O>%?2kOxA+jKePn4ihq(Wrc=Js zzUS*Xt2;fqJhn&YtA4?P3N(|-V%0!Gp>lwQV#n>@kcGnP&R=%dN>mK%#UdFn4#XPM zgy^9gly=P#x9Hm^W%Y+Oz(aAxjiqhFLa zp`E68brCVOV3{yjv4?mP_8c6j2)I1|$`$Pz+un{hQZlfiicr9-hIu-8uHkvsML|vO z79(BFi`txs`u1Rq{$qPAQq7pS$M5$+gQw`GuclgYO>O5B^jS`(YIINB4=^pxXTyCy|<3eyCARjE2+y)pma)>EZI5 zHoZe_^OgKz6^PaCj~Lk&= zzpd4Wq_ci{6J~u=HB9PdRxoX#Rq|?-keN`y)9HhrKCN>6^c3O(>uT9Le^7%w?dBJ> z^GLiT(+C&r=q?(bM$>F+>u5g1gZOer7PaPjL#>3o+P;B;V8)bX1QTvqr%&$eVr@v| z%IMBYB_Psf`;GXWq*>76oCJ|>z+i(X zZNfdilxlxD`3(22^B5~C(8SCz-CGkbI{&FVQp?2+R&ei+$z{Cn3D%kv7A(rD<5%xA z*B)IaPbl6`0iJ{%Ak?~$46%ByW)Puu#XiEWz-mI~FH#uX$nE9Ac9nj5YyWF5*CJL}LYYDY4Kw&4fyi>r zq67Up++V;G>EZykR?mL%+oH}h{a7NJSZU*Q{F<`l>q5qSyy7vBy{4VJ*vb;7J8M6l zDvOUADZeJe16>f91=6)q~Mi0a2%n{*)`H?>-DU4)CWfPCZYU_H}-1s z@rKItF18|9XHVbrfbSAHPdgg&zCo&>ASSkXR3Oxc?GU?R%V)V&*#3zihLHmUzKfE0 z%|5*Y{u2h3i{O0{&jp{_K_Ji>D-y(x1eZ?0MN%SSex^mRSGiTF?ARUz;$XOLx zE7&a$!47(%gYATD7V&c@6&URSZ8R8l6{P`rOa;>u<*t*NBa#vcIiCit2d_%%09(5U 
z*O;dBEgVh^E%okOf=@ATvFW%7Z(&Mhv}kzywZ4$wLJLNcN-NXo_oHvH{f>EvYLVtl z4kykhD1jBiFfSzKK~Q>AT+q0q_!Cb*mQAi~LdX?!K9pQ~tU!C*;o`MWAhaT)smXLY z$**4xAd%FdO(SPJF|8m?V>b}ZG|Ka?0*HS)qO~_l0M-)XaDpbc{<;AwH)^ae4Z?fF z)`EQ8!4B~_^?#rVQs!W2z=1&$=`xdIGbYH-Qk9*R6p#>rJ}x5fVx~^)rxn6^TwO|6 z9;O+kVxw`OKBB9`ZjQW;f~Cb7NZxpdLfxBm^M(4opf#*D<}Twdb!o&!c)4^Pwa+J- z0V1>4jj?*N%SvKWZEyW$$iM8UAH0T6_KZ&8%B&8q&i?&QP-QnC?LB%){1l}P7w^ZW z$PY^PUvl!63);pfCaNY*XEMIfOz4!Te?M;>eVyGbZ^r9WK|ty))OaCRA5jw@g>|J+aBcowJ@J z_3&$=SKXvYGB?*V$K=O{f@_Gx8#=^p3M)vC=o-Uph*A(lT zU3>1u;ab1XF@77W98Z%E=WNLgR$b9%4r7>=m95T47N3vMk8k7ZzjKp$ju`dRou zr1d*BW*@>oZ0S_gikQsV_Ew_QeFA;26|%VuH6LE9L;?LN$Ixo8ZFKut``f$Bt@tu8 zu23mjDPzPSbD>T{x6VHP@WX=pg1<0xbCkQwA@PBKZSic)Vei%cp~`%_JJ|KD%gtc^ z9Hgo+ie{{8s;cue6?f@%k9CoH_N2LG0r!I7idWJ(JmFMtpX|Z=edlF-w@bH_1_$Ru zV(C}w;%|hC9EN%MKa=!*pZ(r>cm&#-ovkuA&Fm8+vE*knUHs<6(C6oR?ZggVIr5+E z9ybk^%aAMU%gtfSff@TSHe}%XO}_Z+_TcfOmGN9Ub0W7P%#bK6U&S5p=OD=Vmua>Vm1~*HWOW#nrQC&;fONZqPOHnvw33_FTW?32^8J|_c8vkg7 zUi4b51G9)QwNSTkcX3eD70|CKe^D}XFq1Y5NLHE3Dyg%Qa#g(ewqq-TniHr!wx7Go zZ@e5D|LTL{dqt(;R!|zq%WhxBbuBc?mVP<9=3~+?-o6k|gNM&I3&`=v+_?UqWwMev zO5J~V%Y5ut5v#rCNNVok^*X&f)s0KQ-z&B8Xg{!eC*vm92yHt#s2+V@G;oS+c(=#^ zd1pgAc3yB^x6wNKacm_+Tq8lNvOYmCUDH$5hH)g#eYi49jjK>$i-|eSZ452F(ZQl! zGAt5?J^yIW8E*^PuXuS zGcD8Qo3#fuLj2+yw%*x;4wvJ$NYWg9Tjd*e*S5w-;&fT+G#tFH<2Ew)bqnkG{s>NK z+S52NFJFOu_O6mvs@uP>7}pg#_eEs?&R%N9@o<~E|1)#Ux6)bwZLi9AaWt^#0%<-u z{y@A$Tz_PK^!Tz_8dNMx3OKL#CD-5*XaIFjf3J~;P% zi66z~WKe!2y*)oo%1V+`fzK9l6sh}a0W+QLL^WWVbY8kTp(WAfH^x66Hu6EdVX(nujVui8c^kHVr5YdsXMYciH zN%z_P581h}Tf5+MUHwA+{mIfv^Y#(lQnQyK_<0w$H{@D0hT=DcmT0xN#`Em?Y5sEm zGO9|E$gUTYkI=04Q*-ArNv@#i{prVX@zG0_qhl|&0I`ehYQiFR_qGfFIk5x|@U|wT ztaAq6QE}zKd&uACOn<&vE~I8TX<0h}{4{>YxwZa1zbl`L$Z%9U0L2KLCnyN@(1;7= zEd&*dTB(#?uzNcdY;~u>3-gR@dnZ+U12c}q@Mqf_#0Yi2V3Y}uNQUvJ*xQ*=qP68s zxE7wEK@Kq+@qs6jV|cg$2ui{Rel^6`$_hL=K8`qBXu=O=m07yWt0@)Q+R1MdA2a+Z zY?XWBB0qQI<`zu@+cRWrI{+H0CmO!b{mV0ubVQqjzx*WtpO-fO*u#Dbhe8Ct;Q$}cJeYso4HusW z`>$hYSKu0yq`I`cJg`?cbv8G5aItc9bswNj2TmY^-|DzPK@rfse4ypu)1Cm&pRv{e zxq_4wg-jjo*^JE`P0ZOm?ZGe4ffDf)0uJrXU5zO{?d=>~ggiy5|K1@49KW1qr>6XS zi>s|DHAqR7QrgkkoRXJ~hmC_;426=CQpDNJLg>BBn}0S3zKK#>xw?Xd*x5ZiJlH(A z*&LlM**OIT1=%^c*txh^fjd}Tyc}GOJy{)GX#Vk#e?3RW+{M({8tiKA=s@}MTw@bQ zH&;<=>X(lG{r8V?ntNLRPfrdm|9mar1=(M&uye9;u>bqnKvR*IvqGxYp5}HS8Ebn$ zW(RPaI4t0%a7c1Z_zQ)>x>MS(U!5{ zMHev_{h88j9%NkP1gQVM$skP?g7K)je$Yz5{MYSGU>CLu91bP)f7^mUFnIAMRUAoh zZv+49)`dOxUmF4^|6kkxLt_6=R{vI&|EC-Ozw1WOCz_M}AMJzf&rg|d&3iLH*yD(K z2R++o3vY%?;IF7WIcYhHQ|OJVo8M@kJ_;}o{v zYg1hh=W7=+Xo|12&W@FpB^v%=!z3WcJD8k*kCX9&{%Glr>@IQK*e*6y+0IrPpOEdR z>bvXI4|5ICZX8zik?WB4rRqBDYFJfsVTfLg$6hV@9Jel=4Y51kA5?Z@5FM&7#J%}Q ze&{}LT|7^aeBz>w>@o$ZmcU=)JB*ojDSz_;S-;xTI+ z@s)+fVKbf6bg^l_g5+XSfxQ*4UPosWortUFd*jyUPR@~R;rjCV+;3Hldbp*7Wyx;6 z0w0fCuMO%U;UrBjLU_+Hy(jSWbnjTdTa<}#{Rl|DLTj`4Q19Dt%xN=jUopr_{P7}b z!S8zMkRN4RQvZw8lp5ioMaGMvR%L{FUlI+%d@<{0Qbwn1y$+>cQ=N4kTWd2pjDzpj zKZ@Jc%f=97L_$v>NrkJR60mS2!ouEuubUsVw0yXoHT`H#N1DGXpG5zs6dY>WYjiqq z`@=0;$gR356=V_>`|))e`bWX7SGYY}!x>4H!Z#jAi*xhf8%P90`Op@sjOc}aerO?} zQp>LkkNcOty|NXm1ghKNPqU;i3so0*tvlIv%}AcH}3v!JLCCi z{dg5o-QnomY;IJfDO~4GPfG(uh?CjPbIF_aN^WCX3ZQ+Lhwz8%_UFrHvqzmsp=t*_ zj~5pd#6=|mDNz)P9-$4gp$e?`zuf!dlIwdWA1A%+e}8xgIh|5h`CfIDC`Zo8oIuR{ z+sqdju$Dy;fgGc<{;pkaQ?n7KSw=4(wpS{x~2X}AC8 z!)IW0&~iSC(ZwpT@fk%;uEKuZ|L^4P!7-Aicw<)i(W;um6{+MpgnVu2R;DBE^d&p8 zouiM_G!033j}zb32ZhSgV*XtLf@FfRHx3eoe8Wvu5!$8IX6s7)CC*t^+e8r0bCu*& z#thjhuNh^E1}6{kCW5OqPSSi)xMPDu-&cxeAAqEq;VmlB|L*}!|IwK zacnV$t+E*T+$jG1z@7TopGY(GMxbe@Am+exP)8v6Rqfdzv${aEC`n9O20NrA{ZUuM z`?fTrOU>A1pKkwqlYj@R(-@l!*+4pH?u!v*3WBoKoSZpO1myBndi8uvobbUqZ;LqY 
z{S3!MgIqy{O#yV4ZMn7wuiC_bg<11U!-P~UfMA0#n=9+GhMgf&`f;)aJArAFs&4PWt5t0<$d`_TXvmeR?#e7QgLZi zX5^qS)BHj|z)I$O+yVQXqRTA8VaMtBs$axJDJ08pO_>Hzn9C{ms2PB@(6;E*NA|nQ zwC{wzpaO<3Dg9UCyMFB%DIzi7hL+o{%!1RRSRznG^&k`SLblIQqw|v6Ow&$!g3sCIJ0+k-)cYStstTkho(_Gccb&eiSX>ylvJjV7uns(FV(|w$NSyw z78gK!%j(k~Wc|(?D1a@EwTxD$Qk}Z&A01(DyOH9-YDwXBSQH=<@vMBfoH69GnPS{s zX>V_Ne!4F>wU_Bdv8_jlBy{_zAP^q#blYdq1&f)**VHPOdf74zw|bQ>p_Y>3(RJ{$h97N4{ITt#c3KYa@vFI-s)J@OHh5> zc0d2vo0d7ia8#DDXlgm~na;E?zV^aVr)tE8NSpmg9&kg|Mp~S4 zehm6euGo5r#IPz;YyC%>PWQc=BUys#z=*}xqy0M`NoFuBIFmDDy}p@x9@fstz#9mj z_0xp-bO8G2cy0QV(3g{&?g%0VS-h&fwQ9RSzD+Ue@e_Vqndc9%%-fEYGW0Mr z7HircuXH>82(~ewltoTun#a7vcJFv8-0~_EkBoQc*A|9VUKSan0!&Jt|LH40tk62M-_a9oDb7kBQxvPX>J*$sr(Q?$IUvUNb6GVZTsUp^_`6jQIU%Er#74 zotURmVo|8!EmYR<8q$$-ah7xRVFYEC(22+6-DOR>lF+ImBh4gx28a}$RA8LNPUEc; zQhByQ*XiCz&IheI$2v}`;O5RzN0!r#R z6ej^0Vy(|25ppfb=P^K+20BMTdu2o*lum@v7o4$!MS~O&8MLX+_xL4* zH)2+WDIGNYnk=%p>_^*tsl<8*$z`5>2JOYBL-U~pH!M~B_?YBCWcsPFqF#uY({3*@B9~u zBibnXutkYU_GFMooX{E<*LTAEZ?l#&WAKD}tFEiNu2>Fk#znGl8<(X%*GKL-K@&ul zS;jRVZ(Y!ajReyM_!!wU0YDTx?$6iY(91uO*fcD4S}SdShkEeECznaq&ucf2cA-vH zQ(T*&dDjBkoygl`8J0P_O>X;JU~)sZKbr22LTiyUBGa*AHI042xE8iOp@>$nCUp>l zm~&(&XAMx5t-z?zNs6|C6BElEAJd+2rEC^S);S|Hi8imEWqLL)XRL;ArOUv|xyE?i zy7DiPNmB{Ewj5EPd9R3l>}&s@+*0 zVdc6=@kfN>(HyWgDwg=2FcHR1YSZM~r|U4XNHZ3@xvUl55WeE4esQNbbQ0RNIESzJ zV&LB9(FZAp(ul8Pz!2VtO41_SVv%uJAu))*)nZ18$SZ?kK|=AYY+Z|H z?&@v8RTk(xyF7H2@~1cC!j9*-C*HH4|5=J2%>A0b34KrSYS#2SX)^v(NJ&seT^Diz zBEpUhIs{8^fdMU+ibbLvwYUM=qsTnZzU}yrHx;AS%J@P@y;|WLzEL$!x z$Z$=CbT+EYaa_#EfCReid>YLu(yb;9`0E-!&?BK#F|3qiwnnR zo%kUp`6{9$*smHp9xVQTh{;g#B+A_jf|N0kZmQp8s%op^0R-8mlavrTgo?}`ZFfxl z-CZ&4ZB~7O>m6vHA#hZrw>D%1qsl_BrTMlUrGoO=Z(I2DXYGDOxL(^O@7{~Dkebi` zm=&K9;%bdBpCtGh5%uCr2jif^3Z3^k>(b!9r3VsXu32cFgQk``$eTDRQ5c@-uN${> zP%ROs@%P-55{}!Av}i6=os(oVjw32-+ z4K_7Ri0%B+R6+Zi?%d7|-qkYKuda2oFA&jf9W$ii1lH$xxs{bd@_k{c?My|TvVL)} zrW5Ru;-Cbye;!&)AvhQwMTQAv5tyV>ihwkCk(4`uQ6Q@qTnjgA#zab`j8`Xq+;WbC z(9K9P?Epzu2A!A~XE8z(V5x#fjZ&q)*u8oaMgx+owVwRdxks~*Y~5OUAf?EVWr<}` zxC^*;e)r1s(}8+i0xbi*j@pQ7-*@#&eA}>tvBDn{kB^m%gPf+zb*3#q{W9uJPrX-8 zFm7yF59Lt;*9Fw;#t~!|_Mr)h)`M5%G0rN&hdgBrLN0|(BRXO?VM*0<3^-dndX;WCzX_!azdEa((hAt#M2tmkmAr{f8K{zxiAaF*21ae?B7eivqT+q-75BECxBcgu; zOXxEPsI#eLQ$kffI!rON5U+X01|I=Z$tE#w_fwoD&(j0D!8G(E{t(!&^U65?^-;cf z5*HW>ye{E|^|JsO))=%Bs{RdHitllQwHF-gByU;>YUaX^Z@b+hsbT4=>aa(UT!s8b zV*XyTpe7K7g!-#@#L5l+YVE%^P^#OX?reub;0YIz3gy@P=I-o5 zX#yzZ^ogGb^{&A79l@_zg!(wIFN=i{8sOd})vv{(xIDEeUNx`($dr0K+Bjc^Lzpwb zEIpdbv0kiAvu-bl)!RSuOFX(}$gI0DU1NAjyOME%z<6^?*Nz!kLoi4&tkCOxD`NqF zt-uZt3x<;z2S;a~B&;BVs^T@&F{^NwWB57(A=pFW7Q8RzHzD3&LP?-jSBlkzttVHI z3WB(MLr@9cE`LVpi&{TiY;-xOGU>o`myhz<8T*3v6E+xs52ZOdbb(M`nOCB}D3{VC z&j8fi^VKelCK-ANmZBNKd>s>ZW2fpXE&)$RGMpu+}%4ul3z=f&XA3;!pfa3lC`ixGng+cYH)S1(@7@R~j#YfkTF3ffv4Y}qIMWr-T66L&>f{y!%mlm&@> z@02p&r|(bpdPQ?kS#Y1D;}#Ismk^}UBgpQ$Y`ipW+w~9iaN3~ z)R$RyKzQbow+GnpwD;demZkq)nVXm(g(>bqqT zoYX19J4SkS%k%P`x`Icuwy_uqkSpw0DQe9sQ|>T>Mykl60~1A(6kpv*vcj?Tv<1~- z4@#-7*r^v?dV65$tvexW=~%%Av)tdvwq-*=iW7d5f2CgyNFVK$(_^0B>d|^hes=$m zh5*Vn`(AYO*hbgul`gfqYY?ddU%}89`J`Bqa{l%AY+cx+6tq3t!Jc6QC?D$VtS==t z&R@qj8m5hK-uOA=$FGh?yA-Ro3j@w#(^>T9US?5rp(eKZitA82InkEPzuonE)NX$5 zV^r?nBvkn&2~~Hj`cA953HKXW4lEpK4oBXAjmMwGagK9FMY@~3cQmc*So_D{DC1z{ z$H%xYlZA-Az36Nlpq^CD5gi@e%2ebZ98pTPdArp#E>B>r||L5Bpwn0XF;`767xAuZ1yE3TW{xKp3t z_G!LLv`&C|uU$iEmsH%QQW?zuD)}jiCv^{HMWh)Wdx8TnB86`M+&T{{?xflI{cN z>0c;)FzUro@3GdUz=eQv`jbRlw?6t>E@NSvAZ2G91y1y%%UJ zSq-O+qeyj#YqApP!h<9@ZMqs^U1Th{s=g>4KyPjofJTo6MRcTN%!#s3$z|6733mVz z>l>SavCyq^tUyBQk182gxJC65VW<{J(e$J=7JIP#>_xKmQT~XA7li@bhcoNwqI;4p 
zG`Uziq~^!K)QAmHC2%5)yj{*_ILZnp!A4k0cS46X1{^r`E55y}F6^sZL6@S&DPs$^ zEXRx@yS>Y!FOc(JGa-3{R!Q}w(ru2GOc2H-doEE5Rm(-MgWgS4_7<#6xPd;Jvo7qS zuh)LL6#;5*xv*(;m2dIlog4esK-KvAzOihI`{YUQs1q5p1q)&AG*4J2e`IjW zcfkSIuZdjB$F5#gst7?CSwc;GXnUf%FLnD@vVE5`og2x2FV%F?{dLV%0snH%4KwR> zQbp}#R^orLNW4s7!c4BA_8FjY-YP)B+M;Rc{DDl}jH%cB1nQ-s}J%BRkl~lDxGQPwD;>&VWjaa%7ITJv^6; zGYl=eZ;a*}psQBsk8t}AEwQ8)jNPyaS*CT>!z|OHnb~!62;(UY^jt=GCDDl5MT^|z z9%tFId|Of`Rm4RraU>~LeLVMUN#!gLxs#z?o-1YLi_9fFLsY{e4o}K;{O&HCCWuZ? zRO6d`B`h04^}(I+l9M&N3}||4jdF7^3UDj%KjDLvv$n{D9 z#j50rH#Q3NYLd=71mPViwrkMTI_or;%zK|aIYwt^oNtlaSZQ+x;)$Jv>=aM=a2M)A zys(bB##7$JCEwr;MQ`*(!`HV;B*(Ji{~3N2>yCtrzdx zV-7Kgc#(#~9+n2STVJQ`Q!R-K`yK*GIir9IU|yj{YcUQo2SiyW>sCoSAJP0M_?hYV zO#kL!?`Z)@6VE5i9{9!qgB3#=ZnRqaa5j|Li`+x}01-xzdZiJDYjR@L5yW4I#1aiH zR?fvyDI-E8Qf_P%&A;(@WPyi4gxd)C%WQL8a8w)} zd1#$IoRPv$H|wmGTyPYmKRga&QlYh9UqC;3U+e%lpFyj)AX-+UDj(=Z%xly;>sHYEUalO2=H=YaKN8DCOW7Cht zoQF#zV;?lq-QcyG9iNY~pCH%Egd`8}`QN*4OMg3mB}HAX)`7mwz`{nEyqZoW6G#V8 z&-yRiiuP7c{1Ary#Bcn#fsBvZyFGZ}bw%SzE51FV^zpmx{iyrlE%F#>{(17YCpGr+ zHA<9x%_hO%MRhUFCR=v~6tvFKmE{;H+p%GP7gXB_^Ta63Rkimic zFOdb)4!3B&-qTB%V#0VK_XDeIrNv^fBXp9r;t>`mcL@Q)#liC=3}{L{6>N zXsd#t&$7J)6Ns*A(8KXGIt#86j`w9Lfjb?F7oB2rMe^s|tTZjzD1rsBV_(eINIR-Z z-zK>>E8feqvX+axofvQK8qMD9WP;Tu`k#K2)qg-A2!=c*!aJRv7s~H z4*uXeds#3oUZ9l~>;gMOUV%O)q!ZN84!P@eE3c3%SPS2gSh-TFQ_EY9zWZYLGIo!bvs)tG}y5v0p)gyQrtYS#-3=; zRpNgX-!;&JO28VNh`(T|{+2sSUP|apb+%qcFEQ7@wy$p_EdSuTsc_qlp04+$eK?;b z|7ZQZL=wma(P@YOw}O1o1Mc`Mr2@`wne~PAn7!NWTz2ABL!Kh-CuY;Ae?ByU0qE!q z?o;poMg@>yf&s!|X1Nf{m{A{Va%BAk>LjUZLkqO9hWP8U3UzFqi-;$Eh z!S%eDklO?@62?7FpbKJthb)onMHg97BQ53sk<`CM1{`cktO7W7N*w=kLx~bAAm%9= z^UeRf;XjXcu=Bvl_-qb}|NU8iqyaf!UhsZQ{%>y`2vnV;%xWN7dsP$94XZV|5SpvlB)nVo~j~t zD^mf~dhv9f@R1(^0EP0Tf2ltz0WUM%-vA3JJ^qW*_hyet1Ch{Qd=XM2BU~htDf_Yy zAE6F zE32?E%yu~gpwyus?N58!L!5o2G#)^yQ+{_upg5gteAE*U<$ZPP#{U5vnxj>+E&ITs`|~ zRf*wjm`TaJck;RH=Xo;3{F+z{T4pi60fGol3kIq&#lsoga>P7VlnZt>ae7X@_|-u9 zh;X9=I|kqWemgf{W6m-+tNpib^Pon4l_WK7345|&yV%Lg7z_+)C0U{O4MkIP1NHq-Lk7D-jL|r3%TxK>HYzVM8{jB*(A;J z8MFzgK|rx%#cgkDKoo#+8xKbP-sY7fSqeM&Z?)*knoA?Q6-9{bUv*LJq{{?;l##+V=Lho5g5Z6^w3kJ_If zf2fzy6cfBX%5oW%2N`Cvm*)H-4|s601<3k$8E4WGXXnpU?#*!NrpAGg#U}|iV751<;_j1~ zgMd(Y$scy=j^_%?HC0j%Ushpl?|dt=eNIY!--X zTy`XMGz2~YML6nRNH@EGil$Z~JSqW_FGPW=l#yv9lRwR4#ZQ}W5#0UW3THtT8fdBo zo!u|ppDxiCPqqon13!OCDy#1F>@iSPtea&3as&rfvC^N08I_e z_=6>K4~#$cwR%ziCk3|GK&7RWZdbSM z*UtoP1Fzn*s-rJS%Aaay3>KzaPr6Xgc8hZdF~uK@rj;3TeXBQVDhofW=#JYx3Z{@a zG;}UUn$pc*kMOVbKir3 zwa9HrUhD=yRI-Tf?$2WDvD{w_+x>YhR5E$2KiSpIS%kx1pFKU^qaGa-`zZ5ZL~pL= zMBfJpP_42Z{kb9QYzvU7U)~ zL$s<+a46b*7zr3bLT>b7!Ythod8}}LrC`&9}rg3XoCE#Fy zB_8V)C_pBAv^@1E7O!Dl{d*zY%*oD(W`NbQh(KbudepFv;Q<&&q3o{=v)YzsE_akB z=6g`I{c@Bta7$Ovpi5|KP%*4${ZZ5YyqJ&#(%FPX%dQqbuWtRMvhbY8#_oXxM(TSj z#QBfn$@$!Hb5AF^Gl*D2+WAAfMl_6RsI*YPXx6+`mEc?xcC>WuYghdn0SZvVXtr=O zz{{Cx1xzP0qicfI^@l|lC&%7aifjF6!e!9XXZ>dP!$rVn90KeS`{Ll%1)zLyUl0Z! zZc-eYBE(ZR6cXrZoecUSO?e2^UuNf>fO>sc*%lYF25pcOh!m2cVpufi<~F4!Cy4KB zTd8q6UGa!b^9B?<(B^kP568TBwWm>uT&Hqf9*)jC6}VtuzoZA0W4|klXy4)aJM@Td z4RepV0!e@i6JujpMblvE+s$}^HKca06G5*tGas?3IY?Ht?h(LJar0cZoi5KH5p@1? 
zSTp^>7Jfl{4<^J^+hz=K`gs`0ssi7n8}L$L=q%$b`R-b zJi>P}AI;<+I}`MYU>F$V0HP9OV!a9kn1L3X~m)}6BQ18iI$0d>NeSuh&jQoB2TgrGK@DpGfE^~#c#OttA zxIe7VRlhcC_E>w0YG3kLS$YJ53+P~1YD_*2ZPNw9kY;c&KJ1|HQR8;k9GiZ|t6`Hn zuv?=1EM{s2KR`8;B5zNSpnDNi=yn_6_m`hI3aMDUjUMA{`E2Z8Z8ZX9oCRLAA&T6W z_{ppCEUE~o;<|a&J_3XO#qlKe#lap~mco73xK6p75$+Z%v{KY!9i0KhNc9YuR%A=< z3$C!j=pzGiQsgOgV1C`YYu~@S?7}V&?UMIf7@6W{A$Al65nQp7jCe!OR=02fH8}vXf^h=HsX)miAgEh5jUYP_fmfiUjVrX#5<$WZe4I86@EgK>9*O|N z+1^;4v6Dg&_Hl!63dz7p=nQc~7PkQI;U&;qkoKBX)Ajmn^=+O55Ep(UsYrQm!v#U= zLo6c|!MouBLb4b?=rYO&4%-gNb|%RA_xk1P%U*)`aYo0-&<44;6hMe_5EA!&LmWr- zB?>0izzw334o|=NbddfGgyOT0qy*4nUva%2+&u^Kg4|fce{W?v9%0qUFsM|Xg=112 z@z;#Gn%i*L6$iP5czWKTcjYQa8l_evhn=D#YfuJBA<1h zU~EgLXUNEo3t`lw54NJ|M1j%52IyE7wsX~AQ^JCnagjBIf`CGR*p^w=0ukI|H>UVv zg|K4qnrRA^KT*X#P(jfS3l!@b77JLhI$Wpq5!3kKRx0??v{2+y{N+|p6D7#7 zfD8jsmARxJxUZ-26~Jz4%{Bmh)nNpY|KY^N-Tse-rjp)?7l$r>1P$fHdm?S93t=3+ zAg-`xyayPq?kBqu`*66(v_PW4NJO-yO%07Cr-seon}uZqe(pK266-~ltK4e=$R_r< z4IcsS#$%4x7EybqpEtAW)GZKEGyy!zKWCFEQ9i5Sf3ey=7>sC?RTg4XcP{ot*kJg`N-+0AEQC zyJYl9^M-dEi2wkA7yo_&()5l73WCHLf`!Y*CVA8Oh)Yl`=B1#FHtyS{>nE=M6%&9< zV+q*wcKEAQ*f*pFx)cyVtdw7wYuxsyD=f!y>g;bQEW)BmR>SKdfO~`q@&!bA)yYeS z?7N#68k4 z8L}$cpS%|k?`b;GZi7+TkQ_~hu!<_h2d|;YZ0&l`aBe67yC{PHEea}>W|4%`g|6Jc z3z;(-iye-Q!gwEMv5@nen4Us{R3uUv%l#xX7%qmUT)NS7{}|W8p-TZ}!!$24s4{nx zQ9px`LOrk@P^&fp#OqgZ>!JWxC|HCoLE2NS`?$izK3d(iD~&Fe%pT); zlE{ZanauM+mLn%FG;r1w?B9X-R8qg>I9Xk}$`5?p#pihxnh<9BzM(NZm$S<6D?PBf z=4(XQa7v^BVeer%5D_jFDUsY8_hLHVU^+%V8VXPhO&HA@20YeEuWCdpzl1EF_puZK zNCLBB$^xH(1hzG?ixT)9)aexw8y2CytJ*jQ2%)ow$X?GP!~DRWi=3E4$<7liI0C=r z`x~+0qSJAJZJXP3g#t$>A_3^35YX{&?6tdh)PNqYp|i+hBf`P5EV4Tp8UQIgS_oG?`RNBQ#bnOCH6efHIq-DlZlK?}^{@Mp561tSFo`MXh zsjx&(R}cElNarz3>S-mTYXn_)RLfxR^Tb3#OvrLbs4O7n3^|fBA8c8DmV5lrrjMR72(E#0 zdrHn81nM#`;{~~)G=;h2Q+4fNA%5Um_iLa+Hj|bJp^0vaio$@A3CNT68=?>mml>}Gp>Do=`2+2?&nOw&VWr_OKl|<6rsnXu5;?+l$#Duu+ z{_Yhs^Fk zmKQ#`UGrgG!I}ySEE(3|3%nu(ReK+>_@+U;?CZCki`jyZKSsE2LD<b(@|}*~dsp{oNpMUVflV>dZKO%IKU(W3BW_29k0YD%qCQ+l{%XLh z<-*+4-0w2d%QvamL6y@jPKDIPZZ}yCZs@Q!sno6fvC#*Zf_xL683ojymBL4s4cjGo zbWF|z03(~G)%y}kPm`e`W9@rB#&xR2Bs(Q?In@!U)V848TfWb9-cy5p3fsztWzU*-Ju{0qTWSI zz;_H%hA__GKLA)~LMkxCpOFocIY_HYqX4|g2=*JNoVqIX16XT=ZTCmax(v}-_z08od7u~YEXMcpPfWcC zP~OBsw_v=HoX!v$<~Oi(z`f}pWZI1-;_^H(@_M@6W<_|DE9NHyYf1Rcds=-9Xqw`M zSbh5!Z43gwnNOD@A){;aW^c}zSo@!k@Dr$@b50l`fAmcMlfX!-Db0TVN}h5IxO0<; zF$TabO00m9Gd1~mf&V@u_2r4?_4wZ>@ZzJxbMTWSZgGI%pv|u@6+qzefEcu}i`z2p zQyVfd~05}eY5^C4H=id zA3)W<0)cLdSAxj5Zrj=Bx>@ zpS87fAq7r2X{5{j1-4Zc0f|M+w?FI^fi?X7ei0-|Odu!6fG>bt1i8p%jGotiUgI;b zZR!ZyVqH8^av}I97$TLyWge}VBQhsO2-pA;yUHL^uZ_fN;71-Lr7oj_a(9>e3~?m< z6*SH)byF|EYAw03&p(i7M$%K#{%}C>c2-WhfU?o3vUN=WOPz(n_asO{;;a<#TGe~s z%I3AVvwQ(bo&osQ-5=*+&c1$qk0pHHZK+eF-^(aljAp$C7^knv^``Y+B5{=H%#?qWTg)VYwS|z-Apf!t z3`PZ}Ap?hz5YkV9iLi$i0w8>Xw?Lu99Ke-N$_`wf9&Vih?(sW7U%yC4V4ML2W4iL~ zv^6ZC2@=c(0ADfyKb2~=YurqAudr|0EkYirG@@oa0}>h=fNFqqqVYh8B8R`yDmKGERkx)cL{hjkX``N$!zTdak`u_UX znq|Vw&3#|j8OL#)#}&=U`y%>W>)|G}*y`~=0ocq=VvxEau70hmK@9eQ@vos0ED;u_ z&J3VGd0@T!OZeTr+8CMz;xKjcq}7`u{{Vz{ZwBux#p_1oPAezU+J+_FJzm4VPxctb z&E5|j^-*XIetG^!(A1?qY6pbu^)qS~NVzpY_C$ytG59U&Fkk8}CiqyAd^HUiHKO5x zQ8#qmKq4I%%**DF_4)v&s2YJs;$nJ=9iL)8zBG&-|Jp2yAZ<7xdrEMd25^^d$||Tl zZ#=mv^8;3`2J!Y(WqM|q4_XYy>D;!NQ+j-!WavtuRGh2p?|F=gRGff7osF-ytyrT# zQs1Q2m0|S{Z1J;H*W9|kW$`zTUV8oPPLRaORtbgO&HJzgfi9RCr^&yPQ$XVCaj|nR zyg(|a`8LoFPsqIK_A`Jkxf=f@^3W=p!57_hX1^0qLe^)C(2MHq`Sp>h-R-8-X+x(!5MfP{>tta2zzbSkQX3^qMBpXtCc_vYz1=^0 zD_2w>@w^{^Y~UVf?aiCPF7ggW?;3*(j0IR$?&=74e_UN|nKW)#{6{LUr|rr6Rh-fx 
zpS0oE`=0VRC4*Vtx3@pkNdshAE|*?lBHRv4Q(ue1lB7fHrHK~EZv7uI%az;ZKbB?tW}Exw>OLH-dAi$iU7$3-LrH?N%OxoB-A0G+=TEd^E)FX`p}XGqo)jfu zf!<+^xkL2$0cADc^jQ+8-Gyf74-k-b(ln>$PZC6Pj=rqOz7ul_bt&)FtdluWR+2AY z4-bNXkRQi?y(VqZ>2b|_xL1u9QX~tz-jBqhpD7h+$$ZPuU3*ixeVkqqB~(l_Fl2Whj@^l-}xwDS?C z8gIi7yP%J%g~0^U&7&@VoF2X^ zFi8_{PTMm+s-&VYcxIw5Ua&N#h|@%1g-A1rn>zvO=k2q1E7^MQ zeh~Daeb(vkU%eApM~Bqhh<>G;@8(2F?edOV`yO>t@G@2>^)ng}%foe>MTyK>fwq|` z&rw6m2k0{%0##?tQXY)?90IUvB;`@vcD-9&;Qmg>;jEmtdn`XhvD74s%YWx81gvMo zd9%S#NLR$n9uyFU6cL-e^p@^F01THTac~Jo^HT{OCyHwh7d!q5k^05sMXV_N z$ERhg4QNN-9*CekQ$A2Le=VY)`tk-IxKvo0OW7-+~x6EQ#J2PxNNin`@*KGCPKtubSVk| z(WMPn5F)#0No{86Rxeh#M;S||9R*2zOqbq)`KP%MKbb+xWHlr9Z#`$#FH-@cMa1xv}k!OYIcZoSl? z7$-4$*7FG;p+0qIGW)XqIKAmPKUFv52%mYBG%fMl0{`_`xl`7uXxY9+LepoqrnPR? zVz>8y`mdp=Ot0E{E%~o)fmmYiY4#~sR7u(wRuN8*C(HhJmyCE|{PXx& z*>bjF?{`dmR6~hi?0k$U_Zfn)PD?d!0J|mgiMS2z#fS{E^-@lt)pq$QK2ibVW4?Hb zq=t6aPWRHr^Czs=ey8{94Ed4KsbXWBKDK$9_6evV&FR&t+4Ha`uegrLW(uw~S6fw}0e+FAu|P{B(LL^Hd#D}e3ZLVJk=;&E0NK;?3qI_U~L77^baqv+7? z91np!w4q%DiI_XE9fSrwOL(`h*?D}0O2X9L+9<*1KAM=$mMzBJ|9z{=2+z#XCNq6l zVk^CgWfY|olKotx0@}B4I1qS<2p5$2R>_{knj!14=nQsH-G08D+PD~zoa}L)${%fA z0r`jE=erSD+jilG{{6rlJAJd{eC{63e9yOgSRk*#F@ELB_&WFfZ9LZkhjRi+(mEk& ze@k{SN7#PuRdwEXsOrYLURiU6(Gz5(yCb*bc9)*M#WX_YMCy8FEwV=%?=#GnHn(9e zqyRy8Wo`rT)RsGz1!RSHNaQMi6)}aSzG%GvV@?zb(?AO~#3X9vg-7gaOXSBHoe!X` zpxgcwDSl;yzcHboZvpZ$eK?9unRGD$gefNX)%YA-Y;YUx9=d4c-b|_lfV(a#o(t-T!xBVY`(J@QXQ|nZ zh_N;a2a|Y4mwWP1y*f}LlH0v`(cbaa~zHicio99n=D=o?lBRy=8S5`IGb|*P&0wI+yvhdcENsC$?9paD_Zn_4H4p zOdz4kJo&pLSn6VE7Z6kprDN!XyAyu*unzuFnq?_!(gJ^l!dNkPrCyv1xaDMj9Dp3- zB!~I4J|qL`h7olOVXv6GT<@)9Mj4l)<;R8>@#|3^0T9>XF0p)Zk7~pYOcI_2{)~)Y z0M~+TZT#v?U=jsPk3J8C)Mdn7dwzNlPjd94@mc?E!>WKycKWcj3?5yL3w`ZTUgxqz z>juLnT48>B)*bh7(SAJNah-7-5>rPTR;oG5-ge=hIXP3{7K@Zh?+1pyDoB3qScir+ zGt`5}6?7l=pr5m)az`OoYD}+q>AdhXnvgSPMwx@$z<1n2YIF;~ej6GDt+spGt}VC2 z90*w`K9jUE&1xE*s`)K3XPr-?nB^Jn`Mv3WvDkg%mG8fnUIrmFK4q^dHYbw;9UhTl zaiL{i0eUd5)i!3E&TIGhStu(0zYq|MjW#*G$t zDT@>Z6cz19$(xa6&BO#VKhBaQ@Y8CkOp#powiA3{F7^Nb+D?reAvv&&5O7M>75l9G z+|i8XU2S-YIp5Y@O3fRL5rh(#tOtlESLchZVk&SlJ%4H#tka&AY7R{U$R6X(DXt@| zrWdN#A=UB=Iy{#ylW(hM)4xTGGuz*O5m3>ykiL8CEoF0;tCQ(n^PML%mc{$Sib_{> zSlR`vEdDZnfD$i-61O?4WhJQD;+2!Et62dyMcY3=tWYa5fIm(X-Wrf{PH~k0U4mjw zlA?V<+>%zr&G-R#a-WjM&&(){i~Q>g62-3Cub=vTnY(jMrqL{_~LBg+4efJv#e z%Oxn)Cyy09Uo!Z`bljfiCd*<0l<jrd4RHvW8WW(-9+Hgmy@==zl+=->J^soF=UV=14-8Rkn2K=?-8b|`c%anGH{;<44NkR@=*LRn=4850{=Nal@CHh;dqMwVVnBHL zkN?f(f1ocuTDWCO3a{e+{*E*(ZiOUnw1#vN*WarF-=PO@Qda!J=Lc4oc#GZmdOfyd z_dQn^f(?OW4}? zKVK;-2xz!EcK{bZ0BNNv1X!mG>ASy!X@o37gMWRS^#n_pq1varCf0Oz@J1<^9C#-o zlE0L|{}#1k|2>!79~2N8s1qR7&)&s?Ss)0I70-h)U-j{;KV9Zs8pk$(q|Aa^QX5dO zOCR61y48_T{}t8LVR2=ko@3?3|M|kukcVDqf>;aSnkH7ylL?GGM~n)#^E31f^nIT5Msd6#A>qv?hg5pl6j*-=37|`CpcdA5PJzc$KaDpcYfg0<<==_r>}o7{>6#Q(w#yXql5Ahmn%V&&OqT$blpVXV>Ug z(uWCH4y0lBW)NiyZgnE^Hx!~|T4(>)50FoQiNUxqv`uQ{iKrC@e&U&TC`m+82}0Ly zIlk%n@{}ypziCA9=^)5Xt;6cb&{BEKjGzeeMa*)yRTl()s+`n3?!q8Ju)^nfSe2y17 z&HH17E>LEkJ7~ioq96=aXqOuQrDjq*p*+{=kmz?mUMt;hF# z1`^vSU%753fxryPal4_0!*MD6zie4Np$b!l;8JSbXeW1-not2pei?u;*WMQ(%}v*K zF zKuqq_4g1$!R4w{MU=jD}nqYfhB%6jML&%8fH+Mpm1P8yngd&(qEOMsDl>u| zsGK~Z2O7e5`brp)-fDAb_X!l)jMOa_zGoVk@XR{3-MWWF-yb}D`Sr604?p8WHw*K> zfL}GP0(MvcE{lSrd^os9U)CfgfB_yIJB*^fnL;8q*R43Ga85=q%fyMN9rD^|#&*!q zQ57jwha-O{Lp=Fo-NW(Eq6_0?n`m{r^#cS^0@^#!T;8-*xfK{41YR+7s}4XO9{_}& zb@R-8F)dO^(;wuLE|TBHCs9(}ahtF+QUFw+ntMR=gAIoabr&lX*EcX$vLNvtps`0% z@vY@``XgH>K!Up9J2^ko`_`-7l^}shc9}PnZh`z7KTC{b6NvMM;4@q=ip-dKX*MF? 
zLOXl*xjrmt(+|Dr>P;PI+>P&g45N+aqi?|3q z)e4pLrmwi|(oLa1(72LU>xBIJW+?F?-wP>xHAnq^HJ5`}0hLhBC3=Cc4 z{B+B5;Pw`8$g4BBW*6NliSb#nip%O-pjQ+P03_bZS+5R= z*dhRS3zJbK%Z1B*_1YZNNE9cy0O$0qYys%bXzM%zOo7_EQp&Eqs!4DF5$cA;fJPu~?1wCyQa4 zgs@cPcvD((t0)3_rkNw#?2^&Ts8fpWBFaX5OY~HwF4z4b(p}m@sr9BJriqIh{T!i= z3tHl9ND2coud}rU;Qmy@oYStg#W$*a42pswTIXt(m^)vw0|3K(Ph1Shi92@GY`+u4 z3mTnW>j>>9u%cF?Fv|xR>T=`tu$T`NNH%1y%8t+ji#tf+Cty9)0FUq3C4?$AG`T+d z6JUOKh(dmCl_Z;(@wPFrTKs@v1_59F)Vx6_wQgX})?@!&WoYjsv1V|MDHs=2&Yu@m zsE9hOc-2n#&`QZjx;NAreAuX-Za%v2TIH`CC5=H5=E$H7l%8}Q#&>25&17KEl3n0#lvy4F%QPe<(23lo0MF@W~t@IaAT$uPcbliG**nL?!^gltV;n z7iBeEh$BAx*iZ{ZKHOlBRKLB3ZOpreCSO~S%s>k5(c6u&pL*)&MpPRNv)g$NQ&_}D zTjJZ4?k234Y#>vJc4M~;QQ;b8N;or$S09m`5Ii@Q)Ck?9#3N{VPjb|T z`a0-?M7PhQ6xwP@#aD6K;r&PD35s7}lgT=TqKQ#_k32Er4HSe3zUS@%X0>#PwpVLq z$P?s@H4qSG1Y32y@tmED7$&QAyf#+(I$PGT|2tFjuY_F0j6{o532MVa(5yHo%u$Dx zMiKltZU9JOUUG-x1MLlKd^;OO=FG=M7gvFtYRWAF)rxRq9EO-`LzptU8|DJIP<077 z@7<;1uc58hNTd8EHJaIQlF%Q8-WZ$7%dY-LIG!5LLvhdW?};Lf`Cm+57VJs;0@bIzMv1ui zDBlHmCTA7K;{IJz#L3}MDY@T)b}Bji(*yr?f*;^PwE>W3leAH%MYr!-6!n5HI$dZm z4$p;@a8=#_dC7;8m^!YzK1UOE+0$~5Xjb%5FGcr`8JdpShIv`N0h!HZq%5>~9Y?U< zzA)fX{k;)8k)E*@cp`|$(rxuikT~ks$*eXk1_X>6nTsU)X3Gl$c`XN2&C=`CX=MVv zn`lFxoIxnuBPJW=y+ERzl)ERybEbL_Jt}RC^HtITPKgMshTSwwNPXq+HVg_^ZhXX_ z)5GL3_963vSb1k)CLhX&!bcH;DEl6tY||IiEcB2}4|JATdw!;~v z>txj&(WDH#bNIqu+IHeS`Xm9MW<6)`)t-dffyp3%PM-8-{a*R=dv=lB6bu@&lVy~#Rhg#Z2fiyX*5Dx-}zL38VtiRiY3KRmY!aQkJ=Rdy5d~@ZnNBE<~=)shJw1 z^UB4FsI)g0^S}9I4^=Vt^PZB82J*4Aw}zqDNM|j4GY-7rk z+lbm+`-*&dSZ%>L;YU6CxLRLH>xXZS^+;_`BCif~kF8tkWS0&&q^RrQ*?6$3jWpR;ceYxJzU%7l$jaPSo>AFbrE7}SOaC5W3X7%SQWvWY|M57N!&U*O z@k$Q^36+)meB|TzFjl|~0)~1kH2w}~4n+M#jJ~lrNZ0b1nbw(o1e#K8+QXD1)R-X| z#sVrq#)mjZ*&T7LFtj*xN@_X6w)j)%%o>1~JIm2+5^b+o7++(>7IheTX@*lg;prrt zYGaSz*6UFkFri;#WAMbC#Rc?%JAyew6v98!&OS_Dip^!$W`^R25ScgqPnNZ#?-dz} z`QEG7id>ZhCz_XeMA1(O8p*IE1QjvWuh5T`_pi7Ky1xyLM?Jf2dmZO;X&2C*V()TI zq>WY$k#|4O)t9v6ouqB?ZmDJLa)k?kRo#*zeiM24B;ajaG2f!XTw(Sie8Bp|w@rn` zmGn`<3b1NUeXj__-pk)Wpg#$j&H!jj`IMxOEk+vUv;-4tL^H|rZR_NHY$!kIbIJQk zK21N)9$SsNP4taCa=c;&xNW%bc@}WlJLb0SE(rUkW~UH-?!( z3SR|u;MXg)GMwbxY6zt!U#sbVb^6+(NN?JkL_JO*lqG$zR%)Pl z-TnrV{d@yy+7fhZ@4bheyxJ`vOkpxEDOlIH0ns^E1GMV(GhZbs-QffWz*VH}hX^WDR9DvlXo;>yiof{JNM<4q?-;d?si{1q;2s zX;bNObq~Cr-YnPPL|o1aJ>d2-Fxd*p$r4wHPx8@ z2B_Ffo^=pP5_{eWUo98Zc7;kDiu1FqMZ02DS|6Y;_Km40*YsE?!|VKrOm>hs{e2fX zWx~;>JoRbokAY&=l=|Rjup8S2$%rouf17%4PaRLS7RD1W+V5aH2ggJg`h*yv~5`>yux z15)npwEhHMf199po>Tc{pC4n##>|LMg6}5IZSfh&OM@3ey=ZhlTjJzCL2eu9?9)< z%XSg-QMS!l;Oy83_wLBk-tYermN{_h?k-?a#Q;AP%ov%gPjZ``IT?soscR$R>L9%5 zc;22y{!BF!(3s8!yy%ows~in#w8JPncNi!ld_4;jkqpk@`-ZSpP_x&eP5qdF% zsDuK?UYRrdDRp0f49NfJx>&1<1Uj=2BYgru3WZ6SIDg%BNEBgpIAPBGPLAhmbnJOV zSg4D2njRFtJqTKYrENav#BW@aqk;4Ng^laOp(AS;oJ52LIz$$B|D)naNyB;cy+7j_ zt(Kggbi*c=JKnr%maDo7r{+j$ENa~LYzI?V8hY^PVg~GEmy&Y#kwQx+EYb9=wCVkiQU|_+ zU|{Nz{VdJHb-%cct59MWU#=U%PLYF5)Za$nIaOL{N6x(WXAq->SG={{Y1(lLV8;#^ zSNNowOXa-(1?BAdL=J<&M+WBRSaD8;R3k$xA=(qXwK;5ySgRdegeGZbZOSqY)H@gq z&)>s{-GPBpDV;T~soR>~2E)A~Da|REXDE!g)xV%eK(*0rWMTBFdj_-~HWmSWMK3BV z`kx#cs2CBAlmKASzl`bakK2{r9}N~5!{BFs(9f_49A9|#aeF^>GuMVAx_u8VCGD; zWT+i2(>q@~Pz5bbrP)qA+6s{lo4hcSGSxybcYqP|!h z^-)Cl(M>?V2%HC!ncQ&i2zLoX7c;0Yy#NBCCAcd6?<6)P^+@-nV35}KNTL+5ux-0k z4<#AFI=Nlo0l{T|7PN1J#e9fV$op&zCqgmA*~ak)Yf9A@^gFcc(@A65L;WiwEMf9k z#sf869f5_vhBHE2#bTVV_LuxADSq_n`q=mqq^Hjz5N7|Fk~K!+ngIGnKGPSg@@d%a zO&*tvQP?eXkgECEYrLDv37Oe919`%5_}9>(5)6lt1;=FuNKdlly{rP=s9D0GaVcv& zDU{NrH|RU3(8+<{cRw=^urliZJo+pVWOCjl?Ac2K^qX=md+klc{u516k30AtEzmrj zPjoty^)m~J4J?x+F37(E3uBlD-q-WV`NlmWPE&)9qtsr)-Io@}um0FbFD;OCm|%{k 
z`P2(=TMs2l_km%DR4?Gg_Hw;z8#aEbqZT9kI17cbTjAo9hkfzZS?t3x2zM*~V{!P4W4}2AjdLYglsO z^$vAQ9QMA)sWacly(0=xI_@NM8I>&ZjG)G^TDcrejC?{0cq}}=mq{#1wab+AjV}*+ zbenpmD%%!%l$`t9Gs#ebh?V(IH9H&50+p&j#YJn`kF}z=PL`qSAO7NlVW!we$s=pS zU>N@~sYy4hFt?Hh{bEoWN8g`bF_?O3)dVoX+N1aMC})XaTUb*(a`pwO)aa9S`&5E_ z0HSBE#sK=RJXjdKbJ8H0S7d z3qf{YyggYG{X!Ol;}RhDthLTL$>z5hlV{BR%(v-+ci~dbClW@>R#e{o^V{|Q?G`ns zZv@%eX0q;DL|lC~mQ{3rHP8JIhg-Z)?N)CN=}9A+KT3@k8gzzOkNKz+F~-d>k^R&B zsj6X=NakTxD!$8%=}?V{W2}Gscr%sWqmQfs`i|If5l$`X{m3Sl`AfcU)yMSqRlQHl zv8W$8sm{_{o8!H;Vnj?ue`?KragXAPiL5$vj=MbpXYgpVl|&w??>u}YIjfhS0r;Fb zP*+^W)`_A!`Fe(=SZ|L_P&Z_QU`4D&!;X{ug0NL&#ULL(y*p+-j{dnLv!713#w2E} zI7_*>Hb)|>Z^@ITsJ?5busXtAXJ1w1LQ92@j=Tx?Lwtunx(V0u&;h@%LQ4_1Gl^rIgaQdh2l-?;1)aXCcO_TnYT@(6|nc z+aQKh$&9HQPx=U*z8^S;xr5wY?`A|SoV$>lrR+1~TuJoocs>4eNIcCOi*LE32U`Fq zG<}ma@PJtXA6~O^!5n8V*3b+7_)Nid#{Mib)Ve>=7Au)rx9ZEHIADsv^Tv-k11#{y zjf<$PmYb0F(Mo4amcD_dswQ);a!jmuxFMFWuh)2ra5PHCxf*_60lo4$M}VvaIX2BBnn8+D+*kP%5Z>07+*41O^FaZ7CkNxGcW$P9yk>`UDRaa-Vb6 zCRCfgt2I*E2Wjr5ujY1T&Xyw2U)M!Ew*`J zb^AjwFUbRQTj@YKN;>FN;mR!D15ne!Rk4SZvJ+<#6+VDfHl1nPdU!)Rj$G}uw$U}# zkoFuvemYd(6L+)CkgUkSAO zD`pSYKUv@WP0;;Wzx=et8GqN&DJolAO>&>x(mG^_4)^QXn|a<}mSwmPlcaXh&ASaN zlg#Nv94euA3_Z zWZA{7kY&(PJKO1JDW$<&oW^>?YKlj#&*z2d$eYC7%(RZoJA@@V`7e6zpnD~(T&E4B zdB@50>}y~kh_aw-Xnjx)g;o4HhwiOUf_O>*tCz3XuCsUxCn+=xw2W5B;GQyeFZjj_ zw4^{(k>b{lI358Lnu~yx5goK?n;_@>%BGoIG-JuRtL^EX|8gH$C)a+A26p&oJ9wn@ z{`&dlxhqT)Uq91fLLUwWKN8ItUe`0%ihM;M`{i7p;c8}V;Mwf?0FzI9^GC7bA~9tT zWj&}aT0>qY*dF;tWEXowZg2^Jh3hr3`6lJl;u&!Ai9~Zqz4a`vN#eK3gv+m_-&n<0 zpf}%xyN$Vd=E~*75rjB)B&B}F;+g(XmEbv(Gl^1^6bFol6@qWgYHT0J=W2edxrm`( zk1D&C^YO;5gf^^}sdAgv{*yXB|I2SD#XSsQ{NS71>xOs2lMi+F2B%gb{rI|PEOyta zs?^bNXoXU#aPV%`8w0}dgDdm`z5BHn39@Tnfxylq_}#y`;U0cZj8;;HODNI;$ATz&B+aXuf2siVC=O{wj1lDox=R#b}sAtas; zIGq8^amW~h@^7>N9J9xXX8OU?goH{Ad+@;+4ppnB=!byq7#j~`>~O$jOy0=41mIpBLkxWP_woE zJb|(AG~XcC6d7Ejck4goRYI_`f?Qkb>ST*9B0e>vz_sNl=Y+~8RNWW#$Fg&IhTe$Q>XE=`%cGuPXMHihaar8aDHNaTVmWp)v(Z(@a6N;v z(qdzw)$LV~mb!ZTlgQ>_0MAFm=d(Sbx1LXM+F~1Zk6xkhIqP#`6?YjEsLHW7^o%d; zx&_5>98QME-)@`29V*xnDuI^OH<4_U+MI)Q zOBg*tvqrusIaSOp{(h?$Y<$SaJfSyedK21{^&|@+YDVp}qkWdfdLY3A3fYP`p>x1m zJd2mHy2yANu%GMizK9bC^X=2e%N!%@i!X{`eC2E0>!IlmEoFZ`nkPEa$5xOyp_wDZ z;z;LF@R8q1ZPrU^7rDAy+c+!Hws?w=frRP3_=pvrdwDlqPKJ;}Wy)0VnN<`}}P5 za)I$EUrEYf7n+0-@_3FXx#(C+feYDbu`1%_x$4oO)X8wZ(|pFd&c~*Ae zE}-&i_U)}^Go3lBMVUiw?lb#_W{>y1W3Eu5VC?Xw-E#0XzI#q~@8_AxV~-AgKsyt_g4Pq*ZHt+;nizV{McYyu0U*3n^5NB!%8ipxsPTcM6L_{`mFmCNwBJnjEcJ9}M7l&_3zMvr_Y0jjJXom1_05 zca5T!<>smE(fBe)5?8{zOe@TJVjd(BpS3m1qc~RHXO_VnQDhtinRCX*4whVTMMarU zKAzPH=27HOQoJ9EM#Ysjq6JRz-?u&pllX9C_x(t4lBxB(wEN=c-JRXfe!lkh&km(m z@OwhC=Qqe$T%?nm^kwh<_NL7CAtMu)lF}6w742BD;C#T#%X_|fV{T)k^uT+2YIHQg z^(D7L0H>M1%E7J=StGhdzFWCEl0Pm?YPqLyIb-)_PRENg#IJ5?d=ezGu%(vkR=~ut zwUZ!m<(RWjMK9jPYID}!4hgxMB5lPvK~bZTk@(^q9=)cfX4S)oEGg22gmhL0lwbHH z&FCXs3g@xB8k&XEeWba-#p>91Z z;weqGKQUr8Q|J;To!(MVSm<;RlY)^AVDI1;yK(XGzI%_~mt_t--P*>TogIhuj1~)b zccGz@Uj3UhN-8)@O$!SPn9mQHWJ9)E%FD`>D5VdnYh%w;6$>&dF>gP=@{^O3ufIz@cRzi4okub@Hr7GGK;4P{c66zytczaS zCPx!b>nhrL>xKWk*H>Z@T2?#XT+;2y$<&kM{gPhutF;3%;Z&p%Rm|36g+Cn}9o5M2 zK3<^?h%6^k%-?D_o{Vc5#ZZ%6!qaK_p}=$Q+_`UEJG1un-+P`S$Pod--nNQixByeBF{|Plx!YsRKklt_$4v&$pl#+F7;766RGWY55 zayBTV(MYR=P)){wk6qSkv}?jh7|n<<){bGi80FP_h*~-;Vv`URMH0Z%K}lnU(tmhJ z$weLX_1q;I2eO4Tcx~;ov$Kzji;ETQGv0+>Hem~HZE4}Wq)(Io?rUPJqP~9Svk(+} zb^yUK4C5##E^lCZ_VP(&_2xQXx^i&B%RWjK!+Cp<(&n`ksBIdeAC8pM|_jcgok-OG`^UCPv=R zR8&-&EyUS5jY{m#mwrUtn>w4P>`xb~q`&^75{<_?No=ekI$!oXN*@w6r(0w6yYYnuge$$PH}$+hiqWW~g?h_e68{vqofC1tZ_PC&I?Q(ej*UA!Cl zGnH}CjhdP|Zd9@M_V0#<(*(@iAf-B*n%qa<>Xq6%V 
z!8B%O)W!t_tZGRY)OSzfi)_6j7x*XJi6{t~huE5oJm)7yE$-zxUsqww%*@>MnM?UR zDgR-`L89zXdSPj4`XT;NXwTy1$gjaakH+}lFZrE6aJYS2^VP9D;|~J!PV-Y*=>UAI zd(`>6xlBwfgr(qKPHp?q9S{?Q=0k}7}Y`u-lp2tDqgYbV1myq3;uWKWOYPEThp+eMS7*=7Wi z2gxAKe4-6g30PL<=2U-NW`E4QvwUGQ;^f++gkT~>c=u(5JiUxH%X4h*c!_RnZ=YO{ zx%>N6?Z*cjrHAjbuOA1xx?ayNsb)#enIFp>N9I0_S^6MJv$tj&>h3l1=Sm^dj|*av z4v$9oIC%rSLqcl5P7=~~?tZ76%g)GPuL<$@pRkOsRf{SOZ`gZPzYSqDV$FI=g-@PE{A{465cV#w>Ymv9g=;G>nn5vCsIMi7B6=Aj5uwxv9e~s*atS8m+!#X=ynO<_4KyJDXQvcn+HKing{k zZjNhNdWF&f#f4(G0q(n1=D)vg@FbU9VxIH_PAupVGi9Kqt(tmr>(+0)&aSRZnV=&@ zHD{p<8J?TNLn7^-n>Vn21j4_8J34QXrMJ2EB3c~=doqBb9`KI`-$Y&T4z7-jjGWVm zUwLr7JpI)NK!(c~pZdMBGkIo_6w3BWdO55L^k~<9yMjEhVzsrFaM`PEfLs=}*cD{x z2&V%zMKMWuDt?@AEFTjAuZQ$6PMvr4K=pJtqsFwgD!McAEF$~jErT16G%7*;JG8cV zeAQ`lXTFi}869ZAx{gG?yre+Xe|`$OY@SBDu}jYSaClk5dIRKFpl;#uT}f7FbR-}g z8D;*cp27W4I^=ZjfSlNb^gkgE1R#&u8F;w@7M!( zpd3N-u0Z{vG>-Tkwy%`ePFeTF?zGwrk)Qpe_X9d0o9CND=_>7aezpYWwZb~JID^ZX zF2WwH9a?YY*wUN&L;bG%~wu7+ESf0L<>{PY!8$}c|)8NCRBId?mn77WP zFuq3K&#%KO*=^BmJCfx#%_ z>&sTJrR7j+$22a|6t2L*h3sldfnf7j7Y7*vV z(ud4U6AzeqG4i1rzW}+kX4g-@RJ-rtHnDNzK>;+nFI7`&Z#}8`kP3+Z@qevSs`Xg;w0(syGS7Gq|{ zkfuC4J-CK7A~n26%HT9T#m#e{vEvY1 zN?cW_h8ii~uM4Vo0r(s~-hY<~+vVD693|g-^!%HyAiu{ZVL5t-pK4Fy8}8B>zB&cm zi=S)UpBgN_wC9@~(|;wdW(c!}+T*{9Q$JemI`g^jdVwSl7?w2UF(DZXub7$yuC1*N z!%dyfAK(o;dsy_g0EiuRaztLUlK=VXG3h5H_Xj|b zQZ<%z<^2_VGlka*p|K^Uv*p1m379k`9E$S$Zl~4VSI%je@9jq?)6~45gjs_kdQemI zl2)mizyZ1%vk?A2dY|!JUH$mc(TBCcZj9D(7~iGoPjGN>28@N@-!)xQi?dT;9tR78 zhs(v`E0J+gS<={vkg`Xq>a>+K{6i(c((r*7a5}O?^travv(kosH&i4q()8FH5t5Xm zB1C|_!|8|SZ0Fb@DQq&@ILH6tJl4;mEp`5yhPcTQIN*S*by9p{bqc^HV7Kojf zN-Jx0wI@6_nkC3Bky_RvmhL{CwCx+uSd^K6Fk7fG_N-`mmy)G~@zCOZnRMJ1Ckoep zY>@fhKgXpz`wjTfozK~jm(SwWS9daVRBPU_5wlTaoM%m(`aPSb(tZ9| zBHbV_kjbMyQ6K=RGB31k9ZQE?U(ZdxpXJVz#O%J-d$tUC!L$VJl~?Y>&pkKQ&o0!V zQ~bgFqMohabHRui2Z6YHu9>}HCLg_U>xGgw2{Hw9%b1Q2urafn)`v|--rc4)0dJqZ z5GS}ZTsmp$jj-38K?!+eR6l*-R!HU^f^6E*0ah#%#vJIsa1|&W zUH4a?6cwMI4U611!6id9ispQ;Oj{hi*R(+vY;NpCTiI4&nscd}OiGo_e*bP+i%e~6 z)<&ufJMC$G#R9d*QsIWtP1ycJWUYOkul5X)jh5ouhAXoZ?<4+*@&|W+NRnBp>+_Ty zUm!)JvBMu5WE$hJ($Sj+zF(GaA0&zc-qpD6|3ui)22U-Vp;L1ON*lA2ZR-ch1T*Gl zI^+tN=$~Xv7pas(A&2Bs;R-+GBHN4{e;}*X>yqfZGbhJ7T08x--RlEsrl5HqOx~)5 zqwDepa<4!0-w(e>LZ{sO=d9&(^-NxN16?)6l*T~1UKYVv2QPL`9(Iwm>PKpyxWclT zNjGm3Hj0uz;t+E)cCbkmy2ea46m!XD(BiVWXFKy~nd;U^6l)I*I@W$S$`ac?B17I{N&~#ODffI~}MX(oKp!{Y078qpVb1uOGCa zFhOOoObqUvt+Krf%J$bTiJGqZisUb|ehPYgJ=4k>z|u{tGIamS##0Ek=Mmx3X?ZHW z49m7%VJJgBuH4*QQP`ImXmI{LIqbi&x#0 z9|vTJ-8LbA#K^8{teY9`!e&V*-UFb*7h#T^rg7I)}>KMY~L?i))0}Mv7 z7yC}aM5~o9@rPJzWF!wArGQBW(nE21dD+8ElA8Jf%6+AutHaFKQxRg@#@u%Bs{{S-|*>BQ)mUjpmK z6p#0td_S8Qqckv4fB3^C*fm&`^rXmB&spEX{g6O`JkCv~xIj^jemKcBR(?FMCNyZP z)Vsn2c7%l>7*l|(Af`{+xCoBblMoj*KlpQbuyo(aZaaxEP4_0VSm*wDB(Ormmf}J- zk*1O}`hFx^>qJzYv^4d}lPCG3@e|`2g65KM+;B|rAvk0{bF51fU>(V=<@nAZ5Wp^P zpN<;S0DquIa5Fgl9CkYH9?wep?-hL&&XNi8B~N{*DsIo^=jZqIz^k|wR_2e^7s+0i z5g1HPwHk?9Fc&@?Xp7i{A}`A%ojJpZoC~q&7u$HNO<(OC6dGw@j>I60NC?H?Y>=t4 zk@eqG{F-i4=J*x#YBxZiSRo%p2&s)pjTrJ02LXrpE!)^Q9D17w7mCyw2awWb2wM+S z#2yo~rKpmv+FwoeCB8D^i0%$z3gf7&LzMq^4~0tCCA^)YiOZP$BKEUe-P&x`h_Ih+ zaQ9$I2tFGRSJ4CnCngOLN#DLyBsXD-!8`MSX2)JxY2d^c^j=Ul%hGTpl$0C4jCsZB z3Qt69@z;x*Yp1BrkY9O^G;&!>YEO=jwIOe@$7URxjtgvnjSCJN`VYj$^M5XQkw9U! 
zM~b$1psbjqDh9#yUXa}zk+-t)$LBxJ)wBHq)zjp?tX6ZxElVUhK5QGVq>xnuxHfFx zAcRmO!CtOO^1(0c}VVtr6e zGVcBmT-ZPF{`Y16^EZq<8{{{6w()Q9WdG-X{rhjL>~L!~*ctlAZ2s$KF+aqv-iKr# zPX5=0{>Rn3lOuD)J=2ob?JoV-&q|rVrl^S+uRH(kYM?nIfND-cO~vCf)VJEMFSO42rK+-_MqslKWY0LfZpT#bMtsF(x&HfK#Zyi=u)VBKy(%s#Q?rxDq zFS?cP?vMtFg%Z-;-5^RSAxH^`bV-M_N=wHXynBCV?{l5~UFZA{*IH}NF~=Ow{oKF% zq5aR@{XhPpdbo5rfuc$}XA$U_ilGZGss8t$xk8QjYapQlY`dN9C{fvjsiKe3_jyvm z*Lf?vFGl~{`xGibr&*><{5_3hPQt|zeB9xgdaLJu|2bp`Zy#?y{by0W|Ne^)p+2rv z4YSj{|Ne%o;XPh#BvKaJ|I=mu&$S2VBCv)|_MAF({`)UPB@-GAdAqjJ2QDFAw6ysq z_u+hGC;^dljO7OoE*5@o!~fr}PBc2W9t1jW?nWSN0t%k5pWeiTit#1i`jUQS zR*7YbY9}dC8G%1heC`e~lK=X#Up*ezHI=|uD_rERH0r12Odi)R_kdRvgI>H&Ko8=b~!S?(e3>_|0O6qZJ1B9-&d7;aL zWWbmFlg%MnP-8V`7ib`YuW5NI@%Zo41i7M+MT8{!;cF5_%E-zN_4Y!J zh57RheMa3QT{&?7urKFyu*PvSZB5&hy#s~DNv?=-S>Si8$rQ}J&DSRaoKpQ5mHY9cHjNaDze#_8^U`cWm&V7G zm1mE(*TEC=!hrj`>_TcAk#7Iiv&N>O_;n>m42Do6<^%oT{p7~^Z*5mmrNQc9R2q6a32!a%r3(QpP&O1F8g>gM;fSbn=v zPAXCk(4giDIR8U#Gn5=D?t7pLWCa$W`w2<7F(INWQhf;^X1xxg2{x^v5^~d;vAB(h z=(Rt{&1-VGK|cw`mri@ms;ZU~qNG<%^wDrOns5#%D}TNBHcXULX5a45vT1M`>6J`Z zt*%tL1Q9CITc8*=-O&FP|Cbx|v}f;5rm9kF>z(e$51B|3tfU7WBsS0JoWCraRC;%& zPH#2-NoA@FOikWsFdgn8j1iq|4b=L+kT=wL#uEOSZ^i0ogcFgU9|XC8q%R_3Bb$%q zYT8-R)n7R=HiPat(jI=mFU~K9&$!v?nN~E|q;A6{RrFZqIQ?NAAFQ)O|DLk#PP?Ky z#j76t=u49FMfR+pj5MCyN*5`GBWd=Hykr^JnphL~RI{_jePcF6)+>$D;ZIw|as`Z@D zpl%1I%V#ECA$dRyQ7lu*Casc5@5S&F8x?Xot#cqL9Sddqa zHZXXMe6u`H&mfu1O@&{tGN5xljSdg8OrILY&)#r+a+)tb(rN4bw=H$ z&Ed?YXk?^N>^@I}zssJ_Z$SC{3g0`ytPHX%v#9HEyFiZpL}3#Bw{7uKxF zSd<7yGNS5S`I8$kK3DhjYJ71FdHN*RW4JF#;GE)>q7>YJEv{IuJ5{M&nD^Oa%abt2u#?d|}r(fBxRwivvn>os1mz8BK7*9RIL z`sZbq{hpIO$n8~btC50^x*Qa|A3hrRbG*1!qZU^!yMIa%8}?H6*ds(CPhuqT>f#Tz zyqIUlGMq^&x+Js^&GBr?AOIgv;WUt>jA349ydfCZ;b`)=;MuU`bUT#0F);4He7;jr z(0_{B(SQqchBZeWuUgK_XQ@O@F@Iv4R(UEA843SH0X%5Us(*!{Q4qdvwK;#p)-6s3 z7{fyzCai#)qs(uCr!%nlwZo9z-z2UMF^N%t7ig zn%OR=X#$#Ei`lwHdsU`SpZOEj$*m{2CU()q7Vf3qFYfV=G)7ai_FN>feSt`fDpT#9 z>NAfmK;W};P8e5SSmZQjOrI_~8NsuK(lq&c0?WPfl!&{PX_9$q`7gTJ=UKIwE<_*?g|QDJ;3+>!nR_iCKTMUv2tgF+o7_IjWT3)4pU9K&-9*Id{B(}C@3Pz` zlqc!0&3#M6q5n?$Gv~_>p$&Fp>cH}xLg*0qJz&m(cmApR%f!cE)?H95q{9v6;8qJ> zpA(2V^fkesIeZO8_z!_R1fFv0{)@fVK1RiOs`ABGO^2%FLK2xv?+qFi)cv4zKOrF@ zA&)hN7MP~RG^F+3SnL{RF6MfC0DkBv8CT{51m3S0LBn3*(#d#7FA0V5}jQRU!ZMmRVGUd6-6(@7A;NuYC?X9*5b z4fMC8U``OT>lQZH41aG8C!u=%`t_AmpFo@A3_BkR7xStW&?}23N)(6vqlCZI-J#;r z42q3_ZOl4^Gd&{L1mxF_6g)TUn0HpcUdx8v6u;0OBQF3&hB~L|b(b*)m)6rwm5$?ytjiFxYN97!rN4C}STx^X zTadFCDoaMfjW4S4UOWn&Rrr&5HS@gR1VdXE%DvbB+Az##f_jUd3v-oKBg2bLo!CTq zSonIkV|5qm;Kf)U3=V)VUb9rS_dg-v(Fsze6y&vg=TSmt90K6OlYG9HAB}~&tCxXw zjYBAFQ244^(OOIJi;9dsTO=8VL*fCPMq?><{8^yzPVh+0!{DRlP1_CG~Nz6(CK$eHuTtZuVVg6l|>%J+pfolR~<&R zw>;QBLQr&^5+>twFSq`1XVYdg)>fiaP{9zHXR1+zYL_#*xPWj~I(8=oby9MqjHWMz zNd^rvxp2dfkndLF&ayC)>`KsyLjLk#%pa$)Wl0&KpE%#vNXGm5g@w=k<1_K{9+;1U zsA?8%jhCsAwpzYl0TGeFhx)yrh_g+)gfzv-zT(k7=XhQ1a2)sM8#;;qV}wHwoQ=5~ zjz*}jYApa1EoB_+sJ*EQmA}zILRb(~6tPi4lK6^jjVqX0)cjNunhQ>_VL`p6gmTm* zO}b$|f`a8WdNm3_@qPh(wZV4ZOXtJh%!B3^{g|{)kLgAw=;rX_3UW3fV!I_OWp2v+ zmJo|B2|fl*x*>TD`$av_sgNNi0#CyRRBny@>rXx6G|?U}GzO%Fi*W@615}1NQ-cn_ zk5nQVY&r^#TRa90a#C+$S)u`pzSvO~k{Ow@(lrF71^VXn2qSzQ!{aQ~Uz06Bgb5OM z8F-D?RMfdnRQMhE{#{_&q0dZ^cBfg$9Y5$}XgxP^XfE`VLQnaqM)9{G{r$hl)|b=B zDtMx#*fPBranb^nO|h-Wp*9AzpHoXT{?S$%)Vfx@<*CSsp~Zf6^KJhh7k=Pg>U!~f zbGCNm1M#G~5^WO6oC zSIxN@Wz|0XN$~p20KE*k_wU~~P=V1-Oz8LL}`tS(&*DK$}FlK%T8%u?mMc&%8+EC*YW(}YIhytXdC zG1)2??|^ELuhsbj=WG2Lx^A1~ZGtrof(Eg}0nPvHF9^d$lNS_PW3|2>l${ycqd71I zj~Q%cvx!`}WXeEUd>nZUmNljvGpA`VzP!p0+PM%ZgUx(k@lUH~QX9LSC67k??8W{m+TO`jiE#Ss1J7i-v-MBqO2${SBi*y_R9rO_>OP 
zkMw)9wBAYOzlbwmqR*ex(2a~FOzTLr`{E*qt`n}>Y4@HvV$;tB>9N|oxbaK2Dj%f5 z4J`cmiinvb!(t5+3C<>UYkk|(k_VILwna!5)S=~0@qa`qJpf$83gVM9JNhd$3bL7* z_vOe)T~|8TR-*mw!TTuLn2g^#${04c5e^#z$2NyomSt)c2UZPO*mLbW<#JprY^n8seEhfOvXt8;TNk|i^vWS;ip z*IE!D|7`*}Sl=67R-1lln>Ngm@O4dff?dHO!C}ZMmqQ5j2%cz+N$hl~x?GZplGUN2 z;a3pBMe~g}Qla%@pYStH!#dRVDHPpq-amHGf~2IM7!WAZ#aWPReRj8i^*P);EiZqW z`Gy1A#msT4@`er$o=L7~*|YlDe<*_S!1PEL9dtXoQuZ?WSxXcoQH0Jzm+eqH9ANJ? zZ#|t1^o{Wpk6sB%1f@*}X>+J>-EzJ1a-qjsWhlr{L9y7w`)G14vd^hh@}q6AA5+N8 zbA{g{H8*f|=E^tU`24@&eILNy79ei=kn zWhu@=ZrQ>RG+UQ(;Q+oVCG+Yz^BxrBte?q5tOMlHkPG&_NSL9HMZQmD_-y9$Pe;C+ zGoJZyR(rh{>z{VZF*0UcV%%D^j%L@1JZ>`QU(FQ@4gGq3$5=$<)1yQbaC<1>$#tEj zO4{H{5NY;qdPUlBGYrXXmnDcszItIe#pm8bcw={|)ys3DOuJ0kmhdMy*ul6MgUq1( zbcKDOA+EL{N6KxSSaZbAxi3`OE2FL|^>n=iSxSG66qQ+^+FTnu#Ut~pMgNNFl!(wn z`${La2OP#Gh$&sUHA-FeEH+S^e4;q)p|oqqMEJOE%a`_@G7qV^Cn~bIWh?+@ceKVut^41ezRdIf zw92lkQgx~?Bkd@L-s63}5*T9@Odjz2P#?T}7CcGA?$alpxQ7BN{7N}EH<=OU+=ztZ zUH385U;BmZMb||T8}Pc~eA&{i6Sy|)U-xNU8(q?9m!;oB0#?gX16V($+w>!Qz{B0x zF6{Rx2?d*b8o2u_^H=QiP7E`bM=*@&)mEftUldn!Z)1LR%-`&=*?VG>j7vGzG83s zYN109Rh{B;u3*C}UC&0%jNY?p9@Xpu;*l(dh4cfl)9~zfVl3#)qP;Y;3_SjUyXc+o z9I>Cv`wIk4G#thZ^MfMAC@#irWAK?9oARPZZS8%3?Fy04L$!epScEz)Q5hP0^YF?p zh>PW+@;^44<@znQOXd?+W9X&QcbRn!SCxUE>76nBI9Dd(qi-Hc6HieGzepS4-wsP# z$QZHx8D(lO{^0QY(~_^k2HLje^`A1N`BNTbH*!ZG74fm@&1Hu;Nzw*8hDfs*=+hrF zvYC~UF=sJVR3_D&n=?T;-hg? zN4EaGvZjHYqGzA>H5VCC2P@MuY&z5yugP04>`k;|57Ne9HpEVdtA*FM*BZ-~G2K=z zlg^?TIYlJ1Q!J$~T{PUEfcZ1sC?XYn2L@|C4 zo#&Q)UWKZNO4X7tOWvIzv#cAA6TXTr2w8iMTCT~-ct>C$rP$&Qef_l*y>AH|jhH@G zls7ZR7w&}@^0`5VB1L2Vwxf($&Q$A_b%|sacKf$7c_2NvCS+DkbXY6-u8o(7in9yl z?D;X59_rU~`gGM}d}$hEkmHgtLNee?$&tySo@KL>Y2GSMG@NaI>1Z`_rwDIeP!1Z= z>)^HyRpJ1Pme>O>zV>#{EJCZ@sID>6$%MUfShfbzGGWt;4d!id1~cu!6(J;EK!W5S zh)4k41fxED=kAXTF~sDxe%<~p%~5oG_=;p}q3Z*Repe@3{GQQ--GpOmYPWq_?LImV z^OM1XL2_OeuE#b+k{Fv0hM5ME<;(4g+W8$>Xak98y>D0@v~a%H7&eprec%YFOJU6t zV%dll-4{OPzdOIMqWH;(i*285q*LPYNp-9QX;nT4ZuL7MrXt#N`9$tc%Wn58a!D1= zEfuzFx*~y&d_&;W^!rG*$O=>2=-&Zci($Vmnd8?|DYk7AcXZmcK~A-&+1)Rl#%E=h zm27PEVQ1F5g#Vgm>XiA{EiW4bOD|w578mc6oz4+AH}+&hjBGW>yT0jJ*wfljH7Tb* z)e#K7cJ3LNRNK=)S8wa3T}|G0dNWsFJ8Lrrvtx|!$R1B}`aR9^Y}0$+(k&Ii-@r)mddkJYFhle{)v{EU3hFJA+q}g1Cd%vI zEk`o?$pxkwj2e!C6Jmk$met2y^MKU@DGn{O6#Bvb?O@YUHJLHF%NBRA#4j7!piNJaKHN41kLGsJV-B^C;}q82a+0toI*sXAmUS6 z3JUg_Vto9*^j~6N*B2wBf*y&k$>T_P{-h4{_lE&v?I-Jr|Lm*Z;sz$m;}j#~LR2XD zbuX1aCNe~J;p1S_#z0S`p;R{Io;EjVDVU@f*sn!;`$D*wN2%Sx7&;<1u+nBsUycu9=6yx9D_(0-q9T3CMIW{`lbs_d*5ySNnn$S!SZ(@$OW$WP}5ObX?Dwe+B`#2BY`5? 
z9dPscib2EOaNtM-B2!aSL1{lXe75LwTf+Z@dHn&i*I_&3vy<~)IB?=zZYvww(lq^= zq5RuJaqMrTACEoYm1^UYVO6w2t1mIaUxQFH<)8v(6;Ej3a#Cl+EWs^BwVNiPbpuKxF_Np#`k()Z4 z#TCrL@xt{9(YWf?4D?`CnP{vootHm9F$P*zmn3zf5PJ+Mx^6sI`N+w^9Sxa&rQt6Z z+cSr5vRO$qXm?%BACqm9bNMss38CVxin}3_?#3X0H3do~u&nXWXCqASw1e1BmwqnP zEOrk#t|PE4{%{W!m!z7bH_Um)QEi)*(huEWiFcf%fl2u0Ho&O1#^%yomjf8|-*r~x zw(^gorlJ01jK`a)3%;r9aZ^+m_YHBZ4HWH+dVB-*{UGtsZ5z;PI~B|%`Nyq*&5?EE z*5D)H9`MY1Ug-MQsvQwY(fIJ(w;|!mq{`~pt#pQMmNW)kn-}O&WrV^K&{+Jdx%cXHc-4JEh_gsEGpNQ zN|n94es4%aCS?not?cxuTvZk+nsG%fBnNroOhIKxH#6^1tY|i(r_!=(0+Qp>m!bAy z1^P;Qk2b zJ&>8M-(){Oi;Tg(eM|pNVi5VVH4LT$;u}LI&bd+a` z=l$G00GKf(g6AI7wevhpu753Z${ooCn()>2pZ3L!(|yl)j+w^pK=sl9@(nV zgo-N7b0WbmMJ)`p=)$nr`ibja4eLp)s-@kZr59{!7%yUm!K=B5W=}ZWD)QJj$Q0hf0YY3%iEgfOXb3%2aq*tBp z`LXb8H%x#~aUn308HxEr=GU(mG1bR4Y7xPiLs&E*eUShSMs3u6L0+yi-V(-!!>l#R z3YL>}XQh;;=9{u7Mx3eLKd=@iC!(7>BMb0>@=t@Tm3w+BoTcRzV#5B(x{_GMYL1t_P3VfOjUGV|9A9ofEuyEA5d&>nil5LxxWj>m}>YC_~M6Q;`K0e)1ZO9doFQW zl)v3EByqbYX;Z6GlC1}IG=;XmIt}gb`fZ?$H#WprvGix&R30^RUy&gQ|DeaKd@*lDTE zuv}_;0lK5ahK6yoButItyeCBXGWaDH&>1IUTrWj=hYYQ(E0-DMvEk#;&j!0Faw*bWz1UbHY zQ4|YO|5;|jWZ#t1DJ+1QfDUAhbw6Or4AmgM-4%TUqyF)*z9h+vy63o3+R;2qI`TtbhPBfE_W zRDC7xxt#pxqdZlSDxUBdv6;?sp4`tH=Qh>yU#D+snxr-~raYK3_;qz2iuQhbqB7h4 z>eO+Ry~Lfj z5`$2Yo#5j?;I}W z@x+fbd!k?)D*PI#tC)4Ej@>0u7-4Kg{3{v7vtfVRwkT*j!gal$QY%=dLf<$Ghm*6a zF$1@|0I}yvy`zw7>iBbL(eTWMvw)Z z`8-E5QzkF6Ax=T9w~838LAp8;Ny;dZ&ps>g*q#fv+xiz~H+TD=SlP2UG(E-RQ=8#M z^dJg4;3*Ny&isJ<$5grg;=xuwxB3e>j%ArNoqh$vPr}%nM5k^?YrlZF@DP#Lt|t7X z!rAfBH@Nq|P#vUfS%|{XD423BUzA2k46ylm=@vs8sHpv?sg&MFN|BViQ?j0Yb@mvl z+*;SNdJ>sutoCL{1d<~0VaK@-pIIWa)SMt1Jw<<-clNS;W3iezGkIN75Ms{$9)nrw zNX$OLjd2q?=vceZdq^k9++aP#5F1Xy7sY5lB>Gq}BN`xe@O|vx)RS=Nzr&>x)x5TZ z4?SyoLl=KWtpu@Wf2pw1BS)OQNQaIuOl8G}r`v**O+3yT?t?$+gnV%iT5QS>ilFWF z?j&%sT+Q(&{tP(?3?kyEV$lF}$UC&qA@X8$8N60J{6FUTXI&3>8riOR2re<1-H+0f|EJGVA`Q z=ezr7!ce&fzKeq6zn{G99|a%Hl^nz7_ashhpmY}Z5(urv6^9n2sc3Zlke~hULx+yUM zFF{Pj#r(1wo&EAVn*~y@u96QD8H3jGKIc3E%>lVpRDY~@17e?eX3QvB^7lKUY~%Hwr9YVXVcZ0Pc{v%pELg&j-_t=q z1Df%1EOlZBU%j}{8Z5AC%J^}<{QQryt4(L&2P+P?vt7$6p-p*mXiXb4<_6j(mLS=h z*;92>JukE;;a-_%<~`Q_*eNgI<_{nqMlDvDJ!(9=dtb6xY92zDSEzoB?$A|OhyKF>f$K}fT^4Qm8L+N z=LR7ndQ!CGo0G{6Ab3j&J&`b8$rBR)$A2zVPEU=y`DM>r%0c=wVya%*P_dxe4FmZz zMag!cj)RM(ycMnf(|u7y;QvhgNH zbP?vCPAYQH#Y@vcC5Iw9y?#pv92W@9lYA_QrySK`%jlWb6kTq4kty0hS~cjcE=#pe zkBNpB<=jdgNM`@_HMYv693At?F>wg&q=lDA&+jA>06lC z!q!I08-ARZ(4P1R(08gBH`=lBdX>+{`Xws4Tq7m=7HOX7>`?!g%jeIUZ&pqj(YHQ% zup?8iT1fKTmphK3!4Q5uR;`kRuB2qPnM>*|`6=V>qT;{~6tvU?rTog?p4lIGh zSBU8K48{V&2)(MIo`CqRy0qoj^?D&Q4)={XzKjxEorQorm(__*%@XVNtMCVkIs*oV zo$e*vu`S?!-g1Nedu`Loi;+9+e&p9uuAb)@K5Dp{#oaltwJd4APJ9D1SUER&YDqbU zx)SVX+LR0$V^^byRTVxF&vWKBK#;2)!9~A z_niI$w41_NGiTo%7Uod6l+?Dpo1XtS%&PXx=ChI!!+G{7sn3v#+FA3D!4h|iT%Vur z$>)kxOfNjPoaRVU)k#|i&A&Tay^V%YewFiDaEO9bT5j~BA;O5|+O=Oi|BxI(TGiFn z6<`oz4K9zH_;y}t*b@{pKUfcx`MjB4$>XhGmW&=vghA!!Uw}wMs0;Ab)WmzWHyN4E zBceX~A>ipw%HgEc&(;8z`)G*dRA*;>l|zb0kMV|NuwhA4rqUT9hZ9@ra};78(a)o) z#~c>KrQ?O&YI0{u`uj}Te8D;H6yh3i_xm^EuY}*rkYQoWTzY4|9O{Jb@Td~j>??TA zuRf3Zp1udUqlEq*92l^lZ1x>9Shung4W6r~kE30k)%9`q*+>!1{&Xt!&&)fH*_T_+ zrR!P z6bH7!@Gs-(VB_Um>%9)c-=u7pJgUuSvpGjuCVxd2^$8{nL?~kvUB(aj!J1>a37+>vjK#(4+3coj??RH?shS!D$W52@d4{-h zU3&L~8UIWhVmuRkap3gudHW$zQVP|94A$mYZ?$@~kATeO^mh4Wv;ZitptU{qMxWFb zoY@VVD);h%6++{2s1C=UD02*K1~?`v0(|H_SJ*685N#DABGSD(?|Rr6$xNyO6KvXh zw&keesonS}L*^~?7Ydm)@BFRYCCkU$KNbvCC?}zB5OUXLcv}Fi*ThI5-BUGTiXTp? 
zUb)UmULIC0I_c|3^D5Sj?c2hxm#TizR=83Cdo;qTsFmB+# z!2E;;!BG>$!zT-5V(}>D;cba*odOX*jPH3N@F!i#Oe)G;&l42^o|L#VfqJPH*3o;Y zpq9YuL?@;-}!bwbpNk zv~b!~;zEyT|Ed_`A2ZTRPysBs2+}FZ4~HT(|Kkn$6gPIG;D|HFCx}Q8R?Wgt#<#~b z-)_8tt#UGBrhPwQ1V4~f6Hg{2=J_-2{H*Mx@DA2?6_P2wT+QcN`8n|)Q}jiP@0C}h zw12;geQHbE-!0ZlyukQfBC&hE1s0|2{IKo?ufAj6X1#QQZ3 zyT%?*Zx{cNNw2Jy@&`75dZ_w(^vz%yqRq^uF+bB9wI{`f(q9fqKe`CeHDxk$O(zV- zlKQZZK*;oS@^Iyg(a85j#?U7{){b{XKnoAn`qlfE6gZ*!t3da28qB2;_-BG8j6X{d zL3bOHNK^CrgV;CrCnq2J;L^7Qim{}B@ka%BqNE5o?gAi1VI=Z$WPM-RR0*I*BBg*A ze^(-l%A;&qbv?i|Gvz5^{*^#Rw`nDKUuS|r%>F8-Yd4kG>gMhu#5(JIsdaJs^8^{+ zni?CkTC<#7=ZhZSK5>TwVK-CXV+l_Ij>UHD!>x#cKA2U@bqW-5kd z91QQJ-y58K7uVzw%$9M>hjcX)vTn@ifX}58rsoWkr<|S?8>H#4+;jESqSapB`T5R*&f}}v1K=? zc|&!%e-j6lC_PTmklo_$_S+lr6~~DXTc3{5CKA3h?(RdTV12beowSWF=DC^3TTRHw zRi1v6iDgx!NX)rwK8cBljFiSs$0RNDlOX8#dZYY5d`k`!OEKI=Vmv;&=%!ypYnWcr ziHOza3~1U4=-?zgqY_Rb#`WJ92>EMCfBF3E*j_FK92crR;Z>1c&V) zdgT$ZX)6&zoh@SEQejU|K^Af*6q%4+wENpDIn9W3{6RS2j0vKWB>hh^%RbHcf^@26 z{HWu_2-{qCGTI0_gsY;NaGiRI({q;e;#;m!D6GbBwO^BF)_irwLE_4_>Qc%2A3kIsq3%dyB-kb-n${jK z&5MnDR4)Fwj@T0@@&i~qyAA7YS^kuh0f`b@lQt>wjZ=$N4oDSCr@eD%?8^8;m>EyA z`x$aENgwpQ+0=y2^lF7z<_o;UL6LuXeDVuf=p~${iOH|E2Z!c=AfQoBNE;cVF~4V} zf_7DaHecHATcFaV@NfSG9*6t(=37HMKR>v;q<;dykxo>dzI0+;q_ZuPM5qvsRA8QI6)X2U&Ek_w-ET z;&1rYkvw5LdGS3wuB39)HJQl+!>{Zh@w#}tvoehIm#{DttlKHkl$L}5+R>)Sddf=y z1pGo`Rn>Je@=;$tzB*8E5EUpcoE=slj(-EEuVMFJ<2Y}EOu@-9V(fH0oTkBrvZ=a~n6x#S*9GL9Qp}||L zk!{^LKCH#04W|&vU#?;DDSA(err28~7_uh|S6Mk=HAek7)R(szRj9hb zEo0+7PB(<9FIz47DbQ)rtr_=e6^E({GSEkr^m*2M%*4YP<2R~DqRXi4SmFsV<2`0@ z?3m&=_U~J%Zt%aB(i*-zO6uwPFIb8Z1u&-`NNL^Rhh{l$q;rF zA32^g`u~RGVLaFWO5_fV>yw{!E!{XiCHxPZ3?NO3rBPrM6{fO~)X!QOJQAWJy!9r0 z9HezY@&_>AN}}ojy;;5TrpKRuIt-s=B3s&o=zQLN6g$vG4l~S3K+pUw&dsY->_EL* zBKiQN(QbX}5CmPQ(d>H@ju{R71){liJc5=QdrW6@d^V&TQQRB@Ih#<399%Y%t`8zJ z(Nj(`C!J^zYhE-vWTZfM@JQDQXYzY&iaI{BMOoPq(GcS3zK_7B;&CQx>5BXQT zb?|6v=zm=o5~O4-4M$7z4>)-4TLLOdn1VE%2w(oYB(Zc%F#+k$-#>wzY$V-Mnq+U)Y`N63FD<7 zM*A6J(2>?P)}68c zMhFw{hY`w7;69RMHiSD<2-@R;`Nqm`81#5T zr^9}^z}t13Lt9*z*&#-BBc-qu>(5&D(%s$EQ4n~IJes_yLIg{YkSZGk94fF6m?7<7 z=<8%NA3lv+J1)01dwZfL!#Iv=69&;W9Tes{-%0l{01(dk$o3J;`VSH&g!-s*Mq3T{ zX>IJHx+4EC+DtV5@n`Uo%f5UmNASepF)BD4%u8qj%6k4Y8znh7Uml;;8+0x>F7i-B zB`%#i9a#EF(@zhacIqG}5Xl$D;IegYf}OhCwmX2lMP z0Ob)aivX>Z|3B0l*s>6-p32A*Cvx&GhR4)9R-?ES$iBtNY6{l{E--m&ac|8=WivSt zf0JH>ZQ+R$rsGnHVfQ0*LXNqL9FKx<-5ww37?2|;P0CmfV^w<}<;jffsltrg+}FeR z2vpa=WC_lZRfTtC7UJ~>m2;q+g@TyYw2C4wA@5yHhuQM-UtEyHLNr!-qWb`yaQ!SI zc1*Fr)YLU!LX0JY2)ikE8%F4ar}!wDhrD$^ z0HM=qA@4vC4qKkZuU!4YJtXYHNB(CD_#U3C04wz@v(VoN!8Q<5v)tjI;<^2UZLC)p zTPjKVWp`CeS)q&4@2xGSU{+`#I!^P%{q{(X5%Z3f0v!}jQ|4}W=FK&__6zN5M!hKn zxwgboZx172`J#{ejP59}&NN!oAwuS~arVGdhe}O$iJOh%OUb!uc;a@sW@H zqd*74S4{W)0uUtekXzA_TrnL}Pq^qKbk)803KXbiuw0w@^a0TIj

    evsN3&5$y+q zt_7Q&=G4J(mu#WQ)>jt?%XKzMJJLj828f%W?}0Ilj9!~cFcVKNE)w65^byIFn9c$q zqzGUJ2UJJ8`;ubhHhuz2sOWW=(IXrAKk3DGI5!88Y>9(`is_cn|6rGS9s_)Si}6OlNP9mMSI`4 zF^aC<1HOe71X#zDByCPY45_K$1>&z_jE#-a;s@40NIK2do`Im&!BuqC8@!|g5ZpAm z?hityN?}&UVPR1rm9%n@bC5fu>b`^%*0LCZio1CQQl|!;JoFCRx!-~p**{=ii2s$o z)|V8^(Rx`5Vd8|R7JfcbEQ4fBlf(S)l2eby?GfCMVLVf;-S-pajTa1rigExgJaPB> zNZR11HqfWl(K8$=@W|4Kt*x0iwCvWwdfMN8HG-I@e>Iaq{#a$+CnZxMUzVZ`eQD+XY zoqXBbVVL+>xF@AhMdiY@!k0?gUkFY34EuUVJjU;C*kk!I$Tf^^g^;KSesvF%*wsUYU4ZmUu{ zx&m_Y;HjjkGilcspH?7u|GW&wCX)ZfqL8UBU^3Q0-nNh0Mp;!l%7aHaeNF%of!ZJ| zpj5!sXxk1Po-(p^Kon3Jmr+ZlJoDmTaKF_748^>L(50O~`@9?^7!2M&B2jU)&rJ!O z-IZ|Ec;tFcPft_25K3b?=387S|LsjFUcZ;gGAt(O!pF;udT%%E5MbO4DH?{MZJ-f$ z%D2-_zSkKW5~0go;Yp35&>K}E#*{c`z=_qgpLmbzeYBwxaLJJBX?I@wCXHV^?}Yr_ zswn1>`sz^yGQ-_gU|1@1B*zSUMt*ikX0*0{Y{G_-vJAdry(uzA@&6603hCELd2EP} zWH;w=n{=o+OkQg)i*Hrg)HpKql=`Ngg#W)tR?hrkT=~-P3|f=o0SrY^ki2$M|KQ2A zC6yossau+cEc}G~k~J&Pr3(Lbh!q^Lp7Y)i)pJBo8%seu<`{+)xhy7IFA^sNDFq~Q z@yh5Xh%MsFs}$Sle}=In7T4oe;6udScs8m^HMH}(-tS)HCT8dPkIUN^L}OikxIa1O zXpyyMx>@#a-n6nN_#J#%I@@Tc74psMkV<81o;Z`BI>gjEe4cT)yAq4XoaYOE{HFctY|M0G-UF zz2mQ=gpI)f>DJw{6vCd}(_idOd+RrZS9rl>v9h8wmZmxR2Rt3ccTM`?Jj!ch}G` zF;lj_y7A<*$t{1otpMXuEIDfk=qJ+fp3n6hp5b-Gx-~_%Z)LFzW@%!y-?=pHdAk}6 z3hf}B3MwO!o4OW7W#q__TYPg8_w9aega1zo->PO!WUrqgPlL%5HfNlyw;=P!p+qI- zLaCrNK{&@@HUGA}FPj=q>-x+Loq;W3yVeUBy*!W*pM(T+%HMqWYSFB(U7^keqwNp{ z`YuP7?-x@IzsnMP;NHVNWa!n|BDYiFg|Y87a=I@T+FWr-d|57)VOMw^;FsCvo6Y?cAIz|p$AL+ z=Kn{qby1sbv7%ACWSkBJMlsS1w^znTdy5EJnlAT(LlNm72 ztxJghUB{+1{2==5gc6zi)7kfQ8AF%IA2lW{4i%Z&6lRfuo2Yn&vZ+;$6PdrNR{wA* zpZ;k7N;Oj>ej1xg6!JiTeAk?Kw&#h)P!;=w@`Hz9q=}fd@0WD+yfje^G?KB0Os|br z%!;e!*r=V=ob9wOcQ zekYwh+log#xG#@+Az7OC`gwU(9)@-!dzK|9M;j6>PZUw?iJB4TS9&Ehl2e6vDmJ0e zlHUFWp-cFm#ZUETzoP~l$D7_%J#VV}_!EpJ5qFxuH0H#Y4o&|rxKj(zQA%C6C$W7L zP~#8g=y4aB)*u1K^W&&t+kePTg&z$*(PIvHQL#IfpgQrC3hP%({B*8lJnf|MY%~|e z3-d>>`SB4LY&Ez$t10-EYApNf{*xGVcRNIa4iFpQdH>mdf+73)YblGOMVf9ZJbgh; z$xxf841rcqDiTh;YTYEHqG~J)4S$bp<{k~A3beX>2+?s+R+W8*t`J;CQ})-#`;qVj z)!6=zt3zuKpV+ttiAf7$HeS~L+3lCfVaMlTyDfU@OlYW}?ft%VY+S4KPMQ@P{V#nH zUnx?4*%6CCpKBst-1v#dh_LFrA_iiW!F=KUzMB!I{hmtBX@!vB1?J7m5?rgr!dm|b!Cu;Qs9)_u}Y-z>(CV_9T9HcmAe|(^e z`znn~Df}*-+l2jvmAR`8+jwb@B6aDIM4;G9;eBfAr^6t!NZCy5NY;i8lva>9K=Qm3 zMTBL5z!K3pQSIN~Fc!f}wggWR0RbV|*&1YBeDD1G|D-h|*`|(&MeMI>eH{i8Y^uVk zvr&V$P15{XqG5&l_knmywAhl+sWy^wW;f=Di~LB<{=@i*2W1d29UPmJ{QLs9gkY7X zh+Tead;!X&_CJBp+1iererLMO6}H_vw%yn^8r!yQ+fC!7v7N@Y?W9p-+eyR5_E~+ubKdV?IAf2of5>R|+F8BV zgZr8Dnv~&XeC@pRl8ce>=$pnRx82_+OHbxo=1$m>hGa68CVk1kq3H7ssT>M=k)!j+ zQs65~3AQL9oDY<11iY8uQb}{RNP9y!2XRKF4$m4f3e?U^BGjuQQ$KDqy)jPdPHUe}Qtfn`*`r~&KKR;i{d&!{VnjlbkYu}viC0^5!5jCz?d z^yn?&WyHe*$~kJLq~9%1#6vn$z7TrDMA!I_GBon|9aeW@P(X{bvZ4&F`kB7Jt}?L+t&j`JUUJJD=V?wZ`a# zbdbAyKLJ^)+SmDZnoHG}WERC7dv|#is#q*01G$;0;Q^$kkW_kYG`c;Q%#InxfdY8; zw#A_)U{;h_2LH#pXk##n;Dx9Qj~r5qRu1GPr>2&&+51&X*|deaK|!e;K}LUS!h&Un zzQJx36g4_R&q8+9GM#|C0nNcJg5Pc5&9a?wTMCjN$*lui=0FcA(;aY`y?qFGT&y+A zCd$>(2k!7;An#CNPwN3-PZJ0zIK+5v*TEGLG5{``N{%K)-6rFmW^yG+Jgk&(?kcUS zNgtve_d#%N7eTD)k&#(By#ZKEzHIZBB$-g{U1P9Z@L2FsgcB@RnFvp@oqDOjwu|q^ z4M!pjvm{S3ptoBPQ4AX=>_cPhN#Vt$WPUbJIFg4%4<=~Kh~^P@4a1`gs0R*)>NVa` zWP5&TG@J4Qw)0^3xEdo@1zN2R*^9WwKcEq5sz!IfV?fc=zW39L_B{KNd`w+xq`n zFk90n^CG{$Bga@}#AW&yX?uIA8f_+6nh_*z$IA%T_K5dZ^~#U;&?&gnrTdSr!!v}$ zNaoCCSYy&?4wuE=G}}iz2ns}y*>{skR23+bX^up4;r?H1XjdiFKWk{y&|I3~hluSm ze#sfeFF!4mPPZ<+e82gE>6F1b3i zMLBj4QPyy+$)}y#MK9*Y4!&F39+D2AA)=;CrEdN+q}TgFS*=ve|vLF?S(? 
zi~(w<=0}UN9eb>ho{%nf$ve)g6(qbT&|PsEFrTCd_{wm(9tMKN%+JSHRPuF{BwAP< zG~ey!K)Spc*@T1=)S2CU45K_HupS3X*S_5-yqvyAVNRw$;Rt3L>JX8bEbsjY#<^(d z)jr42o?F_R2m(cl3~=UDp%}%0a8i3=HDR&NaGe5?Scn8XYE~BTL1a+!3)_KVfs{B+$JOd#b^PcRiCYy+6r>oxFZ2 zG5yOmC(#(~h(v$8P97U?U_}$UiE3)ND+mjQNL~0PM}d&{j0iv%l^vn@BCb&>LH`K0 zkPUps_#1!C8p!5|5PP*ZG7j~bSMTxy?BFQ159J+3jIMwvC&l(Xt0<}9Lc6jgR%Z;s z-dNyt$s_xl@Z@lIVd`STOtixx9Xu*jC~Nb}Pu3!r((%SnvQDe5Ua6n$_n@@#lTj^k z)UwvTdh$j)I`s~c$M7uqZ_kx)H*cRmvXXTmEr*I&ETLRotR^yb+5I-{GiUON-K#Pt z)nW^Ir-bm;E1tTwU^^u^9i}iZNnB$DQsV!|{(0!S)^i?m#r8FGt>snBO7?*{n2dBl zD2Gs9%x!GJ?XzE`Q8-C^00#+B^UZk|<*|1!N7i=0Lsk*W7;|n_IdofVM*a}}-7TeV z&5BiLY}Qn*&XF8XZFKGg*XbdaWU#VbDrGvS<1(oeuF{fCSsTm_=3SJsuuYFnJCh+N zmh_O>{R-%o&v_VfK4G!Tiiv$J-B_r?4sN#}@~)p-i9M@~RZO3(b;u`_U5g%>I}sQL zXoD5z1|J~HrhFtC$6@gj zSYzn3Al8gp8i*5ECMdLbOYc3=ekDLKS_SQg~2N1IPVz1DXQ0`gX9na3mVN+W8zu%{6bEm@( zyu-qf@EOj+PV5N%1whqWFVb_FCwn>%e^-l%+NF~wurCYf*aT{{6KNe~@k6(XK867@ zNOO($i+E*yVF*78ya(hlHH9psIb=^PCcpIf zi9>LVN>05bQrarT#M-Pu-5(YaNW`%Qqzz?eghVH*G@fC>0dh`Kb3uVq0OapO&X$C-lYEyqP}4Q4$w#hFc`s4^}Tjhsu_#vHIfO;L#2HtTKG@c z_aQM8Soy5FgdTLi_~AH5Jjld<_R*v{%>g%uNT&tsTtnJ%k^e|i)gx6}-o!P|dW*u8 zn7(FmR-6aG;>oQd_$eMQPJI2RxQ=_og+XU4{4rR$YX6(z^L0Q{1&fuhlYzh#&p?3n zPp}4BQxb6?Z;8^V^=*Vk#nf%_s_x*v?Lq6YZpPHCAW+YU=!aJGEn9_z?k+QF~YN3lcdi)!aAxiCZ}*m;pUG$A}!KPWt4xJ2*+l|rH5Ul1YC&?W1CZEWcQs`g{>^s}WhWl=^C z+wstdFK?K_04<8VIk9P(;Zc~?*Z|zWmsu8Q5N65+-LOc87;;Z||8uz0I!fZxV0fTg zypT9LNCPY^?4JojOBaiC5zV%-^`srzMdp!Y_Dvc+<{tpBA^20+xQBCHEm^igF%-&& zcfOwO0w5CxbpJ?02q#Y0?l(j;z+x^3i;6BITMHv(g5e4iGvnlX__!HoLa!^1=%h_A z6HXz>)I#CH9bp;eT0yCNcwXzAL6Zy@1IbnIyWbm%x)FtkcE4gN#=HX^<*91>2gsgF z;Qp?hzg>q0x2UB44>;%bYTufXcEV|l%UUN*h-)-NUj(=;-Cm`c?O*cQebqLkAXY2x zXNJ-slGtIIlGgkZQ(R!|3s%=JXa71z!T3u^2vSlb{}K7_&*m5Zr~jv*74n+8wiTHj z9_nkHs|i)C!`62>$Qyr#OP&D`RT@|A7PnlScm>SZ!>HMhUcbawE>6H5x?9R{$_rag9Cw)mK`gnCo){SXFdJ%d2lXWKZRiJJwm-iH4qAwqoWUHmUI`+W{$Y ztwmEq)mV)w;MnY#>=EgOrMd^lJ1PNP4u&IJ}Y=eGmY0MDYKt*)73a`OaZBu{{+e1{;DBC!pnwSLOzW>? 

    +
    + +
    + +
    +
    + +When working with tabular data, such as data stored in spreadsheets or databases, pandas is the right tool for you. pandas will help you +to explore, clean, and process your data. In pandas, a data table is called a :class:`DataFrame`. + +.. image:: ../_static/schemas/01_table_dataframe.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_01_tableoriented>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
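+For example, a small :class:`DataFrame` can be built directly from a Python
+dictionary; the column names and values below are purely illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    # each key of the dictionary becomes a column of the DataFrame
+    df = pd.DataFrame(
+        {
+            "Name": ["Braund", "Allen", "Bonnell"],
+            "Age": [22, 35, 58],
+        }
+    )
+
+    # a single column is a Series and supports aggregations directly
+    df["Age"].max()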
    +
    +
    +
    + +
    + +
    +
    + +pandas supports integration with many file formats and data sources out of the box (csv, excel, sql, json, parquet, …). Importing data from each of these +data sources is provided by functions with the prefix ``read_*``. Similarly, the ``to_*`` methods are used to store data. + +.. image:: ../_static/schemas/02_io_readwrite.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_02_read_write>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
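+As a minimal sketch of the ``read_*`` / ``to_*`` pattern (the file name
+``example.csv`` is only illustrative and is written to the current working
+directory):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
+
+    # store the table as a csv file ...
+    df.to_csv("example.csv", index=False)
+
+    # ... and read it back in; read_excel, read_parquet, read_sql, ...
+    # follow the same pattern (some of them need optional dependencies,
+    # see the installation instructions)
+    roundtrip = pd.read_csv("example.csv")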
    +
    +
    +
    + +
    + +
    +
    + +Selecting or filtering specific rows and/or columns? Filtering the data on a condition? Methods for slicing, selecting, and extracting the +data you need are available in pandas. + +.. image:: ../_static/schemas/03_subset_columns_rows.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_03_subset>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
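+A self-contained sketch of column selection and row filtering; the column
+names and values are illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame({"Name": ["Ann", "Bob", "Cleo"], "Age": [22, 35, 58]})
+
+    # select a subset of columns by passing a list of column names
+    subset = df[["Name", "Age"]]
+
+    # filter rows with a boolean condition
+    adults = df[df["Age"] > 30]
+
+    # label- and position-based selection are available as well
+    first_name = df.loc[0, "Name"]
+    top_left = df.iloc[0:2, 0:1]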
    +
    +
    +
    + +
    + +
    +
    + +pandas provides the ability to plot your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot,...) +corresponding to your data. + +.. image:: ../_static/schemas/04_plot_overview.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_04_plotting>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
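+A minimal plotting sketch; it assumes the optional Matplotlib dependency is
+installed (see the installation instructions), and the column names and
+values are illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+    import matplotlib.pyplot as plt
+
+    df = pd.DataFrame({"NO2": [20.0, 21.5, 19.8], "O3": [40.1, 38.7, 41.0]})
+
+    # .plot() draws every numerical column against the index by default
+    df.plot()
+
+    # other plot types are available as methods, e.g. a box plot
+    df.plot.box()
+
+    plt.show()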
    +
    +
    +
    + +
    + +
    +
    + +There is no need to loop over all rows of your data table to do calculations. Data manipulations on a column work elementwise. +Adding a column to a :class:`DataFrame` based on existing data in other columns is straightforward. + +.. image:: ../_static/schemas/05_newcolumn_2.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_05_columns>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
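+A small sketch of deriving a new column from an existing one; the column
+name and the conversion factor are illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame({"station_london": [23.0, 19.5, 25.1]})
+
+    # the calculation is applied elementwise to the whole column at once,
+    # no loop over the rows is needed
+    df["station_london_ratio"] = df["station_london"] / 25.0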
    +
    +
    +
    + +
    + +
    +
    + +Basic statistics (mean, median, min, max, counts...) are easily calculable. These or custom aggregations can be applied on the entire +data set, a sliding window of the data, or grouped by categories. The latter is also known as the split-apply-combine approach. + +.. image:: ../_static/schemas/06_groupby.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_06_stats>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
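+A short sketch of basic aggregations and the split-apply-combine pattern;
+the column names and values are illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame(
+        {"Sex": ["male", "female", "female", "male"], "Age": [22, 35, 58, 40]}
+    )
+
+    # basic statistics on the whole table
+    df["Age"].mean()
+    df.describe()
+
+    # split-apply-combine: group by a category, then aggregate per group
+    df.groupby("Sex")["Age"].mean()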
    +
    +
    +
    + +
    + +
    +
    + +Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot` +from long to wide format. With aggregations built-in, a pivot table is created with a single command. + +.. image:: ../_static/schemas/07_melt.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_07_reshape>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
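+A compact sketch of reshaping between wide and long form; the column names
+are illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    wide = pd.DataFrame(
+        {"date": ["2020-01-01", "2020-01-02"], "london": [5, 6], "paris": [7, 8]}
+    )
+
+    # wide -> long ("tidy") format
+    tidy = wide.melt(id_vars="date", var_name="city", value_name="value")
+
+    # long -> wide again, aggregating duplicate entries with the mean
+    table = tidy.pivot_table(
+        values="value", index="date", columns="city", aggfunc="mean"
+    )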
    +
    +
    +
    + +
    + +
    +
    + +Multiple tables can be concatenated column-wise or row-wise, and database-like join/merge operations are provided to combine multiple tables of data. + +.. image:: ../_static/schemas/08_concat_row.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_08_combine>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
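+A minimal sketch of concatenating tables and a database-style merge; the
+station codes and values are illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    measurements = pd.DataFrame(
+        {"station": ["BETR801", "FR04014"], "no2": [22.5, 27.4]}
+    )
+    more_measurements = pd.DataFrame({"station": ["BETR801"], "no2": [24.0]})
+    stations = pd.DataFrame(
+        {"station": ["BETR801", "FR04014"], "city": ["Antwerp", "Paris"]}
+    )
+
+    # row-wise concatenation of tables that share the same columns
+    combined = pd.concat([measurements, more_measurements], axis=0)
+
+    # database-like join on a common key column
+    merged = pd.merge(combined, stations, on="station", how="left")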
    +
    +
    +
    + +
    + +
    +
    + +pandas has great support for time series, with an extensive set of tools for working with dates, times, and time-indexed data. + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_09_timeseries>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
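+A brief sketch of working with time-indexed data; the timestamps and values
+are illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame(
+        {
+            "datetime": ["2019-06-18 06:00", "2019-06-18 07:00", "2019-06-19 06:00"],
+            "no2": [29.0, 31.5, 26.0],
+        }
+    )
+
+    # parse the strings into real datetime objects
+    df["datetime"] = pd.to_datetime(df["datetime"])
+
+    # datetime columns expose convenient properties via the .dt accessor
+    df["datetime"].dt.day_name()
+
+    # with a DatetimeIndex, the data can be resampled to another frequency
+    df.set_index("datetime")["no2"].resample("D").mean()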
    +
    +
    +
    + +
    + +
    +
    + +Data sets contain more than just numerical data. pandas provides a wide range of functions to clean textual data and extract useful information from it. + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_10_text>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
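+A small sketch of the vectorised string methods available through the
+``.str`` accessor; the names are illustrative:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    names = pd.Series(
+        ["Braund, Mr. Owen", "Allen, Mr. William", "Bonnell, Miss Elizabeth"]
+    )
+
+    # string operations work on every element of the Series at once
+    names.str.lower()
+
+    # split on a separator and extract a part of the result
+    surnames = names.str.split(",").str.get(0)
+
+    # boolean masks from string tests can be used for filtering
+    names[names.str.contains("Miss")]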
    +
    +
    +
    + +
    +
    + + +.. _comingfrom: + +Coming from... +-------------- + +Are you familiar with other software for manipulating tablular data? Learn +the pandas-equivalent operations compared to software you already know: + +.. panels:: + :card: + comparison-card text-center shadow + :column: col-lg-6 col-md-6 col-sm-6 col-xs-12 d-flex + + --- + :card: + comparison-card-r + :img-top: ../_static/logo_r.svg + + The `R programming language `__ provides the + ``data.frame`` data structure and multiple packages, such as + `tidyverse `__ use and extend ``data.frame`` + for convenient data handling functionalities similar to pandas. + + +++ + + .. link-button:: compare_with_r + :type: ref + :text: Learn more + :classes: btn-secondary stretched-link + + + --- + :card: + comparison-card-sql + :img-top: ../_static/logo_sql.svg + + Already familiar to ``SELECT``, ``GROUP BY``, ``JOIN``, etc.? + Most of these SQL manipulations do have equivalents in pandas. + + +++ + + .. link-button:: compare_with_sql + :type: ref + :text: Learn more + :classes: btn-secondary stretched-link + + + --- + :card: + comparison-card-stata + :img-top: ../_static/logo_stata.svg + + The ``data set`` included in the `STATA `__ + statistical software suite corresponds to the pandas ``DataFrame``. + Many of the operations known from STATA have an equivalent in pandas. + + +++ + + .. link-button:: compare_with_stata + :type: ref + :text: Learn more + :classes: btn-secondary stretched-link + + + --- + :card: + comparison-card-excel + :img-top: ../_static/spreadsheets/logo_excel.svg + + Users of `Excel `__ + or other spreadsheet programs will find that many of the concepts are + transferrable to pandas. + + +++ + + .. link-button:: compare_with_spreadsheets + :type: ref + :text: Learn more + :classes: btn-secondary stretched-link + + + --- + :card: + comparison-card-sas + :img-top: ../_static/logo_sas.svg + + The `SAS `__ statistical software suite + also provides the ``data set`` corresponding to the pandas ``DataFrame``. + Also SAS vectorized operations, filtering, string processing operations, + and more have similar functions in pandas. + + +++ + + .. link-button:: compare_with_sas + :type: ref + :text: Learn more + :classes: btn-secondary stretched-link + + +Tutorials +--------- + +For a quick overview of pandas functionality, see :ref:`10 Minutes to pandas<10min>`. + +You can also reference the pandas `cheat sheet `_ +for a succinct guide for manipulating data with pandas. + +The community produces a wide variety of tutorials available online. Some of the +material is enlisted in the community contributed :ref:`communitytutorials`. + + +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + +.. toctree:: + :maxdepth: 2 + :hidden: + + install + overview + intro_tutorials/index + comparison/index + tutorials diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst new file mode 100644 index 00000000..31eaa236 --- /dev/null +++ b/doc/source/getting_started/install.rst @@ -0,0 +1,444 @@ +.. _install: + +{{ header }} + +============ +Installation +============ + +The easiest way to install pandas is to install it +as part of the `Anaconda `__ distribution, a +cross platform distribution for data analysis and scientific computing. +This is the recommended installation method for most users. + +Instructions for installing from source, +`PyPI `__, `ActivePython `__, various Linux distributions, or a +`development version `__ are also provided. + +.. 
_install.version: + +Python version support +---------------------- + +Officially Python 3.8, 3.9, 3.10 and 3.11. + +Installing pandas +----------------- + +.. _install.anaconda: + +Installing with Anaconda +~~~~~~~~~~~~~~~~~~~~~~~~ + +Installing pandas and the rest of the `NumPy `__ and +`SciPy `__ stack can be a little +difficult for inexperienced users. + +The simplest way to install not only pandas, but Python and the most popular +packages that make up the `SciPy `__ stack +(`IPython `__, `NumPy `__, +`Matplotlib `__, ...) is with +`Anaconda `__, a cross-platform +(Linux, macOS, Windows) Python distribution for data analytics and +scientific computing. + +After running the installer, the user will have access to pandas and the +rest of the `SciPy `__ stack without needing to install +anything else, and without needing to wait for any software to be compiled. + +Installation instructions for `Anaconda `__ +`can be found here `__. + +A full list of the packages available as part of the +`Anaconda `__ distribution +`can be found here `__. + +Another advantage to installing Anaconda is that you don't need +admin rights to install it. Anaconda can install in the user's home directory, +which makes it trivial to delete Anaconda if you decide (just delete +that folder). + +.. _install.miniconda: + +Installing with Miniconda +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The previous section outlined how to get pandas installed as part of the +`Anaconda `__ distribution. +However this approach means you will install well over one hundred packages +and involves downloading the installer which is a few hundred megabytes in size. + +If you want to have more control on which packages, or have a limited internet +bandwidth, then installing pandas with +`Miniconda `__ may be a better solution. + +`Conda `__ is the package manager that the +`Anaconda `__ distribution is built upon. +It is a package manager that is both cross-platform and language agnostic +(it can play a similar role to a pip and virtualenv combination). + +`Miniconda `__ allows you to create a +minimal self contained Python installation, and then use the +`Conda `__ command to install additional packages. + +First you will need `Conda `__ to be installed and +downloading and running the `Miniconda +`__ +will do this for you. The installer +`can be found here `__ + +The next step is to create a new conda environment. A conda environment is like a +virtualenv that allows you to specify a specific version of Python and set of libraries. +Run the following commands from a terminal window:: + + conda create -n name_of_my_env python + +This will create a minimal environment with only Python installed in it. +To put your self inside this environment run:: + + source activate name_of_my_env + +On Windows the command is:: + + activate name_of_my_env + +The final step required is to install pandas. This can be done with the +following command:: + + conda install pandas + +To install a specific pandas version:: + + conda install pandas=0.20.3 + +To install other packages, IPython for example:: + + conda install ipython + +To install the full `Anaconda `__ +distribution:: + + conda install anaconda + +If you need packages that are available to pip but not conda, then +install pip, and then use pip to install those packages:: + + conda install pip + pip install django + +Installing from PyPI +~~~~~~~~~~~~~~~~~~~~ + +pandas can be installed via pip from +`PyPI `__. + +.. note:: + You must have ``pip>=19.3`` to install from PyPI. 
+ +:: + + pip install pandas + +Installing with ActivePython +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Installation instructions for +`ActivePython `__ can be found +`here `__. Versions +2.7, 3.5 and 3.6 include pandas. + +Installing using your Linux distribution's package manager. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The commands in this table will install pandas for Python 3 from your distribution. + +.. csv-table:: + :header: "Distribution", "Status", "Download / Repository Link", "Install method" + :widths: 10, 10, 20, 50 + + + Debian, stable, `official Debian repository `__ , ``sudo apt-get install python3-pandas`` + Debian & Ubuntu, unstable (latest packages), `NeuroDebian `__ , ``sudo apt-get install python3-pandas`` + Ubuntu, stable, `official Ubuntu repository `__ , ``sudo apt-get install python3-pandas`` + OpenSuse, stable, `OpenSuse Repository `__ , ``zypper in python3-pandas`` + Fedora, stable, `official Fedora repository `__ , ``dnf install python3-pandas`` + Centos/RHEL, stable, `EPEL repository `__ , ``yum install python3-pandas`` + +**However**, the packages in the linux package managers are often a few versions behind, so +to get the newest version of pandas, it's recommended to install using the ``pip`` or ``conda`` +methods described above. + +Handling ImportErrors +~~~~~~~~~~~~~~~~~~~~~~ + +If you encounter an ImportError, it usually means that Python couldn't find pandas in the list of available +libraries. Python internally has a list of directories it searches through, to find packages. You can +obtain these directories with:: + + import sys + sys.path + +One way you could be encountering this error is if you have multiple Python installations on your system +and you don't have pandas installed in the Python installation you're currently using. +In Linux/Mac you can run ``which python`` on your terminal and it will tell you which Python installation you're +using. If it's something like "/usr/bin/python", you're using the Python from the system, which is not recommended. + +It is highly recommended to use ``conda``, for quick installation and for package and dependency updates. +You can find simple installation instructions for pandas in this document: ``installation instructions ``. + +Installing from source +~~~~~~~~~~~~~~~~~~~~~~ + +See the :ref:`contributing guide ` for complete instructions on building from the git source tree. Further, see :ref:`creating a development environment ` if you wish to create a pandas development environment. + +Running the test suite +---------------------- + +pandas is equipped with an exhaustive set of unit tests, covering about 97% of +the code base as of this writing. To run it on your machine to verify that +everything is working (and that you have all of the dependencies, soft and hard, +installed), make sure you have `pytest +`__ >= 6.0 and `Hypothesis +`__ >= 6.13.0, then run: + +:: + + >>> pd.test() + running: pytest --skip-slow --skip-network --skip-db /home/user/anaconda3/lib/python3.9/site-packages/pandas + + ============================= test session starts ============================== + platform linux -- Python 3.9.7, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 + rootdir: /home/user + plugins: dash-1.19.0, anyio-3.5.0, hypothesis-6.29.3 + collected 154975 items / 4 skipped / 154971 selected + ........................................................................ [ 0%] + ........................................................................ [ 99%] + ....................................... 
[100%] + + ==================================== ERRORS ==================================== + + =================================== FAILURES =================================== + + =============================== warnings summary =============================== + + =========================== short test summary info ============================ + + = 1 failed, 146194 passed, 7402 skipped, 1367 xfailed, 5 xpassed, 197 warnings, 10 errors in 1090.16s (0:18:10) = + +This is just an example of what information is shown. You might see a slightly different result as what is shown above. + +.. _install.dependencies: + +Dependencies +------------ + +================================================================ ========================== +Package Minimum supported version +================================================================ ========================== +`NumPy `__ 1.20.3 +`python-dateutil `__ 2.8.1 +`pytz `__ 2020.1 +================================================================ ========================== + +.. _install.recommended_dependencies: + +Recommended dependencies +~~~~~~~~~~~~~~~~~~~~~~~~ + +* `numexpr `__: for accelerating certain numerical operations. + ``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups. + If installed, must be Version 2.7.3 or higher. + +* `bottleneck `__: for accelerating certain types of ``nan`` + evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. If installed, + must be Version 1.3.2 or higher. + +.. note:: + + You are highly encouraged to install these libraries, as they provide speed improvements, especially + when working with large data sets. + + +.. _install.optional_dependencies: + +Optional dependencies +~~~~~~~~~~~~~~~~~~~~~ + +pandas has many optional dependencies that are only used for specific methods. +For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while +:meth:`DataFrame.to_markdown` requires the ``tabulate`` package. If the +optional dependency is not installed, pandas will raise an ``ImportError`` when +the method requiring that dependency is called. + +Timezones +^^^^^^^^^ + +========================= ========================= ============================================================= +Dependency Minimum Version Notes +========================= ========================= ============================================================= +tzdata 2022.1(pypi)/ Allows the use of ``zoneinfo`` timezones with pandas. + 2022a(for system tzdata) **Note**: You only need to install the pypi package if your + system does not already provide the IANA tz database. + However, the minimum tzdata version still applies, even if it + is not enforced through an error. + + If you would like to keep your system tzdata version updated, + it is recommended to use the ``tzdata`` package from + conda-forge. 
+========================= ========================= ============================================================= + +Visualization +^^^^^^^^^^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +matplotlib 3.3.2 Plotting library +Jinja2 3.0.0 Conditional formatting with DataFrame.style +tabulate 0.8.9 Printing in Markdown-friendly format (see `tabulate`_) +========================= ================== ============================================================= + +Computation +^^^^^^^^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +SciPy 1.7.1 Miscellaneous statistical functions +numba 0.53.1 Alternative execution engine for rolling operations + (see :ref:`Enhancing Performance `) +xarray 0.19.0 pandas-like API for N-dimensional data +========================= ================== ============================================================= + +Excel files +^^^^^^^^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +xlrd 2.0.1 Reading Excel +xlwt 1.3.0 Writing Excel +xlsxwriter 1.4.3 Writing Excel +openpyxl 3.0.7 Reading / writing for xlsx files +pyxlsb 1.0.8 Reading for xlsb files +========================= ================== ============================================================= + +HTML +^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +BeautifulSoup4 4.9.3 HTML parser for read_html +html5lib 1.1 HTML parser for read_html +lxml 4.6.3 HTML parser for read_html +========================= ================== ============================================================= + +One of the following combinations of libraries is needed to use the +top-level :func:`~pandas.read_html` function: + +* `BeautifulSoup4`_ and `html5lib`_ +* `BeautifulSoup4`_ and `lxml`_ +* `BeautifulSoup4`_ and `html5lib`_ and `lxml`_ +* Only `lxml`_, although see :ref:`HTML Table Parsing ` + for reasons as to why you should probably **not** take this approach. + +.. warning:: + + * if you install `BeautifulSoup4`_ you must install either + `lxml`_ or `html5lib`_ or both. + :func:`~pandas.read_html` will **not** work with *only* + `BeautifulSoup4`_ installed. + * You are highly encouraged to read :ref:`HTML Table Parsing gotchas `. + It explains issues surrounding the installation and + usage of the above three libraries. + +.. _html5lib: https://github.com/html5lib/html5lib-python +.. _BeautifulSoup4: https://www.crummy.com/software/BeautifulSoup +.. _lxml: https://lxml.de +.. 
_tabulate: https://github.com/astanin/python-tabulate + +XML +^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +lxml 4.5.0 XML parser for read_xml and tree builder for to_xml +========================= ================== ============================================================= + +SQL databases +^^^^^^^^^^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +SQLAlchemy 1.4.16 SQL support for databases other than sqlite +psycopg2 2.8.6 PostgreSQL engine for sqlalchemy +pymysql 1.0.2 MySQL engine for sqlalchemy +========================= ================== ============================================================= + +Other data sources +^^^^^^^^^^^^^^^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +PyTables 3.6.1 HDF5-based reading / writing +blosc 1.21.0 Compression for HDF5 +zlib Compression for HDF5 +fastparquet 0.4.0 Parquet reading / writing +pyarrow 1.0.1 Parquet, ORC, and feather reading / writing +pyreadstat 1.1.2 SPSS files (.sav) reading +========================= ================== ============================================================= + +.. _install.warn_orc: + +.. warning:: + + * If you want to use :func:`~pandas.read_orc`, it is highly recommended to install pyarrow using conda. + The following is a summary of the environment in which :func:`~pandas.read_orc` can work. 
+ + ========================= ================== ============================================================= + System Conda PyPI + ========================= ================== ============================================================= + Linux Successful Failed(pyarrow==3.0 Successful) + macOS Successful Failed + Windows Failed Failed + ========================= ================== ============================================================= + +Access data in the cloud +^^^^^^^^^^^^^^^^^^^^^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +fsspec 2021.7.0 Handling files aside from simple local and HTTP +gcsfs 2021.7.0 Google Cloud Storage access +pandas-gbq 0.15.0 Google Big Query access +s3fs 2021.08.0 Amazon S3 access +========================= ================== ============================================================= + +Clipboard +^^^^^^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +PyQt4/PyQt5 Clipboard I/O +qtpy Clipboard I/O +xclip Clipboard I/O on linux +xsel Clipboard I/O on linux +========================= ================== ============================================================= + + +Compression +^^^^^^^^^^^ + +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +brotli 0.7.0 Brotli compression +python-snappy 0.6.0 Snappy compression +Zstandard 0.15.2 Zstandard compression +========================= ================== ============================================================= diff --git a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst new file mode 100644 index 00000000..2dcc8b0a --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst @@ -0,0 +1,222 @@ +.. _10min_tut_01_tableoriented: + +{{ header }} + +What kind of data does pandas handle? +===================================== + +.. raw:: html + +
      +
    • + +I want to start using pandas + +.. ipython:: python + + import pandas as pd + +To load the pandas package and start working with it, import the +package. The community agreed alias for pandas is ``pd``, so loading +pandas as ``pd`` is assumed standard practice for all of the pandas +documentation. + +.. raw:: html + +
    • +
    + +pandas data table representation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/01_table_dataframe.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to store passenger data of the Titanic. For a number of passengers, I know the name (characters), age (integers) and sex (male/female) data. + +.. ipython:: python + + df = pd.DataFrame( + { + "Name": [ + "Braund, Mr. Owen Harris", + "Allen, Mr. William Henry", + "Bonnell, Miss. Elizabeth", + ], + "Age": [22, 35, 58], + "Sex": ["male", "male", "female"], + } + ) + df + +To manually store data in a table, create a ``DataFrame``. When using a Python dictionary of lists, the dictionary keys will be used as column headers and +the values in each list as columns of the ``DataFrame``. + +.. raw:: html + +
    • +
    + +A :class:`DataFrame` is a 2-dimensional data structure that can store data of +different types (including characters, integers, floating point values, +categorical data and more) in columns. It is similar to a spreadsheet, a +SQL table or the ``data.frame`` in R. + +- The table has 3 columns, each of them with a column label. The column + labels are respectively ``Name``, ``Age`` and ``Sex``. +- The column ``Name`` consists of textual data with each value a + string, the column ``Age`` are numbers and the column ``Sex`` is + textual data. + +In spreadsheet software, the table representation of our data would look +very similar: + +.. image:: ../../_static/schemas/01_table_spreadsheet.png + :align: center + +Each column in a ``DataFrame`` is a ``Series`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/01_table_series.svg + :align: center + +.. raw:: html + +
      +
    • + +I’m just interested in working with the data in the column ``Age`` + +.. ipython:: python + + df["Age"] + +When selecting a single column of a pandas :class:`DataFrame`, the result is +a pandas :class:`Series`. To select the column, use the column label in +between square brackets ``[]``. + +.. raw:: html + +
    • +
    + +.. note:: + If you are familiar with Python + :ref:`dictionaries `, the selection of a + single column is very similar to the selection of dictionary values based on + the key. + +You can create a ``Series`` from scratch as well: + +.. ipython:: python + + ages = pd.Series([22, 35, 58], name="Age") + ages + +A pandas ``Series`` has no column labels, as it is just a single column +of a ``DataFrame``. A Series does have row labels. + +Do something with a DataFrame or Series +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. raw:: html + +
      +
    • + +I want to know the maximum Age of the passengers + +We can do this on the ``DataFrame`` by selecting the ``Age`` column and +applying ``max()``: + +.. ipython:: python + + df["Age"].max() + +Or to the ``Series``: + +.. ipython:: python + + ages.max() + +.. raw:: html + +
    • +
    + +As illustrated by the ``max()`` method, you can *do* things with a +``DataFrame`` or ``Series``. pandas provides a lot of functionalities, +each of them a *method* you can apply to a ``DataFrame`` or ``Series``. +As methods are functions, do not forget to use parentheses ``()``. + +.. raw:: html + +
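+A quick way to see the difference (shown here purely as an illustration,
+using the ``df`` created above)::
+
+    df["Age"].max    # without parentheses: the bound method itself, nothing is computed
+    df["Age"].max()  # with parentheses: the method is called and returns the maximum age
+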
      +
    • + +I’m interested in some basic statistics of the numerical data of my data table + +.. ipython:: python + + df.describe() + +The :func:`~DataFrame.describe` method provides a quick overview of the numerical data in +a ``DataFrame``. As the ``Name`` and ``Sex`` columns are textual data, +these are by default not taken into account by the :func:`~DataFrame.describe` method. + +.. raw:: html + +
    • +
    + +Many pandas operations return a ``DataFrame`` or a ``Series``. The +:func:`~DataFrame.describe` method is an example of a pandas operation returning a +pandas ``Series`` or a pandas ``DataFrame``. + +.. raw:: html + +
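+One way to verify this is to check the type of the returned object; a small
+sketch using the objects created earlier in this tutorial::
+
+    type(df.describe())    # describe() on a DataFrame returns a DataFrame
+    type(ages.describe())  # describe() on a Series returns a Series
+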
    + To user guide + +Check more options on ``describe`` in the user guide section about :ref:`aggregations with describe ` + +.. raw:: html + +
    + +.. note:: + This is just a starting point. Similar to spreadsheet + software, pandas represents data as a table with columns and rows. Apart + from the representation, also the data manipulations and calculations + you would do in spreadsheet software are supported by pandas. Continue + reading the next tutorials to get started! + +.. raw:: html + +
    +

    REMEMBER

    + +- Import the package, aka ``import pandas as pd`` +- A table of data is stored as a pandas ``DataFrame`` +- Each column in a ``DataFrame`` is a ``Series`` +- You can do things by applying a method to a ``DataFrame`` or ``Series`` + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A more extended explanation to ``DataFrame`` and ``Series`` is provided in the :ref:`introduction to data structures `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst new file mode 100644 index 00000000..d69a48de --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst @@ -0,0 +1,208 @@ +.. _10min_tut_02_read_write: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    + Data used for this tutorial: +
    • + +.. include:: includes/titanic.rst + +.. raw:: html + +
    + +How do I read and write tabular data? +===================================== + +.. image:: ../../_static/schemas/02_io_readwrite.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to analyze the Titanic passenger data, available as a CSV file. + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + +pandas provides the :func:`read_csv` function to read data stored as a csv +file into a pandas ``DataFrame``. pandas supports many different file +formats or data sources out of the box (csv, excel, sql, json, parquet, +…), each of them with the prefix ``read_*``. + +.. raw:: html + +
    • +
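+As an illustration, the available readers can be listed with plain Python;
+the exact set depends on the installed pandas version::
+
+    [name for name in dir(pd) if name.startswith("read_")]
+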
    + +Make sure to always have a check on the data after reading in the +data. When displaying a ``DataFrame``, the first and last 5 rows will be +shown by default: + +.. ipython:: python + + titanic + +.. raw:: html + +
      +
    • + +I want to see the first 8 rows of a pandas DataFrame. + +.. ipython:: python + + titanic.head(8) + +To see the first N rows of a ``DataFrame``, use the :meth:`~DataFrame.head` method with +the required number of rows (in this case 8) as argument. + +.. raw:: html + +
    • +
    + +.. note:: + + Interested in the last N rows instead? pandas also provides a + :meth:`~DataFrame.tail` method. For example, ``titanic.tail(10)`` will return the last + 10 rows of the DataFrame. + +A check on how pandas interpreted each of the column data types can be +done by requesting the pandas ``dtypes`` attribute: + +.. ipython:: python + + titanic.dtypes + +For each of the columns, the data type in use is listed. The data types +in this ``DataFrame`` are integers (``int64``), floats (``float64``) and +strings (``object``). + +.. note:: + When asking for the ``dtypes``, no brackets are used! + ``dtypes`` is an attribute of a ``DataFrame`` and ``Series``. Attributes + of ``DataFrame`` or ``Series`` do not need brackets. Attributes + represent a characteristic of a ``DataFrame``/``Series``, whereas a + method (which requires brackets) *does* something with the + ``DataFrame``/``Series`` as introduced in the :ref:`first tutorial <10min_tut_01_tableoriented>`. + +.. raw:: html + +
      +
    • + +My colleague requested the Titanic data as a spreadsheet. + +.. ipython:: python + + titanic.to_excel("titanic.xlsx", sheet_name="passengers", index=False) + +Whereas ``read_*`` functions are used to read data to pandas, the +``to_*`` methods are used to store data. The :meth:`~DataFrame.to_excel` method stores +the data as an excel file. In the example here, the ``sheet_name`` is +named *passengers* instead of the default *Sheet1*. By setting +``index=False`` the row index labels are not saved in the spreadsheet. + +.. raw:: html + +
    • +
    + +The equivalent read function :meth:`~DataFrame.read_excel` will reload the data to a +``DataFrame``: + +.. ipython:: python + + titanic = pd.read_excel("titanic.xlsx", sheet_name="passengers") + +.. ipython:: python + + titanic.head() + +.. ipython:: python + :suppress: + + import os + + os.remove("titanic.xlsx") + +.. raw:: html + +
      +
    • + +I’m interested in a technical summary of a ``DataFrame`` + +.. ipython:: python + + titanic.info() + + +The method :meth:`~DataFrame.info` provides technical information about a +``DataFrame``, so let’s explain the output in more detail: + +- It is indeed a :class:`DataFrame`. +- There are 891 entries, i.e. 891 rows. +- Each row has a row label (aka the ``index``) with values ranging from + 0 to 890. +- The table has 12 columns. Most columns have a value for each of the + rows (all 891 values are ``non-null``). Some columns do have missing + values and fewer than 891 ``non-null`` values. +- The columns ``Name``, ``Sex``, ``Cabin`` and ``Embarked`` consist of + textual data (strings, aka ``object``). The other columns are + numerical data with some of them whole numbers (aka ``integer``) and + others are real numbers (aka ``float``). +- The kind of data (characters, integers,…) in the different columns + is summarized by listing the ``dtypes``. +- The approximate amount of RAM used to hold the DataFrame is provided + as well. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Getting data in to pandas from many different file formats or data + sources is supported by ``read_*`` functions. +- Exporting data out of pandas is provided by different + ``to_*``\ methods. +- The ``head``/``tail``/``info`` methods and the ``dtypes`` attribute + are convenient for a first check. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +For a complete overview of the input and output possibilities from and to pandas, see the user guide section about :ref:`reader and writer functions `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst new file mode 100644 index 00000000..291cbddf --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst @@ -0,0 +1,379 @@ +.. _10min_tut_03_subset: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    + Data used for this tutorial: +
    • + +.. include:: includes/titanic.rst + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
    + +How do I select a subset of a ``DataFrame``? +============================================ + +How do I select specific columns from a ``DataFrame``? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/03_subset_columns.svg + :align: center + +.. raw:: html + +
      +
    • + +I’m interested in the age of the Titanic passengers. + +.. ipython:: python + + ages = titanic["Age"] + ages.head() + +To select a single column, use square brackets ``[]`` with the column +name of the column of interest. + +.. raw:: html + +
    • +
    + +Each column in a :class:`DataFrame` is a :class:`Series`. As a single column is +selected, the returned object is a pandas :class:`Series`. We can verify this +by checking the type of the output: + +.. ipython:: python + + type(titanic["Age"]) + +And have a look at the ``shape`` of the output: + +.. ipython:: python + + titanic["Age"].shape + +:attr:`DataFrame.shape` is an attribute (remember :ref:`tutorial on reading and writing <10min_tut_02_read_write>`, do not use parentheses for attributes) of a +pandas ``Series`` and ``DataFrame`` containing the number of rows and +columns: *(nrows, ncolumns)*. A pandas Series is 1-dimensional and only +the number of rows is returned. + +.. raw:: html + +
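+A small sketch contrasting the two; the shapes in the comments assume the
+full Titanic data set loaded above::
+
+    titanic["Age"].shape   # a Series is 1-dimensional: (891,)
+    titanic.shape          # a DataFrame is 2-dimensional: (891, 12)
+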
      +
    • + +I’m interested in the age and sex of the Titanic passengers. + +.. ipython:: python + + age_sex = titanic[["Age", "Sex"]] + age_sex.head() + +To select multiple columns, use a list of column names within the +selection brackets ``[]``. + +.. raw:: html + +
    • +
    + +.. note:: + The inner square brackets define a + :ref:`Python list ` with column names, whereas + the outer brackets are used to select the data from a pandas + ``DataFrame`` as seen in the previous example. + +The returned data type is a pandas DataFrame: + +.. ipython:: python + + type(titanic[["Age", "Sex"]]) + +.. ipython:: python + + titanic[["Age", "Sex"]].shape + +The selection returned a ``DataFrame`` with 891 rows and 2 columns. Remember, a +``DataFrame`` is 2-dimensional with both a row and column dimension. + +.. raw:: html + +
    + To user guide + +For basic information on indexing, see the user guide section on :ref:`indexing and selecting data `. + +.. raw:: html + +
    + +How do I filter specific rows from a ``DataFrame``? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/03_subset_rows.svg + :align: center + +.. raw:: html + +
      +
    • + +I’m interested in the passengers older than 35 years. + +.. ipython:: python + + above_35 = titanic[titanic["Age"] > 35] + above_35.head() + +To select rows based on a conditional expression, use a condition inside +the selection brackets ``[]``. + +.. raw:: html + +
    • +
    + +The condition inside the selection +brackets ``titanic["Age"] > 35`` checks for which rows the ``Age`` +column has a value larger than 35: + +.. ipython:: python + + titanic["Age"] > 35 + +The output of the conditional expression (``>``, but also ``==``, +``!=``, ``<``, ``<=``,… would work) is actually a pandas ``Series`` of +boolean values (either ``True`` or ``False``) with the same number of +rows as the original ``DataFrame``. Such a ``Series`` of boolean values +can be used to filter the ``DataFrame`` by putting it in between the +selection brackets ``[]``. Only rows for which the value is ``True`` +will be selected. + +We know from before that the original Titanic ``DataFrame`` consists of +891 rows. Let’s have a look at the number of rows which satisfy the +condition by checking the ``shape`` attribute of the resulting +``DataFrame`` ``above_35``: + +.. ipython:: python + + above_35.shape + +.. raw:: html + +
      +
    • + +I’m interested in the Titanic passengers from cabin class 2 and 3. + +.. ipython:: python + + class_23 = titanic[titanic["Pclass"].isin([2, 3])] + class_23.head() + +Similar to the conditional expression, the :func:`~Series.isin` conditional function +returns a ``True`` for each row the values are in the provided list. To +filter the rows based on such a function, use the conditional function +inside the selection brackets ``[]``. In this case, the condition inside +the selection brackets ``titanic["Pclass"].isin([2, 3])`` checks for +which rows the ``Pclass`` column is either 2 or 3. + +.. raw:: html + +
    • +
    + +The above is equivalent to filtering by rows for which the class is +either 2 or 3 and combining the two statements with an ``|`` (or) +operator: + +.. ipython:: python + + class_23 = titanic[(titanic["Pclass"] == 2) | (titanic["Pclass"] == 3)] + class_23.head() + +.. note:: + When combining multiple conditional statements, each condition + must be surrounded by parentheses ``()``. Moreover, you can not use + ``or``/``and`` but need to use the ``or`` operator ``|`` and the ``and`` + operator ``&``. + +.. raw:: html + +
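+For example, a sketch of a combined condition using the ``and`` operator
+``&`` (the chosen values are purely illustrative)::
+
+    titanic[(titanic["Age"] > 35) & (titanic["Pclass"] == 2)].head()
+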
    + To user guide + +See the dedicated section in the user guide about :ref:`boolean indexing ` or about the :ref:`isin function `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +I want to work with passenger data for which the age is known. + +.. ipython:: python + + age_no_na = titanic[titanic["Age"].notna()] + age_no_na.head() + +The :meth:`~Series.notna` conditional function returns a ``True`` for each row the +values are not a ``Null`` value. As such, this can be combined with the +selection brackets ``[]`` to filter the data table. + +.. raw:: html + +
    • +
    + +You might wonder what actually changed, as the first 5 lines are still +the same values. One way to verify is to check if the shape has changed: + +.. ipython:: python + + age_no_na.shape + +.. raw:: html + +
    + To user guide + +For more dedicated functions on missing values, see the user guide section about :ref:`handling missing data `. + +.. raw:: html + +
    + +.. _10min_tut_03_subset.rows_and_columns: + +How do I select specific rows and columns from a ``DataFrame``? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/03_subset_columns_rows.svg + :align: center + +.. raw:: html + +
      +
    • + +I’m interested in the names of the passengers older than 35 years. + +.. ipython:: python + + adult_names = titanic.loc[titanic["Age"] > 35, "Name"] + adult_names.head() + +In this case, a subset of both rows and columns is made in one go and +just using selection brackets ``[]`` is not sufficient anymore. The +``loc``/``iloc`` operators are required in front of the selection +brackets ``[]``. When using ``loc``/``iloc``, the part before the comma +is the rows you want, and the part after the comma is the columns you +want to select. + +.. raw:: html + +
    • +
    + +When using the column names, row labels or a condition expression, use +the ``loc`` operator in front of the selection brackets ``[]``. For both +the part before and after the comma, you can use a single label, a list +of labels, a slice of labels, a conditional expression or a colon. Using +a colon specifies you want to select all rows or columns. + +.. raw:: html + +
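+A short sketch of this usage, with column names as introduced earlier in
+this tutorial::
+
+    titanic.loc[:, ["Name", "Age"]].head()      # all rows, two columns
+    titanic.loc[titanic["Age"] > 35, :].head()  # conditional rows, all columns
+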
      +
    • + +I’m interested in rows 10 till 25 and columns 3 to 5. + +.. ipython:: python + + titanic.iloc[9:25, 2:5] + +Again, a subset of both rows and columns is made in one go and just +using selection brackets ``[]`` is not sufficient anymore. When +specifically interested in certain rows and/or columns based on their +position in the table, use the ``iloc`` operator in front of the +selection brackets ``[]``. + +.. raw:: html + +
    • +
    + +When selecting specific rows and/or columns with ``loc`` or ``iloc``, +new values can be assigned to the selected data. For example, to assign +the name ``anonymous`` to the first 3 elements of the third column: + +.. ipython:: python + + titanic.iloc[0:3, 3] = "anonymous" + titanic.head() + +.. raw:: html + +
    + To user guide + +See the user guide section on :ref:`different choices for indexing ` to get more insight in the usage of ``loc`` and ``iloc``. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- When selecting subsets of data, square brackets ``[]`` are used. +- Inside these brackets, you can use a single column/row label, a list + of column/row labels, a slice of labels, a conditional expression or + a colon. +- Select specific rows and/or columns using ``loc`` when using the row + and column names. +- Select specific rows and/or columns using ``iloc`` when using the + positions in the table. +- You can assign new values to a selection based on ``loc``/``iloc``. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview of indexing is provided in the user guide pages on :ref:`indexing and selecting data `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst new file mode 100644 index 00000000..b6f1ac21 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst @@ -0,0 +1,250 @@ +.. _10min_tut_04_plotting: + +{{ header }} + +How do I create plots in pandas? +---------------------------------- + +.. image:: ../../_static/schemas/04_plot_overview.svg + :align: center + +.. ipython:: python + + import pandas as pd + import matplotlib.pyplot as plt + +.. raw:: html + +
    + Data used for this tutorial: +
    • + +.. include:: includes/air_quality_no2.rst + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_no2.csv", index_col=0, parse_dates=True) + air_quality.head() + +.. note:: + The ``index_col`` and ``parse_dates`` parameters of the ``read_csv`` function are used to define the first (0th) column as + index of the resulting ``DataFrame`` and to convert the dates in the column to :class:`Timestamp` objects, respectively. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +I want a quick visual check of the data. + +.. ipython:: python + + @savefig 04_airqual_quick.png + air_quality.plot() + plt.show() + +With a ``DataFrame``, pandas creates by default one line plot for each of +the columns with numeric data. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +I want to plot only the columns of the data table with the data from Paris. + +.. ipython:: python + :suppress: + + # We need to clear the figure here as, within doc generation, the plot + # accumulates data on each plot(). This is not needed when running + # in a notebook, so is suppressed from output. + plt.clf() + +.. ipython:: python + + @savefig 04_airqual_paris.png + air_quality["station_paris"].plot() + plt.show() + +To plot a specific column, use the selection method of the +:ref:`subset data tutorial <10min_tut_03_subset>` in combination with the :meth:`~DataFrame.plot` +method. Hence, the :meth:`~DataFrame.plot` method works on both ``Series`` and +``DataFrame``. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +I want to visually compare the :math:`NO_2` values measured in London versus Paris. + +.. ipython:: python + + @savefig 04_airqual_scatter.png + air_quality.plot.scatter(x="station_london", y="station_paris", alpha=0.5) + plt.show() + +.. raw:: html + +
    • +
    + +Apart from the default ``line`` plot when using the ``plot`` function, a +number of alternatives are available to plot data. Let’s use some +standard Python to get an overview of the available plot methods: + +.. ipython:: python + + [ + method_name + for method_name in dir(air_quality.plot) + if not method_name.startswith("_") + ] + +.. note:: + In many development environments as well as IPython and + Jupyter Notebook, use the TAB button to get an overview of the available + methods, for example ``air_quality.plot.`` + TAB. + +One of the options is :meth:`DataFrame.plot.box`, which refers to a +`boxplot `__. The ``box`` +method is applicable on the air quality example data: + +.. ipython:: python + + @savefig 04_airqual_boxplot.png + air_quality.plot.box() + plt.show() + +.. raw:: html + +
    + To user guide + +For an introduction to plots other than the default line plot, see the user guide section about :ref:`supported plot styles `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +I want each of the columns in a separate subplot. + +.. ipython:: python + + @savefig 04_airqual_area_subplot.png + axs = air_quality.plot.area(figsize=(12, 4), subplots=True) + plt.show() + +Separate subplots for each of the data columns are supported by the ``subplots`` argument +of the ``plot`` functions. The builtin options available in each of the pandas plot +functions are worth reviewing. + +.. raw:: html + +
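+For instance, an illustrative sketch combining a few of those options
+(keyword names as in the ``DataFrame.plot`` API)::
+
+    air_quality.plot.line(figsize=(12, 4), subplots=True, sharex=True)
+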
    • +
    + +.. raw:: html + +
    + To user guide + +Some more formatting options are explained in the user guide section on :ref:`plot formatting `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +I want to further customize, extend or save the resulting plot. + +.. ipython:: python + + fig, axs = plt.subplots(figsize=(12, 4)) + air_quality.plot.area(ax=axs) + axs.set_ylabel("NO$_2$ concentration") + @savefig 04_airqual_customized.png + fig.savefig("no2_concentrations.png") + plt.show() + +.. ipython:: python + :suppress: + + import os + + os.remove("no2_concentrations.png") + +.. raw:: html + +
    • +
    + +Each of the plot objects created by pandas is a +`Matplotlib `__ object. As Matplotlib provides +plenty of options to customize plots, making the link between pandas and +Matplotlib explicit enables all the power of Matplotlib to the plot. +This strategy is applied in the previous example: + +:: + + fig, axs = plt.subplots(figsize=(12, 4)) # Create an empty Matplotlib Figure and Axes + air_quality.plot.area(ax=axs) # Use pandas to put the area plot on the prepared Figure/Axes + axs.set_ylabel("NO$_2$ concentration") # Do any Matplotlib customization you like + fig.savefig("no2_concentrations.png") # Save the Figure/Axes using the existing Matplotlib method. + plt.show() # Display the plot + +.. raw:: html + +
    +

    REMEMBER

    + +- The ``.plot.*`` methods are applicable on both Series and DataFrames. +- By default, each of the columns is plotted as a different element + (line, boxplot,…). +- Any plot created by pandas is a Matplotlib object. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview of plotting in pandas is provided in the :ref:`visualization pages `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst new file mode 100644 index 00000000..6cf7c0ea --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst @@ -0,0 +1,173 @@ +.. _10min_tut_05_columns: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    + Data used for this tutorial: +
    • + +.. include:: includes/air_quality_no2.rst + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_no2.csv", index_col=0, parse_dates=True) + air_quality.head() + +.. raw:: html + +
    + +How to create new columns derived from existing columns? +-------------------------------------------------------- + +.. image:: ../../_static/schemas/05_newcolumn_1.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to express the :math:`NO_2` concentration of the station in London in mg/m\ :math:`^3`. + +(*If we assume temperature of 25 degrees Celsius and pressure of 1013 +hPa, the conversion factor is 1.882*) + +.. ipython:: python + + air_quality["london_mg_per_cubic"] = air_quality["station_london"] * 1.882 + air_quality.head() + +To create a new column, use the ``[]`` brackets with the new column name +at the left side of the assignment. + +.. raw:: html + +
    • +
    + +.. note:: + The calculation of the values is done **element-wise**. This + means all values in the given column are multiplied by the value 1.882 + at once. You do not need to use a loop to iterate each of the rows! + +.. image:: ../../_static/schemas/05_newcolumn_2.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to check the ratio of the values in Paris versus Antwerp and save the result in a new column. + +.. ipython:: python + + air_quality["ratio_paris_antwerp"] = ( + air_quality["station_paris"] / air_quality["station_antwerp"] + ) + air_quality.head() + +The calculation is again element-wise, so the ``/`` is applied *for the +values in each row*. + +.. raw:: html + +
    • +
    + +Other mathematical operators (``+``, ``-``, ``*``, ``/``,…) and +logical operators (``<``, ``>``, ``==``,…) also work element-wise. The latter were already +used in the :ref:`subset data tutorial <10min_tut_03_subset>` to filter +rows of a table using a conditional expression. + +If you need more advanced logic, you can use arbitrary Python code via :meth:`~DataFrame.apply`, as sketched below. + +.. raw:: html + +
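+A minimal sketch of such a case; the new column name and the threshold of 1
+are purely illustrative::
+
+    air_quality["ratio_flag"] = air_quality["ratio_paris_antwerp"].apply(
+        lambda ratio: "paris_higher" if ratio > 1 else "paris_not_higher"
+    )
+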
      +
    • + +I want to rename the data columns to the corresponding station identifiers used by `OpenAQ `__. + +.. ipython:: python + + air_quality_renamed = air_quality.rename( + columns={ + "station_antwerp": "BETR801", + "station_paris": "FR04014", + "station_london": "London Westminster", + } + ) + +.. ipython:: python + + air_quality_renamed.head() + +The :meth:`~DataFrame.rename` function can be used for both row labels and column +labels. Provide a dictionary with the keys the current names and the +values the new names to update the corresponding names. + +.. raw:: html + +
    • +
    + +The mapping should not be restricted to fixed names only, but can be a +mapping function as well. For example, converting the column names to +lowercase letters can be done using a function as well: + +.. ipython:: python + + air_quality_renamed = air_quality_renamed.rename(columns=str.lower) + air_quality_renamed.head() + +.. raw:: html + +
    + To user guide + +Details about column or row label renaming is provided in the user guide section on :ref:`renaming labels `. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Create a new column by assigning the output to the DataFrame with a + new column name in between the ``[]``. +- Operations are element-wise, no need to loop over rows. +- Use ``rename`` with a dictionary or function to rename row labels or + column names. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +The user guide contains a separate section on :ref:`column addition and deletion `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst new file mode 100644 index 00000000..9e596858 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -0,0 +1,286 @@ +.. _10min_tut_06_stats: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    + Data used for this tutorial: +
    • + +.. include:: includes/titanic.rst + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
    + +How to calculate summary statistics? +------------------------------------ + +Aggregating statistics +~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/06_aggregate.svg + :align: center + +.. raw:: html + +
      +
    • + +What is the average age of the Titanic passengers? + +.. ipython:: python + + titanic["Age"].mean() + +.. raw:: html + +
    • +
    + +Different statistics are available and can be applied to columns with +numerical data. Operations in general exclude missing data and operate +across rows by default. + +.. image:: ../../_static/schemas/06_reduction.svg + :align: center + +.. raw:: html + +
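+As an illustration of the missing-data behaviour, the ``skipna`` argument
+controls it; with missing ages present, disabling it yields ``NaN``::
+
+    titanic["Age"].mean()              # missing values are skipped by default
+    titanic["Age"].mean(skipna=False)  # NaN, because of the missing ages
+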
      +
    • + +What is the median age and ticket fare price of the Titanic passengers? + +.. ipython:: python + + titanic[["Age", "Fare"]].median() + +The statistic applied to multiple columns of a ``DataFrame`` (the selection of two columns +returns a ``DataFrame``, see the :ref:`subset data tutorial <10min_tut_03_subset>`) is calculated for each numeric column. + +.. raw:: html + +
    • +
    + +The aggregating statistic can be calculated for multiple columns at the +same time. Remember the ``describe`` function from the :ref:`first tutorial <10min_tut_01_tableoriented>`? + +.. ipython:: python + + titanic[["Age", "Fare"]].describe() + +Instead of the predefined statistics, specific combinations of +aggregating statistics for given columns can be defined using the +:func:`DataFrame.agg` method: + +.. ipython:: python + + titanic.agg( + { + "Age": ["min", "max", "median", "skew"], + "Fare": ["min", "max", "median", "mean"], + } + ) + +.. raw:: html + +
    + To user guide + +Details about descriptive statistics are provided in the user guide section on :ref:`descriptive statistics `. + +.. raw:: html + +
    + + +Aggregating statistics grouped by category +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/06_groupby.svg + :align: center + +.. raw:: html + +
      +
    • + +What is the average age for male versus female Titanic passengers? + +.. ipython:: python + + titanic[["Sex", "Age"]].groupby("Sex").mean() + +As our interest is the average age for each gender, a subselection on +these two columns is made first: ``titanic[["Sex", "Age"]]``. Next, the +:meth:`~DataFrame.groupby` method is applied on the ``Sex`` column to make a group per +category. The average age *for each gender* is calculated and +returned. + +.. raw:: html + +
    • +
    + +Calculating a given statistic (e.g. ``mean`` age) *for each category in +a column* (e.g. male/female in the ``Sex`` column) is a common pattern. +The ``groupby`` method is used to support this type of operations. This +fits in the more general ``split-apply-combine`` pattern: + +- **Split** the data into groups +- **Apply** a function to each group independently +- **Combine** the results into a data structure + +The apply and combine steps are typically done together in pandas. + +In the previous example, we explicitly selected the 2 columns first. If +not, the ``mean`` method is applied to each column containing numerical +columns by passing ``numeric_only=True``: + +.. ipython:: python + + titanic.groupby("Sex").mean(numeric_only=True) + +It does not make much sense to get the average value of the ``Pclass``. +If we are only interested in the average age for each gender, the +selection of columns (rectangular brackets ``[]`` as usual) is supported +on the grouped data as well: + +.. ipython:: python + + titanic.groupby("Sex")["Age"].mean() + +.. image:: ../../_static/schemas/06_groupby_select_detail.svg + :align: center + +.. note:: + The ``Pclass`` column contains numerical data but actually + represents 3 categories (or factors) with respectively the labels ‘1’, + ‘2’ and ‘3’. Calculating statistics on these does not make much sense. + Therefore, pandas provides a ``Categorical`` data type to handle this + type of data. More information is provided in the user guide + :ref:`categorical` section. + +.. raw:: html + +
      +
    • + +What is the mean ticket fare price for each of the sex and cabin class combinations? + +.. ipython:: python + + titanic.groupby(["Sex", "Pclass"])["Fare"].mean() + +Grouping can be done by multiple columns at the same time. Provide the +column names as a list to the :meth:`~DataFrame.groupby` method. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +A full description on the split-apply-combine approach is provided in the user guide section on :ref:`groupby operations `. + +.. raw:: html + +
    + +Count number of records by category +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/06_valuecounts.svg + :align: center + +.. raw:: html + +
      +
    • + +What is the number of passengers in each of the cabin classes? + +.. ipython:: python + + titanic["Pclass"].value_counts() + +The :meth:`~Series.value_counts` method counts the number of records for each +category in a column. + +.. raw:: html + +
    • +
    + +The function is a shortcut, as it is actually a groupby operation in combination with counting of the number of records +within each group: + +.. ipython:: python + + titanic.groupby("Pclass")["Pclass"].count() + +.. note:: + Both ``size`` and ``count`` can be used in combination with + ``groupby``. Whereas ``size`` includes ``NaN`` values and just provides + the number of rows (size of the table), ``count`` excludes the missing + values. In the ``value_counts`` method, use the ``dropna`` argument to + include or exclude the ``NaN`` values. + +.. raw:: html + +
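+A small sketch of the difference; ``Age`` contains missing values, so the
+two results differ::
+
+    titanic.groupby("Pclass")["Age"].size()   # rows per class, missing ages included
+    titanic.groupby("Pclass")["Age"].count()  # non-missing ages per class
+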
    + To user guide + +The user guide has a dedicated section on ``value_counts`` , see the page on :ref:`discretization `. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Aggregation statistics can be calculated on entire columns or rows. +- ``groupby`` provides the power of the *split-apply-combine* pattern. +- ``value_counts`` is a convenient shortcut to count the number of + entries in each category of a variable. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full description on the split-apply-combine approach is provided in the user guide pages about :ref:`groupby operations `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst new file mode 100644 index 00000000..27d6f959 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -0,0 +1,382 @@ +.. _10min_tut_07_reshape: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    + Data used for this tutorial: +
    + +How to reshape the layout of tables? +------------------------------------ + +Sort table rows +~~~~~~~~~~~~~~~ + +.. raw:: html + +
      +
    • + +I want to sort the Titanic data according to the age of the passengers. + +.. ipython:: python + + titanic.sort_values(by="Age").head() + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +I want to sort the Titanic data according to the cabin class and age in descending order. + +.. ipython:: python + + titanic.sort_values(by=['Pclass', 'Age'], ascending=False).head() + +With :meth:`DataFrame.sort_values`, the rows in the table are sorted according to the +defined column(s). The index will follow the row order. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +More details about sorting of tables is provided in the user guide section on :ref:`sorting data `. + +.. raw:: html + +
    + +Long to wide table format +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Let’s use a small subset of the air quality data set. We focus on +:math:`NO_2` data and only use the first two measurements of each +location (i.e. the head of each group). The subset of data will be +called ``no2_subset``. + +.. ipython:: python + + # filter for no2 data only + no2 = air_quality[air_quality["parameter"] == "no2"] + +.. ipython:: python + + # use 2 measurements (head) for each location (groupby) + no2_subset = no2.sort_index().groupby(["location"]).head(2) + no2_subset + +.. image:: ../../_static/schemas/07_pivot.svg + :align: center + +.. raw:: html + +
      +
    • + +I want the values for the three stations as separate columns next to each other. + +.. ipython:: python + + no2_subset.pivot(columns="location", values="value") + +The :meth:`~pandas.pivot` function is purely reshaping of the data: a single value +for each index/column combination is required. + +.. raw:: html + +
    • +
    + +As pandas supports plotting of multiple columns (see :ref:`plotting tutorial <10min_tut_04_plotting>`) out of the box, the conversion from +*long* to *wide* table format enables the plotting of the different time +series at the same time: + +.. ipython:: python + + no2.head() + +.. ipython:: python + + @savefig 7_reshape_columns.png + no2.pivot(columns="location", values="value").plot() + +.. note:: + When the ``index`` parameter is not defined, the existing + index (row labels) is used. + +.. raw:: html + +
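+As an illustrative sketch of passing ``index`` explicitly (this assumes the
+datetime index of ``no2`` is named ``date.utc``, as used further below)::
+
+    no2.reset_index().pivot(index="date.utc", columns="location", values="value")
+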
    + To user guide + +For more information about :meth:`~DataFrame.pivot`, see the user guide section on :ref:`pivoting DataFrame objects `. + +.. raw:: html + +
    + +Pivot table +~~~~~~~~~~~ + +.. image:: ../../_static/schemas/07_pivot_table.svg + :align: center + +.. raw:: html + +
      +
    • + +I want the mean concentrations for :math:`NO_2` and :math:`PM_{2.5}` in each of the stations in table form. + +.. ipython:: python + + air_quality.pivot_table( + values="value", index="location", columns="parameter", aggfunc="mean" + ) + +In the case of :meth:`~DataFrame.pivot`, the data is only rearranged. When multiple +values need to be aggregated (in this specific case, the values on +different time steps), :meth:`~DataFrame.pivot_table` can be used, providing an +aggregation function (e.g. mean) on how to combine these values. + +.. raw:: html + +
    • +
    + +Pivot table is a well known concept in spreadsheet software. When +interested in the row/column margins (subtotals) for each variable, set +the ``margins`` parameter to ``True``: + +.. ipython:: python + + air_quality.pivot_table( + values="value", + index="location", + columns="parameter", + aggfunc="mean", + margins=True, + ) + +.. raw:: html + +
    + To user guide + +For more information about :meth:`~DataFrame.pivot_table`, see the user guide section on :ref:`pivot tables `. + +.. raw:: html + +
    + +.. note:: + In case you are wondering, :meth:`~DataFrame.pivot_table` is indeed directly linked + to :meth:`~DataFrame.groupby`. The same result can be derived by grouping on both + ``parameter`` and ``location``: + + :: + + air_quality.groupby(["parameter", "location"]).mean() + +.. raw:: html + +
    + To user guide + +Have a look at :meth:`~DataFrame.groupby` in combination with :meth:`~DataFrame.unstack` at the user guide section on :ref:`combining stats and groupby `. + +.. raw:: html + +
    + +Wide to long format +~~~~~~~~~~~~~~~~~~~ + +Starting again from the wide format table created in the previous +section, we add a new index to the ``DataFrame`` with :meth:`~DataFrame.reset_index`. + +.. ipython:: python + + no2_pivoted = no2.pivot(columns="location", values="value").reset_index() + no2_pivoted.head() + +.. image:: ../../_static/schemas/07_melt.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to collect all air quality :math:`NO_2` measurements in a single column (long format). + +.. ipython:: python + + no_2 = no2_pivoted.melt(id_vars="date.utc") + no_2.head() + +The :func:`pandas.melt` method on a ``DataFrame`` converts the data table from wide +format to long format. The column headers become the variable names in a +newly created column. + +.. raw:: html + +
    • +
    + +The solution is the short version on how to apply :func:`pandas.melt`. The method +will *melt* all columns NOT mentioned in ``id_vars`` together into two +columns: A column with the column header names and a column with the +values itself. The latter column gets by default the name ``value``. + +The parameters passed to :func:`pandas.melt` can be defined in more detail: + +.. ipython:: python + + no_2 = no2_pivoted.melt( + id_vars="date.utc", + value_vars=["BETR801", "FR04014", "London Westminster"], + value_name="NO_2", + var_name="id_location", + ) + no_2.head() + +The additional parameters have the following effects: + +- ``value_vars`` defines which columns to *melt* together +- ``value_name`` provides a custom column name for the values column + instead of the default column name ``value`` +- ``var_name`` provides a custom column name for the column collecting + the column header names. Otherwise it takes the index name or a + default ``variable`` + +Hence, the arguments ``value_name`` and ``var_name`` are just +user-defined names for the two generated columns. The columns to melt +are defined by ``id_vars`` and ``value_vars``. + +.. raw:: html + +
    + To user guide + +Conversion from wide to long format with :func:`pandas.melt` is explained in the user guide section on :ref:`reshaping by melt `. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Sorting by one or more columns is supported by ``sort_values``. +- The ``pivot`` function is purely restructuring of the data, + ``pivot_table`` supports aggregations. +- The reverse of ``pivot`` (long to wide format) is ``melt`` (wide to + long format). + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview is available in the user guide on the pages about :ref:`reshaping and pivoting `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst new file mode 100644 index 00000000..b2b38910 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst @@ -0,0 +1,321 @@ +.. _10min_tut_08_combine: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    + Data used for this tutorial: +
      + +For this tutorial, air quality data about :math:`NO_2` is used, made available by +`OpenAQ `__ and downloaded using the +`py-openaq `__ package. + +The ``air_quality_no2_long.csv`` data set provides :math:`NO_2` +values for the measurement stations *FR04014*, *BETR801* and *London +Westminster* in respectively Paris, Antwerp and London. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality_no2 = pd.read_csv("data/air_quality_no2_long.csv", + parse_dates=True) + air_quality_no2 = air_quality_no2[["date.utc", "location", + "parameter", "value"]] + air_quality_no2.head() + +.. raw:: html + +
      + +For this tutorial, air quality data about Particulate +matter less than 2.5 micrometers is used, made available by +`OpenAQ `__ and downloaded using the +`py-openaq `__ package. + +The ``air_quality_pm25_long.csv`` data set provides :math:`PM_{25}` +values for the measurement stations *FR04014*, *BETR801* and *London +Westminster* in respectively Paris, Antwerp and London. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality_pm25 = pd.read_csv("data/air_quality_pm25_long.csv", + parse_dates=True) + air_quality_pm25 = air_quality_pm25[["date.utc", "location", + "parameter", "value"]] + air_quality_pm25.head() + +.. raw:: html + +
    + + +How to combine data from multiple tables? +----------------------------------------- + +Concatenating objects +~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/08_concat_row.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to combine the measurements of :math:`NO_2` and :math:`PM_{25}`, two tables with a similar structure, in a single table. + +.. ipython:: python + + air_quality = pd.concat([air_quality_pm25, air_quality_no2], axis=0) + air_quality.head() + +The :func:`~pandas.concat` function performs concatenation operations of multiple +tables along one of the axes (row-wise or column-wise). + +.. raw:: html + +
    • +
    + +By default concatenation is along axis 0, so the resulting table combines the rows +of the input tables. Let’s check the shape of the original and the +concatenated tables to verify the operation: + +.. ipython:: python + + print('Shape of the ``air_quality_pm25`` table: ', air_quality_pm25.shape) + print('Shape of the ``air_quality_no2`` table: ', air_quality_no2.shape) + print('Shape of the resulting ``air_quality`` table: ', air_quality.shape) + +Hence, the resulting table has 3178 = 1110 + 2068 rows. + +.. note:: + The **axis** argument will return in a number of pandas + methods that can be applied **along an axis**. A ``DataFrame`` has two + corresponding axes: the first running vertically downwards across rows + (axis 0), and the second running horizontally across columns (axis 1). + Most operations like concatenation or summary statistics are by default + across rows (axis 0), but can be applied across columns as well. + +Sorting the table on the datetime information illustrates also the +combination of both tables, with the ``parameter`` column defining the +origin of the table (either ``no2`` from table ``air_quality_no2`` or +``pm25`` from table ``air_quality_pm25``): + +.. ipython:: python + + air_quality = air_quality.sort_values("date.utc") + air_quality.head() + +In this specific example, the ``parameter`` column provided by the data +ensures that each of the original tables can be identified. This is not +always the case. The ``concat`` function provides a convenient solution +with the ``keys`` argument, adding an additional (hierarchical) row +index. For example: + +.. ipython:: python + + air_quality_ = pd.concat([air_quality_pm25, air_quality_no2], keys=["PM25", "NO2"]) + air_quality_.head() + +.. note:: + The existence of multiple row/column indices at the same time + has not been mentioned within these tutorials. *Hierarchical indexing* + or *MultiIndex* is an advanced and powerful pandas feature to analyze + higher dimensional data. + + Multi-indexing is out of scope for this pandas introduction. For the + moment, remember that the function ``reset_index`` can be used to + convert any level of an index to a column, e.g. + ``air_quality.reset_index(level=0)`` + + .. raw:: html + +
    + To user guide + + Feel free to dive into the world of multi-indexing at the user guide section on :ref:`advanced indexing `. + + .. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +More options on table concatenation (row and column +wise) and how ``concat`` can be used to define the logic (union or +intersection) of the indexes on the other axes is provided at the section on +:ref:`object concatenation `. + +.. raw:: html + +
    + +Join tables using a common identifier +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/08_merge_left.svg + :align: center + +.. raw:: html + +
      +
    • + +Add the station coordinates, provided by the stations metadata table, to the corresponding rows in the measurements table. + +.. warning:: + The air quality measurement station coordinates are stored in a data + file ``air_quality_stations.csv``, downloaded using the + `py-openaq `__ package. + +.. ipython:: python + + stations_coord = pd.read_csv("data/air_quality_stations.csv") + stations_coord.head() + +.. note:: + The stations used in this example (FR04014, BETR801 and London + Westminster) are just three entries enlisted in the metadata table. We + only want to add the coordinates of these three to the measurements + table, each on the corresponding rows of the ``air_quality`` table. + +.. ipython:: python + + air_quality.head() + +.. ipython:: python + + air_quality = pd.merge(air_quality, stations_coord, how="left", on="location") + air_quality.head() + +Using the :meth:`~pandas.merge` function, for each of the rows in the +``air_quality`` table, the corresponding coordinates are added from the +``air_quality_stations_coord`` table. Both tables have the column +``location`` in common which is used as a key to combine the +information. By choosing the ``left`` join, only the locations available +in the ``air_quality`` (left) table, i.e. FR04014, BETR801 and London +Westminster, end up in the resulting table. The ``merge`` function +supports multiple join options similar to database-style operations. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +Add the parameters' full description and name, provided by the parameters metadata table, to the measurements table. + +.. warning:: + The air quality parameters metadata are stored in a data file + ``air_quality_parameters.csv``, downloaded using the + `py-openaq `__ package. + +.. ipython:: python + + air_quality_parameters = pd.read_csv("data/air_quality_parameters.csv") + air_quality_parameters.head() + +.. ipython:: python + + air_quality = pd.merge(air_quality, air_quality_parameters, + how='left', left_on='parameter', right_on='id') + air_quality.head() + +Compared to the previous example, there is no common column name. +However, the ``parameter`` column in the ``air_quality`` table and the +``id`` column in the ``air_quality_parameters_name`` both provide the +measured variable in a common format. The ``left_on`` and ``right_on`` +arguments are used here (instead of just ``on``) to make the link +between the two tables. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +pandas also supports inner, outer, and right joins. +More information on join/merge of tables is provided in the user guide section on +:ref:`database style merging of tables `. Or have a look at the +:ref:`comparison with SQL` page. + +.. raw:: html + +
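+A purely illustrative sketch of another join type, using the tables as they
+were loaded earlier in this section (an inner join keeps only the locations
+present in both tables)::
+
+    pd.merge(air_quality, stations_coord, how="inner", on="location")
+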
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Multiple tables can be concatenated both column-wise and row-wise using + the ``concat`` function. +- For database-like merging/joining of tables, use the ``merge`` + function. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +See the user guide for a full description of the various :ref:`facilities to combine data tables `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst new file mode 100644 index 00000000..9d3bc805 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst @@ -0,0 +1,390 @@ +.. _10min_tut_09_timeseries: + +{{ header }} + +.. ipython:: python + + import pandas as pd + import matplotlib.pyplot as plt + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

+ +For this tutorial, air quality data about :math:`NO_2` and particulate matter smaller than 2.5 micrometers is used, made available by `OpenAQ `__ and downloaded using the `py-openaq `__ package. The ``air_quality_no2_long.csv`` data set provides :math:`NO_2` values for the measurement stations *FR04014*, *BETR801* and *London Westminster* in Paris, Antwerp and London, respectively. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_no2_long.csv") + air_quality = air_quality.rename(columns={"date.utc": "datetime"}) + air_quality.head() + +.. ipython:: python + + air_quality.city.unique() + +.. raw:: html + +
    • +
    +
    + +How to handle time series data with ease? +----------------------------------------- + +.. _10min_tut_09_timeseries.properties: + +Using pandas datetime properties +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. raw:: html + +
      +
• + +I want to work with the dates in the column ``datetime`` as datetime objects instead of plain text. + +.. ipython:: python + + air_quality["datetime"] = pd.to_datetime(air_quality["datetime"]) + air_quality["datetime"] + +Initially, the values in ``datetime`` are character strings and do not provide any datetime operations (e.g. extracting the year, day of the week, …). By applying the ``to_datetime`` function, pandas interprets the strings and converts these to datetime (i.e. ``datetime64[ns, UTC]``) objects. In pandas we call these datetime objects, which are similar to ``datetime.datetime`` from the standard library, :class:`pandas.Timestamp` objects. + +.. raw:: html + +
    • +
+ +.. note:: + As many data sets contain datetime information in one of the columns, pandas input functions like :func:`pandas.read_csv` and :func:`pandas.read_json` can do the transformation to dates when reading the data, using the ``parse_dates`` parameter with a list of the columns to read as Timestamp: + + :: + + pd.read_csv("../data/air_quality_no2_long.csv", parse_dates=["datetime"]) + +Why are these :class:`pandas.Timestamp` objects useful? Let’s illustrate the added value with some example cases. + + What is the start and end date of the time series data set we are working with? + +.. ipython:: python + + air_quality["datetime"].min(), air_quality["datetime"].max() + +Using :class:`pandas.Timestamp` for datetimes enables us to calculate with date information and make them comparable. Hence, we can use this to get the length of our time series: + +.. ipython:: python + + air_quality["datetime"].max() - air_quality["datetime"].min() + +The result is a :class:`pandas.Timedelta` object, similar to ``datetime.timedelta`` from the standard Python library, defining a time duration. + +.. raw:: html + +
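+Since the result is an ordinary :class:`pandas.Timedelta`, its components can be
+inspected directly; a minimal sketch (the ``duration`` name is illustrative only):
+
+::
+
+    duration = air_quality["datetime"].max() - air_quality["datetime"].min()
+    duration.days              # number of whole days in the span
+    duration.total_seconds()   # the same duration expressed in seconds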
    + To user guide + +The various time concepts supported by pandas are explained in the user guide section on :ref:`time related concepts `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
• + +I want to add a new column to the ``DataFrame`` containing only the month of the measurement. + +.. ipython:: python + + air_quality["month"] = air_quality["datetime"].dt.month + air_quality.head() + +By using ``Timestamp`` objects for dates, pandas provides a lot of time-related properties: for example ``month``, but also ``year``, ``weekofyear``, ``quarter``, … All of these properties are accessible through the ``dt`` accessor. + +.. raw:: html + +
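+A minimal sketch of a few other ``dt`` properties on the same column (the extra
+columns are illustrative and not used later in this tutorial):
+
+::
+
+    air_quality["year"] = air_quality["datetime"].dt.year
+    air_quality["quarter"] = air_quality["datetime"].dt.quarter
+    air_quality["weekday"] = air_quality["datetime"].dt.weekday
+    air_quality[["datetime", "year", "quarter", "weekday"]].head()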
    • +
    + +.. raw:: html + +
    + To user guide + +An overview of the existing date properties is given in the +:ref:`time and date components overview table `. More details about the ``dt`` accessor +to return datetime like properties are explained in a dedicated section on the :ref:`dt accessor `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
• + +What is the average :math:`NO_2` concentration for each day of the week for each of the measurement locations? + +.. ipython:: python + + air_quality.groupby( + [air_quality["datetime"].dt.weekday, "location"])["value"].mean() + +Remember the split-apply-combine pattern provided by ``groupby`` from the :ref:`tutorial on statistics calculation <10min_tut_06_stats>`? Here, we want to calculate a given statistic (e.g. mean :math:`NO_2`) **for each weekday** and **for each measurement location**. To group on weekdays, we use the datetime property ``weekday`` (with Monday=0 and Sunday=6) of pandas ``Timestamp``, which is also accessible by the ``dt`` accessor. Grouping on both locations and weekdays splits the calculation of the mean over each of these combinations. + +.. note:: + As we are working with a very short time series in these examples, the analysis does not provide a long-term representative result! + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +Plot the typical :math:`NO_2` pattern during the day of our time series of all stations together. In other words, what is the average value for each hour of the day? + +.. ipython:: python + + fig, axs = plt.subplots(figsize=(12, 4)) + air_quality.groupby(air_quality["datetime"].dt.hour)["value"].mean().plot( + kind='bar', rot=0, ax=axs + ) + plt.xlabel("Hour of the day"); # custom x label using Matplotlib + @savefig 09_bar_chart.png + plt.ylabel("$NO_2 (µg/m^3)$"); + +Similar to the previous case, we want to calculate a given statistic +(e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the +split-apply-combine approach again. For this case, we use the datetime property ``hour`` +of pandas ``Timestamp``, which is also accessible by the ``dt`` accessor. + +.. raw:: html + +
    • +
+ +Datetime as index +~~~~~~~~~~~~~~~~~ + +In the :ref:`tutorial on reshaping <10min_tut_07_reshape>`, :meth:`~pandas.pivot` was introduced to reshape the data table with each of the measurement locations as a separate column: + +.. ipython:: python + + no_2 = air_quality.pivot(index="datetime", columns="location", values="value") + no_2.head() + +.. note:: + By pivoting the data, the datetime information became the index of the table. In general, setting a column as an index can be achieved by the ``set_index`` function, as shown in the sketch below. + +Working with a datetime index (i.e. ``DatetimeIndex``) provides powerful functionalities. For example, we do not need the ``dt`` accessor to get the time series properties, but have these properties available on the index directly: + +.. ipython:: python + + no_2.index.year, no_2.index.weekday + +Some other advantages are the convenient subsetting of time periods and the adapted time scale on plots. Let’s apply this on our data. + +.. raw:: html + +
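+A minimal sketch of that ``set_index`` alternative mentioned in the note above,
+assuming ``air_quality`` still carries ``datetime`` as a regular column (the
+``aq_indexed`` name is illustrative only):
+
+::
+
+    # turn the existing datetime column into a DatetimeIndex
+    aq_indexed = air_quality.set_index("datetime")
+    aq_indexed.index[:3]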
      +
• + +Create a plot of the :math:`NO_2` values in the different stations from the 20th of May till the end of the 21st of May. + +.. ipython:: python + :okwarning: + + @savefig 09_time_section.png + no_2["2019-05-20":"2019-05-21"].plot(); + +By providing a **string that parses to a datetime**, a specific subset of the data can be selected on a ``DatetimeIndex``. + +.. raw:: html + +
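+Partial strings also work at coarser resolutions; a small sketch using ``loc``
+(the selections are illustrative and not part of the saved figures):
+
+::
+
+    no_2.loc["2019-05"].head()     # every row from May 2019
+    no_2.loc["2019-05-20"].head()  # all hours of a single day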
    • +
    + +.. raw:: html + +
    + To user guide + +More information on the ``DatetimeIndex`` and the slicing by using strings is provided in the section on :ref:`time series indexing `. + +.. raw:: html + +
    + +Resample a time series to another frequency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. raw:: html + +
      +
• + +Aggregate the current hourly time series values to the monthly maximum value in each of the stations. + +.. ipython:: python + + monthly_max = no_2.resample("M").max() + monthly_max + +A very powerful method on time series data with a datetime index is the ability to :meth:`~Series.resample` time series to another frequency (e.g., converting data recorded every second into 5-minute data). + +.. raw:: html + +
    • +
    + +The :meth:`~Series.resample` method is similar to a groupby operation: + +- it provides a time-based grouping, by using a string (e.g. ``M``, + ``5H``,…) that defines the target frequency +- it requires an aggregation function such as ``mean``, ``max``,… + +.. raw:: html + +
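+As a rough illustration of that analogy, the sketch below bins the data in 5-hour
+groups (a frequency chosen for illustration only) and then aggregates; the second
+statement is a roughly comparable "manual" grouping on the index:
+
+::
+
+    # time-based grouping plus aggregation in one step
+    no_2.resample("5H").mean().head()
+
+    # roughly equivalent: group on the index rounded down to 5-hour bins
+    no_2.groupby(no_2.index.floor("5H")).mean().head()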
    + To user guide + +An overview of the aliases used to define time series frequencies is given in the :ref:`offset aliases overview table `. + +.. raw:: html + +
    + +When defined, the frequency of the time series is provided by the +``freq`` attribute: + +.. ipython:: python + + monthly_max.index.freq + +.. raw:: html + +
      +
    • + +Make a plot of the daily mean :math:`NO_2` value in each of the stations. + +.. ipython:: python + :okwarning: + + @savefig 09_resample_mean.png + no_2.resample("D").mean().plot(style="-o", figsize=(10, 5)); + +.. raw:: html + +
    • +
    + +.. raw:: html + +
+ To user guide + +More details on the power of time series ``resampling`` are provided in the user guide section on :ref:`resampling `. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

+ +- Valid date strings can be converted to datetime objects using the ``to_datetime`` function or as part of read functions. +- Datetime objects in pandas support calculations, logical operations and convenient date-related properties using the ``dt`` accessor. +- A ``DatetimeIndex`` contains these date-related properties and supports convenient slicing. +- ``resample`` is a powerful method to change the frequency of a time series. + +.. raw:: html + +
    + +.. raw:: html + +
+ To user guide + +A full overview of time series is given in the pages on :ref:`time series and date functionality `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst new file mode 100644 index 00000000..148ac246 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst @@ -0,0 +1,248 @@ +.. _10min_tut_10_text: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • +.. include:: includes/titanic.rst + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
    • +
    +
    + +How to manipulate textual data? +------------------------------- + +.. raw:: html + +
      +
    • + +Make all name characters lowercase. + +.. ipython:: python + + titanic["Name"].str.lower() + +To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column +(see the :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and +apply the ``lower`` method. As such, each of the strings is converted element-wise. + +.. raw:: html + +
    • +
+ +Similar to datetime objects in the :ref:`time series tutorial <10min_tut_09_timeseries>` having a ``dt`` accessor, a number of specialized string methods are available when using the ``str`` accessor. These methods generally match the names of the equivalent built-in string methods for single elements, but are applied element-wise (remember :ref:`element-wise calculations <10min_tut_05_columns>`?) on each of the values of the columns. + +.. raw:: html + +
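+A minimal sketch of that correspondence, using the Titanic data loaded above (the
+chosen methods are illustrative only):
+
+::
+
+    "Braund, Mr. Owen Harris".upper()    # built-in str method on a single string
+    titanic["Name"].str.upper()          # the same operation, applied element-wise
+    titanic["Name"].str.startswith("B")  # one boolean result per element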
      +
• + +Create a new column ``Surname`` that contains the surname of the passengers by extracting the part before the comma. + +.. ipython:: python + + titanic["Name"].str.split(",") + +Using the :meth:`Series.str.split` method, each of the values is returned as a list of 2 elements. The first element is the part before the comma and the second element is the part after the comma. + +.. ipython:: python + + titanic["Surname"] = titanic["Name"].str.split(",").str.get(0) + titanic["Surname"] + +As we are only interested in the first part representing the surname (element 0), we can again use the ``str`` accessor and apply :meth:`Series.str.get` to extract the relevant part. Indeed, these string methods can be chained together to combine multiple operations at once! + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +More information on extracting parts of strings is available in the user guide section on :ref:`splitting and replacing strings `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +Extract the passenger data about the countesses on board of the Titanic. + +.. ipython:: python + + titanic["Name"].str.contains("Countess") + +.. ipython:: python + + titanic[titanic["Name"].str.contains("Countess")] + +(*Interested in her story? See* `Wikipedia `__\ *!*) + +The string method :meth:`Series.str.contains` checks for each of the values in the +column ``Name`` if the string contains the word ``Countess`` and returns +for each of the values ``True`` (``Countess`` is part of the name) or +``False`` (``Countess`` is not part of the name). This output can be used +to subselect the data using conditional (boolean) indexing introduced in +the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was +only one countess on the Titanic, we get one row as a result. + +.. raw:: html + +
    • +
+ +.. note:: + More powerful extractions on strings are supported, since the :meth:`Series.str.contains` and :meth:`Series.str.extract` methods accept `regular expressions `__, but these are out of scope for this tutorial. + +.. raw:: html + +
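+As a hedged illustration of what such a regular expression could look like (both
+the pattern and the ``Title`` column are hypothetical and not part of this
+tutorial):
+
+::
+
+    # extract the honorific between the comma and the period, e.g. "Mr", "Miss"
+    titanic["Title"] = titanic["Name"].str.extract(r",\s*([^.]+)\.", expand=False)
+    titanic["Title"].value_counts().head()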
    + To user guide + +More information on extracting parts of strings is available in the user guide section on :ref:`string matching and extracting `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +Which passenger of the Titanic has the longest name? + +.. ipython:: python + + titanic["Name"].str.len() + +To get the longest name we first have to get the lengths of each of the +names in the ``Name`` column. By using pandas string methods, the +:meth:`Series.str.len` function is applied to each of the names individually +(element-wise). + +.. ipython:: python + + titanic["Name"].str.len().idxmax() + +Next, we need to get the corresponding location, preferably the index +label, in the table for which the name length is the largest. The +:meth:`~Series.idxmax` method does exactly that. It is not a string method and is +applied to integers, so no ``str`` is used. + +.. ipython:: python + + titanic.loc[titanic["Name"].str.len().idxmax(), "Name"] + +Based on the index name of the row (``307``) and the column (``Name``), +we can do a selection using the ``loc`` operator, introduced in the +:ref:`tutorial on subsetting <10min_tut_03_subset>`. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
• + +In the "Sex" column, replace values of "male" by "M" and values of "female" by "F". + +.. ipython:: python + + titanic["Sex_short"] = titanic["Sex"].replace({"male": "M", "female": "F"}) + titanic["Sex_short"] + +Although :meth:`~Series.replace` is not a string method, it provides a convenient way to use mappings or vocabularies to translate certain values. It requires a ``dictionary`` to define the mapping ``{from: to}``. + +.. raw:: html + +
    • +
+ +.. warning:: + There is also a :meth:`~Series.str.replace` method available to replace a specific set of characters. However, with a mapping of multiple values, this would become: + + :: + + titanic["Sex_short"] = titanic["Sex"].str.replace("female", "F") + titanic["Sex_short"] = titanic["Sex_short"].str.replace("male", "M") + + This quickly becomes cumbersome and easily leads to mistakes. Just think (or try it out yourself) what would happen if those two statements were applied in the opposite order… + +.. raw:: html + +
    +

    REMEMBER

    + +- String methods are available using the ``str`` accessor. +- String methods work element-wise and can be used for conditional + indexing. +- The ``replace`` method is a convenient method to convert values + according to a given dictionary. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview is provided in the user guide pages on :ref:`working with text data `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/includes/air_quality_no2.rst b/doc/source/getting_started/intro_tutorials/includes/air_quality_no2.rst new file mode 100644 index 00000000..43790bd5 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/includes/air_quality_no2.rst @@ -0,0 +1,22 @@ +.. raw:: html + + +
    +
    +

+ +For this tutorial, air quality data about :math:`NO_2` is used, made available by `OpenAQ `__ and downloaded using the `py-openaq `__ package. The ``air_quality_no2.csv`` data set provides :math:`NO_2` values for the measurement stations *FR04014*, *BETR801* and *London Westminster* in Paris, Antwerp and London, respectively. + +.. raw:: html + +

    + To raw data +
    +
    diff --git a/doc/source/getting_started/intro_tutorials/includes/titanic.rst b/doc/source/getting_started/intro_tutorials/includes/titanic.rst new file mode 100644 index 00000000..19b8e819 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/includes/titanic.rst @@ -0,0 +1,31 @@ +.. raw:: html + + +
    +
    +

+ +This tutorial uses the Titanic data set, stored as CSV. The data consists of the following columns: + +- PassengerId: Id of every passenger. +- Survived: Indication whether the passenger survived: ``1`` for yes and ``0`` for no. +- Pclass: One out of the 3 ticket classes: Class ``1``, Class ``2`` and Class ``3``. +- Name: Name of the passenger. +- Sex: Gender of the passenger. +- Age: Age of the passenger in years. +- SibSp: Number of siblings or spouses aboard. +- Parch: Number of parents or children aboard. +- Ticket: Ticket number of the passenger. +- Fare: Fare paid for the ticket. +- Cabin: Cabin number of the passenger. +- Embarked: Port of embarkation. + +.. raw:: html + +

    + To raw data +
    +
    diff --git a/doc/source/getting_started/intro_tutorials/index.rst b/doc/source/getting_started/intro_tutorials/index.rst new file mode 100644 index 00000000..c67e1804 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/index.rst @@ -0,0 +1,21 @@ +{{ header }} + +.. _10times1minute: + +========================= +Getting started tutorials +========================= + +.. toctree:: + :maxdepth: 1 + + 01_table_oriented + 02_read_write + 03_subset_data + 04_plotting + 05_add_columns + 06_calculate_statistics + 07_reshape_table_layout + 08_combine_dataframes + 09_timeseries + 10_text_data diff --git a/doc/source/getting_started/overview.rst b/doc/source/getting_started/overview.rst new file mode 100644 index 00000000..320d2da0 --- /dev/null +++ b/doc/source/getting_started/overview.rst @@ -0,0 +1,176 @@ +.. _overview: + +{{ header }} + +**************** +Package overview +**************** + +pandas is a `Python `__ package providing fast, +flexible, and expressive data structures designed to make working with +"relational" or "labeled" data both easy and intuitive. It aims to be the +fundamental high-level building block for doing practical, **real-world** data +analysis in Python. Additionally, it has the broader goal of becoming **the +most powerful and flexible open source data analysis/manipulation tool +available in any language**. It is already well on its way toward this goal. + +pandas is well suited for many different kinds of data: + + - Tabular data with heterogeneously-typed columns, as in an SQL table or + Excel spreadsheet + - Ordered and unordered (not necessarily fixed-frequency) time series data. + - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and + column labels + - Any other form of observational / statistical data sets. The data + need not be labeled at all to be placed into a pandas data structure + +The two primary data structures of pandas, :class:`Series` (1-dimensional) +and :class:`DataFrame` (2-dimensional), handle the vast majority of typical use +cases in finance, statistics, social science, and many areas of +engineering. For R users, :class:`DataFrame` provides everything that R's +``data.frame`` provides and much more. pandas is built on top of `NumPy +`__ and is intended to integrate well within a scientific +computing environment with many other 3rd party libraries. + +Here are just a few of the things that pandas does well: + + - Easy handling of **missing data** (represented as NaN) in floating point as + well as non-floating point data + - Size mutability: columns can be **inserted and deleted** from DataFrame and + higher dimensional objects + - Automatic and explicit **data alignment**: objects can be explicitly + aligned to a set of labels, or the user can simply ignore the labels and + let ``Series``, ``DataFrame``, etc. 
automatically align the data for you in + computations + - Powerful, flexible **group by** functionality to perform + split-apply-combine operations on data sets, for both aggregating and + transforming data + - Make it **easy to convert** ragged, differently-indexed data in other + Python and NumPy data structures into DataFrame objects + - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** + of large data sets + - Intuitive **merging** and **joining** data sets + - Flexible **reshaping** and pivoting of data sets + - **Hierarchical** labeling of axes (possible to have multiple labels per + tick) + - Robust IO tools for loading data from **flat files** (CSV and delimited), + Excel files, databases, and saving / loading data from the ultrafast **HDF5 + format** + - **Time series**-specific functionality: date range generation and frequency + conversion, moving window statistics, date shifting, and lagging. + +Many of these principles are here to address the shortcomings frequently +experienced using other languages / scientific research environments. For data +scientists, working with data is typically divided into multiple stages: +munging and cleaning data, analyzing / modeling it, then organizing the results +of the analysis into a form suitable for plotting or tabular display. pandas +is the ideal tool for all of these tasks. + +Some other notes + + - pandas is **fast**. Many of the low-level algorithmic bits have been + extensively tweaked in `Cython `__ code. However, as with + anything else generalization usually sacrifices performance. So if you focus + on one feature for your application you may be able to create a faster + specialized tool. + + - pandas is a dependency of `statsmodels + `__, making it an important part of the + statistical computing ecosystem in Python. + + - pandas has been used extensively in production in financial applications. + +Data structures +--------------- + +.. csv-table:: + :header: "Dimensions", "Name", "Description" + :widths: 15, 20, 50 + + 1, "Series", "1D labeled homogeneously-typed array" + 2, "DataFrame", "General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed column" + +Why more than one data structure? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The best way to think about the pandas data structures is as flexible +containers for lower dimensional data. For example, DataFrame is a container +for Series, and Series is a container for scalars. We would like to be +able to insert and remove objects from these containers in a dictionary-like +fashion. + +Also, we would like sensible default behaviors for the common API functions +which take into account the typical orientation of time series and +cross-sectional data sets. When using the N-dimensional array (ndarrays) to store 2- and 3-dimensional +data, a burden is placed on the user to consider the orientation of the data +set when writing functions; axes are considered more or less equivalent (except +when C- or Fortran-contiguousness matters for performance). In pandas, the axes +are intended to lend more semantic meaning to the data; i.e., for a particular +data set, there is likely to be a "right" way to orient the data. The goal, +then, is to reduce the amount of mental effort required to code up data +transformations in downstream functions. + +For example, with tabular data (DataFrame) it is more semantically helpful to +think of the **index** (the rows) and the **columns** rather than axis 0 and +axis 1. 
Iterating through the columns of the DataFrame thus results in more +readable code: + +:: + + for col in df.columns: + series = df[col] + # do something with series + +Mutability and copying of data +------------------------------ + +All pandas data structures are value-mutable (the values they contain can be +altered) but not always size-mutable. The length of a Series cannot be +changed, but, for example, columns can be inserted into a DataFrame. However, +the vast majority of methods produce new objects and leave the input data +untouched. In general we like to **favor immutability** where sensible. + +Getting support +--------------- + +The first stop for pandas issues and ideas is the `Github Issue Tracker +`__. If you have a general question, +pandas community experts can answer through `Stack Overflow +`__. + +Community +--------- + +pandas is actively supported today by a community of like-minded individuals around +the world who contribute their valuable time and energy to help make open source +pandas possible. Thanks to `all of our contributors `__. + +If you're interested in contributing, please visit the :ref:`contributing guide `. + +pandas is a `NumFOCUS `__ sponsored project. +This will help ensure the success of the development of pandas as a world-class open-source +project and makes it possible to `donate `__ to the project. + +Project governance +------------------ + +The governance process that pandas project has used informally since its inception in 2008 is formalized in `Project Governance documents `__. +The documents clarify how decisions are made and how the various elements of our community interact, including the relationship between open source collaborative development and work that may be funded by for-profit or non-profit entities. + +Wes McKinney is the Benevolent Dictator for Life (BDFL). + +Development team +----------------- + +The list of the Core Team members and more detailed information can be found on the `people’s page `__ of the governance repo. + + +Institutional partners +---------------------- + +The information about current institutional partners can be found on `pandas website page `__. + +License +------- + +.. literalinclude:: ../../../LICENSE diff --git a/doc/source/getting_started/tutorials.rst b/doc/source/getting_started/tutorials.rst new file mode 100644 index 00000000..bff50bb1 --- /dev/null +++ b/doc/source/getting_started/tutorials.rst @@ -0,0 +1,121 @@ +.. _communitytutorials: + +{{ header }} + +******************* +Community tutorials +******************* + +This is a guide to many pandas tutorials by the community, geared mainly for new users. + +pandas cookbook by Julia Evans +------------------------------ + +The goal of this 2015 cookbook (by `Julia Evans `_) is to +give you some concrete examples for getting started with pandas. These +are examples with real-world data, and all the bugs and weirdness that +entails. +For the table of contents, see the `pandas-cookbook GitHub +repository `_. + +pandas workshop by Stefanie Molin +--------------------------------- + +An introductory workshop by `Stefanie Molin `_ +designed to quickly get you up to speed with pandas using real-world datasets. +It covers getting started with pandas, data wrangling, and data visualization +(with some exposure to matplotlib and seaborn). The +`pandas-workshop GitHub repository `_ +features detailed environment setup instructions (including a Binder environment), +slides and notebooks for following along, and exercises to practice the concepts. 
+There is also a lab with new exercises on a dataset not covered in the workshop for +additional practice. + +Learn pandas by Hernan Rojas +---------------------------- + +A set of lesson for new pandas users: https://bitbucket.org/hrojas/learn-pandas + +Practical data analysis with Python +----------------------------------- + +This `guide `_ is an introduction to the data analysis process using the Python data ecosystem and an interesting open dataset. +There are four sections covering selected topics as `munging data `__, +`aggregating data `_, `visualizing data `_ +and `time series `_. + +.. _tutorial-exercises-new-users: + +Exercises for new users +----------------------- +Practice your skills with real data sets and exercises. +For more resources, please visit the main `repository `__. + + +.. _tutorial-modern: + +Modern pandas +------------- + +Tutorial series written in 2016 by +`Tom Augspurger `_. +The source may be found in the GitHub repository +`TomAugspurger/effective-pandas `_. + +* `Modern Pandas `_ +* `Method Chaining `_ +* `Indexes `_ +* `Performance `_ +* `Tidy Data `_ +* `Visualization `_ +* `Timeseries `_ + +Excel charts with pandas, vincent and xlsxwriter +------------------------------------------------ + +* `Using Pandas and XlsxWriter to create Excel charts `_ + +Joyful pandas +------------- + +A tutorial written in Chinese by Yuanhao Geng. It covers the basic operations +for NumPy and pandas, 4 main data manipulation methods (including indexing, groupby, reshaping +and concatenation) and 4 main data types (including missing data, string data, categorical +data and time series data). At the end of each chapter, corresponding exercises are posted. +All the datasets and related materials can be found in the GitHub repository +`datawhalechina/joyful-pandas `_. + +Video tutorials +--------------- + +* `Pandas From The Ground Up `_ + (2015) (2:24) + `GitHub repo `__ +* `Introduction Into Pandas `_ + (2016) (1:28) + `GitHub repo `__ +* `Pandas: .head() to .tail() `_ + (2016) (1:26) + `GitHub repo `__ +* `Data analysis in Python with pandas `_ + (2016-2018) + `GitHub repo `__ and + `Jupyter Notebook `__ +* `Best practices with pandas `_ + (2018) + `GitHub repo `__ and + `Jupyter Notebook `__ + + +Various tutorials +----------------- + +* `Wes McKinney's (pandas BDFL) blog `_ +* `Statistical analysis made easy in Python with SciPy and pandas DataFrames, by Randal Olson `_ +* `Statistical Data Analysis in Python, tutorial videos, by Christopher Fonnesbeck from SciPy 2013 `_ +* `Financial analysis in Python, by Thomas Wiecki `_ +* `Intro to pandas data structures, by Greg Reda `_ +* `Pandas and Python: Top 10, by Manish Amde `_ +* `Pandas DataFrames Tutorial, by Karlijn Willems `_ +* `A concise tutorial with real life examples `_ +* `430+ Searchable Pandas recipes by Isshin Inada `_ diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template new file mode 100644 index 00000000..59280536 --- /dev/null +++ b/doc/source/index.rst.template @@ -0,0 +1,134 @@ +:notoc: + +.. pandas documentation master file, created by + +.. module:: pandas + +******************** +pandas documentation +******************** + +**Date**: |today| **Version**: |version| + +**Download documentation**: `Zipped HTML `__ + +**Previous versions**: Documentation of previous pandas versions is available at +`pandas.pydata.org `__. 
+ +**Useful links**: +`Binary Installers `__ | +`Source Repository `__ | +`Issues & Ideas `__ | +`Q&A Support `__ | +`Mailing List `__ + +:mod:`pandas` is an open source, BSD-licensed library providing high-performance, +easy-to-use data structures and data analysis tools for the `Python `__ +programming language. + +{% if not single_doc -%} +.. panels:: + :card: + intro-card text-center + :column: col-lg-6 col-md-6 col-sm-6 col-xs-12 d-flex + + --- + :img-top: _static/index_getting_started.svg + + Getting started + ^^^^^^^^^^^^^^^ + + New to *pandas*? Check out the getting started guides. They contain an + introduction to *pandas'* main concepts and links to additional tutorials. + + +++ + + .. link-button:: getting_started + :type: ref + :text: To the getting started guides + :classes: btn-block btn-secondary stretched-link + + --- + :img-top: _static/index_user_guide.svg + + User guide + ^^^^^^^^^^ + + The user guide provides in-depth information on the + key concepts of pandas with useful background information and explanation. + + +++ + + .. link-button:: user_guide + :type: ref + :text: To the user guide + :classes: btn-block btn-secondary stretched-link + + --- + :img-top: _static/index_api.svg + + API reference + ^^^^^^^^^^^^^ + + The reference guide contains a detailed description of + the pandas API. The reference describes how the methods work and which parameters can + be used. It assumes that you have an understanding of the key concepts. + + +++ + + .. link-button:: api + :type: ref + :text: To the reference guide + :classes: btn-block btn-secondary stretched-link + + --- + :img-top: _static/index_contribute.svg + + Developer guide + ^^^^^^^^^^^^^^^ + + Saw a typo in the documentation? Want to improve + existing functionalities? The contributing guidelines will guide + you through the process of improving pandas. + + +++ + + .. link-button:: development + :type: ref + :text: To the development guide + :classes: btn-block btn-secondary stretched-link + +{% endif %} +{% if single_doc and single_doc.endswith('.rst') -%} +.. toctree:: + :maxdepth: 3 + :titlesonly: + + {{ single_doc[:-4] }} +{% elif single_doc and single_doc.count('.') <= 1 %} +.. autosummary:: + :toctree: reference/api/ + + {{ single_doc }} +{% elif single_doc %} +.. autosummary:: + :toctree: reference/api/ + :template: autosummary/accessor_method.rst + + {{ single_doc }} +{% else -%} +.. toctree:: + :maxdepth: 3 + :hidden: + :titlesonly: +{% endif %} +{% if not single_doc %} + getting_started/index + user_guide/index + {% endif -%} + {% if include_api -%} + reference/index + {% endif -%} + {% if not single_doc -%} + development/index + whatsnew/index +{% endif %} diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst new file mode 100644 index 00000000..6d09e10f --- /dev/null +++ b/doc/source/reference/arrays.rst @@ -0,0 +1,639 @@ +{{ header }} + +.. _api.arrays: + +====================================== +pandas arrays, scalars, and data types +====================================== + +******* +Objects +******* + +.. currentmodule:: pandas + +For most data types, pandas uses NumPy arrays as the concrete +objects contained with a :class:`Index`, :class:`Series`, or +:class:`DataFrame`. + +For some data types, pandas extends NumPy's type system. String aliases for these types +can be found at :ref:`basics.dtypes`. 
+ +=================== ========================= ============================= ============================= +Kind of Data pandas Data Type Scalar Array +=================== ========================= ============================= ============================= +TZ-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :ref:`api.arrays.datetime` +Timedeltas (none) :class:`Timedelta` :ref:`api.arrays.timedelta` +Period (time spans) :class:`PeriodDtype` :class:`Period` :ref:`api.arrays.period` +Intervals :class:`IntervalDtype` :class:`Interval` :ref:`api.arrays.interval` +Nullable Integer :class:`Int64Dtype`, ... (none) :ref:`api.arrays.integer_na` +Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical` +Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse` +Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string` +Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` +PyArrow :class:`ArrowDtype` Python Scalars or :class:`NA` :ref:`api.arrays.arrow` +=================== ========================= ============================= ============================= + +pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). +The top-level :meth:`array` method can be used to create a new array, which may be +stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFrame`. + +.. autosummary:: + :toctree: api/ + + array + +.. _api.arrays.arrow: + +PyArrow +------- + +.. warning:: + + This feature is experimental, and the API can change in a future release without warning. + +The :class:`arrays.ArrowExtensionArray` is backed by a :external+pyarrow:py:class:`pyarrow.ChunkedArray` with a +:external+pyarrow:py:class:`pyarrow.DataType` instead of a NumPy array and data type. The ``.dtype`` of a :class:`arrays.ArrowExtensionArray` +is an :class:`ArrowDtype`. + +`Pyarrow `__ provides similar array and `data type `__ +support as NumPy including first-class nullability support for all data types, immutability and more. + +.. note:: + + For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated + by :class:`arrays.ArrowStringArray` and ``StringDtype("pyarrow")``. See the :ref:`string section ` + below. + +While individual values in an :class:`arrays.ArrowExtensionArray` are stored as a PyArrow objects, scalars are **returned** +as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing +values. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.ArrowExtensionArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + ArrowDtype + +.. _api.arrays.datetime: + +Datetimes +--------- + +NumPy cannot natively represent timezone-aware datetimes. pandas supports this +with the :class:`arrays.DatetimeArray` extension array, which can hold timezone-naive +or timezone-aware values. + +:class:`Timestamp`, a subclass of :class:`datetime.datetime`, is pandas' +scalar type for timezone-naive or timezone-aware datetime data. + +.. autosummary:: + :toctree: api/ + + Timestamp + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Timestamp.asm8 + Timestamp.day + Timestamp.dayofweek + Timestamp.day_of_week + Timestamp.dayofyear + Timestamp.day_of_year + Timestamp.days_in_month + Timestamp.daysinmonth + Timestamp.fold + Timestamp.hour + Timestamp.is_leap_year + Timestamp.is_month_end + Timestamp.is_month_start + Timestamp.is_quarter_end + Timestamp.is_quarter_start + Timestamp.is_year_end + Timestamp.is_year_start + Timestamp.max + Timestamp.microsecond + Timestamp.min + Timestamp.minute + Timestamp.month + Timestamp.nanosecond + Timestamp.quarter + Timestamp.resolution + Timestamp.second + Timestamp.tz + Timestamp.tzinfo + Timestamp.value + Timestamp.week + Timestamp.weekofyear + Timestamp.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Timestamp.astimezone + Timestamp.ceil + Timestamp.combine + Timestamp.ctime + Timestamp.date + Timestamp.day_name + Timestamp.dst + Timestamp.floor + Timestamp.freq + Timestamp.freqstr + Timestamp.fromordinal + Timestamp.fromtimestamp + Timestamp.isocalendar + Timestamp.isoformat + Timestamp.isoweekday + Timestamp.month_name + Timestamp.normalize + Timestamp.now + Timestamp.replace + Timestamp.round + Timestamp.strftime + Timestamp.strptime + Timestamp.time + Timestamp.timestamp + Timestamp.timetuple + Timestamp.timetz + Timestamp.to_datetime64 + Timestamp.to_numpy + Timestamp.to_julian_date + Timestamp.to_period + Timestamp.to_pydatetime + Timestamp.today + Timestamp.toordinal + Timestamp.tz_convert + Timestamp.tz_localize + Timestamp.tzname + Timestamp.utcfromtimestamp + Timestamp.utcnow + Timestamp.utcoffset + Timestamp.utctimetuple + Timestamp.weekday + +A collection of timestamps may be stored in a :class:`arrays.DatetimeArray`. +For timezone-aware data, the ``.dtype`` of a :class:`arrays.DatetimeArray` is a +:class:`DatetimeTZDtype`. For timezone-naive data, ``np.dtype("datetime64[ns]")`` +is used. + +If the data are timezone-aware, then every value in the array must have the same timezone. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.DatetimeArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + DatetimeTZDtype + +.. _api.arrays.timedelta: + +Timedeltas +---------- + +NumPy can natively represent timedeltas. pandas provides :class:`Timedelta` +for symmetry with :class:`Timestamp`. + +.. autosummary:: + :toctree: api/ + + Timedelta + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Timedelta.asm8 + Timedelta.components + Timedelta.days + Timedelta.delta + Timedelta.freq + Timedelta.is_populated + Timedelta.max + Timedelta.microseconds + Timedelta.min + Timedelta.nanoseconds + Timedelta.resolution + Timedelta.seconds + Timedelta.value + Timedelta.view + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Timedelta.ceil + Timedelta.floor + Timedelta.isoformat + Timedelta.round + Timedelta.to_pytimedelta + Timedelta.to_timedelta64 + Timedelta.to_numpy + Timedelta.total_seconds + +A collection of :class:`Timedelta` may be stored in a :class:`TimedeltaArray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.TimedeltaArray + +.. _api.arrays.period: + +Periods +------- + +pandas represents spans of times as :class:`Period` objects. + +Period +------ +.. autosummary:: + :toctree: api/ + + Period + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Period.day + Period.dayofweek + Period.day_of_week + Period.dayofyear + Period.day_of_year + Period.days_in_month + Period.daysinmonth + Period.end_time + Period.freq + Period.freqstr + Period.hour + Period.is_leap_year + Period.minute + Period.month + Period.ordinal + Period.quarter + Period.qyear + Period.second + Period.start_time + Period.week + Period.weekday + Period.weekofyear + Period.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Period.asfreq + Period.now + Period.strftime + Period.to_timestamp + +A collection of :class:`Period` may be stored in a :class:`arrays.PeriodArray`. +Every period in a :class:`arrays.PeriodArray` must have the same ``freq``. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.PeriodArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + PeriodDtype + +.. _api.arrays.interval: + +Intervals +--------- + +Arbitrary intervals can be represented as :class:`Interval` objects. + +.. autosummary:: + :toctree: api/ + + Interval + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Interval.closed + Interval.closed_left + Interval.closed_right + Interval.is_empty + Interval.left + Interval.length + Interval.mid + Interval.open_left + Interval.open_right + Interval.overlaps + Interval.right + +A collection of intervals may be stored in an :class:`arrays.IntervalArray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.IntervalArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + IntervalDtype + + +.. Those attributes and methods are included in the API because the docstrings +.. of IntervalIndex and IntervalArray are shared. Including it here to make +.. sure a docstring page is built for them to avoid warnings + +.. + .. autosummary:: + :toctree: api/ + + arrays.IntervalArray.left + arrays.IntervalArray.right + arrays.IntervalArray.closed + arrays.IntervalArray.mid + arrays.IntervalArray.length + arrays.IntervalArray.is_empty + arrays.IntervalArray.is_non_overlapping_monotonic + arrays.IntervalArray.from_arrays + arrays.IntervalArray.from_tuples + arrays.IntervalArray.from_breaks + arrays.IntervalArray.contains + arrays.IntervalArray.overlaps + arrays.IntervalArray.set_closed + arrays.IntervalArray.to_tuples + + +.. _api.arrays.integer_na: + +Nullable integer +---------------- + +:class:`numpy.ndarray` cannot natively represent integer-data with missing values. +pandas provides this through :class:`arrays.IntegerArray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.IntegerArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + Int8Dtype + Int16Dtype + Int32Dtype + Int64Dtype + UInt8Dtype + UInt16Dtype + UInt32Dtype + UInt64Dtype + +.. _api.arrays.categorical: + +Categoricals +------------ + +pandas defines a custom data type for representing data that can take only a +limited, fixed set of values. The dtype of a :class:`Categorical` can be described by +a :class:`CategoricalDtype`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + CategoricalDtype + +.. autosummary:: + :toctree: api/ + + CategoricalDtype.categories + CategoricalDtype.ordered + +Categorical data can be stored in a :class:`pandas.Categorical` + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + Categorical + +The alternative :meth:`Categorical.from_codes` constructor can be used when you +have the categories and integer codes already: + +.. autosummary:: + :toctree: api/ + + Categorical.from_codes + +The dtype information is available on the :class:`Categorical` + +.. autosummary:: + :toctree: api/ + + Categorical.dtype + Categorical.categories + Categorical.ordered + Categorical.codes + +``np.asarray(categorical)`` works by implementing the array interface. Be aware, that this converts +the :class:`Categorical` back to a NumPy array, so categories and order information is not preserved! + +.. autosummary:: + :toctree: api/ + + Categorical.__array__ + +A :class:`Categorical` can be stored in a :class:`Series` or :class:`DataFrame`. +To create a Series of dtype ``category``, use ``cat = s.astype(dtype)`` or +``Series(..., dtype=dtype)`` where ``dtype`` is either + +* the string ``'category'`` +* an instance of :class:`CategoricalDtype`. + +If the :class:`Series` is of dtype :class:`CategoricalDtype`, ``Series.cat`` can be used to change the categorical +data. See :ref:`api.series.cat` for more. + +.. _api.arrays.sparse: + +Sparse +------ + +Data where a single value is repeated many times (e.g. ``0`` or ``NaN``) may +be stored efficiently as a :class:`arrays.SparseArray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.SparseArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + SparseDtype + +The ``Series.sparse`` accessor may be used to access sparse-specific attributes +and methods if the :class:`Series` contains sparse values. See +:ref:`api.series.sparse` and :ref:`the user guide ` for more. + + +.. _api.arrays.string: + +Strings +------- + +When working with text data, where each valid element is a string or missing, +we recommend using :class:`StringDtype` (with the alias ``"string"``). + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.StringArray + arrays.ArrowStringArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + StringDtype + +The ``Series.str`` accessor is available for :class:`Series` backed by a :class:`arrays.StringArray`. +See :ref:`api.series.str` for more. + + +.. _api.arrays.bool: + +Nullable Boolean +---------------- + +The boolean dtype (with the alias ``"boolean"``) provides support for storing +boolean data (``True``, ``False``) with missing values, which is not possible +with a bool :class:`numpy.ndarray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.BooleanArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + BooleanDtype + + +.. Dtype attributes which are manually listed in their docstrings: including +.. it here to make sure a docstring page is built for them + +.. + .. autosummary:: + :toctree: api/ + + DatetimeTZDtype.unit + DatetimeTZDtype.tz + PeriodDtype.freq + IntervalDtype.subtype + +********* +Utilities +********* + +Constructors +------------ +.. autosummary:: + :toctree: api/ + + api.types.union_categoricals + api.types.infer_dtype + api.types.pandas_dtype + +Data type introspection +~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + api.types.is_bool_dtype + api.types.is_categorical_dtype + api.types.is_complex_dtype + api.types.is_datetime64_any_dtype + api.types.is_datetime64_dtype + api.types.is_datetime64_ns_dtype + api.types.is_datetime64tz_dtype + api.types.is_extension_type + api.types.is_extension_array_dtype + api.types.is_float_dtype + api.types.is_int64_dtype + api.types.is_integer_dtype + api.types.is_interval_dtype + api.types.is_numeric_dtype + api.types.is_object_dtype + api.types.is_period_dtype + api.types.is_signed_integer_dtype + api.types.is_string_dtype + api.types.is_timedelta64_dtype + api.types.is_timedelta64_ns_dtype + api.types.is_unsigned_integer_dtype + api.types.is_sparse + +Iterable introspection +~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + api.types.is_dict_like + api.types.is_file_like + api.types.is_list_like + api.types.is_named_tuple + api.types.is_iterator + +Scalar introspection +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + api.types.is_bool + api.types.is_categorical + api.types.is_complex + api.types.is_float + api.types.is_hashable + api.types.is_integer + api.types.is_interval + api.types.is_number + api.types.is_re + api.types.is_re_compilable + api.types.is_scalar diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst new file mode 100644 index 00000000..ce8d8d5c --- /dev/null +++ b/doc/source/reference/extensions.rst @@ -0,0 +1,77 @@ +{{ header }} + +.. _api.extensions: + +========== +Extensions +========== +.. currentmodule:: pandas + +These are primarily intended for library authors looking to extend pandas +objects. + +.. autosummary:: + :toctree: api/ + + api.extensions.register_extension_dtype + api.extensions.register_dataframe_accessor + api.extensions.register_series_accessor + api.extensions.register_index_accessor + api.extensions.ExtensionDtype + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + api.extensions.ExtensionArray + arrays.PandasArray + +.. We need this autosummary so that methods and attributes are generated. +.. Separate block, since they aren't classes. + + .. autosummary:: + :toctree: api/ + + api.extensions.ExtensionArray._concat_same_type + api.extensions.ExtensionArray._formatter + api.extensions.ExtensionArray._from_factorized + api.extensions.ExtensionArray._from_sequence + api.extensions.ExtensionArray._from_sequence_of_strings + api.extensions.ExtensionArray._reduce + api.extensions.ExtensionArray._values_for_argsort + api.extensions.ExtensionArray._values_for_factorize + api.extensions.ExtensionArray.argsort + api.extensions.ExtensionArray.astype + api.extensions.ExtensionArray.copy + api.extensions.ExtensionArray.view + api.extensions.ExtensionArray.dropna + api.extensions.ExtensionArray.equals + api.extensions.ExtensionArray.factorize + api.extensions.ExtensionArray.fillna + api.extensions.ExtensionArray.insert + api.extensions.ExtensionArray.isin + api.extensions.ExtensionArray.isna + api.extensions.ExtensionArray.ravel + api.extensions.ExtensionArray.repeat + api.extensions.ExtensionArray.searchsorted + api.extensions.ExtensionArray.shift + api.extensions.ExtensionArray.take + api.extensions.ExtensionArray.unique + api.extensions.ExtensionArray.dtype + api.extensions.ExtensionArray.nbytes + api.extensions.ExtensionArray.ndim + api.extensions.ExtensionArray.shape + api.extensions.ExtensionArray.tolist + +Additionally, we have some utility methods for ensuring your object +behaves correctly. + +.. 
autosummary:: + :toctree: api/ + + api.indexers.check_array_indexer + + +The sentinel ``pandas.api.extensions.no_default`` is used as the default +value in some methods. Use an ``is`` comparison to check if the user +provides a non-default value. diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst new file mode 100644 index 00000000..e71ee807 --- /dev/null +++ b/doc/source/reference/frame.rst @@ -0,0 +1,395 @@ +{{ header }} + +.. _api.dataframe: + +========= +DataFrame +========= +.. currentmodule:: pandas + +Constructor +~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame + +Attributes and underlying data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +**Axes** + +.. autosummary:: + :toctree: api/ + + DataFrame.index + DataFrame.columns + +.. autosummary:: + :toctree: api/ + + DataFrame.dtypes + DataFrame.info + DataFrame.select_dtypes + DataFrame.values + DataFrame.axes + DataFrame.ndim + DataFrame.size + DataFrame.shape + DataFrame.memory_usage + DataFrame.empty + DataFrame.set_flags + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.astype + DataFrame.convert_dtypes + DataFrame.infer_objects + DataFrame.copy + DataFrame.bool + +Indexing, iteration +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.head + DataFrame.at + DataFrame.iat + DataFrame.loc + DataFrame.iloc + DataFrame.insert + DataFrame.__iter__ + DataFrame.items + DataFrame.iteritems + DataFrame.keys + DataFrame.iterrows + DataFrame.itertuples + DataFrame.lookup + DataFrame.pop + DataFrame.tail + DataFrame.xs + DataFrame.get + DataFrame.isin + DataFrame.where + DataFrame.mask + DataFrame.query + +For more information on ``.at``, ``.iat``, ``.loc``, and +``.iloc``, see the :ref:`indexing documentation `. + +Binary operator functions +~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.add + DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow + DataFrame.dot + DataFrame.radd + DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + DataFrame.lt + DataFrame.gt + DataFrame.le + DataFrame.ge + DataFrame.ne + DataFrame.eq + DataFrame.combine + DataFrame.combine_first + +Function application, GroupBy & window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.apply + DataFrame.applymap + DataFrame.pipe + DataFrame.agg + DataFrame.aggregate + DataFrame.transform + DataFrame.groupby + DataFrame.rolling + DataFrame.expanding + DataFrame.ewm + +.. _api.dataframe.stats: + +Computations / descriptive stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.abs + DataFrame.all + DataFrame.any + DataFrame.clip + DataFrame.corr + DataFrame.corrwith + DataFrame.count + DataFrame.cov + DataFrame.cummax + DataFrame.cummin + DataFrame.cumprod + DataFrame.cumsum + DataFrame.describe + DataFrame.diff + DataFrame.eval + DataFrame.kurt + DataFrame.kurtosis + DataFrame.mad + DataFrame.max + DataFrame.mean + DataFrame.median + DataFrame.min + DataFrame.mode + DataFrame.pct_change + DataFrame.prod + DataFrame.product + DataFrame.quantile + DataFrame.rank + DataFrame.round + DataFrame.sem + DataFrame.skew + DataFrame.sum + DataFrame.std + DataFrame.var + DataFrame.nunique + DataFrame.value_counts + +Reindexing / selection / label manipulation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.add_prefix + DataFrame.add_suffix + DataFrame.align + DataFrame.at_time + DataFrame.between_time + DataFrame.drop + DataFrame.drop_duplicates + DataFrame.duplicated + DataFrame.equals + DataFrame.filter + DataFrame.first + DataFrame.head + DataFrame.idxmax + DataFrame.idxmin + DataFrame.last + DataFrame.reindex + DataFrame.reindex_like + DataFrame.rename + DataFrame.rename_axis + DataFrame.reset_index + DataFrame.sample + DataFrame.set_axis + DataFrame.set_index + DataFrame.tail + DataFrame.take + DataFrame.truncate + +.. _api.dataframe.missing: + +Missing data handling +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.backfill + DataFrame.bfill + DataFrame.dropna + DataFrame.ffill + DataFrame.fillna + DataFrame.interpolate + DataFrame.isna + DataFrame.isnull + DataFrame.notna + DataFrame.notnull + DataFrame.pad + DataFrame.replace + +Reshaping, sorting, transposing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.droplevel + DataFrame.pivot + DataFrame.pivot_table + DataFrame.reorder_levels + DataFrame.sort_values + DataFrame.sort_index + DataFrame.nlargest + DataFrame.nsmallest + DataFrame.swaplevel + DataFrame.stack + DataFrame.unstack + DataFrame.swapaxes + DataFrame.melt + DataFrame.explode + DataFrame.squeeze + DataFrame.to_xarray + DataFrame.T + DataFrame.transpose + +Combining / comparing / joining / merging +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.append + DataFrame.assign + DataFrame.compare + DataFrame.join + DataFrame.merge + DataFrame.update + +Time Series-related +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.asfreq + DataFrame.asof + DataFrame.shift + DataFrame.slice_shift + DataFrame.tshift + DataFrame.first_valid_index + DataFrame.last_valid_index + DataFrame.resample + DataFrame.to_period + DataFrame.to_timestamp + DataFrame.tz_convert + DataFrame.tz_localize + +.. _api.frame.flags: + +Flags +~~~~~ + +Flags refer to attributes of the pandas object. Properties of the dataset (like +the date is was recorded, the URL it was accessed from, etc.) should be stored +in :attr:`DataFrame.attrs`. + +.. autosummary:: + :toctree: api/ + + Flags + + +.. _api.frame.metadata: + +Metadata +~~~~~~~~ + +:attr:`DataFrame.attrs` is a dictionary for storing global metadata for this DataFrame. + +.. warning:: ``DataFrame.attrs`` is considered experimental and may change without warning. + +.. autosummary:: + :toctree: api/ + + DataFrame.attrs + + +.. _api.dataframe.plotting: + +Plotting +~~~~~~~~ +``DataFrame.plot`` is both a callable method and a namespace attribute for +specific plotting methods of the form ``DataFrame.plot.``. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_callable.rst + + DataFrame.plot + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + DataFrame.plot.area + DataFrame.plot.bar + DataFrame.plot.barh + DataFrame.plot.box + DataFrame.plot.density + DataFrame.plot.hexbin + DataFrame.plot.hist + DataFrame.plot.kde + DataFrame.plot.line + DataFrame.plot.pie + DataFrame.plot.scatter + +.. autosummary:: + :toctree: api/ + + DataFrame.boxplot + DataFrame.hist + + +.. _api.frame.sparse: + +Sparse accessor +~~~~~~~~~~~~~~~ + +Sparse-dtype specific methods and attributes are provided under the +``DataFrame.sparse`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + DataFrame.sparse.density + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + DataFrame.sparse.from_spmatrix + DataFrame.sparse.to_coo + DataFrame.sparse.to_dense + + +Serialization / IO / conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.from_dict + DataFrame.from_records + DataFrame.to_orc + DataFrame.to_parquet + DataFrame.to_pickle + DataFrame.to_csv + DataFrame.to_hdf + DataFrame.to_sql + DataFrame.to_dict + DataFrame.to_excel + DataFrame.to_json + DataFrame.to_html + DataFrame.to_feather + DataFrame.to_latex + DataFrame.to_stata + DataFrame.to_gbq + DataFrame.to_records + DataFrame.to_string + DataFrame.to_clipboard + DataFrame.to_markdown + DataFrame.style + DataFrame.__dataframe__ diff --git a/doc/source/reference/general_functions.rst b/doc/source/reference/general_functions.rst new file mode 100644 index 00000000..474e37a8 --- /dev/null +++ b/doc/source/reference/general_functions.rst @@ -0,0 +1,88 @@ +{{ header }} + +.. _api.general_functions: + +================= +General functions +================= +.. currentmodule:: pandas + +Data manipulations +~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + melt + pivot + pivot_table + crosstab + cut + qcut + merge + merge_ordered + merge_asof + concat + get_dummies + from_dummies + factorize + unique + wide_to_long + +Top-level missing data +~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + isna + isnull + notna + notnull + +Top-level dealing with numeric data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + to_numeric + +Top-level dealing with datetimelike data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + to_datetime + to_timedelta + date_range + bdate_range + period_range + timedelta_range + infer_freq + +Top-level dealing with Interval data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + interval_range + +Top-level evaluation +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + eval + +Hashing +~~~~~~~ +.. autosummary:: + :toctree: api/ + + util.hash_array + util.hash_pandas_object + +Importing from other DataFrame libraries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + api.interchange.from_dataframe diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst new file mode 100644 index 00000000..51bd6590 --- /dev/null +++ b/doc/source/reference/groupby.rst @@ -0,0 +1,145 @@ +{{ header }} + +.. _api.groupby: + +======= +GroupBy +======= +.. currentmodule:: pandas.core.groupby + +GroupBy objects are returned by groupby calls: :func:`pandas.DataFrame.groupby`, :func:`pandas.Series.groupby`, etc. + +Indexing, iteration +------------------- +.. autosummary:: + :toctree: api/ + + GroupBy.__iter__ + GroupBy.groups + GroupBy.indices + GroupBy.get_group + +.. currentmodule:: pandas + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + Grouper + +.. currentmodule:: pandas.core.groupby + +Function application +-------------------- +.. autosummary:: + :toctree: api/ + + GroupBy.apply + GroupBy.agg + SeriesGroupBy.aggregate + DataFrameGroupBy.aggregate + SeriesGroupBy.transform + DataFrameGroupBy.transform + GroupBy.pipe + +Computations / descriptive stats +-------------------------------- +.. 
autosummary:: + :toctree: api/ + + GroupBy.all + GroupBy.any + GroupBy.bfill + GroupBy.backfill + GroupBy.count + GroupBy.cumcount + GroupBy.cummax + GroupBy.cummin + GroupBy.cumprod + GroupBy.cumsum + GroupBy.ffill + GroupBy.first + GroupBy.head + GroupBy.last + GroupBy.max + GroupBy.mean + GroupBy.median + GroupBy.min + GroupBy.ngroup + GroupBy.nth + GroupBy.ohlc + GroupBy.pad + GroupBy.prod + GroupBy.rank + GroupBy.pct_change + GroupBy.size + GroupBy.sem + GroupBy.std + GroupBy.sum + GroupBy.var + GroupBy.tail + +The following methods are available in both ``SeriesGroupBy`` and +``DataFrameGroupBy`` objects, but may differ slightly, usually in that +the ``DataFrameGroupBy`` version usually permits the specification of an +axis argument, and often an argument indicating whether to restrict +application to columns of a specific data type. + +.. autosummary:: + :toctree: api/ + + DataFrameGroupBy.all + DataFrameGroupBy.any + DataFrameGroupBy.backfill + DataFrameGroupBy.bfill + DataFrameGroupBy.corr + DataFrameGroupBy.count + DataFrameGroupBy.cov + DataFrameGroupBy.cumcount + DataFrameGroupBy.cummax + DataFrameGroupBy.cummin + DataFrameGroupBy.cumprod + DataFrameGroupBy.cumsum + DataFrameGroupBy.describe + DataFrameGroupBy.diff + DataFrameGroupBy.ffill + DataFrameGroupBy.fillna + DataFrameGroupBy.filter + DataFrameGroupBy.hist + DataFrameGroupBy.idxmax + DataFrameGroupBy.idxmin + DataFrameGroupBy.mad + DataFrameGroupBy.nunique + DataFrameGroupBy.pad + DataFrameGroupBy.pct_change + DataFrameGroupBy.plot + DataFrameGroupBy.quantile + DataFrameGroupBy.rank + DataFrameGroupBy.resample + DataFrameGroupBy.sample + DataFrameGroupBy.shift + DataFrameGroupBy.size + DataFrameGroupBy.skew + DataFrameGroupBy.take + DataFrameGroupBy.tshift + DataFrameGroupBy.value_counts + +The following methods are available only for ``SeriesGroupBy`` objects. + +.. autosummary:: + :toctree: api/ + + SeriesGroupBy.hist + SeriesGroupBy.nlargest + SeriesGroupBy.nsmallest + SeriesGroupBy.unique + SeriesGroupBy.is_monotonic_increasing + SeriesGroupBy.is_monotonic_decreasing + +The following methods are available only for ``DataFrameGroupBy`` objects. + +.. autosummary:: + :toctree: api/ + + DataFrameGroupBy.corrwith + DataFrameGroupBy.boxplot diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst new file mode 100644 index 00000000..fc920db6 --- /dev/null +++ b/doc/source/reference/index.rst @@ -0,0 +1,63 @@ +{{ header }} + +.. _api: + +============= +API reference +============= + +This page gives an overview of all public pandas objects, functions and +methods. All classes and functions exposed in ``pandas.*`` namespace are public. + +Some subpackages are public which include ``pandas.errors``, +``pandas.plotting``, and ``pandas.testing``. Public functions in +``pandas.io`` and ``pandas.tseries`` submodules are mentioned in +the documentation. ``pandas.api.types`` subpackage holds some +public functions related to data types in pandas. + +.. warning:: + + The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed. + +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + +.. toctree:: + :maxdepth: 2 + + io + general_functions + series + frame + arrays + indexing + offset_frequency + window + groupby + resampling + style + plotting + options + extensions + testing + +.. This is to prevent warnings in the doc build. We don't want to encourage +.. these methods. + +.. + .. 
toctree:: + + api/pandas.Index.asi8 + api/pandas.Index.holds_integer + api/pandas.Index.is_type_compatible + api/pandas.Index.nlevels + api/pandas.Index.sort + + +.. Can't convince sphinx to generate toctree for this class attribute. +.. So we do it manually to avoid a warning + +.. + .. toctree:: + + api/pandas.api.extensions.ExtensionDtype.na_value diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst new file mode 100644 index 00000000..ddfef140 --- /dev/null +++ b/doc/source/reference/indexing.rst @@ -0,0 +1,495 @@ +{{ header }} + +.. _api.indexing: + +============= +Index objects +============= + +Index +----- +.. currentmodule:: pandas + +**Many of these methods or variants thereof are available on the objects +that contain an index (Series/DataFrame) and those should most likely be +used before calling these methods directly.** + +.. autosummary:: + :toctree: api/ + + Index + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.values + Index.is_monotonic + Index.is_monotonic_increasing + Index.is_monotonic_decreasing + Index.is_unique + Index.has_duplicates + Index.hasnans + Index.dtype + Index.inferred_type + Index.is_all_dates + Index.shape + Index.name + Index.names + Index.nbytes + Index.ndim + Index.size + Index.empty + Index.T + Index.memory_usage + +Modifying and computations +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.all + Index.any + Index.argmin + Index.argmax + Index.copy + Index.delete + Index.drop + Index.drop_duplicates + Index.duplicated + Index.equals + Index.factorize + Index.identical + Index.insert + Index.is_ + Index.is_boolean + Index.is_categorical + Index.is_floating + Index.is_integer + Index.is_interval + Index.is_mixed + Index.is_numeric + Index.is_object + Index.min + Index.max + Index.reindex + Index.rename + Index.repeat + Index.where + Index.take + Index.putmask + Index.unique + Index.nunique + Index.value_counts + +Compatibility with MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.set_names + Index.droplevel + +Missing values +~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.fillna + Index.dropna + Index.isna + Index.notna + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.astype + Index.item + Index.map + Index.ravel + Index.to_list + Index.to_native_types + Index.to_series + Index.to_frame + Index.view + +Sorting +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.argsort + Index.searchsorted + Index.sort_values + +Time-specific operations +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.shift + +Combining / joining / set operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.append + Index.join + Index.intersection + Index.union + Index.difference + Index.symmetric_difference + +Selecting +~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.asof + Index.asof_locs + Index.get_indexer + Index.get_indexer_for + Index.get_indexer_non_unique + Index.get_level_values + Index.get_loc + Index.get_slice_bound + Index.get_value + Index.isin + Index.slice_indexer + Index.slice_locs + +.. _api.numericindex: + +Numeric Index +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + RangeIndex + Int64Index + UInt64Index + Float64Index + +.. We need this autosummary so that the methods are generated. +.. Separate block, since they aren't classes. + +.. 
autosummary:: + :toctree: api/ + + RangeIndex.start + RangeIndex.stop + RangeIndex.step + RangeIndex.from_range + +.. _api.categoricalindex: + +CategoricalIndex +---------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + CategoricalIndex + +Categorical components +~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CategoricalIndex.codes + CategoricalIndex.categories + CategoricalIndex.ordered + CategoricalIndex.rename_categories + CategoricalIndex.reorder_categories + CategoricalIndex.add_categories + CategoricalIndex.remove_categories + CategoricalIndex.remove_unused_categories + CategoricalIndex.set_categories + CategoricalIndex.as_ordered + CategoricalIndex.as_unordered + +Modifying and computations +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CategoricalIndex.map + CategoricalIndex.equals + +.. _api.intervalindex: + +IntervalIndex +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + IntervalIndex + +IntervalIndex components +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + IntervalIndex.from_arrays + IntervalIndex.from_tuples + IntervalIndex.from_breaks + IntervalIndex.left + IntervalIndex.right + IntervalIndex.mid + IntervalIndex.closed + IntervalIndex.length + IntervalIndex.values + IntervalIndex.is_empty + IntervalIndex.is_non_overlapping_monotonic + IntervalIndex.is_overlapping + IntervalIndex.get_loc + IntervalIndex.get_indexer + IntervalIndex.set_closed + IntervalIndex.contains + IntervalIndex.overlaps + IntervalIndex.to_tuples + +.. _api.multiindex: + +MultiIndex +---------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + MultiIndex + +.. autosummary:: + :toctree: api/ + + IndexSlice + +MultiIndex constructors +~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.from_arrays + MultiIndex.from_tuples + MultiIndex.from_product + MultiIndex.from_frame + +MultiIndex properties +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.names + MultiIndex.levels + MultiIndex.codes + MultiIndex.nlevels + MultiIndex.levshape + MultiIndex.dtypes + +MultiIndex components +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.set_levels + MultiIndex.set_codes + MultiIndex.to_flat_index + MultiIndex.to_frame + MultiIndex.sortlevel + MultiIndex.droplevel + MultiIndex.swaplevel + MultiIndex.reorder_levels + MultiIndex.remove_unused_levels + +MultiIndex selecting +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.get_loc + MultiIndex.get_locs + MultiIndex.get_loc_level + MultiIndex.get_indexer + MultiIndex.get_level_values + +.. _api.datetimeindex: + +DatetimeIndex +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + DatetimeIndex + +Time/date components +~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DatetimeIndex.year + DatetimeIndex.month + DatetimeIndex.day + DatetimeIndex.hour + DatetimeIndex.minute + DatetimeIndex.second + DatetimeIndex.microsecond + DatetimeIndex.nanosecond + DatetimeIndex.date + DatetimeIndex.time + DatetimeIndex.timetz + DatetimeIndex.dayofyear + DatetimeIndex.day_of_year + DatetimeIndex.weekofyear + DatetimeIndex.week + DatetimeIndex.dayofweek + DatetimeIndex.day_of_week + DatetimeIndex.weekday + DatetimeIndex.quarter + DatetimeIndex.tz + DatetimeIndex.freq + DatetimeIndex.freqstr + DatetimeIndex.is_month_start + DatetimeIndex.is_month_end + DatetimeIndex.is_quarter_start + DatetimeIndex.is_quarter_end + DatetimeIndex.is_year_start + DatetimeIndex.is_year_end + DatetimeIndex.is_leap_year + DatetimeIndex.inferred_freq + +Selecting +~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.indexer_at_time + DatetimeIndex.indexer_between_time + + +Time-specific operations +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.normalize + DatetimeIndex.strftime + DatetimeIndex.snap + DatetimeIndex.tz_convert + DatetimeIndex.tz_localize + DatetimeIndex.round + DatetimeIndex.floor + DatetimeIndex.ceil + DatetimeIndex.month_name + DatetimeIndex.day_name + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.to_period + DatetimeIndex.to_perioddelta + DatetimeIndex.to_pydatetime + DatetimeIndex.to_series + DatetimeIndex.to_frame + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.mean + DatetimeIndex.std + +TimedeltaIndex +-------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + TimedeltaIndex + +Components +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.days + TimedeltaIndex.seconds + TimedeltaIndex.microseconds + TimedeltaIndex.nanoseconds + TimedeltaIndex.components + TimedeltaIndex.inferred_freq + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.to_pytimedelta + TimedeltaIndex.to_series + TimedeltaIndex.round + TimedeltaIndex.floor + TimedeltaIndex.ceil + TimedeltaIndex.to_frame + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.mean + +.. currentmodule:: pandas + +PeriodIndex +----------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + PeriodIndex + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + PeriodIndex.day + PeriodIndex.dayofweek + PeriodIndex.day_of_week + PeriodIndex.dayofyear + PeriodIndex.day_of_year + PeriodIndex.days_in_month + PeriodIndex.daysinmonth + PeriodIndex.end_time + PeriodIndex.freq + PeriodIndex.freqstr + PeriodIndex.hour + PeriodIndex.is_leap_year + PeriodIndex.minute + PeriodIndex.month + PeriodIndex.quarter + PeriodIndex.qyear + PeriodIndex.second + PeriodIndex.start_time + PeriodIndex.week + PeriodIndex.weekday + PeriodIndex.weekofyear + PeriodIndex.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + PeriodIndex.asfreq + PeriodIndex.strftime + PeriodIndex.to_timestamp diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst new file mode 100644 index 00000000..425b5f81 --- /dev/null +++ b/doc/source/reference/io.rst @@ -0,0 +1,211 @@ +{{ header }} + +.. _api.io: + +============ +Input/output +============ +.. currentmodule:: pandas + +Pickling +~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_pickle + DataFrame.to_pickle + +Flat file +~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + read_table + read_csv + DataFrame.to_csv + read_fwf + +Clipboard +~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_clipboard + DataFrame.to_clipboard + +Excel +~~~~~ +.. autosummary:: + :toctree: api/ + + read_excel + DataFrame.to_excel + ExcelFile.parse + +.. currentmodule:: pandas.io.formats.style + +.. autosummary:: + :toctree: api/ + + Styler.to_excel + +.. currentmodule:: pandas + +.. autosummary:: + :toctree: api/ + + ExcelWriter + +.. currentmodule:: pandas + +JSON +~~~~ +.. autosummary:: + :toctree: api/ + + read_json + json_normalize + DataFrame.to_json + +.. currentmodule:: pandas.io.json + +.. autosummary:: + :toctree: api/ + + build_table_schema + +.. currentmodule:: pandas + +HTML +~~~~ +.. autosummary:: + :toctree: api/ + + read_html + DataFrame.to_html + +.. currentmodule:: pandas.io.formats.style + +.. autosummary:: + :toctree: api/ + + Styler.to_html + +.. currentmodule:: pandas + +XML +~~~~ +.. autosummary:: + :toctree: api/ + + read_xml + DataFrame.to_xml + +Latex +~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.to_latex + +.. currentmodule:: pandas.io.formats.style + +.. autosummary:: + :toctree: api/ + + Styler.to_latex + +.. currentmodule:: pandas + +HDFStore: PyTables (HDF5) +~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_hdf + HDFStore.put + HDFStore.append + HDFStore.get + HDFStore.select + HDFStore.info + HDFStore.keys + HDFStore.groups + HDFStore.walk + +.. warning:: + + One can store a subclass of :class:`DataFrame` or :class:`Series` to HDF5, + but the type of the subclass is lost upon storing. + +Feather +~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_feather + DataFrame.to_feather + +Parquet +~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_parquet + DataFrame.to_parquet + +ORC +~~~ +.. autosummary:: + :toctree: api/ + + read_orc + DataFrame.to_orc + +SAS +~~~ +.. autosummary:: + :toctree: api/ + + read_sas + +SPSS +~~~~ +.. autosummary:: + :toctree: api/ + + read_spss + +SQL +~~~ +.. autosummary:: + :toctree: api/ + + read_sql_table + read_sql_query + read_sql + DataFrame.to_sql + +Google BigQuery +~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_gbq + +STATA +~~~~~ +.. autosummary:: + :toctree: api/ + + read_stata + DataFrame.to_stata + +.. currentmodule:: pandas.io.stata + +.. autosummary:: + :toctree: api/ + + StataReader.data_label + StataReader.value_labels + StataReader.variable_labels + StataWriter.write_file diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst new file mode 100644 index 00000000..f0e531cd --- /dev/null +++ b/doc/source/reference/offset_frequency.rst @@ -0,0 +1,1537 @@ +{{ header }} + +.. _api.dateoffsets: + +============ +Date offsets +============ +.. currentmodule:: pandas.tseries.offsets + +DateOffset +---------- +.. autosummary:: + :toctree: api/ + + DateOffset + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DateOffset.freqstr + DateOffset.kwds + DateOffset.name + DateOffset.nanos + DateOffset.normalize + DateOffset.rule_code + DateOffset.n + DateOffset.is_month_start + DateOffset.is_month_end + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DateOffset.apply + DateOffset.apply_index + DateOffset.copy + DateOffset.isAnchored + DateOffset.onOffset + DateOffset.is_anchored + DateOffset.is_on_offset + DateOffset.__call__ + DateOffset.is_month_start + DateOffset.is_month_end + DateOffset.is_quarter_start + DateOffset.is_quarter_end + DateOffset.is_year_start + DateOffset.is_year_end + +BusinessDay +----------- + +.. autosummary:: + :toctree: api/ + + BusinessDay + +Alias: + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + BDay + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessDay.freqstr + BusinessDay.kwds + BusinessDay.name + BusinessDay.nanos + BusinessDay.normalize + BusinessDay.rule_code + BusinessDay.n + BusinessDay.weekmask + BusinessDay.holidays + BusinessDay.calendar + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessDay.apply + BusinessDay.apply_index + BusinessDay.copy + BusinessDay.isAnchored + BusinessDay.onOffset + BusinessDay.is_anchored + BusinessDay.is_on_offset + BusinessDay.__call__ + BusinessDay.is_month_start + BusinessDay.is_month_end + BusinessDay.is_quarter_start + BusinessDay.is_quarter_end + BusinessDay.is_year_start + BusinessDay.is_year_end + +BusinessHour +------------ +.. autosummary:: + :toctree: api/ + + BusinessHour + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessHour.freqstr + BusinessHour.kwds + BusinessHour.name + BusinessHour.nanos + BusinessHour.normalize + BusinessHour.rule_code + BusinessHour.n + BusinessHour.start + BusinessHour.end + BusinessHour.weekmask + BusinessHour.holidays + BusinessHour.calendar + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessHour.apply + BusinessHour.apply_index + BusinessHour.copy + BusinessHour.isAnchored + BusinessHour.onOffset + BusinessHour.is_anchored + BusinessHour.is_on_offset + BusinessHour.__call__ + BusinessHour.is_month_start + BusinessHour.is_month_end + BusinessHour.is_quarter_start + BusinessHour.is_quarter_end + BusinessHour.is_year_start + BusinessHour.is_year_end + +CustomBusinessDay +----------------- + +.. autosummary:: + :toctree: api/ + + CustomBusinessDay + +Alias: + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + CDay + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessDay.freqstr + CustomBusinessDay.kwds + CustomBusinessDay.name + CustomBusinessDay.nanos + CustomBusinessDay.normalize + CustomBusinessDay.rule_code + CustomBusinessDay.n + CustomBusinessDay.weekmask + CustomBusinessDay.calendar + CustomBusinessDay.holidays + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessDay.apply_index + CustomBusinessDay.apply + CustomBusinessDay.copy + CustomBusinessDay.isAnchored + CustomBusinessDay.onOffset + CustomBusinessDay.is_anchored + CustomBusinessDay.is_on_offset + CustomBusinessDay.__call__ + CustomBusinessDay.is_month_start + CustomBusinessDay.is_month_end + CustomBusinessDay.is_quarter_start + CustomBusinessDay.is_quarter_end + CustomBusinessDay.is_year_start + CustomBusinessDay.is_year_end + +CustomBusinessHour +------------------ +.. autosummary:: + :toctree: api/ + + CustomBusinessHour + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + CustomBusinessHour.freqstr + CustomBusinessHour.kwds + CustomBusinessHour.name + CustomBusinessHour.nanos + CustomBusinessHour.normalize + CustomBusinessHour.rule_code + CustomBusinessHour.n + CustomBusinessHour.weekmask + CustomBusinessHour.calendar + CustomBusinessHour.holidays + CustomBusinessHour.start + CustomBusinessHour.end + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessHour.apply + CustomBusinessHour.apply_index + CustomBusinessHour.copy + CustomBusinessHour.isAnchored + CustomBusinessHour.onOffset + CustomBusinessHour.is_anchored + CustomBusinessHour.is_on_offset + CustomBusinessHour.__call__ + CustomBusinessHour.is_month_start + CustomBusinessHour.is_month_end + CustomBusinessHour.is_quarter_start + CustomBusinessHour.is_quarter_end + CustomBusinessHour.is_year_start + CustomBusinessHour.is_year_end + +MonthEnd +-------- +.. autosummary:: + :toctree: api/ + + MonthEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthEnd.freqstr + MonthEnd.kwds + MonthEnd.name + MonthEnd.nanos + MonthEnd.normalize + MonthEnd.rule_code + MonthEnd.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthEnd.apply + MonthEnd.apply_index + MonthEnd.copy + MonthEnd.isAnchored + MonthEnd.onOffset + MonthEnd.is_anchored + MonthEnd.is_on_offset + MonthEnd.__call__ + MonthEnd.is_month_start + MonthEnd.is_month_end + MonthEnd.is_quarter_start + MonthEnd.is_quarter_end + MonthEnd.is_year_start + MonthEnd.is_year_end + +MonthBegin +---------- +.. autosummary:: + :toctree: api/ + + MonthBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthBegin.freqstr + MonthBegin.kwds + MonthBegin.name + MonthBegin.nanos + MonthBegin.normalize + MonthBegin.rule_code + MonthBegin.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthBegin.apply + MonthBegin.apply_index + MonthBegin.copy + MonthBegin.isAnchored + MonthBegin.onOffset + MonthBegin.is_anchored + MonthBegin.is_on_offset + MonthBegin.__call__ + MonthBegin.is_month_start + MonthBegin.is_month_end + MonthBegin.is_quarter_start + MonthBegin.is_quarter_end + MonthBegin.is_year_start + MonthBegin.is_year_end + +BusinessMonthEnd +---------------- + +.. autosummary:: + :toctree: api/ + + BusinessMonthEnd + +Alias: + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + BMonthEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessMonthEnd.freqstr + BusinessMonthEnd.kwds + BusinessMonthEnd.name + BusinessMonthEnd.nanos + BusinessMonthEnd.normalize + BusinessMonthEnd.rule_code + BusinessMonthEnd.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessMonthEnd.apply + BusinessMonthEnd.apply_index + BusinessMonthEnd.copy + BusinessMonthEnd.isAnchored + BusinessMonthEnd.onOffset + BusinessMonthEnd.is_anchored + BusinessMonthEnd.is_on_offset + BusinessMonthEnd.__call__ + BusinessMonthEnd.is_month_start + BusinessMonthEnd.is_month_end + BusinessMonthEnd.is_quarter_start + BusinessMonthEnd.is_quarter_end + BusinessMonthEnd.is_year_start + BusinessMonthEnd.is_year_end + +BusinessMonthBegin +------------------ + +.. autosummary:: + :toctree: api/ + + BusinessMonthBegin + +Alias: + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + BMonthBegin + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + BusinessMonthBegin.freqstr + BusinessMonthBegin.kwds + BusinessMonthBegin.name + BusinessMonthBegin.nanos + BusinessMonthBegin.normalize + BusinessMonthBegin.rule_code + BusinessMonthBegin.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessMonthBegin.apply + BusinessMonthBegin.apply_index + BusinessMonthBegin.copy + BusinessMonthBegin.isAnchored + BusinessMonthBegin.onOffset + BusinessMonthBegin.is_anchored + BusinessMonthBegin.is_on_offset + BusinessMonthBegin.__call__ + BusinessMonthBegin.is_month_start + BusinessMonthBegin.is_month_end + BusinessMonthBegin.is_quarter_start + BusinessMonthBegin.is_quarter_end + BusinessMonthBegin.is_year_start + BusinessMonthBegin.is_year_end + +CustomBusinessMonthEnd +---------------------- + +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthEnd + +Alias: + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + CBMonthEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthEnd.freqstr + CustomBusinessMonthEnd.kwds + CustomBusinessMonthEnd.m_offset + CustomBusinessMonthEnd.name + CustomBusinessMonthEnd.nanos + CustomBusinessMonthEnd.normalize + CustomBusinessMonthEnd.rule_code + CustomBusinessMonthEnd.n + CustomBusinessMonthEnd.weekmask + CustomBusinessMonthEnd.calendar + CustomBusinessMonthEnd.holidays + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthEnd.apply + CustomBusinessMonthEnd.apply_index + CustomBusinessMonthEnd.copy + CustomBusinessMonthEnd.isAnchored + CustomBusinessMonthEnd.onOffset + CustomBusinessMonthEnd.is_anchored + CustomBusinessMonthEnd.is_on_offset + CustomBusinessMonthEnd.__call__ + CustomBusinessMonthEnd.is_month_start + CustomBusinessMonthEnd.is_month_end + CustomBusinessMonthEnd.is_quarter_start + CustomBusinessMonthEnd.is_quarter_end + CustomBusinessMonthEnd.is_year_start + CustomBusinessMonthEnd.is_year_end + +CustomBusinessMonthBegin +------------------------ + +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthBegin + +Alias: + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + CBMonthBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthBegin.freqstr + CustomBusinessMonthBegin.kwds + CustomBusinessMonthBegin.m_offset + CustomBusinessMonthBegin.name + CustomBusinessMonthBegin.nanos + CustomBusinessMonthBegin.normalize + CustomBusinessMonthBegin.rule_code + CustomBusinessMonthBegin.n + CustomBusinessMonthBegin.weekmask + CustomBusinessMonthBegin.calendar + CustomBusinessMonthBegin.holidays + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthBegin.apply + CustomBusinessMonthBegin.apply_index + CustomBusinessMonthBegin.copy + CustomBusinessMonthBegin.isAnchored + CustomBusinessMonthBegin.onOffset + CustomBusinessMonthBegin.is_anchored + CustomBusinessMonthBegin.is_on_offset + CustomBusinessMonthBegin.__call__ + CustomBusinessMonthBegin.is_month_start + CustomBusinessMonthBegin.is_month_end + CustomBusinessMonthBegin.is_quarter_start + CustomBusinessMonthBegin.is_quarter_end + CustomBusinessMonthBegin.is_year_start + CustomBusinessMonthBegin.is_year_end + +SemiMonthEnd +------------ +.. autosummary:: + :toctree: api/ + + SemiMonthEnd + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + SemiMonthEnd.freqstr + SemiMonthEnd.kwds + SemiMonthEnd.name + SemiMonthEnd.nanos + SemiMonthEnd.normalize + SemiMonthEnd.rule_code + SemiMonthEnd.n + SemiMonthEnd.day_of_month + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthEnd.apply + SemiMonthEnd.apply_index + SemiMonthEnd.copy + SemiMonthEnd.isAnchored + SemiMonthEnd.onOffset + SemiMonthEnd.is_anchored + SemiMonthEnd.is_on_offset + SemiMonthEnd.__call__ + SemiMonthEnd.is_month_start + SemiMonthEnd.is_month_end + SemiMonthEnd.is_quarter_start + SemiMonthEnd.is_quarter_end + SemiMonthEnd.is_year_start + SemiMonthEnd.is_year_end + +SemiMonthBegin +-------------- +.. autosummary:: + :toctree: api/ + + SemiMonthBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthBegin.freqstr + SemiMonthBegin.kwds + SemiMonthBegin.name + SemiMonthBegin.nanos + SemiMonthBegin.normalize + SemiMonthBegin.rule_code + SemiMonthBegin.n + SemiMonthBegin.day_of_month + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthBegin.apply + SemiMonthBegin.apply_index + SemiMonthBegin.copy + SemiMonthBegin.isAnchored + SemiMonthBegin.onOffset + SemiMonthBegin.is_anchored + SemiMonthBegin.is_on_offset + SemiMonthBegin.__call__ + SemiMonthBegin.is_month_start + SemiMonthBegin.is_month_end + SemiMonthBegin.is_quarter_start + SemiMonthBegin.is_quarter_end + SemiMonthBegin.is_year_start + SemiMonthBegin.is_year_end + +Week +---- +.. autosummary:: + :toctree: api/ + + Week + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Week.freqstr + Week.kwds + Week.name + Week.nanos + Week.normalize + Week.rule_code + Week.n + Week.weekday + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Week.apply + Week.apply_index + Week.copy + Week.isAnchored + Week.onOffset + Week.is_anchored + Week.is_on_offset + Week.__call__ + Week.is_month_start + Week.is_month_end + Week.is_quarter_start + Week.is_quarter_end + Week.is_year_start + Week.is_year_end + +WeekOfMonth +----------- +.. autosummary:: + :toctree: api/ + + WeekOfMonth + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + WeekOfMonth.freqstr + WeekOfMonth.kwds + WeekOfMonth.name + WeekOfMonth.nanos + WeekOfMonth.normalize + WeekOfMonth.rule_code + WeekOfMonth.n + WeekOfMonth.week + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + WeekOfMonth.apply + WeekOfMonth.apply_index + WeekOfMonth.copy + WeekOfMonth.isAnchored + WeekOfMonth.onOffset + WeekOfMonth.is_anchored + WeekOfMonth.is_on_offset + WeekOfMonth.__call__ + WeekOfMonth.weekday + WeekOfMonth.is_month_start + WeekOfMonth.is_month_end + WeekOfMonth.is_quarter_start + WeekOfMonth.is_quarter_end + WeekOfMonth.is_year_start + WeekOfMonth.is_year_end + +LastWeekOfMonth +--------------- +.. autosummary:: + :toctree: api/ + + LastWeekOfMonth + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + LastWeekOfMonth.freqstr + LastWeekOfMonth.kwds + LastWeekOfMonth.name + LastWeekOfMonth.nanos + LastWeekOfMonth.normalize + LastWeekOfMonth.rule_code + LastWeekOfMonth.n + LastWeekOfMonth.weekday + LastWeekOfMonth.week + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + LastWeekOfMonth.apply + LastWeekOfMonth.apply_index + LastWeekOfMonth.copy + LastWeekOfMonth.isAnchored + LastWeekOfMonth.onOffset + LastWeekOfMonth.is_anchored + LastWeekOfMonth.is_on_offset + LastWeekOfMonth.__call__ + LastWeekOfMonth.is_month_start + LastWeekOfMonth.is_month_end + LastWeekOfMonth.is_quarter_start + LastWeekOfMonth.is_quarter_end + LastWeekOfMonth.is_year_start + LastWeekOfMonth.is_year_end + +BQuarterEnd +----------- +.. autosummary:: + :toctree: api/ + + BQuarterEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BQuarterEnd.freqstr + BQuarterEnd.kwds + BQuarterEnd.name + BQuarterEnd.nanos + BQuarterEnd.normalize + BQuarterEnd.rule_code + BQuarterEnd.n + BQuarterEnd.startingMonth + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BQuarterEnd.apply + BQuarterEnd.apply_index + BQuarterEnd.copy + BQuarterEnd.isAnchored + BQuarterEnd.onOffset + BQuarterEnd.is_anchored + BQuarterEnd.is_on_offset + BQuarterEnd.__call__ + BQuarterEnd.is_month_start + BQuarterEnd.is_month_end + BQuarterEnd.is_quarter_start + BQuarterEnd.is_quarter_end + BQuarterEnd.is_year_start + BQuarterEnd.is_year_end + +BQuarterBegin +------------- +.. autosummary:: + :toctree: api/ + + BQuarterBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BQuarterBegin.freqstr + BQuarterBegin.kwds + BQuarterBegin.name + BQuarterBegin.nanos + BQuarterBegin.normalize + BQuarterBegin.rule_code + BQuarterBegin.n + BQuarterBegin.startingMonth + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BQuarterBegin.apply + BQuarterBegin.apply_index + BQuarterBegin.copy + BQuarterBegin.isAnchored + BQuarterBegin.onOffset + BQuarterBegin.is_anchored + BQuarterBegin.is_on_offset + BQuarterBegin.__call__ + BQuarterBegin.is_month_start + BQuarterBegin.is_month_end + BQuarterBegin.is_quarter_start + BQuarterBegin.is_quarter_end + BQuarterBegin.is_year_start + BQuarterBegin.is_year_end + +QuarterEnd +---------- +.. autosummary:: + :toctree: api/ + + QuarterEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterEnd.freqstr + QuarterEnd.kwds + QuarterEnd.name + QuarterEnd.nanos + QuarterEnd.normalize + QuarterEnd.rule_code + QuarterEnd.n + QuarterEnd.startingMonth + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterEnd.apply + QuarterEnd.apply_index + QuarterEnd.copy + QuarterEnd.isAnchored + QuarterEnd.onOffset + QuarterEnd.is_anchored + QuarterEnd.is_on_offset + QuarterEnd.__call__ + QuarterEnd.is_month_start + QuarterEnd.is_month_end + QuarterEnd.is_quarter_start + QuarterEnd.is_quarter_end + QuarterEnd.is_year_start + QuarterEnd.is_year_end + +QuarterBegin +------------ +.. autosummary:: + :toctree: api/ + + QuarterBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterBegin.freqstr + QuarterBegin.kwds + QuarterBegin.name + QuarterBegin.nanos + QuarterBegin.normalize + QuarterBegin.rule_code + QuarterBegin.n + QuarterBegin.startingMonth + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterBegin.apply + QuarterBegin.apply_index + QuarterBegin.copy + QuarterBegin.isAnchored + QuarterBegin.onOffset + QuarterBegin.is_anchored + QuarterBegin.is_on_offset + QuarterBegin.__call__ + QuarterBegin.is_month_start + QuarterBegin.is_month_end + QuarterBegin.is_quarter_start + QuarterBegin.is_quarter_end + QuarterBegin.is_year_start + QuarterBegin.is_year_end + +BYearEnd +-------- +.. autosummary:: + :toctree: api/ + + BYearEnd + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + BYearEnd.freqstr + BYearEnd.kwds + BYearEnd.name + BYearEnd.nanos + BYearEnd.normalize + BYearEnd.rule_code + BYearEnd.n + BYearEnd.month + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BYearEnd.apply + BYearEnd.apply_index + BYearEnd.copy + BYearEnd.isAnchored + BYearEnd.onOffset + BYearEnd.is_anchored + BYearEnd.is_on_offset + BYearEnd.__call__ + BYearEnd.is_month_start + BYearEnd.is_month_end + BYearEnd.is_quarter_start + BYearEnd.is_quarter_end + BYearEnd.is_year_start + BYearEnd.is_year_end + +BYearBegin +---------- +.. autosummary:: + :toctree: api/ + + BYearBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BYearBegin.freqstr + BYearBegin.kwds + BYearBegin.name + BYearBegin.nanos + BYearBegin.normalize + BYearBegin.rule_code + BYearBegin.n + BYearBegin.month + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BYearBegin.apply + BYearBegin.apply_index + BYearBegin.copy + BYearBegin.isAnchored + BYearBegin.onOffset + BYearBegin.is_anchored + BYearBegin.is_on_offset + BYearBegin.__call__ + BYearBegin.is_month_start + BYearBegin.is_month_end + BYearBegin.is_quarter_start + BYearBegin.is_quarter_end + BYearBegin.is_year_start + BYearBegin.is_year_end + +YearEnd +------- +.. autosummary:: + :toctree: api/ + + YearEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearEnd.freqstr + YearEnd.kwds + YearEnd.name + YearEnd.nanos + YearEnd.normalize + YearEnd.rule_code + YearEnd.n + YearEnd.month + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearEnd.apply + YearEnd.apply_index + YearEnd.copy + YearEnd.isAnchored + YearEnd.onOffset + YearEnd.is_anchored + YearEnd.is_on_offset + YearEnd.__call__ + YearEnd.is_month_start + YearEnd.is_month_end + YearEnd.is_quarter_start + YearEnd.is_quarter_end + YearEnd.is_year_start + YearEnd.is_year_end + +YearBegin +--------- +.. autosummary:: + :toctree: api/ + + YearBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearBegin.freqstr + YearBegin.kwds + YearBegin.name + YearBegin.nanos + YearBegin.normalize + YearBegin.rule_code + YearBegin.n + YearBegin.month + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearBegin.apply + YearBegin.apply_index + YearBegin.copy + YearBegin.isAnchored + YearBegin.onOffset + YearBegin.is_anchored + YearBegin.is_on_offset + YearBegin.__call__ + YearBegin.is_month_start + YearBegin.is_month_end + YearBegin.is_quarter_start + YearBegin.is_quarter_end + YearBegin.is_year_start + YearBegin.is_year_end + +FY5253 +------ +.. autosummary:: + :toctree: api/ + + FY5253 + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + FY5253.freqstr + FY5253.kwds + FY5253.name + FY5253.nanos + FY5253.normalize + FY5253.rule_code + FY5253.n + FY5253.startingMonth + FY5253.variation + FY5253.weekday + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + FY5253.apply + FY5253.apply_index + FY5253.copy + FY5253.get_rule_code_suffix + FY5253.get_year_end + FY5253.isAnchored + FY5253.onOffset + FY5253.is_anchored + FY5253.is_on_offset + FY5253.__call__ + FY5253.is_month_start + FY5253.is_month_end + FY5253.is_quarter_start + FY5253.is_quarter_end + FY5253.is_year_start + FY5253.is_year_end + +FY5253Quarter +------------- +.. autosummary:: + :toctree: api/ + + FY5253Quarter + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + FY5253Quarter.freqstr + FY5253Quarter.kwds + FY5253Quarter.name + FY5253Quarter.nanos + FY5253Quarter.normalize + FY5253Quarter.rule_code + FY5253Quarter.n + FY5253Quarter.qtr_with_extra_week + FY5253Quarter.startingMonth + FY5253Quarter.variation + FY5253Quarter.weekday + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + FY5253Quarter.apply + FY5253Quarter.apply_index + FY5253Quarter.copy + FY5253Quarter.get_rule_code_suffix + FY5253Quarter.get_weeks + FY5253Quarter.isAnchored + FY5253Quarter.onOffset + FY5253Quarter.is_anchored + FY5253Quarter.is_on_offset + FY5253Quarter.year_has_extra_week + FY5253Quarter.__call__ + FY5253Quarter.is_month_start + FY5253Quarter.is_month_end + FY5253Quarter.is_quarter_start + FY5253Quarter.is_quarter_end + FY5253Quarter.is_year_start + FY5253Quarter.is_year_end + +Easter +------ +.. autosummary:: + :toctree: api/ + + Easter + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Easter.freqstr + Easter.kwds + Easter.name + Easter.nanos + Easter.normalize + Easter.rule_code + Easter.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Easter.apply + Easter.apply_index + Easter.copy + Easter.isAnchored + Easter.onOffset + Easter.is_anchored + Easter.is_on_offset + Easter.__call__ + Easter.is_month_start + Easter.is_month_end + Easter.is_quarter_start + Easter.is_quarter_end + Easter.is_year_start + Easter.is_year_end + +Tick +---- +.. autosummary:: + :toctree: api/ + + Tick + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Tick.delta + Tick.freqstr + Tick.kwds + Tick.name + Tick.nanos + Tick.normalize + Tick.rule_code + Tick.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Tick.copy + Tick.isAnchored + Tick.onOffset + Tick.is_anchored + Tick.is_on_offset + Tick.__call__ + Tick.apply + Tick.apply_index + Tick.is_month_start + Tick.is_month_end + Tick.is_quarter_start + Tick.is_quarter_end + Tick.is_year_start + Tick.is_year_end + +Day +--- +.. autosummary:: + :toctree: api/ + + Day + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Day.delta + Day.freqstr + Day.kwds + Day.name + Day.nanos + Day.normalize + Day.rule_code + Day.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Day.copy + Day.isAnchored + Day.onOffset + Day.is_anchored + Day.is_on_offset + Day.__call__ + Day.apply + Day.apply_index + Day.is_month_start + Day.is_month_end + Day.is_quarter_start + Day.is_quarter_end + Day.is_year_start + Day.is_year_end + +Hour +---- +.. autosummary:: + :toctree: api/ + + Hour + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Hour.delta + Hour.freqstr + Hour.kwds + Hour.name + Hour.nanos + Hour.normalize + Hour.rule_code + Hour.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Hour.copy + Hour.isAnchored + Hour.onOffset + Hour.is_anchored + Hour.is_on_offset + Hour.__call__ + Hour.apply + Hour.apply_index + Hour.is_month_start + Hour.is_month_end + Hour.is_quarter_start + Hour.is_quarter_end + Hour.is_year_start + Hour.is_year_end + +Minute +------ +.. autosummary:: + :toctree: api/ + + Minute + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Minute.delta + Minute.freqstr + Minute.kwds + Minute.name + Minute.nanos + Minute.normalize + Minute.rule_code + Minute.n + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Minute.copy + Minute.isAnchored + Minute.onOffset + Minute.is_anchored + Minute.is_on_offset + Minute.__call__ + Minute.apply + Minute.apply_index + Minute.is_month_start + Minute.is_month_end + Minute.is_quarter_start + Minute.is_quarter_end + Minute.is_year_start + Minute.is_year_end + +Second +------ +.. autosummary:: + :toctree: api/ + + Second + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Second.delta + Second.freqstr + Second.kwds + Second.name + Second.nanos + Second.normalize + Second.rule_code + Second.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Second.copy + Second.isAnchored + Second.onOffset + Second.is_anchored + Second.is_on_offset + Second.__call__ + Second.apply + Second.apply_index + Second.is_month_start + Second.is_month_end + Second.is_quarter_start + Second.is_quarter_end + Second.is_year_start + Second.is_year_end + +Milli +----- +.. autosummary:: + :toctree: api/ + + Milli + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Milli.delta + Milli.freqstr + Milli.kwds + Milli.name + Milli.nanos + Milli.normalize + Milli.rule_code + Milli.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Milli.copy + Milli.isAnchored + Milli.onOffset + Milli.is_anchored + Milli.is_on_offset + Milli.__call__ + Milli.apply + Milli.apply_index + Milli.is_month_start + Milli.is_month_end + Milli.is_quarter_start + Milli.is_quarter_end + Milli.is_year_start + Milli.is_year_end + +Micro +----- +.. autosummary:: + :toctree: api/ + + Micro + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Micro.delta + Micro.freqstr + Micro.kwds + Micro.name + Micro.nanos + Micro.normalize + Micro.rule_code + Micro.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Micro.copy + Micro.isAnchored + Micro.onOffset + Micro.is_anchored + Micro.is_on_offset + Micro.__call__ + Micro.apply + Micro.apply_index + Micro.is_month_start + Micro.is_month_end + Micro.is_quarter_start + Micro.is_quarter_end + Micro.is_year_start + Micro.is_year_end + +Nano +---- +.. autosummary:: + :toctree: api/ + + Nano + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Nano.delta + Nano.freqstr + Nano.kwds + Nano.name + Nano.nanos + Nano.normalize + Nano.rule_code + Nano.n + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Nano.copy + Nano.isAnchored + Nano.onOffset + Nano.is_anchored + Nano.is_on_offset + Nano.__call__ + Nano.apply + Nano.apply_index + Nano.is_month_start + Nano.is_month_end + Nano.is_quarter_start + Nano.is_quarter_end + Nano.is_year_start + Nano.is_year_end + +.. _api.frequencies: + +=========== +Frequencies +=========== +.. currentmodule:: pandas.tseries.frequencies + +.. _api.offsets: + +.. autosummary:: + :toctree: api/ + + to_offset diff --git a/doc/source/reference/options.rst b/doc/source/reference/options.rst new file mode 100644 index 00000000..7316b6e9 --- /dev/null +++ b/doc/source/reference/options.rst @@ -0,0 +1,21 @@ +{{ header }} + +.. _api.options: + +==================== +Options and settings +==================== +.. currentmodule:: pandas + +API for configuring global behavior. See :ref:`the User Guide ` for more. + +Working with options +-------------------- +.. 
autosummary:: + :toctree: api/ + + describe_option + reset_option + get_option + set_option + option_context diff --git a/doc/source/reference/plotting.rst b/doc/source/reference/plotting.rst new file mode 100644 index 00000000..632b39a1 --- /dev/null +++ b/doc/source/reference/plotting.rst @@ -0,0 +1,26 @@ +{{ header }} + +.. _api.plotting: + +======== +Plotting +======== +.. currentmodule:: pandas.plotting + +The following functions are contained in the ``pandas.plotting`` module. + +.. autosummary:: + :toctree: api/ + + andrews_curves + autocorrelation_plot + bootstrap_plot + boxplot + deregister_matplotlib_converters + lag_plot + parallel_coordinates + plot_params + radviz + register_matplotlib_converters + scatter_matrix + table diff --git a/doc/source/reference/resampling.rst b/doc/source/reference/resampling.rst new file mode 100644 index 00000000..57263139 --- /dev/null +++ b/doc/source/reference/resampling.rst @@ -0,0 +1,66 @@ +{{ header }} + +.. _api.resampling: + +========== +Resampling +========== +.. currentmodule:: pandas.core.resample + +Resampler objects are returned by resample calls: :func:`pandas.DataFrame.resample`, :func:`pandas.Series.resample`. + +Indexing, iteration +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Resampler.__iter__ + Resampler.groups + Resampler.indices + Resampler.get_group + +Function application +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Resampler.apply + Resampler.aggregate + Resampler.transform + Resampler.pipe + +Upsampling +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Resampler.ffill + Resampler.backfill + Resampler.bfill + Resampler.pad + Resampler.nearest + Resampler.fillna + Resampler.asfreq + Resampler.interpolate + +Computations / descriptive stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Resampler.count + Resampler.nunique + Resampler.first + Resampler.last + Resampler.max + Resampler.mean + Resampler.median + Resampler.min + Resampler.ohlc + Resampler.prod + Resampler.size + Resampler.sem + Resampler.std + Resampler.sum + Resampler.var + Resampler.quantile diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst new file mode 100644 index 00000000..fcdc9ea9 --- /dev/null +++ b/doc/source/reference/series.rst @@ -0,0 +1,609 @@ +{{ header }} + +.. _api.series: + +====== +Series +====== +.. currentmodule:: pandas + +Constructor +----------- +.. autosummary:: + :toctree: api/ + + Series + +Attributes +---------- +**Axes** + +.. autosummary:: + :toctree: api/ + + Series.index + Series.array + Series.values + Series.dtype + Series.shape + Series.nbytes + Series.ndim + Series.size + Series.T + Series.memory_usage + Series.hasnans + Series.empty + Series.dtypes + Series.name + Series.flags + Series.set_flags + +Conversion +---------- +.. autosummary:: + :toctree: api/ + + Series.astype + Series.convert_dtypes + Series.infer_objects + Series.copy + Series.bool + Series.to_numpy + Series.to_period + Series.to_timestamp + Series.to_list + Series.__array__ + +Indexing, iteration +------------------- +.. autosummary:: + :toctree: api/ + + Series.get + Series.at + Series.iat + Series.loc + Series.iloc + Series.__iter__ + Series.items + Series.iteritems + Series.keys + Series.pop + Series.item + Series.xs + +For more information on ``.at``, ``.iat``, ``.loc``, and +``.iloc``, see the :ref:`indexing documentation `. + +Binary operator functions +------------------------- +.. 
autosummary:: + :toctree: api/ + + Series.add + Series.sub + Series.mul + Series.div + Series.truediv + Series.floordiv + Series.mod + Series.pow + Series.radd + Series.rsub + Series.rmul + Series.rdiv + Series.rtruediv + Series.rfloordiv + Series.rmod + Series.rpow + Series.combine + Series.combine_first + Series.round + Series.lt + Series.gt + Series.le + Series.ge + Series.ne + Series.eq + Series.product + Series.dot + +Function application, GroupBy & window +-------------------------------------- +.. autosummary:: + :toctree: api/ + + Series.apply + Series.agg + Series.aggregate + Series.transform + Series.map + Series.groupby + Series.rolling + Series.expanding + Series.ewm + Series.pipe + +.. _api.series.stats: + +Computations / descriptive stats +-------------------------------- +.. autosummary:: + :toctree: api/ + + Series.abs + Series.all + Series.any + Series.autocorr + Series.between + Series.clip + Series.corr + Series.count + Series.cov + Series.cummax + Series.cummin + Series.cumprod + Series.cumsum + Series.describe + Series.diff + Series.factorize + Series.kurt + Series.mad + Series.max + Series.mean + Series.median + Series.min + Series.mode + Series.nlargest + Series.nsmallest + Series.pct_change + Series.prod + Series.quantile + Series.rank + Series.sem + Series.skew + Series.std + Series.sum + Series.var + Series.kurtosis + Series.unique + Series.nunique + Series.is_unique + Series.is_monotonic + Series.is_monotonic_increasing + Series.is_monotonic_decreasing + Series.value_counts + +Reindexing / selection / label manipulation +------------------------------------------- +.. autosummary:: + :toctree: api/ + + Series.align + Series.drop + Series.droplevel + Series.drop_duplicates + Series.duplicated + Series.equals + Series.first + Series.head + Series.idxmax + Series.idxmin + Series.isin + Series.last + Series.reindex + Series.reindex_like + Series.rename + Series.rename_axis + Series.reset_index + Series.sample + Series.set_axis + Series.take + Series.tail + Series.truncate + Series.where + Series.mask + Series.add_prefix + Series.add_suffix + Series.filter + +Missing data handling +--------------------- +.. autosummary:: + :toctree: api/ + + Series.backfill + Series.bfill + Series.dropna + Series.ffill + Series.fillna + Series.interpolate + Series.isna + Series.isnull + Series.notna + Series.notnull + Series.pad + Series.replace + +Reshaping, sorting +------------------ +.. autosummary:: + :toctree: api/ + + Series.argsort + Series.argmin + Series.argmax + Series.reorder_levels + Series.sort_values + Series.sort_index + Series.swaplevel + Series.unstack + Series.explode + Series.searchsorted + Series.ravel + Series.repeat + Series.squeeze + Series.view + +Combining / comparing / joining / merging +----------------------------------------- +.. autosummary:: + :toctree: api/ + + Series.append + Series.compare + Series.update + +Time Series-related +------------------- +.. autosummary:: + :toctree: api/ + + Series.asfreq + Series.asof + Series.shift + Series.first_valid_index + Series.last_valid_index + Series.resample + Series.tz_convert + Series.tz_localize + Series.at_time + Series.between_time + Series.tshift + Series.slice_shift + +Accessors +--------- + +pandas provides dtype-specific methods under various accessors. +These are separate namespaces within :class:`Series` that only apply +to specific data types. 
+ +=========================== ================================= +Data Type Accessor +=========================== ================================= +Datetime, Timedelta, Period :ref:`dt ` +String :ref:`str ` +Categorical :ref:`cat ` +Sparse :ref:`sparse ` +=========================== ================================= + +.. _api.series.dt: + +Datetimelike properties +~~~~~~~~~~~~~~~~~~~~~~~ + +``Series.dt`` can be used to access the values of the series as +datetimelike and return several properties. +These can be accessed like ``Series.dt.``. + +Datetime properties +^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.date + Series.dt.time + Series.dt.timetz + Series.dt.year + Series.dt.month + Series.dt.day + Series.dt.hour + Series.dt.minute + Series.dt.second + Series.dt.microsecond + Series.dt.nanosecond + Series.dt.week + Series.dt.weekofyear + Series.dt.dayofweek + Series.dt.day_of_week + Series.dt.weekday + Series.dt.dayofyear + Series.dt.day_of_year + Series.dt.quarter + Series.dt.is_month_start + Series.dt.is_month_end + Series.dt.is_quarter_start + Series.dt.is_quarter_end + Series.dt.is_year_start + Series.dt.is_year_end + Series.dt.is_leap_year + Series.dt.daysinmonth + Series.dt.days_in_month + Series.dt.tz + Series.dt.freq + +Datetime methods +^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.dt.isocalendar + Series.dt.to_period + Series.dt.to_pydatetime + Series.dt.tz_localize + Series.dt.tz_convert + Series.dt.normalize + Series.dt.strftime + Series.dt.round + Series.dt.floor + Series.dt.ceil + Series.dt.month_name + Series.dt.day_name + +Period properties +^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.qyear + Series.dt.start_time + Series.dt.end_time + +Timedelta properties +^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.days + Series.dt.seconds + Series.dt.microseconds + Series.dt.nanoseconds + Series.dt.components + +Timedelta methods +^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.dt.to_pytimedelta + Series.dt.total_seconds + + +.. _api.series.str: + +String handling +~~~~~~~~~~~~~~~ + +``Series.str`` can be used to access the values of the series as +strings and apply several methods to it. These can be accessed like +``Series.str.``. + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.str.capitalize + Series.str.casefold + Series.str.cat + Series.str.center + Series.str.contains + Series.str.count + Series.str.decode + Series.str.encode + Series.str.endswith + Series.str.extract + Series.str.extractall + Series.str.find + Series.str.findall + Series.str.fullmatch + Series.str.get + Series.str.index + Series.str.join + Series.str.len + Series.str.ljust + Series.str.lower + Series.str.lstrip + Series.str.match + Series.str.normalize + Series.str.pad + Series.str.partition + Series.str.removeprefix + Series.str.removesuffix + Series.str.repeat + Series.str.replace + Series.str.rfind + Series.str.rindex + Series.str.rjust + Series.str.rpartition + Series.str.rstrip + Series.str.slice + Series.str.slice_replace + Series.str.split + Series.str.rsplit + Series.str.startswith + Series.str.strip + Series.str.swapcase + Series.str.title + Series.str.translate + Series.str.upper + Series.str.wrap + Series.str.zfill + Series.str.isalnum + Series.str.isalpha + Series.str.isdigit + Series.str.isspace + Series.str.islower + Series.str.isupper + Series.str.istitle + Series.str.isnumeric + Series.str.isdecimal + Series.str.get_dummies + +.. + The following is needed to ensure the generated pages are created with the + correct template (otherwise they would be created in the Series/Index class page) + +.. + .. autosummary:: + :toctree: api/ + :template: autosummary/accessor.rst + + Series.str + Series.cat + Series.dt + Series.sparse + DataFrame.sparse + Index.str + +.. _api.series.cat: + +Categorical accessor +~~~~~~~~~~~~~~~~~~~~ + +Categorical-dtype specific methods and attributes are available under +the ``Series.cat`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.cat.categories + Series.cat.ordered + Series.cat.codes + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.cat.rename_categories + Series.cat.reorder_categories + Series.cat.add_categories + Series.cat.remove_categories + Series.cat.remove_unused_categories + Series.cat.set_categories + Series.cat.as_ordered + Series.cat.as_unordered + + +.. _api.series.sparse: + +Sparse accessor +~~~~~~~~~~~~~~~ + +Sparse-dtype specific methods and attributes are provided under the +``Series.sparse`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.sparse.npoints + Series.sparse.density + Series.sparse.fill_value + Series.sparse.sp_values + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.sparse.from_coo + Series.sparse.to_coo + +.. _api.series.flags: + +Flags +~~~~~ + +Flags refer to attributes of the pandas object. Properties of the dataset (like +the date is was recorded, the URL it was accessed from, etc.) should be stored +in :attr:`Series.attrs`. + +.. autosummary:: + :toctree: api/ + + Flags + +.. _api.series.metadata: + +Metadata +~~~~~~~~ + +:attr:`Series.attrs` is a dictionary for storing global metadata for this Series. + +.. warning:: ``Series.attrs`` is considered experimental and may change without warning. + +.. autosummary:: + :toctree: api/ + + Series.attrs + + +Plotting +-------- +``Series.plot`` is both a callable method and a namespace attribute for +specific plotting methods of the form ``Series.plot.``. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_callable.rst + + Series.plot + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.plot.area + Series.plot.bar + Series.plot.barh + Series.plot.box + Series.plot.density + Series.plot.hist + Series.plot.kde + Series.plot.line + Series.plot.pie + +.. autosummary:: + :toctree: api/ + + Series.hist + +Serialization / IO / conversion +------------------------------- +.. autosummary:: + :toctree: api/ + + Series.to_pickle + Series.to_csv + Series.to_dict + Series.to_excel + Series.to_frame + Series.to_xarray + Series.to_hdf + Series.to_sql + Series.to_json + Series.to_string + Series.to_clipboard + Series.to_latex + Series.to_markdown diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst new file mode 100644 index 00000000..5144f12f --- /dev/null +++ b/doc/source/reference/style.rst @@ -0,0 +1,82 @@ +{{ header }} + +.. _api.style: + +===== +Style +===== +.. currentmodule:: pandas.io.formats.style + +``Styler`` objects are returned by :attr:`pandas.DataFrame.style`. + +Styler constructor +------------------ +.. autosummary:: + :toctree: api/ + + Styler + Styler.from_custom_template + +Styler properties +----------------- +.. autosummary:: + :toctree: api/ + + Styler.env + Styler.template_html + Styler.template_html_style + Styler.template_html_table + Styler.template_latex + Styler.template_string + Styler.loader + +Style application +----------------- +.. autosummary:: + :toctree: api/ + + Styler.apply + Styler.applymap + Styler.apply_index + Styler.applymap_index + Styler.format + Styler.format_index + Styler.relabel_index + Styler.hide + Styler.concat + Styler.set_td_classes + Styler.set_table_styles + Styler.set_table_attributes + Styler.set_tooltips + Styler.set_caption + Styler.set_sticky + Styler.set_properties + Styler.set_uuid + Styler.clear + Styler.pipe + +Builtin styles +-------------- +.. autosummary:: + :toctree: api/ + + Styler.highlight_null + Styler.highlight_max + Styler.highlight_min + Styler.highlight_between + Styler.highlight_quantile + Styler.background_gradient + Styler.text_gradient + Styler.bar + +Style export and import +----------------------- +.. autosummary:: + :toctree: api/ + + Styler.to_html + Styler.to_latex + Styler.to_excel + Styler.to_string + Styler.export + Styler.use diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst new file mode 100644 index 00000000..1144c767 --- /dev/null +++ b/doc/source/reference/testing.rst @@ -0,0 +1,77 @@ +{{ header }} + +.. _api.testing: + +======= +Testing +======= +.. currentmodule:: pandas + +.. _api.general.testing: + +Assertion functions +------------------- +.. autosummary:: + :toctree: api/ + + testing.assert_frame_equal + testing.assert_series_equal + testing.assert_index_equal + testing.assert_extension_array_equal + +Exceptions and warnings +----------------------- +.. 
autosummary:: + :toctree: api/ + + errors.AbstractMethodError + errors.AccessorRegistrationWarning + errors.AttributeConflictWarning + errors.CategoricalConversionWarning + errors.ClosedFileError + errors.CSSWarning + errors.DatabaseError + errors.DataError + errors.DtypeWarning + errors.DuplicateLabelError + errors.EmptyDataError + errors.IncompatibilityWarning + errors.IndexingError + errors.InvalidColumnName + errors.InvalidIndexError + errors.IntCastingNaNError + errors.MergeError + errors.NullFrequencyError + errors.NumbaUtilError + errors.NumExprClobberingError + errors.OptionError + errors.OutOfBoundsDatetime + errors.OutOfBoundsTimedelta + errors.ParserError + errors.ParserWarning + errors.PerformanceWarning + errors.PossibleDataLossError + errors.PossiblePrecisionLoss + errors.PyperclipException + errors.PyperclipWindowsException + errors.SettingWithCopyError + errors.SettingWithCopyWarning + errors.SpecificationError + errors.UndefinedVariableError + errors.UnsortedIndexError + errors.UnsupportedFunctionCall + errors.ValueLabelTypeMismatch + +Bug report function +------------------- +.. autosummary:: + :toctree: api/ + + show_versions + +Test suite runner +----------------- +.. autosummary:: + :toctree: api/ + + test diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst new file mode 100644 index 00000000..0be3184a --- /dev/null +++ b/doc/source/reference/window.rst @@ -0,0 +1,110 @@ +{{ header }} + +.. _api.window: + +====== +Window +====== + +Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. +Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. +ExponentialMovingWindow objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc. + +.. _api.functions_rolling: + +Rolling window functions +------------------------ +.. currentmodule:: pandas.core.window.rolling + +.. autosummary:: + :toctree: api/ + + Rolling.count + Rolling.sum + Rolling.mean + Rolling.median + Rolling.var + Rolling.std + Rolling.min + Rolling.max + Rolling.corr + Rolling.cov + Rolling.skew + Rolling.kurt + Rolling.apply + Rolling.aggregate + Rolling.quantile + Rolling.sem + Rolling.rank + +.. _api.functions_window: + +Weighted window functions +------------------------- +.. currentmodule:: pandas.core.window.rolling + +.. autosummary:: + :toctree: api/ + + Window.mean + Window.sum + Window.var + Window.std + +.. _api.functions_expanding: + +Expanding window functions +-------------------------- +.. currentmodule:: pandas.core.window.expanding + +.. autosummary:: + :toctree: api/ + + Expanding.count + Expanding.sum + Expanding.mean + Expanding.median + Expanding.var + Expanding.std + Expanding.min + Expanding.max + Expanding.corr + Expanding.cov + Expanding.skew + Expanding.kurt + Expanding.apply + Expanding.aggregate + Expanding.quantile + Expanding.sem + Expanding.rank + +.. _api.functions_ewm: + +Exponentially-weighted window functions +--------------------------------------- +.. currentmodule:: pandas.core.window.ewm + +.. autosummary:: + :toctree: api/ + + ExponentialMovingWindow.mean + ExponentialMovingWindow.sum + ExponentialMovingWindow.std + ExponentialMovingWindow.var + ExponentialMovingWindow.corr + ExponentialMovingWindow.cov + +.. _api.indexers_window: + +Window indexer +-------------- +.. currentmodule:: pandas + +Base class for defining custom window boundaries. + +.. 
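+
+A brief sketch of how an indexer object is passed to ``rolling`` (the data and
+the commented output are illustrative; ``min_periods=1`` is given explicitly so
+the shorter trailing window still yields a value):
+
+.. code-block:: python
+
+   import pandas as pd
+   from pandas.api.indexers import FixedForwardWindowIndexer
+
+   s = pd.Series([0, 1, 2, 3, 4])
+
+   # each window looks forward from the current row instead of backward
+   indexer = FixedForwardWindowIndexer(window_size=2)
+   s.rolling(indexer, min_periods=1).sum()
+   # 0    1.0
+   # 1    3.0
+   # 2    5.0
+   # 3    7.0
+   # 4    4.0
+
+..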
autosummary:: + :toctree: api/ + + api.indexers.BaseIndexer + api.indexers.FixedForwardWindowIndexer + api.indexers.VariableOffsetWindowIndexer diff --git a/doc/source/styled.xlsx b/doc/source/styled.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1233ff2b8692bad1cfcd54f59d26eb315cfa728c GIT binary patch literal 5682 zcmZ`-1z3}9_aEI#3Ze)|3o^QEv>+i}0uqkcMt6@=I!2e2sC0wWMu&ia0@58SjWqI~ z@B95fU%B3IyYA=Nu4})ubD!s&^E>xXQxyxF5&!_;13p(uKBi|H#zLaMQD6c9MCiMj zvo*v8$o=bDnxGNcE<@barjm9}tB>liko4Mw8eU*Jv0loFCo)o0`ul{pKoS{&O~q2K$v?!mX##}swl>kxCc&A6ncxA^jty9ve{7U(Bm4#& zy}?TKB1JDN2TM(72S*p+Ge<{mPbfq!MvbVQhgAMbWpftBu>rD>sZc^C9>>8T41HmP zgr}#k;q>)h&Ch7uEc<(xDqRJ`bkr{&B*X%`%4pPXr>hT&EZGz=N(W{h+yYwVkR>7o zV(^d;Qg4~KpNdb~yA6qY+IMP?>N}(nif*pi@v@(!7$3x#)Jc$>*I!h?=O0~S{^_s7 z3&@upw4a>l2SfHxe=QxHt$#T@nD9ikgNL-IEn=mmg_SNZ_gkc;1|b})t3Ga~&O9+} zrC_#RxYRg&182xpQ*ve-w)C>98d|}(? z8>b#x!*{ z-xA=>>@Ex9G>4NZrnC~-+*&fZq!9NA%u2MZQnjNAbAGdKG8fb_J+?g1414Bxe^qeJ ztWQM@$E12-W#a5v^>me<&PCZ9PsVKzS>@J*;xj+z%S8_=_m)bZ^Vn!09E#Ppop(Jt zX(pLWq__Y;CL20+(b4gQ09|dYq1Jz2d4I)c%gESeN`%sXr~IQQ_y|hWEfiE&L*ZiU zK3$U*vZYQRN@frZ@v@2!320LiA{|I3*-B7*pCN(GwRt|IptQar;&rgiZLtyd0a=$v?ZDD{_B|t`%7O4cKLpPEM@RW%RQP-(;$m3_ zL+-vuom)9k{Zp<2P}+s%Jx`Ikfze3EyVwg)nzGxo4k2v?AcC5F+mBY0M^k0-uir&$ zYm6QH&40_V$s~+6))=$*m>r9G*C7W`bxglu3&^671Om+>nMPHKr?C!!Iw;jEmETHI|6^C;J zAly0fSkt#0R2Q+(o-k?>H$Kc!t4WKW-W&8o3C`FxQ-1j)rKzug-#g1sY{(uFU|Wn) zwAGX`oOt1~v654iIH4vx)gIqW)^i6hAo&r+#LR$?PJ4 z%x5a`v9{GrMXs!a8~n5=#qv9D;WncZ-am0pGX)9n#^0gC1mCGvWDK!w+PzoK9Ftz! zPR1!KWpP`G*17gpP@U7;5OX8)?h#M*(HgP~!xF(nDf8a%gYj-d_|_xK2L_8RX=t$KSB3MY3rYnH)%Fs??XgxiZx;7HK`ZolEeoyJ4S+|hbr zCOntF>B!n@@&~VuubhBDoGQo<&EqO62>c=<+-j>T*Idl zX6rSBMEr8~&Z=MQ5UsVWA}WWK9~{;14G@ciB@*cBJYLZWo*zARUtucb(N2+Tpp|XK zg1*n15fjNyjc+6z(~5KGINj{>q+JsIg5Btsst;YC92q%ZM?I;?-;qeB>v^%C$GDvv zmO9)a7w+X_mI_Yl))b(GLQ0sq6zk0!Cib`}7p%-5zQPMGe}cKMrEAV>+>ON&bkt%OE;`*ykF63Cs{+KKK#?L zv^R&CO#K!Sxw0jK`n5{l!ENodPZiM_kH)X6c3d7{-tA|(#=)zb+f~#a%KKUv&(0& zL~-TEnOBKqWaG2vFP?*^ux5w+|SH*!!7l}d>Q zC~yUbshf_nkW)rf5aTdXJ1_6-B-(jIO`LHBK|^%-tUgos%e_QN@;l2->aZM?O|cS% zPA};UHqo}SnyYqr!?GT-T}_*QlB}VYR6NK}Tew#p?vml%(5_H&WiWp4ly6?taawE+ z%XE!`ORm5l(3vy5MMbf!Oy`u~DHFCz-S=o-cg@ zu4R<*p@3N|rA6uM%F^yBUSh96g9GB+=OXjr3hA``pCd~|qX`{ApJi61r$3R{9FYjz zjmI(NO{){iUr{HNnEAOwQqkTSv$z%6hVdu8>adM%$zTHj3O4`%n!gFe!@=3!#m3s& z)dl$1?JpK_OEGg;79vKrsl*+3-Tx8$U=&d6gF51Q)v)M33@a8wu{aI6Pj5}fsyG|U zzGaV_;^Rn=jz0sXNmXC;E5Ww&=b6i-)Qu%IlqkOPXd2VuAHz;thuFphbqgV=;eX>fuDne#?nV zWnh5Byt?i_J8Wqz^|j6EybomQdE#>mY5#22tZXQ2*TA}2HSi|Ng(r54Gy`_vcPUW@ zD-y_(=t(((^80T|`NtLoe*c0xcZX0Pw4SFP_VgY@F^Pp^=zLUy-Do9!EeC^?=Be#W zTdHqru)SnW3ABHCZh~3P(z-%^W@M43np!R2MV8rt)S~h5%x&cieL~N&D=Ql^LdpD` z77G)u1jdY`2r{!{F1osTa-v}N;YUWgt)sHa^a3d{E;)XL7$ZAoVV`I|JeZH-sZ}07 z>-{AoFj-`%z3Vo`2Lw3huF*HmT(ZXzw2Fy%(Nb|jk7cZsw!_j5!Nej}=8PhxIH%T+ zaDso}%g-^|spVAgDd@3UvM7IIt!8hxdCcM5la^0>o)2;o!PJxD2**H)#iUE*FBxE$ zBt#s@_j?Xe)%4p|G7scOG^#-I0`IlpV%;JbD1pQ$4H^)T*ADtpatk(M7rnJIcF7HQUkY8ldzPG3Z16-B-t0nuln;?~D$ zrZibbHELA#Q-e;45lGGR53e=|qk*~%!m+PJK_zU&Ds{WQqn~`@?gqM>dfB^@mN-)h zk&A$c3-_6rGIUW71((Gn1?9&VrOm<=6buk3av}Tv%tK5)Zj@@wLIXmJF&)8%d5emc zz&UoQuH#Y&yVppZN>0DmZNP_t^Oc9*%OumavO>kLlER*rseDcPuc;zxWEE8_-pD=I zwDL(*-Svk>FLL*%g{E!=H)5u|I!eY`b5k<)>w7>sOpNf(z|lKEi5P~k)Q`?#6d?Ak z1m0;JfixBL!WZog+MoN8HwrvZDG+^W0*+wni4HjiTQ(N=9{Fp|y)i#gP#u(5(1B?}4yCl-}iy zC*u48?Ob>y8$4<&bC4W^a%2vRrBj#Xq^)WeNk{NMice9t;Jm%${`GWU;eqE4zO|Yj zwMU#Q>a2RpY@+Ge{$#Xey(Fp6=6l}#3rK*N;a=AEMb%57rT5{*3_Vb&C#7;aha39& zbgkj!=AT_>KM8Bu1>J3`(CgP33_Z5G*gSK#w$gTWwuOORe$A~{Tfqftx_kTF(b;iB=Bo5U%<4Xi-A@ENp 
zpalEeh_AQk&{vKFwXd+xwaIT(uoY&Y1W1(E^D?fkUj1px#DPP-;a{ITRy26&1f#tUkflh3lR>ePbV zhT7VCz8_Qb{v5F^iF4lC4Z~&75XVK9&gCz=NO;!DKR@^STG7>A#i72vkwM|jq_E1I z(Q{z*v^M*B;`@X%JUvqyW2dI^?2kD8+I{v&SI+ctNtA26F*Xljv`R&{&DtGfzPhx| zVO_=Cu0{hJZqQ0|2pq|BmOceBr(j9hUm*AYY|qB)&Nt57>03O*PCjv9u@spx$Ww)K zx-hOow&b?>P(R%496@#jmfG%I-6P&*8?fi~4k*$0vjRV5D=OC#FxvY#pvdE|%i~{U zo?WF7Ueg^xz2Z_-C?9C75;Fs5r-SeGI}97N+2SQ!O$7y1&ihLE>@(IX9W+D2_94vr z2+H6%hC{BAh<8gtv$K)6FE`7Ky$0#n_ckgp%eVObKa_e7;u2_-Ey#(;vYj`H!}l&} ze>7`Z9a>#2B++~V13k}#RssNjCXCDKlEVr*Q9>~QfSZ3O%r7praB#5ymC(^~APp== z($FiyMZ8{EJ}c~f%SPeL5mwrrYMzDyBqXrzr!QW5w4^ufT1$$9>7*$;9a~=h34zqw zlc7ScU@gO88ix51cs!jdhs>g&UvV=tb|Scwym`uXHlJ`IZw9JA&iHmZlb zFg)dAeL@$2H*`WU#xC9`>P7)c8TTp`Ex4j&vCrzJ9xV#2G|6B+)NES$^3X5>A4BQ% zNnZ=)R{LC3RVxu?-4J(EUp}R+Bcz~Z)3~h`0s3}7U3krET+q``IWMOB%Z&7)hSp@_ z@o-5?k3PJ+Jn9FYd)uD@xk9qyxBLnSx{7H34#?lS%0I*MAB_b>7|cToXuIzvqYvH8 zeN$xGTz4t+@$-xHknhP~cQU-6fO7GG1kXA?$RtQqB+k&Ny{n`o_ABC`iuf`Ic%wO~ zDt0?^O5?$@ci+zqyNW6s>uA#OMXH$u#wx2m;(b0UZXgp`b3t*uHijG?{j!nPi^n|B zkv^h}q>&|QZ*5(*AP?+%!?_Bo-q&1N6~T)vqJDv77_U)f(n%rsL&k?eg?j zCe-SvX>{-qUvg@f+YBxsF`!KV{Z#A#24>hDL9?oskxQQ$n4HA}3!7<{mj~aEP02YA zj-j~et|%oVN)i`;&4|ZXAni0hdC7y{1R?mli$_KKX3t;U`?{8k(E6 z>wU|xwjxtXG4M{*^OU?Pc^`SPS&C$Q|K8kg>-X^?7W*>!BlMVKH_l8BwanoooPz=BFYx7yWrs*)AgJjel znmE-5)ipR#k4TWPW=YlEk0h*&kNF8BzAMJCCF{WxzES|a@}ozv1=hg7_Dy=DLCF!})Im zC-{FE_}5YWx|Qo>``Zc(O>6%~z1J;VXTskWDA31-Ul#t$i`Svod*W}XJX(E3L;u+d yu7j`lkKbTjv=oR2|Br5R-Olxr{B1{t{9g-HQxykI`TzhS`W=XVR++yF8SsDP;o9B+ literal 0 HcmV?d00001 diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst new file mode 100644 index 00000000..c767fb1e --- /dev/null +++ b/doc/source/user_guide/10min.rst @@ -0,0 +1,840 @@ +.. _10min: + +{{ header }} + +******************** +10 minutes to pandas +******************** + +This is a short introduction to pandas, geared mainly for new users. +You can see more complex recipes in the :ref:`Cookbook`. + +Customarily, we import as follows: + +.. ipython:: python + + import numpy as np + import pandas as pd + +Object creation +--------------- + +See the :ref:`Intro to data structures section `. + +Creating a :class:`Series` by passing a list of values, letting pandas create +a default integer index: + +.. ipython:: python + + s = pd.Series([1, 3, 5, np.nan, 6, 8]) + s + +Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index using :func:`date_range` +and labeled columns: + +.. ipython:: python + + dates = pd.date_range("20130101", periods=6) + dates + df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD")) + df + +Creating a :class:`DataFrame` by passing a dictionary of objects that can be +converted into a series-like structure: + +.. ipython:: python + + df2 = pd.DataFrame( + { + "A": 1.0, + "B": pd.Timestamp("20130102"), + "C": pd.Series(1, index=list(range(4)), dtype="float32"), + "D": np.array([3] * 4, dtype="int32"), + "E": pd.Categorical(["test", "train", "test", "train"]), + "F": "foo", + } + ) + df2 + +The columns of the resulting :class:`DataFrame` have different +:ref:`dtypes `: + +.. ipython:: python + + df2.dtypes + +If you're using IPython, tab completion for column names (as well as public +attributes) is automatically enabled. Here's a subset of the attributes that +will be completed: + +.. ipython:: + + @verbatim + In [1]: df2. 
# noqa: E225, E999 + df2.A df2.bool + df2.abs df2.boxplot + df2.add df2.C + df2.add_prefix df2.clip + df2.add_suffix df2.columns + df2.align df2.copy + df2.all df2.count + df2.any df2.combine + df2.append df2.D + df2.apply df2.describe + df2.applymap df2.diff + df2.B df2.duplicated + +As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically +tab completed. ``E`` and ``F`` are there as well; the rest of the attributes have been +truncated for brevity. + +Viewing data +------------ + +See the :ref:`Basics section `. + +Use :meth:`DataFrame.head` and :meth:`DataFrame.tail` to view the top and bottom rows of the frame +respectively: + +.. ipython:: python + + df.head() + df.tail(3) + +Display the :attr:`DataFrame.index` or :attr:`DataFrame.columns`: + +.. ipython:: python + + df.index + df.columns + +:meth:`DataFrame.to_numpy` gives a NumPy representation of the underlying data. +Note that this can be an expensive operation when your :class:`DataFrame` has +columns with different data types, which comes down to a fundamental difference +between pandas and NumPy: **NumPy arrays have one dtype for the entire array, +while pandas DataFrames have one dtype per column**. When you call +:meth:`DataFrame.to_numpy`, pandas will find the NumPy dtype that can hold *all* +of the dtypes in the DataFrame. This may end up being ``object``, which requires +casting every value to a Python object. + +For ``df``, our :class:`DataFrame` of all floating-point values, and +:meth:`DataFrame.to_numpy` is fast and doesn't require copying data: + +.. ipython:: python + + df.to_numpy() + +For ``df2``, the :class:`DataFrame` with multiple dtypes, +:meth:`DataFrame.to_numpy` is relatively expensive: + +.. ipython:: python + + df2.to_numpy() + +.. note:: + + :meth:`DataFrame.to_numpy` does *not* include the index or column + labels in the output. + +:func:`~DataFrame.describe` shows a quick statistic summary of your data: + +.. ipython:: python + + df.describe() + +Transposing your data: + +.. ipython:: python + + df.T + +:meth:`DataFrame.sort_index` sorts by an axis: + +.. ipython:: python + + df.sort_index(axis=1, ascending=False) + +:meth:`DataFrame.sort_values` sorts by values: + +.. ipython:: python + + df.sort_values(by="B") + +Selection +--------- + +.. note:: + + While standard Python / NumPy expressions for selecting and setting are + intuitive and come in handy for interactive work, for production code, we + recommend the optimized pandas data access methods, :meth:`DataFrame.at`, :meth:`DataFrame.iat`, + :meth:`DataFrame.loc` and :meth:`DataFrame.iloc`. + +See the indexing documentation :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing `. + +Getting +~~~~~~~ + +Selecting a single column, which yields a :class:`Series`, +equivalent to ``df.A``: + +.. ipython:: python + + df["A"] + +Selecting via ``[]`` (``__getitem__``), which slices the rows: + +.. ipython:: python + + df[0:3] + df["20130102":"20130104"] + +Selection by label +~~~~~~~~~~~~~~~~~~ + +See more in :ref:`Selection by Label ` using :meth:`DataFrame.loc` or :meth:`DataFrame.at`. + +For getting a cross section using a label: + +.. ipython:: python + + df.loc[dates[0]] + +Selecting on a multi-axis by label: + +.. ipython:: python + + df.loc[:, ["A", "B"]] + +Showing label slicing, both endpoints are *included*: + +.. ipython:: python + + df.loc["20130102":"20130104", ["A", "B"]] + +Reduction in the dimensions of the returned object: + +.. 
ipython:: python + + df.loc["20130102", ["A", "B"]] + +For getting a scalar value: + +.. ipython:: python + + df.loc[dates[0], "A"] + +For getting fast access to a scalar (equivalent to the prior method): + +.. ipython:: python + + df.at[dates[0], "A"] + +Selection by position +~~~~~~~~~~~~~~~~~~~~~ + +See more in :ref:`Selection by Position ` using :meth:`DataFrame.iloc` or :meth:`DataFrame.at`. + +Select via the position of the passed integers: + +.. ipython:: python + + df.iloc[3] + +By integer slices, acting similar to NumPy/Python: + +.. ipython:: python + + df.iloc[3:5, 0:2] + +By lists of integer position locations, similar to the NumPy/Python style: + +.. ipython:: python + + df.iloc[[1, 2, 4], [0, 2]] + +For slicing rows explicitly: + +.. ipython:: python + + df.iloc[1:3, :] + +For slicing columns explicitly: + +.. ipython:: python + + df.iloc[:, 1:3] + +For getting a value explicitly: + +.. ipython:: python + + df.iloc[1, 1] + +For getting fast access to a scalar (equivalent to the prior method): + +.. ipython:: python + + df.iat[1, 1] + +Boolean indexing +~~~~~~~~~~~~~~~~ + +Using a single column's values to select data: + +.. ipython:: python + + df[df["A"] > 0] + +Selecting values from a DataFrame where a boolean condition is met: + +.. ipython:: python + + df[df > 0] + +Using the :func:`~Series.isin` method for filtering: + +.. ipython:: python + + df2 = df.copy() + df2["E"] = ["one", "one", "two", "three", "four", "three"] + df2 + df2[df2["E"].isin(["two", "four"])] + +Setting +~~~~~~~ + +Setting a new column automatically aligns the data +by the indexes: + +.. ipython:: python + + s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range("20130102", periods=6)) + s1 + df["F"] = s1 + +Setting values by label: + +.. ipython:: python + + df.at[dates[0], "A"] = 0 + +Setting values by position: + +.. ipython:: python + + df.iat[0, 1] = 0 + +Setting by assigning with a NumPy array: + +.. ipython:: python + :okwarning: + + df.loc[:, "D"] = np.array([5] * len(df)) + +The result of the prior setting operations: + +.. ipython:: python + + df + +A ``where`` operation with setting: + +.. ipython:: python + + df2 = df.copy() + df2[df2 > 0] = -df2 + df2 + + +Missing data +------------ + +pandas primarily uses the value ``np.nan`` to represent missing data. It is by +default not included in computations. See the :ref:`Missing Data section +`. + +Reindexing allows you to change/add/delete the index on a specified axis. This +returns a copy of the data: + +.. ipython:: python + + df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ["E"]) + df1.loc[dates[0] : dates[1], "E"] = 1 + df1 + +:meth:`DataFrame.dropna` drops any rows that have missing data: + +.. ipython:: python + + df1.dropna(how="any") + +:meth:`DataFrame.fillna` fills missing data: + +.. ipython:: python + + df1.fillna(value=5) + +:func:`isna` gets the boolean mask where values are ``nan``: + +.. ipython:: python + + pd.isna(df1) + + +Operations +---------- + +See the :ref:`Basic section on Binary Ops `. + +Stats +~~~~~ + +Operations in general *exclude* missing data. + +Performing a descriptive statistic: + +.. ipython:: python + + df.mean() + +Same operation on the other axis: + +.. ipython:: python + + df.mean(1) + +Operating with objects that have different dimensionality and need alignment. +In addition, pandas automatically broadcasts along the specified dimension: + +.. 
ipython:: python + + s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2) + s + df.sub(s, axis="index") + + +Apply +~~~~~ + +:meth:`DataFrame.apply` applies a user defined function to the data: + +.. ipython:: python + + df.apply(np.cumsum) + df.apply(lambda x: x.max() - x.min()) + +Histogramming +~~~~~~~~~~~~~ + +See more at :ref:`Histogramming and Discretization `. + +.. ipython:: python + + s = pd.Series(np.random.randint(0, 7, size=10)) + s + s.value_counts() + +String Methods +~~~~~~~~~~~~~~ + +Series is equipped with a set of string processing methods in the ``str`` +attribute that make it easy to operate on each element of the array, as in the +code snippet below. Note that pattern-matching in ``str`` generally uses `regular +expressions `__ by default (and in +some cases always uses them). See more at :ref:`Vectorized String Methods +`. + +.. ipython:: python + + s = pd.Series(["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"]) + s.str.lower() + +Merge +----- + +Concat +~~~~~~ + +pandas provides various facilities for easily combining together Series and +DataFrame objects with various kinds of set logic for the indexes +and relational algebra functionality in the case of join / merge-type +operations. + +See the :ref:`Merging section `. + +Concatenating pandas objects together along an axis with :func:`concat`: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 4)) + df + + # break it into pieces + pieces = [df[:3], df[3:7], df[7:]] + + pd.concat(pieces) + +.. note:: + Adding a column to a :class:`DataFrame` is relatively fast. However, adding + a row requires a copy, and may be expensive. We recommend passing a + pre-built list of records to the :class:`DataFrame` constructor instead + of building a :class:`DataFrame` by iteratively appending records to it. + +Join +~~~~ + +:func:`merge` enables SQL style join types along specific columns. See the :ref:`Database style joining ` section. + +.. ipython:: python + + left = pd.DataFrame({"key": ["foo", "foo"], "lval": [1, 2]}) + right = pd.DataFrame({"key": ["foo", "foo"], "rval": [4, 5]}) + left + right + pd.merge(left, right, on="key") + +Another example that can be given is: + +.. ipython:: python + + left = pd.DataFrame({"key": ["foo", "bar"], "lval": [1, 2]}) + right = pd.DataFrame({"key": ["foo", "bar"], "rval": [4, 5]}) + left + right + pd.merge(left, right, on="key") + +Grouping +-------- + +By "group by" we are referring to a process involving one or more of the +following steps: + + - **Splitting** the data into groups based on some criteria + - **Applying** a function to each group independently + - **Combining** the results into a data structure + +See the :ref:`Grouping section `. + +.. ipython:: python + + df = pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + df + +Grouping and then applying the :meth:`~pandas.core.groupby.GroupBy.sum` function to the resulting +groups: + +.. ipython:: python + + df.groupby("A")[["C", "D"]].sum() + +Grouping by multiple columns forms a hierarchical index, and again we can +apply the :meth:`~pandas.core.groupby.GroupBy.sum` function: + +.. ipython:: python + + df.groupby(["A", "B"]).sum() + +Reshaping +--------- + +See the sections on :ref:`Hierarchical Indexing ` and +:ref:`Reshaping `. + +Stack +~~~~~ + +.. 
ipython:: python + + tuples = list( + zip( + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ) + ) + index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"]) + df2 = df[:4] + df2 + +The :meth:`~DataFrame.stack` method "compresses" a level in the DataFrame's +columns: + +.. ipython:: python + + stacked = df2.stack() + stacked + +With a "stacked" DataFrame or Series (having a :class:`MultiIndex` as the +``index``), the inverse operation of :meth:`~DataFrame.stack` is +:meth:`~DataFrame.unstack`, which by default unstacks the **last level**: + +.. ipython:: python + + stacked.unstack() + stacked.unstack(1) + stacked.unstack(0) + +Pivot tables +~~~~~~~~~~~~ +See the section on :ref:`Pivot Tables `. + +.. ipython:: python + + df = pd.DataFrame( + { + "A": ["one", "one", "two", "three"] * 3, + "B": ["A", "B", "C"] * 4, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2, + "D": np.random.randn(12), + "E": np.random.randn(12), + } + ) + df + +:func:`pivot_table` pivots a :class:`DataFrame` specifying the ``values``, ``index`` and ``columns`` + +.. ipython:: python + + pd.pivot_table(df, values="D", index=["A", "B"], columns=["C"]) + + +Time series +----------- + +pandas has simple, powerful, and efficient functionality for performing +resampling operations during frequency conversion (e.g., converting secondly +data into 5-minutely data). This is extremely common in, but not limited to, +financial applications. See the :ref:`Time Series section `. + +.. ipython:: python + + rng = pd.date_range("1/1/2012", periods=100, freq="S") + ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng) + ts.resample("5Min").sum() + +:meth:`Series.tz_localize` localizes a time series to a time zone: + +.. ipython:: python + + rng = pd.date_range("3/6/2012 00:00", periods=5, freq="D") + ts = pd.Series(np.random.randn(len(rng)), rng) + ts + ts_utc = ts.tz_localize("UTC") + ts_utc + +:meth:`Series.tz_convert` converts a timezones aware time series to another time zone: + +.. ipython:: python + + ts_utc.tz_convert("US/Eastern") + +Converting between time span representations: + +.. ipython:: python + + rng = pd.date_range("1/1/2012", periods=5, freq="M") + ts = pd.Series(np.random.randn(len(rng)), index=rng) + ts + ps = ts.to_period() + ps + ps.to_timestamp() + +Converting between period and timestamp enables some convenient arithmetic +functions to be used. In the following example, we convert a quarterly +frequency with year ending in November to 9am of the end of the month following +the quarter end: + +.. ipython:: python + + prng = pd.period_range("1990Q1", "2000Q4", freq="Q-NOV") + ts = pd.Series(np.random.randn(len(prng)), prng) + ts.index = (prng.asfreq("M", "e") + 1).asfreq("H", "s") + 9 + ts.head() + +Categoricals +------------ + +pandas can include categorical data in a :class:`DataFrame`. For full docs, see the +:ref:`categorical introduction ` and the :ref:`API documentation `. + +.. ipython:: python + + df = pd.DataFrame( + {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]} + ) + + + +Converting the raw grades to a categorical data type: + +.. ipython:: python + + df["grade"] = df["raw_grade"].astype("category") + df["grade"] + +Rename the categories to more meaningful names: + +.. 
ipython:: python + + new_categories = ["very good", "good", "very bad"] + df["grade"] = df["grade"].cat.rename_categories(new_categories) + +Reorder the categories and simultaneously add the missing categories (methods under :meth:`Series.cat` return a new :class:`Series` by default): + +.. ipython:: python + + df["grade"] = df["grade"].cat.set_categories( + ["very bad", "bad", "medium", "good", "very good"] + ) + df["grade"] + +Sorting is per order in the categories, not lexical order: + +.. ipython:: python + + df.sort_values(by="grade") + +Grouping by a categorical column also shows empty categories: + +.. ipython:: python + + df.groupby("grade").size() + + +Plotting +-------- + +See the :ref:`Plotting ` docs. + +We use the standard convention for referencing the matplotlib API: + +.. ipython:: python + + import matplotlib.pyplot as plt + + plt.close("all") + +The ``plt.close`` method is used to `close `__ a figure window: + +.. ipython:: python + + ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000)) + ts = ts.cumsum() + + @savefig series_plot_basic.png + ts.plot(); + +If running under Jupyter Notebook, the plot will appear on :meth:`~Series.plot`. Otherwise use +`matplotlib.pyplot.show `__ to show it or +`matplotlib.pyplot.savefig `__ to write it to a file. + +.. ipython:: python + + plt.show(); + +On a DataFrame, the :meth:`~DataFrame.plot` method is a convenience to plot all +of the columns with labels: + +.. ipython:: python + + df = pd.DataFrame( + np.random.randn(1000, 4), index=ts.index, columns=["A", "B", "C", "D"] + ) + + df = df.cumsum() + + plt.figure(); + df.plot(); + @savefig frame_plot_basic.png + plt.legend(loc='best'); + +Importing and exporting data +---------------------------- + +CSV +~~~ + +:ref:`Writing to a csv file: ` using :meth:`DataFrame.to_csv` + +.. ipython:: python + + df.to_csv("foo.csv") + +:ref:`Reading from a csv file: ` using :func:`read_csv` + +.. ipython:: python + + pd.read_csv("foo.csv") + +.. ipython:: python + :suppress: + + import os + + os.remove("foo.csv") + +HDF5 +~~~~ + +Reading and writing to :ref:`HDFStores `. + +Writing to a HDF5 Store using :meth:`DataFrame.to_hdf`: + +.. ipython:: python + + df.to_hdf("foo.h5", "df") + +Reading from a HDF5 Store using :func:`read_hdf`: + +.. ipython:: python + + pd.read_hdf("foo.h5", "df") + +.. ipython:: python + :suppress: + + os.remove("foo.h5") + +Excel +~~~~~ + +Reading and writing to :ref:`Excel `. + +Writing to an excel file using :meth:`DataFrame.to_excel`: + +.. ipython:: python + + df.to_excel("foo.xlsx", sheet_name="Sheet1") + +Reading from an excel file using :func:`read_excel`: + +.. ipython:: python + + pd.read_excel("foo.xlsx", "Sheet1", index_col=None, na_values=["NA"]) + +.. ipython:: python + :suppress: + + os.remove("foo.xlsx") + +Gotchas +------- + +If you are attempting to perform a boolean operation on a :class:`Series` or :class:`DataFrame` +you might see an exception like: + +.. ipython:: python + :okexcept: + + if pd.Series([False, True, False]): + print("I was true") + +See :ref:`Comparisons` and :ref:`Gotchas` for an explanation and what to do. diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst new file mode 100644 index 00000000..b8df21ab --- /dev/null +++ b/doc/source/user_guide/advanced.rst @@ -0,0 +1,1250 @@ +.. 
_advanced: + +{{ header }} + +****************************** +MultiIndex / advanced indexing +****************************** + +This section covers :ref:`indexing with a MultiIndex ` +and :ref:`other advanced indexing features `. + +See the :ref:`Indexing and Selecting Data ` for general indexing documentation. + +.. warning:: + + Whether a copy or a reference is returned for a setting operation may + depend on the context. This is sometimes called ``chained assignment`` and + should be avoided. See :ref:`Returning a View versus Copy + `. + +See the :ref:`cookbook` for some advanced strategies. + +.. _advanced.hierarchical: + +Hierarchical indexing (MultiIndex) +---------------------------------- + +Hierarchical / Multi-level indexing is very exciting as it opens the door to some +quite sophisticated data analysis and manipulation, especially for working with +higher dimensional data. In essence, it enables you to store and manipulate +data with an arbitrary number of dimensions in lower dimensional data +structures like ``Series`` (1d) and ``DataFrame`` (2d). + +In this section, we will show what exactly we mean by "hierarchical" indexing +and how it integrates with all of the pandas indexing functionality +described above and in prior sections. Later, when discussing :ref:`group by +` and :ref:`pivoting and reshaping data `, we'll show +non-trivial applications to illustrate how it aids in structuring data for +analysis. + +See the :ref:`cookbook` for some advanced strategies. + +Creating a MultiIndex (hierarchical index) object +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :class:`MultiIndex` object is the hierarchical analogue of the standard +:class:`Index` object which typically stores the axis labels in pandas objects. You +can think of ``MultiIndex`` as an array of tuples where each tuple is unique. A +``MultiIndex`` can be created from a list of arrays (using +:meth:`MultiIndex.from_arrays`), an array of tuples (using +:meth:`MultiIndex.from_tuples`), a crossed set of iterables (using +:meth:`MultiIndex.from_product`), or a :class:`DataFrame` (using +:meth:`MultiIndex.from_frame`). The ``Index`` constructor will attempt to return +a ``MultiIndex`` when it is passed a list of tuples. The following examples +demonstrate different ways to initialize MultiIndexes. + + +.. ipython:: python + + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + tuples + + index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"]) + index + + s = pd.Series(np.random.randn(8), index=index) + s + +When you want every pairing of the elements in two iterables, it can be easier +to use the :meth:`MultiIndex.from_product` method: + +.. ipython:: python + + iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]] + pd.MultiIndex.from_product(iterables, names=["first", "second"]) + +You can also construct a ``MultiIndex`` from a ``DataFrame`` directly, using +the method :meth:`MultiIndex.from_frame`. This is a complementary method to +:meth:`MultiIndex.to_frame`. + +.. ipython:: python + + df = pd.DataFrame( + [["bar", "one"], ["bar", "two"], ["foo", "one"], ["foo", "two"]], + columns=["first", "second"], + ) + pd.MultiIndex.from_frame(df) + +As a convenience, you can pass a list of arrays directly into ``Series`` or +``DataFrame`` to construct a ``MultiIndex`` automatically: + +.. 
ipython:: python + + arrays = [ + np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]), + np.array(["one", "two", "one", "two", "one", "two", "one", "two"]), + ] + s = pd.Series(np.random.randn(8), index=arrays) + s + df = pd.DataFrame(np.random.randn(8, 4), index=arrays) + df + +All of the ``MultiIndex`` constructors accept a ``names`` argument which stores +string names for the levels themselves. If no names are provided, ``None`` will +be assigned: + +.. ipython:: python + + df.index.names + +This index can back any axis of a pandas object, and the number of **levels** +of the index is up to you: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + df + pd.DataFrame(np.random.randn(6, 6), index=index[:6], columns=index[:6]) + +We've "sparsified" the higher levels of the indexes to make the console output a +bit easier on the eyes. Note that how the index is displayed can be controlled using the +``multi_sparse`` option in ``pandas.set_options()``: + +.. ipython:: python + + with pd.option_context("display.multi_sparse", False): + df + +It's worth keeping in mind that there's nothing preventing you from using +tuples as atomic labels on an axis: + +.. ipython:: python + + pd.Series(np.random.randn(8), index=tuples) + +The reason that the ``MultiIndex`` matters is that it can allow you to do +grouping, selection, and reshaping operations as we will describe below and in +subsequent areas of the documentation. As you will see in later sections, you +can find yourself working with hierarchically-indexed data without creating a +``MultiIndex`` explicitly yourself. However, when loading data from a file, you +may wish to generate your own ``MultiIndex`` when preparing the data set. + +.. _advanced.get_level_values: + +Reconstructing the level labels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The method :meth:`~MultiIndex.get_level_values` will return a vector of the labels for each +location at a particular level: + +.. ipython:: python + + index.get_level_values(0) + index.get_level_values("second") + +Basic indexing on axis with MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the important features of hierarchical indexing is that you can select +data by a "partial" label identifying a subgroup in the data. **Partial** +selection "drops" levels of the hierarchical index in the result in a +completely analogous way to selecting a column in a regular DataFrame: + +.. ipython:: python + + df["bar"] + df["bar", "one"] + df["bar"]["one"] + s["qux"] + +See :ref:`Cross-section with hierarchical index ` for how to select +on a deeper level. + +.. _advanced.shown_levels: + +Defined levels +~~~~~~~~~~~~~~ + +The :class:`MultiIndex` keeps all the defined levels of an index, even +if they are not actually used. When slicing an index, you may notice this. +For example: + +.. ipython:: python + +   df.columns.levels # original MultiIndex + + df[["foo","qux"]].columns.levels # sliced + +This is done to avoid a recomputation of the levels in order to make slicing +highly performant. If you want to see only the used levels, you can use the +:meth:`~MultiIndex.get_level_values` method. + +.. ipython:: python + + df[["foo", "qux"]].columns.to_numpy() + + # for a specific level + df[["foo", "qux"]].columns.get_level_values(0) + +To reconstruct the ``MultiIndex`` with only the used levels, the +:meth:`~MultiIndex.remove_unused_levels` method may be used. + +.. 
ipython:: python + + new_mi = df[["foo", "qux"]].columns.remove_unused_levels() + new_mi.levels + +Data alignment and using ``reindex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Operations between differently-indexed objects having ``MultiIndex`` on the +axes will work as you expect; data alignment will work the same as an Index of +tuples: + +.. ipython:: python + + s + s[:-2] + s + s[::2] + +The :meth:`~DataFrame.reindex` method of ``Series``/``DataFrames`` can be +called with another ``MultiIndex``, or even a list or array of tuples: + +.. ipython:: python + + s.reindex(index[:3]) + s.reindex([("foo", "two"), ("bar", "one"), ("qux", "one"), ("baz", "one")]) + +.. _advanced.advanced_hierarchical: + +Advanced indexing with hierarchical index +----------------------------------------- + +Syntactically integrating ``MultiIndex`` in advanced indexing with ``.loc`` is a +bit challenging, but we've made every effort to do so. In general, MultiIndex +keys take the form of tuples. For example, the following works as you would expect: + +.. ipython:: python + + df = df.T + df + df.loc[("bar", "two")] + +Note that ``df.loc['bar', 'two']`` would also work in this example, but this shorthand +notation can lead to ambiguity in general. + +If you also want to index a specific column with ``.loc``, you must use a tuple +like this: + +.. ipython:: python + + df.loc[("bar", "two"), "A"] + +You don't have to specify all levels of the ``MultiIndex`` by passing only the +first elements of the tuple. For example, you can use "partial" indexing to +get all elements with ``bar`` in the first level as follows: + +.. ipython:: python + + df.loc["bar"] + +This is a shortcut for the slightly more verbose notation ``df.loc[('bar',),]`` (equivalent +to ``df.loc['bar',]`` in this example). + +"Partial" slicing also works quite nicely. + +.. ipython:: python + + df.loc["baz":"foo"] + +You can slice with a 'range' of values, by providing a slice of tuples. + +.. ipython:: python + + df.loc[("baz", "two"):("qux", "one")] + df.loc[("baz", "two"):"foo"] + +Passing a list of labels or tuples works similar to reindexing: + +.. ipython:: python + + df.loc[[("bar", "two"), ("qux", "one")]] + +.. note:: + + It is important to note that tuples and lists are not treated identically + in pandas when it comes to indexing. Whereas a tuple is interpreted as one + multi-level key, a list is used to specify several keys. Or in other words, + tuples go horizontally (traversing levels), lists go vertically (scanning levels). + +Importantly, a list of tuples indexes several complete ``MultiIndex`` keys, +whereas a tuple of lists refer to several values within a level: + +.. ipython:: python + + s = pd.Series( + [1, 2, 3, 4, 5, 6], + index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]]), + ) + s.loc[[("A", "c"), ("B", "d")]] # list of tuples + s.loc[(["A", "B"], ["c", "d"])] # tuple of lists + + +.. _advanced.mi_slicers: + +Using slicers +~~~~~~~~~~~~~ + +You can slice a ``MultiIndex`` by providing multiple indexers. + +You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label `, +including slices, lists of labels, labels, and boolean indexers. + +You can use ``slice(None)`` to select all the contents of *that* level. You do not need to specify all the +*deeper* levels, they will be implied as ``slice(None)``. + +As usual, **both sides** of the slicers are included as this is label indexing. + +.. 
warning:: + + You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and + for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted +   as indexing *both* axes, rather than into say the ``MultiIndex`` for the rows. + + You should do this: + + .. code-block:: python + + df.loc[(slice("A1", "A3"), ...), :] # noqa: E999 + +   You should **not** do this: +  + .. code-block:: python + + df.loc[(slice("A1", "A3"), ...)] # noqa: E999 + +.. ipython:: python + + def mklbl(prefix, n): + return ["%s%s" % (prefix, i) for i in range(n)] + + + miindex = pd.MultiIndex.from_product( + [mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)] + ) + micolumns = pd.MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"] + ) + dfmi = ( + pd.DataFrame( + np.arange(len(miindex) * len(micolumns)).reshape( + (len(miindex), len(micolumns)) + ), + index=miindex, + columns=micolumns, + ) + .sort_index() + .sort_index(axis=1) + ) + dfmi + +Basic MultiIndex slicing using slices, lists, and labels. + +.. ipython:: python + + dfmi.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] + + +You can use :class:`pandas.IndexSlice` to facilitate a more natural syntax +using ``:``, rather than using ``slice(None)``. + +.. ipython:: python + + idx = pd.IndexSlice + dfmi.loc[idx[:, :, ["C1", "C3"]], idx[:, "foo"]] + +It is possible to perform quite complicated selections using this method on multiple +axes at the same time. + +.. ipython:: python + + dfmi.loc["A1", (slice(None), "foo")] + dfmi.loc[idx[:, :, ["C1", "C3"]], idx[:, "foo"]] + +Using a boolean indexer you can provide selection related to the *values*. + +.. ipython:: python + + mask = dfmi[("a", "foo")] > 200 + dfmi.loc[idx[mask, :, ["C1", "C3"]], idx[:, "foo"]] + +You can also specify the ``axis`` argument to ``.loc`` to interpret the passed +slicers on a single axis. + +.. ipython:: python + + dfmi.loc(axis=0)[:, :, ["C1", "C3"]] + +Furthermore, you can *set* the values using the following methods. + +.. ipython:: python + + df2 = dfmi.copy() + df2.loc(axis=0)[:, :, ["C1", "C3"]] = -10 + df2 + +You can use a right-hand-side of an alignable object as well. + +.. ipython:: python + + df2 = dfmi.copy() + df2.loc[idx[:, :, ["C1", "C3"]], :] = df2 * 1000 + df2 + +.. _advanced.xs: + +Cross-section +~~~~~~~~~~~~~ + +The :meth:`~DataFrame.xs` method of ``DataFrame`` additionally takes a level argument to make +selecting data at a particular level of a ``MultiIndex`` easier. + +.. ipython:: python + + df + df.xs("one", level="second") + +.. ipython:: python + + # using the slicers + df.loc[(slice(None), "one"), :] + +You can also select on the columns with ``xs``, by +providing the axis argument. + +.. ipython:: python + + df = df.T + df.xs("one", level="second", axis=1) + +.. ipython:: python + + # using the slicers + df.loc[:, (slice(None), "one")] + +``xs`` also allows selection with multiple keys. + +.. ipython:: python + + df.xs(("one", "bar"), level=("second", "first"), axis=1) + +.. ipython:: python + + # using the slicers + df.loc[:, ("bar", "one")] + +You can pass ``drop_level=False`` to ``xs`` to retain +the level that was selected. + +.. ipython:: python + + df.xs("one", level="second", axis=1, drop_level=False) + +Compare the above with the result using ``drop_level=True`` (the default value). + +.. ipython:: python + + df.xs("one", level="second", axis=1, drop_level=True) + +.. 
ipython:: python + :suppress: + + df = df.T + +.. _advanced.advanced_reindex: + +Advanced reindexing and alignment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the parameter ``level`` in the :meth:`~DataFrame.reindex` and +:meth:`~DataFrame.align` methods of pandas objects is useful to broadcast +values across a level. For instance: + +.. ipython:: python + + midx = pd.MultiIndex( + levels=[["zero", "one"], ["x", "y"]], codes=[[1, 1, 0, 0], [1, 0, 1, 0]] + ) + df = pd.DataFrame(np.random.randn(4, 2), index=midx) + df + df2 = df.groupby(level=0).mean() + df2 + df2.reindex(df.index, level=0) + + # aligning + df_aligned, df2_aligned = df.align(df2, level=0) + df_aligned + df2_aligned + + +Swapping levels with ``swaplevel`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~MultiIndex.swaplevel` method can switch the order of two levels: + +.. ipython:: python + + df[:5] + df[:5].swaplevel(0, 1, axis=0) + +.. _advanced.reorderlevels: + +Reordering levels with ``reorder_levels`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~MultiIndex.reorder_levels` method generalizes the ``swaplevel`` +method, allowing you to permute the hierarchical index levels in one step: + +.. ipython:: python + + df[:5].reorder_levels([1, 0], axis=0) + +.. _advanced.index_names: + +Renaming names of an ``Index`` or ``MultiIndex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.rename` method is used to rename the labels of a +``MultiIndex``, and is typically used to rename the columns of a ``DataFrame``. +The ``columns`` argument of ``rename`` allows a dictionary to be specified +that includes only the columns you wish to rename. + +.. ipython:: python + + df.rename(columns={0: "col0", 1: "col1"}) + +This method can also be used to rename specific labels of the main index +of the ``DataFrame``. + +.. ipython:: python + + df.rename(index={"one": "two", "y": "z"}) + +The :meth:`~DataFrame.rename_axis` method is used to rename the name of a +``Index`` or ``MultiIndex``. In particular, the names of the levels of a +``MultiIndex`` can be specified, which is useful if ``reset_index()`` is later +used to move the values from the ``MultiIndex`` to a column. + +.. ipython:: python + + df.rename_axis(index=["abc", "def"]) + +Note that the columns of a ``DataFrame`` are an index, so that using +``rename_axis`` with the ``columns`` argument will change the name of that +index. + +.. ipython:: python + + df.rename_axis(columns="Cols").columns + +Both ``rename`` and ``rename_axis`` support specifying a dictionary, +``Series`` or a mapping function to map labels/names to new values. + +When working with an ``Index`` object directly, rather than via a ``DataFrame``, +:meth:`Index.set_names` can be used to change the names. + +.. ipython:: python + + mi = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=["x", "y"]) + mi.names + + mi2 = mi.rename("new name", level=0) + mi2 + + +You cannot set the names of the MultiIndex via a level. + +.. ipython:: python + :okexcept: + + mi.levels[0].name = "name via level" + +Use :meth:`Index.set_names` instead. + +Sorting a ``MultiIndex`` +------------------------ + +For :class:`MultiIndex`-ed objects to be indexed and sliced effectively, +they need to be sorted. As with any index, you can use :meth:`~DataFrame.sort_index`. + +.. ipython:: python + + import random + + random.shuffle(tuples) + s = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples)) + s + s.sort_index() + s.sort_index(level=0) + s.sort_index(level=1) + +.. 
_advanced.sortlevel_byname: + +You may also pass a level name to ``sort_index`` if the ``MultiIndex`` levels +are named. + +.. ipython:: python + + s.index.set_names(["L1", "L2"], inplace=True) + s.sort_index(level="L1") + s.sort_index(level="L2") + +On higher dimensional objects, you can sort any of the other axes by level if +they have a ``MultiIndex``: + +.. ipython:: python + + df.T.sort_index(level=1, axis=1) + +Indexing will work even if the data are not sorted, but will be rather +inefficient (and show a ``PerformanceWarning``). It will also +return a copy of the data rather than a view: + +.. ipython:: python + + dfm = pd.DataFrame( + {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)} + ) + dfm = dfm.set_index(["jim", "joe"]) + dfm + +.. code-block:: ipython + + In [4]: dfm.loc[(1, 'z')] + PerformanceWarning: indexing past lexsort depth may impact performance. + + Out[4]: + jolie + jim joe + 1 z 0.64094 + +.. _advanced.unsorted: + +Furthermore, if you try to index something that is not fully lexsorted, this can raise: + +.. code-block:: ipython + + In [5]: dfm.loc[(0, 'y'):(1, 'z')] + UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)' + +The :meth:`~MultiIndex.is_monotonic_increasing` method on a ``MultiIndex`` shows if the +index is sorted: + +.. ipython:: python + + dfm.index.is_monotonic_increasing + +.. ipython:: python + + dfm = dfm.sort_index() + dfm + dfm.index.is_monotonic_increasing + +And now selection works as expected. + +.. ipython:: python + + dfm.loc[(0, "y"):(1, "z")] + +Take methods +------------ + +.. _advanced.take: + +Similar to NumPy ndarrays, pandas ``Index``, ``Series``, and ``DataFrame`` also provides +the :meth:`~DataFrame.take` method that retrieves elements along a given axis at the given +indices. The given indices must be either a list or an ndarray of integer +index positions. ``take`` will also accept negative integers as relative positions to the end of the object. + +.. ipython:: python + + index = pd.Index(np.random.randint(0, 1000, 10)) + index + + positions = [0, 9, 3] + + index[positions] + index.take(positions) + + ser = pd.Series(np.random.randn(10)) + + ser.iloc[positions] + ser.take(positions) + +For DataFrames, the given indices should be a 1d list or ndarray that specifies +row or column positions. + +.. ipython:: python + + frm = pd.DataFrame(np.random.randn(5, 3)) + + frm.take([1, 4, 3]) + + frm.take([0, 2], axis=1) + +It is important to note that the ``take`` method on pandas objects are not +intended to work on boolean indices and may return unexpected results. + +.. ipython:: python + + arr = np.random.randn(10) + arr.take([False, False, True, True]) + arr[[0, 1]] + + ser = pd.Series(np.random.randn(10)) + ser.take([False, False, True, True]) + ser.iloc[[0, 1]] + +Finally, as a small note on performance, because the ``take`` method handles +a narrower range of inputs, it can offer performance that is a good deal +faster than fancy indexing. + +.. ipython:: python + + arr = np.random.randn(10000, 5) + indexer = np.arange(10000) + random.shuffle(indexer) + + %timeit arr[indexer] + %timeit arr.take(indexer, axis=0) + +.. ipython:: python + + ser = pd.Series(arr[:, 0]) + %timeit ser.iloc[indexer] + %timeit ser.take(indexer) + +.. _advanced.index_types: + +Index types +----------- + +We have discussed ``MultiIndex`` in the previous sections pretty extensively. 
+Documentation about ``DatetimeIndex`` and ``PeriodIndex`` are shown :ref:`here `, +and documentation about ``TimedeltaIndex`` is found :ref:`here `. + +In the following sub-sections we will highlight some other index types. + +.. _advanced.categoricalindex: + +CategoricalIndex +~~~~~~~~~~~~~~~~ + +:class:`CategoricalIndex` is a type of index that is useful for supporting +indexing with duplicates. This is a container around a :class:`Categorical` +and allows efficient indexing and storage of an index with a large number of duplicated elements. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + df = pd.DataFrame({"A": np.arange(6), "B": list("aabbca")}) + df["B"] = df["B"].astype(CategoricalDtype(list("cab"))) + df + df.dtypes + df["B"].cat.categories + +Setting the index will create a ``CategoricalIndex``. + +.. ipython:: python + + df2 = df.set_index("B") + df2.index + +Indexing with ``__getitem__/.iloc/.loc`` works similarly to an ``Index`` with duplicates. +The indexers **must** be in the category or the operation will raise a ``KeyError``. + +.. ipython:: python + + df2.loc["a"] + +The ``CategoricalIndex`` is **preserved** after indexing: + +.. ipython:: python + + df2.loc["a"].index + +Sorting the index will sort by the order of the categories (recall that we +created the index with ``CategoricalDtype(list('cab'))``, so the sorted +order is ``cab``). + +.. ipython:: python + + df2.sort_index() + +Groupby operations on the index will preserve the index nature as well. + +.. ipython:: python + + df2.groupby(level=0).sum() + df2.groupby(level=0).sum().index + +Reindexing operations will return a resulting index based on the type of the passed +indexer. Passing a list will return a plain-old ``Index``; indexing with +a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories +of the **passed** ``Categorical`` dtype. This allows one to arbitrarily index these even with +values **not** in the categories, similarly to how you can reindex **any** pandas index. + +.. ipython:: python + + df3 = pd.DataFrame( + {"A": np.arange(3), "B": pd.Series(list("abc")).astype("category")} + ) + df3 = df3.set_index("B") + df3 + +.. ipython:: python + + df3.reindex(["a", "e"]) + df3.reindex(["a", "e"]).index + df3.reindex(pd.Categorical(["a", "e"], categories=list("abe"))) + df3.reindex(pd.Categorical(["a", "e"], categories=list("abe"))).index + +.. warning:: + + Reshaping and Comparison operations on a ``CategoricalIndex`` must have the same categories + or a ``TypeError`` will be raised. + + .. ipython:: python + + df4 = pd.DataFrame({"A": np.arange(2), "B": list("ba")}) + df4["B"] = df4["B"].astype(CategoricalDtype(list("ab"))) + df4 = df4.set_index("B") + df4.index + + df5 = pd.DataFrame({"A": np.arange(2), "B": list("bc")}) + df5["B"] = df5["B"].astype(CategoricalDtype(list("bc"))) + df5 = df5.set_index("B") + df5.index + + .. code-block:: ipython + + In [1]: pd.concat([df4, df5]) + TypeError: categories must match existing categories when appending + +.. _advanced.rangeindex: + +Int64Index and RangeIndex +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. deprecated:: 1.4.0 + In pandas 2.0, :class:`Index` will become the default index type for numeric types + instead of ``Int64Index``, ``Float64Index`` and ``UInt64Index`` and those index types + are therefore deprecated and will be removed in a futire version. + ``RangeIndex`` will not be removed, as it represents an optimized version of an integer index. 
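+
+As a rough illustration of the classes involved (the reprs in the comments
+reflect 1.5.x behaviour and will change once the deprecation above takes
+effect):
+
+.. code-block:: python
+
+   import pandas as pd
+
+   # the default index of a new object is a RangeIndex
+   pd.Series([10, 20, 30]).index   # RangeIndex(start=0, stop=3, step=1)
+
+   # explicit integer labels currently produce an Int64Index
+   pd.Index([2, 4, 8])             # Int64Index([2, 4, 8], dtype='int64')
+
+   # float or mixed int/float labels currently produce a Float64Index
+   pd.Index([1.5, 2.0, 3.0])       # Float64Index([1.5, 2.0, 3.0], dtype='float64')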
+ +:class:`Int64Index` is a fundamental basic index in pandas. This is an immutable array +implementing an ordered, sliceable set. + +:class:`RangeIndex` is a sub-class of ``Int64Index`` that provides the default index for all ``NDFrame`` objects. +``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to Python `range types `__. + +.. _advanced.float64index: + +Float64Index +~~~~~~~~~~~~ + +.. deprecated:: 1.4.0 + :class:`Index` will become the default index type for numeric types in the future + instead of ``Int64Index``, ``Float64Index`` and ``UInt64Index`` and those index types + are therefore deprecated and will be removed in a future version of Pandas. + ``RangeIndex`` will not be removed as it represents an optimized version of an integer index. + +By default a :class:`Float64Index` will be automatically created when passing floating, or mixed-integer-floating values in index creation. +This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the +same. + +.. ipython:: python + + indexf = pd.Index([1.5, 2, 3, 4.5, 5]) + indexf + sf = pd.Series(range(5), index=indexf) + sf + +Scalar selection for ``[],.loc`` will always be label based. An integer will match an equal float index (e.g. ``3`` is equivalent to ``3.0``). + +.. ipython:: python + + sf[3] + sf[3.0] + sf.loc[3] + sf.loc[3.0] + +The only positional indexing is via ``iloc``. + +.. ipython:: python + + sf.iloc[3] + +A scalar index that is not found will raise a ``KeyError``. +Slicing is primarily on the values of the index when using ``[],ix,loc``, and +**always** positional when using ``iloc``. The exception is when the slice is +boolean, in which case it will always be positional. + +.. ipython:: python + + sf[2:4] + sf.loc[2:4] + sf.iloc[2:4] + +In float indexes, slicing using floats is allowed. + +.. ipython:: python + + sf[2.1:4.6] + sf.loc[2.1:4.6] + +In non-float indexes, slicing using floats will raise a ``TypeError``. + +.. code-block:: ipython + + In [1]: pd.Series(range(5))[3.5] + TypeError: the label [3.5] is not a proper indexer for this index type (Int64Index) + + In [1]: pd.Series(range(5))[3.5:4.5] + TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index) + +Here is a typical use-case for using this type of indexing. Imagine that you have a somewhat +irregular timedelta-like indexing scheme, but the data is recorded as floats. This could, for +example, be millisecond offsets. + +.. ipython:: python + + dfir = pd.concat( + [ + pd.DataFrame( + np.random.randn(5, 2), index=np.arange(5) * 250.0, columns=list("AB") + ), + pd.DataFrame( + np.random.randn(6, 2), + index=np.arange(4, 10) * 250.1, + columns=list("AB"), + ), + ] + ) + dfir + +Selection operations then will always work on a value basis, for all selection operators. + +.. ipython:: python + + dfir[0:1000.4] + dfir.loc[0:1001, "A"] + dfir.loc[1000.4] + +You could retrieve the first 1 second (1000 ms) of data as such: + +.. ipython:: python + + dfir[0:1000] + +If you need integer based selection, you should use ``iloc``: + +.. ipython:: python + + dfir.iloc[0:5] + + +.. _advanced.intervalindex: + +IntervalIndex +~~~~~~~~~~~~~ + +:class:`IntervalIndex` together with its own dtype, :class:`~pandas.api.types.IntervalDtype` +as well as the :class:`Interval` scalar type, allow first-class support in pandas +for interval notation. 
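+
+For orientation, a small sketch of the three pieces named above (the commented
+output is indicative only):
+
+.. code-block:: python
+
+   import pandas as pd
+
+   # the scalar type; intervals are closed on the right by default
+   iv = pd.Interval(0, 5)
+   3 in iv                                  # True
+   0 in iv                                  # False, the left edge is open
+
+   # an IntervalIndex built from break points, with its IntervalDtype
+   idx = pd.IntervalIndex.from_breaks([0, 1, 2, 3])
+   idx.dtype                                # interval[int64, right]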
+ +The ``IntervalIndex`` allows some unique indexing and is also used as a +return type for the categories in :func:`cut` and :func:`qcut`. + +Indexing with an ``IntervalIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An ``IntervalIndex`` can be used in ``Series`` and in ``DataFrame`` as the index. + +.. ipython:: python + + df = pd.DataFrame( + {"A": [1, 2, 3, 4]}, index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4]) + ) + df + +Label based indexing via ``.loc`` along the edges of an interval works as you would expect, +selecting that particular interval. + +.. ipython:: python + + df.loc[2] + df.loc[[2, 3]] + +If you select a label *contained* within an interval, this will also select the interval. + +.. ipython:: python + + df.loc[2.5] + df.loc[[2.5, 3.5]] + +Selecting using an ``Interval`` will only return exact matches (starting from pandas 0.25.0). + +.. ipython:: python + + df.loc[pd.Interval(1, 2)] + +Trying to select an ``Interval`` that is not exactly contained in the ``IntervalIndex`` will raise a ``KeyError``. + +.. code-block:: python + + In [7]: df.loc[pd.Interval(0.5, 2.5)] + --------------------------------------------------------------------------- + KeyError: Interval(0.5, 2.5, closed='right') + +Selecting all ``Intervals`` that overlap a given ``Interval`` can be performed using the +:meth:`~IntervalIndex.overlaps` method to create a boolean indexer. + +.. ipython:: python + + idxr = df.index.overlaps(pd.Interval(0.5, 2.5)) + idxr + df[idxr] + +Binning data with ``cut`` and ``qcut`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`cut` and :func:`qcut` both return a ``Categorical`` object, and the bins they +create are stored as an ``IntervalIndex`` in its ``.categories`` attribute. + +.. ipython:: python + + c = pd.cut(range(4), bins=2) + c + c.categories + +:func:`cut` also accepts an ``IntervalIndex`` for its ``bins`` argument, which enables +a useful pandas idiom. First, We call :func:`cut` with some data and ``bins`` set to a +fixed number, to generate the bins. Then, we pass the values of ``.categories`` as the +``bins`` argument in subsequent calls to :func:`cut`, supplying new data which will be +binned into the same bins. + +.. ipython:: python + + pd.cut([0, 3, 5, 1], bins=c.categories) + +Any value which falls outside all bins will be assigned a ``NaN`` value. + +Generating ranges of intervals +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If we need intervals on a regular frequency, we can use the :func:`interval_range` function +to create an ``IntervalIndex`` using various combinations of ``start``, ``end``, and ``periods``. +The default frequency for ``interval_range`` is a 1 for numeric intervals, and calendar day for +datetime-like intervals: + +.. ipython:: python + + pd.interval_range(start=0, end=5) + + pd.interval_range(start=pd.Timestamp("2017-01-01"), periods=4) + + pd.interval_range(end=pd.Timedelta("3 days"), periods=3) + +The ``freq`` parameter can used to specify non-default frequencies, and can utilize a variety +of :ref:`frequency aliases ` with datetime-like intervals: + +.. ipython:: python + + pd.interval_range(start=0, periods=5, freq=1.5) + + pd.interval_range(start=pd.Timestamp("2017-01-01"), periods=4, freq="W") + + pd.interval_range(start=pd.Timedelta("0 days"), periods=3, freq="9H") + +Additionally, the ``closed`` parameter can be used to specify which side(s) the intervals +are closed on. Intervals are closed on the right side by default. + +.. 
ipython:: python + + pd.interval_range(start=0, end=4, closed="both") + + pd.interval_range(start=0, end=4, closed="neither") + +Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced +intervals from ``start`` to ``end`` inclusively, with ``periods`` number of elements +in the resulting ``IntervalIndex``: + +.. ipython:: python + + pd.interval_range(start=0, end=6, periods=4) + + pd.interval_range(pd.Timestamp("2018-01-01"), pd.Timestamp("2018-02-28"), periods=3) + +Miscellaneous indexing FAQ +-------------------------- + +Integer indexing +~~~~~~~~~~~~~~~~ + +Label-based indexing with integer axis labels is a thorny topic. It has been +discussed heavily on mailing lists and among various members of the scientific +Python community. In pandas, our general viewpoint is that labels matter more +than integer locations. Therefore, with an integer axis index *only* +label-based indexing is possible with the standard tools like ``.loc``. The +following code will generate exceptions: + +.. ipython:: python + :okexcept: + + s = pd.Series(range(5)) + s[-1] + df = pd.DataFrame(np.random.randn(5, 4)) + df + df.loc[-2:] + +This deliberate decision was made to prevent ambiguities and subtle bugs (many +users reported finding bugs when the API change was made to stop "falling back" +on position-based indexing). + +Non-monotonic indexes require exact matches +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the index of a ``Series`` or ``DataFrame`` is monotonically increasing or decreasing, then the bounds +of a label-based slice can be outside the range of the index, much like slice indexing a +normal Python ``list``. Monotonicity of an index can be tested with the :meth:`~Index.is_monotonic_increasing` and +:meth:`~Index.is_monotonic_decreasing` attributes. + +.. ipython:: python + + df = pd.DataFrame(index=[2, 3, 3, 4, 5], columns=["data"], data=list(range(5))) + df.index.is_monotonic_increasing + + # no rows 0 or 1, but still returns rows 2, 3 (both of them), and 4: + df.loc[0:4, :] + + # slice is are outside the index, so empty DataFrame is returned + df.loc[13:15, :] + +On the other hand, if the index is not monotonic, then both slice bounds must be +*unique* members of the index. + +.. ipython:: python + + df = pd.DataFrame(index=[2, 3, 1, 4, 3, 5], columns=["data"], data=list(range(6))) + df.index.is_monotonic_increasing + + # OK because 2 and 4 are in the index + df.loc[2:4, :] + +.. code-block:: ipython + + # 0 is not in the index + In [9]: df.loc[0:4, :] + KeyError: 0 + + # 3 is not a unique label + In [11]: df.loc[2:3, :] + KeyError: 'Cannot get right slice bound for non-unique label: 3' + +``Index.is_monotonic_increasing`` and ``Index.is_monotonic_decreasing`` only check that +an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with +the :meth:`~Index.is_unique` attribute. + +.. ipython:: python + + weakly_monotonic = pd.Index(["a", "b", "c", "c"]) + weakly_monotonic + weakly_monotonic.is_monotonic_increasing + weakly_monotonic.is_monotonic_increasing & weakly_monotonic.is_unique + +.. _advanced.endpoints_are_inclusive: + +Endpoints are inclusive +~~~~~~~~~~~~~~~~~~~~~~~ + +Compared with standard Python sequence slicing in which the slice endpoint is +not inclusive, label-based slicing in pandas **is inclusive**. The primary +reason for this is that it is often not possible to easily determine the +"successor" or next element after a particular label in an index. For example, +consider the following ``Series``: + +.. 
ipython:: python + + s = pd.Series(np.random.randn(6), index=list("abcdef")) + s + +Suppose we wished to slice from ``c`` to ``e``, using integers this would be +accomplished as such: + +.. ipython:: python + + s[2:5] + +However, if you only had ``c`` and ``e``, determining the next element in the +index can be somewhat complicated. For example, the following does not work: + +:: + + s.loc['c':'e' + 1] + +A very common use case is to limit a time series to start and end at two +specific dates. To enable this, we made the design choice to make label-based +slicing include both endpoints: + +.. ipython:: python + + s.loc["c":"e"] + +This is most definitely a "practicality beats purity" sort of thing, but it is +something to watch out for if you expect label-based slicing to behave exactly +in the way that standard Python integer slicing works. + + +Indexing potentially changes underlying Series dtype +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The different indexing operation can potentially change the dtype of a ``Series``. + +.. ipython:: python + + series1 = pd.Series([1, 2, 3]) + series1.dtype + res = series1.reindex([0, 4]) + res.dtype + res + +.. ipython:: python + + series2 = pd.Series([True]) + series2.dtype + res = series2.reindex_like(series1) + res.dtype + res + +This is because the (re)indexing operations above silently inserts ``NaNs`` and the ``dtype`` +changes accordingly. This can cause some issues when using ``numpy`` ``ufuncs`` +such as ``numpy.logical_and``. + +See the :issue:`2388` for a more +detailed discussion. diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst new file mode 100644 index 00000000..a34d4891 --- /dev/null +++ b/doc/source/user_guide/basics.rst @@ -0,0 +1,2493 @@ +.. _basics: + +{{ header }} + +============================== + Essential basic functionality +============================== + +Here we discuss a lot of the essential functionality common to the pandas data +structures. To begin, let's create some example objects like we did in +the :ref:`10 minutes to pandas <10min>` section: + +.. ipython:: python + + index = pd.date_range("1/1/2000", periods=8) + s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"]) + df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"]) + +.. _basics.head_tail: + +Head and tail +------------- + +To view a small sample of a Series or DataFrame object, use the +:meth:`~DataFrame.head` and :meth:`~DataFrame.tail` methods. The default number +of elements to display is five, but you may pass a custom number. + +.. ipython:: python + + long_series = pd.Series(np.random.randn(1000)) + long_series.head() + long_series.tail(3) + +.. _basics.attrs: + +Attributes and underlying data +------------------------------ + +pandas objects have a number of attributes enabling you to access the metadata + +* **shape**: gives the axis dimensions of the object, consistent with ndarray +* Axis labels + * **Series**: *index* (only axis) + * **DataFrame**: *index* (rows) and *columns* + +Note, **these attributes can be safely assigned to**! + +.. ipython:: python + + df[:2] + df.columns = [x.lower() for x in df.columns] + df + +pandas objects (:class:`Index`, :class:`Series`, :class:`DataFrame`) can be +thought of as containers for arrays, which hold the actual data and do the +actual computation. For many types, the underlying array is a +:class:`numpy.ndarray`. 
However, pandas and 3rd party libraries may *extend* +NumPy's type system to add support for custom arrays +(see :ref:`basics.dtypes`). + +To get the actual data inside a :class:`Index` or :class:`Series`, use +the ``.array`` property + +.. ipython:: python + + s.array + s.index.array + +:attr:`~Series.array` will always be an :class:`~pandas.api.extensions.ExtensionArray`. +The exact details of what an :class:`~pandas.api.extensions.ExtensionArray` is and why pandas uses them are a bit +beyond the scope of this introduction. See :ref:`basics.dtypes` for more. + +If you know you need a NumPy array, use :meth:`~Series.to_numpy` +or :meth:`numpy.asarray`. + +.. ipython:: python + + s.to_numpy() + np.asarray(s) + +When the Series or Index is backed by +an :class:`~pandas.api.extensions.ExtensionArray`, :meth:`~Series.to_numpy` +may involve copying data and coercing values. See :ref:`basics.dtypes` for more. + +:meth:`~Series.to_numpy` gives some control over the ``dtype`` of the +resulting :class:`numpy.ndarray`. For example, consider datetimes with timezones. +NumPy doesn't have a dtype to represent timezone-aware datetimes, so there +are two possibly useful representations: + +1. An object-dtype :class:`numpy.ndarray` with :class:`Timestamp` objects, each + with the correct ``tz`` +2. A ``datetime64[ns]`` -dtype :class:`numpy.ndarray`, where the values have + been converted to UTC and the timezone discarded + +Timezones may be preserved with ``dtype=object`` + +.. ipython:: python + + ser = pd.Series(pd.date_range("2000", periods=2, tz="CET")) + ser.to_numpy(dtype=object) + +Or thrown away with ``dtype='datetime64[ns]'`` + +.. ipython:: python + + ser.to_numpy(dtype="datetime64[ns]") + +Getting the "raw data" inside a :class:`DataFrame` is possibly a bit more +complex. When your ``DataFrame`` only has a single data type for all the +columns, :meth:`DataFrame.to_numpy` will return the underlying data: + +.. ipython:: python + + df.to_numpy() + +If a DataFrame contains homogeneously-typed data, the ndarray can +actually be modified in-place, and the changes will be reflected in the data +structure. For heterogeneous data (e.g. some of the DataFrame's columns are not +all the same dtype), this will not be the case. The values attribute itself, +unlike the axis labels, cannot be assigned to. + +.. note:: + + When working with heterogeneous data, the dtype of the resulting ndarray + will be chosen to accommodate all of the data involved. For example, if + strings are involved, the result will be of object dtype. If there are only + floats and integers, the resulting array will be of float dtype. + +In the past, pandas recommended :attr:`Series.values` or :attr:`DataFrame.values` +for extracting the data from a Series or DataFrame. You'll still find references +to these in old code bases and online. Going forward, we recommend avoiding +``.values`` and using ``.array`` or ``.to_numpy()``. ``.values`` has the following +drawbacks: + +1. When your Series contains an :ref:`extension type `, it's + unclear whether :attr:`Series.values` returns a NumPy array or the extension array. + :attr:`Series.array` will always return an :class:`~pandas.api.extensions.ExtensionArray`, and will never + copy data. :meth:`Series.to_numpy` will always return a NumPy array, + potentially at the cost of copying / coercing values. +2. When your DataFrame contains a mixture of data types, :attr:`DataFrame.values` may + involve copying data and coercing values to a common dtype, a relatively expensive + operation. 
:meth:`DataFrame.to_numpy`, being a method, makes it clearer that the + returned NumPy array may not be a view on the same data in the DataFrame. + +.. _basics.accelerate: + +Accelerated operations +---------------------- + +pandas has support for accelerating certain types of binary numerical and boolean operations using +the ``numexpr`` library and the ``bottleneck`` libraries. + +These libraries are especially useful when dealing with large data sets, and provide large +speedups. ``numexpr`` uses smart chunking, caching, and multiple cores. ``bottleneck`` is +a set of specialized cython routines that are especially fast when dealing with arrays that have +``nans``. + +Here is a sample (using 100 column x 100,000 row ``DataFrames``): + +.. csv-table:: + :header: "Operation", "0.11.0 (ms)", "Prior Version (ms)", "Ratio to Prior" + :widths: 25, 25, 25, 25 + :delim: ; + + ``df1 > df2``; 13.32; 125.35; 0.1063 + ``df1 * df2``; 21.71; 36.63; 0.5928 + ``df1 + df2``; 22.04; 36.50; 0.6039 + +You are highly encouraged to install both libraries. See the section +:ref:`Recommended Dependencies ` for more installation info. + +These are both enabled to be used by default, you can control this by setting the options: + +.. code-block:: python + + pd.set_option("compute.use_bottleneck", False) + pd.set_option("compute.use_numexpr", False) + +.. _basics.binop: + +Flexible binary operations +-------------------------- + +With binary operations between pandas data structures, there are two key points +of interest: + +* Broadcasting behavior between higher- (e.g. DataFrame) and + lower-dimensional (e.g. Series) objects. +* Missing data in computations. + +We will demonstrate how to manage these issues independently, though they can +be handled simultaneously. + +Matching / broadcasting behavior +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +DataFrame has the methods :meth:`~DataFrame.add`, :meth:`~DataFrame.sub`, +:meth:`~DataFrame.mul`, :meth:`~DataFrame.div` and related functions +:meth:`~DataFrame.radd`, :meth:`~DataFrame.rsub`, ... +for carrying out binary operations. For broadcasting behavior, +Series input is of primary interest. Using these functions, you can use to +either match on the *index* or *columns* via the **axis** keyword: + +.. ipython:: python + + df = pd.DataFrame( + { + "one": pd.Series(np.random.randn(3), index=["a", "b", "c"]), + "two": pd.Series(np.random.randn(4), index=["a", "b", "c", "d"]), + "three": pd.Series(np.random.randn(3), index=["b", "c", "d"]), + } + ) + df + row = df.iloc[1] + column = df["two"] + + df.sub(row, axis="columns") + df.sub(row, axis=1) + + df.sub(column, axis="index") + df.sub(column, axis=0) + +.. ipython:: python + :suppress: + + df_orig = df + +Furthermore you can align a level of a MultiIndexed DataFrame with a Series. + +.. ipython:: python + + dfmi = df.copy() + dfmi.index = pd.MultiIndex.from_tuples( + [(1, "a"), (1, "b"), (1, "c"), (2, "a")], names=["first", "second"] + ) + dfmi.sub(column, axis=0, level="second") + +Series and Index also support the :func:`divmod` builtin. This function takes +the floor division and modulo operation at the same time returning a two-tuple +of the same type as the left hand side. For example: + +.. ipython:: python + + s = pd.Series(np.arange(10)) + s + div, rem = divmod(s, 3) + div + rem + + idx = pd.Index(np.arange(10)) + idx + div, rem = divmod(idx, 3) + div + rem + +We can also do elementwise :func:`divmod`: + +.. 
ipython:: python + + div, rem = divmod(s, [2, 2, 3, 3, 4, 4, 5, 5, 6, 6]) + div + rem + +Missing data / operations with fill values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In Series and DataFrame, the arithmetic functions have the option of inputting +a *fill_value*, namely a value to substitute when at most one of the values at +a location are missing. For example, when adding two DataFrame objects, you may +wish to treat NaN as 0 unless both DataFrames are missing that value, in which +case the result will be NaN (you can later replace NaN with some other value +using ``fillna`` if you wish). + +.. ipython:: python + :suppress: + + df2 = df.copy() + df2["three"]["a"] = 1.0 + +.. ipython:: python + + df + df2 + df + df2 + df.add(df2, fill_value=0) + +.. _basics.compare: + +Flexible comparisons +~~~~~~~~~~~~~~~~~~~~ + +Series and DataFrame have the binary comparison methods ``eq``, ``ne``, ``lt``, ``gt``, +``le``, and ``ge`` whose behavior is analogous to the binary +arithmetic operations described above: + +.. ipython:: python + + df.gt(df2) + df2.ne(df) + +These operations produce a pandas object of the same type as the left-hand-side +input that is of dtype ``bool``. These ``boolean`` objects can be used in +indexing operations, see the section on :ref:`Boolean indexing`. + +.. _basics.reductions: + +Boolean reductions +~~~~~~~~~~~~~~~~~~ + +You can apply the reductions: :attr:`~DataFrame.empty`, :meth:`~DataFrame.any`, +:meth:`~DataFrame.all`, and :meth:`~DataFrame.bool` to provide a +way to summarize a boolean result. + +.. ipython:: python + + (df > 0).all() + (df > 0).any() + +You can reduce to a final boolean value. + +.. ipython:: python + + (df > 0).any().any() + +You can test if a pandas object is empty, via the :attr:`~DataFrame.empty` property. + +.. ipython:: python + + df.empty + pd.DataFrame(columns=list("ABC")).empty + +To evaluate single-element pandas objects in a boolean context, use the method +:meth:`~DataFrame.bool`: + +.. ipython:: python + + pd.Series([True]).bool() + pd.Series([False]).bool() + pd.DataFrame([[True]]).bool() + pd.DataFrame([[False]]).bool() + +.. warning:: + + You might be tempted to do the following: + + .. code-block:: python + + >>> if df: + ... pass + + Or + + .. code-block:: python + + >>> df and df2 + + These will both raise errors, as you are trying to compare multiple values.:: + + ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). + +See :ref:`gotchas` for a more detailed discussion. + +.. _basics.equals: + +Comparing if objects are equivalent +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Often you may find that there is more than one way to compute the same +result. As a simple example, consider ``df + df`` and ``df * 2``. To test +that these two computations produce the same result, given the tools +shown above, you might imagine using ``(df + df == df * 2).all()``. But in +fact, this expression is False: + +.. ipython:: python + + df + df == df * 2 + (df + df == df * 2).all() + +Notice that the boolean DataFrame ``df + df == df * 2`` contains some False values! +This is because NaNs do not compare as equals: + +.. ipython:: python + + np.nan == np.nan + +So, NDFrames (such as Series and DataFrames) +have an :meth:`~DataFrame.equals` method for testing equality, with NaNs in +corresponding locations treated as equal. + +.. ipython:: python + + (df + df).equals(df * 2) + +Note that the Series or DataFrame index needs to be in the same order for +equality to be True: + +.. 
ipython:: python + + df1 = pd.DataFrame({"col": ["foo", 0, np.nan]}) + df2 = pd.DataFrame({"col": [np.nan, 0, "foo"]}, index=[2, 1, 0]) + df1.equals(df2) + df1.equals(df2.sort_index()) + +Comparing array-like objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can conveniently perform element-wise comparisons when comparing a pandas +data structure with a scalar value: + +.. ipython:: python + + pd.Series(["foo", "bar", "baz"]) == "foo" + pd.Index(["foo", "bar", "baz"]) == "foo" + +pandas also handles element-wise comparisons between different array-like +objects of the same length: + +.. ipython:: python + + pd.Series(["foo", "bar", "baz"]) == pd.Index(["foo", "bar", "qux"]) + pd.Series(["foo", "bar", "baz"]) == np.array(["foo", "bar", "qux"]) + +Trying to compare ``Index`` or ``Series`` objects of different lengths will +raise a ValueError: + +.. code-block:: ipython + + In [55]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo', 'bar']) + ValueError: Series lengths must match to compare + + In [56]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo']) + ValueError: Series lengths must match to compare + +Note that this is different from the NumPy behavior where a comparison can +be broadcast: + +.. ipython:: python + + np.array([1, 2, 3]) == np.array([2]) + +or it can return False if broadcasting can not be done: + +.. ipython:: python + :okwarning: + + np.array([1, 2, 3]) == np.array([1, 2]) + +Combining overlapping data sets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A problem occasionally arising is the combination of two similar data sets +where values in one are preferred over the other. An example would be two data +series representing a particular economic indicator where one is considered to +be of "higher quality". However, the lower quality series might extend further +back in history or have more complete data coverage. As such, we would like to +combine two DataFrame objects where missing values in one DataFrame are +conditionally filled with like-labeled values from the other DataFrame. The +function implementing this operation is :meth:`~DataFrame.combine_first`, +which we illustrate: + +.. ipython:: python + + df1 = pd.DataFrame( + {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]} + ) + df2 = pd.DataFrame( + { + "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0], + "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0], + } + ) + df1 + df2 + df1.combine_first(df2) + +General DataFrame combine +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.combine_first` method above calls the more general +:meth:`DataFrame.combine`. This method takes another DataFrame +and a combiner function, aligns the input DataFrame and then passes the combiner +function pairs of Series (i.e., columns whose names are the same). + +So, for instance, to reproduce :meth:`~DataFrame.combine_first` as above: + +.. ipython:: python + + def combiner(x, y): + return np.where(pd.isna(x), y, x) + + + df1.combine(df2, combiner) + +.. _basics.stats: + +Descriptive statistics +---------------------- + +There exists a large number of methods for computing descriptive statistics and +other related operations on :ref:`Series `, :ref:`DataFrame +`. Most of these +are aggregations (hence producing a lower-dimensional result) like +:meth:`~DataFrame.sum`, :meth:`~DataFrame.mean`, and :meth:`~DataFrame.quantile`, +but some of them, like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`, +produce an object of the same size. 
Generally speaking, these methods take an +**axis** argument, just like *ndarray.{sum, std, ...}*, but the axis can be +specified by name or integer: + +* **Series**: no axis argument needed +* **DataFrame**: "index" (axis=0, default), "columns" (axis=1) + +For example: + +.. ipython:: python + + df + df.mean(0) + df.mean(1) + +All such methods have a ``skipna`` option signaling whether to exclude missing +data (``True`` by default): + +.. ipython:: python + + df.sum(0, skipna=False) + df.sum(axis=1, skipna=True) + +Combined with the broadcasting / arithmetic behavior, one can describe various +statistical procedures, like standardization (rendering data zero mean and +standard deviation of 1), very concisely: + +.. ipython:: python + + ts_stand = (df - df.mean()) / df.std() + ts_stand.std() + xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0) + xs_stand.std(1) + +Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` +preserve the location of ``NaN`` values. This is somewhat different from +:meth:`~DataFrame.expanding` and :meth:`~DataFrame.rolling` since ``NaN`` behavior +is furthermore dictated by a ``min_periods`` parameter. + +.. ipython:: python + + df.cumsum() + +Here is a quick reference summary table of common functions. Each also takes an +optional ``level`` parameter which applies only if the object has a +:ref:`hierarchical index`. + +.. csv-table:: + :header: "Function", "Description" + :widths: 20, 80 + + ``count``, Number of non-NA observations + ``sum``, Sum of values + ``mean``, Mean of values + ``mad``, Mean absolute deviation + ``median``, Arithmetic median of values + ``min``, Minimum + ``max``, Maximum + ``mode``, Mode + ``abs``, Absolute Value + ``prod``, Product of values + ``std``, Bessel-corrected sample standard deviation + ``var``, Unbiased variance + ``sem``, Standard error of the mean + ``skew``, Sample skewness (3rd moment) + ``kurt``, Sample kurtosis (4th moment) + ``quantile``, Sample quantile (value at %) + ``cumsum``, Cumulative sum + ``cumprod``, Cumulative product + ``cummax``, Cumulative maximum + ``cummin``, Cumulative minimum + +Note that by chance some NumPy methods, like ``mean``, ``std``, and ``sum``, +will exclude NAs on Series input by default: + +.. ipython:: python + + np.mean(df["one"]) + np.mean(df["one"].to_numpy()) + +:meth:`Series.nunique` will return the number of unique non-NA values in a +Series: + +.. ipython:: python + + series = pd.Series(np.random.randn(500)) + series[20:500] = np.nan + series[10:20] = 5 + series.nunique() + +.. _basics.describe: + +Summarizing data: describe +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There is a convenient :meth:`~DataFrame.describe` function which computes a variety of summary +statistics about a Series or the columns of a DataFrame (excluding NAs of +course): + +.. ipython:: python + + series = pd.Series(np.random.randn(1000)) + series[::2] = np.nan + series.describe() + frame = pd.DataFrame(np.random.randn(1000, 5), columns=["a", "b", "c", "d", "e"]) + frame.iloc[::2] = np.nan + frame.describe() + +You can select specific percentiles to include in the output: + +.. ipython:: python + + series.describe(percentiles=[0.05, 0.25, 0.75, 0.95]) + +By default, the median is always included. + +For a non-numerical Series object, :meth:`~Series.describe` will give a simple +summary of the number of unique values and most frequently occurring values: + +.. 
ipython:: python + + s = pd.Series(["a", "a", "b", "b", "a", "a", np.nan, "c", "d", "a"]) + s.describe() + +Note that on a mixed-type DataFrame object, :meth:`~DataFrame.describe` will +restrict the summary to include only numerical columns or, if none are, only +categorical columns: + +.. ipython:: python + + frame = pd.DataFrame({"a": ["Yes", "Yes", "No", "No"], "b": range(4)}) + frame.describe() + +This behavior can be controlled by providing a list of types as ``include``/``exclude`` +arguments. The special value ``all`` can also be used: + +.. ipython:: python + + frame.describe(include=["object"]) + frame.describe(include=["number"]) + frame.describe(include="all") + +That feature relies on :ref:`select_dtypes `. Refer to +there for details about accepted inputs. + +.. _basics.idxmin: + +Index of min/max values +~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.idxmin` and :meth:`~DataFrame.idxmax` functions on Series +and DataFrame compute the index labels with the minimum and maximum +corresponding values: + +.. ipython:: python + + s1 = pd.Series(np.random.randn(5)) + s1 + s1.idxmin(), s1.idxmax() + + df1 = pd.DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + df1 + df1.idxmin(axis=0) + df1.idxmax(axis=1) + +When there are multiple rows (or columns) matching the minimum or maximum +value, :meth:`~DataFrame.idxmin` and :meth:`~DataFrame.idxmax` return the first +matching index: + +.. ipython:: python + + df3 = pd.DataFrame([2, 1, 1, 3, np.nan], columns=["A"], index=list("edcba")) + df3 + df3["A"].idxmin() + +.. note:: + + ``idxmin`` and ``idxmax`` are called ``argmin`` and ``argmax`` in NumPy. + +.. _basics.discretization: + +Value counts (histogramming) / mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~Series.value_counts` Series method and top-level function computes a histogram +of a 1D array of values. It can also be used as a function on regular arrays: + +.. ipython:: python + + data = np.random.randint(0, 7, size=50) + data + s = pd.Series(data) + s.value_counts() + pd.value_counts(data) + +.. versionadded:: 1.1.0 + +The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns. +By default all columns are used but a subset can be selected using the ``subset`` argument. + +.. ipython:: python + + data = {"a": [1, 2, 3, 4], "b": ["x", "x", "y", "y"]} + frame = pd.DataFrame(data) + frame.value_counts() + +Similarly, you can get the most frequently occurring value(s), i.e. the mode, of the values in a Series or DataFrame: + +.. ipython:: python + + s5 = pd.Series([1, 1, 3, 3, 3, 5, 5, 7, 7, 7]) + s5.mode() + df5 = pd.DataFrame( + { + "A": np.random.randint(0, 7, size=50), + "B": np.random.randint(-10, 15, size=50), + } + ) + df5.mode() + + +Discretization and quantiling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Continuous values can be discretized using the :func:`cut` (bins based on values) +and :func:`qcut` (bins based on sample quantiles) functions: + +.. ipython:: python + + arr = np.random.randn(20) + factor = pd.cut(arr, 4) + factor + + factor = pd.cut(arr, [-5, -1, 0, 1, 5]) + factor + +:func:`qcut` computes sample quantiles. For example, we could slice up some +normally distributed data into equal-size quartiles like so: + +.. ipython:: python + + arr = np.random.randn(30) + factor = pd.qcut(arr, [0, 0.25, 0.5, 0.75, 1]) + factor + pd.value_counts(factor) + +We can also pass infinite values to define the bins: + +.. ipython:: python + + arr = np.random.randn(20) + factor = pd.cut(arr, [-np.inf, 0, np.inf]) + factor + +.. 
_basics.apply: + +Function application +-------------------- + +To apply your own or another library's functions to pandas objects, +you should be aware of the three methods below. The appropriate +method to use depends on whether your function expects to operate +on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise. + +1. `Tablewise Function Application`_: :meth:`~DataFrame.pipe` +2. `Row or Column-wise Function Application`_: :meth:`~DataFrame.apply` +3. `Aggregation API`_: :meth:`~DataFrame.agg` and :meth:`~DataFrame.transform` +4. `Applying Elementwise Functions`_: :meth:`~DataFrame.applymap` + +.. _basics.pipe: + +Tablewise function application +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``DataFrames`` and ``Series`` can be passed into functions. +However, if the function needs to be called in a chain, consider using the :meth:`~DataFrame.pipe` method. + +First some setup: + +.. ipython:: python + + def extract_city_name(df): + """ + Chicago, IL -> Chicago for city_name column + """ + df["city_name"] = df["city_and_code"].str.split(",").str.get(0) + return df + + + def add_country_name(df, country_name=None): + """ + Chicago -> Chicago-US for city_name column + """ + col = "city_name" + df["city_and_country"] = df[col] + country_name + return df + + + df_p = pd.DataFrame({"city_and_code": ["Chicago, IL"]}) + + +``extract_city_name`` and ``add_country_name`` are functions taking and returning ``DataFrames``. + +Now compare the following: + +.. ipython:: python + + add_country_name(extract_city_name(df_p), country_name="US") + +Is equivalent to: + +.. ipython:: python + + df_p.pipe(extract_city_name).pipe(add_country_name, country_name="US") + +pandas encourages the second style, which is known as method chaining. +``pipe`` makes it easy to use your own or another library's functions +in method chains, alongside pandas' methods. + +In the example above, the functions ``extract_city_name`` and ``add_country_name`` each expected a ``DataFrame`` as the first positional argument. +What if the function you wish to apply takes its data as, say, the second argument? +In this case, provide ``pipe`` with a tuple of ``(callable, data_keyword)``. +``.pipe`` will route the ``DataFrame`` to the argument specified in the tuple. + +For example, we can fit a regression using statsmodels. Their API expects a formula first and a ``DataFrame`` as the second argument, ``data``. We pass in the function, keyword pair ``(sm.ols, 'data')`` to ``pipe``: + +.. ipython:: python + :okwarning: + + import statsmodels.formula.api as sm + + bb = pd.read_csv("data/baseball.csv", index_col="id") + + ( + bb.query("h > 0") + .assign(ln_h=lambda df: np.log(df.h)) + .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)") + .fit() + .summary() + ) + +The pipe method is inspired by unix pipes and more recently dplyr_ and magrittr_, which +have introduced the popular ``(%>%)`` (read pipe) operator for R_. +The implementation of ``pipe`` here is quite clean and feels right at home in Python. +We encourage you to view the source code of :meth:`~DataFrame.pipe`. + +.. _dplyr: https://github.com/tidyverse/dplyr +.. _magrittr: https://github.com/tidyverse/magrittr +.. _R: https://www.r-project.org + + +Row or column-wise function application +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Arbitrary functions can be applied along the axes of a DataFrame +using the :meth:`~DataFrame.apply` method, which, like the descriptive +statistics methods, takes an optional ``axis`` argument: + +.. 
ipython:: python + + df.apply(np.mean) + df.apply(np.mean, axis=1) + df.apply(lambda x: x.max() - x.min()) + df.apply(np.cumsum) + df.apply(np.exp) + +The :meth:`~DataFrame.apply` method will also dispatch on a string method name. + +.. ipython:: python + + df.apply("mean") + df.apply("mean", axis=1) + +The return type of the function passed to :meth:`~DataFrame.apply` affects the +type of the final output from ``DataFrame.apply`` for the default behaviour: + +* If the applied function returns a ``Series``, the final output is a ``DataFrame``. + The columns match the index of the ``Series`` returned by the applied function. +* If the applied function returns any other type, the final output is a ``Series``. + +This default behaviour can be overridden using the ``result_type``, which +accepts three options: ``reduce``, ``broadcast``, and ``expand``. +These will determine how list-likes return values expand (or not) to a ``DataFrame``. + +:meth:`~DataFrame.apply` combined with some cleverness can be used to answer many questions +about a data set. For example, suppose we wanted to extract the date where the +maximum value for each column occurred: + +.. ipython:: python + + tsdf = pd.DataFrame( + np.random.randn(1000, 3), + columns=["A", "B", "C"], + index=pd.date_range("1/1/2000", periods=1000), + ) + tsdf.apply(lambda x: x.idxmax()) + +You may also pass additional arguments and keyword arguments to the :meth:`~DataFrame.apply` +method. For instance, consider the following function you would like to apply: + +.. code-block:: python + + def subtract_and_divide(x, sub, divide=1): + return (x - sub) / divide + +You may then apply this function as follows: + +.. code-block:: python + + df.apply(subtract_and_divide, args=(5,), divide=3) + +Another useful feature is the ability to pass Series methods to carry out some +Series operation on each column or row: + +.. ipython:: python + :suppress: + + tsdf = pd.DataFrame( + np.random.randn(10, 3), + columns=["A", "B", "C"], + index=pd.date_range("1/1/2000", periods=10), + ) + tsdf.iloc[3:7] = np.nan + +.. ipython:: python + + tsdf + tsdf.apply(pd.Series.interpolate) + + +Finally, :meth:`~DataFrame.apply` takes an argument ``raw`` which is False by default, which +converts each row or column into a Series before applying the function. When +set to True, the passed function will instead receive an ndarray object, which +has positive performance implications if you do not need the indexing +functionality. + +.. _basics.aggregate: + +Aggregation API +~~~~~~~~~~~~~~~ + +The aggregation API allows one to express possibly multiple aggregation operations in a single concise way. +This API is similar across pandas objects, see :ref:`groupby API `, the +:ref:`window API `, and the :ref:`resample API `. +The entry point for aggregation is :meth:`DataFrame.aggregate`, or the alias +:meth:`DataFrame.agg`. + +We will use a similar starting frame from above: + +.. ipython:: python + + tsdf = pd.DataFrame( + np.random.randn(10, 3), + columns=["A", "B", "C"], + index=pd.date_range("1/1/2000", periods=10), + ) + tsdf.iloc[3:7] = np.nan + tsdf + +Using a single function is equivalent to :meth:`~DataFrame.apply`. You can also +pass named methods as strings. These will return a ``Series`` of the aggregated +output: + +.. ipython:: python + + tsdf.agg(np.sum) + + tsdf.agg("sum") + + # these are equivalent to a ``.sum()`` because we are aggregating + # on a single function + tsdf.sum() + +Single aggregations on a ``Series`` this will return a scalar value: + +.. 
ipython:: python
+
+    tsdf["A"].agg("sum")
+
+
+Aggregating with multiple functions
++++++++++++++++++++++++++++++++++++
+
+You can pass multiple aggregation arguments as a list.
+The results of each of the passed functions will be a row in the resulting ``DataFrame``.
+These are naturally named from the aggregation function.
+
+.. ipython:: python
+
+    tsdf.agg(["sum"])
+
+Multiple functions yield multiple rows:
+
+.. ipython:: python
+
+    tsdf.agg(["sum", "mean"])
+
+On a ``Series``, multiple functions return a ``Series``, indexed by the function names:
+
+.. ipython:: python
+
+    tsdf["A"].agg(["sum", "mean"])
+
+Passing a ``lambda`` function will yield a ``<lambda>`` named row:
+
+.. ipython:: python
+
+    tsdf["A"].agg(["sum", lambda x: x.mean()])
+
+Passing a named function will yield that name for the row:
+
+.. ipython:: python
+
+    def mymean(x):
+        return x.mean()
+
+
+    tsdf["A"].agg(["sum", mymean])
+
+Aggregating with a dict
++++++++++++++++++++++++
+
+Passing a dictionary of column names to a scalar or a list of scalars to ``DataFrame.agg``
+allows you to customize which functions are applied to which columns. Note that the results
+are not in any particular order; you can use an ``OrderedDict`` instead to guarantee ordering.
+
+.. ipython:: python
+
+    tsdf.agg({"A": "mean", "B": "sum"})
+
+Passing a list-like will generate a ``DataFrame`` output. You will get a matrix-like output
+of all of the aggregators. The output will consist of all unique functions. Those that are
+not noted for a particular column will be ``NaN``:
+
+.. ipython:: python
+
+    tsdf.agg({"A": ["mean", "min"], "B": "sum"})
+
+.. _basics.aggregation.mixed_string:
+
+Mixed dtypes
+++++++++++++
+
+.. deprecated:: 1.4.0
+    Attempting to determine which columns cannot be aggregated and silently dropping them from the results is deprecated and will be removed in a future version. If any portion of the columns or operations provided fails, the call to ``.agg`` will raise.
+
+When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
+aggregations. This is similar to how ``.groupby.agg`` works.
+
+.. ipython:: python
+
+    mdf = pd.DataFrame(
+        {
+            "A": [1, 2, 3],
+            "B": [1.0, 2.0, 3.0],
+            "C": ["foo", "bar", "baz"],
+            "D": pd.date_range("20130101", periods=3),
+        }
+    )
+    mdf.dtypes
+
+.. ipython:: python
+    :okwarning:
+
+    mdf.agg(["min", "sum"])
+
+.. _basics.aggregation.custom_describe:
+
+Custom describe
++++++++++++++++
+
+With ``.agg()`` it is possible to easily create a custom describe function, similar
+to the built-in :ref:`describe function <basics.describe>`.
+
+.. ipython:: python
+
+    from functools import partial
+
+    q_25 = partial(pd.Series.quantile, q=0.25)
+    q_25.__name__ = "25%"
+    q_75 = partial(pd.Series.quantile, q=0.75)
+    q_75.__name__ = "75%"
+
+    tsdf.agg(["count", "mean", "std", "min", q_25, "median", q_75, "max"])
+
+.. _basics.transform:
+
+Transform API
+~~~~~~~~~~~~~
+
+The :meth:`~DataFrame.transform` method returns an object that is indexed the same (same size)
+as the original. This API allows you to provide *multiple* operations at the same
+time rather than one-by-one. Its API is quite similar to the ``.agg`` API.
+
+We create a frame similar to the one used in the above sections.
+
+.. ipython:: python
+
+    tsdf = pd.DataFrame(
+        np.random.randn(10, 3),
+        columns=["A", "B", "C"],
+        index=pd.date_range("1/1/2000", periods=10),
+    )
+    tsdf.iloc[3:7] = np.nan
+    tsdf
+
+Transform the entire frame.
``.transform()`` allows input functions as: a NumPy function, a string +function name or a user defined function. + +.. ipython:: python + :okwarning: + + tsdf.transform(np.abs) + tsdf.transform("abs") + tsdf.transform(lambda x: x.abs()) + +Here :meth:`~DataFrame.transform` received a single function; this is equivalent to a `ufunc +`__ application. + +.. ipython:: python + + np.abs(tsdf) + +Passing a single function to ``.transform()`` with a ``Series`` will yield a single ``Series`` in return. + +.. ipython:: python + + tsdf["A"].transform(np.abs) + + +Transform with multiple functions ++++++++++++++++++++++++++++++++++ + +Passing multiple functions will yield a column MultiIndexed DataFrame. +The first level will be the original frame column names; the second level +will be the names of the transforming functions. + +.. ipython:: python + + tsdf.transform([np.abs, lambda x: x + 1]) + +Passing multiple functions to a Series will yield a DataFrame. The +resulting column names will be the transforming functions. + +.. ipython:: python + + tsdf["A"].transform([np.abs, lambda x: x + 1]) + + +Transforming with a dict +++++++++++++++++++++++++ + + +Passing a dict of functions will allow selective transforming per column. + +.. ipython:: python + + tsdf.transform({"A": np.abs, "B": lambda x: x + 1}) + +Passing a dict of lists will generate a MultiIndexed DataFrame with these +selective transforms. + +.. ipython:: python + :okwarning: + + tsdf.transform({"A": np.abs, "B": [lambda x: x + 1, "sqrt"]}) + +.. _basics.elementwise: + +Applying elementwise functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since not all functions can be vectorized (accept NumPy arrays and return +another array or value), the methods :meth:`~DataFrame.applymap` on DataFrame +and analogously :meth:`~Series.map` on Series accept any Python function taking +a single value and returning a single value. For example: + +.. ipython:: python + :suppress: + + df4 = df_orig.copy() + +.. ipython:: python + + df4 + + def f(x): + return len(str(x)) + + df4["one"].map(f) + df4.applymap(f) + +:meth:`Series.map` has an additional feature; it can be used to easily +"link" or "map" values defined by a secondary series. This is closely related +to :ref:`merging/joining functionality `: + +.. ipython:: python + + s = pd.Series( + ["six", "seven", "six", "seven", "six"], index=["a", "b", "c", "d", "e"] + ) + t = pd.Series({"six": 6.0, "seven": 7.0}) + s + s.map(t) + + +.. _basics.reindexing: + +Reindexing and altering labels +------------------------------ + +:meth:`~Series.reindex` is the fundamental data alignment method in pandas. +It is used to implement nearly all other features relying on label-alignment +functionality. To *reindex* means to conform the data to match a given set of +labels along a particular axis. This accomplishes several things: + +* Reorders the existing data to match a new set of labels +* Inserts missing value (NA) markers in label locations where no data for + that label existed +* If specified, **fill** data for missing labels using logic (highly relevant + to working with time series data) + +Here is a simple example: + +.. ipython:: python + + s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"]) + s + s.reindex(["e", "b", "f", "d"]) + +Here, the ``f`` label was not contained in the Series and hence appears as +``NaN`` in the result. + +With a DataFrame, you can simultaneously reindex the index and columns: + +.. 
ipython:: python + + df + df.reindex(index=["c", "f", "b"], columns=["three", "two", "one"]) + +You may also use ``reindex`` with an ``axis`` keyword: + +.. ipython:: python + + df.reindex(["c", "f", "b"], axis="index") + +Note that the ``Index`` objects containing the actual axis labels can be +**shared** between objects. So if we have a Series and a DataFrame, the +following can be done: + +.. ipython:: python + + rs = s.reindex(df.index) + rs + rs.index is df.index + +This means that the reindexed Series's index is the same Python object as the +DataFrame's index. + +:meth:`DataFrame.reindex` also supports an "axis-style" calling convention, +where you specify a single ``labels`` argument and the ``axis`` it applies to. + +.. ipython:: python + + df.reindex(["c", "f", "b"], axis="index") + df.reindex(["three", "two", "one"], axis="columns") + +.. seealso:: + + :ref:`MultiIndex / Advanced Indexing ` is an even more concise way of + doing reindexing. + +.. note:: + + When writing performance-sensitive code, there is a good reason to spend + some time becoming a reindexing ninja: **many operations are faster on + pre-aligned data**. Adding two unaligned DataFrames internally triggers a + reindexing step. For exploratory analysis you will hardly notice the + difference (because ``reindex`` has been heavily optimized), but when CPU + cycles matter sprinkling a few explicit ``reindex`` calls here and there can + have an impact. + +.. _basics.reindex_like: + +Reindexing to align with another object +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may wish to take an object and reindex its axes to be labeled the same as +another object. While the syntax for this is straightforward albeit verbose, it +is a common enough operation that the :meth:`~DataFrame.reindex_like` method is +available to make this simpler: + +.. ipython:: python + :suppress: + + df2 = df.reindex(["a", "b", "c"], columns=["one", "two"]) + df3 = df2 - df2.mean() + + +.. ipython:: python + + df2 + df3 + df.reindex_like(df2) + +.. _basics.align: + +Aligning objects with each other with ``align`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~Series.align` method is the fastest way to simultaneously align two objects. It +supports a ``join`` argument (related to :ref:`joining and merging `): + + - ``join='outer'``: take the union of the indexes (default) + - ``join='left'``: use the calling object's index + - ``join='right'``: use the passed object's index + - ``join='inner'``: intersect the indexes + +It returns a tuple with both of the reindexed Series: + +.. ipython:: python + + s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"]) + s1 = s[:4] + s2 = s[1:] + s1.align(s2) + s1.align(s2, join="inner") + s1.align(s2, join="left") + +.. _basics.df_join: + +For DataFrames, the join method will be applied to both the index and the +columns by default: + +.. ipython:: python + + df.align(df2, join="inner") + +You can also pass an ``axis`` option to only align on the specified axis: + +.. ipython:: python + + df.align(df2, join="inner", axis=0) + +.. _basics.align.frame.series: + +If you pass a Series to :meth:`DataFrame.align`, you can choose to align both +objects either on the DataFrame's index or columns using the ``axis`` argument: + +.. ipython:: python + + df.align(df2.iloc[0], axis=1) + +.. _basics.reindex_fill: + +Filling while reindexing +~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~Series.reindex` takes an optional parameter ``method`` which is a +filling method chosen from the following table: + +.. 
csv-table:: + :header: "Method", "Action" + :widths: 30, 50 + + pad / ffill, Fill values forward + bfill / backfill, Fill values backward + nearest, Fill from the nearest index value + +We illustrate these fill methods on a simple Series: + +.. ipython:: python + + rng = pd.date_range("1/3/2000", periods=8) + ts = pd.Series(np.random.randn(8), index=rng) + ts2 = ts[[0, 3, 6]] + ts + ts2 + + ts2.reindex(ts.index) + ts2.reindex(ts.index, method="ffill") + ts2.reindex(ts.index, method="bfill") + ts2.reindex(ts.index, method="nearest") + +These methods require that the indexes are **ordered** increasing or +decreasing. + +Note that the same result could have been achieved using +:ref:`fillna ` (except for ``method='nearest'``) or +:ref:`interpolate `: + +.. ipython:: python + + ts2.reindex(ts.index).fillna(method="ffill") + +:meth:`~Series.reindex` will raise a ValueError if the index is not monotonically +increasing or decreasing. :meth:`~Series.fillna` and :meth:`~Series.interpolate` +will not perform any checks on the order of the index. + +.. _basics.limits_on_reindex_fill: + +Limits on filling while reindexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``limit`` and ``tolerance`` arguments provide additional control over +filling while reindexing. Limit specifies the maximum count of consecutive +matches: + +.. ipython:: python + + ts2.reindex(ts.index, method="ffill", limit=1) + +In contrast, tolerance specifies the maximum distance between the index and +indexer values: + +.. ipython:: python + + ts2.reindex(ts.index, method="ffill", tolerance="1 day") + +Notice that when used on a ``DatetimeIndex``, ``TimedeltaIndex`` or +``PeriodIndex``, ``tolerance`` will coerced into a ``Timedelta`` if possible. +This allows you to specify tolerance with appropriate strings. + +.. _basics.drop: + +Dropping labels from an axis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A method closely related to ``reindex`` is the :meth:`~DataFrame.drop` function. +It removes a set of labels from an axis: + +.. ipython:: python + + df + df.drop(["a", "d"], axis=0) + df.drop(["one"], axis=1) + +Note that the following also works, but is a bit less obvious / clean: + +.. ipython:: python + + df.reindex(df.index.difference(["a", "d"])) + +.. _basics.rename: + +Renaming / mapping labels +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.rename` method allows you to relabel an axis based on some +mapping (a dict or Series) or an arbitrary function. + +.. ipython:: python + + s + s.rename(str.upper) + +If you pass a function, it must return a value when called with any of the +labels (and must produce a set of unique values). A dict or +Series can also be used: + +.. ipython:: python + + df.rename( + columns={"one": "foo", "two": "bar"}, + index={"a": "apple", "b": "banana", "d": "durian"}, + ) + +If the mapping doesn't include a column/index label, it isn't renamed. Note that +extra labels in the mapping don't throw an error. + +:meth:`DataFrame.rename` also supports an "axis-style" calling convention, where +you specify a single ``mapper`` and the ``axis`` to apply that mapping to. + +.. ipython:: python + + df.rename({"one": "foo", "two": "bar"}, axis="columns") + df.rename({"a": "apple", "b": "banana", "d": "durian"}, axis="index") + + +The :meth:`~DataFrame.rename` method also provides an ``inplace`` named +parameter that is by default ``False`` and copies the underlying data. Pass +``inplace=True`` to rename the data in place. 
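+
+For example, working on a throwaway copy (named ``df_renamed`` here purely for
+illustration) keeps the ``df`` used by the surrounding examples untouched:
+
+.. ipython:: python
+
+    df_renamed = df.copy()
+    df_renamed.rename(columns={"one": "foo"}, inplace=True)
+    df_renamed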
+ +Finally, :meth:`~Series.rename` also accepts a scalar or list-like +for altering the ``Series.name`` attribute. + +.. ipython:: python + + s.rename("scalar-name") + +.. _basics.rename_axis: + +The methods :meth:`DataFrame.rename_axis` and :meth:`Series.rename_axis` +allow specific names of a ``MultiIndex`` to be changed (as opposed to the +labels). + +.. ipython:: python + + df = pd.DataFrame( + {"x": [1, 2, 3, 4, 5, 6], "y": [10, 20, 30, 40, 50, 60]}, + index=pd.MultiIndex.from_product( + [["a", "b", "c"], [1, 2]], names=["let", "num"] + ), + ) + df + df.rename_axis(index={"let": "abc"}) + df.rename_axis(index=str.upper) + +.. _basics.iteration: + +Iteration +--------- + +The behavior of basic iteration over pandas objects depends on the type. +When iterating over a Series, it is regarded as array-like, and basic iteration +produces the values. DataFrames follow the dict-like convention of iterating +over the "keys" of the objects. + +In short, basic iteration (``for i in object``) produces: + +* **Series**: values +* **DataFrame**: column labels + +Thus, for example, iterating over a DataFrame gives you the column names: + +.. ipython:: python + + df = pd.DataFrame( + {"col1": np.random.randn(3), "col2": np.random.randn(3)}, index=["a", "b", "c"] + ) + + for col in df: + print(col) + + +pandas objects also have the dict-like :meth:`~DataFrame.items` method to +iterate over the (key, value) pairs. + +To iterate over the rows of a DataFrame, you can use the following methods: + +* :meth:`~DataFrame.iterrows`: Iterate over the rows of a DataFrame as (index, Series) pairs. + This converts the rows to Series objects, which can change the dtypes and has some + performance implications. +* :meth:`~DataFrame.itertuples`: Iterate over the rows of a DataFrame + as namedtuples of the values. This is a lot faster than + :meth:`~DataFrame.iterrows`, and is in most cases preferable to use + to iterate over the values of a DataFrame. + +.. warning:: + + Iterating through pandas objects is generally **slow**. In many cases, + iterating manually over the rows is not needed and can be avoided with + one of the following approaches: + + * Look for a *vectorized* solution: many operations can be performed using + built-in methods or NumPy functions, (boolean) indexing, ... + + * When you have a function that cannot work on the full DataFrame/Series + at once, it is better to use :meth:`~DataFrame.apply` instead of iterating + over the values. See the docs on :ref:`function application `. + + * If you need to do iterative manipulations on the values but performance is + important, consider writing the inner loop with cython or numba. + See the :ref:`enhancing performance ` section for some + examples of this approach. + +.. warning:: + + You should **never modify** something you are iterating over. + This is not guaranteed to work in all cases. Depending on the + data types, the iterator returns a copy and not a view, and writing + to it will have no effect! + + For example, in the following case setting the value has no effect: + + .. ipython:: python + + df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + + for index, row in df.iterrows(): + row["a"] = 10 + + df + +items +~~~~~ + +Consistent with the dict-like interface, :meth:`~DataFrame.items` iterates +through key-value pairs: + +* **Series**: (index, scalar value) pairs +* **DataFrame**: (column, Series) pairs + +For example: + +.. ipython:: python + + for label, ser in df.items(): + print(label) + print(ser) + +.. 
_basics.iterrows:
+
+iterrows
+~~~~~~~~
+
+:meth:`~DataFrame.iterrows` allows you to iterate through the rows of a
+DataFrame as Series objects. It returns an iterator yielding each
+index value along with a Series containing the data in each row:
+
+.. ipython:: python
+
+    for row_index, row in df.iterrows():
+        print(row_index, row, sep="\n")
+
+.. note::
+
+    Because :meth:`~DataFrame.iterrows` returns a Series for each row,
+    it does **not** preserve dtypes across the rows (dtypes are
+    preserved across columns for DataFrames). For example,
+
+    .. ipython:: python
+
+        df_orig = pd.DataFrame([[1, 1.5]], columns=["int", "float"])
+        df_orig.dtypes
+        row = next(df_orig.iterrows())[1]
+        row
+
+    All values in ``row``, returned as a Series, are now upcasted
+    to floats, including the original integer value in column ``int``:
+
+    .. ipython:: python
+
+        row["int"].dtype
+        df_orig["int"].dtype
+
+    To preserve dtypes while iterating over the rows, it is better
+    to use :meth:`~DataFrame.itertuples` which returns namedtuples of the values
+    and which is generally much faster than :meth:`~DataFrame.iterrows`.
+
+For instance, a contrived way to transpose the DataFrame would be:
+
+.. ipython:: python
+
+    df2 = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    print(df2)
+    print(df2.T)
+
+    df2_t = pd.DataFrame({idx: values for idx, values in df2.iterrows()})
+    print(df2_t)
+
+itertuples
+~~~~~~~~~~
+
+The :meth:`~DataFrame.itertuples` method will return an iterator
+yielding a namedtuple for each row in the DataFrame. The first element
+of the tuple will be the row's corresponding index value, while the
+remaining values are the row values.
+
+For instance:
+
+.. ipython:: python
+
+    for row in df.itertuples():
+        print(row)
+
+This method does not convert the row to a Series object; it merely
+returns the values inside a namedtuple. Therefore,
+:meth:`~DataFrame.itertuples` preserves the data type of the values
+and is generally faster than :meth:`~DataFrame.iterrows`.
+
+.. note::
+
+    The column names will be renamed to positional names if they are
+    invalid Python identifiers, repeated, or start with an underscore.
+    With a large number of columns (>255), regular tuples are returned.
+
+.. _basics.dt_accessors:
+
+.dt accessor
+------------
+
+``Series`` has an accessor to succinctly return datetime-like properties for the
+*values* of the Series, if it is a datetime/period-like Series.
+This will return a Series, indexed like the existing Series.
+
+.. ipython:: python
+
+    # datetime
+    s = pd.Series(pd.date_range("20130101 09:10:12", periods=4))
+    s
+    s.dt.hour
+    s.dt.second
+    s.dt.day
+
+This enables nice expressions like this:
+
+.. ipython:: python
+
+    s[s.dt.day == 2]
+
+You can easily produce tz-aware transformations:
+
+.. ipython:: python
+
+    stz = s.dt.tz_localize("US/Eastern")
+    stz
+    stz.dt.tz
+
+You can also chain these types of operations:
+
+.. ipython:: python
+
+    s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
+
+You can also format datetime values as strings with :meth:`Series.dt.strftime` which
+supports the same format as the standard :meth:`~datetime.datetime.strftime`.
+
+.. ipython:: python
+
+    # DatetimeIndex
+    s = pd.Series(pd.date_range("20130101", periods=4))
+    s
+    s.dt.strftime("%Y/%m/%d")
+
+.. ipython:: python
+
+    # PeriodIndex
+    s = pd.Series(pd.period_range("20130101", periods=4))
+    s
+    s.dt.strftime("%Y/%m/%d")
+
+The ``.dt`` accessor works for period and timedelta dtypes.
+
+..
ipython:: python + + # period + s = pd.Series(pd.period_range("20130101", periods=4, freq="D")) + s + s.dt.year + s.dt.day + +.. ipython:: python + + # timedelta + s = pd.Series(pd.timedelta_range("1 day 00:00:05", periods=4, freq="s")) + s + s.dt.days + s.dt.seconds + s.dt.components + +.. note:: + + ``Series.dt`` will raise a ``TypeError`` if you access with a non-datetime-like values. + +Vectorized string methods +------------------------- + +Series is equipped with a set of string processing methods that make it easy to +operate on each element of the array. Perhaps most importantly, these methods +exclude missing/NA values automatically. These are accessed via the Series's +``str`` attribute and generally have names matching the equivalent (scalar) +built-in string methods. For example: + + .. ipython:: python + + s = pd.Series( + ["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"], dtype="string" + ) + s.str.lower() + +Powerful pattern-matching methods are provided as well, but note that +pattern-matching generally uses `regular expressions +`__ by default (and in some cases +always uses them). + +.. note:: + + Prior to pandas 1.0, string methods were only available on ``object`` -dtype + ``Series``. pandas 1.0 added the :class:`StringDtype` which is dedicated + to strings. See :ref:`text.types` for more. + +Please see :ref:`Vectorized String Methods ` for a complete +description. + +.. _basics.sorting: + +Sorting +------- + +pandas supports three kinds of sorting: sorting by index labels, +sorting by column values, and sorting by a combination of both. + +.. _basics.sort_index: + +By index +~~~~~~~~ + +The :meth:`Series.sort_index` and :meth:`DataFrame.sort_index` methods are +used to sort a pandas object by its index levels. + +.. ipython:: python + + df = pd.DataFrame( + { + "one": pd.Series(np.random.randn(3), index=["a", "b", "c"]), + "two": pd.Series(np.random.randn(4), index=["a", "b", "c", "d"]), + "three": pd.Series(np.random.randn(3), index=["b", "c", "d"]), + } + ) + + unsorted_df = df.reindex( + index=["a", "d", "c", "b"], columns=["three", "two", "one"] + ) + unsorted_df + + # DataFrame + unsorted_df.sort_index() + unsorted_df.sort_index(ascending=False) + unsorted_df.sort_index(axis=1) + + # Series + unsorted_df["three"].sort_index() + +.. _basics.sort_index_key: + +.. versionadded:: 1.1.0 + +Sorting by index also supports a ``key`` parameter that takes a callable +function to apply to the index being sorted. For ``MultiIndex`` objects, +the key is applied per-level to the levels specified by ``level``. + +.. ipython:: python + + s1 = pd.DataFrame({"a": ["B", "a", "C"], "b": [1, 2, 3], "c": [2, 3, 4]}).set_index( + list("ab") + ) + s1 + +.. ipython:: python + + s1.sort_index(level="a") + s1.sort_index(level="a", key=lambda idx: idx.str.lower()) + +For information on key sorting by value, see :ref:`value sorting +`. + +.. _basics.sort_values: + +By values +~~~~~~~~~ + +The :meth:`Series.sort_values` method is used to sort a ``Series`` by its values. The +:meth:`DataFrame.sort_values` method is used to sort a ``DataFrame`` by its column or row values. +The optional ``by`` parameter to :meth:`DataFrame.sort_values` may used to specify one or more columns +to use to determine the sorted order. + +.. ipython:: python + + df1 = pd.DataFrame( + {"one": [2, 1, 1, 1], "two": [1, 3, 2, 4], "three": [5, 4, 3, 2]} + ) + df1.sort_values(by="two") + +The ``by`` parameter can take a list of column names, e.g.: + +.. 
ipython:: python + + df1[["one", "two", "three"]].sort_values(by=["one", "two"]) + +These methods have special treatment of NA values via the ``na_position`` +argument: + +.. ipython:: python + + s[2] = np.nan + s.sort_values() + s.sort_values(na_position="first") + +.. _basics.sort_value_key: + +.. versionadded:: 1.1.0 + +Sorting also supports a ``key`` parameter that takes a callable function +to apply to the values being sorted. + +.. ipython:: python + + s1 = pd.Series(["B", "a", "C"]) + +.. ipython:: python + + s1.sort_values() + s1.sort_values(key=lambda x: x.str.lower()) + +``key`` will be given the :class:`Series` of values and should return a ``Series`` +or array of the same shape with the transformed values. For ``DataFrame`` objects, +the key is applied per column, so the key should still expect a Series and return +a Series, e.g. + +.. ipython:: python + + df = pd.DataFrame({"a": ["B", "a", "C"], "b": [1, 2, 3]}) + +.. ipython:: python + + df.sort_values(by="a") + df.sort_values(by="a", key=lambda col: col.str.lower()) + +The name or type of each column can be used to apply different functions to +different columns. + +.. _basics.sort_indexes_and_values: + +By indexes and values +~~~~~~~~~~~~~~~~~~~~~ + +Strings passed as the ``by`` parameter to :meth:`DataFrame.sort_values` may +refer to either columns or index level names. + +.. ipython:: python + + # Build MultiIndex + idx = pd.MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("a", 2), ("b", 2), ("b", 1), ("b", 1)] + ) + idx.names = ["first", "second"] + + # Build DataFrame + df_multi = pd.DataFrame({"A": np.arange(6, 0, -1)}, index=idx) + df_multi + +Sort by 'second' (index) and 'A' (column) + +.. ipython:: python + + df_multi.sort_values(by=["second", "A"]) + +.. note:: + + If a string matches both a column name and an index level name then a + warning is issued and the column takes precedence. This will result in an + ambiguity error in a future version. + +.. _basics.searchsorted: + +searchsorted +~~~~~~~~~~~~ + +Series has the :meth:`~Series.searchsorted` method, which works similarly to +:meth:`numpy.ndarray.searchsorted`. + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + ser.searchsorted([0, 3]) + ser.searchsorted([0, 4]) + ser.searchsorted([1, 3], side="right") + ser.searchsorted([1, 3], side="left") + ser = pd.Series([3, 1, 2]) + ser.searchsorted([0, 3], sorter=np.argsort(ser)) + +.. _basics.nsorted: + +smallest / largest values +~~~~~~~~~~~~~~~~~~~~~~~~~ + +``Series`` has the :meth:`~Series.nsmallest` and :meth:`~Series.nlargest` methods which return the +smallest or largest :math:`n` values. For a large ``Series`` this can be much +faster than sorting the entire Series and calling ``head(n)`` on the result. + +.. ipython:: python + + s = pd.Series(np.random.permutation(10)) + s + s.sort_values() + s.nsmallest(3) + s.nlargest(3) + +``DataFrame`` also has the ``nlargest`` and ``nsmallest`` methods. + +.. ipython:: python + + df = pd.DataFrame( + { + "a": [-2, -1, 1, 10, 8, 11, -1], + "b": list("abdceff"), + "c": [1.0, 2.0, 4.0, 3.2, np.nan, 3.0, 4.0], + } + ) + df.nlargest(3, "a") + df.nlargest(5, ["a", "c"]) + df.nsmallest(3, "a") + df.nsmallest(5, ["a", "c"]) + + +.. _basics.multiindex_sorting: + +Sorting by a MultiIndex column +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You must be explicit about sorting when the column is a MultiIndex, and fully specify +all levels to ``by``. + +.. 
ipython:: python + + df1.columns = pd.MultiIndex.from_tuples( + [("a", "one"), ("a", "two"), ("b", "three")] + ) + df1.sort_values(by=("a", "two")) + + +Copying +------- + +The :meth:`~DataFrame.copy` method on pandas objects copies the underlying data (though not +the axis indexes, since they are immutable) and returns a new object. Note that +**it is seldom necessary to copy objects**. For example, there are only a +handful of ways to alter a DataFrame *in-place*: + +* Inserting, deleting, or modifying a column. +* Assigning to the ``index`` or ``columns`` attributes. +* For homogeneous data, directly modifying the values via the ``values`` + attribute or advanced indexing. + +To be clear, no pandas method has the side effect of modifying your data; +almost every method returns a new object, leaving the original object +untouched. If the data is modified, it is because you did so explicitly. + +.. _basics.dtypes: + +dtypes +------ + +For the most part, pandas uses NumPy arrays and dtypes for Series or individual +columns of a DataFrame. NumPy provides support for ``float``, +``int``, ``bool``, ``timedelta64[ns]`` and ``datetime64[ns]`` (note that NumPy +does not support timezone-aware datetimes). + +pandas and third-party libraries *extend* NumPy's type system in a few places. +This section describes the extensions pandas has made internally. +See :ref:`extending.extension-types` for how to write your own extension that +works with pandas. See :ref:`ecosystem.extensions` for a list of third-party +libraries that have implemented an extension. + +The following table lists all of pandas extension types. For methods requiring ``dtype`` +arguments, strings can be specified as indicated. See the respective +documentation sections for more on each type. 
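+Many of the string aliases listed in the table below can be passed directly
+wherever pandas accepts a ``dtype`` argument, for example in the :class:`Series`
+constructor. A brief illustration (the example values are arbitrary):
+
+.. ipython:: python
+
+   pd.Series([1, 2, None], dtype="Int64")
+   pd.Series(["a", "b", None], dtype="string")
+   pd.Series([True, False, None], dtype="boolean")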
+
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+| Kind of Data                                    | Data Type                 | Scalar             | Array                         | String Aliases                         |
++=================================================+===========================+====================+===============================+========================================+
+| :ref:`tz-aware datetime <timeseries.timezone>`  | :class:`DatetimeTZDtype`  | :class:`Timestamp` | :class:`arrays.DatetimeArray` | ``'datetime64[ns, <tz>]'``             |
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+| :ref:`Categorical <categorical>`                | :class:`CategoricalDtype` | (none)             | :class:`Categorical`          | ``'category'``                         |
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+| :ref:`period (time spans) <timeseries.periods>` | :class:`PeriodDtype`      | :class:`Period`    | :class:`arrays.PeriodArray`   | ``'period[<freq>]'``,                  |
+|                                                 |                           |                    |                               | ``'Period[<freq>]'``                   |
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+| :ref:`sparse <sparse>`                          | :class:`SparseDtype`      | (none)             | :class:`arrays.SparseArray`   | ``'Sparse'``, ``'Sparse[int]'``,       |
+|                                                 |                           |                    |                               | ``'Sparse[float]'``                    |
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+| :ref:`intervals <advanced.intervalindex>`       | :class:`IntervalDtype`    | :class:`Interval`  | :class:`arrays.IntervalArray` | ``'interval'``, ``'Interval'``,        |
+|                                                 |                           |                    |                               | ``'Interval[<numpy_dtype>]'``,         |
+|                                                 |                           |                    |                               | ``'Interval[datetime64[ns, <tz>]]'``,  |
+|                                                 |                           |                    |                               | ``'Interval[timedelta64[<freq>]]'``    |
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+| :ref:`nullable integer <integer_na>`            | :class:`Int64Dtype`, ...  | (none)             | :class:`arrays.IntegerArray`  | ``'Int8'``, ``'Int16'``, ``'Int32'``,  |
+|                                                 |                           |                    |                               | ``'Int64'``, ``'UInt8'``, ``'UInt16'``,|
+|                                                 |                           |                    |                               | ``'UInt32'``, ``'UInt64'``             |
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+| :ref:`Strings <text>`                           | :class:`StringDtype`      | :class:`str`       | :class:`arrays.StringArray`   | ``'string'``                           |
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+| :ref:`Boolean (with NA) <boolean>`              | :class:`BooleanDtype`     | :class:`bool`      | :class:`arrays.BooleanArray`  | ``'boolean'``                          |
++-------------------------------------------------+---------------------------+--------------------+-------------------------------+----------------------------------------+
+
+pandas has two ways to store strings.
+
+1. ``object`` dtype, which can hold any Python object, including strings.
+2. :class:`StringDtype`, which is dedicated to strings.
+
+Generally, we recommend using :class:`StringDtype`. See :ref:`text.types` for more.
+
+Finally, arbitrary objects may be stored using the ``object`` dtype, but should
+be avoided to the extent possible (for performance and interoperability with
+other libraries and methods. 
See :ref:`basics.object_conversion`). + +A convenient :attr:`~DataFrame.dtypes` attribute for DataFrame returns a Series +with the data type of each column. + +.. ipython:: python + + dft = pd.DataFrame( + { + "A": np.random.rand(3), + "B": 1, + "C": "foo", + "D": pd.Timestamp("20010102"), + "E": pd.Series([1.0] * 3).astype("float32"), + "F": False, + "G": pd.Series([1] * 3, dtype="int8"), + } + ) + dft + dft.dtypes + +On a ``Series`` object, use the :attr:`~Series.dtype` attribute. + +.. ipython:: python + + dft["A"].dtype + +If a pandas object contains data with multiple dtypes *in a single column*, the +dtype of the column will be chosen to accommodate all of the data types +(``object`` is the most general). + +.. ipython:: python + + # these ints are coerced to floats + pd.Series([1, 2, 3, 4, 5, 6.0]) + + # string data forces an ``object`` dtype + pd.Series([1, 2, 3, 6.0, "foo"]) + +The number of columns of each type in a ``DataFrame`` can be found by calling +``DataFrame.dtypes.value_counts()``. + +.. ipython:: python + + dft.dtypes.value_counts() + +Numeric dtypes will propagate and can coexist in DataFrames. +If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, +or a passed ``Series``), then it will be preserved in DataFrame operations. Furthermore, +different numeric dtypes will **NOT** be combined. The following example will give you a taste. + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float32") + df1 + df1.dtypes + df2 = pd.DataFrame( + { + "A": pd.Series(np.random.randn(8), dtype="float16"), + "B": pd.Series(np.random.randn(8)), + "C": pd.Series(np.array(np.random.randn(8), dtype="uint8")), + } + ) + df2 + df2.dtypes + +defaults +~~~~~~~~ + +By default integer types are ``int64`` and float types are ``float64``, +*regardless* of platform (32-bit or 64-bit). +The following will all result in ``int64`` dtypes. + +.. ipython:: python + + pd.DataFrame([1, 2], columns=["a"]).dtypes + pd.DataFrame({"a": [1, 2]}).dtypes + pd.DataFrame({"a": 1}, index=list(range(2))).dtypes + +Note that Numpy will choose *platform-dependent* types when creating arrays. +The following **WILL** result in ``int32`` on 32-bit platform. + +.. ipython:: python + + frame = pd.DataFrame(np.array([1, 2])) + + +upcasting +~~~~~~~~~ + +Types can potentially be *upcasted* when combined with other types, meaning they are promoted +from the current type (e.g. ``int`` to ``float``). + +.. ipython:: python + + df3 = df1.reindex_like(df2).fillna(value=0.0) + df2 + df3 + df3.dtypes + +:meth:`DataFrame.to_numpy` will return the *lower-common-denominator* of the dtypes, meaning +the dtype that can accommodate **ALL** of the types in the resulting homogeneous dtyped NumPy array. This can +force some *upcasting*. + +.. ipython:: python + + df3.to_numpy().dtype + +astype +~~~~~~ + +.. _basics.cast: + +You can use the :meth:`~DataFrame.astype` method to explicitly convert dtypes from one to another. These will by default return a copy, +even if the dtype was unchanged (pass ``copy=False`` to change this behavior). In addition, they will raise an +exception if the astype operation is invalid. + +Upcasting is always according to the **NumPy** rules. If two different dtypes are involved in an operation, +then the more *general* one will be used as the result of the operation. + +.. 
ipython:: python + + df3 + df3.dtypes + + # conversion of dtypes + df3.astype("float32").dtypes + + +Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`. + +.. ipython:: python + + dft = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + dft[["a", "b"]] = dft[["a", "b"]].astype(np.uint8) + dft + dft.dtypes + +Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype`. + +.. ipython:: python + + dft1 = pd.DataFrame({"a": [1, 0, 1], "b": [4, 5, 6], "c": [7, 8, 9]}) + dft1 = dft1.astype({"a": np.bool_, "c": np.float64}) + dft1 + dft1.dtypes + +.. note:: + + When trying to convert a subset of columns to a specified type using :meth:`~DataFrame.astype` and :meth:`~DataFrame.loc`, upcasting occurs. + + :meth:`~DataFrame.loc` tries to fit in what we are assigning to the current dtypes, while ``[]`` will overwrite them taking the dtype from the right hand side. Therefore the following piece of code produces the unintended result. + + .. ipython:: python + + dft = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + dft.loc[:, ["a", "b"]].astype(np.uint8).dtypes + dft.loc[:, ["a", "b"]] = dft.loc[:, ["a", "b"]].astype(np.uint8) + dft.dtypes + +.. _basics.object_conversion: + +object conversion +~~~~~~~~~~~~~~~~~ + +pandas offers various functions to try to force conversion of types from the ``object`` dtype to other types. +In cases where the data is already of the correct type, but stored in an ``object`` array, the +:meth:`DataFrame.infer_objects` and :meth:`Series.infer_objects` methods can be used to soft convert +to the correct type. + + .. ipython:: python + + import datetime + + df = pd.DataFrame( + [ + [1, 2], + ["a", "b"], + [datetime.datetime(2016, 3, 2), datetime.datetime(2016, 3, 2)], + ] + ) + df = df.T + df + df.dtypes + +Because the data was transposed the original inference stored all columns as object, which +``infer_objects`` will correct. + + .. ipython:: python + + df.infer_objects().dtypes + +The following functions are available for one dimensional object arrays or scalars to perform +hard conversion of objects to a specified type: + +* :meth:`~pandas.to_numeric` (conversion to numeric dtypes) + + .. ipython:: python + + m = ["1.1", 2, 3] + pd.to_numeric(m) + +* :meth:`~pandas.to_datetime` (conversion to datetime objects) + + .. ipython:: python + + import datetime + + m = ["2016-07-09", datetime.datetime(2016, 3, 2)] + pd.to_datetime(m) + +* :meth:`~pandas.to_timedelta` (conversion to timedelta objects) + + .. ipython:: python + + m = ["5us", pd.Timedelta("1day")] + pd.to_timedelta(m) + +To force a conversion, we can pass in an ``errors`` argument, which specifies how pandas should deal with elements +that cannot be converted to desired dtype or object. By default, ``errors='raise'``, meaning that any errors encountered +will be raised during the conversion process. However, if ``errors='coerce'``, these errors will be ignored and pandas +will convert problematic elements to ``pd.NaT`` (for datetime and timedelta) or ``np.nan`` (for numeric). This might be +useful if you are reading in data which is mostly of the desired dtype (e.g. numeric, datetime), but occasionally has +non-conforming elements intermixed that you want to represent as missing: + +.. 
ipython:: python + + import datetime + + m = ["apple", datetime.datetime(2016, 3, 2)] + pd.to_datetime(m, errors="coerce") + + m = ["apple", 2, 3] + pd.to_numeric(m, errors="coerce") + + m = ["apple", pd.Timedelta("1day")] + pd.to_timedelta(m, errors="coerce") + +The ``errors`` parameter has a third option of ``errors='ignore'``, which will simply return the passed in data if it +encounters any errors with the conversion to a desired data type: + +.. ipython:: python + + import datetime + + m = ["apple", datetime.datetime(2016, 3, 2)] + pd.to_datetime(m, errors="ignore") + + m = ["apple", 2, 3] + pd.to_numeric(m, errors="ignore") + + m = ["apple", pd.Timedelta("1day")] + pd.to_timedelta(m, errors="ignore") + +In addition to object conversion, :meth:`~pandas.to_numeric` provides another argument ``downcast``, which gives the +option of downcasting the newly (or already) numeric data to a smaller dtype, which can conserve memory: + +.. ipython:: python + + m = ["1", 2, 3] + pd.to_numeric(m, downcast="integer") # smallest signed int dtype + pd.to_numeric(m, downcast="signed") # same as 'integer' + pd.to_numeric(m, downcast="unsigned") # smallest unsigned int dtype + pd.to_numeric(m, downcast="float") # smallest float dtype + +As these methods apply only to one-dimensional arrays, lists or scalars; they cannot be used directly on multi-dimensional objects such +as DataFrames. However, with :meth:`~pandas.DataFrame.apply`, we can "apply" the function over each column efficiently: + +.. ipython:: python + + import datetime + + df = pd.DataFrame([["2016-07-09", datetime.datetime(2016, 3, 2)]] * 2, dtype="O") + df + df.apply(pd.to_datetime) + + df = pd.DataFrame([["1.1", 2, 3]] * 2, dtype="O") + df + df.apply(pd.to_numeric) + + df = pd.DataFrame([["5us", pd.Timedelta("1day")]] * 2, dtype="O") + df + df.apply(pd.to_timedelta) + +gotchas +~~~~~~~ + +Performing selection operations on ``integer`` type data can easily upcast the data to ``floating``. +The dtype of the input data will be preserved in cases where ``nans`` are not introduced. +See also :ref:`Support for integer NA `. + +.. ipython:: python + + dfi = df3.astype("int32") + dfi["E"] = 1 + dfi + dfi.dtypes + + casted = dfi[dfi > 0] + casted + casted.dtypes + +While float dtypes are unchanged. + +.. ipython:: python + + dfa = df3.copy() + dfa["A"] = dfa["A"].astype("float32") + dfa.dtypes + + casted = dfa[df2 > 0] + casted + casted.dtypes + +Selecting columns based on ``dtype`` +------------------------------------ + +.. _basics.selectdtypes: + +The :meth:`~DataFrame.select_dtypes` method implements subsetting of columns +based on their ``dtype``. + +First, let's create a :class:`DataFrame` with a slew of different +dtypes: + +.. ipython:: python + + df = pd.DataFrame( + { + "string": list("abc"), + "int64": list(range(1, 4)), + "uint8": np.arange(3, 6).astype("u1"), + "float64": np.arange(4.0, 7.0), + "bool1": [True, False, True], + "bool2": [False, True, False], + "dates": pd.date_range("now", periods=3), + "category": pd.Series(list("ABC")).astype("category"), + } + ) + df["tdeltas"] = df.dates.diff() + df["uint64"] = np.arange(3, 6).astype("u8") + df["other_dates"] = pd.date_range("20130101", periods=3) + df["tz_aware_dates"] = pd.date_range("20130101", periods=3, tz="US/Eastern") + df + +And the dtypes: + +.. 
ipython:: python + + df.dtypes + +:meth:`~DataFrame.select_dtypes` has two parameters ``include`` and ``exclude`` that allow you to +say "give me the columns *with* these dtypes" (``include``) and/or "give the +columns *without* these dtypes" (``exclude``). + +For example, to select ``bool`` columns: + +.. ipython:: python + + df.select_dtypes(include=[bool]) + +You can also pass the name of a dtype in the `NumPy dtype hierarchy +`__: + +.. ipython:: python + + df.select_dtypes(include=["bool"]) + +:meth:`~pandas.DataFrame.select_dtypes` also works with generic dtypes as well. + +For example, to select all numeric and boolean columns while excluding unsigned +integers: + +.. ipython:: python + + df.select_dtypes(include=["number", "bool"], exclude=["unsignedinteger"]) + +To select string columns you must use the ``object`` dtype: + +.. ipython:: python + + df.select_dtypes(include=["object"]) + +To see all the child dtypes of a generic ``dtype`` like ``numpy.number`` you +can define a function that returns a tree of child dtypes: + +.. ipython:: python + + def subdtypes(dtype): + subs = dtype.__subclasses__() + if not subs: + return dtype + return [dtype, [subdtypes(dt) for dt in subs]] + +All NumPy dtypes are subclasses of ``numpy.generic``: + +.. ipython:: python + + subdtypes(np.generic) + +.. note:: + + pandas also defines the types ``category``, and ``datetime64[ns, tz]``, which are not integrated into the normal + NumPy hierarchy and won't show up with the above function. diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst new file mode 100644 index 00000000..54c67674 --- /dev/null +++ b/doc/source/user_guide/boolean.rst @@ -0,0 +1,107 @@ +.. currentmodule:: pandas + +.. ipython:: python + :suppress: + + import pandas as pd + import numpy as np + +.. _boolean: + +************************** +Nullable Boolean data type +************************** + +.. note:: + + BooleanArray is currently experimental. Its API or implementation may + change without warning. + +.. versionadded:: 1.0.0 + + +.. _boolean.indexing: + +Indexing with NA values +----------------------- + +pandas allows indexing with ``NA`` values in a boolean array, which are treated as ``False``. + +.. versionchanged:: 1.0.2 + +.. ipython:: python + :okexcept: + + s = pd.Series([1, 2, 3]) + mask = pd.array([True, False, pd.NA], dtype="boolean") + s[mask] + +If you would prefer to keep the ``NA`` values you can manually fill them with ``fillna(True)``. + +.. ipython:: python + + s[mask.fillna(True)] + +.. _boolean.kleene: + +Kleene logical operations +------------------------- + +:class:`arrays.BooleanArray` implements `Kleene Logic`_ (sometimes called three-value logic) for +logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or). + +This table demonstrates the results for every combination. These operations are symmetrical, +so flipping the left- and right-hand side makes no difference in the result. 
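+
+For a quick feel for these rules, combine a boolean array containing ``NA``
+with a scalar and note that ``NA`` survives only where the result is genuinely
+ambiguous. A short illustration (the name ``arr`` is arbitrary):
+
+.. ipython:: python
+
+   arr = pd.array([True, False, pd.NA], dtype="boolean")
+   # NA & True is still ambiguous, so it stays NA
+   arr & True
+   # x | True is True regardless of x, so the NA disappears
+   arr | True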
+ +================= ========= +Expression Result +================= ========= +``True & True`` ``True`` +``True & False`` ``False`` +``True & NA`` ``NA`` +``False & False`` ``False`` +``False & NA`` ``False`` +``NA & NA`` ``NA`` +``True | True`` ``True`` +``True | False`` ``True`` +``True | NA`` ``True`` +``False | False`` ``False`` +``False | NA`` ``NA`` +``NA | NA`` ``NA`` +``True ^ True`` ``False`` +``True ^ False`` ``True`` +``True ^ NA`` ``NA`` +``False ^ False`` ``False`` +``False ^ NA`` ``NA`` +``NA ^ NA`` ``NA`` +================= ========= + +When an ``NA`` is present in an operation, the output value is ``NA`` only if +the result cannot be determined solely based on the other input. For example, +``True | NA`` is ``True``, because both ``True | True`` and ``True | False`` +are ``True``. In that case, we don't actually need to consider the value +of the ``NA``. + +On the other hand, ``True & NA`` is ``NA``. The result depends on whether +the ``NA`` really is ``True`` or ``False``, since ``True & True`` is ``True``, +but ``True & False`` is ``False``, so we can't determine the output. + + +This differs from how ``np.nan`` behaves in logical operations. pandas treated +``np.nan`` is *always false in the output*. + +In ``or`` + +.. ipython:: python + + pd.Series([True, False, np.nan], dtype="object") | True + pd.Series([True, False, np.nan], dtype="boolean") | True + +In ``and`` + +.. ipython:: python + + pd.Series([True, False, np.nan], dtype="object") & True + pd.Series([True, False, np.nan], dtype="boolean") & True + +.. _Kleene Logic: https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst new file mode 100644 index 00000000..b5cb1d83 --- /dev/null +++ b/doc/source/user_guide/categorical.rst @@ -0,0 +1,1190 @@ +.. _categorical: + +{{ header }} + +**************** +Categorical data +**************** + +This is an introduction to pandas categorical data type, including a short comparison +with R's ``factor``. + +``Categoricals`` are a pandas data type corresponding to categorical variables in +statistics. A categorical variable takes on a limited, and usually fixed, +number of possible values (``categories``; ``levels`` in R). Examples are gender, +social class, blood type, country affiliation, observation time or rating via +Likert scales. + +In contrast to statistical categorical variables, categorical data might have an order (e.g. +'strongly agree' vs 'agree' or 'first observation' vs. 'second observation'), but numerical +operations (additions, divisions, ...) are not possible. + +All values of categorical data are either in ``categories`` or ``np.nan``. Order is defined by +the order of ``categories``, not lexical order of the values. Internally, the data structure +consists of a ``categories`` array and an integer array of ``codes`` which point to the real value in +the ``categories`` array. + +The categorical data type is useful in the following cases: + +* A string variable consisting of only a few different values. Converting such a string + variable to a categorical variable will save some memory, see :ref:`here `. +* The lexical order of a variable is not the same as the logical order ("one", "two", "three"). + By converting to a categorical and specifying an order on the categories, sorting and + min/max will use the logical order instead of the lexical order, see :ref:`here `. 
+* As a signal to other Python libraries that this column should be treated as a categorical + variable (e.g. to use suitable statistical methods or plot types). + +See also the :ref:`API docs on categoricals`. + +.. _categorical.objectcreation: + +Object creation +--------------- + +Series creation +~~~~~~~~~~~~~~~ + +Categorical ``Series`` or columns in a ``DataFrame`` can be created in several ways: + +By specifying ``dtype="category"`` when constructing a ``Series``: + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "a"], dtype="category") + s + +By converting an existing ``Series`` or column to a ``category`` dtype: + +.. ipython:: python + + df = pd.DataFrame({"A": ["a", "b", "c", "a"]}) + df["B"] = df["A"].astype("category") + df + +By using special functions, such as :func:`~pandas.cut`, which groups data into +discrete bins. See the :ref:`example on tiling ` in the docs. + +.. ipython:: python + + df = pd.DataFrame({"value": np.random.randint(0, 100, 20)}) + labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] + + df["group"] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels) + df.head(10) + +By passing a :class:`pandas.Categorical` object to a ``Series`` or assigning it to a ``DataFrame``. + +.. ipython:: python + + raw_cat = pd.Categorical( + ["a", "b", "c", "a"], categories=["b", "c", "d"], ordered=False + ) + s = pd.Series(raw_cat) + s + df = pd.DataFrame({"A": ["a", "b", "c", "a"]}) + df["B"] = raw_cat + df + +Categorical data has a specific ``category`` :ref:`dtype `: + +.. ipython:: python + + df.dtypes + +DataFrame creation +~~~~~~~~~~~~~~~~~~ + +Similar to the previous section where a single column was converted to categorical, all columns in a +``DataFrame`` can be batch converted to categorical either during or after construction. + +This can be done during construction by specifying ``dtype="category"`` in the ``DataFrame`` constructor: + +.. ipython:: python + + df = pd.DataFrame({"A": list("abca"), "B": list("bccd")}, dtype="category") + df.dtypes + +Note that the categories present in each column differ; the conversion is done column by column, so +only labels present in a given column are categories: + +.. ipython:: python + + df["A"] + df["B"] + + +Analogously, all columns in an existing ``DataFrame`` can be batch converted using :meth:`DataFrame.astype`: + +.. ipython:: python + + df = pd.DataFrame({"A": list("abca"), "B": list("bccd")}) + df_cat = df.astype("category") + df_cat.dtypes + +This conversion is likewise done column by column: + +.. ipython:: python + + df_cat["A"] + df_cat["B"] + + +Controlling behavior +~~~~~~~~~~~~~~~~~~~~ + +In the examples above where we passed ``dtype='category'``, we used the default +behavior: + +1. Categories are inferred from the data. +2. Categories are unordered. + +To control those behaviors, instead of passing ``'category'``, use an instance +of :class:`~pandas.api.types.CategoricalDtype`. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + s = pd.Series(["a", "b", "c", "a"]) + cat_type = CategoricalDtype(categories=["b", "c", "d"], ordered=True) + s_cat = s.astype(cat_type) + s_cat + +Similarly, a ``CategoricalDtype`` can be used with a ``DataFrame`` to ensure that categories +are consistent among all columns. + +.. 
ipython:: python + + from pandas.api.types import CategoricalDtype + + df = pd.DataFrame({"A": list("abca"), "B": list("bccd")}) + cat_type = CategoricalDtype(categories=list("abcd"), ordered=True) + df_cat = df.astype(cat_type) + df_cat["A"] + df_cat["B"] + +.. note:: + + To perform table-wise conversion, where all labels in the entire ``DataFrame`` are used as + categories for each column, the ``categories`` parameter can be determined programmatically by + ``categories = pd.unique(df.to_numpy().ravel())``. + +If you already have ``codes`` and ``categories``, you can use the +:func:`~pandas.Categorical.from_codes` constructor to save the factorize step +during normal constructor mode: + +.. ipython:: python + + splitter = np.random.choice([0, 1], 5, p=[0.5, 0.5]) + s = pd.Series(pd.Categorical.from_codes(splitter, categories=["train", "test"])) + + +Regaining original data +~~~~~~~~~~~~~~~~~~~~~~~ + +To get back to the original ``Series`` or NumPy array, use +``Series.astype(original_dtype)`` or ``np.asarray(categorical)``: + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "a"]) + s + s2 = s.astype("category") + s2 + s2.astype(str) + np.asarray(s2) + +.. note:: + + In contrast to R's ``factor`` function, categorical data is not converting input values to + strings; categories will end up the same data type as the original values. + +.. note:: + + In contrast to R's ``factor`` function, there is currently no way to assign/change labels at + creation time. Use ``categories`` to change the categories after creation time. + +.. _categorical.categoricaldtype: + +CategoricalDtype +---------------- + +A categorical's type is fully described by + +1. ``categories``: a sequence of unique values and no missing values +2. ``ordered``: a boolean + +This information can be stored in a :class:`~pandas.api.types.CategoricalDtype`. +The ``categories`` argument is optional, which implies that the actual categories +should be inferred from whatever is present in the data when the +:class:`pandas.Categorical` is created. The categories are assumed to be unordered +by default. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + CategoricalDtype(["a", "b", "c"]) + CategoricalDtype(["a", "b", "c"], ordered=True) + CategoricalDtype() + +A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas +expects a ``dtype``. For example :func:`pandas.read_csv`, +:func:`pandas.DataFrame.astype`, or in the ``Series`` constructor. + +.. note:: + + As a convenience, you can use the string ``'category'`` in place of a + :class:`~pandas.api.types.CategoricalDtype` when you want the default behavior of + the categories being unordered, and equal to the set values present in the + array. In other words, ``dtype='category'`` is equivalent to + ``dtype=CategoricalDtype()``. + +Equality semantics +~~~~~~~~~~~~~~~~~~ + +Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal +whenever they have the same categories and order. When comparing two +unordered categoricals, the order of the ``categories`` is not considered. + +.. ipython:: python + + c1 = CategoricalDtype(["a", "b", "c"], ordered=False) + + # Equal, since order is not considered when ordered=False + c1 == CategoricalDtype(["b", "c", "a"], ordered=False) + + # Unequal, since the second CategoricalDtype is ordered + c1 == CategoricalDtype(["a", "b", "c"], ordered=True) + +All instances of ``CategoricalDtype`` compare equal to the string ``'category'``. + +.. ipython:: python + + c1 == "category" + +.. 
warning:: + + Since ``dtype='category'`` is essentially ``CategoricalDtype(None, False)``, + and since all instances ``CategoricalDtype`` compare equal to ``'category'``, + all instances of ``CategoricalDtype`` compare equal to a + ``CategoricalDtype(None, False)``, regardless of ``categories`` or + ``ordered``. + +Description +----------- + +Using :meth:`~DataFrame.describe` on categorical data will produce similar +output to a ``Series`` or ``DataFrame`` of type ``string``. + +.. ipython:: python + + cat = pd.Categorical(["a", "c", "c", np.nan], categories=["b", "a", "c"]) + df = pd.DataFrame({"cat": cat, "s": ["a", "c", "c", np.nan]}) + df.describe() + df["cat"].describe() + +.. _categorical.cat: + +Working with categories +----------------------- + +Categorical data has a ``categories`` and a ``ordered`` property, which list their +possible values and whether the ordering matters or not. These properties are +exposed as ``s.cat.categories`` and ``s.cat.ordered``. If you don't manually +specify categories and ordering, they are inferred from the passed arguments. + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "a"], dtype="category") + s.cat.categories + s.cat.ordered + +It's also possible to pass in the categories in a specific order: + +.. ipython:: python + + s = pd.Series(pd.Categorical(["a", "b", "c", "a"], categories=["c", "b", "a"])) + s.cat.categories + s.cat.ordered + +.. note:: + + New categorical data are **not** automatically ordered. You must explicitly + pass ``ordered=True`` to indicate an ordered ``Categorical``. + + +.. note:: + + The result of :meth:`~Series.unique` is not always the same as ``Series.cat.categories``, + because ``Series.unique()`` has a couple of guarantees, namely that it returns categories + in the order of appearance, and it only includes values that are actually present. + + .. ipython:: python + + s = pd.Series(list("babc")).astype(CategoricalDtype(list("abcd"))) + s + + # categories + s.cat.categories + + # uniques + s.unique() + +Renaming categories +~~~~~~~~~~~~~~~~~~~ + +Renaming categories is done by using the +:meth:`~pandas.Categorical.rename_categories` method: + + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "a"], dtype="category") + s + new_categories = ["Group %s" % g for g in s.cat.categories] + s = s.cat.rename_categories(new_categories) + s + # You can also pass a dict-like object to map the renaming + s = s.cat.rename_categories({1: "x", 2: "y", 3: "z"}) + s + +.. note:: + + In contrast to R's ``factor``, categorical data can have categories of other types than string. + +.. note:: + + Be aware that assigning new categories is an inplace operation, while most other operations + under ``Series.cat`` per default return a new ``Series`` of dtype ``category``. + +Categories must be unique or a ``ValueError`` is raised: + +.. ipython:: python + + try: + s = s.cat.rename_categories([1, 1, 1]) + except ValueError as e: + print("ValueError:", str(e)) + +Categories must also not be ``NaN`` or a ``ValueError`` is raised: + +.. ipython:: python + + try: + s = s.cat.rename_categories([1, 2, np.nan]) + except ValueError as e: + print("ValueError:", str(e)) + +Appending new categories +~~~~~~~~~~~~~~~~~~~~~~~~ + +Appending categories can be done by using the +:meth:`~pandas.Categorical.add_categories` method: + +.. 
ipython:: python + + s = s.cat.add_categories([4]) + s.cat.categories + s + +Removing categories +~~~~~~~~~~~~~~~~~~~ + +Removing categories can be done by using the +:meth:`~pandas.Categorical.remove_categories` method. Values which are removed +are replaced by ``np.nan``.: + +.. ipython:: python + + s = s.cat.remove_categories([4]) + s + +Removing unused categories +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Removing unused categories can also be done: + +.. ipython:: python + + s = pd.Series(pd.Categorical(["a", "b", "a"], categories=["a", "b", "c", "d"])) + s + s.cat.remove_unused_categories() + +Setting categories +~~~~~~~~~~~~~~~~~~ + +If you want to do remove and add new categories in one step (which has some +speed advantage), or simply set the categories to a predefined scale, +use :meth:`~pandas.Categorical.set_categories`. + + +.. ipython:: python + + s = pd.Series(["one", "two", "four", "-"], dtype="category") + s + s = s.cat.set_categories(["one", "two", "three", "four"]) + s + +.. note:: + Be aware that :func:`Categorical.set_categories` cannot know whether some category is omitted + intentionally or because it is misspelled or (under Python3) due to a type difference (e.g., + NumPy S1 dtype and Python strings). This can result in surprising behaviour! + +Sorting and order +----------------- + +.. _categorical.sort: + +If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a +meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a ``TypeError``. + +.. ipython:: python + + s = pd.Series(pd.Categorical(["a", "b", "c", "a"], ordered=False)) + s.sort_values(inplace=True) + s = pd.Series(["a", "b", "c", "a"]).astype(CategoricalDtype(ordered=True)) + s.sort_values(inplace=True) + s + s.min(), s.max() + +You can set categorical data to be ordered by using ``as_ordered()`` or unordered by using ``as_unordered()``. These will by +default return a *new* object. + +.. ipython:: python + + s.cat.as_ordered() + s.cat.as_unordered() + +Sorting will use the order defined by categories, not any lexical order present on the data type. +This is even true for strings and numeric data: + +.. ipython:: python + + s = pd.Series([1, 2, 3, 1], dtype="category") + s = s.cat.set_categories([2, 3, 1], ordered=True) + s + s.sort_values(inplace=True) + s + s.min(), s.max() + + +Reordering +~~~~~~~~~~ + +Reordering the categories is possible via the :meth:`Categorical.reorder_categories` and +the :meth:`Categorical.set_categories` methods. For :meth:`Categorical.reorder_categories`, all +old categories must be included in the new categories and no new categories are allowed. This will +necessarily make the sort order the same as the categories order. + +.. ipython:: python + + s = pd.Series([1, 2, 3, 1], dtype="category") + s = s.cat.reorder_categories([2, 3, 1], ordered=True) + s + s.sort_values(inplace=True) + s + s.min(), s.max() + +.. note:: + + Note the difference between assigning new categories and reordering the categories: the first + renames categories and therefore the individual values in the ``Series``, but if the first + position was sorted last, the renamed value will still be sorted last. Reordering means that the + way values are sorted is different afterwards, but not that individual values in the + ``Series`` are changed. + +.. note:: + + If the ``Categorical`` is not ordered, :meth:`Series.min` and :meth:`Series.max` will raise + ``TypeError``. 
Numeric operations like ``+``, ``-``, ``*``, ``/`` and operations based on them + (e.g. :meth:`Series.median`, which would need to compute the mean between two values if the length + of an array is even) do not work and raise a ``TypeError``. + +Multi column sorting +~~~~~~~~~~~~~~~~~~~~ + +A categorical dtyped column will participate in a multi-column sort in a similar manner to other columns. +The ordering of the categorical is determined by the ``categories`` of that column. + +.. ipython:: python + + dfs = pd.DataFrame( + { + "A": pd.Categorical( + list("bbeebbaa"), + categories=["e", "a", "b"], + ordered=True, + ), + "B": [1, 2, 1, 2, 2, 1, 2, 1], + } + ) + dfs.sort_values(by=["A", "B"]) + +Reordering the ``categories`` changes a future sort. + +.. ipython:: python + + dfs["A"] = dfs["A"].cat.reorder_categories(["a", "b", "e"]) + dfs.sort_values(by=["A", "B"]) + +Comparisons +----------- + +Comparing categorical data with other objects is possible in three cases: + +* Comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array, + ...) of the same length as the categorical data. +* All comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to + another categorical Series, when ``ordered==True`` and the ``categories`` are the same. +* All comparisons of a categorical data to a scalar. + +All other comparisons, especially "non-equality" comparisons of two categoricals with different +categories or a categorical with any list-like object, will raise a ``TypeError``. + +.. note:: + + Any "non-equality" comparisons of categorical data with a ``Series``, ``np.array``, ``list`` or + categorical data with different categories or ordering will raise a ``TypeError`` because custom + categories ordering could be interpreted in two ways: one with taking into account the + ordering and one without. + +.. ipython:: python + + cat = pd.Series([1, 2, 3]).astype(CategoricalDtype([3, 2, 1], ordered=True)) + cat_base = pd.Series([2, 2, 2]).astype(CategoricalDtype([3, 2, 1], ordered=True)) + cat_base2 = pd.Series([2, 2, 2]).astype(CategoricalDtype(ordered=True)) + + cat + cat_base + cat_base2 + +Comparing to a categorical with the same categories and ordering or to a scalar works: + +.. ipython:: python + + cat > cat_base + cat > 2 + +Equality comparisons work with any list-like object of same length and scalars: + +.. ipython:: python + + cat == cat_base + cat == np.array([1, 2, 3]) + cat == 2 + +This doesn't work because the categories are not the same: + +.. ipython:: python + + try: + cat > cat_base2 + except TypeError as e: + print("TypeError:", str(e)) + +If you want to do a "non-equality" comparison of a categorical series with a list-like object +which is not categorical data, you need to be explicit and convert the categorical data back to +the original values: + +.. ipython:: python + + base = np.array([1, 2, 3]) + + try: + cat > base + except TypeError as e: + print("TypeError:", str(e)) + + np.asarray(cat) > base + +When you compare two unordered categoricals with the same categories, the order is not considered: + +.. 
ipython:: python + + c1 = pd.Categorical(["a", "b"], categories=["a", "b"], ordered=False) + c2 = pd.Categorical(["a", "b"], categories=["b", "a"], ordered=False) + c1 == c2 + +Operations +---------- + +Apart from :meth:`Series.min`, :meth:`Series.max` and :meth:`Series.mode`, the +following operations are possible with categorical data: + +``Series`` methods like :meth:`Series.value_counts` will use all categories, +even if some categories are not present in the data: + +.. ipython:: python + + s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"])) + s.value_counts() + +``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories. + +.. ipython:: python + + columns = pd.Categorical( + ["One", "One", "Two"], categories=["One", "Two", "Three"], ordered=True + ) + df = pd.DataFrame( + data=[[1, 2, 3], [4, 5, 6]], + columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]), + ) + df.groupby(axis=1, level=1).sum() + +Groupby will also show "unused" categories: + +.. ipython:: python + + cats = pd.Categorical( + ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"] + ) + df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]}) + df.groupby("cats").mean() + + cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) + df2 = pd.DataFrame( + { + "cats": cats2, + "B": ["c", "d", "c", "d"], + "values": [1, 2, 3, 4], + } + ) + df2.groupby(["cats", "B"]).mean() + + +Pivot tables: + +.. ipython:: python + + raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) + df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]}) + pd.pivot_table(df, values="values", index=["A", "B"]) + +Data munging +------------ + +The optimized pandas data access methods ``.loc``, ``.iloc``, ``.at``, and ``.iat``, +work as normal. The only difference is the return type (for getting) and +that only values already in ``categories`` can be assigned. + +Getting +~~~~~~~ + +If the slicing operation returns either a ``DataFrame`` or a column of type +``Series``, the ``category`` dtype is preserved. + +.. ipython:: python + + idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + cats = pd.Series(["a", "b", "b", "b", "c", "c", "c"], dtype="category", index=idx) + values = [1, 2, 2, 2, 3, 4, 5] + df = pd.DataFrame({"cats": cats, "values": values}, index=idx) + df.iloc[2:4, :] + df.iloc[2:4, :].dtypes + df.loc["h":"j", "cats"] + df[df["cats"] == "b"] + +An example where the category type is not preserved is if you take one single +row: the resulting ``Series`` is of dtype ``object``: + +.. ipython:: python + + # get the complete "h" row as a Series + df.loc["h", :] + +Returning a single item from categorical data will also return the value, not a categorical +of length "1". + +.. ipython:: python + + df.iat[0, 0] + df["cats"] = df["cats"].cat.rename_categories(["x", "y", "z"]) + df.at["h", "cats"] # returns a string + +.. note:: + The is in contrast to R's ``factor`` function, where ``factor(c(1,2,3))[1]`` + returns a single value ``factor``. + +To get a single value ``Series`` of type ``category``, you pass in a list with +a single value: + +.. ipython:: python + + df.loc[["h"], "cats"] + +String and datetime accessors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The accessors ``.dt`` and ``.str`` will work if the ``s.cat.categories`` are of +an appropriate type: + + +.. 
ipython:: python + + str_s = pd.Series(list("aabb")) + str_cat = str_s.astype("category") + str_cat + str_cat.str.contains("a") + + date_s = pd.Series(pd.date_range("1/1/2015", periods=5)) + date_cat = date_s.astype("category") + date_cat + date_cat.dt.day + +.. note:: + + The returned ``Series`` (or ``DataFrame``) is of the same type as if you used the + ``.str.`` / ``.dt.`` on a ``Series`` of that type (and not of + type ``category``!). + +That means, that the returned values from methods and properties on the accessors of a +``Series`` and the returned values from methods and properties on the accessors of this +``Series`` transformed to one of type ``category`` will be equal: + +.. ipython:: python + + ret_s = str_s.str.contains("a") + ret_cat = str_cat.str.contains("a") + ret_s.dtype == ret_cat.dtype + ret_s == ret_cat + +.. note:: + + The work is done on the ``categories`` and then a new ``Series`` is constructed. This has + some performance implication if you have a ``Series`` of type string, where lots of elements + are repeated (i.e. the number of unique elements in the ``Series`` is a lot smaller than the + length of the ``Series``). In this case it can be faster to convert the original ``Series`` + to one of type ``category`` and use ``.str.`` or ``.dt.`` on that. + +Setting +~~~~~~~ + +Setting values in a categorical column (or ``Series``) works as long as the +value is included in the ``categories``: + +.. ipython:: python + + idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + cats = pd.Categorical(["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"]) + values = [1, 1, 1, 1, 1, 1, 1] + df = pd.DataFrame({"cats": cats, "values": values}, index=idx) + + df.iloc[2:4, :] = [["b", 2], ["b", 2]] + df + try: + df.iloc[2:4, :] = [["c", 3], ["c", 3]] + except TypeError as e: + print("TypeError:", str(e)) + +Setting values by assigning categorical data will also check that the ``categories`` match: + +.. ipython:: python + + df.loc["j":"k", "cats"] = pd.Categorical(["a", "a"], categories=["a", "b"]) + df + try: + df.loc["j":"k", "cats"] = pd.Categorical(["b", "b"], categories=["a", "b", "c"]) + except TypeError as e: + print("TypeError:", str(e)) + +Assigning a ``Categorical`` to parts of a column of other types will use the values: + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]}) + df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"]) + df + df.dtypes + +.. _categorical.merge: +.. _categorical.concat: + +Merging / concatenation +~~~~~~~~~~~~~~~~~~~~~~~ + +By default, combining ``Series`` or ``DataFrames`` which contain the same +categories results in ``category`` dtype, otherwise results will depend on the +dtype of the underlying categories. Merges that result in non-categorical +dtypes will likely have higher memory usage. Use ``.astype`` or +``union_categoricals`` to ensure ``category`` results. + +.. 
ipython:: python + + from pandas.api.types import union_categoricals + + # same categories + s1 = pd.Series(["a", "b"], dtype="category") + s2 = pd.Series(["a", "b", "a"], dtype="category") + pd.concat([s1, s2]) + + # different categories + s3 = pd.Series(["b", "c"], dtype="category") + pd.concat([s1, s3]) + + # Output dtype is inferred based on categories values + int_cats = pd.Series([1, 2], dtype="category") + float_cats = pd.Series([3.0, 4.0], dtype="category") + pd.concat([int_cats, float_cats]) + + pd.concat([s1, s3]).astype("category") + union_categoricals([s1.array, s3.array]) + +The following table summarizes the results of merging ``Categoricals``: + ++-------------------+------------------------+----------------------+-----------------------------+ +| arg1 | arg2 | identical | result | ++===================+========================+======================+=============================+ +| category | category | True | category | ++-------------------+------------------------+----------------------+-----------------------------+ +| category (object) | category (object) | False | object (dtype is inferred) | ++-------------------+------------------------+----------------------+-----------------------------+ +| category (int) | category (float) | False | float (dtype is inferred) | ++-------------------+------------------------+----------------------+-----------------------------+ + +See also the section on :ref:`merge dtypes` for notes about +preserving merge dtypes and performance. + +.. _categorical.union: + +Unioning +~~~~~~~~ + +If you want to combine categoricals that do not necessarily have the same +categories, the :func:`~pandas.api.types.union_categoricals` function will +combine a list-like of categoricals. The new categories will be the union of +the categories being combined. + +.. ipython:: python + + from pandas.api.types import union_categoricals + + a = pd.Categorical(["b", "c"]) + b = pd.Categorical(["a", "b"]) + union_categoricals([a, b]) + +By default, the resulting categories will be ordered as +they appear in the data. If you want the categories to +be lexsorted, use ``sort_categories=True`` argument. + +.. ipython:: python + + union_categoricals([a, b], sort_categories=True) + +``union_categoricals`` also works with the "easy" case of combining two +categoricals of the same categories and order information +(e.g. what you could also ``append`` for). + +.. ipython:: python + + a = pd.Categorical(["a", "b"], ordered=True) + b = pd.Categorical(["a", "b", "a"], ordered=True) + union_categoricals([a, b]) + +The below raises ``TypeError`` because the categories are ordered and not identical. + +.. code-block:: ipython + + In [1]: a = pd.Categorical(["a", "b"], ordered=True) + In [2]: b = pd.Categorical(["a", "b", "c"], ordered=True) + In [3]: union_categoricals([a, b]) + Out[3]: + TypeError: to union ordered Categoricals, all categories must be the same + +Ordered categoricals with different categories or orderings can be combined by +using the ``ignore_ordered=True`` argument. + +.. ipython:: python + + a = pd.Categorical(["a", "b", "c"], ordered=True) + b = pd.Categorical(["c", "b", "a"], ordered=True) + union_categoricals([a, b], ignore_order=True) + +:func:`~pandas.api.types.union_categoricals` also works with a +``CategoricalIndex``, or ``Series`` containing categorical data, but note that +the resulting array will always be a plain ``Categorical``: + +.. 
ipython:: python + + a = pd.Series(["b", "c"], dtype="category") + b = pd.Series(["a", "b"], dtype="category") + union_categoricals([a, b]) + +.. note:: + + ``union_categoricals`` may recode the integer codes for categories + when combining categoricals. This is likely what you want, + but if you are relying on the exact numbering of the categories, be + aware. + + .. ipython:: python + + c1 = pd.Categorical(["b", "c"]) + c2 = pd.Categorical(["a", "b"]) + + c1 + # "b" is coded to 0 + c1.codes + + c2 + # "b" is coded to 1 + c2.codes + + c = union_categoricals([c1, c2]) + c + # "b" is coded to 0 throughout, same as c1, different from c2 + c.codes + + +Getting data in/out +------------------- + +You can write data that contains ``category`` dtypes to a ``HDFStore``. +See :ref:`here ` for an example and caveats. + +It is also possible to write data to and reading data from *Stata* format files. +See :ref:`here ` for an example and caveats. + +Writing to a CSV file will convert the data, effectively removing any information about the +categorical (categories and ordering). So if you read back the CSV file you have to convert the +relevant columns back to ``category`` and assign the right categories and categories ordering. + +.. ipython:: python + :okwarning: + + import io + + s = pd.Series(pd.Categorical(["a", "b", "b", "a", "a", "d"])) + # rename the categories + s = s.cat.rename_categories(["very good", "good", "bad"]) + # reorder the categories and add missing categories + s = s.cat.set_categories(["very bad", "bad", "medium", "good", "very good"]) + df = pd.DataFrame({"cats": s, "vals": [1, 2, 3, 4, 5, 6]}) + csv = io.StringIO() + df.to_csv(csv) + df2 = pd.read_csv(io.StringIO(csv.getvalue())) + df2.dtypes + df2["cats"] + # Redo the category + df2["cats"] = df2["cats"].astype("category") + df2["cats"].cat.set_categories( + ["very bad", "bad", "medium", "good", "very good"], inplace=True + ) + df2.dtypes + df2["cats"] + +The same holds for writing to a SQL database with ``to_sql``. + +Missing data +------------ + +pandas primarily uses the value ``np.nan`` to represent missing data. It is by +default not included in computations. See the :ref:`Missing Data section +`. + +Missing values should **not** be included in the Categorical's ``categories``, +only in the ``values``. +Instead, it is understood that NaN is different, and is always a possibility. +When working with the Categorical's ``codes``, missing values will always have +a code of ``-1``. + +.. ipython:: python + + s = pd.Series(["a", "b", np.nan, "a"], dtype="category") + # only two categories + s + s.cat.codes + + +Methods for working with missing data, e.g. :meth:`~Series.isna`, :meth:`~Series.fillna`, +:meth:`~Series.dropna`, all work normally: + +.. ipython:: python + + s = pd.Series(["a", "b", np.nan], dtype="category") + s + pd.isna(s) + s.fillna("a") + +Differences to R's ``factor`` +----------------------------- + +The following differences to R's factor functions can be observed: + +* R's ``levels`` are named ``categories``. +* R's ``levels`` are always of type string, while ``categories`` in pandas can be of any dtype. +* It's not possible to specify labels at creation time. Use ``s.cat.rename_categories(new_labels)`` + afterwards. +* In contrast to R's ``factor`` function, using categorical data as the sole input to create a + new categorical series will *not* remove unused categories but create a new categorical series + which is equal to the passed in one! 
+* R allows for missing values to be included in its ``levels`` (pandas' ``categories``). pandas + does not allow ``NaN`` categories, but missing values can still be in the ``values``. + + +Gotchas +------- + +.. _categorical.rfactor: + +Memory usage +~~~~~~~~~~~~ + +.. _categorical.memory: + +The memory usage of a ``Categorical`` is proportional to the number of categories plus the length of the data. In contrast, +an ``object`` dtype is a constant times the length of the data. + +.. ipython:: python + + s = pd.Series(["foo", "bar"] * 1000) + + # object dtype + s.nbytes + + # category dtype + s.astype("category").nbytes + +.. note:: + + If the number of categories approaches the length of the data, the ``Categorical`` will use nearly the same or + more memory than an equivalent ``object`` dtype representation. + + .. ipython:: python + + s = pd.Series(["foo%04d" % i for i in range(2000)]) + + # object dtype + s.nbytes + + # category dtype + s.astype("category").nbytes + + +``Categorical`` is not a ``numpy`` array +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, categorical data and the underlying ``Categorical`` is implemented as a Python +object and not as a low-level NumPy array dtype. This leads to some problems. + +NumPy itself doesn't know about the new ``dtype``: + +.. ipython:: python + + try: + np.dtype("category") + except TypeError as e: + print("TypeError:", str(e)) + + dtype = pd.Categorical(["a"]).dtype + try: + np.dtype(dtype) + except TypeError as e: + print("TypeError:", str(e)) + +Dtype comparisons work: + +.. ipython:: python + + dtype == np.str_ + np.str_ == dtype + +To check if a Series contains Categorical data, use ``hasattr(s, 'cat')``: + +.. ipython:: python + + hasattr(pd.Series(["a"], dtype="category"), "cat") + hasattr(pd.Series(["a"]), "cat") + +Using NumPy functions on a ``Series`` of type ``category`` should not work as ``Categoricals`` +are not numeric data (even in the case that ``.categories`` is numeric). + +.. ipython:: python + + s = pd.Series(pd.Categorical([1, 2, 3, 4])) + try: + np.sum(s) + # same with np.log(s),... + except TypeError as e: + print("TypeError:", str(e)) + +.. note:: + If such a function works, please file a bug at https://github.com/pandas-dev/pandas! + +dtype in apply +~~~~~~~~~~~~~~ + +pandas currently does not preserve the dtype in apply functions: If you apply along rows you get +a ``Series`` of ``object`` ``dtype`` (same as getting a row -> getting one element will return a +basic type) and applying along columns will also convert to object. ``NaN`` values are unaffected. +You can use ``fillna`` to handle missing values before applying a function. + +.. ipython:: python + + df = pd.DataFrame( + { + "a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"], + "cats": pd.Categorical([1, 2, 3, 2]), + } + ) + df.apply(lambda row: type(row["cats"]), axis=1) + df.apply(lambda col: col.dtype, axis=0) + +Categorical index +~~~~~~~~~~~~~~~~~ + +``CategoricalIndex`` is a type of index that is useful for supporting +indexing with duplicates. This is a container around a ``Categorical`` +and allows efficient indexing and storage of an index with a large number of duplicated elements. +See the :ref:`advanced indexing docs ` for a more detailed +explanation. + +Setting the index will create a ``CategoricalIndex``: + +.. 
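+
+For example, calling :meth:`~DataFrame.set_index` on a ``category``-dtype
+column also produces a ``CategoricalIndex`` (an added sketch with a
+hypothetical frame, separate from the example below):
+
+.. code-block:: python
+
+   tmp = pd.DataFrame({"cat": pd.Categorical(["a", "b", "a"]), "vals": [1, 2, 3]})
+   # the resulting index is a CategoricalIndex(['a', 'b', 'a'], ...)
+   tmp.set_index("cat").index
+
+Passing a ``Categorical`` as the ``index`` of the constructor behaves the same way:
+
+..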
ipython:: python + + cats = pd.Categorical([1, 2, 3, 4], categories=[4, 2, 3, 1]) + strings = ["a", "b", "c", "d"] + values = [4, 2, 3, 1] + df = pd.DataFrame({"strings": strings, "values": values}, index=cats) + df.index + # This now sorts by the categories order + df.sort_index() + +Side effects +~~~~~~~~~~~~ + +Constructing a ``Series`` from a ``Categorical`` will not copy the input +``Categorical``. This means that changes to the ``Series`` will in most cases +change the original ``Categorical``: + +.. ipython:: python + :okwarning: + + cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + s = pd.Series(cat, name="cat") + cat + s.iloc[0:2] = 10 + cat + df = pd.DataFrame(s) + df["cat"].cat.categories = [1, 2, 3, 4, 5] + cat + +Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categoricals``: + +.. ipython:: python + + cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + s = pd.Series(cat, name="cat", copy=True) + cat + s.iloc[0:2] = 10 + cat + +.. note:: + + This also happens in some cases when you supply a NumPy array instead of a ``Categorical``: + using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using + a string array (e.g. ``np.array(["a","b","c","a"])``) will not. diff --git a/doc/source/user_guide/dsintro.rst b/doc/source/user_guide/dsintro.rst new file mode 100644 index 00000000..571f8980 --- /dev/null +++ b/doc/source/user_guide/dsintro.rst @@ -0,0 +1,849 @@ +.. _dsintro: + +{{ header }} + +************************ +Intro to data structures +************************ + +We'll start with a quick, non-comprehensive overview of the fundamental data +structures in pandas to get you started. The fundamental behavior about data +types, indexing, axis labeling, and alignment apply across all of the +objects. To get started, import NumPy and load pandas into your namespace: + +.. ipython:: python + + import numpy as np + import pandas as pd + +Fundamentally, **data alignment is intrinsic**. The link +between labels and data will not be broken unless done so explicitly by you. + +We'll give a brief intro to the data structures, then consider all of the broad +categories of functionality and methods in separate sections. + +.. _basics.series: + +Series +------ + +:class:`Series` is a one-dimensional labeled array capable of holding any data +type (integers, strings, floating point numbers, Python objects, etc.). The axis +labels are collectively referred to as the **index**. The basic method to create a :class:`Series` is to call: + +:: + + >>> s = pd.Series(data, index=index) + +Here, ``data`` can be many different things: + +* a Python dict +* an ndarray +* a scalar value (like 5) + +The passed **index** is a list of axis labels. Thus, this separates into a few +cases depending on what **data is**: + +**From ndarray** + +If ``data`` is an ndarray, **index** must be the same length as **data**. If no +index is passed, one will be created having values ``[0, ..., len(data) - 1]``. + +.. ipython:: python + + s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"]) + s + s.index + + pd.Series(np.random.randn(5)) + +.. note:: + + pandas supports non-unique index values. If an operation + that does not support duplicate index values is attempted, an exception + will be raised at that time. + +**From dict** + +:class:`Series` can be instantiated from dicts: + +.. 
ipython:: python + + d = {"b": 1, "a": 0, "c": 2} + pd.Series(d) + +If an index is passed, the values in data corresponding to the labels in the +index will be pulled out. + +.. ipython:: python + + d = {"a": 0.0, "b": 1.0, "c": 2.0} + pd.Series(d) + pd.Series(d, index=["b", "c", "d", "a"]) + +.. note:: + + NaN (not a number) is the standard missing data marker used in pandas. + +**From scalar value** + +If ``data`` is a scalar value, an index must be +provided. The value will be repeated to match the length of **index**. + +.. ipython:: python + + pd.Series(5.0, index=["a", "b", "c", "d", "e"]) + +Series is ndarray-like +~~~~~~~~~~~~~~~~~~~~~~ + +:class:`Series` acts very similarly to a ``ndarray`` and is a valid argument to most NumPy functions. +However, operations such as slicing will also slice the index. + +.. ipython:: python + + s[0] + s[:3] + s[s > s.median()] + s[[4, 3, 1]] + np.exp(s) + +.. note:: + + We will address array-based indexing like ``s[[4, 3, 1]]`` + in :ref:`section on indexing `. + +Like a NumPy array, a pandas :class:`Series` has a single :attr:`~Series.dtype`. + +.. ipython:: python + + s.dtype + +This is often a NumPy dtype. However, pandas and 3rd-party libraries +extend NumPy's type system in a few places, in which case the dtype would +be an :class:`~pandas.api.extensions.ExtensionDtype`. Some examples within +pandas are :ref:`categorical` and :ref:`integer_na`. See :ref:`basics.dtypes` +for more. + +If you need the actual array backing a :class:`Series`, use :attr:`Series.array`. + +.. ipython:: python + + s.array + +Accessing the array can be useful when you need to do some operation without the +index (to disable :ref:`automatic alignment `, for example). + +:attr:`Series.array` will always be an :class:`~pandas.api.extensions.ExtensionArray`. +Briefly, an ExtensionArray is a thin wrapper around one or more *concrete* arrays like a +:class:`numpy.ndarray`. pandas knows how to take an :class:`~pandas.api.extensions.ExtensionArray` and +store it in a :class:`Series` or a column of a :class:`DataFrame`. +See :ref:`basics.dtypes` for more. + +While :class:`Series` is ndarray-like, if you need an *actual* ndarray, then use +:meth:`Series.to_numpy`. + +.. ipython:: python + + s.to_numpy() + +Even if the :class:`Series` is backed by a :class:`~pandas.api.extensions.ExtensionArray`, +:meth:`Series.to_numpy` will return a NumPy ndarray. + +Series is dict-like +~~~~~~~~~~~~~~~~~~~ + +A :class:`Series` is also like a fixed-size dict in that you can get and set values by index +label: + +.. ipython:: python + + s["a"] + s["e"] = 12.0 + s + "e" in s + "f" in s + +If a label is not contained in the index, an exception is raised: + +.. ipython:: python + :okexcept: + + s["f"] + +Using the :meth:`Series.get` method, a missing label will return None or specified default: + +.. ipython:: python + + s.get("f") + + s.get("f", np.nan) + +These labels can also be accessed by :ref:`attribute`. + +Vectorized operations and label alignment with Series +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When working with raw NumPy arrays, looping through value-by-value is usually +not necessary. The same is true when working with :class:`Series` in pandas. +:class:`Series` can also be passed into most NumPy methods expecting an ndarray. + +.. ipython:: python + + s + s + s * 2 + np.exp(s) + +A key difference between :class:`Series` and ndarray is that operations between :class:`Series` +automatically align the data based on label. 
Thus, you can write computations +without giving consideration to whether the :class:`Series` involved have the same +labels. + +.. ipython:: python + + s[1:] + s[:-1] + +The result of an operation between unaligned :class:`Series` will have the **union** of +the indexes involved. If a label is not found in one :class:`Series` or the other, the +result will be marked as missing ``NaN``. Being able to write code without doing +any explicit data alignment grants immense freedom and flexibility in +interactive data analysis and research. The integrated data alignment features +of the pandas data structures set pandas apart from the majority of related +tools for working with labeled data. + +.. note:: + + In general, we chose to make the default result of operations between + differently indexed objects yield the **union** of the indexes in order to + avoid loss of information. Having an index label, though the data is + missing, is typically important information as part of a computation. You + of course have the option of dropping labels with missing data via the + **dropna** function. + +Name attribute +~~~~~~~~~~~~~~ + +.. _dsintro.name_attribute: + +:class:`Series` also has a ``name`` attribute: + +.. ipython:: python + + s = pd.Series(np.random.randn(5), name="something") + s + s.name + +The :class:`Series` ``name`` can be assigned automatically in many cases, in particular, +when selecting a single column from a :class:`DataFrame`, the ``name`` will be assigned +the column label. + +You can rename a :class:`Series` with the :meth:`pandas.Series.rename` method. + +.. ipython:: python + + s2 = s.rename("different") + s2.name + +Note that ``s`` and ``s2`` refer to different objects. + +.. _basics.dataframe: + +DataFrame +--------- + +:class:`DataFrame` is a 2-dimensional labeled data structure with columns of +potentially different types. You can think of it like a spreadsheet or SQL +table, or a dict of Series objects. It is generally the most commonly used +pandas object. Like Series, DataFrame accepts many different kinds of input: + +* Dict of 1D ndarrays, lists, dicts, or :class:`Series` +* 2-D numpy.ndarray +* `Structured or record + `__ ndarray +* A :class:`Series` +* Another :class:`DataFrame` + +Along with the data, you can optionally pass **index** (row labels) and +**columns** (column labels) arguments. If you pass an index and / or columns, +you are guaranteeing the index and / or columns of the resulting +DataFrame. Thus, a dict of Series plus a specific index will discard all data +not matching up to the passed index. + +If axis labels are not passed, they will be constructed from the input data +based on common sense rules. + +From dict of Series or dicts +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The resulting **index** will be the **union** of the indexes of the various +Series. If there are any nested dicts, these will first be converted to +Series. If no columns are passed, the columns will be the ordered list of dict +keys. + +.. ipython:: python + + d = { + "one": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]), + "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=["a", "b", "c", "d"]), + } + df = pd.DataFrame(d) + df + + pd.DataFrame(d, index=["d", "b", "a"]) + pd.DataFrame(d, index=["d", "b", "a"], columns=["two", "three"]) + +The row and column labels can be accessed respectively by accessing the +**index** and **columns** attributes: + +.. note:: + + When a particular set of columns is passed along with a dict of data, the + passed columns override the keys in the dict. + +.. 
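+
+To illustrate the note (an added sketch with a throwaway dict, unrelated to
+the ``d`` used above):
+
+.. code-block:: python
+
+   # "one" is dropped, "three" is introduced as a column of NaN
+   pd.DataFrame({"one": [1.0, 2.0], "two": [3.0, 4.0]}, columns=["two", "three"])
+
+For the ``df`` constructed earlier from ``d``:
+
+..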
ipython:: python + + df.index + df.columns + +From dict of ndarrays / lists +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ndarrays must all be the same length. If an index is passed, it must +also be the same length as the arrays. If no index is passed, the +result will be ``range(n)``, where ``n`` is the array length. + +.. ipython:: python + + d = {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]} + pd.DataFrame(d) + pd.DataFrame(d, index=["a", "b", "c", "d"]) + +From structured or record array +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This case is handled identically to a dict of arrays. + +.. ipython:: python + + data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "a10")]) + data[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")] + + pd.DataFrame(data) + pd.DataFrame(data, index=["first", "second"]) + pd.DataFrame(data, columns=["C", "A", "B"]) + +.. note:: + + DataFrame is not intended to work exactly like a 2-dimensional NumPy + ndarray. + +.. _basics.dataframe.from_list_of_dicts: + +From a list of dicts +~~~~~~~~~~~~~~~~~~~~ + +.. ipython:: python + + data2 = [{"a": 1, "b": 2}, {"a": 5, "b": 10, "c": 20}] + pd.DataFrame(data2) + pd.DataFrame(data2, index=["first", "second"]) + pd.DataFrame(data2, columns=["a", "b"]) + +.. _basics.dataframe.from_dict_of_tuples: + +From a dict of tuples +~~~~~~~~~~~~~~~~~~~~~ + +You can automatically create a MultiIndexed frame by passing a tuples +dictionary. + +.. ipython:: python + + pd.DataFrame( + { + ("a", "b"): {("A", "B"): 1, ("A", "C"): 2}, + ("a", "a"): {("A", "C"): 3, ("A", "B"): 4}, + ("a", "c"): {("A", "B"): 5, ("A", "C"): 6}, + ("b", "a"): {("A", "C"): 7, ("A", "B"): 8}, + ("b", "b"): {("A", "D"): 9, ("A", "B"): 10}, + } + ) + +.. _basics.dataframe.from_series: + +From a Series +~~~~~~~~~~~~~ + +The result will be a DataFrame with the same index as the input Series, and +with one column whose name is the original name of the Series (only if no other +column name provided). + +.. ipython:: python + + ser = pd.Series(range(3), index=list("abc"), name="ser") + pd.DataFrame(ser) + +.. _basics.dataframe.from_list_namedtuples: + +From a list of namedtuples +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The field names of the first ``namedtuple`` in the list determine the columns +of the :class:`DataFrame`. The remaining namedtuples (or tuples) are simply unpacked +and their values are fed into the rows of the :class:`DataFrame`. If any of those +tuples is shorter than the first ``namedtuple`` then the later columns in the +corresponding row are marked as missing values. If any are longer than the +first ``namedtuple``, a ``ValueError`` is raised. + +.. ipython:: python + + from collections import namedtuple + + Point = namedtuple("Point", "x y") + + pd.DataFrame([Point(0, 0), Point(0, 3), (2, 3)]) + + Point3D = namedtuple("Point3D", "x y z") + + pd.DataFrame([Point3D(0, 0, 0), Point3D(0, 3, 5), Point(2, 3)]) + + +.. _basics.dataframe.from_list_dataclasses: + +From a list of dataclasses +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1.0 + +Data Classes as introduced in `PEP557 `__, +can be passed into the DataFrame constructor. +Passing a list of dataclasses is equivalent to passing a list of dictionaries. + +Please be aware, that all values in the list should be dataclasses, mixing +types in the list would result in a ``TypeError``. + +.. 
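+
+For instance, mixing a dataclass instance with a plain dict raises at
+construction time (an added sketch; the ``Point`` dataclass here mirrors the
+one defined in the example that follows):
+
+.. code-block:: python
+
+   from dataclasses import make_dataclass
+
+   Point = make_dataclass("Point", [("x", int), ("y", int)])
+   pd.DataFrame([Point(0, 0), {"x": 1, "y": 2}])  # raises TypeError
+
+When every element is a dataclass, construction works as expected:
+
+..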
ipython:: python + + from dataclasses import make_dataclass + + Point = make_dataclass("Point", [("x", int), ("y", int)]) + + pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)]) + +**Missing data** + +To construct a DataFrame with missing data, we use ``np.nan`` to +represent missing values. Alternatively, you may pass a ``numpy.MaskedArray`` +as the data argument to the DataFrame constructor, and its masked entries will +be considered missing. See :ref:`Missing data ` for more. + +Alternate constructors +~~~~~~~~~~~~~~~~~~~~~~ + +.. _basics.dataframe.from_dict: + +**DataFrame.from_dict** + +:meth:`DataFrame.from_dict` takes a dict of dicts or a dict of array-like sequences +and returns a DataFrame. It operates like the :class:`DataFrame` constructor except +for the ``orient`` parameter which is ``'columns'`` by default, but which can be +set to ``'index'`` in order to use the dict keys as row labels. + + +.. ipython:: python + + pd.DataFrame.from_dict(dict([("A", [1, 2, 3]), ("B", [4, 5, 6])])) + +If you pass ``orient='index'``, the keys will be the row labels. In this +case, you can also pass the desired column names: + +.. ipython:: python + + pd.DataFrame.from_dict( + dict([("A", [1, 2, 3]), ("B", [4, 5, 6])]), + orient="index", + columns=["one", "two", "three"], + ) + +.. _basics.dataframe.from_records: + +**DataFrame.from_records** + +:meth:`DataFrame.from_records` takes a list of tuples or an ndarray with structured +dtype. It works analogously to the normal :class:`DataFrame` constructor, except that +the resulting DataFrame index may be a specific field of the structured +dtype. + +.. ipython:: python + + data + pd.DataFrame.from_records(data, index="C") + +.. _basics.dataframe.sel_add_del: + +Column selection, addition, deletion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can treat a :class:`DataFrame` semantically like a dict of like-indexed :class:`Series` +objects. Getting, setting, and deleting columns works with the same syntax as +the analogous dict operations: + +.. ipython:: python + + df["one"] + df["three"] = df["one"] * df["two"] + df["flag"] = df["one"] > 2 + df + +Columns can be deleted or popped like with a dict: + +.. ipython:: python + + del df["two"] + three = df.pop("three") + df + +When inserting a scalar value, it will naturally be propagated to fill the +column: + +.. ipython:: python + + df["foo"] = "bar" + df + +When inserting a :class:`Series` that does not have the same index as the :class:`DataFrame`, it +will be conformed to the DataFrame's index: + +.. ipython:: python + + df["one_trunc"] = df["one"][:2] + df + +You can insert raw ndarrays but their length must match the length of the +DataFrame's index. + +By default, columns get inserted at the end. :meth:`DataFrame.insert` +inserts at a particular location in the columns: + +.. ipython:: python + + df.insert(1, "bar", df["one"]) + df + +.. _dsintro.chained_assignment: + +Assigning new columns in method chains +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Inspired by `dplyr's +`__ +``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign` +method that allows you to easily create new columns that are potentially +derived from existing columns. + +.. ipython:: python + + iris = pd.read_csv("data/iris.data") + iris.head() + iris.assign(sepal_ratio=iris["SepalWidth"] / iris["SepalLength"]).head() + +In the example above, we inserted a precomputed value. We can also pass in +a function of one argument to be evaluated on the DataFrame being assigned to. + +.. 
ipython:: python + + iris.assign(sepal_ratio=lambda x: (x["SepalWidth"] / x["SepalLength"])).head() + +:meth:`~pandas.DataFrame.assign` **always** returns a copy of the data, leaving the original +DataFrame untouched. + +Passing a callable, as opposed to an actual value to be inserted, is +useful when you don't have a reference to the DataFrame at hand. This is +common when using :meth:`~pandas.DataFrame.assign` in a chain of operations. For example, +we can limit the DataFrame to just those observations with a Sepal Length +greater than 5, calculate the ratio, and plot: + +.. ipython:: python + + @savefig basics_assign.png + ( + iris.query("SepalLength > 5") + .assign( + SepalRatio=lambda x: x.SepalWidth / x.SepalLength, + PetalRatio=lambda x: x.PetalWidth / x.PetalLength, + ) + .plot(kind="scatter", x="SepalRatio", y="PetalRatio") + ) + +Since a function is passed in, the function is computed on the DataFrame +being assigned to. Importantly, this is the DataFrame that's been filtered +to those rows with sepal length greater than 5. The filtering happens first, +and then the ratio calculations. This is an example where we didn't +have a reference to the *filtered* DataFrame available. + +The function signature for :meth:`~pandas.DataFrame.assign` is simply ``**kwargs``. The keys +are the column names for the new fields, and the values are either a value +to be inserted (for example, a :class:`Series` or NumPy array), or a function +of one argument to be called on the :class:`DataFrame`. A *copy* of the original +:class:`DataFrame` is returned, with the new values inserted. + +The order of ``**kwargs`` is preserved. This allows +for *dependent* assignment, where an expression later in ``**kwargs`` can refer +to a column created earlier in the same :meth:`~DataFrame.assign`. + +.. ipython:: python + + dfa = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + dfa.assign(C=lambda x: x["A"] + x["B"], D=lambda x: x["A"] + x["C"]) + +In the second expression, ``x['C']`` will refer to the newly created column, +that's equal to ``dfa['A'] + dfa['B']``. + + +Indexing / selection +~~~~~~~~~~~~~~~~~~~~ +The basics of indexing are as follows: + +.. csv-table:: + :header: "Operation", "Syntax", "Result" + :widths: 30, 20, 10 + + Select column, ``df[col]``, Series + Select row by label, ``df.loc[label]``, Series + Select row by integer location, ``df.iloc[loc]``, Series + Slice rows, ``df[5:10]``, DataFrame + Select rows by boolean vector, ``df[bool_vec]``, DataFrame + +Row selection, for example, returns a :class:`Series` whose index is the columns of the +:class:`DataFrame`: + +.. ipython:: python + + df.loc["b"] + df.iloc[2] + +For a more exhaustive treatment of sophisticated label-based indexing and +slicing, see the :ref:`section on indexing `. We will address the +fundamentals of reindexing / conforming to new sets of labels in the +:ref:`section on reindexing `. + +.. _dsintro.alignment: + +Data alignment and arithmetic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Data alignment between :class:`DataFrame` objects automatically align on **both the +columns and the index (row labels)**. Again, the resulting object will have the +union of the column and row labels. + +.. 
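+
+A compact, deterministic sketch of the idea (added here with small
+hypothetical frames; labels missing from either operand become ``NaN`` in the
+result):
+
+.. code-block:: python
+
+   left = pd.DataFrame({"A": [1.0, 2.0]}, index=["x", "y"])
+   right = pd.DataFrame({"A": [10.0, 20.0], "B": [1.0, 2.0]}, index=["y", "z"])
+   # union of rows (x, y, z) and columns (A, B); only the overlap ("y", "A") is filled
+   left + right
+
+With larger, randomly generated frames:
+
+..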
ipython:: python + + df = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) + df2 = pd.DataFrame(np.random.randn(7, 3), columns=["A", "B", "C"]) + df + df2 + +When doing an operation between :class:`DataFrame` and :class:`Series`, the default behavior is +to align the :class:`Series` **index** on the :class:`DataFrame` **columns**, thus `broadcasting +`__ +row-wise. For example: + +.. ipython:: python + + df - df.iloc[0] + +For explicit control over the matching and broadcasting behavior, see the +section on :ref:`flexible binary operations `. + +Arithmetic operations with scalars operate element-wise: + +.. ipython:: python + + df * 5 + 2 + 1 / df + df ** 4 + +.. _dsintro.boolean: + +Boolean operators operate element-wise as well: + +.. ipython:: python + + df1 = pd.DataFrame({"a": [1, 0, 1], "b": [0, 1, 1]}, dtype=bool) + df2 = pd.DataFrame({"a": [0, 1, 1], "b": [1, 1, 0]}, dtype=bool) + df1 & df2 + df1 | df2 + df1 ^ df2 + -df1 + +Transposing +~~~~~~~~~~~ + +To transpose, access the ``T`` attribute or :meth:`DataFrame.transpose`, +similar to an ndarray: + +.. ipython:: python + + # only show the first 5 rows + df[:5].T + +.. _dsintro.numpy_interop: + +DataFrame interoperability with NumPy functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Most NumPy functions can be called directly on :class:`Series` and :class:`DataFrame`. + +.. ipython:: python + + np.exp(df) + np.asarray(df) + +:class:`DataFrame` is not intended to be a drop-in replacement for ndarray as its +indexing semantics and data model are quite different in places from an n-dimensional +array. + +:class:`Series` implements ``__array_ufunc__``, which allows it to work with NumPy's +`universal functions `_. + +The ufunc is applied to the underlying array in a :class:`Series`. + +.. ipython:: python + + ser = pd.Series([1, 2, 3, 4]) + np.exp(ser) + +.. versionchanged:: 0.25.0 + + When multiple :class:`Series` are passed to a ufunc, they are aligned before + performing the operation. + +Like other parts of the library, pandas will automatically align labeled inputs +as part of a ufunc with multiple inputs. For example, using :meth:`numpy.remainder` +on two :class:`Series` with differently ordered labels will align before the operation. + +.. ipython:: python + + ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"]) + ser2 = pd.Series([1, 3, 5], index=["b", "a", "c"]) + ser1 + ser2 + np.remainder(ser1, ser2) + +As usual, the union of the two indices is taken, and non-overlapping values are filled +with missing values. + +.. ipython:: python + + ser3 = pd.Series([2, 4, 6], index=["b", "c", "d"]) + ser3 + np.remainder(ser1, ser3) + +When a binary ufunc is applied to a :class:`Series` and :class:`Index`, the :class:`Series` +implementation takes precedence and a :class:`Series` is returned. + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + idx = pd.Index([4, 5, 6]) + + np.maximum(ser, idx) + +NumPy ufuncs are safe to apply to :class:`Series` backed by non-ndarray arrays, +for example :class:`arrays.SparseArray` (see :ref:`sparse.calculation`). If possible, +the ufunc is applied without converting the underlying data to an ndarray. + +Console display +~~~~~~~~~~~~~~~ + +A very large :class:`DataFrame` will be truncated to display them in the console. +You can also get a summary using :meth:`~pandas.DataFrame.info`. +(The **baseball** dataset is from the **plyr** R package): + +.. ipython:: python + :suppress: + + # force a summary to be printed + pd.set_option("display.max_rows", 5) + +.. 
ipython:: python + + baseball = pd.read_csv("data/baseball.csv") + print(baseball) + baseball.info() + +.. ipython:: python + :suppress: + :okwarning: + + # restore GlobalPrintConfig + pd.reset_option(r"^display\.") + +However, using :meth:`DataFrame.to_string` will return a string representation of the +:class:`DataFrame` in tabular form, though it won't always fit the console width: + +.. ipython:: python + + print(baseball.iloc[-20:, :12].to_string()) + +Wide DataFrames will be printed across multiple rows by +default: + +.. ipython:: python + + pd.DataFrame(np.random.randn(3, 12)) + +You can change how much to print on a single row by setting the ``display.width`` +option: + +.. ipython:: python + + pd.set_option("display.width", 40) # default is 80 + + pd.DataFrame(np.random.randn(3, 12)) + +You can adjust the max width of the individual columns by setting ``display.max_colwidth`` + +.. ipython:: python + + datafile = { + "filename": ["filename_01", "filename_02"], + "path": [ + "media/user_name/storage/folder_01/filename_01", + "media/user_name/storage/folder_02/filename_02", + ], + } + + pd.set_option("display.max_colwidth", 30) + pd.DataFrame(datafile) + + pd.set_option("display.max_colwidth", 100) + pd.DataFrame(datafile) + +.. ipython:: python + :suppress: + + pd.reset_option("display.width") + pd.reset_option("display.max_colwidth") + +You can also disable this feature via the ``expand_frame_repr`` option. +This will print the table in one block. + +DataFrame column attribute access and IPython completion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a :class:`DataFrame` column label is a valid Python variable name, the column can be +accessed like an attribute: + +.. ipython:: python + + df = pd.DataFrame({"foo1": np.random.randn(5), "foo2": np.random.randn(5)}) + df + df.foo1 + +The columns are also connected to the `IPython `__ +completion mechanism so they can be tab-completed: + +.. code-block:: ipython + + In [5]: df.foo # noqa: E225, E999 + df.foo1 df.foo2 diff --git a/doc/source/user_guide/duplicates.rst b/doc/source/user_guide/duplicates.rst new file mode 100644 index 00000000..78947898 --- /dev/null +++ b/doc/source/user_guide/duplicates.rst @@ -0,0 +1,206 @@ +.. _duplicates: + +**************** +Duplicate Labels +**************** + +:class:`Index` objects are not required to be unique; you can have duplicate row +or column labels. This may be a bit confusing at first. If you're familiar with +SQL, you know that row labels are similar to a primary key on a table, and you +would never want duplicates in a SQL table. But one of pandas' roles is to clean +messy, real-world data before it goes to some downstream system. And real-world +data has duplicates, even in fields that are supposed to be unique. + +This section describes how duplicate labels change the behavior of certain +operations, and how prevent duplicates from arising during operations, or to +detect them if they do. + +.. ipython:: python + + import pandas as pd + import numpy as np + +Consequences of Duplicate Labels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some pandas methods (:meth:`Series.reindex` for example) just don't work with +duplicates present. The output can't be determined, and so pandas raises. + +.. ipython:: python + :okexcept: + :okwarning: + + s1 = pd.Series([0, 1, 2], index=["a", "b", "b"]) + s1.reindex(["a", "b", "c"]) + +Other methods, like indexing, can give very surprising results. Typically +indexing with a scalar will *reduce dimensionality*. 
Slicing a ``DataFrame`` +with a scalar will return a ``Series``. Slicing a ``Series`` with a scalar will +return a scalar. But with duplicates, this isn't the case. + +.. ipython:: python + + df1 = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=["A", "A", "B"]) + df1 + +We have duplicates in the columns. If we slice ``'B'``, we get back a ``Series`` + +.. ipython:: python + + df1["B"] # a series + +But slicing ``'A'`` returns a ``DataFrame`` + + +.. ipython:: python + + df1["A"] # a DataFrame + +This applies to row labels as well + +.. ipython:: python + + df2 = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "a", "b"]) + df2 + df2.loc["b", "A"] # a scalar + df2.loc["a", "A"] # a Series + +Duplicate Label Detection +~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can check whether an :class:`Index` (storing the row or column labels) is +unique with :attr:`Index.is_unique`: + +.. ipython:: python + + df2 + df2.index.is_unique + df2.columns.is_unique + +.. note:: + + Checking whether an index is unique is somewhat expensive for large datasets. + pandas does cache this result, so re-checking on the same index is very fast. + +:meth:`Index.duplicated` will return a boolean ndarray indicating whether a +label is repeated. + +.. ipython:: python + + df2.index.duplicated() + +Which can be used as a boolean filter to drop duplicate rows. + +.. ipython:: python + + df2.loc[~df2.index.duplicated(), :] + +If you need additional logic to handle duplicate labels, rather than just +dropping the repeats, using :meth:`~DataFrame.groupby` on the index is a common +trick. For example, we'll resolve duplicates by taking the average of all rows +with the same label. + +.. ipython:: python + + df2.groupby(level=0).mean() + +.. _duplicates.disallow: + +Disallowing Duplicate Labels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.2.0 + +As noted above, handling duplicates is an important feature when reading in raw +data. That said, you may want to avoid introducing duplicates as part of a data +processing pipeline (from methods like :meth:`pandas.concat`, +:meth:`~DataFrame.rename`, etc.). Both :class:`Series` and :class:`DataFrame` +*disallow* duplicate labels by calling ``.set_flags(allows_duplicate_labels=False)``. +(the default is to allow them). If there are duplicate labels, an exception +will be raised. + +.. ipython:: python + :okexcept: + + pd.Series([0, 1, 2], index=["a", "b", "b"]).set_flags(allows_duplicate_labels=False) + +This applies to both row and column labels for a :class:`DataFrame` + +.. ipython:: python + :okexcept: + + pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=["A", "B", "C"],).set_flags( + allows_duplicate_labels=False + ) + +This attribute can be checked or set with :attr:`~DataFrame.flags.allows_duplicate_labels`, +which indicates whether that object can have duplicate labels. + +.. ipython:: python + + df = pd.DataFrame({"A": [0, 1, 2, 3]}, index=["x", "y", "X", "Y"]).set_flags( + allows_duplicate_labels=False + ) + df + df.flags.allows_duplicate_labels + +:meth:`DataFrame.set_flags` can be used to return a new ``DataFrame`` with attributes +like ``allows_duplicate_labels`` set to some value + +.. ipython:: python + + df2 = df.set_flags(allows_duplicate_labels=True) + df2.flags.allows_duplicate_labels + +The new ``DataFrame`` returned is a view on the same data as the old ``DataFrame``. +Or the property can just be set directly on the same object + + +.. 
ipython:: python + + df2.flags.allows_duplicate_labels = False + df2.flags.allows_duplicate_labels + +When processing raw, messy data you might initially read in the messy data +(which potentially has duplicate labels), deduplicate, and then disallow duplicates +going forward, to ensure that your data pipeline doesn't introduce duplicates. + + +.. code-block:: python + + >>> raw = pd.read_csv("...") + >>> deduplicated = raw.groupby(level=0).first() # remove duplicates + >>> deduplicated.flags.allows_duplicate_labels = False # disallow going forward + +Setting ``allows_duplicate_labels=False`` on a ``Series`` or ``DataFrame`` with duplicate +labels or performing an operation that introduces duplicate labels on a ``Series`` or +``DataFrame`` that disallows duplicates will raise an +:class:`errors.DuplicateLabelError`. + +.. ipython:: python + :okexcept: + + df.rename(str.upper) + +This error message contains the labels that are duplicated, and the numeric positions +of all the duplicates (including the "original") in the ``Series`` or ``DataFrame`` + +Duplicate Label Propagation +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In general, disallowing duplicates is "sticky". It's preserved through +operations. + +.. ipython:: python + :okexcept: + + s1 = pd.Series(0, index=["a", "b"]).set_flags(allows_duplicate_labels=False) + s1 + s1.head().rename({"a": "b"}) + +.. warning:: + + This is an experimental feature. Currently, many methods fail to + propagate the ``allows_duplicate_labels`` value. In future versions + it is expected that every method taking or returning one or more + DataFrame or Series objects will propagate ``allows_duplicate_labels``. diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst new file mode 100644 index 00000000..1a1229f9 --- /dev/null +++ b/doc/source/user_guide/enhancingperf.rst @@ -0,0 +1,866 @@ +.. _enhancingperf: + +{{ header }} + +********************* +Enhancing performance +********************* + +In this part of the tutorial, we will investigate how to speed up certain +functions operating on pandas :class:`DataFrame` using three different techniques: +Cython, Numba and :func:`pandas.eval`. We will see a speed improvement of ~200 +when we use Cython and Numba on a test function operating row-wise on the +:class:`DataFrame`. Using :func:`pandas.eval` we will speed up a sum by an order of +~2. + +.. note:: + + In addition to following the steps in this tutorial, users interested in enhancing + performance are highly encouraged to install the + :ref:`recommended dependencies` for pandas. + These dependencies are often not installed by default, but will offer speed + improvements if present. + +.. _enhancingperf.cython: + +Cython (writing C extensions for pandas) +---------------------------------------- + +For many use cases writing pandas in pure Python and NumPy is sufficient. In some +computationally heavy applications however, it can be possible to achieve sizable +speed-ups by offloading work to `cython `__. + +This tutorial assumes you have refactored as much as possible in Python, for example +by trying to remove for-loops and making use of NumPy vectorization. It's always worth +optimising in Python first. + +This tutorial walks through a "typical" process of cythonizing a slow computation. +We use an `example from the Cython documentation `__ +but in the context of pandas. Our final cythonized solution is around 100 times +faster than the pure Python solution. + +.. 
_enhancingperf.pure: + +Pure Python +~~~~~~~~~~~ + +We have a :class:`DataFrame` to which we want to apply a function row-wise. + +.. ipython:: python + + df = pd.DataFrame( + { + "a": np.random.randn(1000), + "b": np.random.randn(1000), + "N": np.random.randint(100, 1000, (1000)), + "x": "x", + } + ) + df + +Here's the function in pure Python: + +.. ipython:: python + + def f(x): + return x * (x - 1) + + + def integrate_f(a, b, N): + s = 0 + dx = (b - a) / N + for i in range(N): + s += f(a + i * dx) + return s * dx + +We achieve our result by using :meth:`DataFrame.apply` (row-wise): + +.. ipython:: python + + %timeit df.apply(lambda x: integrate_f(x["a"], x["b"], x["N"]), axis=1) + +But clearly this isn't fast enough for us. Let's take a look and see where the +time is spent during this operation (limited to the most time consuming +four calls) using the `prun ipython magic function `__: + +.. ipython:: python + + %prun -l 4 df.apply(lambda x: integrate_f(x["a"], x["b"], x["N"]), axis=1) # noqa E999 + +By far the majority of time is spend inside either ``integrate_f`` or ``f``, +hence we'll concentrate our efforts cythonizing these two functions. + +.. _enhancingperf.plain: + +Plain Cython +~~~~~~~~~~~~ + +First we're going to need to import the Cython magic function to IPython: + +.. ipython:: python + :okwarning: + + %load_ext Cython + + +Now, let's simply copy our functions over to Cython as is (the suffix +is here to distinguish between function versions): + +.. ipython:: + + In [2]: %%cython + ...: def f_plain(x): + ...: return x * (x - 1) + ...: def integrate_f_plain(a, b, N): + ...: s = 0 + ...: dx = (b - a) / N + ...: for i in range(N): + ...: s += f_plain(a + i * dx) + ...: return s * dx + ...: + +.. note:: + + If you're having trouble pasting the above into your ipython, you may need + to be using bleeding edge IPython for paste to play well with cell magics. + + +.. ipython:: python + + %timeit df.apply(lambda x: integrate_f_plain(x["a"], x["b"], x["N"]), axis=1) + +Already this has shaved a third off, not too bad for a simple copy and paste. + +.. _enhancingperf.type: + +Adding type +~~~~~~~~~~~ + +We get another huge improvement simply by providing type information: + +.. ipython:: + + In [3]: %%cython + ...: cdef double f_typed(double x) except? -2: + ...: return x * (x - 1) + ...: cpdef double integrate_f_typed(double a, double b, int N): + ...: cdef int i + ...: cdef double s, dx + ...: s = 0 + ...: dx = (b - a) / N + ...: for i in range(N): + ...: s += f_typed(a + i * dx) + ...: return s * dx + ...: + +.. ipython:: python + + %timeit df.apply(lambda x: integrate_f_typed(x["a"], x["b"], x["N"]), axis=1) + +Now, we're talking! It's now over ten times faster than the original Python +implementation, and we haven't *really* modified the code. Let's have another +look at what's eating up time: + +.. ipython:: python + + %prun -l 4 df.apply(lambda x: integrate_f_typed(x["a"], x["b"], x["N"]), axis=1) + +.. _enhancingperf.ndarray: + +Using ndarray +~~~~~~~~~~~~~ + +It's calling series a lot! It's creating a :class:`Series` from each row, and calling get from both +the index and the series (three times for each row). Function calls are expensive +in Python, so maybe we could minimize these by cythonizing the apply part. + +.. note:: + + We are now passing ndarrays into the Cython function, fortunately Cython plays + very nicely with NumPy. + +.. ipython:: + + In [4]: %%cython + ...: cimport numpy as np + ...: import numpy as np + ...: cdef double f_typed(double x) except? 
-2: + ...: return x * (x - 1) + ...: cpdef double integrate_f_typed(double a, double b, int N): + ...: cdef int i + ...: cdef double s, dx + ...: s = 0 + ...: dx = (b - a) / N + ...: for i in range(N): + ...: s += f_typed(a + i * dx) + ...: return s * dx + ...: cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b, + ...: np.ndarray col_N): + ...: assert (col_a.dtype == np.float_ + ...: and col_b.dtype == np.float_ and col_N.dtype == np.int_) + ...: cdef Py_ssize_t i, n = len(col_N) + ...: assert (len(col_a) == len(col_b) == n) + ...: cdef np.ndarray[double] res = np.empty(n) + ...: for i in range(len(col_a)): + ...: res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i]) + ...: return res + ...: + + +The implementation is simple, it creates an array of zeros and loops over +the rows, applying our ``integrate_f_typed``, and putting this in the zeros array. + + +.. warning:: + + You can **not pass** a :class:`Series` directly as a ``ndarray`` typed parameter + to a Cython function. Instead pass the actual ``ndarray`` using the + :meth:`Series.to_numpy`. The reason is that the Cython + definition is specific to an ndarray and not the passed :class:`Series`. + + So, do not do this: + + .. code-block:: python + + apply_integrate_f(df["a"], df["b"], df["N"]) + + But rather, use :meth:`Series.to_numpy` to get the underlying ``ndarray``: + + .. code-block:: python + + apply_integrate_f(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()) + +.. note:: + + Loops like this would be *extremely* slow in Python, but in Cython looping + over NumPy arrays is *fast*. + +.. ipython:: python + + %timeit apply_integrate_f(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()) + +We've gotten another big improvement. Let's check again where the time is spent: + +.. ipython:: python + + %prun -l 4 apply_integrate_f(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()) + +As one might expect, the majority of the time is now spent in ``apply_integrate_f``, +so if we wanted to make anymore efficiencies we must continue to concentrate our +efforts here. + +.. _enhancingperf.boundswrap: + +More advanced techniques +~~~~~~~~~~~~~~~~~~~~~~~~ + +There is still hope for improvement. Here's an example of using some more +advanced Cython techniques: + +.. ipython:: + + In [5]: %%cython + ...: cimport cython + ...: cimport numpy as np + ...: import numpy as np + ...: cdef np.float64_t f_typed(np.float64_t x) except? -2: + ...: return x * (x - 1) + ...: cpdef np.float64_t integrate_f_typed(np.float64_t a, np.float64_t b, np.int64_t N): + ...: cdef np.int64_t i + ...: cdef np.float64_t s = 0.0, dx + ...: dx = (b - a) / N + ...: for i in range(N): + ...: s += f_typed(a + i * dx) + ...: return s * dx + ...: @cython.boundscheck(False) + ...: @cython.wraparound(False) + ...: cpdef np.ndarray[np.float64_t] apply_integrate_f_wrap( + ...: np.ndarray[np.float64_t] col_a, + ...: np.ndarray[np.float64_t] col_b, + ...: np.ndarray[np.int64_t] col_N + ...: ): + ...: cdef np.int64_t i, n = len(col_N) + ...: assert len(col_a) == len(col_b) == n + ...: cdef np.ndarray[np.float64_t] res = np.empty(n, dtype=np.float64) + ...: for i in range(n): + ...: res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i]) + ...: return res + ...: + +.. 
ipython:: python + + %timeit apply_integrate_f_wrap(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()) + +Even faster, with the caveat that a bug in our Cython code (an off-by-one error, +for example) might cause a segfault because memory access isn't checked. +For more about ``boundscheck`` and ``wraparound``, see the Cython docs on +`compiler directives `__. + +.. _enhancingperf.numba: + +Numba (JIT compilation) +----------------------- + +An alternative to statically compiling Cython code is to use a dynamic just-in-time (JIT) compiler with `Numba `__. + +Numba allows you to write a pure Python function which can be JIT compiled to native machine instructions, similar in performance to C, C++ and Fortran, +by decorating your function with ``@jit``. + +Numba works by generating optimized machine code using the LLVM compiler infrastructure at import time, runtime, or statically (using the included pycc tool). +Numba supports compilation of Python to run on either CPU or GPU hardware and is designed to integrate with the Python scientific software stack. + +.. note:: + + The ``@jit`` compilation will add overhead to the runtime of the function, so performance benefits may not be realized especially when using small data sets. + Consider `caching `__ your function to avoid compilation overhead each time your function is run. + +Numba can be used in 2 ways with pandas: + +#. Specify the ``engine="numba"`` keyword in select pandas methods +#. Define your own Python function decorated with ``@jit`` and pass the underlying NumPy array of :class:`Series` or :class:`DataFrame` (using ``to_numpy()``) into the function + +pandas Numba Engine +~~~~~~~~~~~~~~~~~~~ + +If Numba is installed, one can specify ``engine="numba"`` in select pandas methods to execute the method using Numba. +Methods that support ``engine="numba"`` will also have an ``engine_kwargs`` keyword that accepts a dictionary that allows one to specify +``"nogil"``, ``"nopython"`` and ``"parallel"`` keys with boolean values to pass into the ``@jit`` decorator. +If ``engine_kwargs`` is not specified, it defaults to ``{"nogil": False, "nopython": True, "parallel": False}`` unless otherwise specified. + +In terms of performance, **the first time a function is run using the Numba engine will be slow** +as Numba will have some function compilation overhead. However, the JIT compiled functions are cached, +and subsequent calls will be fast. In general, the Numba engine is performant with +a larger amount of data points (e.g. 1+ million). + +.. code-block:: ipython + + In [1]: data = pd.Series(range(1_000_000)) # noqa: E225 + + In [2]: roll = data.rolling(10) + + In [3]: def f(x): + ...: return np.sum(x) + 5 + # Run the first time, compilation time will affect performance + In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True) + 1.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each) + # Function is cached and performance will improve + In [5]: %timeit roll.apply(f, engine='numba', raw=True) + 188 ms ± 1.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [6]: %timeit roll.apply(f, engine='cython', raw=True) + 3.92 s ± 59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + +If your compute hardware contains multiple CPUs, the largest performance gain can be realized by setting ``parallel`` to ``True`` +to leverage more than 1 CPU. 
Internally, pandas leverages numba to parallelize computations over the columns of a :class:`DataFrame`; +therefore, this performance benefit is only beneficial for a :class:`DataFrame` with a large number of columns. + +.. code-block:: ipython + + In [1]: import numba + + In [2]: numba.set_num_threads(1) + + In [3]: df = pd.DataFrame(np.random.randn(10_000, 100)) + + In [4]: roll = df.rolling(100) + + In [5]: %timeit roll.mean(engine="numba", engine_kwargs={"parallel": True}) + 347 ms ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [6]: numba.set_num_threads(2) + + In [7]: %timeit roll.mean(engine="numba", engine_kwargs={"parallel": True}) + 201 ms ± 2.97 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + +Custom Function Examples +~~~~~~~~~~~~~~~~~~~~~~~~ + +A custom Python function decorated with ``@jit`` can be used with pandas objects by passing their NumPy array +representations with ``to_numpy()``. + +.. code-block:: python + + import numba + + + @numba.jit + def f_plain(x): + return x * (x - 1) + + + @numba.jit + def integrate_f_numba(a, b, N): + s = 0 + dx = (b - a) / N + for i in range(N): + s += f_plain(a + i * dx) + return s * dx + + + @numba.jit + def apply_integrate_f_numba(col_a, col_b, col_N): + n = len(col_N) + result = np.empty(n, dtype="float64") + assert len(col_a) == len(col_b) == n + for i in range(n): + result[i] = integrate_f_numba(col_a[i], col_b[i], col_N[i]) + return result + + + def compute_numba(df): + result = apply_integrate_f_numba( + df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy() + ) + return pd.Series(result, index=df.index, name="result") + + +.. code-block:: ipython + + In [4]: %timeit compute_numba(df) + 1000 loops, best of 3: 798 us per loop + +In this example, using Numba was faster than Cython. + +Numba can also be used to write vectorized functions that do not require the user to explicitly +loop over the observations of a vector; a vectorized function will be applied to each row automatically. +Consider the following example of doubling each observation: + +.. code-block:: python + + import numba + + + def double_every_value_nonumba(x): + return x * 2 + + + @numba.vectorize + def double_every_value_withnumba(x): # noqa E501 + return x * 2 + +.. code-block:: ipython + + # Custom function without numba + In [5]: %timeit df["col1_doubled"] = df["a"].apply(double_every_value_nonumba) # noqa E501 + 1000 loops, best of 3: 797 us per loop + + # Standard implementation (faster than a custom function) + In [6]: %timeit df["col1_doubled"] = df["a"] * 2 + 1000 loops, best of 3: 233 us per loop + + # Custom function with numba + In [7]: %timeit df["col1_doubled"] = double_every_value_withnumba(df["a"].to_numpy()) + 1000 loops, best of 3: 145 us per loop + +Caveats +~~~~~~~ + +Numba is best at accelerating functions that apply numerical functions to NumPy +arrays. If you try to ``@jit`` a function that contains unsupported `Python `__ +or `NumPy `__ +code, compilation will revert `object mode `__ which +will mostly likely not speed up your function. If you would +prefer that Numba throw an error if it cannot compile a function in a way that +speeds up your code, pass Numba the argument +``nopython=True`` (e.g. ``@jit(nopython=True)``). For more on +troubleshooting Numba modes, see the `Numba troubleshooting page +`__. + +Using ``parallel=True`` (e.g. ``@jit(parallel=True)``) may result in a ``SIGABRT`` if the threading layer leads to unsafe +behavior. 
You can first `specify a safe threading layer `__ +before running a JIT function with ``parallel=True``. + +Generally if the you encounter a segfault (``SIGSEGV``) while using Numba, please report the issue +to the `Numba issue tracker. `__ + +.. _enhancingperf.eval: + +Expression evaluation via :func:`~pandas.eval` +----------------------------------------------- + +The top-level function :func:`pandas.eval` implements expression evaluation of +:class:`~pandas.Series` and :class:`~pandas.DataFrame` objects. + +.. note:: + + To benefit from using :func:`~pandas.eval` you need to + install ``numexpr``. See the :ref:`recommended dependencies section + ` for more details. + +The point of using :func:`~pandas.eval` for expression evaluation rather than +plain Python is two-fold: 1) large :class:`~pandas.DataFrame` objects are +evaluated more efficiently and 2) large arithmetic and boolean expressions are +evaluated all at once by the underlying engine (by default ``numexpr`` is used +for evaluation). + +.. note:: + + You should not use :func:`~pandas.eval` for simple + expressions or for expressions involving small DataFrames. In fact, + :func:`~pandas.eval` is many orders of magnitude slower for + smaller expressions/objects than plain ol' Python. A good rule of thumb is + to only use :func:`~pandas.eval` when you have a + :class:`~pandas.core.frame.DataFrame` with more than 10,000 rows. + + +:func:`~pandas.eval` supports all arithmetic expressions supported by the +engine in addition to some extensions available only in pandas. + +.. note:: + + The larger the frame and the larger the expression the more speedup you will + see from using :func:`~pandas.eval`. + +Supported syntax +~~~~~~~~~~~~~~~~ + +These operations are supported by :func:`pandas.eval`: + +* Arithmetic operations except for the left shift (``<<``) and right shift + (``>>``) operators, e.g., ``df + 2 * pi / s ** 4 % 42 - the_golden_ratio`` +* Comparison operations, including chained comparisons, e.g., ``2 < df < df2`` +* Boolean operations, e.g., ``df < df2 and df3 < df4 or not df_bool`` +* ``list`` and ``tuple`` literals, e.g., ``[1, 2]`` or ``(1, 2)`` +* Attribute access, e.g., ``df.a`` +* Subscript expressions, e.g., ``df[0]`` +* Simple variable evaluation, e.g., ``pd.eval("df")`` (this is not very useful) +* Math functions: ``sin``, ``cos``, ``exp``, ``log``, ``expm1``, ``log1p``, + ``sqrt``, ``sinh``, ``cosh``, ``tanh``, ``arcsin``, ``arccos``, ``arctan``, ``arccosh``, + ``arcsinh``, ``arctanh``, ``abs``, ``arctan2`` and ``log10``. + +This Python syntax is **not** allowed: + +* Expressions + + * Function calls other than math functions. + * ``is``/``is not`` operations + * ``if`` expressions + * ``lambda`` expressions + * ``list``/``set``/``dict`` comprehensions + * Literal ``dict`` and ``set`` expressions + * ``yield`` expressions + * Generator expressions + * Boolean expressions consisting of only scalar values + +* Statements + + * Neither `simple `__ + nor `compound `__ + statements are allowed. This includes things like ``for``, ``while``, and + ``if``. + + + +:func:`~pandas.eval` examples +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:func:`pandas.eval` works well with expressions containing large arrays. + +First let's create a few decent-sized arrays to play with: + +.. ipython:: python + + nrows, ncols = 20000, 100 + df1, df2, df3, df4 = [pd.DataFrame(np.random.randn(nrows, ncols)) for _ in range(4)] + + +Now let's compare adding them together using plain ol' Python versus +:func:`~pandas.eval`: + +.. 
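+
+As an added sanity check before the timings (not part of the original
+comparison), the two approaches should agree; ``np.allclose`` is used here
+since the ``numexpr`` engine may differ from plain Python by floating-point
+rounding:
+
+.. code-block:: python
+
+   np.allclose(pd.eval("df1 + df2 + df3 + df4"), df1 + df2 + df3 + df4)  # True
+
+The timings themselves, first with plain Python:
+
+..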
ipython:: python + + %timeit df1 + df2 + df3 + df4 + +.. ipython:: python + + %timeit pd.eval("df1 + df2 + df3 + df4") + + +Now let's do the same thing but with comparisons: + +.. ipython:: python + + %timeit (df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0) + +.. ipython:: python + + %timeit pd.eval("(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)") + + +:func:`~pandas.eval` also works with unaligned pandas objects: + +.. ipython:: python + + s = pd.Series(np.random.randn(50)) + %timeit df1 + df2 + df3 + df4 + s + +.. ipython:: python + + %timeit pd.eval("df1 + df2 + df3 + df4 + s") + +.. note:: + + Operations such as + + .. code-block:: python + + 1 and 2 # would parse to 1 & 2, but should evaluate to 2 + 3 or 4 # would parse to 3 | 4, but should evaluate to 3 + ~1 # this is okay, but slower when using eval + + should be performed in Python. An exception will be raised if you try to + perform any boolean/bitwise operations with scalar operands that are not + of type ``bool`` or ``np.bool_``. Again, you should perform these kinds of + operations in plain Python. + +The :meth:`DataFrame.eval` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to the top level :func:`pandas.eval` function you can also +evaluate an expression in the "context" of a :class:`~pandas.DataFrame`. + +.. ipython:: python + :suppress: + + try: + del a + except NameError: + pass + + try: + del b + except NameError: + pass + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 2), columns=["a", "b"]) + df.eval("a + b") + +Any expression that is a valid :func:`pandas.eval` expression is also a valid +:meth:`DataFrame.eval` expression, with the added benefit that you don't have to +prefix the name of the :class:`~pandas.DataFrame` to the column(s) you're +interested in evaluating. + +In addition, you can perform assignment of columns within an expression. +This allows for *formulaic evaluation*. The assignment target can be a +new column name or an existing column name, and it must be a valid Python +identifier. + +The ``inplace`` keyword determines whether this assignment will performed +on the original :class:`DataFrame` or return a copy with the new column. + +.. ipython:: python + + df = pd.DataFrame(dict(a=range(5), b=range(5, 10))) + df.eval("c = a + b", inplace=True) + df.eval("d = a + b + c", inplace=True) + df.eval("a = 1", inplace=True) + df + +When ``inplace`` is set to ``False``, the default, a copy of the :class:`DataFrame` with the +new or modified columns is returned and the original frame is unchanged. + +.. ipython:: python + + df + df.eval("e = a - c", inplace=False) + df + +As a convenience, multiple assignments can be performed by using a +multi-line string. + +.. ipython:: python + + df.eval( + """ + c = a + b + d = a + b + c + a = 1""", + inplace=False, + ) + +The equivalent in standard Python would be + +.. ipython:: python + + df = pd.DataFrame(dict(a=range(5), b=range(5, 10))) + df["c"] = df["a"] + df["b"] + df["d"] = df["a"] + df["b"] + df["c"] + df["a"] = 1 + df + +The :class:`DataFrame.query` method has a ``inplace`` keyword which determines +whether the query modifies the original frame. + +.. ipython:: python + + df = pd.DataFrame(dict(a=range(5), b=range(5, 10))) + df.query("a > 2") + df.query("a > 2", inplace=True) + df + +Local variables +~~~~~~~~~~~~~~~ + +You must *explicitly reference* any local variable that you want to use in an +expression by placing the ``@`` character in front of the name. For example, + +.. 
ipython:: python + + df = pd.DataFrame(np.random.randn(5, 2), columns=list("ab")) + newcol = np.random.randn(len(df)) + df.eval("b + @newcol") + df.query("b < @newcol") + +If you don't prefix the local variable with ``@``, pandas will raise an +exception telling you the variable is undefined. + +When using :meth:`DataFrame.eval` and :meth:`DataFrame.query`, this allows you +to have a local variable and a :class:`~pandas.DataFrame` column with the same +name in an expression. + + +.. ipython:: python + + a = np.random.randn() + df.query("@a < a") + df.loc[a < df["a"]] # same as the previous expression + +With :func:`pandas.eval` you cannot use the ``@`` prefix *at all*, because it +isn't defined in that context. pandas will let you know this if you try to +use ``@`` in a top-level call to :func:`pandas.eval`. For example, + +.. ipython:: python + :okexcept: + + a, b = 1, 2 + pd.eval("@a + b") + +In this case, you should simply refer to the variables like you would in +standard Python. + +.. ipython:: python + + pd.eval("a + b") + + +:func:`pandas.eval` parsers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are two different parsers and two different engines you can use as +the backend. + +The default ``'pandas'`` parser allows a more intuitive syntax for expressing +query-like operations (comparisons, conjunctions and disjunctions). In +particular, the precedence of the ``&`` and ``|`` operators is made equal to +the precedence of the corresponding boolean operations ``and`` and ``or``. + +For example, the above conjunction can be written without parentheses. +Alternatively, you can use the ``'python'`` parser to enforce strict Python +semantics. + +.. ipython:: python + + expr = "(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)" + x = pd.eval(expr, parser="python") + expr_no_parens = "df1 > 0 & df2 > 0 & df3 > 0 & df4 > 0" + y = pd.eval(expr_no_parens, parser="pandas") + np.all(x == y) + + +The same expression can be "anded" together with the word :keyword:`and` as +well: + +.. ipython:: python + + expr = "(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)" + x = pd.eval(expr, parser="python") + expr_with_ands = "df1 > 0 and df2 > 0 and df3 > 0 and df4 > 0" + y = pd.eval(expr_with_ands, parser="pandas") + np.all(x == y) + + +The ``and`` and ``or`` operators here have the same precedence that they would +in vanilla Python. + + +:func:`pandas.eval` backends +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There's also the option to make :func:`~pandas.eval` operate identical to plain +ol' Python. + +.. note:: + + Using the ``'python'`` engine is generally *not* useful, except for testing + other evaluation engines against it. You will achieve **no** performance + benefits using :func:`~pandas.eval` with ``engine='python'`` and in fact may + incur a performance hit. + +You can see this by using :func:`pandas.eval` with the ``'python'`` engine. It +is a bit slower (not by much) than evaluating the same expression in Python + +.. ipython:: python + + %timeit df1 + df2 + df3 + df4 + +.. ipython:: python + + %timeit pd.eval("df1 + df2 + df3 + df4", engine="python") + + +:func:`pandas.eval` performance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:func:`~pandas.eval` is intended to speed up certain kinds of operations. In +particular, those operations involving complex expressions with large +:class:`~pandas.DataFrame`/:class:`~pandas.Series` objects should see a +significant performance benefit. Here is a plot showing the running time of +:func:`pandas.eval` as function of the size of the frame involved in the +computation. 
The two lines are two different engines. + + +.. image:: ../_static/eval-perf.png + + +.. note:: + + Operations with smallish objects (around 15k-20k rows) are faster using + plain Python: + + .. image:: ../_static/eval-perf-small.png + + +This plot was created using a :class:`DataFrame` with 3 columns each containing +floating point values generated using ``numpy.random.randn()``. + +Technical minutia regarding expression evaluation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Expressions that would result in an object dtype or involve datetime operations +(because of ``NaT``) must be evaluated in Python space. The main reason for +this behavior is to maintain backwards compatibility with versions of NumPy < +1.7. In those versions of NumPy a call to ``ndarray.astype(str)`` will +truncate any strings that are more than 60 characters in length. Second, we +can't pass ``object`` arrays to ``numexpr`` thus string comparisons must be +evaluated in Python space. + +The upshot is that this *only* applies to object-dtype expressions. So, if +you have an expression--for example + +.. ipython:: python + + df = pd.DataFrame( + {"strings": np.repeat(list("cba"), 3), "nums": np.repeat(range(3), 3)} + ) + df + df.query("strings == 'a' and nums == 1") + +the numeric part of the comparison (``nums == 1``) will be evaluated by +``numexpr``. + +In general, :meth:`DataFrame.query`/:func:`pandas.eval` will +evaluate the subexpressions that *can* be evaluated by ``numexpr`` and those +that must be evaluated in Python space transparently to the user. This is done +by inferring the result type of an expression from its arguments and operators. diff --git a/doc/source/user_guide/gotchas.rst b/doc/source/user_guide/gotchas.rst new file mode 100644 index 00000000..adb40e16 --- /dev/null +++ b/doc/source/user_guide/gotchas.rst @@ -0,0 +1,411 @@ +.. _gotchas: + +{{ header }} + +******************************** +Frequently Asked Questions (FAQ) +******************************** + +.. _df-memory-usage: + +DataFrame memory usage +---------------------- +The memory usage of a :class:`DataFrame` (including the index) is shown when calling +the :meth:`~DataFrame.info`. A configuration option, ``display.memory_usage`` +(see :ref:`the list of options `), specifies if the +:class:`DataFrame` memory usage will be displayed when invoking the ``df.info()`` +method. + +For example, the memory usage of the :class:`DataFrame` below is shown +when calling :meth:`~DataFrame.info`: + +.. ipython:: python + + dtypes = [ + "int64", + "float64", + "datetime64[ns]", + "timedelta64[ns]", + "complex128", + "object", + "bool", + ] + n = 5000 + data = {t: np.random.randint(100, size=n).astype(t) for t in dtypes} + df = pd.DataFrame(data) + df["categorical"] = df["object"].astype("category") + + df.info() + +The ``+`` symbol indicates that the true memory usage could be higher, because +pandas does not count the memory used by values in columns with +``dtype=object``. + +Passing ``memory_usage='deep'`` will enable a more accurate memory usage report, +accounting for the full usage of the contained objects. This is optional +as it can be expensive to do this deeper introspection. + +.. ipython:: python + + df.info(memory_usage="deep") + +By default the display option is set to ``True`` but can be explicitly +overridden by passing the ``memory_usage`` argument when invoking ``df.info()``. + +The memory usage of each column can be found by calling the +:meth:`~DataFrame.memory_usage` method. 
This returns a :class:`Series` with an index +represented by column names and memory usage of each column shown in bytes. For +the :class:`DataFrame` above, the memory usage of each column and the total memory +usage can be found with the ``memory_usage`` method: + +.. ipython:: python + + df.memory_usage() + + # total memory usage of dataframe + df.memory_usage().sum() + +By default the memory usage of the :class:`DataFrame` index is shown in the +returned :class:`Series`, the memory usage of the index can be suppressed by passing +the ``index=False`` argument: + +.. ipython:: python + + df.memory_usage(index=False) + +The memory usage displayed by the :meth:`~DataFrame.info` method utilizes the +:meth:`~DataFrame.memory_usage` method to determine the memory usage of a +:class:`DataFrame` while also formatting the output in human-readable units (base-2 +representation; i.e. 1KB = 1024 bytes). + +See also :ref:`Categorical Memory Usage `. + +.. _gotchas.truth: + +Using if/truth statements with pandas +------------------------------------- + +pandas follows the NumPy convention of raising an error when you try to convert +something to a ``bool``. This happens in an ``if``-statement or when using the +boolean operations: ``and``, ``or``, and ``not``. It is not clear what the result +of the following code should be: + +.. code-block:: python + + >>> if pd.Series([False, True, False]): + ... pass + +Should it be ``True`` because it's not zero-length, or ``False`` because there +are ``False`` values? It is unclear, so instead, pandas raises a ``ValueError``: + +.. ipython:: python + :okexcept: + + if pd.Series([False, True, False]): + print("I was true") + +You need to explicitly choose what you want to do with the :class:`DataFrame`, e.g. +use :meth:`~DataFrame.any`, :meth:`~DataFrame.all` or :meth:`~DataFrame.empty`. +Alternatively, you might want to compare if the pandas object is ``None``: + +.. ipython:: python + + if pd.Series([False, True, False]) is not None: + print("I was not None") + + +Below is how to check if any of the values are ``True``: + +.. ipython:: python + + if pd.Series([False, True, False]).any(): + print("I am any") + +To evaluate single-element pandas objects in a boolean context, use the method +:meth:`~DataFrame.bool`: + +.. ipython:: python + + pd.Series([True]).bool() + pd.Series([False]).bool() + pd.DataFrame([[True]]).bool() + pd.DataFrame([[False]]).bool() + +Bitwise boolean +~~~~~~~~~~~~~~~ + +Bitwise boolean operators like ``==`` and ``!=`` return a boolean :class:`Series` +which performs an element-wise comparison when compared to a scalar. + +.. ipython:: python + + s = pd.Series(range(5)) + s == 4 + +See :ref:`boolean comparisons` for more examples. + +Using the ``in`` operator +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the Python ``in`` operator on a :class:`Series` tests for membership in the +**index**, not membership among the values. + +.. ipython:: python + + s = pd.Series(range(5), index=list("abcde")) + 2 in s + 'b' in s + +If this behavior is surprising, keep in mind that using ``in`` on a Python +dictionary tests keys, not values, and :class:`Series` are dict-like. +To test for membership in the values, use the method :meth:`~pandas.Series.isin`: + +.. ipython:: python + + s.isin([2]) + s.isin([2]).any() + +For :class:`DataFrame`, likewise, ``in`` applies to the column axis, +testing for membership in the list of column names. + +.. 
_gotchas.udf-mutation: + +Mutating with User Defined Function (UDF) methods +------------------------------------------------- + +This section applies to pandas methods that take a UDF. In particular, the methods +``.apply``, ``.aggregate``, ``.transform``, and ``.filter``. + +It is a general rule in programming that one should not mutate a container +while it is being iterated over. Mutation will invalidate the iterator, +causing unexpected behavior. Consider the example: + +.. ipython:: python + + values = [0, 1, 2, 3, 4, 5] + n_removed = 0 + for k, value in enumerate(values): + idx = k - n_removed + if value % 2 == 1: + del values[idx] + n_removed += 1 + else: + values[idx] = value + 1 + values + +One probably would have expected that the result would be ``[1, 3, 5]``. +When using a pandas method that takes a UDF, internally pandas is often +iterating over the +:class:`DataFrame` or other pandas object. Therefore, if the UDF mutates (changes) +the :class:`DataFrame`, unexpected behavior can arise. + +Here is a similar example with :meth:`DataFrame.apply`: + +.. ipython:: python + + def f(s): + s.pop("a") + return s + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + try: + df.apply(f, axis="columns") + except Exception as err: + print(repr(err)) + +To resolve this issue, one can make a copy so that the mutation does +not apply to the container being iterated over. + +.. ipython:: python + + values = [0, 1, 2, 3, 4, 5] + n_removed = 0 + for k, value in enumerate(values.copy()): + idx = k - n_removed + if value % 2 == 1: + del values[idx] + n_removed += 1 + else: + values[idx] = value + 1 + values + +.. ipython:: python + + def f(s): + s = s.copy() + s.pop("a") + return s + + df = pd.DataFrame({"a": [1, 2, 3], 'b': [4, 5, 6]}) + df.apply(f, axis="columns") + +``NaN``, Integer ``NA`` values and ``NA`` type promotions +--------------------------------------------------------- + +Choice of ``NA`` representation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For lack of ``NA`` (missing) support from the ground up in NumPy and Python in +general, we were given the difficult choice between either: + +* A *masked array* solution: an array of data and an array of boolean values + indicating whether a value is there or is missing. +* Using a special sentinel value, bit pattern, or set of sentinel values to + denote ``NA`` across the dtypes. + +For many reasons we chose the latter. After years of production use it has +proven, at least in my opinion, to be the best decision given the state of +affairs in NumPy and Python in general. The special value ``NaN`` +(Not-A-Number) is used everywhere as the ``NA`` value, and there are API +functions :meth:`DataFrame.isna` and :meth:`DataFrame.notna` which can be used across the dtypes to +detect NA values. + +However, it comes with it a couple of trade-offs which I most certainly have +not ignored. + +.. _gotchas.intna: + +Support for integer ``NA`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the absence of high performance ``NA`` support being built into NumPy from +the ground up, the primary casualty is the ability to represent NAs in integer +arrays. For example: + +.. ipython:: python + + s = pd.Series([1, 2, 3, 4, 5], index=list("abcde")) + s + s.dtype + + s2 = s.reindex(["a", "b", "c", "f", "u"]) + s2 + s2.dtype + +This trade-off is made largely for memory and performance reasons, and also so +that the resulting :class:`Series` continues to be "numeric". 
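+
+If the missing values can eventually be filled in, the result can be cast back
+to an integer dtype after filling; a minimal sketch (the fill value ``0`` is
+purely illustrative):
+
+.. ipython:: python
+
+    s2.fillna(0).astype("int64")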
+ +If you need to represent integers with possibly missing values, use one of +the nullable-integer extension dtypes provided by pandas + +* :class:`Int8Dtype` +* :class:`Int16Dtype` +* :class:`Int32Dtype` +* :class:`Int64Dtype` + +.. ipython:: python + + s_int = pd.Series([1, 2, 3, 4, 5], index=list("abcde"), dtype=pd.Int64Dtype()) + s_int + s_int.dtype + + s2_int = s_int.reindex(["a", "b", "c", "f", "u"]) + s2_int + s2_int.dtype + +See :ref:`integer_na` for more. + +``NA`` type promotions +~~~~~~~~~~~~~~~~~~~~~~ + +When introducing NAs into an existing :class:`Series` or :class:`DataFrame` via +:meth:`~Series.reindex` or some other means, boolean and integer types will be +promoted to a different dtype in order to store the NAs. The promotions are +summarized in this table: + +.. csv-table:: + :header: "Typeclass","Promotion dtype for storing NAs" + :widths: 40,60 + + ``floating``, no change + ``object``, no change + ``integer``, cast to ``float64`` + ``boolean``, cast to ``object`` + +While this may seem like a heavy trade-off, I have found very few cases where +this is an issue in practice i.e. storing values greater than 2**53. Some +explanation for the motivation is in the next section. + +Why not make NumPy like R? +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many people have suggested that NumPy should simply emulate the ``NA`` support +present in the more domain-specific statistical programming language `R +`__. Part of the reason is the NumPy type hierarchy: + +.. csv-table:: + :header: "Typeclass","Dtypes" + :widths: 30,70 + :delim: | + + ``numpy.floating`` | ``float16, float32, float64, float128`` + ``numpy.integer`` | ``int8, int16, int32, int64`` + ``numpy.unsignedinteger`` | ``uint8, uint16, uint32, uint64`` + ``numpy.object_`` | ``object_`` + ``numpy.bool_`` | ``bool_`` + ``numpy.character`` | ``string_, unicode_`` + +The R language, by contrast, only has a handful of built-in data types: +``integer``, ``numeric`` (floating-point), ``character``, and +``boolean``. ``NA`` types are implemented by reserving special bit patterns for +each type to be used as the missing value. While doing this with the full NumPy +type hierarchy would be possible, it would be a more substantial trade-off +(especially for the 8- and 16-bit data types) and implementation undertaking. + +An alternate approach is that of using masked arrays. A masked array is an +array of data with an associated boolean *mask* denoting whether each value +should be considered ``NA`` or not. I am personally not in love with this +approach as I feel that overall it places a fairly heavy burden on the user and +the library implementer. Additionally, it exacts a fairly high performance cost +when working with numerical data compared with the simple approach of using +``NaN``. Thus, I have chosen the Pythonic "practicality beats purity" approach +and traded integer ``NA`` capability for a much simpler approach of using a +special value in float and object arrays to denote ``NA``, and promoting +integer arrays to floating when NAs must be introduced. + + +Differences with NumPy +---------------------- +For :class:`Series` and :class:`DataFrame` objects, :meth:`~DataFrame.var` normalizes by +``N-1`` to produce `unbiased estimates of the population variance `__, while NumPy's +:meth:`numpy.var` normalizes by N, which measures the variance of the sample. Note that +:meth:`~DataFrame.cov` normalizes by ``N-1`` in both pandas and NumPy. + +.. _gotchas.thread-safety: + +Thread-safety +------------- + +pandas is not 100% thread safe. 
The known issues relate to +the :meth:`~DataFrame.copy` method. If you are doing a lot of copying of +:class:`DataFrame` objects shared among threads, we recommend holding locks inside +the threads where the data copying occurs. + +See `this link `__ +for more information. + + +Byte-ordering issues +-------------------- +Occasionally you may have to deal with data that were created on a machine with +a different byte order than the one on which you are running Python. A common +symptom of this issue is an error like:: + + Traceback + ... + ValueError: Big-endian buffer not supported on little-endian compiler + +To deal +with this issue you should convert the underlying NumPy array to the native +system byte order *before* passing it to :class:`Series` or :class:`DataFrame` +constructors using something similar to the following: + +.. ipython:: python + + x = np.array(list(range(10)), ">i4") # big endian + newx = x.byteswap().newbyteorder() # force native byteorder + s = pd.Series(newx) + +See `the NumPy documentation on byte order +`__ for more +details. diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst new file mode 100644 index 00000000..5d8ef7ce --- /dev/null +++ b/doc/source/user_guide/groupby.rst @@ -0,0 +1,1620 @@ +.. _groupby: + +{{ header }} + +***************************** +Group by: split-apply-combine +***************************** + +By "group by" we are referring to a process involving one or more of the following +steps: + +* **Splitting** the data into groups based on some criteria. +* **Applying** a function to each group independently. +* **Combining** the results into a data structure. + +Out of these, the split step is the most straightforward. In fact, in many +situations we may wish to split the data set into groups and do something with +those groups. In the apply step, we might wish to do one of the +following: + +* **Aggregation**: compute a summary statistic (or statistics) for each + group. Some examples: + + * Compute group sums or means. + * Compute group sizes / counts. + +* **Transformation**: perform some group-specific computations and return a + like-indexed object. Some examples: + + * Standardize data (zscore) within a group. + * Filling NAs within groups with a value derived from each group. + +* **Filtration**: discard some groups, according to a group-wise computation + that evaluates True or False. Some examples: + + * Discard data that belongs to groups with only a few members. + * Filter out data based on the group sum or mean. + +* Some combination of the above: GroupBy will examine the results of the apply + step and try to return a sensibly combined result if it doesn't fit into + either of the above two categories. + +Since the set of object instance methods on pandas data structures are generally +rich and expressive, we often simply want to invoke, say, a DataFrame function +on each group. The name GroupBy should be quite familiar to those who have used +a SQL-based tool (or ``itertools``), in which you can write code like: + +.. code-block:: sql + + SELECT Column1, Column2, mean(Column3), sum(Column4) + FROM SomeTable + GROUP BY Column1, Column2 + +We aim to make operations like this natural and easy to express using +pandas. We'll address each area of GroupBy functionality then provide some +non-trivial examples / use cases. + +See the :ref:`cookbook` for some advanced strategies. + +.. 
_groupby.split: + +Splitting an object into groups +------------------------------- + +pandas objects can be split on any of their axes. The abstract definition of +grouping is to provide a mapping of labels to group names. To create a GroupBy +object (more on what the GroupBy object is later), you may do the following: + +.. ipython:: python + + df = pd.DataFrame( + [ + ("bird", "Falconiformes", 389.0), + ("bird", "Psittaciformes", 24.0), + ("mammal", "Carnivora", 80.2), + ("mammal", "Primates", np.nan), + ("mammal", "Carnivora", 58), + ], + index=["falcon", "parrot", "lion", "monkey", "leopard"], + columns=("class", "order", "max_speed"), + ) + df + + # default is axis=0 + grouped = df.groupby("class") + grouped = df.groupby("order", axis="columns") + grouped = df.groupby(["class", "order"]) + +The mapping can be specified many different ways: + +* A Python function, to be called on each of the axis labels. +* A list or NumPy array of the same length as the selected axis. +* A dict or ``Series``, providing a ``label -> group name`` mapping. +* For ``DataFrame`` objects, a string indicating either a column name or + an index level name to be used to group. +* ``df.groupby('A')`` is just syntactic sugar for ``df.groupby(df['A'])``. +* A list of any of the above things. + +Collectively we refer to the grouping objects as the **keys**. For example, +consider the following ``DataFrame``: + +.. note:: + + A string passed to ``groupby`` may refer to either a column or an index level. + If a string matches both a column name and an index level name, a + ``ValueError`` will be raised. + +.. ipython:: python + + df = pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + df + +On a DataFrame, we obtain a GroupBy object by calling :meth:`~DataFrame.groupby`. +We could naturally group by either the ``A`` or ``B`` columns, or both: + +.. ipython:: python + + grouped = df.groupby("A") + grouped = df.groupby(["A", "B"]) + +If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all +but the specified columns + +.. ipython:: python + + df2 = df.set_index(["A", "B"]) + grouped = df2.groupby(level=df2.index.names.difference(["B"])) + grouped.sum() + +These will split the DataFrame on its index (rows). We could also split by the +columns: + +.. ipython:: + + In [4]: def get_letter_type(letter): + ...: if letter.lower() in 'aeiou': + ...: return 'vowel' + ...: else: + ...: return 'consonant' + ...: + + In [5]: grouped = df.groupby(get_letter_type, axis=1) + +pandas :class:`~pandas.Index` objects support duplicate values. If a +non-unique index is used as the group key in a groupby operation, all values +for the same index value will be considered to be in one group and thus the +output of aggregation functions will only contain unique index values: + +.. ipython:: python + + lst = [1, 2, 3, 1, 2, 3] + + s = pd.Series([1, 2, 3, 10, 20, 30], lst) + + grouped = s.groupby(level=0) + + grouped.first() + + grouped.last() + + grouped.sum() + +Note that **no splitting occurs** until it's needed. Creating the GroupBy object +only verifies that you've passed a valid mapping. + +.. note:: + + Many kinds of complicated data manipulations can be expressed in terms of + GroupBy operations (though can't be guaranteed to be the most + efficient). You can get quite creative with the label mapping functions. + +.. 
_groupby.sorting: + +GroupBy sorting +~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default the group keys are sorted during the ``groupby`` operation. You may however pass ``sort=False`` for potential speedups: + +.. ipython:: python + + df2 = pd.DataFrame({"X": ["B", "B", "A", "A"], "Y": [1, 2, 3, 4]}) + df2.groupby(["X"]).sum() + df2.groupby(["X"], sort=False).sum() + + +Note that ``groupby`` will preserve the order in which *observations* are sorted *within* each group. +For example, the groups created by ``groupby()`` below are in the order they appeared in the original ``DataFrame``: + +.. ipython:: python + + df3 = pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}) + df3.groupby(["X"]).get_group("A") + + df3.groupby(["X"]).get_group("B") + + +.. _groupby.dropna: + +.. versionadded:: 1.1.0 + +GroupBy dropna +^^^^^^^^^^^^^^ + +By default ``NA`` values are excluded from group keys during the ``groupby`` operation. However, +in case you want to include ``NA`` values in group keys, you could pass ``dropna=False`` to achieve it. + +.. ipython:: python + + df_list = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]] + df_dropna = pd.DataFrame(df_list, columns=["a", "b", "c"]) + + df_dropna + +.. ipython:: python + + # Default ``dropna`` is set to True, which will exclude NaNs in keys + df_dropna.groupby(by=["b"], dropna=True).sum() + + # In order to allow NaN in keys, set ``dropna`` to False + df_dropna.groupby(by=["b"], dropna=False).sum() + +The default setting of ``dropna`` argument is ``True`` which means ``NA`` are not included in group keys. + + +.. _groupby.attributes: + +GroupBy object attributes +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``groups`` attribute is a dict whose keys are the computed unique groups +and corresponding values being the axis labels belonging to each group. In the +above example we have: + +.. ipython:: python + + df.groupby("A").groups + df.groupby(get_letter_type, axis=1).groups + +Calling the standard Python ``len`` function on the GroupBy object just returns +the length of the ``groups`` dict, so it is largely just a convenience: + +.. ipython:: python + + grouped = df.groupby(["A", "B"]) + grouped.groups + len(grouped) + + +.. _groupby.tabcompletion: + +``GroupBy`` will tab complete column names (and other attributes): + +.. ipython:: python + :suppress: + + n = 10 + weight = np.random.normal(166, 20, size=n) + height = np.random.normal(60, 10, size=n) + time = pd.date_range("1/1/2000", periods=n) + gender = np.random.choice(["male", "female"], size=n) + df = pd.DataFrame( + {"height": height, "weight": weight, "gender": gender}, index=time + ) + +.. ipython:: python + + df + gb = df.groupby("gender") + + +.. ipython:: + + @verbatim + In [1]: gb. # noqa: E225, E999 + gb.agg gb.boxplot gb.cummin gb.describe gb.filter gb.get_group gb.height gb.last gb.median gb.ngroups gb.plot gb.rank gb.std gb.transform + gb.aggregate gb.count gb.cumprod gb.dtype gb.first gb.groups gb.hist gb.max gb.min gb.nth gb.prod gb.resample gb.sum gb.var + gb.apply gb.cummax gb.cumsum gb.fillna gb.gender gb.head gb.indices gb.mean gb.name gb.ohlc gb.quantile gb.size gb.tail gb.weight + +.. _groupby.multiindex: + +GroupBy with MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~ + +With :ref:`hierarchically-indexed data `, it's quite +natural to group by one of the levels of the hierarchy. + +Let's create a Series with a two-level ``MultiIndex``. + +.. 
ipython:: python + + + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + index = pd.MultiIndex.from_arrays(arrays, names=["first", "second"]) + s = pd.Series(np.random.randn(8), index=index) + s + +We can then group by one of the levels in ``s``. + +.. ipython:: python + + grouped = s.groupby(level=0) + grouped.sum() + +If the MultiIndex has names specified, these can be passed instead of the level +number: + +.. ipython:: python + + s.groupby(level="second").sum() + +Grouping with multiple levels is supported. + +.. ipython:: python + :suppress: + + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["doo", "doo", "bee", "bee", "bop", "bop", "bop", "bop"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + index = pd.MultiIndex.from_tuples(tuples, names=["first", "second", "third"]) + s = pd.Series(np.random.randn(8), index=index) + +.. ipython:: python + + s + s.groupby(level=["first", "second"]).sum() + +Index level names may be supplied as keys. + +.. ipython:: python + + s.groupby(["first", "second"]).sum() + +More on the ``sum`` function and aggregation later. + +Grouping DataFrame with Index levels and columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A DataFrame may be grouped by a combination of columns and index levels by +specifying the column names as strings and the index levels as ``pd.Grouper`` +objects. + +.. ipython:: python + + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + + index = pd.MultiIndex.from_arrays(arrays, names=["first", "second"]) + + df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 3, 3], "B": np.arange(8)}, index=index) + + df + +The following example groups ``df`` by the ``second`` index level and +the ``A`` column. + +.. ipython:: python + + df.groupby([pd.Grouper(level=1), "A"]).sum() + +Index levels may also be specified by name. + +.. ipython:: python + + df.groupby([pd.Grouper(level="second"), "A"]).sum() + +Index level names may be specified as keys directly to ``groupby``. + +.. ipython:: python + + df.groupby(["second", "A"]).sum() + +DataFrame column selection in GroupBy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once you have created the GroupBy object from a DataFrame, you might want to do +something different for each of the columns. Thus, using ``[]`` similar to +getting a column from a DataFrame, you can do: + +.. ipython:: python + + df = pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + + df + + grouped = df.groupby(["A"]) + grouped_C = grouped["C"] + grouped_D = grouped["D"] + +This is mainly syntactic sugar for the alternative and much more verbose: + +.. ipython:: python + + df["C"].groupby(df["A"]) + +Additionally this method avoids recomputing the internal grouping information +derived from the passed key. + +.. _groupby.iterating-label: + +Iterating through groups +------------------------ + +With the GroupBy object in hand, iterating through the grouped data is very +natural and functions similarly to :py:func:`itertools.groupby`: + +.. 
ipython:: + + In [4]: grouped = df.groupby('A') + + In [5]: for name, group in grouped: + ...: print(name) + ...: print(group) + ...: + +In the case of grouping by multiple keys, the group name will be a tuple: + +.. ipython:: + + In [5]: for name, group in df.groupby(['A', 'B']): + ...: print(name) + ...: print(group) + ...: + +See :ref:`timeseries.iterating-label`. + +Selecting a group +----------------- + +A single group can be selected using +:meth:`~pandas.core.groupby.DataFrameGroupBy.get_group`: + +.. ipython:: python + + grouped.get_group("bar") + +Or for an object grouped on multiple columns: + +.. ipython:: python + + df.groupby(["A", "B"]).get_group(("bar", "one")) + +.. _groupby.aggregate: + +Aggregation +----------- + +Once the GroupBy object has been created, several methods are available to +perform a computation on the grouped data. These operations are similar to the +:ref:`aggregating API `, :ref:`window API `, +and :ref:`resample API `. + +An obvious one is aggregation via the +:meth:`~pandas.core.groupby.DataFrameGroupBy.aggregate` or equivalently +:meth:`~pandas.core.groupby.DataFrameGroupBy.agg` method: + +.. ipython:: python + + grouped = df.groupby("A") + grouped[["C", "D"]].aggregate(np.sum) + + grouped = df.groupby(["A", "B"]) + grouped.aggregate(np.sum) + +As you can see, the result of the aggregation will have the group names as the +new index along the grouped axis. In the case of multiple keys, the result is a +:ref:`MultiIndex ` by default, though this can be +changed by using the ``as_index`` option: + +.. ipython:: python + + grouped = df.groupby(["A", "B"], as_index=False) + grouped.aggregate(np.sum) + + df.groupby("A", as_index=False)[["C", "D"]].sum() + +Note that you could use the ``reset_index`` DataFrame function to achieve the +same result as the column names are stored in the resulting ``MultiIndex``: + +.. ipython:: python + + df.groupby(["A", "B"]).sum().reset_index() + +Another simple aggregation example is to compute the size of each group. +This is included in GroupBy as the ``size`` method. It returns a Series whose +index are the group names and whose values are the sizes of each group. + +.. ipython:: python + + grouped.size() + +.. ipython:: python + + grouped.describe() + +Another aggregation example is to compute the number of unique values of each group. This is similar to the ``value_counts`` function, except that it only counts unique values. + +.. ipython:: python + + ll = [['foo', 1], ['foo', 2], ['foo', 2], ['bar', 1], ['bar', 1]] + df4 = pd.DataFrame(ll, columns=["A", "B"]) + df4 + df4.groupby("A")["B"].nunique() + +.. note:: + + Aggregation functions **will not** return the groups that you are aggregating over + if they are named *columns*, when ``as_index=True``, the default. The grouped columns will + be the **indices** of the returned object. + + Passing ``as_index=False`` **will** return the groups that you are aggregating over, if they are + named *columns*. + +Aggregating functions are the ones that reduce the dimension of the returned objects. +Some common aggregating functions are tabulated below: + +.. 
csv-table:: + :header: "Function", "Description" + :widths: 20, 80 + :delim: ; + + :meth:`~pd.core.groupby.DataFrameGroupBy.mean`;Compute mean of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.sum`;Compute sum of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.size`;Compute group sizes + :meth:`~pd.core.groupby.DataFrameGroupBy.count`;Compute count of group + :meth:`~pd.core.groupby.DataFrameGroupBy.std`;Standard deviation of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.var`;Compute variance of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.sem`;Standard error of the mean of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.describe`;Generates descriptive statistics + :meth:`~pd.core.groupby.DataFrameGroupBy.first`;Compute first of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.last`;Compute last of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.nth`;Take nth value, or a subset if n is a list + :meth:`~pd.core.groupby.DataFrameGroupBy.min`;Compute min of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.max`;Compute max of group values + + +The aggregating functions above will exclude NA values. Any function which +reduces a :class:`Series` to a scalar value is an aggregation function and will work, +a trivial example is ``df.groupby('A').agg(lambda ser: 1)``. Note that +:meth:`~pd.core.groupby.DataFrameGroupBy.nth` can act as a reducer *or* a +filter, see :ref:`here `. + +.. _groupby.aggregate.multifunc: + +Applying multiple functions at once +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +With grouped ``Series`` you can also pass a list or dict of functions to do +aggregation with, outputting a DataFrame: + +.. ipython:: python + + grouped = df.groupby("A") + grouped["C"].agg([np.sum, np.mean, np.std]) + +On a grouped ``DataFrame``, you can pass a list of functions to apply to each +column, which produces an aggregated result with a hierarchical index: + +.. ipython:: python + + grouped[["C", "D"]].agg([np.sum, np.mean, np.std]) + + +The resulting aggregations are named for the functions themselves. If you +need to rename, then you can add in a chained operation for a ``Series`` like this: + +.. ipython:: python + + ( + grouped["C"] + .agg([np.sum, np.mean, np.std]) + .rename(columns={"sum": "foo", "mean": "bar", "std": "baz"}) + ) + +For a grouped ``DataFrame``, you can rename in a similar manner: + +.. ipython:: python + + ( + grouped[["C", "D"]].agg([np.sum, np.mean, np.std]).rename( + columns={"sum": "foo", "mean": "bar", "std": "baz"} + ) + ) + +.. note:: + + In general, the output column names should be unique. You can't apply + the same function (or two functions with the same name) to the same + column. + + .. ipython:: python + :okexcept: + + grouped["C"].agg(["sum", "sum"]) + + + pandas *does* allow you to provide multiple lambdas. In this case, pandas + will mangle the name of the (nameless) lambda functions, appending ``_`` + to each subsequent lambda. + + .. ipython:: python + + grouped["C"].agg([lambda x: x.max() - x.min(), lambda x: x.median() - x.mean()]) + + + +.. _groupby.aggregate.named: + +Named aggregation +~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.25.0 + +To support column-specific aggregation *with control over the output column names*, pandas +accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", where + +- The keywords are the *output* column names +- The values are tuples whose first element is the column to select + and the second element is the aggregation to apply to that column. 
pandas + provides the ``pandas.NamedAgg`` namedtuple with the fields ``['column', 'aggfunc']`` + to make it clearer what the arguments are. As usual, the aggregation can + be a callable or a string alias. + +.. ipython:: python + + animals = pd.DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + animals + + animals.groupby("kind").agg( + min_height=pd.NamedAgg(column="height", aggfunc="min"), + max_height=pd.NamedAgg(column="height", aggfunc="max"), + average_weight=pd.NamedAgg(column="weight", aggfunc=np.mean), + ) + + +``pandas.NamedAgg`` is just a ``namedtuple``. Plain tuples are allowed as well. + +.. ipython:: python + + animals.groupby("kind").agg( + min_height=("height", "min"), + max_height=("height", "max"), + average_weight=("weight", np.mean), + ) + + +If your desired output column names are not valid Python keywords, construct a dictionary +and unpack the keyword arguments + +.. ipython:: python + + animals.groupby("kind").agg( + **{ + "total weight": pd.NamedAgg(column="weight", aggfunc=sum) + } + ) + +Additional keyword arguments are not passed through to the aggregation functions. Only pairs +of ``(column, aggfunc)`` should be passed as ``**kwargs``. If your aggregation functions +requires additional arguments, partially apply them with :meth:`functools.partial`. + +.. note:: + + For Python 3.5 and earlier, the order of ``**kwargs`` in a functions was not + preserved. This means that the output column ordering would not be + consistent. To ensure consistent ordering, the keys (and so output columns) + will always be sorted for Python 3.5. + +Named aggregation is also valid for Series groupby aggregations. In this case there's +no column selection, so the values are just the functions. + +.. ipython:: python + + animals.groupby("kind").height.agg( + min_height="min", + max_height="max", + ) + +Applying different functions to DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By passing a dict to ``aggregate`` you can apply a different aggregation to the +columns of a DataFrame: + +.. ipython:: python + + grouped.agg({"C": np.sum, "D": lambda x: np.std(x, ddof=1)}) + +The function names can also be strings. In order for a string to be valid it +must be either implemented on GroupBy or available via :ref:`dispatching +`: + +.. ipython:: python + + grouped.agg({"C": "sum", "D": "std"}) + +.. _groupby.aggregate.cython: + +Cython-optimized aggregation functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some common aggregations, currently only ``sum``, ``mean``, ``std``, and ``sem``, have +optimized Cython implementations: + +.. ipython:: python + + df.groupby("A")[["C", "D"]].sum() + df.groupby(["A", "B"]).mean() + +Of course ``sum`` and ``mean`` are implemented on pandas objects, so the above +code would work even without the special versions via dispatching (see below). + +.. _groupby.aggregate.udfs: + +Aggregations with User-Defined Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Users can also provide their own functions for custom aggregations. When aggregating +with a User-Defined Function (UDF), the UDF should not mutate the provided ``Series``, see +:ref:`gotchas.udf-mutation` for more information. + +.. ipython:: python + + animals.groupby("kind")[["height"]].agg(lambda x: set(x)) + +The resulting dtype will reflect that of the aggregating function. 
If the results from different groups have +different dtypes, then a common dtype will be determined in the same way as ``DataFrame`` construction. + +.. ipython:: python + + animals.groupby("kind")[["height"]].agg(lambda x: x.astype(int).sum()) + +.. _groupby.transform: + +Transformation +-------------- + +The ``transform`` method returns an object that is indexed the same +as the one being grouped. The transform function must: + +* Return a result that is either the same size as the group chunk or + broadcastable to the size of the group chunk (e.g., a scalar, + ``grouped.transform(lambda x: x.iloc[-1])``). +* Operate column-by-column on the group chunk. The transform is applied to + the first group chunk using chunk.apply. +* Not perform in-place operations on the group chunk. Group chunks should + be treated as immutable, and changes to a group chunk may produce unexpected + results. For example, when using ``fillna``, ``inplace`` must be ``False`` + (``grouped.transform(lambda x: x.fillna(inplace=False))``). +* (Optionally) operates on the entire group chunk. If this is supported, a + fast path is used starting from the *second* chunk. + +.. deprecated:: 1.5.0 + + When using ``.transform`` on a grouped DataFrame and the transformation function + returns a DataFrame, currently pandas does not align the result's index + with the input's index. This behavior is deprecated and alignment will + be performed in a future version of pandas. You can apply ``.to_numpy()`` to the + result of the transformation function to avoid alignment. + +Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the +transformation function. If the results from different groups have different dtypes, then +a common dtype will be determined in the same way as ``DataFrame`` construction. + +Suppose we wished to standardize the data within each group: + +.. ipython:: python + + index = pd.date_range("10/1/1999", periods=1100) + ts = pd.Series(np.random.normal(0.5, 2, 1100), index) + ts = ts.rolling(window=100, min_periods=100).mean().dropna() + + ts.head() + ts.tail() + + transformed = ts.groupby(lambda x: x.year).transform( + lambda x: (x - x.mean()) / x.std() + ) + + +We would expect the result to now have mean 0 and standard deviation 1 within +each group, which we can easily check: + +.. ipython:: python + + # Original Data + grouped = ts.groupby(lambda x: x.year) + grouped.mean() + grouped.std() + + # Transformed Data + grouped_trans = transformed.groupby(lambda x: x.year) + grouped_trans.mean() + grouped_trans.std() + +We can also visually compare the original and transformed data sets. + +.. ipython:: python + + compare = pd.DataFrame({"Original": ts, "Transformed": transformed}) + + @savefig groupby_transform_plot.png + compare.plot() + +Transformation functions that have lower dimension outputs are broadcast to +match the shape of the input array. + +.. ipython:: python + + ts.groupby(lambda x: x.year).transform(lambda x: x.max() - x.min()) + +Alternatively, the built-in methods could be used to produce the same outputs. + +.. ipython:: python + + max_ts = ts.groupby(lambda x: x.year).transform("max") + min_ts = ts.groupby(lambda x: x.year).transform("min") + + max_ts - min_ts + +Another common data transform is to replace missing data with the group mean. + +.. 
ipython:: python + :suppress: + + cols = ["A", "B", "C"] + values = np.random.randn(1000, 3) + values[np.random.randint(0, 1000, 100), 0] = np.nan + values[np.random.randint(0, 1000, 50), 1] = np.nan + values[np.random.randint(0, 1000, 200), 2] = np.nan + data_df = pd.DataFrame(values, columns=cols) + +.. ipython:: python + + data_df + + countries = np.array(["US", "UK", "GR", "JP"]) + key = countries[np.random.randint(0, 4, 1000)] + + grouped = data_df.groupby(key) + + # Non-NA count in each group + grouped.count() + + transformed = grouped.transform(lambda x: x.fillna(x.mean())) + +We can verify that the group means have not changed in the transformed data +and that the transformed data contains no NAs. + +.. ipython:: python + + grouped_trans = transformed.groupby(key) + + grouped.mean() # original group means + grouped_trans.mean() # transformation did not change group means + + grouped.count() # original has some missing data points + grouped_trans.count() # counts after transformation + grouped_trans.size() # Verify non-NA count equals group size + +.. note:: + + Some functions will automatically transform the input when applied to a + GroupBy object, but returning an object of the same shape as the original. + Passing ``as_index=False`` will not affect these transformation methods. + + For example: ``fillna, ffill, bfill, shift.``. + + .. ipython:: python + + grouped.ffill() + + +.. _groupby.transform.window_resample: + +Window and resample operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is possible to use ``resample()``, ``expanding()`` and +``rolling()`` as methods on groupbys. + +The example below will apply the ``rolling()`` method on the samples of +the column B based on the groups of column A. + +.. ipython:: python + + df_re = pd.DataFrame({"A": [1] * 10 + [5] * 10, "B": np.arange(20)}) + df_re + + df_re.groupby("A").rolling(4).B.mean() + + +The ``expanding()`` method will accumulate a given operation +(``sum()`` in the example) for all the members of each particular +group. + +.. ipython:: python + + df_re.groupby("A").expanding().sum() + + +Suppose you want to use the ``resample()`` method to get a daily +frequency in each group of your dataframe and wish to complete the +missing values with the ``ffill()`` method. + +.. ipython:: python + + df_re = pd.DataFrame( + { + "date": pd.date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": [5, 6, 7, 8], + } + ).set_index("date") + df_re + + df_re.groupby("group").resample("1D").ffill() + +.. _groupby.filter: + +Filtration +---------- + +The ``filter`` method returns a subset of the original object. Suppose we +want to take only elements that belong to groups with a group sum greater +than 2. + +.. ipython:: python + + sf = pd.Series([1, 1, 2, 3, 3, 3]) + sf.groupby(sf).filter(lambda x: x.sum() > 2) + +The argument of ``filter`` must be a function that, applied to the group as a +whole, returns ``True`` or ``False``. + +Another useful operation is filtering out elements that belong to groups +with only a couple members. + +.. ipython:: python + + dff = pd.DataFrame({"A": np.arange(8), "B": list("aabbbbcc")}) + dff.groupby("B").filter(lambda x: len(x) > 2) + +Alternatively, instead of dropping the offending groups, we can return a +like-indexed objects where the groups that do not pass the filter are filled +with NaNs. + +.. 
ipython:: python + + dff.groupby("B").filter(lambda x: len(x) > 2, dropna=False) + +For DataFrames with multiple columns, filters should explicitly specify a column as the filter criterion. + +.. ipython:: python + + dff["C"] = np.arange(8) + dff.groupby("B").filter(lambda x: len(x["C"]) > 2) + +.. note:: + + Some functions when applied to a groupby object will act as a **filter** on the input, returning + a reduced shape of the original (and potentially eliminating groups), but with the index unchanged. + Passing ``as_index=False`` will not affect these transformation methods. + + For example: ``head, tail``. + + .. ipython:: python + + dff.groupby("B").head(2) + + +.. _groupby.dispatch: + +Dispatching to instance methods +------------------------------- + +When doing an aggregation or transformation, you might just want to call an +instance method on each data group. This is pretty easy to do by passing lambda +functions: + +.. ipython:: python + :okwarning: + + grouped = df.groupby("A") + grouped.agg(lambda x: x.std()) + +But, it's rather verbose and can be untidy if you need to pass additional +arguments. Using a bit of metaprogramming cleverness, GroupBy now has the +ability to "dispatch" method calls to the groups: + +.. ipython:: python + :okwarning: + + grouped.std() + +What is actually happening here is that a function wrapper is being +generated. When invoked, it takes any passed arguments and invokes the function +with any arguments on each group (in the above example, the ``std`` +function). The results are then combined together much in the style of ``agg`` +and ``transform`` (it actually uses ``apply`` to infer the gluing, documented +next). This enables some operations to be carried out rather succinctly: + +.. ipython:: python + + tsdf = pd.DataFrame( + np.random.randn(1000, 3), + index=pd.date_range("1/1/2000", periods=1000), + columns=["A", "B", "C"], + ) + tsdf.iloc[::2] = np.nan + grouped = tsdf.groupby(lambda x: x.year) + grouped.fillna(method="pad") + +In this example, we chopped the collection of time series into yearly chunks +then independently called :ref:`fillna ` on the +groups. + +The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys: + +.. ipython:: python + + s = pd.Series([9, 8, 7, 5, 19, 1, 4.2, 3.3]) + g = pd.Series(list("abababab")) + gb = s.groupby(g) + gb.nlargest(3) + gb.nsmallest(3) + +.. _groupby.apply: + +Flexible ``apply`` +------------------ + +Some operations on the grouped data might not fit into either the aggregate or +transform categories. Or, you may simply want GroupBy to infer how to combine +the results. For these, use the ``apply`` function, which can be substituted +for both ``aggregate`` and ``transform`` in many standard use cases. However, +``apply`` can handle some exceptional use cases. + +.. note:: + + ``apply`` can act as a reducer, transformer, *or* filter function, depending + on exactly what is passed to it. It can depend on the passed function and + exactly what you are grouping. Thus the grouped column(s) may be included in + the output as well as set the indices. + +.. ipython:: python + + df + grouped = df.groupby("A") + + # could also just call .describe() + grouped["C"].apply(lambda x: x.describe()) + +The dimension of the returned result can also change: + +.. 
ipython:: python + + grouped = df.groupby('A')['C'] + + def f(group): + return pd.DataFrame({'original': group, + 'demeaned': group - group.mean()}) + grouped.apply(f) + +``apply`` on a Series can operate on a returned value from the applied function, +that is itself a series, and possibly upcast the result to a DataFrame: + +.. ipython:: python + + def f(x): + return pd.Series([x, x ** 2], index=["x", "x^2"]) + + + s = pd.Series(np.random.rand(5)) + s + s.apply(f) + +Control grouped column(s) placement with ``group_keys`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + If ``group_keys=True`` is specified when calling :meth:`~DataFrame.groupby`, + functions passed to ``apply`` that return like-indexed outputs will have the + group keys added to the result index. Previous versions of pandas would add + the group keys only when the result from the applied function had a different + index than the input. If ``group_keys`` is not specified, the group keys will + not be added for like-indexed outputs. In the future this behavior + will change to always respect ``group_keys``, which defaults to ``True``. + + .. versionchanged:: 1.5.0 + +To control whether the grouped column(s) are included in the indices, you can use +the argument ``group_keys``. Compare + +.. ipython:: python + + df.groupby("A", group_keys=True).apply(lambda x: x) + +with + +.. ipython:: python + + df.groupby("A", group_keys=False).apply(lambda x: x) + +Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the +apply function. If the results from different groups have different dtypes, then +a common dtype will be determined in the same way as ``DataFrame`` construction. + + +Numba Accelerated Routines +-------------------------- + +.. versionadded:: 1.1 + +If `Numba `__ is installed as an optional dependency, the ``transform`` and +``aggregate`` methods support ``engine='numba'`` and ``engine_kwargs`` arguments. +See :ref:`enhancing performance with Numba ` for general usage of the arguments +and performance considerations. + +The function signature must start with ``values, index`` **exactly** as the data belonging to each group +will be passed into ``values``, and the group index will be passed into ``index``. + +.. warning:: + + When using ``engine='numba'``, there will be no "fall back" behavior internally. The group + data and group index will be passed as NumPy arrays to the JITed user defined function, and no + alternative execution attempts will be tried. + +Other useful features +--------------------- + +Automatic exclusion of "nuisance" columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Again consider the example DataFrame we've been looking at: + +.. ipython:: python + + df + +Suppose we wish to compute the standard deviation grouped by the ``A`` +column. There is a slight problem, namely that we don't care about the data in +column ``B``. We refer to this as a "nuisance" column. You can avoid nuisance +columns by specifying ``numeric_only=True``: + +.. ipython:: python + + df.groupby("A").std(numeric_only=True) + +Note that ``df.groupby('A').colname.std().`` is more efficient than +``df.groupby('A').std().colname``, so if the result of an aggregation function +is only interesting over one column (here ``colname``), it may be filtered +*before* applying the aggregation function. + +.. note:: + Any object column, also if it contains numerical values such as ``Decimal`` + objects, is considered as a "nuisance" columns. 
They are excluded from + aggregate functions automatically in groupby. + + If you do wish to include decimal or object columns in an aggregation with + other non-nuisance data types, you must do so explicitly. + +.. warning:: + The automatic dropping of nuisance columns has been deprecated and will be removed + in a future version of pandas. If columns are included that cannot be operated + on, pandas will instead raise an error. In order to avoid this, either select + the columns you wish to operate on or specify ``numeric_only=True``. + +.. ipython:: python + :okwarning: + + from decimal import Decimal + + df_dec = pd.DataFrame( + { + "id": [1, 2, 1, 2], + "int_column": [1, 2, 3, 4], + "dec_column": [ + Decimal("0.50"), + Decimal("0.15"), + Decimal("0.25"), + Decimal("0.40"), + ], + } + ) + + # Decimal columns can be sum'd explicitly by themselves... + df_dec.groupby(["id"])[["dec_column"]].sum() + + # ...but cannot be combined with standard data types or they will be excluded + df_dec.groupby(["id"])[["int_column", "dec_column"]].sum() + + # Use .agg function to aggregate over standard and "nuisance" data types + # at the same time + df_dec.groupby(["id"]).agg({"int_column": "sum", "dec_column": "sum"}) + +.. _groupby.observed: + +Handling of (un)observed Categorical values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When using a ``Categorical`` grouper (as a single grouper, or as part of multiple groupers), the ``observed`` keyword +controls whether to return a cartesian product of all possible groupers values (``observed=False``) or only those +that are observed groupers (``observed=True``). + +Show all values: + +.. ipython:: python + + pd.Series([1, 1, 1]).groupby( + pd.Categorical(["a", "a", "a"], categories=["a", "b"]), observed=False + ).count() + +Show only the observed values: + +.. ipython:: python + + pd.Series([1, 1, 1]).groupby( + pd.Categorical(["a", "a", "a"], categories=["a", "b"]), observed=True + ).count() + +The returned dtype of the grouped will *always* include *all* of the categories that were grouped. + +.. ipython:: python + + s = ( + pd.Series([1, 1, 1]) + .groupby(pd.Categorical(["a", "a", "a"], categories=["a", "b"]), observed=False) + .count() + ) + s.index.dtype + +.. _groupby.missing: + +NA and NaT group handling +~~~~~~~~~~~~~~~~~~~~~~~~~ + +If there are any NaN or NaT values in the grouping key, these will be +automatically excluded. In other words, there will never be an "NA group" or +"NaT group". This was not the case in older versions of pandas, but users were +generally discarding the NA group anyway (and supporting it was an +implementation headache). + +Grouping with ordered factors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Categorical variables represented as instance of pandas's ``Categorical`` class +can be used as group keys. If so, the order of the levels will be preserved: + +.. ipython:: python + + data = pd.Series(np.random.randn(100)) + + factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0]) + + data.groupby(factor).mean() + +.. _groupby.specify: + +Grouping with a grouper specification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may need to specify a bit more data to properly group. You can +use the ``pd.Grouper`` to provide this local control. + +.. 
ipython:: python + + import datetime + + df = pd.DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime.datetime(2013, 1, 1, 13, 0), + datetime.datetime(2013, 1, 1, 13, 5), + datetime.datetime(2013, 10, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0), + datetime.datetime(2013, 10, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0), + datetime.datetime(2013, 12, 2, 12, 0), + datetime.datetime(2013, 12, 2, 14, 0), + ], + } + ) + + df + +Groupby a specific column with the desired frequency. This is like resampling. + +.. ipython:: python + + df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"])[["Quantity"]].sum() + +You have an ambiguous specification in that you have a named index and a column +that could be potential groupers. + +.. ipython:: python + + df = df.set_index("Date") + df["Date"] = df.index + pd.offsets.MonthEnd(2) + df.groupby([pd.Grouper(freq="6M", key="Date"), "Buyer"])[["Quantity"]].sum() + + df.groupby([pd.Grouper(freq="6M", level="Date"), "Buyer"])[["Quantity"]].sum() + + +Taking the first rows of each group +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Just like for a DataFrame or Series you can call head and tail on a groupby: + +.. ipython:: python + + df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + df + + g = df.groupby("A") + g.head(1) + + g.tail(1) + +This shows the first or last n rows from each group. + +.. _groupby.nth: + +Taking the nth row of each group +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To select from a DataFrame or Series the nth item, use +:meth:`~pd.core.groupby.DataFrameGroupBy.nth`. This is a reduction method, and +will return a single row (or no row) per group if you pass an int for n: + +.. ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + + g.nth(0) + g.nth(-1) + g.nth(1) + +If you want to select the nth not-null item, use the ``dropna`` kwarg. For a DataFrame this should be either ``'any'`` or ``'all'`` just like you would pass to dropna: + +.. ipython:: python + + # nth(0) is the same as g.first() + g.nth(0, dropna="any") + g.first() + + # nth(-1) is the same as g.last() + g.nth(-1, dropna="any") # NaNs denote group exhausted when using dropna + g.last() + + g.B.nth(0, dropna="all") + +As with other methods, passing ``as_index=False``, will achieve a filtration, which returns the grouped row. + +.. ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A", as_index=False) + + g.nth(0) + g.nth(-1) + +You can also select multiple rows from each group by specifying multiple nth values as a list of ints. + +.. ipython:: python + + business_dates = pd.date_range(start="4/1/2014", end="6/30/2014", freq="B") + df = pd.DataFrame(1, index=business_dates, columns=["a", "b"]) + # get the first, 4th, and last date index for each month + df.groupby([df.index.year, df.index.month]).nth([0, 3, -1]) + +Enumerate group items +~~~~~~~~~~~~~~~~~~~~~ + +To see the order in which each row appears within its group, use the +``cumcount`` method: + +.. ipython:: python + + dfg = pd.DataFrame(list("aaabba"), columns=["A"]) + dfg + + dfg.groupby("A").cumcount() + + dfg.groupby("A").cumcount(ascending=False) + +.. 
_groupby.ngroup: + +Enumerate groups +~~~~~~~~~~~~~~~~ + +To see the ordering of the groups (as opposed to the order of rows +within a group given by ``cumcount``) you can use +:meth:`~pandas.core.groupby.DataFrameGroupBy.ngroup`. + + + +Note that the numbers given to the groups match the order in which the +groups would be seen when iterating over the groupby object, not the +order they are first observed. + +.. ipython:: python + + dfg = pd.DataFrame(list("aaabba"), columns=["A"]) + dfg + + dfg.groupby("A").ngroup() + + dfg.groupby("A").ngroup(ascending=False) + +Plotting +~~~~~~~~ + +Groupby also works with some plotting methods. For example, suppose we +suspect that some features in a DataFrame may differ by group, in this case, +the values in column 1 where the group is "B" are 3 higher on average. + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.randn(50, 2)) + df["g"] = np.random.choice(["A", "B"], size=50) + df.loc[df["g"] == "B", 1] += 3 + +We can easily visualize this with a boxplot: + +.. ipython:: python + :okwarning: + + @savefig groupby_boxplot.png + df.groupby("g").boxplot() + +The result of calling ``boxplot`` is a dictionary whose keys are the values +of our grouping column ``g`` ("A" and "B"). The values of the resulting dictionary +can be controlled by the ``return_type`` keyword of ``boxplot``. +See the :ref:`visualization documentation` for more. + +.. warning:: + + For historical reasons, ``df.groupby("g").boxplot()`` is not equivalent + to ``df.boxplot(by="g")``. See :ref:`here` for + an explanation. + +.. _groupby.pipe: + +Piping function calls +~~~~~~~~~~~~~~~~~~~~~ + +Similar to the functionality provided by ``DataFrame`` and ``Series``, functions +that take ``GroupBy`` objects can be chained together using a ``pipe`` method to +allow for a cleaner, more readable syntax. To read about ``.pipe`` in general terms, +see :ref:`here `. + +Combining ``.groupby`` and ``.pipe`` is often useful when you need to reuse +GroupBy objects. + +As an example, imagine having a DataFrame with columns for stores, products, +revenue and quantity sold. We'd like to do a groupwise calculation of *prices* +(i.e. revenue/quantity) per store and per product. We could do this in a +multi-step operation, but expressing it in terms of piping can make the +code more readable. First we set the data: + +.. ipython:: python + + n = 1000 + df = pd.DataFrame( + { + "Store": np.random.choice(["Store_1", "Store_2"], n), + "Product": np.random.choice(["Product_1", "Product_2"], n), + "Revenue": (np.random.random(n) * 50 + 10).round(2), + "Quantity": np.random.randint(1, 10, size=n), + } + ) + df.head(2) + +Now, to find prices per store/product, we can simply do: + +.. ipython:: python + + ( + df.groupby(["Store", "Product"]) + .pipe(lambda grp: grp.Revenue.sum() / grp.Quantity.sum()) + .unstack() + .round(2) + ) + +Piping can also be expressive when you want to deliver a grouped object to some +arbitrary function, for example: + +.. ipython:: python + + def mean(groupby): + return groupby.mean() + + + df.groupby(["Store", "Product"]).pipe(mean) + +where ``mean`` takes a GroupBy object and finds the mean of the Revenue and Quantity +columns respectively for each Store-Product combination. The ``mean`` function can +be any function that takes in a GroupBy object; the ``.pipe`` will pass the GroupBy +object as a parameter into the function you specify. 
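+
+``.pipe`` also forwards any additional positional and keyword arguments to the
+piped function. As a minimal sketch (the ``top_revenue`` helper below is purely
+illustrative, not part of the pandas API):
+
+.. ipython:: python
+
+    def top_revenue(grouped, n=1):
+        # return the n largest total revenues across Store-Product groups
+        return grouped.Revenue.sum().nlargest(n)
+
+    df.groupby(["Store", "Product"]).pipe(top_revenue, n=2)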
+ +Examples +-------- + +Regrouping by factor +~~~~~~~~~~~~~~~~~~~~ + +Regroup columns of a DataFrame according to their sum, and sum the aggregated ones. + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 0, 0], "b": [0, 1, 0], "c": [1, 0, 0], "d": [2, 3, 4]}) + df + df.groupby(df.sum(), axis=1).sum() + +.. _groupby.multicolumn_factorization: + +Multi-column factorization +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By using :meth:`~pandas.core.groupby.DataFrameGroupBy.ngroup`, we can extract +information about the groups in a way similar to :func:`factorize` (as described +further in the :ref:`reshaping API `) but which applies +naturally to multiple columns of mixed type and different +sources. This can be useful as an intermediate categorical-like step +in processing, when the relationships between the group rows are more +important than their content, or as input to an algorithm which only +accepts the integer encoding. (For more information about support in +pandas for full categorical data, see the :ref:`Categorical +introduction ` and the +:ref:`API documentation `.) + +.. ipython:: python + + dfg = pd.DataFrame({"A": [1, 1, 2, 3, 2], "B": list("aaaba")}) + + dfg + + dfg.groupby(["A", "B"]).ngroup() + + dfg.groupby(["A", [0, 0, 0, 1, 1]]).ngroup() + +Groupby by indexer to 'resample' data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples. + +In order to resample to work on indices that are non-datetimelike, the following procedure can be utilized. + +In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation. + +.. note:: The below example shows how we can downsample by consolidation of samples into fewer samples. Here by using **df.index // 5**, we are aggregating the samples in bins. By applying **std()** function, we aggregate the information contained in many samples into a small subset of values which is their standard deviation thereby reducing the number of samples. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 2)) + df + df.index // 5 + df.groupby(df.index // 5).std() + +Returning a Series to propagate names +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Group DataFrame columns, compute a set of metrics and return a named Series. +The Series name is used as the name for the column index. This is especially +useful in conjunction with reshaping operations such as stacking in which the +column index name will be used as the name of the inserted column: + +.. ipython:: python + + df = pd.DataFrame( + { + "a": [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1], + "c": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], + "d": [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1], + } + ) + + def compute_metrics(x): + result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()} + return pd.Series(result, name="metrics") + + result = df.groupby("a").apply(compute_metrics) + + result + + result.stack() diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst new file mode 100644 index 00000000..a6392706 --- /dev/null +++ b/doc/source/user_guide/index.rst @@ -0,0 +1,88 @@ +{{ header }} + +.. _user_guide: + +========== +User Guide +========== + +The User Guide covers all of pandas by topic area. 
Each of the subsections +introduces a topic (such as "working with missing data"), and discusses how +pandas approaches the problem, with many examples throughout. + +Users brand-new to pandas should start with :ref:`10min`. + +For a high level summary of the pandas fundamentals, see :ref:`dsintro` and :ref:`basics`. + +Further information on any specific method can be obtained in the +:ref:`api`. + +How to read these guides +------------------------ +In these guides you will see input code inside code blocks such as: + +:: + + import pandas as pd + pd.DataFrame({'A': [1, 2, 3]}) + + +or: + +.. ipython:: python + + import pandas as pd + pd.DataFrame({'A': [1, 2, 3]}) + +The first block is a standard python input, while in the second the ``In [1]:`` indicates the input is inside a `notebook `__. In Jupyter Notebooks the last line is printed and plots are shown inline. + +For example: + +.. ipython:: python + + a = 1 + a +is equivalent to: + +:: + + a = 1 + print(a) + + + +Guides +------- + +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + +.. toctree:: + :maxdepth: 2 + + 10min + dsintro + basics + io + indexing + advanced + merging + reshaping + text + missing_data + duplicates + categorical + integer_na + boolean + visualization + style + groupby + window + timeseries + timedeltas + options + enhancingperf + scale + sparse + gotchas + cookbook diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst new file mode 100644 index 00000000..f939945f --- /dev/null +++ b/doc/source/user_guide/indexing.rst @@ -0,0 +1,1964 @@ +.. _indexing: + +{{ header }} + +*************************** +Indexing and selecting data +*************************** + +The axis labeling information in pandas objects serves many purposes: + +* Identifies data (i.e. provides *metadata*) using known indicators, + important for analysis, visualization, and interactive console display. +* Enables automatic and explicit data alignment. +* Allows intuitive getting and setting of subsets of the data set. + +In this section, we will focus on the final point: namely, how to slice, dice, +and generally get and set subsets of pandas objects. The primary focus will be +on Series and DataFrame as they have received more development attention in +this area. + +.. note:: + + The Python and NumPy indexing operators ``[]`` and attribute operator ``.`` + provide quick and easy access to pandas data structures across a wide range + of use cases. This makes interactive work intuitive, as there's little new + to learn if you already know how to deal with Python dictionaries and NumPy + arrays. However, since the type of the data to be accessed isn't known in + advance, directly using standard operators has some optimization limits. For + production code, we recommended that you take advantage of the optimized + pandas data access methods exposed in this chapter. + +.. warning:: + + Whether a copy or a reference is returned for a setting operation, may + depend on the context. This is sometimes called ``chained assignment`` and + should be avoided. See :ref:`Returning a View versus Copy + `. + +See the :ref:`MultiIndex / Advanced Indexing ` for ``MultiIndex`` and more advanced indexing documentation. + +See the :ref:`cookbook` for some advanced strategies. + +.. _indexing.choice: + +Different choices for indexing +------------------------------ + +Object selection has had a number of user-requested additions in order to +support more explicit location based indexing. 
pandas now supports three types +of multi-axis indexing. + +* ``.loc`` is primarily label based, but may also be used with a boolean array. ``.loc`` will raise ``KeyError`` when the items are not found. Allowed inputs are: + + * A single label, e.g. ``5`` or ``'a'`` (Note that ``5`` is interpreted as a + *label* of the index. This use is **not** an integer position along the + index.). + * A list or array of labels ``['a', 'b', 'c']``. + * A slice object with labels ``'a':'f'`` (Note that contrary to usual Python + slices, **both** the start and the stop are included, when present in the + index! See :ref:`Slicing with labels ` + and :ref:`Endpoints are inclusive `.) + * A boolean array (any ``NA`` values will be treated as ``False``). + * A ``callable`` function with one argument (the calling Series or DataFrame) and + that returns valid output for indexing (one of the above). + + See more at :ref:`Selection by Label `. + +* ``.iloc`` is primarily integer position based (from ``0`` to + ``length-1`` of the axis), but may also be used with a boolean + array. ``.iloc`` will raise ``IndexError`` if a requested + indexer is out-of-bounds, except *slice* indexers which allow + out-of-bounds indexing. (this conforms with Python/NumPy *slice* + semantics). Allowed inputs are: + + * An integer e.g. ``5``. + * A list or array of integers ``[4, 3, 0]``. + * A slice object with ints ``1:7``. + * A boolean array (any ``NA`` values will be treated as ``False``). + * A ``callable`` function with one argument (the calling Series or DataFrame) and + that returns valid output for indexing (one of the above). + + See more at :ref:`Selection by Position `, + :ref:`Advanced Indexing ` and :ref:`Advanced + Hierarchical `. + +* ``.loc``, ``.iloc``, and also ``[]`` indexing can accept a ``callable`` as indexer. See more at :ref:`Selection By Callable `. + +Getting values from an object with multi-axes selection uses the following +notation (using ``.loc`` as an example, but the following applies to ``.iloc`` as +well). Any of the axes accessors may be the null slice ``:``. Axes left out of +the specification are assumed to be ``:``, e.g. ``p.loc['a']`` is equivalent to +``p.loc['a', :]``. + +.. csv-table:: + :header: "Object Type", "Indexers" + :widths: 30, 50 + :delim: ; + + Series; ``s.loc[indexer]`` + DataFrame; ``df.loc[row_indexer,column_indexer]`` + +.. _indexing.basics: + +Basics +------ + +As mentioned when introducing the data structures in the :ref:`last section +`, the primary function of indexing with ``[]`` (a.k.a. ``__getitem__`` +for those familiar with implementing class behavior in Python) is selecting out +lower-dimensional slices. The following table shows return type values when +indexing pandas objects with ``[]``: + +.. csv-table:: + :header: "Object Type", "Selection", "Return Value Type" + :widths: 30, 30, 60 + :delim: ; + + Series; ``series[label]``; scalar value + DataFrame; ``frame[colname]``; ``Series`` corresponding to colname + +Here we construct a simple time series data set to use for illustrating the +indexing functionality: + +.. ipython:: python + + dates = pd.date_range('1/1/2000', periods=8) + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) + df + +.. note:: + + None of the indexing functionality is time series specific unless + specifically stated. + +Thus, as per above, we have the most basic indexing using ``[]``: + +.. 
ipython:: python + + s = df['A'] + s[dates[5]] + +You can pass a list of columns to ``[]`` to select columns in that order. +If a column is not contained in the DataFrame, an exception will be +raised. Multiple columns can also be set in this manner: + +.. ipython:: python + + df + df[['B', 'A']] = df[['A', 'B']] + df + +You may find this useful for applying a transform (in-place) to a subset of the +columns. + +.. warning:: + + pandas aligns all AXES when setting ``Series`` and ``DataFrame`` from ``.loc``, and ``.iloc``. + + This will **not** modify ``df`` because the column alignment is before value assignment. + + .. ipython:: python + + df[['A', 'B']] + df.loc[:, ['B', 'A']] = df[['A', 'B']] + df[['A', 'B']] + + The correct way to swap column values is by using raw values: + + .. ipython:: python + + df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy() + df[['A', 'B']] + + +Attribute access +---------------- + +.. _indexing.columns.multiple: + +.. _indexing.df_cols: + +.. _indexing.attribute_access: + +You may access an index on a ``Series`` or column on a ``DataFrame`` directly +as an attribute: + +.. ipython:: python + + sa = pd.Series([1, 2, 3], index=list('abc')) + dfa = df.copy() + +.. ipython:: python + + sa.b + dfa.A + +.. ipython:: python + + sa.a = 5 + sa + dfa.A = list(range(len(dfa.index))) # ok if A already exists + dfa + dfa['A'] = list(range(len(dfa.index))) # use this form to create a new column + dfa + +.. warning:: + + - You can use this access only if the index element is a valid Python identifier, e.g. ``s.1`` is not allowed. + See `here for an explanation of valid identifiers + `__. + + - The attribute will not be available if it conflicts with an existing method name, e.g. ``s.min`` is not allowed, but ``s['min']`` is possible. + + - Similarly, the attribute will not be available if it conflicts with any of the following list: ``index``, + ``major_axis``, ``minor_axis``, ``items``. + + - In any of these cases, standard indexing will still work, e.g. ``s['1']``, ``s['min']``, and ``s['index']`` will + access the corresponding element or column. + +If you are using the IPython environment, you may also use tab-completion to +see these accessible attributes. + +You can also assign a ``dict`` to a row of a ``DataFrame``: + +.. ipython:: python + + x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) + x.iloc[1] = {'x': 9, 'y': 99} + x + +You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; +if you try to use attribute access to create a new column, it creates a new attribute rather than a +new column. In 0.21.0 and later, this will raise a ``UserWarning``: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In [2]: df.two = [4, 5, 6] + UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute_access + In [3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +Slicing ranges +-------------- + +The most robust and consistent way of slicing ranges along arbitrary axes is +described in the :ref:`Selection by Position ` section +detailing the ``.iloc`` method. For now, we explain the semantics of slicing using the ``[]`` operator. + +With Series, the syntax works exactly as with an ndarray, returning a slice of +the values and the corresponding labels: + +.. ipython:: python + + s[:5] + s[::2] + s[::-1] + +Note that setting works as well: + +.. 
ipython:: python + + s2 = s.copy() + s2[:5] = 0 + s2 + +With DataFrame, slicing inside of ``[]`` **slices the rows**. This is provided +largely as a convenience since it is such a common operation. + +.. ipython:: python + + df[:3] + df[::-1] + +.. _indexing.label: + +Selection by label +------------------ + +.. warning:: + + Whether a copy or a reference is returned for a setting operation, may depend on the context. + This is sometimes called ``chained assignment`` and should be avoided. + See :ref:`Returning a View versus Copy `. + +.. warning:: + + ``.loc`` is strict when you present slicers that are not compatible (or convertible) with the index type. For example + using integers in a ``DatetimeIndex``. These will raise a ``TypeError``. + + .. ipython:: python + + dfl = pd.DataFrame(np.random.randn(5, 4), + columns=list('ABCD'), + index=pd.date_range('20130101', periods=5)) + dfl + + .. code-block:: ipython + + In [4]: dfl.loc[2:3] + TypeError: cannot do slice indexing on with these indexers [2] of + + String likes in slicing *can* be convertible to the type of the index and lead to natural slicing. + + .. ipython:: python + + dfl.loc['20130102':'20130104'] + +.. warning:: + + .. versionchanged:: 1.0.0 + + pandas will raise a ``KeyError`` if indexing with a list with missing labels. See :ref:`list-like Using loc with + missing keys in a list is Deprecated `. + +pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol. +Every label asked for must be in the index, or a ``KeyError`` will be raised. +When slicing, both the start bound **AND** the stop bound are *included*, if present in the index. +Integers are valid labels, but they refer to the label **and not the position**. + +The ``.loc`` attribute is the primary access method. The following are valid inputs: + +* A single label, e.g. ``5`` or ``'a'`` (Note that ``5`` is interpreted as a *label* of the index. This use is **not** an integer position along the index.). +* A list or array of labels ``['a', 'b', 'c']``. +* A slice object with labels ``'a':'f'`` (Note that contrary to usual Python + slices, **both** the start and the stop are included, when present in the + index! See :ref:`Slicing with labels `. +* A boolean array. +* A ``callable``, see :ref:`Selection By Callable `. + +.. ipython:: python + + s1 = pd.Series(np.random.randn(6), index=list('abcdef')) + s1 + s1.loc['c':] + s1.loc['b'] + +Note that setting works as well: + +.. ipython:: python + + s1.loc['c':] = 0 + s1 + +With a DataFrame: + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(6, 4), + index=list('abcdef'), + columns=list('ABCD')) + df1 + df1.loc[['a', 'b', 'd'], :] + +Accessing via label slices: + +.. ipython:: python + + df1.loc['d':, 'A':'C'] + +For getting a cross section using a label (equivalent to ``df.xs('a')``): + +.. ipython:: python + + df1.loc['a'] + +For getting values with a boolean array: + +.. ipython:: python + + df1.loc['a'] > 0 + df1.loc[:, df1.loc['a'] > 0] + +NA values in a boolean array propagate as ``False``: + +.. versionchanged:: 1.0.2 + +.. ipython:: python + + mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean") + mask + df1[mask] + +For getting a value explicitly: + +.. ipython:: python + + # this is also equivalent to ``df1.at['a','A']`` + df1.loc['a', 'A'] + +.. 
_indexing.slicing_with_labels: + +Slicing with labels +~~~~~~~~~~~~~~~~~~~ + +When using ``.loc`` with slices, if both the start and the stop labels are +present in the index, then elements *located* between the two (including them) +are returned: + +.. ipython:: python + + s = pd.Series(list('abcde'), index=[0, 3, 2, 5, 4]) + s.loc[3:5] + +If at least one of the two is absent, but the index is sorted, and can be +compared against start and stop labels, then slicing will still work as +expected, by selecting labels which *rank* between the two: + +.. ipython:: python + + s.sort_index() + s.sort_index().loc[1:6] + +However, if at least one of the two is absent *and* the index is not sorted, an +error will be raised (since doing otherwise would be computationally expensive, +as well as potentially ambiguous for mixed type indexes). For instance, in the +above example, ``s.loc[1:6]`` would raise ``KeyError``. + +For the rationale behind this behavior, see +:ref:`Endpoints are inclusive `. + +.. ipython:: python + + s = pd.Series(list('abcdef'), index=[0, 3, 2, 5, 4, 2]) + s.loc[3:5] + +Also, if the index has duplicate labels *and* either the start or the stop label is duplicated, +an error will be raised. For instance, in the above example, ``s.loc[2:5]`` would raise a ``KeyError``. + +For more information about duplicate labels, see +:ref:`Duplicate Labels `. + +.. _indexing.integer: + +Selection by position +--------------------- + +.. warning:: + + Whether a copy or a reference is returned for a setting operation, may depend on the context. + This is sometimes called ``chained assignment`` and should be avoided. + See :ref:`Returning a View versus Copy `. + +pandas provides a suite of methods in order to get **purely integer based indexing**. The semantics follow closely Python and NumPy slicing. These are ``0-based`` indexing. When slicing, the start bound is *included*, while the upper bound is *excluded*. Trying to use a non-integer, even a **valid** label will raise an ``IndexError``. + +The ``.iloc`` attribute is the primary access method. The following are valid inputs: + +* An integer e.g. ``5``. +* A list or array of integers ``[4, 3, 0]``. +* A slice object with ints ``1:7``. +* A boolean array. +* A ``callable``, see :ref:`Selection By Callable `. + +.. ipython:: python + + s1 = pd.Series(np.random.randn(5), index=list(range(0, 10, 2))) + s1 + s1.iloc[:3] + s1.iloc[3] + +Note that setting works as well: + +.. ipython:: python + + s1.iloc[:3] = 0 + s1 + +With a DataFrame: + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(6, 4), + index=list(range(0, 12, 2)), + columns=list(range(0, 8, 2))) + df1 + +Select via integer slicing: + +.. ipython:: python + + df1.iloc[:3] + df1.iloc[1:5, 2:4] + +Select via integer list: + +.. ipython:: python + + df1.iloc[[1, 3, 5], [1, 3]] + +.. ipython:: python + + df1.iloc[1:3, :] + +.. ipython:: python + + df1.iloc[:, 1:3] + +.. ipython:: python + + # this is also equivalent to ``df1.iat[1,1]`` + df1.iloc[1, 1] + +For getting a cross section using an integer position (equiv to ``df.xs(1)``): + +.. ipython:: python + + df1.iloc[1] + +Out of range slice indexes are handled gracefully just as in Python/NumPy. + +.. ipython:: python + + # these are allowed in Python/NumPy. + x = list('abcdef') + x + x[4:10] + x[8:10] + s = pd.Series(x) + s + s.iloc[4:10] + s.iloc[8:10] + +Note that using slices that go out of bounds can result in +an empty axis (e.g. an empty DataFrame being returned). + +.. 
ipython:: python + + dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB')) + dfl + dfl.iloc[:, 2:3] + dfl.iloc[:, 1:3] + dfl.iloc[4:6] + +A single indexer that is out of bounds will raise an ``IndexError``. +A list of indexers where any element is out of bounds will raise an +``IndexError``. + +.. code-block:: python + + >>> dfl.iloc[[4, 5, 6]] + IndexError: positional indexers are out-of-bounds + + >>> dfl.iloc[:, 4] + IndexError: single positional indexer is out-of-bounds + +.. _indexing.callable: + +Selection by callable +--------------------- + +``.loc``, ``.iloc``, and also ``[]`` indexing can accept a ``callable`` as indexer. +The ``callable`` must be a function with one argument (the calling Series or DataFrame) that returns valid output for indexing. + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(6, 4), + index=list('abcdef'), + columns=list('ABCD')) + df1 + + df1.loc[lambda df: df['A'] > 0, :] + df1.loc[:, lambda df: ['A', 'B']] + + df1.iloc[:, lambda df: [0, 1]] + + df1[lambda df: df.columns[0]] + + +You can use callable indexing in ``Series``. + +.. ipython:: python + + df1['A'].loc[lambda s: s > 0] + +Using these methods / indexers, you can chain data selection operations +without using a temporary variable. + +.. ipython:: python + + bb = pd.read_csv('data/baseball.csv', index_col='id') + (bb.groupby(['year', 'team']).sum(numeric_only=True) + .loc[lambda df: df['r'] > 100]) + + +.. _combining_positional_and_label_based_indexing: + +Combining positional and label-based indexing +--------------------------------------------- + +If you wish to get the 0th and the 2nd elements from the index in the 'A' column, you can do: + +.. ipython:: python + + dfd = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6]}, + index=list('abc')) + dfd + dfd.loc[dfd.index[[0, 2]], 'A'] + +This can also be expressed using ``.iloc``, by explicitly getting locations on the indexers, and using +*positional* indexing to select things. + +.. ipython:: python + + dfd.iloc[[0, 2], dfd.columns.get_loc('A')] + +For getting *multiple* indexers, using ``.get_indexer``: + +.. ipython:: python + + dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])] + + +.. _deprecate_loc_reindex_listlike: +.. _indexing.deprecate_loc_reindex_listlike: + +Indexing with list with missing labels is deprecated +---------------------------------------------------- + +.. warning:: + + .. versionchanged:: 1.0.0 + + Using ``.loc`` or ``[]`` with a list with one or more missing labels will no longer reindex, in favor of ``.reindex``. + +In prior versions, using ``.loc[list-of-labels]`` would work as long as *at least 1* of the keys was found (otherwise it +would raise a ``KeyError``). This behavior was changed and will now raise a ``KeyError`` if at least one label is missing. +The recommended alternative is to use ``.reindex()``. + +For example. + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + +Selection with all keys found is unchanged. + +.. ipython:: python + + s.loc[[1, 2]] + +Previous behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Current behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Passing list-likes to .loc with any non-matching elements will raise + KeyError in the future, you can use .reindex() as an alternative. 
+ + See the documentation here: + https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike + + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Reindexing +~~~~~~~~~~ + +The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()``. See also the section on :ref:`reindexing `. + +.. ipython:: python + + s.reindex([1, 2, 3]) + +Alternatively, if you want to select only *valid* keys, the following is idiomatic and efficient; it is guaranteed to preserve the dtype of the selection. + +.. ipython:: python + + labels = [1, 2, 3] + s.loc[s.index.intersection(labels)] + +Having a duplicated index will raise for a ``.reindex()``: + +.. ipython:: python + + s = pd.Series(np.arange(4), index=['a', 'a', 'b', 'c']) + labels = ['c', 'd'] + +.. code-block:: ipython + + In [17]: s.reindex(labels) + ValueError: cannot reindex on an axis with duplicate labels + +Generally, you can intersect the desired labels with the current +axis, and then reindex. + +.. ipython:: python + + s.loc[s.index.intersection(labels)].reindex(labels) + +However, this would *still* raise if your resulting index is duplicated. + +.. code-block:: ipython + + In [41]: labels = ['a', 'd'] + + In [42]: s.loc[s.index.intersection(labels)].reindex(labels) + ValueError: cannot reindex on an axis with duplicate labels + + +.. _indexing.basics.partial_setting: + +Selecting random samples +------------------------ + +A random selection of rows or columns from a Series or DataFrame with the :meth:`~DataFrame.sample` method. The method will sample rows by default, and accepts a specific number of rows/columns to return, or a fraction of rows. + +.. ipython:: python + + s = pd.Series([0, 1, 2, 3, 4, 5]) + + # When no arguments are passed, returns 1 row. + s.sample() + + # One may specify either a number of rows: + s.sample(n=3) + + # Or a fraction of the rows: + s.sample(frac=0.5) + +By default, ``sample`` will return each row at most once, but one can also sample with replacement +using the ``replace`` option: + +.. ipython:: python + + s = pd.Series([0, 1, 2, 3, 4, 5]) + + # Without replacement (default): + s.sample(n=6, replace=False) + + # With replacement: + s.sample(n=6, replace=True) + + +By default, each row has an equal probability of being selected, but if you want rows +to have different probabilities, you can pass the ``sample`` function sampling weights as +``weights``. These weights can be a list, a NumPy array, or a Series, but they must be of the same length as the object you are sampling. Missing values will be treated as a weight of zero, and inf values are not allowed. If weights do not sum to 1, they will be re-normalized by dividing all weights by the sum of the weights. For example: + +.. ipython:: python + + s = pd.Series([0, 1, 2, 3, 4, 5]) + example_weights = [0, 0, 0.2, 0.2, 0.2, 0.4] + s.sample(n=3, weights=example_weights) + + # Weights will be re-normalized automatically + example_weights2 = [0.5, 0, 0, 0, 0, 0] + s.sample(n=1, weights=example_weights2) + +When applied to a DataFrame, you can use a column of the DataFrame as sampling weights +(provided you are sampling rows and not columns) by simply passing the name of the column +as a string. + +.. ipython:: python + + df2 = pd.DataFrame({'col1': [9, 8, 7, 6], + 'weight_column': [0.5, 0.4, 0.1, 0]}) + df2.sample(n=3, weights='weight_column') + +``sample`` also allows users to sample columns instead of rows using the ``axis`` argument. + +.. 
ipython:: python + + df3 = pd.DataFrame({'col1': [1, 2, 3], 'col2': [2, 3, 4]}) + df3.sample(n=1, axis=1) + +Finally, one can also set a seed for ``sample``'s random number generator using the ``random_state`` argument, which will accept either an integer (as a seed) or a NumPy RandomState object. + +.. ipython:: python + + df4 = pd.DataFrame({'col1': [1, 2, 3], 'col2': [2, 3, 4]}) + + # With a given seed, the sample will always draw the same rows. + df4.sample(n=2, random_state=2) + df4.sample(n=2, random_state=2) + + + +Setting with enlargement +------------------------ + +The ``.loc/[]`` operations can perform enlargement when setting a non-existent key for that axis. + +In the ``Series`` case this is effectively an appending operation. + +.. ipython:: python + + se = pd.Series([1, 2, 3]) + se + se[5] = 5. + se + +A ``DataFrame`` can be enlarged on either axis via ``.loc``. + +.. ipython:: python + + dfi = pd.DataFrame(np.arange(6).reshape(3, 2), + columns=['A', 'B']) + dfi + dfi.loc[:, 'C'] = dfi.loc[:, 'A'] + dfi + +This is like an ``append`` operation on the ``DataFrame``. + +.. ipython:: python + + dfi.loc[3] = 5 + dfi + +.. _indexing.basics.get_value: + +Fast scalar value getting and setting +------------------------------------- + +Since indexing with ``[]`` must handle a lot of cases (single-label access, +slicing, boolean indexing, etc.), it has a bit of overhead in order to figure +out what you're asking for. If you only want to access a scalar value, the +fastest way is to use the ``at`` and ``iat`` methods, which are implemented on +all of the data structures. + +Similarly to ``loc``, ``at`` provides **label** based scalar lookups, while, ``iat`` provides **integer** based lookups analogously to ``iloc`` + +.. ipython:: python + + s.iat[5] + df.at[dates[5], 'A'] + df.iat[3, 0] + +You can also set using these same indexers. + +.. ipython:: python + + df.at[dates[5], 'E'] = 7 + df.iat[3, 0] = 7 + +``at`` may enlarge the object in-place as above if the indexer is missing. + +.. ipython:: python + + df.at[dates[-1] + pd.Timedelta('1 day'), 0] = 7 + df + +Boolean indexing +---------------- + +.. _indexing.boolean: + +Another common operation is the use of boolean vectors to filter the data. +The operators are: ``|`` for ``or``, ``&`` for ``and``, and ``~`` for ``not``. +These **must** be grouped by using parentheses, since by default Python will +evaluate an expression such as ``df['A'] > 2 & df['B'] < 3`` as +``df['A'] > (2 & df['B']) < 3``, while the desired evaluation order is +``(df['A'] > 2) & (df['B'] < 3)``. + +Using a boolean vector to index a Series works exactly as in a NumPy ndarray: + +.. ipython:: python + + s = pd.Series(range(-3, 4)) + s + s[s > 0] + s[(s < -1) | (s > 0.5)] + s[~(s < 0)] + +You may select rows from a DataFrame using a boolean vector the same length as +the DataFrame's index (for example, something derived from one of the columns +of the DataFrame): + +.. ipython:: python + + df[df['A'] > 0] + +List comprehensions and the ``map`` method of Series can also be used to produce +more complex criteria: + +.. 
ipython:: python + + df2 = pd.DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'], + 'b': ['x', 'y', 'y', 'x', 'y', 'x', 'x'], + 'c': np.random.randn(7)}) + + # only want 'two' or 'three' + criterion = df2['a'].map(lambda x: x.startswith('t')) + + df2[criterion] + + # equivalent but slower + df2[[x.startswith('t') for x in df2['a']]] + + # Multiple criteria + df2[criterion & (df2['b'] == 'x')] + +With the choice methods :ref:`Selection by Label `, :ref:`Selection by Position `, +and :ref:`Advanced Indexing ` you may select along more than one axis using boolean vectors combined with other indexing expressions. + +.. ipython:: python + + df2.loc[criterion & (df2['b'] == 'x'), 'b':'c'] + +.. warning:: + + ``iloc`` supports two kinds of boolean indexing. If the indexer is a boolean ``Series``, + an error will be raised. For instance, in the following example, ``df.iloc[s.values, 1]`` is ok. + The boolean indexer is an array. But ``df.iloc[s, 1]`` would raise ``ValueError``. + + .. ipython:: python + + df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], + index=list('abc'), + columns=['A', 'B']) + s = (df['A'] > 2) + s + + df.loc[s, 'B'] + + df.iloc[s.values, 1] + +.. _indexing.basics.indexing_isin: + +Indexing with isin +------------------ + +Consider the :meth:`~Series.isin` method of ``Series``, which returns a boolean +vector that is true wherever the ``Series`` elements exist in the passed list. +This allows you to select rows where one or more columns have values you want: + +.. ipython:: python + + s = pd.Series(np.arange(5), index=np.arange(5)[::-1], dtype='int64') + s + s.isin([2, 4, 6]) + s[s.isin([2, 4, 6])] + +The same method is available for ``Index`` objects and is useful for the cases +when you don't know which of the sought labels are in fact present: + +.. ipython:: python + + s[s.index.isin([2, 4, 6])] + + # compare it to the following + s.reindex([2, 4, 6]) + +In addition to that, ``MultiIndex`` allows selecting a separate level to use +in the membership check: + +.. ipython:: python + + s_mi = pd.Series(np.arange(6), + index=pd.MultiIndex.from_product([[0, 1], ['a', 'b', 'c']])) + s_mi + s_mi.iloc[s_mi.index.isin([(1, 'a'), (2, 'b'), (0, 'c')])] + s_mi.iloc[s_mi.index.isin(['a', 'c', 'e'], level=1)] + +DataFrame also has an :meth:`~DataFrame.isin` method. When calling ``isin``, pass a set of +values as either an array or dict. If values is an array, ``isin`` returns +a DataFrame of booleans that is the same shape as the original DataFrame, with True +wherever the element is in the sequence of values. + +.. ipython:: python + + df = pd.DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], + 'ids2': ['a', 'n', 'c', 'n']}) + + values = ['a', 'b', 1, 3] + + df.isin(values) + +Oftentimes you'll want to match certain values with certain columns. +Just make values a ``dict`` where the key is the column, and the value is +a list of items you want to check for. + +.. ipython:: python + + values = {'ids': ['a', 'b'], 'vals': [1, 3]} + + df.isin(values) + +To return the DataFrame of booleans where the values are *not* in the original DataFrame, +use the ``~`` operator: + +.. ipython:: python + + values = {'ids': ['a', 'b'], 'vals': [1, 3]} + + ~df.isin(values) + +Combine DataFrame's ``isin`` with the ``any()`` and ``all()`` methods to +quickly select subsets of your data that meet a given criteria. +To select a row where each column meets its own criterion: + +.. 
ipython:: python + + values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]} + + row_mask = df.isin(values).all(1) + + df[row_mask] + +.. _indexing.where_mask: + +The :meth:`~pandas.DataFrame.where` Method and Masking +------------------------------------------------------ + +Selecting values from a Series with a boolean vector generally returns a +subset of the data. To guarantee that selection output has the same shape as +the original data, you can use the ``where`` method in ``Series`` and ``DataFrame``. + +To return only the selected rows: + +.. ipython:: python + + s[s > 0] + +To return a Series of the same shape as the original: + +.. ipython:: python + + s.where(s > 0) + +Selecting values from a DataFrame with a boolean criterion now also preserves +input data shape. ``where`` is used under the hood as the implementation. +The code below is equivalent to ``df.where(df < 0)``. + +.. ipython:: python + :suppress: + + dates = pd.date_range('1/1/2000', periods=8) + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) + +.. ipython:: python + + df[df < 0] + +In addition, ``where`` takes an optional ``other`` argument for replacement of +values where the condition is False, in the returned copy. + +.. ipython:: python + + df.where(df < 0, -df) + +You may wish to set values based on some boolean criteria. +This can be done intuitively like so: + +.. ipython:: python + + s2 = s.copy() + s2[s2 < 0] = 0 + s2 + + df2 = df.copy() + df2[df2 < 0] = 0 + df2 + +By default, ``where`` returns a modified copy of the data. There is an +optional parameter ``inplace`` so that the original data can be modified +without creating a copy: + +.. ipython:: python + + df_orig = df.copy() + df_orig.where(df > 0, -df, inplace=True) + df_orig + +.. note:: + + The signature for :func:`DataFrame.where` differs from :func:`numpy.where`. + Roughly ``df1.where(m, df2)`` is equivalent to ``np.where(m, df1, df2)``. + + .. ipython:: python + + df.where(df < 0, -df) == np.where(df < 0, df, -df) + +**Alignment** + +Furthermore, ``where`` aligns the input boolean condition (ndarray or DataFrame), +such that partial selection with setting is possible. This is analogous to +partial setting via ``.loc`` (but on the contents rather than the axis labels). + +.. ipython:: python + + df2 = df.copy() + df2[df2[1:4] > 0] = 3 + df2 + +Where can also accept ``axis`` and ``level`` parameters to align the input when +performing the ``where``. + +.. ipython:: python + + df2 = df.copy() + df2.where(df2 > 0, df2['A'], axis='index') + +This is equivalent to (but faster than) the following. + +.. ipython:: python + + df2 = df.copy() + df.apply(lambda x, y: x.where(x > 0, y), y=df['A']) + +``where`` can accept a callable as condition and ``other`` arguments. The function must +be with one argument (the calling Series or DataFrame) and that returns valid output +as condition and ``other`` argument. + +.. ipython:: python + + df3 = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6], + 'C': [7, 8, 9]}) + df3.where(lambda x: x > 4, lambda x: x + 10) + +Mask +~~~~ + +:meth:`~pandas.DataFrame.mask` is the inverse boolean operation of ``where``. + +.. ipython:: python + + s.mask(s >= 0) + df.mask(df >= 0) + +.. _indexing.np_where: + +Setting with enlargement conditionally using :func:`numpy` +---------------------------------------------------------- + +An alternative to :meth:`~pandas.DataFrame.where` is to use :func:`numpy.where`. 
+Combined with setting a new column, you can use it to enlarge a DataFrame where the +values are determined conditionally. + +Consider you have two choices to choose from in the following DataFrame. And you want to +set a new column color to 'green' when the second column has 'Z'. You can do the +following: + +.. ipython:: python + + df = pd.DataFrame({'col1': list('ABBC'), 'col2': list('ZZXY')}) + df['color'] = np.where(df['col2'] == 'Z', 'green', 'red') + df + +If you have multiple conditions, you can use :func:`numpy.select` to achieve that. Say +corresponding to three conditions there are three choice of colors, with a fourth color +as a fallback, you can do the following. + +.. ipython:: python + + conditions = [ + (df['col2'] == 'Z') & (df['col1'] == 'A'), + (df['col2'] == 'Z') & (df['col1'] == 'B'), + (df['col1'] == 'B') + ] + choices = ['yellow', 'blue', 'purple'] + df['color'] = np.select(conditions, choices, default='black') + df + +.. _indexing.query: + +The :meth:`~pandas.DataFrame.query` Method +------------------------------------------ + +:class:`~pandas.DataFrame` objects have a :meth:`~pandas.DataFrame.query` +method that allows selection using an expression. + +You can get the value of the frame where column ``b`` has values +between the values of columns ``a`` and ``c``. For example: + +.. ipython:: python + + n = 10 + df = pd.DataFrame(np.random.rand(n, 3), columns=list('abc')) + df + + # pure python + df[(df['a'] < df['b']) & (df['b'] < df['c'])] + + # query + df.query('(a < b) & (b < c)') + +Do the same thing but fall back on a named index if there is no column +with the name ``a``. + +.. ipython:: python + + df = pd.DataFrame(np.random.randint(n / 2, size=(n, 2)), columns=list('bc')) + df.index.name = 'a' + df + df.query('a < b and b < c') + +If instead you don't want to or cannot name your index, you can use the name +``index`` in your query expression: + +.. ipython:: python + + df = pd.DataFrame(np.random.randint(n, size=(n, 2)), columns=list('bc')) + df + df.query('index < b < c') + +.. note:: + + If the name of your index overlaps with a column name, the column name is + given precedence. For example, + + .. ipython:: python + + df = pd.DataFrame({'a': np.random.randint(5, size=5)}) + df.index.name = 'a' + df.query('a > 2') # uses the column 'a', not the index + + You can still use the index in a query expression by using the special + identifier 'index': + + .. ipython:: python + + df.query('index > 2') + + If for some reason you have a column named ``index``, then you can refer to + the index as ``ilevel_0`` as well, but at this point you should consider + renaming your columns to something less ambiguous. + + +:class:`~pandas.MultiIndex` :meth:`~pandas.DataFrame.query` Syntax +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can also use the levels of a ``DataFrame`` with a +:class:`~pandas.MultiIndex` as if they were columns in the frame: + +.. ipython:: python + + n = 10 + colors = np.random.choice(['red', 'green'], size=n) + foods = np.random.choice(['eggs', 'ham'], size=n) + colors + foods + + index = pd.MultiIndex.from_arrays([colors, foods], names=['color', 'food']) + df = pd.DataFrame(np.random.randn(n, 2), index=index) + df + df.query('color == "red"') + +If the levels of the ``MultiIndex`` are unnamed, you can refer to them using +special names: + +.. 
ipython:: python + + df.index.names = [None, None] + df + df.query('ilevel_0 == "red"') + + +The convention is ``ilevel_0``, which means "index level 0" for the 0th level +of the ``index``. + + +:meth:`~pandas.DataFrame.query` Use Cases +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A use case for :meth:`~pandas.DataFrame.query` is when you have a collection of +:class:`~pandas.DataFrame` objects that have a subset of column names (or index +levels/names) in common. You can pass the same query to both frames *without* +having to specify which frame you're interested in querying + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(n, 3), columns=list('abc')) + df + df2 = pd.DataFrame(np.random.rand(n + 2, 3), columns=df.columns) + df2 + expr = '0.0 <= a <= c <= 0.5' + map(lambda frame: frame.query(expr), [df, df2]) + +:meth:`~pandas.DataFrame.query` Python versus pandas Syntax Comparison +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Full numpy-like syntax: + +.. ipython:: python + + df = pd.DataFrame(np.random.randint(n, size=(n, 3)), columns=list('abc')) + df + df.query('(a < b) & (b < c)') + df[(df['a'] < df['b']) & (df['b'] < df['c'])] + +Slightly nicer by removing the parentheses (comparison operators bind tighter +than ``&`` and ``|``): + +.. ipython:: python + + df.query('a < b & b < c') + +Use English instead of symbols: + +.. ipython:: python + + df.query('a < b and b < c') + +Pretty close to how you might write it on paper: + +.. ipython:: python + + df.query('a < b < c') + +The ``in`` and ``not in`` operators +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~pandas.DataFrame.query` also supports special use of Python's ``in`` and +``not in`` comparison operators, providing a succinct syntax for calling the +``isin`` method of a ``Series`` or ``DataFrame``. + +.. ipython:: python + + # get all rows where columns "a" and "b" have overlapping values + df = pd.DataFrame({'a': list('aabbccddeeff'), 'b': list('aaaabbbbcccc'), + 'c': np.random.randint(5, size=12), + 'd': np.random.randint(9, size=12)}) + df + df.query('a in b') + + # How you'd do it in pure Python + df[df['a'].isin(df['b'])] + + df.query('a not in b') + + # pure Python + df[~df['a'].isin(df['b'])] + + +You can combine this with other expressions for very succinct queries: + + +.. ipython:: python + + # rows where cols a and b have overlapping values + # and col c's values are less than col d's + df.query('a in b and c < d') + + # pure Python + df[df['b'].isin(df['a']) & (df['c'] < df['d'])] + + +.. note:: + + Note that ``in`` and ``not in`` are evaluated in Python, since ``numexpr`` + has no equivalent of this operation. However, **only the** ``in``/``not in`` + **expression itself** is evaluated in vanilla Python. For example, in the + expression + + .. code-block:: python + + df.query('a in b + c + d') + + ``(b + c + d)`` is evaluated by ``numexpr`` and *then* the ``in`` + operation is evaluated in plain Python. In general, any operations that can + be evaluated using ``numexpr`` will be. + +Special use of the ``==`` operator with ``list`` objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Comparing a ``list`` of values to a column using ``==``/``!=`` works similarly +to ``in``/``not in``. + +.. 
ipython:: python + + df.query('b == ["a", "b", "c"]') + + # pure Python + df[df['b'].isin(["a", "b", "c"])] + + df.query('c == [1, 2]') + + df.query('c != [1, 2]') + + # using in/not in + df.query('[1, 2] in c') + + df.query('[1, 2] not in c') + + # pure Python + df[df['c'].isin([1, 2])] + + +Boolean operators +~~~~~~~~~~~~~~~~~ + +You can negate boolean expressions with the word ``not`` or the ``~`` operator. + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(n, 3), columns=list('abc')) + df['bools'] = np.random.rand(len(df)) > 0.5 + df.query('~bools') + df.query('not bools') + df.query('not bools') == df[~df['bools']] + +Of course, expressions can be arbitrarily complex too: + +.. ipython:: python + + # short query syntax + shorter = df.query('a < b < c and (not bools) or bools > 2') + + # equivalent in pure Python + longer = df[(df['a'] < df['b']) + & (df['b'] < df['c']) + & (~df['bools']) + | (df['bools'] > 2)] + + shorter + longer + + shorter == longer + + +Performance of :meth:`~pandas.DataFrame.query` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``DataFrame.query()`` using ``numexpr`` is slightly faster than Python for +large frames. + +.. image:: ../_static/query-perf.png + +.. note:: + + You will only see the performance benefits of using the ``numexpr`` engine + with ``DataFrame.query()`` if your frame has more than approximately 200,000 + rows. + + .. image:: ../_static/query-perf-small.png + +This plot was created using a ``DataFrame`` with 3 columns each containing +floating point values generated using ``numpy.random.randn()``. + +.. ipython:: python + :suppress: + + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) + df2 = df.copy() + + +Duplicate data +-------------- + +.. _indexing.duplicate: + +If you want to identify and remove duplicate rows in a DataFrame, there are +two methods that will help: ``duplicated`` and ``drop_duplicates``. Each +takes as an argument the columns to use to identify duplicated rows. + +* ``duplicated`` returns a boolean vector whose length is the number of rows, and which indicates whether a row is duplicated. +* ``drop_duplicates`` removes duplicate rows. + +By default, the first observed row of a duplicate set is considered unique, but +each method has a ``keep`` parameter to specify targets to be kept. + +* ``keep='first'`` (default): mark / drop duplicates except for the first occurrence. +* ``keep='last'``: mark / drop duplicates except for the last occurrence. +* ``keep=False``: mark / drop all duplicates. + +.. ipython:: python + + df2 = pd.DataFrame({'a': ['one', 'one', 'two', 'two', 'two', 'three', 'four'], + 'b': ['x', 'y', 'x', 'y', 'x', 'x', 'x'], + 'c': np.random.randn(7)}) + df2 + df2.duplicated('a') + df2.duplicated('a', keep='last') + df2.duplicated('a', keep=False) + df2.drop_duplicates('a') + df2.drop_duplicates('a', keep='last') + df2.drop_duplicates('a', keep=False) + +Also, you can pass a list of columns to identify duplications. + +.. ipython:: python + + df2.duplicated(['a', 'b']) + df2.drop_duplicates(['a', 'b']) + +To drop duplicates by index value, use ``Index.duplicated`` then perform slicing. +The same set of options are available for the ``keep`` parameter. + +.. ipython:: python + + df3 = pd.DataFrame({'a': np.arange(6), + 'b': np.random.randn(6)}, + index=['a', 'a', 'b', 'c', 'b', 'a']) + df3 + df3.index.duplicated() + df3[~df3.index.duplicated()] + df3[~df3.index.duplicated(keep='last')] + df3[~df3.index.duplicated(keep=False)] + +.. 
_indexing.dictionarylike: + +Dictionary-like :meth:`~pandas.DataFrame.get` method +---------------------------------------------------- + +Each of Series or DataFrame have a ``get`` method which can return a +default value. + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + s.get('a') # equivalent to s['a'] + s.get('x', default=-1) + +.. _indexing.lookup: + +Looking up values by index/column labels +---------------------------------------- + +Sometimes you want to extract a set of values given a sequence of row labels +and column labels, this can be achieved by ``pandas.factorize`` and NumPy indexing. +For instance: + +.. ipython:: python + + df = pd.DataFrame({'col': ["A", "A", "B", "B"], + 'A': [80, 23, np.nan, 22], + 'B': [80, 55, 76, 67]}) + df + idx, cols = pd.factorize(df['col']) + df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx] + +Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method +which was deprecated in version 1.2.0. + +.. _indexing.class: + +Index objects +------------- + +The pandas :class:`~pandas.Index` class and its subclasses can be viewed as +implementing an *ordered multiset*. Duplicates are allowed. However, if you try +to convert an :class:`~pandas.Index` object with duplicate entries into a +``set``, an exception will be raised. + +:class:`~pandas.Index` also provides the infrastructure necessary for +lookups, data alignment, and reindexing. The easiest way to create an +:class:`~pandas.Index` directly is to pass a ``list`` or other sequence to +:class:`~pandas.Index`: + +.. ipython:: python + + index = pd.Index(['e', 'd', 'a', 'b']) + index + 'd' in index + +You can also pass a ``name`` to be stored in the index: + + +.. ipython:: python + + index = pd.Index(['e', 'd', 'a', 'b'], name='something') + index.name + +The name, if set, will be shown in the console display: + +.. ipython:: python + + index = pd.Index(list(range(5)), name='rows') + columns = pd.Index(['A', 'B', 'C'], name='cols') + df = pd.DataFrame(np.random.randn(5, 3), index=index, columns=columns) + df + df['A'] + +.. _indexing.set_metadata: + +Setting metadata +~~~~~~~~~~~~~~~~ + +Indexes are "mostly immutable", but it is possible to set and change their +``name`` attribute. You can use the ``rename``, ``set_names`` to set these attributes +directly, and they default to returning a copy. + +See :ref:`Advanced Indexing ` for usage of MultiIndexes. + +.. ipython:: python + + ind = pd.Index([1, 2, 3]) + ind.rename("apple") + ind + ind.set_names(["apple"], inplace=True) + ind.name = "bob" + ind + +``set_names``, ``set_levels``, and ``set_codes`` also take an optional +``level`` argument + +.. ipython:: python + + index = pd.MultiIndex.from_product([range(3), ['one', 'two']], names=['first', 'second']) + index + index.levels[1] + index.set_levels(["a", "b"], level=1) + +.. _indexing.set_ops: + +Set operations on Index objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The two main operations are ``union`` and ``intersection``. +Difference is provided via the ``.difference()`` method. + +.. ipython:: python + + a = pd.Index(['c', 'b', 'a']) + b = pd.Index(['c', 'e', 'd']) + a.difference(b) + +Also available is the ``symmetric_difference`` operation, which returns elements +that appear in either ``idx1`` or ``idx2``, but not in both. This is +equivalent to the Index created by ``idx1.difference(idx2).union(idx2.difference(idx1))``, +with duplicates dropped. + +.. 
ipython:: python + + idx1 = pd.Index([1, 2, 3, 4]) + idx2 = pd.Index([2, 3, 4, 5]) + idx1.symmetric_difference(idx2) + +.. note:: + + The resulting index from a set operation will be sorted in ascending order. + +When performing :meth:`Index.union` between indexes with different dtypes, the indexes +must be cast to a common dtype. Typically, though not always, this is object dtype. The +exception is when performing a union between integer and float data. In this case, the +integer values are converted to float + +.. ipython:: python + + idx1 = pd.Index([0, 1, 2]) + idx2 = pd.Index([0.5, 1.5]) + idx1.union(idx2) + +.. _indexing.missing: + +Missing values +~~~~~~~~~~~~~~ + +.. important:: + + Even though ``Index`` can hold missing values (``NaN``), it should be avoided + if you do not want any unexpected results. For example, some operations + exclude missing values implicitly. + +``Index.fillna`` fills missing values with specified scalar value. + +.. ipython:: python + + idx1 = pd.Index([1, np.nan, 3, 4]) + idx1 + idx1.fillna(2) + + idx2 = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), + pd.NaT, + pd.Timestamp('2011-01-03')]) + idx2 + idx2.fillna(pd.Timestamp('2011-01-02')) + +Set / reset index +----------------- + +Occasionally you will load or create a data set into a DataFrame and want to +add an index after you've already done so. There are a couple of different +ways. + +.. _indexing.set_index: + +Set an index +~~~~~~~~~~~~ + +DataFrame has a :meth:`~DataFrame.set_index` method which takes a column name +(for a regular ``Index``) or a list of column names (for a ``MultiIndex``). +To create a new, re-indexed DataFrame: + +.. ipython:: python + :suppress: + + data = pd.DataFrame({'a': ['bar', 'bar', 'foo', 'foo'], + 'b': ['one', 'two', 'one', 'two'], + 'c': ['z', 'y', 'x', 'w'], + 'd': [1., 2., 3, 4]}) + +.. ipython:: python + + data + indexed1 = data.set_index('c') + indexed1 + indexed2 = data.set_index(['a', 'b']) + indexed2 + +The ``append`` keyword option allow you to keep the existing index and append +the given columns to a MultiIndex: + +.. ipython:: python + + frame = data.set_index('c', drop=False) + frame = frame.set_index(['a', 'b'], append=True) + frame + +Other options in ``set_index`` allow you not drop the index columns or to add +the index in-place (without creating a new object): + +.. ipython:: python + + data.set_index('c', drop=False) + data.set_index(['a', 'b'], inplace=True) + data + +Reset the index +~~~~~~~~~~~~~~~ + +As a convenience, there is a new function on DataFrame called +:meth:`~DataFrame.reset_index` which transfers the index values into the +DataFrame's columns and sets a simple integer index. +This is the inverse operation of :meth:`~DataFrame.set_index`. + + +.. ipython:: python + + data + data.reset_index() + +The output is more similar to a SQL table or a record array. The names for the +columns derived from the index are the ones stored in the ``names`` attribute. + +You can use the ``level`` keyword to remove only a portion of the index: + +.. ipython:: python + + frame + frame.reset_index(level=1) + + +``reset_index`` takes an optional parameter ``drop`` which if true simply +discards the index, instead of putting index values in the DataFrame's columns. + +Adding an ad hoc index +~~~~~~~~~~~~~~~~~~~~~~ + +If you create an index yourself, you can just assign it to the ``index`` field: + +.. code-block:: python + + data.index = index + +.. 
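+
+For example, a minimal self-contained sketch (the frame and labels below are
+made up for illustration):
+
+.. code-block:: python
+
+    df_adhoc = pd.DataFrame({"a": [1, 2, 3]})
+    # any list-like of the right length (or an Index) can be assigned
+    df_adhoc.index = pd.Index(["x", "y", "z"], name="label")
+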
_indexing.view_versus_copy: + +Returning a view versus a copy +------------------------------ + +When setting values in a pandas object, care must be taken to avoid what is called +``chained indexing``. Here is an example. + +.. ipython:: python + + dfmi = pd.DataFrame([list('abcd'), + list('efgh'), + list('ijkl'), + list('mnop')], + columns=pd.MultiIndex.from_product([['one', 'two'], + ['first', 'second']])) + dfmi + +Compare these two access methods: + +.. ipython:: python + + dfmi['one']['second'] + +.. ipython:: python + + dfmi.loc[:, ('one', 'second')] + +These both yield the same results, so which should you use? It is instructive to understand the order +of operations on these and why method 2 (``.loc``) is much preferred over method 1 (chained ``[]``). + +``dfmi['one']`` selects the first level of the columns and returns a DataFrame that is singly-indexed. +Then another Python operation ``dfmi_with_one['second']`` selects the series indexed by ``'second'``. +This is indicated by the variable ``dfmi_with_one`` because pandas sees these operations as separate events. +e.g. separate calls to ``__getitem__``, so it has to treat them as linear operations, they happen one after another. + +Contrast this to ``df.loc[:,('one','second')]`` which passes a nested tuple of ``(slice(None),('one','second'))`` to a single call to +``__getitem__``. This allows pandas to deal with this as a single entity. Furthermore this order of operations *can* be significantly +faster, and allows one to index *both* axes if so desired. + +Why does assignment fail when using chained indexing? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The problem in the previous section is just a performance issue. What's up with +the ``SettingWithCopy`` warning? We don't **usually** throw warnings around when +you do something that might cost a few extra milliseconds! + +But it turns out that assigning to the product of chained indexing has +inherently unpredictable results. To see this, think about how the Python +interpreter executes this code: + +.. ipython:: python + :suppress: + + value = None + +.. code-block:: python + + dfmi.loc[:, ('one', 'second')] = value + # becomes + dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) + +But this code is handled differently: + +.. code-block:: python + + dfmi['one']['second'] = value + # becomes + dfmi.__getitem__('one').__setitem__('second', value) + +See that ``__getitem__`` in there? Outside of simple cases, it's very hard to +predict whether it will return a view or a copy (it depends on the memory layout +of the array, about which pandas makes no guarantees), and therefore whether +the ``__setitem__`` will modify ``dfmi`` or a temporary object that gets thrown +out immediately afterward. **That's** what ``SettingWithCopy`` is warning you +about! + +.. note:: You may be wondering whether we should be concerned about the ``loc`` + property in the first example. But ``dfmi.loc`` is guaranteed to be ``dfmi`` + itself with modified indexing behavior, so ``dfmi.loc.__getitem__`` / + ``dfmi.loc.__setitem__`` operate on ``dfmi`` directly. Of course, + ``dfmi.loc.__getitem__(idx)`` may be a view or a copy of ``dfmi``. + +Sometimes a ``SettingWithCopy`` warning will arise at times when there's no +obvious chained indexing going on. **These** are the bugs that +``SettingWithCopy`` is designed to catch! pandas is probably trying to warn you +that you've done this: + +.. code-block:: python + + def do_something(df): + foo = df[['bar', 'baz']] # Is foo a view? 
A copy? Nobody knows! + # ... many lines here ... + # We don't know whether this will modify df or not! + foo['quux'] = value + return foo + +Yikes! + +.. _indexing.evaluation_order: + +Evaluation order matters +~~~~~~~~~~~~~~~~~~~~~~~~ + +When you use chained indexing, the order and type of the indexing operation +partially determine whether the result is a slice into the original object, or +a copy of the slice. + +pandas has the ``SettingWithCopyWarning`` because assigning to a copy of a +slice is frequently not intentional, but a mistake caused by chained indexing +returning a copy where a slice was expected. + +If you would like pandas to be more or less trusting about assignment to a +chained indexing expression, you can set the :ref:`option ` +``mode.chained_assignment`` to one of these values: + +* ``'warn'``, the default, means a ``SettingWithCopyWarning`` is printed. +* ``'raise'`` means pandas will raise a ``SettingWithCopyError`` + you have to deal with. +* ``None`` will suppress the warnings entirely. + +.. ipython:: python + :okwarning: + + dfb = pd.DataFrame({'a': ['one', 'one', 'two', + 'three', 'two', 'one', 'six'], + 'c': np.arange(7)}) + + # This will show the SettingWithCopyWarning + # but the frame values will be set + dfb['c'][dfb['a'].str.startswith('o')] = 42 + +This however is operating on a copy and will not work. + +:: + + >>> pd.set_option('mode.chained_assignment','warn') + >>> dfb[dfb['a'].str.startswith('o')]['c'] = 42 + Traceback (most recent call last) + ... + SettingWithCopyWarning: + A value is trying to be set on a copy of a slice from a DataFrame. + Try using .loc[row_index,col_indexer] = value instead + +A chained assignment can also crop up in setting in a mixed dtype frame. + +.. note:: + + These setting rules apply to all of ``.loc/.iloc``. + +The following is the recommended access method using ``.loc`` for multiple items (using ``mask``) and a single item using a fixed index: + +.. ipython:: python + + dfc = pd.DataFrame({'a': ['one', 'one', 'two', + 'three', 'two', 'one', 'six'], + 'c': np.arange(7)}) + dfd = dfc.copy() + # Setting multiple items using a mask + mask = dfd['a'].str.startswith('o') + dfd.loc[mask, 'c'] = 42 + dfd + + # Setting a single item + dfd = dfc.copy() + dfd.loc[2, 'a'] = 11 + dfd + +The following *can* work at times, but it is not guaranteed to, and therefore should be avoided: + +.. ipython:: python + :okwarning: + + dfd = dfc.copy() + dfd['a'][2] = 111 + dfd + +Last, the subsequent example will **not** work at all, and so should be avoided: + +:: + + >>> pd.set_option('mode.chained_assignment','raise') + >>> dfd.loc[0]['a'] = 1111 + Traceback (most recent call last) + ... + SettingWithCopyError: + A value is trying to be set on a copy of a slice from a DataFrame. + Try using .loc[row_index,col_indexer] = value instead + +.. warning:: + + The chained assignment warnings / exceptions are aiming to inform the user of a possibly invalid + assignment. There may be false positives; situations where a chained assignment is inadvertently + reported. diff --git a/doc/source/user_guide/integer_na.rst b/doc/source/user_guide/integer_na.rst new file mode 100644 index 00000000..fe732dac --- /dev/null +++ b/doc/source/user_guide/integer_na.rst @@ -0,0 +1,151 @@ +.. currentmodule:: pandas + +{{ header }} + +.. _integer_na: + +************************** +Nullable integer data type +************************** + +.. note:: + + IntegerArray is currently experimental. Its API or implementation may + change without warning. + +.. 
versionchanged:: 1.0.0 + + Now uses :attr:`pandas.NA` as the missing value rather + than :attr:`numpy.nan`. + +In :ref:`missing_data`, we saw that pandas primarily uses ``NaN`` to represent +missing data. Because ``NaN`` is a float, this forces an array of integers with +any missing values to become floating point. In some cases, this may not matter +much. But if your integer column is, say, an identifier, casting to float can +be problematic. Some integers cannot even be represented as floating point +numbers. + +Construction +------------ + +pandas can represent integer data with possibly missing values using +:class:`arrays.IntegerArray`. This is an :ref:`extension type ` +implemented within pandas. + +.. ipython:: python + + arr = pd.array([1, 2, None], dtype=pd.Int64Dtype()) + arr + +Or the string alias ``"Int64"`` (note the capital ``"I"``, to differentiate from +NumPy's ``'int64'`` dtype: + +.. ipython:: python + + pd.array([1, 2, np.nan], dtype="Int64") + +All NA-like values are replaced with :attr:`pandas.NA`. + +.. ipython:: python + + pd.array([1, 2, np.nan, None, pd.NA], dtype="Int64") + +This array can be stored in a :class:`DataFrame` or :class:`Series` like any +NumPy array. + +.. ipython:: python + + pd.Series(arr) + +You can also pass the list-like object to the :class:`Series` constructor +with the dtype. + +.. warning:: + + Currently :meth:`pandas.array` and :meth:`pandas.Series` use different + rules for dtype inference. :meth:`pandas.array` will infer a nullable- + integer dtype + + .. ipython:: python + + pd.array([1, None]) + pd.array([1, 2]) + + For backwards-compatibility, :class:`Series` infers these as either + integer or float dtype + + .. ipython:: python + + pd.Series([1, None]) + pd.Series([1, 2]) + + We recommend explicitly providing the dtype to avoid confusion. + + .. ipython:: python + + pd.array([1, None], dtype="Int64") + pd.Series([1, None], dtype="Int64") + + In the future, we may provide an option for :class:`Series` to infer a + nullable-integer dtype. + +Operations +---------- + +Operations involving an integer array will behave similar to NumPy arrays. +Missing values will be propagated, and the data will be coerced to another +dtype if needed. + +.. ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + + # arithmetic + s + 1 + + # comparison + s == 1 + + # indexing + s.iloc[1:3] + + # operate with other dtypes + s + s.iloc[1:3].astype("Int8") + + # coerce when needed + s + 0.01 + +These dtypes can operate as part of ``DataFrame``. + +.. ipython:: python + + df = pd.DataFrame({"A": s, "B": [1, 1, 3], "C": list("aab")}) + df + df.dtypes + + +These dtypes can be merged & reshaped & casted. + +.. ipython:: python + + pd.concat([df[["A"]], df[["B", "C"]]], axis=1).dtypes + df["A"].astype(float) + +Reduction and groupby operations such as 'sum' work as well. + +.. ipython:: python + + df.sum() + df.groupby("B").A.sum() + +Scalar NA Value +--------------- + +:class:`arrays.IntegerArray` uses :attr:`pandas.NA` as its scalar +missing value. Slicing a single element that's missing will return +:attr:`pandas.NA` + +.. ipython:: python + + a = pd.array([1, None], dtype="Int64") + a[1] diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst new file mode 100644 index 00000000..7a7e518e --- /dev/null +++ b/doc/source/user_guide/io.rst @@ -0,0 +1,6601 @@ +.. _io: + +.. currentmodule:: pandas + + +=============================== +IO tools (text, CSV, HDF5, ...) 
+=============================== + +The pandas I/O API is a set of top level ``reader`` functions accessed like +:func:`pandas.read_csv` that generally return a pandas object. The corresponding +``writer`` functions are object methods that are accessed like +:meth:`DataFrame.to_csv`. Below is a table containing available ``readers`` and +``writers``. + +.. csv-table:: + :header: "Format Type", "Data Description", "Reader", "Writer" + :widths: 30, 100, 60, 60 + :delim: ; + + text;`CSV `__;:ref:`read_csv`;:ref:`to_csv` + text;Fixed-Width Text File;:ref:`read_fwf` + text;`JSON `__;:ref:`read_json`;:ref:`to_json` + text;`HTML `__;:ref:`read_html`;:ref:`to_html` + text;`LaTeX `__;;:ref:`Styler.to_latex` + text;`XML `__;:ref:`read_xml`;:ref:`to_xml` + text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard` + binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` + binary;`OpenDocument `__;:ref:`read_excel`; + binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` + binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` + binary;`Parquet Format `__;:ref:`read_parquet`;:ref:`to_parquet` + binary;`ORC Format `__;:ref:`read_orc`;:ref:`to_orc` + binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` + binary;`SAS `__;:ref:`read_sas`; + binary;`SPSS `__;:ref:`read_spss`; + binary;`Python Pickle Format `__;:ref:`read_pickle`;:ref:`to_pickle` + SQL;`SQL `__;:ref:`read_sql`;:ref:`to_sql` + SQL;`Google BigQuery `__;:ref:`read_gbq`;:ref:`to_gbq` + +:ref:`Here ` is an informal performance comparison for some of these IO methods. + +.. note:: + For examples that use the ``StringIO`` class, make sure you import it + with ``from io import StringIO`` for Python 3. + +.. _io.read_csv_table: + +CSV & text files +---------------- + +The workhorse function for reading text files (a.k.a. flat files) is +:func:`read_csv`. See the :ref:`cookbook` for some advanced strategies. + +Parsing options +''''''''''''''' + +:func:`read_csv` accepts the following common arguments: + +Basic ++++++ + +filepath_or_buffer : various + Either a path to a file (a :class:`python:str`, :class:`python:pathlib.Path`, + or :class:`py:py._path.local.LocalPath`), URL (including http, ftp, and S3 + locations), or any object with a ``read()`` method (such as an open file or + :class:`~python:io.StringIO`). +sep : str, defaults to ``','`` for :func:`read_csv`, ``\t`` for :func:`read_table` + Delimiter to use. If sep is ``None``, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will be + used and automatically detect the separator by Python's builtin sniffer tool, + :class:`python:csv.Sniffer`. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``. +delimiter : str, default ``None`` + Alternative argument name for sep. +delim_whitespace : boolean, default False + Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) + will be used as the delimiter. Equivalent to setting ``sep='\s+'``. + If this option is set to ``True``, nothing should be passed in for the + ``delimiter`` parameter. + +Column and index locations and names +++++++++++++++++++++++++++++++++++++ + +header : int or list of ints, default ``'infer'`` + Row number(s) to use as the column names, and the start of the + data. 
Default behavior is to infer the column names: if no names are + passed the behavior is identical to ``header=0`` and column names + are inferred from the first line of the file, if column names are + passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to replace + existing names. + + The header can be a list of ints that specify row locations + for a MultiIndex on the columns e.g. ``[0,1,3]``. Intervening rows + that are not specified will be skipped (e.g. 2 in this example is + skipped). Note that this parameter ignores commented lines and empty + lines if ``skip_blank_lines=True``, so header=0 denotes the first + line of data rather than the first line of the file. +names : array-like, default ``None`` + List of column names to use. If file contains no header row, then you should + explicitly pass ``header=None``. Duplicates in this list are not allowed. +index_col : int, str, sequence of int / str, or False, optional, default ``None`` + Column(s) to use as the row labels of the ``DataFrame``, either given as + string name or column index. If a sequence of int / str is given, a + MultiIndex is used. + + .. note:: + ``index_col=False`` can be used to force pandas to *not* use the first + column as the index, e.g. when you have a malformed file with delimiters at + the end of each line. + + The default value of ``None`` instructs pandas to guess. If the number of + fields in the column header row is equal to the number of fields in the body + of the data file, then a default index is used. If it is larger, then + the first columns are used as index so that the remaining number of fields in + the body are equal to the number of fields in the header. + + The first row after the header is used to determine the number of columns, + which will go into the index. If the subsequent rows contain less columns + than the first row, they are filled with ``NaN``. + + This can be avoided through ``usecols``. This ensures that the columns are + taken as is and the trailing data are ignored. +usecols : list-like or callable, default ``None`` + Return a subset of the columns. If list-like, all elements must either + be positional (i.e. integer indices into the document columns) or strings + that correspond to column names provided either by the user in ``names`` or + inferred from the document header row(s). If ``names`` are given, the document + header row(s) are not taken into account. For example, a valid list-like + ``usecols`` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + + Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. To + instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` for + ``['bar', 'foo']`` order. + + If callable, the callable function will be evaluated against the column names, + returning names where the callable function evaluates to True: + + .. ipython:: python + + import pandas as pd + from io import StringIO + + data = "col1,col2,col3\na,b,1\na,b,2\nc,d,3" + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), usecols=lambda x: x.upper() in ["COL1", "COL3"]) + + Using this parameter results in much faster parsing time and lower memory usage + when using the c engine. The Python engine loads the data first before deciding + which columns to drop. 
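+
+    As a small sketch of the order-preserving idiom described above (reusing
+    the ``data`` string from the example above):
+
+    .. code-block:: python
+
+        # usecols alone returns columns in file order; indexing afterwards
+        # imposes the requested order
+        pd.read_csv(StringIO(data), usecols=["col2", "col1"])[["col2", "col1"]]
+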
+squeeze : boolean, default ``False`` + If the parsed data only contains one column then return a ``Series``. + + .. deprecated:: 1.4.0 + Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze + the data. +prefix : str, default ``None`` + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... + + .. deprecated:: 1.4.0 + Use a list comprehension on the DataFrame's columns after calling ``read_csv``. + + .. ipython:: python + + data = "col1,col2,col3\na,b,1" + + df = pd.read_csv(StringIO(data)) + df.columns = [f"pre_{col}" for col in df.columns] + df + +mangle_dupe_cols : boolean, default ``True`` + Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'. + Passing in ``False`` will cause data to be overwritten if there are duplicate + names in the columns. + + .. deprecated:: 1.5.0 + The argument was never implemented, and a new argument where the + renaming pattern can be specified will be added instead. + +General parsing configuration ++++++++++++++++++++++++++++++ + +dtype : Type name or dict of column -> type, default ``None`` + Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32, 'c': 'Int64'}`` + Use ``str`` or ``object`` together with suitable ``na_values`` settings to preserve + and not interpret dtype. If converters are specified, they will be applied INSTEAD + of dtype conversion. + + .. versionadded:: 1.5.0 + + Support for defaultdict was added. Specify a defaultdict as input where + the default determines the dtype of the columns which are not explicitly + listed. +engine : {``'c'``, ``'python'``, ``'pyarrow'``} + Parser engine to use. The C and pyarrow engines are faster, while the python engine + is currently more feature-complete. Multithreading is currently only supported by + the pyarrow engine. + + .. versionadded:: 1.4.0 + + The "pyarrow" engine was added as an *experimental* engine, and some features + are unsupported, or may not work correctly, with this engine. +converters : dict, default ``None`` + Dict of functions for converting values in certain columns. Keys can either be + integers or column labels. +true_values : list, default ``None`` + Values to consider as ``True``. +false_values : list, default ``None`` + Values to consider as ``False``. +skipinitialspace : boolean, default ``False`` + Skip spaces after delimiter. +skiprows : list-like or integer, default ``None`` + Line numbers to skip (0-indexed) or number of lines to skip (int) at the start + of the file. + + If callable, the callable function will be evaluated against the row + indices, returning True if the row should be skipped and False otherwise: + + .. ipython:: python + + data = "col1,col2,col3\na,b,1\na,b,2\nc,d,3" + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), skiprows=lambda x: x % 2 != 0) + +skipfooter : int, default ``0`` + Number of lines at bottom of file to skip (unsupported with engine='c'). + +nrows : int, default ``None`` + Number of rows of file to read. Useful for reading pieces of large files. +low_memory : boolean, default ``True`` + Internally process the file in chunks, resulting in lower memory use + while parsing, but possibly mixed type inference. To ensure no mixed + types either set ``False``, or specify the type with the ``dtype`` parameter. + Note that the entire file is read into a single ``DataFrame`` regardless, + use the ``chunksize`` or ``iterator`` parameter to return the data in chunks. 
+ (Only valid with C parser) +memory_map : boolean, default False + If a filepath is provided for ``filepath_or_buffer``, map the file object + directly onto memory and access the data directly from there. Using this + option can improve performance because there is no longer any I/O overhead. + +NA and missing data handling +++++++++++++++++++++++++++++ + +na_values : scalar, str, list-like, or dict, default ``None`` + Additional strings to recognize as NA/NaN. If dict passed, specific per-column + NA values. See :ref:`na values const ` below + for a list of the values interpreted as NaN by default. + +keep_default_na : boolean, default ``True`` + Whether or not to include the default NaN values when parsing the data. + Depending on whether ``na_values`` is passed in, the behavior is as follows: + + * If ``keep_default_na`` is ``True``, and ``na_values`` are specified, ``na_values`` + is appended to the default NaN values used for parsing. + * If ``keep_default_na`` is ``True``, and ``na_values`` are not specified, only + the default NaN values are used for parsing. + * If ``keep_default_na`` is ``False``, and ``na_values`` are specified, only + the NaN values specified ``na_values`` are used for parsing. + * If ``keep_default_na`` is ``False``, and ``na_values`` are not specified, no + strings will be parsed as NaN. + + Note that if ``na_filter`` is passed in as ``False``, the ``keep_default_na`` and + ``na_values`` parameters will be ignored. +na_filter : boolean, default ``True`` + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing ``na_filter=False`` can improve the performance + of reading a large file. +verbose : boolean, default ``False`` + Indicate number of NA values placed in non-numeric columns. +skip_blank_lines : boolean, default ``True`` + If ``True``, skip over blank lines rather than interpreting as NaN values. + +.. _io.read_csv_table.datetime: + +Datetime handling ++++++++++++++++++ + +parse_dates : boolean or list of ints or names or list of lists or dict, default ``False``. + * If ``True`` -> try parsing the index. + * If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3 each as a separate date + column. + * If ``[[1, 3]]`` -> combine columns 1 and 3 and parse as a single date + column. + * If ``{'foo': [1, 3]}`` -> parse columns 1, 3 as date and call result 'foo'. + + .. note:: + A fast-path exists for iso8601-formatted dates. +infer_datetime_format : boolean, default ``False`` + If ``True`` and parse_dates is enabled for a column, attempt to infer the + datetime format to speed up the processing. +keep_date_col : boolean, default ``False`` + If ``True`` and parse_dates specifies combining multiple columns then keep the + original columns. +date_parser : function, default ``None`` + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. pandas will try to call date_parser in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays (as + defined by parse_dates) as arguments; 2) concatenate (row-wise) the string + values from the columns defined by parse_dates into a single array and pass + that; and 3) call date_parser once for each row using one or more strings + (corresponding to the columns defined by parse_dates) as arguments. +dayfirst : boolean, default ``False`` + DD/MM format dates, international and European format. 
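+
+    For instance, a short sketch (the CSV string below is made up):
+
+    .. code-block:: python
+
+        # with dayfirst=True, "02/01/2011" is read as 2 January 2011
+        pd.read_csv(StringIO("date\n02/01/2011"), parse_dates=["date"], dayfirst=True)
+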
+cache_dates : boolean, default True + If True, use a cache of unique, converted dates to apply the datetime + conversion. May produce significant speed-up when parsing duplicate + date strings, especially ones with timezone offsets. + + .. versionadded:: 0.25.0 + +Iteration ++++++++++ + +iterator : boolean, default ``False`` + Return ``TextFileReader`` object for iteration or getting chunks with + ``get_chunk()``. +chunksize : int, default ``None`` + Return ``TextFileReader`` object for iteration. See :ref:`iterating and chunking + ` below. + +Quoting, compression, and file format ++++++++++++++++++++++++++++++++++++++ + +compression : {``'infer'``, ``'gzip'``, ``'bz2'``, ``'zip'``, ``'xz'``, ``'zstd'``, ``None``, ``dict``}, default ``'infer'`` + For on-the-fly decompression of on-disk data. If 'infer', then use gzip, + bz2, zip, xz, or zstandard if ``filepath_or_buffer`` is path-like ending in '.gz', '.bz2', + '.zip', '.xz', '.zst', respectively, and no decompression otherwise. If using 'zip', + the ZIP file must contain only one data file to be read in. + Set to ``None`` for no decompression. Can also be a dict with key ``'method'`` + set to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other key-value pairs are + forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``, ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``. + As an example, the following could be passed for faster compression and to + create a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. + + .. versionchanged:: 1.1.0 dict option extended to support ``gzip`` and ``bz2``. + .. versionchanged:: 1.2.0 Previous versions forwarded dict entries for 'gzip' to ``gzip.open``. +thousands : str, default ``None`` + Thousands separator. +decimal : str, default ``'.'`` + Character to recognize as decimal point. E.g. use ``','`` for European data. +float_precision : string, default None + Specifies which converter the C engine should use for floating-point values. + The options are ``None`` for the ordinary converter, ``high`` for the + high-precision converter, and ``round_trip`` for the round-trip converter. +lineterminator : str (length 1), default ``None`` + Character to break file into lines. Only valid with C parser. +quotechar : str (length 1) + The character used to denote the start and end of a quoted item. Quoted items + can include the delimiter and it will be ignored. +quoting : int or ``csv.QUOTE_*`` instance, default ``0`` + Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of + ``QUOTE_MINIMAL`` (0), ``QUOTE_ALL`` (1), ``QUOTE_NONNUMERIC`` (2) or + ``QUOTE_NONE`` (3). +doublequote : boolean, default ``True`` + When ``quotechar`` is specified and ``quoting`` is not ``QUOTE_NONE``, + indicate whether or not to interpret two consecutive ``quotechar`` elements + **inside** a field as a single ``quotechar`` element. +escapechar : str (length 1), default ``None`` + One-character string used to escape delimiter when quoting is ``QUOTE_NONE``. +comment : str, default ``None`` + Indicates remainder of line should not be parsed. If found at the beginning of + a line, the line will be ignored altogether. This parameter must be a single + character. Like empty lines (as long as ``skip_blank_lines=True``), fully + commented lines are ignored by the parameter ``header`` but not by ``skiprows``. + For example, if ``comment='#'``, parsing '#empty\\na,b,c\\n1,2,3' with + ``header=0`` will result in 'a,b,c' being treated as the header. 
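+
+    A runnable sketch of that example:
+
+    .. code-block:: python
+
+        # the fully commented first line is ignored, so header=0 picks up "a,b,c"
+        pd.read_csv(StringIO("#empty\na,b,c\n1,2,3"), comment="#", header=0)
+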
+encoding : str, default ``None`` + Encoding to use for UTF when reading/writing (e.g. ``'utf-8'``). `List of + Python standard encodings + `_. +dialect : str or :class:`python:csv.Dialect` instance, default ``None`` + If provided, this parameter will override values (default or not) for the + following parameters: ``delimiter``, ``doublequote``, ``escapechar``, + ``skipinitialspace``, ``quotechar``, and ``quoting``. If it is necessary to + override values, a ParserWarning will be issued. See :class:`python:csv.Dialect` + documentation for more details. + +Error handling +++++++++++++++ + +error_bad_lines : boolean, optional, default ``None`` + Lines with too many fields (e.g. a csv line with too many commas) will by + default cause an exception to be raised, and no ``DataFrame`` will be + returned. If ``False``, then these "bad lines" will dropped from the + ``DataFrame`` that is returned. See :ref:`bad lines ` + below. + + .. deprecated:: 1.3.0 + The ``on_bad_lines`` parameter should be used instead to specify behavior upon + encountering a bad line instead. +warn_bad_lines : boolean, optional, default ``None`` + If error_bad_lines is ``False``, and warn_bad_lines is ``True``, a warning for + each "bad line" will be output. + + .. deprecated:: 1.3.0 + The ``on_bad_lines`` parameter should be used instead to specify behavior upon + encountering a bad line instead. +on_bad_lines : {{'error', 'warn', 'skip'}}, default 'error' + Specifies what to do upon encountering a bad line (a line with too many fields). + Allowed values are : + + - 'error', raise an ParserError when a bad line is encountered. + - 'warn', print a warning when a bad line is encountered and skip that line. + - 'skip', skip bad lines without raising or warning when they are encountered. + + .. versionadded:: 1.3.0 + +.. _io.dtypes: + +Specifying column data types +'''''''''''''''''''''''''''' + +You can indicate the data type for the whole ``DataFrame`` or individual +columns: + +.. ipython:: python + + import numpy as np + + data = "a,b,c,d\n1,2,3,4\n5,6,7,8\n9,10,11" + print(data) + + df = pd.read_csv(StringIO(data), dtype=object) + df + df["a"][0] + df = pd.read_csv(StringIO(data), dtype={"b": object, "c": np.float64, "d": "Int64"}) + df.dtypes + +Fortunately, pandas offers more than one way to ensure that your column(s) +contain only one ``dtype``. If you're unfamiliar with these concepts, you can +see :ref:`here` to learn more about dtypes, and +:ref:`here` to learn more about ``object`` conversion in +pandas. + + +For instance, you can use the ``converters`` argument +of :func:`~pandas.read_csv`: + +.. ipython:: python + + data = "col_1\n1\n2\n'A'\n4.22" + df = pd.read_csv(StringIO(data), converters={"col_1": str}) + df + df["col_1"].apply(type).value_counts() + +Or you can use the :func:`~pandas.to_numeric` function to coerce the +dtypes after reading in the data, + +.. ipython:: python + + df2 = pd.read_csv(StringIO(data)) + df2["col_1"] = pd.to_numeric(df2["col_1"], errors="coerce") + df2 + df2["col_1"].apply(type).value_counts() + +which will convert all valid parsing to floats, leaving the invalid parsing +as ``NaN``. + +Ultimately, how you deal with reading in columns containing mixed dtypes +depends on your specific needs. In the case above, if you wanted to ``NaN`` out +the data anomalies, then :func:`~pandas.to_numeric` is probably your best option. 
+However, if you wanted for all the data to be coerced, no matter the type, then +using the ``converters`` argument of :func:`~pandas.read_csv` would certainly be +worth trying. + +.. note:: + In some cases, reading in abnormal data with columns containing mixed dtypes + will result in an inconsistent dataset. If you rely on pandas to infer the + dtypes of your columns, the parsing engine will go and infer the dtypes for + different chunks of the data, rather than the whole dataset at once. Consequently, + you can end up with column(s) with mixed dtypes. For example, + + .. ipython:: python + :okwarning: + + col_1 = list(range(500000)) + ["a", "b"] + list(range(500000)) + df = pd.DataFrame({"col_1": col_1}) + df.to_csv("foo.csv") + mixed_df = pd.read_csv("foo.csv") + mixed_df["col_1"].apply(type).value_counts() + mixed_df["col_1"].dtype + + will result with ``mixed_df`` containing an ``int`` dtype for certain chunks + of the column, and ``str`` for others due to the mixed dtypes from the + data that was read in. It is important to note that the overall column will be + marked with a ``dtype`` of ``object``, which is used for columns with mixed dtypes. + +.. ipython:: python + :suppress: + + import os + + os.remove("foo.csv") + +.. _io.categorical: + +Specifying categorical dtype +'''''''''''''''''''''''''''' + +``Categorical`` columns can be parsed directly by specifying ``dtype='category'`` or +``dtype=CategoricalDtype(categories, ordered)``. + +.. ipython:: python + + data = "col1,col2,col3\na,b,1\na,b,2\nc,d,3" + + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data)).dtypes + pd.read_csv(StringIO(data), dtype="category").dtypes + +Individual columns can be parsed as a ``Categorical`` using a dict +specification: + +.. ipython:: python + + pd.read_csv(StringIO(data), dtype={"col1": "category"}).dtypes + +Specifying ``dtype='category'`` will result in an unordered ``Categorical`` +whose ``categories`` are the unique values observed in the data. For more +control on the categories and order, create a +:class:`~pandas.api.types.CategoricalDtype` ahead of time, and pass that for +that column's ``dtype``. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + dtype = CategoricalDtype(["d", "c", "b", "a"], ordered=True) + pd.read_csv(StringIO(data), dtype={"col1": dtype}).dtypes + +When using ``dtype=CategoricalDtype``, "unexpected" values outside of +``dtype.categories`` are treated as missing values. + +.. ipython:: python + + dtype = CategoricalDtype(["a", "b", "d"]) # No 'c' + pd.read_csv(StringIO(data), dtype={"col1": dtype}).col1 + +This matches the behavior of :meth:`Categorical.set_categories`. + +.. note:: + + With ``dtype='category'``, the resulting categories will always be parsed + as strings (object dtype). If the categories are numeric they can be + converted using the :func:`to_numeric` function, or as appropriate, another + converter such as :func:`to_datetime`. + + When ``dtype`` is a ``CategoricalDtype`` with homogeneous ``categories`` ( + all numeric, all datetimes, etc.), the conversion is done automatically. + + .. ipython:: python + + df = pd.read_csv(StringIO(data), dtype="category") + df.dtypes + df["col3"] + new_categories = pd.to_numeric(df["col3"].cat.categories) + df["col3"] = df["col3"].cat.rename_categories(new_categories) + df["col3"] + + +Naming and using columns +'''''''''''''''''''''''' + +.. _io.headers: + +Handling column names ++++++++++++++++++++++ + +A file may or may not have a header row. 
pandas assumes the first row should be +used as the column names: + +.. ipython:: python + + data = "a,b,c\n1,2,3\n4,5,6\n7,8,9" + print(data) + pd.read_csv(StringIO(data)) + +By specifying the ``names`` argument in conjunction with ``header`` you can +indicate other names to use and whether or not to throw away the header row (if +any): + +.. ipython:: python + + print(data) + pd.read_csv(StringIO(data), names=["foo", "bar", "baz"], header=0) + pd.read_csv(StringIO(data), names=["foo", "bar", "baz"], header=None) + +If the header is in a row other than the first, pass the row number to +``header``. This will skip the preceding rows: + +.. ipython:: python + + data = "skip this skip it\na,b,c\n1,2,3\n4,5,6\n7,8,9" + pd.read_csv(StringIO(data), header=1) + +.. note:: + + Default behavior is to infer the column names: if no names are + passed the behavior is identical to ``header=0`` and column names + are inferred from the first non-blank line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. + +.. _io.dupe_names: + +Duplicate names parsing +''''''''''''''''''''''' + + .. deprecated:: 1.5.0 + ``mangle_dupe_cols`` was never implemented, and a new argument where the + renaming pattern can be specified will be added instead. + +If the file or header contains duplicate names, pandas will by default +distinguish between them so as to prevent overwriting data: + +.. ipython:: python + + data = "a,b,a\n0,1,2\n3,4,5" + pd.read_csv(StringIO(data)) + +There is no more duplicate data because ``mangle_dupe_cols=True`` by default, +which modifies a series of duplicate columns 'X', ..., 'X' to become +'X', 'X.1', ..., 'X.N'. + +.. _io.usecols: + +Filtering columns (``usecols``) ++++++++++++++++++++++++++++++++ + +The ``usecols`` argument allows you to select any subset of the columns in a +file, either using the column names, position numbers or a callable: + +.. ipython:: python + + data = "a,b,c,d\n1,2,3,foo\n4,5,6,bar\n7,8,9,baz" + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), usecols=["b", "d"]) + pd.read_csv(StringIO(data), usecols=[0, 2, 3]) + pd.read_csv(StringIO(data), usecols=lambda x: x.upper() in ["A", "C"]) + +The ``usecols`` argument can also be used to specify which columns not to +use in the final result: + +.. ipython:: python + + pd.read_csv(StringIO(data), usecols=lambda x: x not in ["a", "c"]) + +In this case, the callable is specifying that we exclude the "a" and "c" +columns from the output. + +Comments and empty lines +'''''''''''''''''''''''' + +.. _io.skiplines: + +Ignoring line comments and empty lines +++++++++++++++++++++++++++++++++++++++ + +If the ``comment`` parameter is specified, then completely commented lines will +be ignored. By default, completely blank lines will be ignored as well. + +.. ipython:: python + + data = "\na,b,c\n \n# commented line\n1,2,3\n\n4,5,6" + print(data) + pd.read_csv(StringIO(data), comment="#") + +If ``skip_blank_lines=False``, then ``read_csv`` will not ignore blank lines: + +.. ipython:: python + + data = "a,b,c\n\n1,2,3\n\n\n4,5,6" + pd.read_csv(StringIO(data), skip_blank_lines=False) + +.. warning:: + + The presence of ignored lines might create ambiguities involving line numbers; + the parameter ``header`` uses row numbers (ignoring commented/empty + lines), while ``skiprows`` uses line numbers (including commented/empty lines): + + .. 
ipython:: python + + data = "#comment\na,b,c\nA,B,C\n1,2,3" + pd.read_csv(StringIO(data), comment="#", header=1) + data = "A,B,C\n#comment\na,b,c\n1,2,3" + pd.read_csv(StringIO(data), comment="#", skiprows=2) + + If both ``header`` and ``skiprows`` are specified, ``header`` will be + relative to the end of ``skiprows``. For example: + +.. ipython:: python + + data = ( + "# empty\n" + "# second empty line\n" + "# third emptyline\n" + "X,Y,Z\n" + "1,2,3\n" + "A,B,C\n" + "1,2.,4.\n" + "5.,NaN,10.0\n" + ) + print(data) + pd.read_csv(StringIO(data), comment="#", skiprows=4, header=1) + +.. _io.comments: + +Comments +++++++++ + +Sometimes comments or meta data may be included in a file: + +.. ipython:: python + :suppress: + + data = ( + "ID,level,category\n" + "Patient1,123000,x # really unpleasant\n" + "Patient2,23000,y # wouldn't take his medicine\n" + "Patient3,1234018,z # awesome" + ) + + with open("tmp.csv", "w") as fh: + fh.write(data) + +.. ipython:: python + + print(open("tmp.csv").read()) + +By default, the parser includes the comments in the output: + +.. ipython:: python + + df = pd.read_csv("tmp.csv") + df + +We can suppress the comments using the ``comment`` keyword: + +.. ipython:: python + + df = pd.read_csv("tmp.csv", comment="#") + df + +.. ipython:: python + :suppress: + + os.remove("tmp.csv") + +.. _io.unicode: + +Dealing with Unicode data +''''''''''''''''''''''''' + +The ``encoding`` argument should be used for encoded unicode data, which will +result in byte strings being decoded to unicode in the result: + +.. ipython:: python + + from io import BytesIO + + data = b"word,length\n" b"Tr\xc3\xa4umen,7\n" b"Gr\xc3\xbc\xc3\x9fe,5" + data = data.decode("utf8").encode("latin-1") + df = pd.read_csv(BytesIO(data), encoding="latin-1") + df + df["word"][1] + +Some formats which encode all characters as multiple bytes, like UTF-16, won't +parse correctly at all without specifying the encoding. `Full list of Python +standard encodings +`_. + +.. _io.index_col: + +Index columns and trailing delimiters +''''''''''''''''''''''''''''''''''''' + +If a file has one more column of data than the number of column names, the +first column will be used as the ``DataFrame``'s row names: + +.. ipython:: python + + data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10" + pd.read_csv(StringIO(data)) + +.. ipython:: python + + data = "index,a,b,c\n4,apple,bat,5.7\n8,orange,cow,10" + pd.read_csv(StringIO(data), index_col=0) + +Ordinarily, you can achieve this behavior using the ``index_col`` option. + +There are some exception cases when a file has been prepared with delimiters at +the end of each data line, confusing the parser. To explicitly disable the +index column inference and discard the last column, pass ``index_col=False``: + +.. ipython:: python + + data = "a,b,c\n4,apple,bat,\n8,orange,cow," + print(data) + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), index_col=False) + +If a subset of data is being parsed using the ``usecols`` option, the +``index_col`` specification is based on that subset, not the original data. + +.. ipython:: python + + data = "a,b,c\n4,apple,bat,\n8,orange,cow," + print(data) + pd.read_csv(StringIO(data), usecols=["b", "c"]) + pd.read_csv(StringIO(data), usecols=["b", "c"], index_col=0) + +.. 
_io.parse_dates: + +Date Handling +''''''''''''' + +Specifying date columns ++++++++++++++++++++++++ + +To better facilitate working with datetime data, :func:`read_csv` +uses the keyword arguments ``parse_dates`` and ``date_parser`` +to allow users to specify a variety of columns and date/time formats to turn the +input text data into ``datetime`` objects. + +The simplest case is to just pass in ``parse_dates=True``: + +.. ipython:: python + + with open("foo.csv", mode="w") as f: + f.write("date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5") + + # Use a column as an index, and parse it as dates. + df = pd.read_csv("foo.csv", index_col=0, parse_dates=True) + df + + # These are Python datetime objects + df.index + +It is often the case that we may want to store date and time data separately, +or store various date fields separately. the ``parse_dates`` keyword can be +used to specify a combination of columns to parse the dates and/or times from. + +You can specify a list of column lists to ``parse_dates``, the resulting date +columns will be prepended to the output (so as to not affect the existing column +order) and the new column names will be the concatenation of the component +column names: + +.. ipython:: python + + data = ( + "KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900" + ) + + with open("tmp.csv", "w") as fh: + fh.write(data) + + df = pd.read_csv("tmp.csv", header=None, parse_dates=[[1, 2], [1, 3]]) + df + +By default the parser removes the component date columns, but you can choose +to retain them via the ``keep_date_col`` keyword: + +.. ipython:: python + + df = pd.read_csv( + "tmp.csv", header=None, parse_dates=[[1, 2], [1, 3]], keep_date_col=True + ) + df + +Note that if you wish to combine multiple columns into a single date column, a +nested list must be used. In other words, ``parse_dates=[1, 2]`` indicates that +the second and third columns should each be parsed as separate date columns +while ``parse_dates=[[1, 2]]`` means the two columns should be parsed into a +single column. + +You can also use a dict to specify custom name columns: + +.. ipython:: python + + date_spec = {"nominal": [1, 2], "actual": [1, 3]} + df = pd.read_csv("tmp.csv", header=None, parse_dates=date_spec) + df + +It is important to remember that if multiple text columns are to be parsed into +a single date column, then a new column is prepended to the data. The ``index_col`` +specification is based off of this new set of columns rather than the original +data columns: + + +.. ipython:: python + + date_spec = {"nominal": [1, 2], "actual": [1, 3]} + df = pd.read_csv( + "tmp.csv", header=None, parse_dates=date_spec, index_col=0 + ) # index is the nominal column + df + +.. note:: + If a column or index contains an unparsable date, the entire column or + index will be returned unaltered as an object data type. For non-standard + datetime parsing, use :func:`to_datetime` after ``pd.read_csv``. + + +.. note:: + read_csv has a fast_path for parsing datetime strings in iso8601 format, + e.g "2000-01-01T00:01:02+00:00" and similar variations. If you can arrange + for your data to store datetimes in this format, load times will be + significantly faster, ~20x has been observed. 
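+
+To make the first of the two notes above concrete, here is a small sketch of
+falling back to :func:`to_datetime` for a non-standard format (the format and
+column names below are made up):
+
+.. code-block:: python
+
+    from io import StringIO
+
+    raw = "when,value\n2011 // 12 // 30,1\n2011 // 12 // 31,2"
+    df = pd.read_csv(StringIO(raw))  # "when" is left as object dtype
+    df["when"] = pd.to_datetime(df["when"], format="%Y // %m // %d")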
+ + +Date parsing functions +++++++++++++++++++++++ + +Finally, the parser allows you to specify a custom ``date_parser`` function to +take full advantage of the flexibility of the date parsing API: + +.. ipython:: python + + df = pd.read_csv( + "tmp.csv", header=None, parse_dates=date_spec, date_parser=pd.to_datetime + ) + df + +pandas will try to call the ``date_parser`` function in three different ways. If +an exception is raised, the next one is tried: + +1. ``date_parser`` is first called with one or more arrays as arguments, + as defined using ``parse_dates`` (e.g., ``date_parser(['2013', '2013'], ['1', '2'])``). + +2. If #1 fails, ``date_parser`` is called with all the columns + concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``). + +Note that performance-wise, you should try these methods of parsing dates in order: + +1. Try to infer the format using ``infer_datetime_format=True`` (see section below). + +2. If you know the format, use ``pd.to_datetime()``: + ``date_parser=lambda x: pd.to_datetime(x, format=...)``. + +3. If you have a really non-standard format, use a custom ``date_parser`` function. + For optimal performance, this should be vectorized, i.e., it should accept arrays + as arguments. + + +.. ipython:: python + :suppress: + + os.remove("tmp.csv") + + +.. _io.csv.mixed_timezones: + +Parsing a CSV with mixed timezones +++++++++++++++++++++++++++++++++++ + +pandas cannot natively represent a column or index with mixed timezones. If your CSV +file contains columns with a mixture of timezones, the default result will be +an object-dtype column with strings, even with ``parse_dates``. + + +.. ipython:: python + + content = """\ + a + 2000-01-01T00:00:00+05:00 + 2000-01-01T00:00:00+06:00""" + df = pd.read_csv(StringIO(content), parse_dates=["a"]) + df["a"] + +To parse the mixed-timezone values as a datetime column, pass a partially-applied +:func:`to_datetime` with ``utc=True`` as the ``date_parser``. + +.. ipython:: python + + df = pd.read_csv( + StringIO(content), + parse_dates=["a"], + date_parser=lambda col: pd.to_datetime(col, utc=True), + ) + df["a"] + + +.. _io.dayfirst: + + +Inferring datetime format ++++++++++++++++++++++++++ + +If you have ``parse_dates`` enabled for some or all of your columns, and your +datetime strings are all formatted the same way, you may get a large speed +up by setting ``infer_datetime_format=True``. If set, pandas will attempt +to guess the format of your datetime strings, and then use a faster means +of parsing the strings. 5-10x parsing speeds have been observed. pandas +will fallback to the usual parsing if either the format cannot be guessed +or the format that was guessed cannot properly parse the entire column +of strings. So in general, ``infer_datetime_format`` should not have any +negative consequences if enabled. + +Here are some examples of datetime strings that can be guessed (All +representing December 30th, 2011 at 00:00:00): + +* "20111230" +* "2011/12/30" +* "20111230 00:00:00" +* "12/30/2011 00:00:00" +* "30/Dec/2011 00:00:00" +* "30/December/2011 00:00:00" + +Note that ``infer_datetime_format`` is sensitive to ``dayfirst``. With +``dayfirst=True``, it will guess "01/12/2011" to be December 1st. With +``dayfirst=False`` (default) it will guess "01/12/2011" to be January 12th. + +.. ipython:: python + + # Try to infer the format for the index column + df = pd.read_csv( + "foo.csv", + index_col=0, + parse_dates=True, + infer_datetime_format=True, + ) + df + +.. 
ipython:: python + :suppress: + + os.remove("foo.csv") + +International date formats +++++++++++++++++++++++++++ + +While US date formats tend to be MM/DD/YYYY, many international formats use +DD/MM/YYYY instead. For convenience, a ``dayfirst`` keyword is provided: + +.. ipython:: python + + data = "date,value,cat\n1/6/2000,5,a\n2/6/2000,10,b\n3/6/2000,15,c" + print(data) + with open("tmp.csv", "w") as fh: + fh.write(data) + + pd.read_csv("tmp.csv", parse_dates=[0]) + pd.read_csv("tmp.csv", dayfirst=True, parse_dates=[0]) + +.. ipython:: python + :suppress: + + os.remove("tmp.csv") + +Writing CSVs to binary file objects ++++++++++++++++++++++++++++++++++++ + +.. versionadded:: 1.2.0 + +``df.to_csv(..., mode="wb")`` allows writing a CSV to a file object +opened binary mode. In most cases, it is not necessary to specify +``mode`` as Pandas will auto-detect whether the file object is +opened in text or binary mode. + +.. ipython:: python + + import io + + data = pd.DataFrame([0, 1, 2]) + buffer = io.BytesIO() + data.to_csv(buffer, encoding="utf-8", compression="gzip") + +.. _io.float_precision: + +Specifying method for floating-point conversion +''''''''''''''''''''''''''''''''''''''''''''''' + +The parameter ``float_precision`` can be specified in order to use +a specific floating-point converter during parsing with the C engine. +The options are the ordinary converter, the high-precision converter, and +the round-trip converter (which is guaranteed to round-trip values after +writing to a file). For example: + +.. ipython:: python + + val = "0.3066101993807095471566981359501369297504425048828125" + data = "a,b,c\n1,2,{0}".format(val) + abs( + pd.read_csv( + StringIO(data), + engine="c", + float_precision=None, + )["c"][0] - float(val) + ) + abs( + pd.read_csv( + StringIO(data), + engine="c", + float_precision="high", + )["c"][0] - float(val) + ) + abs( + pd.read_csv(StringIO(data), engine="c", float_precision="round_trip")["c"][0] + - float(val) + ) + + +.. _io.thousands: + +Thousand separators +''''''''''''''''''' + +For large numbers that have been written with a thousands separator, you can +set the ``thousands`` keyword to a string of length 1 so that integers will be parsed +correctly: + +By default, numbers with a thousands separator will be parsed as strings: + +.. ipython:: python + + data = ( + "ID|level|category\n" + "Patient1|123,000|x\n" + "Patient2|23,000|y\n" + "Patient3|1,234,018|z" + ) + + with open("tmp.csv", "w") as fh: + fh.write(data) + + df = pd.read_csv("tmp.csv", sep="|") + df + + df.level.dtype + +The ``thousands`` keyword allows integers to be parsed correctly: + +.. ipython:: python + + df = pd.read_csv("tmp.csv", sep="|", thousands=",") + df + + df.level.dtype + +.. ipython:: python + :suppress: + + os.remove("tmp.csv") + +.. _io.na_values: + +NA values +''''''''' + +To control which values are parsed as missing values (which are signified by +``NaN``), specify a string in ``na_values``. If you specify a list of strings, +then all values in it are considered to be missing values. If you specify a +number (a ``float``, like ``5.0`` or an ``integer`` like ``5``), the +corresponding equivalent values will also imply a missing value (in this case +effectively ``[5.0, 5]`` are recognized as ``NaN``). + +To completely override the default values that are recognized as missing, specify ``keep_default_na=False``. + +.. 
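+
+For example, a quick self-contained sketch of adding a custom missing-value
+marker (the CSV string below is made up):
+
+.. code-block:: python
+
+    from io import StringIO
+
+    data = "a,b\n1,missing\n2,3"
+    pd.read_csv(StringIO(data))                         # "missing" stays a string
+    pd.read_csv(StringIO(data), na_values=["missing"])  # "missing" becomes NaN
+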
_io.navaluesconst: + +The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', +'n/a', 'NA', '', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``. + +Let us consider some examples: + +.. code-block:: python + + pd.read_csv("path_to_file.csv", na_values=[5]) + +In the example above ``5`` and ``5.0`` will be recognized as ``NaN``, in +addition to the defaults. A string will first be interpreted as a numerical +``5``, then as a ``NaN``. + +.. code-block:: python + + pd.read_csv("path_to_file.csv", keep_default_na=False, na_values=[""]) + +Above, only an empty field will be recognized as ``NaN``. + +.. code-block:: python + + pd.read_csv("path_to_file.csv", keep_default_na=False, na_values=["NA", "0"]) + +Above, both ``NA`` and ``0`` as strings are ``NaN``. + +.. code-block:: python + + pd.read_csv("path_to_file.csv", na_values=["Nope"]) + +The default values, in addition to the string ``"Nope"`` are recognized as +``NaN``. + +.. _io.infinity: + +Infinity +'''''''' + +``inf`` like values will be parsed as ``np.inf`` (positive infinity), and ``-inf`` as ``-np.inf`` (negative infinity). +These will ignore the case of the value, meaning ``Inf``, will also be parsed as ``np.inf``. + + +Returning Series +'''''''''''''''' + +Using the ``squeeze`` keyword, the parser will return output with a single column +as a ``Series``: + +.. deprecated:: 1.4.0 + Users should append ``.squeeze("columns")`` to the DataFrame returned by + ``read_csv`` instead. + +.. ipython:: python + :okwarning: + + data = "level\nPatient1,123000\nPatient2,23000\nPatient3,1234018" + + with open("tmp.csv", "w") as fh: + fh.write(data) + + print(open("tmp.csv").read()) + + output = pd.read_csv("tmp.csv", squeeze=True) + output + + type(output) + +.. ipython:: python + :suppress: + + os.remove("tmp.csv") + +.. _io.boolean: + +Boolean values +'''''''''''''' + +The common values ``True``, ``False``, ``TRUE``, and ``FALSE`` are all +recognized as boolean. Occasionally you might want to recognize other values +as being boolean. To do this, use the ``true_values`` and ``false_values`` +options as follows: + +.. ipython:: python + + data = "a,b,c\n1,Yes,2\n3,No,4" + print(data) + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), true_values=["Yes"], false_values=["No"]) + +.. _io.bad_lines: + +Handling "bad" lines +'''''''''''''''''''' + +Some files may have malformed lines with too few fields or too many. Lines with +too few fields will have NA values filled in the trailing fields. Lines with +too many fields will raise an error by default: + +.. ipython:: python + :okexcept: + + data = "a,b,c\n1,2,3\n4,5,6,7\n8,9,10" + pd.read_csv(StringIO(data)) + +You can elect to skip bad lines: + +.. code-block:: ipython + + In [29]: pd.read_csv(StringIO(data), on_bad_lines="warn") + Skipping line 3: expected 3 fields, saw 4 + + Out[29]: + a b c + 0 1 2 3 + 1 8 9 10 + +Or pass a callable function to handle the bad line if ``engine="python"``. +The bad line will be a list of strings that was split by the ``sep``: + +.. code-block:: ipython + + In [29]: external_list = [] + + In [30]: def bad_lines_func(line): + ...: external_list.append(line) + ...: return line[-3:] + + In [31]: pd.read_csv(StringIO(data), on_bad_lines=bad_lines_func, engine="python") + Out[31]: + a b c + 0 1 2 3 + 1 5 6 7 + 2 8 9 10 + + In [32]: external_list + Out[32]: [4, 5, 6, 7] + + .. 
versionadded:: 1.4.0 + + +You can also use the ``usecols`` parameter to eliminate extraneous column +data that appear in some lines but not others: + +.. code-block:: ipython + + In [33]: pd.read_csv(StringIO(data), usecols=[0, 1, 2]) + + Out[33]: + a b c + 0 1 2 3 + 1 4 5 6 + 2 8 9 10 + +In case you want to keep all data including the lines with too many fields, you can +specify a sufficient number of ``names``. This ensures that lines with not enough +fields are filled with ``NaN``. + +.. code-block:: ipython + + In [34]: pd.read_csv(StringIO(data), names=['a', 'b', 'c', 'd']) + + Out[34]: + a b c d + 0 1 2 3 NaN + 1 4 5 6 7 + 2 8 9 10 NaN + +.. _io.dialect: + +Dialect +''''''' + +The ``dialect`` keyword gives greater flexibility in specifying the file format. +By default it uses the Excel dialect but you can specify either the dialect name +or a :class:`python:csv.Dialect` instance. + +Suppose you had data with unenclosed quotes: + +.. ipython:: python + + data = "label1,label2,label3\n" 'index1,"a,c,e\n' "index2,b,d,f" + print(data) + +By default, ``read_csv`` uses the Excel dialect and treats the double quote as +the quote character, which causes it to fail when it finds a newline before it +finds the closing double quote. + +We can get around this using ``dialect``: + +.. ipython:: python + :okwarning: + + import csv + + dia = csv.excel() + dia.quoting = csv.QUOTE_NONE + pd.read_csv(StringIO(data), dialect=dia) + +All of the dialect options can be specified separately by keyword arguments: + +.. ipython:: python + + data = "a,b,c~1,2,3~4,5,6" + pd.read_csv(StringIO(data), lineterminator="~") + +Another common dialect option is ``skipinitialspace``, to skip any whitespace +after a delimiter: + +.. ipython:: python + + data = "a, b, c\n1, 2, 3\n4, 5, 6" + print(data) + pd.read_csv(StringIO(data), skipinitialspace=True) + +The parsers make every attempt to "do the right thing" and not be fragile. Type +inference is a pretty big deal. If a column can be coerced to integer dtype +without altering the contents, the parser will do so. Any non-numeric +columns will come through as object dtype as with the rest of pandas objects. + +.. _io.quoting: + +Quoting and Escape Characters +''''''''''''''''''''''''''''' + +Quotes (and other escape characters) in embedded fields can be handled in any +number of ways. One way is to use backslashes; to properly parse this data, you +should pass the ``escapechar`` option: + +.. ipython:: python + + data = 'a,b\n"hello, \\"Bob\\", nice to see you",5' + print(data) + pd.read_csv(StringIO(data), escapechar="\\") + +.. _io.fwf_reader: +.. _io.fwf: + +Files with fixed width columns +'''''''''''''''''''''''''''''' + +While :func:`read_csv` reads delimited data, the :func:`read_fwf` function works +with data files that have known and fixed column widths. The function parameters +to ``read_fwf`` are largely the same as ``read_csv`` with two extra parameters, and +a different usage of the ``delimiter`` parameter: + +* ``colspecs``: A list of pairs (tuples) giving the extents of the + fixed-width fields of each line as half-open intervals (i.e., [from, to[ ). + String value 'infer' can be used to instruct the parser to try detecting + the column specifications from the first 100 rows of the data. Default + behavior, if not specified, is to infer. +* ``widths``: A list of field widths which can be used instead of 'colspecs' + if the intervals are contiguous. +* ``delimiter``: Characters to consider as filler characters in the fixed-width file. 
+ Can be used to specify the filler character of the fields + if it is not spaces (e.g., '~'). + +Consider a typical fixed-width data file: + +.. ipython:: python + + data1 = ( + "id8141 360.242940 149.910199 11950.7\n" + "id1594 444.953632 166.985655 11788.4\n" + "id1849 364.136849 183.628767 11806.2\n" + "id1230 413.836124 184.375703 11916.8\n" + "id1948 502.953953 173.237159 12468.3" + ) + with open("bar.csv", "w") as f: + f.write(data1) + +In order to parse this file into a ``DataFrame``, we simply need to supply the +column specifications to the ``read_fwf`` function along with the file name: + +.. ipython:: python + + # Column specifications are a list of half-intervals + colspecs = [(0, 6), (8, 20), (21, 33), (34, 43)] + df = pd.read_fwf("bar.csv", colspecs=colspecs, header=None, index_col=0) + df + +Note how the parser automatically picks column names X. when +``header=None`` argument is specified. Alternatively, you can supply just the +column widths for contiguous columns: + +.. ipython:: python + + # Widths are a list of integers + widths = [6, 14, 13, 10] + df = pd.read_fwf("bar.csv", widths=widths, header=None) + df + +The parser will take care of extra white spaces around the columns +so it's ok to have extra separation between the columns in the file. + +By default, ``read_fwf`` will try to infer the file's ``colspecs`` by using the +first 100 rows of the file. It can do it only in cases when the columns are +aligned and correctly separated by the provided ``delimiter`` (default delimiter +is whitespace). + +.. ipython:: python + + df = pd.read_fwf("bar.csv", header=None, index_col=0) + df + +``read_fwf`` supports the ``dtype`` parameter for specifying the types of +parsed columns to be different from the inferred type. + +.. ipython:: python + + pd.read_fwf("bar.csv", header=None, index_col=0).dtypes + pd.read_fwf("bar.csv", header=None, dtype={2: "object"}).dtypes + +.. ipython:: python + :suppress: + + os.remove("bar.csv") + + +Indexes +''''''' + +Files with an "implicit" index column ++++++++++++++++++++++++++++++++++++++ + +Consider a file with one less entry in the header than the number of data +column: + +.. ipython:: python + + data = "A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5" + print(data) + with open("foo.csv", "w") as f: + f.write(data) + +In this special case, ``read_csv`` assumes that the first column is to be used +as the index of the ``DataFrame``: + +.. ipython:: python + + pd.read_csv("foo.csv") + +Note that the dates weren't automatically parsed. In that case you would need +to do as before: + +.. ipython:: python + + df = pd.read_csv("foo.csv", parse_dates=True) + df.index + +.. ipython:: python + :suppress: + + os.remove("foo.csv") + + +Reading an index with a ``MultiIndex`` +++++++++++++++++++++++++++++++++++++++ + +.. _io.csv_multiindex: + +Suppose you have data indexed by two columns: + +.. ipython:: python + + data = 'year,indiv,zit,xit\n1977,"A",1.2,.6\n1977,"B",1.5,.5' + print(data) + with open("mindex_ex.csv", mode="w") as f: + f.write(data) + +The ``index_col`` argument to ``read_csv`` can take a list of +column numbers to turn multiple columns into a ``MultiIndex`` for the index of the +returned object: + +.. ipython:: python + + df = pd.read_csv("mindex_ex.csv", index_col=[0, 1]) + df + df.loc[1977] + +.. ipython:: python + :suppress: + + os.remove("mindex_ex.csv") + +.. 
_io.multi_index_columns: + +Reading columns with a ``MultiIndex`` ++++++++++++++++++++++++++++++++++++++ + +By specifying list of row locations for the ``header`` argument, you +can read in a ``MultiIndex`` for the columns. Specifying non-consecutive +rows will skip the intervening rows. + +.. ipython:: python + + from pandas._testing import makeCustomDataframe as mkdf + + df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv("mi.csv") + print(open("mi.csv").read()) + pd.read_csv("mi.csv", header=[0, 1, 2, 3], index_col=[0, 1]) + +``read_csv`` is also able to interpret a more common format +of multi-columns indices. + +.. ipython:: python + + data = ",a,a,a,b,c,c\n,q,r,s,t,u,v\none,1,2,3,4,5,6\ntwo,7,8,9,10,11,12" + print(data) + with open("mi2.csv", "w") as fh: + fh.write(data) + + pd.read_csv("mi2.csv", header=[0, 1], index_col=0) + +.. note:: + If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it + with ``df.to_csv(..., index=False)``, then any ``names`` on the columns index will + be *lost*. + +.. ipython:: python + :suppress: + + os.remove("mi.csv") + os.remove("mi2.csv") + +.. _io.sniff: + +Automatically "sniffing" the delimiter +'''''''''''''''''''''''''''''''''''''' + +``read_csv`` is capable of inferring delimited (not necessarily +comma-separated) files, as pandas uses the :class:`python:csv.Sniffer` +class of the csv module. For this, you have to specify ``sep=None``. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 4)) + df.to_csv("tmp.csv", sep="|") + df.to_csv("tmp2.csv", sep=":") + pd.read_csv("tmp2.csv", sep=None, engine="python") + +.. ipython:: python + :suppress: + + os.remove("tmp2.csv") + +.. _io.multiple_files: + +Reading multiple files to create a single DataFrame +''''''''''''''''''''''''''''''''''''''''''''''''''' + +It's best to use :func:`~pandas.concat` to combine multiple files. +See the :ref:`cookbook` for an example. + +.. _io.chunking: + +Iterating through files chunk by chunk +'''''''''''''''''''''''''''''''''''''' + +Suppose you wish to iterate through a (potentially very large) file lazily +rather than reading the entire file into memory, such as the following: + + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 4)) + df.to_csv("tmp.csv", sep="|") + table = pd.read_csv("tmp.csv", sep="|") + table + + +By specifying a ``chunksize`` to ``read_csv``, the return +value will be an iterable object of type ``TextFileReader``: + +.. ipython:: python + + with pd.read_csv("tmp.csv", sep="|", chunksize=4) as reader: + reader + for chunk in reader: + print(chunk) + +.. versionchanged:: 1.2 + + ``read_csv/json/sas`` return a context-manager when iterating through a file. + +Specifying ``iterator=True`` will also return the ``TextFileReader`` object: + +.. ipython:: python + + with pd.read_csv("tmp.csv", sep="|", iterator=True) as reader: + reader.get_chunk(5) + +.. ipython:: python + :suppress: + + os.remove("tmp.csv") + +Specifying the parser engine +'''''''''''''''''''''''''''' + +Pandas currently supports three engines, the C engine, the python engine, and an experimental +pyarrow engine (requires the ``pyarrow`` package). In general, the pyarrow engine is fastest +on larger workloads and is equivalent in speed to the C engine on most other workloads. +The python engine tends to be slower than the pyarrow and C engines on most workloads. However, +the pyarrow engine is much less robust than the C engine, which lacks a few features compared to the +Python engine. 
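+
+For example, a minimal sketch of selecting an engine explicitly (the small CSV
+string used here is purely illustrative):
+
+.. code-block:: python
+
+   from io import StringIO
+
+   import pandas as pd
+
+   data = "a,b,c\n1,2,3\n4,5,6"
+
+   # Default: the C engine is used where possible
+   pd.read_csv(StringIO(data), engine="c")
+
+   # Opt in to the experimental pyarrow engine (requires the pyarrow package)
+   pd.read_csv(StringIO(data), engine="pyarrow")
+
+   # The slower but more feature-complete python engine
+   pd.read_csv(StringIO(data), engine="python")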
+ +Where possible, pandas uses the C parser (specified as ``engine='c'``), but it may fall +back to Python if C-unsupported options are specified. + +Currently, options unsupported by the C and pyarrow engines include: + +* ``sep`` other than a single character (e.g. regex separators) +* ``skipfooter`` +* ``sep=None`` with ``delim_whitespace=False`` + +Specifying any of the above options will produce a ``ParserWarning`` unless the +python engine is selected explicitly using ``engine='python'``. + +Options that are unsupported by the pyarrow engine which are not covered by the list above include: + +* ``float_precision`` +* ``chunksize`` +* ``comment`` +* ``nrows`` +* ``thousands`` +* ``memory_map`` +* ``dialect`` +* ``warn_bad_lines`` +* ``error_bad_lines`` +* ``on_bad_lines`` +* ``delim_whitespace`` +* ``quoting`` +* ``lineterminator`` +* ``converters`` +* ``decimal`` +* ``iterator`` +* ``dayfirst`` +* ``infer_datetime_format`` +* ``verbose`` +* ``skipinitialspace`` +* ``low_memory`` + +Specifying these options with ``engine='pyarrow'`` will raise a ``ValueError``. + +.. _io.remote: + +Reading/writing remote files +'''''''''''''''''''''''''''' + +You can pass in a URL to read or write remote files to many of pandas' IO +functions - the following example shows reading a CSV file: + +.. code-block:: python + + df = pd.read_csv("https://download.bls.gov/pub/time.series/cu/cu.item", sep="\t") + +.. versionadded:: 1.3.0 + +A custom header can be sent alongside HTTP(s) requests by passing a dictionary +of header key value mappings to the ``storage_options`` keyword argument as shown below: + +.. code-block:: python + + headers = {"User-Agent": "pandas"} + df = pd.read_csv( + "https://download.bls.gov/pub/time.series/cu/cu.item", + sep="\t", + storage_options=headers + ) + +All URLs which are not local files or HTTP(s) are handled by +`fsspec`_, if installed, and its various filesystem implementations +(including Amazon S3, Google Cloud, SSH, FTP, webHDFS...). +Some of these implementations will require additional packages to be +installed, for example +S3 URLs require the `s3fs +`_ library: + +.. code-block:: python + + df = pd.read_json("s3://pandas-test/adatafile.json") + +When dealing with remote storage systems, you might need +extra configuration with environment variables or config files in +special locations. For example, to access data in your S3 bucket, +you will need to define credentials in one of the several ways listed in +the `S3Fs documentation +`_. The same is true +for several of the storage backends, and you should follow the links +at `fsimpl1`_ for implementations built into ``fsspec`` and `fsimpl2`_ +for those not included in the main ``fsspec`` +distribution. + +You can also pass parameters directly to the backend driver. For example, +if you do *not* have S3 credentials, you can still access public data by +specifying an anonymous connection, such as + +.. versionadded:: 1.2.0 + +.. code-block:: python + + pd.read_csv( + "s3://ncei-wcsd-archive/data/processed/SH1305/18kHz/SaKe2013" + "-D20130523-T080854_to_SaKe2013-D20130523-T085643.csv", + storage_options={"anon": True}, + ) + +``fsspec`` also allows complex URLs, for accessing data in compressed +archives, local caching of files, and more. To locally cache the above +example, you would modify the call to + +.. 
code-block:: python + + pd.read_csv( + "simplecache::s3://ncei-wcsd-archive/data/processed/SH1305/18kHz/" + "SaKe2013-D20130523-T080854_to_SaKe2013-D20130523-T085643.csv", + storage_options={"s3": {"anon": True}}, + ) + +where we specify that the "anon" parameter is meant for the "s3" part of +the implementation, not to the caching implementation. Note that this caches to a temporary +directory for the duration of the session only, but you can also specify +a permanent store. + +.. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ +.. _fsimpl1: https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations +.. _fsimpl2: https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations + +Writing out data +'''''''''''''''' + +.. _io.store_in_csv: + +Writing to CSV format ++++++++++++++++++++++ + +The ``Series`` and ``DataFrame`` objects have an instance method ``to_csv`` which +allows storing the contents of the object as a comma-separated-values file. The +function takes a number of arguments. Only the first is required. + +* ``path_or_buf``: A string path to the file to write or a file object. If a file object it must be opened with ``newline=''`` +* ``sep`` : Field delimiter for the output file (default ",") +* ``na_rep``: A string representation of a missing value (default '') +* ``float_format``: Format string for floating point numbers +* ``columns``: Columns to write (default None) +* ``header``: Whether to write out the column names (default True) +* ``index``: whether to write row (index) names (default True) +* ``index_label``: Column label(s) for index column(s) if desired. If None + (default), and ``header`` and ``index`` are True, then the index names are + used. (A sequence should be given if the ``DataFrame`` uses MultiIndex). +* ``mode`` : Python write mode, default 'w' +* ``encoding``: a string representing the encoding to use if the contents are + non-ASCII, for Python versions prior to 3 +* ``lineterminator``: Character sequence denoting line end (default ``os.linesep``) +* ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). Note that if you have set a ``float_format`` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric +* ``quotechar``: Character used to quote fields (default '"') +* ``doublequote``: Control quoting of ``quotechar`` in fields (default True) +* ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when + appropriate (default None) +* ``chunksize``: Number of rows to write at a time +* ``date_format``: Format string for datetime objects + +Writing a formatted string +++++++++++++++++++++++++++ + +.. _io.formatting: + +The ``DataFrame`` object has an instance method ``to_string`` which allows control +over the string representation of the object. All arguments are optional: + +* ``buf`` default None, for example a StringIO object +* ``columns`` default None, which columns to write +* ``col_space`` default None, minimum width of each column. +* ``na_rep`` default ``NaN``, representation of NA value +* ``formatters`` default None, a dictionary (by column) of functions each of + which takes a single argument and returns a formatted string +* ``float_format`` default None, a function which takes a single (float) + argument and returns a formatted string; to be applied to floats in the + ``DataFrame``. +* ``sparsify`` default True, set to False for a ``DataFrame`` with a hierarchical + index to print every MultiIndex key at each row. 
+* ``index_names`` default True, will print the names of the indices +* ``index`` default True, will print the index (ie, row labels) +* ``header`` default True, will print the column labels +* ``justify`` default ``left``, will print column headers left- or + right-justified + +The ``Series`` object also has a ``to_string`` method, but with only the ``buf``, +``na_rep``, ``float_format`` arguments. There is also a ``length`` argument +which, if set to ``True``, will additionally output the length of the Series. + +.. _io.json: + +JSON +---- + +Read and write ``JSON`` format files and strings. + +.. _io.json_writer: + +Writing JSON +'''''''''''' + +A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json`` +with optional parameters: + +* ``path_or_buf`` : the pathname or buffer to write the output + This can be ``None`` in which case a JSON string is returned +* ``orient`` : + + ``Series``: + * default is ``index`` + * allowed values are {``split``, ``records``, ``index``} + + ``DataFrame``: + * default is ``columns`` + * allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} + + The format of the JSON string + + .. csv-table:: + :widths: 20, 150 + :delim: ; + + ``split``; dict like {index -> [index], columns -> [columns], data -> [values]} + ``records``; list like [{column -> value}, ... , {column -> value}] + ``index``; dict like {index -> {column -> value}} + ``columns``; dict like {column -> {index -> value}} + ``values``; just the values array + ``table``; adhering to the JSON `Table Schema`_ + +* ``date_format`` : string, type of date conversion, 'epoch' for timestamp, 'iso' for ISO8601. +* ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10. +* ``force_ascii`` : force encoded string to be ASCII, default True. +* ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'. +* ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object. +* ``lines`` : If ``records`` orient, then will write each record per line as json. + +Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters. + +.. ipython:: python + + dfj = pd.DataFrame(np.random.randn(5, 2), columns=list("AB")) + json = dfj.to_json() + json + +Orient options +++++++++++++++ + +There are a number of different options for the format of the resulting JSON +file / string. Consider the following ``DataFrame`` and ``Series``: + +.. ipython:: python + + dfjo = pd.DataFrame( + dict(A=range(1, 4), B=range(4, 7), C=range(7, 10)), + columns=list("ABC"), + index=list("xyz"), + ) + dfjo + sjo = pd.Series(dict(x=15, y=16, z=17), name="D") + sjo + +**Column oriented** (the default for ``DataFrame``) serializes the data as +nested JSON objects with column labels acting as the primary index: + +.. ipython:: python + + dfjo.to_json(orient="columns") + # Not available for Series + +**Index oriented** (the default for ``Series``) similar to column oriented +but the index labels are now primary: + +.. 
ipython:: python + + dfjo.to_json(orient="index") + sjo.to_json(orient="index") + +**Record oriented** serializes the data to a JSON array of column -> value records, +index labels are not included. This is useful for passing ``DataFrame`` data to plotting +libraries, for example the JavaScript library ``d3.js``: + +.. ipython:: python + + dfjo.to_json(orient="records") + sjo.to_json(orient="records") + +**Value oriented** is a bare-bones option which serializes to nested JSON arrays of +values only, column and index labels are not included: + +.. ipython:: python + + dfjo.to_json(orient="values") + # Not available for Series + +**Split oriented** serializes to a JSON object containing separate entries for +values, index and columns. Name is also included for ``Series``: + +.. ipython:: python + + dfjo.to_json(orient="split") + sjo.to_json(orient="split") + +**Table oriented** serializes to the JSON `Table Schema`_, allowing for the +preservation of metadata including but not limited to dtypes and index names. + +.. note:: + + Any orient option that encodes to a JSON object will not preserve the ordering of + index and column labels during round-trip serialization. If you wish to preserve + label ordering use the ``split`` option as it uses ordered containers. + +Date handling ++++++++++++++ + +Writing in ISO date format: + +.. ipython:: python + + dfd = pd.DataFrame(np.random.randn(5, 2), columns=list("AB")) + dfd["date"] = pd.Timestamp("20130101") + dfd = dfd.sort_index(axis=1, ascending=False) + json = dfd.to_json(date_format="iso") + json + +Writing in ISO date format, with microseconds: + +.. ipython:: python + + json = dfd.to_json(date_format="iso", date_unit="us") + json + +Epoch timestamps, in seconds: + +.. ipython:: python + + json = dfd.to_json(date_format="epoch", date_unit="s") + json + +Writing to a file, with a date index and a date column: + +.. ipython:: python + + dfj2 = dfj.copy() + dfj2["date"] = pd.Timestamp("20130101") + dfj2["ints"] = list(range(5)) + dfj2["bools"] = True + dfj2.index = pd.date_range("20130101", periods=5) + dfj2.to_json("test.json") + + with open("test.json") as fh: + print(fh.read()) + +Fallback behavior ++++++++++++++++++ + +If the JSON serializer cannot handle the container contents directly it will +fall back in the following manner: + +* if the dtype is unsupported (e.g. ``np.complex_``) then the ``default_handler``, if provided, will be called + for each value, otherwise an exception is raised. + +* if an object is unsupported it will attempt the following: + + + * check if the object has defined a ``toDict`` method and call it. + A ``toDict`` method should return a ``dict`` which will then be JSON serialized. + + * invoke the ``default_handler`` if one was provided. + + * convert the object to a ``dict`` by traversing its contents. However this will often fail + with an ``OverflowError`` or give unexpected results. + +In general the best approach for unsupported objects or dtypes is to provide a ``default_handler``. +For example: + +.. code-block:: python + + >>> DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json() # raises + RuntimeError: Unhandled numpy dtype 15 + +can be dealt with by specifying a simple ``default_handler``: + +.. ipython:: python + + pd.DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json(default_handler=str) + +.. _io.json_reader: + +Reading JSON +'''''''''''' + +Reading a JSON string to pandas object can take a number of parameters. +The parser will try to parse a ``DataFrame`` if ``typ`` is not supplied or +is ``None``. 
To explicitly force ``Series`` parsing, pass ``typ=series`` + +* ``filepath_or_buffer`` : a **VALID** JSON string or file handle / StringIO. The string could be + a URL. Valid URL schemes include http, ftp, S3, and file. For file URLs, a host + is expected. For instance, a local file could be + file ://localhost/path/to/table.json +* ``typ`` : type of object to recover (series or frame), default 'frame' +* ``orient`` : + + Series : + * default is ``index`` + * allowed values are {``split``, ``records``, ``index``} + + DataFrame + * default is ``columns`` + * allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} + + The format of the JSON string + + .. csv-table:: + :widths: 20, 150 + :delim: ; + + ``split``; dict like {index -> [index], columns -> [columns], data -> [values]} + ``records``; list like [{column -> value}, ... , {column -> value}] + ``index``; dict like {index -> {column -> value}} + ``columns``; dict like {column -> {index -> value}} + ``values``; just the values array + ``table``; adhering to the JSON `Table Schema`_ + + +* ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if ``False``, then don't infer dtypes at all, default is True, apply only to the data. +* ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True`` +* ``convert_dates`` : a list of columns to parse for dates; If ``True``, then try to parse date-like columns, default is ``True``. +* ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns. +* ``numpy`` : direct decoding to NumPy arrays. default is ``False``; + Supports numeric data only, although labels may be non-numeric. Also note that the JSON ordering **MUST** be the same for each term if ``numpy=True``. +* ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality. +* ``date_unit`` : string, the timestamp unit to detect if converting dates. Default + None. By default the timestamp precision will be detected, if this is not desired + then pass one of 's', 'ms', 'us' or 'ns' to force timestamp precision to + seconds, milliseconds, microseconds or nanoseconds respectively. +* ``lines`` : reads file as one json object per line. +* ``encoding`` : The encoding to use to decode py3 bytes. +* ``chunksize`` : when used in combination with ``lines=True``, return a JsonReader which reads in ``chunksize`` lines per iteration. + +The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable. + +If a non-default ``orient`` was used when encoding to JSON be sure to pass the same +option here so that decoding produces sensible results, see `Orient Options`_ for an +overview. + +Data conversion ++++++++++++++++ + +The default of ``convert_axes=True``, ``dtype=True``, and ``convert_dates=True`` +will try to parse the axes, and all of the data into appropriate types, +including dates. If you need to override specific dtypes, pass a dict to +``dtype``. ``convert_axes`` should only be set to ``False`` if you need to +preserve string-like numbers (e.g. '1', '2') in an axes. + +.. note:: + + Large integer values may be converted to dates if ``convert_dates=True`` and the data and / or column labels appear 'date-like'. The exact threshold depends on the ``date_unit`` specified. 
'date-like' means that the column label meets one of the following criteria: + + * it ends with ``'_at'`` + * it ends with ``'_time'`` + * it begins with ``'timestamp'`` + * it is ``'modified'`` + * it is ``'date'`` + +.. warning:: + + When reading JSON data, automatic coercing into dtypes has some quirks: + + * an index can be reconstructed in a different order from serialization, that is, the returned order is not guaranteed to be the same as before serialization + * a column that was ``float`` data will be converted to ``integer`` if it can be done safely, e.g. a column of ``1.`` + * bool columns will be converted to ``integer`` on reconstruction + + Thus there are times where you may want to specify specific dtypes via the ``dtype`` keyword argument. + +Reading from a JSON string: + +.. ipython:: python + + pd.read_json(json) + +Reading from a file: + +.. ipython:: python + + pd.read_json("test.json") + +Don't convert any data (but still convert axes and dates): + +.. ipython:: python + + pd.read_json("test.json", dtype=object).dtypes + +Specify dtypes for conversion: + +.. ipython:: python + + pd.read_json("test.json", dtype={"A": "float32", "bools": "int8"}).dtypes + +Preserve string indices: + +.. ipython:: python + + si = pd.DataFrame( + np.zeros((4, 4)), columns=list(range(4)), index=[str(i) for i in range(4)] + ) + si + si.index + si.columns + json = si.to_json() + + sij = pd.read_json(json, convert_axes=False) + sij + sij.index + sij.columns + +Dates written in nanoseconds need to be read back in nanoseconds: + +.. ipython:: python + + json = dfj2.to_json(date_unit="ns") + + # Try to parse timestamps as milliseconds -> Won't Work + dfju = pd.read_json(json, date_unit="ms") + dfju + + # Let pandas detect the correct precision + dfju = pd.read_json(json) + dfju + + # Or specify that all timestamps are in nanoseconds + dfju = pd.read_json(json, date_unit="ns") + dfju + +The Numpy parameter ++++++++++++++++++++ + +.. note:: + This param has been deprecated as of version 1.0.0 and will raise a ``FutureWarning``. + + This supports numeric data only. Index and columns labels may be non-numeric, e.g. strings, dates etc. + +If ``numpy=True`` is passed to ``read_json`` an attempt will be made to sniff +an appropriate dtype during deserialization and to subsequently decode directly +to NumPy arrays, bypassing the need for intermediate Python objects. + +This can provide speedups if you are deserialising a large amount of numeric +data: + +.. ipython:: python + + randfloats = np.random.uniform(-100, 1000, 10000) + randfloats.shape = (1000, 10) + dffloats = pd.DataFrame(randfloats, columns=list("ABCDEFGHIJ")) + + jsonfloats = dffloats.to_json() + +.. ipython:: python + + %timeit pd.read_json(jsonfloats) + +.. ipython:: python + :okwarning: + + %timeit pd.read_json(jsonfloats, numpy=True) + +The speedup is less noticeable for smaller datasets: + +.. ipython:: python + + jsonfloats = dffloats.head(100).to_json() + +.. ipython:: python + + %timeit pd.read_json(jsonfloats) + +.. ipython:: python + :okwarning: + + %timeit pd.read_json(jsonfloats, numpy=True) + +.. warning:: + + Direct NumPy decoding makes a number of assumptions and may fail or produce + unexpected output if these assumptions are not satisfied: + + - data is numeric. + + - data is uniform. The dtype is sniffed from the first value decoded. + A ``ValueError`` may be raised, or incorrect output may be produced + if this condition is not satisfied. + + - labels are ordered. 
Labels are only read from the first container, it is assumed + that each subsequent row / column has been encoded in the same order. This should be satisfied if the + data was encoded using ``to_json`` but may not be the case if the JSON + is from another source. + +.. ipython:: python + :suppress: + + os.remove("test.json") + +.. _io.json_normalize: + +Normalization +''''''''''''' + +pandas provides a utility function to take a dict or list of dicts and *normalize* this semi-structured data +into a flat table. + +.. ipython:: python + + data = [ + {"id": 1, "name": {"first": "Coleen", "last": "Volk"}}, + {"name": {"given": "Mark", "family": "Regner"}}, + {"id": 2, "name": "Faye Raker"}, + ] + pd.json_normalize(data) + +.. ipython:: python + + data = [ + { + "state": "Florida", + "shortname": "FL", + "info": {"governor": "Rick Scott"}, + "county": [ + {"name": "Dade", "population": 12345}, + {"name": "Broward", "population": 40000}, + {"name": "Palm Beach", "population": 60000}, + ], + }, + { + "state": "Ohio", + "shortname": "OH", + "info": {"governor": "John Kasich"}, + "county": [ + {"name": "Summit", "population": 1234}, + {"name": "Cuyahoga", "population": 1337}, + ], + }, + ] + + pd.json_normalize(data, "county", ["state", "shortname", ["info", "governor"]]) + +The max_level parameter provides more control over which level to end normalization. +With max_level=1 the following snippet normalizes until 1st nesting level of the provided dict. + +.. ipython:: python + + data = [ + { + "CreatedBy": {"Name": "User001"}, + "Lookup": { + "TextField": "Some text", + "UserField": {"Id": "ID001", "Name": "Name001"}, + }, + "Image": {"a": "b"}, + } + ] + pd.json_normalize(data, max_level=1) + +.. _io.jsonl: + +Line delimited json +''''''''''''''''''' + +pandas is able to read and write line-delimited json files that are common in data processing pipelines +using Hadoop or Spark. + +For line-delimited json files, pandas can also return an iterator which reads in ``chunksize`` lines at a time. This can be useful for large files or to read from a stream. + +.. ipython:: python + + jsonl = """ + {"a": 1, "b": 2} + {"a": 3, "b": 4} + """ + df = pd.read_json(jsonl, lines=True) + df + df.to_json(orient="records", lines=True) + + # reader is an iterator that returns ``chunksize`` lines each iteration + with pd.read_json(StringIO(jsonl), lines=True, chunksize=1) as reader: + reader + for chunk in reader: + print(chunk) + +.. _io.table_schema: + +Table schema +'''''''''''' + +`Table Schema`_ is a spec for describing tabular datasets as a JSON +object. The JSON includes information on the field names, types, and +other attributes. You can use the orient ``table`` to build +a JSON string with two fields, ``schema`` and ``data``. + +.. ipython:: python + + df = pd.DataFrame( + { + "A": [1, 2, 3], + "B": ["a", "b", "c"], + "C": pd.date_range("2016-01-01", freq="d", periods=3), + }, + index=pd.Index(range(3), name="idx"), + ) + df + df.to_json(orient="table", date_format="iso") + +The ``schema`` field contains the ``fields`` key, which itself contains +a list of column name to type pairs, including the ``Index`` or ``MultiIndex`` +(see below for a list of types). +The ``schema`` field also contains a ``primaryKey`` field if the (Multi)index +is unique. + +The second field, ``data``, contains the serialized data with the ``records`` +orient. +The index is included, and any datetimes are ISO 8601 formatted, as required +by the Table Schema spec. 
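+
+To see the two fields concretely, the returned string can be decoded with the
+standard library; a minimal sketch, reusing the ``df`` defined just above:
+
+.. code-block:: python
+
+   import json
+
+   payload = json.loads(df.to_json(orient="table", date_format="iso"))
+
+   list(payload)                    # ['schema', 'data']
+   payload["schema"]["primaryKey"]  # ['idx'], because the index is unique
+   payload["data"][0]               # the first row, in records form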
+ +The full list of types supported are described in the Table Schema +spec. This table shows the mapping from pandas types: + +=============== ================= +pandas type Table Schema type +=============== ================= +int64 integer +float64 number +bool boolean +datetime64[ns] datetime +timedelta64[ns] duration +categorical any +object str +=============== ================= + +A few notes on the generated table schema: + +* The ``schema`` object contains a ``pandas_version`` field. This contains + the version of pandas' dialect of the schema, and will be incremented + with each revision. +* All dates are converted to UTC when serializing. Even timezone naive values, + which are treated as UTC with an offset of 0. + + .. ipython:: python + + from pandas.io.json import build_table_schema + + s = pd.Series(pd.date_range("2016", periods=4)) + build_table_schema(s) + +* datetimes with a timezone (before serializing), include an additional field + ``tz`` with the time zone name (e.g. ``'US/Central'``). + + .. ipython:: python + + s_tz = pd.Series(pd.date_range("2016", periods=12, tz="US/Central")) + build_table_schema(s_tz) + +* Periods are converted to timestamps before serialization, and so have the + same behavior of being converted to UTC. In addition, periods will contain + and additional field ``freq`` with the period's frequency, e.g. ``'A-DEC'``. + + .. ipython:: python + + s_per = pd.Series(1, index=pd.period_range("2016", freq="A-DEC", periods=4)) + build_table_schema(s_per) + +* Categoricals use the ``any`` type and an ``enum`` constraint listing + the set of possible values. Additionally, an ``ordered`` field is included: + + .. ipython:: python + + s_cat = pd.Series(pd.Categorical(["a", "b", "a"])) + build_table_schema(s_cat) + +* A ``primaryKey`` field, containing an array of labels, is included + *if the index is unique*: + + .. ipython:: python + + s_dupe = pd.Series([1, 2], index=[1, 1]) + build_table_schema(s_dupe) + +* The ``primaryKey`` behavior is the same with MultiIndexes, but in this + case the ``primaryKey`` is an array: + + .. ipython:: python + + s_multi = pd.Series(1, index=pd.MultiIndex.from_product([("a", "b"), (0, 1)])) + build_table_schema(s_multi) + +* The default naming roughly follows these rules: + + * For series, the ``object.name`` is used. If that's none, then the + name is ``values`` + * For ``DataFrames``, the stringified version of the column name is used + * For ``Index`` (not ``MultiIndex``), ``index.name`` is used, with a + fallback to ``index`` if that is None. + * For ``MultiIndex``, ``mi.names`` is used. If any level has no name, + then ``level_`` is used. + +``read_json`` also accepts ``orient='table'`` as an argument. This allows for +the preservation of metadata such as dtypes and index names in a +round-trippable manner. + + .. ipython:: python + + df = pd.DataFrame( + { + "foo": [1, 2, 3, 4], + "bar": ["a", "b", "c", "d"], + "baz": pd.date_range("2018-01-01", freq="d", periods=4), + "qux": pd.Categorical(["a", "b", "c", "c"]), + }, + index=pd.Index(range(4), name="idx"), + ) + df + df.dtypes + + df.to_json("test.json", orient="table") + new_df = pd.read_json("test.json", orient="table") + new_df + new_df.dtypes + +Please note that the literal string 'index' as the name of an :class:`Index` +is not round-trippable, nor are any names beginning with ``'level_'`` within a +:class:`MultiIndex`. These are used by default in :func:`DataFrame.to_json` to +indicate missing values and the subsequent read cannot distinguish the intent. + +.. 
ipython:: python + :okwarning: + + df.index.name = "index" + df.to_json("test.json", orient="table") + new_df = pd.read_json("test.json", orient="table") + print(new_df.index.name) + +.. ipython:: python + :suppress: + + os.remove("test.json") + +When using ``orient='table'`` along with user-defined ``ExtensionArray``, +the generated schema will contain an additional ``extDtype`` key in the respective +``fields`` element. This extra key is not standard but does enable JSON roundtrips +for extension types (e.g. ``read_json(df.to_json(orient="table"), orient="table")``). + +The ``extDtype`` key carries the name of the extension, if you have properly registered +the ``ExtensionDtype``, pandas will use said name to perform a lookup into the registry +and re-convert the serialized data into your custom dtype. + +.. _Table Schema: https://specs.frictionlessdata.io/table-schema/ + + +HTML +---- + +.. _io.read_html: + +Reading HTML content +'''''''''''''''''''''' + +.. warning:: + + We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas ` + below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. + +The top-level :func:`~pandas.io.html.read_html` function can accept an HTML +string/file/URL and will parse HTML tables into list of pandas ``DataFrames``. +Let's look at a few examples. + +.. note:: + + ``read_html`` returns a ``list`` of ``DataFrame`` objects, even if there is + only a single table contained in the HTML content. + +Read a URL with no options: + +.. code-block:: ipython + + In [320]: "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list" + In [321]: pd.read_html(url) + Out[321]: + [ Bank NameBank CityCity StateSt ... Acquiring InstitutionAI Closing DateClosing FundFund + 0 Almena State Bank Almena KS ... Equity Bank October 23, 2020 10538 + 1 First City Bank of Florida Fort Walton Beach FL ... United Fidelity Bank, fsb October 16, 2020 10537 + 2 The First State Bank Barboursville WV ... MVB Bank, Inc. April 3, 2020 10536 + 3 Ericson State Bank Ericson NE ... Farmers and Merchants Bank February 14, 2020 10535 + 4 City National Bank of New Jersey Newark NJ ... Industrial Bank November 1, 2019 10534 + .. ... ... ... ... ... ... ... + 558 Superior Bank, FSB Hinsdale IL ... Superior Federal, FSB July 27, 2001 6004 + 559 Malta National Bank Malta OH ... North Valley Bank May 3, 2001 4648 + 560 First Alliance Bank & Trust Co. Manchester NH ... Southern New Hampshire Bank & Trust February 2, 2001 4647 + 561 National State Bank of Metropolis Metropolis IL ... Banterra Bank of Marion December 14, 2000 4646 + 562 Bank of Honolulu Honolulu HI ... Bank of the Orient October 13, 2000 4645 + + [563 rows x 7 columns]] + +.. note:: + + The data from the above URL changes every Monday so the resulting data above may be slightly different. + +Read in the content of the file from the above URL and pass it to ``read_html`` +as a string: + +.. ipython:: python + + html_str = """ + + + + + + + + + + + +
+             <table>
+                 <tr>
+                     <th>A</th>
+                     <th>B</th>
+                     <th>C</th>
+                 </tr>
+                 <tr>
+                     <td>a</td>
+                     <td>b</td>
+                     <td>c</td>
+                 </tr>
+             </table>
    + """ + + with open("tmp.html", "w") as f: + f.write(html_str) + df = pd.read_html("tmp.html") + df[0] + +.. ipython:: python + :suppress: + + os.remove("tmp.html") + +You can even pass in an instance of ``StringIO`` if you so desire: + +.. ipython:: python + + dfs = pd.read_html(StringIO(html_str)) + dfs[0] + +.. note:: + + The following examples are not run by the IPython evaluator due to the fact + that having so many network-accessing functions slows down the documentation + build. If you spot an error or an example that doesn't run, please do not + hesitate to report it over on `pandas GitHub issues page + `__. + + +Read a URL and match a table that contains specific text: + +.. code-block:: python + + match = "Metcalf Bank" + df_list = pd.read_html(url, match=match) + +Specify a header row (by default ```` or ```` elements located within a +```` are used to form the column index, if multiple rows are contained within +```` then a MultiIndex is created); if specified, the header row is taken +from the data minus the parsed header elements (```` elements). + +.. code-block:: python + + dfs = pd.read_html(url, header=0) + +Specify an index column: + +.. code-block:: python + + dfs = pd.read_html(url, index_col=0) + +Specify a number of rows to skip: + +.. code-block:: python + + dfs = pd.read_html(url, skiprows=0) + +Specify a number of rows to skip using a list (``range`` works +as well): + +.. code-block:: python + + dfs = pd.read_html(url, skiprows=range(2)) + +Specify an HTML attribute: + +.. code-block:: python + + dfs1 = pd.read_html(url, attrs={"id": "table"}) + dfs2 = pd.read_html(url, attrs={"class": "sortable"}) + print(np.array_equal(dfs1[0], dfs2[0])) # Should be True + +Specify values that should be converted to NaN: + +.. code-block:: python + + dfs = pd.read_html(url, na_values=["No Acquirer"]) + +Specify whether to keep the default set of NaN values: + +.. code-block:: python + + dfs = pd.read_html(url, keep_default_na=False) + +Specify converters for columns. This is useful for numerical text data that has +leading zeros. By default columns that are numerical are cast to numeric +types and the leading zeros are lost. To avoid this, we can convert these +columns to strings. + +.. code-block:: python + + url_mcc = "https://en.wikipedia.org/wiki/Mobile_country_code" + dfs = pd.read_html( + url_mcc, + match="Telekom Albania", + header=0, + converters={"MNC": str}, + ) + +Use some combination of the above: + +.. code-block:: python + + dfs = pd.read_html(url, match="Metcalf Bank", index_col=0) + +Read in pandas ``to_html`` output (with some loss of floating point precision): + +.. code-block:: python + + df = pd.DataFrame(np.random.randn(2, 2)) + s = df.to_html(float_format="{0:.40g}".format) + dfin = pd.read_html(s, index_col=0) + +The ``lxml`` backend will raise an error on a failed parse if that is the only +parser you provide. If you only have a single parser you can provide just a +string, but it is considered good practice to pass a list with one string if, +for example, the function expects a sequence of strings. You may use: + +.. code-block:: python + + dfs = pd.read_html(url, "Metcalf Bank", index_col=0, flavor=["lxml"]) + +Or you could pass ``flavor='lxml'`` without a list: + +.. code-block:: python + + dfs = pd.read_html(url, "Metcalf Bank", index_col=0, flavor="lxml") + +However, if you have bs4 and html5lib installed and pass ``None`` or ``['lxml', +'bs4']`` then the parse will most likely succeed. 
Note that *as soon as a parse +succeeds, the function will return*. + +.. code-block:: python + + dfs = pd.read_html(url, "Metcalf Bank", index_col=0, flavor=["lxml", "bs4"]) + +Links can be extracted from cells along with the text using ``extract_links="all"``. + +.. ipython:: python + + html_table = """ + + + + + + + +
+     <table>
+       <tr>
+         <th>GitHub</th>
+       </tr>
+       <tr>
+         <td><a href="https://github.com/pandas-dev/pandas">pandas</a></td>
+       </tr>
+     </table>
    + """ + + df = pd.read_html( + html_table, + extract_links="all" + )[0] + df + df[("GitHub", None)] + df[("GitHub", None)].str[1] + +.. versionadded:: 1.5.0 + +.. _io.html: + +Writing to HTML files +'''''''''''''''''''''' + +``DataFrame`` objects have an instance method ``to_html`` which renders the +contents of the ``DataFrame`` as an HTML table. The function arguments are as +in the method ``to_string`` described above. + +.. note:: + + Not all of the possible options for ``DataFrame.to_html`` are shown here for + brevity's sake. See :func:`~pandas.core.frame.DataFrame.to_html` for the + full set of options. + +.. note:: + + In an HTML-rendering supported environment like a Jupyter Notebook, ``display(HTML(...))``` + will render the raw HTML into the environment. + +.. ipython:: python + + from IPython.display import display, HTML + + df = pd.DataFrame(np.random.randn(2, 2)) + df + html = df.to_html() + print(html) # raw html + display(HTML(html)) + +The ``columns`` argument will limit the columns shown: + +.. ipython:: python + + html = df.to_html(columns=[0]) + print(html) + display(HTML(html)) + +``float_format`` takes a Python callable to control the precision of floating +point values: + +.. ipython:: python + + html = df.to_html(float_format="{0:.10f}".format) + print(html) + display(HTML(html)) + + +``bold_rows`` will make the row labels bold by default, but you can turn that +off: + +.. ipython:: python + + html = df.to_html(bold_rows=False) + print(html) + display(HTML(html)) + + +The ``classes`` argument provides the ability to give the resulting HTML +table CSS classes. Note that these classes are *appended* to the existing +``'dataframe'`` class. + +.. ipython:: python + + print(df.to_html(classes=["awesome_table_class", "even_more_awesome_class"])) + +The ``render_links`` argument provides the ability to add hyperlinks to cells +that contain URLs. + +.. ipython:: python + + url_df = pd.DataFrame( + { + "name": ["Python", "pandas"], + "url": ["https://www.python.org/", "https://pandas.pydata.org"], + } + ) + html = url_df.to_html(render_links=True) + print(html) + display(HTML(html)) + +Finally, the ``escape`` argument allows you to control whether the +"<", ">" and "&" characters escaped in the resulting HTML (by default it is +``True``). So to get the HTML without escaped characters pass ``escape=False`` + +.. ipython:: python + + df = pd.DataFrame({"a": list("&<>"), "b": np.random.randn(3)}) + +Escaped: + +.. ipython:: python + + html = df.to_html() + print(html) + display(HTML(html)) + +Not escaped: + +.. ipython:: python + + html = df.to_html(escape=False) + print(html) + display(HTML(html)) + +.. note:: + + Some browsers may not show a difference in the rendering of the previous two + HTML tables. + + +.. _io.html.gotchas: + +HTML Table Parsing Gotchas +'''''''''''''''''''''''''' + +There are some versioning issues surrounding the libraries that are used to +parse HTML tables in the top-level pandas io function ``read_html``. + +**Issues with** |lxml|_ + +* Benefits + + * |lxml|_ is very fast. + + * |lxml|_ requires Cython to install correctly. + +* Drawbacks + + * |lxml|_ does *not* make any guarantees about the results of its parse + *unless* it is given |svm|_. 
+ + * In light of the above, we have chosen to allow you, the user, to use the + |lxml|_ backend, but **this backend will use** |html5lib|_ if |lxml|_ + fails to parse + + * It is therefore *highly recommended* that you install both + |BeautifulSoup4|_ and |html5lib|_, so that you will still get a valid + result (provided everything else is valid) even if |lxml|_ fails. + +**Issues with** |BeautifulSoup4|_ **using** |lxml|_ **as a backend** + +* The above issues hold here as well since |BeautifulSoup4|_ is essentially + just a wrapper around a parser backend. + +**Issues with** |BeautifulSoup4|_ **using** |html5lib|_ **as a backend** + +* Benefits + + * |html5lib|_ is far more lenient than |lxml|_ and consequently deals + with *real-life markup* in a much saner way rather than just, e.g., + dropping an element without notifying you. + + * |html5lib|_ *generates valid HTML5 markup from invalid markup + automatically*. This is extremely important for parsing HTML tables, + since it guarantees a valid document. However, that does NOT mean that + it is "correct", since the process of fixing markup does not have a + single definition. + + * |html5lib|_ is pure Python and requires no additional build steps beyond + its own installation. + +* Drawbacks + + * The biggest drawback to using |html5lib|_ is that it is slow as + molasses. However consider the fact that many tables on the web are not + big enough for the parsing algorithm runtime to matter. It is more + likely that the bottleneck will be in the process of reading the raw + text from the URL over the web, i.e., IO (input-output). For very large + tables, this might not be true. + + +.. |svm| replace:: **strictly valid markup** +.. _svm: https://validator.w3.org/docs/help.html#validation_basics + +.. |html5lib| replace:: **html5lib** +.. _html5lib: https://github.com/html5lib/html5lib-python + +.. |BeautifulSoup4| replace:: **BeautifulSoup4** +.. _BeautifulSoup4: https://www.crummy.com/software/BeautifulSoup + +.. |lxml| replace:: **lxml** +.. _lxml: https://lxml.de + +.. _io.latex: + +LaTeX +----- + +.. versionadded:: 1.3.0 + +Currently there are no methods to read from LaTeX, only output methods. + +Writing to LaTeX files +'''''''''''''''''''''' + +.. note:: + + DataFrame *and* Styler objects currently have a ``to_latex`` method. We recommend + using the `Styler.to_latex() <../reference/api/pandas.io.formats.style.Styler.to_latex.rst>`__ method + over `DataFrame.to_latex() <../reference/api/pandas.DataFrame.to_latex.rst>`__ due to the former's greater flexibility with + conditional styling, and the latter's possible future deprecation. + +Review the documentation for `Styler.to_latex <../reference/api/pandas.io.formats.style.Styler.to_latex.rst>`__, +which gives examples of conditional styling and explains the operation of its keyword +arguments. + +For simple application the following pattern is sufficient. + +.. ipython:: python + + df = pd.DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["c", "d"]) + print(df.style.to_latex()) + +To format values before output, chain the `Styler.format <../reference/api/pandas.io.formats.style.Styler.format.rst>`__ +method. + +.. ipython:: python + + print(df.style.format("€ {}").to_latex()) + +XML +--- + +.. _io.read_xml: + +Reading XML +''''''''''' + +.. versionadded:: 1.3.0 + +The top-level :func:`~pandas.io.xml.read_xml` function can accept an XML +string/file/URL and will parse nodes and attributes into a pandas ``DataFrame``. + +.. 
note:: + + Since there is no standard XML structure where design types can vary in + many ways, ``read_xml`` works best with flatter, shallow versions. If + an XML document is deeply nested, use the ``stylesheet`` feature to + transform XML into a flatter version. + +Let's look at a few examples. + +Read an XML string: + +.. ipython:: python + + xml = """ + + + Everyday Italian + Giada De Laurentiis + 2005 + 30.00 + + + Harry Potter + J K. Rowling + 2005 + 29.99 + + + Learning XML + Erik T. Ray + 2003 + 39.95 + + """ + + df = pd.read_xml(xml) + df + +Read a URL with no options: + +.. ipython:: python + + df = pd.read_xml("https://www.w3schools.com/xml/books.xml") + df + +Read in the content of the "books.xml" file and pass it to ``read_xml`` +as a string: + +.. ipython:: python + + file_path = "books.xml" + with open(file_path, "w") as f: + f.write(xml) + + with open(file_path, "r") as f: + df = pd.read_xml(f.read()) + df + +Read in the content of the "books.xml" as instance of ``StringIO`` or +``BytesIO`` and pass it to ``read_xml``: + +.. ipython:: python + + with open(file_path, "r") as f: + sio = StringIO(f.read()) + + df = pd.read_xml(sio) + df + +.. ipython:: python + + with open(file_path, "rb") as f: + bio = BytesIO(f.read()) + + df = pd.read_xml(bio) + df + +Even read XML from AWS S3 buckets such as NIH NCBI PMC Article Datasets providing +Biomedical and Life Science Jorurnals: + +.. ipython:: python + :okwarning: + + df = pd.read_xml( + "s3://pmc-oa-opendata/oa_comm/xml/all/PMC1236943.xml", + xpath=".//journal-meta", + ) + df + +With `lxml`_ as default ``parser``, you access the full-featured XML library +that extends Python's ElementTree API. One powerful tool is ability to query +nodes selectively or conditionally with more expressive XPath: + +.. _lxml: https://lxml.de + +.. ipython:: python + + df = pd.read_xml(file_path, xpath="//book[year=2005]") + df + +Specify only elements or only attributes to parse: + +.. ipython:: python + + df = pd.read_xml(file_path, elems_only=True) + df + +.. ipython:: python + + df = pd.read_xml(file_path, attrs_only=True) + df + +.. ipython:: python + :suppress: + + os.remove("books.xml") + +XML documents can have namespaces with prefixes and default namespaces without +prefixes both of which are denoted with a special attribute ``xmlns``. In order +to parse by node under a namespace context, ``xpath`` must reference a prefix. + +For example, below XML contains a namespace with prefix, ``doc``, and URI at +``https://example.com``. In order to parse ``doc:row`` nodes, +``namespaces`` must be used. + +.. ipython:: python + + xml = """ + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + + """ + + df = pd.read_xml(xml, + xpath="//doc:row", + namespaces={"doc": "https://example.com"}) + df + +Similarly, an XML document can have a default namespace without prefix. Failing +to assign a temporary prefix will return no nodes and raise a ``ValueError``. +But assigning *any* temporary name to correct URI allows parsing by nodes. + +.. ipython:: python + + xml = """ + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + + """ + + df = pd.read_xml(xml, + xpath="//pandas:row", + namespaces={"pandas": "https://example.com"}) + df + +However, if XPath does not reference node names such as default, ``/*``, then +``namespaces`` is not required. + +With `lxml`_ as parser, you can flatten nested XML documents with an XSLT +script which also can be string/file/URL types. 
As background, `XSLT`_ is +a special-purpose language written in a special XML file that can transform +original XML documents into other XML, HTML, even text (CSV, JSON, etc.) +using an XSLT processor. + +.. _lxml: https://lxml.de +.. _XSLT: https://www.w3.org/TR/xslt/ + +For example, consider this somewhat nested structure of Chicago "L" Rides +where station and rides elements encapsulate data in their own sections. +With below XSLT, ``lxml`` can transform original nested document into a flatter +output (as shown below for demonstration) for easier parse into ``DataFrame``: + +.. ipython:: python + + xml = """ + + + + 2020-09-01T00:00:00 + + 864.2 + 534 + 417.2 + + + + + 2020-09-01T00:00:00 + + 2707.4 + 1909.8 + 1438.6 + + + + + 2020-09-01T00:00:00 + + 2949.6 + 1657 + 1453.8 + + + """ + + xsl = """ + + + + + + + + + + + + + + + """ + + output = """ + + + 40850 + Library + 2020-09-01T00:00:00 + 864.2 + 534 + 417.2 + + + 41700 + Washington/Wabash + 2020-09-01T00:00:00 + 2707.4 + 1909.8 + 1438.6 + + + 40380 + Clark/Lake + 2020-09-01T00:00:00 + 2949.6 + 1657 + 1453.8 + + """ + + df = pd.read_xml(xml, stylesheet=xsl) + df + +For very large XML files that can range in hundreds of megabytes to gigabytes, :func:`pandas.read_xml` +supports parsing such sizeable files using `lxml's iterparse`_ and `etree's iterparse`_ +which are memory-efficient methods to iterate through an XML tree and extract specific elements and attributes. +without holding entire tree in memory. + + .. versionadded:: 1.5.0 + +.. _`lxml's iterparse`: https://lxml.de/3.2/parsing.html#iterparse-and-iterwalk +.. _`etree's iterparse`: https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse + +To use this feature, you must pass a physical XML file path into ``read_xml`` and use the ``iterparse`` argument. +Files should not be compressed or point to online sources but stored on local disk. Also, ``iterparse`` should be +a dictionary where the key is the repeating nodes in document (which become the rows) and the value is a list of +any element or attribute that is a descendant (i.e., child, grandchild) of repeating node. Since XPath is not +used in this method, descendants do not need to share same relationship with one another. Below shows example +of reading in Wikipedia's very large (12 GB+) latest article data dump. + +.. code-block:: ipython + + In [1]: df = pd.read_xml( + ... "/path/to/downloaded/enwikisource-latest-pages-articles.xml", + ... iterparse = {"page": ["title", "ns", "id"]} + ... ) + ... df + Out[2]: + title ns id + 0 Gettysburg Address 0 21450 + 1 Main Page 0 42950 + 2 Declaration by United Nations 0 8435 + 3 Constitution of the United States of America 0 8435 + 4 Declaration of Independence (Israel) 0 17858 + ... ... ... ... + 3578760 Page:Black cat 1897 07 v2 n10.pdf/17 104 219649 + 3578761 Page:Black cat 1897 07 v2 n10.pdf/43 104 219649 + 3578762 Page:Black cat 1897 07 v2 n10.pdf/44 104 219649 + 3578763 The History of Tom Jones, a Foundling/Book IX 0 12084291 + 3578764 Page:Shakespeare of Stratford (1926) Yale.djvu/91 104 21450 + + [3578765 rows x 3 columns] + +.. _io.xml: + +Writing XML +''''''''''' + +.. versionadded:: 1.3.0 + +``DataFrame`` objects have an instance method ``to_xml`` which renders the +contents of the ``DataFrame`` as an XML document. + +.. note:: + + This method does not support special properties of XML including DTD, + CData, XSD schemas, processing instructions, comments, and others. + Only namespaces at the root level is supported. 
However, ``stylesheet`` + allows design changes after initial output. + +Let's look at a few examples. + +Write an XML without options: + +.. ipython:: python + + geom_df = pd.DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } + ) + + print(geom_df.to_xml()) + + +Write an XML with new root and row name: + +.. ipython:: python + + print(geom_df.to_xml(root_name="geometry", row_name="objects")) + +Write an attribute-centric XML: + +.. ipython:: python + + print(geom_df.to_xml(attr_cols=geom_df.columns.tolist())) + +Write a mix of elements and attributes: + +.. ipython:: python + + print( + geom_df.to_xml( + index=False, + attr_cols=['shape'], + elem_cols=['degrees', 'sides']) + ) + +Any ``DataFrames`` with hierarchical columns will be flattened for XML element names +with levels delimited by underscores: + +.. ipython:: python + + ext_geom_df = pd.DataFrame( + { + "type": ["polygon", "other", "polygon"], + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } + ) + + pvt_df = ext_geom_df.pivot_table(index='shape', + columns='type', + values=['degrees', 'sides'], + aggfunc='sum') + pvt_df + + print(pvt_df.to_xml()) + +Write an XML with default namespace: + +.. ipython:: python + + print(geom_df.to_xml(namespaces={"": "https://example.com"})) + +Write an XML with namespace prefix: + +.. ipython:: python + + print( + geom_df.to_xml(namespaces={"doc": "https://example.com"}, + prefix="doc") + ) + +Write an XML without declaration or pretty print: + +.. ipython:: python + + print( + geom_df.to_xml(xml_declaration=False, + pretty_print=False) + ) + +Write an XML and transform with stylesheet: + +.. ipython:: python + + xsl = """ + + + + + + + + + + + polygon + + + + + + + + """ + + print(geom_df.to_xml(stylesheet=xsl)) + + +XML Final Notes +''''''''''''''' + +* All XML documents adhere to `W3C specifications`_. Both ``etree`` and ``lxml`` + parsers will fail to parse any markup document that is not well-formed or + follows XML syntax rules. Do be aware HTML is not an XML document unless it + follows XHTML specs. However, other popular markup types including KML, XAML, + RSS, MusicML, MathML are compliant `XML schemas`_. + +* For above reason, if your application builds XML prior to pandas operations, + use appropriate DOM libraries like ``etree`` and ``lxml`` to build the necessary + document and not by string concatenation or regex adjustments. Always remember + XML is a *special* text file with markup rules. + +* With very large XML files (several hundred MBs to GBs), XPath and XSLT + can become memory-intensive operations. Be sure to have enough available + RAM for reading and writing to large XML files (roughly about 5 times the + size of text). + +* Because XSLT is a programming language, use it with caution since such scripts + can pose a security risk in your environment and can run large or infinite + recursive operations. Always test scripts on small fragments before full run. + +* The `etree`_ parser supports all functionality of both ``read_xml`` and + ``to_xml`` except for complex XPath and any XSLT. Though limited in features, + ``etree`` is still a reliable and capable parser and tree builder. Its + performance may trail ``lxml`` to a certain degree for larger files but + relatively unnoticeable on small to medium size files. + +.. _`W3C specifications`: https://www.w3.org/TR/xml/ +.. _`XML schemas`: https://en.wikipedia.org/wiki/List_of_types_of_XML_schemas +.. 
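For illustration only, here is a minimal sketch of the parser choice discussed
in the notes above, reusing the ``geom_df`` frame from the writing examples;
apart from complex XPath and XSLT support, the result is expected to be
equivalent to the default ``lxml`` parser:

.. code-block:: python

    # write with the etree parser instead of the default lxml
    xml_out = geom_df.to_xml(parser="etree")

    # and read it back, again with etree
    pd.read_xml(xml_out, parser="etree")

..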
_`etree`: https://docs.python.org/3/library/xml.etree.elementtree.html + + + +.. _io.excel: + +Excel files +----------- + +The :func:`~pandas.read_excel` method can read Excel 2007+ (``.xlsx``) files +using the ``openpyxl`` Python module. Excel 2003 (``.xls``) files +can be read using ``xlrd``. Binary Excel (``.xlsb``) +files can be read using ``pyxlsb``. +The :meth:`~DataFrame.to_excel` instance method is used for +saving a ``DataFrame`` to Excel. Generally the semantics are +similar to working with :ref:`csv` data. +See the :ref:`cookbook` for some advanced strategies. + +.. warning:: + + The `xlwt `__ package for writing old-style ``.xls`` + excel files is no longer maintained. + The `xlrd `__ package is now only for reading + old-style ``.xls`` files. + + Before pandas 1.3.0, the default argument ``engine=None`` to :func:`~pandas.read_excel` + would result in using the ``xlrd`` engine in many cases, including new + Excel 2007+ (``.xlsx``) files. pandas will now default to using the + `openpyxl `__ engine. + + It is strongly encouraged to install ``openpyxl`` to read Excel 2007+ + (``.xlsx``) files. + **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.** + This is no longer supported, switch to using ``openpyxl`` instead. + + Attempting to use the ``xlwt`` engine will raise a ``FutureWarning`` + unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. + While this option is now deprecated and will also raise a ``FutureWarning``, + it can be globally set and the warning suppressed. Users are recommended to + write ``.xlsx`` files using the ``openpyxl`` engine instead. + +.. _io.excel_reader: + +Reading Excel files +''''''''''''''''''' + +In the most basic use-case, ``read_excel`` takes a path to an Excel +file, and the ``sheet_name`` indicating which sheet to parse. + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel("path_to_file.xls", sheet_name="Sheet1") + + +.. _io.excel.excelfile_class: + +``ExcelFile`` class ++++++++++++++++++++ + +To facilitate working with multiple sheets from the same file, the ``ExcelFile`` +class can be used to wrap the file and can be passed into ``read_excel`` +There will be a performance benefit for reading multiple sheets as the file is +read into memory only once. + +.. code-block:: python + + xlsx = pd.ExcelFile("path_to_file.xls") + df = pd.read_excel(xlsx, "Sheet1") + +The ``ExcelFile`` class can also be used as a context manager. + +.. code-block:: python + + with pd.ExcelFile("path_to_file.xls") as xls: + df1 = pd.read_excel(xls, "Sheet1") + df2 = pd.read_excel(xls, "Sheet2") + +The ``sheet_names`` property will generate +a list of the sheet names in the file. + +The primary use-case for an ``ExcelFile`` is parsing multiple sheets with +different parameters: + +.. code-block:: python + + data = {} + # For when Sheet1's format differs from Sheet2 + with pd.ExcelFile("path_to_file.xls") as xls: + data["Sheet1"] = pd.read_excel(xls, "Sheet1", index_col=None, na_values=["NA"]) + data["Sheet2"] = pd.read_excel(xls, "Sheet2", index_col=1) + +Note that if the same parsing parameters are used for all sheets, a list +of sheet names can simply be passed to ``read_excel`` with no loss in performance. + +.. 
code-block:: python + + # using the ExcelFile class + data = {} + with pd.ExcelFile("path_to_file.xls") as xls: + data["Sheet1"] = pd.read_excel(xls, "Sheet1", index_col=None, na_values=["NA"]) + data["Sheet2"] = pd.read_excel(xls, "Sheet2", index_col=None, na_values=["NA"]) + + # equivalent using the read_excel function + data = pd.read_excel( + "path_to_file.xls", ["Sheet1", "Sheet2"], index_col=None, na_values=["NA"] + ) + +``ExcelFile`` can also be called with a ``xlrd.book.Book`` object +as a parameter. This allows the user to control how the excel file is read. +For example, sheets can be loaded on demand by calling ``xlrd.open_workbook()`` +with ``on_demand=True``. + +.. code-block:: python + + import xlrd + + xlrd_book = xlrd.open_workbook("path_to_file.xls", on_demand=True) + with pd.ExcelFile(xlrd_book) as xls: + df1 = pd.read_excel(xls, "Sheet1") + df2 = pd.read_excel(xls, "Sheet2") + +.. _io.excel.specifying_sheets: + +Specifying sheets ++++++++++++++++++ + +.. note:: The second argument is ``sheet_name``, not to be confused with ``ExcelFile.sheet_names``. + +.. note:: An ExcelFile's attribute ``sheet_names`` provides access to a list of sheets. + +* The arguments ``sheet_name`` allows specifying the sheet or sheets to read. +* The default value for ``sheet_name`` is 0, indicating to read the first sheet +* Pass a string to refer to the name of a particular sheet in the workbook. +* Pass an integer to refer to the index of a sheet. Indices follow Python + convention, beginning at 0. +* Pass a list of either strings or integers, to return a dictionary of specified sheets. +* Pass a ``None`` to return a dictionary of all available sheets. + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel("path_to_file.xls", "Sheet1", index_col=None, na_values=["NA"]) + +Using the sheet index: + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel("path_to_file.xls", 0, index_col=None, na_values=["NA"]) + +Using all default values: + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel("path_to_file.xls") + +Using None to get all sheets: + +.. code-block:: python + + # Returns a dictionary of DataFrames + pd.read_excel("path_to_file.xls", sheet_name=None) + +Using a list to get multiple sheets: + +.. code-block:: python + + # Returns the 1st and 4th sheet, as a dictionary of DataFrames. + pd.read_excel("path_to_file.xls", sheet_name=["Sheet1", 3]) + +``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either +a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. +Sheets can be specified by sheet index or sheet name, using an integer or string, +respectively. + +.. _io.excel.reading_multiindex: + +Reading a ``MultiIndex`` +++++++++++++++++++++++++ + +``read_excel`` can read a ``MultiIndex`` index, by passing a list of columns to ``index_col`` +and a ``MultiIndex`` column by passing a list of rows to ``header``. If either the ``index`` +or ``columns`` have serialized level names those will be read in as well by specifying +the rows/columns that make up the levels. + +For example, to read in a ``MultiIndex`` index without names: + +.. ipython:: python + + df = pd.DataFrame( + {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}, + index=pd.MultiIndex.from_product([["a", "b"], ["c", "d"]]), + ) + df.to_excel("path_to_file.xlsx") + df = pd.read_excel("path_to_file.xlsx", index_col=[0, 1]) + df + +If the index has level names, they will parsed as well, using the same +parameters. + +.. 
ipython:: python + + df.index = df.index.set_names(["lvl1", "lvl2"]) + df.to_excel("path_to_file.xlsx") + df = pd.read_excel("path_to_file.xlsx", index_col=[0, 1]) + df + + +If the source file has both ``MultiIndex`` index and columns, lists specifying each +should be passed to ``index_col`` and ``header``: + +.. ipython:: python + + df.columns = pd.MultiIndex.from_product([["a"], ["b", "d"]], names=["c1", "c2"]) + df.to_excel("path_to_file.xlsx") + df = pd.read_excel("path_to_file.xlsx", index_col=[0, 1], header=[0, 1]) + df + +.. ipython:: python + :suppress: + + os.remove("path_to_file.xlsx") + +Missing values in columns specified in ``index_col`` will be forward filled to +allow roundtripping with ``to_excel`` for ``merged_cells=True``. To avoid forward +filling the missing values use ``set_index`` after reading the data instead of +``index_col``. + +Parsing specific columns +++++++++++++++++++++++++ + +It is often the case that users will insert columns to do temporary computations +in Excel and you may not want to read in those columns. ``read_excel`` takes +a ``usecols`` keyword to allow you to specify a subset of columns to parse. + +.. versionchanged:: 1.0.0 + +Passing in an integer for ``usecols`` will no longer work. Please pass in a list +of ints from 0 to ``usecols`` inclusive instead. + +You can specify a comma-delimited set of Excel columns and ranges as a string: + +.. code-block:: python + + pd.read_excel("path_to_file.xls", "Sheet1", usecols="A,C:E") + +If ``usecols`` is a list of integers, then it is assumed to be the file column +indices to be parsed. + +.. code-block:: python + + pd.read_excel("path_to_file.xls", "Sheet1", usecols=[0, 2, 3]) + +Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + +If ``usecols`` is a list of strings, it is assumed that each string corresponds +to a column name provided either by the user in ``names`` or inferred from the +document header row(s). Those strings define which columns will be parsed: + +.. code-block:: python + + pd.read_excel("path_to_file.xls", "Sheet1", usecols=["foo", "bar"]) + +Element order is ignored, so ``usecols=['baz', 'joe']`` is the same as ``['joe', 'baz']``. + +If ``usecols`` is callable, the callable function will be evaluated against +the column names, returning names where the callable function evaluates to ``True``. + +.. code-block:: python + + pd.read_excel("path_to_file.xls", "Sheet1", usecols=lambda x: x.isalpha()) + +Parsing dates ++++++++++++++ + +Datetime-like values are normally automatically converted to the appropriate +dtype when reading the excel file. But if you have a column of strings that +*look* like dates (but are not actually formatted as dates in excel), you can +use the ``parse_dates`` keyword to parse those strings to datetimes: + +.. code-block:: python + + pd.read_excel("path_to_file.xls", "Sheet1", parse_dates=["date_strings"]) + + +Cell converters ++++++++++++++++ + +It is possible to transform the contents of Excel cells via the ``converters`` +option. For instance, to convert a column to boolean: + +.. code-block:: python + + pd.read_excel("path_to_file.xls", "Sheet1", converters={"MyBools": bool}) + +This options handles missing values and treats exceptions in the converters +as missing data. Transformations are applied cell by cell rather than to the +column as a whole, so the array dtype is not guaranteed. For instance, a +column of integers with missing values cannot be transformed to an array +with integer dtype, because NaN is strictly a float. 
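For example, a minimal sketch with a hypothetical integer column ``MyInts``
that contains blank cells:

.. code-block:: python

    # blank cells stay missing, so the resulting column cannot use an
    # integer dtype even though every converted value is an int
    pd.read_excel("path_to_file.xls", "Sheet1", converters={"MyInts": int})
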
You can manually mask +missing data to recover integer dtype: + +.. code-block:: python + + def cfun(x): + return int(x) if x else -1 + + + pd.read_excel("path_to_file.xls", "Sheet1", converters={"MyInts": cfun}) + +Dtype specifications +++++++++++++++++++++ + +As an alternative to converters, the type for an entire column can +be specified using the ``dtype`` keyword, which takes a dictionary +mapping column names to types. To interpret data with +no type inference, use the type ``str`` or ``object``. + +.. code-block:: python + + pd.read_excel("path_to_file.xls", dtype={"MyInts": "int64", "MyText": str}) + +.. _io.excel_writer: + +Writing Excel files +''''''''''''''''''' + +Writing Excel files to disk ++++++++++++++++++++++++++++ + +To write a ``DataFrame`` object to a sheet of an Excel file, you can use the +``to_excel`` instance method. The arguments are largely the same as ``to_csv`` +described above, the first argument being the name of the excel file, and the +optional second argument the name of the sheet to which the ``DataFrame`` should be +written. For example: + +.. code-block:: python + + df.to_excel("path_to_file.xlsx", sheet_name="Sheet1") + +Files with a ``.xls`` extension will be written using ``xlwt`` and those with a +``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or +``openpyxl``. + +The ``DataFrame`` will be written in a way that tries to mimic the REPL output. +The ``index_label`` will be placed in the second +row instead of the first. You can place it in the first row by setting the +``merge_cells`` option in ``to_excel()`` to ``False``: + +.. code-block:: python + + df.to_excel("path_to_file.xlsx", index_label="label", merge_cells=False) + +In order to write separate ``DataFrames`` to separate sheets in a single Excel file, +one can pass an :class:`~pandas.io.excel.ExcelWriter`. + +.. code-block:: python + + with pd.ExcelWriter("path_to_file.xlsx") as writer: + df1.to_excel(writer, sheet_name="Sheet1") + df2.to_excel(writer, sheet_name="Sheet2") + +.. _io.excel_writing_buffer: + +Writing Excel files to memory ++++++++++++++++++++++++++++++ + +pandas supports writing Excel files to buffer-like objects such as ``StringIO`` or +``BytesIO`` using :class:`~pandas.io.excel.ExcelWriter`. + +.. code-block:: python + + from io import BytesIO + + bio = BytesIO() + + # By setting the 'engine' in the ExcelWriter constructor. + writer = pd.ExcelWriter(bio, engine="xlsxwriter") + df.to_excel(writer, sheet_name="Sheet1") + + # Save the workbook + writer.save() + + # Seek to the beginning and read to copy the workbook to a variable in memory + bio.seek(0) + workbook = bio.read() + +.. note:: + + ``engine`` is optional but recommended. Setting the engine determines + the version of workbook produced. Setting ``engine='xlrd'`` will produce an + Excel 2003-format workbook (xls). Using either ``'openpyxl'`` or + ``'xlsxwriter'`` will produce an Excel 2007-format workbook (xlsx). If + omitted, an Excel 2007-formatted workbook is produced. + + +.. _io.excel.writers: + +Excel writer engines +'''''''''''''''''''' + +.. deprecated:: 1.2.0 + + As the `xlwt `__ package is no longer + maintained, the ``xlwt`` engine will be removed from a future version + of pandas. This is the only engine in pandas that supports writing to + ``.xls`` files. + +pandas chooses an Excel writer via two methods: + +1. the ``engine`` keyword argument +2. 
the filename extension (via the default specified in config options) + +By default, pandas uses the `XlsxWriter`_ for ``.xlsx``, `openpyxl`_ +for ``.xlsm``, and `xlwt`_ for ``.xls`` files. If you have multiple +engines installed, you can set the default engine through :ref:`setting the +config options ` ``io.excel.xlsx.writer`` and +``io.excel.xls.writer``. pandas will fall back on `openpyxl`_ for ``.xlsx`` +files if `Xlsxwriter`_ is not available. + +.. _XlsxWriter: https://xlsxwriter.readthedocs.io +.. _openpyxl: https://openpyxl.readthedocs.io/ +.. _xlwt: http://www.python-excel.org + +To specify which writer you want to use, you can pass an engine keyword +argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: + +* ``openpyxl``: version 2.4 or higher is required +* ``xlsxwriter`` +* ``xlwt`` + +.. code-block:: python + + # By setting the 'engine' in the DataFrame 'to_excel()' methods. + df.to_excel("path_to_file.xlsx", sheet_name="Sheet1", engine="xlsxwriter") + + # By setting the 'engine' in the ExcelWriter constructor. + writer = pd.ExcelWriter("path_to_file.xlsx", engine="xlsxwriter") + + # Or via pandas configuration. + from pandas import options # noqa: E402 + + options.io.excel.xlsx.writer = "xlsxwriter" + + df.to_excel("path_to_file.xlsx", sheet_name="Sheet1") + +.. _io.excel.style: + +Style and formatting +'''''''''''''''''''' + +The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. + +* ``float_format`` : Format string for floating point numbers (default ``None``). +* ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default ``None``). + +Using the `Xlsxwriter`_ engine provides many options for controlling the +format of an Excel worksheet created with the ``to_excel`` method. Excellent examples can be found in the +`Xlsxwriter`_ documentation here: https://xlsxwriter.readthedocs.io/working_with_pandas.html + +.. _io.ods: + +OpenDocument Spreadsheets +------------------------- + +.. versionadded:: 0.25 + +The :func:`~pandas.read_excel` method can also read OpenDocument spreadsheets +using the ``odfpy`` module. The semantics and features for reading +OpenDocument spreadsheets match what can be done for `Excel files`_ using +``engine='odf'``. + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel("path_to_file.ods", engine="odf") + +.. note:: + + Currently pandas only supports *reading* OpenDocument spreadsheets. Writing + is not implemented. + +.. _io.xlsb: + +Binary Excel (.xlsb) files +-------------------------- + +.. versionadded:: 1.0.0 + +The :func:`~pandas.read_excel` method can also read binary Excel files +using the ``pyxlsb`` module. The semantics and features for reading +binary Excel files mostly match what can be done for `Excel files`_ using +``engine='pyxlsb'``. ``pyxlsb`` does not recognize datetime types +in files and will return floats instead. + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel("path_to_file.xlsb", engine="pyxlsb") + +.. note:: + + Currently pandas only supports *reading* binary Excel files. Writing + is not implemented. + + +.. _io.clipboard: + +Clipboard +--------- + +A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method, +which takes the contents of the clipboard buffer and passes them to the +``read_csv`` method. 
For instance, you can copy the following text to the +clipboard (CTRL-C on many operating systems): + +.. code-block:: console + + A B C + x 1 4 p + y 2 5 q + z 3 6 r + +And then import the data directly to a ``DataFrame`` by calling: + +.. code-block:: python + + >>> clipdf = pd.read_clipboard() + >>> clipdf + A B C + x 1 4 p + y 2 5 q + z 3 6 r + +The ``to_clipboard`` method can be used to write the contents of a ``DataFrame`` to +the clipboard. Following which you can paste the clipboard contents into other +applications (CTRL-V on many operating systems). Here we illustrate writing a +``DataFrame`` into clipboard and reading it back. + +.. code-block:: python + + >>> df = pd.DataFrame( + ... {"A": [1, 2, 3], "B": [4, 5, 6], "C": ["p", "q", "r"]}, index=["x", "y", "z"] + ... ) + + >>> df + A B C + x 1 4 p + y 2 5 q + z 3 6 r + >>> df.to_clipboard() + >>> pd.read_clipboard() + A B C + x 1 4 p + y 2 5 q + z 3 6 r + +We can see that we got the same content back, which we had earlier written to the clipboard. + +.. note:: + + You may need to install xclip or xsel (with PyQt5, PyQt4 or qtpy) on Linux to use these methods. + +.. _io.pickle: + +Pickling +-------- + +All pandas objects are equipped with ``to_pickle`` methods which use Python's +``cPickle`` module to save data structures to disk using the pickle format. + +.. ipython:: python + + df + df.to_pickle("foo.pkl") + +The ``read_pickle`` function in the ``pandas`` namespace can be used to load +any pickled pandas object (or any other pickled object) from file: + + +.. ipython:: python + + pd.read_pickle("foo.pkl") + +.. ipython:: python + :suppress: + + os.remove("foo.pkl") + +.. warning:: + + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html + +.. warning:: + + :func:`read_pickle` is only guaranteed backwards compatible back to pandas version 0.20.3 + +.. _io.pickle.compression: + +Compressed pickle files +''''''''''''''''''''''' + +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read +and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz``, ``zstd`` are supported for reading and writing. +The ``zip`` file format only supports reading and must contain only one data file +to be read. + +The compression type can be an explicit parameter or be inferred from the file extension. +If 'infer', then use ``gzip``, ``bz2``, ``zip``, ``xz``, ``zstd`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, +``'.xz'``, or ``'.zst'``, respectively. + +The compression parameter can also be a ``dict`` in order to pass options to the +compression protocol. It must have a ``'method'`` key set to the name +of the compression protocol, which must be one of +{``'zip'``, ``'gzip'``, ``'bz2'``, ``'xz'``, ``'zstd'``}. All other key-value pairs are passed to +the underlying compression library. + +.. ipython:: python + + df = pd.DataFrame( + { + "A": np.random.randn(1000), + "B": "foo", + "C": pd.date_range("20130101", periods=1000, freq="s"), + } + ) + df + +Using an explicit compression type: + +.. ipython:: python + + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt + +Inferring compression type from the extension: + +.. ipython:: python + + df.to_pickle("data.pkl.xz", compression="infer") + rt = pd.read_pickle("data.pkl.xz", compression="infer") + rt + +The default is to 'infer': + +.. 
ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt + + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt + +Passing options to the compression protocol in order to speed up compression: + +.. ipython:: python + + df.to_pickle("data.pkl.gz", compression={"method": "gzip", "compresslevel": 1}) + +.. ipython:: python + :suppress: + + os.remove("data.pkl.compress") + os.remove("data.pkl.xz") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") + +.. _io.msgpack: + +msgpack +------- + +pandas support for ``msgpack`` has been removed in version 1.0.0. It is +recommended to use :ref:`pickle ` instead. + +Alternatively, you can also the Arrow IPC serialization format for on-the-wire +transmission of pandas objects. For documentation on pyarrow, see +`here `__. + + +.. _io.hdf5: + +HDF5 (PyTables) +--------------- + +``HDFStore`` is a dict-like object which reads and writes pandas using +the high performance HDF5 format using the excellent `PyTables +`__ library. See the :ref:`cookbook ` +for some advanced strategies + +.. warning:: + + pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle. Loading pickled data received from + untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + +.. ipython:: python + :suppress: + :okexcept: + + os.remove("store.h5") + +.. ipython:: python + + store = pd.HDFStore("store.h5") + print(store) + +Objects can be written to the file just like adding key-value pairs to a +dict: + +.. ipython:: python + + index = pd.date_range("1/1/2000", periods=8) + s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"]) + df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"]) + + # store.put('s', s) is an equivalent method + store["s"] = s + + store["df"] = df + + store + +In a current or later Python session, you can retrieve stored objects: + +.. ipython:: python + + # store.get('df') is an equivalent method + store["df"] + + # dotted (attribute) access provides get as well + store.df + +Deletion of the object specified by the key: + +.. ipython:: python + + # store.remove('df') is an equivalent method + del store["df"] + + store + +Closing a Store and using a context manager: + +.. ipython:: python + + store.close() + store + store.is_open + + # Working with, and automatically closing the store using a context manager + with pd.HDFStore("store.h5") as store: + store.keys() + +.. ipython:: python + :suppress: + + store.close() + os.remove("store.h5") + + + +Read/write API +'''''''''''''' + +``HDFStore`` supports a top-level API using ``read_hdf`` for reading and ``to_hdf`` for writing, +similar to how ``read_csv`` and ``to_csv`` work. + +.. ipython:: python + + df_tl = pd.DataFrame({"A": list(range(5)), "B": list(range(5))}) + df_tl.to_hdf("store_tl.h5", "table", append=True) + pd.read_hdf("store_tl.h5", "table", where=["index>2"]) + +.. ipython:: python + :suppress: + :okexcept: + + os.remove("store_tl.h5") + + +HDFStore will by default not drop rows that are all missing. This behavior can be changed by setting ``dropna=True``. + + +.. 
ipython:: python + + df_with_missing = pd.DataFrame( + { + "col1": [0, np.nan, 2], + "col2": [1, np.nan, np.nan], + } + ) + df_with_missing + + df_with_missing.to_hdf("file.h5", "df_with_missing", format="table", mode="w") + + pd.read_hdf("file.h5", "df_with_missing") + + df_with_missing.to_hdf( + "file.h5", "df_with_missing", format="table", mode="w", dropna=True + ) + pd.read_hdf("file.h5", "df_with_missing") + + +.. ipython:: python + :suppress: + + os.remove("file.h5") + + +.. _io.hdf5-fixed: + +Fixed format +'''''''''''' + +The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called +the ``fixed`` format. These types of stores are **not** appendable once written (though you can simply +remove them and rewrite). Nor are they **queryable**; they must be +retrieved in their entirety. They also do not support dataframes with non-unique column names. +The ``fixed`` format stores offer very fast writing and slightly faster reading than ``table`` stores. +This format is specified by default when using ``put`` or ``to_hdf`` or by ``format='fixed'`` or ``format='f'``. + +.. warning:: + + A ``fixed`` format will raise a ``TypeError`` if you try to retrieve using a ``where``: + + .. code-block:: python + + >>> pd.DataFrame(np.random.randn(10, 2)).to_hdf("test_fixed.h5", "df") + >>> pd.read_hdf("test_fixed.h5", "df", where="index>5") + TypeError: cannot pass a where specification when reading a fixed format. + this store must be selected in its entirety + + +.. _io.hdf5-table: + +Table format +'''''''''''' + +``HDFStore`` supports another ``PyTables`` format on disk, the ``table`` +format. Conceptually a ``table`` is shaped very much like a DataFrame, +with rows and columns. A ``table`` may be appended to in the same or +other sessions. In addition, delete and query type operations are +supported. This format is specified by ``format='table'`` or ``format='t'`` +to ``append`` or ``put`` or ``to_hdf``. + +This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to +enable ``put/append/to_hdf`` to by default store in the ``table`` format. + +.. ipython:: python + :suppress: + :okexcept: + + os.remove("store.h5") + +.. ipython:: python + + store = pd.HDFStore("store.h5") + df1 = df[0:4] + df2 = df[4:] + + # append data (creates a table automatically) + store.append("df", df1) + store.append("df", df2) + store + + # select the entire object + store.select("df") + + # the type of stored data + store.root.df._v_attrs.pandas_type + +.. note:: + + You can also create a ``table`` by passing ``format='table'`` or ``format='t'`` to a ``put`` operation. + +.. _io.hdf5-keys: + +Hierarchical keys +''''''''''''''''' + +Keys to a store can be specified as a string. These can be in a +hierarchical path-name like format (e.g. ``foo/bar/bah``), which will +generate a hierarchy of sub-stores (or ``Groups`` in PyTables +parlance). Keys can be specified without the leading '/' and are **always** +absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove +everything in the sub-store and **below**, so be *careful*. + +.. 
ipython:: python + + store.put("foo/bar/bah", df) + store.append("food/orange", df) + store.append("food/apple", df) + store + + # a list of keys are returned + store.keys() + + # remove all nodes under this level + store.remove("food") + store + + +You can walk through the group hierarchy using the ``walk`` method which +will yield a tuple for each group key along with the relative keys of its contents. + +.. ipython:: python + + for (path, subgroups, subkeys) in store.walk(): + for subgroup in subgroups: + print("GROUP: {}/{}".format(path, subgroup)) + for subkey in subkeys: + key = "/".join([path, subkey]) + print("KEY: {}".format(key)) + print(store.get(key)) + + + +.. warning:: + + Hierarchical keys cannot be retrieved as dotted (attribute) access as described above for items stored under the root node. + + .. code-block:: ipython + + In [8]: store.foo.bar.bah + AttributeError: 'HDFStore' object has no attribute 'foo' + + # you can directly access the actual PyTables node but using the root node + In [9]: store.root.foo.bar.bah + Out[9]: + /foo/bar/bah (Group) '' + children := ['block0_items' (Array), 'block0_values' (Array), 'axis0' (Array), 'axis1' (Array)] + + Instead, use explicit string based keys: + + .. ipython:: python + + store["foo/bar/bah"] + + +.. _io.hdf5-types: + +Storing types +''''''''''''' + +Storing mixed types in a table +++++++++++++++++++++++++++++++ + +Storing mixed-dtype data is supported. Strings are stored as a +fixed-width using the maximum size of the appended column. Subsequent attempts +at appending longer strings will raise a ``ValueError``. + +Passing ``min_itemsize={`values`: size}`` as a parameter to append +will set a larger minimum for the string columns. Storing ``floats, +strings, ints, bools, datetime64`` are currently supported. For string +columns, passing ``nan_rep = 'nan'`` to append will change the default +nan representation on disk (which converts to/from ``np.nan``), this +defaults to ``nan``. + +.. ipython:: python + + df_mixed = pd.DataFrame( + { + "A": np.random.randn(8), + "B": np.random.randn(8), + "C": np.array(np.random.randn(8), dtype="float32"), + "string": "string", + "int": 1, + "bool": True, + "datetime64": pd.Timestamp("20010102"), + }, + index=list(range(8)), + ) + df_mixed.loc[df_mixed.index[3:5], ["A", "B", "string", "datetime64"]] = np.nan + + store.append("df_mixed", df_mixed, min_itemsize={"values": 50}) + df_mixed1 = store.select("df_mixed") + df_mixed1 + df_mixed1.dtypes.value_counts() + + # we have provided a minimum string column size + store.root.df_mixed.table + +Storing MultiIndex DataFrames ++++++++++++++++++++++++++++++ + +Storing MultiIndex ``DataFrames`` as tables is very similar to +storing/selecting from homogeneous index ``DataFrames``. + +.. ipython:: python + + index = pd.MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["foo", "bar"], + ) + df_mi = pd.DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + df_mi + + store.append("df_mi", df_mi) + store.select("df_mi") + + # the levels are automatically included as data columns + store.select("df_mi", "foo=bar") + +.. note:: + The ``index`` keyword is reserved and cannot be use as a level name. + +.. _io.hdf5-query: + +Querying +'''''''' + +Querying a table +++++++++++++++++ + +``select`` and ``delete`` operations have an optional criterion that can +be specified to select/delete only a subset of the data. 
This allows one +to have a very large on-disk table and retrieve only a portion of the +data. + +A query is specified using the ``Term`` class under the hood, as a boolean expression. + +* ``index`` and ``columns`` are supported indexers of ``DataFrames``. +* if ``data_columns`` are specified, these can be used as additional indexers. +* level name in a MultiIndex, with default name ``level_0``, ``level_1``, … if not provided. + +Valid comparison operators are: + +``=, ==, !=, >, >=, <, <=`` + +Valid boolean expressions are combined with: + +* ``|`` : or +* ``&`` : and +* ``(`` and ``)`` : for grouping + +These rules are similar to how boolean expressions are used in pandas for indexing. + +.. note:: + + - ``=`` will be automatically expanded to the comparison operator ``==`` + - ``~`` is the not operator, but can only be used in very limited + circumstances + - If a list/tuple of expressions is passed they will be combined via ``&`` + +The following are valid expressions: + +* ``'index >= date'`` +* ``"columns = ['A', 'D']"`` +* ``"columns in ['A', 'D']"`` +* ``'columns = A'`` +* ``'columns == A'`` +* ``"~(columns = ['A', 'B'])"`` +* ``'index > df.index[3] & string = "bar"'`` +* ``'(index > df.index[3] & index <= df.index[6]) | string = "bar"'`` +* ``"ts >= Timestamp('2012-02-01')"`` +* ``"major_axis>=20130101"`` + +The ``indexers`` are on the left-hand side of the sub-expression: + +``columns``, ``major_axis``, ``ts`` + +The right-hand side of the sub-expression (after a comparison operator) can be: + +* functions that will be evaluated, e.g. ``Timestamp('2012-02-01')`` +* strings, e.g. ``"bar"`` +* date-like, e.g. ``20130101``, or ``"20130101"`` +* lists, e.g. ``"['A', 'B']"`` +* variables that are defined in the local names space, e.g. ``date`` + +.. note:: + + Passing a string to a query by interpolating it into the query + expression is not recommended. Simply assign the string of interest to a + variable and use that variable in an expression. For example, do this + + .. code-block:: python + + string = "HolyMoly'" + store.select("df", "index == string") + + instead of this + + .. code-block:: ipython + + string = "HolyMoly'" + store.select('df', f'index == {string}') + + The latter will **not** work and will raise a ``SyntaxError``.Note that + there's a single quote followed by a double quote in the ``string`` + variable. + + If you *must* interpolate, use the ``'%r'`` format specifier + + .. code-block:: python + + store.select("df", "index == %r" % string) + + which will quote ``string``. + + +Here are some examples: + +.. ipython:: python + + dfq = pd.DataFrame( + np.random.randn(10, 4), + columns=list("ABCD"), + index=pd.date_range("20130101", periods=10), + ) + store.append("dfq", dfq, format="table", data_columns=True) + +Use boolean expressions, with in-line function evaluation. + +.. ipython:: python + + store.select("dfq", "index>pd.Timestamp('20130104') & columns=['A', 'B']") + +Use inline column reference. + +.. ipython:: python + + store.select("dfq", where="A>0 or C>0") + +The ``columns`` keyword can be supplied to select a list of columns to be +returned, this is equivalent to passing a +``'columns=list_of_columns_to_filter'``: + +.. ipython:: python + + store.select("df", "columns=['A', 'B']") + +``start`` and ``stop`` parameters can be specified to limit the total search +space. These are in terms of the total number of rows in a table. + +.. note:: + + ``select`` will raise a ``ValueError`` if the query expression has an unknown + variable reference. 
Usually this means that you are trying to select on a column + that is **not** a data_column. + + ``select`` will raise a ``SyntaxError`` if the query expression is not valid. + + +.. _io.hdf5-timedelta: + +Query timedelta64[ns] ++++++++++++++++++++++ + +You can store and query using the ``timedelta64[ns]`` type. Terms can be +specified in the format: ``()``, where float may be signed (and fractional), and unit can be +``D,s,ms,us,ns`` for the timedelta. Here's an example: + +.. ipython:: python + + from datetime import timedelta + + dftd = pd.DataFrame( + { + "A": pd.Timestamp("20130101"), + "B": [ + pd.Timestamp("20130101") + timedelta(days=i, seconds=10) + for i in range(10) + ], + } + ) + dftd["C"] = dftd["A"] - dftd["B"] + dftd + store.append("dftd", dftd, data_columns=True) + store.select("dftd", "C<'-3.5D'") + +.. _io.query_multi: + +Query MultiIndex +++++++++++++++++ + +Selecting from a ``MultiIndex`` can be achieved by using the name of the level. + +.. ipython:: python + + df_mi.index.names + store.select("df_mi", "foo=baz and bar=two") + +If the ``MultiIndex`` levels names are ``None``, the levels are automatically made available via +the ``level_n`` keyword with ``n`` the level of the ``MultiIndex`` you want to select from. + +.. ipython:: python + + index = pd.MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + ) + df_mi_2 = pd.DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + df_mi_2 + + store.append("df_mi_2", df_mi_2) + + # the levels are automatically included as data columns with keyword level_n + store.select("df_mi_2", "level_0=foo and level_1=two") + + +Indexing +++++++++ + +You can create/modify an index for a table with ``create_table_index`` +after data is already in the table (after and ``append/put`` +operation). Creating a table index is **highly** encouraged. This will +speed your queries a great deal when you use a ``select`` with the +indexed dimension as the ``where``. + +.. note:: + + Indexes are automagically created on the indexables + and any data columns you specify. This behavior can be turned off by passing + ``index=False`` to ``append``. + +.. ipython:: python + + # we have automagically already created an index (in the first section) + i = store.root.df.table.cols.index.index + i.optlevel, i.kind + + # change an index by passing new parameters + store.create_table_index("df", optlevel=9, kind="full") + i = store.root.df.table.cols.index.index + i.optlevel, i.kind + +Oftentimes when appending large amounts of data to a store, it is useful to turn off index creation for each append, then recreate at the end. + +.. ipython:: python + + df_1 = pd.DataFrame(np.random.randn(10, 2), columns=list("AB")) + df_2 = pd.DataFrame(np.random.randn(10, 2), columns=list("AB")) + + st = pd.HDFStore("appends.h5", mode="w") + st.append("df", df_1, data_columns=["B"], index=False) + st.append("df", df_2, data_columns=["B"], index=False) + st.get_storer("df").table + +Then create the index when finished appending. + +.. ipython:: python + + st.create_table_index("df", columns=["B"], optlevel=9, kind="full") + st.get_storer("df").table + + st.close() + +.. ipython:: python + :suppress: + :okexcept: + + os.remove("appends.h5") + +See `here `__ for how to create a completely-sorted-index (CSI) on an existing store. + +.. 
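As a quick sketch (assuming the store from the examples above is still open),
the underlying ``PyTables`` index object can also be inspected directly to
check whether a column already carries a completely-sorted-index:

.. code-block:: python

    # a full-kind, optlevel-9 index as created above is a CSI;
    # is_csi is an attribute of the PyTables Index object
    store.root.df.table.cols.index.index.is_csi

..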
_io.hdf5-query-data-columns: + +Query via data columns +++++++++++++++++++++++ + +You can designate (and index) certain columns that you want to be able +to perform queries (other than the ``indexable`` columns, which you can +always query). For instance say you want to perform this common +operation, on-disk, and return just the frame that matches this +query. You can specify ``data_columns = True`` to force all columns to +be ``data_columns``. + +.. ipython:: python + + df_dc = df.copy() + df_dc["string"] = "foo" + df_dc.loc[df_dc.index[4:6], "string"] = np.nan + df_dc.loc[df_dc.index[7:9], "string"] = "bar" + df_dc["string2"] = "cool" + df_dc.loc[df_dc.index[1:3], ["B", "C"]] = 1.0 + df_dc + + # on-disk operations + store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"]) + store.select("df_dc", where="B > 0") + + # getting creative + store.select("df_dc", "B > 0 & C > 0 & string == foo") + + # this is in-memory version of this type of selection + df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] + + # we have automagically created this index and the B/C/string/string2 + # columns are stored separately as ``PyTables`` columns + store.root.df_dc.table + +There is some performance degradation by making lots of columns into +``data columns``, so it is up to the user to designate these. In addition, +you cannot change data columns (nor indexables) after the first +append/put operation (Of course you can simply read in the data and +create a new table!). + +Iterator +++++++++ + +You can pass ``iterator=True`` or ``chunksize=number_in_a_chunk`` +to ``select`` and ``select_as_multiple`` to return an iterator on the results. +The default is 50,000 rows returned in a chunk. + +.. ipython:: python + + for df in store.select("df", chunksize=3): + print(df) + +.. note:: + + You can also use the iterator with ``read_hdf`` which will open, then + automatically close the store when finished iterating. + + .. code-block:: python + + for df in pd.read_hdf("store.h5", "df", chunksize=3): + print(df) + +Note, that the chunksize keyword applies to the **source** rows. So if you +are doing a query, then the chunksize will subdivide the total rows in the table +and the query applied, returning an iterator on potentially unequal sized chunks. + +Here is a recipe for generating a query and using it to create equal sized return +chunks. + +.. ipython:: python + + dfeq = pd.DataFrame({"number": np.arange(1, 11)}) + dfeq + + store.append("dfeq", dfeq, data_columns=["number"]) + + def chunks(l, n): + return [l[i: i + n] for i in range(0, len(l), n)] + + evens = [2, 4, 6, 8, 10] + coordinates = store.select_as_coordinates("dfeq", "number=evens") + for c in chunks(coordinates, 2): + print(store.select("dfeq", where=c)) + +Advanced queries +++++++++++++++++ + +Select a single column +^^^^^^^^^^^^^^^^^^^^^^ + +To retrieve a single indexable or data column, use the +method ``select_column``. This will, for example, enable you to get the index +very quickly. These return a ``Series`` of the result, indexed by the row number. +These do not currently accept the ``where`` selector. + +.. ipython:: python + + store.select_column("df_dc", "index") + store.select_column("df_dc", "string") + +.. _io.hdf5-selecting_coordinates: + +Selecting coordinates +^^^^^^^^^^^^^^^^^^^^^ + +Sometimes you want to get the coordinates (a.k.a the index locations) of your query. This returns an +``Int64Index`` of the resulting locations. These coordinates can also be passed to subsequent +``where`` operations. 
+ +.. ipython:: python + + df_coord = pd.DataFrame( + np.random.randn(1000, 2), index=pd.date_range("20000101", periods=1000) + ) + store.append("df_coord", df_coord) + c = store.select_as_coordinates("df_coord", "index > 20020101") + c + store.select("df_coord", where=c) + +.. _io.hdf5-where_mask: + +Selecting using a where mask +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sometime your query can involve creating a list of rows to select. Usually this ``mask`` would +be a resulting ``index`` from an indexing operation. This example selects the months of +a datetimeindex which are 5. + +.. ipython:: python + + df_mask = pd.DataFrame( + np.random.randn(1000, 2), index=pd.date_range("20000101", periods=1000) + ) + store.append("df_mask", df_mask) + c = store.select_column("df_mask", "index") + where = c[pd.DatetimeIndex(c).month == 5].index + store.select("df_mask", where=where) + +Storer object +^^^^^^^^^^^^^ + +If you want to inspect the stored object, retrieve via +``get_storer``. You could use this programmatically to say get the number +of rows in an object. + +.. ipython:: python + + store.get_storer("df_dc").nrows + + +Multiple table queries +++++++++++++++++++++++ + +The methods ``append_to_multiple`` and +``select_as_multiple`` can perform appending/selecting from +multiple tables at once. The idea is to have one table (call it the +selector table) that you index most/all of the columns, and perform your +queries. The other table(s) are data tables with an index matching the +selector table's index. You can then perform a very fast query +on the selector table, yet get lots of data back. This method is similar to +having a very wide table, but enables more efficient queries. + +The ``append_to_multiple`` method splits a given single DataFrame +into multiple tables according to ``d``, a dictionary that maps the +table names to a list of 'columns' you want in that table. If ``None`` +is used in place of a list, that table will have the remaining +unspecified columns of the given DataFrame. The argument ``selector`` +defines which table is the selector table (which you can make queries from). +The argument ``dropna`` will drop rows from the input ``DataFrame`` to ensure +tables are synchronized. This means that if a row for one of the tables +being written to is entirely ``np.NaN``, that row will be dropped from all tables. + +If ``dropna`` is False, **THE USER IS RESPONSIBLE FOR SYNCHRONIZING THE TABLES**. +Remember that entirely ``np.Nan`` rows are not written to the HDFStore, so if +you choose to call ``dropna=False``, some tables may have more rows than others, +and therefore ``select_as_multiple`` may not work or it may return unexpected +results. + +.. ipython:: python + + df_mt = pd.DataFrame( + np.random.randn(8, 6), + index=pd.date_range("1/1/2000", periods=8), + columns=["A", "B", "C", "D", "E", "F"], + ) + df_mt["foo"] = "bar" + df_mt.loc[df_mt.index[1], ("A", "B")] = np.nan + + # you can also create the tables individually + store.append_to_multiple( + {"df1_mt": ["A", "B"], "df2_mt": None}, df_mt, selector="df1_mt" + ) + store + + # individual tables were created + store.select("df1_mt") + store.select("df2_mt") + + # as a multiple + store.select_as_multiple( + ["df1_mt", "df2_mt"], + where=["A>0", "B>0"], + selector="df1_mt", + ) + + +Delete from a table +''''''''''''''''''' + +You can delete from a table selectively by specifying a ``where``. In +deleting rows, it is important to understand the ``PyTables`` deletes +rows by erasing the rows, then **moving** the following data. 
Thus +deleting can potentially be a very expensive operation depending on the +orientation of your data. To get optimal performance, it's +worthwhile to have the dimension you are deleting be the first of the +``indexables``. + +Data is ordered (on the disk) in terms of the ``indexables``. Here's a +simple use case. You store panel-type data, with dates in the +``major_axis`` and ids in the ``minor_axis``. The data is then +interleaved like this: + +* date_1 + * id_1 + * id_2 + * . + * id_n +* date_2 + * id_1 + * . + * id_n + +It should be clear that a delete operation on the ``major_axis`` will be +fairly quick, as one chunk is removed, then the following data moved. On +the other hand a delete operation on the ``minor_axis`` will be very +expensive. In this case it would almost certainly be faster to rewrite +the table using a ``where`` that selects all but the missing data. + +.. warning:: + + Please note that HDF5 **DOES NOT RECLAIM SPACE** in the h5 files + automatically. Thus, repeatedly deleting (or removing nodes) and adding + again, **WILL TEND TO INCREASE THE FILE SIZE**. + + To *repack and clean* the file, use :ref:`ptrepack `. + +.. _io.hdf5-notes: + +Notes & caveats +''''''''''''''' + + +Compression ++++++++++++ + +``PyTables`` allows the stored data to be compressed. This applies to +all kinds of stores, not just tables. Two parameters are used to +control compression: ``complevel`` and ``complib``. + +* ``complevel`` specifies if and how hard data is to be compressed. + ``complevel=0`` and ``complevel=None`` disables compression and + ``0`_: The default compression library. + A classic in terms of compression, achieves good compression + rates but is somewhat slow. + - `lzo `_: Fast + compression and decompression. + - `bzip2 `_: Good compression rates. + - `blosc `_: Fast compression and + decompression. + + Support for alternative blosc compressors: + + - `blosc:blosclz `_ This is the + default compressor for ``blosc`` + - `blosc:lz4 + `_: + A compact, very popular and fast compressor. + - `blosc:lz4hc + `_: + A tweaked version of LZ4, produces better + compression ratios at the expense of speed. + - `blosc:snappy `_: + A popular compressor used in many places. + - `blosc:zlib `_: A classic; + somewhat slower than the previous ones, but + achieving better compression ratios. + - `blosc:zstd `_: An + extremely well balanced codec; it provides the best + compression ratios among the others above, and at + reasonably fast speed. + + If ``complib`` is defined as something other than the listed libraries a + ``ValueError`` exception is issued. + +.. note:: + + If the library specified with the ``complib`` option is missing on your platform, + compression defaults to ``zlib`` without further ado. + +Enable compression for all objects within the file: + +.. code-block:: python + + store_compressed = pd.HDFStore( + "store_compressed.h5", complevel=9, complib="blosc:blosclz" + ) + +Or on-the-fly compression (this only applies to tables) in stores where compression is not enabled: + +.. code-block:: python + + store.append("df", df, complib="zlib", complevel=5) + +.. _io.hdf5-ptrepack: + +ptrepack +++++++++ + +``PyTables`` offers better write performance when tables are compressed after +they are written, as opposed to turning on compression at the very +beginning. You can use the supplied ``PyTables`` utility +``ptrepack``. In addition, ``ptrepack`` can change compression levels +after the fact. + +.. 
code-block:: console + + ptrepack --chunkshape=auto --propindexes --complevel=9 --complib=blosc in.h5 out.h5 + +Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow +you to reuse previously deleted space. Alternatively, one can simply +remove the file and write again, or use the ``copy`` method. + +.. _io.hdf5-caveats: + +Caveats ++++++++ + +.. warning:: + + ``HDFStore`` is **not-threadsafe for writing**. The underlying + ``PyTables`` only supports concurrent reads (via threading or + processes). If you need reading and writing *at the same time*, you + need to serialize these operations in a single thread in a single + process. You will corrupt your data otherwise. See the (:issue:`2397`) for more information. + +* If you use locks to manage write access between multiple processes, you + may want to use :py:func:`~os.fsync` before releasing write locks. For + convenience you can use ``store.flush(fsync=True)`` to do this for you. +* Once a ``table`` is created columns (DataFrame) + are fixed; only exactly the same columns can be appended +* Be aware that timezones (e.g., ``pytz.timezone('US/Eastern')``) + are not necessarily equal across timezone versions. So if data is + localized to a specific timezone in the HDFStore using one version + of a timezone library and that data is updated with another version, the data + will be converted to UTC since these timezones are not considered + equal. Either use the same version of timezone library or use ``tz_convert`` with + the updated timezone definition. + +.. warning:: + + ``PyTables`` will show a ``NaturalNameWarning`` if a column name + cannot be used as an attribute selector. + *Natural* identifiers contain only letters, numbers, and underscores, + and may not begin with a number. + Other identifiers cannot be used in a ``where`` clause + and are generally a bad idea. + +.. _io.hdf5-data_types: + +DataTypes +''''''''' + +``HDFStore`` will map an object dtype to the ``PyTables`` underlying +dtype. This means the following types are known to work: + +====================================================== ========================= +Type Represents missing values +====================================================== ========================= +floating : ``float64, float32, float16`` ``np.nan`` +integer : ``int64, int32, int8, uint64,uint32, uint8`` +boolean +``datetime64[ns]`` ``NaT`` +``timedelta64[ns]`` ``NaT`` +categorical : see the section below +object : ``strings`` ``np.nan`` +====================================================== ========================= + +``unicode`` columns are not supported, and **WILL FAIL**. + +.. _io.hdf5-categorical: + +Categorical data +++++++++++++++++ + +You can write data that contains ``category`` dtypes to a ``HDFStore``. +Queries work the same as if it was an object array. However, the ``category`` dtyped data is +stored in a more efficient manner. + +.. ipython:: python + + dfcat = pd.DataFrame( + {"A": pd.Series(list("aabbcdba")).astype("category"), "B": np.random.randn(8)} + ) + dfcat + dfcat.dtypes + cstore = pd.HDFStore("cats.h5", mode="w") + cstore.append("dfcat", dfcat, format="table", data_columns=["A"]) + result = cstore.select("dfcat", where="A in ['b', 'c']") + result + result.dtypes + +.. ipython:: python + :suppress: + :okexcept: + + cstore.close() + os.remove("cats.h5") + + +String columns +++++++++++++++ + +**min_itemsize** + +The underlying implementation of ``HDFStore`` uses a fixed column width (itemsize) for string columns. 
+A string column itemsize is calculated as the maximum of the +length of data (for that column) that is passed to the ``HDFStore``, **in the first append**. Subsequent appends, +may introduce a string for a column **larger** than the column can hold, an Exception will be raised (otherwise you +could have a silent truncation of these columns, leading to loss of information). In the future we may relax this and +allow a user-specified truncation to occur. + +Pass ``min_itemsize`` on the first table creation to a-priori specify the minimum length of a particular string column. +``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to +allow all *indexables* or *data_columns* to have this min_itemsize. + +Passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. + +.. note:: + + If you are not passing any ``data_columns``, then the ``min_itemsize`` will be the maximum of the length of any string passed + +.. ipython:: python + + dfs = pd.DataFrame({"A": "foo", "B": "bar"}, index=list(range(5))) + dfs + + # A and B have a size of 30 + store.append("dfs", dfs, min_itemsize=30) + store.get_storer("dfs").table + + # A is created as a data_column with a size of 30 + # B is size is calculated + store.append("dfs2", dfs, min_itemsize={"A": 30}) + store.get_storer("dfs2").table + +**nan_rep** + +String columns will serialize a ``np.nan`` (a missing value) with the ``nan_rep`` string representation. This defaults to the string value ``nan``. +You could inadvertently turn an actual ``nan`` value into a missing value. + +.. ipython:: python + + dfss = pd.DataFrame({"A": ["foo", "bar", "nan"]}) + dfss + + store.append("dfss", dfss) + store.select("dfss") + + # here you need to specify a different nan rep + store.append("dfss2", dfss, nan_rep="_nan_") + store.select("dfss2") + +.. _io.external_compatibility: + +External compatibility +'''''''''''''''''''''' + +``HDFStore`` writes ``table`` format objects in specific formats suitable for +producing loss-less round trips to pandas objects. For external +compatibility, ``HDFStore`` can read native ``PyTables`` format +tables. + +It is possible to write an ``HDFStore`` object that can easily be imported into ``R`` using the +``rhdf5`` library (`Package website`_). Create a table format store like this: + +.. _package website: https://www.bioconductor.org/packages/release/bioc/html/rhdf5.html + +.. ipython:: python + + df_for_r = pd.DataFrame( + { + "first": np.random.rand(100), + "second": np.random.rand(100), + "class": np.random.randint(0, 2, (100,)), + }, + index=range(100), + ) + df_for_r.head() + + store_export = pd.HDFStore("export.h5") + store_export.append("df_for_r", df_for_r, data_columns=df_dc.columns) + store_export + +.. ipython:: python + :suppress: + + store_export.close() + os.remove("export.h5") + +In R this file can be read into a ``data.frame`` object using the ``rhdf5`` +library. The following example function reads the corresponding column names +and data values from the values and assembles them into a ``data.frame``: + +.. code-block:: R + + # Load values and column names for all datasets from corresponding nodes and + # insert them into one data.frame object. 
+ + library(rhdf5) + + loadhdf5data <- function(h5File) { + + listing <- h5ls(h5File) + # Find all data nodes, values are stored in *_values and corresponding column + # titles in *_items + data_nodes <- grep("_values", listing$name) + name_nodes <- grep("_items", listing$name) + data_paths = paste(listing$group[data_nodes], listing$name[data_nodes], sep = "/") + name_paths = paste(listing$group[name_nodes], listing$name[name_nodes], sep = "/") + columns = list() + for (idx in seq(data_paths)) { + # NOTE: matrices returned by h5read have to be transposed to obtain + # required Fortran order! + data <- data.frame(t(h5read(h5File, data_paths[idx]))) + names <- t(h5read(h5File, name_paths[idx])) + entry <- data.frame(data) + colnames(entry) <- names + columns <- append(columns, entry) + } + + data <- data.frame(columns) + + return(data) + } + +Now you can import the ``DataFrame`` into R: + +.. code-block:: R + + > data = loadhdf5data("transfer.hdf5") + > head(data) + first second class + 1 0.4170220047 0.3266449 0 + 2 0.7203244934 0.5270581 0 + 3 0.0001143748 0.8859421 1 + 4 0.3023325726 0.3572698 1 + 5 0.1467558908 0.9085352 1 + 6 0.0923385948 0.6233601 1 + +.. note:: + The R function lists the entire HDF5 file's contents and assembles the + ``data.frame`` object from all matching nodes, so use this only as a + starting point if you have stored multiple ``DataFrame`` objects to a + single HDF5 file. + + +Performance +''''''''''' + +* ``tables`` format come with a writing performance penalty as compared to + ``fixed`` stores. The benefit is the ability to append/delete and + query (potentially very large amounts of data). Write times are + generally longer as compared with regular stores. Query times can + be quite fast, especially on an indexed axis. +* You can pass ``chunksize=`` to ``append``, specifying the + write chunksize (default is 50000). This will significantly lower + your memory usage on writing. +* You can pass ``expectedrows=`` to the first ``append``, + to set the TOTAL number of rows that ``PyTables`` will expect. + This will optimize read/write performance. +* Duplicate rows can be written to tables, but are filtered out in + selection (with the last items being selected; thus a table is + unique on major, minor pairs) +* A ``PerformanceWarning`` will be raised if you are attempting to + store types that will be pickled by PyTables (rather than stored as + endemic types). See + `Here `__ + for more information and some solutions. + + +.. ipython:: python + :suppress: + + store.close() + os.remove("store.h5") + + +.. _io.feather: + +Feather +------- + +Feather provides binary columnar serialization for data frames. It is designed to make reading and writing data +frames efficient, and to make sharing data across data analysis languages easy. + +Feather is designed to faithfully serialize and de-serialize DataFrames, supporting all of the pandas +dtypes, including extension dtypes such as categorical and datetime with tz. + +Several caveats: + +* The format will NOT write an ``Index``, or ``MultiIndex`` for the + ``DataFrame`` and will raise an error if a non-default one is provided. You + can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to + ignore it. +* Duplicate column names and non-string columns names are not supported +* Actual Python objects in object dtype columns are not supported. These will + raise a helpful error message on an attempt at serialization. + +See the `Full Documentation `__. + +.. 
ipython:: python + + df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, freq="ns"), + } + ) + + df + df.dtypes + +Write to a feather file. + +.. ipython:: python + + df.to_feather("example.feather") + +Read from a feather file. + +.. ipython:: python + :okwarning: + + result = pd.read_feather("example.feather") + result + + # we preserve dtypes + result.dtypes + +.. ipython:: python + :suppress: + + os.remove("example.feather") + + +.. _io.parquet: + +Parquet +------- + +`Apache Parquet `__ provides a partitioned binary columnar serialization for data frames. It is designed to +make reading and writing data frames efficient, and to make sharing data across data analysis +languages easy. Parquet can use a variety of compression techniques to shrink the file size as much as possible +while still maintaining good read performance. + +Parquet is designed to faithfully serialize and de-serialize ``DataFrame`` s, supporting all of the pandas +dtypes, including extension dtypes such as datetime with tz. + +Several caveats. + +* Duplicate column names and non-string columns names are not supported. +* The ``pyarrow`` engine always writes the index to the output, but ``fastparquet`` only writes non-default + indexes. This extra column can cause problems for non-pandas consumers that are not expecting it. You can + force including or omitting indexes with the ``index`` argument, regardless of the underlying engine. +* Index level names, if specified, must be strings. +* In the ``pyarrow`` engine, categorical dtypes for non-string types can be serialized to parquet, but will de-serialize as their primitive dtype. +* The ``pyarrow`` engine preserves the ``ordered`` flag of categorical dtypes with string types. ``fastparquet`` does not preserve the ``ordered`` flag. +* Non supported types include ``Interval`` and actual Python object types. These will raise a helpful error message + on an attempt at serialization. ``Period`` type is supported with pyarrow >= 0.16.0. +* The ``pyarrow`` engine preserves extension data types such as the nullable integer and string data + type (requiring pyarrow >= 0.16.0, and requiring the extension type to implement the needed protocols, + see the :ref:`extension types documentation `). + +You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, or ``fastparquet``, or ``auto``. +If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, +then ``pyarrow`` is tried, and falling back to ``fastparquet``. + +See the documentation for `pyarrow `__ and `fastparquet `__. + +.. note:: + + These engines are very similar and should read/write nearly identical parquet format files. + ``pyarrow>=8.0.0`` supports timedelta data, ``fastparquet>=0.1.4`` supports timezone aware datetimes. + These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). + +.. 
ipython:: python + + df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3), + "g": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "h": pd.Categorical(list("abc")), + "i": pd.Categorical(list("abc"), ordered=True), + } + ) + + df + df.dtypes + +Write to a parquet file. + +.. ipython:: python + :okwarning: + + df.to_parquet("example_pa.parquet", engine="pyarrow") + df.to_parquet("example_fp.parquet", engine="fastparquet") + +Read from a parquet file. + +.. ipython:: python + :okwarning: + + result = pd.read_parquet("example_fp.parquet", engine="fastparquet") + result = pd.read_parquet("example_pa.parquet", engine="pyarrow") + + result.dtypes + +Read only certain columns of a parquet file. + +.. ipython:: python + + result = pd.read_parquet( + "example_fp.parquet", + engine="fastparquet", + columns=["a", "b"], + ) + result = pd.read_parquet( + "example_pa.parquet", + engine="pyarrow", + columns=["a", "b"], + ) + result.dtypes + + +.. ipython:: python + :suppress: + + os.remove("example_pa.parquet") + os.remove("example_fp.parquet") + + +Handling indexes +'''''''''''''''' + +Serializing a ``DataFrame`` to parquet may include the implicit index as one or +more columns in the output file. Thus, this code: + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + df.to_parquet("test.parquet", engine="pyarrow") + +creates a parquet file with *three* columns if you use ``pyarrow`` for serialization: +``a``, ``b``, and ``__index_level_0__``. If you're using ``fastparquet``, the +index `may or may not `_ +be written to the file. + +This unexpected extra column causes some databases like Amazon Redshift to reject +the file, because that column doesn't exist in the target table. + +If you want to omit a dataframe's indexes when writing, pass ``index=False`` to +:func:`~pandas.DataFrame.to_parquet`: + +.. ipython:: python + + df.to_parquet("test.parquet", index=False) + +This creates a parquet file with just the two expected columns, ``a`` and ``b``. +If your ``DataFrame`` has a custom index, you won't get it back when you load +this file into a ``DataFrame``. + +Passing ``index=True`` will *always* write the index, even if that's not the +underlying engine's default behavior. + +.. ipython:: python + :suppress: + + os.remove("test.parquet") + + +Partitioning Parquet files +'''''''''''''''''''''''''' + +Parquet supports partitioning of data based on the values of one or more columns. + +.. ipython:: python + + df = pd.DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]}) + df.to_parquet(path="test", engine="pyarrow", partition_cols=["a"], compression=None) + +The ``path`` specifies the parent directory to which data will be saved. +The ``partition_cols`` are the column names by which the dataset will be partitioned. +Columns are partitioned in the order they are given. The partition splits are +determined by the unique values in the partition columns. +The above example creates a partitioned dataset that may look like: + +.. code-block:: text + + test + ├── a=0 + │ ├── 0bac803e32dc42ae83fddfd029cbdebc.parquet + │ └── ... + └── a=1 + ├── e6ab24a4f45147b49b54a662f0c412a3.parquet + └── ... + +.. ipython:: python + :suppress: + + from shutil import rmtree + + try: + rmtree("test") + except OSError: + pass + +.. _io.orc: + +ORC +--- + +.. 
versionadded:: 1.0.0 + +Similar to the :ref:`parquet ` format, the `ORC Format `__ is a binary columnar serialization +for data frames. It is designed to make reading data frames efficient. pandas provides both the reader and the writer for the +ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. This requires the `pyarrow `__ library. + +.. warning:: + + * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow. + * :func:`~pandas.DataFrame.to_orc` requires pyarrow>=7.0.0. + * :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc` are not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. + * For supported dtypes please refer to `supported ORC features in Arrow `__. + * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. + +.. ipython:: python + + df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(4.0, 7.0, dtype="float64"), + "d": [True, False, True], + "e": pd.date_range("20130101", periods=3), + } + ) + + df + df.dtypes + +Write to an orc file. + +.. ipython:: python + :okwarning: + + df.to_orc("example_pa.orc", engine="pyarrow") + +Read from an orc file. + +.. ipython:: python + :okwarning: + + result = pd.read_orc("example_pa.orc") + + result.dtypes + +Read only certain columns of an orc file. + +.. ipython:: python + + result = pd.read_orc( + "example_pa.orc", + columns=["a", "b"], + ) + result.dtypes + + +.. ipython:: python + :suppress: + + os.remove("example_pa.orc") + + +.. _io.sql: + +SQL queries +----------- + +The :mod:`pandas.io.sql` module provides a collection of query wrappers to both +facilitate data retrieval and to reduce dependency on DB-specific API. Database abstraction +is provided by SQLAlchemy if installed. In addition you will need a driver library for +your database. Examples of such drivers are `psycopg2 `__ +for PostgreSQL or `pymysql `__ for MySQL. +For `SQLite `__ this is +included in Python's standard library by default. +You can find an overview of supported drivers for each SQL dialect in the +`SQLAlchemy docs `__. + +If SQLAlchemy is not installed, a fallback is only provided for sqlite (and +for mysql for backwards compatibility, but this is deprecated and will be +removed in a future version). +This mode requires a Python database adapter which respect the `Python +DB-API `__. + +See also some :ref:`cookbook examples ` for some advanced strategies. + +The key functions are: + +.. autosummary:: + + read_sql_table + read_sql_query + read_sql + DataFrame.to_sql + +.. note:: + + The function :func:`~pandas.read_sql` is a convenience wrapper around + :func:`~pandas.read_sql_table` and :func:`~pandas.read_sql_query` (and for + backward compatibility) and will delegate to specific function depending on + the provided input (database table name or sql query). + Table names do not need to be quoted if they have special characters. + +In the following example, we use the `SQlite `__ SQL database +engine. You can use a temporary SQLite database where data are stored in +"memory". + +To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine +object from database URI. You only need to create the engine once per database you are +connecting to. +For more information on :func:`create_engine` and the URI formatting, see the examples +below and the SQLAlchemy `documentation `__ + +.. 
ipython:: python + + from sqlalchemy import create_engine + + # Create your engine. + engine = create_engine("sqlite:///:memory:") + +If you want to manage your own connections you can pass one of those instead. The example below opens a +connection to the database using a Python context manager that automatically closes the connection after +the block has completed. +See the `SQLAlchemy docs `__ +for an explanation of how the database connection is handled. + +.. code-block:: python + + with engine.connect() as conn, conn.begin(): + data = pd.read_sql_table("data", conn) + +.. warning:: + + When you open a connection to a database you are also responsible for closing it. + Side effects of leaving a connection open may include locking the database or + other breaking behaviour. + +Writing DataFrames +'''''''''''''''''' + +Assuming the following data is in a ``DataFrame`` ``data``, we can insert it into +the database using :func:`~pandas.DataFrame.to_sql`. + ++-----+------------+-------+-------+-------+ +| id | Date | Col_1 | Col_2 | Col_3 | ++=====+============+=======+=======+=======+ +| 26 | 2012-10-18 | X | 25.7 | True | ++-----+------------+-------+-------+-------+ +| 42 | 2012-10-19 | Y | -12.4 | False | ++-----+------------+-------+-------+-------+ +| 63 | 2012-10-20 | Z | 5.73 | True | ++-----+------------+-------+-------+-------+ + + +.. ipython:: python + + import datetime + + c = ["id", "Date", "Col_1", "Col_2", "Col_3"] + d = [ + (26, datetime.datetime(2010, 10, 18), "X", 27.5, True), + (42, datetime.datetime(2010, 10, 19), "Y", -12.5, False), + (63, datetime.datetime(2010, 10, 20), "Z", 5.73, True), + ] + + data = pd.DataFrame(d, columns=c) + + data + data.to_sql("data", engine) + +With some databases, writing large DataFrames can result in errors due to +packet size limitations being exceeded. This can be avoided by setting the +``chunksize`` parameter when calling ``to_sql``. For example, the following +writes ``data`` to the database in batches of 1000 rows at a time: + +.. ipython:: python + + data.to_sql("data_chunked", engine, chunksize=1000) + +SQL data types +++++++++++++++ + +:func:`~pandas.DataFrame.to_sql` will try to map your data to an appropriate +SQL data type based on the dtype of the data. When you have columns of dtype +``object``, pandas will try to infer the data type. + +You can always override the default type by specifying the desired SQL type of +any of the columns by using the ``dtype`` argument. This argument needs a +dictionary mapping column names to SQLAlchemy types (or strings for the sqlite3 +fallback mode). +For example, specifying to use the sqlalchemy ``String`` type instead of the +default ``Text`` type for string columns: + +.. ipython:: python + + from sqlalchemy.types import String + + data.to_sql("data_dtype", engine, dtype={"Col_1": String}) + +.. note:: + + Due to the limited support for timedelta's in the different database + flavors, columns with type ``timedelta64`` will be written as integer + values as nanoseconds to the database and a warning will be raised. + +.. note:: + + Columns of ``category`` dtype will be converted to the dense representation + as you would get with ``np.asarray(categorical)`` (e.g. for string categories + this gives an array of strings). + Because of this, reading the database table back in does **not** generate + a categorical. + +.. 
_io.sql_datetime_data: + +Datetime data types +''''''''''''''''''' + +Using SQLAlchemy, :func:`~pandas.DataFrame.to_sql` is capable of writing +datetime data that is timezone naive or timezone aware. However, the resulting +data stored in the database ultimately depends on the supported data type +for datetime data of the database system being used. + +The following table lists supported data types for datetime data for some +common databases. Other database dialects may have different data types for +datetime data. + +=========== ============================================= =================== +Database SQL Datetime Types Timezone Support +=========== ============================================= =================== +SQLite ``TEXT`` No +MySQL ``TIMESTAMP`` or ``DATETIME`` No +PostgreSQL ``TIMESTAMP`` or ``TIMESTAMP WITH TIME ZONE`` Yes +=========== ============================================= =================== + +When writing timezone aware data to databases that do not support timezones, +the data will be written as timezone naive timestamps that are in local time +with respect to the timezone. + +:func:`~pandas.read_sql_table` is also capable of reading datetime data that is +timezone aware or naive. When reading ``TIMESTAMP WITH TIME ZONE`` types, pandas +will convert the data to UTC. + +.. _io.sql.method: + +Insertion method +++++++++++++++++ + +The parameter ``method`` controls the SQL insertion clause used. +Possible values are: + +- ``None``: Uses standard SQL ``INSERT`` clause (one per row). +- ``'multi'``: Pass multiple values in a single ``INSERT`` clause. + It uses a *special* SQL syntax not supported by all backends. + This usually provides better performance for analytic databases + like *Presto* and *Redshift*, but has worse performance for + traditional SQL backend if the table contains many columns. + For more information check the SQLAlchemy `documentation + `__. +- callable with signature ``(pd_table, conn, keys, data_iter)``: + This can be used to implement a more performant insertion method based on + specific backend dialect features. + +Example of a callable using PostgreSQL `COPY clause +`__:: + + # Alternative to_sql() *method* for DBs that support COPY FROM + import csv + from io import StringIO + + def psql_insert_copy(table, conn, keys, data_iter): + """ + Execute SQL statement inserting data + + Parameters + ---------- + table : pandas.io.sql.SQLTable + conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection + keys : list of str + Column names + data_iter : Iterable that iterates the values to be inserted + """ + # gets a DBAPI connection that can provide a cursor + dbapi_conn = conn.connection + with dbapi_conn.cursor() as cur: + s_buf = StringIO() + writer = csv.writer(s_buf) + writer.writerows(data_iter) + s_buf.seek(0) + + columns = ', '.join(['"{}"'.format(k) for k in keys]) + if table.schema: + table_name = '{}.{}'.format(table.schema, table.name) + else: + table_name = table.name + + sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format( + table_name, columns) + cur.copy_expert(sql=sql, file=s_buf) + +Reading tables +'''''''''''''' + +:func:`~pandas.read_sql_table` will read a database table given the +table name and optionally a subset of columns to read. + +.. note:: + + In order to use :func:`~pandas.read_sql_table`, you **must** have the + SQLAlchemy optional dependency installed. + +.. ipython:: python + + pd.read_sql_table("data", engine) + +.. 
note:: + + Note that pandas infers column dtypes from query outputs, and not by looking + up data types in the physical database schema. For example, assume ``userid`` + is an integer column in a table. Then, intuitively, ``select userid ...`` will + return integer-valued series, while ``select cast(userid as text) ...`` will + return object-valued (str) series. Accordingly, if the query output is empty, + then all resulting columns will be returned as object-valued (since they are + most general). If you foresee that your query will sometimes generate an empty + result, you may want to explicitly typecast afterwards to ensure dtype + integrity. + +You can also specify the name of the column as the ``DataFrame`` index, +and specify a subset of columns to be read. + +.. ipython:: python + + pd.read_sql_table("data", engine, index_col="id") + pd.read_sql_table("data", engine, columns=["Col_1", "Col_2"]) + +And you can explicitly force columns to be parsed as dates: + +.. ipython:: python + + pd.read_sql_table("data", engine, parse_dates=["Date"]) + +If needed you can explicitly specify a format string, or a dict of arguments +to pass to :func:`pandas.to_datetime`: + +.. code-block:: python + + pd.read_sql_table("data", engine, parse_dates={"Date": "%Y-%m-%d"}) + pd.read_sql_table( + "data", + engine, + parse_dates={"Date": {"format": "%Y-%m-%d %H:%M:%S"}}, + ) + + +You can check if a table exists using :func:`~pandas.io.sql.has_table` + +Schema support +'''''''''''''' + +Reading from and writing to different schema's is supported through the ``schema`` +keyword in the :func:`~pandas.read_sql_table` and :func:`~pandas.DataFrame.to_sql` +functions. Note however that this depends on the database flavor (sqlite does not +have schema's). For example: + +.. code-block:: python + + df.to_sql("table", engine, schema="other_schema") + pd.read_sql_table("table", engine, schema="other_schema") + +Querying +'''''''' + +You can query using raw SQL in the :func:`~pandas.read_sql_query` function. +In this case you must use the SQL variant appropriate for your database. +When using SQLAlchemy, you can also pass SQLAlchemy Expression language constructs, +which are database-agnostic. + +.. ipython:: python + + pd.read_sql_query("SELECT * FROM data", engine) + +Of course, you can specify a more "complex" query. + +.. ipython:: python + + pd.read_sql_query("SELECT id, Col_1, Col_2 FROM data WHERE id = 42;", engine) + +The :func:`~pandas.read_sql_query` function supports a ``chunksize`` argument. +Specifying this will return an iterator through chunks of the query result: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(20, 3), columns=list("abc")) + df.to_sql("data_chunks", engine, index=False) + +.. ipython:: python + + for chunk in pd.read_sql_query("SELECT * FROM data_chunks", engine, chunksize=5): + print(chunk) + +You can also run a plain query without creating a ``DataFrame`` with +:func:`~pandas.io.sql.execute`. This is useful for queries that don't return values, +such as INSERT. This is functionally equivalent to calling ``execute`` on the +SQLAlchemy engine or db connection object. Again, you must use the SQL syntax +variant appropriate for your database. + +.. 
code-block:: python
+
+    from pandas.io import sql
+
+    sql.execute("SELECT * FROM table_name", engine)
+    sql.execute(
+        "INSERT INTO table_name VALUES(?, ?, ?, ?)", engine, params=[("id", 1, 12.2, True)]
+    )
+
+
+Engine connection examples
+''''''''''''''''''''''''''
+
+To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine
+object from a database URI. You only need to create the engine once per database you are
+connecting to.
+
+.. code-block:: python
+
+    from sqlalchemy import create_engine
+
+    engine = create_engine("postgresql://scott:tiger@localhost:5432/mydatabase")
+
+    engine = create_engine("mysql+mysqldb://scott:tiger@localhost/foo")
+
+    engine = create_engine("oracle://scott:tiger@127.0.0.1:1521/sidname")
+
+    engine = create_engine("mssql+pyodbc://mydsn")
+
+    # sqlite://<nohostname>/<path>
+    # where <path> is relative:
+    engine = create_engine("sqlite:///foo.db")
+
+    # or absolute, starting with a slash:
+    engine = create_engine("sqlite:////absolute/path/to/foo.db")
+
+For more information see the examples in the SQLAlchemy `documentation `__.
+
+
+Advanced SQLAlchemy queries
+'''''''''''''''''''''''''''
+
+You can use SQLAlchemy constructs to describe your query.
+
+Use :func:`sqlalchemy.text` to specify query parameters in a backend-neutral way:
+
+.. ipython:: python
+
+    import sqlalchemy as sa
+
+    pd.read_sql(
+        sa.text("SELECT * FROM data where Col_1=:col1"), engine, params={"col1": "X"}
+    )
+
+If you have an SQLAlchemy description of your database, you can express where conditions using SQLAlchemy expressions:
+
+.. ipython:: python
+
+    metadata = sa.MetaData()
+    data_table = sa.Table(
+        "data",
+        metadata,
+        sa.Column("index", sa.Integer),
+        sa.Column("Date", sa.DateTime),
+        sa.Column("Col_1", sa.String),
+        sa.Column("Col_2", sa.Float),
+        sa.Column("Col_3", sa.Boolean),
+    )
+
+    pd.read_sql(sa.select([data_table]).where(data_table.c.Col_3 == True), engine)
+
+You can combine SQLAlchemy expressions with parameters passed to :func:`read_sql` using :func:`sqlalchemy.bindparam`:
+
+.. ipython:: python
+
+    import datetime as dt
+
+    expr = sa.select([data_table]).where(data_table.c.Date > sa.bindparam("date"))
+    pd.read_sql(expr, engine, params={"date": dt.datetime(2010, 10, 18)})
+
+
+Sqlite fallback
+'''''''''''''''
+
+The use of sqlite is supported without using SQLAlchemy.
+This mode requires a Python database adapter which respects the `Python
+DB-API `__.
+
+You can create connections like so:
+
+.. code-block:: python
+
+    import sqlite3
+
+    con = sqlite3.connect(":memory:")
+
+And then issue the following queries:
+
+.. code-block:: python
+
+    data.to_sql("data", con)
+    pd.read_sql_query("SELECT * FROM data", con)
+
+
+.. _io.bigquery:
+
+Google BigQuery
+---------------
+
+.. warning::
+
+    Starting in 0.20.0, pandas has split off Google BigQuery support into the
+    separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it.
+
+The ``pandas-gbq`` package provides functionality to read from and write to Google BigQuery.
+
+pandas integrates with this external package. If ``pandas-gbq`` is installed, you can
+use the pandas methods ``pd.read_gbq`` and ``DataFrame.to_gbq``, which will call the
+respective functions from ``pandas-gbq``.
+
+Full documentation can be found `here `__.
+
+.. _io.stata:
+
+Stata format
+------------
+
+.. _io.stata_writer:
+
+Writing to stata format
+'''''''''''''''''''''''
+
+The method :func:`~pandas.core.frame.DataFrame.to_stata` will write a DataFrame
+into a .dta file. The format version of this file is always 115 (Stata 12).
+
+.. 
ipython:: python + + df = pd.DataFrame(np.random.randn(10, 2), columns=list("AB")) + df.to_stata("stata.dta") + +*Stata* data files have limited data type support; only strings with +244 or fewer characters, ``int8``, ``int16``, ``int32``, ``float32`` +and ``float64`` can be stored in ``.dta`` files. Additionally, +*Stata* reserves certain values to represent missing data. Exporting a +non-missing value that is outside of the permitted range in Stata for +a particular data type will retype the variable to the next larger +size. For example, ``int8`` values are restricted to lie between -127 +and 100 in Stata, and so variables with values above 100 will trigger +a conversion to ``int16``. ``nan`` values in floating points data +types are stored as the basic missing data type (``.`` in *Stata*). + +.. note:: + + It is not possible to export missing data values for integer data types. + + +The *Stata* writer gracefully handles other data types including ``int64``, +``bool``, ``uint8``, ``uint16``, ``uint32`` by casting to +the smallest supported type that can represent the data. For example, data +with a type of ``uint8`` will be cast to ``int8`` if all values are less than +100 (the upper bound for non-missing ``int8`` data in *Stata*), or, if values are +outside of this range, the variable is cast to ``int16``. + + +.. warning:: + + Conversion from ``int64`` to ``float64`` may result in a loss of precision + if ``int64`` values are larger than 2**53. + +.. warning:: + + :class:`~pandas.io.stata.StataWriter` and + :func:`~pandas.core.frame.DataFrame.to_stata` only support fixed width + strings containing up to 244 characters, a limitation imposed by the version + 115 dta file format. Attempting to write *Stata* dta files with strings + longer than 244 characters raises a ``ValueError``. + +.. _io.stata_reader: + +Reading from Stata format +''''''''''''''''''''''''' + +The top-level function ``read_stata`` will read a dta file and return +either a ``DataFrame`` or a :class:`~pandas.io.stata.StataReader` that can +be used to read the file incrementally. + +.. ipython:: python + + pd.read_stata("stata.dta") + +Specifying a ``chunksize`` yields a +:class:`~pandas.io.stata.StataReader` instance that can be used to +read ``chunksize`` lines from the file at a time. The ``StataReader`` +object can be used as an iterator. + +.. ipython:: python + + with pd.read_stata("stata.dta", chunksize=3) as reader: + for df in reader: + print(df.shape) + +For more fine-grained control, use ``iterator=True`` and specify +``chunksize`` with each call to +:func:`~pandas.io.stata.StataReader.read`. + +.. ipython:: python + + with pd.read_stata("stata.dta", iterator=True) as reader: + chunk1 = reader.read(5) + chunk2 = reader.read(5) + +Currently the ``index`` is retrieved as a column. + +The parameter ``convert_categoricals`` indicates whether value labels should be +read and used to create a ``Categorical`` variable from them. Value labels can +also be retrieved by the function ``value_labels``, which requires :func:`~pandas.io.stata.StataReader.read` +to be called before use. + +The parameter ``convert_missing`` indicates whether missing value +representations in Stata should be preserved. If ``False`` (the default), +missing values are represented as ``np.nan``. If ``True``, missing values are +represented using ``StataMissingValue`` objects, and columns containing missing +values will have ``object`` data type. + +.. 
note:: + + :func:`~pandas.read_stata` and + :class:`~pandas.io.stata.StataReader` support .dta formats 113-115 + (Stata 10-12), 117 (Stata 13), and 118 (Stata 14). + +.. note:: + + Setting ``preserve_dtypes=False`` will upcast to the standard pandas data types: + ``int64`` for all integer types and ``float64`` for floating point data. By default, + the Stata data types are preserved when importing. + +.. ipython:: python + :suppress: + + os.remove("stata.dta") + +.. _io.stata-categorical: + +Categorical data +++++++++++++++++ + +``Categorical`` data can be exported to *Stata* data files as value labeled data. +The exported data consists of the underlying category codes as integer data values +and the categories as value labels. *Stata* does not have an explicit equivalent +to a ``Categorical`` and information about *whether* the variable is ordered +is lost when exporting. + +.. warning:: + + *Stata* only supports string value labels, and so ``str`` is called on the + categories when exporting data. Exporting ``Categorical`` variables with + non-string categories produces a warning, and can result a loss of + information if the ``str`` representations of the categories are not unique. + +Labeled data can similarly be imported from *Stata* data files as ``Categorical`` +variables using the keyword argument ``convert_categoricals`` (``True`` by default). +The keyword argument ``order_categoricals`` (``True`` by default) determines +whether imported ``Categorical`` variables are ordered. + +.. note:: + + When importing categorical data, the values of the variables in the *Stata* + data file are not preserved since ``Categorical`` variables always + use integer data types between ``-1`` and ``n-1`` where ``n`` is the number + of categories. If the original values in the *Stata* data file are required, + these can be imported by setting ``convert_categoricals=False``, which will + import original data (but not the variable labels). The original values can + be matched to the imported categorical data since there is a simple mapping + between the original *Stata* data values and the category codes of imported + Categorical variables: missing values are assigned code ``-1``, and the + smallest original value is assigned ``0``, the second smallest is assigned + ``1`` and so on until the largest original value is assigned the code ``n-1``. + +.. note:: + + *Stata* supports partially labeled series. These series have value labels for + some but not all data values. Importing a partially labeled series will produce + a ``Categorical`` with string categories for the values that are labeled and + numeric categories for values with no label. + +.. _io.sas: + +.. _io.sas_reader: + +SAS formats +----------- + +The top-level function :func:`read_sas` can read (but not write) SAS +XPORT (.xpt) and (since *v0.18.0*) SAS7BDAT (.sas7bdat) format files. + +SAS files only contain two value types: ASCII text and floating point +values (usually 8 bytes but sometimes truncated). For xport files, +there is no automatic type conversion to integers, dates, or +categoricals. For SAS7BDAT files, the format codes may allow date +variables to be automatically converted to dates. By default the +whole file is read and returned as a ``DataFrame``. + +Specify a ``chunksize`` or use ``iterator=True`` to obtain reader +objects (``XportReader`` or ``SAS7BDATReader``) for incrementally +reading the file. The reader objects also have attributes that +contain additional information about the file and its variables. 
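+
+pandas normally infers whether a file is XPORT or SAS7BDAT from its extension. As a
+minimal sketch, assuming a SAS7BDAT file that was saved with a non-standard ``.dat``
+extension (the file name here is only illustrative), you can pass ``format=``
+explicitly:
+
+.. code-block:: python
+
+    # the ".dat" extension says nothing about the format, so state it explicitly
+    df = pd.read_sas("sas_data.dat", format="sas7bdat")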
+
+Read a SAS7BDAT file:
+
+.. code-block:: python
+
+    df = pd.read_sas("sas_data.sas7bdat")
+
+Obtain an iterator and read an XPORT file 100,000 lines at a time:
+
+.. code-block:: python
+
+    def do_something(chunk):
+        pass
+
+
+    with pd.read_sas("sas_xport.xpt", chunksize=100000) as rdr:
+        for chunk in rdr:
+            do_something(chunk)
+
+The specification_ for the xport file format is available from the SAS
+web site.
+
+.. _specification: https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
+
+No official documentation is available for the SAS7BDAT format.
+
+.. _io.spss:
+
+.. _io.spss_reader:
+
+SPSS formats
+------------
+
+.. versionadded:: 0.25.0
+
+The top-level function :func:`read_spss` can read (but not write) SPSS
+SAV (.sav) and ZSAV (.zsav) format files.
+
+SPSS files contain column names. By default the
+whole file is read, categorical columns are converted into ``pd.Categorical``,
+and a ``DataFrame`` with all columns is returned.
+
+Specify the ``usecols`` parameter to obtain a subset of columns. Specify ``convert_categoricals=False``
+to avoid converting categorical columns into ``pd.Categorical``.
+
+Read an SPSS file:
+
+.. code-block:: python
+
+    df = pd.read_spss("spss_data.sav")
+
+Extract a subset of columns contained in ``usecols`` from an SPSS file and
+avoid converting categorical columns into ``pd.Categorical``:
+
+.. code-block:: python
+
+    df = pd.read_spss(
+        "spss_data.sav",
+        usecols=["foo", "bar"],
+        convert_categoricals=False,
+    )
+
+More information about the SAV and ZSAV file formats is available here_.
+
+.. _here: https://www.ibm.com/docs/en/spss-statistics/22.0.0
+
+.. _io.other:
+
+Other file formats
+------------------
+
+pandas itself only supports IO with a limited set of file formats that map
+cleanly to its tabular data model. For reading and writing other file formats
+into and from pandas, we recommend these packages from the broader community.
+
+netCDF
+''''''
+
+xarray_ provides data structures inspired by the pandas ``DataFrame`` for working
+with multi-dimensional datasets, with a focus on the netCDF file format and
+easy conversion to and from pandas.
+
+.. _xarray: https://xarray.pydata.org/en/stable/
+
+.. _io.perf:
+
+Performance considerations
+--------------------------
+
+This is an informal comparison of various IO methods, using pandas
+0.24.2. Timings are machine dependent and small differences should be
+ignored.
+
+.. code-block:: ipython
+
+    In [1]: sz = 1000000
+    In [2]: df = pd.DataFrame({'A': np.random.randn(sz), 'B': [1] * sz})
+
+    In [3]: df.info()
+    <class 'pandas.core.frame.DataFrame'>
+    RangeIndex: 1000000 entries, 0 to 999999
+    Data columns (total 2 columns):
+    A    1000000 non-null float64
+    B    1000000 non-null int64
+    dtypes: float64(1), int64(1)
+    memory usage: 15.3 MB
+
+The following test functions will be used below to compare the performance of several IO methods:
+
+.. 
code-block:: python + + + + import numpy as np + + import os + + sz = 1000000 + df = pd.DataFrame({"A": np.random.randn(sz), "B": [1] * sz}) + + sz = 1000000 + np.random.seed(42) + df = pd.DataFrame({"A": np.random.randn(sz), "B": [1] * sz}) + + + def test_sql_write(df): + if os.path.exists("test.sql"): + os.remove("test.sql") + sql_db = sqlite3.connect("test.sql") + df.to_sql(name="test_table", con=sql_db) + sql_db.close() + + + def test_sql_read(): + sql_db = sqlite3.connect("test.sql") + pd.read_sql_query("select * from test_table", sql_db) + sql_db.close() + + + def test_hdf_fixed_write(df): + df.to_hdf("test_fixed.hdf", "test", mode="w") + + + def test_hdf_fixed_read(): + pd.read_hdf("test_fixed.hdf", "test") + + + def test_hdf_fixed_write_compress(df): + df.to_hdf("test_fixed_compress.hdf", "test", mode="w", complib="blosc") + + + def test_hdf_fixed_read_compress(): + pd.read_hdf("test_fixed_compress.hdf", "test") + + + def test_hdf_table_write(df): + df.to_hdf("test_table.hdf", "test", mode="w", format="table") + + + def test_hdf_table_read(): + pd.read_hdf("test_table.hdf", "test") + + + def test_hdf_table_write_compress(df): + df.to_hdf( + "test_table_compress.hdf", "test", mode="w", complib="blosc", format="table" + ) + + + def test_hdf_table_read_compress(): + pd.read_hdf("test_table_compress.hdf", "test") + + + def test_csv_write(df): + df.to_csv("test.csv", mode="w") + + + def test_csv_read(): + pd.read_csv("test.csv", index_col=0) + + + def test_feather_write(df): + df.to_feather("test.feather") + + + def test_feather_read(): + pd.read_feather("test.feather") + + + def test_pickle_write(df): + df.to_pickle("test.pkl") + + + def test_pickle_read(): + pd.read_pickle("test.pkl") + + + def test_pickle_write_compress(df): + df.to_pickle("test.pkl.compress", compression="xz") + + + def test_pickle_read_compress(): + pd.read_pickle("test.pkl.compress", compression="xz") + + + def test_parquet_write(df): + df.to_parquet("test.parquet") + + + def test_parquet_read(): + pd.read_parquet("test.parquet") + +When writing, the top three functions in terms of speed are ``test_feather_write``, ``test_hdf_fixed_write`` and ``test_hdf_fixed_write_compress``. + +.. code-block:: ipython + + In [4]: %timeit test_sql_write(df) + 3.29 s ± 43.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [5]: %timeit test_hdf_fixed_write(df) + 19.4 ms ± 560 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [6]: %timeit test_hdf_fixed_write_compress(df) + 19.6 ms ± 308 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [7]: %timeit test_hdf_table_write(df) + 449 ms ± 5.61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [8]: %timeit test_hdf_table_write_compress(df) + 448 ms ± 11.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [9]: %timeit test_csv_write(df) + 3.66 s ± 26.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [10]: %timeit test_feather_write(df) + 9.75 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [11]: %timeit test_pickle_write(df) + 30.1 ms ± 229 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [12]: %timeit test_pickle_write_compress(df) + 4.29 s ± 15.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [13]: %timeit test_parquet_write(df) + 67.6 ms ± 706 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + +When reading, the top three functions in terms of speed are ``test_feather_read``, ``test_pickle_read`` and +``test_hdf_fixed_read``. + + +.. 
code-block:: ipython + + In [14]: %timeit test_sql_read() + 1.77 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [15]: %timeit test_hdf_fixed_read() + 19.4 ms ± 436 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [16]: %timeit test_hdf_fixed_read_compress() + 19.5 ms ± 222 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [17]: %timeit test_hdf_table_read() + 38.6 ms ± 857 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [18]: %timeit test_hdf_table_read_compress() + 38.8 ms ± 1.49 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [19]: %timeit test_csv_read() + 452 ms ± 9.04 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [20]: %timeit test_feather_read() + 12.4 ms ± 99.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [21]: %timeit test_pickle_read() + 18.4 ms ± 191 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [22]: %timeit test_pickle_read_compress() + 915 ms ± 7.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [23]: %timeit test_parquet_read() + 24.4 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + +The files ``test.pkl.compress``, ``test.parquet`` and ``test.feather`` took the least space on disk (in bytes). + +.. code-block:: none + + 29519500 Oct 10 06:45 test.csv + 16000248 Oct 10 06:45 test.feather + 8281983 Oct 10 06:49 test.parquet + 16000857 Oct 10 06:47 test.pkl + 7552144 Oct 10 06:48 test.pkl.compress + 34816000 Oct 10 06:42 test.sql + 24009288 Oct 10 06:43 test_fixed.hdf + 24009288 Oct 10 06:43 test_fixed_compress.hdf + 24458940 Oct 10 06:44 test_table.hdf + 24458940 Oct 10 06:44 test_table_compress.hdf diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst new file mode 100644 index 00000000..bbca5773 --- /dev/null +++ b/doc/source/user_guide/merging.rst @@ -0,0 +1,1513 @@ +.. _merging: + +{{ header }} + +.. ipython:: python + :suppress: + + from matplotlib import pyplot as plt + import pandas.util._doctools as doctools + + p = doctools.TablePlotter() + + +************************************ +Merge, join, concatenate and compare +************************************ + +pandas provides various facilities for easily combining together Series or +DataFrame with various kinds of set logic for the indexes +and relational algebra functionality in the case of join / merge-type +operations. + +In addition, pandas also provides utilities to compare two Series or DataFrame +and summarize their differences. + +.. _merging.concat: + +Concatenating objects +--------------------- + +The :func:`~pandas.concat` function (in the main pandas namespace) does all of +the heavy lifting of performing concatenation operations along an axis while +performing optional set logic (union or intersection) of the indexes (if any) on +the other axes. Note that I say "if any" because there is only a single possible +axis of concatenation for Series. + +Before diving into all of the details of ``concat`` and what it can do, here is +a simple example: + +.. 
ipython:: python + + df1 = pd.DataFrame( + { + "A": ["A0", "A1", "A2", "A3"], + "B": ["B0", "B1", "B2", "B3"], + "C": ["C0", "C1", "C2", "C3"], + "D": ["D0", "D1", "D2", "D3"], + }, + index=[0, 1, 2, 3], + ) + + df2 = pd.DataFrame( + { + "A": ["A4", "A5", "A6", "A7"], + "B": ["B4", "B5", "B6", "B7"], + "C": ["C4", "C5", "C6", "C7"], + "D": ["D4", "D5", "D6", "D7"], + }, + index=[4, 5, 6, 7], + ) + + df3 = pd.DataFrame( + { + "A": ["A8", "A9", "A10", "A11"], + "B": ["B8", "B9", "B10", "B11"], + "C": ["C8", "C9", "C10", "C11"], + "D": ["D8", "D9", "D10", "D11"], + }, + index=[8, 9, 10, 11], + ) + + frames = [df1, df2, df3] + result = pd.concat(frames) + +.. ipython:: python + :suppress: + + @savefig merging_concat_basic.png + p.plot(frames, result, labels=["df1", "df2", "df3"], vertical=True); + plt.close("all"); + +Like its sibling function on ndarrays, ``numpy.concatenate``, ``pandas.concat`` +takes a list or dict of homogeneously-typed objects and concatenates them with +some configurable handling of "what to do with the other axes": + +:: + + pd.concat( + objs, + axis=0, + join="outer", + ignore_index=False, + keys=None, + levels=None, + names=None, + verify_integrity=False, + copy=True, + ) + +* ``objs`` : a sequence or mapping of Series or DataFrame objects. If a + dict is passed, the sorted keys will be used as the ``keys`` argument, unless + it is passed, in which case the values will be selected (see below). Any None + objects will be dropped silently unless they are all None in which case a + ValueError will be raised. +* ``axis`` : {0, 1, ...}, default 0. The axis to concatenate along. +* ``join`` : {'inner', 'outer'}, default 'outer'. How to handle indexes on + other axis(es). Outer for union and inner for intersection. +* ``ignore_index`` : boolean, default False. If True, do not use the index + values on the concatenation axis. The resulting axis will be labeled 0, ..., + n - 1. This is useful if you are concatenating objects where the + concatenation axis does not have meaningful indexing information. Note + the index values on the other axes are still respected in the join. +* ``keys`` : sequence, default None. Construct hierarchical index using the + passed keys as the outermost level. If multiple levels passed, should + contain tuples. +* ``levels`` : list of sequences, default None. Specific levels (unique values) + to use for constructing a MultiIndex. Otherwise they will be inferred from the + keys. +* ``names`` : list, default None. Names for the levels in the resulting + hierarchical index. +* ``verify_integrity`` : boolean, default False. Check whether the new + concatenated axis contains duplicates. This can be very expensive relative + to the actual data concatenation. +* ``copy`` : boolean, default True. If False, do not copy data unnecessarily. + +Without a little bit of context many of these arguments don't make much sense. +Let's revisit the above example. Suppose we wanted to associate specific keys +with each of the pieces of the chopped up DataFrame. We can do this using the +``keys`` argument: + +.. ipython:: python + + result = pd.concat(frames, keys=["x", "y", "z"]) + +.. ipython:: python + :suppress: + + @savefig merging_concat_keys.png + p.plot(frames, result, labels=["df1", "df2", "df3"], vertical=True) + plt.close("all"); + +As you can see (if you've read the rest of the documentation), the resulting +object's index has a :ref:`hierarchical index `. This +means that we can now select out each chunk by key: + +.. 
ipython:: python + + result.loc["y"] + +It's not a stretch to see how this can be very useful. More detail on this +functionality below. + +.. note:: + It is worth noting that :func:`~pandas.concat` (and therefore + :func:`~pandas.append`) makes a full copy of the data, and that constantly + reusing this function can create a significant performance hit. If you need + to use the operation over several datasets, use a list comprehension. + +:: + + frames = [ process_your_file(f) for f in files ] + result = pd.concat(frames) + +.. note:: + + When concatenating DataFrames with named axes, pandas will attempt to preserve + these index/column names whenever possible. In the case where all inputs share a + common name, this name will be assigned to the result. When the input names do + not all agree, the result will be unnamed. The same is true for :class:`MultiIndex`, + but the logic is applied separately on a level-by-level basis. + + +Set logic on the other axes +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When gluing together multiple DataFrames, you have a choice of how to handle +the other axes (other than the one being concatenated). This can be done in +the following two ways: + +* Take the union of them all, ``join='outer'``. This is the default + option as it results in zero information loss. +* Take the intersection, ``join='inner'``. + +Here is an example of each of these methods. First, the default ``join='outer'`` +behavior: + +.. ipython:: python + + df4 = pd.DataFrame( + { + "B": ["B2", "B3", "B6", "B7"], + "D": ["D2", "D3", "D6", "D7"], + "F": ["F2", "F3", "F6", "F7"], + }, + index=[2, 3, 6, 7], + ) + result = pd.concat([df1, df4], axis=1) + + +.. ipython:: python + :suppress: + + @savefig merging_concat_axis1.png + p.plot([df1, df4], result, labels=["df1", "df4"], vertical=False); + plt.close("all"); + +Here is the same thing with ``join='inner'``: + +.. ipython:: python + + result = pd.concat([df1, df4], axis=1, join="inner") + +.. ipython:: python + :suppress: + + @savefig merging_concat_axis1_inner.png + p.plot([df1, df4], result, labels=["df1", "df4"], vertical=False); + plt.close("all"); + +Lastly, suppose we just wanted to reuse the *exact index* from the original +DataFrame: + +.. ipython:: python + + result = pd.concat([df1, df4], axis=1).reindex(df1.index) + +Similarly, we could index before the concatenation: + +.. ipython:: python + + pd.concat([df1, df4.reindex(df1.index)], axis=1) + +.. ipython:: python + :suppress: + + @savefig merging_concat_axis1_join_axes.png + p.plot([df1, df4], result, labels=["df1", "df4"], vertical=False); + plt.close("all"); + +.. _merging.ignore_index: + +Ignoring indexes on the concatenation axis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For ``DataFrame`` objects which don't have a meaningful index, you may wish +to append them and ignore the fact that they may have overlapping indexes. To +do this, use the ``ignore_index`` argument: + +.. ipython:: python + + result = pd.concat([df1, df4], ignore_index=True, sort=False) + +.. ipython:: python + :suppress: + + @savefig merging_concat_ignore_index.png + p.plot([df1, df4], result, labels=["df1", "df4"], vertical=True); + plt.close("all"); + +.. _merging.mixed_ndims: + +Concatenating with mixed ndims +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can concatenate a mix of ``Series`` and ``DataFrame`` objects. The +``Series`` will be transformed to ``DataFrame`` with the column name as +the name of the ``Series``. + +.. 
ipython:: python + + s1 = pd.Series(["X0", "X1", "X2", "X3"], name="X") + result = pd.concat([df1, s1], axis=1) + +.. ipython:: python + :suppress: + + @savefig merging_concat_mixed_ndim.png + p.plot([df1, s1], result, labels=["df1", "s1"], vertical=False); + plt.close("all"); + +.. note:: + + Since we're concatenating a ``Series`` to a ``DataFrame``, we could have + achieved the same result with :meth:`DataFrame.assign`. To concatenate an + arbitrary number of pandas objects (``DataFrame`` or ``Series``), use + ``concat``. + +If unnamed ``Series`` are passed they will be numbered consecutively. + +.. ipython:: python + + s2 = pd.Series(["_0", "_1", "_2", "_3"]) + result = pd.concat([df1, s2, s2, s2], axis=1) + +.. ipython:: python + :suppress: + + @savefig merging_concat_unnamed_series.png + p.plot([df1, s2], result, labels=["df1", "s2"], vertical=False); + plt.close("all"); + +Passing ``ignore_index=True`` will drop all name references. + +.. ipython:: python + + result = pd.concat([df1, s1], axis=1, ignore_index=True) + +.. ipython:: python + :suppress: + + @savefig merging_concat_series_ignore_index.png + p.plot([df1, s1], result, labels=["df1", "s1"], vertical=False); + plt.close("all"); + +More concatenating with group keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A fairly common use of the ``keys`` argument is to override the column names +when creating a new ``DataFrame`` based on existing ``Series``. +Notice how the default behaviour consists on letting the resulting ``DataFrame`` +inherit the parent ``Series``' name, when these existed. + +.. ipython:: python + + s3 = pd.Series([0, 1, 2, 3], name="foo") + s4 = pd.Series([0, 1, 2, 3]) + s5 = pd.Series([0, 1, 4, 5]) + + pd.concat([s3, s4, s5], axis=1) + +Through the ``keys`` argument we can override the existing column names. + +.. ipython:: python + + pd.concat([s3, s4, s5], axis=1, keys=["red", "blue", "yellow"]) + +Let's consider a variation of the very first example presented: + +.. ipython:: python + + result = pd.concat(frames, keys=["x", "y", "z"]) + +.. ipython:: python + :suppress: + + @savefig merging_concat_group_keys2.png + p.plot(frames, result, labels=["df1", "df2", "df3"], vertical=True); + plt.close("all"); + +You can also pass a dict to ``concat`` in which case the dict keys will be used +for the ``keys`` argument (unless other keys are specified): + +.. ipython:: python + + pieces = {"x": df1, "y": df2, "z": df3} + result = pd.concat(pieces) + +.. ipython:: python + :suppress: + + @savefig merging_concat_dict.png + p.plot([df1, df2, df3], result, labels=["df1", "df2", "df3"], vertical=True); + plt.close("all"); + +.. ipython:: python + + result = pd.concat(pieces, keys=["z", "y"]) + +.. ipython:: python + :suppress: + + @savefig merging_concat_dict_keys.png + p.plot([df1, df2, df3], result, labels=["df1", "df2", "df3"], vertical=True); + plt.close("all"); + +The MultiIndex created has levels that are constructed from the passed keys and +the index of the ``DataFrame`` pieces: + +.. ipython:: python + + result.index.levels + +If you wish to specify other levels (as will occasionally be the case), you can +do so using the ``levels`` argument: + +.. ipython:: python + + result = pd.concat( + pieces, keys=["x", "y", "z"], levels=[["z", "y", "x", "w"]], names=["group_key"] + ) + +.. ipython:: python + :suppress: + + @savefig merging_concat_dict_keys_names.png + p.plot([df1, df2, df3], result, labels=["df1", "df2", "df3"], vertical=True); + plt.close("all"); + +.. 
ipython:: python + + result.index.levels + +This is fairly esoteric, but it is actually necessary for implementing things +like GroupBy where the order of a categorical variable is meaningful. + +.. _merging.append.row: + +Appending rows to a DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you have a series that you want to append as a single row to a ``DataFrame``, you can convert the row into a +``DataFrame`` and use ``concat`` + +.. ipython:: python + + s2 = pd.Series(["X0", "X1", "X2", "X3"], index=["A", "B", "C", "D"]) + result = pd.concat([df1, s2.to_frame().T], ignore_index=True) + +.. ipython:: python + :suppress: + + @savefig merging_append_series_as_row.png + p.plot([df1, s2], result, labels=["df1", "s2"], vertical=True); + plt.close("all"); + +You should use ``ignore_index`` with this method to instruct DataFrame to +discard its index. If you wish to preserve the index, you should construct an +appropriately-indexed DataFrame and append or concatenate those objects. + +.. _merging.join: + +Database-style DataFrame or named Series joining/merging +-------------------------------------------------------- + +pandas has full-featured, **high performance** in-memory join operations +idiomatically very similar to relational databases like SQL. These methods +perform significantly better (in some cases well over an order of magnitude +better) than other open source implementations (like ``base::merge.data.frame`` +in R). The reason for this is careful algorithmic design and the internal layout +of the data in ``DataFrame``. + +See the :ref:`cookbook` for some advanced strategies. + +Users who are familiar with SQL but new to pandas might be interested in a +:ref:`comparison with SQL`. + +pandas provides a single function, :func:`~pandas.merge`, as the entry point for +all standard database join operations between ``DataFrame`` or named ``Series`` objects: + +:: + + pd.merge( + left, + right, + how="inner", + on=None, + left_on=None, + right_on=None, + left_index=False, + right_index=False, + sort=True, + suffixes=("_x", "_y"), + copy=True, + indicator=False, + validate=None, + ) + +* ``left``: A DataFrame or named Series object. +* ``right``: Another DataFrame or named Series object. +* ``on``: Column or index level names to join on. Must be found in both the left + and right DataFrame and/or Series objects. If not passed and ``left_index`` and + ``right_index`` are ``False``, the intersection of the columns in the + DataFrames and/or Series will be inferred to be the join keys. +* ``left_on``: Columns or index levels from the left DataFrame or Series to use as + keys. Can either be column names, index level names, or arrays with length + equal to the length of the DataFrame or Series. +* ``right_on``: Columns or index levels from the right DataFrame or Series to use as + keys. Can either be column names, index level names, or arrays with length + equal to the length of the DataFrame or Series. +* ``left_index``: If ``True``, use the index (row labels) from the left + DataFrame or Series as its join key(s). In the case of a DataFrame or Series with a MultiIndex + (hierarchical), the number of levels must match the number of join keys + from the right DataFrame or Series. +* ``right_index``: Same usage as ``left_index`` for the right DataFrame or Series +* ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``, ``'cross'``. Defaults + to ``inner``. See below for more detailed description of each method. 
+* ``sort``: Sort the result DataFrame by the join keys in lexicographical + order. Defaults to ``True``, setting to ``False`` will improve performance + substantially in many cases. +* ``suffixes``: A tuple of string suffixes to apply to overlapping + columns. Defaults to ``('_x', '_y')``. +* ``copy``: Always copy data (default ``True``) from the passed DataFrame or named Series + objects, even when reindexing is not necessary. Cannot be avoided in many + cases but may improve performance / memory usage. The cases where copying + can be avoided are somewhat pathological but this option is provided + nonetheless. +* ``indicator``: Add a column to the output DataFrame called ``_merge`` + with information on the source of each row. ``_merge`` is Categorical-type + and takes on a value of ``left_only`` for observations whose merge key + only appears in ``'left'`` DataFrame or Series, ``right_only`` for observations whose + merge key only appears in ``'right'`` DataFrame or Series, and ``both`` if the + observation's merge key is found in both. + +* ``validate`` : string, default None. + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": checks if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": checks if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": checks if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + +.. note:: + + Support for specifying index levels as the ``on``, ``left_on``, and + ``right_on`` parameters was added in version 0.23.0. + Support for merging named ``Series`` objects was added in version 0.24.0. + +The return type will be the same as ``left``. If ``left`` is a ``DataFrame`` or named ``Series`` +and ``right`` is a subclass of ``DataFrame``, the return type will still be ``DataFrame``. + +``merge`` is a function in the pandas namespace, and it is also available as a +``DataFrame`` instance method :meth:`~DataFrame.merge`, with the calling +``DataFrame`` being implicitly considered the left object in the join. + +The related :meth:`~DataFrame.join` method, uses ``merge`` internally for the +index-on-index (by default) and column(s)-on-index join. If you are joining on +index only, you may wish to use ``DataFrame.join`` to save yourself some typing. + +Brief primer on merge methods (relational algebra) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Experienced users of relational databases like SQL will be familiar with the +terminology used to describe join operations between two SQL-table like +structures (``DataFrame`` objects). There are several cases to consider which +are very important to understand: + +* **one-to-one** joins: for example when joining two ``DataFrame`` objects on + their indexes (which must contain unique values). +* **many-to-one** joins: for example when joining an index (unique) to one or + more columns in a different ``DataFrame``. +* **many-to-many** joins: joining columns on columns. + +.. note:: + + When joining columns on columns (potentially a many-to-many join), any + indexes on the passed ``DataFrame`` objects **will be discarded**. + + +It is worth spending some time understanding the result of the **many-to-many** +join case. In SQL / standard relational algebra, if a key combination appears +more than once in both tables, the resulting table will have the **Cartesian +product** of the associated data. Here is a very basic example with one unique +key combination: + +.. 
ipython:: python + + left = pd.DataFrame( + { + "key": ["K0", "K1", "K2", "K3"], + "A": ["A0", "A1", "A2", "A3"], + "B": ["B0", "B1", "B2", "B3"], + } + ) + + right = pd.DataFrame( + { + "key": ["K0", "K1", "K2", "K3"], + "C": ["C0", "C1", "C2", "C3"], + "D": ["D0", "D1", "D2", "D3"], + } + ) + result = pd.merge(left, right, on="key") + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +Here is a more complicated example with multiple join keys. Only the keys +appearing in ``left`` and ``right`` are present (the intersection), since +``how='inner'`` by default. + +.. ipython:: python + + left = pd.DataFrame( + { + "key1": ["K0", "K0", "K1", "K2"], + "key2": ["K0", "K1", "K0", "K1"], + "A": ["A0", "A1", "A2", "A3"], + "B": ["B0", "B1", "B2", "B3"], + } + ) + + right = pd.DataFrame( + { + "key1": ["K0", "K1", "K1", "K2"], + "key2": ["K0", "K0", "K0", "K0"], + "C": ["C0", "C1", "C2", "C3"], + "D": ["D0", "D1", "D2", "D3"], + } + ) + + result = pd.merge(left, right, on=["key1", "key2"]) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_multiple.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +The ``how`` argument to ``merge`` specifies how to determine which keys are to +be included in the resulting table. If a key combination **does not appear** in +either the left or right tables, the values in the joined table will be +``NA``. Here is a summary of the ``how`` options and their SQL equivalent names: + +.. csv-table:: + :header: "Merge method", "SQL Join Name", "Description" + :widths: 20, 20, 60 + + ``left``, ``LEFT OUTER JOIN``, Use keys from left frame only + ``right``, ``RIGHT OUTER JOIN``, Use keys from right frame only + ``outer``, ``FULL OUTER JOIN``, Use union of keys from both frames + ``inner``, ``INNER JOIN``, Use intersection of keys from both frames + ``cross``, ``CROSS JOIN``, Create the cartesian product of rows of both frames + +.. ipython:: python + + result = pd.merge(left, right, how="left", on=["key1", "key2"]) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_left.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. ipython:: python + + result = pd.merge(left, right, how="right", on=["key1", "key2"]) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_right.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + +.. ipython:: python + + result = pd.merge(left, right, how="outer", on=["key1", "key2"]) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_outer.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. ipython:: python + + result = pd.merge(left, right, how="inner", on=["key1", "key2"]) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_inner.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. ipython:: python + + result = pd.merge(left, right, how="cross") + +.. ipython:: python + :suppress: + + @savefig merging_merge_cross.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +You can merge a mult-indexed Series and a DataFrame, if the names of +the MultiIndex correspond to the columns from the DataFrame. 
Transform +the Series to a DataFrame using :meth:`Series.reset_index` before merging, +as shown in the following example. + +.. ipython:: python + + df = pd.DataFrame({"Let": ["A", "B", "C"], "Num": [1, 2, 3]}) + df + + ser = pd.Series( + ["a", "b", "c", "d", "e", "f"], + index=pd.MultiIndex.from_arrays( + [["A", "B", "C"] * 2, [1, 2, 3, 4, 5, 6]], names=["Let", "Num"] + ), + ) + ser + + pd.merge(df, ser.reset_index(), on=["Let", "Num"]) + + +Here is another example with duplicate join keys in DataFrames: + +.. ipython:: python + + left = pd.DataFrame({"A": [1, 2], "B": [2, 2]}) + + right = pd.DataFrame({"A": [4, 5, 6], "B": [2, 2, 2]}) + + result = pd.merge(left, right, on="B", how="outer") + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_dup.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + + +.. warning:: + + Joining / merging on duplicate keys can cause a returned frame that is the multiplication of the row dimensions, which may result in memory overflow. It is the user' s responsibility to manage duplicate values in keys before joining large DataFrames. + +.. _merging.validation: + +Checking for duplicate keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Users can use the ``validate`` argument to automatically check whether there +are unexpected duplicates in their merge keys. Key uniqueness is checked before +merge operations and so should protect against memory overflows. Checking key +uniqueness is also a good way to ensure user data structures are as expected. + +In the following example, there are duplicate values of ``B`` in the right +``DataFrame``. As this is not a one-to-one merge -- as specified in the +``validate`` argument -- an exception will be raised. + + +.. ipython:: python + + left = pd.DataFrame({"A": [1, 2], "B": [1, 2]}) + right = pd.DataFrame({"A": [4, 5, 6], "B": [2, 2, 2]}) + +.. code-block:: ipython + + In [53]: result = pd.merge(left, right, on="B", how="outer", validate="one_to_one") + ... + MergeError: Merge keys are not unique in right dataset; not a one-to-one merge + +If the user is aware of the duplicates in the right ``DataFrame`` but wants to +ensure there are no duplicates in the left DataFrame, one can use the +``validate='one_to_many'`` argument instead, which will not raise an exception. + +.. ipython:: python + + pd.merge(left, right, on="B", how="outer", validate="one_to_many") + + +.. _merging.indicator: + +The merge indicator +~~~~~~~~~~~~~~~~~~~ + +:func:`~pandas.merge` accepts the argument ``indicator``. If ``True``, a +Categorical-type column called ``_merge`` will be added to the output object +that takes on values: + + =================================== ================ + Observation Origin ``_merge`` value + =================================== ================ + Merge key only in ``'left'`` frame ``left_only`` + Merge key only in ``'right'`` frame ``right_only`` + Merge key in both frames ``both`` + =================================== ================ + +.. ipython:: python + + df1 = pd.DataFrame({"col1": [0, 1], "col_left": ["a", "b"]}) + df2 = pd.DataFrame({"col1": [1, 2, 2], "col_right": [2, 2, 2]}) + pd.merge(df1, df2, on="col1", how="outer", indicator=True) + +The ``indicator`` argument will also accept string arguments, in which case the indicator function will use the value of the passed string as the name for the indicator column. + +.. ipython:: python + + pd.merge(df1, df2, on="col1", how="outer", indicator="indicator_column") + + +.. 
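+The indicator column is convenient for filtering after an outer merge, for
+example to keep only the rows that are present in just one of the inputs (an
+illustrative sketch, not part of the examples above, reusing the ``df1`` and
+``df2`` defined just before):
+
+.. code-block:: python
+
+    merged = pd.merge(df1, df2, on="col1", how="outer", indicator=True)
+
+    # keep rows whose merge key appears only in df1 (an "anti-join")
+    merged[merged["_merge"] == "left_only"].drop(columns="_merge")
+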
_merging.dtypes: + +Merge dtypes +~~~~~~~~~~~~ + +Merging will preserve the dtype of the join keys. + +.. ipython:: python + + left = pd.DataFrame({"key": [1], "v1": [10]}) + left + right = pd.DataFrame({"key": [1, 2], "v1": [20, 30]}) + right + +We are able to preserve the join keys: + +.. ipython:: python + + pd.merge(left, right, how="outer") + pd.merge(left, right, how="outer").dtypes + +Of course if you have missing values that are introduced, then the +resulting dtype will be upcast. + +.. ipython:: python + + pd.merge(left, right, how="outer", on="key") + pd.merge(left, right, how="outer", on="key").dtypes + +Merging will preserve ``category`` dtypes of the mergands. See also the section on :ref:`categoricals `. + +The left frame. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + X = pd.Series(np.random.choice(["foo", "bar"], size=(10,))) + X = X.astype(CategoricalDtype(categories=["foo", "bar"])) + + left = pd.DataFrame( + {"X": X, "Y": np.random.choice(["one", "two", "three"], size=(10,))} + ) + left + left.dtypes + +The right frame. + +.. ipython:: python + + right = pd.DataFrame( + { + "X": pd.Series(["foo", "bar"], dtype=CategoricalDtype(["foo", "bar"])), + "Z": [1, 2], + } + ) + right + right.dtypes + +The merged result: + +.. ipython:: python + + result = pd.merge(left, right, how="outer") + result + result.dtypes + +.. note:: + + The category dtypes must be *exactly* the same, meaning the same categories and the ordered attribute. + Otherwise the result will coerce to the categories' dtype. + +.. note:: + + Merging on ``category`` dtypes that are the same can be quite performant compared to ``object`` dtype merging. + +.. _merging.join.index: + +Joining on index +~~~~~~~~~~~~~~~~ + +:meth:`DataFrame.join` is a convenient method for combining the columns of two +potentially differently-indexed ``DataFrames`` into a single result +``DataFrame``. Here is a very basic example: + +.. ipython:: python + + left = pd.DataFrame( + {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=["K0", "K1", "K2"] + ) + + right = pd.DataFrame( + {"C": ["C0", "C2", "C3"], "D": ["D0", "D2", "D3"]}, index=["K0", "K2", "K3"] + ) + + result = left.join(right) + +.. ipython:: python + :suppress: + + @savefig merging_join.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. ipython:: python + + result = left.join(right, how="outer") + +.. ipython:: python + :suppress: + + @savefig merging_join_outer.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +The same as above, but with ``how='inner'``. + +.. ipython:: python + + result = left.join(right, how="inner") + +.. ipython:: python + :suppress: + + @savefig merging_join_inner.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +The data alignment here is on the indexes (row labels). This same behavior can +be achieved using ``merge`` plus additional arguments instructing it to use the +indexes: + +.. ipython:: python + + result = pd.merge(left, right, left_index=True, right_index=True, how="outer") + +.. ipython:: python + :suppress: + + @savefig merging_merge_index_outer.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. ipython:: python + + result = pd.merge(left, right, left_index=True, right_index=True, how="inner") + +.. 
ipython:: python + :suppress: + + @savefig merging_merge_index_inner.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +Joining key columns on an index +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~DataFrame.join` takes an optional ``on`` argument which may be a column +or multiple column names, which specifies that the passed ``DataFrame`` is to be +aligned on that column in the ``DataFrame``. These two function calls are +completely equivalent: + +:: + + left.join(right, on=key_or_keys) + pd.merge( + left, right, left_on=key_or_keys, right_index=True, how="left", sort=False + ) + +Obviously you can choose whichever form you find more convenient. For +many-to-one joins (where one of the ``DataFrame``'s is already indexed by the +join key), using ``join`` may be more convenient. Here is a simple example: + +.. ipython:: python + + left = pd.DataFrame( + { + "A": ["A0", "A1", "A2", "A3"], + "B": ["B0", "B1", "B2", "B3"], + "key": ["K0", "K1", "K0", "K1"], + } + ) + + right = pd.DataFrame({"C": ["C0", "C1"], "D": ["D0", "D1"]}, index=["K0", "K1"]) + + result = left.join(right, on="key") + +.. ipython:: python + :suppress: + + @savefig merging_join_key_columns.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. ipython:: python + + result = pd.merge( + left, right, left_on="key", right_index=True, how="left", sort=False + ) + +.. ipython:: python + :suppress: + + @savefig merging_merge_key_columns.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. _merging.multikey_join: + +To join on multiple keys, the passed DataFrame must have a ``MultiIndex``: + +.. ipython:: python + + left = pd.DataFrame( + { + "A": ["A0", "A1", "A2", "A3"], + "B": ["B0", "B1", "B2", "B3"], + "key1": ["K0", "K0", "K1", "K2"], + "key2": ["K0", "K1", "K0", "K1"], + } + ) + + index = pd.MultiIndex.from_tuples( + [("K0", "K0"), ("K1", "K0"), ("K2", "K0"), ("K2", "K1")] + ) + right = pd.DataFrame( + {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]}, index=index + ) + +Now this can be joined by passing the two key column names: + +.. ipython:: python + + result = left.join(right, on=["key1", "key2"]) + +.. ipython:: python + :suppress: + + @savefig merging_join_multikeys.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. _merging.df_inner_join: + +The default for ``DataFrame.join`` is to perform a left join (essentially a +"VLOOKUP" operation, for Excel users), which uses only the keys found in the +calling DataFrame. Other join types, for example inner join, can be just as +easily performed: + +.. ipython:: python + + result = left.join(right, on=["key1", "key2"], how="inner") + +.. ipython:: python + :suppress: + + @savefig merging_join_multikeys_inner.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +As you can see, this drops any rows where there was no match. + +.. _merging.join_on_mi: + +Joining a single Index to a MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can join a singly-indexed ``DataFrame`` with a level of a MultiIndexed ``DataFrame``. +The level will match on the name of the index of the singly-indexed frame against +a level name of the MultiIndexed frame. + +.. 
ipython:: python + + left = pd.DataFrame( + {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, + index=pd.Index(["K0", "K1", "K2"], name="key"), + ) + + index = pd.MultiIndex.from_tuples( + [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")], + names=["key", "Y"], + ) + right = pd.DataFrame( + {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]}, + index=index, + ) + + result = left.join(right, how="inner") + + +.. ipython:: python + :suppress: + + @savefig merging_join_multiindex_inner.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +This is equivalent but less verbose and more memory efficient / faster than this. + +.. ipython:: python + + result = pd.merge( + left.reset_index(), right.reset_index(), on=["key"], how="inner" + ).set_index(["key","Y"]) + +.. ipython:: python + :suppress: + + @savefig merging_merge_multiindex_alternative.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. _merging.join_with_two_multi_indexes: + +Joining with two MultiIndexes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is supported in a limited way, provided that the index for the right +argument is completely used in the join, and is a subset of the indices in +the left argument, as in this example: + +.. ipython:: python + + leftindex = pd.MultiIndex.from_product( + [list("abc"), list("xy"), [1, 2]], names=["abc", "xy", "num"] + ) + left = pd.DataFrame({"v1": range(12)}, index=leftindex) + left + + rightindex = pd.MultiIndex.from_product( + [list("abc"), list("xy")], names=["abc", "xy"] + ) + right = pd.DataFrame({"v2": [100 * i for i in range(1, 7)]}, index=rightindex) + right + + left.join(right, on=["abc", "xy"], how="inner") + +If that condition is not satisfied, a join with two multi-indexes can be +done using the following code. + +.. ipython:: python + + leftindex = pd.MultiIndex.from_tuples( + [("K0", "X0"), ("K0", "X1"), ("K1", "X2")], names=["key", "X"] + ) + left = pd.DataFrame( + {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=leftindex + ) + + rightindex = pd.MultiIndex.from_tuples( + [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")], names=["key", "Y"] + ) + right = pd.DataFrame( + {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]}, index=rightindex + ) + + result = pd.merge( + left.reset_index(), right.reset_index(), on=["key"], how="inner" + ).set_index(["key", "X", "Y"]) + +.. ipython:: python + :suppress: + + @savefig merging_merge_two_multiindex.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. _merging.merge_on_columns_and_levels: + +Merging on a combination of columns and index levels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Strings passed as the ``on``, ``left_on``, and ``right_on`` parameters +may refer to either column names or index level names. This enables merging +``DataFrame`` instances on a combination of index levels and columns without +resetting indexes. + +.. 
ipython:: python + + left_index = pd.Index(["K0", "K0", "K1", "K2"], name="key1") + + left = pd.DataFrame( + { + "A": ["A0", "A1", "A2", "A3"], + "B": ["B0", "B1", "B2", "B3"], + "key2": ["K0", "K1", "K0", "K1"], + }, + index=left_index, + ) + + right_index = pd.Index(["K0", "K1", "K2", "K2"], name="key1") + + right = pd.DataFrame( + { + "C": ["C0", "C1", "C2", "C3"], + "D": ["D0", "D1", "D2", "D3"], + "key2": ["K0", "K0", "K0", "K1"], + }, + index=right_index, + ) + + result = left.merge(right, on=["key1", "key2"]) + +.. ipython:: python + :suppress: + + @savefig merge_on_index_and_column.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. note:: + + When DataFrames are merged on a string that matches an index level in both + frames, the index level is preserved as an index level in the resulting + DataFrame. + +.. note:: + When DataFrames are merged using only some of the levels of a ``MultiIndex``, + the extra levels will be dropped from the resulting merge. In order to + preserve those levels, use ``reset_index`` on those level names to move + those levels to columns prior to doing the merge. + +.. note:: + + If a string matches both a column name and an index level name, then a + warning is issued and the column takes precedence. This will result in an + ambiguity error in a future version. + +Overlapping value columns +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The merge ``suffixes`` argument takes a tuple of list of strings to append to +overlapping column names in the input ``DataFrame``\ s to disambiguate the result +columns: + +.. ipython:: python + + left = pd.DataFrame({"k": ["K0", "K1", "K2"], "v": [1, 2, 3]}) + right = pd.DataFrame({"k": ["K0", "K0", "K3"], "v": [4, 5, 6]}) + + result = pd.merge(left, right, on="k") + +.. ipython:: python + :suppress: + + @savefig merging_merge_overlapped.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. ipython:: python + + result = pd.merge(left, right, on="k", suffixes=("_l", "_r")) + +.. ipython:: python + :suppress: + + @savefig merging_merge_overlapped_suffix.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +:meth:`DataFrame.join` has ``lsuffix`` and ``rsuffix`` arguments which behave +similarly. + +.. ipython:: python + + left = left.set_index("k") + right = right.set_index("k") + result = left.join(right, lsuffix="_l", rsuffix="_r") + +.. ipython:: python + :suppress: + + @savefig merging_merge_overlapped_multi_suffix.png + p.plot([left, right], result, labels=["left", "right"], vertical=False); + plt.close("all"); + +.. _merging.multiple_join: + +Joining multiple DataFrames +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A list or tuple of ``DataFrames`` can also be passed to :meth:`~DataFrame.join` +to join them together on their indexes. + +.. ipython:: python + + right2 = pd.DataFrame({"v": [7, 8, 9]}, index=["K1", "K1", "K2"]) + result = left.join([right, right2]) + +.. ipython:: python + :suppress: + + @savefig merging_join_multi_df.png + p.plot( + [left, right, right2], + result, + labels=["left", "right", "right2"], + vertical=False, + ); + plt.close("all"); + +.. 
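+``join`` with a list operates on the indexes. To combine several frames on a
+shared column instead, one common pattern is to fold :func:`~pandas.merge`
+over the list. The sketch below is illustrative rather than taken from the
+examples above; the ``dfs`` list and its ``"key"`` column are hypothetical:
+
+.. code-block:: python
+
+    import functools
+
+    dfs = [
+        pd.DataFrame({"key": ["K0", "K1"], "v1": [1, 2]}),
+        pd.DataFrame({"key": ["K0", "K2"], "v2": [3, 4]}),
+        pd.DataFrame({"key": ["K1", "K2"], "v3": [5, 6]}),
+    ]
+    # repeatedly outer-merge on "key", keeping every key seen in any frame
+    combined = functools.reduce(
+        lambda left, right: pd.merge(left, right, on="key", how="outer"), dfs
+    )
+    combined
+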
_merging.combine_first.update: + +Merging together values within Series or DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Another fairly common situation is to have two like-indexed (or similarly +indexed) ``Series`` or ``DataFrame`` objects and wanting to "patch" values in +one object from values for matching indices in the other. Here is an example: + +.. ipython:: python + + df1 = pd.DataFrame( + [[np.nan, 3.0, 5.0], [-4.6, np.nan, np.nan], [np.nan, 7.0, np.nan]] + ) + df2 = pd.DataFrame([[-42.6, np.nan, -8.2], [-5.0, 1.6, 4]], index=[1, 2]) + +For this, use the :meth:`~DataFrame.combine_first` method: + +.. ipython:: python + + result = df1.combine_first(df2) + +.. ipython:: python + :suppress: + + @savefig merging_combine_first.png + p.plot([df1, df2], result, labels=["df1", "df2"], vertical=False); + plt.close("all"); + +Note that this method only takes values from the right ``DataFrame`` if they are +missing in the left ``DataFrame``. A related method, :meth:`~DataFrame.update`, +alters non-NA values in place: + +.. ipython:: python + :suppress: + + df1_copy = df1.copy() + +.. ipython:: python + + df1.update(df2) + +.. ipython:: python + :suppress: + + @savefig merging_update.png + p.plot([df1_copy, df2], df1, labels=["df1", "df2"], vertical=False); + plt.close("all"); + +.. _merging.time_series: + +Timeseries friendly merging +--------------------------- + +.. _merging.merge_ordered: + +Merging ordered data +~~~~~~~~~~~~~~~~~~~~ + +A :func:`merge_ordered` function allows combining time series and other +ordered data. In particular it has an optional ``fill_method`` keyword to +fill/interpolate missing data: + +.. ipython:: python + + left = pd.DataFrame( + {"k": ["K0", "K1", "K1", "K2"], "lv": [1, 2, 3, 4], "s": ["a", "b", "c", "d"]} + ) + + right = pd.DataFrame({"k": ["K1", "K2", "K4"], "rv": [1, 2, 3]}) + + pd.merge_ordered(left, right, fill_method="ffill", left_by="s") + +.. _merging.merge_asof: + +Merging asof +~~~~~~~~~~~~ + +A :func:`merge_asof` is similar to an ordered left-join except that we match on +nearest key rather than equal keys. For each row in the ``left`` ``DataFrame``, +we select the last row in the ``right`` ``DataFrame`` whose ``on`` key is less +than the left's key. Both DataFrames must be sorted by the key. + +Optionally an asof merge can perform a group-wise merge. This matches the +``by`` key equally, in addition to the nearest match on the ``on`` key. + +For example; we might have ``trades`` and ``quotes`` and we want to ``asof`` +merge them. + +.. ipython:: python + + trades = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.038", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + }, + columns=["time", "ticker", "price", "quantity"], + ) + + quotes = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.030", + "20160525 13:30:00.041", + "20160525 13:30:00.048", + "20160525 13:30:00.049", + "20160525 13:30:00.072", + "20160525 13:30:00.075", + ] + ), + "ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL", "GOOG", "MSFT"], + "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], + }, + columns=["time", "ticker", "bid", "ask"], + ) + +.. 
ipython:: python + + trades + quotes + +By default we are taking the asof of the quotes. + +.. ipython:: python + + pd.merge_asof(trades, quotes, on="time", by="ticker") + +We only asof within ``2ms`` between the quote time and the trade time. + +.. ipython:: python + + pd.merge_asof(trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms")) + +We only asof within ``10ms`` between the quote time and the trade time and we +exclude exact matches on time. Note that though we exclude the exact matches +(of the quotes), prior quotes **do** propagate to that point in time. + +.. ipython:: python + + pd.merge_asof( + trades, + quotes, + on="time", + by="ticker", + tolerance=pd.Timedelta("10ms"), + allow_exact_matches=False, + ) + +.. _merging.compare: + +Comparing objects +----------------- + +The :meth:`~Series.compare` and :meth:`~DataFrame.compare` methods allow you to +compare two DataFrame or Series, respectively, and summarize their differences. + +This feature was added in :ref:`V1.1.0 `. + +For example, you might want to compare two ``DataFrame`` and stack their differences +side by side. + +.. ipython:: python + + df = pd.DataFrame( + { + "col1": ["a", "a", "b", "b", "a"], + "col2": [1.0, 2.0, 3.0, np.nan, 5.0], + "col3": [1.0, 2.0, 3.0, 4.0, 5.0], + }, + columns=["col1", "col2", "col3"], + ) + df + +.. ipython:: python + + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + df2 + +.. ipython:: python + + df.compare(df2) + +By default, if two corresponding values are equal, they will be shown as ``NaN``. +Furthermore, if all values in an entire row / column, the row / column will be +omitted from the result. The remaining differences will be aligned on columns. + +If you wish, you may choose to stack the differences on rows. + +.. ipython:: python + + df.compare(df2, align_axis=0) + +If you wish to keep all original rows and columns, set ``keep_shape`` argument +to ``True``. + +.. ipython:: python + + df.compare(df2, keep_shape=True) + +You may also keep all the original values even if they are equal. + +.. ipython:: python + + df.compare(df2, keep_shape=True, keep_equal=True) diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst new file mode 100644 index 00000000..3052ee30 --- /dev/null +++ b/doc/source/user_guide/missing_data.rst @@ -0,0 +1,947 @@ +.. _missing_data: + +{{ header }} + +************************* +Working with missing data +************************* + +In this section, we will discuss missing (also referred to as NA) values in +pandas. + +.. note:: + + The choice of using ``NaN`` internally to denote missing data was largely + for simplicity and performance reasons. + Starting from pandas 1.0, some optional data types start experimenting + with a native ``NA`` scalar using a mask-based approach. See + :ref:`here ` for more. + +See the :ref:`cookbook` for some advanced strategies. + +Values considered "missing" +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As data comes in many shapes and forms, pandas aims to be flexible with regard +to handling missing data. While ``NaN`` is the default missing value marker for +reasons of computational speed and convenience, we need to be able to easily +detect this value with data of different types: floating point, integer, +boolean, and general object. In many cases, however, the Python ``None`` will +arise and we wish to also consider that "missing" or "not available" or "NA". + +.. 
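+For instance, ``None`` placed in a numeric container is converted to ``NaN``,
+and both are reported as missing by :func:`isna` (a quick illustrative check,
+separate from the examples that follow):
+
+.. code-block:: python
+
+    pd.isna(None)      # True
+    pd.isna(np.nan)    # True
+
+    # None is coerced to NaN when it ends up in a float Series
+    s = pd.Series([1, None, np.nan])
+    s
+    s.isna()
+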
note:: + + If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, + you can set ``pandas.options.mode.use_inf_as_na = True``. + +.. _missing.isna: + +.. ipython:: python + + df = pd.DataFrame( + np.random.randn(5, 3), + index=["a", "c", "e", "f", "h"], + columns=["one", "two", "three"], + ) + df["four"] = "bar" + df["five"] = df["one"] > 0 + df + df2 = df.reindex(["a", "b", "c", "d", "e", "f", "g", "h"]) + df2 + +To make detecting missing values easier (and across different array dtypes), +pandas provides the :func:`isna` and +:func:`notna` functions, which are also methods on +Series and DataFrame objects: + +.. ipython:: python + + df2["one"] + pd.isna(df2["one"]) + df2["four"].notna() + df2.isna() + +.. warning:: + + One has to be mindful that in Python (and NumPy), the ``nan's`` don't compare equal, but ``None's`` **do**. + Note that pandas/NumPy uses the fact that ``np.nan != np.nan``, and treats ``None`` like ``np.nan``. + + .. ipython:: python + + None == None # noqa: E711 + np.nan == np.nan + + So as compared to above, a scalar equality comparison versus a ``None/np.nan`` doesn't provide useful information. + + .. ipython:: python + + df2["one"] == np.nan + +Integer dtypes and missing data +------------------------------- + +Because ``NaN`` is a float, a column of integers with even one missing values +is cast to floating-point dtype (see :ref:`gotchas.intna` for more). pandas +provides a nullable integer array, which can be used by explicitly requesting +the dtype: + +.. ipython:: python + + pd.Series([1, 2, np.nan, 4], dtype=pd.Int64Dtype()) + +Alternatively, the string alias ``dtype='Int64'`` (note the capital ``"I"``) can be +used. + +See :ref:`integer_na` for more. + +Datetimes +--------- + +For datetime64[ns] types, ``NaT`` represents missing values. This is a pseudo-native +sentinel value that can be represented by NumPy in a singular dtype (datetime64[ns]). +pandas objects provide compatibility between ``NaT`` and ``NaN``. + +.. ipython:: python + + df2 = df.copy() + df2["timestamp"] = pd.Timestamp("20120101") + df2 + df2.loc[["a", "c", "h"], ["one", "timestamp"]] = np.nan + df2 + df2.dtypes.value_counts() + +.. _missing.inserting: + +Inserting missing data +~~~~~~~~~~~~~~~~~~~~~~ + +You can insert missing values by simply assigning to containers. The +actual missing value used will be chosen based on the dtype. + +For example, numeric containers will always use ``NaN`` regardless of +the missing value type chosen: + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s.loc[0] = None + s + +Likewise, datetime containers will always use ``NaT``. + +For object containers, pandas will use the value given: + +.. ipython:: python + + s = pd.Series(["a", "b", "c"]) + s.loc[0] = None + s.loc[1] = np.nan + s + +.. _missing_data.calculations: + +Calculations with missing data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Missing values propagate naturally through arithmetic operations between pandas +objects. + +.. ipython:: python + :suppress: + + df = df2.loc[:, ["one", "two", "three"]] + a = df2.loc[df2.index[:5], ["one", "two"]].fillna(method="pad") + b = df2.loc[df2.index[:5], ["one", "two", "three"]] + +.. ipython:: python + + a + b + a + b + +The descriptive statistics and computational methods discussed in the +:ref:`data structure overview ` (and listed :ref:`here +` and :ref:`here `) are all written to +account for missing data. For example: + +* When summing data, NA (missing) values will be treated as zero. +* If the data are all NA, the result will be 0. 
+* Cumulative methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` ignore NA values by default, but preserve them in the resulting arrays. To override this behaviour and include NA values, use ``skipna=False``. + +.. ipython:: python + + df + df["one"].sum() + df.mean(1) + df.cumsum() + df.cumsum(skipna=False) + + +.. _missing_data.numeric_sum: + +Sum/prod of empties/nans +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + This behavior is now standard as of v0.22.0 and is consistent with the default in ``numpy``; previously sum/prod of all-NA or empty Series/DataFrames would return NaN. + See :ref:`v0.22.0 whatsnew ` for more. + +The sum of an empty or all-NA Series or column of a DataFrame is 0. + +.. ipython:: python + + pd.Series([np.nan]).sum() + + pd.Series([], dtype="float64").sum() + +The product of an empty or all-NA Series or column of a DataFrame is 1. + +.. ipython:: python + + pd.Series([np.nan]).prod() + + pd.Series([], dtype="float64").prod() + + +NA values in GroupBy +~~~~~~~~~~~~~~~~~~~~ + +NA groups in GroupBy are automatically excluded. This behavior is consistent +with R, for example: + +.. ipython:: python + + df + df.groupby("one").mean() + +See the groupby section :ref:`here ` for more information. + +Cleaning / filling missing data +-------------------------------- + +pandas objects are equipped with various data manipulation methods for dealing +with missing data. + +.. _missing_data.fillna: + +Filling missing values: fillna +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~DataFrame.fillna` can "fill in" NA values with non-NA data in a couple +of ways, which we illustrate: + +**Replace NA with a scalar value** + +.. ipython:: python + + df2 + df2.fillna(0) + df2["one"].fillna("missing") + +**Fill gaps forward or backward** + +Using the same filling arguments as :ref:`reindexing `, we +can propagate non-NA values forward or backward: + +.. ipython:: python + + df + df.fillna(method="pad") + +.. _missing_data.fillna.limit: + +**Limit the amount of filling** + +If we only want consecutive gaps filled up to a certain number of data points, +we can use the ``limit`` keyword: + +.. ipython:: python + :suppress: + + df.iloc[2:4, :] = np.nan + +.. ipython:: python + + df + df.fillna(method="pad", limit=1) + +To remind you, these are the available filling methods: + +.. csv-table:: + :header: "Method", "Action" + :widths: 30, 50 + + pad / ffill, Fill values forward + bfill / backfill, Fill values backward + +With time series data, using pad/ffill is extremely common so that the "last +known value" is available at every time point. + +:meth:`~DataFrame.ffill` is equivalent to ``fillna(method='ffill')`` +and :meth:`~DataFrame.bfill` is equivalent to ``fillna(method='bfill')`` + +.. _missing_data.PandasObject: + +Filling with a PandasObject +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series +must match the columns of the frame you wish to fill. The +use case of this is to fill a DataFrame with the mean of that column. + +.. ipython:: python + + dff = pd.DataFrame(np.random.randn(10, 3), columns=list("ABC")) + dff.iloc[3:5, 0] = np.nan + dff.iloc[4:6, 1] = np.nan + dff.iloc[5:8, 2] = np.nan + dff + + dff.fillna(dff.mean()) + dff.fillna(dff.mean()["B":"C"]) + +Same result as above, but is aligning the 'fill' value which is +a Series in this case. + +.. ipython:: python + + dff.where(pd.notna(dff), dff.mean(), axis="columns") + + +.. 
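+A dict works the same way, mapping column labels to the value used to fill
+that column; columns not named in the dict are left untouched. A small sketch
+reusing ``dff`` from above:
+
+.. code-block:: python
+
+    # fill "A" with 0 and "B" with its own mean, leaving the NaNs in "C" alone
+    dff.fillna({"A": 0, "B": dff["B"].mean()})
+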
_missing_data.dropna: + +Dropping axis labels with missing data: dropna +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may wish to simply exclude labels from a data set which refer to missing +data. To do this, use :meth:`~DataFrame.dropna`: + +.. ipython:: python + :suppress: + + df["two"] = df["two"].fillna(0) + df["three"] = df["three"].fillna(0) + +.. ipython:: python + + df + df.dropna(axis=0) + df.dropna(axis=1) + df["one"].dropna() + +An equivalent :meth:`~Series.dropna` is available for Series. +DataFrame.dropna has considerably more options than Series.dropna, which can be +examined :ref:`in the API `. + +.. _missing_data.interpolate: + +Interpolation +~~~~~~~~~~~~~ + +Both Series and DataFrame objects have :meth:`~DataFrame.interpolate` +that, by default, performs linear interpolation at missing data points. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + idx = pd.date_range("1/1/2000", periods=100, freq="BM") + ts = pd.Series(np.random.randn(100), index=idx) + ts[1:5] = np.nan + ts[20:30] = np.nan + ts[60:80] = np.nan + ts = ts.cumsum() + +.. ipython:: python + + ts + ts.count() + @savefig series_before_interpolate.png + ts.plot() + +.. ipython:: python + + ts.interpolate() + ts.interpolate().count() + + @savefig series_interpolate.png + ts.interpolate().plot() + +Index aware interpolation is available via the ``method`` keyword: + +.. ipython:: python + :suppress: + + ts2 = ts[[0, 1, 30, 60, 99]] + +.. ipython:: python + + ts2 + ts2.interpolate() + ts2.interpolate(method="time") + +For a floating-point index, use ``method='values'``: + +.. ipython:: python + :suppress: + + idx = [0.0, 1.0, 10.0] + ser = pd.Series([0.0, np.nan, 10.0], idx) + +.. ipython:: python + + ser + ser.interpolate() + ser.interpolate(method="values") + +You can also interpolate with a DataFrame: + +.. ipython:: python + + df = pd.DataFrame( + { + "A": [1, 2.1, np.nan, 4.7, 5.6, 6.8], + "B": [0.25, np.nan, np.nan, 4, 12.2, 14.4], + } + ) + df + df.interpolate() + +The ``method`` argument gives access to fancier interpolation methods. +If you have scipy_ installed, you can pass the name of a 1-d interpolation routine to ``method``. +You'll want to consult the full scipy interpolation documentation_ and reference guide_ for details. +The appropriate interpolation method will depend on the type of data you are working with. + +* If you are dealing with a time series that is growing at an increasing rate, + ``method='quadratic'`` may be appropriate. +* If you have values approximating a cumulative distribution function, + then ``method='pchip'`` should work well. +* To fill missing values with goal of smooth plotting, consider ``method='akima'``. + +.. warning:: + + These methods require ``scipy``. + +.. ipython:: python + + df.interpolate(method="barycentric") + + df.interpolate(method="pchip") + + df.interpolate(method="akima") + +When interpolating via a polynomial or spline approximation, you must also specify +the degree or order of the approximation: + +.. ipython:: python + + df.interpolate(method="spline", order=2) + + df.interpolate(method="polynomial", order=2) + +Compare several methods: + +.. 
ipython:: python + + np.random.seed(2) + + ser = pd.Series(np.arange(1, 10.1, 0.25) ** 2 + np.random.randn(37)) + missing = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29]) + ser[missing] = np.nan + methods = ["linear", "quadratic", "cubic"] + + df = pd.DataFrame({m: ser.interpolate(method=m) for m in methods}) + @savefig compare_interpolations.png + df.plot() + +Another use case is interpolation at *new* values. +Suppose you have 100 observations from some distribution. And let's suppose +that you're particularly interested in what's happening around the middle. +You can mix pandas' ``reindex`` and ``interpolate`` methods to interpolate +at the new values. + +.. ipython:: python + + ser = pd.Series(np.sort(np.random.uniform(size=100))) + + # interpolate at new_index + new_index = ser.index.union(pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])) + interp_s = ser.reindex(new_index).interpolate(method="pchip") + interp_s[49:51] + +.. _scipy: https://scipy.org/ +.. _documentation: https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation +.. _guide: https://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html + +.. _missing_data.interp_limits: + +Interpolation limits +-------------------- + +Like other pandas fill methods, :meth:`~DataFrame.interpolate` accepts a ``limit`` keyword +argument. Use this argument to limit the number of consecutive ``NaN`` values +filled since the last valid observation: + +.. ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13, np.nan, np.nan]) + ser + + # fill all consecutive values in a forward direction + ser.interpolate() + + # fill one consecutive value in a forward direction + ser.interpolate(limit=1) + +By default, ``NaN`` values are filled in a ``forward`` direction. Use +``limit_direction`` parameter to fill ``backward`` or from ``both`` directions. + +.. ipython:: python + + # fill one consecutive value backwards + ser.interpolate(limit=1, limit_direction="backward") + + # fill one consecutive value in both directions + ser.interpolate(limit=1, limit_direction="both") + + # fill all consecutive values in both directions + ser.interpolate(limit_direction="both") + +By default, ``NaN`` values are filled whether they are inside (surrounded by) +existing valid values, or outside existing valid values. The ``limit_area`` +parameter restricts filling to either inside or outside values. + +.. ipython:: python + + # fill one consecutive inside value in both directions + ser.interpolate(limit_direction="both", limit_area="inside", limit=1) + + # fill all consecutive outside values backward + ser.interpolate(limit_direction="backward", limit_area="outside") + + # fill all consecutive outside values in both directions + ser.interpolate(limit_direction="both", limit_area="outside") + +.. _missing_data.replace: + +Replacing generic values +~~~~~~~~~~~~~~~~~~~~~~~~ +Often times we want to replace arbitrary values with other values. + +:meth:`~Series.replace` in Series and :meth:`~DataFrame.replace` in DataFrame provides an efficient yet +flexible way to perform such replacements. + +For a Series, you can replace a single value or a list of values by another +value: + +.. ipython:: python + + ser = pd.Series([0.0, 1.0, 2.0, 3.0, 4.0]) + + ser.replace(0, 5) + +You can replace a list of values by a list of other values: + +.. ipython:: python + + ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) + +You can also specify a mapping dict: + +.. 
ipython:: python + + ser.replace({0: 10, 1: 100}) + +For a DataFrame, you can specify individual values by column: + +.. ipython:: python + + df = pd.DataFrame({"a": [0, 1, 2, 3, 4], "b": [5, 6, 7, 8, 9]}) + + df.replace({"a": 0, "b": 5}, 100) + +Instead of replacing with specified values, you can treat all given values as +missing and interpolate over them: + +.. ipython:: python + + ser.replace([1, 2, 3], method="pad") + +.. _missing_data.replace_expression: + +String/regular expression replacement +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + Python strings prefixed with the ``r`` character such as ``r'hello world'`` + are so-called "raw" strings. They have different semantics regarding + backslashes than strings without this prefix. Backslashes in raw strings + will be interpreted as an escaped backslash, e.g., ``r'\' == '\\'``. You + should `read about them + `__ + if this is unclear. + +Replace the '.' with ``NaN`` (str -> str): + +.. ipython:: python + + d = {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} + df = pd.DataFrame(d) + df.replace(".", np.nan) + +Now do it with a regular expression that removes surrounding whitespace +(regex -> regex): + +.. ipython:: python + + df.replace(r"\s*\.\s*", np.nan, regex=True) + +Replace a few different values (list -> list): + +.. ipython:: python + + df.replace(["a", "."], ["b", np.nan]) + +list of regex -> list of regex: + +.. ipython:: python + + df.replace([r"\.", r"(a)"], ["dot", r"\1stuff"], regex=True) + +Only search in column ``'b'`` (dict -> dict): + +.. ipython:: python + + df.replace({"b": "."}, {"b": np.nan}) + +Same as the previous example, but use a regular expression for +searching instead (dict of regex -> dict): + +.. ipython:: python + + df.replace({"b": r"\s*\.\s*"}, {"b": np.nan}, regex=True) + +You can pass nested dictionaries of regular expressions that use ``regex=True``: + +.. ipython:: python + + df.replace({"b": {"b": r""}}, regex=True) + +Alternatively, you can pass the nested dictionary like so: + +.. ipython:: python + + df.replace(regex={"b": {r"\s*\.\s*": np.nan}}) + +You can also use the group of a regular expression match when replacing (dict +of regex -> dict of regex), this works for lists as well. + +.. ipython:: python + + df.replace({"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, regex=True) + +You can pass a list of regular expressions, of which those that match +will be replaced with a scalar (list of regex -> regex). + +.. ipython:: python + + df.replace([r"\s*\.\s*", r"a|b"], np.nan, regex=True) + +All of the regular expression examples can also be passed with the +``to_replace`` argument as the ``regex`` argument. In this case the ``value`` +argument must be passed explicitly by name or ``regex`` must be a nested +dictionary. The previous example, in this case, would then be: + +.. ipython:: python + + df.replace(regex=[r"\s*\.\s*", r"a|b"], value=np.nan) + +This can be convenient if you do not want to pass ``regex=True`` every time you +want to use a regular expression. + +.. note:: + + Anywhere in the above ``replace`` examples that you see a regular expression + a compiled regular expression is valid as well. + +Numeric replacement +~~~~~~~~~~~~~~~~~~~ + +:meth:`~DataFrame.replace` is similar to :meth:`~DataFrame.fillna`. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 2)) + df[np.random.rand(df.shape[0]) > 0.5] = 1.5 + df.replace(1.5, np.nan) + +Replacing more than one value is possible by passing a list. + +.. 
ipython:: python + + df00 = df.iloc[0, 0] + df.replace([1.5, df00], [np.nan, "a"]) + df[1].dtype + +You can also operate on the DataFrame in place: + +.. ipython:: python + + df.replace(1.5, np.nan, inplace=True) + +Missing data casting rules and indexing +--------------------------------------- + +While pandas supports storing arrays of integer and boolean type, these types +are not capable of storing missing data. Until we can switch to using a native +NA type in NumPy, we've established some "casting rules". When a reindexing +operation introduces missing data, the Series will be cast according to the +rules introduced in the table below. + +.. csv-table:: + :header: "data type", "Cast to" + :widths: 40, 40 + + integer, float + boolean, object + float, no cast + object, no cast + +For example: + +.. ipython:: python + + s = pd.Series(np.random.randn(5), index=[0, 2, 4, 6, 7]) + s > 0 + (s > 0).dtype + crit = (s > 0).reindex(list(range(8))) + crit + crit.dtype + +Ordinarily NumPy will complain if you try to use an object array (even if it +contains boolean values) instead of a boolean array to get or set values from +an ndarray (e.g. selecting values based on some criteria). If a boolean vector +contains NAs, an exception will be generated: + +.. ipython:: python + :okexcept: + + reindexed = s.reindex(list(range(8))).fillna(0) + reindexed[crit] + +However, these can be filled in using :meth:`~DataFrame.fillna` and it will work fine: + +.. ipython:: python + + reindexed[crit.fillna(False)] + reindexed[crit.fillna(True)] + +pandas provides a nullable integer dtype, but you must explicitly request it +when creating the series or column. Notice that we use a capital "I" in +the ``dtype="Int64"``. + +.. ipython:: python + + s = pd.Series([0, 1, np.nan, 3, 4], dtype="Int64") + s + +See :ref:`integer_na` for more. + + +.. _missing_data.NA: + +Experimental ``NA`` scalar to denote missing values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + Experimental: the behaviour of ``pd.NA`` can still change without warning. + +.. versionadded:: 1.0.0 + +Starting from pandas 1.0, an experimental ``pd.NA`` value (singleton) is +available to represent scalar missing values. At this moment, it is used in +the nullable :doc:`integer `, boolean and +:ref:`dedicated string ` data types as the missing value indicator. + +The goal of ``pd.NA`` is provide a "missing" indicator that can be used +consistently across data types (instead of ``np.nan``, ``None`` or ``pd.NaT`` +depending on the data type). + +For example, when having missing values in a Series with the nullable integer +dtype, it will use ``pd.NA``: + +.. ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + s + s[2] + s[2] is pd.NA + +Currently, pandas does not yet use those data types by default (when creating +a DataFrame or Series, or when reading in data), so you need to specify +the dtype explicitly. An easy way to convert to those dtypes is explained +:ref:`here `. + +Propagation in arithmetic and comparison operations +--------------------------------------------------- + +In general, missing values *propagate* in operations involving ``pd.NA``. When +one of the operands is unknown, the outcome of the operation is also unknown. + +For example, ``pd.NA`` propagates in arithmetic operations, similarly to +``np.nan``: + +.. ipython:: python + + pd.NA + 1 + "a" * pd.NA + +There are a few special cases when the result is known, even when one of the +operands is ``NA``. + +.. 
ipython:: python + + pd.NA ** 0 + 1 ** pd.NA + +In equality and comparison operations, ``pd.NA`` also propagates. This deviates +from the behaviour of ``np.nan``, where comparisons with ``np.nan`` always +return ``False``. + +.. ipython:: python + + pd.NA == 1 + pd.NA == pd.NA + pd.NA < 2.5 + +To check if a value is equal to ``pd.NA``, the :func:`isna` function can be +used: + +.. ipython:: python + + pd.isna(pd.NA) + +An exception on this basic propagation rule are *reductions* (such as the +mean or the minimum), where pandas defaults to skipping missing values. See +:ref:`above ` for more. + +Logical operations +------------------ + +For logical operations, ``pd.NA`` follows the rules of the +`three-valued logic `__ (or +*Kleene logic*, similarly to R, SQL and Julia). This logic means to only +propagate missing values when it is logically required. + +For example, for the logical "or" operation (``|``), if one of the operands +is ``True``, we already know the result will be ``True``, regardless of the +other value (so regardless the missing value would be ``True`` or ``False``). +In this case, ``pd.NA`` does not propagate: + +.. ipython:: python + + True | False + True | pd.NA + pd.NA | True + +On the other hand, if one of the operands is ``False``, the result depends +on the value of the other operand. Therefore, in this case ``pd.NA`` +propagates: + +.. ipython:: python + + False | True + False | False + False | pd.NA + +The behaviour of the logical "and" operation (``&``) can be derived using +similar logic (where now ``pd.NA`` will not propagate if one of the operands +is already ``False``): + +.. ipython:: python + + False & True + False & False + False & pd.NA + +.. ipython:: python + + True & True + True & False + True & pd.NA + + +``NA`` in a boolean context +--------------------------- + +Since the actual value of an NA is unknown, it is ambiguous to convert NA +to a boolean value. The following raises an error: + +.. ipython:: python + :okexcept: + + bool(pd.NA) + +This also means that ``pd.NA`` cannot be used in a context where it is +evaluated to a boolean, such as ``if condition: ...`` where ``condition`` can +potentially be ``pd.NA``. In such cases, :func:`isna` can be used to check +for ``pd.NA`` or ``condition`` being ``pd.NA`` can be avoided, for example by +filling missing values beforehand. + +A similar situation occurs when using Series or DataFrame objects in ``if`` +statements, see :ref:`gotchas.truth`. + +NumPy ufuncs +------------ + +:attr:`pandas.NA` implements NumPy's ``__array_ufunc__`` protocol. Most ufuncs +work with ``NA``, and generally return ``NA``: + +.. ipython:: python + + np.log(pd.NA) + np.add(pd.NA, 1) + +.. warning:: + + Currently, ufuncs involving an ndarray and ``NA`` will return an + object-dtype filled with NA values. + + .. ipython:: python + + a = np.array([1, 2, 3]) + np.greater(a, pd.NA) + + The return type here may change to return a different array type + in the future. + +See :ref:`dsintro.numpy_interop` for more on ufuncs. + +.. _missing_data.NA.conversion: + +Conversion +---------- + +If you have a DataFrame or Series using traditional types that have missing data +represented using ``np.nan``, there are convenience methods +:meth:`~Series.convert_dtypes` in Series and :meth:`~DataFrame.convert_dtypes` +in DataFrame that can convert data to use the newer dtypes for integers, strings and +booleans listed :ref:`here `. 
This is especially helpful after reading +in data sets when letting the readers such as :meth:`read_csv` and :meth:`read_excel` +infer default dtypes. + +In this example, while the dtypes of all columns are changed, we show the results for +the first 10 columns. + +.. ipython:: python + + bb = pd.read_csv("data/baseball.csv", index_col="id") + bb[bb.columns[:10]].dtypes + +.. ipython:: python + + bbn = bb.convert_dtypes() + bbn[bbn.columns[:10]].dtypes diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst new file mode 100644 index 00000000..c7f5d3dd --- /dev/null +++ b/doc/source/user_guide/options.rst @@ -0,0 +1,407 @@ +.. _options: + +{{ header }} + +******************** +Options and settings +******************** + +Overview +-------- +pandas has an options API configure and customize global behavior related to +:class:`DataFrame` display, data behavior and more. + +Options have a full "dotted-style", case-insensitive name (e.g. ``display.max_rows``). +You can get/set options directly as attributes of the top-level ``options`` attribute: + +.. ipython:: python + + import pandas as pd + + pd.options.display.max_rows + pd.options.display.max_rows = 999 + pd.options.display.max_rows + +The API is composed of 5 relevant functions, available directly from the ``pandas`` +namespace: + +* :func:`~pandas.get_option` / :func:`~pandas.set_option` - get/set the value of a single option. +* :func:`~pandas.reset_option` - reset one or more options to their default value. +* :func:`~pandas.describe_option` - print the descriptions of one or more options. +* :func:`~pandas.option_context` - execute a codeblock with a set of options + that revert to prior settings after execution. + +.. note:: + + Developers can check out `pandas/core/config_init.py `_ for more information. + +All of the functions above accept a regexp pattern (``re.search`` style) as an argument, +to match an unambiguous substring: + +.. ipython:: python + + pd.get_option("display.chop_threshold") + pd.set_option("display.chop_threshold", 2) + pd.get_option("display.chop_threshold") + pd.set_option("chop", 4) + pd.get_option("display.chop_threshold") + + +The following will **not work** because it matches multiple option names, e.g. +``display.max_colwidth``, ``display.max_rows``, ``display.max_columns``: + +.. ipython:: python + :okexcept: + + pd.get_option("max") + + +.. warning:: + + Using this form of shorthand may cause your code to break if new options with similar names are added in future versions. + + +.. ipython:: python + :suppress: + :okwarning: + + pd.reset_option("all") + +.. _options.available: + +Available options +----------------- + +You can get a list of available options and their descriptions with :func:`~pandas.describe_option`. When called +with no argument :func:`~pandas.describe_option` will print out the descriptions for all available options. + +.. ipython:: python + + pd.describe_option() + +Getting and setting options +--------------------------- + +As described above, :func:`~pandas.get_option` and :func:`~pandas.set_option` +are available from the pandas namespace. To change an option, call +``set_option('option regex', new_value)``. + +.. ipython:: python + + pd.get_option("mode.sim_interactive") + pd.set_option("mode.sim_interactive", True) + pd.get_option("mode.sim_interactive") + +.. note:: + + The option ``'mode.sim_interactive'`` is mostly used for debugging purposes. + +You can use :func:`~pandas.reset_option` to revert to a setting's default value + +.. 
ipython:: python
+    :suppress:
+
+    pd.reset_option("display.max_rows")
+
+.. ipython:: python
+
+    pd.get_option("display.max_rows")
+    pd.set_option("display.max_rows", 999)
+    pd.get_option("display.max_rows")
+    pd.reset_option("display.max_rows")
+    pd.get_option("display.max_rows")
+
+
+It's also possible to reset multiple options at once (using a regex):
+
+.. ipython:: python
+    :okwarning:
+
+    pd.reset_option("^display")
+
+
+:func:`~pandas.option_context` context manager has been exposed through
+the top-level API, allowing you to execute code with given option values. Option values
+are restored automatically when you exit the ``with`` block:
+
+.. ipython:: python
+
+    with pd.option_context("display.max_rows", 10, "display.max_columns", 5):
+        print(pd.get_option("display.max_rows"))
+        print(pd.get_option("display.max_columns"))
+    print(pd.get_option("display.max_rows"))
+    print(pd.get_option("display.max_columns"))
+
+
+Setting startup options in Python/IPython environment
+-----------------------------------------------------
+
+Using startup scripts for the Python/IPython environment to import pandas and set options makes working with pandas more efficient.
+To do this, create a ``.py`` or ``.ipy`` script in the startup directory of the desired profile.
+An example where the startup folder is in a default IPython profile can be found at:
+
+.. code-block:: none
+
+    $IPYTHONDIR/profile_default/startup
+
+More information can be found in the `IPython documentation
+`__. An example startup script for pandas is displayed below:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    pd.set_option("display.max_rows", 999)
+    pd.set_option("display.precision", 5)
+
+.. _options.frequently_used:
+
+Frequently used options
+-----------------------
+The following demonstrates the more frequently used display options.
+
+``display.max_rows`` and ``display.max_columns`` set the maximum number
+of rows and columns displayed when a frame is pretty-printed. Truncated
+lines are replaced by an ellipsis.
+
+.. ipython:: python
+
+    df = pd.DataFrame(np.random.randn(7, 2))
+    pd.set_option("display.max_rows", 7)
+    df
+    pd.set_option("display.max_rows", 5)
+    df
+    pd.reset_option("display.max_rows")
+
+Once the ``display.max_rows`` is exceeded, the ``display.min_rows`` option
+determines how many rows are shown in the truncated repr.
+
+.. ipython:: python
+
+    pd.set_option("display.max_rows", 8)
+    pd.set_option("display.min_rows", 4)
+    # below max_rows -> all rows shown
+    df = pd.DataFrame(np.random.randn(7, 2))
+    df
+    # above max_rows -> only min_rows (4) rows shown
+    df = pd.DataFrame(np.random.randn(9, 2))
+    df
+    pd.reset_option("display.max_rows")
+    pd.reset_option("display.min_rows")
+
+``display.expand_frame_repr`` allows for the representation of a
+:class:`DataFrame` to stretch across pages, wrapped across all the columns.
+
+.. ipython:: python
+
+    df = pd.DataFrame(np.random.randn(5, 10))
+    pd.set_option("expand_frame_repr", True)
+    df
+    pd.set_option("expand_frame_repr", False)
+    df
+    pd.reset_option("expand_frame_repr")
+
+``display.large_repr`` displays a :class:`DataFrame` that exceeds
+``max_columns`` or ``max_rows`` as a truncated frame or summary.
+
+.. ipython:: python
+
+    df = pd.DataFrame(np.random.randn(10, 10))
+    pd.set_option("display.max_rows", 5)
+    pd.set_option("large_repr", "truncate")
+    df
+    pd.set_option("large_repr", "info")
+    df
+    pd.reset_option("large_repr")
+    pd.reset_option("display.max_rows")
+
+``display.max_colwidth`` sets the maximum width of columns.
Cells +of this length or longer will be truncated with an ellipsis. + +.. ipython:: python + + df = pd.DataFrame( + np.array( + [ + ["foo", "bar", "bim", "uncomfortably long string"], + ["horse", "cow", "banana", "apple"], + ] + ) + ) + pd.set_option("max_colwidth", 40) + df + pd.set_option("max_colwidth", 6) + df + pd.reset_option("max_colwidth") + +``display.max_info_columns`` sets a threshold for the number of columns +displayed when calling :meth:`~pandas.DataFrame.info`. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 10)) + pd.set_option("max_info_columns", 11) + df.info() + pd.set_option("max_info_columns", 5) + df.info() + pd.reset_option("max_info_columns") + +``display.max_info_rows``: :meth:`~pandas.DataFrame.info` will usually show null-counts for each column. +For a large :class:`DataFrame`, this can be quite slow. ``max_info_rows`` and ``max_info_cols`` +limit this null check to the specified rows and columns respectively. The :meth:`~pandas.DataFrame.info` +keyword argument ``null_counts=True`` will override this. + +.. ipython:: python + + df = pd.DataFrame(np.random.choice([0, 1, np.nan], size=(10, 10))) + df + pd.set_option("max_info_rows", 11) + df.info() + pd.set_option("max_info_rows", 5) + df.info() + pd.reset_option("max_info_rows") + +``display.precision`` sets the output display precision in terms of decimal places. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 5)) + pd.set_option("display.precision", 7) + df + pd.set_option("display.precision", 4) + df + +``display.chop_threshold`` sets the rounding threshold to zero when displaying a +:class:`Series` or :class:`DataFrame`. This setting does not change the +precision at which the number is stored. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(6, 6)) + pd.set_option("chop_threshold", 0) + df + pd.set_option("chop_threshold", 0.5) + df + pd.reset_option("chop_threshold") + +``display.colheader_justify`` controls the justification of the headers. +The options are ``'right'``, and ``'left'``. + +.. ipython:: python + + df = pd.DataFrame( + np.array([np.random.randn(6), np.random.randint(1, 9, 6) * 0.1, np.zeros(6)]).T, + columns=["A", "B", "C"], + dtype="float", + ) + pd.set_option("colheader_justify", "right") + df + pd.set_option("colheader_justify", "left") + df + pd.reset_option("colheader_justify") + + +.. _basics.console_output: + +Number formatting +------------------ + +pandas also allows you to set how numbers are displayed in the console. +This option is not set through the ``set_options`` API. + +Use the ``set_eng_float_format`` function +to alter the floating-point formatting of pandas objects to produce a particular +format. + +.. ipython:: python + + import numpy as np + + pd.set_eng_float_format(accuracy=3, use_eng_prefix=True) + s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"]) + s / 1.0e3 + s / 1.0e6 + +.. ipython:: python + :suppress: + :okwarning: + + pd.reset_option("^display") + +Use :meth:`~pandas.DataFrame.round` to specifically control rounding of an individual :class:`DataFrame` + +.. _options.east_asian_width: + +Unicode formatting +------------------ + +.. warning:: + + Enabling this option will affect the performance for printing of DataFrame and Series (about 2 times slower). + Use only when it is actually required. + +Some East Asian countries use Unicode characters whose width corresponds to two Latin characters. +If a DataFrame or Series contains these characters, the default output mode may not align them properly. 
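+
+The width pandas consults here is each character's Unicode "East Asian Width"
+property. As a quick way to see that property (a small illustrative sketch; it
+uses only the standard-library ``unicodedata`` module, not a pandas API):
+
+.. code-block:: python
+
+    import unicodedata
+
+    # "W" (wide) characters occupy two terminal columns, "Na" (narrow) one,
+    # and "A" (ambiguous) depends on the terminal and encoding.
+    [unicodedata.east_asian_width(c) for c in "日本ABC¡"]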
+ +.. ipython:: python + + df = pd.DataFrame({"国籍": ["UK", "日本"], "名前": ["Alice", "しのぶ"]}) + df + +Enabling ``display.unicode.east_asian_width`` allows pandas to check each character's "East Asian Width" property. +These characters can be aligned properly by setting this option to ``True``. However, this will result in longer render +times than the standard ``len`` function. + +.. ipython:: python + + pd.set_option("display.unicode.east_asian_width", True) + df + +In addition, Unicode characters whose width is "ambiguous" can either be 1 or 2 characters wide depending on the +terminal setting or encoding. The option ``display.unicode.ambiguous_as_wide`` can be used to handle the ambiguity. + +By default, an "ambiguous" character's width, such as "¡" (inverted exclamation) in the example below, is taken to be 1. + +.. ipython:: python + + df = pd.DataFrame({"a": ["xxx", "¡¡"], "b": ["yyy", "¡¡"]}) + df + + +Enabling ``display.unicode.ambiguous_as_wide`` makes pandas interpret these characters' widths to be 2. +(Note that this option will only be effective when ``display.unicode.east_asian_width`` is enabled.) + +However, setting this option incorrectly for your terminal will cause these characters to be aligned incorrectly: + +.. ipython:: python + + pd.set_option("display.unicode.ambiguous_as_wide", True) + df + + +.. ipython:: python + :suppress: + + pd.set_option("display.unicode.east_asian_width", False) + pd.set_option("display.unicode.ambiguous_as_wide", False) + +.. _options.table_schema: + +Table schema display +-------------------- + +:class:`DataFrame` and :class:`Series` will publish a Table Schema representation +by default. This can be enabled globally with the +``display.html.table_schema`` option: + +.. ipython:: python + + pd.set_option("display.html.table_schema", True) + +Only ``'display.max_rows'`` are serialized and published. + + +.. ipython:: python + :suppress: + + pd.reset_option("display.html.table_schema") diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst new file mode 100644 index 00000000..adca9de6 --- /dev/null +++ b/doc/source/user_guide/reshaping.rst @@ -0,0 +1,935 @@ +.. _reshaping: + +{{ header }} + +************************** +Reshaping and pivot tables +************************** + +.. _reshaping.reshaping: + +Reshaping by pivoting DataFrame objects +--------------------------------------- + +.. image:: ../_static/reshaping_pivot.png + +Data is often stored in so-called "stacked" or "record" format: + +.. ipython:: python + + import pandas._testing as tm + + def unpivot(frame): + N, K = frame.shape + data = { + "value": frame.to_numpy().ravel("F"), + "variable": np.asarray(frame.columns).repeat(N), + "date": np.tile(np.asarray(frame.index), K), + } + return pd.DataFrame(data, columns=["date", "variable", "value"]) + + df = unpivot(tm.makeTimeDataFrame(3)) + df + +To select out everything for variable ``A`` we could do: + +.. ipython:: python + + filtered = df[df["variable"] == "A"] + filtered + +But suppose we wish to do time series operations with the variables. A better +representation would be where the ``columns`` are the unique variables and an +``index`` of dates identifies individual observations. To reshape the data into +this form, we use the :meth:`DataFrame.pivot` method (also implemented as a +top level function :func:`~pandas.pivot`): + +.. 
ipython:: python + + pivoted = df.pivot(index="date", columns="variable", values="value") + pivoted + +If the ``values`` argument is omitted, and the input :class:`DataFrame` has more than +one column of values which are not used as column or index inputs to :meth:`~DataFrame.pivot`, +then the resulting "pivoted" :class:`DataFrame` will have :ref:`hierarchical columns +` whose topmost level indicates the respective value +column: + +.. ipython:: python + + df["value2"] = df["value"] * 2 + pivoted = df.pivot(index="date", columns="variable") + pivoted + +You can then select subsets from the pivoted :class:`DataFrame`: + +.. ipython:: python + + pivoted["value2"] + +Note that this returns a view on the underlying data in the case where the data +are homogeneously-typed. + +.. note:: + :func:`~pandas.pivot` will error with a ``ValueError: Index contains duplicate + entries, cannot reshape`` if the index/column pair is not unique. In this + case, consider using :func:`~pandas.pivot_table` which is a generalization + of pivot that can handle duplicate values for one index/column pair. + +.. _reshaping.stacking: + +Reshaping by stacking and unstacking +------------------------------------ + +.. image:: ../_static/reshaping_stack.png + +Closely related to the :meth:`~DataFrame.pivot` method are the related +:meth:`~DataFrame.stack` and :meth:`~DataFrame.unstack` methods available on +:class:`Series` and :class:`DataFrame`. These methods are designed to work together with +:class:`MultiIndex` objects (see the section on :ref:`hierarchical indexing +`). Here are essentially what these methods do: + +* :meth:`~DataFrame.stack`: "pivot" a level of the (possibly hierarchical) column labels, + returning a :class:`DataFrame` with an index with a new inner-most level of row + labels. +* :meth:`~DataFrame.unstack`: (inverse operation of :meth:`~DataFrame.stack`) "pivot" a level of the + (possibly hierarchical) row index to the column axis, producing a reshaped + :class:`DataFrame` with a new inner-most level of column labels. + +.. image:: ../_static/reshaping_unstack.png + +The clearest way to explain is by example. Let's take a prior example data set +from the hierarchical indexing section: + +.. ipython:: python + + tuples = list( + zip( + *[ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + ) + ) + index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"]) + df2 = df[:4] + df2 + +The :meth:`~DataFrame.stack` function "compresses" a level in the :class:`DataFrame` columns to +produce either: + +* A :class:`Series`, in the case of a simple column Index. +* A :class:`DataFrame`, in the case of a :class:`MultiIndex` in the columns. + +If the columns have a :class:`MultiIndex`, you can choose which level to stack. The +stacked level becomes the new lowest level in a :class:`MultiIndex` on the columns: + +.. ipython:: python + + stacked = df2.stack() + stacked + +With a "stacked" :class:`DataFrame` or :class:`Series` (having a :class:`MultiIndex` as the +``index``), the inverse operation of :meth:`~DataFrame.stack` is :meth:`~DataFrame.unstack`, which by default +unstacks the **last level**: + +.. ipython:: python + + stacked.unstack() + stacked.unstack(1) + stacked.unstack(0) + +.. _reshaping.unstack_by_name: + +.. 
image:: ../_static/reshaping_unstack_1.png + +If the indexes have names, you can use the level names instead of specifying +the level numbers: + +.. ipython:: python + + stacked.unstack("second") + + +.. image:: ../_static/reshaping_unstack_0.png + +Notice that the :meth:`~DataFrame.stack` and :meth:`~DataFrame.unstack` methods implicitly sort the index +levels involved. Hence a call to :meth:`~DataFrame.stack` and then :meth:`~DataFrame.unstack`, or vice versa, +will result in a **sorted** copy of the original :class:`DataFrame` or :class:`Series`: + +.. ipython:: python + + index = pd.MultiIndex.from_product([[2, 1], ["a", "b"]]) + df = pd.DataFrame(np.random.randn(4), index=index, columns=["A"]) + df + all(df.unstack().stack() == df.sort_index()) + +The above code will raise a ``TypeError`` if the call to :meth:`~DataFrame.sort_index` is +removed. + +.. _reshaping.stack_multiple: + +Multiple levels +~~~~~~~~~~~~~~~ + +You may also stack or unstack more than one level at a time by passing a list +of levels, in which case the end result is as if each level in the list were +processed individually. + +.. ipython:: python + + columns = pd.MultiIndex.from_tuples( + [ + ("A", "cat", "long"), + ("B", "cat", "long"), + ("A", "dog", "short"), + ("B", "dog", "short"), + ], + names=["exp", "animal", "hair_length"], + ) + df = pd.DataFrame(np.random.randn(4, 4), columns=columns) + df + + df.stack(level=["animal", "hair_length"]) + +The list of levels can contain either level names or level numbers (but +not a mixture of the two). + +.. ipython:: python + + # df.stack(level=['animal', 'hair_length']) + # from above is equivalent to: + df.stack(level=[1, 2]) + +Missing data +~~~~~~~~~~~~ + +These functions are intelligent about handling missing data and do not expect +each subgroup within the hierarchical index to have the same set of labels. +They also can handle the index being unsorted (but you can make it sorted by +calling :meth:`~DataFrame.sort_index`, of course). Here is a more complex example: + +.. ipython:: python + + columns = pd.MultiIndex.from_tuples( + [ + ("A", "cat"), + ("B", "dog"), + ("B", "cat"), + ("A", "dog"), + ], + names=["exp", "animal"], + ) + index = pd.MultiIndex.from_product( + [("bar", "baz", "foo", "qux"), ("one", "two")], names=["first", "second"] + ) + df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=columns) + df2 = df.iloc[[0, 1, 2, 4, 5, 7]] + df2 + +As mentioned above, :meth:`~DataFrame.stack` can be called with a ``level`` argument to select +which level in the columns to stack: + +.. ipython:: python + + df2.stack("exp") + df2.stack("animal") + +Unstacking can result in missing values if subgroups do not have the same +set of labels. By default, missing values will be replaced with the default +fill value for that data type, ``NaN`` for float, ``NaT`` for datetimelike, +etc. For integer types, by default data will converted to float and missing +values will be set to ``NaN``. + +.. ipython:: python + + df3 = df.iloc[[0, 1, 4, 7], [1, 2]] + df3 + df3.unstack() + +Alternatively, unstack takes an optional ``fill_value`` argument, for specifying +the value of missing data. + +.. ipython:: python + + df3.unstack(fill_value=-1e9) + +With a MultiIndex +~~~~~~~~~~~~~~~~~ + +Unstacking when the columns are a :class:`MultiIndex` is also careful about doing +the right thing: + +.. ipython:: python + + df[:3].unstack(0) + df2.unstack(1) + +.. _reshaping.melt: + +Reshaping by melt +----------------- + +.. 
image:: ../_static/reshaping_melt.png + +The top-level :func:`~pandas.melt` function and the corresponding :meth:`DataFrame.melt` +are useful to massage a :class:`DataFrame` into a format where one or more columns +are *identifier variables*, while all other columns, considered *measured +variables*, are "unpivoted" to the row axis, leaving just two non-identifier +columns, "variable" and "value". The names of those columns can be customized +by supplying the ``var_name`` and ``value_name`` parameters. + +For instance, + +.. ipython:: python + + cheese = pd.DataFrame( + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) + cheese + cheese.melt(id_vars=["first", "last"]) + cheese.melt(id_vars=["first", "last"], var_name="quantity") + +When transforming a DataFrame using :func:`~pandas.melt`, the index will be ignored. The original index values can be kept around by setting the ``ignore_index`` parameter to ``False`` (default is ``True``). This will however duplicate them. + +.. versionadded:: 1.1.0 + +.. ipython:: python + + index = pd.MultiIndex.from_tuples([("person", "A"), ("person", "B")]) + cheese = pd.DataFrame( + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + }, + index=index, + ) + cheese + cheese.melt(id_vars=["first", "last"]) + cheese.melt(id_vars=["first", "last"], ignore_index=False) + +Another way to transform is to use the :func:`~pandas.wide_to_long` panel data +convenience function. It is less flexible than :func:`~pandas.melt`, but more +user-friendly. + +.. ipython:: python + + dft = pd.DataFrame( + { + "A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), np.random.randn(3))), + } + ) + dft["id"] = dft.index + dft + pd.wide_to_long(dft, ["A", "B"], i="id", j="year") + +.. _reshaping.combine_with_groupby: + +Combining with stats and GroupBy +-------------------------------- + +It should be no shock that combining :meth:`~DataFrame.pivot` / :meth:`~DataFrame.stack` / :meth:`~DataFrame.unstack` with +GroupBy and the basic Series and DataFrame statistical functions can produce +some very expressive and fast data manipulations. + +.. ipython:: python + + df + df.stack().mean(1).unstack() + + # same result, another way + df.groupby(level=1, axis=1).mean() + + df.stack().groupby(level=1).mean() + + df.mean().unstack(0) + + +Pivot tables +------------ + +.. _reshaping.pivot: + +While :meth:`~DataFrame.pivot` provides general purpose pivoting with various +data types (strings, numerics, etc.), pandas also provides :func:`~pandas.pivot_table` +for pivoting with aggregation of numeric data. + +The function :func:`~pandas.pivot_table` can be used to create spreadsheet-style +pivot tables. See the :ref:`cookbook` for some advanced +strategies. + +It takes a number of arguments: + +* ``data``: a DataFrame object. +* ``values``: a column or a list of columns to aggregate. +* ``index``: a column, Grouper, array which has the same length as data, or list of them. + Keys to group by on the pivot table index. If an array is passed, it is being used as the same manner as column values. +* ``columns``: a column, Grouper, array which has the same length as data, or list of them. + Keys to group by on the pivot table column. If an array is passed, it is being used as the same manner as column values. 
+* ``aggfunc``: function to use for aggregation, defaulting to ``numpy.mean``. + +Consider a data set like this: + +.. ipython:: python + + import datetime + + df = pd.DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": np.random.randn(24), + "E": np.random.randn(24), + "F": [datetime.datetime(2013, i, 1) for i in range(1, 13)] + + [datetime.datetime(2013, i, 15) for i in range(1, 13)], + } + ) + df + +We can produce pivot tables from this data very easily: + +.. ipython:: python + + pd.pivot_table(df, values="D", index=["A", "B"], columns=["C"]) + pd.pivot_table(df, values="D", index=["B"], columns=["A", "C"], aggfunc=np.sum) + pd.pivot_table( + df, values=["D", "E"], + index=["B"], + columns=["A", "C"], + aggfunc=np.sum, + ) + +The result object is a :class:`DataFrame` having potentially hierarchical indexes on the +rows and columns. If the ``values`` column name is not given, the pivot table +will include all of the data in an additional level of hierarchy in the columns: + +.. ipython:: python + + pd.pivot_table(df[["A", "B", "C", "D", "E"]], index=["A", "B"], columns=["C"]) + +Also, you can use :class:`Grouper` for ``index`` and ``columns`` keywords. For detail of :class:`Grouper`, see :ref:`Grouping with a Grouper specification `. + +.. ipython:: python + + pd.pivot_table(df, values="D", index=pd.Grouper(freq="M", key="F"), columns="C") + +You can render a nice output of the table omitting the missing values by +calling :meth:`~DataFrame.to_string` if you wish: + +.. ipython:: python + + table = pd.pivot_table(df, index=["A", "B"], columns=["C"], values=["D", "E"]) + print(table.to_string(na_rep="")) + +Note that :meth:`~DataFrame.pivot_table` is also available as an instance method on DataFrame, + i.e. :meth:`DataFrame.pivot_table`. + +.. _reshaping.pivot.margins: + +Adding margins +~~~~~~~~~~~~~~ + +If you pass ``margins=True`` to :meth:`~DataFrame.pivot_table`, special ``All`` columns and +rows will be added with partial group aggregates across the categories on the +rows and columns: + +.. ipython:: python + + table = df.pivot_table( + index=["A", "B"], + columns="C", + values=["D", "E"], + margins=True, + aggfunc=np.std + ) + table + +Additionally, you can call :meth:`DataFrame.stack` to display a pivoted DataFrame +as having a multi-level index: + +.. ipython:: python + + table.stack() + +.. _reshaping.crosstabulations: + +Cross tabulations +----------------- + +Use :func:`~pandas.crosstab` to compute a cross-tabulation of two (or more) +factors. By default :func:`~pandas.crosstab` computes a frequency table of the factors +unless an array of values and an aggregation function are passed. + +It takes a number of arguments + +* ``index``: array-like, values to group by in the rows. +* ``columns``: array-like, values to group by in the columns. +* ``values``: array-like, optional, array of values to aggregate according to + the factors. +* ``aggfunc``: function, optional, If no values array is passed, computes a + frequency table. +* ``rownames``: sequence, default ``None``, must match number of row arrays passed. +* ``colnames``: sequence, default ``None``, if passed, must match number of column + arrays passed. +* ``margins``: boolean, default ``False``, Add row/column margins (subtotals) +* ``normalize``: boolean, {'all', 'index', 'columns'}, or {0,1}, default ``False``. + Normalize by dividing all values by the sum of values. 
+ + +Any :class:`Series` passed will have their name attributes used unless row or column +names for the cross-tabulation are specified + +For example: + +.. ipython:: python + + foo, bar, dull, shiny, one, two = "foo", "bar", "dull", "shiny", "one", "two" + a = np.array([foo, foo, bar, bar, foo, foo], dtype=object) + b = np.array([one, one, two, one, two, one], dtype=object) + c = np.array([dull, dull, shiny, dull, dull, shiny], dtype=object) + pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]) + + +If :func:`~pandas.crosstab` receives only two Series, it will provide a frequency table. + +.. ipython:: python + + df = pd.DataFrame( + {"A": [1, 2, 2, 2, 2], "B": [3, 3, 4, 4, 4], "C": [1, 1, np.nan, 1, 1]} + ) + df + + pd.crosstab(df["A"], df["B"]) + +:func:`~pandas.crosstab` can also be implemented +to :class:`Categorical` data. + +.. ipython:: python + + foo = pd.Categorical(["a", "b"], categories=["a", "b", "c"]) + bar = pd.Categorical(["d", "e"], categories=["d", "e", "f"]) + pd.crosstab(foo, bar) + +If you want to include **all** of data categories even if the actual data does +not contain any instances of a particular category, you should set ``dropna=False``. + +For example: + +.. ipython:: python + + pd.crosstab(foo, bar, dropna=False) + +Normalization +~~~~~~~~~~~~~ + +Frequency tables can also be normalized to show percentages rather than counts +using the ``normalize`` argument: + +.. ipython:: python + + pd.crosstab(df["A"], df["B"], normalize=True) + +``normalize`` can also normalize values within each row or within each column: + +.. ipython:: python + + pd.crosstab(df["A"], df["B"], normalize="columns") + +:func:`~pandas.crosstab` can also be passed a third :class:`Series` and an aggregation function +(``aggfunc``) that will be applied to the values of the third :class:`Series` within +each group defined by the first two :class:`Series`: + +.. ipython:: python + + pd.crosstab(df["A"], df["B"], values=df["C"], aggfunc=np.sum) + +Adding margins +~~~~~~~~~~~~~~ + +Finally, one can also add margins or normalize this output. + +.. ipython:: python + + pd.crosstab( + df["A"], df["B"], values=df["C"], aggfunc=np.sum, normalize=True, margins=True + ) + +.. _reshaping.tile: +.. _reshaping.tile.cut: + +Tiling +------ + +The :func:`~pandas.cut` function computes groupings for the values of the input +array and is often used to transform continuous variables to discrete or +categorical variables: + +.. ipython:: python + + ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60]) + + pd.cut(ages, bins=3) + +If the ``bins`` keyword is an integer, then equal-width bins are formed. +Alternatively we can specify custom bin-edges: + +.. ipython:: python + + c = pd.cut(ages, bins=[0, 18, 35, 70]) + c + +If the ``bins`` keyword is an :class:`IntervalIndex`, then these will be +used to bin the passed data.:: + + pd.cut([25, 20, 50], bins=c.categories) + + +.. _reshaping.dummies: + +Computing indicator / dummy variables +------------------------------------- + +To convert a categorical variable into a "dummy" or "indicator" :class:`DataFrame`, +for example a column in a :class:`DataFrame` (a :class:`Series`) which has ``k`` distinct +values, can derive a :class:`DataFrame` containing ``k`` columns of 1s and 0s using +:func:`~pandas.get_dummies`: + +.. 
ipython:: python
+
+    df = pd.DataFrame({"key": list("bbacab"), "data1": range(6)})
+
+    pd.get_dummies(df["key"])
+
+Sometimes it's useful to prefix the column names, for example when merging the result
+with the original :class:`DataFrame`:
+
+.. ipython:: python
+
+    dummies = pd.get_dummies(df["key"], prefix="key")
+    dummies
+
+    df[["data1"]].join(dummies)
+
+This function is often used along with discretization functions like :func:`~pandas.cut`:
+
+.. ipython:: python
+
+    values = np.random.randn(10)
+    values
+
+    bins = [0, 0.2, 0.4, 0.6, 0.8, 1]
+
+    pd.get_dummies(pd.cut(values, bins))
+
+See also :func:`Series.str.get_dummies `.
+
+:func:`get_dummies` also accepts a :class:`DataFrame`. By default all categorical
+variables (categorical in the statistical sense, those with ``object`` or
+``categorical`` dtype) are encoded as dummy variables.
+
+
+.. ipython:: python
+
+    df = pd.DataFrame({"A": ["a", "b", "a"], "B": ["c", "c", "b"], "C": [1, 2, 3]})
+    pd.get_dummies(df)
+
+All non-object columns are included untouched in the output. You can control
+the columns that are encoded with the ``columns`` keyword.
+
+.. ipython:: python
+
+    pd.get_dummies(df, columns=["A"])
+
+Notice that the ``B`` column is still included in the output; it just hasn't
+been encoded. You can drop ``B`` before calling ``get_dummies`` if you don't
+want to include it in the output.
+
+As with the :class:`Series` version, you can pass values for the ``prefix`` and
+``prefix_sep``. By default the column name is used as the prefix, and ``_`` as
+the prefix separator. You can specify ``prefix`` and ``prefix_sep`` in 3 ways:
+
+* string: Use the same value for ``prefix`` or ``prefix_sep`` for each column
+  to be encoded.
+* list: Must be the same length as the number of columns being encoded.
+* dict: Mapping column name to prefix.
+
+.. ipython:: python
+
+    simple = pd.get_dummies(df, prefix="new_prefix")
+    simple
+    from_list = pd.get_dummies(df, prefix=["from_A", "from_B"])
+    from_list
+    from_dict = pd.get_dummies(df, prefix={"B": "from_B", "A": "from_A"})
+    from_dict
+
+Sometimes it will be useful to only keep k-1 levels of a categorical
+variable to avoid collinearity when feeding the result to statistical models.
+You can switch to this mode by turning on ``drop_first``.
+
+.. ipython:: python
+
+    s = pd.Series(list("abcaa"))
+
+    pd.get_dummies(s)
+
+    pd.get_dummies(s, drop_first=True)
+
+When a column contains only one level, it will be omitted in the result.
+
+.. ipython:: python
+
+    df = pd.DataFrame({"A": list("aaaaa"), "B": list("ababc")})
+
+    pd.get_dummies(df)
+
+    pd.get_dummies(df, drop_first=True)
+
+By default new columns will have ``np.uint8`` dtype.
+To choose another dtype, use the ``dtype`` argument:
+
+.. ipython:: python
+
+    df = pd.DataFrame({"A": list("abc"), "B": [1.1, 2.2, 3.3]})
+
+    pd.get_dummies(df, dtype=bool).dtypes
+
+.. versionadded:: 1.5.0
+
+To convert a "dummy" or "indicator" ``DataFrame`` back into a categorical ``DataFrame``,
+for example turning ``k`` columns of 1s and 0s back into a column holding the ``k``
+distinct values, use :func:`~pandas.from_dummies`:
+
+.. ipython:: python
+
+    df = pd.DataFrame({"prefix_a": [0, 1, 0], "prefix_b": [1, 0, 1]})
+    df
+
+    pd.from_dummies(df, sep="_")
+
+Dummy coded data only requires ``k - 1`` categories to be included. In this case
+the ``k`` th category is the default category: it is implied by not being assigned
+any of the other ``k - 1`` categories, and it can be passed via ``default_category``.
+
+..
ipython:: python + + df = pd.DataFrame({"prefix_a": [0, 1, 0]}) + df + + pd.from_dummies(df, sep="_", default_category="b") + +.. _reshaping.factorize: + +Factorizing values +------------------ + +To encode 1-d values as an enumerated type use :func:`~pandas.factorize`: + +.. ipython:: python + + x = pd.Series(["A", "A", np.nan, "B", 3.14, np.inf]) + x + labels, uniques = pd.factorize(x) + labels + uniques + +Note that :func:`~pandas.factorize` is similar to ``numpy.unique``, but differs in its +handling of NaN: + +.. note:: + The following ``numpy.unique`` will fail under Python 3 with a ``TypeError`` + because of an ordering bug. See also + `here `__. + +.. ipython:: python + :okexcept: + + ser = pd.Series(['A', 'A', np.nan, 'B', 3.14, np.inf]) + pd.factorize(ser, sort=True) + np.unique(ser, return_inverse=True)[::-1] + +.. note:: + If you just want to handle one column as a categorical variable (like R's factor), + you can use ``df["cat_col"] = pd.Categorical(df["col"])`` or + ``df["cat_col"] = df["col"].astype("category")``. For full docs on :class:`~pandas.Categorical`, + see the :ref:`Categorical introduction ` and the + :ref:`API documentation `. + +Examples +-------- + +In this section, we will review frequently asked questions and examples. The +column names and relevant column values are named to correspond with how this +DataFrame will be pivoted in the answers below. + +.. ipython:: python + + np.random.seed([3, 1415]) + n = 20 + + cols = np.array(["key", "row", "item", "col"]) + df = cols + pd.DataFrame( + (np.random.randint(5, size=(n, 4)) // [2, 1, 2, 1]).astype(str) + ) + df.columns = cols + df = df.join(pd.DataFrame(np.random.rand(n, 2).round(2)).add_prefix("val")) + + df + +Pivoting with single aggregations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Suppose we wanted to pivot ``df`` such that the ``col`` values are columns, +``row`` values are the index, and the mean of ``val0`` are the values? In +particular, the resulting DataFrame should look like: + +.. code-block:: text + + col col0 col1 col2 col3 col4 + row + row0 0.77 0.605 NaN 0.860 0.65 + row2 0.13 NaN 0.395 0.500 0.25 + row3 NaN 0.310 NaN 0.545 NaN + row4 NaN 0.100 0.395 0.760 0.24 + +This solution uses :func:`~pandas.pivot_table`. Also note that +``aggfunc='mean'`` is the default. It is included here to be explicit. + +.. ipython:: python + + df.pivot_table(values="val0", index="row", columns="col", aggfunc="mean") + +Note that we can also replace the missing values by using the ``fill_value`` +parameter. + +.. ipython:: python + + df.pivot_table( + values="val0", + index="row", + columns="col", + aggfunc="mean", + fill_value=0, + ) + +Also note that we can pass in other aggregation functions as well. For example, +we can also pass in ``sum``. + +.. ipython:: python + + df.pivot_table( + values="val0", + index="row", + columns="col", + aggfunc="sum", + fill_value=0, + ) + +Another aggregation we can do is calculate the frequency in which the columns +and rows occur together a.k.a. "cross tabulation". To do this, we can pass +``size`` to the ``aggfunc`` parameter. + +.. ipython:: python + + df.pivot_table(index="row", columns="col", fill_value=0, aggfunc="size") + +Pivoting with multiple aggregations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We can also perform multiple aggregations. For example, to perform both a +``sum`` and ``mean``, we can pass in a list to the ``aggfunc`` argument. + +.. 
ipython:: python + + df.pivot_table( + values="val0", + index="row", + columns="col", + aggfunc=["mean", "sum"], + ) + +Note to aggregate over multiple value columns, we can pass in a list to the +``values`` parameter. + +.. ipython:: python + + df.pivot_table( + values=["val0", "val1"], + index="row", + columns="col", + aggfunc=["mean"], + ) + +Note to subdivide over multiple columns we can pass in a list to the +``columns`` parameter. + +.. ipython:: python + + df.pivot_table( + values=["val0"], + index="row", + columns=["item", "col"], + aggfunc=["mean"], + ) + +.. _reshaping.explode: + +Exploding a list-like column +---------------------------- + +.. versionadded:: 0.25.0 + +Sometimes the values in a column are list-like. + +.. ipython:: python + + keys = ["panda1", "panda2", "panda3"] + values = [["eats", "shoots"], ["shoots", "leaves"], ["eats", "leaves"]] + df = pd.DataFrame({"keys": keys, "values": values}) + df + +We can 'explode' the ``values`` column, transforming each list-like to a separate row, by using :meth:`~Series.explode`. This will replicate the index values from the original row: + +.. ipython:: python + + df["values"].explode() + +You can also explode the column in the :class:`DataFrame`. + +.. ipython:: python + + df.explode("values") + +:meth:`Series.explode` will replace empty lists with ``np.nan`` and preserve scalar entries. The dtype of the resulting :class:`Series` is always ``object``. + +.. ipython:: python + + s = pd.Series([[1, 2, 3], "foo", [], ["a", "b"]]) + s + s.explode() + +Here is a typical usecase. You have comma separated strings in a column and want to expand this. + +.. ipython:: python + + df = pd.DataFrame([{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}]) + df + +Creating a long form DataFrame is now straightforward using explode and chained operations + +.. ipython:: python + + df.assign(var1=df.var1.str.split(",")).explode("var1") diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst new file mode 100644 index 00000000..129f43dd --- /dev/null +++ b/doc/source/user_guide/scale.rst @@ -0,0 +1,396 @@ +.. _scale: + +************************* +Scaling to large datasets +************************* + +pandas provides data structures for in-memory analytics, which makes using pandas +to analyze datasets that are larger than memory datasets somewhat tricky. Even datasets +that are a sizable fraction of memory become unwieldy, as some pandas operations need +to make intermediate copies. + +This document provides a few recommendations for scaling your analysis to larger datasets. +It's a complement to :ref:`enhancingperf`, which focuses on speeding up analysis +for datasets that fit in memory. + +But first, it's worth considering *not using pandas*. pandas isn't the right +tool for all situations. If you're working with very large datasets and a tool +like PostgreSQL fits your needs, then you should probably be using that. +Assuming you want or need the expressiveness and power of pandas, let's carry on. + +Load less data +-------------- + +Suppose our raw dataset on disk has many columns:: + + id_0 name_0 x_0 y_0 id_1 name_1 x_1 ... name_8 x_8 y_8 id_9 name_9 x_9 y_9 + timestamp ... + 2000-01-01 00:00:00 1015 Michael -0.399453 0.095427 994 Frank -0.176842 ... Dan -0.315310 0.713892 1025 Victor -0.135779 0.346801 + 2000-01-01 00:01:00 969 Patricia 0.650773 -0.874275 1003 Laura 0.459153 ... 
Ursula 0.913244 -0.630308 1047 Wendy -0.886285 0.035852 + 2000-01-01 00:02:00 1016 Victor -0.721465 -0.584710 1046 Michael 0.524994 ... Ray -0.656593 0.692568 1064 Yvonne 0.070426 0.432047 + 2000-01-01 00:03:00 939 Alice -0.746004 -0.908008 996 Ingrid -0.414523 ... Jerry -0.958994 0.608210 978 Wendy 0.855949 -0.648988 + 2000-01-01 00:04:00 1017 Dan 0.919451 -0.803504 1048 Jerry -0.569235 ... Frank -0.577022 -0.409088 994 Bob -0.270132 0.335176 + ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... + 2000-12-30 23:56:00 999 Tim 0.162578 0.512817 973 Kevin -0.403352 ... Tim -0.380415 0.008097 1041 Charlie 0.191477 -0.599519 + 2000-12-30 23:57:00 970 Laura -0.433586 -0.600289 958 Oliver -0.966577 ... Zelda 0.971274 0.402032 1038 Ursula 0.574016 -0.930992 + 2000-12-30 23:58:00 1065 Edith 0.232211 -0.454540 971 Tim 0.158484 ... Alice -0.222079 -0.919274 1022 Dan 0.031345 -0.657755 + 2000-12-30 23:59:00 1019 Ingrid 0.322208 -0.615974 981 Hannah 0.607517 ... Sarah -0.424440 -0.117274 990 George -0.375530 0.563312 + 2000-12-31 00:00:00 937 Ursula -0.906523 0.943178 1018 Alice -0.564513 ... Jerry 0.236837 0.807650 985 Oliver 0.777642 0.783392 + + [525601 rows x 40 columns] + +That can be generated by the following code snippet: + +.. ipython:: python + + import pandas as pd + import numpy as np + + def make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None): + index = pd.date_range(start=start, end=end, freq=freq, name="timestamp") + n = len(index) + state = np.random.RandomState(seed) + columns = { + "name": state.choice(["Alice", "Bob", "Charlie"], size=n), + "id": state.poisson(1000, size=n), + "x": state.rand(n) * 2 - 1, + "y": state.rand(n) * 2 - 1, + } + df = pd.DataFrame(columns, index=index, columns=sorted(columns)) + if df.index[-1] == end: + df = df.iloc[:-1] + return df + + timeseries = [ + make_timeseries(freq="1T", seed=i).rename(columns=lambda x: f"{x}_{i}") + for i in range(10) + ] + ts_wide = pd.concat(timeseries, axis=1) + ts_wide.to_parquet("timeseries_wide.parquet") + +To load the columns we want, we have two options. +Option 1 loads in all the data and then filters to what we need. + +.. ipython:: python + + columns = ["id_0", "name_0", "x_0", "y_0"] + + pd.read_parquet("timeseries_wide.parquet")[columns] + +Option 2 only loads the columns we request. + +.. ipython:: python + + pd.read_parquet("timeseries_wide.parquet", columns=columns) + +.. ipython:: python + :suppress: + + import os + + os.remove("timeseries_wide.parquet") + +If we were to measure the memory usage of the two calls, we'd see that specifying +``columns`` uses about 1/10th the memory in this case. + +With :func:`pandas.read_csv`, you can specify ``usecols`` to limit the columns +read into memory. Not all file formats that can be read by pandas provide an option +to read a subset of columns. + +Use efficient datatypes +----------------------- + +The default pandas data types are not the most memory efficient. This is +especially true for text data columns with relatively few unique values (commonly +referred to as "low-cardinality" data). By using more efficient data types, you +can store larger datasets in memory. + +.. ipython:: python + + ts = make_timeseries(freq="30S", seed=0) + ts.to_parquet("timeseries.parquet") + ts = pd.read_parquet("timeseries.parquet") + ts + +.. ipython:: python + :suppress: + + os.remove("timeseries.parquet") + +Now, let's inspect the data types and memory usage to see where we should focus our +attention. + +.. ipython:: python + + ts.dtypes + +.. 
ipython:: python + + ts.memory_usage(deep=True) # memory usage in bytes + + +The ``name`` column is taking up much more memory than any other. It has just a +few unique values, so it's a good candidate for converting to a +:class:`pandas.Categorical`. With a :class:`pandas.Categorical`, we store each unique name once and use +space-efficient integers to know which specific name is used in each row. + + +.. ipython:: python + + ts2 = ts.copy() + ts2["name"] = ts2["name"].astype("category") + ts2.memory_usage(deep=True) + +We can go a bit further and downcast the numeric columns to their smallest types +using :func:`pandas.to_numeric`. + +.. ipython:: python + + ts2["id"] = pd.to_numeric(ts2["id"], downcast="unsigned") + ts2[["x", "y"]] = ts2[["x", "y"]].apply(pd.to_numeric, downcast="float") + ts2.dtypes + +.. ipython:: python + + ts2.memory_usage(deep=True) + +.. ipython:: python + + reduction = ts2.memory_usage(deep=True).sum() / ts.memory_usage(deep=True).sum() + print(f"{reduction:0.2f}") + +In all, we've reduced the in-memory footprint of this dataset to 1/5 of its +original size. + +See :ref:`categorical` for more on :class:`pandas.Categorical` and :ref:`basics.dtypes` +for an overview of all of pandas' dtypes. + +Use chunking +------------ + +Some workloads can be achieved with chunking: splitting a large problem like "convert this +directory of CSVs to parquet" into a bunch of small problems ("convert this individual CSV +file into a Parquet file. Now repeat that for each file in this directory."). As long as each chunk +fits in memory, you can work with datasets that are much larger than memory. + +.. note:: + + Chunking works well when the operation you're performing requires zero or minimal + coordination between chunks. For more complicated workflows, you're better off + :ref:`using another library `. + +Suppose we have an even larger "logical dataset" on disk that's a directory of parquet +files. Each file in the directory represents a different year of the entire dataset. + +.. ipython:: python + + import pathlib + + N = 12 + starts = [f"20{i:>02d}-01-01" for i in range(N)] + ends = [f"20{i:>02d}-12-13" for i in range(N)] + + pathlib.Path("data/timeseries").mkdir(exist_ok=True) + + for i, (start, end) in enumerate(zip(starts, ends)): + ts = make_timeseries(start=start, end=end, freq="1T", seed=i) + ts.to_parquet(f"data/timeseries/ts-{i:0>2d}.parquet") + + +:: + + data + └── timeseries + ├── ts-00.parquet + ├── ts-01.parquet + ├── ts-02.parquet + ├── ts-03.parquet + ├── ts-04.parquet + ├── ts-05.parquet + ├── ts-06.parquet + ├── ts-07.parquet + ├── ts-08.parquet + ├── ts-09.parquet + ├── ts-10.parquet + └── ts-11.parquet + +Now we'll implement an out-of-core :meth:`pandas.Series.value_counts`. The peak memory usage of this +workflow is the single largest chunk, plus a small series storing the unique value +counts up to this point. As long as each individual file fits in memory, this will +work for arbitrary-sized datasets. + +.. ipython:: python + + %%time + files = pathlib.Path("data/timeseries/").glob("ts*.parquet") + counts = pd.Series(dtype=int) + for path in files: + df = pd.read_parquet(path) + counts = counts.add(df["name"].value_counts(), fill_value=0) + counts.astype(int) + +Some readers, like :meth:`pandas.read_csv`, offer parameters to control the +``chunksize`` when reading a single file. + +Manually chunking is an OK option for workflows that don't +require too sophisticated of operations. 
Some operations, like :meth:`pandas.DataFrame.groupby`, are +much harder to do chunkwise. In these cases, you may be better switching to a +different library that implements these out-of-core algorithms for you. + +.. _scale.other_libraries: + +Use other libraries +------------------- + +pandas is just one library offering a DataFrame API. Because of its popularity, +pandas' API has become something of a standard that other libraries implement. +The pandas documentation maintains a list of libraries implementing a DataFrame API +in :ref:`our ecosystem page `. + +For example, `Dask`_, a parallel computing library, has `dask.dataframe`_, a +pandas-like API for working with larger than memory datasets in parallel. Dask +can use multiple threads or processes on a single machine, or a cluster of +machines to process data in parallel. + + +We'll import ``dask.dataframe`` and notice that the API feels similar to pandas. +We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in. + +.. ipython:: python + :okwarning: + + import dask.dataframe as dd + + ddf = dd.read_parquet("data/timeseries/ts*.parquet", engine="pyarrow") + ddf + +Inspecting the ``ddf`` object, we see a few things + +* There are familiar attributes like ``.columns`` and ``.dtypes`` +* There are familiar methods like ``.groupby``, ``.sum``, etc. +* There are new attributes like ``.npartitions`` and ``.divisions`` + +The partitions and divisions are how Dask parallelizes computation. A **Dask** +DataFrame is made up of many pandas :class:`pandas.DataFrame`. A single method call on a +Dask DataFrame ends up making many pandas method calls, and Dask knows how to +coordinate everything to get the result. + +.. ipython:: python + + ddf.columns + ddf.dtypes + ddf.npartitions + +One major difference: the ``dask.dataframe`` API is *lazy*. If you look at the +repr above, you'll notice that the values aren't actually printed out; just the +column names and dtypes. That's because Dask hasn't actually read the data yet. +Rather than executing immediately, doing operations build up a **task graph**. + +.. ipython:: python + :okwarning: + + ddf + ddf["name"] + ddf["name"].value_counts() + +Each of these calls is instant because the result isn't being computed yet. +We're just building up a list of computation to do when someone needs the +result. Dask knows that the return type of a :class:`pandas.Series.value_counts` +is a pandas :class:`pandas.Series` with a certain dtype and a certain name. So the Dask version +returns a Dask Series with the same dtype and the same name. + +To get the actual result you can call ``.compute()``. + +.. ipython:: python + + %time ddf["name"].value_counts().compute() + +At that point, you get back the same thing you'd get with pandas, in this case +a concrete pandas :class:`pandas.Series` with the count of each ``name``. + +Calling ``.compute`` causes the full task graph to be executed. This includes +reading the data, selecting the columns, and doing the ``value_counts``. The +execution is done *in parallel* where possible, and Dask tries to keep the +overall memory footprint small. You can work with datasets that are much larger +than memory, as long as each partition (a regular pandas :class:`pandas.DataFrame`) fits in memory. + +By default, ``dask.dataframe`` operations use a threadpool to do operations in +parallel. We can also connect to a cluster to distribute the work on many +machines. 
In this case we'll connect to a local "cluster" made up of several +processes on this single machine. + +.. code-block:: python + + >>> from dask.distributed import Client, LocalCluster + + >>> cluster = LocalCluster() + >>> client = Client(cluster) + >>> client + + +Once this ``client`` is created, all of Dask's computation will take place on +the cluster (which is just processes in this case). + +Dask implements the most used parts of the pandas API. For example, we can do +a familiar groupby aggregation. + +.. ipython:: python + + %time ddf.groupby("name")[["x", "y"]].mean().compute().head() + +The grouping and aggregation is done out-of-core and in parallel. + +When Dask knows the ``divisions`` of a dataset, certain optimizations are +possible. When reading parquet datasets written by dask, the divisions will be +known automatically. In this case, since we created the parquet files manually, +we need to supply the divisions manually. + +.. ipython:: python + :okwarning: + + N = 12 + starts = [f"20{i:>02d}-01-01" for i in range(N)] + ends = [f"20{i:>02d}-12-13" for i in range(N)] + + divisions = tuple(pd.to_datetime(starts)) + (pd.Timestamp(ends[-1]),) + ddf.divisions = divisions + ddf + +Now we can do things like fast random access with ``.loc``. + +.. ipython:: python + :okwarning: + + ddf.loc["2002-01-01 12:01":"2002-01-01 12:05"].compute() + +Dask knows to just look in the 3rd partition for selecting values in 2002. It +doesn't need to look at any other data. + +Many workflows involve a large amount of data and processing it in a way that +reduces the size to something that fits in memory. In this case, we'll resample +to daily frequency and take the mean. Once we've taken the mean, we know the +results will fit in memory, so we can safely call ``compute`` without running +out of memory. At that point it's just a regular pandas object. + +.. ipython:: python + :okwarning: + + @savefig dask_resample.png + ddf[["x", "y"]].resample("1D").mean().cumsum().compute().plot() + +.. ipython:: python + :suppress: + + import shutil + + shutil.rmtree("data/timeseries") + +These Dask examples have all be done using multiple processes on a single +machine. Dask can be `deployed on a cluster +`_ to scale up to even larger +datasets. + +You see more dask examples at https://examples.dask.org. + +.. _Dask: https://dask.org +.. _dask.dataframe: https://docs.dask.org/en/latest/dataframe.html diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst new file mode 100644 index 00000000..bc4eec1c --- /dev/null +++ b/doc/source/user_guide/sparse.rst @@ -0,0 +1,368 @@ +.. _sparse: + +{{ header }} + +********************** +Sparse data structures +********************** + +pandas provides data structures for efficiently storing sparse data. +These are not necessarily sparse in the typical "mostly 0". Rather, you can view these +objects as being "compressed" where any data matching a specific value (``NaN`` / missing value, though any value +can be chosen, including 0) is omitted. The compressed values are not actually stored in the array. + +.. ipython:: python + + arr = np.random.randn(10) + arr[2:-2] = np.nan + ts = pd.Series(pd.arrays.SparseArray(arr)) + ts + +Notice the dtype, ``Sparse[float64, nan]``. The ``nan`` means that elements in the +array that are ``nan`` aren't actually stored, only the non-``nan`` elements are. +Those non-``nan`` elements have a ``float64`` dtype. + +The sparse objects exist for memory efficiency reasons. 
Suppose you had a +large, mostly NA :class:`DataFrame`: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10000, 4)) + df.iloc[:9998] = np.nan + sdf = df.astype(pd.SparseDtype("float", np.nan)) + sdf.head() + sdf.dtypes + sdf.sparse.density + +As you can see, the density (% of values that have not been "compressed") is +extremely low. This sparse object takes up much less memory on disk (pickled) +and in the Python interpreter. + +.. ipython:: python + + 'dense : {:0.2f} bytes'.format(df.memory_usage().sum() / 1e3) + 'sparse: {:0.2f} bytes'.format(sdf.memory_usage().sum() / 1e3) + +Functionally, their behavior should be nearly +identical to their dense counterparts. + +.. _sparse.array: + +SparseArray +----------- + +:class:`arrays.SparseArray` is a :class:`~pandas.api.extensions.ExtensionArray` +for storing an array of sparse values (see :ref:`basics.dtypes` for more +on extension arrays). It is a 1-dimensional ndarray-like object storing +only values distinct from the ``fill_value``: + +.. ipython:: python + + arr = np.random.randn(10) + arr[2:5] = np.nan + arr[7:8] = np.nan + sparr = pd.arrays.SparseArray(arr) + sparr + +A sparse array can be converted to a regular (dense) ndarray with :meth:`numpy.asarray` + +.. ipython:: python + + np.asarray(sparr) + + +.. _sparse.dtype: + +SparseDtype +----------- + +The :attr:`SparseArray.dtype` property stores two pieces of information + +1. The dtype of the non-sparse values +2. The scalar fill value + + +.. ipython:: python + + sparr.dtype + + +A :class:`SparseDtype` may be constructed by passing only a dtype + +.. ipython:: python + + pd.SparseDtype(np.dtype('datetime64[ns]')) + +in which case a default fill value will be used (for NumPy dtypes this is often the +"missing" value for that dtype). To override this default an explicit fill value may be +passed instead + +.. ipython:: python + + pd.SparseDtype(np.dtype('datetime64[ns]'), + fill_value=pd.Timestamp('2017-01-01')) + +Finally, the string alias ``'Sparse[dtype]'`` may be used to specify a sparse dtype +in many places + +.. ipython:: python + + pd.array([1, 0, 0, 2], dtype='Sparse[int]') + +.. _sparse.accessor: + +Sparse accessor +--------------- + +pandas provides a ``.sparse`` accessor, similar to ``.str`` for string data, ``.cat`` +for categorical data, and ``.dt`` for datetime-like data. This namespace provides +attributes and methods that are specific to sparse data. + +.. ipython:: python + + s = pd.Series([0, 0, 1, 2], dtype="Sparse[int]") + s.sparse.density + s.sparse.fill_value + +This accessor is available only on data with ``SparseDtype``, and on the :class:`Series` +class itself for creating a Series with sparse data from a scipy COO matrix with. + + +.. versionadded:: 0.25.0 + +A ``.sparse`` accessor has been added for :class:`DataFrame` as well. +See :ref:`api.frame.sparse` for more. + +.. _sparse.calculation: + +Sparse calculation +------------------ + +You can apply NumPy `ufuncs `_ +to :class:`arrays.SparseArray` and get a :class:`arrays.SparseArray` as a result. + +.. ipython:: python + + arr = pd.arrays.SparseArray([1., np.nan, np.nan, -2., np.nan]) + np.abs(arr) + + +The *ufunc* is also applied to ``fill_value``. This is needed to get +the correct dense result. + +.. ipython:: python + + arr = pd.arrays.SparseArray([1., -1, -1, -2., -1], fill_value=-1) + np.abs(arr) + np.abs(arr).to_dense() + +.. _sparse.migration: + +Migrating +--------- + +.. note:: + + ``SparseSeries`` and ``SparseDataFrame`` were removed in pandas 1.0.0. 
This migration + guide is present to aid in migrating from previous versions. + +In older versions of pandas, the ``SparseSeries`` and ``SparseDataFrame`` classes (documented below) +were the preferred way to work with sparse data. With the advent of extension arrays, these subclasses +are no longer needed. Their purpose is better served by using a regular Series or DataFrame with +sparse values instead. + +.. note:: + + There's no performance or memory penalty to using a Series or DataFrame with sparse values, + rather than a SparseSeries or SparseDataFrame. + +This section provides some guidance on migrating your code to the new style. As a reminder, +you can use the Python warnings module to control warnings. But we recommend modifying +your code, rather than ignoring the warning. + +**Construction** + +From an array-like, use the regular :class:`Series` or +:class:`DataFrame` constructors with :class:`arrays.SparseArray` values. + +.. code-block:: python + + # Previous way + >>> pd.SparseDataFrame({"A": [0, 1]}) + +.. ipython:: python + + # New way + pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])}) + +From a SciPy sparse matrix, use :meth:`DataFrame.sparse.from_spmatrix`, + +.. code-block:: python + + # Previous way + >>> from scipy import sparse + >>> mat = sparse.eye(3) + >>> df = pd.SparseDataFrame(mat, columns=['A', 'B', 'C']) + +.. ipython:: python + + # New way + from scipy import sparse + mat = sparse.eye(3) + df = pd.DataFrame.sparse.from_spmatrix(mat, columns=['A', 'B', 'C']) + df.dtypes + +**Conversion** + +From sparse to dense, use the ``.sparse`` accessors + +.. ipython:: python + + df.sparse.to_dense() + df.sparse.to_coo() + +From dense to sparse, use :meth:`DataFrame.astype` with a :class:`SparseDtype`. + +.. ipython:: python + + dense = pd.DataFrame({"A": [1, 0, 0, 1]}) + dtype = pd.SparseDtype(int, fill_value=0) + dense.astype(dtype) + +**Sparse Properties** + +Sparse-specific properties, like ``density``, are available on the ``.sparse`` accessor. + +.. ipython:: python + + df.sparse.density + +**General differences** + +In a ``SparseDataFrame``, *all* columns were sparse. A :class:`DataFrame` can have a mixture of +sparse and dense columns. As a consequence, assigning new columns to a :class:`DataFrame` with sparse +values will not automatically convert the input to be sparse. + +.. code-block:: python + + # Previous Way + >>> df = pd.SparseDataFrame({"A": [0, 1]}) + >>> df['B'] = [0, 0] # implicitly becomes Sparse + >>> df['B'].dtype + Sparse[int64, nan] + +Instead, you'll need to ensure that the values being assigned are sparse + +.. ipython:: python + + df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])}) + df['B'] = [0, 0] # remains dense + df['B'].dtype + df['B'] = pd.arrays.SparseArray([0, 0]) + df['B'].dtype + +The ``SparseDataFrame.default_kind`` and ``SparseDataFrame.default_fill_value`` attributes +have no replacement. + +.. _sparse.scipysparse: + +Interaction with *scipy.sparse* +------------------------------- + +Use :meth:`DataFrame.sparse.from_spmatrix` to create a :class:`DataFrame` with sparse values from a sparse matrix. + +.. versionadded:: 0.25.0 + +.. ipython:: python + + from scipy.sparse import csr_matrix + + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + + sp_arr = csr_matrix(arr) + sp_arr + + sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr) + sdf.head() + sdf.dtypes + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. 
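+
+For instance, here is a minimal sketch of passing a non-COO matrix (the names
+``sp_csc`` and ``sdf_csc`` are illustrative, not from the examples above);
+:meth:`DataFrame.sparse.from_spmatrix` performs the conversion internally:
+
+.. code-block:: python
+
+    from scipy.sparse import random as sparse_random
+
+    # A CSC (not COO) sparse matrix; from_spmatrix copies it into the
+    # layout it needs before building the sparse DataFrame.
+    sp_csc = sparse_random(1000, 5, density=0.1, format="csc", random_state=0)
+    sdf_csc = pd.DataFrame.sparse.from_spmatrix(sp_csc)
+    sdf_csc.dtypes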
+To convert back to sparse SciPy matrix in COO format, you can use the :meth:`DataFrame.sparse.to_coo` method: + +.. ipython:: python + + sdf.sparse.to_coo() + +:meth:`Series.sparse.to_coo` is implemented for transforming a :class:`Series` with sparse values indexed by a :class:`MultiIndex` to a :class:`scipy.sparse.coo_matrix`. + +The method requires a :class:`MultiIndex` with two or more levels. + +.. ipython:: python + + s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) + s.index = pd.MultiIndex.from_tuples( + [ + (1, 2, "a", 0), + (1, 2, "a", 1), + (1, 1, "b", 0), + (1, 1, "b", 1), + (2, 1, "b", 0), + (2, 1, "b", 1), + ], + names=["A", "B", "C", "D"], + ) + ss = s.astype('Sparse') + ss + +In the example below, we transform the :class:`Series` to a sparse representation of a 2-d array by specifying that the first and second ``MultiIndex`` levels define labels for the rows and the third and fourth levels define labels for the columns. We also specify that the column and row labels should be sorted in the final sparse representation. + +.. ipython:: python + + A, rows, columns = ss.sparse.to_coo( + row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True + ) + + A + A.todense() + rows + columns + +Specifying different row and column labels (and not sorting them) yields a different sparse matrix: + +.. ipython:: python + + A, rows, columns = ss.sparse.to_coo( + row_levels=["A", "B", "C"], column_levels=["D"], sort_labels=False + ) + + A + A.todense() + rows + columns + +A convenience method :meth:`Series.sparse.from_coo` is implemented for creating a :class:`Series` with sparse values from a ``scipy.sparse.coo_matrix``. + +.. ipython:: python + + from scipy import sparse + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4)) + A + A.todense() + +The default behaviour (with ``dense_index=False``) simply returns a :class:`Series` containing +only the non-null entries. + +.. ipython:: python + + ss = pd.Series.sparse.from_coo(A) + ss + +Specifying ``dense_index=True`` will result in an index that is the Cartesian product of the +row and columns coordinates of the matrix. Note that this will consume a significant amount of memory +(relative to ``dense_index=False``) if the sparse matrix is large (and sparse) enough. + +.. ipython:: python + + ss_dense = pd.Series.sparse.from_coo(A, dense_index=True) + ss_dense diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb new file mode 100644 index 00000000..620e3806 --- /dev/null +++ b/doc/source/user_guide/style.ipynb @@ -0,0 +1,2049 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Table Visualization\n", + "\n", + "This section demonstrates visualization of tabular data using the [Styler][styler]\n", + "class. For information on visualization with charting please see [Chart Visualization][viz]. This document is written as a Jupyter Notebook, and can be viewed or downloaded [here][download].\n", + "\n", + "[styler]: ../reference/api/pandas.io.formats.style.Styler.rst\n", + "[viz]: visualization.rst\n", + "[download]: https://nbviewer.ipython.org/github/pandas-dev/pandas/blob/main/doc/source/user_guide/style.ipynb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Styler Object and HTML \n", + "\n", + "Styling should be performed after the data in a DataFrame has been processed. 
The [Styler][styler] creates an HTML `<table>` and leverages the CSS styling language to manipulate many parameters including colors, fonts, borders, background, etc. See [here][w3schools] for more information on styling HTML tables. This allows a lot of flexibility out of the box, and even enables web developers to integrate DataFrames into their existing user interface designs.\n", + " \n", + "The `DataFrame.style` attribute is a property that returns a [Styler][styler] object. It has a `_repr_html_` method defined on it so it is rendered automatically in Jupyter Notebook.\n", + "\n", + "[styler]: ../reference/api/pandas.io.formats.style.Styler.rst\n", + "[w3schools]: https://www.w3schools.com/html/html_tables.asp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot\n", + "# We have this here to trigger matplotlib's font cache stuff.\n", + "# This cell is hidden from the output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib as mpl\n", + "\n", + "df = pd.DataFrame([[38.0, 2.0, 18.0, 22.0, 21, np.nan],[19, 439, 6, 452, 226,232]], \n", + " index=pd.Index(['Tumour (Positive)', 'Non-Tumour (Negative)'], name='Actual Label:'), \n", + " columns=pd.MultiIndex.from_product([['Decision Tree', 'Regression', 'Random'],['Tumour', 'Non-Tumour']], names=['Model:', 'Predicted:']))\n", + "df.style" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above output looks very similar to the standard DataFrame HTML representation. But the HTML here has already attached some CSS classes to each cell, even if we haven't yet created any styles. We can view these by calling the [.to_html()][tohtml] method, which returns the raw HTML as a string, which is useful for further processing or adding to a file - read on in [More about CSS and HTML](#More-About-CSS-and-HTML). Below we will show how we can use these to format the DataFrame to be more communicative.
For example how we can build `s`:\n", + "\n", + "[tohtml]: ../reference/api/pandas.io.formats.style.Styler.to_html.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to just create the below example: code is covered throughout the guide.\n", + "s = df.style\\\n", + " .hide_columns([('Random', 'Tumour'), ('Random', 'Non-Tumour')])\\\n", + " .format('{:.0f}')\\\n", + " .set_table_styles([{\n", + " 'selector': '',\n", + " 'props': 'border-collapse: separate;'\n", + " },{\n", + " 'selector': 'caption',\n", + " 'props': 'caption-side: bottom; font-size:1.3em;'\n", + " },{\n", + " 'selector': '.index_name',\n", + " 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'\n", + " },{\n", + " 'selector': 'th:not(.index_name)',\n", + " 'props': 'background-color: #000066; color: white;'\n", + " },{\n", + " 'selector': 'th.col_heading',\n", + " 'props': 'text-align: center;'\n", + " },{\n", + " 'selector': 'th.col_heading.level0',\n", + " 'props': 'font-size: 1.5em;'\n", + " },{\n", + " 'selector': 'th.col2',\n", + " 'props': 'border-left: 1px solid white;'\n", + " },{\n", + " 'selector': '.col2',\n", + " 'props': 'border-left: 1px solid #000066;'\n", + " },{\n", + " 'selector': 'td',\n", + " 'props': 'text-align: center; font-weight:bold;'\n", + " },{\n", + " 'selector': '.true',\n", + " 'props': 'background-color: #e6ffe6;'\n", + " },{\n", + " 'selector': '.false',\n", + " 'props': 'background-color: #ffe6e6;'\n", + " },{\n", + " 'selector': '.border-red',\n", + " 'props': 'border: 2px dashed red;'\n", + " },{\n", + " 'selector': '.border-green',\n", + " 'props': 'border: 2px dashed green;'\n", + " },{\n", + " 'selector': 'td:hover',\n", + " 'props': 'background-color: #ffffb3;'\n", + " }])\\\n", + " .set_td_classes(pd.DataFrame([['true border-green', 'false', 'true', 'false border-red', '', ''],\n", + " ['false', 'true', 'false', 'true', '', '']], \n", + " index=df.index, columns=df.columns))\\\n", + " .set_caption(\"Confusion matrix for multiple cancer prediction models.\")\\\n", + " .set_tooltips(pd.DataFrame([['This model has a very strong true positive rate', '', '', \"This model's total number of false negatives is too high\", '', ''],\n", + " ['', '', '', '', '', '']], \n", + " index=df.index, columns=df.columns),\n", + " css_class='pd-tt', props=\n", + " 'visibility: hidden; position: absolute; z-index: 1; border: 1px solid #000066;'\n", + " 'background-color: white; color: #000066; font-size: 0.8em;' \n", + " 'transform: translate(0px, -24px); padding: 0.6em; border-radius: 0.5em;')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Formatting the Display\n", + "\n", + "### Formatting Values\n", + "\n", + "Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value, in both datavalues and index or columns headers. To control the display value, the text is printed in each cell as string, and we can use the [.format()][formatfunc] and [.format_index()][formatfuncindex] methods to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table, or index, or for individual columns, or MultiIndex levels. 
\n", + "\n", + "Additionally, the format function has a **precision** argument to specifically help formatting floats, as well as **decimal** and **thousands** separators to support other locales, an **na_rep** argument to display missing data, and an **escape** argument to help displaying safe-HTML or safe-LaTeX. The default formatter is configured to adopt pandas' `styler.format.precision` option, controllable using `with pd.option_context('format.precision', 2):` \n", + "\n", + "[styler]: ../reference/api/pandas.io.formats.style.Styler.rst\n", + "[format]: https://docs.python.org/3/library/string.html#format-specification-mini-language\n", + "[formatfunc]: ../reference/api/pandas.io.formats.style.Styler.format.rst\n", + "[formatfuncindex]: ../reference/api/pandas.io.formats.style.Styler.format_index.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.style.format(precision=0, na_rep='MISSING', thousands=\" \",\n", + " formatter={('Decision Tree', 'Tumour'): \"{:.2f}\",\n", + " ('Regression', 'Non-Tumour'): lambda x: \"$ {:,.1f}\".format(x*-1e6)\n", + " })" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using Styler to manipulate the display is a useful feature because maintaining the indexing and datavalues for other purposes gives greater control. You do not have to overwrite your DataFrame to display it how you like. Here is an example of using the formatting functions whilst still relying on the underlying data for indexing and calculations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "weather_df = pd.DataFrame(np.random.rand(10,2)*5, \n", + " index=pd.date_range(start=\"2021-01-01\", periods=10),\n", + " columns=[\"Tokyo\", \"Beijing\"])\n", + "\n", + "def rain_condition(v): \n", + " if v < 1.75:\n", + " return \"Dry\"\n", + " elif v < 2.75:\n", + " return \"Rain\"\n", + " return \"Heavy Rain\"\n", + "\n", + "def make_pretty(styler):\n", + " styler.set_caption(\"Weather Conditions\")\n", + " styler.format(rain_condition)\n", + " styler.format_index(lambda v: v.strftime(\"%A\"))\n", + " styler.background_gradient(axis=None, vmin=1, vmax=5, cmap=\"YlGnBu\")\n", + " return styler\n", + "\n", + "weather_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "weather_df.loc[\"2021-01-04\":\"2021-01-08\"].style.pipe(make_pretty)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hiding Data\n", + "\n", + "The index and column headers can be completely hidden, as well subselecting rows or columns that one wishes to exclude. Both these options are performed using the same methods.\n", + "\n", + "The index can be hidden from rendering by calling [.hide()][hideidx] without any arguments, which might be useful if your index is integer based. Similarly column headers can be hidden by calling [.hide(axis=\"columns\")][hideidx] without any further arguments.\n", + "\n", + "Specific rows or columns can be hidden from rendering by calling the same [.hide()][hideidx] method and passing in a row/column label, a list-like or a slice of row/column labels to for the ``subset`` argument.\n", + "\n", + "Hiding does not change the integer arrangement of CSS classes, e.g. 
hiding the first two columns of a DataFrame means the column class indexing will still start at `col2`, since `col0` and `col1` are simply ignored.\n", + "\n", + "We can update our `Styler` object from before to hide some data and format the values.\n", + "\n", + "[hideidx]: ../reference/api/pandas.io.formats.style.Styler.hide.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s = df.style.format('{:.0f}').hide([('Random', 'Tumour'), ('Random', 'Non-Tumour')], axis=\"columns\")\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s.set_uuid('after_hide')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Methods to Add Styles\n", + "\n", + "There are **3 primary methods of adding custom CSS styles** to [Styler][styler]:\n", + "\n", + "- Using [.set_table_styles()][table] to control broader areas of the table with specified internal CSS. Although table styles allow the flexibility to add CSS selectors and properties controlling all individual parts of the table, they are unwieldy for individual cell specifications. Also, note that table styles cannot be exported to Excel. \n", + "- Using [.set_td_classes()][td_class] to directly link either external CSS classes to your data cells or link the internal CSS classes created by [.set_table_styles()][table]. See [here](#Setting-Classes-and-Linking-to-External-CSS). These cannot be used on column header rows or indexes, and also won't export to Excel. \n", + "- Using the [.apply()][apply] and [.applymap()][applymap] functions to add direct internal CSS to specific data cells. See [here](#Styler-Functions). As of v1.4.0 there are also methods that work directly on column header rows or indexes; [.apply_index()][applyindex] and [.applymap_index()][applymapindex]. Note that only these methods add styles that will export to Excel. These methods work in a similar way to [DataFrame.apply()][dfapply] and [DataFrame.applymap()][dfapplymap].\n", + "\n", + "[table]: ../reference/api/pandas.io.formats.style.Styler.set_table_styles.rst\n", + "[styler]: ../reference/api/pandas.io.formats.style.Styler.rst\n", + "[td_class]: ../reference/api/pandas.io.formats.style.Styler.set_td_classes.rst\n", + "[apply]: ../reference/api/pandas.io.formats.style.Styler.apply.rst\n", + "[applymap]: ../reference/api/pandas.io.formats.style.Styler.applymap.rst\n", + "[applyindex]: ../reference/api/pandas.io.formats.style.Styler.apply_index.rst\n", + "[applymapindex]: ../reference/api/pandas.io.formats.style.Styler.applymap_index.rst\n", + "[dfapply]: ../reference/api/pandas.DataFrame.apply.rst\n", + "[dfapplymap]: ../reference/api/pandas.DataFrame.applymap.rst" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Table Styles\n", + "\n", + "Table styles are flexible enough to control all individual parts of the table, including column headers and indexes. \n", + "However, they can be unwieldy to type for individual data cells or for any kind of conditional formatting, so we recommend that table styles are used for broad styling, such as entire rows or columns at a time.\n", + "\n", + "Table styles are also used to control features which can apply to the whole table at once such as creating a generic hover functionality. 
The `:hover` pseudo-selector, as well as other pseudo-selectors, can only be used this way.\n", + "\n", + "To replicate the normal format of CSS selectors and properties (attribute value pairs), e.g. \n", + "\n", + "```\n", + "tr:hover {\n", + " background-color: #ffff99;\n", + "}\n", + "```\n", + "\n", + "the necessary format to pass styles to [.set_table_styles()][table] is as a list of dicts, each with a CSS-selector tag and CSS-properties. Properties can either be a list of 2-tuples, or a regular CSS-string, for example:\n", + "\n", + "[table]: ../reference/api/pandas.io.formats.style.Styler.set_table_styles.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cell_hover = { # for row hover use instead of `` within ```` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`) +- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) +- :func:`read_csv()` and :func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`) +- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`) +- Bug in :func:`read_csv()` in which memory management was prematurely optimized for the C engine when the data was being read in chunks (:issue:`23509`) +- Bug in :func:`read_csv()` in unnamed columns were being improperly identified when extracting a multi-index (:issue:`23687`) +- :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) +- :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) +- :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) +- Bug in :func:`read_sas()` in which an incorrect error was raised on an invalid file format. (:issue:`24548`) +- Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) +- Bug in :func:`DataFrame.to_html()` with ``index=False`` misses truncation indicators (...) on truncated DataFrame (:issue:`15019`, :issue:`22783`) +- Bug in :func:`DataFrame.to_html()` with ``index=False`` when both columns and row index are ``MultiIndex`` (:issue:`22579`) +- Bug in :func:`DataFrame.to_html()` with ``index_names=False`` displaying index name (:issue:`22747`) +- Bug in :func:`DataFrame.to_html()` with ``header=False`` not displaying row index names (:issue:`23788`) +- Bug in :func:`DataFrame.to_html()` with ``sparsify=False`` that caused it to raise ``TypeError`` (:issue:`22887`) +- Bug in :func:`DataFrame.to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) +- Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) +- Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`). 
+- :class:`HDFStore` will raise ``ValueError`` when the ``format`` kwarg is passed to the constructor (:issue:`13291`) +- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) +- Bug in :func:`read_csv()` in which memory leaks occurred in the C engine when parsing ``NaN`` values due to insufficient cleanup on completion or error (:issue:`21353`) +- Bug in :func:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue:`23711`) +- Bug in :func:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`) +- Bug in :func:`read_csv()` in which unnecessary warnings were being raised when the dialect's values conflicted with the default arguments (:issue:`23761`) +- Bug in :func:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`) +- Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`) +- Bug in :meth:`read_excel()` in which column names were not being properly converted to string sometimes in Python 2.x (:issue:`23874`) +- Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`18792`, :issue:`20480`) +- Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`) +- Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`) +- :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`) +- Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`) +- Bug in :func:`read_csv` that caused the C engine on Python 3.6+ on Windows to improperly read CSV filenames with accented or special characters (:issue:`15086`) +- Bug in :func:`read_fwf` in which the compression type of a file was not being properly inferred (:issue:`22199`) +- Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`) +- Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where a exception would leave a partially written and invalid dta file (:issue:`23573`) +- Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) +- Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a Dataframe in Python 3 from fixed format written in Python 2 (:issue:`24510`) +- Bug in :func:`DataFrame.to_string()` and more generally in the floating ``repr`` formatter. Zeros were not trimmed if ``inf`` was present in a columns while it was the case with NA values. Zeros are now trimmed as in the presence of NA (:issue:`24861`). +- Bug in the ``repr`` when truncating the number of columns and having a wide last column (:issue:`24849`). 
+ +Plotting +^^^^^^^^ + +- Bug in :func:`DataFrame.plot.scatter` and :func:`DataFrame.plot.hexbin` caused x-axis label and ticklabels to disappear when colorbar was on in IPython inline backend (:issue:`10611`, :issue:`10678`, and :issue:`20455`) +- Bug in plotting a Series with datetimes using :func:`matplotlib.axes.Axes.scatter` (:issue:`22039`) +- Bug in :func:`DataFrame.plot.bar` caused bars to use multiple colors instead of a single one (:issue:`20585`) +- Bug in validating color parameter caused extra color to be appended to the given color array. This happened to multiple plotting functions using matplotlib. (:issue:`20726`) + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :func:`pandas.core.window.Rolling.min` and :func:`pandas.core.window.Rolling.max` with ``closed='left'``, a datetime-like index and only one entry in the series leading to segfault (:issue:`24718`) +- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`) +- Bug in :meth:`DateFrame.resample` when downsampling across a DST boundary (:issue:`8531`) +- Bug in date anchoring for :meth:`DateFrame.resample` with offset :class:`Day` when n > 1 (:issue:`24127`) +- Bug where ``ValueError`` is wrongly raised when calling :func:`~pandas.core.groupby.SeriesGroupBy.count` method of a + ``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`). +- Multiple bugs in :func:`pandas.core.window.Rolling.min` with ``closed='left'`` and a + datetime-like index leading to incorrect results and also segfault. (:issue:`21704`) +- Bug in :meth:`pandas.core.resample.Resampler.apply` when passing positional arguments to applied func (:issue:`14615`). +- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`). +- Bug in :meth:`pandas.core.resample.Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`). +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. (:issue:`22487`) +- :func:`pandas.core.groupby.RollingGroupby.agg` and :func:`pandas.core.groupby.ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`) +- Bug in :meth:`DataFrame.expanding` in which the ``axis`` argument was not being respected during aggregations (:issue:`23372`) +- Bug in :meth:`pandas.core.groupby.GroupBy.transform` which caused missing values when the input function can accept a :class:`DataFrame` but renames it (:issue:`23455`). +- Bug in :func:`pandas.core.groupby.GroupBy.nth` where column order was not always preserved (:issue:`20760`) +- Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`). +- Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`) +- Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`). 
+- Bug in :meth:`DataFrame.groupby` did not respect the ``observed`` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`) +- Bug in :func:`pandas.core.groupby.SeriesGroupBy.pct_change` or :func:`pandas.core.groupby.DataFrameGroupBy.pct_change` would previously work across groups when calculating the percent change, where it now correctly works per group (:issue:`21200`, :issue:`21235`). +- Bug preventing hash table creation with very large number (2^32) of rows (:issue:`22805`) +- Bug in groupby when grouping on categorical causes ``ValueError`` and incorrect grouping if ``observed=True`` and ``nan`` is present in categorical column (:issue:`24740`, :issue:`21151`). + +Reshaping +^^^^^^^^^ + +- Bug in :func:`pandas.concat` when joining resampled DataFrames with timezone aware index (:issue:`13783`) +- Bug in :func:`pandas.concat` when joining only ``Series`` the ``names`` argument of ``concat`` is no longer ignored (:issue:`23490`) +- Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) +- Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) +- Bug in :meth:`DataFrame.where` with an empty DataFrame and empty ``cond`` having non-bool dtype (:issue:`21947`) +- Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`) +- Bug in :meth:`DataFrame.replace` raises RecursionError when converting OutOfBounds ``datetime64[ns, tz]`` (:issue:`20380`) +- :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`) +- Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`) +- Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`) +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) +- Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`) +- Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) +- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) +- Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) +- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`) +- Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`) +- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`). 
+- Bug in :func:`pandas.melt` when passing column names that are not present in ``DataFrame`` (:issue:`23575`) +- Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) +- Bug in :class:`Series` construction when passing no data and ``dtype=str`` (:issue:`22477`) +- Bug in :func:`cut` with ``bins`` as an overlapping ``IntervalIndex`` where multiple bins were returned per item instead of raising a ``ValueError`` (:issue:`23980`) +- Bug in :func:`pandas.concat` when joining ``Series`` datetimetz with ``Series`` category would lose timezone (:issue:`23816`) +- Bug in :meth:`DataFrame.join` when joining on partial MultiIndex would drop names (:issue:`20452`). +- :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now returns the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`) +- Constructing a DataFrame with an index argument that wasn't already an instance of :class:`~pandas.core.Index` was broken (:issue:`22227`). +- Bug in :class:`DataFrame` prevented list subclasses to be used to construction (:issue:`21226`) +- Bug in :func:`DataFrame.unstack` and :func:`DataFrame.pivot_table` returning a misleading error message when the resulting DataFrame has more elements than int32 can handle. Now, the error message is improved, pointing towards the actual problem (:issue:`20601`) +- Bug in :func:`DataFrame.unstack` where a ``ValueError`` was raised when unstacking timezone aware values (:issue:`18338`) +- Bug in :func:`DataFrame.stack` where timezone aware values were converted to timezone naive values (:issue:`19420`) +- Bug in :func:`merge_asof` where a ``TypeError`` was raised when ``by_col`` were timezone aware values (:issue:`21184`) +- Bug showing an incorrect shape when throwing error during ``DataFrame`` construction. (:issue:`20742`) + +.. _whatsnew_0240.bug_fixes.sparse: + +Sparse +^^^^^^ + +- Updating a boolean, datetime, or timedelta column to be Sparse now works (:issue:`22367`) +- Bug in :meth:`Series.to_sparse` with Series already holding sparse data not constructing properly (:issue:`22389`) +- Providing a ``sparse_index`` to the SparseArray constructor no longer defaults the na-value to ``np.nan`` for all dtypes. The correct na_value for ``data.dtype`` is now used. +- Bug in ``SparseArray.nbytes`` under-reporting its memory usage by not including the size of its sparse index. +- Improved performance of :meth:`Series.shift` for non-NA ``fill_value``, as values are no longer converted to a dense array. +- Bug in ``DataFrame.groupby`` not including ``fill_value`` in the groups for non-NA ``fill_value`` when grouping by a sparse column (:issue:`5078`) +- Bug in unary inversion operator (``~``) on a ``SparseSeries`` with boolean values. The performance of this has also been improved (:issue:`22835`) +- Bug in :meth:`SparseArary.unique` not returning the unique values (:issue:`19595`) +- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`) +- Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`) +- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`) + +Style +^^^^^ + +- :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. 
This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`) +- :meth:`~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`) +- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. + +Build changes +^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.28.2`` (:issue:`21688`) +- Testing pandas now requires ``hypothesis>=3.58``. You can find `the Hypothesis docs here `_, and a pandas-specific introduction :ref:`in the contributing guide `. (:issue:`22280`) +- Building pandas on macOS now targets minimum macOS 10.9 if run on macOS 10.9 or above (:issue:`23424`) + +Other +^^^^^ + +- Bug where C variables were declared with external linkage causing import errors if certain other C libraries were imported before pandas. (:issue:`24113`) + + +.. _whatsnew_0.24.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.4..v0.24.0 diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst new file mode 100644 index 00000000..dd859dab --- /dev/null +++ b/doc/source/whatsnew/v0.24.1.rst @@ -0,0 +1,94 @@ +.. _whatsnew_0241: + +What's new in 0.24.1 (February 3, 2019) +--------------------------------------- + +.. warning:: + + The 0.24.x series of releases will be the last to support Python 2. Future feature + releases will support Python 3 only. See `Dropping Python 2.7 `_ for more. + +{{ header }} + +These are the changes in pandas 0.24.1. See :ref:`release` for a full changelog +including other versions of pandas. See :ref:`whatsnew_0240` for the 0.24.0 changelog. + +.. _whatsnew_0241.api: + +API changes +~~~~~~~~~~~ + +Changing the ``sort`` parameter for :class:`Index` set operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`). +The default *behavior*, however, remains the same: the result is sorted, unless + +1. ``self`` and ``other`` are identical +2. ``self`` or ``other`` is empty +3. ``self`` or ``other`` contain values that can not be compared (a ``RuntimeWarning`` is raised). + +This change will allow ``sort=True`` to mean "always sort" in a future release. + +The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which +would not sort the result when the values could not be compared. + +The ``sort`` option for :meth:`Index.intersection` has changed in three ways. + +1. The default has changed from ``True`` to ``False``, to restore the + pandas 0.23.4 and earlier behavior of not sorting by default. +2. The behavior of ``sort=True`` can now be obtained with ``sort=None``. + This will sort the result only if the values in ``self`` and ``other`` + are not identical. +3. The value ``sort=True`` is no longer allowed. A future version of pandas + will properly support ``sort=True`` meaning "always sort". + +.. 
_whatsnew_0241.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`DataFrame.to_dict` with ``records`` orient raising an + ``AttributeError`` when the ``DataFrame`` contained more than 255 columns, or + wrongly converting column names that were not valid python identifiers (:issue:`24939`, :issue:`24940`). +- Fixed regression in :func:`read_sql` when passing certain queries with MySQL/pymysql (:issue:`24988`). +- Fixed regression in :class:`Index.intersection` incorrectly sorting the values by default (:issue:`24959`). +- Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`). +- Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`) +- Fixed regression in :func:`to_timedelta` with ``box=False`` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`) +- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`) + +.. _whatsnew_0241.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Reshaping** + +- Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`) + +**Visualization** + +- Fixed the warning for implicitly registered matplotlib converters not showing. See :ref:`whatsnew_0211.converters` for more (:issue:`24963`). + +**Other** + +- Fixed AttributeError when printing a DataFrame's HTML repr after accessing the IPython config object (:issue:`25036`) + +.. _whatsnew_0.241.contributors: + +Contributors +~~~~~~~~~~~~ + +.. Including the contributors hardcoded for this release, as backporting with + MeeseeksDev loses the commit authors + +A total of 7 people contributed patches to this release. People with a "+" by their names contributed a patch for the first time. + +* Alex Buchkovsky +* Roman Yurchak +* h-vetinari +* jbrockmendel +* Jeremy Schendel +* Joris Van den Bossche +* Tom Augspurger diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst new file mode 100644 index 00000000..36684d46 --- /dev/null +++ b/doc/source/whatsnew/v0.24.2.rst @@ -0,0 +1,107 @@ +.. _whatsnew_0242: + +What's new in 0.24.2 (March 12, 2019) +------------------------------------- + +.. warning:: + + The 0.24.x series of releases will be the last to support Python 2. Future feature + releases will support Python 3 only. See `Dropping Python 2.7 `_ for more. + +{{ header }} + +These are the changes in pandas 0.24.2. See :ref:`release` for a full changelog +including other versions of pandas. + +.. _whatsnew_0242.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`DataFrame.all` and :meth:`DataFrame.any` where ``bool_only=True`` was ignored (:issue:`25101`) +- Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`) +- Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`) +- Fixed regression in :meth:`DataFrame.replace` where ``regex=True`` was only replacing patterns matching the start of the string (:issue:`25259`) +- Fixed regression in :meth:`DataFrame.duplicated()`, where empty dataframe was not returning a boolean dtyped Series. 
(:issue:`25184`) +- Fixed regression in :meth:`Series.min` and :meth:`Series.max` where ``numeric_only=True`` was ignored when the ``Series`` contained ``Categorical`` data (:issue:`25299`) +- Fixed regression in subtraction between :class:`Series` objects with ``datetime64[ns]`` dtype incorrectly raising ``OverflowError`` when the ``Series`` on the right contains null values (:issue:`25317`) +- Fixed regression in :class:`TimedeltaIndex` where ``np.sum(index)`` incorrectly returned a zero-dimensional object instead of a scalar (:issue:`25282`) +- Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) +- Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`) +- Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) +- Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`). +- Fixed pip installing from source into an environment without NumPy (:issue:`25193`) +- Fixed regression in :meth:`DataFrame.replace` where large strings of numbers would be coerced into ``int64``, causing an ``OverflowError`` (:issue:`25616`) +- Fixed regression in :func:`factorize` when passing a custom ``na_sentinel`` value with ``sort=True`` (:issue:`25409`). +- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compress (:issue:`25311`) + +.. _whatsnew_0242.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**I/O** + +- Better handling of terminal printing when the terminal dimensions are not known (:issue:`25080`) +- Bug in reading a HDF5 table-format ``DataFrame`` created in Python 2, in Python 3 (:issue:`24925`) +- Bug in reading a JSON with ``orient='table'`` generated by :meth:`DataFrame.to_json` with ``index=False`` (:issue:`25170`) +- Bug where float indexes could have misaligned values when printing (:issue:`25061`) + +**Categorical** + +- Bug where calling :meth:`Series.replace` on categorical data could return a ``Series`` with incorrect dimensions (:issue:`24971`) +- + +**Reshaping** + +- Bug in :meth:`~pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) +- Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`) + +**Visualization** + +- Bug in :meth:`Series.plot` where a secondary y axis could not be set to log scale (:issue:`25545`) + +**Other** + +- Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) +- Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`) +- Bug in ``IntervalTree`` where a ``RecursionError`` occurs upon construction due to an overflow when adding endpoints, which also causes :class:`IntervalIndex` to crash during indexing operations (:issue:`25485`) +- Bug in :attr:`Series.size` raising for some extension-array-backed ``Series``, rather than returning the size (:issue:`25580`) +- Bug in resampling raising for nullable integer-dtype columns (:issue:`25580`) + +.. 
_whatsnew_0242.contributors: + +Contributors +~~~~~~~~~~~~ + +.. Including the contributors hardcoded for this release, as backporting with + MeeseeksDev loses the commit authors + +A total of 25 people contributed patches to this release. People with a "+" by their names contributed a patch for the first time. + +* Albert Villanova del Moral +* Arno Veenstra + +* chris-b1 +* Devin Petersohn + +* EternalLearner42 + +* Flavien Lambert + +* gfyoung +* Gioia Ballin +* jbrockmendel +* Jeff Reback +* Jeremy Schendel +* Johan von Forstner + +* Joris Van den Bossche +* Josh +* Justin Zheng +* Kendall Masse +* Matthew Roeschke +* Max Bolingbroke + +* rbenes + +* Sterling Paramore + +* Tao He + +* Thomas A Caswell +* Tom Augspurger +* Vibhu Agarwal + +* William Ayd +* Zach Angell diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst new file mode 100644 index 00000000..e4dd6fa0 --- /dev/null +++ b/doc/source/whatsnew/v0.25.0.rst @@ -0,0 +1,1277 @@ +.. _whatsnew_0250: + +What's new in 0.25.0 (July 18, 2019) +------------------------------------ + +.. warning:: + + Starting with the 0.25.x series of releases, pandas only supports Python 3.5.3 and higher. + See `Dropping Python 2.7 `_ for more details. + +.. warning:: + + The minimum supported Python version will be bumped to 3.6 in a future release. + +.. warning:: + + ``Panel`` has been fully removed. For N-D labeled data structures, please + use `xarray `_ + +.. warning:: + + :func:`read_pickle` and :func:`read_msgpack` are only guaranteed backwards compatible back to + pandas version 0.20.3 (:issue:`27082`) + +{{ header }} + +These are the changes in pandas 0.25.0. See :ref:`release` for a full changelog +including other versions of pandas. + + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_0250.enhancements.agg_relabel: + +GroupBy aggregation with relabeling +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas has added special groupby behavior, known as "named aggregation", for naming the +output columns when applying multiple aggregation functions to specific columns (:issue:`18366`, :issue:`26512`). + +.. ipython:: python + + animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'], + 'height': [9.1, 6.0, 9.5, 34.0], + 'weight': [7.9, 7.5, 9.9, 198.0]}) + animals + animals.groupby("kind").agg( + min_height=pd.NamedAgg(column='height', aggfunc='min'), + max_height=pd.NamedAgg(column='height', aggfunc='max'), + average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean), + ) + +Pass the desired columns names as the ``**kwargs`` to ``.agg``. The values of ``**kwargs`` +should be tuples where the first element is the column selection, and the second element is the +aggregation function to apply. pandas provides the ``pandas.NamedAgg`` namedtuple to make it clearer +what the arguments to the function are, but plain tuples are accepted as well. + +.. ipython:: python + + animals.groupby("kind").agg( + min_height=('height', 'min'), + max_height=('height', 'max'), + average_weight=('weight', np.mean), + ) + +Named aggregation is the recommended replacement for the deprecated "dict-of-dicts" +approach to naming the output of column-specific aggregations (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`). + +A similar approach is now available for Series groupby objects as well. Because there's no need for +column selection, the values can just be the functions to apply + +.. 
ipython:: python + + animals.groupby("kind").height.agg( + min_height="min", + max_height="max", + ) + + +This type of aggregation is the recommended alternative to the deprecated behavior when passing +a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`). + +See :ref:`groupby.aggregate.named` for more. + +.. _whatsnew_0250.enhancements.multiple_lambdas: + +GroupBy aggregation with multiple lambdas +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can now provide multiple lambda functions to a list-like aggregation in +:class:`pandas.core.groupby.GroupBy.agg` (:issue:`26430`). + +.. ipython:: python + + animals.groupby('kind').height.agg([ + lambda x: x.iloc[0], lambda x: x.iloc[-1] + ]) + + animals.groupby('kind').agg([ + lambda x: x.iloc[0] - x.iloc[1], + lambda x: x.iloc[0] + x.iloc[1] + ]) + +Previously, these raised a ``SpecificationError``. + +.. _whatsnew_0250.enhancements.multi_index_repr: + +Better repr for MultiIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Printing of :class:`MultiIndex` instances now shows tuples of each row and ensures +that the tuple items are vertically aligned, so it's now easier to understand +the structure of the ``MultiIndex``. (:issue:`13480`): + +The repr now looks like this: + +.. ipython:: python + + pd.MultiIndex.from_product([['a', 'abc'], range(500)]) + +Previously, outputting a :class:`MultiIndex` printed all the ``levels`` and +``codes`` of the ``MultiIndex``, which was visually unappealing and made +the output more difficult to navigate. For example (limiting the range to 5): + +.. code-block:: ipython + + In [1]: pd.MultiIndex.from_product([['a', 'abc'], range(5)]) + Out[1]: MultiIndex(levels=[['a', 'abc'], [0, 1, 2, 3]], + ...: codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 3, 0, 1, 2, 3]]) + +In the new repr, all values will be shown, if the number of rows is smaller +than :attr:`options.display.max_seq_items` (default: 100 items). Horizontally, +the output will truncate, if it's wider than :attr:`options.display.width` +(default: 80 characters). + +.. _whatsnew_0250.enhancements.shorter_truncated_repr: + +Shorter truncated repr for Series and DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Currently, the default display options of pandas ensure that when a Series +or DataFrame has more than 60 rows, its repr gets truncated to this maximum +of 60 rows (the ``display.max_rows`` option). However, this still gives +a repr that takes up a large part of the vertical screen estate. Therefore, +a new option ``display.min_rows`` is introduced with a default of 10 which +determines the number of rows showed in the truncated repr: + +- For small Series or DataFrames, up to ``max_rows`` number of rows is shown + (default: 60). +- For larger Series of DataFrame with a length above ``max_rows``, only + ``min_rows`` number of rows is shown (default: 10, i.e. the first and last + 5 rows). + +This dual option allows to still see the full content of relatively small +objects (e.g. ``df.head(20)`` shows all 20 rows), while giving a brief repr +for large objects. + +To restore the previous behaviour of a single threshold, set +``pd.options.display.min_rows = None``. + +.. _whatsnew_0250.enhancements.json_normalize_with_max_level: + +JSON normalize with max_level param support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`json_normalize` normalizes the provided input dict to all +nested levels. 
The new max_level parameter provides more control over +which level to end normalization (:issue:`23843`): + +The repr now looks like this: + +.. code-block:: ipython + + from pandas.io.json import json_normalize + data = [{ + 'CreatedBy': {'Name': 'User001'}, + 'Lookup': {'TextField': 'Some text', + 'UserField': {'Id': 'ID001', 'Name': 'Name001'}}, + 'Image': {'a': 'b'} + }] + json_normalize(data, max_level=1) + + +.. _whatsnew_0250.enhancements.explode: + +Series.explode to split list-like values to rows +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Series` and :class:`DataFrame` have gained the :meth:`DataFrame.explode` methods to transform list-likes to individual rows. See :ref:`section on Exploding list-like column ` in docs for more information (:issue:`16538`, :issue:`10511`) + + +Here is a typical usecase. You have comma separated string in a column. + +.. ipython:: python + + df = pd.DataFrame([{'var1': 'a,b,c', 'var2': 1}, + {'var1': 'd,e,f', 'var2': 2}]) + df + +Creating a long form ``DataFrame`` is now straightforward using chained operations + +.. ipython:: python + + df.assign(var1=df.var1.str.split(',')).explode('var1') + +.. _whatsnew_0250.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ +- :func:`DataFrame.plot` keywords ``logy``, ``logx`` and ``loglog`` can now accept the value ``'sym'`` for symlog scaling. (:issue:`24867`) +- Added support for ISO week year format ('%G-%V-%u') when parsing datetimes using :meth:`to_datetime` (:issue:`16607`) +- Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`) +- :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) +- :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :class:`datetime.time` objects with timezones (:issue:`24043`) +- :meth:`DataFrame.pivot_table` now accepts an ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. (:issue:`24923`) +- ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`) +- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) +- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behavior of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) +- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. 
``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`) +- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`) +- :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`) +- Added :ref:`api.frame.sparse` for working with a ``DataFrame`` whose values are sparse (:issue:`25681`) +- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`) +- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`) +- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) +- :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`) +- :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`) +- Error message for missing required imports now includes the original import error's text (:issue:`23868`) +- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`) +- :meth:`DataFrame.describe` now formats integer percentiles without decimal point (:issue:`26660`) +- Added support for reading SPSS .sav files using :func:`read_spss` (:issue:`26537`) +- Added new option ``plotting.backend`` to be able to select a plotting backend different than the existing ``matplotlib`` one. Use ``pandas.set_option('plotting.backend', '')`` where ``` for more details (:issue:`9070`) +- :class:`Interval`, :class:`IntervalIndex`, and :class:`~arrays.IntervalArray` have gained an :attr:`~Interval.is_empty` attribute denoting if the given interval(s) are empty (:issue:`27219`) + +.. _whatsnew_0250.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0250.api_breaking.utc_offset_indexing: + + +Indexing with date strings with UTC offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Indexing a :class:`DataFrame` or :class:`Series` with a :class:`DatetimeIndex` with a +date string with a UTC offset would previously ignore the UTC offset. Now, the UTC offset +is respected in indexing. (:issue:`24076`, :issue:`16785`) + +.. ipython:: python + + df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df['2019-01-01 00:00:00+04:00':'2019-01-01 01:00:00+04:00'] + Out[3]: + 0 + 2019-01-01 00:00:00-08:00 0 + +*New behavior*: + +.. ipython:: python + + df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] + + +.. _whatsnew_0250.api_breaking.multi_indexing: + + +``MultiIndex`` constructed from levels and codes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Constructing a :class:`MultiIndex` with ``NaN`` levels or codes value < -1 was allowed previously. +Now, construction with codes value < -1 is not allowed and ``NaN`` levels' corresponding codes +would be reassigned as -1. (:issue:`19387`) + +*Previous behavior*: + +.. 
code-block:: ipython + + In [1]: pd.MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]], + ...: codes=[[0, -1, 1, 2, 3, 4]]) + ...: + Out[1]: MultiIndex(levels=[[nan, None, NaT, 128, 2]], + codes=[[0, -1, 1, 2, 3, 4]]) + + In [2]: pd.MultiIndex(levels=[[1, 2]], codes=[[0, -2]]) + Out[2]: MultiIndex(levels=[[1, 2]], + codes=[[0, -2]]) + +*New behavior*: + +.. ipython:: python + :okexcept: + + pd.MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]], + codes=[[0, -1, 1, 2, 3, 4]]) + pd.MultiIndex(levels=[[1, 2]], codes=[[0, -2]]) + + +.. _whatsnew_0250.api_breaking.groupby_apply_first_group_once: + +``GroupBy.apply`` on ``DataFrame`` evaluates first group only once +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The implementation of :meth:`DataFrameGroupBy.apply() ` +previously evaluated the supplied function consistently twice on the first group +to infer if it is safe to use a fast code path. Particularly for functions with +side effects, this was an undesired behavior and may have led to surprises. (:issue:`2936`, :issue:`2656`, :issue:`7739`, :issue:`10519`, :issue:`12155`, :issue:`20084`, :issue:`21417`) + +Now every group is evaluated only a single time. + +.. ipython:: python + + df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]}) + df + + def func(group): + print(group.name) + return group + +*Previous behavior*: + +.. code-block:: python + + In [3]: df.groupby('a').apply(func) + x + x + y + Out[3]: + a b + 0 x 1 + 1 y 2 + +*New behavior*: + +.. code-block:: python + + In [3]: df.groupby('a').apply(func) + x + y + Out[3]: + a b + 0 x 1 + 1 y 2 + +Concatenating sparse values +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When passed DataFrames whose values are sparse, :func:`concat` will now return a +:class:`Series` or :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`25702`). + +.. ipython:: python + :okwarning: + + df = pd.DataFrame({"A": pd.SparseArray([0, 1])}) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: type(pd.concat([df, df])) + pandas.core.sparse.frame.SparseDataFrame + +*New behavior*: + +.. ipython:: python + + type(pd.concat([df, df])) + + +This now matches the existing behavior of :class:`concat` on ``Series`` with sparse values. +:func:`concat` will continue to return a ``SparseDataFrame`` when all the values +are instances of ``SparseDataFrame``. + +This change also affects routines using :func:`concat` internally, like :func:`get_dummies`, +which now returns a :class:`DataFrame` in all cases (previously a ``SparseDataFrame`` was +returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwise). + +Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will +cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before. + +The ``.str``-accessor performs stricter type checks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Due to the lack of more fine-grained dtypes, :attr:`Series.str` so far only checked whether the data was +of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* the Series; in particular, +``'bytes'``-only data will raise an exception (except for :meth:`Series.str.decode`, :meth:`Series.str.get`, +:meth:`Series.str.len`, :meth:`Series.str.slice`), see :issue:`23163`, :issue:`23011`, :issue:`23551`. + +*Previous behavior*: + +.. 
code-block:: python + + In [1]: s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object) + + In [2]: s + Out[2]: + 0 b'a' + 1 b'ba' + 2 b'cba' + dtype: object + + In [3]: s.str.startswith(b'a') + Out[3]: + 0 True + 1 False + 2 False + dtype: bool + +*New behavior*: + +.. ipython:: python + :okexcept: + + s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object) + s + s.str.startswith(b'a') + +.. _whatsnew_0250.api_breaking.groupby_categorical: + +Categorical dtypes are preserved during GroupBy +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, columns that were categorical, but not the groupby key(s) would be converted to ``object`` dtype during groupby operations. pandas now will preserve these dtypes. (:issue:`18502`) + +.. ipython:: python + + cat = pd.Categorical(["foo", "bar", "bar", "qux"], ordered=True) + df = pd.DataFrame({'payload': [-1, -2, -1, -2], 'col': cat}) + df + df.dtypes + +*Previous Behavior*: + +.. code-block:: python + + In [5]: df.groupby('payload').first().col.dtype + Out[5]: dtype('O') + +*New Behavior*: + +.. ipython:: python + + df.groupby('payload').first().col.dtype + + +.. _whatsnew_0250.api_breaking.incompatible_index_unions: + +Incompatible Index type unions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing :func:`Index.union` operations between objects of incompatible dtypes, +the result will be a base :class:`Index` of dtype ``object``. This behavior holds true for +unions between :class:`Index` objects that previously would have been prohibited. The dtype +of empty :class:`Index` objects will now be evaluated before performing union operations +rather than simply returning the other :class:`Index` object. :func:`Index.union` can now be +considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). + +*Previous behavior*: + +.. code-block:: python + + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + ... + ValueError: can only call with other PeriodIndex-ed objects + + In [2]: pd.Index([], dtype=object).union(pd.Index([1, 2, 3])) + Out[2]: Int64Index([1, 2, 3], dtype='int64') + +*New behavior*: + +.. code-block:: python + + In [3]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + Out[3]: Index([1991-09-05, 1991-09-06, 1, 2, 3], dtype='object') + In [4]: pd.Index([], dtype=object).union(pd.Index([1, 2, 3])) + Out[4]: Index([1, 2, 3], dtype='object') + +Note that integer- and floating-dtype indexes are considered "compatible". The integer +values are coerced to floating point, which may result in loss of precision. See +:ref:`indexing.set_ops` for more. + + +``DataFrame`` GroupBy ffill/bfill no longer return group labels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The methods ``ffill``, ``bfill``, ``pad`` and ``backfill`` of +:class:`DataFrameGroupBy ` +previously included the group labels in the return value, which was +inconsistent with other groupby transforms. Now only the filled values +are returned. (:issue:`21521`) + +.. ipython:: python + + df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]}) + df + +*Previous behavior*: + +.. code-block:: python + + In [3]: df.groupby("a").ffill() + Out[3]: + a b + 0 x 1 + 1 y 2 + +*New behavior*: + +.. 
ipython:: python + + df.groupby("a").ffill() + +``DataFrame`` describe on an empty Categorical / object column will return top and freq +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When calling :meth:`DataFrame.describe` with an empty categorical / object +column, the 'top' and 'freq' columns were previously omitted, which was inconsistent with +the output for non-empty columns. Now the 'top' and 'freq' columns will always be included, +with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397`) + +.. ipython:: python + + df = pd.DataFrame({"empty_col": pd.Categorical([])}) + df + +*Previous behavior*: + +.. code-block:: python + + In [3]: df.describe() + Out[3]: + empty_col + count 0 + unique 0 + +*New behavior*: + +.. ipython:: python + + df.describe() + +``__str__`` methods now call ``__repr__`` rather than vice versa +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas has until now mostly defined string representations in a pandas objects' +``__str__``/``__unicode__``/``__bytes__`` methods, and called ``__str__`` from the ``__repr__`` +method, if a specific ``__repr__`` method is not found. This is not needed for Python3. +In pandas 0.25, the string representations of pandas objects are now generally +defined in ``__repr__``, and calls to ``__str__`` in general now pass the call on to +the ``__repr__``, if a specific ``__str__`` method doesn't exist, as is standard for Python. +This change is backward compatible for direct usage of pandas, but if you subclass +pandas objects *and* give your subclasses specific ``__str__``/``__repr__`` methods, +you may have to adjust your ``__str__``/``__repr__`` methods (:issue:`26495`). + +.. _whatsnew_0250.api_breaking.interval_indexing: + + +Indexing an ``IntervalIndex`` with ``Interval`` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Indexing methods for :class:`IntervalIndex` have been modified to require exact matches only for :class:`Interval` queries. +``IntervalIndex`` methods previously matched on any overlapping ``Interval``. Behavior with scalar points, e.g. querying +with an integer, is unchanged (:issue:`16316`). + +.. ipython:: python + + ii = pd.IntervalIndex.from_tuples([(0, 4), (1, 5), (5, 8)]) + ii + +The ``in`` operator (``__contains__``) now only returns ``True`` for exact matches to ``Intervals`` in the ``IntervalIndex``, whereas +this would previously return ``True`` for any ``Interval`` overlapping an ``Interval`` in the ``IntervalIndex``. + +*Previous behavior*: + +.. code-block:: python + + In [4]: pd.Interval(1, 2, closed='neither') in ii + Out[4]: True + + In [5]: pd.Interval(-10, 10, closed='both') in ii + Out[5]: True + +*New behavior*: + +.. ipython:: python + + pd.Interval(1, 2, closed='neither') in ii + pd.Interval(-10, 10, closed='both') in ii + +The :meth:`~IntervalIndex.get_loc` method now only returns locations for exact matches to ``Interval`` queries, as opposed to the previous behavior of +returning locations for overlapping matches. A ``KeyError`` will be raised if an exact match is not found. + +*Previous behavior*: + +.. code-block:: python + + In [6]: ii.get_loc(pd.Interval(1, 5)) + Out[6]: array([0, 1]) + + In [7]: ii.get_loc(pd.Interval(2, 6)) + Out[7]: array([0, 1, 2]) + +*New behavior*: + +.. 
code-block:: python + + In [6]: ii.get_loc(pd.Interval(1, 5)) + Out[6]: 1 + + In [7]: ii.get_loc(pd.Interval(2, 6)) + --------------------------------------------------------------------------- + KeyError: Interval(2, 6, closed='right') + +Likewise, :meth:`~IntervalIndex.get_indexer` and :meth:`~IntervalIndex.get_indexer_non_unique` will also only return locations for exact matches +to ``Interval`` queries, with ``-1`` denoting that an exact match was not found. + +These indexing changes extend to querying a :class:`Series` or :class:`DataFrame` with an ``IntervalIndex`` index. + +.. ipython:: python + + s = pd.Series(list('abc'), index=ii) + s + +Selecting from a ``Series`` or ``DataFrame`` using ``[]`` (``__getitem__``) or ``loc`` now only returns exact matches for ``Interval`` queries. + +*Previous behavior*: + +.. code-block:: python + + In [8]: s[pd.Interval(1, 5)] + Out[8]: + (0, 4] a + (1, 5] b + dtype: object + + In [9]: s.loc[pd.Interval(1, 5)] + Out[9]: + (0, 4] a + (1, 5] b + dtype: object + +*New behavior*: + +.. ipython:: python + + s[pd.Interval(1, 5)] + s.loc[pd.Interval(1, 5)] + +Similarly, a ``KeyError`` will be raised for non-exact matches instead of returning overlapping matches. + +*Previous behavior*: + +.. code-block:: python + + In [9]: s[pd.Interval(2, 3)] + Out[9]: + (0, 4] a + (1, 5] b + dtype: object + + In [10]: s.loc[pd.Interval(2, 3)] + Out[10]: + (0, 4] a + (1, 5] b + dtype: object + +*New behavior*: + +.. code-block:: python + + In [6]: s[pd.Interval(2, 3)] + --------------------------------------------------------------------------- + KeyError: Interval(2, 3, closed='right') + + In [7]: s.loc[pd.Interval(2, 3)] + --------------------------------------------------------------------------- + KeyError: Interval(2, 3, closed='right') + +The :meth:`~IntervalIndex.overlaps` method can be used to create a boolean indexer that replicates the +previous behavior of returning overlapping matches. + +*New behavior*: + +.. ipython:: python + + idxr = s.index.overlaps(pd.Interval(2, 3)) + idxr + s[idxr] + s.loc[idxr] + + +.. _whatsnew_0250.api_breaking.ufunc: + +Binary ufuncs on Series now align +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Applying a binary ufunc like :func:`numpy.power` now aligns the inputs +when both are :class:`Series` (:issue:`23293`). + +.. ipython:: python + + s1 = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + s2 = pd.Series([3, 4, 5], index=['d', 'c', 'b']) + s1 + s2 + +*Previous behavior* + +.. code-block:: ipython + + In [5]: np.power(s1, s2) + Out[5]: + a 1 + b 16 + c 243 + dtype: int64 + +*New behavior* + +.. ipython:: python + + np.power(s1, s2) + +This matches the behavior of other binary operations in pandas, like :meth:`Series.add`. +To retain the previous behavior, convert the other ``Series`` to an array before +applying the ufunc. + +.. ipython:: python + + np.power(s1, s2.array) + +Categorical.argsort now places missing values at the end +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`Categorical.argsort` now places missing values at the end of the array, making it +consistent with NumPy and the rest of pandas (:issue:`21801`). + +.. ipython:: python + + cat = pd.Categorical(['b', None, 'a'], categories=['a', 'b'], ordered=True) + +*Previous behavior* + +.. 
code-block:: ipython + + In [2]: cat = pd.Categorical(['b', None, 'a'], categories=['a', 'b'], ordered=True) + + In [3]: cat.argsort() + Out[3]: array([1, 2, 0]) + + In [4]: cat[cat.argsort()] + Out[4]: + [NaN, a, b] + categories (2, object): [a < b] + +*New behavior* + +.. ipython:: python + + cat.argsort() + cat[cat.argsort()] + +.. _whatsnew_0250.api_breaking.list_of_dict: + +Column order is preserved when passing a list of dicts to DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Starting with Python 3.7 the key-order of ``dict`` is `guaranteed `_. In practice, this has been true since +Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as +it does a list of ``OrderedDict``, i.e. preserving the order of the dicts. +This change applies only when pandas is running on Python>=3.6 (:issue:`27309`). + +.. ipython:: python + + data = [ + {'name': 'Joe', 'state': 'NY', 'age': 18}, + {'name': 'Jane', 'state': 'KY', 'age': 19, 'hobby': 'Minecraft'}, + {'name': 'Jean', 'state': 'OK', 'age': 20, 'finances': 'good'} + ] + +*Previous Behavior*: + +The columns were lexicographically sorted previously, + +.. code-block:: python + + In [1]: pd.DataFrame(data) + Out[1]: + age finances hobby name state + 0 18 NaN NaN Joe NY + 1 19 NaN Minecraft Jane KY + 2 20 good NaN Jean OK + +*New Behavior*: + +The column order now matches the insertion-order of the keys in the ``dict``, +considering all the records from top to bottom. As a consequence, the column +order of the resulting DataFrame has changed compared to previous pandas versions. + +.. ipython:: python + + pd.DataFrame(data) + +.. _whatsnew_0250.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Due to dropping support for Python 2.7, a number of optional dependencies have updated minimum versions (:issue:`25725`, :issue:`24942`, :issue:`25752`). +Independently, some minimum supported versions of dependencies were updated (:issue:`23519`, :issue:`25554`). +If installed, we now require: + ++-----------------+-----------------+----------+ +| Package | Minimum Version | Required | ++=================+=================+==========+ +| numpy | 1.13.3 | X | ++-----------------+-----------------+----------+ +| pytz | 2015.4 | X | ++-----------------+-----------------+----------+ +| python-dateutil | 2.6.1 | X | ++-----------------+-----------------+----------+ +| bottleneck | 1.2.1 | | ++-----------------+-----------------+----------+ +| numexpr | 2.6.2 | | ++-----------------+-----------------+----------+ +| pytest (dev) | 4.0.2 | | ++-----------------+-----------------+----------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. 
+
++-----------------+-----------------+
+| Package         | Minimum Version |
++=================+=================+
+| beautifulsoup4  | 4.6.0           |
++-----------------+-----------------+
+| fastparquet     | 0.2.1           |
++-----------------+-----------------+
+| gcsfs           | 0.2.2           |
++-----------------+-----------------+
+| lxml            | 3.8.0           |
++-----------------+-----------------+
+| matplotlib      | 2.2.2           |
++-----------------+-----------------+
+| openpyxl        | 2.4.8           |
++-----------------+-----------------+
+| pyarrow         | 0.9.0           |
++-----------------+-----------------+
+| pymysql         | 0.7.1           |
++-----------------+-----------------+
+| pytables        | 3.4.2           |
++-----------------+-----------------+
+| scipy           | 0.19.0          |
++-----------------+-----------------+
+| sqlalchemy      | 1.1.4           |
++-----------------+-----------------+
+| xarray          | 0.8.2           |
++-----------------+-----------------+
+| xlrd            | 1.1.0           |
++-----------------+-----------------+
+| xlsxwriter      | 0.9.8           |
++-----------------+-----------------+
+| xlwt            | 1.2.0           |
++-----------------+-----------------+
+
+See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
+
+.. _whatsnew_0250.api.other:
+
+Other API changes
+^^^^^^^^^^^^^^^^^
+
+- :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`)
+- :class:`Timestamp` and :class:`Timedelta` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`)
+- :meth:`Timestamp.strptime` will now raise a ``NotImplementedError`` (:issue:`25016`)
+- Comparing :class:`Timestamp` with unsupported objects now returns :py:obj:`NotImplemented` instead of raising ``TypeError``. This implies that unsupported rich comparisons are delegated to the other object, and are now consistent with Python 3 behavior for ``datetime`` objects (:issue:`24011`)
+- Bug in :meth:`DatetimeIndex.snap` which didn't preserve the ``name`` of the input :class:`Index` (:issue:`25575`)
+- The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`)
+- The ``arg`` argument in :meth:`pandas.core.window._Window.aggregate` has been renamed to ``func`` (:issue:`26372`)
+- Most pandas classes had a ``__bytes__`` method, which was used for getting a Python 2-style bytestring representation of the object. This method has been removed as a part of dropping Python 2 (:issue:`26447`)
+- The ``.str``-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`)
+- Removed support for the gtk package for clipboards (:issue:`26563`)
+- Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`)
+- :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`)
+- :meth:`ExtensionArray.argsort` places NA values at the end of the sorted array. (:issue:`21801`)
+- :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extension data types for a ``fixed`` format. (:issue:`7775`)
+- Passing duplicate ``names`` in :meth:`read_csv` will now raise a ``ValueError`` (:issue:`17346`)
+
+.. _whatsnew_0250.deprecations:
+
+Deprecations
+~~~~~~~~~~~~
+
+Sparse subclasses
+^^^^^^^^^^^^^^^^^
+
+The ``SparseSeries`` and ``SparseDataFrame`` subclasses are deprecated. 
Their functionality is better-provided +by a ``Series`` or ``DataFrame`` with sparse values. + +**Previous way** + +.. code-block:: python + + df = pd.SparseDataFrame({"A": [0, 0, 1, 2]}) + df.dtypes + +**New way** + +.. ipython:: python + :okwarning: + + df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])}) + df.dtypes + +The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`). + +msgpack format +^^^^^^^^^^^^^^ + +The msgpack format is deprecated as of 0.25 and will be removed in a future version. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. (:issue:`27084`) + + +Other deprecations +^^^^^^^^^^^^^^^^^^ + +- The deprecated ``.ix[]`` indexer now raises a more visible ``FutureWarning`` instead of ``DeprecationWarning`` (:issue:`26438`). +- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) +- :meth:`pandas.concat` has deprecated the ``join_axes``-keyword. Instead, use :meth:`DataFrame.reindex` or :meth:`DataFrame.reindex_like` on the result or on the inputs (:issue:`21951`) +- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or + the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). +- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`) +- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`). +- The internal attributes ``_start``, ``_stop`` and ``_step`` attributes of :class:`RangeIndex` have been deprecated. + Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`). +- The :meth:`Series.ftype`, :meth:`Series.ftypes` and :meth:`DataFrame.ftypes` methods are deprecated and will be removed in a future version. + Instead, use :meth:`Series.dtype` and :meth:`DataFrame.dtypes` (:issue:`26705`). +- The :meth:`Series.get_values`, :meth:`DataFrame.get_values`, :meth:`Index.get_values`, + :meth:`SparseArray.get_values` and :meth:`Categorical.get_values` methods are deprecated. + One of ``np.asarray(..)`` or :meth:`~Series.to_numpy` can be used instead (:issue:`19617`). +- The 'outer' method on NumPy ufuncs, e.g. ``np.subtract.outer`` has been deprecated on :class:`Series` objects. Convert the input to an array with :attr:`Series.array` first (:issue:`27186`) +- :meth:`Timedelta.resolution` is deprecated and replaced with :meth:`Timedelta.resolution_string`. In a future version, :meth:`Timedelta.resolution` will be changed to behave like the standard library :attr:`datetime.timedelta.resolution` (:issue:`21344`) +- :func:`read_table` has been undeprecated. (:issue:`25220`) +- :attr:`Index.dtype_str` is deprecated. (:issue:`18262`) +- :attr:`Series.imag` and :attr:`Series.real` are deprecated. (:issue:`18262`) +- :meth:`Series.put` is deprecated. (:issue:`18262`) +- :meth:`Index.item` and :meth:`Series.item` is deprecated. (:issue:`18262`) +- The default value ``ordered=None`` in :class:`~pandas.api.types.CategoricalDtype` has been deprecated in favor of ``ordered=False``. When converting between categorical types ``ordered=True`` must be explicitly passed in order to be preserved. 
(:issue:`26336`) +- :meth:`Index.contains` is deprecated. Use ``key in index`` (``__contains__``) instead (:issue:`17753`). +- :meth:`DataFrame.get_dtype_counts` is deprecated. (:issue:`18262`) +- :meth:`Categorical.ravel` will return a :class:`Categorical` instead of a ``np.ndarray`` (:issue:`27199`) + + +.. _whatsnew_0250.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Removed ``Panel`` (:issue:`25047`, :issue:`25191`, :issue:`25231`) +- Removed the previously deprecated ``sheetname`` keyword in :func:`read_excel` (:issue:`16442`, :issue:`20938`) +- Removed the previously deprecated ``TimeGrouper`` (:issue:`16942`) +- Removed the previously deprecated ``parse_cols`` keyword in :func:`read_excel` (:issue:`16488`) +- Removed the previously deprecated ``pd.options.html.border`` (:issue:`16970`) +- Removed the previously deprecated ``convert_objects`` (:issue:`11221`) +- Removed the previously deprecated ``select`` method of ``DataFrame`` and ``Series`` (:issue:`17633`) +- Removed the previously deprecated behavior of :class:`Series` treated as list-like in :meth:`~Series.cat.rename_categories` (:issue:`17982`) +- Removed the previously deprecated ``DataFrame.reindex_axis`` and ``Series.reindex_axis`` (:issue:`17842`) +- Removed the previously deprecated behavior of altering column or index labels with :meth:`Series.rename_axis` or :meth:`DataFrame.rename_axis` (:issue:`17842`) +- Removed the previously deprecated ``tupleize_cols`` keyword argument in :meth:`read_html`, :meth:`read_csv`, and :meth:`DataFrame.to_csv` (:issue:`17877`, :issue:`17820`) +- Removed the previously deprecated ``DataFrame.from.csv`` and ``Series.from_csv`` (:issue:`17812`) +- Removed the previously deprecated ``raise_on_error`` keyword argument in :meth:`DataFrame.where` and :meth:`DataFrame.mask` (:issue:`17744`) +- Removed the previously deprecated ``ordered`` and ``categories`` keyword arguments in ``astype`` (:issue:`17742`) +- Removed the previously deprecated ``cdate_range`` (:issue:`17691`) +- Removed the previously deprecated ``True`` option for the ``dropna`` keyword argument in :func:`SeriesGroupBy.nth` (:issue:`17493`) +- Removed the previously deprecated ``convert`` keyword argument in :meth:`Series.take` and :meth:`DataFrame.take` (:issue:`17352`) +- Removed the previously deprecated behavior of arithmetic operations with ``datetime.date`` objects (:issue:`21152`) + +.. _whatsnew_0250.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Significant speedup in :class:`SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`) +- :meth:`DataFrame.to_stata()` is now faster when outputting data with any string or non-native endian columns (:issue:`25045`) +- Improved performance of :meth:`Series.searchsorted`. 
The speedup is especially large when the dtype is + int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`) +- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) +- Improved performance of slicing and other selected operation on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`, :issue:`26722`) +- :class:`RangeIndex` now performs standard lookup without instantiating an actual hashtable, hence saving memory (:issue:`16685`) +- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) +- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) +- Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) +- Improved performance of :meth:`DataFrame.to_csv` when writing datetime dtypes (:issue:`25708`) +- Improved performance of :meth:`read_csv` by much faster parsing of ``MM/YYYY`` and ``DD/MM/YYYY`` datetime formats (:issue:`25922`) +- Improved performance of nanops for dtypes that cannot store NaNs. Speedup is particularly prominent for :meth:`Series.all` and :meth:`Series.any` (:issue:`25070`) +- Improved performance of :meth:`Series.map` for dictionary mappers on categorical series by mapping the categories instead of mapping all values (:issue:`23785`) +- Improved performance of :meth:`IntervalIndex.intersection` (:issue:`24813`) +- Improved performance of :meth:`read_csv` by faster concatenating date columns without extra conversion to string for integer/float zero and float ``NaN``; by faster checking the string for the possibility of being a date (:issue:`25754`) +- Improved performance of :attr:`IntervalIndex.is_unique` by removing conversion to ``MultiIndex`` (:issue:`24813`) +- Restored performance of :meth:`DatetimeIndex.__iter__` by re-enabling specialized code path (:issue:`26702`) +- Improved performance when building :class:`MultiIndex` with at least one :class:`CategoricalIndex` level (:issue:`22044`) +- Improved performance by removing the need for a garbage collect when checking for ``SettingWithCopyWarning`` (:issue:`27031`) +- For :meth:`to_datetime` changed default value of cache parameter to ``True`` (:issue:`26043`) +- Improved performance of :class:`DatetimeIndex` and :class:`PeriodIndex` slicing given non-unique, monotonic data (:issue:`27136`). +- Improved performance of :meth:`pd.read_json` for index-oriented data. (:issue:`26773`) +- Improved performance of :meth:`MultiIndex.shape` (:issue:`27384`). + +.. 
_whatsnew_0250.bug_fixes: + +Bug fixes +~~~~~~~~~ + + +Categorical +^^^^^^^^^^^ + +- Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) +- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in ``True`` (:issue:`26504`) +- Bug in :meth:`DataFrame.dropna` when the :class:`DataFrame` has a :class:`CategoricalIndex` containing :class:`Interval` objects incorrectly raised a ``TypeError`` (:issue:`25087`) + +Datetimelike +^^^^^^^^^^^^ + +- Bug in :func:`to_datetime` which would raise an (incorrect) ``ValueError`` when called with a date far into the future and the ``format`` argument specified instead of raising ``OutOfBoundsDatetime`` (:issue:`23830`) +- Bug in :func:`to_datetime` which would raise ``InvalidIndexError: Reindexing only valid with uniquely valued Index objects`` when called with ``cache=True``, with ``arg`` including at least two different elements from the set ``{None, numpy.nan, pandas.NaT}`` (:issue:`22305`) +- Bug in :class:`DataFrame` and :class:`Series` where timezone aware data with ``dtype='datetime64[ns]`` was not cast to naive (:issue:`25843`) +- Improved :class:`Timestamp` type checking in various datetime functions to prevent exceptions when using a subclassed ``datetime`` (:issue:`25851`) +- Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`) +- Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`) +- Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`) +- Bug in :func:`to_datetime` which raises unhandled ``OverflowError`` when called with mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``error='coerce'`` (:issue:`25512`) +- Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, :class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``levels`` parameter was ignored. (:issue:`26675`) +- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'`` +- Bug when comparing a :class:`PeriodIndex` against a zero-dimensional numpy array (:issue:`26689`) +- Bug in constructing a ``Series`` or ``DataFrame`` from a numpy ``datetime64`` array with a non-ns unit and out-of-bound timestamps generating rubbish data, which will now correctly raise an ``OutOfBoundsDatetime`` error (:issue:`26206`). 
+- Bug in :func:`date_range` with unnecessary ``OverflowError`` being raised for very large or very small dates (:issue:`26651`) +- Bug where adding :class:`Timestamp` to a ``np.timedelta64`` object would raise instead of returning a :class:`Timestamp` (:issue:`24775`) +- Bug where comparing a zero-dimensional numpy array containing a ``np.datetime64`` object to a :class:`Timestamp` would incorrect raise ``TypeError`` (:issue:`26916`) +- Bug in :func:`to_datetime` which would raise ``ValueError: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True`` when called with ``cache=True``, with ``arg`` including datetime strings with different offset (:issue:`26097`) +- + +Timedelta +^^^^^^^^^ + +- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indices in some cases an empty ``Index`` was returned when in fact an intersection existed (:issue:`25913`) +- Bug with comparisons between :class:`Timedelta` and ``NaT`` raising ``TypeError`` (:issue:`26039`) +- Bug when adding or subtracting a :class:`BusinessHour` to a :class:`Timestamp` with the resulting time landing in a following or prior day respectively (:issue:`26381`) +- Bug when comparing a :class:`TimedeltaIndex` against a zero-dimensional numpy array (:issue:`26689`) + +Timezones +^^^^^^^^^ + +- Bug in :func:`DatetimeIndex.to_frame` where timezone aware data would be converted to timezone naive data (:issue:`25809`) +- Bug in :func:`to_datetime` with ``utc=True`` and datetime strings that would apply previously parsed UTC offsets to subsequent arguments (:issue:`24992`) +- Bug in :func:`Timestamp.tz_localize` and :func:`Timestamp.tz_convert` does not propagate ``freq`` (:issue:`25241`) +- Bug in :func:`Series.at` where setting :class:`Timestamp` with timezone raises ``TypeError`` (:issue:`25506`) +- Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`) +- Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`) +- Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`) +- Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`) +- Bug in :func:`date_range` where ambiguous or nonexistent start or end times were not handled by the ``ambiguous`` or ``nonexistent`` keywords respectively (:issue:`27088`) +- Bug in :meth:`DatetimeIndex.union` when combining a timezone aware and timezone unaware :class:`DatetimeIndex` (:issue:`21671`) +- Bug when applying a numpy reduction function (e.g. :meth:`numpy.minimum`) to a timezone aware :class:`Series` (:issue:`15552`) + +Numeric +^^^^^^^ + +- Bug in :meth:`to_numeric` in which large negative numbers were being improperly handled (:issue:`24910`) +- Bug in :meth:`to_numeric` in which numbers were being coerced to float, even though ``errors`` was not ``coerce`` (:issue:`24910`) +- Bug in :meth:`to_numeric` in which invalid values for ``errors`` were being allowed (:issue:`26466`) +- Bug in :class:`format` in which floating point complex numbers were not being formatted to proper display precision and trimming (:issue:`25514`) +- Bug in error messages in :meth:`DataFrame.corr` and :meth:`Series.corr`. Added the possibility of using a callable. 
(:issue:`25729`) +- Bug in :meth:`Series.divmod` and :meth:`Series.rdivmod` which would raise an (incorrect) ``ValueError`` rather than return a pair of :class:`Series` objects as result (:issue:`25557`) +- Raises a helpful exception when a non-numeric index is sent to :meth:`interpolate` with methods which require numeric index. (:issue:`21662`) +- Bug in :meth:`~pandas.eval` when comparing floats with scalar operators, for example: ``x < -0.1`` (:issue:`25928`) +- Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`) +- Bug in ``divmod`` with a :class:`Series` object containing zeros incorrectly raising ``AttributeError`` (:issue:`26987`) +- Inconsistency in :class:`Series` floor-division (`//`) and ``divmod`` filling positive//zero with ``NaN`` instead of ``Inf`` (:issue:`27321`) +- + +Conversion +^^^^^^^^^^ + +- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the ``errors`` parameter was ignored. (:issue:`25905`) +- + +Strings +^^^^^^^ + +- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`) +- Improved error message when passing :class:`Series` of wrong dtype to :meth:`Series.str.cat` (:issue:`22722`) +- + + +Interval +^^^^^^^^ + +- Construction of :class:`Interval` is restricted to numeric, :class:`Timestamp` and :class:`Timedelta` endpoints (:issue:`23013`) +- Fixed bug in :class:`Series`/:class:`DataFrame` not displaying ``NaN`` in :class:`IntervalIndex` with missing values (:issue:`25984`) +- Bug in :meth:`IntervalIndex.get_loc` where a ``KeyError`` would be incorrectly raised for a decreasing :class:`IntervalIndex` (:issue:`25860`) +- Bug in :class:`Index` constructor where passing mixed closed :class:`Interval` objects would result in a ``ValueError`` instead of an ``object`` dtype ``Index`` (:issue:`27172`) + +Indexing +^^^^^^^^ + +- Improved exception message when calling :meth:`DataFrame.iloc` with a list of non-numeric objects (:issue:`25753`). +- Improved exception message when calling ``.iloc`` or ``.loc`` with a boolean indexer with different length (:issue:`26658`). +- Bug in ``KeyError`` exception message when indexing a :class:`MultiIndex` with a non-existent key not displaying the original key (:issue:`27250`). +- Bug in ``.iloc`` and ``.loc`` with a boolean indexer not raising an ``IndexError`` when too few items are passed (:issue:`26658`). +- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` where ``KeyError`` was not raised for a ``MultiIndex`` when the key was less than or equal to the number of levels in the :class:`MultiIndex` (:issue:`14885`). +- Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`). +- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). 
+- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise an error (:issue:`26390`)
+- Allow keyword arguments for a callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
+- Fixed a ``KeyError`` when indexing a :class:`MultiIndex` level with a list containing exactly one label that is missing from the index (:issue:`27148`)
+- Bug which produced ``AttributeError`` on partial matching of a :class:`Timestamp` in a :class:`MultiIndex` (:issue:`26944`)
+- Bug in :class:`Categorical` and :class:`CategoricalIndex` with :class:`Interval` values when using the ``in`` operator (``__contains__``) with objects that are not comparable to the values in the ``Interval`` (:issue:`23705`)
+- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` on a :class:`DataFrame` with a single timezone-aware datetime64[ns] column incorrectly returning a scalar instead of a :class:`Series` (:issue:`27110`)
+- Bug in :class:`CategoricalIndex` and :class:`Categorical` incorrectly raising ``ValueError`` instead of ``TypeError`` when a list is passed using the ``in`` operator (``__contains__``) (:issue:`21729`)
+- Bug in setting a new value in a :class:`Series` with a :class:`Timedelta` object incorrectly casting the value to an integer (:issue:`22717`)
+- Bug in :class:`Series` setting a new key (``__setitem__``) with a timezone-aware datetime incorrectly raising ``ValueError`` (:issue:`12862`)
+- Bug in :meth:`DataFrame.iloc` when indexing with a read-only indexer (:issue:`17192`)
+- Bug in :class:`Series` setting an existing tuple key (``__setitem__``) with timezone-aware datetime values incorrectly raising ``TypeError`` (:issue:`20441`)
+
+Missing
+^^^^^^^
+
+- Fixed misleading exception message in :meth:`Series.interpolate` if argument ``order`` is required, but omitted (:issue:`10633`, :issue:`24014`). 
+- Fixed class type displayed in exception message in :meth:`DataFrame.dropna` if invalid ``axis`` parameter passed (:issue:`25555`) +- A ``ValueError`` will now be thrown by :meth:`DataFrame.fillna` when ``limit`` is not a positive integer (:issue:`27042`) +- + +MultiIndex +^^^^^^^^^^ + +- Bug in which incorrect exception raised by :class:`Timedelta` when testing the membership of :class:`MultiIndex` (:issue:`24570`) +- + +IO +^^ + +- Bug in :func:`DataFrame.to_html()` where values were truncated using display options instead of outputting the full content (:issue:`17004`) +- Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) +- Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) +- Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) +- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to :class:`Timestamp`, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) +- Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string ``"nan"`` instead of ``numpy.nan`` (:issue:`25468`) +- :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AssertionError`` (:issue:`25608`) +- Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`) +- Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`) +- Improved performance in :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` when converting columns that have missing values (:issue:`25772`) +- Bug in :meth:`DataFrame.to_html` where header numbers would ignore display options when rounding (:issue:`17280`) +- Bug in :func:`read_hdf` where reading a table from an HDF5 file written directly with PyTables fails with a ``ValueError`` when using a sub-selection via the ``start`` or ``stop`` arguments (:issue:`11188`) +- Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`) +- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`) +- Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`) +- Improved the ``col_space`` parameter in :meth:`DataFrame.to_html` to accept a string so CSS length values can be set correctly (:issue:`25941`) +- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`) +- Adds ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 of the ``pandas-gbq`` library as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries. 
(:issue:`26104`) +- Fixed memory leak in :meth:`DataFrame.to_json` when dealing with numeric data (:issue:`24889`) +- Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`) +- Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`) +- :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`) +- Fixed bug in :func:`pandas.read_csv` where a BOM would result in incorrect parsing using engine='python' (:issue:`26545`) +- :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`) +- Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`). +- Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. ``PeriodIndex``) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`) +- Bug in :meth:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`) +- Bug in :meth:`to_msgpack` and :meth:`read_msgpack` which would raise a ``ValueError`` rather than a ``FileNotFoundError`` for an invalid path (:issue:`27160`) +- Fixed bug in :meth:`DataFrame.to_parquet` which would raise a ``ValueError`` when the dataframe had no columns (:issue:`27339`) +- Allow parsing of :class:`PeriodDtype` columns when using :func:`read_csv` (:issue:`26934`) + +Plotting +^^^^^^^^ + +- Fixed bug where :class:`api.extensions.ExtensionArray` could not be used in matplotlib plotting (:issue:`25587`) +- Bug in an error message in :meth:`DataFrame.plot`. 
Improved the error message if non-numerics are passed to :meth:`DataFrame.plot` (:issue:`25481`) +- Bug in incorrect ticklabel positions when plotting an index that are non-numeric / non-datetime (:issue:`7612`, :issue:`15912`, :issue:`22334`) +- Fixed bug causing plots of :class:`PeriodIndex` timeseries to fail if the frequency is a multiple of the frequency rule code (:issue:`14763`) +- Fixed bug when plotting a :class:`DatetimeIndex` with ``datetime.timezone.utc`` timezone (:issue:`17173`) +- + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :meth:`pandas.core.resample.Resampler.agg` with a timezone aware index where ``OverflowError`` would raise when passing a list of functions (:issue:`22660`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) +- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying an aggregation function to timezone aware data (:issue:`23683`) +- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) +- Bug in :func:`pandas.core.groupby.GroupBy.size` when grouping only NA values (:issue:`23050`) +- Bug in :func:`Series.groupby` where ``observed`` kwarg was previously ignored (:issue:`24880`) +- Bug in :func:`Series.groupby` where using ``groupby`` with a :class:`MultiIndex` Series with a list of labels equal to the length of the series caused incorrect grouping (:issue:`25704`) +- Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) +- Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`) +- Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`) +- Bug in :meth:`pandas.core.window.Rolling.count` and ``pandas.core.window.Expanding.count`` was previously ignoring the ``axis`` keyword (:issue:`13503`) +- Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) +- Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) +- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`) +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise a ``ValueError`` (:issue:`26208`) +- Bug in :meth:`pandas.core.frame.DataFrame.groupby` where passing a :class:`pandas.core.groupby.grouper.Grouper` would return incorrect groups when using the ``.groups`` accessor (:issue:`26326`) +- Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. 
(:issue:`26310`) +- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`) +- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`) +- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.ExponentialMovingWindow` functions to exclude nuisance columns from results instead of raising errors and raise a ``DataError`` only if all columns are nuisance (:issue:`12537`) +- Bug in :meth:`pandas.core.window.Rolling.max` and :meth:`pandas.core.window.Rolling.min` where incorrect results are returned with an empty variable window (:issue:`26005`) +- Raise a helpful exception when an unsupported weighted window function is used as an argument of :meth:`pandas.core.window.Window.aggregate` (:issue:`26597`) + +Reshaping +^^^^^^^^^ + +- Bug in :func:`pandas.merge` adds a string of ``None``, if ``None`` is assigned in suffixes instead of remain the column name as-is (:issue:`24782`). +- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (missing index values are now assigned NA) (:issue:`24212`, :issue:`25009`) +- :func:`to_records` now accepts dtypes to its ``column_dtypes`` parameter (:issue:`24895`) +- Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`) +- Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if ``dropna`` argument is ``False``, when the ``aggfunc`` argument contains a ``list`` (:issue:`22159`) +- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`). +- Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`) +- bug in :class:`DataFrame` instantiating with a dict of iterators or generators (e.g. ``pd.DataFrame({'A': reversed(range(3))})``) raised an error (:issue:`26349`). +- Bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`). +- Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`) +- Bug in :func:`Series.apply` failed when the series is a timezone aware :class:`DatetimeIndex` (:issue:`25959`) +- Bug in :func:`pandas.cut` where large bins could incorrectly raise an error due to an integer overflow (:issue:`26045`) +- Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) +- Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) +- Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) +- Bug in which :meth:`DataFrame.from_dict` ignored order of ``OrderedDict`` when ``orient='index'`` (:issue:`8425`). 
+- Bug in :meth:`DataFrame.transpose` where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ``ValueError`` (:issue:`26825`) +- Bug in :func:`pivot_table` when pivoting a timezone aware column as the ``values`` would remove timezone information (:issue:`14948`) +- Bug in :func:`merge_asof` when specifying multiple ``by`` columns where one is ``datetime64[ns, tz]`` dtype (:issue:`26649`) + +Sparse +^^^^^^ + +- Significant speedup in :class:`SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`) +- Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) +- Bug in :class:`SparseDataFrame` when adding a column in which the length of values does not match length of index, ``AssertionError`` is raised instead of raising ``ValueError`` (:issue:`25484`) +- Introduce a better error message in :meth:`Series.sparse.from_coo` so it returns a ``TypeError`` for inputs that are not coo matrices (:issue:`26554`) +- Bug in :func:`numpy.modf` on a :class:`SparseArray`. Now a tuple of :class:`SparseArray` is returned (:issue:`26946`). + + +Build changes +^^^^^^^^^^^^^ + +- Fix install error with PyPy on macOS (:issue:`26536`) + +ExtensionArray +^^^^^^^^^^^^^^ + +- Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). +- :meth:`Series.count` miscounts NA values in ExtensionArrays (:issue:`26835`) +- Added ``Series.__array_ufunc__`` to better handle NumPy ufuncs applied to Series backed by extension arrays (:issue:`23293`). +- Keyword argument ``deep`` has been removed from :meth:`ExtensionArray.copy` (:issue:`27083`) + +Other +^^^^^ + +- Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`) +- Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions (:issue:`26125`) +- Use actual class name in repr of empty objects of a ``Series`` subclass (:issue:`27001`). +- Bug in :class:`DataFrame` where passing an object array of timezone-aware ``datetime`` objects would incorrectly raise ``ValueError`` (:issue:`13287`) + +.. _whatsnew_0.250.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.24.2..v0.25.0 diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst new file mode 100644 index 00000000..cc24ba5d --- /dev/null +++ b/doc/source/whatsnew/v0.25.1.rst @@ -0,0 +1,119 @@ +.. _whatsnew_0251: + +What's new in 0.25.1 (August 21, 2019) +-------------------------------------- + +These are the changes in pandas 0.25.1. See :ref:`release` for a full changelog +including other versions of pandas. + +IO and LZMA +~~~~~~~~~~~ + +Some users may unknowingly have an incomplete Python installation lacking the ``lzma`` module from the standard library. In this case, ``import pandas`` failed due to an ``ImportError`` (:issue:`27575`). +pandas will now warn, rather than raising an ``ImportError`` if the ``lzma`` module is not present. Any subsequent attempt to use ``lzma`` methods will raise a ``RuntimeError``. +A possible fix for the lack of the ``lzma`` module is to ensure you have the necessary libraries and then re-install Python. +For example, on MacOS installing Python with ``pyenv`` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like ``xz``). Compilation will succeed, but Python might fail at run time. 
The issue can be solved by installing the necessary dependencies and then re-installing Python. + +.. _whatsnew_0251.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +- Bug in :meth:`Categorical.fillna` that would replace all values, not just those that are ``NaN`` (:issue:`26215`) + +Datetimelike +^^^^^^^^^^^^ + +- Bug in :func:`to_datetime` where passing a timezone-naive :class:`DatetimeArray` or :class:`DatetimeIndex` and ``utc=True`` would incorrectly return a timezone-naive result (:issue:`27733`) +- Bug in :meth:`Period.to_timestamp` where a :class:`Period` outside the :class:`Timestamp` implementation bounds (roughly 1677-09-21 to 2262-04-11) would return an incorrect :class:`Timestamp` instead of raising ``OutOfBoundsDatetime`` (:issue:`19643`) +- Bug in iterating over :class:`DatetimeIndex` when the underlying data is read-only (:issue:`28055`) + +Timezones +^^^^^^^^^ + +- Bug in :class:`Index` where a numpy object array with a timezone aware :class:`Timestamp` and ``np.nan`` would not return a :class:`DatetimeIndex` (:issue:`27011`) + +Numeric +^^^^^^^ + +- Bug in :meth:`Series.interpolate` when using a timezone aware :class:`DatetimeIndex` (:issue:`27548`) +- Bug when printing negative floating point complex numbers would raise an ``IndexError`` (:issue:`27484`) +- Bug where :class:`DataFrame` arithmetic operators such as :meth:`DataFrame.mul` with a :class:`Series` with axis=1 would raise an ``AttributeError`` on :class:`DataFrame` larger than the minimum threshold to invoke numexpr (:issue:`27636`) +- Bug in :class:`DataFrame` arithmetic where missing values in results were incorrectly masked with ``NaN`` instead of ``Inf`` (:issue:`27464`) + +Conversion +^^^^^^^^^^ + +- Improved the warnings for the deprecated methods :meth:`Series.real` and :meth:`Series.imag` (:issue:`27610`) + +Interval +^^^^^^^^ + +- Bug in :class:`IntervalIndex` where ``dir(obj)`` would raise ``ValueError`` (:issue:`27571`) + +Indexing +^^^^^^^^ + +- Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`) +- Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`) +- Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`). +- Fix regression in ``.ix`` fallback with an ``IntervalIndex`` (:issue:`27865`). + +Missing +^^^^^^^ + +- Bug in :func:`pandas.isnull` or :func:`pandas.isna` when the input is a type e.g. ``type(pandas.Series())`` (:issue:`27482`) + +IO +^^ + +- Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`) +- Better error message when a negative header is passed in :func:`pandas.read_csv` (:issue:`27779`) +- Follow the ``min_rows`` display option (introduced in v0.25.0) correctly in the HTML repr in the notebook (:issue:`27991`). + +Plotting +^^^^^^^^ + +- Added a ``pandas_plotting_backends`` entrypoint group for registering plot backends. See :ref:`extending.plotting-backends` for more (:issue:`26747`). +- Fixed the re-instatement of Matplotlib datetime converters after calling + :meth:`pandas.plotting.deregister_matplotlib_converters` (:issue:`27481`). +- Fix compatibility issue with matplotlib when passing a pandas ``Index`` to a plot call (:issue:`27775`). 
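+
+For the ``pandas_plotting_backends`` entry point group mentioned above, the sketch below
+shows how a third-party backend might register itself; the package and module names here
+are hypothetical, see :ref:`extending.plotting-backends` for the full interface.
+
+.. code-block:: python
+
+    # setup.py of a hypothetical third-party plotting backend package
+    from setuptools import setup
+
+    setup(
+        name="my-pandas-backend",
+        packages=["my_backend"],
+        entry_points={
+            # pandas discovers plotting backends registered under this group
+            "pandas_plotting_backends": [
+                "my_backend = my_backend.plotting",
+            ],
+        },
+    )
+
+Once installed, the backend can be selected with
+``pd.set_option("plotting.backend", "my_backend")``.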
+ +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Fixed regression in :meth:`pands.core.groupby.DataFrameGroupBy.quantile` raising when multiple quantiles are given (:issue:`27526`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) +- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`) +- Bug in windowing over read-only arrays (:issue:`27766`) +- Fixed segfault in ``pandas.core.groupby.DataFrameGroupBy.quantile`` when an invalid quantile was passed (:issue:`27470`) + +Reshaping +^^^^^^^^^ + +- A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) +- Bug :meth:`merge_asof` could not merge :class:`Timedelta` objects when passing ``tolerance`` kwarg (:issue:`27642`) +- Bug in :meth:`DataFrame.crosstab` when ``margins`` set to ``True`` and ``normalize`` is not ``False``, an error is raised. (:issue:`27500`) +- :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`) +- Bug in :meth:`DataFrame.join` raising with readonly arrays (:issue:`27943`) + +Sparse +^^^^^^ + +- Bug in reductions for :class:`Series` with Sparse dtypes (:issue:`27080`) + +Other +^^^^^ + +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when replacing timezone-aware timestamps using a dict-like replacer (:issue:`27720`) +- Bug in :meth:`Series.rename` when using a custom type indexer. Now any value that isn't callable or dict-like is treated as a scalar. (:issue:`27814`) + +.. _whatsnew_0.251.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.25.0..v0.25.1 diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst new file mode 100644 index 00000000..ab6aaebe --- /dev/null +++ b/doc/source/whatsnew/v0.25.2.rst @@ -0,0 +1,49 @@ +.. _whatsnew_0252: + +What's new in 0.25.2 (October 15, 2019) +--------------------------------------- + +These are the changes in pandas 0.25.2. See :ref:`release` for a full changelog +including other versions of pandas. + +.. note:: + + pandas 0.25.2 adds compatibility for Python 3.8 (:issue:`28147`). + +.. _whatsnew_0252.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Indexing +^^^^^^^^ + +- Fix regression in :meth:`DataFrame.reindex` not following the ``limit`` argument (:issue:`28631`). +- Fix regression in :meth:`RangeIndex.get_indexer` for decreasing :class:`RangeIndex` where target values may be improperly identified as missing/present (:issue:`28678`) + +IO +^^ + +- Fix regression in notebook display where `` of the output html), + # there are two `foot_` in the id and class + fp1 = "foot0_" + fp2 = "foot0_foot0_" + expected = dedent( + f"""\ + + + + + + + + + + + + + +
    \n", + " 'selector': 'td:hover',\n", + " 'props': [('background-color', '#ffffb3')]\n", + "}\n", + "index_names = {\n", + " 'selector': '.index_name',\n", + " 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'\n", + "}\n", + "headers = {\n", + " 'selector': 'th:not(.index_name)',\n", + " 'props': 'background-color: #000066; color: white;'\n", + "}\n", + "s.set_table_styles([cell_hover, index_names, headers])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s.set_uuid('after_tab_styles1')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we just add a couple more styling artifacts targeting specific parts of the table. Be careful here, since we are *chaining methods* we need to explicitly instruct the method **not to** ``overwrite`` the existing styles." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s.set_table_styles([\n", + " {'selector': 'th.col_heading', 'props': 'text-align: center;'},\n", + " {'selector': 'th.col_heading.level0', 'props': 'font-size: 1.5em;'},\n", + " {'selector': 'td', 'props': 'text-align: center; font-weight: bold;'},\n", + "], overwrite=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s.set_uuid('after_tab_styles2')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a convenience method (*since version 1.2.0*) we can also pass a **dict** to [.set_table_styles()][table] which contains row or column keys. Behind the scenes Styler just indexes the keys and adds relevant `.col` or `.row` classes as necessary to the given CSS selectors.\n", + "\n", + "[table]: ../reference/api/pandas.io.formats.style.Styler.set_table_styles.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s.set_table_styles({\n", + " ('Regression', 'Tumour'): [{'selector': 'th', 'props': 'border-left: 1px solid white'},\n", + " {'selector': 'td', 'props': 'border-left: 1px solid #000066'}]\n", + "}, overwrite=False, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s.set_uuid('xyz01')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting Classes and Linking to External CSS\n", + "\n", + "If you have designed a website then it is likely you will already have an external CSS file that controls the styling of table and cell objects within it. You may want to use these native files rather than duplicate all the CSS in python (and duplicate any maintenance work).\n", + "\n", + "### Table Attributes\n", + "\n", + "It is very easy to add a `class` to the main `` using [.set_table_attributes()][tableatt]. 
This method can also attach inline styles - read more in [CSS Hierarchies](#CSS-Hierarchies).\n", + "\n", + "[tableatt]: ../reference/api/pandas.io.formats.style.Styler.set_table_attributes.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "out = s.set_table_attributes('class=\"my-table-cls\"').to_html()\n", + "print(out[out.find('` elements of the `
    `. Rather than use external CSS we will create our classes internally and add them to table style. We will save adding the borders until the [section on tooltips](#Tooltips).\n", + "\n", + "[tdclass]: ../reference/api/pandas.io.formats.style.Styler.set_td_classes.rst\n", + "[styler]: ../reference/api/pandas.io.formats.style.Styler.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s.set_table_styles([ # create internal CSS classes\n", + " {'selector': '.true', 'props': 'background-color: #e6ffe6;'},\n", + " {'selector': '.false', 'props': 'background-color: #ffe6e6;'},\n", + "], overwrite=False)\n", + "cell_color = pd.DataFrame([['true ', 'false ', 'true ', 'false '], \n", + " ['false ', 'true ', 'false ', 'true ']], \n", + " index=df.index, \n", + " columns=df.columns[:4])\n", + "s.set_td_classes(cell_color)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s.set_uuid('after_classes')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Styler Functions\n", + "\n", + "### Acting on Data\n", + "\n", + "We use the following methods to pass your style functions. Both of those methods take a function (and some other keyword arguments) and apply it to the DataFrame in a certain way, rendering CSS styles.\n", + "\n", + "- [.applymap()][applymap] (elementwise): accepts a function that takes a single value and returns a string with the CSS attribute-value pair.\n", + "- [.apply()][apply] (column-/row-/table-wise): accepts a function that takes a Series or DataFrame and returns a Series, DataFrame, or numpy array with an identical shape where each element is a string with a CSS attribute-value pair. This method passes each column or row of your DataFrame one-at-a-time or the entire table at once, depending on the `axis` keyword argument. For columnwise use `axis=0`, rowwise use `axis=1`, and for the entire table at once use `axis=None`.\n", + "\n", + "This method is powerful for applying multiple, complex logic to data cells. We create a new DataFrame to demonstrate this.\n", + "\n", + "[apply]: ../reference/api/pandas.io.formats.style.Styler.apply.rst\n", + "[applymap]: ../reference/api/pandas.io.formats.style.Styler.applymap.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(0)\n", + "df2 = pd.DataFrame(np.random.randn(10,4), columns=['A','B','C','D'])\n", + "df2.style" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For example we can build a function that colors text if it is negative, and chain this with a function that partially fades cells of negligible value. Since this looks at each element in turn we use ``applymap``." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def style_negative(v, props=''):\n", + " return props if v < 0 else None\n", + "s2 = df2.style.applymap(style_negative, props='color:red;')\\\n", + " .applymap(lambda v: 'opacity: 20%;' if (v < 0.3) and (v > -0.3) else None)\n", + "s2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s2.set_uuid('after_applymap')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also build a function that highlights the maximum value across rows, cols, and the DataFrame all at once. In this case we use ``apply``. Below we highlight the maximum in a column." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def highlight_max(s, props=''):\n", + " return np.where(s == np.nanmax(s.values), props, '')\n", + "s2.apply(highlight_max, props='color:white;background-color:darkblue', axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s2.set_uuid('after_apply')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use the same function across the different axes, highlighting here the DataFrame maximum in purple, and row maximums in pink." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s2.apply(highlight_max, props='color:white;background-color:pink;', axis=1)\\\n", + " .apply(highlight_max, props='color:white;background-color:purple', axis=None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s2.set_uuid('after_apply_again')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This last example shows how some styles have been overwritten by others. In general the most recent style applied is active but you can read more in the [section on CSS hierarchies](#CSS-Hierarchies). You can also apply these styles to more granular parts of the DataFrame - read more in section on [subset slicing](#Finer-Control-with-Slicing).\n", + "\n", + "It is possible to replicate some of this functionality using just classes but it can be more cumbersome. See [item 3) of Optimization](#Optimization)\n", + "\n", + "
    \n", + "\n", + "*Debugging Tip*: If you're having trouble writing your style function, try just passing it into ``DataFrame.apply``. Internally, ``Styler.apply`` uses ``DataFrame.apply`` so the result should be the same, and with ``DataFrame.apply`` you will be able to inspect the CSS string output of your intended function in each cell.\n", + "\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Acting on the Index and Column Headers\n", + "\n", + "Similar application is achieved for headers by using:\n", + " \n", + "- [.applymap_index()][applymapindex] (elementwise): accepts a function that takes a single value and returns a string with the CSS attribute-value pair.\n", + "- [.apply_index()][applyindex] (level-wise): accepts a function that takes a Series and returns a Series, or numpy array with an identical shape where each element is a string with a CSS attribute-value pair. This method passes each level of your Index one-at-a-time. To style the index use `axis=0` and to style the column headers use `axis=1`.\n", + "\n", + "You can select a `level` of a `MultiIndex` but currently no similar `subset` application is available for these methods.\n", + "\n", + "[applyindex]: ../reference/api/pandas.io.formats.style.Styler.apply_index.rst\n", + "[applymapindex]: ../reference/api/pandas.io.formats.style.Styler.applymap_index.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s2.applymap_index(lambda v: \"color:pink;\" if v>4 else \"color:darkblue;\", axis=0)\n", + "s2.apply_index(lambda s: np.where(s.isin([\"A\", \"B\"]), \"color:pink;\", \"color:darkblue;\"), axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tooltips and Captions\n", + "\n", + "Table captions can be added with the [.set_caption()][caption] method. You can use table styles to control the CSS relevant to the caption.\n", + "\n", + "[caption]: ../reference/api/pandas.io.formats.style.Styler.set_caption.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s.set_caption(\"Confusion matrix for multiple cancer prediction models.\")\\\n", + " .set_table_styles([{\n", + " 'selector': 'caption',\n", + " 'props': 'caption-side: bottom; font-size:1.25em;'\n", + " }], overwrite=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s.set_uuid('after_caption')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Adding tooltips (*since version 1.3.0*) can be done using the [.set_tooltips()][tooltips] method in the same way you can add CSS classes to data cells by providing a string based DataFrame with intersecting indices and columns. You don't have to specify a `css_class` name or any css `props` for the tooltips, since there are standard defaults, but the option is there if you want more visual control. 
\n", + "\n", + "[tooltips]: ../reference/api/pandas.io.formats.style.Styler.set_tooltips.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tt = pd.DataFrame([['This model has a very strong true positive rate', \n", + " \"This model's total number of false negatives is too high\"]], \n", + " index=['Tumour (Positive)'], columns=df.columns[[0,3]])\n", + "s.set_tooltips(tt, props='visibility: hidden; position: absolute; z-index: 1; border: 1px solid #000066;'\n", + " 'background-color: white; color: #000066; font-size: 0.8em;' \n", + " 'transform: translate(0px, -24px); padding: 0.6em; border-radius: 0.5em;')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s.set_uuid('after_tooltips')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The only thing left to do for our table is to add the highlighting borders to draw the audience attention to the tooltips. We will create internal CSS classes as before using table styles. **Setting classes always overwrites** so we need to make sure we add the previous classes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s.set_table_styles([ # create internal CSS classes\n", + " {'selector': '.border-red', 'props': 'border: 2px dashed red;'},\n", + " {'selector': '.border-green', 'props': 'border: 2px dashed green;'},\n", + "], overwrite=False)\n", + "cell_border = pd.DataFrame([['border-green ', ' ', ' ', 'border-red '], \n", + " [' ', ' ', ' ', ' ']], \n", + " index=df.index, \n", + " columns=df.columns[:4])\n", + "s.set_td_classes(cell_color + cell_border)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", + "s.set_uuid('after_borders')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finer Control with Slicing\n", + "\n", + "The examples we have shown so far for the `Styler.apply` and `Styler.applymap` functions have not demonstrated the use of the ``subset`` argument. This is a useful argument which permits a lot of flexibility: it allows you to apply styles to specific rows or columns, without having to code that logic into your `style` function.\n", + "\n", + "The value passed to `subset` behaves similar to slicing a DataFrame;\n", + "\n", + "- A scalar is treated as a column label\n", + "- A list (or Series or NumPy array) is treated as multiple column labels\n", + "- A tuple is treated as `(row_indexer, column_indexer)`\n", + "\n", + "Consider using `pd.IndexSlice` to construct the tuple for the last one. We will create a MultiIndexed DataFrame to demonstrate the functionality." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df3 = pd.DataFrame(np.random.randn(4,4), \n", + " pd.MultiIndex.from_product([['A', 'B'], ['r1', 'r2']]),\n", + " columns=['c1','c2','c3','c4'])\n", + "df3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will use subset to highlight the maximum in the third and fourth columns with red text. We will highlight the subset sliced region in yellow." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "slice_ = ['c3', 'c4']\n", + "df3.style.apply(highlight_max, props='color:red;', axis=0, subset=slice_)\\\n", + " .set_properties(**{'background-color': '#ffffb3'}, subset=slice_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If combined with the ``IndexSlice`` as suggested then it can index across both dimensions with greater flexibility." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "idx = pd.IndexSlice\n", + "slice_ = idx[idx[:,'r1'], idx['c2':'c4']]\n", + "df3.style.apply(highlight_max, props='color:red;', axis=0, subset=slice_)\\\n", + " .set_properties(**{'background-color': '#ffffb3'}, subset=slice_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This also provides the flexibility to sub select rows when used with the `axis=1`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "slice_ = idx[idx[:,'r2'], :]\n", + "df3.style.apply(highlight_max, props='color:red;', axis=1, subset=slice_)\\\n", + " .set_properties(**{'background-color': '#ffffb3'}, subset=slice_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is also scope to provide **conditional filtering**. \n", + "\n", + "Suppose we want to highlight the maximum across columns 2 and 4 only in the case that the sum of columns 1 and 3 is less than -2.0 *(essentially excluding rows* `(:,'r2')`*)*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "slice_ = idx[idx[(df3['c1'] + df3['c3']) < -2.0], ['c2', 'c4']]\n", + "df3.style.apply(highlight_max, props='color:red;', axis=1, subset=slice_)\\\n", + " .set_properties(**{'background-color': '#ffffb3'}, subset=slice_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Only label-based slicing is supported right now, not positional, and not callables.\n", + "\n", + "If your style function uses a `subset` or `axis` keyword argument, consider wrapping your function in a `functools.partial`, partialing out that keyword.\n", + "\n", + "```python\n", + "my_func2 = functools.partial(my_func, subset=42)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optimization\n", + "\n", + "Generally, for smaller tables and most cases, the rendered HTML does not need to be optimized, and we don't really recommend it. There are two cases where it is worth considering:\n", + "\n", + " - If you are rendering and styling a very large HTML table, certain browsers have performance issues.\n", + " - If you are using ``Styler`` to dynamically create part of online user interfaces and want to improve network performance.\n", + " \n", + "Here we recommend the following steps to implement:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Remove UUID and cell_ids\n", + "\n", + "Ignore the `uuid` and set `cell_ids` to `False`. This will prevent unnecessary HTML." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
    \n", + "\n", + "This is sub-optimal:\n", + "\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4 = pd.DataFrame([[1,2],[3,4]])\n", + "s4 = df4.style" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
    \n", + "\n", + "This is better:\n", + "\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.io.formats.style import Styler\n", + "s4 = Styler(df4, uuid_len=0, cell_ids=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Use table styles\n", + "\n", + "Use table styles where possible (e.g. for all cells or rows or columns at a time) since the CSS is nearly always more efficient than other formats." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
    \n", + "\n", + "This is sub-optimal:\n", + "\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "props = 'font-family: \"Times New Roman\", Times, serif; color: #e83e8c; font-size:1.3em;'\n", + "df4.style.applymap(lambda x: props, subset=[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
    \n", + "\n", + "This is better:\n", + "\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4.style.set_table_styles([{'selector': 'td.col1', 'props': props}])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Set classes instead of using Styler functions\n", + "\n", + "For large DataFrames where the same style is applied to many cells it can be more efficient to declare the styles as classes and then apply those classes to data cells, rather than directly applying styles to cells. It is, however, probably still easier to use the Styler function api when you are not concerned about optimization." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
    \n", + "\n", + "This is sub-optimal:\n", + "\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.style.apply(highlight_max, props='color:white;background-color:darkblue;', axis=0)\\\n", + " .apply(highlight_max, props='color:white;background-color:pink;', axis=1)\\\n", + " .apply(highlight_max, props='color:white;background-color:purple', axis=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
    \n", + "\n", + "This is better:\n", + "\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "build = lambda x: pd.DataFrame(x, index=df2.index, columns=df2.columns)\n", + "cls1 = build(df2.apply(highlight_max, props='cls-1 ', axis=0))\n", + "cls2 = build(df2.apply(highlight_max, props='cls-2 ', axis=1, result_type='expand').values)\n", + "cls3 = build(highlight_max(df2, props='cls-3 '))\n", + "df2.style.set_table_styles([\n", + " {'selector': '.cls-1', 'props': 'color:white;background-color:darkblue;'},\n", + " {'selector': '.cls-2', 'props': 'color:white;background-color:pink;'},\n", + " {'selector': '.cls-3', 'props': 'color:white;background-color:purple;'}\n", + "]).set_td_classes(cls1 + cls2 + cls3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Don't use tooltips\n", + "\n", + "Tooltips require `cell_ids` to work and they generate extra HTML elements for *every* data cell." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. If every byte counts use string replacement\n", + "\n", + "You can remove unnecessary HTML, or shorten the default class names by replacing the default css dict. You can read a little more about CSS [below](#More-About-CSS-and-HTML)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_css = {\n", + " \"row_heading\": \"\",\n", + " \"col_heading\": \"\",\n", + " \"index_name\": \"\",\n", + " \"col\": \"c\",\n", + " \"row\": \"r\",\n", + " \"col_trim\": \"\",\n", + " \"row_trim\": \"\",\n", + " \"level\": \"l\",\n", + " \"data\": \"\",\n", + " \"blank\": \"\",\n", + "}\n", + "html = Styler(df4, uuid_len=0, cell_ids=False)\n", + "html.set_table_styles([{'selector': 'td', 'props': props},\n", + " {'selector': '.c1', 'props': 'color:green;'},\n", + " {'selector': '.l0', 'props': 'color:blue;'}],\n", + " css_class_names=my_css)\n", + "print(html.to_html())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Builtin Styles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some styling functions are common enough that we've \"built them in\" to the `Styler`, so you don't have to write them and apply them yourself. The current list of such functions is:\n", + "\n", + " - [.highlight_null][nullfunc]: for use with identifying missing data. 
\n", + " - [.highlight_min][minfunc] and [.highlight_max][maxfunc]: for use with identifying extremeties in data.\n", + " - [.highlight_between][betweenfunc] and [.highlight_quantile][quantilefunc]: for use with identifying classes within data.\n", + " - [.background_gradient][bgfunc]: a flexible method for highlighting cells based on their, or other, values on a numeric scale.\n", + " - [.text_gradient][textfunc]: similar method for highlighting text based on their, or other, values on a numeric scale.\n", + " - [.bar][barfunc]: to display mini-charts within cell backgrounds.\n", + " \n", + "The individual documentation on each function often gives more examples of their arguments.\n", + "\n", + "[nullfunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_null.rst\n", + "[minfunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_min.rst\n", + "[maxfunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_max.rst\n", + "[betweenfunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_between.rst\n", + "[quantilefunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_quantile.rst\n", + "[bgfunc]: ../reference/api/pandas.io.formats.style.Styler.background_gradient.rst\n", + "[textfunc]: ../reference/api/pandas.io.formats.style.Styler.text_gradient.rst\n", + "[barfunc]: ../reference/api/pandas.io.formats.style.Styler.bar.rst" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Highlight Null" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.iloc[0,2] = np.nan\n", + "df2.iloc[4,3] = np.nan\n", + "df2.loc[:4].style.highlight_null(color='yellow')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Highlight Min or Max" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.loc[:4].style.highlight_max(axis=1, props='color:white; font-weight:bold; background-color:darkblue;')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Highlight Between\n", + "This method accepts ranges as float, or NumPy arrays or Series provided the indexes match." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "left = pd.Series([1.0, 0.0, 1.0], index=[\"A\", \"B\", \"D\"])\n", + "df2.loc[:4].style.highlight_between(left=left, right=1.5, axis=1, props='color:white; background-color:purple;')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Highlight Quantile\n", + "Useful for detecting the highest or lowest percentile values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.loc[:4].style.highlight_quantile(q_left=0.85, axis=None, color='yellow')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Background Gradient and Text Gradient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can create \"heatmaps\" with the `background_gradient` and `text_gradient` methods. These require matplotlib, and we'll use [Seaborn](http://seaborn.pydata.org/) to get a nice colormap." 
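+    "\n",
+    "If Seaborn isn't available, any named Matplotlib colormap can be passed instead; a rough, illustrative equivalent is:\n",
+    "\n",
+    "```python\n",
+    "df2.style.background_gradient(cmap='Greens')  # plain Matplotlib colormap name, no Seaborn needed\n",
+    "```"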
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "cm = sns.light_palette(\"green\", as_cmap=True)\n", + "\n", + "df2.style.background_gradient(cmap=cm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.style.text_gradient(cmap=cm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[.background_gradient][bgfunc] and [.text_gradient][textfunc] have a number of keyword arguments to customise the gradients and colors. See the documentation.\n", + "\n", + "[bgfunc]: ../reference/api/pandas.io.formats.style.Styler.background_gradient.rst\n", + "[textfunc]: ../reference/api/pandas.io.formats.style.Styler.text_gradient.rst" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set properties\n", + "\n", + "Use `Styler.set_properties` when the style doesn't actually depend on the values. This is just a simple wrapper for `.applymap` where the function returns the same properties for all cells." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.loc[:4].style.set_properties(**{'background-color': 'black',\n", + " 'color': 'lawngreen',\n", + " 'border-color': 'white'})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bar charts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can include \"bar charts\" in your DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.style.bar(subset=['A', 'B'], color='#d65f5f')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additional keyword arguments give more control on centering and positioning, and you can pass a list of `[color_negative, color_positive]` to highlight lower and higher values or a matplotlib colormap.\n", + "\n", + "To showcase an example here's how you can change the above with the new `align` option, combined with setting `vmin` and `vmax` limits, the `width` of the figure, and underlying css `props` of cells, leaving space to display the text and the bars. We also use `text_gradient` to color the text the same as the bars using a matplotlib colormap (although in this case the visualization is probably better without this additional effect)." 
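+    "\n",
+    "As a standalone illustration of the two-colour form before the fuller showcase below (an added sketch; the colours simply echo those used elsewhere in this notebook):\n",
+    "\n",
+    "```python\n",
+    "# the first colour is applied to negative values, the second to positive values\n",
+    "df2.style.bar(subset=['A', 'B'], align='mid', color=['#d65f5f', '#5fba7d'])\n",
+    "```"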
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.style.format('{:.3f}', na_rep=\"\")\\\n", + " .bar(align=0, vmin=-2.5, vmax=2.5, cmap=\"bwr\", height=50,\n", + " width=60, props=\"width: 120px; border-right: 1px solid black;\")\\\n", + " .text_gradient(cmap=\"bwr\", vmin=-2.5, vmax=2.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following example aims to give a highlight of the behavior of the new align options:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Hide the construction of the display chart from the user\n", + "import pandas as pd\n", + "from IPython.display import HTML\n", + "\n", + "# Test series\n", + "test1 = pd.Series([-100,-60,-30,-20], name='All Negative')\n", + "test2 = pd.Series([-10,-5,0,90], name='Both Pos and Neg')\n", + "test3 = pd.Series([10,20,50,100], name='All Positive')\n", + "test4 = pd.Series([100, 103, 101, 102], name='Large Positive')\n", + "\n", + "\n", + "head = \"\"\"\n", + "
    \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\"\"\"\n", + "\n", + "aligns = ['left', 'right', 'zero', 'mid', 'mean', 99]\n", + "for align in aligns:\n", + " row = \"\".format(align)\n", + " for series in [test1,test2,test3, test4]:\n", + " s = series.copy()\n", + " s.name=''\n", + " row += \"\".format(s.to_frame().style.hide_index().bar(align=align, \n", + " color=['#d65f5f', '#5fba7d'], \n", + " width=100).to_html()) #testn['width']\n", + " row += ''\n", + " head += row\n", + " \n", + "head+= \"\"\"\n", + "\n", + "
    AlignAll NegativeBoth Neg and PosAll PositiveLarge Positive
    {}{}
    \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "HTML(head)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sharing styles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Say you have a lovely style built up for a DataFrame, and now you want to apply the same style to a second DataFrame. Export the style with `df1.style.export`, and import it on the second DataFrame with `df1.style.set`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "style1 = df2.style\\\n", + " .applymap(style_negative, props='color:red;')\\\n", + " .applymap(lambda v: 'opacity: 20%;' if (v < 0.3) and (v > -0.3) else None)\\\n", + " .set_table_styles([{\"selector\": \"th\", \"props\": \"color: blue;\"}])\\\n", + " .hide(axis=\"index\")\n", + "style1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "style2 = df3.style\n", + "style2.use(style1.export())\n", + "style2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that you're able to share the styles even though they're data aware. The styles are re-evaluated on the new DataFrame they've been `use`d upon." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Limitations\n", + "\n", + "- DataFrame only (use `Series.to_frame().style`)\n", + "- The index and columns do not need to be unique, but certain styling functions can only work with unique indexes.\n", + "- No large repr, and construction performance isn't great; although we have some [HTML optimizations](#Optimization)\n", + "- You can only apply styles, you can't insert new HTML entities, except via subclassing." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other Fun and Useful Stuff\n", + "\n", + "Here are a few interesting examples." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Widgets\n", + "\n", + "`Styler` interacts pretty well with widgets. If you're viewing this online instead of running the notebook yourself, you're missing out on interactively adjusting the color palette." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ipywidgets import widgets\n", + "@widgets.interact\n", + "def f(h_neg=(0, 359, 1), h_pos=(0, 359), s=(0., 99.9), l=(0., 99.9)):\n", + " return df2.style.background_gradient(\n", + " cmap=sns.palettes.diverging_palette(h_neg=h_neg, h_pos=h_pos, s=s, l=l,\n", + " as_cmap=True)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Magnify" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def magnify():\n", + " return [dict(selector=\"th\",\n", + " props=[(\"font-size\", \"4pt\")]),\n", + " dict(selector=\"td\",\n", + " props=[('padding', \"0em 0em\")]),\n", + " dict(selector=\"th:hover\",\n", + " props=[(\"font-size\", \"12pt\")]),\n", + " dict(selector=\"tr:hover td:hover\",\n", + " props=[('max-width', '200px'),\n", + " ('font-size', '12pt')])\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(25)\n", + "cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)\n", + "bigdf = pd.DataFrame(np.random.randn(20, 25)).cumsum()\n", + "\n", + "bigdf.style.background_gradient(cmap, axis=1)\\\n", + " .set_properties(**{'max-width': '80px', 'font-size': '1pt'})\\\n", + " .set_caption(\"Hover to magnify\")\\\n", + " .format(precision=2)\\\n", + " .set_table_styles(magnify())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sticky Headers\n", + "\n", + "If you display a large matrix or DataFrame in a notebook, but you want to always see the column and row headers you can use the [.set_sticky][sticky] method which manipulates the table styles CSS.\n", + "\n", + "[sticky]: ../reference/api/pandas.io.formats.style.Styler.set_sticky.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bigdf = pd.DataFrame(np.random.randn(16, 100))\n", + "bigdf.style.set_sticky(axis=\"index\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is also possible to stick MultiIndexes and even only specific levels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bigdf.index = pd.MultiIndex.from_product([[\"A\",\"B\"],[0,1],[0,1,2,3]])\n", + "bigdf.style.set_sticky(axis=\"index\", pixel_size=18, levels=[1,2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### HTML Escaping\n", + "\n", + "Suppose you have to display HTML within HTML, that can be a bit of pain when the renderer can't distinguish. You can use the `escape` formatting option to handle this, and even use it within a formatter that contains HTML itself." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4 = pd.DataFrame([['
    ', '\"&other\"', '']])\n", + "df4.style" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4.style.format(escape=\"html\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4.style.format('{}', escape=\"html\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export to Excel\n", + "\n", + "Some support (*since version 0.20.0*) is available for exporting styled `DataFrames` to Excel worksheets using the `OpenPyXL` or `XlsxWriter` engines. CSS2.2 properties handled include:\n", + "\n", + "- `background-color`\n", + "- `border-style` properties\n", + "- `border-width` properties\n", + "- `border-color` properties\n", + "- `color`\n", + "- `font-family`\n", + "- `font-style`\n", + "- `font-weight`\n", + "- `text-align`\n", + "- `text-decoration`\n", + "- `vertical-align`\n", + "- `white-space: nowrap`\n", + "\n", + "\n", + "- Shorthand and side-specific border properties are supported (e.g. `border-style` and `border-left-style`) as well as the `border` shorthands for all sides (`border: 1px solid green`) or specified sides (`border-left: 1px solid green`). Using a `border` shorthand will override any border properties set before it (See [CSS Working Group](https://drafts.csswg.org/css-backgrounds/#border-shorthands) for more details)\n", + "\n", + "\n", + "- Only CSS2 named colors and hex colors of the form `#rgb` or `#rrggbb` are currently supported.\n", + "- The following pseudo CSS properties are also available to set Excel specific style properties:\n", + " - `number-format`\n", + " - `border-style` (for Excel-specific styles: \"hair\", \"mediumDashDot\", \"dashDotDot\", \"mediumDashDotDot\", \"dashDot\", \"slantDashDot\", or \"mediumDashed\")\n", + "\n", + "Table level styles, and data cell CSS-classes are not included in the export to Excel: individual cells must have their properties mapped by the `Styler.apply` and/or `Styler.applymap` methods." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df2.style.\\\n", + " applymap(style_negative, props='color:red;').\\\n", + " highlight_max(axis=0).\\\n", + " to_excel('styled.xlsx', engine='openpyxl')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A screenshot of the output:\n", + "\n", + "![Excel spreadsheet with styled DataFrame](../_static/style-excel.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export to LaTeX\n", + "\n", + "There is support (*since version 1.3.0*) to export `Styler` to LaTeX. The documentation for the [.to_latex][latex] method gives further detail and numerous examples.\n", + "\n", + "[latex]: ../reference/api/pandas.io.formats.style.Styler.to_latex.rst" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## More About CSS and HTML\n", + "\n", + "Cascading Style Sheet (CSS) language, which is designed to influence how a browser renders HTML elements, has its own peculiarities. It never reports errors: it just silently ignores them and doesn't render your objects how you intend so can sometimes be frustrating. Here is a very brief primer on how ``Styler`` creates HTML and interacts with CSS, with advice on common pitfalls to avoid." 
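+    "\n",
+    "As a quick illustration of that silent-failure behaviour (an added snippet; `pinkish` is deliberately not a valid CSS colour):\n",
+    "\n",
+    "```python\n",
+    "# renders completely unstyled, with no error or warning from either Styler or the browser\n",
+    "pd.DataFrame([['text']]).style.applymap(lambda x: 'color: pinkish;')\n",
+    "```"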
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CSS Classes and Ids\n", + "\n", + "The precise structure of the CSS `class` attached to each cell is as follows.\n", + "\n", + "- Cells with Index and Column names include `index_name` and `level` where `k` is its level in a MultiIndex\n", + "- Index label cells include\n", + " + `row_heading`\n", + " + `level` where `k` is the level in a MultiIndex\n", + " + `row` where `m` is the numeric position of the row\n", + "- Column label cells include\n", + " + `col_heading`\n", + " + `level` where `k` is the level in a MultiIndex\n", + " + `col` where `n` is the numeric position of the column\n", + "- Data cells include\n", + " + `data`\n", + " + `row`, where `m` is the numeric position of the cell.\n", + " + `col`, where `n` is the numeric position of the cell.\n", + "- Blank cells include `blank`\n", + "- Trimmed cells include `col_trim` or `row_trim`\n", + "\n", + "The structure of the `id` is `T_uuid_level_row_col` where `level` is used only on headings, and headings will only have either `row` or `col` whichever is needed. By default we've also prepended each row/column identifier with a UUID unique to each DataFrame so that the style from one doesn't collide with the styling from another within the same notebook or page. You can read more about the use of UUIDs in [Optimization](#Optimization).\n", + "\n", + "We can see example of the HTML by calling the [.to_html()][tohtml] method.\n", + "\n", + "[tohtml]: ../reference/api/pandas.io.formats.style.Styler.to_html.rst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(pd.DataFrame([[1,2],[3,4]], index=['i1', 'i2'], columns=['c1', 'c2']).style.to_html())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CSS Hierarchies\n", + "\n", + "The examples have shown that when CSS styles overlap, the one that comes last in the HTML render, takes precedence. So the following yield different results:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4 = pd.DataFrame([['text']])\n", + "df4.style.applymap(lambda x: 'color:green;')\\\n", + " .applymap(lambda x: 'color:red;')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4.style.applymap(lambda x: 'color:red;')\\\n", + " .applymap(lambda x: 'color:green;')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is only true for CSS rules that are equivalent in hierarchy, or importance. 
You can read more about [CSS specificity here](https://www.w3schools.com/css/css_specificity.asp) but for our purposes it suffices to summarize the key points:\n", + "\n", + "A CSS importance score for each HTML element is derived by starting at zero and adding:\n", + "\n", + " - 1000 for an inline style attribute\n", + " - 100 for each ID\n", + " - 10 for each attribute, class or pseudo-class\n", + " - 1 for each element name or pseudo-element\n", + " \n", + "Let's use this to describe the action of the following configurations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4.style.set_uuid('a_')\\\n", + " .set_table_styles([{'selector': 'td', 'props': 'color:red;'}])\\\n", + " .applymap(lambda x: 'color:green;')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This text is red because the generated selector `#T_a_ td` is worth 101 (ID plus element), whereas `#T_a_row0_col0` is only worth 100 (ID), so is considered inferior even though in the HTML it comes after the previous." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4.style.set_uuid('b_')\\\n", + " .set_table_styles([{'selector': 'td', 'props': 'color:red;'},\n", + " {'selector': '.cls-1', 'props': 'color:blue;'}])\\\n", + " .applymap(lambda x: 'color:green;')\\\n", + " .set_td_classes(pd.DataFrame([['cls-1']]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above case the text is blue because the selector `#T_b_ .cls-1` is worth 110 (ID plus class), which takes precedence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4.style.set_uuid('c_')\\\n", + " .set_table_styles([{'selector': 'td', 'props': 'color:red;'},\n", + " {'selector': '.cls-1', 'props': 'color:blue;'},\n", + " {'selector': 'td.data', 'props': 'color:yellow;'}])\\\n", + " .applymap(lambda x: 'color:green;')\\\n", + " .set_td_classes(pd.DataFrame([['cls-1']]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we have created another table style this time the selector `T_c_ td.data` (ID plus element plus class) gets bumped up to 111. \n", + "\n", + "If your style fails to be applied, and its really frustrating, try the `!important` trump card." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df4.style.set_uuid('d_')\\\n", + " .set_table_styles([{'selector': 'td', 'props': 'color:red;'},\n", + " {'selector': '.cls-1', 'props': 'color:blue;'},\n", + " {'selector': 'td.data', 'props': 'color:yellow;'}])\\\n", + " .applymap(lambda x: 'color:green !important;')\\\n", + " .set_td_classes(pd.DataFrame([['cls-1']]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally got that green text after all!" 
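+    "\n",
+    "If you want to check which rules actually reach the browser, printing the emitted `<style>` block shows every selector and its properties (an added, illustrative snippet; the uuid `e_` simply continues the naming used above):\n",
+    "\n",
+    "```python\n",
+    "styled = df4.style.set_uuid('e_')\n",
+    "styled = styled.set_table_styles([{'selector': 'td', 'props': 'color:red;'}])\n",
+    "styled = styled.applymap(lambda x: 'color:green !important;')\n",
+    "html = styled.to_html()\n",
+    "print(html[:html.find('</style>') + len('</style>')])  # only the CSS rules, not the table\n",
+    "```"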
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extensibility\n", + "\n", + "The core of pandas is, and will remain, its \"high-performance, easy-to-use data structures\".\n", + "With that in mind, we hope that `DataFrame.style` accomplishes two goals\n", + "\n", + "- Provide an API that is pleasing to use interactively and is \"good enough\" for many tasks\n", + "- Provide the foundations for dedicated libraries to build on\n", + "\n", + "If you build a great library on top of this, let us know and we'll [link](https://pandas.pydata.org/pandas-docs/stable/ecosystem.html) to it.\n", + "\n", + "### Subclassing\n", + "\n", + "If the default template doesn't quite suit your needs, you can subclass Styler and extend or override the template.\n", + "We'll show an example of extending the default template to insert a custom header before each table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jinja2 import Environment, ChoiceLoader, FileSystemLoader\n", + "from IPython.display import HTML\n", + "from pandas.io.formats.style import Styler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll use the following template:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"templates/myhtml.tpl\") as f:\n", + " print(f.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we've created a template, we need to set up a subclass of ``Styler`` that\n", + "knows about it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class MyStyler(Styler):\n", + " env = Environment(\n", + " loader=ChoiceLoader([\n", + " FileSystemLoader(\"templates\"), # contains ours\n", + " Styler.loader, # the default\n", + " ])\n", + " )\n", + " template_html_table = env.get_template(\"myhtml.tpl\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that we include the original loader in our environment's loader.\n", + "That's because we extend the original template, so the Jinja environment needs\n", + "to be able to find it.\n", + "\n", + "Now we can use that custom styler. It's `__init__` takes a DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MyStyler(df3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our custom template accepts a `table_title` keyword. We can provide the value in the `.to_html` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "HTML(MyStyler(df3).to_html(table_title=\"Extending Example\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For convenience, we provide the `Styler.from_custom_template` method that does the same as the custom subclass." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "EasyStyler = Styler.from_custom_template(\"templates\", \"myhtml.tpl\")\n", + "HTML(EasyStyler(df3).to_html(table_title=\"Another Title\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Template Structure\n", + "\n", + "Here's the template structure for the both the style generation template and the table generation template:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Style template:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "with open(\"templates/html_style_structure.html\") as f:\n", + " style_structure = f.read()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "HTML(style_structure)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Table template:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "with open(\"templates/html_table_structure.html\") as f:\n", + " table_structure = f.read()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "HTML(table_structure)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See the template in the [GitHub repo](https://github.com/pandas-dev/pandas) for more details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# # Hack to get the same style in the notebook as the\n", + "# # main site. This is hidden in the docs.\n", + "# from IPython.display import HTML\n", + "# with open(\"themes/nature_with_gtoc/static/nature.css_t\") as f:\n", + "# css = f.read()\n", + " \n", + "# HTML(''.format(css))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/doc/source/user_guide/templates/html_style_structure.html b/doc/source/user_guide/templates/html_style_structure.html new file mode 100644 index 00000000..dc0c03ac --- /dev/null +++ b/doc/source/user_guide/templates/html_style_structure.html @@ -0,0 +1,35 @@ + + + +
    before_style
    +
    style +
    <style type="text/css">
    +
    table_styles
    +
    before_cellstyle
    +
    cellstyle
    +
    </style>
    +
    diff --git a/doc/source/user_guide/templates/html_table_structure.html b/doc/source/user_guide/templates/html_table_structure.html new file mode 100644 index 00000000..e03f9591 --- /dev/null +++ b/doc/source/user_guide/templates/html_table_structure.html @@ -0,0 +1,48 @@ + + + +
    before_table
    + +
    table +
    <table ...>
    +
    caption
    + +
    thead +
    before_head_rows
    +
    head_tr (loop over headers)
    +
    after_head_rows
    +
    + +
    tbody +
    before_rows
    +
    tr (loop over data rows)
    +
    after_rows
    +
    +
    </table>
    +
    + +
    after_table
    diff --git a/doc/source/user_guide/templates/myhtml.tpl b/doc/source/user_guide/templates/myhtml.tpl new file mode 100644 index 00000000..1e204d0b --- /dev/null +++ b/doc/source/user_guide/templates/myhtml.tpl @@ -0,0 +1,5 @@ +{% extends "html_table.tpl" %} +{% block table %} +

+<h1>{{ table_title|default("My Table") }}</h1>

    +{{ super() }} +{% endblock table %} diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst new file mode 100644 index 00000000..d3503510 --- /dev/null +++ b/doc/source/user_guide/text.rst @@ -0,0 +1,801 @@ +.. _text: + +{{ header }} + +====================== +Working with text data +====================== + +.. _text.types: + +Text data types +--------------- + +.. versionadded:: 1.0.0 + +There are two ways to store text data in pandas: + +1. ``object`` -dtype NumPy array. +2. :class:`StringDtype` extension type. + +We recommend using :class:`StringDtype` to store text data. + +Prior to pandas 1.0, ``object`` dtype was the only option. This was unfortunate +for many reasons: + +1. You can accidentally store a *mixture* of strings and non-strings in an + ``object`` dtype array. It's better to have a dedicated dtype. +2. ``object`` dtype breaks dtype-specific operations like :meth:`DataFrame.select_dtypes`. + There isn't a clear way to select *just* text while excluding non-text + but still object-dtype columns. +3. When reading code, the contents of an ``object`` dtype array is less clear + than ``'string'``. + +Currently, the performance of ``object`` dtype arrays of strings and +:class:`arrays.StringArray` are about the same. We expect future enhancements +to significantly increase the performance and lower the memory overhead of +:class:`~arrays.StringArray`. + +.. warning:: + + ``StringArray`` is currently considered experimental. The implementation + and parts of the API may change without warning. + +For backwards-compatibility, ``object`` dtype remains the default type we +infer a list of strings to + +.. ipython:: python + + pd.Series(["a", "b", "c"]) + +To explicitly request ``string`` dtype, specify the ``dtype`` + +.. ipython:: python + + pd.Series(["a", "b", "c"], dtype="string") + pd.Series(["a", "b", "c"], dtype=pd.StringDtype()) + +Or ``astype`` after the ``Series`` or ``DataFrame`` is created + +.. ipython:: python + + s = pd.Series(["a", "b", "c"]) + s + s.astype("string") + + +.. versionchanged:: 1.1.0 + +You can also use :class:`StringDtype`/``"string"`` as the dtype on non-string data and +it will be converted to ``string`` dtype: + +.. ipython:: python + + s = pd.Series(["a", 2, np.nan], dtype="string") + s + type(s[1]) + +or convert from existing pandas data: + +.. ipython:: python + + s1 = pd.Series([1, 2, np.nan], dtype="Int64") + s1 + s2 = s1.astype("string") + s2 + type(s2[0]) + + +.. _text.differences: + +Behavior differences +^^^^^^^^^^^^^^^^^^^^ + +These are places where the behavior of ``StringDtype`` objects differ from +``object`` dtype + +l. For ``StringDtype``, :ref:`string accessor methods` + that return **numeric** output will always return a nullable integer dtype, + rather than either int or float dtype, depending on the presence of NA values. + Methods returning **boolean** output will return a nullable boolean dtype. + + .. ipython:: python + + s = pd.Series(["a", None, "b"], dtype="string") + s + s.str.count("a") + s.dropna().str.count("a") + + Both outputs are ``Int64`` dtype. Compare that with object-dtype + + .. ipython:: python + + s2 = pd.Series(["a", None, "b"], dtype="object") + s2.str.count("a") + s2.dropna().str.count("a") + + When NA values are present, the output dtype is float64. Similarly for + methods returning boolean values. + + .. ipython:: python + + s.str.isdigit() + s.str.match("a") + +2. 
Some string methods, like :meth:`Series.str.decode` are not available + on ``StringArray`` because ``StringArray`` only holds strings, not + bytes. +3. In comparison operations, :class:`arrays.StringArray` and ``Series`` backed + by a ``StringArray`` will return an object with :class:`BooleanDtype`, + rather than a ``bool`` dtype object. Missing values in a ``StringArray`` + will propagate in comparison operations, rather than always comparing + unequal like :attr:`numpy.nan`. + +Everything else that follows in the rest of this document applies equally to +``string`` and ``object`` dtype. + +.. _text.string_methods: + +String methods +-------------- + +Series and Index are equipped with a set of string processing methods +that make it easy to operate on each element of the array. Perhaps most +importantly, these methods exclude missing/NA values automatically. These are +accessed via the ``str`` attribute and generally have names matching +the equivalent (scalar) built-in string methods: + +.. ipython:: python + + s = pd.Series( + ["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"], dtype="string" + ) + s.str.lower() + s.str.upper() + s.str.len() + +.. ipython:: python + + idx = pd.Index([" jack", "jill ", " jesse ", "frank"]) + idx.str.strip() + idx.str.lstrip() + idx.str.rstrip() + +The string methods on Index are especially useful for cleaning up or +transforming DataFrame columns. For instance, you may have columns with +leading or trailing whitespace: + +.. ipython:: python + + df = pd.DataFrame( + np.random.randn(3, 2), columns=[" Column A ", " Column B "], index=range(3) + ) + df + +Since ``df.columns`` is an Index object, we can use the ``.str`` accessor + +.. ipython:: python + + df.columns.str.strip() + df.columns.str.lower() + +These string methods can then be used to clean up the columns as needed. +Here we are removing leading and trailing whitespaces, lower casing all names, +and replacing any remaining whitespaces with underscores: + +.. ipython:: python + + df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_") + df + +.. note:: + + If you have a ``Series`` where lots of elements are repeated + (i.e. the number of unique elements in the ``Series`` is a lot smaller than the length of the + ``Series``), it can be faster to convert the original ``Series`` to one of type + ``category`` and then use ``.str.`` or ``.dt.`` on that. + The performance difference comes from the fact that, for ``Series`` of type ``category``, the + string operations are done on the ``.categories`` and not on each element of the + ``Series``. + + Please note that a ``Series`` of type ``category`` with string ``.categories`` has + some limitations in comparison to ``Series`` of type string (e.g. you can't add strings to + each other: ``s + " " + s`` won't work if ``s`` is a ``Series`` of type ``category``). Also, + ``.str`` methods which operate on elements of type ``list`` are not available on such a + ``Series``. + +.. _text.warn_types: + +.. warning:: + + Before v.0.25.0, the ``.str``-accessor did only the most rudimentary type checks. Starting with + v.0.25.0, the type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously. + + Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few + exceptions, other uses are not supported, and may be disabled at a later point. + +.. 
_text.split: + +Splitting and replacing strings +------------------------------- + +Methods like ``split`` return a Series of lists: + +.. ipython:: python + + s2 = pd.Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype="string") + s2.str.split("_") + +Elements in the split lists can be accessed using ``get`` or ``[]`` notation: + +.. ipython:: python + + s2.str.split("_").str.get(1) + s2.str.split("_").str[1] + +It is easy to expand this to return a DataFrame using ``expand``. + +.. ipython:: python + + s2.str.split("_", expand=True) + +When original ``Series`` has :class:`StringDtype`, the output columns will all +be :class:`StringDtype` as well. + +It is also possible to limit the number of splits: + +.. ipython:: python + + s2.str.split("_", expand=True, n=1) + +``rsplit`` is similar to ``split`` except it works in the reverse direction, +i.e., from the end of the string to the beginning of the string: + +.. ipython:: python + + s2.str.rsplit("_", expand=True, n=1) + +``replace`` optionally uses `regular expressions +`__: + +.. ipython:: python + + s3 = pd.Series( + ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"], + dtype="string", + ) + s3 + s3.str.replace("^.a|dog", "XX-XX ", case=False, regex=True) + +.. warning:: + + Some caution must be taken when dealing with regular expressions! The current behavior + is to treat single character patterns as literal strings, even when ``regex`` is set + to ``True``. This behavior is deprecated and will be removed in a future version so + that the ``regex`` keyword is always respected. + +.. versionchanged:: 1.2.0 + +If you want literal replacement of a string (equivalent to :meth:`str.replace`), you +can set the optional ``regex`` parameter to ``False``, rather than escaping each +character. In this case both ``pat`` and ``repl`` must be strings: + +.. ipython:: python + + dollars = pd.Series(["12", "-$10", "$10,000"], dtype="string") + + # These lines are equivalent + dollars.str.replace(r"-\$", "-", regex=True) + dollars.str.replace("-$", "-", regex=False) + +The ``replace`` method can also take a callable as replacement. It is called +on every ``pat`` using :func:`re.sub`. The callable should expect one +positional argument (a regex object) and return a string. + +.. ipython:: python + + # Reverse every lowercase alphabetic word + pat = r"[a-z]+" + + def repl(m): + return m.group(0)[::-1] + + pd.Series(["foo 123", "bar baz", np.nan], dtype="string").str.replace( + pat, repl, regex=True + ) + + # Using regex groups + pat = r"(?P\w+) (?P\w+) (?P\w+)" + + def repl(m): + return m.group("two").swapcase() + + pd.Series(["Foo Bar Baz", np.nan], dtype="string").str.replace( + pat, repl, regex=True + ) + +The ``replace`` method also accepts a compiled regular expression object +from :func:`re.compile` as a pattern. All flags should be included in the +compiled regular expression object. + +.. ipython:: python + + import re + + regex_pat = re.compile(r"^.a|dog", flags=re.IGNORECASE) + s3.str.replace(regex_pat, "XX-XX ", regex=True) + +Including a ``flags`` argument when calling ``replace`` with a compiled +regular expression object will raise a ``ValueError``. + +.. 
ipython:: + + @verbatim + In [1]: s3.str.replace(regex_pat, 'XX-XX ', flags=re.IGNORECASE) + --------------------------------------------------------------------------- + ValueError: case and flags cannot be set when pat is a compiled regex + +``removeprefix`` and ``removesuffix`` have the same effect as ``str.removeprefix`` and ``str.removesuffix`` added in Python 3.9 +`__: + +.. versionadded:: 1.4.0 + +.. ipython:: python + + s = pd.Series(["str_foo", "str_bar", "no_prefix"]) + s.str.removeprefix("str_") + + s = pd.Series(["foo_str", "bar_str", "no_suffix"]) + s.str.removesuffix("_str") + +.. _text.concatenate: + +Concatenation +------------- + +There are several ways to concatenate a ``Series`` or ``Index``, either with itself or others, all based on :meth:`~Series.str.cat`, +resp. ``Index.str.cat``. + +Concatenating a single Series into a string +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The content of a ``Series`` (or ``Index``) can be concatenated: + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "d"], dtype="string") + s.str.cat(sep=",") + +If not specified, the keyword ``sep`` for the separator defaults to the empty string, ``sep=''``: + +.. ipython:: python + + s.str.cat() + +By default, missing values are ignored. Using ``na_rep``, they can be given a representation: + +.. ipython:: python + + t = pd.Series(["a", "b", np.nan, "d"], dtype="string") + t.str.cat(sep=",") + t.str.cat(sep=",", na_rep="-") + +Concatenating a Series and something list-like into a Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first argument to :meth:`~Series.str.cat` can be a list-like object, provided that it matches the length of the calling ``Series`` (or ``Index``). + +.. ipython:: python + + s.str.cat(["A", "B", "C", "D"]) + +Missing values on either side will result in missing values in the result as well, *unless* ``na_rep`` is specified: + +.. ipython:: python + + s.str.cat(t) + s.str.cat(t, na_rep="-") + +Concatenating a Series and something array-like into a Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The parameter ``others`` can also be two-dimensional. In this case, the number or rows must match the lengths of the calling ``Series`` (or ``Index``). + +.. ipython:: python + + d = pd.concat([t, s], axis=1) + s + d + s.str.cat(d, na_rep="-") + +Concatenating a Series and an indexed object into a Series, with alignment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For concatenation with a ``Series`` or ``DataFrame``, it is possible to align the indexes before concatenation by setting +the ``join``-keyword. + +.. ipython:: python + :okwarning: + + u = pd.Series(["b", "d", "a", "c"], index=[1, 3, 0, 2], dtype="string") + s + u + s.str.cat(u) + s.str.cat(u, join="left") + +.. warning:: + + If the ``join`` keyword is not passed, the method :meth:`~Series.str.cat` will currently fall back to the behavior before version 0.23.0 (i.e. no alignment), + but a ``FutureWarning`` will be raised if any of the involved indexes differ, since this default will change to ``join='left'`` in a future version. + +The usual options are available for ``join`` (one of ``'left', 'outer', 'inner', 'right'``). +In particular, alignment also means that the different lengths do not need to coincide anymore. + +.. 
ipython:: python + + v = pd.Series(["z", "a", "b", "d", "e"], index=[-1, 0, 1, 3, 4], dtype="string") + s + v + s.str.cat(v, join="left", na_rep="-") + s.str.cat(v, join="outer", na_rep="-") + +The same alignment can be used when ``others`` is a ``DataFrame``: + +.. ipython:: python + + f = d.loc[[3, 2, 1, 0], :] + s + f + s.str.cat(f, join="left", na_rep="-") + +Concatenating a Series and many objects into a Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Several array-like items (specifically: ``Series``, ``Index``, and 1-dimensional variants of ``np.ndarray``) +can be combined in a list-like container (including iterators, ``dict``-views, etc.). + +.. ipython:: python + + s + u + s.str.cat([u, u.to_numpy()], join="left") + +All elements without an index (e.g. ``np.ndarray``) within the passed list-like must match in length to the calling ``Series`` (or ``Index``), +but ``Series`` and ``Index`` may have arbitrary length (as long as alignment is not disabled with ``join=None``): + +.. ipython:: python + + v + s.str.cat([v, u, u.to_numpy()], join="outer", na_rep="-") + +If using ``join='right'`` on a list-like of ``others`` that contains different indexes, +the union of these indexes will be used as the basis for the final concatenation: + +.. ipython:: python + + u.loc[[3]] + v.loc[[-1, 0]] + s.str.cat([u.loc[[3]], v.loc[[-1, 0]]], join="right", na_rep="-") + +Indexing with ``.str`` +---------------------- + +.. _text.indexing: + +You can use ``[]`` notation to directly index by position locations. If you index past the end +of the string, the result will be a ``NaN``. + + +.. ipython:: python + + s = pd.Series( + ["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"], dtype="string" + ) + + s.str[0] + s.str[1] + +Extracting substrings +--------------------- + +.. _text.extract: + +Extract first match in each subject (extract) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. warning:: + + Before version 0.23, argument ``expand`` of the ``extract`` method defaulted to + ``False``. When ``expand=False``, ``expand`` returns a ``Series``, ``Index``, or + ``DataFrame``, depending on the subject and regular expression + pattern. When ``expand=True``, it always returns a ``DataFrame``, + which is more consistent and less confusing from the perspective of a user. + ``expand=True`` has been the default since version 0.23.0. + +The ``extract`` method accepts a `regular expression +`__ with at least one +capture group. + +Extracting a regular expression with more than one group returns a +DataFrame with one column per group. + +.. ipython:: python + + pd.Series( + ["a1", "b2", "c3"], + dtype="string", + ).str.extract(r"([ab])(\d)", expand=False) + +Elements that do not match return a row filled with ``NaN``. Thus, a +Series of messy strings can be "converted" into a like-indexed Series +or DataFrame of cleaned-up or more useful strings, without +necessitating ``get()`` to access tuples or ``re.match`` objects. The +dtype of the result is always object, even if no match is found and +the result only contains ``NaN``. + +Named groups like + +.. ipython:: python + + pd.Series(["a1", "b2", "c3"], dtype="string").str.extract( + r"(?P[ab])(?P\d)", expand=False + ) + +and optional groups like + +.. ipython:: python + + pd.Series( + ["a1", "b2", "3"], + dtype="string", + ).str.extract(r"([ab])?(\d)", expand=False) + +can also be used. Note that any capture group names in the regular +expression will be used for column names; otherwise capture group +numbers will be used. 
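+
+As a brief illustrative sketch (the group names ``letter`` and ``digit`` below are
+arbitrary choices for this example), named and optional groups can be combined, and
+the group names become the column names of the resulting ``DataFrame``:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    s = pd.Series(["a1", "b2", "3"], dtype="string")
+    # Named groups become column names; the optional group yields <NA>
+    # where it does not match ("3" has no leading letter)
+    s.str.extract(r"(?P<letter>[ab])?(?P<digit>\d)", expand=True)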
+
+Extracting a regular expression with one group returns a ``DataFrame``
+with one column if ``expand=True``.
+
+.. ipython:: python
+
+    pd.Series(["a1", "b2", "c3"], dtype="string").str.extract(r"[ab](\d)", expand=True)
+
+It returns a Series if ``expand=False``.
+
+.. ipython:: python
+
+    pd.Series(["a1", "b2", "c3"], dtype="string").str.extract(r"[ab](\d)", expand=False)
+
+Calling on an ``Index`` with a regex with exactly one capture group
+returns a ``DataFrame`` with one column if ``expand=True``.
+
+.. ipython:: python
+
+    s = pd.Series(["a1", "b2", "c3"], ["A11", "B22", "C33"], dtype="string")
+    s
+    s.index.str.extract("(?P<letter>[a-zA-Z])", expand=True)
+
+It returns an ``Index`` if ``expand=False``.
+
+.. ipython:: python
+
+    s.index.str.extract("(?P<letter>[a-zA-Z])", expand=False)
+
+Calling on an ``Index`` with a regex with more than one capture group
+returns a ``DataFrame`` if ``expand=True``.
+
+.. ipython:: python
+
+    s.index.str.extract("(?P<letter>[a-zA-Z])([0-9]+)", expand=True)
+
+It raises ``ValueError`` if ``expand=False``.
+
+.. code-block:: python
+
+    >>> s.index.str.extract("(?P<letter>[a-zA-Z])([0-9]+)", expand=False)
+    ValueError: only one regex group is supported with Index
+
+The table below summarizes the behavior of ``extract(expand=False)``
+(input subject in first column, number of groups in regex in
+first row)
+
++--------+---------+------------+
+|        | 1 group | >1 group   |
++--------+---------+------------+
+| Index  | Index   | ValueError |
++--------+---------+------------+
+| Series | Series  | DataFrame  |
++--------+---------+------------+
+
+Extract all matches in each subject (extractall)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. _text.extractall:
+
+Unlike ``extract`` (which returns only the first match),
+
+.. ipython:: python
+
+    s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"], dtype="string")
+    s
+    two_groups = "(?P<letter>[a-z])(?P<digit>[0-9])"
+    s.str.extract(two_groups, expand=True)
+
+the ``extractall`` method returns every match. The result of
+``extractall`` is always a ``DataFrame`` with a ``MultiIndex`` on its
+rows. The last level of the ``MultiIndex`` is named ``match`` and
+indicates the order in the subject.
+
+.. ipython:: python
+
+    s.str.extractall(two_groups)
+
+When each subject string in the Series has exactly one match,
+
+.. ipython:: python
+
+    s = pd.Series(["a3", "b3", "c2"], dtype="string")
+    s
+
+then ``extractall(pat).xs(0, level='match')`` gives the same result as
+``extract(pat)``.
+
+.. ipython:: python
+
+    extract_result = s.str.extract(two_groups, expand=True)
+    extract_result
+    extractall_result = s.str.extractall(two_groups)
+    extractall_result
+    extractall_result.xs(0, level="match")
+
+``Index`` also supports ``.str.extractall``. It returns a ``DataFrame`` which has the
+same result as a ``Series.str.extractall`` with a default index (starts from 0).
+
+.. ipython:: python
+
+    pd.Index(["a1a2", "b1", "c1"]).str.extractall(two_groups)
+
+    pd.Series(["a1a2", "b1", "c1"], dtype="string").str.extractall(two_groups)
+
+
+Testing for strings that match or contain a pattern
+----------------------------------------------------
+
+You can check whether elements contain a pattern:
+
+.. ipython:: python
+
+    pattern = r"[0-9][a-z]"
+    pd.Series(
+        ["1", "2", "3a", "3b", "03c", "4dx"],
+        dtype="string",
+    ).str.contains(pattern)
+
+Or whether elements match a pattern:
+
+.. ipython:: python
+
+    pd.Series(
+        ["1", "2", "3a", "3b", "03c", "4dx"],
+        dtype="string",
+    ).str.match(pattern)
+
+.. versionadded:: 1.1.0
+
+.. 
ipython:: python + + pd.Series( + ["1", "2", "3a", "3b", "03c", "4dx"], + dtype="string", + ).str.fullmatch(pattern) + +.. note:: + + The distinction between ``match``, ``fullmatch``, and ``contains`` is strictness: + ``fullmatch`` tests whether the entire string matches the regular expression; + ``match`` tests whether there is a match of the regular expression that begins + at the first character of the string; and ``contains`` tests whether there is + a match of the regular expression at any position within the string. + + The corresponding functions in the ``re`` package for these three match modes are + `re.fullmatch `_, + `re.match `_, and + `re.search `_, + respectively. + +Methods like ``match``, ``fullmatch``, ``contains``, ``startswith``, and +``endswith`` take an extra ``na`` argument so missing values can be considered +True or False: + +.. ipython:: python + + s4 = pd.Series( + ["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"], dtype="string" + ) + s4.str.contains("A", na=False) + +.. _text.indicator: + +Creating indicator variables +---------------------------- + +You can extract dummy variables from string columns. +For example if they are separated by a ``'|'``: + +.. ipython:: python + + s = pd.Series(["a", "a|b", np.nan, "a|c"], dtype="string") + s.str.get_dummies(sep="|") + +String ``Index`` also supports ``get_dummies`` which returns a ``MultiIndex``. + +.. ipython:: python + + idx = pd.Index(["a", "a|b", np.nan, "a|c"]) + idx.str.get_dummies(sep="|") + +See also :func:`~pandas.get_dummies`. + +Method summary +-------------- + +.. _text.summary: + +.. csv-table:: + :header: "Method", "Description" + :widths: 20, 80 + :delim: ; + + :meth:`~Series.str.cat`;Concatenate strings + :meth:`~Series.str.split`;Split strings on delimiter + :meth:`~Series.str.rsplit`;Split strings on delimiter working from the end of the string + :meth:`~Series.str.get`;Index into each element (retrieve i-th element) + :meth:`~Series.str.join`;Join strings in each element of the Series with passed separator + :meth:`~Series.str.get_dummies`;Split strings on the delimiter returning DataFrame of dummy variables + :meth:`~Series.str.contains`;Return boolean array if each string contains pattern/regex + :meth:`~Series.str.replace`;Replace occurrences of pattern/regex/string with some other string or the return value of a callable given the occurrence + :meth:`~Series.str.removeprefix`;Remove prefix from string, i.e. only remove if string starts with prefix. + :meth:`~Series.str.removesuffix`;Remove suffix from string, i.e. only remove if string ends with suffix. 
+ :meth:`~Series.str.repeat`;Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``) + :meth:`~Series.str.pad`;"Add whitespace to left, right, or both sides of strings" + :meth:`~Series.str.center`;Equivalent to ``str.center`` + :meth:`~Series.str.ljust`;Equivalent to ``str.ljust`` + :meth:`~Series.str.rjust`;Equivalent to ``str.rjust`` + :meth:`~Series.str.zfill`;Equivalent to ``str.zfill`` + :meth:`~Series.str.wrap`;Split long strings into lines with length less than a given width + :meth:`~Series.str.slice`;Slice each string in the Series + :meth:`~Series.str.slice_replace`;Replace slice in each string with passed value + :meth:`~Series.str.count`;Count occurrences of pattern + :meth:`~Series.str.startswith`;Equivalent to ``str.startswith(pat)`` for each element + :meth:`~Series.str.endswith`;Equivalent to ``str.endswith(pat)`` for each element + :meth:`~Series.str.findall`;Compute list of all occurrences of pattern/regex for each string + :meth:`~Series.str.match`;"Call ``re.match`` on each element, returning matched groups as list" + :meth:`~Series.str.extract`;"Call ``re.search`` on each element, returning DataFrame with one row for each element and one column for each regex capture group" + :meth:`~Series.str.extractall`;"Call ``re.findall`` on each element, returning DataFrame with one row for each match and one column for each regex capture group" + :meth:`~Series.str.len`;Compute string lengths + :meth:`~Series.str.strip`;Equivalent to ``str.strip`` + :meth:`~Series.str.rstrip`;Equivalent to ``str.rstrip`` + :meth:`~Series.str.lstrip`;Equivalent to ``str.lstrip`` + :meth:`~Series.str.partition`;Equivalent to ``str.partition`` + :meth:`~Series.str.rpartition`;Equivalent to ``str.rpartition`` + :meth:`~Series.str.lower`;Equivalent to ``str.lower`` + :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` + :meth:`~Series.str.upper`;Equivalent to ``str.upper`` + :meth:`~Series.str.find`;Equivalent to ``str.find`` + :meth:`~Series.str.rfind`;Equivalent to ``str.rfind`` + :meth:`~Series.str.index`;Equivalent to ``str.index`` + :meth:`~Series.str.rindex`;Equivalent to ``str.rindex`` + :meth:`~Series.str.capitalize`;Equivalent to ``str.capitalize`` + :meth:`~Series.str.swapcase`;Equivalent to ``str.swapcase`` + :meth:`~Series.str.normalize`;Return Unicode normal form. Equivalent to ``unicodedata.normalize`` + :meth:`~Series.str.translate`;Equivalent to ``str.translate`` + :meth:`~Series.str.isalnum`;Equivalent to ``str.isalnum`` + :meth:`~Series.str.isalpha`;Equivalent to ``str.isalpha`` + :meth:`~Series.str.isdigit`;Equivalent to ``str.isdigit`` + :meth:`~Series.str.isspace`;Equivalent to ``str.isspace`` + :meth:`~Series.str.islower`;Equivalent to ``str.islower`` + :meth:`~Series.str.isupper`;Equivalent to ``str.isupper`` + :meth:`~Series.str.istitle`;Equivalent to ``str.istitle`` + :meth:`~Series.str.isnumeric`;Equivalent to ``str.isnumeric`` + :meth:`~Series.str.isdecimal`;Equivalent to ``str.isdecimal`` diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst new file mode 100644 index 00000000..180de1df --- /dev/null +++ b/doc/source/user_guide/timedeltas.rst @@ -0,0 +1,491 @@ +.. _timedeltas: + +{{ header }} + +.. _timedeltas.timedeltas: + +*********** +Time deltas +*********** + +Timedeltas are differences in times, expressed in difference units, e.g. days, hours, minutes, +seconds. They can be both positive and negative. 
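+
+As a minimal illustrative sketch (the timestamps below are arbitrary), subtracting
+one timestamp from another yields a ``Timedelta`` whose sign depends on the order of
+the operands:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    start = pd.Timestamp("2023-01-01")
+    end = pd.Timestamp("2023-01-02 06:00")
+    end - start    # Timedelta('1 days 06:00:00')
+    start - end    # Timedelta('-2 days +18:00:00'), i.e. a negative timedelta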
+ +``Timedelta`` is a subclass of ``datetime.timedelta``, and behaves in a similar manner, +but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, +parsing, and attributes. + +Parsing +------- + +You can construct a ``Timedelta`` scalar through various arguments, including `ISO 8601 Duration`_ strings. + +.. ipython:: python + + import datetime + + # strings + pd.Timedelta("1 days") + pd.Timedelta("1 days 00:00:00") + pd.Timedelta("1 days 2 hours") + pd.Timedelta("-1 days 2 min 3us") + + # like datetime.timedelta + # note: these MUST be specified as keyword arguments + pd.Timedelta(days=1, seconds=1) + + # integers with a unit + pd.Timedelta(1, unit="d") + + # from a datetime.timedelta/np.timedelta64 + pd.Timedelta(datetime.timedelta(days=1, seconds=1)) + pd.Timedelta(np.timedelta64(1, "ms")) + + # negative Timedeltas have this string repr + # to be more consistent with datetime.timedelta conventions + pd.Timedelta("-1us") + + # a NaT + pd.Timedelta("nan") + pd.Timedelta("nat") + + # ISO 8601 Duration strings + pd.Timedelta("P0DT0H1M0S") + pd.Timedelta("P0DT0H0M0.000000123S") + +:ref:`DateOffsets` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction. + +.. ipython:: python + + pd.Timedelta(pd.offsets.Second(2)) + +Further, operations among the scalars yield another scalar ``Timedelta``. + +.. ipython:: python + + pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta( + "00:00:00.000123" + ) + +to_timedelta +~~~~~~~~~~~~ + +Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, +or Series from a recognized timedelta format / value into a ``Timedelta`` type. +It will construct Series if the input is a Series, a scalar if the input is +scalar-like, otherwise it will output a ``TimedeltaIndex``. + +You can parse a single string to a Timedelta: + +.. ipython:: python + + pd.to_timedelta("1 days 06:05:01.00003") + pd.to_timedelta("15.5us") + +or a list/array of strings: + +.. ipython:: python + + pd.to_timedelta(["1 days 06:05:01.00003", "15.5us", "nan"]) + +The ``unit`` keyword argument specifies the unit of the Timedelta if the input +is numeric: + +.. ipython:: python + + pd.to_timedelta(np.arange(5), unit="s") + pd.to_timedelta(np.arange(5), unit="d") + +.. warning:: + If a string or array of strings is passed as an input then the ``unit`` keyword + argument will be ignored. If a string without units is passed then the default + unit of nanoseconds is assumed. + +.. _timedeltas.limitations: + +Timedelta limitations +~~~~~~~~~~~~~~~~~~~~~ + +pandas represents ``Timedeltas`` in nanosecond resolution using +64 bit integers. As such, the 64 bit integer limits determine +the ``Timedelta`` limits. + +.. ipython:: python + + pd.Timedelta.min + pd.Timedelta.max + +.. _timedeltas.operations: + +Operations +---------- + +You can operate on Series/DataFrames and construct ``timedelta64[ns]`` Series through +subtraction operations on ``datetime64[ns]`` Series, or ``Timestamps``. + +.. ipython:: python + + s = pd.Series(pd.date_range("2012-1-1", periods=3, freq="D")) + td = pd.Series([pd.Timedelta(days=i) for i in range(3)]) + df = pd.DataFrame({"A": s, "B": td}) + df + df["C"] = df["A"] + df["B"] + df + df.dtypes + + s - s.max() + s - datetime.datetime(2011, 1, 1, 3, 5) + s + datetime.timedelta(minutes=5) + s + pd.offsets.Minute(5) + s + pd.offsets.Minute(5) + pd.offsets.Milli(5) + +Operations with scalars from a ``timedelta64[ns]`` series: + +.. 
ipython:: python + + y = s - s[0] + y + +Series of timedeltas with ``NaT`` values are supported: + +.. ipython:: python + + y = s - s.shift() + y + +Elements can be set to ``NaT`` using ``np.nan`` analogously to datetimes: + +.. ipython:: python + + y[1] = np.nan + y + +Operands can also appear in a reversed order (a singular object operated with a Series): + +.. ipython:: python + + s.max() - s + datetime.datetime(2011, 1, 1, 3, 5) - s + datetime.timedelta(minutes=5) + s + +``min, max`` and the corresponding ``idxmin, idxmax`` operations are supported on frames: + +.. ipython:: python + + A = s - pd.Timestamp("20120101") - pd.Timedelta("00:05:05") + B = s - pd.Series(pd.date_range("2012-1-2", periods=3, freq="D")) + + df = pd.DataFrame({"A": A, "B": B}) + df + + df.min() + df.min(axis=1) + + df.idxmin() + df.idxmax() + +``min, max, idxmin, idxmax`` operations are supported on Series as well. A scalar result will be a ``Timedelta``. + +.. ipython:: python + + df.min().max() + df.min(axis=1).min() + + df.min().idxmax() + df.min(axis=1).idxmin() + +You can fillna on timedeltas, passing a timedelta to get a particular value. + +.. ipython:: python + + y.fillna(pd.Timedelta(0)) + y.fillna(pd.Timedelta(10, unit="s")) + y.fillna(pd.Timedelta("-1 days, 00:00:05")) + +You can also negate, multiply and use ``abs`` on ``Timedeltas``: + +.. ipython:: python + + td1 = pd.Timedelta("-1 days 2 hours 3 seconds") + td1 + -1 * td1 + -td1 + abs(td1) + +.. _timedeltas.timedeltas_reductions: + +Reductions +---------- + +Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` objects. As usual +``NaT`` are skipped during evaluation. + +.. ipython:: python + + y2 = pd.Series( + pd.to_timedelta(["-1 days +00:00:05", "nat", "-1 days +00:00:05", "1 days"]) + ) + y2 + y2.mean() + y2.median() + y2.quantile(0.1) + y2.sum() + +.. _timedeltas.timedeltas_convert: + +Frequency conversion +-------------------- + +Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, +or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``. +Note that division by the NumPy scalar is true division, while astyping is equivalent of floor division. + +.. ipython:: python + + december = pd.Series(pd.date_range("20121201", periods=4)) + january = pd.Series(pd.date_range("20130101", periods=4)) + td = january - december + + td[2] += datetime.timedelta(minutes=5, seconds=3) + td[3] = np.nan + td + + # to days + td / np.timedelta64(1, "D") + td.astype("timedelta64[D]") + + # to seconds + td / np.timedelta64(1, "s") + td.astype("timedelta64[s]") + + # to months (these are constant months) + td / np.timedelta64(1, "M") + +Dividing or multiplying a ``timedelta64[ns]`` Series by an integer or integer Series +yields another ``timedelta64[ns]`` dtypes Series. + +.. ipython:: python + + td * -1 + td * pd.Series([1, 2, 3, 4]) + +Rounded division (floor-division) of a ``timedelta64[ns]`` Series by a scalar +``Timedelta`` gives a series of integers. + +.. ipython:: python + + td // pd.Timedelta(days=3, hours=4) + pd.Timedelta(days=3, hours=4) // td + +.. _timedeltas.mod_divmod: + +The mod (%) and divmod operations are defined for ``Timedelta`` when operating with another timedelta-like or with a numeric argument. + +.. 
ipython:: python + + pd.Timedelta(hours=37) % datetime.timedelta(hours=2) + + # divmod against a timedelta-like returns a pair (int, Timedelta) + divmod(datetime.timedelta(hours=2), pd.Timedelta(minutes=11)) + + # divmod against a numeric returns a pair (Timedelta, Timedelta) + divmod(pd.Timedelta(hours=25), 86400000000000) + +Attributes +---------- + +You can access various components of the ``Timedelta`` or ``TimedeltaIndex`` directly using the attributes ``days,seconds,microseconds,nanoseconds``. These are identical to the values returned by ``datetime.timedelta``, in that, for example, the ``.seconds`` attribute represents the number of seconds >= 0 and < 1 day. These are signed according to whether the ``Timedelta`` is signed. + +These operations can also be directly accessed via the ``.dt`` property of the ``Series`` as well. + +.. note:: + + Note that the attributes are NOT the displayed values of the ``Timedelta``. Use ``.components`` to retrieve the displayed values. + +For a ``Series``: + +.. ipython:: python + + td.dt.days + td.dt.seconds + +You can access the value of the fields for a scalar ``Timedelta`` directly. + +.. ipython:: python + + tds = pd.Timedelta("31 days 5 min 3 sec") + tds.days + tds.seconds + (-tds).seconds + +You can use the ``.components`` property to access a reduced form of the timedelta. This returns a ``DataFrame`` indexed +similarly to the ``Series``. These are the *displayed* values of the ``Timedelta``. + +.. ipython:: python + + td.dt.components + td.dt.components.seconds + +.. _timedeltas.isoformat: + +You can convert a ``Timedelta`` to an `ISO 8601 Duration`_ string with the +``.isoformat`` method + +.. ipython:: python + + pd.Timedelta( + days=6, minutes=50, seconds=3, milliseconds=10, microseconds=10, nanoseconds=12 + ).isoformat() + +.. _ISO 8601 Duration: https://en.wikipedia.org/wiki/ISO_8601#Durations + +.. _timedeltas.index: + +TimedeltaIndex +-------------- + +To generate an index with time delta, you can use either the :class:`TimedeltaIndex` or +the :func:`timedelta_range` constructor. + +Using ``TimedeltaIndex`` you can pass string-like, ``Timedelta``, ``timedelta``, +or ``np.timedelta64`` objects. Passing ``np.nan/pd.NaT/nat`` will represent missing values. + +.. ipython:: python + + pd.TimedeltaIndex( + [ + "1 days", + "1 days, 00:00:05", + np.timedelta64(2, "D"), + datetime.timedelta(days=2, seconds=2), + ] + ) + +The string 'infer' can be passed in order to set the frequency of the index as the +inferred frequency upon creation: + +.. ipython:: python + + pd.TimedeltaIndex(["0 days", "10 days", "20 days"], freq="infer") + +Generating ranges of time deltas +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Similar to :func:`date_range`, you can construct regular ranges of a ``TimedeltaIndex`` +using :func:`timedelta_range`. The default frequency for ``timedelta_range`` is +calendar day: + +.. ipython:: python + + pd.timedelta_range(start="1 days", periods=5) + +Various combinations of ``start``, ``end``, and ``periods`` can be used with +``timedelta_range``: + +.. ipython:: python + + pd.timedelta_range(start="1 days", end="5 days") + + pd.timedelta_range(end="10 days", periods=4) + +The ``freq`` parameter can passed a variety of :ref:`frequency aliases `: + +.. 
ipython:: python + + pd.timedelta_range(start="1 days", end="2 days", freq="30T") + + pd.timedelta_range(start="1 days", periods=5, freq="2D5H") + + +Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced +timedeltas from ``start`` to ``end`` inclusively, with ``periods`` number of elements +in the resulting ``TimedeltaIndex``: + +.. ipython:: python + + pd.timedelta_range("0 days", "4 days", periods=5) + + pd.timedelta_range("0 days", "4 days", periods=10) + +Using the TimedeltaIndex +~~~~~~~~~~~~~~~~~~~~~~~~ + +Similarly to other of the datetime-like indices, ``DatetimeIndex`` and ``PeriodIndex``, you can use +``TimedeltaIndex`` as the index of pandas objects. + +.. ipython:: python + + s = pd.Series( + np.arange(100), + index=pd.timedelta_range("1 days", periods=100, freq="h"), + ) + s + +Selections work similarly, with coercion on string-likes and slices: + +.. ipython:: python + + s["1 day":"2 day"] + s["1 day 01:00:00"] + s[pd.Timedelta("1 day 1h")] + +Furthermore you can use partial string selection and the range will be inferred: + +.. ipython:: python + + s["1 day":"1 day 5 hours"] + +Operations +~~~~~~~~~~ + +Finally, the combination of ``TimedeltaIndex`` with ``DatetimeIndex`` allow certain combination operations that are NaT preserving: + +.. ipython:: python + + tdi = pd.TimedeltaIndex(["1 days", pd.NaT, "2 days"]) + tdi.to_list() + dti = pd.date_range("20130101", periods=3) + dti.to_list() + (dti + tdi).to_list() + (dti - tdi).to_list() + +Conversions +~~~~~~~~~~~ + +Similarly to frequency conversion on a ``Series`` above, you can convert these indices to yield another Index. + +.. ipython:: python + + tdi / np.timedelta64(1, "s") + tdi.astype("timedelta64[s]") + +Scalars type ops work as well. These can potentially return a *different* type of index. + +.. ipython:: python + + # adding or timedelta and date -> datelike + tdi + pd.Timestamp("20130101") + + # subtraction of a date and a timedelta -> datelike + # note that trying to subtract a date from a Timedelta will raise an exception + (pd.Timestamp("20130101") - tdi).to_list() + + # timedelta + timedelta -> timedelta + tdi + pd.Timedelta("10 days") + + # division can result in a Timedelta if the divisor is an integer + tdi / 2 + + # or a Float64Index if the divisor is a Timedelta + tdi / tdi[0] + +.. _timedeltas.resampling: + +Resampling +---------- + +Similar to :ref:`timeseries resampling `, we can resample with a ``TimedeltaIndex``. + +.. ipython:: python + + s.resample("D").mean() diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst new file mode 100644 index 00000000..474068e4 --- /dev/null +++ b/doc/source/user_guide/timeseries.rst @@ -0,0 +1,2663 @@ +.. _timeseries: + +{{ header }} + +******************************** +Time series / date functionality +******************************** + +pandas contains extensive capabilities and features for working with time series data for all domains. +Using the NumPy ``datetime64`` and ``timedelta64`` dtypes, pandas has consolidated a large number of +features from other Python libraries like ``scikits.timeseries`` as well as created +a tremendous amount of new functionality for manipulating time series data. + +For example, pandas supports: + +Parsing time series information from various sources and formats + +.. 
ipython:: python + + import datetime + + dti = pd.to_datetime( + ["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018, 1, 1)] + ) + dti + +Generate sequences of fixed-frequency dates and time spans + +.. ipython:: python + + dti = pd.date_range("2018-01-01", periods=3, freq="H") + dti + +Manipulating and converting date times with timezone information + +.. ipython:: python + + dti = dti.tz_localize("UTC") + dti + dti.tz_convert("US/Pacific") + +Resampling or converting a time series to a particular frequency + +.. ipython:: python + + idx = pd.date_range("2018-01-01", periods=5, freq="H") + ts = pd.Series(range(len(idx)), index=idx) + ts + ts.resample("2H").mean() + +Performing date and time arithmetic with absolute or relative time increments + +.. ipython:: python + + friday = pd.Timestamp("2018-01-05") + friday.day_name() + # Add 1 day + saturday = friday + pd.Timedelta("1 day") + saturday.day_name() + # Add 1 business day (Friday --> Monday) + monday = friday + pd.offsets.BDay() + monday.day_name() + +pandas provides a relatively compact and self-contained set of tools for +performing the above tasks and more. + + +.. _timeseries.overview: + +Overview +-------- + +pandas captures 4 general time related concepts: + +#. Date times: A specific date and time with timezone support. Similar to ``datetime.datetime`` from the standard library. +#. Time deltas: An absolute time duration. Similar to ``datetime.timedelta`` from the standard library. +#. Time spans: A span of time defined by a point in time and its associated frequency. +#. Date offsets: A relative time duration that respects calendar arithmetic. Similar to ``dateutil.relativedelta.relativedelta`` from the ``dateutil`` package. + +===================== ================= =================== ============================================ ======================================== +Concept Scalar Class Array Class pandas Data Type Primary Creation Method +===================== ================= =================== ============================================ ======================================== +Date times ``Timestamp`` ``DatetimeIndex`` ``datetime64[ns]`` or ``datetime64[ns, tz]`` ``to_datetime`` or ``date_range`` +Time deltas ``Timedelta`` ``TimedeltaIndex`` ``timedelta64[ns]`` ``to_timedelta`` or ``timedelta_range`` +Time spans ``Period`` ``PeriodIndex`` ``period[freq]`` ``Period`` or ``period_range`` +Date offsets ``DateOffset`` ``None`` ``None`` ``DateOffset`` +===================== ================= =================== ============================================ ======================================== + +For time series data, it's conventional to represent the time component in the index of a :class:`Series` or :class:`DataFrame` +so manipulations can be performed with respect to the time element. + +.. ipython:: python + + pd.Series(range(3), index=pd.date_range("2000", freq="D", periods=3)) + +However, :class:`Series` and :class:`DataFrame` can directly also support the time component as data itself. + +.. ipython:: python + + pd.Series(pd.date_range("2000", freq="D", periods=3)) + +:class:`Series` and :class:`DataFrame` have extended data type support and functionality for ``datetime``, ``timedelta`` +and ``Period`` data when passed into those constructors. ``DateOffset`` +data however will be stored as ``object`` data. + +.. 
ipython:: python + + pd.Series(pd.period_range("1/1/2011", freq="M", periods=3)) + pd.Series([pd.DateOffset(1), pd.DateOffset(2)]) + pd.Series(pd.date_range("1/1/2011", freq="M", periods=3)) + +Lastly, pandas represents null date times, time deltas, and time spans as ``NaT`` which +is useful for representing missing or null date like values and behaves similar +as ``np.nan`` does for float data. + +.. ipython:: python + + pd.Timestamp(pd.NaT) + pd.Timedelta(pd.NaT) + pd.Period(pd.NaT) + # Equality acts as np.nan would + pd.NaT == pd.NaT + +.. _timeseries.representation: + +Timestamps vs. time spans +------------------------- + +Timestamped data is the most basic type of time series data that associates +values with points in time. For pandas objects it means using the points in +time. + +.. ipython:: python + + pd.Timestamp(datetime.datetime(2012, 5, 1)) + pd.Timestamp("2012-05-01") + pd.Timestamp(2012, 5, 1) + +However, in many cases it is more natural to associate things like change +variables with a time span instead. The span represented by ``Period`` can be +specified explicitly, or inferred from datetime string format. + +For example: + +.. ipython:: python + + pd.Period("2011-01") + + pd.Period("2012-05", freq="D") + +:class:`Timestamp` and :class:`Period` can serve as an index. Lists of +``Timestamp`` and ``Period`` are automatically coerced to :class:`DatetimeIndex` +and :class:`PeriodIndex` respectively. + +.. ipython:: python + + dates = [ + pd.Timestamp("2012-05-01"), + pd.Timestamp("2012-05-02"), + pd.Timestamp("2012-05-03"), + ] + ts = pd.Series(np.random.randn(3), dates) + + type(ts.index) + ts.index + + ts + + periods = [pd.Period("2012-01"), pd.Period("2012-02"), pd.Period("2012-03")] + + ts = pd.Series(np.random.randn(3), periods) + + type(ts.index) + ts.index + + ts + +pandas allows you to capture both representations and +convert between them. Under the hood, pandas represents timestamps using +instances of ``Timestamp`` and sequences of timestamps using instances of +``DatetimeIndex``. For regular time spans, pandas uses ``Period`` objects for +scalar values and ``PeriodIndex`` for sequences of spans. Better support for +irregular intervals with arbitrary start and end points are forth-coming in +future releases. + + +.. _timeseries.converting: + +Converting to timestamps +------------------------ + +To convert a :class:`Series` or list-like object of date-like objects e.g. strings, +epochs, or a mixture, you can use the ``to_datetime`` function. When passed +a ``Series``, this returns a ``Series`` (with the same index), while a list-like +is converted to a ``DatetimeIndex``: + +.. ipython:: python + + pd.to_datetime(pd.Series(["Jul 31, 2009", "2010-01-10", None])) + + pd.to_datetime(["2005/11/23", "2010.12.31"]) + +If you use dates which start with the day first (i.e. European style), +you can pass the ``dayfirst`` flag: + +.. ipython:: python + :okwarning: + + pd.to_datetime(["04-01-2012 10:00"], dayfirst=True) + + pd.to_datetime(["14-01-2012", "01-14-2012"], dayfirst=True) + +.. warning:: + + You see in the above example that ``dayfirst`` isn't strict. If a date + can't be parsed with the day being first it will be parsed as if + ``dayfirst`` were False, and in the case of parsing delimited date strings + (e.g. ``31-12-2012``) then a warning will also be raised. + +If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``. 
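+
+As a small illustrative sketch (the dates are chosen arbitrarily), the return type
+follows the type of the input:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    pd.to_datetime("2010/11/12")                 # single string -> Timestamp
+    pd.to_datetime(["2010/11/12"])               # list-like -> DatetimeIndex
+    pd.to_datetime(pd.Series(["2010/11/12"]))    # Series -> Series of datetime64[ns]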
+``Timestamp`` can also accept string input, but it doesn't accept string parsing +options like ``dayfirst`` or ``format``, so use ``to_datetime`` if these are required. + +.. ipython:: python + + pd.to_datetime("2010/11/12") + + pd.Timestamp("2010/11/12") + +You can also use the ``DatetimeIndex`` constructor directly: + +.. ipython:: python + + pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"]) + +The string 'infer' can be passed in order to set the frequency of the index as the +inferred frequency upon creation: + +.. ipython:: python + + pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer") + +.. _timeseries.converting.format: + +Providing a format argument +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to the required datetime string, a ``format`` argument can be passed to ensure specific parsing. +This could also potentially speed up the conversion considerably. + +.. ipython:: python + + pd.to_datetime("2010/11/12", format="%Y/%m/%d") + + pd.to_datetime("12-11-2010 00:00", format="%d-%m-%Y %H:%M") + +For more information on the choices available when specifying the ``format`` +option, see the Python `datetime documentation`_. + +.. _datetime documentation: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior + +Assembling datetime from multiple DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can also pass a ``DataFrame`` of integer or string columns to assemble into a ``Series`` of ``Timestamps``. + +.. ipython:: python + + df = pd.DataFrame( + {"year": [2015, 2016], "month": [2, 3], "day": [4, 5], "hour": [2, 3]} + ) + pd.to_datetime(df) + + +You can pass only the columns that you need to assemble. + +.. ipython:: python + + pd.to_datetime(df[["year", "month", "day"]]) + +``pd.to_datetime`` looks for standard designations of the datetime component in the column names, including: + +* required: ``year``, ``month``, ``day`` +* optional: ``hour``, ``minute``, ``second``, ``millisecond``, ``microsecond``, ``nanosecond`` + +Invalid data +~~~~~~~~~~~~ + +The default behavior, ``errors='raise'``, is to raise when unparsable: + +.. code-block:: ipython + + In [2]: pd.to_datetime(['2009/07/31', 'asd'], errors='raise') + ValueError: Unknown string format + +Pass ``errors='ignore'`` to return the original input when unparsable: + +.. ipython:: python + + pd.to_datetime(["2009/07/31", "asd"], errors="ignore") + +Pass ``errors='coerce'`` to convert unparsable data to ``NaT`` (not a time): + +.. ipython:: python + + pd.to_datetime(["2009/07/31", "asd"], errors="coerce") + + +.. _timeseries.converting.epoch: + +Epoch timestamps +~~~~~~~~~~~~~~~~ + +pandas supports converting integer or float epoch times to ``Timestamp`` and +``DatetimeIndex``. The default unit is nanoseconds, since that is how ``Timestamp`` +objects are stored internally. However, epochs are often stored in another ``unit`` +which can be specified. These are computed from the starting point specified by the +``origin`` parameter. + +.. ipython:: python + + pd.to_datetime( + [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit="s" + ) + + pd.to_datetime( + [1349720105100, 1349720105200, 1349720105300, 1349720105400, 1349720105500], + unit="ms", + ) + +.. note:: + + The ``unit`` parameter does not use the same strings as the ``format`` parameter + that was discussed :ref:`above`). The + available units are listed on the documentation for :func:`pandas.to_datetime`. + +.. 
versionchanged:: 1.0.0 + +Constructing a :class:`Timestamp` or :class:`DatetimeIndex` with an epoch timestamp +with the ``tz`` argument specified will raise a ValueError. If you have +epochs in wall time in another timezone, you can read the epochs +as timezone-naive timestamps and then localize to the appropriate timezone: + +.. ipython:: python + + pd.Timestamp(1262347200000000000).tz_localize("US/Pacific") + pd.DatetimeIndex([1262347200000000000]).tz_localize("US/Pacific") + +.. note:: + + Epoch times will be rounded to the nearest nanosecond. + +.. warning:: + + Conversion of float epoch times can lead to inaccurate and unexpected results. + :ref:`Python floats ` have about 15 digits precision in + decimal. Rounding during conversion from float to high precision ``Timestamp`` is + unavoidable. The only way to achieve exact precision is to use a fixed-width + types (e.g. an int64). + + .. ipython:: python + + pd.to_datetime([1490195805.433, 1490195805.433502912], unit="s") + pd.to_datetime(1490195805433502912, unit="ns") + +.. seealso:: + + :ref:`timeseries.origin` + +.. _timeseries.converting.epoch_inverse: + +From timestamps to epoch +~~~~~~~~~~~~~~~~~~~~~~~~ + +To invert the operation from above, namely, to convert from a ``Timestamp`` to a 'unix' epoch: + +.. ipython:: python + + stamps = pd.date_range("2012-10-08 18:15:05", periods=4, freq="D") + stamps + +We subtract the epoch (midnight at January 1, 1970 UTC) and then floor divide by the +"unit" (1 second). + +.. ipython:: python + + (stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s") + +.. _timeseries.origin: + +Using the ``origin`` parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the ``origin`` parameter, one can specify an alternative starting point for creation +of a ``DatetimeIndex``. For example, to use 1960-01-01 as the starting date: + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01")) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. +Commonly called 'unix epoch' or POSIX time. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit="D") + +.. _timeseries.daterange: + +Generating ranges of timestamps +------------------------------- + +To generate an index with timestamps, you can use either the ``DatetimeIndex`` or +``Index`` constructor and pass in a list of datetime objects: + +.. ipython:: python + + dates = [ + datetime.datetime(2012, 5, 1), + datetime.datetime(2012, 5, 2), + datetime.datetime(2012, 5, 3), + ] + + # Note the frequency information + index = pd.DatetimeIndex(dates) + index + + # Automatically converted to DatetimeIndex + index = pd.Index(dates) + index + +In practice this becomes very cumbersome because we often need a very long +index with a large number of timestamps. If we need timestamps on a regular +frequency, we can use the :func:`date_range` and :func:`bdate_range` functions +to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a +**calendar day** while the default for ``bdate_range`` is a **business day**: + +.. ipython:: python + + start = datetime.datetime(2011, 1, 1) + end = datetime.datetime(2012, 1, 1) + + index = pd.date_range(start, end) + index + + index = pd.bdate_range(start, end) + index + +Convenience functions like ``date_range`` and ``bdate_range`` can utilize a +variety of :ref:`frequency aliases `: + +.. 
ipython:: python + + pd.date_range(start, periods=1000, freq="M") + + pd.bdate_range(start, periods=250, freq="BQS") + +``date_range`` and ``bdate_range`` make it easy to generate a range of dates +using various combinations of parameters like ``start``, ``end``, ``periods``, +and ``freq``. The start and end dates are strictly inclusive, so dates outside +of those specified will not be generated: + +.. ipython:: python + + pd.date_range(start, end, freq="BM") + + pd.date_range(start, end, freq="W") + + pd.bdate_range(end=end, periods=20) + + pd.bdate_range(start=start, periods=20) + +Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced +dates from ``start`` to ``end`` inclusively, with ``periods`` number of elements in the +resulting ``DatetimeIndex``: + +.. ipython:: python + + pd.date_range("2018-01-01", "2018-01-05", periods=5) + + pd.date_range("2018-01-01", "2018-01-05", periods=10) + +.. _timeseries.custom-freq-ranges: + +Custom frequency ranges +~~~~~~~~~~~~~~~~~~~~~~~ + +``bdate_range`` can also generate a range of custom frequency dates by using +the ``weekmask`` and ``holidays`` parameters. These parameters will only be +used if a custom frequency string is passed. + +.. ipython:: python + + weekmask = "Mon Wed Fri" + + holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)] + + pd.bdate_range(start, end, freq="C", weekmask=weekmask, holidays=holidays) + + pd.bdate_range(start, end, freq="CBMS", weekmask=weekmask) + +.. seealso:: + + :ref:`timeseries.custombusinessdays` + +.. _timeseries.timestamp-limits: + +Timestamp limitations +--------------------- + +Since pandas represents timestamps in nanosecond resolution, the time span that +can be represented using a 64-bit integer is limited to approximately 584 years: + +.. ipython:: python + + pd.Timestamp.min + pd.Timestamp.max + +.. seealso:: + + :ref:`timeseries.oob` + +.. _timeseries.datetimeindex: + +Indexing +-------- + +One of the main uses for ``DatetimeIndex`` is as an index for pandas objects. +The ``DatetimeIndex`` class contains many time series related optimizations: + +* A large range of dates for various offsets are pre-computed and cached + under the hood in order to make generating subsequent date ranges very fast + (just have to grab a slice). +* Fast shifting using the ``shift`` method on pandas objects. +* Unioning of overlapping ``DatetimeIndex`` objects with the same frequency is + very fast (important for fast data alignment). +* Quick access to date fields via properties such as ``year``, ``month``, etc. +* Regularization functions like ``snap`` and very fast ``asof`` logic. + +``DatetimeIndex`` objects have all the basic functionality of regular ``Index`` +objects, and a smorgasbord of advanced time series specific methods for easy +frequency processing. + +.. seealso:: + :ref:`Reindexing methods ` + +.. note:: + + While pandas does not force you to have a sorted date index, some of these + methods may have unexpected or incorrect behavior if the dates are unsorted. + +``DatetimeIndex`` can be used like a regular index and offers all of its +intelligent functionality like selection, slicing, etc. + +.. ipython:: python + + rng = pd.date_range(start, end, freq="BM") + ts = pd.Series(np.random.randn(len(rng)), index=rng) + ts.index + ts[:5].index + ts[::2].index + +.. _timeseries.partialindexing: + +Partial string indexing +~~~~~~~~~~~~~~~~~~~~~~~ + +Dates and strings that parse to timestamps can be passed as indexing parameters: + +.. 
ipython:: python + + ts["1/31/2011"] + + ts[datetime.datetime(2011, 12, 25):] + + ts["10/31/2011":"12/31/2011"] + +To provide convenience for accessing longer time series, you can also pass in +the year or year and month as strings: + +.. ipython:: python + + ts["2011"] + + ts["2011-6"] + +This type of slicing will work on a ``DataFrame`` with a ``DatetimeIndex`` as well. Since the +partial string selection is a form of label slicing, the endpoints **will be** included. This +would include matching times on an included date: + +.. warning:: + + Indexing ``DataFrame`` rows with a *single* string with getitem (e.g. ``frame[dtstring]``) + is deprecated starting with pandas 1.2.0 (given the ambiguity whether it is indexing + the rows or selecting a column) and will be removed in a future version. The equivalent + with ``.loc`` (e.g. ``frame.loc[dtstring]``) is still supported. + +.. ipython:: python + + dft = pd.DataFrame( + np.random.randn(100000, 1), + columns=["A"], + index=pd.date_range("20130101", periods=100000, freq="T"), + ) + dft + dft.loc["2013"] + +This starts on the very first time in the month, and includes the last date and +time for the month: + +.. ipython:: python + + dft["2013-1":"2013-2"] + +This specifies a stop time **that includes all of the times on the last day**: + +.. ipython:: python + + dft["2013-1":"2013-2-28"] + +This specifies an **exact** stop time (and is not the same as the above): + +.. ipython:: python + + dft["2013-1":"2013-2-28 00:00:00"] + +We are stopping on the included end-point as it is part of the index: + +.. ipython:: python + + dft["2013-1-15":"2013-1-15 12:30:00"] + +``DatetimeIndex`` partial string indexing also works on a ``DataFrame`` with a ``MultiIndex``: + +.. ipython:: python + + dft2 = pd.DataFrame( + np.random.randn(20, 1), + columns=["A"], + index=pd.MultiIndex.from_product( + [pd.date_range("20130101", periods=10, freq="12H"), ["a", "b"]] + ), + ) + dft2 + dft2.loc["2013-01-05"] + idx = pd.IndexSlice + dft2 = dft2.swaplevel(0, 1).sort_index() + dft2.loc[idx[:, "2013-01-05"], :] + +.. versionadded:: 0.25.0 + +Slicing with string indexing also honors UTC offset. + +.. ipython:: python + + df = pd.DataFrame([0], index=pd.DatetimeIndex(["2019-01-01"], tz="US/Pacific")) + df + df["2019-01-01 12:00:00+04:00":"2019-01-01 13:00:00+04:00"] + +.. _timeseries.slice_vs_exact_match: + +Slice vs. exact match +~~~~~~~~~~~~~~~~~~~~~ + +The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. + +Consider a ``Series`` object with a minute resolution index: + +.. ipython:: python + + series_minute = pd.Series( + [1, 2, 3], + pd.DatetimeIndex( + ["2011-12-31 23:59:00", "2012-01-01 00:00:00", "2012-01-01 00:02:00"] + ), + ) + series_minute.index.resolution + +A timestamp string less accurate than a minute gives a ``Series`` object. + +.. ipython:: python + + series_minute["2011-12-31 23"] + +A timestamp string with minute resolution (or more accurate), gives a scalar instead, i.e. it is not casted to a slice. + +.. ipython:: python + + series_minute["2011-12-31 23:59"] + series_minute["2011-12-31 23:59:00"] + +If index resolution is second, then the minute-accurate timestamp gives a +``Series``. + +.. 
ipython:: python + + series_second = pd.Series( + [1, 2, 3], + pd.DatetimeIndex( + ["2011-12-31 23:59:59", "2012-01-01 00:00:00", "2012-01-01 00:00:01"] + ), + ) + series_second.index.resolution + series_second["2011-12-31 23:59"] + +If the timestamp string is treated as a slice, it can be used to index ``DataFrame`` with ``.loc[]`` as well. + +.. ipython:: python + + dft_minute = pd.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6]}, index=series_minute.index + ) + dft_minute.loc["2011-12-31 23"] + + +.. warning:: + + However, if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics `. For example ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2012-12-31 23:59'`` has the same resolution as the index and there is no column with such name: + + To *always* have unambiguous selection, whether the row is treated as a slice or a single selection, use ``.loc``. + + .. ipython:: python + + dft_minute.loc["2011-12-31 23:59"] + +Note also that ``DatetimeIndex`` resolution cannot be less precise than day. + +.. ipython:: python + + series_monthly = pd.Series( + [1, 2, 3], pd.DatetimeIndex(["2011-12", "2012-01", "2012-02"]) + ) + series_monthly.index.resolution + series_monthly["2011-12"] # returns Series + + +Exact indexing +~~~~~~~~~~~~~~ + +As discussed in previous section, indexing a ``DatetimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with ``Timestamp`` or ``datetime`` objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. + +These ``Timestamp`` and ``datetime`` objects have exact ``hours, minutes,`` and ``seconds``, even though they were not explicitly specified (they are ``0``). + +.. ipython:: python + + dft[datetime.datetime(2013, 1, 1): datetime.datetime(2013, 2, 28)] + +With no defaults. + +.. ipython:: python + + dft[ + datetime.datetime(2013, 1, 1, 10, 12, 0): datetime.datetime( + 2013, 2, 28, 10, 12, 0 + ) + ] + +Truncating & fancy indexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A :meth:`~DataFrame.truncate` convenience function is provided that is similar +to slicing. Note that ``truncate`` assumes a 0 value for any unspecified date +component in a ``DatetimeIndex`` in contrast to slicing which returns any +partially matching dates: + +.. ipython:: python + + rng2 = pd.date_range("2011-01-01", "2012-01-01", freq="W") + ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2) + + ts2.truncate(before="2011-11", after="2011-12") + ts2["2011-11":"2011-12"] + +Even complicated fancy indexing that breaks the ``DatetimeIndex`` frequency +regularity will result in a ``DatetimeIndex``, although frequency is lost: + +.. ipython:: python + + ts2[[0, 2, 6]].index + +.. _timeseries.components: + +Time/date components +-------------------- + +There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DatetimeIndex``. + +.. 
csv-table:: + :header: "Property", "Description" + :widths: 15, 65 + + year, "The year of the datetime" + month,"The month of the datetime" + day,"The days of the datetime" + hour,"The hour of the datetime" + minute,"The minutes of the datetime" + second,"The seconds of the datetime" + microsecond,"The microseconds of the datetime" + nanosecond,"The nanoseconds of the datetime" + date,"Returns datetime.date (does not contain timezone information)" + time,"Returns datetime.time (does not contain timezone information)" + timetz,"Returns datetime.time as local time with timezone information" + dayofyear,"The ordinal day of year" + day_of_year,"The ordinal day of year" + weekofyear,"The week ordinal of the year" + week,"The week ordinal of the year" + dayofweek,"The number of the day of the week with Monday=0, Sunday=6" + day_of_week,"The number of the day of the week with Monday=0, Sunday=6" + weekday,"The number of the day of the week with Monday=0, Sunday=6" + quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." + days_in_month,"The number of days in the month of the datetime" + is_month_start,"Logical indicating if first day of month (defined by frequency)" + is_month_end,"Logical indicating if last day of month (defined by frequency)" + is_quarter_start,"Logical indicating if first day of quarter (defined by frequency)" + is_quarter_end,"Logical indicating if last day of quarter (defined by frequency)" + is_year_start,"Logical indicating if first day of year (defined by frequency)" + is_year_end,"Logical indicating if last day of year (defined by frequency)" + is_leap_year,"Logical indicating if the date belongs to a leap year" + +Furthermore, if you have a ``Series`` with datetimelike values, then you can +access these properties via the ``.dt`` accessor, as detailed in the section +on :ref:`.dt accessors`. + +.. versionadded:: 1.1.0 + +You may obtain the year, week and day components of the ISO year from the ISO 8601 standard: + +.. ipython:: python + + idx = pd.date_range(start="2019-12-29", freq="D", periods=4) + idx.isocalendar() + idx.to_series().dt.isocalendar() + +.. _timeseries.offsets: + +DateOffset objects +------------------ + +In the preceding examples, frequency strings (e.g. ``'D'``) were used to specify +a frequency that defined: + +* how the date times in :class:`DatetimeIndex` were spaced when using :meth:`date_range` +* the frequency of a :class:`Period` or :class:`PeriodIndex` + +These frequency strings map to a :class:`DateOffset` object and its subclasses. A :class:`DateOffset` +is similar to a :class:`Timedelta` that represents a duration of time but follows specific calendar duration rules. +For example, a :class:`Timedelta` day will always increment ``datetimes`` by 24 hours, while a :class:`DateOffset` day +will increment ``datetimes`` to the same time the next day whether a day represents 23, 24 or 25 hours due to daylight +savings time. However, all :class:`DateOffset` subclasses that are an hour or smaller +(``Hour``, ``Minute``, ``Second``, ``Milli``, ``Micro``, ``Nano``) behave like +:class:`Timedelta` and respect absolute time. + +The basic :class:`DateOffset` acts similar to ``dateutil.relativedelta`` (`relativedelta documentation`_) +that shifts a date time by the corresponding calendar duration specified. The +arithmetic operator (``+``) can be used to perform the shift. + +.. 
ipython:: python + + # This particular day contains a day light savings time transition + ts = pd.Timestamp("2016-10-30 00:00:00", tz="Europe/Helsinki") + # Respects absolute time + ts + pd.Timedelta(days=1) + # Respects calendar time + ts + pd.DateOffset(days=1) + friday = pd.Timestamp("2018-01-05") + friday.day_name() + # Add 2 business days (Friday --> Tuesday) + two_business_days = 2 * pd.offsets.BDay() + friday + two_business_days + (friday + two_business_days).day_name() + + +Most ``DateOffsets`` have associated frequencies strings, or offset aliases, that can be passed +into ``freq`` keyword arguments. The available date offsets and associated frequency strings can be found below: + +.. csv-table:: + :header: "Date Offset", "Frequency String", "Description" + :widths: 15, 15, 65 + + :class:`~pandas.tseries.offsets.DateOffset`, None, "Generic offset class, defaults to absolute 24 hours" + :class:`~pandas.tseries.offsets.BDay` or :class:`~pandas.tseries.offsets.BusinessDay`, ``'B'``,"business day (weekday)" + :class:`~pandas.tseries.offsets.CDay` or :class:`~pandas.tseries.offsets.CustomBusinessDay`, ``'C'``, "custom business day" + :class:`~pandas.tseries.offsets.Week`, ``'W'``, "one week, optionally anchored on a day of the week" + :class:`~pandas.tseries.offsets.WeekOfMonth`, ``'WOM'``, "the x-th day of the y-th week of each month" + :class:`~pandas.tseries.offsets.LastWeekOfMonth`, ``'LWOM'``, "the x-th day of the last week of each month" + :class:`~pandas.tseries.offsets.MonthEnd`, ``'M'``, "calendar month end" + :class:`~pandas.tseries.offsets.MonthBegin`, ``'MS'``, "calendar month begin" + :class:`~pandas.tseries.offsets.BMonthEnd` or :class:`~pandas.tseries.offsets.BusinessMonthEnd`, ``'BM'``, "business month end" + :class:`~pandas.tseries.offsets.BMonthBegin` or :class:`~pandas.tseries.offsets.BusinessMonthBegin`, ``'BMS'``, "business month begin" + :class:`~pandas.tseries.offsets.CBMonthEnd` or :class:`~pandas.tseries.offsets.CustomBusinessMonthEnd`, ``'CBM'``, "custom business month end" + :class:`~pandas.tseries.offsets.CBMonthBegin` or :class:`~pandas.tseries.offsets.CustomBusinessMonthBegin`, ``'CBMS'``, "custom business month begin" + :class:`~pandas.tseries.offsets.SemiMonthEnd`, ``'SM'``, "15th (or other day_of_month) and calendar month end" + :class:`~pandas.tseries.offsets.SemiMonthBegin`, ``'SMS'``, "15th (or other day_of_month) and calendar month begin" + :class:`~pandas.tseries.offsets.QuarterEnd`, ``'Q'``, "calendar quarter end" + :class:`~pandas.tseries.offsets.QuarterBegin`, ``'QS'``, "calendar quarter begin" + :class:`~pandas.tseries.offsets.BQuarterEnd`, ``'BQ``, "business quarter end" + :class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin" + :class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter" + :class:`~pandas.tseries.offsets.YearEnd`, ``'A'``, "calendar year end" + :class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin" + :class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end" + :class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin" + :class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year" + :class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday" + :class:`~pandas.tseries.offsets.BusinessHour`, ``'BH'``, "business hour" + :class:`~pandas.tseries.offsets.CustomBusinessHour`, ``'CBH'``, "custom business hour" + :class:`~pandas.tseries.offsets.Day`, ``'D'``, "one absolute day" + 
:class:`~pandas.tseries.offsets.Hour`, ``'H'``, "one hour" + :class:`~pandas.tseries.offsets.Minute`, ``'T'`` or ``'min'``,"one minute" + :class:`~pandas.tseries.offsets.Second`, ``'S'``, "one second" + :class:`~pandas.tseries.offsets.Milli`, ``'L'`` or ``'ms'``, "one millisecond" + :class:`~pandas.tseries.offsets.Micro`, ``'U'`` or ``'us'``, "one microsecond" + :class:`~pandas.tseries.offsets.Nano`, ``'N'``, "one nanosecond" + +``DateOffsets`` additionally have :meth:`rollforward` and :meth:`rollback` +methods for moving a date forward or backward respectively to a valid offset +date relative to the offset. For example, business offsets will roll dates +that land on the weekends (Saturday and Sunday) forward to Monday since +business offsets operate on the weekdays. + +.. ipython:: python + + ts = pd.Timestamp("2018-01-06 00:00:00") + ts.day_name() + # BusinessHour's valid offset dates are Monday through Friday + offset = pd.offsets.BusinessHour(start="09:00") + # Bring the date to the closest offset date (Monday) + offset.rollforward(ts) + # Date is brought to the closest offset date first and then the hour is added + ts + offset + +These operations preserve time (hour, minute, etc) information by default. +To reset time to midnight, use :meth:`normalize` before or after applying +the operation (depending on whether you want the time information included +in the operation). + +.. ipython:: python + + ts = pd.Timestamp("2014-01-01 09:00") + day = pd.offsets.Day() + day + ts + (day + ts).normalize() + + ts = pd.Timestamp("2014-01-01 22:00") + hour = pd.offsets.Hour() + hour + ts + (hour + ts).normalize() + (hour + pd.Timestamp("2014-01-01 23:30")).normalize() + +.. _relativedelta documentation: https://dateutil.readthedocs.io/en/stable/relativedelta.html + + +Parametric offsets +~~~~~~~~~~~~~~~~~~ + +Some of the offsets can be "parameterized" when created to result in different +behaviors. For example, the ``Week`` offset for generating weekly data accepts a +``weekday`` parameter which results in the generated dates always lying on a +particular day of the week: + +.. ipython:: python + + d = datetime.datetime(2008, 8, 18, 9, 0) + d + d + pd.offsets.Week() + d + pd.offsets.Week(weekday=4) + (d + pd.offsets.Week(weekday=4)).weekday() + + d - pd.offsets.Week() + +The ``normalize`` option will be effective for addition and subtraction. + +.. ipython:: python + + d + pd.offsets.Week(normalize=True) + d - pd.offsets.Week(normalize=True) + + +Another example is parameterizing ``YearEnd`` with the specific ending month: + +.. ipython:: python + + d + pd.offsets.YearEnd() + d + pd.offsets.YearEnd(month=6) + + +.. _timeseries.offsetseries: + +Using offsets with ``Series`` / ``DatetimeIndex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Offsets can be used with either a ``Series`` or ``DatetimeIndex`` to +apply the offset to each element. + +.. ipython:: python + + rng = pd.date_range("2012-01-01", "2012-01-03") + s = pd.Series(rng) + rng + rng + pd.DateOffset(months=2) + s + pd.DateOffset(months=2) + s - pd.DateOffset(months=2) + +If the offset class maps directly to a ``Timedelta`` (``Day``, ``Hour``, +``Minute``, ``Second``, ``Micro``, ``Milli``, ``Nano``) it can be +used exactly like a ``Timedelta`` - see the +:ref:`Timedelta section` for more examples. + +.. 
ipython:: python + + s - pd.offsets.Day(2) + td = s - pd.Series(pd.date_range("2011-12-29", "2011-12-31")) + td + td + pd.offsets.Minute(15) + +Note that some offsets (such as ``BQuarterEnd``) do not have a +vectorized implementation. They can still be used but may +calculate significantly slower and will show a ``PerformanceWarning`` + +.. ipython:: python + :okwarning: + + rng + pd.offsets.BQuarterEnd() + + +.. _timeseries.custombusinessdays: + +Custom business days +~~~~~~~~~~~~~~~~~~~~ + +The ``CDay`` or ``CustomBusinessDay`` class provides a parametric +``BusinessDay`` class which can be used to create customized business day +calendars which account for local holidays and local weekend conventions. + +As an interesting example, let's look at Egypt where a Friday-Saturday weekend is observed. + +.. ipython:: python + + weekmask_egypt = "Sun Mon Tue Wed Thu" + + # They also observe International Workers' Day so let's + # add that for a couple of years + + holidays = [ + "2012-05-01", + datetime.datetime(2013, 5, 1), + np.datetime64("2014-05-01"), + ] + bday_egypt = pd.offsets.CustomBusinessDay( + holidays=holidays, + weekmask=weekmask_egypt, + ) + dt = datetime.datetime(2013, 4, 30) + dt + 2 * bday_egypt + +Let's map to the weekday names: + +.. ipython:: python + + dts = pd.date_range(dt, periods=5, freq=bday_egypt) + + pd.Series(dts.weekday, dts).map(pd.Series("Mon Tue Wed Thu Fri Sat Sun".split())) + +Holiday calendars can be used to provide the list of holidays. See the +:ref:`holiday calendar` section for more information. + +.. ipython:: python + + from pandas.tseries.holiday import USFederalHolidayCalendar + + bday_us = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar()) + + # Friday before MLK Day + dt = datetime.datetime(2014, 1, 17) + + # Tuesday after MLK Day (Monday is skipped because it's a holiday) + dt + bday_us + +Monthly offsets that respect a certain holiday calendar can be defined +in the usual way. + +.. ipython:: python + + bmth_us = pd.offsets.CustomBusinessMonthBegin(calendar=USFederalHolidayCalendar()) + + # Skip new years + dt = datetime.datetime(2013, 12, 17) + dt + bmth_us + + # Define date index with custom offset + pd.date_range(start="20100101", end="20120101", freq=bmth_us) + +.. note:: + + The frequency string 'C' is used to indicate that a CustomBusinessDay + DateOffset is used, it is important to note that since CustomBusinessDay is + a parameterised type, instances of CustomBusinessDay may differ and this is + not detectable from the 'C' frequency string. The user therefore needs to + ensure that the 'C' frequency string is used consistently within the user's + application. + +.. _timeseries.businesshour: + +Business hour +~~~~~~~~~~~~~ + +The ``BusinessHour`` class provides a business hour representation on ``BusinessDay``, +allowing to use specific start and end times. + +By default, ``BusinessHour`` uses 9:00 - 17:00 as business hours. +Adding ``BusinessHour`` will increment ``Timestamp`` by hourly frequency. +If target ``Timestamp`` is out of business hours, move to the next business hour +then increment it. If the result exceeds the business hours end, the remaining +hours are added to the next business day. + +.. 
ipython:: python + + bh = pd.offsets.BusinessHour() + bh + + # 2014-08-01 is Friday + pd.Timestamp("2014-08-01 10:00").weekday() + pd.Timestamp("2014-08-01 10:00") + bh + + # The example below is the same as: pd.Timestamp('2014-08-01 09:00') + bh + pd.Timestamp("2014-08-01 08:00") + bh + + # If the result is on the end time, move to the next business day + pd.Timestamp("2014-08-01 16:00") + bh + + # The remaining hours are added to the next day + pd.Timestamp("2014-08-01 16:30") + bh + + # Adding 2 business hours + pd.Timestamp("2014-08-01 10:00") + pd.offsets.BusinessHour(2) + + # Subtracting 3 business hours + pd.Timestamp("2014-08-01 10:00") + pd.offsets.BusinessHour(-3) + +You can also specify the ``start`` and ``end`` times by keyword. The argument must +be a ``str`` with an ``hour:minute`` representation or a ``datetime.time`` +instance. Specifying seconds, microseconds, or nanoseconds as part of the business hour +results in ``ValueError``. + +.. ipython:: python + + bh = pd.offsets.BusinessHour(start="11:00", end=datetime.time(20, 0)) + bh + + pd.Timestamp("2014-08-01 13:00") + bh + pd.Timestamp("2014-08-01 09:00") + bh + pd.Timestamp("2014-08-01 18:00") + bh + +Passing a ``start`` time later than ``end`` represents a midnight business hour. +In this case, the business hour exceeds midnight and overlaps into the next day. +Valid business hours are distinguished by whether they start from a valid ``BusinessDay``. + +.. ipython:: python + + bh = pd.offsets.BusinessHour(start="17:00", end="09:00") + bh + + pd.Timestamp("2014-08-01 17:00") + bh + pd.Timestamp("2014-08-01 23:00") + bh + + # Although 2014-08-02 is Saturday, + # it is valid because it starts from 08-01 (Friday). + pd.Timestamp("2014-08-02 04:00") + bh + + # Although 2014-08-04 is Monday, + # it is out of business hours because it starts from 08-03 (Sunday). + pd.Timestamp("2014-08-04 04:00") + bh + +Applying ``BusinessHour.rollforward`` and ``rollback`` to a timestamp outside business hours results in +the next business hour start or the previous day's end. Unlike other offsets, ``BusinessHour.rollforward`` +may, by definition, output a different result from ``apply``. + +This is because one day's business hour end is equal to the next day's business hour start. For example, +under the default business hours (9:00 - 17:00), there is no gap (0 minutes) between ``2014-08-01 17:00`` and +``2014-08-04 09:00``. + +.. ipython:: python + + # This adjusts a Timestamp to the business hour edge + pd.offsets.BusinessHour().rollback(pd.Timestamp("2014-08-02 15:00")) + pd.offsets.BusinessHour().rollforward(pd.Timestamp("2014-08-02 15:00")) + + # It is the same as BusinessHour() + pd.Timestamp('2014-08-01 17:00'). + # And it is the same as BusinessHour() + pd.Timestamp('2014-08-04 09:00') + pd.offsets.BusinessHour() + pd.Timestamp("2014-08-02 15:00") + + # BusinessDay results (for reference) + pd.offsets.BusinessHour().rollforward(pd.Timestamp("2014-08-02")) + + # It is the same as BusinessDay() + pd.Timestamp('2014-08-01') + # The result is the same as rollforward because BusinessDay never overlaps. + pd.offsets.BusinessHour() + pd.Timestamp("2014-08-02") + +``BusinessHour`` regards Saturday and Sunday as holidays. To use arbitrary +holidays, you can use the ``CustomBusinessHour`` offset, as explained in the +following subsection. + +.. _timeseries.custombusinesshour: + +Custom business hour +~~~~~~~~~~~~~~~~~~~~ + +The ``CustomBusinessHour`` is a mixture of ``BusinessHour`` and ``CustomBusinessDay`` which +allows you to specify arbitrary holidays. 
``CustomBusinessHour`` works as the same +as ``BusinessHour`` except that it skips specified custom holidays. + +.. ipython:: python + + from pandas.tseries.holiday import USFederalHolidayCalendar + + bhour_us = pd.offsets.CustomBusinessHour(calendar=USFederalHolidayCalendar()) + # Friday before MLK Day + dt = datetime.datetime(2014, 1, 17, 15) + + dt + bhour_us + + # Tuesday after MLK Day (Monday is skipped because it's a holiday) + dt + bhour_us * 2 + +You can use keyword arguments supported by either ``BusinessHour`` and ``CustomBusinessDay``. + +.. ipython:: python + + bhour_mon = pd.offsets.CustomBusinessHour(start="10:00", weekmask="Tue Wed Thu Fri") + + # Monday is skipped because it's a holiday, business hour starts from 10:00 + dt + bhour_mon * 2 + +.. _timeseries.offset_aliases: + +Offset aliases +~~~~~~~~~~~~~~ + +A number of string aliases are given to useful common time series +frequencies. We will refer to these aliases as *offset aliases*. + +.. csv-table:: + :header: "Alias", "Description" + :widths: 15, 100 + + "B", "business day frequency" + "C", "custom business day frequency" + "D", "calendar day frequency" + "W", "weekly frequency" + "M", "month end frequency" + "SM", "semi-month end frequency (15th and end of month)" + "BM", "business month end frequency" + "CBM", "custom business month end frequency" + "MS", "month start frequency" + "SMS", "semi-month start frequency (1st and 15th)" + "BMS", "business month start frequency" + "CBMS", "custom business month start frequency" + "Q", "quarter end frequency" + "BQ", "business quarter end frequency" + "QS", "quarter start frequency" + "BQS", "business quarter start frequency" + "A, Y", "year end frequency" + "BA, BY", "business year end frequency" + "AS, YS", "year start frequency" + "BAS, BYS", "business year start frequency" + "BH", "business hour frequency" + "H", "hourly frequency" + "T, min", "minutely frequency" + "S", "secondly frequency" + "L, ms", "milliseconds" + "U, us", "microseconds" + "N", "nanoseconds" + +.. note:: + + When using the offset aliases above, it should be noted that functions + such as :func:`date_range`, :func:`bdate_range`, will only return + timestamps that are in the interval defined by ``start_date`` and + ``end_date``. If the ``start_date`` does not correspond to the frequency, + the returned timestamps will start at the next valid timestamp, same for + ``end_date``, the returned timestamps will stop at the previous valid + timestamp. + + For example, for the offset ``MS``, if the ``start_date`` is not the first + of the month, the returned timestamps will start with the first day of the + next month. If ``end_date`` is not the first day of a month, the last + returned timestamp will be the first day of the corresponding month. + + .. ipython:: python + + dates_lst_1 = pd.date_range("2020-01-06", "2020-04-03", freq="MS") + dates_lst_1 + + dates_lst_2 = pd.date_range("2020-01-01", "2020-04-01", freq="MS") + dates_lst_2 + + We can see in the above example :func:`date_range` and + :func:`bdate_range` will only return the valid timestamps between the + ``start_date`` and ``end_date``. If these are not valid timestamps for the + given frequency it will roll to the next value for ``start_date`` + (respectively previous for the ``end_date``) + + +Combining aliases +~~~~~~~~~~~~~~~~~ + +As we have seen previously, the alias and the offset instance are fungible in +most functions: + +.. 
ipython:: python + + pd.date_range(start, periods=5, freq="B") + + pd.date_range(start, periods=5, freq=pd.offsets.BDay()) + +You can combine together day and intraday offsets: + +.. ipython:: python + + pd.date_range(start, periods=10, freq="2h20min") + + pd.date_range(start, periods=10, freq="1D10U") + +Anchored offsets +~~~~~~~~~~~~~~~~ + +For some frequencies you can specify an anchoring suffix: + +.. csv-table:: + :header: "Alias", "Description" + :widths: 15, 100 + + "W\-SUN", "weekly frequency (Sundays). Same as 'W'" + "W\-MON", "weekly frequency (Mondays)" + "W\-TUE", "weekly frequency (Tuesdays)" + "W\-WED", "weekly frequency (Wednesdays)" + "W\-THU", "weekly frequency (Thursdays)" + "W\-FRI", "weekly frequency (Fridays)" + "W\-SAT", "weekly frequency (Saturdays)" + "(B)Q(S)\-DEC", "quarterly frequency, year ends in December. Same as 'Q'" + "(B)Q(S)\-JAN", "quarterly frequency, year ends in January" + "(B)Q(S)\-FEB", "quarterly frequency, year ends in February" + "(B)Q(S)\-MAR", "quarterly frequency, year ends in March" + "(B)Q(S)\-APR", "quarterly frequency, year ends in April" + "(B)Q(S)\-MAY", "quarterly frequency, year ends in May" + "(B)Q(S)\-JUN", "quarterly frequency, year ends in June" + "(B)Q(S)\-JUL", "quarterly frequency, year ends in July" + "(B)Q(S)\-AUG", "quarterly frequency, year ends in August" + "(B)Q(S)\-SEP", "quarterly frequency, year ends in September" + "(B)Q(S)\-OCT", "quarterly frequency, year ends in October" + "(B)Q(S)\-NOV", "quarterly frequency, year ends in November" + "(B)A(S)\-DEC", "annual frequency, anchored end of December. Same as 'A'" + "(B)A(S)\-JAN", "annual frequency, anchored end of January" + "(B)A(S)\-FEB", "annual frequency, anchored end of February" + "(B)A(S)\-MAR", "annual frequency, anchored end of March" + "(B)A(S)\-APR", "annual frequency, anchored end of April" + "(B)A(S)\-MAY", "annual frequency, anchored end of May" + "(B)A(S)\-JUN", "annual frequency, anchored end of June" + "(B)A(S)\-JUL", "annual frequency, anchored end of July" + "(B)A(S)\-AUG", "annual frequency, anchored end of August" + "(B)A(S)\-SEP", "annual frequency, anchored end of September" + "(B)A(S)\-OCT", "annual frequency, anchored end of October" + "(B)A(S)\-NOV", "annual frequency, anchored end of November" + +These can be used as arguments to ``date_range``, ``bdate_range``, constructors +for ``DatetimeIndex``, as well as various other timeseries-related functions +in pandas. + +Anchored offset semantics +~~~~~~~~~~~~~~~~~~~~~~~~~ + +For those offsets that are anchored to the start or end of specific +frequency (``MonthEnd``, ``MonthBegin``, ``WeekEnd``, etc), the following +rules apply to rolling forward and backwards. + +When ``n`` is not 0, if the given date is not on an anchor point, it snapped to the next(previous) +anchor point, and moved ``|n|-1`` additional steps forwards or backwards. + +.. ipython:: python + + pd.Timestamp("2014-01-02") + pd.offsets.MonthBegin(n=1) + pd.Timestamp("2014-01-02") + pd.offsets.MonthEnd(n=1) + + pd.Timestamp("2014-01-02") - pd.offsets.MonthBegin(n=1) + pd.Timestamp("2014-01-02") - pd.offsets.MonthEnd(n=1) + + pd.Timestamp("2014-01-02") + pd.offsets.MonthBegin(n=4) + pd.Timestamp("2014-01-02") - pd.offsets.MonthBegin(n=4) + +If the given date *is* on an anchor point, it is moved ``|n|`` points forwards +or backwards. + +.. 
ipython:: python + + pd.Timestamp("2014-01-01") + pd.offsets.MonthBegin(n=1) + pd.Timestamp("2014-01-31") + pd.offsets.MonthEnd(n=1) + + pd.Timestamp("2014-01-01") - pd.offsets.MonthBegin(n=1) + pd.Timestamp("2014-01-31") - pd.offsets.MonthEnd(n=1) + + pd.Timestamp("2014-01-01") + pd.offsets.MonthBegin(n=4) + pd.Timestamp("2014-01-31") - pd.offsets.MonthBegin(n=4) + +For the case when ``n=0``, the date is not moved if on an anchor point, otherwise +it is rolled forward to the next anchor point. + +.. ipython:: python + + pd.Timestamp("2014-01-02") + pd.offsets.MonthBegin(n=0) + pd.Timestamp("2014-01-02") + pd.offsets.MonthEnd(n=0) + + pd.Timestamp("2014-01-01") + pd.offsets.MonthBegin(n=0) + pd.Timestamp("2014-01-31") + pd.offsets.MonthEnd(n=0) + +.. _timeseries.holiday: + +Holidays / holiday calendars +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Holidays and calendars provide a simple way to define holiday rules to be used +with ``CustomBusinessDay`` or in other analysis that requires a predefined +set of holidays. The ``AbstractHolidayCalendar`` class provides all the necessary +methods to return a list of holidays and only ``rules`` need to be defined +in a specific holiday calendar class. Furthermore, the ``start_date`` and ``end_date`` +class attributes determine over what date range holidays are generated. These +should be overwritten on the ``AbstractHolidayCalendar`` class to have the range +apply to all calendar subclasses. ``USFederalHolidayCalendar`` is the +only calendar that exists and primarily serves as an example for developing +other calendars. + +For holidays that occur on fixed dates (e.g., US Memorial Day or July 4th) an +observance rule determines when that holiday is observed if it falls on a weekend +or some other non-observed day. Defined observance rules are: + +.. csv-table:: + :header: "Rule", "Description" + :widths: 15, 70 + + "nearest_workday", "move Saturday to Friday and Sunday to Monday" + "sunday_to_monday", "move Sunday to following Monday" + "next_monday_or_tuesday", "move Saturday to Monday and Sunday/Monday to Tuesday" + "previous_friday", move Saturday and Sunday to previous Friday" + "next_monday", "move Saturday and Sunday to following Monday" + +An example of how holidays and holiday calendars are defined: + +.. ipython:: python + + from pandas.tseries.holiday import ( + Holiday, + USMemorialDay, + AbstractHolidayCalendar, + nearest_workday, + MO, + ) + + class ExampleCalendar(AbstractHolidayCalendar): + rules = [ + USMemorialDay, + Holiday("July 4th", month=7, day=4, observance=nearest_workday), + Holiday( + "Columbus Day", + month=10, + day=1, + offset=pd.DateOffset(weekday=MO(2)), + ), + ] + + cal = ExampleCalendar() + cal.holidays(datetime.datetime(2012, 1, 1), datetime.datetime(2012, 12, 31)) + +:hint: + **weekday=MO(2)** is same as **2 * Week(weekday=2)** + +Using this calendar, creating an index or doing offset arithmetic skips weekends +and holidays (i.e., Memorial Day/July 4th). For example, the below defines +a custom business day offset using the ``ExampleCalendar``. Like any other offset, +it can be used to create a ``DatetimeIndex`` or added to ``datetime`` +or ``Timestamp`` objects. + +.. 
ipython:: python + + pd.date_range( + start="7/1/2012", end="7/10/2012", freq=pd.offsets.CDay(calendar=cal) + ).to_pydatetime() + offset = pd.offsets.CustomBusinessDay(calendar=cal) + datetime.datetime(2012, 5, 25) + offset + datetime.datetime(2012, 7, 3) + offset + datetime.datetime(2012, 7, 3) + 2 * offset + datetime.datetime(2012, 7, 6) + offset + +Ranges are defined by the ``start_date`` and ``end_date`` class attributes +of ``AbstractHolidayCalendar``. The defaults are shown below. + +.. ipython:: python + + AbstractHolidayCalendar.start_date + AbstractHolidayCalendar.end_date + +These dates can be overwritten by setting the attributes to a +datetime, ``Timestamp``, or string. + +.. ipython:: python + + AbstractHolidayCalendar.start_date = datetime.datetime(2012, 1, 1) + AbstractHolidayCalendar.end_date = datetime.datetime(2012, 12, 31) + cal.holidays() + +Every calendar class is accessible by name using the ``get_calendar`` function, +which returns a holiday calendar class instance. Any imported calendar class will +automatically be available through this function. Also, ``HolidayCalendarFactory`` +provides an easy interface to create calendars that are combinations of calendars +or calendars with additional rules. + +.. ipython:: python + + from pandas.tseries.holiday import get_calendar, HolidayCalendarFactory, USLaborDay + + cal = get_calendar("ExampleCalendar") + cal.rules + new_cal = HolidayCalendarFactory("NewExampleCalendar", cal, USLaborDay) + new_cal.rules + +.. _timeseries.advanced_datetime: + +Time Series-related instance methods +------------------------------------ + +Shifting / lagging +~~~~~~~~~~~~~~~~~~ + +One may want to *shift* or *lag* the values in a time series backward or forward in +time. The method for this is :meth:`~Series.shift`, which is available on all of +the pandas objects. + +.. ipython:: python + + ts = pd.Series(range(len(rng)), index=rng) + ts = ts[:5] + ts.shift(1) + +The ``shift`` method accepts a ``freq`` argument which can be a +``DateOffset`` class, another ``timedelta``-like object, or an +:ref:`offset alias `. + +When ``freq`` is specified, the ``shift`` method changes all the dates in the index +rather than changing the alignment of the data and the index: + +.. ipython:: python + + ts.shift(5, freq="D") + ts.shift(5, freq=pd.offsets.BDay()) + ts.shift(5, freq="BM") + +Note that when ``freq`` is specified, the leading entry is no longer NaN +because the data is not being realigned. + +Frequency conversion +~~~~~~~~~~~~~~~~~~~~ + +The primary function for changing frequencies is the :meth:`~Series.asfreq` +method. For a ``DatetimeIndex``, this is basically just a thin but convenient +wrapper around :meth:`~Series.reindex` which generates a ``date_range`` and +calls ``reindex``. + +.. ipython:: python + + dr = pd.date_range("1/1/2010", periods=3, freq=3 * pd.offsets.BDay()) + ts = pd.Series(np.random.randn(3), index=dr) + ts + ts.asfreq(pd.offsets.BDay()) + +``asfreq`` provides a further convenience so you can specify an interpolation +method for any gaps that may appear after the frequency conversion. + +.. ipython:: python + + ts.asfreq(pd.offsets.BDay(), method="pad") + +Filling forward / backward +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Related to ``asfreq`` and ``reindex`` is :meth:`~Series.fillna`, which is +documented in the :ref:`missing data section `. + +Converting to Python datetimes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``DatetimeIndex`` can be converted to an array of Python native +:py:class:`datetime.datetime` objects using the ``to_pydatetime`` method. 
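+
+For example, a minimal sketch of this conversion (the three-day range below is just an arbitrary illustration, not taken from the examples above):
+
+.. ipython:: python
+
+    # a hypothetical three-day range, chosen only for illustration
+    pd.date_range("2012-01-01", periods=3, freq="D").to_pydatetime()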
+ +.. _timeseries.resampling: + +Resampling +---------- + +pandas has a simple, powerful, and efficient functionality for performing +resampling operations during frequency conversion (e.g., converting secondly +data into 5-minutely data). This is extremely common in, but not limited to, +financial applications. + +:meth:`~Series.resample` is a time-based groupby, followed by a reduction method +on each of its groups. See some :ref:`cookbook examples ` for +some advanced strategies. + +The ``resample()`` method can be used directly from ``DataFrameGroupBy`` objects, +see the :ref:`groupby docs `. + +Basics +~~~~~~ + +.. ipython:: python + + rng = pd.date_range("1/1/2012", periods=100, freq="S") + + ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng) + + ts.resample("5Min").sum() + +The ``resample`` function is very flexible and allows you to specify many +different parameters to control the frequency conversion and resampling +operation. + +Any function available via :ref:`dispatching ` is available as +a method of the returned object, including ``sum``, ``mean``, ``std``, ``sem``, +``max``, ``min``, ``median``, ``first``, ``last``, ``ohlc``: + +.. ipython:: python + + ts.resample("5Min").mean() + + ts.resample("5Min").ohlc() + + ts.resample("5Min").max() + + +For downsampling, ``closed`` can be set to 'left' or 'right' to specify which +end of the interval is closed: + +.. ipython:: python + + ts.resample("5Min", closed="right").mean() + + ts.resample("5Min", closed="left").mean() + +Parameters like ``label`` are used to manipulate the resulting labels. +``label`` specifies whether the result is labeled with the beginning or +the end of the interval. + +.. ipython:: python + + ts.resample("5Min").mean() # by default label='left' + + ts.resample("5Min", label="left").mean() + +.. warning:: + + The default values for ``label`` and ``closed`` is '**left**' for all + frequency offsets except for 'M', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' + which all have a default of 'right'. + + This might unintendedly lead to looking ahead, where the value for a later + time is pulled back to a previous time as in the following example with + the :class:`~pandas.tseries.offsets.BusinessDay` frequency: + + .. ipython:: python + + s = pd.date_range("2000-01-01", "2000-01-05").to_series() + s.iloc[2] = pd.NaT + s.dt.day_name() + + # default: label='left', closed='left' + s.resample("B").last().dt.day_name() + + Notice how the value for Sunday got pulled back to the previous Friday. + To get the behavior where the value for Sunday is pushed to Monday, use + instead + + .. ipython:: python + + s.resample("B", label="right", closed="right").last().dt.day_name() + +The ``axis`` parameter can be set to 0 or 1 and allows you to resample the +specified axis for a ``DataFrame``. + +``kind`` can be set to 'timestamp' or 'period' to convert the resulting index +to/from timestamp and time span representations. By default ``resample`` +retains the input representation. + +``convention`` can be set to 'start' or 'end' when resampling period data +(detail below). It specifies how low frequency periods are converted to higher +frequency periods. + + +Upsampling +~~~~~~~~~~ + +For upsampling, you can specify a way to upsample and the ``limit`` parameter to interpolate over the gaps that are created: + +.. 
ipython:: python + + # from secondly to every 250 milliseconds + + ts[:2].resample("250L").asfreq() + + ts[:2].resample("250L").ffill() + + ts[:2].resample("250L").ffill(limit=2) + +Sparse resampling +~~~~~~~~~~~~~~~~~ + +Sparse timeseries are the ones where you have a lot fewer points relative +to the amount of time you are looking to resample. Naively upsampling a sparse +series can potentially generate lots of intermediate values. When you don't want +to use a method to fill these values, e.g. ``fill_method`` is ``None``, then +intermediate values will be filled with ``NaN``. + +Since ``resample`` is a time-based groupby, the following is a method to efficiently +resample only the groups that are not all ``NaN``. + +.. ipython:: python + + rng = pd.date_range("2014-1-1", periods=100, freq="D") + pd.Timedelta("1s") + ts = pd.Series(range(100), index=rng) + +If we want to resample to the full range of the series: + +.. ipython:: python + + ts.resample("3T").sum() + +We can instead only resample those groups where we have points as follows: + +.. ipython:: python + + from functools import partial + from pandas.tseries.frequencies import to_offset + + def round(t, freq): + # round a Timestamp to a specified freq + freq = to_offset(freq) + return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value) + + ts.groupby(partial(round, freq="3T")).sum() + +.. _timeseries.aggregate: + +Aggregation +~~~~~~~~~~~ + +Similar to the :ref:`aggregating API `, :ref:`groupby API `, and the :ref:`window API `, +a ``Resampler`` can be selectively resampled. + +Resampling a ``DataFrame``, the default will be to act on all columns with the same function. + +.. ipython:: python + + df = pd.DataFrame( + np.random.randn(1000, 3), + index=pd.date_range("1/1/2012", freq="S", periods=1000), + columns=["A", "B", "C"], + ) + r = df.resample("3T") + r.mean() + +We can select a specific column or columns using standard getitem. + +.. ipython:: python + + r["A"].mean() + + r[["A", "B"]].mean() + +You can pass a list or dict of functions to do aggregation with, outputting a ``DataFrame``: + +.. ipython:: python + + r["A"].agg([np.sum, np.mean, np.std]) + +On a resampled ``DataFrame``, you can pass a list of functions to apply to each +column, which produces an aggregated result with a hierarchical index: + +.. ipython:: python + + r.agg([np.sum, np.mean]) + +By passing a dict to ``aggregate`` you can apply a different aggregation to the +columns of a ``DataFrame``: + +.. ipython:: python + :okexcept: + + r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + +The function names can also be strings. In order for a string to be valid it +must be implemented on the resampled object: + +.. ipython:: python + + r.agg({"A": "sum", "B": "std"}) + +Furthermore, you can also specify multiple aggregation functions for each column separately. + +.. ipython:: python + + r.agg({"A": ["sum", "std"], "B": ["mean", "std"]}) + + +If a ``DataFrame`` does not have a datetimelike index, but instead you want +to resample based on datetimelike column in the frame, it can passed to the +``on`` keyword. + +.. 
ipython:: python + + df = pd.DataFrame( + {"date": pd.date_range("2015-01-01", freq="W", periods=5), "a": np.arange(5)}, + index=pd.MultiIndex.from_arrays( + [[1, 2, 3, 4, 5], pd.date_range("2015-01-01", freq="W", periods=5)], + names=["v", "d"], + ), + ) + df + df.resample("M", on="date")[["a"]].sum() + +Similarly, if you instead want to resample by a datetimelike +level of ``MultiIndex``, its name or location can be passed to the +``level`` keyword. + +.. ipython:: python + + df.resample("M", level="d")[["a"]].sum() + +.. _timeseries.iterating-label: + +Iterating through groups +~~~~~~~~~~~~~~~~~~~~~~~~ + +With the ``Resampler`` object in hand, iterating through the grouped data is very +natural and functions similarly to :py:func:`itertools.groupby`: + +.. ipython:: python + + small = pd.Series( + range(6), + index=pd.to_datetime( + [ + "2017-01-01T00:00:00", + "2017-01-01T00:30:00", + "2017-01-01T00:31:00", + "2017-01-01T01:00:00", + "2017-01-01T03:00:00", + "2017-01-01T03:05:00", + ] + ), + ) + resampled = small.resample("H") + + for name, group in resampled: + print("Group: ", name) + print("-" * 27) + print(group, end="\n\n") + +See :ref:`groupby.iterating-label` or :class:`Resampler.__iter__` for more. + +.. _timeseries.adjust-the-start-of-the-bins: + +Use ``origin`` or ``offset`` to adjust the start of the bins +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1.0 + +The bins of the grouping are adjusted based on the beginning of the day of the time series starting point. This works well with frequencies that are multiples of a day (like ``30D``) or that divide a day evenly (like ``90s`` or ``1min``). This can create inconsistencies with some frequencies that do not meet this criteria. To change this behavior you can specify a fixed Timestamp with the argument ``origin``. + +For example: + +.. ipython:: python + + start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00" + middle = "2000-10-02 00:00:00" + rng = pd.date_range(start, end, freq="7min") + ts = pd.Series(np.arange(len(rng)) * 3, index=rng) + ts + +Here we can see that, when using ``origin`` with its default value (``'start_day'``), the result after ``'2000-10-02 00:00:00'`` are not identical depending on the start of time series: + +.. ipython:: python + + ts.resample("17min", origin="start_day").sum() + ts[middle:end].resample("17min", origin="start_day").sum() + + +Here we can see that, when setting ``origin`` to ``'epoch'``, the result after ``'2000-10-02 00:00:00'`` are identical depending on the start of time series: + +.. ipython:: python + + ts.resample("17min", origin="epoch").sum() + ts[middle:end].resample("17min", origin="epoch").sum() + + +If needed you can use a custom timestamp for ``origin``: + +.. ipython:: python + + ts.resample("17min", origin="2001-01-01").sum() + ts[middle:end].resample("17min", origin=pd.Timestamp("2001-01-01")).sum() + +If needed you can just adjust the bins with an ``offset`` Timedelta that would be added to the default ``origin``. +Those two examples are equivalent for this time series: + +.. ipython:: python + + ts.resample("17min", origin="start").sum() + ts.resample("17min", offset="23h30min").sum() + + +Note the use of ``'start'`` for ``origin`` on the last example. In that case, ``origin`` will be set to the first value of the timeseries. + +Backward resample +~~~~~~~~~~~~~~~~~ + +.. 
versionadded:: 1.3.0 + +Instead of adjusting the beginning of bins, sometimes we need to fix the end of the bins to make a backward resample with a given ``freq``. The backward resample sets ``closed`` to ``'right'`` by default since the last value should be considered as the edge point for the last bin. + +We can set ``origin`` to ``'end'``. The value for a specific ``Timestamp`` index stands for the resample result from the current ``Timestamp`` minus ``freq`` to the current ``Timestamp`` with a right close. + +.. ipython:: python + + ts.resample('17min', origin='end').sum() + +Besides, in contrast with the ``'start_day'`` option, ``end_day`` is supported. This will set the origin as the ceiling midnight of the largest ``Timestamp``. + +.. ipython:: python + + ts.resample('17min', origin='end_day').sum() + +The above result uses ``2000-10-02 00:29:00`` as the last bin's right edge since the following computation. + +.. ipython:: python + + ceil_mid = rng.max().ceil('D') + freq = pd.offsets.Minute(17) + bin_res = ceil_mid - freq * ((ceil_mid - rng.max()) // freq) + bin_res + +.. _timeseries.periods: + +Time span representation +------------------------ + +Regular intervals of time are represented by ``Period`` objects in pandas while +sequences of ``Period`` objects are collected in a ``PeriodIndex``, which can +be created with the convenience function ``period_range``. + +Period +~~~~~~ + +A ``Period`` represents a span of time (e.g., a day, a month, a quarter, etc). +You can specify the span via ``freq`` keyword using a frequency alias like below. +Because ``freq`` represents a span of ``Period``, it cannot be negative like "-3D". + +.. ipython:: python + + pd.Period("2012", freq="A-DEC") + + pd.Period("2012-1-1", freq="D") + + pd.Period("2012-1-1 19:00", freq="H") + + pd.Period("2012-1-1 19:00", freq="5H") + +Adding and subtracting integers from periods shifts the period by its own +frequency. Arithmetic is not allowed between ``Period`` with different ``freq`` (span). + +.. ipython:: python + + p = pd.Period("2012", freq="A-DEC") + p + 1 + p - 3 + p = pd.Period("2012-01", freq="2M") + p + 2 + p - 1 + p == pd.Period("2012-01", freq="3M") + + +If ``Period`` freq is daily or higher (``D``, ``H``, ``T``, ``S``, ``L``, ``U``, ``N``), ``offsets`` and ``timedelta``-like can be added if the result can have the same freq. Otherwise, ``ValueError`` will be raised. + +.. ipython:: python + + p = pd.Period("2014-07-01 09:00", freq="H") + p + pd.offsets.Hour(2) + p + datetime.timedelta(minutes=120) + p + np.timedelta64(7200, "s") + +.. code-block:: ipython + + In [1]: p + pd.offsets.Minute(5) + Traceback + ... + ValueError: Input has different freq from Period(freq=H) + +If ``Period`` has other frequencies, only the same ``offsets`` can be added. Otherwise, ``ValueError`` will be raised. + +.. ipython:: python + + p = pd.Period("2014-07", freq="M") + p + pd.offsets.MonthEnd(3) + +.. code-block:: ipython + + In [1]: p + pd.offsets.MonthBegin(3) + Traceback + ... + ValueError: Input has different freq from Period(freq=M) + +Taking the difference of ``Period`` instances with the same frequency will +return the number of frequency units between them: + +.. ipython:: python + + pd.Period("2012", freq="A-DEC") - pd.Period("2002", freq="A-DEC") + +PeriodIndex and period_range +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Regular sequences of ``Period`` objects can be collected in a ``PeriodIndex``, +which can be constructed using the ``period_range`` convenience function: + +.. 
ipython:: python + + prng = pd.period_range("1/1/2011", "1/1/2012", freq="M") + prng + +The ``PeriodIndex`` constructor can also be used directly: + +.. ipython:: python + + pd.PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") + +Passing multiplied frequency outputs a sequence of ``Period`` which +has multiplied span. + +.. ipython:: python + + pd.period_range(start="2014-01", freq="3M", periods=4) + +If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor +endpoints for a ``PeriodIndex`` with frequency matching that of the +``PeriodIndex`` constructor. + +.. ipython:: python + + pd.period_range( + start=pd.Period("2017Q1", freq="Q"), end=pd.Period("2017Q2", freq="Q"), freq="M" + ) + +Just like ``DatetimeIndex``, a ``PeriodIndex`` can also be used to index pandas +objects: + +.. ipython:: python + + ps = pd.Series(np.random.randn(len(prng)), prng) + ps + +``PeriodIndex`` supports addition and subtraction with the same rule as ``Period``. + +.. ipython:: python + + idx = pd.period_range("2014-07-01 09:00", periods=5, freq="H") + idx + idx + pd.offsets.Hour(2) + + idx = pd.period_range("2014-07", periods=5, freq="M") + idx + idx + pd.offsets.MonthEnd(3) + +``PeriodIndex`` has its own dtype named ``period``, refer to :ref:`Period Dtypes `. + +.. _timeseries.period_dtype: + +Period dtypes +~~~~~~~~~~~~~ + +``PeriodIndex`` has a custom ``period`` dtype. This is a pandas extension +dtype similar to the :ref:`timezone aware dtype ` (``datetime64[ns, tz]``). + +The ``period`` dtype holds the ``freq`` attribute and is represented with +``period[freq]`` like ``period[D]`` or ``period[M]``, using :ref:`frequency strings `. + +.. ipython:: python + + pi = pd.period_range("2016-01-01", periods=3, freq="M") + pi + pi.dtype + +The ``period`` dtype can be used in ``.astype(...)``. It allows one to change the +``freq`` of a ``PeriodIndex`` like ``.asfreq()`` and convert a +``DatetimeIndex`` to ``PeriodIndex`` like ``to_period()``: + +.. ipython:: python + + # change monthly freq to daily freq + pi.astype("period[D]") + + # convert to DatetimeIndex + pi.astype("datetime64[ns]") + + # convert to PeriodIndex + dti = pd.date_range("2011-01-01", freq="M", periods=3) + dti + dti.astype("period[M]") + +PeriodIndex partial string indexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +PeriodIndex now supports partial string slicing with non-monotonic indexes. + +.. versionadded:: 1.1.0 + +You can pass in dates and strings to ``Series`` and ``DataFrame`` with ``PeriodIndex``, in the same manner as ``DatetimeIndex``. For details, refer to :ref:`DatetimeIndex Partial String Indexing `. + +.. ipython:: python + + ps["2011-01"] + + ps[datetime.datetime(2011, 12, 25):] + + ps["10/31/2011":"12/31/2011"] + +Passing a string representing a lower frequency than ``PeriodIndex`` returns partial sliced data. + +.. ipython:: python + + ps["2011"] + + dfp = pd.DataFrame( + np.random.randn(600, 1), + columns=["A"], + index=pd.period_range("2013-01-01 9:00", periods=600, freq="T"), + ) + dfp + dfp.loc["2013-01-01 10H"] + +As with ``DatetimeIndex``, the endpoints will be included in the result. The example below slices data starting from 10:00 to 11:59. + +.. ipython:: python + + dfp["2013-01-01 10H":"2013-01-01 11H"] + + +Frequency conversion and resampling with PeriodIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The frequency of ``Period`` and ``PeriodIndex`` can be converted via the ``asfreq`` +method. Let's start with the fiscal year 2011, ending in December: + +.. 
ipython:: python + + p = pd.Period("2011", freq="A-DEC") + p + +We can convert it to a monthly frequency. Using the ``how`` parameter, we can +specify whether to return the starting or ending month: + +.. ipython:: python + + p.asfreq("M", how="start") + + p.asfreq("M", how="end") + +The shorthands 's' and 'e' are provided for convenience: + +.. ipython:: python + + p.asfreq("M", "s") + p.asfreq("M", "e") + +Converting to a "super-period" (e.g., annual frequency is a super-period of +quarterly frequency) automatically returns the super-period that includes the +input period: + +.. ipython:: python + + p = pd.Period("2011-12", freq="M") + + p.asfreq("A-NOV") + +Note that since we converted to an annual frequency that ends the year in +November, the monthly period of December 2011 is actually in the 2012 A-NOV +period. + +.. _timeseries.quarterly: + +Period conversions with anchored frequencies are particularly useful for +working with various quarterly data common to economics, business, and other +fields. Many organizations define quarters relative to the month in which their +fiscal year starts and ends. Thus, first quarter of 2011 could start in 2010 or +a few months into 2011. Via anchored frequencies, pandas works for all quarterly +frequencies ``Q-JAN`` through ``Q-DEC``. + +``Q-DEC`` define regular calendar quarters: + +.. ipython:: python + + p = pd.Period("2012Q1", freq="Q-DEC") + + p.asfreq("D", "s") + + p.asfreq("D", "e") + +``Q-MAR`` defines fiscal year end in March: + +.. ipython:: python + + p = pd.Period("2011Q4", freq="Q-MAR") + + p.asfreq("D", "s") + + p.asfreq("D", "e") + +.. _timeseries.interchange: + +Converting between representations +---------------------------------- + +Timestamped data can be converted to PeriodIndex-ed data using ``to_period`` +and vice-versa using ``to_timestamp``: + +.. ipython:: python + + rng = pd.date_range("1/1/2012", periods=5, freq="M") + + ts = pd.Series(np.random.randn(len(rng)), index=rng) + + ts + + ps = ts.to_period() + + ps + + ps.to_timestamp() + +Remember that 's' and 'e' can be used to return the timestamps at the start or +end of the period: + +.. ipython:: python + + ps.to_timestamp("D", how="s") + +Converting between period and timestamp enables some convenient arithmetic +functions to be used. In the following example, we convert a quarterly +frequency with year ending in November to 9am of the end of the month following +the quarter end: + +.. ipython:: python + + prng = pd.period_range("1990Q1", "2000Q4", freq="Q-NOV") + + ts = pd.Series(np.random.randn(len(prng)), prng) + + ts.index = (prng.asfreq("M", "e") + 1).asfreq("H", "s") + 9 + + ts.head() + +.. _timeseries.oob: + +Representing out-of-bounds spans +-------------------------------- + +If you have data that is outside of the ``Timestamp`` bounds, see :ref:`Timestamp limitations `, +then you can use a ``PeriodIndex`` and/or ``Series`` of ``Periods`` to do computations. + +.. ipython:: python + + span = pd.period_range("1215-01-01", "1381-01-01", freq="D") + span + +To convert from an ``int64`` based YYYYMMDD representation. + +.. ipython:: python + + s = pd.Series([20121231, 20141130, 99991231]) + s + + def conv(x): + return pd.Period(year=x // 10000, month=x // 100 % 100, day=x % 100, freq="D") + + s.apply(conv) + s.apply(conv)[2] + +These can easily be converted to a ``PeriodIndex``: + +.. ipython:: python + + span = pd.PeriodIndex(s.apply(conv)) + span + +.. 
_timeseries.timezone: + +Time zone handling +------------------ + +pandas provides rich support for working with timestamps in different time +zones using the ``pytz`` and ``dateutil`` libraries or :class:`datetime.timezone` +objects from the standard library. + + +Working with time zones +~~~~~~~~~~~~~~~~~~~~~~~ + +By default, pandas objects are time zone unaware: + +.. ipython:: python + + rng = pd.date_range("3/6/2012 00:00", periods=15, freq="D") + rng.tz is None + +To localize these dates to a time zone (assign a particular time zone to a naive date), +you can use the ``tz_localize`` method or the ``tz`` keyword argument in +:func:`date_range`, :class:`Timestamp`, or :class:`DatetimeIndex`. +You can either pass ``pytz`` or ``dateutil`` time zone objects or Olson time zone database strings. +Olson time zone strings will return ``pytz`` time zone objects by default. +To return ``dateutil`` time zone objects, append ``dateutil/`` before the string. + +* In ``pytz`` you can find a list of common (and less common) time zones using + ``from pytz import common_timezones, all_timezones``. +* ``dateutil`` uses the OS time zones so there isn't a fixed list available. For + common zones, the names are the same as ``pytz``. + +.. ipython:: python + + import dateutil + + # pytz + rng_pytz = pd.date_range("3/6/2012 00:00", periods=3, freq="D", tz="Europe/London") + rng_pytz.tz + + # dateutil + rng_dateutil = pd.date_range("3/6/2012 00:00", periods=3, freq="D") + rng_dateutil = rng_dateutil.tz_localize("dateutil/Europe/London") + rng_dateutil.tz + + # dateutil - utc special case + rng_utc = pd.date_range( + "3/6/2012 00:00", + periods=3, + freq="D", + tz=dateutil.tz.tzutc(), + ) + rng_utc.tz + +.. versionadded:: 0.25.0 + +.. ipython:: python + + # datetime.timezone + rng_utc = pd.date_range( + "3/6/2012 00:00", + periods=3, + freq="D", + tz=datetime.timezone.utc, + ) + rng_utc.tz + +Note that the ``UTC`` time zone is a special case in ``dateutil`` and should be constructed explicitly +as an instance of ``dateutil.tz.tzutc``. You can also construct other time +zones objects explicitly first. + +.. ipython:: python + + import pytz + + # pytz + tz_pytz = pytz.timezone("Europe/London") + rng_pytz = pd.date_range("3/6/2012 00:00", periods=3, freq="D") + rng_pytz = rng_pytz.tz_localize(tz_pytz) + rng_pytz.tz == tz_pytz + + # dateutil + tz_dateutil = dateutil.tz.gettz("Europe/London") + rng_dateutil = pd.date_range("3/6/2012 00:00", periods=3, freq="D", tz=tz_dateutil) + rng_dateutil.tz == tz_dateutil + +To convert a time zone aware pandas object from one time zone to another, +you can use the ``tz_convert`` method. + +.. ipython:: python + + rng_pytz.tz_convert("US/Eastern") + +.. note:: + + When using ``pytz`` time zones, :class:`DatetimeIndex` will construct a different + time zone object than a :class:`Timestamp` for the same time zone input. A :class:`DatetimeIndex` + can hold a collection of :class:`Timestamp` objects that may have different UTC offsets and cannot be + succinctly represented by one ``pytz`` time zone instance while one :class:`Timestamp` + represents one point in time with a specific UTC offset. + + .. ipython:: python + + dti = pd.date_range("2019-01-01", periods=3, freq="D", tz="US/Pacific") + dti.tz + ts = pd.Timestamp("2019-01-01", tz="US/Pacific") + ts.tz + +.. warning:: + + Be wary of conversions between libraries. For some time zones, ``pytz`` and ``dateutil`` have different + definitions of the zone. 
This is more of a problem for unusual time zones than for + 'standard' zones like ``US/Eastern``. + +.. warning:: + + Be aware that a time zone definition across versions of time zone libraries may not + be considered equal. This may cause problems when working with stored data that + is localized using one version and operated on with a different version. + See :ref:`here` for how to handle such a situation. + +.. warning:: + + For ``pytz`` time zones, it is incorrect to pass a time zone object directly into + the ``datetime.datetime`` constructor + (e.g., ``datetime.datetime(2011, 1, 1, tzinfo=pytz.timezone('US/Eastern'))``. + Instead, the datetime needs to be localized using the ``localize`` method + on the ``pytz`` time zone object. + +.. warning:: + + Be aware that for times in the future, correct conversion between time zones + (and UTC) cannot be guaranteed by any time zone library because a timezone's + offset from UTC may be changed by the respective government. + +.. warning:: + + If you are using dates beyond 2038-01-18, due to current deficiencies + in the underlying libraries caused by the year 2038 problem, daylight saving time (DST) adjustments + to timezone aware dates will not be applied. If and when the underlying libraries are fixed, + the DST transitions will be applied. + + For example, for two dates that are in British Summer Time (and so would normally be GMT+1), both the following asserts evaluate as true: + + .. ipython:: python + + d_2037 = "2037-03-31T010101" + d_2038 = "2038-03-31T010101" + DST = "Europe/London" + assert pd.Timestamp(d_2037, tz=DST) != pd.Timestamp(d_2037, tz="GMT") + assert pd.Timestamp(d_2038, tz=DST) == pd.Timestamp(d_2038, tz="GMT") + +Under the hood, all timestamps are stored in UTC. Values from a time zone aware +:class:`DatetimeIndex` or :class:`Timestamp` will have their fields (day, hour, minute, etc.) +localized to the time zone. However, timestamps with the same UTC value are +still considered to be equal even if they are in different time zones: + +.. ipython:: python + + rng_eastern = rng_utc.tz_convert("US/Eastern") + rng_berlin = rng_utc.tz_convert("Europe/Berlin") + + rng_eastern[2] + rng_berlin[2] + rng_eastern[2] == rng_berlin[2] + +Operations between :class:`Series` in different time zones will yield UTC +:class:`Series`, aligning the data on the UTC timestamps: + +.. ipython:: python + + ts_utc = pd.Series(range(3), pd.date_range("20130101", periods=3, tz="UTC")) + eastern = ts_utc.tz_convert("US/Eastern") + berlin = ts_utc.tz_convert("Europe/Berlin") + result = eastern + berlin + result + result.index + +To remove time zone information, use ``tz_localize(None)`` or ``tz_convert(None)``. +``tz_localize(None)`` will remove the time zone yielding the local time representation. +``tz_convert(None)`` will remove the time zone after converting to UTC time. + +.. ipython:: python + + didx = pd.date_range(start="2014-08-01 09:00", freq="H", periods=3, tz="US/Eastern") + didx + didx.tz_localize(None) + didx.tz_convert(None) + + # tz_convert(None) is identical to tz_convert('UTC').tz_localize(None) + didx.tz_convert("UTC").tz_localize(None) + +.. _timeseries.fold: + +Fold +~~~~ + +.. versionadded:: 1.1.0 + +For ambiguous times, pandas supports explicitly specifying the keyword-only fold argument. +Due to daylight saving time, one wall clock time can occur twice when shifting +from summer to winter time; fold describes whether the datetime-like corresponds +to the first (0) or the second time (1) the wall clock hits the ambiguous time. 
+Fold is supported only for constructing from naive ``datetime.datetime`` +(see `datetime documentation `__ for details) or from :class:`Timestamp` +or for constructing from components (see below). Only ``dateutil`` timezones are supported +(see `dateutil documentation `__ +for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz`` +timezones do not support fold (see `pytz documentation `__ +for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime +with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend to rely +on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct +control over how they are handled. + +.. ipython:: python + + pd.Timestamp( + datetime.datetime(2019, 10, 27, 1, 30, 0, 0), + tz="dateutil/Europe/London", + fold=0, + ) + pd.Timestamp( + year=2019, + month=10, + day=27, + hour=1, + minute=30, + tz="dateutil/Europe/London", + fold=1, + ) + +.. _timeseries.timezone_ambiguous: + +Ambiguous times when localizing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``tz_localize`` may not be able to determine the UTC offset of a timestamp +because daylight savings time (DST) in a local time zone causes some times to occur +twice within one day ("clocks fall back"). The following options are available: + +* ``'raise'``: Raises a ``pytz.AmbiguousTimeError`` (the default behavior) +* ``'infer'``: Attempt to determine the correct offset base on the monotonicity of the timestamps +* ``'NaT'``: Replaces ambiguous times with ``NaT`` +* ``bool``: ``True`` represents a DST time, ``False`` represents non-DST time. An array-like of ``bool`` values is supported for a sequence of times. + +.. ipython:: python + + rng_hourly = pd.DatetimeIndex( + ["11/06/2011 00:00", "11/06/2011 01:00", "11/06/2011 01:00", "11/06/2011 02:00"] + ) + +This will fail as there are ambiguous times (``'11/06/2011 01:00'``) + +.. code-block:: ipython + + In [2]: rng_hourly.tz_localize('US/Eastern') + AmbiguousTimeError: Cannot infer dst time from Timestamp('2011-11-06 01:00:00'), try using the 'ambiguous' argument + +Handle these ambiguous times by specifying the following. + +.. ipython:: python + + rng_hourly.tz_localize("US/Eastern", ambiguous="infer") + rng_hourly.tz_localize("US/Eastern", ambiguous="NaT") + rng_hourly.tz_localize("US/Eastern", ambiguous=[True, True, False, False]) + +.. _timeseries.timezone_nonexistent: + +Nonexistent times when localizing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A DST transition may also shift the local time ahead by 1 hour creating nonexistent +local times ("clocks spring forward"). The behavior of localizing a timeseries with nonexistent times +can be controlled by the ``nonexistent`` argument. The following options are available: + +* ``'raise'``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) +* ``'NaT'``: Replaces nonexistent times with ``NaT`` +* ``'shift_forward'``: Shifts nonexistent times forward to the closest real time +* ``'shift_backward'``: Shifts nonexistent times backward to the closest real time +* timedelta object: Shifts nonexistent times by the timedelta duration + +.. ipython:: python + + dti = pd.date_range(start="2015-03-29 02:30:00", periods=3, freq="H") + # 2:30 is a nonexistent time + +Localization of nonexistent times will raise an error by default. + +.. code-block:: ipython + + In [2]: dti.tz_localize('Europe/Warsaw') + NonExistentTimeError: 2015-03-29 02:30:00 + +Transform nonexistent times to ``NaT`` or shift the times. + +.. 
ipython:: python + + dti + dti.tz_localize("Europe/Warsaw", nonexistent="shift_forward") + dti.tz_localize("Europe/Warsaw", nonexistent="shift_backward") + dti.tz_localize("Europe/Warsaw", nonexistent=pd.Timedelta(1, unit="H")) + dti.tz_localize("Europe/Warsaw", nonexistent="NaT") + + +.. _timeseries.timezone_series: + +Time zone Series operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A :class:`Series` with time zone **naive** values is +represented with a dtype of ``datetime64[ns]``. + +.. ipython:: python + + s_naive = pd.Series(pd.date_range("20130101", periods=3)) + s_naive + +A :class:`Series` with a time zone **aware** values is +represented with a dtype of ``datetime64[ns, tz]`` where ``tz`` is the time zone + +.. ipython:: python + + s_aware = pd.Series(pd.date_range("20130101", periods=3, tz="US/Eastern")) + s_aware + +Both of these :class:`Series` time zone information +can be manipulated via the ``.dt`` accessor, see :ref:`the dt accessor section `. + +For example, to localize and convert a naive stamp to time zone aware. + +.. ipython:: python + + s_naive.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") + +Time zone information can also be manipulated using the ``astype`` method. +This method can convert between different timezone-aware dtypes. + +.. ipython:: python + + # convert to a new time zone + s_aware.astype("datetime64[ns, CET]") + +.. note:: + + Using :meth:`Series.to_numpy` on a ``Series``, returns a NumPy array of the data. + NumPy does not currently support time zones (even though it is *printing* in the local time zone!), + therefore an object array of Timestamps is returned for time zone aware data: + + .. ipython:: python + + s_naive.to_numpy() + s_aware.to_numpy() + + By converting to an object array of Timestamps, it preserves the time zone + information. For example, when converting back to a Series: + + .. ipython:: python + + pd.Series(s_aware.to_numpy()) + + However, if you want an actual NumPy ``datetime64[ns]`` array (with the values + converted to UTC) instead of an array of objects, you can specify the + ``dtype`` argument: + + .. ipython:: python + + s_aware.to_numpy(dtype="datetime64[ns]") diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst new file mode 100644 index 00000000..147981f2 --- /dev/null +++ b/doc/source/user_guide/visualization.rst @@ -0,0 +1,1837 @@ +.. _visualization: + +{{ header }} + +******************* +Chart visualization +******************* + + +.. note:: + + The examples below assume that you're using `Jupyter `_. + +This section demonstrates visualization through charting. For information on +visualization of tabular data please see the section on `Table Visualization `_. + +We use the standard convention for referencing the matplotlib API: + +.. ipython:: python + + import matplotlib.pyplot as plt + + plt.close("all") + +We provide the basics in pandas to easily create decent looking plots. +See the :ref:`ecosystem ` section for visualization +libraries that go beyond the basics documented here. + +.. note:: + + All calls to ``np.random`` are seeded with 123456. + +.. _visualization.basic: + +Basic plotting: ``plot`` +------------------------ + +We will demonstrate the basics, see the :ref:`cookbook` for +some advanced strategies. + +The ``plot`` method on Series and DataFrame is just a simple wrapper around +:meth:`plt.plot() `: + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. 
ipython:: python + + ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000)) + ts = ts.cumsum() + + @savefig series_plot_basic.png + ts.plot(); + +If the index consists of dates, it calls :meth:`gcf().autofmt_xdate() ` +to try to format the x-axis nicely as per above. + +On DataFrame, :meth:`~DataFrame.plot` is a convenience to plot all of the columns with labels: + +.. ipython:: python + :suppress: + + plt.close("all") + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=list("ABCD")) + df = df.cumsum() + + plt.figure(); + @savefig frame_plot_basic.png + df.plot(); + +You can plot one column versus another using the ``x`` and ``y`` keywords in +:meth:`~DataFrame.plot`: + +.. ipython:: python + :suppress: + + plt.close("all") + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + df3 = pd.DataFrame(np.random.randn(1000, 2), columns=["B", "C"]).cumsum() + df3["A"] = pd.Series(list(range(len(df)))) + + @savefig df_plot_xy.png + df3.plot(x="A", y="B"); + +.. note:: + + For more formatting and styling options, see + :ref:`formatting ` below. + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.other: + +Other plots +----------- + +Plotting methods allow for a handful of plot styles other than the +default line plot. These methods can be provided as the ``kind`` +keyword argument to :meth:`~DataFrame.plot`, and include: + +* :ref:`'bar' ` or :ref:`'barh' ` for bar plots +* :ref:`'hist' ` for histogram +* :ref:`'box' ` for boxplot +* :ref:`'kde' ` or :ref:`'density' ` for density plots +* :ref:`'area' ` for area plots +* :ref:`'scatter' ` for scatter plots +* :ref:`'hexbin' ` for hexagonal bin plots +* :ref:`'pie' ` for pie plots + +For example, a bar plot can be created the following way: + +.. ipython:: python + + plt.figure(); + + @savefig bar_plot_ex.png + df.iloc[5].plot(kind="bar"); + +You can also create these other plots using the methods ``DataFrame.plot.`` instead of providing the ``kind`` keyword argument. This makes it easier to discover plot methods and the specific arguments they use: + +.. ipython:: + :verbatim: + + In [14]: df = pd.DataFrame() + + In [15]: df.plot. # noqa: E225, E999 + df.plot.area df.plot.barh df.plot.density df.plot.hist df.plot.line df.plot.scatter + df.plot.bar df.plot.box df.plot.hexbin df.plot.kde df.plot.pie + +In addition to these ``kind`` s, there are the :ref:`DataFrame.hist() `, +and :ref:`DataFrame.boxplot() ` methods, which use a separate interface. + +Finally, there are several :ref:`plotting functions ` in ``pandas.plotting`` +that take a :class:`Series` or :class:`DataFrame` as an argument. These +include: + +* :ref:`Scatter Matrix ` +* :ref:`Andrews Curves ` +* :ref:`Parallel Coordinates ` +* :ref:`Lag Plot ` +* :ref:`Autocorrelation Plot ` +* :ref:`Bootstrap Plot ` +* :ref:`RadViz ` + +Plots may also be adorned with :ref:`errorbars ` +or :ref:`tables `. + +.. _visualization.barplot: + +Bar plots +~~~~~~~~~ + +For labeled, non-time series data, you may wish to produce a bar plot: + +.. ipython:: python + + plt.figure(); + + @savefig bar_plot_ex.png + df.iloc[5].plot.bar(); + plt.axhline(0, color="k"); + +Calling a DataFrame's :meth:`plot.bar() ` method produces a multiple +bar plot: + +.. ipython:: python + :suppress: + + plt.close("all") + plt.figure() + np.random.seed(123456) + +.. 
ipython:: python + + df2 = pd.DataFrame(np.random.rand(10, 4), columns=["a", "b", "c", "d"]) + + @savefig bar_plot_multi_ex.png + df2.plot.bar(); + +To produce a stacked bar plot, pass ``stacked=True``: + +.. ipython:: python + :suppress: + + plt.close("all") + plt.figure() + +.. ipython:: python + + @savefig bar_plot_stacked_ex.png + df2.plot.bar(stacked=True); + +To get horizontal bar plots, use the ``barh`` method: + +.. ipython:: python + :suppress: + + plt.close("all") + plt.figure() + +.. ipython:: python + + @savefig barh_plot_stacked_ex.png + df2.plot.barh(stacked=True); + +.. _visualization.hist: + +Histograms +~~~~~~~~~~ + +Histograms can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods. + +.. ipython:: python + + df4 = pd.DataFrame( + { + "a": np.random.randn(1000) + 1, + "b": np.random.randn(1000), + "c": np.random.randn(1000) - 1, + }, + columns=["a", "b", "c"], + ) + + plt.figure(); + + @savefig hist_new.png + df4.plot.hist(alpha=0.5); + + +.. ipython:: python + :suppress: + + plt.close("all") + +A histogram can be stacked using ``stacked=True``. Bin size can be changed +using the ``bins`` keyword. + +.. ipython:: python + + plt.figure(); + + @savefig hist_new_stacked.png + df4.plot.hist(stacked=True, bins=20); + +.. ipython:: python + :suppress: + + plt.close("all") + +You can pass other keywords supported by matplotlib ``hist``. For example, +horizontal and cumulative histograms can be drawn by +``orientation='horizontal'`` and ``cumulative=True``. + +.. ipython:: python + + plt.figure(); + + @savefig hist_new_kwargs.png + df4["a"].plot.hist(orientation="horizontal", cumulative=True); + +.. ipython:: python + :suppress: + + plt.close("all") + +See the :meth:`hist ` method and the +`matplotlib hist documentation `__ for more. + + +The existing interface ``DataFrame.hist`` to plot histogram still can be used. + +.. ipython:: python + + plt.figure(); + + @savefig hist_plot_ex.png + df["A"].diff().hist(); + +.. ipython:: python + :suppress: + + plt.close("all") + +:meth:`DataFrame.hist` plots the histograms of the columns on multiple +subplots: + +.. ipython:: python + + plt.figure(); + + @savefig frame_hist_ex.png + df.diff().hist(color="k", alpha=0.5, bins=50); + + +The ``by`` keyword can be specified to plot grouped histograms: + +.. ipython:: python + :suppress: + + plt.close("all") + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + data = pd.Series(np.random.randn(1000)) + + @savefig grouped_hist.png + data.hist(by=np.random.randint(0, 4, 1000), figsize=(6, 4)); + +.. ipython:: python + :suppress: + + plt.close("all") + np.random.seed(123456) + +In addition, the ``by`` keyword can also be specified in :meth:`DataFrame.plot.hist`. + +.. versionchanged:: 1.4.0 + +.. ipython:: python + + data = pd.DataFrame( + { + "a": np.random.choice(["x", "y", "z"], 1000), + "b": np.random.choice(["e", "f", "g"], 1000), + "c": np.random.randn(1000), + "d": np.random.randn(1000) - 1, + }, + ) + + @savefig grouped_hist_by.png + data.plot.hist(by=["a", "b"], figsize=(10, 5)); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.box: + +Box plots +~~~~~~~~~ + +Boxplot can be drawn calling :meth:`Series.plot.box` and :meth:`DataFrame.plot.box`, +or :meth:`DataFrame.boxplot` to visualize the distribution of values within each column. + +For instance, here is a boxplot representing five trials of 10 observations of +a uniform random variable on [0,1). + +.. 
ipython:: python + :suppress: + + plt.close("all") + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(10, 5), columns=["A", "B", "C", "D", "E"]) + + @savefig box_plot_new.png + df.plot.box(); + +Boxplot can be colorized by passing ``color`` keyword. You can pass a ``dict`` +whose keys are ``boxes``, ``whiskers``, ``medians`` and ``caps``. +If some keys are missing in the ``dict``, default colors are used +for the corresponding artists. Also, boxplot has ``sym`` keyword to specify fliers style. + +When you pass other type of arguments via ``color`` keyword, it will be directly +passed to matplotlib for all the ``boxes``, ``whiskers``, ``medians`` and ``caps`` +colorization. + +The colors are applied to every boxes to be drawn. If you want +more complicated colorization, you can get each drawn artists by passing +:ref:`return_type `. + +.. ipython:: python + + color = { + "boxes": "DarkGreen", + "whiskers": "DarkOrange", + "medians": "DarkBlue", + "caps": "Gray", + } + + @savefig box_new_colorize.png + df.plot.box(color=color, sym="r+"); + +.. ipython:: python + :suppress: + + plt.close("all") + +Also, you can pass other keywords supported by matplotlib ``boxplot``. +For example, horizontal and custom-positioned boxplot can be drawn by +``vert=False`` and ``positions`` keywords. + +.. ipython:: python + + @savefig box_new_kwargs.png + df.plot.box(vert=False, positions=[1, 4, 5, 6, 8]); + + +See the :meth:`boxplot ` method and the +`matplotlib boxplot documentation `__ for more. + + +The existing interface ``DataFrame.boxplot`` to plot boxplot still can be used. + +.. ipython:: python + :suppress: + + plt.close("all") + np.random.seed(123456) + +.. ipython:: python + :okwarning: + + df = pd.DataFrame(np.random.rand(10, 5)) + plt.figure(); + + @savefig box_plot_ex.png + bp = df.boxplot() + +You can create a stratified boxplot using the ``by`` keyword argument to create +groupings. For instance, + +.. ipython:: python + :suppress: + + plt.close("all") + np.random.seed(123456) + +.. ipython:: python + :okwarning: + + df = pd.DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) + df["X"] = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) + + plt.figure(); + + @savefig box_plot_ex2.png + bp = df.boxplot(by="X") + +You can also pass a subset of columns to plot, as well as group by multiple +columns: + +.. ipython:: python + :suppress: + + plt.close("all") + np.random.seed(123456) + +.. ipython:: python + :okwarning: + + df = pd.DataFrame(np.random.rand(10, 3), columns=["Col1", "Col2", "Col3"]) + df["X"] = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) + df["Y"] = pd.Series(["A", "B", "A", "B", "A", "B", "A", "B", "A", "B"]) + + plt.figure(); + + @savefig box_plot_ex3.png + bp = df.boxplot(column=["Col1", "Col2"], by=["X", "Y"]) + +.. ipython:: python + :suppress: + + plt.close("all") + +You could also create groupings with :meth:`DataFrame.plot.box`, for instance: + +.. versionchanged:: 1.4.0 + +.. ipython:: python + :suppress: + + plt.close("all") + np.random.seed(123456) + +.. ipython:: python + :okwarning: + + df = pd.DataFrame(np.random.rand(10, 3), columns=["Col1", "Col2", "Col3"]) + df["X"] = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) + + plt.figure(); + + @savefig box_plot_ex4.png + bp = df.plot.box(column=["Col1", "Col2"], by="X") + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.box.return: + +In ``boxplot``, the return type can be controlled by the ``return_type``, keyword. 
The valid choices are ``{"axes", "dict", "both", None}``. +Faceting, created by ``DataFrame.boxplot`` with the ``by`` +keyword, will affect the output type as well: + +================ ======= ========================== +``return_type`` Faceted Output type +================ ======= ========================== +``None`` No axes +``None`` Yes 2-D ndarray of axes +``'axes'`` No axes +``'axes'`` Yes Series of axes +``'dict'`` No dict of artists +``'dict'`` Yes Series of dicts of artists +``'both'`` No namedtuple +``'both'`` Yes Series of namedtuples +================ ======= ========================== + +``Groupby.boxplot`` always returns a ``Series`` of ``return_type``. + +.. ipython:: python + :okwarning: + + np.random.seed(1234) + df_box = pd.DataFrame(np.random.randn(50, 2)) + df_box["g"] = np.random.choice(["A", "B"], size=50) + df_box.loc[df_box["g"] == "B", 1] += 3 + + @savefig boxplot_groupby.png + bp = df_box.boxplot(by="g") + +.. ipython:: python + :suppress: + + plt.close("all") + +The subplots above are split by the numeric columns first, then the value of +the ``g`` column. Below the subplots are first split by the value of ``g``, +then by the numeric columns. + +.. ipython:: python + :okwarning: + + @savefig groupby_boxplot_vis.png + bp = df_box.groupby("g").boxplot() + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.area_plot: + +Area plot +~~~~~~~~~ + +You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. +Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. + +When input data contains ``NaN``, it will be automatically filled by 0. If you want to drop or fill by different values, use :func:`dataframe.dropna` or :func:`dataframe.fillna` before calling ``plot``. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + plt.figure() + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(10, 4), columns=["a", "b", "c", "d"]) + + @savefig area_plot_stacked.png + df.plot.area(); + +To produce an unstacked plot, pass ``stacked=False``. Alpha value is set to 0.5 unless otherwise specified: + +.. ipython:: python + :suppress: + + plt.close("all") + plt.figure() + +.. ipython:: python + + @savefig area_plot_unstacked.png + df.plot.area(stacked=False); + +.. _visualization.scatter: + +Scatter plot +~~~~~~~~~~~~ + +Scatter plot can be drawn by using the :meth:`DataFrame.plot.scatter` method. +Scatter plot requires numeric columns for the x and y axes. +These can be specified by the ``x`` and ``y`` keywords. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + plt.close("all") + plt.figure() + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(50, 4), columns=["a", "b", "c", "d"]) + df["species"] = pd.Categorical( + ["setosa"] * 20 + ["versicolor"] * 20 + ["virginica"] * 10 + ) + + @savefig scatter_plot.png + df.plot.scatter(x="a", y="b"); + +To plot multiple column groups in a single axes, repeat ``plot`` method specifying target ``ax``. +It is recommended to specify ``color`` and ``label`` keywords to distinguish each groups. + +.. ipython:: python + :okwarning: + + ax = df.plot.scatter(x="a", y="b", color="DarkBlue", label="Group 1") + @savefig scatter_plot_repeated.png + df.plot.scatter(x="c", y="d", color="DarkGreen", label="Group 2", ax=ax); + +.. ipython:: python + :suppress: + + plt.close("all") + +The keyword ``c`` may be given as the name of a column to provide colors for +each point: + +.. 
ipython:: python + + @savefig scatter_plot_colored.png + df.plot.scatter(x="a", y="b", c="c", s=50); + + +.. ipython:: python + :suppress: + + plt.close("all") + +If a categorical column is passed to ``c``, then a discrete colorbar will be produced: + +.. versionadded:: 1.3.0 + +.. ipython:: python + + @savefig scatter_plot_categorical.png + df.plot.scatter(x="a", y="b", c="species", cmap="viridis", s=50); + + +.. ipython:: python + :suppress: + + plt.close("all") + +You can pass other keywords supported by matplotlib +:meth:`scatter `. The example below shows a +bubble chart using a column of the ``DataFrame`` as the bubble size. + +.. ipython:: python + + @savefig scatter_plot_bubble.png + df.plot.scatter(x="a", y="b", s=df["c"] * 200); + +.. ipython:: python + :suppress: + + plt.close("all") + +See the :meth:`scatter ` method and the +`matplotlib scatter documentation `__ for more. + +.. _visualization.hexbin: + +Hexagonal bin plot +~~~~~~~~~~~~~~~~~~ + +You can create hexagonal bin plots with :meth:`DataFrame.plot.hexbin`. +Hexbin plots can be a useful alternative to scatter plots if your data are +too dense to plot each point individually. + +.. ipython:: python + :suppress: + + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 2), columns=["a", "b"]) + df["b"] = df["b"] + np.arange(1000) + + @savefig hexbin_plot.png + df.plot.hexbin(x="a", y="b", gridsize=25); + + +A useful keyword argument is ``gridsize``; it controls the number of hexagons +in the x-direction, and defaults to 100. A larger ``gridsize`` means more, smaller +bins. + +By default, a histogram of the counts around each ``(x, y)`` point is computed. +You can specify alternative aggregations by passing values to the ``C`` and +``reduce_C_function`` arguments. ``C`` specifies the value at each ``(x, y)`` point +and ``reduce_C_function`` is a function of one argument that reduces all the +values in a bin to a single number (e.g. ``mean``, ``max``, ``sum``, ``std``). In this +example the positions are given by columns ``a`` and ``b``, while the value is +given by column ``z``. The bins are aggregated with NumPy's ``max`` function. + +.. ipython:: python + :suppress: + + plt.close("all") + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 2), columns=["a", "b"]) + df["b"] = df["b"] + np.arange(1000) + df["z"] = np.random.uniform(0, 3, 1000) + + @savefig hexbin_plot_agg.png + df.plot.hexbin(x="a", y="b", C="z", reduce_C_function=np.max, gridsize=25); + +.. ipython:: python + :suppress: + + plt.close("all") + +See the :meth:`hexbin ` method and the +`matplotlib hexbin documentation `__ for more. + +.. _visualization.pie: + +Pie plot +~~~~~~~~ + +You can create a pie plot with :meth:`DataFrame.plot.pie` or :meth:`Series.plot.pie`. +If your data includes any ``NaN``, they will be automatically filled with 0. +A ``ValueError`` will be raised if there are any negative values in your data. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + plt.figure() + +.. ipython:: python + :okwarning: + + series = pd.Series(3 * np.random.rand(4), index=["a", "b", "c", "d"], name="series") + + @savefig series_pie_plot.png + series.plot.pie(figsize=(6, 6)); + +.. ipython:: python + :suppress: + + plt.close("all") + +For pie plots it's best to use square figures, i.e. a figure aspect ratio 1. 
+You can create the figure with equal width and height, or force the aspect ratio +to be equal after plotting by calling ``ax.set_aspect('equal')`` on the returned +``axes`` object. + +Note that pie plot with :class:`DataFrame` requires that you either specify a +target column by the ``y`` argument or ``subplots=True``. When ``y`` is +specified, pie plot of selected column will be drawn. If ``subplots=True`` is +specified, pie plots for each column are drawn as subplots. A legend will be +drawn in each pie plots by default; specify ``legend=False`` to hide it. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + plt.figure() + +.. ipython:: python + + df = pd.DataFrame( + 3 * np.random.rand(4, 2), index=["a", "b", "c", "d"], columns=["x", "y"] + ) + + @savefig df_pie_plot.png + df.plot.pie(subplots=True, figsize=(8, 4)); + +.. ipython:: python + :suppress: + + plt.close("all") + +You can use the ``labels`` and ``colors`` keywords to specify the labels and colors of each wedge. + +.. warning:: + + Most pandas plots use the ``label`` and ``color`` arguments (note the lack of "s" on those). + To be consistent with :func:`matplotlib.pyplot.pie` you must use ``labels`` and ``colors``. + +If you want to hide wedge labels, specify ``labels=None``. +If ``fontsize`` is specified, the value will be applied to wedge labels. +Also, other keywords supported by :func:`matplotlib.pyplot.pie` can be used. + + +.. ipython:: python + :suppress: + + plt.figure() + +.. ipython:: python + + @savefig series_pie_plot_options.png + series.plot.pie( + labels=["AA", "BB", "CC", "DD"], + colors=["r", "g", "b", "c"], + autopct="%.2f", + fontsize=20, + figsize=(6, 6), + ); + +If you pass values whose sum total is less than 1.0 they will be rescaled so that they sum to 1. + +.. ipython:: python + :suppress: + + plt.close("all") + plt.figure() + +.. ipython:: python + :okwarning: + + series = pd.Series([0.1] * 4, index=["a", "b", "c", "d"], name="series2") + + @savefig series_pie_plot_semi.png + series.plot.pie(figsize=(6, 6)); + +See the `matplotlib pie documentation `__ for more. + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.missing_data: + +Plotting with missing data +-------------------------- + +pandas tries to be pragmatic about plotting ``DataFrames`` or ``Series`` +that contain missing data. Missing values are dropped, left out, or filled +depending on the plot type. 
+ ++----------------+--------------------------------------+ +| Plot Type | NaN Handling | ++================+======================================+ +| Line | Leave gaps at NaNs | ++----------------+--------------------------------------+ +| Line (stacked) | Fill 0's | ++----------------+--------------------------------------+ +| Bar | Fill 0's | ++----------------+--------------------------------------+ +| Scatter | Drop NaNs | ++----------------+--------------------------------------+ +| Histogram | Drop NaNs (column-wise) | ++----------------+--------------------------------------+ +| Box | Drop NaNs (column-wise) | ++----------------+--------------------------------------+ +| Area | Fill 0's | ++----------------+--------------------------------------+ +| KDE | Drop NaNs (column-wise) | ++----------------+--------------------------------------+ +| Hexbin | Drop NaNs | ++----------------+--------------------------------------+ +| Pie | Fill 0's | ++----------------+--------------------------------------+ + +If any of these defaults are not what you want, or if you want to be +explicit about how missing values are handled, consider using +:meth:`~pandas.DataFrame.fillna` or :meth:`~pandas.DataFrame.dropna` +before plotting. + +.. _visualization.tools: + +Plotting tools +-------------- + +These functions can be imported from ``pandas.plotting`` +and take a :class:`Series` or :class:`DataFrame` as an argument. + +.. _visualization.scatter_matrix: + +Scatter matrix plot +~~~~~~~~~~~~~~~~~~~ + +You can create a scatter plot matrix using the +``scatter_matrix`` method in ``pandas.plotting``: + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + from pandas.plotting import scatter_matrix + + df = pd.DataFrame(np.random.randn(1000, 4), columns=["a", "b", "c", "d"]) + + @savefig scatter_matrix_kde.png + scatter_matrix(df, alpha=0.2, figsize=(6, 6), diagonal="kde"); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.kde: + +Density plot +~~~~~~~~~~~~ + +You can create density plots using the :meth:`Series.plot.kde` and :meth:`DataFrame.plot.kde` methods. + +.. ipython:: python + :suppress: + + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + ser = pd.Series(np.random.randn(1000)) + + @savefig kde_plot.png + ser.plot.kde(); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.andrews_curves: + +Andrews curves +~~~~~~~~~~~~~~ + +Andrews curves allow one to plot multivariate data as a large number +of curves that are created using the attributes of samples as coefficients +for Fourier series, see the `Wikipedia entry `__ +for more information. By coloring these curves differently for each class +it is possible to visualize data clustering. Curves belonging to samples +of the same class will usually be closer together and form larger structures. + +**Note**: The "Iris" dataset is available `here `__. + +.. ipython:: python + + from pandas.plotting import andrews_curves + + data = pd.read_csv("data/iris.data") + + plt.figure(); + + @savefig andrews_curves.png + andrews_curves(data, "Name"); + +.. _visualization.parallel_coordinates: + +Parallel coordinates +~~~~~~~~~~~~~~~~~~~~ + +Parallel coordinates is a plotting technique for plotting multivariate data, +see the `Wikipedia entry `__ +for an introduction. +Parallel coordinates allows one to see clusters in data and to estimate other statistics visually. +Using parallel coordinates points are represented as connected line segments. 
+Each vertical line represents one attribute. One set of connected line segments +represents one data point. Points that tend to cluster will appear closer together. + +.. ipython:: python + + from pandas.plotting import parallel_coordinates + + data = pd.read_csv("data/iris.data") + + plt.figure(); + + @savefig parallel_coordinates.png + parallel_coordinates(data, "Name"); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.lag: + +Lag plot +~~~~~~~~ + +Lag plots are used to check if a data set or time series is random. Random +data should not exhibit any structure in the lag plot. Non-random structure +implies that the underlying data are not random. The ``lag`` argument may +be passed, and when ``lag=1`` the plot is essentially ``data[:-1]`` vs. +``data[1:]``. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + from pandas.plotting import lag_plot + + plt.figure(); + + spacing = np.linspace(-99 * np.pi, 99 * np.pi, num=1000) + data = pd.Series(0.1 * np.random.rand(1000) + 0.9 * np.sin(spacing)) + + @savefig lag_plot.png + lag_plot(data); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.autocorrelation: + +Autocorrelation plot +~~~~~~~~~~~~~~~~~~~~ + +Autocorrelation plots are often used for checking randomness in time series. +This is done by computing autocorrelations for data values at varying time lags. +If time series is random, such autocorrelations should be near zero for any and +all time-lag separations. If time series is non-random then one or more of the +autocorrelations will be significantly non-zero. The horizontal lines displayed +in the plot correspond to 95% and 99% confidence bands. The dashed line is 99% +confidence band. See the +`Wikipedia entry `__ for more about +autocorrelation plots. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + from pandas.plotting import autocorrelation_plot + + plt.figure(); + + spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000) + data = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing)) + + @savefig autocorrelation_plot.png + autocorrelation_plot(data); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.bootstrap: + +Bootstrap plot +~~~~~~~~~~~~~~ + +Bootstrap plots are used to visually assess the uncertainty of a statistic, such +as mean, median, midrange, etc. A random subset of a specified size is selected +from a data set, the statistic in question is computed for this subset and the +process is repeated a specified number of times. Resulting plots and histograms +are what constitutes the bootstrap plot. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + from pandas.plotting import bootstrap_plot + + data = pd.Series(np.random.rand(1000)) + + @savefig bootstrap_plot.png + bootstrap_plot(data, size=50, samples=500, color="grey"); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.radviz: + +RadViz +~~~~~~ + +RadViz is a way of visualizing multi-variate data. It is based on a simple +spring tension minimization algorithm. Basically you set up a bunch of points in +a plane. In our case they are equally spaced on a unit circle. Each point +represents a single attribute. You then pretend that each sample in the data set +is attached to each of these points by a spring, the stiffness of which is +proportional to the numerical value of that attribute (they are normalized to +unit interval). 
The point in the plane, where our sample settles to (where the +forces acting on our sample are at an equilibrium) is where a dot representing +our sample will be drawn. Depending on which class that sample belongs it will +be colored differently. +See the R package `Radviz `__ +for more information. + +**Note**: The "Iris" dataset is available `here `__. + +.. ipython:: python + + from pandas.plotting import radviz + + data = pd.read_csv("data/iris.data") + + plt.figure(); + + @savefig radviz.png + radviz(data, "Name"); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.formatting: + +Plot formatting +--------------- + +Setting the plot style +~~~~~~~~~~~~~~~~~~~~~~ + +From version 1.5 and up, matplotlib offers a range of pre-configured plotting styles. Setting the +style can be used to easily give plots the general look that you want. +Setting the style is as easy as calling ``matplotlib.style.use(my_plot_style)`` before +creating your plot. For example you could write ``matplotlib.style.use('ggplot')`` for ggplot-style +plots. + +You can see the various available style names at ``matplotlib.style.available`` and it's very +easy to try them out. + +General plot style arguments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Most plotting methods have a set of keyword arguments that control the +layout and formatting of the returned plot: + +.. ipython:: python + + plt.figure(); + @savefig series_plot_basic2.png + ts.plot(style="k--", label="Series"); + +.. ipython:: python + :suppress: + + plt.close("all") + +For each kind of plot (e.g. ``line``, ``bar``, ``scatter``) any additional arguments +keywords are passed along to the corresponding matplotlib function +(:meth:`ax.plot() `, +:meth:`ax.bar() `, +:meth:`ax.scatter() `). These can be used +to control additional styling, beyond what pandas provides. + +Controlling the legend +~~~~~~~~~~~~~~~~~~~~~~ + +You may set the ``legend`` argument to ``False`` to hide the legend, which is +shown by default. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=list("ABCD")) + df = df.cumsum() + + @savefig frame_plot_basic_noleg.png + df.plot(legend=False); + +.. ipython:: python + :suppress: + + plt.close("all") + + +Controlling the labels +~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1.0 + +You may set the ``xlabel`` and ``ylabel`` arguments to give the plot custom labels +for x and y axis. By default, pandas will pick up index name as xlabel, while leaving +it empty for ylabel. + +.. ipython:: python + :suppress: + + plt.figure(); + +.. ipython:: python + + df.plot(); + + @savefig plot_xlabel_ylabel.png + df.plot(xlabel="new x", ylabel="new y"); + +.. ipython:: python + :suppress: + + plt.close("all") + + +Scales +~~~~~~ + +You may pass ``logy`` to get a log-scale Y axis. + +.. ipython:: python + :suppress: + + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000)) + ts = np.exp(ts.cumsum()) + + @savefig series_plot_logy.png + ts.plot(logy=True); + +.. ipython:: python + :suppress: + + plt.close("all") + +See also the ``logx`` and ``loglog`` keyword arguments. + +Plotting on a secondary y-axis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To plot data on a secondary y-axis, use the ``secondary_y`` keyword: + +.. ipython:: python + :suppress: + + plt.figure() + +.. 
ipython:: python + + df["A"].plot(); + + @savefig series_plot_secondary_y.png + df["B"].plot(secondary_y=True, style="g"); + +.. ipython:: python + :suppress: + + plt.close("all") + +To plot some columns in a ``DataFrame``, give the column names to the ``secondary_y`` +keyword: + +.. ipython:: python + + plt.figure(); + ax = df.plot(secondary_y=["A", "B"]) + ax.set_ylabel("CD scale"); + @savefig frame_plot_secondary_y.png + ax.right_ax.set_ylabel("AB scale"); + +.. ipython:: python + :suppress: + + plt.close("all") + +Note that the columns plotted on the secondary y-axis is automatically marked +with "(right)" in the legend. To turn off the automatic marking, use the +``mark_right=False`` keyword: + +.. ipython:: python + + plt.figure(); + + @savefig frame_plot_secondary_y_no_right.png + df.plot(secondary_y=["A", "B"], mark_right=False); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _plotting.formatters: + +Custom formatters for timeseries plots +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionchanged:: 1.0.0 + +pandas provides custom formatters for timeseries plots. These change the +formatting of the axis labels for dates and times. By default, +the custom formatters are applied only to plots created by pandas with +:meth:`DataFrame.plot` or :meth:`Series.plot`. To have them apply to all +plots, including those made by matplotlib, set the option +``pd.options.plotting.matplotlib.register_converters = True`` or use +:meth:`pandas.plotting.register_matplotlib_converters`. + +Suppressing tick resolution adjustment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +pandas includes automatic tick resolution adjustment for regular frequency +time-series data. For limited cases where pandas cannot infer the frequency +information (e.g., in an externally created ``twinx``), you can choose to +suppress this behavior for alignment purposes. + +Here is the default behavior, notice how the x-axis tick labeling is performed: + +.. ipython:: python + + plt.figure(); + + @savefig ser_plot_suppress.png + df["A"].plot(); + +.. ipython:: python + :suppress: + + plt.close("all") + +Using the ``x_compat`` parameter, you can suppress this behavior: + +.. ipython:: python + + plt.figure(); + + @savefig ser_plot_suppress_parm.png + df["A"].plot(x_compat=True); + +.. ipython:: python + :suppress: + + plt.close("all") + +If you have more than one plot that needs to be suppressed, the ``use`` method +in ``pandas.plotting.plot_params`` can be used in a ``with`` statement: + +.. ipython:: python + + plt.figure(); + + @savefig ser_plot_suppress_context.png + with pd.plotting.plot_params.use("x_compat", True): + df["A"].plot(color="r") + df["B"].plot(color="g") + df["C"].plot(color="b") + +.. ipython:: python + :suppress: + + plt.close("all") + +Automatic date tick adjustment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``TimedeltaIndex`` now uses the native matplotlib +tick locator methods, it is useful to call the automatic +date tick adjustment from matplotlib for figures whose ticklabels overlap. + +See the :meth:`autofmt_xdate ` method and the +`matplotlib documentation `__ for more. + +Subplots +~~~~~~~~ + +Each ``Series`` in a ``DataFrame`` can be plotted on a different axis +with the ``subplots`` keyword: + +.. ipython:: python + + @savefig frame_plot_subplots.png + df.plot(subplots=True, figsize=(6, 6)); + +.. 
ipython:: python + :suppress: + + plt.close("all") + +Using layout and targeting multiple axes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The layout of subplots can be specified by the ``layout`` keyword. It can accept +``(rows, columns)``. The ``layout`` keyword can be used in +``hist`` and ``boxplot`` also. If the input is invalid, a ``ValueError`` will be raised. + +The number of axes which can be contained by rows x columns specified by ``layout`` must be +larger than the number of required subplots. If layout can contain more axes than required, +blank axes are not drawn. Similar to a NumPy array's ``reshape`` method, you +can use ``-1`` for one dimension to automatically calculate the number of rows +or columns needed, given the other. + +.. ipython:: python + + @savefig frame_plot_subplots_layout.png + df.plot(subplots=True, layout=(2, 3), figsize=(6, 6), sharex=False); + +.. ipython:: python + :suppress: + + plt.close("all") + +The above example is identical to using: + +.. ipython:: python + + df.plot(subplots=True, layout=(2, -1), figsize=(6, 6), sharex=False); + +.. ipython:: python + :suppress: + + plt.close("all") + +The required number of columns (3) is inferred from the number of series to plot +and the given number of rows (2). + +You can pass multiple axes created beforehand as list-like via ``ax`` keyword. +This allows more complicated layouts. +The passed axes must be the same number as the subplots being drawn. + +When multiple axes are passed via the ``ax`` keyword, ``layout``, ``sharex`` and ``sharey`` keywords +don't affect to the output. You should explicitly pass ``sharex=False`` and ``sharey=False``, +otherwise you will see a warning. + +.. ipython:: python + + fig, axes = plt.subplots(4, 4, figsize=(9, 9)) + plt.subplots_adjust(wspace=0.5, hspace=0.5) + target1 = [axes[0][0], axes[1][1], axes[2][2], axes[3][3]] + target2 = [axes[3][0], axes[2][1], axes[1][2], axes[0][3]] + + df.plot(subplots=True, ax=target1, legend=False, sharex=False, sharey=False); + @savefig frame_plot_subplots_multi_ax.png + (-df).plot(subplots=True, ax=target2, legend=False, sharex=False, sharey=False); + +.. ipython:: python + :suppress: + + plt.close("all") + +Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a particular axis: + +.. ipython:: python + :suppress: + + np.random.seed(123456) + ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000)) + ts = ts.cumsum() + + df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=list("ABCD")) + df = df.cumsum() + +.. ipython:: python + :suppress: + + plt.close("all") + +.. ipython:: python + + fig, axes = plt.subplots(nrows=2, ncols=2) + plt.subplots_adjust(wspace=0.2, hspace=0.5) + df["A"].plot(ax=axes[0, 0]); + axes[0, 0].set_title("A"); + df["B"].plot(ax=axes[0, 1]); + axes[0, 1].set_title("B"); + df["C"].plot(ax=axes[1, 0]); + axes[1, 0].set_title("C"); + df["D"].plot(ax=axes[1, 1]); + @savefig series_plot_multi.png + axes[1, 1].set_title("D"); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.errorbars: + +Plotting with error bars +~~~~~~~~~~~~~~~~~~~~~~~~ + +Plotting with error bars is supported in :meth:`DataFrame.plot` and :meth:`Series.plot`. + +Horizontal and vertical error bars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. 
The error values can be specified using a variety of formats: + +* As a :class:`DataFrame` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting :class:`DataFrame` or matching the ``name`` attribute of the :class:`Series`. +* As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values. +* As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting :class:`DataFrame`/:class:`Series`. + +Here is an example of one way to easily plot group means with standard deviations from the raw data. + +.. ipython:: python + + # Generate the data + ix3 = pd.MultiIndex.from_arrays( + [ + ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"], + ["foo", "foo", "foo", "bar", "bar", "foo", "foo", "bar", "bar", "bar"], + ], + names=["letter", "word"], + ) + + df3 = pd.DataFrame( + { + "data1": [9, 3, 2, 4, 3, 2, 4, 6, 3, 2], + "data2": [9, 6, 5, 7, 5, 4, 5, 6, 5, 1], + }, + index=ix3, + ) + + # Group by index labels and take the means and standard deviations + # for each group + gp3 = df3.groupby(level=("letter", "word")) + means = gp3.mean() + errors = gp3.std() + means + errors + + # Plot + fig, ax = plt.subplots() + @savefig errorbar_example.png + means.plot.bar(yerr=errors, ax=ax, capsize=4, rot=0); + +.. ipython:: python + :suppress: + + plt.close("all") + +Asymmetrical error bars are also supported, however raw error values must be provided in this case. For a ``N`` length :class:`Series`, a ``2xN`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` :class:`DataFrame`, asymmetrical errors should be in a ``Mx2xN`` array. + +Here is an example of one way to plot the min/max range using asymmetrical error bars. + +.. ipython:: python + + mins = gp3.min() + maxs = gp3.max() + + # errors should be positive, and defined in the order of lower, upper + errors = [[means[c] - mins[c], maxs[c] - means[c]] for c in df3.columns] + + # Plot + fig, ax = plt.subplots() + @savefig errorbar_asymmetrical_example.png + means.plot.bar(yerr=errors, ax=ax, capsize=4, rot=0); + +.. ipython:: python + :suppress: + + plt.close("all") + +.. _visualization.table: + +Plotting tables +~~~~~~~~~~~~~~~ + +Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and :meth:`Series.plot` with a ``table`` keyword. The ``table`` keyword can accept ``bool``, :class:`DataFrame` or :class:`Series`. The simple way to draw a table is to specify ``table=True``. Data will be transposed to meet matplotlib's default layout. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + fig, ax = plt.subplots(1, 1, figsize=(7, 6.5)) + df = pd.DataFrame(np.random.rand(5, 3), columns=["a", "b", "c"]) + ax.xaxis.tick_top() # Display x-axis ticks on top. + + @savefig line_plot_table_true.png + df.plot(table=True, ax=ax); + +.. ipython:: python + :suppress: + + plt.close("all") + +Also, you can pass a different :class:`DataFrame` or :class:`Series` to the +``table`` keyword. The data will be drawn as displayed in print method +(not transposed automatically). If required, it should be transposed manually +as seen in the example below. + +.. ipython:: python + + fig, ax = plt.subplots(1, 1, figsize=(7, 6.75)) + ax.xaxis.tick_top() # Display x-axis ticks on top. + + @savefig line_plot_table_data.png + df.plot(table=np.round(df.T, 2), ax=ax); + +.. 
ipython:: python + :suppress: + + plt.close("all") + +There also exists a helper function ``pandas.plotting.table``, which creates a +table from :class:`DataFrame` or :class:`Series`, and adds it to an +``matplotlib.Axes`` instance. This function can accept keywords which the +matplotlib `table `__ has. + +.. ipython:: python + + from pandas.plotting import table + + fig, ax = plt.subplots(1, 1) + + table(ax, np.round(df.describe(), 2), loc="upper right", colWidths=[0.2, 0.2, 0.2]); + + @savefig line_plot_table_describe.png + df.plot(ax=ax, ylim=(0, 2), legend=None); + +.. ipython:: python + :suppress: + + plt.close("all") + +**Note**: You can get table instances on the axes using ``axes.tables`` property for further decorations. See the `matplotlib table documentation `__ for more. + +.. _visualization.colormaps: + +Colormaps +~~~~~~~~~ + +A potential issue when plotting a large number of columns is that it can be +difficult to distinguish some series due to repetition in the default colors. To +remedy this, ``DataFrame`` plotting supports the use of the ``colormap`` argument, +which accepts either a Matplotlib `colormap `__ +or a string that is a name of a colormap registered with Matplotlib. A +visualization of the default matplotlib colormaps is available `here +`__. + +As matplotlib does not directly support colormaps for line-based plots, the +colors are selected based on an even spacing determined by the number of columns +in the ``DataFrame``. There is no consideration made for background color, so some +colormaps will produce lines that are not easily visible. + +To use the cubehelix colormap, we can pass ``colormap='cubehelix'``. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 10), index=ts.index) + df = df.cumsum() + + plt.figure(); + + @savefig cubehelix.png + df.plot(colormap="cubehelix"); + +.. ipython:: python + :suppress: + + plt.close("all") + +Alternatively, we can pass the colormap itself: + +.. ipython:: python + + from matplotlib import cm + + plt.figure(); + + @savefig cubehelix_cm.png + df.plot(colormap=cm.cubehelix); + +.. ipython:: python + :suppress: + + plt.close("all") + +Colormaps can also be used other plot types, like bar charts: + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + dd = pd.DataFrame(np.random.randn(10, 10)).applymap(abs) + dd = dd.cumsum() + + plt.figure(); + + @savefig greens.png + dd.plot.bar(colormap="Greens"); + +.. ipython:: python + :suppress: + + plt.close("all") + +Parallel coordinates charts: + +.. ipython:: python + + plt.figure(); + + @savefig parallel_gist_rainbow.png + parallel_coordinates(data, "Name", colormap="gist_rainbow"); + +.. ipython:: python + :suppress: + + plt.close("all") + +Andrews curves charts: + +.. ipython:: python + + plt.figure(); + + @savefig andrews_curve_winter.png + andrews_curves(data, "Name", colormap="winter"); + +.. ipython:: python + :suppress: + + plt.close("all") + +Plotting directly with Matplotlib +--------------------------------- + +In some situations it may still be preferable or necessary to prepare plots +directly with matplotlib, for instance when a certain type of plot or +customization is not (yet) supported by pandas. ``Series`` and ``DataFrame`` +objects behave like arrays and can therefore be passed directly to +matplotlib functions without explicit casts. 
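+
+As a minimal sketch (with hypothetical data), a :class:`Series` can be handed to
+a matplotlib ``Axes`` just like a NumPy array:
+
+.. code-block:: python
+
+   import matplotlib.pyplot as plt
+   import numpy as np
+   import pandas as pd
+
+   s = pd.Series(
+       np.random.randn(100).cumsum(),
+       index=pd.date_range("2000-01-01", periods=100),
+   )
+
+   fig, ax = plt.subplots()
+   # the Series is used directly as the y data; its index supplies the x values
+   ax.plot(s.index, s)
+   plt.show()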
+ +pandas also automatically registers formatters and locators that recognize date +indices, thereby extending date and time support to practically all plot types +available in matplotlib. Although this formatting does not provide the same +level of refinement you would get when plotting via pandas, it can be faster +when plotting a large number of points. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + price = pd.Series( + np.random.randn(150).cumsum(), + index=pd.date_range("2000-1-1", periods=150, freq="B"), + ) + ma = price.rolling(20).mean() + mstd = price.rolling(20).std() + + plt.figure(); + + plt.plot(price.index, price, "k"); + plt.plot(ma.index, ma, "b"); + @savefig bollinger.png + plt.fill_between(mstd.index, ma - 2 * mstd, ma + 2 * mstd, color="b", alpha=0.2); + +.. ipython:: python + :suppress: + + plt.close("all") + +Plotting backends +----------------- + +Starting in version 0.25, pandas can be extended with third-party plotting backends. The +main idea is letting users select a plotting backend different than the provided +one based on Matplotlib. + +This can be done by passing 'backend.module' as the argument ``backend`` in ``plot`` +function. For example: + +.. code-block:: python + + >>> Series([1, 2, 3]).plot(backend="backend.module") + +Alternatively, you can also set this option globally, do you don't need to specify +the keyword in each ``plot`` call. For example: + +.. code-block:: python + + >>> pd.set_option("plotting.backend", "backend.module") + >>> pd.Series([1, 2, 3]).plot() + +Or: + +.. code-block:: python + + >>> pd.options.plotting.backend = "backend.module" + >>> pd.Series([1, 2, 3]).plot() + +This would be more or less equivalent to: + +.. code-block:: python + + >>> import backend.module + >>> backend.module.plot(pd.Series([1, 2, 3])) + +The backend module can then use other visualization tools (Bokeh, Altair, hvplot,...) +to generate the plots. Some libraries implementing a backend for pandas are listed +on the ecosystem :ref:`ecosystem.visualization` page. + +Developers guide can be found at +https://pandas.pydata.org/docs/dev/development/extending.html#plotting-backends diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst new file mode 100644 index 00000000..e08fa81c --- /dev/null +++ b/doc/source/user_guide/window.rst @@ -0,0 +1,649 @@ +.. _window: + +{{ header }} + +******************** +Windowing operations +******************** + +pandas contains a compact set of APIs for performing windowing operations - an operation that performs +an aggregation over a sliding partition of values. The API functions similarly to the ``groupby`` API +in that :class:`Series` and :class:`DataFrame` call the windowing method with +necessary parameters and then subsequently call the aggregation function. + +.. ipython:: python + + s = pd.Series(range(5)) + s.rolling(window=2).sum() + +The windows are comprised by looking back the length of the window from the current observation. +The result above can be derived by taking the sum of the following windowed partitions of data: + +.. ipython:: python + + for window in s.rolling(window=2): + print(window) + + +.. _window.overview: + +Overview +-------- + +pandas supports 4 types of windowing operations: + +#. Rolling window: Generic fixed or variable sliding window over the values. +#. Weighted window: Weighted, non-rectangular window supplied by the ``scipy.signal`` library. +#. Expanding window: Accumulating window over the values. +#. 
Exponentially Weighted window: Accumulating and exponentially weighted window over the values. + +============================= ================= =========================== =========================== ======================== =================================== =========================== +Concept Method Returned Object Supports time-based windows Supports chained groupby Supports table method Supports online operations +============================= ================= =========================== =========================== ======================== =================================== =========================== +Rolling window ``rolling`` ``Rolling`` Yes Yes Yes (as of version 1.3) No +Weighted window ``rolling`` ``Window`` No No No No +Expanding window ``expanding`` ``Expanding`` No Yes Yes (as of version 1.3) No +Exponentially Weighted window ``ewm`` ``ExponentialMovingWindow`` No Yes (as of version 1.2) No Yes (as of version 1.3) +============================= ================= =========================== =========================== ======================== =================================== =========================== + +As noted above, some operations support specifying a window based on a time offset: + +.. ipython:: python + + s = pd.Series(range(5), index=pd.date_range('2020-01-01', periods=5, freq='1D')) + s.rolling(window='2D').sum() + +Additionally, some methods support chaining a ``groupby`` operation with a windowing operation +which will first group the data by the specified keys and then perform a windowing operation per group. + +.. ipython:: python + + df = pd.DataFrame({'A': ['a', 'b', 'a', 'b', 'a'], 'B': range(5)}) + df.groupby('A').expanding().sum() + +.. note:: + + Windowing operations currently only support numeric data (integer and float) + and will always return ``float64`` values. + +.. warning:: + + Some windowing aggregation, ``mean``, ``sum``, ``var`` and ``std`` methods may suffer from numerical + imprecision due to the underlying windowing algorithms accumulating sums. When values differ + with magnitude :math:`1/np.finfo(np.double).eps` this results in truncation. It must be + noted, that large values may have an impact on windows, which do not include these values. `Kahan summation + `__ is used + to compute the rolling sums to preserve accuracy as much as possible. + + +.. versionadded:: 1.3.0 + +Some windowing operations also support the ``method='table'`` option in the constructor which +performs the windowing operation over an entire :class:`DataFrame` instead of a single column or row at a time. +This can provide a useful performance benefit for a :class:`DataFrame` with many columns or rows +(with the corresponding ``axis`` argument) or the ability to utilize other columns during the windowing +operation. The ``method='table'`` option can only be used if ``engine='numba'`` is specified +in the corresponding method call. + +For example, a `weighted mean `__ calculation can +be calculated with :meth:`~Rolling.apply` by specifying a separate column of weights. + +.. ipython:: python + + def weighted_mean(x): + arr = np.ones((1, x.shape[1])) + arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum() + return arr + + df = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) + df.rolling(2, method="table", min_periods=0).apply(weighted_mean, raw=True, engine="numba") # noqa:E501 + +.. 
versionadded:: 1.3 + +Some windowing operations also support an ``online`` method after constructing a windowing object +which returns a new object that supports passing in new :class:`DataFrame` or :class:`Series` objects +to continue the windowing calculation with the new values (i.e. online calculations). + +The methods on these new windowing objects must call the aggregation method first to "prime" the initial +state of the online calculation. Then, new :class:`DataFrame` or :class:`Series` objects can be passed in +the ``update`` argument to continue the windowing calculation. + +.. ipython:: python + + df = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) + df.ewm(0.5).mean() + +.. ipython:: python + + online_ewm = df.head(2).ewm(0.5).online() + online_ewm.mean() + online_ewm.mean(update=df.tail(1)) + +All windowing operations support a ``min_periods`` argument that dictates the minimum number of +non-``np.nan`` values a window must have; otherwise, the resulting value is ``np.nan``. +``min_periods`` defaults to 1 for time-based windows and ``window`` for fixed windows. + +.. ipython:: python + + s = pd.Series([np.nan, 1, 2, np.nan, np.nan, 3]) + s.rolling(window=3, min_periods=1).sum() + s.rolling(window=3, min_periods=2).sum() + # Equivalent to min_periods=3 + s.rolling(window=3, min_periods=None).sum() + + +Additionally, all windowing operations support the ``aggregate`` method for returning a result +of multiple aggregations applied to a window. + +.. ipython:: python + + df = pd.DataFrame({"A": range(5), "B": range(10, 15)}) + df.expanding().agg([np.sum, np.mean, np.std]) + + +.. _window.generic: + +Rolling window +-------------- + +Generic rolling windows support specifying windows as a fixed number of observations or a variable +number of observations based on an offset. If a time-based offset is provided, the corresponding +time-based index must be monotonic. + +.. ipython:: python + + times = ['2020-01-01', '2020-01-03', '2020-01-04', '2020-01-05', '2020-01-29'] + s = pd.Series(range(5), index=pd.DatetimeIndex(times)) + s + # Window with 2 observations + s.rolling(window=2).sum() + # Window with 2 days worth of observations + s.rolling(window='2D').sum() + +For all supported aggregation functions, see :ref:`api.functions_rolling`. + +.. _window.center: + +Centering windows +~~~~~~~~~~~~~~~~~ + +By default the labels are set to the right edge of the window, but a +``center`` keyword is available so the labels can be set at the center. + +.. ipython:: python + + s = pd.Series(range(10)) + s.rolling(window=5).mean() + s.rolling(window=5, center=True).mean() + + +This can also be applied to datetime-like indices. + +.. versionadded:: 1.3.0 + +.. ipython:: python + + df = pd.DataFrame( + {"A": [0, 1, 2, 3, 4]}, index=pd.date_range("2020", periods=5, freq="1D") + ) + df + df.rolling("2D", center=False).mean() + df.rolling("2D", center=True).mean() + + +.. _window.endpoints: + +Rolling window endpoints +~~~~~~~~~~~~~~~~~~~~~~~~ + +The inclusion of the interval endpoints in rolling window calculations can be specified with the ``closed`` +parameter: + +============= ==================== +Value Behavior +============= ==================== +``'right'`` close right endpoint +``'left'`` close left endpoint +``'both'`` close both endpoints +``'neither'`` open endpoints +============= ==================== + +For example, having the right endpoint open is useful in many problems that require that there is no contamination +from present information back to past information.
This allows the rolling window to compute statistics +"up to that point in time", but not including that point in time. + +.. ipython:: python + + df = pd.DataFrame( + {"x": 1}, + index=[ + pd.Timestamp("20130101 09:00:01"), + pd.Timestamp("20130101 09:00:02"), + pd.Timestamp("20130101 09:00:03"), + pd.Timestamp("20130101 09:00:04"), + pd.Timestamp("20130101 09:00:06"), + ], + ) + + df["right"] = df.rolling("2s", closed="right").x.sum() # default + df["both"] = df.rolling("2s", closed="both").x.sum() + df["left"] = df.rolling("2s", closed="left").x.sum() + df["neither"] = df.rolling("2s", closed="neither").x.sum() + + df + +.. _window.custom_rolling_window: + +Custom window rolling +~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.0 + +In addition to accepting an integer or offset as a ``window`` argument, ``rolling`` also accepts +a ``BaseIndexer`` subclass that allows a user to define a custom method for calculating window bounds. +The ``BaseIndexer`` subclass will need to define a ``get_window_bounds`` method that returns +a tuple of two arrays, the first being the starting indices of the windows and the second being the +ending indices of the windows. Additionally, ``num_values``, ``min_periods``, ``center``, and ``closed`` +will automatically be passed to ``get_window_bounds``, and the defined method must +always accept these arguments. + +For example, if we have the following :class:`DataFrame` + +.. ipython:: python + + use_expanding = [True, False, True, False, True] + use_expanding + df = pd.DataFrame({"values": range(5)}) + df + +and we want to use an expanding window where ``use_expanding`` is ``True`` and otherwise a window of size +1, we can create the following ``BaseIndexer`` subclass: + +.. code-block:: ipython + + In [2]: from pandas.api.indexers import BaseIndexer + + In [3]: class CustomIndexer(BaseIndexer): + ...: def get_window_bounds(self, num_values, min_periods, center, closed): + ...: start = np.empty(num_values, dtype=np.int64) + ...: end = np.empty(num_values, dtype=np.int64) + ...: for i in range(num_values): + ...: if self.use_expanding[i]: + ...: start[i] = 0 + ...: end[i] = i + 1 + ...: else: + ...: start[i] = i + ...: end[i] = i + self.window_size + ...: return start, end + + In [4]: indexer = CustomIndexer(window_size=1, use_expanding=use_expanding) + + In [5]: df.rolling(indexer).sum() + Out[5]: + values + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 10.0 + +You can view other examples of ``BaseIndexer`` subclasses `here `__ + +.. versionadded:: 1.1 + +One subclass of note within those examples is the ``VariableOffsetWindowIndexer`` that allows +rolling operations over a non-fixed offset like a ``BusinessDay``. + +.. ipython:: python + + from pandas.api.indexers import VariableOffsetWindowIndexer + + df = pd.DataFrame(range(10), index=pd.date_range("2020", periods=10)) + offset = pd.offsets.BDay(1) + indexer = VariableOffsetWindowIndexer(index=df.index, offset=offset) + df + df.rolling(indexer).sum() + +For some problems, knowledge of the future is available for analysis. For example, this occurs when +each data point is a full time series read from an experiment, and the task is to extract underlying +conditions. In these cases it can be useful to perform forward-looking rolling window computations. +The :func:`FixedForwardWindowIndexer ` class is available for this purpose. +This :func:`BaseIndexer ` subclass implements a closed fixed-width +forward-looking rolling window, and we can use it as follows: + +.. 
ipython:: python + + from pandas.api.indexers import FixedForwardWindowIndexer + indexer = FixedForwardWindowIndexer(window_size=2) + df.rolling(indexer, min_periods=1).sum() + +We can also achieve this by using slicing, applying rolling aggregation, and then flipping the result as shown in example below: + +.. ipython:: python + + df = pd.DataFrame( + data=[ + [pd.Timestamp("2018-01-01 00:00:00"), 100], + [pd.Timestamp("2018-01-01 00:00:01"), 101], + [pd.Timestamp("2018-01-01 00:00:03"), 103], + [pd.Timestamp("2018-01-01 00:00:04"), 111], + ], + columns=["time", "value"], + ).set_index("time") + df + + reversed_df = df[::-1].rolling("2s").sum()[::-1] + reversed_df + +.. _window.rolling_apply: + +Rolling apply +~~~~~~~~~~~~~ + +The :meth:`~Rolling.apply` function takes an extra ``func`` argument and performs +generic rolling computations. The ``func`` argument should be a single function +that produces a single value from an ndarray input. ``raw`` specifies whether +the windows are cast as :class:`Series` objects (``raw=False``) or ndarray objects (``raw=True``). + +.. ipython:: python + + def mad(x): + return np.fabs(x - x.mean()).mean() + + s = pd.Series(range(10)) + s.rolling(window=4).apply(mad, raw=True) + +.. _window.numba_engine: + +Numba engine +~~~~~~~~~~~~ + +.. versionadded:: 1.0 + +Additionally, :meth:`~Rolling.apply` can leverage `Numba `__ +if installed as an optional dependency. The apply aggregation can be executed using Numba by specifying +``engine='numba'`` and ``engine_kwargs`` arguments (``raw`` must also be set to ``True``). +See :ref:`enhancing performance with Numba ` for general usage of the arguments and performance considerations. + +Numba will be applied in potentially two routines: + +#. If ``func`` is a standard Python function, the engine will `JIT `__ the passed function. ``func`` can also be a JITed function in which case the engine will not JIT the function again. +#. The engine will JIT the for loop where the apply function is applied to each window. + +The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the +`numba.jit decorator `__. +These keyword arguments will be applied to *both* the passed function (if a standard Python function) +and the apply for loop over each window. + +.. versionadded:: 1.3.0 + +``mean``, ``median``, ``max``, ``min``, and ``sum`` also support the ``engine`` and ``engine_kwargs`` arguments. + +.. _window.cov_corr: + +Binary window functions +~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~Rolling.cov` and :meth:`~Rolling.corr` can compute moving window statistics about +two :class:`Series` or any combination of :class:`DataFrame`/:class:`Series` or +:class:`DataFrame`/:class:`DataFrame`. Here is the behavior in each case: + +* two :class:`Series`: compute the statistic for the pairing. +* :class:`DataFrame`/:class:`Series`: compute the statistics for each column of the DataFrame + with the passed Series, thus returning a DataFrame. +* :class:`DataFrame`/:class:`DataFrame`: by default compute the statistic for matching column + names, returning a DataFrame. If the keyword argument ``pairwise=True`` is + passed then computes the statistic for each pair of columns, returning a :class:`DataFrame` with a + :class:`MultiIndex` whose values are the dates in question (see :ref:`the next section + `). + +For example: + +.. 
ipython:: python + + df = pd.DataFrame( + np.random.randn(10, 4), + index=pd.date_range("2020-01-01", periods=10), + columns=["A", "B", "C", "D"], + ) + df = df.cumsum() + + df2 = df[:4] + df2.rolling(window=2).corr(df2["B"]) + +.. _window.corr_pairwise: + +Computing rolling pairwise covariances and correlations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In financial data analysis and other fields it's common to compute covariance +and correlation matrices for a collection of time series. Often one is also +interested in moving-window covariance and correlation matrices. This can be +done by passing the ``pairwise`` keyword argument, which in the case of +:class:`DataFrame` inputs will yield a MultiIndexed :class:`DataFrame` whose ``index`` are the dates in +question. In the case of a single DataFrame argument the ``pairwise`` argument +can even be omitted: + +.. note:: + + Missing values are ignored and each entry is computed using the pairwise + complete observations. + + Assuming the missing data are missing at random this results in an estimate + for the covariance matrix which is unbiased. However, for many applications + this estimate may not be acceptable because the estimated covariance matrix + is not guaranteed to be positive semi-definite. This could lead to + estimated correlations having absolute values which are greater than one, + and/or a non-invertible covariance matrix. See `Estimation of covariance + matrices `_ + for more details. + +.. ipython:: python + + covs = ( + df[["B", "C", "D"]] + .rolling(window=4) + .cov(df[["A", "B", "C"]], pairwise=True) + ) + covs + + +.. _window.weighted: + +Weighted window +--------------- + +The ``win_type`` argument in ``.rolling`` generates a weighted windows that are commonly used in filtering +and spectral estimation. ``win_type`` must be string that corresponds to a `scipy.signal window function +`__. +Scipy must be installed in order to use these windows, and supplementary arguments +that the Scipy window methods take must be specified in the aggregation function. + + +.. ipython:: python + + s = pd.Series(range(10)) + s.rolling(window=5).mean() + s.rolling(window=5, win_type="triang").mean() + # Supplementary Scipy arguments passed in the aggregation function + s.rolling(window=5, win_type="gaussian").mean(std=0.1) + +For all supported aggregation functions, see :ref:`api.functions_window`. + +.. _window.expanding: + +Expanding window +---------------- + +An expanding window yields the value of an aggregation statistic with all the data available up to that +point in time. Since these calculations are a special case of rolling statistics, +they are implemented in pandas such that the following two calls are equivalent: + +.. ipython:: python + + df = pd.DataFrame(range(5)) + df.rolling(window=len(df), min_periods=1).mean() + df.expanding(min_periods=1).mean() + +For all supported aggregation functions, see :ref:`api.functions_expanding`. + + +.. _window.exponentially_weighted: + +Exponentially weighted window +----------------------------- + +An exponentially weighted window is similar to an expanding window but with each prior point +being exponentially weighted down relative to the current point. + +In general, a weighted moving average is calculated as + +.. math:: + + y_t = \frac{\sum_{i=0}^t w_i x_{t-i}}{\sum_{i=0}^t w_i}, + +where :math:`x_t` is the input, :math:`y_t` is the result and the :math:`w_i` +are the weights. + +For all supported aggregation functions, see :ref:`api.functions_ewm`. 
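
As a quick illustration of the formula above, the last value returned by :meth:`~Series.ewm` can be
reproduced by evaluating the weighted average directly. The sketch below is only illustrative (the data
and the variable names are arbitrary and not part of the pandas API):

.. code-block:: python

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, 4.0])

    # Exponentially weighted mean computed by pandas
    result = s.ewm(alpha=0.5).mean()

    # The same value from the general formula: with the default
    # ``adjust=True`` (discussed next) the weights are w_i = (1 - alpha)**i,
    # where i counts back from the most recent observation.
    alpha = 0.5
    weights = (1 - alpha) ** np.arange(len(s))[::-1]  # oldest point gets the smallest weight
    by_hand = (weights * s.to_numpy()).sum() / weights.sum()

    assert np.isclose(result.iloc[-1], by_hand)
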
+ +The EW functions support two variants of exponential weights. +The default, ``adjust=True``, uses the weights :math:`w_i = (1 - \alpha)^i` +which gives + +.. math:: + + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + + (1 - \alpha)^t x_{0}}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + + (1 - \alpha)^t} + +When ``adjust=False`` is specified, moving averages are calculated as + +.. math:: + + y_0 &= x_0 \\ + y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, + +which is equivalent to using weights + +.. math:: + + w_i = \begin{cases} + \alpha (1 - \alpha)^i & \text{if } i < t \\ + (1 - \alpha)^i & \text{if } i = t. + \end{cases} + +.. note:: + + These equations are sometimes written in terms of :math:`\alpha' = 1 - \alpha`, e.g. + + .. math:: + + y_t = \alpha' y_{t-1} + (1 - \alpha') x_t. + +The difference between the above two variants arises because we are +dealing with series which have finite history. Consider a series of infinite +history, with ``adjust=True``: + +.. math:: + + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ...} + {1 + (1 - \alpha) + (1 - \alpha)^2 + ...} + +Noting that the denominator is a geometric series with initial term equal to 1 +and a ratio of :math:`1 - \alpha` we have + +.. math:: + + y_t &= \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ...} + {\frac{1}{1 - (1 - \alpha)}}\\ + &= [x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ...] \alpha \\ + &= \alpha x_t + [(1-\alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ...]\alpha \\ + &= \alpha x_t + (1 - \alpha)[x_{t-1} + (1 - \alpha) x_{t-2} + ...]\alpha\\ + &= \alpha x_t + (1 - \alpha) y_{t-1} + +which is the same expression as ``adjust=False`` above and therefore +shows the equivalence of the two variants for infinite series. +When ``adjust=False``, we have :math:`y_0 = x_0` and +:math:`y_t = \alpha x_t + (1 - \alpha) y_{t-1}`. +Therefore, there is an assumption that :math:`x_0` is not an ordinary value +but rather an exponentially weighted moment of the infinite series up to that +point. + +One must have :math:`0 < \alpha \leq 1`, and while it is possible to pass +:math:`\alpha` directly, it's often easier to think about either the +**span**, **center of mass (com)** or **half-life** of an EW moment: + +.. math:: + + \alpha = + \begin{cases} + \frac{2}{s + 1}, & \text{for span}\ s \geq 1\\ + \frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\ + 1 - \exp^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0 + \end{cases} + +One must specify precisely one of **span**, **center of mass**, **half-life** +and **alpha** to the EW functions: + +* **Span** corresponds to what is commonly called an "N-day EW moving average". +* **Center of mass** has a more physical interpretation and can be thought of + in terms of span: :math:`c = (s - 1) / 2`. +* **Half-life** is the period of time for the exponential weight to reduce to + one half. +* **Alpha** specifies the smoothing factor directly. + +.. versionadded:: 1.1.0 + +You can also specify ``halflife`` in terms of a timedelta convertible unit to specify the amount of +time it takes for an observation to decay to half its value when also specifying a sequence +of ``times``. + +.. ipython:: python + + df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + times = ["2020-01-01", "2020-01-03", "2020-01-10", "2020-01-15", "2020-01-17"] + df.ewm(halflife="4 days", times=pd.DatetimeIndex(times)).mean() + +The following formula is used to compute exponentially weighted mean with an input vector of times: + +.. 
math:: + + y_t = \frac{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda} x_{t-i}}{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda}}, + + +ExponentialMovingWindow also has an ``ignore_na`` argument, which determines how +intermediate null values affect the calculation of the weights. +When ``ignore_na=False`` (the default), weights are calculated based on absolute +positions, so that intermediate null values affect the result. +When ``ignore_na=True``, +weights are calculated by ignoring intermediate null values. +For example, assuming ``adjust=True``, if ``ignore_na=False``, the weighted +average of ``3, NaN, 5`` would be calculated as + +.. math:: + + \frac{(1-\alpha)^2 \cdot 3 + 1 \cdot 5}{(1-\alpha)^2 + 1}. + +Whereas if ``ignore_na=True``, the weighted average would be calculated as + +.. math:: + + \frac{(1-\alpha) \cdot 3 + 1 \cdot 5}{(1-\alpha) + 1}. + +The :meth:`~Ewm.var`, :meth:`~Ewm.std`, and :meth:`~Ewm.cov` functions have a ``bias`` argument, +specifying whether the result should contain biased or unbiased statistics. +For example, if ``bias=True``, ``ewmvar(x)`` is calculated as +``ewmvar(x) = ewma(x**2) - ewma(x)**2``; +whereas if ``bias=False`` (the default), the biased variance statistics +are scaled by debiasing factors + +.. math:: + + \frac{\left(\sum_{i=0}^t w_i\right)^2}{\left(\sum_{i=0}^t w_i\right)^2 - \sum_{i=0}^t w_i^2}. + +(For :math:`w_i = 1`, this reduces to the usual :math:`N / (N - 1)` factor, +with :math:`N = t + 1`.) +See `Weighted Sample Variance `__ +on Wikipedia for further details. diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst new file mode 100644 index 00000000..e2f3b45d --- /dev/null +++ b/doc/source/whatsnew/index.rst @@ -0,0 +1,291 @@ +.. _release: + +{{ header }} + +************* +Release notes +************* + +This is the list of changes to pandas between each release. For full details, +see the `commit logs `_. For install and +upgrade instructions, see :ref:`install`. + +Version 1.5 +----------- + +.. toctree:: + :maxdepth: 2 + + v1.5.3 + v1.5.2 + v1.5.1 + v1.5.0 + +Version 1.4 +----------- + +.. toctree:: + :maxdepth: 2 + + v1.4.4 + v1.4.3 + v1.4.2 + v1.4.1 + v1.4.0 + +Version 1.3 +----------- + +.. toctree:: + :maxdepth: 2 + + v1.3.5 + v1.3.4 + v1.3.3 + v1.3.2 + v1.3.1 + v1.3.0 + +Version 1.2 +----------- + +.. toctree:: + :maxdepth: 2 + + v1.2.5 + v1.2.4 + v1.2.3 + v1.2.2 + v1.2.1 + v1.2.0 + +Version 1.1 +----------- + +.. toctree:: + :maxdepth: 2 + + v1.1.5 + v1.1.4 + v1.1.3 + v1.1.2 + v1.1.1 + v1.1.0 + +Version 1.0 +----------- + +.. toctree:: + :maxdepth: 2 + + v1.0.5 + v1.0.4 + v1.0.3 + v1.0.2 + v1.0.1 + v1.0.0 + +Version 0.25 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.25.3 + v0.25.2 + v0.25.1 + v0.25.0 + +Version 0.24 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.24.2 + v0.24.1 + v0.24.0 + +Version 0.23 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.23.4 + v0.23.3 + v0.23.2 + v0.23.1 + v0.23.0 + +Version 0.22 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.22.0 + +Version 0.21 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.21.1 + v0.21.0 + +Version 0.20 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.20.3 + v0.20.2 + v0.20.0 + +Version 0.19 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.19.2 + v0.19.1 + v0.19.0 + +Version 0.18 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.18.1 + v0.18.0 + +Version 0.17 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.17.1 + v0.17.0 + +Version 0.16 +------------ + +.. 
toctree:: + :maxdepth: 2 + + v0.16.2 + v0.16.1 + v0.16.0 + +Version 0.15 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.15.2 + v0.15.1 + v0.15.0 + +Version 0.14 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.14.1 + v0.14.0 + +Version 0.13 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.13.1 + v0.13.0 + +Version 0.12 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.12.0 + +Version 0.11 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.11.0 + +Version 0.10 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.10.1 + v0.10.0 + +Version 0.9 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.9.1 + v0.9.0 + +Version 0.8 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.8.1 + v0.8.0 + +Version 0.7 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.7.3 + v0.7.2 + v0.7.1 + v0.7.0 + +Version 0.6 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.6.1 + v0.6.0 + +Version 0.5 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.5.0 + +Version 0.4 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.4.x diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst new file mode 100644 index 00000000..bd47e6e4 --- /dev/null +++ b/doc/source/whatsnew/v0.10.0.rst @@ -0,0 +1,549 @@ +.. _whatsnew_0100: + +Version 0.10.0 (December 17, 2012) +---------------------------------- + +{{ header }} + + +This is a major release from 0.9.1 and includes many new features and +enhancements along with a large number of bug fixes. There are also a number of +important API changes that long-time pandas users should pay close attention +to. + +File parsing new features +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The delimited file parsing engine (the guts of ``read_csv`` and ``read_table``) +has been rewritten from the ground up and now uses a fraction the amount of +memory while parsing, while being 40% or more faster in most use cases (in some +cases much faster). + +There are also many new features: + +- Much-improved Unicode handling via the ``encoding`` option. +- Column filtering (``usecols``) +- Dtype specification (``dtype`` argument) +- Ability to specify strings to be recognized as True/False +- Ability to yield NumPy record arrays (``as_recarray``) +- High performance ``delim_whitespace`` option +- Decimal format (e.g. European format) specification +- Easier CSV dialect options: ``escapechar``, ``lineterminator``, + ``quotechar``, etc. +- More robust handling of many exceptional kinds of files observed in the wild + +API changes +~~~~~~~~~~~ + +**Deprecated DataFrame BINOP TimeSeries special case behavior** + +The default behavior of binary operations between a DataFrame and a Series has +always been to align on the DataFrame's columns and broadcast down the rows, +**except** in the special case that the DataFrame contains time series. Since +there are now method for each binary operator enabling you to specify how you +want to broadcast, we are phasing out this special case (Zen of Python: +*Special cases aren't special enough to break the rules*). Here's what I'm +talking about: + +.. ipython:: python + :okwarning: + + import pandas as pd + + df = pd.DataFrame(np.random.randn(6, 4), index=pd.date_range("1/1/2000", periods=6)) + df + # deprecated now + df - df[0] + # Change your code to + df.sub(df[0], axis=0) # align on axis 0 (rows) + +You will get a deprecation warning in the 0.10.x series, and the deprecated +functionality will be removed in 0.11 or later. 
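
To make the distinction being phased out concrete, the following minimal sketch (written against the
current method-based API; the frame and variable names are illustrative and not part of the original
release note) contrasts the default column alignment with the explicit ``axis=0`` alignment:

.. code-block:: python

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.arange(12.0).reshape(4, 3), columns=["a", "b", "c"])

    # Default behavior: the Series is aligned on the columns and broadcast
    # down the rows (here, subtract the first row from every row).
    by_columns = df - df.iloc[0]

    # Explicit axis=0 alignment: the Series is aligned on the index and
    # broadcast across the columns (here, subtract column "a" from every
    # column), which is what the time-series special case did implicitly.
    by_rows = df.sub(df["a"], axis=0)
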
+ +**Altered resample default behavior** + +The default time series ``resample`` binning behavior of daily ``D`` and +*higher* frequencies has been changed to ``closed='left', label='left'``. Lower +nfrequencies are unaffected. The prior defaults were causing a great deal of +confusion for users, especially resampling data to daily frequency (which +labeled the aggregated group with the end of the interval: the next day). + +.. code-block:: ipython + + In [1]: dates = pd.date_range('1/1/2000', '1/5/2000', freq='4h') + + In [2]: series = pd.Series(np.arange(len(dates)), index=dates) + + In [3]: series + Out[3]: + 2000-01-01 00:00:00 0 + 2000-01-01 04:00:00 1 + 2000-01-01 08:00:00 2 + 2000-01-01 12:00:00 3 + 2000-01-01 16:00:00 4 + 2000-01-01 20:00:00 5 + 2000-01-02 00:00:00 6 + 2000-01-02 04:00:00 7 + 2000-01-02 08:00:00 8 + 2000-01-02 12:00:00 9 + 2000-01-02 16:00:00 10 + 2000-01-02 20:00:00 11 + 2000-01-03 00:00:00 12 + 2000-01-03 04:00:00 13 + 2000-01-03 08:00:00 14 + 2000-01-03 12:00:00 15 + 2000-01-03 16:00:00 16 + 2000-01-03 20:00:00 17 + 2000-01-04 00:00:00 18 + 2000-01-04 04:00:00 19 + 2000-01-04 08:00:00 20 + 2000-01-04 12:00:00 21 + 2000-01-04 16:00:00 22 + 2000-01-04 20:00:00 23 + 2000-01-05 00:00:00 24 + Freq: 4H, dtype: int64 + + In [4]: series.resample('D', how='sum') + Out[4]: + 2000-01-01 15 + 2000-01-02 51 + 2000-01-03 87 + 2000-01-04 123 + 2000-01-05 24 + Freq: D, dtype: int64 + + In [5]: # old behavior + In [6]: series.resample('D', how='sum', closed='right', label='right') + Out[6]: + 2000-01-01 0 + 2000-01-02 21 + 2000-01-03 57 + 2000-01-04 93 + 2000-01-05 129 + Freq: D, dtype: int64 + +- Infinity and negative infinity are no longer treated as NA by ``isnull`` and + ``notnull``. That they ever were was a relic of early pandas. This behavior + can be re-enabled globally by the ``mode.use_inf_as_null`` option: + +.. code-block:: ipython + + In [6]: s = pd.Series([1.5, np.inf, 3.4, -np.inf]) + + In [7]: pd.isnull(s) + Out[7]: + 0 False + 1 False + 2 False + 3 False + Length: 4, dtype: bool + + In [8]: s.fillna(0) + Out[8]: + 0 1.500000 + 1 inf + 2 3.400000 + 3 -inf + Length: 4, dtype: float64 + + In [9]: pd.set_option('use_inf_as_null', True) + + In [10]: pd.isnull(s) + Out[10]: + 0 False + 1 True + 2 False + 3 True + Length: 4, dtype: bool + + In [11]: s.fillna(0) + Out[11]: + 0 1.5 + 1 0.0 + 2 3.4 + 3 0.0 + Length: 4, dtype: float64 + + In [12]: pd.reset_option('use_inf_as_null') + +- Methods with the ``inplace`` option now all return ``None`` instead of the + calling object. E.g. code written like ``df = df.fillna(0, inplace=True)`` + may stop working. To fix, simply delete the unnecessary variable assignment. + +- ``pandas.merge`` no longer sorts the group keys (``sort=False``) by + default. This was done for performance reasons: the group-key sorting is + often one of the more expensive parts of the computation and is often + unnecessary. + +- The default column names for a file with no header have been changed to the + integers ``0`` through ``N - 1``. This is to create consistency with the + DataFrame constructor with no columns specified. The v0.9.0 behavior (names + ``X0``, ``X1``, ...) can be reproduced by specifying ``prefix='X'``: + +.. 
ipython:: python + :okwarning: + + import io + + data = """ + a,b,c + 1,Yes,2 + 3,No,4 + """ + print(data) + pd.read_csv(io.StringIO(data), header=None) + pd.read_csv(io.StringIO(data), header=None, prefix="X") + +- Values like ``'Yes'`` and ``'No'`` are not interpreted as boolean by default, + though this can be controlled by new ``true_values`` and ``false_values`` + arguments: + +.. code-block:: ipython + + In [4]: print(data) + + a,b,c + 1,Yes,2 + 3,No,4 + + In [5]: pd.read_csv(io.StringIO(data)) + Out[5]: + a b c + 0 1 Yes 2 + 1 3 No 4 + + In [6]: pd.read_csv(io.StringIO(data), true_values=["Yes"], false_values=["No"]) + Out[6]: + a b c + 0 1 True 2 + 1 3 False 4 + +- The file parsers will not recognize non-string values arising from a + converter function as NA if passed in the ``na_values`` argument. It's better + to do post-processing using the ``replace`` function instead. + +- Calling ``fillna`` on Series or DataFrame with no arguments is no longer + valid code. You must either specify a fill value or an interpolation method: + +.. ipython:: python + + s = pd.Series([np.nan, 1.0, 2.0, np.nan, 4]) + s + s.fillna(0) + s.fillna(method="pad") + +Convenience methods ``ffill`` and ``bfill`` have been added: + +.. ipython:: python + + s.ffill() + + +- ``Series.apply`` will now operate on a returned value from the applied + function, that is itself a series, and possibly upcast the result to a + DataFrame + + .. ipython:: python + + def f(x): + return pd.Series([x, x ** 2], index=["x", "x^2"]) + + + s = pd.Series(np.random.rand(5)) + s + s.apply(f) + +- New API functions for working with pandas options (:issue:`2097`): + + - ``get_option`` / ``set_option`` - get/set the value of an option. Partial + names are accepted. - ``reset_option`` - reset one or more options to + their default value. Partial names are accepted. - ``describe_option`` - + print a description of one or more options. When called with no + arguments. print all registered options. + + Note: ``set_printoptions``/ ``reset_printoptions`` are now deprecated (but + functioning), the print options now live under "display.XYZ". For example: + + .. ipython:: python + + pd.get_option("display.max_rows") + +- to_string() methods now always return unicode strings (:issue:`2224`). + +New features +~~~~~~~~~~~~ + +Wide DataFrame printing +~~~~~~~~~~~~~~~~~~~~~~~ + +Instead of printing the summary information, pandas now splits the string +representation across multiple rows by default: + +.. ipython:: python + + wide_frame = pd.DataFrame(np.random.randn(5, 16)) + + wide_frame + +The old behavior of printing out summary information can be achieved via the +'expand_frame_repr' print option: + +.. ipython:: python + + pd.set_option("expand_frame_repr", False) + + wide_frame + +.. ipython:: python + :suppress: + + pd.reset_option("expand_frame_repr") + +The width of each line can be changed via 'line_width' (80 by default): + +.. code-block:: python + + pd.set_option("line_width", 40) + + wide_frame + + +Updated PyTables support +~~~~~~~~~~~~~~~~~~~~~~~~ + +:ref:`Docs ` for PyTables ``Table`` format & several enhancements to the api. Here is a taste of what to expect. + +.. 
code-block:: ipython + + In [41]: store = pd.HDFStore('store.h5') + + In [42]: df = pd.DataFrame(np.random.randn(8, 3), + ....: index=pd.date_range('1/1/2000', periods=8), + ....: columns=['A', 'B', 'C']) + + In [43]: df + Out[43]: + A B C + 2000-01-01 -2.036047 0.000830 -0.955697 + 2000-01-02 -0.898872 -0.725411 0.059904 + 2000-01-03 -0.449644 1.082900 -1.221265 + 2000-01-04 0.361078 1.330704 0.855932 + 2000-01-05 -1.216718 1.488887 0.018993 + 2000-01-06 -0.877046 0.045976 0.437274 + 2000-01-07 -0.567182 -0.888657 -0.556383 + 2000-01-08 0.655457 1.117949 -2.782376 + + [8 rows x 3 columns] + + # appending data frames + In [44]: df1 = df[0:4] + + In [45]: df2 = df[4:] + + In [46]: store.append('df', df1) + + In [47]: store.append('df', df2) + + In [48]: store + Out[48]: + + File path: store.h5 + /df frame_table (typ->appendable,nrows->8,ncols->3,indexers->[index]) + + # selecting the entire store + In [49]: store.select('df') + Out[49]: + A B C + 2000-01-01 -2.036047 0.000830 -0.955697 + 2000-01-02 -0.898872 -0.725411 0.059904 + 2000-01-03 -0.449644 1.082900 -1.221265 + 2000-01-04 0.361078 1.330704 0.855932 + 2000-01-05 -1.216718 1.488887 0.018993 + 2000-01-06 -0.877046 0.045976 0.437274 + 2000-01-07 -0.567182 -0.888657 -0.556383 + 2000-01-08 0.655457 1.117949 -2.782376 + + [8 rows x 3 columns] + +.. code-block:: ipython + + In [50]: wp = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + ....: major_axis=pd.date_range('1/1/2000', periods=5), + ....: minor_axis=['A', 'B', 'C', 'D']) + + In [51]: wp + Out[51]: + + Dimensions: 2 (items) x 5 (major_axis) x 4 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + + # storing a panel + In [52]: store.append('wp', wp) + + # selecting via A QUERY + In [53]: store.select('wp', [pd.Term('major_axis>20000102'), + ....: pd.Term('minor_axis', '=', ['A', 'B'])]) + ....: + Out[53]: + + Dimensions: 2 (items) x 3 (major_axis) x 2 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to B + + # removing data from tables + In [54]: store.remove('wp', pd.Term('major_axis>20000103')) + Out[54]: 8 + + In [55]: store.select('wp') + Out[55]: + + Dimensions: 2 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-01 00:00:00 to 2000-01-03 00:00:00 + Minor_axis axis: A to D + + # deleting a store + In [56]: del store['df'] + + In [57]: store + Out[57]: + + File path: store.h5 + /wp wide_table (typ->appendable,nrows->12,ncols->2,indexers->[major_axis,minor_axis]) + + +**Enhancements** + +- added ability to hierarchical keys + + .. code-block:: ipython + + In [58]: store.put('foo/bar/bah', df) + + In [59]: store.append('food/orange', df) + + In [60]: store.append('food/apple', df) + + In [61]: store + Out[61]: + + File path: store.h5 + /foo/bar/bah frame (shape->[8,3]) + /food/apple frame_table (typ->appendable,nrows->8,ncols->3,indexers->[index]) + /food/orange frame_table (typ->appendable,nrows->8,ncols->3,indexers->[index]) + /wp wide_table (typ->appendable,nrows->12,ncols->2,indexers->[major_axis,minor_axis]) + + # remove all nodes under this level + In [62]: store.remove('food') + + In [63]: store + Out[63]: + + File path: store.h5 + /foo/bar/bah frame (shape->[8,3]) + /wp wide_table (typ->appendable,nrows->12,ncols->2,indexers->[major_axis,minor_axis]) + +- added mixed-dtype support! + + .. 
code-block:: ipython + + In [64]: df['string'] = 'string' + + In [65]: df['int'] = 1 + + In [66]: store.append('df', df) + + In [67]: df1 = store.select('df') + + In [68]: df1 + Out[68]: + A B C string int + 2000-01-01 -2.036047 0.000830 -0.955697 string 1 + 2000-01-02 -0.898872 -0.725411 0.059904 string 1 + 2000-01-03 -0.449644 1.082900 -1.221265 string 1 + 2000-01-04 0.361078 1.330704 0.855932 string 1 + 2000-01-05 -1.216718 1.488887 0.018993 string 1 + 2000-01-06 -0.877046 0.045976 0.437274 string 1 + 2000-01-07 -0.567182 -0.888657 -0.556383 string 1 + 2000-01-08 0.655457 1.117949 -2.782376 string 1 + + [8 rows x 5 columns] + + In [69]: df1.get_dtype_counts() + Out[69]: + float64 3 + int64 1 + object 1 + dtype: int64 + +- performance improvements on table writing +- support for arbitrarily indexed dimensions +- ``SparseSeries`` now has a ``density`` property (:issue:`2384`) +- enable ``Series.str.strip/lstrip/rstrip`` methods to take an input argument + to strip arbitrary characters (:issue:`2411`) +- implement ``value_vars`` in ``melt`` to limit values to certain columns + and add ``melt`` to pandas namespace (:issue:`2412`) + +**Bug Fixes** + +- added ``Term`` method of specifying where conditions (:issue:`1996`). +- ``del store['df']`` now call ``store.remove('df')`` for store deletion +- deleting of consecutive rows is much faster than before +- ``min_itemsize`` parameter can be specified in table creation to force a + minimum size for indexing columns (the previous implementation would set the + column size based on the first append) +- indexing support via ``create_table_index`` (requires PyTables >= 2.3) + (:issue:`698`). +- appending on a store would fail if the table was not first created via ``put`` +- fixed issue with missing attributes after loading a pickled dataframe (GH2431) +- minor change to select and remove: require a table ONLY if where is also + provided (and not None) + +**Compatibility** + +0.10 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas, +however, query terms using the prior (undocumented) methodology are unsupported. You must read in the entire +file and write it out using the new format to take advantage of the updates. + +N dimensional panels (experimental) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Adding experimental support for Panel4D and factory functions to create n-dimensional named panels. +Here is a taste of what to expect. + +.. code-block:: ipython + + In [58]: p4d = Panel4D(np.random.randn(2, 2, 5, 4), + ....: labels=['Label1','Label2'], + ....: items=['Item1', 'Item2'], + ....: major_axis=date_range('1/1/2000', periods=5), + ....: minor_axis=['A', 'B', 'C', 'D']) + ....: + + In [59]: p4d + Out[59]: + + Dimensions: 2 (labels) x 2 (items) x 5 (major_axis) x 4 (minor_axis) + Labels axis: Label1 to Label2 + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + + + + + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.10.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.9.0..v0.10.0 diff --git a/doc/source/whatsnew/v0.10.1.rst b/doc/source/whatsnew/v0.10.1.rst new file mode 100644 index 00000000..611ac202 --- /dev/null +++ b/doc/source/whatsnew/v0.10.1.rst @@ -0,0 +1,268 @@ +.. 
_whatsnew_0101: + +Version 0.10.1 (January 22, 2013) +--------------------------------- + +{{ header }} + + +This is a minor release from 0.10.0 and includes new features, enhancements, +and bug fixes. In particular, there is substantial new HDFStore functionality +contributed by Jeff Reback. + +An undesired API breakage with functions taking the ``inplace`` option has been +reverted and deprecation warnings added. + +API changes +~~~~~~~~~~~ + +- Functions taking an ``inplace`` option return the calling object as before. A + deprecation message has been added +- Groupby aggregations Max/Min no longer exclude non-numeric data (:issue:`2700`) +- Resampling an empty DataFrame now returns an empty DataFrame instead of + raising an exception (:issue:`2640`) +- The file reader will now raise an exception when NA values are found in an + explicitly specified integer column instead of converting the column to float + (:issue:`2631`) +- DatetimeIndex.unique now returns a DatetimeIndex with the same name and +- timezone instead of an array (:issue:`2563`) + +New features +~~~~~~~~~~~~ + +- MySQL support for database (contribution from Dan Allan) + +HDFStore +~~~~~~~~ + +You may need to upgrade your existing data files. Please visit the +**compatibility** section in the main docs. + + +.. ipython:: python + :suppress: + :okexcept: + + import os + + os.remove("store.h5") + +You can designate (and index) certain columns that you want to be able to +perform queries on a table, by passing a list to ``data_columns`` + +.. ipython:: python + + store = pd.HDFStore("store.h5") + df = pd.DataFrame( + np.random.randn(8, 3), + index=pd.date_range("1/1/2000", periods=8), + columns=["A", "B", "C"], + ) + df["string"] = "foo" + df.loc[df.index[4:6], "string"] = np.nan + df.loc[df.index[7:9], "string"] = "bar" + df["string2"] = "cool" + df + + # on-disk operations + store.append("df", df, data_columns=["B", "C", "string", "string2"]) + store.select("df", "B>0 and string=='foo'") + + # this is in-memory version of this type of selection + df[(df.B > 0) & (df.string == "foo")] + +Retrieving unique values in an indexable or data column. + +.. code-block:: python + + # note that this is deprecated as of 0.14.0 + # can be replicated by: store.select_column('df','index').unique() + store.unique("df", "index") + store.unique("df", "string") + +You can now store ``datetime64`` in data columns + +.. ipython:: python + + df_mixed = df.copy() + df_mixed["datetime64"] = pd.Timestamp("20010102") + df_mixed.loc[df_mixed.index[3:4], ["A", "B"]] = np.nan + + store.append("df_mixed", df_mixed) + df_mixed1 = store.select("df_mixed") + df_mixed1 + df_mixed1.dtypes.value_counts() + +You can pass ``columns`` keyword to select to filter a list of the return +columns, this is equivalent to passing a +``Term('columns',list_of_columns_to_filter)`` + +.. ipython:: python + + store.select("df", columns=["A", "B"]) + +``HDFStore`` now serializes MultiIndex dataframes when appending tables. + +.. 
code-block:: ipython + + In [19]: index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ....: ['one', 'two', 'three']], + ....: labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + ....: [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + ....: names=['foo', 'bar']) + ....: + + In [20]: df = pd.DataFrame(np.random.randn(10, 3), index=index, + ....: columns=['A', 'B', 'C']) + ....: + + In [21]: df + Out[21]: + A B C + foo bar + foo one -0.116619 0.295575 -1.047704 + two 1.640556 1.905836 2.772115 + three 0.088787 -1.144197 -0.633372 + bar one 0.925372 -0.006438 -0.820408 + two -0.600874 -1.039266 0.824758 + baz two -0.824095 -0.337730 -0.927764 + three -0.840123 0.248505 -0.109250 + qux one 0.431977 -0.460710 0.336505 + two -3.207595 -1.535854 0.409769 + three -0.673145 -0.741113 -0.110891 + + In [22]: store.append('mi', df) + + In [23]: store.select('mi') + Out[23]: + A B C + foo bar + foo one -0.116619 0.295575 -1.047704 + two 1.640556 1.905836 2.772115 + three 0.088787 -1.144197 -0.633372 + bar one 0.925372 -0.006438 -0.820408 + two -0.600874 -1.039266 0.824758 + baz two -0.824095 -0.337730 -0.927764 + three -0.840123 0.248505 -0.109250 + qux one 0.431977 -0.460710 0.336505 + two -3.207595 -1.535854 0.409769 + three -0.673145 -0.741113 -0.110891 + + # the levels are automatically included as data columns + In [24]: store.select('mi', "foo='bar'") + Out[24]: + A B C + foo bar + bar one 0.925372 -0.006438 -0.820408 + two -0.600874 -1.039266 0.824758 + +Multi-table creation via ``append_to_multiple`` and selection via +``select_as_multiple`` can create/select from multiple tables and return a +combined result, by using ``where`` on a selector table. + +.. ipython:: python + + df_mt = pd.DataFrame( + np.random.randn(8, 6), + index=pd.date_range("1/1/2000", periods=8), + columns=["A", "B", "C", "D", "E", "F"], + ) + df_mt["foo"] = "bar" + + # you can also create the tables individually + store.append_to_multiple( + {"df1_mt": ["A", "B"], "df2_mt": None}, df_mt, selector="df1_mt" + ) + store + + # individual tables were created + store.select("df1_mt") + store.select("df2_mt") + + # as a multiple + store.select_as_multiple( + ["df1_mt", "df2_mt"], where=["A>0", "B>0"], selector="df1_mt" + ) + +.. ipython:: python + :suppress: + + store.close() + os.remove("store.h5") + +**Enhancements** + +- ``HDFStore`` now can read native PyTables table format tables + +- You can pass ``nan_rep = 'my_nan_rep'`` to append, to change the default nan + representation on disk (which converts to/from ``np.nan``), this defaults to + ``nan``. + +- You can pass ``index`` to ``append``. This defaults to ``True``. This will + automagically create indices on the *indexables* and *data columns* of the + table + +- You can pass ``chunksize=an integer`` to ``append``, to change the writing + chunksize (default is 50000). This will significantly lower your memory usage + on writing. + +- You can pass ``expectedrows=an integer`` to the first ``append``, to set the + TOTAL number of expected rows that ``PyTables`` will expected. This will + optimize read/write performance. + +- ``Select`` now supports passing ``start`` and ``stop`` to provide selection + space limiting in selection. 
+ +- Greatly improved ISO8601 (e.g., yyyy-mm-dd) date parsing for file parsers (:issue:`2698`) +- Allow ``DataFrame.merge`` to handle combinatorial sizes too large for 64-bit + integer (:issue:`2690`) +- Series now has unary negation (-series) and inversion (~series) operators (:issue:`2686`) +- DataFrame.plot now includes a ``logx`` parameter to change the x-axis to log scale (:issue:`2327`) +- Series arithmetic operators can now handle constant and ndarray input (:issue:`2574`) +- ExcelFile now takes a ``kind`` argument to specify the file type (:issue:`2613`) +- A faster implementation for Series.str methods (:issue:`2602`) + +**Bug Fixes** + +- ``HDFStore`` tables can now store ``float32`` types correctly (cannot be + mixed with ``float64`` however) +- Fixed Google Analytics prefix when specifying request segment (:issue:`2713`). +- Function to reset Google Analytics token store so users can recover from + improperly setup client secrets (:issue:`2687`). +- Fixed groupby bug resulting in segfault when passing in MultiIndex (:issue:`2706`) +- Fixed bug where passing a Series with datetime64 values into ``to_datetime`` + results in bogus output values (:issue:`2699`) +- Fixed bug in ``pattern in HDFStore`` expressions when pattern is not a valid + regex (:issue:`2694`) +- Fixed performance issues while aggregating boolean data (:issue:`2692`) +- When given a boolean mask key and a Series of new values, Series __setitem__ + will now align the incoming values with the original Series (:issue:`2686`) +- Fixed MemoryError caused by performing counting sort on sorting MultiIndex + levels with a very large number of combinatorial values (:issue:`2684`) +- Fixed bug that causes plotting to fail when the index is a DatetimeIndex with + a fixed-offset timezone (:issue:`2683`) +- Corrected business day subtraction logic when the offset is more than 5 bdays + and the starting date is on a weekend (:issue:`2680`) +- Fixed C file parser behavior when the file has more columns than data + (:issue:`2668`) +- Fixed file reader bug that misaligned columns with data in the presence of an + implicit column and a specified ``usecols`` value +- DataFrames with numerical or datetime indices are now sorted prior to + plotting (:issue:`2609`) +- Fixed DataFrame.from_records error when passed columns, index, but empty + records (:issue:`2633`) +- Several bug fixed for Series operations when dtype is datetime64 (:issue:`2689`, + :issue:`2629`, :issue:`2626`) + + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.10.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.10.0..v0.10.1 diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst new file mode 100644 index 00000000..0fba784e --- /dev/null +++ b/doc/source/whatsnew/v0.11.0.rst @@ -0,0 +1,464 @@ +.. _whatsnew_0110: + +Version 0.11.0 (April 22, 2013) +------------------------------- + +{{ header }} + + +This is a major release from 0.10.1 and includes many new features and +enhancements along with a large number of bug fixes. The methods of Selecting +Data have had quite a number of additions, and Dtype support is now full-fledged. +There are also a number of important API changes that long-time pandas users should +pay close attention to. + +There is a new section in the documentation, :ref:`10 Minutes to Pandas <10min>`, +primarily geared to new users. 
+ +There is a new section in the documentation, :ref:`Cookbook `, a collection +of useful recipes in pandas (and that we want contributions!). + +There are several libraries that are now :ref:`Recommended Dependencies ` + +Selection choices +~~~~~~~~~~~~~~~~~ + +Starting in 0.11.0, object selection has had a number of user-requested additions in +order to support more explicit location based indexing. pandas now supports +three types of multi-axis indexing. + +- ``.loc`` is strictly label based, will raise ``KeyError`` when the items are not found, allowed inputs are: + + - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is interpreted as a *label* of the index. This use is **not** an integer position along the index) + - A list or array of labels ``['a', 'b', 'c']`` + - A slice object with labels ``'a':'f'``, (note that contrary to usual python slices, **both** the start and the stop are included!) + - A boolean array + + See more at :ref:`Selection by Label ` + +- ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of the axis), will raise ``IndexError`` when the requested indices are out of bounds. Allowed inputs are: + + - An integer e.g. ``5`` + - A list or array of integers ``[4, 3, 0]`` + - A slice object with ints ``1:7`` + - A boolean array + + See more at :ref:`Selection by Position ` + +- ``.ix`` supports mixed integer and label based access. It is primarily label based, but will fallback to integer positional access. ``.ix`` is the most general and will support + any of the inputs to ``.loc`` and ``.iloc``, as well as support for floating point label schemes. ``.ix`` is especially useful when dealing with mixed positional and label + based hierarchical indexes. + + As using integer slices with ``.ix`` have different behavior depending on whether the slice + is interpreted as position based or label based, it's usually better to be + explicit and use ``.iloc`` or ``.loc``. + + See more at :ref:`Advanced Indexing ` and :ref:`Advanced Hierarchical `. + + +Selection deprecations +~~~~~~~~~~~~~~~~~~~~~~ + +Starting in version 0.11.0, these methods *may* be deprecated in future versions. + +- ``irow`` +- ``icol`` +- ``iget_value`` + +See the section :ref:`Selection by Position ` for substitutes. + +Dtypes +~~~~~~ + +Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, or a passed ``Series``, then it will be preserved in DataFrame operations. Furthermore, different numeric dtypes will **NOT** be combined. The following example will give you a taste. + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32') + df1 + df1.dtypes + df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float16'), + 'B': pd.Series(np.random.randn(8)), + 'C': pd.Series(range(8), dtype='uint8')}) + df2 + df2.dtypes + + # here you get some upcasting + df3 = df1.reindex_like(df2).fillna(value=0.0) + df2 + df3 + df3.dtypes + +Dtype conversion +~~~~~~~~~~~~~~~~ + +This is lower-common-denominator upcasting, meaning you get the dtype which can accommodate all of the types + +.. ipython:: python + + df3.values.dtype + +Conversion + +.. ipython:: python + + df3.astype('float32').dtypes + +Mixed conversion + +.. code-block:: ipython + + In [12]: df3['D'] = '1.' 
+ + In [13]: df3['E'] = '1' + + In [14]: df3.convert_objects(convert_numeric=True).dtypes + Out[14]: + A float32 + B float64 + C float64 + D float64 + E int64 + dtype: object + + # same, but specific dtype conversion + In [15]: df3['D'] = df3['D'].astype('float16') + + In [16]: df3['E'] = df3['E'].astype('int32') + + In [17]: df3.dtypes + Out[17]: + A float32 + B float64 + C float64 + D float16 + E int32 + dtype: object + +Forcing date coercion (and setting ``NaT`` when not datelike) + +.. code-block:: ipython + + In [18]: import datetime + + In [19]: s = pd.Series([datetime.datetime(2001, 1, 1, 0, 0), 'foo', 1.0, 1, + ....: pd.Timestamp('20010104'), '20010105'], dtype='O') + ....: + + In [20]: s.convert_objects(convert_dates='coerce') + Out[20]: + 0 2001-01-01 + 1 NaT + 2 NaT + 3 NaT + 4 2001-01-04 + 5 2001-01-05 + dtype: datetime64[ns] + +Dtype gotchas +~~~~~~~~~~~~~ + +**Platform gotchas** + +Starting in 0.11.0, construction of DataFrame/Series will use default dtypes of ``int64`` and ``float64``, +*regardless of platform*. This is not an apparent change from earlier versions of pandas. If you specify +dtypes, they *WILL* be respected, however (:issue:`2837`) + +The following will all result in ``int64`` dtypes + +.. code-block:: ipython + + In [21]: pd.DataFrame([1, 2], columns=['a']).dtypes + Out[21]: + a int64 + dtype: object + + In [22]: pd.DataFrame({'a': [1, 2]}).dtypes + Out[22]: + a int64 + dtype: object + + In [23]: pd.DataFrame({'a': 1}, index=range(2)).dtypes + Out[23]: + a int64 + dtype: object + +Keep in mind that ``DataFrame(np.array([1,2]))`` **WILL** result in ``int32`` on 32-bit platforms! + + +**Upcasting gotchas** + +Performing indexing operations on integer type data can easily upcast the data. +The dtype of the input data will be preserved in cases where ``nans`` are not introduced. + +.. code-block:: ipython + + In [24]: dfi = df3.astype('int32') + + In [25]: dfi['D'] = dfi['D'].astype('int64') + + In [26]: dfi + Out[26]: + A B C D E + 0 0 0 0 1 1 + 1 -2 0 1 1 1 + 2 -2 0 2 1 1 + 3 0 -1 3 1 1 + 4 1 0 4 1 1 + 5 0 0 5 1 1 + 6 0 -1 6 1 1 + 7 0 0 7 1 1 + + In [27]: dfi.dtypes + Out[27]: + A int32 + B int32 + C int32 + D int64 + E int32 + dtype: object + + In [28]: casted = dfi[dfi > 0] + + In [29]: casted + Out[29]: + A B C D E + 0 NaN NaN NaN 1 1 + 1 NaN NaN 1.0 1 1 + 2 NaN NaN 2.0 1 1 + 3 NaN NaN 3.0 1 1 + 4 1.0 NaN 4.0 1 1 + 5 NaN NaN 5.0 1 1 + 6 NaN NaN 6.0 1 1 + 7 NaN NaN 7.0 1 1 + + In [30]: casted.dtypes + Out[30]: + A float64 + B float64 + C float64 + D int64 + E int32 + dtype: object + +While float dtypes are unchanged. + +.. code-block:: ipython + + In [31]: df4 = df3.copy() + + In [32]: df4['A'] = df4['A'].astype('float32') + + In [33]: df4.dtypes + Out[33]: + A float32 + B float64 + C float64 + D float16 + E int32 + dtype: object + + In [34]: casted = df4[df4 > 0] + + In [35]: casted + Out[35]: + A B C D E + 0 NaN NaN NaN 1.0 1 + 1 NaN 0.567020 1.0 1.0 1 + 2 NaN 0.276232 2.0 1.0 1 + 3 NaN NaN 3.0 1.0 1 + 4 1.933792 NaN 4.0 1.0 1 + 5 NaN 0.113648 5.0 1.0 1 + 6 NaN NaN 6.0 1.0 1 + 7 NaN 0.524988 7.0 1.0 1 + + In [36]: casted.dtypes + Out[36]: + A float32 + B float64 + C float64 + D float16 + E int32 + dtype: object + +Datetimes conversion +~~~~~~~~~~~~~~~~~~~~ + +Datetime64[ns] columns in a DataFrame (or a Series) allow the use of ``np.nan`` to indicate a nan value, +in addition to the traditional ``NaT``, or not-a-time. This allows convenient nan setting in a generic way. 
+Furthermore ``datetime64[ns]`` columns are created by default, when passed datetimelike objects (*this change was introduced in 0.10.1*) +(:issue:`2809`, :issue:`2810`) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(6, 2), pd.date_range('20010102', periods=6), + columns=['A', ' B']) + df['timestamp'] = pd.Timestamp('20010103') + df + + # datetime64[ns] out of the box + df.dtypes.value_counts() + + # use the traditional nan, which is mapped to NaT internally + df.loc[df.index[2:4], ['A', 'timestamp']] = np.nan + df + +Astype conversion on ``datetime64[ns]`` to ``object``, implicitly converts ``NaT`` to ``np.nan`` + +.. ipython:: python + + import datetime + s = pd.Series([datetime.datetime(2001, 1, 2, 0, 0) for i in range(3)]) + s.dtype + s[1] = np.nan + s + s.dtype + s = s.astype('O') + s + s.dtype + + +API changes +~~~~~~~~~~~ + + - Added to_series() method to indices, to facilitate the creation of indexers + (:issue:`3275`) + + - ``HDFStore`` + + - added the method ``select_column`` to select a single column from a table as a Series. + - deprecated the ``unique`` method, can be replicated by ``select_column(key,column).unique()`` + - ``min_itemsize`` parameter to ``append`` will now automatically create data_columns for passed keys + +Enhancements +~~~~~~~~~~~~ + + - Improved performance of df.to_csv() by up to 10x in some cases. (:issue:`3059`) + + - Numexpr is now a :ref:`Recommended Dependencies `, to accelerate certain + types of numerical and boolean operations + + - Bottleneck is now a :ref:`Recommended Dependencies `, to accelerate certain + types of ``nan`` operations + + - ``HDFStore`` + + - support ``read_hdf/to_hdf`` API similar to ``read_csv/to_csv`` + + .. ipython:: python + + df = pd.DataFrame({'A': range(5), 'B': range(5)}) + df.to_hdf('store.h5', 'table', append=True) + pd.read_hdf('store.h5', 'table', where=['index > 2']) + + .. ipython:: python + :suppress: + :okexcept: + + import os + + os.remove('store.h5') + + - provide dotted attribute access to ``get`` from stores, e.g. ``store.df == store['df']`` + + - new keywords ``iterator=boolean``, and ``chunksize=number_in_a_chunk`` are + provided to support iteration on ``select`` and ``select_as_multiple`` (:issue:`3076`) + + - You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (:issue:`2437`) + + - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (:issue:`3070`) + + .. ipython:: python + :okwarning: + + idx = pd.date_range("2001-10-1", periods=5, freq='M') + ts = pd.Series(np.random.rand(len(idx)), index=idx) + ts['2001'] + + df = pd.DataFrame({'A': ts}) + df['2001'] + + - ``Squeeze`` to possibly remove length 1 dimensions from an object. + + .. code-block:: python + + >>> p = pd.Panel(np.random.randn(3, 4, 4), items=['ItemA', 'ItemB', 'ItemC'], + ... major_axis=pd.date_range('20010102', periods=4), + ... 
minor_axis=['A', 'B', 'C', 'D']) + >>> p + + Dimensions: 3 (items) x 4 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2001-01-02 00:00:00 to 2001-01-05 00:00:00 + Minor_axis axis: A to D + + >>> p.reindex(items=['ItemA']).squeeze() + A B C D + 2001-01-02 0.926089 -2.026458 0.501277 -0.204683 + 2001-01-03 -0.076524 1.081161 1.141361 0.479243 + 2001-01-04 0.641817 -0.185352 1.824568 0.809152 + 2001-01-05 0.575237 0.669934 1.398014 -0.399338 + + >>> p.reindex(items=['ItemA'], minor=['B']).squeeze() + 2001-01-02 -2.026458 + 2001-01-03 1.081161 + 2001-01-04 -0.185352 + 2001-01-05 0.669934 + Freq: D, Name: B, dtype: float64 + + - In ``pd.io.data.Options``, + + + Fix bug when trying to fetch data for the current month when already + past expiry. + + Now using lxml to scrape html instead of BeautifulSoup (lxml was faster). + + New instance variables for calls and puts are automatically created + when a method that creates them is called. This works for current month + where the instance variables are simply ``calls`` and ``puts``. Also + works for future expiry months and save the instance variable as + ``callsMMYY`` or ``putsMMYY``, where ``MMYY`` are, respectively, the + month and year of the option's expiry. + + ``Options.get_near_stock_price`` now allows the user to specify the + month for which to get relevant options data. + + ``Options.get_forward_data`` now has optional kwargs ``near`` and + ``above_below``. This allows the user to specify if they would like to + only return forward looking data for options near the current stock + price. This just obtains the data from Options.get_near_stock_price + instead of Options.get_xxx_data() (:issue:`2758`). + + - Cursor coordinate information is now displayed in time-series plots. + + - added option ``display.max_seq_items`` to control the number of + elements printed per sequence pprinting it. (:issue:`2979`) + + - added option ``display.chop_threshold`` to control display of small numerical + values. (:issue:`2739`) + + - added option ``display.max_info_rows`` to prevent verbose_info from being + calculated for frames above 1M rows (configurable). (:issue:`2807`, :issue:`2918`) + + - value_counts() now accepts a "normalize" argument, for normalized + histograms. (:issue:`2710`). + + - DataFrame.from_records now accepts not only dicts but any instance of + the collections.Mapping ABC. + + - added option ``display.mpl_style`` providing a sleeker visual style + for plots. Based on https://gist.github.com/huyng/816622 (:issue:`3075`). + + - Treat boolean values as integers (values 1 and 0) for numeric + operations. (:issue:`2641`) + + - to_html() now accepts an optional "escape" argument to control reserved + HTML character escaping (enabled by default) and escapes ``&``, in addition + to ``<`` and ``>``. (:issue:`2919`) + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.11.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.10.1..v0.11.0 diff --git a/doc/source/whatsnew/v0.12.0.rst b/doc/source/whatsnew/v0.12.0.rst new file mode 100644 index 00000000..c12adb2f --- /dev/null +++ b/doc/source/whatsnew/v0.12.0.rst @@ -0,0 +1,535 @@ +.. _whatsnew_0120: + +Version 0.12.0 (July 24, 2013) +------------------------------ + +{{ header }} + + +This is a major release from 0.11.0 and includes several new features and +enhancements along with a large number of bug fixes. 
+ +Highlights include a consistent I/O API naming scheme, routines to read html, +write MultiIndexes to csv files, read & write STATA data files, read & write JSON format +files, Python 3 support for ``HDFStore``, filtering of groupby expressions via ``filter``, and a +revamped ``replace`` routine that accepts regular expressions. + +API changes +~~~~~~~~~~~ + + - The I/O API is now much more consistent with a set of top level ``reader`` functions + accessed like ``pd.read_csv()`` that generally return a ``pandas`` object. + + * ``read_csv`` + * ``read_excel`` + * ``read_hdf`` + * ``read_sql`` + * ``read_json`` + * ``read_html`` + * ``read_stata`` + * ``read_clipboard`` + + The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` + + * ``to_csv`` + * ``to_excel`` + * ``to_hdf`` + * ``to_sql`` + * ``to_json`` + * ``to_html`` + * ``to_stata`` + * ``to_clipboard`` + + + - Fix modulo and integer division on Series,DataFrames to act similarly to ``float`` dtypes to return + ``np.nan`` or ``np.inf`` as appropriate (:issue:`3590`). This correct a numpy bug that treats ``integer`` + and ``float`` dtypes differently. + + .. ipython:: python + + p = pd.DataFrame({"first": [4, 5, 8], "second": [0, 0, 3]}) + p % 0 + p % p + p / p + p / 0 + + - Add ``squeeze`` keyword to ``groupby`` to allow reduction from + DataFrame -> Series if groups are unique. This is a Regression from 0.10.1. + We are reverting back to the prior behavior. This means groupby will return the + same shaped objects whether the groups are unique or not. Revert this issue (:issue:`2893`) + with (:issue:`3596`). + + .. code-block:: ipython + + In [2]: df2 = pd.DataFrame([{"val1": 1, "val2": 20}, + ...: {"val1": 1, "val2": 19}, + ...: {"val1": 1, "val2": 27}, + ...: {"val1": 1, "val2": 12}]) + + In [3]: def func(dataf): + ...: return dataf["val2"] - dataf["val2"].mean() + ...: + + In [4]: # squeezing the result frame to a series (because we have unique groups) + ...: df2.groupby("val1", squeeze=True).apply(func) + Out[4]: + 0 0.5 + 1 -0.5 + 2 7.5 + 3 -7.5 + Name: 1, dtype: float64 + + In [5]: # no squeezing (the default, and behavior in 0.10.1) + ...: df2.groupby("val1").apply(func) + Out[5]: + val2 0 1 2 3 + val1 + 1 0.5 -0.5 7.5 -7.5 + + - Raise on ``iloc`` when boolean indexing with a label based indexer mask + e.g. a boolean Series, even with integer labels, will raise. Since ``iloc`` + is purely positional based, the labels on the Series are not alignable (:issue:`3631`) + + This case is rarely used, and there are plenty of alternatives. This preserves the + ``iloc`` API to be *purely* positional based. + + .. ipython:: python + + df = pd.DataFrame(range(5), index=list("ABCDE"), columns=["a"]) + mask = df.a % 2 == 0 + mask + + # this is what you should use + df.loc[mask] + + # this will work as well + df.iloc[mask.values] + + ``df.iloc[mask]`` will raise a ``ValueError`` + + - The ``raise_on_error`` argument to plotting functions is removed. Instead, + plotting functions raise a ``TypeError`` when the ``dtype`` of the object + is ``object`` to remind you to avoid ``object`` arrays whenever possible + and thus you should cast to an appropriate numeric dtype if you need to + plot something. + + - Add ``colormap`` keyword to DataFrame plotting methods. Accepts either a + matplotlib colormap object (ie, matplotlib.cm.jet) or a string name of such + an object (ie, 'jet'). The colormap is sampled to select the color for each + column. Please see :ref:`visualization.colormaps` for more information. 
+ (:issue:`3860`) + + - ``DataFrame.interpolate()`` is now deprecated. Please use + ``DataFrame.fillna()`` and ``DataFrame.replace()`` instead. (:issue:`3582`, + :issue:`3675`, :issue:`3676`) + + - the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are + deprecated + + - ``DataFrame.replace`` 's ``infer_types`` parameter is removed and now + performs conversion by default. (:issue:`3907`) + + - Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column + to be inserted if ``True``, default is ``False`` (same as prior to 0.12) (:issue:`3679`) + - Implement ``__nonzero__`` for ``NDFrame`` objects (:issue:`3691`, :issue:`3696`) + + - IO api + + - added top-level function ``read_excel`` to replace the following, + The original API is deprecated and will be removed in a future version + + .. code-block:: python + + from pandas.io.parsers import ExcelFile + + xls = ExcelFile("path_to_file.xls") + xls.parse("Sheet1", index_col=None, na_values=["NA"]) + + With + + .. code-block:: python + + import pandas as pd + + pd.read_excel("path_to_file.xls", "Sheet1", index_col=None, na_values=["NA"]) + + - added top-level function ``read_sql`` that is equivalent to the following + + .. code-block:: python + + from pandas.io.sql import read_frame + + read_frame(...) + + - ``DataFrame.to_html`` and ``DataFrame.to_latex`` now accept a path for + their first argument (:issue:`3702`) + + - Do not allow astypes on ``datetime64[ns]`` except to ``object``, and + ``timedelta64[ns]`` to ``object/int`` (:issue:`3425`) + + - The behavior of ``datetime64`` dtypes has changed with respect to certain + so-called reduction operations (:issue:`3726`). The following operations now + raise a ``TypeError`` when performed on a ``Series`` and return an *empty* + ``Series`` when performed on a ``DataFrame`` similar to performing these + operations on, for example, a ``DataFrame`` of ``slice`` objects: + + - sum, prod, mean, std, var, skew, kurt, corr, and cov + + - ``read_html`` now defaults to ``None`` when reading, and falls back on + ``bs4`` + ``html5lib`` when lxml fails to parse. a list of parsers to try + until success is also valid + + - The internal ``pandas`` class hierarchy has changed (slightly). The + previous ``PandasObject`` now is called ``PandasContainer`` and a new + ``PandasObject`` has become the base class for ``PandasContainer`` as well + as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and + ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` + provides string methods (from ``StringMixin``). (:issue:`4090`, :issue:`4092`) + + - New ``StringMixin`` that, given a ``__unicode__`` method, gets python 2 and + python 3 compatible string methods (``__str__``, ``__bytes__``, and + ``__repr__``). Plus string safety throughout. Now employed in many places + throughout the pandas library. (:issue:`4090`, :issue:`4092`) + +IO enhancements +~~~~~~~~~~~~~~~ + + - ``pd.read_html()`` can now parse HTML strings, files or urls and return + DataFrames, courtesy of @cpcloud. (:issue:`3477`, :issue:`3605`, :issue:`3606`, :issue:`3616`). + It works with a *single* parser backend: BeautifulSoup4 + html5lib :ref:`See the docs` + + You can use ``pd.read_html()`` to read the output from ``DataFrame.to_html()`` like so + + .. 
ipython:: python + :okwarning: + + df = pd.DataFrame({"a": range(3), "b": list("abc")}) + print(df) + html = df.to_html() + alist = pd.read_html(html, index_col=0) + print(df == alist[0]) + + Note that ``alist`` here is a Python ``list`` so ``pd.read_html()`` and + ``DataFrame.to_html()`` are not inverses. + + - ``pd.read_html()`` no longer performs hard conversion of date strings + (:issue:`3656`). + + .. warning:: + + You may have to install an older version of BeautifulSoup4, + :ref:`See the installation docs` + + - Added module for reading and writing Stata files: ``pandas.io.stata`` (:issue:`1512`) + accessible via ``read_stata`` top-level function for reading, + and ``to_stata`` DataFrame method for writing, :ref:`See the docs` + + - Added module for reading and writing json format files: ``pandas.io.json`` + accessible via ``read_json`` top-level function for reading, + and ``to_json`` DataFrame method for writing, :ref:`See the docs` + various issues (:issue:`1226`, :issue:`3804`, :issue:`3876`, :issue:`3867`, :issue:`1305`) + + - ``MultiIndex`` column support for reading and writing csv format files + + - The ``header`` option in ``read_csv`` now accepts a + list of the rows from which to read the index. + + - The option, ``tupleize_cols`` can now be specified in both ``to_csv`` and + ``read_csv``, to provide compatibility for the pre 0.12 behavior of + writing and reading ``MultIndex`` columns via a list of tuples. The default in + 0.12 is to write lists of tuples and *not* interpret list of tuples as a + ``MultiIndex`` column. + + Note: The default behavior in 0.12 remains unchanged from prior versions, but starting with 0.13, + the default *to* write and read ``MultiIndex`` columns will be in the new + format. (:issue:`3571`, :issue:`1651`, :issue:`3141`) + + - If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it + with ``df.to_csv(..., index=False``), then any ``names`` on the columns index will + be *lost*. + + .. ipython:: python + + from pandas._testing import makeCustomDataframe as mkdf + + df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv("mi.csv") + print(open("mi.csv").read()) + pd.read_csv("mi.csv", header=[0, 1, 2, 3], index_col=[0, 1]) + + .. ipython:: python + :suppress: + + import os + + os.remove("mi.csv") + + - Support for ``HDFStore`` (via ``PyTables 3.0.0``) on Python3 + + - Iterator support via ``read_hdf`` that automatically opens and closes the + store when iteration is finished. This is only for *tables* + + .. code-block:: ipython + + In [25]: path = 'store_iterator.h5' + + In [26]: pd.DataFrame(np.random.randn(10, 2)).to_hdf(path, 'df', table=True) + + In [27]: for df in pd.read_hdf(path, 'df', chunksize=3): + ....: print(df) + ....: + 0 1 + 0 0.713216 -0.778461 + 1 -0.661062 0.862877 + 2 0.344342 0.149565 + 0 1 + 3 -0.626968 -0.875772 + 4 -0.930687 -0.218983 + 5 0.949965 -0.442354 + 0 1 + 6 -0.402985 1.111358 + 7 -0.241527 -0.670477 + 8 0.049355 0.632633 + 0 1 + 9 -1.502767 -1.225492 + + + + - ``read_csv`` will now throw a more informative error message when a file + contains no columns, e.g., all newline characters + +Other enhancements +~~~~~~~~~~~~~~~~~~ + + - ``DataFrame.replace()`` now allows regular expressions on contained + ``Series`` with object dtype. See the examples section in the regular docs + :ref:`Replacing via String Expression ` + + For example you can do + + .. 
ipython:: python + + df = pd.DataFrame({"a": list("ab.."), "b": [1, 2, 3, 4]}) + df.replace(regex=r"\s*\.\s*", value=np.nan) + + to replace all occurrences of the string ``'.'`` with zero or more + instances of surrounding white space with ``NaN``. + + Regular string replacement still works as expected. For example, you can do + + .. ipython:: python + + df.replace(".", np.nan) + + to replace all occurrences of the string ``'.'`` with ``NaN``. + + - ``pd.melt()`` now accepts the optional parameters ``var_name`` and ``value_name`` + to specify custom column names of the returned DataFrame. + + - ``pd.set_option()`` now allows N option, value pairs (:issue:`3667`). + + Let's say that we had an option ``'a.b'`` and another option ``'b.c'``. + We can set them at the same time: + + .. code-block:: ipython + + In [31]: pd.get_option('a.b') + Out[31]: 2 + + In [32]: pd.get_option('b.c') + Out[32]: 3 + + In [33]: pd.set_option('a.b', 1, 'b.c', 4) + + In [34]: pd.get_option('a.b') + Out[34]: 1 + + In [35]: pd.get_option('b.c') + Out[35]: 4 + + - The ``filter`` method for group objects returns a subset of the original + object. Suppose we want to take only elements that belong to groups with a + group sum greater than 2. + + .. ipython:: python + + sf = pd.Series([1, 1, 2, 3, 3, 3]) + sf.groupby(sf).filter(lambda x: x.sum() > 2) + + The argument of ``filter`` must a function that, applied to the group as a + whole, returns ``True`` or ``False``. + + Another useful operation is filtering out elements that belong to groups + with only a couple members. + + .. ipython:: python + + dff = pd.DataFrame({"A": np.arange(8), "B": list("aabbbbcc")}) + dff.groupby("B").filter(lambda x: len(x) > 2) + + Alternatively, instead of dropping the offending groups, we can return a + like-indexed objects where the groups that do not pass the filter are + filled with NaNs. + + .. ipython:: python + + dff.groupby("B").filter(lambda x: len(x) > 2, dropna=False) + + - Series and DataFrame hist methods now take a ``figsize`` argument (:issue:`3834`) + + - DatetimeIndexes no longer try to convert mixed-integer indexes during join + operations (:issue:`3877`) + + - Timestamp.min and Timestamp.max now represent valid Timestamp instances instead + of the default datetime.min and datetime.max (respectively), thanks @SleepingPills + + - ``read_html`` now raises when no tables are found and BeautifulSoup==4.2.0 + is detected (:issue:`4214`) + + +Experimental features +~~~~~~~~~~~~~~~~~~~~~ + + - Added experimental ``CustomBusinessDay`` class to support ``DateOffsets`` + with custom holiday calendars and custom weekmasks. (:issue:`2301`) + + .. note:: + + This uses the ``numpy.busdaycalendar`` API introduced in Numpy 1.7 and + therefore requires Numpy 1.7.0 or newer. + + .. ipython:: python + + from pandas.tseries.offsets import CustomBusinessDay + from datetime import datetime + + # As an interesting example, let's look at Egypt where + # a Friday-Saturday weekend is observed. 
+ weekmask_egypt = "Sun Mon Tue Wed Thu" + # They also observe International Workers' Day so let's + # add that for a couple of years + holidays = ["2012-05-01", datetime(2013, 5, 1), np.datetime64("2014-05-01")] + bday_egypt = CustomBusinessDay(holidays=holidays, weekmask=weekmask_egypt) + dt = datetime(2013, 4, 30) + print(dt + 2 * bday_egypt) + dts = pd.date_range(dt, periods=5, freq=bday_egypt) + print(pd.Series(dts.weekday, dts).map(pd.Series("Mon Tue Wed Thu Fri Sat Sun".split()))) + +Bug fixes +~~~~~~~~~ + + - Plotting functions now raise a ``TypeError`` before trying to plot anything + if the associated objects have a dtype of ``object`` (:issue:`1818`, + :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to + numeric arrays if possible so that you can still plot, for example, an + object array with floats. This happens before any drawing takes place which + eliminates any spurious plots from showing up. + + - ``fillna`` methods now raise a ``TypeError`` if the ``value`` parameter is + a list or tuple. + + - ``Series.str`` now supports iteration (:issue:`3638`). You can iterate over the + individual elements of each string in the ``Series``. Each iteration yields + a ``Series`` with either a single character at each index of the original + ``Series`` or ``NaN``. For example, + + .. ipython:: python + :okwarning: + + strs = "go", "bow", "joe", "slow" + ds = pd.Series(strs) + + for s in ds.str: + print(s) + + s + s.dropna().values.item() == "w" + + The last element yielded by the iterator will be a ``Series`` containing + the last element of the longest string in the ``Series`` with all other + elements being ``NaN``. Here since ``'slow'`` is the longest string + and there are no other strings with the same length ``'w'`` is the only + non-null string in the yielded ``Series``. + + - ``HDFStore`` + + - will retain index attributes (freq,tz,name) on recreation (:issue:`3499`) + - will warn with a ``AttributeConflictWarning`` if you are attempting to append + an index with a different frequency than the existing, or attempting + to append an index with a different name than the existing + - support datelike columns with a timezone as data_columns (:issue:`2852`) + + - Non-unique index support clarified (:issue:`3468`). 
+ + - Fix assigning a new index to a duplicate index in a DataFrame would fail (:issue:`3468`) + - Fix construction of a DataFrame with a duplicate index + - ref_locs support to allow duplicative indices across dtypes, + allows iget support to always find the index (even across dtypes) (:issue:`2194`) + - applymap on a DataFrame with a non-unique index now works + (removed warning) (:issue:`2786`), and fix (:issue:`3230`) + - Fix to_csv to handle non-unique columns (:issue:`3495`) + - Duplicate indexes with getitem will return items in the correct order (:issue:`3455`, :issue:`3457`) + and handle missing elements like unique indices (:issue:`3561`) + - Duplicate indexes with and empty DataFrame.from_records will return a correct frame (:issue:`3562`) + - Concat to produce a non-unique columns when duplicates are across dtypes is fixed (:issue:`3602`) + - Allow insert/delete to non-unique columns (:issue:`3679`) + - Non-unique indexing with a slice via ``loc`` and friends fixed (:issue:`3659`) + - Allow insert/delete to non-unique columns (:issue:`3679`) + - Extend ``reindex`` to correctly deal with non-unique indices (:issue:`3679`) + - ``DataFrame.itertuples()`` now works with frames with duplicate column + names (:issue:`3873`) + - Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to + ``reindex`` for location-based taking + - Allow non-unique indexing in series via ``.ix/.loc`` and ``__getitem__`` (:issue:`4246`) + - Fixed non-unique indexing memory allocation issue with ``.ix/.loc`` (:issue:`4280`) + + - ``DataFrame.from_records`` did not accept empty recarrays (:issue:`3682`) + - ``read_html`` now correctly skips tests (:issue:`3741`) + - Fixed a bug where ``DataFrame.replace`` with a compiled regular expression + in the ``to_replace`` argument wasn't working (:issue:`3907`) + - Improved ``network`` test decorator to catch ``IOError`` (and therefore + ``URLError`` as well). Added ``with_connectivity_check`` decorator to allow + explicitly checking a website as a proxy for seeing if there is network + connectivity. Plus, new ``optional_args`` decorator factory for decorators. 
+ (:issue:`3910`, :issue:`3914`) + - Fixed testing issue where too many sockets where open thus leading to a + connection reset issue (:issue:`3982`, :issue:`3985`, :issue:`4028`, + :issue:`4054`) + - Fixed failing tests in test_yahoo, test_google where symbols were not + retrieved but were being accessed (:issue:`3982`, :issue:`3985`, + :issue:`4028`, :issue:`4054`) + - ``Series.hist`` will now take the figure from the current environment if + one is not passed + - Fixed bug where a 1xN DataFrame would barf on a 1xN mask (:issue:`4071`) + - Fixed running of ``tox`` under python3 where the pickle import was getting + rewritten in an incompatible way (:issue:`4062`, :issue:`4063`) + - Fixed bug where sharex and sharey were not being passed to grouped_hist + (:issue:`4089`) + - Fixed bug in ``DataFrame.replace`` where a nested dict wasn't being + iterated over when regex=False (:issue:`4115`) + - Fixed bug in the parsing of microseconds when using the ``format`` + argument in ``to_datetime`` (:issue:`4152`) + - Fixed bug in ``PandasAutoDateLocator`` where ``invert_xaxis`` triggered + incorrectly ``MilliSecondLocator`` (:issue:`3990`) + - Fixed bug in plotting that wasn't raising on invalid colormap for + matplotlib 1.1.1 (:issue:`4215`) + - Fixed the legend displaying in ``DataFrame.plot(kind='kde')`` (:issue:`4216`) + - Fixed bug where Index slices weren't carrying the name attribute + (:issue:`4226`) + - Fixed bug in initializing ``DatetimeIndex`` with an array of strings + in a certain time zone (:issue:`4229`) + - Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`) + - Fixed bug where get_data_famafrench wasn't using the correct file edges + (:issue:`4281`) + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.12.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.11.0..v0.12.0 diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst new file mode 100644 index 00000000..44223bc6 --- /dev/null +++ b/doc/source/whatsnew/v0.13.0.rst @@ -0,0 +1,1335 @@ +.. _whatsnew_0130: + +Version 0.13.0 (January 3, 2014) +-------------------------------- + +{{ header }} + + + +This is a major release from 0.12.0 and includes a number of API changes, several new features and +enhancements along with a large number of bug fixes. + +Highlights include: + +- support for a new index type ``Float64Index``, and other Indexing enhancements +- ``HDFStore`` has a new string based syntax for query specification +- support for new methods of interpolation +- updated ``timedelta`` operations +- a new string manipulation method ``extract`` +- Nanosecond support for Offsets +- ``isin`` for DataFrames + +Several experimental features are added, including: + +- new ``eval/query`` methods for expression evaluation +- support for ``msgpack`` serialization +- an i/o interface to Google's ``BigQuery`` + +Their are several new or updated docs sections including: + +- :ref:`Comparison with SQL`, which should be useful for those familiar with SQL but still learning pandas. +- :ref:`Comparison with R`, idiom translations from R to pandas. +- :ref:`Enhancing Performance`, ways to enhance pandas performance with ``eval/query``. + +.. warning:: + + In 0.13.0 ``Series`` has internally been refactored to no longer sub-class ``ndarray`` + but instead subclass ``NDFrame``, similar to the rest of the pandas containers. This should be + a transparent change with only very limited API implications. 
See :ref:`Internal Refactoring` + +API changes +~~~~~~~~~~~ + +- ``read_excel`` now supports an integer in its ``sheetname`` argument giving + the index of the sheet to read in (:issue:`4301`). +- Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf", + "iNf", etc.) as infinity. (:issue:`4220`, :issue:`4219`), affecting + ``read_table``, ``read_csv``, etc. +- ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to + @jtratner. As a result, pandas now uses iterators more extensively. This + also led to the introduction of substantive parts of the Benjamin + Peterson's ``six`` library into compat. (:issue:`4384`, :issue:`4375`, + :issue:`4372`) +- ``pandas.util.compat`` and ``pandas.util.py3compat`` have been merged into + ``pandas.compat``. ``pandas.compat`` now includes many functions allowing + 2/3 compatibility. It contains both list and iterator versions of range, + filter, map and zip, plus other necessary elements for Python 3 + compatibility. ``lmap``, ``lzip``, ``lrange`` and ``lfilter`` all produce + lists instead of iterators, for compatibility with ``numpy``, subscripting + and ``pandas`` constructors.(:issue:`4384`, :issue:`4375`, :issue:`4372`) +- ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`) +- Changes to how ``Index`` and ``MultiIndex`` handle metadata (``levels``, + ``labels``, and ``names``) (:issue:`4039`): + + .. code-block:: python + + # previously, you would have set levels or labels directly + >>> pd.index.levels = [[1, 2, 3, 4], [1, 2, 4, 4]] + + # now, you use the set_levels or set_labels methods + >>> index = pd.index.set_levels([[1, 2, 3, 4], [1, 2, 4, 4]]) + + # similarly, for names, you can rename the object + # but setting names is not deprecated + >>> index = pd.index.set_names(["bob", "cranberry"]) + + # and all methods take an inplace kwarg - but return None + >>> pd.index.set_names(["bob", "cranberry"], inplace=True) + +- **All** division with ``NDFrame`` objects is now *truedivision*, regardless + of the future import. This means that operating on pandas objects will by default + use *floating point* division, and return a floating point dtype. + You can use ``//`` and ``floordiv`` to do integer division. + + Integer division + + .. code-block:: ipython + + In [3]: arr = np.array([1, 2, 3, 4]) + + In [4]: arr2 = np.array([5, 3, 2, 1]) + + In [5]: arr / arr2 + Out[5]: array([0, 0, 1, 4]) + + In [6]: pd.Series(arr) // pd.Series(arr2) + Out[6]: + 0 0 + 1 0 + 2 1 + 3 4 + dtype: int64 + + True Division + + .. code-block:: ipython + + In [7]: pd.Series(arr) / pd.Series(arr2) # no future import required + Out[7]: + 0 0.200000 + 1 0.666667 + 2 1.500000 + 3 4.000000 + dtype: float64 + +- Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) +- ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`) + behavior. See :ref:`gotchas` for a more detailed discussion. + + This prevents doing boolean comparison on *entire* pandas objects, which is inherently ambiguous. These all will raise a ``ValueError``. + + .. code-block:: python + + >>> df = pd.DataFrame({'A': np.random.randn(10), + ... 'B': np.random.randn(10), + ... 'C': pd.date_range('20130101', periods=10) + ... }) + ... + >>> if df: + ... pass + ... + Traceback (most recent call last): + ... + ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, + a.bool(), a.item(), a.any() or a.all(). 
+ + >>> df1 = df + >>> df2 = df + >>> df1 and df2 + Traceback (most recent call last): + ... + ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, + a.bool(), a.item(), a.any() or a.all(). + + >>> d = [1, 2, 3] + >>> s1 = pd.Series(d) + >>> s2 = pd.Series(d) + >>> s1 and s2 + Traceback (most recent call last): + ... + ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, + a.bool(), a.item(), a.any() or a.all(). + + Added the ``.bool()`` method to ``NDFrame`` objects to facilitate evaluating of single-element boolean Series: + + .. ipython:: python + + pd.Series([True]).bool() + pd.Series([False]).bool() + pd.DataFrame([[True]]).bool() + pd.DataFrame([[False]]).bool() + +- All non-Index NDFrames (``Series``, ``DataFrame``, ``Panel``, ``Panel4D``, + ``SparsePanel``, etc.), now support the entire set of arithmetic operators + and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not + support ``pow`` or ``mod`` with non-scalars. (:issue:`3765`) +- ``Series`` and ``DataFrame`` now have a ``mode()`` method to calculate the + statistical mode(s) by axis/Series. (:issue:`5367`) + +- Chained assignment will now by default warn if the user is assigning to a copy. This can be changed + with the option ``mode.chained_assignment``, allowed options are ``raise/warn/None``. See :ref:`the docs`. + + .. ipython:: python + + dfc = pd.DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + pd.set_option('chained_assignment', 'warn') + + The following warning / exception will show if this is attempted. + + .. ipython:: python + :okwarning: + + dfc.loc[0]['A'] = 1111 + + :: + + Traceback (most recent call last) + ... + SettingWithCopyWarning: + A value is trying to be set on a copy of a slice from a DataFrame. + Try using .loc[row_index,col_indexer] = value instead + + Here is the correct method of assignment. + + .. ipython:: python + + dfc.loc[0, 'A'] = 11 + dfc + +- ``Panel.reindex`` has the following call signature ``Panel.reindex(items=None, major_axis=None, minor_axis=None, **kwargs)`` + to conform with other ``NDFrame`` objects. See :ref:`Internal Refactoring` for more information. + +- ``Series.argmin`` and ``Series.argmax`` are now aliased to ``Series.idxmin`` and ``Series.idxmax``. These return the *index* of the + min or max element respectively. Prior to 0.13.0 these would return the position of the min / max element. (:issue:`6214`) + +Prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These were announced changes in 0.12 or prior that are taking effect as of 0.13.0 + +- Remove deprecated ``Factor`` (:issue:`3650`) +- Remove deprecated ``set_printoptions/reset_printoptions`` (:issue:`3046`) +- Remove deprecated ``_verbose_info`` (:issue:`3215`) +- Remove deprecated ``read_clipboard/to_clipboard/ExcelFile/ExcelWriter`` from ``pandas.io.parsers`` (:issue:`3717`) + These are available as functions in the main pandas namespace (e.g. ``pd.read_clipboard``) +- default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`) +- default for ``display.max_seq_len`` is now 100 rather than ``None``. This activates + truncated display ("...") of long sequences in various places. (:issue:`3391`) + +Deprecations +~~~~~~~~~~~~ + +Deprecated in 0.13.0 + +- deprecated ``iterkv``, which will be removed in a future release (this was + an alias of iteritems used to bypass ``2to3``'s changes). 
+ (:issue:`4384`, :issue:`4375`, :issue:`4372`) +- deprecated the string method ``match``, whose role is now performed more + idiomatically by ``extract``. In a future release, the default behavior + of ``match`` will change to become analogous to ``contains``, which returns + a boolean indexer. (Their + distinction is strictness: ``match`` relies on ``re.match`` while + ``contains`` relies on ``re.search``.) In this release, the deprecated + behavior is the default, but the new behavior is available through the + keyword argument ``as_indexer=True``. + +Indexing API changes +~~~~~~~~~~~~~~~~~~~~ + +Prior to 0.13, it was impossible to use a label indexer (``.loc/.ix``) to set a value that +was not contained in the index of a particular axis. (:issue:`2578`). See :ref:`the docs` + +In the ``Series`` case this is effectively an appending operation + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + s[5] = 5. + s + +.. ipython:: python + + dfi = pd.DataFrame(np.arange(6).reshape(3, 2), + columns=['A', 'B']) + dfi + +This would previously ``KeyError`` + +.. ipython:: python + + dfi.loc[:, 'C'] = dfi.loc[:, 'A'] + dfi + +This is like an ``append`` operation. + +.. ipython:: python + + dfi.loc[3] = 5 + dfi + +A Panel setting operation on an arbitrary axis aligns the input to the Panel + +.. code-block:: ipython + + In [20]: p = pd.Panel(np.arange(16).reshape(2, 4, 2), + ....: items=['Item1', 'Item2'], + ....: major_axis=pd.date_range('2001/1/12', periods=4), + ....: minor_axis=['A', 'B'], dtype='float64') + ....: + + In [21]: p + Out[21]: + + Dimensions: 2 (items) x 4 (major_axis) x 2 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2001-01-12 00:00:00 to 2001-01-15 00:00:00 + Minor_axis axis: A to B + + In [22]: p.loc[:, :, 'C'] = pd.Series([30, 32], index=p.items) + + In [23]: p + Out[23]: + + Dimensions: 2 (items) x 4 (major_axis) x 3 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2001-01-12 00:00:00 to 2001-01-15 00:00:00 + Minor_axis axis: A to C + + In [24]: p.loc[:, :, 'C'] + Out[24]: + Item1 Item2 + 2001-01-12 30.0 32.0 + 2001-01-13 30.0 32.0 + 2001-01-14 30.0 32.0 + 2001-01-15 30.0 32.0 + +Float64Index API change +~~~~~~~~~~~~~~~~~~~~~~~ + +- Added a new index type, ``Float64Index``. This will be automatically created when passing floating values in index creation. + This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the + same. See :ref:`the docs`, (:issue:`263`) + + Construction is by default for floating type values. + + .. ipython:: python + + index = pd.Index([1.5, 2, 3, 4.5, 5]) + index + s = pd.Series(range(5), index=index) + s + + Scalar selection for ``[],.ix,.loc`` will always be label based. An integer will match an equal float index (e.g. ``3`` is equivalent to ``3.0``) + + .. ipython:: python + + s[3] + s.loc[3] + + The only positional indexing is via ``iloc`` + + .. ipython:: python + + s.iloc[3] + + A scalar index that is not found will raise ``KeyError`` + + Slicing is ALWAYS on the values of the index, for ``[],ix,loc`` and ALWAYS positional with ``iloc`` + + .. ipython:: python + + s[2:4] + s.loc[2:4] + s.iloc[2:4] + + In float indexes, slicing using floats are allowed + + .. ipython:: python + + s[2.1:4.6] + s.loc[2.1:4.6] + +- Indexing on other index types are preserved (and positional fallback for ``[],ix``), with the exception, that floating point slicing + on indexes on non ``Float64Index`` will now raise a ``TypeError``. + + .. 
code-block:: ipython + + In [1]: pd.Series(range(5))[3.5] + TypeError: the label [3.5] is not a proper indexer for this index type (Int64Index) + + In [1]: pd.Series(range(5))[3.5:4.5] + TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index) + + Using a scalar float indexer will be deprecated in a future version, but is allowed for now. + + .. code-block:: ipython + + In [3]: pd.Series(range(5))[3.0] + Out[3]: 3 + +HDFStore API changes +~~~~~~~~~~~~~~~~~~~~ + +- Query Format Changes. A much more string-like query format is now supported. See :ref:`the docs`. + + .. ipython:: python + + path = 'test.h5' + dfq = pd.DataFrame(np.random.randn(10, 4), + columns=list('ABCD'), + index=pd.date_range('20130101', periods=10)) + dfq.to_hdf(path, 'dfq', format='table', data_columns=True) + + Use boolean expressions, with in-line function evaluation. + + .. ipython:: python + + pd.read_hdf(path, 'dfq', + where="index>Timestamp('20130104') & columns=['A', 'B']") + + Use an inline column reference + + .. ipython:: python + + pd.read_hdf(path, 'dfq', + where="A>0 or C>0") + + .. ipython:: python + :suppress: + + import os + os.remove(path) + +- the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)`` or ``table(t)`` + the same defaults as prior < 0.13.0 remain, e.g. ``put`` implies ``fixed`` format and ``append`` implies + ``table`` format. This default format can be set as an option by setting ``io.hdf.default_format``. + + .. ipython:: python + + path = 'test.h5' + df = pd.DataFrame(np.random.randn(10, 2)) + df.to_hdf(path, 'df_table', format='table') + df.to_hdf(path, 'df_table2', append=True) + df.to_hdf(path, 'df_fixed') + with pd.HDFStore(path) as store: + print(store) + + .. ipython:: python + :suppress: + + import os + os.remove(path) + +- Significant table writing performance improvements +- handle a passed ``Series`` in table format (:issue:`4330`) +- can now serialize a ``timedelta64[ns]`` dtype in a table (:issue:`3577`), See :ref:`the docs`. +- added an ``is_open`` property to indicate if the underlying file handle is_open; + a closed store will now report 'CLOSED' when viewing the store (rather than raising an error) + (:issue:`4409`) +- a close of a ``HDFStore`` now will close that instance of the ``HDFStore`` + but will only close the actual file if the ref count (by ``PyTables``) w.r.t. all of the open handles + are 0. Essentially you have a local instance of ``HDFStore`` referenced by a variable. Once you + close it, it will report closed. Other references (to the same file) will continue to operate + until they themselves are closed. Performing an action on a closed file will raise + ``ClosedFileError`` + + .. ipython:: python + + path = 'test.h5' + df = pd.DataFrame(np.random.randn(10, 2)) + store1 = pd.HDFStore(path) + store2 = pd.HDFStore(path) + store1.append('df', df) + store2.append('df2', df) + + store1 + store2 + store1.close() + store2 + store2.close() + store2 + + .. ipython:: python + :suppress: + + import os + os.remove(path) + +- removed the ``_quiet`` attribute, replace by a ``DuplicateWarning`` if retrieving + duplicate rows from a table (:issue:`4367`) +- removed the ``warn`` argument from ``open``. Instead a ``PossibleDataLossError`` exception will + be raised if you try to use ``mode='w'`` with an OPEN file handle (:issue:`4367`) +- allow a passed locations array or mask as a ``where`` condition (:issue:`4467`). + See :ref:`the docs` for an example. 
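+
+  As a rough sketch of the idea (not executed here; it assumes ``store`` is an open
+  ``HDFStore`` whose ``'dfq'`` key already holds an indexed table), the coordinates
+  returned by one selection can be passed back in as the ``where``:
+
+  .. code-block:: python
+
+     # integer locations matching a query ...
+     coords = store.select_as_coordinates('dfq', 'index > 5')
+
+     # ... can be reused directly as the where condition
+     # (an equivalent boolean mask is accepted as well)
+     store.select('dfq', where=coords)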
+- add the keyword ``dropna=True`` to ``append`` to change whether ALL nan rows are not written + to the store (default is ``True``, ALL nan rows are NOT written), also settable + via the option ``io.hdf.dropna_table`` (:issue:`4625`) +- pass through store creation arguments; can be used to support in-memory stores + +DataFrame repr changes +~~~~~~~~~~~~~~~~~~~~~~ + +The HTML and plain text representations of :class:`DataFrame` now show +a truncated view of the table once it exceeds a certain size, rather +than switching to the short info view (:issue:`4886`, :issue:`5550`). +This makes the representation more consistent as small DataFrames get +larger. + +.. image:: ../_static/df_repr_truncated.png + :alt: Truncated HTML representation of a DataFrame + +To get the info view, call :meth:`DataFrame.info`. If you prefer the +info view as the repr for large DataFrames, you can set this by running +``set_option('display.large_repr', 'info')``. + +Enhancements +~~~~~~~~~~~~ + +- ``df.to_clipboard()`` learned a new ``excel`` keyword that let's you + paste df data directly into excel (enabled by default). (:issue:`5070`). +- ``read_html`` now raises a ``URLError`` instead of catching and raising a + ``ValueError`` (:issue:`4303`, :issue:`4305`) +- Added a test for ``read_clipboard()`` and ``to_clipboard()`` (:issue:`4282`) +- Clipboard functionality now works with PySide (:issue:`4282`) +- Added a more informative error message when plot arguments contain + overlapping color and style arguments (:issue:`4402`) +- ``to_dict`` now takes ``records`` as a possible out type. Returns an array + of column-keyed dictionaries. (:issue:`4936`) + +- ``NaN`` handing in get_dummies (:issue:`4446`) with ``dummy_na`` + + .. ipython:: python + + # previously, nan was erroneously counted as 2 here + # now it is not counted at all + pd.get_dummies([1, 2, np.nan]) + + # unless requested + pd.get_dummies([1, 2, np.nan], dummy_na=True) + + +- ``timedelta64[ns]`` operations. See :ref:`the docs`. + + .. warning:: + + Most of these operations require ``numpy >= 1.7`` + + Using the new top-level ``to_timedelta``, you can convert a scalar or array from the standard + timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` in ``nanoseconds``). + + .. ipython:: python + + pd.to_timedelta('1 days 06:05:01.00003') + pd.to_timedelta('15.5us') + pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan']) + pd.to_timedelta(np.arange(5), unit='s') + pd.to_timedelta(np.arange(5), unit='d') + + A Series of dtype ``timedelta64[ns]`` can now be divided by another + ``timedelta64[ns]`` object, or astyped to yield a ``float64`` dtyped Series. This + is frequency conversion. See :ref:`the docs` for the docs. + + .. ipython:: python + + import datetime + td = pd.Series(pd.date_range('20130101', periods=4)) - pd.Series( + pd.date_range('20121201', periods=4)) + td[2] += np.timedelta64(datetime.timedelta(minutes=5, seconds=3)) + td[3] = np.nan + td + + # to days + td / np.timedelta64(1, 'D') + td.astype('timedelta64[D]') + + # to seconds + td / np.timedelta64(1, 's') + td.astype('timedelta64[s]') + + Dividing or multiplying a ``timedelta64[ns]`` Series by an integer or integer Series + + .. ipython:: python + + td * -1 + td * pd.Series([1, 2, 3, 4]) + + Absolute ``DateOffset`` objects can act equivalently to ``timedeltas`` + + .. ipython:: python + + from pandas import offsets + td + offsets.Minute(5) + offsets.Milli(5) + + Fillna is now supported for timedeltas + + .. 
ipython:: python + + td.fillna(pd.Timedelta(0)) + td.fillna(datetime.timedelta(days=1, seconds=5)) + + You can do numeric reduction operations on timedeltas. + + .. ipython:: python + + td.mean() + td.quantile(.1) + +- ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and + ``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set + the bandwidth, and to gkde.evaluate() to specify the indices at which it + is evaluated, respectively. See scipy docs. (:issue:`4298`) + +- DataFrame constructor now accepts a numpy masked record array (:issue:`3478`) + +- The new vectorized string method ``extract`` return regular expression + matches more conveniently. + + .. ipython:: python + :okwarning: + + pd.Series(['a1', 'b2', 'c3']).str.extract('[ab](\\d)') + + Elements that do not match return ``NaN``. Extracting a regular expression + with more than one group returns a DataFrame with one column per group. + + + .. ipython:: python + :okwarning: + + pd.Series(['a1', 'b2', 'c3']).str.extract('([ab])(\\d)') + + Elements that do not match return a row of ``NaN``. + Thus, a Series of messy strings can be *converted* into a + like-indexed Series or DataFrame of cleaned-up or more useful strings, + without necessitating ``get()`` to access tuples or ``re.match`` objects. + + Named groups like + + .. ipython:: python + :okwarning: + + pd.Series(['a1', 'b2', 'c3']).str.extract( + '(?P[ab])(?P\\d)') + + and optional groups can also be used. + + .. ipython:: python + :okwarning: + + pd.Series(['a1', 'b2', '3']).str.extract( + '(?P[ab])?(?P\\d)') + +- ``read_stata`` now accepts Stata 13 format (:issue:`4291`) + +- ``read_fwf`` now infers the column specifications from the first 100 rows of + the file if the data has correctly separated and properly aligned columns + using the delimiter provided to the function (:issue:`4488`). + +- support for nanosecond times as an offset + + .. warning:: + + These operations require ``numpy >= 1.7`` + + Period conversions in the range of seconds and below were reworked and extended + up to nanoseconds. Periods in the nanosecond range are now available. + + .. ipython:: python + + pd.date_range('2013-01-01', periods=5, freq='5N') + + or with frequency as offset + + .. ipython:: python + + pd.date_range('2013-01-01', periods=5, freq=pd.offsets.Nano(5)) + + Timestamps can be modified in the nanosecond range + + .. ipython:: python + + t = pd.Timestamp('20130101 09:01:02') + t + pd.tseries.offsets.Nano(123) + +- A new method, ``isin`` for DataFrames, which plays nicely with boolean indexing. The argument to ``isin``, what we're comparing the DataFrame to, can be a DataFrame, Series, dict, or array of values. See :ref:`the docs` for more. + + To get the rows where any of the conditions are met: + + .. ipython:: python + + dfi = pd.DataFrame({'A': [1, 2, 3, 4], 'B': ['a', 'b', 'f', 'n']}) + dfi + other = pd.DataFrame({'A': [1, 3, 3, 7], 'B': ['e', 'f', 'f', 'e']}) + mask = dfi.isin(other) + mask + dfi[mask.any(axis=1)] + +- ``Series`` now supports a ``to_frame`` method to convert it to a single-column DataFrame (:issue:`5164`) + +- All R datasets listed here http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html can now be loaded into pandas objects + + .. 
code-block:: python + + # note that pandas.rpy was deprecated in v0.16.0 + import pandas.rpy.common as com + com.load_data('Titanic') + +- ``tz_localize`` can infer a fall daylight savings transition based on the structure + of the unlocalized data (:issue:`4230`), see :ref:`the docs` + +- ``DatetimeIndex`` is now in the API documentation, see :ref:`the docs` + +- :meth:`~pandas.io.json.json_normalize` is a new method to allow you to create a flat table + from semi-structured JSON data. See :ref:`the docs` (:issue:`1067`) + +- Added PySide support for the qtpandas DataFrameModel and DataFrameWidget. + +- Python csv parser now supports usecols (:issue:`4335`) + +- Frequencies gained several new offsets: + + * ``LastWeekOfMonth`` (:issue:`4637`) + * ``FY5253``, and ``FY5253Quarter`` (:issue:`4511`) + + +- DataFrame has a new ``interpolate`` method, similar to Series (:issue:`4434`, :issue:`1892`) + + .. ipython:: python + + df = pd.DataFrame({'A': [1, 2.1, np.nan, 4.7, 5.6, 6.8], + 'B': [.25, np.nan, np.nan, 4, 12.2, 14.4]}) + df.interpolate() + + Additionally, the ``method`` argument to ``interpolate`` has been expanded + to include ``'nearest', 'zero', 'slinear', 'quadratic', 'cubic', + 'barycentric', 'krogh', 'piecewise_polynomial', 'pchip', 'polynomial', 'spline'`` + The new methods require scipy_. Consult the Scipy reference guide_ and documentation_ for more information + about when the various methods are appropriate. See :ref:`the docs`. + + Interpolate now also accepts a ``limit`` keyword argument. + This works similar to ``fillna``'s limit: + + .. ipython:: python + + ser = pd.Series([1, 3, np.nan, np.nan, np.nan, 11]) + ser.interpolate(limit=2) + +- Added ``wide_to_long`` panel data convenience function. See :ref:`the docs`. + + .. ipython:: python + + np.random.seed(123) + df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, + "A1980" : {0 : "d", 1 : "e", 2 : "f"}, + "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, + "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, + "X" : dict(zip(range(3), np.random.randn(3))) + }) + df["id"] = df.index + df + pd.wide_to_long(df, ["A", "B"], i="id", j="year") + +.. _scipy: http://www.scipy.org +.. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation +.. _guide: https://docs.scipy.org/doc/scipy/tutorial/interpolate.html + +- ``to_csv`` now takes a ``date_format`` keyword argument that specifies how + output datetime objects should be formatted. Datetimes encountered in the + index, columns, and values will all have this formatting applied. (:issue:`4313`) +- ``DataFrame.plot`` will scatter plot x versus y by passing ``kind='scatter'`` (:issue:`2215`) +- Added support for Google Analytics v3 API segment IDs that also supports v2 IDs. (:issue:`5271`) + +.. _whatsnew_0130.experimental: + +Experimental +~~~~~~~~~~~~ + +- The new :func:`~pandas.eval` function implements expression evaluation using + ``numexpr`` behind the scenes. This results in large speedups for + complicated expressions involving large DataFrames/Series. For example, + + .. ipython:: python + + nrows, ncols = 20000, 100 + df1, df2, df3, df4 = [pd.DataFrame(np.random.randn(nrows, ncols)) + for _ in range(4)] + + .. ipython:: python + + # eval with NumExpr backend + %timeit pd.eval('df1 + df2 + df3 + df4') + + .. 
ipython:: python + + # pure Python evaluation + %timeit df1 + df2 + df3 + df4 + + For more details, see the :ref:`the docs` + +- Similar to ``pandas.eval``, :class:`~pandas.DataFrame` has a new + ``DataFrame.eval`` method that evaluates an expression in the context of + the ``DataFrame``. For example, + + .. ipython:: python + :suppress: + + try: + del a # noqa: F821 + except NameError: + pass + + try: + del b # noqa: F821 + except NameError: + pass + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 2), columns=['a', 'b']) + df.eval('a + b') + +- :meth:`~pandas.DataFrame.query` method has been added that allows + you to select elements of a ``DataFrame`` using a natural query syntax + nearly identical to Python syntax. For example, + + .. ipython:: python + :suppress: + + try: + del a # noqa: F821 + except NameError: + pass + + try: + del b # noqa: F821 + except NameError: + pass + + try: + del c # noqa: F821 + except NameError: + pass + + .. ipython:: python + + n = 20 + df = pd.DataFrame(np.random.randint(n, size=(n, 3)), columns=['a', 'b', 'c']) + df.query('a < b < c') + + selects all the rows of ``df`` where ``a < b < c`` evaluates to ``True``. + For more details see the :ref:`the docs`. + +- ``pd.read_msgpack()`` and ``pd.to_msgpack()`` are now a supported method of serialization + of arbitrary pandas (and python objects) in a lightweight portable binary format. See :ref:`the docs` + + .. warning:: + + Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. + + .. code-block:: python + + df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB')) + df.to_msgpack('foo.msg') + pd.read_msgpack('foo.msg') + + s = pd.Series(np.random.rand(5), index=pd.date_range('20130101', periods=5)) + pd.to_msgpack('foo.msg', df, s) + pd.read_msgpack('foo.msg') + + You can pass ``iterator=True`` to iterator over the unpacked results + + .. code-block:: python + + for o in pd.read_msgpack('foo.msg', iterator=True): + print(o) + + .. ipython:: python + :suppress: + :okexcept: + + os.remove('foo.msg') + +- ``pandas.io.gbq`` provides a simple way to extract from, and load data into, + Google's BigQuery Data Sets by way of pandas DataFrames. BigQuery is a high + performance SQL-like database service, useful for performing ad-hoc queries + against extremely large datasets. :ref:`See the docs ` + + .. code-block:: python + + from pandas.io import gbq + + # A query to select the average monthly temperatures in the + # in the year 2000 across the USA. The dataset, + # publicata:samples.gsod, is available on all BigQuery accounts, + # and is based on NOAA gsod data. 
+ + query = """SELECT station_number as STATION, + month as MONTH, AVG(mean_temp) as MEAN_TEMP + FROM publicdata:samples.gsod + WHERE YEAR = 2000 + GROUP BY STATION, MONTH + ORDER BY STATION, MONTH ASC""" + + # Fetch the result set for this query + + # Your Google BigQuery Project ID + # To find this, see your dashboard: + # https://console.developers.google.com/iam-admin/projects?authuser=0 + projectid = 'xxxxxxxxx' + df = gbq.read_gbq(query, project_id=projectid) + + # Use pandas to process and reshape the dataset + + df2 = df.pivot(index='STATION', columns='MONTH', values='MEAN_TEMP') + df3 = pd.concat([df2.min(), df2.mean(), df2.max()], + axis=1, keys=["Min Tem", "Mean Temp", "Max Temp"]) + + The resulting DataFrame is:: + + > df3 + Min Tem Mean Temp Max Temp + MONTH + 1 -53.336667 39.827892 89.770968 + 2 -49.837500 43.685219 93.437932 + 3 -77.926087 48.708355 96.099998 + 4 -82.892858 55.070087 97.317240 + 5 -92.378261 61.428117 102.042856 + 6 -77.703334 65.858888 102.900000 + 7 -87.821428 68.169663 106.510714 + 8 -89.431999 68.614215 105.500000 + 9 -86.611112 63.436935 107.142856 + 10 -78.209677 56.880838 92.103333 + 11 -50.125000 48.861228 94.996428 + 12 -50.332258 42.286879 94.396774 + + .. warning:: + + To use this module, you will need a BigQuery account. See + for details. + + As of 10/10/13, there is a bug in Google's API preventing result sets + from being larger than 100,000 rows. A patch is scheduled for the week of + 10/14/13. + +.. _whatsnew_0130.refactoring: + +Internal refactoring +~~~~~~~~~~~~~~~~~~~~ + +In 0.13.0 there is a major refactor primarily to subclass ``Series`` from +``NDFrame``, which is the base class currently for ``DataFrame`` and ``Panel``, +to unify methods and behaviors. Series formerly subclassed directly from +``ndarray``. (:issue:`4080`, :issue:`3862`, :issue:`816`) + +.. warning:: + + There are two potential incompatibilities from < 0.13.0 + + - Using certain numpy functions would previously return a ``Series`` if passed a ``Series`` + as an argument. This seems only to affect ``np.ones_like``, ``np.empty_like``, + ``np.diff`` and ``np.where``. These now return ``ndarrays``. + + .. ipython:: python + + s = pd.Series([1, 2, 3, 4]) + + Numpy Usage + + .. ipython:: python + + np.ones_like(s) + np.diff(s) + np.where(s > 1, s, np.nan) + + Pandonic Usage + + .. ipython:: python + + pd.Series(1, index=s.index) + s.diff() + s.where(s > 1) + + - Passing a ``Series`` directly to a cython function expecting an ``ndarray`` type will no + long work directly, you must pass ``Series.values``, See :ref:`Enhancing Performance` + + - ``Series(0.5)`` would previously return the scalar ``0.5``, instead this will return a 1-element ``Series`` + + - This change breaks ``rpy2<=2.3.8``. an Issue has been opened against rpy2 and a workaround + is detailed in :issue:`5698`. Thanks @JanSchulz. + +- Pickle compatibility is preserved for pickles created prior to 0.13. These must be unpickled with ``pd.read_pickle``, see :ref:`Pickling`. 
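+
+  For instance (a sketch only; the file name is hypothetical), a pickle written by an
+  earlier pandas version would be loaded with:
+
+  .. code-block:: python
+
+     import pandas as pd
+
+     # pd.read_pickle also handles pickles created prior to 0.13
+     df = pd.read_pickle('legacy_frame.pkl')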
+ +- Refactor of series.py/frame.py/panel.py to move common code to generic.py + + - added ``_setup_axes`` to created generic NDFrame structures + - moved methods + + - ``from_axes,_wrap_array,axes,ix,loc,iloc,shape,empty,swapaxes,transpose,pop`` + - ``__iter__,keys,__contains__,__len__,__neg__,__invert__`` + - ``convert_objects,as_blocks,as_matrix,values`` + - ``__getstate__,__setstate__`` (compat remains in frame/panel) + - ``__getattr__,__setattr__`` + - ``_indexed_same,reindex_like,align,where,mask`` + - ``fillna,replace`` (``Series`` replace is now consistent with ``DataFrame``) + - ``filter`` (also added axis argument to selectively filter on a different axis) + - ``reindex,reindex_axis,take`` + - ``truncate`` (moved to become part of ``NDFrame``) + +- These are API changes which make ``Panel`` more consistent with ``DataFrame`` + + - ``swapaxes`` on a ``Panel`` with the same axes specified now return a copy + - support attribute access for setting + - filter supports the same API as the original ``DataFrame`` filter + +- Reindex called with no arguments will now return a copy of the input object + +- ``TimeSeries`` is now an alias for ``Series``. the property ``is_time_series`` + can be used to distinguish (if desired) + +- Refactor of Sparse objects to use BlockManager + + - Created a new block type in internals, ``SparseBlock``, which can hold multi-dtypes + and is non-consolidatable. ``SparseSeries`` and ``SparseDataFrame`` now inherit + more methods from there hierarchy (Series/DataFrame), and no longer inherit + from ``SparseArray`` (which instead is the object of the ``SparseBlock``) + - Sparse suite now supports integration with non-sparse data. Non-float sparse + data is supportable (partially implemented) + - Operations on sparse structures within DataFrames should preserve sparseness, + merging type operations will convert to dense (and back to sparse), so might + be somewhat inefficient + - enable setitem on ``SparseSeries`` for boolean/integer/slices + - ``SparsePanels`` implementation is unchanged (e.g. not using BlockManager, needs work) + +- added ``ftypes`` method to Series/DataFrame, similar to ``dtypes``, but indicates + if the underlying is sparse/dense (as well as the dtype) +- All ``NDFrame`` objects can now use ``__finalize__()`` to specify various + values to propagate to new objects from an existing one (e.g. ``name`` in ``Series`` will + follow more automatically now) +- Internal type checking is now done via a suite of generated classes, allowing ``isinstance(value, klass)`` + without having to directly import the klass, courtesy of @jtratner +- Bug in Series update where the parent frame is not updating its cache based on + changes (:issue:`4080`) or types (:issue:`3217`), fillna (:issue:`3386`) +- Indexing with dtype conversions fixed (:issue:`4463`, :issue:`4204`) +- Refactor ``Series.reindex`` to core/generic.py (:issue:`4604`, :issue:`4618`), allow ``method=`` in reindexing + on a Series to work +- ``Series.copy`` no longer accepts the ``order`` parameter and is now consistent with ``NDFrame`` copy +- Refactor ``rename`` methods to core/generic.py; fixes ``Series.rename`` for (:issue:`4605`), and adds ``rename`` + with the same signature for ``Panel`` +- Refactor ``clip`` methods to core/generic.py (:issue:`4798`) +- Refactor of ``_get_numeric_data/_get_bool_data`` to core/generic.py, allowing Series/Panel functionality +- ``Series`` (for index) / ``Panel`` (for items) now allow attribute access to its elements (:issue:`1903`) + + .. 
ipython:: python + + s = pd.Series([1, 2, 3], index=list('abc')) + s.b + s.a = 5 + s + +.. _release.bug_fixes-0.13.0: + +Bug fixes +~~~~~~~~~ + +- ``HDFStore`` + + - raising an invalid ``TypeError`` rather than ``ValueError`` when + appending with a different block ordering (:issue:`4096`) + - ``read_hdf`` was not respecting as passed ``mode`` (:issue:`4504`) + - appending a 0-len table will work correctly (:issue:`4273`) + - ``to_hdf`` was raising when passing both arguments ``append`` and + ``table`` (:issue:`4584`) + - reading from a store with duplicate columns across dtypes would raise + (:issue:`4767`) + - Fixed a bug where ``ValueError`` wasn't correctly raised when column + names weren't strings (:issue:`4956`) + - A zero length series written in Fixed format not deserializing properly. + (:issue:`4708`) + - Fixed decoding perf issue on pyt3 (:issue:`5441`) + - Validate levels in a MultiIndex before storing (:issue:`5527`) + - Correctly handle ``data_columns`` with a Panel (:issue:`5717`) +- Fixed bug in tslib.tz_convert(vals, tz1, tz2): it could raise IndexError + exception while trying to access trans[pos + 1] (:issue:`4496`) +- The ``by`` argument now works correctly with the ``layout`` argument + (:issue:`4102`, :issue:`4014`) in ``*.hist`` plotting methods +- Fixed bug in ``PeriodIndex.map`` where using ``str`` would return the str + representation of the index (:issue:`4136`) +- Fixed test failure ``test_time_series_plot_color_with_empty_kwargs`` when + using custom matplotlib default colors (:issue:`4345`) +- Fix running of stata IO tests. Now uses temporary files to write + (:issue:`4353`) +- Fixed an issue where ``DataFrame.sum`` was slower than ``DataFrame.mean`` + for integer valued frames (:issue:`4365`) +- ``read_html`` tests now work with Python 2.6 (:issue:`4351`) +- Fixed bug where ``network`` testing was throwing ``NameError`` because a + local variable was undefined (:issue:`4381`) +- In ``to_json``, raise if a passed ``orient`` would cause loss of data + because of a duplicate index (:issue:`4359`) +- In ``to_json``, fix date handling so milliseconds are the default timestamp + as the docstring says (:issue:`4362`). +- ``as_index`` is no longer ignored when doing groupby apply (:issue:`4648`, + :issue:`3417`) +- JSON NaT handling fixed, NaTs are now serialized to ``null`` (:issue:`4498`) +- Fixed JSON handling of escapable characters in JSON object keys + (:issue:`4593`) +- Fixed passing ``keep_default_na=False`` when ``na_values=None`` + (:issue:`4318`) +- Fixed bug with ``values`` raising an error on a DataFrame with duplicate + columns and mixed dtypes, surfaced in (:issue:`4377`) +- Fixed bug with duplicate columns and type conversion in ``read_json`` when + ``orient='split'`` (:issue:`4377`) +- Fixed JSON bug where locales with decimal separators other than '.' threw + exceptions when encoding / decoding certain values. 
(:issue:`4918`) +- Fix ``.iat`` indexing with a ``PeriodIndex`` (:issue:`4390`) +- Fixed an issue where ``PeriodIndex`` joining with self was returning a new + instance rather than the same instance (:issue:`4379`); also adds a test + for this for the other index types +- Fixed a bug with all the dtypes being converted to object when using the + CSV cparser with the usecols parameter (:issue:`3192`) +- Fix an issue in merging blocks where the resulting DataFrame had partially + set _ref_locs (:issue:`4403`) +- Fixed an issue where hist subplots were being overwritten when they were + called using the top level matplotlib API (:issue:`4408`) +- Fixed a bug where calling ``Series.astype(str)`` would truncate the string + (:issue:`4405`, :issue:`4437`) +- Fixed a py3 compat issue where bytes were being repr'd as tuples + (:issue:`4455`) +- Fixed Panel attribute naming conflict if item is named 'a' + (:issue:`3440`) +- Fixed an issue where duplicate indexes were raising when plotting + (:issue:`4486`) +- Fixed an issue where cumsum and cumprod didn't work with bool dtypes + (:issue:`4170`, :issue:`4440`) +- Fixed Panel slicing issued in ``xs`` that was returning an incorrect dimmed + object (:issue:`4016`) +- Fix resampling bug where custom reduce function not used if only one group + (:issue:`3849`, :issue:`4494`) +- Fixed Panel assignment with a transposed frame (:issue:`3830`) +- Raise on set indexing with a Panel and a Panel as a value which needs + alignment (:issue:`3777`) +- frozenset objects now raise in the ``Series`` constructor (:issue:`4482`, + :issue:`4480`) +- Fixed issue with sorting a duplicate MultiIndex that has multiple dtypes + (:issue:`4516`) +- Fixed bug in ``DataFrame.set_values`` which was causing name attributes to + be lost when expanding the index. (:issue:`3742`, :issue:`4039`) +- Fixed issue where individual ``names``, ``levels`` and ``labels`` could be + set on ``MultiIndex`` without validation (:issue:`3714`, :issue:`4039`) +- Fixed (:issue:`3334`) in pivot_table. Margins did not compute if values is + the index. +- Fix bug in having a rhs of ``np.timedelta64`` or ``np.offsets.DateOffset`` + when operating with datetimes (:issue:`4532`) +- Fix arithmetic with series/datetimeindex and ``np.timedelta64`` not working + the same (:issue:`4134`) and buggy timedelta in NumPy 1.6 (:issue:`4135`) +- Fix bug in ``pd.read_clipboard`` on windows with PY3 (:issue:`4561`); not + decoding properly +- ``tslib.get_period_field()`` and ``tslib.get_period_field_arr()`` now raise + if code argument out of range (:issue:`4519`, :issue:`4520`) +- Fix boolean indexing on an empty series loses index names (:issue:`4235`), + infer_dtype works with empty arrays. +- Fix reindexing with multiple axes; if an axes match was not replacing the + current axes, leading to a possible lazy frequency inference issue + (:issue:`3317`) +- Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly + (causing the original stack trace to be truncated). 
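+
+  As a minimal sketch (not part of the original notes) of the restored behaviour:
+  an exception raised inside the applied function now propagates with its
+  original message and traceback instead of a truncated one.
+
+  .. code-block:: python
+
+     import pandas as pd
+
+     df = pd.DataFrame({"a": [1, 2, 3]})
+
+     def broken(col):
+         raise ValueError("failure inside the applied function")
+
+     try:
+         df.apply(broken)
+     except ValueError as exc:
+         print(exc)  # the original message is preserved
+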
+- Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`) +- Fix assignment with iloc/loc involving a dtype change in an existing column + (:issue:`4312`, :issue:`5702`) have internal setitem_with_indexer in core/indexing + to use Block.setitem +- Fixed bug where thousands operator was not handled correctly for floating + point numbers in csv_import (:issue:`4322`) +- Fix an issue with CacheableOffset not properly being used by many + DateOffset; this prevented the DateOffset from being cached (:issue:`4609`) +- Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the + rhs (:issue:`4576`) +- Fix error/dtype conversion with setitem of ``None`` on ``Series/DataFrame`` + (:issue:`4667`) +- Fix decoding based on a passed in non-default encoding in ``pd.read_stata`` + (:issue:`4626`) +- Fix ``DataFrame.from_records`` with a plain-vanilla ``ndarray``. + (:issue:`4727`) +- Fix some inconsistencies with ``Index.rename`` and ``MultiIndex.rename``, + etc. (:issue:`4718`, :issue:`4628`) +- Bug in using ``iloc/loc`` with a cross-sectional and duplicate indices + (:issue:`4726`) +- Bug with using ``QUOTE_NONE`` with ``to_csv`` causing ``Exception``. + (:issue:`4328`) +- Bug with Series indexing not raising an error when the right-hand-side has + an incorrect length (:issue:`2702`) +- Bug in MultiIndexing with a partial string selection as one part of a + MultIndex (:issue:`4758`) +- Bug with reindexing on the index with a non-unique index will now raise + ``ValueError`` (:issue:`4746`) +- Bug in setting with ``loc/ix`` a single indexer with a MultiIndex axis and + a NumPy array, related to (:issue:`3777`) +- Bug in concatenation with duplicate columns across dtypes not merging with + axis=0 (:issue:`4771`, :issue:`4975`) +- Bug in ``iloc`` with a slice index failing (:issue:`4771`) +- Incorrect error message with no colspecs or width in ``read_fwf``. + (:issue:`4774`) +- Fix bugs in indexing in a Series with a duplicate index (:issue:`4548`, + :issue:`4550`) +- Fixed bug with reading compressed files with ``read_fwf`` in Python 3. + (:issue:`3963`) +- Fixed an issue with a duplicate index and assignment with a dtype change + (:issue:`4686`) +- Fixed bug with reading compressed files in as ``bytes`` rather than ``str`` + in Python 3. Simplifies bytes-producing file-handling in Python 3 + (:issue:`3963`, :issue:`4785`). +- Fixed an issue related to ticklocs/ticklabels with log scale bar plots + across different versions of matplotlib (:issue:`4789`) +- Suppressed DeprecationWarning associated with internal calls issued by + repr() (:issue:`4391`) +- Fixed an issue with a duplicate index and duplicate selector with ``.loc`` + (:issue:`4825`) +- Fixed an issue with ``DataFrame.sort_index`` where, when sorting by a + single column and passing a list for ``ascending``, the argument for + ``ascending`` was being interpreted as ``True`` (:issue:`4839`, + :issue:`4846`) +- Fixed ``Panel.tshift`` not working. Added ``freq`` support to ``Panel.shift`` + (:issue:`4853`) +- Fix an issue in TextFileReader w/ Python engine (i.e. PythonParser) + with thousands != "," (:issue:`4596`) +- Bug in getitem with a duplicate index when using where (:issue:`4879`) +- Fix Type inference code coerces float column into datetime (:issue:`4601`) +- Fixed ``_ensure_numeric`` does not check for complex numbers + (:issue:`4902`) +- Fixed a bug in ``Series.hist`` where two figures were being created when + the ``by`` argument was passed (:issue:`4112`, :issue:`4113`). 
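+
+  An illustrative sketch (not from the original notes; requires matplotlib):
+  grouped histograms are now drawn on a single figure.
+
+  .. code-block:: python
+
+     import numpy as np
+     import pandas as pd
+     import matplotlib.pyplot as plt
+
+     s = pd.Series(np.random.randn(200))
+     groups = np.random.choice(["x", "y"], size=200)
+
+     s.hist(by=groups)  # one figure, one subplot per group
+     plt.show()
+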
+- Fixed a bug in ``convert_objects`` for > 2 ndims (:issue:`4937`) +- Fixed a bug in DataFrame/Panel cache insertion and subsequent indexing + (:issue:`4939`, :issue:`5424`) +- Fixed string methods for ``FrozenNDArray`` and ``FrozenList`` + (:issue:`4929`) +- Fixed a bug with setting invalid or out-of-range values in indexing + enlargement scenarios (:issue:`4940`) +- Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr +- Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep + separate metadata. (:issue:`4202`, :issue:`4830`) +- Fixed skiprows option in Python parser for read_csv (:issue:`4382`) +- Fixed bug preventing ``cut`` from working with ``np.inf`` levels without + explicitly passing labels (:issue:`3415`) +- Fixed wrong check for overlapping in ``DatetimeIndex.union`` + (:issue:`4564`) +- Fixed conflict between thousands separator and date parser in csv_parser + (:issue:`4678`) +- Fix appending when dtypes are not the same (error showing mixing + float/np.datetime64) (:issue:`4993`) +- Fix repr for DateOffset. No longer show duplicate entries in kwds. + Removed unused offset fields. (:issue:`4638`) +- Fixed wrong index name during read_csv if using usecols. Applies to c + parser only. (:issue:`4201`) +- ``Timestamp`` objects can now appear in the left hand side of a comparison + operation with a ``Series`` or ``DataFrame`` object (:issue:`4982`). +- Fix a bug when indexing with ``np.nan`` via ``iloc/loc`` (:issue:`5016`) +- Fixed a bug where low memory c parser could create different types in + different chunks of the same file. Now coerces to numerical type or raises + warning. (:issue:`3866`) +- Fix a bug where reshaping a ``Series`` to its own shape raised + ``TypeError`` (:issue:`4554`) and other reshaping issues. +- Bug in setting with ``ix/loc`` and a mixed int/string index (:issue:`4544`) +- Make sure series-series boolean comparisons are label based (:issue:`4947`) +- Bug in multi-level indexing with a Timestamp partial indexer + (:issue:`4294`) +- Tests/fix for MultiIndex construction of an all-nan frame (:issue:`4078`) +- Fixed a bug where :func:`~pandas.read_html` wasn't correctly inferring + values of tables with commas (:issue:`5029`) +- Fixed a bug where :func:`~pandas.read_html` wasn't providing a stable + ordering of returned tables (:issue:`4770`, :issue:`5029`). +- Fixed a bug where :func:`~pandas.read_html` was incorrectly parsing when + passed ``index_col=0`` (:issue:`5066`). +- Fixed a bug where :func:`~pandas.read_html` was incorrectly inferring the + type of headers (:issue:`5048`). +- Fixed a bug where ``DatetimeIndex`` joins with ``PeriodIndex`` caused a + stack overflow (:issue:`3899`). +- Fixed a bug where ``groupby`` objects didn't allow plots (:issue:`5102`). +- Fixed a bug where ``groupby`` objects weren't tab-completing column names + (:issue:`5102`). +- Fixed a bug where ``groupby.plot()`` and friends were duplicating figures + multiple times (:issue:`5102`). +- Provide automatic conversion of ``object`` dtypes on fillna, related + (:issue:`5103`) +- Fixed a bug where default options were being overwritten in the option + parser cleaning (:issue:`5121`). +- Treat a list/ndarray identically for ``iloc`` indexing with list-like + (:issue:`5006`) +- Fix ``MultiIndex.get_level_values()`` with missing values (:issue:`5074`) +- Fix bound checking for Timestamp() with datetime64 input (:issue:`4065`) +- Fix a bug where ``TestReadHtml`` wasn't calling the correct ``read_html()`` + function (:issue:`5150`). 
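+
+  For reference, a minimal ``read_html`` round trip (an illustrative sketch, not
+  part of the original notes; assumes an HTML parser such as lxml or html5lib is
+  installed):
+
+  .. code-block:: python
+
+     import pandas as pd
+
+     html = """<table>
+       <tr><th>city</th><th>population</th></tr>
+       <tr><td>Oslo</td><td>1,019,513</td></tr>
+     </table>"""
+
+     tables = pd.read_html(html, thousands=",")  # returns a list of DataFrames
+     tables[0]
+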
+- Fix a bug with ``NDFrame.replace()`` which made replacement appear as + though it was (incorrectly) using regular expressions (:issue:`5143`). +- Fix better error message for to_datetime (:issue:`4928`) +- Made sure different locales are tested on travis-ci (:issue:`4918`). Also + adds a couple of utilities for getting locales and setting locales with a + context manager. +- Fixed segfault on ``isnull(MultiIndex)`` (now raises an error instead) + (:issue:`5123`, :issue:`5125`) +- Allow duplicate indices when performing operations that align + (:issue:`5185`, :issue:`5639`) +- Compound dtypes in a constructor raise ``NotImplementedError`` + (:issue:`5191`) +- Bug in comparing duplicate frames (:issue:`4421`) related +- Bug in describe on duplicate frames +- Bug in ``to_datetime`` with a format and ``coerce=True`` not raising + (:issue:`5195`) +- Bug in ``loc`` setting with multiple indexers and a rhs of a Series that + needs broadcasting (:issue:`5206`) +- Fixed bug where inplace setting of levels or labels on ``MultiIndex`` would + not clear cached ``values`` property and therefore return wrong ``values``. + (:issue:`5215`) +- Fixed bug where filtering a grouped DataFrame or Series did not maintain + the original ordering (:issue:`4621`). +- Fixed ``Period`` with a business date freq to always roll-forward if on a + non-business date. (:issue:`5203`) +- Fixed bug in Excel writers where frames with duplicate column names weren't + written correctly. (:issue:`5235`) +- Fixed issue with ``drop`` and a non-unique index on Series (:issue:`5248`) +- Fixed segfault in C parser caused by passing more names than columns in + the file. (:issue:`5156`) +- Fix ``Series.isin`` with date/time-like dtypes (:issue:`5021`) +- C and Python Parser can now handle the more common MultiIndex column + format which doesn't have a row for index names (:issue:`4702`) +- Bug when trying to use an out-of-bounds date as an object dtype + (:issue:`5312`) +- Bug when trying to display an embedded PandasObject (:issue:`5324`) +- Allows operating of Timestamps to return a datetime if the result is out-of-bounds + related (:issue:`5312`) +- Fix return value/type signature of ``initObjToJSON()`` to be compatible + with numpy's ``import_array()`` (:issue:`5334`, :issue:`5326`) +- Bug when renaming then set_index on a DataFrame (:issue:`5344`) +- Test suite no longer leaves around temporary files when testing graphics. (:issue:`5347`) + (thanks for catching this @yarikoptic!) +- Fixed html tests on win32. (:issue:`4580`) +- Make sure that ``head/tail`` are ``iloc`` based, (:issue:`5370`) +- Fixed bug for ``PeriodIndex`` string representation if there are 1 or 2 + elements. (:issue:`5372`) +- The GroupBy methods ``transform`` and ``filter`` can be used on Series + and DataFrames that have repeated (non-unique) indices. (:issue:`4620`) +- Fix empty series not printing name in repr (:issue:`4651`) +- Make tests create temp files in temp directory by default. 
(:issue:`5419`) +- ``pd.to_timedelta`` of a scalar returns a scalar (:issue:`5410`) +- ``pd.to_timedelta`` accepts ``NaN`` and ``NaT``, returning ``NaT`` instead of raising (:issue:`5437`) +- performance improvements in ``isnull`` on larger size pandas objects +- Fixed various setitem with 1d ndarray that does not have a matching + length to the indexer (:issue:`5508`) +- Bug in getitem with a MultiIndex and ``iloc`` (:issue:`5528`) +- Bug in delitem on a Series (:issue:`5542`) +- Bug fix in apply when using custom function and objects are not mutated (:issue:`5545`) +- Bug in selecting from a non-unique index with ``loc`` (:issue:`5553`) +- Bug in groupby returning non-consistent types when user function returns a ``None``, (:issue:`5592`) +- Work around regression in numpy 1.7.0 which erroneously raises IndexError from ``ndarray.item`` (:issue:`5666`) +- Bug in repeated indexing of object with resultant non-unique index (:issue:`5678`) +- Bug in fillna with Series and a passed series/dict (:issue:`5703`) +- Bug in groupby transform with a datetime-like grouper (:issue:`5712`) +- Bug in MultiIndex selection in PY3 when using certain keys (:issue:`5725`) +- Row-wise concat of differing dtypes failing in certain cases (:issue:`5754`) + +.. _whatsnew_0.13.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.12.0..v0.13.0 diff --git a/doc/source/whatsnew/v0.13.1.rst b/doc/source/whatsnew/v0.13.1.rst new file mode 100644 index 00000000..249b9555 --- /dev/null +++ b/doc/source/whatsnew/v0.13.1.rst @@ -0,0 +1,482 @@ +.. _whatsnew_0131: + +Version 0.13.1 (February 3, 2014) +--------------------------------- + +{{ header }} + + + +This is a minor release from 0.13.0 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- Added ``infer_datetime_format`` keyword to ``read_csv/to_datetime`` to allow speedups for homogeneously formatted datetimes. +- Will intelligently limit display precision for datetime/timedelta formats. +- Enhanced Panel :meth:`~pandas.Panel.apply` method. +- Suggested tutorials in new :ref:`Tutorials` section. +- Our pandas ecosystem is growing, We now feature related projects in a new :ref:`Pandas Ecosystem` section. +- Much work has been taking place on improving the docs, and a new :ref:`Contributing` section has been added. +- Even though it may only be of interest to devs, we <3 our new CI status page: `ScatterCI `__. + +.. warning:: + + 0.13.1 fixes a bug that was caused by a combination of having numpy < 1.8, and doing + chained assignment on a string-like array. Please review :ref:`the docs`, + chained indexing can have unexpected results and should generally be avoided. + + This would previously segfault: + + .. ipython:: python + + df = pd.DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) + df["A"].iloc[0] = np.nan + df + + The recommended way to do this type of assignment is: + + .. ipython:: python + + df = pd.DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) + df.loc[0, "A"] = np.nan + df + +Output formatting enhancements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- df.info() view now display dtype info per column (:issue:`5682`) + +- df.info() now honors the option ``max_info_rows``, to disable null counts for large frames (:issue:`5974`) + + .. 
ipython:: python + + max_info_rows = pd.get_option("max_info_rows") + + df = pd.DataFrame( + { + "A": np.random.randn(10), + "B": np.random.randn(10), + "C": pd.date_range("20130101", periods=10), + } + ) + df.iloc[3:6, [0, 2]] = np.nan + + .. ipython:: python + + # set to not display the null counts + pd.set_option("max_info_rows", 0) + df.info() + + .. ipython:: python + + # this is the default (same as in 0.13.0) + pd.set_option("max_info_rows", max_info_rows) + df.info() + +- Add ``show_dimensions`` display option for the new DataFrame repr to control whether the dimensions print. + + .. ipython:: python + + df = pd.DataFrame([[1, 2], [3, 4]]) + pd.set_option("show_dimensions", False) + df + + pd.set_option("show_dimensions", True) + df + +- The ``ArrayFormatter`` for ``datetime`` and ``timedelta64`` now intelligently + limit precision based on the values in the array (:issue:`3401`) + + Previously output might look like: + + .. code-block:: text + + age today diff + 0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00 + 1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00 + + Now the output looks like: + + .. ipython:: python + + df = pd.DataFrame( + [pd.Timestamp("20010101"), pd.Timestamp("20040601")], columns=["age"] + ) + df["today"] = pd.Timestamp("20130419") + df["diff"] = df["today"] - df["age"] + df + +API changes +~~~~~~~~~~~ + +- Add ``-NaN`` and ``-nan`` to the default set of NA values (:issue:`5952`). + See :ref:`NA Values `. + +- Added ``Series.str.get_dummies`` vectorized string method (:issue:`6021`), to extract + dummy/indicator variables for separated string columns: + + .. ipython:: python + + s = pd.Series(["a", "a|b", np.nan, "a|c"]) + s.str.get_dummies(sep="|") + +- Added the ``NDFrame.equals()`` method to compare if two NDFrames are + equal have equal axes, dtypes, and values. Added the + ``array_equivalent`` function to compare if two ndarrays are + equal. NaNs in identical locations are treated as + equal. (:issue:`5283`) See also :ref:`the docs` for a motivating example. + + .. code-block:: python + + df = pd.DataFrame({"col": ["foo", 0, np.nan]}) + df2 = pd.DataFrame({"col": [np.nan, 0, "foo"]}, index=[2, 1, 0]) + df.equals(df2) + df.equals(df2.sort_index()) + +- ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a + ``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is + empty (:issue:`6007`). + + Previously, calling ``DataFrame.apply`` an empty ``DataFrame`` would return + either a ``DataFrame`` if there were no columns, or the function being + applied would be called with an empty ``Series`` to guess whether a + ``Series`` or ``DataFrame`` should be returned: + + .. code-block:: ipython + + In [32]: def applied_func(col): + ....: print("Apply function being called with: ", col) + ....: return col.sum() + ....: + + In [33]: empty = DataFrame(columns=['a', 'b']) + + In [34]: empty.apply(applied_func) + Apply function being called with: Series([], Length: 0, dtype: float64) + Out[34]: + a NaN + b NaN + Length: 2, dtype: float64 + + Now, when ``apply`` is called on an empty ``DataFrame``: if the ``reduce`` + argument is ``True`` a ``Series`` will returned, if it is ``False`` a + ``DataFrame`` will be returned, and if it is ``None`` (the default) the + function being applied will be called with an empty series to try and guess + the return type. + + .. 
code-block:: ipython + + In [35]: empty.apply(applied_func, reduce=True) + Out[35]: + a NaN + b NaN + Length: 2, dtype: float64 + + In [36]: empty.apply(applied_func, reduce=False) + Out[36]: + Empty DataFrame + Columns: [a, b] + Index: [] + + [0 rows x 2 columns] + + +Prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are no announced changes in 0.13 or prior that are taking effect as of 0.13.1 + +Deprecations +~~~~~~~~~~~~ + +There are no deprecations of prior behavior in 0.13.1 + +Enhancements +~~~~~~~~~~~~ + +- ``pd.read_csv`` and ``pd.to_datetime`` learned a new ``infer_datetime_format`` keyword which greatly + improves parsing perf in many cases. Thanks to @lexual for suggesting and @danbirken + for rapidly implementing. (:issue:`5490`, :issue:`6021`) + + If ``parse_dates`` is enabled and this flag is set, pandas will attempt to + infer the format of the datetime strings in the columns, and if it can + be inferred, switch to a faster method of parsing them. In some cases + this can increase the parsing speed by ~5-10x. + + .. code-block:: python + + # Try to infer the format for the index column + df = pd.read_csv( + "foo.csv", index_col=0, parse_dates=True, infer_datetime_format=True + ) + +- ``date_format`` and ``datetime_format`` keywords can now be specified when writing to ``excel`` + files (:issue:`4133`) + +- ``MultiIndex.from_product`` convenience function for creating a MultiIndex from + the cartesian product of a set of iterables (:issue:`6055`): + + .. ipython:: python + + shades = ["light", "dark"] + colors = ["red", "green", "blue"] + + pd.MultiIndex.from_product([shades, colors], names=["shade", "color"]) + +- Panel :meth:`~pandas.Panel.apply` will work on non-ufuncs. See :ref:`the docs`. + + .. code-block:: ipython + + In [28]: import pandas._testing as tm + + In [29]: panel = tm.makePanel(5) + + In [30]: panel + Out[30]: + + Dimensions: 3 (items) x 5 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: A to D + + In [31]: panel['ItemA'] + Out[31]: + A B C D + 2000-01-03 -0.673690 0.577046 -1.344312 -1.469388 + 2000-01-04 0.113648 -1.715002 0.844885 0.357021 + 2000-01-05 -1.478427 -1.039268 1.075770 -0.674600 + 2000-01-06 0.524988 -0.370647 -0.109050 -1.776904 + 2000-01-07 0.404705 -1.157892 1.643563 -0.968914 + + [5 rows x 4 columns] + + Specifying an ``apply`` that operates on a Series (to return a single element) + + .. code-block:: ipython + + In [32]: panel.apply(lambda x: x.dtype, axis='items') + Out[32]: + A B C D + 2000-01-03 float64 float64 float64 float64 + 2000-01-04 float64 float64 float64 float64 + 2000-01-05 float64 float64 float64 float64 + 2000-01-06 float64 float64 float64 float64 + 2000-01-07 float64 float64 float64 float64 + + [5 rows x 4 columns] + + A similar reduction type operation + + .. code-block:: ipython + + In [33]: panel.apply(lambda x: x.sum(), axis='major_axis') + Out[33]: + ItemA ItemB ItemC + A -1.108775 -1.090118 -2.984435 + B -3.705764 0.409204 1.866240 + C 2.110856 2.960500 -0.974967 + D -4.532785 0.303202 -3.685193 + + [4 rows x 3 columns] + + This is equivalent to + + .. 
code-block:: ipython + + In [34]: panel.sum('major_axis') + Out[34]: + ItemA ItemB ItemC + A -1.108775 -1.090118 -2.984435 + B -3.705764 0.409204 1.866240 + C 2.110856 2.960500 -0.974967 + D -4.532785 0.303202 -3.685193 + + [4 rows x 3 columns] + + A transformation operation that returns a Panel, but is computing + the z-score across the major_axis + + .. code-block:: ipython + + In [35]: result = panel.apply(lambda x: (x - x.mean()) / x.std(), + ....: axis='major_axis') + ....: + + In [36]: result + Out[36]: + + Dimensions: 3 (items) x 5 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: A to D + + In [37]: result['ItemA'] # noqa E999 + Out[37]: + A B C D + 2000-01-03 -0.535778 1.500802 -1.506416 -0.681456 + 2000-01-04 0.397628 -1.108752 0.360481 1.529895 + 2000-01-05 -1.489811 -0.339412 0.557374 0.280845 + 2000-01-06 0.885279 0.421830 -0.453013 -1.053785 + 2000-01-07 0.742682 -0.474468 1.041575 -0.075499 + + [5 rows x 4 columns] + +- Panel :meth:`~pandas.Panel.apply` operating on cross-sectional slabs. (:issue:`1148`) + + .. code-block:: ipython + + In [38]: def f(x): + ....: return ((x.T - x.mean(1)) / x.std(1)).T + ....: + + In [39]: result = panel.apply(f, axis=['items', 'major_axis']) + + In [40]: result + Out[40]: + + Dimensions: 4 (items) x 5 (major_axis) x 3 (minor_axis) + Items axis: A to D + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: ItemA to ItemC + + In [41]: result.loc[:, :, 'ItemA'] + Out[41]: + A B C D + 2000-01-03 0.012922 -0.030874 -0.629546 -0.757034 + 2000-01-04 0.392053 -1.071665 0.163228 0.548188 + 2000-01-05 -1.093650 -0.640898 0.385734 -1.154310 + 2000-01-06 1.005446 -1.154593 -0.595615 -0.809185 + 2000-01-07 0.783051 -0.198053 0.919339 -1.052721 + + [5 rows x 4 columns] + + This is equivalent to the following + + .. code-block:: ipython + + In [42]: result = pd.Panel({ax: f(panel.loc[:, :, ax]) for ax in panel.minor_axis}) + + In [43]: result + Out[43]: + + Dimensions: 4 (items) x 5 (major_axis) x 3 (minor_axis) + Items axis: A to D + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: ItemA to ItemC + + In [44]: result.loc[:, :, 'ItemA'] + Out[44]: + A B C D + 2000-01-03 0.012922 -0.030874 -0.629546 -0.757034 + 2000-01-04 0.392053 -1.071665 0.163228 0.548188 + 2000-01-05 -1.093650 -0.640898 0.385734 -1.154310 + 2000-01-06 1.005446 -1.154593 -0.595615 -0.809185 + 2000-01-07 0.783051 -0.198053 0.919339 -1.052721 + + [5 rows x 4 columns] + +Performance +~~~~~~~~~~~ + +Performance improvements for 0.13.1 + +- Series datetime/timedelta binary operations (:issue:`5801`) +- DataFrame ``count/dropna`` for ``axis=1`` +- Series.str.contains now has a ``regex=False`` keyword which can be faster for plain (non-regex) string patterns. (:issue:`5879`) +- Series.str.extract (:issue:`5944`) +- ``dtypes/ftypes`` methods (:issue:`5968`) +- indexing with object dtypes (:issue:`5968`) +- ``DataFrame.apply`` (:issue:`6013`) +- Regression in JSON IO (:issue:`5765`) +- Index construction from Series (:issue:`6150`) + +Experimental +~~~~~~~~~~~~ + +There are no experimental changes in 0.13.1 + +.. _release.bug_fixes-0.13.1: + +Bug fixes +~~~~~~~~~ + +- Bug in ``io.wb.get_countries`` not including all countries (:issue:`6008`) +- Bug in Series replace with timestamp dict (:issue:`5797`) +- read_csv/read_table now respects the ``prefix`` kwarg (:issue:`5732`). 
+- Bug in selection with missing values via ``.ix`` from a duplicate indexed DataFrame failing (:issue:`5835`) +- Fix issue of boolean comparison on empty DataFrames (:issue:`5808`) +- Bug in isnull handling ``NaT`` in an object array (:issue:`5443`) +- Bug in ``to_datetime`` when passed a ``np.nan`` or integer datelike and a format string (:issue:`5863`) +- Bug in groupby dtype conversion with datetimelike (:issue:`5869`) +- Regression in handling of empty Series as indexers to Series (:issue:`5877`) +- Bug in internal caching, related to (:issue:`5727`) +- Testing bug in reading JSON/msgpack from a non-filepath on windows under py3 (:issue:`5874`) +- Bug when assigning to .ix[tuple(...)] (:issue:`5896`) +- Bug in fully reindexing a Panel (:issue:`5905`) +- Bug in idxmin/max with object dtypes (:issue:`5914`) +- Bug in ``BusinessDay`` when adding n days to a date not on offset when n>5 and n%5==0 (:issue:`5890`) +- Bug in assigning to chained series with a series via ix (:issue:`5928`) +- Bug in creating an empty DataFrame, copying, then assigning (:issue:`5932`) +- Bug in DataFrame.tail with empty frame (:issue:`5846`) +- Bug in propagating metadata on ``resample`` (:issue:`5862`) +- Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`) +- Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`) +- ``pd.match`` not returning passed sentinel +- ``Panel.to_frame()`` no longer fails when ``major_axis`` is a + ``MultiIndex`` (:issue:`5402`). +- Bug in ``pd.read_msgpack`` with inferring a ``DateTimeIndex`` frequency + incorrectly (:issue:`5947`) +- Fixed ``to_datetime`` for array with both Tz-aware datetimes and ``NaT``'s (:issue:`5961`) +- Bug in rolling skew/kurtosis when passed a Series with bad data (:issue:`5749`) +- Bug in scipy ``interpolate`` methods with a datetime index (:issue:`5975`) +- Bug in NaT comparison if a mixed datetime/np.datetime64 with NaT were passed (:issue:`5968`) +- Fixed bug with ``pd.concat`` losing dtype information if all inputs are empty (:issue:`5742`) +- Recent changes in IPython cause warnings to be emitted when using previous versions + of pandas in QTConsole, now fixed. If you're using an older version and + need to suppress the warnings, see (:issue:`5922`). +- Bug in merging ``timedelta`` dtypes (:issue:`5695`) +- Bug in plotting.scatter_matrix function. Wrong alignment among diagonal + and off-diagonal plots, see (:issue:`5497`). +- Regression in Series with a MultiIndex via ix (:issue:`6018`) +- Bug in Series.xs with a MultiIndex (:issue:`6018`) +- Bug in Series construction of mixed type with datelike and an integer (which should result in + object type and not automatic conversion) (:issue:`6028`) +- Possible segfault when chained indexing with an object array under NumPy 1.7.1 (:issue:`6026`, :issue:`6056`) +- Bug in setting using fancy indexing a single element with a non-scalar (e.g. a list), + (:issue:`6043`) +- ``to_sql`` did not respect ``if_exists`` (:issue:`4110` :issue:`4304`) +- Regression in ``.get(None)`` indexing from 0.12 (:issue:`5652`) +- Subtle ``iloc`` indexing bug, surfaced in (:issue:`6059`) +- Bug with insert of strings into DatetimeIndex (:issue:`5818`) +- Fixed unicode bug in to_html/HTML repr (:issue:`6098`) +- Fixed missing arg validation in get_options_data (:issue:`6105`) +- Bug in assignment with duplicate columns in a frame where the locations + are a slice (e.g. 
next to each other) (:issue:`6120`) +- Bug in propagating _ref_locs during construction of a DataFrame with dups + index/columns (:issue:`6121`) +- Bug in ``DataFrame.apply`` when using mixed datelike reductions (:issue:`6125`) +- Bug in ``DataFrame.append`` when appending a row with different columns (:issue:`6129`) +- Bug in DataFrame construction with recarray and non-ns datetime dtype (:issue:`6140`) +- Bug in ``.loc`` setitem indexing with a dataframe on rhs, multiple item setting, and + a datetimelike (:issue:`6152`) +- Fixed a bug in ``query``/``eval`` during lexicographic string comparisons (:issue:`6155`). +- Fixed a bug in ``query`` where the index of a single-element ``Series`` was + being thrown away (:issue:`6148`). +- Bug in ``HDFStore`` on appending a dataframe with MultiIndexed columns to + an existing table (:issue:`6167`) +- Consistency with dtypes in setting an empty DataFrame (:issue:`6171`) +- Bug in selecting on a MultiIndex ``HDFStore`` even in the presence of under + specified column spec (:issue:`6169`) +- Bug in ``nanops.var`` with ``ddof=1`` and 1 elements would sometimes return ``inf`` + rather than ``nan`` on some platforms (:issue:`6136`) +- Bug in Series and DataFrame bar plots ignoring the ``use_index`` keyword (:issue:`6209`) +- Bug in groupby with mixed str/int under python3 fixed; ``argsort`` was failing (:issue:`6212`) + +.. _whatsnew_0.13.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.13.0..v0.13.1 diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst new file mode 100644 index 00000000..b59938a9 --- /dev/null +++ b/doc/source/whatsnew/v0.14.0.rst @@ -0,0 +1,1087 @@ +.. _whatsnew_0140: + +Version 0.14.0 (May 31 , 2014) +------------------------------ + +{{ header }} + + +This is a major release from 0.13.1 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +- Highlights include: + + - Officially support Python 3.4 + - SQL interfaces updated to use ``sqlalchemy``, See :ref:`Here`. + - Display interface changes, See :ref:`Here` + - MultiIndexing Using Slicers, See :ref:`Here`. + - Ability to join a singly-indexed DataFrame with a MultiIndexed DataFrame, see :ref:`Here ` + - More consistency in groupby results and more flexible groupby specifications, See :ref:`Here` + - Holiday calendars are now supported in ``CustomBusinessDay``, see :ref:`Here ` + - Several improvements in plotting functions, including: hexbin, area and pie plots, see :ref:`Here`. + - Performance doc section on I/O operations, See :ref:`Here ` + +- :ref:`Other Enhancements ` + +- :ref:`API Changes ` + +- :ref:`Text Parsing API Changes ` + +- :ref:`Groupby API Changes ` + +- :ref:`Performance Improvements ` + +- :ref:`Prior Deprecations ` + +- :ref:`Deprecations ` + +- :ref:`Known Issues ` + +- :ref:`Bug Fixes ` + +.. warning:: + + In 0.14.0 all ``NDFrame`` based containers have undergone significant internal refactoring. Before that each block of + homogeneous data had its own labels and extra care was necessary to keep those in sync with the parent container's labels. + This should not have any visible user/API behavior changes (:issue:`6745`) + +.. _whatsnew_0140.api: + +API changes +~~~~~~~~~~~ + +- ``read_excel`` uses 0 as the default sheet (:issue:`6573`) +- ``iloc`` will now accept out-of-bounds indexers for slices, e.g. 
a value that exceeds the length of the object being + indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds + values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise + ``IndexError`` (:issue:`6296`, :issue:`6299`). This could result in an empty axis (e.g. an empty DataFrame being returned) + + .. ipython:: python + + dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB')) + dfl + dfl.iloc[:, 2:3] + dfl.iloc[:, 1:3] + dfl.iloc[4:6] + + These are out-of-bounds selections + + .. code-block:: python + + >>> dfl.iloc[[4, 5, 6]] + IndexError: positional indexers are out-of-bounds + + >>> dfl.iloc[:, 4] + IndexError: single positional indexer is out-of-bounds + +- Slicing with negative start, stop & step values handles corner cases better (:issue:`6531`): + + - ``df.iloc[:-len(df)]`` is now empty + - ``df.iloc[len(df)::-1]`` now enumerates all elements in reverse + +- The :meth:`DataFrame.interpolate` keyword ``downcast`` default has been changed from ``infer`` to + ``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`). +- When converting a dataframe to HTML it used to return ``Empty DataFrame``. This special case has + been removed, instead a header with the column names is returned (:issue:`6062`). +- ``Series`` and ``Index`` now internally share more common operations, e.g. ``factorize(),nunique(),value_counts()`` are + now supported on ``Index`` types as well. The ``Series.weekday`` property from is removed + from Series for API consistency. Using a ``DatetimeIndex/PeriodIndex`` method on a Series will now raise a ``TypeError``. + (:issue:`4551`, :issue:`4056`, :issue:`5519`, :issue:`6380`, :issue:`7206`). + +- Add ``is_month_start``, ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``, ``is_year_start``, ``is_year_end`` accessors for ``DateTimeIndex`` / ``Timestamp`` which return a boolean array of whether the timestamp(s) are at the start/end of the month/quarter/year defined by the frequency of the ``DateTimeIndex`` / ``Timestamp`` (:issue:`4565`, :issue:`6998`) + +- Local variable usage has changed in + :func:`pandas.eval`/:meth:`DataFrame.eval`/:meth:`DataFrame.query` + (:issue:`5987`). For the :class:`~pandas.DataFrame` methods, two things have + changed + + - Column names are now given precedence over locals + - Local variables must be referred to explicitly. This means that even if + you have a local variable that is *not* a column you must still refer to + it with the ``'@'`` prefix. + - You can have an expression like ``df.query('@a < a')`` with no complaints + from ``pandas`` about ambiguity of the name ``a``. + - The top-level :func:`pandas.eval` function does not allow you use the + ``'@'`` prefix and provides you with an error message telling you so. + - ``NameResolutionError`` was removed because it isn't necessary anymore. + +- Define and document the order of column vs index names in query/eval (:issue:`6676`) +- ``concat`` will now concatenate mixed Series and DataFrames using the Series name + or numbering columns as needed (:issue:`2385`). See :ref:`the docs ` +- Slicing and advanced/boolean indexing operations on ``Index`` classes as well + as :meth:`Index.delete` and :meth:`Index.drop` methods will no longer change the type of the + resulting index (:issue:`6440`, :issue:`7040`) + + .. 
ipython:: python + + i = pd.Index([1, 2, 3, 'a', 'b', 'c']) + i[[0, 1, 2]] + i.drop(['a', 'b', 'c']) + + Previously, the above operation would return ``Int64Index``. If you'd like + to do this manually, use :meth:`Index.astype` + + .. ipython:: python + + i[[0, 1, 2]].astype(np.int_) + +- ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example, + the old behavior returned an Index in this case (:issue:`6459`): + + .. ipython:: python + :suppress: + + np.random.seed(1234) + from itertools import product + tuples = list(product(('a', 'b'), ('c', 'd'))) + mi = pd.MultiIndex.from_tuples(tuples) + df_multi = pd.DataFrame(np.random.randn(4, 2), index=mi) + tuple_ind = pd.Index(tuples, tupleize_cols=False) + df_multi.index + + .. ipython:: python + + # Old behavior, casted MultiIndex to an Index + tuple_ind + df_multi.set_index(tuple_ind) + + # New behavior + mi + df_multi.set_index(mi) + + This also applies when passing multiple indices to ``set_index``: + + .. ipython:: python + + @suppress + df_multi.index = tuple_ind + + # Old output, 2-level MultiIndex of tuples + df_multi.set_index([df_multi.index, df_multi.index]) + + @suppress + df_multi.index = mi + + # New output, 4-level MultiIndex + df_multi.set_index([df_multi.index, df_multi.index]) + +- ``pairwise`` keyword was added to the statistical moment functions + ``rolling_cov``, ``rolling_corr``, ``ewmcov``, ``ewmcorr``, + ``expanding_cov``, ``expanding_corr`` to allow the calculation of moving + window covariance and correlation matrices (:issue:`4950`). See + :ref:`Computing rolling pairwise covariances and correlations + ` in the docs. + + .. code-block:: ipython + + In [1]: df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD')) + + In [4]: covs = pd.rolling_cov(df[['A', 'B', 'C']], + ....: df[['B', 'C', 'D']], + ....: 5, + ....: pairwise=True) + + + In [5]: covs[df.index[-1]] + Out[5]: + B C D + A 0.035310 0.326593 -0.505430 + B 0.137748 -0.006888 -0.005383 + C -0.006888 0.861040 0.020762 + +- ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) + +- Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. (:issue:`6734`) +- ``stack`` and ``unstack`` now raise a ``ValueError`` when the ``level`` keyword refers + to a non-unique item in the ``Index`` (previously raised a ``KeyError``). (:issue:`6738`) +- drop unused order argument from ``Series.sort``; args now are in the same order as ``Series.order``; + add ``na_position`` arg to conform to ``Series.order`` (:issue:`6847`) +- default sorting algorithm for ``Series.order`` is now ``quicksort``, to conform with ``Series.sort`` + (and numpy defaults) +- add ``inplace`` keyword to ``Series.order/sort`` to make them inverses (:issue:`6859`) +- ``DataFrame.sort`` now places NaNs at the beginning or end of the sort according to the ``na_position`` parameter. (:issue:`3917`) +- accept ``TextFileReader`` in ``concat``, which was affecting a common user idiom (:issue:`6583`), this was a regression + from 0.13.1 +- Added ``factorize`` functions to ``Index`` and ``Series`` to get indexer and unique values (:issue:`7090`) +- ``describe`` on a DataFrame with a mix of Timestamp and string like objects returns a different Index (:issue:`7088`). + Previously the index was unintentionally sorted. 
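+
+  A small sketch of the resulting summary (illustrative only; ``include='all'``
+  is from later pandas versions and the exact summary rows depend on the
+  version used):
+
+  .. code-block:: python
+
+     import pandas as pd
+
+     df = pd.DataFrame({
+         "when": pd.to_datetime(["2014-01-01", "2014-02-01"]),
+         "what": ["a", "b"],
+     })
+
+     df.describe(include="all")  # the summary index is no longer re-sorted
+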
+- Arithmetic operations with **only** ``bool`` dtypes now give a warning indicating + that they are evaluated in Python space for ``+``, ``-``, + and ``*`` operations and raise for all others (:issue:`7011`, :issue:`6762`, + :issue:`7015`, :issue:`7210`) + + .. code-block:: python + + >>> x = pd.Series(np.random.rand(10) > 0.5) + >>> y = True + >>> x + y # warning generated: should do x | y instead + UserWarning: evaluating in Python space because the '+' operator is not + supported by numexpr for the bool dtype, use '|' instead + >>> x / y # this raises because it doesn't make sense + NotImplementedError: operator '/' not implemented for bool dtypes + +- In ``HDFStore``, ``select_as_multiple`` will always raise a ``KeyError``, when a key or the selector is not found (:issue:`6177`) +- ``df['col'] = value`` and ``df.loc[:,'col'] = value`` are now completely equivalent; + previously the ``.loc`` would not necessarily coerce the dtype of the resultant series (:issue:`6149`) +- ``dtypes`` and ``ftypes`` now return a series with ``dtype=object`` on empty containers (:issue:`5740`) +- ``df.to_csv`` will now return a string of the CSV data if neither a target path nor a buffer is provided + (:issue:`6061`) +- ``pd.infer_freq()`` will now raise a ``TypeError`` if given an invalid ``Series/Index`` + type (:issue:`6407`, :issue:`6463`) +- A tuple passed to ``DataFame.sort_index`` will be interpreted as the levels of + the index, rather than requiring a list of tuple (:issue:`4370`) +- all offset operations now return ``Timestamp`` types (rather than datetime), Business/Week frequencies were incorrect (:issue:`4069`) +- ``to_excel`` now converts ``np.inf`` into a string representation, + customizable by the ``inf_rep`` keyword argument (Excel has no native inf + representation) (:issue:`6782`) +- Replace ``pandas.compat.scipy.scoreatpercentile`` with ``numpy.percentile`` (:issue:`6810`) +- ``.quantile`` on a ``datetime[ns]`` series now returns ``Timestamp`` instead + of ``np.datetime64`` objects (:issue:`6810`) +- change ``AssertionError`` to ``TypeError`` for invalid types passed to ``concat`` (:issue:`6583`) +- Raise a ``TypeError`` when ``DataFrame`` is passed an iterator as the + ``data`` argument (:issue:`5357`) + + +.. _whatsnew_0140.display: + +Display changes +~~~~~~~~~~~~~~~ + +- The default way of printing large DataFrames has changed. DataFrames + exceeding ``max_rows`` and/or ``max_columns`` are now displayed in a + centrally truncated view, consistent with the printing of a + :class:`pandas.Series` (:issue:`5603`). + + In previous versions, a DataFrame was truncated once the dimension + constraints were reached and an ellipse (...) signaled that part of + the data was cut off. + + .. image:: ../_static/trunc_before.png + :alt: The previous look of truncate. + + In the current version, large DataFrames are centrally truncated, + showing a preview of head and tail in both dimensions. + + .. image:: ../_static/trunc_after.png + :alt: The new look. + +- allow option ``'truncate'`` for ``display.show_dimensions`` to only show the dimensions if the + frame is truncated (:issue:`6547`). + + The default for ``display.show_dimensions`` will now be ``truncate``. This is consistent with + how Series display length. + + .. 
ipython:: python + + dfd = pd.DataFrame(np.arange(25).reshape(-1, 5), + index=[0, 1, 2, 3, 4], + columns=[0, 1, 2, 3, 4]) + + # show dimensions since this is truncated + with pd.option_context('display.max_rows', 2, 'display.max_columns', 2, + 'display.show_dimensions', 'truncate'): + print(dfd) + + # will not show dimensions since it is not truncated + with pd.option_context('display.max_rows', 10, 'display.max_columns', 40, + 'display.show_dimensions', 'truncate'): + print(dfd) + +- Regression in the display of a MultiIndexed Series with ``display.max_rows`` is less than the + length of the series (:issue:`7101`) +- Fixed a bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the + ``large_repr`` set to 'info' (:issue:`7105`) +- The ``verbose`` keyword in ``DataFrame.info()``, which controls whether to shorten the ``info`` + representation, is now ``None`` by default. This will follow the global setting in + ``display.max_info_columns``. The global setting can be overridden with ``verbose=True`` or + ``verbose=False``. +- Fixed a bug with the ``info`` repr not honoring the ``display.max_info_columns`` setting (:issue:`6939`) +- Offset/freq info now in Timestamp __repr__ (:issue:`4553`) + +.. _whatsnew_0140.parsing: + +Text parsing API changes +~~~~~~~~~~~~~~~~~~~~~~~~ + +:func:`read_csv`/:func:`read_table` will now be noisier w.r.t invalid options rather than falling back to the ``PythonParser``. + +- Raise ``ValueError`` when ``sep`` specified with + ``delim_whitespace=True`` in :func:`read_csv`/:func:`read_table` + (:issue:`6607`) +- Raise ``ValueError`` when ``engine='c'`` specified with unsupported + options in :func:`read_csv`/:func:`read_table` (:issue:`6607`) +- Raise ``ValueError`` when fallback to python parser causes options to be + ignored (:issue:`6607`) +- Produce :class:`~pandas.io.parsers.ParserWarning` on fallback to python + parser when no options are ignored (:issue:`6607`) +- Translate ``sep='\s+'`` to ``delim_whitespace=True`` in + :func:`read_csv`/:func:`read_table` if no other C-unsupported options + specified (:issue:`6607`) + +.. _whatsnew_0140.groupby: + +GroupBy API changes +~~~~~~~~~~~~~~~~~~~ + +More consistent behavior for some groupby methods: + +- groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation: + + .. ipython:: python + + df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + g.head(1) # filters DataFrame + + g.apply(lambda x: x.head(1)) # used to simply fall-through + +- groupby head and tail respect column selection: + + .. ipython:: python + + g[['B']].head(1) + +- groupby ``nth`` now reduces by default; filtering can be achieved by passing ``as_index=False``. With an optional ``dropna`` argument to ignore + NaN. See :ref:`the docs `. + + Reducing + + .. ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + g.nth(0) + + # this is equivalent to g.first() + g.nth(0, dropna='any') + + # this is equivalent to g.last() + g.nth(-1, dropna='any') + + Filtering + + .. ipython:: python + + gf = df.groupby('A', as_index=False) + gf.nth(0) + gf.nth(0, dropna='any') + +- groupby will now not return the grouped column for non-cython functions (:issue:`5610`, :issue:`5614`, :issue:`6732`), + as its already the index + + .. 
ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B']) + g = df.groupby('A') + g.count() + g.describe() + +- passing ``as_index`` will leave the grouped column in-place (this is not change in 0.14.0) + + .. ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B']) + g = df.groupby('A', as_index=False) + g.count() + g.describe() + +- Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping + by a Time and a string field simultaneously. See :ref:`the docs `. (:issue:`3794`) + +- Better propagation/preservation of Series names when performing groupby + operations: + + - ``SeriesGroupBy.agg`` will ensure that the name attribute of the original + series is propagated to the result (:issue:`6265`). + - If the function provided to ``GroupBy.apply`` returns a named series, the + name of the series will be kept as the name of the column index of the + DataFrame returned by ``GroupBy.apply`` (:issue:`6124`). This facilitates + ``DataFrame.stack`` operations where the name of the column index is used as + the name of the inserted column containing the pivoted data. + + +.. _whatsnew_0140.sql: + +SQL +~~~ + +The SQL reading and writing functions now support more database flavors +through SQLAlchemy (:issue:`2717`, :issue:`4163`, :issue:`5950`, :issue:`6292`). +All databases supported by SQLAlchemy can be used, such +as PostgreSQL, MySQL, Oracle, Microsoft SQL server (see documentation of +SQLAlchemy on `included dialects +`_). + +The functionality of providing DBAPI connection objects will only be supported +for sqlite3 in the future. The ``'mysql'`` flavor is deprecated. + +The new functions :func:`~pandas.read_sql_query` and :func:`~pandas.read_sql_table` +are introduced. The function :func:`~pandas.read_sql` is kept as a convenience +wrapper around the other two and will delegate to specific function depending on +the provided input (database table name or sql query). + +In practice, you have to provide a SQLAlchemy ``engine`` to the sql functions. +To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine +object from database URI. You only need to create the engine once per database you are +connecting to. For an in-memory sqlite database: + +.. ipython:: python + + from sqlalchemy import create_engine + # Create your connection. + engine = create_engine('sqlite:///:memory:') + +This ``engine`` can then be used to write or read data to/from this database: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']}) + df.to_sql('db_table', engine, index=False) + +You can read data from a database by specifying the table name: + +.. ipython:: python + + pd.read_sql_table('db_table', engine) + +or by specifying a sql query: + +.. ipython:: python + + pd.read_sql_query('SELECT * FROM db_table', engine) + +Some other enhancements to the sql functions include: + +- support for writing the index. This can be controlled with the ``index`` + keyword (default is True). +- specify the column label to use when writing the index with ``index_label``. +- specify string columns to parse as datetimes with the ``parse_dates`` + keyword in :func:`~pandas.read_sql_query` and :func:`~pandas.read_sql_table`. + +.. warning:: + + Some of the existing functions or function aliases have been deprecated + and will be removed in future versions. This includes: ``tquery``, ``uquery``, + ``read_frame``, ``frame_query``, ``write_frame``. + +.. 
warning:: + + The support for the 'mysql' flavor when using DBAPI connection objects has been deprecated. + MySQL will be further supported with SQLAlchemy engines (:issue:`6900`). + + +.. _whatsnew_0140.slicers: + +Multi-indexing using slicers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In 0.14.0 we added a new way to slice MultiIndexed objects. +You can slice a MultiIndex by providing multiple indexers. + +You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label `, +including slices, lists of labels, labels, and boolean indexers. + +You can use ``slice(None)`` to select all the contents of *that* level. You do not need to specify all the +*deeper* levels, they will be implied as ``slice(None)``. + +As usual, **both sides** of the slicers are included as this is label indexing. + +See :ref:`the docs` +See also issues (:issue:`6134`, :issue:`4036`, :issue:`3057`, :issue:`2598`, :issue:`5641`, :issue:`7106`) + +.. warning:: + + You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and + for the **columns**. Their are some ambiguous cases where the passed indexer could be mis-interpreted + as indexing *both* axes, rather than into say the MuliIndex for the rows. + + You should do this: + + .. code-block:: python + + >>> df.loc[(slice('A1', 'A3'), ...), :] # noqa: E901 + + rather than this: + + .. code-block:: python + + >>> df.loc[(slice('A1', 'A3'), ...)] # noqa: E901 + +.. warning:: + + You will need to make sure that the selection axes are fully lexsorted! + +.. ipython:: python + + def mklbl(prefix, n): + return ["%s%s" % (prefix, i) for i in range(n)] + + index = pd.MultiIndex.from_product([mklbl('A', 4), + mklbl('B', 2), + mklbl('C', 4), + mklbl('D', 2)]) + columns = pd.MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], + names=['lvl0', 'lvl1']) + df = pd.DataFrame(np.arange(len(index) * len(columns)).reshape((len(index), + len(columns))), + index=index, + columns=columns).sort_index().sort_index(axis=1) + df + +Basic MultiIndex slicing using slices, lists, and labels. + +.. ipython:: python + + df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] + +You can use a ``pd.IndexSlice`` to shortcut the creation of these slices + +.. ipython:: python + + idx = pd.IndexSlice + df.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']] + +It is possible to perform quite complicated selections using this method on multiple +axes at the same time. + +.. ipython:: python + + df.loc['A1', (slice(None), 'foo')] + df.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']] + +Using a boolean indexer you can provide selection related to the *values*. + +.. ipython:: python + + mask = df[('a', 'foo')] > 200 + df.loc[idx[mask, :, ['C1', 'C3']], idx[:, 'foo']] + +You can also specify the ``axis`` argument to ``.loc`` to interpret the passed +slicers on a single axis. + +.. ipython:: python + + df.loc(axis=0)[:, :, ['C1', 'C3']] + +Furthermore you can *set* the values using these methods + +.. ipython:: python + + df2 = df.copy() + df2.loc(axis=0)[:, :, ['C1', 'C3']] = -10 + df2 + +You can use a right-hand-side of an alignable object as well. + +.. ipython:: python + + df2 = df.copy() + df2.loc[idx[:, :, ['C1', 'C3']], :] = df2 * 1000 + df2 + +.. _whatsnew_0140.plotting: + +Plotting +~~~~~~~~ + +- Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`), See :ref:`the docs`. 
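+
+  A minimal sketch (not part of the original notes; requires matplotlib):
+
+  .. code-block:: python
+
+     import numpy as np
+     import pandas as pd
+     import matplotlib.pyplot as plt
+
+     df = pd.DataFrame({"x": np.random.randn(1000),
+                        "y": np.random.randn(1000)})
+
+     df.plot(kind="hexbin", x="x", y="y", gridsize=25)
+     plt.show()
+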
+- ``DataFrame.plot`` and ``Series.plot`` now support area plots when ``kind='area'`` is specified (:issue:`6656`), See :ref:`the docs`.
+- Pie plots from ``Series.plot`` and ``DataFrame.plot`` with ``kind='pie'`` (:issue:`6976`), See :ref:`the docs`.
+- Plotting with Error Bars is now supported in the ``.plot`` method of ``DataFrame`` and ``Series`` objects (:issue:`3796`, :issue:`6834`), See :ref:`the docs`.
+- ``DataFrame.plot`` and ``Series.plot`` now support a ``table`` keyword for plotting ``matplotlib.Table``, See :ref:`the docs`. The ``table`` keyword can receive the following values.
+
+  - ``False``: Do nothing (default).
+  - ``True``: Draw a table using the ``DataFrame`` or ``Series`` on which the ``plot`` method was called. Data will be transposed to meet matplotlib's default layout.
+  - ``DataFrame`` or ``Series``: Draw a matplotlib table using the passed data. The data will be drawn as displayed in the print method (not transposed automatically).
+    Also, the helper function ``pandas.tools.plotting.table`` is added to create a table from a ``DataFrame`` or ``Series``, and add it to a ``matplotlib.Axes``.
+
+- ``plot(legend='reverse')`` will now reverse the order of legend labels for
+  most plot kinds. (:issue:`6014`)
+- Line plots and area plots can be stacked by passing ``stacked=True`` (:issue:`6656`)
+
+- The following keywords are now accepted by :meth:`DataFrame.plot` with ``kind='bar'`` and ``kind='barh'``:
+
+  - ``width``: Specify the bar width. In previous versions, a static value of 0.5 was passed to matplotlib and could not be overridden. (:issue:`6604`)
+  - ``align``: Specify the bar alignment. Default is ``center`` (different from matplotlib). In previous versions, pandas passed ``align='edge'`` to matplotlib and adjusted the location to ``center`` itself, with the result that the ``align`` keyword was not applied as expected. (:issue:`4525`)
+  - ``position``: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center). (:issue:`6604`)
+
+  Because of the change to the default ``align`` value, the coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0, ...). This is intended to place bar plots on the same coordinates as line plots. However, bar plots may differ unexpectedly if you manually adjust the bar location or the drawing area, for example with ``set_xlim``, ``set_ylim``, etc. In these cases, please adjust your script to the new coordinates.
+
+- The :func:`parallel_coordinates` function now takes argument ``color``
+  instead of ``colors``. A ``FutureWarning`` is raised to alert that
+  the old ``colors`` argument will not be supported in a future release. (:issue:`6956`)
+
+- The :func:`parallel_coordinates` and :func:`andrews_curves` functions now take
+  positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is
+  raised if the old ``data`` argument is used by name. (:issue:`6956`)
+
+- :meth:`DataFrame.boxplot` now supports the ``layout`` keyword (:issue:`6769`)
+- :meth:`DataFrame.boxplot` has a new keyword argument, ``return_type``. It accepts ``'dict'``,
+  ``'axes'``, or ``'both'``, in which case a namedtuple with the matplotlib
+  axes and a dict of matplotlib Lines is returned.
+
+
+.. _whatsnew_0140.prior_deprecations:
+
+Prior version deprecations/changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There are prior version deprecations that are taking effect as of 0.14.0.
+ +- Remove :class:`DateRange` in favor of :class:`DatetimeIndex` (:issue:`6816`) +- Remove ``column`` keyword from ``DataFrame.sort`` (:issue:`4370`) +- Remove ``precision`` keyword from :func:`set_eng_float_format` (:issue:`395`) +- Remove ``force_unicode`` keyword from :meth:`DataFrame.to_string`, + :meth:`DataFrame.to_latex`, and :meth:`DataFrame.to_html`; these function + encode in unicode by default (:issue:`2224`, :issue:`2225`) +- Remove ``nanRep`` keyword from :meth:`DataFrame.to_csv` and + :meth:`DataFrame.to_string` (:issue:`275`) +- Remove ``unique`` keyword from :meth:`HDFStore.select_column` (:issue:`3256`) +- Remove ``inferTimeRule`` keyword from :func:`Timestamp.offset` (:issue:`391`) +- Remove ``name`` keyword from :func:`get_data_yahoo` and + :func:`get_data_google` ( `commit b921d1a `__ ) +- Remove ``offset`` keyword from :class:`DatetimeIndex` constructor + ( `commit 3136390 `__ ) +- Remove ``time_rule`` from several rolling-moment statistical functions, such + as :func:`rolling_sum` (:issue:`1042`) +- Removed neg ``-`` boolean operations on numpy arrays in favor of inv ``~``, as this is going to + be deprecated in numpy 1.9 (:issue:`6960`) + +.. _whatsnew_0140.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions + now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. A + ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments + will not be supported in a future release (:issue:`5505`) + +- The :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.duplicated` methods + now take argument ``subset`` instead of ``cols`` to better align with + :meth:`DataFrame.dropna`. A ``FutureWarning`` is raised to alert that the old + ``cols`` arguments will not be supported in a future release (:issue:`6680`) + +- The :meth:`DataFrame.to_csv` and :meth:`DataFrame.to_excel` functions + now takes argument ``columns`` instead of ``cols``. A + ``FutureWarning`` is raised to alert that the old ``cols`` arguments + will not be supported in a future release (:issue:`6645`) + +- Indexers will warn ``FutureWarning`` when used with a scalar indexer and + a non-floating point Index (:issue:`4892`, :issue:`6960`) + + .. code-block:: ipython + + # non-floating point indexes can only be indexed by integers / labels + In [1]: pd.Series(1, np.arange(5))[3.0] + pandas/core/index.py:469: FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[1]: 1 + + In [2]: pd.Series(1, np.arange(5)).iloc[3.0] + pandas/core/index.py:469: FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[2]: 1 + + In [3]: pd.Series(1, np.arange(5)).iloc[3.0:4] + pandas/core/index.py:527: FutureWarning: slice indexers when using iloc should be integers and not floating point + Out[3]: + 3 1 + dtype: int64 + + # these are Float64Indexes, so integer or floating point is acceptable + In [4]: pd.Series(1, np.arange(5.))[3] + Out[4]: 1 + + In [5]: pd.Series(1, np.arange(5.))[3.0] + Out[6]: 1 + +- Numpy 1.9 compat w.r.t. deprecation warnings (:issue:`6960`) + +- :meth:`Panel.shift` now has a function signature that matches :meth:`DataFrame.shift`. + The old positional argument ``lags`` has been changed to a keyword argument + ``periods`` with a default value of 1. A ``FutureWarning`` is raised if the + old argument ``lags`` is used by name. 
(:issue:`6910`) +- The ``order`` keyword argument of :func:`factorize` will be removed. (:issue:`6926`). + +- Remove the ``copy`` keyword from :meth:`DataFrame.xs`, :meth:`Panel.major_xs`, :meth:`Panel.minor_xs`. A view will be + returned if possible, otherwise a copy will be made. Previously the user could think that ``copy=False`` would + ALWAYS return a view. (:issue:`6894`) + +- The :func:`parallel_coordinates` function now takes argument ``color`` + instead of ``colors``. A ``FutureWarning`` is raised to alert that + the old ``colors`` argument will not be supported in a future release. (:issue:`6956`) + +- The :func:`parallel_coordinates` and :func:`andrews_curves` functions now take + positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is + raised if the old ``data`` argument is used by name. (:issue:`6956`) + +- The support for the 'mysql' flavor when using DBAPI connection objects has been deprecated. + MySQL will be further supported with SQLAlchemy engines (:issue:`6900`). + +- The following ``io.sql`` functions have been deprecated: ``tquery``, ``uquery``, ``read_frame``, ``frame_query``, ``write_frame``. + +- The ``percentile_width`` keyword argument in :meth:`~DataFrame.describe` has been deprecated. + Use the ``percentiles`` keyword instead, which takes a list of percentiles to display. The + default output is unchanged. + +- The default return type of :func:`boxplot` will change from a dict to a matplotlib Axes + in a future release. You can use the future behavior now by passing ``return_type='axes'`` + to boxplot. + +.. _whatsnew_0140.knownissues: + +Known issues +~~~~~~~~~~~~ + +- OpenPyXL 2.0.0 breaks backwards compatibility (:issue:`7169`) + + +.. _whatsnew_0140.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- DataFrame and Series will create a MultiIndex object if passed a tuples dict, See :ref:`the docs` (:issue:`3323`) + + .. ipython:: python + + pd.Series({('a', 'b'): 1, ('a', 'a'): 0, + ('a', 'c'): 2, ('b', 'a'): 3, ('b', 'b'): 4}) + pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2}, + ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4}, + ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6}, + ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8}, + ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}}) + +- Added the ``sym_diff`` method to ``Index`` (:issue:`5543`) +- ``DataFrame.to_latex`` now takes a longtable keyword, which if True will return a table in a longtable environment. (:issue:`6617`) +- Add option to turn off escaping in ``DataFrame.to_latex`` (:issue:`6472`) +- ``pd.read_clipboard`` will, if the keyword ``sep`` is unspecified, try to detect data copied from a spreadsheet + and parse accordingly. (:issue:`6223`) +- Joining a singly-indexed DataFrame with a MultiIndexed DataFrame (:issue:`3662`) + + See :ref:`the docs`. Joining MultiIndex DataFrames on both the left and right is not yet supported ATM. + + .. 
ipython:: python + + household = pd.DataFrame({'household_id': [1, 2, 3], + 'male': [0, 1, 0], + 'wealth': [196087.3, 316478.7, 294750] + }, + columns=['household_id', 'male', 'wealth'] + ).set_index('household_id') + household + portfolio = pd.DataFrame({'household_id': [1, 2, 2, 3, 3, 3, 4], + 'asset_id': ["nl0000301109", + "nl0000289783", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "nl0000289965", + np.nan], + 'name': ["ABN Amro", + "Robeco", + "Royal Dutch Shell", + "Royal Dutch Shell", + "AAB Eastern Europe Equity Fund", + "Postbank BioTech Fonds", + np.nan], + 'share': [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0] + }, + columns=['household_id', 'asset_id', 'name', 'share'] + ).set_index(['household_id', 'asset_id']) + portfolio + + household.join(portfolio, how='inner') + +- ``quotechar``, ``doublequote``, and ``escapechar`` can now be specified when + using ``DataFrame.to_csv`` (:issue:`5414`, :issue:`4528`) +- Partially sort by only the specified levels of a MultiIndex with the + ``sort_remaining`` boolean kwarg. (:issue:`3984`) +- Added ``to_julian_date`` to ``TimeStamp`` and ``DatetimeIndex``. The Julian + Date is used primarily in astronomy and represents the number of days from + noon, January 1, 4713 BC. Because nanoseconds are used to define the time + in pandas the actual range of dates that you can use is 1678 AD to 2262 AD. (:issue:`4041`) +- ``DataFrame.to_stata`` will now check data for compatibility with Stata data types + and will upcast when needed. When it is not possible to losslessly upcast, a warning + is issued (:issue:`6327`) +- ``DataFrame.to_stata`` and ``StataWriter`` will accept keyword arguments time_stamp + and data_label which allow the time stamp and dataset label to be set when creating a + file. (:issue:`6545`) +- ``pandas.io.gbq`` now handles reading unicode strings properly. (:issue:`5940`) +- :ref:`Holidays Calendars` are now available and can be used with the ``CustomBusinessDay`` offset (:issue:`6719`) +- ``Float64Index`` is now backed by a ``float64`` dtype ndarray instead of an + ``object`` dtype array (:issue:`6471`). +- Implemented ``Panel.pct_change`` (:issue:`6904`) +- Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:`rolling_max` defaults to max, + :func:`rolling_min` defaults to min, and all others default to mean (:issue:`6297`) +- ``CustomBusinessMonthBegin`` and ``CustomBusinessMonthEnd`` are now available (:issue:`6866`) +- :meth:`Series.quantile` and :meth:`DataFrame.quantile` now accept an array of + quantiles. +- :meth:`~DataFrame.describe` now accepts an array of percentiles to include in the summary statistics (:issue:`4196`) +- ``pivot_table`` can now accept ``Grouper`` by ``index`` and ``columns`` keywords (:issue:`6913`) + + .. 
ipython:: python + + import datetime + df = pd.DataFrame({ + 'Branch': 'A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Carl Joe Joe'.split(), + 'Quantity': [1, 3, 5, 1, 8, 1], + 'Date': [datetime.datetime(2013, 11, 1, 13, 0), + datetime.datetime(2013, 9, 1, 13, 5), + datetime.datetime(2013, 10, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0), + datetime.datetime(2013, 11, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0)], + 'PayDay': [datetime.datetime(2013, 10, 4, 0, 0), + datetime.datetime(2013, 10, 15, 13, 5), + datetime.datetime(2013, 9, 5, 20, 0), + datetime.datetime(2013, 11, 2, 10, 0), + datetime.datetime(2013, 10, 7, 20, 0), + datetime.datetime(2013, 9, 5, 10, 0)]}) + df + + df.pivot_table(values='Quantity', + index=pd.Grouper(freq='M', key='Date'), + columns=pd.Grouper(freq='M', key='PayDay'), + aggfunc=np.sum) + +- Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`) +- Add :meth:`~Series.nsmallest` and :meth:`Series.nlargest` methods to Series, See :ref:`the docs ` (:issue:`3960`) + +- ``PeriodIndex`` fully supports partial string indexing like ``DatetimeIndex`` (:issue:`7043`) + + .. ipython:: python + + prng = pd.period_range('2013-01-01 09:00', periods=100, freq='H') + ps = pd.Series(np.random.randn(len(prng)), index=prng) + ps + ps['2013-01-02'] + +- ``read_excel`` can now read milliseconds in Excel dates and times with xlrd >= 0.9.3. (:issue:`5945`) +- ``pd.stats.moments.rolling_var`` now uses Welford's method for increased numerical stability (:issue:`6817`) +- pd.expanding_apply and pd.rolling_apply now take args and kwargs that are passed on to + the func (:issue:`6289`) +- ``DataFrame.rank()`` now has a percentage rank option (:issue:`5971`) +- ``Series.rank()`` now has a percentage rank option (:issue:`5971`) +- ``Series.rank()`` and ``DataFrame.rank()`` now accept ``method='dense'`` for ranks without gaps (:issue:`6514`) +- Support passing ``encoding`` with xlwt (:issue:`3710`) +- Refactor Block classes removing ``Block.items`` attributes to avoid duplication + in item handling (:issue:`6745`, :issue:`6988`). +- Testing statements updated to use specialized asserts (:issue:`6175`) + + + +.. _whatsnew_0140.performance: + +Performance +~~~~~~~~~~~ + +- Performance improvement when converting ``DatetimeIndex`` to floating ordinals + using ``DatetimeConverter`` (:issue:`6636`) +- Performance improvement for ``DataFrame.shift`` (:issue:`5609`) +- Performance improvement in indexing into a MultiIndexed Series (:issue:`5567`) +- Performance improvements in single-dtyped indexing (:issue:`6484`) +- Improve performance of DataFrame construction with certain offsets, by removing faulty caching + (e.g. MonthEnd,BusinessMonthEnd), (:issue:`6479`) +- Improve performance of ``CustomBusinessDay`` (:issue:`6584`) +- improve performance of slice indexing on Series with string keys (:issue:`6341`, :issue:`6372`) +- Performance improvement for ``DataFrame.from_records`` when reading a + specified number of rows from an iterable (:issue:`6700`) +- Performance improvements in timedelta conversions for integer dtypes (:issue:`6754`) +- Improved performance of compatible pickles (:issue:`6899`) +- Improve performance in certain reindexing operations by optimizing ``take_2d`` (:issue:`6749`) +- ``GroupBy.count()`` is now implemented in Cython and is much faster for large + numbers of groups (:issue:`7016`). + +Experimental +~~~~~~~~~~~~ + +There are no experimental changes in 0.14.0 + + +.. 
_whatsnew_0140.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in Series ValueError when index doesn't match data (:issue:`6532`) +- Prevent segfault due to MultiIndex not being supported in HDFStore table + format (:issue:`1848`) +- Bug in ``pd.DataFrame.sort_index`` where mergesort wasn't stable when ``ascending=False`` (:issue:`6399`) +- Bug in ``pd.tseries.frequencies.to_offset`` when argument has leading zeros (:issue:`6391`) +- Bug in version string gen. for dev versions with shallow clones / install from tarball (:issue:`6127`) +- Inconsistent tz parsing ``Timestamp`` / ``to_datetime`` for current year (:issue:`5958`) +- Indexing bugs with reordered indexes (:issue:`6252`, :issue:`6254`) +- Bug in ``.xs`` with a Series multiindex (:issue:`6258`, :issue:`5684`) +- Bug in conversion of a string types to a DatetimeIndex with a specified frequency (:issue:`6273`, :issue:`6274`) +- Bug in ``eval`` where type-promotion failed for large expressions (:issue:`6205`) +- Bug in interpolate with ``inplace=True`` (:issue:`6281`) +- ``HDFStore.remove`` now handles start and stop (:issue:`6177`) +- ``HDFStore.select_as_multiple`` handles start and stop the same way as ``select`` (:issue:`6177`) +- ``HDFStore.select_as_coordinates`` and ``select_column`` works with a ``where`` clause that results in filters (:issue:`6177`) +- Regression in join of non_unique_indexes (:issue:`6329`) +- Issue with groupby ``agg`` with a single function and a mixed-type frame (:issue:`6337`) +- Bug in ``DataFrame.replace()`` when passing a non- ``bool`` + ``to_replace`` argument (:issue:`6332`) +- Raise when trying to align on different levels of a MultiIndex assignment (:issue:`3738`) +- Bug in setting complex dtypes via boolean indexing (:issue:`6345`) +- Bug in TimeGrouper/resample when presented with a non-monotonic DatetimeIndex that would return invalid results. (:issue:`4161`) +- Bug in index name propagation in TimeGrouper/resample (:issue:`4161`) +- TimeGrouper has a more compatible API to the rest of the groupers (e.g. ``groups`` was missing) (:issue:`3881`) +- Bug in multiple grouping with a TimeGrouper depending on target column order (:issue:`6764`) +- Bug in ``pd.eval`` when parsing strings with possible tokens like ``'&'`` + (:issue:`6351`) +- Bug correctly handle placements of ``-inf`` in Panels when dividing by integer 0 (:issue:`6178`) +- ``DataFrame.shift`` with ``axis=1`` was raising (:issue:`6371`) +- Disabled clipboard tests until release time (run locally with ``nosetests -A disabled``) (:issue:`6048`). +- Bug in ``DataFrame.replace()`` when passing a nested ``dict`` that contained + keys not in the values to be replaced (:issue:`6342`) +- ``str.match`` ignored the na flag (:issue:`6609`). +- Bug in take with duplicate columns that were not consolidated (:issue:`6240`) +- Bug in interpolate changing dtypes (:issue:`6290`) +- Bug in ``Series.get``, was using a buggy access method (:issue:`6383`) +- Bug in hdfstore queries of the form ``where=[('date', '>=', datetime(2013,1,1)), ('date', '<=', datetime(2014,1,1))]`` (:issue:`6313`) +- Bug in ``DataFrame.dropna`` with duplicate indices (:issue:`6355`) +- Regression in chained getitem indexing with embedded list-like from 0.12 (:issue:`6394`) +- ``Float64Index`` with nans not comparing correctly (:issue:`6401`) +- ``eval``/``query`` expressions with strings containing the ``@`` character + will now work (:issue:`6366`). 
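+
+  A minimal sketch of the kind of expression this enables (the frame and
+  column name here are made up for illustration):
+
+  .. code-block:: python
+
+     import pandas as pd
+
+     df = pd.DataFrame({'email': ['a@example.com', 'b@example.com', 'c']})
+
+     # the '@' inside the string literal is now parsed as part of the string,
+     # not as a local-variable marker
+     df.query('email == "a@example.com"')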
+- Bug in ``Series.reindex`` when specifying a ``method`` with some nan values was inconsistent (noted on a resample) (:issue:`6418`) +- Bug in :meth:`DataFrame.replace` where nested dicts were erroneously + depending on the order of dictionary keys and values (:issue:`5338`). +- Performance issue in concatenating with empty objects (:issue:`3259`) +- Clarify sorting of ``sym_diff`` on ``Index`` objects with ``NaN`` values (:issue:`6444`) +- Regression in ``MultiIndex.from_product`` with a ``DatetimeIndex`` as input (:issue:`6439`) +- Bug in ``str.extract`` when passed a non-default index (:issue:`6348`) +- Bug in ``str.split`` when passed ``pat=None`` and ``n=1`` (:issue:`6466`) +- Bug in ``io.data.DataReader`` when passed ``"F-F_Momentum_Factor"`` and ``data_source="famafrench"`` (:issue:`6460`) +- Bug in ``sum`` of a ``timedelta64[ns]`` series (:issue:`6462`) +- Bug in ``resample`` with a timezone and certain offsets (:issue:`6397`) +- Bug in ``iat/iloc`` with duplicate indices on a Series (:issue:`6493`) +- Bug in ``read_html`` where nan's were incorrectly being used to indicate + missing values in text. Should use the empty string for consistency with the + rest of pandas (:issue:`5129`). +- Bug in ``read_html`` tests where redirected invalid URLs would make one test + fail (:issue:`6445`). +- Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`) +- Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`) +- Regression from 0.13 in the treatment of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`) +- ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`). +- Bug in setitem with a duplicate index and an alignable rhs (:issue:`6541`) +- Bug in setitem with ``.loc`` on mixed integer Indexes (:issue:`6546`) +- Bug in ``pd.read_stata`` which would use the wrong data types and missing values (:issue:`6327`) +- Bug in ``DataFrame.to_stata`` that lead to data loss in certain cases, and could be exported using the + wrong data types and missing values (:issue:`6335`) +- ``StataWriter`` replaces missing values in string columns by empty string (:issue:`6802`) +- Inconsistent types in ``Timestamp`` addition/subtraction (:issue:`6543`) +- Bug in preserving frequency across Timestamp addition/subtraction (:issue:`4547`) +- Bug in empty list lookup caused ``IndexError`` exceptions (:issue:`6536`, :issue:`6551`) +- ``Series.quantile`` raising on an ``object`` dtype (:issue:`6555`) +- Bug in ``.xs`` with a ``nan`` in level when dropped (:issue:`6574`) +- Bug in fillna with ``method='bfill/ffill'`` and ``datetime64[ns]`` dtype (:issue:`6587`) +- Bug in sql writing with mixed dtypes possibly leading to data loss (:issue:`6509`) +- Bug in ``Series.pop`` (:issue:`6600`) +- Bug in ``iloc`` indexing when positional indexer matched ``Int64Index`` of the corresponding axis and no reordering happened (:issue:`6612`) +- Bug in ``fillna`` with ``limit`` and ``value`` specified +- Bug in ``DataFrame.to_stata`` when columns have non-string names (:issue:`4558`) +- Bug in compat with ``np.compress``, surfaced in (:issue:`6658`) +- Bug in binary operations with a rhs of a Series not aligning (:issue:`6681`) +- Bug in ``DataFrame.to_stata`` which incorrectly handles nan values and ignores ``with_index`` keyword argument (:issue:`6685`) +- Bug in resample with extra bins when using an evenly divisible frequency (:issue:`4076`) +- Bug in consistency of groupby aggregation when 
passing a custom function (:issue:`6715`) +- Bug in resample when ``how=None`` resample freq is the same as the axis frequency (:issue:`5955`) +- Bug in downcasting inference with empty arrays (:issue:`6733`) +- Bug in ``obj.blocks`` on sparse containers dropping all but the last items of same for dtype (:issue:`6748`) +- Bug in unpickling ``NaT (NaTType)`` (:issue:`4606`) +- Bug in ``DataFrame.replace()`` where regex meta characters were being treated + as regex even when ``regex=False`` (:issue:`6777`). +- Bug in timedelta ops on 32-bit platforms (:issue:`6808`) +- Bug in setting a tz-aware index directly via ``.index`` (:issue:`6785`) +- Bug in expressions.py where numexpr would try to evaluate arithmetic ops + (:issue:`6762`). +- Bug in Makefile where it didn't remove Cython generated C files with ``make + clean`` (:issue:`6768`) +- Bug with numpy < 1.7.2 when reading long strings from ``HDFStore`` (:issue:`6166`) +- Bug in ``DataFrame._reduce`` where non bool-like (0/1) integers were being + converted into bools. (:issue:`6806`) +- Regression from 0.13 with ``fillna`` and a Series on datetime-like (:issue:`6344`) +- Bug in adding ``np.timedelta64`` to ``DatetimeIndex`` with timezone outputs incorrect results (:issue:`6818`) +- Bug in ``DataFrame.replace()`` where changing a dtype through replacement + would only replace the first occurrence of a value (:issue:`6689`) +- Better error message when passing a frequency of 'MS' in ``Period`` construction (GH5332) +- Bug in ``Series.__unicode__`` when ``max_rows=None`` and the Series has more than 1000 rows. (:issue:`6863`) +- Bug in ``groupby.get_group`` where a datelike wasn't always accepted (:issue:`5267`) +- Bug in ``groupBy.get_group`` created by ``TimeGrouper`` raises ``AttributeError`` (:issue:`6914`) +- Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` converting ``NaT`` incorrectly (:issue:`5546`) +- Bug in arithmetic operations affecting ``NaT`` (:issue:`6873`) +- Bug in ``Series.str.extract`` where the resulting ``Series`` from a single + group match wasn't renamed to the group name +- Bug in ``DataFrame.to_csv`` where setting ``index=False`` ignored the + ``header`` kwarg (:issue:`6186`) +- Bug in ``DataFrame.plot`` and ``Series.plot``, where the legend behave inconsistently when plotting to the same axes repeatedly (:issue:`6678`) +- Internal tests for patching ``__finalize__`` / bug in merge not finalizing (:issue:`6923`, :issue:`6927`) +- accept ``TextFileReader`` in ``concat``, which was affecting a common user idiom (:issue:`6583`) +- Bug in C parser with leading white space (:issue:`3374`) +- Bug in C parser with ``delim_whitespace=True`` and ``\r``-delimited lines +- Bug in python parser with explicit MultiIndex in row following column header (:issue:`6893`) +- Bug in ``Series.rank`` and ``DataFrame.rank`` that caused small floats (<1e-13) to all receive the same rank (:issue:`6886`) +- Bug in ``DataFrame.apply`` with functions that used ``*args`` or ``**kwargs`` and returned + an empty result (:issue:`6952`) +- Bug in sum/mean on 32-bit platforms on overflows (:issue:`6915`) +- Moved ``Panel.shift`` to ``NDFrame.slice_shift`` and fixed to respect multiple dtypes. 
(:issue:`6959`)
+- Bug where enabling ``subplots=True`` in ``DataFrame.plot`` with only a single column raised ``TypeError``, and ``Series.plot`` raised ``AttributeError`` (:issue:`6951`)
+- Bug in ``DataFrame.plot`` drawing unnecessary axes when enabling ``subplots`` with ``kind='scatter'`` (:issue:`6951`)
+- Bug in ``read_csv`` from a filesystem with non-utf-8 encoding (:issue:`6807`)
+- Bug in ``iloc`` when setting / aligning (:issue:`6766`)
+- Bug causing ``UnicodeEncodeError`` when ``get_dummies`` was called with unicode values and a prefix (:issue:`6885`)
+- Bug in timeseries-with-frequency plot cursor display (:issue:`5453`)
+- Bug surfaced in ``groupby.plot`` when using a ``Float64Index`` (:issue:`7025`)
+- Stopped tests from failing if options data isn't able to be downloaded from Yahoo (:issue:`7034`)
+- Bug in ``parallel_coordinates`` and ``radviz`` where reordering of the class column
+  caused a possible color/class mismatch (:issue:`6956`)
+- Bug in ``radviz`` and ``andrews_curves`` where multiple values of 'color'
+  were being passed to the plotting method (:issue:`6956`)
+- Bug in ``Float64Index.isin()`` where an index containing ``nan`` values would
+  claim that it contained all values (:issue:`7066`).
+- Bug in ``DataFrame.boxplot`` where it failed to use the axis passed as the ``ax`` argument (:issue:`3578`)
+- Bug in the ``XlsxWriter`` and ``XlwtWriter`` implementations that resulted in datetime columns being formatted without the time (:issue:`7075`)
+- :func:`read_fwf` treats ``None`` in ``colspec`` like regular python slices. It now reads from the beginning
+  or until the end of the line when ``colspec`` contains a ``None`` (previously raised a ``TypeError``)
+- Bug in cache coherence with chained indexing and slicing; add ``_is_view`` property to ``NDFrame`` to correctly predict
+  views; mark ``is_copy`` on ``xs`` only if it's an actual copy (and not a view) (:issue:`7084`)
+- Bug in DatetimeIndex creation from string ndarray with ``dayfirst=True`` (:issue:`5917`)
+- Bug in ``MultiIndex.from_arrays`` created from ``DatetimeIndex`` doesn't preserve ``freq`` and ``tz`` (:issue:`7090`)
+- Bug in ``unstack`` raises ``ValueError`` when ``MultiIndex`` contains ``PeriodIndex`` (:issue:`4342`)
+- Bug in ``boxplot`` and ``hist`` drawing unnecessary axes (:issue:`6769`)
+- Regression in ``groupby.nth()`` for out-of-bounds indexers (:issue:`6621`)
+- Bug in ``quantile`` with datetime values (:issue:`6965`)
+- Bug in ``DataFrame.set_index``, ``reindex`` and ``pivot`` don't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`3950`, :issue:`5878`, :issue:`6631`)
+- Bug in ``MultiIndex.get_level_values`` doesn't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`7092`)
+- Bug in ``Groupby`` doesn't preserve ``tz`` (:issue:`3950`)
+- Bug in ``PeriodIndex`` partial string slicing (:issue:`6716`)
+- Bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the ``large_repr`` set to 'info'
+  (:issue:`7105`)
+- Bug in ``DatetimeIndex`` specifying ``freq`` raises ``ValueError`` when passed value is too short (:issue:`7098`)
+- Fixed a bug with the ``info`` repr not honoring the ``display.max_info_columns`` setting (:issue:`6939`)
+- Bug in ``PeriodIndex`` string slicing with out of bounds values (:issue:`5407`)
+- Fixed a memory error in the hashtable implementation/factorizer on resizing of large tables (:issue:`7157`)
+- Bug in ``isnull`` when applied to 0-dimensional object arrays (:issue:`7176`)
+- Bug in
``query``/``eval`` where global constants were not looked up correctly + (:issue:`7178`) +- Bug in recognizing out-of-bounds positional list indexers with ``iloc`` and a multi-axis tuple indexer (:issue:`7189`) +- Bug in setitem with a single value, MultiIndex and integer indices (:issue:`7190`, :issue:`7218`) +- Bug in expressions evaluation with reversed ops, showing in series-dataframe ops (:issue:`7198`, :issue:`7192`) +- Bug in multi-axis indexing with > 2 ndim and a MultiIndex (:issue:`7199`) +- Fix a bug where invalid eval/query operations would blow the stack (:issue:`5198`) + + +.. _whatsnew_0.14.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.13.1..v0.14.0 diff --git a/doc/source/whatsnew/v0.14.1.rst b/doc/source/whatsnew/v0.14.1.rst new file mode 100644 index 00000000..a8f8955c --- /dev/null +++ b/doc/source/whatsnew/v0.14.1.rst @@ -0,0 +1,284 @@ +.. _whatsnew_0141: + +Version 0.14.1 (July 11, 2014) +------------------------------ + +{{ header }} + + +This is a minor release from 0.14.0 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +- Highlights include: + + - New methods :meth:`~pandas.DataFrame.select_dtypes` to select columns + based on the dtype and :meth:`~pandas.Series.sem` to calculate the + standard error of the mean. + - Support for dateutil timezones (see :ref:`docs `). + - Support for ignoring full line comments in the :func:`~pandas.read_csv` + text parser. + - New documentation section on :ref:`Options and Settings `. + - Lots of bug fixes. + +- :ref:`Enhancements ` +- :ref:`API Changes ` +- :ref:`Performance Improvements ` +- :ref:`Experimental Changes ` +- :ref:`Bug Fixes ` + +.. _whatsnew_0141.api: + +API changes +~~~~~~~~~~~ + +- Openpyxl now raises a ValueError on construction of the openpyxl writer + instead of warning on pandas import (:issue:`7284`). + +- For ``StringMethods.extract``, when no match is found, the result - only + containing ``NaN`` values - now also has ``dtype=object`` instead of + ``float`` (:issue:`7242`) + +- ``Period`` objects no longer raise a ``TypeError`` when compared using ``==`` + with another object that *isn't* a ``Period``. Instead + when comparing a ``Period`` with another object using ``==`` if the other + object isn't a ``Period`` ``False`` is returned. (:issue:`7376`) + +- Previously, the behaviour on resetting the time or not in + ``offsets.apply``, ``rollforward`` and ``rollback`` operations differed + between offsets. With the support of the ``normalize`` keyword for all offsets(see + below) with a default value of False (preserve time), the behaviour changed for certain + offsets (BusinessMonthBegin, MonthEnd, BusinessMonthEnd, CustomBusinessMonthEnd, + BusinessYearBegin, LastWeekOfMonth, FY5253Quarter, LastWeekOfMonth, Easter): + + .. code-block:: ipython + + In [6]: from pandas.tseries import offsets + + In [7]: d = pd.Timestamp('2014-01-01 09:00') + + # old behaviour < 0.14.1 + In [8]: d + offsets.MonthEnd() + Out[8]: pd.Timestamp('2014-01-31 00:00:00') + + Starting from 0.14.1 all offsets preserve time by default. The old + behaviour can be obtained with ``normalize=True`` + + .. ipython:: python + :suppress: + + import pandas.tseries.offsets as offsets + + d = pd.Timestamp("2014-01-01 09:00") + + .. 
ipython:: python + + # new behaviour + d + offsets.MonthEnd() + d + offsets.MonthEnd(normalize=True) + + Note that for the other offsets the default behaviour did not change. + +- Add back ``#N/A N/A`` as a default NA value in text parsing, (regression from 0.12) (:issue:`5521`) +- Raise a ``TypeError`` on inplace-setting with a ``.where`` and a non ``np.nan`` value as this is inconsistent + with a set-item expression like ``df[mask] = None`` (:issue:`7656`) + + +.. _whatsnew_0141.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- Add ``dropna`` argument to ``value_counts`` and ``nunique`` (:issue:`5569`). +- Add :meth:`~pandas.DataFrame.select_dtypes` method to allow selection of + columns based on dtype (:issue:`7316`). See :ref:`the docs `. +- All ``offsets`` supports the ``normalize`` keyword to specify whether + ``offsets.apply``, ``rollforward`` and ``rollback`` resets the time (hour, + minute, etc) or not (default ``False``, preserves time) (:issue:`7156`): + + .. code-block:: python + + import pandas.tseries.offsets as offsets + + day = offsets.Day() + day.apply(pd.Timestamp("2014-01-01 09:00")) + + day = offsets.Day(normalize=True) + day.apply(pd.Timestamp("2014-01-01 09:00")) + +- ``PeriodIndex`` is represented as the same format as ``DatetimeIndex`` (:issue:`7601`) +- ``StringMethods`` now work on empty Series (:issue:`7242`) +- The file parsers ``read_csv`` and ``read_table`` now ignore line comments provided by + the parameter ``comment``, which accepts only a single character for the C reader. + In particular, they allow for comments before file data begins (:issue:`2685`) +- Add ``NotImplementedError`` for simultaneous use of ``chunksize`` and ``nrows`` + for read_csv() (:issue:`6774`). +- Tests for basic reading of public S3 buckets now exist (:issue:`7281`). +- ``read_html`` now sports an ``encoding`` argument that is passed to the + underlying parser library. You can use this to read non-ascii encoded web + pages (:issue:`7323`). +- ``read_excel`` now supports reading from URLs in the same way + that ``read_csv`` does. (:issue:`6809`) +- Support for dateutil timezones, which can now be used in the same way as + pytz timezones across pandas. (:issue:`4688`) + + .. ipython:: python + + rng = pd.date_range( + "3/6/2012 00:00", periods=10, freq="D", tz="dateutil/Europe/London" + ) + rng.tz + + See :ref:`the docs `. + +- Implemented ``sem`` (standard error of the mean) operation for ``Series``, + ``DataFrame``, ``Panel``, and ``Groupby`` (:issue:`6897`) +- Add ``nlargest`` and ``nsmallest`` to the ``Series`` ``groupby`` allowlist, + which means you can now use these methods on a ``SeriesGroupBy`` object + (:issue:`7053`). +- All offsets ``apply``, ``rollforward`` and ``rollback`` can now handle ``np.datetime64``, previously results in ``ApplyTypeError`` (:issue:`7452`) +- ``Period`` and ``PeriodIndex`` can contain ``NaT`` in its values (:issue:`7485`) +- Support pickling ``Series``, ``DataFrame`` and ``Panel`` objects with + non-unique labels along *item* axis (``index``, ``columns`` and ``items`` + respectively) (:issue:`7370`). +- Improved inference of datetime/timedelta with mixed null objects. Regression from 0.13.1 in interpretation of an object Index + with all null elements (:issue:`7431`) + +.. 
_whatsnew_0141.performance:
+
+Performance
+~~~~~~~~~~~
+
+- Improvements in dtype inference for numeric operations, yielding performance gains for the dtypes ``int64``, ``timedelta64`` and ``datetime64`` (:issue:`7223`)
+- Improvements in ``Series.transform`` for significant performance gains (:issue:`6496`)
+- Improvements in ``DataFrame.transform`` with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`)
+- Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`)
+- Improvements in ``MultiIndex.from_product`` for large iterables (:issue:`7627`)
+
+
+.. _whatsnew_0141.experimental:
+
+Experimental
+~~~~~~~~~~~~
+
+- ``pandas.io.data.Options`` has a new method, ``get_all_data``, and now consistently returns a
+  MultiIndexed ``DataFrame`` (:issue:`5602`)
+- ``io.gbq.read_gbq`` and ``io.gbq.to_gbq`` were refactored to remove the
+  dependency on the Google ``bq.py`` command line client. This submodule
+  now uses ``httplib2`` and the Google ``apiclient`` and ``oauth2client`` API client
+  libraries, which should be more stable and, therefore, reliable than
+  ``bq.py``. See :ref:`the docs `. (:issue:`6937`).
+
+
+.. _whatsnew_0141.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+
+- Bug in ``DataFrame.where`` with a symmetric shaped frame and a passed other of a DataFrame (:issue:`7506`)
+- Bug in Panel indexing with a MultiIndex axis (:issue:`7516`)
+- Regression in datetimelike slice indexing with a duplicated index and non-exact end-points (:issue:`7523`)
+- Bug in setitem with list-of-lists and single vs mixed types (:issue:`7551`)
+- Bug in time ops with non-aligned Series (:issue:`7500`)
+- Bug in timedelta inference when assigning an incomplete Series (:issue:`7592`)
+- Bug in groupby ``.nth`` with a Series and integer-like column name (:issue:`7559`)
+- Bug in ``Series.get`` with a boolean accessor (:issue:`7407`)
+- Bug in ``value_counts`` where ``NaT`` did not qualify as missing (``NaN``) (:issue:`7423`)
+- Bug in ``to_timedelta`` that accepted invalid units and misinterpreted 'm/h' (:issue:`7611`, :issue:`6423`)
+- Bug where a line plot didn't set the correct ``xlim`` if ``secondary_y=True`` (:issue:`7459`)
+- Bug where grouped ``hist`` and ``scatter`` plots used the old ``figsize`` default (:issue:`7394`)
+- Bug in plotting subplots with ``DataFrame.plot``, ``hist`` clears passed ``ax`` even if the number of subplots is one (:issue:`7391`).
+- Bug in plotting subplots with ``DataFrame.boxplot`` with ``by`` kw raises ``ValueError`` if the number of subplots exceeds 1 (:issue:`7391`).
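+
+  A minimal sketch of the previously failing call (the column and group names
+  here are made up for illustration):
+
+  .. code-block:: python
+
+     import numpy as np
+     import pandas as pd
+
+     df = pd.DataFrame({'a': np.random.randn(20),
+                        'b': np.random.randn(20),
+                        'g': ['x', 'y'] * 10})
+
+     # one subplot per value column; with more than one subplot this used to raise ValueError
+     df.boxplot(by='g')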
+- Bug in subplots displays ``ticklabels`` and ``labels`` in different rule (:issue:`5897`) +- Bug in ``Panel.apply`` with a MultiIndex as an axis (:issue:`7469`) +- Bug in ``DatetimeIndex.insert`` doesn't preserve ``name`` and ``tz`` (:issue:`7299`) +- Bug in ``DatetimeIndex.asobject`` doesn't preserve ``name`` (:issue:`7299`) +- Bug in MultiIndex slicing with datetimelike ranges (strings and Timestamps), (:issue:`7429`) +- Bug in ``Index.min`` and ``max`` doesn't handle ``nan`` and ``NaT`` properly (:issue:`7261`) +- Bug in ``PeriodIndex.min/max`` results in ``int`` (:issue:`7609`) +- Bug in ``resample`` where ``fill_method`` was ignored if you passed ``how`` (:issue:`2073`) +- Bug in ``TimeGrouper`` doesn't exclude column specified by ``key`` (:issue:`7227`) +- Bug in ``DataFrame`` and ``Series`` bar and barh plot raises ``TypeError`` when ``bottom`` + and ``left`` keyword is specified (:issue:`7226`) +- Bug in ``DataFrame.hist`` raises ``TypeError`` when it contains non numeric column (:issue:`7277`) +- Bug in ``Index.delete`` does not preserve ``name`` and ``freq`` attributes (:issue:`7302`) +- Bug in ``DataFrame.query()``/``eval`` where local string variables with the @ + sign were being treated as temporaries attempting to be deleted + (:issue:`7300`). +- Bug in ``Float64Index`` which didn't allow duplicates (:issue:`7149`). +- Bug in ``DataFrame.replace()`` where truthy values were being replaced + (:issue:`7140`). +- Bug in ``StringMethods.extract()`` where a single match group Series + would use the matcher's name instead of the group name (:issue:`7313`). +- Bug in ``isnull()`` when ``mode.use_inf_as_null == True`` where isnull + wouldn't test ``True`` when it encountered an ``inf``/``-inf`` + (:issue:`7315`). +- Bug in inferred_freq results in None for eastern hemisphere timezones (:issue:`7310`) +- Bug in ``Easter`` returns incorrect date when offset is negative (:issue:`7195`) +- Bug in broadcasting with ``.div``, integer dtypes and divide-by-zero (:issue:`7325`) +- Bug in ``CustomBusinessDay.apply`` raises ``NameError`` when ``np.datetime64`` object is passed (:issue:`7196`) +- Bug in ``MultiIndex.append``, ``concat`` and ``pivot_table`` don't preserve timezone (:issue:`6606`) +- Bug in ``.loc`` with a list of indexers on a single-multi index level (that is not nested) (:issue:`7349`) +- Bug in ``Series.map`` when mapping a dict with tuple keys of different lengths (:issue:`7333`) +- Bug all ``StringMethods`` now work on empty Series (:issue:`7242`) +- Fix delegation of ``read_sql`` to ``read_sql_query`` when query does not contain 'select' (:issue:`7324`). +- Bug where a string column name assignment to a ``DataFrame`` with a + ``Float64Index`` raised a ``TypeError`` during a call to ``np.isnan`` + (:issue:`7366`). +- Bug where ``NDFrame.replace()`` didn't correctly replace objects with + ``Period`` values (:issue:`7379`). +- Bug in ``.ix`` getitem should always return a Series (:issue:`7150`) +- Bug in MultiIndex slicing with incomplete indexers (:issue:`7399`) +- Bug in MultiIndex slicing with a step in a sliced level (:issue:`7400`) +- Bug where negative indexers in ``DatetimeIndex`` were not correctly sliced + (:issue:`7408`) +- Bug where ``NaT`` wasn't repr'd correctly in a ``MultiIndex`` (:issue:`7406`, + :issue:`7409`). +- Bug where bool objects were converted to ``nan`` in ``convert_objects`` + (:issue:`7416`). 
+- Bug in ``quantile`` ignoring the axis keyword argument (:issue:`7306`) +- Bug where ``nanops._maybe_null_out`` doesn't work with complex numbers + (:issue:`7353`) +- Bug in several ``nanops`` functions when ``axis==0`` for + 1-dimensional ``nan`` arrays (:issue:`7354`) +- Bug where ``nanops.nanmedian`` doesn't work when ``axis==None`` + (:issue:`7352`) +- Bug where ``nanops._has_infs`` doesn't work with many dtypes + (:issue:`7357`) +- Bug in ``StataReader.data`` where reading a 0-observation dta failed (:issue:`7369`) +- Bug in ``StataReader`` when reading Stata 13 (117) files containing fixed width strings (:issue:`7360`) +- Bug in ``StataWriter`` where encoding was ignored (:issue:`7286`) +- Bug in ``DatetimeIndex`` comparison doesn't handle ``NaT`` properly (:issue:`7529`) +- Bug in passing input with ``tzinfo`` to some offsets ``apply``, ``rollforward`` or ``rollback`` resets ``tzinfo`` or raises ``ValueError`` (:issue:`7465`) +- Bug in ``DatetimeIndex.to_period``, ``PeriodIndex.asobject``, ``PeriodIndex.to_timestamp`` doesn't preserve ``name`` (:issue:`7485`) +- Bug in ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` handle ``NaT`` incorrectly (:issue:`7228`) +- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may return normal ``datetime`` (:issue:`7502`) +- Bug in ``resample`` raises ``ValueError`` when target contains ``NaT`` (:issue:`7227`) +- Bug in ``Timestamp.tz_localize`` resets ``nanosecond`` info (:issue:`7534`) +- Bug in ``DatetimeIndex.asobject`` raises ``ValueError`` when it contains ``NaT`` (:issue:`7539`) +- Bug in ``Timestamp.__new__`` doesn't preserve nanosecond properly (:issue:`7610`) +- Bug in ``Index.astype(float)`` where it would return an ``object`` dtype + ``Index`` (:issue:`7464`). +- Bug in ``DataFrame.reset_index`` loses ``tz`` (:issue:`3950`) +- Bug in ``DatetimeIndex.freqstr`` raises ``AttributeError`` when ``freq`` is ``None`` (:issue:`7606`) +- Bug in ``GroupBy.size`` created by ``TimeGrouper`` raises ``AttributeError`` (:issue:`7453`) +- Bug in single column bar plot is misaligned (:issue:`7498`). +- Bug in area plot with tz-aware time series raises ``ValueError`` (:issue:`7471`) +- Bug in non-monotonic ``Index.union`` may preserve ``name`` incorrectly (:issue:`7458`) +- Bug in ``DatetimeIndex.intersection`` doesn't preserve timezone (:issue:`4690`) +- Bug in ``rolling_var`` where a window larger than the array would raise an error(:issue:`7297`) +- Bug with last plotted timeseries dictating ``xlim`` (:issue:`2960`) +- Bug with ``secondary_y`` axis not being considered for timeseries ``xlim`` (:issue:`3490`) +- Bug in ``Float64Index`` assignment with a non scalar indexer (:issue:`7586`) +- Bug in ``pandas.core.strings.str_contains`` does not properly match in a case insensitive fashion when ``regex=False`` and ``case=False`` (:issue:`7505`) +- Bug in ``expanding_cov``, ``expanding_corr``, ``rolling_cov``, and ``rolling_corr`` for two arguments with mismatched index (:issue:`7512`) +- Bug in ``to_sql`` taking the boolean column as text column (:issue:`7678`) +- Bug in grouped ``hist`` doesn't handle ``rot`` kw and ``sharex`` kw properly (:issue:`7234`) +- Bug in ``.loc`` performing fallback integer indexing with ``object`` dtype indices (:issue:`7496`) +- Bug (regression) in ``PeriodIndex`` constructor when passed ``Series`` objects (:issue:`7701`). + + +.. _whatsnew_0.14.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.14.0..v0.14.1 diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst new file mode 100644 index 00000000..04506f16 --- /dev/null +++ b/doc/source/whatsnew/v0.15.0.rst @@ -0,0 +1,1242 @@ +.. _whatsnew_0150: + +Version 0.15.0 (October 18, 2014) +--------------------------------- + +{{ header }} + + +This is a major release from 0.14.1 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +.. warning:: + + pandas >= 0.15.0 will no longer support compatibility with NumPy versions < + 1.7.0. If you want to use the latest versions of pandas, please upgrade to + NumPy >= 1.7.0 (:issue:`7711`) + +- Highlights include: + + - The ``Categorical`` type was integrated as a first-class pandas type, see :ref:`here ` + - New scalar type ``Timedelta``, and a new index type ``TimedeltaIndex``, see :ref:`here ` + - New datetimelike properties accessor ``.dt`` for Series, see :ref:`Datetimelike Properties ` + - New DataFrame default display for ``df.info()`` to include memory usage, see :ref:`Memory Usage ` + - ``read_csv`` will now by default ignore blank lines when parsing, see :ref:`here ` + - API change in using Indexes in set operations, see :ref:`here ` + - Enhancements in the handling of timezones, see :ref:`here ` + - A lot of improvements to the rolling and expanding moment functions, see :ref:`here ` + - Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring ` + - dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`) + - Split indexing documentation into :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing ` + - Split out string methods documentation into :ref:`Working with Text Data ` + +- Check the :ref:`API Changes ` and :ref:`deprecations ` before updating + +- :ref:`Other Enhancements ` + +- :ref:`Performance Improvements ` + +- :ref:`Bug Fixes ` + +.. warning:: + + In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray`` + but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This change allows very easy sub-classing and creation of new index types. This should be + a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `) + +.. warning:: + + The refactoring in :class:`~pandas.Categorical` changed the two argument constructor from + "codes/labels and levels" to "values and levels (now called 'categories')". This can lead to subtle bugs. If you use + :class:`~pandas.Categorical` directly, please audit your code before updating to this pandas + version and change it to use the :meth:`~pandas.Categorical.from_codes` constructor. See more on ``Categorical`` :ref:`here ` + + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0150.cat: + +Categoricals in Series/DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`~pandas.Categorical` can now be included in ``Series`` and ``DataFrames`` and gained new +methods to manipulate. Thanks to Jan Schulz for much of this API/implementation. (:issue:`3943`, :issue:`5313`, :issue:`5314`, +:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`, :issue:`7768`, :issue:`8006`, :issue:`3678`, +:issue:`8075`, :issue:`8076`, :issue:`8143`, :issue:`8453`, :issue:`8518`). 
+ +For full docs, see the :ref:`categorical introduction ` and the +:ref:`API documentation `. + +.. ipython:: python + :okwarning: + + df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) + + df["grade"] = df["raw_grade"].astype("category") + df["grade"] + + # Rename the categories + df["grade"].cat.categories = ["very good", "good", "very bad"] + + # Reorder the categories and simultaneously add the missing categories + df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", + "medium", "good", "very good"]) + df["grade"] + df.sort_values("grade") + df.groupby("grade").size() + +- ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an alternative, construct + a dataframe and use ``df.groupby().agg()``. + +- Supplying "codes/labels and levels" to the :class:`~pandas.Categorical` constructor is not + supported anymore. Supplying two arguments to the constructor is now interpreted as + "values and levels (now called 'categories')". Please change your code to use the :meth:`~pandas.Categorical.from_codes` + constructor. + +- The ``Categorical.labels`` attribute was renamed to ``Categorical.codes`` and is read + only. If you want to manipulate codes, please use one of the + :ref:`API methods on Categoricals `. + +- The ``Categorical.levels`` attribute is renamed to ``Categorical.categories``. + + +.. _whatsnew_0150.timedeltaindex: + +TimedeltaIndex/scalar +^^^^^^^^^^^^^^^^^^^^^ + +We introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner, +but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes. +This type is very similar to how ``Timestamp`` works for ``datetimes``. It is a nice-API box for the type. See the :ref:`docs `. +(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`, :issue:`8471`) + +.. warning:: + + ``Timedelta`` scalars (and ``TimedeltaIndex``) component fields are *not the same* as the component fields on a ``datetime.timedelta`` object. For example, ``.seconds`` on a ``datetime.timedelta`` object returns the total number of seconds combined between ``hours``, ``minutes`` and ``seconds``. In contrast, the pandas ``Timedelta`` breaks out hours, minutes, microseconds and nanoseconds separately. + + .. code-block:: ipython + + # Timedelta accessor + In [9]: tds = pd.Timedelta('31 days 5 min 3 sec') + + In [10]: tds.minutes + Out[10]: 5L + + In [11]: tds.seconds + Out[11]: 3L + + # datetime.timedelta accessor + # this is 5 minutes * 60 + 3 seconds + In [12]: tds.to_pytimedelta().seconds + Out[12]: 303 + + **Note**: this is no longer true starting from v0.16.0, where full + compatibility with ``datetime.timedelta`` is introduced. See the + :ref:`0.16.0 whatsnew entry ` + +.. warning:: + + Prior to 0.15.0 ``pd.to_timedelta`` would return a ``Series`` for list-like/Series input, and a ``np.timedelta64`` for scalar input. + It will now return a ``TimedeltaIndex`` for list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input. + + The arguments to ``pd.to_timedelta`` are now ``(arg,unit='ns',box=True,coerce=False)``, previously were ``(arg,box=True,unit='ns')`` as these are more logical. + +Construct a scalar + +.. 
ipython:: python + + pd.Timedelta('1 days 06:05:01.00003') + pd.Timedelta('15.5us') + pd.Timedelta('1 hour 15.5us') + + # negative Timedeltas have this string repr + # to be more consistent with datetime.timedelta conventions + pd.Timedelta('-1us') + + # a NaT + pd.Timedelta('nan') + +Access fields for a ``Timedelta`` + +.. ipython:: python + + td = pd.Timedelta('1 hour 3m 15.5us') + td.seconds + td.microseconds + td.nanoseconds + +Construct a ``TimedeltaIndex`` + +.. ipython:: python + :suppress: + + import datetime + +.. ipython:: python + + pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', + np.timedelta64(2, 'D'), + datetime.timedelta(days=2, seconds=2)]) + +Constructing a ``TimedeltaIndex`` with a regular range + +.. ipython:: python + + pd.timedelta_range('1 days', periods=5, freq='D') + pd.timedelta_range(start='1 days', end='2 days', freq='30T') + +You can now use a ``TimedeltaIndex`` as the index of a pandas object + +.. ipython:: python + + s = pd.Series(np.arange(5), + index=pd.timedelta_range('1 days', periods=5, freq='s')) + s + +You can select with partial string selections + +.. ipython:: python + + s['1 day 00:00:02'] + s['1 day':'1 day 00:00:02'] + +Finally, the combination of ``TimedeltaIndex`` with ``DatetimeIndex`` allow certain combination operations that are ``NaT`` preserving: + +.. ipython:: python + + tdi = pd.TimedeltaIndex(['1 days', pd.NaT, '2 days']) + tdi.tolist() + dti = pd.date_range('20130101', periods=3) + dti.tolist() + + (dti + tdi).tolist() + (dti - tdi).tolist() + +- iteration of a ``Series`` e.g. ``list(Series(...))`` of ``timedelta64[ns]`` would prior to v0.15.0 return ``np.timedelta64`` for each element. These will now be wrapped in ``Timedelta``. + + +.. _whatsnew_0150.memory: + +Memory usage +^^^^^^^^^^^^ + +Implemented methods to find memory usage of a DataFrame. See the :ref:`FAQ ` for more. (:issue:`6852`). + +A new display option ``display.memory_usage`` (see :ref:`options`) sets the default behavior of the ``memory_usage`` argument in the ``df.info()`` method. By default ``display.memory_usage`` is ``True``. + +.. ipython:: python + + dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]', + 'complex128', 'object', 'bool'] + n = 5000 + data = {t: np.random.randint(100, size=n).astype(t) for t in dtypes} + df = pd.DataFrame(data) + df['categorical'] = df['object'].astype('category') + + df.info() + +Additionally :meth:`~pandas.DataFrame.memory_usage` is an available method for a dataframe object which returns the memory usage of each column. + +.. ipython:: python + + df.memory_usage(index=True) + + +.. _whatsnew_0150.dt: + +Series.dt accessor +^^^^^^^^^^^^^^^^^^ + +``Series`` has gained an accessor to succinctly return datetime like properties for the *values* of the Series, if its a datetime/period like Series. (:issue:`7207`) +This will return a Series, indexed like the existing Series. See the :ref:`docs ` + +.. ipython:: python + + # datetime + s = pd.Series(pd.date_range('20130101 09:10:12', periods=4)) + s + s.dt.hour + s.dt.second + s.dt.day + s.dt.freq + +This enables nice expressions like this: + +.. ipython:: python + + s[s.dt.day == 2] + +You can easily produce tz aware transformations: + +.. ipython:: python + + stz = s.dt.tz_localize('US/Eastern') + stz + stz.dt.tz + +You can also chain these types of operations: + +.. ipython:: python + + s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') + +The ``.dt`` accessor works for period and timedelta dtypes. + +.. 
ipython:: python + + # period + s = pd.Series(pd.period_range('20130101', periods=4, freq='D')) + s + s.dt.year + s.dt.day + +.. ipython:: python + + # timedelta + s = pd.Series(pd.timedelta_range('1 day 00:00:05', periods=4, freq='s')) + s + s.dt.days + s.dt.seconds + s.dt.components + + +.. _whatsnew_0150.tz: + +Timezone handling improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``tz_localize(None)`` for tz-aware ``Timestamp`` and ``DatetimeIndex`` now removes timezone holding local time, + previously this resulted in ``Exception`` or ``TypeError`` (:issue:`7812`) + + .. ipython:: python + + ts = pd.Timestamp('2014-08-01 09:00', tz='US/Eastern') + ts + ts.tz_localize(None) + + didx = pd.date_range(start='2014-08-01 09:00', freq='H', + periods=10, tz='US/Eastern') + didx + didx.tz_localize(None) + +- ``tz_localize`` now accepts the ``ambiguous`` keyword which allows for passing an array of bools + indicating whether the date belongs in DST or not, 'NaT' for setting transition times to NaT, + 'infer' for inferring DST/non-DST, and 'raise' (default) for an ``AmbiguousTimeError`` to be raised. See :ref:`the docs` for more details (:issue:`7943`) + +- ``DataFrame.tz_localize`` and ``DataFrame.tz_convert`` now accepts an optional ``level`` argument + for localizing a specific level of a MultiIndex (:issue:`7846`) + +- ``Timestamp.tz_localize`` and ``Timestamp.tz_convert`` now raise ``TypeError`` in error cases, rather than ``Exception`` (:issue:`8025`) + +- a timeseries/index localized to UTC when inserted into a Series/DataFrame will preserve the UTC timezone (rather than being a naive ``datetime64[ns]``) as ``object`` dtype (:issue:`8411`) + +- ``Timestamp.__repr__`` displays ``dateutil.tz.tzoffset`` info (:issue:`7907`) + + +.. _whatsnew_0150.roll: + +Rolling/expanding moments improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :func:`rolling_min`, :func:`rolling_max`, :func:`rolling_cov`, and :func:`rolling_corr` + now return objects with all ``NaN`` when ``len(arg) < min_periods <= window`` rather + than raising. (This makes all rolling functions consistent in this behavior). (:issue:`7766`) + + Prior to 0.15.0 + + .. ipython:: python + + s = pd.Series([10, 11, 12, 13]) + + .. code-block:: ipython + + In [15]: pd.rolling_min(s, window=10, min_periods=5) + ValueError: min_periods (5) must be <= window (4) + + New behavior + + .. code-block:: ipython + + In [4]: pd.rolling_min(s, window=10, min_periods=5) + Out[4]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + dtype: float64 + +- :func:`rolling_max`, :func:`rolling_min`, :func:`rolling_sum`, :func:`rolling_mean`, :func:`rolling_median`, + :func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, :func:`rolling_quantile`, + :func:`rolling_cov`, :func:`rolling_corr`, :func:`rolling_corr_pairwise`, + :func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same + structure as the input ``arg`` with ``NaN`` in the final ``(window-1)/2`` entries. + + Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed + by ``(window-1)/2`` ``NaN`` values (or with shrinking windows, in the case of :func:`rolling_apply`). + (:issue:`7925`, :issue:`8269`) + + Prior behavior (note final value is ``NaN``): + + .. code-block:: ipython + + In [7]: pd.rolling_sum(Series(range(4)), window=3, min_periods=0, center=True) + Out[7]: + 0 1 + 1 3 + 2 6 + 3 NaN + dtype: float64 + + New behavior (note final value is ``5 = sum([2, 3, NaN])``): + + .. 
code-block:: ipython + + In [7]: pd.rolling_sum(pd.Series(range(4)), window=3, + ....: min_periods=0, center=True) + Out[7]: + 0 1 + 1 3 + 2 6 + 3 5 + dtype: float64 + +- :func:`rolling_window` now normalizes the weights properly in rolling mean mode (`mean=True`) so that + the calculated weighted means (e.g. 'triang', 'gaussian') are distributed about the same means as those + calculated without weighting (i.e. 'boxcar'). See :ref:`the note on normalization ` for further details. (:issue:`7618`) + + .. ipython:: python + + s = pd.Series([10.5, 8.8, 11.4, 9.7, 9.3]) + + Behavior prior to 0.15.0: + + .. code-block:: ipython + + In [39]: pd.rolling_window(s, window=3, win_type='triang', center=True) + Out[39]: + 0 NaN + 1 6.583333 + 2 6.883333 + 3 6.683333 + 4 NaN + dtype: float64 + + New behavior + + .. code-block:: ipython + + In [10]: pd.rolling_window(s, window=3, win_type='triang', center=True) + Out[10]: + 0 NaN + 1 9.875 + 2 10.325 + 3 10.025 + 4 NaN + dtype: float64 + +- Removed ``center`` argument from all :func:`expanding_ ` functions (see :ref:`list `), + as the results produced when ``center=True`` did not make much sense. (:issue:`7925`) + +- Added optional ``ddof`` argument to :func:`expanding_cov` and :func:`rolling_cov`. + The default value of ``1`` is backwards-compatible. (:issue:`8279`) + +- Documented the ``ddof`` argument to :func:`expanding_var`, :func:`expanding_std`, + :func:`rolling_var`, and :func:`rolling_std`. These functions' support of a + ``ddof`` argument (with a default value of ``1``) was previously undocumented. (:issue:`8064`) + +- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now interpret ``min_periods`` in the same manner that the :func:`rolling_*()` and :func:`expanding_*()` functions do: + a given result entry will be ``NaN`` if the (expanding, in this case) window does not contain + at least ``min_periods`` values. The previous behavior was to set to ``NaN`` the ``min_periods`` entries + starting with the first non- ``NaN`` value. (:issue:`7977`) + + Prior behavior (note values start at index ``2``, which is ``min_periods`` after index ``0`` + (the index of the first non-empty value)): + + .. ipython:: python + + s = pd.Series([1, None, None, None, 2, 3]) + + .. code-block:: ipython + + In [51]: pd.ewma(s, com=3., min_periods=2) + Out[51]: + 0 NaN + 1 NaN + 2 1.000000 + 3 1.000000 + 4 1.571429 + 5 2.189189 + dtype: float64 + + New behavior (note values start at index ``4``, the location of the 2nd (since ``min_periods=2``) non-empty value): + + .. code-block:: ipython + + In [2]: pd.ewma(s, com=3., min_periods=2) + Out[2]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + 4 1.759644 + 5 2.383784 + dtype: float64 + +- :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now have an optional ``adjust`` argument, just like :func:`ewma` does, + affecting how the weights are calculated. + The default value of ``adjust`` is ``True``, which is backwards-compatible. + See :ref:`Exponentially weighted moment functions ` for details. (:issue:`7911`) + +- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now have an optional ``ignore_na`` argument. + When ``ignore_na=False`` (the default), missing values are taken into account in the weights calculation. + When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation. + (:issue:`7543`) + + .. 
code-block:: ipython + + In [7]: pd.ewma(pd.Series([None, 1., 8.]), com=2.) + Out[7]: + 0 NaN + 1 1.0 + 2 5.2 + dtype: float64 + + In [8]: pd.ewma(pd.Series([1., None, 8.]), com=2., + ....: ignore_na=True) # pre-0.15.0 behavior + Out[8]: + 0 1.0 + 1 1.0 + 2 5.2 + dtype: float64 + + In [9]: pd.ewma(pd.Series([1., None, 8.]), com=2., + ....: ignore_na=False) # new default + Out[9]: + 0 1.000000 + 1 1.000000 + 2 5.846154 + dtype: float64 + + .. warning:: + + By default (``ignore_na=False``) the :func:`ewm*()` functions' weights calculation + in the presence of missing values is different than in pre-0.15.0 versions. + To reproduce the pre-0.15.0 calculation of weights in the presence of missing values + one must specify explicitly ``ignore_na=True``. + +- Bug in :func:`expanding_cov`, :func:`expanding_corr`, :func:`rolling_cov`, :func:`rolling_cor`, :func:`ewmcov`, and :func:`ewmcorr` + returning results with columns sorted by name and producing an error for non-unique columns; + now handles non-unique columns and returns columns in original order + (except for the case of two DataFrames with ``pairwise=False``, where behavior is unchanged) (:issue:`7542`) +- Bug in :func:`rolling_count` and :func:`expanding_*()` functions unnecessarily producing error message for zero-length data (:issue:`8056`) +- Bug in :func:`rolling_apply` and :func:`expanding_apply` interpreting ``min_periods=0`` as ``min_periods=1`` (:issue:`8080`) +- Bug in :func:`expanding_std` and :func:`expanding_var` for a single value producing a confusing error message (:issue:`7900`) +- Bug in :func:`rolling_std` and :func:`rolling_var` for a single value producing ``0`` rather than ``NaN`` (:issue:`7900`) + +- Bug in :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, and :func:`ewmcov` + calculation of de-biasing factors when ``bias=False`` (the default). + Previously an incorrect constant factor was used, based on ``adjust=True``, ``ignore_na=True``, + and an infinite number of observations. + Now a different factor is used for each entry, based on the actual weights + (analogous to the usual ``N/(N-1)`` factor). + In particular, for a single point a value of ``NaN`` is returned when ``bias=False``, + whereas previously a value of (approximately) ``0`` was returned. + + For example, consider the following pre-0.15.0 results for ``ewmvar(..., bias=False)``, + and the corresponding debiasing factors: + + .. ipython:: python + + s = pd.Series([1., 2., 0., 4.]) + + .. code-block:: ipython + + In [89]: pd.ewmvar(s, com=2., bias=False) + Out[89]: + 0 -2.775558e-16 + 1 3.000000e-01 + 2 9.556787e-01 + 3 3.585799e+00 + dtype: float64 + + In [90]: pd.ewmvar(s, com=2., bias=False) / pd.ewmvar(s, com=2., bias=True) + Out[90]: + 0 1.25 + 1 1.25 + 2 1.25 + 3 1.25 + dtype: float64 + + Note that entry ``0`` is approximately 0, and the debiasing factors are a constant 1.25. + By comparison, the following 0.15.0 results have a ``NaN`` for entry ``0``, + and the debiasing factors are decreasing (towards 1.25): + + .. code-block:: ipython + + In [14]: pd.ewmvar(s, com=2., bias=False) + Out[14]: + 0 NaN + 1 0.500000 + 2 1.210526 + 3 4.089069 + dtype: float64 + + In [15]: pd.ewmvar(s, com=2., bias=False) / pd.ewmvar(s, com=2., bias=True) + Out[15]: + 0 NaN + 1 2.083333 + 2 1.583333 + 3 1.425439 + dtype: float64 + + See :ref:`Exponentially weighted moment functions ` for details. (:issue:`7912`) + + +.. 
_whatsnew_0150.sql: + +Improvements in the SQL IO module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Added support for a ``chunksize`` parameter to ``to_sql`` function. This allows DataFrame to be written in chunks and avoid packet-size overflow errors (:issue:`8062`). +- Added support for a ``chunksize`` parameter to ``read_sql`` function. Specifying this argument will return an iterator through chunks of the query result (:issue:`2908`). +- Added support for writing ``datetime.date`` and ``datetime.time`` object columns with ``to_sql`` (:issue:`6932`). +- Added support for specifying a ``schema`` to read from/write to with ``read_sql_table`` and ``to_sql`` (:issue:`7441`, :issue:`7952`). + For example: + + .. code-block:: python + + df.to_sql('table', engine, schema='other_schema') # noqa F821 + pd.read_sql_table('table', engine, schema='other_schema') # noqa F821 + +- Added support for writing ``NaN`` values with ``to_sql`` (:issue:`2754`). +- Added support for writing datetime64 columns with ``to_sql`` for all database flavors (:issue:`7103`). + + +.. _whatsnew_0150.api: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0150.api_breaking: + +Breaking changes +^^^^^^^^^^^^^^^^ + +API changes related to ``Categorical`` (see :ref:`here ` +for more details): + +- The ``Categorical`` constructor with two arguments changed from + "codes/labels and levels" to "values and levels (now called 'categories')". + This can lead to subtle bugs. If you use :class:`~pandas.Categorical` directly, + please audit your code by changing it to use the :meth:`~pandas.Categorical.from_codes` + constructor. + + An old function call like (prior to 0.15.0): + + .. code-block:: python + + pd.Categorical([0,1,0,2,1], levels=['a', 'b', 'c']) + + will have to adapted to the following to keep the same behaviour: + + .. code-block:: ipython + + In [2]: pd.Categorical.from_codes([0,1,0,2,1], categories=['a', 'b', 'c']) + Out[2]: + [a, b, a, c, b] + Categories (3, object): [a, b, c] + +API changes related to the introduction of the ``Timedelta`` scalar (see +:ref:`above ` for more details): + +- Prior to 0.15.0 :func:`to_timedelta` would return a ``Series`` for list-like/Series input, + and a ``np.timedelta64`` for scalar input. It will now return a ``TimedeltaIndex`` for + list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input. + +For API changes related to the rolling and expanding functions, see detailed overview :ref:`above `. + +Other notable API changes: + +- Consistency when indexing with ``.loc`` and a list-like indexer when no values are found. + + .. ipython:: python + + df = pd.DataFrame([['a'], ['b']], index=[1, 2]) + df + + In prior versions there was a difference in these two constructs: + + - ``df.loc[[3]]`` would return a frame reindexed by 3 (with all ``np.nan`` values) + - ``df.loc[[3],:]`` would raise ``KeyError``. + + Both will now raise a ``KeyError``. The rule is that *at least 1* indexer must be found when using a list-like and ``.loc`` (:issue:`7999`) + + Furthermore in prior versions these were also different: + + - ``df.loc[[1,3]]`` would return a frame reindexed by [1,3] + - ``df.loc[[1,3],:]`` would raise ``KeyError``. + + Both will now return a frame reindex by [1,3]. E.g. + + .. code-block:: ipython + + In [3]: df.loc[[1, 3]] + Out[3]: + 0 + 1 a + 3 NaN + + In [4]: df.loc[[1, 3], :] + Out[4]: + 0 + 1 a + 3 NaN + + This can also be seen in multi-axis indexing with a ``Panel``. + + .. 
code-block:: python + + >>> p = pd.Panel(np.arange(2 * 3 * 4).reshape(2, 3, 4), + ... items=['ItemA', 'ItemB'], + ... major_axis=[1, 2, 3], + ... minor_axis=['A', 'B', 'C', 'D']) + >>> p + + Dimensions: 2 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemB + Major_axis axis: 1 to 3 + Minor_axis axis: A to D + + + The following would raise ``KeyError`` prior to 0.15.0: + + .. code-block:: ipython + + In [5]: + Out[5]: + ItemA ItemD + 1 3 NaN + 2 7 NaN + 3 11 NaN + + Furthermore, ``.loc`` will raise If no values are found in a MultiIndex with a list-like indexer: + + .. ipython:: python + :okexcept: + + s = pd.Series(np.arange(3, dtype='int64'), + index=pd.MultiIndex.from_product([['A'], + ['foo', 'bar', 'baz']], + names=['one', 'two']) + ).sort_index() + s + try: + s.loc[['D']] + except KeyError as e: + print("KeyError: " + str(e)) + +- Assigning values to ``None`` now considers the dtype when choosing an 'empty' value (:issue:`7941`). + + Previously, assigning to ``None`` in numeric containers changed the + dtype to object (or errored, depending on the call). It now uses + ``NaN``: + + .. ipython:: python + + s = pd.Series([1, 2, 3]) + s.loc[0] = None + s + + ``NaT`` is now used similarly for datetime containers. + + For object containers, we now preserve ``None`` values (previously these + were converted to ``NaN`` values). + + .. ipython:: python + + s = pd.Series(["a", "b", "c"]) + s.loc[0] = None + s + + To insert a ``NaN``, you must explicitly use ``np.nan``. See the :ref:`docs `. + +- In prior versions, updating a pandas object inplace would not reflect in other python references to this object. (:issue:`8511`, :issue:`5104`) + + .. ipython:: python + + s = pd.Series([1, 2, 3]) + s2 = s + s += 1.5 + + Behavior prior to v0.15.0 + + .. code-block:: ipython + + + # the original object + In [5]: s + Out[5]: + 0 2.5 + 1 3.5 + 2 4.5 + dtype: float64 + + + # a reference to the original object + In [7]: s2 + Out[7]: + 0 1 + 1 2 + 2 3 + dtype: int64 + + This is now the correct behavior + + .. ipython:: python + + # the original object + s + + # a reference to the original object + s2 + +.. _whatsnew_0150.blanklines: + +- Made both the C-based and Python engines for ``read_csv`` and ``read_table`` ignore empty lines in input as well as + white space-filled lines, as long as ``sep`` is not white space. This is an API change + that can be controlled by the keyword parameter ``skip_blank_lines``. See :ref:`the docs ` (:issue:`4466`) + +- A timeseries/index localized to UTC when inserted into a Series/DataFrame will preserve the UTC timezone + and inserted as ``object`` dtype rather than being converted to a naive ``datetime64[ns]`` (:issue:`8411`). + +- Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`) + + In prior versions this would drop the timezone, now it retains the timezone, + but gives a column of ``object`` dtype: + + .. ipython:: python + + i = pd.date_range('1/1/2011', periods=3, freq='10s', tz='US/Eastern') + i + df = pd.DataFrame({'a': i}) + df + df.dtypes + + Previously this would have yielded a column of ``datetime64`` dtype, but without timezone info. + + The behaviour of assigning a column to an existing dataframe as ``df['a'] = i`` + remains unchanged (this already returned an ``object`` column with a timezone). 
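+
+  For illustration, a minimal sketch of the unchanged assignment path just
+  mentioned (``df2`` is a hypothetical frame, reusing ``i`` from the example
+  above):
+
+  .. code-block:: python
+
+     df2 = pd.DataFrame({'b': [1, 2, 3]})
+     df2['a'] = i          # assign the tz-aware index as a new column
+     df2.dtypes            # 'a' keeps its timezone and is stored as object dtype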
+ +- When passing multiple levels to :meth:`~pandas.DataFrame.stack()`, it will now raise a ``ValueError`` when the + levels aren't all level names or all level numbers (:issue:`7660`). See + :ref:`Reshaping by stacking and unstacking `. + +- Raise a ``ValueError`` in ``df.to_hdf`` with 'fixed' format, if ``df`` has non-unique columns as the resulting file will be broken (:issue:`7761`) + +- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`, :issue:`7950`) + + .. code-block:: python + + In [1]: df = pd.DataFrame(np.arange(0, 9), columns=['count']) + + In [2]: df['group'] = 'b' + + In [3]: df.iloc[0:5]['group'] = 'a' + /usr/local/bin/ipython:1: SettingWithCopyWarning: + A value is trying to be set on a copy of a slice from a DataFrame. + Try using .loc[row_indexer,col_indexer] = value instead + + See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy + +- ``merge``, ``DataFrame.merge``, and ``ordered_merge`` now return the same type + as the ``left`` argument (:issue:`7737`). + +- Previously an enlargement with a mixed-dtype frame would act unlike ``.append`` which will preserve dtypes (related :issue:`2578`, :issue:`8176`): + + .. ipython:: python + + df = pd.DataFrame([[True, 1], [False, 2]], + columns=["female", "fitness"]) + df + df.dtypes + + # dtypes are now preserved + df.loc[2] = df.loc[1] + df + df.dtypes + +- ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of ``DataFrame.to_csv()`` (:issue:`8215`). + +- ``read_hdf`` now raises ``IOError`` when a file that doesn't exist is passed in. Previously, a new, empty file was created, and a ``KeyError`` raised (:issue:`7715`). + +- ``DataFrame.info()`` now ends its output with a newline character (:issue:`8114`) +- Concatenating no objects will now raise a ``ValueError`` rather than a bare ``Exception``. +- Merge errors will now be sub-classes of ``ValueError`` rather than raw ``Exception`` (:issue:`8501`) +- ``DataFrame.plot`` and ``Series.plot`` keywords are now have consistent orders (:issue:`8037`) + + +.. _whatsnew_0150.refactoring: + +Internal refactoring +^^^^^^^^^^^^^^^^^^^^ + +In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray`` +but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This +change allows very easy sub-classing and creation of new index types. This should be +a transparent change with only very limited API implications (:issue:`5080`, :issue:`7439`, :issue:`7796`, :issue:`8024`, :issue:`8367`, :issue:`7997`, :issue:`8522`): + +- you may need to unpickle pandas version < 0.15.0 pickles using ``pd.read_pickle`` rather than ``pickle.load``. See :ref:`pickle docs ` +- when plotting with a ``PeriodIndex``, the matplotlib internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex`` (this is similar to how a ``DatetimeIndex`` passes arrays of ``datetimes`` now) +- MultiIndexes will now raise similarly to other pandas objects w.r.t. truth testing, see :ref:`here ` (:issue:`7897`). +- When plotting a DatetimeIndex directly with matplotlib's ``plot`` function, + the axis labels will no longer be formatted as dates but as integers (the + internal representation of a ``datetime64``). **UPDATE** This is fixed + in 0.15.1, see :ref:`here `. + +.. 
_whatsnew_0150.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- The ``Categorical`` ``labels`` and ``levels`` attributes are + deprecated and renamed to ``codes`` and ``categories``. +- The ``outtype`` argument to ``pd.DataFrame.to_dict`` has been deprecated in favor of ``orient``. (:issue:`7840`) +- The ``convert_dummies`` method has been deprecated in favor of + ``get_dummies`` (:issue:`8140`) +- The ``infer_dst`` argument in ``tz_localize`` will be deprecated in favor of + ``ambiguous`` to allow for more flexibility in dealing with DST transitions. + Replace ``infer_dst=True`` with ``ambiguous='infer'`` for the same behavior (:issue:`7943`). + See :ref:`the docs` for more details. +- The top-level ``pd.value_range`` has been deprecated and can be replaced by ``.describe()`` (:issue:`8481`) + +.. _whatsnew_0150.index_set_ops: + +- The ``Index`` set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``. Further, the method name ``Index.diff()`` is deprecated and can be replaced by ``Index.difference()`` (:issue:`8226`) + + .. code-block:: python + + # + + pd.Index(['a', 'b', 'c']) + pd.Index(['b', 'c', 'd']) + + # should be replaced by + pd.Index(['a', 'b', 'c']).union(pd.Index(['b', 'c', 'd'])) + + .. code-block:: python + + # - + pd.Index(['a', 'b', 'c']) - pd.Index(['b', 'c', 'd']) + + # should be replaced by + pd.Index(['a', 'b', 'c']).difference(pd.Index(['b', 'c', 'd'])) + +- The ``infer_types`` argument to :func:`~pandas.read_html` now has no + effect and is deprecated (:issue:`7762`, :issue:`7032`). + + +.. _whatsnew_0150.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Removed the ``DataFrame.delevel`` method in favor of ``DataFrame.reset_index`` + + + +.. _whatsnew_0150.enhancements: + +Enhancements +~~~~~~~~~~~~ + +Enhancements in the importing/exporting of Stata files: + +- Added support for bool, uint8, uint16 and uint32 data types in ``to_stata`` (:issue:`7097`, :issue:`7365`) +- Added a conversion option when importing Stata files (:issue:`8527`) +- ``DataFrame.to_stata`` and ``StataWriter`` check string length for + compatibility with limitations imposed in dta files where fixed-width + strings must contain 244 or fewer characters. Attempting to write Stata + dta files with strings longer than 244 characters raises a ``ValueError``. (:issue:`7858`) +- ``read_stata`` and ``StataReader`` can import missing data information into a + ``DataFrame`` by setting the argument ``convert_missing`` to ``True``. When + using this option, missing values are returned as ``StataMissingValue`` + objects and columns containing missing values have ``object`` data type. (:issue:`8045`) + +Enhancements in the plotting functions (illustrated in the sketch after this list): + +- Added ``layout`` keyword to ``DataFrame.plot``. You can pass a tuple of ``(rows, columns)``, one of which can be ``-1`` to automatically infer (:issue:`6667`, :issue:`8071`). +- Allow passing multiple axes to ``DataFrame.plot``, ``hist`` and ``boxplot`` (:issue:`5353`, :issue:`6970`, :issue:`7069`) +- Added support for ``c``, ``colormap`` and ``colorbar`` arguments for ``DataFrame.plot`` with ``kind='scatter'`` (:issue:`7780`) +- Histogram from ``DataFrame.plot`` with ``kind='hist'`` (:issue:`7809`), see :ref:`the docs`. +- Boxplot from ``DataFrame.plot`` with ``kind='box'`` (:issue:`7998`), see :ref:`the docs`.
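+
+The following sketch illustrates the plotting enhancements above; the frame and
+column names are invented for this example, and the usual ``np``/``pd`` imports
+are assumed:
+
+.. code-block:: python
+
+   df = pd.DataFrame({'a': np.random.randn(100),
+                      'b': np.random.randn(100),
+                      'c': np.random.rand(100)})
+
+   # histogram and boxplot directly through DataFrame.plot
+   df.plot(kind='hist', alpha=0.5)
+   df.plot(kind='box')
+
+   # scatter plot colored by a third column, with a colorbar
+   df.plot(kind='scatter', x='a', y='b', c='c', colormap='Greens')
+
+   # subplots arranged on a 2x2 grid via the new layout keyword
+   df.plot(subplots=True, layout=(2, 2))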
+ +Other: + +- ``read_csv`` now has a keyword parameter ``float_precision`` which specifies which floating-point converter the C engine should use during parsing, see :ref:`here ` (:issue:`8002`, :issue:`8044`) + +- Added ``searchsorted`` method to ``Series`` objects (:issue:`7447`) + +- :func:`describe` on mixed-types DataFrames is more flexible. Type-based column filtering is now possible via the ``include``/``exclude`` arguments. + See the :ref:`docs ` (:issue:`8164`). + + .. ipython:: python + + df = pd.DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, + 'catB': ['a', 'b', 'c', 'd'] * 6, + 'numC': np.arange(24), + 'numD': np.arange(24.) + .5}) + df.describe(include=["object"]) + df.describe(include=["number", "object"], exclude=["float"]) + + Requesting all columns is possible with the shorthand 'all' + + .. ipython:: python + + df.describe(include='all') + + Without those arguments, ``describe`` will behave as before, including only numerical columns or, if none are, only categorical columns. See also the :ref:`docs ` + +- Added ``split`` as an option to the ``orient`` argument in ``pd.DataFrame.to_dict``. (:issue:`7840`) + +- The ``get_dummies`` method can now be used on DataFrames. By default only + categorical columns are encoded as 0's and 1's, while other columns are + left untouched. + + .. ipython:: python + + df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['c', 'c', 'b'], + 'C': [1, 2, 3]}) + pd.get_dummies(df) + +- ``PeriodIndex`` supports ``resolution`` as the same as ``DatetimeIndex`` (:issue:`7708`) +- ``pandas.tseries.holiday`` has added support for additional holidays and ways to observe holidays (:issue:`7070`) +- ``pandas.tseries.holiday.Holiday`` now supports a list of offsets in Python3 (:issue:`7070`) +- ``pandas.tseries.holiday.Holiday`` now supports a days_of_week parameter (:issue:`7070`) +- ``GroupBy.nth()`` now supports selecting multiple nth values (:issue:`7910`) + + .. ipython:: python + + business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', freq='B') + df = pd.DataFrame(1, index=business_dates, columns=['a', 'b']) + # get the first, 4th, and last date index for each month + df.groupby([df.index.year, df.index.month]).nth([0, 3, -1]) + +- ``Period`` and ``PeriodIndex`` supports addition/subtraction with ``timedelta``-likes (:issue:`7966`) + + If ``Period`` freq is ``D``, ``H``, ``T``, ``S``, ``L``, ``U``, ``N``, ``Timedelta``-like can be added if the result can have same freq. Otherwise, only the same ``offsets`` can be added. + + .. ipython:: python + + idx = pd.period_range('2014-07-01 09:00', periods=5, freq='H') + idx + idx + pd.offsets.Hour(2) + idx + pd.Timedelta('120m') + + idx = pd.period_range('2014-07', periods=5, freq='M') + idx + idx + pd.offsets.MonthEnd(3) + +- Added experimental compatibility with ``openpyxl`` for versions >= 2.0. The ``DataFrame.to_excel`` + method ``engine`` keyword now recognizes ``openpyxl1`` and ``openpyxl2`` + which will explicitly require openpyxl v1 and v2 respectively, failing if + the requested version is not available. The ``openpyxl`` engine is a now a + meta-engine that automatically uses whichever version of openpyxl is + installed. (:issue:`7177`) + +- ``DataFrame.fillna`` can now accept a ``DataFrame`` as a fill value (:issue:`8377`) + +- Passing multiple levels to :meth:`~pandas.DataFrame.stack()` will now work when multiple level + numbers are passed (:issue:`7660`). See + :ref:`Reshaping by stacking and unstacking `. 
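+
+  A minimal sketch of stacking several column levels at once (the MultiIndex
+  used here is made up for illustration, and the usual ``np``/``pd`` imports
+  are assumed):
+
+  .. code-block:: python
+
+     columns = pd.MultiIndex.from_tuples([('A', 'cat'), ('A', 'dog'),
+                                          ('B', 'cat'), ('B', 'dog')],
+                                         names=['exp', 'animal'])
+     df = pd.DataFrame(np.random.randn(4, 4), columns=columns)
+
+     # pass several level numbers (or several level names) in one call;
+     # mixing names and numbers raises ValueError, as noted above
+     df.stack(level=[0, 1])
+     df.stack(level=['exp', 'animal'])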
+ +- :func:`set_names`, :func:`set_labels`, and :func:`set_levels` methods now take an optional ``level`` keyword argument to allow modification of specific level(s) of a MultiIndex. Additionally :func:`set_names` now accepts a scalar string value when operating on an ``Index`` or on a specific level of a ``MultiIndex`` (:issue:`7792`) + + .. ipython:: python + + idx = pd.MultiIndex.from_product([['a'], range(3), list("pqr")], + names=['foo', 'bar', 'baz']) + idx.set_names('qux', level=0) + idx.set_names(['qux', 'corge'], level=[0, 1]) + idx.set_levels(['a', 'b', 'c'], level='bar') + idx.set_levels([['a', 'b', 'c'], [1, 2, 3]], level=[1, 2]) + +- ``Index.isin`` now supports a ``level`` argument to specify which index level + to use for membership tests (:issue:`7892`, :issue:`7890`) + + .. code-block:: ipython + + In [1]: idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b', 'c']]) + + In [2]: idx.values + Out[2]: array([(0, 'a'), (0, 'b'), (0, 'c'), (1, 'a'), (1, 'b'), (1, 'c')], dtype=object) + + In [3]: idx.isin(['a', 'c', 'e'], level=1) + Out[3]: array([ True, False, True, True, False, True], dtype=bool) + +- ``Index`` now supports ``duplicated`` and ``drop_duplicates``. (:issue:`4060`) + + .. ipython:: python + + idx = pd.Index([1, 2, 3, 4, 1, 2]) + idx + idx.duplicated() + idx.drop_duplicates() + +- Added a ``copy=True`` argument to ``pd.concat`` to enable pass-through of complete blocks (:issue:`8252`) + +- Added support for numpy 1.8+ data types (``bool_``, ``int_``, ``float_``, ``string_``) for conversion to R dataframe (:issue:`8400`) + + + +.. _whatsnew_0150.performance: + +Performance +~~~~~~~~~~~ + +- Performance improvements in ``DatetimeIndex.__iter__`` to allow faster iteration (:issue:`7683`) +- Performance improvements in ``Period`` creation (and ``PeriodIndex`` setitem) (:issue:`5155`) +- Improvements in ``Series.transform`` for significant performance gains (revised) (:issue:`6496`) +- Performance improvements in ``StataReader`` when reading large files (:issue:`8040`, :issue:`8073`) +- Performance improvements in ``StataWriter`` when writing large files (:issue:`8079`) +- Performance and memory usage improvements in multi-key ``groupby`` (:issue:`8128`) +- Performance improvements in groupby ``.agg`` and ``.apply`` where builtins max/min were not mapped to numpy/cythonized versions (:issue:`7722`) +- Performance improvement in writing to sql (``to_sql``) of up to 50% (:issue:`8208`). +- Performance benchmarking of groupby for large values of ``ngroups`` (:issue:`6787`) +- Performance improvement in ``CustomBusinessDay``, ``CustomBusinessMonth`` (:issue:`8236`) +- Performance improvement for ``MultiIndex.values`` for multi-level indexes containing datetimes (:issue:`8543`) + + + +.. _whatsnew_0150.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in ``pivot_table`` when using margins and a dict ``aggfunc`` (:issue:`8349`) +- Bug in ``read_csv`` where ``squeeze=True`` would return a view (:issue:`8217`) +- Bug in checking of table name in ``read_sql`` in certain cases (:issue:`7826`).
+- Bug in ``DataFrame.groupby`` where ``Grouper`` does not recognize level when frequency is specified (:issue:`7885`) +- Bug in multiindexes dtypes getting mixed up when DataFrame is saved to SQL table (:issue:`8021`) +- Bug in ``Series`` 0-division with a float and integer operand dtypes (:issue:`7785`) +- Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`) +- Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`) +- Bug in ``HDFStore.select_column()`` not preserving UTC timezone info when selecting a ``DatetimeIndex`` (:issue:`7777`) +- Bug in ``to_datetime`` when ``format='%Y%m%d'`` and ``coerce=True`` are specified, where previously an object array was returned (rather than + a coerced time-series with ``NaT``), (:issue:`7930`) +- Bug in ``DatetimeIndex`` and ``PeriodIndex`` in-place addition and subtraction cause different result from normal one (:issue:`6527`) +- Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raise ``TypeError`` (:issue:`7741`) +- Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`) +- Bug in MultiIndex slicing with missing indexers (:issue:`7866`) +- Bug in MultiIndex slicing with various edge cases (:issue:`8132`) +- Regression in MultiIndex indexing with a non-scalar type object (:issue:`7914`) +- Bug in ``Timestamp`` comparisons with ``==`` and ``int64`` dtype (:issue:`8058`) +- Bug in pickles contains ``DateOffset`` may raise ``AttributeError`` when ``normalize`` attribute is referred internally (:issue:`7748`) +- Bug in ``Panel`` when using ``major_xs`` and ``copy=False`` is passed (deprecation warning fails because of missing ``warnings``) (:issue:`8152`). +- Bug in pickle deserialization that failed for pre-0.14.1 containers with dup items trying to avoid ambiguity + when matching block and manager items, when there's only one block there's no ambiguity (:issue:`7794`) +- Bug in putting a ``PeriodIndex`` into a ``Series`` would convert to ``int64`` dtype, rather than ``object`` of ``Periods`` (:issue:`7932`) +- Bug in ``HDFStore`` iteration when passing a where (:issue:`8014`) +- Bug in ``DataFrameGroupby.transform`` when transforming with a passed non-sorted key (:issue:`8046`, :issue:`8430`) +- Bug in repeated timeseries line and area plot may result in ``ValueError`` or incorrect kind (:issue:`7733`) +- Bug in inference in a ``MultiIndex`` with ``datetime.date`` inputs (:issue:`7888`) +- Bug in ``get`` where an ``IndexError`` would not cause the default value to be returned (:issue:`7725`) +- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may reset nanosecond (:issue:`7697`) +- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may raise ``AttributeError`` if ``Timestamp`` has ``dateutil`` tzinfo (:issue:`7697`) +- Bug in sorting a MultiIndex frame with a ``Float64Index`` (:issue:`8017`) +- Bug in inconsistent panel setitem with a rhs of a ``DataFrame`` for alignment (:issue:`7763`) +- Bug in ``is_superperiod`` and ``is_subperiod`` cannot handle higher frequencies than ``S`` (:issue:`7760`, :issue:`7772`, :issue:`7803`) +- Bug in 32-bit platforms with ``Series.shift`` (:issue:`8129`) +- Bug in ``PeriodIndex.unique`` returns int64 ``np.ndarray`` (:issue:`7540`) +- Bug in ``groupby.apply`` with a non-affecting mutation in the function (:issue:`8467`) +- Bug in ``DataFrame.reset_index`` which has ``MultiIndex`` contains ``PeriodIndex`` or ``DatetimeIndex`` with tz raises 
``ValueError`` (:issue:`7746`, :issue:`7793`) +- Bug in ``DataFrame.plot`` with ``subplots=True`` may draw unnecessary minor xticks and yticks (:issue:`7801`) +- Bug in ``StataReader`` which did not read variable labels in 117 files due to difference between Stata documentation and implementation (:issue:`7816`) +- Bug in ``StataReader`` where strings were always converted to 244 characters-fixed width irrespective of underlying string size (:issue:`7858`) +- Bug in ``DataFrame.plot`` and ``Series.plot`` may ignore ``rot`` and ``fontsize`` keywords (:issue:`7844`) +- Bug in ``DatetimeIndex.value_counts`` doesn't preserve tz (:issue:`7735`) +- Bug in ``PeriodIndex.value_counts`` results in ``Int64Index`` (:issue:`7735`) +- Bug in ``DataFrame.join`` when doing left join on index and there are multiple matches (:issue:`5391`) +- Bug in ``GroupBy.transform()`` where int groups with a transform that + didn't preserve the index were incorrectly truncated (:issue:`7972`). +- Bug in ``groupby`` where callable objects without name attributes would take the wrong path, + and produce a ``DataFrame`` instead of a ``Series`` (:issue:`7929`) +- Bug in ``groupby`` error message when a DataFrame grouping column is duplicated (:issue:`7511`) +- Bug in ``read_html`` where the ``infer_types`` argument forced coercion of + date-likes incorrectly (:issue:`7762`, :issue:`7032`). +- Bug in ``Series.str.cat`` with an index which was filtered as to not include the first item (:issue:`7857`) +- Bug in ``Timestamp`` cannot parse ``nanosecond`` from string (:issue:`7878`) +- Bug in ``Timestamp`` with string offset and ``tz`` results incorrect (:issue:`7833`) +- Bug in ``tslib.tz_convert`` and ``tslib.tz_convert_single`` may return different results (:issue:`7798`) +- Bug in ``DatetimeIndex.intersection`` of non-overlapping timestamps with tz raises ``IndexError`` (:issue:`7880`) +- Bug in alignment with TimeOps and non-unique indexes (:issue:`8363`) +- Bug in ``GroupBy.filter()`` where fast path vs. slow path made the filter + return a non scalar value that appeared valid but wasn't (:issue:`7870`). +- Bug in ``date_range()``/``DatetimeIndex()`` when the timezone was inferred from input dates yet incorrect + times were returned when crossing DST boundaries (:issue:`7835`, :issue:`7901`). +- Bug in ``to_excel()`` where a negative sign was being prepended to positive infinity and was absent for negative infinity (:issue:`7949`) +- Bug in area plot draws legend with incorrect ``alpha`` when ``stacked=True`` (:issue:`8027`) +- ``Period`` and ``PeriodIndex`` addition/subtraction with ``np.timedelta64`` results in incorrect internal representations (:issue:`7740`) +- Bug in ``Holiday`` with no offset or observance (:issue:`7987`) +- Bug in ``DataFrame.to_latex`` formatting when columns or index is a ``MultiIndex`` (:issue:`7982`). +- Bug in ``DateOffset`` around Daylight Savings Time produces unexpected results (:issue:`5175`). +- Bug in ``DataFrame.shift`` where empty columns would throw ``ZeroDivisionError`` on numpy 1.7 (:issue:`8019`) +- Bug in installation where ``html_encoding/*.html`` wasn't installed and + therefore some tests were not running correctly (:issue:`7927`). +- Bug in ``read_html`` where ``bytes`` objects were not tested for in + ``_read`` (:issue:`7927`). 
+- Bug in ``DataFrame.stack()`` when one of the column levels was a datelike (:issue:`8039`) +- Bug in broadcasting numpy scalars with ``DataFrame`` (:issue:`8116`) +- Bug in ``pivot_table`` performed with nameless ``index`` and ``columns`` raises ``KeyError`` (:issue:`8103`) +- Bug in ``DataFrame.plot(kind='scatter')`` draws points and errorbars with different colors when the color is specified by ``c`` keyword (:issue:`8081`) +- Bug in ``Float64Index`` where ``iat`` and ``at`` were not being tested and were + failing (:issue:`8092`). +- Bug in ``DataFrame.boxplot()`` where y-limits were not set correctly when + producing multiple axes (:issue:`7528`, :issue:`5517`). +- Bug in ``read_csv`` where line comments were not handled correctly given + a custom line terminator or ``delim_whitespace=True`` (:issue:`8122`). +- Bug in ``read_html`` where empty tables caused a ``StopIteration`` (:issue:`7575`) +- Bug in casting when setting a column in a same-dtype block (:issue:`7704`) +- Bug in accessing groups from a ``GroupBy`` when the original grouper + was a tuple (:issue:`8121`). +- Bug in ``.at`` that would accept integer indexers on a non-integer index and do fallback (:issue:`7814`) +- Bug with kde plot and NaNs (:issue:`8182`) +- Bug in ``GroupBy.count`` with float32 data type where NaN values were not excluded (:issue:`8169`). +- Bug with stacked barplots and NaNs (:issue:`8175`). +- Bug in resample with non-evenly divisible offsets (e.g. '7s') (:issue:`8371`) +- Bug in interpolation methods with the ``limit`` keyword when no values needed interpolating (:issue:`7173`). +- Bug where ``col_space`` was ignored in ``DataFrame.to_string()`` when ``header=False`` (:issue:`8230`). +- Bug with ``DatetimeIndex.asof`` incorrectly matching partial strings and returning the wrong date (:issue:`8245`). +- Bug in plotting methods modifying the global matplotlib rcParams (:issue:`8242`). +- Bug in ``DataFrame.__setitem__`` that caused errors when setting a dataframe column to a sparse array (:issue:`8131`) +- Bug where ``DataFrame.boxplot()`` failed when entire column was empty (:issue:`8181`). +- Bug with mixed-up variables in ``radviz`` visualization (:issue:`8199`). +- Bug in ``to_clipboard`` that would clip long column data (:issue:`8305`) +- Bug in ``DataFrame`` terminal display: setting ``max_columns``/``max_rows`` to zero did not trigger auto-resizing of DataFrames to fit terminal width/height (:issue:`7180`). +- Bug in OLS where running with "cluster" and "nw_lags" parameters did not work correctly, but also did not throw an error + (:issue:`5884`). +- Bug in ``DataFrame.dropna`` that interpreted non-existent columns in the subset argument as the 'last column' (:issue:`8303`) +- Bug in ``Index.intersection`` on non-monotonic non-unique indexes (:issue:`8362`).
+- Bug in masked series assignment where mismatching types would break alignment (:issue:`8387`) +- Bug in ``NDFrame.equals`` gives false negatives with dtype=object (:issue:`8437`) +- Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`) +- Bug in ``NDFrame.loc`` indexing when row/column names were lost when target was a list/ndarray (:issue:`6552`) +- Regression in ``NDFrame.loc`` indexing when rows/columns were converted to Float64Index if target was an empty list/ndarray (:issue:`7774`) +- Bug in ``Series`` that allows it to be indexed by a ``DataFrame`` which has unexpected results. Such indexing is no longer permitted (:issue:`8444`) +- Bug in item assignment of a ``DataFrame`` with MultiIndex columns where right-hand-side columns were not aligned (:issue:`7655`) +- Suppress FutureWarning generated by NumPy when comparing object arrays containing NaN for equality (:issue:`7065`) +- Bug in ``DataFrame.eval()`` where the dtype of the ``not`` operator (``~``) + was not correctly inferred as ``bool``. + + +.. _whatsnew_0.15.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.14.1..v0.15.0 diff --git a/doc/source/whatsnew/v0.15.1.rst b/doc/source/whatsnew/v0.15.1.rst new file mode 100644 index 00000000..a1d4f9d1 --- /dev/null +++ b/doc/source/whatsnew/v0.15.1.rst @@ -0,0 +1,319 @@ +.. _whatsnew_0151: + +Version 0.15.1 (November 9, 2014) +--------------------------------- + +{{ header }} + + +This is a minor bug-fix release from 0.15.0 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +- :ref:`Enhancements ` +- :ref:`API Changes ` +- :ref:`Bug Fixes ` + +.. _whatsnew_0151.api: + +API changes +~~~~~~~~~~~ + +- ``s.dt.hour`` and other ``.dt`` accessors will now return ``np.nan`` for missing values (rather than previously -1), (:issue:`8689`) + + .. ipython:: python + + s = pd.Series(pd.date_range("20130101", periods=5, freq="D")) + s.iloc[2] = np.nan + s + + previous behavior: + + .. code-block:: ipython + + In [6]: s.dt.hour + Out[6]: + 0 0 + 1 0 + 2 -1 + 3 0 + 4 0 + dtype: int64 + + current behavior: + + .. ipython:: python + + s.dt.hour + +- ``groupby`` with ``as_index=False`` will not add erroneous extra columns to + result (:issue:`8582`): + + .. ipython:: python + + np.random.seed(2718281) + df = pd.DataFrame(np.random.randint(0, 100, (10, 2)), columns=["jim", "joe"]) + df.head() + + ts = pd.Series(5 * np.random.randint(0, 3, 10)) + + previous behavior: + + .. code-block:: ipython + + In [4]: df.groupby(ts, as_index=False).max() + Out[4]: + NaN jim joe + 0 0 72 83 + 1 5 77 84 + 2 10 96 65 + + current behavior: + + .. ipython:: python + + df.groupby(ts, as_index=False).max() + +- ``groupby`` will not erroneously exclude columns if the column name conflicts + with the grouper name (:issue:`8112`): + + .. ipython:: python + + df = pd.DataFrame({"jim": range(5), "joe": range(5, 10)}) + df + gr = df.groupby(df["jim"] < 2) + + previous behavior (excludes 1st column from output): + + .. code-block:: ipython + + In [4]: gr.apply(sum) + Out[4]: + joe + jim + False 24 + True 11 + + current behavior: + + .. ipython:: python + + gr.apply(sum) + +- Support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is + not found in the index (:issue:`7860`): + + .. ipython:: python + + s = pd.Series(["a", "b", "c", "d"], [4, 3, 2, 1]) + s + + previous behavior: + + .. 
code-block:: ipython + + In [8]: s.loc[3.5:1.5] + KeyError: 3.5 + + current behavior: + + .. ipython:: python + + s.loc[3.5:1.5] + +- ``io.data.Options`` has been fixed for a change in the format of the Yahoo Options page (:issue:`8612`), (:issue:`8741`) + + .. note:: + + As a result of a change in Yahoo's option page layout, when an expiry date is given, + ``Options`` methods now return data for a single expiry date. Previously, methods returned all + data for the selected month. + + The ``month`` and ``year`` parameters have been undeprecated and can be used to get all + options data for a given month. + + If an expiry date that is not valid is given, data for the next expiry after the given + date is returned. + + Option data frames are now saved on the instance as ``callsYYMMDD`` or ``putsYYMMDD``. Previously + they were saved as ``callsMMYY`` and ``putsMMYY``. The next expiry is saved as ``calls`` and ``puts``. + + New features: + + - The expiry parameter can now be a single date or a list-like object containing dates. + + - A new property ``expiry_dates`` was added, which returns all available expiry dates. + + Current behavior: + + .. code-block:: ipython + + In [17]: from pandas.io.data import Options + + In [18]: aapl = Options('aapl', 'yahoo') + + In [19]: aapl.get_call_data().iloc[0:5, 0:1] + Out[19]: + Last + Strike Expiry Type Symbol + 80 2014-11-14 call AAPL141114C00080000 29.05 + 84 2014-11-14 call AAPL141114C00084000 24.80 + 85 2014-11-14 call AAPL141114C00085000 24.05 + 86 2014-11-14 call AAPL141114C00086000 22.76 + 87 2014-11-14 call AAPL141114C00087000 21.74 + + In [20]: aapl.expiry_dates + Out[20]: + [datetime.date(2014, 11, 14), + datetime.date(2014, 11, 22), + datetime.date(2014, 11, 28), + datetime.date(2014, 12, 5), + datetime.date(2014, 12, 12), + datetime.date(2014, 12, 20), + datetime.date(2015, 1, 17), + datetime.date(2015, 2, 20), + datetime.date(2015, 4, 17), + datetime.date(2015, 7, 17), + datetime.date(2016, 1, 15), + datetime.date(2017, 1, 20)] + + In [21]: aapl.get_near_stock_price(expiry=aapl.expiry_dates[0:3]).iloc[0:5, 0:1] + Out[21]: + Last + Strike Expiry Type Symbol + 109 2014-11-22 call AAPL141122C00109000 1.48 + 2014-11-28 call AAPL141128C00109000 1.79 + 110 2014-11-14 call AAPL141114C00110000 0.55 + 2014-11-22 call AAPL141122C00110000 1.02 + 2014-11-28 call AAPL141128C00110000 1.32 + +.. _whatsnew_0151.datetime64_plotting: + +- pandas now also registers the ``datetime64`` dtype in matplotlib's units registry + to plot such values as datetimes. This is activated once pandas is imported. In + previous versions, plotting an array of ``datetime64`` values will have resulted + in plotted integer values. To keep the previous behaviour, you can do + ``del matplotlib.units.registry[np.datetime64]`` (:issue:`8614`). + + +.. _whatsnew_0151.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- ``concat`` permits a wider variety of iterables of pandas objects to be + passed as the first parameter (:issue:`8645`): + + .. ipython:: python + + from collections import deque + + df1 = pd.DataFrame([1, 2, 3]) + df2 = pd.DataFrame([4, 5, 6]) + + previous behavior: + + .. code-block:: ipython + + In [7]: pd.concat(deque((df1, df2))) + TypeError: first argument must be a list-like of pandas objects, you passed an object of type "deque" + + current behavior: + + .. ipython:: python + + pd.concat(deque((df1, df2))) + +- Represent ``MultiIndex`` labels with a dtype that utilizes memory based on the level size. 
In prior versions, the memory usage was a constant 8 bytes per element in each level. In addition, in prior versions, the *reported* memory usage was incorrect as it didn't show the usage for the memory occupied by the underling data array. (:issue:`8456`) + + .. ipython:: python + + dfi = pd.DataFrame( + 1, index=pd.MultiIndex.from_product([["a"], range(1000)]), columns=["A"] + ) + + previous behavior: + + .. code-block:: ipython + + # this was underreported in prior versions + In [1]: dfi.memory_usage(index=True) + Out[1]: + Index 8000 # took about 24008 bytes in < 0.15.1 + A 8000 + dtype: int64 + + + current behavior: + + .. ipython:: python + + dfi.memory_usage(index=True) + +- Added Index properties ``is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`8680`). + +- Added option to select columns when importing Stata files (:issue:`7935`) + +- Qualify memory usage in ``DataFrame.info()`` by adding ``+`` if it is a lower bound (:issue:`8578`) + +- Raise errors in certain aggregation cases where an argument such as ``numeric_only`` is not handled (:issue:`8592`). + +- Added support for 3-character ISO and non-standard country codes in :func:`io.wb.download()` (:issue:`8482`) + +- World Bank data requests now will warn/raise based + on an ``errors`` argument, as well as a list of hard-coded country codes and + the World Bank's JSON response. In prior versions, the error messages + didn't look at the World Bank's JSON response. Problem-inducing input were + simply dropped prior to the request. The issue was that many good countries + were cropped in the hard-coded approach. All countries will work now, but + some bad countries will raise exceptions because some edge cases break the + entire response. (:issue:`8482`) + +- Added option to ``Series.str.split()`` to return a ``DataFrame`` rather than a ``Series`` (:issue:`8428`) + +- Added option to ``df.info(null_counts=None|True|False)`` to override the default display options and force showing of the null-counts (:issue:`8701`) + + +.. _whatsnew_0151.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in unpickling of a ``CustomBusinessDay`` object (:issue:`8591`) +- Bug in coercing ``Categorical`` to a records array, e.g. ``df.to_records()`` (:issue:`8626`) +- Bug in ``Categorical`` not created properly with ``Series.to_frame()`` (:issue:`8626`) +- Bug in coercing in astype of a ``Categorical`` of a passed ``pd.Categorical`` (this now raises ``TypeError`` correctly), (:issue:`8626`) +- Bug in ``cut``/``qcut`` when using ``Series`` and ``retbins=True`` (:issue:`8589`) +- Bug in writing Categorical columns to an SQL database with ``to_sql`` (:issue:`8624`). +- Bug in comparing ``Categorical`` of datetime raising when being compared to a scalar datetime (:issue:`8687`) +- Bug in selecting from a ``Categorical`` with ``.iloc`` (:issue:`8623`) +- Bug in groupby-transform with a Categorical (:issue:`8623`) +- Bug in duplicated/drop_duplicates with a Categorical (:issue:`8623`) +- Bug in ``Categorical`` reflected comparison operator raising if the first argument was a numpy array scalar (e.g. 
np.int64) (:issue:`8658`) +- Bug in Panel indexing with a list-like (:issue:`8710`) +- Compat issue is ``DataFrame.dtypes`` when ``options.mode.use_inf_as_null`` is True (:issue:`8722`) +- Bug in ``read_csv``, ``dialect`` parameter would not take a string (:issue:`8703`) +- Bug in slicing a MultiIndex level with an empty-list (:issue:`8737`) +- Bug in numeric index operations of add/sub with Float/Index Index with numpy arrays (:issue:`8608`) +- Bug in setitem with empty indexer and unwanted coercion of dtypes (:issue:`8669`) +- Bug in ix/loc block splitting on setitem (manifests with integer-like dtypes, e.g. datetime64) (:issue:`8607`) +- Bug when doing label based indexing with integers not found in the index for + non-unique but monotonic indexes (:issue:`8680`). +- Bug when indexing a Float64Index with ``np.nan`` on numpy 1.7 (:issue:`8980`). +- Fix ``shape`` attribute for ``MultiIndex`` (:issue:`8609`) +- Bug in ``GroupBy`` where a name conflict between the grouper and columns + would break ``groupby`` operations (:issue:`7115`, :issue:`8112`) +- Fixed a bug where plotting a column ``y`` and specifying a label would mutate the index name of the original DataFrame (:issue:`8494`) +- Fix regression in plotting of a DatetimeIndex directly with matplotlib (:issue:`8614`). +- Bug in ``date_range`` where partially-specified dates would incorporate current date (:issue:`6961`) +- Bug in Setting by indexer to a scalar value with a mixed-dtype ``Panel4d`` was failing (:issue:`8702`) +- Bug where ``DataReader``'s would fail if one of the symbols passed was invalid. Now returns data for valid symbols and np.nan for invalid (:issue:`8494`) +- Bug in ``get_quote_yahoo`` that wouldn't allow non-float return values (:issue:`5229`). + + +.. _whatsnew_0.15.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.15.0..v0.15.1 diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst new file mode 100644 index 00000000..2dae76dd --- /dev/null +++ b/doc/source/whatsnew/v0.15.2.rst @@ -0,0 +1,258 @@ +.. _whatsnew_0152: + +Version 0.15.2 (December 12, 2014) +---------------------------------- + +{{ header }} + + +This is a minor release from 0.15.1 and includes a large number of bug fixes +along with several new features, enhancements, and performance improvements. +A small number of API changes were necessary to fix existing bugs. +We recommend that all users upgrade to this version. + +- :ref:`Enhancements ` +- :ref:`API Changes ` +- :ref:`Performance Improvements ` +- :ref:`Bug Fixes ` + +.. _whatsnew_0152.api: + +API changes +~~~~~~~~~~~ + +- Indexing in ``MultiIndex`` beyond lex-sort depth is now supported, though + a lexically sorted index will have a better performance. (:issue:`2646`) + + .. ipython:: python + :okwarning: + + df = pd.DataFrame({'jim':[0, 0, 1, 1], + 'joe':['x', 'x', 'z', 'y'], + 'jolie':np.random.rand(4)}).set_index(['jim', 'joe']) + df + df.index.lexsort_depth + + # in prior versions this would raise a KeyError + # will now show a PerformanceWarning + df.loc[(1, 'z')] + + # lexically sorting + df2 = df.sort_index() + df2 + df2.index.lexsort_depth + df2.loc[(1,'z')] + +- Bug in unique of Series with ``category`` dtype, which returned all categories regardless + whether they were "used" or not (see :issue:`8559` for the discussion). + Previous behaviour was to return all categories: + + .. 
code-block:: ipython + + In [3]: cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c']) + + In [4]: cat + Out[4]: + [a, b, a] + Categories (3, object): [a < b < c] + + In [5]: cat.unique() + Out[5]: array(['a', 'b', 'c'], dtype=object) + + Now, only the categories that do effectively occur in the array are returned: + + .. ipython:: python + + cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c']) + cat.unique() + +- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters. ``Series.all``, ``Series.any``, ``Index.all``, and ``Index.any`` no longer support the ``out`` and ``keepdims`` parameters, which existed for compatibility with ndarray. Various index types no longer support the ``all`` and ``any`` aggregation functions and will now raise ``TypeError``. (:issue:`8302`). + +- Allow equality comparisons of Series with a categorical dtype and object dtype; previously these would raise ``TypeError`` (:issue:`8938`) + +- Bug in ``NDFrame``: conflicting attribute/column names now behave consistently between getting and setting. Previously, when both a column and attribute named ``y`` existed, ``data.y`` would return the attribute, while ``data.y = z`` would update the column (:issue:`8994`) + + .. ipython:: python + + data = pd.DataFrame({'x': [1, 2, 3]}) + data.y = 2 + data['y'] = [2, 4, 6] + data + + # this assignment was inconsistent + data.y = 5 + + Old behavior: + + .. code-block:: ipython + + In [6]: data.y + Out[6]: 2 + + In [7]: data['y'].values + Out[7]: array([5, 5, 5]) + + New behavior: + + .. ipython:: python + + data.y + data['y'].values + +- ``Timestamp('now')`` is now equivalent to ``Timestamp.now()`` in that it returns the local time rather than UTC. Also, ``Timestamp('today')`` is now equivalent to ``Timestamp.today()`` and both have ``tz`` as a possible argument. (:issue:`9000`) + +- Fix negative step support for label-based slices (:issue:`8753`) + + Old behavior: + + .. code-block:: ipython + + In [1]: s = pd.Series(np.arange(3), ['a', 'b', 'c']) + Out[1]: + a 0 + b 1 + c 2 + dtype: int64 + + In [2]: s.loc['c':'a':-1] + Out[2]: + c 2 + dtype: int64 + + New behavior: + + .. ipython:: python + + s = pd.Series(np.arange(3), ['a', 'b', 'c']) + s.loc['c':'a':-1] + + +.. _whatsnew_0152.enhancements: + +Enhancements +~~~~~~~~~~~~ + +``Categorical`` enhancements: + +- Added ability to export Categorical data to Stata (:issue:`8633`). See :ref:`here ` for limitations of categorical variables exported to Stata data files. +- Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`). See :ref:`here ` for more information on importing categorical variables from Stata data files. +- Added ability to export Categorical data to/from HDF5 (:issue:`7621`). Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. See :ref:`here ` for an example and caveats w.r.t. prior versions of pandas. +- Added support for ``searchsorted()`` on ``Categorical`` class (:issue:`8420`). + +Other enhancements: + +- Added the ability to specify the SQL type of columns when writing a DataFrame + to a database (:issue:`8778`). + For example, specifying to use the sqlalchemy ``String`` type instead of the + default ``Text`` type for string columns: + + .. 
code-block:: python + + from sqlalchemy.types import String + data.to_sql('data_dtype', engine, dtype={'Col_1': String}) # noqa F821 + +- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`): + + .. ipython:: python + :okwarning: + + s = pd.Series([False, True, False], index=[0, 0, 1]) + s.any(level=0) + +- ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`): + + .. code-block:: python + + >>> p = pd.Panel(np.random.rand(2, 5, 4) > 0.1) + >>> p.all() + 0 1 2 3 + 0 True True True True + 1 True False True True + 2 True True True True + 3 False True False True + 4 True True True True + +- Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on ``Timestamp`` class (:issue:`5351`). +- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here `__. +- ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`). +- ``Timedelta`` now supports arithmetic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`). +- Added ``Timedelta.to_timedelta64()`` method to the public API (:issue:`8884`). +- Added ``gbq.generate_bq_schema()`` function to the gbq module (:issue:`8325`). +- ``Series`` now works with map objects the same way as generators (:issue:`8909`). +- Added context manager to ``HDFStore`` for automatic closing (:issue:`8791`). +- ``to_datetime`` gains an ``exact`` keyword to allow for a format to not require an exact match for a provided format string (if its ``False``). ``exact`` defaults to ``True`` (meaning that exact matching is still the default) (:issue:`8904`) +- Added ``axvlines`` boolean option to parallel_coordinates plot function, determines whether vertical lines will be printed, default is True +- Added ability to read table footers to read_html (:issue:`8552`) +- ``to_sql`` now infers data types of non-NA values for columns that contain NA values and have dtype ``object`` (:issue:`8778`). + + +.. _whatsnew_0152.performance: + +Performance +~~~~~~~~~~~ + +- Reduce memory usage when skiprows is an integer in read_csv (:issue:`8681`) +- Performance boost for ``to_datetime`` conversions with a passed ``format=``, and the ``exact=False`` (:issue:`8904`) + + +.. _whatsnew_0152.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in concat of Series with ``category`` dtype which were coercing to ``object``. (:issue:`8641`) +- Bug in Timestamp-Timestamp not returning a Timedelta type and datelike-datelike ops with timezones (:issue:`8865`) +- Made consistent a timezone mismatch exception (either tz operated with None or incompatible timezone), will now return ``TypeError`` rather than ``ValueError`` (a couple of edge cases only), (:issue:`8865`) +- Bug in using a ``pd.Grouper(key=...)`` with no level/axis or level only (:issue:`8795`, :issue:`8866`) +- Report a ``TypeError`` when invalid/no parameters are passed in a groupby (:issue:`8015`) +- Bug in packaging pandas with ``py2app/cx_Freeze`` (:issue:`8602`, :issue:`8831`) +- Bug in ``groupby`` signatures that didn't include \*args or \*\*kwargs (:issue:`8733`). +- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo and when it receives no data from Yahoo (:issue:`8761`), (:issue:`8783`). 
+- Unclear error message in csv parsing when passing dtype and names and the parsed data is a different data type (:issue:`8833`) +- Bug in slicing a MultiIndex with an empty list and at least one boolean indexer (:issue:`8781`) +- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`). +- ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`). +- Fixed several outstanding bugs for ``Timedelta`` arithmetic and comparisons (:issue:`8813`, :issue:`5963`, :issue:`5436`). +- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`) +- ``slice`` string method now takes step into account (:issue:`8754`) +- Bug in ``BlockManager`` where setting values with different type would break block integrity (:issue:`8850`) +- Bug in ``DatetimeIndex`` when using ``time`` object as key (:issue:`8667`) +- Bug in ``merge`` where ``how='left'`` and ``sort=False`` would not preserve left frame order (:issue:`7331`) +- Bug in ``MultiIndex.reindex`` where reindexing at level would not reorder labels (:issue:`4088`) +- Bug in certain operations with dateutil timezones, manifesting with dateutil 2.3 (:issue:`8639`) +- Regression in DatetimeIndex iteration with a Fixed/Local offset timezone (:issue:`8890`) +- Bug in ``to_datetime`` when parsing a nanoseconds using the ``%f`` format (:issue:`8989`) +- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo and when it receives no data from Yahoo (:issue:`8761`), (:issue:`8783`). +- Fix: The font size was only set on x axis if vertical or the y axis if horizontal. (:issue:`8765`) +- Fixed division by 0 when reading big csv files in python 3 (:issue:`8621`) +- Bug in outputting a MultiIndex with ``to_html,index=False`` which would add an extra column (:issue:`8452`) +- Imported categorical variables from Stata files retain the ordinal information in the underlying data (:issue:`8836`). +- Defined ``.size`` attribute across ``NDFrame`` objects to provide compat with numpy >= 1.9.1; buggy with ``np.array_split`` (:issue:`8846`) +- Skip testing of histogram plots for matplotlib <= 1.2 (:issue:`8648`). +- Bug where ``get_data_google`` returned object dtypes (:issue:`3995`) +- Bug in ``DataFrame.stack(..., dropna=False)`` when the DataFrame's ``columns`` is a ``MultiIndex`` + whose ``labels`` do not reference all its ``levels``. (:issue:`8844`) +- Bug in that Option context applied on ``__enter__`` (:issue:`8514`) +- Bug in resample that causes a ValueError when resampling across multiple days + and the last offset is not calculated from the start of the range (:issue:`8683`) +- Bug where ``DataFrame.plot(kind='scatter')`` fails when checking if an np.array is in the DataFrame (:issue:`8852`) +- Bug in ``pd.infer_freq/DataFrame.inferred_freq`` that prevented proper sub-daily frequency inference when the index contained DST days (:issue:`8772`). +- Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`). +- Bugs when trying to stack multiple columns, when some (or all) of the level names are numbers (:issue:`8584`). 
+- Bug in ``MultiIndex`` where ``__contains__`` returns wrong result if index is not lexically sorted or unique (:issue:`7724`) +- BUG CSV: fix problem with trailing white space in skipped rows, (:issue:`8679`), (:issue:`8661`), (:issue:`8983`) +- Regression in ``Timestamp`` does not parse 'Z' zone designator for UTC (:issue:`8771`) +- Bug in ``StataWriter`` the produces writes strings with 244 characters irrespective of actual size (:issue:`8969`) +- Fixed ValueError raised by cummin/cummax when datetime64 Series contains NaT. (:issue:`8965`) +- Bug in DataReader returns object dtype if there are missing values (:issue:`8980`) +- Bug in plotting if sharex was enabled and index was a timeseries, would show labels on multiple axes (:issue:`3964`). +- Bug where passing a unit to the TimedeltaIndex constructor applied the to nano-second conversion twice. (:issue:`9011`). +- Bug in plotting of a period-like array (:issue:`9012`) + + +.. _whatsnew_0.15.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.15.1..v0.15.2 diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst new file mode 100644 index 00000000..8d0d6854 --- /dev/null +++ b/doc/source/whatsnew/v0.16.0.rst @@ -0,0 +1,690 @@ +.. _whatsnew_0160: + +Version 0.16.0 (March 22, 2015) +------------------------------- + +{{ header }} + + +This is a major release from 0.15.2 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- ``DataFrame.assign`` method, see :ref:`here ` +- ``Series.to_coo/from_coo`` methods to interact with ``scipy.sparse``, see :ref:`here ` +- Backwards incompatible change to ``Timedelta`` to conform the ``.seconds`` attribute with ``datetime.timedelta``, see :ref:`here ` +- Changes to the ``.loc`` slicing API to conform with the behavior of ``.ix`` see :ref:`here ` +- Changes to the default for ordering in the ``Categorical`` constructor, see :ref:`here ` +- Enhancement to the ``.str`` accessor to make string operations easier, see :ref:`here ` +- The ``pandas.tools.rplot``, ``pandas.sandbox.qtpandas`` and ``pandas.rpy`` + modules are deprecated. We refer users to external packages like + `seaborn `_, + `pandas-qt `_ and + `rpy2 `_ for similar or equivalent + functionality, see :ref:`here ` + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.16.0 + :local: + :backlinks: none + + +.. _whatsnew_0160.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0160.enhancements.assign: + +DataFrame assign +^^^^^^^^^^^^^^^^ + +Inspired by `dplyr's +`__ ``mutate`` verb, DataFrame has a new +:meth:`~pandas.DataFrame.assign` method. +The function signature for ``assign`` is simply ``**kwargs``. The keys +are the column names for the new fields, and the values are either a value +to be inserted (for example, a ``Series`` or NumPy array), or a function +of one argument to be called on the ``DataFrame``. The new values are inserted, +and the entire DataFrame (with all original and new columns) is returned. + +.. ipython:: python + + iris = pd.read_csv('data/iris.data') + iris.head() + + iris.assign(sepal_ratio=iris['SepalWidth'] / iris['SepalLength']).head() + +Above was an example of inserting a precomputed value. We can also pass in +a function to be evaluated. + +.. 
ipython:: python + + iris.assign(sepal_ratio=lambda x: (x['SepalWidth'] + / x['SepalLength'])).head() + +The power of ``assign`` comes when used in chains of operations. For example, +we can limit the DataFrame to just those with a Sepal Length greater than 5, +calculate the ratio, and plot + +.. ipython:: python + + iris = pd.read_csv('data/iris.data') + (iris.query('SepalLength > 5') + .assign(SepalRatio=lambda x: x.SepalWidth / x.SepalLength, + PetalRatio=lambda x: x.PetalWidth / x.PetalLength) + .plot(kind='scatter', x='SepalRatio', y='PetalRatio')) + +.. image:: ../_static/whatsnew_assign.png + :scale: 50 % + +See the :ref:`documentation ` for more. (:issue:`9229`) + + +.. _whatsnew_0160.enhancements.sparse: + +Interaction with scipy.sparse +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: + +.. code-block:: python + + s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) + s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + + s + + # SparseSeries + ss = s.to_sparse() + ss + + A, rows, columns = ss.to_coo(row_levels=['A', 'B'], + column_levels=['C', 'D'], + sort_labels=False) + + A + A.todense() + rows + columns + +The from_coo method is a convenience method for creating a ``SparseSeries`` +from a ``scipy.sparse.coo_matrix``: + +.. code-block:: python + + from scipy import sparse + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + A + A.todense() + + ss = pd.SparseSeries.from_coo(A) + ss + +.. _whatsnew_0160.enhancements.string: + +String methods enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Following new methods are accessible via ``.str`` accessor to apply the function to each values. This is intended to make it more consistent with standard methods on strings. (:issue:`9282`, :issue:`9352`, :issue:`9386`, :issue:`9387`, :issue:`9439`) + + ============= ============= ============= =============== =============== + .. .. Methods .. .. + ============= ============= ============= =============== =============== + ``isalnum()`` ``isalpha()`` ``isdigit()`` ``isdigit()`` ``isspace()`` + ``islower()`` ``isupper()`` ``istitle()`` ``isnumeric()`` ``isdecimal()`` + ``find()`` ``rfind()`` ``ljust()`` ``rjust()`` ``zfill()`` + ============= ============= ============= =============== =============== + + .. ipython:: python + + s = pd.Series(['abcd', '3456', 'EFGH']) + s.str.isalpha() + s.str.find('ab') + +- :meth:`Series.str.pad` and :meth:`Series.str.center` now accept ``fillchar`` option to specify filling character (:issue:`9352`) + + .. ipython:: python + + s = pd.Series(['12', '300', '25']) + s.str.pad(5, fillchar='_') + +- Added :meth:`Series.str.slice_replace`, which previously raised ``NotImplementedError`` (:issue:`8888`) + + .. ipython:: python + + s = pd.Series(['ABCD', 'EFGH', 'IJK']) + s.str.slice_replace(1, 3, 'X') + # replaced with empty char + s.str.slice_replace(0, 1) + +.. _whatsnew_0160.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Reindex now supports ``method='nearest'`` for frames or series with a monotonic increasing or decreasing index (:issue:`9258`): + + .. 
ipython:: python + + df = pd.DataFrame({'x': range(5)}) + df.reindex([0.2, 1.8, 3.5], method='nearest') + + This method is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods. + +- The ``read_excel()`` function's :ref:`sheetname ` argument now accepts a list and ``None``, to get multiple or all sheets respectively. If more than one sheet is specified, a dictionary is returned. (:issue:`9450`) + + .. code-block:: python + + # Returns the 1st and 4th sheet, as a dictionary of DataFrames. + pd.read_excel('path_to_file.xls', sheetname=['Sheet1', 3]) + + +- Allow Stata files to be read incrementally with an iterator; support for long strings in Stata files. See the docs :ref:`here` (:issue:`9493`:). +- Paths beginning with ~ will now be expanded to begin with the user's home directory (:issue:`9066`) +- Added time interval selection in ``get_data_yahoo`` (:issue:`9071`) +- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`) +- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`) +- Lag parameter was added to the autocorrelation method of ``Series``, defaults to lag-1 autocorrelation (:issue:`9192`) +- ``Timedelta`` will now accept ``nanoseconds`` keyword in constructor (:issue:`9273`) +- SQL code now safely escapes table and column names (:issue:`8986`) +- Added auto-complete for ``Series.str.``, ``Series.dt.`` and ``Series.cat.`` (:issue:`9322`) +- ``Index.get_indexer`` now supports ``method='pad'`` and ``method='backfill'`` even for any target array, not just monotonic targets. These methods also work for monotonic decreasing as well as monotonic increasing indexes (:issue:`9258`). +- ``Index.asof`` now works on all index types (:issue:`9258`). +- A ``verbose`` argument has been augmented in ``io.read_excel()``, defaults to False. Set to True to print sheet names as they are parsed. (:issue:`9450`) +- Added ``days_in_month`` (compatibility alias ``daysinmonth``) property to ``Timestamp``, ``DatetimeIndex``, ``Period``, ``PeriodIndex``, and ``Series.dt`` (:issue:`9572`) +- Added ``decimal`` option in ``to_csv`` to provide formatting for non-'.' decimal separators (:issue:`781`) +- Added ``normalize`` option for ``Timestamp`` to normalized to midnight (:issue:`8794`) +- Added example for ``DataFrame`` import to R using HDF5 file and ``rhdf5`` + library. See the :ref:`documentation ` for more + (:issue:`9636`). + +.. _whatsnew_0160.api: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0160.api_breaking: + +.. _whatsnew_0160.api_breaking.timedelta: + +Changes in timedelta +^^^^^^^^^^^^^^^^^^^^ + +In v0.15.0 a new scalar type ``Timedelta`` was introduced, that is a +sub-class of ``datetime.timedelta``. Mentioned :ref:`here ` was a notice of an API change w.r.t. the ``.seconds`` accessor. The intent was to provide a user-friendly set of accessors that give the 'natural' value for that unit, e.g. if you had a ``Timedelta('1 day, 10:11:12')``, then ``.seconds`` would return 12. However, this is at odds with the definition of ``datetime.timedelta``, which defines ``.seconds`` as ``10 * 3600 + 11 * 60 + 12 == 36672``. + +So in v0.16.0, we are restoring the API to match that of ``datetime.timedelta``. Further, the component values are still available through the ``.components`` accessor. This affects the ``.seconds`` and ``.microseconds`` accessors, and removes the ``.hours``, ``.minutes``, ``.milliseconds`` accessors. 
These changes affect ``TimedeltaIndex`` and the Series ``.dt`` accessor as well. (:issue:`9185`, :issue:`9139`) + +Previous behavior + +.. code-block:: ipython + + In [2]: t = pd.Timedelta('1 day, 10:11:12.100123') + + In [3]: t.days + Out[3]: 1 + + In [4]: t.seconds + Out[4]: 12 + + In [5]: t.microseconds + Out[5]: 123 + +New behavior + +.. ipython:: python + + t = pd.Timedelta('1 day, 10:11:12.100123') + t.days + t.seconds + t.microseconds + +Using ``.components`` allows the full component access + +.. ipython:: python + + t.components + t.components.seconds + +.. _whatsnew_0160.api_breaking.indexing: + +Indexing changes +^^^^^^^^^^^^^^^^ + +The behavior of a small sub-set of edge cases for using ``.loc`` have changed (:issue:`8613`). Furthermore we have improved the content of the error messages that are raised: + +- Slicing with ``.loc`` where the start and/or stop bound is not found in the index is now allowed; this previously would raise a ``KeyError``. This makes the behavior the same as ``.ix`` in this case. This change is only for slicing, not when indexing with a single label. + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 4), + columns=list('ABCD'), + index=pd.date_range('20130101', periods=5)) + df + s = pd.Series(range(5), [-2, -1, 1, 2, 3]) + s + + Previous behavior + + .. code-block:: ipython + + In [4]: df.loc['2013-01-02':'2013-01-10'] + KeyError: 'stop bound [2013-01-10] is not in the [index]' + + In [6]: s.loc[-10:3] + KeyError: 'start bound [-10] is not the [index]' + + New behavior + + .. ipython:: python + + df.loc['2013-01-02':'2013-01-10'] + s.loc[-10:3] + +- Allow slicing with float-like values on an integer index for ``.ix``. Previously this was only enabled for ``.loc``: + + Previous behavior + + .. code-block:: ipython + + In [8]: s.ix[-1.0:2] + TypeError: the slice start value [-1.0] is not a proper indexer for this index type (Int64Index) + + New behavior + + .. code-block:: python + + In [2]: s.ix[-1.0:2] + Out[2]: + -1 1 + 1 2 + 2 3 + dtype: int64 + +- Provide a useful exception for indexing with an invalid type for that index when using ``.loc``. For example trying to use ``.loc`` on an index of type ``DatetimeIndex`` or ``PeriodIndex`` or ``TimedeltaIndex``, with an integer (or a float). + + Previous behavior + + .. code-block:: python + + In [4]: df.loc[2:3] + KeyError: 'start bound [2] is not the [index]' + + New behavior + + .. code-block:: ipython + + In [4]: df.loc[2:3] + TypeError: Cannot do slice indexing on with keys + + +.. _whatsnew_0160.api_breaking.categorical: + +Categorical changes +^^^^^^^^^^^^^^^^^^^ + +In prior versions, ``Categoricals`` that had an unspecified ordering (meaning no ``ordered`` keyword was passed) were defaulted as ``ordered`` Categoricals. Going forward, the ``ordered`` keyword in the ``Categorical`` constructor will default to ``False``. Ordering must now be explicit. + +Furthermore, previously you *could* change the ``ordered`` attribute of a Categorical by just setting the attribute, e.g. ``cat.ordered=True``; This is now deprecated and you should use ``cat.as_ordered()`` or ``cat.as_unordered()``. These will by default return a **new** object and not modify the existing object. (:issue:`9347`, :issue:`9190`) + +Previous behavior + +.. 
code-block:: ipython + + In [3]: s = pd.Series([0, 1, 2], dtype='category') + + In [4]: s + Out[4]: + 0 0 + 1 1 + 2 2 + dtype: category + Categories (3, int64): [0 < 1 < 2] + + In [5]: s.cat.ordered + Out[5]: True + + In [6]: s.cat.ordered = False + + In [7]: s + Out[7]: + 0 0 + 1 1 + 2 2 + dtype: category + Categories (3, int64): [0, 1, 2] + +New behavior + +.. ipython:: python + + s = pd.Series([0, 1, 2], dtype='category') + s + s.cat.ordered + s = s.cat.as_ordered() + s + s.cat.ordered + + # you can set in the constructor of the Categorical + s = pd.Series(pd.Categorical([0, 1, 2], ordered=True)) + s + s.cat.ordered + +For ease of creation of series of categorical data, we have added the ability to pass keywords when calling ``.astype()``. These are passed directly to the constructor. + +.. code-block:: python + + In [54]: s = pd.Series(["a", "b", "c", "a"]).astype('category', ordered=True) + + In [55]: s + Out[55]: + 0 a + 1 b + 2 c + 3 a + dtype: category + Categories (3, object): [a < b < c] + + In [56]: s = (pd.Series(["a", "b", "c", "a"]) + ....: .astype('category', categories=list('abcdef'), ordered=False)) + + In [57]: s + Out[57]: + 0 a + 1 b + 2 c + 3 a + dtype: category + Categories (6, object): [a, b, c, d, e, f] + + +.. _whatsnew_0160.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``Index.duplicated`` now returns ``np.array(dtype=bool)`` rather than ``Index(dtype=object)`` containing ``bool`` values. (:issue:`8875`) +- ``DataFrame.to_json`` now returns accurate type serialisation for each column for frames of mixed dtype (:issue:`9037`) + + Previously data was coerced to a common dtype before serialisation, which for + example resulted in integers being serialised to floats: + + .. code-block:: ipython + + In [2]: pd.DataFrame({'i': [1,2], 'f': [3.0, 4.2]}).to_json() + Out[2]: '{"f":{"0":3.0,"1":4.2},"i":{"0":1.0,"1":2.0}}' + + Now each column is serialised using its correct dtype: + + .. code-block:: ipython + + In [2]: pd.DataFrame({'i': [1,2], 'f': [3.0, 4.2]}).to_json() + Out[2]: '{"f":{"0":3.0,"1":4.2},"i":{"0":1,"1":2}}' + +- ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex.summary`` now output the same format. (:issue:`9116`) +- ``TimedeltaIndex.freqstr`` now output the same string format as ``DatetimeIndex``. (:issue:`9116`) + +- Bar and horizontal bar plots no longer add a dashed line along the info axis. The prior style can be achieved with matplotlib's ``axhline`` or ``axvline`` methods (:issue:`9088`). + +- ``Series`` accessors ``.dt``, ``.cat`` and ``.str`` now raise ``AttributeError`` instead of ``TypeError`` if the series does not contain the appropriate type of data (:issue:`9617`). This follows Python's built-in exception hierarchy more closely and ensures that tests like ``hasattr(s, 'cat')`` are consistent on both Python 2 and 3. + +- ``Series`` now supports bitwise operation for integral types (:issue:`9016`). Previously even if the input dtypes were integral, the output dtype was coerced to ``bool``. + + Previous behavior + + .. code-block:: ipython + + In [2]: pd.Series([0, 1, 2, 3], list('abcd')) | pd.Series([4, 4, 4, 4], list('abcd')) + Out[2]: + a True + b True + c True + d True + dtype: bool + + New behavior. If the input dtypes are integral, the output dtype is also integral and the output + values are the result of the bitwise operation. + + .. 
code-block:: ipython + + In [2]: pd.Series([0, 1, 2, 3], list('abcd')) | pd.Series([4, 4, 4, 4], list('abcd')) + Out[2]: + a 4 + b 5 + c 6 + d 7 + dtype: int64 + + +- During division involving a ``Series`` or ``DataFrame``, ``0/0`` and ``0//0`` now give ``np.nan`` instead of ``np.inf``. (:issue:`9144`, :issue:`8445`) + + Previous behavior + + .. code-block:: ipython + + In [2]: p = pd.Series([0, 1]) + + In [3]: p / 0 + Out[3]: + 0 inf + 1 inf + dtype: float64 + + In [4]: p // 0 + Out[4]: + 0 inf + 1 inf + dtype: float64 + + + + New behavior + + .. ipython:: python + + p = pd.Series([0, 1]) + p / 0 + p // 0 + +- ``Series.values_counts`` and ``Series.describe`` for categorical data will now put ``NaN`` entries at the end. (:issue:`9443`) +- ``Series.describe`` for categorical data will now give counts and frequencies of 0, not ``NaN``, for unused categories (:issue:`9443`) + +- Due to a bug fix, looking up a partial string label with ``DatetimeIndex.asof`` now includes values that match the string, even if they are after the start of the partial string label (:issue:`9258`). + + Old behavior: + + .. code-block:: ipython + + In [4]: pd.to_datetime(['2000-01-31', '2000-02-28']).asof('2000-02') + Out[4]: Timestamp('2000-01-31 00:00:00') + + Fixed behavior: + + .. ipython:: python + + pd.to_datetime(['2000-01-31', '2000-02-28']).asof('2000-02') + + To reproduce the old behavior, simply add more precision to the label (e.g., use ``2000-02-01`` instead of ``2000-02``). + + +.. _whatsnew_0160.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- The ``rplot`` trellis plotting interface is deprecated and will be removed + in a future version. We refer to external packages like + `seaborn `_ for similar + but more refined functionality (:issue:`3445`). + The documentation includes some examples how to convert your existing code + from ``rplot`` to seaborn `here `__. + +- The ``pandas.sandbox.qtpandas`` interface is deprecated and will be removed in a future version. + We refer users to the external package `pandas-qt `_. (:issue:`9615`) + +- The ``pandas.rpy`` interface is deprecated and will be removed in a future version. + Similar functionality can be accessed through the `rpy2 `_ project (:issue:`9602`) + +- Adding ``DatetimeIndex/PeriodIndex`` to another ``DatetimeIndex/PeriodIndex`` is being deprecated as a set-operation. This will be changed to a ``TypeError`` in a future version. ``.union()`` should be used for the union set operation. (:issue:`9094`) +- Subtracting ``DatetimeIndex/PeriodIndex`` from another ``DatetimeIndex/PeriodIndex`` is being deprecated as a set-operation. This will be changed to an actual numeric subtraction yielding a ``TimeDeltaIndex`` in a future version. ``.difference()`` should be used for the differencing set operation. (:issue:`9094`) + + +.. _whatsnew_0160.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``DataFrame.pivot_table`` and ``crosstab``'s ``rows`` and ``cols`` keyword arguments were removed in favor + of ``index`` and ``columns`` (:issue:`6581`) +- ``DataFrame.to_excel`` and ``DataFrame.to_csv`` ``cols`` keyword argument was removed in favor of ``columns`` (:issue:`6581`) +- Removed ``convert_dummies`` in favor of ``get_dummies`` (:issue:`6581`) +- Removed ``value_range`` in favor of ``describe`` (:issue:`6581`) + +.. 
_whatsnew_0160.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Fixed a performance regression for ``.loc`` indexing with an array or list-like (:issue:`9126`:). +- ``DataFrame.to_json`` 30x performance improvement for mixed dtype frames. (:issue:`9037`) +- Performance improvements in ``MultiIndex.duplicated`` by working with labels instead of values (:issue:`9125`) +- Improved the speed of ``nunique`` by calling ``unique`` instead of ``value_counts`` (:issue:`9129`, :issue:`7771`) +- Performance improvement of up to 10x in ``DataFrame.count`` and ``DataFrame.dropna`` by taking advantage of homogeneous/heterogeneous dtypes appropriately (:issue:`9136`) +- Performance improvement of up to 20x in ``DataFrame.count`` when using a ``MultiIndex`` and the ``level`` keyword argument (:issue:`9163`) +- Performance and memory usage improvements in ``merge`` when key space exceeds ``int64`` bounds (:issue:`9151`) +- Performance improvements in multi-key ``groupby`` (:issue:`9429`) +- Performance improvements in ``MultiIndex.sortlevel`` (:issue:`9445`) +- Performance and memory usage improvements in ``DataFrame.duplicated`` (:issue:`9398`) +- Cythonized ``Period`` (:issue:`9440`) +- Decreased memory usage on ``to_hdf`` (:issue:`9648`) + +.. _whatsnew_0160.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Changed ``.to_html`` to remove leading/trailing spaces in table body (:issue:`4987`) +- Fixed issue using ``read_csv`` on s3 with Python 3 (:issue:`9452`) +- Fixed compatibility issue in ``DatetimeIndex`` affecting architectures where ``numpy.int_`` defaults to ``numpy.int32`` (:issue:`8943`) +- Bug in Panel indexing with an object-like (:issue:`9140`) +- Bug in the returned ``Series.dt.components`` index was reset to the default index (:issue:`9247`) +- Bug in ``Categorical.__getitem__/__setitem__`` with listlike input getting incorrect results from indexer coercion (:issue:`9469`) +- Bug in partial setting with a DatetimeIndex (:issue:`9478`) +- Bug in groupby for integer and datetime64 columns when applying an aggregator that caused the value to be + changed when the number was sufficiently large (:issue:`9311`, :issue:`6620`) +- Fixed bug in ``to_sql`` when mapping a ``Timestamp`` object column (datetime + column with timezone info) to the appropriate sqlalchemy type (:issue:`9085`). +- Fixed bug in ``to_sql`` ``dtype`` argument not accepting an instantiated + SQLAlchemy type (:issue:`9083`). +- Bug in ``.loc`` partial setting with a ``np.datetime64`` (:issue:`9516`) +- Incorrect dtypes inferred on datetimelike looking ``Series`` & on ``.xs`` slices (:issue:`9477`) +- Items in ``Categorical.unique()`` (and ``s.unique()`` if ``s`` is of dtype ``category``) now appear in the order in which they are originally found, not in sorted order (:issue:`9331`). This is now consistent with the behavior for other dtypes in pandas. +- Fixed bug on big endian platforms which produced incorrect results in ``StataReader`` (:issue:`8688`). +- Bug in ``MultiIndex.has_duplicates`` when having many levels causes an indexer overflow (:issue:`9075`, :issue:`5873`) +- Bug in ``pivot`` and ``unstack`` where ``nan`` values would break index alignment (:issue:`4862`, :issue:`7401`, :issue:`7403`, :issue:`7405`, :issue:`7466`, :issue:`9497`) +- Bug in left ``join`` on MultiIndex with ``sort=True`` or null values (:issue:`9210`). +- Bug in ``MultiIndex`` where inserting new keys would fail (:issue:`9250`). +- Bug in ``groupby`` when key space exceeds ``int64`` bounds (:issue:`9096`). 
+- Bug in ``unstack`` with ``TimedeltaIndex`` or ``DatetimeIndex`` and nulls (:issue:`9491`). +- Bug in ``rank`` where comparing floats with tolerance will cause inconsistent behaviour (:issue:`8365`). +- Fixed character encoding bug in ``read_stata`` and ``StataReader`` when loading data from a URL (:issue:`9231`). +- Bug in adding ``offsets.Nano`` to other offsets raises ``TypeError`` (:issue:`9284`) +- Bug in ``DatetimeIndex`` iteration, related to (:issue:`8890`), fixed in (:issue:`9100`) +- Bugs in ``resample`` around DST transitions. This required fixing offset classes so they behave correctly on DST transitions. (:issue:`5172`, :issue:`8744`, :issue:`8653`, :issue:`9173`, :issue:`9468`). +- Bug in binary operator method (eg ``.mul()``) alignment with integer levels (:issue:`9463`). +- Bug in boxplot, scatter and hexbin plot may show an unnecessary warning (:issue:`8877`) +- Bug in subplot with ``layout`` kw may show unnecessary warning (:issue:`9464`) +- Bug in using grouper functions that need passed through arguments (e.g. axis), when using wrapped function (e.g. ``fillna``), (:issue:`9221`) +- ``DataFrame`` now properly supports simultaneous ``copy`` and ``dtype`` arguments in constructor (:issue:`9099`) +- Bug in ``read_csv`` when using skiprows on a file with CR line endings with the c engine. (:issue:`9079`) +- ``isnull`` now detects ``NaT`` in ``PeriodIndex`` (:issue:`9129`) +- Bug in groupby ``.nth()`` with a multiple column groupby (:issue:`8979`) +- Bug in ``DataFrame.where`` and ``Series.where`` coerce numerics to string incorrectly (:issue:`9280`) +- Bug in ``DataFrame.where`` and ``Series.where`` raise ``ValueError`` when string list-like is passed. (:issue:`9280`) +- Accessing ``Series.str`` methods on with non-string values now raises ``TypeError`` instead of producing incorrect results (:issue:`9184`) +- Bug in ``DatetimeIndex.__contains__`` when index has duplicates and is not monotonic increasing (:issue:`9512`) +- Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`) +- Fixed issue in the ``xlsxwriter`` engine where it added a default 'General' format to cells if no other format was applied. This prevented other row or column formatting being applied. (:issue:`9167`) +- Fixes issue with ``index_col=False`` when ``usecols`` is also specified in ``read_csv``. (:issue:`9082`) +- Bug where ``wide_to_long`` would modify the input stub names list (:issue:`9204`) +- Bug in ``to_sql`` not storing float64 values using double precision. (:issue:`9009`) +- ``SparseSeries`` and ``SparsePanel`` now accept zero argument constructors (same as their non-sparse counterparts) (:issue:`9272`). +- Regression in merging ``Categorical`` and ``object`` dtypes (:issue:`9426`) +- Bug in ``read_csv`` with buffer overflows with certain malformed input files (:issue:`9205`) +- Bug in groupby MultiIndex with missing pair (:issue:`9049`, :issue:`9344`) +- Fixed bug in ``Series.groupby`` where grouping on ``MultiIndex`` levels would ignore the sort argument (:issue:`9444`) +- Fix bug in ``DataFrame.Groupby`` where ``sort=False`` is ignored in the case of Categorical columns. 
(:issue:`8868`) +- Fixed bug with reading CSV files from Amazon S3 on python 3 raising a TypeError (:issue:`9452`) +- Bug in the Google BigQuery reader where the 'jobComplete' key may be present but False in the query results (:issue:`8728`) +- Bug in ``Series.values_counts`` with excluding ``NaN`` for categorical type ``Series`` with ``dropna=True`` (:issue:`9443`) +- Fixed missing numeric_only option for ``DataFrame.std/var/sem`` (:issue:`9201`) +- Support constructing ``Panel`` or ``Panel4D`` with scalar data (:issue:`8285`) +- ``Series`` text representation disconnected from ``max_rows``/``max_columns`` (:issue:`7508`). + +\ + +- ``Series`` number formatting inconsistent when truncated (:issue:`8532`). + + Previous behavior + + .. code-block:: python + + In [2]: pd.options.display.max_rows = 10 + In [3]: s = pd.Series([1,1,1,1,1,1,1,1,1,1,0.9999,1,1]*10) + In [4]: s + Out[4]: + 0 1 + 1 1 + 2 1 + ... + 127 0.9999 + 128 1.0000 + 129 1.0000 + Length: 130, dtype: float64 + + New behavior + + .. code-block:: python + + 0 1.0000 + 1 1.0000 + 2 1.0000 + 3 1.0000 + 4 1.0000 + ... + 125 1.0000 + 126 1.0000 + 127 0.9999 + 128 1.0000 + 129 1.0000 + dtype: float64 + +- A Spurious ``SettingWithCopy`` Warning was generated when setting a new item in a frame in some cases (:issue:`8730`) + + The following would previously report a ``SettingWithCopy`` Warning. + + .. ipython:: python + + df1 = pd.DataFrame({'x': pd.Series(['a', 'b', 'c']), + 'y': pd.Series(['d', 'e', 'f'])}) + df2 = df1[['x']] + df2['y'] = ['g', 'h', 'i'] + + +.. _whatsnew_0.16.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.15.2..v0.16.0 diff --git a/doc/source/whatsnew/v0.16.1.rst b/doc/source/whatsnew/v0.16.1.rst new file mode 100644 index 00000000..cbf5b770 --- /dev/null +++ b/doc/source/whatsnew/v0.16.1.rst @@ -0,0 +1,480 @@ +.. _whatsnew_0161: + +Version 0.16.1 (May 11, 2015) +----------------------------- + +{{ header }} + + +This is a minor bug-fix release from 0.16.0 and includes a large number of +bug fixes along several new features, enhancements, and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- Support for a ``CategoricalIndex``, a category based index, see :ref:`here ` +- New section on how-to-contribute to *pandas*, see :ref:`here ` +- Revised "Merge, join, and concatenate" documentation, including graphical examples to make it easier to understand each operations, see :ref:`here ` +- New method ``sample`` for drawing random samples from Series, DataFrames and Panels. See :ref:`here ` +- The default ``Index`` printing has changed to a more uniform format, see :ref:`here ` +- ``BusinessHour`` datetime-offset is now supported, see :ref:`here ` + +- Further enhancement to the ``.str`` accessor to make string operations easier, see :ref:`here ` + +.. contents:: What's new in v0.16.1 + :local: + :backlinks: none + +.. _whatsnew_0161.enhancements: + +.. warning:: + + In pandas 0.17.0, the sub-package ``pandas.io.data`` will be removed in favor of a separately installable package (:issue:`8961`). + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_0161.enhancements.categoricalindex: + +CategoricalIndex +^^^^^^^^^^^^^^^^ + +We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting +indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0) +and allows efficient indexing and storage of an index with a large number of duplicated elements. 
Prior to 0.16.1, +setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``. + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'A': np.arange(6), + ...: 'B': pd.Series(list('aabbca')) + ...: .astype('category', categories=list('cab')) + ...: }) + ...: + + In [2]: df + Out[2]: + A B + 0 0 a + 1 1 a + 2 2 b + 3 3 b + 4 4 c + 5 5 a + + In [3]: df.dtypes + Out[3]: + A int64 + B category + dtype: object + + In [4]: df.B.cat.categories + Out[4]: Index(['c', 'a', 'b'], dtype='object') + + +setting the index, will create a ``CategoricalIndex`` + +.. code-block:: ipython + + In [5]: df2 = df.set_index('B') + + In [6]: df2.index + Out[6]: CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') + +indexing with ``__getitem__/.iloc/.loc/.ix`` works similarly to an Index with duplicates. +The indexers MUST be in the category or the operation will raise. + +.. code-block:: ipython + + In [7]: df2.loc['a'] + Out[7]: + A + B + a 0 + a 1 + a 5 + +and preserves the ``CategoricalIndex`` + +.. code-block:: ipython + + In [8]: df2.loc['a'].index + Out[8]: CategoricalIndex(['a', 'a', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') + + +sorting will order by the order of the categories + +.. code-block:: ipython + + In [9]: df2.sort_index() + Out[9]: + A + B + c 4 + a 0 + a 1 + a 5 + b 2 + b 3 + +groupby operations on the index will preserve the index nature as well + +.. code-block:: ipython + + In [10]: df2.groupby(level=0).sum() + Out[10]: + A + B + c 4 + a 6 + b 5 + + In [11]: df2.groupby(level=0).sum().index + Out[11]: CategoricalIndex(['c', 'a', 'b'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') + + +reindexing operations, will return a resulting index based on the type of the passed +indexer, meaning that passing a list will return a plain-old-``Index``; indexing with +a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories +of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with +values NOT in the categories, similarly to how you can reindex ANY pandas index. + +.. code-block:: ipython + + In [12]: df2.reindex(['a', 'e']) + Out[12]: + A + B + a 0.0 + a 1.0 + a 5.0 + e NaN + + In [13]: df2.reindex(['a', 'e']).index + Out[13]: pd.Index(['a', 'a', 'a', 'e'], dtype='object', name='B') + + In [14]: df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))) + Out[14]: + A + B + a 0.0 + a 1.0 + a 5.0 + e NaN + + In [15]: df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index + Out[15]: pd.CategoricalIndex(['a', 'a', 'a', 'e'], + categories=['a', 'b', 'c', 'd', 'e'], + ordered=False, name='B', + dtype='category') + +See the :ref:`documentation ` for more. (:issue:`7629`, :issue:`10038`, :issue:`10039`) + +.. _whatsnew_0161.enhancements.sample: + +Sample +^^^^^^ + +Series, DataFrames, and Panels now have a new method: :meth:`~pandas.DataFrame.sample`. +The method accepts a specific number of rows or columns to return, or a fraction of the +total number or rows or columns. It also has options for sampling with or without replacement, +for passing in a column for weights for non-uniform sampling, and for setting seed values to +facilitate replication. (:issue:`2419`) + +.. 
ipython:: python + + example_series = pd.Series([0, 1, 2, 3, 4, 5]) + + # When no arguments are passed, returns 1 + example_series.sample() + + # One may specify either a number of rows: + example_series.sample(n=3) + + # Or a fraction of the rows: + example_series.sample(frac=0.5) + + # weights are accepted. + example_weights = [0, 0, 0.2, 0.2, 0.2, 0.4] + example_series.sample(n=3, weights=example_weights) + + # weights will also be normalized if they do not sum to one, + # and missing values will be treated as zeros. + example_weights2 = [0.5, 0, 0, 0, None, np.nan] + example_series.sample(n=1, weights=example_weights2) + + +When applied to a DataFrame, one may pass the name of a column to specify sampling weights +when sampling from rows. + +.. ipython:: python + + df = pd.DataFrame({"col1": [9, 8, 7, 6], "weight_column": [0.5, 0.4, 0.1, 0]}) + df.sample(n=3, weights="weight_column") + + +.. _whatsnew_0161.enhancements.string: + +String methods enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:ref:`Continuing from v0.16.0 `, the following +enhancements make string operations easier and more consistent with standard python string operations. + + +- Added ``StringMethods`` (``.str`` accessor) to ``Index`` (:issue:`9068`) + + The ``.str`` accessor is now available for both ``Series`` and ``Index``. + + .. ipython:: python + + idx = pd.Index([" jack", "jill ", " jesse ", "frank"]) + idx.str.strip() + + One special case for the ``.str`` accessor on ``Index`` is that if a string method returns ``bool``, the ``.str`` accessor + will return a ``np.array`` instead of a boolean ``Index`` (:issue:`8875`). This enables the following expression + to work naturally: + + .. ipython:: python + + idx = pd.Index(["a1", "a2", "b1", "b2"]) + s = pd.Series(range(4), index=idx) + s + idx.str.startswith("a") + s[s.index.str.startswith("a")] + +- The following new methods are accessible via ``.str`` accessor to apply the function to each values. (:issue:`9766`, :issue:`9773`, :issue:`10031`, :issue:`10045`, :issue:`10052`) + + ================ =============== =============== =============== ================ + .. .. Methods .. .. + ================ =============== =============== =============== ================ + ``capitalize()`` ``swapcase()`` ``normalize()`` ``partition()`` ``rpartition()`` + ``index()`` ``rindex()`` ``translate()`` + ================ =============== =============== =============== ================ + +- ``split`` now takes ``expand`` keyword to specify whether to expand dimensionality. ``return_type`` is deprecated. (:issue:`9847`) + + .. ipython:: python + + s = pd.Series(["a,b", "a,c", "b,c"]) + + # return Series + s.str.split(",") + + # return DataFrame + s.str.split(",", expand=True) + + idx = pd.Index(["a,b", "a,c", "b,c"]) + + # return Index + idx.str.split(",") + + # return MultiIndex + idx.str.split(",", expand=True) + + +- Improved ``extract`` and ``get_dummies`` methods for ``Index.str`` (:issue:`9980`) + + +.. _whatsnew_0161.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- ``BusinessHour`` offset is now supported, which represents business hours starting from 09:00 - 17:00 on ``BusinessDay`` by default. See :ref:`Here ` for details. (:issue:`7905`) + + .. 
ipython:: python + + pd.Timestamp("2014-08-01 09:00") + pd.tseries.offsets.BusinessHour() + pd.Timestamp("2014-08-01 07:00") + pd.tseries.offsets.BusinessHour() + pd.Timestamp("2014-08-01 16:30") + pd.tseries.offsets.BusinessHour() + +- ``DataFrame.diff`` now takes an ``axis`` parameter that determines the direction of differencing (:issue:`9727`) + +- Allow ``clip``, ``clip_lower``, and ``clip_upper`` to accept array-like arguments as thresholds (This is a regression from 0.11.0). These methods now have an ``axis`` parameter which determines how the Series or DataFrame will be aligned with the threshold(s). (:issue:`6966`) + +- ``DataFrame.mask()`` and ``Series.mask()`` now support same keywords as ``where`` (:issue:`8801`) + +- ``drop`` function can now accept ``errors`` keyword to suppress ``ValueError`` raised when any of label does not exist in the target data. (:issue:`6736`) + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(3, 3), columns=["A", "B", "C"]) + df.drop(["A", "X"], axis=1, errors="ignore") + +- Add support for separating years and quarters using dashes, for + example 2014-Q1. (:issue:`9688`) + +- Allow conversion of values with dtype ``datetime64`` or ``timedelta64`` to strings using ``astype(str)`` (:issue:`9757`) +- ``get_dummies`` function now accepts ``sparse`` keyword. If set to ``True``, the return ``DataFrame`` is sparse, e.g. ``SparseDataFrame``. (:issue:`8823`) +- ``Period`` now accepts ``datetime64`` as value input. (:issue:`9054`) + +- Allow timedelta string conversion when leading zero is missing from time definition, ie ``0:00:00`` vs ``00:00:00``. (:issue:`9570`) +- Allow ``Panel.shift`` with ``axis='items'`` (:issue:`9890`) + +- Trying to write an excel file now raises ``NotImplementedError`` if the ``DataFrame`` has a ``MultiIndex`` instead of writing a broken Excel file. (:issue:`9794`) +- Allow ``Categorical.add_categories`` to accept ``Series`` or ``np.array``. (:issue:`9927`) + +- Add/delete ``str/dt/cat`` accessors dynamically from ``__dir__``. (:issue:`9910`) +- Add ``normalize`` as a ``dt`` accessor method. (:issue:`10047`) + +- ``DataFrame`` and ``Series`` now have ``_constructor_expanddim`` property as overridable constructor for one higher dimensionality data. This should be used only when it is really needed, see :ref:`here ` + +- ``pd.lib.infer_dtype`` now returns ``'bytes'`` in Python 3 where appropriate. (:issue:`10032`) + + +.. _whatsnew_0161.api: + +API changes +~~~~~~~~~~~ + +- When passing in an ax to ``df.plot( ..., ax=ax)``, the ``sharex`` kwarg will now default to ``False``. + The result is that the visibility of xlabels and xticklabels will not anymore be changed. You + have to do that by yourself for the right axes in your figure or set ``sharex=True`` explicitly + (but this changes the visible for all axes in the figure, not only the one which is passed in!). + If pandas creates the subplots itself (e.g. no passed in ``ax`` kwarg), then the + default is still ``sharex=True`` and the visibility changes are applied. + +- :meth:`~pandas.DataFrame.assign` now inserts new columns in alphabetical order. Previously + the order was arbitrary. (:issue:`9777`) + +- By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`) + +.. 
_whatsnew_0161.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- ``Series.str.split``'s ``return_type`` keyword was removed in favor of ``expand`` (:issue:`9847`) + + +.. _whatsnew_0161.index_repr: + +Index representation +~~~~~~~~~~~~~~~~~~~~ + +The string representation of ``Index`` and its sub-classes have now been unified. These will show a single-line display if there are few values; a wrapped multi-line display for a lot of values (but less than ``display.max_seq_items``; if lots of items (> ``display.max_seq_items``) will show a truncated display (the head and tail of the data). The formatting for ``MultiIndex`` is unchanged (a multi-line wrapped display). The display width responds to the option ``display.max_seq_items``, which is defaulted to 100. (:issue:`6482`) + +Previous behavior + +.. code-block:: ipython + + In [2]: pd.Index(range(4), name='foo') + Out[2]: Int64Index([0, 1, 2, 3], dtype='int64') + + In [3]: pd.Index(range(104), name='foo') + Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64') + + In [4]: pd.date_range('20130101', periods=4, name='foo', tz='US/Eastern') + Out[4]: + + [2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00] + Length: 4, Freq: D, Timezone: US/Eastern + + In [5]: pd.date_range('20130101', periods=104, name='foo', tz='US/Eastern') + Out[5]: + + [2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00] + Length: 104, Freq: D, Timezone: US/Eastern + +New behavior + +.. ipython:: python + + pd.set_option("display.width", 80) + pd.Index(range(4), name="foo") + pd.Index(range(30), name="foo") + pd.Index(range(104), name="foo") + pd.CategoricalIndex(["a", "bb", "ccc", "dddd"], ordered=True, name="foobar") + pd.CategoricalIndex(["a", "bb", "ccc", "dddd"] * 10, ordered=True, name="foobar") + pd.CategoricalIndex(["a", "bb", "ccc", "dddd"] * 100, ordered=True, name="foobar") + pd.date_range("20130101", periods=4, name="foo", tz="US/Eastern") + pd.date_range("20130101", periods=25, freq="D") + pd.date_range("20130101", periods=104, name="foo", tz="US/Eastern") + + +.. _whatsnew_0161.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved csv write performance with mixed dtypes, including datetimes by up to 5x (:issue:`9940`) +- Improved csv write performance generally by 2x (:issue:`9940`) +- Improved the performance of ``pd.lib.max_len_string_array`` by 5-7x (:issue:`10024`) + + +.. _whatsnew_0161.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug where labels did not appear properly in the legend of ``DataFrame.plot()``, passing ``label=`` arguments works, and Series indices are no longer mutated. (:issue:`9542`) +- Bug in json serialization causing a segfault when a frame had zero length. (:issue:`9805`) +- Bug in ``read_csv`` where missing trailing delimiters would cause segfault. (:issue:`5664`) +- Bug in retaining index name on appending (:issue:`9862`) +- Bug in ``scatter_matrix`` draws unexpected axis ticklabels (:issue:`5662`) +- Fixed bug in ``StataWriter`` resulting in changes to input ``DataFrame`` upon save (:issue:`9795`). 
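+
+  A short, hedged check of the fixed behaviour, assuming the usual ``pd`` import;
+  the toy frame and the ``out.dta`` path are hypothetical:
+
+  .. code-block:: python
+
+     df = pd.DataFrame({'a': [1.0, 2.0], 'b': ['x', 'y']})
+     original = df.copy()
+     df.to_stata('out.dta')
+     # saving no longer modifies the input frame as a side effect
+     assert df.equals(original)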
+- Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`) +- Bug in ``equals`` causing false negatives when block order differed (:issue:`9330`) +- Bug in grouping with multiple ``pd.Grouper`` where one is non-time based (:issue:`10063`) +- Bug in ``read_sql_table`` error when reading postgres table with timezone (:issue:`7139`) +- Bug in ``DataFrame`` slicing may not retain metadata (:issue:`9776`) +- Bug where ``TimdeltaIndex`` were not properly serialized in fixed ``HDFStore`` (:issue:`9635`) +- Bug with ``TimedeltaIndex`` constructor ignoring ``name`` when given another ``TimedeltaIndex`` as data (:issue:`10025`). +- Bug in ``DataFrameFormatter._get_formatted_index`` with not applying ``max_colwidth`` to the ``DataFrame`` index (:issue:`7856`) +- Bug in ``.loc`` with a read-only ndarray data source (:issue:`10043`) +- Bug in ``groupby.apply()`` that would raise if a passed user defined function either returned only ``None`` (for all input). (:issue:`9685`) +- Always use temporary files in pytables tests (:issue:`9992`) +- Bug in plotting continuously using ``secondary_y`` may not show legend properly. (:issue:`9610`, :issue:`9779`) +- Bug in ``DataFrame.plot(kind="hist")`` results in ``TypeError`` when ``DataFrame`` contains non-numeric columns (:issue:`9853`) +- Bug where repeated plotting of ``DataFrame`` with a ``DatetimeIndex`` may raise ``TypeError`` (:issue:`9852`) +- Bug in ``setup.py`` that would allow an incompat cython version to build (:issue:`9827`) +- Bug in plotting ``secondary_y`` incorrectly attaches ``right_ax`` property to secondary axes specifying itself recursively. (:issue:`9861`) +- Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`) +- Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`) +- Bug in ``FloatArrayFormatter`` where decision boundary for displaying "small" floats in decimal format is off by one order of magnitude for a given display.precision (:issue:`9764`) +- Fixed bug where ``DataFrame.plot()`` raised an error when both ``color`` and ``style`` keywords were passed and there was no color symbol in the style strings (:issue:`9671`) +- Not showing a ``DeprecationWarning`` on combining list-likes with an ``Index`` (:issue:`10083`) +- Bug in ``read_csv`` and ``read_table`` when using ``skip_rows`` parameter if blank lines are present. (:issue:`9832`) +- Bug in ``read_csv()`` interprets ``index_col=True`` as ``1`` (:issue:`9798`) +- Bug in index equality comparisons using ``==`` failing on Index/MultiIndex type incompatibility (:issue:`9785`) +- Bug in which ``SparseDataFrame`` could not take ``nan`` as a column name (:issue:`8822`) +- Bug in ``to_msgpack`` and ``read_msgpack`` zlib and blosc compression support (:issue:`9783`) +- Bug ``GroupBy.size`` doesn't attach index name properly if grouped by ``TimeGrouper`` (:issue:`9925`) +- Bug causing an exception in slice assignments because ``length_of_indexer`` returns wrong results (:issue:`9995`) +- Bug in csv parser causing lines with initial white space plus one non-space character to be skipped. (:issue:`9710`) +- Bug in C csv parser causing spurious NaNs when data started with newline followed by white space. 
(:issue:`10022`) +- Bug causing elements with a null group to spill into the final group when grouping by a ``Categorical`` (:issue:`9603`) +- Bug where .iloc and .loc behavior is not consistent on empty dataframes (:issue:`9964`) +- Bug in invalid attribute access on a ``TimedeltaIndex`` incorrectly raised ``ValueError`` instead of ``AttributeError`` (:issue:`9680`) +- Bug in unequal comparisons between categorical data and a scalar, which was not in the categories (e.g. ``Series(Categorical(list("abc"), ordered=True)) > "d"``. This returned ``False`` for all elements, but now raises a ``TypeError``. Equality comparisons also now return ``False`` for ``==`` and ``True`` for ``!=``. (:issue:`9848`) +- Bug in DataFrame ``__setitem__`` when right hand side is a dictionary (:issue:`9874`) +- Bug in ``where`` when dtype is ``datetime64/timedelta64``, but dtype of other is not (:issue:`9804`) +- Bug in ``MultiIndex.sortlevel()`` results in unicode level name breaks (:issue:`9856`) +- Bug in which ``groupby.transform`` incorrectly enforced output dtypes to match input dtypes. (:issue:`9807`) +- Bug in ``DataFrame`` constructor when ``columns`` parameter is set, and ``data`` is an empty list (:issue:`9939`) +- Bug in bar plot with ``log=True`` raises ``TypeError`` if all values are less than 1 (:issue:`9905`) +- Bug in horizontal bar plot ignores ``log=True`` (:issue:`9905`) +- Bug in PyTables queries that did not return proper results using the index (:issue:`8265`, :issue:`9676`) +- Bug where dividing a dataframe containing values of type ``Decimal`` by another ``Decimal`` would raise. (:issue:`9787`) +- Bug where using DataFrames asfreq would remove the name of the index. (:issue:`9885`) +- Bug causing extra index point when resample BM/BQ (:issue:`9756`) +- Changed caching in ``AbstractHolidayCalendar`` to be at the instance level rather than at the class level as the latter can result in unexpected behaviour. (:issue:`9552`) +- Fixed latex output for MultiIndexed dataframes (:issue:`9778`) +- Bug causing an exception when setting an empty range using ``DataFrame.loc`` (:issue:`9596`) +- Bug in hiding ticklabels with subplots and shared axes when adding a new plot to an existing grid of axes (:issue:`9158`) +- Bug in ``transform`` and ``filter`` when grouping on a categorical variable (:issue:`9921`) +- Bug in ``transform`` when groups are equal in number and dtype to the input index (:issue:`9700`) +- Google BigQuery connector now imports dependencies on a per-method basis.(:issue:`9713`) +- Updated BigQuery connector to no longer use deprecated ``oauth2client.tools.run()`` (:issue:`8327`) +- Bug in subclassed ``DataFrame``. It may not return the correct class, when slicing or subsetting it. (:issue:`9632`) +- Bug in ``.median()`` where non-float null values are not handled correctly (:issue:`10040`) +- Bug in Series.fillna() where it raises if a numerically convertible string is given (:issue:`10092`) + + +.. _whatsnew_0.16.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.16.0..v0.16.1 diff --git a/doc/source/whatsnew/v0.16.2.rst b/doc/source/whatsnew/v0.16.2.rst new file mode 100644 index 00000000..c6c134a3 --- /dev/null +++ b/doc/source/whatsnew/v0.16.2.rst @@ -0,0 +1,181 @@ +.. _whatsnew_0162: + +Version 0.16.2 (June 12, 2015) +------------------------------ + +{{ header }} + + +This is a minor bug-fix release from 0.16.1 and includes a large number of +bug fixes along some new features (:meth:`~DataFrame.pipe` method), enhancements, and performance improvements. 
+ +We recommend that all users upgrade to this version. + +Highlights include: + +- A new ``pipe`` method, see :ref:`here ` +- Documentation on how to use numba_ with *pandas*, see :ref:`here ` + + +.. contents:: What's new in v0.16.2 + :local: + :backlinks: none + +.. _numba: http://numba.pydata.org + +.. _whatsnew_0162.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0162.enhancements.pipe: + +Pipe +^^^^ + +We've introduced a new method :meth:`DataFrame.pipe`. As suggested by the name, ``pipe`` +should be used to pipe data through a chain of function calls. +The goal is to avoid confusing nested function calls like + +.. code-block:: python + + # df is a DataFrame + # f, g, and h are functions that take and return DataFrames + f(g(h(df), arg1=1), arg2=2, arg3=3) # noqa F821 + +The logic flows from inside out, and function names are separated from their keyword arguments. +This can be rewritten as + +.. code-block:: python + + ( + df.pipe(h) # noqa F821 + .pipe(g, arg1=1) # noqa F821 + .pipe(f, arg2=2, arg3=3) # noqa F821 + ) + +Now both the code and the logic flow from top to bottom. Keyword arguments are next to +their functions. Overall the code is much more readable. + +In the example above, the functions ``f``, ``g``, and ``h`` each expected the DataFrame as the first positional argument. +When the function you wish to apply takes its data anywhere other than the first argument, pass a tuple +of ``(function, keyword)`` indicating where the DataFrame should flow. For example: + +.. ipython:: python + :okwarning: + + import statsmodels.formula.api as sm + + bb = pd.read_csv("data/baseball.csv", index_col="id") + + # sm.ols takes (formula, data) + ( + bb.query("h > 0") + .assign(ln_h=lambda df: np.log(df.h)) + .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)") + .fit() + .summary() + ) + +The pipe method is inspired by unix pipes, which stream text through +processes. More recently dplyr_ and magrittr_ have introduced the +popular ``(%>%)`` pipe operator for R_. + +See the :ref:`documentation ` for more. (:issue:`10129`) + +.. _dplyr: https://github.com/tidyverse/dplyr +.. _magrittr: https://github.com/smbache/magrittr +.. _R: http://www.r-project.org + +.. _whatsnew_0162.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Added ``rsplit`` to Index/Series StringMethods (:issue:`10303`) + +- Removed the hard-coded size limits on the ``DataFrame`` HTML representation + in the IPython notebook, and leave this to IPython itself (only for IPython + v3.0 or greater). This eliminates the duplicate scroll bars that appeared in + the notebook with large frames (:issue:`10231`). + + Note that the notebook has a ``toggle output scrolling`` feature to limit the + display of very large frames (by clicking left of the output). + You can also configure the way DataFrames are displayed using the pandas + options, see here :ref:`here `. + +- ``axis`` parameter of ``DataFrame.quantile`` now accepts also ``index`` and ``column``. (:issue:`9543`) + +.. _whatsnew_0162.api: + +API changes +~~~~~~~~~~~ + +- ``Holiday`` now raises ``NotImplementedError`` if both ``offset`` and ``observance`` are used in the constructor instead of returning an incorrect result (:issue:`10217`). + + +.. _whatsnew_0162.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved ``Series.resample`` performance with ``dtype=datetime64[ns]`` (:issue:`7754`) +- Increase performance of ``str.split`` when ``expand=True`` (:issue:`10081`) + +.. 
_whatsnew_0162.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in ``Series.hist`` raises an error when a one row ``Series`` was given (:issue:`10214`) +- Bug where ``HDFStore.select`` modifies the passed columns list (:issue:`7212`) +- Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`) +- Bug in ``to_json`` with certain orients and a ``CategoricalIndex`` would segfault (:issue:`10317`) +- Bug where some of the nan functions do not have consistent return dtypes (:issue:`10251`) +- Bug in ``DataFrame.quantile`` on checking that a valid axis was passed (:issue:`9543`) +- Bug in ``groupby.apply`` aggregation for ``Categorical`` not preserving categories (:issue:`10138`) +- Bug in ``to_csv`` where ``date_format`` is ignored if the ``datetime`` is fractional (:issue:`10209`) +- Bug in ``DataFrame.to_json`` with mixed data types (:issue:`10289`) +- Bug in cache updating when consolidating (:issue:`10264`) +- Bug in ``mean()`` where integer dtypes can overflow (:issue:`10172`) +- Bug where ``Panel.from_dict`` does not set dtype when specified (:issue:`10058`) +- Bug in ``Index.union`` raises ``AttributeError`` when passing array-likes. (:issue:`10149`) +- Bug in ``Timestamp``'s' ``microsecond``, ``quarter``, ``dayofyear``, ``week`` and ``daysinmonth`` properties return ``np.int`` type, not built-in ``int``. (:issue:`10050`) +- Bug in ``NaT`` raises ``AttributeError`` when accessing to ``daysinmonth``, ``dayofweek`` properties. (:issue:`10096`) +- Bug in Index repr when using the ``max_seq_items=None`` setting (:issue:`10182`). +- Bug in getting timezone data with ``dateutil`` on various platforms ( :issue:`9059`, :issue:`8639`, :issue:`9663`, :issue:`10121`) +- Bug in displaying datetimes with mixed frequencies; display 'ms' datetimes to the proper precision. (:issue:`10170`) +- Bug in ``setitem`` where type promotion is applied to the entire block (:issue:`10280`) +- Bug in ``Series`` arithmetic methods may incorrectly hold names (:issue:`10068`) +- Bug in ``GroupBy.get_group`` when grouping on multiple keys, one of which is categorical. 
(:issue:`10132`) +- Bug in ``DatetimeIndex`` and ``TimedeltaIndex`` names are lost after timedelta arithmetic ( :issue:`9926`) +- Bug in ``DataFrame`` construction from nested ``dict`` with ``datetime64`` (:issue:`10160`) +- Bug in ``Series`` construction from ``dict`` with ``datetime64`` keys (:issue:`9456`) +- Bug in ``Series.plot(label="LABEL")`` not correctly setting the label (:issue:`10119`) +- Bug in ``plot`` not defaulting to matplotlib ``axes.grid`` setting (:issue:`9792`) +- Bug causing strings containing an exponent, but no decimal to be parsed as ``int`` instead of ``float`` in ``engine='python'`` for the ``read_csv`` parser (:issue:`9565`) +- Bug in ``Series.align`` resets ``name`` when ``fill_value`` is specified (:issue:`10067`) +- Bug in ``read_csv`` causing index name not to be set on an empty DataFrame (:issue:`10184`) +- Bug in ``SparseSeries.abs`` resets ``name`` (:issue:`10241`) +- Bug in ``TimedeltaIndex`` slicing may reset freq (:issue:`10292`) +- Bug in ``GroupBy.get_group`` raises ``ValueError`` when group key contains ``NaT`` (:issue:`6992`) +- Bug in ``SparseSeries`` constructor ignores input data name (:issue:`10258`) +- Bug in ``Categorical.remove_categories`` causing a ``ValueError`` when removing the ``NaN`` category if underlying dtype is floating-point (:issue:`10156`) +- Bug where infer_freq infers time rule (WOM-5XXX) unsupported by to_offset (:issue:`9425`) +- Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. (:issue:`9057`) +- Bug to handle masking empty ``DataFrame`` (:issue:`10126`). +- Bug where MySQL interface could not handle numeric table/column names (:issue:`10255`) +- Bug in ``read_csv`` with a ``date_parser`` that returned a ``datetime64`` array of other time resolution than ``[ns]`` (:issue:`10245`) +- Bug in ``Panel.apply`` when the result has ndim=0 (:issue:`10332`) +- Bug in ``read_hdf`` where ``auto_close`` could not be passed (:issue:`9327`). +- Bug in ``read_hdf`` where open stores could not be used (:issue:`10330`). +- Bug in adding empty ``DataFrames``, now results in a ``DataFrame`` that ``.equals`` an empty ``DataFrame`` (:issue:`10181`). +- Bug in ``to_hdf`` and ``HDFStore`` which did not check that complib choices were valid (:issue:`4582`, :issue:`8874`). + + +.. _whatsnew_0.16.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.16.1..v0.16.2 diff --git a/doc/source/whatsnew/v0.17.0.rst b/doc/source/whatsnew/v0.17.0.rst new file mode 100644 index 00000000..70674076 --- /dev/null +++ b/doc/source/whatsnew/v0.17.0.rst @@ -0,0 +1,1177 @@ +.. _whatsnew_0170: + +Version 0.17.0 (October 9, 2015) +-------------------------------- + +{{ header }} + + +This is a major release from 0.16.2 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +.. warning:: + + pandas >= 0.17.0 will no longer support compatibility with Python version 3.2 (:issue:`9118`) + +.. warning:: + + The ``pandas.io.data`` package is deprecated and will be replaced by the + `pandas-datareader package `_. + This will allow the data modules to be independently updated to your pandas + installation. The API for ``pandas-datareader v0.1.1`` is exactly the same + as in ``pandas v0.17.0`` (:issue:`8961`, :issue:`10861`). 
+ + After installing pandas-datareader, you can easily change your imports: + + .. code-block:: python + + from pandas.io import data, wb + + becomes + + .. code-block:: python + + from pandas_datareader import data, wb + +Highlights include: + +- Release the Global Interpreter Lock (GIL) on some cython operations, see :ref:`here ` +- Plotting methods are now available as attributes of the ``.plot`` accessor, see :ref:`here ` +- The sorting API has been revamped to remove some long-time inconsistencies, see :ref:`here ` +- Support for a ``datetime64[ns]`` with timezones as a first-class dtype, see :ref:`here ` +- The default for ``to_datetime`` will now be to ``raise`` when presented with unparsable formats, + previously this would return the original input. Also, date parse + functions now return consistent results. See :ref:`here ` +- The default for ``dropna`` in ``HDFStore`` has changed to ``False``, to store by default all rows even + if they are all ``NaN``, see :ref:`here ` +- Datetime accessor (``dt``) now supports ``Series.dt.strftime`` to generate formatted strings for datetime-likes, and ``Series.dt.total_seconds`` to generate each duration of the timedelta in seconds. See :ref:`here ` +- ``Period`` and ``PeriodIndex`` can handle multiplied freq like ``3D``, which corresponding to 3 days span. See :ref:`here ` +- Development installed versions of pandas will now have ``PEP440`` compliant version strings (:issue:`9518`) +- Development support for benchmarking with the `Air Speed Velocity library `_ (:issue:`8361`) +- Support for reading SAS xport files, see :ref:`here ` +- Documentation comparing SAS to *pandas*, see :ref:`here ` +- Removal of the automatic TimeSeries broadcasting, deprecated since 0.8.0, see :ref:`here ` +- Display format with plain text can optionally align with Unicode East Asian Width, see :ref:`here ` +- Compatibility with Python 3.5 (:issue:`11097`) +- Compatibility with matplotlib 1.5.0 (:issue:`11111`) + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.17.0 + :local: + :backlinks: none + +.. _whatsnew_0170.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0170.tz: + +Datetime with TZ +^^^^^^^^^^^^^^^^ + +We are adding an implementation that natively supports datetime with timezones. A ``Series`` or a ``DataFrame`` column previously +*could* be assigned a datetime with timezones, and would work as an ``object`` dtype. This had performance issues with a large +number rows. See the :ref:`docs ` for more details. (:issue:`8260`, :issue:`10763`, :issue:`11034`). + +The new implementation allows for having a single-timezone across all rows, with operations in a performant manner. + +.. ipython:: python + + df = pd.DataFrame( + { + "A": pd.date_range("20130101", periods=3), + "B": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "C": pd.date_range("20130101", periods=3, tz="CET"), + } + ) + df + df.dtypes + +.. ipython:: python + + df.B + df.B.dt.tz_localize(None) + +This uses a new-dtype representation as well, that is very similar in look-and-feel to its numpy cousin ``datetime64[ns]`` + +.. ipython:: python + + df["B"].dtype + type(df["B"].dtype) + +.. note:: + + There is a slightly different string repr for the underlying ``DatetimeIndex`` as a result of the dtype changes, but + functionally these are the same. + + Previous behavior: + + .. 
code-block:: ipython + + In [1]: pd.date_range('20130101', periods=3, tz='US/Eastern') + Out[1]: DatetimeIndex(['2013-01-01 00:00:00-05:00', '2013-01-02 00:00:00-05:00', + '2013-01-03 00:00:00-05:00'], + dtype='datetime64[ns]', freq='D', tz='US/Eastern') + + In [2]: pd.date_range('20130101', periods=3, tz='US/Eastern').dtype + Out[2]: dtype('` by supplying the ``kind`` keyword arguments. Unfortunately, many of these kinds of plots use different required and optional keyword arguments, which makes it difficult to discover what any given plot kind uses out of the dozens of possible arguments. + +To alleviate this issue, we have added a new, optional plotting interface, which exposes each kind of plot as a method of the ``.plot`` attribute. Instead of writing ``series.plot(kind=, ...)``, you can now also use ``series.plot.(...)``: + +.. ipython:: + :verbatim: + + In [13]: df = pd.DataFrame(np.random.rand(10, 2), columns=['a', 'b']) + + In [14]: df.plot.bar() + +.. image:: ../_static/whatsnew_plot_submethods.png + +As a result of this change, these methods are now all discoverable via tab-completion: + +.. ipython:: + :verbatim: + + In [15]: df.plot. # noqa: E225, E999 + df.plot.area df.plot.barh df.plot.density df.plot.hist df.plot.line df.plot.scatter + df.plot.bar df.plot.box df.plot.hexbin df.plot.kde df.plot.pie + +Each method signature only includes relevant arguments. Currently, these are limited to required arguments, but in the future these will include optional arguments, as well. For an overview, see the new :ref:`api.dataframe.plotting` API documentation. + +.. _whatsnew_0170.strftime: + +Additional methods for ``dt`` accessor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Series.dt.strftime +"""""""""""""""""" + +We are now supporting a ``Series.dt.strftime`` method for datetime-likes to generate a formatted string (:issue:`10110`). Examples: + +.. ipython:: python + + # DatetimeIndex + s = pd.Series(pd.date_range("20130101", periods=4)) + s + s.dt.strftime("%Y/%m/%d") + +.. ipython:: python + + # PeriodIndex + s = pd.Series(pd.period_range("20130101", periods=4)) + s + s.dt.strftime("%Y/%m/%d") + +The string format is as the python standard library and details can be found `here `_ + +Series.dt.total_seconds +""""""""""""""""""""""" + +``pd.Series`` of type ``timedelta64`` has new method ``.dt.total_seconds()`` returning the duration of the timedelta in seconds (:issue:`10817`) + +.. ipython:: python + + # TimedeltaIndex + s = pd.Series(pd.timedelta_range("1 minutes", periods=4)) + s + s.dt.total_seconds() + +.. _whatsnew_0170.periodfreq: + +Period frequency enhancement +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Period``, ``PeriodIndex`` and ``period_range`` can now accept multiplied freq. Also, ``Period.freq`` and ``PeriodIndex.freq`` are now stored as a ``DateOffset`` instance like ``DatetimeIndex``, and not as ``str`` (:issue:`7811`) + +A multiplied freq represents a span of corresponding length. The example below creates a period of 3 days. Addition and subtraction will shift the period by its span. + +.. ipython:: python + + p = pd.Period("2015-08-01", freq="3D") + p + p + 1 + p - 2 + p.to_timestamp() + p.to_timestamp(how="E") + +You can use the multiplied freq in ``PeriodIndex`` and ``period_range``. + +.. ipython:: python + + idx = pd.period_range("2015-08-01", periods=4, freq="2D") + idx + idx + 1 + +.. 
_whatsnew_0170.enhancements.sas_xport: + +Support for SAS XPORT files +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`~pandas.io.read_sas` provides support for reading *SAS XPORT* format files. (:issue:`4052`). + +.. code-block:: python + + df = pd.read_sas("sas_xport.xpt") + +It is also possible to obtain an iterator and read an XPORT file +incrementally. + +.. code-block:: python + + for df in pd.read_sas("sas_xport.xpt", chunksize=10000): + do_something(df) + +See the :ref:`docs ` for more details. + +.. _whatsnew_0170.matheval: + +Support for math functions in .eval() +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`~pandas.eval` now supports calling math functions (:issue:`4893`) + +.. code-block:: python + + df = pd.DataFrame({"a": np.random.randn(10)}) + df.eval("b = sin(a)") + +The support math functions are ``sin``, ``cos``, ``exp``, ``log``, ``expm1``, ``log1p``, +``sqrt``, ``sinh``, ``cosh``, ``tanh``, ``arcsin``, ``arccos``, ``arctan``, ``arccosh``, +``arcsinh``, ``arctanh``, ``abs`` and ``arctan2``. + +These functions map to the intrinsics for the ``NumExpr`` engine. For the Python +engine, they are mapped to ``NumPy`` calls. + +Changes to Excel with ``MultiIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In version 0.16.2 a ``DataFrame`` with ``MultiIndex`` columns could not be written to Excel via ``to_excel``. +That functionality has been added (:issue:`10564`), along with updating ``read_excel`` so that the data can +be read back with, no loss of information, by specifying which columns/rows make up the ``MultiIndex`` +in the ``header`` and ``index_col`` parameters (:issue:`4679`) + +See the :ref:`documentation ` for more details. + +.. ipython:: python + + df = pd.DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_product( + [["foo", "bar"], ["a", "b"]], names=["col1", "col2"] + ), + index=pd.MultiIndex.from_product([["j"], ["l", "k"]], names=["i1", "i2"]), + ) + + df + df.to_excel("test.xlsx") + + df = pd.read_excel("test.xlsx", header=[0, 1], index_col=[0, 1]) + df + +.. ipython:: python + :suppress: + + import os + + os.remove("test.xlsx") + +Previously, it was necessary to specify the ``has_index_names`` argument in ``read_excel``, +if the serialized data had index names. For version 0.17.0 the output format of ``to_excel`` +has been changed to make this keyword unnecessary - the change is shown below. + +**Old** + +.. image:: ../_static/old-excel-index.png + +**New** + +.. image:: ../_static/new-excel-index.png + +.. warning:: + + Excel files saved in version 0.16.2 or prior that had index names will still able to be read in, + but the ``has_index_names`` argument must specified to ``True``. + +.. _whatsnew_0170.gbq: + +Google BigQuery enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Added ability to automatically create a table/dataset using the :func:`pandas.io.gbq.to_gbq` function if the destination table/dataset does not exist. (:issue:`8325`, :issue:`11121`). +- Added ability to replace an existing table and schema when calling the :func:`pandas.io.gbq.to_gbq` function via the ``if_exists`` argument. See the `docs `__ for more details (:issue:`8325`). +- ``InvalidColumnOrder`` and ``InvalidPageToken`` in the gbq module will raise ``ValueError`` instead of ``IOError``. +- The ``generate_bq_schema()`` function is now deprecated and will be removed in a future version (:issue:`11121`) +- The gbq module will now support Python 3 (:issue:`11094`). + +.. 
_whatsnew_0170.east_asian_width: + +Display alignment with Unicode East Asian width +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. warning:: + + Enabling this option will affect the performance for printing of ``DataFrame`` and ``Series`` (about 2 times slower). + Use only when it is actually required. + +Some East Asian countries use Unicode characters its width is corresponding to 2 alphabets. If a ``DataFrame`` or ``Series`` contains these characters, the default output cannot be aligned properly. The following options are added to enable precise handling for these characters. + +- ``display.unicode.east_asian_width``: Whether to use the Unicode East Asian Width to calculate the display text width. (:issue:`2612`) +- ``display.unicode.ambiguous_as_wide``: Whether to handle Unicode characters belong to Ambiguous as Wide. (:issue:`11102`) + +.. ipython:: python + + df = pd.DataFrame({u"国籍": ["UK", u"日本"], u"名前": ["Alice", u"しのぶ"]}) + df + +.. ipython:: python + + pd.set_option("display.unicode.east_asian_width", True) + df + +For further details, see :ref:`here ` + +.. ipython:: python + :suppress: + + pd.set_option("display.unicode.east_asian_width", False) + +.. _whatsnew_0170.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Support for ``openpyxl`` >= 2.2. The API for style support is now stable (:issue:`10125`) +- ``merge`` now accepts the argument ``indicator`` which adds a Categorical-type column (by default called ``_merge``) to the output object that takes on the values (:issue:`8790`) + + =================================== ================ + Observation Origin ``_merge`` value + =================================== ================ + Merge key only in ``'left'`` frame ``left_only`` + Merge key only in ``'right'`` frame ``right_only`` + Merge key in both frames ``both`` + =================================== ================ + + .. ipython:: python + + df1 = pd.DataFrame({"col1": [0, 1], "col_left": ["a", "b"]}) + df2 = pd.DataFrame({"col1": [1, 2, 2], "col_right": [2, 2, 2]}) + pd.merge(df1, df2, on="col1", how="outer", indicator=True) + + For more, see the :ref:`updated docs ` + +- ``pd.to_numeric`` is a new function to coerce strings to numbers (possibly with coercion) (:issue:`11133`) + +- ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`). + +- ``pd.pivot`` will now allow passing index as ``None`` (:issue:`3962`). + +- ``pd.concat`` will now use existing Series names if provided (:issue:`10698`). + + .. ipython:: python + + foo = pd.Series([1, 2], name="foo") + bar = pd.Series([1, 2]) + baz = pd.Series([4, 5]) + + Previous behavior: + + .. code-block:: ipython + + In [1]: pd.concat([foo, bar, baz], axis=1) + Out[1]: + 0 1 2 + 0 1 1 4 + 1 2 2 5 + + New behavior: + + .. ipython:: python + + pd.concat([foo, bar, baz], axis=1) + +- ``DataFrame`` has gained the ``nlargest`` and ``nsmallest`` methods (:issue:`10393`) + +- Add a ``limit_direction`` keyword argument that works with ``limit`` to enable ``interpolate`` to fill ``NaN`` values forward, backward, or both (:issue:`9218`, :issue:`10420`, :issue:`11115`) + + .. ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13]) + ser.interpolate(limit=1, limit_direction="both") + +- Added a ``DataFrame.round`` method to round the values to a variable number of decimal places (:issue:`10568`). + + .. 
ipython:: python + + df = pd.DataFrame( + np.random.random([3, 3]), + columns=["A", "B", "C"], + index=["first", "second", "third"], + ) + df + df.round(2) + df.round({"A": 0, "C": 2}) + +- ``drop_duplicates`` and ``duplicated`` now accept a ``keep`` keyword to target first, last, and all duplicates. The ``take_last`` keyword is deprecated, see :ref:`here ` (:issue:`6511`, :issue:`8505`) + + .. ipython:: python + + s = pd.Series(["A", "B", "C", "A", "B", "D"]) + s.drop_duplicates() + s.drop_duplicates(keep="last") + s.drop_duplicates(keep=False) + +- Reindex now has a ``tolerance`` argument that allows for finer control of :ref:`basics.limits_on_reindex_fill` (:issue:`10411`): + + .. ipython:: python + + df = pd.DataFrame({"x": range(5), "t": pd.date_range("2000-01-01", periods=5)}) + df.reindex([0.1, 1.9, 3.5], method="nearest", tolerance=0.2) + + When used on a ``DatetimeIndex``, ``TimedeltaIndex`` or ``PeriodIndex``, ``tolerance`` will coerced into a ``Timedelta`` if possible. This allows you to specify tolerance with a string: + + .. ipython:: python + + df = df.set_index("t") + df.reindex(pd.to_datetime(["1999-12-31"]), method="nearest", tolerance="1 day") + + ``tolerance`` is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods. + +- Added functionality to use the ``base`` argument when resampling a ``TimeDeltaIndex`` (:issue:`10530`) + +- ``DatetimeIndex`` can be instantiated using strings contains ``NaT`` (:issue:`7599`) + +- ``to_datetime`` can now accept the ``yearfirst`` keyword (:issue:`7599`) + +- ``pandas.tseries.offsets`` larger than the ``Day`` offset can now be used with a ``Series`` for addition/subtraction (:issue:`10699`). See the :ref:`docs ` for more details. + +- ``pd.Timedelta.total_seconds()`` now returns Timedelta duration to ns precision (previously microsecond precision) (:issue:`10939`) + +- ``PeriodIndex`` now supports arithmetic with ``np.ndarray`` (:issue:`10638`) + +- Support pickling of ``Period`` objects (:issue:`10439`) + +- ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`) + +- ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). + +- Enable reading gzip compressed files via URL, either by explicitly setting the compression parameter or by inferring from the presence of the HTTP Content-Encoding header in the response (:issue:`8685`) + +- Enable writing Excel files in :ref:`memory ` using StringIO/BytesIO (:issue:`7074`) + +- Enable serialization of lists and dicts to strings in ``ExcelWriter`` (:issue:`8188`) + +- SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`) + +- ``pd.read_sql`` and ``to_sql`` can accept database URI as ``con`` parameter (:issue:`10214`) + +- ``read_sql_table`` will now allow reading from views (:issue:`10750`). + +- Enable writing complex values to ``HDFStores`` when using the ``table`` format (:issue:`10447`) + +- Enable ``pd.read_hdf`` to be used without specifying a key when the HDF file contains a single dataset (:issue:`10443`) + +- ``pd.read_stata`` will now read Stata 118 type files. (:issue:`9882`) + +- ``msgpack`` submodule has been updated to 0.4.6 with backward compatibility (:issue:`10581`) + +- ``DataFrame.to_dict`` now accepts ``orient='index'`` keyword argument (:issue:`10844`). 
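+
+  For example (a minimal sketch, not taken from the original notes; the expected
+  result is shown as a comment), ``orient='index'`` maps each index label to a
+  dict of column values:
+
+  .. code-block:: python
+
+     df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=["x", "y"])
+     df.to_dict(orient="index")
+     # {'x': {'a': 1, 'b': 3}, 'y': {'a': 2, 'b': 4}}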
+ +- ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`). + +- Allow passing ``kwargs`` to the interpolation methods (:issue:`10378`). + +- Improved error message when concatenating an empty iterable of ``Dataframe`` objects (:issue:`9157`) + +- ``pd.read_csv`` can now read bz2-compressed files incrementally, and the C parser can read bz2-compressed files from AWS S3 (:issue:`11070`, :issue:`11072`). + +- In ``pd.read_csv``, recognize ``s3n://`` and ``s3a://`` URLs as designating S3 file storage (:issue:`11070`, :issue:`11071`). + +- Read CSV files from AWS S3 incrementally, instead of first downloading the entire file. (Full file download still required for compressed files in Python 2.) (:issue:`11070`, :issue:`11073`) + +- ``pd.read_csv`` is now able to infer compression type for files read from AWS S3 storage (:issue:`11070`, :issue:`11074`). + + +.. _whatsnew_0170.api: + +.. _whatsnew_0170.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0170.api_breaking.sorting: + +Changes to sorting API +^^^^^^^^^^^^^^^^^^^^^^ + +The sorting API has had some longtime inconsistencies. (:issue:`9816`, :issue:`8239`). + +Here is a summary of the API **PRIOR** to 0.17.0: + +- ``Series.sort`` is **INPLACE** while ``DataFrame.sort`` returns a new object. +- ``Series.order`` returns a new object +- It was possible to use ``Series/DataFrame.sort_index`` to sort by **values** by passing the ``by`` keyword. +- ``Series/DataFrame.sortlevel`` worked only on a ``MultiIndex`` for sorting by index. + +To address these issues, we have revamped the API: + +- We have introduced a new method, :meth:`DataFrame.sort_values`, which is the merger of ``DataFrame.sort()``, ``Series.sort()``, + and ``Series.order()``, to handle sorting of **values**. +- The existing methods ``Series.sort()``, ``Series.order()``, and ``DataFrame.sort()`` have been deprecated and will be removed in a + future version. +- The ``by`` argument of ``DataFrame.sort_index()`` has been deprecated and will be removed in a future version. +- The existing method ``.sort_index()`` will gain the ``level`` keyword to enable level sorting. + +We now have two distinct and non-overlapping methods of sorting. A ``*`` marks items that +will show a ``FutureWarning``. + +To sort by the **values**: + +================================== ==================================== +Previous Replacement +================================== ==================================== +\* ``Series.order()`` ``Series.sort_values()`` +\* ``Series.sort()`` ``Series.sort_values(inplace=True)`` +\* ``DataFrame.sort(columns=...)`` ``DataFrame.sort_values(by=...)`` +================================== ==================================== + +To sort by the **index**: + +================================== ==================================== +Previous Replacement +================================== ==================================== +``Series.sort_index()`` ``Series.sort_index()`` +``Series.sortlevel(level=...)`` ``Series.sort_index(level=...``) +``DataFrame.sort_index()`` ``DataFrame.sort_index()`` +``DataFrame.sortlevel(level=...)`` ``DataFrame.sort_index(level=...)`` +\* ``DataFrame.sort()`` ``DataFrame.sort_index()`` +================================== ==================================== + +We have also deprecated and changed similar methods in two Series-like classes, ``Index`` and ``Categorical``. 
+ +================================== ==================================== +Previous Replacement +================================== ==================================== +\* ``Index.order()`` ``Index.sort_values()`` +\* ``Categorical.order()`` ``Categorical.sort_values()`` +================================== ==================================== + +.. _whatsnew_0170.api_breaking.to_datetime: + +Changes to to_datetime and to_timedelta +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Error handling +"""""""""""""" + +The default for ``pd.to_datetime`` error handling has changed to ``errors='raise'``. +In prior versions it was ``errors='ignore'``. Furthermore, the ``coerce`` argument +has been deprecated in favor of ``errors='coerce'``. This means that invalid parsing +will raise rather that return the original input as in previous versions. (:issue:`10636`) + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.to_datetime(['2009-07-31', 'asd']) + Out[2]: array(['2009-07-31', 'asd'], dtype=object) + +New behavior: + +.. code-block:: ipython + + In [3]: pd.to_datetime(['2009-07-31', 'asd']) + ValueError: Unknown string format + +Of course you can coerce this as well. + +.. ipython:: python + + pd.to_datetime(["2009-07-31", "asd"], errors="coerce") + +To keep the previous behavior, you can use ``errors='ignore'``: + +.. ipython:: python + + pd.to_datetime(["2009-07-31", "asd"], errors="ignore") + +Furthermore, ``pd.to_timedelta`` has gained a similar API, of ``errors='raise'|'ignore'|'coerce'``, and the ``coerce`` keyword +has been deprecated in favor of ``errors='coerce'``. + +Consistent parsing +"""""""""""""""""" + +The string parsing of ``to_datetime``, ``Timestamp`` and ``DatetimeIndex`` has +been made consistent. (:issue:`7599`) + +Prior to v0.17.0, ``Timestamp`` and ``to_datetime`` may parse year-only datetime-string incorrectly using today's date, otherwise ``DatetimeIndex`` +uses the beginning of the year. ``Timestamp`` and ``to_datetime`` may raise ``ValueError`` in some types of datetime-string which ``DatetimeIndex`` +can parse, such as a quarterly string. + +Previous behavior: + +.. code-block:: ipython + + In [1]: pd.Timestamp('2012Q2') + Traceback + ... + ValueError: Unable to parse 2012Q2 + + # Results in today's date. + In [2]: pd.Timestamp('2014') + Out [2]: 2014-08-12 00:00:00 + +v0.17.0 can parse them as below. It works on ``DatetimeIndex`` also. + +New behavior: + +.. ipython:: python + + pd.Timestamp("2012Q2") + pd.Timestamp("2014") + pd.DatetimeIndex(["2012Q2", "2014"]) + +.. note:: + + If you want to perform calculations based on today's date, use ``Timestamp.now()`` and ``pandas.tseries.offsets``. + + .. ipython:: python + + import pandas.tseries.offsets as offsets + + pd.Timestamp.now() + pd.Timestamp.now() + offsets.DateOffset(years=1) + +Changes to Index comparisons +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Operator equal on ``Index`` should behavior similarly to ``Series`` (:issue:`9947`, :issue:`10637`) + +Starting in v0.17.0, comparing ``Index`` objects of different lengths will raise +a ``ValueError``. This is to be consistent with the behavior of ``Series``. + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5]) + Out[2]: array([ True, False, False], dtype=bool) + + In [3]: pd.Index([1, 2, 3]) == pd.Index([2]) + Out[3]: array([False, True, False], dtype=bool) + + In [4]: pd.Index([1, 2, 3]) == pd.Index([1, 2]) + Out[4]: False + +New behavior: + +.. 
code-block:: ipython + + In [8]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5]) + Out[8]: array([ True, False, False], dtype=bool) + + In [9]: pd.Index([1, 2, 3]) == pd.Index([2]) + ValueError: Lengths must match to compare + + In [10]: pd.Index([1, 2, 3]) == pd.Index([1, 2]) + ValueError: Lengths must match to compare + +Note that this is different from the ``numpy`` behavior where a comparison can +be broadcast: + +.. ipython:: python + + np.array([1, 2, 3]) == np.array([1]) + +or it can return False if broadcasting can not be done: + +.. ipython:: python + :okwarning: + + np.array([1, 2, 3]) == np.array([1, 2]) + +Changes to boolean comparisons vs. None +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to comparing with ``np.nan``, rather than raise ``TypeError``. (:issue:`1079`). + +.. ipython:: python + + s = pd.Series(range(3)) + s.iloc[1] = None + s + +Previous behavior: + +.. code-block:: ipython + + In [5]: s == None + TypeError: Could not compare type with Series + +New behavior: + +.. ipython:: python + + s == None + +Usually you simply want to know which values are null. + +.. ipython:: python + + s.isnull() + +.. warning:: + + You generally will want to use ``isnull/notnull`` for these types of comparisons, as ``isnull/notnull`` tells you which elements are null. One has to be + mindful that ``nan's`` don't compare equal, but ``None's`` do. Note that pandas/numpy uses the fact that ``np.nan != np.nan``, and treats ``None`` like ``np.nan``. + + .. ipython:: python + + None == None + np.nan == np.nan + +.. _whatsnew_0170.api_breaking.hdf_dropna: + +HDFStore dropna behavior +^^^^^^^^^^^^^^^^^^^^^^^^ + +The default behavior for HDFStore write functions with ``format='table'`` is now to keep rows that are all missing. Previously, the behavior was to drop rows that were all missing save the index. The previous behavior can be replicated using the ``dropna=True`` option. (:issue:`9382`) + +Previous behavior: + +.. ipython:: python + + df_with_missing = pd.DataFrame( + {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]} + ) + + df_with_missing + + +.. code-block:: ipython + + In [27]: + df_with_missing.to_hdf('file.h5', + 'df_with_missing', + format='table', + mode='w') + + In [28]: pd.read_hdf('file.h5', 'df_with_missing') + + Out [28]: + col1 col2 + 0 0 1 + 2 2 NaN + + +New behavior: + +.. ipython:: python + + df_with_missing.to_hdf("file.h5", "df_with_missing", format="table", mode="w") + + pd.read_hdf("file.h5", "df_with_missing") + +.. ipython:: python + :suppress: + + import os + + os.remove("file.h5") + +See the :ref:`docs ` for more details. + +.. _whatsnew_0170.api_breaking.display_precision: + +Changes to ``display.precision`` option +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``display.precision`` option has been clarified to refer to decimal places (:issue:`10451`). + +Earlier versions of pandas would format floating point numbers to have one less decimal place than the value in +``display.precision``. + +.. code-block:: ipython + + In [1]: pd.set_option('display.precision', 2) + + In [2]: pd.DataFrame({'x': [123.456789]}) + Out[2]: + x + 0 123.5 + +If interpreting precision as "significant figures" this did work for scientific notation but that same interpretation +did not work for values with standard formatting. It was also out of step with how numpy handles formatting. 
+ +Going forward the value of ``display.precision`` will directly control the number of places after the decimal, for +regular formatting as well as scientific notation, similar to how numpy's ``precision`` print option works. + +.. ipython:: python + + pd.set_option("display.precision", 2) + pd.DataFrame({"x": [123.456789]}) + +To preserve output behavior with prior versions the default value of ``display.precision`` has been reduced to ``6`` +from ``7``. + +.. ipython:: python + :suppress: + + pd.set_option("display.precision", 6) + +.. _whatsnew_0170.api_breaking.categorical_unique: + +Changes to ``Categorical.unique`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Categorical.unique`` now returns new ``Categoricals`` with ``categories`` and ``codes`` that are unique, rather than returning ``np.array`` (:issue:`10508`) + +- unordered category: values and categories are sorted by appearance order. +- ordered category: values are sorted by appearance order, categories keep existing order. + +.. ipython:: python + + cat = pd.Categorical(["C", "A", "B", "C"], categories=["A", "B", "C"], ordered=True) + cat + cat.unique() + + cat = pd.Categorical(["C", "A", "B", "C"], categories=["A", "B", "C"]) + cat + cat.unique() + +Changes to ``bool`` passed as ``header`` in parsers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In earlier versions of pandas, if a bool was passed the ``header`` argument of +``read_csv``, ``read_excel``, or ``read_html`` it was implicitly converted to +an integer, resulting in ``header=0`` for ``False`` and ``header=1`` for ``True`` +(:issue:`6113`) + +A ``bool`` input to ``header`` will now raise a ``TypeError`` + +.. code-block:: ipython + + In [29]: df = pd.read_csv('data.csv', header=False) + TypeError: Passing a bool to header is invalid. Use header=None for no header or + header=int or list-like of ints to specify the row(s) making up the column names + + +.. _whatsnew_0170.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- Line and kde plot with ``subplots=True`` now uses default colors, not all black. Specify ``color='k'`` to draw all lines in black (:issue:`9894`) +- Calling the ``.value_counts()`` method on a Series with a ``categorical`` dtype now returns a Series with a ``CategoricalIndex`` (:issue:`10704`) +- The metadata properties of subclasses of pandas objects will now be serialized (:issue:`10553`). +- ``groupby`` using ``Categorical`` follows the same rule as ``Categorical.unique`` described above (:issue:`10508`) +- When constructing ``DataFrame`` with an array of ``complex64`` dtype previously meant the corresponding column + was automatically promoted to the ``complex128`` dtype. 
pandas will now preserve the itemsize of the input for complex data (:issue:`10952`) +- some numeric reduction operators would return ``ValueError``, rather than ``TypeError`` on object types that includes strings and numbers (:issue:`11131`) +- Passing currently unsupported ``chunksize`` argument to ``read_excel`` or ``ExcelFile.parse`` will now raise ``NotImplementedError`` (:issue:`8011`) +- Allow an ``ExcelFile`` object to be passed into ``read_excel`` (:issue:`11198`) +- ``DatetimeIndex.union`` does not infer ``freq`` if ``self`` and the input have ``None`` as ``freq`` (:issue:`11086`) +- ``NaT``'s methods now either raise ``ValueError``, or return ``np.nan`` or ``NaT`` (:issue:`9513`) + + =============================== =============================================================== + Behavior Methods + =============================== =============================================================== + return ``np.nan`` ``weekday``, ``isoweekday`` + return ``NaT`` ``date``, ``now``, ``replace``, ``to_datetime``, ``today`` + return ``np.datetime64('NaT')`` ``to_datetime64`` (unchanged) + raise ``ValueError`` All other public methods (names not beginning with underscores) + =============================== =============================================================== + +.. _whatsnew_0170.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- For ``Series`` the following indexing functions are deprecated (:issue:`10177`). + + ===================== ================================= + Deprecated Function Replacement + ===================== ================================= + ``.irow(i)`` ``.iloc[i]`` or ``.iat[i]`` + ``.iget(i)`` ``.iloc[i]`` or ``.iat[i]`` + ``.iget_value(i)`` ``.iloc[i]`` or ``.iat[i]`` + ===================== ================================= + +- For ``DataFrame`` the following indexing functions are deprecated (:issue:`10177`). + + ===================== ================================= + Deprecated Function Replacement + ===================== ================================= + ``.irow(i)`` ``.iloc[i]`` + ``.iget_value(i, j)`` ``.iloc[i, j]`` or ``.iat[i, j]`` + ``.icol(j)`` ``.iloc[:, j]`` + ===================== ================================= + +.. note:: These indexing function have been deprecated in the documentation since 0.11.0. + +- ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`). +- Setting missing values (NaN) in a ``Categorical``'s ``categories`` will issue a warning (:issue:`10748`). You can still have missing values in the ``values``. +- ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was deprecated in favor of ``keep``. (:issue:`6511`, :issue:`8505`) +- ``Series.nsmallest`` and ``nlargest``'s ``take_last`` keyword was deprecated in favor of ``keep``. (:issue:`10792`) +- ``DataFrame.combineAdd`` and ``DataFrame.combineMult`` are deprecated. They + can easily be replaced by using the ``add`` and ``mul`` methods: + ``DataFrame.add(other, fill_value=0)`` and ``DataFrame.mul(other, fill_value=1.)`` + (:issue:`10735`). +- ``TimeSeries`` deprecated in favor of ``Series`` (note that this has been an alias since 0.13.0), (:issue:`10890`) +- ``SparsePanel`` deprecated and will be removed in a future version (:issue:`11157`). 
+- ``Series.is_time_series`` deprecated in favor of ``Series.index.is_all_dates`` (:issue:`11135`) +- Legacy offsets (like ``'A@JAN'``) are deprecated (note that this has been alias since 0.8.0) (:issue:`10878`) +- ``WidePanel`` deprecated in favor of ``Panel``, ``LongPanel`` in favor of ``DataFrame`` (note these have been aliases since < 0.11.0), (:issue:`10892`) +- ``DataFrame.convert_objects`` has been deprecated in favor of type-specific functions ``pd.to_datetime``, ``pd.to_timestamp`` and ``pd.to_numeric`` (new in 0.17.0) (:issue:`11133`). + +.. _whatsnew_0170.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Removal of ``na_last`` parameters from ``Series.order()`` and ``Series.sort()``, in favor of ``na_position``. (:issue:`5231`) +- Remove of ``percentile_width`` from ``.describe()``, in favor of ``percentiles``. (:issue:`7088`) +- Removal of ``colSpace`` parameter from ``DataFrame.to_string()``, in favor of ``col_space``, circa 0.8.0 version. +- Removal of automatic time-series broadcasting (:issue:`2304`) + + .. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame( + np.random.randn(5, 2), + columns=list("AB"), + index=pd.date_range("2013-01-01", periods=5), + ) + df + + Previously + + .. code-block:: ipython + + In [3]: df + df.A + FutureWarning: TimeSeries broadcasting along DataFrame index by default is deprecated. + Please use DataFrame. to explicitly broadcast arithmetic operations along the index + + Out[3]: + A B + 2013-01-01 0.942870 -0.719541 + 2013-01-02 2.865414 1.120055 + 2013-01-03 -1.441177 0.166574 + 2013-01-04 1.719177 0.223065 + 2013-01-05 0.031393 -2.226989 + + Current + + .. ipython:: python + + df.add(df.A, axis="index") + + +- Remove ``table`` keyword in ``HDFStore.put/append``, in favor of using ``format=`` (:issue:`4645`) +- Remove ``kind`` in ``read_excel/ExcelFile`` as its unused (:issue:`4712`) +- Remove ``infer_type`` keyword from ``pd.read_html`` as its unused (:issue:`4770`, :issue:`7032`) +- Remove ``offset`` and ``timeRule`` keywords from ``Series.tshift/shift``, in favor of ``freq`` (:issue:`4853`, :issue:`4864`) +- Remove ``pd.load/pd.save`` aliases in favor of ``pd.to_pickle/pd.read_pickle`` (:issue:`3787`) + +.. _whatsnew_0170.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Development support for benchmarking with the `Air Speed Velocity library `_ (:issue:`8361`) +- Added vbench benchmarks for alternative ExcelWriter engines and reading Excel files (:issue:`7171`) +- Performance improvements in ``Categorical.value_counts`` (:issue:`10804`) +- Performance improvements in ``SeriesGroupBy.nunique`` and ``SeriesGroupBy.value_counts`` and ``SeriesGroupby.transform`` (:issue:`10820`, :issue:`11077`) +- Performance improvements in ``DataFrame.drop_duplicates`` with integer dtypes (:issue:`10917`) +- Performance improvements in ``DataFrame.duplicated`` with wide frames. 
(:issue:`10161`, :issue:`11180`) +- 4x improvement in ``timedelta`` string parsing (:issue:`6755`, :issue:`10426`) +- 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`) +- Significantly improved performance of indexing ``MultiIndex`` with slicers (:issue:`10287`) +- 8x improvement in ``iloc`` using list-like input (:issue:`10791`) +- Improved performance of ``Series.isin`` for datetimelike/integer Series (:issue:`10287`) +- 20x improvement in ``concat`` of Categoricals when categories are identical (:issue:`10587`) +- Improved performance of ``to_datetime`` when specified format string is ISO8601 (:issue:`10178`) +- 2x improvement of ``Series.value_counts`` for float dtype (:issue:`10821`) +- Enable ``infer_datetime_format`` in ``to_datetime`` when date components do not have 0 padding (:issue:`11142`) +- Regression from 0.16.1 in constructing ``DataFrame`` from nested dictionary (:issue:`11084`) +- Performance improvements in addition/subtraction operations for ``DateOffset`` with ``Series`` or ``DatetimeIndex`` (:issue:`10744`, :issue:`11205`) + +.. _whatsnew_0170.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in incorrect computation of ``.mean()`` on ``timedelta64[ns]`` because of overflow (:issue:`9442`) +- Bug in ``.isin`` on older numpies (:issue:`11232`) +- Bug in ``DataFrame.to_html(index=False)`` renders unnecessary ``name`` row (:issue:`10344`) +- Bug in ``DataFrame.to_latex()`` the ``column_format`` argument could not be passed (:issue:`9402`) +- Bug in ``DatetimeIndex`` when localizing with ``NaT`` (:issue:`10477`) +- Bug in ``Series.dt`` ops in preserving meta-data (:issue:`10477`) +- Bug in preserving ``NaT`` when passed in an otherwise invalid ``to_datetime`` construction (:issue:`10477`) +- Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`) +- Bug in ``to_datetime`` with invalid dates and formats supplied (:issue:`10154`) +- Bug in ``Index.drop_duplicates`` dropping name(s) (:issue:`10115`) +- Bug in ``Series.quantile`` dropping name (:issue:`10881`) +- Bug in ``pd.Series`` when setting a value on an empty ``Series`` whose index has a frequency. (:issue:`10193`) +- Bug in ``pd.Series.interpolate`` with invalid ``order`` keyword values. (:issue:`10633`) +- Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`) +- Bug in ``Index`` construction with a mixed list of tuples (:issue:`10697`) +- Bug in ``DataFrame.reset_index`` when index contains ``NaT``. (:issue:`10388`) +- Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`) +- Bug in ``BinGrouper.group_info`` where returned values are not compatible with base class (:issue:`10914`) +- Bug in clearing the cache on ``DataFrame.pop`` and a subsequent inplace op (:issue:`10912`) +- Bug in indexing with a mixed-integer ``Index`` causing an ``ImportError`` (:issue:`10610`) +- Bug in ``Series.count`` when index has nulls (:issue:`10946`) +- Bug in pickling of a non-regular freq ``DatetimeIndex`` (:issue:`11002`) +- Bug causing ``DataFrame.where`` to not respect the ``axis`` parameter when the frame has a symmetric shape. (:issue:`9736`) +- Bug in ``Table.select_column`` where name is not preserved (:issue:`10392`) +- Bug in ``offsets.generate_range`` where ``start`` and ``end`` have finer precision than ``offset`` (:issue:`9907`) +- Bug in ``pd.rolling_*`` where ``Series.name`` would be lost in the output (:issue:`10565`) +- Bug in ``stack`` when index or columns are not unique. 
(:issue:`10417`) +- Bug in setting a ``Panel`` when an axis has a MultiIndex (:issue:`10360`) +- Bug in ``USFederalHolidayCalendar`` where ``USMemorialDay`` and ``USMartinLutherKingJr`` were incorrect (:issue:`10278` and :issue:`9760` ) +- Bug in ``.sample()`` where returned object, if set, gives unnecessary ``SettingWithCopyWarning`` (:issue:`10738`) +- Bug in ``.sample()`` where weights passed as ``Series`` were not aligned along axis before being treated positionally, potentially causing problems if weight indices were not aligned with sampled object. (:issue:`10738`) + +- Regression fixed in (:issue:`9311`, :issue:`6620`, :issue:`9345`), where groupby with a datetime-like converting to float with certain aggregators (:issue:`10979`) + +- Bug in ``DataFrame.interpolate`` with ``axis=1`` and ``inplace=True`` (:issue:`10395`) +- Bug in ``io.sql.get_schema`` when specifying multiple columns as primary + key (:issue:`10385`). + +- Bug in ``groupby(sort=False)`` with datetime-like ``Categorical`` raises ``ValueError`` (:issue:`10505`) +- Bug in ``groupby(axis=1)`` with ``filter()`` throws ``IndexError`` (:issue:`11041`) +- Bug in ``test_categorical`` on big-endian builds (:issue:`10425`) +- Bug in ``Series.shift`` and ``DataFrame.shift`` not supporting categorical data (:issue:`9416`) +- Bug in ``Series.map`` using categorical ``Series`` raises ``AttributeError`` (:issue:`10324`) +- Bug in ``MultiIndex.get_level_values`` including ``Categorical`` raises ``AttributeError`` (:issue:`10460`) +- Bug in ``pd.get_dummies`` with ``sparse=True`` not returning ``SparseDataFrame`` (:issue:`10531`) +- Bug in ``Index`` subtypes (such as ``PeriodIndex``) not returning their own type for ``.drop`` and ``.insert`` methods (:issue:`10620`) +- Bug in ``algos.outer_join_indexer`` when ``right`` array is empty (:issue:`10618`) + +- Bug in ``filter`` (regression from 0.16.0) and ``transform`` when grouping on multiple keys, one of which is datetime-like (:issue:`10114`) + + +- Bug in ``to_datetime`` and ``to_timedelta`` causing ``Index`` name to be lost (:issue:`10875`) +- Bug in ``len(DataFrame.groupby)`` causing ``IndexError`` when there's a column containing only NaNs (:issue:`11016`) + +- Bug that caused segfault when resampling an empty Series (:issue:`10228`) +- Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` resets name from its result, but retains in result's ``Index``. 
(:issue:`10150`) +- Bug in ``pd.eval`` using ``numexpr`` engine coerces 1 element numpy array to scalar (:issue:`10546`) +- Bug in ``pd.concat`` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`) +- Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`, :issue:`10630`) +- Bug in ``pd.read_csv`` with kwargs ``index_col=False``, ``index_col=['a', 'b']`` or ``dtype`` + (:issue:`10413`, :issue:`10467`, :issue:`10577`) +- Bug in ``Series.from_csv`` with ``header`` kwarg not setting the ``Series.name`` or the ``Series.index.name`` (:issue:`10483`) +- Bug in ``groupby.var`` which caused variance to be inaccurate for small float values (:issue:`10448`) +- Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`) +- Bug in ``read_csv`` when using a converter which generates a ``uint8`` type (:issue:`9266`) + +- Bug causes memory leak in time-series line and area plot (:issue:`9003`) + +- Bug when setting a ``Panel`` sliced along the major or minor axes when the right-hand side is a ``DataFrame`` (:issue:`11014`) +- Bug that returns ``None`` and does not raise ``NotImplementedError`` when operator functions (e.g. ``.add``) of ``Panel`` are not implemented (:issue:`7692`) + +- Bug in line and kde plot cannot accept multiple colors when ``subplots=True`` (:issue:`9894`) +- Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`) + +- Bug in left and right ``align`` of ``Series`` with ``MultiIndex`` may be inverted (:issue:`10665`) +- Bug in left and right ``join`` of with ``MultiIndex`` may be inverted (:issue:`10741`) + +- Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`) +- Bug in ``Categorical`` may not representing properly when category contains ``tz`` or ``Period`` (:issue:`10713`) +- Bug in ``Categorical.__iter__`` may not returning correct ``datetime`` and ``Period`` (:issue:`10713`) +- Bug in indexing with a ``PeriodIndex`` on an object with a ``PeriodIndex`` (:issue:`4125`) +- Bug in ``read_csv`` with ``engine='c'``: EOF preceded by a comment, blank line, etc. was not handled correctly (:issue:`10728`, :issue:`10548`) + +- Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`). +- Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`) +- Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`) +- Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`) +- Bug in ``Index.take`` may add unnecessary ``freq`` attribute (:issue:`10791`) +- Bug in ``merge`` with empty ``DataFrame`` may raise ``IndexError`` (:issue:`10824`) +- Bug in ``to_latex`` where unexpected keyword argument for some documented arguments (:issue:`10888`) +- Bug in indexing of large ``DataFrame`` where ``IndexError`` is uncaught (:issue:`10645` and :issue:`10692`) +- Bug in ``read_csv`` when using the ``nrows`` or ``chunksize`` parameters if file contains only a header line (:issue:`9535`) +- Bug in serialization of ``category`` types in HDF5 in presence of alternate encodings. 
(:issue:`10366`) +- Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) +- Bug in ``pd.DataFrame.diff`` when DataFrame is not consolidated (:issue:`10907`) +- Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead the original dtype (:issue:`9431`) +- Bug in ``Timedelta`` raising error when slicing from 0s (:issue:`10583`) +- Bug in ``DatetimeIndex.take`` and ``TimedeltaIndex.take`` may not raise ``IndexError`` against invalid index (:issue:`10295`) +- Bug in ``Series([np.nan]).astype('M8[ms]')``, which now returns ``Series([pd.NaT])`` (:issue:`10747`) +- Bug in ``PeriodIndex.order`` reset freq (:issue:`10295`) +- Bug in ``date_range`` when ``freq`` divides ``end`` as nanos (:issue:`10885`) +- Bug in ``iloc`` allowing memory outside bounds of a Series to be accessed with negative integers (:issue:`10779`) +- Bug in ``read_msgpack`` where encoding is not respected (:issue:`10581`) +- Bug preventing access to the first index when using ``iloc`` with a list containing the appropriate negative integer (:issue:`10547`, :issue:`10779`) +- Bug in ``TimedeltaIndex`` formatter causing error while trying to save ``DataFrame`` with ``TimedeltaIndex`` using ``to_csv`` (:issue:`10833`) +- Bug in ``DataFrame.where`` when handling Series slicing (:issue:`10218`, :issue:`9558`) +- Bug where ``pd.read_gbq`` throws ``ValueError`` when Bigquery returns zero rows (:issue:`10273`) +- Bug in ``to_json`` which was causing segmentation fault when serializing 0-rank ndarray (:issue:`9576`) +- Bug in plotting functions may raise ``IndexError`` when plotted on ``GridSpec`` (:issue:`10819`) +- Bug in plot result may show unnecessary minor ticklabels (:issue:`10657`) +- Bug in ``groupby`` incorrect computation for aggregation on ``DataFrame`` with ``NaT`` (E.g ``first``, ``last``, ``min``). (:issue:`10590`, :issue:`11010`) +- Bug when constructing ``DataFrame`` where passing a dictionary with only scalar values and specifying columns did not raise an error (:issue:`10856`) +- Bug in ``.var()`` causing roundoff errors for highly similar values (:issue:`10242`) +- Bug in ``DataFrame.plot(subplots=True)`` with duplicated columns outputs incorrect result (:issue:`10962`) +- Bug in ``Index`` arithmetic may result in incorrect class (:issue:`10638`) +- Bug in ``date_range`` results in empty if freq is negative annually, quarterly and monthly (:issue:`11018`) +- Bug in ``DatetimeIndex`` cannot infer negative freq (:issue:`11018`) +- Remove use of some deprecated numpy comparison operations, mainly in tests. (:issue:`10569`) +- Bug in ``Index`` dtype may not applied properly (:issue:`11017`) +- Bug in ``io.gbq`` when testing for minimum google api client version (:issue:`10652`) +- Bug in ``DataFrame`` construction from nested ``dict`` with ``timedelta`` keys (:issue:`11129`) +- Bug in ``.fillna`` against may raise ``TypeError`` when data contains datetime dtype (:issue:`7095`, :issue:`11153`) +- Bug in ``.groupby`` when number of keys to group by is same as length of index (:issue:`11185`) +- Bug in ``convert_objects`` where converted values might not be returned if all null and ``coerce`` (:issue:`9589`) +- Bug in ``convert_objects`` where ``copy`` keyword was not respected (:issue:`9589`) + + +.. _whatsnew_0.17.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.16.2..v0.17.0 diff --git a/doc/source/whatsnew/v0.17.1.rst b/doc/source/whatsnew/v0.17.1.rst new file mode 100644 index 00000000..774d17e6 --- /dev/null +++ b/doc/source/whatsnew/v0.17.1.rst @@ -0,0 +1,213 @@ +.. _whatsnew_0171: + +Version 0.17.1 (November 21, 2015) +---------------------------------- + +{{ header }} + + +.. note:: + + We are proud to announce that *pandas* has become a sponsored project of the (`NumFOCUS organization`_). This will help ensure the success of development of *pandas* as a world-class open-source project. + +.. _numfocus organization: http://www.numfocus.org/blog/numfocus-announces-new-fiscally-sponsored-project-pandas + +This is a minor bug-fix release from 0.17.0 and includes a large number of +bug fixes along several new features, enhancements, and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- Support for Conditional HTML Formatting, see :ref:`here ` +- Releasing the GIL on the csv reader & other ops, see :ref:`here ` +- Fixed regression in ``DataFrame.drop_duplicates`` from 0.16.2, causing incorrect results on integer values (:issue:`11376`) + +.. contents:: What's new in v0.17.1 + :local: + :backlinks: none + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0171.style: + +Conditional HTML formatting +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. warning:: + This is a new feature and is under active development. + We'll be adding features an possibly making breaking changes in future + releases. Feedback is welcome in :issue:`11610` + +We've added *experimental* support for conditional HTML formatting: +the visual styling of a DataFrame based on the data. +The styling is accomplished with HTML and CSS. +Accesses the styler class with the :attr:`pandas.DataFrame.style`, attribute, +an instance of :class:`~pandas.core.style.Styler` with your data attached. + +Here's a quick example: + + .. ipython:: python + + np.random.seed(123) + df = pd.DataFrame(np.random.randn(10, 5), columns=list("abcde")) + html = df.style.background_gradient(cmap="viridis", low=0.5) + +We can render the HTML to get the following table. + +.. raw:: html + :file: whatsnew_0171_html_table.html + +:class:`~pandas.core.style.Styler` interacts nicely with the Jupyter Notebook. +See the :ref:`documentation ` for more. + +.. _whatsnew_0171.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- ``DatetimeIndex`` now supports conversion to strings with ``astype(str)`` (:issue:`10442`) +- Support for ``compression`` (gzip/bz2) in :meth:`pandas.DataFrame.to_csv` (:issue:`7615`) +- ``pd.read_*`` functions can now also accept :class:`python:pathlib.Path`, or :class:`py:py._path.local.LocalPath` + objects for the ``filepath_or_buffer`` argument. (:issue:`11033`) + - The ``DataFrame`` and ``Series`` functions ``.to_csv()``, ``.to_html()`` and ``.to_latex()`` can now handle paths beginning with tildes (e.g. ``~/Documents/``) (:issue:`11438`) +- ``DataFrame`` now uses the fields of a ``namedtuple`` as columns, if columns are not supplied (:issue:`11181`) +- ``DataFrame.itertuples()`` now returns ``namedtuple`` objects, when possible. (:issue:`11269`, :issue:`11625`) +- Added ``axvlines_kwds`` to parallel coordinates plot (:issue:`10709`) +- Option to ``.info()`` and ``.memory_usage()`` to provide for deep introspection of memory consumption. Note that this can be expensive to compute and therefore is an optional parameter. (:issue:`11595`) + + .. 
ipython:: python + + df = pd.DataFrame({"A": ["foo"] * 1000}) # noqa: F821 + df["B"] = df["A"].astype("category") + + # shows the '+' as we have object dtypes + df.info() + + # we have an accurate memory assessment (but can be expensive to compute this) + df.info(memory_usage="deep") + +- ``Index`` now has a ``fillna`` method (:issue:`10089`) + + .. ipython:: python + + pd.Index([1, np.nan, 3]).fillna(2) + +- Series of type ``category`` now make ``.str.<...>`` and ``.dt.<...>`` accessor methods / properties available, if the categories are of that type. (:issue:`10661`) + + .. ipython:: python + + s = pd.Series(list("aabb")).astype("category") + s + s.str.contains("a") + + date = pd.Series(pd.date_range("1/1/2015", periods=5)).astype("category") + date + date.dt.day + +- ``pivot_table`` now has a ``margins_name`` argument so you can use something other than the default of 'All' (:issue:`3335`) +- Implement export of ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`11411`) +- Pretty printing sets (e.g. in DataFrame cells) now uses set literal syntax (``{x, y}``) instead of + Legacy Python syntax (``set([x, y])``) (:issue:`11215`) +- Improve the error message in :func:`pandas.io.gbq.to_gbq` when a streaming insert fails (:issue:`11285`) + and when the DataFrame does not match the schema of the destination table (:issue:`11359`) + +.. _whatsnew_0171.api: + +API changes +~~~~~~~~~~~ + +- raise ``NotImplementedError`` in ``Index.shift`` for non-supported index types (:issue:`8038`) +- ``min`` and ``max`` reductions on ``datetime64`` and ``timedelta64`` dtyped series now + result in ``NaT`` and not ``nan`` (:issue:`11245`). +- Indexing with a null key will raise a ``TypeError``, instead of a ``ValueError`` (:issue:`11356`) +- ``Series.ptp`` will now ignore missing values by default (:issue:`11163`) + +.. _whatsnew_0171.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- The ``pandas.io.ga`` module which implements ``google-analytics`` support is deprecated and will be removed in a future version (:issue:`11308`) +- Deprecate the ``engine`` keyword in ``.to_csv()``, which will be removed in a future version (:issue:`11274`) + +.. _whatsnew_0171.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Checking monotonic-ness before sorting on an index (:issue:`11080`) +- ``Series.dropna`` performance improvement when its dtype can't contain ``NaN`` (:issue:`11159`) +- Release the GIL on most datetime field operations (e.g. ``DatetimeIndex.year``, ``Series.dt.year``), normalization, and conversion to and from ``Period``, ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` (:issue:`11263`) +- Release the GIL on some rolling algos: ``rolling_median``, ``rolling_mean``, ``rolling_max``, ``rolling_min``, ``rolling_var``, ``rolling_kurt``, ``rolling_skew`` (:issue:`11450`) +- Release the GIL when reading and parsing text files in ``read_csv``, ``read_table`` (:issue:`11272`) +- Improved performance of ``rolling_median`` (:issue:`11450`) +- Improved performance of ``to_excel`` (:issue:`11352`) +- Performance bug in repr of ``Categorical`` categories, which was rendering the strings before chopping them for display (:issue:`11305`) +- Performance improvement in ``Categorical.remove_unused_categories``, (:issue:`11643`). +- Improved performance of ``Series`` constructor with no data and ``DatetimeIndex`` (:issue:`11433`) +- Improved performance of ``shift``, ``cumprod``, and ``cumsum`` with groupby (:issue:`4095`) + +.. 
_whatsnew_0171.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- ``SparseArray.__iter__()`` now does not cause ``PendingDeprecationWarning`` in Python 3.5 (:issue:`11622`) +- Regression from 0.16.2 for output formatting of long floats/nan, restored in (:issue:`11302`) +- ``Series.sort_index()`` now correctly handles the ``inplace`` option (:issue:`11402`) +- Incorrectly distributed .c file in the build on ``PyPi`` when reading a csv of floats and passing ``na_values=`` would show an exception (:issue:`11374`) +- Bug in ``.to_latex()`` output broken when the index has a name (:issue:`10660`) +- Bug in ``HDFStore.append`` with strings whose encoded length exceeded the max unencoded length (:issue:`11234`) +- Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`) +- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`) +- Bug in using ``DataFrame.ix`` with a MultiIndex indexer (:issue:`11372`) +- Bug in ``date_range`` with ambiguous endpoints (:issue:`11626`) +- Prevent adding new attributes to the accessors ``.str``, ``.dt`` and ``.cat``. Retrieving such + a value was not possible, so error out on setting it. (:issue:`10673`) +- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`) +- Bug in output formatting when using an index of ambiguous times (:issue:`11619`) +- Bug in comparisons of Series vs list-likes (:issue:`11339`) +- Bug in ``DataFrame.replace`` with a ``datetime64[ns, tz]`` and a non-compat to_replace (:issue:`11326`, :issue:`11153`) +- Bug in ``isnull`` where ``numpy.datetime64('NaT')`` in a ``numpy.array`` was not determined to be null(:issue:`11206`) +- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`) +- Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`) +- Bug in ``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`) +- Regression in ``DataFrame.drop_duplicates`` from 0.16.2, causing incorrect results on integer values (:issue:`11376`) +- Bug in ``pd.eval`` where unary ops in a list error (:issue:`11235`) +- Bug in ``squeeze()`` with zero length arrays (:issue:`11230`, :issue:`8999`) +- Bug in ``describe()`` dropping column names for hierarchical indexes (:issue:`11517`) +- Bug in ``DataFrame.pct_change()`` not propagating ``axis`` keyword on ``.fillna`` method (:issue:`11150`) +- Bug in ``.to_csv()`` when a mix of integer and string column names are passed as the ``columns`` parameter (:issue:`11637`) +- Bug in indexing with a ``range``, (:issue:`11652`) +- Bug in inference of numpy scalars and preserving dtype when setting columns (:issue:`11638`) +- Bug in ``to_sql`` using unicode column names giving UnicodeEncodeError with (:issue:`11431`). +- Fix regression in setting of ``xticks`` in ``plot`` (:issue:`11529`). +- Bug in ``holiday.dates`` where observance rules could not be applied to holiday and doc enhancement (:issue:`11477`, :issue:`11533`) +- Fix plotting issues when having plain ``Axes`` instances instead of ``SubplotAxes`` (:issue:`11520`, :issue:`11556`). 
+- Bug in ``DataFrame.to_latex()`` produces an extra rule when ``header=False`` (:issue:`7124`) +- Bug in ``df.groupby(...).apply(func)`` when a func returns a ``Series`` containing a new datetimelike column (:issue:`11324`) +- Bug in ``pandas.json`` when file to load is big (:issue:`11344`) +- Bugs in ``to_excel`` with duplicate columns (:issue:`11007`, :issue:`10982`, :issue:`10970`) +- Fixed a bug that prevented the construction of an empty series of dtype ``datetime64[ns, tz]`` (:issue:`11245`). +- Bug in ``read_excel`` with MultiIndex containing integers (:issue:`11317`) +- Bug in ``to_excel`` with openpyxl 2.2+ and merging (:issue:`11408`) +- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`) +- Bug in ``DataFrame.corr()`` raises exception when computes Kendall correlation for DataFrames with boolean and not boolean columns (:issue:`11560`) +- Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`) +- Bug in ``DataFrame.to_csv`` in passing through arguments for formatting ``MultiIndexes``, including ``date_format`` (:issue:`7791`) +- Bug in ``DataFrame.join()`` with ``how='right'`` producing a ``TypeError`` (:issue:`11519`) +- Bug in ``Series.quantile`` with empty list results has ``Index`` with ``object`` dtype (:issue:`11588`) +- Bug in ``pd.merge`` results in empty ``Int64Index`` rather than ``Index(dtype=object)`` when the merge result is empty (:issue:`11588`) +- Bug in ``Categorical.remove_unused_categories`` when having ``NaN`` values (:issue:`11599`) +- Bug in ``DataFrame.to_sparse()`` loses column names for MultiIndexes (:issue:`11600`) +- Bug in ``DataFrame.round()`` with non-unique column index producing a Fatal Python error (:issue:`11611`) +- Bug in ``DataFrame.round()`` with ``decimals`` being a non-unique indexed Series producing extra columns (:issue:`11618`) + + +.. _whatsnew_0.17.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.17.0..v0.17.1 diff --git a/doc/source/whatsnew/v0.18.0.rst b/doc/source/whatsnew/v0.18.0.rst new file mode 100644 index 00000000..a05b9bb1 --- /dev/null +++ b/doc/source/whatsnew/v0.18.0.rst @@ -0,0 +1,1304 @@ +.. _whatsnew_0180: + +Version 0.18.0 (March 13, 2016) +------------------------------- + +{{ header }} + + +This is a major release from 0.17.1 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +.. warning:: + + pandas >= 0.18.0 no longer supports compatibility with Python version 2.6 + and 3.3 (:issue:`7718`, :issue:`11273`) + +.. warning:: + + ``numexpr`` version 2.4.4 will now show a warning and not be used as a computation back-end for pandas because of some buggy behavior. This does not affect other versions (>= 2.1 and >= 2.4.6). (:issue:`12489`) + +Highlights include: + +- Moving and expanding window functions are now methods on Series and DataFrame, + similar to ``.groupby``, see :ref:`here `. +- Adding support for a ``RangeIndex`` as a specialized form of the ``Int64Index`` + for memory savings, see :ref:`here `. +- API breaking change to the ``.resample`` method to make it more ``.groupby`` + like, see :ref:`here `. +- Removal of support for positional indexing with floats, which was deprecated + since 0.14.0. This will now raise a ``TypeError``, see :ref:`here `. 
+- The ``.to_xarray()`` function has been added for compatibility with the + `xarray package `__, see :ref:`here `. +- The ``read_sas`` function has been enhanced to read ``sas7bdat`` files, see :ref:`here `. +- Addition of the :ref:`.str.extractall() method `, + and API changes to the :ref:`.str.extract() method ` + and :ref:`.str.cat() method `. +- ``pd.test()`` top-level nose test runner is available (:issue:`4327`). + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.18.0 + :local: + :backlinks: none + +.. _whatsnew_0180.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0180.enhancements.moments: + +Window functions are now methods +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Window functions have been refactored to be methods on ``Series/DataFrame`` objects, rather than top-level functions, which are now deprecated. This allows these window-type functions, to have a similar API to that of ``.groupby``. See the full documentation :ref:`here ` (:issue:`11603`, :issue:`12373`) + + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame({'A': range(10), 'B': np.random.randn(10)}) + df + +Previous behavior: + +.. code-block:: ipython + + In [8]: pd.rolling_mean(df, window=3) + FutureWarning: pd.rolling_mean is deprecated for DataFrame and will be removed in a future version, replace with + DataFrame.rolling(window=3,center=False).mean() + Out[8]: + A B + 0 NaN NaN + 1 NaN NaN + 2 1 0.237722 + 3 2 -0.023640 + 4 3 0.133155 + 5 4 -0.048693 + 6 5 0.342054 + 7 6 0.370076 + 8 7 0.079587 + 9 8 -0.954504 + +New behavior: + +.. ipython:: python + + r = df.rolling(window=3) + +These show a descriptive repr + +.. ipython:: python + + r +with tab-completion of available methods and properties. + +.. code-block:: ipython + + In [9]: r. # noqa E225, E999 + r.A r.agg r.apply r.count r.exclusions r.max r.median r.name r.skew r.sum + r.B r.aggregate r.corr r.cov r.kurt r.mean r.min r.quantile r.std r.var + +The methods operate on the ``Rolling`` object itself + +.. ipython:: python + + r.mean() + +They provide getitem accessors + +.. ipython:: python + + r['A'].mean() + +And multiple aggregations + +.. ipython:: python + + r.agg({'A': ['mean', 'std'], + 'B': ['mean', 'std']}) + +.. _whatsnew_0180.enhancements.rename: + +Changes to rename +^^^^^^^^^^^^^^^^^ + +``Series.rename`` and ``NDFrame.rename_axis`` can now take a scalar or list-like +argument for altering the Series or axis *name*, in addition to their old behaviors of altering labels. (:issue:`9494`, :issue:`11965`) + +.. ipython:: python + + s = pd.Series(np.random.randn(5)) + s.rename('newname') + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 2)) + (df.rename_axis("indexname") + .rename_axis("columns_name", axis="columns")) + +The new functionality works well in method chains. Previously these methods only accepted functions or dicts mapping a *label* to a new label. +This continues to work as before for function or dict-like values. + + +.. _whatsnew_0180.enhancements.rangeindex: + +Range Index +^^^^^^^^^^^ + +A ``RangeIndex`` has been added to the ``Int64Index`` sub-classes to support a memory saving alternative for common use cases. This has a similar implementation to the python ``range`` object (``xrange`` in python 2), in that it only stores the start, stop, and step values for the index. It will transparently interact with the user API, converting to ``Int64Index`` if needed. 
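+
+As a minimal sketch (assuming the 0.18.0 ``pd.RangeIndex`` constructor), a ``RangeIndex`` can also be built explicitly, and only operations that cannot be expressed as a range fall back to ``Int64Index``:
+
+.. code-block:: python
+
+   idx = pd.RangeIndex(start=0, stop=1000, step=1)  # stores only start, stop and step
+   idx[:100]       # slicing can be expressed as a range, so this stays a RangeIndex
+   idx[[0, 5, 7]]  # list-based indexing cannot, so this materializes an Int64Index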
+ +This will now be the default constructed index for ``NDFrame`` objects, rather than previous an ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`, :issue:`12109`, :issue:`12888`) + +Previous behavior: + +.. code-block:: ipython + + In [3]: s = pd.Series(range(1000)) + + In [4]: s.index + Out[4]: + Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + ... + 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], dtype='int64', length=1000) + + In [6]: s.index.nbytes + Out[6]: 8000 + + +New behavior: + +.. ipython:: python + + s = pd.Series(range(1000)) + s.index + s.index.nbytes + +.. _whatsnew_0180.enhancements.extract: + +Changes to str.extract +^^^^^^^^^^^^^^^^^^^^^^ + +The :ref:`.str.extract ` method takes a regular +expression with capture groups, finds the first match in each subject +string, and returns the contents of the capture groups +(:issue:`11386`). + +In v0.18.0, the ``expand`` argument was added to +``extract``. + +- ``expand=False``: it returns a ``Series``, ``Index``, or ``DataFrame``, depending on the subject and regular expression pattern (same behavior as pre-0.18.0). +- ``expand=True``: it always returns a ``DataFrame``, which is more consistent and less confusing from the perspective of a user. + +Currently the default is ``expand=None`` which gives a ``FutureWarning`` and uses ``expand=False``. To avoid this warning, please explicitly specify ``expand``. + +.. code-block:: ipython + + In [1]: pd.Series(['a1', 'b2', 'c3']).str.extract(r'[ab](\d)', expand=None) + FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) + but in a future version of pandas this will be changed to expand=True (return DataFrame) + + Out[1]: + 0 1 + 1 2 + 2 NaN + dtype: object + +Extracting a regular expression with one group returns a Series if +``expand=False``. + +.. ipython:: python + + pd.Series(['a1', 'b2', 'c3']).str.extract(r'[ab](\d)', expand=False) + +It returns a ``DataFrame`` with one column if ``expand=True``. + +.. ipython:: python + + pd.Series(['a1', 'b2', 'c3']).str.extract(r'[ab](\d)', expand=True) + +Calling on an ``Index`` with a regex with exactly one capture group +returns an ``Index`` if ``expand=False``. + +.. ipython:: python + + s = pd.Series(["a1", "b2", "c3"], ["A11", "B22", "C33"]) + s.index + s.index.str.extract("(?P[a-zA-Z])", expand=False) + +It returns a ``DataFrame`` with one column if ``expand=True``. + +.. ipython:: python + + s.index.str.extract("(?P[a-zA-Z])", expand=True) + +Calling on an ``Index`` with a regex with more than one capture group +raises ``ValueError`` if ``expand=False``. + +.. code-block:: python + + >>> s.index.str.extract("(?P[a-zA-Z])([0-9]+)", expand=False) + ValueError: only one regex group is supported with Index + +It returns a ``DataFrame`` if ``expand=True``. + +.. ipython:: python + + s.index.str.extract("(?P[a-zA-Z])([0-9]+)", expand=True) + +In summary, ``extract(expand=True)`` always returns a ``DataFrame`` +with a row for every subject string, and a column for every capture +group. + +.. _whatsnew_0180.enhancements.extractall: + +Addition of str.extractall +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :ref:`.str.extractall ` method was added +(:issue:`11386`). Unlike ``extract``, which returns only the first +match. + +.. ipython:: python + + s = pd.Series(["a1a2", "b1", "c1"], ["A", "B", "C"]) + s + s.str.extract(r"(?P[ab])(?P\d)", expand=False) + +The ``extractall`` method returns all matches. + +.. ipython:: python + + s.str.extractall(r"(?P[ab])(?P\d)") + +.. 
_whatsnew_0180.enhancements.strcat: + +Changes to str.cat +^^^^^^^^^^^^^^^^^^ + +The method ``.str.cat()`` concatenates the members of a ``Series``. Before, if ``NaN`` values were present in the Series, calling ``.str.cat()`` on it would return ``NaN``, unlike the rest of the ``Series.str.*`` API. This behavior has been amended to ignore ``NaN`` values by default. (:issue:`11435`). + +A new, friendlier ``ValueError`` is added to protect against the mistake of supplying the ``sep`` as an arg, rather than as a kwarg. (:issue:`11334`). + +.. ipython:: python + + pd.Series(['a', 'b', np.nan, 'c']).str.cat(sep=' ') + pd.Series(['a', 'b', np.nan, 'c']).str.cat(sep=' ', na_rep='?') + +.. code-block:: ipython + + In [2]: pd.Series(['a', 'b', np.nan, 'c']).str.cat(' ') + ValueError: Did you mean to supply a ``sep`` keyword? + + +.. _whatsnew_0180.enhancements.rounding: + +Datetimelike rounding +^^^^^^^^^^^^^^^^^^^^^ + +``DatetimeIndex``, ``Timestamp``, ``TimedeltaIndex``, ``Timedelta`` have gained the ``.round()``, ``.floor()`` and ``.ceil()`` method for datetimelike rounding, flooring and ceiling. (:issue:`4314`, :issue:`11963`) + +Naive datetimes + +.. ipython:: python + + dr = pd.date_range('20130101 09:12:56.1234', periods=3) + dr + dr.round('s') + + # Timestamp scalar + dr[0] + dr[0].round('10s') + +Tz-aware are rounded, floored and ceiled in local times + +.. ipython:: python + + dr = dr.tz_localize('US/Eastern') + dr + dr.round('s') + +Timedeltas + +.. ipython:: python + + t = pd.timedelta_range('1 days 2 hr 13 min 45 us', periods=3, freq='d') + t + t.round('10min') + + # Timedelta scalar + t[0] + t[0].round('2h') + + +In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available through the ``.dt`` accessor of ``Series``. + +.. ipython:: python + + s = pd.Series(dr) + s + s.dt.round('D') + +Formatting of integers in FloatIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Integers in ``FloatIndex``, e.g. 1., are now formatted with a decimal point and a ``0`` digit, e.g. ``1.0`` (:issue:`11713`) +This change not only affects the display to the console, but also the output of IO methods like ``.to_csv`` or ``.to_html``. + +Previous behavior: + +.. code-block:: ipython + + In [2]: s = pd.Series([1, 2, 3], index=np.arange(3.)) + + In [3]: s + Out[3]: + 0 1 + 1 2 + 2 3 + dtype: int64 + + In [4]: s.index + Out[4]: Float64Index([0.0, 1.0, 2.0], dtype='float64') + + In [5]: print(s.to_csv(path=None)) + 0,1 + 1,2 + 2,3 + + +New behavior: + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=np.arange(3.)) + s + s.index + print(s.to_csv(path_or_buf=None, header=False)) + +Changes to dtype assignment behaviors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When a DataFrame's slice is updated with a new slice of the same dtype, the dtype of the DataFrame will now remain the same. (:issue:`10503`) + +Previous behavior: + +.. code-block:: ipython + + In [5]: df = pd.DataFrame({'a': [0, 1, 1], + 'b': pd.Series([100, 200, 300], dtype='uint32')}) + + In [7]: df.dtypes + Out[7]: + a int64 + b uint32 + dtype: object + + In [8]: ix = df['a'] == 1 + + In [9]: df.loc[ix, 'b'] = df.loc[ix, 'b'] + + In [11]: df.dtypes + Out[11]: + a int64 + b int64 + dtype: object + +New behavior: + +.. 
ipython:: python + + df = pd.DataFrame({'a': [0, 1, 1], + 'b': pd.Series([100, 200, 300], dtype='uint32')}) + df.dtypes + ix = df['a'] == 1 + df.loc[ix, 'b'] = df.loc[ix, 'b'] + df.dtypes + +When a DataFrame's integer slice is partially updated with a new slice of floats that could potentially be down-casted to integer without losing precision, the dtype of the slice will be set to float instead of integer. + +Previous behavior: + +.. code-block:: ipython + + In [4]: df = pd.DataFrame(np.array(range(1,10)).reshape(3,3), + columns=list('abc'), + index=[[4,4,8], [8,10,12]]) + + In [5]: df + Out[5]: + a b c + 4 8 1 2 3 + 10 4 5 6 + 8 12 7 8 9 + + In [7]: df.ix[4, 'c'] = np.array([0., 1.]) + + In [8]: df + Out[8]: + a b c + 4 8 1 2 0 + 10 4 5 1 + 8 12 7 8 9 + +New behavior: + +.. ipython:: python + + df = pd.DataFrame(np.array(range(1,10)).reshape(3,3), + columns=list('abc'), + index=[[4,4,8], [8,10,12]]) + df + df.loc[4, 'c'] = np.array([0., 1.]) + df + +.. _whatsnew_0180.enhancements.xarray: + +Method to_xarray +^^^^^^^^^^^^^^^^ + +In a future version of pandas, we will be deprecating ``Panel`` and other > 2 ndim objects. In order to provide for continuity, +all ``NDFrame`` objects have gained the ``.to_xarray()`` method in order to convert to ``xarray`` objects, which has +a pandas-like interface for > 2 ndim. (:issue:`11972`) + +See the `xarray full-documentation here `__. + +.. code-block:: ipython + + In [1]: p = Panel(np.arange(2*3*4).reshape(2,3,4)) + + In [2]: p.to_xarray() + Out[2]: + + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) + Coordinates: + * items (items) int64 0 1 + * major_axis (major_axis) int64 0 1 2 + * minor_axis (minor_axis) int64 0 1 2 3 + +Latex representation +^^^^^^^^^^^^^^^^^^^^ + +``DataFrame`` has gained a ``._repr_latex_()`` method in order to allow for conversion to latex in a ipython/jupyter notebook using nbconvert. (:issue:`11778`) + +Note that this must be activated by setting the option ``pd.display.latex.repr=True`` (:issue:`12182`) + +For example, if you have a jupyter notebook you plan to convert to latex using nbconvert, place the statement ``pd.display.latex.repr=True`` in the first cell to have the contained DataFrame output also stored as latex. + +The options ``display.latex.escape`` and ``display.latex.longtable`` have also been added to the configuration and are used automatically by the ``to_latex`` +method. See the :ref:`available options docs ` for more info. + +.. _whatsnew_0180.enhancements.sas: + +``pd.read_sas()`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``read_sas`` has gained the ability to read SAS7BDAT files, including compressed files. The files can be read in entirety, or incrementally. For full details see :ref:`here `. (:issue:`4052`) + +.. 
_whatsnew_0180.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Handle truncated floats in SAS xport files (:issue:`11713`) +- Added option to hide index in ``Series.to_string`` (:issue:`11729`) +- ``read_excel`` now supports s3 urls of the format ``s3://bucketname/filename`` (:issue:`11447`) +- add support for ``AWS_S3_HOST`` env variable when reading from s3 (:issue:`12198`) +- A simple version of ``Panel.round()`` is now implemented (:issue:`11763`) +- For Python 3.x, ``round(DataFrame)``, ``round(Series)``, ``round(Panel)`` will work (:issue:`11763`) +- ``sys.getsizeof(obj)`` returns the memory usage of a pandas object, including the + values it contains (:issue:`11597`) +- ``Series`` gained an ``is_unique`` attribute (:issue:`11946`) +- ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`). +- Added ``DataFrame.style.format`` for more flexible formatting of cell values (:issue:`11692`) +- ``DataFrame.select_dtypes`` now allows the ``np.float16`` type code (:issue:`11990`) +- ``pivot_table()`` now accepts most iterables for the ``values`` parameter (:issue:`12017`) +- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`, :issue:`12572`). For further details see `here `__ +- ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`). +- Add missing methods/fields to ``.dt`` for ``Period`` (:issue:`8848`) +- The entire code base has been ``PEP``-ified (:issue:`12096`) + +.. _whatsnew_0180.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- the leading white spaces have been removed from the output of ``.to_string(index=False)`` method (:issue:`11833`) +- the ``out`` parameter has been removed from the ``Series.round()`` method. (:issue:`11763`) +- ``DataFrame.round()`` leaves non-numeric columns unchanged in its return, rather than raises. (:issue:`11885`) +- ``DataFrame.head(0)`` and ``DataFrame.tail(0)`` return empty frames, rather than ``self``. (:issue:`11937`) +- ``Series.head(0)`` and ``Series.tail(0)`` return empty series, rather than ``self``. (:issue:`11937`) +- ``to_msgpack`` and ``read_msgpack`` encoding now defaults to ``'utf-8'``. (:issue:`12170`) +- the order of keyword arguments to text file parsing functions (``.read_csv()``, ``.read_table()``, ``.read_fwf()``) changed to group related arguments. (:issue:`11555`) +- ``NaTType.isoformat`` now returns the string ``'NaT`` to allow the result to + be passed to the constructor of ``Timestamp``. (:issue:`12300`) + +NaT and Timedelta operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``NaT`` and ``Timedelta`` have expanded arithmetic operations, which are extended to ``Series`` +arithmetic where applicable. Operations defined for ``datetime64[ns]`` or ``timedelta64[ns]`` +are now also defined for ``NaT`` (:issue:`11564`). + +``NaT`` now supports arithmetic operations with integers and floats. + +.. ipython:: python + + pd.NaT * 1 + pd.NaT * 1.5 + pd.NaT / 2 + pd.NaT * np.nan + +``NaT`` defines more arithmetic operations with ``datetime64[ns]`` and ``timedelta64[ns]``. + +.. ipython:: python + + pd.NaT / pd.NaT + pd.Timedelta('1s') / pd.NaT + +``NaT`` may represent either a ``datetime64[ns]`` null or a ``timedelta64[ns]`` null. +Given the ambiguity, it is treated as a ``timedelta64[ns]``, which allows more operations +to succeed. + +.. 
ipython:: python + + pd.NaT + pd.NaT + + # same as + pd.Timedelta('1s') + pd.Timedelta('1s') + +as opposed to + +.. code-block:: ipython + + In [3]: pd.Timestamp('19900315') + pd.Timestamp('19900315') + TypeError: unsupported operand type(s) for +: 'Timestamp' and 'Timestamp' + +However, when wrapped in a ``Series`` whose ``dtype`` is ``datetime64[ns]`` or ``timedelta64[ns]``, +the ``dtype`` information is respected. + +.. code-block:: ipython + + In [1]: pd.Series([pd.NaT], dtype='=0.18 / any Python | + +----------------------+------------------------+ + | 0.17 / Python 3 | >=0.18 / any Python | + +----------------------+------------------------+ + | 0.18 | >= 0.18 | + +----------------------+------------------------+ + + + 0.18.0 is backward-compatible for reading files packed by older versions, except for files packed with 0.17 in Python 2, in which case only they can only be unpacked in Python 2. + +Signature change for .rank +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Series.rank`` and ``DataFrame.rank`` now have the same signature (:issue:`11759`) + +Previous signature + +.. code-block:: ipython + + In [3]: pd.Series([0,1]).rank(method='average', na_option='keep', + ascending=True, pct=False) + Out[3]: + 0 1 + 1 2 + dtype: float64 + + In [4]: pd.DataFrame([0,1]).rank(axis=0, numeric_only=None, + method='average', na_option='keep', + ascending=True, pct=False) + Out[4]: + 0 + 0 1 + 1 2 + +New signature + +.. ipython:: python + + pd.Series([0,1]).rank(axis=0, method='average', numeric_only=False, + na_option='keep', ascending=True, pct=False) + pd.DataFrame([0,1]).rank(axis=0, method='average', numeric_only=False, + na_option='keep', ascending=True, pct=False) + + +Bug in QuarterBegin with n=0 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, the behavior of the QuarterBegin offset was inconsistent +depending on the date when the ``n`` parameter was 0. (:issue:`11406`) + +The general semantics of anchored offsets for ``n=0`` is to not move the date +when it is an anchor point (e.g., a quarter start date), and otherwise roll +forward to the next anchor point. + +.. ipython:: python + + d = pd.Timestamp('2014-02-01') + d + d + pd.offsets.QuarterBegin(n=0, startingMonth=2) + d + pd.offsets.QuarterBegin(n=0, startingMonth=1) + +For the ``QuarterBegin`` offset in previous versions, the date would be rolled +*backwards* if date was in the same month as the quarter start date. + +.. code-block:: ipython + + In [3]: d = pd.Timestamp('2014-02-15') + + In [4]: d + pd.offsets.QuarterBegin(n=0, startingMonth=2) + Out[4]: Timestamp('2014-02-01 00:00:00') + +This behavior has been corrected in version 0.18.0, which is consistent with +other anchored offsets like ``MonthBegin`` and ``YearBegin``. + +.. ipython:: python + + d = pd.Timestamp('2014-02-15') + d + pd.offsets.QuarterBegin(n=0, startingMonth=2) + +.. _whatsnew_0180.breaking.resample: + +Resample API +^^^^^^^^^^^^ + +Like the change in the window functions API :ref:`above `, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`, :issue:`12334`, :issue:`12348`, :issue:`12448`). + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.rand(10,4), + columns=list('ABCD'), + index=pd.date_range('2010-01-01 09:00:00', + periods=10, freq='s')) + df + + +**Previous API**: + +You would write a resampling operation that immediately evaluates. If a ``how`` parameter was not provided, it +would default to ``how='mean'``. + +.. 
code-block:: ipython + + In [6]: df.resample('2s') + Out[6]: + A B C D + 2010-01-01 09:00:00 0.485748 0.447351 0.357096 0.793615 + 2010-01-01 09:00:02 0.820801 0.794317 0.364034 0.531096 + 2010-01-01 09:00:04 0.433985 0.314582 0.424104 0.625733 + 2010-01-01 09:00:06 0.624988 0.609738 0.633165 0.612452 + 2010-01-01 09:00:08 0.510470 0.534317 0.573201 0.806949 + +You could also specify a ``how`` directly + +.. code-block:: ipython + + In [7]: df.resample('2s', how='sum') + Out[7]: + A B C D + 2010-01-01 09:00:00 0.971495 0.894701 0.714192 1.587231 + 2010-01-01 09:00:02 1.641602 1.588635 0.728068 1.062191 + 2010-01-01 09:00:04 0.867969 0.629165 0.848208 1.251465 + 2010-01-01 09:00:06 1.249976 1.219477 1.266330 1.224904 + 2010-01-01 09:00:08 1.020940 1.068634 1.146402 1.613897 + +**New API**: + +Now, you can write ``.resample(..)`` as a 2-stage operation like ``.groupby(...)``, which +yields a ``Resampler``. + +.. ipython:: python + :okwarning: + + r = df.resample('2s') + r + +Downsampling +"""""""""""" + +You can then use this object to perform operations. +These are downsampling operations (going from a higher frequency to a lower one). + +.. ipython:: python + + r.mean() + +.. ipython:: python + + r.sum() + +Furthermore, resample now supports ``getitem`` operations to perform the resample on specific columns. + +.. ipython:: python + + r[['A','C']].mean() + +and ``.aggregate`` type operations. + +.. ipython:: python + + r.agg({'A' : 'mean', 'B' : 'sum'}) + +These accessors can of course, be combined + +.. ipython:: python + + r[['A','B']].agg(['mean','sum']) + +Upsampling +"""""""""" + +.. currentmodule:: pandas.tseries.resample + +Upsampling operations take you from a lower frequency to a higher frequency. These are now +performed with the ``Resampler`` objects with :meth:`~Resampler.backfill`, +:meth:`~Resampler.ffill`, :meth:`~Resampler.fillna` and :meth:`~Resampler.asfreq` methods. + +.. ipython:: python + + s = pd.Series(np.arange(5, dtype='int64'), + index=pd.date_range('2010-01-01', periods=5, freq='Q')) + s + +Previously + +.. code-block:: ipython + + In [6]: s.resample('M', fill_method='ffill') + Out[6]: + 2010-03-31 0 + 2010-04-30 0 + 2010-05-31 0 + 2010-06-30 1 + 2010-07-31 1 + 2010-08-31 1 + 2010-09-30 2 + 2010-10-31 2 + 2010-11-30 2 + 2010-12-31 3 + 2011-01-31 3 + 2011-02-28 3 + 2011-03-31 4 + Freq: M, dtype: int64 + +New API + +.. ipython:: python + + s.resample('M').ffill() + +.. note:: + + In the new API, you can either downsample OR upsample. The prior implementation would allow you to pass an aggregator function (like ``mean``) even though you were upsampling, providing a bit of confusion. + +Previous API will work but with deprecations +"""""""""""""""""""""""""""""""""""""""""""" + +.. warning:: + + This new API for resample includes some internal changes for the prior-to-0.18.0 API, to work with a deprecation warning in most cases, as the resample operation returns a deferred object. We can intercept operations and just do what the (pre 0.18.0) API did (with a warning). Here is a typical use case: + + .. code-block:: ipython + + In [4]: r = df.resample('2s') + + In [6]: r*10 + pandas/tseries/resample.py:80: FutureWarning: .resample() is now a deferred operation + use .resample(...).mean() instead of .resample(...) 
+ + Out[6]: + A B C D + 2010-01-01 09:00:00 4.857476 4.473507 3.570960 7.936154 + 2010-01-01 09:00:02 8.208011 7.943173 3.640340 5.310957 + 2010-01-01 09:00:04 4.339846 3.145823 4.241039 6.257326 + 2010-01-01 09:00:06 6.249881 6.097384 6.331650 6.124518 + 2010-01-01 09:00:08 5.104699 5.343172 5.732009 8.069486 + + However, getting and assignment operations directly on a ``Resampler`` will raise a ``ValueError``: + + .. code-block:: ipython + + In [7]: r.iloc[0] = 5 + ValueError: .resample() is now a deferred operation + use .resample(...).mean() instead of .resample(...) + + There is a situation where the new API can not perform all the operations when using original code. + This code is intending to resample every 2s, take the ``mean`` AND then take the ``min`` of those results. + + .. code-block:: ipython + + In [4]: df.resample('2s').min() + Out[4]: + A 0.433985 + B 0.314582 + C 0.357096 + D 0.531096 + dtype: float64 + + The new API will: + + .. ipython:: python + + df.resample('2s').min() + + The good news is the return dimensions will differ between the new API and the old API, so this should loudly raise + an exception. + + To replicate the original operation + + .. ipython:: python + + df.resample('2s').mean().min() + +Changes to eval +^^^^^^^^^^^^^^^ + +In prior versions, new columns assignments in an ``eval`` expression resulted +in an inplace change to the ``DataFrame``. (:issue:`9297`, :issue:`8664`, :issue:`10486`) + +.. ipython:: python + + df = pd.DataFrame({'a': np.linspace(0, 10, 5), 'b': range(5)}) + df + +.. ipython:: python + :suppress: + + df.eval('c = a + b', inplace=True) + +.. code-block:: ipython + + In [12]: df.eval('c = a + b') + FutureWarning: eval expressions containing an assignment currentlydefault to operating inplace. + This will change in a future version of pandas, use inplace=True to avoid this warning. + + In [13]: df + Out[13]: + a b c + 0 0.0 0 0.0 + 1 2.5 1 3.5 + 2 5.0 2 7.0 + 3 7.5 3 10.5 + 4 10.0 4 14.0 + +In version 0.18.0, a new ``inplace`` keyword was added to choose whether the +assignment should be done inplace or return a copy. + +.. ipython:: python + + df + df.eval('d = c - b', inplace=False) + df + df.eval('d = c - b', inplace=True) + df + +.. warning:: + + For backwards compatibility, ``inplace`` defaults to ``True`` if not specified. + This will change in a future version of pandas. If your code depends on an + inplace assignment you should update to explicitly set ``inplace=True`` + +The ``inplace`` keyword parameter was also added the ``query`` method. + +.. ipython:: python + + df.query('a > 5') + df.query('a > 5', inplace=True) + df + +.. warning:: + + Note that the default value for ``inplace`` in a ``query`` + is ``False``, which is consistent with prior versions. + +``eval`` has also been updated to allow multi-line expressions for multiple +assignments. These expressions will be evaluated one at a time in order. Only +assignments are valid for multi-line expressions. + +.. ipython:: python + + df + df.eval(""" + e = d + a + f = e - 22 + g = f / 2.0""", inplace=True) + df + + +.. _whatsnew_0180.api: + +Other API changes +^^^^^^^^^^^^^^^^^ +- ``DataFrame.between_time`` and ``Series.between_time`` now only parse a fixed set of time strings. Parsing of date strings is no longer supported and raises a ``ValueError``. (:issue:`11818`) + + .. ipython:: python + + s = pd.Series(range(10), pd.date_range('2015-01-01', freq='H', periods=10)) + s.between_time("7:00am", "9:00am") + + This will now raise. + + .. 
code-block:: ipython + + In [2]: s.between_time('20150101 07:00:00','20150101 09:00:00') + ValueError: Cannot convert arg ['20150101 07:00:00'] to a time. + +- ``.memory_usage()`` now includes values in the index, as does memory_usage in ``.info()`` (:issue:`11597`) +- ``DataFrame.to_latex()`` now supports non-ascii encodings (eg ``utf-8``) in Python 2 with the parameter ``encoding`` (:issue:`7061`) +- ``pandas.merge()`` and ``DataFrame.merge()`` will show a specific error message when trying to merge with an object that is not of type ``DataFrame`` or a subclass (:issue:`12081`) +- ``DataFrame.unstack`` and ``Series.unstack`` now take ``fill_value`` keyword to allow direct replacement of missing values when an unstack results in missing values in the resulting ``DataFrame``. As an added benefit, specifying ``fill_value`` will preserve the data type of the original stacked data. (:issue:`9746`) +- As part of the new API for :ref:`window functions ` and :ref:`resampling `, aggregation functions have been clarified, raising more informative error messages on invalid aggregations. (:issue:`9052`). A full set of examples are presented in :ref:`groupby `. +- Statistical functions for ``NDFrame`` objects (like ``sum(), mean(), min()``) will now raise if non-numpy-compatible arguments are passed in for ``**kwargs`` (:issue:`12301`) +- ``.to_latex`` and ``.to_html`` gain a ``decimal`` parameter like ``.to_csv``; the default is ``'.'`` (:issue:`12031`) +- More helpful error message when constructing a ``DataFrame`` with empty data but with indices (:issue:`8020`) +- ``.describe()`` will now properly handle bool dtype as a categorical (:issue:`6625`) +- More helpful error message with an invalid ``.transform`` with user defined input (:issue:`10165`) +- Exponentially weighted functions now allow specifying alpha directly (:issue:`10789`) and raise ``ValueError`` if parameters violate ``0 < alpha <= 1`` (:issue:`12492`) + +.. _whatsnew_0180.deprecations: + +Deprecations +^^^^^^^^^^^^ + +.. _whatsnew_0180.window_deprecations: + +- The functions ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` are deprecated and replaced by the corresponding method call. Note that + the new suggested syntax includes all of the arguments (even if default) (:issue:`11603`) + + .. code-block:: ipython + + In [1]: s = pd.Series(range(3)) + + In [2]: pd.rolling_mean(s,window=2,min_periods=1) + FutureWarning: pd.rolling_mean is deprecated for Series and + will be removed in a future version, replace with + Series.rolling(min_periods=1,window=2,center=False).mean() + Out[2]: + 0 0.0 + 1 0.5 + 2 1.5 + dtype: float64 + + In [3]: pd.rolling_cov(s, s, window=2) + FutureWarning: pd.rolling_cov is deprecated for Series and + will be removed in a future version, replace with + Series.rolling(window=2).cov(other=) + Out[3]: + 0 NaN + 1 0.5 + 2 0.5 + dtype: float64 + +- The ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. You can simply resample the input prior to creating a window function. (:issue:`11603`). + + For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value on a rolling 5 Day window, one could use ``s.resample('D').mean().rolling(window=5).max()``, which first resamples the data to daily data, then provides a rolling 5 day window. + +- ``pd.tseries.frequencies.get_offset_name`` function is deprecated. 
Use offset's ``.freqstr`` property as alternative (:issue:`11192`) +- ``pandas.stats.fama_macbeth`` routines are deprecated and will be removed in a future version (:issue:`6077`) +- ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var`` routines are deprecated and will be removed in a future version (:issue:`6077`) +- show a ``FutureWarning`` rather than a ``DeprecationWarning`` on using long-time deprecated syntax in ``HDFStore.select``, where the ``where`` clause is not a string-like (:issue:`12027`) + +- The ``pandas.options.display.mpl_style`` configuration has been deprecated + and will be removed in a future version of pandas. This functionality + is better handled by matplotlib's `style sheets`_ (:issue:`11783`). + + +.. _style sheets: http://matplotlib.org/users/style_sheets.html + +.. _whatsnew_0180.float_indexers: + +Removal of deprecated float indexers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In :issue:`4892` indexing with floating point numbers on a non-``Float64Index`` was deprecated (in version 0.14.0). +In 0.18.0, this deprecation warning is removed and these will now raise a ``TypeError``. (:issue:`12165`, :issue:`12333`) + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=[4, 5, 6]) + s + s2 = pd.Series([1, 2, 3], index=list('abc')) + s2 + +Previous behavior: + +.. code-block:: ipython + + # this is label indexing + In [2]: s[5.0] + FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[2]: 2 + + # this is positional indexing + In [3]: s.iloc[1.0] + FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[3]: 2 + + # this is label indexing + In [4]: s.loc[5.0] + FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[4]: 2 + + # .ix would coerce 1.0 to the positional 1, and index + In [5]: s2.ix[1.0] = 10 + FutureWarning: scalar indexers for index type Index should be integers and not floating point + + In [6]: s2 + Out[6]: + a 1 + b 10 + c 3 + dtype: int64 + +New behavior: + +For iloc, getting & setting via a float scalar will always raise. + +.. code-block:: ipython + + In [3]: s.iloc[2.0] + TypeError: cannot do label indexing on with these indexers [2.0] of + +Other indexers will coerce to a like integer for both getting and setting. The ``FutureWarning`` has been dropped for ``.loc``, ``.ix`` and ``[]``. + +.. ipython:: python + + s[5.0] + s.loc[5.0] + +and setting + +.. ipython:: python + + s_copy = s.copy() + s_copy[5.0] = 10 + s_copy + s_copy = s.copy() + s_copy.loc[5.0] = 10 + s_copy + +Positional setting with ``.ix`` and a float indexer will ADD this value to the index, rather than previously setting the value by position. + +.. code-block:: ipython + + In [3]: s2.ix[1.0] = 10 + In [4]: s2 + Out[4]: + a 1 + b 2 + c 3 + 1.0 10 + dtype: int64 + +Slicing will also coerce integer-like floats to integers for a non-``Float64Index``. + +.. ipython:: python + + s.loc[5.0:6] + +Note that for floats that are NOT coercible to ints, the label based bounds will be excluded + +.. ipython:: python + + s.loc[5.1:6] + +Float indexing on a ``Float64Index`` is unchanged. + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=np.arange(3.)) + s[1.0] + s[1.0:2.5] + +.. 
_whatsnew_0180.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`) +- Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`) +- Removal of ``DataMatrix`` module. This was not imported into the pandas namespace in any event (:issue:`12111`) +- Removal of ``cols`` keyword in favor of ``subset`` in ``DataFrame.duplicated()`` and ``DataFrame.drop_duplicates()`` (:issue:`6680`) +- Removal of the ``read_frame`` and ``frame_query`` (both aliases for ``pd.read_sql``) + and ``write_frame`` (alias of ``to_sql``) functions in the ``pd.io.sql`` namespace, + deprecated since 0.14.0 (:issue:`6292`). +- Removal of the ``order`` keyword from ``.factorize()`` (:issue:`6930`) + +.. _whatsnew_0180.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of ``andrews_curves`` (:issue:`11534`) +- Improved huge ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex``'s ops performance including ``NaT`` (:issue:`10277`) +- Improved performance of ``pandas.concat`` (:issue:`11958`) +- Improved performance of ``StataReader`` (:issue:`11591`) +- Improved performance in construction of ``Categoricals`` with ``Series`` of datetimes containing ``NaT`` (:issue:`12077`) + + +- Improved performance of ISO 8601 date parsing for dates without separators (:issue:`11899`), leading zeros (:issue:`11871`) and with white space preceding the time zone (:issue:`9714`) + + + + +.. _whatsnew_0180.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in ``GroupBy.size`` when data-frame is empty. (:issue:`11699`) +- Bug in ``Period.end_time`` when a multiple of time period is requested (:issue:`11738`) +- Regression in ``.clip`` with tz-aware datetimes (:issue:`11838`) +- Bug in ``date_range`` when the boundaries fell on the frequency (:issue:`11804`, :issue:`12409`) +- Bug in consistency of passing nested dicts to ``.groupby(...).agg(...)`` (:issue:`9052`) +- Accept unicode in ``Timedelta`` constructor (:issue:`11995`) +- Bug in value label reading for ``StataReader`` when reading incrementally (:issue:`12014`) +- Bug in vectorized ``DateOffset`` when ``n`` parameter is ``0`` (:issue:`11370`) +- Compat for numpy 1.11 w.r.t. ``NaT`` comparison changes (:issue:`12049`) +- Bug in ``read_csv`` when reading from a ``StringIO`` in threads (:issue:`11790`) +- Bug in not treating ``NaT`` as a missing value in datetimelikes when factorizing & with ``Categoricals`` (:issue:`12077`) +- Bug in getitem when the values of a ``Series`` were tz-aware (:issue:`12089`) +- Bug in ``Series.str.get_dummies`` when one of the variables was 'name' (:issue:`12180`) +- Bug in ``pd.concat`` while concatenating tz-aware NaT series. 
(:issue:`11693`, :issue:`11755`, :issue:`12217`) +- Bug in ``pd.read_stata`` with version <= 108 files (:issue:`12232`) +- Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`) +- Bug in resampling with ``.nunique`` and a sparse index (:issue:`12352`) +- Removed some compiler warnings (:issue:`12471`) +- Work around compat issues with ``boto`` in python 3.5 (:issue:`11915`) +- Bug in ``NaT`` subtraction from ``Timestamp`` or ``DatetimeIndex`` with timezones (:issue:`11718`) +- Bug in subtraction of ``Series`` of a single tz-aware ``Timestamp`` (:issue:`12290`) +- Use compat iterators in PY2 to support ``.next()`` (:issue:`12299`) +- Bug in ``Timedelta.round`` with negative values (:issue:`11690`) +- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`) +- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`) +- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`) +- Bug in ``Series.apply`` and ``Series.map`` where ``timedelta64`` was not boxed (:issue:`11349`) +- Bug in ``DataFrame.set_index()`` with tz-aware ``Series`` (:issue:`12358`) + + + +- Bug in subclasses of ``DataFrame`` where ``AttributeError`` did not propagate (:issue:`11808`) +- Bug groupby on tz-aware data where selection not returning ``Timestamp`` (:issue:`11616`) +- Bug in ``pd.read_clipboard`` and ``pd.to_clipboard`` functions not supporting Unicode; upgrade included ``pyperclip`` to v1.5.15 (:issue:`9263`) +- Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`) + +- Bug in ``from_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame``, if the ``DataFrame`` has object columns. (:issue:`11880`) +- Bug in ``.resample`` on categorical data with ``TimedeltaIndex`` (:issue:`12169`) + + +- Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`) +- Bug in ``Index`` creation from ``Timestamp`` with mixed tz coerces to UTC (:issue:`11488`) +- Bug in ``to_numeric`` where it does not raise if input is more than one dimension (:issue:`11776`) +- Bug in parsing timezone offset strings with non-zero minutes (:issue:`11708`) +- Bug in ``df.plot`` using incorrect colors for bar plots under matplotlib 1.5+ (:issue:`11614`) +- Bug in the ``groupby`` ``plot`` method when using keyword arguments (:issue:`11805`). 
+- Bug in ``DataFrame.duplicated`` and ``drop_duplicates`` causing spurious matches when setting ``keep=False`` (:issue:`11864`) +- Bug in ``.loc`` result with duplicated key may have ``Index`` with incorrect dtype (:issue:`11497`) +- Bug in ``pd.rolling_median`` where memory allocation failed even with sufficient memory (:issue:`11696`) +- Bug in ``DataFrame.style`` with spurious zeros (:issue:`12134`) +- Bug in ``DataFrame.style`` with integer columns not starting at 0 (:issue:`12125`) +- Bug in ``.style.bar`` may not rendered properly using specific browser (:issue:`11678`) +- Bug in rich comparison of ``Timedelta`` with a ``numpy.array`` of ``Timedelta`` that caused an infinite recursion (:issue:`11835`) +- Bug in ``DataFrame.round`` dropping column index name (:issue:`11986`) +- Bug in ``df.replace`` while replacing value in mixed dtype ``Dataframe`` (:issue:`11698`) +- Bug in ``Index`` prevents copying name of passed ``Index``, when a new name is not provided (:issue:`11193`) +- Bug in ``read_excel`` failing to read any non-empty sheets when empty sheets exist and ``sheetname=None`` (:issue:`11711`) +- Bug in ``read_excel`` failing to raise ``NotImplemented`` error when keywords ``parse_dates`` and ``date_parser`` are provided (:issue:`11544`) +- Bug in ``read_sql`` with ``pymysql`` connections failing to return chunked data (:issue:`11522`) +- Bug in ``.to_csv`` ignoring formatting parameters ``decimal``, ``na_rep``, ``float_format`` for float indexes (:issue:`11553`) +- Bug in ``Int64Index`` and ``Float64Index`` preventing the use of the modulo operator (:issue:`9244`) +- Bug in ``MultiIndex.drop`` for not lexsorted MultiIndexes (:issue:`12078`) + +- Bug in ``DataFrame`` when masking an empty ``DataFrame`` (:issue:`11859`) + + +- Bug in ``.plot`` potentially modifying the ``colors`` input when the number of columns didn't match the number of series provided (:issue:`12039`). +- Bug in ``Series.plot`` failing when index has a ``CustomBusinessDay`` frequency (:issue:`7222`). +- Bug in ``.to_sql`` for ``datetime.time`` values with sqlite fallback (:issue:`8341`) +- Bug in ``read_excel`` failing to read data with one column when ``squeeze=True`` (:issue:`12157`) +- Bug in ``read_excel`` failing to read one empty column (:issue:`12292`, :issue:`9002`) +- Bug in ``.groupby`` where a ``KeyError`` was not raised for a wrong column if there was only one row in the dataframe (:issue:`11741`) +- Bug in ``.read_csv`` with dtype specified on empty data producing an error (:issue:`12048`) +- Bug in ``.read_csv`` where strings like ``'2E'`` are treated as valid floats (:issue:`12237`) +- Bug in building *pandas* with debugging symbols (:issue:`12123`) + + +- Removed ``millisecond`` property of ``DatetimeIndex``. This would always raise a ``ValueError`` (:issue:`12019`). +- Bug in ``Series`` constructor with read-only data (:issue:`11502`) +- Removed ``pandas._testing.choice()``. Should use ``np.random.choice()``, instead. (:issue:`12386`) +- Bug in ``.loc`` setitem indexer preventing the use of a TZ-aware DatetimeIndex (:issue:`12050`) +- Bug in ``.style`` indexes and MultiIndexes not appearing (:issue:`11655`) +- Bug in ``to_msgpack`` and ``from_msgpack`` which did not correctly serialize or deserialize ``NaT`` (:issue:`12307`). 
+- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`) +- Bug in ``Timestamp`` constructor where microsecond resolution was lost if HHMMSS were not separated with ':' (:issue:`10041`) +- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`) + +- Bug in ``crosstab`` where arguments with non-overlapping indexes would return a ``KeyError`` (:issue:`10291`) + +- Bug in ``DataFrame.apply`` in which reduction was not being prevented for cases in which ``dtype`` was not a numpy dtype (:issue:`12244`) +- Bug when initializing categorical series with a scalar value. (:issue:`12336`) +- Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`) +- Bug when increasing the buffer size of CSV reader in ``read_csv`` (:issue:`12494`) +- Bug when setting columns of a ``DataFrame`` with duplicate column names (:issue:`12344`) + + +.. _whatsnew_0.18.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.17.1..v0.18.0 diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst new file mode 100644 index 00000000..7d9008fd --- /dev/null +++ b/doc/source/whatsnew/v0.18.1.rst @@ -0,0 +1,711 @@ +.. _whatsnew_0181: + +Version 0.18.1 (May 3, 2016) +---------------------------- + +{{ header }} + + +This is a minor bug-fix release from 0.18.0 and includes a large number of +bug fixes along with several new features, enhancements, and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- ``.groupby(...)`` has been enhanced to provide convenient syntax when working with ``.rolling(..)``, ``.expanding(..)`` and ``.resample(..)`` per group, see :ref:`here ` +- ``pd.to_datetime()`` has gained the ability to assemble dates from a ``DataFrame``, see :ref:`here ` +- Method chaining improvements, see :ref:`here `. +- Custom business hour offset, see :ref:`here `. +- Many bug fixes in the handling of ``sparse``, see :ref:`here ` +- Expanded the :ref:`Tutorials section ` with a feature on modern pandas, courtesy of `@TomAugsburger `__. (:issue:`13045`). + + +.. contents:: What's new in v0.18.1 + :local: + :backlinks: none + +.. _whatsnew_0181.new_features: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0181.enhancements.custombusinesshour: + +Custom business hour +^^^^^^^^^^^^^^^^^^^^ + +The ``CustomBusinessHour`` is a mixture of ``BusinessHour`` and ``CustomBusinessDay`` which +allows you to specify arbitrary holidays. For details, +see :ref:`Custom Business Hour ` (:issue:`11514`) + +.. ipython:: python + + from pandas.tseries.offsets import CustomBusinessHour + from pandas.tseries.holiday import USFederalHolidayCalendar + + bhour_us = CustomBusinessHour(calendar=USFederalHolidayCalendar()) + +Friday before MLK Day + +.. ipython:: python + + import datetime + + dt = datetime.datetime(2014, 1, 17, 15) + + dt + bhour_us + +Tuesday after MLK Day (Monday is skipped because it's a holiday) + +.. ipython:: python + + dt + bhour_us * 2 + +.. _whatsnew_0181.deferred_ops: + +Method ``.groupby(..)`` syntax with window and resample operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``.groupby(...)`` has been enhanced to provide convenient syntax when working with ``.rolling(..)``, ``.expanding(..)`` and ``.resample(..)`` per group, see (:issue:`12486`, :issue:`12738`). + +You can now use ``.rolling(..)`` and ``.expanding(..)`` as methods on groupbys. 
These return another deferred object (similar to what ``.rolling()`` and ``.expanding()`` do on ungrouped pandas objects). You can then operate on these ``RollingGroupby`` objects in a similar manner. + +Previously you would have to do this to get a rolling window mean per-group: + +.. ipython:: python + + df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}) + df + +.. ipython:: python + + df.groupby("A").apply(lambda x: x.rolling(4).B.mean()) + +Now you can do: + +.. ipython:: python + + df.groupby("A").rolling(4).B.mean() + +For ``.resample(..)`` type of operations, previously you would have to: + +.. ipython:: python + + df = pd.DataFrame( + { + "date": pd.date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": [5, 6, 7, 8], + } + ).set_index("date") + + df + +.. ipython:: python + + df.groupby("group").apply(lambda x: x.resample("1D").ffill()) + +Now you can do: + +.. ipython:: python + + df.groupby("group").resample("1D").ffill() + +.. _whatsnew_0181.enhancements.method_chain: + +Method chaining improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following methods / indexers now accept a ``callable``. It is intended to make +these more useful in method chains, see the :ref:`documentation `. +(:issue:`11485`, :issue:`12533`) + +- ``.where()`` and ``.mask()`` +- ``.loc[]``, ``iloc[]`` and ``.ix[]`` +- ``[]`` indexing + +Methods ``.where()`` and ``.mask()`` +"""""""""""""""""""""""""""""""""""" + +These can accept a callable for the condition and ``other`` +arguments. + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + df.where(lambda x: x > 4, lambda x: x + 10) + +Methods ``.loc[]``, ``.iloc[]``, ``.ix[]`` +"""""""""""""""""""""""""""""""""""""""""" + +These can accept a callable, and a tuple of callable as a slicer. The callable +can return a valid boolean indexer or anything which is valid for these indexer's input. + +.. ipython:: python + + # callable returns bool indexer + df.loc[lambda x: x.A >= 2, lambda x: x.sum() > 10] + + # callable returns list of labels + df.loc[lambda x: [1, 2], lambda x: ["A", "B"]] + +Indexing with ``[]`` +"""""""""""""""""""" + +Finally, you can use a callable in ``[]`` indexing of Series, DataFrame and Panel. +The callable must return a valid input for ``[]`` indexing depending on its +class and index type. + +.. ipython:: python + + df[lambda x: "A"] + +Using these methods / indexers, you can chain data selection operations +without using temporary variable. + +.. ipython:: python + + bb = pd.read_csv("data/baseball.csv", index_col="id") + (bb.groupby(["year", "team"]).sum(numeric_only=True).loc[lambda df: df.r > 100]) + +.. _whatsnew_0181.partial_string_indexing: + +Partial string indexing on ``DatetimeIndex`` when part of a ``MultiIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Partial string indexing now matches on ``DateTimeIndex`` when part of a ``MultiIndex`` (:issue:`10331`) + +.. ipython:: python + + dft2 = pd.DataFrame( + np.random.randn(20, 1), + columns=["A"], + index=pd.MultiIndex.from_product( + [pd.date_range("20130101", periods=10, freq="12H"), ["a", "b"]] + ), + ) + dft2 + dft2.loc["2013-01-05"] + +On other levels + +.. ipython:: python + + idx = pd.IndexSlice + dft2 = dft2.swaplevel(0, 1).sort_index() + dft2 + dft2.loc[idx[:, "2013-01-05"], :] + +.. 
_whatsnew_0181.enhancements.assembling: + +Assembling datetimes +^^^^^^^^^^^^^^^^^^^^ + +``pd.to_datetime()`` has gained the ability to assemble datetimes from a passed in ``DataFrame`` or a dict. (:issue:`8158`). + +.. ipython:: python + + df = pd.DataFrame( + {"year": [2015, 2016], "month": [2, 3], "day": [4, 5], "hour": [2, 3]} + ) + df + +Assembling using the passed frame. + +.. ipython:: python + + pd.to_datetime(df) + +You can pass only the columns that you need to assemble. + +.. ipython:: python + + pd.to_datetime(df[["year", "month", "day"]]) + +.. _whatsnew_0181.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- ``pd.read_csv()`` now supports ``delim_whitespace=True`` for the Python engine (:issue:`12958`) +- ``pd.read_csv()`` now supports opening ZIP files that contains a single CSV, via extension inference or explicit ``compression='zip'`` (:issue:`12175`) +- ``pd.read_csv()`` now supports opening files using xz compression, via extension inference or explicit ``compression='xz'`` is specified; ``xz`` compressions is also supported by ``DataFrame.to_csv`` in the same way (:issue:`11852`) +- ``pd.read_msgpack()`` now always gives writeable ndarrays even when compression is used (:issue:`12359`). +- ``pd.read_msgpack()`` now supports serializing and de-serializing categoricals with msgpack (:issue:`12573`) +- ``.to_json()`` now supports ``NDFrames`` that contain categorical and sparse data (:issue:`10778`) +- ``interpolate()`` now supports ``method='akima'`` (:issue:`7588`). +- ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`) +- Added ``.weekday_name`` property as a component to ``DatetimeIndex`` and the ``.dt`` accessor. (:issue:`11128`) + +- ``Index.take`` now handles ``allow_fill`` and ``fill_value`` consistently (:issue:`12631`) + + .. ipython:: python + + idx = pd.Index([1.0, 2.0, 3.0, 4.0], dtype="float") + + # default, allow_fill=True, fill_value=None + idx.take([2, -1]) + idx.take([2, -1], fill_value=True) + +- ``Index`` now supports ``.str.get_dummies()`` which returns ``MultiIndex``, see :ref:`Creating Indicator Variables ` (:issue:`10008`, :issue:`10103`) + + .. ipython:: python + + idx = pd.Index(["a|b", "a|c", "b|c"]) + idx.str.get_dummies("|") + + +- ``pd.crosstab()`` has gained a ``normalize`` argument for normalizing frequency tables (:issue:`12569`). Examples in the updated docs :ref:`here `. +- ``.resample(..).interpolate()`` is now supported (:issue:`12925`) +- ``.isin()`` now accepts passed ``sets`` (:issue:`12988`) + +.. _whatsnew_0181.sparse: + +Sparse changes +~~~~~~~~~~~~~~ + +These changes conform sparse handling to return the correct types and work to make a smoother experience with indexing. + +``SparseArray.take`` now returns a scalar for scalar input, ``SparseArray`` for others. Furthermore, it handles a negative indexer with the same rule as ``Index`` (:issue:`10560`, :issue:`12796`) + +.. 
code-block:: python + + s = pd.SparseArray([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) + s.take(0) + s.take([1, 2, 3]) + +- Bug in ``SparseSeries[]`` indexing with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`) +- Bug in ``SparseArray[]`` indexing with tuples are not handled properly (:issue:`12966`) +- Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`) +- Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`) +- Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`) +- Bug in ``SparseDataFrame.loc[]``, ``.iloc[]`` may results in dense ``Series``, rather than ``SparseSeries`` (:issue:`12787`) +- Bug in ``SparseArray`` addition ignores ``fill_value`` of right hand side (:issue:`12910`) +- Bug in ``SparseArray`` mod raises ``AttributeError`` (:issue:`12910`) +- Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`) +- Bug in ``SparseArray`` comparison output may incorrect result or raise ``ValueError`` (:issue:`12971`) +- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`) +- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`) +- Bug in ``SparseSeries`` and ``SparseArray`` may have different ``dtype`` from its dense values (:issue:`12908`) +- Bug in ``SparseSeries.reindex`` incorrectly handle ``fill_value`` (:issue:`12797`) +- Bug in ``SparseArray.to_frame()`` results in ``DataFrame``, rather than ``SparseDataFrame`` (:issue:`9850`) +- Bug in ``SparseSeries.value_counts()`` does not count ``fill_value`` (:issue:`6749`) +- Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`) +- Bug in ``SparseArray.to_dense()`` incorrectly handle ``fill_value`` (:issue:`12797`) +- Bug in ``pd.concat()`` of ``SparseSeries`` results in dense (:issue:`10536`) +- Bug in ``pd.concat()`` of ``SparseDataFrame`` incorrectly handle ``fill_value`` (:issue:`9765`) +- Bug in ``pd.concat()`` of ``SparseDataFrame`` may raise ``AttributeError`` (:issue:`12174`) +- Bug in ``SparseArray.shift()`` may raise ``NameError`` or ``TypeError`` (:issue:`12908`) + +.. _whatsnew_0181.api: + +API changes +~~~~~~~~~~~ + +.. _whatsnew_0181.api.groubynth: + +Method ``.groupby(..).nth()`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The index in ``.groupby(..).nth()`` output is now more consistent when the ``as_index`` argument is passed (:issue:`11039`): + +.. ipython:: python + + df = pd.DataFrame({"A": ["a", "b", "a"], "B": [1, 2, 3]}) + df + +Previous behavior: + +.. code-block:: ipython + + In [3]: df.groupby('A', as_index=True)['B'].nth(0) + Out[3]: + 0 1 + 1 2 + Name: B, dtype: int64 + + In [4]: df.groupby('A', as_index=False)['B'].nth(0) + Out[4]: + 0 1 + 1 2 + Name: B, dtype: int64 + +New behavior: + +.. ipython:: python + + df.groupby("A", as_index=True)["B"].nth(0) + df.groupby("A", as_index=False)["B"].nth(0) + +Furthermore, previously, a ``.groupby`` would always sort, regardless if ``sort=False`` was passed with ``.nth()``. + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.randn(100, 2), columns=["a", "b"]) + df["c"] = np.random.randint(0, 4, 100) + +Previous behavior: + +.. 
code-block:: ipython + + In [4]: df.groupby('c', sort=True).nth(1) + Out[4]: + a b + c + 0 -0.334077 0.002118 + 1 0.036142 -2.074978 + 2 -0.720589 0.887163 + 3 0.859588 -0.636524 + + In [5]: df.groupby('c', sort=False).nth(1) + Out[5]: + a b + c + 0 -0.334077 0.002118 + 1 0.036142 -2.074978 + 2 -0.720589 0.887163 + 3 0.859588 -0.636524 + +New behavior: + +.. ipython:: python + + df.groupby("c", sort=True).nth(1) + df.groupby("c", sort=False).nth(1) + + +.. _whatsnew_0181.numpy_compatibility: + +NumPy function compatibility +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Compatibility between pandas array-like methods (e.g. ``sum`` and ``take``) and their ``numpy`` +counterparts has been greatly increased by augmenting the signatures of the ``pandas`` methods so +as to accept arguments that can be passed in from ``numpy``, even if they are not necessarily +used in the ``pandas`` implementation (:issue:`12644`, :issue:`12638`, :issue:`12687`) + +- ``.searchsorted()`` for ``Index`` and ``TimedeltaIndex`` now accept a ``sorter`` argument to maintain compatibility with numpy's ``searchsorted`` function (:issue:`12238`) +- Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`) + +An example of this signature augmentation is illustrated below: + +.. code-block:: python + + sp = pd.SparseDataFrame([1, 2, 3]) + sp + +Previous behaviour: + +.. code-block:: ipython + + In [2]: np.cumsum(sp, axis=0) + ... + TypeError: cumsum() takes at most 2 arguments (4 given) + +New behaviour: + +.. code-block:: python + + np.cumsum(sp, axis=0) + +.. _whatsnew_0181.apply_resample: + +Using ``.apply`` on GroupBy resampling +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Using ``apply`` on resampling groupby operations (using a ``pd.TimeGrouper``) now has the same output types as similar ``apply`` calls on other groupby operations. (:issue:`11742`). + +.. ipython:: python + + df = pd.DataFrame( + {"date": pd.to_datetime(["10/10/2000", "11/10/2000"]), "value": [10, 13]} + ) + df + +Previous behavior: + +.. code-block:: ipython + + In [1]: df.groupby(pd.TimeGrouper(key='date', + ...: freq='M')).apply(lambda x: x.value.sum()) + Out[1]: + ... + TypeError: cannot concatenate a non-NDFrame object + + # Output is a Series + In [2]: df.groupby(pd.TimeGrouper(key='date', + ...: freq='M')).apply(lambda x: x[['value']].sum()) + Out[2]: + date + 2000-10-31 value 10 + 2000-11-30 value 13 + dtype: int64 + +New behavior: + +.. code-block:: ipython + + # Output is a Series + In [55]: df.groupby(pd.TimeGrouper(key='date', + ...: freq='M')).apply(lambda x: x.value.sum()) + Out[55]: + date + 2000-10-31 10 + 2000-11-30 13 + Freq: M, dtype: int64 + + # Output is a DataFrame + In [56]: df.groupby(pd.TimeGrouper(key='date', + ...: freq='M')).apply(lambda x: x[['value']].sum()) + Out[56]: + value + date + 2000-10-31 10 + 2000-11-30 13 + +.. _whatsnew_0181.read_csv_exceptions: + +Changes in ``read_csv`` exceptions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +In order to standardize the ``read_csv`` API for both the ``c`` and ``python`` engines, both will now raise an +``EmptyDataError``, a subclass of ``ValueError``, in response to empty columns or header (:issue:`12493`, :issue:`12506`) + +Previous behaviour: + +.. code-block:: ipython + + In [1]: import io + + In [2]: df = pd.read_csv(io.StringIO(''), engine='c') + ... + ValueError: No columns to parse from file + + In [3]: df = pd.read_csv(io.StringIO(''), engine='python') + ... + StopIteration + +New behaviour: + +.. 
code-block:: ipython + + In [1]: df = pd.read_csv(io.StringIO(''), engine='c') + ... + pandas.io.common.EmptyDataError: No columns to parse from file + + In [2]: df = pd.read_csv(io.StringIO(''), engine='python') + ... + pandas.io.common.EmptyDataError: No columns to parse from file + +In addition to this error change, several others have been made as well: + +- ``CParserError`` now sub-classes ``ValueError`` instead of just an ``Exception`` (:issue:`12551`) +- A ``CParserError`` is now raised instead of a generic ``Exception`` in ``read_csv`` when the ``c`` engine cannot parse a column (:issue:`12506`) +- A ``ValueError`` is now raised instead of a generic ``Exception`` in ``read_csv`` when the ``c`` engine encounters a ``NaN`` value in an integer column (:issue:`12506`) +- A ``ValueError`` is now raised instead of a generic ``Exception`` in ``read_csv`` when ``true_values`` is specified, and the ``c`` engine encounters an element in a column containing unencodable bytes (:issue:`12506`) +- The ``pandas.parser.OverflowError`` exception has been removed and has been replaced with Python's built-in ``OverflowError`` exception (:issue:`12506`) +- ``pd.read_csv()`` no longer allows a combination of strings and integers for the ``usecols`` parameter (:issue:`12678`) + + +.. _whatsnew_0181.api.to_datetime: + +Method ``to_datetime`` error changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Bugs have been fixed in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'``, or with non-convertible entries and ``errors='ignore'``. Furthermore, an ``OutOfBoundsDatetime`` exception will be raised when an out-of-range value is encountered for that unit when ``errors='raise'``. (:issue:`11758`, :issue:`13052`, :issue:`13059`) + +Previous behaviour: + +.. code-block:: ipython + + In [27]: pd.to_datetime(1420043460, unit='s', errors='coerce') + Out[27]: NaT + + In [28]: pd.to_datetime(11111111, unit='D', errors='ignore') + OverflowError: Python int too large to convert to C long + + In [29]: pd.to_datetime(11111111, unit='D', errors='raise') + OverflowError: Python int too large to convert to C long + +New behaviour: + +.. code-block:: ipython + + In [2]: pd.to_datetime(1420043460, unit='s', errors='coerce') + Out[2]: Timestamp('2014-12-31 16:31:00') + + In [3]: pd.to_datetime(11111111, unit='D', errors='ignore') + Out[3]: 11111111 + + In [4]: pd.to_datetime(11111111, unit='D', errors='raise') + OutOfBoundsDatetime: cannot convert input with unit 'D' + +.. _whatsnew_0181.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``.swaplevel()`` for ``Series``, ``DataFrame``, ``Panel``, and ``MultiIndex`` now features defaults for its first two parameters ``i`` and ``j`` that swap the two innermost levels of the index. 
(:issue:`12934`) +- ``.searchsorted()`` for ``Index`` and ``TimedeltaIndex`` now accept a ``sorter`` argument to maintain compatibility with numpy's ``searchsorted`` function (:issue:`12238`) +- ``Period`` and ``PeriodIndex`` now raise an ``IncompatibleFrequency`` error, which inherits from ``ValueError``, rather than a raw ``ValueError`` (:issue:`12615`) +- ``Series.apply`` for category dtype now applies the passed function to each of the ``.categories`` (and not the ``.codes``), and returns a ``category`` dtype if possible (:issue:`12473`) +- ``read_csv`` will now raise a ``TypeError`` if ``parse_dates`` is not a boolean, list, or dictionary (matches the doc-string) (:issue:`5636`) +- The default for ``.query()/.eval()`` is now ``engine=None``, which will use ``numexpr`` if it's installed; otherwise it will fall back to the ``python`` engine. This mimics the pre-0.18.1 behavior when ``numexpr`` is installed (previously, if ``numexpr`` was not installed, ``.query()/.eval()`` would raise). (:issue:`12749`) +- ``pd.show_versions()`` now includes the ``pandas_datareader`` version (:issue:`12740`) +- Provide proper ``__name__`` and ``__qualname__`` attributes for generic functions (:issue:`12021`) +- ``pd.concat(ignore_index=True)`` now uses ``RangeIndex`` as default (:issue:`12695`) +- ``pd.merge()`` and ``DataFrame.join()`` will show a ``UserWarning`` when merging/joining a single- with a multi-leveled dataframe (:issue:`9455`, :issue:`12219`) +- Compat with ``scipy`` > 0.17 for deprecated ``piecewise_polynomial`` interpolation method; support for the replacement ``from_derivatives`` method (:issue:`12887`) + +.. _whatsnew_0181.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- The method name ``Index.sym_diff()`` is deprecated and can be replaced by ``Index.symmetric_difference()`` (:issue:`12591`) +- The method name ``Categorical.sort()`` is deprecated in favor of ``Categorical.sort_values()`` (:issue:`12882`) + + + + + + + + +.. _whatsnew_0181.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved speed of SAS reader (:issue:`12656`, :issue:`12961`) +- Performance improvements in ``.groupby(..).cumcount()`` (:issue:`11039`) +- Improved memory usage in ``pd.read_csv()`` when using ``skiprows=an_integer`` (:issue:`13005`) +- Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now it only checks whether the table has been created correctly when the table name is not lower case. (:issue:`12876`) +- Improved performance of ``Period`` construction and time series plotting (:issue:`12903`, :issue:`11831`). +- Improved performance of ``.str.encode()`` and ``.str.decode()`` methods (:issue:`13008`) +- Improved performance of ``to_numeric`` if input is numeric dtype (:issue:`12777`) +- Improved performance of sparse arithmetic with ``IntIndex`` (:issue:`13036`) + + + + + + + + +.. _whatsnew_0181.bug_fixes: + +Bug fixes +~~~~~~~~~ +- ``usecols`` parameter in ``pd.read_csv`` is now respected even when the lines of a CSV file are uneven in length (:issue:`12203`) +- Bug in ``groupby.transform(..)`` when ``axis=1`` is specified with a non-monotonic ordered index (:issue:`12713`) +- Bug in ``Period`` and ``PeriodIndex`` creation raises ``KeyError`` if ``freq="Minute"`` is specified. 
Note that "Minute" freq is deprecated in v0.17.0, and recommended to use ``freq="T"`` instead (:issue:`11854`) +- Bug in ``.resample(...).count()`` with a ``PeriodIndex`` always raising a ``TypeError`` (:issue:`12774`) +- Bug in ``.resample(...)`` with a ``PeriodIndex`` casting to a ``DatetimeIndex`` when empty (:issue:`12868`) +- Bug in ``.resample(...)`` with a ``PeriodIndex`` when resampling to an existing frequency (:issue:`12770`) +- Bug in printing data which contains ``Period`` with different ``freq`` raises ``ValueError`` (:issue:`12615`) +- Bug in ``Series`` construction with ``Categorical`` and ``dtype='category'`` is specified (:issue:`12574`) +- Bugs in concatenation with a coercible dtype was too aggressive, resulting in different dtypes in output formatting when an object was longer than ``display.max_rows`` (:issue:`12411`, :issue:`12045`, :issue:`11594`, :issue:`10571`, :issue:`12211`) +- Bug in ``float_format`` option with option not being validated as a callable. (:issue:`12706`) +- Bug in ``GroupBy.filter`` when ``dropna=False`` and no groups fulfilled the criteria (:issue:`12768`) +- Bug in ``__name__`` of ``.cum*`` functions (:issue:`12021`) +- Bug in ``.astype()`` of a ``Float64Inde/Int64Index`` to an ``Int64Index`` (:issue:`12881`) +- Bug in round tripping an integer based index in ``.to_json()/.read_json()`` when ``orient='index'`` (the default) (:issue:`12866`) +- Bug in plotting ``Categorical`` dtypes cause error when attempting stacked bar plot (:issue:`13019`) +- Compat with >= ``numpy`` 1.11 for ``NaT`` comparisons (:issue:`12969`) +- Bug in ``.drop()`` with a non-unique ``MultiIndex``. (:issue:`12701`) +- Bug in ``.concat`` of datetime tz-aware and naive DataFrames (:issue:`12467`) +- Bug in correctly raising a ``ValueError`` in ``.resample(..).fillna(..)`` when passing a non-string (:issue:`12952`) +- Bug fixes in various encoding and header processing issues in ``pd.read_sas()`` (:issue:`12659`, :issue:`12654`, :issue:`12647`, :issue:`12809`) +- Bug in ``pd.crosstab()`` where would silently ignore ``aggfunc`` if ``values=None`` (:issue:`12569`). +- Potential segfault in ``DataFrame.to_json`` when serialising ``datetime.time`` (:issue:`11473`). +- Potential segfault in ``DataFrame.to_json`` when attempting to serialise 0d array (:issue:`11299`). +- Segfault in ``to_json`` when attempting to serialise a ``DataFrame`` or ``Series`` with non-ndarray values; now supports serialization of ``category``, ``sparse``, and ``datetime64[ns, tz]`` dtypes (:issue:`10778`). +- Bug in ``DataFrame.to_json`` with unsupported dtype not passed to default handler (:issue:`12554`). +- Bug in ``.align`` not returning the sub-class (:issue:`12983`) +- Bug in aligning a ``Series`` with a ``DataFrame`` (:issue:`13037`) +- Bug in ``ABCPanel`` in which ``Panel4D`` was not being considered as a valid instance of this generic type (:issue:`12810`) + + +- Bug in consistency of ``.name`` on ``.groupby(..).apply(..)`` cases (:issue:`12363`) + +- Bug in ``Timestamp.__repr__`` that caused ``pprint`` to fail in nested structures (:issue:`12622`) +- Bug in ``Timedelta.min`` and ``Timedelta.max``, the properties now report the true minimum/maximum ``timedeltas`` as recognized by pandas. See the :ref:`documentation `. 
(:issue:`12727`) +- Bug in ``.quantile()`` with interpolation may coerce to ``float`` unexpectedly (:issue:`12772`) +- Bug in ``.quantile()`` with empty ``Series`` may return scalar rather than empty ``Series`` (:issue:`12772`) + + +- Bug in ``.loc`` with out-of-bounds in a large indexer would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`) +- Bug in resampling when using a ``TimedeltaIndex`` and ``.asfreq()``, would previously not include the final fencepost (:issue:`12926`) + +- Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`) +- Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`) + + + +- Bug in ``pd.read_csv()`` with the ``c`` engine when specifying ``skiprows`` with newlines in quoted items (:issue:`10911`, :issue:`12775`) +- Bug in ``DataFrame`` timezone lost when assigning tz-aware datetime ``Series`` with alignment (:issue:`12981`) + + + + +- Bug in ``.value_counts()`` when ``normalize=True`` and ``dropna=True`` where nulls still contributed to the normalized count (:issue:`12558`) +- Bug in ``Series.value_counts()`` loses name if its dtype is ``category`` (:issue:`12835`) +- Bug in ``Series.value_counts()`` loses timezone info (:issue:`12835`) +- Bug in ``Series.value_counts(normalize=True)`` with ``Categorical`` raises ``UnboundLocalError`` (:issue:`12835`) +- Bug in ``Panel.fillna()`` ignoring ``inplace=True`` (:issue:`12633`) +- Bug in ``pd.read_csv()`` when specifying ``names``, ``usecols``, and ``parse_dates`` simultaneously with the ``c`` engine (:issue:`9755`) +- Bug in ``pd.read_csv()`` when specifying ``delim_whitespace=True`` and ``lineterminator`` simultaneously with the ``c`` engine (:issue:`12912`) +- Bug in ``Series.rename``, ``DataFrame.rename`` and ``DataFrame.rename_axis`` not treating ``Series`` as mappings to relabel (:issue:`12623`). +- Clean in ``.rolling.min`` and ``.rolling.max`` to enhance dtype handling (:issue:`12373`) +- Bug in ``groupby`` where complex types are coerced to float (:issue:`12902`) +- Bug in ``Series.map`` raises ``TypeError`` if its dtype is ``category`` or tz-aware ``datetime`` (:issue:`12473`) + +- Bugs on 32bit platforms for some test comparisons (:issue:`12972`) +- Bug in index coercion when falling back from ``RangeIndex`` construction (:issue:`12893`) +- Better error message in window functions when invalid argument (e.g. a float window) is passed (:issue:`12669`) + +- Bug in slicing subclassed ``DataFrame`` defined to return subclassed ``Series`` may return normal ``Series`` (:issue:`11559`) + + +- Bug in ``.str`` accessor methods may raise ``ValueError`` if input has ``name`` and the result is ``DataFrame`` or ``MultiIndex`` (:issue:`12617`) +- Bug in ``DataFrame.last_valid_index()`` and ``DataFrame.first_valid_index()`` on empty frames (:issue:`12800`) + + +- Bug in ``CategoricalIndex.get_loc`` returns different result from regular ``Index`` (:issue:`12531`) +- Bug in ``PeriodIndex.resample`` where name not propagated (:issue:`12769`) + +- Bug in ``date_range`` ``closed`` keyword and timezones (:issue:`12684`). 
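+
+  For example, the kind of call this fix concerns (the dates and timezone below are chosen purely for illustration):
+
+  .. code-block:: python
+
+      import pandas as pd
+
+      # tz-aware range that is closed on the left, i.e. the right endpoint is dropped
+      pd.date_range("2016-01-01", "2016-01-05", tz="US/Eastern", closed="left")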
+ +- Bug in ``pd.concat`` raises ``AttributeError`` when input data contains tz-aware datetime and timedelta (:issue:`12620`) +- Bug in ``pd.concat`` did not handle empty ``Series`` properly (:issue:`11082`) + +- Bug in ``.plot.bar`` alignment when ``width`` is specified with ``int`` (:issue:`12979`) + + +- Bug in ``fill_value`` is ignored if the argument to a binary operator is a constant (:issue:`12723`) + +- Bug in ``pd.read_html()`` when using bs4 flavor and parsing table with a header and only one column (:issue:`9178`) + +- Bug in ``.pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`) +- Bug in ``.pivot_table`` when ``dropna=False`` where table index/column names disappear (:issue:`12133`) +- Bug in ``pd.crosstab()`` when ``margins=True`` and ``dropna=False`` which raised (:issue:`12642`) + +- Bug in ``Series.name`` when ``name`` attribute can be a hashable type (:issue:`12610`) + +- Bug in ``.describe()`` resets categorical columns information (:issue:`11558`) +- Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`) +- ``pd.read_excel()`` now accepts column names associated with keyword argument ``names`` (:issue:`12870`) +- Bug in ``pd.to_numeric()`` with ``Index`` returns ``np.ndarray``, rather than ``Index`` (:issue:`12777`) +- Bug in ``pd.to_numeric()`` with datetime-like may raise ``TypeError`` (:issue:`12777`) +- Bug in ``pd.to_numeric()`` with scalar raises ``ValueError`` (:issue:`12777`) + + +.. _whatsnew_0.18.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.18.0..v0.18.1 diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst new file mode 100644 index 00000000..f2fdd23a --- /dev/null +++ b/doc/source/whatsnew/v0.19.0.rst @@ -0,0 +1,1603 @@ +.. _whatsnew_0190: + +Version 0.19.0 (October 2, 2016) +-------------------------------- + +{{ header }} + +This is a major release from 0.18.1 and includes a number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- :func:`merge_asof` for asof-style time-series joining, see :ref:`here ` +- ``.rolling()`` is now time-series aware, see :ref:`here ` +- :func:`read_csv` now supports parsing ``Categorical`` data, see :ref:`here ` +- A function :func:`union_categoricals` has been added for combining categoricals, see :ref:`here ` +- ``PeriodIndex`` now has its own ``period`` dtype, and has changed to be more consistent with other ``Index`` classes. See :ref:`here ` +- Sparse data structures have gained enhanced support for ``int`` and ``bool`` dtypes, see :ref:`here ` +- Comparison operations with ``Series`` no longer ignore the index, see :ref:`here ` for an overview of the API changes. +- Introduction of a pandas development API for utility functions, see :ref:`here `. +- Deprecation of ``Panel4D`` and ``PanelND``. We recommend representing these types of n-dimensional data with the `xarray package `__. +- Removal of the previously deprecated modules ``pandas.io.data``, ``pandas.io.wb``, ``pandas.tools.rplot``. + +.. warning:: + + pandas >= 0.19.0 will no longer silence numpy ufunc warnings upon import, see :ref:`here `. + +.. contents:: What's new in v0.19.0 + :local: + :backlinks: none + +.. _whatsnew_0190.new_features: + +New features +~~~~~~~~~~~~ + +.. 
_whatsnew_0190.enhancements.asof_merge: + +Function ``merge_asof`` for asof-style time-series joining +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A long-time requested feature has been added through the :func:`merge_asof` function, to +support asof style joining of time-series (:issue:`1870`, :issue:`13695`, :issue:`13709`, :issue:`13902`). Full documentation is +:ref:`here `. + +The :func:`merge_asof` performs an asof merge, which is similar to a left-join +except that we match on nearest key rather than equal keys. + +.. ipython:: python + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + + left + right + +We typically want to match exactly when possible, and use the most +recent value otherwise. + +.. ipython:: python + + pd.merge_asof(left, right, on="a") + +We can also match rows ONLY with prior data, and not an exact match. + +.. ipython:: python + + pd.merge_asof(left, right, on="a", allow_exact_matches=False) + + +In a typical time-series example, we have ``trades`` and ``quotes`` and we want to ``asof-join`` them. +This also illustrates using the ``by`` parameter to group data before merging. + +.. ipython:: python + + trades = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.038", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + }, + columns=["time", "ticker", "price", "quantity"], + ) + + quotes = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.030", + "20160525 13:30:00.041", + "20160525 13:30:00.048", + "20160525 13:30:00.049", + "20160525 13:30:00.072", + "20160525 13:30:00.075", + ] + ), + "ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL", "GOOG", "MSFT"], + "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], + }, + columns=["time", "ticker", "bid", "ask"], + ) + +.. ipython:: python + + trades + quotes + +An asof merge joins on the ``on``, typically a datetimelike field, which is ordered, and +in this case we are using a grouper in the ``by`` field. This is like a left-outer join, except +that forward filling happens automatically taking the most recent non-NaN value. + +.. ipython:: python + + pd.merge_asof(trades, quotes, on="time", by="ticker") + +This returns a merged DataFrame with the entries in the same order as the original left +passed DataFrame (``trades`` in this case), with the fields of the ``quotes`` merged. + +.. _whatsnew_0190.enhancements.rolling_ts: + +Method ``.rolling()`` is now time-series aware +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``.rolling()`` objects are now time-series aware and can accept a time-series offset (or convertible) for the ``window`` argument (:issue:`13327`, :issue:`12995`). +See the full documentation :ref:`here `. + +.. ipython:: python + + dft = pd.DataFrame( + {"B": [0, 1, 2, np.nan, 4]}, + index=pd.date_range("20130101 09:00:00", periods=5, freq="s"), + ) + dft + +This is a regular frequency index. Using an integer window parameter works to roll along the window frequency. + +.. 
ipython:: python + + dft.rolling(2).sum() + dft.rolling(2, min_periods=1).sum() + +Specifying an offset allows a more intuitive specification of the rolling frequency. + +.. ipython:: python + + dft.rolling("2s").sum() + +Using a non-regular, but still monotonic index, rolling with an integer window does not impart any special calculation. + +.. ipython:: python + + + dft = pd.DataFrame( + {"B": [0, 1, 2, np.nan, 4]}, + index=pd.Index( + [ + pd.Timestamp("20130101 09:00:00"), + pd.Timestamp("20130101 09:00:02"), + pd.Timestamp("20130101 09:00:03"), + pd.Timestamp("20130101 09:00:05"), + pd.Timestamp("20130101 09:00:06"), + ], + name="foo", + ), + ) + + dft + dft.rolling(2).sum() + +Using the time-specification generates variable windows for this sparse data. + +.. ipython:: python + + dft.rolling("2s").sum() + +Furthermore, we now allow an optional ``on`` parameter to specify a column (rather than the +default of the index) in a DataFrame. + +.. ipython:: python + + dft = dft.reset_index() + dft + dft.rolling("2s", on="foo").sum() + +.. _whatsnew_0190.enhancements.read_csv_dupe_col_names_support: + +Method ``read_csv`` has improved support for duplicate column names +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. ipython:: python + :suppress: + + from io import StringIO + +:ref:`Duplicate column names ` are now supported in :func:`read_csv` whether +they are in the file or passed in as the ``names`` parameter (:issue:`7160`, :issue:`9424`) + +.. ipython:: python + + data = "0,1,2\n3,4,5" + names = ["a", "b", "a"] + +**Previous behavior**: + +.. code-block:: ipython + + In [2]: pd.read_csv(StringIO(data), names=names) + Out[2]: + a b a + 0 2 1 2 + 1 5 4 5 + +The first ``a`` column contained the same data as the second ``a`` column, when it should have +contained the values ``[0, 3]``. + +**New behavior**: + +.. ipython:: python + :okexcept: + + pd.read_csv(StringIO(data), names=names) + + +.. _whatsnew_0190.enhancements.read_csv_categorical: + +Method ``read_csv`` supports parsing ``Categorical`` directly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`read_csv` function now supports parsing a ``Categorical`` column when +specified as a dtype (:issue:`10153`). Depending on the structure of the data, +this can result in a faster parse time and lower memory usage compared to +converting to ``Categorical`` after parsing. See the io :ref:`docs here `. + +.. ipython:: python + + data = """ + col1,col2,col3 + a,b,1 + a,b,2 + c,d,3 + """ + + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data)).dtypes + pd.read_csv(StringIO(data), dtype="category").dtypes + +Individual columns can be parsed as a ``Categorical`` using a dict specification + +.. ipython:: python + + pd.read_csv(StringIO(data), dtype={"col1": "category"}).dtypes + +.. note:: + + The resulting categories will always be parsed as strings (object dtype). + If the categories are numeric they can be converted using the + :func:`to_numeric` function, or as appropriate, another converter + such as :func:`to_datetime`. + + .. ipython:: python + :okwarning: + + df = pd.read_csv(StringIO(data), dtype="category") + df.dtypes + df["col3"] + df["col3"].cat.categories = pd.to_numeric(df["col3"].cat.categories) + df["col3"] + +.. 
_whatsnew_0190.enhancements.union_categoricals: + +Categorical concatenation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +- A function :func:`union_categoricals` has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`, :issue:`13763`, :issue:`13846`, :issue:`14173`) + + .. ipython:: python + + from pandas.api.types import union_categoricals + + a = pd.Categorical(["b", "c"]) + b = pd.Categorical(["a", "b"]) + union_categoricals([a, b]) + +- ``concat`` and ``append`` now can concat ``category`` dtypes with different ``categories`` as ``object`` dtype (:issue:`13524`) + + .. ipython:: python + + s1 = pd.Series(["a", "b"], dtype="category") + s2 = pd.Series(["b", "c"], dtype="category") + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pd.concat([s1, s2]) + ValueError: incompatible categories in categorical concat + +**New behavior**: + +.. ipython:: python + + pd.concat([s1, s2]) + +.. _whatsnew_0190.enhancements.semi_month_offsets: + +Semi-month offsets +^^^^^^^^^^^^^^^^^^ + +pandas has gained new frequency offsets, ``SemiMonthEnd`` ('SM') and ``SemiMonthBegin`` ('SMS'). +These provide date offsets anchored (by default) to the 15th and end of month, and 15th and 1st of month respectively. +(:issue:`1543`) + +.. ipython:: python + + from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin + +**SemiMonthEnd**: + +.. ipython:: python + + pd.Timestamp("2016-01-01") + SemiMonthEnd() + + pd.date_range("2015-01-01", freq="SM", periods=4) + +**SemiMonthBegin**: + +.. ipython:: python + + pd.Timestamp("2016-01-01") + SemiMonthBegin() + + pd.date_range("2015-01-01", freq="SMS", periods=4) + +Using the anchoring suffix, you can also specify the day of month to use instead of the 15th. + +.. ipython:: python + + pd.date_range("2015-01-01", freq="SMS-16", periods=4) + + pd.date_range("2015-01-01", freq="SM-14", periods=4) + +.. _whatsnew_0190.enhancements.index: + +New Index methods +^^^^^^^^^^^^^^^^^ + +The following methods and options are added to ``Index``, to be more consistent with the ``Series`` and ``DataFrame`` API. + +``Index`` now supports the ``.where()`` function for same shape indexing (:issue:`13170`) + +.. ipython:: python + + idx = pd.Index(["a", "b", "c"]) + idx.where([True, False, True]) + + +``Index`` now supports ``.dropna()`` to exclude missing values (:issue:`6194`) + +.. ipython:: python + + idx = pd.Index([1, 2, np.nan, 4]) + idx.dropna() + +For ``MultiIndex``, values are dropped if any level is missing by default. Specifying +``how='all'`` only drops values where all levels are missing. + +.. ipython:: python + + midx = pd.MultiIndex.from_arrays([[1, 2, np.nan, 4], [1, 2, np.nan, np.nan]]) + midx + midx.dropna() + midx.dropna(how="all") + +``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see the :ref:`docs here ` (:issue:`10008`, :issue:`13156`) + +.. ipython:: python + + idx = pd.Index(["a1a2", "b1", "c1"]) + idx.str.extractall(r"[ab](?P\d)") + +``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`) + +.. _whatsnew_0190.gbq: + +Google BigQuery enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the `docs `__ for more details (:issue:`13615`). 
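+
+  For example (an illustrative sketch; the project id and query below are placeholders):
+
+  .. code-block:: python
+
+      import pandas as pd
+
+      # ask BigQuery to run the query with standard SQL rather than the legacy dialect
+      df = pd.read_gbq("SELECT 1 AS x", project_id="my-project", dialect="standard")
+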
+- The :func:`~DataFrame.to_gbq` method now allows the DataFrame column order to differ from the destination table schema (:issue:`11359`). + +.. _whatsnew_0190.errstate: + +Fine-grained NumPy errstate +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previous versions of pandas would permanently silence numpy's ufunc error handling when ``pandas`` was imported. pandas did this in order to silence the warnings that would arise from using numpy ufuncs on missing data, which are usually represented as ``NaN`` s. Unfortunately, this silenced legitimate warnings arising in non-pandas code in the application. Starting with 0.19.0, pandas will use the ``numpy.errstate`` context manager to silence these warnings in a more fine-grained manner, only around where these operations are actually used in the pandas code base. (:issue:`13109`, :issue:`13145`) + +After upgrading pandas, you may see *new* ``RuntimeWarnings`` being issued from your code. These are likely legitimate, and the underlying cause likely existed in the code when using previous versions of pandas that simply silenced the warning. Use `numpy.errstate `__ around the source of the ``RuntimeWarning`` to control how these conditions are handled. + +.. _whatsnew_0190.get_dummies_dtypes: + +Method ``get_dummies`` now returns integer dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``pd.get_dummies`` function now returns dummy-encoded columns as small integers, rather than floats (:issue:`8725`). This should provide an improved memory footprint. + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pd.get_dummies(['a', 'b', 'a', 'c']).dtypes + + Out[1]: + a float64 + b float64 + c float64 + dtype: object + +**New behavior**: + +.. ipython:: python + + pd.get_dummies(["a", "b", "a", "c"]).dtypes + + +.. _whatsnew_0190.enhancements.to_numeric_downcast: + +Downcast values to smallest possible dtype in ``to_numeric`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``pd.to_numeric()`` now accepts a ``downcast`` parameter, which will downcast the data if possible to smallest specified numerical dtype (:issue:`13352`) + +.. ipython:: python + + s = ["1", 2, 3] + pd.to_numeric(s, downcast="unsigned") + pd.to_numeric(s, downcast="integer") + +.. _whatsnew_0190.dev_api: + +pandas development API +^^^^^^^^^^^^^^^^^^^^^^ + +As part of making pandas API more uniform and accessible in the future, we have created a standard +sub-package of pandas, ``pandas.api`` to hold public API's. We are starting by exposing type +introspection functions in ``pandas.api.types``. More sub-packages and officially sanctioned API's +will be published in future versions of pandas (:issue:`13147`, :issue:`13634`) + +The following are now part of this API: + +.. ipython:: python + + import pprint + from pandas.api import types + + funcs = [f for f in dir(types) if not f.startswith("_")] + pprint.pprint(funcs) + +.. note:: + + Calling these functions from the internal module ``pandas.core.common`` will now show a ``DeprecationWarning`` (:issue:`13990`) + + +.. _whatsnew_0190.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- ``Timestamp`` can now accept positional and keyword parameters similar to :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`) + + .. ipython:: python + + pd.Timestamp(2012, 1, 1) + + pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) + +- The ``.resample()`` function now accepts a ``on=`` or ``level=`` parameter for resampling on a datetimelike column or ``MultiIndex`` level (:issue:`13500`) + + .. 
ipython:: python + + df = pd.DataFrame( + {"date": pd.date_range("2015-01-01", freq="W", periods=5), "a": np.arange(5)}, + index=pd.MultiIndex.from_arrays( + [[1, 2, 3, 4, 5], pd.date_range("2015-01-01", freq="W", periods=5)], + names=["v", "d"], + ), + ) + df + df.resample("M", on="date")[["a"]].sum() + df.resample("M", level="d")[["a"]].sum() + +- The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the docs for more details (:issue:`13577`). +- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behavior remains to raising a ``NonExistentTimeError`` (:issue:`13057`) +- ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`) +- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the + ``decimal`` (:issue:`12933`), ``na_filter`` (:issue:`13321`) and the ``memory_map`` option (:issue:`13381`). +- Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`) +- The ``pd.read_html()`` has gained support for the ``na_values``, ``converters``, ``keep_default_na`` options (:issue:`13461`) +- ``Categorical.astype()`` now accepts an optional boolean argument ``copy``, effective when dtype is categorical (:issue:`13209`) +- ``DataFrame`` has gained the ``.asof()`` method to return the last non-NaN values according to the selected subset (:issue:`13358`) +- The ``DataFrame`` constructor will now respect key ordering if a list of ``OrderedDict`` objects are passed in (:issue:`13304`) +- ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) +- ``Series`` has gained the properties ``.is_monotonic``, ``.is_monotonic_increasing``, ``.is_monotonic_decreasing``, similar to ``Index`` (:issue:`13336`) +- ``DataFrame.to_sql()`` now allows a single value as the SQL type for all columns (:issue:`11886`). +- ``Series.append`` now supports the ``ignore_index`` option (:issue:`13677`) +- ``.to_stata()`` and ``StataWriter`` can now write variable labels to Stata dta files using a dictionary to make column names to labels (:issue:`13535`, :issue:`13536`) +- ``.to_stata()`` and ``StataWriter`` will automatically convert ``datetime64[ns]`` columns to Stata format ``%tc``, rather than raising a ``ValueError`` (:issue:`12259`) +- ``read_stata()`` and ``StataReader`` raise with a more explicit error message when reading Stata files with repeated value labels when ``convert_categoricals=True`` (:issue:`13923`) +- ``DataFrame.style`` will now render sparsified MultiIndexes (:issue:`11655`) +- ``DataFrame.style`` will now show column level names (e.g. ``DataFrame.columns.names``) (:issue:`13775`) +- ``DataFrame`` has gained support to re-order the columns based on the values + in a row using ``df.sort_values(by='...', axis=1)`` (:issue:`10806`) + + .. ipython:: python + + df = pd.DataFrame({"A": [2, 7], "B": [3, 5], "C": [4, 8]}, index=["row1", "row2"]) + df + df.sort_values(by="row2", axis=1) + +- Added documentation to :ref:`I/O` regarding the perils of reading in columns with mixed dtypes and how to handle it (:issue:`13746`) +- :meth:`~DataFrame.to_html` now has a ``border`` argument to control the value in the opening ```` tag. The default is the value of the ``html.border`` option, which defaults to 1. 
This also affects the notebook HTML repr, but since Jupyter's CSS includes a border-width attribute, the visual effect is the same. (:issue:`11563`). +- Raise ``ImportError`` in the sql functions when ``sqlalchemy`` is not installed and a connection string is used (:issue:`11920`). +- Compatibility with matplotlib 2.0. Older versions of pandas should also work with matplotlib 2.0 (:issue:`13333`) +- ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`) +- ``astype()`` will now accept a dict of column name to data types mapping as the ``dtype`` argument. (:issue:`12086`) +- The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) +- :func:`read_excel` now supports the true_values and false_values keyword arguments (:issue:`13347`) +- ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`) +- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`). +- ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) +- ``pd.read_stata()`` can now handle some format 111 files, which are produced by SAS when generating Stata dta files (:issue:`11526`) +- ``Series`` and ``Index`` now support ``divmod`` which will return a tuple of + series or indices. This behaves like a standard binary operator with regards + to broadcasting rules (:issue:`14208`). + + +.. _whatsnew_0190.api: + +API changes +~~~~~~~~~~~ + +``Series.tolist()`` will now return Python types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Series.tolist()`` will now return Python types in the output, mimicking NumPy ``.tolist()`` behavior (:issue:`10904`) + + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + +**Previous behavior**: + +.. code-block:: ipython + + In [7]: type(s.tolist()[0]) + Out[7]: + + +**New behavior**: + +.. ipython:: python + + type(s.tolist()[0]) + +.. _whatsnew_0190.api.series_ops: + +``Series`` operators for different indexes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Following ``Series`` operators have been changed to make all operators consistent, +including ``DataFrame`` (:issue:`1134`, :issue:`4581`, :issue:`13538`) + +- ``Series`` comparison operators now raise ``ValueError`` when ``index`` are different. +- ``Series`` logical operators align both ``index`` of left and right hand side. + +.. warning:: + Until 0.18.1, comparing ``Series`` with the same length, would succeed even if + the ``.index`` are different (the result ignores ``.index``). As of 0.19.0, this will raises ``ValueError`` to be more strict. This section also describes how to keep previous behavior or align different indexes, using the flexible comparison methods like ``.eq``. + + +As a result, ``Series`` and ``DataFrame`` operators behave as below: + +Arithmetic operators +"""""""""""""""""""" + +Arithmetic operators align both ``index`` (no changes). + +.. 
ipython:: python + + s1 = pd.Series([1, 2, 3], index=list("ABC")) + s2 = pd.Series([2, 2, 2], index=list("ABD")) + s1 + s2 + + df1 = pd.DataFrame([1, 2, 3], index=list("ABC")) + df2 = pd.DataFrame([2, 2, 2], index=list("ABD")) + df1 + df2 + +Comparison operators +"""""""""""""""""""" + +Comparison operators raise ``ValueError`` when ``.index`` are different. + +**Previous behavior** (``Series``): + +``Series`` compared values ignoring the ``.index`` as long as both had the same length: + +.. code-block:: ipython + + In [1]: s1 == s2 + Out[1]: + A False + B True + C False + dtype: bool + +**New behavior** (``Series``): + +.. code-block:: ipython + + In [2]: s1 == s2 + Out[2]: + ValueError: Can only compare identically-labeled Series objects + +.. note:: + + To achieve the same result as previous versions (compare values based on locations ignoring ``.index``), compare both ``.values``. + + .. ipython:: python + + s1.values == s2.values + + If you want to compare ``Series`` aligning its ``.index``, see flexible comparison methods section below: + + .. ipython:: python + + s1.eq(s2) + +**Current behavior** (``DataFrame``, no change): + +.. code-block:: ipython + + In [3]: df1 == df2 + Out[3]: + ValueError: Can only compare identically-labeled DataFrame objects + +Logical operators +""""""""""""""""" + +Logical operators align both ``.index`` of left and right hand side. + +**Previous behavior** (``Series``), only left hand side ``index`` was kept: + +.. code-block:: ipython + + In [4]: s1 = pd.Series([True, False, True], index=list('ABC')) + In [5]: s2 = pd.Series([True, True, True], index=list('ABD')) + In [6]: s1 & s2 + Out[6]: + A True + B False + C False + dtype: bool + +**New behavior** (``Series``): + +.. ipython:: python + + s1 = pd.Series([True, False, True], index=list("ABC")) + s2 = pd.Series([True, True, True], index=list("ABD")) + s1 & s2 + +.. note:: + ``Series`` logical operators fill a ``NaN`` result with ``False``. + +.. note:: + To achieve the same result as previous versions (compare values based on only left hand side index), you can use ``reindex_like``: + + .. ipython:: python + + s1 & s2.reindex_like(s1) + +**Current behavior** (``DataFrame``, no change): + +.. ipython:: python + + df1 = pd.DataFrame([True, False, True], index=list("ABC")) + df2 = pd.DataFrame([True, True, True], index=list("ABD")) + df1 & df2 + +Flexible comparison methods +""""""""""""""""""""""""""" + +``Series`` flexible comparison methods like ``eq``, ``ne``, ``le``, ``lt``, ``ge`` and ``gt`` now align both ``index``. Use these operators if you want to compare two ``Series`` +which has the different ``index``. + +.. ipython:: python + + s1 = pd.Series([1, 2, 3], index=["a", "b", "c"]) + s2 = pd.Series([2, 2, 2], index=["b", "c", "d"]) + s1.eq(s2) + s1.ge(s2) + +Previously, this worked the same as comparison operators (see above). + +.. _whatsnew_0190.api.promote: + +``Series`` type promotion on assignment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``Series`` will now correctly promote its dtype for assignment with incompat values to the current dtype (:issue:`13234`) + + +.. ipython:: python + :okwarning: + + s = pd.Series() + +**Previous behavior**: + +.. code-block:: ipython + + In [2]: s["a"] = pd.Timestamp("2016-01-01") + + In [3]: s["b"] = 3.0 + TypeError: invalid type promotion + +**New behavior**: + +.. ipython:: python + + s["a"] = pd.Timestamp("2016-01-01") + s["b"] = 3.0 + s + s.dtype + +.. 
_whatsnew_0190.api.to_datetime_coerce: + +Function ``.to_datetime()`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously if ``.to_datetime()`` encountered mixed integers/floats and strings, but no datetimes with ``errors='coerce'`` it would convert all to ``NaT``. + +**Previous behavior**: + +.. code-block:: ipython + + In [2]: pd.to_datetime([1, 'foo'], errors='coerce') + Out[2]: DatetimeIndex(['NaT', 'NaT'], dtype='datetime64[ns]', freq=None) + +**Current behavior**: + +This will now convert integers/floats with the default unit of ``ns``. + +.. ipython:: python + + pd.to_datetime([1, "foo"], errors="coerce") + +Bug fixes related to ``.to_datetime()``: + +- Bug in ``pd.to_datetime()`` when passing integers or floats, and no ``unit`` and ``errors='coerce'`` (:issue:`13180`). +- Bug in ``pd.to_datetime()`` when passing invalid data types (e.g. bool); will now respect the ``errors`` keyword (:issue:`13176`) +- Bug in ``pd.to_datetime()`` which overflowed on ``int8``, and ``int16`` dtypes (:issue:`13451`) +- Bug in ``pd.to_datetime()`` raise ``AttributeError`` with ``NaN`` and the other string is not valid when ``errors='ignore'`` (:issue:`12424`) +- Bug in ``pd.to_datetime()`` did not cast floats correctly when ``unit`` was specified, resulting in truncated datetime (:issue:`13834`) + +.. _whatsnew_0190.api.merging: + +Merging changes +^^^^^^^^^^^^^^^ + +Merging will now preserve the dtype of the join keys (:issue:`8596`) + +.. ipython:: python + + df1 = pd.DataFrame({"key": [1], "v1": [10]}) + df1 + df2 = pd.DataFrame({"key": [1, 2], "v1": [20, 30]}) + df2 + +**Previous behavior**: + +.. code-block:: ipython + + In [5]: pd.merge(df1, df2, how='outer') + Out[5]: + key v1 + 0 1.0 10.0 + 1 1.0 20.0 + 2 2.0 30.0 + + In [6]: pd.merge(df1, df2, how='outer').dtypes + Out[6]: + key float64 + v1 float64 + dtype: object + +**New behavior**: + +We are able to preserve the join keys + +.. ipython:: python + + pd.merge(df1, df2, how="outer") + pd.merge(df1, df2, how="outer").dtypes + +Of course if you have missing values that are introduced, then the +resulting dtype will be upcast, which is unchanged from previous. + +.. ipython:: python + + pd.merge(df1, df2, how="outer", on="key") + pd.merge(df1, df2, how="outer", on="key").dtypes + +.. _whatsnew_0190.api.describe: + +Method ``.describe()`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Percentile identifiers in the index of a ``.describe()`` output will now be rounded to the least precision that keeps them distinct (:issue:`13104`) + +.. ipython:: python + + s = pd.Series([0, 1, 2, 3, 4]) + df = pd.DataFrame([0, 1, 2, 3, 4]) + +**Previous behavior**: + +The percentiles were rounded to at most one decimal place, which could raise ``ValueError`` for a data frame if the percentiles were duplicated. + +.. code-block:: ipython + + In [3]: s.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + Out[3]: + count 5.000000 + mean 2.000000 + std 1.581139 + min 0.000000 + 0.0% 0.000400 + 0.1% 0.002000 + 0.1% 0.004000 + 50% 2.000000 + 99.9% 3.996000 + 100.0% 3.998000 + 100.0% 3.999600 + max 4.000000 + dtype: float64 + + In [4]: df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + Out[4]: + ... + ValueError: cannot reindex from a duplicate axis + +**New behavior**: + +.. ipython:: python + + s.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + +Furthermore: + +- Passing duplicated ``percentiles`` will now raise a ``ValueError``. 
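+
+  For example (a sketch of the rule stated above, not output taken from the release notes):
+
+  .. code-block:: python
+
+      import pandas as pd
+
+      s = pd.Series([0, 1, 2, 3, 4])
+      # the 25th percentile is requested twice, so this now raises ValueError
+      s.describe(percentiles=[0.25, 0.25, 0.75])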
+- Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`) + +.. _whatsnew_0190.api.period: + +``Period`` changes +^^^^^^^^^^^^^^^^^^ + +The ``PeriodIndex`` now has ``period`` dtype +"""""""""""""""""""""""""""""""""""""""""""" + +``PeriodIndex`` now has its own ``period`` dtype. The ``period`` dtype is a +pandas extension dtype like ``category`` or the :ref:`timezone aware dtype ` (``datetime64[ns, tz]``) (:issue:`13941`). +As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype: + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pi = pd.PeriodIndex(['2016-08-01'], freq='D') + + In [2]: pi + Out[2]: PeriodIndex(['2016-08-01'], dtype='int64', freq='D') + + In [3]: pd.api.types.is_integer_dtype(pi) + Out[3]: True + + In [4]: pi.dtype + Out[4]: dtype('int64') + +**New behavior**: + +.. ipython:: python + + pi = pd.PeriodIndex(["2016-08-01"], freq="D") + pi + pd.api.types.is_integer_dtype(pi) + pd.api.types.is_period_dtype(pi) + pi.dtype + type(pi.dtype) + +.. _whatsnew_0190.api.periodnat: + +``Period('NaT')`` now returns ``pd.NaT`` +"""""""""""""""""""""""""""""""""""""""" + +Previously, ``Period`` has its own ``Period('NaT')`` representation different from ``pd.NaT``. Now ``Period('NaT')`` has been changed to return ``pd.NaT``. (:issue:`12759`, :issue:`13582`) + +**Previous behavior**: + +.. code-block:: ipython + + In [5]: pd.Period('NaT', freq='D') + Out[5]: Period('NaT', 'D') + +**New behavior**: + +These result in ``pd.NaT`` without providing ``freq`` option. + +.. ipython:: python + + pd.Period("NaT") + pd.Period(None) + + +To be compatible with ``Period`` addition and subtraction, ``pd.NaT`` now supports addition and subtraction with ``int``. Previously it raised ``ValueError``. + +**Previous behavior**: + +.. code-block:: ipython + + In [5]: pd.NaT + 1 + ... + ValueError: Cannot add integral value to Timestamp without freq. + +**New behavior**: + +.. ipython:: python + + pd.NaT + 1 + pd.NaT - 1 + +``PeriodIndex.values`` now returns array of ``Period`` object +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +``.values`` is changed to return an array of ``Period`` objects, rather than an array +of integers (:issue:`13988`). + +**Previous behavior**: + +.. code-block:: ipython + + In [6]: pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') + In [7]: pi.values + Out[7]: array([492, 493]) + +**New behavior**: + +.. ipython:: python + + pi = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") + pi.values + + +.. _whatsnew_0190.api.setops: + +Index ``+`` / ``-`` no longer used for set operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Addition and subtraction of the base Index type and of DatetimeIndex +(not the numeric index types) +previously performed set operations (set union and difference). This +behavior was already deprecated since 0.15.0 (in favor using the specific +``.union()`` and ``.difference()`` methods), and is now disabled. When +possible, ``+`` and ``-`` are now used for element-wise operations, for +example for concatenating strings or subtracting datetimes +(:issue:`8227`, :issue:`14127`). + +Previous behavior: + +.. code-block:: ipython + + In [1]: pd.Index(['a', 'b']) + pd.Index(['a', 'c']) + FutureWarning: using '+' to provide set union with Indexes is deprecated, use '|' or .union() + Out[1]: Index(['a', 'b', 'c'], dtype='object') + +**New behavior**: the same operation will now perform element-wise addition: + +.. 
ipython:: python + + pd.Index(["a", "b"]) + pd.Index(["a", "c"]) + +Note that numeric Index objects already performed element-wise operations. +For example, the behavior of adding two integer Indexes is unchanged. +The base ``Index`` is now made consistent with this behavior. + +.. ipython:: python + + pd.Index([1, 2, 3]) + pd.Index([2, 3, 4]) + +Further, because of this change, it is now possible to subtract two +DatetimeIndex objects resulting in a TimedeltaIndex: + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: (pd.DatetimeIndex(['2016-01-01', '2016-01-02']) + ...: - pd.DatetimeIndex(['2016-01-02', '2016-01-03'])) + FutureWarning: using '-' to provide set differences with datetimelike Indexes is deprecated, use .difference() + Out[1]: DatetimeIndex(['2016-01-01'], dtype='datetime64[ns]', freq=None) + +**New behavior**: + +.. ipython:: python + + ( + pd.DatetimeIndex(["2016-01-01", "2016-01-02"]) + - pd.DatetimeIndex(["2016-01-02", "2016-01-03"]) + ) + + +.. _whatsnew_0190.api.difference: + +``Index.difference`` and ``.symmetric_difference`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Index.difference`` and ``Index.symmetric_difference`` will now, more consistently, treat ``NaN`` values as any other values. (:issue:`13514`) + +.. ipython:: python + + idx1 = pd.Index([1, 2, 3, np.nan]) + idx2 = pd.Index([0, 1, np.nan]) + +**Previous behavior**: + +.. code-block:: ipython + + In [3]: idx1.difference(idx2) + Out[3]: Float64Index([nan, 2.0, 3.0], dtype='float64') + + In [4]: idx1.symmetric_difference(idx2) + Out[4]: Float64Index([0.0, nan, 2.0, 3.0], dtype='float64') + +**New behavior**: + +.. ipython:: python + + idx1.difference(idx2) + idx1.symmetric_difference(idx2) + +.. _whatsnew_0190.api.unique_index: + +``Index.unique`` consistently returns ``Index`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Index.unique()`` now returns unique values as an +``Index`` of the appropriate ``dtype``. (:issue:`13395`). +Previously, most ``Index`` classes returned ``np.ndarray``, and ``DatetimeIndex``, +``TimedeltaIndex`` and ``PeriodIndex`` returned ``Index`` to keep metadata like timezone. + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pd.Index([1, 2, 3]).unique() + Out[1]: array([1, 2, 3]) + + In [2]: pd.DatetimeIndex(['2011-01-01', '2011-01-02', + ...: '2011-01-03'], tz='Asia/Tokyo').unique() + Out[2]: + DatetimeIndex(['2011-01-01 00:00:00+09:00', '2011-01-02 00:00:00+09:00', + '2011-01-03 00:00:00+09:00'], + dtype='datetime64[ns, Asia/Tokyo]', freq=None) + +**New behavior**: + +.. ipython:: python + + pd.Index([1, 2, 3]).unique() + pd.DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], tz="Asia/Tokyo" + ).unique() + +.. _whatsnew_0190.api.multiindex: + +``MultiIndex`` constructors, ``groupby`` and ``set_index`` preserve categorical dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``MultiIndex.from_arrays`` and ``MultiIndex.from_product`` will now preserve categorical dtype +in ``MultiIndex`` levels (:issue:`13743`, :issue:`13854`). + +.. ipython:: python + + cat = pd.Categorical(["a", "b"], categories=list("bac")) + lvl1 = ["foo", "bar"] + midx = pd.MultiIndex.from_arrays([cat, lvl1]) + midx + +**Previous behavior**: + +.. code-block:: ipython + + In [4]: midx.levels[0] + Out[4]: Index(['b', 'a', 'c'], dtype='object') + + In [5]: midx.get_level_values[0] + Out[5]: Index(['a', 'b'], dtype='object') + +**New behavior**: the single level is now a ``CategoricalIndex``: + +.. 
ipython:: python + + midx.levels[0] + midx.get_level_values(0) + +An analogous change has been made to ``MultiIndex.from_product``. +As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes in indexes + +.. ipython:: python + + df = pd.DataFrame({"A": [0, 1], "B": [10, 11], "C": cat}) + df_grouped = df.groupby(by=["A", "C"]).first() + df_set_idx = df.set_index(["A", "C"]) + +**Previous behavior**: + +.. code-block:: ipython + + In [11]: df_grouped.index.levels[1] + Out[11]: Index(['b', 'a', 'c'], dtype='object', name='C') + In [12]: df_grouped.reset_index().dtypes + Out[12]: + A int64 + C object + B float64 + dtype: object + + In [13]: df_set_idx.index.levels[1] + Out[13]: Index(['b', 'a', 'c'], dtype='object', name='C') + In [14]: df_set_idx.reset_index().dtypes + Out[14]: + A int64 + C object + B int64 + dtype: object + +**New behavior**: + +.. ipython:: python + + df_grouped.index.levels[1] + df_grouped.reset_index().dtypes + + df_set_idx.index.levels[1] + df_set_idx.reset_index().dtypes + +.. _whatsnew_0190.api.autogenerated_chunksize_index: + +Function ``read_csv`` will progressively enumerate chunks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When :func:`read_csv` is called with ``chunksize=n`` and without specifying an index, +each chunk used to have an independently generated index from ``0`` to ``n-1``. +They are now given instead a progressive index, starting from ``0`` for the first chunk, +from ``n`` for the second, and so on, so that, when concatenated, they are identical to +the result of calling :func:`read_csv` without the ``chunksize=`` argument +(:issue:`12185`). + +.. ipython:: python + + data = "A,B\n0,1\n2,3\n4,5\n6,7" + +**Previous behavior**: + +.. code-block:: ipython + + In [2]: pd.concat(pd.read_csv(StringIO(data), chunksize=2)) + Out[2]: + A B + 0 0 1 + 1 2 3 + 0 4 5 + 1 6 7 + +**New behavior**: + +.. ipython:: python + + pd.concat(pd.read_csv(StringIO(data), chunksize=2)) + +.. _whatsnew_0190.sparse: + +Sparse changes +^^^^^^^^^^^^^^ + +These changes allow pandas to handle sparse data with more dtypes, and for work to make a smoother experience with data handling. + +Types ``int64`` and ``bool`` support enhancements +""""""""""""""""""""""""""""""""""""""""""""""""" + +Sparse data structures now gained enhanced support of ``int64`` and ``bool`` ``dtype`` (:issue:`667`, :issue:`13849`). + +Previously, sparse data were ``float64`` dtype by default, even if all inputs were of ``int`` or ``bool`` dtype. You had to specify ``dtype`` explicitly to create sparse data with ``int64`` dtype. Also, ``fill_value`` had to be specified explicitly because the default was ``np.nan`` which doesn't appear in ``int64`` or ``bool`` data. + +.. code-block:: ipython + + In [1]: pd.SparseArray([1, 2, 0, 0]) + Out[1]: + [1.0, 2.0, 0.0, 0.0] + Fill: nan + IntIndex + Indices: array([0, 1, 2, 3], dtype=int32) + + # specifying int64 dtype, but all values are stored in sp_values because + # fill_value default is np.nan + In [2]: pd.SparseArray([1, 2, 0, 0], dtype=np.int64) + Out[2]: + [1, 2, 0, 0] + Fill: nan + IntIndex + Indices: array([0, 1, 2, 3], dtype=int32) + + In [3]: pd.SparseArray([1, 2, 0, 0], dtype=np.int64, fill_value=0) + Out[3]: + [1, 2, 0, 0] + Fill: 0 + IntIndex + Indices: array([0, 1], dtype=int32) + +As of v0.19.0, sparse data keeps the input dtype, and uses more appropriate ``fill_value`` defaults (``0`` for ``int64`` dtype, ``False`` for ``bool`` dtype). + +.. 
ipython:: python + :okwarning: + + pd.SparseArray([1, 2, 0, 0], dtype=np.int64) + pd.SparseArray([True, False, False, False]) + +See the :ref:`docs ` for more details. + +Operators now preserve dtypes +""""""""""""""""""""""""""""" + +- Sparse data structures can now preserve ``dtype`` after arithmetic ops (:issue:`13848`) + +.. code-block:: python + + s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) + s.dtype + + s + 1 + +- Sparse data structures now support ``astype`` to convert the internal ``dtype`` (:issue:`13900`) + +.. code-block:: python + + s = pd.SparseSeries([1.0, 0.0, 2.0, 0.0], fill_value=0) + s + s.astype(np.int64) + +``astype`` fails if the data contains values which cannot be converted to the specified ``dtype``. +Note that this limitation also applies to ``fill_value``, whose default is ``np.nan``. + +.. code-block:: ipython + + In [7]: pd.SparseSeries([1., np.nan, 2., np.nan], fill_value=np.nan).astype(np.int64) + Out[7]: + ValueError: unable to coerce current fill_value nan to int64 dtype + +Other sparse fixes +"""""""""""""""""" + +- Subclassed ``SparseDataFrame`` and ``SparseSeries`` now preserve class types when slicing or transposing (:issue:`13787`) +- ``SparseArray`` with ``bool`` dtype now supports logical (bool) operators (:issue:`14000`) +- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) +- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) +- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) +- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`) +- Bug in ``SparseDataFrame`` not respecting the ``dtype`` and ``fill_value`` of a passed ``SparseArray`` or ``SparseSeries`` (:issue:`13866`) +- Bug in ``SparseArray`` and ``SparseSeries`` not applying a ufunc to ``fill_value`` (:issue:`13853`) +- Bug in ``SparseSeries.abs`` incorrectly keeping a negative ``fill_value`` (:issue:`13853`) +- Bug in single-row slicing on multi-type ``SparseDataFrame`` objects, where types were previously forced to float (:issue:`13917`) +- Bug in ``SparseSeries`` slicing changing integer dtype to float (:issue:`8292`) +- Bug in ``SparseDataFrame`` comparison ops may raise ``TypeError`` (:issue:`13001`) +- Bug in ``SparseDataFrame.isnull`` raises ``ValueError`` (:issue:`8276`) +- Bug in ``SparseSeries`` representation with ``bool`` dtype may raise ``IndexError`` (:issue:`13110`) +- Bug in ``SparseSeries`` and ``SparseDataFrame`` of ``bool`` or ``int64`` dtype may display their values as if they were ``float64`` dtype (:issue:`13110`) +- Bug in sparse indexing using ``SparseArray`` with ``bool`` dtype may return incorrect result (:issue:`13985`) +- Bug in ``SparseArray`` created from ``SparseSeries`` may lose ``dtype`` (:issue:`13999`) +- Bug in ``SparseSeries`` comparison with dense returns normal ``Series`` rather than ``SparseSeries`` (:issue:`13999`) + + +.. _whatsnew_0190.indexer_dtype: + +Indexer dtype changes +^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This change only affects 64-bit Python running on Windows, and only affects relatively advanced + indexing operations. + +Methods such as ``Index.get_indexer`` that return an indexer array coerce that array to a "platform int", so that it can be +directly used in 3rd party library operations like ``numpy.take``. 
Previously, a platform int was defined as ``np.int_`` +which corresponds to a C integer, but the correct type, and what is being used now, is ``np.intp``, which corresponds +to the C integer size that can hold a pointer (:issue:`3033`, :issue:`13972`). + +These types are the same on many platforms, but for 64-bit Python on Windows, +``np.int_`` is 32 bits, and ``np.intp`` is 64 bits. Changing this behavior improves performance for many +operations on that platform. + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: i = pd.Index(['a', 'b', 'c']) + + In [2]: i.get_indexer(['b', 'b', 'c']).dtype + Out[2]: dtype('int32') + +**New behavior**: + +.. code-block:: ipython + + In [1]: i = pd.Index(['a', 'b', 'c']) + + In [2]: i.get_indexer(['b', 'b', 'c']).dtype + Out[2]: dtype('int64') + + +.. _whatsnew_0190.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``Timestamp.to_pydatetime`` will issue a ``UserWarning`` when ``warn=True`` and the instance has a non-zero number of nanoseconds; previously this would print a message to stdout (:issue:`14101`). +- ``Series.unique()`` with datetime and timezone now returns an array of ``Timestamp`` with timezone (:issue:`13565`). +- ``Panel.to_sparse()`` will raise a ``NotImplementedError`` exception when called (:issue:`13778`). +- ``Index.reshape()`` will raise a ``NotImplementedError`` exception when called (:issue:`12882`). +- ``.filter()`` enforces mutual exclusion of the keyword arguments (:issue:`12399`). +- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. The new behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64 (:issue:`12388`). +- An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`). +- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior (:issue:`13299`). +- Calls to ``.sample()`` will respect the random seed set via ``numpy.random.seed(n)`` (:issue:`13161`) +- ``Styler.apply`` is now more strict about the outputs your function must return. For ``axis=0`` or ``axis=1``, the output shape must be identical. For ``axis=None``, the output must be a DataFrame with identical columns and index labels (:issue:`13222`). +- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`) +- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`) +- Passing ``Period`` with multiple frequencies to normal ``Index`` now returns ``Index`` with ``object`` dtype (:issue:`13664`) +- ``PeriodIndex.fillna`` with a ``Period`` of a different ``freq`` now coerces to ``object`` dtype (:issue:`13664`) +- Faceted boxplots from ``DataFrame.boxplot(by=col)`` now return a ``Series`` when ``return_type`` is not None. Previously these returned an ``OrderedDict``. Note that when ``return_type=None``, the default, these still return a 2-D NumPy array (:issue:`12216`, :issue:`7096`). +- ``pd.read_hdf`` will now raise a ``ValueError`` instead of ``KeyError`` if a mode other than ``r``, ``r+`` and ``a`` is supplied. 
(:issue:`13623`) +- ``pd.read_csv()``, ``pd.read_table()``, and ``pd.read_hdf()`` raise the builtin ``FileNotFoundError`` exception for Python 3.x when called on a nonexistent file; this is back-ported as ``IOError`` in Python 2.x (:issue:`14086`) +- More informative exceptions are passed through the csv parser. The exception type would now be the original exception type instead of ``CParserError`` (:issue:`13652`). +- ``pd.read_csv()`` in the C engine will now issue a ``ParserWarning`` or raise a ``ValueError`` when ``sep`` encoded is more than one character long (:issue:`14065`) +- ``DataFrame.values`` will now return ``float64`` with a ``DataFrame`` of mixed ``int64`` and ``uint64`` dtypes, conforming to ``np.find_common_type`` (:issue:`10364`, :issue:`13917`) +- ``.groupby.groups`` will now return a dictionary of ``Index`` objects, rather than a dictionary of ``np.ndarray`` or ``lists`` (:issue:`14293`) + +.. _whatsnew_0190.deprecations: + +Deprecations +~~~~~~~~~~~~ +- ``Series.reshape`` and ``Categorical.reshape`` have been deprecated and will be removed in a subsequent release (:issue:`12882`, :issue:`12882`) +- ``PeriodIndex.to_datetime`` has been deprecated in favor of ``PeriodIndex.to_timestamp`` (:issue:`8254`) +- ``Timestamp.to_datetime`` has been deprecated in favor of ``Timestamp.to_pydatetime`` (:issue:`8254`) +- ``Index.to_datetime`` and ``DatetimeIndex.to_datetime`` have been deprecated in favor of ``pd.to_datetime`` (:issue:`8254`) +- ``pandas.core.datetools`` module has been deprecated and will be removed in a subsequent release (:issue:`14094`) +- ``SparseList`` has been deprecated and will be removed in a future version (:issue:`13784`) +- ``DataFrame.to_html()`` and ``DataFrame.to_latex()`` have dropped the ``colSpace`` parameter in favor of ``col_space`` (:issue:`13857`) +- ``DataFrame.to_sql()`` has deprecated the ``flavor`` parameter, as it is superfluous when SQLAlchemy is not installed (:issue:`13611`) +- Deprecated ``read_csv`` keywords: + + - ``compact_ints`` and ``use_unsigned`` have been deprecated and will be removed in a future version (:issue:`13320`) + - ``buffer_lines`` has been deprecated and will be removed in a future version (:issue:`13360`) + - ``as_recarray`` has been deprecated and will be removed in a future version (:issue:`13373`) + - ``skip_footer`` has been deprecated in favor of ``skipfooter`` and will be removed in a future version (:issue:`13349`) + +- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`) +- ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`) +- ``pd.tseries.util.pivot_annual`` is deprecated. Use ``pivot_table`` as alternative, an example is :ref:`here ` (:issue:`736`) +- ``pd.tseries.util.isleapyear`` has been deprecated and will be removed in a subsequent release. Datetime-likes now have a ``.is_leap_year`` property (:issue:`13727`) +- ``Panel4D`` and ``PanelND`` constructors are deprecated and will be removed in a future version. The recommended way to represent these types of n-dimensional data are with the `xarray package `__. pandas provides a :meth:`~Panel4D.to_xarray` method to automate this conversion (:issue:`13564`). +- ``pandas.tseries.frequencies.get_standard_freq`` is deprecated. 
Use ``pandas.tseries.frequencies.to_offset(freq).rule_code`` instead (:issue:`13874`) +- ``pandas.tseries.frequencies.to_offset``'s ``freqstr`` keyword is deprecated in favor of ``freq`` (:issue:`13874`) +- ``Categorical.from_array`` has been deprecated and will be removed in a future version (:issue:`13854`) + +.. _whatsnew_0190.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- The ``SparsePanel`` class has been removed (:issue:`13778`) +- The ``pd.sandbox`` module has been removed in favor of the external library ``pandas-qt`` (:issue:`13670`) +- The ``pandas.io.data`` and ``pandas.io.wb`` modules are removed in favor of + the `pandas-datareader package `__ (:issue:`13724`). +- The ``pandas.tools.rplot`` module has been removed in favor of + the `seaborn package `__ (:issue:`13855`) +- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, as was deprecated in 0.17.1 (:issue:`11274`, :issue:`13419`) +- ``DataFrame.to_dict()`` has dropped the ``outtype`` parameter in favor of ``orient`` (:issue:`13627`, :issue:`8486`) +- ``pd.Categorical`` has dropped setting of the ``ordered`` attribute directly in favor of the ``set_ordered`` method (:issue:`13671`) +- ``pd.Categorical`` has dropped the ``levels`` attribute in favor of ``categories`` (:issue:`8376`) +- ``DataFrame.to_sql()`` has dropped the ``mysql`` option for the ``flavor`` parameter (:issue:`13611`) +- ``Panel.shift()`` has dropped the ``lags`` parameter in favor of ``periods`` (:issue:`14041`) +- ``pd.Index`` has dropped the ``diff`` method in favor of ``difference`` (:issue:`13669`) +- ``pd.DataFrame`` has dropped the ``to_wide`` method in favor of ``to_panel`` (:issue:`14039`) +- ``Series.to_csv`` has dropped the ``nanRep`` parameter in favor of ``na_rep`` (:issue:`13804`) +- ``Series.xs``, ``DataFrame.xs``, ``Panel.xs``, ``Panel.major_xs``, and ``Panel.minor_xs`` have dropped the ``copy`` parameter (:issue:`13781`) +- ``str.split`` has dropped the ``return_type`` parameter in favor of ``expand`` (:issue:`13701`) +- Removal of the legacy time rules (offset aliases), deprecated since 0.17.0 (this has been alias since 0.8.0) (:issue:`13590`, :issue:`13868`). Now legacy time rules raises ``ValueError``. For the list of currently supported offsets, see :ref:`here `. +- The default value for the ``return_type`` parameter for ``DataFrame.plot.box`` and ``DataFrame.boxplot`` changed from ``None`` to ``"axes"``. These methods will now return a matplotlib axes by default instead of a dictionary of artists. See :ref:`here ` (:issue:`6581`). +- The ``tquery`` and ``uquery`` functions in the ``pandas.io.sql`` module are removed (:issue:`5950`). + + +.. 
_whatsnew_0190.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`) +- Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks is large, though ``IntIndex`` is recommended in such cases (:issue:`13082`) +- Improved performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`) +- Improved performance of float64 hash table operations, fixing some very slow indexing and groupby operations in Python 3 (:issue:`13166`, :issue:`13334`) +- Improved performance of ``DataFrameGroupBy.transform`` (:issue:`12737`) +- Improved performance of ``Index`` and ``Series`` ``.duplicated`` (:issue:`10235`) +- Improved performance of ``Index.difference`` (:issue:`12044`) +- Improved performance of ``RangeIndex.is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`13749`) +- Improved performance of datetime string parsing in ``DatetimeIndex`` (:issue:`13692`) +- Improved performance of hashing ``Period`` (:issue:`12817`) +- Improved performance of ``factorize`` of datetime with timezone (:issue:`13750`) +- Improved performance by lazily creating indexing hash tables on larger Indexes (:issue:`14266`) +- Improved performance of ``groupby.groups`` (:issue:`14293`) +- Avoided unnecessary materialization of a MultiIndex when introspecting it for memory usage (:issue:`14308`) + +.. _whatsnew_0190.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in ``groupby().shift()``, which could cause a segfault or corruption in rare circumstances when grouping by columns with missing values (:issue:`13813`) +- Bug in ``groupby().cumsum()`` calculating ``cumprod`` when ``axis=1`` (:issue:`13994`) +- Bug in ``pd.to_timedelta()`` in which the ``errors`` parameter was not being respected (:issue:`13613`) +- Bug in ``io.json.json_normalize()``, where non-ASCII keys raised an exception (:issue:`13213`) +- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`) +- Bug where an area plot drew its legend incorrectly if subplots were enabled or the legend was moved after plotting (matplotlib 1.5.0 is required to draw area plot legends properly) (:issue:`9161`, :issue:`13544`) +- Bug in ``DataFrame`` assignment with an object-dtyped ``Index`` where the resultant column remained a mutable view of the original object. 
(:issue:`13522`) +- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`) +- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`) +- Bug in ``Categorical.from_codes()`` where an unhelpful error was raised when an invalid ``ordered`` parameter was passed in (:issue:`14058`) +- Bug in ``Series`` construction from a tuple of integers on Windows not returning the default dtype (int64) (:issue:`13646`) +- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow was not being caught (:issue:`14068`) +- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`) +- Bug in ``.to_records()`` when the index name is a Unicode string (:issue:`13172`) +- Bug in calling ``.memory_usage()`` on an object which doesn't implement it (:issue:`12924`) +- Regression in ``Series.quantile`` with nans (also shows up in ``.median()`` and ``.describe()``); furthermore now names the ``Series`` with the quantile (:issue:`13098`, :issue:`13146`) +- Bug in ``SeriesGroupBy.transform`` with datetime values and missing groups (:issue:`13191`) +- Bug where empty ``Series`` were incorrectly coerced in datetime-like numeric operations (:issue:`13844`) +- Bug in ``Categorical`` constructor when passed a ``Categorical`` containing datetimes with timezones (:issue:`14190`) +- Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`) +- Bug in ``Series.str.extractall()`` with a single group and quantifier (:issue:`13382`) +- Bug in ``DatetimeIndex`` and ``Period`` subtraction raises ``ValueError`` or ``AttributeError`` rather than ``TypeError`` (:issue:`13078`) +- Bug in ``Index`` and ``Series`` created with ``NaN`` and ``NaT`` mixed data may not have ``datetime64`` dtype (:issue:`13324`) +- Bug in ``Index`` and ``Series`` may ignore ``np.datetime64('nat')`` and ``np.timedelta64('nat')`` to infer dtype (:issue:`13324`) +- Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`) +- Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`) +- Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`) +- Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` (:issue:`13212`) +- Bug in ``groupby(..).apply(..)`` when the passed function returns scalar values per group (:issue:`13468`). +- Bug in ``groupby(..).resample(..)`` where passing some keywords would raise an exception (:issue:`13235`) +- Bug in ``.tz_convert`` on a tz-aware ``DatetimeIndex`` that relied on the index being sorted for correct results (:issue:`13306`) +- Bug in ``.tz_localize`` with ``dateutil.tz.tzlocal`` may return incorrect result (:issue:`13583`) +- Bug in ``DatetimeTZDtype`` dtype with ``dateutil.tz.tzlocal`` cannot be regarded as valid dtype (:issue:`13583`) +- Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset that had one or more categorical columns failed unless the key argument was set to the name of the dataset. 
(:issue:`13231`) +- Bug in ``.rolling()`` that allowed a negative integer window in construction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`) +- Bug in ``Series`` indexing with tuple-valued data and a numeric index (:issue:`13509`) +- Bug in printing ``pd.DataFrame`` where unusual elements with the ``object`` dtype were causing segfaults (:issue:`13717`) +- Bug in ranking ``Series`` which could result in segfaults (:issue:`13445`) +- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`) +- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) +- Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`) +- Bug in ``pd.concat`` and ``.append`` may coerces ``datetime64`` and ``timedelta`` to ``object`` dtype containing python built-in ``datetime`` or ``timedelta`` rather than ``Timestamp`` or ``Timedelta`` (:issue:`13626`) +- Bug in ``PeriodIndex.append`` may raises ``AttributeError`` when the result is ``object`` dtype (:issue:`13221`) +- Bug in ``CategoricalIndex.append`` may accept normal ``list`` (:issue:`13626`) +- Bug in ``pd.concat`` and ``.append`` with the same timezone get reset to UTC (:issue:`7795`) +- Bug in ``Series`` and ``DataFrame`` ``.append`` raises ``AmbiguousTimeError`` if data contains datetime near DST boundary (:issue:`13626`) +- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`) +- Bug in ``DataFrame.describe()`` raising ``ValueError`` with only boolean columns (:issue:`13898`) +- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`) +- Bug in ``.str.replace`` does not raise ``TypeError`` for invalid replacement (:issue:`13438`) +- Bug in ``MultiIndex.from_arrays`` which didn't check for input array lengths matching (:issue:`13599`) +- Bug in ``cartesian_product`` and ``MultiIndex.from_product`` which may raise with empty input arrays (:issue:`12258`) +- Bug in ``pd.read_csv()`` which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`) +- Bug in ``pd.read_csv()`` which caused errors to be raised when a dictionary containing scalars is passed in for ``na_values`` (:issue:`12224`) +- Bug in ``pd.read_csv()`` which caused BOM files to be incorrectly parsed by not ignoring the BOM (:issue:`4793`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` which raised errors when a numpy array was passed in for ``usecols`` (:issue:`12546`) +- Bug in ``pd.read_csv()`` where the index columns were being incorrectly parsed when parsed as dates with a ``thousands`` parameter (:issue:`14066`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which ``NaN`` values weren't being detected after data was converted to numeric values (:issue:`13314`) +- Bug in ``pd.read_csv()`` in which the ``nrows`` argument was not properly validated for both engines (:issue:`10476`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a ``tempfile.TemporaryFile`` on Windows with Python 3 
(:issue:`13398`) +- Bug in ``pd.read_csv()`` that prevented the ``usecols`` kwarg from accepting single-byte Unicode strings (:issue:`13219`) +- Bug in ``pd.read_csv()`` that prevented ``usecols`` from being an empty set (:issue:`13402`) +- Bug in ``pd.read_csv()`` in the C engine where the NULL character was not being parsed as NULL (:issue:`14012`) +- Bug in ``pd.read_csv()`` with ``engine='c'`` in which NULL ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`) +- Bug in ``pd.read_csv()`` with ``engine='c'`` in which fields were not properly cast to float when quoting was specified as non-numeric (:issue:`13411`) +- Bug in ``pd.read_csv()`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`) +- Bug in ``pd.read_csv()``, where aliases for utf-xx (e.g. UTF-xx, UTF_xx, utf_xx) raised UnicodeDecodeError (:issue:`13549`) +- Bug in ``pd.read_csv``, ``pd.read_table``, ``pd.read_fwf``, ``pd.read_stata`` and ``pd.read_sas`` where files were opened by parsers but not closed if both ``chunksize`` and ``iterator`` were ``None`` (:issue:`13940`) +- Bug in ``StataReader``, ``StataWriter``, ``XportReader`` and ``SAS7BDATReader`` where a file was not properly closed when an error was raised (:issue:`13940`) +- Bug in ``pd.pivot_table()`` where ``margins_name`` is ignored when ``aggfunc`` is a list (:issue:`13354`) +- Bug in ``pd.Series.str.zfill``, ``center``, ``ljust``, ``rjust``, and ``pad`` not raising ``TypeError`` when passed non-integers (:issue:`13598`) +- Bug in checking for any null objects in a ``TimedeltaIndex``, which always returned ``True`` (:issue:`13603`) +- Bug in ``Series`` arithmetic raises ``TypeError`` if it contains datetime-like as ``object`` dtype (:issue:`13043`) +- Bug in ``Series.isnull()`` and ``Series.notnull()`` ignoring ``Period('NaT')`` (:issue:`13737`) +- Bug in ``Series.fillna()`` and ``Series.dropna()`` not affecting ``Period('NaT')`` (:issue:`13737`) +- Bug in ``.fillna(value=np.nan)`` incorrectly raises ``KeyError`` on a ``category`` dtyped ``Series`` (:issue:`14021`) +- Bug in extension dtype creation where the created types were not identical (failing ``is`` comparisons) (:issue:`13285`) +- Bug in ``.resample(..)`` where incorrect warnings were triggered by IPython introspection (:issue:`13618`) +- Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`) +- Bug in ``Series`` comparison may output incorrect result if rhs contains ``NaT`` (:issue:`9005`) +- Bug in ``Series`` and ``Index`` comparison may output incorrect result if it contains ``NaT`` with ``object`` dtype (:issue:`13592`) +- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on the right hand side (:issue:`13069`) +- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`) +- Bug in ``pd.set_eng_float_format()`` that would prevent NaN and Inf from formatting (:issue:`11981`) +- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`) +- Cleaned some compile-time warnings in datetime parsing (:issue:`13607`) +- Bug in ``factorize`` raises ``AmbiguousTimeError`` if data contains datetime near a DST boundary (:issue:`13750`) +- Bug in ``.set_index`` raises ``AmbiguousTimeError`` if the new index contains a DST boundary and multiple levels (:issue:`12920`) +- Bug in ``.shift`` raises ``AmbiguousTimeError`` if data contains datetime near a DST boundary (:issue:`13926`) +- Bug in ``pd.read_hdf()`` returned an incorrect result when a ``DataFrame`` with a ``categorical`` column was read with a 
query which doesn't match any values (:issue:`13792`) +- Bug in ``.iloc`` when indexing with a non-lexsorted MultiIndex (:issue:`13797`) +- Bug in ``.loc`` when indexing with date strings in a reverse sorted ``DatetimeIndex`` (:issue:`14316`) +- Bug in ``Series`` comparison operators when dealing with zero-dimensional NumPy arrays (:issue:`13006`) +- Bug in ``.combine_first`` may return incorrect ``dtype`` (:issue:`7630`, :issue:`10567`) +- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`) +- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`) +- Bug in ``.to_html``, ``.to_latex`` and ``.to_string`` silently ignoring a custom datetime formatter passed through the ``formatters`` keyword (:issue:`10690`) +- Bug in ``DataFrame.iterrows()``, not yielding a ``Series`` subclass if defined (:issue:`13977`) +- Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`) +- Bug in invalid ``Timedelta`` arithmetic and comparison may raise ``ValueError`` rather than ``TypeError`` (:issue:`13624`) +- Bug in invalid datetime parsing in ``to_datetime`` and ``DatetimeIndex`` may raise ``TypeError`` rather than ``ValueError`` (:issue:`11169`, :issue:`11287`) +- Bug in ``Index`` created with tz-aware ``Timestamp`` and mismatched ``tz`` option incorrectly coerces timezone (:issue:`13692`) +- Bug in ``DatetimeIndex`` with nanosecond frequency does not include timestamp specified with ``end`` (:issue:`13672`) +- Bug in ``Series`` when setting a slice with a ``np.timedelta64`` (:issue:`14155`) +- Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`) +- Bug in ``Index`` may ignore specified ``datetime64`` or ``timedelta64`` passed as ``dtype`` (:issue:`13981`) +- Bug where ``RangeIndex`` could be created with no arguments rather than raising ``TypeError`` (:issue:`13793`) +- Bug in ``.value_counts()`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`) +- Bug in ``DatetimeIndex`` may raise ``OutOfBoundsDatetime`` if the input ``np.datetime64`` has a unit other than ``ns`` (:issue:`9114`) +- Bug in ``Series`` creation with ``np.datetime64`` which has a unit other than ``ns`` as ``object`` dtype results in incorrect values (:issue:`13876`) +- Bug in ``resample`` with timedelta data where the data was cast to float (:issue:`13119`). +- Bug in ``pd.isnull()`` and ``pd.notnull()`` raise ``TypeError`` if the input datetime-like has a unit other than ``ns`` (:issue:`13389`) +- Bug in ``pd.merge()`` may raise ``TypeError`` if the input datetime-like has a unit other than ``ns`` (:issue:`13389`) +- Bug in ``HDFStore``/``read_hdf()`` discarded ``DatetimeIndex.name`` if ``tz`` was set (:issue:`13884`) +- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) +- Bug in ``groupby`` with ``as_index=False`` returns all ``NaN`` values when grouping on multiple columns including a categorical one (:issue:`13204`) +- Bug in ``df.groupby(...)[...]`` where getitem with ``Int64Index`` raised an error (:issue:`13731`) +- Bug in the CSS classes assigned to ``DataFrame.style`` for index names. Previously they were assigned ``"col_heading level<n> col<c>"`` where ``n`` was the number of levels + 1. Now they are assigned ``"index_name level<n>"``, where ``n`` is the correct level for that MultiIndex. 
+- Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another python package called apiclient (:issue:`13454`) +- Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`) +- Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`) +- Bug in subtract tz-aware ``datetime.datetime`` from tz-aware ``datetime64`` series (:issue:`14088`) +- Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`) +- Bug in invalid frequency offset string like "D1", "-2-3H" may not raise ``ValueError`` (:issue:`13930`) +- Bug in ``concat`` and ``groupby`` for hierarchical frames with ``RangeIndex`` levels (:issue:`13542`). +- Bug in ``Series.str.contains()`` for Series containing only ``NaN`` values of ``object`` dtype (:issue:`14171`) +- Bug in ``agg()`` function on groupby dataframe changes dtype of ``datetime64[ns]`` column to ``float64`` (:issue:`12821`) +- Bug in using NumPy ufunc with ``PeriodIndex`` to add or subtract integer raise ``IncompatibleFrequency``. Note that using standard operator like ``+`` or ``-`` is recommended, because standard operators use more efficient path (:issue:`13980`) +- Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`) +- Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`) +- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`) +- Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`) +- Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`) +- Bug in ``pd.eval()`` and ``HDFStore`` query truncating long float literals with python 2 (:issue:`14241`) +- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) +- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`) +- Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment. +- Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`) +- Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`) +- ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) +- Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) +- Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) + + +.. _whatsnew_0.19.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.18.1..v0.19.0 diff --git a/doc/source/whatsnew/v0.19.1.rst b/doc/source/whatsnew/v0.19.1.rst new file mode 100644 index 00000000..6ff3fb69 --- /dev/null +++ b/doc/source/whatsnew/v0.19.1.rst @@ -0,0 +1,77 @@ +.. _whatsnew_0191: + +Version 0.19.1 (November 3, 2016) +--------------------------------- + +{{ header }} + +.. 
ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release from 0.19.0 and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.19.1 + :local: + :backlinks: none + + +.. _whatsnew_0191.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Fixed performance regression in factorization of ``Period`` data (:issue:`14338`) +- Fixed performance regression in ``Series.asof(where)`` when ``where`` is a scalar (:issue:`14461`) +- Improved performance in ``DataFrame.asof(where)`` when ``where`` is a scalar (:issue:`14461`) +- Improved performance in ``.to_json()`` when ``lines=True`` (:issue:`14408`) +- Improved performance in certain types of ``loc`` indexing with a MultiIndex (:issue:`14551`). + + +.. _whatsnew_0191.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Source installs from PyPI will now again work without ``cython`` installed, as in previous versions (:issue:`14204`) +- Compat with Cython 0.25 for building (:issue:`14496`) +- Fixed regression where user-provided file handles were closed in ``read_csv`` (C engine) (:issue:`14418`). +- Fixed regression in ``DataFrame.quantile`` when missing values were present in some columns (:issue:`14357`). +- Fixed regression in ``Index.difference`` where the ``freq`` of a ``DatetimeIndex`` was incorrectly set (:issue:`14323`) +- Added back ``pandas.core.common.array_equivalent`` with a deprecation warning (:issue:`14555`). +- Bug in ``pd.read_csv`` for the C engine in which quotation marks were improperly parsed in skipped rows (:issue:`14459`) +- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`) +- Fixed regression in ``Index.append`` when categorical indices were appended (:issue:`14545`). +- Fixed regression in ``pd.DataFrame`` where the constructor failed when given a dict with a ``None`` value (:issue:`14381`) +- Fixed regression in ``DatetimeIndex._maybe_cast_slice_bound`` when the index is empty (:issue:`14354`). +- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`) +- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`) +- Bug in string indexing against data with an ``object`` ``Index`` may raise ``AttributeError`` (:issue:`14424`) +- Correctly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`) +- Bug in ``RangeIndex.intersection`` when the result is an empty set (:issue:`14364`). +- Bug in groupby-transform broadcasting that could cause incorrect dtype coercion (:issue:`14457`) +- Bug in ``Series.__setitem__`` which allowed mutating read-only arrays (:issue:`14359`). 
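+ +A minimal sketch of the corrected behavior (the array and values are made up for illustration, using the usual ``pd``/``np`` aliases); assigning into a ``Series`` backed by a read-only NumPy array should now raise rather than silently write through to it: + +.. code-block:: python + +   arr = np.array([1, 2, 3]) +   arr.flags.writeable = False  # mark the underlying buffer read-only +   s = pd.Series(arr) +   s[0] = 10  # expected to raise a ValueError instead of mutating arr in place +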
+- Bug in ``DataFrame.insert`` where multiple calls with duplicate columns can fail (:issue:`14291`) +- ``pd.merge()`` will raise ``ValueError`` with non-boolean parameters in passed boolean type arguments (:issue:`14434`) +- Bug in ``Timestamp`` where dates very near the minimum (1677-09) could underflow on creation (:issue:`14415`) +- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`) +- Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`) +- Bug in ``pd.concat`` with dataframes heterogeneous in length and tuple ``keys`` (:issue:`14438`) +- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) +- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`) +- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`) +- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`) +- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` + is not scalar and ``values`` is not specified (:issue:`14380`) + + +.. _whatsnew_0.19.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.19.0..v0.19.1 diff --git a/doc/source/whatsnew/v0.19.2.rst b/doc/source/whatsnew/v0.19.2.rst new file mode 100644 index 00000000..db9d9e65 --- /dev/null +++ b/doc/source/whatsnew/v0.19.2.rst @@ -0,0 +1,98 @@ +.. _whatsnew_0192: + +Version 0.19.2 (December 24, 2016) +---------------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release in the 0.19.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- Compatibility with Python 3.6 +- Added a `Pandas Cheat Sheet `__. (:issue:`13202`). + + +.. contents:: What's new in v0.19.2 + :local: + :backlinks: none + + +.. _whatsnew_0192.enhancements: + +Enhancements +~~~~~~~~~~~~ + +The ``pd.merge_asof()``, added in 0.19.0, gained some improvements: + +- ``pd.merge_asof()`` gained ``left_index``/``right_index`` and ``left_by``/``right_by`` arguments (:issue:`14253`) +- ``pd.merge_asof()`` can take multiple columns in ``by`` parameter and has specialized dtypes for better performance (:issue:`13936`) + + +.. _whatsnew_0192.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Performance regression with ``PeriodIndex`` (:issue:`14822`) +- Performance regression in indexing with getitem (:issue:`14930`) +- Improved performance of ``.replace()`` (:issue:`12745`) +- Improved performance ``Series`` creation with a datetime index and dictionary data (:issue:`14894`) + + +.. 
_whatsnew_0192.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Compat with python 3.6 for pickling of some offsets (:issue:`14685`) +- Compat with python 3.6 for some indexing exception types (:issue:`14684`, :issue:`14689`) +- Compat with python 3.6 for deprecation warnings in the test suite (:issue:`14681`) +- Compat with python 3.6 for Timestamp pickles (:issue:`14689`) +- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`) +- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`) +- Bug in ``pd.read_csv`` in which aliasing was being done for ``na_values`` when passed in as a dictionary (:issue:`14203`) +- Bug in ``pd.read_csv`` in which column indices for a dict-like ``na_values`` were not being respected (:issue:`14203`) +- Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`) +- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`) +- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally. +- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`) +- Bug in ``.fillna()`` in which timezone aware datetime64 values were incorrectly rounded (:issue:`14872`) +- Bug in ``.groupby(..., sort=True)`` of a non-lexsorted MultiIndex when grouping with multiple levels (:issue:`14776`) +- Bug in ``pd.cut`` with negative values and a single bin (:issue:`14652`) +- Bug in ``pd.to_numeric`` where a 0 was not unsigned on a ``downcast='unsigned'`` argument (:issue:`14401`) +- Bug in plotting regular and irregular timeseries using shared axes + (``sharex=True`` or ``ax.twinx()``) (:issue:`13341`, :issue:`14322`). +- Bug in not propagating exceptions in parsing invalid datetimes, noted in python 3.6 (:issue:`14561`) +- Bug in resampling a ``DatetimeIndex`` in local TZ, covering a DST change, which would raise ``AmbiguousTimeError`` (:issue:`14682`) +- Bug in indexing that transformed ``RecursionError`` into ``KeyError`` or ``IndexingError`` (:issue:`14554`) +- Bug in ``HDFStore`` when writing a ``MultiIndex`` when using ``data_columns=True`` (:issue:`14435`) +- Bug in ``HDFStore.append()`` when writing a ``Series`` and passing a ``min_itemsize`` argument containing a value for the ``index`` (:issue:`11412`) +- Bug when writing to a ``HDFStore`` in ``table`` format with a ``min_itemsize`` value for the ``index`` and without asking to append (:issue:`10381`) +- Bug in ``Series.groupby.nunique()`` raising an ``IndexError`` for an empty ``Series`` (:issue:`12553`) +- Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the index had duplicate values (:issue:`13412`) +- Bug in clipboard functions on linux with python2 with unicode and separators (:issue:`13747`) +- Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`) +- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`) +- Bug in ``DataFrame.combine_first()`` for integer columns (:issue:`14687`). 
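+ +As a small sketch of the intent of this fix (the frames below are made up for illustration): when the combined result has no missing values, the integer dtype should now be preserved instead of being cast to float. + +.. code-block:: python + +   df1 = pd.DataFrame({"A": [1, 2]}, index=[0, 1]) +   df2 = pd.DataFrame({"A": [3, 4]}, index=[1, 2]) +   # values from df1 take priority; remaining positions come from df2 +   df1.combine_first(df2).dtypes +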
+- Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`) +- Bug in ``pd.read_csv()`` in which the ``nrows`` parameter was not being respected for large input when using the C engine for parsing (:issue:`7626`) +- Bug in ``pd.merge_asof()`` could not handle timezone-aware DatetimeIndex when a tolerance was specified (:issue:`14844`) +- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`) +- Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot. (:issue:`14821`) +- Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`) + + +.. _whatsnew_0.19.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.19.1..v0.19.2 diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst new file mode 100644 index 00000000..faf4b1ac --- /dev/null +++ b/doc/source/whatsnew/v0.20.0.rst @@ -0,0 +1,1800 @@ +.. _whatsnew_0200: + +Version 0.20.1 (May 5, 2017) +---------------------------- + +{{ header }} + +This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- New ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here ` +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. +- The ``.ix`` indexer has been deprecated, see :ref:`here ` +- ``Panel`` has been deprecated, see :ref:`here ` +- Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here ` +- Improved user API when grouping by index levels in ``.groupby()``, see :ref:`here ` +- Improved support for ``UInt64`` dtypes, see :ref:`here ` +- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec and that gives the possibility for a more interactive repr in the Jupyter Notebook, see :ref:`here ` +- Experimental support for exporting styled DataFrames (``DataFrame.style``) to Excel, see :ref:`here ` +- Window binary corr/cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` +- Support for S3 handling now uses ``s3fs``, see :ref:`here ` +- Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` + +.. warning:: + + pandas has changed the internal structure and layout of the code base. + This can affect imports that are not from the top-level ``pandas.*`` namespace, please see the changes :ref:`here `. + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. note:: + + This is a combined release for 0.20.0 and 0.20.1. + Version 0.20.1 contains one additional change for backwards-compatibility with downstream projects using pandas' ``utils`` routines. (:issue:`16250`) + +.. contents:: What's new in v0.20.0 + :local: + :backlinks: none + +.. _whatsnew_0200.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0200.enhancements.agg: + +Method ``agg`` API for DataFrame/Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Series & DataFrame have been enhanced to support the aggregation API. 
This is a familiar API +from groupby, window operations, and resampling. This allows aggregation operations in a concise way +by using :meth:`~DataFrame.agg` and :meth:`~DataFrame.transform`. The full documentation +is :ref:`here ` (:issue:`1623`). + +Here is a sample + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + df.iloc[3:7] = np.nan + df + +One can operate using string function names, callables, lists, or dictionaries of these. + +Using a single function is equivalent to ``.apply``. + +.. ipython:: python + + df.agg('sum') + +Multiple aggregations with a list of functions. + +.. ipython:: python + + df.agg(['sum', 'min']) + +Using a dict provides the ability to apply specific aggregations per column. +You will get a matrix-like output of all of the aggregators. The output has one column +per unique function. Those functions applied to a particular column will be ``NaN``: + +.. ipython:: python + + df.agg({'A': ['sum', 'min'], 'B': ['min', 'max']}) + +The API also supports a ``.transform()`` function for broadcasting results. + +.. ipython:: python + :okwarning: + + df.transform(['abs', lambda x: x - x.min()]) + +When presented with mixed dtypes that cannot be aggregated, ``.agg()`` will only take the valid +aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': [1., 2., 3.], + 'C': ['foo', 'bar', 'baz'], + 'D': pd.date_range('20130101', periods=3)}) + df.dtypes + +.. ipython:: python + :okwarning: + + df.agg(['min', 'sum']) + +.. _whatsnew_0200.enhancements.dataio_dtype: + +Keyword argument ``dtype`` for data IO +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``'python'`` engine for :func:`read_csv`, as well as the :func:`read_fwf` function for parsing +fixed-width text files and :func:`read_excel` for parsing Excel files, now accept the ``dtype`` keyword argument for specifying the types of specific columns (:issue:`14295`). See the :ref:`io docs ` for more information. + +.. ipython:: python + :suppress: + + from io import StringIO + +.. ipython:: python + + data = "a b\n1 2\n3 4" + pd.read_fwf(StringIO(data)).dtypes + pd.read_fwf(StringIO(data), dtype={'a': 'float64', 'b': 'object'}).dtypes + +.. _whatsnew_0120.enhancements.datetime_origin: + +Method ``.to_datetime()`` has gained an ``origin`` parameter +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`to_datetime` has gained a new parameter, ``origin``, to define a reference date +from where to compute the resulting timestamps when parsing numerical values with a specific ``unit`` specified. (:issue:`11276`, :issue:`11745`) + +For example, with 1960-01-01 as the starting date: + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``, which is +commonly called 'unix epoch' or POSIX time. This was the previous default, so this is a backward compatible change. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D') + + +.. _whatsnew_0200.enhancements.groupby_access: + +GroupBy enhancements +^^^^^^^^^^^^^^^^^^^^ + +Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names. Previously, only column names could be referenced. This allows to easily group by a column and index level at the same time. (:issue:`5677`) + +.. 
ipython:: python + + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + + index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + + df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 3, 3], + 'B': np.arange(8)}, + index=index) + df + + df.groupby(['second', 'A']).sum() + + +.. _whatsnew_0200.enhancements.compressed_urls: + +Better support for compressed URLs in ``read_csv`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The compression code was refactored (:issue:`12688`). As a result, reading +dataframes from URLs in :func:`read_csv` or :func:`read_table` now supports +additional compression methods: ``xz``, ``bz2``, and ``zip`` (:issue:`14570`). +Previously, only ``gzip`` compression was supported. By default, compression of +URLs and paths are now inferred using their file extensions. Additionally, +support for bz2 compression in the python 2 C-engine improved (:issue:`14874`). + +.. ipython:: python + + url = ('https://github.com/{repo}/raw/{branch}/{path}' + .format(repo='pandas-dev/pandas', + branch='main', + path='pandas/tests/io/parser/data/salaries.csv.bz2')) + # default, infer compression + df = pd.read_csv(url, sep='\t', compression='infer') + # explicitly specify compression + df = pd.read_csv(url, sep='\t', compression='bz2') + df.head(2) + +.. _whatsnew_0200.enhancements.pickle_compression: + +Pickle file IO now supports compression +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` +can now read from and write to compressed pickle files. Compression methods +can be an explicit parameter or be inferred from the file extension. +See :ref:`the docs here. ` + +.. ipython:: python + + df = pd.DataFrame({'A': np.random.randn(1000), + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) + +Using an explicit compression type + +.. ipython:: python + + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt.head() + +The default is to infer the compression type from the extension (``compression='infer'``): + +.. ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt.head() + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt.head() + +.. ipython:: python + :suppress: + + import os + os.remove("data.pkl.compress") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") + +.. _whatsnew_0200.enhancements.uint64_support: + +UInt64 support improved +^^^^^^^^^^^^^^^^^^^^^^^ + +pandas has significantly improved support for operations involving unsigned, +or purely non-negative, integers. Previously, handling these integers would +result in improper rounding or data-type casting, leading to incorrect results. +Notably, a new numerical index, ``UInt64Index``, has been created (:issue:`14937`) + +.. 
code-block:: ipython + + In [1]: idx = pd.UInt64Index([1, 2, 3]) + In [2]: df = pd.DataFrame({'A': ['a', 'b', 'c']}, index=idx) + In [3]: df.index + Out[3]: UInt64Index([1, 2, 3], dtype='uint64') + +- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`) +- Bug in ``Series.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14721`) +- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`) +- Bug in ``pd.read_csv()`` in which unsigned 64-bit integer elements were being improperly converted to the wrong data types (:issue:`14983`) +- Bug in ``pd.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14915`) +- Bug in ``pd.value_counts()`` in which unsigned 64-bit integers were being erroneously truncated in the output (:issue:`14934`) + +.. _whatsnew_0200.enhancements.groupy_categorical: + +GroupBy on categoricals +^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueError`` when grouping on a categorical series with some categories not appearing in the data. (:issue:`13179`) + +.. ipython:: python + + chromosomes = np.r_[np.arange(1, 23).astype(str), ['X', 'Y']] + df = pd.DataFrame({ + 'A': np.random.randint(100), + 'B': np.random.randint(100), + 'C': np.random.randint(100), + 'chromosomes': pd.Categorical(np.random.choice(chromosomes, 100), + categories=chromosomes, + ordered=True)}) + df + +**Previous behavior**: + +.. code-block:: ipython + + In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() + --------------------------------------------------------------------------- + ValueError: items in new_categories are not the same as in old categories + +**New behavior**: + +.. ipython:: python + + df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() + +.. _whatsnew_0200.enhancements.table_schema: + +Table schema output +^^^^^^^^^^^^^^^^^^^ + +The new orient ``'table'`` for :meth:`DataFrame.to_json` +will generate a `Table Schema`_ compatible string representation of +the data. + +.. ipython:: python + + df = pd.DataFrame( + {'A': [1, 2, 3], + 'B': ['a', 'b', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=3)}, + index=pd.Index(range(3), name='idx')) + df + df.to_json(orient='table') + + +See :ref:`IO: Table Schema for more information `. + +Additionally, the repr for ``DataFrame`` and ``Series`` can now publish +this JSON Table schema representation of the Series or DataFrame if you are +using IPython (or another frontend like `nteract`_ using the Jupyter messaging +protocol). +This gives frontends like the Jupyter notebook and `nteract`_ +more flexibility in how they display pandas objects, since they have +more information about the data. +You must enable this by setting the ``display.html.table_schema`` option to ``True``. + +.. _Table Schema: http://specs.frictionlessdata.io/json-table-schema/ +.. _nteract: https://nteract.io/ + +.. _whatsnew_0200.enhancements.scipy_sparse: + +SciPy sparse matrix from/to SparseDataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas now supports creating sparse dataframes directly from ``scipy.sparse.spmatrix`` instances. +See the :ref:`documentation ` for more information. (:issue:`4343`) + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. + +.. 
code-block:: python + + from scipy.sparse import csr_matrix + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + sp_arr = csr_matrix(arr) + sp_arr + sdf = pd.SparseDataFrame(sp_arr) + sdf + +To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you can use: + +.. code-block:: python + + sdf.to_coo() + +.. _whatsnew_0200.enhancements.style_excel: + +Excel output for styled DataFrames +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Experimental support has been added to export ``DataFrame.style`` formats to Excel using the ``openpyxl`` engine. (:issue:`15530`) + +For example, after running the following, ``styled.xlsx`` renders as below: + +.. ipython:: python + :okwarning: + + np.random.seed(24) + df = pd.DataFrame({'A': np.linspace(1, 10, 10)}) + df = pd.concat([df, pd.DataFrame(np.random.RandomState(24).randn(10, 4), + columns=list('BCDE'))], + axis=1) + df.iloc[0, 2] = np.nan + df + styled = (df.style + .applymap(lambda val: 'color:red;' if val < 0 else 'color:black;') + .highlight_max()) + styled.to_excel('styled.xlsx', engine='openpyxl') + +.. image:: ../_static/style-excel.png + +.. ipython:: python + :suppress: + + import os + os.remove('styled.xlsx') + +See the :ref:`Style documentation ` for more detail. + +.. _whatsnew_0200.enhancements.intervalindex: + +IntervalIndex +^^^^^^^^^^^^^ + +pandas has gained an ``IntervalIndex`` with its own dtype, ``interval`` as well as the ``Interval`` scalar type. These allow first-class support for interval +notation, specifically as a return type for the categories in :func:`cut` and :func:`qcut`. The ``IntervalIndex`` allows some unique indexing, see the +:ref:`docs `. (:issue:`7640`, :issue:`8625`) + +.. warning:: + + These indexing behaviors of the IntervalIndex are provisional and may change in a future version of pandas. Feedback on usage is welcome. + + +Previous behavior: + +The returned categories were strings, representing Intervals + +.. code-block:: ipython + + In [1]: c = pd.cut(range(4), bins=2) + + In [2]: c + Out[2]: + [(-0.003, 1.5], (-0.003, 1.5], (1.5, 3], (1.5, 3]] + Categories (2, object): [(-0.003, 1.5] < (1.5, 3]] + + In [3]: c.categories + Out[3]: Index(['(-0.003, 1.5]', '(1.5, 3]'], dtype='object') + +New behavior: + +.. ipython:: python + + c = pd.cut(range(4), bins=2) + c + c.categories + +Furthermore, this allows one to bin *other* data with these same bins, with ``NaN`` representing a missing +value similar to other dtypes. + +.. ipython:: python + + pd.cut([0, 3, 5, 1], bins=c.categories) + +An ``IntervalIndex`` can also be used in ``Series`` and ``DataFrame`` as the index. + +.. ipython:: python + + df = pd.DataFrame({'A': range(4), + 'B': pd.cut([0, 3, 1, 1], bins=c.categories) + }).set_index('B') + df + +Selecting via a specific interval: + +.. ipython:: python + + df.loc[pd.Interval(1.5, 3.0)] + +Selecting via a scalar value that is contained *in* the intervals. + +.. ipython:: python + + df.loc[0] + +.. _whatsnew_0200.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window-endpoint closedness. See the :ref:`documentation ` (:issue:`13965`) +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. 
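+ +A minimal sketch of the new round trip (the frame contents and the file name ``example.feather`` are illustrative, and the optional ``feather-format`` dependency is assumed to be installed): + +.. code-block:: python + +   df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}) +   df.to_feather("example.feather")  # write the frame to a Feather file +   pd.read_feather("example.feather")  # read it back as a DataFrame +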
+- ``Series.str.replace()`` now accepts a callable as the replacement, which is passed to ``re.sub`` (:issue:`15055`)
+- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`)
+- ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`)
+- ``DataFrame`` and ``DataFrame.groupby()`` have gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`, :issue:`15197`).
+- ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`).
+- ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
+- Multiple offset aliases with decimal points are now supported (e.g. ``0.5min`` is parsed as ``30s``) (:issue:`8419`)
+- ``.isnull()`` and ``.notnull()`` have been added to ``Index`` objects to make them more consistent with the ``Series`` API (:issue:`15300`)
+- New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack of sorting or an incorrect key. See :ref:`here `
+- ``MultiIndex`` has gained a ``.to_frame()`` method to convert to a ``DataFrame`` (:issue:`12397`)
+- ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`, :issue:`14798`)
+- ``pd.qcut`` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`7751`)
+- ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`)
+- The ``usecols`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`14154`)
+- The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`)
+- The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`)
+- ``DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
+- ``DataFrame.plot`` can pass the matplotlib 2.0 default color cycle as a single string for the ``color`` parameter, see `here `__. (:issue:`15516`)
+- ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
+- Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename labels in the specified level of a MultiIndex (:issue:`4160`).
+- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if this is a ``MultiIndex`` (:issue:`16164`)
+- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`)
+- ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`)
+- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements
+- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
+- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
+- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).
+- :func:`pandas.util.hash_pandas_object` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) +- ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) +- ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) +- ``pd.read_html()`` will parse multiple header rows, creating a MultiIndex header. (:issue:`13434`). +- HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) +- :class:`pandas.io.formats.style.Styler` template now has blocks for easier extension, see the :ref:`example notebook ` (:issue:`15649`) +- :meth:`Styler.render() ` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) +- Compatibility with Jupyter notebook 5.0; MultiIndex column labels are left-aligned and MultiIndex row-labels are top-aligned (:issue:`15379`) +- ``TimedeltaIndex`` now has a custom date-tick formatter specifically designed for nanosecond level precision (:issue:`8711`) +- ``pd.api.types.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. +- ``DataFrame.to_latex()`` and ``DataFrame.to_string()`` now allow optional header aliases. (:issue:`15536`) +- Re-enable the ``parse_dates`` keyword of ``pd.read_excel()`` to parse string columns as dates (:issue:`14326`) +- Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) +- Enabled floor division for ``Timedelta`` and ``TimedeltaIndex`` (:issue:`15828`) +- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) +- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) +- ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) +- :meth:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) +- ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`) +- ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`) +- The ``display.show_dimensions`` option can now also be used to specify + whether the length of a ``Series`` should be shown in its repr (:issue:`7117`). +- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword argument that sorts class labels and the colors assigned to them (:issue:`15908`) +- Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here ` (:issue:`16157`) +- ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`) + +.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations + + +.. _whatsnew_0200.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
_whatsnew.api_breaking.io_compat: + +Possible incompatibility for HDF5 formats created with pandas < 0.13.0 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``pd.TimeSeries`` was deprecated officially in 0.17.0, though has already been an alias since 0.13.0. It has +been dropped in favor of ``pd.Series``. (:issue:`15098`). + +This *may* cause HDF5 files that were created in prior versions to become unreadable if ``pd.TimeSeries`` +was used. This is most likely to be for pandas < 0.13.0. If you find yourself in this situation. +You can use a recent prior version of pandas to read in your HDF5 files, +then write them out again after applying the procedure below. + +.. code-block:: ipython + + In [2]: s = pd.TimeSeries([1, 2, 3], index=pd.date_range('20130101', periods=3)) + + In [3]: s + Out[3]: + 2013-01-01 1 + 2013-01-02 2 + 2013-01-03 3 + Freq: D, dtype: int64 + + In [4]: type(s) + Out[4]: pandas.core.series.TimeSeries + + In [5]: s = pd.Series(s) + + In [6]: s + Out[6]: + 2013-01-01 1 + 2013-01-02 2 + 2013-01-03 3 + Freq: D, dtype: int64 + + In [7]: type(s) + Out[7]: pandas.core.series.Series + + +.. _whatsnew_0200.api_breaking.index_map: + +Map on Index types now return other Index types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`) + +.. ipython:: python + + idx = pd.Index([1, 2]) + idx + mi = pd.MultiIndex.from_tuples([(1, 2), (2, 4)]) + mi + +Previous behavior: + +.. code-block:: ipython + + In [5]: idx.map(lambda x: x * 2) + Out[5]: array([2, 4]) + + In [6]: idx.map(lambda x: (x, x * 2)) + Out[6]: array([(1, 2), (2, 4)], dtype=object) + + In [7]: mi.map(lambda x: x) + Out[7]: array([(1, 2), (2, 4)], dtype=object) + + In [8]: mi.map(lambda x: x[0]) + Out[8]: array([1, 2]) + +New behavior: + +.. ipython:: python + + idx.map(lambda x: x * 2) + idx.map(lambda x: (x, x * 2)) + + mi.map(lambda x: x) + + mi.map(lambda x: x[0]) + + +``map`` on a ``Series`` with ``datetime64`` values may return ``int64`` dtypes rather than ``int32`` + +.. ipython:: python + + s = pd.Series(pd.date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H') + .tz_localize('Asia/Tokyo')) + s + +Previous behavior: + +.. code-block:: ipython + + In [9]: s.map(lambda x: x.hour) + Out[9]: + 0 0 + 1 1 + 2 2 + dtype: int32 + +New behavior: + +.. ipython:: python + + s.map(lambda x: x.hour) + + +.. _whatsnew_0200.api_breaking.index_dt_field: + +Accessing datetime fields of Index now return Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The datetime-related attributes (see :ref:`here ` +for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously +returned numpy arrays. They will now return a new ``Index`` object, except +in the case of a boolean field, where the result will still be a boolean ndarray. (:issue:`15022`) + +Previous behaviour: + +.. code-block:: ipython + + In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H') + + In [2]: idx.hour + Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32) + +New behavior: + +.. ipython:: python + + idx = pd.date_range("2015-01-01", periods=5, freq='10H') + idx.hour + +This has the advantage that specific ``Index`` methods are still available on the +result. On the other hand, this might have backward incompatibilities: e.g. +compared to numpy arrays, ``Index`` objects are not mutable. To get the original +ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``. + +.. 
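+For example, if a plain (mutable) ndarray is needed rather than the new ``Index`` result, the conversion is explicit. A minimal sketch:
+
+.. code-block:: python
+
+   import numpy as np
+   import pandas as pd
+
+   idx = pd.date_range("2015-01-01", periods=5, freq="10H")
+   idx.hour              # now returns an Index
+   np.asarray(idx.hour)  # plain ndarray, as returned by prior versions
+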
_whatsnew_0200.api_breaking.unique: + +pd.unique will now be consistent with extension types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In prior versions, using :meth:`Series.unique` and :func:`pandas.unique` on ``Categorical`` and tz-aware +data-types would yield different return types. These are now made consistent. (:issue:`15903`) + +- Datetime tz-aware + + Previous behaviour: + + .. code-block:: ipython + + # Series + In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) + + In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')])) + Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + + # Index + In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) + + In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]) + Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + + New behavior: + + .. ipython:: python + + # Series, returns an array of Timestamp tz-aware + pd.Series([pd.Timestamp(r'20160101', tz=r'US/Eastern'), + pd.Timestamp(r'20160101', tz=r'US/Eastern')]).unique() + pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + + # Index, returns a DatetimeIndex + pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + +- Categoricals + + Previous behaviour: + + .. code-block:: ipython + + In [1]: pd.Series(list('baabc'), dtype='category').unique() + Out[1]: + [b, a, c] + Categories (3, object): [b, a, c] + + In [2]: pd.unique(pd.Series(list('baabc'), dtype='category')) + Out[2]: array(['b', 'a', 'c'], dtype=object) + + New behavior: + + .. ipython:: python + + # returns a Categorical + pd.Series(list('baabc'), dtype='category').unique() + pd.unique(pd.Series(list('baabc'), dtype='category')) + +.. _whatsnew_0200.api_breaking.s3: + +S3 file handling +^^^^^^^^^^^^^^^^ + +pandas now uses `s3fs `_ for handling S3 connections. This shouldn't break +any code. However, since ``s3fs`` is not a required dependency, you will need to install it separately, like ``boto`` +in prior versions of pandas. (:issue:`11915`). + +.. _whatsnew_0200.api_breaking.partial_string_indexing: + +Partial string indexing changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:ref:`DatetimeIndex Partial String Indexing ` now works as an exact match, provided that string resolution coincides with index resolution, including a case when both are seconds (:issue:`14826`). See :ref:`Slice vs. Exact Match ` for details. + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2, 3]}, pd.DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01'])) +Previous behavior: + +.. code-block:: ipython + + In [4]: df['2011-12-31 23:59:59'] + Out[4]: + a + 2011-12-31 23:59:59 1 + + In [5]: df['a']['2011-12-31 23:59:59'] + Out[5]: + 2011-12-31 23:59:59 1 + Name: a, dtype: int64 + + +New behavior: + +.. 
code-block:: ipython + + In [4]: df['2011-12-31 23:59:59'] + KeyError: '2011-12-31 23:59:59' + + In [5]: df['a']['2011-12-31 23:59:59'] + Out[5]: 1 + +.. _whatsnew_0200.api_breaking.concat_dtypes: + +Concat of different float dtypes will not automatically upcast +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, ``concat`` of multiple objects with different ``float`` dtypes would automatically upcast results to a dtype of ``float64``. +Now the smallest acceptable dtype will be used (:issue:`13247`) + +.. ipython:: python + + df1 = pd.DataFrame(np.array([1.0], dtype=np.float32, ndmin=2)) + df1.dtypes + + df2 = pd.DataFrame(np.array([np.nan], dtype=np.float32, ndmin=2)) + df2.dtypes + +Previous behavior: + +.. code-block:: ipython + + In [7]: pd.concat([df1, df2]).dtypes + Out[7]: + 0 float64 + dtype: object + +New behavior: + +.. ipython:: python + + pd.concat([df1, df2]).dtypes + +.. _whatsnew_0200.api_breaking.gbq: + +pandas Google BigQuery support has moved +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``conda install pandas-gbq -c conda-forge`` or +``pip install pandas-gbq`` to get it. The functionality of :func:`read_gbq` and :meth:`DataFrame.to_gbq` remain the same with the +currently released version of ``pandas-gbq=0.1.4``. Documentation is now hosted `here `__ (:issue:`15347`) + +.. _whatsnew_0200.api_breaking.memory_usage: + +Memory usage for Index is more accurate +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, showing ``.memory_usage()`` on a pandas structure that has an index, would only include actual index values and not include structures that facilitated fast indexing. This will generally be different for ``Index`` and ``MultiIndex`` and less-so for other index types. (:issue:`15237`) + +Previous behavior: + +.. code-block:: ipython + + In [8]: index = pd.Index(['foo', 'bar', 'baz']) + + In [9]: index.memory_usage(deep=True) + Out[9]: 180 + + In [10]: index.get_loc('foo') + Out[10]: 0 + + In [11]: index.memory_usage(deep=True) + Out[11]: 180 + +New behavior: + +.. code-block:: ipython + + In [8]: index = pd.Index(['foo', 'bar', 'baz']) + + In [9]: index.memory_usage(deep=True) + Out[9]: 180 + + In [10]: index.get_loc('foo') + Out[10]: 0 + + In [11]: index.memory_usage(deep=True) + Out[11]: 260 + +.. _whatsnew_0200.api_breaking.sort_index: + +DataFrame.sort_index changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In certain cases, calling ``.sort_index()`` on a MultiIndexed DataFrame would return the *same* DataFrame without seeming to sort. +This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622`, :issue:`15687`, :issue:`14015`, :issue:`13431`, :issue:`15797`) + +This is *unchanged* from prior versions, but shown for illustration purposes: + +.. ipython:: python + + df = pd.DataFrame(np.arange(6), columns=['value'], + index=pd.MultiIndex.from_product([list('BA'), range(3)])) + df + +.. code-block:: python + + In [87]: df.index.is_lexsorted() + Out[87]: False + + In [88]: df.index.is_monotonic + Out[88]: False + +Sorting works as expected + +.. ipython:: python + + df.sort_index() + +.. code-block:: python + + In [90]: df.sort_index().index.is_lexsorted() + Out[90]: True + + In [91]: df.sort_index().index.is_monotonic + Out[91]: True + +However, this example, which has a non-monotonic 2nd level, +doesn't behave as desired. + +.. 
ipython:: python + + df = pd.DataFrame({'value': [1, 2, 3, 4]}, + index=pd.MultiIndex([['a', 'b'], ['bb', 'aa']], + [[0, 0, 1, 1], [0, 1, 0, 1]])) + df + +Previous behavior: + +.. code-block:: python + + In [11]: df.sort_index() + Out[11]: + value + a bb 1 + aa 2 + b bb 3 + aa 4 + + In [14]: df.sort_index().index.is_lexsorted() + Out[14]: True + + In [15]: df.sort_index().index.is_monotonic + Out[15]: False + +New behavior: + +.. code-block:: python + + In [94]: df.sort_index() + Out[94]: + value + a aa 2 + bb 1 + b aa 4 + bb 3 + + [4 rows x 1 columns] + + In [95]: df.sort_index().index.is_lexsorted() + Out[95]: True + + In [96]: df.sort_index().index.is_monotonic + Out[96]: True + + +.. _whatsnew_0200.api_breaking.groupby_describe: + +GroupBy describe formatting +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The output formatting of ``groupby.describe()`` now labels the ``describe()`` metrics in the columns instead of the index. +This format is consistent with ``groupby.agg()`` when applying multiple functions at once. (:issue:`4792`) + +Previous behavior: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + In [2]: df.groupby('A').describe() + Out[2]: + B + A + 1 count 2.000000 + mean 1.500000 + std 0.707107 + min 1.000000 + 25% 1.250000 + 50% 1.500000 + 75% 1.750000 + max 2.000000 + 2 count 2.000000 + mean 3.500000 + std 0.707107 + min 3.000000 + 25% 3.250000 + 50% 3.500000 + 75% 3.750000 + max 4.000000 + + In [3]: df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + Out[3]: + B + mean std amin amax + A + 1 1.5 0.707107 1 2 + 2 3.5 0.707107 3 4 + +New behavior: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + df.groupby('A').describe() + + df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + +.. _whatsnew_0200.api_breaking.rolling_pairwise: + +Window binary corr/cov operations return a MultiIndex DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A binary window operation, like ``.corr()`` or ``.cov()``, when operating on a ``.rolling(..)``, ``.expanding(..)``, or ``.ewm(..)`` object, +will now return a 2-level ``MultiIndexed DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, +see :ref:`here `. These are equivalent in function, +but a MultiIndexed ``DataFrame`` enjoys more support in pandas. +See the section on :ref:`Windowed Binary Operations ` for more information. (:issue:`15677`) + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.rand(100, 2), + columns=pd.Index(['A', 'B'], name='bar'), + index=pd.date_range('20160101', + periods=100, freq='D', name='foo')) + df.tail() + +Previous behavior: + +.. code-block:: ipython + + In [2]: df.rolling(12).corr() + Out[2]: + + Dimensions: 100 (items) x 2 (major_axis) x 2 (minor_axis) + Items axis: 2016-01-01 00:00:00 to 2016-04-09 00:00:00 + Major_axis axis: A to B + Minor_axis axis: A to B + +New behavior: + +.. ipython:: python + + res = df.rolling(12).corr() + res.tail() + +Retrieving a correlation matrix for a cross-section + +.. ipython:: python + + df.rolling(12).corr().loc['2016-04-07'] + +.. _whatsnew_0200.api_breaking.hdfstore_where: + +HDFStore where string comparison +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions most types could be compared to string column in a ``HDFStore`` +usually resulting in an invalid comparison, returning an empty result frame. These comparisons will now raise a +``TypeError`` (:issue:`15492`) + +.. 
ipython:: python + + df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']}) + df.to_hdf('store.h5', 'key', format='table', data_columns=True) + df.dtypes + +Previous behavior: + +.. code-block:: ipython + + In [4]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') + File "", line 1 + (unparsed_date > 1970-01-01 00:00:01.388552400) + ^ + SyntaxError: invalid token + +New behavior: + +.. code-block:: ipython + + In [18]: ts = pd.Timestamp('2014-01-01') + + In [19]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') + TypeError: Cannot compare 2014-01-01 00:00:00 of + type to string column + +.. ipython:: python + :suppress: + + import os + os.remove('store.h5') + +.. _whatsnew_0200.api_breaking.index_order: + +Index.intersection and inner join now preserve the order of the left Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`Index.intersection` now preserves the order of the calling ``Index`` (left) +instead of the other ``Index`` (right) (:issue:`15582`). This affects inner +joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` method. + +- ``Index.intersection`` + + .. ipython:: python + + left = pd.Index([2, 1, 0]) + left + right = pd.Index([1, 2, 3]) + right + + Previous behavior: + + .. code-block:: ipython + + In [4]: left.intersection(right) + Out[4]: Int64Index([1, 2], dtype='int64') + + New behavior: + + .. ipython:: python + + left.intersection(right) + +- ``DataFrame.join`` and ``pd.merge`` + + .. ipython:: python + + left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + left + right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3]) + right + + Previous behavior: + + .. code-block:: ipython + + In [4]: left.join(right, how='inner') + Out[4]: + a b + 1 10 100 + 2 20 200 + + New behavior: + + .. ipython:: python + + left.join(right, how='inner') + +.. _whatsnew_0200.api_breaking.pivot_table: + +Pivot table always returns a DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The documentation for :meth:`pivot_table` states that a ``DataFrame`` is *always* returned. Here a bug +is fixed that allowed this to return a ``Series`` under certain circumstance. (:issue:`4386`) + +.. ipython:: python + + df = pd.DataFrame({'col1': [3, 4, 5], + 'col2': ['C', 'D', 'E'], + 'col3': [1, 3, 9]}) + df + +Previous behavior: + +.. code-block:: ipython + + In [2]: df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + Out[2]: + col3 col2 + 1 C 3 + 3 D 4 + 9 E 5 + Name: col1, dtype: int64 + +New behavior: + +.. ipython:: python + + df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + +.. _whatsnew_0200.api: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). 
+- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv()`` and will be removed in the future (:issue:`12665`)
+- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
+- ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`)
+- ``Series.map()`` now respects default values of dictionary subclasses with a ``__missing__`` method, such as ``collections.Counter`` (:issue:`15999`)
+- ``.loc`` has compat with ``.ix`` for accepting iterators and NamedTuples (:issue:`15120`)
+- ``interpolate()`` and ``fillna()`` will raise a ``ValueError`` if the ``limit`` keyword argument is not greater than 0. (:issue:`9217`)
+- ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`)
+- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than one byte (:issue:`11592`)
+- ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)
+- ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype``
+- ``DataFrame.asof()`` will return a null filled ``Series`` instead of the scalar ``NaN`` if a match is not found (:issue:`15118`)
+- Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`)
+- ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`)
+- ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`)
+- ``SparseDataFrame.default_fill_value`` will be 0, previously was ``nan`` in the return from ``pd.get_dummies(..., sparse=True)`` (:issue:`15594`)
+- The default behaviour of ``Series.str.match`` has changed from extracting groups to matching the pattern. The extracting behaviour was deprecated since pandas version 0.13.0 and can be done with the ``Series.str.extract`` method (:issue:`5224`). As a consequence, the ``as_indexer`` keyword is ignored (no longer needed to specify the new behaviour) and is deprecated.
+- ``NaT`` will now correctly report ``False`` for datetimelike boolean operations such as ``is_month_start`` (:issue:`15781`)
+- ``NaT`` will now correctly return ``np.nan`` for ``Timedelta`` and ``Period`` accessors such as ``days`` and ``quarter`` (:issue:`15782`)
+- ``NaT`` will now return ``NaT`` for ``tz_localize`` and ``tz_convert`` methods (:issue:`15830`)
+- ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``PandasError``, if called with scalar inputs and not axes (:issue:`15541`)
+- ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``pandas.core.common.PandasError``, if called with scalar inputs and not axes; the exception ``PandasError`` is removed as well. (:issue:`15541`)
+- The exception ``pandas.core.common.AmbiguousIndexError`` is removed as it is not referenced (:issue:`15541`)
+
+
+.. _whatsnew_0200.privacy:
+
+Reorganization of the library: privacy changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+..
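+As a rough illustration of the kind of change involved (a hedged sketch; the exact old and new locations are enumerated in the subsections below), code that reached into previously-public internals should switch to the public entry points:
+
+.. code-block:: python
+
+   # previously: importing from internal locations (now private or deprecated)
+   # from pandas.tslib import Timestamp
+   # from pandas.tools.plotting import scatter_matrix
+
+   # new public locations
+   from pandas import Timestamp
+   from pandas.plotting import scatter_matrix
+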
_whatsnew_0200.privacy.extensions: + +Modules privacy has changed +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some formerly public python/c/c++/cython extension modules have been moved and/or renamed. These are all removed from the public API. +Furthermore, the ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are now considered to be PRIVATE. +If indicated, a deprecation warning will be issued if you reference these modules. (:issue:`12588`) + +.. csv-table:: + :header: "Previous Location", "New Location", "Deprecated" + :widths: 30, 30, 4 + + "pandas.lib", "pandas._libs.lib", "X" + "pandas.tslib", "pandas._libs.tslib", "X" + "pandas.computation", "pandas.core.computation", "X" + "pandas.msgpack", "pandas.io.msgpack", "" + "pandas.index", "pandas._libs.index", "" + "pandas.algos", "pandas._libs.algos", "" + "pandas.hashtable", "pandas._libs.hashtable", "" + "pandas.indexes", "pandas.core.indexes", "" + "pandas.json", "pandas._libs.json / pandas.io.json", "X" + "pandas.parser", "pandas._libs.parsers", "X" + "pandas.formats", "pandas.io.formats", "" + "pandas.sparse", "pandas.core.sparse", "" + "pandas.tools", "pandas.core.reshape", "X" + "pandas.types", "pandas.core.dtypes", "X" + "pandas.io.sas.saslib", "pandas.io.sas._sas", "" + "pandas._join", "pandas._libs.join", "" + "pandas._hash", "pandas._libs.hashing", "" + "pandas._period", "pandas._libs.period", "" + "pandas._sparse", "pandas._libs.sparse", "" + "pandas._testing", "pandas._libs.testing", "" + "pandas._window", "pandas._libs.window", "" + + +Some new subpackages are created with public functionality that is not directly +exposed in the top-level namespace: ``pandas.errors``, ``pandas.plotting`` and +``pandas.testing`` (more details below). Together with ``pandas.api.types`` and +certain functions in the ``pandas.io`` and ``pandas.tseries`` submodules, +these are now the public subpackages. + +Further changes: + +- The function :func:`~pandas.api.types.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`) +- The type import ``pandas.tslib.NaTType`` is deprecated and can be replaced by using ``type(pandas.NaT)`` (:issue:`16146`) +- The public functions in ``pandas.tools.hashing`` deprecated from that locations, but are now importable from ``pandas.util`` (:issue:`16223`) +- The modules in ``pandas.util``: ``decorators``, ``print_versions``, ``doctools``, ``validators``, ``depr_module`` are now private. Only the functions exposed in ``pandas.util`` itself are public (:issue:`16223`) + +.. _whatsnew_0200.privacy.errors: + +``pandas.errors`` +^^^^^^^^^^^^^^^^^ + +We are adding a standard public module for all pandas exceptions & warnings ``pandas.errors``. (:issue:`14800`). Previously +these exceptions & warnings could be imported from ``pandas.core.common`` or ``pandas.io.common``. These exceptions and warnings +will be removed from the ``*.common`` locations in a future release. (:issue:`15541`) + +The following are now part of this API: + +.. code-block:: python + + ['DtypeWarning', + 'EmptyDataError', + 'OutOfBoundsDatetime', + 'ParserError', + 'ParserWarning', + 'PerformanceWarning', + 'UnsortedIndexError', + 'UnsupportedFunctionCall'] + + +.. _whatsnew_0200.privacy.testing: + +``pandas.testing`` +^^^^^^^^^^^^^^^^^^ + +We are adding a standard module that exposes the public testing functions in ``pandas.testing`` (:issue:`9895`). Those functions can be used when writing tests for functionality using pandas objects. 
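+For example, a test comparing a computed result against an expected frame might look like this (a minimal sketch):
+
+.. code-block:: python
+
+   import pandas as pd
+   import pandas.testing as tm
+
+   result = pd.DataFrame({"a": [1, 2, 3]})
+   expected = pd.DataFrame({"a": [1, 2, 3]})
+   tm.assert_frame_equal(result, expected)
+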
+ +The following testing functions are now part of this API: + +- :func:`testing.assert_frame_equal` +- :func:`testing.assert_series_equal` +- :func:`testing.assert_index_equal` + + +.. _whatsnew_0200.privacy.plotting: + +``pandas.plotting`` +^^^^^^^^^^^^^^^^^^^ + +A new public ``pandas.plotting`` module has been added that holds plotting functionality that was previously in either ``pandas.tools.plotting`` or in the top-level namespace. See the :ref:`deprecations sections ` for more details. + +.. _whatsnew_0200.privacy.development: + +Other development changes +^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) +- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) +- Switched the test framework to use `pytest `__ (:issue:`13097`) +- Reorganization of tests directory layout (:issue:`14854`, :issue:`15707`). + + +.. _whatsnew_0200.deprecations: + +Deprecations +~~~~~~~~~~~~ + +.. _whatsnew_0200.api_breaking.deprecate_ix: + +Deprecate ``.ix`` +^^^^^^^^^^^^^^^^^ + +The ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc`` and ``.loc`` indexers. ``.ix`` offers a lot of magic on the inference of what the user wants to do. More specifically, ``.ix`` can decide to index *positionally* OR via *labels*, depending on the data type of the index. This has caused quite a bit of user confusion over the years. The full indexing documentation is :ref:`here `. (:issue:`14218`) + +The recommended methods of indexing are: + +- ``.loc`` if you want to *label* index +- ``.iloc`` if you want to *positionally* index. + +Using ``.ix`` will now show a ``DeprecationWarning`` with a link to some examples of how to convert code `here `__. + + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6]}, + index=list('abc')) + + df + +Previous behavior, where you wish to get the 0th and the 2nd elements from the index in the 'A' column. + +.. code-block:: ipython + + In [3]: df.ix[[0, 2], 'A'] + Out[3]: + a 1 + c 3 + Name: A, dtype: int64 + +Using ``.loc``. Here we will select the appropriate indexes from the index, then use *label* indexing. + +.. ipython:: python + + df.loc[df.index[[0, 2]], 'A'] + +Using ``.iloc``. Here we will get the location of the 'A' column, then use *positional* indexing to select things. + +.. ipython:: python + + df.iloc[[0, 2], df.columns.get_loc('A')] + + +.. _whatsnew_0200.api_breaking.deprecate_panel: + +Deprecate Panel +^^^^^^^^^^^^^^^ + +``Panel`` is deprecated and will be removed in a future version. The recommended way to represent 3-D data are +with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. pandas +provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`13563`). + +.. code-block:: ipython + + In [133]: import pandas._testing as tm + + In [134]: p = tm.makePanel() + + In [135]: p + Out[135]: + + Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + +Convert to a MultiIndex DataFrame + +.. 
code-block:: ipython + + In [136]: p.to_frame() + Out[136]: + ItemA ItemB ItemC + major minor + 2000-01-03 A 0.628776 -1.409432 0.209395 + B 0.988138 -1.347533 -0.896581 + C -0.938153 1.272395 -0.161137 + D -0.223019 -0.591863 -1.051539 + 2000-01-04 A 0.186494 1.422986 -0.592886 + B -0.072608 0.363565 1.104352 + C -1.239072 -1.449567 0.889157 + D 2.123692 -0.414505 -0.319561 + 2000-01-05 A 0.952478 -2.147855 -1.473116 + B -0.550603 -0.014752 -0.431550 + C 0.139683 -1.195524 0.288377 + D 0.122273 -1.425795 -0.619993 + + [12 rows x 3 columns] + +Convert to an xarray DataArray + +.. code-block:: ipython + + In [137]: p.to_xarray() + Out[137]: + + array([[[ 0.628776, 0.988138, -0.938153, -0.223019], + [ 0.186494, -0.072608, -1.239072, 2.123692], + [ 0.952478, -0.550603, 0.139683, 0.122273]], + + [[-1.409432, -1.347533, 1.272395, -0.591863], + [ 1.422986, 0.363565, -1.449567, -0.414505], + [-2.147855, -0.014752, -1.195524, -1.425795]], + + [[ 0.209395, -0.896581, -0.161137, -1.051539], + [-0.592886, 1.104352, 0.889157, -0.319561], + [-1.473116, -0.43155 , 0.288377, -0.619993]]]) + Coordinates: + * items (items) object 'ItemA' 'ItemB' 'ItemC' + * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 + * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' + +.. _whatsnew_0200.api_breaking.deprecate_group_agg_dict: + +Deprecate groupby.agg() with a dictionary when renaming +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``.groupby(..).agg(..)``, ``.rolling(..).agg(..)``, and ``.resample(..).agg(..)`` syntax can accept a variable of inputs, including scalars, +list, and a dict of column names to scalars or lists. This provides a useful syntax for constructing multiple +(potentially different) aggregations. + +However, ``.agg(..)`` can *also* accept a dict that allows 'renaming' of the result columns. This is a complicated and confusing syntax, as well as not consistent +between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionality. + +- We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed + one to ``rename`` the resulting aggregation, but this had a completely different + meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations. +- We are deprecating passing a dict-of-dicts to a grouped/rolled/resampled ``DataFrame`` in a similar manner. + +This is an illustrative example: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 1, 1, 2, 2], + 'B': range(5), + 'C': range(5)}) + df + +Here is a typical useful syntax for computing different aggregations for different columns. This +is a natural, and useful syntax. We aggregate from the dict-to-list by taking the specified +columns and applying the list of functions. This returns a ``MultiIndex`` for the columns (this is *not* deprecated). + +.. ipython:: python + + df.groupby('A').agg({'B': 'sum', 'C': 'min'}) + +Here's an example of the first deprecation, passing a dict to a grouped ``Series``. This +is a combination aggregation & renaming: + +.. code-block:: ipython + + In [6]: df.groupby('A').B.agg({'foo': 'count'}) + FutureWarning: using a dict on a Series for aggregation + is deprecated and will be removed in a future version + + Out[6]: + foo + A + 1 3 + 2 2 + +You can accomplish the same operation, more idiomatically by: + +.. 
ipython:: python + + df.groupby('A').B.agg(['count']).rename(columns={'count': 'foo'}) + + +Here's an example of the second deprecation, passing a dict-of-dict to a grouped ``DataFrame``: + +.. code-block:: python + + In [23]: (df.groupby('A') + ...: .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}}) + ...: ) + FutureWarning: using a dict with renaming is deprecated and + will be removed in a future version + + Out[23]: + B C + foo bar + A + 1 3 0 + 2 7 3 + + +You can accomplish nearly the same by: + +.. ipython:: python + + (df.groupby('A') + .agg({'B': 'sum', 'C': 'min'}) + .rename(columns={'B': 'foo', 'C': 'bar'}) + ) + + + +.. _whatsnew_0200.privacy.deprecate_plotting: + +Deprecate .plotting +^^^^^^^^^^^^^^^^^^^ + +The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available +from ``pandas.plotting`` (:issue:`12548`). + +Furthermore, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are deprecated. +Users can import these from ``pandas.plotting`` as well. + +Previous script: + +.. code-block:: python + + pd.tools.plotting.scatter_matrix(df) + pd.scatter_matrix(df) + +Should be changed to: + +.. code-block:: python + + pd.plotting.scatter_matrix(df) + + + +.. _whatsnew_0200.deprecations.other: + +Other deprecations +^^^^^^^^^^^^^^^^^^ + +- ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`) +- ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`) +- ``Series.repeat()`` has deprecated the ``reps`` parameter in favor of ``repeats`` (:issue:`12662`) +- The ``Series`` constructor and ``.astype`` method have deprecated accepting timestamp dtypes without a frequency (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15524`) +- ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`) +- ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`) +- ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`) +- ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`) +- ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) +- importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explicit imports (:issue:`15358`) +- ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) +- The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`). +- The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`, :issue:`15940`) + + * ``pd.pnow()``, replaced by ``Period.now()`` + * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore + * ``pd.Expr``, is removed, as it is not applicable to user code. + * ``pd.match()``, is removed. 
+ * ``pd.groupby()``, replaced by using the ``.groupby()`` method directly on a ``Series/DataFrame`` + * ``pd.get_store()``, replaced by a direct call to ``pd.HDFStore(...)`` +- ``is_any_int_dtype``, ``is_floating_dtype``, and ``is_sequence`` are deprecated from ``pandas.api.types`` (:issue:`16042`) + +.. _whatsnew_0200.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- The ``pandas.rpy`` module is removed. Similar functionality can be accessed + through the `rpy2 `__ project. + See the `R interfacing docs `__ for more details. +- The ``pandas.io.ga`` module with a ``google-analytics`` interface is removed (:issue:`11308`). + Similar functionality can be found in the `Google2Pandas `__ package. +- ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) +- ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionality can be found in the `statsmodels `__ package. (:issue:`11898`) +- The ``TimeSeries`` and ``SparseTimeSeries`` classes, aliases of ``Series`` + and ``SparseSeries``, are removed (:issue:`10890`, :issue:`15098`). +- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:`15098`) +- The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed + in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). +- The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) +- The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) +- ``Categorical`` has dropped support for ``NaN`` categories (:issue:`10748`) +- The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) +- ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) +- Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) +- ``DataFrame`` has dropped the ``combineAdd`` and ``combineMult`` methods in favor of ``add`` and ``mul`` respectively (:issue:`10735`) + +.. _whatsnew_0200.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of ``pd.wide_to_long()`` (:issue:`14779`) +- Improved performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`, :issue:`16057`) +- Improved performance of timeseries plotting with an irregular DatetimeIndex + (or with ``compat_x=True``) (:issue:`15073`). +- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`, :issue:`15635`) +- Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) +- When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. 
(:issue:`14947`)
+- Improved performance of ``.rank()`` for categorical data (:issue:`15498`)
+- Improved performance when using ``.unstack()`` (:issue:`15503`)
+- Improved performance of merge/join on ``category`` columns (:issue:`10409`)
+- Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`)
+- Improved performance of ``pd.core.groupby.GroupBy.apply`` when the applied function used the ``.name`` attribute of the group DataFrame (:issue:`15062`).
+- Improved performance of ``iloc`` indexing with a list or array (:issue:`15504`).
+- Improved performance of ``Series.sort_index()`` with a monotonic index (:issue:`15694`)
+- Improved performance in ``pd.read_csv()`` on some platforms with buffered reads (:issue:`16039`)
+
+.. _whatsnew_0200.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+
+Conversion
+^^^^^^^^^^
+
+- Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`)
+- Bug in ``Timestamp.replace`` with compat for passing long integers (:issue:`15030`)
+- Bug in ``Timestamp`` returning UTC based time/date attributes when a timezone was provided (:issue:`13303`, :issue:`6538`)
+- Bug in ``Timestamp`` incorrectly localizing timezones during construction (:issue:`11481`, :issue:`15777`)
+- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
+- Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`)
+- Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`)
+- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue:`14440`, :issue:`15578`)
+- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. This now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`)
+- Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`)
+- Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)
+- Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`)
+- Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`)
+- Bug in ``.replace()`` that may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`)
+- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`)
+- Bug in ``Series.replace`` which replaced a numeric value with a string (:issue:`15743`)
+- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`)
+- Bug in ``Series`` construction with a datetimetz (:issue:`14928`)
+- Bug in ``Series.dt.round()`` with inconsistent behaviour on ``NaT`` values with different arguments (:issue:`14940`)
+- Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`)
+- A ``Series`` with incorrect dtype was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`)
+- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes.
(:issue:`14956`) +- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) +- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) +- Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`) +- Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`) +- Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`) +- Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`) +- Bug in ``Index.to_series()`` where the index was not copied (and so mutating later would change the original), (:issue:`15949`) +- Bug in indexing with partial string indexing with a len-1 DataFrame (:issue:`16071`) +- Bug in ``Series`` construction where passing invalid dtype didn't raise an error. (:issue:`15520`) + +Indexing +^^^^^^^^ + +- Bug in ``Index`` power operations with reversed operands (:issue:`14973`) +- Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) +- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) +- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`) +- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) +- Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) +- Bug in ``Series.where()`` where TZ-aware data was converted to float representation (:issue:`15701`) +- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) +- Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) +- Bug in ``Categorical.searchsorted()`` where alphabetical instead of the provided categorical order was used (:issue:`14522`) +- Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
(:issue:`14580`) +- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) +- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) +- Bug in ``.reset_index()`` when raising error for index name already present in ``MultiIndex`` columns (:issue:`16120`) +- Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) +- Bug in the HTML display with a ``MultiIndex`` and truncation (:issue:`14882`) +- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) +- Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`) +- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`) +- Bug in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`) +- Bug in indexing with a scalar and a ``CategoricalIndex`` (:issue:`16123`) + +IO +^^ + +- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) +- Bug in ``pd.read_fwf()`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) +- Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) +- Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) +- Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) +- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) +- Bug in ``pd.read_csv()`` with ``parse_dates`` when multi-line headers are specified (:issue:`15376`) +- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) +- Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) +- Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`) +- Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) +- Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`) +- Bug in ``pd.to_csv()`` in which there was numeric overflow when a timestamp index was being written (:issue:`15982`) +- Bug in ``pd.util.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. 
(:issue:`15143`)
+- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`)
+- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`)
+- Bug in ``.to_json()`` for the C engine where rollover was not correctly handled for the case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`)
+- Bug in ``pd.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`)
+- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`)
+- Bug in ``pd.read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`)
+- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`)
+- Bug in ``DataFrame.to_records()`` when converting a ``DatetimeIndex`` with a timezone (:issue:`13937`)
+- Bug in ``DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`)
+- Bug in ``.to_sql()`` when writing a DataFrame with numeric index names (:issue:`15404`).
+- Bug in ``DataFrame.to_html()`` with ``index=False`` and ``max_rows`` raising an ``IndexError`` (:issue:`14998`)
+- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non-date column (:issue:`15492`)
+- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produced incorrectly formatted files for some locales (:issue:`13856`)
+- Bug in ``StataReader`` and ``StataWriter`` which allowed invalid encodings (:issue:`15723`)
+- Bug in the ``Series`` repr not showing the length when the output was truncated (:issue:`15962`).
+
+Plotting
+^^^^^^^^
+
+- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`)
+- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
+- Bug in the date and time converters pandas registers with matplotlib not handling multiple dimensions (:issue:`16026`)
+- Bug in ``pd.scatter_matrix()`` which could accept either ``color`` or ``c``, but not both (:issue:`14855`)
+
+GroupBy/resample/rolling
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg.
(:issue:`15021`) +- Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) +- Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) +- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`, :issue:`13966`) +- Bug in groupby operations with ``timedelta64`` when passing ``numeric_only=False`` (:issue:`5724`) +- Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`) +- Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) +- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) +- Bug in ``groupby().nunique()`` with a datetimelike-grouper where bins counts were incorrect (:issue:`13453`) +- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) +- Bug in ``.rolling/expanding()`` functions where ``count()`` was not counting ``np.Inf``, nor handling ``object`` dtypes (:issue:`12541`) +- Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`) +- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) +- Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) + +Sparse +^^^^^^ + +- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) +- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) +- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) +- Bug in sparse array indexing in which indices were not being validated (:issue:`15863`) + +Reshaping +^^^^^^^^^ + +- Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` was specified (:issue:`15676`) +- Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) +- Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) +- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) +- Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) +- Bug in ``pd.concat()`` in which concatenating with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) +- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) +- Bug in ``DataFrame.nsmallest`` and ``DataFrame.nlargest`` where identical values resulted in duplicated rows (:issue:`15297`) +- Bug in :func:`pandas.pivot_table` incorrectly raising ``UnicodeError`` when passing unicode input for ``margins`` keyword (:issue:`13292`) + +Numeric +^^^^^^^ + +- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) +- Bug in ``.corr()`` and ``.cov()`` where the column and index were the same object (:issue:`14617`) +- Bug in 
``.mode()`` where ``mode`` was not returned if there was only a single value (:issue:`15714`) +- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) +- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) +- Bug in ``pandas.tools.utils.cartesian_product()`` where large input could cause an overflow on Windows (:issue:`15265`) +- Bug in ``.eval()`` which caused multi-line evals to fail with local variables not on the first line (:issue:`15342`) + +Other +^^^^^ + +- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) +- Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) +- Use of ``np.finfo()`` during ``import pandas`` was removed to mitigate a deadlock on Python GIL misuse (:issue:`14641`) + + +.. _whatsnew_0.20.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.19.2..v0.20.0 diff --git a/doc/source/whatsnew/v0.20.2.rst b/doc/source/whatsnew/v0.20.2.rst new file mode 100644 index 00000000..430a39d2 --- /dev/null +++ b/doc/source/whatsnew/v0.20.2.rst @@ -0,0 +1,143 @@ +.. _whatsnew_0202: + +Version 0.20.2 (June 4, 2017) +----------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.20.2 + :local: + :backlinks: none + + +.. _whatsnew_0202.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) +- ``Series`` provides a ``to_latex`` method (:issue:`16180`) + +- A new groupby method :meth:`~pandas.core.groupby.GroupBy.ngroup`, + parallel to the existing :meth:`~pandas.core.groupby.GroupBy.cumcount`, + has been added to return the group order (:issue:`11642`); see + :ref:`here `. + +.. _whatsnew_0202.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Performance regression fix when indexing with a list-like (:issue:`16285`) +- Performance regression fix for MultiIndexes (:issue:`16319`, :issue:`16346`) +- Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`) +- Improved performance of groupby with categorical groupers (:issue:`16413`) +- Improved performance of ``MultiIndex.remove_unused_levels()`` (:issue:`16556`) + +.. _whatsnew_0202.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when + detecting the terminal size. This fix only applies to Python 3 (:issue:`16496`) +- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) +- Bug in ``Index.symmetric_difference()`` on two equal MultiIndexes, resulting in a ``TypeError`` (:issue:`13490`) +- Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN`` values (:issue:`15593`) +- Passing an invalid engine to :func:`read_csv` now raises an informative + ``ValueError`` rather than ``UnboundLocalError``.
(:issue:`16511`) +- Bug in :func:`unique` on an array of tuples (:issue:`16519`) +- Bug in :func:`cut` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`) +- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on ``Categoricals`` (:issue:`16409`) + +Conversion +^^^^^^^^^^ + +- Bug in :func:`to_numeric` in which empty data inputs were causing a segfault of the interpreter (:issue:`16302`) +- Silence numpy warnings when broadcasting ``DataFrame`` to ``Series`` with comparison ops (:issue:`16378`, :issue:`16306`) + + +Indexing +^^^^^^^^ + +- Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`) +- Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`) +- Bug in ``MultiIndex.remove_unused_levels()`` that would not return a ``MultiIndex`` equal to the original. (:issue:`16556`) + +IO +^^ + +- Bug in :func:`read_csv` when ``comment`` is passed in a space delimited text file (:issue:`16472`) +- Bug in :func:`read_csv` not raising an exception with nonexistent columns in ``usecols`` when it had the correct length (:issue:`14671`) +- Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) +- Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`) +- Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) +- Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`) +- Bug where ``pd.read_hdf()`` returns numpy strings for index names (:issue:`13492`) + +- Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) + +Plotting +^^^^^^^^ + +- Bug in ``DataFrame.plot`` with a single column and a list-like ``color`` (:issue:`3486`) +- Bug in ``plot`` where ``NaT`` in ``DatetimeIndex`` results in ``Timestamp.min`` (:issue:`12405`) +- Bug in ``DataFrame.boxplot`` where ``figsize`` keyword was not respected for non-grouped boxplots (:issue:`11959`) + + + + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in creating a time-based rolling window on an empty ``DataFrame`` (:issue:`15819`) +- Bug in ``rolling.cov()`` with offset window (:issue:`16058`) +- Bug in ``.resample()`` and ``.groupby()`` when aggregating on integers (:issue:`16361`) + + +Sparse +^^^^^^ + +- Bug in construction of ``SparseDataFrame`` from ``scipy.sparse.dok_matrix`` (:issue:`16179`) + +Reshaping +^^^^^^^^^ + +- Bug in ``DataFrame.stack`` with unsorted levels in ``MultiIndex`` columns (:issue:`16323`) +- Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) +- Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) +- Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. 
(:issue:`16395`) +- Bug in ``DataFrame.agg()`` and ``Series.agg()`` with aggregating on non-callable attributes (:issue:`16405`) + + +Numeric +^^^^^^^ +- Bug in ``.interpolate()``, where ``limit_direction`` was not respected when ``limit=None`` (default) was passed (:issue:`16282`) + +Categorical +^^^^^^^^^^^ + +- Fixed comparison operations considering the order of the categories when both categoricals are unordered (:issue:`16014`) + +Other +^^^^^ + +- Bug in ``DataFrame.drop()`` with an empty-list with non-unique indices (:issue:`16270`) + + +.. _whatsnew_0.20.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.20.0..v0.20.2 diff --git a/doc/source/whatsnew/v0.20.3.rst b/doc/source/whatsnew/v0.20.3.rst new file mode 100644 index 00000000..ff28f683 --- /dev/null +++ b/doc/source/whatsnew/v0.20.3.rst @@ -0,0 +1,76 @@ +.. _whatsnew_0203: + +Version 0.20.3 (July 7, 2017) +----------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.20.3 + :local: + :backlinks: none + +.. _whatsnew_0203.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) +- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) + +Conversion +^^^^^^^^^^ + +- Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`) +- Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`). +- Bug in :meth:`DataFrame.astype` when passing a ``Series`` as the ``dtype`` kwarg. (:issue:`16717`). + +Indexing +^^^^^^^^ + +- Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`) +- Bug in ``MultiIndex.isin`` causing an error when passing an empty iterable (:issue:`16777`) +- Fixed a bug in a slicing DataFrame/Series that have a ``TimedeltaIndex`` (:issue:`16637`) + +IO +^^ + +- Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`) +- Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) +- Bug in :meth:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold whatever parameter provided. 
(:issue:`16707`) +- Fixed an issue with :meth:`DataFrame.style` where generated element ids were not unique (:issue:`16780`) +- Fixed loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) + +Plotting +^^^^^^^^ + +- Fixed regression that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`) +- Fixed an issue with :meth:`DataFrame.plot.scatter` that incorrectly raised a ``KeyError`` when categorical data is used for plotting (:issue:`16199`) + +Reshaping +^^^^^^^^^ + +- ``PeriodIndex`` / ``TimedeltaIndex.join`` was missing the ``sort=`` kwarg (:issue:`16541`) +- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). +- Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`) + +Categorical +^^^^^^^^^^^ + +- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`) + + +.. _whatsnew_0.20.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.20.2..v0.20.3 diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst new file mode 100644 index 00000000..1bbbbdc7 --- /dev/null +++ b/doc/source/whatsnew/v0.21.0.rst @@ -0,0 +1,1198 @@ +.. _whatsnew_0210: + +Version 0.21.0 (October 27, 2017) +--------------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a major release from 0.20.3 and includes a number of API changes, deprecations, new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `. +- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying + categoricals independent of the data, see :ref:`here `. +- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here `. +- Compatibility fixes for pypy, see :ref:`here `. +- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here `. +- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here `) and ``GroupBy.pipe`` (see :ref:`here `). +- Indexing with a list of labels, where one or more of the labels is missing, is deprecated and will raise a KeyError in a future version, see :ref:`here `. + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.21.0 + :local: + :backlinks: none + :depth: 2 + +.. _whatsnew_0210.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0210.enhancements.parquet: + +Integration with Apache Parquet file format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here ` (:issue:`15838`, :issue:`17438`). + +`Apache Parquet `__ provides a cross-language, binary file format for reading and writing data frames efficiently. 
+Parquet is designed to faithfully serialize and de-serialize ``DataFrame`` s, supporting all of the pandas +dtypes, including extension dtypes such as datetime with timezones. + +This functionality depends on either the `pyarrow `__ or `fastparquet `__ library. +For more details, see :ref:`the IO docs on Parquet `. + + +.. _whatsnew_0210.enhancements.infer_objects: + +Method ``infer_objects`` type conversion +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`DataFrame.infer_objects` and :meth:`Series.infer_objects` +methods have been added to perform dtype inference on object columns, replacing +some of the functionality of the deprecated ``convert_objects`` +method. See the documentation :ref:`here ` +for more details. (:issue:`11221`) + +This method only performs soft conversions on object columns, converting Python objects +to native types, but not any coercive conversions. For example: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': np.array([1, 2, 3], dtype='object'), + 'C': ['1', '2', '3']}) + df.dtypes + df.infer_objects().dtypes + +Note that column ``'C'`` was not converted - only scalar numeric types +will be converted to a new type. Other types of conversion should be accomplished +using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedelta`). + +.. ipython:: python + + df = df.infer_objects() + df['C'] = pd.to_numeric(df['C'], errors='coerce') + df.dtypes + +.. _whatsnew_0210.enhancements.attribute_access: + +Improved warnings when attempting to create columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +New users are often puzzled by the relationship between column operations and +attribute access on ``DataFrame`` instances (:issue:`7175`). One specific +instance of this confusion is attempting to create a new column by setting an +attribute on the ``DataFrame``: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In [2]: df.two = [4, 5, 6] + +This does not raise any obvious exceptions, but also does not create a new column: + +.. code-block:: ipython + + In [3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +Setting a list-like data structure into a new attribute now raises a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. + +.. _whatsnew_0210.enhancements.drop_api: + +Method ``drop`` now also accepts index/columns keywords +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`~DataFrame.drop` method has gained ``index``/``columns`` keywords as an +alternative to specifying the ``axis``. This is similar to the behavior of ``reindex`` +(:issue:`12392`). + +For example: + +.. ipython:: python + + df = pd.DataFrame(np.arange(8).reshape(2, 4), + columns=['A', 'B', 'C', 'D']) + df + df.drop(['B', 'C'], axis=1) + # the following is now equivalent + df.drop(columns=['B', 'C']) + +.. _whatsnew_0210.enhancements.rename_reindex_axis: + +Methods ``rename``, ``reindex`` now also accept axis keyword +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`DataFrame.rename` and :meth:`DataFrame.reindex` methods have gained +the ``axis`` keyword to specify the axis to target with the operation +(:issue:`12392`). + +Here's ``rename``: + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df.rename(str.lower, axis='columns') + df.rename(id, axis='index') + +And ``reindex``: + +.. 
ipython:: python + + df.reindex(['A', 'B', 'C'], axis='columns') + df.reindex([0, 1, 3], axis='index') + +The "index, columns" style continues to work as before. + +.. ipython:: python + + df.rename(index=id, columns=str.lower) + df.reindex(index=[0, 1, 3], columns=['A', 'B', 'C']) + +We *highly* encourage using named arguments to avoid confusion when using either +style. + +.. _whatsnew_0210.enhancements.categorical_dtype: + +``CategoricalDtype`` for specifying categoricals +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`pandas.api.types.CategoricalDtype` has been added to the public API and +expanded to include the ``categories`` and ``ordered`` attributes. A +``CategoricalDtype`` can be used to specify the set of categories and +orderedness of an array, independent of the data. This can be useful for example, +when converting string data to a ``Categorical`` (:issue:`14711`, +:issue:`15078`, :issue:`16015`, :issue:`17643`): + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + s = pd.Series(['a', 'b', 'c', 'a']) # strings + dtype = CategoricalDtype(categories=['a', 'b', 'c', 'd'], ordered=True) + s.astype(dtype) + +One place that deserves special mention is in :meth:`read_csv`. Previously, with +``dtype={'col': 'category'}``, the returned values and categories would always +be strings. + +.. ipython:: python + :suppress: + + from io import StringIO + +.. ipython:: python + + data = 'A,B\na,1\nb,2\nc,3' + pd.read_csv(StringIO(data), dtype={'B': 'category'}).B.cat.categories + +Notice the "object" dtype. + +With a ``CategoricalDtype`` of all numerics, datetimes, or +timedeltas, we can automatically convert to the correct type + +.. ipython:: python + + dtype = {'B': CategoricalDtype([1, 2, 3])} + pd.read_csv(StringIO(data), dtype=dtype).B.cat.categories + +The values have been correctly interpreted as integers. + +The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a +``Series`` with categorical type will now return an instance of +``CategoricalDtype``. While the repr has changed, ``str(CategoricalDtype())`` is +still the string ``'category'``. We'll take this moment to remind users that the +*preferred* way to detect categorical data is to use +:func:`pandas.api.types.is_categorical_dtype`, and not ``str(dtype) == 'category'``. + +See the :ref:`CategoricalDtype docs ` for more. + +.. _whatsnew_0210.enhancements.GroupBy_pipe: + +``GroupBy`` objects now have a ``pipe`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``GroupBy`` objects now have a ``pipe`` method, similar to the one on +``DataFrame`` and ``Series``, that allow for functions that take a +``GroupBy`` to be composed in a clean, readable syntax. (:issue:`17871`) + +For a concrete example on combining ``.groupby`` and ``.pipe`` , imagine having a +DataFrame with columns for stores, products, revenue and sold quantity. We'd like to +do a groupwise calculation of *prices* (i.e. revenue/quantity) per store and per product. +We could do this in a multi-step operation, but expressing it in terms of piping can make the +code more readable. + +First we set the data: + +.. ipython:: python + + import numpy as np + n = 1000 + df = pd.DataFrame({'Store': np.random.choice(['Store_1', 'Store_2'], n), + 'Product': np.random.choice(['Product_1', + 'Product_2', + 'Product_3' + ], n), + 'Revenue': (np.random.random(n) * 50 + 10).round(2), + 'Quantity': np.random.randint(1, 10, size=n)}) + df.head(2) + +Now, to find prices per store/product, we can simply do: + +.. 
ipython:: python + + (df.groupby(['Store', 'Product']) + .pipe(lambda grp: grp.Revenue.sum() / grp.Quantity.sum()) + .unstack().round(2)) + +See the :ref:`documentation ` for more. + + +.. _whatsnew_0210.enhancements.rename_categories: + +``Categorical.rename_categories`` accepts a dict-like +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`~Series.cat.rename_categories` now accepts a dict-like argument for +``new_categories``. The previous categories are looked up in the dictionary's +keys and replaced if found. The behavior of missing and extra keys is the same +as in :meth:`DataFrame.rename`. + +.. ipython:: python + + c = pd.Categorical(['a', 'a', 'b']) + c.rename_categories({"a": "eh", "b": "bee"}) + +.. warning:: + + To assist with upgrading pandas, ``rename_categories`` treats ``Series`` as + list-like. Typically, Series are considered to be dict-like (e.g. in + ``.rename``, ``.map``). In a future version of pandas ``rename_categories`` + will change to treat them as dict-like. Follow the warning message's + recommendations for writing future-proof code. + + .. code-block:: ipython + + In [33]: c.rename_categories(pd.Series([0, 1], index=['a', 'c'])) + FutureWarning: Treating Series 'new_categories' as a list-like and using the values. + In a future version, 'rename_categories' will treat Series like a dictionary. + For dict-like, use 'new_categories.to_dict()' + For list-like, use 'new_categories.values'. + Out[33]: + [0, 0, 1] + Categories (2, int64): [0, 1] + + +.. _whatsnew_0210.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +New functions or methods +"""""""""""""""""""""""" + +- :meth:`~pandas.core.resample.Resampler.nearest` is added to support nearest-neighbor upsampling (:issue:`17496`). +- :class:`~pandas.Index` has added support for a ``to_frame`` method (:issue:`15230`). + +New keywords +"""""""""""" + +- Added a ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to + support type inference in the presence of missing values (:issue:`17059`). +- :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) +- :func:`Series.set_axis` and :func:`DataFrame.set_axis` now support the ``inplace`` parameter. (:issue:`14636`) +- :func:`Series.to_pickle` and :func:`DataFrame.to_pickle` have gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ +- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) +- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`) +- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`) +- :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`) +- :func:`read_json` and :func:`~DataFrame.to_json` now accept a ``compression`` argument which allows them to transparently handle compressed files. (:issue:`17798`) + +Various enhancements +"""""""""""""""""""" + +- Improved the import time of pandas by about 2.25x. 
(:issue:`16764`) +- Support for `PEP 519 -- Adding a file system path protocol + `_ on most readers (e.g. + :func:`read_csv`) and writers (e.g. :meth:`DataFrame.to_csv`) (:issue:`13823`). +- Added a ``__fspath__`` method to ``pd.HDFStore``, ``pd.ExcelFile``, + and ``pd.ExcelWriter`` to work properly with the file system path protocol (:issue:`13823`). +- The ``validate`` argument for :func:`merge` now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) +- Added support for `PEP 518 `_ (``pyproject.toml``) to the build system (:issue:`16745`) +- :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`) +- :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`) +- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`) +- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) +- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year. (:issue:`9313`) +- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year. (:issue:`9313`) +- :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) +- Read/write methods that infer compression (:func:`read_csv`, :func:`read_table`, :func:`read_pickle`, and :meth:`~DataFrame.to_pickle`) can now infer from path-like objects, such as ``pathlib.Path``. (:issue:`17206`) +- :func:`read_sas` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files. (:issue:`15871`) +- :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and is lazy in all cases. (:issue:`13918`, :issue:`17213`) +- :meth:`pandas.io.formats.style.Styler.where` has been implemented as a convenience for :meth:`pandas.io.formats.style.Styler.applymap`. (:issue:`17474`) +- :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) +- :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) +- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`) +- :func:`Series.reindex`, :func:`DataFrame.reindex`, :func:`Index.get_indexer` now support list-like argument for ``tolerance``. (:issue:`17367`) + +.. _whatsnew_0210.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0210.api_breaking.deps: + +Dependencies have increased minimum versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`). 
+If installed, we now require: + + +--------------+-----------------+----------+ + | Package | Minimum Version | Required | + +==============+=================+==========+ + | Numpy | 1.9.0 | X | + +--------------+-----------------+----------+ + | Matplotlib | 1.4.3 | | + +--------------+-----------------+----------+ + | Scipy | 0.14.0 | | + +--------------+-----------------+----------+ + | Bottleneck | 1.0.0 | | + +--------------+-----------------+----------+ + +Additionally, support has been dropped for Python 3.4 (:issue:`15251`). + + +.. _whatsnew_0210.api_breaking.bottleneck: + +Sum/prod of all-NaN or empty Series/DataFrames is now consistently NaN +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + The changes described here have been partially reverted. See + the :ref:`v0.22.0 Whatsnew ` for more. + + +The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames no longer depends on +whether `bottleneck `__ is installed, and return value of ``sum`` and ``prod`` on an empty Series has changed (:issue:`9422`, :issue:`15507`). + +Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, will result in ``NaN``. See the :ref:`docs `. + +.. ipython:: python + + s = pd.Series([np.nan]) + +Previously WITHOUT ``bottleneck`` installed: + +.. code-block:: ipython + + In [2]: s.sum() + Out[2]: np.nan + +Previously WITH ``bottleneck``: + +.. code-block:: ipython + + In [2]: s.sum() + Out[2]: 0.0 + +New behavior, without regard to the bottleneck installation: + +.. ipython:: python + + s.sum() + +Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottleneck`` installation: + +.. code-block:: ipython + + In [1]: pd.Series([]).sum() + Out[1]: 0 + +but for consistency with the all-NaN case, this was changed to return NaN as well: + +.. ipython:: python + :okwarning: + + pd.Series([]).sum() + + +.. _whatsnew_0210.api_breaking.loc: + +Indexing with a list with missing labels is deprecated +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, selecting with a list of labels, where one or more labels were missing would always succeed, returning ``NaN`` for missing labels. +This will now show a ``FutureWarning``. In the future this will raise a ``KeyError`` (:issue:`15747`). +This warning will trigger on a ``DataFrame`` or a ``Series`` for using ``.loc[]`` or ``[[]]`` when passing a list-of-labels with at least 1 missing label. +See the :ref:`deprecation docs `. + + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + +Previous behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Current behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Passing list-likes to .loc or [] with any missing label will raise + KeyError in the future, you can use .reindex() as an alternative. + + See the documentation here: + https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike + + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + +The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()`` + +.. ipython:: python + + s.reindex([1, 2, 3]) + +Selection with all keys found is unchanged. + +.. ipython:: python + + s.loc[[1, 2]] + + +.. 
_whatsnew_0210.api.na_changes: + +NA naming changes +^^^^^^^^^^^^^^^^^ + +In order to promote more consistency among the pandas API, we have added additional top-level +functions :func:`isna` and :func:`notna` that are aliases for :func:`isnull` and :func:`notnull`. +The naming scheme is now more consistent with methods like ``.dropna()`` and ``.fillna()``. Furthermore +in all cases where ``.isnull()`` and ``.notnull()`` methods are defined, these have additional methods +named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical``, +``Index``, ``Series``, and ``DataFrame``. (:issue:`15001`). + +The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. + + +.. _whatsnew_0210.api_breaking.iteration_scalars: + +Iteration of Series/Index will now return Python scalars +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a Python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affects int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`). + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + +Previously: + +.. code-block:: ipython + + In [2]: type(list(s)[0]) + Out[2]: numpy.int64 + +New behavior: + +.. ipython:: python + + type(list(s)[0]) + +Furthermore this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well. + +.. ipython:: python + + d = {'a': [1], 'b': ['b']} + df = pd.DataFrame(d) + +Previously: + +.. code-block:: ipython + + In [8]: type(df.to_dict()['a'][0]) + Out[8]: numpy.int64 + +New behavior: + +.. ipython:: python + + type(df.to_dict()['a'][0]) + + +.. _whatsnew_0210.api_breaking.loc_with_index: + +Indexing with a Boolean Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously when passing a boolean ``Index`` to ``.loc``, if the index of the ``Series/DataFrame`` had ``boolean`` labels, +you would get a label based selection, potentially duplicating result labels, rather than a boolean indexing selection +(where ``True`` selects elements), this was inconsistent how a boolean numpy array indexed. The new behavior is to +act like a boolean numpy array indexer. (:issue:`17738`) + +Previous behavior: + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=[False, True, False]) + s + +.. code-block:: ipython + + In [59]: s.loc[pd.Index([True, False, True])] + Out[59]: + True 2 + False 1 + False 3 + True 2 + dtype: int64 + +Current behavior + +.. ipython:: python + + s.loc[pd.Index([True, False, True])] + + +Furthermore, previously if you had an index that was non-numeric (e.g. strings), then a boolean Index would raise a ``KeyError``. +This will now be treated as a boolean indexer. + +Previously behavior: + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + s + +.. code-block:: ipython + + In [39]: s.loc[pd.Index([True, False, True])] + KeyError: "None of [Index([True, False, True], dtype='object')] are in the [index]" + +Current behavior + +.. ipython:: python + + s.loc[pd.Index([True, False, True])] + + +.. 
_whatsnew_0210.api_breaking.period_index_resampling: + +``PeriodIndex`` resampling +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions of pandas, resampling a ``Series``/``DataFrame`` indexed by a ``PeriodIndex`` returned a ``DatetimeIndex`` in some cases (:issue:`12884`). Resampling to a multiplied frequency now returns a ``PeriodIndex`` (:issue:`15944`). As a minor enhancement, resampling a ``PeriodIndex`` can now handle ``NaT`` values (:issue:`13224`) + +Previous behavior: + +.. code-block:: ipython + + In [1]: pi = pd.period_range('2017-01', periods=12, freq='M') + + In [2]: s = pd.Series(np.arange(12), index=pi) + + In [3]: resampled = s.resample('2Q').mean() + + In [4]: resampled + Out[4]: + 2017-03-31 1.0 + 2017-09-30 5.5 + 2018-03-31 10.0 + Freq: 2Q-DEC, dtype: float64 + + In [5]: resampled.index + Out[5]: DatetimeIndex(['2017-03-31', '2017-09-30', '2018-03-31'], dtype='datetime64[ns]', freq='2Q-DEC') + +New behavior: + +.. ipython:: python + + pi = pd.period_range('2017-01', periods=12, freq='M') + + s = pd.Series(np.arange(12), index=pi) + + resampled = s.resample('2Q').mean() + + resampled + + resampled.index + +Upsampling and calling ``.ohlc()`` previously returned a ``Series``, basically identical to calling ``.asfreq()``. OHLC upsampling now returns a DataFrame with columns ``open``, ``high``, ``low`` and ``close`` (:issue:`13083`). This is consistent with downsampling and ``DatetimeIndex`` behavior. + +Previous behavior: + +.. code-block:: ipython + + In [1]: pi = pd.period_range(start='2000-01-01', freq='D', periods=10) + + In [2]: s = pd.Series(np.arange(10), index=pi) + + In [3]: s.resample('H').ohlc() + Out[3]: + 2000-01-01 00:00 0.0 + ... + 2000-01-10 23:00 NaN + Freq: H, Length: 240, dtype: float64 + + In [4]: s.resample('M').ohlc() + Out[4]: + open high low close + 2000-01 0 9 0 9 + +New behavior: + +.. ipython:: python + + pi = pd.period_range(start='2000-01-01', freq='D', periods=10) + + s = pd.Series(np.arange(10), index=pi) + + s.resample('H').ohlc() + + s.resample('M').ohlc() + + +.. _whatsnew_0210.api_breaking.pandas_eval: + +Improved error handling during item assignment in pd.eval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`eval` will now raise a ``ValueError`` when item assignment malfunctions, or +inplace operations are specified, but there is no item assignment in the expression (:issue:`16732`) + +.. ipython:: python + + arr = np.array([1, 2, 3]) + +Previously, if you attempted the following expression, you would get a not very helpful error message: + +.. code-block:: ipython + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) + and integer or boolean arrays are valid indices + +This is a very long way of saying numpy arrays don't support string-item indexing. With this +change, the error message is now this: + +.. code-block:: python + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot assign expression output to target + +It also used to be possible to evaluate expressions inplace, even if there was no item assignment: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + Out[4]: 3 + +However, this input does not make much sense because the output is not being assigned to +the target. Now, a ``ValueError`` will be raised when such an input is passed in: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + ... 
+ ValueError: Cannot operate inplace if there is no assignment + + +.. _whatsnew_0210.api_breaking.dtype_conversions: + +Dtype conversions +^^^^^^^^^^^^^^^^^ + +Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to the same type (e.g. int / float), or raise for datetimelikes. These will now preserve the bools with ``object`` dtypes. (:issue:`16821`). + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + +.. code-block:: python + + In [5]: s[1] = True + + In [6]: s + Out[6]: + 0 1 + 1 1 + 2 3 + dtype: int64 + +New behavior + +.. ipython:: python + + s[1] = True + s + +Previously, an assignment to a datetimelike with a non-datetimelike would coerce the + non-datetime-like item being assigned (:issue:`14145`). + +.. ipython:: python + + s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')]) + +.. code-block:: python + + In [1]: s[1] = 1 + + In [2]: s + Out[2]: + 0 2011-01-01 00:00:00.000000000 + 1 1970-01-01 00:00:00.000000001 + dtype: datetime64[ns] + +These now coerce to ``object`` dtype. + +.. ipython:: python + + s[1] = 1 + s + +- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) + + +.. _whatsnew_210.api.multiindex_single: + +MultiIndex constructor with a single level +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all +length-one levels down to a regular ``Index``. This affects all the +``MultiIndex`` constructors. (:issue:`17178`) + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)]) + Out[2]: Index(['a', 'b'], dtype='object') + +Length 1 levels are no longer special-cased. They behave exactly as if you had +length 2+ levels, so a :class:`MultiIndex` is always returned from all of the +``MultiIndex`` constructors: + +.. ipython:: python + + pd.MultiIndex.from_tuples([('a',), ('b',)]) + +.. _whatsnew_0210.api.utc_localization_with_series: + +UTC localization with Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` dtype to be consistent with how list-like and ``Index`` data are handled. (:issue:`6415`). + +Previous behavior + +.. ipython:: python + + s = pd.Series(['20130101 00:00:00'] * 3) + +.. code-block:: ipython + + In [12]: pd.to_datetime(s, utc=True) + Out[12]: + 0 2013-01-01 + 1 2013-01-01 + 2 2013-01-01 + dtype: datetime64[ns] + +New behavior + +.. ipython:: python + + pd.to_datetime(s, utc=True) + +Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone-aware datetime columns. + +.. _whatsnew_0210.api.consistency_of_range_functions: + +Consistency of range functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, there were some inconsistencies between the various range functions: :func:`date_range`, :func:`bdate_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range`. (:issue:`17471`). + +One of the inconsistent behaviors occurred when the ``start``, ``end`` and ``period`` parameters were all specified, potentially leading to ambiguous ranges.
When all three parameters were passed, ``interval_range`` ignored the ``period`` parameter, ``period_range`` ignored the ``end`` parameter, and the other range functions raised. To promote consistency among the range functions, and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range`` will now raise when all three parameters are passed. + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.interval_range(start=0, end=4, periods=6) + Out[2]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') + + In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') + Out[3]: PeriodIndex(['2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2'], dtype='period[Q-DEC]', freq='Q-DEC') + +New behavior: + +.. code-block:: ipython + + In [2]: pd.interval_range(start=0, end=4, periods=6) + --------------------------------------------------------------------------- + ValueError: Of the three parameters: start, end, and periods, exactly two must be specified + + In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') + --------------------------------------------------------------------------- + ValueError: Of the three parameters: start, end, and periods, exactly two must be specified + +Additionally, the endpoint parameter ``end`` was not included in the intervals produced by ``interval_range``. However, all other range functions include ``end`` in their output. To promote consistency among the range functions, ``interval_range`` will now include ``end`` as the right endpoint of the final interval, except if ``freq`` is specified in a way which skips ``end``. + +Previous behavior: + +.. code-block:: ipython + + In [4]: pd.interval_range(start=0, end=4) + Out[4]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') + + +New behavior: + +.. ipython:: python + + pd.interval_range(start=0, end=4) + +.. _whatsnew_0210.api.mpl_converters: + +No automatic Matplotlib converters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas no longer registers our ``date``, ``time``, ``datetime``, +``datetime64``, and ``Period`` converters with matplotlib when pandas is +imported. Matplotlib plot methods (``plt.plot``, ``ax.plot``, ...), will not +nicely format the x-axis for ``DatetimeIndex`` or ``PeriodIndex`` values. You +must explicitly register these methods: + +pandas built-in ``Series.plot`` and ``DataFrame.plot`` *will* register these +converters on first-use (:issue:`17710`). + +.. note:: + + This change has been temporarily reverted in pandas 0.21.1, + for more details see :ref:`here `. + +.. _whatsnew_0210.api: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) +- Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now + raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) +- :func:`read_csv` now issues a ``UserWarning`` if the ``names`` parameter contains duplicates (:issue:`17095`) +- :func:`read_csv` now treats ``'null'`` and ``'n/a'`` strings as missing values by default (:issue:`16471`, :issue:`16078`) +- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). +- Compression defaults in HDF stores now follow pytables standards. 
Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) +- ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) +- Removed the ``@slow`` decorator from ``pandas._testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) +- Moved definition of ``MergeError`` to the ``pandas.errors`` module. +- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) +- :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) +- :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). +- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) +- :func:`to_datetime` now raises a ``ValueError`` when format includes ``%W`` or ``%U`` without also including day of the week and calendar year (:issue:`16774`) +- Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) +- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`) +- Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`). + +.. _whatsnew_0210.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- :meth:`DataFrame.from_csv` and :meth:`Series.from_csv` have been deprecated in favor of :func:`read_csv()` (:issue:`4191`) +- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). +- :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`) +- :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`) +- :meth:`DataFrame.to_csv` has deprecated the ``tupleize_cols`` argument. MultiIndex columns will be always written as rows in the CSV file (:issue:`17060`) +- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) +- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). +- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). +- :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) +- ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) +- ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. 
See the :ref:`documentation ` for more details (:issue:`17596`) +- Passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`) +- ``.get_value`` and ``.set_value`` on ``Series``, ``DataFrame``, ``Panel``, ``SparseSeries``, and ``SparseDataFrame`` are deprecated in favor of using ``.iat[]`` or ``.at[]`` accessors (:issue:`15269`) +- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) +- ``raise_on_error`` parameter to :func:`Series.where`, :func:`Series.mask`, :func:`DataFrame.where`, :func:`DataFrame.mask` is deprecated, in favor of ``errors=`` (:issue:`14968`) +- Using :meth:`DataFrame.rename_axis` and :meth:`Series.rename_axis` to alter index or column *labels* is now deprecated in favor of using ``.rename``. ``rename_axis`` may still be used to alter the name of the index or columns (:issue:`17833`). +- :meth:`~DataFrame.reindex_axis` has been deprecated in favor of :meth:`~DataFrame.reindex`. See :ref:`here ` for more (:issue:`17833`). + +.. _whatsnew_0210.deprecations.select: + +Series.select and DataFrame.select +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`Series.select` and :meth:`DataFrame.select` methods are deprecated in favor of using ``df.loc[labels.map(crit)]`` (:issue:`12401`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz']) + +.. code-block:: ipython + + In [3]: df.select(lambda x: x in ['bar', 'baz']) + FutureWarning: select is deprecated and will be removed in a future release. You can use .loc[crit] as a replacement + Out[3]: + A + bar 2 + baz 3 + +.. ipython:: python + + df.loc[df.index.map(lambda x: x in ['bar', 'baz'])] + + +.. _whatsnew_0210.deprecations.argmin_min: + +Series.argmax and Series.argmin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The behavior of :func:`Series.argmax` and :func:`Series.argmin` has been deprecated in favor of :func:`Series.idxmax` and :func:`Series.idxmin`, respectively (:issue:`16830`). + +For compatibility with NumPy arrays, ``pd.Series`` implements ``argmax`` and +``argmin``. Since pandas 0.13.0, ``argmax`` has been an alias for +:meth:`pandas.Series.idxmax`, and ``argmin`` has been an alias for +:meth:`pandas.Series.idxmin`. They return the *label* of the maximum or minimum, +rather than the *position*. + +We've deprecated the current behavior of ``Series.argmax`` and +``Series.argmin``. Using either of these will emit a ``FutureWarning``. Use +:meth:`Series.idxmax` if you want the label of the maximum. Use +``Series.values.argmax()`` if you want the position of the maximum. Likewise for
+the minimum. In a future release ``Series.argmax`` and ``Series.argmin`` will +return the position of the maximum or minimum.
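+
+As a minimal illustration of the distinction (using a small, hypothetical ``Series`` rather than an example taken from this release), the label-based and position-based results differ as follows:
+
+.. code-block:: python
+
+   import pandas as pd
+
+   s = pd.Series([10, 30, 20], index=['a', 'b', 'c'])
+
+   s.idxmax()         # 'b', the label of the maximum
+   s.values.argmax()  # 1, the position of the maximum
+
+..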
_whatsnew_0210.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- :func:`read_excel()` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- The ``pd.options.display.height`` configuration has been dropped (:issue:`3663`) +- The ``pd.options.display.line_width`` configuration has been dropped (:issue:`2881`) +- The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) +- ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) +- ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) +- :func:`eval` and :func:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) +- The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) +- pandas no longer tests for compatibility with hdf5-files created with pandas < 0.11 (:issue:`17404`). + + + +.. _whatsnew_0210.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) +- :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) +- Improved performance of :meth:`~Series.cat.set_categories` by not materializing the values (:issue:`17508`) +- :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) +- Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`) +- Improved performance of :meth:`RangeIndex.min` and :meth:`RangeIndex.max` by using ``RangeIndex`` properties to perform the computations (:issue:`17607`) + +.. _whatsnew_0210.docs: + +Documentation changes +~~~~~~~~~~~~~~~~~~~~~ + +- Several ``NaT`` method docstrings (e.g. :func:`NaT.ctime`) were incorrect (:issue:`17327`) +- The documentation has had references to versions < v0.17 removed and cleaned up (:issue:`17442`, :issue:`17442`, :issue:`17404` & :issue:`17504`) + +.. _whatsnew_0210.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Conversion +^^^^^^^^^^ + +- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) +- Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`) +- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) +- Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) +- Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) +- Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`) +- Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`) +- Bug in :meth:`~DataFrame.astype` converting to object dtype when passed extension type classes (``DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`). 
+- Bug in :meth:`to_numeric` in which elements were not always being coerced to numeric when ``errors='coerce'`` (:issue:`17007`, :issue:`17125`) +- Bug in ``DataFrame`` and ``Series`` constructors where ``range`` objects are converted to ``int32`` dtype on Windows instead of ``int64`` (:issue:`16804`) + +Indexing +^^^^^^^^ + +- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). +- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). +- Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). +- Fixed :func:`TimedeltaIndex.get_loc` handling of ``np.timedelta64`` inputs (:issue:`16909`). +- Fix :func:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). +- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) +- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) +- Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) +- Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) +- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) +- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) +- Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) +- Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) +- Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) +- Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) + +IO +^^ + +- Bug in :func:`read_hdf` when reading a timezone aware index from ``fixed`` format HDFStore (:issue:`17618`) +- Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) +- Bug in :func:`read_csv` in which specified column names were not being thoroughly de-duplicated (:issue:`17095`) +- Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) +- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). +- Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). 
+- Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) +- Bug in :meth:`DataFrame.to_csv` defaulting to 'ascii' encoding in Python 3, instead of 'utf-8' (:issue:`17097`) +- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) +- Bug in :func:`read_stata` where the index was not set (:issue:`16342`) +- Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) +- Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) +- Bug in :meth:`DataFrame.to_html` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`) +- Bug in :meth:`DataFrame.to_html` in which there was no validation of the ``justify`` parameter (:issue:`17527`) +- Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`) +- Bug in :func:`to_json` where several conditions (including objects with unprintable symbols, objects with deep recursion, overlong labels) caused segfaults instead of raising the appropriate exception (:issue:`14256`) + +Plotting +^^^^^^^^ +- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) +- Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`) +- Line plots no longer assume monotonic x data when calculating xlims, they show the entire lines now even for unsorted x data. (:issue:`11310`, :issue:`11471`) +- With matplotlib 2.0.0 and above, calculation of x limits for line plots is left to matplotlib, so that its new default settings are applied. 
(:issue:`15495`) +- Bug in ``Series.plot.bar`` or ``DataFrame.plot.bar`` with ``y`` not respecting user-passed ``color`` (:issue:`16822`) +- Bug causing ``plotting.parallel_coordinates`` to reset the random seed when using random colors (:issue:`17525`) + + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) +- Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) +- Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) +- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) +- Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) +- Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1`` (:issue:`15305`) +- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) +- Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`) +- Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`) +- Bug in ``DataFrame.groupby`` where spurious warning is raised when ``Grouper`` object is used to override ambiguous column name (:issue:`17383`) +- Bug in ``TimeGrouper`` differs when passes as a list and as a scalar (:issue:`17530`) + +Sparse +^^^^^^ + +- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) +- Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) +- Bug in :func:`SparseSeries.unstack` and :func:`SparseDataFrame.stack` (:issue:`16614`, :issue:`15045`) +- Bug in :func:`make_sparse` treating two numeric/boolean data, which have same bits, as same when array ``dtype`` is ``object`` (:issue:`17574`) +- :func:`SparseArray.all` and :func:`SparseArray.any` are now implemented to handle ``SparseArray``, these were used but not implemented (:issue:`17570`) + +Reshaping +^^^^^^^^^ +- Joining/Merging with a non unique ``PeriodIndex`` raised a ``TypeError`` (:issue:`16871`) +- Bug in :func:`crosstab` where non-aligned series of integers were casted to float (:issue:`17005`) +- Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`) +- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) +- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) +- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) +- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). 
+- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) +- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) +- Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) +- Bug in :func:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`) +- Bug in ``DataFrame.drop_duplicates`` where dropping with non-unique column names raised a ``ValueError`` (:issue:`17836`) +- Bug in :func:`unstack` which, when called on a list of levels, would discard the ``fillna`` argument (:issue:`13971`) +- Bug in the alignment of ``range`` objects and other list-likes with ``DataFrame`` leading to operations being performed row-wise instead of column-wise (:issue:`17901`) + +Numeric +^^^^^^^ +- Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) +- :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as ``None`` instead of raising ``ValueError`` (:issue:`17276`). + + +Categorical +^^^^^^^^^^^ +- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`) +- Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) +- Bug in categorical operations with :ref:`Series.cat ` not preserving the original Series' name (:issue:`17509`) +- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`) +- Bug in constructing a ``Categorical``/``CategoricalDtype`` when the specified ``categories`` are of categorical type (:issue:`17884`). + +.. _whatsnew_0210.pypy: + +PyPy +^^^^ + +- Compatibility with PyPy in :func:`read_csv` with ``usecols=[]`` and + :func:`read_json` (:issue:`17351`) +- Split tests into cases for CPython and PyPy where needed, which highlights the fragility + of index matching with ``float('nan')``, ``np.nan`` and ``NAT`` (:issue:`17351`) +- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, + so an approximation is used instead (:issue:`17228`) + +Other +^^^^^ +- Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) +- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) + + + +.. _whatsnew_0.21.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.20.3..v0.21.0 diff --git a/doc/source/whatsnew/v0.21.1.rst b/doc/source/whatsnew/v0.21.1.rst new file mode 100644 index 00000000..e217e1a7 --- /dev/null +++ b/doc/source/whatsnew/v0.21.1.rst @@ -0,0 +1,187 @@ +.. _whatsnew_0211: + +Version 0.21.1 (December 12, 2017) +---------------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release in the 0.21.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- Temporarily restore matplotlib datetime plotting functionality. This should + resolve issues for users who implicitly relied on pandas to plot datetimes + with matplotlib. See :ref:`here `. 
+- Improvements to the Parquet IO functions introduced in 0.21.0. See + :ref:`here `. + + +.. contents:: What's new in v0.21.1 + :local: + :backlinks: none + + +.. _whatsnew_0211.converters: + +Restore Matplotlib datetime converter registration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +pandas implements some matplotlib converters for nicely formatting the axis +labels on plots with ``datetime`` or ``Period`` values. Prior to pandas 0.21.0, +these were implicitly registered with matplotlib, as a side effect of ``import +pandas``. + +In pandas 0.21.0, we required users to explicitly register the +converter. This caused problems for some users who relied on those converters +being present for regular ``matplotlib.pyplot`` plotting methods, so we're +temporarily reverting that change; pandas 0.21.1 again registers the converters on +import, just like before 0.21.0. + +We've added a new option to control the converters: +``pd.options.plotting.matplotlib.register_converters``. By default, they are +registered. Toggling this to ``False`` removes pandas' formatters and restore +any converters we overwrote when registering them (:issue:`18301`). + +We're working with the matplotlib developers to make this easier. We're trying +to balance user convenience (automatically registering the converters) with +import performance and best practices (importing pandas shouldn't have the side +effect of overwriting any custom converters you've already set). In the future +we hope to have most of the datetime formatting functionality in matplotlib, +with just the pandas-specific converters in pandas. We'll then gracefully +deprecate the automatic registration of converters in favor of users explicitly +registering them when they want them. + +.. _whatsnew_0211.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0211.enhancements.parquet: + +Improvements to the Parquet IO functionality +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :func:`DataFrame.to_parquet` will now write non-default indexes when the + underlying engine supports it. The indexes will be preserved when reading + back in with :func:`read_parquet` (:issue:`18581`). +- :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`) +- :func:`read_parquet` now allows to specify kwargs which are passed to the respective engine (:issue:`18216`) + +.. _whatsnew_0211.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- :meth:`Timestamp.timestamp` is now available in Python 2.7. (:issue:`17329`) +- :class:`Grouper` and :class:`TimeGrouper` now have a friendly repr output (:issue:`18203`). + +.. _whatsnew_0211.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- ``pandas.tseries.register`` has been renamed to + :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18301`) + +.. _whatsnew_0211.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of plotting large series/dataframes (:issue:`18236`). + +.. 
_whatsnew_0211.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Conversion +^^^^^^^^^^ + +- Bug in :class:`TimedeltaIndex` where subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`) +- Bug in :class:`DatetimeIndex` where subtracting a datetimelike from a ``DatetimeIndex`` could fail to overflow (:issue:`18020`) +- Bug in :meth:`IntervalIndex.copy` when copying an ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) +- Bug in :func:`DataFrame.to_dict` where tz-aware datetime columns were not converted to the required arrays when used with ``orient='records'``, raising ``TypeError`` (:issue:`18372`) +- Bug in :class:`DatetimeIndex` and :meth:`date_range` where mismatching tz-aware ``start`` and ``end`` timezones would not raise an error if ``end.tzinfo`` is None (:issue:`18431`) +- Bug in :meth:`Series.fillna` which raised when passed a long integer on Python 2 (:issue:`18159`). + +Indexing +^^^^^^^^ + +- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`) +- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`) +- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`) +- Bug in ``Index.putmask`` when an invalid mask is passed (:issue:`18368`) +- Bug in masked assignment of a ``timedelta64[ns]`` dtype ``Series``, incorrectly coerced to float (:issue:`18493`) + +IO +^^ + +- Bug in :class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting (:issue:`17990`). Previously, columns with display formatting were normally left as ordinal numbers and not converted to datetime objects. +- Bug in :func:`read_csv` when reading a compressed UTF-16 encoded file (:issue:`18071`) +- Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`) +- Bug in :func:`read_csv` when reading numeric category fields with high cardinality (:issue:`18186`) +- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`) +- Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`). 
+- Bug in :meth:`DataFrame.to_msgpack` when serializing data of the ``numpy.bool_`` datatype (:issue:`18390`) +- Bug in :func:`read_json` not decoding when reading line delimited JSON from S3 (:issue:`17200`) +- Bug in :func:`pandas.io.json.json_normalize` to avoid modification of ``meta`` (:issue:`18610`) +- Bug in :func:`to_latex` where repeated MultiIndex values were not printed even though a higher level index differed from the previous row (:issue:`14484`) +- Bug when reading NaN-only categorical columns in :class:`HDFStore` (:issue:`18413`) +- Bug in :meth:`DataFrame.to_latex` with ``longtable=True`` where a latex multicolumn always spanned over three columns (:issue:`17959`) + +Plotting +^^^^^^^^ + +- Bug in ``DataFrame.plot()`` and ``Series.plot()`` with :class:`DatetimeIndex` where a figure generated by them is not pickleable in Python 3 (:issue:`18439`) + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`) +- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequency is 12h or higher (:issue:`15549`) +- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) +- Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`) + +Reshaping +^^^^^^^^^ + +- Error message in ``pd.merge_asof()`` for key datatype mismatch now includes datatype of left and right key (:issue:`18068`) +- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`) +- Bug in ``DataFrame.filter(...)`` when :class:`unicode` is passed as a condition in Python 2 (:issue:`13101`) +- Bug when merging empty DataFrames when ``np.seterr(divide='raise')`` is set (:issue:`17776`) + +Numeric +^^^^^^^ + +- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) + +Categorical +^^^^^^^^^^^ + +- Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`) +- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`) +- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`) +- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`) +- Bug in ``DataFrame.groupby(axis=1)`` with a ``CategoricalIndex`` (:issue:`18432`) + +String +^^^^^^ + +- :meth:`Series.str.split()` will now propagate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`) + + +.. _whatsnew_0.21.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.21.0..v0.21.1 diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst new file mode 100644 index 00000000..ec9769c2 --- /dev/null +++ b/doc/source/whatsnew/v0.22.0.rst @@ -0,0 +1,261 @@ +.. _whatsnew_0220: + +Version 0.22.0 (December 29, 2017) +---------------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a major release from 0.21.1 and includes a single, API-breaking change. +We recommend that all users upgrade to this version after carefully reading the +release note (singular!). + +.. 
_whatsnew_0220.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +pandas 0.22.0 changes the handling of empty and all-*NA* sums and products. The +summary is that + +* The sum of an empty or all-*NA* ``Series`` is now ``0`` +* The product of an empty or all-*NA* ``Series`` is now ``1`` +* We've added a ``min_count`` parameter to ``.sum()`` and ``.prod()`` controlling + the minimum number of valid values for the result to be valid. If fewer than + ``min_count`` non-*NA* values are present, the result is *NA*. The default is + ``0``. To return ``NaN``, the 0.21 behavior, use ``min_count=1``. + +Some background: In pandas 0.21, we fixed a long-standing inconsistency +in the return value of all-*NA* series depending on whether or not bottleneck +was installed. See :ref:`whatsnew_0210.api_breaking.bottleneck`. At the same +time, we changed the sum and prod of an empty ``Series`` to also be ``NaN``. + +Based on feedback, we've partially reverted those changes. + +Arithmetic operations +^^^^^^^^^^^^^^^^^^^^^ + +The default sum for empty or all-*NA* ``Series`` is now ``0``. + +*pandas 0.21.x* + +.. code-block:: ipython + + In [1]: pd.Series([]).sum() + Out[1]: nan + + In [2]: pd.Series([np.nan]).sum() + Out[2]: nan + +*pandas 0.22.0* + +.. ipython:: python + :okwarning: + + pd.Series([]).sum() + pd.Series([np.nan]).sum() + +The default behavior is the same as pandas 0.20.3 with bottleneck installed. It +also matches the behavior of NumPy's ``np.nansum`` on empty and all-*NA* arrays. + +To have the sum of an empty series return ``NaN`` (the default behavior of +pandas 0.20.3 without bottleneck, or pandas 0.21.x), use the ``min_count`` +keyword. + +.. ipython:: python + :okwarning: + + pd.Series([]).sum(min_count=1) + +Thanks to the ``skipna`` parameter, the ``.sum`` on an all-*NA* +series is conceptually the same as the ``.sum`` of an empty one with +``skipna=True`` (the default). + +.. ipython:: python + + pd.Series([np.nan]).sum(min_count=1) # skipna=True by default + +The ``min_count`` parameter refers to the minimum number of *non-null* values +required for a non-NA sum or product. + +:meth:`Series.prod` has been updated to behave the same as :meth:`Series.sum`, +returning ``1`` instead. + +.. ipython:: python + :okwarning: + + pd.Series([]).prod() + pd.Series([np.nan]).prod() + pd.Series([]).prod(min_count=1) + +These changes affect :meth:`DataFrame.sum` and :meth:`DataFrame.prod` as well. +Finally, a few less obvious places in pandas are affected by this change. + +Grouping by a Categorical +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Grouping by a ``Categorical`` and summing now returns ``0`` instead of +``NaN`` for categories with no observations. The product now returns ``1`` +instead of ``NaN``. + +*pandas 0.21.x* + +.. code-block:: ipython + + In [8]: grouper = pd.Categorical(['a', 'a'], categories=['a', 'b']) + + In [9]: pd.Series([1, 2]).groupby(grouper).sum() + Out[9]: + a 3.0 + b NaN + dtype: float64 + +*pandas 0.22* + +.. ipython:: python + + grouper = pd.Categorical(["a", "a"], categories=["a", "b"]) + pd.Series([1, 2]).groupby(grouper).sum() + +To restore the 0.21 behavior of returning ``NaN`` for unobserved groups, +use ``min_count>=1``. + +.. ipython:: python + + pd.Series([1, 2]).groupby(grouper).sum(min_count=1) + +Resample +^^^^^^^^ + +The sum and product of all-*NA* bins has changed from ``NaN`` to ``0`` for +sum and ``1`` for product. + +*pandas 0.21.x* + +.. 
code-block:: ipython + + In [11]: s = pd.Series([1, 1, np.nan, np.nan], + ....: index=pd.date_range('2017', periods=4)) + ....: s + Out[11]: + 2017-01-01 1.0 + 2017-01-02 1.0 + 2017-01-03 NaN + 2017-01-04 NaN + Freq: D, dtype: float64 + + In [12]: s.resample('2d').sum() + Out[12]: + 2017-01-01 2.0 + 2017-01-03 NaN + Freq: 2D, dtype: float64 + +*pandas 0.22.0* + +.. ipython:: python + + s = pd.Series([1, 1, np.nan, np.nan], index=pd.date_range("2017", periods=4)) + s.resample("2d").sum() + +To restore the 0.21 behavior of returning ``NaN``, use ``min_count>=1``. + +.. ipython:: python + + s.resample("2d").sum(min_count=1) + +In particular, upsampling and taking the sum or product is affected, as +upsampling introduces missing values even if the original series was +entirely valid. + +*pandas 0.21.x* + +.. code-block:: ipython + + In [14]: idx = pd.DatetimeIndex(['2017-01-01', '2017-01-02']) + + In [15]: pd.Series([1, 2], index=idx).resample('12H').sum() + Out[15]: + 2017-01-01 00:00:00 1.0 + 2017-01-01 12:00:00 NaN + 2017-01-02 00:00:00 2.0 + Freq: 12H, dtype: float64 + +*pandas 0.22.0* + +.. ipython:: python + + idx = pd.DatetimeIndex(["2017-01-01", "2017-01-02"]) + pd.Series([1, 2], index=idx).resample("12H").sum() + +Once again, the ``min_count`` keyword is available to restore the 0.21 behavior. + +.. ipython:: python + + pd.Series([1, 2], index=idx).resample("12H").sum(min_count=1) + +Rolling and expanding +^^^^^^^^^^^^^^^^^^^^^ + +Rolling and expanding already have a ``min_periods`` keyword that behaves +similar to ``min_count``. The only case that changes is when doing a rolling +or expanding sum with ``min_periods=0``. Previously this returned ``NaN``, +when fewer than ``min_periods`` non-*NA* values were in the window. Now it +returns ``0``. + +*pandas 0.21.1* + +.. code-block:: ipython + + In [17]: s = pd.Series([np.nan, np.nan]) + + In [18]: s.rolling(2, min_periods=0).sum() + Out[18]: + 0 NaN + 1 NaN + dtype: float64 + +*pandas 0.22.0* + +.. ipython:: python + + s = pd.Series([np.nan, np.nan]) + s.rolling(2, min_periods=0).sum() + +The default behavior of ``min_periods=None``, implying that ``min_periods`` +equals the window size, is unchanged. + +Compatibility +~~~~~~~~~~~~~ + +If you maintain a library that should work across pandas versions, it +may be easiest to exclude pandas 0.21 from your requirements. Otherwise, all your +``sum()`` calls would need to check if the ``Series`` is empty before summing. + +With setuptools, in your ``setup.py`` use:: + + install_requires=['pandas!=0.21.*', ...] + +With conda, use + +.. code-block:: yaml + + requirements: + run: + - pandas !=0.21.0,!=0.21.1 + +Note that the inconsistency in the return value for all-*NA* series is still +there for pandas 0.20.3 and earlier. Avoiding pandas 0.21 will only help with +the empty case. + + +.. _whatsnew_0.22.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.21.1..v0.22.0 diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst new file mode 100644 index 00000000..9f24bc8e --- /dev/null +++ b/doc/source/whatsnew/v0.23.0.rst @@ -0,0 +1,1480 @@ +.. _whatsnew_0230: + +What's new in 0.23.0 (May 15, 2018) +----------------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a major release from 0.22.0 and includes a number of API changes, +deprecations, new features, enhancements, and performance improvements along +with a large number of bug fixes. 
We recommend that all users upgrade to this +version. + +Highlights include: + +- :ref:`Round-trippable JSON format with 'table' orient `. +- :ref:`Instantiation from dicts respects order for Python 3.6+ `. +- :ref:`Dependent column arguments for assign `. +- :ref:`Merging / sorting on a combination of columns and index levels `. +- :ref:`Extending pandas with custom types `. +- :ref:`Excluding unobserved categories from groupby `. +- :ref:`Changes to make output shape of DataFrame.apply consistent `. + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.0 + :local: + :backlinks: none + :depth: 2 + +.. _whatsnew_0230.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0230.enhancements.round-trippable_json: + +JSON read/write round-trippable with ``orient='table'`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata. + +.. ipython:: python + + df = pd.DataFrame({'foo': [1, 2, 3, 4], + 'bar': ['a', 'b', 'c', 'd'], + 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + 'qux': pd.Categorical(['a', 'b', 'c', 'c'])}, + index=pd.Index(range(4), name='idx')) + df + df.dtypes + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes + +Please note that the string ``index`` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. + +.. ipython:: python + :okwarning: + + df.index.name = 'index' + + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes + +.. ipython:: python + :suppress: + + import os + os.remove('test.json') + + +.. _whatsnew_0230.enhancements.assign_dependent: + + +Method ``.assign()`` accepts dependent arguments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`DataFrame.assign` now accepts dependent keyword arguments for python version later than 3.6 (see also `PEP 468 +`_). Later keyword arguments may now refer to earlier ones if the argument is a callable. See the +:ref:`documentation here ` (:issue:`14207`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3]}) + df + df.assign(B=df.A, C=lambda x: x['A'] + x['B']) + +.. warning:: + + This may subtly change the behavior of your code when you're + using ``.assign()`` to update an existing column. Previously, callables + referring to other variables being updated would get the "old" values + + Previous behavior: + + .. code-block:: ipython + + In [2]: df = pd.DataFrame({"A": [1, 2, 3]}) + + In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1) + Out[3]: + A C + 0 2 -1 + 1 3 -2 + 2 4 -3 + + New behavior: + + .. ipython:: python + + df.assign(A=df.A + 1, C=lambda df: df.A * -1) + + + +.. 
_whatsnew_0230.enhancements.merge_on_columns_and_levels: + +Merging on a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.merge` as the ``on``, ``left_on``, and ``right_on`` +parameters may now refer to either column names or index level names. +This enables merging ``DataFrame`` instances on a combination of index levels +and columns without resetting indexes. See the :ref:`Merge on columns and +levels ` documentation section. +(:issue:`14355`) + +.. ipython:: python + + left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key2': ['K0', 'K1', 'K0', 'K1']}, + index=left_index) + + right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') + + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3'], + 'key2': ['K0', 'K0', 'K0', 'K1']}, + index=right_index) + + left.merge(right, on=['key1', 'key2']) + +.. _whatsnew_0230.enhancements.sort_by_columns_and_levels: + +Sorting by a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.sort_values` as the ``by`` parameter may +now refer to either column names or index level names. This enables sorting +``DataFrame`` instances by a combination of index levels and columns without +resetting indexes. See the :ref:`Sorting by Indexes and Values +` documentation section. +(:issue:`14353`) + +.. ipython:: python + + # Build MultiIndex + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), + ('b', 2), ('b', 1), ('b', 1)]) + idx.names = ['first', 'second'] + + # Build DataFrame + df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, + index=idx) + df_multi + + # Sort by 'second' (index) and 'A' (column) + df_multi.sort_values(by=['second', 'A']) + + +.. _whatsnew_023.enhancements.extension: + +Extending pandas with custom types (experimental) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas now supports storing array-like objects that aren't necessarily 1-D NumPy +arrays as columns in a DataFrame or values in a Series. This allows third-party +libraries to implement extensions to NumPy's types, similar to how pandas +implemented categoricals, datetimes with timezones, periods, and intervals. + +As a demonstration, we'll use cyberpandas_, which provides an ``IPArray`` type +for storing ip addresses. + +.. code-block:: ipython + + In [1]: from cyberpandas import IPArray + + In [2]: values = IPArray([ + ...: 0, + ...: 3232235777, + ...: 42540766452641154071740215577757643572 + ...: ]) + ...: + ...: + +``IPArray`` isn't a normal 1-D NumPy array, but because it's a pandas +:class:`~pandas.api.extensions.ExtensionArray`, it can be stored properly inside pandas' containers. + +.. code-block:: ipython + + In [3]: ser = pd.Series(values) + + In [4]: ser + Out[4]: + 0 0.0.0.0 + 1 192.168.1.1 + 2 2001:db8:85a3::8a2e:370:7334 + dtype: ip + +Notice that the dtype is ``ip``. The missing value semantics of the underlying +array are respected: + +.. code-block:: ipython + + In [5]: ser.isna() + Out[5]: + 0 True + 1 False + 2 False + dtype: bool + +For more, see the :ref:`extension types ` +documentation. If you build an extension array, publicize it on our +:ref:`ecosystem page `. + +.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest/ + + +.. 
_whatsnew_0230.enhancements.categorical_grouping: + +New ``observed`` keyword for excluding unobserved categories in ``GroupBy`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Grouping by a categorical includes the unobserved categories in the output. +When grouping by multiple categorical columns, this means you get the cartesian product of all the +categories, including combinations where there are no observations, which can result in a large +number of groups. We have added a keyword ``observed`` to control this behavior, it defaults to +``observed=False`` for backward-compatibility. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`, :issue:`20902`) + +.. ipython:: python + + cat1 = pd.Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + cat2 = pd.Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df['C'] = ['foo', 'bar'] * 2 + df + +To show all values, the previous behavior: + +.. ipython:: python + + df.groupby(['A', 'B', 'C'], observed=False).count() + + +To show only observed values: + +.. ipython:: python + + df.groupby(['A', 'B', 'C'], observed=True).count() + +For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: + +.. ipython:: python + + cat1 = pd.Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + cat2 = pd.Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df + +.. ipython:: python + + pd.pivot_table(df, values='values', index=['A', 'B'], + dropna=True) + pd.pivot_table(df, values='values', index=['A', 'B'], + dropna=False) + + +.. _whatsnew_0230.enhancements.window_raw: + +Rolling/Expanding.apply() accepts ``raw=False`` to pass a ``Series`` to the function +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, +:func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` have gained a ``raw=None`` parameter. +This is similar to :func:`DataFame.apply`. This parameter, if ``True`` allows one to send a ``np.ndarray`` to the applied function. If ``False`` a ``Series`` will be passed. The +default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``. +In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`, :issue:`20584`) + +.. ipython:: python + + s = pd.Series(np.arange(5), np.arange(5) + 1) + s + +Pass a ``Series``: + +.. ipython:: python + + s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False) + +Mimic the original behavior of passing a ndarray: + +.. ipython:: python + + s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True) + + +.. _whatsnew_0210.enhancements.limit_area: + +``DataFrame.interpolate`` has gained the ``limit_area`` kwarg +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.interpolate` has gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. +Use ``limit_area='inside'`` to fill only NaNs surrounded by valid values or use ``limit_area='outside'`` to fill only ``NaN`` s +outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. 
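+
+The keyword works the same way through :meth:`DataFrame.interpolate`, filling each column
+independently along the index. A minimal sketch, assuming a small illustrative frame (the
+column names and values below are arbitrary):
+
+.. code-block:: python
+
+   import numpy as np
+   import pandas as pd
+
+   df = pd.DataFrame({"a": [np.nan, 1, np.nan, 3, np.nan],
+                      "b": [np.nan, 5, np.nan, 7, np.nan]})
+
+   # 'inside' fills only the NaN that sits between valid values (the middle row)
+   df.interpolate(limit_direction="both", limit_area="inside")
+
+   # 'outside' fills only the leading/trailing NaNs, leaving the interior gap untouched
+   df.interpolate(limit_direction="both", limit_area="outside")
+
+The :class:`Series` examples below walk through the same keyword step by step.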
+ + +.. ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, + np.nan, 13, np.nan, np.nan]) + ser + +Fill one consecutive inside value in both directions + +.. ipython:: python + + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) + +Fill all consecutive outside values backward + +.. ipython:: python + + ser.interpolate(limit_direction='backward', limit_area='outside') + +Fill all consecutive outside values in both directions + +.. ipython:: python + + ser.interpolate(limit_direction='both', limit_area='outside') + +.. _whatsnew_0210.enhancements.get_dummies_dtype: + +Function ``get_dummies`` now supports ``dtype`` argument +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtype for the new columns. The default remains uint8. (:issue:`18330`) + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) + pd.get_dummies(df, columns=['c']).dtypes + pd.get_dummies(df, columns=['c'], dtype=bool).dtypes + + +.. _whatsnew_0230.enhancements.timedelta_mod: + +Timedelta mod method +^^^^^^^^^^^^^^^^^^^^ + +``mod`` (%) and ``divmod`` operations are now defined on ``Timedelta`` objects +when operating with either timedelta-like or with numeric arguments. +See the :ref:`documentation here `. (:issue:`19365`) + +.. ipython:: python + + td = pd.Timedelta(hours=37) + td % pd.Timedelta(minutes=45) + +.. _whatsnew_0230.enhancements.ran_inf: + +Method ``.rank()`` handles ``inf`` values when ``NaN`` are present +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, ``.rank()`` would assign ``inf`` elements ``NaN`` as their ranks. Now ranks are calculated properly. (:issue:`6945`) + +.. ipython:: python + + s = pd.Series([-np.inf, 0, 1, np.nan, np.inf]) + s + +Previous behavior: + +.. code-block:: ipython + + In [11]: s.rank() + Out[11]: + 0 1.0 + 1 2.0 + 2 3.0 + 3 NaN + 4 NaN + dtype: float64 + +Current behavior: + +.. ipython:: python + + s.rank() + +Furthermore, previously if you rank ``inf`` or ``-inf`` values together with ``NaN`` values, the calculation won't distinguish ``NaN`` from infinity when using 'top' or 'bottom' argument. + +.. ipython:: python + + s = pd.Series([np.nan, np.nan, -np.inf, -np.inf]) + s + +Previous behavior: + +.. code-block:: ipython + + In [15]: s.rank(na_option='top') + Out[15]: + 0 2.5 + 1 2.5 + 2 2.5 + 3 2.5 + dtype: float64 + +Current behavior: + +.. ipython:: python + + s.rank(na_option='top') + +These bugs were squashed: + +- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) +- Bug in :func:`DataFrameGroupBy.rank` where ranks were incorrect when both infinity and ``NaN`` were present (:issue:`20561`) + + +.. _whatsnew_0230.enhancements.str_cat_align: + +``Series.str.cat`` has gained the ``join`` kwarg +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :meth:`Series.str.cat` did not -- in contrast to most of ``pandas`` -- align :class:`Series` on their index before concatenation (see :issue:`18657`). +The method has now gained a keyword ``join`` to control the manner of alignment, see examples below and :ref:`here `. 
+ +In v.0.23 ``join`` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. + +.. ipython:: python + :okwarning: + + s = pd.Series(['a', 'b', 'c', 'd']) + t = pd.Series(['b', 'd', 'e', 'c'], index=[1, 3, 4, 2]) + s.str.cat(t) + s.str.cat(t, join='left', na_rep='-') + +Furthermore, :meth:`Series.str.cat` now works for ``CategoricalIndex`` as well (previously raised a ``ValueError``; see :issue:`20842`). + +.. _whatsnew_0230.enhancements.astype_category: + +``DataFrame.astype`` performs column-wise conversion to ``Categorical`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.astype` can now perform column-wise conversion to ``Categorical`` by supplying the string ``'category'`` or +a :class:`~pandas.api.types.CategoricalDtype`. Previously, attempting this would raise a ``NotImplementedError``. See the +:ref:`categorical.objectcreation` section of the documentation for more details and examples. (:issue:`12860`, :issue:`18099`) + +Supplying the string ``'category'`` performs column-wise conversion, with only labels appearing in a given column set as categories: + +.. ipython:: python + + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + df = df.astype('category') + df['A'].dtype + df['B'].dtype + + +Supplying a ``CategoricalDtype`` will make the categories in each column consistent with the supplied dtype: + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + cdt = CategoricalDtype(categories=list('abcd'), ordered=True) + df = df.astype(cdt) + df['A'].dtype + df['B'].dtype + + +.. _whatsnew_0230.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Unary ``+`` now permitted for ``Series`` and ``DataFrame`` as numeric operator (:issue:`16073`) +- Better support for :meth:`~pandas.io.formats.style.Styler.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`) +- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. (:issue:`18171`) +- :func:`MultiIndex.unique` now supports the ``level=`` argument, to get unique values from a specific index level (:issue:`17896`) +- :class:`pandas.io.formats.style.Styler` now has method ``hide_index()`` to determine whether the index will be rendered in output (:issue:`14194`) +- :class:`pandas.io.formats.style.Styler` now has method ``hide_columns()`` to determine whether columns will be hidden in output (:issue:`14194`) +- Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`) +- :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`) +- :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`) +- Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`) +- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). +- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) +- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. 
(:issue:`16326`) +- :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`) +- :meth:`DataFrame.append` can now in more cases preserve the type of the calling dataframe's columns (e.g. if both are ``CategoricalIndex``) (:issue:`18359`) +- :meth:`DataFrame.to_json` and :meth:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) +- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) +- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` + can now take a callable as their argument (:issue:`18862`) +- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`) +- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. + Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). +- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`). +- Added :func:`pandas.api.extensions.register_dataframe_accessor`, + :func:`pandas.api.extensions.register_series_accessor`, and + :func:`pandas.api.extensions.register_index_accessor`, accessors for libraries downstream of pandas + to register custom accessors like ``.cat`` on pandas objects. See + :ref:`Registering Custom Accessors ` for more (:issue:`14781`). + +- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) +- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) +- Added :func:`pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing` and :func:`pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) +- For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) +- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) +- Added option ``display.html.use_mathjax`` so `MathJax `_ can be disabled when rendering tables in ``Jupyter`` notebooks (:issue:`19856`, :issue:`19824`) +- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) +- :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) +- :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) +- :meth:`DataFrame.to_sql` now performs a multi-value insert if the underlying connection supports it, rather than inserting row by row. + ``SQLAlchemy`` dialects supporting multi-value inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. 
(:issue:`14315`, :issue:`8953`) +- :func:`read_html` now accepts a ``displayed_only`` keyword argument to control whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`) +- :func:`read_html` now reads all ``<tbody>`` elements in a ``<table>
    ``, not just the first. (:issue:`20690`) +- :meth:`~pandas.core.window.Rolling.quantile` and :meth:`~pandas.core.window.Expanding.quantile` now accept the ``interpolation`` keyword, ``linear`` by default (:issue:`20497`) +- zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`) +- :class:`~pandas.tseries.offsets.WeekOfMonth` constructor now supports ``n=0`` (:issue:`20517`). +- :class:`DataFrame` and :class:`Series` now support matrix multiplication (``@``) operator (:issue:`10259`) for Python>=3.5 +- Updated :meth:`DataFrame.to_gbq` and :meth:`pandas.read_gbq` signature and documentation to reflect changes from + the pandas-gbq library version 0.4.0. Adds intersphinx mapping to pandas-gbq + library. (:issue:`20564`) +- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`) +- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`) +- :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`) +- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`) + +.. _whatsnew_0230.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0230.api_breaking.deps: + +Dependencies have increased minimum versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We have updated our minimum supported versions of dependencies (:issue:`15184`). +If installed, we now require: + ++-----------------+-----------------+----------+---------------+ +| Package | Minimum Version | Required | Issue | ++=================+=================+==========+===============+ +| python-dateutil | 2.5.0 | X | :issue:`15184`| ++-----------------+-----------------+----------+---------------+ +| openpyxl | 2.4.0 | | :issue:`15184`| ++-----------------+-----------------+----------+---------------+ +| beautifulsoup4 | 4.2.1 | | :issue:`20082`| ++-----------------+-----------------+----------+---------------+ +| setuptools | 24.2.0 | | :issue:`20698`| ++-----------------+-----------------+----------+---------------+ + +.. _whatsnew_0230.api_breaking.dict_insertion_order: + +Instantiation from dicts preserves dict insertion order for Python 3.6+ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Until Python 3.6, dicts in Python had no formally defined ordering. For Python +version 3.6 and later, dicts are ordered by insertion order, see +`PEP 468 `_. +pandas will use the dict's insertion order, when creating a ``Series`` or +``DataFrame`` from a dict and you're using Python version 3.6 or +higher. (:issue:`19884`) + +Previous behavior (and current behavior if on Python < 3.6): + +.. code-block:: ipython + + In [16]: pd.Series({'Income': 2000, + ....: 'Expenses': -1500, + ....: 'Taxes': -200, + ....: 'Net result': 300}) + Out[16]: + Expenses -1500 + Income 2000 + Net result 300 + Taxes -200 + dtype: int64 + +Note the Series above is ordered alphabetically by the index values. + +New behavior (for Python >= 3.6): + +.. 
ipython:: python + + pd.Series({'Income': 2000, + 'Expenses': -1500, + 'Taxes': -200, + 'Net result': 300}) + +Notice that the Series is now ordered by insertion order. This new behavior is +used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries`` +and ``SparseDataFrame``). + +If you wish to retain the old behavior while using Python >= 3.6, you can use +``.sort_index()``: + +.. ipython:: python + + pd.Series({'Income': 2000, + 'Expenses': -1500, + 'Taxes': -200, + 'Net result': 300}).sort_index() + +.. _whatsnew_0230.api_breaking.deprecate_panel: + +Deprecate Panel +^^^^^^^^^^^^^^^ + +``Panel`` was deprecated in the 0.20.x release, showing as a ``DeprecationWarning``. Using ``Panel`` will now show a ``FutureWarning``. The recommended way to represent 3-D data are +with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. pandas +provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`13563`, :issue:`18324`). + +.. code-block:: ipython + + In [75]: import pandas._testing as tm + + In [76]: p = tm.makePanel() + + In [77]: p + Out[77]: + + Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + +Convert to a MultiIndex DataFrame + +.. code-block:: ipython + + In [78]: p.to_frame() + Out[78]: + ItemA ItemB ItemC + major minor + 2000-01-03 A 0.469112 0.721555 0.404705 + B -1.135632 0.271860 -1.039268 + C 0.119209 0.276232 -1.344312 + D -2.104569 0.113648 -0.109050 + 2000-01-04 A -0.282863 -0.706771 0.577046 + B 1.212112 -0.424972 -0.370647 + C -1.044236 -1.087401 0.844885 + D -0.494929 -1.478427 1.643563 + 2000-01-05 A -1.509059 -1.039575 -1.715002 + B -0.173215 0.567020 -1.157892 + C -0.861849 -0.673690 1.075770 + D 1.071804 0.524988 -1.469388 + + [12 rows x 3 columns] + +Convert to an xarray DataArray + +.. code-block:: ipython + + In [79]: p.to_xarray() + Out[79]: + + array([[[ 0.469112, -1.135632, 0.119209, -2.104569], + [-0.282863, 1.212112, -1.044236, -0.494929], + [-1.509059, -0.173215, -0.861849, 1.071804]], + + [[ 0.721555, 0.27186 , 0.276232, 0.113648], + [-0.706771, -0.424972, -1.087401, -1.478427], + [-1.039575, 0.56702 , -0.67369 , 0.524988]], + + [[ 0.404705, -1.039268, -1.344312, -0.10905 ], + [ 0.577046, -0.370647, 0.844885, 1.643563], + [-1.715002, -1.157892, 1.07577 , -1.469388]]]) + Coordinates: + * items (items) object 'ItemA' 'ItemB' 'ItemC' + * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 + * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' + + +.. _whatsnew_0230.api_breaking.core_common: + +pandas.core.common removals +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following error & warning messages are removed from ``pandas.core.common`` (:issue:`13634`, :issue:`19769`): + +- ``PerformanceWarning`` +- ``UnsupportedFunctionCall`` +- ``UnsortedIndexError`` +- ``AbstractMethodError`` + +These are available from import from ``pandas.errors`` (since 0.19.0). + + +.. _whatsnew_0230.api_breaking.apply: + +Changes to make output of ``DataFrame.apply`` consistent +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. Several bugs and inconsistencies +are resolved. 
If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned, this includes the case +where a list-like (e.g. ``tuple`` or ``list`` is returned) (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, +:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`). + +.. ipython:: python + + df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + df + +Previous behavior: if the returned shape happened to match the length of original columns, this would return a ``DataFrame``. +If the return shape did not match, a ``Series`` with lists was returned. + +.. code-block:: python + + In [3]: df.apply(lambda x: [1, 2, 3], axis=1) + Out[3]: + A B C + 0 1 2 3 + 1 1 2 3 + 2 1 2 3 + 3 1 2 3 + 4 1 2 3 + 5 1 2 3 + + In [4]: df.apply(lambda x: [1, 2], axis=1) + Out[4]: + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + 3 [1, 2] + 4 [1, 2] + 5 [1, 2] + dtype: object + + +New behavior: When the applied function returns a list-like, this will now *always* return a ``Series``. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1) + df.apply(lambda x: [1, 2], axis=1) + +To have expanded columns, you can use ``result_type='expand'`` + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') + +To broadcast the result across the original columns (the old behaviour for +list-likes of the correct length), you can use ``result_type='broadcast'``. +The shape must match the original columns. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') + +Returning a ``Series`` allows one to control the exact return structure and column names: + +.. ipython:: python + + df.apply(lambda x: pd.Series([1, 2, 3], index=['D', 'E', 'F']), axis=1) + +.. _whatsnew_0230.api_breaking.concat: + +Concatenation will no longer sort +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned. +The current behavior is the same as the previous (sorting), but now a warning is issued when ``sort`` is not specified and the non-concatenation axis is not aligned (:issue:`4588`). + +.. ipython:: python + :okwarning: + + df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a']) + df2 = pd.DataFrame({"a": [4, 5]}) + + pd.concat([df1, df2]) + +To keep the previous behavior (sorting) and silence the warning, pass ``sort=True`` + +.. ipython:: python + + pd.concat([df1, df2], sort=True) + +To accept the future behavior (no sorting), pass ``sort=False`` + +.. ipython + + pd.concat([df1, df2], sort=False) + +Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior. + + +.. _whatsnew_0230.api_breaking.build_changes: + +Build changes +^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) +- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) +- Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) + +.. _whatsnew_0230.api_breaking.index_division_by_zero: + +Index division by zero fills correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``. 
This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) + +Previous behavior: + +.. code-block:: ipython + + In [6]: index = pd.Int64Index([-1, 0, 1]) + + In [7]: index / 0 + Out[7]: Int64Index([0, 0, 0], dtype='int64') + + # Previous behavior yielded different results depending on the type of zero in the divisor + In [8]: index / 0.0 + Out[8]: Float64Index([-inf, nan, inf], dtype='float64') + + In [9]: index = pd.UInt64Index([0, 1]) + + In [10]: index / np.array([0, 0], dtype=np.uint64) + Out[10]: UInt64Index([0, 0], dtype='uint64') + + In [11]: pd.RangeIndex(1, 5) / 0 + ZeroDivisionError: integer division or modulo by zero + +Current behavior: + +.. code-block:: ipython + + In [12]: index = pd.Int64Index([-1, 0, 1]) + # division by zero gives -infinity where negative, + # +infinity where positive, and NaN for 0 / 0 + In [13]: index / 0 + + # The result of division by zero should not depend on + # whether the zero is int or float + In [14]: index / 0.0 + + In [15]: index = pd.UInt64Index([0, 1]) + In [16]: index / np.array([0, 0], dtype=np.uint64) + + In [17]: pd.RangeIndex(1, 5) / 0 + +.. _whatsnew_0230.api_breaking.extract: + +Extraction of matching patterns from strings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, extracting matching patterns from strings with :func:`str.extract` used to return a +``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was +extracted). As of pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless +``expand`` is set to ``False``. Finally, ``None`` was an accepted value for +the ``expand`` parameter (which was equivalent to ``False``), but now raises a ``ValueError``. (:issue:`11386`) + +Previous behavior: + +.. code-block:: ipython + + In [1]: s = pd.Series(['number 10', '12 eggs']) + + In [2]: extracted = s.str.extract(r'.*(\d\d).*') + + In [3]: extracted + Out [3]: + 0 10 + 1 12 + dtype: object + + In [4]: type(extracted) + Out [4]: + pandas.core.series.Series + +New behavior: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract(r'.*(\d\d).*') + extracted + type(extracted) + +To restore previous behavior, simply set ``expand`` to ``False``: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract(r'.*(\d\d).*', expand=False) + extracted + type(extracted) + +.. _whatsnew_0230.api_breaking.cdt_ordered: + +Default value for the ``ordered`` parameter of ``CategoricalDtype`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default value of the ``ordered`` parameter for :class:`~pandas.api.types.CategoricalDtype` has changed from ``False`` to ``None`` to allow updating of ``categories`` without impacting ``ordered``. Behavior should remain consistent for downstream objects, such as :class:`Categorical` (:issue:`18790`) + +In previous versions, the default value for the ``ordered`` parameter was ``False``. This could potentially lead to the ``ordered`` parameter unintentionally being changed from ``True`` to ``False`` when users attempt to update ``categories`` if ``ordered`` is not explicitly specified, as it would silently default to ``False``. The new behavior for ``ordered=None`` is to retain the existing value of ``ordered``. + +New behavior: + +.. 
code-block:: ipython + + In [2]: from pandas.api.types import CategoricalDtype + + In [3]: cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) + + In [4]: cat + Out[4]: + [a, b, c, a, b, a] + Categories (3, object): [c < b < a] + + In [5]: cdt = CategoricalDtype(categories=list('cbad')) + + In [6]: cat.astype(cdt) + Out[6]: + [a, b, c, a, b, a] + Categories (4, object): [c < b < a < d] + +Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. + +Note that the unintentional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. + +.. _whatsnew_0230.api_breaking.pretty_printing: + +Better pretty-printing of DataFrames in a terminal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the default value for the maximum number of columns was +``pd.options.display.max_columns=20``. This meant that relatively wide data +frames would not fit within the terminal width, and pandas would introduce line +breaks to display these 20 columns. This resulted in an output that was +relatively difficult to read: + +.. image:: ../_static/print_df_old.png + +If Python runs in a terminal, the maximum number of columns is now determined +automatically so that the printed data frame fits within the current terminal +width (``pd.options.display.max_columns=0``) (:issue:`17023`). If Python runs +as a Jupyter kernel (such as the Jupyter QtConsole or a Jupyter notebook, as +well as in many IDEs), this value cannot be inferred automatically and is thus +set to ``20`` as in previous versions. In a terminal, this results in a much +nicer output: + +.. image:: ../_static/print_df_new.png + +Note that if you don't like the new default, you can always set this option +yourself. To revert to the old setting, you can run this line: + +.. code-block:: python + + pd.options.display.max_columns = 20 + +.. 
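+
+To limit the change to a particular block of code, the option can also be set
+temporarily, for example with :func:`pandas.option_context`:
+
+.. code-block:: python
+
+    # restore the old 20-column limit only while printing this frame
+    with pd.option_context("display.max_columns", 20):
+        print(df)
+
+..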
_whatsnew_0230.api.datetimelike: + +Datetimelike API changes +^^^^^^^^^^^^^^^^^^^^^^^^ + +- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) +- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) +- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) +- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) +- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError`` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) +- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) +- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) +- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mismatched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) +- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) +- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) +- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) +- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) +- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) +- :attr:`Series.first` and :attr:`DataFrame.first` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- :attr:`Series.last` and :attr:`DataFrame.last` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) +- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) +- :class:`Timestamp` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`) +- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) +- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) + +.. 
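+
+For example, the first item above means that an ISO 8601 duration string is now
+parsed directly by the ``Timedelta`` constructor:
+
+.. code-block:: python
+
+    >>> pd.Timedelta("P0DT0H1M0S")   # one minute, written as an ISO 8601 duration
+    Timedelta('0 days 00:01:00')
+
+..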
_whatsnew_0230.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`) +- ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified, will now return an ``object`` dtyped ``Series``, previously this would infer the datetime dtype (:issue:`18231`) +- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) +- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). +- Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) +- Both construction and renaming of ``Index``/``MultiIndex`` with non-hashable ``name``/``names`` will now raise ``TypeError`` (:issue:`20527`) +- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). +- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) +- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) +- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) +- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) +- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) +- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) +- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) +- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) +- Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) +- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) +- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. 
Now they are cast to numeric if possible (:issue:`17627`) +- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) +- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) +- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) +- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) +- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) +- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) +- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) +- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) +- :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) +- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) +- ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) +- :func:`Series.str.replace` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) +- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) +- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). +- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) +- A user-defined-function that is passed to :func:`Series.rolling().aggregate() `, :func:`DataFrame.rolling().aggregate() `, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here `. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) +- Rolling and Expanding types raise ``NotImplementedError`` upon iteration (:issue:`11704`). + +.. _whatsnew_0230.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`). +- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`). +- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`) +- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated. 
+ In the future, a tuple passed to ``'by'`` will always refer to a single key + that is the actual tuple, instead of treating the tuple as multiple keys. To + retain the previous behavior, use a list instead of a tuple (:issue:`18314`) +- ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`). +- :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) +- :meth:`ExcelFile.parse` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with :func:`read_excel` (:issue:`20920`). +- The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). +- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) +- ``DataFrame.from_items`` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`, :issue:`17312`) +- Indexing a :class:`MultiIndex` or a :class:`FloatIndex` with a list containing some missing keys will now show a :class:`FutureWarning`, which is consistent with other types of indexes (:issue:`17758`). + +- The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) +- The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) +- The ``order`` parameter of :func:`factorize` is deprecated and will be removed in a future release (:issue:`19727`) +- :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` are deprecated in favor of :meth:`Timestamp.day_name`, :meth:`DatetimeIndex.day_name`, and :meth:`Series.dt.day_name` (:issue:`12806`) + +- ``pandas.tseries.plotting.tsplot`` is deprecated. Use :func:`Series.plot` instead (:issue:`18627`) +- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`) +- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`) +- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`). +- :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, + :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` have deprecated passing an ``np.array`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`) +- The ``data``, ``base``, ``strides``, ``flags`` and ``itemsize`` properties + of the ``Series`` and ``Index`` classes have been deprecated and will be + removed in a future version (:issue:`20419`). +- ``DatetimeIndex.offset`` is deprecated. Use ``DatetimeIndex.freq`` instead (:issue:`20716`) +- Floor division between an integer ndarray and a :class:`Timedelta` is deprecated. Divide by :attr:`Timedelta.value` instead (:issue:`19761`) +- Setting ``PeriodIndex.freq`` (which was not guaranteed to work correctly) is deprecated. Use :meth:`PeriodIndex.asfreq` instead (:issue:`20678`) +- ``Index.get_duplicates()`` is deprecated and will be removed in a future version (:issue:`20239`) +- The previous default behavior of negative indices in ``Categorical.take`` is deprecated. 
In a future version it will change from meaning missing values to meaning positional indices from the right. The future behavior is consistent with :meth:`Series.take` (:issue:`20664`). +- Passing multiple axes to the ``axis`` parameter in :func:`DataFrame.dropna` has been deprecated and will be removed in a future version (:issue:`20987`) + + +.. _whatsnew_0230.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`) +- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`). +- ``pd.tseries.util.pivot_annual`` has been removed (deprecated since v0.19). Use ``pivot_table`` instead (:issue:`18370`) +- ``pd.tseries.util.isleapyear`` has been removed (deprecated since v0.19). Use ``.is_leap_year`` property in Datetime-likes instead (:issue:`18370`) +- ``pd.ordered_merge`` has been removed (deprecated since v0.19). Use ``pd.merge_ordered`` instead (:issue:`18459`) +- The ``SparseList`` class has been removed (:issue:`14007`) +- The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`) +- ``Categorical.from_array`` has been removed (:issue:`13854`) +- The ``freq`` and ``how`` parameters have been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame + and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601` & :issue:`18668`) +- ``DatetimeIndex.to_datetime``, ``Timestamp.to_datetime``, ``PeriodIndex.to_datetime``, and ``Index.to_datetime`` have been removed (:issue:`8254`, :issue:`14096`, :issue:`14113`) +- :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`) +- :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`) +- :func:`read_csv` has dropped the ``buffer_lines`` parameter (:issue:`13360`) +- :func:`read_csv` has dropped the ``compact_ints`` and ``use_unsigned`` parameters (:issue:`13323`) +- The ``Timestamp`` class has dropped the ``offset`` attribute in favor of ``freq`` (:issue:`13593`) +- The ``Series``, ``Categorical``, and ``Index`` classes have dropped the ``reshape`` method (:issue:`13012`) +- ``pandas.tseries.frequencies.get_standard_freq`` has been removed in favor of ``pandas.tseries.frequencies.to_offset(freq).rule_code`` (:issue:`13874`) +- The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) +- The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) +- The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`) +- The options ``display.line_with`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) +- The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`) +- The ``flavor`` parameter have been removed from :func:`to_sql` method (:issue:`13611`) +- The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) +- The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). 
+ Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) +- Imports from ``pandas.core.common`` for functions such as ``is_datetime64_dtype`` are now removed. These are located in ``pandas.api.types``. (:issue:`13634`, :issue:`19769`) +- The ``infer_dst`` keyword in :meth:`Series.tz_localize`, :meth:`DatetimeIndex.tz_localize` + and :class:`DatetimeIndex` have been removed. ``infer_dst=True`` is equivalent to + ``ambiguous='infer'``, and ``infer_dst=False`` to ``ambiguous='raise'`` (:issue:`7963`). +- When ``.resample()`` was changed from an eager to a lazy operation, like ``.groupby()`` in v0.18.0, we put in place compatibility (with a ``FutureWarning``), + so operations would continue to work. This is now fully removed, so a ``Resampler`` will no longer forward compat operations (:issue:`20554`) +- Remove long deprecated ``axis=None`` parameter from ``.replace()`` (:issue:`20271`) + +.. _whatsnew_0230.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) +- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) +- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`) +- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... sped up through vectorization of underlying methods (:issue:`18092`) +- Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`) +- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) +- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) +- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) +- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`) +- Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`) +- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) +- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. 
(:issue:`18587`) +- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) +- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) +- Improved performance of :func:`MultiIndex.remove_unused_levels` when there are no unused levels, at the cost of a reduction in performance when there are (:issue:`19289`) +- Improved performance of :func:`Index.get_loc` for non-unique indexes (:issue:`19478`) +- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` (:issue:`15779`) +- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` (:issue:`11296`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`) +- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) +- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) +- Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) +- Improved performance of :func:`pandas.core.arrays.Categorical.from_codes` (:issue:`18501`) + +.. _whatsnew_0230.docs: + +Documentation changes +~~~~~~~~~~~~~~~~~~~~~ + +Thanks to all of the contributors who participated in the pandas Documentation +Sprint, which took place on March 10th. We had about 500 participants from over +30 locations across the world. You should notice that many of the +:ref:`API docstrings ` have greatly improved. + +There were too many simultaneous contributions to include a release note for each +improvement, but this `GitHub search`_ should give you an idea of how many docstrings +were improved. + +Special thanks to `Marc Garcia`_ for organizing the sprint. For more information, +read the `NumFOCUS blogpost`_ recapping the sprint. + +.. _GitHub search: https://github.com/pandas-dev/pandas/pulls?utf8=%E2%9C%93&q=is%3Apr+label%3ADocs+created%3A2018-03-10..2018-03-15+ +.. _NumFOCUS blogpost: https://www.numfocus.org/blog/worldwide-pandas-sprint/ +.. _Marc Garcia: https://github.com/datapythonista + +- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`) +- Consistency when introducing code samples, using either colon or period. + Rewrote some sentences for greater clarity, added more dynamic references + to functions, methods and classes. + (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) +- Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) + +.. _whatsnew_0230.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +.. warning:: + + A class of bugs were introduced in pandas 0.21 with ``CategoricalDtype`` that + affects the correctness of operations like ``merge``, ``concat``, and + indexing when comparing multiple unordered ``Categorical`` arrays that have + the same categories, but in a different order. 
We highly recommend upgrading + or manually aligning your categories before doing these operations. + +- Bug in ``Categorical.equals`` returning the wrong result when comparing two + unordered ``Categorical`` arrays with the same categories, but in a different + order (:issue:`16603`) +- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result + for unordered categoricals with the categories in a different order. + This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). +- Bug in :func:`pandas.merge` returning the wrong result when joining on an + unordered ``Categorical`` that had the same categories but in a different + order (:issue:`19551`) +- Bug in :meth:`CategoricalIndex.get_indexer` returning the wrong result when + ``target`` was an unordered ``Categorical`` that had the same categories as + ``self`` but in a different order (:issue:`19551`) +- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) +- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in :meth:`Series.str.split` with ``expand=True`` incorrectly raising an ``IndexError`` on empty strings (:issue:`20002`). +- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) +- Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19565`) +- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) +- Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``.
This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`) +- Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`) +- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in ``indices`` as missing value markers, rather than the last element of the Series (:issue:`20664`) + +Datetimelike +^^^^^^^^^^^^ + +- Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) +- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) +- Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) +- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) +- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`) +- Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) +- Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) +- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) +- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) +- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) +- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) +- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) +- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) +- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) +- Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where setting the ``freq`` attribute was not fully supported (:issue:`20678`) + +Timedelta +^^^^^^^^^ + +- Bug in :func:`Timedelta.__mul__` where multiplying by ``NaT`` returned ``NaT`` instead of raising a ``TypeError`` (:issue:`19819`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) +- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) +- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) +- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) +- Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) +- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) +- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`, :issue:`19834`) +- Bug in :func:`Timedelta.total_seconds()` causing precision errors, for example ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) +- Bug in :func:`Timedelta.__rmod__` where operating with a ``numpy.timedelta64`` returned a ``timedelta64`` object instead of a ``Timedelta`` (:issue:`19820`) +- Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mismatch (:issue:`19333`) +- Bug in indexing a :class:`TimedeltaIndex` with a ``np.timedelta64`` object which was raising a ``TypeError`` (:issue:`20393`) + + +Timezones +^^^^^^^^^ + +- Bug in creating a ``Series`` from an array that contains both tz-naive and tz-aware values will result in a ``Series`` whose dtype is tz-aware instead of object (:issue:`16406`) +- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`) +- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`) +- Bug in comparing :class:`DatetimeIndex`, which failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) +- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`174151`) +- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) +- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) +- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a 
timezone-aware index incorrectly raised (:issue:`16357`) +- Bug in :class:`DataFrame` constructor, where tz-aware DatetimeIndex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) +- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`) +- Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`) +- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`) +- Bug in :func:`melt` that converted tz-aware dtypes to tz-naive (:issue:`15785`) +- Bug in :func:`DataFrame.count` that raised a ``ValueError`` if :func:`DataFrame.dropna` was called for a single column with timezone-aware values (:issue:`13407`) + +Offsets +^^^^^^^ + +- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`) +- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) +- Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) +- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) + +Numeric +^^^^^^^ + +- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) +- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) +- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) +- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) +- Bug in :class:`DataFrame` flex arithmetic (e.g.
``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) +- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) +- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`) + +Strings +^^^^^^^ +- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising ``KeyError`` (:issue:`20671`) + + +Indexing +^^^^^^^^ + +- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) +- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) +- Bug in :func:`DataFrame.drop`, :meth:`Panel.drop`, :meth:`Series.drop`, :meth:`Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) +- Bug in indexing a datetimelike ``Index`` that raised ``ValueError`` instead of ``IndexError`` (:issue:`18386`). +- :func:`Index.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) +- :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) +- Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) +- Bug in indexing with iterator containing only missing keys, which raised no error (:issue:`20748`) +- Fixed inconsistency in ``.ix`` between list and scalar keys when the index has integer dtype and does not include the desired keys (:issue:`20753`) +- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) +- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) +- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) +- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) +- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) +- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`) +- Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) +- Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). 
+- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) +- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) +- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`) +- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) +- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`) +- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`) +- Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`) + +MultiIndex +^^^^^^^^^^ + +- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) +- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`) +- Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) +- Bug in :func:`MultiIndex.unique` when called on empty :class:`MultiIndex` (:issue:`20568`) +- Bug in :func:`MultiIndex.unique` which would not preserve level names (:issue:`20570`) +- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) +- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`) +- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) +- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) +- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) +- Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) +- Bug in indexing where nested indexers having only numpy arrays are handled incorrectly (:issue:`19686`) + + +IO +^^ + +- :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) +- :meth:`DataFrame.to_html` now has an option to add an id to the leading ``
    `` tag (:issue:`8496`) +- Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) +- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) +- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) +- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`) +- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) +- Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) +- Bug in :func:`DataFrame.to_latex()` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) +- Bug in :func:`DataFrame.to_latex()` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`) +- Bug in :func:`DataFrame.to_latex()` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`) +- Bug in :func:`DataFrame.to_latex()` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`) +- Bug in :func:`DataFrame.to_latex()` where missing space characters caused wrong escaping and produced non-valid latex in some cases (:issue:`20859`) +- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) +- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) +- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) +- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`) +- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) +- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`) +- Bug in :meth:`pandas.io.json.json_normalize` where sub-records are not properly normalized if any sub-records values are NoneType (:issue:`20030`) +- Bug in ``usecols`` parameter in :func:`read_csv` where error is not raised correctly when passing a string. (:issue:`20529`) +- Bug in :func:`HDFStore.keys` when reading a file with a soft link causes exception (:issue:`20523`) +- Bug in :func:`HDFStore.select_column` where a key which is not a valid store raised an ``AttributeError`` instead of a ``KeyError`` (:issue:`17912`) + +Plotting +^^^^^^^^ + +- Better error message when attempting to plot but matplotlib is not installed (:issue:`19810`). +- :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) +- Bug in :func:`DataFrame.plot` when ``x`` and ``y`` arguments given as positions caused incorrect referenced columns for line, bar and area plots (:issue:`20056`) +- Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). +- :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). 
+- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) + + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`) +- Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) +- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) +- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) +- Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) +- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) +- Bug in :func:`DataFrame.groupby` passing the ``on=`` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) +- Bug in :func:`DataFrame.resample().aggregate ` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) +- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`) +- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`) +- Bug in :func:`DataFrame.groupby` where transformations using ``np.all`` and ``np.any`` were raising a ``ValueError`` (:issue:`20653`) +- Bug in :func:`DataFrame.resample` where ``ffill``, ``bfill``, ``pad``, ``backfill``, ``fillna``, ``interpolate``, and ``asfreq`` were ignoring ``loffset``. (:issue:`20744`) +- Bug in :func:`DataFrame.groupby` when applying a function that has mixed data types and the user supplied function can fail on the grouping column (:issue:`20949`) +- Bug in :func:`DataFrameGroupBy.rolling().apply() ` where operations performed against the associated :class:`DataFrameGroupBy` object could impact the inclusion of the grouped item(s) in the result (:issue:`14013`) + +Sparse +^^^^^^ + +- Bug in which creating a :class:`SparseDataFrame` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) +- Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) +- Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) +- Bug in constructing a :class:`SparseArray`: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. 
(:issue:`19163`) + +Reshaping +^^^^^^^^^ + +- Bug in :func:`DataFrame.merge` where referencing a ``CategoricalIndex`` by name, where the ``by`` kwarg would ``KeyError`` (:issue:`20777`) +- Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) +- Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`) +- Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`) +- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) +- Fixed construction of a :class:`DataFrame` from a ``dict`` containing ``NaN`` as key (:issue:`18455`) +- Disabled construction of a :class:`Series` where len(index) > len(data) = 1, which previously would broadcast the data item, and now raises a ``ValueError`` (:issue:`18819`) +- Suppressed error in the construction of a :class:`DataFrame` from a ``dict`` containing scalar values when the corresponding keys are not included in the passed index (:issue:`18600`) + +- Fixed (changed from ``object`` to ``float64``) dtype of :class:`DataFrame` initialized with axes, no data, and ``dtype=int`` (:issue:`19646`) +- Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) +- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) +- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) +- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) +- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) +- Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) +- Bug in :func:`concat` when concatenating sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) +- Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) +- Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) +- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) +- Bug in :func:`~DataFrame.rename` where an Index of same-length tuples was converted to a MultiIndex (:issue:`19497`) +- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) +- Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) +- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`) +- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`) +- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`) +- Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`) +- Bug in :func:`get_dummies`, and :func:`select_dtypes`, where duplicate column names caused incorrect behavior (:issue:`20848`) +- Bug in :func:`isna`, which cannot handle ambiguous typed lists (:issue:`20675`) +- Bug in :func:`concat` which raises an error when concatenating TZ-aware dataframes and all-NaT dataframes (:issue:`12396`) +- Bug in :func:`concat` which raises an error when concatenating empty TZ-aware series (:issue:`18447`) + +Other +^^^^^ + +- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) +- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) +- Bug in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) + +.. _whatsnew_0.23.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.22.0..v0.23.0 diff --git a/doc/source/whatsnew/v0.23.1.rst b/doc/source/whatsnew/v0.23.1.rst new file mode 100644 index 00000000..b51368c8 --- /dev/null +++ b/doc/source/whatsnew/v0.23.1.rst @@ -0,0 +1,151 @@ +.. _whatsnew_0231: + +What's new in 0.23.1 (June 12, 2018) +------------------------------------ + +{{ header }} + + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.1 + :local: + :backlinks: none + +.. _whatsnew_0231.fixed_regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +**Comparing Series with datetime.date** + +We've reverted a 0.23.0 change to comparing a :class:`Series` holding datetimes and a ``datetime.date`` object (:issue:`21152`). 
+In pandas 0.22 and earlier, comparing a Series holding datetimes and ``datetime.date`` objects would coerce the ``datetime.date`` to a datetime before comparing. +This was inconsistent with Python, NumPy, and :class:`DatetimeIndex`, which never consider a datetime and ``datetime.date`` equal. + +In 0.23.0, we unified operations between DatetimeIndex and Series, and in the process changed comparisons between a Series of datetimes and ``datetime.date`` without warning. + +We've temporarily restored the 0.22.0 behavior, so datetimes and dates may again compare equal, but will restore the 0.23.0 behavior in a future release. + +To summarize, here's the behavior in 0.22.0, 0.23.0, 0.23.1: + +.. code-block:: python + + # 0.22.0... Silently coerce the datetime.date + >>> import datetime + >>> pd.Series(pd.date_range('2017', periods=2)) == datetime.date(2017, 1, 1) + 0 True + 1 False + dtype: bool + + # 0.23.0... Do not coerce the datetime.date + >>> pd.Series(pd.date_range('2017', periods=2)) == datetime.date(2017, 1, 1) + 0 False + 1 False + dtype: bool + + # 0.23.1... Coerce the datetime.date with a warning + >>> pd.Series(pd.date_range('2017', periods=2)) == datetime.date(2017, 1, 1) + /bin/python:1: FutureWarning: Comparing Series of datetimes with 'datetime.date'. Currently, the + 'datetime.date' is coerced to a datetime. In the future pandas will + not coerce, and the values not compare equal to the 'datetime.date'. + To retain the current behavior, convert the 'datetime.date' to a + datetime with 'pd.Timestamp'. + #!/bin/python3 + 0 True + 1 False + dtype: bool + +In addition, ordering comparisons will raise a ``TypeError`` in the future. + +**Other fixes** + +- Reverted the ability of :func:`~DataFrame.to_sql` to perform multivalue + inserts as this caused a regression in certain cases (:issue:`21103`). + In the future this will be made configurable. +- Fixed regression in the :attr:`DatetimeIndex.date` and :attr:`DatetimeIndex.time` + attributes in case of timezone-aware data: :attr:`DatetimeIndex.time` returned + a tz-aware time instead of tz-naive (:issue:`21267`) and :attr:`DatetimeIndex.date` + returned incorrect date when the input date has a non-UTC timezone (:issue:`21230`). +- Fixed regression in :meth:`pandas.io.json.json_normalize` when called with ``None`` values + in nested levels in JSON, and to not drop keys with value as ``None`` (:issue:`21158`, :issue:`21356`). +- Bug in :meth:`~DataFrame.to_csv` causing an encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`) +- Bug preventing pandas from being importable with -OO optimization (:issue:`21071`) +- Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when the individual categories are iterable and ``value`` is an iterable (:issue:`21097`, :issue:`19788`) +- Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`) +- Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing + values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`) +- Fixed regression in merging on boolean index/columns (:issue:`21119`). + +.. _whatsnew_0231.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of :meth:`CategoricalIndex.is_monotonic_increasing`, :meth:`CategoricalIndex.is_monotonic_decreasing` and :meth:`CategoricalIndex.is_monotonic` (:issue:`21025`) +- Improved performance of :meth:`CategoricalIndex.is_unique` (:issue:`21107`) + + +..
_whatsnew_0231.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Groupby/resample/rolling** + +- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`) +- Bug in :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` where the fill within a grouping would not always be applied as intended due to the implementations' use of a non-stable sort (:issue:`21207`) +- Bug in :func:`pandas.core.groupby.GroupBy.rank` where results did not scale to 100% when specifying ``method='dense'`` and ``pct=True`` +- Bug in :func:`pandas.DataFrame.rolling` and :func:`pandas.Series.rolling` which incorrectly accepted a window size of 0 rather than raising (:issue:`21286`) + +**Data-type specific** + +- Bug in :meth:`Series.str.replace()` where the method threw a ``TypeError`` on Python 3.5.2 (:issue:`21078`) +- Bug in :class:`Timedelta` where passing a float with a unit would prematurely round the float precision (:issue:`14156`) +- Bug in :func:`pandas.testing.assert_index_equal` which incorrectly raised an ``AssertionError`` when comparing two :class:`CategoricalIndex` objects with ``check_categorical=False`` (:issue:`19776`) + +**Sparse** + +- Bug in :attr:`SparseArray.shape` which previously only returned the shape of :attr:`SparseArray.sp_values` (:issue:`21126`) + +**Indexing** + +- Bug in :meth:`Series.reset_index` where an appropriate error was not raised with an invalid level name (:issue:`20925`) +- Bug in :func:`interval_range` when ``start``/``periods`` or ``end``/``periods`` are specified with a float ``start`` or ``end`` (:issue:`21161`) +- Bug in :meth:`MultiIndex.set_names` where an error was raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`) +- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, :issue:`21253`) +- Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`) + +**Plotting** + +- New keywords ``sharex`` and ``sharey`` to turn on/off sharing of the x/y-axis by subplots generated with ``DataFrame.groupby().boxplot()`` (:issue:`20968`) + +**I/O** + +- Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`) +- Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`) +- Bug in :meth:`read_stata` and :class:`StataReader` which did not correctly decode utf-8 strings on Python 3 from Stata 14 files (dta version 118) (:issue:`21244`) +- Bug in :func:`read_json` where reading an empty JSON schema with ``orient='table'`` back to a :class:`DataFrame` caused an error (:issue:`21287`) + +**Reshaping** + +- Bug in :func:`concat` where an error was raised when concatenating :class:`Series` with numpy scalar and tuple names (:issue:`21015`) +- Bug in :func:`concat` warning message providing the wrong guidance for future behavior (:issue:`21101`) + +**Other** + +- Tab completion on :class:`Index` in IPython no longer outputs deprecation warnings (:issue:`21125`) +- Bug preventing pandas from being used on Windows without the C++ redistributable installed (:issue:`21106`) + +.. _whatsnew_0.23.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.23.0..v0.23.1 diff --git a/doc/source/whatsnew/v0.23.2.rst b/doc/source/whatsnew/v0.23.2.rst new file mode 100644 index 00000000..99650e82 --- /dev/null +++ b/doc/source/whatsnew/v0.23.2.rst @@ -0,0 +1,123 @@ +.. _whatsnew_0232: + +What's new in 0.23.2 (July 5, 2018) +----------------------------------- + +{{ header }} + + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. note:: + + pandas 0.23.2 is first pandas release that's compatible with + Python 3.7 (:issue:`20552`) + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.2 + :local: + :backlinks: none + +.. _whatsnew_0232.enhancements: + +Logical reductions over entire DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`DataFrame.all` and :meth:`DataFrame.any` now accept ``axis=None`` to reduce over all axes to a scalar (:issue:`19976`) + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2], "B": [True, False]}) + df.all(axis=None) + + +This also provides compatibility with NumPy 1.15, which now dispatches to ``DataFrame.all``. +With NumPy 1.15 and pandas 0.23.1 or earlier, :func:`numpy.all` will no longer reduce over every axis: + +.. code-block:: python + + >>> # NumPy 1.15, pandas 0.23.1 + >>> np.any(pd.DataFrame({"A": [False], "B": [False]})) + A False + B False + dtype: bool + +With pandas 0.23.2, that will correctly return False, as it did with NumPy < 1.15. + +.. ipython:: python + + np.any(pd.DataFrame({"A": [False], "B": [False]})) + + +.. _whatsnew_0232.fixed_regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) +- Re-allowed duplicate level names of a ``MultiIndex``. Accessing a level that has a duplicate name by name still raises an error (:issue:`19029`). +- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) +- Fixed printing of DataFrames with hierarchical columns with long names (:issue:`21180`) +- Fixed regression in :meth:`~DataFrame.reindex` and :meth:`~DataFrame.groupby` + with a MultiIndex or multiple keys that contains categorical datetime-like values (:issue:`21390`). +- Fixed regression in unary negative operations with object dtype (:issue:`21380`) +- Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) +- Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) + + +Build changes +~~~~~~~~~~~~~ + +- The source and binary distributions no longer include test data files, resulting in smaller download sizes. Tests relying on these data files will be skipped when using ``pandas.test()``. (:issue:`19320`) + +.. 
_whatsnew_0232.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Conversion** + +- Bug in constructing :class:`Index` with an iterator or generator (:issue:`21470`) +- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`) + +**Indexing** + +- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) +- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`) +- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`) +- Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`). + +**I/O** + +- Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) +- Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) + +**Categorical** + +- Bug in rendering :class:`Series` with ``Categorical`` dtype in rare conditions under Python 2.7 (:issue:`21002`) + +**Timezones** + +- Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) +- Bug in comparing :class:`DataFrame` with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) +- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) +- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError`` (:issue:`8910`) +- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) +- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) +- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) + +**Timedelta** + +- Bug in :class:`Timedelta` where non-zero timedeltas shorter than 1 microsecond were considered False (:issue:`21484`) + +.. _whatsnew_0.23.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.1..v0.23.2 diff --git a/doc/source/whatsnew/v0.23.3.rst b/doc/source/whatsnew/v0.23.3.rst new file mode 100644 index 00000000..bb8862a8 --- /dev/null +++ b/doc/source/whatsnew/v0.23.3.rst @@ -0,0 +1,16 @@ +.. _whatsnew_0233: + +What's new in 0.23.3 (July 7, 2018) +----------------------------------- + +{{ header }} + +This release fixes a build issue with the sdist for Python 3.7 (:issue:`21785`) +There are no other changes. + +.. _whatsnew_0.23.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.2..v0.23.3 diff --git a/doc/source/whatsnew/v0.23.4.rst b/doc/source/whatsnew/v0.23.4.rst new file mode 100644 index 00000000..eadac6f5 --- /dev/null +++ b/doc/source/whatsnew/v0.23.4.rst @@ -0,0 +1,47 @@ +.. _whatsnew_0234: + +What's new in 0.23.4 (August 3, 2018) +------------------------------------- + +{{ header }} + + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. 
+ See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.4 + :local: + :backlinks: none + +.. _whatsnew_0234.fixed_regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Python 3.7 with Windows gave all missing values for rolling variance calculations (:issue:`21813`) + +.. _whatsnew_0234.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Groupby/resample/rolling** + +- Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) +- Bug in ``roll_quantile`` caused a memory leak when calling ``.rolling(...).quantile(q)`` with ``q`` in (0,1) (:issue:`21965`) + +**Missing** + +- Bug in :func:`Series.clip` and :func:`DataFrame.clip` cannot accept list-like threshold containing ``NaN`` (:issue:`19992`) + +.. _whatsnew_0.23.4.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.3..v0.23.4 diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst new file mode 100644 index 00000000..f5175283 --- /dev/null +++ b/doc/source/whatsnew/v0.24.0.rst @@ -0,0 +1,1936 @@ +.. _whatsnew_0240: + +What's new in 0.24.0 (January 25, 2019) +--------------------------------------- + +.. warning:: + + The 0.24.x series of releases will be the last to support Python 2. Future feature + releases will support Python 3 only. See `Dropping Python 2.7 `_ for more + details. + +{{ header }} + +This is a major release from 0.23.4 and includes a number of API changes, new +features, enhancements, and performance improvements along with a large number +of bug fixes. + +Highlights include: + +* :ref:`Optional Integer NA Support ` +* :ref:`New APIs for accessing the array backing a Series or Index ` +* :ref:`A new top-level method for creating arrays ` +* :ref:`Store Interval and Period data in a Series or DataFrame ` +* :ref:`Support for joining on two MultiIndexes ` + + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +These are the changes in pandas 0.24.0. See :ref:`release` for a full changelog +including other versions of pandas. + + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_0240.enhancements.intna: + +Optional integer NA support +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas has gained the ability to hold integer dtypes with missing values. This long requested feature is enabled through the use of :ref:`extension types `. + +.. note:: + + IntegerArray is currently experimental. Its API or implementation may + change without warning. + +We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value +marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`, :issue:`21789`, :issue:`22346`) + +.. ipython:: python + + s = pd.Series([1, 2, np.nan], dtype='Int64') + s + + +Operations on these dtypes will propagate ``NaN`` as other pandas operations. + +.. ipython:: python + + # arithmetic + s + 1 + + # comparison + s == 1 + + # indexing + s.iloc[1:3] + + # operate with other dtypes + s + s.iloc[1:3].astype('Int8') + + # coerce when needed + s + 0.01 + +These dtypes can operate as part of a ``DataFrame``. + +.. ipython:: python + + df = pd.DataFrame({'A': s, 'B': [1, 1, 3], 'C': list('aab')}) + df + df.dtypes + + +These dtypes can be merged, reshaped, and casted. + +.. 
ipython:: python + + pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes + df['A'].astype(float) + +Reduction and groupby operations such as ``sum`` work. + +.. ipython:: python + + df.sum() + df.groupby('B').A.sum() + +.. warning:: + + The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date. + +See :ref:`integer_na` for more. + + +.. _whatsnew_0240.values_api: + +Accessing the values in a Series or Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:attr:`Series.array` and :attr:`Index.array` have been added for extracting the array backing a +``Series`` or ``Index``. (:issue:`19954`, :issue:`23623`) + +.. ipython:: python + + idx = pd.period_range('2000', periods=4) + idx.array + pd.Series(idx).array + +Historically, this would have been done with ``series.values``, but with +``.values`` it was unclear whether the returned value would be the actual array, +some transformation of it, or one of pandas custom arrays (like +``Categorical``). For example, with :class:`PeriodIndex`, ``.values`` generates +a new ndarray of period objects each time. + +.. ipython:: python + + idx.values + id(idx.values) + id(idx.values) + +If you need an actual NumPy array, use :meth:`Series.to_numpy` or :meth:`Index.to_numpy`. + +.. ipython:: python + + idx.to_numpy() + pd.Series(idx).to_numpy() + +For Series and Indexes backed by normal NumPy arrays, :attr:`Series.array` will return a +new :class:`arrays.PandasArray`, which is a thin (no-copy) wrapper around a +:class:`numpy.ndarray`. :class:`~arrays.PandasArray` isn't especially useful on its own, +but it does provide the same interface as any extension array defined in pandas or by +a third-party library. + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + ser.array + ser.to_numpy() + +We haven't removed or deprecated :attr:`Series.values` or :attr:`DataFrame.values`, but we +highly recommend and using ``.array`` or ``.to_numpy()`` instead. + +See :ref:`Dtypes ` and :ref:`Attributes and Underlying Data ` for more. + + +.. _whatsnew_0240.enhancements.array: + +``pandas.array``: a new top-level method for creating arrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new top-level method :func:`array` has been added for creating 1-dimensional arrays (:issue:`22860`). +This can be used to create any :ref:`extension array `, including +extension arrays registered by :ref:`3rd party libraries `. +See the :ref:`dtypes docs ` for more on extension arrays. + +.. ipython:: python + + pd.array([1, 2, np.nan], dtype='Int64') + pd.array(['a', 'b', 'c'], dtype='category') + +Passing data for which there isn't dedicated extension type (e.g. float, integer, etc.) +will return a new :class:`arrays.PandasArray`, which is just a thin (no-copy) +wrapper around a :class:`numpy.ndarray` that satisfies the pandas extension array interface. + +.. ipython:: python + + pd.array([1, 2, 3]) + +On their own, a :class:`~arrays.PandasArray` isn't a very useful object. +But if you need write low-level code that works generically for any +:class:`~pandas.api.extensions.ExtensionArray`, :class:`~arrays.PandasArray` +satisfies that need. + +Notice that by default, if no ``dtype`` is specified, the dtype of the returned +array is inferred from the data. In particular, note that the first example of +``[1, 2, np.nan]`` would have returned a floating-point array, since ``NaN`` +is a float. + +.. ipython:: python + + pd.array([1, 2, np.nan]) + + +.. 
_whatsnew_0240.enhancements.interval: + +Storing Interval and Period data in Series and DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Interval` and :class:`Period` data may now be stored in a :class:`Series` or :class:`DataFrame`, in addition to an +:class:`IntervalIndex` and :class:`PeriodIndex` like previously (:issue:`19453`, :issue:`22862`). + +.. ipython:: python + + ser = pd.Series(pd.interval_range(0, 5)) + ser + ser.dtype + +For periods: + +.. ipython:: python + + pser = pd.Series(pd.period_range("2000", freq="D", periods=5)) + pser + pser.dtype + +Previously, these would be cast to a NumPy array with object dtype. In general, +this should result in better performance when storing an array of intervals or periods +in a :class:`Series` or column of a :class:`DataFrame`. + +Use :attr:`Series.array` to extract the underlying array of intervals or periods +from the ``Series``: + +.. ipython:: python + + ser.array + pser.array + +These return an instance of :class:`arrays.IntervalArray` or :class:`arrays.PeriodArray`, +the new extension arrays that back interval and period data. + +.. warning:: + + For backwards compatibility, :attr:`Series.values` continues to return + a NumPy array of objects for Interval and Period data. We recommend + using :attr:`Series.array` when you need the array of data stored in the + ``Series``, and :meth:`Series.to_numpy` when you know you need a NumPy array. + + See :ref:`Dtypes ` and :ref:`Attributes and Underlying Data ` + for more. + + +.. _whatsnew_0240.enhancements.join_with_two_multiindexes: + +Joining with two multi-indexes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.merge` and :func:`DataFrame.join` can now be used to join multi-indexed ``Dataframe`` instances on the overlapping index levels (:issue:`6360`) + +See the :ref:`Merge, join, and concatenate +` documentation section. + +.. ipython:: python + + index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'), + ('K1', 'X2')], + names=['key', 'X']) + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2']}, index=index_left) + + index_right = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'), + ('K2', 'Y2'), ('K2', 'Y3')], + names=['key', 'Y']) + + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}, index=index_right) + + left.join(right) + +For earlier versions this can be done using the following. + +.. ipython:: python + + pd.merge(left.reset_index(), right.reset_index(), + on=['key'], how='inner').set_index(['key', 'X', 'Y']) + +.. _whatsnew_0240.enhancements.read_html: + +Function ``read_html`` enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_html` previously ignored ``colspan`` and ``rowspan`` attributes. +Now it understands them, treating them as sequences of cells with the same +value. (:issue:`17054`) + +.. ipython:: python + + result = pd.read_html(""" +
      <table> +        <thead> +          <tr> +            <th>A</th><th>B</th><th>C</th> +          </tr> +        </thead> +        <tbody> +          <tr> +            <td colspan="2">1</td><td>2</td> +          </tr> +        </tbody> +      </table>
    """) + +*Previous behavior*: + +.. code-block:: ipython + + In [13]: result + Out [13]: + [ A B C + 0 1 2 NaN] + +*New behavior*: + +.. ipython:: python + + result + + +New ``Styler.pipe()`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The :class:`~pandas.io.formats.style.Styler` class has gained a +:meth:`~pandas.io.formats.style.Styler.pipe` method. This provides a +convenient way to apply users' predefined styling functions, and can help reduce +"boilerplate" when using DataFrame styling functionality repeatedly within a notebook. (:issue:`23229`) + +.. ipython:: python + + df = pd.DataFrame({'N': [1250, 1500, 1750], 'X': [0.25, 0.35, 0.50]}) + + def format_and_align(styler): + return (styler.format({'N': '{:,}', 'X': '{:.1%}'}) + .set_properties(**{'text-align': 'right'})) + + df.style.pipe(format_and_align).set_caption('Summary of results.') + +Similar methods already exist for other classes in pandas, including :meth:`DataFrame.pipe`, +:meth:`GroupBy.pipe() `, and :meth:`Resampler.pipe() `. + +.. _whatsnew_0240.enhancements.rename_axis: + +Renaming names in a MultiIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.rename_axis` now supports ``index`` and ``columns`` arguments +and :func:`Series.rename_axis` supports ``index`` argument (:issue:`19978`). + +This change allows a dictionary to be passed so that some of the names +of a ``MultiIndex`` can be changed. + +Example: + +.. ipython:: python + + mi = pd.MultiIndex.from_product([list('AB'), list('CD'), list('EF')], + names=['AB', 'CD', 'EF']) + df = pd.DataFrame(list(range(len(mi))), index=mi, columns=['N']) + df + df.rename_axis(index={'CD': 'New'}) + +See the :ref:`Advanced documentation on renaming` for more details. + +.. _whatsnew_0240.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) +- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) +- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups ` for more information (:issue:`15475`, :issue:`15506`). +- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing + the user to override the engine's default behavior to include or omit the + dataframe's indexes from the resulting Parquet file. (:issue:`20768`) +- :func:`read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`) +- :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`) +- :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`) +- :func:`DataFrame.to_html` now accepts ``render_links`` as an argument, allowing the user to generate HTML with links to any URLs that appear in the DataFrame. + See the :ref:`section on writing HTML ` in the IO docs for example usage. (:issue:`2679`) +- :func:`pandas.read_csv` now supports pandas extension types as an argument to ``dtype``, allowing the user to use pandas extension types when reading CSVs. 
(:issue:`23228`) +- The :meth:`~DataFrame.shift` method now accepts ``fill_value`` as an argument, allowing the user to specify a value which will be used instead of NA/NaT in the empty periods. (:issue:`15486`) +- :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) +- :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether ``NaN``/``NaT`` values should be considered (:issue:`17534`) +- :func:`DataFrame.to_csv` and :func:`Series.to_csv` now support the ``compression`` keyword when a file handle is passed. (:issue:`21227`) +- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) +- :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`) +- Added support for reading from/writing to Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`, :issue:`23094`) +- :func:`DataFrame.to_gbq` and :func:`read_gbq` signature and documentation updated to + reflect changes from the `pandas-gbq library version 0.8.0 + `__. + Adds a ``credentials`` argument, which enables the use of any kind of + `google-auth credentials + `__. (:issue:`21627`, + :issue:`22557`, :issue:`23662`) +- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`) +- :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`) +- :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`) +- :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) +- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). + The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). +- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`). +- :func:`to_timedelta` now supports iso-formatted timedelta strings (:issue:`21877`) +- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` objects in the constructor (:issue:`2193`) +- :class:`DatetimeIndex` has gained the :attr:`DatetimeIndex.timetz` attribute. This returns the local time with timezone information. (:issue:`21358`) +- :meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, and :meth:`~Timestamp.floor` for :class:`DatetimeIndex` and :class:`Timestamp` + now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) + and a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`22647`) +- The result of :meth:`~DataFrame.resample` is now iterable similar to ``groupby()`` (:issue:`15314`). 
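+ +  For example, a minimal sketch (with hypothetical data) of the new iteration behavior: + +  .. code-block:: python + +     import numpy as np +     import pandas as pd + +     ser = pd.Series(np.arange(4), index=pd.date_range('2018-01-01', periods=4, freq='12H')) + +     # Iterating the resampler yields (group label, group) pairs, like groupby() +     for label, group in ser.resample('D'): +         print(label, group.sum()) +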
+- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`pandas.core.resample.Resampler.quantile` (:issue:`15023`). +- :meth:`DataFrame.resample` and :meth:`Series.resample` with a :class:`PeriodIndex` will now respect the ``base`` argument in the same fashion as with a :class:`DatetimeIndex`. (:issue:`23882`) +- :meth:`pandas.api.types.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``, + all instances of ``set`` will not be considered "list-like" anymore (:issue:`23061`) +- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). +- :meth:`Categorical.from_codes` now can take a ``dtype`` parameter as an alternative to passing ``categories`` and ``ordered`` (:issue:`24398`). +- New attribute ``__git_version__`` will return git commit sha of current build (:issue:`21295`). +- Compatibility with Matplotlib 3.0 (:issue:`22790`). +- Added :meth:`Interval.overlaps`, :meth:`arrays.IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) +- :func:`read_fwf` now accepts keyword ``infer_nrows`` (:issue:`15138`). +- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`, :issue:`24466`) +- :meth:`Index.difference`, :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference` now have an optional ``sort`` parameter to control whether the results should be sorted if possible (:issue:`17839`, :issue:`24471`) +- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) +- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. +- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`) +- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained the ``axis`` parameter (:issue:`8839`) +- :meth:`DataFrame.to_records` now accepts ``index_dtypes`` and ``column_dtypes`` parameters to allow different data types in stored column and index records (:issue:`18146`) +- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`) +- :func:`pandas.DataFrame.to_sql` has gained the ``method`` argument to control SQL insertion clause. See the :ref:`insertion method ` section in the documentation. (:issue:`8953`) +- :meth:`DataFrame.corrwith` now supports Spearman's rank correlation, Kendall's tau as well as callable correlation methods. (:issue:`21925`) +- :meth:`DataFrame.to_json`, :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_pickle`, and other export methods now support tilde(~) in path argument. (:issue:`23473`) + +.. _whatsnew_0240.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +pandas 0.24.0 includes a number of API breaking changes. + + +.. 
_whatsnew_0240.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We have updated our minimum supported versions of dependencies (:issue:`21242`, :issue:`18742`, :issue:`23774`, :issue:`24767`). +If installed, we now require: + ++-----------------+-----------------+----------+ +| Package | Minimum Version | Required | ++=================+=================+==========+ +| numpy | 1.12.0 | X | ++-----------------+-----------------+----------+ +| bottleneck | 1.2.0 | | ++-----------------+-----------------+----------+ +| fastparquet | 0.2.1 | | ++-----------------+-----------------+----------+ +| matplotlib | 2.0.0 | | ++-----------------+-----------------+----------+ +| numexpr | 2.6.1 | | ++-----------------+-----------------+----------+ +| pandas-gbq | 0.8.0 | | ++-----------------+-----------------+----------+ +| pyarrow | 0.9.0 | | ++-----------------+-----------------+----------+ +| pytables | 3.4.2 | | ++-----------------+-----------------+----------+ +| scipy | 0.18.1 | | ++-----------------+-----------------+----------+ +| xlrd | 1.0.0 | | ++-----------------+-----------------+----------+ +| pytest (dev) | 3.6 | | ++-----------------+-----------------+----------+ + +Additionally we no longer depend on ``feather-format`` for feather based storage +and replaced it with references to ``pyarrow`` (:issue:`21639` and :issue:`23053`). + +.. _whatsnew_0240.api_breaking.csv_line_terminator: + +``os.linesep`` is used for ``line_terminator`` of ``DataFrame.to_csv`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.to_csv` now uses :func:`os.linesep` rather than ``'\n'`` +for the default line terminator (:issue:`20353`). +This change only affects when running on Windows, where ``'\r\n'`` was used for line terminator +even when ``'\n'`` was passed in ``line_terminator``. + +*Previous behavior* on Windows: + +.. code-block:: ipython + + In [1]: data = pd.DataFrame({"string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"]}) + + In [2]: # When passing file PATH to to_csv, + ...: # line_terminator does not work, and csv is saved with '\r\n'. + ...: # Also, this converts all '\n's in the data to '\r\n'. + ...: data.to_csv("test.csv", index=False, line_terminator='\n') + + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + Out[3]: b'string_with_lf,string_with_crlf\r\n"a\r\nbc","a\r\r\nbc"\r\n' + + In [4]: # When passing file OBJECT with newline option to + ...: # to_csv, line_terminator works. + ...: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f, index=False, line_terminator='\n') + + In [5]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) + Out[5]: b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' + + +*New behavior* on Windows: + +Passing ``line_terminator`` explicitly, set the ``line terminator`` to that character. + +.. code-block:: ipython + + In [1]: data = pd.DataFrame({"string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"]}) + + In [2]: data.to_csv("test.csv", index=False, line_terminator='\n') + + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + Out[3]: b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' + + +On Windows, the value of ``os.linesep`` is ``'\r\n'``, so if ``line_terminator`` is not +set, ``'\r\n'`` is used for line terminator. + +.. 
code-block:: ipython + + In [1]: data = pd.DataFrame({"string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"]}) + + In [2]: data.to_csv("test.csv", index=False) + + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + Out[3]: b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' + + +For file objects, specifying ``newline`` is not sufficient to set the line terminator. +You must pass in the ``line_terminator`` explicitly, even in this case. + +.. code-block:: ipython + + In [1]: data = pd.DataFrame({"string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"]}) + + In [2]: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f, index=False) + + In [3]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) + Out[3]: b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' + +.. _whatsnew_0240.bug_fixes.nan_with_str_dtype: + +Proper handling of ``np.NaN`` in a string data-typed column with the Python engine +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There was bug in :func:`read_excel` and :func:`read_csv` with the Python +engine, where missing values turned to ``'nan'`` with ``dtype=str`` and +``na_filter=True``. Now, these missing values are converted to the string +missing indicator, ``np.nan``. (:issue:`20377`) + +.. ipython:: python + :suppress: + + from io import StringIO + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: data = 'a,b,c\n1,,3\n4,5,6' + In [6]: df = pd.read_csv(StringIO(data), engine='python', dtype=str, na_filter=True) + In [7]: df.loc[0, 'b'] + Out[7]: + 'nan' + +*New behavior*: + +.. ipython:: python + + data = 'a,b,c\n1,,3\n4,5,6' + df = pd.read_csv(StringIO(data), engine='python', dtype=str, na_filter=True) + df.loc[0, 'b'] + +Notice how we now instead output ``np.nan`` itself instead of a stringified form of it. + +.. _whatsnew_0240.api.timezone_offset_parsing: + +Parsing datetime strings with timezone offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, parsing datetime strings with UTC offsets with :func:`to_datetime` +or :class:`DatetimeIndex` would automatically convert the datetime to UTC +without timezone localization. This is inconsistent from parsing the same +datetime string with :class:`Timestamp` which would preserve the UTC +offset in the ``tz`` attribute. Now, :func:`to_datetime` preserves the UTC +offset in the ``tz`` attribute when all the datetime strings have the same +UTC offset (:issue:`17697`, :issue:`11736`, :issue:`22457`) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30") + Out[2]: Timestamp('2015-11-18 10:00:00') + + In [3]: pd.Timestamp("2015-11-18 15:30:00+05:30") + Out[3]: Timestamp('2015-11-18 15:30:00+0530', tz='pytz.FixedOffset(330)') + + # Different UTC offsets would automatically convert the datetimes to UTC (without a UTC timezone) + In [4]: pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) + Out[4]: DatetimeIndex(['2015-11-18 10:00:00', '2015-11-18 10:00:00'], dtype='datetime64[ns]', freq=None) + +*New behavior*: + +.. ipython:: python + + pd.to_datetime("2015-11-18 15:30:00+05:30") + pd.Timestamp("2015-11-18 15:30:00+05:30") + +Parsing datetime strings with the same UTC offset will preserve the UTC offset in the ``tz`` + +.. 
ipython:: python + + pd.to_datetime(["2015-11-18 15:30:00+05:30"] * 2) + +Parsing datetime strings with different UTC offsets will now create an Index of +``datetime.datetime`` objects with different UTC offsets + +.. ipython:: python + + idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", + "2015-11-18 16:30:00+06:30"]) + idx + idx[0] + idx[1] + +Passing ``utc=True`` will mimic the previous behavior but will correctly indicate +that the dates have been converted to UTC + +.. ipython:: python + + pd.to_datetime(["2015-11-18 15:30:00+05:30", + "2015-11-18 16:30:00+06:30"], utc=True) + + +.. _whatsnew_0240.api_breaking.read_csv_mixed_tz: + +Parsing mixed-timezones with :func:`read_csv` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_csv` no longer silently converts mixed-timezone columns to UTC (:issue:`24987`). + +*Previous behavior* + +.. code-block:: python + + >>> import io + >>> content = """\ + ... a + ... 2000-01-01T00:00:00+05:00 + ... 2000-01-01T00:00:00+06:00""" + >>> df = pd.read_csv(io.StringIO(content), parse_dates=['a']) + >>> df.a + 0 1999-12-31 19:00:00 + 1 1999-12-31 18:00:00 + Name: a, dtype: datetime64[ns] + +*New behavior* + +.. ipython:: python + + import io + content = """\ + a + 2000-01-01T00:00:00+05:00 + 2000-01-01T00:00:00+06:00""" + df = pd.read_csv(io.StringIO(content), parse_dates=['a']) + df.a + +As can be seen, the ``dtype`` is object; each value in the column is a string. +To convert the strings to an array of datetimes, the ``date_parser`` argument + +.. ipython:: python + + df = pd.read_csv(io.StringIO(content), parse_dates=['a'], + date_parser=lambda col: pd.to_datetime(col, utc=True)) + df.a + +See :ref:`whatsnew_0240.api.timezone_offset_parsing` for more. + +.. _whatsnew_0240.api_breaking.period_end_time: + +Time values in ``dt.end_time`` and ``to_timestamp(how='end')`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The time values in :class:`Period` and :class:`PeriodIndex` objects are now set +to '23:59:59.999999999' when calling :attr:`Series.dt.end_time`, :attr:`Period.end_time`, +:attr:`PeriodIndex.end_time`, :func:`Period.to_timestamp()` with ``how='end'``, +or :func:`PeriodIndex.to_timestamp()` with ``how='end'`` (:issue:`17157`) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: p = pd.Period('2017-01-01', 'D') + In [3]: pi = pd.PeriodIndex([p]) + + In [4]: pd.Series(pi).dt.end_time[0] + Out[4]: Timestamp(2017-01-01 00:00:00) + + In [5]: p.end_time + Out[5]: Timestamp(2017-01-01 23:59:59.999999999) + +*New behavior*: + +Calling :attr:`Series.dt.end_time` will now result in a time of '23:59:59.999999999' as +is the case with :attr:`Period.end_time`, for example + +.. ipython:: python + + p = pd.Period('2017-01-01', 'D') + pi = pd.PeriodIndex([p]) + + pd.Series(pi).dt.end_time[0] + + p.end_time + +.. _whatsnew_0240.api_breaking.datetime_unique: + +Series.unique for timezone-aware data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The return type of :meth:`Series.unique` for datetime with timezone values has changed +from an :class:`numpy.ndarray` of :class:`Timestamp` objects to a :class:`arrays.DatetimeArray` (:issue:`24024`). + +.. ipython:: python + + ser = pd.Series([pd.Timestamp('2000', tz='UTC'), + pd.Timestamp('2000', tz='UTC')]) + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: ser.unique() + Out[3]: array([Timestamp('2000-01-01 00:00:00+0000', tz='UTC')], dtype=object) + + +*New behavior*: + +.. ipython:: python + + ser.unique() + + +.. 
_whatsnew_0240.api_breaking.sparse_values: + +Sparse data structure refactor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``SparseArray``, the array backing ``SparseSeries`` and the columns in a ``SparseDataFrame``, +is now an extension array (:issue:`21978`, :issue:`19056`, :issue:`22835`). +To conform to this interface and for consistency with the rest of pandas, some API breaking +changes were made: + +- ``SparseArray`` is no longer a subclass of :class:`numpy.ndarray`. To convert a ``SparseArray`` to a NumPy array, use :func:`numpy.asarray`. +- ``SparseArray.dtype`` and ``SparseSeries.dtype`` are now instances of :class:`SparseDtype`, rather than ``np.dtype``. Access the underlying dtype with ``SparseDtype.subtype``. +- ``numpy.asarray(sparse_array)`` now returns a dense array with all the values, not just the non-fill-value values (:issue:`14167`) +- ``SparseArray.take`` now matches the API of :meth:`pandas.api.extensions.ExtensionArray.take` (:issue:`19506`): + + * The default value of ``allow_fill`` has changed from ``False`` to ``True``. + * The ``out`` and ``mode`` parameters are now longer accepted (previously, this raised if they were specified). + * Passing a scalar for ``indices`` is no longer allowed. + +- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. +- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray. +- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed. +- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`). +- The result of :meth:`Series.where` is now a ``Series`` with sparse values, like with other extension arrays (:issue:`24077`) + +Some new warnings are issued for operations that require or are likely to materialize a large dense array: + +- A :class:`errors.PerformanceWarning` is issued when using fillna with a ``method``, as a dense array is constructed to create the filled array. Filling with a ``value`` is the efficient way to fill a sparse array. +- A :class:`errors.PerformanceWarning` is now issued when concatenating sparse Series with differing fill values. The fill value from the first sparse array continues to be used. + +In addition to these API breaking changes, many :ref:`Performance Improvements and Bug Fixes have been made `. + +Finally, a ``Series.sparse`` accessor was added to provide sparse-specific methods like :meth:`Series.sparse.from_coo`. + +.. ipython:: python + + s = pd.Series([0, 0, 1, 1, 1], dtype='Sparse[int]') + s.sparse.density + +.. _whatsnew_0240.api_breaking.get_dummies: + +:meth:`get_dummies` always returns a DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, when ``sparse=True`` was passed to :func:`get_dummies`, the return value could be either +a :class:`DataFrame` or a :class:`SparseDataFrame`, depending on whether all or a just a subset +of the columns were dummy-encoded. Now, a :class:`DataFrame` is always returned (:issue:`24284`). + +*Previous behavior* + +The first :func:`get_dummies` returns a :class:`DataFrame` because the column ``A`` +is not dummy encoded. When just ``["B", "C"]`` are passed to ``get_dummies``, +then all the columns are dummy-encoded, and a :class:`SparseDataFrame` was returned. + +.. 
code-block:: ipython + + In [2]: df = pd.DataFrame({"A": [1, 2], "B": ['a', 'b'], "C": ['a', 'a']}) + + In [3]: type(pd.get_dummies(df, sparse=True)) + Out[3]: pandas.core.frame.DataFrame + + In [4]: type(pd.get_dummies(df[['B', 'C']], sparse=True)) + Out[4]: pandas.core.sparse.frame.SparseDataFrame + +.. ipython:: python + :suppress: + + df = pd.DataFrame({"A": [1, 2], "B": ['a', 'b'], "C": ['a', 'a']}) + +*New behavior* + +Now, the return type is consistently a :class:`DataFrame`. + +.. ipython:: python + + type(pd.get_dummies(df, sparse=True)) + type(pd.get_dummies(df[['B', 'C']], sparse=True)) + +.. note:: + + There's no difference in memory usage between a :class:`SparseDataFrame` + and a :class:`DataFrame` with sparse values. The memory usage will + be the same as in the previous version of pandas. + +.. _whatsnew_0240.api_breaking.frame_to_dict_index_orient: + +Raise ValueError in ``DataFrame.to_dict(orient='index')`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with +``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) + +.. ipython:: python + :okexcept: + + df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) + df + + df.to_dict(orient='index') + +.. _whatsnew_0240.api.datetimelike.normalize: + +Tick DateOffset normalize restrictions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Creating a ``Tick`` object (:class:`Day`, :class:`Hour`, :class:`Minute`, +:class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano`) with +``normalize=True`` is no longer supported. This prevents unexpected behavior +where addition could fail to be monotone or associative. (:issue:`21427`) + +*Previous behavior*: + +.. code-block:: ipython + + + In [2]: ts = pd.Timestamp('2018-06-11 18:01:14') + + In [3]: ts + Out[3]: Timestamp('2018-06-11 18:01:14') + + In [4]: tic = pd.offsets.Hour(n=2, normalize=True) + ...: + + In [5]: tic + Out[5]: <2 * Hours> + + In [6]: ts + tic + Out[6]: Timestamp('2018-06-11 00:00:00') + + In [7]: ts + tic + tic + tic == ts + (tic + tic + tic) + Out[7]: False + +*New behavior*: + +.. ipython:: python + + ts = pd.Timestamp('2018-06-11 18:01:14') + tic = pd.offsets.Hour(n=2) + ts + tic + tic + tic == ts + (tic + tic + tic) + + +.. _whatsnew_0240.api.datetimelike: + + +.. _whatsnew_0240.api.period_subtraction: + +Period subtraction +^^^^^^^^^^^^^^^^^^ + +Subtraction of a ``Period`` from another ``Period`` will give a ``DateOffset``. +instead of an integer (:issue:`21314`) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: june = pd.Period('June 2018') + + In [3]: april = pd.Period('April 2018') + + In [4]: june - april + Out [4]: 2 + +*New behavior*: + +.. ipython:: python + + june = pd.Period('June 2018') + april = pd.Period('April 2018') + june - april + +Similarly, subtraction of a ``Period`` from a ``PeriodIndex`` will now return +an ``Index`` of ``DateOffset`` objects instead of an ``Int64Index`` + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: pi = pd.period_range('June 2018', freq='M', periods=3) + + In [3]: pi - pi[0] + Out[3]: Int64Index([0, 1, 2], dtype='int64') + +*New behavior*: + +.. ipython:: python + + pi = pd.period_range('June 2018', freq='M', periods=3) + pi - pi[0] + + +.. 
_whatsnew_0240.api.timedelta64_subtract_nan: + +Addition/subtraction of ``NaN`` from :class:`DataFrame` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Adding or subtracting ``NaN`` from a :class:`DataFrame` column with +``timedelta64[ns]`` dtype will now raise a ``TypeError`` instead of returning +all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and +``Series`` behavior (:issue:`22163`) + +.. ipython:: python + + df = pd.DataFrame([pd.Timedelta(days=1)]) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: df = pd.DataFrame([pd.Timedelta(days=1)]) + + In [5]: df - np.nan + Out[5]: + 0 + 0 NaT + +*New behavior*: + +.. code-block:: ipython + + In [2]: df - np.nan + ... + TypeError: unsupported operand type(s) for -: 'TimedeltaIndex' and 'float' + +.. _whatsnew_0240.api.dataframe_cmp_broadcasting: + +DataFrame comparison operations broadcasting changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the broadcasting behavior of :class:`DataFrame` comparison +operations (``==``, ``!=``, ...) was inconsistent with the behavior of +arithmetic operations (``+``, ``-``, ...). The behavior of the comparison +operations has been changed to match the arithmetic operations in these cases. +(:issue:`22880`) + +The affected cases are: + +- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`). +- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`. +- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`). + +.. ipython:: python + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: df == arr[[0], :] + ...: # comparison previously broadcast where arithmetic would raise + Out[5]: + 0 1 + 0 True True + 1 False False + 2 False False + In [6]: df + arr[[0], :] + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) + + In [7]: df == (1, 2) + ...: # length matches number of columns; + ...: # comparison previously raised where arithmetic would broadcast + ... + ValueError: Invalid broadcasting comparison [(1, 2)] with block values + In [8]: df + (1, 2) + Out[8]: + 0 1 + 0 1 3 + 1 3 5 + 2 5 7 + + In [9]: df == (1, 2, 3) + ...: # length matches number of rows + ...: # comparison previously broadcast where arithmetic would raise + Out[9]: + 0 1 + 0 False True + 1 True False + 2 False False + In [10]: df + (1, 2, 3) + ... + ValueError: Unable to coerce to Series, length must be 2: given 3 + +*New behavior*: + +.. ipython:: python + + # Comparison operations and arithmetic operations both broadcast. + df == arr[[0], :] + df + arr[[0], :] + +.. ipython:: python + + # Comparison operations and arithmetic operations both broadcast. + df == (1, 2) + df + (1, 2) + +.. code-block:: ipython + + # Comparison operations and arithmetic operations both raise ValueError. + In [6]: df == (1, 2, 3) + ... + ValueError: Unable to coerce to Series, length must be 2: given 3 + + In [7]: df + (1, 2, 3) + ... + ValueError: Unable to coerce to Series, length must be 2: given 3 + +.. 
_whatsnew_0240.api.dataframe_arithmetic_broadcasting: + +DataFrame arithmetic operations broadcasting changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`DataFrame` arithmetic operations when operating with 2-dimensional +``np.ndarray`` objects now broadcast in the same way as ``np.ndarray`` +broadcast. (:issue:`23000`) + +.. ipython:: python + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: df + arr[[0], :] # 1 row, 2 columns + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) + In [6]: df + arr[:, [1]] # 1 column, 3 rows + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (3, 1) + +*New behavior*: + +.. ipython:: python + + df + arr[[0], :] # 1 row, 2 columns + df + arr[:, [1]] # 1 column, 3 rows + +.. _whatsnew_0240.api.incompatibilities: + +Series and Index data-dtype incompatibilities +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Series`` and ``Index`` constructors now raise when the +data is incompatible with a passed ``dtype=`` (:issue:`15832`) + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: pd.Series([-1], dtype="uint64") + Out [4]: + 0 18446744073709551615 + dtype: uint64 + +*New behavior*: + +.. code-block:: ipython + + In [4]: pd.Series([-1], dtype="uint64") + Out [4]: + ... + OverflowError: Trying to coerce negative values to unsigned integers + +.. _whatsnew_0240.api.concat_categorical: + +Concatenation changes +^^^^^^^^^^^^^^^^^^^^^ + +Calling :func:`pandas.concat` on a ``Categorical`` of ints with NA values now +causes them to be processed as objects when concatenating with anything +other than another ``Categorical`` of ints (:issue:`19214`) + +.. ipython:: python + + s = pd.Series([0, 1, np.nan]) + c = pd.Series([0, 1, np.nan], dtype="category") + +*Previous behavior* + +.. code-block:: ipython + + In [3]: pd.concat([s, c]) + Out[3]: + 0 0.0 + 1 1.0 + 2 NaN + 0 0.0 + 1 1.0 + 2 NaN + dtype: float64 + +*New behavior* + +.. ipython:: python + + pd.concat([s, c]) + +Datetimelike API changes +^^^^^^^^^^^^^^^^^^^^^^^^ + +- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`) +- :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) +- :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) +- :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) +- :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) +- :meth:`PeriodIndex.tz_convert` and :meth:`PeriodIndex.tz_localize` have been removed (:issue:`21781`) + +.. 
_whatsnew_0240.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) +- :meth:`Series.str.cat` will now raise if ``others`` is a ``set`` (:issue:`23009`) +- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) +- ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`) +- :func:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`) +- The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`) +- Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`) +- :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`) +- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`) +- Accessing a level of a ``MultiIndex`` with a duplicate name (e.g. in + :meth:`~MultiIndex.get_level_values`) now raises a ``ValueError`` instead of a ``KeyError`` (:issue:`21678`). +- Invalid construction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid (:issue:`21185`) +- Trying to reindex a ``DataFrame`` with a non unique ``MultiIndex`` now raises a ``ValueError`` instead of an ``Exception`` (:issue:`21770`) +- :class:`Index` subtraction will attempt to operate element-wise instead of raising ``TypeError`` (:issue:`19369`) +- :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`) +- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`) +- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`) +- :meth:`DataFrame.set_index` now gives a better (and less frequent) KeyError, raises a ``ValueError`` for incorrect types, + and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`) +- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) +- :class:`DateOffset` attribute ``_cacheable`` and method ``_should_cache`` have been removed (:issue:`23118`) +- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`). +- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`). 
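+ +  A minimal sketch (with hypothetical values) of the new scalar return for both methods: + +  .. code-block:: python + +     import pandas as pd + +     pd.Series([1, 2, 3]).searchsorted(2)  # now a scalar, previously a length-1 array +     pd.Categorical(['a', 'b', 'c'], ordered=True).searchsorted('b')  # also a scalar +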
+- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched-for key is not found in its categories (:issue:`23466`). +- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`). +- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other (:issue:`23614`) +- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`). +- :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` and ``Index.tolist``, respectively (:issue:`8826`) +- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`). +- :class:`DatetimeIndex` and :class:`TimedeltaIndex` no longer ignore the dtype precision. Passing a non-nanosecond resolution dtype will raise a ``ValueError`` (:issue:`24753`) + + +.. _whatsnew_0240.api.extension: + +Extension type changes +~~~~~~~~~~~~~~~~~~~~~~ + +**Equality and hashability** + +pandas now requires that extension dtypes be hashable (i.e. the respective +``ExtensionDtype`` objects; hashability is not a requirement for the values +of the corresponding ``ExtensionArray``). The base class implements +a default ``__eq__`` and ``__hash__``. If you have a parametrized dtype, you should +update the ``ExtensionDtype._metadata`` tuple to match the signature of your +``__init__`` method. See :class:`pandas.api.extensions.ExtensionDtype` for more (:issue:`22476`). + +**New and changed methods** + +- :meth:`~pandas.api.types.ExtensionArray.dropna` has been added (:issue:`21185`) +- :meth:`~pandas.api.types.ExtensionArray.repeat` has been added (:issue:`24349`) +- The ``ExtensionArray`` constructor ``_from_sequence`` now takes the keyword argument ``copy=False`` (:issue:`21185`) +- :meth:`pandas.api.extensions.ExtensionArray.shift` added as part of the basic ``ExtensionArray`` interface (:issue:`22387`). +- :meth:`~pandas.api.types.ExtensionArray.searchsorted` has been added (:issue:`24350`) +- Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) +- :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). + +**Dtype changes** + +- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore + the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) +- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`). +- Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`) +- Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) + +.. _whatsnew_0240.enhancements.extension_array_operators: + +**Operator support** + +A ``Series`` based on an ``ExtensionArray`` now supports arithmetic and comparison +operators (:issue:`19577`). There are two approaches for providing operator support for an ``ExtensionArray``: + +1. 
Define each of the operators on your ``ExtensionArray`` subclass. +2. Use an operator implementation from pandas that depends on operators that are already defined + on the underlying elements (scalars) of the ``ExtensionArray``. + +See the :ref:`ExtensionArray Operator Support +` documentation section for details on both +ways of adding operator support. + +**Other changes** + +- A default repr for :class:`pandas.api.extensions.ExtensionArray` is now provided (:issue:`23601`). +- :meth:`ExtensionArray._formatting_values` is deprecated. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`) +- An ``ExtensionArray`` with a boolean dtype now works correctly as a boolean indexer. :meth:`pandas.api.types.is_bool_dtype` now properly considers them boolean (:issue:`22326`) + +**Bug fixes** + +- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) +- :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`) +- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) +- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) +- :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`). +- Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) +- Bug when concatenating multiple ``Series`` with different extension dtypes not casting to object dtype (:issue:`22994`) +- Series backed by an ``ExtensionArray`` now work with :func:`util.hash_pandas_object` (:issue:`23066`) +- :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`). +- :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`). +- Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`). +- Bug in :func:`pandas.merge` when merging on an extension array-backed column (:issue:`23020`). + + +.. _whatsnew_0240.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- :attr:`MultiIndex.labels` has been deprecated and replaced by :attr:`MultiIndex.codes`. + The functionality is unchanged. The new name better reflects the natures of + these codes and makes the ``MultiIndex`` API more similar to the API for :class:`CategoricalIndex` (:issue:`13443`). + As a consequence, other uses of the name ``labels`` in ``MultiIndex`` have also been deprecated and replaced with ``codes``: + + - You should initialize a ``MultiIndex`` instance using a parameter named ``codes`` rather than ``labels``. + - ``MultiIndex.set_labels`` has been deprecated in favor of :meth:`MultiIndex.set_codes`. + - For method :meth:`MultiIndex.copy`, the ``labels`` parameter has been deprecated and replaced by a ``codes`` parameter. +- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. 
The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`) +- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) +- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) +- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) +- The signature of :meth:`Series.to_csv` has been uniformed to that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, the ``header`` argument now defaults to ``True``. (:issue:`19715`) +- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`) +- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary. This deprecation has been removed in 0.25.0. (:issue:`21948`) +- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain + many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`) +- :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) +- :func:`DatetimeIndex.shift` and :func:`PeriodIndex.shift` now accept ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`, :issue:`22912`) +- The ``fastpath`` keyword of the different Index constructors is deprecated (:issue:`23110`). +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``nonexistent`` argument (:issue:`8917`) +- The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`) +- The methods :meth:`DataFrame.update` and :meth:`Panel.update` have deprecated the ``raise_conflict=False|True`` keyword in favor of ``errors='ignore'|'raise'`` (:issue:`23585`) +- The methods :meth:`Series.str.partition` and :meth:`Series.str.rpartition` have deprecated the ``pat`` keyword in favor of ``sep`` (:issue:`22676`) +- Deprecated the ``nthreads`` keyword of :func:`pandas.read_feather` in favor of ``use_threads`` to reflect the changes in ``pyarrow>=0.11.0``. (:issue:`23053`) +- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) +- Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) +- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23675`) +- The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of + :meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`). +- Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. 
Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`) +- :func:`pandas.api.types.is_period` is deprecated in favor of ``pandas.api.types.is_period_dtype`` (:issue:`23917`) +- :func:`pandas.api.types.is_datetimetz` is deprecated in favor of ``pandas.api.types.is_datetime64tz`` (:issue:`23917`) +- Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments ``start``, ``end``, and ``periods`` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`) +- Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`). +- The ``skipna`` parameter of :meth:`~pandas.api.types.infer_dtype` will switch to ``True`` by default in a future version of pandas (:issue:`17066`, :issue:`24050`) +- In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`). +- :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`) +- :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`) +- Passing an integer to :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtypes is deprecated, will raise ``TypeError`` in a future version. Use ``obj.fillna(pd.Timedelta(...))`` instead (:issue:`24694`) +- ``Series.cat.categorical``, ``Series.cat.name`` and ``Series.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`). +- Passing a dtype without a precision like ``np.dtype('datetime64')`` or ``timedelta64`` to :class:`Index`, :class:`DatetimeIndex` and :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype instead (:issue:`24753`). + +.. _whatsnew_0240.deprecations.datetimelike_int_ops: + +Integer addition/subtraction with datetimes and timedeltas is deprecated +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the past, users could—in some cases—add or subtract integers or integer-dtype +arrays from :class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`. + +This usage is now deprecated. Instead add or subtract integer multiples of +the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`). + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour()) + In [6]: ts + 2 + Out[6]: Timestamp('1994-05-06 14:15:16', freq='H') + + In [7]: tdi = pd.timedelta_range('1D', periods=2) + In [8]: tdi - np.array([2, 1]) + Out[8]: TimedeltaIndex(['-1 days', '1 days'], dtype='timedelta64[ns]', freq=None) + + In [9]: dti = pd.date_range('2001-01-01', periods=2, freq='7D') + In [10]: dti + pd.Index([1, 2]) + Out[10]: DatetimeIndex(['2001-01-08', '2001-01-22'], dtype='datetime64[ns]', freq=None) + +*New behavior*: + +.. 
ipython:: python + :okwarning: + + ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour()) + ts + 2 * ts.freq + + tdi = pd.timedelta_range('1D', periods=2) + tdi - np.array([2 * tdi.freq, 1 * tdi.freq]) + + dti = pd.date_range('2001-01-01', periods=2, freq='7D') + dti + pd.Index([1 * dti.freq, 2 * dti.freq]) + + +.. _whatsnew_0240.deprecations.integer_tz: + +Passing integer data and a timezone to DatetimeIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The behavior of :class:`DatetimeIndex` when passed integer data and +a timezone is changing in a future version of pandas. Previously, these +were interpreted as wall times in the desired timezone. In the future, +these will be interpreted as wall times in UTC, which are then converted +to the desired timezone (:issue:`24559`). + +The default behavior remains the same, but issues a warning: + +.. code-block:: ipython + + In [3]: pd.DatetimeIndex([946684800000000000], tz="US/Central") + /bin/ipython:1: FutureWarning: + Passing integer-dtype data and a timezone to DatetimeIndex. Integer values + will be interpreted differently in a future version of pandas. Previously, + these were viewed as datetime64[ns] values representing the wall time + *in the specified timezone*. In the future, these will be viewed as + datetime64[ns] values representing the wall time *in UTC*. This is similar + to a nanosecond-precision UNIX epoch. To accept the future behavior, use + + pd.to_datetime(integer_data, utc=True).tz_convert(tz) + + To keep the previous behavior, use + + pd.to_datetime(integer_data).tz_localize(tz) + + #!/bin/python3 + Out[3]: DatetimeIndex(['2000-01-01 00:00:00-06:00'], dtype='datetime64[ns, US/Central]', freq=None) + +As the warning message explains, opt in to the future behavior by specifying that +the integer values are UTC, and then converting to the final timezone: + +.. ipython:: python + + pd.to_datetime([946684800000000000], utc=True).tz_convert('US/Central') + +The old behavior can be retained by localizing directly to the final timezone: + +.. ipython:: python + + pd.to_datetime([946684800000000000]).tz_localize('US/Central') + +.. _whatsnew_0240.deprecations.tz_aware_array: + +Converting timezone-aware Series and Index to NumPy arrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The conversion from a :class:`Series` or :class:`Index` with timezone-aware +datetime data will change to preserve timezones by default (:issue:`23569`). + +NumPy doesn't have a dedicated dtype for timezone-aware datetimes. +In the past, converting a :class:`Series` or :class:`DatetimeIndex` with +timezone-aware datetimes would convert to a NumPy array by + +1. converting the tz-aware data to UTC +2. dropping the timezone info +3. returning a :class:`numpy.ndarray` with ``datetime64[ns]`` dtype + +Future versions of pandas will preserve the timezone information by returning an +object-dtype NumPy array where each value is a :class:`Timestamp` with the correct +timezone attached: + +.. ipython:: python + + ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + ser + +The default behavior remains the same, but issues a warning: + +.. code-block:: python + + In [8]: np.asarray(ser) + /bin/ipython:1: FutureWarning: Converting timezone-aware DatetimeArray to timezone-naive + ndarray with 'datetime64[ns]' dtype. In the future, this will return an ndarray + with 'object' dtype where each element is a 'pandas.Timestamp' with the correct 'tz'. + + To accept the future behavior, pass 'dtype=object'. 
+ To keep the old behavior, pass 'dtype="datetime64[ns]"'. + #!/bin/python3 + Out[8]: + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'], + dtype='datetime64[ns]') + +The previous or future behavior can be obtained, without any warnings, by specifying +the ``dtype`` + +*Previous behavior* + +.. ipython:: python + + np.asarray(ser, dtype='datetime64[ns]') + +*Future behavior* + +.. ipython:: python + + # New behavior + np.asarray(ser, dtype=object) + + +Or by using :meth:`Series.to_numpy` + +.. ipython:: python + + ser.to_numpy() + ser.to_numpy(dtype="datetime64[ns]") + +All the above applies to a :class:`DatetimeIndex` with tz-aware values as well. + +.. _whatsnew_0240.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) +- :meth:`Series.repeat` has renamed the ``reps`` argument to ``repeats`` (:issue:`14645`) +- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`) +- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`) +- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`) +- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`) +- The ``Series`` constructor and ``.astype`` method will now raise a ``ValueError`` if timestamp dtypes are passed in without a unit (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15987`) +- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`) +- The modules ``pandas.types``, ``pandas.computation``, and ``pandas.util.decorators`` have been removed (:issue:`16157`, :issue:`16250`) +- Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`) +- ``pandas.pnow``, ``pandas.match``, ``pandas.groupby``, ``pd.get_store``, ``pd.Expr``, and ``pd.Term`` have been removed (:issue:`15538`, :issue:`15940`) +- :meth:`Categorical.searchsorted` and :meth:`Series.searchsorted` have renamed the ``v`` argument to ``value`` (:issue:`14645`) +- ``pandas.parser``, ``pandas.lib``, and ``pandas.tslib`` have been removed (:issue:`15537`) +- :meth:`Index.searchsorted` have renamed the ``key`` argument to ``value`` (:issue:`14645`) +- ``DataFrame.consolidate`` and ``Series.consolidate`` have been removed (:issue:`15501`) +- Removal of the previously deprecated module ``pandas.json`` (:issue:`19944`) +- The module ``pandas.tools`` has been removed (:issue:`15358`, :issue:`16005`) +- :meth:`SparseArray.get_values` and :meth:`SparseArray.to_dense` have dropped the ``fill`` parameter (:issue:`14686`) +- ``DataFrame.sortlevel`` and ``Series.sortlevel`` have been removed (:issue:`15099`) +- :meth:`SparseSeries.to_dense` has dropped the ``sparse_only`` parameter (:issue:`14686`) +- :meth:`DataFrame.astype` and :meth:`Series.astype` have renamed the ``raise_on_error`` argument to ``errors`` (:issue:`14967`) +- ``is_sequence``, ``is_any_int_dtype``, and ``is_floating_dtype`` have been removed from ``pandas.api.types`` (:issue:`16163`, :issue:`16189`) + +.. 
_whatsnew_0240.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Slicing Series and DataFrames with a monotonically increasing :class:`CategoricalIndex` + is now very fast and has speed comparable to slicing with an ``Int64Index``. + The speed increase applies both when indexing by label (using ``.loc``) and by position (``.iloc``) (:issue:`20395`) + Slicing a monotonically increasing :class:`CategoricalIndex` itself (i.e. ``ci[1000:2000]``) + shows similar speed improvements as above (:issue:`21659`) +- Improved performance of :meth:`CategoricalIndex.equals` when comparing to another :class:`CategoricalIndex` (:issue:`24023`) +- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) +- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`) +- Improved performance of :meth:`Series.at` and :meth:`Index.get_value` for extension array values (e.g. :class:`Categorical`) (:issue:`24204`) +- Improved performance of membership checks in :class:`Categorical` and :class:`CategoricalIndex` + (i.e. ``x in cat``-style checks are much faster). :meth:`CategoricalIndex.contains` + is likewise much faster (:issue:`21369`, :issue:`21508`) +- Improved performance of :meth:`HDFStore.groups` (and dependent functions like + :meth:`HDFStore.keys`), i.e. ``x in store`` checks are much faster + (:issue:`21372`) +- Improved the performance of :func:`pandas.get_dummies` with ``sparse=True`` (:issue:`21997`) +- Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`) +- Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`) +- Improved performance of :func:`concat` for ``Series`` objects (:issue:`23404`) +- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone-naive or UTC datetimes (:issue:`23634`) +- Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`) +- Fixed a performance regression on Windows with Python 3.7 of :func:`read_csv` (:issue:`23516`) +- Improved performance of :class:`Categorical` constructor for ``Series`` objects (:issue:`23814`) +- Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`) +- Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators + without internally allocating lists of all elements (:issue:`20783`) +- Improved performance of :class:`Period` constructor, additionally benefitting ``PeriodArray`` and ``PeriodIndex`` creation (:issue:`24084`, :issue:`24118`) +- Improved performance of tz-aware :class:`DatetimeArray` binary operations (:issue:`24491`) + +.. _whatsnew_0240.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``. +- Bug in :meth:`Categorical.sort_values` where ``NaN`` values were always positioned in front regardless of ``na_position`` value (:issue:`22556`). +- Bug when indexing with a boolean-valued ``Categorical``. 
Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`) +- Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`). +- Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). +- In :meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) +- Bug when resampling :meth:`DataFrame.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) +- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`) +- Bug in :meth:`Series.where` losing the categorical dtype for categorical data (:issue:`24077`) +- Bug in :meth:`Categorical.apply` where ``NaN`` values could be handled unpredictably. They now remain unchanged (:issue:`24241`) +- Bug in :class:`Categorical` comparison methods incorrectly raising ``ValueError`` when operating against a :class:`DataFrame` (:issue:`24630`) +- Bug in :meth:`Categorical.set_categories` where setting fewer new categories with ``rename=True`` caused a segmentation fault (:issue:`24675`) + +Datetimelike +^^^^^^^^^^^^ + +- Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) +- Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`, :issue:`21365`) +- Bug in :func:`to_datetime` that did not consistently return an :class:`Index` when ``box=True`` was specified (:issue:`21864`) +- Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`) +- Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`) +- Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`, :issue:`22163`) +- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`, :issue:`22163`) +- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality 
comparisons (:issue:`13128`, :issue:`22163`) +- Bug in :attr:`DataFrame.values` returning a :class:`DatetimeIndex` for a single-column ``DataFrame`` with tz-aware datetime values. Now a 2-D :class:`numpy.ndarray` of :class:`Timestamp` objects is returned (:issue:`24024`) +- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`, :issue:`22163`) +- Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`) +- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`) +- Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`) +- Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`) +- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`) +- Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`) +- Bug in the :class:`Series` repr with period-dtype data missing a space before the data (:issue:`23601`) +- Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`) +- Bug in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`) +- Bug in :meth:`Series.combine_first` not properly aligning categoricals, so that missing values in ``self`` where not filled by valid values from ``other`` (:issue:`24147`) +- Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) +- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) +- Bug in :func:`period_range` ignoring the frequency of ``start`` and ``end`` when those are provided as :class:`Period` objects (:issue:`20535`). 
+- Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) +- Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`) +- Bug in :class:`DataFrame` when creating a new column from an ndarray of :class:`Timestamp` objects with timezones creating an object-dtype column, rather than datetime with timezone (:issue:`23932`) +- Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`) +- Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`) +- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`) +- Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`) +- Bug in :class:`Categorical.__setitem__` not allowing setting with another ``Categorical`` when both are unordered and have the same categories, but in a different order (:issue:`24142`) +- Bug in :func:`date_range` where using dates with millisecond resolution or higher could return incorrect values or the wrong number of values in the index (:issue:`24110`) +- Bug in :class:`DatetimeIndex` where constructing a :class:`DatetimeIndex` from a :class:`Categorical` or :class:`CategoricalIndex` would incorrectly drop timezone information (:issue:`18664`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where indexing with ``Ellipsis`` would incorrectly lose the index's ``freq`` attribute (:issue:`21282`) +- Clarified error message produced when passing an incorrect ``freq`` argument to :class:`DatetimeIndex` with ``NaT`` as the first entry in the passed data (:issue:`11587`) +- Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`) +- Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`) +- Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`) +- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`). 
+- Fixed bug in :meth:`Series.max` with ``datetime64[ns]``-dtype failing to return ``NaT`` when nulls are present and ``skipna=False`` is passed (:issue:`24265`) +- Bug in :func:`to_datetime` where arrays of ``datetime`` objects containing both timezone-aware and timezone-naive ``datetimes`` would fail to raise ``ValueError`` (:issue:`24569`) +- Bug in :func:`to_datetime` with invalid datetime format doesn't coerce input to ``NaT`` even if ``errors='coerce'`` (:issue:`24763`) + +Timedelta +^^^^^^^^^ +- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`, :issue:`22163`) +- Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`) +- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`) +- Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`) +- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) +- Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) +- Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`) +- Fixed bug in adding a :class:`DataFrame` with all-`timedelta64[ns]` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`) +- Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`) +- Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-``NaT`` :class:`DatetimeIndex` instead of an all-``NaT`` :class:`TimedeltaIndex` (:issue:`23215`) +- Bug in :class:`Timedelta` and :func:`to_timedelta()` have inconsistencies in supported unit string (:issue:`21762`) +- Bug in :class:`TimedeltaIndex` division where dividing by another :class:`TimedeltaIndex` raised ``TypeError`` instead of returning a :class:`Float64Index` (:issue:`23829`, :issue:`22631`) +- Bug in :class:`TimedeltaIndex` comparison operations where comparing against non-``Timedelta``-like objects would raise ``TypeError`` instead of returning all-``False`` for ``__eq__`` and all-``True`` for ``__ne__`` (:issue:`24056`) +- Bug in :class:`Timedelta` comparisons when comparing with a ``Tick`` object incorrectly raising ``TypeError`` (:issue:`24710`) + +Timezones +^^^^^^^^^ + +- Bug in :meth:`Index.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) +- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError`` (:issue:`8910`) +- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) +- Bug in :meth:`Series.replace` with ``datetime64[ns, tz]`` data when replacing ``NaT`` (:issue:`11792`) +- Bug in :class:`Timestamp` when passing different string date formats with a timezone offset would produce different timezone offsets (:issue:`12064`) +- Bug when comparing a tz-naive :class:`Timestamp` to a tz-aware :class:`DatetimeIndex` which would coerce the :class:`DatetimeIndex` to tz-naive (:issue:`12601`) +- Bug in :meth:`Series.truncate` 
with a tz-aware :class:`DatetimeIndex` which would cause a core dump (:issue:`9243`) +- Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp` to tz-aware (:issue:`13051`) +- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) +- Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`) +- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show ``first`` and ``last`` result (:issue:`21328`) +- Bug in :class:`DatetimeIndex` comparisons failing to raise ``TypeError`` when comparing timezone-aware ``DatetimeIndex`` against ``np.datetime64`` (:issue:`22074`) +- Bug in ``DataFrame`` assignment with a timezone-aware scalar (:issue:`19843`) +- Bug in :func:`DataFrame.asof` that raised a ``TypeError`` when attempting to compare tz-naive and tz-aware timestamps (:issue:`21194`) +- Bug when constructing a :class:`DatetimeIndex` with :class:`Timestamp` constructed with the ``replace`` method across DST (:issue:`18785`) +- Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) +- Bug in :meth:`Index.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) +- Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`) +- Bug in :meth:`DataFrame.drop` and :meth:`Series.drop` when specifying a tz-aware Timestamp key to drop from a :class:`DatetimeIndex` with a DST transition (:issue:`21761`) +- Bug in :class:`DatetimeIndex` constructor where ``NaT`` and ``dateutil.tz.tzlocal`` would raise an ``OutOfBoundsDatetime`` error (:issue:`23807`) +- Bug in :meth:`DatetimeIndex.tz_localize` and :meth:`Timestamp.tz_localize` with ``dateutil.tz.tzlocal`` near a DST transition that would return an incorrectly localized datetime (:issue:`23807`) +- Bug in :class:`Timestamp` constructor where a ``dateutil.tz.tzutc`` timezone passed with a ``datetime.datetime`` argument would be converted to a ``pytz.UTC`` timezone (:issue:`23807`) +- Bug in :func:`to_datetime` where ``utc=True`` was not respected when specifying a ``unit`` and ``errors='ignore'`` (:issue:`23758`) +- Bug in :func:`to_datetime` where ``utc=True`` was not respected when passing a :class:`Timestamp` (:issue:`24415`) +- Bug in :meth:`DataFrame.any` returns wrong value when ``axis=1`` and the data is of datetimelike type (:issue:`23070`) +- Bug in :meth:`DatetimeIndex.to_period` where a timezone aware index was converted to UTC first before creating :class:`PeriodIndex` (:issue:`22905`) +- Bug in :meth:`DataFrame.tz_localize`, :meth:`DataFrame.tz_convert`, :meth:`Series.tz_localize`, and :meth:`Series.tz_convert` where ``copy=False`` would mutate the original argument inplace (:issue:`6326`) +- Bug in :meth:`DataFrame.max` and :meth:`DataFrame.min` with ``axis=1`` where a :class:`Series` with ``NaN`` would be returned when all columns contained the same timezone (:issue:`10390`) + +Offsets +^^^^^^^ + +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) +- Bug in :class:`DateOffset` where keyword arguments ``week`` and 
``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`) +- Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`) +- Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`) + +Numeric +^^^^^^^ + +- Bug in :class:`Series` ``__rmatmul__`` not supporting matrix-vector multiplication (:issue:`21530`) +- Bug in :func:`factorize` failing with a read-only array (:issue:`12813`) +- Fixed bug where :func:`unique` handled signed zeros inconsistently: for some inputs 0.0 and -0.0 were treated as equal and for some inputs as different. Now they are treated as equal for all inputs (:issue:`21866`) +- Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` where, + when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``), + a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly (:issue:`16679`). +- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`) +- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`, :issue:`22163`) +- Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`) +- Bug where :meth:`DataFrame.astype` to an extension dtype could raise ``AttributeError`` (:issue:`22578`) +- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype arithmetic operations with ``ndarray`` with integer dtype incorrectly treating the ndarray as ``timedelta64[ns]`` dtype (:issue:`23114`) +- Bug in :meth:`Series.rpow` with object dtype returning ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`). +- :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2\ :sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`) +- Calls such as :meth:`DataFrame.round` with a non-unique :class:`CategoricalIndex` now return expected data. Previously, data would be improperly duplicated (:issue:`21809`). +- Added ``log10``, ``floor`` and ``ceil`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`, :issue:`24353`) +- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) +- :func:`~pandas.api.types.is_scalar` now returns ``True`` for PEP 3141 numbers (:issue:`22903`); see the brief example at the end of this section +- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`). 
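+ +For illustration of the :func:`~pandas.api.types.is_scalar` change noted above, a PEP 3141 number such as the standard-library ``fractions.Fraction`` is now treated as a scalar. The snippet below is an illustrative sketch (the output shown is what is expected, not generated by the documentation build): + +.. code-block:: ipython + + In [1]: from fractions import Fraction + + In [2]: pd.api.types.is_scalar(Fraction(3, 4)) + Out[2]: True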
+ +Conversion +^^^^^^^^^^ + +- Bug in :meth:`DataFrame.combine_first` in which column types were unexpectedly converted to float (:issue:`20699`) +- Bug in :meth:`DataFrame.clip` in which column types are not preserved and casted to float (:issue:`24162`) +- Bug in :meth:`DataFrame.clip` when order of columns of dataframes doesn't match, result observed is wrong in numeric values (:issue:`20911`) +- Bug in :meth:`DataFrame.astype` where converting to an extension dtype when duplicate column names are present causes a ``RecursionError`` (:issue:`24704`) + +Strings +^^^^^^^ + +- Bug in :meth:`Index.str.partition` was not nan-safe (:issue:`23558`). +- Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`). +- Bug :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`) +- Bug in :meth:`Index.str.cat` when the result contained only ``NaN`` (:issue:`24044`) + +Interval +^^^^^^^^ + +- Bug in the :class:`IntervalIndex` constructor where the ``closed`` parameter did not always override the inferred ``closed`` (:issue:`19370`) +- Bug in the ``IntervalIndex`` repr where a trailing comma was missing after the list of intervals (:issue:`20611`) +- Bug in :class:`Interval` where scalar arithmetic operations did not retain the ``closed`` value (:issue:`22313`) +- Bug in :class:`IntervalIndex` where indexing with datetime-like values raised a ``KeyError`` (:issue:`20636`) +- Bug in ``IntervalTree`` where data containing ``NaN`` triggered a warning and resulted in incorrect indexing queries with :class:`IntervalIndex` (:issue:`23352`) + +Indexing +^^^^^^^^ + +- Bug in :meth:`DataFrame.ne` fails if columns contain column name "dtype" (:issue:`22383`) +- The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`) +- :class:`PeriodIndex` now emits a ``KeyError`` when a malformed string is looked up, which is consistent with the behavior of :class:`DatetimeIndex` (:issue:`22803`) +- When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError``, consistently with the case of a flat :class:`Int64Index`, rather than falling back to positional indexing (:issue:`21593`) +- Bug in :meth:`Index.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) +- Bug in :meth:`Series.reindex` when reindexing an empty series with a ``datetime64[ns, tz]`` dtype (:issue:`20869`) +- Bug in :class:`DataFrame` when setting values with ``.loc`` and a timezone aware :class:`DatetimeIndex` (:issue:`11365`) +- ``DataFrame.__getitem__`` now accepts dictionaries and dictionary keys as list-likes of labels, consistently with ``Series.__getitem__`` (:issue:`21294`) +- Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`) +- Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`) +- Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`) +- Bug where mixed indexes wouldn't allow integers for ``.at`` (:issue:`19860`) +- ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. (:issue:`19087`) +- Bug in :meth:`DataFrame.loc` when indexing with an :class:`IntervalIndex` (:issue:`19977`) +- :class:`Index` no longer mangles ``None``, ``NaN`` and ``NaT``, i.e. they are treated as three different keys. 
However, for numeric Index all three are still coerced to a ``NaN`` (:issue:`22332`) +- Bug in ``scalar in Index`` if scalar is a float while the ``Index`` is of integer dtype (:issue:`22085`) +- Bug in :func:`MultiIndex.set_levels` when levels value is not subscriptable (:issue:`23273`) +- Bug where setting a timedelta column by ``Index`` causes it to be casted to double, and therefore lose precision (:issue:`23511`) +- Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) +- Bug in :class:`Index` slicing with boolean :class:`Index` may raise ``TypeError`` (:issue:`22533`) +- Bug in ``PeriodArray.__setitem__`` when accepting slice and list-like value (:issue:`23978`) +- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` where indexing with ``Ellipsis`` would lose their ``freq`` attribute (:issue:`21282`) +- Bug in ``iat`` where using it to assign an incompatible value would create a new column (:issue:`23236`) + +Missing +^^^^^^^ + +- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`) +- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`) +- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object_``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`) +- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object_``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`) +- :class:`DataFrame` and :class:`Series` now properly handle numpy masked arrays with hardened masks. Previously, constructing a DataFrame or Series from a masked array with a hard mask would create a pandas object containing the underlying value, rather than the expected NaN. (:issue:`24574`) +- Bug in :class:`DataFrame` constructor where ``dtype`` argument was not honored when handling numpy masked record arrays. (:issue:`24874`) + +MultiIndex +^^^^^^^^^^ + +- Bug in :func:`io.formats.style.Styler.applymap` where ``subset=`` with :class:`MultiIndex` slice would reduce to :class:`Series` (:issue:`19861`) +- Removed compatibility for :class:`MultiIndex` pickles prior to version 0.8.0; compatibility with :class:`MultiIndex` pickles from version 0.13 forward is maintained (:issue:`21654`) +- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a ``Series`` or ``DataFrame`` with a :class:`MultiIndex` index) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked a label which is present in the ``levels`` but is unused (:issue:`22221`) +- :class:`MultiIndex` has gained the :meth:`MultiIndex.from_frame`, it allows constructing a :class:`MultiIndex` object from a :class:`DataFrame` (:issue:`22420`) +- Fix ``TypeError`` in Python 3 when creating :class:`MultiIndex` in which some levels have mixed types, e.g. 
when some labels are tuples (:issue:`15457`) + +IO +^^ + +- Bug in :func:`read_csv` in which a column specified with ``CategoricalDtype`` of boolean categories was not being correctly coerced from string values to booleans (:issue:`20498`) +- Bug in :func:`read_csv` in which unicode column names were not being properly recognized with Python 2.x (:issue:`13253`) +- Bug in :meth:`DataFrame.to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) +- Bug in :meth:`DataFrame.to_sql` where a naive :class:`DatetimeIndex` would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) +- Bug in :meth:`read_excel()` when ``parse_cols`` is specified with an empty dataset (:issue:`9208`) +- :func:`read_html()` no longer ignores all-whitespace ``
    `` tags were missing for :attr:`DataFrame.index` values (:issue:`28204`). +- Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`) +- Fix :meth:`~DataFrame.to_csv` with ``ExtensionArray`` with list-like values (:issue:`28840`). + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`). +- Bug in :meth:`pandas.core.groupby.GroupBy.shift`, :meth:`pandas.core.groupby.GroupBy.bfill` and :meth:`pandas.core.groupby.GroupBy.ffill` where timezone information would be dropped (:issue:`19995`, :issue:`27992`) + +Other +^^^^^ + +- Compatibility with Python 3.8 in :meth:`DataFrame.query` (:issue:`27261`) +- Fix to ensure that tab-completion in an IPython console does not raise + warnings for deprecated attributes (:issue:`27900`). + +.. _whatsnew_0.252.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.25.1..v0.25.2 diff --git a/doc/source/whatsnew/v0.25.3.rst b/doc/source/whatsnew/v0.25.3.rst new file mode 100644 index 00000000..e028c08e --- /dev/null +++ b/doc/source/whatsnew/v0.25.3.rst @@ -0,0 +1,22 @@ +.. _whatsnew_0253: + +What's new in 0.25.3 (October 31, 2019) +--------------------------------------- + +These are the changes in pandas 0.25.3. See :ref:`release` for a full changelog +including other versions of pandas. + +.. _whatsnew_0253.bug_fixes: + +Bug fixes +~~~~~~~~~ + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :meth:`DataFrameGroupBy.quantile` where NA values in the grouping could cause segfaults or incorrect results (:issue:`28882`) + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.25.2..v0.25.3 diff --git a/doc/source/whatsnew/v0.4.x.rst b/doc/source/whatsnew/v0.4.x.rst new file mode 100644 index 00000000..0ed7bb39 --- /dev/null +++ b/doc/source/whatsnew/v0.4.x.rst @@ -0,0 +1,69 @@ +.. 
_whatsnew_04x: + +Versions 0.4.1 through 0.4.3 (September 25 - October 9, 2011) +------------------------------------------------------------- + +{{ header }} + +New features +~~~~~~~~~~~~ + +- Added Python 3 support using 2to3 (:issue:`200`) +- :ref:`Added ` ``name`` attribute to ``Series``, now + prints as part of ``Series.__repr__`` +- :ref:`Added ` instance methods ``isnull`` and ``notnull`` to + Series (:issue:`209`, :issue:`203`) +- :ref:`Added ` ``Series.align`` method for aligning two series + with choice of join method (ENH56_) +- :ref:`Added ` method ``get_level_values`` to + ``MultiIndex`` (:issue:`188`) +- Set values in mixed-type ``DataFrame`` objects via ``.ix`` indexing attribute (:issue:`135`) +- Added new ``DataFrame`` :ref:`methods ` + ``get_dtype_counts`` and property ``dtypes`` (ENHdc_) +- Added :ref:`ignore_index ` option to + ``DataFrame.append`` to stack DataFrames (ENH1b_) +- ``read_csv`` tries to :ref:`sniff ` delimiters using + ``csv.Sniffer`` (:issue:`146`) +- ``read_csv`` can :ref:`read ` multiple columns into a + ``MultiIndex``; DataFrame's ``to_csv`` method writes out a corresponding + ``MultiIndex`` (:issue:`151`) +- ``DataFrame.rename`` has a new ``copy`` parameter to :ref:`rename + ` a DataFrame in place (ENHed_) +- :ref:`Enable ` unstacking by name (:issue:`142`) +- :ref:`Enable ` ``sortlevel`` to work by level (:issue:`141`) + +Performance enhancements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Altered binary operations on differently-indexed SparseSeries objects + to use the integer-based (dense) alignment logic which is faster with a + larger number of blocks (:issue:`205`) +- Wrote faster Cython data alignment / merging routines resulting in + substantial speed increases +- Improved performance of ``isnull`` and ``notnull``, a regression from v0.3.0 + (:issue:`187`) +- Refactored code related to ``DataFrame.join`` so that intermediate aligned + copies of the data in each ``DataFrame`` argument do not need to be created. + Substantial performance increases result (:issue:`176`) +- Substantially improved performance of generic ``Index.intersection`` and + ``Index.union`` +- Implemented ``BlockManager.take`` resulting in significantly faster ``take`` + performance on mixed-type ``DataFrame`` objects (:issue:`104`) +- Improved performance of ``Series.sort_index`` +- Significant groupby performance enhancement: removed unnecessary integrity + checks in DataFrame internals that were slowing down slicing operations to + retrieve groups +- Optimized ``_ensure_index`` function resulting in performance savings in + type-checking Index objects +- Wrote fast time series merging / joining methods in Cython. Will be + integrated later into DataFrame.join and related functions + +.. _ENH1b: https://github.com/pandas-dev/pandas/commit/1ba56251f0013ff7cd8834e9486cef2b10098371 +.. _ENHdc: https://github.com/pandas-dev/pandas/commit/dca3c5c5a6a3769ee01465baca04cfdfa66a4f76 +.. _ENHed: https://github.com/pandas-dev/pandas/commit/edd9f1945fc010a57fa0ae3b3444d1fffe592591 +.. _ENH56: https://github.com/pandas-dev/pandas/commit/56e0c9ffafac79ce262b55a6a13e1b10a88fbe93 + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.4.1..v0.4.3 diff --git a/doc/source/whatsnew/v0.5.0.rst b/doc/source/whatsnew/v0.5.0.rst new file mode 100644 index 00000000..129b86dc --- /dev/null +++ b/doc/source/whatsnew/v0.5.0.rst @@ -0,0 +1,52 @@ + +.. 
_whatsnew_050: + +Version 0.5.0 (October 24, 2011) +-------------------------------- + +{{ header }} + +New features +~~~~~~~~~~~~ + +- :ref:`Added ` ``DataFrame.align`` method with standard join options +- :ref:`Added ` ``parse_dates`` option to ``read_csv`` and ``read_table`` methods to optionally try to parse dates in the index columns +- :ref:`Added ` ``nrows``, ``chunksize``, and ``iterator`` arguments to ``read_csv`` and ``read_table``. The last two return a new ``TextParser`` class capable of lazily iterating through chunks of a flat file (:issue:`242`) +- :ref:`Added ` ability to join on multiple columns in ``DataFrame.join`` (:issue:`214`) +- Added private ``_get_duplicates`` function to ``Index`` for identifying duplicate values more easily (ENH5c_) +- :ref:`Added ` column attribute access to DataFrame. +- :ref:`Added ` Python tab completion hook for DataFrame columns. (:issue:`233`, :issue:`230`) +- :ref:`Implemented ` ``Series.describe`` for Series containing objects (:issue:`241`) +- :ref:`Added ` inner join option to ``DataFrame.join`` when joining on key(s) (:issue:`248`) +- :ref:`Implemented ` selecting DataFrame columns by passing a list to ``__getitem__`` (:issue:`253`) +- :ref:`Implemented ` & and | to intersect / union Index objects, respectively (:issue:`261`) +- :ref:`Added` ``pivot_table`` convenience function to pandas namespace (:issue:`234`) +- :ref:`Implemented ` ``Panel.rename_axis`` function (:issue:`243`) +- DataFrame will show index level names in console output (:issue:`334`) +- :ref:`Implemented ` ``Panel.take`` +- :ref:`Added` ``set_eng_float_format`` for alternate DataFrame floating point string formatting (ENH61_) +- :ref:`Added ` convenience ``set_index`` function for creating a DataFrame index from its existing columns +- :ref:`Implemented ` ``groupby`` hierarchical index level name (:issue:`223`) +- :ref:`Added ` support for different delimiters in ``DataFrame.to_csv`` (:issue:`244`) + +Performance enhancements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- VBENCH Major performance improvements in file parsing functions ``read_csv`` and ``read_table`` +- VBENCH Added Cython function for converting tuples to ndarray very fast. Speeds up many MultiIndex-related operations +- VBENCH Refactored merging / joining code into a tidy class and disabled unnecessary computations in the float/object case, thus getting about 10% better performance (:issue:`211`) +- VBENCH Improved speed of ``DataFrame.xs`` on mixed-type DataFrame objects by about 5x, regression from 0.3.0 (:issue:`215`) +- VBENCH With new ``DataFrame.align`` method, speeding up binary operations between differently-indexed DataFrame objects by 10-25%. +- VBENCH Significantly sped up conversion of nested dict into DataFrame (:issue:`212`) +- VBENCH Significantly speed up DataFrame ``__repr__`` and ``count`` on large mixed-type DataFrame objects + +.. _ENH61: https://github.com/pandas-dev/pandas/commit/6141961 +.. _ENH5c: https://github.com/pandas-dev/pandas/commit/5ca6ff5d822ee4ddef1ec0d87b6d83d8b4bbd3eb + + +.. _whatsnew_0.5.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.4.0..v0.5.0 diff --git a/doc/source/whatsnew/v0.6.0.rst b/doc/source/whatsnew/v0.6.0.rst new file mode 100644 index 00000000..5ddcd5d9 --- /dev/null +++ b/doc/source/whatsnew/v0.6.0.rst @@ -0,0 +1,66 @@ +.. 
_whatsnew_060: + +Version 0.6.0 (November 25, 2011) +--------------------------------- + +{{ header }} + +New features +~~~~~~~~~~~~ +- :ref:`Added ` ``melt`` function to ``pandas.core.reshape`` +- :ref:`Added ` ``level`` parameter to group by level in Series and DataFrame descriptive statistics (:issue:`313`) +- :ref:`Added ` ``head`` and ``tail`` methods to Series, analogous to DataFrame (:issue:`296`) +- :ref:`Added ` ``Series.isin`` function which checks if each value is contained in a passed sequence (:issue:`289`) +- :ref:`Added ` ``float_format`` option to ``Series.to_string`` +- :ref:`Added ` ``skip_footer`` (:issue:`291`) and ``converters`` (:issue:`343`) options to ``read_csv`` and ``read_table`` +- :ref:`Added ` ``drop_duplicates`` and ``duplicated`` functions for removing duplicate DataFrame rows and checking for duplicate rows, respectively (:issue:`319`) +- :ref:`Implemented ` operators '&', '|', '^', '-' on DataFrame (:issue:`347`) +- :ref:`Added ` ``Series.mad``, mean absolute deviation +- :ref:`Added ` ``QuarterEnd`` DateOffset (:issue:`321`) +- :ref:`Added ` ``dot`` to DataFrame (:issue:`65`) +- Added ``orient`` option to ``Panel.from_dict`` (:issue:`359`, :issue:`301`) +- :ref:`Added ` ``orient`` option to ``DataFrame.from_dict`` +- :ref:`Added ` passing list of tuples or list of lists to ``DataFrame.from_records`` (:issue:`357`) +- :ref:`Added ` multiple levels to groupby (:issue:`103`) +- :ref:`Allow ` multiple columns in ``by`` argument of ``DataFrame.sort_index`` (:issue:`92`, :issue:`362`) +- :ref:`Added ` fast ``get_value`` and ``put_value`` methods to DataFrame (:issue:`360`) +- Added ``cov`` instance methods to Series and DataFrame (:issue:`194`, :issue:`362`) +- :ref:`Added ` ``kind='bar'`` option to ``DataFrame.plot`` (:issue:`348`) +- :ref:`Added ` ``idxmin`` and ``idxmax`` to Series and DataFrame (:issue:`286`) +- :ref:`Added ` ``read_clipboard`` function to parse DataFrame from clipboard (:issue:`300`) +- :ref:`Added ` ``nunique`` function to Series for counting unique elements (:issue:`297`) +- :ref:`Made ` DataFrame constructor use Series name if no columns passed (:issue:`373`) +- :ref:`Support ` regular expressions in read_table/read_csv (:issue:`364`) +- :ref:`Added ` ``DataFrame.to_html`` for writing DataFrame to HTML (:issue:`387`) +- :ref:`Added ` support for MaskedArray data in DataFrame, masked values converted to NaN (:issue:`396`) +- :ref:`Added ` ``DataFrame.boxplot`` function (:issue:`368`) +- :ref:`Can ` pass extra args, kwds to DataFrame.apply (:issue:`376`) +- :ref:`Implement ` ``DataFrame.join`` with vector ``on`` argument (:issue:`312`) +- :ref:`Added ` ``legend`` boolean flag to ``DataFrame.plot`` (:issue:`324`) +- :ref:`Can ` pass multiple levels to ``stack`` and ``unstack`` (:issue:`370`) +- :ref:`Can ` pass multiple values columns to ``pivot_table`` (:issue:`381`) +- :ref:`Use ` Series name in GroupBy for result index (:issue:`363`) +- :ref:`Added ` ``raw`` option to ``DataFrame.apply`` for performance if only need ndarray (:issue:`309`) +- Added proper, tested weighted least squares to standard and panel OLS (:issue:`303`) + +Performance enhancements +~~~~~~~~~~~~~~~~~~~~~~~~ +- VBENCH Cythonized ``cache_readonly``, resulting in substantial micro-performance enhancements throughout the code base (:issue:`361`) +- VBENCH Special Cython matrix iterator for applying arbitrary reduction operations with 3-5x better performance than ``np.apply_along_axis`` (:issue:`309`) +- VBENCH Improved performance of ``MultiIndex.from_tuples`` +- VBENCH 
Special Cython matrix iterator for applying arbitrary reduction operations +- VBENCH + DOCUMENT Add ``raw`` option to ``DataFrame.apply`` for getting better performance when +- VBENCH Faster cythonized count by level in Series and DataFrame (:issue:`341`) +- VBENCH? Significant GroupBy performance enhancement with multiple keys with many "empty" combinations +- VBENCH New Cython vectorized function ``map_infer`` speeds up ``Series.apply`` and ``Series.map`` significantly when passed elementwise Python function, motivated by (:issue:`355`) +- VBENCH Significantly improved performance of ``Series.order``, which also makes np.unique called on a Series faster (:issue:`327`) +- VBENCH Vastly improved performance of GroupBy on axes with a MultiIndex (:issue:`299`) + + + +.. _whatsnew_0.6.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.5.0..v0.6.0 diff --git a/doc/source/whatsnew/v0.6.1.rst b/doc/source/whatsnew/v0.6.1.rst new file mode 100644 index 00000000..58a7d1ee --- /dev/null +++ b/doc/source/whatsnew/v0.6.1.rst @@ -0,0 +1,58 @@ + +.. _whatsnew_061: + +Version 0.6.1 (December 13, 2011) +--------------------------------- + +New features +~~~~~~~~~~~~ +- Can append single rows (as Series) to a DataFrame +- Add Spearman and Kendall rank correlation + options to Series.corr and DataFrame.corr (:issue:`428`) +- :ref:`Added ` ``get_value`` and ``set_value`` methods to + Series, DataFrame, and Panel for very low-overhead access (>2x faster in many + cases) to scalar elements (:issue:`437`, :issue:`438`). ``set_value`` is capable of + producing an enlarged object. +- Add PyQt table widget to sandbox (:issue:`435`) +- DataFrame.align can :ref:`accept Series arguments ` + and an :ref:`axis option ` (:issue:`461`) +- Implement new :ref:`SparseArray ` and ``SparseList`` + data structures. SparseSeries now derives from SparseArray (:issue:`463`) +- :ref:`Better console printing options ` (:issue:`453`) +- Implement fast data ranking for Series and + DataFrame, fast versions of scipy.stats.rankdata (:issue:`428`) +- Implement ``DataFrame.from_items`` alternate + constructor (:issue:`444`) +- DataFrame.convert_objects method for :ref:`inferring better dtypes ` + for object columns (:issue:`302`) +- Add :ref:`rolling_corr_pairwise ` function for + computing Panel of correlation matrices (:issue:`189`) +- Add :ref:`margins ` option to :ref:`pivot_table + ` for computing subgroup aggregates (:issue:`114`) +- Add ``Series.from_csv`` function (:issue:`482`) +- :ref:`Can pass ` DataFrame/DataFrame and + DataFrame/Series to rolling_corr/rolling_cov (GH #462) +- MultiIndex.get_level_values can :ref:`accept the level name ` + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improve memory usage of ``DataFrame.describe`` (do not copy data + unnecessarily) (PR #425) + +- Optimize scalar value lookups in the general case by 25% or more in Series + and DataFrame + +- Fix performance regression in cross-sectional count in DataFrame, affecting + DataFrame.dropna speed +- Column deletion in DataFrame copies no data (computes views on blocks) (GH + #158) + + + +.. _whatsnew_0.6.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.6.0..v0.6.1 diff --git a/doc/source/whatsnew/v0.7.0.rst b/doc/source/whatsnew/v0.7.0.rst new file mode 100644 index 00000000..1ee6a989 --- /dev/null +++ b/doc/source/whatsnew/v0.7.0.rst @@ -0,0 +1,384 @@ +.. 
_whatsnew_0700: + +Version 0.7.0 (February 9, 2012) +-------------------------------- + +{{ header }} + + +New features +~~~~~~~~~~~~ + +- New unified :ref:`merge function ` for efficiently performing + full gamut of database / relational-algebra operations. Refactored existing + join methods to use the new infrastructure, resulting in substantial + performance gains (:issue:`220`, :issue:`249`, :issue:`267`) + +- New :ref:`unified concatenation function ` for concatenating + Series, DataFrame or Panel objects along an axis. Can form union or + intersection of the other axes. Improves performance of ``Series.append`` and + ``DataFrame.append`` (:issue:`468`, :issue:`479`, :issue:`273`) + +- Can pass multiple DataFrames to + ``DataFrame.append`` to concatenate (stack) and multiple Series to + ``Series.append`` too + +- :ref:`Can` pass list of dicts (e.g., a + list of JSON objects) to DataFrame constructor (:issue:`526`) + +- You can now :ref:`set multiple columns ` in a + DataFrame via ``__getitem__``, useful for transformation (:issue:`342`) + +- Handle differently-indexed output values in ``DataFrame.apply`` (:issue:`498`) + +.. code-block:: ipython + + In [1]: df = pd.DataFrame(np.random.randn(10, 4)) + In [2]: df.apply(lambda x: x.describe()) + Out[2]: + 0 1 2 3 + count 10.000000 10.000000 10.000000 10.000000 + mean 0.190912 -0.395125 -0.731920 -0.403130 + std 0.730951 0.813266 1.112016 0.961912 + min -0.861849 -2.104569 -1.776904 -1.469388 + 25% -0.411391 -0.698728 -1.501401 -1.076610 + 50% 0.380863 -0.228039 -1.191943 -1.004091 + 75% 0.658444 0.057974 -0.034326 0.461706 + max 1.212112 0.577046 1.643563 1.071804 + + [8 rows x 4 columns] + +- :ref:`Add` ``reorder_levels`` method to Series and + DataFrame (:issue:`534`) + +- :ref:`Add` dict-like ``get`` function to DataFrame + and Panel (:issue:`521`) + +- :ref:`Add` ``DataFrame.iterrows`` method for efficiently + iterating through the rows of a DataFrame + +- Add ``DataFrame.to_panel`` with code adapted from + ``LongPanel.to_long`` + +- :ref:`Add ` ``reindex_axis`` method added to DataFrame + +- :ref:`Add ` ``level`` option to binary arithmetic functions on + ``DataFrame`` and ``Series`` + +- :ref:`Add ` ``level`` option to the ``reindex`` + and ``align`` methods on Series and DataFrame for broadcasting values across + a level (:issue:`542`, :issue:`552`, others) + +- Add attribute-based item access to + ``Panel`` and add IPython completion (:issue:`563`) + +- :ref:`Add ` ``logy`` option to ``Series.plot`` for + log-scaling on the Y axis + +- :ref:`Add ` ``index`` and ``header`` options to + ``DataFrame.to_string`` + +- :ref:`Can ` pass multiple DataFrames to + ``DataFrame.join`` to join on index (:issue:`115`) + +- :ref:`Can ` pass multiple Panels to ``Panel.join`` + (:issue:`115`) + +- :ref:`Added ` ``justify`` argument to ``DataFrame.to_string`` + to allow different alignment of column headers + +- :ref:`Add ` ``sort`` option to GroupBy to allow disabling + sorting of the group keys for potential speedups (:issue:`595`) + +- :ref:`Can ` pass MaskedArray to Series + constructor (:issue:`563`) + +- Add Panel item access via attributes + and IPython completion (:issue:`554`) + +- Implement ``DataFrame.lookup``, fancy-indexing analogue for retrieving values + given a sequence of row and column labels (:issue:`338`) + +- Can pass a :ref:`list of functions ` to + aggregate with groupby on a DataFrame, yielding an aggregated result with + hierarchical columns (:issue:`166`) + +- Can call ``cummin`` and ``cummax`` on Series and DataFrame to 
get cumulative + minimum and maximum, respectively (:issue:`647`) + +- ``value_range`` added as utility function to get min and max of a dataframe + (:issue:`288`) + +- Added ``encoding`` argument to ``read_csv``, ``read_table``, ``to_csv`` and + ``from_csv`` for non-ascii text (:issue:`717`) + +- :ref:`Added ` ``abs`` method to pandas objects + +- :ref:`Added ` ``crosstab`` function for easily computing frequency tables + +- :ref:`Added ` ``isin`` method to index objects + +- :ref:`Added ` ``level`` argument to ``xs`` method of DataFrame. + + +API changes to integer indexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the potentially riskiest API changes in 0.7.0, but also one of the most +important, was a complete review of how **integer indexes** are handled with +regard to label-based indexing. Here is an example: + +.. code-block:: ipython + + In [3]: s = pd.Series(np.random.randn(10), index=range(0, 20, 2)) + In [4]: s + Out[4]: + 0 -1.294524 + 2 0.413738 + 4 0.276662 + 6 -0.472035 + 8 -0.013960 + 10 -0.362543 + 12 -0.006154 + 14 -0.923061 + 16 0.895717 + 18 0.805244 + Length: 10, dtype: float64 + + In [5]: s[0] + Out[5]: -1.2945235902555294 + + In [6]: s[2] + Out[6]: 0.41373810535784006 + + In [7]: s[4] + Out[7]: 0.2766617129497566 + +This is all exactly identical to the behavior before. However, if you ask for a +key **not** contained in the Series, in versions 0.6.1 and prior, Series would +*fall back* on a location-based lookup. This now raises a ``KeyError``: + +.. code-block:: ipython + + In [2]: s[1] + KeyError: 1 + +This change also has the same impact on DataFrame: + +.. code-block:: ipython + + In [3]: df = pd.DataFrame(np.random.randn(8, 4), index=range(0, 16, 2)) + + In [4]: df + 0 1 2 3 + 0 0.88427 0.3363 -0.1787 0.03162 + 2 0.14451 -0.1415 0.2504 0.58374 + 4 -1.44779 -0.9186 -1.4996 0.27163 + 6 -0.26598 -2.4184 -0.2658 0.11503 + 8 -0.58776 0.3144 -0.8566 0.61941 + 10 0.10940 -0.7175 -1.0108 0.47990 + 12 -1.16919 -0.3087 -0.6049 -0.43544 + 14 -0.07337 0.3410 0.0424 -0.16037 + + In [5]: df.ix[3] + KeyError: 3 + +In order to support purely integer-based indexing, the following methods have +been added: + +.. csv-table:: + :header: "Method","Description" + :widths: 40,60 + + ``Series.iget_value(i)``, Retrieve value stored at location ``i`` + ``Series.iget(i)``, Alias for ``iget_value`` + ``DataFrame.irow(i)``, Retrieve the ``i``-th row + ``DataFrame.icol(j)``, Retrieve the ``j``-th column + "``DataFrame.iget_value(i, j)``", Retrieve the value at row ``i`` and column ``j`` + +API tweaks regarding label-based slicing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Label-based slicing using ``ix`` now requires that the index be sorted +(monotonic) **unless** both the start and endpoint are contained in the index: + +.. code-block:: python + + In [1]: s = pd.Series(np.random.randn(6), index=list('gmkaec')) + + In [2]: s + Out[2]: + g -1.182230 + m -0.276183 + k -0.243550 + a 1.628992 + e 0.073308 + c -0.539890 + dtype: float64 + +Then this is OK: + +.. code-block:: python + + In [3]: s.ix['k':'e'] + Out[3]: + k -0.243550 + a 1.628992 + e 0.073308 + dtype: float64 + +But this is not: + +.. code-block:: ipython + + In [12]: s.ix['b':'h'] + KeyError 'b' + +If the index had been sorted, the "range selection" would have been possible: + +.. 
code-block:: python + + In [4]: s2 = s.sort_index() + + In [5]: s2 + Out[5]: + a 1.628992 + c -0.539890 + e 0.073308 + g -1.182230 + k -0.243550 + m -0.276183 + dtype: float64 + + In [6]: s2.ix['b':'h'] + Out[6]: + c -0.539890 + e 0.073308 + g -1.182230 + dtype: float64 + +Changes to Series ``[]`` operator +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As as notational convenience, you can pass a sequence of labels or a label +slice to a Series when getting and setting values via ``[]`` (i.e. the +``__getitem__`` and ``__setitem__`` methods). The behavior will be the same as +passing similar input to ``ix`` **except in the case of integer indexing**: + +.. code-block:: ipython + + In [8]: s = pd.Series(np.random.randn(6), index=list('acegkm')) + + In [9]: s + Out[9]: + a -1.206412 + c 2.565646 + e 1.431256 + g 1.340309 + k -1.170299 + m -0.226169 + Length: 6, dtype: float64 + + In [10]: s[['m', 'a', 'c', 'e']] + Out[10]: + m -0.226169 + a -1.206412 + c 2.565646 + e 1.431256 + Length: 4, dtype: float64 + + In [11]: s['b':'l'] + Out[11]: + c 2.565646 + e 1.431256 + g 1.340309 + k -1.170299 + Length: 4, dtype: float64 + + In [12]: s['c':'k'] + Out[12]: + c 2.565646 + e 1.431256 + g 1.340309 + k -1.170299 + Length: 4, dtype: float64 + +In the case of integer indexes, the behavior will be exactly as before +(shadowing ``ndarray``): + +.. code-block:: ipython + + In [13]: s = pd.Series(np.random.randn(6), index=range(0, 12, 2)) + + In [14]: s[[4, 0, 2]] + Out[14]: + 4 0.132003 + 0 0.410835 + 2 0.813850 + Length: 3, dtype: float64 + + In [15]: s[1:5] + Out[15]: + 2 0.813850 + 4 0.132003 + 6 -0.827317 + 8 -0.076467 + Length: 4, dtype: float64 + +If you wish to do indexing with sequences and slicing on an integer index with +label semantics, use ``ix``. + +Other API changes +~~~~~~~~~~~~~~~~~ + +- The deprecated ``LongPanel`` class has been completely removed + +- If ``Series.sort`` is called on a column of a DataFrame, an exception will + now be raised. Before it was possible to accidentally mutate a DataFrame's + column by doing ``df[col].sort()`` instead of the side-effect free method + ``df[col].order()`` (:issue:`316`) + +- Miscellaneous renames and deprecations which will (harmlessly) raise + ``FutureWarning`` + +- ``drop`` added as an optional parameter to ``DataFrame.reset_index`` (:issue:`699`) + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- :ref:`Cythonized GroupBy aggregations ` no longer + presort the data, thus achieving a significant speedup (:issue:`93`). GroupBy + aggregations with Python functions significantly sped up by clever + manipulation of the ndarray data type in Cython (:issue:`496`). +- Better error message in DataFrame constructor when passed column labels + don't match data (:issue:`497`) +- Substantially improve performance of multi-GroupBy aggregation when a + Python function is passed, reuse ndarray object in Cython (:issue:`496`) +- Can store objects indexed by tuples and floats in HDFStore (:issue:`492`) +- Don't print length by default in Series.to_string, add ``length`` option (:issue:`489`) +- Improve Cython code for multi-groupby to aggregate without having to sort + the data (:issue:`93`) +- Improve MultiIndex reindexing speed by storing tuples in the MultiIndex, + test for backwards unpickling compatibility +- Improve column reindexing performance by using specialized Cython take + function +- Further performance tweaking of Series.__getitem__ for standard use cases +- Avoid Index dict creation in some cases (i.e. 
when getting slices, etc.), + regression from prior versions +- Friendlier error message in setup.py if NumPy not installed +- Use common set of NA-handling operations (sum, mean, etc.) in Panel class + also (:issue:`536`) +- Default name assignment when calling ``reset_index`` on DataFrame with a + regular (non-hierarchical) index (:issue:`476`) +- Use Cythonized groupers when possible in Series/DataFrame stat ops with + ``level`` parameter passed (:issue:`545`) +- Ported skiplist data structure to C to speed up ``rolling_median`` by about + 5-10x in most typical use cases (:issue:`374`) + + +.. _whatsnew_0.7.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.6.1..v0.7.0 diff --git a/doc/source/whatsnew/v0.7.1.rst b/doc/source/whatsnew/v0.7.1.rst new file mode 100644 index 00000000..7082ef8e --- /dev/null +++ b/doc/source/whatsnew/v0.7.1.rst @@ -0,0 +1,41 @@ +.. _whatsnew_0701: + +Version 0.7.1 (February 29, 2012) +--------------------------------- + +{{ header }} + + +This release includes a few new features and addresses over a dozen bugs in +0.7.0. + +New features +~~~~~~~~~~~~ + + - Add ``to_clipboard`` function to pandas namespace for writing objects to + the system clipboard (:issue:`774`) + - Add ``itertuples`` method to DataFrame for iterating through the rows of a + dataframe as tuples (:issue:`818`) + - Add ability to pass fill_value and method to DataFrame and Series align + method (:issue:`806`, :issue:`807`) + - Add fill_value option to reindex, align methods (:issue:`784`) + - Enable concat to produce DataFrame from Series (:issue:`787`) + - Add ``between`` method to Series (:issue:`802`) + - Add HTML representation hook to DataFrame for the IPython HTML notebook + (:issue:`773`) + - Support for reading Excel 2007 XML documents using openpyxl + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + - Improve performance and memory usage of fillna on DataFrame + - Can concatenate a list of Series along axis=1 to obtain a DataFrame (:issue:`787`) + + + +.. _whatsnew_0.7.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.0..v0.7.1 diff --git a/doc/source/whatsnew/v0.7.2.rst b/doc/source/whatsnew/v0.7.2.rst new file mode 100644 index 00000000..e10a7b49 --- /dev/null +++ b/doc/source/whatsnew/v0.7.2.rst @@ -0,0 +1,38 @@ +.. _whatsnew_0702: + +Version 0.7.2 (March 16, 2012) +------------------------------ + +{{ header }} + + +This release targets bugs in 0.7.1, and adds a few minor features. + +New features +~~~~~~~~~~~~ + + - Add additional tie-breaking methods in DataFrame.rank (:issue:`874`) + - Add ascending parameter to rank in Series, DataFrame (:issue:`875`) + - Add coerce_float option to DataFrame.from_records (:issue:`893`) + - Add sort_columns parameter to allow unsorted plots (:issue:`918`) + - Enable column access via attributes on GroupBy (:issue:`882`) + - Can pass dict of values to DataFrame.fillna (:issue:`661`) + - Can select multiple hierarchical groups by passing list of values in .ix + (:issue:`134`) + - Add ``axis`` option to DataFrame.fillna (:issue:`174`) + - Add level keyword to ``drop`` for dropping values from a level (:issue:`159`) + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + - Use khash for Series.value_counts, add raw function to algorithms.py (:issue:`861`) + - Intercept __builtin__.sum in groupby (:issue:`885`) + + + +.. _whatsnew_0.7.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.7.1..v0.7.2 diff --git a/doc/source/whatsnew/v0.7.3.rst b/doc/source/whatsnew/v0.7.3.rst new file mode 100644 index 00000000..5da6bef0 --- /dev/null +++ b/doc/source/whatsnew/v0.7.3.rst @@ -0,0 +1,151 @@ +.. _whatsnew_0703: + +Version 0.7.3 (April 12, 2012) +------------------------------ + +{{ header }} + + +This is a minor release from 0.7.2 and fixes many minor bugs and adds a number +of nice new features. There are also a couple of API changes to note; these +should not affect very many users, and we are inclined to call them "bug fixes" +even though they do constitute a change in behavior. See the :ref:`full release +notes ` or issue +tracker on GitHub for a complete list. + +New features +~~~~~~~~~~~~ + +- New :ref:`fixed width file reader `, ``read_fwf`` +- New :ref:`scatter_matrix ` function for making + a scatter plot matrix + +.. code-block:: python + + from pandas.tools.plotting import scatter_matrix + + scatter_matrix(df, alpha=0.2) # noqa F821 + + +- Add ``stacked`` argument to Series and DataFrame's ``plot`` method for + :ref:`stacked bar plots `. + +.. code-block:: python + + df.plot(kind="bar", stacked=True) # noqa F821 + + +.. code-block:: python + + df.plot(kind="barh", stacked=True) # noqa F821 + + +- Add log x and y :ref:`scaling options ` to + ``DataFrame.plot`` and ``Series.plot`` +- Add ``kurt`` methods to Series and DataFrame for computing kurtosis + + +NA boolean comparison API change +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Reverted some changes to how NA values (represented typically as ``NaN`` or +``None``) are handled in non-numeric Series: + +.. code-block:: ipython + + In [1]: series = pd.Series(["Steve", np.nan, "Joe"]) + + In [2]: series == "Steve" + Out[2]: + 0 True + 1 False + 2 False + Length: 3, dtype: bool + + In [3]: series != "Steve" + Out[3]: + 0 False + 1 True + 2 True + Length: 3, dtype: bool + +In comparisons, NA / NaN will always come through as ``False`` except with +``!=`` which is ``True``. *Be very careful* with boolean arithmetic, especially +negation, in the presence of NA data. You may wish to add an explicit NA +filter into boolean array operations if you are worried about this: + +.. code-block:: ipython + + In [4]: mask = series == "Steve" + + In [5]: series[mask & series.notnull()] + Out[5]: + 0 Steve + Length: 1, dtype: object + +While propagating NA in comparisons may seem like the right behavior to some +users (and you could argue on purely technical grounds that this is the right +thing to do), the evaluation was made that propagating NA everywhere, including +in numerical arrays, would cause a large amount of problems for users. Thus, a +"practicality beats purity" approach was taken. This issue may be revisited at +some point in the future. + +Other API changes +~~~~~~~~~~~~~~~~~ + +When calling ``apply`` on a grouped Series, the return value will also be a +Series, to be more consistent with the ``groupby`` behavior with DataFrame: + +.. 
code-block:: ipython + + In [6]: df = pd.DataFrame( + ...: { + ...: "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + ...: "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + ...: "C": np.random.randn(8), + ...: "D": np.random.randn(8), + ...: } + ...: ) + ...: + + In [7]: df + Out[7]: + A B C D + 0 foo one 0.469112 -0.861849 + 1 bar one -0.282863 -2.104569 + 2 foo two -1.509059 -0.494929 + 3 bar three -1.135632 1.071804 + 4 foo two 1.212112 0.721555 + 5 bar two -0.173215 -0.706771 + 6 foo one 0.119209 -1.039575 + 7 foo three -1.044236 0.271860 + + [8 rows x 4 columns] + + In [8]: grouped = df.groupby("A")["C"] + + In [9]: grouped.describe() + Out[9]: + count mean std min 25% 50% 75% max + A + bar 3.0 -0.530570 0.526860 -1.135632 -0.709248 -0.282863 -0.228039 -0.173215 + foo 5.0 -0.150572 1.113308 -1.509059 -1.044236 0.119209 0.469112 1.212112 + + [2 rows x 8 columns] + + In [10]: grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values + Out[10]: + A + bar 1 -0.282863 + 5 -0.173215 + foo 0 0.469112 + 4 1.212112 + Name: C, Length: 4, dtype: float64 + + +.. _whatsnew_0.7.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.2..v0.7.3 diff --git a/doc/source/whatsnew/v0.8.0.rst b/doc/source/whatsnew/v0.8.0.rst new file mode 100644 index 00000000..ce02525a --- /dev/null +++ b/doc/source/whatsnew/v0.8.0.rst @@ -0,0 +1,284 @@ +.. _whatsnew_080: + +Version 0.8.0 (June 29, 2012) +----------------------------- + +{{ header }} + + +This is a major release from 0.7.3 and includes extensive work on the time +series handling and processing infrastructure as well as a great deal of new +functionality throughout the library. It includes over 700 commits from more +than 20 distinct authors. Most pandas 0.7.3 and earlier users should not +experience any issues upgrading, but due to the migration to the NumPy +datetime64 dtype, there may be a number of bugs and incompatibilities +lurking. Lingering incompatibilities will be fixed ASAP in a 0.8.1 release if +necessary. See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + +Support for non-unique indexes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All objects can now work with non-unique indexes. Data alignment / join +operations work according to SQL join semantics (including, if application, +index duplication in many-to-many joins) + +NumPy datetime64 dtype and 1.6 dependency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Time series data are now represented using NumPy's datetime64 dtype; thus, +pandas 0.8.0 now requires at least NumPy 1.6. It has been tested and verified +to work with the development version (1.7+) of NumPy as well which includes +some significant user-facing API changes. NumPy 1.6 also has a number of bugs +having to do with nanosecond resolution data, so I recommend that you steer +clear of NumPy 1.6's datetime64 API functions (though limited as they are) and +only interact with this data using the interface that pandas provides. + +See the end of the 0.8.0 section for a "porting" guide listing potential issues +for users migrating legacy code bases from pandas 0.7 or earlier to 0.8.0. + +Bug fixes to the 0.7.x series for legacy NumPy < 1.6 users will be provided as +they arise. There will be no more further development in 0.7.x beyond bug +fixes. + +Time Series changes and improvements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + With this release, legacy scikits.timeseries users should be able to port + their code to use pandas. + +.. 
note:: + + See :ref:`documentation ` for overview of pandas timeseries API. + +- New datetime64 representation **speeds up join operations and data + alignment**, **reduces memory usage**, and improve serialization / + deserialization performance significantly over datetime.datetime +- High performance and flexible **resample** method for converting from + high-to-low and low-to-high frequency. Supports interpolation, user-defined + aggregation functions, and control over how the intervals and result labeling + are defined. A suite of high performance Cython/C-based resampling functions + (including Open-High-Low-Close) have also been implemented. +- Revamp of :ref:`frequency aliases ` and support for + **frequency shortcuts** like '15min', or '1h30min' +- New :ref:`DatetimeIndex class ` supports both fixed + frequency and irregular time + series. Replaces now deprecated DateRange class +- New ``PeriodIndex`` and ``Period`` classes for representing + :ref:`time spans ` and performing **calendar logic**, + including the ``12 fiscal quarterly frequencies ``. + This is a partial port of, and a substantial enhancement to, + elements of the scikits.timeseries code base. Support for conversion between + PeriodIndex and DatetimeIndex +- New Timestamp data type subclasses ``datetime.datetime``, providing the same + interface while enabling working with nanosecond-resolution data. Also + provides :ref:`easy time zone conversions `. +- Enhanced support for :ref:`time zones `. Add + ``tz_convert`` and ``tz_localize`` methods to TimeSeries and DataFrame. All + timestamps are stored as UTC; Timestamps from DatetimeIndex objects with time + zone set will be localized to local time. Time zone conversions are therefore + essentially free. User needs to know very little about pytz library now; only + time zone names as strings are required. Time zone-aware timestamps are + equal if and only if their UTC timestamps match. Operations between time + zone-aware time series with different time zones will result in a UTC-indexed + time series. +- Time series **string indexing conveniences** / shortcuts: slice years, year + and month, and index values with strings +- Enhanced time series **plotting**; adaptation of scikits.timeseries + matplotlib-based plotting code +- New ``date_range``, ``bdate_range``, and ``period_range`` :ref:`factory + functions ` +- Robust **frequency inference** function ``infer_freq`` and ``inferred_freq`` + property of DatetimeIndex, with option to infer frequency on construction of + DatetimeIndex +- to_datetime function efficiently **parses array of strings** to + DatetimeIndex. 
DatetimeIndex will parse array or list of strings to + datetime64 +- **Optimized** support for datetime64-dtype data in Series and DataFrame + columns +- New NaT (Not-a-Time) type to represent **NA** in timestamp arrays +- Optimize Series.asof for looking up **"as of" values** for arrays of + timestamps +- Milli, Micro, Nano date offset objects +- Can index time series with datetime.time objects to select all data at + particular **time of day** (``TimeSeries.at_time``) or **between two times** + (``TimeSeries.between_time``) +- Add :ref:`tshift ` method for leading/lagging + using the frequency (if any) of the index, as opposed to a naive lead/lag + using shift + +Other new features +~~~~~~~~~~~~~~~~~~ + +- New :ref:`cut ` and ``qcut`` functions (like R's cut + function) for computing a categorical variable from a continuous variable by + binning values either into value-based (``cut``) or quantile-based (``qcut``) + bins +- Rename ``Factor`` to ``Categorical`` and add a number of usability features +- Add :ref:`limit ` argument to fillna/reindex +- More flexible multiple function application in GroupBy, and can pass list + (name, function) tuples to get result in particular order with given names +- Add flexible :ref:`replace ` method for efficiently + substituting values +- Enhanced :ref:`read_csv/read_table ` for reading time series + data and converting multiple columns to dates +- Add :ref:`comments ` option to parser functions: read_csv, etc. +- Add :ref:`dayfirst ` option to parser functions for parsing + international DD/MM/YYYY dates +- Allow the user to specify the CSV reader :ref:`dialect ` to + control quoting etc. +- Handling :ref:`thousands ` separators in read_csv to improve + integer parsing. +- Enable unstacking of multiple levels in one shot. Alleviate ``pivot_table`` + bugs (empty columns being introduced) +- Move to klib-based hash tables for indexing; better performance and less + memory usage than Python's dict +- Add first, last, min, max, and prod optimized GroupBy functions +- New :ref:`ordered_merge ` function +- Add flexible :ref:`comparison ` instance methods eq, ne, lt, + gt, etc. to DataFrame, Series +- Improve :ref:`scatter_matrix ` plotting + function and add histogram or kernel density estimates to diagonal +- Add :ref:`'kde' ` plot option for density plots +- Support for converting DataFrame to R data.frame through rpy2 +- Improved support for complex numbers in Series and DataFrame +- Add ``pct_change`` method to all data structures +- Add max_colwidth configuration option for DataFrame console output +- :ref:`Interpolate ` Series values using index values +- Can select multiple columns from GroupBy +- Add :ref:`update ` methods to Series/DataFrame + for updating values in place +- Add ``any`` and ``all`` method to DataFrame + +New plotting methods +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import pandas as pd + + fx = pd.read_pickle("data/fx_prices") + import matplotlib.pyplot as plt + +``Series.plot`` now supports a ``secondary_y`` option: + +.. code-block:: python + + plt.figure() + + fx["FR"].plot(style="g") + + fx["IT"].plot(style="k--", secondary_y=True) + +Vytautas Jancauskas, the 2012 GSOC participant, has added many new plot +types. For example, ``'kde'`` is a new option: + +.. code-block:: python + + s = pd.Series( + np.concatenate((np.random.randn(1000), np.random.randn(1000) * 0.5 + 3)) + ) + plt.figure() + s.hist(density=True, alpha=0.2) + s.plot(kind="kde") + +See :ref:`the plotting page ` for much more. 
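+
+The ``secondary_y`` snippet above reads prices from a local ``data/fx_prices``
+pickle, so it does not run on its own. A minimal, self-contained sketch of the
+same option using synthetic data (the series below are purely illustrative and
+not part of any pandas dataset) might look like:
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+    import matplotlib.pyplot as plt
+
+    # two synthetic "price" series on very different scales
+    idx = pd.date_range("2012-01-01", periods=250)
+    left = pd.Series(np.random.randn(250).cumsum(), index=idx)
+    right = pd.Series(np.random.randn(250).cumsum() * 100, index=idx)
+
+    plt.figure()
+    left.plot(style="g")                       # drawn against the left y-axis
+    right.plot(style="k--", secondary_y=True)  # drawn against a secondary y-axis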
+ +Other API changes +~~~~~~~~~~~~~~~~~ + +- Deprecation of ``offset``, ``time_rule``, and ``timeRule`` argument names in + time series functions. Warnings will be printed until pandas 0.9 or 1.0. + +Potential porting issues for pandas <= 0.7.3 users +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The major change that may affect you in pandas 0.8.0 is that time series +indexes use NumPy's ``datetime64`` data type instead of ``dtype=object`` arrays +of Python's built-in ``datetime.datetime`` objects. ``DateRange`` has been +replaced by ``DatetimeIndex`` but otherwise behaves identically. But, if you +have code that converts ``DateRange`` or ``Index`` objects that used to contain +``datetime.datetime`` values to plain NumPy arrays, you may have bugs lurking +with code using scalar values because you are handing control over to NumPy: + +.. ipython:: python + + import datetime + + rng = pd.date_range("1/1/2000", periods=10) + rng[5] + isinstance(rng[5], datetime.datetime) + rng_asarray = np.asarray(rng) + scalar_val = rng_asarray[5] + type(scalar_val) + +pandas's ``Timestamp`` object is a subclass of ``datetime.datetime`` that has +nanosecond support (the ``nanosecond`` field stores the nanosecond value between +0 and 999). It should substitute directly into any code that used +``datetime.datetime`` values before. Thus, I recommend not casting +``DatetimeIndex`` to regular NumPy arrays. + +If you have code that requires an array of ``datetime.datetime`` objects, you +have a couple of options. First, the ``astype(object)`` method of ``DatetimeIndex`` +produces an array of ``Timestamp`` objects: + +.. ipython:: python + + stamp_array = rng.astype(object) + stamp_array + stamp_array[5] + +To get an array of proper ``datetime.datetime`` objects, use the +``to_pydatetime`` method: + +.. ipython:: python + + dt_array = rng.to_pydatetime() + dt_array + dt_array[5] + +matplotlib knows how to handle ``datetime.datetime`` but not Timestamp +objects. While I recommend that you plot time series using ``TimeSeries.plot``, +you can either use ``to_pydatetime`` or register a converter for the Timestamp +type. See `matplotlib documentation +`__ for more on this. + +.. warning:: + + There are bugs in the user-facing API with the nanosecond datetime64 unit + in NumPy 1.6. In particular, the string version of the array shows garbage + values, and conversion to ``dtype=object`` is similarly broken. + + .. ipython:: python + + rng = pd.date_range("1/1/2000", periods=10) + rng + np.asarray(rng) + converted = np.asarray(rng, dtype=object) + converted[5] + + **Trust me: don't panic**. If you are using NumPy 1.6 and restrict your + interaction with ``datetime64`` values to pandas's API you will be just + fine. There is nothing wrong with the data-type (a 64-bit integer + internally); all of the important data processing happens in pandas and is + heavily tested. I strongly recommend that you **do not work directly with + datetime64 arrays in NumPy 1.6** and only use the pandas API. + + +**Support for non-unique indexes**: You may have code +inside a ``try:... catch:`` block that failed due to the index not being +unique. In many cases it will no longer fail (some methods like ``append`` still +check for uniqueness unless disabled). However, all is not lost: you can +inspect ``index.is_unique`` and raise an exception explicitly if it is +``False`` or go to a different code branch. + + +.. _whatsnew_0.8.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.7.3..v0.8.0 diff --git a/doc/source/whatsnew/v0.8.1.rst b/doc/source/whatsnew/v0.8.1.rst new file mode 100644 index 00000000..a00a57a0 --- /dev/null +++ b/doc/source/whatsnew/v0.8.1.rst @@ -0,0 +1,47 @@ +.. _whatsnew_0801: + +Version 0.8.1 (July 22, 2012) +----------------------------- + +{{ header }} + + +This release includes a few new features, performance enhancements, and over 30 +bug fixes from 0.8.0. New features include notably NA friendly string +processing functionality and a series of new plot types and options. + +New features +~~~~~~~~~~~~ + + - Add :ref:`vectorized string processing methods ` + accessible via Series.str (:issue:`620`) + - Add option to disable adjustment in EWMA (:issue:`1584`) + - :ref:`Radviz plot ` (:issue:`1566`) + - :ref:`Parallel coordinates plot ` + - :ref:`Bootstrap plot ` + - Per column styles and secondary y-axis plotting (:issue:`1559`) + - New datetime converters millisecond plotting (:issue:`1599`) + - Add option to disable "sparse" display of hierarchical indexes (:issue:`1538`) + - Series/DataFrame's ``set_index`` method can :ref:`append levels + ` to an existing Index/MultiIndex (:issue:`1569`, :issue:`1577`) + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + - Improved implementation of rolling min and max (thanks to `Bottleneck + `__ !) + - Add accelerated ``'median'`` GroupBy option (:issue:`1358`) + - Significantly improve the performance of parsing ISO8601-format date + strings with ``DatetimeIndex`` or ``to_datetime`` (:issue:`1571`) + - Improve the performance of GroupBy on single-key aggregations and use with + Categorical types + - Significant datetime parsing performance improvements + + + +.. _whatsnew_0.8.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.8.0..v0.8.1 diff --git a/doc/source/whatsnew/v0.9.0.rst b/doc/source/whatsnew/v0.9.0.rst new file mode 100644 index 00000000..44ded51e --- /dev/null +++ b/doc/source/whatsnew/v0.9.0.rst @@ -0,0 +1,109 @@ +.. _whatsnew_0900: + +{{ header }} + + +Version 0.9.0 (October 7, 2012) +------------------------------- + +This is a major release from 0.8.1 and includes several new features and +enhancements along with a large number of bug fixes. New features include +vectorized unicode encoding/decoding for ``Series.str``, ``to_latex`` method to +DataFrame, more flexible parsing of boolean values, and enabling the download of +options data from Yahoo! Finance. + +New features +~~~~~~~~~~~~ + + - Add ``encode`` and ``decode`` for unicode handling to :ref:`vectorized + string processing methods ` in Series.str (:issue:`1706`) + - Add ``DataFrame.to_latex`` method (:issue:`1735`) + - Add convenient expanding window equivalents of all rolling_* ops (:issue:`1785`) + - Add Options class to pandas.io.data for fetching options data from Yahoo! 
+ Finance (:issue:`1748`, :issue:`1739`) + - More flexible parsing of boolean values (Yes, No, TRUE, FALSE, etc) + (:issue:`1691`, :issue:`1295`) + - Add ``level`` parameter to ``Series.reset_index`` + - ``TimeSeries.between_time`` can now select times across midnight (:issue:`1871`) + - Series constructor can now handle generator as input (:issue:`1679`) + - ``DataFrame.dropna`` can now take multiple axes (tuple/list) as input + (:issue:`924`) + - Enable ``skip_footer`` parameter in ``ExcelFile.parse`` (:issue:`1843`) + +API changes +~~~~~~~~~~~ + + - The default column names when ``header=None`` and no columns names passed to + functions like ``read_csv`` has changed to be more Pythonic and amenable to + attribute access: + +.. ipython:: python + + import io + + data = """ + 0,0,1 + 1,1,0 + 0,1,0 + """ + df = pd.read_csv(io.StringIO(data), header=None) + df + + +- Creating a Series from another Series, passing an index, will cause reindexing + to happen inside rather than treating the Series like an ndarray. Technically + improper usages like ``Series(df[col1], index=df[col2])`` that worked before + "by accident" (this was never intended) will lead to all NA Series in some + cases. To be perfectly clear: + +.. ipython:: python + + s1 = pd.Series([1, 2, 3]) + s1 + + s2 = pd.Series(s1, index=["foo", "bar", "baz"]) + s2 + +- Deprecated ``day_of_year`` API removed from PeriodIndex, use ``dayofyear`` + (:issue:`1723`) + +- Don't modify NumPy suppress printoption to True at import time + +- The internal HDF5 data arrangement for DataFrames has been transposed. Legacy + files will still be readable by HDFStore (:issue:`1834`, :issue:`1824`) + +- Legacy cruft removed: pandas.stats.misc.quantileTS + +- Use ISO8601 format for Period repr: monthly, daily, and on down (:issue:`1776`) + +- Empty DataFrame columns are now created as object dtype. This will prevent a + class of TypeErrors that was occurring in code where the dtype of a column + would depend on the presence of data or not (e.g. a SQL query having results) + (:issue:`1783`) + +- Setting parts of DataFrame/Panel using ix now aligns input Series/DataFrame + (:issue:`1630`) + +- ``first`` and ``last`` methods in ``GroupBy`` no longer drop non-numeric + columns (:issue:`1809`) + +- Resolved inconsistencies in specifying custom NA values in text parser. + ``na_values`` of type dict no longer override default NAs unless + ``keep_default_na`` is set to false explicitly (:issue:`1657`) + +- ``DataFrame.dot`` will not do data alignment, and also work with Series + (:issue:`1915`) + + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + + +.. _whatsnew_0.9.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.8.1..v0.9.0 diff --git a/doc/source/whatsnew/v0.9.1.rst b/doc/source/whatsnew/v0.9.1.rst new file mode 100644 index 00000000..cdc0671f --- /dev/null +++ b/doc/source/whatsnew/v0.9.1.rst @@ -0,0 +1,171 @@ +.. _whatsnew_0901: + +Version 0.9.1 (November 14, 2012) +--------------------------------- + +{{ header }} + + +This is a bug fix release from 0.9.0 and includes several new features and +enhancements along with a large number of bug fixes. The new features include +by-column sort order for DataFrame and Series, improved NA handling for the rank +method, masking functions for DataFrame, and intraday time-series filtering for +DataFrame. 
+ +New features +~~~~~~~~~~~~ + + - ``Series.sort``, ``DataFrame.sort``, and ``DataFrame.sort_index`` can now be + specified in a per-column manner to support multiple sort orders (:issue:`928`) + + .. code-block:: ipython + + In [2]: df = pd.DataFrame(np.random.randint(0, 2, (6, 3)), + ...: columns=['A', 'B', 'C']) + + In [3]: df.sort(['A', 'B'], ascending=[1, 0]) + + Out[3]: + A B C + 3 0 1 1 + 4 0 1 1 + 2 0 0 1 + 0 1 0 0 + 1 1 0 0 + 5 1 0 0 + + - ``DataFrame.rank`` now supports additional argument values for the + ``na_option`` parameter so missing values can be assigned either the largest + or the smallest rank (:issue:`1508`, :issue:`2159`) + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C']) + + df.loc[2:4] = np.nan + + df.rank() + + df.rank(na_option='top') + + df.rank(na_option='bottom') + + + - DataFrame has new ``where`` and ``mask`` methods to select values according to a + given boolean mask (:issue:`2109`, :issue:`2151`) + + DataFrame currently supports slicing via a boolean vector the same length as the DataFrame (inside the ``[]``). + The returned DataFrame has the same number of columns as the original, but is sliced on its index. + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 3), columns=['A', 'B', 'C']) + + df + + df[df['A'] > 0] + + If a DataFrame is sliced with a DataFrame based boolean condition (with the same size as the original DataFrame), + then a DataFrame the same size (index and columns) as the original is returned, with + elements that do not meet the boolean condition as ``NaN``. This is accomplished via + the new method ``DataFrame.where``. In addition, ``where`` takes an optional ``other`` argument for replacement. + + .. ipython:: python + + df[df > 0] + + df.where(df > 0) + + df.where(df > 0, -df) + + Furthermore, ``where`` now aligns the input boolean condition (ndarray or DataFrame), such that partial selection + with setting is possible. This is analogous to partial setting via ``.ix`` (but on the contents rather than the axis labels) + + .. ipython:: python + + df2 = df.copy() + df2[df2[1:4] > 0] = 3 + df2 + + ``DataFrame.mask`` is the inverse boolean operation of ``where``. + + .. ipython:: python + + df.mask(df <= 0) + + - Enable referencing of Excel columns by their column names (:issue:`1936`) + + .. code-block:: ipython + + In [1]: xl = pd.ExcelFile('data/test.xls') + + In [2]: xl.parse('Sheet1', index_col=0, parse_dates=True, + parse_cols='A:D') + + + - Added option to disable pandas-style tick locators and formatters + using ``series.plot(x_compat=True)`` or ``pandas.plot_params['x_compat'] = + True`` (:issue:`2205`) + - Existing TimeSeries methods ``at_time`` and ``between_time`` were added to + DataFrame (:issue:`2149`) + - DataFrame.dot can now accept ndarrays (:issue:`2042`) + - DataFrame.drop now supports non-unique indexes (:issue:`2101`) + - Panel.shift now supports negative periods (:issue:`2164`) + - DataFrame now support unary ~ operator (:issue:`2110`) + +API changes +~~~~~~~~~~~ + + - Upsampling data with a PeriodIndex will result in a higher frequency + TimeSeries that spans the original time window + + .. 
code-block:: ipython + + In [1]: prng = pd.period_range('2012Q1', periods=2, freq='Q') + + In [2]: s = pd.Series(np.random.randn(len(prng)), prng) + + In [4]: s.resample('M') + Out[4]: + 2012-01 -1.471992 + 2012-02 NaN + 2012-03 NaN + 2012-04 -0.493593 + 2012-05 NaN + 2012-06 NaN + Freq: M, dtype: float64 + + - Period.end_time now returns the last nanosecond in the time interval + (:issue:`2124`, :issue:`2125`, :issue:`1764`) + + .. ipython:: python + + p = pd.Period('2012') + + p.end_time + + + - File parsers no longer coerce to float or bool for columns that have custom + converters specified (:issue:`2184`) + + .. ipython:: python + + import io + + data = ('A,B,C\n' + '00001,001,5\n' + '00002,002,6') + pd.read_csv(io.StringIO(data), converters={'A': lambda x: x.strip()}) + + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.9.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.9.0..v0.9.1 diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst new file mode 100755 index 00000000..2ab0af46 --- /dev/null +++ b/doc/source/whatsnew/v1.0.0.rst @@ -0,0 +1,1303 @@ +.. _whatsnew_100: + +What's new in 1.0.0 (January 29, 2020) +-------------------------------------- + +These are the changes in pandas 1.0.0. See :ref:`release` for a full changelog +including other versions of pandas. + +.. note:: + + The pandas 1.0 release removed a lot of functionality that was deprecated + in previous releases (see :ref:`below ` + for an overview). It is recommended to first upgrade to pandas 0.25 and to + ensure your code is working without warnings, before upgrading to pandas + 1.0. + + +New deprecation policy +~~~~~~~~~~~~~~~~~~~~~~ + +Starting with pandas 1.0.0, pandas will adopt a variant of `SemVer`_ to +version releases. Briefly, + +* Deprecations will be introduced in minor releases (e.g. 1.1.0, 1.2.0, 2.1.0, ...) +* Deprecations will be enforced in major releases (e.g. 1.0.0, 2.0.0, 3.0.0, ...) +* API-breaking changes will be made only in major releases (except for experimental features) + +See :ref:`policies.version` for more. + +.. _2019 Pandas User Survey: https://pandas.pydata.org/community/blog/2019-user-survey.html +.. _SemVer: https://semver.org + +{{ header }} + +.. --------------------------------------------------------------------------- + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_100.numba_rolling_apply: + +Using Numba in ``rolling.apply`` and ``expanding.apply`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added an ``engine`` keyword to :meth:`~core.window.rolling.Rolling.apply` and :meth:`~core.window.expanding.Expanding.apply` +that allows the user to execute the routine using `Numba `__ instead of Cython. +Using the Numba engine can yield significant performance gains if the apply function can operate on numpy arrays and +the data set is larger (1 million rows or greater). For more details, see +:ref:`rolling apply documentation ` (:issue:`28987`, :issue:`30936`) + +.. _whatsnew_100.custom_window: + +Defining custom windows for rolling operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added a :func:`pandas.api.indexers.BaseIndexer` class that allows users to define how +window bounds are created during ``rolling`` operations. Users can define their own ``get_window_bounds`` +method on a :func:`pandas.api.indexers.BaseIndexer` subclass that will generate the start and end +indices used for each window during the rolling aggregation. 
For more details and example usage, see +the :ref:`custom window rolling documentation ` + +.. _whatsnew_100.to_markdown: + +Converting to markdown +^^^^^^^^^^^^^^^^^^^^^^ + +We've added :meth:`~DataFrame.to_markdown` for creating a markdown table (:issue:`11052`) + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=['a', 'a', 'b']) + print(df.to_markdown()) + +Experimental new features +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_100.NA: + +Experimental ``NA`` scalar to denote missing values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new ``pd.NA`` value (singleton) is introduced to represent scalar missing +values. Up to now, pandas used several values to represent missing data: ``np.nan`` is used for this for float data, ``np.nan`` or +``None`` for object-dtype data and ``pd.NaT`` for datetime-like data. The +goal of ``pd.NA`` is to provide a "missing" indicator that can be used +consistently across data types. ``pd.NA`` is currently used by the nullable integer and boolean +data types and the new string data type (:issue:`28095`). + +.. warning:: + + Experimental: the behaviour of ``pd.NA`` can still change without warning. + +For example, creating a Series using the nullable integer dtype: + +.. ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + s + s[2] + +Compared to ``np.nan``, ``pd.NA`` behaves differently in certain operations. +In addition to arithmetic operations, ``pd.NA`` also propagates as "missing" +or "unknown" in comparison operations: + +.. ipython:: python + + np.nan > 1 + pd.NA > 1 + +For logical operations, ``pd.NA`` follows the rules of the +`three-valued logic `__ (or +*Kleene logic*). For example: + +.. ipython:: python + + pd.NA | True + +For more, see :ref:`NA section ` in the user guide on missing +data. + + +.. _whatsnew_100.string: + +Dedicated string data type +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :class:`StringDtype`, an extension type dedicated to string data. +Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`) + +.. warning:: + + ``StringDtype`` is currently considered experimental. The implementation + and parts of the API may change without warning. + +The ``'string'`` extension type solves several issues with object-dtype NumPy arrays: + +1. You can accidentally store a *mixture* of strings and non-strings in an + ``object`` dtype array. A ``StringArray`` can only store strings. +2. ``object`` dtype breaks dtype-specific operations like :meth:`DataFrame.select_dtypes`. + There isn't a clear way to select *just* text while excluding non-text, + but still object-dtype columns. +3. When reading code, the contents of an ``object`` dtype array is less clear + than ``string``. + + +.. ipython:: python + + pd.Series(['abc', None, 'def'], dtype=pd.StringDtype()) + +You can use the alias ``"string"`` as well. + +.. ipython:: python + + s = pd.Series(['abc', None, 'def'], dtype="string") + s + +The usual string accessor methods work. Where appropriate, the return type +of the Series or columns of a DataFrame will also have string dtype. + +.. ipython:: python + + s.str.upper() + s.str.split('b', expand=True).dtypes + +String accessor methods returning integers will return a value with :class:`Int64Dtype` + +.. ipython:: python + + s.str.count("a") + +We recommend explicitly using the ``string`` data type when working with strings. +See :ref:`text.types` for more. + +.. 
_whatsnew_100.boolean: + +Boolean data type with missing values support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :class:`BooleanDtype` / :class:`~arrays.BooleanArray`, an extension +type dedicated to boolean data that can hold missing values. The default +``bool`` data type based on a bool-dtype NumPy array, the column can only hold +``True`` or ``False``, and not missing values. This new :class:`~arrays.BooleanArray` +can store missing values as well by keeping track of this in a separate mask. +(:issue:`29555`, :issue:`30095`, :issue:`31131`) + +.. ipython:: python + + pd.Series([True, False, None], dtype=pd.BooleanDtype()) + +You can use the alias ``"boolean"`` as well. + +.. ipython:: python + + s = pd.Series([True, False, None], dtype="boolean") + s + +.. _whatsnew_100.convert_dtypes: + +Method ``convert_dtypes`` to ease use of supported extension dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to encourage use of the extension dtypes ``StringDtype``, +``BooleanDtype``, ``Int64Dtype``, ``Int32Dtype``, etc., that support ``pd.NA``, the +methods :meth:`DataFrame.convert_dtypes` and :meth:`Series.convert_dtypes` +have been introduced. (:issue:`29752`) (:issue:`30929`) + +Example: + +.. ipython:: python + + df = pd.DataFrame({'x': ['abc', None, 'def'], + 'y': [1, 2, np.nan], + 'z': [True, False, True]}) + df + df.dtypes + +.. ipython:: python + + converted = df.convert_dtypes() + converted + converted.dtypes + +This is especially useful after reading in data using readers such as :func:`read_csv` +and :func:`read_excel`. +See :ref:`here ` for a description. + + +.. _whatsnew_100.enhancements.other: + +Other enhancements +~~~~~~~~~~~~~~~~~~ + +- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`) +- Added the ``na_value`` argument to :meth:`Series.to_numpy`, :meth:`Index.to_numpy` and :meth:`DataFrame.to_numpy` to control the value used for missing data (:issue:`30322`) +- :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`) +- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) +- DataFrames with :ref:`nullable integer `, the :ref:`new string dtype ` + and period data type can now be converted to ``pyarrow`` (>=0.15.0), which means that it is + supported in writing to the Parquet file format when using the ``pyarrow`` engine (:issue:`28368`). + Full roundtrip to parquet (writing and reading back in with :meth:`~DataFrame.to_parquet` / :func:`read_parquet`) + is supported starting with pyarrow >= 0.16 (:issue:`20612`). +- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`) +- :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) +- :meth:`read_stata` can read Stata 119 dta files. 
(:issue:`28250`) +- Implemented :meth:`pandas.core.window.Window.var` and :meth:`pandas.core.window.Window.std` functions (:issue:`26597`) +- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) +- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) +- :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`) +- :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`) +- :func:`read_excel` now can read binary Excel (``.xlsb``) files by passing ``engine='pyxlsb'``. For more details and example usage, see the :ref:`Binary Excel files documentation `. Closes :issue:`8540`. +- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) +- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`) +- DataFrame constructor preserve ``ExtensionArray`` dtype with ``ExtensionArray`` (:issue:`11363`) +- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) +- :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained ``ignore_index`` keyword to reset index (:issue:`30114`) +- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) +- Added new writer for exporting Stata dta files in versions 118 and 119, ``StataWriterUTF8``. These files formats support exporting strings containing Unicode characters. Format 119 supports data sets with more than 32,767 variables (:issue:`23573`, :issue:`30959`) +- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`) +- Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`) +- :meth:`Timestamp.fromisocalendar` is now compatible with python 3.8 and above (:issue:`28115`) +- :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept URL (:issue:`30163`) + + + +.. --------------------------------------------------------------------------- + +.. _whatsnew_100.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_100.api_breaking.MultiIndex._names: + +Avoid using names from ``MultiIndex.levels`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As part of a larger refactor to :class:`MultiIndex` the level names are now +stored separately from the levels (:issue:`27242`). We recommend using +:attr:`MultiIndex.names` to access the names, and :meth:`Index.set_names` +to update the names. + +For backwards compatibility, you can still *access* the names via the levels. + +.. ipython:: python + + mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y']) + mi.levels[0].name + +However, it is no longer possible to *update* the names of the ``MultiIndex`` +via the level. + +.. ipython:: python + :okexcept: + + mi.levels[0].name = "new name" + mi.names + +To update, use ``MultiIndex.set_names``, which returns a new ``MultiIndex``. + +.. ipython:: python + + mi2 = mi.set_names("new name", level=0) + mi2.names + +New repr for :class:`~pandas.arrays.IntervalArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`pandas.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) + +*pandas 0.25.x* + +.. 
code-block:: ipython + + In [1]: pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)]) + Out[2]: + IntervalArray([(0, 1], (2, 3]], + closed='right', + dtype='interval[int64]') + +*pandas 1.0.0* + +.. ipython:: python + + pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)]) + +``DataFrame.rename`` now only accepts one positional argument +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.rename` would previously accept positional arguments that would lead +to ambiguous or undefined behavior. From pandas 1.0, only the very first argument, which +maps labels to their new names along the default axis, is allowed to be passed by position +(:issue:`29136`). + +.. ipython:: python + :suppress: + + df = pd.DataFrame([[1]]) + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: df = pd.DataFrame([[1]]) + In [2]: df.rename({0: 1}, {0: 2}) + Out[2]: + FutureWarning: ...Use named arguments to resolve ambiguity... + 2 + 1 1 + +*pandas 1.0.0* + +.. code-block:: ipython + + In [3]: df.rename({0: 1}, {0: 2}) + Traceback (most recent call last): + ... + TypeError: rename() takes from 1 to 2 positional arguments but 3 were given + +Note that errors will now be raised when conflicting or potentially ambiguous arguments are provided. + +*pandas 0.25.x* + +.. code-block:: ipython + + In [4]: df.rename({0: 1}, index={0: 2}) + Out[4]: + 0 + 1 1 + + In [5]: df.rename(mapper={0: 1}, index={0: 2}) + Out[5]: + 0 + 2 1 + +*pandas 1.0.0* + +.. code-block:: ipython + + In [6]: df.rename({0: 1}, index={0: 2}) + Traceback (most recent call last): + ... + TypeError: Cannot specify both 'mapper' and any of 'index' or 'columns' + + In [7]: df.rename(mapper={0: 1}, index={0: 2}) + Traceback (most recent call last): + ... + TypeError: Cannot specify both 'mapper' and any of 'index' or 'columns' + +You can still change the axis along which the first positional argument is applied by +supplying the ``axis`` keyword argument. + +.. ipython:: python + + df.rename({0: 1}) + df.rename({0: 1}, axis=1) + +If you would like to update both the index and column labels, be sure to use the respective +keywords. + +.. ipython:: python + + df.rename(index={0: 1}, columns={0: 2}) + +Extended verbose info output for :class:`~pandas.DataFrame` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`) + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({"int_col": [1, 2, 3], + ... "text_col": ["a", "b", "c"], + ... "float_col": [0.0, 0.1, 0.2]}) + In [2]: df.info(verbose=True) + + RangeIndex: 3 entries, 0 to 2 + Data columns (total 3 columns): + int_col 3 non-null int64 + text_col 3 non-null object + float_col 3 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 152.0+ bytes + +*pandas 1.0.0* + +.. ipython:: python + + df = pd.DataFrame({"int_col": [1, 2, 3], + "text_col": ["a", "b", "c"], + "float_col": [0.0, 0.1, 0.2]}) + df.info(verbose=True) + +:meth:`pandas.array` inference changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`pandas.array` now infers pandas' new extension types in several cases (:issue:`29791`): + +1. String data (including missing values) now returns a :class:`arrays.StringArray`. +2. Integer data (including missing values) now returns a :class:`arrays.IntegerArray`. +3. Boolean data (including missing values) now returns the new :class:`arrays.BooleanArray` + +*pandas 0.25.x* + +.. 
code-block:: ipython + + In [1]: pd.array(["a", None]) + Out[1]: + + ['a', None] + Length: 2, dtype: object + + In [2]: pd.array([1, None]) + Out[2]: + + [1, None] + Length: 2, dtype: object + + +*pandas 1.0.0* + +.. ipython:: python + + pd.array(["a", None]) + pd.array([1, None]) + +As a reminder, you can specify the ``dtype`` to disable all inference. + +:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` rather than +:attr:`numpy.nan` as its missing value marker (:issue:`29964`). + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: a = pd.array([1, 2, None], dtype="Int64") + In [2]: a + Out[2]: + + [1, 2, NaN] + Length: 3, dtype: Int64 + + In [3]: a[2] + Out[3]: + nan + +*pandas 1.0.0* + +.. ipython:: python + + a = pd.array([1, 2, None], dtype="Int64") + a + a[2] + +This has a few API-breaking consequences. + +**Converting to a NumPy ndarray** + +When converting to a NumPy array, missing values will be ``pd.NA``, which cannot +be converted to a float. So calling ``np.asarray(integer_array, dtype="float")`` +will now raise. + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: np.asarray(a, dtype="float") + Out[1]: + array([ 1., 2., nan]) + +*pandas 1.0.0* + +.. ipython:: python + :okexcept: + + np.asarray(a, dtype="float") + +Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead. + +.. ipython:: python + + a.to_numpy(dtype="float", na_value=np.nan) + +**Reductions can return** ``pd.NA`` + +When performing a reduction such as a sum with ``skipna=False``, the result +will now be ``pd.NA`` instead of ``np.nan`` in the presence of missing values +(:issue:`30958`). + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: pd.Series(a).sum(skipna=False) + Out[1]: + nan + +*pandas 1.0.0* + +.. ipython:: python + + pd.Series(a).sum(skipna=False) + +**value_counts returns a nullable integer dtype** + +:meth:`Series.value_counts` with a nullable integer dtype now returns a nullable +integer dtype for the values. + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype + Out[1]: + dtype('int64') + +*pandas 1.0.0* + +.. ipython:: python + + pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype + +See :ref:`missing_data.NA` for more on the differences between :attr:`pandas.NA` +and :attr:`numpy.nan`. + +:class:`arrays.IntegerArray` comparisons return :class:`arrays.BooleanArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Comparison operations on a :class:`arrays.IntegerArray` now return a +:class:`arrays.BooleanArray` rather than a NumPy array (:issue:`29964`). + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: a = pd.array([1, 2, None], dtype="Int64") + In [2]: a + Out[2]: + + [1, 2, NaN] + Length: 3, dtype: Int64 + + In [3]: a > 1 + Out[3]: + array([False, True, False]) + +*pandas 1.0.0* + +.. ipython:: python + + a = pd.array([1, 2, None], dtype="Int64") + a > 1 + +Note that missing values now propagate, rather than always comparing unequal +like :attr:`numpy.nan`. See :ref:`missing_data.NA` for more. + +By default :meth:`Categorical.min` now returns the minimum instead of np.nan +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When :class:`Categorical` contains ``np.nan``, +:meth:`Categorical.min` no longer returns ``np.nan`` by default (``skipna=True``) (:issue:`25303`) + +*pandas 0.25.x* + +.. 
code-block:: ipython + + In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min() + Out[1]: nan + + +*pandas 1.0.0* + +.. ipython:: python + + pd.Categorical([1, 2, np.nan], ordered=True).min() + + +Default dtype of empty :class:`pandas.Series` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Initialising an empty :class:`pandas.Series` without specifying a dtype now raises a ``DeprecationWarning`` +(:issue:`17261`). The default dtype will change from ``float64`` to ``object`` in future releases so that it is +consistent with the behaviour of :class:`DataFrame` and :class:`Index`. + +*pandas 1.0.0* + +.. code-block:: ipython + + In [1]: pd.Series() + Out[1]: + DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. + Series([], dtype: float64) + +Result dtype inference changes for resample operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The rules for the result dtype in :meth:`DataFrame.resample` aggregations have changed for extension types (:issue:`31359`). +Previously, pandas would attempt to convert the result back to the original dtype, falling back to the usual +inference rules if that was not possible. Now, pandas will only return a result of the original dtype if the +scalar values in the result are instances of the extension dtype's scalar type. + +.. ipython:: python + + df = pd.DataFrame({"A": ['a', 'b']}, dtype='category', + index=pd.date_range('2000', periods=2)) + df + + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: df.resample("2D").agg(lambda x: 'a').A.dtype + Out[1]: + CategoricalDtype(categories=['a', 'b'], ordered=False) + +*pandas 1.0.0* + +.. ipython:: python + + df.resample("2D").agg(lambda x: 'a').A.dtype + +This fixes an inconsistency between ``resample`` and ``groupby``. +This also fixes a potential bug, where the **values** of the result might change +depending on how the results are cast back to the original dtype. + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: df.resample("2D").agg(lambda x: 'c') + Out[1]: + + A + 0 NaN + +*pandas 1.0.0* + +.. ipython:: python + + df.resample("2D").agg(lambda x: 'c') + + +.. _whatsnew_100.api_breaking.python: + +Increased minimum version for Python +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas 1.0.0 supports Python 3.6.1 and higher (:issue:`29212`). + +.. _whatsnew_100.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some minimum supported versions of dependencies were updated (:issue:`29766`, :issue:`29723`). +If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.13.3 | X | | ++-----------------+-----------------+----------+---------+ +| pytz | 2015.4 | X | | ++-----------------+-----------------+----------+---------+ +| python-dateutil | 2.6.1 | X | | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.2.1 | | | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.6.2 | | | ++-----------------+-----------------+----------+---------+ +| pytest (dev) | 4.0.2 | | | ++-----------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. 
+The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. + ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 | 4.6.0 | | ++-----------------+-----------------+---------+ +| fastparquet | 0.3.2 | X | ++-----------------+-----------------+---------+ +| gcsfs | 0.2.2 | | ++-----------------+-----------------+---------+ +| lxml | 3.8.0 | | ++-----------------+-----------------+---------+ +| matplotlib | 2.2.2 | | ++-----------------+-----------------+---------+ +| numba | 0.46.0 | X | ++-----------------+-----------------+---------+ +| openpyxl | 2.5.7 | X | ++-----------------+-----------------+---------+ +| pyarrow | 0.13.0 | X | ++-----------------+-----------------+---------+ +| pymysql | 0.7.1 | | ++-----------------+-----------------+---------+ +| pytables | 3.4.2 | | ++-----------------+-----------------+---------+ +| s3fs | 0.3.0 | X | ++-----------------+-----------------+---------+ +| scipy | 0.19.0 | | ++-----------------+-----------------+---------+ +| sqlalchemy | 1.1.4 | | ++-----------------+-----------------+---------+ +| xarray | 0.8.2 | | ++-----------------+-----------------+---------+ +| xlrd | 1.1.0 | | ++-----------------+-----------------+---------+ +| xlsxwriter | 0.9.8 | | ++-----------------+-----------------+---------+ +| xlwt | 1.2.0 | | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +Build changes +^^^^^^^^^^^^^ + +pandas has added a `pyproject.toml `_ file and will no longer include +cythonized files in the source distribution uploaded to PyPI (:issue:`28341`, :issue:`20775`). If you're installing +a built distribution (wheel) or via conda, this shouldn't have any effect on you. If you're building pandas from +source, you should no longer need to install Cython into your build environment before calling ``pip install pandas``. + + +.. _whatsnew_100.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- :class:`core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`) +- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`) +- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`) +- In order to improve tab-completion, pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``). + To see which attributes are excluded, see an object's ``_deprecations`` attribute, for example ``pd.DataFrame._deprecations`` (:issue:`28805`). +- The returned dtype of :func:`unique` now matches the input dtype. (:issue:`27874`) +- Changed the default configuration value for ``options.matplotlib.register_converters`` from ``True`` to ``"auto"`` (:issue:`18720`). + Now, pandas custom formatters will only be applied to plots created by pandas, through :meth:`~DataFrame.plot`. + Previously, pandas' formatters would be applied to all plots created *after* a :meth:`~DataFrame.plot`. + See :ref:`units registration ` for more. +- :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter. 
+ Supplying anything other than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`) +- When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`) +- :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`). +- Added ``<NA>`` to the list of default NA values for :meth:`read_csv` (:issue:`30821`) + +.. _whatsnew_100.api.documentation: + +Documentation improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Added new section on :ref:`scale` (:issue:`28315`). +- Added sub-section on :ref:`io.query_multi` for HDF5 datasets (:issue:`28791`). + +.. --------------------------------------------------------------------------- + +.. _whatsnew_100.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- :meth:`Series.item` and :meth:`Index.item` have been *undeprecated* (:issue:`29250`) +- ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``, + value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` + is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). +- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) +- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) +- :meth:`DateOffset.isAnchored` and :meth:`DateOffset.onOffset` are deprecated and will be removed in a future version, use :meth:`DateOffset.is_anchored` and :meth:`DateOffset.is_on_offset` instead (:issue:`30340`) +- ``pandas.tseries.frequencies.get_offset`` is deprecated and will be removed in a future version, use ``pandas.tseries.frequencies.to_offset`` instead (:issue:`4205`) +- :meth:`Categorical.take_nd` and :meth:`CategoricalIndex.take_nd` are deprecated, use :meth:`Categorical.take` and :meth:`CategoricalIndex.take` instead (:issue:`27745`) +- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`) +- The parameter ``label`` in :func:`lreshape` has been deprecated and will be removed in a future version (:issue:`29742`) +- ``pandas.core.index`` has been deprecated and will be removed in a future version, the public classes are available in the top-level namespace (:issue:`19711`) +- :func:`pandas.json_normalize` is now exposed in the top-level namespace. + Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and + it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). +- The ``numpy`` argument of :meth:`pandas.read_json` is deprecated (:issue:`28512`). +- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) +- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) +- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`). +- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`) +- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. 
(:issue:`27357`) +- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`) +- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`) +- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`) +- :class:`~DataFrame.diff` will raise a ``TypeError`` rather than implicitly losing the dtype of extension types in the future. Convert to the correct dtype before calling ``diff`` instead (:issue:`31025`) + +**Selecting Columns from a Grouped DataFrame** + +When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated; +a list of items should be used instead (:issue:`23566`). For example: + +.. code-block:: ipython + + df = pd.DataFrame({ + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": np.random.randn(8), + "C": np.random.randn(8), + }) + g = df.groupby('A') + + # single key, returns SeriesGroupBy + g['B'] + + # tuple of single key, returns SeriesGroupBy + g[('B',)] + + # tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning + g[('B', 'C')] + + # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning + # (implicitly converts the passed strings into a single tuple) + g['B', 'C'] + + # proper way, returns DataFrameGroupBy + g[['B', 'C']] + +.. --------------------------------------------------------------------------- + +.. _whatsnew_100.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Removed SparseSeries and SparseDataFrame** + +``SparseSeries``, ``SparseDataFrame`` and the ``DataFrame.to_sparse`` method +have been removed (:issue:`28425`). We recommend using a ``Series`` or +``DataFrame`` with sparse values instead. See :ref:`sparse.migration` for help +with migrating existing code. + +.. _whatsnew_100.matplotlib_units: + +**Matplotlib unit registration** + +Previously, pandas would register converters with matplotlib as a side effect of importing pandas (:issue:`18720`). +This changed the output of plots made via matplotlib after pandas was imported, even if you were using +matplotlib directly rather than :meth:`~DataFrame.plot`. + +To use pandas formatters with a matplotlib plot, specify: + +.. code-block:: ipython + + In [1]: import pandas as pd + In [2]: pd.options.plotting.matplotlib.register_converters = True + +Note that plots created by :meth:`DataFrame.plot` and :meth:`Series.plot` *do* register the converters +automatically. The only behavior change is when plotting a date-like object via ``matplotlib.pyplot.plot`` +or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
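+
+As an illustration, the converters can also be registered explicitly through
+:func:`pandas.plotting.register_matplotlib_converters` (a minimal sketch of one
+way to keep pandas' date handling when calling matplotlib directly):
+
+.. code-block:: python
+
+    import matplotlib.pyplot as plt
+    import pandas as pd
+    from pandas.plotting import register_matplotlib_converters
+
+    # Explicitly register pandas' converters/formatters with matplotlib.
+    register_matplotlib_converters()
+
+    ser = pd.Series(range(3), index=pd.date_range("2000", periods=3))
+
+    # Plotting through matplotlib directly now uses pandas' date converters,
+    # as plots created via Series.plot / DataFrame.plot already do.
+    plt.plot(ser.index, ser.to_numpy())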
+ +**Other removals** + +- Removed the previously deprecated keyword "index" from :func:`read_stata`, :class:`StataReader`, and :meth:`StataReader.read`, use "index_col" instead (:issue:`17328`) +- Removed ``StataReader.data`` method, use :meth:`StataReader.read` instead (:issue:`9493`) +- Removed ``pandas.plotting._matplotlib.tsplot``, use :meth:`Series.plot` instead (:issue:`19980`) +- ``pandas.tseries.converter.register`` has been moved to :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18307`) +- :meth:`Series.plot` no longer accepts positional arguments, pass keyword arguments instead (:issue:`30003`) +- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allow ``figsize="default"``, specify figure size by passing a tuple instead (:issue:`30003`) +- Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`) +- :class:`TimedeltaIndex` and :class:`DatetimeIndex` no longer accept non-nanosecond dtype strings like "timedelta64" or "datetime64", use "timedelta64[ns]" and "datetime64[ns]" instead (:issue:`24806`) +- Changed the default "skipna" argument in :func:`pandas.api.types.infer_dtype` from ``False`` to ``True`` (:issue:`24050`) +- Removed ``Series.ix`` and ``DataFrame.ix`` (:issue:`26438`) +- Removed ``Index.summary`` (:issue:`18217`) +- Removed the previously deprecated keyword "fastpath" from the :class:`Index` constructor (:issue:`23110`) +- Removed ``Series.get_value``, ``Series.set_value``, ``DataFrame.get_value``, ``DataFrame.set_value`` (:issue:`17739`) +- Removed ``Series.compound`` and ``DataFrame.compound`` (:issue:`26405`) +- Changed the default "inplace" argument in :meth:`DataFrame.set_index` and :meth:`Series.set_axis` from ``None`` to ``False`` (:issue:`27600`) +- Removed ``Series.cat.categorical``, ``Series.cat.index``, ``Series.cat.name`` (:issue:`24751`) +- Removed the previously deprecated keyword "box" from :func:`to_datetime` and :func:`to_timedelta`; in addition these now always return :class:`DatetimeIndex`, :class:`TimedeltaIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`) +- :func:`to_timedelta`, :class:`Timedelta`, and :class:`TimedeltaIndex` no longer allow "M", "y", or "Y" for the "unit" argument (:issue:`23264`) +- Removed the previously deprecated keyword "time_rule" from (non-public) ``offsets.generate_range``, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`) +- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`) +- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`) +- Removed the previously deprecated keyword "join_axes" from :func:`concat`; use ``reindex_like`` on the result instead (:issue:`22318`) +- Removed the previously deprecated keyword "by" from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`) +- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`core.groupby.DataFrameGroupBy.aggregate`, :meth:`core.groupby.SeriesGroupBy.aggregate`, :meth:`core.window.rolling.Rolling.aggregate` (:issue:`18529`) +- Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`) +- Passing ``int64`` values to :class:`DatetimeIndex` and a timezone now interprets the values as nanosecond timestamps in UTC, not wall 
times in the given timezone (:issue:`24559`) +- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`) +- Removed ``Index.contains``, use ``key in index`` instead (:issue:`30103`) +- Addition and subtraction of ``int`` or integer-arrays is no longer allowed in :class:`Timestamp`, :class:`DatetimeIndex`, :class:`TimedeltaIndex`, use ``obj + n * obj.freq`` instead of ``obj + n`` (:issue:`22535`) +- Removed ``Series.ptp`` (:issue:`21614`) +- Removed ``Series.from_array`` (:issue:`18258`) +- Removed ``DataFrame.from_items`` (:issue:`18458`) +- Removed ``DataFrame.as_matrix``, ``Series.as_matrix`` (:issue:`18458`) +- Removed ``Series.asobject`` (:issue:`18477`) +- Removed ``DataFrame.as_blocks``, ``Series.as_blocks``, ``DataFrame.blocks``, ``Series.blocks`` (:issue:`17656`) +- :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`) +- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`) +- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) +- Removed the previously deprecated keywords "start", "end", and "periods" from the :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors; use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`) +- Removed the previously deprecated keyword "verify_integrity" from the :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors (:issue:`23919`) +- Removed the previously deprecated keyword "fastpath" from ``pandas.core.internals.blocks.make_block`` (:issue:`19265`) +- Removed the previously deprecated keyword "dtype" from :meth:`Block.make_block_same_class` (:issue:`19434`) +- Removed ``ExtensionArray._formatting_values``. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`) +- Removed ``MultiIndex.to_hierarchical`` (:issue:`21613`) +- Removed ``MultiIndex.labels``, use :attr:`MultiIndex.codes` instead (:issue:`23752`) +- Removed the previously deprecated keyword "labels" from the :class:`MultiIndex` constructor, use "codes" instead (:issue:`23752`) +- Removed ``MultiIndex.set_labels``, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`) +- Removed the previously deprecated keyword "labels" from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`) +- Removed support for legacy HDF5 formats (:issue:`29787`) +- Passing a dtype alias (e.g. 
'datetime64[ns, UTC]') to :class:`DatetimeTZDtype` is no longer allowed, use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`) +- Removed the previously deprecated keyword "skip_footer" from :func:`read_excel`; use "skipfooter" instead (:issue:`18836`) +- :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`) +- Removed the previously deprecated keyword "convert_datetime64" from :meth:`DataFrame.to_records` (:issue:`18902`) +- Removed ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) +- Changed the default "keep_tz" argument in :meth:`DatetimeIndex.to_series` from ``None`` to ``True`` (:issue:`23739`) +- Removed ``api.types.is_period`` and ``api.types.is_datetimetz`` (:issue:`23917`) +- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 version of pandas has been removed (:issue:`27538`) +- Removed ``pandas.tseries.plotting.tsplot`` (:issue:`18627`) +- Removed the previously deprecated keywords "reduce" and "broadcast" from :meth:`DataFrame.apply` (:issue:`18577`) +- Removed the previously deprecated ``assert_raises_regex`` function in ``pandas._testing`` (:issue:`29174`) +- Removed the previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) +- Removed the previously deprecated keyword "nthreads" from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) +- Removed ``Index.is_lexsorted_for_tuple`` (:issue:`29305`) +- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`core.groupby.DataFrameGroupBy.aggregate`, :meth:`core.groupby.SeriesGroupBy.aggregate`, :meth:`core.window.rolling.Rolling.aggregate` (:issue:`29608`) +- Removed ``Series.valid``; use :meth:`Series.dropna` instead (:issue:`18800`) +- Removed ``DataFrame.is_copy``, ``Series.is_copy`` (:issue:`18812`) +- Removed ``DataFrame.get_ftype_counts``, ``Series.get_ftype_counts`` (:issue:`18243`) +- Removed ``DataFrame.ftypes``, ``Series.ftypes``, ``Series.ftype`` (:issue:`26744`) +- Removed ``Index.get_duplicates``, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`) +- Removed ``Series.clip_upper``, ``Series.clip_lower``, ``DataFrame.clip_upper``, ``DataFrame.clip_lower`` (:issue:`24203`) +- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`) +- Removed ``DatetimeIndex.offset`` (:issue:`20730`) +- Removed ``DatetimeIndex.asobject``, ``TimedeltaIndex.asobject``, ``PeriodIndex.asobject``, use ``astype(object)`` instead (:issue:`29801`) +- Removed the previously deprecated keyword "order" from :func:`factorize` (:issue:`19751`) +- Removed the previously deprecated keyword "encoding" from :func:`read_stata` and :meth:`DataFrame.to_stata` (:issue:`21400`) +- Changed the default "sort" argument in :func:`concat` from ``None`` to ``False`` (:issue:`20613`) +- Removed the previously deprecated keyword "raise_conflict" from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) +- Removed the previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) +- Removed the previously deprecated keywords "how", "fill_method", and "limit" from :meth:`DataFrame.resample` (:issue:`30139`) +- Passing an integer to :meth:`Series.fillna` or 
:meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`) +- Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`) +- Removed ``Series.nonzero``, use ``to_numpy().nonzero()`` instead (:issue:`24048`) +- Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`) +- Removed the previously deprecated keyword "pat" from :meth:`Series.str.partition` and :meth:`Series.str.rpartition`, use "sep" instead (:issue:`23767`) +- Removed ``Series.put`` (:issue:`27106`) +- Removed ``Series.real``, ``Series.imag`` (:issue:`27106`) +- Removed ``Series.to_dense``, ``DataFrame.to_dense`` (:issue:`26684`) +- Removed ``Index.dtype_str``, use ``str(index.dtype)`` instead (:issue:`27106`) +- :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`) +- The 'outer' method on Numpy ufuncs, e.g. ``np.subtract.outer`` operating on :class:`Series` objects is no longer supported, and will raise ``NotImplementedError`` (:issue:`27198`) +- Removed ``Series.get_dtype_counts`` and ``DataFrame.get_dtype_counts`` (:issue:`27145`) +- Changed the default "fill_value" argument in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) +- Changed the default value for the ``raw`` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` from ``None`` to ``False`` (:issue:`20584`) +- Removed deprecated behavior of :meth:`Series.argmin` and :meth:`Series.argmax`, use :meth:`Series.idxmin` and :meth:`Series.idxmax` for the old behavior (:issue:`16955`) +- Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` (:issue:`23621`) +- Removed ``Series.base``, ``Index.base``, ``Categorical.base``, ``Series.flags``, ``Index.flags``, ``PeriodArray.flags``, ``Series.strides``, ``Index.strides``, ``Series.itemsize``, ``Index.itemsize``, ``Series.data``, ``Index.data`` (:issue:`20721`) +- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) +- Removed ``Timestamp.weekday_name``, ``DatetimeIndex.weekday_name``, and ``Series.dt.weekday_name`` (:issue:`18164`) +- Removed the previously deprecated keyword "errors" in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) +- Changed the default "ordered" argument in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`) +- :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`) +- Removed ``to_msgpack``, ``read_msgpack``, ``DataFrame.to_msgpack``, ``Series.to_msgpack`` (:issue:`27103`) +- Removed ``Series.compress`` (:issue:`21930`) +- Removed the previously deprecated keyword "fill_value" from :meth:`Categorical.fillna`, use "value" instead (:issue:`19269`) +- Removed the previously deprecated keyword "data" from :func:`andrews_curves`, use "frame" instead (:issue:`6956`) +- Removed the previously deprecated keyword "data" from :func:`parallel_coordinates`, use "frame" instead (:issue:`6956`) +- Removed the previously deprecated keyword "colors" from 
:func:`parallel_coordinates`, use "color" instead (:issue:`6956`) +- Removed the previously deprecated keywords "verbose" and "private_key" from :func:`read_gbq` (:issue:`30200`) +- Calling ``np.array`` and ``np.asarray`` on tz-aware :class:`Series` and :class:`DatetimeIndex` will now return an object array of tz-aware :class:`Timestamp` (:issue:`24596`) +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_100.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Performance improvement in :class:`DataFrame` arithmetic and comparison operations with scalars (:issue:`24990`, :issue:`29853`) +- Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) +- Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`) +- Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) +- Performance improvement when initializing a :class:`DataFrame` using a ``range`` (:issue:`30171`) +- Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`) +- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`) +- Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`) +- Performance improvement in :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` (:issue:`28795`) +- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`) +- Performance improvement when checking if values in a :class:`Categorical` are equal, equal or larger or larger than a given scalar. + The improvement is not present if checking if the :class:`Categorical` is less than or less than or equal than the scalar (:issue:`29820`) +- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`) +- Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_100.bug_fixes: + +Bug fixes +~~~~~~~~~ + + +Categorical +^^^^^^^^^^^ + +- Added test to assert the :func:`fillna` raises the correct ``ValueError`` message when the value isn't a value from categories (:issue:`13628`) +- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) +- :meth:`DataFrame.reindex` with a :class:`CategoricalIndex` would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`) +- Bug in :meth:`Categorical.astype` not allowing for casting to extension dtypes (:issue:`28668`) +- Bug where :func:`merge` was unable to join on categorical and extension dtype columns (:issue:`28668`) +- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) +- Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) +- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) +- Using date accessors on a categorical dtyped :class:`Series` of datetimes was not returning an object of the + same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. when accessing :meth:`Series.dt.tz_localize` on a + :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` that would give incorrect results on categorical data (:issue:`26988`) +- Bug where calling :meth:`Categorical.min` or :meth:`Categorical.max` on an empty Categorical would raise a numpy exception (:issue:`30227`) +- The following methods now also correctly output values for unobserved categories when called through ``groupby(..., observed=False)`` (:issue:`17605`) + * :meth:`core.groupby.SeriesGroupBy.count` + * :meth:`core.groupby.SeriesGroupBy.size` + * :meth:`core.groupby.SeriesGroupBy.nunique` + * :meth:`core.groupby.SeriesGroupBy.nth` + + +Datetimelike +^^^^^^^^^^^^ +- Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) +- Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) +- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) +- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` with errors="coerce" could incorrectly lead to raising ``ValueError`` (:issue:`28299`) +- Bug in :meth:`core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) +- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) +- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`) +- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) +- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`) +- Bug in 
:class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) +- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype ``'timedelta64[ns]'`` (:issue:`28049`) +- Bug in :func:`core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) +- Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) +- Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`) +- Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`) +- Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`) +- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) +- Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) +- Bug in :func:`pandas.to_datetime` failing for ``deques`` when using ``cache=True`` (the default) (:issue:`29403`) +- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) +- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) +- Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) +- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`) +- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`) +- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`) +- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`) +- Bug in :func:`date_range` with custom business hours as ``freq`` and given number of ``periods`` (:issue:`30593`) +- Bug in :class:`PeriodIndex` comparisons with incorrectly casting integers to :class:`Period` objects, inconsistent with the :class:`Period` comparison behavior (:issue:`30722`) +- Bug in :meth:`DatetimeIndex.insert` raising a ``ValueError`` instead of a ``TypeError`` when trying to insert a timezone-aware :class:`Timestamp` into a timezone-naive :class:`DatetimeIndex`, or vice-versa (:issue:`30806`) + +Timedelta +^^^^^^^^^ +- Bug in subtracting a :class:`TimedeltaIndex` or :class:`TimedeltaArray` from a ``np.datetime64`` object (:issue:`29558`) +- + +Timezones +^^^^^^^^^ + +- + + +Numeric +^^^^^^^ +- Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) +- :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries 
failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) +- Bug in :class:`DataFrame` logical operations (``&``, ``|``, ``^``) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) +- Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) +- Bug in :meth:`Series.var` not computing the right value with a nullable integer dtype series not passing through ddof argument (:issue:`29128`) +- Improved error message when using ``frac`` > 1 and ``replace`` = False (:issue:`27451`) +- Bug in numeric indexes resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) +- Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) +- Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) +- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) +- Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`) +- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) +- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) +- Bug in :class:`DataFrame` cumulative operations (e.g. 
cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) +- Bug in :class:`~DataFrame.diff` losing the dtype for extension types (:issue:`30889`) +- Bug in :class:`DataFrame.diff` raising an ``IndexError`` when one of the columns was a nullable integer dtype (:issue:`30967`) + +Conversion +^^^^^^^^^^ + +- + +Strings +^^^^^^^ + +- Calling :meth:`Series.str.isalnum` (and other "ismethods") on an empty ``Series`` would return an ``object`` dtype instead of ``bool`` (:issue:`29624`) +- + + +Interval +^^^^^^^^ + +- Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`) +- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`) +- Bug in :class:`Series` constructor where constructing a ``Series`` from a ``list`` of :class:`Interval` objects resulted in ``object`` dtype instead of :class:`IntervalDtype` (:issue:`23563`) +- Bug in :class:`IntervalDtype` where the ``kind`` attribute was incorrectly set as ``None`` instead of ``"O"`` (:issue:`30568`) +- Bug in :class:`IntervalIndex`, :class:`~arrays.IntervalArray`, and :class:`Series` with interval data where equality comparisons were incorrect (:issue:`24112`) + +Indexing +^^^^^^^^ + +- Bug in assignment using a reverse slicer (:issue:`26939`) +- Bug in :meth:`DataFrame.explode` would duplicate frame in the presence of duplicates in the index (:issue:`28010`) +- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a ``Period`` (:issue:`28323`) (:issue:`28337`) +- Fix assignment of column via ``.loc`` with numpy non-ns datetime type (:issue:`27395`) +- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`) +- :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`) +- Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`) +- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`) +- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`) +- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`) +- :meth:`MultiIndex.get_loc` can't find missing values when input includes missing values (:issue:`19132`) +- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`) +- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years, use e.g. 
``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`) + +Missing +^^^^^^^ + +- + +MultiIndex +^^^^^^^^^^ + +- Constructor for :class:`MultiIndex` verifies that the given ``sortorder`` is compatible with the actual ``lexsort_depth`` if ``verify_integrity`` parameter is ``True`` (the default) (:issue:`28735`) +- Series and MultiIndex ``.drop`` with ``MultiIndex`` raise exception if labels not in given in level (:issue:`8594`) +- + +IO +^^ + +- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) +- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) +- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`) +- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. (:issue:`25099`) +- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) +- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) +- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with ``engine='fastparquet'`` if the file did not already exist (:issue:`28326`) +- Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`) +- Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) +- Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) +- Bug in :meth:`DataFrame.read_excel` with ``engine='ods'`` when ``sheet_name`` argument references a non-existent sheet (:issue:`27676`) +- Bug in :meth:`pandas.io.formats.style.Styler` formatting for floating values not displaying decimals correctly (:issue:`13257`) +- Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) +- Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`) +- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`) +- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) +- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`) +- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`) +- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by ``record_path`` would raise a ``TypeError`` (:issue:`30148`) +- :func:`read_excel` now accepts binary data (:issue:`15914`) +- Bug in :meth:`read_csv` in which encoding handling was limited to just the string ``utf-16`` for the C engine (:issue:`24130`) + +Plotting +^^^^^^^^ + +- Bug in :meth:`Series.plot` not able to plot boolean values (:issue:`23719`) +- Bug in :meth:`DataFrame.plot` not able to plot when no rows (:issue:`27758`) +- Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`) +- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. 
These types are now automatically dropped (:issue:`22799`) +- Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`) +- Bug where :meth:`DataFrame.boxplot` would not accept a ``color`` parameter like :meth:`DataFrame.plot.box` (:issue:`26214`) +- Bug in the ``xticks`` argument being ignored for :meth:`DataFrame.plot.bar` (:issue:`14119`) +- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`) +- :meth:`DataFrame.plot` now allow a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`). +- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`). +- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`) +- Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`). + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`) +- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`) +- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty :class:`Series` or :class:`DataFrame` (:issue:`28427`) +- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`) +- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`). +- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`). +- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) +- Bug in :meth:`core.groupby.DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) +- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`) +- Remove error raised due to duplicated input functions in named aggregation in :meth:`DataFrame.groupby` and :meth:`Series.groupby`. Previously error will be raised if the same function is applied on the same column and now it is allowed if new assigned names are different. 
(:issue:`28426`) +- :meth:`core.groupby.SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue:`28479`) +- Bug in :meth:`core.window.rolling.Rolling.quantile` ignoring ``interpolation`` keyword argument when used within a groupby (:issue:`28779`) +- Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) +- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) +- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) +- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) +- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument where ``min_periods`` was ignored (:issue:`26996`) + +Reshaping +^^^^^^^^^ + +- Bug in :meth:`DataFrame.apply` that caused incorrect output with empty :class:`DataFrame` (:issue:`28202`, :issue:`21959`) +- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`) +- Bug in :meth:`pivot_table` not returning correct type ``float`` when ``margins=True`` and ``aggfunc='mean'`` (:issue:`24893`) +- Bug :func:`merge_asof` could not use :class:`datetime.timedelta` for ``tolerance`` kwarg (:issue:`28098`) +- Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) +- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) +- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). +- Better error message in :func:`get_dummies` when ``columns`` isn't a list-like value (:issue:`28383`) +- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. 
(:issue:`25760`, :issue:`28956`) +- Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`) +- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) +- Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) +- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) +- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) +- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) +- Improved error message and docstring in :func:`cut` and :func:`qcut` when ``labels=True`` (:issue:`13318`) +- Bug in missing ``fill_na`` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) + +Sparse +^^^^^^ +- Bug in :class:`SparseDataFrame` arithmetic operations incorrectly casting inputs to float (:issue:`28107`) +- Bug in ``DataFrame.sparse`` returning a ``Series`` when there was a column named ``sparse`` rather than the accessor (:issue:`30758`) +- Fixed :meth:`operator.xor` with a boolean-dtype ``SparseArray``. Now returns a sparse result, rather than object dtype (:issue:`31025`) + +ExtensionArray +^^^^^^^^^^^^^^ + +- Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). +- Bug where nullable integers could not be compared to strings (:issue:`28930`) +- Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) + +Other +^^^^^ +- Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) +- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) +- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) +- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) +- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) +- Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) +- Backtick quoting in :meth:`DataFrame.query` and :meth:`DataFrame.eval` can now also be used to use invalid identifiers like names that start with a digit, are python keywords, or are using single character operators. 
(:issue:`27017`) +- Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`) +- Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) +- Fix :class:`AbstractHolidayCalendar` to return correct results for + years after 2030 (now goes up to 2200) (:issue:`27790`) +- Fixed :class:`~arrays.IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by ``0`` (:issue:`27398`) +- Fixed ``pow`` operations for :class:`~arrays.IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`) +- Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) +- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`) +- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`) +- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`) +- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`) +- Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`) +- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_100.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.25.3..v1.0.0 diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst new file mode 100644 index 00000000..c42aab6d --- /dev/null +++ b/doc/source/whatsnew/v1.0.1.rst @@ -0,0 +1,79 @@ +.. _whatsnew_101: + +What's new in 1.0.1 (February 5, 2020) +-------------------------------------- + +These are the changes in pandas 1.0.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_101.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :class:`DataFrame` setting values with a slice (e.g. ``df[-4:] = 1``) indexing by label instead of position (:issue:`31469`) +- Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containing a :class:`datetime.date` (:issue:`31501`) +- Fixed regression in ``DataFrame.__setitem__`` raising an ``AttributeError`` with a :class:`MultiIndex` and a non-monotonic indexer (:issue:`31449`) +- Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`) +- Fixed regression in ``.groupby().agg()`` raising an ``AssertionError`` for some reductions like ``min`` on object-dtype columns (:issue:`31522`) +- Fixed regression in ``.groupby()`` aggregations with categorical dtype using Cythonized reduction functions (e.g. ``first``) (:issue:`31450`) +- Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`) +- Fixed regression in :meth:`DataFrame.groupby` whereby taking the minimum or maximum of a column with period dtype would raise a ``TypeError``. 
(:issue:`31471`) +- Fixed regression in :meth:`DataFrame.groupby` with an empty DataFrame grouping by a level of a MultiIndex (:issue:`31670`). +- Fixed regression in :meth:`DataFrame.apply` with object dtype and a non-reducing function (:issue:`31505`) +- Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`) +- Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`) +- Fixed regression in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`) +- Fixed regression in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` when selecting a row containing a single ``datetime64`` or ``timedelta64`` column (:issue:`31649`) +- Fixed regression where setting :attr:`pd.options.display.max_colwidth` was not accepting a negative integer. In addition, this behavior has been deprecated in favor of using ``None`` (:issue:`31532`) +- Fixed a return-type warning regression in ``objTOJSON.c`` (:issue:`31463`) +- Fixed regression in :meth:`qcut` when passed a nullable integer (:issue:`31389`) +- Fixed regression in assigning to a :class:`Series` using a nullable integer dtype (:issue:`31446`) +- Fixed performance regression when indexing a ``DataFrame`` or ``Series`` with a :class:`MultiIndex` for the index using a list of labels (:issue:`31648`) +- Fixed regression in :meth:`read_csv` where the ``encoding`` option was not recognized when reading from a file-like object such as ``RawIOBase`` (:issue:`31575`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_101.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- Support for a negative integer for :attr:`pd.options.display.max_colwidth` is deprecated in favor of using ``None`` (:issue:`31532`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_101.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Datetimelike** + +- Fixed bug in :meth:`to_datetime` raising when ``cache=True`` and out-of-bounds values are present (:issue:`31491`) + +**Numeric** + +- Bug in dtypes being lost in ``DataFrame.__invert__`` (``~`` operator) with mixed dtypes (:issue:`31183`) + and for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) + +**Plotting** + +- Plotting a tz-aware timeseries no longer gives a UserWarning (:issue:`31205`) + +**Interval** + +- Bug in :meth:`Series.shift` with ``interval`` dtype raising a ``TypeError`` when shifting an interval array of integers or datetimes (:issue:`34195`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_101.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.0..v1.0.1|HEAD diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst new file mode 100644 index 00000000..3f7c6e85 --- /dev/null +++ b/doc/source/whatsnew/v1.0.2.rst @@ -0,0 +1,125 @@ +.. _whatsnew_102: + +What's new in 1.0.2 (March 12, 2020) +------------------------------------ + +These are the changes in pandas 1.0.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +..
_whatsnew_102.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +**Groupby** + +- Fixed regression in :meth:`groupby(..).agg() ` which was failing on frames with :class:`MultiIndex` columns and a custom function (:issue:`31777`) +- Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`) +- Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) +- Fixed regression in :meth:`groupby(..).nunique() ` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) +- Fixed regression in ``DataFrame.groupby`` raising a ``ValueError`` from an internal operation (:issue:`31802`) +- Fixed regression in :meth:`groupby(..).agg() ` calling a user-provided function an extra time on an empty input (:issue:`31760`) + +**I/O** + +- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) +- Fixed regression in :meth:`DataFrame.to_excel` when the ``columns`` keyword argument is passed (:issue:`31677`) +- Fixed regression in :class:`ExcelFile` where the stream passed into the function was closed by the destructor. (:issue:`31467`) +- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). + +**Reindexing/alignment** + +- Fixed regression in :meth:`Series.align` when ``other`` is a :class:`DataFrame` and ``method`` is not ``None`` (:issue:`31785`) +- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) +- Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`) +- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) + +**Other** + +- Fixed regression in joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` to preserve ``freq`` in simple cases (:issue:`32166`) +- Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`) +- Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) + + +.. --------------------------------------------------------------------------- + +Indexing with nullable boolean arrays +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) + +.. ipython:: python + + s = pd.Series([1, 2, 3, 4]) + mask = pd.array([True, True, False, None], dtype="boolean") + s + mask + +*pandas 1.0.0-1.0.1* + +.. code-block:: python + + >>> s[mask] + Traceback (most recent call last): + ... + ValueError: cannot mask with array containing NA / NaN values + +*pandas 1.0.2* + +.. ipython:: python + + s[mask] + +.. 
_whatsnew_102.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Datetimelike** + +- Bug in :meth:`Series.astype` not copying for tz-naive and tz-aware ``datetime64`` dtype (:issue:`32490`) +- Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`) +- Improved error message when subtracting two :class:`Timestamp` that result in an out-of-bounds :class:`Timedelta` (:issue:`31774`) + +**Categorical** + +- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`) +- Fixed bug where :meth:`Categorical` constructor would raise a ``TypeError`` when given a numpy array containing ``pd.NA``. (:issue:`31927`) +- Bug in :class:`Categorical` that would ignore or crash when calling :meth:`Series.replace` with a list-like ``to_replace`` (:issue:`31720`) + +**I/O** + +- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) +- Bug in :meth:`pandas.json_normalize` when value in meta path is not iterable (:issue:`31507`) +- Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`) +- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). + +**Experimental dtypes** + +- Fixed bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). +- Fixed bug in :meth:`DataFrame.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +- Fixed bug in :meth:`DataFrame.convert_dtypes` where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`) +- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) +- Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) +- Fixed bug where :meth:`DataFrameGroupBy.mean`, :meth:`DataFrameGroupBy.median`, :meth:`DataFrameGroupBy.var`, and :meth:`DataFrameGroupBy.std` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`) + +**Strings** + +- Using ``pd.NA`` with :meth:`Series.str.repeat` now correctly outputs a null value instead of raising error for vector inputs (:issue:`31632`) + +**Rolling** + +- Fixed rolling operations with variable window (defined by time duration) on decreasing time index (:issue:`32385`). + +.. --------------------------------------------------------------------------- + +.. _whatsnew_102.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.1..v1.0.2 diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst new file mode 100644 index 00000000..62e6ae5b --- /dev/null +++ b/doc/source/whatsnew/v1.0.3.rst @@ -0,0 +1,29 @@ + +.. _whatsnew_103: + +What's new in 1.0.3 (March 17, 2020) +------------------------------------ + +These are the changes in pandas 1.0.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_103.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in ``resample.agg`` when the underlying data is non-writeable (:issue:`31710`) +- Fixed regression in :class:`DataFrame` exponentiation with reindexing (:issue:`32685`) + +.. 
_whatsnew_103.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.2..v1.0.3 diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst new file mode 100644 index 00000000..84b7e7d4 --- /dev/null +++ b/doc/source/whatsnew/v1.0.4.rst @@ -0,0 +1,48 @@ + +.. _whatsnew_104: + +What's new in 1.0.4 (May 28, 2020) +------------------------------------ + +These are the changes in pandas 1.0.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_104.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fix regression where :meth:`Series.isna` and :meth:`DataFrame.isna` would raise for categorical dtype when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33594`) +- Fix regression in :meth:`GroupBy.first` and :meth:`GroupBy.last` where ``None`` was not preserved in object dtype (:issue:`32800`) +- Fix regression in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). +- Fix performance regression in ``memory_usage(deep=True)`` for object dtype (:issue:`33012`) +- Fix regression where :meth:`Categorical.replace` would replace with ``NaN`` whenever the new value and replacement value were equal (:issue:`33288`) +- Fix regression where an ordered :class:`Categorical` containing only ``NaN`` values would raise rather than returning ``NaN`` when taking the minimum or maximum (:issue:`33450`) +- Fix regression in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`) +- Fix to preserve the ability to index with the "nearest" method with xarray's CFTimeIndex, an :class:`Index` subclass (`pydata/xarray#3751 `_, :issue:`32905`). +- Fix regression in :meth:`DataFrame.describe` raising ``TypeError: unhashable type: 'dict'`` (:issue:`32409`) +- Fix regression where :meth:`DataFrame.replace` cast columns to ``object`` dtype if items in ``to_replace`` were not present in the values (:issue:`32988`) +- Fix regression where :meth:`Series.groupby` would raise a ``ValueError`` when grouping by a :class:`PeriodIndex` level (:issue:`34010`) +- Fix regression in :meth:`GroupBy.rolling.apply` ignoring the ``args`` and ``kwargs`` parameters (:issue:`33433`) +- Fix regression in the error message with ``np.min`` or ``np.max`` on an unordered :class:`Categorical` (:issue:`33115`) +- Fix regression in :meth:`DataFrame.loc` and :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) + +.. _whatsnew_104.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :meth:`SeriesGroupBy.first`, :meth:`SeriesGroupBy.last`, :meth:`SeriesGroupBy.min`, and :meth:`SeriesGroupBy.max` returning floats when applied to nullable Booleans (:issue:`33071`) +- Bug in :meth:`Rolling.min` and :meth:`Rolling.max`: growing memory usage after multiple calls when using a fixed window (:issue:`30726`) +- Bug in :meth:`~DataFrame.to_parquet` not raising ``PermissionError`` when writing to a private s3 bucket with invalid credentials (:issue:`27679`) +- Bug in :meth:`~DataFrame.to_csv` silently failing when writing to an invalid s3 bucket (:issue:`32486`) +- Bug in :meth:`read_parquet` raising a ``FileNotFoundError`` when passed an s3 directory path.
(:issue:`26388`) +- Bug in :meth:`~DataFrame.to_parquet` was throwing an ``AttributeError`` when writing a partitioned parquet file to s3 (:issue:`27596`) +- Bug in :meth:`GroupBy.quantile` causes the quantiles to be shifted when the ``by`` axis contains ``NaN`` (:issue:`33200`, :issue:`33569`) + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.3..v1.0.4 diff --git a/doc/source/whatsnew/v1.0.5.rst b/doc/source/whatsnew/v1.0.5.rst new file mode 100644 index 00000000..9a5128a0 --- /dev/null +++ b/doc/source/whatsnew/v1.0.5.rst @@ -0,0 +1,39 @@ + +.. _whatsnew_105: + +What's new in 1.0.5 (June 17, 2020) +----------------------------------- + +These are the changes in pandas 1.0.5. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_105.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fix regression in :meth:`read_parquet` when reading from file-like objects + (:issue:`34467`). +- Fix regression in reading from public S3 buckets (:issue:`34626`). + +Note this disables the ability to read Parquet files from directories on S3 +again (:issue:`26388`, :issue:`34632`), which was added in the 1.0.4 release, +but is now targeted for pandas 1.1.0. + +- Fixed regression in :meth:`~DataFrame.replace` raising an ``AssertionError`` when replacing values in an extension dtype with values of a different dtype (:issue:`34530`) + +.. _whatsnew_105.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Fixed building from source with Python 3.8 fetching the wrong version of NumPy (:issue:`34666`) + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.4..v1.0.5|HEAD diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst new file mode 100644 index 00000000..e1f54c43 --- /dev/null +++ b/doc/source/whatsnew/v1.1.0.rst @@ -0,0 +1,1224 @@ +.. _whatsnew_110: + +What's new in 1.1.0 (July 28, 2020) +----------------------------------- + +These are the changes in pandas 1.1.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_110.specify_missing_labels: + +KeyErrors raised by loc specify missing labels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, if labels were missing for a ``.loc`` call, a KeyError was raised stating that this was no longer supported. + +Now the error message also includes a list of the missing labels (max 10 items, display width 80 characters). See :issue:`34272`. + + +.. _whatsnew_110.astype_string: + +All dtypes can now be converted to ``StringDtype`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, declaring or converting to :class:`StringDtype` was in general only possible if the data was already only ``str`` or nan-like (:issue:`31204`). +:class:`StringDtype` now works in all situations where ``astype(str)`` or ``dtype=str`` work: + +For example, the below now works: + +.. ipython:: python + + ser = pd.Series([1, "abc", np.nan], dtype="string") + ser + ser[0] + pd.Series([1, 2, np.nan], dtype="Int64").astype("string") + + +.. 
_whatsnew_110.period_index_partial_string_slicing: + +Non-monotonic PeriodIndex partial string slicing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`PeriodIndex` now supports partial string slicing for non-monotonic indexes, mirroring :class:`DatetimeIndex` behavior (:issue:`31096`) + +For example: + +.. ipython:: python + + dti = pd.date_range("2014-01-01", periods=30, freq="30D") + pi = dti.to_period("D") + ser_monotonic = pd.Series(np.arange(30), index=pi) + shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2)) + ser = ser_monotonic[shuffler] + ser + +.. ipython:: python + + ser["2014"] + ser.loc["May 2015"] + + +.. _whatsnew_110.dataframe_or_series_comparing: + +Comparing two ``DataFrame`` or two ``Series`` and summarizing the differences +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :meth:`DataFrame.compare` and :meth:`Series.compare` for comparing two ``DataFrame`` or two ``Series`` (:issue:`30429`) + +.. ipython:: python + + df = pd.DataFrame( + { + "col1": ["a", "a", "b", "b", "a"], + "col2": [1.0, 2.0, 3.0, np.nan, 5.0], + "col3": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + columns=["col1", "col2", "col3"], + ) + df + +.. ipython:: python + + df2 = df.copy() + df2.loc[0, 'col1'] = 'c' + df2.loc[2, 'col3'] = 4.0 + df2 + +.. ipython:: python + + df.compare(df2) + +See :ref:`User Guide ` for more details. + + +.. _whatsnew_110.groupby_key: + +Allow NA in groupby key +^^^^^^^^^^^^^^^^^^^^^^^^ + +With :ref:`groupby ` , we've added a ``dropna`` keyword to :meth:`DataFrame.groupby` and :meth:`Series.groupby` in order to +allow ``NA`` values in group keys. Users can define ``dropna`` to ``False`` if they want to include +``NA`` values in groupby keys. The default is set to ``True`` for ``dropna`` to keep backwards +compatibility (:issue:`3729`) + +.. ipython:: python + + df_list = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]] + df_dropna = pd.DataFrame(df_list, columns=["a", "b", "c"]) + + df_dropna + +.. ipython:: python + + # Default ``dropna`` is set to True, which will exclude NaNs in keys + df_dropna.groupby(by=["b"], dropna=True).sum() + + # In order to allow NaN in keys, set ``dropna`` to False + df_dropna.groupby(by=["b"], dropna=False).sum() + +The default setting of ``dropna`` argument is ``True`` which means ``NA`` are not included in group keys. + + +.. _whatsnew_110.key_sorting: + +Sorting with keys +^^^^^^^^^^^^^^^^^ + +We've added a ``key`` argument to the :class:`DataFrame` and :class:`Series` sorting methods, including +:meth:`DataFrame.sort_values`, :meth:`DataFrame.sort_index`, :meth:`Series.sort_values`, +and :meth:`Series.sort_index`. The ``key`` can be any callable function which is applied +column-by-column to each column used for sorting, before sorting is performed (:issue:`27237`). +See :ref:`sort_values with keys ` and :ref:`sort_index with keys +` for more information. + +.. ipython:: python + + s = pd.Series(['C', 'a', 'B']) + s + +.. ipython:: python + + s.sort_values() + + +Note how this is sorted with capital letters first. If we apply the :meth:`Series.str.lower` +method, we get + +.. ipython:: python + + s.sort_values(key=lambda x: x.str.lower()) + + +When applied to a ``DataFrame``, they key is applied per-column to all columns or a subset if +``by`` is specified, e.g. + +.. ipython:: python + + df = pd.DataFrame({'a': ['C', 'C', 'a', 'a', 'B', 'B'], + 'b': [1, 2, 3, 4, 5, 6]}) + df + +.. 
ipython:: python + + df.sort_values(by=['a'], key=lambda col: col.str.lower()) + + +For more details, see examples and documentation in :meth:`DataFrame.sort_values`, +:meth:`Series.sort_values`, and :meth:`~DataFrame.sort_index`. + +.. _whatsnew_110.timestamp_fold_support: + +Fold argument support in Timestamp constructor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Timestamp` now supports the keyword-only fold argument according to `PEP 495 `_, similar to the parent ``datetime.datetime`` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to ``dateutil`` timezones as ``pytz`` doesn't support fold. + +For example: + +.. ipython:: python + + ts = pd.Timestamp("2019-10-27 01:30:00+00:00") + ts.fold + +.. ipython:: python + + ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, + tz="dateutil/Europe/London", fold=1) + ts + +For more on working with fold, see :ref:`Fold subsection ` in the user guide. + +.. _whatsnew_110.to_datetime_multiple_tzname_tzoffset_support: + +Parsing timezone-aware format with different timezones in to_datetime +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`to_datetime` now supports parsing formats containing timezone names (``%Z``) and UTC offsets (``%z``) from different timezones and then converting them to UTC by setting ``utc=True``. This returns a :class:`DatetimeIndex` with its timezone at UTC, as opposed to an :class:`Index` with ``object`` dtype if ``utc=True`` is not set (:issue:`32792`). + +For example: + +.. ipython:: python + + tz_strs = ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100", + "2010-01-01 12:00:00 +0300", "2010-01-01 12:00:00 +0400"] + pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z', utc=True) + pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z') + +.. _whatsnew_110.grouper_resample_origin: + +Grouper and resample now support the arguments origin and offset +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Grouper` and :meth:`DataFrame.resample` now support the arguments ``origin`` and ``offset``. They let the user control the timestamp on which to adjust the grouping (:issue:`31809`). + +The bins of the grouping are adjusted based on the beginning of the day of the time series starting point. This works well with frequencies that are multiples of a day (like ``30D``) or that divide a day (like ``90s`` or ``1min``). But it can create inconsistencies with some frequencies that do not meet this criterion. To change this behavior you can now specify a fixed timestamp with the argument ``origin``. + +Two arguments are now deprecated (more information in the documentation of :meth:`DataFrame.resample`): + +- ``base`` should be replaced by ``offset``. +- ``loffset`` should be replaced by directly adding an offset to the index of the :class:`DataFrame` after it has been resampled. + +A small example of the use of ``origin``: + +.. ipython:: python + + start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' + middle = '2000-10-02 00:00:00' + rng = pd.date_range(start, end, freq='7min') + ts = pd.Series(np.arange(len(rng)) * 3, index=rng) + ts + +Resample with the default behavior ``'start_day'`` (origin is ``2000-10-01 00:00:00``): + +.. ipython:: python + + ts.resample('17min').sum() + ts.resample('17min', origin='start_day').sum() + +Resample using a fixed origin: + +..
ipython:: python + + ts.resample('17min', origin='epoch').sum() + ts.resample('17min', origin='2000-01-01').sum() + +If needed you can adjust the bins with the argument ``offset`` (a :class:`Timedelta`) that would be added to the default ``origin``. + +For a full example, see: :ref:`timeseries.adjust-the-start-of-the-bins`. + +fsspec now used for filesystem handling +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For reading and writing to filesystems other than local and reading from HTTP(S), +the optional dependency ``fsspec`` will be used to dispatch operations (:issue:`33452`). +This will give unchanged +functionality for S3 and GCS storage, which were already supported, but also add +support for several other storage implementations such as `Azure Datalake and Blob`_, +SSH, FTP, dropbox and github. For docs and capabilities, see the `fsspec docs`_. + +The existing capability to interface with S3 and GCS will be unaffected by this +change, as ``fsspec`` will still bring in the same packages as before. + +.. _Azure Datalake and Blob: https://github.com/fsspec/adlfs + +.. _fsspec docs: https://filesystem-spec.readthedocs.io/en/latest/ + +.. _whatsnew_110.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Compatibility with matplotlib 3.3.0 (:issue:`34850`) +- :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:`32538`) +- :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`) +- Added :class:`pandas.errors.InvalidIndexError` (:issue:`34570`). +- Added :meth:`DataFrame.value_counts` (:issue:`5377`) +- Added a :func:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations. +- Added a :func:`pandas.api.indexers.VariableOffsetWindowIndexer` class to support ``rolling`` operations with non-fixed offsets (:issue:`34994`) +- :meth:`~DataFrame.describe` now includes a ``datetime_is_numeric`` keyword to control how datetime columns are summarized (:issue:`30164`, :issue:`34798`) +- :class:`~pandas.io.formats.style.Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`) +- :meth:`~pandas.io.formats.style.Styler.highlight_null` now accepts ``subset`` argument (:issue:`31345`) +- When writing directly to a sqlite connection :meth:`DataFrame.to_sql` now supports the ``multi`` method (:issue:`29921`) +- :class:`pandas.errors.OptionError` is now exposed in ``pandas.errors`` (:issue:`27553`) +- Added :meth:`api.extensions.ExtensionArray.argmax` and :meth:`api.extensions.ExtensionArray.argmin` (:issue:`24382`) +- :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) +- Positional slicing on a :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`) +- :class:`Series.str` now has a ``fullmatch`` method that matches a regular expression against the entire string in each row of the :class:`Series`, similar to ``re.fullmatch`` (:issue:`32806`). +- :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`) +- :meth:`Index.union` will now raise ``RuntimeWarning`` for :class:`MultiIndex` objects if the object inside are unsortable. 
Pass ``sort=False`` to suppress this warning (:issue:`33015`) +- Added :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`, :issue:`34392`). +- The :meth:`DataFrame.to_feather` method now supports additional keyword + arguments (e.g. to set the compression) that are added in pyarrow 0.17 + (:issue:`33422`). +- The :func:`cut` will now accept parameter ``ordered`` with default ``ordered=True``. If ``ordered=False`` and no labels are provided, an error will be raised (:issue:`33141`) +- :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_pickle`, + and :meth:`DataFrame.to_json` now support passing a dict of + compression arguments when using the ``gzip`` and ``bz2`` protocols. + This can be used to set a custom compression level, e.g., + ``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1}`` + (:issue:`33196`) +- :meth:`melt` has gained an ``ignore_index`` (default ``True``) argument that, if set to ``False``, prevents the method from dropping the index (:issue:`17440`). +- :meth:`Series.update` now accepts objects that can be coerced to a :class:`Series`, + such as ``dict`` and ``list``, mirroring the behavior of :meth:`DataFrame.update` (:issue:`33215`) +- :meth:`~pandas.core.groupby.DataFrameGroupBy.transform` and :meth:`~pandas.core.groupby.DataFrameGroupBy.aggregate` have gained ``engine`` and ``engine_kwargs`` arguments that support executing functions with ``Numba`` (:issue:`32854`, :issue:`33388`) +- :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`) +- :class:`~pandas.core.groupby.DataFrameGroupBy` and :class:`~pandas.core.groupby.SeriesGroupBy` now implement the ``sample`` method for doing random sampling within groups (:issue:`31775`) +- :meth:`DataFrame.to_numpy` now supports the ``na_value`` keyword to control the NA sentinel in the output array (:issue:`33820`) +- Added :class:`api.extension.ExtensionArray.equals` to the extension array interface, similar to :meth:`Series.equals` (:issue:`27081`) +- The minimum supported dta version has increased to 105 in :func:`read_stata` and :class:`~pandas.io.stata.StataReader` (:issue:`26667`). +- :meth:`~DataFrame.to_stata` supports compression using the ``compression`` + keyword argument. Compression can either be inferred or explicitly set using a string or a + dictionary containing both the method and any additional arguments that are passed to the + compression library. Compression was also added to the low-level Stata-file writers + :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, + and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`). +- :meth:`HDFStore.put` now accepts a ``track_times`` parameter. This parameter is passed to the ``create_table`` method of ``PyTables`` (:issue:`32682`). +- :meth:`Series.plot` and :meth:`DataFrame.plot` now accepts ``xlabel`` and ``ylabel`` parameters to present labels on x and y axis (:issue:`9093`). +- Made :class:`pandas.core.window.rolling.Rolling` and :class:`pandas.core.window.expanding.Expanding` iterable(:issue:`11704`) +- Made ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`). 
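+
+As a minimal sketch of the ``option_context`` decorator usage mentioned in the previous entry (the function name ``show_all_rows`` and the toy frame are illustrative only):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    # The decorated call runs with the given option values and restores them afterwards.
+    @pd.option_context("display.max_rows", None)
+    def show_all_rows(df):
+        print(df)
+
+    show_all_rows(pd.DataFrame({"a": range(25)}))
+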
+- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`) +- :meth:`~pandas.core.groupby.DataFrameGroupBy.groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`). +- :func:`read_json` now accepts an ``nrows`` parameter. (:issue:`33916`). +- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`) +- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example + combining a nullable integer column with a numpy integer column will no longer + result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`, :issue:`34095`). +- :func:`read_gbq` now allows to disable progress bar (:issue:`33360`). +- :func:`read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`). +- :meth:`DataFrame.cov` and :meth:`Series.cov` now support a new parameter ``ddof`` to support delta degrees of freedom as in the corresponding numpy methods (:issue:`34611`). +- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`). +- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`) +- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similar to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`). +- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` now accept ``index`` argument as an alias for tabulate's ``showindex`` (:issue:`32667`) +- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable Boolean dtype (:issue:`34859`) +- :class:`pandas.core.window.ExponentialMovingWindow` now supports a ``times`` argument that allows ``mean`` to be calculated with observations spaced by the timestamps in ``times`` (:issue:`34839`) +- :meth:`DataFrame.agg` and :meth:`Series.agg` now accept named aggregation for renaming the output columns/indexes. (:issue:`26513`) +- ``compute.use_numba`` now exists as a configuration option that utilizes the numba engine when available (:issue:`33966`, :issue:`35374`) +- :meth:`Series.plot` now supports asymmetric error bars. Previously, if :meth:`Series.plot` received a "2xN" array with error values for ``yerr`` and/or ``xerr``, the left/lower values (first row) were mirrored, while the right/upper values (second row) were ignored. Now, the first row represents the left/lower error values and the second row the right/upper error values. (:issue:`9536`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_110.notable_bug_fixes: + +Notable bug fixes +~~~~~~~~~~~~~~~~~ + +These are bug fixes that might have notable behavior changes. + +``MultiIndex.get_indexer`` interprets ``method`` argument correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This restores the behavior of :meth:`MultiIndex.get_indexer` with ``method='backfill'`` or ``method='pad'`` to the behavior before pandas 0.23.0. In particular, MultiIndexes are treated as a list of tuples and padding or backfilling is done with respect to the ordering of these lists of tuples (:issue:`29896`). + +As an example of this, given: + +.. 
ipython:: python + + df = pd.DataFrame({ + 'a': [0, 0, 0, 0], + 'b': [0, 2, 3, 4], + 'c': ['A', 'B', 'C', 'D'], + }).set_index(['a', 'b']) + mi_2 = pd.MultiIndex.from_product([[0], [-1, 0, 1, 3, 4, 5]]) + +The differences in reindexing ``df`` with ``mi_2`` and using ``method='backfill'`` can be seen here: + +*pandas >= 0.23, < 1.1.0*: + +.. code-block:: ipython + + In [1]: df.reindex(mi_2, method='backfill') + Out[1]: + c + 0 -1 A + 0 A + 1 D + 3 A + 4 A + 5 C + +*pandas <0.23, >= 1.1.0* + +.. ipython:: python + + df.reindex(mi_2, method='backfill') + +And the differences in reindexing ``df`` with ``mi_2`` and using ``method='pad'`` can be seen here: + +*pandas >= 0.23, < 1.1.0* + +.. code-block:: ipython + + In [1]: df.reindex(mi_2, method='pad') + Out[1]: + c + 0 -1 NaN + 0 NaN + 1 D + 3 NaN + 4 A + 5 C + +*pandas < 0.23, >= 1.1.0* + +.. ipython:: python + + df.reindex(mi_2, method='pad') + +.. _whatsnew_110.notable_bug_fixes.indexing_raises_key_errors: + +Failed label-based lookups always raise KeyError +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Label lookups ``series[key]``, ``series.loc[key]`` and ``frame.loc[key]`` +used to raise either ``KeyError`` or ``TypeError`` depending on the type of +key and type of :class:`Index`. These now consistently raise ``KeyError`` (:issue:`31867`) + +.. ipython:: python + + ser1 = pd.Series(range(3), index=[0, 1, 2]) + ser2 = pd.Series(range(3), index=pd.date_range("2020-02-01", periods=3)) + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: ser1[1.5] + ... + TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float + + In [4] ser1["foo"] + ... + KeyError: 'foo' + + In [5]: ser1.loc[1.5] + ... + TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float + + In [6]: ser1.loc["foo"] + ... + KeyError: 'foo' + + In [7]: ser2.loc[1] + ... + TypeError: cannot do label indexing on DatetimeIndex with these indexers [1] of type int + + In [8]: ser2.loc[pd.Timestamp(0)] + ... + KeyError: Timestamp('1970-01-01 00:00:00') + +*New behavior*: + +.. code-block:: ipython + + In [3]: ser1[1.5] + ... + KeyError: 1.5 + + In [4] ser1["foo"] + ... + KeyError: 'foo' + + In [5]: ser1.loc[1.5] + ... + KeyError: 1.5 + + In [6]: ser1.loc["foo"] + ... + KeyError: 'foo' + + In [7]: ser2.loc[1] + ... + KeyError: 1 + + In [8]: ser2.loc[pd.Timestamp(0)] + ... + KeyError: Timestamp('1970-01-01 00:00:00') + + +Similarly, :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`) + +.. _whatsnew_110.notable_bug_fixes.indexing_int_multiindex_raises_key_errors: + +Failed Integer Lookups on MultiIndex Raise KeyError +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Indexing with integers with a :class:`MultiIndex` that has an integer-dtype +first level incorrectly failed to raise ``KeyError`` when one or more of +those integer keys is not present in the first level of the index (:issue:`33539`) + +.. ipython:: python + + idx = pd.Index(range(4)) + dti = pd.date_range("2000-01-03", periods=3) + mi = pd.MultiIndex.from_product([idx, dti]) + ser = pd.Series(range(len(mi)), index=mi) + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: ser[[5]] + Out[5]: Series([], dtype: int64) + +*New behavior*: + +.. code-block:: ipython + + In [5]: ser[[5]] + ... 
+ KeyError: '[5] not in index' + +:meth:`DataFrame.merge` preserves right frame's row order +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:meth:`DataFrame.merge` now preserves the right frame's row order when executing a right merge (:issue:`27453`) + +.. ipython:: python + + left_df = pd.DataFrame({'animal': ['dog', 'pig'], + 'max_speed': [40, 11]}) + right_df = pd.DataFrame({'animal': ['quetzal', 'pig'], + 'max_speed': [80, 11]}) + left_df + right_df + +*Previous behavior*: + +.. code-block:: python + + >>> left_df.merge(right_df, on=['animal', 'max_speed'], how="right") + animal max_speed + 0 pig 11 + 1 quetzal 80 + +*New behavior*: + +.. ipython:: python + + left_df.merge(right_df, on=['animal', 'max_speed'], how="right") + +.. --------------------------------------------------------------------------- + +.. _whatsnew_110.notable_bug_fixes.assignment_to_multiple_columns: + +Assignment to multiple columns of a DataFrame when some columns do not exist +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns will be constructed with the right values. (:issue:`13658`) + +.. ipython:: python + + df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df[['a', 'c']] = 1 + In [4]: df + Out[4]: + a b + 0 1 1 + 1 1 1 + 2 1 1 + +*New behavior*: + +.. ipython:: python + + df[['a', 'c']] = 1 + df + +.. _whatsnew_110.notable_bug_fixes.groupby_consistency: + +Consistency across groupby reductions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Using :meth:`DataFrame.groupby` with ``as_index=True`` and the aggregation ``nunique`` would include the grouping column(s) in the columns of the result. Now the grouping column(s) only appear in the index, consistent with other reductions. (:issue:`32579`) + +.. ipython:: python + + df = pd.DataFrame({"a": ["x", "x", "y", "y"], "b": [1, 1, 2, 3]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df.groupby("a", as_index=True).nunique() + Out[4]: + a b + a + x 1 1 + y 1 2 + +*New behavior*: + +.. ipython:: python + + df.groupby("a", as_index=True).nunique() + +Using :meth:`DataFrame.groupby` with ``as_index=False`` and the function ``idxmax``, ``idxmin``, ``mad``, ``nunique``, ``sem``, ``skew``, or ``std`` would modify the grouping column. Now the grouping column remains unchanged, consistent with other reductions. (:issue:`21090`, :issue:`10355`) + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df.groupby("a", as_index=False).nunique() + Out[4]: + a b + 0 1 1 + 1 1 2 + +*New behavior*: + +.. ipython:: python + + df.groupby("a", as_index=False).nunique() + +The method :meth:`~pandas.core.groupby.DataFrameGroupBy.size` would previously ignore ``as_index=False``. Now the grouping columns are returned as columns, making the result a :class:`DataFrame` instead of a :class:`Series`. (:issue:`32599`) + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df.groupby("a", as_index=False).size() + Out[4]: + a + x 2 + y 2 + dtype: int64 + +*New behavior*: + +.. ipython:: python + + df.groupby("a", as_index=False).size() + +.. 
_whatsnew_110.api_breaking.groupby_results_lost_as_index_false: + +:meth:`~pandas.core.groupby.DataFrameGroupby.agg` lost results with ``as_index=False`` when relabeling columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously :meth:`~pandas.core.groupby.DataFrameGroupby.agg` lost the result columns, when the ``as_index`` option was +set to ``False`` and the result columns were relabeled. In this case the result values were replaced with +the previous index (:issue:`32240`). + +.. ipython:: python + + df = pd.DataFrame({"key": ["x", "y", "z", "x", "y", "z"], + "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: grouped = df.groupby("key", as_index=False) + In [3]: result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + In [4]: result + Out[4]: + min_val + 0 x + 1 y + 2 z + +*New behavior*: + +.. ipython:: python + + grouped = df.groupby("key", as_index=False) + result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + result + + +.. _whatsnew_110.notable_bug_fixes.apply_applymap_first_once: + +apply and applymap on ``DataFrame`` evaluates first row/column only once +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2], 'b': [3, 6]}) + + def func(row): + print(row) + return row + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: df.apply(func, axis=1) + a 1 + b 3 + Name: 0, dtype: int64 + a 1 + b 3 + Name: 0, dtype: int64 + a 2 + b 6 + Name: 1, dtype: int64 + Out[4]: + a b + 0 1 3 + 1 2 6 + +*New behavior*: + +.. ipython:: python + + df.apply(func, axis=1) + +.. _whatsnew_110.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_110.api_breaking.testing.check_freq: + +Added ``check_freq`` argument to ``testing.assert_frame_equal`` and ``testing.assert_series_equal`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``check_freq`` argument was added to :func:`testing.assert_frame_equal` and :func:`testing.assert_series_equal` in pandas 1.1.0 and defaults to ``True``. :func:`testing.assert_frame_equal` and :func:`testing.assert_series_equal` now raise ``AssertionError`` if the indexes do not have the same frequency. Before pandas 1.1.0, the index frequency was not checked. + + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some minimum supported versions of dependencies were updated (:issue:`33718`, :issue:`29766`, :issue:`29723`, pytables >= 3.4.3). +If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.15.4 | X | X | ++-----------------+-----------------+----------+---------+ +| pytz | 2015.4 | X | | ++-----------------+-----------------+----------+---------+ +| python-dateutil | 2.7.3 | X | X | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.2.1 | | | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.6.2 | | | ++-----------------+-----------------+----------+---------+ +| pytest (dev) | 4.0.2 | | | ++-----------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. 
+The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. + ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 | 4.6.0 | | ++-----------------+-----------------+---------+ +| fastparquet | 0.3.2 | | ++-----------------+-----------------+---------+ +| fsspec | 0.7.4 | | ++-----------------+-----------------+---------+ +| gcsfs | 0.6.0 | X | ++-----------------+-----------------+---------+ +| lxml | 3.8.0 | | ++-----------------+-----------------+---------+ +| matplotlib | 2.2.2 | | ++-----------------+-----------------+---------+ +| numba | 0.46.0 | | ++-----------------+-----------------+---------+ +| openpyxl | 2.5.7 | | ++-----------------+-----------------+---------+ +| pyarrow | 0.13.0 | | ++-----------------+-----------------+---------+ +| pymysql | 0.7.1 | | ++-----------------+-----------------+---------+ +| pytables | 3.4.3 | X | ++-----------------+-----------------+---------+ +| s3fs | 0.4.0 | X | ++-----------------+-----------------+---------+ +| scipy | 1.2.0 | X | ++-----------------+-----------------+---------+ +| sqlalchemy | 1.1.4 | | ++-----------------+-----------------+---------+ +| xarray | 0.8.2 | | ++-----------------+-----------------+---------+ +| xlrd | 1.1.0 | | ++-----------------+-----------------+---------+ +| xlsxwriter | 0.9.8 | | ++-----------------+-----------------+---------+ +| xlwt | 1.2.0 | | ++-----------------+-----------------+---------+ +| pandas-gbq | 1.2.0 | X | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +Development changes +^^^^^^^^^^^^^^^^^^^ + +- The minimum version of Cython is now the most recent bug-fix version (0.29.16) (:issue:`33334`). + + +.. _whatsnew_110.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated and will raise in a future version. Either convert the list to a tuple, or pass the slice directly instead (:issue:`31333`) + +- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include ``datetime64`` and ``datetime64tz`` columns in a future version (:issue:`29941`) +- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`) +- :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` and will raise in a future version (:issue:`32515`) +- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`) +- The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`) +- Providing ``suffixes`` as a ``set`` in :func:`pandas.merge` is deprecated. Provide a tuple instead (:issue:`33740`, :issue:`34741`). +- Indexing a :class:`Series` with a multi-dimensional indexer like ``[:, None]`` to return an ``ndarray`` now raises a ``FutureWarning``. 
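+
+A minimal sketch of the migration for the deprecated single-item-list lookup above, assuming a plain integer-indexed series ``ser``:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    ser = pd.Series(range(10))
+
+    # Deprecated: a single-item list containing a slice
+    # ser[[slice(0, 4)]]
+
+    # Instead, pass the slice directly ...
+    ser[slice(0, 4)]
+
+    # ... or convert the list to a tuple
+    ser[(slice(0, 4),)]
+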
Convert to a NumPy array before indexing instead (:issue:`27837`) +- :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`) + +- Passing any arguments but the first one to :func:`read_html` as + positional arguments is deprecated. All other + arguments should be given as keyword arguments (:issue:`27573`). + +- Passing any arguments but ``path_or_buf`` (the first one) to + :func:`read_json` as positional arguments is deprecated. All + other arguments should be given as keyword arguments (:issue:`27573`). + +- Passing any arguments but the first two to :func:`read_excel` as + positional arguments is deprecated. All other + arguments should be given as keyword arguments (:issue:`27573`). + +- :func:`pandas.api.types.is_categorical` is deprecated and will be removed in a future version; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`) +- :meth:`Index.get_value` is deprecated and will be removed in a future version (:issue:`19728`) +- :meth:`Series.dt.week` and :meth:`Series.dt.weekofyear` are deprecated and will be removed in a future version, use :meth:`Series.dt.isocalendar().week` instead (:issue:`33595`) +- :meth:`DatetimeIndex.week` and ``DatetimeIndex.weekofyear`` are deprecated and will be removed in a future version, use ``DatetimeIndex.isocalendar().week`` instead (:issue:`33595`) +- :meth:`DatetimeArray.week` and ``DatetimeArray.weekofyear`` are deprecated and will be removed in a future version, use ``DatetimeArray.isocalendar().week`` instead (:issue:`33595`) +- :meth:`DateOffset.__call__` is deprecated and will be removed in a future version, use ``offset + other`` instead (:issue:`34171`) +- :meth:`~pandas.tseries.offsets.BusinessDay.apply_index` is deprecated and will be removed in a future version. Use ``offset + other`` instead (:issue:`34580`) +- :meth:`DataFrame.tshift` and :meth:`Series.tshift` are deprecated and will be removed in a future version, use :meth:`DataFrame.shift` and :meth:`Series.shift` instead (:issue:`11631`) +- Indexing an :class:`Index` object with a float key is deprecated, and will + raise an ``IndexError`` in the future. You can manually convert to an integer key + instead (:issue:`34191`). +- The ``squeeze`` keyword in :meth:`~DataFrame.groupby` is deprecated and will be removed in a future version (:issue:`32380`) +- The ``tz`` keyword in :meth:`Period.to_timestamp` is deprecated and will be removed in a future version; use ``per.to_timestamp(...).tz_localize(tz)`` instead (:issue:`34522`) +- :meth:`DatetimeIndex.to_perioddelta` is deprecated and will be removed in a future version. Use ``index - index.to_period(freq).to_timestamp()`` instead (:issue:`34853`) +- :meth:`DataFrame.melt` accepting a ``value_name`` that already exists is deprecated, and will be removed in a future version (:issue:`34731`) +- The ``center`` keyword in the :meth:`DataFrame.expanding` function is deprecated and will be removed in a future version (:issue:`20647`) + + + +.. --------------------------------------------------------------------------- + + +.. 
_whatsnew_110.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) +- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) +- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) +- Performance improvement in arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=1`` (:issue:`33600`) +- The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index, + avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of + existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`) +- Significant performance improvement when creating a :class:`DataFrame` with + sparse values from ``scipy.sparse`` matrices using the + :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`, + :issue:`32825`, :issue:`32826`, :issue:`32856`, :issue:`32858`). +- Performance improvement for groupby methods :meth:`~pandas.core.groupby.groupby.Groupby.first` + and :meth:`~pandas.core.groupby.groupby.Groupby.last` (:issue:`34178`) +- Performance improvement in :func:`factorize` for nullable (integer and Boolean) dtypes (:issue:`33064`). +- Performance improvement when constructing :class:`Categorical` objects (:issue:`33921`) +- Fixed performance regression in :func:`pandas.qcut` and :func:`pandas.cut` (:issue:`33921`) +- Performance improvement in reductions (``sum``, ``prod``, ``min``, ``max``) for nullable (integer and Boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`). +- Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`) +- Performance improvement in :class:`pandas.core.groupby.RollingGroupby` (:issue:`34052`) +- Performance improvement in arithmetic operations (``sub``, ``add``, ``mul``, ``div``) for :class:`MultiIndex` (:issue:`34297`) +- Performance improvement in ``DataFrame[bool_indexer]`` when ``bool_indexer`` is a ``list`` (:issue:`33924`) +- Significant performance improvement of :meth:`io.formats.style.Styler.render` with styles added with various ways such as :meth:`io.formats.style.Styler.apply`, :meth:`io.formats.style.Styler.applymap` or :meth:`io.formats.style.Styler.bar` (:issue:`19917`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_110.bug_fixes: + +Bug fixes +~~~~~~~~~ + + +Categorical +^^^^^^^^^^^ + +- Passing an invalid ``fill_value`` to :meth:`Categorical.take` raises a ``ValueError`` instead of ``TypeError`` (:issue:`33660`) +- Combining a :class:`Categorical` with integer categories and which contains missing values with a float dtype column in operations such as :func:`concat` or :meth:`~DataFrame.append` will now result in a float column instead of an object dtype column (:issue:`33607`) +- Bug where :func:`merge` was unable to join on non-unique categorical indices (:issue:`28189`) +- Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`) +- Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`) +- :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`) +- Repr of :class:`Categorical` was not distinguishing between ``int`` and ``str`` (:issue:`33676`) + +Datetimelike +^^^^^^^^^^^^ + +- Passing an integer dtype other than ``int64`` to ``np.array(period_index, dtype=...)`` will now raise ``TypeError`` instead of incorrectly using ``int64`` (:issue:`32255`) +- :meth:`Series.to_timestamp` now raises a ``TypeError`` if the axis is not a :class:`PeriodIndex`. Previously an ``AttributeError`` was raised (:issue:`33327`) +- :meth:`Series.to_period` now raises a ``TypeError`` if the axis is not a :class:`DatetimeIndex`. Previously an ``AttributeError`` was raised (:issue:`33327`) +- :class:`Period` no longer accepts tuples for the ``freq`` argument (:issue:`34658`) +- Bug in :class:`Timestamp` where constructing a :class:`Timestamp` from ambiguous epoch time and calling constructor again changed the :meth:`Timestamp.value` property (:issue:`24329`) +- :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) +- Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) +- Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`) +- :class:`Timestamp` raised a confusing error message when year, month or day is missing (:issue:`31200`) +- Bug in :class:`DatetimeIndex` constructor incorrectly accepting ``bool``-dtype inputs (:issue:`32668`) +- Bug in :meth:`DatetimeIndex.searchsorted` not accepting a ``list`` or :class:`Series` as its argument (:issue:`32762`) +- Bug where :meth:`PeriodIndex` raised when passed a :class:`Series` of strings (:issue:`26109`) +- Bug in :class:`Timestamp` arithmetic when adding or subtracting an ``np.ndarray`` with ``timedelta64`` dtype (:issue:`33296`) +- Bug in :meth:`DatetimeIndex.to_period` not inferring the frequency when called with no arguments (:issue:`33358`) +- Bug in :meth:`DatetimeIndex.tz_localize` incorrectly retaining ``freq`` in some cases where the original ``freq`` is no longer valid (:issue:`30511`) +- Bug in :meth:`DatetimeIndex.intersection` losing ``freq`` and timezone in some cases (:issue:`33604`) +- Bug in :meth:`DatetimeIndex.get_indexer` where 
incorrect output would be returned for mixed datetime-like targets (:issue:`33741`) +- Bug in :class:`DatetimeIndex` addition and subtraction with some types of :class:`DateOffset` objects incorrectly retaining an invalid ``freq`` attribute (:issue:`33779`) +- Bug in :class:`DatetimeIndex` where setting the ``freq`` attribute on an index could silently change the ``freq`` attribute on another index viewing the same data (:issue:`33552`) +- :meth:`DataFrame.min` and :meth:`DataFrame.max` were not returning consistent results with :meth:`Series.min` and :meth:`Series.max` when called on objects initialized with empty :func:`pd.to_datetime` +- Bug in :meth:`DatetimeIndex.intersection` and :meth:`TimedeltaIndex.intersection` with results not having the correct ``name`` attribute (:issue:`33904`) +- Bug in :meth:`DatetimeArray.__setitem__`, :meth:`TimedeltaArray.__setitem__`, :meth:`PeriodArray.__setitem__` incorrectly allowing values with ``int64`` dtype to be silently cast (:issue:`33717`) +- Bug in subtracting :class:`TimedeltaIndex` from :class:`Period` incorrectly raising ``TypeError`` in some cases where it should succeed and ``IncompatibleFrequency`` in some cases where it should raise ``TypeError`` (:issue:`33883`) +- Bug in constructing a :class:`Series` or :class:`Index` from a read-only NumPy array with non-ns + resolution which converted to object dtype instead of coercing to ``datetime64[ns]`` + dtype when within the timestamp bounds (:issue:`34843`). +- The ``freq`` keyword in :class:`Period`, :func:`date_range`, :func:`period_range`, :func:`pd.tseries.frequencies.to_offset` no longer allows tuples, pass as string instead (:issue:`34703`) +- Bug in :meth:`DataFrame.append` when appending a :class:`Series` containing a scalar tz-aware :class:`Timestamp` to an empty :class:`DataFrame` resulted in an object column instead of ``datetime64[ns, tz]`` dtype (:issue:`35038`) +- ``OutOfBoundsDatetime`` issues an improved error message when timestamp is out of implementation bounds. 
(:issue:`32967`) +- Bug in :meth:`AbstractHolidayCalendar.holidays` when no rules were defined (:issue:`31415`) +- Bug in :class:`Tick` comparisons raising ``TypeError`` when comparing against timedelta-like objects (:issue:`34088`) +- Bug in :class:`Tick` multiplication raising ``TypeError`` when multiplying by a float (:issue:`34486`) + +Timedelta +^^^^^^^^^ + +- Bug in constructing a :class:`Timedelta` with a high precision integer that would round the :class:`Timedelta` components (:issue:`31354`) +- Bug in dividing ``np.nan`` or ``None`` by :class:`Timedelta` incorrectly returning ``NaT`` (:issue:`31869`) +- :class:`Timedelta` now understands ``µs`` as an identifier for microsecond (:issue:`32899`) +- :class:`Timedelta` string representation now includes nanoseconds, when nanoseconds are non-zero (:issue:`9309`) +- Bug in comparing a :class:`Timedelta` object against an ``np.ndarray`` with ``timedelta64`` dtype incorrectly viewing all entries as unequal (:issue:`33441`) +- Bug in :func:`timedelta_range` that produced an extra point on an edge case (:issue:`30353`, :issue:`33498`) +- Bug in :meth:`DataFrame.resample` that produced an extra point on an edge case (:issue:`30353`, :issue:`13022`, :issue:`33498`) +- Bug in :meth:`DataFrame.resample` that ignored the ``loffset`` argument when dealing with timedelta (:issue:`7687`, :issue:`33498`) +- Bug in :class:`Timedelta` and :func:`pandas.to_timedelta` that ignored the ``unit`` argument for string input (:issue:`12136`) + +Timezones +^^^^^^^^^ + +- Bug in :func:`to_datetime` with ``infer_datetime_format=True`` where timezone names (e.g. ``UTC``) would not be parsed correctly (:issue:`33133`) + + +Numeric +^^^^^^^ +- Bug in :meth:`DataFrame.floordiv` with ``axis=0`` not treating division-by-zero like :meth:`Series.floordiv` (:issue:`31271`) +- Bug in :func:`to_numeric` with string argument ``"uint64"`` and ``errors="coerce"`` silently failing (:issue:`32394`) +- Bug in :func:`to_numeric` with ``downcast="unsigned"`` failing for empty data (:issue:`32493`) +- Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`) +- Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causing a segmentation fault (:issue:`21824`) +- Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`) +- Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`) +- Bug in arithmetic operations between :class:`DataFrame` objects with non-overlapping columns with duplicate labels causing an infinite loop (:issue:`35194`) +- Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`) +- Bug in :meth:`Index.difference` giving incorrect results when comparing a :class:`Float64Index` and object :class:`Index` (:issue:`35217`) +- Bug in :class:`DataFrame` reductions (e.g. 
``df.min()``, ``df.max()``) with ``ExtensionArray`` dtypes (:issue:`34520`, :issue:`32651`) +- :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` now raise a ValueError if ``limit_direction`` is ``'forward'`` or ``'both'`` and ``method`` is ``'backfill'`` or ``'bfill'`` or ``limit_direction`` is ``'backward'`` or ``'both'`` and ``method`` is ``'pad'`` or ``'ffill'`` (:issue:`34746`) + +Conversion +^^^^^^^^^^ +- Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) +- Bug in :class:`Timedelta` construction with large nanoseconds keyword value (:issue:`32402`) +- Bug in :class:`DataFrame` construction where sets would be duplicated rather than raising (:issue:`32582`) +- The :class:`DataFrame` constructor no longer accepts a list of :class:`DataFrame` objects. Because of changes to NumPy, :class:`DataFrame` objects are now consistently treated as 2D objects, so a list of :class:`DataFrame` objects is considered 3D, and no longer acceptable for the :class:`DataFrame` constructor (:issue:`32289`). +- Bug in :class:`DataFrame` when initiating a frame with lists and assign ``columns`` with nested list for ``MultiIndex`` (:issue:`32173`) +- Improved error message for invalid construction of list when creating a new index (:issue:`35190`) + + +Strings +^^^^^^^ + +- Bug in the :meth:`~Series.astype` method when converting "string" dtype data to nullable integer dtype (:issue:`32450`). +- Fixed issue where taking ``min`` or ``max`` of a ``StringArray`` or ``Series`` with ``StringDtype`` type would raise. (:issue:`31746`) +- Bug in :meth:`Series.str.cat` returning ``NaN`` output when other had :class:`Index` type (:issue:`33425`) +- :func:`pandas.api.dtypes.is_string_dtype` no longer incorrectly identifies categorical series as string. + +Interval +^^^^^^^^ +- Bug in :class:`IntervalArray` incorrectly allowing the underlying data to be changed when setting values (:issue:`32782`) + +Indexing +^^^^^^^^ + +- :meth:`DataFrame.xs` now raises a ``TypeError`` if a ``level`` keyword is supplied and the axis is not a :class:`MultiIndex`. 
Previously an ``AttributeError`` was raised (:issue:`33610`) +- Bug in slicing on a :class:`DatetimeIndex` with a partial-timestamp dropping high-resolution indices near the end of a year, quarter, or month (:issue:`31064`) +- Bug in :meth:`PeriodIndex.get_loc` treating higher-resolution strings differently from :meth:`PeriodIndex.get_value` (:issue:`31172`) +- Bug in :meth:`Series.at` and :meth:`DataFrame.at` not matching ``.loc`` behavior when looking up an integer in a :class:`Float64Index` (:issue:`31329`) +- Bug in :meth:`PeriodIndex.is_monotonic` incorrectly returning ``True`` when containing leading ``NaT`` entries (:issue:`31437`) +- Bug in :meth:`DatetimeIndex.get_loc` raising ``KeyError`` with converted-integer key instead of the user-passed key (:issue:`31425`) +- Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`) +- Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`) +- Bug in :meth:`DataFrame.at` when either columns or index is non-unique (:issue:`33041`) +- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`) +- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`) +- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`) +- Bug in :meth:`Series.__getitem__` indexing with non-standard scalars, e.g. ``np.dtype`` (:issue:`32684`) +- Bug in :class:`Index` constructor where an unhelpful error message was raised for NumPy scalars (:issue:`33017`) +- Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) +- Bug in :class:`Interval` where a :class:`Timedelta` could not be added or subtracted from a :class:`Timestamp` interval (:issue:`32023`) +- Bug in :meth:`DataFrame.copy` not invalidating _item_cache after copy caused post-copy value updates to not be reflected (:issue:`31784`) +- Fixed regression in :meth:`DataFrame.loc` and :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) +- Bug in :meth:`Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`) +- Bug in :meth:`DataFrame.iloc` when slicing a single column :class:`DataFrame` with ``ExtensionDtype`` (e.g. 
``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`) +- Bug in :meth:`DatetimeIndex.insert` and :meth:`TimedeltaIndex.insert` causing index ``freq`` to be lost when setting an element into an empty :class:`Series` (:issue:`33573`) +- Bug in :meth:`Series.__setitem__` with an :class:`IntervalIndex` and a list-like key of integers (:issue:`33473`) +- Bug in :meth:`Series.__getitem__` allowing missing labels with ``np.ndarray``, :class:`Index`, :class:`Series` indexers but not ``list``, these now all raise ``KeyError`` (:issue:`33646`) +- Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` where index was assumed to be monotone increasing (:issue:`33756`) +- Indexing with a list of strings representing datetimes failed on :class:`DatetimeIndex` or :class:`PeriodIndex` (:issue:`11278`) +- Bug in :meth:`Series.at` when used with a :class:`MultiIndex` would raise an exception on valid inputs (:issue:`26989`) +- Bug in :meth:`DataFrame.loc` with dictionary of values changes columns with dtype of ``int`` to ``float`` (:issue:`34573`) +- Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an ``IndexingError`` when accessing a ``None`` value (:issue:`34318`) +- Bug in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` would not preserve data types on an empty :class:`DataFrame` or :class:`Series` with a :class:`MultiIndex` (:issue:`19602`) +- Bug in :class:`Series` and :class:`DataFrame` indexing with a ``time`` key on a :class:`DatetimeIndex` with ``NaT`` entries (:issue:`35114`) + +Missing +^^^^^^^ +- Calling :meth:`fillna` on an empty :class:`Series` now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`). +- Bug in :meth:`Series.replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`) +- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nullable Boolean dtype and with ``skipna=False`` (:issue:`33253`) +- Clarified documentation on interpolate with ``method=akima``. The ``der`` parameter must be scalar or ``None`` (:issue:`33426`) +- :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns led to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` are identical to using these methods with :meth:`DataFrame.fillna` (:issue:`12918`, :issue:`29146`) +- Bug in :meth:`DataFrame.interpolate` when called on a :class:`DataFrame` with column names of string type was throwing a ``ValueError``. The method is now independent of the type of the column names (:issue:`33956`) +- Passing :class:`NA` into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`) +- Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`) + +MultiIndex +^^^^^^^^^^ + +- :meth:`DataFrame.swaplevels` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`. Previously an ``AttributeError`` was raised (:issue:`31126`) +- Bug in :meth:`DataFrame.loc` when used with a :class:`MultiIndex`. 
The returned values were not in the same order as the given inputs (:issue:`22797`) + +.. ipython:: python + + df = pd.DataFrame(np.arange(4), + index=[["a", "a", "b", "b"], [1, 2, 1, 2]]) + # Rows are now ordered as the requested keys + df.loc[(['b', 'a'], [2, 1]), :] + +- Bug in :meth:`MultiIndex.intersection` was not guaranteed to preserve order when ``sort=False``. (:issue:`31325`) +- Bug in :meth:`DataFrame.truncate` was dropping :class:`MultiIndex` names. (:issue:`34564`) + +.. ipython:: python + + left = pd.MultiIndex.from_arrays([["b", "a"], [2, 1]]) + right = pd.MultiIndex.from_arrays([["a", "b", "c"], [1, 2, 3]]) + # Common elements are now guaranteed to be ordered by the left side + left.intersection(right, sort=False) + +- Bug when joining two :class:`MultiIndex` without specifying level with different columns. Return-indexers parameter was ignored. (:issue:`34074`) + +IO +^^ +- Passing a ``set`` as ``names`` argument to :func:`pandas.read_csv`, :func:`pandas.read_table`, or :func:`pandas.read_fwf` will raise ``ValueError: Names should be an ordered collection.`` (:issue:`34946`) +- Bug in print-out when ``display.precision`` is zero. (:issue:`20359`) +- Bug in :func:`read_json` where integer overflow was occurring when json contains big number strings. (:issue:`30320`) +- :func:`read_csv` will now raise a ``ValueError`` when the arguments ``header`` and ``prefix`` both are not ``None``. (:issue:`27394`) +- Bug in :meth:`DataFrame.to_json` was raising ``NotFoundError`` when ``path_or_buf`` was an S3 URI (:issue:`28375`) +- Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for + ``coerce_timestamps``; following pyarrow's default allows writing nanosecond + timestamps with ``version="2.0"`` (:issue:`31652`). +- Bug in :func:`read_csv` was raising ``TypeError`` when ``sep=None`` was used in combination with ``comment`` keyword (:issue:`31396`) +- Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a :class:`DataFrame` in Python 3 from fixed format written in Python 2 (:issue:`31750`) +- :func:`read_sas()` now handles dates and datetimes larger than :attr:`Timestamp.max` returning them as :class:`datetime.datetime` objects (:issue:`20927`) +- Bug in :meth:`DataFrame.to_json` where ``Timedelta`` objects would not be serialized correctly with ``date_format="iso"`` (:issue:`28256`) +- :func:`read_csv` will raise a ``ValueError`` when the column names passed in ``parse_dates`` are missing in the :class:`Dataframe` (:issue:`31251`) +- Bug in :func:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`) +- Bug in :func:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`) +- Bug in :func:`read_csv` was causing a segfault when there were blank lines between the header and data rows (:issue:`28071`) +- Bug in :func:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`) +- Bug in :func:`read_csv` was raising an ``IndexError`` when ``header=None`` and two extra data columns +- Bug in :func:`read_sas` was raising an ``AttributeError`` when reading files from Google Cloud Storage (:issue:`33069`) +- Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`) +- Bug in :func:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. 
(:issue:`32207`) +- Bug in :func:`read_json` was raising ``TypeError`` when reading a ``list`` of Booleans into a :class:`Series`. (:issue:`31464`) +- Bug in :func:`pandas.io.json.json_normalize` where location specified by ``record_path`` doesn't point to an array. (:issue:`26284`) +- :func:`pandas.read_hdf` has a more explicit error message when loading an + unsupported HDF file (:issue:`9539`) +- Bug in :meth:`~DataFrame.read_feather` was raising an ``ArrowIOError`` when reading an s3 or http file path (:issue:`29055`) +- Bug in :meth:`~DataFrame.to_excel` could not handle the column name ``render`` and was raising a ``KeyError`` (:issue:`34331`) +- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the ``%`` character and no parameters were present (:issue:`34211`) +- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`) +- :meth:`HDFStore.keys` now has an optional ``include`` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) +- ``TypeError`` exceptions raised by :func:`read_csv` and :func:`read_table` were showing as ``parser_f`` when an unexpected keyword argument was passed (:issue:`25648`) +- Bug in :func:`read_excel` for ODS files removes 0.0 values (:issue:`27222`) +- Bug in :func:`ujson.encode` was raising an ``OverflowError`` with numbers larger than ``sys.maxsize`` (:issue:`34395`) +- Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the ``min_itemsize`` parameter is set (:issue:`11238`) +- Bug in :meth:`~HDFStore.create_table` now raises an error when ``column`` argument was not specified in ``data_columns`` on input (:issue:`28156`) +- :func:`read_json` can now read a line-delimited JSON file from a file URL while ``lines`` and ``chunksize`` are set. +- Bug in :meth:`DataFrame.to_sql` when reading DataFrames with ``-np.inf`` entries with MySQL now has a more explicit ``ValueError`` (:issue:`34431`) +- Bug where capitalised file extensions were not decompressed by read_* functions (:issue:`35164`) +- Bug in :meth:`read_excel` that was raising a ``TypeError`` when ``header=None`` and ``index_col`` is given as a ``list`` (:issue:`31783`) +- Bug in :func:`read_excel` where datetime values are used in the header in a :class:`MultiIndex` (:issue:`34748`) +- :func:`read_excel` no longer takes ``**kwds`` arguments. This means that passing in the keyword argument ``chunksize`` now raises a ``TypeError`` (previously raised a ``NotImplementedError``), while passing in the keyword argument ``encoding`` now raises a ``TypeError`` (:issue:`34464`) +- Bug in :meth:`DataFrame.to_records` was incorrectly losing timezone information in timezone-aware ``datetime64`` columns (:issue:`32535`) + +Plotting +^^^^^^^^ + +- :meth:`DataFrame.plot` for line/bar now accepts color by dictionary, as shown in the example below (:issue:`8193`). 
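+
+A minimal illustration of this behaviour (the frame, column names and colors
+below are purely illustrative, and a working matplotlib backend is assumed):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]})
+    # map each column label to an explicit color
+    df.plot.bar(color={"a": "green", "b": "red"})
+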
+- Bug in :meth:`DataFrame.plot.hist` where weights are not working for multiple columns (:issue:`33173`) +- Bug in :meth:`DataFrame.boxplot` and :meth:`DataFrame.plot.boxplot` lost color attributes of ``medianprops``, ``whiskerprops``, ``capprops`` and ``boxprops`` (:issue:`30346`) +- Bug in :meth:`DataFrame.hist` where the order of ``column`` argument was ignored (:issue:`29235`) +- Bug in :meth:`DataFrame.plot.scatter` that when adding multiple plots with different ``cmap``, colorbars always use the first ``cmap`` (:issue:`33389`) +- Bug in :meth:`DataFrame.plot.scatter` was adding a colorbar to the plot even if the argument ``c`` was assigned to a column containing color names (:issue:`34316`) +- Bug in :meth:`pandas.plotting.bootstrap_plot` was causing cluttered axes and overlapping labels (:issue:`34905`) +- Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`) + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Using a :class:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew``, ``cov``, ``corr`` will now return correct results for any monotonic :class:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) +- :meth:`DataFrameGroupby.mean` and :meth:`SeriesGroupby.mean` (and similarly for :meth:`~DataFrameGroupby.median`, :meth:`~DataFrameGroupby.std` and :meth:`~DataFrameGroupby.var`) now raise a ``TypeError`` if a non-accepted keyword argument is passed into it. Previously an ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`) +- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted, has duplicates, and the applied ``func`` does not mutate passed in objects (:issue:`30667`) +- Bug in :meth:`DataFrameGroupBy.transform` produces an incorrect result with transformation functions (:issue:`30918`) +- Bug in :meth:`Groupby.transform` was returning the wrong result when grouping by multiple keys of which some were categorical and others not (:issue:`32494`) +- Bug in :meth:`GroupBy.count` causes segmentation fault when grouped-by columns contain NaNs (:issue:`32841`) +- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` produces inconsistent type when aggregating Boolean :class:`Series` (:issue:`32894`) +- Bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` where a large negative number would be returned when the number of non-null values was below ``min_count`` for nullable integer dtypes (:issue:`32861`) +- Bug in :meth:`SeriesGroupBy.quantile` was raising on nullable integers (:issue:`33136`) +- Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`) +- Bug in :meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`) +- Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`) +- Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of :class:`SeriesGroupBy` previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. 
(:issue:`34422`) +- Bug in :meth:`DataFrame.groupby` lost the name of the :class:`Index` when one of the ``agg`` keys referenced an empty list (:issue:`32580`) +- Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`) +- Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.quantile` raised ``TypeError`` for non-numeric types rather than dropping the columns (:issue:`27892`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`) +- Bug in :meth:`DataFrame.groupby` raising an ``AttributeError`` when selecting a column and aggregating with ``as_index=False`` (:issue:`35246`). +- Bug in :meth:`DataFrameGroupBy.first` and :meth:`DataFrameGroupBy.last` that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`) + +Reshaping +^^^^^^^^^ + +- Bug affecting all numeric and Boolean reduction methods not returning subclassed data type. (:issue:`25596`) +- Bug in :meth:`DataFrame.pivot_table` when only :class:`MultiIndexed` columns are set (:issue:`17038`) +- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` can take tuple names in :class:`MultiIndexed` data (:issue:`19966`) +- Bug in :meth:`DataFrame.pivot_table` when ``margin`` is ``True`` and only ``column`` is defined (:issue:`31016`) +- Fixed incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`) +- Bug in :func:`crosstab` when inputs are two :class:`Series` and have tuple names, the output will keep a dummy :class:`MultiIndex` as columns. (:issue:`18321`) +- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`) +- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) +- Bug in :meth:`SeriesGroupBy.aggregate` was resulting in aggregations being overwritten when they shared the same name (:issue:`30880`) +- Bug where :meth:`Index.astype` would lose the :attr:`name` attribute when converting from ``Float64Index`` to ``Int64Index``, or when casting to an ``ExtensionArray`` dtype (:issue:`32013`) +- :meth:`Series.append` will now raise a ``TypeError`` when passed a :class:`DataFrame` or a sequence containing :class:`DataFrame` (:issue:`31413`) +- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) +- Bug on inplace operation of a :class:`Series` that was adding a column to the :class:`DataFrame` from which it was originally dropped (using ``inplace=True``) (:issue:`30484`) +- Bug in :meth:`DataFrame.apply` where callback was called with :class:`Series` parameter even though ``raw=True`` requested. 
(:issue:`32423`) +- Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`) +- Bug in :func:`concat` where when passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`) +- :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregate a non-existent column (:issue:`32755`) +- Bug in :meth:`DataFrame.unstack` when :class:`MultiIndex` columns and :class:`MultiIndex` rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`) +- Appending a dictionary to a :class:`DataFrame` without passing ``ignore_index=True`` will raise ``TypeError: Can only append a dict if ignore_index=True`` instead of ``TypeError: Can only append a :class:`Series` if ignore_index=True or if the :class:`Series` has a name`` (:issue:`30871`) +- Bug in :meth:`DataFrame.corrwith()`, :meth:`DataFrame.memory_usage()`, :meth:`DataFrame.dot()`, + :meth:`DataFrame.idxmin()`, :meth:`DataFrame.idxmax()`, :meth:`DataFrame.duplicated()`, :meth:`DataFrame.isin()`, + :meth:`DataFrame.count()`, :meth:`Series.explode()`, :meth:`Series.asof()` and :meth:`DataFrame.asof()` not + returning subclassed types. (:issue:`31331`) +- Bug in :func:`concat` was not allowing for concatenation of :class:`DataFrame` and :class:`Series` with duplicate keys (:issue:`33654`) +- Bug in :func:`cut` raised an error when the argument ``labels`` contains duplicates (:issue:`33141`) +- Ensure only named functions can be used in :func:`eval()` (:issue:`32460`) +- Bug in :meth:`Dataframe.aggregate` and :meth:`Series.aggregate` was causing a recursive loop in some cases (:issue:`34224`) +- Fixed bug in :func:`melt` where melting :class:`MultiIndex` columns with ``col_level > 0`` would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`) +- Bug in :meth:`Series.where` with an empty :class:`Series` and empty ``cond`` having non-bool dtype (:issue:`34592`) +- Fixed regression where :meth:`DataFrame.apply` would raise ``ValueError`` for elements with ``S`` dtype (:issue:`34529`) + +Sparse +^^^^^^ +- Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`) +- Bug in :meth:`arrays.SparseArray.from_spmatrix` wrongly read scipy sparse matrix (:issue:`31991`) +- Bug in :meth:`Series.sum` with ``SparseArray`` raised a ``TypeError`` (:issue:`25777`) +- Bug where :class:`DataFrame` containing an all-sparse :class:`SparseArray` filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`) +- The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. 
Previously it used ``fill_value``'s string representation (:issue:`34352`) +- Bug where empty :class:`DataFrame` could not be cast to :class:`SparseDtype` (:issue:`33113`) +- Bug in :meth:`arrays.SparseArray` was returning the incorrect type when indexing a sparse dataframe with an iterable (:issue:`34526`, :issue:`34540`) + +ExtensionArray +^^^^^^^^^^^^^^ + +- Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`) +- Fixed bug in :func:`concat` when concatenating :class:`DataFrame` objects with non-overlapping columns resulting in object-dtype columns rather than preserving the extension dtype (:issue:`27692`, :issue:`33027`) +- Fixed bug where :meth:`StringArray.isna` would return ``False`` for NA values when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33655`) +- Fixed bug in :class:`Series` construction with EA dtype and index but no data or scalar data fails (:issue:`26469`) +- Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`). +- Fixed bug where :meth:`Series.update` would raise a ``ValueError`` for ``ExtensionArray`` dtypes with missing values (:issue:`33980`) +- Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`) +- Fixed bug where :meth:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable Boolean dtypes (:issue:`34051`) +- Fixed bug where the constructor of :class:`DataFrame` with ``dtype='string'`` would fail (:issue:`27953`, :issue:`33623`) +- Bug where :class:`DataFrame` column set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`) +- Fixed bug in :meth:`IntegerArray.astype` to correctly copy the mask as well (:issue:`34931`). + +Other +^^^^^ + +- Set operations on an object-dtype :class:`Index` now always return object-dtype results (:issue:`31401`) +- Fixed :func:`pandas.testing.assert_series_equal` to correctly raise if the ``left`` argument is a different subclass with ``check_series_type=True`` (:issue:`32670`). +- Getting a missing attribute in a :meth:`DataFrame.query` or :meth:`DataFrame.eval` string raises the correct ``AttributeError`` (:issue:`32408`) +- Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`) +- Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`) +- Bug in :meth:`DataFrame.equals` and :meth:`Series.equals` in allowing subclasses to be equal (:issue:`34402`). + +.. --------------------------------------------------------------------------- + +.. _whatsnew_110.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.5..v1.1.0|HEAD diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst new file mode 100644 index 00000000..77ea67f7 --- /dev/null +++ b/doc/source/whatsnew/v1.1.1.rst @@ -0,0 +1,56 @@ +.. _whatsnew_111: + +What's new in 1.1.1 (August 20, 2020) +------------------------------------- + +These are the changes in pandas 1.1.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_111.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`CategoricalIndex.format` where, when stringified scalars had different lengths, the shorter string would be right-filled with spaces, so it had the same length as the longest string (:issue:`35439`) +- Fixed regression in :meth:`Series.truncate` when trying to truncate a single-element series (:issue:`35544`) +- Fixed regression where :meth:`DataFrame.to_numpy` would raise a ``RuntimeError`` for mixed dtypes when converting to ``str`` (:issue:`35455`) +- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`) +- Fixed regression where :func:`pandas.testing.assert_series_equal` would raise an error when non-numeric dtypes were passed with ``check_exact=True`` (:issue:`35446`) +- Fixed regression in ``.groupby(..).rolling(..)`` where column selection was ignored (:issue:`35486`) +- Fixed regression where :meth:`DataFrame.interpolate` would raise a ``TypeError`` when the :class:`DataFrame` was empty (:issue:`35598`) +- Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`) +- Fixed regression in :meth:`DataFrame.diff` with read-only data (:issue:`35559`) +- Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`) +- Fixed regression in :meth:`DataFrame.apply` where functions that altered the input in-place only operated on a single row (:issue:`35462`) +- Fixed regression in :meth:`DataFrame.reset_index` would raise a ``ValueError`` on empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`) +- Fixed regression where :func:`pandas.merge_asof` would raise a ``UnboundLocalError`` when ``left_index``, ``right_index`` and ``tolerance`` were set (:issue:`35558`) +- Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`) +- Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` where compiled regular expressions would be ignored during replacement (:issue:`35680`) +- Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.aggregate` where a list of functions would produce the wrong results if at least one of the functions did not aggregate (:issue:`35490`) +- Fixed memory usage issue when instantiating large :class:`pandas.arrays.StringArray` (:issue:`35499`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_111.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in :class:`~pandas.io.formats.style.Styler` whereby ``cell_ids`` argument had no effect due to other recent changes (:issue:`35588`) (:issue:`35663`) +- Bug in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` where extension dtypes were not ignored when ``check_dtypes`` was set to ``False`` (:issue:`35715`) +- Bug in :meth:`to_timedelta` fails when ``arg`` is a :class:`Series` with ``Int64`` dtype containing null values (:issue:`35574`) +- Bug in ``.groupby(..).rolling(..)`` where passing ``closed`` with column selection would raise a ``ValueError`` (:issue:`35549`) +- Bug in :class:`DataFrame` constructor failing to raise ``ValueError`` in some cases when ``data`` and ``index`` have mismatched lengths (:issue:`33437`) + +.. 
--------------------------------------------------------------------------- + +.. _whatsnew_111.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.1.0..v1.1.1 diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst new file mode 100644 index 00000000..81b8e7df --- /dev/null +++ b/doc/source/whatsnew/v1.1.2.rst @@ -0,0 +1,64 @@ +.. _whatsnew_112: + +What's new in 1.1.2 (September 8, 2020) +--------------------------------------- + +These are the changes in pandas 1.1.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_112.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) +- Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`) +- Fix regression in :meth:`DataFrame.append` mixing tz-aware and tz-naive datetime columns (:issue:`35460`) +- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) +- Regression where :meth:`MultiIndex.get_loc` would return a slice spanning the full index when passed an empty list (:issue:`35878`) +- Fix regression in invalid cache after an indexing operation; this can manifest when setting which does not update the data (:issue:`35521`) +- Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`) +- Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`) +- Fixed regression in :meth:`DataFrameGroupBy.agg` where a ``ValueError: buffer source array is read-only`` would be raised when the underlying array is read-only (:issue:`36014`) +- Fixed regression in :meth:`Series.groupby.rolling` number of levels of :class:`MultiIndex` in input was compressed to one (:issue:`36018`) +- Fixed regression in :class:`DataFrameGroupBy` on an empty :class:`DataFrame` (:issue:`36197`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_112.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`) +- Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`) +- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` not respecting the ``errors`` argument when set to ``"ignore"`` for extension dtypes (:issue:`35471`) +- Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`) +- Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`) +- Bug in :class:`Series` constructor incorrectly raising a ``TypeError`` when passed an ordered set (:issue:`36044`) +- Bug in :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returned incorrect year for certain dates (:issue:`36032`) +- Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`) +- Bug in :meth:`DataFrame.corr` causing subsequent indexing lookups to be incorrect (:issue:`35882`) +- Bug in :meth:`import_optional_dependency` returning incorrect package names in cases where package name is different from import name (:issue:`35948`) +- Bug when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`31368`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_112.other: + +Other +~~~~~ +- :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize` (:issue:`35667`) +- :meth:`DataFrame.plot` and :meth:`Series.plot` raise ``UserWarning`` about usage of ``FixedFormatter`` and ``FixedLocator`` (:issue:`35684` and :issue:`35945`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_112.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.1.1..v1.1.2 diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst new file mode 100644 index 00000000..e752eb54 --- /dev/null +++ b/doc/source/whatsnew/v1.1.3.rst @@ -0,0 +1,78 @@ +.. _whatsnew_113: + +What's new in 1.1.3 (October 5, 2020) +------------------------------------- + +These are the changes in pandas 1.1.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +Enhancements +~~~~~~~~~~~~ + +Added support for new Python version +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas 1.1.3 now supports Python 3.9 (:issue:`36296`). + +Development Changes +^^^^^^^^^^^^^^^^^^^ + +- The minimum version of Cython is now the most recent bug-fix version (0.29.21) (:issue:`36296`). + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_113.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.agg`, :meth:`DataFrame.apply`, :meth:`Series.agg`, and :meth:`Series.apply` where internal suffix is exposed to the users when no relabelling is applied (:issue:`36189`) +- Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) +- Fixed regression when adding a :meth:`timedelta_range` to a :class:`Timestamp` raised a ``ValueError`` (:issue:`35897`) +- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) +- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) +- Fixed regression in modulo of :class:`Index`, :class:`Series` and :class:`DataFrame` using ``numexpr`` using C not Python semantics (:issue:`36047`, :issue:`36526`) +- Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) +- Fixed regression in :meth:`DataFrame.replace` inconsistent replace when using a float in the replace method (:issue:`35376`) +- Fixed regression in :meth:`Series.loc` on a :class:`Series` with a :class:`MultiIndex` containing :class:`Timestamp` raising ``InvalidIndexError`` (:issue:`35858`) +- Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`) +- Fixed regression in :meth:`DataFrame.apply` with ``raw=True`` and user-function returning string (:issue:`35940`) +- Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) +- Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) +- Fixed regression in :func:`read_table` raised ``ValueError`` when ``delim_whitespace`` was set to ``True`` (:issue:`35958`) +- Fixed regression in :meth:`Series.dt.normalize` when normalizing pre-epoch dates the result was shifted one day (:issue:`36294`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_113.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :func:`read_spss` where passing a ``pathlib.Path`` as ``path`` would raise a ``TypeError`` (:issue:`33666`) +- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) +- Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) +- Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`) +- Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) +- Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`) +- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`) +- Bug in :func:`cut` raising a ``ValueError`` when passed a :class:`Series` of labels with ``ordered=False`` (:issue:`36603`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_113.other: + +Other +~~~~~ +- Reverted enhancement added in pandas-1.1.0 where :func:`timedelta_range` infers a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_113.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.1.2..v1.1.3 diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst new file mode 100644 index 00000000..6353dbfa --- /dev/null +++ b/doc/source/whatsnew/v1.1.4.rst @@ -0,0 +1,55 @@ +.. _whatsnew_114: + +What's new in 1.1.4 (October 30, 2020) +-------------------------------------- + +These are the changes in pandas 1.1.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_114.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :func:`read_csv` raising a ``ValueError`` when ``names`` was of type ``dict_keys`` (:issue:`36928`) +- Fixed regression in :func:`read_csv` with more than 1M rows and specifying a ``index_col`` argument (:issue:`37094`) +- Fixed regression where attempting to mutate a :class:`DateOffset` object would no longer raise an ``AttributeError`` (:issue:`36940`) +- Fixed regression where :meth:`DataFrame.agg` would fail with :exc:`TypeError` when passed positional arguments to be passed on to the aggregation function (:issue:`36948`). +- Fixed regression in :class:`RollingGroupby` with ``sort=False`` not being respected (:issue:`36889`) +- Fixed regression in :meth:`Series.astype` converting ``None`` to ``"nan"`` when casting to string (:issue:`36904`) +- Fixed regression in :meth:`Series.rank` method failing for read-only data (:issue:`37290`) +- Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`) +- Fixed regression in :meth:`DataFrame.resample(...).apply(...)` raised ``AttributeError`` when input was a :class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`) +- Fixed regression in ``DataFrame.groupby(..).std()`` with nullable integer dtype (:issue:`37415`) +- Fixed regression in :class:`PeriodDtype` comparing both equal and unequal to its string representation (:issue:`37265`) +- Fixed regression where slicing :class:`DatetimeIndex` raised :exc:`AssertionError` on irregular time series with ``pd.NaT`` or on unsorted indices (:issue:`36953` and :issue:`35509`) +- Fixed regression in certain offsets (:meth:`pd.offsets.Day() ` and below) no longer being hashable (:issue:`37267`) +- Fixed regression in :class:`StataReader` which required ``chunksize`` to be manually set when using an iterator to read a dataset (:issue:`37280`) +- Fixed regression in setitem with :meth:`DataFrame.iloc` which raised error when trying to set a value while filtering with a boolean list (:issue:`36741`) +- Fixed regression in setitem with a Series getting aligned before setting the values (:issue:`37427`) +- Fixed regression in :attr:`MultiIndex.is_monotonic_increasing` returning wrong results with ``NaN`` in at least one of the levels (:issue:`37220`) +- Fixed regression in inplace arithmetic operation on a Series not updating the parent DataFrame (:issue:`36373`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_114.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug causing ``groupby(...).sum()`` and similar to not preserve metadata (:issue:`29442`) +- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` raising a ``ValueError`` when the target was read-only (:issue:`37174`) +- Bug in :meth:`GroupBy.fillna` that introduced a performance regression after 1.0.5 (:issue:`36757`) +- Bug in :meth:`DataFrame.info` was raising a ``KeyError`` when the DataFrame has integer column names (:issue:`37245`) +- Bug in :meth:`DataFrameGroupby.apply` would drop a :class:`CategoricalIndex` when grouped on (:issue:`35792`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_114.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.1.3..v1.1.4 diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst new file mode 100644 index 00000000..002e1f85 --- /dev/null +++ b/doc/source/whatsnew/v1.1.5.rst @@ -0,0 +1,56 @@ +.. _whatsnew_115: + +What's new in 1.1.5 (December 07, 2020) +--------------------------------------- + +These are the changes in pandas 1.1.5. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_115.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in addition of a timedelta-like scalar to a :class:`DatetimeIndex` raising incorrectly (:issue:`37295`) +- Fixed regression in :meth:`Series.groupby` raising when the :class:`Index` of the :class:`Series` had a tuple as its name (:issue:`37755`) +- Fixed regression in :meth:`DataFrame.loc` and :meth:`Series.loc` for ``__setitem__`` when one-dimensional tuple was given to select from :class:`MultiIndex` (:issue:`37711`) +- Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`) +- Fixed regression in metadata propagation for ``groupby`` iterator (:issue:`37343`) +- Fixed regression in :class:`MultiIndex` constructed from a :class:`DatetimeIndex` not retaining frequency (:issue:`35563`) +- Fixed regression in :class:`Index` constructor raising a ``AttributeError`` when passed a :class:`SparseArray` with datetime64 values (:issue:`35843`) +- Fixed regression in :meth:`DataFrame.unstack` with columns with integer dtype (:issue:`37115`) +- Fixed regression in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`) +- Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issue:`36003`) +- Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) +- Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). +- Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) +- Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) +- Fixed regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` where ``None`` was considered a non-NA value (:issue:`38286`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_115.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in pytables methods in python 3.9 (:issue:`38041`) + +.. 
--------------------------------------------------------------------------- + +.. _whatsnew_115.other: + +Other +~~~~~ +- Only set ``-Werror`` as a compiler flag in the CI jobs (:issue:`33315`, :issue:`33314`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_115.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.1.4..v1.1.5|HEAD diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst new file mode 100644 index 00000000..49f9abd9 --- /dev/null +++ b/doc/source/whatsnew/v1.2.0.rst @@ -0,0 +1,878 @@ +.. _whatsnew_120: + +What's new in 1.2.0 (December 26, 2020) +--------------------------------------- + +These are the changes in pandas 1.2.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. warning:: + + The `xlwt `_ package for writing old-style ``.xls`` + excel files is no longer maintained. + The `xlrd `_ package is now only for reading + old-style ``.xls`` files. + + Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` + would result in using the ``xlrd`` engine in many cases, including new + Excel 2007+ (``.xlsx``) files. + If `openpyxl `_ is installed, + many of these cases will now default to using the ``openpyxl`` engine. + See the :func:`read_excel` documentation for more details. + + Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ + (``.xlsx``) files. + **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.** + This is no longer supported, switch to using ``openpyxl`` instead. + + Attempting to use the ``xlwt`` engine will raise a ``FutureWarning`` + unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. + While this option is now deprecated and will also raise a ``FutureWarning``, + it can be globally set and the warning suppressed. Users are recommended to + write ``.xlsx`` files using the ``openpyxl`` engine instead. + +.. --------------------------------------------------------------------------- + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_120.duplicate_labels: + +Optionally disallow duplicate labels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Series` and :class:`DataFrame` can now be created with ``allows_duplicate_labels=False`` flag to +control whether the index or columns can contain duplicate labels (:issue:`28394`). This can be used to +prevent accidental introduction of duplicate labels, which can affect downstream operations. + +By default, duplicates continue to be allowed. + +.. code-block:: ipython + + In [1]: pd.Series([1, 2], index=['a', 'a']) + Out[1]: + a 1 + a 2 + Length: 2, dtype: int64 + + In [2]: pd.Series([1, 2], index=['a', 'a']).set_flags(allows_duplicate_labels=False) + ... + DuplicateLabelError: Index has duplicates. + positions + label + a [0, 1] + +pandas will propagate the ``allows_duplicate_labels`` property through many operations. + +.. code-block:: ipython + + In [3]: a = ( + ...: pd.Series([1, 2], index=['a', 'b']) + ...: .set_flags(allows_duplicate_labels=False) + ...: ) + + In [4]: a + Out[4]: + a 1 + b 2 + Length: 2, dtype: int64 + + # An operation introducing duplicates + In [5]: a.reindex(['a', 'b', 'a']) + ... + DuplicateLabelError: Index has duplicates. + positions + label + a [0, 2] + + [1 rows x 1 columns] + +.. warning:: + + This is an experimental feature. Currently, many methods fail to + propagate the ``allows_duplicate_labels`` value. 
In future versions + it is expected that every method taking or returning one or more + DataFrame or Series objects will propagate ``allows_duplicate_labels``. + +See :ref:`duplicates` for more. + +The ``allows_duplicate_labels`` flag is stored in the new :attr:`DataFrame.flags` +attribute. This stores global attributes that apply to the *pandas object*. This +differs from :attr:`DataFrame.attrs`, which stores information that applies to +the dataset. + +Passing arguments to fsspec backends +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Many read/write functions have acquired the ``storage_options`` optional argument, +to pass a dictionary of parameters to the storage backend. This allows, for +example, for passing credentials to S3 and GCS storage. The details of what +parameters can be passed to which backends can be found in the documentation +of the individual storage backends (detailed from the fsspec docs for +`builtin implementations`_ and linked to `external ones`_). See +Section :ref:`io.remote`. + +:issue:`35655` added fsspec support (including ``storage_options``) +for reading excel files. + +.. _builtin implementations: https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations +.. _external ones: https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations + +.. _whatsnew_120.binary_handle_to_csv: + +Support for binary file handles in ``to_csv`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`to_csv` supports file handles in binary mode (:issue:`19827` and :issue:`35058`) +with ``encoding`` (:issue:`13068` and :issue:`23854`) and ``compression`` (:issue:`22555`). +If pandas does not automatically detect whether the file handle is opened in binary or text mode, +it is necessary to provide ``mode="wb"``. + +For example: + +.. ipython:: python + + import io + + data = pd.DataFrame([0, 1, 2]) + buffer = io.BytesIO() + data.to_csv(buffer, encoding="utf-8", compression="gzip") + +Support for short caption and table position in ``to_latex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.to_latex` now allows one to specify +a floating table position (:issue:`35281`) +and a short caption (:issue:`36267`). + +The keyword ``position`` has been added to set the position. + +.. ipython:: python + :okwarning: + + data = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + table = data.to_latex(position='ht') + print(table) + +Usage of the keyword ``caption`` has been extended. +Besides taking a single string as an argument, +one can optionally provide a tuple ``(full_caption, short_caption)`` +to add a short caption macro. + +.. ipython:: python + :okwarning: + + data = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + table = data.to_latex(caption=('the full long caption', 'short caption')) + print(table) + +.. _whatsnew_120.read_csv_table_precision_default: + +Change in default floating precision for ``read_csv`` and ``read_table`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For the C parsing engine, the methods :meth:`read_csv` and :meth:`read_table` previously defaulted to a parser that +could read floating point numbers slightly incorrectly with respect to the last bit in precision. +The option ``floating_precision="high"`` has always been available to avoid this issue. 
+Beginning with this version, the default is now to use the more accurate parser by making +``floating_precision=None`` correspond to the high precision parser, and the new option +``floating_precision="legacy"`` to use the legacy parser. The change to using the higher precision +parser by default should have no impact on performance. (:issue:`17154`) + +.. _whatsnew_120.floating: + +Experimental nullable data types for float data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :class:`Float32Dtype` / :class:`Float64Dtype` and :class:`~arrays.FloatingArray`. +These are extension data types dedicated to floating point data that can hold the +``pd.NA`` missing value indicator (:issue:`32265`, :issue:`34307`). + +While the default float data type already supports missing values using ``np.nan``, +these new data types use ``pd.NA`` (and its corresponding behavior) as the missing +value indicator, in line with the already existing nullable :ref:`integer ` +and :ref:`boolean ` data types. + +One example where the behavior of ``np.nan`` and ``pd.NA`` is different is +comparison operations: + +.. ipython:: python + + # the default NumPy float64 dtype + s1 = pd.Series([1.5, None]) + s1 + s1 > 1 + +.. ipython:: python + + # the new nullable float64 dtype + s2 = pd.Series([1.5, None], dtype="Float64") + s2 + s2 > 1 + +See the :ref:`missing_data.NA` doc section for more details on the behavior +when using the ``pd.NA`` missing value indicator. + +As shown above, the dtype can be specified using the "Float64" or "Float32" +string (capitalized to distinguish it from the default "float64" data type). +Alternatively, you can also use the dtype object: + +.. ipython:: python + + pd.Series([1.5, None], dtype=pd.Float32Dtype()) + +Operations with the existing integer or boolean nullable data types that +give float results will now also use the nullable floating data types (:issue:`38178`). + +.. warning:: + + Experimental: the new floating data types are currently experimental, and their + behavior or API may still change without warning. Especially the behavior + regarding NaN (distinct from NA missing values) is subject to change. + +.. _whatsnew_120.index_name_preservation: + +Index/column name preservation when aggregating +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When aggregating using :meth:`concat` or the :class:`DataFrame` constructor, pandas +will now attempt to preserve index and column names whenever possible (:issue:`35847`). +In the case where all inputs share a common name, this name will be assigned to the +result. When the input names do not all agree, the result will be unnamed. Here is an +example where the index name is preserved: + +.. ipython:: python + + idx = pd.Index(range(5), name='abc') + ser = pd.Series(range(5, 10), index=idx) + pd.concat({'x': ser[1:], 'y': ser[:-1]}, axis=1) + +The same is true for :class:`MultiIndex`, but the logic is applied separately on a +level-by-level basis. + +.. _whatsnew_120.groupby_ewm: + +GroupBy supports EWM operations directly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`.DataFrameGroupBy` now supports exponentially weighted window operations directly (:issue:`16037`). + +.. ipython:: python + + df = pd.DataFrame({'A': ['a', 'b', 'a', 'b'], 'B': range(4)}) + df + df.groupby('A').ewm(com=1.0).mean() + +Additionally ``mean`` supports execution via `Numba `__ with +the ``engine`` and ``engine_kwargs`` arguments. Numba must be installed as an optional dependency +to use this feature. + +.. 
_whatsnew_120.enhancements.other:
+
+Other enhancements
+^^^^^^^^^^^^^^^^^^
+- Added ``day_of_week`` (compatibility alias ``dayofweek``) property to :class:`Timestamp`, :class:`.DatetimeIndex`, :class:`Period`, :class:`PeriodIndex` (:issue:`9605`)
+- Added ``day_of_year`` (compatibility alias ``dayofyear``) property to :class:`Timestamp`, :class:`.DatetimeIndex`, :class:`Period`, :class:`PeriodIndex` (:issue:`9605`)
+- Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a Series or DataFrame (:issue:`28394`)
+- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`)
+- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
+- :meth:`io.sql.get_schema` now supports a ``schema`` keyword argument that will add a schema into the create table statement (:issue:`28486`)
+- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
+- :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`)
+- :meth:`.Styler.set_table_styles` now allows the direct styling of rows and columns and can be chained (:issue:`35607`)
+- :class:`.Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
+- :meth:`.Rolling.mean` and :meth:`.Rolling.sum` use Kahan summation to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
+- :meth:`.DatetimeIndex.searchsorted`, :meth:`.TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetime-like dtypes will now try to cast string arguments (list-like and scalar) to the matching datetime-like type (:issue:`36346`)
+- Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
+- Calling a NumPy ufunc on a ``DataFrame`` with extension types now preserves the extension types when possible (:issue:`23743`)
+- Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`).
+  This change has been reverted in pandas 1.2.1, and the behaviour to not align DataFrames
+  is deprecated instead; see :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>`.
+- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) +- :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`) +- :func:`read_parquet` gained a ``use_nullable_dtypes=True`` option to use nullable dtypes that use ``pd.NA`` as missing value indicator where possible for the resulting DataFrame (default is ``False``, and only applicable for ``engine="pyarrow"``) (:issue:`31242`) +- Added :meth:`.Rolling.sem` and :meth:`Expanding.sem` to compute the standard error of the mean (:issue:`26476`) +- :meth:`.Rolling.var` and :meth:`.Rolling.std` use Kahan summation and Welford's Method to avoid numerical issues (:issue:`37051`) +- :meth:`DataFrame.corr` and :meth:`DataFrame.cov` use Welford's Method to avoid numerical issues (:issue:`37448`) +- :meth:`DataFrame.plot` now recognizes ``xlabel`` and ``ylabel`` arguments for plots of type ``scatter`` and ``hexbin`` (:issue:`37001`) +- :class:`DataFrame` now supports the ``divmod`` operation (:issue:`37165`) +- :meth:`DataFrame.to_parquet` now returns a ``bytes`` object when no ``path`` argument is passed (:issue:`37105`) +- :class:`.Rolling` now supports the ``closed`` argument for fixed windows (:issue:`34315`) +- :class:`.DatetimeIndex` and :class:`Series` with ``datetime64`` or ``datetime64tz`` dtypes now support ``std`` (:issue:`37436`) +- :class:`Window` now supports all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`) +- :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`) +- :func:`read_csv` supports memory-mapping for compressed files (:issue:`37621`) +- Add support for ``min_count`` keyword for :meth:`DataFrame.groupby` and :meth:`DataFrame.resample` for functions ``min``, ``max``, ``first`` and ``last`` (:issue:`37821`, :issue:`37768`) +- Improve error reporting for :meth:`DataFrame.merge` when invalid merge column definitions were given (:issue:`16228`) +- Improve numerical stability for :meth:`.Rolling.skew`, :meth:`.Rolling.kurt`, :meth:`Expanding.skew` and :meth:`Expanding.kurt` through implementation of Kahan summation (:issue:`6929`) +- Improved error reporting for subsetting columns of a :class:`.DataFrameGroupBy` with ``axis=1`` (:issue:`37725`) +- Implement method ``cross`` for :meth:`DataFrame.merge` and :meth:`DataFrame.join` (:issue:`5401`) +- When :func:`read_csv`, :func:`read_sas` and :func:`read_json` are called with ``chunksize``/``iterator`` they can be used in a ``with`` statement as they return context-managers (:issue:`38225`) +- Augmented the list of named colors available for styling Excel exports, enabling all of CSS4 colors (:issue:`38247`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_120.notable_bug_fixes: + +Notable bug fixes +~~~~~~~~~~~~~~~~~ + +These are bug fixes that might have notable behavior changes. + +Consistency of DataFrame Reductions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True`` now +determines whether to exclude object-dtype columns on a column-by-column basis, +instead of checking if *all* object-dtype columns can be considered boolean. + +This prevents pathological behavior where applying the reduction on a subset +of columns could result in a larger Series result. 
See (:issue:`37799`). + +.. ipython:: python + + df = pd.DataFrame({"A": ["foo", "bar"], "B": [True, False]}, dtype=object) + df["C"] = pd.Series([True, True]) + + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: df.all(bool_only=True) + Out[5]: + C True + dtype: bool + + In [6]: df[["B", "C"]].all(bool_only=True) + Out[6]: + B False + C True + dtype: bool + +*New behavior*: + +.. ipython:: python + :okwarning: + + In [5]: df.all(bool_only=True) + + In [6]: df[["B", "C"]].all(bool_only=True) + + +Other DataFrame reductions with ``numeric_only=None`` will also avoid +this pathological behavior (:issue:`37827`): + +.. ipython:: python + + df = pd.DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object) + + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df.mean() + Out[3]: Series([], dtype: float64) + + In [4]: df[["A"]].mean() + Out[4]: + A 1.0 + dtype: float64 + +*New behavior*: + +.. ipython:: python + :okwarning: + + df.mean() + + df[["A"]].mean() + +Moreover, DataFrame reductions with ``numeric_only=None`` will now be +consistent with their Series counterparts. In particular, for +reductions where the Series method raises ``TypeError``, the +DataFrame reduction will now consider that column non-numeric +instead of casting to a NumPy array which may have different semantics (:issue:`36076`, +:issue:`28949`, :issue:`21020`). + +.. ipython:: python + :okwarning: + + ser = pd.Series([0, 1], dtype="category", name="A") + df = ser.to_frame() + + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: df.any() + Out[5]: + A True + dtype: bool + +*New behavior*: + +.. ipython:: python + :okwarning: + + df.any() + + +.. _whatsnew_120.api_breaking.python: + +Increased minimum version for Python +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas 1.2.0 supports Python 3.7.1 and higher (:issue:`35214`). + +.. _whatsnew_120.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some minimum supported versions of dependencies were updated (:issue:`35214`). +If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.16.5 | X | X | ++-----------------+-----------------+----------+---------+ +| pytz | 2017.3 | X | X | ++-----------------+-----------------+----------+---------+ +| python-dateutil | 2.7.3 | X | | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.2.1 | | | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.6.8 | | X | ++-----------------+-----------------+----------+---------+ +| pytest (dev) | 5.0.1 | | X | ++-----------------+-----------------+----------+---------+ +| mypy (dev) | 0.782 | | X | ++-----------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. 
+ ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 | 4.6.0 | | ++-----------------+-----------------+---------+ +| fastparquet | 0.3.2 | | ++-----------------+-----------------+---------+ +| fsspec | 0.7.4 | | ++-----------------+-----------------+---------+ +| gcsfs | 0.6.0 | | ++-----------------+-----------------+---------+ +| lxml | 4.3.0 | X | ++-----------------+-----------------+---------+ +| matplotlib | 2.2.3 | X | ++-----------------+-----------------+---------+ +| numba | 0.46.0 | | ++-----------------+-----------------+---------+ +| openpyxl | 2.6.0 | X | ++-----------------+-----------------+---------+ +| pyarrow | 0.15.0 | X | ++-----------------+-----------------+---------+ +| pymysql | 0.7.11 | X | ++-----------------+-----------------+---------+ +| pytables | 3.5.1 | X | ++-----------------+-----------------+---------+ +| s3fs | 0.4.0 | | ++-----------------+-----------------+---------+ +| scipy | 1.2.0 | | ++-----------------+-----------------+---------+ +| sqlalchemy | 1.2.8 | X | ++-----------------+-----------------+---------+ +| xarray | 0.12.3 | X | ++-----------------+-----------------+---------+ +| xlrd | 1.2.0 | X | ++-----------------+-----------------+---------+ +| xlsxwriter | 1.0.2 | X | ++-----------------+-----------------+---------+ +| xlwt | 1.3.0 | X | ++-----------------+-----------------+---------+ +| pandas-gbq | 0.12.0 | | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. _whatsnew_120.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for Datetime-like :class:`Index` subclasses. This will affect sort order when sorting a DataFrame on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, the count of missing values is no longer necessarily last in the list of duplicate counts. Instead, its position corresponds to the position in the original Series. When using :meth:`Index.sort_values` for Datetime-like :class:`Index` subclasses, NaTs ignored the ``na_position`` argument and were sorted to the beginning. Now they respect ``na_position``, the default being ``last``, same as other :class:`Index` subclasses (:issue:`35992`) +- Passing an invalid ``fill_value`` to :meth:`Categorical.take`, :meth:`.DatetimeArray.take`, :meth:`TimedeltaArray.take`, or :meth:`PeriodArray.take` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) +- Passing an invalid ``fill_value`` to :meth:`Series.shift` with a ``CategoricalDtype`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) +- Passing an invalid value to :meth:`IntervalIndex.insert` or :meth:`CategoricalIndex.insert` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) +- Attempting to reindex a Series with a :class:`CategoricalIndex` with an invalid ``fill_value`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) +- :meth:`CategoricalIndex.append` with an index that contains non-category values will now cast instead of raising ``TypeError`` (:issue:`38098`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_120.deprecations: + +Deprecations +~~~~~~~~~~~~ +- Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`) +- Deprecated parameter ``dtype`` of method :meth:`~Index.copy` for all :class:`Index` subclasses. Use the :meth:`~Index.astype` method instead for changing dtype (:issue:`35853`) +- Deprecated parameters ``levels`` and ``codes`` in :meth:`MultiIndex.copy`. Use the :meth:`~MultiIndex.set_levels` and :meth:`~MultiIndex.set_codes` methods instead (:issue:`36685`) +- Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`) +- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`35224`) +- The method :meth:`Index.to_native_types` is deprecated. Use ``.astype(str)`` instead (:issue:`28867`) +- Deprecated indexing :class:`DataFrame` rows with a single datetime-like string as ``df[string]`` (given the ambiguity whether it is indexing the rows or selecting a column), use ``df.loc[string]`` instead (:issue:`36179`) +- Deprecated :meth:`Index.is_all_dates` (:issue:`27744`) +- The default value of ``regex`` for :meth:`Series.str.replace` will change from ``True`` to ``False`` in a future release. In addition, single character regular expressions will *not* be treated as literal strings when ``regex=True`` is set (:issue:`24804`) +- Deprecated automatic alignment on comparison operations between :class:`DataFrame` and :class:`Series`, do ``frame, ser = frame.align(ser, axis=1, copy=False)`` before e.g. ``frame == ser`` (:issue:`28759`) +- :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`) +- Using "outer" ufuncs on DataFrames to return 4d ndarray is now deprecated. Convert to an ndarray first (:issue:`23743`) +- Deprecated slice-indexing on tz-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`) +- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`) +- Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`) +- :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. 
Use the named set methods instead (:issue:`36758`)
+- :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated and will be removed in a future version (:issue:`37545`)
+- :meth:`Series.slice_shift` and :meth:`DataFrame.slice_shift` are deprecated, use :meth:`Series.shift` or :meth:`DataFrame.shift` instead (:issue:`37601`)
+- Partial slicing on unordered :class:`.DatetimeIndex` objects with keys that are not in the index is deprecated and will be removed in a future version (:issue:`18531`)
+- The ``how`` keyword in :meth:`PeriodIndex.astype` is deprecated and will be removed in a future version, use ``index.to_timestamp(how=how)`` instead (:issue:`37982`)
+- Deprecated :meth:`Index.asi8` for :class:`Index` subclasses other than :class:`.DatetimeIndex`, :class:`.TimedeltaIndex`, and :class:`PeriodIndex` (:issue:`37877`)
+- The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`)
+- The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. It will be removed in a future version (:issue:`37999`)
+
+**Calling NumPy ufuncs on non-aligned DataFrames**
+
+Calling NumPy ufuncs on non-aligned DataFrames changed behaviour in pandas
+1.2.0 (to align the inputs before calling the ufunc), but this change is
+reverted in pandas 1.2.1. The behaviour to not align is now deprecated instead;
+see :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>` for
+more details.
+
+.. ---------------------------------------------------------------------------
+
+
+.. _whatsnew_120.performance:
+
+Performance improvements
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Performance improvements when creating DataFrame or Series with dtype ``str`` or :class:`StringDtype` from array with many string elements (:issue:`36304`, :issue:`36317`, :issue:`36325`, :issue:`36432`, :issue:`37371`)
+- Performance improvement in :meth:`.GroupBy.agg` with the ``numba`` engine (:issue:`35759`)
+- Performance improvement in :meth:`Series.map` when mapping from a huge dictionary (:issue:`34717`)
+- Performance improvement in :meth:`.GroupBy.transform` with the ``numba`` engine (:issue:`36240`)
+- :class:`.Styler` uuid method altered to compress data transmission over web whilst maintaining reasonably low table collision probability (:issue:`36345`)
+- Performance improvement in :func:`to_datetime` with non-ns time unit for ``float`` ``dtype`` columns (:issue:`20445`)
+- Performance improvement in setting values on an :class:`IntervalArray` (:issue:`36310`)
+- The internal index method :meth:`~Index._shallow_copy` now makes the new index and original index share cached attributes, avoiding creating these again, if created on either. This can speed up operations that depend on creating copies of existing indexes (:issue:`36840`)
+- Performance improvement in :meth:`.RollingGroupby.count` (:issue:`35625`)
+- Small performance decrease to :meth:`.Rolling.min` and :meth:`.Rolling.max` for fixed windows (:issue:`36567`)
+- Reduced peak memory usage in :meth:`DataFrame.to_pickle` when using ``protocol=5`` in Python 3.8+ (:issue:`34244`)
+- Faster ``dir`` calls when the object has many index labels, e.g.
``dir(ser)`` (:issue:`37450`) +- Performance improvement in :class:`ExpandingGroupby` (:issue:`37064`) +- Performance improvement in :meth:`Series.astype` and :meth:`DataFrame.astype` for :class:`Categorical` (:issue:`8628`) +- Performance improvement in :meth:`DataFrame.groupby` for ``float`` ``dtype`` (:issue:`28303`), changes of the underlying hash-function can lead to changes in float based indexes sort ordering for ties (e.g. :meth:`Index.value_counts`) +- Performance improvement in :meth:`pd.isin` for inputs with more than 1e6 elements (:issue:`36611`) +- Performance improvement for :meth:`DataFrame.__setitem__` with list-like indexers (:issue:`37954`) +- :meth:`read_json` now avoids reading entire file into memory when chunksize is specified (:issue:`34548`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_120.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ +- :meth:`Categorical.fillna` will always return a copy, validate a passed fill value regardless of whether there are any NAs to fill, and disallow an ``NaT`` as a fill value for numeric categories (:issue:`36530`) +- Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`) +- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`) +- Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`) +- Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with tz-aware ``datetime64`` categories incorrectly dropping the time zone information instead of casting to object dtype (:issue:`38136`) + +Datetime-like +^^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.combine_first` that would convert datetime-like column on other :class:`DataFrame` to integer when the column is not present in original :class:`DataFrame` (:issue:`28481`) +- Bug in :attr:`.DatetimeArray.date` where a ``ValueError`` would be raised with a read-only backing array (:issue:`33530`) +- Bug in ``NaT`` comparisons failing to raise ``TypeError`` on invalid inequality comparisons (:issue:`35046`) +- Bug in :class:`.DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g. 
months=12) (:issue:`34511`) +- Bug in :meth:`.DatetimeIndex.get_slice_bound` where ``datetime.date`` objects were not accepted or naive :class:`Timestamp` with a tz-aware :class:`.DatetimeIndex` (:issue:`35690`) +- Bug in :meth:`.DatetimeIndex.slice_locs` where ``datetime.date`` objects were not accepted (:issue:`34077`) +- Bug in :meth:`.DatetimeIndex.searchsorted`, :meth:`.TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64``, ``timedelta64`` or :class:`Period` dtype placement of ``NaT`` values being inconsistent with NumPy (:issue:`36176`, :issue:`36254`) +- Inconsistency in :class:`.DatetimeArray`, :class:`.TimedeltaArray`, and :class:`.PeriodArray` method ``__setitem__`` casting arrays of strings to datetime-like scalars but not scalar strings (:issue:`36261`) +- Bug in :meth:`.DatetimeArray.take` incorrectly allowing ``fill_value`` with a mismatched time zone (:issue:`37356`) +- Bug in :class:`.DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`) +- :class:`Timestamp` and :class:`.DatetimeIndex` comparisons between tz-aware and tz-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`) +- Bug in :meth:`.DatetimeIndex.equals` and :meth:`.TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`) +- :meth:`Series.to_json`, :meth:`DataFrame.to_json`, and :meth:`read_json` now implement time zone parsing when orient structure is ``table`` (:issue:`35973`) +- :meth:`astype` now attempts to convert to ``datetime64[ns, tz]`` directly from ``object`` with inferred time zone from string (:issue:`35973`) +- Bug in :meth:`.TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`) +- Bug in :meth:`.DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched time zone (:issue:`37299`) +- Bug in adding a :class:`.BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) +- Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`) +- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` incorrectly casting integers to datetimes (:issue:`36621`) +- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` failing to consider tz-aware and tz-naive datetimes as always different (:issue:`35728`) +- Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`) +- Bug in :class:`Period` constructor now correctly handles nanoseconds in the ``value`` argument (:issue:`34621` and :issue:`17053`) + +Timedelta +^^^^^^^^^ +- Bug in :class:`.TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`) +- Bug in parsing of ISO 8601 durations in :class:`Timedelta` and :func:`to_datetime` (:issue:`29773`, :issue:`36204`) +- Bug in :func:`to_timedelta` with a read-only array incorrectly raising (:issue:`34857`) +- Bug in :class:`Timedelta` incorrectly truncating to sub-second portion of a string input when it has precision higher than nanoseconds (:issue:`36738`) + +Timezones +^^^^^^^^^ + +- Bug in :func:`date_range` was raising 
``AmbiguousTimeError`` for valid input with ``ambiguous=False`` (:issue:`35297`) +- Bug in :meth:`Timestamp.replace` was losing fold information (:issue:`37610`) + + +Numeric +^^^^^^^ +- Bug in :func:`to_numeric` where float precision was incorrect (:issue:`31364`) +- Bug in :meth:`DataFrame.any` with ``axis=1`` and ``bool_only=True`` ignoring the ``bool_only`` keyword (:issue:`32432`) +- Bug in :meth:`Series.equals` where a ``ValueError`` was raised when NumPy arrays were compared to scalars (:issue:`35267`) +- Bug in :class:`Series` where two Series each have a :class:`.DatetimeIndex` with different time zones having those indexes incorrectly changed when performing arithmetic operations (:issue:`33671`) +- Bug in :mod:`pandas.testing` module functions when used with ``check_exact=False`` on complex numeric types (:issue:`28235`) +- Bug in :meth:`DataFrame.__rmatmul__` error handling reporting transposed shapes (:issue:`21581`) +- Bug in :class:`Series` flex arithmetic methods where the result when operating with a ``list``, ``tuple`` or ``np.ndarray`` would have an incorrect name (:issue:`36760`) +- Bug in :class:`.IntegerArray` multiplication with ``timedelta`` and ``np.timedelta64`` objects (:issue:`36870`) +- Bug in :class:`MultiIndex` comparison with tuple incorrectly treating tuple as array-like (:issue:`21517`) +- Bug in :meth:`DataFrame.diff` with ``datetime64`` dtypes including ``NaT`` values failing to fill ``NaT`` results correctly (:issue:`32441`) +- Bug in :class:`DataFrame` arithmetic ops incorrectly accepting keyword arguments (:issue:`36843`) +- Bug in :class:`.IntervalArray` comparisons with :class:`Series` not returning Series (:issue:`36908`) +- Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. 
Behavior changed to raising ``ValueError`` (:issue:`36702`) +- Bug in :meth:`DataFrame.std` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`) +- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with ``datetime64`` dtype and ``skipna=False`` (:issue:`36907`) +- Bug in :meth:`DataFrame.idxmax` and :meth:`DataFrame.idxmin` with mixed dtypes incorrectly raising ``TypeError`` (:issue:`38195`) + +Conversion +^^^^^^^^^^ + +- Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetime-like columns (:issue:`21256`) +- Bug in :meth:`Series.astype` conversion from ``string`` to ``float`` raised in presence of ``pd.NA`` values (:issue:`37626`) + +Strings +^^^^^^^ +- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`) +- Bug in :func:`to_numeric` raising a ``TypeError`` when attempting to convert a string dtype Series containing only numeric strings and ``NA`` (:issue:`37262`) + +Interval +^^^^^^^^ + +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Interval` dtypes would be converted to object dtypes (:issue:`34871`) +- Bug in :meth:`IntervalIndex.take` with negative indices and ``fill_value=None`` (:issue:`37330`) +- Bug in :meth:`IntervalIndex.putmask` with datetime-like dtype incorrectly casting to object dtype (:issue:`37968`) +- Bug in :meth:`IntervalArray.astype` incorrectly dropping dtype information with a :class:`CategoricalDtype` object (:issue:`37984`) + +Indexing +^^^^^^^^ + +- Bug in :meth:`PeriodIndex.get_loc` incorrectly raising ``ValueError`` on non-datelike strings instead of ``KeyError``, causing similar errors in :meth:`Series.__getitem__`, :meth:`Series.__contains__`, and :meth:`Series.loc.__getitem__` (:issue:`34240`) +- Bug in :meth:`Index.sort_values` where, when empty values were passed, the method would break by trying to compare missing values instead of pushing them to the end of the sort order (:issue:`35584`) +- Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where ``int64`` arrays are returned instead of ``intp`` (:issue:`36359`) +- Bug in :meth:`DataFrame.sort_index` where parameter ascending passed as a list on a single level index gives wrong result (:issue:`32334`) +- Bug in :meth:`DataFrame.reset_index` was incorrectly raising a ``ValueError`` for input with a :class:`MultiIndex` with missing values in a level with ``Categorical`` dtype (:issue:`24206`) +- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`) +- Bug in :meth:`DataFrame.__getitem__` and :meth:`DataFrame.loc.__getitem__` with :class:`IntervalIndex` columns and a numeric indexer (:issue:`26490`) +- Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`) +- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` and a level named ``"0"`` (:issue:`37194`) +- Bug in :meth:`Series.__getitem__` when using an unsigned integer array as an indexer giving incorrect results or segfaulting instead of raising ``KeyError`` (:issue:`37218`) +- Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) +- Bug in :meth:`DataFrame.loc` returning empty result when indexer is a slice with negative step size (:issue:`38071`) +- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when the index was of 
``object`` dtype and the given numeric label was in the index (:issue:`26491`) +- Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from a :class:`MultiIndex` (:issue:`27104`) +- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a list-like indexer containing NA values (:issue:`37722`) +- Bug in :meth:`DataFrame.loc.__setitem__` expanding an empty :class:`DataFrame` with mixed dtypes (:issue:`37932`) +- Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) +- Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty DataFrame with ``tolerance`` not ``None`` or ``method="nearest"`` (:issue:`27315`) +- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using list-like indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) +- Bug on inserting a boolean label into a :class:`DataFrame` with a numeric :class:`Index` columns incorrectly casting to integer (:issue:`36319`) +- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) +- Bug in :meth:`MultiIndex.drop` does not raise if labels are partially found (:issue:`37820`) +- Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) +- Bug in :meth:`DataFrame.loc` raising ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) +- Bug in :meth:`Series.at` returning :class:`Series` with one element instead of scalar when index is a :class:`MultiIndex` with one level (:issue:`38053`) +- Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`) +- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` raising ``KeyError`` when columns were :class:`MultiIndex` with only one level (:issue:`29749`) +- Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` raising blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`) +- Bug in setting a new label on a :class:`DataFrame` or :class:`Series` with a :class:`CategoricalIndex` incorrectly raising ``TypeError`` when the new label is not among the index's categories (:issue:`38098`) +- Bug in :meth:`Series.loc` and :meth:`Series.iloc` raising ``ValueError`` when inserting a list-like ``np.array``, ``list`` or ``tuple`` in an ``object`` Series of equal length (:issue:`37748`, :issue:`37486`) +- Bug in :meth:`Series.loc` and :meth:`Series.iloc` setting all the values of an ``object`` Series with those of a list-like ``ExtensionArray`` instead of inserting it (:issue:`38271`) + +Missing +^^^^^^^ + +- Bug in :meth:`.SeriesGroupBy.transform` now correctly handles missing values for ``dropna=False`` (:issue:`35014`) +- Bug in :meth:`Series.nunique` with ``dropna=True`` was returning incorrect results when both ``NA`` and ``None`` missing values were present (:issue:`37566`) +- Bug in :meth:`Series.interpolate` where kwarg ``limit_area`` and ``limit_direction`` had no effect when using methods ``pad`` and ``backfill`` (:issue:`31048`) + +MultiIndex +^^^^^^^^^^ + +- Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with 
message ``"Expected label or tuple of labels"`` (:issue:`35301`) +- Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) +- Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`) +- Bug in :meth:`MultiIndex.drop` dropped ``NaN`` values when non existing key was given as input (:issue:`18853`) +- Bug in :meth:`MultiIndex.drop` dropping more values than expected when index has duplicates and is not sorted (:issue:`33494`) + +I/O +^^^ + +- :func:`read_sas` no longer leaks resources on failure (:issue:`35566`) +- Bug in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` caused a ``ValueError`` when it was called with a filename in combination with ``mode`` containing a ``b`` (:issue:`35058`) +- Bug in :meth:`read_csv` with ``float_precision='round_trip'`` did not handle ``decimal`` and ``thousands`` parameters (:issue:`35365`) +- :meth:`to_pickle` and :meth:`read_pickle` were closing user-provided file objects (:issue:`35679`) +- :meth:`to_csv` passes compression arguments for ``'gzip'`` always to ``gzip.GzipFile`` (:issue:`28103`) +- :meth:`to_csv` did not support zip compression for binary file object not having a filename (:issue:`35058`) +- :meth:`to_csv` and :meth:`read_csv` did not honor ``compression`` and ``encoding`` for path-like objects that are internally converted to file-like objects (:issue:`35677`, :issue:`26124`, :issue:`32392`) +- :meth:`DataFrame.to_pickle`, :meth:`Series.to_pickle`, and :meth:`read_pickle` did not support compression for file-objects (:issue:`26237`, :issue:`29054`, :issue:`29570`) +- Bug in :func:`LongTableBuilder.middle_separator` was duplicating LaTeX longtable entries in the List of Tables of a LaTeX document (:issue:`34360`) +- Bug in :meth:`read_csv` with ``engine='python'`` truncating data if multiple items present in first row and first element started with BOM (:issue:`36343`) +- Removed ``private_key`` and ``verbose`` from :func:`read_gbq` as they are no longer supported in ``pandas-gbq`` (:issue:`34654`, :issue:`30200`) +- Bumped minimum pytables version to 3.5.1 to avoid a ``ValueError`` in :meth:`read_hdf` (:issue:`24839`) +- Bug in :func:`read_table` and :func:`read_csv` when ``delim_whitespace=True`` and ``sep=default`` (:issue:`36583`) +- Bug in :meth:`DataFrame.to_json` and :meth:`Series.to_json` when used with ``lines=True`` and ``orient='records'`` the last line of the record is not appended with 'new line character' (:issue:`36888`) +- Bug in :meth:`read_parquet` with fixed offset time zones. 
String representation of time zones was not recognized (:issue:`35997`, :issue:`36004`) +- Bug in :meth:`DataFrame.to_html`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` ignoring the ``na_rep`` argument when ``float_format`` was also specified (:issue:`9046`, :issue:`13828`) +- Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) +- Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty DataFrame with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- Bug in :class:`HDFStore` was dropping time zone information when exporting a Series with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- :func:`read_csv` was closing user-provided binary file handles when ``engine="c"`` and an ``encoding`` was requested (:issue:`36980`) +- Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) +- Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) +- :meth:`DataFrame.to_excel`, :meth:`Series.to_excel`, :meth:`DataFrame.to_markdown`, and :meth:`Series.to_markdown` now support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) +- Bug in :func:`read_fwf` with ``skip_blank_lines=True`` was not skipping blank lines (:issue:`37758`) +- Parse missing values using :func:`read_json` with ``dtype=False`` to ``NaN`` instead of ``None`` (:issue:`28501`) +- :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other ``read_*`` functions (:issue:`37909`) +- :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`) +- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`, :issue:`37983`) +- :meth:`DataFrame.to_csv` was re-opening file-like handles that also implement ``os.PathLike`` (:issue:`38125`) +- Bug in the conversion of a sliced ``pyarrow.Table`` with missing values to a DataFrame (:issue:`38525`) +- Bug in :func:`read_sql_table` raising a ``sqlalchemy.exc.OperationalError`` when column names contained a percentage sign (:issue:`37517`) + +Period +^^^^^^ + +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Period` dtypes would be converted to object dtypes (:issue:`34871`) + +Plotting +^^^^^^^^ + +- Bug in :meth:`DataFrame.plot` was rotating xticklabels when ``subplots=True``, even if the x-axis wasn't an irregular time series (:issue:`29460`) +- Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes caused a ``ValueError`` (:issue:`21003`) +- Bug in :meth:`DataFrame.plot.bar` and :meth:`Series.plot.bar` where ticks positions were assigned by value order instead of using the actual value for numeric or a smart ordering for string (:issue:`26186`, :issue:`11465`). 
This fix has been reverted in pandas 1.2.1, see :doc:`v1.2.1` +- Twinned axes were losing their tick labels which should only happen to all but the last row or column of 'externally' shared axes (:issue:`33819`) +- Bug in :meth:`Series.plot` and :meth:`DataFrame.plot` was throwing a :exc:`ValueError` when the Series or DataFrame was + indexed by a :class:`.TimedeltaIndex` with a fixed frequency and the x-axis lower limit was greater than the upper limit (:issue:`37454`) +- Bug in :meth:`.DataFrameGroupBy.boxplot` when ``subplots=False`` would raise a ``KeyError`` (:issue:`16748`) +- Bug in :meth:`DataFrame.plot` and :meth:`Series.plot` was overwriting matplotlib's shared y axes behavior when no ``sharey`` parameter was passed (:issue:`37942`) +- Bug in :meth:`DataFrame.plot` was raising a ``TypeError`` with ``ExtensionDtype`` columns (:issue:`32073`) + +Styler +^^^^^^ + +- Bug in :meth:`Styler.render` HTML was generated incorrectly because of formatting error in ``rowspan`` attribute, it now matches with w3 syntax (:issue:`38234`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :meth:`.DataFrameGroupBy.count` and :meth:`SeriesGroupBy.sum` returning ``NaN`` for missing categories when grouped on multiple ``Categoricals``. Now returning ``0`` (:issue:`35028`) +- Bug in :meth:`.DataFrameGroupBy.apply` that would sometimes throw an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`) +- Bug in :meth:`DataFrame.resample` that would throw a ``ValueError`` when resampling from ``"D"`` to ``"24H"`` over a transition into daylight savings time (DST) (:issue:`35219`) +- Bug when combining methods :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` and :meth:`DataFrame.interpolate` raising a ``TypeError`` (:issue:`35325`) +- Bug in :meth:`.DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply`` (:issue:`34656`) +- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. 
``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values (:issue:`9959`) +- Bug in :meth:`.DataFrameGroupBy.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`) +- Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) +- Bug in :meth:`.DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) +- Bug in :meth:`.Rolling.sum` returned wrong values when dtypes where mixed between float and integer and ``axis=1`` (:issue:`20649`, :issue:`35596`) +- Bug in :meth:`.Rolling.count` returned ``np.nan`` with :class:`~pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in the window (:issue:`35579`) +- Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`) +- Bug in :meth:`.DataFrameGroupBy.ffill` and :meth:`.DataFrameGroupBy.bfill` where a ``NaN`` group would return filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`) +- Bug in :meth:`.RollingGroupby.count` where a ``ValueError`` was raised when specifying the ``closed`` parameter (:issue:`35869`) +- Bug in :meth:`.DataFrameGroupBy.rolling` returning wrong values with partial centered window (:issue:`36040`) +- Bug in :meth:`.DataFrameGroupBy.rolling` returned wrong values with time aware window containing ``NaN``. Raises ``ValueError`` because windows are not monotonic now (:issue:`34617`) +- Bug in :meth:`.Rolling.__iter__` where a ``ValueError`` was not raised when ``min_periods`` was larger than ``window`` (:issue:`37156`) +- Using :meth:`.Rolling.var` instead of :meth:`.Rolling.std` avoids numerical issues for :meth:`.Rolling.corr` when :meth:`.Rolling.var` is still within floating point precision while :meth:`.Rolling.std` is not (:issue:`31286`) +- Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.Resampler.quantile` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`) +- Bug in :meth:`.Rolling.median` and :meth:`.Rolling.quantile` returned wrong values for :class:`.BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`) +- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) +- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) +- Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. 
"shift") (:issue:`36308`) +- Bug in :meth:`.DataFrameGroupBy.resample` using ``.agg`` with sum produced different result than just calling ``.sum`` (:issue:`33548`) +- Bug in :meth:`.DataFrameGroupBy.apply` dropped values on ``nan`` group when returning the same axes with the original frame (:issue:`38227`) +- Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle with arraylike ``q`` when grouping by columns (:issue:`33795`) +- Bug in :meth:`DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`) + +Reshaping +^^^^^^^^^ + +- Bug in :meth:`DataFrame.crosstab` was returning incorrect results on inputs with duplicate row names, duplicate column names or duplicate names between row and column labels (:issue:`22529`) +- Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`) +- Bug in :func:`concat` and :class:`DataFrame` constructor where input index names are not preserved in some cases (:issue:`13475`) +- Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) +- Bug in :meth:`DataFrame.stack` where an empty DataFrame.stack would raise an error (:issue:`36113`). Now returning an empty Series with empty MultiIndex. +- Bug in :meth:`Series.unstack`. Now a Series with single level of Index trying to unstack would raise a ``ValueError`` (:issue:`36113`) +- Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) +- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`) +- Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns are both multiindexed (:issue:`36360`) +- Bug in :meth:`DataFrame.pivot` modified ``index`` argument when ``columns`` was passed but ``values`` was not (:issue:`37635`) +- Bug in :meth:`DataFrame.join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) +- Bug in :meth:`DataFrame.combine_first` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`) +- Fixed regression in :func:`merge` on merging :class:`.DatetimeIndex` with empty DataFrame (:issue:`36895`) +- Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) +- Bug in :meth:`DataFrame.merge` and :meth:`pandas.merge` returning inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) +- Bug in :func:`merge_ordered` couldn't handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) +- Bug in :func:`merge_ordered` returned wrong join result when length of ``left_by`` or ``right_by`` equals to the rows of ``left`` or ``right`` (:issue:`38166`) +- Bug in :func:`merge_ordered` didn't raise when elements in ``left_by`` or ``right_by`` not exist in ``left`` columns or ``right`` columns (:issue:`38167`) +- Bug in :func:`DataFrame.drop_duplicates` not validating bool dtype for ``ignore_index`` keyword (:issue:`38274`) + +ExtensionArray +^^^^^^^^^^^^^^ + +- Fixed bug where :class:`DataFrame` column set to scalar extension type via a dict instantiation was considered an object type rather than the extension type 
(:issue:`35965`) +- Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`28488`) +- Fixed bug when applying a NumPy ufunc with multiple outputs to an :class:`.IntegerArray` returning ``None`` (:issue:`36913`) +- Fixed an inconsistency in :class:`.PeriodArray`'s ``__init__`` signature to those of :class:`.DatetimeArray` and :class:`.TimedeltaArray` (:issue:`37289`) +- Reductions for :class:`.BooleanArray`, :class:`.Categorical`, :class:`.DatetimeArray`, :class:`.FloatingArray`, :class:`.IntegerArray`, :class:`.PeriodArray`, :class:`.TimedeltaArray`, and :class:`.PandasArray` are now keyword-only methods (:issue:`37541`) +- Fixed a bug where a ``TypeError`` was wrongly raised if a membership check was made on an ``ExtensionArray`` containing nan-like values (:issue:`37867`) + +Other +^^^^^ + +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising an ``AssertionError`` instead of a ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) +- Fixed metadata propagation in :meth:`Series.abs` and ufuncs called on Series and DataFrames (:issue:`28283`) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly casting from ``PeriodDtype`` to object dtype (:issue:`34871`) +- Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`) +- Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`, :issue:`37381`) +- Fixed metadata propagation when selecting columns with ``DataFrame.__getitem__`` (:issue:`28283`) +- Bug in :meth:`Index.intersection` with non-:class:`Index` failing to set the correct name on the returned :class:`Index` (:issue:`38111`) +- Bug in :meth:`RangeIndex.intersection` failing to set the correct name on the returned :class:`Index` in some corner cases (:issue:`38197`) +- Bug in :meth:`Index.difference` failing to set the correct name on the returned :class:`Index` in some corner cases (:issue:`38268`) +- Bug in :meth:`Index.union` behaving differently depending on whether operand is an :class:`Index` or other list-like (:issue:`36384`) +- Bug in :meth:`Index.intersection` with non-matching numeric dtypes casting to ``object`` dtype instead of minimal common dtype (:issue:`38122`) +- Bug in :meth:`IntervalIndex.union` returning an incorrectly-typed :class:`Index` when empty (:issue:`38282`) +- Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`) +- Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`) +- Bug in :meth:`Index.drop` raising ``InvalidIndexError`` when index has duplicates (:issue:`38051`) +- Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) +- Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`) +- Bug in 
:func:`.is_bool_dtype` would raise when passed a valid string such as ``"boolean"`` (:issue:`38386`) +- Fixed regression in logical operators raising ``ValueError`` when columns of :class:`DataFrame` are a :class:`CategoricalIndex` with unused categories (:issue:`38367`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_120.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.1.5..v1.2.0 diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst new file mode 100644 index 00000000..34e28eab --- /dev/null +++ b/doc/source/whatsnew/v1.2.1.rst @@ -0,0 +1,153 @@ +.. _whatsnew_121: + +What's new in 1.2.1 (January 20, 2021) +-------------------------------------- + +These are the changes in pandas 1.2.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_121.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`) +- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamReaderWriter`` in binary mode instead of in text mode (:issue:`39247`) +- Fixed regression in :meth:`read_csv` and other read functions were the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`) +- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`) +- Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occurred (:issue:`39202`) +- Fixed regression in ``DataFrame.__setitem__`` raising ``ValueError`` when expanding :class:`DataFrame` and new column is from type ``"0 - name"`` (:issue:`39010`) +- Fixed regression in setting with :meth:`DataFrame.loc` raising ``ValueError`` when :class:`DataFrame` has unsorted :class:`MultiIndex` columns and indexer is a scalar (:issue:`38601`) +- Fixed regression in setting with :meth:`DataFrame.loc` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`) +- Fixed regression in :meth:`~DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`) +- Fixed regression in :meth:`.GroupBy.sem` where the presence of non-numeric columns would cause an error instead of being dropped (:issue:`38774`) +- Fixed regression in :meth:`.DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`) +- Fixed regression in :meth:`DataFrame.groupby` when aggregating an ``ExtensionDType`` that could fail for non-numeric values (:issue:`38980`) +- Fixed regression in :meth:`.Rolling.skew` and :meth:`.Rolling.kurt` modifying the object inplace (:issue:`38908`) +- Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`) +- Fixed regression in :meth:`DataFrame.apply` with ``axis=1`` using str accessor in apply function (:issue:`38979`) +- Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`) +- Fixed regression in :meth:`Series.fillna` that raised ``RecursionError`` with ``datetime64[ns, UTC]`` dtype (:issue:`38851`) +- Fixed regression in comparisons between ``NaT`` and ``datetime.date`` objects 
incorrectly returning ``True`` (:issue:`39151`) +- Fixed regression in calling NumPy :func:`~numpy.ufunc.accumulate` ufuncs on DataFrames, e.g. ``np.maximum.accumulate(df)`` (:issue:`39259`) +- Fixed regression in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`) +- Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`) +- Fixed regression in :func:`pandas.testing.assert_frame_equal` raising ``TypeError`` with ``check_like=True`` when :class:`Index` or columns have mixed dtype (:issue:`39168`) + +We have reverted a commit that resulted in several plotting related regressions in pandas 1.2.0 (:issue:`38969`, :issue:`38736`, :issue:`38865`, :issue:`38947` and :issue:`39126`). +As a result, bugs reported as fixed in pandas 1.2.0 related to inconsistent tick labeling in bar plots are again present (:issue:`26186` and :issue:`11465`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_121.ufunc_deprecation: + +Calling NumPy ufuncs on non-aligned DataFrames +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Before pandas 1.2.0, calling a NumPy ufunc on non-aligned DataFrames (or +DataFrame / Series combination) would ignore the indices, only match +the inputs by shape, and use the index/columns of the first DataFrame for +the result: + +.. code-block:: ipython + + In [1]: df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[0, 1]) + In [2]: df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[1, 2]) + In [3]: df1 + Out[3]: + a b + 0 1 3 + 1 2 4 + In [4]: df2 + Out[4]: + a b + 1 1 3 + 2 2 4 + + In [5]: np.add(df1, df2) + Out[5]: + a b + 0 2 6 + 1 4 8 + +This contrasts with how other pandas operations work, which first align +the inputs: + +.. code-block:: ipython + + In [6]: df1 + df2 + Out[6]: + a b + 0 NaN NaN + 1 3.0 7.0 + 2 NaN NaN + +In pandas 1.2.0, we refactored how NumPy ufuncs are called on DataFrames, and +this started to align the inputs first (:issue:`39184`), as happens in other +pandas operations and as it happens for ufuncs called on Series objects. + +For pandas 1.2.1, we restored the previous behaviour to avoid a breaking +change, but the above example of ``np.add(df1, df2)`` with non-aligned inputs +will now raise a warning, and a future pandas 2.0 release will start +aligning the inputs first (:issue:`39184`). Calling a NumPy ufunc on Series +objects (e.g. ``np.add(s1, s2)``) already aligns and continues to do so. + +To avoid the warning and keep the current behaviour of ignoring the indices, +convert one of the arguments to a NumPy array: + +.. code-block:: ipython + + In [7]: np.add(df1, np.asarray(df2)) + Out[7]: + a b + 0 2 6 + 1 4 8 + +To obtain the future behaviour and silence the warning, you can align manually +before passing the arguments to the ufunc: + +.. code-block:: ipython + + In [8]: df1, df2 = df1.align(df2) + In [9]: np.add(df1, df2) + Out[9]: + a b + 0 NaN NaN + 1 3.0 7.0 + 2 NaN NaN + +.. --------------------------------------------------------------------------- + +.. _whatsnew_121.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in :meth:`read_csv` with ``float_precision="high"`` causing a segfault or wrong parsing of long exponent strings.
This resulted in a regression in some cases as the default for ``float_precision`` was changed in pandas 1.2.0 (:issue:`38753`) +- Bug in :func:`read_csv` not closing an opened file handle when a ``csv.Error`` or ``UnicodeDecodeError`` occurred while initializing (:issue:`39024`) +- Bug in :func:`pandas.testing.assert_index_equal` raising ``TypeError`` with ``check_order=False`` when :class:`Index` has mixed dtype (:issue:`39168`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_121.other: + +Other +~~~~~ + +- The deprecated attributes ``_AXIS_NAMES`` and ``_AXIS_NUMBERS`` of :class:`DataFrame` and :class:`Series` will no longer show up in ``dir`` or ``inspect.getmembers`` calls (:issue:`38740`) +- Bumped minimum fastparquet version to 0.4.0 to avoid ``AttributeError`` from numba (:issue:`38344`) +- Bumped minimum pymysql version to 0.8.1 to avoid test failures (:issue:`38344`) +- Fixed build failure on MacOS 11 in Python 3.9.1 (:issue:`38766`) +- Added reference to backwards incompatible ``check_freq`` arg of :func:`testing.assert_frame_equal` and :func:`testing.assert_series_equal` in :ref:`pandas 1.1.0 what's new ` (:issue:`34050`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_121.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.2.0..v1.2.1 diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst new file mode 100644 index 00000000..1a9204bc --- /dev/null +++ b/doc/source/whatsnew/v1.2.2.rst @@ -0,0 +1,49 @@ +.. _whatsnew_122: + +What's new in 1.2.2 (February 09, 2021) +--------------------------------------- + +These are the changes in pandas 1.2.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_122.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :func:`read_excel` that caused it to raise ``AttributeError`` when checking version of older xlrd versions (:issue:`38955`) +- Fixed regression in :class:`DataFrame` constructor reordering elements when constructing from a datetime ndarray with dtype not ``"datetime64[ns]"`` (:issue:`39422`) +- Fixed regression in :meth:`DataFrame.astype` and :meth:`Series.astype` not casting to bytes dtype (:issue:`39474`) +- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`) +- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) +- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`) +- Fixed regression in :meth:`Categorical.astype` casting to incorrect dtype when ``np.int32`` is passed to dtype argument (:issue:`39402`) +- Fixed regression in :meth:`~DataFrame.to_excel` creating corrupt files when appending (``mode="a"``) to an existing file (:issue:`39576`) +- Fixed regression in :meth:`DataFrame.transform` failing in case of an empty DataFrame or Series (:issue:`39636`) +- Fixed regression in :meth:`~DataFrame.groupby` or :meth:`~DataFrame.resample` when aggregating an all-NaN or numeric object dtype column (:issue:`39329`) +- Fixed regression in :meth:`.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`) +- Fixed regression in :func:`read_excel` that incorrectly raised when the argument ``io`` was a non-path and non-buffer and the ``engine`` argument was specified (:issue:`39528`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_122.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- :func:`pandas.read_excel` error message when a specified ``sheetname`` does not exist is now uniform across engines (:issue:`39250`) +- Fixed bug in :func:`pandas.read_excel` producing incorrect results when the engine ``openpyxl`` is used and the Excel file is missing or has incorrect dimension information; the fix requires ``openpyxl`` >= 3.0.0; prior versions may still fail (:issue:`38956`, :issue:`39001`) +- Fixed bug in :func:`pandas.read_excel` sometimes producing a ``DataFrame`` with trailing rows of ``np.nan`` when the engine ``openpyxl`` is used (:issue:`39181`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_122.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.2.1..v1.2.2 diff --git a/doc/source/whatsnew/v1.2.3.rst b/doc/source/whatsnew/v1.2.3.rst new file mode 100644 index 00000000..dec2d061 --- /dev/null +++ b/doc/source/whatsnew/v1.2.3.rst @@ -0,0 +1,32 @@ +.. _whatsnew_123: + +What's new in 1.2.3 (March 02, 2021) +------------------------------------ + +These are the changes in pandas 1.2.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_123.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`~DataFrame.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`) +- Fixed regression in nullable integer unary ops propagating mask on assignment (:issue:`39943`) +- Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`) +- Fixed regression in :meth:`~DataFrame.to_json` failing to use ``compression`` with URL-like paths that are internally opened in binary mode or with user-provided file objects that are opened in binary mode (:issue:`39985`) +- Fixed regression in :meth:`Series.sort_index` and :meth:`DataFrame.sort_index`, which exited with an ungraceful error when having kwarg ``ascending=None`` passed. Passing ``ascending=None`` is still considered invalid, and the improved error message suggests a proper usage (``ascending`` must be a boolean or a list-like of boolean) (:issue:`39434`) +- Fixed regression in :meth:`DataFrame.transform` and :meth:`Series.transform` giving incorrect column labels when passed a dictionary with a mix of list and non-list values (:issue:`40018`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_123.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.2.2..v1.2.3 diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst new file mode 100644 index 00000000..433ee375 --- /dev/null +++ b/doc/source/whatsnew/v1.2.4.rst @@ -0,0 +1,33 @@ +.. _whatsnew_124: + +What's new in 1.2.4 (April 12, 2021) +------------------------------------ + +These are the changes in pandas 1.2.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_124.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`DataFrame.sum` when ``min_count`` greater than the :class:`DataFrame` shape was passed resulted in a ``ValueError`` (:issue:`39738`) +- Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`) +- Fixed regression in (in)equality comparison of ``pd.NaT`` with a non-datetimelike numpy array returning a scalar instead of an array (:issue:`40722`) +- Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`) +- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`) +- Fixed regression in repr of floats in an ``object`` column not respecting ``float_format`` when printed in the console or outputted through :meth:`DataFrame.to_string`, :meth:`DataFrame.to_html`, and :meth:`DataFrame.to_latex` (:issue:`40024`) +- Fixed regression in NumPy ufuncs such as ``np.add`` not passing through all arguments for :class:`DataFrame` (:issue:`40662`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_124.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.2.3..v1.2.4 diff --git a/doc/source/whatsnew/v1.2.5.rst b/doc/source/whatsnew/v1.2.5.rst new file mode 100644 index 00000000..d3ceb2b9 --- /dev/null +++ b/doc/source/whatsnew/v1.2.5.rst @@ -0,0 +1,31 @@ +.. _whatsnew_125: + +What's new in 1.2.5 (June 22, 2021) +----------------------------------- + +These are the changes in pandas 1.2.5. 
See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_125.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :func:`concat` between two :class:`DataFrame` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`) +- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`) +- Fixed regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`) +- Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are a NumPy float array (:issue:`40371`) +- Fixed regression in :func:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`) +- Fixed regression in :meth:`DataFrame.astype` with ``dtype=str`` failing to convert ``NaN`` in categorical columns (:issue:`41797`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_125.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.2.4..v1.2.5|HEAD diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst new file mode 100644 index 00000000..a392aeb5 --- /dev/null +++ b/doc/source/whatsnew/v1.3.0.rst @@ -0,0 +1,1236 @@ +.. _whatsnew_130: + +What's new in 1.3.0 (July 2, 2021) +---------------------------------- + +These are the changes in pandas 1.3.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. warning:: + + When reading new Excel 2007+ (``.xlsx``) files, the default argument + ``engine=None`` to :func:`read_excel` will now result in using the + `openpyxl `_ engine in all cases + when the option :attr:`io.excel.xlsx.reader` is set to ``"auto"``. + Previously, some cases would use the + `xlrd `_ engine instead. See + :ref:`What's new 1.2.0 ` for background on this change. + +.. --------------------------------------------------------------------------- + +.. _whatsnew_130.enhancements: + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_130.enhancements.read_csv_json_http_headers: + +Custom HTTP(s) headers when reading csv or json files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When reading from a remote URL that is not handled by fsspec (e.g. HTTP and +HTTPS) the dictionary passed to ``storage_options`` will be used to create the +headers included in the request. This can be used to control the User-Agent +header or send other custom headers (:issue:`36688`). +For example: + +.. ipython:: python + + headers = {"User-Agent": "pandas"} + df = pd.read_csv( + "https://download.bls.gov/pub/time.series/cu/cu.item", + sep="\t", + storage_options=headers + ) + +.. _whatsnew_130.enhancements.read_to_xml: + +Read and write XML documents +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We added I/O support to read and render shallow versions of `XML`_ documents with +:func:`read_xml` and :meth:`DataFrame.to_xml`. Using `lxml`_ as the parser, +both XPath 1.0 and XSLT 1.0 are available. (:issue:`27554`) + +.. _XML: https://www.w3.org/standards/xml/core +.. _lxml: https://lxml.de + +.. 
code-block:: ipython + + In [1]: xml = """<?xml version='1.0' encoding='utf-8'?> + ...: <data> + ...: <row> + ...: <shape>square</shape> + ...: <degrees>360</degrees> + ...: <sides>4.0</sides> + ...: </row> + ...: <row> + ...: <shape>circle</shape> + ...: <degrees>360</degrees> + ...: <sides/> + ...: </row> + ...: <row> + ...: <shape>triangle</shape> + ...: <degrees>180</degrees> + ...: <sides>3.0</sides> + ...: </row> + ...: </data>""" + + In [2]: df = pd.read_xml(xml) + In [3]: df + Out[3]: + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + + In [4]: df.to_xml() + Out[4]: + <?xml version='1.0' encoding='utf-8'?> + <data> + <row> + <index>0</index> + <shape>square</shape> + <degrees>360</degrees> + <sides>4.0</sides> + </row> + <row> + <index>1</index> + <shape>circle</shape> + <degrees>360</degrees> + <sides/> + </row> + <row> + <index>2</index> + <shape>triangle</shape> + <degrees>180</degrees> + <sides>3.0</sides> + </row> + </data> + +For more, see :ref:`io.xml` in the user guide on IO tools. + +.. _whatsnew_130.enhancements.styler: + +Styler enhancements +^^^^^^^^^^^^^^^^^^^ + +We provided some focused development on :class:`.Styler`. See also the `Styler documentation <../user_guide/style.ipynb>`_ +which has been revised and improved (:issue:`39720`, :issue:`39317`, :issue:`40493`). A brief illustrative sketch follows the list below. + + - The method :meth:`.Styler.set_table_styles` can now accept more natural CSS language for arguments, such as ``'color:red;'`` instead of ``[('color', 'red')]`` (:issue:`39563`) + - The methods :meth:`.Styler.highlight_null`, :meth:`.Styler.highlight_min`, and :meth:`.Styler.highlight_max` now allow custom CSS highlighting instead of the default background coloring (:issue:`40242`) + - :meth:`.Styler.apply` now accepts functions that return an ``ndarray`` when ``axis=None``, making it consistent with the ``axis=0`` and ``axis=1`` behavior (:issue:`39359`) + - When incorrectly formatted CSS is given via :meth:`.Styler.apply` or :meth:`.Styler.applymap`, an error is now raised upon rendering (:issue:`39660`) + - :meth:`.Styler.format` now accepts the keyword argument ``escape`` for optional HTML and LaTeX escaping (:issue:`40388`, :issue:`41619`) + - :meth:`.Styler.background_gradient` has gained the argument ``gmap`` to supply a specific gradient map for shading (:issue:`22727`) + - :meth:`.Styler.clear` now clears :attr:`Styler.hidden_index` and :attr:`Styler.hidden_columns` as well (:issue:`40484`) + - Added the method :meth:`.Styler.highlight_between` (:issue:`39821`) + - Added the method :meth:`.Styler.highlight_quantile` (:issue:`40926`) + - Added the method :meth:`.Styler.text_gradient` (:issue:`41098`) + - Added the method :meth:`.Styler.set_tooltips` to allow hover tooltips; this can be used to enhance interactive displays (:issue:`21266`, :issue:`40284`) + - Added the parameter ``precision`` to the method :meth:`.Styler.format` to control the display of floating point numbers (:issue:`40134`) + - :class:`.Styler` rendered HTML output now follows the `w3 HTML Style Guide `_ (:issue:`39626`) + - Many features of the :class:`.Styler` class are now either partially or fully usable on a DataFrame with non-unique indexes or columns (:issue:`41143`) + - One has greater control of the display through separate sparsification of the index or columns using the :ref:`new styler options `, which are also usable via :func:`option_context` (:issue:`41142`) + - Added the option ``styler.render.max_elements`` to avoid browser overload when styling large DataFrames (:issue:`40712`) + - Added the method :meth:`.Styler.to_latex` (:issue:`21673`, :issue:`42320`), which also allows some limited CSS conversion (:issue:`40731`) + - Added the method :meth:`.Styler.to_html` (:issue:`13379`) + - Added the method :meth:`.Styler.set_sticky` to make index and column headers permanently visible in scrolling HTML frames (:issue:`29072`)
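+ +As a brief illustration of the first bullet (CSS given as a plain string) and of :meth:`.Styler.highlight_between`, here is a minimal sketch; the small ``DataFrame`` below is an invented example, not one taken from elsewhere in these notes: + +.. code-block:: python + + import pandas as pd + + df = pd.DataFrame({"a": [1, 4, 7], "b": [2, 5, 8]}) # hypothetical example data + + # CSS properties can be passed as a single string rather than a list of tuples + styler = df.style.set_table_styles([{"selector": "td", "props": "color:red;"}]) + + # highlight cells whose values fall between 2 and 5 (both bounds inclusive by default) + styler = styler.highlight_between(left=2, right=5) + +.. 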
_whatsnew_130.enhancements.dataframe_honors_copy_with_dict: + +DataFrame constructor honors ``copy=False`` with dict +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When passing a dictionary to :class:`DataFrame` with ``copy=False``, +a copy will no longer be made (:issue:`32960`). + +.. ipython:: python + + arr = np.array([1, 2, 3]) + df = pd.DataFrame({"A": arr, "B": arr.copy()}, copy=False) + df + +``df["A"]`` remains a view on ``arr``: + +.. ipython:: python + + arr[0] = 0 + assert df.iloc[0, 0] == 0 + +The default behavior when not passing ``copy`` will remain unchanged, i.e. +a copy will be made. + +.. _whatsnew_130.enhancements.arrow_string: + +PyArrow backed string data type +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've enhanced the :class:`StringDtype`, an extension type dedicated to string data. +(:issue:`39908`) + +It is now possible to specify a ``storage`` keyword option to :class:`StringDtype`. Use +pandas options or specify the dtype using ``dtype='string[pyarrow]'`` to allow the +StringArray to be backed by a PyArrow array instead of a NumPy array of Python objects. + +The PyArrow backed StringArray requires pyarrow 1.0.0 or greater to be installed. + +.. warning:: + + ``string[pyarrow]`` is currently considered experimental. The implementation + and parts of the API may change without warning. + +.. ipython:: python + + pd.Series(['abc', None, 'def'], dtype=pd.StringDtype(storage="pyarrow")) + +You can use the alias ``"string[pyarrow]"`` as well. + +.. ipython:: python + + s = pd.Series(['abc', None, 'def'], dtype="string[pyarrow]") + s + +You can also create a PyArrow backed string array using pandas options. + +.. ipython:: python + + with pd.option_context("string_storage", "pyarrow"): + s = pd.Series(['abc', None, 'def'], dtype="string") + s + +The usual string accessor methods work. Where appropriate, the return type of the Series +or columns of a DataFrame will also have string dtype. + +.. ipython:: python + + s.str.upper() + s.str.split('b', expand=True).dtypes + +String accessor methods returning integers will return a value with :class:`Int64Dtype` + +.. ipython:: python + + s.str.count("a") + +.. _whatsnew_130.enhancements.centered_datetimelike_rolling_window: + +Centered datetime-like rolling windows +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing rolling calculations on DataFrame and Series +objects with a datetime-like index, a centered datetime-like window can now be +used (:issue:`38780`). +For example: + +.. ipython:: python + + df = pd.DataFrame( + {"A": [0, 1, 2, 3, 4]}, index=pd.date_range("2020", periods=5, freq="1D") + ) + df + df.rolling("2D", center=True).mean() + + +.. _whatsnew_130.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, and :meth:`Series.expanding` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview ` for performance and functional benefits (:issue:`15095`, :issue:`38995`) +- :class:`.ExponentialMovingWindow` now support a ``online`` method that can perform ``mean`` calculations in an online fashion. 
See :ref:`Window Overview ` (:issue:`41673`) +- Added :meth:`MultiIndex.dtypes` (:issue:`37062`) +- Added ``end`` and ``end_day`` options for the ``origin`` argument in :meth:`DataFrame.resample` (:issue:`37804`) +- Improved error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`) +- Improved consistency of error messages when passing an invalid ``win_type`` argument in :ref:`Window methods ` (:issue:`15969`) +- :func:`read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`) +- :func:`read_csv` now raising ``ParserWarning`` if length of header or given names does not match length of data when ``usecols`` is not specified (:issue:`21768`) +- Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) +- :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`) +- Added support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`) +- :func:`read_excel` can now auto-detect .xlsb files and older .xls files (:issue:`35416`, :issue:`41225`) +- :class:`ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behavior of append mode when writing to existing sheets (:issue:`40230`) +- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.ExponentialMovingWindow.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support `Numba `_ execution with the ``engine`` keyword (:issue:`38895`, :issue:`41267`) +- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`) +- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`) +- :meth:`DataFrame.applymap` can now accept kwargs to pass on to the user-provided ``func`` (:issue:`39987`) +- Passing a :class:`DataFrame` indexer to ``iloc`` is now disallowed for :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` (:issue:`39004`) +- :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`) +- :meth:`DataFrame.plot.scatter` can now accept a categorical column for the argument ``c`` (:issue:`12380`, :issue:`31357`) +- :meth:`Series.loc` now raises a helpful error message when the Series has a :class:`MultiIndex` and the indexer has too many dimensions (:issue:`35349`) +- :func:`read_stata` now supports reading data from compressed files (:issue:`26599`) +- Added support for parsing ``ISO 8601``-like timestamps with negative signs to :class:`Timedelta` (:issue:`37172`) +- Added support for unary operators in :class:`FloatingArray` (:issue:`38749`) +- :class:`RangeIndex` can now be constructed by passing a ``range`` object directly e.g. 
``pd.RangeIndex(range(3))`` (:issue:`12067`) +- :meth:`Series.round` and :meth:`DataFrame.round` now work with nullable integer and floating dtypes (:issue:`38844`) +- :meth:`read_csv` and :meth:`read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`) +- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`) +- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`) +- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising with ``object`` data containing ``pd.NA`` even when ``skipna=True`` (:issue:`37501`) +- :meth:`.GroupBy.rank` now supports object-dtype data (:issue:`38278`) +- Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`) +- Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`) +- Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`) +- :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`) +- Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`) +- :meth:`Series.between` can now accept ``left`` or ``right`` as arguments to ``inclusive`` to include only the left or right boundary (:issue:`40245`) +- :meth:`DataFrame.explode` now supports exploding multiple columns. Its ``column`` argument now also accepts a list of str or tuples for exploding on multiple columns at the same time (:issue:`39240`) +- :meth:`DataFrame.sample` now accepts the ``ignore_index`` argument to reset the index after sampling, similar to :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.sort_values` (:issue:`38581`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_130.notable_bug_fixes: + +Notable bug fixes +~~~~~~~~~~~~~~~~~ + +These are bug fixes that might have notable behavior changes. + +.. _whatsnew_130.notable_bug_fixes.categorical_unique_maintains_dtype: + +``Categorical.unique`` now always maintains same dtype as original +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, when calling :meth:`Categorical.unique` with categorical data, unused categories in the new array +would be removed, making the dtype of the new array different than the +original (:issue:`18291`) + +As an example of this, given: + +.. ipython:: python + + dtype = pd.CategoricalDtype(['bad', 'neutral', 'good'], ordered=True) + cat = pd.Categorical(['good', 'good', 'bad', 'bad'], dtype=dtype) + original = pd.Series(cat) + unique = original.unique() + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: unique + ['good', 'bad'] + Categories (2, object): ['bad' < 'good'] + In [2]: original.dtype == unique.dtype + False + +*New behavior*: + +.. ipython:: python + + unique + original.dtype == unique.dtype + +.. 
_whatsnew_130.notable_bug_fixes.combine_first_preserves_dtype: + +Preserve dtypes in :meth:`DataFrame.combine_first` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.combine_first` will now preserve dtypes (:issue:`7509`) + +.. ipython:: python + + df1 = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=[0, 1, 2]) + df1 + df2 = pd.DataFrame({"B": [4, 5, 6], "C": [1, 2, 3]}, index=[2, 3, 4]) + df2 + combined = df1.combine_first(df2) + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: combined.dtypes + Out[2]: + A float64 + B float64 + C float64 + dtype: object + +*New behavior*: + +.. ipython:: python + + combined.dtypes + +.. _whatsnew_130.notable_bug_fixes.groupby_preserves_dtype: + +Groupby methods agg and transform no longer changes return dtype for callables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously the methods :meth:`.DataFrameGroupBy.aggregate`, +:meth:`.SeriesGroupBy.aggregate`, :meth:`.DataFrameGroupBy.transform`, and +:meth:`.SeriesGroupBy.transform` might cast the result dtype when the argument ``func`` +is callable, possibly leading to undesirable results (:issue:`21240`). The cast would +occur if the result is numeric and casting back to the input dtype does not change any +values as measured by ``np.allclose``. Now no such casting occurs. + +.. ipython:: python + + df = pd.DataFrame({'key': [1, 1], 'a': [True, False], 'b': [True, True]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: df.groupby('key').agg(lambda x: x.sum()) + Out[5]: + a b + key + 1 True 2 + +*New behavior*: + +.. ipython:: python + + df.groupby('key').agg(lambda x: x.sum()) + +.. _whatsnew_130.notable_bug_fixes.groupby_reductions_float_result: + +``float`` result for :meth:`.GroupBy.mean`, :meth:`.GroupBy.median`, and :meth:`.GroupBy.var` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, these methods could result in different dtypes depending on the input values. +Now, these methods will always return a float dtype. (:issue:`41137`) + +.. ipython:: python + + df = pd.DataFrame({'a': [True], 'b': [1], 'c': [1.0]}) + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: df.groupby(df.index).mean() + Out[5]: + a b c + 0 True 1 1.0 + +*New behavior*: + +.. ipython:: python + + df.groupby(df.index).mean() + +.. _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace: + +Try operating inplace when setting values with ``loc`` and ``iloc`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When setting an entire column using ``loc`` or ``iloc``, pandas will try to +insert the values into the existing data rather than create an entirely new array. + +.. ipython:: python + + df = pd.DataFrame(range(3), columns=["A"], dtype="float64") + values = df.values + new = np.array([5, 6, 7], dtype="int64") + df.loc[[0, 1, 2], "A"] = new + +In both the new and old behavior, the data in ``values`` is overwritten, but in +the old behavior the dtype of ``df["A"]`` changed to ``int64``. + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: df.dtypes + Out[1]: + A int64 + dtype: object + In [2]: np.shares_memory(df["A"].values, new) + Out[2]: False + In [3]: np.shares_memory(df["A"].values, values) + Out[3]: False + +In pandas 1.3.0, ``df`` continues to share data with ``values`` + +*New behavior*: + +.. ipython:: python + + df.dtypes + np.shares_memory(df["A"], new) + np.shares_memory(df["A"], values) + + +.. 
_whatsnew_130.notable_bug_fixes.setitem_never_inplace: + +Never operate inplace when setting ``frame[keys] = values`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When setting multiple columns using ``frame[keys] = values`` new arrays will +replace pre-existing arrays for these keys, which will *not* be over-written +(:issue:`39510`). As a result, the columns will retain the dtype(s) of ``values``, +never casting to the dtypes of the existing arrays. + +.. ipython:: python + + df = pd.DataFrame(range(3), columns=["A"], dtype="float64") + df[["A"]] = 5 + +In the old behavior, ``5`` was cast to ``float64`` and inserted into the existing +array backing ``df``: + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: df.dtypes + Out[1]: + A float64 + +In the new behavior, we get a new array, and retain an integer-dtyped ``5``: + +*New behavior*: + +.. ipython:: python + + df.dtypes + + +.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting: + +Consistent casting with setting into Boolean Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Setting non-boolean values into a :class:`Series` with ``dtype=bool`` now consistently +casts to ``dtype=object`` (:issue:`38709`) + +.. ipython:: python + + orig = pd.Series([True, False]) + ser = orig.copy() + ser.iloc[1] = np.nan + ser2 = orig.copy() + ser2.iloc[1] = 2.0 + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: ser + Out [1]: + 0 1.0 + 1 NaN + dtype: float64 + + In [2]:ser2 + Out [2]: + 0 True + 1 2.0 + dtype: object + +*New behavior*: + +.. ipython:: python + + ser + ser2 + + +.. _whatsnew_130.notable_bug_fixes.rolling_groupby_column: + +GroupBy.rolling no longer returns grouped-by column in values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The group-by column will now be dropped from the result of a +``groupby.rolling`` operation (:issue:`32262`) + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: df.groupby("A").rolling(2).sum() + Out[1]: + A B + A + 1 0 NaN NaN + 1 2.0 1.0 + 2 2 NaN NaN + 3 3 NaN NaN + +*New behavior*: + +.. ipython:: python + + df.groupby("A").rolling(2).sum() + +.. _whatsnew_130.notable_bug_fixes.rolling_var_precision: + +Removed artificial truncation in rolling variance and standard deviation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`.Rolling.std` and :meth:`.Rolling.var` will no longer +artificially truncate results that are less than ``~1e-8`` and ``~1e-15`` respectively to +zero (:issue:`37051`, :issue:`40448`, :issue:`39872`). + +However, floating point artifacts may now exist in the results when rolling over larger values. + +.. ipython:: python + + s = pd.Series([7, 5, 5, 5]) + s.rolling(3).var() + +.. _whatsnew_130.notable_bug_fixes.rolling_groupby_multiindex: + +GroupBy.rolling with MultiIndex no longer drops levels in the result +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`GroupBy.rolling` will no longer drop levels of a :class:`DataFrame` +with a :class:`MultiIndex` in the result. This can lead to a perceived duplication of levels in the resulting +:class:`MultiIndex`, but this change restores the behavior that was present in version 1.1.3 (:issue:`38787`, :issue:`38523`). + + +.. 
ipython:: python + + index = pd.MultiIndex.from_tuples([('idx1', 'idx2')], names=['label1', 'label2']) + df = pd.DataFrame({'a': [1], 'b': [2]}, index=index) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: df.groupby('label1').rolling(1).sum() + Out[1]: + a b + label1 + idx1 1.0 2.0 + +*New behavior*: + +.. ipython:: python + + df.groupby('label1').rolling(1).sum() + + +.. --------------------------------------------------------------------------- + +.. _whatsnew_130.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_130.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Some minimum supported versions of dependencies were updated. +If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.17.3 | X | X | ++-----------------+-----------------+----------+---------+ +| pytz | 2017.3 | X | | ++-----------------+-----------------+----------+---------+ +| python-dateutil | 2.7.3 | X | | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.2.1 | | | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.7.0 | | X | ++-----------------+-----------------+----------+---------+ +| pytest (dev) | 6.0 | | X | ++-----------------+-----------------+----------+---------+ +| mypy (dev) | 0.812 | | X | ++-----------------+-----------------+----------+---------+ +| setuptools | 38.6.0 | | X | ++-----------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. 
+ ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 | 4.6.0 | | ++-----------------+-----------------+---------+ +| fastparquet | 0.4.0 | X | ++-----------------+-----------------+---------+ +| fsspec | 0.7.4 | | ++-----------------+-----------------+---------+ +| gcsfs | 0.6.0 | | ++-----------------+-----------------+---------+ +| lxml | 4.3.0 | | ++-----------------+-----------------+---------+ +| matplotlib | 2.2.3 | | ++-----------------+-----------------+---------+ +| numba | 0.46.0 | | ++-----------------+-----------------+---------+ +| openpyxl | 3.0.0 | X | ++-----------------+-----------------+---------+ +| pyarrow | 0.17.0 | X | ++-----------------+-----------------+---------+ +| pymysql | 0.8.1 | X | ++-----------------+-----------------+---------+ +| pytables | 3.5.1 | | ++-----------------+-----------------+---------+ +| s3fs | 0.4.0 | | ++-----------------+-----------------+---------+ +| scipy | 1.2.0 | | ++-----------------+-----------------+---------+ +| sqlalchemy | 1.3.0 | X | ++-----------------+-----------------+---------+ +| tabulate | 0.8.7 | X | ++-----------------+-----------------+---------+ +| xarray | 0.12.0 | | ++-----------------+-----------------+---------+ +| xlrd | 1.2.0 | | ++-----------------+-----------------+---------+ +| xlsxwriter | 1.0.2 | | ++-----------------+-----------------+---------+ +| xlwt | 1.3.0 | | ++-----------------+-----------------+---------+ +| pandas-gbq | 0.12.0 | | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. _whatsnew_130.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ +- Partially initialized :class:`CategoricalDtype` objects (i.e. those with ``categories=None``) will no longer compare as equal to fully initialized dtype objects (:issue:`38516`) +- Accessing ``_constructor_expanddim`` on a :class:`DataFrame` and ``_constructor_sliced`` on a :class:`Series` now raise an ``AttributeError``. Previously a ``NotImplementedError`` was raised (:issue:`38782`) +- Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as `turbodbc `_ (:issue:`36893`) +- Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`) +- :meth:`ExtensionDtype.construct_array_type` is now a required method instead of an optional one for :class:`ExtensionDtype` subclasses (:issue:`24860`) +- Calling ``hash`` on non-hashable pandas objects will now raise ``TypeError`` with the built-in error message (e.g. ``unhashable type: 'Series'``). Previously it would raise a custom message such as ``'Series' objects are mutable, thus they cannot be hashed``. Furthermore, ``isinstance(, abc.collections.Hashable)`` will now return ``False`` (:issue:`40013`) +- :meth:`.Styler.from_custom_template` now has two new arguments for template names, and removed the old ``name``, due to template inheritance having been introducing for better parsing (:issue:`42053`). Subclassing modifications to Styler attributes are also needed. + +.. _whatsnew_130.api_breaking.build: + +Build +^^^^^ +- Documentation in ``.pptx`` and ``.pdf`` formats are no longer included in wheels or source distributions. (:issue:`30741`) + +.. 
--------------------------------------------------------------------------- + +.. _whatsnew_130.deprecations: + +Deprecations +~~~~~~~~~~~~ + +.. _whatsnew_130.deprecations.nuisance_columns: + +Deprecated dropping nuisance columns in DataFrame reductions and DataFrameGroupBy operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Calling a reduction (e.g. ``.min``, ``.max``, ``.sum``) on a :class:`DataFrame` with +``numeric_only=None`` (the default), columns where the reduction raises a ``TypeError`` +are silently ignored and dropped from the result. + +This behavior is deprecated. In a future version, the ``TypeError`` will be raised, +and users will need to select only valid columns before calling the function. + +For example: + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)}) + df + +*Old behavior*: + +.. code-block:: ipython + + In [3]: df.prod() + Out[3]: + Out[3]: + A 24 + dtype: int64 + +*Future behavior*: + +.. code-block:: ipython + + In [4]: df.prod() + ... + TypeError: 'DatetimeArray' does not implement reduction 'prod' + + In [5]: df[["A"]].prod() + Out[5]: + A 24 + dtype: int64 + + +Similarly, when applying a function to :class:`DataFrameGroupBy`, columns on which +the function raises ``TypeError`` are currently silently ignored and dropped +from the result. + +This behavior is deprecated. In a future version, the ``TypeError`` +will be raised, and users will need to select only valid columns before calling +the function. + +For example: + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)}) + gb = df.groupby([1, 1, 2, 2]) + +*Old behavior*: + +.. code-block:: ipython + + In [4]: gb.prod(numeric_only=False) + Out[4]: + A + 1 2 + 2 12 + +*Future behavior*: + +.. code-block:: ipython + + In [5]: gb.prod(numeric_only=False) + ... + TypeError: datetime64 type does not support prod operations + + In [6]: gb[["A"]].prod(numeric_only=False) + Out[6]: + A + 1 2 + 2 12 + +.. _whatsnew_130.deprecations.other: + +Other Deprecations +^^^^^^^^^^^^^^^^^^ +- Deprecated allowing scalars to be passed to the :class:`Categorical` constructor (:issue:`38433`) +- Deprecated constructing :class:`CategoricalIndex` without passing list-like data (:issue:`38944`) +- Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`, :issue:`21311`, :issue:`22315`, :issue:`26974`) +- Deprecated the :meth:`astype` method of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``Datetime64TZDtype``, ``PeriodDtype``) to convert to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`). This deprecation was later reverted in pandas 1.4.0. +- Deprecated :meth:`MultiIndex.is_lexsorted` and :meth:`MultiIndex.lexsort_depth`, use :meth:`MultiIndex.is_monotonic_increasing` instead (:issue:`32259`) +- Deprecated keyword ``try_cast`` in :meth:`Series.where`, :meth:`Series.mask`, :meth:`DataFrame.where`, :meth:`DataFrame.mask`; cast results manually if desired (:issue:`38836`) +- Deprecated comparison of :class:`Timestamp` objects with ``datetime.date`` objects. Instead of e.g. 
``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` or ``ts.date() <= mydate`` (:issue:`36131`) +- Deprecated :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`) +- Deprecated :attr:`Rolling.is_datetimelike` (:issue:`38963`) +- Deprecated :class:`DataFrame` indexer for :meth:`Series.__setitem__` and :meth:`DataFrame.__setitem__` (:issue:`39004`) +- Deprecated :meth:`ExponentialMovingWindow.vol` (:issue:`39220`) +- Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`) +- Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`) +- Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favor of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`, :issue:`40425`) +- Deprecated :meth:`.Styler.where` in favor of using an alternative formulation with :meth:`Styler.applymap` (:issue:`40821`) +- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`) +- Deprecated arguments ``error_bad_lines`` and ``warn_bad_lines`` in :meth:`read_csv` and :meth:`read_table` in favor of argument ``on_bad_lines`` (:issue:`15122`) +- Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`) +- Deprecated using :func:`merge`, :meth:`DataFrame.merge`, and :meth:`DataFrame.join` on a different number of levels (:issue:`34862`) +- Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`) +- Deprecated the ``level`` keyword for :class:`DataFrame` and :class:`Series` aggregations; use groupby instead (:issue:`39983`) +- Deprecated the ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.set_categories` and will be removed in a future version (:issue:`37643`) +- Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`) +- Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) +- Deprecated the ``convert_float`` optional argument in :func:`read_excel` and :meth:`ExcelFile.parse` (:issue:`41127`) +- Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) +- Deprecated using ``usecols`` with out of bounds indices for :func:`read_csv` with ``engine="c"`` (:issue:`25623`) +- Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`) +- Deprecated behavior of :class:`DataFrame` constructor when a ``dtype`` is passed and the data cannot be cast to that dtype. 
In a future version, this will raise instead of being silently ignored (:issue:`24435`) +- Deprecated the :attr:`Timestamp.freq` attribute. For the properties that use it (``is_month_start``, ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``, ``is_year_start``, ``is_year_end``), when you have a ``freq``, use e.g. ``freq.is_month_start(ts)`` (:issue:`15146`) +- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`) +- Deprecated behavior of :class:`Series` construction with large-integer values and small-integer dtype silently overflowing; use ``Series(data).astype(dtype)`` instead (:issue:`41734`) +- Deprecated behavior of :class:`DataFrame` construction with floating data and integer dtype casting even when lossy; in a future version this will remain floating, matching :class:`Series` behavior (:issue:`41770`) +- Deprecated inference of ``timedelta64[ns]``, ``datetime64[ns]``, or ``DatetimeTZDtype`` dtypes in :class:`Series` construction when data containing strings is passed and no ``dtype`` is passed (:issue:`33558`) +- In a future version, constructing :class:`Series` or :class:`DataFrame` with ``datetime64[ns]`` data and ``DatetimeTZDtype`` will treat the data as wall-times instead of as UTC times (matching DatetimeIndex behavior). To treat the data as UTC times, use ``pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(dtype.tz)`` or ``pd.Series(data.view("int64"), dtype=dtype)`` (:issue:`33401`) +- Deprecated passing lists as ``key`` to :meth:`DataFrame.xs` and :meth:`Series.xs` (:issue:`41760`) +- Deprecated boolean arguments of ``inclusive`` in :meth:`Series.between` to have ``{"left", "right", "neither", "both"}`` as standard argument values (:issue:`40628`) +- Deprecated passing arguments as positional for all of the following, with exceptions noted (:issue:`41485`): + + - :func:`concat` (other than ``objs``) + - :func:`read_csv` (other than ``filepath_or_buffer``) + - :func:`read_table` (other than ``filepath_or_buffer``) + - :meth:`DataFrame.clip` and :meth:`Series.clip` (other than ``upper`` and ``lower``) + - :meth:`DataFrame.drop_duplicates` (except for ``subset``), :meth:`Series.drop_duplicates`, :meth:`Index.drop_duplicates` and :meth:`MultiIndex.drop_duplicates` + - :meth:`DataFrame.drop` (other than ``labels``) and :meth:`Series.drop` + - :meth:`DataFrame.dropna` and :meth:`Series.dropna` + - :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, and :meth:`Series.bfill` + - :meth:`DataFrame.fillna` and :meth:`Series.fillna` (apart from ``value``) + - :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (other than ``method``) + - :meth:`DataFrame.mask` and :meth:`Series.mask` (other than ``cond`` and ``other``) + - :meth:`DataFrame.reset_index` (other than ``level``) and :meth:`Series.reset_index` + - :meth:`DataFrame.set_axis` and :meth:`Series.set_axis` (other than ``labels``) + - :meth:`DataFrame.set_index` (other than ``keys``) + - :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` + - :meth:`DataFrame.sort_values` (other than ``by``) and :meth:`Series.sort_values` + - :meth:`DataFrame.where` and :meth:`Series.where` (other than ``cond`` and ``other``) + - :meth:`Index.set_names` and :meth:`MultiIndex.set_names` (except for ``names``) + - :meth:`MultiIndex.codes` (except for ``codes``) + - :meth:`MultiIndex.set_levels` (except for ``levels``) + - 
:meth:`Resampler.interpolate` (other than ``method``) + + +.. --------------------------------------------------------------------------- + + +.. _whatsnew_130.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvement in :meth:`IntervalIndex.isin` (:issue:`38353`) +- Performance improvement in :meth:`Series.mean` for nullable data types (:issue:`34814`) +- Performance improvement in :meth:`Series.isin` for nullable data types (:issue:`38340`) +- Performance improvement in :meth:`DataFrame.fillna` with ``method="pad"`` or ``method="backfill"`` for nullable floating and nullable integer dtypes (:issue:`39953`) +- Performance improvement in :meth:`DataFrame.corr` for ``method=kendall`` (:issue:`28329`) +- Performance improvement in :meth:`DataFrame.corr` for ``method=spearman`` (:issue:`40956`, :issue:`41885`) +- Performance improvement in :meth:`.Rolling.corr` and :meth:`.Rolling.cov` (:issue:`39388`) +- Performance improvement in :meth:`.RollingGroupby.corr`, :meth:`.ExpandingGroupby.corr`, :meth:`.ExpandingGroupby.corr` and :meth:`.ExpandingGroupby.cov` (:issue:`39591`) +- Performance improvement in :func:`unique` for object data type (:issue:`37615`) +- Performance improvement in :func:`json_normalize` for basic cases (including separators) (:issue:`40035` :issue:`15621`) +- Performance improvement in :class:`.ExpandingGroupby` aggregation methods (:issue:`39664`) +- Performance improvement in :class:`.Styler` where render times are more than 50% reduced and now matches :meth:`DataFrame.to_html` (:issue:`39972` :issue:`39952`, :issue:`40425`) +- The method :meth:`.Styler.set_td_classes` is now as performant as :meth:`.Styler.apply` and :meth:`.Styler.applymap`, and even more so in some cases (:issue:`40453`) +- Performance improvement in :meth:`.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`) +- Performance improvement in :meth:`.GroupBy.apply` when requiring the Python fallback implementation (:issue:`40176`) +- Performance improvement in the conversion of a PyArrow Boolean array to a pandas nullable Boolean array (:issue:`41051`) +- Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`) +- Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`) +- Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`) +- Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`) +- Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_130.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ +- Bug in :class:`CategoricalIndex` incorrectly failing to raise ``TypeError`` when scalar data is passed (:issue:`38614`) +- Bug in ``CategoricalIndex.reindex`` failed when the :class:`Index` passed was not categorical but whose values were all labels in the category (:issue:`28690`) +- Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`) +- Bug in constructing a :class:`DataFrame` from an ``ndarray`` and a :class:`CategoricalDtype` (:issue:`38857`) +- Bug in setting categorical values into an object-dtype column in a :class:`DataFrame` (:issue:`39136`) +- Bug in :meth:`DataFrame.reindex` was raising an ``IndexError`` when the new index contained duplicates and the old index was a :class:`CategoricalIndex` (:issue:`38906`) +- Bug in :meth:`Categorical.fillna` with a tuple-like category raising ``NotImplementedError`` instead of ``ValueError`` when filling with a non-category tuple (:issue:`41914`) + +Datetimelike +^^^^^^^^^^^^ +- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) +- Bug in :meth:`DataFrame.first` and :meth:`Series.first` with an offset of one month returning an incorrect result when the first day is the last day of a month (:issue:`29623`) +- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise a ``TypeError`` (:issue:`38575`, :issue:`38764`, :issue:`38792`) +- Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype or a ``timedelta`` object out of bounds for ``timedelta64[ns]`` dtype (:issue:`38792`, :issue:`38965`) +- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) +- Bug in :meth:`DatetimeIndex.intersection` giving incorrect results with non-Tick frequencies with ``n != 1`` (:issue:`42104`) +- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) +- Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`) +- Bug in comparisons between :class:`Timestamp` object and ``datetime64`` objects just outside the implementation bounds for nanosecond ``datetime64`` (:issue:`39221`) +- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`) +- Bug in :meth:`Timedelta.round`, :meth:`Timedelta.floor`, :meth:`Timedelta.ceil` for values near the implementation bounds of :class:`Timedelta` (:issue:`38964`) +- Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`) +- Bug in :func:`infer_freq` incorrectly fails to infer 'H' frequency of :class:`DatetimeIndex` if the latter has a timezone and crosses DST boundaries (:issue:`39556`) +- Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` 
to ``None`` (:issue:`41425`) + +Timedelta +^^^^^^^^^ +- Bug in constructing :class:`Timedelta` from ``np.timedelta64`` objects with non-nanosecond units that are out of bounds for ``timedelta64[ns]`` (:issue:`38965`) +- Bug in constructing a :class:`TimedeltaIndex` incorrectly accepting ``np.datetime64("NaT")`` objects (:issue:`39462`) +- Bug in constructing :class:`Timedelta` from an input string with only symbols and no digits failed to raise an error (:issue:`39710`) +- Bug in :class:`TimedeltaIndex` and :func:`to_timedelta` failing to raise when passed non-nanosecond ``timedelta64`` arrays that overflow when converting to ``timedelta64[ns]`` (:issue:`40008`) + +Timezones +^^^^^^^^^ +- Bug in different ``tzinfo`` objects representing UTC not being treated as equivalent (:issue:`39216`) +- Bug in ``dateutil.tz.gettz("UTC")`` not being recognized as equivalent to other UTC-representing tzinfos (:issue:`39276`) + +Numeric +^^^^^^^ +- Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`) +- Bug in :meth:`DataFrame.sort_values` raising an :class:`IndexError` for empty ``by`` (:issue:`40258`) +- Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` would drop numeric ``ExtensionDtype`` columns (:issue:`35340`) +- Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`) +- Bug in :meth:`DataFrame.rank` when the DataFrame contained ``np.inf`` (:issue:`32593`) +- Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising an ``IndexError`` (:issue:`38932`) +- Bug in :meth:`Series.rank`, :meth:`DataFrame.rank`, and :meth:`.GroupBy.rank` treating the most negative ``int64`` value as missing (:issue:`32859`) +- Bug in :meth:`DataFrame.select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36596`) +- Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` when passed the argument ``func="size"`` would operate on the entire ``DataFrame`` instead of rows or columns (:issue:`39934`) +- Bug in :meth:`DataFrame.transform` would raise a ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`) +- Bug in :meth:`.GroupBy.rank` giving incorrect results with ``pct=True`` and equal values between consecutive groups (:issue:`40518`) +- Bug in :meth:`Series.count` would result in an ``int32`` result on 32-bit platforms when argument ``level=None`` (:issue:`40908`) +- Bug in :class:`Series` and :class:`DataFrame` reductions with methods ``any`` and ``all`` not returning Boolean results for object data (:issue:`12863`, :issue:`35450`, :issue:`27709`) +- Bug in :meth:`Series.clip` would fail if the Series contains NA values and has nullable int or float as a data type (:issue:`40851`) +- Bug in :meth:`UInt64Index.where` and :meth:`UInt64Index.putmask` with an ``np.int64`` dtype ``other`` incorrectly raising ``TypeError`` (:issue:`41974`) +- Bug in :meth:`DataFrame.agg()` not sorting the aggregated axis in the order of the provided aggregation functions when one or more aggregation function fails to produce results (:issue:`33634`) +- Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`) + +Conversion +^^^^^^^^^^ +- Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns Python native types (:issue:`25969`) +- Bug in :meth:`Series.view` and :meth:`Index.view` when 
converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`) +- Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`) +- Bug in :class:`DataFrame` failing to raise a ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`) +- Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`) +- Bug in :meth:`StringArray.astype` falling back to NumPy and raising when converting to ``dtype='categorical'`` (:issue:`40450`) +- Bug in :func:`factorize` where, when given an array with a numeric NumPy dtype lower than int64, uint64 and float64, the unique values did not keep their original dtype (:issue:`41132`) +- Bug in :class:`DataFrame` construction with a dictionary containing an array-like with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`) +- Bug in :meth:`qcut` raising error when taking ``Float64DType`` as input (:issue:`40730`) +- Bug in :class:`DataFrame` and :class:`Series` construction with ``datetime64[ns]`` data and ``dtype=object`` resulting in ``datetime`` objects instead of :class:`Timestamp` objects (:issue:`41599`) +- Bug in :class:`DataFrame` and :class:`Series` construction with ``timedelta64[ns]`` data and ``dtype=object`` resulting in ``np.timedelta64`` objects instead of :class:`Timedelta` objects (:issue:`41599`) +- Bug in :class:`DataFrame` construction when given a two-dimensional object-dtype ``np.ndarray`` of :class:`Period` or :class:`Interval` objects failing to cast to :class:`PeriodDtype` or :class:`IntervalDtype`, respectively (:issue:`41812`) +- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`) +- Bug in creating a :class:`Series` from a ``range`` object that does not fit in the bounds of ``int64`` dtype (:issue:`30173`) +- Bug in creating a :class:`Series` from a ``dict`` with all-tuple keys and an :class:`Index` that requires reindexing (:issue:`41707`) +- Bug in :func:`.infer_dtype` not recognizing Series, Index, or array with a Period dtype (:issue:`23553`) +- Bug in :func:`.infer_dtype` raising an error for general :class:`.ExtensionArray` objects. 
It will now return ``"unknown-array"`` instead of raising (:issue:`37367`) +- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised a ``ValueError`` when called on an empty DataFrame (:issue:`40393`) + +Strings +^^^^^^^ +- Bug in the conversion from ``pyarrow.ChunkedArray`` to :class:`~arrays.StringArray` when the original had zero chunks (:issue:`41040`) +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` ignoring replacements with ``regex=True`` for ``StringDType`` data (:issue:`41333`, :issue:`35977`) +- Bug in :meth:`Series.str.extract` with :class:`~arrays.StringArray` returning object dtype for an empty :class:`DataFrame` (:issue:`41441`) +- Bug in :meth:`Series.str.replace` where the ``case`` argument was ignored when ``regex=False`` (:issue:`41602`) + +Interval +^^^^^^^^ +- Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`) +- Bug in :meth:`IntervalIndex.intersection` returning duplicates when at least one of the :class:`Index` objects have duplicates which are present in the other (:issue:`38743`) +- :meth:`IntervalIndex.union`, :meth:`IntervalIndex.intersection`, :meth:`IntervalIndex.difference`, and :meth:`IntervalIndex.symmetric_difference` now cast to the appropriate dtype instead of raising a ``TypeError`` when operating with another :class:`IntervalIndex` with incompatible dtype (:issue:`39267`) +- :meth:`PeriodIndex.union`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference`, :meth:`PeriodIndex.difference` now cast to object dtype instead of raising ``IncompatibleFrequency`` when operating with another :class:`PeriodIndex` with incompatible dtype (:issue:`39306`) +- Bug in :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.get_loc`, :meth:`IntervalIndex.get_indexer_for`, and :meth:`IntervalIndex.__contains__` when NA values are present (:issue:`41831`) + +Indexing +^^^^^^^^ +- Bug in :meth:`Index.union` and :meth:`MultiIndex.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`) +- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`) +- Bug in :meth:`IntervalIndex.get_indexer` when ``target`` has ``CategoricalDtype`` and both the index and the target contain NA values (:issue:`41934`) +- Bug in :meth:`Series.loc` raising a ``ValueError`` when input was filtered with a Boolean list and values to set were a list with lower dimension (:issue:`20438`) +- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`) +- Bug in :meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`) +- Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) +- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` with timezone aware indexes raising a ``TypeError`` for ``method="ffill"`` and ``method="bfill"`` and specified ``tolerance`` (:issue:`38566`) +- Bug in :meth:`DataFrame.reindex` with ``datetime64[ns]`` or ``timedelta64[ns]`` incorrectly casting to integers when the ``fill_value`` requires casting to object dtype (:issue:`39755`) +- Bug in 
:meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting on an empty :class:`DataFrame` using specified columns and a nonempty :class:`DataFrame` value (:issue:`38831`) +- Bug in :meth:`DataFrame.loc.__setitem__` raising a ``ValueError`` when operating on a unique column when the :class:`DataFrame` has duplicate columns (:issue:`38521`) +- Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) +- Bug in :meth:`Series.loc.__setitem__` and :meth:`DataFrame.loc.__setitem__` raising ``KeyError`` when provided a Boolean generator (:issue:`39614`) +- Bug in :meth:`Series.iloc` and :meth:`DataFrame.iloc` raising a ``KeyError`` when provided a generator (:issue:`39614`) +- Bug in :meth:`DataFrame.__setitem__` not raising a ``ValueError`` when the right-hand side is a :class:`DataFrame` with the wrong number of columns (:issue:`38604`) +- Bug in :meth:`Series.__setitem__` raising a ``ValueError`` when setting a :class:`Series` with a scalar indexer (:issue:`38303`) +- Bug in :meth:`DataFrame.loc` dropping levels of a :class:`MultiIndex` when the :class:`DataFrame` used as input has only one row (:issue:`10521`) +- Bug in :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` always raising ``KeyError`` when slicing with existing strings where the :class:`Index` has milliseconds (:issue:`33589`) +- Bug in setting ``timedelta64`` or ``datetime64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`, :issue:`39619`) +- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`) +- Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrectly casting the datetime64 values to integers (:issue:`39266`) +- Bug in setting ``np.datetime64("NaT")`` into a :class:`Series` with :class:`Datetime64TZDtype` incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`) +- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when ``key=NaN`` and ``method`` is specified but ``NaN`` is not in the :class:`Index` (:issue:`39382`) +- Bug in :meth:`DatetimeIndex.insert` when inserting ``np.datetime64("NaT")`` into a timezone-aware index incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`) +- Bug in :meth:`Index.insert` incorrectly raising when setting a new column that cannot be held in the existing ``frame.columns``, and in :meth:`Series.reset_index` and :meth:`DataFrame.reset_index`, instead of casting to a compatible dtype (:issue:`39068`) +- Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`) +- Bug in :meth:`RangeIndex.astype` where, when converting to :class:`CategoricalIndex`, the categories became an :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`41263`) +- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a Boolean indexer (:issue:`39488`) +- Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`) +- Bug in :meth:`DataFrame.__setitem__` and :meth:`DataFrame.iloc.__setitem__` raising ``ValueError`` when trying to index with a row-slice and setting a list as values (:issue:`40440`) +- Bug in :meth:`DataFrame.loc` not raising ``KeyError`` when the key was not found 
in :class:`MultiIndex` and the levels were not fully specified (:issue:`41170`) +- Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contained duplicates (:issue:`40096`) +- Bug in :meth:`DataFrame.loc.__getitem__` with :class:`MultiIndex` casting to float when at least one index column has float dtype and we retrieve a scalar (:issue:`41369`) +- Bug in :meth:`DataFrame.loc` incorrectly matching non-Boolean index elements (:issue:`20432`) +- Bug in indexing with ``np.nan`` on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` incorrectly raising ``KeyError`` when ``np.nan`` keys are present (:issue:`41933`) +- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`) +- Bug in :meth:`DataFrame.at` with a :class:`CategoricalIndex` returning incorrect results when passed integer keys (:issue:`41846`) +- Bug in :meth:`DataFrame.loc` returning a :class:`MultiIndex` in the wrong order if an indexer has duplicates (:issue:`40978`) +- Bug in :meth:`DataFrame.__setitem__` raising a ``TypeError`` when using a ``str`` subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`) +- Bug in :meth:`PeriodIndex.get_loc` failing to raise a ``KeyError`` when given a :class:`Period` with a mismatched ``freq`` (:issue:`41670`) +- Bug ``.loc.__getitem__`` with a :class:`UInt64Index` and negative-integer keys raising ``OverflowError`` instead of ``KeyError`` in some cases, wrapping around to positive integers in others (:issue:`41777`) +- Bug in :meth:`Index.get_indexer` failing to raise ``ValueError`` in some cases with invalid ``method``, ``limit``, or ``tolerance`` arguments (:issue:`41918`) +- Bug when slicing a :class:`Series` or :class:`DataFrame` with a :class:`TimedeltaIndex` when passing an invalid string raising ``ValueError`` instead of a ``TypeError`` (:issue:`41821`) +- Bug in :class:`Index` constructor sometimes silently ignoring a specified ``dtype`` (:issue:`38879`) +- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. 
``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`) + +Missing +^^^^^^^ +- Bug in :class:`Grouper` did not correctly propagate the ``dropna`` argument; :meth:`.DataFrameGroupBy.transform` now correctly handles missing values for ``dropna=True`` (:issue:`35612`) +- Bug in :func:`isna`, :meth:`Series.isna`, :meth:`Index.isna`, :meth:`DataFrame.isna`, and the corresponding ``notna`` functions not recognizing ``Decimal("NaN")`` objects (:issue:`39409`) +- Bug in :meth:`DataFrame.fillna` not accepting a dictionary for the ``downcast`` keyword (:issue:`40809`) +- Bug in :func:`isna` not returning a copy of the mask for nullable types, causing any subsequent mask modification to change the original array (:issue:`40935`) +- Bug in :class:`DataFrame` construction with float data containing ``NaN`` and an integer ``dtype`` casting instead of retaining the ``NaN`` (:issue:`26919`) +- Bug in :meth:`Series.isin` and :meth:`MultiIndex.isin` didn't treat all nans as equivalent if they were in tuples (:issue:`41836`) + +MultiIndex +^^^^^^^^^^ +- Bug in :meth:`DataFrame.drop` raising a ``TypeError`` when the :class:`MultiIndex` is non-unique and ``level`` is not provided (:issue:`36293`) +- Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in the result (:issue:`38623`) +- Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when the :class:`MultiIndex` contained ``NaN`` even when they are differently ordered (:issue:`38439`) +- Bug in :meth:`MultiIndex.intersection` always returning an empty result when intersecting with :class:`CategoricalIndex` (:issue:`38653`) +- Bug in :meth:`MultiIndex.difference` incorrectly raising ``TypeError`` when indexes contain non-sortable entries (:issue:`41915`) +- Bug in :meth:`MultiIndex.reindex` raising a ``ValueError`` when used on an empty :class:`MultiIndex` and indexing only a specific level (:issue:`41170`) +- Bug in :meth:`MultiIndex.reindex` raising ``TypeError`` when reindexing against a flat :class:`Index` (:issue:`41707`) + +I/O +^^^ +- Bug in :meth:`Index.__repr__` when ``display.max_seq_items=1`` (:issue:`38415`) +- Bug in :func:`read_csv` not recognizing scientific notation if the argument ``decimal`` is set and ``engine="python"`` (:issue:`31920`) +- Bug in :func:`read_csv` interpreting ``NA`` value as comment, when ``NA`` does contain the comment string fixed for ``engine="python"`` (:issue:`34002`) +- Bug in :func:`read_csv` raising an ``IndexError`` with multiple header columns and ``index_col`` is specified when the file has no data rows (:issue:`38292`) +- Bug in :func:`read_csv` not accepting ``usecols`` with a different length than ``names`` for ``engine="python"`` (:issue:`16469`) +- Bug in :meth:`read_csv` returning object dtype when ``delimiter=","`` with ``usecols`` and ``parse_dates`` specified for ``engine="python"`` (:issue:`35873`) +- Bug in :func:`read_csv` raising a ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`) +- Bug in :func:`read_clipboard` and :func:`DataFrame.to_clipboard` not working in WSL (:issue:`38527`) +- Allow custom error values for the ``parse_dates`` argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`) +- Bug in :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` raising a ``KeyError`` when trying to apply for subclasses of ``DataFrame`` or ``Series`` (:issue:`33748`) +- Bug in :meth:`.HDFStore.put` raising a wrong ``TypeError`` when saving a DataFrame with non-string dtype 
(:issue:`34274`) +- Bug in :func:`json_normalize` resulting in the first element of a generator object not being included in the returned DataFrame (:issue:`35923`) +- Bug in :func:`read_csv` applying the thousands separator to date columns when the column should be parsed for dates and ``usecols`` is specified for ``engine="python"`` (:issue:`39365`) +- Bug in :func:`read_excel` forward filling :class:`MultiIndex` names when multiple header and index columns are specified (:issue:`34673`) +- Bug in :func:`read_excel` not respecting :func:`set_option` (:issue:`34252`) +- Bug in :func:`read_csv` not switching ``true_values`` and ``false_values`` for nullable Boolean dtype (:issue:`34655`) +- Bug in :func:`read_json` when ``orient="split"`` not maintaining a numeric string index (:issue:`28556`) +- :meth:`read_sql` returned an empty generator if ``chunksize`` was non-zero and the query returned no results. Now returns a generator with a single empty DataFrame (:issue:`34411`) +- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using the ``where`` parameter (:issue:`39189`) +- Bug in :func:`read_sas` raising a ``ValueError`` when ``datetimes`` were null (:issue:`39725`) +- Bug in :func:`read_excel` dropping empty values from single-column spreadsheets (:issue:`39808`) +- Bug in :func:`read_excel` loading trailing empty rows/columns for some filetypes (:issue:`41167`) +- Bug in :func:`read_excel` raising an ``AttributeError`` when the excel file had a ``MultiIndex`` header followed by two empty rows and no index (:issue:`40442`) +- Bug in :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`) +- Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40904`) +- Bug in :meth:`DataFrame.to_string` adding an extra dot and misaligning the truncation row when ``index=False`` (:issue:`40904`) +- Bug in :func:`read_orc` always raising an ``AttributeError`` (:issue:`40918`) +- Bug in :func:`read_csv` and :func:`read_table` silently ignoring ``prefix`` if ``names`` and ``prefix`` are defined, now raising a ``ValueError`` (:issue:`39123`) +- Bug in :func:`read_csv` and :func:`read_excel` not respecting the dtype for a duplicated column name when ``mangle_dupe_cols`` is set to ``True`` (:issue:`35211`) +- Bug in :func:`read_csv` silently ignoring ``sep`` if ``delimiter`` and ``sep`` are defined, now raising a ``ValueError`` (:issue:`39823`) +- Bug in :func:`read_csv` and :func:`read_table` misinterpreting arguments when ``sys.setprofile`` had been previously called (:issue:`41069`) +- Bug in the conversion from PyArrow to pandas (e.g. 
for reading Parquet) with nullable dtypes and a PyArrow array whose data buffer size is not a multiple of the dtype size (:issue:`40896`) +- Bug in :func:`read_excel` would raise an error when pandas could not determine the file type even though the user specified the ``engine`` argument (:issue:`41225`) +- Bug in :func:`read_clipboard` copying from an excel file shifts values into the wrong column if there are null values in first column (:issue:`41108`) +- Bug in :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` raising a ``TypeError`` when trying to append a string column to an incompatible column (:issue:`41897`) + +Period +^^^^^^ +- Comparisons of :class:`Period` objects or :class:`Index`, :class:`Series`, or :class:`DataFrame` with mismatched ``PeriodDtype`` now behave like other mismatched-type comparisons, returning ``False`` for equals, ``True`` for not-equal, and raising ``TypeError`` for inequality checks (:issue:`39274`) + +Plotting +^^^^^^^^ +- Bug in :func:`plotting.scatter_matrix` raising when 2d ``ax`` argument passed (:issue:`16253`) +- Prevent warnings when Matplotlib's ``constrained_layout`` is enabled (:issue:`25261`) +- Bug in :func:`DataFrame.plot` was showing the wrong colors in the legend if the function was called repeatedly and some calls used ``yerr`` while others didn't (:issue:`39522`) +- Bug in :func:`DataFrame.plot` was showing the wrong colors in the legend if the function was called repeatedly and some calls used ``secondary_y`` and others use ``legend=False`` (:issue:`40044`) +- Bug in :meth:`DataFrame.plot.box` when ``dark_background`` theme was selected, caps or min/max markers for the plot were not visible (:issue:`40769`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :meth:`.GroupBy.agg` with :class:`PeriodDtype` columns incorrectly casting results too aggressively (:issue:`38254`) +- Bug in :meth:`.SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical Series were not tallied (:issue:`38672`) +- Bug in :meth:`.SeriesGroupBy.value_counts` where an error was raised on an empty Series (:issue:`39172`) +- Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`) +- Fixed bug in :meth:`.GroupBy.sum` causing a loss of precision by now using Kahan summation (:issue:`38778`) +- Fixed bug in :meth:`.GroupBy.cumsum` and :meth:`.GroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`) +- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising a ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`) +- Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`) +- Bug in :meth:`Series.resample` would raise when the index was a :class:`PeriodIndex` consisting of ``NaT`` (:issue:`39227`) +- Bug in :meth:`.RollingGroupby.corr` and :meth:`.ExpandingGroupby.corr` where the groupby column would return ``0`` instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) +- Bug in :meth:`.ExpandingGroupby.corr` and :meth:`.ExpandingGroupby.cov` where ``1`` would be returned instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) +- Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) +- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` 
not calculating window bounds correctly when window is an offset and dates are in descending order (:issue:`40002`) +- Bug in :meth:`Series.groupby` and :meth:`DataFrame.groupby` on an empty ``Series`` or ``DataFrame`` would lose index, columns, and/or data types when directly using the methods ``idxmax``, ``idxmin``, ``mad``, ``min``, ``max``, ``sum``, ``prod``, and ``skew`` or using them through ``apply``, ``aggregate``, or ``resample`` (:issue:`26411`) +- Bug in :meth:`.GroupBy.apply` where a :class:`MultiIndex` would be created instead of an :class:`Index` when used on a :class:`.RollingGroupby` object (:issue:`39732`) +- Bug in :meth:`.DataFrameGroupBy.sample` where an error was raised when ``weights`` was specified and the index was an :class:`Int64Index` (:issue:`39927`) +- Bug in :meth:`.DataFrameGroupBy.aggregate` and :meth:`.Resampler.aggregate` would sometimes raise a ``SpecificationError`` when passed a dictionary and columns were missing; will now always raise a ``KeyError`` instead (:issue:`40004`) +- Bug in :meth:`.DataFrameGroupBy.sample` where column selection was not applied before computing the result (:issue:`39928`) +- Bug in :class:`.ExponentialMovingWindow` when calling ``__getitem__`` would incorrectly raise a ``ValueError`` when providing ``times`` (:issue:`40164`) +- Bug in :class:`.ExponentialMovingWindow` when calling ``__getitem__`` would not retain ``com``, ``span``, ``alpha`` or ``halflife`` attributes (:issue:`40164`) +- :class:`.ExponentialMovingWindow` now raises a ``NotImplementedError`` when specifying ``times`` with ``adjust=False`` due to an incorrect calculation (:issue:`40098`) +- Bug in :meth:`.ExponentialMovingWindowGroupby.mean` where the ``times`` argument was ignored when ``engine='numba'`` (:issue:`40951`) +- Bug in :meth:`.ExponentialMovingWindowGroupby.mean` where the wrong times were used in the case of multiple groups (:issue:`40951`) +- Bug in :class:`.ExponentialMovingWindowGroupby` where the times vector and values became out of sync for non-trivial groups (:issue:`40951`) +- Bug in :meth:`Series.asfreq` and :meth:`DataFrame.asfreq` dropping rows when the index was not sorted (:issue:`39805`) +- Bug in aggregation functions for :class:`DataFrame` not respecting the ``numeric_only`` argument when the ``level`` keyword was given (:issue:`40660`) +- Bug in :meth:`.SeriesGroupBy.aggregate` where using a user-defined function to aggregate a Series with an object-typed :class:`Index` caused an incorrect :class:`Index` shape (:issue:`40014`) +- Bug in :class:`.RollingGroupby` where the ``as_index=False`` argument in ``groupby`` was ignored (:issue:`39433`) +- Bug in :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising a ``ValueError`` when used with nullable type columns holding ``NA`` even with ``skipna=True`` (:issue:`40585`) +- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementation bounds (:issue:`40767`) +- Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising a ``TypeError`` (:issue:`41010`) +- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing the wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`) +- Bug in :meth:`DataFrame.rolling` returning a mean of zero for an all-``NaN`` window with ``min_periods=0`` if the calculation is not numerically stable (:issue:`41053`) +- Bug in :meth:`DataFrame.rolling` returning a non-zero sum for an all-``NaN`` window with ``min_periods=0`` if the calculation is not numerically stable 
(:issue:`41053`) +- Bug in :meth:`.SeriesGroupBy.agg` failing to retain ordered :class:`CategoricalDtype` on order-preserving aggregations (:issue:`41147`) +- Bug in :meth:`.GroupBy.min` and :meth:`.GroupBy.max` with multiple object-dtype columns and ``numeric_only=False`` incorrectly raising a ``ValueError`` (:issue:`41111`) +- Bug in :meth:`.DataFrameGroupBy.rank` with the GroupBy object's ``axis=0`` and the ``rank`` method's keyword ``axis=1`` (:issue:`41320`) +- Bug in :meth:`DataFrameGroupBy.__getitem__` with non-unique columns incorrectly returning a malformed :class:`SeriesGroupBy` instead of :class:`DataFrameGroupBy` (:issue:`41427`) +- Bug in :meth:`.DataFrameGroupBy.transform` with non-unique columns incorrectly raising an ``AttributeError`` (:issue:`41427`) +- Bug in :meth:`.Resampler.apply` with non-unique columns incorrectly dropping duplicated columns (:issue:`41445`) +- Bug in :meth:`Series.groupby` aggregations incorrectly returning empty :class:`Series` instead of raising ``TypeError`` on aggregations that are invalid for its dtype, e.g. ``.prod`` with ``datetime64[ns]`` dtype (:issue:`41342`) +- Bug in :class:`DataFrameGroupBy` aggregations incorrectly failing to drop columns with invalid dtypes for that aggregation when there are no valid columns (:issue:`41291`) +- Bug in :meth:`DataFrame.rolling.__iter__` where ``on`` was not assigned to the index of the resulting objects (:issue:`40373`) +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`41647`) +- Bug in :class:`DataFrameGroupBy` methods ``agg``, ``transform``, ``sum``, ``bfill``, ``ffill``, ``pad``, ``pct_change``, ``shift``, ``ohlc`` dropping ``.columns.names`` (:issue:`41497`) + + +Reshaping +^^^^^^^^^ +- Bug in :func:`merge` raising error when performing an inner join with partial index and ``right_index=True`` when there was no overlap between indices (:issue:`33814`) +- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) +- Bug in :func:`merge_asof` propagating the right Index with ``left_index=True`` and ``right_on`` specification instead of left Index (:issue:`33463`) +- Bug in :meth:`DataFrame.join` on a DataFrame with a :class:`MultiIndex` returned the wrong result when one of both indexes had only one level (:issue:`36909`) +- :func:`merge_asof` now raises a ``ValueError`` instead of a cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`) +- Bug in :meth:`DataFrame.join` not assigning values correctly when the DataFrame had a :class:`MultiIndex` where at least one dimension had dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`) +- :meth:`Series.value_counts` and :meth:`Series.mode` now return consistent keys in original order (:issue:`12679`, :issue:`11227` and :issue:`39007`) +- Bug in :meth:`DataFrame.stack` not handling ``NaN`` in :class:`MultiIndex` columns correctly (:issue:`39481`) +- Bug in :meth:`DataFrame.apply` would give incorrect results when the argument ``func`` was a string, ``axis=1``, and the axis argument was not supported; now raises a ``ValueError`` instead (:issue:`39211`) +- Bug in :meth:`DataFrame.sort_values` not reshaping the index correctly after sorting on columns when ``ignore_index=True`` (:issue:`39464`) +- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``ExtensionDtype`` dtypes (:issue:`39454`) +- Bug in 
:meth:`DataFrame.append` returning incorrect dtypes when used with combinations of ``datetime64`` and ``timedelta64`` dtypes (:issue:`39574`) +- Bug in :meth:`DataFrame.append` with a :class:`DataFrame` with a :class:`MultiIndex` and appending a :class:`Series` whose :class:`Index` is not a :class:`MultiIndex` (:issue:`41707`) +- Bug in :meth:`DataFrame.pivot_table` returning a :class:`MultiIndex` for a single value when operating on an empty DataFrame (:issue:`13483`) +- :class:`Index` can now be passed to the :func:`numpy.all` function (:issue:`40180`) +- Bug in :meth:`DataFrame.stack` not preserving ``CategoricalDtype`` in a :class:`MultiIndex` (:issue:`36991`) +- Bug in :func:`to_datetime` raising an error when the input sequence contained unhashable items (:issue:`39756`) +- Bug in :meth:`Series.explode` preserving the index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`) +- Bug in :func:`to_datetime` raising a ``ValueError`` when :class:`Series` contains ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`) +- Bug in :meth:`Series.unstack` and :meth:`DataFrame.unstack` with object-dtype values containing timezone-aware datetime objects incorrectly raising ``TypeError`` (:issue:`41875`) +- Bug in :meth:`DataFrame.melt` raising ``InvalidIndexError`` when :class:`DataFrame` has duplicate columns used as ``value_vars`` (:issue:`41951`) + +Sparse +^^^^^^ +- Bug in :meth:`DataFrame.sparse.to_coo` raising a ``KeyError`` with columns that are a numeric :class:`Index` without a ``0`` (:issue:`18414`) +- Bug in :meth:`SparseArray.astype` with ``copy=False`` producing incorrect results when going from integer dtype to floating dtype (:issue:`34456`) +- Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` would always return an empty result (:issue:`40921`) + +ExtensionArray +^^^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.where` when ``other`` is a Series with an :class:`ExtensionDtype` (:issue:`38729`) +- Fixed bug where :meth:`Series.idxmax`, :meth:`Series.idxmin`, :meth:`Series.argmax`, and :meth:`Series.argmin` would fail when the underlying data is an :class:`ExtensionArray` (:issue:`32749`, :issue:`33719`, :issue:`36566`) +- Fixed bug where some properties of subclasses of :class:`PandasExtensionDtype` where improperly cached (:issue:`40329`) +- Bug in :meth:`DataFrame.mask` where masking a DataFrame with an :class:`ExtensionDtype` raises a ``ValueError`` (:issue:`40941`) + +Styler +^^^^^^ +- Bug in :class:`.Styler` where the ``subset`` argument in methods raised an error for some valid MultiIndex slices (:issue:`33562`) +- :class:`.Styler` rendered HTML output has seen minor alterations to support w3 good code standards (:issue:`39626`) +- Bug in :class:`.Styler` where rendered HTML was missing a column class identifier for certain header cells (:issue:`39716`) +- Bug in :meth:`.Styler.background_gradient` where text-color was not determined correctly (:issue:`39888`) +- Bug in :meth:`.Styler.set_table_styles` where multiple elements in CSS-selectors of the ``table_styles`` argument were not correctly added (:issue:`34061`) +- Bug in :class:`.Styler` where copying from Jupyter dropped the top left cell and misaligned headers (:issue:`12147`) +- Bug in :class:`Styler.where` where ``kwargs`` were not passed to the applicable callable (:issue:`40845`) +- Bug in :class:`.Styler` causing CSS to duplicate on multiple renders (:issue:`39395`, :issue:`40334`) + +Other +^^^^^ +- ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` 
(:issue:`38782`) +- Bug in :meth:`Series.where` with numeric dtype and ``other=None`` not casting to ``nan`` (:issue:`39761`) +- Bug in :func:`.assert_series_equal`, :func:`.assert_frame_equal`, :func:`.assert_index_equal` and :func:`.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) +- Bug in :func:`.assert_index_equal` with ``exact=True`` not raising when comparing :class:`CategoricalIndex` instances with ``Int64Index`` and ``RangeIndex`` categories (:issue:`41263`) +- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, and :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) +- Bug in :func:`show_versions` where console JSON output was not proper JSON (:issue:`39701`) +- pandas can now compile on z/OS when using ``xlc`` (:issue:`35826`) +- Bug in :func:`pandas.util.hash_pandas_object` not recognizing ``hash_key``, ``encoding`` and ``categorize`` when the input object type is a :class:`DataFrame` (:issue:`41404`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_130.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.2.5..v1.3.0 diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst new file mode 100644 index 00000000..a57995eb --- /dev/null +++ b/doc/source/whatsnew/v1.3.1.rst @@ -0,0 +1,51 @@ +.. _whatsnew_131: + +What's new in 1.3.1 (July 25, 2021) +----------------------------------- + +These are the changes in pandas 1.3.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_131.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Pandas could not be built on PyPy (:issue:`42355`) +- :class:`DataFrame` constructed with an older version of pandas could not be unpickled (:issue:`42345`) +- Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42248`) +- Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`) +- Fixed regression in :meth:`DataFrame.astype` changing the order of noncontiguous data (:issue:`42396`) +- Performance regression in :class:`DataFrame` in reduction operations requiring casting such as :meth:`DataFrame.mean` on integer data (:issue:`38592`) +- Performance regression in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when the ``orient`` argument is one of "records", "dict", or "split" (:issue:`42352`) +- Fixed regression in indexing with a ``list`` subclass incorrectly raising ``TypeError`` (:issue:`42433`, :issue:`42461`) +- Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`) +- Regression in :func:`concat` between objects with bool dtype and integer dtype casting to object instead of to integer (:issue:`42092`) +- Bug in :class:`Series` constructor not accepting a ``dask.Array`` (:issue:`38645`) +- Fixed regression for ``SettingWithCopyWarning`` displaying incorrect stacklevel (:issue:`42570`) +- Fixed regression for :func:`merge_asof` raising ``KeyError`` when one of the ``by`` columns is in the index (:issue:`34488`) +- Fixed regression in :func:`to_datetime` returning ``pd.NaT`` for inputs that produce duplicated values, when ``cache=True`` 
(:issue:`42259`) +- Fixed regression in :meth:`SeriesGroupBy.value_counts` that resulted in an ``IndexError`` when called on a Series with one row (:issue:`42618`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_131.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`) +- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`) +- Fixed bug in :meth:`.Styler.set_sticky` not handling index names correctly for single index columns case (:issue:`42537`) +- Fixed bug in :meth:`DataFrame.copy` failing to consolidate blocks in the result (:issue:`42579`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_131.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.3.0..v1.3.1 diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst new file mode 100644 index 00000000..e3c62685 --- /dev/null +++ b/doc/source/whatsnew/v1.3.2.rst @@ -0,0 +1,51 @@ +.. _whatsnew_132: + +What's new in 1.3.2 (August 15, 2021) +------------------------------------- + +These are the changes in pandas 1.3.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_132.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Performance regression in :meth:`DataFrame.isin` and :meth:`Series.isin` for nullable data types (:issue:`42714`) +- Regression in updating values of :class:`Series` using boolean index, created by using :meth:`DataFrame.pop` (:issue:`42530`) +- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) +- Fixed regression in :meth:`DataFrame.shift` where ``TypeError`` occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) +- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) +- Regression in :meth:`DataFrame.drop` does nothing if :class:`MultiIndex` has duplicates and indexer is a tuple or list of tuples (:issue:`42771`) +- Fixed regression where :func:`read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to ``None`` (:issue:`42387`) +- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) +- Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`) +- Fixed regression in :func:`concat` where ``copy=False`` was not honored in ``axis=1`` Series concatenation (:issue:`42501`) +- Regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullable integer or float dtype (:issue:`42816`) +- Fixed regression in :meth:`Series.quantile` with :class:`Int64Dtype` (:issue:`42626`) +- Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` where supplying the ``by`` argument with a Series named with a tuple would incorrectly raise (:issue:`42731`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_132.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :func:`read_excel` modifies the dtypes dictionary when reading a file with duplicate columns (:issue:`42462`) +- 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`) +- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and ``window`` is an offset that covers all the rows (:issue:`42753`) +- :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`) +- :meth:`.Styler.set_sticky` has amended CSS to control the column/index names and ensure the correct sticky positions (:issue:`42537`) +- Bug in de-serializing datetime indexes in PYTHONOPTIMIZED mode (:issue:`42866`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_132.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.3.1..v1.3.2 diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst new file mode 100644 index 00000000..ecec6d97 --- /dev/null +++ b/doc/source/whatsnew/v1.3.3.rst @@ -0,0 +1,57 @@ +.. _whatsnew_133: + +What's new in 1.3.3 (September 12, 2021) +---------------------------------------- + +These are the changes in pandas 1.3.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_133.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) +- Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) +- Fixed regression in :meth:`.GroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`) +- Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`) +- Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`) +- Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`) +- Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`) +- Fixed regression in :meth:`DataFrame.loc.__setitem__` raising ``ValueError`` when setting array as cell value (:issue:`43422`) +- Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`) +- Fixed regression in :meth:`DataFrame.__getitem__` raising error for slice of :class:`DatetimeIndex` when index is non monotonic (:issue:`43223`) +- Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`) +- Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`) +- Fixed regression in :meth:`DataFrame.groupby` where aggregation on columns with object types dropped results on those columns (:issue:`42395`, :issue:`43108`) +- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float`` 
``Series`` with a list-like fill value having a dtype which couldn't be cast losslessly (like ``float32`` filled with ``float64``) (:issue:`43424`) +- Fixed regression in :func:`read_csv` raising ``AttributeError`` when the file handle is a ``tempfile.SpooledTemporaryFile`` object (:issue:`43439`) +- Fixed performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_133.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvement for :meth:`DataFrame.__setitem__` when the key or value is not a :class:`DataFrame`, or key is not list-like (:issue:`43274`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_133.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Fixed bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_133.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.3.2..v1.3.3 diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst new file mode 100644 index 00000000..b46744d5 --- /dev/null +++ b/doc/source/whatsnew/v1.3.4.rst @@ -0,0 +1,57 @@ +.. _whatsnew_134: + +What's new in 1.3.4 (October 17, 2021) +-------------------------------------- + +These are the changes in pandas 1.3.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_134.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.convert_dtypes` incorrectly converting byte strings to strings (:issue:`43183`) +- Fixed regression in :meth:`.GroupBy.agg` where it was failing silently with mixed data types along ``axis=1`` and :class:`MultiIndex` (:issue:`43209`) +- Fixed regression in :func:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`) +- Fixed regression in :meth:`DataFrame.corr` raising ``ValueError`` with ``method="spearman"`` on 32-bit platforms (:issue:`43588`) +- Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`) +- Fixed performance regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` with :class:`StringDtype` (:issue:`41596`) +- Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`) +- Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`) +- Fixed regression in :func:`read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`) +- Fixed regression in :meth:`DataFrame.explode` raising ``AssertionError`` when ``column`` is any scalar which is not a string (:issue:`43314`) +- Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`) +- Fixed regression when iterating over a :class:`DataFrame.groupby.rolling` object causing the resulting DataFrames to have an incorrect index if the input groupings were not sorted (:issue:`43386`) +- Fixed regression in :meth:`DataFrame.groupby.rolling.cov` and :meth:`DataFrame.groupby.rolling.corr` computing 
incorrect results if the input groupings were not sorted (:issue:`43386`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_134.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Fixed bug in :meth:`pandas.DataFrame.groupby.rolling` and :class:`pandas.api.indexers.FixedForwardWindowIndexer` leading to segfaults and window endpoints being mixed across groups (:issue:`43267`) +- Fixed bug in :meth:`.GroupBy.mean` with datetimelike values including ``NaT`` values returning incorrect results (:issue:`43132`) +- Fixed bug in :meth:`Series.aggregate` not passing the first ``args`` to the user supplied ``func`` in certain cases (:issue:`43357`) +- Fixed memory leaks in :meth:`Series.rolling.quantile` and :meth:`Series.rolling.median` (:issue:`43339`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_134.other: + +Other +~~~~~ +- The minimum version of Cython needed to compile pandas is now ``0.29.24`` (:issue:`43729`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_134.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.3.3..v1.3.4 diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst new file mode 100644 index 00000000..339bd7de --- /dev/null +++ b/doc/source/whatsnew/v1.3.5.rst @@ -0,0 +1,34 @@ +.. _whatsnew_135: + +What's new in 1.3.5 (December 12, 2021) +--------------------------------------- + +These are the changes in pandas 1.3.5. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_135.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`Series.equals` when comparing floats with dtype object to None (:issue:`44190`) +- Fixed regression in :func:`merge_asof` raising error when array was supplied as join key (:issue:`42844`) +- Fixed regression when resampling :class:`DataFrame` with :class:`DateTimeIndex` with empty groups and ``uint8``, ``uint16`` or ``uint32`` columns incorrectly raising ``RuntimeError`` (:issue:`43329`) +- Fixed regression in creating a :class:`DataFrame` from a timezone-aware :class:`Timestamp` scalar near a Daylight Savings Time transition (:issue:`42505`) +- Fixed performance regression in :func:`read_csv` (:issue:`44106`) +- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`) +- Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`) +- Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`) + + +.. --------------------------------------------------------------------------- + +.. _whatsnew_135.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.3.4..v1.3.5|HEAD diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst new file mode 100644 index 00000000..697070e5 --- /dev/null +++ b/doc/source/whatsnew/v1.4.0.rst @@ -0,0 +1,1112 @@ +.. _whatsnew_140: + +What's new in 1.4.0 (January 22, 2022) +-------------------------------------- + +These are the changes in pandas 1.4.0. See :ref:`release` for a full changelog +including other versions of pandas. 
+ +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_140.enhancements: + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_140.enhancements.warning_lineno: + +Improved warning messages +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, warning messages may have pointed to lines within the pandas +library. Running the script ``setting_with_copy_warning.py`` + +.. code-block:: python + + import pandas as pd + + df = pd.DataFrame({'a': [1, 2, 3]}) + df[:2].loc[:, 'a'] = 5 + +with pandas 1.3 resulted in:: + + .../site-packages/pandas/core/indexing.py:1951: SettingWithCopyWarning: + A value is trying to be set on a copy of a slice from a DataFrame. + +This made it difficult to determine where the warning was being generated from. +Now pandas will inspect the call stack, reporting the first line outside of the +pandas library that gave rise to the warning. The output of the above script is +now:: + + setting_with_copy_warning.py:4: SettingWithCopyWarning: + A value is trying to be set on a copy of a slice from a DataFrame. + + + + +.. _whatsnew_140.enhancements.ExtensionIndex: + +Index can hold arbitrary ExtensionArrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Until now, passing a custom :class:`ExtensionArray` to ``pd.Index`` would cast +the array to ``object`` dtype. Now :class:`Index` can directly hold arbitrary +ExtensionArrays (:issue:`43930`). + +*Previous behavior*: + +.. ipython:: python + + arr = pd.array([1, 2, pd.NA]) + idx = pd.Index(arr) + +In the old behavior, ``idx`` would be object-dtype: + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: idx + Out[1]: Index([1, 2, <NA>], dtype='object') + +With the new behavior, we keep the original dtype: + +*New behavior*: + +.. ipython:: python + + idx + +One exception to this is ``SparseArray``, which will continue to cast to numpy +dtype until pandas 2.0. At that point it will retain its dtype like other +ExtensionArrays. + +.. _whatsnew_140.enhancements.styler: + +Styler +^^^^^^ + +:class:`.Styler` has been further developed in 1.4.0. The following general enhancements have been made: + + - Styling and formatting of indexes has been added, with :meth:`.Styler.apply_index`, :meth:`.Styler.applymap_index` and :meth:`.Styler.format_index`. These mirror the signature of the methods already used to style and format data values, and work with HTML, LaTeX and Excel formats (:issue:`41893`, :issue:`43101`, :issue:`41993`, :issue:`41995`) + - The new method :meth:`.Styler.hide` deprecates :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` (:issue:`43758`) + - The keyword arguments ``level`` and ``names`` have been added to :meth:`.Styler.hide` (and implicitly to the deprecated methods :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns`) for additional control of visibility of MultiIndexes and of Index names (:issue:`25475`, :issue:`43404`, :issue:`43346`) + - The :meth:`.Styler.export` and :meth:`.Styler.use` have been updated to address all of the added functionality from v1.2.0 and v1.3.0 (:issue:`40675`) + - Global options under the category ``pd.options.styler`` have been extended to configure default ``Styler`` properties which address formatting, encoding, and HTML and LaTeX rendering. Note that formerly ``Styler`` relied on ``display.html.use_mathjax``, which has now been replaced by ``styler.html.mathjax`` (:issue:`41395`) + - Validation of certain keyword arguments, e.g. 
``caption`` (:issue:`43368`) + - Various bug fixes as recorded below + +Additionally there are specific enhancements to the HTML specific rendering: + + - :meth:`.Styler.bar` introduces additional arguments to control alignment and display (:issue:`26070`, :issue:`36419`), and it also validates the input arguments ``width`` and ``height`` (:issue:`42511`) + - :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`) + - :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`) + - Custom CSS classes can now be directly specified without string replacement (:issue:`43686`) + - Ability to render hyperlinks automatically via a new ``hyperlinks`` formatting keyword argument (:issue:`45058`) + +There are also some LaTeX specific enhancements: + + - :meth:`.Styler.to_latex` introduces keyword argument ``environment``, which also allows a specific "longtable" entry through a separate jinja2 template (:issue:`41866`) + - Naive sparsification is now possible for LaTeX without the necessity of including the multirow package (:issue:`43369`) + - *cline* support has been added for :class:`MultiIndex` row sparsification through a keyword argument (:issue:`45138`) + +.. _whatsnew_140.enhancements.pyarrow_csv_engine: + +Multi-threaded CSV reading with a new CSV Engine based on pyarrow +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`pandas.read_csv` now accepts ``engine="pyarrow"`` (requires at least +``pyarrow`` 1.0.1) as an argument, allowing for faster csv parsing on multicore +machines with pyarrow installed. See the :doc:`I/O docs ` for +more info. (:issue:`23697`, :issue:`43706`) + +.. _whatsnew_140.enhancements.window_rank: + +Rank function for rolling and expanding windows +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Added ``rank`` function to :class:`Rolling` and :class:`Expanding`. The new +function supports the ``method``, ``ascending``, and ``pct`` flags of +:meth:`DataFrame.rank`. The ``method`` argument supports ``min``, ``max``, and +``average`` ranking methods. +Example: + +.. ipython:: python + + s = pd.Series([1, 4, 2, 3, 5, 3]) + s.rolling(3).rank() + + s.rolling(3).rank(method="max") + +.. _whatsnew_140.enhancements.groupby_indexing: + +Groupby positional indexing +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is now possible to specify positional ranges relative to the ends of each +group. + +Negative arguments for :meth:`.GroupBy.head` and :meth:`.GroupBy.tail` now work +correctly and result in ranges relative to the end and start of each group, +respectively. Previously, negative arguments returned empty frames. + +.. ipython:: python + + df = pd.DataFrame([["g", "g0"], ["g", "g1"], ["g", "g2"], ["g", "g3"], + ["h", "h0"], ["h", "h1"]], columns=["A", "B"]) + df.groupby("A").head(-1) + + +:meth:`.GroupBy.nth` now accepts a slice or list of integers and slices. + +.. ipython:: python + + df.groupby("A").nth(slice(1, -1)) + df.groupby("A").nth([slice(None, 1), slice(-1, None)]) + +:meth:`.GroupBy.nth` now accepts index notation. + +.. ipython:: python + + df.groupby("A").nth[1, -1] + df.groupby("A").nth[1:-1] + df.groupby("A").nth[:1, -1:] + +.. 
_whatsnew_140.dict_tight:
+
+DataFrame.from_dict and DataFrame.to_dict have new ``'tight'`` option
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A new ``'tight'`` dictionary format that preserves :class:`MultiIndex` entries
+and names is now available with the :meth:`DataFrame.from_dict` and
+:meth:`DataFrame.to_dict` methods and can be used with the standard ``json``
+library to produce a tight representation of :class:`DataFrame` objects
+(:issue:`4889`).
+
+.. ipython:: python
+
+    df = pd.DataFrame.from_records(
+        [[1, 3], [2, 4]],
+        index=pd.MultiIndex.from_tuples([("a", "b"), ("a", "c")],
+                                        names=["n1", "n2"]),
+        columns=pd.MultiIndex.from_tuples([("x", 1), ("y", 2)],
+                                          names=["z1", "z2"]),
+    )
+    df
+    df.to_dict(orient='tight')
+
+.. _whatsnew_140.enhancements.other:
+
+Other enhancements
+^^^^^^^^^^^^^^^^^^
+- :meth:`concat` will preserve ``attrs`` when they are the same for all objects and discard ``attrs`` when they differ (:issue:`41828`)
+- :class:`DataFrameGroupBy` operations with ``as_index=False`` now correctly retain ``ExtensionDtype`` dtypes for columns being grouped on (:issue:`41373`)
+- Add support for assigning values to the ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`)
+- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
+- :meth:`Series.ewm` and :meth:`DataFrame.ewm` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
+- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
+- :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
+- :meth:`DataFrame.to_stata` and :meth:`StataWriter` now accept the keyword-only argument ``value_labels`` to save labels for non-categorical columns (:issue:`38454`)
+- Methods that rely on hashmap-based algorithms, such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize`, previously ignored the imaginary component of complex numbers; they now take it into account (:issue:`17927`)
+- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix`, introduced in Python 3.9, to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
+- Attempting to write into a file in a missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions the missing parent directory; the same is true for the :class:`Series` counterparts (:issue:`24306`)
+- Indexing with ``.loc`` and ``.iloc`` now supports ``Ellipsis`` (:issue:`37750`)
+- :meth:`IntegerArray.all`, :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`)
+- Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
+- :meth:`DataFrame.__pos__` and :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
+- The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
+- Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
+- :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is ``None`` (:issue:`43563`, :issue:`32835`, :issue:`25549`)
+- :meth:`DataFrame.dropna` now accepts a single label as ``subset`` along with array-like (:issue:`41021`)
+- Added :meth:`DataFrameGroupBy.value_counts` (:issue:`43564`)
+- :func:`read_csv` now accepts a ``callable`` function in ``on_bad_lines`` when ``engine="python"`` for custom handling of bad lines (:issue:`5686`)
+- The :class:`ExcelWriter` argument ``if_sheet_exists`` now accepts the value ``"overlay"`` (:issue:`40231`)
+- :meth:`read_excel` now accepts a ``decimal`` argument that allows the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`)
+- :meth:`.GroupBy.mean`, :meth:`.GroupBy.std`, :meth:`.GroupBy.var`, and :meth:`.GroupBy.sum` now support Numba execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`, :issue:`44939`)
+- :meth:`Timestamp.isoformat` now handles the ``timespec`` argument from the base ``datetime`` class (:issue:`26131`)
+- :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`)
+- New option ``display.max_dir_items`` customizes the number of columns added to :meth:`DataFrame.__dir__` and suggested for tab completion (:issue:`37996`)
+- Added "Juneteenth National Independence Day" to ``USFederalHolidayCalendar`` (:issue:`44574`)
+- :meth:`.Rolling.var`, :meth:`.Expanding.var`, :meth:`.Rolling.std`, and :meth:`.Expanding.std` now support Numba execution with the ``engine`` keyword (:issue:`44461`)
+- :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`)
+- Implemented :meth:`IntervalArray.min` and :meth:`IntervalArray.max`, as a result of which ``min`` and ``max`` now work for :class:`IntervalIndex`, :class:`Series` and :class:`DataFrame` with ``IntervalDtype`` (:issue:`44746`)
+- :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`)
+- :meth:`read_json` can now parse unsigned long long integers (:issue:`26068`)
+- :meth:`DataFrame.take` now raises a ``TypeError`` when passed a scalar for the indexer (:issue:`42875`)
+- :meth:`is_list_like` now identifies duck-arrays as list-like unless ``.ndim == 0`` (:issue:`35131`)
+- :class:`ExtensionDtype` and :class:`ExtensionArray` are now (de)serialized when exporting a :class:`DataFrame` with :meth:`DataFrame.to_json` using ``orient='table'`` (:issue:`20612`, :issue:`44705`)
+- Add support for Zstandard compression to :meth:`DataFrame.to_pickle`/:meth:`read_pickle` and friends (:issue:`43925`)
+- :meth:`DataFrame.to_sql` now returns an ``int`` of the number of written rows (:issue:`23998`)
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_140.notable_bug_fixes:
+
+Notable bug fixes
+~~~~~~~~~~~~~~~~~
+
+These are bug fixes that might have notable behavior changes.
+
+.. _whatsnew_140.notable_bug_fixes.inconsistent_date_string_parsing:
+
+Inconsistent date string parsing
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``dayfirst`` option of :func:`to_datetime` isn't strict, and this can lead
+to surprising behavior:
+
+.. 
ipython:: python + :okwarning: + + pd.to_datetime(["31-12-2021"], dayfirst=False) + +Now, a warning will be raised if a date string cannot be parsed accordance to +the given ``dayfirst`` value when the value is a delimited date string (e.g. +``31-12-2012``). + +.. _whatsnew_140.notable_bug_fixes.concat_with_empty_or_all_na: + +Ignoring dtypes in concat with empty or all-NA columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + This behaviour change has been reverted in pandas 1.4.3. + +When using :func:`concat` to concatenate two or more :class:`DataFrame` objects, +if one of the DataFrames was empty or had all-NA values, its dtype was +*sometimes* ignored when finding the concatenated dtype. These are now +consistently *not* ignored (:issue:`43507`). + +.. ipython:: python + + df1 = pd.DataFrame({"bar": [pd.Timestamp("2013-01-01")]}, index=range(1)) + df2 = pd.DataFrame({"bar": np.nan}, index=range(1, 2)) + res = pd.concat([df1, df2]) + +Previously, the float-dtype in ``df2`` would be ignored so the result dtype +would be ``datetime64[ns]``. As a result, the ``np.nan`` would be cast to +``NaT``. + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: res + Out[4]: + bar + 0 2013-01-01 + 1 NaT + +Now the float-dtype is respected. Since the common dtype for these DataFrames is +object, the ``np.nan`` is retained. + +*New behavior*: + +.. code-block:: ipython + + In [4]: res + Out[4]: + bar + 0 2013-01-01 00:00:00 + 1 NaN + + + +.. _whatsnew_140.notable_bug_fixes.value_counts_and_mode_do_not_coerce_to_nan: + +Null-values are no longer coerced to NaN-value in value_counts and mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`Series.value_counts` and :meth:`Series.mode` no longer coerce ``None``, +``NaT`` and other null-values to a NaN-value for ``np.object``-dtype. This +behavior is now consistent with ``unique``, ``isin`` and others +(:issue:`42688`). + +.. ipython:: python + + s = pd.Series([True, None, pd.NaT, None, pd.NaT, None]) + res = s.value_counts(dropna=False) + +Previously, all null-values were replaced by a NaN-value. + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: res + Out[3]: + NaN 5 + True 1 + dtype: int64 + +Now null-values are no longer mangled. + +*New behavior*: + +.. ipython:: python + + res + +.. _whatsnew_140.notable_bug_fixes.read_csv_mangle_dup_cols: + +mangle_dupe_cols in read_csv no longer renames unique columns conflicting with target names +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_csv` no longer renames unique column labels which conflict with the target +names of duplicated columns. Already existing columns are skipped, i.e. the next +available index is used for the target column name (:issue:`14704`). + +.. ipython:: python + + import io + + data = "a,a,a.1\n1,2,3" + res = pd.read_csv(io.StringIO(data)) + +Previously, the second column was called ``a.1``, while the third column was +also renamed to ``a.1.1``. + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: res + Out[3]: + a a.1 a.1.1 + 0 1 2 3 + +Now the renaming checks if ``a.1`` already exists when changing the name of the +second column and jumps this index. The second column is instead renamed to +``a.2``. + +*New behavior*: + +.. ipython:: python + + res + +.. 
_whatsnew_140.notable_bug_fixes.unstack_pivot_int32_limit: + +unstack and pivot_table no longer raises ValueError for result that would exceed int32 limit +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously :meth:`DataFrame.pivot_table` and :meth:`DataFrame.unstack` would +raise a ``ValueError`` if the operation could produce a result with more than +``2**31 - 1`` elements. This operation now raises a +:class:`errors.PerformanceWarning` instead (:issue:`26314`). + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df = DataFrame({"ind1": np.arange(2 ** 16), "ind2": np.arange(2 ** 16), "count": 0}) + In [4]: df.pivot_table(index="ind1", columns="ind2", values="count", aggfunc="count") + ValueError: Unstacked DataFrame is too big, causing int32 overflow + +*New behavior*: + +.. code-block:: python + + In [4]: df.pivot_table(index="ind1", columns="ind2", values="count", aggfunc="count") + PerformanceWarning: The following operation may generate 4294967296 cells in the resulting pandas object. + +.. --------------------------------------------------------------------------- + +.. _whatsnew_140.notable_bug_fixes.groupby_apply_mutation: + +groupby.apply consistent transform detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`.GroupBy.apply` is designed to be flexible, allowing users to perform +aggregations, transformations, filters, and use it with user-defined functions +that might not fall into any of these categories. As part of this, apply will +attempt to detect when an operation is a transform, and in such a case, the +result will have the same index as the input. In order to determine if the +operation is a transform, pandas compares the input's index to the result's and +determines if it has been mutated. Previously in pandas 1.3, different code +paths used different definitions of "mutated": some would use Python's ``is`` +whereas others would test only up to equality. + +This inconsistency has been removed, pandas now tests up to equality. + +.. ipython:: python + + def func(x): + return x.copy() + + df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df.groupby(['a']).apply(func) + Out[3]: + a b c + a + 1 0 1 3 5 + 2 1 2 4 6 + + In [4]: df.set_index(['a', 'b']).groupby(['a']).apply(func) + Out[4]: + c + a b + 1 3 5 + 2 4 6 + +In the examples above, the first uses a code path where pandas uses ``is`` and +determines that ``func`` is not a transform whereas the second tests up to +equality and determines that ``func`` is a transform. In the first case, the +result's index is not the same as the input's. + +*New behavior*: + +.. code-block:: ipython + + In [5]: df.groupby(['a']).apply(func) + Out[5]: + a b c + 0 1 3 5 + 1 2 4 6 + + In [6]: df.set_index(['a', 'b']).groupby(['a']).apply(func) + Out[6]: + c + a b + 1 3 5 + 2 4 6 + +Now in both cases it is determined that ``func`` is a transform. In each case, +the result has the same index as the input. + +.. _whatsnew_140.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_140.api_breaking.python: + +Increased minimum version for Python +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas 1.4.0 supports Python 3.8 and higher. + +.. _whatsnew_140.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Some minimum supported versions of dependencies were updated. 
+If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.18.5 | X | X | ++-----------------+-----------------+----------+---------+ +| pytz | 2020.1 | X | X | ++-----------------+-----------------+----------+---------+ +| python-dateutil | 2.8.1 | X | X | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.3.1 | | X | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.7.1 | | X | ++-----------------+-----------------+----------+---------+ +| pytest (dev) | 6.0 | | | ++-----------------+-----------------+----------+---------+ +| mypy (dev) | 0.930 | | X | ++-----------------+-----------------+----------+---------+ + +For `optional libraries +`_ the general +recommendation is to use the latest version. The following table lists the +lowest version per library that is currently being tested throughout the +development of pandas. Optional libraries below the lowest tested version may +still work, but are not considered supported. + ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 | 4.8.2 | X | ++-----------------+-----------------+---------+ +| fastparquet | 0.4.0 | | ++-----------------+-----------------+---------+ +| fsspec | 0.7.4 | | ++-----------------+-----------------+---------+ +| gcsfs | 0.6.0 | | ++-----------------+-----------------+---------+ +| lxml | 4.5.0 | X | ++-----------------+-----------------+---------+ +| matplotlib | 3.3.2 | X | ++-----------------+-----------------+---------+ +| numba | 0.50.1 | X | ++-----------------+-----------------+---------+ +| openpyxl | 3.0.3 | X | ++-----------------+-----------------+---------+ +| pandas-gbq | 0.14.0 | X | ++-----------------+-----------------+---------+ +| pyarrow | 1.0.1 | X | ++-----------------+-----------------+---------+ +| pymysql | 0.10.1 | X | ++-----------------+-----------------+---------+ +| pytables | 3.6.1 | X | ++-----------------+-----------------+---------+ +| s3fs | 0.4.0 | | ++-----------------+-----------------+---------+ +| scipy | 1.4.1 | X | ++-----------------+-----------------+---------+ +| sqlalchemy | 1.4.0 | X | ++-----------------+-----------------+---------+ +| tabulate | 0.8.7 | | ++-----------------+-----------------+---------+ +| xarray | 0.15.1 | X | ++-----------------+-----------------+---------+ +| xlrd | 2.0.1 | X | ++-----------------+-----------------+---------+ +| xlsxwriter | 1.2.2 | X | ++-----------------+-----------------+---------+ +| xlwt | 1.3.0 | | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. 
_whatsnew_140.api_breaking.other:
+
+Other API changes
+^^^^^^^^^^^^^^^^^
+- :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than ``target``); in the past these would be silently ignored if the index was not unique (:issue:`42310`)
+- Changed the position of the ``min_rows`` argument in :meth:`DataFrame.to_string` due to a change in the docstring (:issue:`44304`)
+- Reduction operations for :class:`DataFrame` or :class:`Series` now raise a ``ValueError`` when ``None`` is passed for ``skipna`` (:issue:`44178`)
+- :func:`read_csv` and :func:`read_html` no longer raise an error when one of the header rows consists only of ``Unnamed:`` columns (:issue:`13054`)
+- Changed the ``name`` attribute of several holidays in
+  ``USFederalHolidayCalendar`` to match official federal holiday names,
+  specifically:
+
+  - "New Year's Day" gains the possessive apostrophe
+  - "Presidents Day" becomes "Washington's Birthday"
+  - "Martin Luther King Jr. Day" is now "Birthday of Martin Luther King, Jr."
+  - "July 4th" is now "Independence Day"
+  - "Thanksgiving" is now "Thanksgiving Day"
+  - "Christmas" is now "Christmas Day"
+  - Added "Juneteenth National Independence Day"
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_140.deprecations:
+
+Deprecations
+~~~~~~~~~~~~
+
+.. _whatsnew_140.deprecations.int64_uint64_float64index:
+
+Deprecated Int64Index, UInt64Index & Float64Index
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:class:`Int64Index`, :class:`UInt64Index` and :class:`Float64Index` have been
+deprecated in favor of the base :class:`Index` class and will be removed in
+pandas 2.0 (:issue:`43028`).
+
+For constructing a numeric index, you can use the base :class:`Index` class
+instead, specifying the data type (which will also work on older pandas
+releases):
+
+.. code-block:: python
+
+    # replace
+    pd.Int64Index([1, 2, 3])
+    # with
+    pd.Index([1, 2, 3], dtype="int64")
+
+For checking the data type of an index object, you can replace ``isinstance``
+checks with checking the ``dtype``:
+
+.. code-block:: python
+
+    # replace
+    isinstance(idx, pd.Int64Index)
+    # with
+    idx.dtype == "int64"
+
+Currently, in order to maintain backward compatibility, calls to :class:`Index`
+will continue to return :class:`Int64Index`, :class:`UInt64Index` and
+:class:`Float64Index` when given numeric data, but in the future, an
+:class:`Index` will be returned.
+
+*Current behavior*:
+
+.. code-block:: ipython
+
+    In [1]: pd.Index([1, 2, 3], dtype="int32")
+    Out[1]: Int64Index([1, 2, 3], dtype='int64')
+    In [2]: pd.Index([1, 2, 3], dtype="uint64")
+    Out[2]: UInt64Index([1, 2, 3], dtype='uint64')
+
+*Future behavior*:
+
+.. code-block:: ipython
+
+    In [3]: pd.Index([1, 2, 3], dtype="int32")
+    Out[3]: Index([1, 2, 3], dtype='int32')
+    In [4]: pd.Index([1, 2, 3], dtype="uint64")
+    Out[4]: Index([1, 2, 3], dtype='uint64')
+
+.. _whatsnew_140.deprecations.frame_series_append:
+
+Deprecated DataFrame.append and Series.append
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:meth:`DataFrame.append` and :meth:`Series.append` have been deprecated and will
+be removed in a future version. Use :func:`pandas.concat` instead (:issue:`35407`).
+
+*Deprecated syntax*
+
+.. code-block:: ipython
+
+    In [1]: pd.Series([1, 2]).append(pd.Series([3, 4]))
+    Out[1]:
+    <stdin>:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
+    0    1
+    1    2
+    0    3
+    1    4
+    dtype: int64
+
+    In [2]: df1 = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
+    In [3]: df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))
+    In [4]: df1.append(df2)
+    Out[4]:
+    <stdin>:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
+       A  B
+    0  1  2
+    1  3  4
+    0  5  6
+    1  7  8
+
+*Recommended syntax*
+
+.. ipython:: python
+
+    pd.concat([pd.Series([1, 2]), pd.Series([3, 4])])
+
+    df1 = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
+    df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))
+    pd.concat([df1, df2])
+
+.. _whatsnew_140.deprecations.other:
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
+- Deprecated :meth:`Index.is_type_compatible` (:issue:`42113`)
+- Deprecated ``method`` argument in :meth:`Index.get_loc`, use ``index.get_indexer([label], method=...)`` instead (:issue:`42269`)
+- Deprecated treating integer keys in :meth:`Series.__setitem__` as positional when the index is a :class:`Float64Index` not containing the key, a :class:`IntervalIndex` with no entries containing the key, or a :class:`MultiIndex` with leading :class:`Float64Index` level not containing the key (:issue:`33469`)
+- Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`)
+- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a :class:`MultiIndex` (:issue:`42351`)
+- Creating an empty :class:`Series` without a ``dtype`` will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
+- Deprecated the ``kind`` argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, and :meth:`Index.slice_locs`; in a future version passing ``kind`` will raise (:issue:`42857`)
+- Deprecated dropping of nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`EWM` aggregations (:issue:`42738`)
+- Deprecated :meth:`Index.reindex` with a non-unique :class:`Index` (:issue:`42568`)
+- Deprecated :meth:`.Styler.render` in favor of :meth:`.Styler.to_html` (:issue:`42140`)
+- Deprecated :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` in favor of :meth:`.Styler.hide` (:issue:`43758`)
+- Deprecated passing in a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
+- Deprecated the ``include_start`` and ``include_end`` arguments in :meth:`DataFrame.between_time`; in a future version passing ``include_start`` or ``include_end`` will raise (:issue:`40245`)
+- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. 
Users should squeeze the :class:`DataFrame` afterwards with ``.squeeze("columns")`` instead (:issue:`43242`) +- Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`) +- Deprecated the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`) +- Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`) +- Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`) +- Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`) +- Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`) +- Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where` :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object ``dtype``. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`, :issue:`44940`) +- Deprecated the ``prefix`` keyword argument in :func:`read_csv` and :func:`read_table`, in a future version the argument will be removed (:issue:`43396`) +- Deprecated passing non boolean argument to ``sort`` in :func:`concat` (:issue:`41518`) +- Deprecated passing arguments as positional for :func:`read_fwf` other than ``filepath_or_buffer`` (:issue:`41485`) +- Deprecated passing arguments as positional for :func:`read_xml` other than ``path_or_buffer`` (:issue:`45133`) +- Deprecated passing ``skipna=None`` for :meth:`DataFrame.mad` and :meth:`Series.mad`, pass ``skipna=True`` instead (:issue:`44580`) +- Deprecated the behavior of :func:`to_datetime` with the string "now" with ``utc=False``; in a future version this will match ``Timestamp("now")``, which in turn matches :meth:`Timestamp.now` returning the local time (:issue:`18705`) +- Deprecated :meth:`DateOffset.apply`, use ``offset + other`` instead (:issue:`44522`) +- Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`) +- A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`) +- Deprecated behavior of :func:`concat` between objects with bool-dtype and numeric-dtypes; in a future version these will cast to object dtype instead of coercing bools to numeric values (:issue:`39817`) +- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) +- Deprecated passing ``set`` or ``dict`` as indexer for :meth:`DataFrame.loc.__setitem__`, :meth:`DataFrame.loc.__getitem__`, :meth:`Series.loc.__setitem__`, 
:meth:`Series.loc.__getitem__`, :meth:`DataFrame.__getitem__`, :meth:`Series.__getitem__` and :meth:`Series.__setitem__` (:issue:`42825`)
+- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
+- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`)
+- Deprecated :meth:`DatetimeIndex.union_many`, use :meth:`DatetimeIndex.union` instead (:issue:`44091`)
+- Deprecated :meth:`.Groupby.pad` in favor of :meth:`.Groupby.ffill`, as shown in the sketch after this list (:issue:`33396`)
+- Deprecated :meth:`.Groupby.backfill` in favor of :meth:`.Groupby.bfill` (:issue:`33396`)
+- Deprecated :meth:`.Resample.pad` in favor of :meth:`.Resample.ffill` (:issue:`33396`)
+- Deprecated :meth:`.Resample.backfill` in favor of :meth:`.Resample.bfill` (:issue:`33396`)
+- Deprecated ``numeric_only=None`` in :meth:`DataFrame.rank`; in a future version ``numeric_only`` must be either ``True`` or ``False`` (the default) (:issue:`45036`)
+- Deprecated the behavior of :meth:`Timestamp.utcfromtimestamp`; in the future it will return a timezone-aware UTC :class:`Timestamp` (:issue:`22451`)
+- Deprecated :meth:`NaT.freq` (:issue:`45071`)
+- Deprecated behavior of :class:`Series` and :class:`DataFrame` construction when passed float-dtype data containing ``NaN`` and an integer dtype ignoring the dtype argument; in a future version this will raise (:issue:`40110`)
+- Deprecated the behavior of :meth:`Series.to_frame` and :meth:`Index.to_frame` to ignore the ``name`` argument when ``name=None``. Currently, this means to preserve the existing name, but in the future explicitly passing ``name=None`` will set ``None`` as the name of the column in the resulting DataFrame (:issue:`44212`)
+
+.. ---------------------------------------------------------------------------
+
+.. 
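sketch of the recommended fill-method spellings
+
+For the ``pad``/``backfill`` renames above, a minimal sketch of the recommended
+spellings, using a small made-up frame (the data below is purely illustrative and
+not taken from any example above):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    # hypothetical example frame with a missing value in each group
+    df = pd.DataFrame({"key": ["a", "a", "b", "b"],
+                       "val": [1.0, None, None, 4.0]})
+
+    # deprecated spellings: df.groupby("key").pad(), df.groupby("key").backfill()
+    # recommended spellings:
+    df.groupby("key").ffill()
+    df.groupby("key").bfill()
+
+    # the same renames apply to resample objects, e.g. use
+    # obj.resample("D").ffill() instead of obj.resample("D").pad()
+
+.. 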
_whatsnew_140.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`) +- Performance improvement when converting non-string arrays to string arrays (:issue:`34483`) +- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`) +- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`, :issue:`43142`, :issue:`43147`, :issue:`43307`, :issue:`43144`, :issue:`44826`) +- Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`) +- Performance improvement in :meth:`DataFrame.corr` for ``method=pearson`` on data without missing values (:issue:`40956`) +- Performance improvement in some :meth:`GroupBy.apply` operations (:issue:`42992`, :issue:`43578`) +- Performance improvement in :func:`read_stata` (:issue:`43059`, :issue:`43227`) +- Performance improvement in :func:`read_sas` (:issue:`43333`) +- Performance improvement in :meth:`to_datetime` with ``uint`` dtypes (:issue:`42606`) +- Performance improvement in :meth:`to_datetime` with ``infer_datetime_format`` set to ``True`` (:issue:`43901`) +- Performance improvement in :meth:`Series.sparse.to_coo` (:issue:`42880`) +- Performance improvement in indexing with a :class:`UInt64Index` (:issue:`43862`) +- Performance improvement in indexing with a :class:`Float64Index` (:issue:`43705`) +- Performance improvement in indexing with a non-unique :class:`Index` (:issue:`43792`) +- Performance improvement in indexing with a listlike indexer on a :class:`MultiIndex` (:issue:`43370`) +- Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:`43370`) +- Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`, :issue:`43725`) +- Performance improvement in :meth:`GroupBy.count` (:issue:`43730`, :issue:`43694`) +- Performance improvement in :meth:`GroupBy.any` and :meth:`GroupBy.all` (:issue:`43675`, :issue:`42841`) +- Performance improvement in :meth:`GroupBy.std` (:issue:`43115`, :issue:`43576`) +- Performance improvement in :meth:`GroupBy.cumsum` (:issue:`43309`) +- :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`) +- Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`43777`) +- Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`) +- Performance improvement in :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min` and :meth:`.Expanding.min` with ``engine="numba"`` (:issue:`43612`, :issue:`44176`, :issue:`45170`) +- Improved performance of :meth:`pandas.read_csv` with ``memory_map=True`` when file encoding is UTF-8 (:issue:`43787`) +- Performance improvement in :meth:`RangeIndex.sort_values` overriding :meth:`Index.sort_values` (:issue:`43666`) +- Performance improvement in :meth:`RangeIndex.insert` (:issue:`43988`) +- Performance improvement in :meth:`Index.insert` (:issue:`43953`) +- Performance improvement in :meth:`DatetimeIndex.tolist` (:issue:`43823`) +- Performance improvement in :meth:`DatetimeIndex.union` (:issue:`42353`) +- Performance improvement in :meth:`Series.nsmallest` (:issue:`43696`) +- Performance improvement in :meth:`DataFrame.insert` (:issue:`42998`) 
+- Performance improvement in :meth:`DataFrame.dropna` (:issue:`43683`) +- Performance improvement in :meth:`DataFrame.fillna` (:issue:`43316`) +- Performance improvement in :meth:`DataFrame.values` (:issue:`43160`) +- Performance improvement in :meth:`DataFrame.select_dtypes` (:issue:`42611`) +- Performance improvement in :class:`DataFrame` reductions (:issue:`43185`, :issue:`43243`, :issue:`43311`, :issue:`43609`) +- Performance improvement in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`43335`, :issue:`43352`, :issue:`42704`, :issue:`43025`) +- Performance improvement in :meth:`Series.to_frame` (:issue:`43558`) +- Performance improvement in :meth:`Series.mad` (:issue:`43010`) +- Performance improvement in :func:`merge` (:issue:`43332`) +- Performance improvement in :func:`to_csv` when index column is a datetime and is formatted (:issue:`39413`) +- Performance improvement in :func:`to_csv` when :class:`MultiIndex` contains a lot of unused levels (:issue:`37484`) +- Performance improvement in :func:`read_csv` when ``index_col`` was set with a numeric column (:issue:`44158`) +- Performance improvement in :func:`concat` (:issue:`43354`) +- Performance improvement in :meth:`SparseArray.__getitem__` (:issue:`23122`) +- Performance improvement in constructing a :class:`DataFrame` from array-like objects like a ``Pytorch`` tensor (:issue:`44616`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_140.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ +- Bug in setting dtype-incompatible values into a :class:`Categorical` (or ``Series`` or ``DataFrame`` backed by ``Categorical``) raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`) +- Bug in :meth:`Categorical.searchsorted` when passing a dtype-incompatible value raising ``KeyError`` instead of ``TypeError`` (:issue:`41919`) +- Bug in :meth:`Categorical.astype` casting datetimes and :class:`Timestamp` to int for dtype ``object`` (:issue:`44930`) +- Bug in :meth:`Series.where` with ``CategoricalDtype`` when passing a dtype-incompatible value raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`) +- Bug in :meth:`Categorical.fillna` when passing a dtype-incompatible value raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`) +- Bug in :meth:`Categorical.fillna` with a tuple-like category raising ``ValueError`` instead of ``TypeError`` when filling with a non-category tuple (:issue:`41919`) + +Datetimelike +^^^^^^^^^^^^ +- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`) +- Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`) +- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. 
``31-12-2012``) (:issue:`12585`) +- Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`) +- Bug in inplace addition and subtraction of :class:`DatetimeIndex` or :class:`TimedeltaIndex` with :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`43904`) +- Bug in calling ``np.isnan``, ``np.isfinite``, or ``np.isinf`` on a timezone-aware :class:`DatetimeIndex` incorrectly raising ``TypeError`` (:issue:`43917`) +- Bug in constructing a :class:`Series` from datetime-like strings with mixed timezones incorrectly partially-inferring datetime values (:issue:`40111`) +- Bug in addition of a :class:`Tick` object and a ``np.timedelta64`` object incorrectly raising instead of returning :class:`Timedelta` (:issue:`44474`) +- ``np.maximum.reduce`` and ``np.minimum.reduce`` now correctly return :class:`Timestamp` and :class:`Timedelta` objects when operating on :class:`Series`, :class:`DataFrame`, or :class:`Index` with ``datetime64[ns]`` or ``timedelta64[ns]`` dtype (:issue:`43923`) +- Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`) +- Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`) +- Bug in :meth:`Timestamp.to_pydatetime` failing to retain the ``fold`` attribute (:issue:`45087`) +- Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`) +- Fixed regression in :meth:`~Series.reindex` raising an error when using an incompatible fill value with a datetime-like dtype (or not raising a deprecation warning for using a ``datetime.date`` as fill value) (:issue:`42921`) +- Bug in :class:`DateOffset` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`) +- Bug in :meth:`Timestamp.fromtimestamp` not supporting the ``tz`` argument (:issue:`45083`) +- Bug in :class:`DataFrame` construction from dict of :class:`Series` with mismatched index dtypes sometimes raising depending on the ordering of the passed dict (:issue:`44091`) +- Bug in :class:`Timestamp` hashing during some DST transitions caused a segmentation fault (:issue:`33931` and :issue:`40817`) + +Timedelta +^^^^^^^^^ +- Bug in division of all-``NaT`` :class:`TimeDeltaIndex`, :class:`Series` or :class:`DataFrame` column with object-dtype array like of numbers failing to infer the result as timedelta64-dtype (:issue:`39750`) +- Bug in floor division of ``timedelta64[ns]`` data with a scalar returning garbage values (:issue:`44466`) +- Bug in :class:`Timedelta` now properly taking into account any nanoseconds contribution of any kwarg (:issue:`43764`, :issue:`45227`) + +Time Zones +^^^^^^^^^^ +- Bug in :func:`to_datetime` with ``infer_datetime_format=True`` failing to parse zero UTC offset (``Z``) correctly (:issue:`41047`) +- Bug in :meth:`Series.dt.tz_convert` resetting index in a :class:`Series` with :class:`CategoricalIndex` (:issue:`43080`) +- Bug in ``Timestamp`` and ``DatetimeIndex`` incorrectly raising a ``TypeError`` when subtracting two timezone-aware objects with mismatched timezones (:issue:`31793`) + +Numeric +^^^^^^^ +- Bug in floor-dividing a list or tuple of integers by a :class:`Series` incorrectly raising (:issue:`44674`) 
+- Bug in :meth:`DataFrame.rank` raising ``ValueError`` with ``object`` columns and ``method="first"`` (:issue:`41931`) +- Bug in :meth:`DataFrame.rank` treating missing values and extreme values as equal (for example ``np.nan`` and ``np.inf``), causing incorrect results when ``na_option="bottom"`` or ``na_option="top`` used (:issue:`41931`) +- Bug in ``numexpr`` engine still being used when the option ``compute.use_numexpr`` is set to ``False`` (:issue:`32556`) +- Bug in :class:`DataFrame` arithmetic ops with a subclass whose :meth:`_constructor` attribute is a callable other than the subclass itself (:issue:`43201`) +- Bug in arithmetic operations involving :class:`RangeIndex` where the result would have the incorrect ``name`` (:issue:`43962`) +- Bug in arithmetic operations involving :class:`Series` where the result could have the incorrect ``name`` when the operands having matching NA or matching tuple names (:issue:`44459`) +- Bug in division with ``IntegerDtype`` or ``BooleanDtype`` array and NA scalar incorrectly raising (:issue:`44685`) +- Bug in multiplying a :class:`Series` with ``FloatingDtype`` with a timedelta-like scalar incorrectly raising (:issue:`44772`) + +Conversion +^^^^^^^^^^ +- Bug in :class:`UInt64Index` constructor when passing a list containing both positive integers small enough to cast to int64 and integers too large to hold in int64 (:issue:`42201`) +- Bug in :class:`Series` constructor returning 0 for missing values with dtype ``int64`` and ``False`` for dtype ``bool`` (:issue:`43017`, :issue:`43018`) +- Bug in constructing a :class:`DataFrame` from a :class:`PandasArray` containing :class:`Series` objects behaving differently than an equivalent ``np.ndarray`` (:issue:`43986`) +- Bug in :class:`IntegerDtype` not allowing coercion from string dtype (:issue:`25472`) +- Bug in :func:`to_datetime` with ``arg:xr.DataArray`` and ``unit="ns"`` specified raises ``TypeError`` (:issue:`44053`) +- Bug in :meth:`DataFrame.convert_dtypes` not returning the correct type when a subclass does not overload :meth:`_constructor_sliced` (:issue:`43201`) +- Bug in :meth:`DataFrame.astype` not propagating ``attrs`` from the original :class:`DataFrame` (:issue:`44414`) +- Bug in :meth:`DataFrame.convert_dtypes` result losing ``columns.names`` (:issue:`41435`) +- Bug in constructing a ``IntegerArray`` from pyarrow data failing to validate dtypes (:issue:`44891`) +- Bug in :meth:`Series.astype` not allowing converting from a ``PeriodDtype`` to ``datetime64`` dtype, inconsistent with the :class:`PeriodIndex` behavior (:issue:`45038`) + +Strings +^^^^^^^ +- Bug in checking for ``string[pyarrow]`` dtype incorrectly raising an ``ImportError`` when pyarrow is not installed (:issue:`44276`) + +Interval +^^^^^^^^ +- Bug in :meth:`Series.where` with ``IntervalDtype`` incorrectly raising when the ``where`` call should not replace anything (:issue:`44181`) + +Indexing +^^^^^^^^ +- Bug in :meth:`Series.rename` with :class:`MultiIndex` and ``level`` is provided (:issue:`43659`) +- Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` when the object's :class:`Index` has a length greater than one but only one unique value (:issue:`42365`) +- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` with a :class:`MultiIndex` when indexing with a tuple in which one of the levels is also a tuple (:issue:`27591`) +- Bug in :meth:`Series.loc` with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`) +- Bug in indexing on a :class:`Series` or :class:`DataFrame` with 
a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`) +- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`) +- Bug in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` when passing an ascending value, failed to raise or incorrectly raising ``ValueError`` (:issue:`41634`) +- Bug in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`) +- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.nan`` (:issue:`35392`) +- Bug in :meth:`DataFrame.query` did not handle the degree sign in a backticked column name, such as \`Temp(°C)\`, used in an expression to query a :class:`DataFrame` (:issue:`42826`) +- Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`) +- Bug in :meth:`DataFrame.query` where method calls in query strings led to errors when the ``numexpr`` package was installed (:issue:`22435`) +- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`) +- Bug in indexing on a non-unique object-dtype :class:`Index` with an NA scalar (e.g. ``np.nan``) (:issue:`43711`) +- Bug in :meth:`DataFrame.__setitem__` incorrectly writing into an existing column's array rather than setting a new array when the new dtype and the old dtype match (:issue:`43406`) +- Bug in setting floating-dtype values into a :class:`Series` with integer dtype failing to set inplace when those values can be losslessly converted to integers (:issue:`44316`) +- Bug in :meth:`Series.__setitem__` with object dtype when setting an array with matching size and dtype='datetime64[ns]' or dtype='timedelta64[ns]' incorrectly converting the datetime/timedeltas to integers (:issue:`43868`) +- Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`) +- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`) +- Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`) +- Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray` or :class:`Series` or :class:`DataFrame` column backed by :class:`DatetimeArray` failing to parse these strings (:issue:`44236`) +- Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`44261`) +- Bug in :meth:`Series.__setitem__` with a boolean mask indexer setting a listlike value of length 1 incorrectly broadcasting that value (:issue:`44265`) +- Bug in :meth:`Series.reset_index` not ignoring ``name`` argument when ``drop`` and ``inplace`` are set to ``True`` (:issue:`44575`) +- Bug in :meth:`DataFrame.loc.__setitem__` and :meth:`DataFrame.iloc.__setitem__` with mixed dtypes sometimes failing to operate in-place (:issue:`44345`) +- Bug in :meth:`DataFrame.loc.__getitem__` incorrectly raising ``KeyError`` when selecting a single column with a boolean key (:issue:`44322`). 
+- Bug in setting :meth:`DataFrame.iloc` with a single ``ExtensionDtype`` column and setting 2D values e.g. ``df.iloc[:] = df.values`` incorrectly raising (:issue:`44514`) +- Bug in setting values with :meth:`DataFrame.iloc` with a single ``ExtensionDtype`` column and a tuple of arrays as the indexer (:issue:`44703`) +- Bug in indexing on columns with ``loc`` or ``iloc`` using a slice with a negative step with ``ExtensionDtype`` columns incorrectly raising (:issue:`44551`) +- Bug in :meth:`DataFrame.loc.__setitem__` changing dtype when indexer was completely ``False`` (:issue:`37550`) +- Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning boolean mask instead of array of integers for a non unique and non monotonic index (:issue:`44084`) +- Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`) +- Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`) +- Bug in :meth:`Series.__getitem__` with a :class:`CategoricalIndex` of integers treating lists of integers as positional indexers, inconsistent with the behavior with a single scalar integer (:issue:`15470`, :issue:`14865`) +- Bug in :meth:`Series.__setitem__` when setting floats or integers into integer-dtype :class:`Series` failing to upcast when necessary to retain precision (:issue:`45121`) +- Bug in :meth:`DataFrame.iloc.__setitem__` ignores axis argument (:issue:`45032`) + +Missing +^^^^^^^ +- Bug in :meth:`DataFrame.fillna` with ``limit`` and no ``method`` ignores ``axis='columns'`` or ``axis = 1`` (:issue:`40989`, :issue:`17399`) +- Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`) +- Bug in constructing a :class:`DataFrame` with a dictionary ``np.datetime64`` as a value and ``dtype='timedelta64[ns]'``, or vice-versa, incorrectly casting instead of raising (:issue:`44428`) +- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with ``inplace=True`` not writing to the underlying array(s) in-place (:issue:`44749`) +- Bug in :meth:`Index.fillna` incorrectly returning an unfilled :class:`Index` when NA values are present and ``downcast`` argument is specified. 
This now raises ``NotImplementedError`` instead; do not pass ``downcast`` argument (:issue:`44873`) +- Bug in :meth:`DataFrame.dropna` changing :class:`Index` even if no entries were dropped (:issue:`41965`) +- Bug in :meth:`Series.fillna` with an object-dtype incorrectly ignoring ``downcast="infer"`` (:issue:`44241`) + +MultiIndex +^^^^^^^^^^ +- Bug in :meth:`MultiIndex.get_loc` where the first level is a :class:`DatetimeIndex` and a string key is passed (:issue:`42465`) +- Bug in :meth:`MultiIndex.reindex` when passing a ``level`` that corresponds to an ``ExtensionDtype`` level (:issue:`42043`) +- Bug in :meth:`MultiIndex.get_loc` raising ``TypeError`` instead of ``KeyError`` on nested tuple (:issue:`42440`) +- Bug in :meth:`MultiIndex.union` setting wrong ``sortorder`` causing errors in subsequent indexing operations with slices (:issue:`44752`) +- Bug in :meth:`MultiIndex.putmask` where the other value was also a :class:`MultiIndex` (:issue:`43212`) +- Bug in :meth:`MultiIndex.dtypes` duplicate level names returned only one dtype per name (:issue:`45174`) + +I/O +^^^ +- Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`) +- Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`) +- Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`) +- Bug in :func:`read_fwf`, where difference in lengths of ``colspecs`` and ``names`` was not raising ``ValueError`` (:issue:`40830`) +- Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serializing plain Python objects to JSON (:issue:`42768`, :issue:`33043`) +- Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`) +- Bug in unpickling an :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`) +- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raised ``IndexError`` (:issue:`43102`) +- Bug in :func:`read_csv` raising ``ParserError`` when reading file in chunks and some chunk blocks have fewer columns than header for ``engine="c"`` (:issue:`21211`) +- Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`) +- Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`, :issue:`10261`) +- Bug in :func:`read_csv` keeping the original column in object format when ``keep_date_col=True`` is set (:issue:`13378`) +- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`) +- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`) +- Bug in :func:`json_normalize` where reading data with missing multi-level metadata would not respect ``errors="ignore"`` (:issue:`44312`) +- Bug in :func:`read_csv` used second row to guess implicit index if ``header`` was set to ``None`` for ``engine="python"`` (:issue:`22144`) +- Bug in :func:`read_csv` not recognizing bad lines when ``names`` were given for ``engine="c"`` (:issue:`22144`) +- Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) +- 
Bug when Python is built without the lzma module: a warning was raised at the pandas import time, even if the lzma capability isn't used (:issue:`43495`) +- Bug in :func:`read_csv` not applying dtype for ``index_col`` (:issue:`9435`) +- Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`) +- Bug in :func:`read_csv` raising ``ValueError`` when ``names`` was longer than ``header`` but equal to data rows for ``engine="python"`` (:issue:`38453`) +- Bug in :class:`ExcelWriter`, where ``engine_kwargs`` were not passed through to all engines (:issue:`43442`) +- Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with :class:`MultiIndex` columns (:issue:`8991`) +- Bug in :func:`read_csv` not raising an ``ValueError`` when ``\n`` was specified as ``delimiter`` or ``sep`` which conflicts with ``lineterminator`` (:issue:`43528`) +- Bug in :func:`to_csv` converting datetimes in categorical :class:`Series` to integers (:issue:`40754`) +- Bug in :func:`read_csv` converting columns to numeric after date parsing failed (:issue:`11019`) +- Bug in :func:`read_csv` not replacing ``NaN`` values with ``np.nan`` before attempting date conversion (:issue:`26203`) +- Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from an nullable integer type (:issue:`44079`) +- Bug in :func:`to_csv` always coercing datetime columns with different formats to the same format (:issue:`21734`) +- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly (:issue:`39465`) +- Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`) +- Bug in :func:`to_xml` raising error for ``pd.NA`` with extension array dtype (:issue:`43903`) +- Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`) +- Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`) +- Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`) +- Bug in :func:`read_csv` when passing a ``tempfile.SpooledTemporaryFile`` opened in binary mode (:issue:`44748`) +- Bug in :func:`read_json` raising ``ValueError`` when attempting to parse json strings containing "://" (:issue:`36271`) +- Bug in :func:`read_csv` when ``engine="c"`` and ``encoding_errors=None`` which caused a segfault (:issue:`45180`) +- Bug in :func:`read_csv` an invalid value of ``usecols`` leading to an unclosed file handle (:issue:`45384`) +- Bug in :meth:`DataFrame.to_json` fix memory leak (:issue:`43877`) + +Period +^^^^^^ +- Bug in adding a :class:`Period` object to a ``np.timedelta64`` object incorrectly raising ``TypeError`` (:issue:`44182`) +- Bug in :meth:`PeriodIndex.to_timestamp` when the index has ``freq="B"`` inferring ``freq="D"`` for its result instead of ``freq="B"`` (:issue:`44105`) +- Bug in :class:`Period` constructor incorrectly allowing ``np.timedelta64("NaT")`` (:issue:`44507`) +- Bug in :meth:`PeriodIndex.to_timestamp` giving incorrect values for indexes with non-contiguous data (:issue:`44100`) +- Bug in :meth:`Series.where` with ``PeriodDtype`` 
incorrectly raising when the ``where`` call should not replace anything (:issue:`45135`) + +Plotting +^^^^^^^^ +- When given non-numeric data, :meth:`DataFrame.boxplot` now raises a ``ValueError`` rather than a cryptic ``KeyError`` or ``ZeroDivisionError``, in line with other plotting functions like :meth:`DataFrame.hist` (:issue:`43480`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`) +- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`) +- Bug in :meth:`GroupBy.max` and :meth:`GroupBy.min` with nullable integer dtypes losing precision (:issue:`41743`) +- Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`) +- Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not ``None`` (:issue:`41556`) +- Bug in :meth:`SeriesGroupBy.nlargest` and :meth:`SeriesGroupBy.nsmallest` would have an inconsistent index when the input :class:`Series` was sorted and ``n`` was greater than or equal to all group sizes (:issue:`15272`, :issue:`16345`, :issue:`29129`) +- Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`) +- Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) +- Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`) +- Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`) +- Bug in :meth:`DataFrame.rolling.corr` when the :class:`DataFrame` columns was a :class:`MultiIndex` (:issue:`21157`) +- Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`) +- Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`) +- Bug in :meth:`GroupBy.mean` failing with ``complex`` dtype (:issue:`43701`) +- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`) +- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`) +- Bug in :meth:`GroupBy.mean` raising ``KeyError`` when column was selected at least twice (:issue:`44924`) +- Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) +- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) +- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) +- Bug in :meth:`Groupby.nunique` not respecting ``observed=True`` for ``categorical`` grouping columns (:issue:`45128`) +- Bug in :meth:`GroupBy.head` and :meth:`GroupBy.tail` not dropping 
groups with ``NaN`` when ``dropna=True`` (:issue:`45089`) +- Bug in :meth:`GroupBy.__iter__` after selecting a subset of columns in a :class:`GroupBy` object, which returned all columns instead of the chosen subset (:issue:`44821`) +- Bug in :meth:`GroupBy.rolling` failing to correctly raise ``ValueError`` when non-monotonic data is passed (:issue:`43909`) +- Bug where grouping by a :class:`Series` that has a ``categorical`` data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`) + +Reshaping +^^^^^^^^^ +- Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`) +- Bug in :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`) +- Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices and non-exact :class:`pandas.CategoricalIndex` (:issue:`42185`, :issue:`42425`) +- Bug in :meth:`DataFrame.append` failing to retain dtypes when appended columns do not match (:issue:`43392`) +- Bug in :func:`concat` of ``bool`` and ``boolean`` dtypes resulting in ``object`` dtype instead of ``boolean`` dtype (:issue:`42800`) +- Bug in :func:`crosstab` when inputs are categorical :class:`Series`, there are categories that are not present in one or both of the :class:`Series`, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`) +- Bug in :func:`concat` failing when the ``objs`` argument all had the same index and the ``keys`` argument contained duplicates (:issue:`43595`) +- Bug in :func:`concat` which ignored the ``sort`` parameter (:issue:`43375`) +- Bug in :func:`merge` with :class:`MultiIndex` as column index for the ``on`` argument returning an error when assigning a column internally (:issue:`43734`) +- Bug in :func:`crosstab` failing when inputs are lists or tuples (:issue:`44076`) +- Bug in :meth:`DataFrame.append` failing to retain ``index.name`` when appending a list of :class:`Series` objects (:issue:`44109`) +- Fixed metadata propagation in the :meth:`DataFrame.apply` method, consequently fixing the same issue for :meth:`DataFrame.transform`, :meth:`DataFrame.nunique` and :meth:`DataFrame.mode` (:issue:`28283`) +- Bug in :func:`concat` casting levels of :class:`MultiIndex` to float if all levels only consist of missing values (:issue:`44900`) +- Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`) +- Bug in :func:`merge` raising ``KeyError`` when joining over differently named indexes with ``on`` keywords (:issue:`45094`) +- Bug in :meth:`Series.unstack` with ``object`` dtype doing unwanted type inference on resulting columns (:issue:`44595`) +- Bug in :meth:`MultiIndex.join()` with overlapping ``IntervalIndex`` levels (:issue:`44096`) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` returning results with a different ``dtype`` depending on the ``regex`` parameter (:issue:`44864`) +- Bug in :meth:`DataFrame.pivot` with ``index=None`` when the :class:`DataFrame` index was a :class:`MultiIndex` (:issue:`23955`) + +Sparse +^^^^^^ +- Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`) +- Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`) +- Bug in :meth:`DataFrame.sparse.to_coo` silently converting non-zero
fill values to zero (:issue:`24817`) +- Bug in :class:`SparseArray` comparison methods with an array-like operand of mismatched length raising ``AssertionError`` or unclear ``ValueError`` depending on the input (:issue:`43863`) +- Bug in :class:`SparseArray` arithmetic methods ``floordiv`` and ``mod`` behaviors when dividing by zero not matching the non-sparse :class:`Series` behavior (:issue:`38172`) +- Bug in :class:`SparseArray` unary methods as well as :meth:`SparseArray.isna` doesn't recalculate indexes (:issue:`44955`) + +ExtensionArray +^^^^^^^^^^^^^^ +- Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`) +- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`, :issue:`23316`) +- NumPy ufuncs ``np.minimum.reduce`` ``np.maximum.reduce``, ``np.add.reduce``, and ``np.prod.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`, :issue:`44793`) +- NumPy ufuncs with ``out`` keyword are now supported by arrays with ``IntegerDtype`` and ``FloatingDtype`` (:issue:`45122`) +- Avoid raising ``PerformanceWarning`` about fragmented :class:`DataFrame` when using many columns with an extension dtype (:issue:`44098`) +- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`) +- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`) +- Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`44715`) +- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`) +- Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the :class:`Series` dtype (e.g. 
``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`) +- Bug in :meth:`Series.replace` where explicitly passing ``value=None`` is treated as if no ``value`` was passed, and ``None`` not being in the result (:issue:`36984`, :issue:`19998`) +- Bug in :meth:`Series.replace` with unwanted downcasting being done in no-op replacements (:issue:`44498`) +- Bug in :meth:`Series.replace` with ``FloatDtype``, ``string[python]``, or ``string[pyarrow]`` dtype not being preserved when possible (:issue:`33484`, :issue:`40732`, :issue:`31644`, :issue:`41215`, :issue:`25438`) + +Styler +^^^^^^ +- Bug in :class:`.Styler` where the ``uuid`` at initialization maintained a floating underscore (:issue:`43037`) +- Bug in :meth:`.Styler.to_html` where the ``Styler`` object was updated if the ``to_html`` method was called with some args (:issue:`43034`) +- Bug in :meth:`.Styler.copy` where ``uuid`` was not previously copied (:issue:`40675`) +- Bug in :meth:`Styler.apply` where functions which returned :class:`Series` objects were not correctly handled in terms of aligning their index labels (:issue:`13657`, :issue:`42014`) +- Bug when rendering an empty :class:`DataFrame` with a named :class:`Index` (:issue:`43305`) +- Bug when rendering a single level :class:`MultiIndex` (:issue:`43383`) +- Bug when combining non-sparse rendering and :meth:`.Styler.hide_columns` or :meth:`.Styler.hide_index` (:issue:`43464`) +- Bug setting a table style when using multiple selectors in :class:`.Styler` (:issue:`44011`) +- Bugs where row trimming and column trimming failed to reflect hidden rows (:issue:`43703`, :issue:`44247`) + +Other +^^^^^ +- Bug in :meth:`DataFrame.astype` with non-unique columns and a :class:`Series` ``dtype`` argument (:issue:`44417`) +- Bug in :meth:`CustomBusinessMonthBegin.__add__` (:meth:`CustomBusinessMonthEnd.__add__`) not applying the extra ``offset`` parameter when beginning (end) of the target month is already a business day (:issue:`41356`) +- Bug in :meth:`RangeIndex.union` with another ``RangeIndex`` with matching (even) ``step`` and starts differing by strictly less than ``step / 2`` (:issue:`44019`) +- Bug in :meth:`RangeIndex.difference` with ``sort=None`` and ``step<0`` failing to sort (:issue:`44085`) +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`, :issue:`37899`) +- Bug in :meth:`FloatingArray.equals` failing to consider two arrays equal if they contain ``np.nan`` values (:issue:`44382`) +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`) +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``periods`` larger than ``len(frame.columns)`` producing an invalid :class:`DataFrame` (:issue:`44978`) +- Bug in :meth:`DataFrame.diff` when passing a NumPy integer object instead of an ``int`` object (:issue:`44572`) +- Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`) +- Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`) +- Bug in :meth:`DataFrame.eval` where ``resolvers`` argument was overriding the default resolvers (:issue:`34966`) +- :meth:`Series.__repr__` and :meth:`DataFrame.__repr__` no longer replace all null-values in indexes with "NaN" but use their real string-representations. 
"NaN" is used only for ``float("nan")`` (:issue:`45263`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_140.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.3.5..v1.4.0 diff --git a/doc/source/whatsnew/v1.4.1.rst b/doc/source/whatsnew/v1.4.1.rst new file mode 100644 index 00000000..dd2002bb --- /dev/null +++ b/doc/source/whatsnew/v1.4.1.rst @@ -0,0 +1,56 @@ +.. _whatsnew_141: + +What's new in 1.4.1 (February 12, 2022) +--------------------------------------- + +These are the changes in pandas 1.4.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_141.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Regression in :meth:`Series.mask` with ``inplace=True`` and ``PeriodDtype`` and an incompatible ``other`` coercing to a common dtype instead of raising (:issue:`45546`) +- Regression in :func:`.assert_frame_equal` not respecting ``check_flags=False`` (:issue:`45554`) +- Regression in :meth:`DataFrame.loc` raising ``ValueError`` when indexing (getting values) on a :class:`MultiIndex` with one level (:issue:`45779`) +- Regression in :meth:`Series.fillna` with ``downcast=False`` incorrectly downcasting ``object`` dtype (:issue:`45603`) +- Regression in :func:`api.types.is_bool_dtype` raising an ``AttributeError`` when evaluating a categorical :class:`Series` (:issue:`45615`) +- Regression in :meth:`DataFrame.iat` setting values leading to not propagating correctly in subsequent lookups (:issue:`45684`) +- Regression when setting values with :meth:`DataFrame.loc` losing :class:`Index` name if :class:`DataFrame` was empty before (:issue:`45621`) +- Regression in :meth:`~Index.join` with overlapping :class:`IntervalIndex` raising an ``InvalidIndexError`` (:issue:`45661`) +- Regression when setting values with :meth:`Series.loc` raising with all ``False`` indexer and :class:`Series` on the right hand side (:issue:`45778`) +- Regression in :func:`read_sql` with a DBAPI2 connection that is not an instance of ``sqlite3.Connection`` incorrectly requiring SQLAlchemy be installed (:issue:`45660`) +- Regression in :class:`DateOffset` when constructing with an integer argument with no keywords (e.g. ``pd.DateOffset(n)``) would behave like ``datetime.timedelta(days=0)`` (:issue:`45643`, :issue:`45890`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_141.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Fixed segfault in :meth:`DataFrame.to_json` when dumping tz-aware datetimes in Python 3.10 (:issue:`42130`) +- Stopped emitting unnecessary ``FutureWarning`` in :meth:`DataFrame.sort_values` with sparse columns (:issue:`45618`) +- Fixed window aggregations in :meth:`DataFrame.rolling` and :meth:`Series.rolling` to skip over unused elements (:issue:`45647`) +- Fixed builtin highlighters in :class:`.Styler` to be responsive to ``NA`` with nullable dtypes (:issue:`45804`) +- Bug in :meth:`~Rolling.apply` with ``axis=1`` raising an erroneous ``ValueError`` (:issue:`45912`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_141.other: + +Other +~~~~~ +- Reverted performance speedup of :meth:`DataFrame.corr` for ``method=pearson`` to fix precision regression (:issue:`45640`, :issue:`42761`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_141.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.4.0..v1.4.1 diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst new file mode 100644 index 00000000..64c36632 --- /dev/null +++ b/doc/source/whatsnew/v1.4.2.rst @@ -0,0 +1,45 @@ +.. _whatsnew_142: + +What's new in 1.4.2 (April 2, 2022) +----------------------------------- + +These are the changes in pandas 1.4.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_142.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`) +- Fixed regression in :func:`read_csv` killing python process when invalid file input was given for ``engine="c"`` (:issue:`45957`) +- Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`) +- Provided an alternative solution for passing custom Excel formats in :meth:`.Styler.to_excel`, which was a regression based on stricter CSS validation. Examples available in the documentation for :meth:`.Styler.format` (:issue:`46152`) +- Fixed regression in :meth:`DataFrame.replace` when a replacement value was also a target for replacement (:issue:`46306`) +- Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` (:issue:`45601`, :issue:`45836`) +- Fixed regression when setting values with :meth:`DataFrame.loc` losing :class:`MultiIndex` names if :class:`DataFrame` was empty before (:issue:`46317`) +- Fixed regression when rendering boolean datatype columns with :meth:`.Styler` (:issue:`46384`) +- Fixed regression in :meth:`Groupby.rolling` with a frequency window that would raise a ``ValueError`` even if the datetimes within each group were monotonic (:issue:`46061`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_142.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`) +- Fixed "longtable" formatting in :meth:`.Styler.to_latex` when ``column_format`` is given in extended format (:issue:`46037`) +- Fixed incorrect rendering in :meth:`.Styler.format` with ``hyperlinks="html"`` when the url contains a colon or other special characters (:issue:`46389`) +- Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_142.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.4.1..v1.4.2 diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst new file mode 100644 index 00000000..70b451a2 --- /dev/null +++ b/doc/source/whatsnew/v1.4.3.rst @@ -0,0 +1,72 @@ +.. _whatsnew_143: + +What's new in 1.4.3 (June 23, 2022) +----------------------------------- + +These are the changes in pandas 1.4.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_143.concat: + +Behavior of ``concat`` with empty or all-NA DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The behavior change in version 1.4.0 to stop ignoring the data type +of empty or all-NA columns with float or object dtype in :func:`concat` +(:ref:`whatsnew_140.notable_bug_fixes.concat_with_empty_or_all_na`) has been +reverted (:issue:`45637`). + + +.. _whatsnew_143.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` also casting other columns to object dtype even when there were no values to replace (:issue:`46634`) +- Fixed regression in :meth:`DataFrame.to_csv` raising error when :class:`DataFrame` contains extension dtype categorical column (:issue:`46297`, :issue:`46812`) +- Fixed regression in representation of ``dtypes`` attribute of :class:`MultiIndex` (:issue:`46900`) +- Fixed regression when setting values with :meth:`DataFrame.loc` updating :class:`RangeIndex` when index was set as new column and column was updated afterwards (:issue:`47128`) +- Fixed regression in :meth:`DataFrame.fillna` and :meth:`DataFrame.update` creating a copy when updating inplace (:issue:`47188`) +- Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when the sorting column has ``np.nan`` values (:issue:`46589`) +- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) +- Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`) +- Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`) +- Fixed regression in ``NaN`` comparison for :class:`Index` operations where the same object was compared (:issue:`47105`) +- Fixed regression is :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`) +- Fixed regression in :func:`read_csv` with ``index_col=False`` identifying first row as index names when ``header=None`` (:issue:`46955`) +- Fixed regression in :meth:`.DataFrameGroupBy.agg` when used with list-likes or dict-likes and ``axis=1`` that would give incorrect results; now raises ``NotImplementedError`` (:issue:`46995`) +- Fixed regression in :meth:`DataFrame.resample` and :meth:`DataFrame.rolling` when used with list-likes or dict-likes and ``axis=1`` that would raise an unintuitive error message; now raises ``NotImplementedError`` (:issue:`46904`) +- Fixed regression in :func:`testing.assert_index_equal` when ``check_order=False`` and :class:`Index` has extension or object dtype (:issue:`47207`) +- Fixed regression in :func:`read_excel` returning ints as floats on certain input sheets (:issue:`46988`) +- Fixed regression in :meth:`DataFrame.shift` when ``axis`` is ``columns`` and ``fill_value`` is absent, ``freq`` is ignored (:issue:`47039`) +- Fixed regression in :meth:`DataFrame.to_json` causing a segmentation violation when :class:`DataFrame` is created with an ``index`` parameter of the type :class:`PeriodIndex` (:issue:`46683`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_143.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :func:`pandas.eval`, :meth:`DataFrame.eval` and :meth:`DataFrame.query` where passing empty ``local_dict`` or ``global_dict`` was treated as passing ``None`` (:issue:`47084`) +- Most I/O methods no longer suppress ``OSError`` and ``ValueError`` when closing file handles (:issue:`47136`) +- Improving error message raised by :meth:`DataFrame.from_dict` when passing an invalid ``orient`` parameter (:issue:`47450`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_143.other: + +Other +~~~~~ +- The minimum version of Cython needed to compile pandas is now ``0.29.30`` (:issue:`41935`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_143.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.4.2..v1.4.3 diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst new file mode 100644 index 00000000..56b1254d --- /dev/null +++ b/doc/source/whatsnew/v1.4.4.rst @@ -0,0 +1,65 @@ +.. _whatsnew_144: + +What's new in 1.4.4 (August 31, 2022) +------------------------------------- + +These are the changes in pandas 1.4.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_144.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.fillna` not working on a :class:`DataFrame` with a :class:`MultiIndex` (:issue:`47649`) +- Fixed regression in taking NULL :class:`objects` from a :class:`DataFrame` causing a segmentation violation. These NULL values are created by :meth:`numpy.empty_like` (:issue:`46848`) +- Fixed regression in :func:`concat` materializing the :class:`Index` during sorting even if the :class:`Index` was already sorted (:issue:`47501`) +- Fixed regression in :func:`concat` or :func:`merge` handling of all-NaN ExtensionArrays with custom attributes (:issue:`47762`) +- Fixed regression in calling bitwise numpy ufuncs (for example, ``np.bitwise_and``) on Index objects (:issue:`46769`) +- Fixed regression in :func:`cut` when using a ``datetime64`` IntervalIndex as bins (:issue:`46218`) +- Fixed regression in :meth:`DataFrame.select_dtypes` where ``include="number"`` included :class:`BooleanDtype` (:issue:`46870`) +- Fixed regression in :meth:`DataFrame.loc` raising error when indexing with a ``NamedTuple`` (:issue:`48124`) +- Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`) +- Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`) +- Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`) +- Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DatetimeIndex` with a :class:`.DateOffset` object for its ``freq`` (:issue:`46671`) +- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`) +- Fixed regression in updating a DataFrame column through Series ``__setitem__`` (using chained assignment) not updating column values inplace and using too much memory (:issue:`47172`) +- Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`) +- Fixed regression using custom Index subclasses (for example, used in xarray) 
with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`) +- Fixed regression in :meth:`~Index.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`) +- Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`) +- Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`) +- Fixed regression where getting a row using :meth:`DataFrame.iloc` with :class:`SparseDtype` would raise (:issue:`46406`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_144.bug_fixes: + +Bug fixes +~~~~~~~~~ +- The ``FutureWarning`` raised when passing arguments (other than ``filepath_or_buffer``) as positional in :func:`read_csv` is now raised at the correct stacklevel (:issue:`47385`) +- Bug in :meth:`DataFrame.to_sql` when ``method`` was a ``callable`` that did not return an ``int`` and would raise a ``TypeError`` (:issue:`46891`) +- Bug in :meth:`.DataFrameGroupBy.value_counts` where ``subset`` had no effect (:issue:`46383`) +- Bug when getting values with :meth:`DataFrame.loc` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`) +- Bug in the :meth:`Series.dt.strftime` accessor return a float instead of object dtype Series for all-NaT input, which also causes a spurious deprecation warning (:issue:`45858`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_144.other: + +Other +~~~~~ +- The minimum version of Cython needed to compile pandas is now ``0.29.32`` (:issue:`47978`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_144.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.4.3..v1.4.4|HEAD diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst new file mode 100644 index 00000000..ecd38555 --- /dev/null +++ b/doc/source/whatsnew/v1.5.0.rst @@ -0,0 +1,1294 @@ +.. _whatsnew_150: + +What's new in 1.5.0 (September 19, 2022) +---------------------------------------- + +These are the changes in pandas 1.5.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.enhancements: + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_150.enhancements.pandas-stubs: + +``pandas-stubs`` +^^^^^^^^^^^^^^^^ + +The ``pandas-stubs`` library is now supported by the pandas development team, providing type stubs for the pandas API. Please visit +https://github.com/pandas-dev/pandas-stubs for more information. + +We thank VirtusLab and Microsoft for their initial, significant contributions to ``pandas-stubs`` + +.. _whatsnew_150.enhancements.arrow: + +Native PyArrow-backed ExtensionArray +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +With `Pyarrow `__ installed, users can now create pandas objects +that are backed by a ``pyarrow.ChunkedArray`` and ``pyarrow.DataType``. + +The ``dtype`` argument can accept a string of a `pyarrow data type `__ +with ``pyarrow`` in brackets e.g. ``"int64[pyarrow]"`` or, for pyarrow data types that take parameters, a :class:`ArrowDtype` +initialized with a ``pyarrow.DataType``. + +.. 
ipython:: python + + import pyarrow as pa + ser_float = pd.Series([1.0, 2.0, None], dtype="float32[pyarrow]") + ser_float + + list_of_int_type = pd.ArrowDtype(pa.list_(pa.int64())) + ser_list = pd.Series([[1, 2], [3, None]], dtype=list_of_int_type) + ser_list + + ser_list.take([1, 0]) + ser_float * 5 + ser_float.mean() + ser_float.dropna() + +Most operations are supported and have been implemented using `pyarrow compute `__ functions. +We recommend installing the latest version of PyArrow to access the most recently implemented compute functions. + +.. warning:: + + This feature is experimental, and the API can change in a future release without warning. + +.. _whatsnew_150.enhancements.dataframe_interchange: + +DataFrame interchange protocol implementation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas now implements the DataFrame interchange API spec. +See the full details on the API at https://data-apis.org/dataframe-protocol/latest/index.html + +The protocol consists of two parts: + +- New method :meth:`DataFrame.__dataframe__` which produces the interchange object. + It effectively "exports" the pandas dataframe as an interchange object so + any other library which has the protocol implemented can "import" that dataframe + without knowing anything about the producer except that it makes an interchange object. +- New function :func:`pandas.api.interchange.from_dataframe` which can take + an arbitrary interchange object from any conformant library and construct a + pandas DataFrame out of it. + +.. _whatsnew_150.enhancements.styler: + +Styler +^^^^^^ + +The most notable development is the new method :meth:`.Styler.concat` which +allows adding customised footer rows to visualise additional calculations on the data, +e.g. totals and counts etc. (:issue:`43875`, :issue:`46186`) + +Additionally there is an alternative output method :meth:`.Styler.to_string`, +which allows using the Styler's formatting methods to create, for example, CSVs (:issue:`44502`). + +A new feature :meth:`.Styler.relabel_index` is also made available to provide full customisation of the display of +index or column headers (:issue:`47864`) + +Minor feature improvements are: + + - Adding the ability to render ``border`` and ``border-{side}`` CSS properties in Excel (:issue:`42276`) + - Making keyword arguments consistent: :meth:`.Styler.highlight_null` now accepts ``color`` and deprecates ``null_color``, although this remains backwards compatible (:issue:`45907`) + +.. _whatsnew_150.enhancements.resample_group_keys: + +Control of index with ``group_keys`` in :meth:`DataFrame.resample` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The argument ``group_keys`` has been added to the method :meth:`DataFrame.resample`. +As with :meth:`DataFrame.groupby`, this argument controls whether each group is added +to the index in the resample when :meth:`.Resampler.apply` is used. + +.. warning:: + Not specifying the ``group_keys`` argument will retain the + previous behavior and emit a warning if the result will change + by specifying ``group_keys=False``. In a future version + of pandas, not specifying ``group_keys`` will default to + the same behavior as ``group_keys=False``. + +..
ipython:: python + + df = pd.DataFrame( + {'a': range(6)}, + index=pd.date_range("2021-01-01", periods=6, freq="8H") + ) + df.resample("D", group_keys=True).apply(lambda x: x) + df.resample("D", group_keys=False).apply(lambda x: x) + +Previously, the resulting index would depend upon the values returned by ``apply``, +as seen in the following example. + +.. code-block:: ipython + + In [1]: # pandas 1.3 + In [2]: df.resample("D").apply(lambda x: x) + Out[2]: + a + 2021-01-01 00:00:00 0 + 2021-01-01 08:00:00 1 + 2021-01-01 16:00:00 2 + 2021-01-02 00:00:00 3 + 2021-01-02 08:00:00 4 + 2021-01-02 16:00:00 5 + + In [3]: df.resample("D").apply(lambda x: x.reset_index()) + Out[3]: + index a + 2021-01-01 0 2021-01-01 00:00:00 0 + 1 2021-01-01 08:00:00 1 + 2 2021-01-01 16:00:00 2 + 2021-01-02 0 2021-01-02 00:00:00 3 + 1 2021-01-02 08:00:00 4 + 2 2021-01-02 16:00:00 5 + +.. _whatsnew_150.enhancements.from_dummies: + +from_dummies +^^^^^^^^^^^^ + +Added new function :func:`~pandas.from_dummies` to convert a dummy coded :class:`DataFrame` into a categorical :class:`DataFrame`. + +.. ipython:: python + + import pandas as pd + + df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], + "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], + "col2_c": [0, 0, 1]}) + + pd.from_dummies(df, sep="_") + +.. _whatsnew_150.enhancements.orc: + +Writing to ORC files +^^^^^^^^^^^^^^^^^^^^ + +The new method :meth:`DataFrame.to_orc` allows writing to ORC files (:issue:`43864`). + +This functionality depends the `pyarrow `__ library. For more details, see :ref:`the IO docs on ORC `. + +.. warning:: + + * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow. + * :func:`~pandas.DataFrame.to_orc` requires pyarrow>=7.0.0. + * :func:`~pandas.DataFrame.to_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies `. + * For supported dtypes please refer to `supported ORC features in Arrow `__. + * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. + +.. code-block:: python + + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.to_orc("./out.orc") + +.. _whatsnew_150.enhancements.tar: + +Reading directly from TAR archives +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +I/O methods like :func:`read_csv` or :meth:`DataFrame.to_json` now allow reading and writing +directly on TAR archives (:issue:`44787`). + +.. code-block:: python + + df = pd.read_csv("./movement.tar.gz") + # ... + df.to_csv("./out.tar.gz") + +This supports ``.tar``, ``.tar.gz``, ``.tar.bz`` and ``.tar.xz2`` archives. +The used compression method is inferred from the filename. +If the compression method cannot be inferred, use the ``compression`` argument: + +.. code-block:: python + + df = pd.read_csv(some_file_obj, compression={"method": "tar", "mode": "r:gz"}) # noqa F821 + +(``mode`` being one of ``tarfile.open``'s modes: https://docs.python.org/3/library/tarfile.html#tarfile.open) + + +.. _whatsnew_150.enhancements.read_xml_dtypes: + +read_xml now supports ``dtype``, ``converters``, and ``parse_dates`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Similar to other IO methods, :func:`pandas.read_xml` now supports assigning specific dtypes to columns, +apply converter methods, and parse dates (:issue:`43567`). + +.. 
ipython:: python + + xml_dates = """ + + + square + 00360 + 4.0 + 2020-01-01 + + + circle + 00360 + + 2021-01-01 + + + triangle + 00180 + 3.0 + 2022-01-01 + + """ + + df = pd.read_xml( + xml_dates, + dtype={'sides': 'Int64'}, + converters={'degrees': str}, + parse_dates=['date'] + ) + df + df.dtypes + + +.. _whatsnew_150.enhancements.read_xml_iterparse: + +read_xml now supports large XML using ``iterparse`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For very large XML files that can range in hundreds of megabytes to gigabytes, :func:`pandas.read_xml` +now supports parsing such sizeable files using `lxml's iterparse`_ and `etree's iterparse`_ +which are memory-efficient methods to iterate through XML trees and extract specific elements +and attributes without holding entire tree in memory (:issue:`45442`). + +.. code-block:: ipython + + In [1]: df = pd.read_xml( + ... "/path/to/downloaded/enwikisource-latest-pages-articles.xml", + ... iterparse = {"page": ["title", "ns", "id"]}) + ... ) + df + Out[2]: + title ns id + 0 Gettysburg Address 0 21450 + 1 Main Page 0 42950 + 2 Declaration by United Nations 0 8435 + 3 Constitution of the United States of America 0 8435 + 4 Declaration of Independence (Israel) 0 17858 + ... ... ... ... + 3578760 Page:Black cat 1897 07 v2 n10.pdf/17 104 219649 + 3578761 Page:Black cat 1897 07 v2 n10.pdf/43 104 219649 + 3578762 Page:Black cat 1897 07 v2 n10.pdf/44 104 219649 + 3578763 The History of Tom Jones, a Foundling/Book IX 0 12084291 + 3578764 Page:Shakespeare of Stratford (1926) Yale.djvu/91 104 21450 + + [3578765 rows x 3 columns] + + +.. _`lxml's iterparse`: https://lxml.de/3.2/parsing.html#iterparse-and-iterwalk +.. _`etree's iterparse`: https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse + +.. _whatsnew_150.enhancements.copy_on_write: + +Copy on Write +^^^^^^^^^^^^^ + +A new feature ``copy_on_write`` was added (:issue:`46958`). Copy on write ensures that +any DataFrame or Series derived from another in any way always behaves as a copy. +Copy on write disallows updating any other object than the object the method +was applied to. + +Copy on write can be enabled through: + +.. code-block:: python + + pd.set_option("mode.copy_on_write", True) + pd.options.mode.copy_on_write = True + +Alternatively, copy on write can be enabled locally through: + +.. code-block:: python + + with pd.option_context("mode.copy_on_write", True): + ... + +Without copy on write, the parent :class:`DataFrame` is updated when updating a child +:class:`DataFrame` that was derived from this :class:`DataFrame`. + +.. ipython:: python + + df = pd.DataFrame({"foo": [1, 2, 3], "bar": 1}) + view = df["foo"] + view.iloc[0] + df + +With copy on write enabled, df won't be updated anymore: + +.. ipython:: python + + with pd.option_context("mode.copy_on_write", True): + df = pd.DataFrame({"foo": [1, 2, 3], "bar": 1}) + view = df["foo"] + view.iloc[0] + df + +A more detailed explanation can be found `here `_. + +.. 
_whatsnew_150.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ +- :meth:`Series.map` now raises when ``arg`` is dict but ``na_action`` is not either ``None`` or ``'ignore'`` (:issue:`46588`) +- :meth:`MultiIndex.to_frame` now supports the argument ``allow_duplicates`` and raises on duplicate labels if it is missing or False (:issue:`45245`) +- :class:`.StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`) +- Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`) +- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`). +- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) +- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`) +- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`45428`) +- :func:`read_csv` now supports ``defaultdict`` as a ``dtype`` parameter (:issue:`41574`) +- :meth:`DataFrame.rolling` and :meth:`Series.rolling` now support a ``step`` parameter with fixed-length windows (:issue:`15354`) +- Implemented a ``bool``-dtype :class:`Index`, passing a bool-dtype array-like to ``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`) +- Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`) +- :class:`Series` and :class:`DataFrame` with :class:`IntegerDtype` now supports bitwise operations (:issue:`34463`) +- Add ``milliseconds`` field support for :class:`.DateOffset` (:issue:`43371`) +- :meth:`DataFrame.where` tries to maintain dtype of :class:`DataFrame` if fill value can be cast without loss of precision (:issue:`45582`) +- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) +- :func:`concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`) +- :func:`concat` now raises when ``levels`` contains duplicate values (:issue:`46653`) +- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.DataFrameGroupBy.quantile` (:issue:`46560`) +- A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`, :issue:`46725`) +- Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`) +- A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) +- Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`) +- ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`) +- 
:class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`) +- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`) +- Add support for :meth:`.GroupBy.ohlc` for extension array dtypes (:issue:`37493`) +- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) +- :func:`pandas.read_html` now supports extracting links from table cells (:issue:`13141`) +- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) +- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) +- :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) +- :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- :meth:`DataFrame.quantile` gained a ``method`` argument that can accept ``table`` to evaluate multi-column quantiles (:issue:`43881`) +- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`) +- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`) +- The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) +- The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.notable_bug_fixes: + +Notable bug fixes +~~~~~~~~~~~~~~~~~ + +These are bug fixes that might have notable behavior changes. + +.. _whatsnew_150.notable_bug_fixes.groupby_transform_dropna: + +Using ``dropna=True`` with ``groupby`` transforms +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A transform is an operation whose result has the same size as its input. When the +result is a :class:`DataFrame` or :class:`Series`, it is also required that the +index of the result matches that of the input. In pandas 1.4, using +:meth:`.DataFrameGroupBy.transform` or :meth:`.SeriesGroupBy.transform` with null +values in the groups and ``dropna=True`` gave incorrect results. 
Demonstrated by the +examples below, the incorrect results either contained incorrect values, or the result +did not have the same index as the input. + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 1, np.nan], 'b': [2, 3, 4]}) + +*Old behavior*: + +.. code-block:: ipython + + In [3]: # Value in the last row should be np.nan + df.groupby('a', dropna=True).transform('sum') + Out[3]: + b + 0 5 + 1 5 + 2 5 + + In [3]: # Should have one additional row with the value np.nan + df.groupby('a', dropna=True).transform(lambda x: x.sum()) + Out[3]: + b + 0 5 + 1 5 + + In [3]: # The value in the last row is np.nan interpreted as an integer + df.groupby('a', dropna=True).transform('ffill') + Out[3]: + b + 0 2 + 1 3 + 2 -9223372036854775808 + + In [3]: # Should have one additional row with the value np.nan + df.groupby('a', dropna=True).transform(lambda x: x) + Out[3]: + b + 0 2 + 1 3 + +*New behavior*: + +.. ipython:: python + + df.groupby('a', dropna=True).transform('sum') + df.groupby('a', dropna=True).transform(lambda x: x.sum()) + df.groupby('a', dropna=True).transform('ffill') + df.groupby('a', dropna=True).transform(lambda x: x) + +.. _whatsnew_150.notable_bug_fixes.to_json_incorrectly_localizing_naive_timestamps: + +Serializing tz-naive Timestamps with to_json() with ``iso_dates=True`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.to_json`, :meth:`Series.to_json`, and :meth:`Index.to_json` +would incorrectly localize DatetimeArrays/DatetimeIndexes with tz-naive Timestamps +to UTC. (:issue:`38760`) + +Note that this patch does not fix the localization of tz-aware Timestamps to UTC +upon serialization. (Related issue :issue:`12997`) + +*Old Behavior* + +.. ipython:: python + + index = pd.date_range( + start='2020-12-28 00:00:00', + end='2020-12-28 02:00:00', + freq='1H', + ) + a = pd.Series( + data=range(3), + index=index, + ) + +.. code-block:: ipython + + In [4]: a.to_json(date_format='iso') + Out[4]: '{"2020-12-28T00:00:00.000Z":0,"2020-12-28T01:00:00.000Z":1,"2020-12-28T02:00:00.000Z":2}' + + In [5]: pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index + Out[5]: array([False, False, False]) + +*New Behavior* + +.. ipython:: python + + a.to_json(date_format='iso') + # Roundtripping now works + pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index + +.. _whatsnew_150.notable_bug_fixes.groupby_value_counts_categorical: + +DataFrameGroupBy.value_counts with non-grouping categorical columns and ``observed=True`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Calling :meth:`.DataFrameGroupBy.value_counts` with ``observed=True`` would incorrectly drop non-observed categories of non-grouping columns (:issue:`46357`). + +.. code-block:: ipython + + In [6]: df = pd.DataFrame(["a", "b", "c"], dtype="category").iloc[0:2] + In [7]: df + Out[7]: + 0 + 0 a + 1 b + +*Old Behavior* + +.. code-block:: ipython + + In [8]: df.groupby(level=0, observed=True).value_counts() + Out[8]: + 0 a 1 + 1 b 1 + dtype: int64 + + +*New Behavior* + +.. code-block:: ipython + + In [9]: df.groupby(level=0, observed=True).value_counts() + Out[9]: + 0 a 1 + 1 a 0 + b 1 + 0 b 0 + c 0 + 1 c 0 + dtype: int64 + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
_whatsnew_150.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Some minimum supported versions of dependencies were updated. +If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.20.3 | X | X | ++-----------------+-----------------+----------+---------+ +| mypy (dev) | 0.971 | | X | ++-----------------+-----------------+----------+---------+ +| beautifulsoup4 | 4.9.3 | | X | ++-----------------+-----------------+----------+---------+ +| blosc | 1.21.0 | | X | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.3.2 | | X | ++-----------------+-----------------+----------+---------+ +| fsspec | 2021.07.0 | | X | ++-----------------+-----------------+----------+---------+ +| hypothesis | 6.13.0 | | X | ++-----------------+-----------------+----------+---------+ +| gcsfs | 2021.07.0 | | X | ++-----------------+-----------------+----------+---------+ +| jinja2 | 3.0.0 | | X | ++-----------------+-----------------+----------+---------+ +| lxml | 4.6.3 | | X | ++-----------------+-----------------+----------+---------+ +| numba | 0.53.1 | | X | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.7.3 | | X | ++-----------------+-----------------+----------+---------+ +| openpyxl | 3.0.7 | | X | ++-----------------+-----------------+----------+---------+ +| pandas-gbq | 0.15.0 | | X | ++-----------------+-----------------+----------+---------+ +| psycopg2 | 2.8.6 | | X | ++-----------------+-----------------+----------+---------+ +| pymysql | 1.0.2 | | X | ++-----------------+-----------------+----------+---------+ +| pyreadstat | 1.1.2 | | X | ++-----------------+-----------------+----------+---------+ +| pyxlsb | 1.0.8 | | X | ++-----------------+-----------------+----------+---------+ +| s3fs | 2021.08.0 | | X | ++-----------------+-----------------+----------+---------+ +| scipy | 1.7.1 | | X | ++-----------------+-----------------+----------+---------+ +| sqlalchemy | 1.4.16 | | X | ++-----------------+-----------------+----------+---------+ +| tabulate | 0.8.9 | | X | ++-----------------+-----------------+----------+---------+ +| xarray | 0.19.0 | | X | ++-----------------+-----------------+----------+---------+ +| xlsxwriter | 1.4.3 | | X | ++-----------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. 
+ ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 |4.9.3 | X | ++-----------------+-----------------+---------+ +| blosc |1.21.0 | X | ++-----------------+-----------------+---------+ +| bottleneck |1.3.2 | X | ++-----------------+-----------------+---------+ +| brotlipy |0.7.0 | | ++-----------------+-----------------+---------+ +| fastparquet |0.4.0 | | ++-----------------+-----------------+---------+ +| fsspec |2021.08.0 | X | ++-----------------+-----------------+---------+ +| html5lib |1.1 | | ++-----------------+-----------------+---------+ +| hypothesis |6.13.0 | X | ++-----------------+-----------------+---------+ +| gcsfs |2021.08.0 | X | ++-----------------+-----------------+---------+ +| jinja2 |3.0.0 | X | ++-----------------+-----------------+---------+ +| lxml |4.6.3 | X | ++-----------------+-----------------+---------+ +| matplotlib |3.3.2 | | ++-----------------+-----------------+---------+ +| numba |0.53.1 | X | ++-----------------+-----------------+---------+ +| numexpr |2.7.3 | X | ++-----------------+-----------------+---------+ +| odfpy |1.4.1 | | ++-----------------+-----------------+---------+ +| openpyxl |3.0.7 | X | ++-----------------+-----------------+---------+ +| pandas-gbq |0.15.0 | X | ++-----------------+-----------------+---------+ +| psycopg2 |2.8.6 | X | ++-----------------+-----------------+---------+ +| pyarrow |1.0.1 | | ++-----------------+-----------------+---------+ +| pymysql |1.0.2 | X | ++-----------------+-----------------+---------+ +| pyreadstat |1.1.2 | X | ++-----------------+-----------------+---------+ +| pytables |3.6.1 | | ++-----------------+-----------------+---------+ +| python-snappy |0.6.0 | | ++-----------------+-----------------+---------+ +| pyxlsb |1.0.8 | X | ++-----------------+-----------------+---------+ +| s3fs |2021.08.0 | X | ++-----------------+-----------------+---------+ +| scipy |1.7.1 | X | ++-----------------+-----------------+---------+ +| sqlalchemy |1.4.16 | X | ++-----------------+-----------------+---------+ +| tabulate |0.8.9 | X | ++-----------------+-----------------+---------+ +| tzdata |2022a | | ++-----------------+-----------------+---------+ +| xarray |0.19.0 | X | ++-----------------+-----------------+---------+ +| xlrd |2.0.1 | | ++-----------------+-----------------+---------+ +| xlsxwriter |1.4.3 | X | ++-----------------+-----------------+---------+ +| xlwt |1.3.0 | | ++-----------------+-----------------+---------+ +| zstandard |0.15.2 | | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. _whatsnew_150.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- BigQuery I/O methods :func:`read_gbq` and :meth:`DataFrame.to_gbq` default to + ``auth_local_webserver = True``. Google has deprecated the + ``auth_local_webserver = False`` `"out of band" (copy-paste) flow + `_. + The ``auth_local_webserver = False`` option is planned to stop working in + October 2022. (:issue:`46312`) +- :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. 
(:issue:`29102`) +- Operations with :class:`Timestamp` or :class:`Timedelta` that would previously raise ``OverflowError`` instead raise ``OutOfBoundsDatetime`` or ``OutOfBoundsTimedelta`` where appropriate (:issue:`47268`) +- When :func:`read_sas` previously returned ``None``, it now returns an empty :class:`DataFrame` (:issue:`47410`) +- :class:`DataFrame` constructor raises if ``index`` or ``columns`` arguments are sets (:issue:`47215`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.deprecations: + +Deprecations +~~~~~~~~~~~~ + +.. warning:: + + In the next major version release, 2.0, several larger API changes are being considered without a formal deprecation such as + making the standard library `zoneinfo `_ the default timezone implementation instead of ``pytz``, + having the :class:`Index` support all data types instead of having multiple subclasses (:class:`CategoricalIndex`, :class:`Int64Index`, etc.), and more. + The changes under consideration are logged in `this Github issue `_, and any + feedback or concerns are welcome. + +.. _whatsnew_150.deprecations.int_slicing_series: + +Label-based integer slicing on a Series with an Int64Index or RangeIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a future version, integer slicing on a :class:`Series` with a :class:`Int64Index` or :class:`RangeIndex` will be treated as *label-based*, not positional. This will make the behavior consistent with other :meth:`Series.__getitem__` and :meth:`Series.__setitem__` behaviors (:issue:`45162`). + +For example: + +.. ipython:: python + + ser = pd.Series([1, 2, 3, 4, 5], index=[2, 3, 5, 7, 11]) + +In the old behavior, ``ser[2:4]`` treats the slice as positional: + +*Old behavior*: + +.. code-block:: ipython + + In [3]: ser[2:4] + Out[3]: + 5 3 + 7 4 + dtype: int64 + +In a future version, this will be treated as label-based: + +*Future behavior*: + +.. code-block:: ipython + + In [4]: ser.loc[2:4] + Out[4]: + 2 1 + 3 2 + dtype: int64 + +To retain the old behavior, use ``series.iloc[i:j]``. To get the future behavior, +use ``series.loc[i:j]``. + +Slicing on a :class:`DataFrame` will not be affected. + +.. _whatsnew_150.deprecations.excel_writer_attributes: + +:class:`ExcelWriter` attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All attributes of :class:`ExcelWriter` were previously documented as not +public. However some third party Excel engines documented accessing +``ExcelWriter.book`` or ``ExcelWriter.sheets``, and users were utilizing these +and possibly other attributes. Previously these attributes were not safe to use; +e.g. modifications to ``ExcelWriter.book`` would not update ``ExcelWriter.sheets`` +and conversely. In order to support this, pandas has made some attributes public +and improved their implementations so that they may now be safely used. (:issue:`45572`) + +The following attributes are now public and considered safe to access. + + - ``book`` + - ``check_extension`` + - ``close`` + - ``date_format`` + - ``datetime_format`` + - ``engine`` + - ``if_sheet_exists`` + - ``sheets`` + - ``supported_extensions`` + +The following attributes have been deprecated. They now raise a ``FutureWarning`` +when accessed and will be removed in a future version. Users should be aware +that their usage is considered unsafe, and can lead to unexpected results. + + - ``cur_sheet`` + - ``handles`` + - ``path`` + - ``save`` + - ``write_cells`` + +See the documentation of :class:`ExcelWriter` for further details. 
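+
+For illustration, a minimal sketch of reading the now-public ``book`` and ``sheets``
+attributes from within a writer context; the file name ``"report.xlsx"``, the sheet name
+``"Summary"`` and the ``openpyxl`` engine are arbitrary examples chosen here, not a fixed
+recipe:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame({"a": [1, 2]})
+    with pd.ExcelWriter("report.xlsx", engine="openpyxl") as writer:
+        df.to_excel(writer, sheet_name="Summary")
+        workbook = writer.book                 # engine workbook object, now a public attribute
+        worksheet = writer.sheets["Summary"]   # public mapping of sheet name to worksheet
+        print(worksheet.title)                 # the worksheet can be inspected or tweaked safely
+
+Any engine-specific customisation applied through ``book`` or ``sheets`` is now expected to
+stay consistent between the two attributes, which was not guaranteed before.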
+ +.. _whatsnew_150.deprecations.group_keys_in_apply: + +Using ``group_keys`` with transformers in :meth:`.GroupBy.apply` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions of pandas, if it was inferred that the function passed to +:meth:`.GroupBy.apply` was a transformer (i.e. the resulting index was equal to +the input index), the ``group_keys`` argument of :meth:`DataFrame.groupby` and +:meth:`Series.groupby` was ignored and the group keys would never be added to +the index of the result. In the future, the group keys will be added to the index +when the user specifies ``group_keys=True``. + +As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and +:meth:`Series.groupby`, not specifying ``group_keys`` with a transformer will +raise a ``FutureWarning``. This can be silenced and the previous behavior +retained by specifying ``group_keys=False``. + +.. _whatsnew_150.deprecations.setitem_column_try_inplace: + _ see also _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace + +Inplace operation when setting values with ``loc`` and ``iloc`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Most of the time setting values with :meth:`DataFrame.iloc` attempts to set values +inplace, only falling back to inserting a new array if necessary. There are +some cases where this rule is not followed, for example when setting an entire +column from an array with different dtype: + +.. ipython:: python + + df = pd.DataFrame({'price': [11.1, 12.2]}, index=['book1', 'book2']) + original_prices = df['price'] + new_prices = np.array([98, 99]) + +*Old behavior*: + +.. code-block:: ipython + + In [3]: df.iloc[:, 0] = new_prices + In [4]: df.iloc[:, 0] + Out[4]: + book1 98 + book2 99 + Name: price, dtype: int64 + In [5]: original_prices + Out[5]: + book1 11.1 + book2 12.2 + Name: price, float: 64 + +This behavior is deprecated. In a future version, setting an entire column with +iloc will attempt to operate inplace. + +*Future behavior*: + +.. code-block:: ipython + + In [3]: df.iloc[:, 0] = new_prices + In [4]: df.iloc[:, 0] + Out[4]: + book1 98.0 + book2 99.0 + Name: price, dtype: float64 + In [5]: original_prices + Out[5]: + book1 98.0 + book2 99.0 + Name: price, dtype: float64 + +To get the old behavior, use :meth:`DataFrame.__setitem__` directly: + +.. code-block:: ipython + + In [3]: df[df.columns[0]] = new_prices + In [4]: df.iloc[:, 0] + Out[4] + book1 98 + book2 99 + Name: price, dtype: int64 + In [5]: original_prices + Out[5]: + book1 11.1 + book2 12.2 + Name: price, dtype: float64 + +To get the old behaviour when ``df.columns`` is not unique and you want to +change a single column by index, you can use :meth:`DataFrame.isetitem`, which +has been added in pandas 1.5: + +.. code-block:: ipython + + In [3]: df_with_duplicated_cols = pd.concat([df, df], axis='columns') + In [3]: df_with_duplicated_cols.isetitem(0, new_prices) + In [4]: df_with_duplicated_cols.iloc[:, 0] + Out[4]: + book1 98 + book2 99 + Name: price, dtype: int64 + In [5]: original_prices + Out[5]: + book1 11.1 + book2 12.2 + Name: 0, dtype: float64 + +.. _whatsnew_150.deprecations.numeric_only_default: + +``numeric_only`` default value +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Across the :class:`DataFrame`, :class:`.DataFrameGroupBy`, and :class:`.Resampler` operations such as +``min``, ``sum``, and ``idxmax``, the default +value of the ``numeric_only`` argument, if it exists at all, was inconsistent. 
+Furthermore, operations with the default value ``None`` can lead to surprising
+results. (:issue:`46560`)
+
+.. code-block:: ipython
+
+    In [1]: df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
+
+    In [2]: # Reading the next line without knowing the contents of df, one would
+            # expect the result to contain the products for both columns a and b.
+            df[["a", "b"]].prod()
+    Out[2]:
+    a    2
+    dtype: int64
+
+To avoid this behavior, specifying the value ``numeric_only=None`` has been
+deprecated, and will be removed in a future version of pandas. In the future,
+all operations with a ``numeric_only`` argument will default to ``False``. Users
+should either call the operation only with columns that can be operated on, or
+specify ``numeric_only=True`` to operate only on Boolean, integer, and float columns.
+
+In order to support the transition to the new behavior, the following methods have
+gained the ``numeric_only`` argument.
+
+- :meth:`DataFrame.corr`
+- :meth:`DataFrame.corrwith`
+- :meth:`DataFrame.cov`
+- :meth:`DataFrame.idxmin`
+- :meth:`DataFrame.idxmax`
+- :meth:`.DataFrameGroupBy.cummin`
+- :meth:`.DataFrameGroupBy.cummax`
+- :meth:`.DataFrameGroupBy.idxmin`
+- :meth:`.DataFrameGroupBy.idxmax`
+- :meth:`.GroupBy.var`
+- :meth:`.GroupBy.std`
+- :meth:`.GroupBy.sem`
+- :meth:`.DataFrameGroupBy.quantile`
+- :meth:`.Resampler.mean`
+- :meth:`.Resampler.median`
+- :meth:`.Resampler.sem`
+- :meth:`.Resampler.std`
+- :meth:`.Resampler.var`
+- :meth:`DataFrame.rolling` operations
+- :meth:`DataFrame.expanding` operations
+- :meth:`DataFrame.ewm` operations
+
+.. _whatsnew_150.deprecations.other:
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
+- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
+- Deprecated behavior of :meth:`SparseArray.astype`, :meth:`Series.astype`, and :meth:`DataFrame.astype` with :class:`SparseDtype` when passing a non-sparse ``dtype``. In a future version, this will cast to that non-sparse dtype instead of wrapping it in a :class:`SparseDtype` (:issue:`34457`)
+- Deprecated behavior of :meth:`DatetimeIndex.intersection` and :meth:`DatetimeIndex.symmetric_difference` (``union`` behavior was already deprecated in version 1.3.0) with mixed time zones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`, :issue:`45357`)
+- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`)
+- Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`)
+- Deprecated behavior of :meth:`DatetimeIndex.astype`, :meth:`TimedeltaIndex.astype`, :meth:`PeriodIndex.astype` when converting to an integer dtype other than ``int64``.
+  In a future version, these will convert to exactly the specified dtype (instead of always ``int64``) and will raise if the conversion overflows (:issue:`45034`)
+- Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`)
+- Deprecated treating float-dtype data as wall-times when passed with a timezone to :class:`Series` or :class:`DatetimeIndex` (:issue:`45573`)
+- Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`)
+- Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`)
+- Deprecated allowing non-keyword arguments in :meth:`.ExtensionArray.argsort` (:issue:`46134`)
+- Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`)
+- Deprecated behavior of :meth:`DataFrame.quantile`: the ``numeric_only`` argument will default to ``False`` in a future version, including datetime/timedelta columns in the result (:issue:`7308`)
+- Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`)
+- Deprecated :attr:`Timedelta.delta` (:issue:`46476`)
+- Deprecated passing arguments as positional in :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44802`)
+- Deprecated passing positional arguments to :meth:`DataFrame.pivot` and :func:`pivot` except ``data`` (:issue:`30228`)
+- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`)
+- Deprecated positional arguments to :meth:`Index.join` except for ``other``, use keyword-only arguments instead of positional arguments (:issue:`46518`)
+- Deprecated positional arguments to :meth:`StringMethods.rsplit` and :meth:`StringMethods.split` except for ``pat``, use keyword-only arguments instead of positional arguments (:issue:`47423`)
+- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`46903`, :issue:`36148`)
+- Deprecated allowing ``unit="M"`` or ``unit="Y"`` in :class:`Timestamp` constructor with a non-round float value (:issue:`47267`)
+- Deprecated the ``display.column_space`` global configuration option (:issue:`7576`)
+- Deprecated the argument ``na_sentinel`` in :func:`factorize`, :meth:`Index.factorize`, and :meth:`.ExtensionArray.factorize`; pass ``use_na_sentinel=True`` instead to use the sentinel ``-1`` for NaN values and ``use_na_sentinel=False`` instead of ``na_sentinel=None`` to encode NaN values (:issue:`46910`)
+- Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned a DataFrame (:issue:`45648`)
+- Clarified warning from :func:`to_datetime` when delimited dates can't be parsed in accordance with the specified ``dayfirst`` argument (:issue:`46210`)
+- Emit warning from :func:`to_datetime` when delimited dates can't be parsed in accordance with the specified ``dayfirst`` argument, even for dates where the leading zero is omitted (e.g. ``31/1/2001``) (:issue:`47880`)
+- Deprecated :class:`Series` and :class:`Resampler` reducers (e.g.
+  ``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
+- Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
+- Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on these methods (:issue:`47728`)
+- Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`)
+- Deprecated the ``inplace`` keyword in :meth:`Categorical.set_ordered`, :meth:`Categorical.as_ordered`, and :meth:`Categorical.as_unordered` (:issue:`37643`)
+- Deprecated setting a categorical's categories with ``cat.categories = ['a', 'b', 'c']``, use :meth:`Categorical.rename_categories` instead (:issue:`37643`)
+- Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`)
+- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis`, use ``obj = obj.set_axis(..., copy=False)`` instead (:issue:`48130`)
+- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; a tuple of length one will be returned instead (:issue:`42795`)
+- Fixed up warning message of deprecation of :meth:`MultiIndex.lexsort_depth` as a public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
+- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
+- Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`).
+- Deprecated the ``mangle_dupe_cols`` argument in :func:`read_csv`, :func:`read_fwf`, :func:`read_table` and :func:`read_excel`. The argument was never implemented, and a new argument where the renaming pattern can be specified will be added instead (:issue:`47718`)
+- Deprecated allowing ``dtype='datetime64'`` or ``dtype=np.datetime64`` in :meth:`Series.astype`, use "datetime64[ns]" instead (:issue:`47844`)
+
+.. ---------------------------------------------------------------------------
+..
_whatsnew_150.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvement in :meth:`DataFrame.corrwith` for column-wise (axis=0) Pearson and Spearman correlation when other is a :class:`Series` (:issue:`46174`) +- Performance improvement in :meth:`.GroupBy.transform` for some user-defined DataFrame -> Series functions (:issue:`45387`) +- Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`) +- Performance improvement in :meth:`.GroupBy.diff` (:issue:`16706`) +- Performance improvement in :meth:`.GroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`) +- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`) +- Performance improvement in :meth:`.GroupBy.apply` when grouping on a non-unique unsorted index (:issue:`46527`) +- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`45681`, :issue:`46040`, :issue:`46330`) +- Performance improvement in :meth:`.GroupBy.var` with ``ddof`` other than one (:issue:`48152`) +- Performance improvement in :meth:`DataFrame.to_records` when the index is a :class:`MultiIndex` (:issue:`47263`) +- Performance improvement in :attr:`MultiIndex.values` when the MultiIndex contains levels of type DatetimeIndex, TimedeltaIndex or ExtensionDtypes (:issue:`46288`) +- Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`) +- Performance improvement in :meth:`DataFrame.join` when left and/or right are empty (:issue:`46015`) +- Performance improvement in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when target is a :class:`MultiIndex` (:issue:`46235`) +- Performance improvement when setting values in a pyarrow backed string array (:issue:`46400`) +- Performance improvement in :func:`factorize` (:issue:`46109`) +- Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`) +- Performance improvement in :func:`read_excel` when ``nrows`` argument provided (:issue:`32727`) +- Performance improvement in :meth:`.Styler.to_excel` when applying repeated CSS formats (:issue:`47371`) +- Performance improvement in :meth:`MultiIndex.is_monotonic_increasing` (:issue:`47458`) +- Performance improvement in :class:`BusinessHour` ``str`` and ``repr`` (:issue:`44764`) +- Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`) +- Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`) +- Performance improvement to :func:`read_sas` (:issue:`47404`) +- Performance improvement in ``argmax`` and ``argmin`` for :class:`arrays.SparseArray` (:issue:`34197`) +- + +.. --------------------------------------------------------------------------- +.. 
_whatsnew_150.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ +- Bug in :meth:`.Categorical.view` not accepting integer dtypes (:issue:`25464`) +- Bug in :meth:`.CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`) +- Bug in :meth:`concat` when concatenating two (or more) unordered :class:`CategoricalIndex` variables, whose categories are permutations, yields incorrect index values (:issue:`24845`) + +Datetimelike +^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.quantile` with datetime-like dtypes and no rows incorrectly returning ``float64`` dtype instead of retaining datetime-like dtype (:issue:`41544`) +- Bug in :func:`to_datetime` with sequences of ``np.str_`` objects incorrectly raising (:issue:`32264`) +- Bug in :class:`Timestamp` construction when passing datetime components as positional arguments and ``tzinfo`` as a keyword argument incorrectly raising (:issue:`31929`) +- Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`) +- Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`) +- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`) +- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) +- Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) +- Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) +- Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be thrown even if ``errors=coerce`` if there were more than 50 rows (:issue:`45319`) +- Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) +- + +Timedelta +^^^^^^^^^ +- Bug in :func:`astype_nansafe` astype("timedelta64[ns]") fails when np.nan is included (:issue:`45798`) +- Bug in constructing a :class:`Timedelta` with a ``np.timedelta64`` object and a ``unit`` sometimes silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`46827`) +- Bug in constructing a :class:`Timedelta` from a large integer or float with ``unit="W"`` silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`47268`) +- + +Time Zones +^^^^^^^^^^ +- Bug in :class:`Timestamp` constructor raising when passed a ``ZoneInfo`` tzinfo object (:issue:`46425`) +- + +Numeric +^^^^^^^ +- Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`) +- Bug in arithmetic operations with nullable types without :attr:`NA` values not matching the same operation with non-nullable types (:issue:`48223`) +- Bug in ``floordiv`` when dividing by ``IntegerDtype`` ``0`` would return ``0`` instead of ``inf`` (:issue:`48223`) +- Bug in division, ``pow`` and ``mod`` operations on array-likes with ``dtype="boolean"`` not being like their ``np.bool_`` counterparts (:issue:`46063`) +- Bug in multiplying a :class:`Series` with ``IntegerDtype`` or ``FloatingDtype`` by an 
array-like with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`45622`) +- Bug in :meth:`mean` where the optional dependency ``bottleneck`` causes precision loss linear in the length of the array. ``bottleneck`` has been disabled for :meth:`mean` improving the loss to log-linear but may result in a performance decrease. (:issue:`42878`) + +Conversion +^^^^^^^^^^ +- Bug in :meth:`DataFrame.astype` not preserving subclasses (:issue:`40810`) +- Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) +- Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) +- Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) +- Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`) +- Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`) +- Bug in :meth:`DataFrame.to_records` returning inconsistent numpy types if the index was a :class:`MultiIndex` (:issue:`47263`) +- Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`) +- Bug in :meth:`DataFrame.apply` that returns a :class:`DataFrame` instead of a :class:`Series` when applied to an empty :class:`DataFrame` and ``axis=1`` (:issue:`39111`) +- Bug when inferring the dtype from an iterable that is *not* a NumPy ``ndarray`` consisting of all NumPy unsigned integer scalars did not result in an unsigned integer dtype (:issue:`47294`) +- Bug in :meth:`DataFrame.eval` when pandas objects (e.g. ``'Timestamp'``) were column names (:issue:`44603`) +- + +Strings +^^^^^^^ +- Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. 
Now raises ``TypeError`` (:issue:`3485`) +- Bug in :meth:`Series.str.zfill` when strings contain leading signs, padding '0' before the sign character rather than after as ``str.zfill`` from standard library (:issue:`20868`) +- + +Interval +^^^^^^^^ +- Bug in :meth:`IntervalArray.__setitem__` when setting ``np.nan`` into an integer-backed array raising ``ValueError`` instead of ``TypeError`` (:issue:`45484`) +- Bug in :class:`IntervalDtype` when using datetime64[ns, tz] as a dtype string (:issue:`46999`) + +Indexing +^^^^^^^^ +- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`) +- Bug in :meth:`DataFrame.__getitem__` returning copy when :class:`DataFrame` has duplicated columns even if a unique column is selected (:issue:`45316`, :issue:`41062`) +- Bug in :meth:`Series.align` does not create :class:`MultiIndex` with union of levels when both MultiIndexes intersections are identical (:issue:`45224`) +- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`) +- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`) +- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`) +- Bug in :meth:`DataFrame.loc` not casting ``None`` to ``NA`` when setting value as a list into :class:`DataFrame` (:issue:`47987`) +- Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`) +- Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`) +- Bug in :func:`isin` upcasting to ``float64`` with unsigned integer dtype and list-like argument without a dtype (:issue:`46485`) +- Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys without using a :class:`MultiIndex` (:issue:`13831`) +- Bug in :meth:`Index.reindex` raising ``AssertionError`` when ``level`` was specified but no :class:`MultiIndex` was given; level is ignored now (:issue:`35132`) +- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) +- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`) +- Bug in :meth:`DataFrame.__setitem__` casting extension array dtypes to object when setting with a scalar key and :class:`DataFrame` as value (:issue:`46896`) +- Bug in :meth:`Series.__setitem__` when setting a scalar to a nullable pandas dtype would not raise a ``TypeError`` if the scalar could not be cast (losslessly) to the nullable type (:issue:`45404`) +- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of 
casting to ``boolean`` dtype (:issue:`45462`) +- Bug in :meth:`Series.loc` raising with boolean indexer containing ``NA`` when :class:`Index` did not match (:issue:`46551`) +- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) +- Bug in :meth:`DataFrame.loc` when setting values to a column and right hand side is a dictionary (:issue:`47216`) +- Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`) +- Bug in :meth:`Index.__getitem__` raising ``ValueError`` when indexer is from boolean dtype with ``NA`` (:issue:`45806`) +- Bug in :meth:`Series.__setitem__` losing precision when enlarging :class:`Series` with scalar (:issue:`32346`) +- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`) +- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) +- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) +- Bug in :meth:`DataFrame.__getattribute__` raising ``AttributeError`` if columns have ``"string"`` dtype (:issue:`46185`) +- Bug in :meth:`DataFrame.compare` returning all ``NaN`` column when comparing extension array dtype and numpy dtype (:issue:`44014`) +- Bug in :meth:`DataFrame.where` setting wrong values with ``"boolean"`` mask for numpy dtype (:issue:`44014`) +- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`) +- Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) +- Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) +- Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) +- Bug in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` not always propagating metadata (:issue:`28283`) +- Bug in :meth:`DataFrame.sum` min_count changes dtype if input contains NaNs (:issue:`46947`) +- Bug in :class:`IntervalTree` that lead to an infinite recursion. (:issue:`46658`) +- Bug in :class:`PeriodIndex` raising ``AttributeError`` when indexing on ``NA``, rather than putting ``NaT`` in its place. 
(:issue:`46673`) +- Bug in :meth:`DataFrame.at` would allow the modification of multiple columns (:issue:`48296`) + +Missing +^^^^^^^ +- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`) +- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with :class:`IntervalDtype` and incompatible value raising instead of casting to a common (usually object) dtype (:issue:`45796`) +- Bug in :meth:`Series.map` not respecting ``na_action`` argument if mapper is a ``dict`` or :class:`Series` (:issue:`47527`) +- Bug in :meth:`DataFrame.interpolate` with object-dtype column not returning a copy with ``inplace=False`` (:issue:`45791`) +- Bug in :meth:`DataFrame.dropna` allows to set both ``how`` and ``thresh`` incompatible arguments (:issue:`46575`) +- Bug in :meth:`DataFrame.fillna` ignored ``axis`` when :class:`DataFrame` is single block (:issue:`47713`) + +MultiIndex +^^^^^^^^^^ +- Bug in :meth:`DataFrame.loc` returning empty result when slicing a :class:`MultiIndex` with a negative step size and non-null start/stop values (:issue:`46156`) +- Bug in :meth:`DataFrame.loc` raising when slicing a :class:`MultiIndex` with a negative step size other than -1 (:issue:`46156`) +- Bug in :meth:`DataFrame.loc` raising when slicing a :class:`MultiIndex` with a negative step size and slicing a non-int labeled index level (:issue:`46156`) +- Bug in :meth:`Series.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`) +- Bug in :class:`MultiIndex.equals` not commutative when only one side has extension array dtype (:issue:`46026`) +- Bug in :meth:`MultiIndex.from_tuples` cannot construct Index of empty tuples (:issue:`45608`) + +I/O +^^^ +- Bug in :meth:`DataFrame.to_stata` where no error is raised if the :class:`DataFrame` contains ``-np.inf`` (:issue:`45350`) +- Bug in :func:`read_excel` results in an infinite loop with certain ``skiprows`` callables (:issue:`45585`) +- Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on an empty :class:`DataFrame` (:issue:`45494`) +- Bug in :func:`read_csv` not recognizing line break for ``on_bad_lines="warn"`` for ``engine="c"`` (:issue:`41710`) +- Bug in :meth:`DataFrame.to_csv` not respecting ``float_format`` for ``Float64`` dtype (:issue:`45991`) +- Bug in :func:`read_csv` not respecting a specified converter to index columns in all cases (:issue:`40589`) +- Bug in :func:`read_csv` interpreting second row as :class:`Index` names even when ``index_col=False`` (:issue:`46569`) +- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`) +- Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`) +- Bug in :func:`read_csv` ignoring non-existing header row for ``engine="python"`` (:issue:`47400`) +- Bug in :func:`read_excel` raising uncontrolled ``IndexError`` when ``header`` references non-existing rows (:issue:`43143`) +- Bug in :func:`read_html` where elements surrounding ``
    `` were joined without a space between them (:issue:`29528`) +- Bug in :func:`read_csv` when data is longer than header leading to issues with callables in ``usecols`` expecting strings (:issue:`46997`) +- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`) +- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`) +- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`) +- :meth:`to_html` now excludes the ``border`` attribute from ```` elements when ``border`` keyword is set to ``False``. +- Bug in :func:`read_sas` with certain types of compressed SAS7BDAT files (:issue:`35545`) +- Bug in :func:`read_excel` not forward filling :class:`MultiIndex` when no names were given (:issue:`47487`) +- Bug in :func:`read_sas` returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`) +- Bug in :meth:`DataFrame.to_string` using wrong missing value with extension arrays in :class:`MultiIndex` (:issue:`47986`) +- Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`) +- Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`) +- Bug in :meth:`DataFrame.to_excel` when writing an empty dataframe with :class:`MultiIndex` (:issue:`19543`) +- Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x40 control bytes (:issue:`31243`) +- Bug in :func:`read_sas` that scrambled column names (:issue:`31243`) +- Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x00 control bytes (:issue:`47099`) +- Bug in :func:`read_parquet` with ``use_nullable_dtypes=True`` where ``float64`` dtype was returned instead of nullable ``Float64`` dtype (:issue:`45694`) +- Bug in :meth:`DataFrame.to_json` where ``PeriodDtype`` would not make the serialization roundtrip when read back with :meth:`read_json` (:issue:`44720`) +- Bug in :func:`read_xml` when reading XML files with Chinese character tags and would raise ``XMLSyntaxError`` (:issue:`47902`) + +Period +^^^^^^ +- Bug in subtraction of :class:`Period` from :class:`.PeriodArray` returning wrong results (:issue:`45999`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) +- Bug in inferring an incorrect ``freq`` when passing a string to :class:`Period` microseconds that are a multiple of 1000 (:issue:`46811`) +- Bug in constructing a :class:`Period` from a :class:`Timestamp` or ``np.datetime64`` object with non-zero nanoseconds and ``freq="ns"`` incorrectly truncating the nanoseconds (:issue:`46811`) +- Bug in adding ``np.timedelta64("NaT", "ns")`` to a :class:`Period` with a timedelta-like freq incorrectly raising ``IncompatibleFrequency`` instead of returning ``NaT`` (:issue:`47196`) +- Bug in adding an array of integers to an array with :class:`PeriodDtype` giving incorrect results when ``dtype.freq.n > 1`` (:issue:`47209`) +- Bug in subtracting a :class:`Period` from an array with :class:`PeriodDtype` returning incorrect results instead of raising ``OverflowError`` when the operation overflows (:issue:`47538`) +- + +Plotting +^^^^^^^^ +- Bug in :meth:`DataFrame.plot.barh` that prevented labeling the x-axis and ``xlabel`` updating the y-axis label (:issue:`45144`) +- Bug in :meth:`DataFrame.plot.box` that prevented labeling the x-axis (:issue:`45463`) +- 
Bug in :meth:`DataFrame.boxplot` that prevented passing in ``xlabel`` and ``ylabel`` (:issue:`45463`) +- Bug in :meth:`DataFrame.boxplot` that prevented specifying ``vert=False`` (:issue:`36918`) +- Bug in :meth:`DataFrame.plot.scatter` that prevented specifying ``norm`` (:issue:`45809`) +- Fix showing "None" as ylabel in :meth:`Series.plot` when not setting ylabel (:issue:`46129`) +- Bug in :meth:`DataFrame.plot` that led to xticks and vertical grids being improperly placed when plotting a quarterly series (:issue:`47602`) +- Bug in :meth:`DataFrame.plot` that prevented setting y-axis label, limits and ticks for a secondary y-axis (:issue:`47753`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.resample` ignoring ``closed="right"`` on :class:`TimedeltaIndex` (:issue:`45414`) +- Bug in :meth:`.DataFrameGroupBy.transform` fails when ``func="size"`` and the input DataFrame has multiple columns (:issue:`27469`) +- Bug in :meth:`.DataFrameGroupBy.size` and :meth:`.DataFrameGroupBy.transform` with ``func="size"`` produced incorrect results when ``axis=1`` (:issue:`45715`) +- Bug in :meth:`.ExponentialMovingWindow.mean` with ``axis=1`` and ``engine='numba'`` when the :class:`DataFrame` has more columns than rows (:issue:`46086`) +- Bug when using ``engine="numba"`` would return the same jitted function when modifying ``engine_kwargs`` (:issue:`46086`) +- Bug in :meth:`.DataFrameGroupBy.transform` fails when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`) +- Bug in :meth:`DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`) +- Bug in :meth:`.GroupBy.sum`, :meth:`.GroupBy.prod` and :meth:`.GroupBy.cumsum` with integer dtypes losing precision (:issue:`37493`) +- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) +- Bug in :meth:`.GroupBy.cumsum` with integer dtypes causing overflows when sum was bigger than maximum of dtype (:issue:`37493`) +- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) +- Bug in :meth:`DataFrame.groupby` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`) +- Bug in :meth:`.GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) +- Bug in :meth:`.GroupBy.cumprod` ``NaN`` influences calculation in different columns with ``skipna=False`` (:issue:`48064`) +- Bug in :meth:`.GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) +- Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`) +- Bug in :meth:`SeriesGroupBy.apply` would incorrectly name its result when there was a unique group (:issue:`46369`) +- Bug in :meth:`.Rolling.sum` and :meth:`.Rolling.mean` would give incorrect result with window of same values (:issue:`42064`, :issue:`46431`) +- Bug in :meth:`.Rolling.var` and :meth:`.Rolling.std` would give non-zero result with window of same values (:issue:`42064`) +- Bug in :meth:`.Rolling.skew` and :meth:`.Rolling.kurt` would give NaN with window of same values (:issue:`30993`) +- Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`) +- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. 
+  Now it is (:issue:`46754`)
+- Bug in :meth:`DataFrame.rolling` gives ``ValueError`` when ``center=True``, ``axis=1`` and ``win_type`` is specified (:issue:`46135`)
+- Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`)
+- Bug in :meth:`DataFrame.resample` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`)
+- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would not respect ``dropna=False`` when the input DataFrame/Series had NaN values in a :class:`MultiIndex` (:issue:`46783`)
+- Bug in :meth:`DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list which misses the resample key (:issue:`47362`)
+- Bug in :meth:`DataFrame.groupby` would lose index columns when the DataFrame is empty for transforms, like fillna (:issue:`47787`)
+- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` with ``dropna=False`` and ``sort=False`` would put any null groups at the end instead of the order in which they are encountered (:issue:`46584`)
+-
+
+Reshaping
+^^^^^^^^^
+- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
+- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
+- Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
+- Bug in concatenation with ``IntegerDtype`` or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
+- Bug in :func:`concat` losing dtype of columns when ``join="outer"`` and ``sort=True`` (:issue:`47329`)
+- Bug in :func:`concat` not sorting the column names when ``None`` is included (:issue:`47331`)
+- Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`)
+- Bug in :func:`pivot_table` raising ``TypeError`` when ``dropna=True`` and aggregation column has extension array dtype (:issue:`47477`)
+- Bug in :func:`merge` raising error for ``how="cross"`` when using ``FIPS`` mode in ssl library (:issue:`48024`)
+- Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`)
+- Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`)
+- Bug in :meth:`concat` when ``axis=1`` and ``sort=False`` where the resulting Index was a :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`46675`)
+- Bug in :meth:`wide_to_long` raises when ``stubnames`` is missing in columns and ``i`` contains string dtype column (:issue:`46044`)
+- Bug in :meth:`DataFrame.join` with categorical index results in unexpected reordering (:issue:`47812`)
+
+Sparse
+^^^^^^
+- Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``SparseDtype`` failing to retain the array's ``fill_value`` (:issue:`45691`)
+- Bug in :meth:`SparseArray.unique` fails to keep original elements order (:issue:`47809`)
+
+ExtensionArray
+^^^^^^^^^^^^^^
+- Bug in :meth:`IntegerArray.searchsorted` and :meth:`FloatingArray.searchsorted` returning inconsistent results when acting on ``np.nan`` (:issue:`45255`)
+
+Styler
+^^^^^^
+- Bug when attempting to apply styling functions to an empty DataFrame subset (:issue:`45313`)
+- Bug
in :class:`CSSToExcelConverter` leading to ``TypeError`` when border color provided without border style for ``xlsxwriter`` engine (:issue:`42276`) +- Bug in :meth:`Styler.set_sticky` leading to white text on white background in dark mode (:issue:`46984`) +- Bug in :meth:`Styler.to_latex` causing ``UnboundLocalError`` when ``clines="all;data"`` and the ``DataFrame`` has no rows. (:issue:`47203`) +- Bug in :meth:`Styler.to_excel` when using ``vertical-align: middle;`` with ``xlsxwriter`` engine (:issue:`30107`) +- Bug when applying styles to a DataFrame with boolean column labels (:issue:`47838`) + +Metadata +^^^^^^^^ +- Fixed metadata propagation in :meth:`DataFrame.melt` (:issue:`28283`) +- Fixed metadata propagation in :meth:`DataFrame.explode` (:issue:`28283`) + +Other +^^^^^ + +.. ***DO NOT USE THIS SECTION*** + +- Bug in :func:`.assert_index_equal` with ``names=True`` and ``check_order=False`` not checking names (:issue:`47328`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_150.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.4.4..v1.5.0 diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst new file mode 100644 index 00000000..bcd8ddb9 --- /dev/null +++ b/doc/source/whatsnew/v1.5.1.rst @@ -0,0 +1,122 @@ +.. _whatsnew_151: + +What's new in 1.5.1 (October 19, 2022) +-------------------------------------- + +These are the changes in pandas 1.5.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_151.groupby_categorical_regr: + +Behavior of ``groupby`` with categorical groupers (:issue:`48645`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In versions of pandas prior to 1.5, ``groupby`` with ``dropna=False`` would still drop +NA values when the grouper was a categorical dtype. A fix for this was attempted in +1.5, however it introduced a regression where passing ``observed=False`` and +``dropna=False`` to ``groupby`` would result in only observed categories. It was found +that the patch fixing the ``dropna=False`` bug is incompatible with ``observed=False``, +and decided that the best resolution is to restore the correct ``observed=False`` +behavior at the cost of reintroducing the ``dropna=False`` bug. + +.. ipython:: python + + df = pd.DataFrame( + { + "x": pd.Categorical([1, None], categories=[1, 2, 3]), + "y": [3, 4], + } + ) + df + +*1.5.0 behavior*: + +.. code-block:: ipython + + In [3]: # Correct behavior, NA values are not dropped + df.groupby("x", observed=True, dropna=False).sum() + Out[3]: + y + x + 1 3 + NaN 4 + + + In [4]: # Incorrect behavior, only observed categories present + df.groupby("x", observed=False, dropna=False).sum() + Out[4]: + y + x + 1 3 + NaN 4 + + +*1.5.1 behavior*: + +.. ipython:: python + + # Incorrect behavior, NA values are dropped + df.groupby("x", observed=True, dropna=False).sum() + + # Correct behavior, unobserved categories present (NA values still dropped) + df.groupby("x", observed=False, dropna=False).sum() + +.. 
_whatsnew_151.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed Regression in :meth:`Series.__setitem__` casting ``None`` to ``NaN`` for object dtype (:issue:`48665`) +- Fixed Regression in :meth:`DataFrame.loc` when setting values as a :class:`DataFrame` with all ``True`` indexer (:issue:`48701`) +- Regression in :func:`.read_csv` causing an ``EmptyDataError`` when using an UTF-8 file handle that was already read from (:issue:`48646`) +- Regression in :func:`to_datetime` when ``utc=True`` and ``arg`` contained timezone naive and aware arguments raised a ``ValueError`` (:issue:`48678`) +- Fixed regression in :meth:`DataFrame.loc` raising ``FutureWarning`` when setting an empty :class:`DataFrame` (:issue:`48480`) +- Fixed regression in :meth:`DataFrame.describe` raising ``TypeError`` when result contains ``NA`` (:issue:`48778`) +- Fixed regression in :meth:`DataFrame.plot` ignoring invalid ``colormap`` for ``kind="scatter"`` (:issue:`48726`) +- Fixed regression in :meth:`MultiIndex.values` resetting ``freq`` attribute of underlying :class:`Index` object (:issue:`49054`) +- Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`) +- Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`) +- Fixed regression in :func:`to_datetime` when ``arg`` was a date string with nanosecond and ``format`` contained ``%f`` would raise a ``ValueError`` (:issue:`48767`) +- Fixed regression in :func:`testing.assert_frame_equal` raising for :class:`MultiIndex` with :class:`Categorical` and ``check_like=True`` (:issue:`48975`) +- Fixed regression in :meth:`DataFrame.fillna` replacing wrong values for ``datetime64[ns]`` dtype and ``inplace=True`` (:issue:`48863`) +- Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`) +- Fixed Regression in :meth:`.DataFrameGroupBy.apply` when user defined function is called on an empty dataframe (:issue:`47985`) +- Fixed regression in :meth:`DataFrame.apply` when passing non-zero ``axis`` via keyword argument (:issue:`48656`) +- Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` when the grouper is a nullable data type (e.g. :class:`Int64`) or a PyArrow-backed string array, contains null values, and ``dropna=False`` (:issue:`48794`) +- Fixed performance regression in :meth:`Series.isin` with mismatching dtypes (:issue:`49162`) +- Fixed regression in :meth:`DataFrame.to_parquet` raising when file name was specified as ``bytes`` (:issue:`48944`) +- Fixed regression in :class:`ExcelWriter` where the ``book`` attribute could no longer be set; however setting this attribute is now deprecated and this ability will be removed in a future version of pandas (:issue:`48780`) +- Fixed regression in :meth:`DataFrame.corrwith` when computing correlation on tied data with ``method="spearman"`` (:issue:`48826`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_151.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :meth:`Series.__getitem__` not falling back to positional for integer keys and boolean :class:`Index` (:issue:`48653`) +- Bug in :meth:`DataFrame.to_hdf` raising ``AssertionError`` with boolean index (:issue:`48667`) +- Bug in :func:`testing.assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError`` (:issue:`48608`) +- Bug in :meth:`DataFrame.pivot_table` raising unexpected ``FutureWarning`` when setting datetime column as index (:issue:`48683`) +- Bug in :meth:`DataFrame.sort_values` emitting unnecessary ``FutureWarning`` when called on :class:`DataFrame` with boolean sparse columns (:issue:`48784`) +- Bug in :class:`.arrays.ArrowExtensionArray` with a comparison operator to an invalid object would not raise a ``NotImplementedError`` (:issue:`48833`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_151.other: + +Other +~~~~~ +- Avoid showing deprecated signatures when introspecting functions with warnings about arguments becoming keyword-only (:issue:`48692`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_151.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.5.0..v1.5.1 diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst new file mode 100644 index 00000000..6397016d --- /dev/null +++ b/doc/source/whatsnew/v1.5.2.rst @@ -0,0 +1,46 @@ +.. _whatsnew_152: + +What's new in 1.5.2 (November 21, 2022) +--------------------------------------- + +These are the changes in pandas 1.5.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_152.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`MultiIndex.join` for extension array dtypes (:issue:`49277`) +- Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`) +- Fixed regression in arithmetic operations for :class:`DataFrame` with :class:`MultiIndex` columns with different dtypes (:issue:`49769`) +- Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance + from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`) +- Fixed regression in :func:`date_range` returning an invalid set of periods for ``CustomBusinessDay`` frequency and ``start`` date with timezone (:issue:`49441`) +- Fixed performance regression in groupby operations (:issue:`49676`) +- Fixed regression in :class:`Timedelta` constructor returning object of wrong type when subclassing ``Timedelta`` (:issue:`49579`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_152.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in the Copy-on-Write implementation losing track of views in certain chained indexing cases (:issue:`48996`) +- Fixed memory leak in :meth:`.Styler.to_excel` (:issue:`49751`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_152.other: + +Other +~~~~~ +- Reverted ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_152.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v1.5.1..v1.5.2|HEAD diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst new file mode 100644 index 00000000..97c4c73f --- /dev/null +++ b/doc/source/whatsnew/v1.5.3.rst @@ -0,0 +1,59 @@ +.. _whatsnew_153: + +What's new in 1.5.3 (January 18, 2023) +-------------------------------------- + +These are the changes in pandas 1.5.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_153.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed performance regression in :meth:`Series.isin` when ``values`` is empty (:issue:`49839`) +- Fixed regression in :meth:`DataFrame.memory_usage` showing unnecessary ``FutureWarning`` when :class:`DataFrame` is empty (:issue:`50066`) +- Fixed regression in :meth:`.DataFrameGroupBy.transform` when used with ``as_index=False`` (:issue:`49834`) +- Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`) +- Fixed regression in :meth:`.SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`) +- Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`) +- Fixed regression in the methods ``apply``, ``agg``, and ``transform`` when used with NumPy functions that informed users to supply ``numeric_only=True`` if the operation failed on non-numeric dtypes; such columns must be dropped prior to using these methods (:issue:`50538`) +- Fixed regression in :func:`to_datetime` raising ``ValueError`` when parsing array of ``float`` containing ``np.nan`` (:issue:`50237`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_153.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in the Copy-on-Write implementation losing track of views when indexing a :class:`DataFrame` with another :class:`DataFrame` (:issue:`50630`) +- Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`) +- Bug in :meth:`Series.quantile` emitting warning from NumPy when :class:`Series` has only ``NA`` values (:issue:`50681`) +- Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) +- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`) +- Bug in :func:`pivot_table` with NumPy 1.24 or greater when the :class:`DataFrame` columns has nested elements (:issue:`50342`) +- Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_153.other: + +Other +~~~~~ + +.. note:: + + If you are using :meth:`DataFrame.to_sql`, :func:`read_sql`, :func:`read_sql_table`, or :func:`read_sql_query` with SQLAlchemy 1.4.46 or greater, + you may see a ``sqlalchemy.exc.RemovedIn20Warning``. These warnings can be safely ignored for the SQLAlchemy 1.4.x releases + as pandas works toward compatibility with SQLAlchemy 2.0. 
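+
+As an illustrative sketch only (not a pandas API), one way to keep these
+warnings out of logs is a standard :mod:`warnings` filter targeting just that
+category:
+
+.. code-block:: python
+
+    import warnings
+
+    from sqlalchemy.exc import RemovedIn20Warning
+
+    # Only silences the SQLAlchemy 1.4.x migration warning, nothing else
+    warnings.filterwarnings("ignore", category=RemovedIn20Warning)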
+ +- Reverted deprecation (:issue:`45324`) of behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` slicing with an integer :class:`Index`; this will remain positional (:issue:`49612`) +- A ``FutureWarning`` raised when attempting to set values inplace with :meth:`DataFrame.loc` or :meth:`DataFrame.iloc` has been changed to a ``DeprecationWarning`` (:issue:`48673`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_153.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.5.2..v1.5.3|HEAD diff --git a/doc/source/whatsnew/whatsnew_0171_html_table.html b/doc/source/whatsnew/whatsnew_0171_html_table.html new file mode 100644 index 00000000..a76d6207 --- /dev/null +++ b/doc/source/whatsnew/whatsnew_0171_html_table.html @@ -0,0 +1,872 @@ + + + +
+  <table border="1" class="dataframe">
+    <thead>
+      <tr style="text-align: right;">
+        <th></th>
+        <th>a</th>
+        <th>b</th>
+        <th>c</th>
+        <th>d</th>
+        <th>e</th>
+      </tr>
+    </thead>
+    <tbody>
+      <tr><th>0</th><td>-1.085631</td><td>0.997345</td><td>0.282978</td><td>-1.506295</td><td>-0.5786</td></tr>
+      <tr><th>1</th><td>1.651437</td><td>-2.426679</td><td>-0.428913</td><td>1.265936</td><td>-0.86674</td></tr>
+      <tr><th>2</th><td>-0.678886</td><td>-0.094709</td><td>1.49139</td><td>-0.638902</td><td>-0.443982</td></tr>
+      <tr><th>3</th><td>-0.434351</td><td>2.20593</td><td>2.186786</td><td>1.004054</td><td>0.386186</td></tr>
+      <tr><th>4</th><td>0.737369</td><td>1.490732</td><td>-0.935834</td><td>1.175829</td><td>-1.253881</td></tr>
+      <tr><th>5</th><td>-0.637752</td><td>0.907105</td><td>-1.428681</td><td>-0.140069</td><td>-0.861755</td></tr>
+      <tr><th>6</th><td>-0.255619</td><td>-2.798589</td><td>-1.771533</td><td>-0.699877</td><td>0.927462</td></tr>
+      <tr><th>7</th><td>-0.173636</td><td>0.002846</td><td>0.688223</td><td>-0.879536</td><td>0.283627</td></tr>
+      <tr><th>8</th><td>-0.805367</td><td>-1.727669</td><td>-0.3909</td><td>0.573806</td><td>0.338589</td></tr>
+      <tr><th>9</th><td>-0.01183</td><td>2.392365</td><td>0.412912</td><td>0.978736</td><td>2.238143</td></tr>
+    </tbody>
+  </table>
    diff --git a/doc/sphinxext/README.rst b/doc/sphinxext/README.rst new file mode 100644 index 00000000..ef52433e --- /dev/null +++ b/doc/sphinxext/README.rst @@ -0,0 +1,5 @@ +sphinxext +========= + +This directory contains custom sphinx extensions in use in the pandas +documentation. diff --git a/doc/sphinxext/announce.py b/doc/sphinxext/announce.py new file mode 100755 index 00000000..b0b430ed --- /dev/null +++ b/doc/sphinxext/announce.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +""" +Script to generate contributor and pull request lists + +This script generates contributor and pull request lists for release +announcements using Github v3 protocol. Use requires an authentication token in +order to have sufficient bandwidth, you can get one following the directions at +`_ +Don't add any scope, as the default is read access to public information. The +token may be stored in an environment variable as you only get one chance to +see it. + +Usage:: + + $ ./scripts/announce.py + +The output is utf8 rst. + +Dependencies +------------ + +- gitpython +- pygithub + +Some code was copied from scipy `tools/gh_lists.py` and `tools/authors.py`. + +Examples +-------- + +From the bash command line with $GITHUB token. + + $ ./scripts/announce.py $GITHUB v1.11.0..v1.11.1 > announce.rst + +""" +import codecs +import os +import re +import textwrap + +from git import Repo + +UTF8Writer = codecs.getwriter("utf8") +this_repo = Repo(os.path.join(os.path.dirname(__file__), "..", "..")) + +author_msg = """\ +A total of %d people contributed patches to this release. People with a +"+" by their names contributed a patch for the first time. +""" + +pull_request_msg = """\ +A total of %d pull requests were merged for this release. +""" + + +def get_authors(revision_range): + pat = "^.*\\t(.*)$" + lst_release, cur_release = (r.strip() for r in revision_range.split("..")) + + if "|" in cur_release: + # e.g. v1.0.1|HEAD + maybe_tag, head = cur_release.split("|") + assert head == "HEAD" + if maybe_tag in this_repo.tags: + cur_release = maybe_tag + else: + cur_release = head + revision_range = f"{lst_release}..{cur_release}" + + # authors, in current release and previous to current release. + # We need two passes over the log for cur and prev, one to get the + # "Co-authored by" commits, which come from backports by the bot, + # and one for regular commits. + xpr = re.compile(r"Co-authored-by: (?P[^<]+) ") + cur = set( + xpr.findall( + this_repo.git.log("--grep=Co-authored", "--pretty=%b", revision_range) + ) + ) + cur |= set(re.findall(pat, this_repo.git.shortlog("-s", revision_range), re.M)) + + pre = set( + xpr.findall(this_repo.git.log("--grep=Co-authored", "--pretty=%b", lst_release)) + ) + pre |= set(re.findall(pat, this_repo.git.shortlog("-s", lst_release), re.M)) + + # Homu is the author of auto merges, clean him out. + cur.discard("Homu") + pre.discard("Homu") + + # Append '+' to new authors. 
+ authors = [s + " +" for s in cur - pre] + [s for s in cur & pre] + authors.sort() + return authors + + +def get_pull_requests(repo, revision_range): + prnums = [] + + # From regular merges + merges = this_repo.git.log("--oneline", "--merges", revision_range) + issues = re.findall("Merge pull request \\#(\\d*)", merges) + prnums.extend(int(s) for s in issues) + + # From Homu merges (Auto merges) + issues = re.findall("Auto merge of \\#(\\d*)", merges) + prnums.extend(int(s) for s in issues) + + # From fast forward squash-merges + commits = this_repo.git.log( + "--oneline", "--no-merges", "--first-parent", revision_range + ) + issues = re.findall("^.*\\(\\#(\\d+)\\)$", commits, re.M) + prnums.extend(int(s) for s in issues) + + # get PR data from github repo + prnums.sort() + prs = [repo.get_pull(n) for n in prnums] + return prs + + +def build_components(revision_range, heading="Contributors"): + lst_release, cur_release = (r.strip() for r in revision_range.split("..")) + authors = get_authors(revision_range) + + return { + "heading": heading, + "author_message": author_msg % len(authors), + "authors": authors, + } + + +def build_string(revision_range, heading="Contributors"): + components = build_components(revision_range, heading=heading) + components["uline"] = "=" * len(components["heading"]) + components["authors"] = "* " + "\n* ".join(components["authors"]) + + # Don't change this to an fstring. It breaks the formatting. + tpl = textwrap.dedent( + """\ + {heading} + {uline} + + {author_message} + {authors}""" + ).format(**components) + return tpl + + +def main(revision_range): + # document authors + text = build_string(revision_range) + print(text) + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser(description="Generate author lists for release") + parser.add_argument("revision_range", help="..") + args = parser.parse_args() + main(args.revision_range) diff --git a/doc/sphinxext/contributors.py b/doc/sphinxext/contributors.py new file mode 100644 index 00000000..c2b21e40 --- /dev/null +++ b/doc/sphinxext/contributors.py @@ -0,0 +1,57 @@ +"""Sphinx extension for listing code contributors to a release. + +Usage:: + + .. contributors:: v0.23.0..v0.23.1 + +This will be replaced with a message indicating the number of +code contributors and commits, and then list each contributor +individually. For development versions (before a tag is available) +use:: + + .. contributors:: v0.23.0..v0.23.1|HEAD + +While the v0.23.1 tag does not exist, that will use the HEAD of the +branch as the end of the revision range. 
+""" +from announce import build_components +from docutils import nodes +from docutils.parsers.rst import Directive +import git + + +class ContributorsDirective(Directive): + required_arguments = 1 + name = "contributors" + + def run(self): + range_ = self.arguments[0] + if range_.endswith("x..HEAD"): + return [nodes.paragraph(), nodes.bullet_list()] + try: + components = build_components(range_) + except git.GitCommandError as exc: + return [ + self.state.document.reporter.warning( + f"Cannot find contributors for range {repr(range_)}: {exc}", + line=self.lineno, + ) + ] + else: + message = nodes.paragraph() + message += nodes.Text(components["author_message"]) + + listnode = nodes.bullet_list() + + for author in components["authors"]: + para = nodes.paragraph() + para += nodes.Text(author) + listnode += nodes.list_item("", para) + + return [message, listnode] + + +def setup(app): + app.add_directive("contributors", ContributorsDirective) + + return {"version": "0.1", "parallel_read_safe": True, "parallel_write_safe": True} diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..20f839db --- /dev/null +++ b/environment.yml @@ -0,0 +1,132 @@ +# Local development dependencies including docs building, website upload, ASV benchmark +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.8 + + # test dependencies + - cython=0.29.32 + - pytest>=6.0 + - pytest-cov + - pytest-xdist>=1.31 + - psutil + - pytest-asyncio>=0.17 + - boto3 + + # required dependencies + - python-dateutil + - numpy + - pytz + + # optional dependencies + - beautifulsoup4 + - blosc + - brotlipy + - bottleneck + - fastparquet + - fsspec + - html5lib + - hypothesis + - gcsfs + - jinja2 + - lxml + - matplotlib>=3.6.1 + - numba>=0.53.1 + - numexpr>=2.8.0 # pin for "Run checks on imported code" job + - openpyxl + - odfpy + - pandas-gbq + - psycopg2 + - pyarrow<10 + - pymysql + - pyreadstat + - pytables + - python-snappy + - pyxlsb + - s3fs>=2021.08.0 + - scipy + - sqlalchemy<1.4.46 + - tabulate + - tzdata>=2022a + - xarray + - xlrd + - xlsxwriter + - xlwt + - zstandard + + # downstream packages + - aiobotocore<2.0.0 # GH#44311 pinned to fix docbuild + - botocore + - cftime + - dask + - ipython + - geopandas-base + - seaborn + - scikit-learn + - statsmodels + - coverage + - pandas-datareader + - pyyaml + - py + - pytorch + + # local testing dependencies + - moto + - flask + + # benchmarks + - asv + + # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms. 
+ - c-compiler + - cxx-compiler + + # code checks + - black=22.3.0 + - cpplint + - flake8=5.0.4 + - flake8-bugbear=22.7.1 # used by flake8, find likely bugs + - isort>=5.2.1 # check that imports are in the right order + - mypy=0.971 + - pre-commit>=2.15.0 + - pycodestyle # used by flake8 + - pyupgrade + + # documentation + - gitpython # obtain contributors from git for whatsnew + - gitdb + - natsort # DataFrame.sort_values doctest + - numpydoc + - pandas-dev-flaker=0.5.0 + - pydata-sphinx-theme<0.11 + - pytest-cython # doctest + - sphinx + - sphinx-panels + - sphinx-copybutton + - types-python-dateutil + - types-PyMySQL + - types-pytz + - types-setuptools + + # documentation (jupyter notebooks) + - nbconvert>=6.4.5 + - nbsphinx + - pandoc + - ipywidgets + - nbformat + - notebook>=6.0.3 + - ipykernel + + # web + - jinja2 # in optional dependencies, but documented here as needed + - markdown + - feedparser + - pyyaml + - requests + + # build the interactive terminal + - jupyterlab >=3.4,<4 + - pip: + - jupyterlite==0.1.0b10 + - sphinx-toggleprompt diff --git a/pandas/__init__.py b/pandas/__init__.py new file mode 100644 index 00000000..5016bde0 --- /dev/null +++ b/pandas/__init__.py @@ -0,0 +1,426 @@ +from __future__ import annotations + +__docformat__ = "restructuredtext" + +# Let users know if they're missing any of our hard dependencies +_hard_dependencies = ("numpy", "pytz", "dateutil") +_missing_dependencies = [] + +for _dependency in _hard_dependencies: + try: + __import__(_dependency) + except ImportError as _e: + _missing_dependencies.append(f"{_dependency}: {_e}") + +if _missing_dependencies: + raise ImportError( + "Unable to import required dependencies:\n" + "\n".join(_missing_dependencies) + ) +del _hard_dependencies, _dependency, _missing_dependencies + +# numpy compat +from pandas.compat import is_numpy_dev as _is_numpy_dev # pyright: ignore # noqa:F401 + +try: + from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib +except ImportError as _err: # pragma: no cover + _module = _err.name + raise ImportError( + f"C extension: {_module} not built. If you want to import " + "pandas from the source directory, you may need to run " + "'python setup.py build_ext --force' to build the C extensions first." 
+ ) from _err +else: + del _tslib, _lib, _hashtable + +from pandas._config import ( + get_option, + set_option, + reset_option, + describe_option, + option_context, + options, +) + +# let init-time option registration happen +import pandas.core.config_init # pyright: ignore # noqa:F401 + +from pandas.core.api import ( + # dtype + ArrowDtype, + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + Float32Dtype, + Float64Dtype, + CategoricalDtype, + PeriodDtype, + IntervalDtype, + DatetimeTZDtype, + StringDtype, + BooleanDtype, + # missing + NA, + isna, + isnull, + notna, + notnull, + # indexes + Index, + CategoricalIndex, + RangeIndex, + MultiIndex, + IntervalIndex, + TimedeltaIndex, + DatetimeIndex, + PeriodIndex, + IndexSlice, + # tseries + NaT, + Period, + period_range, + Timedelta, + timedelta_range, + Timestamp, + date_range, + bdate_range, + Interval, + interval_range, + DateOffset, + # conversion + to_numeric, + to_datetime, + to_timedelta, + # misc + Flags, + Grouper, + factorize, + unique, + value_counts, + NamedAgg, + array, + Categorical, + set_eng_float_format, + Series, + DataFrame, +) + +from pandas.core.arrays.sparse import SparseDtype + +from pandas.tseries.api import infer_freq +from pandas.tseries import offsets + +from pandas.core.computation.api import eval + +from pandas.core.reshape.api import ( + concat, + lreshape, + melt, + wide_to_long, + merge, + merge_asof, + merge_ordered, + crosstab, + pivot, + pivot_table, + get_dummies, + from_dummies, + cut, + qcut, +) + +from pandas import api, arrays, errors, io, plotting, tseries +from pandas import testing # noqa:PDF015 +from pandas.util._print_versions import show_versions + +from pandas.io.api import ( + # excel + ExcelFile, + ExcelWriter, + read_excel, + # parsers + read_csv, + read_fwf, + read_table, + # pickle + read_pickle, + to_pickle, + # pytables + HDFStore, + read_hdf, + # sql + read_sql, + read_sql_query, + read_sql_table, + # misc + read_clipboard, + read_parquet, + read_orc, + read_feather, + read_gbq, + read_html, + read_xml, + read_json, + read_stata, + read_sas, + read_spss, +) + +from pandas.io.json import _json_normalize as json_normalize + +from pandas.util._tester import test + +# use the closest tagged version if possible +from pandas._version import get_versions + +v = get_versions() +__version__ = v.get("closest-tag", v["version"]) +__git_version__ = v.get("full-revisionid") +del get_versions, v + +# GH 27101 +__deprecated_num_index_names = ["Float64Index", "Int64Index", "UInt64Index"] + + +def __dir__() -> list[str]: + # GH43028 + # Int64Index etc. are deprecated, but we still want them to be available in the dir. + # Remove in Pandas 2.0, when we remove Int64Index etc. from the code base. + return list(globals().keys()) + __deprecated_num_index_names + + +def __getattr__(name): + import warnings + + if name in __deprecated_num_index_names: + warnings.warn( + f"pandas.{name} is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.Index with the appropriate dtype instead.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.api import Float64Index, Int64Index, UInt64Index + + return { + "Float64Index": Float64Index, + "Int64Index": Int64Index, + "UInt64Index": UInt64Index, + }[name] + elif name == "datetime": + warnings.warn( + "The pandas.datetime class is deprecated " + "and will be removed from pandas in a future version. 
" + "Import from datetime module instead.", + FutureWarning, + stacklevel=2, + ) + + from datetime import datetime as dt + + return dt + + elif name == "np": + + warnings.warn( + "The pandas.np module is deprecated " + "and will be removed from pandas in a future version. " + "Import numpy directly instead.", + FutureWarning, + stacklevel=2, + ) + import numpy as np + + return np + + elif name in {"SparseSeries", "SparseDataFrame"}: + warnings.warn( + f"The {name} class is removed from pandas. Accessing it from " + "the top-level namespace will also be removed in the next version.", + FutureWarning, + stacklevel=2, + ) + + return type(name, (), {}) + + elif name == "SparseArray": + + warnings.warn( + "The pandas.SparseArray class is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.arrays.SparseArray instead.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.arrays.sparse import SparseArray as _SparseArray + + return _SparseArray + + raise AttributeError(f"module 'pandas' has no attribute '{name}'") + + +# module level doc-string +__doc__ = """ +pandas - a powerful data analysis and manipulation library for Python +===================================================================== + +**pandas** is a Python package providing fast, flexible, and expressive data +structures designed to make working with "relational" or "labeled" data both +easy and intuitive. It aims to be the fundamental high-level building block for +doing practical, **real world** data analysis in Python. Additionally, it has +the broader goal of becoming **the most powerful and flexible open source data +analysis / manipulation tool available in any language**. It is already well on +its way toward this goal. + +Main Features +------------- +Here are just a few of the things that pandas does well: + + - Easy handling of missing data in floating point as well as non-floating + point data. + - Size mutability: columns can be inserted and deleted from DataFrame and + higher dimensional objects + - Automatic and explicit data alignment: objects can be explicitly aligned + to a set of labels, or the user can simply ignore the labels and let + `Series`, `DataFrame`, etc. automatically align the data for you in + computations. + - Powerful, flexible group by functionality to perform split-apply-combine + operations on data sets, for both aggregating and transforming data. + - Make it easy to convert ragged, differently-indexed data in other Python + and NumPy data structures into DataFrame objects. + - Intelligent label-based slicing, fancy indexing, and subsetting of large + data sets. + - Intuitive merging and joining data sets. + - Flexible reshaping and pivoting of data sets. + - Hierarchical labeling of axes (possible to have multiple labels per tick). + - Robust IO tools for loading data from flat files (CSV and delimited), + Excel files, databases, and saving/loading data from the ultrafast HDF5 + format. + - Time series-specific functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. +""" + +# Use __all__ to let type checkers know what is part of the public API. +# Pandas is not (yet) a py.typed library: the public API is determined +# based on the documentation. 
+__all__ = [ + "ArrowDtype", + "BooleanDtype", + "Categorical", + "CategoricalDtype", + "CategoricalIndex", + "DataFrame", + "DateOffset", + "DatetimeIndex", + "DatetimeTZDtype", + "ExcelFile", + "ExcelWriter", + "Flags", + "Float32Dtype", + "Float64Dtype", + "Grouper", + "HDFStore", + "Index", + "IndexSlice", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "Int8Dtype", + "Interval", + "IntervalDtype", + "IntervalIndex", + "MultiIndex", + "NA", + "NaT", + "NamedAgg", + "Period", + "PeriodDtype", + "PeriodIndex", + "RangeIndex", + "Series", + "SparseDtype", + "StringDtype", + "Timedelta", + "TimedeltaIndex", + "Timestamp", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "UInt8Dtype", + "api", + "array", + "arrays", + "bdate_range", + "concat", + "crosstab", + "cut", + "date_range", + "describe_option", + "errors", + "eval", + "factorize", + "get_dummies", + "from_dummies", + "get_option", + "infer_freq", + "interval_range", + "io", + "isna", + "isnull", + "json_normalize", + "lreshape", + "melt", + "merge", + "merge_asof", + "merge_ordered", + "notna", + "notnull", + "offsets", + "option_context", + "options", + "period_range", + "pivot", + "pivot_table", + "plotting", + "qcut", + "read_clipboard", + "read_csv", + "read_excel", + "read_feather", + "read_fwf", + "read_gbq", + "read_hdf", + "read_html", + "read_json", + "read_orc", + "read_parquet", + "read_pickle", + "read_sas", + "read_spss", + "read_sql", + "read_sql_query", + "read_sql_table", + "read_stata", + "read_table", + "read_xml", + "reset_option", + "set_eng_float_format", + "set_option", + "show_versions", + "test", + "testing", + "timedelta_range", + "to_datetime", + "to_numeric", + "to_pickle", + "to_timedelta", + "tseries", + "unique", + "value_counts", + "wide_to_long", +] diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py new file mode 100644 index 00000000..929f8a5a --- /dev/null +++ b/pandas/_config/__init__.py @@ -0,0 +1,28 @@ +""" +pandas._config is considered explicitly upstream of everything else in pandas, +should have no intra-pandas dependencies. + +importing `dates` and `display` ensures that keys needed by _libs +are initialized. +""" +__all__ = [ + "config", + "detect_console_encoding", + "get_option", + "set_option", + "reset_option", + "describe_option", + "option_context", + "options", +] +from pandas._config import config +from pandas._config import dates # pyright: ignore # noqa:F401 +from pandas._config.config import ( + describe_option, + get_option, + option_context, + options, + reset_option, + set_option, +) +from pandas._config.display import detect_console_encoding diff --git a/pandas/_config/config.py b/pandas/_config/config.py new file mode 100644 index 00000000..b4b06c81 --- /dev/null +++ b/pandas/_config/config.py @@ -0,0 +1,912 @@ +""" +The config module holds package-wide configurables and provides +a uniform API for working with them. + +Overview +======== + +This module supports the following requirements: +- options are referenced using keys in dot.notation, e.g. "x.y.option - z". +- keys are case-insensitive. +- functions should accept partial/regex keys, when unambiguous. +- options can be registered by modules at import time. +- options can be registered at init-time (via core.config_init) +- options have a default value, and (optionally) a description and + validation function associated with them. +- options can be deprecated, in which case referencing them + should produce a warning. 
+- deprecated options can optionally be rerouted to a replacement + so that accessing a deprecated option reroutes to a differently + named option. +- options can be reset to their default value. +- all option can be reset to their default value at once. +- all options in a certain sub - namespace can be reset at once. +- the user can set / get / reset or ask for the description of an option. +- a developer can register and mark an option as deprecated. +- you can register a callback to be invoked when the option value + is set or reset. Changing the stored value is considered misuse, but + is not verboten. + +Implementation +============== + +- Data is stored using nested dictionaries, and should be accessed + through the provided API. + +- "Registered options" and "Deprecated options" have metadata associated + with them, which are stored in auxiliary dictionaries keyed on the + fully-qualified key, e.g. "x.y.z.option". + +- the config_init module is imported by the package's __init__.py file. + placing any register_option() calls there will ensure those options + are available as soon as pandas is loaded. If you use register_option + in a module, it will only be available after that module is imported, + which you should be aware of. + +- `config_prefix` is a context_manager (for use with the `with` keyword) + which can save developers some typing, see the docstring. + +""" + +from __future__ import annotations + +from contextlib import ( + ContextDecorator, + contextmanager, +) +import re +from typing import ( + Any, + Callable, + Generic, + Iterable, + Iterator, + NamedTuple, + cast, +) +import warnings + +from pandas._typing import ( + F, + T, +) +from pandas.util._exceptions import find_stack_level + + +class DeprecatedOption(NamedTuple): + key: str + msg: str | None + rkey: str | None + removal_ver: str | None + + +class RegisteredOption(NamedTuple): + key: str + defval: object + doc: str + validator: Callable[[object], Any] | None + cb: Callable[[str], Any] | None + + +# holds deprecated option metadata +_deprecated_options: dict[str, DeprecatedOption] = {} + +# holds registered option metadata +_registered_options: dict[str, RegisteredOption] = {} + +# holds the current values for registered options +_global_config: dict[str, Any] = {} + +# keys which have a special meaning +_reserved_keys: list[str] = ["all"] + + +class OptionError(AttributeError, KeyError): + """ + Exception raised for pandas.options. + + Backwards compatible with KeyError checks. 
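+
+    A minimal illustration (the option name is hypothetical)::
+
+        import pandas as pd
+
+        try:
+            pd.get_option("display.no_such_option")
+        except pd.errors.OptionError:
+            pass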
+ """ + + +# +# User API + + +def _get_single_key(pat: str, silent: bool) -> str: + keys = _select_options(pat) + if len(keys) == 0: + if not silent: + _warn_if_deprecated(pat) + raise OptionError(f"No such keys(s): {repr(pat)}") + if len(keys) > 1: + raise OptionError("Pattern matched multiple keys") + key = keys[0] + + if not silent: + _warn_if_deprecated(key) + + key = _translate_key(key) + + return key + + +def _get_option(pat: str, silent: bool = False) -> Any: + key = _get_single_key(pat, silent) + + # walk the nested dict + root, k = _get_root(key) + return root[k] + + +def _set_option(*args, **kwargs) -> None: + # must at least 1 arg deal with constraints later + nargs = len(args) + if not nargs or nargs % 2 != 0: + raise ValueError("Must provide an even number of non-keyword arguments") + + # default to false + silent = kwargs.pop("silent", False) + + if kwargs: + kwarg = list(kwargs.keys())[0] + raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"') + + for k, v in zip(args[::2], args[1::2]): + key = _get_single_key(k, silent) + + o = _get_registered_option(key) + if o and o.validator: + o.validator(v) + + # walk the nested dict + root, k = _get_root(key) + root[k] = v + + if o.cb: + if silent: + with warnings.catch_warnings(record=True): + o.cb(key) + else: + o.cb(key) + + +def _describe_option(pat: str = "", _print_desc: bool = True) -> str | None: + + keys = _select_options(pat) + if len(keys) == 0: + raise OptionError("No such keys(s)") + + s = "\n".join([_build_option_description(k) for k in keys]) + + if _print_desc: + print(s) + return None + return s + + +def _reset_option(pat: str, silent: bool = False) -> None: + + keys = _select_options(pat) + + if len(keys) == 0: + raise OptionError("No such keys(s)") + + if len(keys) > 1 and len(pat) < 4 and pat != "all": + raise ValueError( + "You must specify at least 4 characters when " + "resetting multiple keys, use the special keyword " + '"all" to reset all the options to their default value' + ) + + for k in keys: + _set_option(k, _registered_options[k].defval, silent=silent) + + +def get_default_val(pat: str): + key = _get_single_key(pat, silent=True) + return _get_registered_option(key).defval + + +class DictWrapper: + """provide attribute-style access to a nested dict""" + + def __init__(self, d: dict[str, Any], prefix: str = "") -> None: + object.__setattr__(self, "d", d) + object.__setattr__(self, "prefix", prefix) + + def __setattr__(self, key: str, val: Any) -> None: + prefix = object.__getattribute__(self, "prefix") + if prefix: + prefix += "." + prefix += key + # you can't set new keys + # can you can't overwrite subtrees + if key in self.d and not isinstance(self.d[key], dict): + _set_option(prefix, val) + else: + raise OptionError("You can only set the value of existing options") + + def __getattr__(self, key: str): + prefix = object.__getattribute__(self, "prefix") + if prefix: + prefix += "." + prefix += key + try: + v = object.__getattribute__(self, "d")[key] + except KeyError as err: + raise OptionError("No such option") from err + if isinstance(v, dict): + return DictWrapper(v, prefix) + else: + return _get_option(prefix) + + def __dir__(self) -> Iterable[str]: + return list(self.d.keys()) + + +# For user convenience, we'd like to have the available options described +# in the docstring. For dev convenience we'd like to generate the docstrings +# dynamically instead of maintaining them by hand. 
To this, we use the +# class below which wraps functions inside a callable, and converts +# __doc__ into a property function. The doctsrings below are templates +# using the py2.6+ advanced formatting syntax to plug in a concise list +# of options, and option descriptions. + + +class CallableDynamicDoc(Generic[T]): + def __init__(self, func: Callable[..., T], doc_tmpl: str) -> None: + self.__doc_tmpl__ = doc_tmpl + self.__func__ = func + + def __call__(self, *args, **kwds) -> T: + return self.__func__(*args, **kwds) + + # error: Signature of "__doc__" incompatible with supertype "object" + @property + def __doc__(self) -> str: # type: ignore[override] + opts_desc = _describe_option("all", _print_desc=False) + opts_list = pp_options_list(list(_registered_options.keys())) + return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list) + + +_get_option_tmpl = """ +get_option(pat) + +Retrieves the value of the specified option. + +Available options: + +{opts_list} + +Parameters +---------- +pat : str + Regexp which should match a single option. + Note: partial matches are supported for convenience, but unless you use the + full option name (e.g. x.y.z.option_name), your code may break in future + versions if new options with similar names are introduced. + +Returns +------- +result : the value of the option + +Raises +------ +OptionError : if no such option exists + +Notes +----- +Please reference the :ref:`User Guide ` for more information. + +The available options with its descriptions: + +{opts_desc} +""" + +_set_option_tmpl = """ +set_option(pat, value) + +Sets the value of the specified option. + +Available options: + +{opts_list} + +Parameters +---------- +pat : str + Regexp which should match a single option. + Note: partial matches are supported for convenience, but unless you use the + full option name (e.g. x.y.z.option_name), your code may break in future + versions if new options with similar names are introduced. +value : object + New value of option. + +Returns +------- +None + +Raises +------ +OptionError if no such option exists + +Notes +----- +Please reference the :ref:`User Guide ` for more information. + +The available options with its descriptions: + +{opts_desc} +""" + +_describe_option_tmpl = """ +describe_option(pat, _print_desc=False) + +Prints the description for one or more registered options. + +Call with no arguments to get a listing for all registered options. + +Available options: + +{opts_list} + +Parameters +---------- +pat : str + Regexp pattern. All matching keys will have their description displayed. +_print_desc : bool, default True + If True (default) the description(s) will be printed to stdout. + Otherwise, the description(s) will be returned as a unicode string + (for testing). + +Returns +------- +None by default, the description(s) as a unicode string if _print_desc +is False + +Notes +----- +Please reference the :ref:`User Guide ` for more information. + +The available options with its descriptions: + +{opts_desc} +""" + +_reset_option_tmpl = """ +reset_option(pat) + +Reset one or more options to their default value. + +Pass "all" as argument to reset all options. + +Available options: + +{opts_list} + +Parameters +---------- +pat : str/regex + If specified only options matching `prefix*` will be reset. + Note: partial matches are supported for convenience, but unless you + use the full option name (e.g. x.y.z.option_name), your code may break + in future versions if new options with similar names are introduced. 
+ +Returns +------- +None + +Notes +----- +Please reference the :ref:`User Guide ` for more information. + +The available options with its descriptions: + +{opts_desc} +""" + +# bind the functions with their docstrings into a Callable +# and use that as the functions exposed in pd.api +get_option = CallableDynamicDoc(_get_option, _get_option_tmpl) +set_option = CallableDynamicDoc(_set_option, _set_option_tmpl) +reset_option = CallableDynamicDoc(_reset_option, _reset_option_tmpl) +describe_option = CallableDynamicDoc(_describe_option, _describe_option_tmpl) +options = DictWrapper(_global_config) + +# +# Functions for use by pandas developers, in addition to User - api + + +class option_context(ContextDecorator): + """ + Context manager to temporarily set options in the `with` statement context. + + You need to invoke as ``option_context(pat, val, [(pat, val), ...])``. + + Examples + -------- + >>> with option_context('display.max_rows', 10, 'display.max_columns', 5): + ... pass + """ + + def __init__(self, *args) -> None: + if len(args) % 2 != 0 or len(args) < 2: + raise ValueError( + "Need to invoke as option_context(pat, val, [(pat, val), ...])." + ) + + self.ops = list(zip(args[::2], args[1::2])) + + def __enter__(self) -> None: + self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops] + + for pat, val in self.ops: + _set_option(pat, val, silent=True) + + def __exit__(self, *args) -> None: + if self.undo: + for pat, val in self.undo: + _set_option(pat, val, silent=True) + + +def register_option( + key: str, + defval: object, + doc: str = "", + validator: Callable[[object], Any] | None = None, + cb: Callable[[str], Any] | None = None, +) -> None: + """ + Register an option in the package-wide pandas config object + + Parameters + ---------- + key : str + Fully-qualified key, e.g. "x.y.option - z". + defval : object + Default value of the option. + doc : str + Description of the option. + validator : Callable, optional + Function of a single argument, should raise `ValueError` if + called with a value which is not a legal value for the option. + cb + a function of a single argument "key", which is called + immediately after an option value is set/reset. key is + the full name of the option. + + Raises + ------ + ValueError if `validator` is specified and `defval` is not a valid value. 
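+
+    Examples
+    --------
+    A sketch of a typical registration (the option name is hypothetical)::
+
+        import pandas._config.config as cf
+
+        cf.register_option("display.my_width", 80, doc="demo", validator=cf.is_int)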
+ + """ + import keyword + import tokenize + + key = key.lower() + + if key in _registered_options: + raise OptionError(f"Option '{key}' has already been registered") + if key in _reserved_keys: + raise OptionError(f"Option '{key}' is a reserved key") + + # the default value should be legal + if validator: + validator(defval) + + # walk the nested dict, creating dicts as needed along the path + path = key.split(".") + + for k in path: + if not re.match("^" + tokenize.Name + "$", k): + raise ValueError(f"{k} is not a valid identifier") + if keyword.iskeyword(k): + raise ValueError(f"{k} is a python keyword") + + cursor = _global_config + msg = "Path prefix to option '{option}' is already an option" + + for i, p in enumerate(path[:-1]): + if not isinstance(cursor, dict): + raise OptionError(msg.format(option=".".join(path[:i]))) + if p not in cursor: + cursor[p] = {} + cursor = cursor[p] + + if not isinstance(cursor, dict): + raise OptionError(msg.format(option=".".join(path[:-1]))) + + cursor[path[-1]] = defval # initialize + + # save the option metadata + _registered_options[key] = RegisteredOption( + key=key, defval=defval, doc=doc, validator=validator, cb=cb + ) + + +def deprecate_option( + key: str, + msg: str | None = None, + rkey: str | None = None, + removal_ver: str | None = None, +) -> None: + """ + Mark option `key` as deprecated, if code attempts to access this option, + a warning will be produced, using `msg` if given, or a default message + if not. + if `rkey` is given, any access to the key will be re-routed to `rkey`. + + Neither the existence of `key` nor that if `rkey` is checked. If they + do not exist, any subsequence access will fail as usual, after the + deprecation warning is given. + + Parameters + ---------- + key : str + Name of the option to be deprecated. + must be a fully-qualified option name (e.g "x.y.z.rkey"). + msg : str, optional + Warning message to output when the key is referenced. + if no message is given a default message will be emitted. + rkey : str, optional + Name of an option to reroute access to. + If specified, any referenced `key` will be + re-routed to `rkey` including set/get/reset. + rkey must be a fully-qualified option name (e.g "x.y.z.rkey"). + used by the default message if no `msg` is specified. + removal_ver : str, optional + Specifies the version in which this option will + be removed. used by the default message if no `msg` is specified. + + Raises + ------ + OptionError + If the specified key has already been deprecated. 
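+
+    Examples
+    --------
+    A sketch with hypothetical option names::
+
+        import pandas._config.config as cf
+
+        cf.deprecate_option("display.old_name", rkey="display.new_name")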
+ """ + key = key.lower() + + if key in _deprecated_options: + raise OptionError(f"Option '{key}' has already been defined as deprecated.") + + _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver) + + +# +# functions internal to the module + + +def _select_options(pat: str) -> list[str]: + """ + returns a list of keys matching `pat` + + if pat=="all", returns all registered options + """ + # short-circuit for exact key + if pat in _registered_options: + return [pat] + + # else look through all of them + keys = sorted(_registered_options.keys()) + if pat == "all": # reserved key + return keys + + return [k for k in keys if re.search(pat, k, re.I)] + + +def _get_root(key: str) -> tuple[dict[str, Any], str]: + path = key.split(".") + cursor = _global_config + for p in path[:-1]: + cursor = cursor[p] + return cursor, path[-1] + + +def _is_deprecated(key: str) -> bool: + """Returns True if the given option has been deprecated""" + key = key.lower() + return key in _deprecated_options + + +def _get_deprecated_option(key: str): + """ + Retrieves the metadata for a deprecated option, if `key` is deprecated. + + Returns + ------- + DeprecatedOption (namedtuple) if key is deprecated, None otherwise + """ + try: + d = _deprecated_options[key] + except KeyError: + return None + else: + return d + + +def _get_registered_option(key: str): + """ + Retrieves the option metadata if `key` is a registered option. + + Returns + ------- + RegisteredOption (namedtuple) if key is deprecated, None otherwise + """ + return _registered_options.get(key) + + +def _translate_key(key: str) -> str: + """ + if key id deprecated and a replacement key defined, will return the + replacement key, otherwise returns `key` as - is + """ + d = _get_deprecated_option(key) + if d: + return d.rkey or key + else: + return key + + +def _warn_if_deprecated(key: str) -> bool: + """ + Checks if `key` is a deprecated option and if so, prints a warning. + + Returns + ------- + bool - True if `key` is deprecated, False otherwise. + """ + d = _get_deprecated_option(key) + if d: + if d.msg: + warnings.warn( + d.msg, + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + msg = f"'{key}' is deprecated" + if d.removal_ver: + msg += f" and will be removed in {d.removal_ver}" + if d.rkey: + msg += f", please use '{d.rkey}' instead." + else: + msg += ", please refrain from using it." + + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + return True + return False + + +def _build_option_description(k: str) -> str: + """Builds a formatted description of a registered option and prints it""" + o = _get_registered_option(k) + d = _get_deprecated_option(k) + + s = f"{k} " + + if o.doc: + s += "\n".join(o.doc.strip().split("\n")) + else: + s += "No description available." + + if o: + s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]" + + if d: + rkey = d.rkey or "" + s += "\n (Deprecated" + s += f", use `{rkey}` instead." 
+ s += ")" + + return s + + +def pp_options_list(keys: Iterable[str], width=80, _print: bool = False): + """Builds a concise listing of available options, grouped by prefix""" + from itertools import groupby + from textwrap import wrap + + def pp(name: str, ks: Iterable[str]) -> list[str]: + pfx = "- " + name + ".[" if name else "" + ls = wrap( + ", ".join(ks), + width, + initial_indent=pfx, + subsequent_indent=" ", + break_long_words=False, + ) + if ls and ls[-1] and name: + ls[-1] = ls[-1] + "]" + return ls + + ls: list[str] = [] + singles = [x for x in sorted(keys) if x.find(".") < 0] + if singles: + ls += pp("", singles) + keys = [x for x in keys if x.find(".") >= 0] + + for k, g in groupby(sorted(keys), lambda x: x[: x.rfind(".")]): + ks = [x[len(k) + 1 :] for x in list(g)] + ls += pp(k, ks) + s = "\n".join(ls) + if _print: + print(s) + else: + return s + + +# +# helpers + + +@contextmanager +def config_prefix(prefix) -> Iterator[None]: + """ + contextmanager for multiple invocations of API with a common prefix + + supported API functions: (register / get / set )__option + + Warning: This is not thread - safe, and won't work properly if you import + the API functions into your module using the "from x import y" construct. + + Example + ------- + import pandas._config.config as cf + with cf.config_prefix("display.font"): + cf.register_option("color", "red") + cf.register_option("size", " 5 pt") + cf.set_option(size, " 6 pt") + cf.get_option(size) + ... + + etc' + + will register options "display.font.color", "display.font.size", set the + value of "display.font.size"... and so on. + """ + # Note: reset_option relies on set_option, and on key directly + # it does not fit in to this monkey-patching scheme + + global register_option, get_option, set_option, reset_option + + def wrap(func: F) -> F: + def inner(key: str, *args, **kwds): + pkey = f"{prefix}.{key}" + return func(pkey, *args, **kwds) + + return cast(F, inner) + + _register_option = register_option + _get_option = get_option + _set_option = set_option + set_option = wrap(set_option) + get_option = wrap(get_option) + register_option = wrap(register_option) + try: + yield + finally: + set_option = _set_option + get_option = _get_option + register_option = _register_option + + +# These factories and methods are handy for use as the validator +# arg in register_option + + +def is_type_factory(_type: type[Any]) -> Callable[[Any], None]: + """ + + Parameters + ---------- + `_type` - a type to be compared against (e.g. 
type(x) == `_type`) + + Returns + ------- + validator - a function of a single argument x , which raises + ValueError if type(x) is not equal to `_type` + + """ + + def inner(x) -> None: + if type(x) != _type: + raise ValueError(f"Value must have type '{_type}'") + + return inner + + +def is_instance_factory(_type) -> Callable[[Any], None]: + """ + + Parameters + ---------- + `_type` - the type to be checked against + + Returns + ------- + validator - a function of a single argument x , which raises + ValueError if x is not an instance of `_type` + + """ + if isinstance(_type, (tuple, list)): + _type = tuple(_type) + type_repr = "|".join(map(str, _type)) + else: + type_repr = f"'{_type}'" + + def inner(x) -> None: + if not isinstance(x, _type): + raise ValueError(f"Value must be an instance of {type_repr}") + + return inner + + +def is_one_of_factory(legal_values) -> Callable[[Any], None]: + + callables = [c for c in legal_values if callable(c)] + legal_values = [c for c in legal_values if not callable(c)] + + def inner(x) -> None: + if x not in legal_values: + + if not any(c(x) for c in callables): + uvals = [str(lval) for lval in legal_values] + pp_values = "|".join(uvals) + msg = f"Value must be one of {pp_values}" + if len(callables): + msg += " or a callable" + raise ValueError(msg) + + return inner + + +def is_nonnegative_int(value: object) -> None: + """ + Verify that value is None or a positive int. + + Parameters + ---------- + value : None or int + The `value` to be checked. + + Raises + ------ + ValueError + When the value is not None or is a negative integer + """ + if value is None: + return + + elif isinstance(value, int): + if value >= 0: + return + + msg = "Value must be a nonnegative integer or None" + raise ValueError(msg) + + +# common type validators, for convenience +# usage: register_option(... , validator = is_int) +is_int = is_type_factory(int) +is_bool = is_type_factory(bool) +is_float = is_type_factory(float) +is_str = is_type_factory(str) +is_text = is_instance_factory((str, bytes)) + + +def is_callable(obj) -> bool: + """ + + Parameters + ---------- + `obj` - the object to be checked + + Returns + ------- + validator - returns True if object is callable + raises ValueError otherwise. + + """ + if not callable(obj): + raise ValueError("Value must be a callable") + return True diff --git a/pandas/_config/dates.py b/pandas/_config/dates.py new file mode 100644 index 00000000..b37831f9 --- /dev/null +++ b/pandas/_config/dates.py @@ -0,0 +1,25 @@ +""" +config for datetime formatting +""" +from __future__ import annotations + +from pandas._config import config as cf + +pc_date_dayfirst_doc = """ +: boolean + When True, prints and parses dates with the day first, eg 20/01/2005 +""" + +pc_date_yearfirst_doc = """ +: boolean + When True, prints and parses dates with the year first, eg 2005/01/20 +""" + +with cf.config_prefix("display"): + # Needed upstream of `_libs` because these are used in tslibs.parsing + cf.register_option( + "date_dayfirst", False, pc_date_dayfirst_doc, validator=cf.is_bool + ) + cf.register_option( + "date_yearfirst", False, pc_date_yearfirst_doc, validator=cf.is_bool + ) diff --git a/pandas/_config/display.py b/pandas/_config/display.py new file mode 100644 index 00000000..df2c3ad3 --- /dev/null +++ b/pandas/_config/display.py @@ -0,0 +1,62 @@ +""" +Unopinionated display configuration. 
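+
+In practice this means detecting a usable console encoding at import time and
+registering it as the ``display.encoding`` option.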
+""" + +from __future__ import annotations + +import locale +import sys + +from pandas._config import config as cf + +# ----------------------------------------------------------------------------- +# Global formatting options +_initial_defencoding: str | None = None + + +def detect_console_encoding() -> str: + """ + Try to find the most capable encoding supported by the console. + slightly modified from the way IPython handles the same issue. + """ + global _initial_defencoding + + encoding = None + try: + encoding = sys.stdout.encoding or sys.stdin.encoding + except (AttributeError, OSError): + pass + + # try again for something better + if not encoding or "ascii" in encoding.lower(): + try: + encoding = locale.getpreferredencoding() + except locale.Error: + # can be raised by locale.setlocale(), which is + # called by getpreferredencoding + # (on some systems, see stdlib locale docs) + pass + + # when all else fails. this will usually be "ascii" + if not encoding or "ascii" in encoding.lower(): + encoding = sys.getdefaultencoding() + + # GH#3360, save the reported defencoding at import time + # MPL backends may change it. Make available for debugging. + if not _initial_defencoding: + _initial_defencoding = sys.getdefaultencoding() + + return encoding + + +pc_encoding_doc = """ +: str/unicode + Defaults to the detected encoding of the console. + Specifies the encoding to be used for strings returned by to_string, + these are generally strings meant to be displayed on the console. +""" + +with cf.config_prefix("display"): + cf.register_option( + "encoding", detect_console_encoding(), pc_encoding_doc, validator=cf.is_text + ) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py new file mode 100644 index 00000000..c4355e95 --- /dev/null +++ b/pandas/_config/localization.py @@ -0,0 +1,179 @@ +""" +Helpers for configuring locale settings. + +Name `localization` is chosen to avoid overlap with builtin `locale` module. +""" +from __future__ import annotations + +from contextlib import contextmanager +import locale +import re +import subprocess +from typing import ( + Callable, + Iterator, +) + +from pandas._config.config import options + + +@contextmanager +def set_locale( + new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL +) -> Iterator[str | tuple[str, str]]: + """ + Context manager for temporarily setting a locale. + + Parameters + ---------- + new_locale : str or tuple + A string of the form .. For example to set + the current locale to US English with a UTF8 encoding, you would pass + "en_US.UTF-8". + lc_var : int, default `locale.LC_ALL` + The category of the locale being set. + + Notes + ----- + This is useful when you want to run a particular block of code under a + particular locale, without globally setting the locale. This probably isn't + thread-safe. + """ + # getlocale is not always compliant with setlocale, use setlocale. GH#46595 + current_locale = locale.setlocale(lc_var) + + try: + locale.setlocale(lc_var, new_locale) + normalized_code, normalized_encoding = locale.getlocale() + if normalized_code is not None and normalized_encoding is not None: + yield f"{normalized_code}.{normalized_encoding}" + else: + yield new_locale + finally: + locale.setlocale(lc_var, current_locale) + + +def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool: + """ + Check to see if we can set a locale, and subsequently get the locale, + without raising an Exception. + + Parameters + ---------- + lc : str + The locale to attempt to set. 
+ lc_var : int, default `locale.LC_ALL` + The category of the locale being set. + + Returns + ------- + bool + Whether the passed locale can be set + """ + try: + with set_locale(lc, lc_var=lc_var): + pass + except (ValueError, locale.Error): + # horrible name for a Exception subclass + return False + else: + return True + + +def _valid_locales(locales: list[str] | str, normalize: bool) -> list[str]: + """ + Return a list of normalized locales that do not throw an ``Exception`` + when set. + + Parameters + ---------- + locales : str + A string where each locale is separated by a newline. + normalize : bool + Whether to call ``locale.normalize`` on each locale. + + Returns + ------- + valid_locales : list + A list of valid locales. + """ + return [ + loc + for loc in ( + locale.normalize(loc.strip()) if normalize else loc.strip() + for loc in locales + ) + if can_set_locale(loc) + ] + + +def _default_locale_getter() -> bytes: + return subprocess.check_output(["locale -a"], shell=True) + + +def get_locales( + prefix: str | None = None, + normalize: bool = True, + locale_getter: Callable[[], bytes] = _default_locale_getter, +) -> list[str] | None: + """ + Get all the locales that are available on the system. + + Parameters + ---------- + prefix : str + If not ``None`` then return only those locales with the prefix + provided. For example to get all English language locales (those that + start with ``"en"``), pass ``prefix="en"``. + normalize : bool + Call ``locale.normalize`` on the resulting list of available locales. + If ``True``, only locales that can be set without throwing an + ``Exception`` are returned. + locale_getter : callable + The function to use to retrieve the current locales. This should return + a string with each locale separated by a newline character. + + Returns + ------- + locales : list of strings + A list of locale strings that can be set with ``locale.setlocale()``. + For example:: + + locale.setlocale(locale.LC_ALL, locale_string) + + On error will return None (no locale available, e.g. Windows) + + """ + try: + raw_locales = locale_getter() + except subprocess.CalledProcessError: + # Raised on (some? all?) Windows platforms because Note: "locale -a" + # is not defined + return None + + try: + # raw_locales is "\n" separated list of locales + # it may contain non-decodable parts, so split + # extract what we can and then rejoin. + split_raw_locales = raw_locales.split(b"\n") + out_locales = [] + for x in split_raw_locales: + try: + out_locales.append(str(x, encoding=options.display.encoding)) + except UnicodeError: + # 'locale -a' is used to populated 'raw_locales' and on + # Redhat 7 Linux (and maybe others) prints locale names + # using windows-1252 encoding. Bug only triggered by + # a few special characters and when there is an + # extensive list of installed locales. 
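+                # Fall back to decoding such entries as windows-1252.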
+ out_locales.append(str(x, encoding="windows-1252")) + + except TypeError: + pass + + if prefix is None: + return _valid_locales(out_locales, normalize) + + pattern = re.compile(f"{prefix}.*") + found = pattern.findall("\n".join(out_locales)) + return _valid_locales(found, normalize) diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py new file mode 100644 index 00000000..f119e280 --- /dev/null +++ b/pandas/_libs/__init__.py @@ -0,0 +1,22 @@ +__all__ = [ + "NaT", + "NaTType", + "OutOfBoundsDatetime", + "Period", + "Timedelta", + "Timestamp", + "iNaT", + "Interval", +] + + +from pandas._libs.interval import Interval +from pandas._libs.tslibs import ( + NaT, + NaTType, + OutOfBoundsDatetime, + Period, + Timedelta, + Timestamp, + iNaT, +) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd new file mode 100644 index 00000000..c3b83b9b --- /dev/null +++ b/pandas/_libs/algos.pxd @@ -0,0 +1,22 @@ +from pandas._libs.dtypes cimport ( + numeric_object_t, + numeric_t, +) + + +cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil + +cdef enum TiebreakEnumType: + TIEBREAK_AVERAGE + TIEBREAK_MIN, + TIEBREAK_MAX + TIEBREAK_FIRST + TIEBREAK_FIRST_DESCENDING + TIEBREAK_DENSE + + +cdef numeric_object_t get_rank_nan_fill_val( + bint rank_nans_highest, + numeric_object_t val, + bint is_datetimelike=*, +) diff --git a/pandas/_libs/algos.pyi b/pandas/_libs/algos.pyi new file mode 100644 index 00000000..5a200572 --- /dev/null +++ b/pandas/_libs/algos.pyi @@ -0,0 +1,420 @@ +from typing import Any + +import numpy as np + +from pandas._typing import npt + +class Infinity: + """ + Provide a positive Infinity comparison method for ranking. + """ + + def __eq__(self, other) -> bool: ... + def __ne__(self, other) -> bool: ... + def __lt__(self, other) -> bool: ... + def __le__(self, other) -> bool: ... + def __gt__(self, other) -> bool: ... + def __ge__(self, other) -> bool: ... + +class NegInfinity: + """ + Provide a negative Infinity comparison method for ranking. + """ + + def __eq__(self, other) -> bool: ... + def __ne__(self, other) -> bool: ... + def __lt__(self, other) -> bool: ... + def __le__(self, other) -> bool: ... + def __gt__(self, other) -> bool: ... + def __ge__(self, other) -> bool: ... + +def unique_deltas( + arr: np.ndarray, # const int64_t[:] +) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1] +def is_lexsorted(list_of_arrays: list[npt.NDArray[np.int64]]) -> bool: ... +def groupsort_indexer( + index: np.ndarray, # const int64_t[:] + ngroups: int, +) -> tuple[ + np.ndarray, # ndarray[int64_t, ndim=1] + np.ndarray, # ndarray[int64_t, ndim=1] +]: ... +def kth_smallest( + arr: np.ndarray, # numeric[:] + k: int, +) -> Any: ... # numeric + +# ---------------------------------------------------------------------- +# Pairwise correlation/covariance + +def nancorr( + mat: npt.NDArray[np.float64], # const float64_t[:, :] + cov: bool = ..., + minp: int | None = ..., +) -> npt.NDArray[np.float64]: ... # ndarray[float64_t, ndim=2] +def nancorr_spearman( + mat: npt.NDArray[np.float64], # ndarray[float64_t, ndim=2] + minp: int = ..., +) -> npt.NDArray[np.float64]: ... # ndarray[float64_t, ndim=2] + +# ---------------------------------------------------------------------- + +def validate_limit(nobs: int | None, limit=...) -> int: ... +def pad( + old: np.ndarray, # ndarray[numeric_object_t] + new: np.ndarray, # ndarray[numeric_object_t] + limit=..., +) -> npt.NDArray[np.intp]: ... 
# np.ndarray[np.intp, ndim=1] +def pad_inplace( + values: np.ndarray, # numeric_object_t[:] + mask: np.ndarray, # uint8_t[:] + limit=..., +) -> None: ... +def pad_2d_inplace( + values: np.ndarray, # numeric_object_t[:, :] + mask: np.ndarray, # const uint8_t[:, :] + limit=..., +) -> None: ... +def backfill( + old: np.ndarray, # ndarray[numeric_object_t] + new: np.ndarray, # ndarray[numeric_object_t] + limit=..., +) -> npt.NDArray[np.intp]: ... # np.ndarray[np.intp, ndim=1] +def backfill_inplace( + values: np.ndarray, # numeric_object_t[:] + mask: np.ndarray, # uint8_t[:] + limit=..., +) -> None: ... +def backfill_2d_inplace( + values: np.ndarray, # numeric_object_t[:, :] + mask: np.ndarray, # const uint8_t[:, :] + limit=..., +) -> None: ... +def is_monotonic( + arr: np.ndarray, # ndarray[numeric_object_t, ndim=1] + timelike: bool, +) -> tuple[bool, bool, bool]: ... + +# ---------------------------------------------------------------------- +# rank_1d, rank_2d +# ---------------------------------------------------------------------- + +def rank_1d( + values: np.ndarray, # ndarray[numeric_object_t, ndim=1] + labels: np.ndarray | None = ..., # const int64_t[:]=None + is_datetimelike: bool = ..., + ties_method=..., + ascending: bool = ..., + pct: bool = ..., + na_option=..., + mask: npt.NDArray[np.bool_] | None = ..., +) -> np.ndarray: ... # np.ndarray[float64_t, ndim=1] +def rank_2d( + in_arr: np.ndarray, # ndarray[numeric_object_t, ndim=2] + axis: int = ..., + is_datetimelike: bool = ..., + ties_method=..., + ascending: bool = ..., + na_option=..., + pct: bool = ..., +) -> np.ndarray: ... # np.ndarray[float64_t, ndim=1] +def diff_2d( + arr: np.ndarray, # ndarray[diff_t, ndim=2] + out: np.ndarray, # ndarray[out_t, ndim=2] + periods: int, + axis: int, + datetimelike: bool = ..., +) -> None: ... +def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ... +def ensure_object(arr: object) -> npt.NDArray[np.object_]: ... +def ensure_float64(arr: object, copy=...) -> npt.NDArray[np.float64]: ... +def ensure_int8(arr: object, copy=...) -> npt.NDArray[np.int8]: ... +def ensure_int16(arr: object, copy=...) -> npt.NDArray[np.int16]: ... +def ensure_int32(arr: object, copy=...) -> npt.NDArray[np.int32]: ... +def ensure_int64(arr: object, copy=...) -> npt.NDArray[np.int64]: ... +def ensure_uint64(arr: object, copy=...) -> npt.NDArray[np.uint64]: ... +def take_1d_int8_int8( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int8_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int8_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int8_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int16_int16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int16_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int16_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int16_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int32_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... 
+def take_1d_int32_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int64_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_int64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_float32_float32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_float32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_float64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_object_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_bool_bool( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_1d_bool_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int8_int8( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int8_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int8_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int8_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int16_int16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int16_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int16_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int16_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int32_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int32_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int64_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_int64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_float32_float32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_float32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_float64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_object_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... 
+) -> None: ... +def take_2d_axis0_bool_bool( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis0_bool_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int8_int8( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int8_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int8_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int8_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int16_int16( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int16_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int16_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int16_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int32_int32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int32_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int64_int64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_int64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_float32_float32( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_float32_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_float64_float64( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_object_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_bool_bool( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_axis1_bool_object( + values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=... +) -> None: ... +def take_2d_multi_int8_int8( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int8_int32( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int8_int64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int8_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... 
+def take_2d_multi_int16_int16( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int16_int32( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int16_int64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int16_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int32_int32( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int32_int64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int32_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int64_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_float32_float32( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_float32_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_float64_float64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_object_object( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_bool_bool( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_bool_object( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... +def take_2d_multi_int64_int64( + values: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value=..., +) -> None: ... 
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx new file mode 100644 index 00000000..c05d6a30 --- /dev/null +++ b/pandas/_libs/algos.pyx @@ -0,0 +1,1522 @@ +cimport cython +from cython cimport Py_ssize_t +from libc.math cimport ( + fabs, + sqrt, +) +from libc.stdlib cimport ( + free, + malloc, +) +from libc.string cimport memmove + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + NPY_COMPLEX64, + NPY_COMPLEX128, + NPY_FLOAT32, + NPY_FLOAT64, + NPY_INT8, + NPY_INT16, + NPY_INT32, + NPY_INT64, + NPY_OBJECT, + NPY_UINT8, + NPY_UINT16, + NPY_UINT32, + NPY_UINT64, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + intp_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +cnp.import_array() + +cimport pandas._libs.util as util +from pandas._libs.dtypes cimport ( + numeric_object_t, + numeric_t, +) +from pandas._libs.khash cimport ( + kh_destroy_int64, + kh_get_int64, + kh_init_int64, + kh_int64_t, + kh_put_int64, + kh_resize_int64, + khiter_t, +) +from pandas._libs.util cimport get_nat + +import pandas._libs.missing as missing + +cdef: + float64_t FP_ERR = 1e-13 + float64_t NaN = np.NaN + int64_t NPY_NAT = get_nat() + + +tiebreakers = { + "average": TIEBREAK_AVERAGE, + "min": TIEBREAK_MIN, + "max": TIEBREAK_MAX, + "first": TIEBREAK_FIRST, + "dense": TIEBREAK_DENSE, +} + + +cdef inline bint are_diff(object left, object right): + try: + return fabs(left - right) > FP_ERR + except TypeError: + return left != right + + +class Infinity: + """ + Provide a positive Infinity comparison method for ranking. + """ + __lt__ = lambda self, other: False + __le__ = lambda self, other: isinstance(other, Infinity) + __eq__ = lambda self, other: isinstance(other, Infinity) + __ne__ = lambda self, other: not isinstance(other, Infinity) + __gt__ = lambda self, other: (not isinstance(other, Infinity) and + not missing.checknull(other)) + __ge__ = lambda self, other: not missing.checknull(other) + + +class NegInfinity: + """ + Provide a negative Infinity comparison method for ranking. + """ + __lt__ = lambda self, other: (not isinstance(other, NegInfinity) and + not missing.checknull(other)) + __le__ = lambda self, other: not missing.checknull(other) + __eq__ = lambda self, other: isinstance(other, NegInfinity) + __ne__ = lambda self, other: not isinstance(other, NegInfinity) + __gt__ = lambda self, other: False + __ge__ = lambda self, other: isinstance(other, NegInfinity) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr): + """ + Efficiently find the unique first-differences of the given array. 
+ + Parameters + ---------- + arr : ndarray[int64_t] + + Returns + ------- + ndarray[int64_t] + An ordered ndarray[int64_t] + """ + cdef: + Py_ssize_t i, n = len(arr) + int64_t val + khiter_t k + kh_int64_t *table + int ret = 0 + list uniques = [] + ndarray[int64_t, ndim=1] result + + table = kh_init_int64() + kh_resize_int64(table, 10) + for i in range(n - 1): + val = arr[i + 1] - arr[i] + k = kh_get_int64(table, val) + if k == table.n_buckets: + kh_put_int64(table, val, &ret) + uniques.append(val) + kh_destroy_int64(table) + + result = np.array(uniques, dtype=np.int64) + result.sort() + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_lexsorted(list_of_arrays: list) -> bint: + cdef: + Py_ssize_t i + Py_ssize_t n, nlevels + int64_t k, cur, pre + ndarray arr + bint result = True + + nlevels = len(list_of_arrays) + n = len(list_of_arrays[0]) + + cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) + for i in range(nlevels): + arr = list_of_arrays[i] + assert arr.dtype.name == 'int64' + vecs[i] = cnp.PyArray_DATA(arr) + + # Assume uniqueness?? + with nogil: + for i in range(1, n): + for k in range(nlevels): + cur = vecs[k][i] + pre = vecs[k][i -1] + if cur == pre: + continue + elif cur > pre: + break + else: + result = False + break + if not result: + break + free(vecs) + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups): + """ + Compute a 1-d indexer. + + The indexer is an ordering of the passed index, + ordered by the groups. + + Parameters + ---------- + index: np.ndarray[np.intp] + Mappings from group -> position. + ngroups: int64 + Number of groups. + + Returns + ------- + ndarray[intp_t, ndim=1] + Indexer + ndarray[intp_t, ndim=1] + Group Counts + + Notes + ----- + This is a reverse of the label factorization process. + """ + cdef: + Py_ssize_t i, label, n + intp_t[::1] indexer, where, counts + + counts = np.zeros(ngroups + 1, dtype=np.intp) + n = len(index) + indexer = np.zeros(n, dtype=np.intp) + where = np.zeros(ngroups + 1, dtype=np.intp) + + with nogil: + + # count group sizes, location 0 for NA + for i in range(n): + counts[index[i] + 1] += 1 + + # mark the start of each contiguous group of like-indexed data + for i in range(1, ngroups + 1): + where[i] = where[i - 1] + counts[i - 1] + + # this is our indexer + for i in range(n): + label = index[i] + 1 + indexer[where[label]] = i + where[label] += 1 + + return indexer.base, counts.base + + +cdef inline Py_ssize_t swap(numeric_t *a, numeric_t *b) nogil: + cdef: + numeric_t t + + # cython doesn't allow pointer dereference so use array syntax + t = a[0] + a[0] = b[0] + b[0] = t + return 0 + + +cdef inline numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil: + """ + See kth_smallest.__doc__. The additional parameter n specifies the maximum + number of elements considered in arr, needed for compatibility with usage + in groupby.pyx + """ + cdef: + Py_ssize_t i, j, left, m + numeric_t x + + left = 0 + m = n - 1 + + while left < m: + x = arr[k] + i = left + j = m + + while 1: + while arr[i] < x: + i += 1 + while x < arr[j]: + j -= 1 + if i <= j: + swap(&arr[i], &arr[j]) + i += 1 + j -= 1 + + if i > j: + break + + if j < k: + left = i + if k < i: + m = j + return arr[k] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t: + """ + Compute the kth smallest value in arr. Note that the input + array will be modified. 
+ + Parameters + ---------- + arr : numeric[::1] + Array to compute the kth smallest value for, must be + contiguous + k : Py_ssize_t + + Returns + ------- + numeric + The kth smallest value in arr + """ + cdef: + numeric_t result + + with nogil: + result = kth_smallest_c(&arr[0], k, arr.shape[0]) + + return result + + +# ---------------------------------------------------------------------- +# Pairwise correlation/covariance + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): + cdef: + Py_ssize_t i, j, xi, yi, N, K + bint minpv + float64_t[:, ::1] result + ndarray[uint8_t, ndim=2] mask + int64_t nobs = 0 + float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy + + N, K = (mat).shape + + if minp is None: + minpv = 1 + else: + minpv = minp + + result = np.empty((K, K), dtype=np.float64) + mask = np.isfinite(mat).view(np.uint8) + + with nogil: + for xi in range(K): + for yi in range(xi + 1): + # Welford's method for the variance-calculation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0 + for i in range(N): + if mask[i, xi] and mask[i, yi]: + vx = mat[i, xi] + vy = mat[i, yi] + nobs += 1 + dx = vx - meanx + dy = vy - meany + meanx += 1. / nobs * dx + meany += 1. / nobs * dy + ssqdmx += (vx - meanx) * dx + ssqdmy += (vy - meany) * dy + covxy += (vx - meanx) * dy + + if nobs < minpv: + result[xi, yi] = result[yi, xi] = NaN + else: + divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy) + + if divisor != 0: + result[xi, yi] = result[yi, xi] = covxy / divisor + else: + result[xi, yi] = result[yi, xi] = NaN + + return result.base + +# ---------------------------------------------------------------------- +# Pairwise Spearman correlation + + +@cython.boundscheck(False) +@cython.wraparound(False) +def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray: + cdef: + Py_ssize_t i, j, xi, yi, N, K + ndarray[float64_t, ndim=2] result + ndarray[float64_t, ndim=2] ranked_mat + ndarray[float64_t, ndim=1] rankedx, rankedy + float64_t[::1] maskedx, maskedy + ndarray[uint8_t, ndim=2] mask + int64_t nobs = 0 + bint no_nans + float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor + + N, K = (mat).shape + + # Handle the edge case where we know all results will be nan + # to keep conditional logic inside loop simpler + if N < minp: + result = np.full((K, K), np.nan, dtype=np.float64) + return result + + result = np.empty((K, K), dtype=np.float64) + mask = np.isfinite(mat).view(np.uint8) + no_nans = mask.all() + + ranked_mat = np.empty((N, K), dtype=np.float64) + + # Note: we index into maskedx, maskedy in loops up to nobs, but using N is safe + # here since N >= nobs and values are stored contiguously + maskedx = np.empty(N, dtype=np.float64) + maskedy = np.empty(N, dtype=np.float64) + for i in range(K): + ranked_mat[:, i] = rank_1d(mat[:, i]) + + with nogil: + for xi in range(K): + for yi in range(xi + 1): + sumx = sumxx = sumyy = 0 + + # Fastpath for data with no nans/infs, allows avoiding mask checks + # and array reassignments + if no_nans: + mean = (N + 1) / 2. 
+ + # now the cov numerator + for i in range(N): + vx = ranked_mat[i, xi] - mean + vy = ranked_mat[i, yi] - mean + + sumx += vx * vy + sumxx += vx * vx + sumyy += vy * vy + else: + nobs = 0 + # Keep track of whether we need to recompute ranks + all_ranks = True + for i in range(N): + all_ranks &= not (mask[i, xi] ^ mask[i, yi]) + if mask[i, xi] and mask[i, yi]: + maskedx[nobs] = ranked_mat[i, xi] + maskedy[nobs] = ranked_mat[i, yi] + nobs += 1 + + if nobs < minp: + result[xi, yi] = result[yi, xi] = NaN + continue + else: + if not all_ranks: + with gil: + # We need to slice back to nobs because rank_1d will + # require arrays of nobs length + rankedx = rank_1d(np.asarray(maskedx)[:nobs]) + rankedy = rank_1d(np.asarray(maskedy)[:nobs]) + for i in range(nobs): + maskedx[i] = rankedx[i] + maskedy[i] = rankedy[i] + + mean = (nobs + 1) / 2. + + # now the cov numerator + for i in range(nobs): + vx = maskedx[i] - mean + vy = maskedy[i] - mean + + sumx += vx * vy + sumxx += vx * vx + sumyy += vy * vy + + divisor = sqrt(sumxx * sumyy) + + if divisor != 0: + result[xi, yi] = result[yi, xi] = sumx / divisor + else: + result[xi, yi] = result[yi, xi] = NaN + + return result + + +# ---------------------------------------------------------------------- + +def validate_limit(nobs: int | None, limit=None) -> int: + """ + Check that the `limit` argument is a positive integer. + + Parameters + ---------- + nobs : int + limit : object + + Returns + ------- + int + The limit. + """ + if limit is None: + lim = nobs + else: + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') + lim = limit + + return lim + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad( + ndarray[numeric_object_t] old, + ndarray[numeric_object_t] new, + limit=None +) -> ndarray: + # -> ndarray[intp_t, ndim=1] + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[intp_t, ndim=1] indexer + numeric_object_t cur, next_val + int lim, fill_count = 0 + + nleft = len(old) + nright = len(new) + indexer = np.empty(nright, dtype=np.intp) + indexer[:] = -1 + + lim = validate_limit(nright, limit) + + if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: + return indexer + + i = j = 0 + + cur = old[0] + + while j <= nright - 1 and new[j] < cur: + j += 1 + + while True: + if j == nright: + break + + if i == nleft - 1: + while j < nright: + if new[j] == cur: + indexer[j] = i + elif new[j] > cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j += 1 + break + + next_val = old[i + 1] + + while j < nright and cur <= new[j] < next_val: + if new[j] == cur: + indexer[j] = i + elif fill_count < lim: + indexer[j] = i + fill_count += 1 + j += 1 + + fill_count = 0 + i += 1 + cur = next_val + + return indexer + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None): + cdef: + Py_ssize_t i, N + numeric_object_t val + uint8_t prev_mask + int lim, fill_count = 0 + + N = len(values) + + # GH#2778 + if N == 0: + return + + lim = validate_limit(N, limit) + + val = values[0] + prev_mask = mask[0] + for i in range(N): + if mask[i]: + if fill_count >= lim: + continue + fill_count += 1 + values[i] = val + mask[i] = prev_mask + else: + fill_count = 0 + val = values[i] + prev_mask = mask[i] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_2d_inplace(numeric_object_t[:, :] values, uint8_t[:, :] mask, limit=None): + cdef: + Py_ssize_t i, j, N, K + numeric_object_t 
val + int lim, fill_count = 0 + + K, N = (values).shape + + # GH#2778 + if N == 0: + return + + lim = validate_limit(N, limit) + + for j in range(K): + fill_count = 0 + val = values[j, 0] + for i in range(N): + if mask[j, i]: + if fill_count >= lim or i == 0: + continue + fill_count += 1 + values[j, i] = val + mask[j, i] = False + else: + fill_count = 0 + val = values[j, i] + + +""" +Backfilling logic for generating fill vector + +Diagram of what's going on + +Old New Fill vector Mask + . 0 1 + . 0 1 + . 0 1 +A A 0 1 + . 1 1 + . 1 1 + . 1 1 + . 1 1 + . 1 1 +B B 1 1 + . 2 1 + . 2 1 + . 2 1 +C C 2 1 + . 0 + . 0 +D +""" + + +@cython.boundscheck(False) +@cython.wraparound(False) +def backfill( + ndarray[numeric_object_t] old, + ndarray[numeric_object_t] new, + limit=None +) -> ndarray: + # -> ndarray[intp_t, ndim=1] + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[intp_t, ndim=1] indexer + numeric_object_t cur, prev + int lim, fill_count = 0 + + nleft = len(old) + nright = len(new) + indexer = np.empty(nright, dtype=np.intp) + indexer[:] = -1 + + lim = validate_limit(nright, limit) + + if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: + return indexer + + i = nleft - 1 + j = nright - 1 + + cur = old[nleft - 1] + + while j >= 0 and new[j] > cur: + j -= 1 + + while True: + if j < 0: + break + + if i == 0: + while j >= 0: + if new[j] == cur: + indexer[j] = i + elif new[j] < cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j -= 1 + break + + prev = old[i - 1] + + while j >= 0 and prev < new[j] <= cur: + if new[j] == cur: + indexer[j] = i + elif new[j] < cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j -= 1 + + fill_count = 0 + i -= 1 + cur = prev + + return indexer + + +def backfill_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None): + pad_inplace(values[::-1], mask[::-1], limit=limit) + + +def backfill_2d_inplace(numeric_object_t[:, :] values, + uint8_t[:, :] mask, + limit=None): + pad_2d_inplace(values[:, ::-1], mask[:, ::-1], limit) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike): + """ + Returns + ------- + tuple + is_monotonic_inc : bool + is_monotonic_dec : bool + is_unique : bool + """ + cdef: + Py_ssize_t i, n + numeric_object_t prev, cur + bint is_monotonic_inc = 1 + bint is_monotonic_dec = 1 + bint is_unique = 1 + bint is_strict_monotonic = 1 + + n = len(arr) + + if n == 1: + if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and arr[0] == NPY_NAT): + # single value is NaN + return False, False, True + else: + return True, True, True + elif n < 2: + return True, True, True + + if timelike and arr[0] == NPY_NAT: + return False, False, True + + if numeric_object_t is not object: + with nogil: + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if timelike and cur == NPY_NAT: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if cur < prev: + is_monotonic_inc = 0 + elif cur > prev: + is_monotonic_dec = 0 + elif cur == prev: + is_unique = 0 + else: + # cur or prev is NaN + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if not is_monotonic_inc and not is_monotonic_dec: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + prev = cur + else: + # object-dtype, identical to above except we cannot use `with nogil` + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if timelike and cur == NPY_NAT: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if cur < prev: + is_monotonic_inc = 0 + elif cur > prev: + is_monotonic_dec 
= 0 + elif cur == prev: + is_unique = 0 + else: + # cur or prev is NaN + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if not is_monotonic_inc and not is_monotonic_dec: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + prev = cur + + is_strict_monotonic = is_unique and (is_monotonic_inc or is_monotonic_dec) + return is_monotonic_inc, is_monotonic_dec, is_strict_monotonic + + +# ---------------------------------------------------------------------- +# rank_1d, rank_2d +# ---------------------------------------------------------------------- + +cdef numeric_object_t get_rank_nan_fill_val( + bint rank_nans_highest, + numeric_object_t val, + bint is_datetimelike=False, +): + """ + Return the value we'll use to represent missing values when sorting depending + on if we'd like missing values to end up at the top/bottom. (The second parameter + is unused, but needed for fused type specialization) + """ + if numeric_object_t is int64_t and is_datetimelike and not rank_nans_highest: + return NPY_NAT + 1 + + if rank_nans_highest: + if numeric_object_t is object: + return Infinity() + elif numeric_object_t is int64_t: + return util.INT64_MAX + elif numeric_object_t is int32_t: + return util.INT32_MAX + elif numeric_object_t is int16_t: + return util.INT16_MAX + elif numeric_object_t is int8_t: + return util.INT8_MAX + elif numeric_object_t is uint64_t: + return util.UINT64_MAX + elif numeric_object_t is uint32_t: + return util.UINT32_MAX + elif numeric_object_t is uint16_t: + return util.UINT16_MAX + elif numeric_object_t is uint8_t: + return util.UINT8_MAX + else: + return np.inf + else: + if numeric_object_t is object: + return NegInfinity() + elif numeric_object_t is int64_t: + # Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN + # instead of NPY_NAT here causes build warnings and failure in + # test_cummax_i8_at_implementation_bound + return NPY_NAT + elif numeric_object_t is int32_t: + return util.INT32_MIN + elif numeric_object_t is int16_t: + return util.INT16_MIN + elif numeric_object_t is int8_t: + return util.INT8_MIN + elif numeric_object_t is uint64_t: + return 0 + elif numeric_object_t is uint32_t: + return 0 + elif numeric_object_t is uint16_t: + return 0 + elif numeric_object_t is uint8_t: + return 0 + else: + return -np.inf + + +@cython.wraparound(False) +@cython.boundscheck(False) +def rank_1d( + ndarray[numeric_object_t, ndim=1] values, + const intp_t[:] labels=None, + bint is_datetimelike=False, + ties_method="average", + bint ascending=True, + bint pct=False, + na_option="keep", + const uint8_t[:] mask=None, +): + """ + Fast NaN-friendly version of ``scipy.stats.rankdata``. + + Parameters + ---------- + values : array of numeric_object_t values to be ranked + labels : np.ndarray[np.intp] or None + Array containing unique label for each group, with its ordering + matching up to the corresponding record in `values`. If not called + from a groupby operation, will be None. + is_datetimelike : bool, default False + True if `values` contains datetime-like entries. 
+ ties_method : {'average', 'min', 'max', 'first', 'dense'}, default + 'average' + * average: average rank of group + * min: lowest rank in group + * max: highest rank in group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups + ascending : bool, default True + False for ranks by high (1) to low (N) + na_option : {'keep', 'top', 'bottom'}, default 'keep' + pct : bool, default False + Compute percentage rank of data within each group + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are + * top: smallest rank if ascending + * bottom: smallest rank if descending + mask : np.ndarray[bool], optional, default None + Specify locations to be treated as NA, for e.g. Categorical. + """ + cdef: + TiebreakEnumType tiebreak + Py_ssize_t N + int64_t[::1] grp_sizes + intp_t[:] lexsort_indexer + float64_t[::1] out + ndarray[numeric_object_t, ndim=1] masked_vals + numeric_object_t[:] masked_vals_memview + bint keep_na, nans_rank_highest, check_labels, check_mask + numeric_object_t nan_fill_val + + tiebreak = tiebreakers[ties_method] + if tiebreak == TIEBREAK_FIRST: + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + + keep_na = na_option == 'keep' + + N = len(values) + if labels is not None: + # TODO(cython3): cast won't be necessary (#2992) + assert len(labels) == N + out = np.empty(N) + grp_sizes = np.ones(N, dtype=np.int64) + + # If we don't care about labels, can short-circuit later label + # comparisons + check_labels = labels is not None + + # For cases where a mask is not possible, we can avoid mask checks + check_mask = ( + numeric_object_t is float32_t + or numeric_object_t is float64_t + or numeric_object_t is object + or (numeric_object_t is int64_t and is_datetimelike) + ) + check_mask = check_mask or mask is not None + + # Copy values into new array in order to fill missing data + # with mask, without obfuscating location of missing data + # in values array + if numeric_object_t is object and values.dtype != np.object_: + masked_vals = values.astype('O') + else: + masked_vals = values.copy() + + if mask is not None: + pass + elif numeric_object_t is object: + mask = missing.isnaobj(masked_vals) + elif numeric_object_t is int64_t and is_datetimelike: + mask = (masked_vals == NPY_NAT).astype(np.uint8) + elif numeric_object_t is float64_t or numeric_object_t is float32_t: + mask = np.isnan(masked_vals).astype(np.uint8) + else: + mask = np.zeros(shape=len(masked_vals), dtype=np.uint8) + + # If `na_option == 'top'`, we want to assign the lowest rank + # to NaN regardless of ascending/descending. So if ascending, + # fill with lowest value of type to end up with lowest rank. + # If descending, fill with highest value since descending + # will flip the ordering to still end up with lowest rank. 
+ # Symmetric logic applies to `na_option == 'bottom'` + nans_rank_highest = ascending ^ (na_option == 'top') + nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, 0) + if nans_rank_highest: + order = [masked_vals, mask] + else: + order = [masked_vals, ~(np.asarray(mask))] + + if check_labels: + order.append(labels) + + np.putmask(masked_vals, mask, nan_fill_val) + # putmask doesn't accept a memoryview, so we assign as a separate step + masked_vals_memview = masked_vals + + # lexsort using labels, then mask, then actual values + # each label corresponds to a different group value, + # the mask helps you differentiate missing values before + # performing sort on the actual values + lexsort_indexer = np.lexsort(order).astype(np.intp, copy=False) + + if not ascending: + lexsort_indexer = lexsort_indexer[::-1] + + with nogil: + rank_sorted_1d( + out, + grp_sizes, + lexsort_indexer, + masked_vals_memview, + mask, + check_mask=check_mask, + N=N, + tiebreak=tiebreak, + keep_na=keep_na, + pct=pct, + labels=labels, + ) + + return np.asarray(out) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef void rank_sorted_1d( + float64_t[::1] out, + int64_t[::1] grp_sizes, + const intp_t[:] sort_indexer, + # TODO(cython3): make const (https://github.com/cython/cython/issues/3222) + numeric_object_t[:] masked_vals, + const uint8_t[:] mask, + bint check_mask, + Py_ssize_t N, + TiebreakEnumType tiebreak=TIEBREAK_AVERAGE, + bint keep_na=True, + bint pct=False, + # https://github.com/cython/cython/issues/1630, only trailing arguments can + # currently be omitted for cdef functions, which is why we keep this at the end + const intp_t[:] labels=None, +) nogil: + """ + See rank_1d.__doc__. Handles only actual ranking, so sorting and masking should + be handled in the caller. Note that `out` and `grp_sizes` are modified inplace. + + Parameters + ---------- + out : float64_t[::1] + Array to store computed ranks + grp_sizes : int64_t[::1] + Array to store group counts, only used if pct=True. Should only be None + if labels is None. + sort_indexer : intp_t[:] + Array of indices which sorts masked_vals + masked_vals : numeric_object_t[:] + The values input to rank_1d, with missing values replaced by fill values + mask : uint8_t[:] + Array where entries are True if the value is missing, False otherwise. + check_mask : bool + If False, assumes the mask is all False to skip mask indexing + N : Py_ssize_t + The number of elements to rank. Note: it is not always true that + N == len(out) or N == len(masked_vals) (see `nancorr_spearman` usage for why) + tiebreak : TiebreakEnumType, default TIEBREAK_AVERAGE + See rank_1d.__doc__ for the different modes + keep_na : bool, default True + Whether or not to keep nulls + pct : bool, default False + Compute percentage rank of data within each group + labels : See rank_1d.__doc__, default None. None implies all labels are the same. 
+ """ + + cdef: + Py_ssize_t i, j, dups=0, sum_ranks=0, + Py_ssize_t grp_start=0, grp_vals_seen=1, grp_na_count=0 + bint at_end, next_val_diff, group_changed, check_labels + int64_t grp_size + + check_labels = labels is not None + + # Loop over the length of the value array + # each incremental i value can be looked up in the lexsort_indexer + # array that we sorted previously, which gives us the location of + # that sorted value for retrieval back from the original + # values / masked_vals arrays + # TODO(cython3): de-duplicate once cython supports conditional nogil + if numeric_object_t is object: + with gil: + for i in range(N): + at_end = i == N - 1 + + # dups and sum_ranks will be incremented each loop where + # the value / group remains the same, and should be reset + # when either of those change. Used to calculate tiebreakers + dups += 1 + sum_ranks += i - grp_start + 1 + + next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]], + masked_vals[sort_indexer[i+1]]) + + # We'll need this check later anyway to determine group size, so just + # compute it here since shortcircuiting won't help + group_changed = at_end or (check_labels and + (labels[sort_indexer[i]] + != labels[sort_indexer[i+1]])) + + # Update out only when there is a transition of values or labels. + # When a new value or group is encountered, go back #dups steps( + # the number of occurrence of current value) and assign the ranks + # based on the starting index of the current group (grp_start) + # and the current index + if (next_val_diff or group_changed or (check_mask and + (mask[sort_indexer[i]] + ^ mask[sort_indexer[i+1]]))): + + # If keep_na, check for missing values and assign back + # to the result where appropriate + if keep_na and check_mask and mask[sort_indexer[i]]: + grp_na_count = dups + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = NaN + elif tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = i - grp_start + 1 + + # With n as the previous rank in the group and m as the number + # of duplicates in this stretch, if TIEBREAK_FIRST and ascending, + # then rankings should be n + 1, n + 2 ... n + m + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = j + 1 - grp_start + + # If TIEBREAK_FIRST and descending, the ranking should be + # n + m, n + (m - 1) ... n + 1. This is equivalent to + # (i - dups + 1) + (i - j + 1) - grp_start + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start + elif tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = grp_vals_seen + + # Look forward to the next value (using the sorting in + # lexsort_indexer). If the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. The conditional also needs + # to handle nan equality and the end of iteration. 
If group + # changes we do not record seeing a new value in the group + if not group_changed and (next_val_diff or (check_mask and + (mask[sort_indexer[i]] + ^ mask[sort_indexer[i+1]]))): + dups = sum_ranks = 0 + grp_vals_seen += 1 + + # Similar to the previous conditional, check now if we are + # moving to a new group. If so, keep track of the index where + # the new group occurs, so the tiebreaker calculations can + # decrement that from their position. Fill in the size of each + # group encountered (used by pct calculations later). Also be + # sure to reset any of the items helping to calculate dups + if group_changed: + + # If not dense tiebreak, group size used to compute + # percentile will be # of non-null elements in group + if tiebreak != TIEBREAK_DENSE: + grp_size = i - grp_start + 1 - grp_na_count + + # Otherwise, it will be the number of distinct values + # in the group, subtracting 1 if NaNs are present + # since that is a distinct value we shouldn't count + else: + grp_size = grp_vals_seen - (grp_na_count > 0) + + for j in range(grp_start, i + 1): + grp_sizes[sort_indexer[j]] = grp_size + + dups = sum_ranks = 0 + grp_na_count = 0 + grp_start = i + 1 + grp_vals_seen = 1 + else: + for i in range(N): + at_end = i == N - 1 + + # dups and sum_ranks will be incremented each loop where + # the value / group remains the same, and should be reset + # when either of those change. Used to calculate tiebreakers + dups += 1 + sum_ranks += i - grp_start + 1 + + next_val_diff = at_end or (masked_vals[sort_indexer[i]] + != masked_vals[sort_indexer[i+1]]) + + # We'll need this check later anyway to determine group size, so just + # compute it here since shortcircuiting won't help + group_changed = at_end or (check_labels and + (labels[sort_indexer[i]] + != labels[sort_indexer[i+1]])) + + # Update out only when there is a transition of values or labels. + # When a new value or group is encountered, go back #dups steps( + # the number of occurrence of current value) and assign the ranks + # based on the starting index of the current group (grp_start) + # and the current index + if (next_val_diff or group_changed + or (check_mask and + (mask[sort_indexer[i]] ^ mask[sort_indexer[i+1]]))): + + # If keep_na, check for missing values and assign back + # to the result where appropriate + if keep_na and check_mask and mask[sort_indexer[i]]: + grp_na_count = dups + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = NaN + elif tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = i - grp_start + 1 + + # With n as the previous rank in the group and m as the number + # of duplicates in this stretch, if TIEBREAK_FIRST and ascending, + # then rankings should be n + 1, n + 2 ... n + m + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = j + 1 - grp_start + + # If TIEBREAK_FIRST and descending, the ranking should be + # n + m, n + (m - 1) ... n + 1. 
This is equivalent to + # (i - dups + 1) + (i - j + 1) - grp_start + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start + elif tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[sort_indexer[j]] = grp_vals_seen + + # Look forward to the next value (using the sorting in + # lexsort_indexer). If the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. The conditional also needs + # to handle nan equality and the end of iteration. If group + # changes we do not record seeing a new value in the group + if not group_changed and (next_val_diff + or (check_mask and + (mask[sort_indexer[i]] + ^ mask[sort_indexer[i+1]]))): + dups = sum_ranks = 0 + grp_vals_seen += 1 + + # Similar to the previous conditional, check now if we are + # moving to a new group. If so, keep track of the index where + # the new group occurs, so the tiebreaker calculations can + # decrement that from their position. Fill in the size of each + # group encountered (used by pct calculations later). Also be + # sure to reset any of the items helping to calculate dups + if group_changed: + + # If not dense tiebreak, group size used to compute + # percentile will be # of non-null elements in group + if tiebreak != TIEBREAK_DENSE: + grp_size = i - grp_start + 1 - grp_na_count + + # Otherwise, it will be the number of distinct values + # in the group, subtracting 1 if NaNs are present + # since that is a distinct value we shouldn't count + else: + grp_size = grp_vals_seen - (grp_na_count > 0) + + for j in range(grp_start, i + 1): + grp_sizes[sort_indexer[j]] = grp_size + + dups = sum_ranks = 0 + grp_na_count = 0 + grp_start = i + 1 + grp_vals_seen = 1 + + if pct: + for i in range(N): + if grp_sizes[i] != 0: + out[i] = out[i] / grp_sizes[i] + + +def rank_2d( + ndarray[numeric_object_t, ndim=2] in_arr, + int axis=0, + bint is_datetimelike=False, + ties_method="average", + bint ascending=True, + na_option="keep", + bint pct=False, +): + """ + Fast NaN-friendly version of ``scipy.stats.rankdata``. + """ + cdef: + Py_ssize_t k, n, col + float64_t[::1, :] out # Column-major so columns are contiguous + int64_t[::1] grp_sizes + ndarray[numeric_object_t, ndim=2] values + numeric_object_t[:, :] masked_vals + intp_t[:, :] sort_indexer + uint8_t[:, :] mask + TiebreakEnumType tiebreak + bint check_mask, keep_na, nans_rank_highest + numeric_object_t nan_fill_val + + tiebreak = tiebreakers[ties_method] + if tiebreak == TIEBREAK_FIRST: + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + + keep_na = na_option == 'keep' + + # For cases where a mask is not possible, we can avoid mask checks + check_mask = ( + numeric_object_t is float32_t + or numeric_object_t is float64_t + or numeric_object_t is object + or (numeric_object_t is int64_t and is_datetimelike) + ) + + if axis == 1: + values = np.asarray(in_arr).T.copy() + else: + values = np.asarray(in_arr).copy() + + if numeric_object_t is object: + if values.dtype != np.object_: + values = values.astype('O') + + nans_rank_highest = ascending ^ (na_option == 'top') + if check_mask: + nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, 0) + + if numeric_object_t is object: + mask = missing.isnaobj2d(values).view(np.uint8) + elif numeric_object_t is float64_t or numeric_object_t is float32_t: + mask = np.isnan(values).view(np.uint8) + else: + # i.e. 
int64 and datetimelike + mask = (values == NPY_NAT).view(np.uint8) + np.putmask(values, mask, nan_fill_val) + else: + mask = np.zeros_like(values, dtype=np.uint8) + + if nans_rank_highest: + order = (values, mask) + else: + order = (values, ~np.asarray(mask)) + + n, k = (values).shape + out = np.empty((n, k), dtype='f8', order='F') + grp_sizes = np.ones(n, dtype=np.int64) + + # lexsort is slower, so only use if we need to worry about the mask + if check_mask: + sort_indexer = np.lexsort(order, axis=0).astype(np.intp, copy=False) + else: + kind = "stable" if ties_method == "first" else None + sort_indexer = values.argsort(axis=0, kind=kind).astype(np.intp, copy=False) + + if not ascending: + sort_indexer = sort_indexer[::-1, :] + + # putmask doesn't accept a memoryview, so we assign in a separate step + masked_vals = values + with nogil: + for col in range(k): + rank_sorted_1d( + out[:, col], + grp_sizes, + sort_indexer[:, col], + masked_vals[:, col], + mask[:, col], + check_mask=check_mask, + N=n, + tiebreak=tiebreak, + keep_na=keep_na, + pct=pct, + ) + + if axis == 1: + return np.asarray(out.T) + else: + return np.asarray(out) + + +ctypedef fused diff_t: + float64_t + float32_t + int8_t + int16_t + int32_t + int64_t + +ctypedef fused out_t: + float32_t + float64_t + int64_t + + +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d( + ndarray[diff_t, ndim=2] arr, # TODO(cython3) update to "const diff_t[:, :] arr" + ndarray[out_t, ndim=2] out, + Py_ssize_t periods, + int axis, + bint datetimelike=False, +): + cdef: + Py_ssize_t i, j, sx, sy, start, stop + bint f_contig = arr.flags.f_contiguous + # bint f_contig = arr.is_f_contig() # TODO(cython3) + diff_t left, right + + # Disable for unsupported dtype combinations, + # see https://github.com/cython/cython/issues/2646 + if (out_t is float32_t + and not (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): + raise NotImplementedError # pragma: no cover + elif (out_t is float64_t + and (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): + raise NotImplementedError # pragma: no cover + elif out_t is int64_t and diff_t is not int64_t: + # We only have out_t of int64_t if we have datetimelike + raise NotImplementedError # pragma: no cover + else: + # We put this inside an indented else block to avoid cython build + # warnings about unreachable code + sx, sy = (arr).shape + with nogil: + if f_contig: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + left = arr[i, j] + right = arr[i - periods, j] + if out_t is int64_t and datetimelike: + if left == NPY_NAT or right == NPY_NAT: + out[i, j] = NPY_NAT + else: + out[i, j] = left - right + else: + out[i, j] = left - right + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + left = arr[i, j] + right = arr[i, j - periods] + if out_t is int64_t and datetimelike: + if left == NPY_NAT or right == NPY_NAT: + out[i, j] = NPY_NAT + else: + out[i, j] = left - right + else: + out[i, j] = left - right + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + left = arr[i, j] + right = arr[i - periods, j] + if out_t is int64_t and datetimelike: + if left == NPY_NAT or right == NPY_NAT: + out[i, j] = NPY_NAT + else: + out[i, j] = left - right + else: + out[i, j] = left - 
right + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + left = arr[i, j] + right = arr[i, j - periods] + if out_t is int64_t and datetimelike: + if left == NPY_NAT or right == NPY_NAT: + out[i, j] = NPY_NAT + else: + out[i, j] = left - right + else: + out[i, j] = left - right + + +# generated from template +include "algos_common_helper.pxi" +include "algos_take_helper.pxi" diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in new file mode 100644 index 00000000..ce2e1ffb --- /dev/null +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -0,0 +1,73 @@ +""" +Template for each `dtype` helper function using 1-d template + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# ensure_dtype +# ---------------------------------------------------------------------- + + +def ensure_platform_int(object arr): + # GH3033, GH1392 + # platform int is the size of the int pointer, e.g. np.intp + if util.is_array(arr): + if (arr).descr.type_num == cnp.NPY_INTP: + return arr + else: + # equiv: arr.astype(np.intp) + return cnp.PyArray_Cast(arr, cnp.NPY_INTP) + else: + return np.array(arr, dtype=np.intp) + + +def ensure_object(object arr): + if util.is_array(arr): + if (arr).descr.type_num == NPY_OBJECT: + return arr + else: + # equiv: arr.astype(object) + return cnp.PyArray_Cast(arr, NPY_OBJECT) + else: + return np.array(arr, dtype=np.object_) + +{{py: + +# name, c_type, dtype +dtypes = [('float64', 'FLOAT64', 'float64'), + # ('float32', 'FLOAT32', 'float32'), # disabling bc unused + ('int8', 'INT8', 'int8'), + ('int16', 'INT16', 'int16'), + ('int32', 'INT32', 'int32'), + ('int64', 'INT64', 'int64'), + ('uint64', 'UINT64', 'uint64'), + # Disabling uint and complex dtypes because we do not use them + # (and compiling them increases wheel size) (except uint64) + # ('uint8', 'UINT8', 'uint8'), + # ('uint16', 'UINT16', 'uint16'), + # ('uint32', 'UINT32', 'uint32'), + # ('complex64', 'COMPLEX64', 'complex64'), + # ('complex128', 'COMPLEX128', 'complex128') +] + +def get_dispatch(dtypes): + + for name, c_type, dtype in dtypes: + yield name, c_type, dtype +}} + +{{for name, c_type, dtype in get_dispatch(dtypes)}} + + +def ensure_{{name}}(object arr, copy=True): + if util.is_array(arr): + if (arr).descr.type_num == NPY_{{c_type}}: + return arr + else: + # equiv: arr.astype(np.{{dtype}}, copy=copy) + return cnp.PyArray_Cast(arr, cnp.NPY_{{c_type}}) + else: + return np.array(arr, dtype=np.{{dtype}}) + +{{endfor}} diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in new file mode 100644 index 00000000..2a385867 --- /dev/null +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -0,0 +1,222 @@ +""" +Template for each `dtype` helper function for take + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# take_1d, take_2d +# ---------------------------------------------------------------------- + + +{{py: + +# c_type_in, c_type_out +dtypes = [ + ('uint8_t', 'uint8_t'), + ('uint8_t', 'object'), + ('int8_t', 'int8_t'), + ('int8_t', 'int32_t'), + ('int8_t', 'int64_t'), + ('int8_t', 'float64_t'), + ('int16_t', 'int16_t'), + ('int16_t', 'int32_t'), + ('int16_t', 'int64_t'), + ('int16_t', 'float64_t'), + ('int32_t', 'int32_t'), + ('int32_t', 'int64_t'), + 
('int32_t', 'float64_t'), + ('int64_t', 'int64_t'), + ('int64_t', 'float64_t'), + ('float32_t', 'float32_t'), + ('float32_t', 'float64_t'), + ('float64_t', 'float64_t'), + ('object', 'object'), +] + + +def get_dispatch(dtypes): + + for (c_type_in, c_type_out) in dtypes: + + def get_name(dtype_name): + if dtype_name == "object": + return "object" + if dtype_name == "uint8_t": + return "bool" + return dtype_name[:-2] + + name = get_name(c_type_in) + dest = get_name(c_type_out) + + args = dict(name=name, dest=dest, c_type_in=c_type_in, + c_type_out=c_type_out) + + yield (name, dest, c_type_in, c_type_out) + +}} + + +{{for name, dest, c_type_in, c_type_out in get_dispatch(dtypes)}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_1d_{{name}}_{{dest}}(const {{c_type_in}}[:] values, +{{else}} +def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values, +{{endif}} + const intp_t[:] indexer, + {{c_type_out}}[:] out, + fill_value=np.nan): + + cdef: + Py_ssize_t i, n, idx + {{c_type_out}} fv + + n = indexer.shape[0] + + fv = fill_value + + {{if c_type_out != "object"}} + with nogil: + {{else}} + if True: + {{endif}} + for i in range(n): + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i] = True if values[idx] > 0 else False + {{else}} + out[i] = values[idx] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, +{{else}} +def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, +{{endif}} + ndarray[intp_t, ndim=1] indexer, + {{c_type_out}}[:, :] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + {{if c_type_in == c_type_out != "object"}} + const {{c_type_out}} *v + {{c_type_out}} *o + {{endif}} + + n = len(indexer) + k = values.shape[1] + + fv = fill_value + + {{if c_type_in == c_type_out != "object"}} + # GH#3130 + if (values.strides[1] == out.strides[1] and + values.strides[1] == sizeof({{c_type_out}}) and + sizeof({{c_type_out}}) * n >= 256): + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + v = &values[idx, 0] + o = &out[i, 0] + memmove(o, v, (sizeof({{c_type_out}}) * k)) + return + {{endif}} + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + for j in range(k): + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, j] > 0 else False + {{else}} + out[i, j] = values[idx, j] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, +{{else}} +def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, +{{endif}} + ndarray[intp_t, ndim=1] indexer, + {{c_type_out}}[:, :] out, + fill_value=np.nan): + + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + + n = len(values) + k = len(indexer) + + if n == 0 or k == 0: + return + + fv = fill_value + + for i in range(n): + for j in range(k): + idx = indexer[j] + if idx == -1: + out[i, j] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[i, idx] > 0 else False + {{else}} + out[i, j] = values[i, idx] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, 
ndim=2] values, + indexer, + ndarray[{{c_type_out}}, ndim=2] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[intp_t, ndim=1] idx0 = indexer[0] + ndarray[intp_t, ndim=1] idx1 = indexer[1] + {{c_type_out}} fv + + n = len(idx0) + k = len(idx1) + + fv = fill_value + for i in range(n): + idx = idx0[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + for j in range(k): + if idx1[j] == -1: + out[i, j] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, idx1[j]] > 0 else False + {{else}} + out[i, j] = values[idx, idx1[j]] + {{endif}} + +{{endfor}} diff --git a/pandas/_libs/arrays.pxd b/pandas/_libs/arrays.pxd new file mode 100644 index 00000000..737da29d --- /dev/null +++ b/pandas/_libs/arrays.pxd @@ -0,0 +1,11 @@ + +from numpy cimport ndarray + + +cdef class NDArrayBacked: + cdef: + readonly ndarray _ndarray + readonly object _dtype + + cpdef NDArrayBacked _from_backing_data(self, ndarray values) + cpdef __setstate__(self, state) diff --git a/pandas/_libs/arrays.pyi b/pandas/_libs/arrays.pyi new file mode 100644 index 00000000..76e4321c --- /dev/null +++ b/pandas/_libs/arrays.pyi @@ -0,0 +1,34 @@ +from typing import Sequence + +import numpy as np + +from pandas._typing import ( + DtypeObj, + Shape, +) + +class NDArrayBacked: + _dtype: DtypeObj + _ndarray: np.ndarray + def __init__(self, values: np.ndarray, dtype: DtypeObj): ... + @classmethod + def _simple_new(cls, values: np.ndarray, dtype: DtypeObj): ... + def _from_backing_data(self, values: np.ndarray): ... + def __setstate__(self, state): ... + def __len__(self) -> int: ... + @property + def shape(self) -> Shape: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> int: ... + @property + def nbytes(self) -> int: ... + def copy(self): ... + def delete(self, loc, axis=...): ... + def swapaxes(self, axis1, axis2): ... + def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ... + def reshape(self, *args, **kwargs): ... + def ravel(self, order=...): ... + @property + def T(self): ... diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx new file mode 100644 index 00000000..f63d16e8 --- /dev/null +++ b/pandas/_libs/arrays.pyx @@ -0,0 +1,183 @@ +""" +Cython implementations for internal ExtensionArrays. +""" +cimport cython + +import numpy as np + +cimport numpy as cnp +from cpython cimport PyErr_Clear +from numpy cimport ndarray + +cnp.import_array() + + +@cython.freelist(16) +cdef class NDArrayBacked: + """ + Implementing these methods in cython improves performance quite a bit. + + import pandas as pd + + from pandas._libs.arrays import NDArrayBacked as cls + + dti = pd.date_range("2016-01-01", periods=3) + dta = dti._data + arr = dta._ndarray + + obj = cls._simple_new(arr, arr.dtype) + + # for foo in [arr, dta, obj]: ... 
+ + %timeit foo.copy() + 299 ns ± 30 ns per loop # <-- arr underlying ndarray (for reference) + 530 ns ± 9.24 ns per loop # <-- dta with cython NDArrayBacked + 1.66 µs ± 46.3 ns per loop # <-- dta without cython NDArrayBacked + 328 ns ± 5.29 ns per loop # <-- obj with NDArrayBacked.__cinit__ + 371 ns ± 6.97 ns per loop # <-- obj with NDArrayBacked._simple_new + + %timeit foo.T + 125 ns ± 6.27 ns per loop # <-- arr underlying ndarray (for reference) + 226 ns ± 7.66 ns per loop # <-- dta with cython NDArrayBacked + 911 ns ± 16.6 ns per loop # <-- dta without cython NDArrayBacked + 215 ns ± 4.54 ns per loop # <-- obj with NDArrayBacked._simple_new + + """ + # TODO: implement take in terms of cnp.PyArray_TakeFrom + # TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate + + # cdef: + # readonly ndarray _ndarray + # readonly object _dtype + + def __init__(self, ndarray values, object dtype): + self._ndarray = values + self._dtype = dtype + + @classmethod + def _simple_new(cls, ndarray values, object dtype): + cdef: + NDArrayBacked obj + obj = NDArrayBacked.__new__(cls) + obj._ndarray = values + obj._dtype = dtype + return obj + + cpdef NDArrayBacked _from_backing_data(self, ndarray values): + """ + Construct a new ExtensionArray `new_array` with `arr` as its _ndarray. + + This should round-trip: + self == self._from_backing_data(self._ndarray) + """ + # TODO: re-reuse simple_new if/when it can be cpdef + cdef: + NDArrayBacked obj + obj = NDArrayBacked.__new__(type(self)) + obj._ndarray = values + obj._dtype = self._dtype + return obj + + cpdef __setstate__(self, state): + if isinstance(state, dict): + if "_data" in state: + data = state.pop("_data") + elif "_ndarray" in state: + data = state.pop("_ndarray") + else: + raise ValueError # pragma: no cover + self._ndarray = data + self._dtype = state.pop("_dtype") + + for key, val in state.items(): + setattr(self, key, val) + elif isinstance(state, tuple): + if len(state) != 3: + if len(state) == 1 and isinstance(state[0], dict): + self.__setstate__(state[0]) + return + raise NotImplementedError(state) # pragma: no cover + + data, dtype = state[:2] + if isinstance(dtype, np.ndarray): + dtype, data = data, dtype + self._ndarray = data + self._dtype = dtype + + if isinstance(state[2], dict): + for key, val in state[2].items(): + setattr(self, key, val) + else: + raise NotImplementedError(state) # pragma: no cover + else: + raise NotImplementedError(state) # pragma: no cover + + def __len__(self) -> int: + return len(self._ndarray) + + @property + def shape(self): + # object cast bc _ndarray.shape is npy_intp* + return ((self._ndarray)).shape + + @property + def ndim(self) -> int: + return self._ndarray.ndim + + @property + def size(self) -> int: + return self._ndarray.size + + @property + def nbytes(self) -> int: + return self._ndarray.nbytes + + def copy(self, order="C"): + cdef: + cnp.NPY_ORDER order_code + int success + + success = cnp.PyArray_OrderConverter(order, &order_code) + if not success: + # clear exception so that we don't get a SystemError + PyErr_Clear() + # same message used by numpy + msg = f"order must be one of 'C', 'F', 'A', or 'K' (got '{order}')" + raise ValueError(msg) + + res_values = cnp.PyArray_NewCopy(self._ndarray, order_code) + return self._from_backing_data(res_values) + + def delete(self, loc, axis=0): + res_values = np.delete(self._ndarray, loc, axis=axis) + return self._from_backing_data(res_values) + + def swapaxes(self, axis1, axis2): + res_values = cnp.PyArray_SwapAxes(self._ndarray, axis1, axis2) + 
return self._from_backing_data(res_values) + + # TODO: pass NPY_MAXDIMS equiv to axis=None? + def repeat(self, repeats, axis: int | np.integer = 0): + if axis is None: + axis = 0 + res_values = cnp.PyArray_Repeat(self._ndarray, repeats, axis) + return self._from_backing_data(res_values) + + def reshape(self, *args, **kwargs): + res_values = self._ndarray.reshape(*args, **kwargs) + return self._from_backing_data(res_values) + + def ravel(self, order="C"): + # cnp.PyArray_OrderConverter(PyObject* obj, NPY_ORDER* order) + # res_values = cnp.PyArray_Ravel(self._ndarray, order) + res_values = self._ndarray.ravel(order) + return self._from_backing_data(res_values) + + @property + def T(self): + res_values = self._ndarray.T + return self._from_backing_data(res_values) + + def transpose(self, *axes): + res_values = self._ndarray.transpose(*axes) + return self._from_backing_data(res_values) diff --git a/pandas/_libs/dtypes.pxd b/pandas/_libs/dtypes.pxd new file mode 100644 index 00000000..ccfb2d2e --- /dev/null +++ b/pandas/_libs/dtypes.pxd @@ -0,0 +1,36 @@ +""" +Common location for shared fused types +""" + +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +# All numeric types except complex +ctypedef fused numeric_t: + int8_t + int16_t + int32_t + int64_t + + uint8_t + uint16_t + uint32_t + uint64_t + + float32_t + float64_t + +# All numeric types + object, doesn't include complex +ctypedef fused numeric_object_t: + numeric_t + object diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi new file mode 100644 index 00000000..04db0c9b --- /dev/null +++ b/pandas/_libs/groupby.pyi @@ -0,0 +1,182 @@ +from typing import Literal + +import numpy as np + +from pandas._typing import npt + +def group_median_float64( + out: np.ndarray, # ndarray[float64_t, ndim=2] + counts: npt.NDArray[np.int64], + values: np.ndarray, # ndarray[float64_t, ndim=2] + labels: npt.NDArray[np.int64], + min_count: int = ..., # Py_ssize_t +) -> None: ... +def group_cumprod_float64( + out: np.ndarray, # float64_t[:, ::1] + values: np.ndarray, # const float64_t[:, :] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + skipna: bool = ..., +) -> None: ... +def group_cumsum( + out: np.ndarray, # int64float_t[:, ::1] + values: np.ndarray, # ndarray[int64float_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + skipna: bool = ..., + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., +) -> None: ... +def group_shift_indexer( + out: np.ndarray, # int64_t[::1] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + periods: int, +) -> None: ... +def group_fillna_indexer( + out: np.ndarray, # ndarray[intp_t] + labels: np.ndarray, # ndarray[int64_t] + sorted_labels: npt.NDArray[np.intp], + mask: npt.NDArray[np.uint8], + direction: Literal["ffill", "bfill"], + limit: int, # int64_t + dropna: bool, +) -> None: ... +def group_any_all( + out: np.ndarray, # uint8_t[::1] + values: np.ndarray, # const uint8_t[::1] + labels: np.ndarray, # const int64_t[:] + mask: np.ndarray, # const uint8_t[::1] + val_test: Literal["any", "all"], + skipna: bool, +) -> None: ... 
+def group_sum( + out: np.ndarray, # complexfloatingintuint_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[complexfloatingintuint_t, ndim=2] + labels: np.ndarray, # const intp_t[:] + mask: np.ndarray | None, + result_mask: np.ndarray | None = ..., + min_count: int = ..., + is_datetimelike: bool = ..., +) -> None: ... +def group_prod( + out: np.ndarray, # int64float_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[int64float_t, ndim=2] + labels: np.ndarray, # const intp_t[:] + mask: np.ndarray | None, + result_mask: np.ndarray | None = ..., + min_count: int = ..., +) -> None: ... +def group_var( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., # Py_ssize_t + ddof: int = ..., # int64_t +) -> None: ... +def group_mean( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., # Py_ssize_t + is_datetimelike: bool = ..., # bint + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., +) -> None: ... +def group_ohlc( + out: np.ndarray, # floatingintuint_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floatingintuint_t, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., +) -> None: ... +def group_quantile( + out: npt.NDArray[np.float64], + values: np.ndarray, # ndarray[numeric, ndim=1] + labels: npt.NDArray[np.intp], + mask: npt.NDArray[np.uint8], + sort_indexer: npt.NDArray[np.intp], # const + qs: npt.NDArray[np.float64], # const + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"], +) -> None: ... +def group_last( + out: np.ndarray, # rank_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[rank_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + mask: npt.NDArray[np.bool_] | None, + result_mask: npt.NDArray[np.bool_] | None = ..., + min_count: int = ..., # Py_ssize_t + is_datetimelike: bool = ..., +) -> None: ... +def group_nth( + out: np.ndarray, # rank_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[rank_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + mask: npt.NDArray[np.bool_] | None, + result_mask: npt.NDArray[np.bool_] | None = ..., + min_count: int = ..., # int64_t + rank: int = ..., # int64_t + is_datetimelike: bool = ..., +) -> None: ... +def group_rank( + out: np.ndarray, # float64_t[:, ::1] + values: np.ndarray, # ndarray[rank_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + ties_method: Literal["average", "min", "max", "first", "dense"] = ..., + ascending: bool = ..., + pct: bool = ..., + na_option: Literal["keep", "top", "bottom"] = ..., + mask: npt.NDArray[np.bool_] | None = ..., +) -> None: ... +def group_max( + out: np.ndarray, # groupby_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[groupby_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + min_count: int = ..., + is_datetimelike: bool = ..., + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., +) -> None: ... 
+def group_min( + out: np.ndarray, # groupby_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[groupby_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + min_count: int = ..., + is_datetimelike: bool = ..., + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., +) -> None: ... +def group_cummin( + out: np.ndarray, # groupby_t[:, ::1] + values: np.ndarray, # ndarray[groupby_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., + skipna: bool = ..., +) -> None: ... +def group_cummax( + out: np.ndarray, # groupby_t[:, ::1] + values: np.ndarray, # ndarray[groupby_t, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + mask: np.ndarray | None = ..., + result_mask: np.ndarray | None = ..., + skipna: bool = ..., +) -> None: ... diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx new file mode 100644 index 00000000..299dfdf1 --- /dev/null +++ b/pandas/_libs/groupby.pyx @@ -0,0 +1,1782 @@ +cimport cython +from cython cimport ( + Py_ssize_t, + floating, +) +from libc.stdlib cimport ( + free, + malloc, +) + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + intp_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) +from numpy.math cimport NAN + +cnp.import_array() + +from pandas._libs cimport util +from pandas._libs.algos cimport ( + get_rank_nan_fill_val, + kth_smallest_c, +) + +from pandas._libs.algos import ( + ensure_platform_int, + groupsort_indexer, + rank_1d, + take_2d_axis1_float64_float64, +) + +from pandas._libs.dtypes cimport ( + numeric_object_t, + numeric_t, +) +from pandas._libs.missing cimport checknull + + +cdef int64_t NPY_NAT = util.get_nat() +_int64_max = np.iinfo(np.int64).max + +cdef float64_t NaN = np.NaN + +cdef enum InterpolationEnumType: + INTERPOLATION_LINEAR, + INTERPOLATION_LOWER, + INTERPOLATION_HIGHER, + INTERPOLATION_NEAREST, + INTERPOLATION_MIDPOINT + + +cdef inline float64_t median_linear(float64_t* a, int n) nogil: + cdef: + int i, j, na_count = 0 + float64_t result + float64_t* tmp + + if n == 0: + return NaN + + # count NAs + for i in range(n): + if a[i] != a[i]: + na_count += 1 + + if na_count: + if na_count == n: + return NaN + + tmp = malloc((n - na_count) * sizeof(float64_t)) + + j = 0 + for i in range(n): + if a[i] == a[i]: + tmp[j] = a[i] + j += 1 + + a = tmp + n -= na_count + + if n % 2: + result = kth_smallest_c(a, n // 2, n) + else: + result = (kth_smallest_c(a, n // 2, n) + + kth_smallest_c(a, n // 2 - 1, n)) / 2 + + if na_count: + free(a) + + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_median_float64( + ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[intp_t] labels, + Py_ssize_t min_count=-1, +) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, ngroups, size + ndarray[intp_t] _counts + ndarray[float64_t, ndim=2] data + ndarray[intp_t] indexer + float64_t* ptr + + assert min_count == -1, "'min_count' only used in sum and prod" + + ngroups = len(counts) + N, K = (values).shape + + indexer, _counts = groupsort_indexer(labels, ngroups) + counts[:] = _counts[1:] + + data = np.empty((K, N), dtype=np.float64) + ptr = cnp.PyArray_DATA(data) + + take_2d_axis1_float64_float64(values.T, indexer, 
out=data) + + with nogil: + + for i in range(K): + # exclude NA group + ptr += _counts[0] + for j in range(ngroups): + size = _counts[j + 1] + out[j, i] = median_linear(ptr, size) + ptr += size + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cumprod_float64( + float64_t[:, ::1] out, + const float64_t[:, :] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + bint skipna=True, +) -> None: + """ + Cumulative product of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : np.ndarray[np.float64, ndim=2] + Array to store cumprod in. + values : np.ndarray[np.float64, ndim=2] + Values to take cumprod of. + labels : np.ndarray[np.intp] + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + Always false, `values` is never datetime-like. + skipna : bool + If true, ignore nans in `values`. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. + """ + cdef: + Py_ssize_t i, j, N, K, size + float64_t val + float64_t[:, ::1] accum + intp_t lab + + N, K = (values).shape + accum = np.ones((ngroups, K), dtype=np.float64) + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + if val == val: + accum[lab, j] *= val + out[i, j] = accum[lab, j] + else: + out[i, j] = NaN + if not skipna: + accum[lab, j] = NaN + + +ctypedef fused int64float_t: + int64_t + uint64_t + float32_t + float64_t + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cumsum( + int64float_t[:, ::1] out, + ndarray[int64float_t, ndim=2] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + bint skipna=True, + const uint8_t[:, :] mask=None, + uint8_t[:, ::1] result_mask=None, +) -> None: + """ + Cumulative sum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : np.ndarray[ndim=2] + Array to store cumsum in. + values : np.ndarray[ndim=2] + Values to take cumsum of. + labels : np.ndarray[np.intp] + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + skipna : bool + If true, ignore nans in `values`. + mask: np.ndarray[uint8], optional + Mask of values + result_mask: np.ndarray[int8], optional + Mask of out array + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. 
+ """ + cdef: + Py_ssize_t i, j, N, K, size + int64float_t val, y, t, na_val + int64float_t[:, ::1] accum, compensation + uint8_t[:, ::1] accum_mask + intp_t lab + bint isna_entry, isna_prev = False + bint uses_mask = mask is not None + + N, K = (values).shape + + if uses_mask: + accum_mask = np.zeros((ngroups, K), dtype="uint8") + + accum = np.zeros((ngroups, K), dtype=np.asarray(values).dtype) + compensation = np.zeros((ngroups, K), dtype=np.asarray(values).dtype) + + na_val = _get_na_val(0, is_datetimelike) + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + else: + isna_entry = _treat_as_na(val, is_datetimelike) + + if not skipna: + if uses_mask: + isna_prev = accum_mask[lab, j] + else: + isna_prev = _treat_as_na(accum[lab, j], is_datetimelike) + + if isna_prev: + if uses_mask: + result_mask[i, j] = True + # Be deterministic, out was initialized as empty + out[i, j] = 0 + else: + out[i, j] = na_val + continue + + if isna_entry: + + if uses_mask: + result_mask[i, j] = True + # Be deterministic, out was initialized as empty + out[i, j] = 0 + else: + out[i, j] = na_val + + if not skipna: + if uses_mask: + accum_mask[lab, j] = True + else: + accum[lab, j] = na_val + + else: + # For floats, use Kahan summation to reduce floating-point + # error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm) + if int64float_t == float32_t or int64float_t == float64_t: + y = val - compensation[lab, j] + t = accum[lab, j] + y + compensation[lab, j] = t - accum[lab, j] - y + else: + t = val + accum[lab, j] + + accum[lab, j] = t + out[i, j] = t + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_shift_indexer( + int64_t[::1] out, + const intp_t[::1] labels, + int ngroups, + int periods, +) -> None: + cdef: + Py_ssize_t N, i, j, ii, lab + int offset = 0, sign + int64_t idxer, idxer_slot + int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64) + int64_t[:, ::1] label_indexer + + N, = (labels).shape + + if periods < 0: + periods = -periods + offset = N - 1 + sign = -1 + elif periods > 0: + offset = 0 + sign = 1 + + if periods == 0: + with nogil: + for i in range(N): + out[i] = i + else: + # array of each previous indexer seen + label_indexer = np.zeros((ngroups, periods), dtype=np.int64) + with nogil: + for i in range(N): + # reverse iterator if shifting backwards + ii = offset + sign * i + lab = labels[ii] + + # Skip null keys + if lab == -1: + out[ii] = -1 + continue + + label_seen[lab] += 1 + + idxer_slot = label_seen[lab] % periods + idxer = label_indexer[lab, idxer_slot] + + if label_seen[lab] > periods: + out[ii] = idxer + else: + out[ii] = -1 + + label_indexer[lab, idxer_slot] = ii + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_fillna_indexer( + ndarray[intp_t] out, + ndarray[intp_t] labels, + ndarray[intp_t] sorted_labels, + ndarray[uint8_t] mask, + str direction, + int64_t limit, + bint dropna, +) -> None: + """ + Indexes how to fill values forwards or backwards within a group. + + Parameters + ---------- + out : np.ndarray[np.intp] + Values into which this method will write its results. + labels : np.ndarray[np.intp] + Array containing unique label for each group, with its ordering + matching up to the corresponding record in `values`. 
+ sorted_labels : np.ndarray[np.intp] + obtained by `np.argsort(labels, kind="mergesort")`; reversed if + direction == "bfill" + values : np.ndarray[np.uint8] + Containing the truth value of each element. + mask : np.ndarray[np.uint8] + Indicating whether a value is na or not. + direction : {'ffill', 'bfill'} + Direction for fill to be applied (forwards or backwards, respectively) + limit : Consecutive values to fill before stopping, or -1 for no limit + dropna : Flag to indicate if NaN groups should return all NaN values + + Notes + ----- + This method modifies the `out` parameter rather than returning an object + """ + cdef: + Py_ssize_t i, N, idx + intp_t curr_fill_idx=-1 + int64_t filled_vals = 0 + + N = len(out) + + # Make sure all arrays are the same size + assert N == len(labels) == len(mask) + + with nogil: + for i in range(N): + idx = sorted_labels[i] + if dropna and labels[idx] == -1: # nan-group gets nan-values + curr_fill_idx = -1 + elif mask[idx] == 1: # is missing + # Stop filling once we've hit the limit + if filled_vals >= limit and limit != -1: + curr_fill_idx = -1 + filled_vals += 1 + else: # reset items when not missing + filled_vals = 0 + curr_fill_idx = idx + + out[idx] = curr_fill_idx + + # If we move to the next group, reset + # the fill_idx and counter + if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]: + curr_fill_idx = -1 + filled_vals = 0 + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_any_all( + int8_t[:, ::1] out, + const int8_t[:, :] values, + const intp_t[::1] labels, + const uint8_t[:, :] mask, + str val_test, + bint skipna, + bint nullable, +) -> None: + """ + Aggregated boolean values to show truthfulness of group elements. If the + input is a nullable type (nullable=True), the result will be computed + using Kleene logic. + + Parameters + ---------- + out : np.ndarray[np.int8] + Values into which this method will write its results. + labels : np.ndarray[np.intp] + Array containing unique label for each group, with its + ordering matching up to the corresponding record in `values` + values : np.ndarray[np.int8] + Containing the truth value of each element. + mask : np.ndarray[np.uint8] + Indicating whether a value is na or not. + val_test : {'any', 'all'} + String object dictating whether to use any or all truth testing + skipna : bool + Flag to ignore nan values during truth testing + nullable : bool + Whether or not the input is a nullable type. If True, the + result will be computed using Kleene logic + + Notes + ----- + This method modifies the `out` parameter rather than returning an object. + The returned values will either be 0, 1 (False or True, respectively), or + -1 to signify a masked position in the case of a nullable input. 
+ """ + cdef: + Py_ssize_t i, j, N = len(labels), K = out.shape[1] + intp_t lab + int8_t flag_val, val + + if val_test == 'all': + # Because the 'all' value of an empty iterable in Python is True we can + # start with an array full of ones and set to zero when a False value + # is encountered + flag_val = 0 + elif val_test == 'any': + # Because the 'any' value of an empty iterable in Python is False we + # can start with an array full of zeros and set to one only if any + # value encountered is True + flag_val = 1 + else: + raise ValueError("'bool_func' must be either 'any' or 'all'!") + + out[:] = 1 - flag_val + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + for j in range(K): + if skipna and mask[i, j]: + continue + + if nullable and mask[i, j]: + # Set the position as masked if `out[lab] != flag_val`, which + # would indicate True/False has not yet been seen for any/all, + # so by Kleene logic the result is currently unknown + if out[lab, j] != flag_val: + out[lab, j] = -1 + continue + + val = values[i, j] + + # If True and 'any' or False and 'all', the result is + # already determined + if val == flag_val: + out[lab, j] = flag_val + + +# ---------------------------------------------------------------------- +# group_sum, group_prod, group_var, group_mean, group_ohlc +# ---------------------------------------------------------------------- + +ctypedef fused mean_t: + float64_t + float32_t + complex64_t + complex128_t + +ctypedef fused sum_t: + mean_t + int64_t + uint64_t + object + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_sum( + sum_t[:, ::1] out, + int64_t[::1] counts, + ndarray[sum_t, ndim=2] values, + const intp_t[::1] labels, + const uint8_t[:, :] mask, + uint8_t[:, ::1] result_mask=None, + Py_ssize_t min_count=0, + bint is_datetimelike=False, +) -> None: + """ + Only aggregates on axis=0 using Kahan summation + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + sum_t val, t, y + sum_t[:, ::1] sumx, compensation + int64_t[:, ::1] nobs + Py_ssize_t len_values = len(values), len_labels = len(labels) + bint uses_mask = mask is not None + bint isna_entry + + if len_values != len_labels: + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + # the below is equivalent to `np.zeros_like(out)` but faster + sumx = np.zeros((out).shape, dtype=(out).base.dtype) + compensation = np.zeros((out).shape, dtype=(out).base.dtype) + + N, K = (values).shape + + if sum_t is object: + # NB: this does not use 'compensation' like the non-object track does. + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if not checknull(val): + nobs[lab, j] += 1 + + if nobs[lab, j] == 1: + # i.e. we haven't added anything yet; avoid TypeError + # if e.g. 
val is a str and sumx[lab, j] is 0 + t = val + else: + t = sumx[lab, j] + val + sumx[lab, j] = t + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = None + + else: + out[i, j] = sumx[i, j] + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + # With dt64/td64 values, values have been cast to float64 + # instead if int64 for group_sum, but the logic + # is otherwise the same as in _treat_as_na + if uses_mask: + isna_entry = mask[i, j] + elif (sum_t is float32_t or sum_t is float64_t + or sum_t is complex64_t or sum_t is complex64_t): + # avoid warnings because of equality comparison + isna_entry = not val == val + elif sum_t is int64_t and is_datetimelike and val == NPY_NAT: + isna_entry = True + else: + isna_entry = False + + if not isna_entry: + nobs[lab, j] += 1 + y = val - compensation[lab, j] + t = sumx[lab, j] + y + compensation[lab, j] = t - sumx[lab, j] - y + sumx[lab, j] = t + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + # if we are integer dtype, not is_datetimelike, and + # not uses_mask, then getting here implies that + # counts[i] < min_count, which means we will + # be cast to float64 and masked at the end + # of WrappedCythonOp._call_cython_op. So we can safely + # set a placeholder value in out[i, j]. + if uses_mask: + result_mask[i, j] = True + elif (sum_t is float32_t or sum_t is float64_t + or sum_t is complex64_t or sum_t is complex64_t): + out[i, j] = NAN + elif sum_t is int64_t: + out[i, j] = NPY_NAT + else: + # placeholder, see above + out[i, j] = 0 + + else: + out[i, j] = sumx[i, j] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_prod( + int64float_t[:, ::1] out, + int64_t[::1] counts, + ndarray[int64float_t, ndim=2] values, + const intp_t[::1] labels, + const uint8_t[:, ::1] mask, + uint8_t[:, ::1] result_mask=None, + Py_ssize_t min_count=0, +) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + int64float_t val, count + int64float_t[:, ::1] prodx + int64_t[:, ::1] nobs + Py_ssize_t len_values = len(values), len_labels = len(labels) + bint isna_entry, uses_mask = mask is not None + + if len_values != len_labels: + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + prodx = np.ones((out).shape, dtype=(out).base.dtype) + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + elif int64float_t is float32_t or int64float_t is float64_t: + isna_entry = not val == val + else: + isna_entry = False + + if not isna_entry: + nobs[lab, j] += 1 + prodx[lab, j] *= val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + + # else case is not possible + if uses_mask: + result_mask[i, j] = True + # Be deterministic, out was initialized as empty + out[i, j] = 0 + elif int64float_t is float32_t or int64float_t is float64_t: + out[i, j] = NAN + else: + # we only get here when < mincount which gets handled later + pass + + else: + out[i, j] = prodx[i, j] + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +def group_var( + floating[:, ::1] out, + int64_t[::1] counts, + ndarray[floating, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + int64_t 
ddof=1, +) -> None: + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + floating val, ct, oldmean + floating[:, ::1] mean + int64_t[:, ::1] nobs + Py_ssize_t len_values = len(values), len_labels = len(labels) + + assert min_count == -1, "'min_count' only used in sum and prod" + + if len_values != len_labels: + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + mean = np.zeros((out).shape, dtype=(out).base.dtype) + + N, K = (values).shape + + out[:, :] = 0.0 + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + oldmean = mean[lab, j] + mean[lab, j] += (val - oldmean) / nobs[lab, j] + out[lab, j] += (val - mean[lab, j]) * (val - oldmean) + + for i in range(ncounts): + for j in range(K): + ct = nobs[i, j] + if ct <= ddof: + out[i, j] = NAN + else: + out[i, j] /= (ct - ddof) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_mean( + mean_t[:, ::1] out, + int64_t[::1] counts, + ndarray[mean_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None, +) -> None: + """ + Compute the mean per label given a label assignment for each value. + NaN values are ignored. + + Parameters + ---------- + out : np.ndarray[floating] + Values into which this method will write its results. + counts : np.ndarray[int64] + A zeroed array of the same shape as labels, + populated by group sizes during algorithm. + values : np.ndarray[floating] + 2-d array of the values to find the mean of. + labels : np.ndarray[np.intp] + Array containing unique label for each group, with its + ordering matching up to the corresponding record in `values`. + min_count : Py_ssize_t + Only used in sum and prod. Always -1. + is_datetimelike : bool + True if `values` contains datetime-like entries. + mask : ndarray[bool, ndim=2], optional + Not used. + result_mask : ndarray[bool, ndim=2], optional + Not used. + + Notes + ----- + This method modifies the `out` parameter rather than returning an object. 
+ `counts` is modified to hold group sizes + """ + + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + mean_t val, count, y, t, nan_val + mean_t[:, ::1] sumx, compensation + int64_t[:, ::1] nobs + Py_ssize_t len_values = len(values), len_labels = len(labels) + + assert min_count == -1, "'min_count' only used in sum and prod" + + if len_values != len_labels: + raise ValueError("len(index) != len(labels)") + + # the below is equivalent to `np.zeros_like(out)` but faster + nobs = np.zeros((out).shape, dtype=np.int64) + sumx = np.zeros((out).shape, dtype=(out).base.dtype) + compensation = np.zeros((out).shape, dtype=(out).base.dtype) + + N, K = (values).shape + nan_val = NPY_NAT if is_datetimelike else NAN + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + # not nan + if val == val and not (is_datetimelike and val == NPY_NAT): + nobs[lab, j] += 1 + y = val - compensation[lab, j] + t = sumx[lab, j] + y + compensation[lab, j] = t - sumx[lab, j] - y + sumx[lab, j] = t + + for i in range(ncounts): + for j in range(K): + count = nobs[i, j] + if nobs[i, j] == 0: + out[i, j] = nan_val + else: + out[i, j] = sumx[i, j] / count + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_ohlc( + int64float_t[:, ::1] out, + int64_t[::1] counts, + ndarray[int64float_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None, +) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + int64float_t val + uint8_t[::1] first_element_set + bint isna_entry, uses_mask = not mask is None + + assert min_count == -1, "'min_count' only used in sum and prod" + + if len(labels) == 0: + return + + N, K = (values).shape + + if out.shape[1] != 4: + raise ValueError('Output array must have 4 columns') + + if K > 1: + raise NotImplementedError("Argument 'values' must have only one dimension") + + if int64float_t is float32_t or int64float_t is float64_t: + out[:] = np.nan + else: + out[:] = 0 + + first_element_set = np.zeros((counts).shape, dtype=np.uint8) + if uses_mask: + result_mask[:] = True + + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: + continue + + counts[lab] += 1 + val = values[i, 0] + + if uses_mask: + isna_entry = mask[i, 0] + elif int64float_t is float32_t or int64float_t is float64_t: + isna_entry = val != val + else: + isna_entry = False + + if isna_entry: + continue + + if not first_element_set[lab]: + out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val + first_element_set[lab] = True + if uses_mask: + result_mask[lab] = False + else: + out[lab, 1] = max(out[lab, 1], val) + out[lab, 2] = min(out[lab, 2], val) + out[lab, 3] = val + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_quantile( + ndarray[float64_t, ndim=2] out, + ndarray[numeric_t, ndim=1] values, + ndarray[intp_t] labels, + ndarray[uint8_t] mask, + const intp_t[:] sort_indexer, + const float64_t[:] qs, + str interpolation, +) -> None: + """ + Calculate the quantile per group. + + Parameters + ---------- + out : np.ndarray[np.float64, ndim=2] + Array of aggregated values that will be written to. + values : np.ndarray + Array containing the values to apply the function against. + labels : ndarray[np.intp] + Array containing the unique group labels. + sort_indexer : ndarray[np.intp] + Indices describing sort order by values and labels. 
+ qs : ndarray[float64_t] + The quantile values to search for. + interpolation : {'linear', 'lower', 'highest', 'nearest', 'midpoint'} + + Notes + ----- + Rather than explicitly returning a value, this function modifies the + provided `out` parameter. + """ + cdef: + Py_ssize_t i, N=len(labels), ngroups, grp_sz, non_na_sz, k, nqs + Py_ssize_t grp_start=0, idx=0 + intp_t lab + InterpolationEnumType interp + float64_t q_val, q_idx, frac, val, next_val + int64_t[::1] counts, non_na_counts + + assert values.shape[0] == N + + if any(not (0 <= q <= 1) for q in qs): + wrong = [x for x in qs if not (0 <= x <= 1)][0] + raise ValueError( + f"Each 'q' must be between 0 and 1. Got '{wrong}' instead" + ) + + inter_methods = { + 'linear': INTERPOLATION_LINEAR, + 'lower': INTERPOLATION_LOWER, + 'higher': INTERPOLATION_HIGHER, + 'nearest': INTERPOLATION_NEAREST, + 'midpoint': INTERPOLATION_MIDPOINT, + } + interp = inter_methods[interpolation] + + nqs = len(qs) + ngroups = len(out) + counts = np.zeros(ngroups, dtype=np.int64) + non_na_counts = np.zeros(ngroups, dtype=np.int64) + + # First figure out the size of every group + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: # NA group label + continue + + counts[lab] += 1 + if not mask[i]: + non_na_counts[lab] += 1 + + with nogil: + for i in range(ngroups): + # Figure out how many group elements there are + grp_sz = counts[i] + non_na_sz = non_na_counts[i] + + if non_na_sz == 0: + for k in range(nqs): + out[i, k] = NaN + else: + for k in range(nqs): + q_val = qs[k] + + # Calculate where to retrieve the desired value + # Casting to int will intentionally truncate result + idx = grp_start + (q_val * (non_na_sz - 1)) + + val = values[sort_indexer[idx]] + # If requested quantile falls evenly on a particular index + # then write that index's value out. Otherwise interpolate + q_idx = q_val * (non_na_sz - 1) + frac = q_idx % 1 + + if frac == 0.0 or interp == INTERPOLATION_LOWER: + out[i, k] = val + else: + next_val = values[sort_indexer[idx + 1]] + if interp == INTERPOLATION_LINEAR: + out[i, k] = val + (next_val - val) * frac + elif interp == INTERPOLATION_HIGHER: + out[i, k] = next_val + elif interp == INTERPOLATION_MIDPOINT: + out[i, k] = (val + next_val) / 2.0 + elif interp == INTERPOLATION_NEAREST: + if frac > .5 or (frac == .5 and q_val > .5): # Always OK? + out[i, k] = next_val + else: + out[i, k] = val + + # Increment the index reference in sorted_arr for the next group + grp_start += grp_sz + + +# ---------------------------------------------------------------------- +# group_nth, group_last, group_rank +# ---------------------------------------------------------------------- + +cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: + if numeric_object_t is object: + # Should never be used, but we need to avoid the `val != val` below + # or else cython will raise about gil acquisition. + raise NotImplementedError + + elif numeric_object_t is int64_t: + return is_datetimelike and val == NPY_NAT + elif numeric_object_t is float32_t or numeric_object_t is float64_t: + return val != val + else: + # non-datetimelike integer + return False + + +cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike): + """ + Find either the min or the max supported by numeric_object_t; 'val' is a + placeholder to effectively make numeric_object_t an argument. 
+ """ + return get_rank_nan_fill_val( + not compute_max, + val=val, + is_datetimelike=is_datetimelike, + ) + + +cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike): + cdef: + numeric_t na_val + + if numeric_t == float32_t or numeric_t == float64_t: + na_val = NaN + elif numeric_t is int64_t and is_datetimelike: + na_val = NPY_NAT + else: + # Used in case of masks + na_val = 0 + return na_val + + +# TODO(cython3): GH#31710 use memorviews once cython 0.30 is released so we can +# use `const numeric_object_t[:, :] values` +@cython.wraparound(False) +@cython.boundscheck(False) +def group_last( + numeric_object_t[:, ::1] out, + int64_t[::1] counts, + ndarray[numeric_object_t, ndim=2] values, + const intp_t[::1] labels, + const uint8_t[:, :] mask, + uint8_t[:, ::1] result_mask=None, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, +) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + numeric_object_t val + ndarray[numeric_object_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + bint uses_mask = mask is not None + bint isna_entry + + # TODO(cython3): + # Instead of `labels.shape[0]` use `len(labels)` + if not len(values) == labels.shape[0]: + raise AssertionError("len(index) != len(labels)") + + min_count = max(min_count, 1) + nobs = np.zeros((out).shape, dtype=np.int64) + if numeric_object_t is object: + resx = np.empty((out).shape, dtype=object) + else: + resx = np.empty_like(out) + + N, K = (values).shape + + if numeric_object_t is object: + # TODO(cython3): De-duplicate once conditional-nogil is available + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + else: + isna_entry = checknull(val) + + if not isna_entry: + # NB: use _treat_as_na here once + # conditional-nogil is available. + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = None + else: + out[i, j] = resx[i, j] + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + else: + isna_entry = _treat_as_na(val, is_datetimelike) + + if not isna_entry: + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + # TODO(cython3): the entire next block can be shared + # across 3 places once conditional-nogil is available + if nobs[i, j] < min_count: + # if we are integer dtype, not is_datetimelike, and + # not uses_mask, then getting here implies that + # counts[i] < min_count, which means we will + # be cast to float64 and masked at the end + # of WrappedCythonOp._call_cython_op. So we can safely + # set a placeholder value in out[i, j]. + if uses_mask: + result_mask[i, j] = True + elif numeric_object_t is float32_t or numeric_object_t is float64_t: + out[i, j] = NAN + elif numeric_object_t is int64_t: + # Per above, this is a placeholder in + # non-is_datetimelike cases. 
+ out[i, j] = NPY_NAT + else: + # placeholder, see above + out[i, j] = 0 + else: + out[i, j] = resx[i, j] + + +# TODO(cython3): GH#31710 use memorviews once cython 0.30 is released so we can +# use `const numeric_object_t[:, :] values` +@cython.wraparound(False) +@cython.boundscheck(False) +def group_nth( + numeric_object_t[:, ::1] out, + int64_t[::1] counts, + ndarray[numeric_object_t, ndim=2] values, + const intp_t[::1] labels, + const uint8_t[:, :] mask, + uint8_t[:, ::1] result_mask=None, + int64_t min_count=-1, + int64_t rank=1, + bint is_datetimelike=False, +) -> None: + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + numeric_object_t val + ndarray[numeric_object_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + bint uses_mask = mask is not None + bint isna_entry + + # TODO(cython3): + # Instead of `labels.shape[0]` use `len(labels)` + if not len(values) == labels.shape[0]: + raise AssertionError("len(index) != len(labels)") + + min_count = max(min_count, 1) + nobs = np.zeros((out).shape, dtype=np.int64) + if numeric_object_t is object: + resx = np.empty((out).shape, dtype=object) + else: + resx = np.empty_like(out) + + N, K = (values).shape + + if numeric_object_t is object: + # TODO(cython3): De-duplicate once conditional-nogil is available + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + else: + isna_entry = checknull(val) + + if not isna_entry: + # NB: use _treat_as_na here once + # conditional-nogil is available. + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = None + else: + out[i, j] = resx[i, j] + + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + else: + isna_entry = _treat_as_na(val, is_datetimelike) + + if not isna_entry: + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + # TODO: de-dup this whole block with group_last? + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + # if we are integer dtype, not is_datetimelike, and + # not uses_mask, then getting here implies that + # counts[i] < min_count, which means we will + # be cast to float64 and masked at the end + # of WrappedCythonOp._call_cython_op. So we can safely + # set a placeholder value in out[i, j]. + if uses_mask: + result_mask[i, j] = True + # set out[i, j] to 0 to be deterministic, as + # it was initialized with np.empty. Also ensures + # we can downcast out if appropriate. + out[i, j] = 0 + elif numeric_object_t is float32_t or numeric_object_t is float64_t: + out[i, j] = NAN + elif numeric_object_t is int64_t: + # Per above, this is a placeholder in + # non-is_datetimelike cases. + out[i, j] = NPY_NAT + else: + # placeholder, see above + out[i, j] = 0 + + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_rank( + float64_t[:, ::1] out, + ndarray[numeric_object_t, ndim=2] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + str ties_method="average", + bint ascending=True, + bint pct=False, + str na_option="keep", + const uint8_t[:, :] mask=None, +) -> None: + """ + Provides the rank of values within each group. 
+ + Parameters + ---------- + out : np.ndarray[np.float64, ndim=2] + Values to which this method will write its results. + values : np.ndarray of numeric_object_t values to be ranked + labels : np.ndarray[np.intp] + Array containing unique label for each group, with its ordering + matching up to the corresponding record in `values` + ngroups : int + This parameter is not used, is needed to match signatures of other + groupby functions. + is_datetimelike : bool + True if `values` contains datetime-like entries. + ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + * average: average rank of group + * min: lowest rank in group + * max: highest rank in group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups + ascending : bool, default True + False for ranks by high (1) to low (N) + na_option : {'keep', 'top', 'bottom'}, default 'keep' + pct : bool, default False + Compute percentage rank of data within each group + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are + * top: smallest rank if ascending + * bottom: smallest rank if descending + mask : np.ndarray[bool] or None, default None + + Notes + ----- + This method modifies the `out` parameter rather than returning an object + """ + cdef: + Py_ssize_t i, k, N + ndarray[float64_t, ndim=1] result + const uint8_t[:] sub_mask + + N = values.shape[1] + + for k in range(N): + if mask is None: + sub_mask = None + else: + sub_mask = mask[:, k] + + result = rank_1d( + values=values[:, k], + labels=labels, + is_datetimelike=is_datetimelike, + ties_method=ties_method, + ascending=ascending, + pct=pct, + na_option=na_option, + mask=sub_mask, + ) + for i in range(len(result)): + if labels[i] >= 0: + out[i, k] = result[i] + + +# ---------------------------------------------------------------------- +# group_min, group_max +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef group_min_max( + numeric_t[:, ::1] out, + int64_t[::1] counts, + ndarray[numeric_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, + bint compute_max=True, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None, +): + """ + Compute minimum/maximum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : np.ndarray[numeric_t, ndim=2] + Array to store result in. + counts : np.ndarray[int64] + Input as a zeroed array, populated by group sizes during algorithm + values : array + Values to find column-wise min/max of. + labels : np.ndarray[np.intp] + Labels to group by. + min_count : Py_ssize_t, default -1 + The minimum number of non-NA group elements, NA result if threshold + is not met + is_datetimelike : bool + True if `values` contains datetime-like entries. + compute_max : bint, default True + True to compute group-wise max, False to compute min + mask : ndarray[bool, ndim=2], optional + If not None, indices represent missing values, + otherwise the mask will not be used + result_mask : ndarray[bool, ndim=2], optional + If not None, these specify locations in the output that are NA. + Modified in-place. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. 
+ `counts` is modified to hold group sizes + """ + cdef: + Py_ssize_t i, j, N, K, lab, ngroups = len(counts) + numeric_t val + ndarray[numeric_t, ndim=2] group_min_or_max + int64_t[:, ::1] nobs + bint uses_mask = mask is not None + bint isna_entry + + # TODO(cython3): + # Instead of `labels.shape[0]` use `len(labels)` + if not len(values) == labels.shape[0]: + raise AssertionError("len(index) != len(labels)") + + min_count = max(min_count, 1) + nobs = np.zeros((out).shape, dtype=np.int64) + + group_min_or_max = np.empty_like(out) + group_min_or_max[:] = _get_min_or_max(0, compute_max, is_datetimelike) + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + else: + isna_entry = _treat_as_na(val, is_datetimelike) + + if not isna_entry: + nobs[lab, j] += 1 + if compute_max: + if val > group_min_or_max[lab, j]: + group_min_or_max[lab, j] = val + else: + if val < group_min_or_max[lab, j]: + group_min_or_max[lab, j] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] < min_count: + # if we are integer dtype, not is_datetimelike, and + # not uses_mask, then getting here implies that + # counts[i] < min_count, which means we will + # be cast to float64 and masked at the end + # of WrappedCythonOp._call_cython_op. So we can safely + # set a placeholder value in out[i, j]. + if uses_mask: + result_mask[i, j] = True + # set out[i, j] to 0 to be deterministic, as + # it was initialized with np.empty. Also ensures + # we can downcast out if appropriate. + out[i, j] = 0 + elif numeric_t is float32_t or numeric_t is float64_t: + out[i, j] = NAN + elif numeric_t is int64_t: + # Per above, this is a placeholder in + # non-is_datetimelike cases. + out[i, j] = NPY_NAT + else: + # placeholder, see above + out[i, j] = 0 + else: + out[i, j] = group_min_or_max[i, j] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_max( + numeric_t[:, ::1] out, + int64_t[::1] counts, + ndarray[numeric_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None, +) -> None: + """See group_min_max.__doc__""" + group_min_max( + out, + counts, + values, + labels, + min_count=min_count, + is_datetimelike=is_datetimelike, + compute_max=True, + mask=mask, + result_mask=result_mask, + ) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_min( + numeric_t[:, ::1] out, + int64_t[::1] counts, + ndarray[numeric_t, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1, + bint is_datetimelike=False, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None, +) -> None: + """See group_min_max.__doc__""" + group_min_max( + out, + counts, + values, + labels, + min_count=min_count, + is_datetimelike=is_datetimelike, + compute_max=False, + mask=mask, + result_mask=result_mask, + ) + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef group_cummin_max( + numeric_t[:, ::1] out, + ndarray[numeric_t, ndim=2] values, + const uint8_t[:, ::1] mask, + uint8_t[:, ::1] result_mask, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + bint skipna, + bint compute_max, +): + """ + Cumulative minimum/maximum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : np.ndarray[numeric_t, ndim=2] + Array to store cummin/max in. 
+ values : np.ndarray[numeric_t, ndim=2] + Values to take cummin/max of. + mask : np.ndarray[bool] or None + If not None, indices represent missing values, + otherwise the mask will not be used + result_mask : ndarray[bool, ndim=2], optional + If not None, these specify locations in the output that are NA. + Modified in-place. + labels : np.ndarray[np.intp] + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + skipna : bool + If True, ignore nans in `values`. + compute_max : bool + True if cumulative maximum should be computed, False + if cumulative minimum should be computed + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. + """ + cdef: + numeric_t[:, ::1] accum + Py_ssize_t i, j, N, K + numeric_t val, mval, na_val + uint8_t[:, ::1] seen_na + intp_t lab + bint na_possible + bint uses_mask = mask is not None + bint isna_entry + + accum = np.empty((ngroups, (values).shape[1]), dtype=values.dtype) + accum[:] = _get_min_or_max(0, compute_max, is_datetimelike) + + na_val = _get_na_val(0, is_datetimelike) + + if uses_mask: + na_possible = True + # Will never be used, just to avoid uninitialized warning + na_val = 0 + elif numeric_t is float64_t or numeric_t is float32_t: + na_possible = True + elif is_datetimelike: + na_possible = True + else: + # Will never be used, just to avoid uninitialized warning + na_possible = False + + if na_possible: + seen_na = np.zeros((accum).shape, dtype=np.uint8) + + N, K = (values).shape + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + for j in range(K): + + if not skipna and na_possible and seen_na[lab, j]: + if uses_mask: + result_mask[i, j] = 1 + # Set to 0 ensures that we are deterministic and can + # downcast if appropriate + out[i, j] = 0 + + else: + out[i, j] = na_val + else: + val = values[i, j] + + if uses_mask: + isna_entry = mask[i, j] + else: + isna_entry = _treat_as_na(val, is_datetimelike) + + if not isna_entry: + mval = accum[lab, j] + if compute_max: + if val > mval: + accum[lab, j] = mval = val + else: + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval + else: + seen_na[lab, j] = 1 + out[i, j] = val + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cummin( + numeric_t[:, ::1] out, + ndarray[numeric_t, ndim=2] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None, + bint skipna=True, +) -> None: + """See group_cummin_max.__doc__""" + group_cummin_max( + out=out, + values=values, + mask=mask, + result_mask=result_mask, + labels=labels, + ngroups=ngroups, + is_datetimelike=is_datetimelike, + skipna=skipna, + compute_max=False, + ) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cummax( + numeric_t[:, ::1] out, + ndarray[numeric_t, ndim=2] values, + const intp_t[::1] labels, + int ngroups, + bint is_datetimelike, + const uint8_t[:, ::1] mask=None, + uint8_t[:, ::1] result_mask=None, + bint skipna=True, +) -> None: + """See group_cummin_max.__doc__""" + group_cummin_max( + out=out, + values=values, + mask=mask, + result_mask=result_mask, + labels=labels, + ngroups=ngroups, + is_datetimelike=is_datetimelike, + skipna=skipna, + compute_max=True, + ) diff --git a/pandas/_libs/hashing.pyi b/pandas/_libs/hashing.pyi new file mode 100644 index 00000000..8361026e --- /dev/null +++ b/pandas/_libs/hashing.pyi @@ -0,0 +1,9 
@@ +import numpy as np + +from pandas._typing import npt + +def hash_object_array( + arr: npt.NDArray[np.object_], + key: str, + encoding: str = ..., +) -> npt.NDArray[np.uint64]: ... diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx new file mode 100644 index 00000000..9ea0fa73 --- /dev/null +++ b/pandas/_libs/hashing.pyx @@ -0,0 +1,196 @@ +# Translated from the reference implementation +# at https://github.com/veorq/SipHash + +cimport cython +from libc.stdlib cimport ( + free, + malloc, +) + +import numpy as np + +from numpy cimport ( + import_array, + ndarray, + uint8_t, + uint32_t, + uint64_t, +) + +import_array() + +from pandas._libs.util cimport is_nan + + +@cython.boundscheck(False) +def hash_object_array( + ndarray[object] arr, str key, str encoding="utf8" +) -> np.ndarray[np.uint64]: + """ + Parameters + ---------- + arr : 1-d object ndarray of objects + key : hash key, must be 16 byte len encoded + encoding : encoding for key & arr, default to 'utf8' + + Returns + ------- + 1-d uint64 ndarray of hashes. + + Raises + ------ + TypeError + If the array contains mixed types. + + Notes + ----- + Allowed values must be strings, or nulls + mixed array types will raise TypeError. + """ + cdef: + Py_ssize_t i, n + uint64_t[::1] result + bytes data, k + uint8_t *kb + uint64_t *lens + char **vecs + char *cdata + object val + list datas = [] + + k = key.encode(encoding) + kb = k + if len(k) != 16: + raise ValueError( + f"key should be a 16-byte string encoded, got {k} (len {len(k)})" + ) + + n = len(arr) + + # create an array of bytes + vecs = malloc(n * sizeof(char *)) + lens = malloc(n * sizeof(uint64_t)) + + for i in range(n): + val = arr[i] + if isinstance(val, bytes): + data = val + elif isinstance(val, str): + data = val.encode(encoding) + elif val is None or is_nan(val): + # null, stringify and encode + data = str(val).encode(encoding) + + elif isinstance(val, tuple): + # GH#28969 we could have a tuple, but need to ensure that + # the tuple entries are themselves hashable before converting + # to str + hash(val) + data = str(val).encode(encoding) + else: + raise TypeError( + f"{val} of type {type(val)} is not a valid type for hashing, " + "must be string or null" + ) + + lens[i] = len(data) + cdata = data + + # keep the references alive through the end of the + # function + datas.append(data) + vecs[i] = cdata + + result = np.empty(n, dtype=np.uint64) + with nogil: + for i in range(n): + result[i] = low_level_siphash(vecs[i], lens[i], kb) + + free(vecs) + free(lens) + return result.base # .base to retrieve underlying np.ndarray + + +cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil: + return (x << b) | (x >> (64 - b)) + + +cdef inline uint64_t u8to64_le(uint8_t* p) nogil: + return (p[0] | + p[1] << 8 | + p[2] << 16 | + p[3] << 24 | + p[4] << 32 | + p[5] << 40 | + p[6] << 48 | + p[7] << 56) + + +cdef inline void _sipround(uint64_t* v0, uint64_t* v1, + uint64_t* v2, uint64_t* v3) nogil: + v0[0] += v1[0] + v1[0] = _rotl(v1[0], 13) + v1[0] ^= v0[0] + v0[0] = _rotl(v0[0], 32) + v2[0] += v3[0] + v3[0] = _rotl(v3[0], 16) + v3[0] ^= v2[0] + v0[0] += v3[0] + v3[0] = _rotl(v3[0], 21) + v3[0] ^= v0[0] + v2[0] += v1[0] + v1[0] = _rotl(v1[0], 17) + v1[0] ^= v2[0] + v2[0] = _rotl(v2[0], 32) + + +@cython.cdivision(True) +cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen, + uint8_t* key) nogil: + cdef uint64_t v0 = 0x736f6d6570736575ULL + cdef uint64_t v1 = 0x646f72616e646f6dULL + cdef uint64_t v2 = 0x6c7967656e657261ULL + cdef uint64_t v3 = 0x7465646279746573ULL + 
cdef uint64_t b + cdef uint64_t k0 = u8to64_le(key) + cdef uint64_t k1 = u8to64_le(key + 8) + cdef uint64_t m + cdef int i + cdef uint8_t* end = data + datalen - (datalen % sizeof(uint64_t)) + cdef int left = datalen & 7 + cdef int left_byte + cdef int cROUNDS = 2 + cdef int dROUNDS = 4 + + b = (datalen) << 56 + v3 ^= k1 + v2 ^= k0 + v1 ^= k1 + v0 ^= k0 + + while (data != end): + m = u8to64_le(data) + v3 ^= m + for i in range(cROUNDS): + _sipround(&v0, &v1, &v2, &v3) + v0 ^= m + + data += sizeof(uint64_t) + + for i in range(left-1, -1, -1): + b |= (data[i]) << (i * 8) + + v3 ^= b + + for i in range(cROUNDS): + _sipround(&v0, &v1, &v2, &v3) + + v0 ^= b + v2 ^= 0xff + + for i in range(dROUNDS): + _sipround(&v0, &v1, &v2, &v3) + + b = v0 ^ v1 ^ v2 ^ v3 + + return b diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd new file mode 100644 index 00000000..80d7ab58 --- /dev/null +++ b/pandas/_libs/hashtable.pxd @@ -0,0 +1,141 @@ +from numpy cimport ( + intp_t, + ndarray, +) + +from pandas._libs.khash cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + kh_complex64_t, + kh_complex128_t, + kh_float32_t, + kh_float64_t, + kh_int8_t, + kh_int16_t, + kh_int32_t, + kh_int64_t, + kh_pymap_t, + kh_str_t, + kh_uint8_t, + kh_uint16_t, + kh_uint32_t, + kh_uint64_t, + khcomplex64_t, + khcomplex128_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +# prototypes for sharing + +cdef class HashTable: + pass + +cdef class UInt64HashTable(HashTable): + cdef kh_uint64_t *table + + cpdef get_item(self, uint64_t val) + cpdef set_item(self, uint64_t key, Py_ssize_t val) + +cdef class Int64HashTable(HashTable): + cdef kh_int64_t *table + + cpdef get_item(self, int64_t val) + cpdef set_item(self, int64_t key, Py_ssize_t val) + +cdef class UInt32HashTable(HashTable): + cdef kh_uint32_t *table + + cpdef get_item(self, uint32_t val) + cpdef set_item(self, uint32_t key, Py_ssize_t val) + +cdef class Int32HashTable(HashTable): + cdef kh_int32_t *table + + cpdef get_item(self, int32_t val) + cpdef set_item(self, int32_t key, Py_ssize_t val) + +cdef class UInt16HashTable(HashTable): + cdef kh_uint16_t *table + + cpdef get_item(self, uint16_t val) + cpdef set_item(self, uint16_t key, Py_ssize_t val) + +cdef class Int16HashTable(HashTable): + cdef kh_int16_t *table + + cpdef get_item(self, int16_t val) + cpdef set_item(self, int16_t key, Py_ssize_t val) + +cdef class UInt8HashTable(HashTable): + cdef kh_uint8_t *table + + cpdef get_item(self, uint8_t val) + cpdef set_item(self, uint8_t key, Py_ssize_t val) + +cdef class Int8HashTable(HashTable): + cdef kh_int8_t *table + + cpdef get_item(self, int8_t val) + cpdef set_item(self, int8_t key, Py_ssize_t val) + +cdef class Float64HashTable(HashTable): + cdef kh_float64_t *table + + cpdef get_item(self, float64_t val) + cpdef set_item(self, float64_t key, Py_ssize_t val) + +cdef class Float32HashTable(HashTable): + cdef kh_float32_t *table + + cpdef get_item(self, float32_t val) + cpdef set_item(self, float32_t key, Py_ssize_t val) + +cdef class Complex64HashTable(HashTable): + cdef kh_complex64_t *table + + cpdef get_item(self, complex64_t val) + cpdef set_item(self, complex64_t key, Py_ssize_t val) + +cdef class Complex128HashTable(HashTable): + cdef kh_complex128_t *table + + cpdef get_item(self, complex128_t val) + cpdef set_item(self, complex128_t key, Py_ssize_t val) + +cdef class PyObjectHashTable(HashTable): + cdef kh_pymap_t *table + + cpdef get_item(self, object val) + cpdef set_item(self, 
object key, Py_ssize_t val) + + +cdef class StringHashTable(HashTable): + cdef kh_str_t *table + + cpdef get_item(self, str val) + cpdef set_item(self, str key, Py_ssize_t val) + +cdef struct Int64VectorData: + int64_t *data + Py_ssize_t n, m + +cdef class Vector: + cdef bint external_view_exists + +cdef class Int64Vector(Vector): + cdef Int64VectorData *data + cdef ndarray ao + + cdef resize(self) + cpdef ndarray to_array(self) + cdef inline void append(self, int64_t x) + cdef extend(self, int64_t[:] x) diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi new file mode 100644 index 00000000..8500fdf2 --- /dev/null +++ b/pandas/_libs/hashtable.pyi @@ -0,0 +1,202 @@ +from typing import ( + Hashable, + Literal, +) + +import numpy as np + +from pandas._typing import npt + +def unique_label_indices( + labels: np.ndarray, # const int64_t[:] +) -> np.ndarray: ... + +class Factorizer: + count: int + def __init__(self, size_hint: int): ... + def get_count(self) -> int: ... + +class ObjectFactorizer(Factorizer): + table: PyObjectHashTable + uniques: ObjectVector + def factorize( + self, + values: npt.NDArray[np.object_], + sort: bool = ..., + na_sentinel=..., + na_value=..., + ) -> npt.NDArray[np.intp]: ... + +class Int64Factorizer(Factorizer): + table: Int64HashTable + uniques: Int64Vector + def factorize( + self, + values: np.ndarray, # const int64_t[:] + sort: bool = ..., + na_sentinel=..., + na_value=..., + ) -> npt.NDArray[np.intp]: ... + +class Int64Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.int64]: ... + +class Int32Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.int32]: ... + +class Int16Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.int16]: ... + +class Int8Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.int8]: ... + +class UInt64Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.uint64]: ... + +class UInt32Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.uint32]: ... + +class UInt16Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.uint16]: ... + +class UInt8Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.uint8]: ... + +class Float64Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.float64]: ... + +class Float32Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.float32]: ... + +class Complex128Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.complex128]: ... + +class Complex64Vector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.complex64]: ... + +class StringVector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.object_]: ... + +class ObjectVector: + def __init__(self, *args): ... + def __len__(self) -> int: ... + def to_array(self) -> npt.NDArray[np.object_]: ... 
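The HashTable subclasses stubbed below expose `get_labels` / `factorize`, whose core idea is a hash map from each distinct value to its first-seen position. Sketched here with a plain dict instead of a khash table (illustrative only; `naive_factorize` is not part of pandas):

    import numpy as np

    def naive_factorize(values, na_sentinel=-1):
        table = {}                      # value -> first-seen label
        uniques = []
        labels = np.empty(len(values), dtype=np.intp)
        for i, val in enumerate(values):
            if val != val:              # NaN: emit the sentinel, keep it out of uniques
                labels[i] = na_sentinel
                continue
            if val not in table:
                table[val] = len(uniques)
                uniques.append(val)
            labels[i] = table[val]
        return labels, np.asarray(uniques, dtype=object)

    labels, uniques = naive_factorize(np.array([1, 2, 1, np.nan], dtype=object))
    # labels is [0, 1, 0, -1]; uniques is [1, 2]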
+ +class HashTable: + # NB: The base HashTable class does _not_ actually have these methods; + # we are putting the here for the sake of mypy to avoid + # reproducing them in each subclass below. + def __init__(self, size_hint: int = ...): ... + def __len__(self) -> int: ... + def __contains__(self, key: Hashable) -> bool: ... + def sizeof(self, deep: bool = ...) -> int: ... + def get_state(self) -> dict[str, int]: ... + # TODO: `item` type is subclass-specific + def get_item(self, item): ... # TODO: return type? + def set_item(self, item) -> None: ... + def map_locations( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + ) -> None: ... + def lookup( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + ) -> npt.NDArray[np.intp]: ... + def get_labels( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + uniques, # SubclassTypeVector + count_prior: int = ..., + na_sentinel: int = ..., + na_value: object = ..., + ) -> npt.NDArray[np.intp]: ... + def unique( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + return_inverse: bool = ..., + ) -> tuple[ + np.ndarray, # np.ndarray[subclass-specific] + npt.NDArray[np.intp], + ] | np.ndarray: ... # np.ndarray[subclass-specific] + def factorize( + self, + values: np.ndarray, # np.ndarray[subclass-specific] + na_sentinel: int = ..., + na_value: object = ..., + mask=..., + ) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ... # np.ndarray[subclass-specific] + +class Complex128HashTable(HashTable): ... +class Complex64HashTable(HashTable): ... +class Float64HashTable(HashTable): ... +class Float32HashTable(HashTable): ... + +class Int64HashTable(HashTable): + # Only Int64HashTable has get_labels_groupby, map_keys_to_values + def get_labels_groupby( + self, + values: npt.NDArray[np.int64], # const int64_t[:] + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]: ... + def map_keys_to_values( + self, + keys: npt.NDArray[np.int64], + values: npt.NDArray[np.int64], # const int64_t[:] + ) -> None: ... + +class Int32HashTable(HashTable): ... +class Int16HashTable(HashTable): ... +class Int8HashTable(HashTable): ... +class UInt64HashTable(HashTable): ... +class UInt32HashTable(HashTable): ... +class UInt16HashTable(HashTable): ... +class UInt8HashTable(HashTable): ... +class StringHashTable(HashTable): ... +class PyObjectHashTable(HashTable): ... +class IntpHashTable(HashTable): ... + +def duplicated( + values: np.ndarray, + keep: Literal["last", "first", False] = ..., +) -> npt.NDArray[np.bool_]: ... +def mode( + values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = ... +) -> np.ndarray: ... +def value_count( + values: np.ndarray, + dropna: bool, + mask: npt.NDArray[np.bool_] | None = ..., +) -> tuple[np.ndarray, npt.NDArray[np.int64]]: ... # np.ndarray[same-as-values] + +# arr and values should have same dtype +def ismember( + arr: np.ndarray, + values: np.ndarray, +) -> npt.NDArray[np.bool_]: ... +def object_hash(obj) -> int: ... +def objects_are_equal(a, b) -> bool: ... 
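Editor's note (not part of the imported patch): the stubs ending here describe the Python-facing surface of the compiled hashtable extension. As a minimal usage sketch, assuming the extension builds and is importable as pandas._libs.hashtable exactly as declared in hashtable.pyi above, the typed tables and module-level helpers could be exercised as below; the result values in the comments are illustrative, derived by hand from the template code that follows.

    import numpy as np
    from pandas._libs import hashtable as ht

    values = np.array([3, 1, 3, 2, 1], dtype=np.int64)

    # Typed hash table: factorize returns the uniques in first-seen order plus
    # the inverse labels mapping each input value to its position in uniques.
    table = ht.Int64HashTable()
    uniques, labels = table.factorize(values)
    # uniques -> array([3, 1, 2]); labels -> array([0, 1, 0, 2, 1])

    # Module-level helpers dispatch on dtype through the fused-type wrappers.
    ht.duplicated(values, keep="first")              # array([False, False, True, False, True])
    keys, counts = ht.value_count(values, True)      # keys in first-seen order, int64 counts
    ht.ismember(values, np.array([1, 2], dtype=np.int64))  # elementwise membership mask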
diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx new file mode 100644 index 00000000..bbc17c4c --- /dev/null +++ b/pandas/_libs/hashtable.pyx @@ -0,0 +1,183 @@ +cimport cython +from cpython.mem cimport ( + PyMem_Free, + PyMem_Malloc, +) +from cpython.ref cimport ( + Py_INCREF, + PyObject, +) +from libc.stdlib cimport ( + free, + malloc, +) + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float64_t, + ndarray, + uint8_t, + uint32_t, +) +from numpy.math cimport NAN + +cnp.import_array() + + +from pandas._libs cimport util +from pandas._libs.dtypes cimport numeric_object_t +from pandas._libs.khash cimport ( + KHASH_TRACE_DOMAIN, + are_equivalent_float32_t, + are_equivalent_float64_t, + are_equivalent_khcomplex64_t, + are_equivalent_khcomplex128_t, + kh_needed_n_buckets, + kh_python_hash_equal, + kh_python_hash_func, + kh_str_t, + khcomplex64_t, + khcomplex128_t, + khiter_t, +) +from pandas._libs.missing cimport checknull + + +def get_hashtable_trace_domain(): + return KHASH_TRACE_DOMAIN + + +def object_hash(obj): + return kh_python_hash_func(obj) + + +def objects_are_equal(a, b): + return kh_python_hash_equal(a, b) + + +cdef int64_t NPY_NAT = util.get_nat() +SIZE_HINT_LIMIT = (1 << 20) + 7 + + +cdef Py_ssize_t _INIT_VEC_CAP = 128 + +include "hashtable_class_helper.pxi" +include "hashtable_func_helper.pxi" + + +# map derived hash-map types onto basic hash-map types: +if np.dtype(np.intp) == np.dtype(np.int64): + IntpHashTable = Int64HashTable + unique_label_indices = _unique_label_indices_int64 +elif np.dtype(np.intp) == np.dtype(np.int32): + IntpHashTable = Int32HashTable + unique_label_indices = _unique_label_indices_int32 +else: + raise ValueError(np.dtype(np.intp)) + + +cdef class Factorizer: + cdef readonly: + Py_ssize_t count + + def __cinit__(self, size_hint: int): + self.count = 0 + + def get_count(self) -> int: + return self.count + + +cdef class ObjectFactorizer(Factorizer): + cdef public: + PyObjectHashTable table + ObjectVector uniques + + def __cinit__(self, size_hint: int): + self.table = PyObjectHashTable(size_hint) + self.uniques = ObjectVector() + + def factorize( + self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None + ) -> np.ndarray: + """ + + Returns + ------- + np.ndarray[np.intp] + + Examples + -------- + Factorize values with nans replaced by na_sentinel + + >>> fac = ObjectFactorizer(3) + >>> fac.factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) + array([ 0, 1, 20]) + """ + cdef: + ndarray[intp_t] labels + + if self.uniques.external_view_exists: + uniques = ObjectVector() + uniques.extend(self.uniques.to_array()) + self.uniques = uniques + labels = self.table.get_labels(values, self.uniques, + self.count, na_sentinel, na_value) + mask = (labels == na_sentinel) + # sort on + if sort: + sorter = self.uniques.to_array().argsort() + reverse_indexer = np.empty(len(sorter), dtype=np.intp) + reverse_indexer.put(sorter, np.arange(len(sorter))) + labels = reverse_indexer.take(labels, mode='clip') + labels[mask] = na_sentinel + self.count = len(self.uniques) + return labels + + +cdef class Int64Factorizer(Factorizer): + cdef public: + Int64HashTable table + Int64Vector uniques + + def __cinit__(self, size_hint: int): + self.table = Int64HashTable(size_hint) + self.uniques = Int64Vector() + + def factorize(self, const int64_t[:] values, sort=False, + na_sentinel=-1, na_value=None) -> np.ndarray: + """ + Returns + ------- + ndarray[intp_t] + + Examples + -------- + Factorize values with nans replaced by 
na_sentinel + + >>> fac = Int64Factorizer(3) + >>> fac.factorize(np.array([1,2,3]), na_sentinel=20) + array([0, 1, 2]) + """ + cdef: + ndarray[intp_t] labels + + if self.uniques.external_view_exists: + uniques = Int64Vector() + uniques.extend(self.uniques.to_array()) + self.uniques = uniques + labels = self.table.get_labels(values, self.uniques, + self.count, na_sentinel, + na_value=na_value) + + # sort on + if sort: + sorter = self.uniques.to_array().argsort() + reverse_indexer = np.empty(len(sorter), dtype=np.intp) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + labels = reverse_indexer.take(labels) + + self.count = len(self.uniques) + return labels diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in new file mode 100644 index 00000000..54260a9a --- /dev/null +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -0,0 +1,1390 @@ +""" +Template for each `dtype` helper function for hashtable + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + + +{{py: + +# name +complex_types = ['complex64', + 'complex128'] +}} + +{{for name in complex_types}} +cdef kh{{name}}_t to_kh{{name}}_t({{name}}_t val) nogil: + cdef kh{{name}}_t res + res.real = val.real + res.imag = val.imag + return res + +{{endfor}} + + +{{py: + + +# name +c_types = ['khcomplex128_t', + 'khcomplex64_t', + 'float64_t', + 'float32_t', + 'int64_t', + 'int32_t', + 'int16_t', + 'int8_t', + 'uint64_t', + 'uint32_t', + 'uint16_t', + 'uint8_t'] +}} + +{{for c_type in c_types}} + +cdef bint is_nan_{{c_type}}({{c_type}} val) nogil: + {{if c_type in {'khcomplex128_t', 'khcomplex64_t'} }} + return val.real != val.real or val.imag != val.imag + {{elif c_type in {'float64_t', 'float32_t'} }} + return val != val + {{else}} + return False + {{endif}} + + +{{if c_type in {'khcomplex128_t', 'khcomplex64_t', 'float64_t', 'float32_t'} }} +# are_equivalent_{{c_type}} is cimported via khash.pxd +{{else}} +cdef bint are_equivalent_{{c_type}}({{c_type}} val1, {{c_type}} val2) nogil: + return val1 == val2 +{{endif}} + +{{endfor}} + + +{{py: + +# name +cimported_types = ['complex64', + 'complex128', + 'float32', + 'float64', + 'int8', + 'int16', + 'int32', + 'int64', + 'pymap', + 'str', + 'strbox', + 'uint8', + 'uint16', + 'uint32', + 'uint64'] +}} + +{{for name in cimported_types}} +from pandas._libs.khash cimport ( + kh_destroy_{{name}}, + kh_exist_{{name}}, + kh_get_{{name}}, + kh_init_{{name}}, + kh_put_{{name}}, + kh_resize_{{name}}, +) + +{{endfor}} + +# ---------------------------------------------------------------------- +# VectorData +# ---------------------------------------------------------------------- + +from pandas._libs.tslibs.util cimport get_c_string +from pandas._libs.missing cimport C_NA + +{{py: + +# name, dtype, c_type +# the generated StringVector is not actually used +# but is included for completeness (rather ObjectVector is used +# for uniques in hashtables) + +dtypes = [('Complex128', 'complex128', 'khcomplex128_t'), + ('Complex64', 'complex64', 'khcomplex64_t'), + ('Float64', 'float64', 'float64_t'), + ('Float32', 'float32', 'float32_t'), + ('Int64', 'int64', 'int64_t'), + ('Int32', 'int32', 'int32_t'), + ('Int16', 'int16', 'int16_t'), + ('Int8', 'int8', 'int8_t'), + ('String', 'string', 'char *'), + ('UInt64', 'uint64', 'uint64_t'), + ('UInt32', 'uint32', 'uint32_t'), + ('UInt16', 'uint16', 'uint16_t'), + ('UInt8', 'uint8', 'uint8_t')] +}} + +{{for name, dtype, c_type in dtypes}} + + +{{if dtype != 'int64'}} +# Int64VectorData is defined in 
the .pxd file because it is needed (indirectly) +# by IntervalTree + +ctypedef struct {{name}}VectorData: + {{c_type}} *data + Py_ssize_t n, m + +{{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline void append_data_{{dtype}}({{name}}VectorData *data, + {{c_type}} x) nogil: + + data.data[data.n] = x + data.n += 1 + +{{endfor}} + +ctypedef fused vector_data: + Int64VectorData + Int32VectorData + Int16VectorData + Int8VectorData + UInt64VectorData + UInt32VectorData + UInt16VectorData + UInt8VectorData + Float64VectorData + Float32VectorData + Complex128VectorData + Complex64VectorData + StringVectorData + +cdef inline bint needs_resize(vector_data *data) nogil: + return data.n == data.m + +# ---------------------------------------------------------------------- +# Vector +# ---------------------------------------------------------------------- + +cdef class Vector: + # cdef readonly: + # bint external_view_exists + + def __cinit__(self): + self.external_view_exists = False + + +{{py: + +# name, dtype, c_type +dtypes = [('Complex128', 'complex128', 'khcomplex128_t'), + ('Complex64', 'complex64', 'khcomplex64_t'), + ('Float64', 'float64', 'float64_t'), + ('UInt64', 'uint64', 'uint64_t'), + ('Int64', 'int64', 'int64_t'), + ('Float32', 'float32', 'float32_t'), + ('UInt32', 'uint32', 'uint32_t'), + ('Int32', 'int32', 'int32_t'), + ('UInt16', 'uint16', 'uint16_t'), + ('Int16', 'int16', 'int16_t'), + ('UInt8', 'uint8', 'uint8_t'), + ('Int8', 'int8', 'int8_t')] + +}} + +{{for name, dtype, c_type in dtypes}} + +cdef class {{name}}Vector(Vector): + + # For int64 we have to put this declaration in the .pxd file; + # Int64Vector is the only one we need exposed for other cython files. + {{if dtype != 'int64'}} + cdef: + {{name}}VectorData *data + ndarray ao + {{endif}} + + def __cinit__(self): + self.data = <{{name}}VectorData *>PyMem_Malloc( + sizeof({{name}}VectorData)) + if not self.data: + raise MemoryError() + self.data.n = 0 + self.data.m = _INIT_VEC_CAP + self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) + self.data.data = <{{c_type}}*>self.ao.data + + cdef resize(self): + self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) + self.ao.resize(self.data.m, refcheck=False) + self.data.data = <{{c_type}}*>self.ao.data + + def __dealloc__(self): + if self.data is not NULL: + PyMem_Free(self.data) + self.data = NULL + + def __len__(self) -> int: + return self.data.n + + cpdef ndarray to_array(self): + if self.data.m != self.data.n: + if self.external_view_exists: + # should never happen + raise ValueError("should have raised on append()") + self.ao.resize(self.data.n, refcheck=False) + self.data.m = self.data.n + self.external_view_exists = True + return self.ao + + cdef inline void append(self, {{c_type}} x): + + if needs_resize(self.data): + if self.external_view_exists: + raise ValueError("external reference but " + "Vector.resize() needed") + self.resize() + + append_data_{{dtype}}(self.data, x) + + cdef extend(self, const {{c_type}}[:] x): + for i in range(len(x)): + self.append(x[i]) + +{{endfor}} + +cdef class StringVector(Vector): + + cdef: + StringVectorData *data + + def __cinit__(self): + self.data = PyMem_Malloc(sizeof(StringVectorData)) + if not self.data: + raise MemoryError() + self.data.n = 0 + self.data.m = _INIT_VEC_CAP + self.data.data = malloc(self.data.m * sizeof(char *)) + if not self.data.data: + raise MemoryError() + + cdef resize(self): + cdef: + char **orig_data + Py_ssize_t i, m + + m = self.data.m + self.data.m = max(self.data.m * 4, 
_INIT_VEC_CAP) + + orig_data = self.data.data + self.data.data = malloc(self.data.m * sizeof(char *)) + if not self.data.data: + raise MemoryError() + for i in range(m): + self.data.data[i] = orig_data[i] + + def __dealloc__(self): + if self.data is not NULL: + if self.data.data is not NULL: + free(self.data.data) + PyMem_Free(self.data) + self.data = NULL + + def __len__(self) -> int: + return self.data.n + + cpdef ndarray[object, ndim=1] to_array(self): + cdef: + ndarray ao + Py_ssize_t n + object val + + ao = np.empty(self.data.n, dtype=object) + for i in range(self.data.n): + val = self.data.data[i] + ao[i] = val + self.external_view_exists = True + self.data.m = self.data.n + return ao + + cdef inline void append(self, char *x): + + if needs_resize(self.data): + self.resize() + + append_data_string(self.data, x) + + cdef extend(self, ndarray[object] x): + for i in range(len(x)): + self.append(x[i]) + + +cdef class ObjectVector(Vector): + + cdef: + PyObject **data + Py_ssize_t n, m + ndarray ao + + def __cinit__(self): + self.n = 0 + self.m = _INIT_VEC_CAP + self.ao = np.empty(_INIT_VEC_CAP, dtype=object) + self.data = self.ao.data + + def __len__(self) -> int: + return self.n + + cdef inline append(self, object obj): + if self.n == self.m: + if self.external_view_exists: + raise ValueError("external reference but " + "Vector.resize() needed") + self.m = max(self.m * 2, _INIT_VEC_CAP) + self.ao.resize(self.m, refcheck=False) + self.data = self.ao.data + + Py_INCREF(obj) + self.data[self.n] = obj + self.n += 1 + + cpdef ndarray[object, ndim=1] to_array(self): + if self.m != self.n: + if self.external_view_exists: + raise ValueError("should have raised on append()") + self.ao.resize(self.n, refcheck=False) + self.m = self.n + self.external_view_exists = True + return self.ao + + cdef extend(self, ndarray[object] x): + for i in range(len(x)): + self.append(x[i]) + +# ---------------------------------------------------------------------- +# HashTable +# ---------------------------------------------------------------------- + + +cdef class HashTable: + + pass + +{{py: + +# name, dtype, c_type, to_c_type +dtypes = [('Complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'), + ('Float64', 'float64', 'float64_t', ''), + ('UInt64', 'uint64', 'uint64_t', ''), + ('Int64', 'int64', 'int64_t', ''), + ('Complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'), + ('Float32', 'float32', 'float32_t', ''), + ('UInt32', 'uint32', 'uint32_t', ''), + ('Int32', 'int32', 'int32_t', ''), + ('UInt16', 'uint16', 'uint16_t', ''), + ('Int16', 'int16', 'int16_t', ''), + ('UInt8', 'uint8', 'uint8_t', ''), + ('Int8', 'int8', 'int8_t', '')] + +}} + + +{{for name, dtype, c_type, to_c_type in dtypes}} + +cdef class {{name}}HashTable(HashTable): + + def __cinit__(self, int64_t size_hint=1): + self.table = kh_init_{{dtype}}() + size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT) + kh_resize_{{dtype}}(self.table, size_hint) + + def __len__(self) -> int: + return self.table.size + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_{{dtype}}(self.table) + self.table = NULL + + def __contains__(self, object key) -> bool: + cdef: + khiter_t k + {{c_type}} ckey + ckey = {{to_c_type}}(key) + k = kh_get_{{dtype}}(self.table, ckey) + return k != self.table.n_buckets + + def sizeof(self, deep: bool = False) -> int: + """ return the size of my table in bytes """ + overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) + for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) + 
for_pairs = self.table.n_buckets * (sizeof({{dtype}}_t) + # keys + sizeof(Py_ssize_t)) # vals + return overhead + for_flags + for_pairs + + def get_state(self) -> dict[str, int]: + """ returns infos about the state of the hashtable""" + return { + 'n_buckets' : self.table.n_buckets, + 'size' : self.table.size, + 'n_occupied' : self.table.n_occupied, + 'upper_bound' : self.table.upper_bound, + } + + cpdef get_item(self, {{dtype}}_t val): + # Used in core.sorting, IndexEngine.get_loc + cdef: + khiter_t k + {{c_type}} cval + cval = {{to_c_type}}(val) + k = kh_get_{{dtype}}(self.table, cval) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, {{dtype}}_t key, Py_ssize_t val): + # Used in libjoin + cdef: + khiter_t k + int ret = 0 + {{c_type}} ckey + ckey = {{to_c_type}}(key) + k = kh_put_{{dtype}}(self.table, ckey, &ret) + if kh_exist_{{dtype}}(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + {{if dtype == "int64" }} + # We only use this for int64, can reduce build size and make .pyi + # more accurate by only implementing it for int64 + @cython.boundscheck(False) + def map_keys_to_values( + self, const {{dtype}}_t[:] keys, const int64_t[:] values + ) -> None: + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{c_type}} key + khiter_t k + + with nogil: + for i in range(n): + key = {{to_c_type}}(keys[i]) + k = kh_put_{{dtype}}(self.table, key, &ret) + self.table.vals[k] = values[i] + {{endif}} + + @cython.boundscheck(False) + def map_locations(self, const {{dtype}}_t[:] values) -> None: + # Used in libindex, safe_sort + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{c_type}} val + khiter_t k + + with nogil: + for i in range(n): + val= {{to_c_type}}(values[i]) + k = kh_put_{{dtype}}(self.table, val, &ret) + self.table.vals[k] = i + + @cython.boundscheck(False) + def lookup(self, const {{dtype}}_t[:] values) -> ndarray: + # -> np.ndarray[np.intp] + # Used in safe_sort, IndexEngine.get_indexer + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{c_type}} val + khiter_t k + intp_t[::1] locs = np.empty(n, dtype=np.intp) + + with nogil: + for i in range(n): + val = {{to_c_type}}(values[i]) + k = kh_get_{{dtype}}(self.table, val) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + return np.asarray(locs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, const {{dtype}}_t[:] values, {{name}}Vector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + object mask=None, bint return_inverse=False, bint use_result_mask=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + uniques : {{name}}Vector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + ignore_na : bool, default False + Whether NA-values should be ignored for calculating the uniques. If + True, the labels corresponding to missing values will be set to + na_sentinel. 
+ mask : ndarray[bool], optional + If not None, the mask is used as indicator for missing values + (True = missing, False = valid) instead of `na_value` or + condition "val != val". + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + use_result_mask: bool, default False + Whether to create a result mask for the unique values. Not supported + with return_inverse=True. + + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse=True) + The labels from values to uniques + result_mask: ndarray[bool], if use_result_mask is true + The mask for the result values. + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + intp_t[::1] labels + int ret = 0 + {{c_type}} val, na_value2 + khiter_t k + {{name}}VectorData *ud + UInt8Vector result_mask + UInt8VectorData *rmd + bint use_na_value, use_mask, seen_na = False + uint8_t[:] mask_values + + if return_inverse: + labels = np.empty(n, dtype=np.intp) + ud = uniques.data + use_na_value = na_value is not None + use_mask = mask is not None + if not use_mask and use_result_mask: + raise NotImplementedError # pragma: no cover + + if use_result_mask and return_inverse: + raise NotImplementedError # pragma: no cover + + result_mask = UInt8Vector() + rmd = result_mask.data + + if use_mask: + mask_values = mask.view("uint8") + + if use_na_value: + # We need this na_value2 because we want to allow users + # to *optionally* specify an NA sentinel *of the correct* type. + # We use None, to make it optional, which requires `object` type + # for the parameter. To please the compiler, we use na_value2, + # which is only used if it's *specified*. + na_value2 = {{to_c_type}}(na_value) + else: + na_value2 = {{to_c_type}}(0) + + with nogil: + for i in range(n): + val = {{to_c_type}}(values[i]) + + if ignore_na and use_mask: + if mask_values[i]: + labels[i] = na_sentinel + continue + elif ignore_na and ( + is_nan_{{c_type}}(val) or + (use_na_value and are_equivalent_{{c_type}}(val, na_value2)) + ): + # if missing values do not count as unique values (i.e. 
if + # ignore_na is True), skip the hashtable entry for them, + # and replace the corresponding label with na_sentinel + labels[i] = na_sentinel + continue + elif not ignore_na and use_result_mask: + if mask_values[i]: + if seen_na: + continue + + seen_na = True + if needs_resize(ud): + with gil: + if uniques.external_view_exists: + raise ValueError("external reference to " + "uniques held, but " + "Vector.resize() needed") + uniques.resize() + if result_mask.external_view_exists: + raise ValueError("external reference to " + "result_mask held, but " + "Vector.resize() needed") + result_mask.resize() + append_data_{{dtype}}(ud, val) + append_data_uint8(rmd, 1) + continue + + k = kh_get_{{dtype}}(self.table, val) + + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_{{dtype}}(self.table, val, &ret) + + if needs_resize(ud): + with gil: + if uniques.external_view_exists: + raise ValueError("external reference to " + "uniques held, but " + "Vector.resize() needed") + uniques.resize() + if use_result_mask: + if result_mask.external_view_exists: + raise ValueError("external reference to " + "result_mask held, but " + "Vector.resize() needed") + result_mask.resize() + append_data_{{dtype}}(ud, val) + if use_result_mask: + append_data_uint8(rmd, 0) + + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + if return_inverse: + return uniques.to_array(), labels.base # .base -> underlying ndarray + if use_result_mask: + return uniques.to_array(), result_mask.to_array() + return uniques.to_array() + + def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False, object mask=None): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + mask : ndarray[bool], optional + If not None, the mask is used as indicator for missing values + (True = missing, False = valid) instead of `na_value` or + + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse) + The labels from values to uniques + result_mask: ndarray[bool], if mask is given as input + The mask for the result values. + """ + uniques = {{name}}Vector() + use_result_mask = True if mask is not None else False + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse, mask=mask, use_result_mask=use_result_mask) + + def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1, + object na_value=None, object mask=None, ignore_na=True): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. + The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then + any value "val" satisfying val != val is considered missing. 
+ If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + mask : ndarray[bool], optional + If not None, the mask is used as indicator for missing values + (True = missing, False = valid) instead of `na_value` or + condition "val != val". + + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[intp_t] + The labels from values to uniques + """ + uniques_vector = {{name}}Vector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=ignore_na, mask=mask, + return_inverse=True) + + def get_labels(self, const {{dtype}}_t[:] values, {{name}}Vector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + # -> np.ndarray[np.intp] + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels + + {{if dtype == 'int64'}} + @cython.boundscheck(False) + def get_labels_groupby( + self, const {{dtype}}_t[:] values + ) -> tuple[ndarray, ndarray]: + # tuple[np.ndarray[np.intp], np.ndarray[{{dtype}}]] + cdef: + Py_ssize_t i, n = len(values) + intp_t[::1] labels + Py_ssize_t idx, count = 0 + int ret = 0 + {{c_type}} val + khiter_t k + {{name}}Vector uniques = {{name}}Vector() + {{name}}VectorData *ud + + labels = np.empty(n, dtype=np.intp) + ud = uniques.data + + with nogil: + for i in range(n): + val = {{to_c_type}}(values[i]) + + # specific for groupby + if val < 0: + labels[i] = -1 + continue + + k = kh_get_{{dtype}}(self.table, val) + if k != self.table.n_buckets: + idx = self.table.vals[k] + labels[i] = idx + else: + k = kh_put_{{dtype}}(self.table, val, &ret) + self.table.vals[k] = count + + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{{dtype}}(ud, val) + labels[i] = count + count += 1 + + arr_uniques = uniques.to_array() + + return np.asarray(labels), arr_uniques + {{endif}} + +{{endfor}} + + +cdef class StringHashTable(HashTable): + # these by-definition *must* be strings + # or a sentinel np.nan / None missing value + na_string_sentinel = '__nan__' + + def __init__(self, int64_t size_hint=1): + self.table = kh_init_str() + size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT) + kh_resize_str(self.table, size_hint) + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_str(self.table) + self.table = NULL + + def sizeof(self, deep: bool = False) -> int: + overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) + for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) + for_pairs = self.table.n_buckets * (sizeof(char *) + # keys + sizeof(Py_ssize_t)) # vals + return overhead + for_flags + for_pairs + + def get_state(self) -> dict[str, int]: + """ returns infos about the state of the hashtable""" + return { + 'n_buckets' : self.table.n_buckets, + 'size' : self.table.size, + 'n_occupied' : self.table.n_occupied, + 'upper_bound' : self.table.upper_bound, + } + + cpdef get_item(self, str val): + cdef: + khiter_t k + const char *v + v = get_c_string(val) + + k = kh_get_str(self.table, v) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, str key, Py_ssize_t val): + cdef: + khiter_t k + int ret = 0 + const char *v + + v = get_c_string(key) + + k = kh_put_str(self.table, v, &ret) + if kh_exist_str(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + 
@cython.boundscheck(False) + def get_indexer(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] + cdef: + Py_ssize_t i, n = len(values) + ndarray[intp_t] labels = np.empty(n, dtype=np.intp) + intp_t *resbuf = labels.data + khiter_t k + kh_str_t *table = self.table + const char *v + const char **vecs + + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + v = get_c_string(val) + vecs[i] = v + + with nogil: + for i in range(n): + k = kh_get_str(table, vecs[i]) + if k != table.n_buckets: + resbuf[i] = table.vals[k] + else: + resbuf[i] = -1 + + free(vecs) + return labels + + @cython.boundscheck(False) + def lookup(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + const char *v + khiter_t k + intp_t[::1] locs = np.empty(n, dtype=np.intp) + + # these by-definition *must* be strings + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if isinstance(val, str): + # GH#31499 if we have a np.str_ get_c_string won't recognize + # it as a str, even though isinstance does. + v = get_c_string(val) + else: + v = get_c_string(self.na_string_sentinel) + vecs[i] = v + + with nogil: + for i in range(n): + v = vecs[i] + k = kh_get_str(self.table, v) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + free(vecs) + return np.asarray(locs) + + @cython.boundscheck(False) + def map_locations(self, ndarray[object] values) -> None: + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + const char *v + const char **vecs + khiter_t k + + # these by-definition *must* be strings + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if isinstance(val, str): + # GH#31499 if we have a np.str_ get_c_string won't recognize + # it as a str, even though isinstance does. + v = get_c_string(val) + else: + v = get_c_string(self.na_string_sentinel) + vecs[i] = v + + with nogil: + for i in range(n): + v = vecs[i] + k = kh_put_str(self.table, v, &ret) + self.table.vals[k] = i + free(vecs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + uniques : ObjectVector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then any value + that is not a string is considered missing. If na_value is + not None, then _additionally_ any value "val" satisfying + val == na_value is considered missing. + ignore_na : bool, default False + Whether NA-values should be ignored for calculating the uniques. If + True, the labels corresponding to missing values will be set to + na_sentinel. + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. 
+ + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse=True) + The labels from values to uniques + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + intp_t[::1] labels + int64_t[::1] uindexer + int ret = 0 + object val + const char *v + const char **vecs + khiter_t k + bint use_na_value + + if return_inverse: + labels = np.zeros(n, dtype=np.intp) + uindexer = np.empty(n, dtype=np.int64) + use_na_value = na_value is not None + + # assign pointers and pre-filter out missing (if ignore_na) + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if (ignore_na + and (not isinstance(val, str) + or (use_na_value and val == na_value))): + # if missing values do not count as unique values (i.e. if + # ignore_na is True), we can skip the actual value, and + # replace the label with na_sentinel directly + labels[i] = na_sentinel + else: + # if ignore_na is False, we also stringify NaN/None/etc. + try: + v = get_c_string(val) + except UnicodeEncodeError: + v = get_c_string(repr(val)) + vecs[i] = v + + # compute + with nogil: + for i in range(n): + if ignore_na and labels[i] == na_sentinel: + # skip entries for ignored missing values (see above) + continue + + v = vecs[i] + k = kh_get_str(self.table, v) + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_str(self.table, v, &ret) + uindexer[count] = i + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + free(vecs) + + # uniques + for i in range(count): + uniques.append(values[uindexer[i]]) + + if return_inverse: + return uniques.to_array(), labels.base # .base -> underlying ndarray + return uniques.to_array() + + def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + mask : ndarray[bool], optional + Not yet implemented for StringHashTable + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse) + The labels from values to uniques + """ + uniques = ObjectVector() + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse) + + def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1, + object na_value=None, object mask=None, ignore_na=True): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. + The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then any value + that is not a string is considered missing. If na_value is + not None, then _additionally_ any value "val" satisfying + val == na_value is considered missing. + mask : ndarray[bool], optional + Not yet implemented for StringHashTable. 
+ + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp] + The labels from values to uniques + """ + uniques_vector = ObjectVector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=ignore_na, + return_inverse=True) + + def get_labels(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + # -> np.ndarray[np.intp] + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels + + +cdef class PyObjectHashTable(HashTable): + + def __init__(self, int64_t size_hint=1): + self.table = kh_init_pymap() + size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT) + kh_resize_pymap(self.table, size_hint) + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_pymap(self.table) + self.table = NULL + + def __len__(self) -> int: + return self.table.size + + def __contains__(self, object key) -> bool: + cdef: + khiter_t k + hash(key) + + k = kh_get_pymap(self.table, key) + return k != self.table.n_buckets + + def sizeof(self, deep: bool = False) -> int: + """ return the size of my table in bytes """ + overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) + for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) + for_pairs = self.table.n_buckets * (sizeof(PyObject *) + # keys + sizeof(Py_ssize_t)) # vals + return overhead + for_flags + for_pairs + + def get_state(self) -> dict[str, int]: + """ + returns infos about the current state of the hashtable like size, + number of buckets and so on. + """ + return { + 'n_buckets' : self.table.n_buckets, + 'size' : self.table.size, + 'n_occupied' : self.table.n_occupied, + 'upper_bound' : self.table.upper_bound, + } + + cpdef get_item(self, object val): + cdef: + khiter_t k + + k = kh_get_pymap(self.table, val) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, object key, Py_ssize_t val): + cdef: + khiter_t k + int ret = 0 + char* buf + + hash(key) + + k = kh_put_pymap(self.table, key, &ret) + if kh_exist_pymap(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + def map_locations(self, ndarray[object] values) -> None: + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + khiter_t k + + for i in range(n): + val = values[i] + hash(val) + + k = kh_put_pymap(self.table, val, &ret) + self.table.vals[k] = i + + def lookup(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + khiter_t k + intp_t[::1] locs = np.empty(n, dtype=np.intp) + + for i in range(n): + val = values[i] + hash(val) + + k = kh_get_pymap(self.table, val) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + return np.asarray(locs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) 
+ + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + uniques : ObjectVector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then None _plus_ + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + ignore_na : bool, default False + Whether NA-values should be ignored for calculating the uniques. If + True, the labels corresponding to missing values will be set to + na_sentinel. + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse=True) + The labels from values to uniques + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + intp_t[::1] labels + int ret = 0 + object val + khiter_t k + bint use_na_value + + if return_inverse: + labels = np.empty(n, dtype=np.intp) + use_na_value = na_value is not None + + for i in range(n): + val = values[i] + hash(val) + + if ignore_na and ( + checknull(val) + or (use_na_value and val == na_value) + ): + # if missing values do not count as unique values (i.e. if + # ignore_na is True), skip the hashtable entry for them, and + # replace the corresponding label with na_sentinel + labels[i] = na_sentinel + continue + + k = kh_get_pymap(self.table, val) + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_pymap(self.table, val, &ret) + uniques.append(val) + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + if return_inverse: + return uniques.to_array(), labels.base # .base -> underlying ndarray + return uniques.to_array() + + def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + return_inverse : bool, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + mask : ndarray[bool], optional + Not yet implemented for PyObjectHashTable + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] (if return_inverse) + The labels from values to uniques + """ + uniques = ObjectVector() + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse) + + def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1, + object na_value=None, object mask=None, ignore_na=True): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. 
+ The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then None _plus_ + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + mask : ndarray[bool], optional + Not yet implemented for PyObjectHashTable. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[intp_t] + The labels from values to uniques + """ + uniques_vector = ObjectVector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=ignore_na, + return_inverse=True) + + def get_labels(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + # -> np.ndarray[np.intp] + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in new file mode 100644 index 00000000..f7c41b32 --- /dev/null +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -0,0 +1,468 @@ +""" +Template for each `dtype` helper function for hashtable + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +{{py: + +# name, dtype, ttype, c_type, to_c_type +dtypes = [('Complex128', 'complex128', 'complex128', + 'khcomplex128_t', 'to_khcomplex128_t'), + ('Complex64', 'complex64', 'complex64', + 'khcomplex64_t', 'to_khcomplex64_t'), + ('Float64', 'float64', 'float64', 'float64_t', ''), + ('Float32', 'float32', 'float32', 'float32_t', ''), + ('UInt64', 'uint64', 'uint64', 'uint64_t', ''), + ('UInt32', 'uint32', 'uint32', 'uint32_t', ''), + ('UInt16', 'uint16', 'uint16', 'uint16_t', ''), + ('UInt8', 'uint8', 'uint8', 'uint8_t', ''), + ('Object', 'object', 'pymap', 'object', ''), + ('Int64', 'int64', 'int64', 'int64_t', ''), + ('Int32', 'int32', 'int32', 'int32_t', ''), + ('Int16', 'int16', 'int16', 'int16_t', ''), + ('Int8', 'int8', 'int8', 'int8_t', '')] + +}} + +{{for name, dtype, ttype, c_type, to_c_type in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna, const uint8_t[:] mask=None): +{{else}} +cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8_t[:] mask=None): +{{endif}} + cdef: + Py_ssize_t i = 0 + Py_ssize_t n = len(values) + kh_{{ttype}}_t *table + + # Don't use Py_ssize_t, since table.n_buckets is unsigned + khiter_t k + + {{c_type}} val + + int ret = 0 + bint uses_mask = mask is not None + bint isna_entry = False + + if uses_mask and not dropna: + raise NotImplementedError("uses_mask not implemented with dropna=False") + + # we track the order in which keys are first seen (GH39009), + # khash-map isn't insertion-ordered, thus: + # table maps keys to counts + # result_keys remembers the original order of keys + + result_keys = {{name}}Vector() + table = kh_init_{{ttype}}() + + {{if dtype == 'object'}} + if uses_mask: + raise NotImplementedError("uses_mask not implemented with object dtype") + + 
kh_resize_{{ttype}}(table, n // 10) + + for i in range(n): + val = values[i] + if not dropna or not checknull(val): + k = kh_get_{{ttype}}(table, {{to_c_type}}val) + if k != table.n_buckets: + table.vals[k] += 1 + else: + k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret) + table.vals[k] = 1 + result_keys.append(val) + {{else}} + kh_resize_{{ttype}}(table, n) + + for i in range(n): + val = {{to_c_type}}(values[i]) + + if dropna: + if uses_mask: + isna_entry = mask[i] + else: + isna_entry = is_nan_{{c_type}}(val) + + if not dropna or not isna_entry: + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + table.vals[k] += 1 + else: + k = kh_put_{{ttype}}(table, val, &ret) + table.vals[k] = 1 + result_keys.append(val) + {{endif}} + + # collect counts in the order corresponding to result_keys: + cdef: + int64_t[::1] result_counts = np.empty(table.size, dtype=np.int64) + + for i in range(table.size): + {{if dtype == 'object'}} + k = kh_get_{{ttype}}(table, result_keys.data[i]) + {{else}} + k = kh_get_{{ttype}}(table, result_keys.data.data[i]) + {{endif}} + result_counts[i] = table.vals[k] + + kh_destroy_{{ttype}}(table) + + return result_keys.to_array(), result_counts.base + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cdef duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): +{{else}} +cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'): +{{endif}} + cdef: + int ret = 0 + {{if dtype != 'object'}} + {{c_type}} value + {{else}} + PyObject* value + {{endif}} + Py_ssize_t i, n = len(values) + khiter_t k + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool') + + kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) + + if keep not in ('last', 'first', False): + raise ValueError('keep must be either "first", "last" or False') + + if keep == 'last': + {{if dtype == 'object'}} + if True: + {{else}} + with nogil: + {{endif}} + for i in range(n - 1, -1, -1): + # equivalent: range(n)[::-1], which cython doesn't like in nogil + value = {{to_c_type}}(values[i]) + kh_put_{{ttype}}(table, value, &ret) + out[i] = ret == 0 + + elif keep == 'first': + {{if dtype == 'object'}} + if True: + {{else}} + with nogil: + {{endif}} + for i in range(n): + value = {{to_c_type}}(values[i]) + kh_put_{{ttype}}(table, value, &ret) + out[i] = ret == 0 + + else: + {{if dtype == 'object'}} + if True: + {{else}} + with nogil: + {{endif}} + for i in range(n): + value = {{to_c_type}}(values[i]) + k = kh_get_{{ttype}}(table, value) + if k != table.n_buckets: + out[table.vals[k]] = 1 + out[i] = 1 + else: + k = kh_put_{{ttype}}(table, value, &ret) + table.vals[k] = i + out[i] = 0 + + kh_destroy_{{ttype}}(table) + return out + + +# ---------------------------------------------------------------------- +# Membership +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cdef ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values): +{{else}} +cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): +{{endif}} + """ + Return boolean of values in arr on an + element by-element basis + + Parameters + ---------- + arr : {{dtype}} ndarray + values : {{dtype}} ndarray + + Returns + ------- + boolean ndarray len of (arr) + """ + cdef: + Py_ssize_t i, n + khiter_t k + int ret = 0 + ndarray[uint8_t] result + + {{if dtype == "object"}} + 
PyObject* val + {{else}} + {{c_type}} val + {{endif}} + + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + + # construct the table + n = len(values) + kh_resize_{{ttype}}(table, n) + + {{if dtype == 'object'}} + if True: + {{else}} + with nogil: + {{endif}} + for i in range(n): + val = {{to_c_type}}(values[i]) + kh_put_{{ttype}}(table, val, &ret) + + # test membership + n = len(arr) + result = np.empty(n, dtype=np.uint8) + + {{if dtype == 'object'}} + if True: + {{else}} + with nogil: + {{endif}} + for i in range(n): + val = {{to_c_type}}(arr[i]) + k = kh_get_{{ttype}}(table, val) + result[i] = (k != table.n_buckets) + + kh_destroy_{{ttype}}(table) + return result.view(np.bool_) + +# ---------------------------------------------------------------------- +# Mode Computations +# ---------------------------------------------------------------------- + +{{endfor}} + + +ctypedef fused htfunc_t: + numeric_object_t + complex128_t + complex64_t + + +cpdef value_count(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): + if htfunc_t is object: + return value_count_object(values, dropna, mask=mask) + + elif htfunc_t is int8_t: + return value_count_int8(values, dropna, mask=mask) + elif htfunc_t is int16_t: + return value_count_int16(values, dropna, mask=mask) + elif htfunc_t is int32_t: + return value_count_int32(values, dropna, mask=mask) + elif htfunc_t is int64_t: + return value_count_int64(values, dropna, mask=mask) + + elif htfunc_t is uint8_t: + return value_count_uint8(values, dropna, mask=mask) + elif htfunc_t is uint16_t: + return value_count_uint16(values, dropna, mask=mask) + elif htfunc_t is uint32_t: + return value_count_uint32(values, dropna, mask=mask) + elif htfunc_t is uint64_t: + return value_count_uint64(values, dropna, mask=mask) + + elif htfunc_t is float64_t: + return value_count_float64(values, dropna, mask=mask) + elif htfunc_t is float32_t: + return value_count_float32(values, dropna, mask=mask) + + elif htfunc_t is complex128_t: + return value_count_complex128(values, dropna, mask=mask) + elif htfunc_t is complex64_t: + return value_count_complex64(values, dropna, mask=mask) + + else: + raise TypeError(values.dtype) + + +cpdef duplicated(ndarray[htfunc_t] values, object keep="first"): + if htfunc_t is object: + return duplicated_object(values, keep) + + elif htfunc_t is int8_t: + return duplicated_int8(values, keep) + elif htfunc_t is int16_t: + return duplicated_int16(values, keep) + elif htfunc_t is int32_t: + return duplicated_int32(values, keep) + elif htfunc_t is int64_t: + return duplicated_int64(values, keep) + + elif htfunc_t is uint8_t: + return duplicated_uint8(values, keep) + elif htfunc_t is uint16_t: + return duplicated_uint16(values, keep) + elif htfunc_t is uint32_t: + return duplicated_uint32(values, keep) + elif htfunc_t is uint64_t: + return duplicated_uint64(values, keep) + + elif htfunc_t is float64_t: + return duplicated_float64(values, keep) + elif htfunc_t is float32_t: + return duplicated_float32(values, keep) + + elif htfunc_t is complex128_t: + return duplicated_complex128(values, keep) + elif htfunc_t is complex64_t: + return duplicated_complex64(values, keep) + + else: + raise TypeError(values.dtype) + + +cpdef ismember(ndarray[htfunc_t] arr, ndarray[htfunc_t] values): + if htfunc_t is object: + return ismember_object(arr, values) + + elif htfunc_t is int8_t: + return ismember_int8(arr, values) + elif htfunc_t is int16_t: + return ismember_int16(arr, values) + elif htfunc_t is int32_t: + return ismember_int32(arr, values) + elif 
htfunc_t is int64_t: + return ismember_int64(arr, values) + + elif htfunc_t is uint8_t: + return ismember_uint8(arr, values) + elif htfunc_t is uint16_t: + return ismember_uint16(arr, values) + elif htfunc_t is uint32_t: + return ismember_uint32(arr, values) + elif htfunc_t is uint64_t: + return ismember_uint64(arr, values) + + elif htfunc_t is float64_t: + return ismember_float64(arr, values) + elif htfunc_t is float32_t: + return ismember_float32(arr, values) + + elif htfunc_t is complex128_t: + return ismember_complex128(arr, values) + elif htfunc_t is complex64_t: + return ismember_complex64(arr, values) + + else: + raise TypeError(values.dtype) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): + # TODO(cython3): use const htfunct_t[:] + + cdef: + ndarray[htfunc_t] keys + ndarray[htfunc_t] modes + + int64_t[::1] counts + int64_t count, max_count = -1 + Py_ssize_t nkeys, k, j = 0 + + keys, counts = value_count(values, dropna, mask=mask) + nkeys = len(keys) + + modes = np.empty(nkeys, dtype=values.dtype) + + if htfunc_t is not object: + with nogil: + for k in range(nkeys): + count = counts[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = keys[k] + else: + for k in range(nkeys): + count = counts[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = keys[k] + + return modes[:j + 1] + + +{{py: + +# name, dtype, ttype, c_type +dtypes = [('Int64', 'int64', 'int64', 'int64_t'), + ('Int32', 'int32', 'int32', 'int32_t'), ] + +}} + +{{for name, dtype, ttype, c_type in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: + """ + Indices of the first occurrences of the unique labels + *excluding* -1. equivalent to: + np.unique(labels, return_index=True)[1] + """ + cdef: + int ret = 0 + Py_ssize_t i, n = len(labels) + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + {{name}}Vector idx = {{name}}Vector() + ndarray[{{c_type}}, ndim=1] arr + {{name}}VectorData *ud = idx.data + + kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) + + with nogil: + for i in range(n): + kh_put_{{ttype}}(table, labels[i], &ret) + if ret != 0: + if needs_resize(ud): + with gil: + idx.resize() + append_data_{{ttype}}(ud, i) + + kh_destroy_{{ttype}}(table) + + arr = idx.to_array() + arr = arr[np.asarray(labels)[arr].argsort()] + + return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr + +{{endfor}} diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi new file mode 100644 index 00000000..575f8384 --- /dev/null +++ b/pandas/_libs/index.pyi @@ -0,0 +1,87 @@ +import numpy as np + +from pandas._typing import npt + +from pandas import MultiIndex +from pandas.core.arrays import ExtensionArray + +class IndexEngine: + over_size_threshold: bool + def __init__(self, values: np.ndarray): ... + def __contains__(self, val: object) -> bool: ... + # -> int | slice | np.ndarray[bool] + def get_loc(self, val: object) -> int | slice | np.ndarray: ... + def sizeof(self, deep: bool = ...) -> int: ... + def __sizeof__(self) -> int: ... + @property + def is_unique(self) -> bool: ... + @property + def is_monotonic_increasing(self) -> bool: ... + @property + def is_monotonic_decreasing(self) -> bool: ... + @property + def is_mapping_populated(self) -> bool: ... + def clear_mapping(self): ... 
+ def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ... + def get_indexer_non_unique( + self, + targets: np.ndarray, + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... + +class Float64Engine(IndexEngine): ... +class Float32Engine(IndexEngine): ... +class Complex128Engine(IndexEngine): ... +class Complex64Engine(IndexEngine): ... +class Int64Engine(IndexEngine): ... +class Int32Engine(IndexEngine): ... +class Int16Engine(IndexEngine): ... +class Int8Engine(IndexEngine): ... +class UInt64Engine(IndexEngine): ... +class UInt32Engine(IndexEngine): ... +class UInt16Engine(IndexEngine): ... +class UInt8Engine(IndexEngine): ... +class ObjectEngine(IndexEngine): ... +class DatetimeEngine(Int64Engine): ... +class TimedeltaEngine(DatetimeEngine): ... +class PeriodEngine(Int64Engine): ... +class BoolEngine(UInt8Engine): ... + +class BaseMultiIndexCodesEngine: + levels: list[np.ndarray] + offsets: np.ndarray # ndarray[uint64_t, ndim=1] + def __init__( + self, + levels: list[np.ndarray], # all entries hashable + labels: list[np.ndarray], # all entries integer-dtyped + offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1] + ): ... + def get_indexer( + self, + target: npt.NDArray[np.object_], + ) -> npt.NDArray[np.intp]: ... + def _extract_level_codes(self, target: MultiIndex) -> np.ndarray: ... + def get_indexer_with_fill( + self, + target: np.ndarray, # np.ndarray[object] of tuples + values: np.ndarray, # np.ndarray[object] of tuples + method: str, + limit: int | None, + ) -> npt.NDArray[np.intp]: ... + +class ExtensionEngine: + def __init__(self, values: ExtensionArray): ... + def __contains__(self, val: object) -> bool: ... + def get_loc(self, val: object) -> int | slice | np.ndarray: ... + def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ... + def get_indexer_non_unique( + self, + targets: np.ndarray, + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... + @property + def is_unique(self) -> bool: ... + @property + def is_monotonic_increasing(self) -> bool: ... + @property + def is_monotonic_decreasing(self) -> bool: ... + def sizeof(self, deep: bool = ...) -> int: ... + def clear_mapping(self): ... diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx new file mode 100644 index 00000000..617760c2 --- /dev/null +++ b/pandas/_libs/index.pyx @@ -0,0 +1,1080 @@ +cimport cython + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + intp_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +cnp.import_array() + + +from pandas._libs cimport util +from pandas._libs.hashtable cimport HashTable +from pandas._libs.tslibs.nattype cimport c_NaT as NaT +from pandas._libs.tslibs.period cimport is_period_object +from pandas._libs.tslibs.timedeltas cimport _Timedelta +from pandas._libs.tslibs.timestamps cimport _Timestamp + +from pandas._libs import ( + algos, + hashtable as _hash, +) + +from pandas._libs.lib cimport eq_NA_compat +from pandas._libs.missing cimport ( + C_NA as NA, + checknull, + is_matching_na, +) + + +cdef inline bint is_definitely_invalid_key(object val): + try: + hash(val) + except TypeError: + return True + return False + + +cdef ndarray _get_bool_indexer(ndarray values, object val): + """ + Return a ndarray[bool] of locations where val matches self.values. 
+ + If val is not NA, this is equivalent to `self.values == val` + """ + # Caller is responsible for ensuring _check_type has already been called + cdef: + ndarray[uint8_t, ndim=1, cast=True] indexer + Py_ssize_t i + object item + + if values.descr.type_num == cnp.NPY_OBJECT: + # i.e. values.dtype == object + if not checknull(val): + indexer = eq_NA_compat(values, val) + + else: + # We need to check for _matching_ NA values + indexer = np.empty(len(values), dtype=np.uint8) + + for i in range(len(values)): + item = values[i] + indexer[i] = is_matching_na(item, val) + + else: + if util.is_nan(val): + indexer = np.isnan(values) + else: + indexer = values == val + + return indexer.view(bool) + + +# Don't populate hash tables in monotonic indexes larger than this +_SIZE_CUTOFF = 1_000_000 + + +cdef _unpack_bool_indexer(ndarray[uint8_t, ndim=1, cast=True] indexer, object val): + """ + Possibly unpack a boolean mask to a single indexer. + """ + # Returns ndarray[bool] or int + cdef: + ndarray[intp_t, ndim=1] found + int count + + found = np.where(indexer)[0] + count = len(found) + + if count > 1: + return indexer + if count == 1: + return int(found[0]) + + raise KeyError(val) + + +@cython.freelist(32) +cdef class IndexEngine: + + cdef readonly: + ndarray values + HashTable mapping + bint over_size_threshold + + cdef: + bint unique, monotonic_inc, monotonic_dec + bint need_monotonic_check, need_unique_check + object _np_type + + def __init__(self, ndarray values): + self.values = values + + self.over_size_threshold = len(values) >= _SIZE_CUTOFF + self.clear_mapping() + self._np_type = values.dtype.type + + def __contains__(self, val: object) -> bool: + hash(val) + try: + self.get_loc(val) + except KeyError: + return False + return True + + cpdef get_loc(self, object val): + # -> Py_ssize_t | slice | ndarray[bool] + cdef: + Py_ssize_t loc + + if is_definitely_invalid_key(val): + raise TypeError(f"'{val}' is an invalid key") + + val = self._check_type(val) + + if self.over_size_threshold and self.is_monotonic_increasing: + if not self.is_unique: + return self._get_loc_duplicates(val) + values = self.values + + loc = self._searchsorted_left(val) + if loc >= len(values): + raise KeyError(val) + if values[loc] != val: + raise KeyError(val) + return loc + + self._ensure_mapping_populated() + if not self.unique: + return self._get_loc_duplicates(val) + + try: + return self.mapping.get_item(val) + except OverflowError as err: + # GH#41775 OverflowError e.g. if we are uint64 and val is -1 + # or if we are int64 and value is np.iinfo(np.int64).max+1 + # (the uint64 with -1 case should actually be excluded by _check_type) + raise KeyError(val) from err + + cdef Py_ssize_t _searchsorted_left(self, val) except? -1: + """ + See ObjectEngine._searchsorted_left.__doc__. + """ + # Caller is responsible for ensuring _check_type has already been called + loc = self.values.searchsorted(self._np_type(val), side="left") + return loc + + cdef inline _get_loc_duplicates(self, object val): + # -> Py_ssize_t | slice | ndarray[bool] + cdef: + Py_ssize_t diff, left, right + + if self.is_monotonic_increasing: + values = self.values + try: + left = values.searchsorted(val, side='left') + right = values.searchsorted(val, side='right') + except TypeError: + # e.g. 
GH#29189 get_loc(None) with a Float64Index + # 2021-09-29 Now only reached for object-dtype + raise KeyError(val) + + diff = right - left + if diff == 0: + raise KeyError(val) + elif diff == 1: + return left + else: + return slice(left, right) + + return self._maybe_get_bool_indexer(val) + + cdef _maybe_get_bool_indexer(self, object val): + # Returns ndarray[bool] or int + cdef: + ndarray[uint8_t, ndim=1, cast=True] indexer + + indexer = _get_bool_indexer(self.values, val) + return _unpack_bool_indexer(indexer, val) + + def sizeof(self, deep: bool = False) -> int: + """ return the sizeof our mapping """ + if not self.is_mapping_populated: + return 0 + return self.mapping.sizeof(deep=deep) + + def __sizeof__(self) -> int: + return self.sizeof() + + @property + def is_unique(self) -> bool: + if self.need_unique_check: + self._do_unique_check() + + return self.unique == 1 + + cdef inline _do_unique_check(self): + + # this de-facto the same + self._ensure_mapping_populated() + + @property + def is_monotonic_increasing(self) -> bool: + if self.need_monotonic_check: + self._do_monotonic_check() + + return self.monotonic_inc == 1 + + @property + def is_monotonic_decreasing(self) -> bool: + if self.need_monotonic_check: + self._do_monotonic_check() + + return self.monotonic_dec == 1 + + cdef inline _do_monotonic_check(self): + cdef: + bint is_unique + try: + values = self.values + self.monotonic_inc, self.monotonic_dec, is_unique = \ + self._call_monotonic(values) + except TypeError: + self.monotonic_inc = 0 + self.monotonic_dec = 0 + is_unique = 0 + + self.need_monotonic_check = 0 + + # we can only be sure of uniqueness if is_unique=1 + if is_unique: + self.unique = 1 + self.need_unique_check = 0 + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=False) + + cdef _make_hash_table(self, Py_ssize_t n): + raise NotImplementedError # pragma: no cover + + cdef _check_type(self, object val): + hash(val) + return val + + @property + def is_mapping_populated(self) -> bool: + return self.mapping is not None + + cdef inline _ensure_mapping_populated(self): + # this populates the mapping + # if its not already populated + # also satisfies the need_unique_check + + if not self.is_mapping_populated: + + values = self.values + self.mapping = self._make_hash_table(len(values)) + self.mapping.map_locations(values) + + if len(self.mapping) == len(values): + self.unique = 1 + + self.need_unique_check = 0 + + def clear_mapping(self): + self.mapping = None + self.need_monotonic_check = 1 + self.need_unique_check = 1 + + self.unique = 0 + self.monotonic_inc = 0 + self.monotonic_dec = 0 + + def get_indexer(self, ndarray values) -> np.ndarray: + self._ensure_mapping_populated() + return self.mapping.lookup(values) + + def get_indexer_non_unique(self, ndarray targets): + """ + Return an indexer suitable for taking from a non unique index + return the labels in the same order as the target + and a missing indexer into the targets (which correspond + to the -1 indices in the results + + Returns + ------- + indexer : np.ndarray[np.intp] + missing : np.ndarray[np.intp] + """ + cdef: + ndarray values + ndarray[intp_t] result, missing + set stargets, remaining_stargets, found_nas + dict d = {} + object val + Py_ssize_t count = 0, count_missing = 0 + Py_ssize_t i, j, n, n_t, n_alloc, start, end + bint check_na_values = False + + values = self.values + stargets = set(targets) + + n = len(values) + n_t = len(targets) + if n > 10_000: + n_alloc = 10_000 + else: + n_alloc = n + + result = 
np.empty(n_alloc, dtype=np.intp) + missing = np.empty(n_t, dtype=np.intp) + + # map each starget to its position in the index + if ( + stargets and + len(stargets) < 5 and + not any([checknull(t) for t in stargets]) and + self.is_monotonic_increasing + ): + # if there are few enough stargets and the index is monotonically + # increasing, then use binary search for each starget + remaining_stargets = set() + for starget in stargets: + try: + start = values.searchsorted(starget, side='left') + end = values.searchsorted(starget, side='right') + except TypeError: # e.g. if we tried to search for string in int array + remaining_stargets.add(starget) + else: + if start != end: + d[starget] = list(range(start, end)) + + stargets = remaining_stargets + + if stargets: + # otherwise, map by iterating through all items in the index + + # short-circuit na check + if values.dtype == object: + check_na_values = True + # keep track of nas in values + found_nas = set() + + for i in range(n): + val = values[i] + + # GH#43870 + # handle lookup for nas + # (ie. np.nan, float("NaN"), Decimal("NaN"), dt64nat, td64nat) + if check_na_values and checknull(val): + match = [na for na in found_nas if is_matching_na(val, na)] + + # matching na not found + if not len(match): + found_nas.add(val) + + # add na to stargets to utilize `in` for stargets/d lookup + match_stargets = [ + x for x in stargets if is_matching_na(val, x) + ] + + if len(match_stargets): + # add our 'standardized' na + stargets.add(val) + + # matching na found + else: + assert len(match) == 1 + val = match[0] + + if val in stargets: + if val not in d: + d[val] = [] + d[val].append(i) + + for i in range(n_t): + val = targets[i] + + # ensure there are nas in values before looking for a matching na + if check_na_values and checknull(val): + match = [na for na in found_nas if is_matching_na(val, na)] + if len(match): + assert len(match) == 1 + val = match[0] + + # found + if val in d: + key = val + + for j in d[key]: + + # realloc if needed + if count >= n_alloc: + n_alloc += 10_000 + result = np.resize(result, n_alloc) + + result[count] = j + count += 1 + + # value not found + else: + + if count >= n_alloc: + n_alloc += 10_000 + result = np.resize(result, n_alloc) + result[count] = -1 + count += 1 + missing[count_missing] = i + count_missing += 1 + + return result[0:count], missing[0:count_missing] + + +cdef Py_ssize_t _bin_search(ndarray values, object val) except -1: + # GH#1757 ndarray.searchsorted is not safe to use with array of tuples + # (treats a tuple `val` as a sequence of keys instead of a single key), + # so we implement something similar. + # This is equivalent to the stdlib's bisect.bisect_left + + cdef: + Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1 + object pval + + if hi == 0 or (hi > 0 and val > values[hi]): + return len(values) + + while lo < hi: + mid = (lo + hi) // 2 + pval = values[mid] + if val < pval: + hi = mid + elif val > pval: + lo = mid + 1 + else: + while mid > 0 and val == values[mid - 1]: + mid -= 1 + return mid + + if val <= values[mid]: + return mid + else: + return mid + 1 + + +cdef class ObjectEngine(IndexEngine): + """ + Index Engine for use with object-dtype Index, namely the base class Index. + """ + cdef _make_hash_table(self, Py_ssize_t n): + return _hash.PyObjectHashTable(n) + + cdef Py_ssize_t _searchsorted_left(self, val) except? 
-1: + # using values.searchsorted here would treat a tuple `val` as a sequence + # instead of a single key, so we use a different implementation + try: + loc = _bin_search(self.values, val) + except TypeError as err: + raise KeyError(val) from err + return loc + + +cdef class DatetimeEngine(Int64Engine): + + cdef int64_t _unbox_scalar(self, scalar) except? -1: + # NB: caller is responsible for ensuring tzawareness compat + # before we get here + if not (isinstance(scalar, _Timestamp) or scalar is NaT): + raise TypeError(scalar) + return scalar.value + + def __contains__(self, val: object) -> bool: + # We assume before we get here: + # - val is hashable + self._unbox_scalar(val) + try: + self.get_loc(val) + return True + except KeyError: + return False + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=True) + + cpdef get_loc(self, object val): + # NB: the caller is responsible for ensuring that we are called + # with either a Timestamp or NaT (Timedelta or NaT for TimedeltaEngine) + + cdef: + Py_ssize_t loc + + if is_definitely_invalid_key(val): + raise TypeError(f"'{val}' is an invalid key") + + try: + conv = self._unbox_scalar(val) + except TypeError: + raise KeyError(val) + + # Welcome to the spaghetti factory + if self.over_size_threshold and self.is_monotonic_increasing: + if not self.is_unique: + return self._get_loc_duplicates(conv) + values = self.values + + loc = values.searchsorted(conv, side='left') + + if loc == len(values) or values[loc] != conv: + raise KeyError(val) + return loc + + self._ensure_mapping_populated() + if not self.unique: + return self._get_loc_duplicates(conv) + + try: + return self.mapping.get_item(conv) + except KeyError: + raise KeyError(val) + + +cdef class TimedeltaEngine(DatetimeEngine): + + cdef int64_t _unbox_scalar(self, scalar) except? -1: + if not (isinstance(scalar, _Timedelta) or scalar is NaT): + raise TypeError(scalar) + return scalar.value + + +cdef class PeriodEngine(Int64Engine): + + cdef int64_t _unbox_scalar(self, scalar) except? -1: + if scalar is NaT: + return scalar.value + if is_period_object(scalar): + # NB: we assume that we have the correct freq here. + return scalar.ordinal + raise TypeError(scalar) + + cpdef get_loc(self, object val): + # NB: the caller is responsible for ensuring that we are called + # with either a Period or NaT + cdef: + int64_t conv + + try: + conv = self._unbox_scalar(val) + except TypeError: + raise KeyError(val) + + return Int64Engine.get_loc(self, conv) + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=True) + + +cdef class BaseMultiIndexCodesEngine: + """ + Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which + represent each label in a MultiIndex as an integer, by juxtaposing the bits + encoding each level, with appropriate offsets. + + For instance: if 3 levels have respectively 3, 6 and 1 possible values, + then their labels can be represented using respectively 2, 3 and 1 bits, + as follows: + _ _ _ _____ _ __ __ __ + |0|0|0| ... |0| 0|a1|a0| -> offset 0 (first level) + — — — ————— — —— —— —— + |0|0|0| ... |0|b2|b1|b0| -> offset 2 (bits required for first level) + — — — ————— — —— —— —— + |0|0|0| ... |0| 0| 0|c0| -> offset 5 (bits required for first two levels) + ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ + and the resulting unsigned integer representation will be: + _ _ _ _____ _ __ __ __ __ __ __ + |0|0|0| ... 
|0|c0|b2|b1|b0|a1|a0| + ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ + + Offsets are calculated at initialization, labels are transformed by method + _codes_to_ints. + + Keys are located by first locating each component against the respective + level, then locating (the integer representation of) codes. + """ + def __init__(self, object levels, object labels, + ndarray[uint64_t, ndim=1] offsets): + """ + Parameters + ---------- + levels : list-like of numpy arrays + Levels of the MultiIndex. + labels : list-like of numpy arrays of integer dtype + Labels of the MultiIndex. + offsets : numpy array of uint64 dtype + Pre-calculated offsets, one for each level of the index. + """ + self.levels = levels + self.offsets = offsets + + # Transform labels in a single array, and add 1 so that we are working + # with positive integers (-1 for NaN becomes 0): + codes = (np.array(labels, dtype='int64').T + 1).astype('uint64', + copy=False) + + # Map each codes combination in the index to an integer unambiguously + # (no collisions possible), based on the "offsets", which describe the + # number of bits to switch labels for each level: + lab_ints = self._codes_to_ints(codes) + + # Initialize underlying index (e.g. libindex.UInt64Engine) with + # integers representing labels: we will use its get_loc and get_indexer + self._base.__init__(self, lab_ints) + + def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray: + raise NotImplementedError("Implemented by subclass") # pragma: no cover + + def _extract_level_codes(self, target) -> np.ndarray: + """ + Map the requested list of (tuple) keys to their integer representations + for searching in the underlying integer index. + + Parameters + ---------- + target : MultiIndex + + Returns + ------ + int_keys : 1-dimensional array of dtype uint64 or object + Integers representing one combination each + """ + zt = [target._get_level_values(i) for i in range(target.nlevels)] + level_codes = [lev.get_indexer_for(codes) + 1 for lev, codes + in zip(self.levels, zt)] + return self._codes_to_ints(np.array(level_codes, dtype='uint64').T) + + def get_indexer(self, target: np.ndarray) -> np.ndarray: + """ + Returns an array giving the positions of each value of `target` in + `self.values`, where -1 represents a value in `target` which does not + appear in `self.values` + + Parameters + ---------- + target : np.ndarray + + Returns + ------- + np.ndarray[intp_t, ndim=1] of the indexer of `target` into + `self.values` + """ + return self._base.get_indexer(self, target) + + def get_indexer_with_fill(self, ndarray target, ndarray values, + str method, object limit) -> np.ndarray: + """ + Returns an array giving the positions of each value of `target` in + `values`, where -1 represents a value in `target` which does not + appear in `values` + + If `method` is "backfill" then the position for a value in `target` + which does not appear in `values` is that of the next greater value + in `values` (if one exists), and -1 if there is no such value. + + Similarly, if the method is "pad" then the position for a value in + `target` which does not appear in `values` is that of the next smaller + value in `values` (if one exists), and -1 if there is no such value. + + Parameters + ---------- + target: ndarray[object] of tuples + need not be sorted, but all must have the same length, which must be + the same as the length of all tuples in `values` + values : ndarray[object] of tuples + must be sorted and all have the same length. Should be the set of + the MultiIndex's values. 
+ method: string + "backfill" or "pad" + limit: int or None + if provided, limit the number of fills to this value + + Returns + ------- + np.ndarray[intp_t, ndim=1] of the indexer of `target` into `values`, + filled with the `method` (and optionally `limit`) specified + """ + assert method in ("backfill", "pad") + cdef: + int64_t i, j, next_code + int64_t num_values, num_target_values + ndarray[int64_t, ndim=1] target_order + ndarray[object, ndim=1] target_values + ndarray[int64_t, ndim=1] new_codes, new_target_codes + ndarray[intp_t, ndim=1] sorted_indexer + + target_order = np.argsort(target).astype('int64') + target_values = target[target_order] + num_values, num_target_values = len(values), len(target_values) + new_codes, new_target_codes = ( + np.empty((num_values,)).astype('int64'), + np.empty((num_target_values,)).astype('int64'), + ) + + # `values` and `target_values` are both sorted, so we walk through them + # and memoize the (ordered) set of indices in the (implicit) merged-and + # sorted list of the two which belong to each of them + # the effect of this is to create a factorization for the (sorted) + # merger of the index values, where `new_codes` and `new_target_codes` + # are the subset of the factors which appear in `values` and `target`, + # respectively + i, j, next_code = 0, 0, 0 + while i < num_values and j < num_target_values: + val, target_val = values[i], target_values[j] + if val <= target_val: + new_codes[i] = next_code + i += 1 + if target_val <= val: + new_target_codes[j] = next_code + j += 1 + next_code += 1 + + # at this point, at least one should have reached the end + # the remaining values of the other should be added to the end + assert i == num_values or j == num_target_values + while i < num_values: + new_codes[i] = next_code + i += 1 + next_code += 1 + while j < num_target_values: + new_target_codes[j] = next_code + j += 1 + next_code += 1 + + # get the indexer, and undo the sorting of `target.values` + algo = algos.backfill if method == "backfill" else algos.pad + sorted_indexer = algo(new_codes, new_target_codes, limit=limit) + return sorted_indexer[np.argsort(target_order)] + + def get_loc(self, object key): + if is_definitely_invalid_key(key): + raise TypeError(f"'{key}' is an invalid key") + if not isinstance(key, tuple): + raise KeyError(key) + try: + indices = [0 if checknull(v) else lev.get_loc(v) + 1 + for lev, v in zip(self.levels, key)] + except KeyError: + raise KeyError(key) + + # Transform indices into single integer: + lab_int = self._codes_to_ints(np.array(indices, dtype='uint64')) + + return self._base.get_loc(self, lab_int) + + def get_indexer_non_unique(self, target: np.ndarray) -> np.ndarray: + indexer = self._base.get_indexer_non_unique(self, target) + + return indexer + + def __contains__(self, val: object) -> bool: + # We assume before we get here: + # - val is hashable + # Default __contains__ looks in the underlying mapping, which in this + # case only contains integer representations. + try: + self.get_loc(val) + return True + except (KeyError, TypeError, ValueError): + return False + + +# Generated from template. 
+include "index_class_helper.pxi" + + +cdef class BoolEngine(UInt8Engine): + cdef _check_type(self, object val): + if not util.is_bool_object(val): + raise KeyError(val) + return val + + +@cython.internal +@cython.freelist(32) +cdef class SharedEngine: + cdef readonly: + object values # ExtensionArray + bint over_size_threshold + + cdef: + bint unique, monotonic_inc, monotonic_dec + bint need_monotonic_check, need_unique_check + + def __contains__(self, val: object) -> bool: + # We assume before we get here: + # - val is hashable + try: + self.get_loc(val) + return True + except KeyError: + return False + + def clear_mapping(self): + # for compat with IndexEngine + pass + + @property + def is_unique(self) -> bool: + if self.need_unique_check: + arr = self.values.unique() + self.unique = len(arr) == len(self.values) + + self.need_unique_check = False + return self.unique + + cdef _do_monotonic_check(self): + raise NotImplementedError + + @property + def is_monotonic_increasing(self) -> bool: + if self.need_monotonic_check: + self._do_monotonic_check() + + return self.monotonic_inc == 1 + + @property + def is_monotonic_decreasing(self) -> bool: + if self.need_monotonic_check: + self._do_monotonic_check() + + return self.monotonic_dec == 1 + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=False) + + def sizeof(self, deep: bool = False) -> int: + """ return the sizeof our mapping """ + return 0 + + def __sizeof__(self) -> int: + return self.sizeof() + + cdef _check_type(self, object obj): + raise NotImplementedError + + cpdef get_loc(self, object val): + # -> Py_ssize_t | slice | ndarray[bool] + cdef: + Py_ssize_t loc + + if is_definitely_invalid_key(val): + raise TypeError(f"'{val}' is an invalid key") + + self._check_type(val) + + if self.over_size_threshold and self.is_monotonic_increasing: + if not self.is_unique: + return self._get_loc_duplicates(val) + + values = self.values + + loc = self._searchsorted_left(val) + if loc >= len(values): + raise KeyError(val) + if values[loc] != val: + raise KeyError(val) + return loc + + if not self.unique: + return self._get_loc_duplicates(val) + + return self._get_loc_duplicates(val) + + cdef inline _get_loc_duplicates(self, object val): + # -> Py_ssize_t | slice | ndarray[bool] + cdef: + Py_ssize_t diff + + if self.is_monotonic_increasing: + values = self.values + try: + left = values.searchsorted(val, side='left') + right = values.searchsorted(val, side='right') + except TypeError: + # e.g. GH#29189 get_loc(None) with a Float64Index + raise KeyError(val) + + diff = right - left + if diff == 0: + raise KeyError(val) + elif diff == 1: + return left + else: + return slice(left, right) + + return self._maybe_get_bool_indexer(val) + + cdef Py_ssize_t _searchsorted_left(self, val) except? -1: + """ + See ObjectEngine._searchsorted_left.__doc__. + """ + try: + loc = self.values.searchsorted(val, side="left") + except TypeError as err: + # GH#35788 e.g. 
val=None with float64 values + raise KeyError(val) + return loc + + cdef ndarray _get_bool_indexer(self, val): + raise NotImplementedError + + cdef _maybe_get_bool_indexer(self, object val): + # Returns ndarray[bool] or int + cdef: + ndarray[uint8_t, ndim=1, cast=True] indexer + + indexer = self._get_bool_indexer(val) + return _unpack_bool_indexer(indexer, val) + + def get_indexer(self, values) -> np.ndarray: + # values : type(self.values) + # Note: we only get here with self.is_unique + cdef: + Py_ssize_t i, N = len(values) + + res = np.empty(N, dtype=np.intp) + + for i in range(N): + val = values[i] + try: + loc = self.get_loc(val) + # Because we are unique, loc should always be an integer + except KeyError: + loc = -1 + else: + assert util.is_integer_object(loc), (loc, val) + res[i] = loc + + return res + + def get_indexer_non_unique(self, targets): + """ + Return an indexer suitable for taking from a non unique index + return the labels in the same order as the target + and a missing indexer into the targets (which correspond + to the -1 indices in the results + Parameters + ---------- + targets : type(self.values) + Returns + ------- + indexer : np.ndarray[np.intp] + missing : np.ndarray[np.intp] + """ + cdef: + Py_ssize_t i, N = len(targets) + + indexer = [] + missing = [] + + # See also IntervalIndex.get_indexer_pointwise + for i in range(N): + val = targets[i] + + try: + locs = self.get_loc(val) + except KeyError: + locs = np.array([-1], dtype=np.intp) + missing.append(i) + else: + if isinstance(locs, slice): + # Only needed for get_indexer_non_unique + locs = np.arange(locs.start, locs.stop, locs.step, dtype=np.intp) + elif util.is_integer_object(locs): + locs = np.array([locs], dtype=np.intp) + else: + assert locs.dtype.kind == "b" + locs = locs.nonzero()[0] + + indexer.append(locs) + + try: + indexer = np.concatenate(indexer, dtype=np.intp) + except TypeError: + # numpy<1.20 doesn't accept dtype keyword + indexer = np.concatenate(indexer).astype(np.intp, copy=False) + missing = np.array(missing, dtype=np.intp) + + return indexer, missing + + +cdef class ExtensionEngine(SharedEngine): + def __init__(self, values: "ExtensionArray"): + self.values = values + + self.over_size_threshold = len(values) >= _SIZE_CUTOFF + self.need_unique_check = True + self.need_monotonic_check = True + self.need_unique_check = True + + cdef _do_monotonic_check(self): + cdef: + bint is_unique + + values = self.values + if values._hasna: + self.monotonic_inc = 0 + self.monotonic_dec = 0 + + nunique = len(values.unique()) + self.unique = nunique == len(values) + self.need_unique_check = 0 + return + + try: + ranks = values._rank() + + except TypeError: + self.monotonic_inc = 0 + self.monotonic_dec = 0 + is_unique = 0 + else: + self.monotonic_inc, self.monotonic_dec, is_unique = \ + self._call_monotonic(ranks) + + self.need_monotonic_check = 0 + + # we can only be sure of uniqueness if is_unique=1 + if is_unique: + self.unique = 1 + self.need_unique_check = 0 + + cdef ndarray _get_bool_indexer(self, val): + if checknull(val): + return self.values.isna() + + try: + return self.values == val + except TypeError: + # e.g. if __eq__ returns a BooleanArray instead of ndarray[bool] + try: + return (self.values == val).to_numpy(dtype=bool, na_value=False) + except (TypeError, AttributeError) as err: + # e.g. (self.values == val) returned a bool + # see test_get_loc_generator[string[pyarrow]] + # e.g. 
self.value == val raises TypeError bc generator has no len + # see test_get_loc_generator[string[python]] + raise KeyError from err + + cdef _check_type(self, object val): + hash(val) diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in new file mode 100644 index 00000000..b9c02ba6 --- /dev/null +++ b/pandas/_libs/index_class_helper.pxi.in @@ -0,0 +1,65 @@ +""" +Template for functions of IndexEngine subclasses. + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# IndexEngine Subclass Methods +# ---------------------------------------------------------------------- + +{{py: + +# name, dtype +dtypes = [('Float64', 'float64'), + ('Float32', 'float32'), + ('Int64', 'int64'), + ('Int32', 'int32'), + ('Int16', 'int16'), + ('Int8', 'int8'), + ('UInt64', 'uint64'), + ('UInt32', 'uint32'), + ('UInt16', 'uint16'), + ('UInt8', 'uint8'), + ('Complex64', 'complex64'), + ('Complex128', 'complex128'), + ] +}} + +{{for name, dtype in dtypes}} + + +cdef class {{name}}Engine(IndexEngine): + + cdef _make_hash_table(self, Py_ssize_t n): + return _hash.{{name}}HashTable(n) + + cdef _check_type(self, object val): + {{if name not in {'Float64', 'Float32', 'Complex64', 'Complex128'} }} + if not util.is_integer_object(val): + if util.is_float_object(val): + # Make sure Int64Index.get_loc(2.0) works + if val.is_integer(): + return int(val) + raise KeyError(val) + {{if name.startswith("U")}} + if val < 0: + # cannot have negative values with unsigned int dtype + raise KeyError(val) + {{endif}} + {{elif name not in {'Complex64', 'Complex128'} }} + if not util.is_integer_object(val) and not util.is_float_object(val): + # in particular catch bool and avoid casting True -> 1.0 + raise KeyError(val) + {{else}} + if (not util.is_integer_object(val) + and not util.is_float_object(val) + and not util.is_complex_object(val) + ): + # in particular catch bool and avoid casting True -> 1.0 + raise KeyError(val) + {{endif}} + return val + + +{{endfor}} diff --git a/pandas/_libs/indexing.pyi b/pandas/_libs/indexing.pyi new file mode 100644 index 00000000..b219f991 --- /dev/null +++ b/pandas/_libs/indexing.pyi @@ -0,0 +1,17 @@ +from typing import ( + Generic, + TypeVar, +) + +from pandas.core.indexing import IndexingMixin + +_IndexingMixinT = TypeVar("_IndexingMixinT", bound=IndexingMixin) + +class NDFrameIndexerBase(Generic[_IndexingMixinT]): + name: str + # in practise obj is either a DataFrame or a Series + obj: _IndexingMixinT + + def __init__(self, name: str, obj: _IndexingMixinT) -> None: ... + @property + def ndim(self) -> int: ... diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx new file mode 100644 index 00000000..c274b28b --- /dev/null +++ b/pandas/_libs/indexing.pyx @@ -0,0 +1,28 @@ +cdef class NDFrameIndexerBase: + """ + A base class for _NDFrameIndexer for fast instantiation and attribute access. + """ + cdef: + Py_ssize_t _ndim + + cdef public: + str name + object obj + + def __init__(self, name: str, obj): + self.obj = obj + self.name = name + self._ndim = -1 + + @property + def ndim(self) -> int: + # Delay `ndim` instantiation until required as reading it + # from `obj` isn't entirely cheap. 
+ ndim = self._ndim + if ndim == -1: + ndim = self._ndim = self.obj.ndim + if ndim > 2: + raise ValueError( # pragma: no cover + "NDFrameIndexer does not support NDFrame objects with ndim > 2" + ) + return ndim diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi new file mode 100644 index 00000000..201c7b7b --- /dev/null +++ b/pandas/_libs/internals.pyi @@ -0,0 +1,85 @@ +from typing import ( + Iterator, + Sequence, + final, + overload, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + T, + npt, +) + +from pandas import Index +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.internals.blocks import Block as B + +def slice_len(slc: slice, objlen: int = ...) -> int: ... +def get_blkno_indexers( + blknos: np.ndarray, # int64_t[:] + group: bool = ..., +) -> list[tuple[int, slice | np.ndarray]]: ... +def get_blkno_placements( + blknos: np.ndarray, + group: bool = ..., +) -> Iterator[tuple[int, BlockPlacement]]: ... +def update_blklocs_and_blknos( + blklocs: npt.NDArray[np.intp], + blknos: npt.NDArray[np.intp], + loc: int, + nblocks: int, +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +@final +class BlockPlacement: + def __init__(self, val: int | slice | np.ndarray): ... + @property + def indexer(self) -> np.ndarray | slice: ... + @property + def as_array(self) -> np.ndarray: ... + @property + def as_slice(self) -> slice: ... + @property + def is_slice_like(self) -> bool: ... + @overload + def __getitem__(self, loc: slice | Sequence[int]) -> BlockPlacement: ... + @overload + def __getitem__(self, loc: int) -> int: ... + def __iter__(self) -> Iterator[int]: ... + def __len__(self) -> int: ... + def delete(self, loc) -> BlockPlacement: ... + def append(self, others: list[BlockPlacement]) -> BlockPlacement: ... + def tile_for_unstack(self, factor: int) -> npt.NDArray[np.intp]: ... + +class SharedBlock: + _mgr_locs: BlockPlacement + ndim: int + values: ArrayLike + def __init__(self, values: ArrayLike, placement: BlockPlacement, ndim: int): ... + +class NumpyBlock(SharedBlock): + values: np.ndarray + @final + def getitem_block_index(self: T, slicer: slice) -> T: ... + +class NDArrayBackedBlock(SharedBlock): + values: NDArrayBackedExtensionArray + @final + def getitem_block_index(self: T, slicer: slice) -> T: ... + +class Block(SharedBlock): ... + +class BlockManager: + blocks: tuple[B, ...] + axes: list[Index] + _known_consolidated: bool + _is_consolidated: bool + _blknos: np.ndarray + _blklocs: np.ndarray + def __init__( + self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=... + ): ... + def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ... + def _rebuild_blknos_and_blklocs(self) -> None: ... 
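+
+# Editor's note -- illustrative sketch, not part of the upstream pandas sources.
+# It shows how the BlockPlacement and get_blkno_indexers signatures stubbed in
+# this file behave at runtime, assuming the built pandas._libs.internals
+# extension; behaviour is taken from the .pyx implementation that follows.
+#
+#     import numpy as np
+#     from pandas._libs.internals import BlockPlacement, get_blkno_indexers
+#
+#     bp = BlockPlacement(slice(0, 4))        # canonicalised, slice-backed
+#     bp.is_slice_like                        # True
+#     list(bp)                                # [0, 1, 2, 3]
+#     bp.as_array                             # array([0, 1, 2, 3])
+#     BlockPlacement(np.array([3, 1, 2]))     # array-backed placement
+#
+#     # contiguous runs of block numbers -> (blkno, slice) pairs
+#     get_blkno_indexers(np.array([0, 0, 1, 1, 0], dtype=np.int64), group=False)
+#     # [(0, slice(0, 2)), (1, slice(2, 4)), (0, slice(4, 5))]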
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx new file mode 100644 index 00000000..ded161c7 --- /dev/null +++ b/pandas/_libs/internals.pyx @@ -0,0 +1,836 @@ +from collections import defaultdict +import weakref + +cimport cython +from cpython.slice cimport PySlice_GetIndicesEx +from cython cimport Py_ssize_t + + +cdef extern from "Python.h": + Py_ssize_t PY_SSIZE_T_MAX + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + NPY_INTP, + int64_t, + intp_t, + ndarray, +) + +cnp.import_array() + +from pandas._libs.algos import ensure_int64 + +from pandas._libs.arrays cimport NDArrayBacked +from pandas._libs.util cimport ( + is_array, + is_integer_object, +) + + +@cython.final +@cython.freelist(32) +cdef class BlockPlacement: + # __slots__ = '_as_slice', '_as_array', '_len' + cdef: + slice _as_slice + ndarray _as_array # Note: this still allows `None`; will be intp_t + bint _has_slice, _has_array, _is_known_slice_like + + def __cinit__(self, val): + cdef: + slice slc + + self._as_slice = None + self._as_array = None + self._has_slice = False + self._has_array = False + + if is_integer_object(val): + slc = slice(val, val + 1, 1) + self._as_slice = slc + self._has_slice = True + elif isinstance(val, slice): + slc = slice_canonize(val) + + if slc.start != slc.stop: + self._as_slice = slc + self._has_slice = True + else: + arr = np.empty(0, dtype=np.intp) + self._as_array = arr + self._has_array = True + else: + # Cython memoryview interface requires ndarray to be writeable. + if ( + not is_array(val) + or not cnp.PyArray_ISWRITEABLE(val) + or (val).descr.type_num != cnp.NPY_INTP + ): + arr = np.require(val, dtype=np.intp, requirements='W') + else: + arr = val + # Caller is responsible for ensuring arr.ndim == 1 + self._as_array = arr + self._has_array = True + + def __str__(self) -> str: + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + v = self._as_slice + else: + v = self._as_array + + return f"{type(self).__name__}({v})" + + def __repr__(self) -> str: + return str(self) + + def __len__(self) -> int: + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + return slice_len(s) + else: + return len(self._as_array) + + def __iter__(self): + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t start, stop, step, _ + + if s is not None: + start, stop, step, _ = slice_get_indices_ex(s) + return iter(range(start, stop, step)) + else: + return iter(self._as_array) + + @property + def as_slice(self) -> slice: + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + return s + else: + raise TypeError("Not slice-like") + + @property + def indexer(self): + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + return s + else: + return self._as_array + + @property + def as_array(self) -> np.ndarray: + cdef: + Py_ssize_t start, stop, end, _ + + if not self._has_array: + start, stop, step, _ = slice_get_indices_ex(self._as_slice) + # NOTE: this is the C-optimized equivalent of + # `np.arange(start, stop, step, dtype=np.intp)` + self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INTP) + self._has_array = True + + return self._as_array + + @property + def is_slice_like(self) -> bool: + cdef: + slice s = self._ensure_has_slice() + + return s is not None + + def __getitem__(self, loc): + cdef: + slice s = self._ensure_has_slice() + + if s is not None: + val = slice_getitem(s, loc) + else: + val = self._as_array[loc] + + if not isinstance(val, slice) and val.ndim == 0: + return val + + return BlockPlacement(val) + 
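+    # Editor's note -- illustrative only, not part of the upstream sources.
+    # The constructor canonicalises its input and __getitem__ above preserves
+    # slice-ness where it can (hypothetical values, behaviour per this class):
+    #
+    #     BlockPlacement(2)                   # backed by slice(2, 3, 1)
+    #     BlockPlacement(slice(2, 3))         # also slice(2, 3, 1)
+    #     BlockPlacement(slice(0, 6, 2))[1:]  # BlockPlacement(slice(2, 6, 2))
+    #     BlockPlacement(slice(0, 6, 2))[1]   # the scalar 2, not a BlockPlacement
+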
+ def delete(self, loc) -> BlockPlacement: + return BlockPlacement(np.delete(self.as_array, loc, axis=0)) + + def append(self, others) -> BlockPlacement: + if not len(others): + return self + + return BlockPlacement( + np.concatenate([self.as_array] + [o.as_array for o in others]) + ) + + cdef BlockPlacement iadd(self, other): + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t other_int, start, stop, step + + if is_integer_object(other) and s is not None: + other_int = other + + if other_int == 0: + # BlockPlacement is treated as immutable + return self + + start, stop, step, _ = slice_get_indices_ex(s) + start += other_int + stop += other_int + + if (step > 0 and start < 0) or (step < 0 and stop < step): + raise ValueError("iadd causes length change") + + if stop < 0: + val = slice(start, None, step) + else: + val = slice(start, stop, step) + + return BlockPlacement(val) + else: + newarr = self.as_array + other + if (newarr < 0).any(): + raise ValueError("iadd causes length change") + + val = newarr + return BlockPlacement(val) + + def add(self, other) -> BlockPlacement: + # We can get here with int or ndarray + return self.iadd(other) + + cdef slice _ensure_has_slice(self): + if not self._has_slice: + self._as_slice = indexer_as_slice(self._as_array) + self._has_slice = True + + return self._as_slice + + cpdef BlockPlacement increment_above(self, Py_ssize_t loc): + """ + Increment any entries of 'loc' or above by one. + """ + cdef: + slice nv, s = self._ensure_has_slice() + Py_ssize_t other_int, start, stop, step + ndarray[intp_t, ndim=1] newarr + + if s is not None: + # see if we are either all-above or all-below, each of which + # have fastpaths available. + + start, stop, step, _ = slice_get_indices_ex(s) + + if start < loc and stop <= loc: + # We are entirely below, nothing to increment + return self + + if start >= loc and stop >= loc: + # We are entirely above, we can efficiently increment out slice + nv = slice(start + 1, stop + 1, step) + return BlockPlacement(nv) + + if loc == 0: + # fastpath where we know everything is >= 0 + newarr = self.as_array + 1 + return BlockPlacement(newarr) + + newarr = self.as_array.copy() + newarr[newarr >= loc] += 1 + return BlockPlacement(newarr) + + def tile_for_unstack(self, factor: int) -> np.ndarray: + """ + Find the new mgr_locs for the un-stacked version of a Block. + """ + cdef: + slice slc = self._ensure_has_slice() + slice new_slice + ndarray[intp_t, ndim=1] new_placement + + if slc is not None and slc.step == 1: + new_slc = slice(slc.start * factor, slc.stop * factor, 1) + # equiv: np.arange(new_slc.start, new_slc.stop, dtype=np.intp) + new_placement = cnp.PyArray_Arange(new_slc.start, new_slc.stop, 1, NPY_INTP) + else: + # Note: test_pivot_table_empty_aggfunc gets here with `slc is not None` + mapped = [ + # equiv: np.arange(x * factor, (x + 1) * factor, dtype=np.intp) + cnp.PyArray_Arange(x * factor, (x + 1) * factor, 1, NPY_INTP) + for x in self + ] + new_placement = np.concatenate(mapped) + return new_placement + + +cdef slice slice_canonize(slice s): + """ + Convert slice to canonical bounded form. 
+ """ + cdef: + Py_ssize_t start = 0, stop = 0, step = 1 + + if s.step is None: + step = 1 + else: + step = s.step + if step == 0: + raise ValueError("slice step cannot be zero") + + if step > 0: + if s.stop is None: + raise ValueError("unbounded slice") + + stop = s.stop + if s.start is None: + start = 0 + else: + start = s.start + if start > stop: + start = stop + elif step < 0: + if s.start is None: + raise ValueError("unbounded slice") + + start = s.start + if s.stop is None: + stop = -1 + else: + stop = s.stop + if stop > start: + stop = start + + if start < 0 or (stop < 0 and s.stop is not None and step > 0): + raise ValueError("unbounded slice") + + if stop < 0: + return slice(start, None, step) + else: + return slice(start, stop, step) + + +cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: + """ + Get length of a bounded slice. + + The slice must not have any "open" bounds that would create dependency on + container size, i.e.: + - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None`` + - if ``s.step < 0``, ``s.start`` is not ``None`` + + Otherwise, the result is unreliable. + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc must be slice") # pragma: no cover + + PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length) + + return length + + +cdef (Py_ssize_t, Py_ssize_t, Py_ssize_t, Py_ssize_t) slice_get_indices_ex( + slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX +): + """ + Get (start, stop, step, length) tuple for a slice. + + If `objlen` is not specified, slice must be bounded, otherwise the result + will be wrong. + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc should be a slice") # pragma: no cover + + PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length) + + return start, stop, step, length + + +cdef slice_getitem(slice slc, ind): + cdef: + Py_ssize_t s_start, s_stop, s_step, s_len + Py_ssize_t ind_start, ind_stop, ind_step, ind_len + + s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) + + if isinstance(ind, slice): + ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len) + + if ind_step > 0 and ind_len == s_len: + # short-cut for no-op slice + if ind_len == s_len: + return slc + + if ind_step < 0: + s_start = s_stop - s_step + ind_step = -ind_step + + s_step *= ind_step + s_stop = s_start + ind_stop * s_step + s_start = s_start + ind_start * s_step + + if s_step < 0 and s_stop < 0: + return slice(s_start, None, s_step) + else: + return slice(s_start, s_stop, s_step) + + else: + # NOTE: + # this is the C-optimized equivalent of + # `np.arange(s_start, s_stop, s_step, dtype=np.intp)[ind]` + return cnp.PyArray_Arange(s_start, s_stop, s_step, NPY_INTP)[ind] + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef slice indexer_as_slice(intp_t[:] vals): + cdef: + Py_ssize_t i, n, start, stop + int64_t d + + if vals is None: + raise TypeError("vals must be ndarray") # pragma: no cover + + n = vals.shape[0] + + if n == 0 or vals[0] < 0: + return None + + if n == 1: + return slice(vals[0], vals[0] + 1, 1) + + if vals[1] < 0: + return None + + # n > 2 + d = vals[1] - vals[0] + + if d == 0: + return None + + for i in range(2, n): + if vals[i] < 0 or vals[i] - vals[i - 1] != d: + return None + + start = vals[0] + stop = start + n * d + if stop < 0 and d < 0: + return slice(start, None, d) + else: + return slice(start, stop, d) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def 
get_blkno_indexers( + int64_t[:] blknos, bint group=True +) -> list[tuple[int, slice | np.ndarray]]: + """ + Enumerate contiguous runs of integers in ndarray. + + Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))`` + pairs for each contiguous run found. + + If `group` is True and there is more than one run for a certain blkno, + ``(blkno, array)`` with an array containing positions of all elements equal + to blkno. + + Returns + ------- + list[tuple[int, slice | np.ndarray]] + """ + # There's blkno in this function's name because it's used in block & + # blockno handling. + cdef: + int64_t cur_blkno + Py_ssize_t i, start, stop, n, diff + cnp.npy_intp tot_len + int64_t blkno + object group_dict = defaultdict(list) + ndarray[int64_t, ndim=1] arr + + n = blknos.shape[0] + result = list() + + if n == 0: + return result + + start = 0 + cur_blkno = blknos[start] + + if group is False: + for i in range(1, n): + if blknos[i] != cur_blkno: + result.append((cur_blkno, slice(start, i))) + + start = i + cur_blkno = blknos[i] + + result.append((cur_blkno, slice(start, n))) + else: + for i in range(1, n): + if blknos[i] != cur_blkno: + group_dict[cur_blkno].append((start, i)) + + start = i + cur_blkno = blknos[i] + + group_dict[cur_blkno].append((start, n)) + + for blkno, slices in group_dict.items(): + if len(slices) == 1: + result.append((blkno, slice(slices[0][0], slices[0][1]))) + else: + tot_len = sum(stop - start for start, stop in slices) + # equiv np.empty(tot_len, dtype=np.int64) + arr = cnp.PyArray_EMPTY(1, &tot_len, cnp.NPY_INT64, 0) + + i = 0 + for start, stop in slices: + for diff in range(start, stop): + arr[i] = diff + i += 1 + + result.append((blkno, arr)) + + return result + + +def get_blkno_placements(blknos, group: bool = True): + """ + Parameters + ---------- + blknos : np.ndarray[int64] + group : bool, default True + + Returns + ------- + iterator + yield (blkno, BlockPlacement) + """ + blknos = ensure_int64(blknos) + + for blkno, indexer in get_blkno_indexers(blknos, group): + yield blkno, BlockPlacement(indexer) + + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef update_blklocs_and_blknos( + ndarray[intp_t, ndim=1] blklocs, + ndarray[intp_t, ndim=1] blknos, + Py_ssize_t loc, + intp_t nblocks, +): + """ + Update blklocs and blknos when a new column is inserted at 'loc'. + """ + cdef: + Py_ssize_t i + cnp.npy_intp length = blklocs.shape[0] + 1 + ndarray[intp_t, ndim=1] new_blklocs, new_blknos + + # equiv: new_blklocs = np.empty(length, dtype=np.intp) + new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) + new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) + + for i in range(loc): + new_blklocs[i] = blklocs[i] + new_blknos[i] = blknos[i] + + new_blklocs[loc] = 0 + new_blknos[loc] = nblocks + + for i in range(loc, length - 1): + new_blklocs[i + 1] = blklocs[i] + new_blknos[i + 1] = blknos[i] + + return new_blklocs, new_blknos + + +def _unpickle_block(values, placement, ndim): + # We have to do some gymnastics b/c "ndim" is keyword-only + + from pandas.core.internals.blocks import new_block + + return new_block(values, placement, ndim=ndim) + + +@cython.freelist(64) +cdef class SharedBlock: + """ + Defining __init__ in a cython class significantly improves performance. + """ + cdef: + public BlockPlacement _mgr_locs + readonly int ndim + + def __cinit__(self, values, placement: BlockPlacement, ndim: int): + """ + Parameters + ---------- + values : np.ndarray or ExtensionArray + We assume maybe_coerce_values has already been called. 
+ placement : BlockPlacement + ndim : int + 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame + """ + self._mgr_locs = placement + self.ndim = ndim + + cpdef __reduce__(self): + args = (self.values, self.mgr_locs.indexer, self.ndim) + return _unpickle_block, args + + cpdef __setstate__(self, state): + from pandas.core.construction import extract_array + + self.mgr_locs = BlockPlacement(state[0]) + self.values = extract_array(state[1], extract_numpy=True) + if len(state) > 2: + # we stored ndim + self.ndim = state[2] + else: + # older pickle + from pandas.core.internals.api import maybe_infer_ndim + + ndim = maybe_infer_ndim(self.values, self.mgr_locs) + self.ndim = ndim + + +cdef class NumpyBlock(SharedBlock): + cdef: + public ndarray values + + def __cinit__(self, ndarray values, BlockPlacement placement, int ndim): + # set values here the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values + + cpdef NumpyBlock getitem_block_index(self, slice slicer): + """ + Perform __getitem__-like specialized to slicing along index. + + Assumes self.ndim == 2 + """ + new_values = self.values[..., slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + + +cdef class NDArrayBackedBlock(SharedBlock): + """ + Block backed by NDArrayBackedExtensionArray + """ + cdef public: + NDArrayBacked values + + def __cinit__(self, NDArrayBacked values, BlockPlacement placement, int ndim): + # set values here the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values + + cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer): + """ + Perform __getitem__-like specialized to slicing along index. + + Assumes self.ndim == 2 + """ + new_values = self.values[..., slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + + +cdef class Block(SharedBlock): + cdef: + public object values + + def __cinit__(self, object values, BlockPlacement placement, int ndim): + # set values here the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values + + +@cython.freelist(64) +cdef class BlockManager: + cdef: + public tuple blocks + public list axes + public bint _known_consolidated, _is_consolidated + public ndarray _blknos, _blklocs + public list refs + public object parent + + def __cinit__(self, blocks=None, axes=None, refs=None, parent=None, verify_integrity=True): + # None as defaults for unpickling GH#42345 + if blocks is None: + # This adds 1-2 microseconds to DataFrame(np.array([])) + return + + if isinstance(blocks, list): + # Backward compat for e.g. pyarrow + blocks = tuple(blocks) + + self.blocks = blocks + self.axes = axes.copy() # copy to make sure we are not remotely-mutable + self.refs = refs + self.parent = parent + + # Populate known_consolidate, blknos, and blklocs lazily + self._known_consolidated = False + self._is_consolidated = False + self._blknos = None + self._blklocs = None + + # ------------------------------------------------------------------- + # Block Placement + + def _rebuild_blknos_and_blklocs(self) -> None: + """ + Update mgr._blknos / mgr._blklocs. 
+ """ + cdef: + intp_t blkno, i, j + cnp.npy_intp length = self.shape[0] + SharedBlock blk + BlockPlacement bp + ndarray[intp_t, ndim=1] new_blknos, new_blklocs + + # equiv: np.empty(length, dtype=np.intp) + new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) + new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) + # equiv: new_blknos.fill(-1) + cnp.PyArray_FILLWBYTE(new_blknos, -1) + cnp.PyArray_FILLWBYTE(new_blklocs, -1) + + for blkno, blk in enumerate(self.blocks): + bp = blk._mgr_locs + # Iterating over `bp` is a faster equivalent to + # new_blknos[bp.indexer] = blkno + # new_blklocs[bp.indexer] = np.arange(len(bp)) + for i, j in enumerate(bp): + new_blknos[j] = blkno + new_blklocs[j] = i + + for i in range(length): + # faster than `for blkno in new_blknos` + # https://github.com/cython/cython/issues/4393 + blkno = new_blknos[i] + + # If there are any -1s remaining, this indicates that our mgr_locs + # are invalid. + if blkno == -1: + raise AssertionError("Gaps in blk ref_locs") + + self._blknos = new_blknos + self._blklocs = new_blklocs + + # ------------------------------------------------------------------- + # Pickle + + cpdef __reduce__(self): + if len(self.axes) == 1: + # SingleBlockManager, __init__ expects Block, axis + args = (self.blocks[0], self.axes[0]) + else: + args = (self.blocks, self.axes) + return type(self), args + + cpdef __setstate__(self, state): + from pandas.core.construction import extract_array + from pandas.core.internals.blocks import ( + ensure_block_shape, + new_block, + ) + from pandas.core.internals.managers import ensure_index + + if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]: + state = state[3]["0.14.1"] + axes = [ensure_index(ax) for ax in state["axes"]] + ndim = len(axes) + + for blk in state["blocks"]: + vals = blk["values"] + # older versions may hold e.g. 
DatetimeIndex instead of DTA + vals = extract_array(vals, extract_numpy=True) + blk["values"] = ensure_block_shape(vals, ndim=ndim) + + nbs = [ + new_block(blk["values"], blk["mgr_locs"], ndim=ndim) + for blk in state["blocks"] + ] + blocks = tuple(nbs) + self.blocks = blocks + self.axes = axes + + else: # pragma: no cover + raise NotImplementedError("pre-0.14.1 pickles are no longer supported") + + self._post_setstate() + + def _post_setstate(self) -> None: + self._is_consolidated = False + self._known_consolidated = False + self._rebuild_blknos_and_blklocs() + + # ------------------------------------------------------------------- + # Indexing + + cdef BlockManager _get_index_slice(self, slobj): + cdef: + SharedBlock blk, nb + BlockManager mgr + ndarray blknos, blklocs + + nbs = [] + nrefs = [] + for blk in self.blocks: + nb = blk.getitem_block_index(slobj) + nbs.append(nb) + nrefs.append(weakref.ref(blk)) + + new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)] + mgr = type(self)( + tuple(nbs), new_axes, nrefs, parent=self, verify_integrity=False + ) + + # We can avoid having to rebuild blklocs/blknos + blklocs = self._blklocs + blknos = self._blknos + if blknos is not None: + mgr._blknos = blknos.copy() + mgr._blklocs = blklocs.copy() + return mgr + + def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: + + if axis == 0: + new_blocks, new_refs = self._slice_take_blocks_ax0(slobj) + elif axis == 1: + return self._get_index_slice(slobj) + else: + raise IndexError("Requested axis not found in manager") + + new_axes = list(self.axes) + new_axes[axis] = new_axes[axis]._getitem_slice(slobj) + + return type(self)( + tuple(new_blocks), new_axes, new_refs, parent=self, verify_integrity=False + ) diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi new file mode 100644 index 00000000..4c36246e --- /dev/null +++ b/pandas/_libs/interval.pyi @@ -0,0 +1,174 @@ +from typing import ( + Any, + Generic, + TypeVar, + overload, +) + +import numpy as np +import numpy.typing as npt + +from pandas._typing import ( + IntervalClosedType, + Timedelta, + Timestamp, +) + +VALID_CLOSED: frozenset[str] + +_OrderableScalarT = TypeVar("_OrderableScalarT", int, float) +_OrderableTimesT = TypeVar("_OrderableTimesT", Timestamp, Timedelta) +_OrderableT = TypeVar("_OrderableT", int, float, Timestamp, Timedelta) + +class _LengthDescriptor: + @overload + def __get__( + self, instance: Interval[_OrderableScalarT], owner: Any + ) -> _OrderableScalarT: ... + @overload + def __get__( + self, instance: Interval[_OrderableTimesT], owner: Any + ) -> Timedelta: ... + +class _MidDescriptor: + @overload + def __get__(self, instance: Interval[_OrderableScalarT], owner: Any) -> float: ... + @overload + def __get__( + self, instance: Interval[_OrderableTimesT], owner: Any + ) -> _OrderableTimesT: ... + +class IntervalMixin: + @property + def closed_left(self) -> bool: ... + @property + def closed_right(self) -> bool: ... + @property + def open_left(self) -> bool: ... + @property + def open_right(self) -> bool: ... + @property + def is_empty(self) -> bool: ... + def _check_closed_matches(self, other: IntervalMixin, name: str = ...) -> None: ... + +class Interval(IntervalMixin, Generic[_OrderableT]): + @property + def left(self: Interval[_OrderableT]) -> _OrderableT: ... + @property + def right(self: Interval[_OrderableT]) -> _OrderableT: ... + @property + def closed(self) -> IntervalClosedType: ... 
+ mid: _MidDescriptor + length: _LengthDescriptor + def __init__( + self, + left: _OrderableT, + right: _OrderableT, + closed: IntervalClosedType = ..., + ) -> None: ... + def __hash__(self) -> int: ... + @overload + def __contains__( + self: Interval[Timedelta], key: Timedelta | Interval[Timedelta] + ) -> bool: ... + @overload + def __contains__( + self: Interval[Timestamp], key: Timestamp | Interval[Timestamp] + ) -> bool: ... + @overload + def __contains__( + self: Interval[_OrderableScalarT], + key: _OrderableScalarT | Interval[_OrderableScalarT], + ) -> bool: ... + @overload + def __add__( + self: Interval[_OrderableTimesT], y: Timedelta + ) -> Interval[_OrderableTimesT]: ... + @overload + def __add__( + self: Interval[int], y: _OrderableScalarT + ) -> Interval[_OrderableScalarT]: ... + @overload + def __add__(self: Interval[float], y: float) -> Interval[float]: ... + @overload + def __radd__( + self: Interval[_OrderableTimesT], y: Timedelta + ) -> Interval[_OrderableTimesT]: ... + @overload + def __radd__( + self: Interval[int], y: _OrderableScalarT + ) -> Interval[_OrderableScalarT]: ... + @overload + def __radd__(self: Interval[float], y: float) -> Interval[float]: ... + @overload + def __sub__( + self: Interval[_OrderableTimesT], y: Timedelta + ) -> Interval[_OrderableTimesT]: ... + @overload + def __sub__( + self: Interval[int], y: _OrderableScalarT + ) -> Interval[_OrderableScalarT]: ... + @overload + def __sub__(self: Interval[float], y: float) -> Interval[float]: ... + @overload + def __rsub__( + self: Interval[_OrderableTimesT], y: Timedelta + ) -> Interval[_OrderableTimesT]: ... + @overload + def __rsub__( + self: Interval[int], y: _OrderableScalarT + ) -> Interval[_OrderableScalarT]: ... + @overload + def __rsub__(self: Interval[float], y: float) -> Interval[float]: ... + @overload + def __mul__( + self: Interval[int], y: _OrderableScalarT + ) -> Interval[_OrderableScalarT]: ... + @overload + def __mul__(self: Interval[float], y: float) -> Interval[float]: ... + @overload + def __rmul__( + self: Interval[int], y: _OrderableScalarT + ) -> Interval[_OrderableScalarT]: ... + @overload + def __rmul__(self: Interval[float], y: float) -> Interval[float]: ... + @overload + def __truediv__( + self: Interval[int], y: _OrderableScalarT + ) -> Interval[_OrderableScalarT]: ... + @overload + def __truediv__(self: Interval[float], y: float) -> Interval[float]: ... + @overload + def __floordiv__( + self: Interval[int], y: _OrderableScalarT + ) -> Interval[_OrderableScalarT]: ... + @overload + def __floordiv__(self: Interval[float], y: float) -> Interval[float]: ... + def overlaps(self: Interval[_OrderableT], other: Interval[_OrderableT]) -> bool: ... + +def intervals_to_interval_bounds( + intervals: np.ndarray, validate_closed: bool = ... +) -> tuple[np.ndarray, np.ndarray, str]: ... + +class IntervalTree(IntervalMixin): + def __init__( + self, + left: np.ndarray, + right: np.ndarray, + closed: IntervalClosedType = ..., + leaf_size: int = ..., + ) -> None: ... + @property + def mid(self) -> np.ndarray: ... + @property + def length(self) -> np.ndarray: ... + def get_indexer(self, target) -> npt.NDArray[np.intp]: ... + def get_indexer_non_unique( + self, target + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... + _na_count: int + @property + def is_overlapping(self) -> bool: ... + @property + def is_monotonic_increasing(self) -> bool: ... + def clear_mapping(self) -> None: ... 
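The interval.pyi stubs above encode, via overloads, that arithmetic with a scalar shifts or scales both bounds and that length/mid change type with the endpoint type. A minimal runtime sketch of that behaviour through the public pd.Interval API (plain usage notes, not part of the patch):

    import pandas as pd

    iv = pd.Interval(0, 5, closed="right")      # Interval[int] in the stub's terms
    assert 5 in iv and 0 not in iv              # closed='right' means 0 < x <= 5

    shifted = iv + 3        # Interval(3, 8, closed='right'): both bounds shift
    scaled = iv * 2.0       # Interval(0.0, 10.0, closed='right'): int -> float bounds
    print(shifted.length)   # 5    (scalar endpoints give a scalar length)
    print(scaled.mid)       # 5.0  (_MidDescriptor returns float for scalar intervals)

    ts_iv = pd.Interval(pd.Timestamp("2023-01-01"), pd.Timestamp("2023-02-01"))
    print(ts_iv.length)     # Timedelta('31 days 00:00:00'), per the Timestamp overload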
diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx new file mode 100644 index 00000000..67c92a0f --- /dev/null +++ b/pandas/_libs/interval.pyx @@ -0,0 +1,589 @@ +import inspect +import numbers +from operator import ( + le, + lt, +) + +from cpython.datetime cimport ( + PyDelta_Check, + import_datetime, +) + +import_datetime() + +cimport cython +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, + PyObject_RichCompare, +) +from cython cimport Py_ssize_t + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + NPY_QUICKSORT, + PyArray_ArgSort, + PyArray_Take, + float32_t, + float64_t, + int32_t, + int64_t, + ndarray, + uint64_t, +) + +cnp.import_array() + + +from pandas._libs cimport util +from pandas._libs.hashtable cimport Int64Vector +from pandas._libs.tslibs.timedeltas cimport _Timedelta +from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport tz_compare +from pandas._libs.tslibs.util cimport ( + is_float_object, + is_integer_object, + is_timedelta64_object, +) + +VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) + + +cdef class IntervalMixin: + + @property + def closed_left(self): + """ + Check if the interval is closed on the left side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return self.closed in ('left', 'both') + + @property + def closed_right(self): + """ + Check if the interval is closed on the right side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return self.closed in ('right', 'both') + + @property + def open_left(self): + """ + Check if the interval is open on the left side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is not closed on the left-side. + """ + return not self.closed_left + + @property + def open_right(self): + """ + Check if the interval is open on the right side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is not closed on the left-side. + """ + return not self.closed_right + + @property + def mid(self): + """ + Return the midpoint of the Interval. + """ + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * self.length + + @property + def length(self): + """ + Return the length of the Interval. + """ + return self.right - self.left + + @property + def is_empty(self): + """ + Indicates if an interval is empty, meaning it contains no points. + + .. versionadded:: 0.25.0 + + Returns + ------- + bool or ndarray + A boolean indicating if a scalar :class:`Interval` is empty, or a + boolean ``ndarray`` positionally indicating if an ``Interval`` in + an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is + empty. 
+ + Examples + -------- + An :class:`Interval` that contains points is not empty: + + >>> pd.Interval(0, 1, closed='right').is_empty + False + + An ``Interval`` that does not contain any points is empty: + + >>> pd.Interval(0, 0, closed='right').is_empty + True + >>> pd.Interval(0, 0, closed='left').is_empty + True + >>> pd.Interval(0, 0, closed='neither').is_empty + True + + An ``Interval`` that contains a single point is not empty: + + >>> pd.Interval(0, 0, closed='both').is_empty + False + + An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a + boolean ``ndarray`` positionally indicating if an ``Interval`` is + empty: + + >>> ivs = [pd.Interval(0, 0, closed='neither'), + ... pd.Interval(1, 2, closed='neither')] + >>> pd.arrays.IntervalArray(ivs).is_empty + array([ True, False]) + + Missing values are not considered empty: + + >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan] + >>> pd.IntervalIndex(ivs).is_empty + array([ True, False]) + """ + return (self.right == self.left) & (self.closed != 'both') + + def _check_closed_matches(self, other, name='other'): + """ + Check if the closed attribute of `other` matches. + + Note that 'left' and 'right' are considered different from 'both'. + + Parameters + ---------- + other : Interval, IntervalIndex, IntervalArray + name : str + Name to use for 'other' in the error message. + + Raises + ------ + ValueError + When `other` is not closed exactly the same as self. + """ + if self.closed != other.closed: + raise ValueError(f"'{name}.closed' is {repr(other.closed)}, " + f"expected {repr(self.closed)}.") + + +cdef bint _interval_like(other): + return (hasattr(other, 'left') + and hasattr(other, 'right') + and hasattr(other, 'closed')) + + +cdef class Interval(IntervalMixin): + """ + Immutable object implementing an Interval, a bounded slice-like interval. + + Parameters + ---------- + left : orderable scalar + Left bound for the interval. + right : orderable scalar + Right bound for the interval. + closed : {'right', 'left', 'both', 'neither'}, default 'right' + Whether the interval is closed on the left-side, right-side, both or + neither. See the Notes for more detailed explanation. + + See Also + -------- + IntervalIndex : An Index of Interval objects that are all closed on the + same side. + cut : Convert continuous data into discrete bins (Categorical + of Interval objects). + qcut : Convert continuous data into bins (Categorical of Interval objects) + based on quantiles. + Period : Represents a period of time. + + Notes + ----- + The parameters `left` and `right` must be from the same type, you must be + able to compare them and they must satisfy ``left <= right``. + + A closed interval (in mathematics denoted by square brackets) contains + its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the + conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for. + An open interval (in mathematics denoted by parentheses) does not contain + its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the + conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for. + Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is + described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is + described by ``0 < x <= 5`` (``closed='right'``). 
+ + Examples + -------- + It is possible to build Intervals of different types, like numeric ones: + + >>> iv = pd.Interval(left=0, right=5) + >>> iv + Interval(0, 5, closed='right') + + You can check if an element belongs to it, or if it contains another interval: + + >>> 2.5 in iv + True + >>> pd.Interval(left=2, right=5, closed='both') in iv + True + + You can test the bounds (``closed='right'``, so ``0 < x <= 5``): + + >>> 0 in iv + False + >>> 5 in iv + True + >>> 0.0001 in iv + True + + Calculate its length + + >>> iv.length + 5 + + You can operate with `+` and `*` over an Interval and the operation + is applied to each of its bounds, so the result depends on the type + of the bound elements + + >>> shifted_iv = iv + 3 + >>> shifted_iv + Interval(3, 8, closed='right') + >>> extended_iv = iv * 10.0 + >>> extended_iv + Interval(0.0, 50.0, closed='right') + + To create a time interval you can use Timestamps as the bounds + + >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'), + ... pd.Timestamp('2018-01-01 00:00:00'), + ... closed='left') + >>> pd.Timestamp('2017-01-01 00:00') in year_2017 + True + >>> year_2017.length + Timedelta('365 days 00:00:00') + """ + _typ = "interval" + __array_priority__ = 1000 + + cdef readonly object left + """ + Left bound for the interval. + """ + + cdef readonly object right + """ + Right bound for the interval. + """ + + cdef readonly str closed + """ + String describing the inclusive side the intervals. + + Either ``left``, ``right``, ``both`` or ``neither``. + """ + + def __init__(self, left, right, str closed='right'): + # note: it is faster to just do these checks than to use a special + # constructor (__cinit__/__new__) to avoid them + + self._validate_endpoint(left) + self._validate_endpoint(right) + + if closed not in VALID_CLOSED: + raise ValueError(f"invalid option for 'closed': {closed}") + if not left <= right: + raise ValueError("left side of interval must be <= right side") + if (isinstance(left, _Timestamp) and + not tz_compare(left.tzinfo, right.tzinfo)): + # GH 18538 + raise ValueError("left and right must have the same time zone, got " + f"{repr(left.tzinfo)}' and {repr(right.tzinfo)}") + self.left = left + self.right = right + self.closed = closed + + def _validate_endpoint(self, endpoint): + # GH 23013 + if not (is_integer_object(endpoint) or is_float_object(endpoint) or + isinstance(endpoint, (_Timestamp, _Timedelta))): + raise ValueError("Only numeric, Timestamp and Timedelta endpoints " + "are allowed when constructing an Interval.") + + def __hash__(self): + return hash((self.left, self.right, self.closed)) + + def __contains__(self, key) -> bool: + if _interval_like(key): + key_closed_left = key.closed in ('left', 'both') + key_closed_right = key.closed in ('right', 'both') + if self.open_left and key_closed_left: + left_contained = self.left < key.left + else: + left_contained = self.left <= key.left + if self.open_right and key_closed_right: + right_contained = key.right < self.right + else: + right_contained = key.right <= self.right + return left_contained and right_contained + return ((self.left < key if self.open_left else self.left <= key) and + (key < self.right if self.open_right else key <= self.right)) + + def __richcmp__(self, other, op: int): + if isinstance(other, Interval): + self_tuple = (self.left, self.right, self.closed) + other_tuple = (other.left, other.right, other.closed) + return PyObject_RichCompare(self_tuple, other_tuple, op) + elif util.is_array(other): + return np.array( + 
[PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) + + return NotImplemented + + def __reduce__(self): + args = (self.left, self.right, self.closed) + return (type(self), args) + + def _repr_base(self): + left = self.left + right = self.right + + # TODO: need more general formatting methodology here + if isinstance(left, _Timestamp) and isinstance(right, _Timestamp): + left = left._short_repr + right = right._short_repr + + return left, right + + def __repr__(self) -> str: + + left, right = self._repr_base() + name = type(self).__name__ + repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})' + return repr_str + + def __str__(self) -> str: + + left, right = self._repr_base() + start_symbol = '[' if self.closed_left else '(' + end_symbol = ']' if self.closed_right else ')' + return f'{start_symbol}{left}, {right}{end_symbol}' + + def __add__(self, y): + if ( + isinstance(y, numbers.Number) + or PyDelta_Check(y) + or is_timedelta64_object(y) + ): + return Interval(self.left + y, self.right + y, closed=self.closed) + elif ( + # __radd__ pattern + # TODO(cython3): remove this + isinstance(y, Interval) + and ( + isinstance(self, numbers.Number) + or PyDelta_Check(self) + or is_timedelta64_object(self) + ) + ): + return Interval(y.left + self, y.right + self, closed=y.closed) + return NotImplemented + + def __radd__(self, other): + if ( + isinstance(other, numbers.Number) + or PyDelta_Check(other) + or is_timedelta64_object(other) + ): + return Interval(self.left + other, self.right + other, closed=self.closed) + return NotImplemented + + def __sub__(self, y): + if ( + isinstance(y, numbers.Number) + or PyDelta_Check(y) + or is_timedelta64_object(y) + ): + return Interval(self.left - y, self.right - y, closed=self.closed) + return NotImplemented + + def __mul__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left * y, self.right * y, closed=self.closed) + elif isinstance(y, Interval) and isinstance(self, numbers.Number): + # __radd__ semantics + # TODO(cython3): remove this + return Interval(y.left * self, y.right * self, closed=y.closed) + return NotImplemented + + def __rmul__(self, other): + if isinstance(other, numbers.Number): + return Interval(self.left * other, self.right * other, closed=self.closed) + return NotImplemented + + def __truediv__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left / y, self.right / y, closed=self.closed) + return NotImplemented + + def __floordiv__(self, y): + if isinstance(y, numbers.Number): + return Interval( + self.left // y, self.right // y, closed=self.closed) + return NotImplemented + + def overlaps(self, other): + """ + Check whether two Interval objects overlap. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + Parameters + ---------- + other : Interval + Interval to check against for an overlap. + + Returns + ------- + bool + True if the two intervals overlap. + + See Also + -------- + IntervalArray.overlaps : The corresponding method for IntervalArray. + IntervalIndex.overlaps : The corresponding method for IntervalIndex. 
+ + Examples + -------- + >>> i1 = pd.Interval(0, 2) + >>> i2 = pd.Interval(1, 3) + >>> i1.overlaps(i2) + True + >>> i3 = pd.Interval(4, 5) + >>> i1.overlaps(i3) + False + + Intervals that share closed endpoints overlap: + + >>> i4 = pd.Interval(0, 1, closed='both') + >>> i5 = pd.Interval(1, 2, closed='both') + >>> i4.overlaps(i5) + True + + Intervals that only have an open endpoint in common do not overlap: + + >>> i6 = pd.Interval(1, 2, closed='neither') + >>> i4.overlaps(i6) + False + """ + if not isinstance(other, Interval): + raise TypeError("`other` must be an Interval, " + f"got {type(other).__name__}") + + # equality is okay if both endpoints are closed (overlap at a point) + op1 = le if (self.closed_left and other.closed_right) else lt + op2 = le if (other.closed_left and self.closed_right) else lt + + # overlaps is equivalent negation of two interval being disjoint: + # disjoint = (A.left > B.right) or (B.left > A.right) + # (simplifying the negation allows this to be done in less operations) + return op1(self.left, other.right) and op2(other.left, self.right) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True): + """ + Parameters + ---------- + intervals : ndarray + Object array of Intervals / nulls. + + validate_closed: bool, default True + Boolean indicating if all intervals must be closed on the same side. + Mismatching closed will raise if True, else return None for closed. + + Returns + ------- + tuple of + left : ndarray + right : ndarray + closed: str + """ + cdef: + object closed = None, interval + Py_ssize_t i, n = len(intervals) + ndarray left, right + bint seen_closed = False + + left = np.empty(n, dtype=intervals.dtype) + right = np.empty(n, dtype=intervals.dtype) + + for i in range(n): + interval = intervals[i] + if interval is None or util.is_nan(interval): + left[i] = np.nan + right[i] = np.nan + continue + + if not isinstance(interval, Interval): + raise TypeError(f"type {type(interval)} with value " + f"{interval} is not an interval") + + left[i] = interval.left + right[i] = interval.right + if not seen_closed: + seen_closed = True + closed = interval.closed + elif closed != interval.closed: + closed = None + if validate_closed: + raise ValueError("intervals must all be closed on the same side") + + return left, right, closed + + +include "intervaltree.pxi" diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in new file mode 100644 index 00000000..e7a31051 --- /dev/null +++ b/pandas/_libs/intervaltree.pxi.in @@ -0,0 +1,434 @@ +""" +Template for intervaltree + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +from pandas._libs.algos import is_monotonic + +ctypedef fused int_scalar_t: + int64_t + float64_t + +ctypedef fused uint_scalar_t: + uint64_t + float64_t + +ctypedef fused scalar_t: + int_scalar_t + uint_scalar_t + +# ---------------------------------------------------------------------- +# IntervalTree +# ---------------------------------------------------------------------- + +cdef class IntervalTree(IntervalMixin): + """A centered interval tree + + Based off the algorithm described on Wikipedia: + https://en.wikipedia.org/wiki/Interval_tree + + we are emulating the IndexEngine interface + """ + cdef readonly: + ndarray left, right + IntervalNode root + object dtype + str closed + object _is_overlapping, _left_sorter, _right_sorter + Py_ssize_t _na_count + + def __init__(self, left, right, closed='right', 
leaf_size=100): + """ + Parameters + ---------- + left, right : np.ndarray[ndim=1] + Left and right bounds for each interval. Assumed to contain no + NaNs. + closed : {'left', 'right', 'both', 'neither'}, optional + Whether the intervals are closed on the left-side, right-side, both + or neither. Defaults to 'right'. + leaf_size : int, optional + Parameter that controls when the tree switches from creating nodes + to brute-force search. Tune this parameter to optimize query + performance. + """ + if closed not in ['left', 'right', 'both', 'neither']: + raise ValueError("invalid option for 'closed': %s" % closed) + + left = np.asarray(left) + right = np.asarray(right) + self.dtype = np.result_type(left, right) + self.left = np.asarray(left, dtype=self.dtype) + self.right = np.asarray(right, dtype=self.dtype) + + indices = np.arange(len(left), dtype='int64') + + self.closed = closed + + # GH 23352: ensure no nan in nodes + mask = ~np.isnan(self.left) + self._na_count = len(mask) - mask.sum() + self.left = self.left[mask] + self.right = self.right[mask] + indices = indices[mask] + + node_cls = NODE_CLASSES[str(self.dtype), closed] + self.root = node_cls(self.left, self.right, indices, leaf_size) + + @property + def left_sorter(self) -> np.ndarray: + """How to sort the left labels; this is used for binary search + """ + if self._left_sorter is None: + self._left_sorter = np.argsort(self.left) + return self._left_sorter + + @property + def right_sorter(self) -> np.ndarray: + """How to sort the right labels + """ + if self._right_sorter is None: + self._right_sorter = np.argsort(self.right) + return self._right_sorter + + @property + def is_overlapping(self) -> bool: + """ + Determine if the IntervalTree contains overlapping intervals. + Cached as self._is_overlapping. + """ + if self._is_overlapping is not None: + return self._is_overlapping + + # <= when both sides closed since endpoints can overlap + op = le if self.closed == 'both' else lt + + # overlap if start of current interval < end of previous interval + # (current and previous in terms of sorted order by left/start side) + current = self.left[self.left_sorter[1:]] + previous = self.right[self.left_sorter[:-1]] + self._is_overlapping = bool(op(current, previous).any()) + + return self._is_overlapping + + @property + def is_monotonic_increasing(self) -> bool: + """ + Return True if the IntervalTree is monotonic increasing (only equal or + increasing values), else False + """ + if self._na_count > 0: + return False + values = [self.right, self.left] + + sort_order = np.lexsort(values) + return is_monotonic(sort_order, False)[0] + + def get_indexer(self, scalar_t[:] target) -> np.ndarray: + """Return the positions corresponding to unique intervals that overlap + with the given array of scalar targets. + """ + + # TODO: write get_indexer_intervals + cdef: + Py_ssize_t old_len + Py_ssize_t i + Int64Vector result + + result = Int64Vector() + old_len = 0 + for i in range(len(target)): + try: + self.root.query(result, target[i]) + except OverflowError: + # overflow -> no match, which is already handled below + pass + + if result.data.n == old_len: + result.append(-1) + elif result.data.n > old_len + 1: + raise KeyError( + 'indexer does not intersect a unique set of intervals') + old_len = result.data.n + return result.to_array().astype('intp') + + def get_indexer_non_unique(self, scalar_t[:] target): + """Return the positions corresponding to intervals that overlap with + the given array of scalar targets. Non-unique positions are repeated. 
+ """ + cdef: + Py_ssize_t old_len + Py_ssize_t i + Int64Vector result, missing + + result = Int64Vector() + missing = Int64Vector() + old_len = 0 + for i in range(len(target)): + try: + self.root.query(result, target[i]) + except OverflowError: + # overflow -> no match, which is already handled below + pass + + if result.data.n == old_len: + result.append(-1) + missing.append(i) + old_len = result.data.n + return (result.to_array().astype('intp'), + missing.to_array().astype('intp')) + + def __repr__(self) -> str: + return (''.format( + dtype=self.dtype, closed=self.closed, + n_elements=self.root.n_elements)) + + # compat with IndexEngine interface + def clear_mapping(self) -> None: + pass + + +cdef take(ndarray source, ndarray indices): + """Take the given positions from a 1D ndarray + """ + return PyArray_Take(source, indices, 0) + + +cdef sort_values_and_indices(all_values, all_indices, subset): + indices = take(all_indices, subset) + values = take(all_values, subset) + sorter = PyArray_ArgSort(values, 0, NPY_QUICKSORT) + sorted_values = take(values, sorter) + sorted_indices = take(indices, sorter) + return sorted_values, sorted_indices + + +# ---------------------------------------------------------------------- +# Nodes +# ---------------------------------------------------------------------- + +@cython.internal +cdef class IntervalNode: + cdef readonly: + int64_t n_elements, n_center, leaf_size + bint is_leaf_node + + def __repr__(self) -> str: + if self.is_leaf_node: + return ( + f"<{type(self).__name__}: {self.n_elements} elements (terminal)>" + ) + else: + n_left = self.left_node.n_elements + n_right = self.right_node.n_elements + n_center = self.n_elements - n_left - n_right + return ( + f"<{type(self).__name__}: " + f"pivot {self.pivot}, {self.n_elements} elements " + f"({n_left} left, {n_right} right, {n_center} overlapping)>" + ) + + def counts(self): + """ + Inspect counts on this node + useful for debugging purposes + """ + if self.is_leaf_node: + return self.n_elements + else: + m = len(self.center_left_values) + l = self.left_node.counts() + r = self.right_node.counts() + return (m, (l, r)) + + +# we need specialized nodes and leaves to optimize for different dtype and +# closed values + +{{py: + +nodes = [] +for dtype in ['float64', 'int64', 'uint64']: + for closed, cmp_left, cmp_right in [ + ('left', '<=', '<'), + ('right', '<', '<='), + ('both', '<=', '<='), + ('neither', '<', '<')]: + cmp_left_converse = '<' if cmp_left == '<=' else '<=' + cmp_right_converse = '<' if cmp_right == '<=' else '<=' + if dtype.startswith('int'): + fused_prefix = 'int_' + elif dtype.startswith('uint'): + fused_prefix = 'uint_' + elif dtype.startswith('float'): + fused_prefix = '' + nodes.append((dtype, dtype.title(), + closed, closed.title(), + cmp_left, + cmp_right, + cmp_left_converse, + cmp_right_converse, + fused_prefix)) + +}} + +NODE_CLASSES = {} + +{{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right, + cmp_left_converse, cmp_right_converse, fused_prefix in nodes}} + + +@cython.internal +cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode(IntervalNode): + """Non-terminal node for an IntervalTree + + Categorizes intervals by those that fall to the left, those that fall to + the right, and those that overlap with the pivot. 
+ """ + cdef readonly: + {{dtype_title}}Closed{{closed_title}}IntervalNode left_node, right_node + {{dtype}}_t[:] center_left_values, center_right_values, left, right + int64_t[:] center_left_indices, center_right_indices, indices + {{dtype}}_t min_left, max_right + {{dtype}}_t pivot + + def __init__(self, + ndarray[{{dtype}}_t, ndim=1] left, + ndarray[{{dtype}}_t, ndim=1] right, + ndarray[int64_t, ndim=1] indices, + int64_t leaf_size): + + self.n_elements = len(left) + self.leaf_size = leaf_size + + # min_left and min_right are used to speed-up query by skipping + # query on sub-nodes. If this node has size 0, query is cheap, + # so these values don't matter. + if left.size > 0: + self.min_left = left.min() + self.max_right = right.max() + else: + self.min_left = 0 + self.max_right = 0 + + if self.n_elements <= leaf_size: + # make this a terminal (leaf) node + self.is_leaf_node = True + self.left = left + self.right = right + self.indices = indices + self.n_center = 0 + else: + # calculate a pivot so we can create child nodes + self.is_leaf_node = False + self.pivot = np.median(left / 2 + right / 2) + if np.isinf(self.pivot): + self.pivot = cython.cast({{dtype}}_t, 0) + if self.pivot > np.max(right): + self.pivot = np.max(left) + if self.pivot < np.min(left): + self.pivot = np.min(right) + + left_set, right_set, center_set = self.classify_intervals( + left, right) + + self.left_node = self.new_child_node(left, right, + indices, left_set) + self.right_node = self.new_child_node(left, right, + indices, right_set) + + self.center_left_values, self.center_left_indices = \ + sort_values_and_indices(left, indices, center_set) + self.center_right_values, self.center_right_indices = \ + sort_values_and_indices(right, indices, center_set) + self.n_center = len(self.center_left_indices) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef classify_intervals(self, {{dtype}}_t[:] left, {{dtype}}_t[:] right): + """Classify the given intervals based upon whether they fall to the + left, right, or overlap with this node's pivot. + """ + cdef: + Int64Vector left_ind, right_ind, overlapping_ind + Py_ssize_t i + + left_ind = Int64Vector() + right_ind = Int64Vector() + overlapping_ind = Int64Vector() + + for i in range(self.n_elements): + if right[i] {{cmp_right_converse}} self.pivot: + left_ind.append(i) + elif self.pivot {{cmp_left_converse}} left[i]: + right_ind.append(i) + else: + overlapping_ind.append(i) + + return (left_ind.to_array(), + right_ind.to_array(), + overlapping_ind.to_array()) + + cdef new_child_node(self, + ndarray[{{dtype}}_t, ndim=1] left, + ndarray[{{dtype}}_t, ndim=1] right, + ndarray[int64_t, ndim=1] indices, + ndarray[int64_t, ndim=1] subset): + """Create a new child node. + """ + left = take(left, subset) + right = take(right, subset) + indices = take(indices, subset) + return {{dtype_title}}Closed{{closed_title}}IntervalNode( + left, right, indices, self.leaf_size) + + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.initializedcheck(False) + cpdef query(self, Int64Vector result, {{fused_prefix}}scalar_t point): + """Recursively query this node and its sub-nodes for intervals that + overlap with the query point. + """ + cdef: + int64_t[:] indices + {{dtype}}_t[:] values + Py_ssize_t i + + if self.is_leaf_node: + # Once we get down to a certain size, it doesn't make sense to + # continue the binary tree structure. Instead, we use linear + # search. 
+ for i in range(self.n_elements): + if self.left[i] {{cmp_left}} point {{cmp_right}} self.right[i]: + result.append(self.indices[i]) + else: + # There are child nodes. Based on comparing our query to the pivot, + # look at the center values, then go to the relevant child. + if point < self.pivot: + values = self.center_left_values + indices = self.center_left_indices + for i in range(self.n_center): + if not values[i] {{cmp_left}} point: + break + result.append(indices[i]) + if point {{cmp_right}} self.left_node.max_right: + self.left_node.query(result, point) + elif point > self.pivot: + values = self.center_right_values + indices = self.center_right_indices + for i in range(self.n_center - 1, -1, -1): + if not point {{cmp_right}} values[i]: + break + result.append(indices[i]) + if self.right_node.min_left {{cmp_left}} point: + self.right_node.query(result, point) + else: + result.extend(self.center_left_indices) + + +NODE_CLASSES['{{dtype}}', + '{{closed}}'] = {{dtype_title}}Closed{{closed_title}}IntervalNode + +{{endfor}} diff --git a/pandas/_libs/join.pyi b/pandas/_libs/join.pyi new file mode 100644 index 00000000..11b65b85 --- /dev/null +++ b/pandas/_libs/join.pyi @@ -0,0 +1,78 @@ +import numpy as np + +from pandas._typing import npt + +def inner_join( + left: np.ndarray, # const intp_t[:] + right: np.ndarray, # const intp_t[:] + max_groups: int, +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def left_outer_join( + left: np.ndarray, # const intp_t[:] + right: np.ndarray, # const intp_t[:] + max_groups: int, + sort: bool = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def full_outer_join( + left: np.ndarray, # const intp_t[:] + right: np.ndarray, # const intp_t[:] + max_groups: int, +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def ffill_indexer( + indexer: np.ndarray, # const intp_t[:] +) -> npt.NDArray[np.intp]: ... +def left_join_indexer_unique( + left: np.ndarray, # ndarray[join_t] + right: np.ndarray, # ndarray[join_t] +) -> npt.NDArray[np.intp]: ... +def left_join_indexer( + left: np.ndarray, # ndarray[join_t] + right: np.ndarray, # ndarray[join_t] +) -> tuple[ + np.ndarray, # np.ndarray[join_t] + npt.NDArray[np.intp], + npt.NDArray[np.intp], +]: ... +def inner_join_indexer( + left: np.ndarray, # ndarray[join_t] + right: np.ndarray, # ndarray[join_t] +) -> tuple[ + np.ndarray, # np.ndarray[join_t] + npt.NDArray[np.intp], + npt.NDArray[np.intp], +]: ... +def outer_join_indexer( + left: np.ndarray, # ndarray[join_t] + right: np.ndarray, # ndarray[join_t] +) -> tuple[ + np.ndarray, # np.ndarray[join_t] + npt.NDArray[np.intp], + npt.NDArray[np.intp], +]: ... +def asof_join_backward_on_X_by_Y( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + left_by_values: np.ndarray, # by_t[:] + right_by_values: np.ndarray, # by_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | float | None = ..., + use_hashtable: bool = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... +def asof_join_forward_on_X_by_Y( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + left_by_values: np.ndarray, # by_t[:] + right_by_values: np.ndarray, # by_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | float | None = ..., + use_hashtable: bool = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... 
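The asof-join kernels declared in this stub file are what pd.merge_asof dispatches to: for each left value, take the last right value at or before it (backward), the first at or after it (forward), or whichever is nearer, optionally per 'by' group and within a tolerance. A hedged usage-level sketch through the public API (the frames and column names are invented for illustration):

    import pandas as pd

    trades = pd.DataFrame({"time": pd.to_datetime(["10:00:01", "10:00:03"]),
                           "ticker": ["A", "A"]})
    quotes = pd.DataFrame({"time": pd.to_datetime(["10:00:00", "10:00:02"]),
                           "ticker": ["A", "A"],
                           "bid": [100.0, 101.0]})

    # direction="backward" corresponds to asof_join_backward_on_X_by_Y:
    # each trade takes the most recent quote at or before its timestamp, per ticker.
    out = pd.merge_asof(trades, quotes, on="time", by="ticker",
                        direction="backward", tolerance=pd.Timedelta("2s"))
    print(out["bid"].tolist())   # [100.0, 101.0]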
+def asof_join_nearest_on_X_by_Y( + left_values: np.ndarray, # asof_t[:] + right_values: np.ndarray, # asof_t[:] + left_by_values: np.ndarray, # by_t[:] + right_by_values: np.ndarray, # by_t[:] + allow_exact_matches: bool = ..., + tolerance: np.number | float | None = ..., + use_hashtable: bool = ..., +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx new file mode 100644 index 00000000..cc7d863b --- /dev/null +++ b/pandas/_libs/join.pyx @@ -0,0 +1,887 @@ +cimport cython +from cython cimport Py_ssize_t +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int64_t, + intp_t, + ndarray, + uint64_t, +) + +cnp.import_array() + +from pandas._libs.algos import groupsort_indexer + +from pandas._libs.dtypes cimport ( + numeric_object_t, + numeric_t, +) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def inner_join(const intp_t[:] left, const intp_t[:] right, + Py_ssize_t max_groups): + cdef: + Py_ssize_t i, j, k, count = 0 + intp_t[::1] left_sorter, right_sorter + intp_t[::1] left_count, right_count + intp_t[::1] left_indexer, right_indexer + intp_t lc, rc + Py_ssize_t left_pos = 0, right_pos = 0, position = 0 + Py_ssize_t offset + + left_sorter, left_count = groupsort_indexer(left, max_groups) + right_sorter, right_count = groupsort_indexer(right, max_groups) + + with nogil: + # First pass, determine size of result set, do not use the NA group + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc > 0 and lc > 0: + count += lc * rc + + left_indexer = np.empty(count, dtype=np.intp) + right_indexer = np.empty(count, dtype=np.intp) + + with nogil: + # exclude the NA group + left_pos = left_count[0] + right_pos = right_count[0] + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc > 0 and lc > 0: + for j in range(lc): + offset = position + j * rc + for k in range(rc): + left_indexer[offset + k] = left_pos + j + right_indexer[offset + k] = right_pos + k + position += lc * rc + left_pos += lc + right_pos += rc + + # Will overwrite left/right indexer with the result + _get_result_indexer(left_sorter, left_indexer) + _get_result_indexer(right_sorter, right_indexer) + + return np.asarray(left_indexer), np.asarray(right_indexer) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_outer_join(const intp_t[:] left, const intp_t[:] right, + Py_ssize_t max_groups, bint sort=True): + cdef: + Py_ssize_t i, j, k, count = 0 + ndarray[intp_t] rev + intp_t[::1] left_count, right_count + intp_t[::1] left_sorter, right_sorter + intp_t[::1] left_indexer, right_indexer + intp_t lc, rc + Py_ssize_t left_pos = 0, right_pos = 0, position = 0 + Py_ssize_t offset + + left_sorter, left_count = groupsort_indexer(left, max_groups) + right_sorter, right_count = groupsort_indexer(right, max_groups) + + with nogil: + # First pass, determine size of result set, do not use the NA group + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc > 0: + count += lc * rc + else: + count += lc + + left_indexer = np.empty(count, dtype=np.intp) + right_indexer = np.empty(count, dtype=np.intp) + + with nogil: + # exclude the NA group + left_pos = left_count[0] + right_pos = right_count[0] + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc == 0: + for j in range(lc): + left_indexer[position + j] = left_pos + j + right_indexer[position + j] = -1 + position += lc + else: + for j in range(lc): + 
offset = position + j * rc + for k in range(rc): + left_indexer[offset + k] = left_pos + j + right_indexer[offset + k] = right_pos + k + position += lc * rc + left_pos += lc + right_pos += rc + + # Will overwrite left/right indexer with the result + _get_result_indexer(left_sorter, left_indexer) + _get_result_indexer(right_sorter, right_indexer) + + if not sort: # if not asked to sort, revert to original order + if len(left) == len(left_indexer): + # no multiple matches for any row on the left + # this is a short-cut to avoid groupsort_indexer + # otherwise, the `else` path also works in this case + rev = np.empty(len(left), dtype=np.intp) + rev.put(np.asarray(left_sorter), np.arange(len(left))) + else: + rev, _ = groupsort_indexer(left_indexer, len(left)) + + return np.asarray(left_indexer).take(rev), np.asarray(right_indexer).take(rev) + else: + return np.asarray(left_indexer), np.asarray(right_indexer) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def full_outer_join(const intp_t[:] left, const intp_t[:] right, + Py_ssize_t max_groups): + cdef: + Py_ssize_t i, j, k, count = 0 + intp_t[::1] left_sorter, right_sorter + intp_t[::1] left_count, right_count + intp_t[::1] left_indexer, right_indexer + intp_t lc, rc + intp_t left_pos = 0, right_pos = 0 + Py_ssize_t offset, position = 0 + + left_sorter, left_count = groupsort_indexer(left, max_groups) + right_sorter, right_count = groupsort_indexer(right, max_groups) + + with nogil: + # First pass, determine size of result set, do not use the NA group + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc > 0 and lc > 0: + count += lc * rc + else: + count += lc + rc + + left_indexer = np.empty(count, dtype=np.intp) + right_indexer = np.empty(count, dtype=np.intp) + + with nogil: + # exclude the NA group + left_pos = left_count[0] + right_pos = right_count[0] + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc == 0: + for j in range(lc): + left_indexer[position + j] = left_pos + j + right_indexer[position + j] = -1 + position += lc + elif lc == 0: + for j in range(rc): + left_indexer[position + j] = -1 + right_indexer[position + j] = right_pos + j + position += rc + else: + for j in range(lc): + offset = position + j * rc + for k in range(rc): + left_indexer[offset + k] = left_pos + j + right_indexer[offset + k] = right_pos + k + position += lc * rc + left_pos += lc + right_pos += rc + + # Will overwrite left/right indexer with the result + _get_result_indexer(left_sorter, left_indexer) + _get_result_indexer(right_sorter, right_indexer) + + return np.asarray(left_indexer), np.asarray(right_indexer) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef void _get_result_indexer(intp_t[::1] sorter, intp_t[::1] indexer) nogil: + """NOTE: overwrites indexer with the result to avoid allocating another array""" + cdef: + Py_ssize_t i, n, idx + + if len(sorter) > 0: + # cython-only equivalent to + # `res = algos.take_nd(sorter, indexer, fill_value=-1)` + n = indexer.shape[0] + for i in range(n): + idx = indexer[i] + if idx == -1: + indexer[i] = -1 + else: + indexer[i] = sorter[idx] + else: + # length-0 case + indexer[:] = -1 + + +@cython.wraparound(False) +@cython.boundscheck(False) +def ffill_indexer(const intp_t[:] indexer) -> np.ndarray: + cdef: + Py_ssize_t i, n = len(indexer) + ndarray[intp_t] result + intp_t val, last_obs + + result = np.empty(n, dtype=np.intp) + last_obs = -1 + + for i in range(n): + val = indexer[i] + if val == -1: + result[i] = 
last_obs + else: + result[i] = val + last_obs = val + + return result + + +# ---------------------------------------------------------------------- +# left_join_indexer, inner_join_indexer, outer_join_indexer +# ---------------------------------------------------------------------- + +# Joins on ordered, unique indices + +# right might contain non-unique values + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer_unique( + ndarray[numeric_object_t] left, + ndarray[numeric_object_t] right +): + """ + Both left and right are strictly monotonic increasing. + """ + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[intp_t] indexer + numeric_object_t lval, rval + + i = 0 + j = 0 + nleft = len(left) + nright = len(right) + + indexer = np.empty(nleft, dtype=np.intp) + while True: + if i == nleft: + break + + if j == nright: + indexer[i] = -1 + i += 1 + continue + + rval = right[j] + + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + + if left[i] == right[j]: + indexer[i] = j + i += 1 + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + j += 1 + elif left[i] > rval: + indexer[i] = -1 + j += 1 + else: + indexer[i] = -1 + i += 1 + return indexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right): + """ + Two-pass algorithm for monotonic indexes. Handles many-to-one merges. + + Both left and right are monotonic increasing, but at least one of them + is non-unique (if both were unique we'd use left_join_indexer_unique). + """ + cdef: + Py_ssize_t i, j, k, nright, nleft, count + numeric_object_t lval, rval + ndarray[intp_t] lindexer, rindexer + ndarray[numeric_object_t] result + + nleft = len(left) + nright = len(right) + + # First pass is to find the size 'count' of our output indexers. + i = 0 + j = 0 + count = 0 + if nleft > 0: + while i < nleft: + if j == nright: + count += nleft - i + break + + lval = left[i] + rval = right[j] + + if lval == rval: + # This block is identical across + # left_join_indexer, inner_join_indexer, outer_join_indexer + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + count += 1 + i += 1 + else: + j += 1 + + # do it again now that result size is known + + lindexer = np.empty(count, dtype=np.intp) + rindexer = np.empty(count, dtype=np.intp) + result = np.empty(count, dtype=left.dtype) + + i = 0 + j = 0 + count = 0 + if nleft > 0: + while i < nleft: + if j == nright: + while i < nleft: + lindexer[count] = i + rindexer[count] = -1 + result[count] = left[i] + i += 1 + count += 1 + break + + lval = left[i] + rval = right[j] + + if lval == rval: + lindexer[count] = i + rindexer[count] = j + result[count] = lval + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + # i.e. lval not in right; we keep for left_join_indexer + lindexer[count] = i + rindexer[count] = -1 + result[count] = lval + count += 1 + i += 1 + else: + # i.e. 
rval not in left; we discard for left_join_indexer + j += 1 + + return result, lindexer, rindexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right): + """ + Two-pass algorithm for monotonic indexes. Handles many-to-one merges. + + Both left and right are monotonic increasing but not necessarily unique. + """ + cdef: + Py_ssize_t i, j, k, nright, nleft, count + numeric_object_t lval, rval + ndarray[intp_t] lindexer, rindexer + ndarray[numeric_object_t] result + + nleft = len(left) + nright = len(right) + + # First pass is to find the size 'count' of our output indexers. + i = 0 + j = 0 + count = 0 + if nleft > 0 and nright > 0: + while True: + if i == nleft: + break + if j == nright: + break + + lval = left[i] + rval = right[j] + if lval == rval: + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + # i.e. lval not in right; we discard for inner_indexer + i += 1 + else: + # i.e. rval not in left; we discard for inner_indexer + j += 1 + + # do it again now that result size is known + + lindexer = np.empty(count, dtype=np.intp) + rindexer = np.empty(count, dtype=np.intp) + result = np.empty(count, dtype=left.dtype) + + i = 0 + j = 0 + count = 0 + if nleft > 0 and nright > 0: + while True: + if i == nleft: + break + if j == nright: + break + + lval = left[i] + rval = right[j] + if lval == rval: + lindexer[count] = i + rindexer[count] = j + result[count] = lval + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + # i.e. lval not in right; we discard for inner_indexer + i += 1 + else: + # i.e. rval not in left; we discard for inner_indexer + j += 1 + + return result, lindexer, rindexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right): + """ + Both left and right are monotonic increasing but not necessarily unique. + """ + cdef: + Py_ssize_t i, j, nright, nleft, count + numeric_object_t lval, rval + ndarray[intp_t] lindexer, rindexer + ndarray[numeric_object_t] result + + nleft = len(left) + nright = len(right) + + # First pass is to find the size 'count' of our output indexers. 
+ # count will be length of left plus the number of elements of right not in + # left (counting duplicates) + i = 0 + j = 0 + count = 0 + if nleft == 0: + count = nright + elif nright == 0: + count = nleft + else: + while True: + if i == nleft: + count += nright - j + break + if j == nright: + count += nleft - i + break + + lval = left[i] + rval = right[j] + if lval == rval: + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + count += 1 + i += 1 + else: + count += 1 + j += 1 + + lindexer = np.empty(count, dtype=np.intp) + rindexer = np.empty(count, dtype=np.intp) + result = np.empty(count, dtype=left.dtype) + + # do it again, but populate the indexers / result + + i = 0 + j = 0 + count = 0 + if nleft == 0: + for j in range(nright): + lindexer[j] = -1 + rindexer[j] = j + result[j] = right[j] + elif nright == 0: + for i in range(nleft): + lindexer[i] = i + rindexer[i] = -1 + result[i] = left[i] + else: + while True: + if i == nleft: + while j < nright: + lindexer[count] = -1 + rindexer[count] = j + result[count] = right[j] + count += 1 + j += 1 + break + if j == nright: + while i < nleft: + lindexer[count] = i + rindexer[count] = -1 + result[count] = left[i] + count += 1 + i += 1 + break + + lval = left[i] + rval = right[j] + + if lval == rval: + lindexer[count] = i + rindexer[count] = j + result[count] = lval + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + # i.e. lval not in right; we keep for outer_join_indexer + lindexer[count] = i + rindexer[count] = -1 + result[count] = lval + count += 1 + i += 1 + else: + # i.e. 
rval not in left; we keep for outer_join_indexer + lindexer[count] = -1 + rindexer[count] = j + result[count] = rval + count += 1 + j += 1 + + return result, lindexer, rindexer + + +# ---------------------------------------------------------------------- +# asof_join_by +# ---------------------------------------------------------------------- + +from pandas._libs.hashtable cimport ( + HashTable, + Int64HashTable, + PyObjectHashTable, + UInt64HashTable, +) + +ctypedef fused by_t: + object + int64_t + uint64_t + + +def asof_join_backward_on_X_by_Y(numeric_t[:] left_values, + numeric_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, + bint allow_exact_matches=True, + tolerance=None, + bint use_hashtable=True): + + cdef: + Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos + ndarray[intp_t] left_indexer, right_indexer + bint has_tolerance = False + numeric_t tolerance_ = 0 + numeric_t diff = 0 + HashTable hash_table + by_t by_value + + # if we are using tolerance, set our objects + if tolerance is not None: + has_tolerance = True + tolerance_ = tolerance + + left_size = len(left_values) + right_size = len(right_values) + + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + if use_hashtable: + if by_t is object: + hash_table = PyObjectHashTable(right_size) + elif by_t is int64_t: + hash_table = Int64HashTable(right_size) + elif by_t is uint64_t: + hash_table = UInt64HashTable(right_size) + + right_pos = 0 + for left_pos in range(left_size): + # restart right_pos if it went negative in a previous iteration + if right_pos < 0: + right_pos = 0 + + # find last position in right whose value is less than left's + if allow_exact_matches: + while (right_pos < right_size and + right_values[right_pos] <= left_values[left_pos]): + if use_hashtable: + hash_table.set_item(right_by_values[right_pos], right_pos) + right_pos += 1 + else: + while (right_pos < right_size and + right_values[right_pos] < left_values[left_pos]): + if use_hashtable: + hash_table.set_item(right_by_values[right_pos], right_pos) + right_pos += 1 + right_pos -= 1 + + # save positions as the desired index + if use_hashtable: + by_value = left_by_values[left_pos] + found_right_pos = (hash_table.get_item(by_value) + if by_value in hash_table else -1) + else: + found_right_pos = right_pos + + left_indexer[left_pos] = left_pos + right_indexer[left_pos] = found_right_pos + + # if needed, verify that tolerance is met + if has_tolerance and found_right_pos != -1: + diff = left_values[left_pos] - right_values[found_right_pos] + if diff > tolerance_: + right_indexer[left_pos] = -1 + + return left_indexer, right_indexer + + +def asof_join_forward_on_X_by_Y(numeric_t[:] left_values, + numeric_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, + bint allow_exact_matches=1, + tolerance=None, + bint use_hashtable=True): + + cdef: + Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos + ndarray[intp_t] left_indexer, right_indexer + bint has_tolerance = False + numeric_t tolerance_ = 0 + numeric_t diff = 0 + HashTable hash_table + by_t by_value + + # if we are using tolerance, set our objects + if tolerance is not None: + has_tolerance = True + tolerance_ = tolerance + + left_size = len(left_values) + right_size = len(right_values) + + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + if use_hashtable: + if by_t is object: + hash_table = 
PyObjectHashTable(right_size) + elif by_t is int64_t: + hash_table = Int64HashTable(right_size) + elif by_t is uint64_t: + hash_table = UInt64HashTable(right_size) + + right_pos = right_size - 1 + for left_pos in range(left_size - 1, -1, -1): + # restart right_pos if it went over in a previous iteration + if right_pos == right_size: + right_pos = right_size - 1 + + # find first position in right whose value is greater than left's + if allow_exact_matches: + while (right_pos >= 0 and + right_values[right_pos] >= left_values[left_pos]): + if use_hashtable: + hash_table.set_item(right_by_values[right_pos], right_pos) + right_pos -= 1 + else: + while (right_pos >= 0 and + right_values[right_pos] > left_values[left_pos]): + if use_hashtable: + hash_table.set_item(right_by_values[right_pos], right_pos) + right_pos -= 1 + right_pos += 1 + + # save positions as the desired index + if use_hashtable: + by_value = left_by_values[left_pos] + found_right_pos = (hash_table.get_item(by_value) + if by_value in hash_table else -1) + else: + found_right_pos = (right_pos + if right_pos != right_size else -1) + + left_indexer[left_pos] = left_pos + right_indexer[left_pos] = found_right_pos + + # if needed, verify that tolerance is met + if has_tolerance and found_right_pos != -1: + diff = right_values[found_right_pos] - left_values[left_pos] + if diff > tolerance_: + right_indexer[left_pos] = -1 + + return left_indexer, right_indexer + + +def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values, + numeric_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, + bint allow_exact_matches=True, + tolerance=None, + bint use_hashtable=True): + + cdef: + ndarray[intp_t] bli, bri, fli, fri + + ndarray[intp_t] left_indexer, right_indexer + Py_ssize_t left_size, i + numeric_t bdiff, fdiff + + # search both forward and backward + bli, bri = asof_join_backward_on_X_by_Y( + left_values, + right_values, + left_by_values, + right_by_values, + allow_exact_matches, + tolerance, + use_hashtable + ) + fli, fri = asof_join_forward_on_X_by_Y( + left_values, + right_values, + left_by_values, + right_by_values, + allow_exact_matches, + tolerance, + use_hashtable + ) + + # choose the smaller timestamp + left_size = len(left_values) + left_indexer = np.empty(left_size, dtype=np.intp) + right_indexer = np.empty(left_size, dtype=np.intp) + + for i in range(len(bri)): + # choose timestamp from right with smaller difference + if bri[i] != -1 and fri[i] != -1: + bdiff = left_values[bli[i]] - right_values[bri[i]] + fdiff = right_values[fri[i]] - left_values[fli[i]] + right_indexer[i] = bri[i] if bdiff <= fdiff else fri[i] + else: + right_indexer[i] = bri[i] if bri[i] != -1 else fri[i] + left_indexer[i] = bli[i] + + return left_indexer, right_indexer diff --git a/pandas/_libs/json.pyi b/pandas/_libs/json.pyi new file mode 100644 index 00000000..8e7ba60c --- /dev/null +++ b/pandas/_libs/json.pyi @@ -0,0 +1,23 @@ +from typing import ( + Any, + Callable, +) + +def dumps( + obj: Any, + ensure_ascii: bool = ..., + double_precision: int = ..., + indent: int = ..., + orient: str = ..., + date_unit: str = ..., + iso_dates: bool = ..., + default_handler: None + | Callable[[Any], str | float | bool | list | dict | None] = ..., +) -> str: ... +def loads( + s: str, + precise_float: bool = ..., + numpy: bool = ..., + dtype: None = ..., + labelled: bool = ..., +) -> Any: ... 
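The json.pyi stubs above describe the vendored ultrajson bindings. A small sketch, assuming the internal module path pandas._libs.json is importable as built here; since this is an internal API, the public DataFrame.to_json / pandas.read_json wrappers remain the supported route:

    from pandas._libs.json import dumps, loads

    payload = {"a": [1, 2, 3], "pi": 3.14159}
    text = dumps(payload, double_precision=3)   # roughly '{"a":[1,2,3],"pi":3.142}'
    assert loads(text)["a"] == [1, 2, 3]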
diff --git a/pandas/_libs/khash.pxd b/pandas/_libs/khash.pxd new file mode 100644 index 00000000..a9f819e5 --- /dev/null +++ b/pandas/_libs/khash.pxd @@ -0,0 +1,129 @@ +from cpython.object cimport PyObject +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + + +cdef extern from "khash_python.h": + const int KHASH_TRACE_DOMAIN + + ctypedef uint32_t khuint_t + ctypedef khuint_t khiter_t + + ctypedef struct khcomplex128_t: + double real + double imag + + bint are_equivalent_khcomplex128_t \ + "kh_complex_hash_equal" (khcomplex128_t a, khcomplex128_t b) nogil + + ctypedef struct khcomplex64_t: + float real + float imag + + bint are_equivalent_khcomplex64_t \ + "kh_complex_hash_equal" (khcomplex64_t a, khcomplex64_t b) nogil + + bint are_equivalent_float64_t \ + "kh_floats_hash_equal" (float64_t a, float64_t b) nogil + + bint are_equivalent_float32_t \ + "kh_floats_hash_equal" (float32_t a, float32_t b) nogil + + uint32_t kh_python_hash_func(object key) + bint kh_python_hash_equal(object a, object b) + + ctypedef struct kh_pymap_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + PyObject **keys + size_t *vals + + kh_pymap_t* kh_init_pymap() + void kh_destroy_pymap(kh_pymap_t*) + void kh_clear_pymap(kh_pymap_t*) + khuint_t kh_get_pymap(kh_pymap_t*, PyObject*) + void kh_resize_pymap(kh_pymap_t*, khuint_t) + khuint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) + void kh_del_pymap(kh_pymap_t*, khuint_t) + + bint kh_exist_pymap(kh_pymap_t*, khiter_t) + + ctypedef struct kh_pyset_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + PyObject **keys + size_t *vals + + kh_pyset_t* kh_init_pyset() + void kh_destroy_pyset(kh_pyset_t*) + void kh_clear_pyset(kh_pyset_t*) + khuint_t kh_get_pyset(kh_pyset_t*, PyObject*) + void kh_resize_pyset(kh_pyset_t*, khuint_t) + khuint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) + void kh_del_pyset(kh_pyset_t*, khuint_t) + + bint kh_exist_pyset(kh_pyset_t*, khiter_t) + + ctypedef char* kh_cstr_t + + ctypedef struct kh_str_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + kh_cstr_t *keys + size_t *vals + + kh_str_t* kh_init_str() nogil + void kh_destroy_str(kh_str_t*) nogil + void kh_clear_str(kh_str_t*) nogil + khuint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil + void kh_resize_str(kh_str_t*, khuint_t) nogil + khuint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil + void kh_del_str(kh_str_t*, khuint_t) nogil + + bint kh_exist_str(kh_str_t*, khiter_t) nogil + + ctypedef struct kh_str_starts_t: + kh_str_t *table + int starts[256] + + kh_str_starts_t* kh_init_str_starts() nogil + khuint_t kh_put_str_starts_item(kh_str_starts_t* table, char* key, + int* ret) nogil + khuint_t kh_get_str_starts_item(kh_str_starts_t* table, char* key) nogil + void kh_destroy_str_starts(kh_str_starts_t*) nogil + void kh_resize_str_starts(kh_str_starts_t*, khuint_t) nogil + + # sweep factorize + + ctypedef struct kh_strbox_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + kh_cstr_t *keys + PyObject **vals + + kh_strbox_t* kh_init_strbox() nogil + void kh_destroy_strbox(kh_strbox_t*) nogil + void kh_clear_strbox(kh_strbox_t*) nogil + khuint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil + void kh_resize_strbox(kh_strbox_t*, khuint_t) nogil + khuint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil + void kh_del_strbox(kh_strbox_t*, khuint_t) nogil + + bint kh_exist_strbox(kh_strbox_t*, 
khiter_t) nogil + + khuint_t kh_needed_n_buckets(khuint_t element_n) nogil + + +include "khash_for_primitive_helper.pxi" diff --git a/pandas/_libs/khash_for_primitive_helper.pxi.in b/pandas/_libs/khash_for_primitive_helper.pxi.in new file mode 100644 index 00000000..d0934b3e --- /dev/null +++ b/pandas/_libs/khash_for_primitive_helper.pxi.in @@ -0,0 +1,44 @@ +""" +Template for wrapping khash-tables for each primitive `dtype` + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +{{py: + +# name, c_type +primitive_types = [('int64', 'int64_t'), + ('uint64', 'uint64_t'), + ('float64', 'float64_t'), + ('int32', 'int32_t'), + ('uint32', 'uint32_t'), + ('float32', 'float32_t'), + ('int16', 'int16_t'), + ('uint16', 'uint16_t'), + ('int8', 'int8_t'), + ('uint8', 'uint8_t'), + ('complex64', 'khcomplex64_t'), + ('complex128', 'khcomplex128_t'), + ] +}} + +{{for name, c_type in primitive_types}} + +cdef extern from "khash_python.h": + ctypedef struct kh_{{name}}_t: + khuint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + {{c_type}} *keys + size_t *vals + + kh_{{name}}_t* kh_init_{{name}}() nogil + void kh_destroy_{{name}}(kh_{{name}}_t*) nogil + void kh_clear_{{name}}(kh_{{name}}_t*) nogil + khuint_t kh_get_{{name}}(kh_{{name}}_t*, {{c_type}}) nogil + void kh_resize_{{name}}(kh_{{name}}_t*, khuint_t) nogil + khuint_t kh_put_{{name}}(kh_{{name}}_t*, {{c_type}}, int*) nogil + void kh_del_{{name}}(kh_{{name}}_t*, khuint_t) nogil + + bint kh_exist_{{name}}(kh_{{name}}_t*, khiter_t) nogil + +{{endfor}} diff --git a/pandas/_libs/lib.pxd b/pandas/_libs/lib.pxd new file mode 100644 index 00000000..46a339f2 --- /dev/null +++ b/pandas/_libs/lib.pxd @@ -0,0 +1,6 @@ +from numpy cimport ndarray + + +cdef bint c_is_list_like(object, bint) except -1 + +cpdef ndarray eq_NA_compat(ndarray[object] arr, object key) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi new file mode 100644 index 00000000..77d3cbe9 --- /dev/null +++ b/pandas/_libs/lib.pyi @@ -0,0 +1,233 @@ +# TODO(npdtypes): Many types specified here can be made more specific/accurate; +# the more specific versions are specified in comments + +from typing import ( + Any, + Callable, + Final, + Generator, + Hashable, + Literal, + overload, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + DtypeObj, + npt, +) + +# placeholder until we can specify np.ndarray[object, ndim=2] +ndarray_obj_2d = np.ndarray + +from enum import Enum + +class _NoDefault(Enum): + no_default = ... + +no_default: Final = _NoDefault.no_default +NoDefault = Literal[_NoDefault.no_default] + +i8max: int +u8max: int + +def item_from_zerodim(val: object) -> object: ... +def infer_dtype(value: object, skipna: bool = ...) -> str: ... +def is_iterator(obj: object) -> bool: ... +def is_scalar(val: object) -> bool: ... +def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ... +def is_period(val: object) -> bool: ... +def is_interval(val: object) -> bool: ... +def is_decimal(val: object) -> bool: ... +def is_complex(val: object) -> bool: ... +def is_bool(val: object) -> bool: ... +def is_integer(val: object) -> bool: ... +def is_float(val: object) -> bool: ... +def is_interval_array(values: np.ndarray) -> bool: ... +def is_datetime64_array(values: np.ndarray) -> bool: ... +def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ... +def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ... +def is_time_array(values: np.ndarray, skipna: bool = ...): ... 
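# Editor's illustration (not part of the imported build machinery): the
# khash_for_primitive_helper.pxi.in file above is a Tempita-style template;
# at build time each (name, c_type) pair is substituted into the block to
# produce one set of khash wrappers per primitive dtype.  A minimal,
# hypothetical sketch of that substitution idea in plain Python — the real
# expansion is done by pandas' build tooling, not by this snippet:
primitive_types = [("int64", "int64_t"), ("float64", "float64_t")]  # subset only

block = """\
cdef extern from "khash_python.h":
    ctypedef struct kh_{name}_t:
        khuint_t n_buckets, size, n_occupied, upper_bound
        uint32_t *flags
        {c_type} *keys
        size_t *vals

    kh_{name}_t* kh_init_{name}() nogil
    khuint_t kh_get_{name}(kh_{name}_t*, {c_type}) nogil
    khuint_t kh_put_{name}(kh_{name}_t*, {c_type}, int*) nogil
"""

if __name__ == "__main__":
    # Print what the generated .pxi would roughly contain for each dtype.
    for name, c_type in primitive_types:
        print(block.format(name=name, c_type=c_type))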
+def is_date_array(values: np.ndarray, skipna: bool = ...): ... +def is_datetime_array(values: np.ndarray, skipna: bool = ...): ... +def is_string_array(values: np.ndarray, skipna: bool = ...): ... +def is_float_array(values: np.ndarray, skipna: bool = ...): ... +def is_integer_array(values: np.ndarray, skipna: bool = ...): ... +def is_bool_array(values: np.ndarray, skipna: bool = ...): ... +def fast_multiget(mapping: dict, keys: np.ndarray, default=...) -> np.ndarray: ... +def fast_unique_multiple_list_gen(gen: Generator, sort: bool = ...) -> list: ... +def fast_unique_multiple_list(lists: list, sort: bool | None = ...) -> list: ... +def fast_unique_multiple(arrays: list, sort: bool = ...) -> list: ... +def map_infer( + arr: np.ndarray, + f: Callable[[Any], Any], + convert: bool = ..., + ignore_na: bool = ..., +) -> np.ndarray: ... +@overload # both convert_datetime and convert_to_nullable_integer False -> np.ndarray +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: Literal[False] = ..., + convert_timedelta: bool = ..., + convert_period: Literal[False] = ..., + convert_interval: Literal[False] = ..., + convert_to_nullable_integer: Literal[False] = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> np.ndarray: ... +@overload +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: bool = ..., + convert_timedelta: bool = ..., + convert_period: bool = ..., + convert_interval: bool = ..., + convert_to_nullable_integer: Literal[True] = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> ArrayLike: ... +@overload +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: Literal[True] = ..., + convert_timedelta: bool = ..., + convert_period: bool = ..., + convert_interval: bool = ..., + convert_to_nullable_integer: bool = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> ArrayLike: ... +@overload +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: bool = ..., + convert_timedelta: bool = ..., + convert_period: Literal[True] = ..., + convert_interval: bool = ..., + convert_to_nullable_integer: bool = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> ArrayLike: ... +@overload +def maybe_convert_objects( + objects: npt.NDArray[np.object_], + *, + try_float: bool = ..., + safe: bool = ..., + convert_datetime: bool = ..., + convert_timedelta: bool = ..., + convert_period: bool = ..., + convert_interval: bool = ..., + convert_to_nullable_integer: bool = ..., + dtype_if_all_nat: DtypeObj | None = ..., +) -> ArrayLike: ... +@overload +def maybe_convert_numeric( + values: npt.NDArray[np.object_], + na_values: set, + convert_empty: bool = ..., + coerce_numeric: bool = ..., + convert_to_masked_nullable: Literal[False] = ..., +) -> tuple[np.ndarray, None]: ... +@overload +def maybe_convert_numeric( + values: npt.NDArray[np.object_], + na_values: set, + convert_empty: bool = ..., + coerce_numeric: bool = ..., + *, + convert_to_masked_nullable: Literal[True], +) -> tuple[np.ndarray, np.ndarray]: ... + +# TODO: restrict `arr`? +def ensure_string_array( + arr, + na_value: object = ..., + convert_na_value: bool = ..., + copy: bool = ..., + skipna: bool = ..., +) -> npt.NDArray[np.object_]: ... +def infer_datetimelike_array( + arr: npt.NDArray[np.object_], +) -> tuple[str, bool]: ... 
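# Editor's illustration: a minimal, self-contained sketch of the
# @overload + Literal pattern used by the maybe_convert_objects /
# maybe_convert_numeric stubs above — the literal value of a boolean flag
# selects a more precise return type for static type checkers, while a
# single runtime implementation serves both cases.  The names `convert`
# and `as_pairs` are hypothetical and not part of pandas.
from __future__ import annotations

from typing import Literal, overload


@overload
def convert(values: list[str], as_pairs: Literal[False] = ...) -> list[int]: ...
@overload
def convert(values: list[str], as_pairs: Literal[True]) -> list[tuple[int, int]]: ...


def convert(values: list[str], as_pairs: bool = False):
    # One runtime implementation; the overloads above only guide the checker.
    nums = [int(v) for v in values]
    if as_pairs:
        return list(enumerate(nums))
    return nums


if __name__ == "__main__":
    print(convert(["1", "2"]))                 # checker infers list[int]
    print(convert(["1", "2"], as_pairs=True))  # checker infers list[tuple[int, int]]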
+def convert_nans_to_NA( + arr: npt.NDArray[np.object_], +) -> npt.NDArray[np.object_]: ... +def fast_zip(ndarrays: list) -> npt.NDArray[np.object_]: ... + +# TODO: can we be more specific about rows? +def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ... +def tuples_to_object_array( + tuples: npt.NDArray[np.object_], +) -> ndarray_obj_2d: ... + +# TODO: can we be more specific about rows? +def to_object_array(rows: object, min_width: int = ...) -> ndarray_obj_2d: ... +def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ... +def maybe_booleans_to_slice( + mask: npt.NDArray[np.uint8], +) -> slice | npt.NDArray[np.uint8]: ... +def maybe_indices_to_slice( + indices: npt.NDArray[np.intp], + max_len: int, +) -> slice | npt.NDArray[np.intp]: ... +def is_all_arraylike(obj: list) -> bool: ... + +# ----------------------------------------------------------------- +# Functions which in reality take memoryviews + +def memory_usage_of_objects(arr: np.ndarray) -> int: ... # object[:] # np.int64 +def map_infer_mask( + arr: np.ndarray, + f: Callable[[Any], Any], + mask: np.ndarray, # const uint8_t[:] + convert: bool = ..., + na_value: Any = ..., + dtype: np.dtype = ..., +) -> np.ndarray: ... +def indices_fast( + index: npt.NDArray[np.intp], + labels: np.ndarray, # const int64_t[:] + keys: list, + sorted_labels: list[npt.NDArray[np.int64]], +) -> dict[Hashable, npt.NDArray[np.intp]]: ... +def generate_slices( + labels: np.ndarray, ngroups: int # const intp_t[:] +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ... +def count_level_2d( + mask: np.ndarray, # ndarray[uint8_t, ndim=2, cast=True], + labels: np.ndarray, # const intp_t[:] + max_bin: int, + axis: int, +) -> np.ndarray: ... # np.ndarray[np.int64, ndim=2] +def get_level_sorter( + label: np.ndarray, # const int64_t[:] + starts: np.ndarray, # const intp_t[:] +) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1] +def generate_bins_dt64( + values: npt.NDArray[np.int64], + binner: np.ndarray, # const int64_t[:] + closed: object = ..., + hasnans: bool = ..., +) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1] +def array_equivalent_object( + left: np.ndarray, # object[:] + right: np.ndarray, # object[:] +) -> bool: ... +def has_infs(arr: np.ndarray) -> bool: ... # const floating[:] +def get_reverse_indexer( + indexer: np.ndarray, # const intp_t[:] + length: int, +) -> npt.NDArray[np.intp]: ... +def is_bool_list(obj: list) -> bool: ... +def dtypes_all_equal(types: list[DtypeObj]) -> bool: ... 
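As a hedged usage sketch (it assumes a working pandas install and is not part of the imported sources), the inference routines implemented in lib.pyx below are reachable through the public pandas.api.types namespace; the expected results follow the docstrings in that module:

import numpy as np
import pandas as pd

print(pd.api.types.is_scalar(3.5))                                 # True
print(pd.api.types.is_scalar((0, 2)))                              # False
print(pd.api.types.is_list_like("foo"))                            # False: strings are not list-like
print(pd.api.types.is_list_like(np.array(2)))                      # False: zero-dim arrays count as scalars
print(pd.api.types.infer_dtype([1, 2, 3.5]))                       # 'mixed-integer-float'
print(pd.api.types.infer_dtype(["a", np.nan, "b"], skipna=True))   # 'string'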
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx new file mode 100644 index 00000000..d2c2697c --- /dev/null +++ b/pandas/_libs/lib.pyx @@ -0,0 +1,3147 @@ +from collections import abc +from decimal import Decimal +from enum import Enum +from typing import ( + Literal, + _GenericAlias, +) +import warnings + +cimport cython +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDelta_Check, + PyTime_Check, + import_datetime, +) +from cpython.iterator cimport PyIter_Check +from cpython.number cimport PyNumber_Check +from cpython.object cimport ( + Py_EQ, + PyObject_RichCompareBool, + PyTypeObject, +) +from cpython.ref cimport Py_INCREF +from cpython.sequence cimport PySequence_Check +from cpython.tuple cimport ( + PyTuple_New, + PyTuple_SET_ITEM, +) +from cython cimport ( + Py_ssize_t, + floating, +) + +from pandas.util._exceptions import find_stack_level + +import_datetime() + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + NPY_OBJECT, + PyArray_Check, + PyArray_GETITEM, + PyArray_ITER_DATA, + PyArray_ITER_NEXT, + PyArray_IterNew, + complex128_t, + flatiter, + float32_t, + float64_t, + int64_t, + intp_t, + ndarray, + uint8_t, + uint64_t, +) + +cnp.import_array() + +cdef extern from "Python.h": + # Note: importing extern-style allows us to declare these as nogil + # functions, whereas `from cpython cimport` does not. + bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + +cdef extern from "numpy/arrayobject.h": + # cython's numpy.dtype specification is incorrect, which leads to + # errors in issubclass(self.dtype.type, np.bool_), so we directly + # include the correct version + # https://github.com/cython/cython/issues/2022 + + ctypedef class numpy.dtype [object PyArray_Descr]: + # Use PyDataType_* macros when possible, however there are no macros + # for accessing some of the fields, so some are defined. Please + # ask on cython-dev if you need more. 
+ cdef: + int type_num + int itemsize "elsize" + char byteorder + object fields + tuple names + + PyTypeObject PySignedIntegerArrType_Type + PyTypeObject PyUnsignedIntegerArrType_Type + +cdef extern from "numpy/ndarrayobject.h": + bint PyArray_CheckScalar(obj) nogil + + +cdef extern from "src/parse_helper.h": + int floatify(object, float64_t *result, int *maybe_int) except -1 + +from pandas._libs cimport util +from pandas._libs.util cimport ( + INT64_MAX, + INT64_MIN, + UINT64_MAX, + is_nan, +) + +from pandas._libs.tslib import array_to_datetime +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) +from pandas._libs.tslibs.period import Period + +from pandas._libs.missing cimport ( + C_NA, + checknull, + is_matching_na, + is_null_datetime64, + is_null_timedelta64, +) +from pandas._libs.tslibs.conversion cimport convert_to_tsobject +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + checknull_with_nat, +) +from pandas._libs.tslibs.offsets cimport is_offset_object +from pandas._libs.tslibs.period cimport is_period_object +from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 +from pandas._libs.tslibs.timezones cimport tz_compare + +# constants that will be compared to potentially arbitrarily large +# python int +cdef: + object oINT64_MAX = INT64_MAX + object oINT64_MIN = INT64_MIN + object oUINT64_MAX = UINT64_MAX + + float64_t NaN = np.NaN + +# python-visible +i8max = INT64_MAX +u8max = UINT64_MAX + + +@cython.wraparound(False) +@cython.boundscheck(False) +def memory_usage_of_objects(arr: object[:]) -> int64_t: + """ + Return the memory usage of an object array in bytes. + + Does not include the actual bytes of the pointers + """ + i: Py_ssize_t + n: Py_ssize_t + size: int64_t + + size = 0 + n = len(arr) + for i in range(n): + size += arr[i].__sizeof__() + return size + + +# ---------------------------------------------------------------------- + + +def is_scalar(val: object) -> bool: + """ + Return True if given object is scalar. + + Parameters + ---------- + val : object + This includes: + + - numpy array scalar (e.g. np.int64) + - Python builtin numerics + - Python builtin byte arrays and strings + - None + - datetime.datetime + - datetime.timedelta + - Period + - decimal.Decimal + - Interval + - DateOffset + - Fraction + - Number. + + Returns + ------- + bool + Return True if given object is scalar. + + Examples + -------- + >>> import datetime + >>> dt = datetime.datetime(2018, 10, 3) + >>> pd.api.types.is_scalar(dt) + True + + >>> pd.api.types.is_scalar([2, 3]) + False + + >>> pd.api.types.is_scalar({0: 1, 2: 3}) + False + + >>> pd.api.types.is_scalar((0, 2)) + False + + pandas supports PEP 3141 numbers: + + >>> from fractions import Fraction + >>> pd.api.types.is_scalar(Fraction(3, 5)) + True + """ + + # Start with C-optimized checks + if (cnp.PyArray_IsAnyScalar(val) + # PyArray_IsAnyScalar is always False for bytearrays on Py3 + or PyDate_Check(val) + or PyDelta_Check(val) + or PyTime_Check(val) + # We differ from numpy, which claims that None is not scalar; + # see np.isscalar + or val is C_NA + or val is None): + return True + + # Next use C-optimized checks to exclude common non-scalars before falling + # back to non-optimized checks. + if PySequence_Check(val): + # e.g. 
list, tuple + # includes np.ndarray, Series which PyNumber_Check can return True for + return False + + # Note: PyNumber_Check check includes Decimal, Fraction, numbers.Number + return (PyNumber_Check(val) + or is_period_object(val) + or is_interval(val) + or is_offset_object(val)) + + +cdef inline int64_t get_itemsize(object val): + """ + Get the itemsize of a NumPy scalar, -1 if not a NumPy scalar. + + Parameters + ---------- + val : object + + Returns + ------- + is_ndarray : bool + """ + if PyArray_CheckScalar(val): + return cnp.PyArray_DescrFromScalar(val).itemsize + else: + return -1 + + +def is_iterator(obj: object) -> bool: + """ + Check if the object is an iterator. + + This is intended for generators, not list-like objects. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_iter : bool + Whether `obj` is an iterator. + + Examples + -------- + >>> import datetime + >>> is_iterator((x for x in [])) + True + >>> is_iterator([1, 2, 3]) + False + >>> is_iterator(datetime.datetime(2017, 1, 1)) + False + >>> is_iterator("foo") + False + >>> is_iterator(1) + False + """ + return PyIter_Check(obj) + + +def item_from_zerodim(val: object) -> object: + """ + If the value is a zerodim array, return the item it contains. + + Parameters + ---------- + val : object + + Returns + ------- + object + + Examples + -------- + >>> item_from_zerodim(1) + 1 + >>> item_from_zerodim('foobar') + 'foobar' + >>> item_from_zerodim(np.array(1)) + 1 + >>> item_from_zerodim(np.array([1])) + array([1]) + """ + if cnp.PyArray_IsZeroDim(val): + return cnp.PyArray_ToScalar(cnp.PyArray_DATA(val), val) + return val + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_unique_multiple(list arrays, sort: bool = True): + """ + Generate a list of unique values from a list of arrays. + + Parameters + ---------- + list : array-like + List of array-like objects. + sort : bool + Whether or not to sort the resulting unique list. + + Returns + ------- + list of unique values + """ + cdef: + ndarray[object] buf + Py_ssize_t k = len(arrays) + Py_ssize_t i, j, n + list uniques = [] + dict table = {} + object val, stub = 0 + + for i in range(k): + buf = arrays[i] + n = len(buf) + for j in range(n): + val = buf[j] + if val not in table: + table[val] = stub + uniques.append(val) + + if sort is None: + try: + uniques.sort() + except TypeError: + warnings.warn( + "The values in the array are unorderable. " + "Pass `sort=False` to suppress this warning.", + RuntimeWarning, + stacklevel=find_stack_level(), + ) + pass + + return uniques + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_unique_multiple_list(lists: list, sort: bool | None = True) -> list: + cdef: + list buf + Py_ssize_t k = len(lists) + Py_ssize_t i, j, n + list uniques = [] + dict table = {} + object val, stub = 0 + + for i in range(k): + buf = lists[i] + n = len(buf) + for j in range(n): + val = buf[j] + if val not in table: + table[val] = stub + uniques.append(val) + if sort: + try: + uniques.sort() + except TypeError: + pass + + return uniques + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_unique_multiple_list_gen(object gen, bint sort=True) -> list: + """ + Generate a list of unique values from a generator of lists. + + Parameters + ---------- + gen : generator object + Generator of lists from which the unique list is created. + sort : bool + Whether or not to sort the resulting unique list. 
+ + Returns + ------- + list of unique values + """ + cdef: + list buf + Py_ssize_t j, n + list uniques = [] + dict table = {} + object val, stub = 0 + + for buf in gen: + n = len(buf) + for j in range(n): + val = buf[j] + if val not in table: + table[val] = stub + uniques.append(val) + if sort: + try: + uniques.sort() + except TypeError: + pass + + return uniques + + +@cython.wraparound(False) +@cython.boundscheck(False) +def dicts_to_array(dicts: list, columns: list): + cdef: + Py_ssize_t i, j, k, n + ndarray[object, ndim=2] result + dict row + object col, onan = np.nan + + k = len(columns) + n = len(dicts) + + result = np.empty((n, k), dtype='O') + + for i in range(n): + row = dicts[i] + for j in range(k): + col = columns[j] + if col in row: + result[i, j] = row[col] + else: + result[i, j] = onan + + return result + + +def fast_zip(list ndarrays) -> ndarray[object]: + """ + For zipping multiple ndarrays into an ndarray of tuples. + """ + cdef: + Py_ssize_t i, j, k, n + ndarray[object, ndim=1] result + flatiter it + object val, tup + + k = len(ndarrays) + n = len(ndarrays[0]) + + result = np.empty(n, dtype=object) + + # initialize tuples on first pass + arr = ndarrays[0] + it = PyArray_IterNew(arr) + for i in range(n): + val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) + tup = PyTuple_New(k) + + PyTuple_SET_ITEM(tup, 0, val) + Py_INCREF(val) + result[i] = tup + PyArray_ITER_NEXT(it) + + for j in range(1, k): + arr = ndarrays[j] + it = PyArray_IterNew(arr) + if len(arr) != n: + raise ValueError("all arrays must be same length") + + for i in range(n): + val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) + PyTuple_SET_ITEM(result[i], j, val) + Py_INCREF(val) + PyArray_ITER_NEXT(it) + + return result + + +def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: + """ + Reverse indexing operation. + + Given `indexer`, make `indexer_inv` of it, such that:: + + indexer_inv[indexer[x]] = x + + Parameters + ---------- + indexer : np.ndarray[np.intp] + length : int + + Returns + ------- + np.ndarray[np.intp] + + Notes + ----- + If indexer is not unique, only first occurrence is accounted. 
+ """ + cdef: + Py_ssize_t i, n = len(indexer) + ndarray[intp_t, ndim=1] rev_indexer + intp_t idx + + rev_indexer = np.empty(length, dtype=np.intp) + rev_indexer[:] = -1 + for i in range(n): + idx = indexer[i] + if idx != -1: + rev_indexer[idx] = i + + return rev_indexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +# Can add const once https://github.com/cython/cython/issues/1772 resolved +def has_infs(floating[:] arr) -> bool: + cdef: + Py_ssize_t i, n = len(arr) + floating inf, neginf, val + bint ret = False + + inf = np.inf + neginf = -inf + with nogil: + for i in range(n): + val = arr[i] + if val == inf or val == neginf: + ret = True + break + return ret + + +def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len): + cdef: + Py_ssize_t i, n = len(indices) + intp_t k, vstart, vlast, v + + if n == 0: + return slice(0, 0) + + vstart = indices[0] + if vstart < 0 or max_len <= vstart: + return indices + + if n == 1: + return slice(vstart, (vstart + 1)) + + vlast = indices[n - 1] + if vlast < 0 or max_len <= vlast: + return indices + + k = indices[1] - indices[0] + if k == 0: + return indices + else: + for i in range(2, n): + v = indices[i] + if v - indices[i - 1] != k: + return indices + + if k > 0: + return slice(vstart, (vlast + 1), k) + else: + if vlast == 0: + return slice(vstart, None, k) + else: + return slice(vstart, (vlast - 1), k) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def maybe_booleans_to_slice(ndarray[uint8_t, ndim=1] mask): + cdef: + Py_ssize_t i, n = len(mask) + Py_ssize_t start = 0, end = 0 + bint started = False, finished = False + + for i in range(n): + if mask[i]: + if finished: + return mask.view(np.bool_) + if not started: + started = True + start = i + else: + if finished: + continue + + if started: + end = i + finished = True + + if not started: + return slice(0, 0) + if not finished: + return slice(start, None) + else: + return slice(start, end) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def array_equivalent_object(left: object[:], right: object[:]) -> bool: + """ + Perform an element by element comparison on 1-d object arrays + taking into account nan positions. + """ + cdef: + Py_ssize_t i, n = left.shape[0] + object x, y + + for i in range(n): + x = left[i] + y = right[i] + + # we are either not equal or both nan + # I think None == None will be true here + try: + if PyArray_Check(x) and PyArray_Check(y): + if not array_equivalent_object(x, y): + return False + elif (x is C_NA) ^ (y is C_NA): + return False + elif not ( + PyObject_RichCompareBool(x, y, Py_EQ) + or is_matching_na(x, y, nan_matches_none=True) + ): + return False + except ValueError: + # Avoid raising ValueError when comparing Numpy arrays to other types + if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): + # Only compare scalars to scalars and non-scalars to non-scalars + return False + elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) + and not (isinstance(x, type(y)) or isinstance(y, type(x)))): + # Check if non-scalars have the same type + return False + raise + return True + + +ctypedef fused ndarr_object: + ndarray[object, ndim=1] + ndarray[object, ndim=2] + +# TODO: get rid of this in StringArray and modify +# and go through ensure_string_array instead +@cython.wraparound(False) +@cython.boundscheck(False) +def convert_nans_to_NA(ndarr_object arr) -> ndarray: + """ + Helper for StringArray that converts null values that + are not pd.NA(e.g. np.nan, None) to pd.NA. 
Assumes elements + have already been validated as null. + """ + cdef: + Py_ssize_t i, m, n + object val + ndarr_object result + result = np.asarray(arr, dtype="object") + if arr.ndim == 2: + m, n = arr.shape[0], arr.shape[1] + for i in range(m): + for j in range(n): + val = arr[i, j] + if not isinstance(val, str): + result[i, j] = C_NA + else: + n = len(arr) + for i in range(n): + val = arr[i] + if not isinstance(val, str): + result[i] = C_NA + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[object] ensure_string_array( + arr, + object na_value=np.nan, + bint convert_na_value=True, + bint copy=True, + bint skipna=True, +): + """ + Returns a new numpy array with object dtype and only strings and na values. + + Parameters + ---------- + arr : array-like + The values to be converted to str, if needed. + na_value : Any, default np.nan + The value to use for na. For example, np.nan or pd.NA. + convert_na_value : bool, default True + If False, existing na values will be used unchanged in the new array. + copy : bool, default True + Whether to ensure that a new array is returned. + skipna : bool, default True + Whether or not to coerce nulls to their stringified form + (e.g. if False, NaN becomes 'nan'). + + Returns + ------- + np.ndarray[object] + An array with the input array's elements casted to str or nan-like. + """ + cdef: + Py_ssize_t i = 0, n = len(arr) + + if hasattr(arr, "to_numpy"): + + if hasattr(arr, "dtype") and arr.dtype.kind in ["m", "M"]: + # dtype check to exclude DataFrame + # GH#41409 TODO: not a great place for this + out = arr.astype(str).astype(object) + out[arr.isna()] = na_value + return out + + arr = arr.to_numpy() + elif not util.is_array(arr): + arr = np.array(arr, dtype="object") + + result = np.asarray(arr, dtype="object") + + if copy and result is arr: + result = result.copy() + + for i in range(n): + val = arr[i] + + if isinstance(val, str): + continue + + if not checknull(val): + if not util.is_float_object(val): + # f"{val}" is faster than str(val) + result[i] = f"{val}" + else: + # f"{val}" is not always equivalent to str(val) for floats + result[i] = str(val) + else: + if convert_na_value: + val = na_value + if skipna: + result[i] = val + else: + result[i] = f"{val}" + + return result + + +def is_all_arraylike(obj: list) -> bool: + """ + Should we treat these as levels of a MultiIndex, as opposed to Index items? + """ + cdef: + Py_ssize_t i, n = len(obj) + object val + bint all_arrays = True + + for i in range(n): + val = obj[i] + if not (isinstance(val, list) or + util.is_array(val) or hasattr(val, '_data')): + # TODO: EA? + # exclude tuples, frozensets as they may be contained in an Index + all_arrays = False + break + + return all_arrays + + +# ------------------------------------------------------------------------------ +# Groupby-related functions + +# TODO: could do even better if we know something about the data. eg, index has +# 1-min data, binner has 5-min data, then bins are just strides in index. This +# is a general, O(max(len(values), len(binner))) method. +@cython.boundscheck(False) +@cython.wraparound(False) +def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, + object closed='left', bint hasnans=False): + """ + Int64 (datetime64) version of generic python version in ``groupby.py``. 
+ """ + cdef: + Py_ssize_t lenidx, lenbin, i, j, bc, vc + ndarray[int64_t, ndim=1] bins + int64_t l_bin, r_bin, nat_count + bint right_closed = closed == 'right' + + nat_count = 0 + if hasnans: + mask = values == NPY_NAT + nat_count = np.sum(mask) + values = values[~mask] + + lenidx = len(values) + lenbin = len(binner) + + if lenidx <= 0 or lenbin <= 0: + raise ValueError("Invalid length for values or for binner") + + # check binner fits data + if values[0] < binner[0]: + raise ValueError("Values falls before first bin") + + if values[lenidx - 1] > binner[lenbin - 1]: + raise ValueError("Values falls after last bin") + + bins = np.empty(lenbin - 1, dtype=np.int64) + + j = 0 # index into values + bc = 0 # bin count + + # linear scan + if right_closed: + for i in range(0, lenbin - 1): + r_bin = binner[i + 1] + # count values in current bin, advance to next bin + while j < lenidx and values[j] <= r_bin: + j += 1 + bins[bc] = j + bc += 1 + else: + for i in range(0, lenbin - 1): + r_bin = binner[i + 1] + # count values in current bin, advance to next bin + while j < lenidx and values[j] < r_bin: + j += 1 + bins[bc] = j + bc += 1 + + if nat_count > 0: + # shift bins by the number of NaT + bins = bins + nat_count + bins = np.insert(bins, 0, nat_count) + + return bins + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_level_sorter( + ndarray[int64_t, ndim=1] codes, const intp_t[:] starts +) -> ndarray: + """ + Argsort for a single level of a multi-index, keeping the order of higher + levels unchanged. `starts` points to starts of same-key indices w.r.t + to leading levels; equivalent to: + np.hstack([codes[starts[i]:starts[i+1]].argsort(kind='mergesort') + + starts[i] for i in range(len(starts) - 1)]) + + Parameters + ---------- + codes : np.ndarray[int64_t, ndim=1] + starts : np.ndarray[intp, ndim=1] + + Returns + ------- + np.ndarray[np.int, ndim=1] + """ + cdef: + Py_ssize_t i, l, r + ndarray[intp_t, ndim=1] out = cnp.PyArray_EMPTY(1, codes.shape, cnp.NPY_INTP, 0) + + for i in range(len(starts) - 1): + l, r = starts[i], starts[i + 1] + out[l:r] = l + codes[l:r].argsort(kind='mergesort') + + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, + const intp_t[:] labels, + Py_ssize_t max_bin, + int axis): + cdef: + Py_ssize_t i, j, k, n + ndarray[int64_t, ndim=2] counts + + assert (axis == 0 or axis == 1) + n, k = (mask).shape + + if axis == 0: + counts = np.zeros((max_bin, k), dtype='i8') + with nogil: + for i in range(n): + for j in range(k): + if mask[i, j]: + counts[labels[i], j] += 1 + + else: # axis == 1 + counts = np.zeros((n, max_bin), dtype='i8') + with nogil: + for i in range(n): + for j in range(k): + if mask[i, j]: + counts[i, labels[j]] += 1 + + return counts + + +@cython.wraparound(False) +@cython.boundscheck(False) +def generate_slices(const intp_t[:] labels, Py_ssize_t ngroups): + cdef: + Py_ssize_t i, group_size, n, start + intp_t lab + int64_t[::1] starts, ends + + n = len(labels) + + starts = np.zeros(ngroups, dtype=np.int64) + ends = np.zeros(ngroups, dtype=np.int64) + + start = 0 + group_size = 0 + with nogil: + for i in range(n): + lab = labels[i] + if lab < 0: + start += 1 + else: + group_size += 1 + if i == n - 1 or lab != labels[i + 1]: + starts[lab] = start + ends[lab] = start + group_size + start += group_size + group_size = 0 + + return np.asarray(starts), np.asarray(ends) + + +def indices_fast(ndarray[intp_t, ndim=1] index, const int64_t[:] labels, list keys, + list 
sorted_labels) -> dict: + """ + Parameters + ---------- + index : ndarray[intp] + labels : ndarray[int64] + keys : list + sorted_labels : list[ndarray[int64]] + """ + cdef: + Py_ssize_t i, j, k, lab, cur, start, n = len(labels) + dict result = {} + object tup + + k = len(keys) + + # Start at the first non-null entry + j = 0 + for j in range(0, n): + if labels[j] != -1: + break + else: + return result + cur = labels[j] + start = j + + for i in range(j+1, n): + lab = labels[i] + + if lab != cur: + if lab != -1: + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][i - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][i - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + result[tup] = index[start:i] + start = i + cur = lab + + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][n - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][n - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + result[tup] = index[start:] + + return result + + +# core.common import for fast inference checks + +def is_float(obj: object) -> bool: + """ + Return True if given object is float. + + Returns + ------- + bool + """ + return util.is_float_object(obj) + + +def is_integer(obj: object) -> bool: + """ + Return True if given object is integer. + + Returns + ------- + bool + """ + return util.is_integer_object(obj) + + +def is_bool(obj: object) -> bool: + """ + Return True if given object is boolean. + + Returns + ------- + bool + """ + return util.is_bool_object(obj) + + +def is_complex(obj: object) -> bool: + """ + Return True if given object is complex. + + Returns + ------- + bool + """ + return util.is_complex_object(obj) + + +cpdef bint is_decimal(object obj): + return isinstance(obj, Decimal) + + +cpdef bint is_interval(object obj): + return getattr(obj, '_typ', '_typ') == 'interval' + + +def is_period(val: object) -> bool: + """ + Return True if given object is Period. + + Returns + ------- + bool + """ + return is_period_object(val) + + +def is_list_like(obj: object, allow_sets: bool = True) -> bool: + """ + Check if the object is list-like. + + Objects that are considered list-like are for example Python + lists, tuples, sets, NumPy arrays, and Pandas Series. + + Strings and datetime objects, however, are not considered list-like. + + Parameters + ---------- + obj : object + Object to check. + allow_sets : bool, default True + If this parameter is False, sets will not be considered list-like. + + Returns + ------- + bool + Whether `obj` has list-like properties. 
+ + Examples + -------- + >>> import datetime + >>> is_list_like([1, 2, 3]) + True + >>> is_list_like({1, 2, 3}) + True + >>> is_list_like(datetime.datetime(2017, 1, 1)) + False + >>> is_list_like("foo") + False + >>> is_list_like(1) + False + >>> is_list_like(np.array([2])) + True + >>> is_list_like(np.array(2)) + False + """ + return c_is_list_like(obj, allow_sets) + + +cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: + # first, performance short-cuts for the most common cases + if util.is_array(obj): + # exclude zero-dimensional numpy arrays, effectively scalars + return not cnp.PyArray_IsZeroDim(obj) + elif isinstance(obj, list): + return True + # then the generic implementation + return ( + # equiv: `isinstance(obj, abc.Iterable)` + getattr(obj, "__iter__", None) is not None and not isinstance(obj, type) + # we do not count strings/unicode/bytes as list-like + # exclude Generic types that have __iter__ + and not isinstance(obj, (str, bytes, _GenericAlias)) + # exclude zero-dimensional duck-arrays, effectively scalars + and not (hasattr(obj, "ndim") and obj.ndim == 0) + # exclude sets if allow_sets is False + and not (allow_sets is False and isinstance(obj, abc.Set)) + ) + + +_TYPE_MAP = { + "categorical": "categorical", + "category": "categorical", + "int8": "integer", + "int16": "integer", + "int32": "integer", + "int64": "integer", + "i": "integer", + "uint8": "integer", + "uint16": "integer", + "uint32": "integer", + "uint64": "integer", + "u": "integer", + "float32": "floating", + "float64": "floating", + "f": "floating", + "complex64": "complex", + "complex128": "complex", + "c": "complex", + "string": "string", + str: "string", + "S": "bytes", + "U": "string", + "bool": "boolean", + "b": "boolean", + "datetime64[ns]": "datetime64", + "M": "datetime64", + "timedelta64[ns]": "timedelta64", + "m": "timedelta64", + "interval": "interval", + Period: "period", +} + +# types only exist on certain platform +try: + np.float128 + _TYPE_MAP['float128'] = 'floating' +except AttributeError: + pass +try: + np.complex256 + _TYPE_MAP['complex256'] = 'complex' +except AttributeError: + pass +try: + np.float16 + _TYPE_MAP['float16'] = 'floating' +except AttributeError: + pass + + +@cython.internal +cdef class Seen: + """ + Class for keeping track of the types of elements + encountered when trying to perform type conversions. + """ + + cdef: + bint int_ # seen_int + bint nat_ # seen nat + bint bool_ # seen_bool + bint null_ # seen_null + bint nan_ # seen_np.nan + bint uint_ # seen_uint (unsigned integer) + bint sint_ # seen_sint (signed integer) + bint float_ # seen_float + bint object_ # seen_object + bint complex_ # seen_complex + bint datetime_ # seen_datetime + bint coerce_numeric # coerce data to numeric + bint timedelta_ # seen_timedelta + bint datetimetz_ # seen_datetimetz + bint period_ # seen_period + bint interval_ # seen_interval + + def __cinit__(self, bint coerce_numeric=False): + """ + Initialize a Seen instance. + + Parameters + ---------- + coerce_numeric : bool, default False + Whether or not to force conversion to a numeric data type if + initial methods to convert to numeric fail. 
+ """ + self.int_ = False + self.nat_ = False + self.bool_ = False + self.null_ = False + self.nan_ = False + self.uint_ = False + self.sint_ = False + self.float_ = False + self.object_ = False + self.complex_ = False + self.datetime_ = False + self.timedelta_ = False + self.datetimetz_ = False + self.period_ = False + self.interval_ = False + self.coerce_numeric = coerce_numeric + + cdef inline bint check_uint64_conflict(self) except -1: + """ + Check whether we can safely convert a uint64 array to a numeric dtype. + + There are two cases when conversion to numeric dtype with a uint64 + array is not safe (and will therefore not be performed) + + 1) A NaN element is encountered. + + uint64 cannot be safely cast to float64 due to truncation issues + at the extreme ends of the range. + + 2) A negative number is encountered. + + There is no numerical dtype that can hold both negative numbers + and numbers greater than INT64_MAX. Hence, at least one number + will be improperly cast if we convert to a numeric dtype. + + Returns + ------- + bool + Whether or not we should return the original input array to avoid + data truncation. + + Raises + ------ + ValueError + uint64 elements were detected, and at least one of the + two conflict cases was also detected. However, we are + trying to force conversion to a numeric dtype. + """ + return (self.uint_ and (self.null_ or self.sint_) + and not self.coerce_numeric) + + cdef inline saw_null(self): + """ + Set flags indicating that a null value was encountered. + """ + self.null_ = True + self.float_ = True + + cdef saw_int(self, object val): + """ + Set flags indicating that an integer value was encountered. + + In addition to setting a flag that an integer was seen, we + also set two flags depending on the type of integer seen: + + 1) sint_ : a signed numpy integer type or a negative (signed) number in the + range of [-2**63, 0) was encountered + 2) uint_ : an unsigned numpy integer type or a positive number in the range of + [2**63, 2**64) was encountered + + Parameters + ---------- + val : Python int + Value with which to set the flags. + """ + self.int_ = True + self.sint_ = ( + self.sint_ + or (oINT64_MIN <= val < 0) + # Cython equivalent of `isinstance(val, np.signedinteger)` + or PyObject_TypeCheck(val, &PySignedIntegerArrType_Type) + ) + self.uint_ = ( + self.uint_ + or (oINT64_MAX < val <= oUINT64_MAX) + # Cython equivalent of `isinstance(val, np.unsignedinteger)` + or PyObject_TypeCheck(val, &PyUnsignedIntegerArrType_Type) + ) + + @property + def numeric_(self): + return self.complex_ or self.float_ or self.int_ + + @property + def is_bool(self): + return not (self.datetime_ or self.numeric_ or self.timedelta_ + or self.nat_) + + @property + def is_float_or_complex(self): + return not (self.bool_ or self.datetime_ or self.timedelta_ + or self.nat_) + + +cdef object _try_infer_map(object dtype): + """ + If its in our map, just return the dtype. + """ + cdef: + object val + str attr + for attr in ["name", "kind", "base", "type"]: + val = getattr(dtype, attr, None) + if val in _TYPE_MAP: + return _TYPE_MAP[val] + return None + + +def infer_dtype(value: object, skipna: bool = True) -> str: + """ + Return a string label of the type of a scalar or list-like of values. + + Parameters + ---------- + value : scalar, list, ndarray, or pandas type + skipna : bool, default True + Ignore NaN values when inferring the type. + + Returns + ------- + str + Describing the common type of the input data. 
+ Results can include: + + - string + - bytes + - floating + - integer + - mixed-integer + - mixed-integer-float + - decimal + - complex + - categorical + - boolean + - datetime64 + - datetime + - date + - timedelta64 + - timedelta + - time + - period + - mixed + - unknown-array + + Raises + ------ + TypeError + If ndarray-like but cannot infer the dtype + + Notes + ----- + - 'mixed' is the catchall for anything that is not otherwise + specialized + - 'mixed-integer-float' are floats and integers + - 'mixed-integer' are integers mixed with non-integers + - 'unknown-array' is the catchall for something that *is* an array (has + a dtype attribute), but has a dtype unknown to pandas (e.g. external + extension array) + + Examples + -------- + >>> import datetime + >>> infer_dtype(['foo', 'bar']) + 'string' + + >>> infer_dtype(['a', np.nan, 'b'], skipna=True) + 'string' + + >>> infer_dtype(['a', np.nan, 'b'], skipna=False) + 'mixed' + + >>> infer_dtype([b'foo', b'bar']) + 'bytes' + + >>> infer_dtype([1, 2, 3]) + 'integer' + + >>> infer_dtype([1, 2, 3.5]) + 'mixed-integer-float' + + >>> infer_dtype([1.0, 2.0, 3.5]) + 'floating' + + >>> infer_dtype(['a', 1]) + 'mixed-integer' + + >>> infer_dtype([Decimal(1), Decimal(2.0)]) + 'decimal' + + >>> infer_dtype([True, False]) + 'boolean' + + >>> infer_dtype([True, False, np.nan]) + 'boolean' + + >>> infer_dtype([pd.Timestamp('20130101')]) + 'datetime' + + >>> infer_dtype([datetime.date(2013, 1, 1)]) + 'date' + + >>> infer_dtype([np.datetime64('2013-01-01')]) + 'datetime64' + + >>> infer_dtype([datetime.timedelta(0, 1, 1)]) + 'timedelta' + + >>> infer_dtype(pd.Series(list('aabc')).astype('category')) + 'categorical' + """ + cdef: + Py_ssize_t i, n + object val + ndarray values + bint seen_pdnat = False + bint seen_val = False + flatiter it + + if util.is_array(value): + values = value + elif hasattr(value, "inferred_type") and skipna is False: + # Index, use the cached attribute if possible, populate the cache otherwise + return value.inferred_type + elif hasattr(value, "dtype"): + # this will handle ndarray-like + # e.g. categoricals + dtype = value.dtype + if not cnp.PyArray_DescrCheck(dtype): + # i.e. not isinstance(dtype, np.dtype) + inferred = _try_infer_map(value.dtype) + if inferred is not None: + return inferred + return "unknown-array" + + # Unwrap Series/Index + values = np.asarray(value) + + else: + if not isinstance(value, list): + value = list(value) + if not value: + return "empty" + + from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + values = construct_1d_object_array_from_listlike(value) + + val = _try_infer_map(values.dtype) + if val is not None: + # Anything other than object-dtype should return here. + return val + + if values.descr.type_num != NPY_OBJECT: + # i.e. values.dtype != np.object + # This should not be reached + values = values.astype(object) + + n = cnp.PyArray_SIZE(values) + if n == 0: + return "empty" + + # Iterate until we find our first valid value. We will use this + # value to decide which of the is_foo_array functions to call. 
+ it = PyArray_IterNew(values) + for i in range(n): + # The PyArray_GETITEM and PyArray_ITER_NEXT are faster + # equivalents to `val = values[i]` + val = PyArray_GETITEM(values, PyArray_ITER_DATA(it)) + PyArray_ITER_NEXT(it) + + # do not use checknull to keep + # np.datetime64('nat') and np.timedelta64('nat') + if val is None or util.is_nan(val) or val is C_NA: + pass + elif val is NaT: + seen_pdnat = True + else: + seen_val = True + break + + # if all values are nan/NaT + if seen_val is False and seen_pdnat is True: + return "datetime" + # float/object nan is handled in latter logic + if seen_val is False and skipna: + return "empty" + + if util.is_datetime64_object(val): + if is_datetime64_array(values, skipna=skipna): + return "datetime64" + + elif is_timedelta(val): + if is_timedelta_or_timedelta64_array(values, skipna=skipna): + return "timedelta" + + elif util.is_integer_object(val): + # ordering matters here; this check must come after the is_timedelta + # check otherwise numpy timedelta64 objects would come through here + + if is_integer_array(values, skipna=skipna): + return "integer" + elif is_integer_float_array(values, skipna=skipna): + if is_integer_na_array(values, skipna=skipna): + return "integer-na" + else: + return "mixed-integer-float" + return "mixed-integer" + + elif PyDateTime_Check(val): + if is_datetime_array(values, skipna=skipna): + return "datetime" + elif is_date_array(values, skipna=skipna): + return "date" + + elif PyDate_Check(val): + if is_date_array(values, skipna=skipna): + return "date" + + elif PyTime_Check(val): + if is_time_array(values, skipna=skipna): + return "time" + + elif is_decimal(val): + if is_decimal_array(values, skipna=skipna): + return "decimal" + + elif util.is_complex_object(val): + if is_complex_array(values): + return "complex" + + elif util.is_float_object(val): + if is_float_array(values): + return "floating" + elif is_integer_float_array(values, skipna=skipna): + if is_integer_na_array(values, skipna=skipna): + return "integer-na" + else: + return "mixed-integer-float" + + elif util.is_bool_object(val): + if is_bool_array(values, skipna=skipna): + return "boolean" + + elif isinstance(val, str): + if is_string_array(values, skipna=skipna): + return "string" + + elif isinstance(val, bytes): + if is_bytes_array(values, skipna=skipna): + return "bytes" + + elif is_period_object(val): + if is_period_array(values, skipna=skipna): + return "period" + + elif is_interval(val): + if is_interval_array(values): + return "interval" + + cnp.PyArray_ITER_RESET(it) + for i in range(n): + val = PyArray_GETITEM(values, PyArray_ITER_DATA(it)) + PyArray_ITER_NEXT(it) + + if util.is_integer_object(val): + return "mixed-integer" + + return "mixed" + + +def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]: + """ + Infer if we have a datetime or timedelta array. 
+ - date: we have *only* date and maybe strings, nulls + - datetime: we have *only* datetimes and maybe strings, nulls + - timedelta: we have *only* timedeltas and maybe strings, nulls + - nat: we do not have *any* date, datetimes or timedeltas, but do have + at least a NaT + - mixed: other objects (strings, a mix of tz-aware and tz-naive, or + actual objects) + + Parameters + ---------- + arr : ndarray[object] + + Returns + ------- + str: {datetime, timedelta, date, nat, mixed} + bool + """ + cdef: + Py_ssize_t i, n = len(arr) + bint seen_timedelta = False, seen_date = False, seen_datetime = False + bint seen_tz_aware = False, seen_tz_naive = False + bint seen_nat = False, seen_str = False + bint seen_period = False, seen_interval = False + list objs = [] + object v + + for i in range(n): + v = arr[i] + if isinstance(v, str): + objs.append(v) + seen_str = True + + if len(objs) == 3: + break + + elif v is None or util.is_nan(v): + # nan or None + pass + elif v is NaT: + seen_nat = True + elif PyDateTime_Check(v): + # datetime + seen_datetime = True + + # disambiguate between tz-naive and tz-aware + if v.tzinfo is None: + seen_tz_naive = True + else: + seen_tz_aware = True + + if seen_tz_naive and seen_tz_aware: + return "mixed", seen_str + elif util.is_datetime64_object(v): + # np.datetime64 + seen_datetime = True + elif PyDate_Check(v): + seen_date = True + elif is_timedelta(v): + # timedelta, or timedelta64 + seen_timedelta = True + elif is_period_object(v): + seen_period = True + break + elif is_interval(v): + seen_interval = True + break + else: + return "mixed", seen_str + + if seen_period: + if is_period_array(arr): + return "period", seen_str + return "mixed", seen_str + + if seen_interval: + if is_interval_array(arr): + return "interval", seen_str + return "mixed", seen_str + + if seen_date and not (seen_datetime or seen_timedelta): + return "date", seen_str + elif seen_datetime and not seen_timedelta: + return "datetime", seen_str + elif seen_timedelta and not seen_datetime: + return "timedelta", seen_str + elif seen_nat: + return "nat", seen_str + + # short-circuit by trying to + # actually convert these strings + # this is for performance as we don't need to try + # convert *every* string array + if len(objs): + try: + # require_iso8601 as in maybe_infer_to_datetimelike + array_to_datetime(objs, errors="raise", require_iso8601=True) + return "datetime", seen_str + except (ValueError, TypeError): + pass + + # we are *not* going to infer from strings + # for timedelta as too much ambiguity + + return "mixed", seen_str + + +cdef inline bint is_timedelta(object o): + return PyDelta_Check(o) or util.is_timedelta64_object(o) + + +@cython.internal +cdef class Validator: + + cdef: + Py_ssize_t n + dtype dtype + bint skipna + + def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_), + bint skipna=False): + self.n = n + self.dtype = dtype + self.skipna = skipna + + cdef bint validate(self, ndarray values) except -1: + if not self.n: + return False + + if self.is_array_typed(): + # i.e. 
this ndarray is already of the desired dtype + return True + elif self.dtype.type_num == NPY_OBJECT: + if self.skipna: + return self._validate_skipna(values) + else: + return self._validate(values) + else: + return False + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef bint _validate(self, ndarray values) except -1: + cdef: + Py_ssize_t i + Py_ssize_t n = values.size + flatiter it = PyArray_IterNew(values) + + for i in range(n): + # The PyArray_GETITEM and PyArray_ITER_NEXT are faster + # equivalents to `val = values[i]` + val = PyArray_GETITEM(values, PyArray_ITER_DATA(it)) + PyArray_ITER_NEXT(it) + if not self.is_valid(val): + return False + + return True + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef bint _validate_skipna(self, ndarray values) except -1: + cdef: + Py_ssize_t i + Py_ssize_t n = values.size + flatiter it = PyArray_IterNew(values) + + for i in range(n): + # The PyArray_GETITEM and PyArray_ITER_NEXT are faster + # equivalents to `val = values[i]` + val = PyArray_GETITEM(values, PyArray_ITER_DATA(it)) + PyArray_ITER_NEXT(it) + if not self.is_valid_skipna(val): + return False + + return True + + cdef bint is_valid(self, object value) except -1: + return self.is_value_typed(value) + + cdef bint is_valid_skipna(self, object value) except -1: + return self.is_valid(value) or self.is_valid_null(value) + + cdef bint is_value_typed(self, object value) except -1: + raise NotImplementedError(f"{type(self).__name__} child class " + "must define is_value_typed") + + cdef bint is_valid_null(self, object value) except -1: + return value is None or value is C_NA or util.is_nan(value) + + cdef bint is_array_typed(self) except -1: + return False + + +@cython.internal +cdef class BoolValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_bool_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.bool_) + + +cpdef bint is_bool_array(ndarray values, bint skipna=False): + cdef: + BoolValidator validator = BoolValidator(len(values), + values.dtype, + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class IntegerValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) + + +# Note: only python-exposed for tests +cpdef bint is_integer_array(ndarray values, bint skipna=True): + cdef: + IntegerValidator validator = IntegerValidator(len(values), + values.dtype, + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class IntegerNaValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return (util.is_integer_object(value) + or (util.is_nan(value) and util.is_float_object(value))) + + +cdef bint is_integer_na_array(ndarray values, bint skipna=True): + cdef: + IntegerNaValidator validator = IntegerNaValidator(len(values), + values.dtype, skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class IntegerFloatValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) or util.is_float_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) + + +cdef bint is_integer_float_array(ndarray values, bint skipna=True): + cdef: + IntegerFloatValidator validator = 
IntegerFloatValidator(len(values), + values.dtype, + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class FloatValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_float_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.floating) + + +# Note: only python-exposed for tests +cpdef bint is_float_array(ndarray values): + cdef: + FloatValidator validator = FloatValidator(len(values), values.dtype) + return validator.validate(values) + + +@cython.internal +cdef class ComplexValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return ( + util.is_complex_object(value) + or (util.is_float_object(value) and is_nan(value)) + ) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.complexfloating) + + +cdef bint is_complex_array(ndarray values): + cdef: + ComplexValidator validator = ComplexValidator(len(values), values.dtype) + return validator.validate(values) + + +@cython.internal +cdef class DecimalValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return is_decimal(value) + + +cdef bint is_decimal_array(ndarray values, bint skipna=False): + cdef: + DecimalValidator validator = DecimalValidator( + len(values), values.dtype, skipna=skipna + ) + return validator.validate(values) + + +@cython.internal +cdef class StringValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return isinstance(value, str) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.str_) + + +cpdef bint is_string_array(ndarray values, bint skipna=False): + cdef: + StringValidator validator = StringValidator(len(values), + values.dtype, + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class BytesValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return isinstance(value, bytes) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.bytes_) + + +cdef bint is_bytes_array(ndarray values, bint skipna=False): + cdef: + BytesValidator validator = BytesValidator(len(values), values.dtype, + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class TemporalValidator(Validator): + cdef: + bint all_generic_na + + def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_), + bint skipna=False): + self.n = n + self.dtype = dtype + self.skipna = skipna + self.all_generic_na = True + + cdef inline bint is_valid(self, object value) except -1: + return self.is_value_typed(value) or self.is_valid_null(value) + + cdef bint is_valid_null(self, object value) except -1: + raise NotImplementedError(f"{type(self).__name__} child class " + "must define is_valid_null") + + cdef inline bint is_valid_skipna(self, object value) except -1: + cdef: + bint is_typed_null = self.is_valid_null(value) + bint is_generic_null = value is None or util.is_nan(value) + if not is_generic_null: + self.all_generic_na = False + return self.is_value_typed(value) or is_typed_null or is_generic_null + + cdef bint _validate_skipna(self, ndarray values) except -1: + """ + If we _only_ saw non-dtype-specific NA values, even if they are valid + for this dtype, we do not infer this dtype. 
+ """ + return Validator._validate_skipna(self, values) and not self.all_generic_na + + +@cython.internal +cdef class DatetimeValidator(TemporalValidator): + cdef bint is_value_typed(self, object value) except -1: + return PyDateTime_Check(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_datetime64(value) + + +cpdef bint is_datetime_array(ndarray values, bint skipna=True): + cdef: + DatetimeValidator validator = DatetimeValidator(len(values), + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class Datetime64Validator(DatetimeValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_datetime64_object(value) + + +# Note: only python-exposed for tests +cpdef bint is_datetime64_array(ndarray values, bint skipna=True): + cdef: + Datetime64Validator validator = Datetime64Validator(len(values), + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class AnyDatetimeValidator(DatetimeValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_datetime64_object(value) or ( + PyDateTime_Check(value) and value.tzinfo is None + ) + + +cdef bint is_datetime_or_datetime64_array(ndarray values, bint skipna=True): + cdef: + AnyDatetimeValidator validator = AnyDatetimeValidator(len(values), + skipna=skipna) + return validator.validate(values) + + +# Note: only python-exposed for tests +def is_datetime_with_singletz_array(values: ndarray) -> bool: + """ + Check values have the same tzinfo attribute. + Doesn't check values are datetime-like types. + """ + cdef: + Py_ssize_t i = 0, j, n = len(values) + object base_val, base_tz, val, tz + + if n == 0: + return False + + # Get a reference timezone to compare with the rest of the tzs in the array + for i in range(n): + base_val = values[i] + if base_val is not NaT and base_val is not None and not util.is_nan(base_val): + base_tz = getattr(base_val, 'tzinfo', None) + break + + for j in range(i, n): + # Compare val's timezone with the reference timezone + # NaT can coexist with tz-aware datetimes, so skip if encountered + val = values[j] + if val is not NaT and val is not None and not util.is_nan(val): + tz = getattr(val, 'tzinfo', None) + if not tz_compare(base_tz, tz): + return False + + # Note: we should only be called if a tzaware datetime has been seen, + # so base_tz should always be set at this point. + return True + + +@cython.internal +cdef class TimedeltaValidator(TemporalValidator): + cdef bint is_value_typed(self, object value) except -1: + return PyDelta_Check(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_timedelta64(value) + + +@cython.internal +cdef class AnyTimedeltaValidator(TimedeltaValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return is_timedelta(value) + + +# Note: only python-exposed for tests +cpdef bint is_timedelta_or_timedelta64_array(ndarray values, bint skipna=True): + """ + Infer with timedeltas and/or nat/none. 
+ """ + cdef: + AnyTimedeltaValidator validator = AnyTimedeltaValidator(len(values), + skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class DateValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return PyDate_Check(value) + + +# Note: only python-exposed for tests +cpdef bint is_date_array(ndarray values, bint skipna=False): + cdef: + DateValidator validator = DateValidator(len(values), skipna=skipna) + return validator.validate(values) + + +@cython.internal +cdef class TimeValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return PyTime_Check(value) + + +# Note: only python-exposed for tests +cpdef bint is_time_array(ndarray values, bint skipna=False): + cdef: + TimeValidator validator = TimeValidator(len(values), skipna=skipna) + return validator.validate(values) + + +# FIXME: actually use skipna +cdef bint is_period_array(ndarray values, bint skipna=True): + """ + Is this an ndarray of Period objects (or NaT) with a single `freq`? + """ + # values should be object-dtype, but ndarray[object] assumes 1D, while + # this _may_ be 2D. + cdef: + Py_ssize_t i, N = values.size + int dtype_code = -10000 # i.e. c_FreqGroup.FR_UND + object val + flatiter it + + if N == 0: + return False + + it = PyArray_IterNew(values) + for i in range(N): + # The PyArray_GETITEM and PyArray_ITER_NEXT are faster + # equivalents to `val = values[i]` + val = PyArray_GETITEM(values, PyArray_ITER_DATA(it)) + PyArray_ITER_NEXT(it) + + if is_period_object(val): + if dtype_code == -10000: + dtype_code = val._dtype._dtype_code + elif dtype_code != val._dtype._dtype_code: + # mismatched freqs + return False + elif checknull_with_nat(val): + pass + else: + # Not a Period or NaT-like + return False + + if dtype_code == -10000: + # we saw all-NaTs, no actual Periods + return False + return True + + +# Note: only python-exposed for tests +cpdef bint is_interval_array(ndarray values): + """ + Is this an ndarray of Interval (or np.nan) with a single dtype? + """ + cdef: + Py_ssize_t i, n = len(values) + str closed = None + bint numeric = False + bint dt64 = False + bint td64 = False + object val + + if len(values) == 0: + return False + + for i in range(n): + val = values[i] + + if is_interval(val): + if closed is None: + closed = val.closed + numeric = ( + util.is_float_object(val.left) + or util.is_integer_object(val.left) + ) + td64 = is_timedelta(val.left) + dt64 = PyDateTime_Check(val.left) + elif val.closed != closed: + # mismatched closedness + return False + elif numeric: + if not ( + util.is_float_object(val.left) + or util.is_integer_object(val.left) + ): + # i.e. datetime64 or timedelta64 + return False + elif td64: + if not is_timedelta(val.left): + return False + elif dt64: + if not PyDateTime_Check(val.left): + return False + else: + raise ValueError(val) + elif util.is_nan(val) or val is None: + pass + else: + return False + + if closed is None: + # we saw all-NAs, no actual Intervals + return False + return True + + +@cython.boundscheck(False) +@cython.wraparound(False) +def maybe_convert_numeric( + ndarray[object, ndim=1] values, + set na_values, + bint convert_empty=True, + bint coerce_numeric=False, + bint convert_to_masked_nullable=False, +) -> tuple[np.ndarray, np.ndarray | None]: + """ + Convert object array to a numeric array if possible. + + Parameters + ---------- + values : ndarray[object] + Array of object elements to convert. + na_values : set + Set of values that should be interpreted as NaN. 
+ convert_empty : bool, default True + If an empty array-like object is encountered, whether to interpret + that element as NaN or not. If set to False, a ValueError will be + raised if such an element is encountered and 'coerce_numeric' is False. + coerce_numeric : bool, default False + If initial attempts to convert to numeric have failed, whether to + force conversion to numeric via alternative methods or by setting the + element to NaN. Otherwise, an Exception will be raised when such an + element is encountered. + + This boolean also has an impact on how conversion behaves when a + numeric array has no suitable numerical dtype to return (i.e. uint64, + int32, uint8). If set to False, the original object array will be + returned. Otherwise, a ValueError will be raised. + convert_to_masked_nullable : bool, default False + Whether to return a mask for the converted values. This also disables + upcasting for ints with nulls to float64. + Returns + ------- + np.ndarray + Array of converted object values to numerical ones. + + Optional[np.ndarray] + If convert_to_masked_nullable is True, + returns a boolean mask for the converted values, otherwise returns None. + """ + if len(values) == 0: + return (np.array([], dtype='i8'), None) + + # fastpath for ints - try to convert all based on first value + cdef: + object val = values[0] + + if util.is_integer_object(val): + try: + maybe_ints = values.astype('i8') + if (maybe_ints == values).all(): + return (maybe_ints, None) + except (ValueError, OverflowError, TypeError): + pass + + # Otherwise, iterate and do full inference. + cdef: + int status, maybe_int + Py_ssize_t i, n = values.size + Seen seen = Seen(coerce_numeric) + ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, 0) + ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_COMPLEX128, 0) + ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_INT64, 0) + ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT64, 0) + ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT8, 0) + ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") + float64_t fval + bint allow_null_in_int = convert_to_masked_nullable + + for i in range(n): + val = values[i] + # We only want to disable NaNs showing as float if + # a) convert_to_masked_nullable = True + # b) no floats have been seen ( assuming an int shows up later ) + # However, if no ints present (all null array), we need to return floats + allow_null_in_int = convert_to_masked_nullable and not seen.float_ + + if val.__hash__ is not None and val in na_values: + if allow_null_in_int: + seen.null_ = True + mask[i] = 1 + else: + if convert_to_masked_nullable: + mask[i] = 1 + seen.saw_null() + floats[i] = complexes[i] = NaN + elif util.is_float_object(val): + fval = val + if fval != fval: + seen.null_ = True + if allow_null_in_int: + mask[i] = 1 + else: + if convert_to_masked_nullable: + mask[i] = 1 + seen.float_ = True + else: + seen.float_ = True + floats[i] = complexes[i] = fval + elif util.is_integer_object(val): + floats[i] = complexes[i] = val + + val = int(val) + seen.saw_int(val) + + if val >= 0: + if val <= oUINT64_MAX: + uints[i] = val + else: + seen.float_ = True + + if oINT64_MIN <= val <= oINT64_MAX: + ints[i] = val + + if val < oINT64_MIN or (seen.sint_ and seen.uint_): + seen.float_ = True + + elif util.is_bool_object(val): + floats[i] = uints[i] = ints[i] = bools[i] = val + seen.bool_ = True + elif val 
is None or val is C_NA: + if allow_null_in_int: + seen.null_ = True + mask[i] = 1 + else: + if convert_to_masked_nullable: + mask[i] = 1 + seen.saw_null() + floats[i] = complexes[i] = NaN + elif hasattr(val, '__len__') and len(val) == 0: + if convert_empty or seen.coerce_numeric: + seen.saw_null() + floats[i] = complexes[i] = NaN + else: + raise ValueError("Empty string encountered") + elif util.is_complex_object(val): + complexes[i] = val + seen.complex_ = True + elif is_decimal(val): + floats[i] = complexes[i] = val + seen.float_ = True + else: + try: + status = floatify(val, &fval, &maybe_int) + + if fval in na_values: + seen.saw_null() + floats[i] = complexes[i] = NaN + mask[i] = 1 + else: + if fval != fval: + seen.null_ = True + mask[i] = 1 + + floats[i] = fval + + if maybe_int: + as_int = int(val) + + if as_int in na_values: + mask[i] = 1 + seen.null_ = True + if not allow_null_in_int: + seen.float_ = True + else: + seen.saw_int(as_int) + + if as_int not in na_values: + if as_int < oINT64_MIN or as_int > oUINT64_MAX: + if seen.coerce_numeric: + seen.float_ = True + else: + raise ValueError("Integer out of range.") + else: + if as_int >= 0: + uints[i] = as_int + + if as_int <= oINT64_MAX: + ints[i] = as_int + + seen.float_ = seen.float_ or (seen.uint_ and seen.sint_) + else: + seen.float_ = True + except (TypeError, ValueError) as err: + if not seen.coerce_numeric: + raise type(err)(f"{err} at position {i}") + + seen.saw_null() + floats[i] = NaN + + if seen.check_uint64_conflict(): + return (values, None) + + # This occurs since we disabled float nulls showing as null in anticipation + # of seeing ints that were never seen. So then, we return float + if allow_null_in_int and seen.null_ and not seen.int_: + seen.float_ = True + + if seen.complex_: + return (complexes, None) + elif seen.float_: + if seen.null_ and convert_to_masked_nullable: + return (floats, mask.view(np.bool_)) + return (floats, None) + elif seen.int_: + if seen.null_ and convert_to_masked_nullable: + if seen.uint_: + return (uints, mask.view(np.bool_)) + else: + return (ints, mask.view(np.bool_)) + if seen.uint_: + return (uints, None) + else: + return (ints, None) + elif seen.bool_: + return (bools.view(np.bool_), None) + elif seen.uint_: + return (uints, None) + return (ints, None) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def maybe_convert_objects(ndarray[object] objects, + *, + bint try_float=False, + bint safe=False, + bint convert_datetime=False, + bint convert_timedelta=False, + bint convert_period=False, + bint convert_interval=False, + bint convert_to_nullable_integer=False, + object dtype_if_all_nat=None) -> "ArrayLike": + """ + Type inference function-- convert object array to proper dtype + + Parameters + ---------- + objects : ndarray[object] + Array of object elements to convert. + try_float : bool, default False + If an array-like object contains only float or NaN values is + encountered, whether to convert and return an array of float dtype. + safe : bool, default False + Whether to upcast numeric type (e.g. int cast to float). If set to + True, no upcasting will be performed. + convert_datetime : bool, default False + If an array-like object contains only datetime values or NaT is + encountered, whether to convert and return an array of M8[ns] dtype. + convert_timedelta : bool, default False + If an array-like object contains only timedelta values or NaT is + encountered, whether to convert and return an array of m8[ns] dtype. 
+ convert_period : bool, default False + If an array-like object contains only (homogeneous-freq) Period values + or NaT, whether to convert and return a PeriodArray. + convert_interval : bool, default False + If an array-like object contains only Interval objects (with matching + dtypes and closedness) or NaN, whether to convert to IntervalArray. + convert_to_nullable_integer : bool, default False + If an array-like object contains only integer values (and NaN) is + encountered, whether to convert and return an IntegerArray. + dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None + Dtype to cast to if we have all-NaT. + + Returns + ------- + np.ndarray or ExtensionArray + Array of converted object values to more specific dtypes if applicable. + """ + cdef: + Py_ssize_t i, n, itemsize_max = 0 + ndarray[float64_t] floats + ndarray[complex128_t] complexes + ndarray[int64_t] ints + ndarray[uint64_t] uints + ndarray[uint8_t] bools + int64_t[::1] idatetimes + int64_t[::1] itimedeltas + Seen seen = Seen() + object val + float64_t fval, fnan = np.nan + + n = len(objects) + + floats = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_FLOAT64, 0) + complexes = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_COMPLEX128, 0) + ints = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_INT64, 0) + uints = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_UINT64, 0) + bools = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_UINT8, 0) + mask = np.full(n, False) + + if convert_datetime: + datetimes = np.empty(n, dtype='M8[ns]') + idatetimes = datetimes.view(np.int64) + + if convert_timedelta: + timedeltas = np.empty(n, dtype='m8[ns]') + itimedeltas = timedeltas.view(np.int64) + + for i in range(n): + val = objects[i] + if itemsize_max != -1: + itemsize = get_itemsize(val) + if itemsize > itemsize_max or itemsize == -1: + itemsize_max = itemsize + + if val is None: + seen.null_ = True + floats[i] = complexes[i] = fnan + mask[i] = True + elif val is NaT: + seen.nat_ = True + if convert_datetime: + idatetimes[i] = NPY_NAT + if convert_timedelta: + itimedeltas[i] = NPY_NAT + if not (convert_datetime or convert_timedelta or convert_period): + seen.object_ = True + break + elif val is np.nan: + seen.nan_ = True + mask[i] = True + floats[i] = complexes[i] = val + elif util.is_bool_object(val): + seen.bool_ = True + bools[i] = val + elif util.is_float_object(val): + floats[i] = complexes[i] = val + seen.float_ = True + elif is_timedelta(val): + if convert_timedelta: + seen.timedelta_ = True + try: + itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8") + except OutOfBoundsTimedelta: + seen.object_ = True + break + break + else: + seen.object_ = True + break + elif util.is_integer_object(val): + seen.int_ = True + floats[i] = val + complexes[i] = val + if not seen.null_: + seen.saw_int(val) + + if ((seen.uint_ and seen.sint_) or + val > oUINT64_MAX or val < oINT64_MIN): + seen.object_ = True + break + + if seen.uint_: + uints[i] = val + elif seen.sint_: + ints[i] = val + else: + uints[i] = val + ints[i] = val + + elif util.is_complex_object(val): + complexes[i] = val + seen.complex_ = True + elif PyDateTime_Check(val) or util.is_datetime64_object(val): + + # if we have an tz's attached then return the objects + if convert_datetime: + if getattr(val, 'tzinfo', None) is not None: + seen.datetimetz_ = True + break + else: + seen.datetime_ = True + try: + idatetimes[i] = convert_to_tsobject( + val, None, None, 0, 0).value + except OutOfBoundsDatetime: + seen.object_ = True + break + else: + seen.object_ = True + break + elif 
is_period_object(val): + if convert_period: + seen.period_ = True + break + else: + seen.object_ = True + break + elif try_float and not isinstance(val, str): + # this will convert Decimal objects + try: + floats[i] = float(val) + complexes[i] = complex(val) + seen.float_ = True + except (ValueError, TypeError): + seen.object_ = True + break + elif is_interval(val): + if convert_interval: + seen.interval_ = True + break + else: + seen.object_ = True + break + else: + seen.object_ = True + break + + # we try to coerce datetime w/tz but must all have the same tz + if seen.datetimetz_: + if is_datetime_with_singletz_array(objects): + from pandas import DatetimeIndex + dti = DatetimeIndex(objects) + + # unbox to DatetimeArray + return dti._data + seen.object_ = True + + elif seen.datetime_: + if is_datetime_or_datetime64_array(objects): + from pandas import DatetimeIndex + + try: + dti = DatetimeIndex(objects) + except OutOfBoundsDatetime: + pass + else: + # unbox to ndarray[datetime64[ns]] + return dti._data._ndarray + seen.object_ = True + + elif seen.timedelta_: + if is_timedelta_or_timedelta64_array(objects): + from pandas import TimedeltaIndex + + try: + tdi = TimedeltaIndex(objects) + except OutOfBoundsTimedelta: + pass + else: + # unbox to ndarray[timedelta64[ns]] + return tdi._data._ndarray + seen.object_ = True + + if seen.period_: + if is_period_array(objects): + from pandas import PeriodIndex + pi = PeriodIndex(objects) + + # unbox to PeriodArray + return pi._data + seen.object_ = True + + if seen.interval_: + if is_interval_array(objects): + from pandas import IntervalIndex + ii = IntervalIndex(objects) + + # unbox to IntervalArray + return ii._data + + seen.object_ = True + + if not seen.object_: + result = None + if not safe: + if seen.null_ or seen.nan_: + if seen.is_float_or_complex: + if seen.complex_: + result = complexes + elif seen.float_: + result = floats + elif seen.int_: + if convert_to_nullable_integer: + from pandas.core.arrays import IntegerArray + result = IntegerArray(ints, mask) + else: + result = floats + elif seen.nan_: + result = floats + else: + if not seen.bool_: + if seen.datetime_: + if not seen.numeric_ and not seen.timedelta_: + result = datetimes + elif seen.timedelta_: + if not seen.numeric_: + result = timedeltas + elif seen.nat_: + if not seen.numeric_: + if convert_datetime and convert_timedelta: + dtype = dtype_if_all_nat + if dtype is not None: + # otherwise we keep object dtype + result = _infer_all_nats( + dtype, datetimes, timedeltas + ) + + elif convert_datetime: + result = datetimes + elif convert_timedelta: + result = timedeltas + else: + if seen.complex_: + result = complexes + elif seen.float_: + result = floats + elif seen.int_: + if seen.uint_: + result = uints + else: + result = ints + elif seen.is_bool: + result = bools.view(np.bool_) + + else: + # don't cast int to float, etc. 
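+                # Editor's note, illustrative only and not part of the upstream
+                # pandas source: in this safe=True branch a result dtype is only
+                # chosen when no value would need upcasting.  For example, an
+                # object array containing both ints and floats leaves `result`
+                # as None here, so the original object-dtype array is returned
+                # unchanged at the end of the function.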
+ if seen.null_: + if seen.is_float_or_complex: + if seen.complex_: + if not seen.int_: + result = complexes + elif seen.float_ or seen.nan_: + if not seen.int_: + result = floats + else: + if not seen.bool_: + if seen.datetime_: + if not seen.numeric_ and not seen.timedelta_: + result = datetimes + elif seen.timedelta_: + if not seen.numeric_: + result = timedeltas + elif seen.nat_: + if not seen.numeric_: + if convert_datetime and convert_timedelta: + dtype = dtype_if_all_nat + if dtype is not None: + # otherwise we keep object dtype + result = _infer_all_nats( + dtype, datetimes, timedeltas + ) + + elif convert_datetime: + result = datetimes + elif convert_timedelta: + result = timedeltas + else: + if seen.complex_: + if not seen.int_: + result = complexes + elif seen.float_ or seen.nan_: + if not seen.int_: + result = floats + elif seen.int_: + if seen.uint_: + result = uints + else: + result = ints + elif seen.is_bool and not seen.nan_: + result = bools.view(np.bool_) + + if result is uints or result is ints or result is floats or result is complexes: + # cast to the largest itemsize when all values are NumPy scalars + if itemsize_max > 0 and itemsize_max != result.dtype.itemsize: + result = result.astype(result.dtype.kind + str(itemsize_max)) + return result + elif result is not None: + return result + + return objects + + +cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas): + """ + If we have all-NaT values, cast these to the given dtype. + """ + if cnp.PyArray_DescrCheck(dtype): + # i.e. isinstance(dtype, np.dtype): + if dtype == "M8[ns]": + result = datetimes + elif dtype == "m8[ns]": + result = timedeltas + else: + raise ValueError(dtype) + else: + # ExtensionDtype + cls = dtype.construct_array_type() + i8vals = cnp.PyArray_EMPTY(1, datetimes.shape, cnp.NPY_INT64, 0) + i8vals.fill(NPY_NAT) + result = cls(i8vals, dtype=dtype) + return result + + +class _NoDefault(Enum): + # We make this an Enum + # 1) because it round-trips through pickle correctly (see GH#40397) + # 2) because mypy does not understand singletons + no_default = "NO_DEFAULT" + + def __repr__(self) -> str: + return "" + + +# Note: no_default is exported to the public API in pandas.api.extensions +no_default = _NoDefault.no_default # Sentinel indicating the default value. +NoDefault = Literal[_NoDefault.no_default] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True, + object na_value=no_default, cnp.dtype dtype=np.dtype(object) + ) -> np.ndarray: + """ + Substitute for np.vectorize with pandas-friendly dtype inference. + + Parameters + ---------- + arr : ndarray + f : function + mask : ndarray + uint8 dtype ndarray indicating values not to apply `f` to. + convert : bool, default True + Whether to call `maybe_convert_objects` on the resulting ndarray + na_value : Any, optional + The result value to use for masked values. By default, the + input value is used + dtype : numpy.dtype + The numpy dtype to use for the result ndarray. 
+ + Returns + ------- + np.ndarray + """ + cdef: + Py_ssize_t i, n + ndarray result + object val + + n = len(arr) + result = np.empty(n, dtype=dtype) + for i in range(n): + if mask[i]: + if na_value is no_default: + val = arr[i] + else: + val = na_value + else: + val = f(arr[i]) + + if cnp.PyArray_IsZeroDim(val): + # unbox 0-dim arrays, GH#690 + val = val.item() + + result[i] = val + + if convert: + return maybe_convert_objects(result, + try_float=False, + convert_datetime=False, + convert_timedelta=False) + + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def map_infer( + ndarray arr, object f, bint convert=True, bint ignore_na=False +) -> np.ndarray: + """ + Substitute for np.vectorize with pandas-friendly dtype inference. + + Parameters + ---------- + arr : ndarray + f : function + convert : bint + ignore_na : bint + If True, NA values will not have f applied + + Returns + ------- + np.ndarray + """ + cdef: + Py_ssize_t i, n + ndarray[object] result + object val + + n = len(arr) + result = cnp.PyArray_EMPTY(1, arr.shape, cnp.NPY_OBJECT, 0) + for i in range(n): + if ignore_na and checknull(arr[i]): + result[i] = arr[i] + continue + val = f(arr[i]) + + if cnp.PyArray_IsZeroDim(val): + # unbox 0-dim arrays, GH#690 + val = val.item() + + result[i] = val + + if convert: + return maybe_convert_objects(result, + try_float=False, + convert_datetime=False, + convert_timedelta=False) + + return result + + +def to_object_array(rows: object, min_width: int = 0) -> ndarray: + """ + Convert a list of lists into an object array. + + Parameters + ---------- + rows : 2-d array (N, K) + List of lists to be converted into an array. + min_width : int + Minimum width of the object array. If a list + in `rows` contains fewer than `width` elements, + the remaining elements in the corresponding row + will all be `NaN`. + + Returns + ------- + np.ndarray[object, ndim=2] + """ + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + list row + + rows = list(rows) + n = len(rows) + + k = min_width + for i in range(n): + tmp = len(rows[i]) + if tmp > k: + k = tmp + + result = np.empty((n, k), dtype=object) + + for i in range(n): + row = list(rows[i]) + + for j in range(len(row)): + result[i, j] = row[j] + + return result + + +def tuples_to_object_array(ndarray[object] tuples): + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + tuple tup + + n = len(tuples) + k = len(tuples[0]) + result = np.empty((n, k), dtype=object) + for i in range(n): + tup = tuples[i] + for j in range(k): + result[i, j] = tup[j] + + return result + + +def to_object_array_tuples(rows: object) -> np.ndarray: + """ + Convert a list of tuples into an object array. Any subclass of + tuple in `rows` will be casted to tuple. + + Parameters + ---------- + rows : 2-d array (N, K) + List of tuples to be converted into an array. + + Returns + ------- + np.ndarray[object, ndim=2] + """ + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + tuple row + + rows = list(rows) + n = len(rows) + + k = 0 + for i in range(n): + tmp = 1 if checknull(rows[i]) else len(rows[i]) + if tmp > k: + k = tmp + + result = np.empty((n, k), dtype=object) + + try: + for i in range(n): + row = rows[i] + for j in range(len(row)): + result[i, j] = row[j] + except TypeError: + # e.g. 
"Expected tuple, got list" + # upcast any subclasses to tuple + for i in range(n): + row = (rows[i],) if checknull(rows[i]) else tuple(rows[i]) + for j in range(len(row)): + result[i, j] = row[j] + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray: + cdef: + Py_ssize_t i, n = len(keys) + object val + ndarray[object] output = np.empty(n, dtype='O') + + if n == 0: + # kludge, for Series + return np.empty(0, dtype='f8') + + for i in range(n): + val = keys[i] + if val in mapping: + output[i] = mapping[val] + else: + output[i] = default + + return maybe_convert_objects(output) + + +def is_bool_list(obj: list) -> bool: + """ + Check if this list contains only bool or np.bool_ objects. + + This is appreciably faster than checking `np.array(obj).dtype == bool` + + obj1 = [True, False] * 100 + obj2 = obj1 * 100 + obj3 = obj2 * 100 + obj4 = [True, None] + obj1 + + for obj in [obj1, obj2, obj3, obj4]: + %timeit is_bool_list(obj) + %timeit np.array(obj).dtype.kind == "b" + + 340 ns ± 8.22 ns + 8.78 µs ± 253 ns + + 28.8 µs ± 704 ns + 813 µs ± 17.8 µs + + 3.4 ms ± 168 µs + 78.4 ms ± 1.05 ms + + 48.1 ns ± 1.26 ns + 8.1 µs ± 198 ns + """ + cdef: + object item + + for item in obj: + if not util.is_bool_object(item): + return False + + # Note: we return True for empty list + return True + + +cpdef ndarray eq_NA_compat(ndarray[object] arr, object key): + """ + Check for `arr == key`, treating all values as not-equal to pd.NA. + + key is assumed to have `not isna(key)` + """ + cdef: + ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_BOOL, 0) + Py_ssize_t i + object item + + for i in range(len(arr)): + item = arr[i] + if item is C_NA: + result[i] = False + else: + result[i] = item == key + + return result + + +def dtypes_all_equal(list types not None) -> bool: + """ + Faster version for: + + first = types[0] + all(is_dtype_equal(first, t) for t in types[1:]) + + And assuming all elements in the list are np.dtype/ExtensionDtype objects + + See timings at https://github.com/pandas-dev/pandas/pull/44594 + """ + first = types[0] + for t in types[1:]: + try: + if not t == first: + return False + except (TypeError, AttributeError): + return False + else: + return True diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd new file mode 100644 index 00000000..854dcf2e --- /dev/null +++ b/pandas/_libs/missing.pxd @@ -0,0 +1,19 @@ +from numpy cimport ( + ndarray, + uint8_t, +) + + +cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*) + +cpdef bint checknull(object val, bint inf_as_na=*) +cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*) + +cdef bint is_null_datetime64(v) +cdef bint is_null_timedelta64(v) +cdef bint checknull_with_nat_and_na(object obj) + +cdef class C_NAType: + pass + +cdef C_NAType C_NA diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi new file mode 100644 index 00000000..27f22755 --- /dev/null +++ b/pandas/_libs/missing.pyi @@ -0,0 +1,18 @@ +import numpy as np +from numpy import typing as npt + +class NAType: + def __new__(cls, *args, **kwargs): ... + +NA: NAType + +def is_matching_na( + left: object, right: object, nan_matches_none: bool = ... +) -> bool: ... +def isposinf_scalar(val: object) -> bool: ... +def isneginf_scalar(val: object) -> bool: ... +def checknull(val: object, inf_as_na: bool = ...) -> bool: ... +def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... 
+def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... +def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ... +def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ... diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx new file mode 100644 index 00000000..9b470e95 --- /dev/null +++ b/pandas/_libs/missing.pyx @@ -0,0 +1,507 @@ +from decimal import Decimal +import numbers +from sys import maxsize + +cimport cython +from cython cimport Py_ssize_t +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float64_t, + int64_t, + ndarray, + uint8_t, +) + +cnp.import_array() + +from pandas._libs cimport util +from pandas._libs.tslibs.nattype cimport ( + c_NaT as NaT, + checknull_with_nat, + is_dt64nat, + is_td64nat, +) +from pandas._libs.tslibs.np_datetime cimport ( + get_datetime64_unit, + get_datetime64_value, + get_timedelta64_value, +) + +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op + +cdef: + float64_t INF = np.inf + float64_t NEGINF = -INF + + int64_t NPY_NAT = util.get_nat() + + bint is_32bit = maxsize <= 2 ** 32 + + type cDecimal = Decimal # for faster isinstance checks + + +cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False): + """ + Check if two scalars are both NA of matching types. + + Parameters + ---------- + left : Any + right : Any + nan_matches_none : bool, default False + For backwards compatibility, consider NaN as matching None. + + Returns + ------- + bool + """ + if left is None: + if nan_matches_none and util.is_nan(right): + return True + return right is None + elif left is C_NA: + return right is C_NA + elif left is NaT: + return right is NaT + elif util.is_float_object(left): + if nan_matches_none and right is None and util.is_nan(left): + return True + return ( + util.is_nan(left) + and util.is_float_object(right) + and util.is_nan(right) + ) + elif util.is_complex_object(left): + return ( + util.is_nan(left) + and util.is_complex_object(right) + and util.is_nan(right) + ) + elif util.is_datetime64_object(left): + return ( + get_datetime64_value(left) == NPY_NAT + and util.is_datetime64_object(right) + and get_datetime64_value(right) == NPY_NAT + and get_datetime64_unit(left) == get_datetime64_unit(right) + ) + elif util.is_timedelta64_object(left): + return ( + get_timedelta64_value(left) == NPY_NAT + and util.is_timedelta64_object(right) + and get_timedelta64_value(right) == NPY_NAT + and get_datetime64_unit(left) == get_datetime64_unit(right) + ) + elif is_decimal_na(left): + return is_decimal_na(right) + return False + + +cpdef bint checknull(object val, bint inf_as_na=False): + """ + Return boolean describing of the input is NA-like, defined here as any + of: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") + + Parameters + ---------- + val : object + inf_as_na : bool, default False + Whether to treat INF and -INF as NA values. 
+ + Returns + ------- + bool + """ + if val is None or val is NaT or val is C_NA: + return True + elif util.is_float_object(val) or util.is_complex_object(val): + if val != val: + return True + elif inf_as_na: + return val == INF or val == NEGINF + return False + elif util.is_timedelta64_object(val): + return get_timedelta64_value(val) == NPY_NAT + elif util.is_datetime64_object(val): + return get_datetime64_value(val) == NPY_NAT + else: + return is_decimal_na(val) + + +cdef inline bint is_decimal_na(object val): + """ + Is this a decimal.Decimal object Decimal("NAN"). + """ + return isinstance(val, cDecimal) and val != val + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False): + """ + Return boolean mask denoting which elements of a 1-D array are na-like, + according to the criteria defined in `checknull`: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + """ + cdef: + Py_ssize_t i, n + object val + ndarray[uint8_t] result + + assert arr.ndim == 1, "'arr' must be 1-D." + + n = len(arr) + result = np.empty(n, dtype=np.uint8) + for i in range(n): + val = arr[i] + result[i] = checknull(val, inf_as_na=inf_as_na) + return result.view(np.bool_) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj2d(arr: ndarray, inf_as_na: bool = False) -> ndarray: + """ + Return boolean mask denoting which elements of a 2-D array are na-like, + according to the criteria defined in `checknull`: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + """ + cdef: + Py_ssize_t i, j, n, m + object val + ndarray[uint8_t, ndim=2] result + + assert arr.ndim == 2, "'arr' must be 2-D." 
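+    # Editor's note, not part of the upstream source: the nested loop below
+    # fills a uint8 matrix cell by cell and views it as bool at the end;
+    # checknull treats None, float/complex NaN, NaT, np.datetime64("NaT"),
+    # np.timedelta64("NaT"), pd.NA and Decimal("NaN") as NA, plus +/-INF
+    # when inf_as_na is True.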
+ + n, m = (arr).shape + result = np.zeros((n, m), dtype=np.uint8) + for i in range(n): + for j in range(m): + val = arr[i, j] + if checknull(val, inf_as_na=inf_as_na): + result[i, j] = 1 + return result.view(np.bool_) + + +def isposinf_scalar(val: object) -> bool: + return util.is_float_object(val) and val == INF + + +def isneginf_scalar(val: object) -> bool: + return util.is_float_object(val) and val == NEGINF + + +cdef inline bint is_null_datetime64(v): + # determine if we have a null for a datetime (or integer versions), + # excluding np.timedelta64('nat') + if checknull_with_nat(v) or is_dt64nat(v): + return True + return False + + +cdef inline bint is_null_timedelta64(v): + # determine if we have a null for a timedelta (or integer versions), + # excluding np.datetime64('nat') + if checknull_with_nat(v) or is_td64nat(v): + return True + return False + + +cdef bint checknull_with_nat_and_na(object obj): + # See GH#32214 + return checknull_with_nat(obj) or obj is C_NA + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_float_nan(values: ndarray) -> ndarray: + """ + True for elements which correspond to a float nan + + Returns + ------- + ndarray[bool] + """ + cdef: + ndarray[uint8_t] result + Py_ssize_t i, N + object val + + N = len(values) + result = np.zeros(N, dtype=np.uint8) + + for i in range(N): + val = values[i] + if util.is_nan(val): + result[i] = True + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_numeric_na(values: ndarray) -> ndarray: + """ + Check for NA values consistent with IntegerArray/FloatingArray. + + Similar to a vectorized is_valid_na_for_dtype restricted to numeric dtypes. + + Returns + ------- + ndarray[bool] + """ + cdef: + ndarray[uint8_t] result + Py_ssize_t i, N + object val + + N = len(values) + result = np.zeros(N, dtype=np.uint8) + + for i in range(N): + val = values[i] + if checknull(val): + if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val): + result[i] = True + else: + raise TypeError(f"'values' contains non-numeric NA {val}") + return result.view(bool) + + +# ----------------------------------------------------------------------------- +# Implementation of NA singleton + + +def _create_binary_propagating_op(name, is_divmod=False): + + def method(self, other): + if (other is C_NA or isinstance(other, str) + or isinstance(other, (numbers.Number, np.bool_)) + or util.is_array(other) and not other.shape): + # Need the other.shape clause to handle NumPy scalars, + # since we do a setitem on `out` below, which + # won't work for NumPy scalars. + if is_divmod: + return NA, NA + else: + return NA + + elif util.is_array(other): + out = np.empty(other.shape, dtype=object) + out[:] = NA + + if is_divmod: + return out, out.copy() + else: + return out + + return NotImplemented + + method.__name__ = name + return method + + +def _create_unary_propagating_op(name: str): + def method(self): + return NA + + method.__name__ = name + return method + + +cdef class C_NAType: + pass + + +class NAType(C_NAType): + """ + NA ("not available") missing value indicator. + + .. warning:: + + Experimental: the behaviour of NA can still change without warning. + + .. versionadded:: 1.0.0 + + The NA singleton is a missing value indicator defined by pandas. It is + used in certain new extension dtypes (currently the "string" dtype). 
+ """ + + _instance = None + + def __new__(cls, *args, **kwargs): + if NAType._instance is None: + NAType._instance = C_NAType.__new__(cls, *args, **kwargs) + return NAType._instance + + def __repr__(self) -> str: + return "" + + def __format__(self, format_spec) -> str: + try: + return self.__repr__().__format__(format_spec) + except ValueError: + return self.__repr__() + + def __bool__(self): + raise TypeError("boolean value of NA is ambiguous") + + def __hash__(self): + # GH 30013: Ensure hash is large enough to avoid hash collisions with integers + exponent = 31 if is_32bit else 61 + return 2 ** exponent - 1 + + def __reduce__(self): + return "NA" + + # Binary arithmetic and comparison ops -> propagate + + __add__ = _create_binary_propagating_op("__add__") + __radd__ = _create_binary_propagating_op("__radd__") + __sub__ = _create_binary_propagating_op("__sub__") + __rsub__ = _create_binary_propagating_op("__rsub__") + __mul__ = _create_binary_propagating_op("__mul__") + __rmul__ = _create_binary_propagating_op("__rmul__") + __matmul__ = _create_binary_propagating_op("__matmul__") + __rmatmul__ = _create_binary_propagating_op("__rmatmul__") + __truediv__ = _create_binary_propagating_op("__truediv__") + __rtruediv__ = _create_binary_propagating_op("__rtruediv__") + __floordiv__ = _create_binary_propagating_op("__floordiv__") + __rfloordiv__ = _create_binary_propagating_op("__rfloordiv__") + __mod__ = _create_binary_propagating_op("__mod__") + __rmod__ = _create_binary_propagating_op("__rmod__") + __divmod__ = _create_binary_propagating_op("__divmod__", is_divmod=True) + __rdivmod__ = _create_binary_propagating_op("__rdivmod__", is_divmod=True) + # __lshift__ and __rshift__ are not implemented + + __eq__ = _create_binary_propagating_op("__eq__") + __ne__ = _create_binary_propagating_op("__ne__") + __le__ = _create_binary_propagating_op("__le__") + __lt__ = _create_binary_propagating_op("__lt__") + __gt__ = _create_binary_propagating_op("__gt__") + __ge__ = _create_binary_propagating_op("__ge__") + + # Unary ops + + __neg__ = _create_unary_propagating_op("__neg__") + __pos__ = _create_unary_propagating_op("__pos__") + __abs__ = _create_unary_propagating_op("__abs__") + __invert__ = _create_unary_propagating_op("__invert__") + + # pow has special + def __pow__(self, other): + if other is C_NA: + return NA + elif isinstance(other, (numbers.Number, np.bool_)): + if other == 0: + # returning positive is correct for +/- 0. 
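+                # Editor's note, not part of the upstream source: this makes
+                # NA ** 0 == 1, and __rpow__ below likewise returns 1 for
+                # 1 ** NA; every other exponent or base propagates NA.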
+ return type(other)(1) + else: + return NA + elif util.is_array(other): + return np.where(other == 0, other.dtype.type(1), NA) + + return NotImplemented + + def __rpow__(self, other): + if other is C_NA: + return NA + elif isinstance(other, (numbers.Number, np.bool_)): + if other == 1: + return other + else: + return NA + elif util.is_array(other): + return np.where(other == 1, other, NA) + return NotImplemented + + # Logical ops using Kleene logic + + def __and__(self, other): + if other is False: + return False + elif other is True or other is C_NA: + return NA + return NotImplemented + + __rand__ = __and__ + + def __or__(self, other): + if other is True: + return True + elif other is False or other is C_NA: + return NA + return NotImplemented + + __ror__ = __or__ + + def __xor__(self, other): + if other is False or other is True or other is C_NA: + return NA + return NotImplemented + + __rxor__ = __xor__ + + __array_priority__ = 1000 + _HANDLED_TYPES = (np.ndarray, numbers.Number, str, np.bool_) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + types = self._HANDLED_TYPES + (NAType,) + for x in inputs: + if not isinstance(x, types): + return NotImplemented + + if method != "__call__": + raise ValueError(f"ufunc method '{method}' not supported for NA") + result = maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is NotImplemented: + # For a NumPy ufunc that's not a binop, like np.logaddexp + index = [i for i, x in enumerate(inputs) if x is NA][0] + result = np.broadcast_arrays(*inputs)[index] + if result.ndim == 0: + result = result.item() + if ufunc.nout > 1: + result = (NA,) * ufunc.nout + + return result + + +C_NA = NAType() # C-visible +NA = C_NA # Python-visible diff --git a/pandas/_libs/ops.pyi b/pandas/_libs/ops.pyi new file mode 100644 index 00000000..74a6ad87 --- /dev/null +++ b/pandas/_libs/ops.pyi @@ -0,0 +1,50 @@ +from typing import ( + Any, + Callable, + Iterable, + Literal, + overload, +) + +import numpy as np + +from pandas._typing import npt + +_BinOp = Callable[[Any, Any], Any] +_BoolOp = Callable[[Any, Any], bool] + +def scalar_compare( + values: np.ndarray, # object[:] + val: object, + op: _BoolOp, # {operator.eq, operator.ne, ...} +) -> npt.NDArray[np.bool_]: ... +def vec_compare( + left: npt.NDArray[np.object_], + right: npt.NDArray[np.object_], + op: _BoolOp, # {operator.eq, operator.ne, ...} +) -> npt.NDArray[np.bool_]: ... +def scalar_binop( + values: np.ndarray, # object[:] + val: object, + op: _BinOp, # binary operator +) -> np.ndarray: ... +def vec_binop( + left: np.ndarray, # object[:] + right: np.ndarray, # object[:] + op: _BinOp, # binary operator +) -> np.ndarray: ... +@overload +def maybe_convert_bool( + arr: npt.NDArray[np.object_], + true_values: Iterable = ..., + false_values: Iterable = ..., + convert_to_masked_nullable: Literal[False] = ..., +) -> tuple[np.ndarray, None]: ... +@overload +def maybe_convert_bool( + arr: npt.NDArray[np.object_], + true_values: Iterable = ..., + false_values: Iterable = ..., + *, + convert_to_masked_nullable: Literal[True], +) -> tuple[np.ndarray, np.ndarray]: ... 
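+# Editor's illustrative sketch, not part of the upstream stub file: the
+# overloads above mirror maybe_convert_bool's runtime behaviour (defined in
+# ops.pyx later in this patch).  For example, assuming numpy is imported as np:
+#     arr = np.array(["True", "false", np.nan], dtype=object)
+#     maybe_convert_bool(arr)
+#     -> (array([True, False, nan], dtype=object), None)
+#     maybe_convert_bool(arr, convert_to_masked_nullable=True)
+#     -> (array([ True, False, False]), array([False, False,  True]))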
diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx new file mode 100644 index 00000000..308756e3 --- /dev/null +++ b/pandas/_libs/ops.pyx @@ -0,0 +1,310 @@ +import operator + +cimport cython +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, + PyObject_RichCompareBool, +) +from cython cimport Py_ssize_t + +import numpy as np + +from numpy cimport ( + import_array, + ndarray, + uint8_t, +) + +import_array() + + +from pandas._libs.missing cimport checknull +from pandas._libs.util cimport is_nan + + +@cython.wraparound(False) +@cython.boundscheck(False) +def scalar_compare(object[:] values, object val, object op) -> ndarray: + """ + Compare each element of `values` array with the scalar `val`, with + the comparison operation described by `op`. + + Parameters + ---------- + values : ndarray[object] + val : object + op : {operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt} + + Returns + ------- + result : ndarray[bool] + """ + cdef: + Py_ssize_t i, n = len(values) + ndarray[uint8_t, cast=True] result + bint isnull_val + int flag + object x + + if op is operator.lt: + flag = Py_LT + elif op is operator.le: + flag = Py_LE + elif op is operator.gt: + flag = Py_GT + elif op is operator.ge: + flag = Py_GE + elif op is operator.eq: + flag = Py_EQ + elif op is operator.ne: + flag = Py_NE + else: + raise ValueError('Unrecognized operator') + + result = np.empty(n, dtype=bool).view(np.uint8) + isnull_val = checknull(val) + + if flag == Py_NE: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = True + elif isnull_val: + result[i] = True + else: + try: + result[i] = PyObject_RichCompareBool(x, val, flag) + except TypeError: + result[i] = True + elif flag == Py_EQ: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = False + elif isnull_val: + result[i] = False + else: + try: + result[i] = PyObject_RichCompareBool(x, val, flag) + except TypeError: + result[i] = False + + else: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = False + elif isnull_val: + result[i] = False + else: + result[i] = PyObject_RichCompareBool(x, val, flag) + + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarray: + """ + Compare the elements of `left` with the elements of `right` pointwise, + with the comparison operation described by `op`. 
+ + Parameters + ---------- + left : ndarray[object] + right : ndarray[object] + op : {operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt} + + Returns + ------- + result : ndarray[bool] + """ + cdef: + Py_ssize_t i, n = len(left) + ndarray[uint8_t, cast=True] result + int flag + + if n != len(right): + raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') + + if op is operator.lt: + flag = Py_LT + elif op is operator.le: + flag = Py_LE + elif op is operator.gt: + flag = Py_GT + elif op is operator.ge: + flag = Py_GE + elif op is operator.eq: + flag = Py_EQ + elif op is operator.ne: + flag = Py_NE + else: + raise ValueError('Unrecognized operator') + + result = np.empty(n, dtype=bool).view(np.uint8) + + if flag == Py_NE: + for i in range(n): + x = left[i] + y = right[i] + + if checknull(x) or checknull(y): + result[i] = True + else: + result[i] = PyObject_RichCompareBool(x, y, flag) + else: + for i in range(n): + x = left[i] + y = right[i] + + if checknull(x) or checknull(y): + result[i] = False + else: + result[i] = PyObject_RichCompareBool(x, y, flag) + + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def scalar_binop(object[:] values, object val, object op) -> ndarray: + """ + Apply the given binary operator `op` between each element of the array + `values` and the scalar `val`. + + Parameters + ---------- + values : ndarray[object] + val : object + op : binary operator + + Returns + ------- + result : ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object[::1] result + object x + + result = np.empty(n, dtype=object) + if val is None or is_nan(val): + result[:] = val + return result.base # `.base` to access underlying np.ndarray + + for i in range(n): + x = values[i] + if x is None or is_nan(x): + result[i] = x + else: + result[i] = op(x, val) + + return maybe_convert_bool(result.base)[0] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def vec_binop(object[:] left, object[:] right, object op) -> ndarray: + """ + Apply the given binary operator `op` pointwise to the elements of + arrays `left` and `right`. 
+ + Parameters + ---------- + left : ndarray[object] + right : ndarray[object] + op : binary operator + + Returns + ------- + result : ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(left) + object[::1] result + + if n != len(right): + raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') + + result = np.empty(n, dtype=object) + + for i in range(n): + x = left[i] + y = right[i] + try: + result[i] = op(x, y) + except TypeError: + if x is None or is_nan(x): + result[i] = x + elif y is None or is_nan(y): + result[i] = y + else: + raise + + return maybe_convert_bool(result.base)[0] # `.base` to access np.ndarray + + +def maybe_convert_bool(ndarray[object] arr, + true_values=None, + false_values=None, + convert_to_masked_nullable=False + ) -> tuple[np.ndarray, np.ndarray | None]: + cdef: + Py_ssize_t i, n + ndarray[uint8_t] result + ndarray[uint8_t] mask + object val + set true_vals, false_vals + bint has_na = False + + n = len(arr) + result = np.empty(n, dtype=np.uint8) + mask = np.zeros(n, dtype=np.uint8) + # the defaults + true_vals = {'True', 'TRUE', 'true'} + false_vals = {'False', 'FALSE', 'false'} + + if true_values is not None: + true_vals = true_vals | set(true_values) + + if false_values is not None: + false_vals = false_vals | set(false_values) + + for i in range(n): + val = arr[i] + + if isinstance(val, bool): + if val is True: + result[i] = 1 + else: + result[i] = 0 + elif val in true_vals: + result[i] = 1 + elif val in false_vals: + result[i] = 0 + elif is_nan(val): + mask[i] = 1 + result[i] = 0 # Value here doesn't matter, will be replaced w/ nan + has_na = True + else: + return (arr, None) + + if has_na: + if convert_to_masked_nullable: + return (result.view(np.bool_), mask.view(np.bool_)) + else: + arr = result.view(np.bool_).astype(object) + np.putmask(arr, mask, np.nan) + return (arr, None) + else: + return (result.view(np.bool_), None) diff --git a/pandas/_libs/ops_dispatch.pyi b/pandas/_libs/ops_dispatch.pyi new file mode 100644 index 00000000..91b5a4db --- /dev/null +++ b/pandas/_libs/ops_dispatch.pyi @@ -0,0 +1,5 @@ +import numpy as np + +def maybe_dispatch_ufunc_to_dunder_op( + self, ufunc: np.ufunc, method: str, *inputs, **kwargs +): ... diff --git a/pandas/_libs/ops_dispatch.pyx b/pandas/_libs/ops_dispatch.pyx new file mode 100644 index 00000000..2b2a411e --- /dev/null +++ b/pandas/_libs/ops_dispatch.pyx @@ -0,0 +1,121 @@ +DISPATCHED_UFUNCS = { + "add", + "sub", + "mul", + "pow", + "mod", + "floordiv", + "truediv", + "divmod", + "eq", + "ne", + "lt", + "gt", + "le", + "ge", + "remainder", + "matmul", + "or", + "xor", + "and", + "neg", + "pos", + "abs", +} +UNARY_UFUNCS = { + "neg", + "pos", + "abs", +} +UFUNC_ALIASES = { + "subtract": "sub", + "multiply": "mul", + "floor_divide": "floordiv", + "true_divide": "truediv", + "power": "pow", + "remainder": "mod", + "divide": "truediv", + "equal": "eq", + "not_equal": "ne", + "less": "lt", + "less_equal": "le", + "greater": "gt", + "greater_equal": "ge", + "bitwise_or": "or", + "bitwise_and": "and", + "bitwise_xor": "xor", + "negative": "neg", + "absolute": "abs", + "positive": "pos", +} + +# For op(., Array) -> Array.__r{op}__ +REVERSED_NAMES = { + "lt": "__gt__", + "le": "__ge__", + "gt": "__lt__", + "ge": "__le__", + "eq": "__eq__", + "ne": "__ne__", +} + + +def maybe_dispatch_ufunc_to_dunder_op( + object self, object ufunc, str method, *inputs, **kwargs +): + """ + Dispatch a ufunc to the equivalent dunder method. 
+ + Parameters + ---------- + self : ArrayLike + The array whose dunder method we dispatch to + ufunc : Callable + A NumPy ufunc + method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'} + inputs : ArrayLike + The input arrays. + kwargs : Any + The additional keyword arguments, e.g. ``out``. + + Returns + ------- + result : Any + The result of applying the ufunc + """ + # special has the ufuncs we dispatch to the dunder op on + + op_name = ufunc.__name__ + op_name = UFUNC_ALIASES.get(op_name, op_name) + + def not_implemented(*args, **kwargs): + return NotImplemented + + if kwargs or ufunc.nin > 2: + return NotImplemented + + if method == "__call__" and op_name in DISPATCHED_UFUNCS: + + if inputs[0] is self: + name = f"__{op_name}__" + meth = getattr(self, name, not_implemented) + + if op_name in UNARY_UFUNCS: + assert len(inputs) == 1 + return meth() + + return meth(inputs[1]) + + elif inputs[1] is self: + name = REVERSED_NAMES.get(op_name, f"__r{op_name}__") + + meth = getattr(self, name, not_implemented) + result = meth(inputs[0]) + return result + + else: + # should not be reached, but covering our bases + return NotImplemented + + else: + return NotImplemented diff --git a/pandas/_libs/parsers.pyi b/pandas/_libs/parsers.pyi new file mode 100644 index 00000000..6b0bbf18 --- /dev/null +++ b/pandas/_libs/parsers.pyi @@ -0,0 +1,70 @@ +from typing import ( + Hashable, + Literal, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + Dtype, + npt, +) + +STR_NA_VALUES: set[str] + +def sanitize_objects( + values: npt.NDArray[np.object_], + na_values: set, +) -> int: ... + +class TextReader: + unnamed_cols: set[str] + table_width: int # int64_t + leading_cols: int # int64_t + header: list[list[int]] # non-negative integers + def __init__( + self, + source, + delimiter: bytes | str = ..., # single-character only + header=..., + header_start: int = ..., # int64_t + header_end: int = ..., # uint64_t + index_col=..., + names=..., + tokenize_chunksize: int = ..., # int64_t + delim_whitespace: bool = ..., + converters=..., + skipinitialspace: bool = ..., + escapechar: bytes | str | None = ..., # single-character only + doublequote: bool = ..., + quotechar: str | bytes | None = ..., # at most 1 character + quoting: int = ..., + lineterminator: bytes | str | None = ..., # at most 1 character + comment=..., + decimal: bytes | str = ..., # single-character only + thousands: bytes | str | None = ..., # single-character only + dtype: Dtype | dict[Hashable, Dtype] = ..., + usecols=..., + error_bad_lines: bool = ..., + warn_bad_lines: bool = ..., + na_filter: bool = ..., + na_values=..., + na_fvalues=..., + keep_default_na: bool = ..., + true_values=..., + false_values=..., + allow_leading_cols: bool = ..., + skiprows=..., + skipfooter: int = ..., # int64_t + verbose: bool = ..., + mangle_dupe_cols: bool = ..., + float_precision: Literal["round_trip", "legacy", "high"] | None = ..., + skip_blank_lines: bool = ..., + encoding_errors: bytes | str = ..., + ): ... + def set_noconvert(self, i: int) -> None: ... + def remove_noconvert(self, i: int) -> None: ... + def close(self) -> None: ... + def read(self, rows: int | None = ...) -> dict[int, ArrayLike]: ... + def read_low_memory(self, rows: int | None) -> list[dict[int, ArrayLike]]: ... diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx new file mode 100644 index 00000000..c391a82d --- /dev/null +++ b/pandas/_libs/parsers.pyx @@ -0,0 +1,2070 @@ +# Copyright (c) 2012, Lambda Foundry, Inc. 
+# See LICENSE for the license +from base64 import decode +from collections import defaultdict +from csv import ( + QUOTE_MINIMAL, + QUOTE_NONE, + QUOTE_NONNUMERIC, +) +from errno import ENOENT +import inspect +import sys +import time +import warnings + +from pandas.util._exceptions import find_stack_level + +cimport cython +from cpython.bytes cimport ( + PyBytes_AsString, + PyBytes_FromString, +) +from cpython.exc cimport ( + PyErr_Fetch, + PyErr_Occurred, +) +from cpython.object cimport PyObject +from cpython.ref cimport ( + Py_INCREF, + Py_XDECREF, +) +from cpython.unicode cimport ( + PyUnicode_AsUTF8String, + PyUnicode_Decode, + PyUnicode_DecodeUTF8, +) +from cython cimport Py_ssize_t +from libc.stdlib cimport free +from libc.string cimport ( + strcasecmp, + strlen, + strncpy, +) + + +cdef extern from "Python.h": + object PyUnicode_FromString(char *v) + + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float64_t, + int64_t, + ndarray, + uint8_t, + uint64_t, +) + +cnp.import_array() + +from pandas._libs cimport util +from pandas._libs.util cimport ( + INT64_MAX, + INT64_MIN, + UINT64_MAX, +) + +import pandas._libs.lib as lib + +from pandas._libs.khash cimport ( + kh_destroy_float64, + kh_destroy_str, + kh_destroy_str_starts, + kh_destroy_strbox, + kh_exist_str, + kh_float64_t, + kh_get_float64, + kh_get_str, + kh_get_str_starts_item, + kh_get_strbox, + kh_init_float64, + kh_init_str, + kh_init_str_starts, + kh_init_strbox, + kh_put_float64, + kh_put_str, + kh_put_str_starts_item, + kh_put_strbox, + kh_resize_float64, + kh_resize_str_starts, + kh_str_starts_t, + kh_str_t, + kh_strbox_t, + khiter_t, +) + +from pandas.errors import ( + EmptyDataError, + ParserError, + ParserWarning, +) + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_datetime64_dtype, + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.inference import is_dict_like + +cdef: + float64_t INF = np.inf + float64_t NEGINF = -INF + int64_t DEFAULT_CHUNKSIZE = 256 * 1024 + + +cdef extern from "headers/portable.h": + # I *think* this is here so that strcasecmp is defined on Windows + # so we don't get + # `parsers.obj : error LNK2001: unresolved external symbol strcasecmp` + # in Appveyor. + # In a sane world, the `from libc.string cimport` above would fail + # loudly. 
+ pass + + +cdef extern from "parser/tokenizer.h": + + ctypedef enum ParserState: + START_RECORD + START_FIELD + ESCAPED_CHAR + IN_FIELD + IN_QUOTED_FIELD + ESCAPE_IN_QUOTED_FIELD + QUOTE_IN_QUOTED_FIELD + EAT_CRNL + EAT_CRNL_NOP + EAT_WHITESPACE + EAT_COMMENT + EAT_LINE_COMMENT + WHITESPACE_LINE + SKIP_LINE + FINISHED + + enum: ERROR_OVERFLOW + + ctypedef enum BadLineHandleMethod: + ERROR, + WARN, + SKIP + + ctypedef void* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read, + int *status, const char *encoding_errors) + ctypedef int (*io_cleanup)(void *src) + + ctypedef struct parser_t: + void *source + io_callback cb_io + io_cleanup cb_cleanup + + int64_t chunksize # Number of bytes to prepare for each chunk + char *data # pointer to data to be processed + int64_t datalen # amount of data available + int64_t datapos + + # where to write out tokenized data + char *stream + uint64_t stream_len + uint64_t stream_cap + + # Store words in (potentially ragged) matrix for now, hmm + char **words + int64_t *word_starts # where we are in the stream + uint64_t words_len + uint64_t words_cap + uint64_t max_words_cap # maximum word cap encountered + + char *pword_start # pointer to stream start of current field + int64_t word_start # position start of current field + + int64_t *line_start # position in words for start of line + int64_t *line_fields # Number of fields in each line + uint64_t lines # Number of lines observed + uint64_t file_lines # Number of lines observed (with bad/skipped) + uint64_t lines_cap # Vector capacity + + # Tokenizing stuff + ParserState state + int doublequote # is " represented by ""? */ + char delimiter # field separator */ + int delim_whitespace # consume tabs / spaces instead + char quotechar # quote character */ + char escapechar # escape character */ + char lineterminator + int skipinitialspace # ignore spaces following delimiter? 
*/ + int quoting # style of quoting to write */ + + char commentchar + int allow_embedded_newline + + int usecols + + Py_ssize_t expected_fields + BadLineHandleMethod on_bad_lines + + # floating point options + char decimal + char sci + + # thousands separator (comma, period) + char thousands + + int header # Boolean: 1: has header, 0: no header + int64_t header_start # header row start + uint64_t header_end # header row end + + void *skipset + PyObject *skipfunc + int64_t skip_first_N_rows + int64_t skipfooter + # pick one, depending on whether the converter requires GIL + float64_t (*double_converter)(const char *, char **, + char, char, char, + int, int *, int *) nogil + + # error handling + char *warn_msg + char *error_msg + + int64_t skip_empty_lines + + ctypedef struct coliter_t: + char **words + int64_t *line_start + int64_t col + + ctypedef struct uint_state: + int seen_sint + int seen_uint + int seen_null + + void uint_state_init(uint_state *self) + int uint64_conflict(uint_state *self) + + void coliter_setup(coliter_t *it, parser_t *parser, + int64_t i, int64_t start) nogil + void COLITER_NEXT(coliter_t, const char *) nogil + + parser_t* parser_new() + + int parser_init(parser_t *self) nogil + void parser_free(parser_t *self) nogil + void parser_del(parser_t *self) nogil + int parser_add_skiprow(parser_t *self, int64_t row) + + int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) + + void parser_set_default_options(parser_t *self) + + int parser_consume_rows(parser_t *self, size_t nrows) + + int parser_trim_buffers(parser_t *self) + + int tokenize_all_rows(parser_t *self, const char *encoding_errors) nogil + int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) nogil + + int64_t str_to_int64(char *p_item, int64_t int_min, + int64_t int_max, int *error, char tsep) nogil + uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, + uint64_t uint_max, int *error, char tsep) nogil + + float64_t xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + float64_t precise_xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + float64_t round_trip(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + + int to_boolean(const char *item, uint8_t *val) nogil + + +cdef extern from "parser/io.h": + void *new_rd_source(object obj) except NULL + + int del_rd_source(void *src) + + void* buffer_rd_bytes(void *source, size_t nbytes, + size_t *bytes_read, int *status, const char *encoding_errors) + + +cdef class TextReader: + """ + + # source: StringIO or file object + + ..versionchange:: 1.2.0 + removed 'compression', 'memory_map', and 'encoding' argument. + These arguments are outsourced to CParserWrapper. + 'source' has to be a file handle. 
+ """ + + cdef: + parser_t *parser + object na_fvalues + object true_values, false_values + object handle + object orig_header + bint na_filter, keep_default_na, verbose, has_usecols, has_mi_columns + bint mangle_dupe_cols, allow_leading_cols + uint64_t parser_start # this is modified after __init__ + list clocks + const char *encoding_errors + kh_str_starts_t *false_set + kh_str_starts_t *true_set + int64_t buffer_lines, skipfooter + list dtype_cast_order # list[np.dtype] + list names # can be None + set noconvert # set[int] + + cdef public: + int64_t leading_cols, table_width + object delimiter # bytes or str + object converters + object na_values + list header # list[list[non-negative integers]] + object index_col + object skiprows + object dtype + object usecols + set unnamed_cols # set[str] + + def __cinit__(self, source, + delimiter=b',', # bytes | str + header=0, + int64_t header_start=0, + uint64_t header_end=0, + index_col=None, + names=None, + tokenize_chunksize=DEFAULT_CHUNKSIZE, + bint delim_whitespace=False, + converters=None, + bint skipinitialspace=False, + escapechar=None, # bytes | str + bint doublequote=True, + quotechar=b'"', + quoting=0, # int + lineterminator=None, # bytes | str + comment=None, + decimal=b'.', # bytes | str + thousands=None, # bytes | str + dtype=None, + usecols=None, + on_bad_lines=ERROR, + bint na_filter=True, + na_values=None, + na_fvalues=None, + bint keep_default_na=True, + true_values=None, + false_values=None, + bint allow_leading_cols=True, + skiprows=None, + skipfooter=0, # int64_t + bint verbose=False, + bint mangle_dupe_cols=True, + float_precision=None, + bint skip_blank_lines=True, + encoding_errors=b"strict"): + + # set encoding for native Python and C library + if isinstance(encoding_errors, str): + encoding_errors = encoding_errors.encode("utf-8") + elif encoding_errors is None: + encoding_errors = b"strict" + Py_INCREF(encoding_errors) + self.encoding_errors = PyBytes_AsString(encoding_errors) + + self.parser = parser_new() + self.parser.chunksize = tokenize_chunksize + + self.mangle_dupe_cols = mangle_dupe_cols + + # For timekeeping + self.clocks = [] + + self.parser.usecols = (usecols is not None) + + self._setup_parser_source(source) + parser_set_default_options(self.parser) + + parser_init(self.parser) + + if delim_whitespace: + self.parser.delim_whitespace = delim_whitespace + else: + if len(delimiter) > 1: + raise ValueError('only length-1 separators excluded right now') + self.parser.delimiter = ord(delimiter) + + # ---------------------------------------- + # parser options + + self.parser.doublequote = doublequote + self.parser.skipinitialspace = skipinitialspace + self.parser.skip_empty_lines = skip_blank_lines + + if lineterminator is not None: + if len(lineterminator) != 1: + raise ValueError('Only length-1 line terminators supported') + self.parser.lineterminator = ord(lineterminator) + + if len(decimal) != 1: + raise ValueError('Only length-1 decimal markers supported') + self.parser.decimal = ord(decimal) + + if thousands is not None: + if len(thousands) != 1: + raise ValueError('Only length-1 thousands markers supported') + self.parser.thousands = ord(thousands) + + if escapechar is not None: + if len(escapechar) != 1: + raise ValueError('Only length-1 escapes supported') + self.parser.escapechar = ord(escapechar) + + self._set_quoting(quotechar, quoting) + + dtype_order = ['int64', 'float64', 'bool', 'object'] + if quoting == QUOTE_NONNUMERIC: + # consistent with csv module semantics, cast all to float + dtype_order = 
dtype_order[1:] + self.dtype_cast_order = [np.dtype(x) for x in dtype_order] + + if comment is not None: + if len(comment) > 1: + raise ValueError('Only length-1 comment characters supported') + self.parser.commentchar = ord(comment) + + self.parser.on_bad_lines = on_bad_lines + + self.skiprows = skiprows + if skiprows is not None: + self._make_skiprow_set() + + self.skipfooter = skipfooter + + # suboptimal + if usecols is not None: + self.has_usecols = 1 + # GH-20558, validate usecols at higher level and only pass clean + # usecols into TextReader. + self.usecols = usecols + + # TODO: XXX? + if skipfooter > 0: + self.parser.on_bad_lines = SKIP + + self.delimiter = delimiter + + self.na_values = na_values + if na_fvalues is None: + na_fvalues = set() + self.na_fvalues = na_fvalues + + self.true_values = _maybe_encode(true_values) + _true_values + self.false_values = _maybe_encode(false_values) + _false_values + + self.true_set = kset_from_list(self.true_values) + self.false_set = kset_from_list(self.false_values) + + self.keep_default_na = keep_default_na + self.converters = converters + self.na_filter = na_filter + + self.verbose = verbose + + if float_precision == "round_trip": + # see gh-15140 + self.parser.double_converter = round_trip + elif float_precision == "legacy": + self.parser.double_converter = xstrtod + elif float_precision == "high" or float_precision is None: + self.parser.double_converter = precise_xstrtod + else: + raise ValueError(f'Unrecognized float_precision option: ' + f'{float_precision}') + + # Caller is responsible for ensuring we have one of + # - None + # - DtypeObj + # - dict[Any, DtypeObj] + self.dtype = dtype + + # XXX + self.noconvert = set() + + self.index_col = index_col + + # ---------------------------------------- + # header stuff + + self.allow_leading_cols = allow_leading_cols + self.leading_cols = 0 # updated in _get_header + + # TODO: no header vs. header is not the first row + self.has_mi_columns = 0 + self.orig_header = header + if header is None: + # sentinel value + self.parser.header_start = -1 + self.parser.header_end = -1 + self.parser.header = -1 + self.parser_start = 0 + prelim_header = [] + else: + if isinstance(header, list): + if len(header) > 1: + # need to artificially skip the final line + # which is still a header line + header = list(header) + header.append(header[-1] + 1) + self.parser.header_end = header[-1] + self.has_mi_columns = 1 + else: + self.parser.header_end = header[0] + + self.parser_start = header[-1] + 1 + self.parser.header_start = header[0] + self.parser.header = header[0] + prelim_header = header + else: + self.parser.header_start = header + self.parser.header_end = header + self.parser_start = header + 1 + self.parser.header = header + prelim_header = [header] + + self.names = names + header, table_width, unnamed_cols = self._get_header(prelim_header) + # header, table_width, and unnamed_cols are set here, never changed + self.header = header + self.table_width = table_width + self.unnamed_cols = unnamed_cols + + if not self.table_width: + raise EmptyDataError("No columns to parse from file") + + # Compute buffer_lines as function of table width. 
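+        # (descriptive note, not in upstream) Pick the largest power-of-two row
+        # count such that buffer_lines * table_width stays below 2**20 fields,
+        # i.e. roughly a 1 MiB-sized chunk of tokenized data per read.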
+ heuristic = 2**20 // self.table_width + self.buffer_lines = 1 + while self.buffer_lines * 2 < heuristic: + self.buffer_lines *= 2 + + def __init__(self, *args, **kwargs): + pass + + def __dealloc__(self): + _close(self) + parser_del(self.parser) + + def close(self): + _close(self) + + def _set_quoting(self, quote_char: str | bytes | None, quoting: int): + if not isinstance(quoting, int): + raise TypeError('"quoting" must be an integer') + + if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE: + raise TypeError('bad "quoting" value') + + if not isinstance(quote_char, (str, bytes)) and quote_char is not None: + dtype = type(quote_char).__name__ + raise TypeError(f'"quotechar" must be string, not {dtype}') + + if quote_char is None or quote_char == '': + if quoting != QUOTE_NONE: + raise TypeError("quotechar must be set if quoting enabled") + self.parser.quoting = quoting + self.parser.quotechar = -1 + elif len(quote_char) > 1: # 0-len case handled earlier + raise TypeError('"quotechar" must be a 1-character string') + else: + self.parser.quoting = quoting + self.parser.quotechar = ord(quote_char) + + cdef _make_skiprow_set(self): + if util.is_integer_object(self.skiprows): + parser_set_skipfirstnrows(self.parser, self.skiprows) + elif not callable(self.skiprows): + for i in self.skiprows: + parser_add_skiprow(self.parser, i) + else: + self.parser.skipfunc = self.skiprows + + cdef _setup_parser_source(self, source): + cdef: + void *ptr + + ptr = new_rd_source(source) + self.parser.source = ptr + self.parser.cb_io = &buffer_rd_bytes + self.parser.cb_cleanup = &del_rd_source + + cdef _get_header(self, list prelim_header): + # header is now a list of lists, so field_count should use header[0] + # + # modifies: + # self.parser attributes + # self.parser_start + # self.leading_cols + + cdef: + Py_ssize_t i, start, field_count, passed_count, unnamed_count, level + char *word + str name, old_name + uint64_t hr, data_line = 0 + list header = [] + set unnamed_cols = set() + + if self.parser.header_start >= 0: + + # Header is in the file + for level, hr in enumerate(prelim_header): + + this_header = [] + + if self.parser.lines < hr + 1: + self._tokenize_rows(hr + 2) + + if self.parser.lines == 0: + field_count = 0 + start = self.parser.line_start[0] + + # e.g., if header=3 and file only has 2 lines + elif (self.parser.lines < hr + 1 + and not isinstance(self.orig_header, list)) or ( + self.parser.lines < hr): + msg = self.orig_header + if isinstance(msg, list): + joined = ','.join(str(m) for m in msg) + msg = f"[{joined}], len of {len(msg)}," + raise ParserError( + f'Passed header={msg} but only ' + f'{self.parser.lines} lines in file') + + else: + field_count = self.parser.line_fields[hr] + start = self.parser.line_start[hr] + + unnamed_count = 0 + unnamed_col_indices = [] + + for i in range(field_count): + word = self.parser.words[start + i] + + name = PyUnicode_DecodeUTF8(word, strlen(word), + self.encoding_errors) + + if name == '': + if self.has_mi_columns: + name = f'Unnamed: {i}_level_{level}' + else: + name = f'Unnamed: {i}' + + unnamed_count += 1 + unnamed_col_indices.append(i) + + this_header.append(name) + + if not self.has_mi_columns and self.mangle_dupe_cols: + # Ensure that regular columns are used before unnamed ones + # to keep given names and mangle unnamed columns + col_loop_order = [i for i in range(len(this_header)) + if i not in unnamed_col_indices + ] + unnamed_col_indices + counts = {} + + for i in col_loop_order: + col = this_header[i] + old_col = col + cur_count = counts.get(col, 
0) + + if cur_count > 0: + while cur_count > 0: + counts[old_col] = cur_count + 1 + col = f'{old_col}.{cur_count}' + if col in this_header: + cur_count += 1 + else: + cur_count = counts.get(col, 0) + + if ( + self.dtype is not None + and is_dict_like(self.dtype) + and self.dtype.get(old_col) is not None + and self.dtype.get(col) is None + ): + self.dtype.update({col: self.dtype.get(old_col)}) + + this_header[i] = col + counts[col] = cur_count + 1 + + if self.has_mi_columns: + + # If we have grabbed an extra line, but it's not in our + # format, save in the buffer, and create an blank extra + # line for the rest of the parsing code. + if hr == prelim_header[-1]: + lc = len(this_header) + ic = (len(self.index_col) if self.index_col + is not None else 0) + + # if wrong number of blanks or no index, not our format + if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0: + hr -= 1 + self.parser_start -= 1 + this_header = [None] * lc + + data_line = hr + 1 + header.append(this_header) + unnamed_cols.update({this_header[i] for i in unnamed_col_indices}) + + if self.names is not None: + header = [self.names] + + elif self.names is not None: + # Names passed + if self.parser.lines < 1: + self._tokenize_rows(1) + + header = [self.names] + + if self.parser.lines < 1: + field_count = len(header[0]) + else: + field_count = self.parser.line_fields[data_line] + + # Enforce this unless usecols + if not self.has_usecols: + self.parser.expected_fields = max(field_count, len(self.names)) + else: + # No header passed nor to be found in the file + if self.parser.lines < 1: + self._tokenize_rows(1) + + return None, self.parser.line_fields[0], unnamed_cols + + # Corner case, not enough lines in the file + if self.parser.lines < data_line + 1: + field_count = len(header[0]) + else: # not self.has_usecols: + + field_count = self.parser.line_fields[data_line] + + # #2981 + if self.names is not None: + field_count = max(field_count, len(self.names)) + + passed_count = len(header[0]) + + if (self.has_usecols and self.allow_leading_cols and + not callable(self.usecols)): + nuse = len(self.usecols) + if nuse == passed_count: + self.leading_cols = 0 + elif self.names is None and nuse < passed_count: + self.leading_cols = field_count - passed_count + elif passed_count != field_count: + raise ValueError('Number of passed names did not match number of ' + 'header fields in the file') + # oh boy, #2442, #2981 + elif self.allow_leading_cols and passed_count < field_count: + self.leading_cols = field_count - passed_count + + return header, field_count, unnamed_cols + + def read(self, rows: int | None = None) -> dict[int, "ArrayLike"]: + """ + rows=None --> read all rows + """ + # Don't care about memory usage + columns = self._read_rows(rows, 1) + + return columns + + def read_low_memory(self, rows: int | None)-> list[dict[int, "ArrayLike"]]: + """ + rows=None --> read all rows + """ + # Conserve intermediate space + # Caller is responsible for concatenating chunks, + # see c_parser_wrapper._concatenate_chunks + cdef: + size_t rows_read = 0 + list chunks = [] + + if rows is None: + while True: + try: + chunk = self._read_rows(self.buffer_lines, 0) + if len(chunk) == 0: + break + except StopIteration: + break + else: + chunks.append(chunk) + else: + while rows_read < rows: + try: + crows = min(self.buffer_lines, rows - rows_read) + + chunk = self._read_rows(crows, 0) + if len(chunk) == 0: + break + + rows_read += len(list(chunk.values())[0]) + except StopIteration: + break + else: + chunks.append(chunk) + + 
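+        # (descriptive note, not in upstream) All requested chunks have been read;
+        # shrink the tokenizer's internal buffers back down before returning.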
parser_trim_buffers(self.parser) + + if len(chunks) == 0: + raise StopIteration + + return chunks + + cdef _tokenize_rows(self, size_t nrows): + cdef: + int status + + with nogil: + status = tokenize_nrows(self.parser, nrows, self.encoding_errors) + + if self.parser.warn_msg != NULL: + print(PyUnicode_DecodeUTF8( + self.parser.warn_msg, strlen(self.parser.warn_msg), + self.encoding_errors), file=sys.stderr) + free(self.parser.warn_msg) + self.parser.warn_msg = NULL + + if status < 0: + raise_parser_error('Error tokenizing data', self.parser) + + # -> dict[int, "ArrayLike"] + cdef _read_rows(self, rows, bint trim): + cdef: + int64_t buffered_lines + int64_t irows + + self._start_clock() + + if rows is not None: + irows = rows + buffered_lines = self.parser.lines - self.parser_start + if buffered_lines < irows: + self._tokenize_rows(irows - buffered_lines) + + if self.skipfooter > 0: + raise ValueError('skipfooter can only be used to read ' + 'the whole file') + else: + with nogil: + status = tokenize_all_rows(self.parser, self.encoding_errors) + + if self.parser.warn_msg != NULL: + print(PyUnicode_DecodeUTF8( + self.parser.warn_msg, strlen(self.parser.warn_msg), + self.encoding_errors), file=sys.stderr) + free(self.parser.warn_msg) + self.parser.warn_msg = NULL + + if status < 0: + raise_parser_error('Error tokenizing data', self.parser) + + if self.parser_start >= self.parser.lines: + raise StopIteration + self._end_clock('Tokenization') + + self._start_clock() + columns = self._convert_column_data(rows) + self._end_clock('Type conversion') + self._start_clock() + if len(columns) > 0: + rows_read = len(list(columns.values())[0]) + # trim + parser_consume_rows(self.parser, rows_read) + if trim: + parser_trim_buffers(self.parser) + self.parser_start -= rows_read + + self._end_clock('Parser memory cleanup') + + return columns + + cdef _start_clock(self): + self.clocks.append(time.time()) + + cdef _end_clock(self, str what): + if self.verbose: + elapsed = time.time() - self.clocks.pop(-1) + print(f'{what} took: {elapsed * 1000:.2f} ms') + + def set_noconvert(self, i: int) -> None: + self.noconvert.add(i) + + def remove_noconvert(self, i: int) -> None: + self.noconvert.remove(i) + + def _convert_column_data(self, rows: int | None) -> dict[int, "ArrayLike"]: + cdef: + int64_t i + int nused + kh_str_starts_t *na_hashset = NULL + int64_t start, end + object name, na_flist, col_dtype = None + bint na_filter = 0 + int64_t num_cols + dict result + + start = self.parser_start + + if rows is None: + end = self.parser.lines + else: + end = min(start + rows, self.parser.lines) + + num_cols = -1 + # Py_ssize_t cast prevents build warning + for i in range(self.parser.lines): + num_cols = (num_cols < self.parser.line_fields[i]) * \ + self.parser.line_fields[i] + \ + (num_cols >= self.parser.line_fields[i]) * num_cols + + usecols_not_callable_and_exists = not callable(self.usecols) and self.usecols + names_larger_num_cols = (self.names and + len(self.names) - self.leading_cols > num_cols) + + if self.table_width - self.leading_cols > num_cols: + if (usecols_not_callable_and_exists + and self.table_width - self.leading_cols < len(self.usecols) + or names_larger_num_cols): + raise ParserError(f"Too many columns specified: expected " + f"{self.table_width - self.leading_cols} " + f"and found {num_cols}") + + if (usecols_not_callable_and_exists and + all(isinstance(u, int) for u in self.usecols)): + missing_usecols = [col for col in self.usecols if col >= num_cols] + if missing_usecols: + warnings.warn( + 
"Defining usecols with out of bounds indices is deprecated " + "and will raise a ParserError in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + results = {} + nused = 0 + is_default_dict_dtype = isinstance(self.dtype, defaultdict) + + for i in range(self.table_width): + if i < self.leading_cols: + # Pass through leading columns always + name = i + elif (self.usecols and not callable(self.usecols) and + nused == len(self.usecols)): + # Once we've gathered all requested columns, stop. GH5766 + break + else: + name = self._get_column_name(i, nused) + usecols = set() + if callable(self.usecols): + if self.usecols(name): + usecols = {i} + else: + usecols = self.usecols + if self.has_usecols and not (i in usecols or + name in usecols): + continue + nused += 1 + + conv = self._get_converter(i, name) + + col_dtype = None + if self.dtype is not None: + if isinstance(self.dtype, dict): + if name in self.dtype: + col_dtype = self.dtype[name] + elif i in self.dtype: + col_dtype = self.dtype[i] + elif is_default_dict_dtype: + col_dtype = self.dtype[name] + else: + if self.dtype.names: + # structured array + col_dtype = np.dtype(self.dtype.descr[i][1]) + else: + col_dtype = self.dtype + + if conv: + if col_dtype is not None: + warnings.warn((f"Both a converter and dtype were specified " + f"for column {name} - only the converter will " + f"be used."), ParserWarning, + stacklevel=find_stack_level()) + results[i] = _apply_converter(conv, self.parser, i, start, end) + continue + + # Collect the list of NaN values associated with the column. + # If we aren't supposed to do that, or none are collected, + # we set `na_filter` to `0` (`1` otherwise). + na_flist = set() + + if self.na_filter: + na_list, na_flist = self._get_na_list(i, name) + if na_list is None: + na_filter = 0 + else: + na_filter = 1 + na_hashset = kset_from_list(na_list) + else: + na_filter = 0 + + # Attempt to parse tokens and infer dtype of the column. + # Should return as the desired dtype (inferred or specified). + try: + col_res, na_count = self._convert_tokens( + i, start, end, name, na_filter, na_hashset, + na_flist, col_dtype) + finally: + # gh-21353 + # + # Cleanup the NaN hash that we generated + # to avoid memory leaks. + if na_filter: + self._free_na_set(na_hashset) + + # don't try to upcast EAs + if na_count > 0 and not is_extension_array_dtype(col_dtype): + col_res = _maybe_upcast(col_res) + + if col_res is None: + raise ParserError(f'Unable to parse column {i}') + + results[i] = col_res + + self.parser_start += end - start + + return results + + # -> tuple["ArrayLike", int]: + cdef inline _convert_tokens(self, Py_ssize_t i, int64_t start, + int64_t end, object name, bint na_filter, + kh_str_starts_t *na_hashset, + object na_flist, object col_dtype): + + if col_dtype is not None: + col_res, na_count = self._convert_with_dtype( + col_dtype, i, start, end, na_filter, + 1, na_hashset, na_flist) + + # Fallback on the parse (e.g. we requested int dtype, + # but its actually a float). + if col_res is not None: + return col_res, na_count + + if i in self.noconvert: + return self._string_convert(i, start, end, na_filter, na_hashset) + else: + col_res = None + for dt in self.dtype_cast_order: + try: + col_res, na_count = self._convert_with_dtype( + dt, i, start, end, na_filter, 0, na_hashset, na_flist) + except ValueError: + # This error is raised from trying to convert to uint64, + # and we discover that we cannot convert to any numerical + # dtype successfully. 
As a result, we leave the data + # column AS IS with object dtype. + col_res, na_count = self._convert_with_dtype( + np.dtype('object'), i, start, end, 0, + 0, na_hashset, na_flist) + except OverflowError: + col_res, na_count = self._convert_with_dtype( + np.dtype('object'), i, start, end, na_filter, + 0, na_hashset, na_flist) + + if col_res is not None: + break + + # we had a fallback parse on the dtype, so now try to cast + if col_res is not None and col_dtype is not None: + # If col_res is bool, it might actually be a bool array mixed with NaNs + # (see _try_bool_flex()). Usually this would be taken care of using + # _maybe_upcast(), but if col_dtype is a floating type we should just + # take care of that cast here. + if col_res.dtype == np.bool_ and is_float_dtype(col_dtype): + mask = col_res.view(np.uint8) == na_values[np.uint8] + col_res = col_res.astype(col_dtype) + np.putmask(col_res, mask, np.nan) + return col_res, na_count + + # NaNs are already cast to True here, so can not use astype + if col_res.dtype == np.bool_ and is_integer_dtype(col_dtype): + if na_count > 0: + raise ValueError( + f"cannot safely convert passed user dtype of " + f"{col_dtype} for {np.bool_} dtyped data in " + f"column {i} due to NA values" + ) + + # only allow safe casts, eg. with a nan you cannot safely cast to int + try: + col_res = col_res.astype(col_dtype, casting='safe') + except TypeError: + + # float -> int conversions can fail the above + # even with no nans + col_res_orig = col_res + col_res = col_res.astype(col_dtype) + if (col_res != col_res_orig).any(): + raise ValueError( + f"cannot safely convert passed user dtype of " + f"{col_dtype} for {col_res_orig.dtype.name} dtyped data in " + f"column {i}") + + return col_res, na_count + + cdef _convert_with_dtype(self, object dtype, Py_ssize_t i, + int64_t start, int64_t end, + bint na_filter, + bint user_dtype, + kh_str_starts_t *na_hashset, + object na_flist): + if isinstance(dtype, CategoricalDtype): + # TODO: I suspect that _categorical_convert could be + # optimized when dtype is an instance of CategoricalDtype + codes, cats, na_count = _categorical_convert( + self.parser, i, start, end, na_filter, na_hashset) + + # Method accepts list of strings, not encoded ones. 
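+            # (descriptive note, not in upstream) self.true_values holds UTF-8
+            # encoded bytes (see __cinit__), so decode back to str before passing on.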
+ true_values = [x.decode() for x in self.true_values] + array_type = dtype.construct_array_type() + cat = array_type._from_inferred_categories( + cats, codes, dtype, true_values=true_values) + return cat, na_count + + elif is_extension_array_dtype(dtype): + result, na_count = self._string_convert(i, start, end, na_filter, + na_hashset) + + array_type = dtype.construct_array_type() + try: + # use _from_sequence_of_strings if the class defines it + if is_bool_dtype(dtype): + true_values = [x.decode() for x in self.true_values] + false_values = [x.decode() for x in self.false_values] + result = array_type._from_sequence_of_strings( + result, dtype=dtype, true_values=true_values, + false_values=false_values) + else: + result = array_type._from_sequence_of_strings(result, dtype=dtype) + except NotImplementedError: + raise NotImplementedError( + f"Extension Array: {array_type} must implement " + f"_from_sequence_of_strings in order " + f"to be used in parser methods") + + return result, na_count + + elif is_integer_dtype(dtype): + try: + result, na_count = _try_int64(self.parser, i, start, + end, na_filter, na_hashset) + if user_dtype and na_count is not None: + if na_count > 0: + raise ValueError(f"Integer column has NA values in column {i}") + except OverflowError: + result = _try_uint64(self.parser, i, start, end, + na_filter, na_hashset) + na_count = 0 + + if result is not None and dtype != 'int64': + result = result.astype(dtype) + + return result, na_count + + elif is_float_dtype(dtype): + result, na_count = _try_double(self.parser, i, start, end, + na_filter, na_hashset, na_flist) + + if result is not None and dtype != 'float64': + result = result.astype(dtype) + return result, na_count + elif is_bool_dtype(dtype): + result, na_count = _try_bool_flex(self.parser, i, start, end, + na_filter, na_hashset, + self.true_set, self.false_set) + if user_dtype and na_count is not None: + if na_count > 0: + raise ValueError(f"Bool column has NA values in column {i}") + return result, na_count + + elif dtype.kind == 'S': + # TODO: na handling + width = dtype.itemsize + if width > 0: + result = _to_fw_string(self.parser, i, start, end, width) + return result, 0 + + # treat as a regular string parsing + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif dtype.kind == 'U': + width = dtype.itemsize + if width > 0: + raise TypeError(f"the dtype {dtype} is not supported for parsing") + + # unicode variable width + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif is_object_dtype(dtype): + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif is_datetime64_dtype(dtype): + raise TypeError(f"the dtype {dtype} is not supported " + f"for parsing, pass this column " + f"using parse_dates instead") + else: + raise TypeError(f"the dtype {dtype} is not supported for parsing") + + # -> tuple[ndarray[object], int] + cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end, + bint na_filter, kh_str_starts_t *na_hashset): + + return _string_box_utf8(self.parser, i, start, end, na_filter, + na_hashset, self.encoding_errors) + + def _get_converter(self, i: int, name): + if self.converters is None: + return None + + if name is not None and name in self.converters: + return self.converters[name] + + # Converter for position, if any + return self.converters.get(i) + + cdef _get_na_list(self, Py_ssize_t i, name): + # Note: updates self.na_values, self.na_fvalues + if self.na_values is None: + return None, set() + + if isinstance(self.na_values, 
dict): + key = None + values = None + + if name is not None and name in self.na_values: + key = name + elif i in self.na_values: + key = i + else: # No na_values provided for this column. + if self.keep_default_na: + return _NA_VALUES, set() + + return list(), set() + + values = self.na_values[key] + if values is not None and not isinstance(values, list): + values = list(values) + + fvalues = self.na_fvalues[key] + if fvalues is not None and not isinstance(fvalues, set): + fvalues = set(fvalues) + + return _ensure_encoded(values), fvalues + else: + if not isinstance(self.na_values, list): + self.na_values = list(self.na_values) + if not isinstance(self.na_fvalues, set): + self.na_fvalues = set(self.na_fvalues) + + return _ensure_encoded(self.na_values), self.na_fvalues + + cdef _free_na_set(self, kh_str_starts_t *table): + kh_destroy_str_starts(table) + + cdef _get_column_name(self, Py_ssize_t i, Py_ssize_t nused): + cdef int64_t j + if self.has_usecols and self.names is not None: + if (not callable(self.usecols) and + len(self.names) == len(self.usecols)): + return self.names[nused] + else: + return self.names[i - self.leading_cols] + else: + if self.header is not None: + j = i - self.leading_cols + # generate extra (bogus) headers if there are more columns than headers + # These should be strings, not integers, because otherwise we might get + # issues with callables as usecols GH#46997 + if j >= len(self.header[0]): + return str(j) + elif self.has_mi_columns: + return tuple(header_row[j] for header_row in self.header) + else: + return self.header[0][j] + else: + return None + + +# Factor out code common to TextReader.__dealloc__ and TextReader.close +# It cannot be a class method, since calling self.close() in __dealloc__ +# which causes a class attribute lookup and violates best practices +# https://cython.readthedocs.io/en/latest/src/userguide/special_methods.html#finalization-method-dealloc +cdef _close(TextReader reader): + # also preemptively free all allocated memory + parser_free(reader.parser) + if reader.true_set: + kh_destroy_str_starts(reader.true_set) + reader.true_set = NULL + if reader.false_set: + kh_destroy_str_starts(reader.false_set) + reader.false_set = NULL + + +cdef: + object _true_values = [b'True', b'TRUE', b'true'] + object _false_values = [b'False', b'FALSE', b'false'] + + +def _ensure_encoded(list lst): + cdef: + list result = [] + for x in lst: + if isinstance(x, str): + x = PyUnicode_AsUTF8String(x) + elif not isinstance(x, bytes): + x = str(x).encode('utf-8') + + result.append(x) + return result + + +# common NA values +# no longer excluding inf representations +# '1.#INF','-1.#INF', '1.#INF000000', +STR_NA_VALUES = { + "-1.#IND", + "1.#QNAN", + "1.#IND", + "-1.#QNAN", + "#N/A N/A", + "#N/A", + "N/A", + "n/a", + "NA", + "", + "#NA", + "NULL", + "null", + "NaN", + "-NaN", + "nan", + "-nan", + "", +} +_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES)) + + +def _maybe_upcast(arr): + """ + + """ + if issubclass(arr.dtype.type, np.integer): + na_value = na_values[arr.dtype] + arr = arr.astype(float) + np.putmask(arr, arr == na_value, np.nan) + elif arr.dtype == np.bool_: + mask = arr.view(np.uint8) == na_values[np.uint8] + arr = arr.astype(object) + np.putmask(arr, mask, np.nan) + + return arr + + +# ---------------------------------------------------------------------- +# Type conversions / inference support code + + +# -> tuple[ndarray[object], int] +cdef _string_box_utf8(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, 
kh_str_starts_t *na_hashset, + const char *encoding_errors): + cdef: + int error, na_count = 0 + Py_ssize_t i, lines + coliter_t it + const char *word = NULL + ndarray[object] result + + int ret = 0 + kh_strbox_t *table + + object pyval + + object NA = na_values[np.object_] + khiter_t k + + table = kh_init_strbox() + lines = line_end - line_start + result = np.empty(lines, dtype=np.object_) + coliter_setup(&it, parser, col, line_start) + + for i in range(lines): + COLITER_NEXT(it, word) + + if na_filter: + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + na_count += 1 + result[i] = NA + continue + + k = kh_get_strbox(table, word) + + # in the hash table + if k != table.n_buckets: + # this increments the refcount, but need to test + pyval = table.vals[k] + else: + # box it. new ref? + pyval = PyUnicode_Decode(word, strlen(word), "utf-8", encoding_errors) + + k = kh_put_strbox(table, word, &ret) + table.vals[k] = pyval + + result[i] = pyval + + kh_destroy_strbox(table) + + return result, na_count + + +@cython.boundscheck(False) +cdef _categorical_convert(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset): + "Convert column data into codes, categories" + cdef: + int na_count = 0 + Py_ssize_t i, size, lines + coliter_t it + const char *word = NULL + + int64_t NA = -1 + int64_t[::1] codes + int64_t current_category = 0 + + char *errors = "strict" + + int ret = 0 + kh_str_t *table + khiter_t k + + lines = line_end - line_start + codes = np.empty(lines, dtype=np.int64) + + # factorize parsed values, creating a hash table + # bytes -> category code + with nogil: + table = kh_init_str() + coliter_setup(&it, parser, col, line_start) + + for i in range(lines): + COLITER_NEXT(it, word) + + if na_filter: + if kh_get_str_starts_item(na_hashset, word): + # is in NA values + na_count += 1 + codes[i] = NA + continue + + k = kh_get_str(table, word) + # not in the hash table + if k == table.n_buckets: + k = kh_put_str(table, word, &ret) + table.vals[k] = current_category + current_category += 1 + + codes[i] = table.vals[k] + + # parse and box categories to python strings + result = np.empty(table.n_occupied, dtype=np.object_) + for k in range(table.n_buckets): + if kh_exist_str(table, k): + result[table.vals[k]] = PyUnicode_FromString(table.keys[k]) + + kh_destroy_str(table) + return np.asarray(codes), result, na_count + + +# -> ndarray[f'|S{width}'] +cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, + int64_t line_end, int64_t width): + cdef: + char *data + ndarray result + + result = np.empty(line_end - line_start, dtype=f'|S{width}') + data = result.data + + with nogil: + _to_fw_string_nogil(parser, col, line_start, line_end, width, data) + + return result + + +cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + size_t width, char *data) nogil: + cdef: + int64_t i + coliter_t it + const char *word = NULL + + coliter_setup(&it, parser, col, line_start) + + for i in range(line_end - line_start): + COLITER_NEXT(it, word) + strncpy(data, word, width) + data += width + + +cdef: + char* cinf = b'inf' + char* cposinf = b'+inf' + char* cneginf = b'-inf' + + char* cinfty = b'Infinity' + char* cposinfty = b'+Infinity' + char* cneginfty = b'-Infinity' + + +# -> tuple[ndarray[float64_t], int] | tuple[None, None] +cdef _try_double(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset, object 
na_flist): + cdef: + int error, na_count = 0 + Py_ssize_t lines + float64_t *data + float64_t NA = na_values[np.float64] + kh_float64_t *na_fset + ndarray[float64_t] result + bint use_na_flist = len(na_flist) > 0 + + lines = line_end - line_start + result = np.empty(lines, dtype=np.float64) + data = result.data + na_fset = kset_float64_from_list(na_flist) + with nogil: + error = _try_double_nogil(parser, parser.double_converter, + col, line_start, line_end, + na_filter, na_hashset, use_na_flist, + na_fset, NA, data, &na_count) + + kh_destroy_float64(na_fset) + if error != 0: + return None, None + return result, na_count + + +cdef inline int _try_double_nogil(parser_t *parser, + float64_t (*double_converter)( + const char *, char **, char, + char, char, int, int *, int *) nogil, + int64_t col, int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset, + bint use_na_flist, + const kh_float64_t *na_flist, + float64_t NA, float64_t *data, + int *na_count) nogil: + cdef: + int error = 0, + Py_ssize_t i, lines = line_end - line_start + coliter_t it + const char *word = NULL + char *p_end + khiter_t k64 + + na_count[0] = 0 + coliter_setup(&it, parser, col, line_start) + + if na_filter: + for i in range(lines): + COLITER_NEXT(it, word) + + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + na_count[0] += 1 + data[0] = NA + else: + data[0] = double_converter(word, &p_end, parser.decimal, + parser.sci, parser.thousands, + 1, &error, NULL) + if error != 0 or p_end == word or p_end[0]: + error = 0 + if (strcasecmp(word, cinf) == 0 or + strcasecmp(word, cposinf) == 0 or + strcasecmp(word, cinfty) == 0 or + strcasecmp(word, cposinfty) == 0): + data[0] = INF + elif (strcasecmp(word, cneginf) == 0 or + strcasecmp(word, cneginfty) == 0): + data[0] = NEGINF + else: + return 1 + if use_na_flist: + k64 = kh_get_float64(na_flist, data[0]) + if k64 != na_flist.n_buckets: + na_count[0] += 1 + data[0] = NA + data += 1 + else: + for i in range(lines): + COLITER_NEXT(it, word) + data[0] = double_converter(word, &p_end, parser.decimal, + parser.sci, parser.thousands, + 1, &error, NULL) + if error != 0 or p_end == word or p_end[0]: + error = 0 + if (strcasecmp(word, cinf) == 0 or + strcasecmp(word, cposinf) == 0 or + strcasecmp(word, cinfty) == 0 or + strcasecmp(word, cposinfty) == 0): + data[0] = INF + elif (strcasecmp(word, cneginf) == 0 or + strcasecmp(word, cneginfty) == 0): + data[0] = NEGINF + else: + return 1 + data += 1 + + return 0 + + +cdef _try_uint64(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset): + cdef: + int error + Py_ssize_t lines + coliter_t it + uint64_t *data + ndarray result + uint_state state + + lines = line_end - line_start + result = np.empty(lines, dtype=np.uint64) + data = result.data + + uint_state_init(&state) + coliter_setup(&it, parser, col, line_start) + with nogil: + error = _try_uint64_nogil(parser, col, line_start, line_end, + na_filter, na_hashset, data, &state) + if error != 0: + if error == ERROR_OVERFLOW: + # Can't get the word variable + raise OverflowError('Overflow') + return None + + if uint64_conflict(&state): + raise ValueError('Cannot convert to numerical dtype') + + if state.seen_sint: + raise OverflowError('Overflow') + + return result + + +cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, + uint64_t *data, uint_state *state) nogil: + cdef: + int 
error + Py_ssize_t i, lines = line_end - line_start + coliter_t it + const char *word = NULL + + coliter_setup(&it, parser, col, line_start) + + if na_filter: + for i in range(lines): + COLITER_NEXT(it, word) + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + state.seen_null = 1 + data[i] = 0 + continue + + data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX, + &error, parser.thousands) + if error != 0: + return error + else: + for i in range(lines): + COLITER_NEXT(it, word) + data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX, + &error, parser.thousands) + if error != 0: + return error + + return 0 + + +cdef _try_int64(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset): + cdef: + int error, na_count = 0 + Py_ssize_t lines + coliter_t it + int64_t *data + ndarray result + int64_t NA = na_values[np.int64] + + lines = line_end - line_start + result = np.empty(lines, dtype=np.int64) + data = result.data + coliter_setup(&it, parser, col, line_start) + with nogil: + error = _try_int64_nogil(parser, col, line_start, line_end, + na_filter, na_hashset, NA, data, &na_count) + if error != 0: + if error == ERROR_OVERFLOW: + # Can't get the word variable + raise OverflowError('Overflow') + return None, None + + return result, na_count + + +cdef inline int _try_int64_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, int64_t NA, + int64_t *data, int *na_count) nogil: + cdef: + int error + Py_ssize_t i, lines = line_end - line_start + coliter_t it + const char *word = NULL + + na_count[0] = 0 + coliter_setup(&it, parser, col, line_start) + + if na_filter: + for i in range(lines): + COLITER_NEXT(it, word) + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + na_count[0] += 1 + data[i] = NA + continue + + data[i] = str_to_int64(word, INT64_MIN, INT64_MAX, + &error, parser.thousands) + if error != 0: + return error + else: + for i in range(lines): + COLITER_NEXT(it, word) + data[i] = str_to_int64(word, INT64_MIN, INT64_MAX, + &error, parser.thousands) + if error != 0: + return error + + return 0 + + +# -> tuple[ndarray[bool], int] +cdef _try_bool_flex(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, const kh_str_starts_t *na_hashset, + const kh_str_starts_t *true_hashset, + const kh_str_starts_t *false_hashset): + cdef: + int error, na_count = 0 + Py_ssize_t lines + uint8_t *data + ndarray result + uint8_t NA = na_values[np.bool_] + + lines = line_end - line_start + result = np.empty(lines, dtype=np.uint8) + data = result.data + with nogil: + error = _try_bool_flex_nogil(parser, col, line_start, line_end, + na_filter, na_hashset, true_hashset, + false_hashset, NA, data, &na_count) + if error != 0: + return None, None + return result.view(np.bool_), na_count + + +cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, + const kh_str_starts_t *true_hashset, + const kh_str_starts_t *false_hashset, + uint8_t NA, uint8_t *data, + int *na_count) nogil: + cdef: + int error = 0 + Py_ssize_t i, lines = line_end - line_start + coliter_t it + const char *word = NULL + + na_count[0] = 0 + coliter_setup(&it, parser, col, line_start) + + if na_filter: + for i in range(lines): + COLITER_NEXT(it, word) + + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + 
na_count[0] += 1 + data[0] = NA + data += 1 + continue + + if kh_get_str_starts_item(true_hashset, word): + data[0] = 1 + data += 1 + continue + if kh_get_str_starts_item(false_hashset, word): + data[0] = 0 + data += 1 + continue + + error = to_boolean(word, data) + if error != 0: + return error + data += 1 + else: + for i in range(lines): + COLITER_NEXT(it, word) + + if kh_get_str_starts_item(true_hashset, word): + data[0] = 1 + data += 1 + continue + + if kh_get_str_starts_item(false_hashset, word): + data[0] = 0 + data += 1 + continue + + error = to_boolean(word, data) + if error != 0: + return error + data += 1 + + return 0 + + +cdef kh_str_starts_t* kset_from_list(list values) except NULL: + # caller takes responsibility for freeing the hash table + cdef: + Py_ssize_t i + kh_str_starts_t *table + int ret = 0 + object val + + table = kh_init_str_starts() + + for i in range(len(values)): + val = values[i] + + # None creeps in sometimes, which isn't possible here + if not isinstance(val, bytes): + kh_destroy_str_starts(table) + raise ValueError('Must be all encoded bytes') + + kh_put_str_starts_item(table, PyBytes_AsString(val), &ret) + + if table.table.n_buckets <= 128: + # Resize the hash table to make it almost empty, this + # reduces amount of hash collisions on lookup thus + # "key not in table" case is faster. + # Note that this trades table memory footprint for lookup speed. + kh_resize_str_starts(table, table.table.n_buckets * 8) + + return table + + +cdef kh_float64_t* kset_float64_from_list(values) except NULL: + # caller takes responsibility for freeing the hash table + cdef: + khiter_t k + kh_float64_t *table + int ret = 0 + float64_t val + object value + + table = kh_init_float64() + + for value in values: + val = float(value) + + k = kh_put_float64(table, val, &ret) + + if table.n_buckets <= 128: + # See reasoning in kset_from_list + kh_resize_float64(table, table.n_buckets * 8) + return table + + +cdef raise_parser_error(object base, parser_t *parser): + cdef: + object old_exc + object exc_type + PyObject *type + PyObject *value + PyObject *traceback + + if PyErr_Occurred(): + PyErr_Fetch(&type, &value, &traceback) + Py_XDECREF(traceback) + + if value != NULL: + old_exc = value + Py_XDECREF(value) + + # PyErr_Fetch only returned the error message in *value, + # so the Exception class must be extracted from *type. + if isinstance(old_exc, str): + if type != NULL: + exc_type = type + else: + exc_type = ParserError + + Py_XDECREF(type) + raise exc_type(old_exc) + else: + Py_XDECREF(type) + raise old_exc + + message = f'{base}. 
C error: ' + if parser.error_msg != NULL: + message += parser.error_msg.decode('utf-8') + else: + message += 'no error message set' + + raise ParserError(message) + + +# ---------------------------------------------------------------------- +# NA values +def _compute_na_values(): + int64info = np.iinfo(np.int64) + int32info = np.iinfo(np.int32) + int16info = np.iinfo(np.int16) + int8info = np.iinfo(np.int8) + uint64info = np.iinfo(np.uint64) + uint32info = np.iinfo(np.uint32) + uint16info = np.iinfo(np.uint16) + uint8info = np.iinfo(np.uint8) + na_values = { + np.float64: np.nan, + np.int64: int64info.min, + np.int32: int32info.min, + np.int16: int16info.min, + np.int8: int8info.min, + np.uint64: uint64info.max, + np.uint32: uint32info.max, + np.uint16: uint16info.max, + np.uint8: uint8info.max, + np.bool_: uint8info.max, + np.object_: np.nan # oof + } + return na_values + + +na_values = _compute_na_values() + +for k in list(na_values): + na_values[np.dtype(k)] = na_values[k] + + +# -> ArrayLike +cdef _apply_converter(object f, parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end): + cdef: + Py_ssize_t i, lines + coliter_t it + const char *word = NULL + ndarray[object] result + object val + + lines = line_end - line_start + result = np.empty(lines, dtype=np.object_) + + coliter_setup(&it, parser, col, line_start) + + for i in range(lines): + COLITER_NEXT(it, word) + val = PyUnicode_FromString(word) + result[i] = f(val) + + return lib.maybe_convert_objects(result) + + +cdef list _maybe_encode(list values): + if values is None: + return [] + return [x.encode('utf-8') if isinstance(x, str) else x for x in values] + + +def sanitize_objects(ndarray[object] values, set na_values) -> int: + """ + Convert specified values, including the given set na_values to np.nan. + + Parameters + ---------- + values : ndarray[object] + na_values : set + + Returns + ------- + na_count : int + """ + cdef: + Py_ssize_t i, n + object val, onan + Py_ssize_t na_count = 0 + dict memo = {} + + n = len(values) + onan = np.nan + + for i in range(n): + val = values[i] + if val in na_values: + values[i] = onan + na_count += 1 + elif val in memo: + values[i] = memo[val] + else: + memo[val] = val + + return na_count diff --git a/pandas/_libs/properties.pyi b/pandas/_libs/properties.pyi new file mode 100644 index 00000000..595e3bd7 --- /dev/null +++ b/pandas/_libs/properties.pyi @@ -0,0 +1,28 @@ +from typing import ( + Sequence, + overload, +) + +from pandas._typing import ( + AnyArrayLike, + DataFrame, + Index, + Series, +) + +# note: this is a lie to make type checkers happy (they special +# case property). cache_readonly uses attribute names similar to +# property (fget) but it does not provide fset and fdel. +cache_readonly = property + +class AxisProperty: + + axis: int + def __init__(self, axis: int = ..., doc: str = ...) -> None: ... + @overload + def __get__(self, obj: DataFrame | Series, type) -> Index: ... + @overload + def __get__(self, obj: None, type) -> AxisProperty: ... + def __set__( + self, obj: DataFrame | Series, value: AnyArrayLike | Sequence + ) -> None: ... 
diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx new file mode 100644 index 00000000..3354290a --- /dev/null +++ b/pandas/_libs/properties.pyx @@ -0,0 +1,69 @@ +from cpython.dict cimport ( + PyDict_Contains, + PyDict_GetItem, + PyDict_SetItem, +) +from cython cimport Py_ssize_t + + +cdef class CachedProperty: + + cdef readonly: + object fget, name, __doc__ + + def __init__(self, fget): + self.fget = fget + self.name = fget.__name__ + self.__doc__ = getattr(fget, '__doc__', None) + + def __get__(self, obj, typ): + if obj is None: + # accessed on the class, not the instance + return self + + # Get the cache or set a default one if needed + cache = getattr(obj, '_cache', None) + if cache is None: + try: + cache = obj._cache = {} + except (AttributeError): + return self + + if PyDict_Contains(cache, self.name): + # not necessary to Py_INCREF + val = PyDict_GetItem(cache, self.name) + else: + val = self.fget(obj) + PyDict_SetItem(cache, self.name, val) + return val + + def __set__(self, obj, value): + raise AttributeError("Can't set attribute") + + +cache_readonly = CachedProperty + + +cdef class AxisProperty: + + cdef readonly: + Py_ssize_t axis + object __doc__ + + def __init__(self, axis=0, doc=""): + self.axis = axis + self.__doc__ = doc + + def __get__(self, obj, type): + cdef: + list axes + + if obj is None: + # Only instances have _mgr, not classes + return self + else: + axes = obj._mgr.axes + return axes[self.axis] + + def __set__(self, obj, value): + obj._set_axis(self.axis, value) diff --git a/pandas/_libs/reduction.pyi b/pandas/_libs/reduction.pyi new file mode 100644 index 00000000..ad73e941 --- /dev/null +++ b/pandas/_libs/reduction.pyi @@ -0,0 +1,8 @@ +from typing import Any + +import numpy as np + +from pandas._typing import ExtensionDtype + +def check_result_array(obj: object, dtype: np.dtype | ExtensionDtype) -> None: ... +def extract_result(res: object) -> Any: ... diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx new file mode 100644 index 00000000..7ff08426 --- /dev/null +++ b/pandas/_libs/reduction.pyx @@ -0,0 +1,33 @@ +import numpy as np + +cimport numpy as cnp + +cnp.import_array() + +from pandas._libs.util cimport is_array + + +cdef cnp.dtype _dtype_obj = np.dtype("object") + + +cpdef check_result_array(object obj, object dtype): + # Our operation is supposed to be an aggregation/reduction. If + # it returns an ndarray, this likely means an invalid operation has + # been passed. See test_apply_without_aggregation, test_agg_must_agg + if is_array(obj): + if dtype != _dtype_obj: + # If it is object dtype, the function can be a reduction/aggregation + # and still return an ndarray e.g. 
test_agg_over_numpy_arrays + raise ValueError("Must produce aggregated value") + + +cpdef inline extract_result(object res): + """ extract the result object, it might be a 0-dim ndarray + or a len-1 0-dim, or a scalar """ + if hasattr(res, "_values"): + # Preserve EA + res = res._values + if res.ndim == 1 and len(res) == 1: + # see test_agg_lambda_with_timezone, test_resampler_grouper.py::test_apply + res = res[0] + return res diff --git a/pandas/_libs/reshape.pyi b/pandas/_libs/reshape.pyi new file mode 100644 index 00000000..110687fc --- /dev/null +++ b/pandas/_libs/reshape.pyi @@ -0,0 +1,16 @@ +import numpy as np + +from pandas._typing import npt + +def unstack( + values: np.ndarray, # reshape_t[:, :] + mask: np.ndarray, # const uint8_t[:] + stride: int, + length: int, + width: int, + new_values: np.ndarray, # reshape_t[:, :] + new_mask: np.ndarray, # uint8_t[:, :] +) -> None: ... +def explode( + values: npt.NDArray[np.object_], +) -> tuple[npt.NDArray[np.object_], npt.NDArray[np.int64]]: ... diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx new file mode 100644 index 00000000..a012bd92 --- /dev/null +++ b/pandas/_libs/reshape.pyx @@ -0,0 +1,138 @@ +cimport cython +from cython cimport Py_ssize_t +from numpy cimport ( + int64_t, + ndarray, + uint8_t, +) + +import numpy as np + +cimport numpy as cnp + +cnp.import_array() + +from pandas._libs.dtypes cimport numeric_object_t +from pandas._libs.lib cimport c_is_list_like + + +@cython.wraparound(False) +@cython.boundscheck(False) +def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask, + Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width, + numeric_object_t[:, :] new_values, uint8_t[:, :] new_mask) -> None: + """ + Transform long values to wide new_values. + + Parameters + ---------- + values : typed ndarray + mask : np.ndarray[bool] + stride : int + length : int + width : int + new_values : np.ndarray[bool] + result array + new_mask : np.ndarray[bool] + result mask + """ + cdef: + Py_ssize_t i, j, w, nulls, s, offset + + if numeric_object_t is not object: + # evaluated at compile-time + with nogil: + for i in range(stride): + + nulls = 0 + for j in range(length): + + for w in range(width): + + offset = j * width + w + + if mask[offset]: + s = i * width + w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + + else: + # object-dtype, identical to above but we cannot use nogil + for i in range(stride): + + nulls = 0 + for j in range(length): + + for w in range(width): + + offset = j * width + w + + if mask[offset]: + s = i * width + w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + + +@cython.wraparound(False) +@cython.boundscheck(False) +def explode(ndarray[object] values): + """ + transform array list-likes to long form + preserve non-list entries + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + ndarray[object] + result + ndarray[int64_t] + counts + """ + cdef: + Py_ssize_t i, j, count, n + object v + ndarray[object] result + ndarray[int64_t] counts + + # find the resulting len + n = len(values) + counts = np.zeros(n, dtype='int64') + for i in range(n): + v = values[i] + + if c_is_list_like(v, True): + if len(v): + counts[i] += len(v) + else: + # empty list-like, use a nan marker + counts[i] += 1 + else: + counts[i] += 1 + + result = np.empty(counts.sum(), dtype='object') + count = 0 + for i in range(n): + v = values[i] + + if c_is_list_like(v, True): + if len(v): + v = list(v) + for j in range(len(v)): 
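+                    # (descriptive note, not in upstream) list-like entry:
+                    # copy each element into the flat result in order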
+ result[count] = v[j] + count += 1 + else: + # empty list-like, use a nan marker + result[count] = np.nan + count += 1 + else: + # replace with the existing scalar + result[count] = v + count += 1 + return result, counts diff --git a/pandas/_libs/sparse.pyi b/pandas/_libs/sparse.pyi new file mode 100644 index 00000000..be5d251b --- /dev/null +++ b/pandas/_libs/sparse.pyi @@ -0,0 +1,47 @@ +from typing import ( + Sequence, + TypeVar, +) + +import numpy as np + +from pandas._typing import npt + +_SparseIndexT = TypeVar("_SparseIndexT", bound=SparseIndex) + +class SparseIndex: + length: int + npoints: int + def __init__(self): ... + @property + def ngaps(self) -> int: ... + @property + def nbytes(self) -> int: ... + @property + def indices(self) -> npt.NDArray[np.int32]: ... + def equals(self, other) -> bool: ... + def lookup(self, index: int) -> np.int32: ... + def lookup_array(self, indexer: npt.NDArray[np.int32]) -> npt.NDArray[np.int32]: ... + def to_int_index(self) -> IntIndex: ... + def to_block_index(self) -> BlockIndex: ... + def intersect(self: _SparseIndexT, y_: SparseIndex) -> _SparseIndexT: ... + def make_union(self: _SparseIndexT, y_: SparseIndex) -> _SparseIndexT: ... + +class IntIndex(SparseIndex): + indices: npt.NDArray[np.int32] + def __init__( + self, length: int, indices: Sequence[int], check_integrity: bool = ... + ): ... + +class BlockIndex(SparseIndex): + nblocks: int + blocs: np.ndarray + blengths: np.ndarray + def __init__(self, length: int, blocs: np.ndarray, blengths: np.ndarray): ... + +def make_mask_object_ndarray( + arr: npt.NDArray[np.object_], fill_value +) -> npt.NDArray[np.bool_]: ... +def get_blocks( + indices: npt.NDArray[np.int32], +) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.int32]]: ... diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx new file mode 100644 index 00000000..6c10b394 --- /dev/null +++ b/pandas/_libs/sparse.pyx @@ -0,0 +1,738 @@ +cimport cython +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + ndarray, + uint8_t, +) + +cnp.import_array() + + +# ----------------------------------------------------------------------------- +# Preamble stuff + +cdef float64_t NaN = np.NaN +cdef float64_t INF = np.inf + +# ----------------------------------------------------------------------------- + + +cdef class SparseIndex: + """ + Abstract superclass for sparse index types. + """ + + def __init__(self): + raise NotImplementedError + + +cdef class IntIndex(SparseIndex): + """ + Object for holding exact integer sparse indexing information + + Parameters + ---------- + length : integer + indices : array-like + Contains integers corresponding to the indices. + check_integrity : bool, default=True + Check integrity of the input. 
+ """ + + cdef readonly: + Py_ssize_t length, npoints + ndarray indices + + def __init__(self, Py_ssize_t length, indices, bint check_integrity=True): + self.length = length + self.indices = np.ascontiguousarray(indices, dtype=np.int32) + self.npoints = len(self.indices) + + if check_integrity: + self.check_integrity() + + def __reduce__(self): + args = (self.length, self.indices) + return IntIndex, args + + def __repr__(self) -> str: + output = 'IntIndex\n' + output += f'Indices: {repr(self.indices)}\n' + return output + + @property + def nbytes(self) -> int: + return self.indices.nbytes + + cdef check_integrity(self): + """ + Checks the following: + + - Indices are strictly ascending + - Number of indices is at most self.length + - Indices are at least 0 and at most the total length less one + + A ValueError is raised if any of these conditions is violated. + """ + + if self.npoints > self.length: + raise ValueError( + f"Too many indices. Expected {self.length} but found {self.npoints}" + ) + + # Indices are vacuously ordered and non-negative + # if the sequence of indices is empty. + if self.npoints == 0: + return + + if self.indices.min() < 0: + raise ValueError("No index can be less than zero") + + if self.indices.max() >= self.length: + raise ValueError("All indices must be less than the length") + + monotonic = np.all(self.indices[:-1] < self.indices[1:]) + if not monotonic: + raise ValueError("Indices must be strictly increasing") + + def equals(self, other: object) -> bool: + if not isinstance(other, IntIndex): + return False + + if self is other: + return True + + same_length = self.length == other.length + same_indices = np.array_equal(self.indices, other.indices) + return same_length and same_indices + + @property + def ngaps(self) -> int: + return self.length - self.npoints + + cpdef to_int_index(self): + return self + + def to_block_index(self): + locs, lens = get_blocks(self.indices) + return BlockIndex(self.length, locs, lens) + + cpdef IntIndex intersect(self, SparseIndex y_): + cdef: + Py_ssize_t out_length, xi, yi = 0, result_indexer = 0 + int32_t xind + ndarray[int32_t, ndim=1] xindices, yindices, new_indices + IntIndex y + + # if is one already, returns self + y = y_.to_int_index() + + if self.length != y.length: + raise Exception('Indices must reference same underlying length') + + xindices = self.indices + yindices = y.indices + new_indices = np.empty(min( + len(xindices), len(yindices)), dtype=np.int32) + + for xi in range(self.npoints): + xind = xindices[xi] + + while yi < y.npoints and yindices[yi] < xind: + yi += 1 + + if yi >= y.npoints: + break + + # TODO: would a two-pass algorithm be faster? + if yindices[yi] == xind: + new_indices[result_indexer] = xind + result_indexer += 1 + + new_indices = new_indices[:result_indexer] + return IntIndex(self.length, new_indices) + + cpdef IntIndex make_union(self, SparseIndex y_): + + cdef: + ndarray[int32_t, ndim=1] new_indices + IntIndex y + + # if is one already, returns self + y = y_.to_int_index() + + if self.length != y.length: + raise ValueError('Indices must reference same underlying length') + + new_indices = np.union1d(self.indices, y.indices) + return IntIndex(self.length, new_indices) + + @cython.wraparound(False) + cpdef int32_t lookup(self, Py_ssize_t index): + """ + Return the internal location if value exists on given index. + Return -1 otherwise. 
+ """ + cdef: + int32_t res + ndarray[int32_t, ndim=1] inds + + inds = self.indices + if self.npoints == 0: + return -1 + elif index < 0 or self.length <= index: + return -1 + + res = inds.searchsorted(index) + if res == self.npoints: + return -1 + elif inds[res] == index: + return res + else: + return -1 + + @cython.wraparound(False) + cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer): + """ + Vectorized lookup, returns ndarray[int32_t] + """ + cdef: + Py_ssize_t n, i, ind_val + ndarray[int32_t, ndim=1] inds + ndarray[uint8_t, ndim=1, cast=True] mask + ndarray[int32_t, ndim=1] masked + ndarray[int32_t, ndim=1] res + ndarray[int32_t, ndim=1] results + + n = len(indexer) + results = np.empty(n, dtype=np.int32) + results[:] = -1 + + if self.npoints == 0: + return results + + inds = self.indices + mask = (inds[0] <= indexer) & (indexer <= inds[len(inds) - 1]) + + masked = indexer[mask] + res = inds.searchsorted(masked).astype(np.int32) + + res[inds[res] != masked] = -1 + results[mask] = res + return results + + +cpdef get_blocks(ndarray[int32_t, ndim=1] indices): + cdef: + Py_ssize_t init_len, i, npoints, result_indexer = 0 + int32_t block, length = 1, cur, prev + ndarray[int32_t, ndim=1] locs, lens + + npoints = len(indices) + + # just handle the special empty case separately + if npoints == 0: + return np.array([], dtype=np.int32), np.array([], dtype=np.int32) + + # block size can't be longer than npoints + locs = np.empty(npoints, dtype=np.int32) + lens = np.empty(npoints, dtype=np.int32) + + # TODO: two-pass algorithm faster? + prev = block = indices[0] + for i in range(1, npoints): + cur = indices[i] + if cur - prev > 1: + # new block + locs[result_indexer] = block + lens[result_indexer] = length + block = cur + length = 1 + result_indexer += 1 + else: + # same block, increment length + length += 1 + + prev = cur + + locs[result_indexer] = block + lens[result_indexer] = length + result_indexer += 1 + locs = locs[:result_indexer] + lens = lens[:result_indexer] + return locs, lens + + +# ----------------------------------------------------------------------------- +# BlockIndex + +cdef class BlockIndex(SparseIndex): + """ + Object for holding block-based sparse indexing information + + Parameters + ---------- + """ + cdef readonly: + int32_t nblocks, npoints, length + ndarray blocs, blengths + + cdef: + object __weakref__ # need to be picklable + int32_t *locbuf + int32_t *lenbuf + + def __init__(self, length, blocs, blengths): + + self.blocs = np.ascontiguousarray(blocs, dtype=np.int32) + self.blengths = np.ascontiguousarray(blengths, dtype=np.int32) + + # in case we need + self.locbuf = self.blocs.data + self.lenbuf = self.blengths.data + + self.length = length + self.nblocks = np.int32(len(self.blocs)) + self.npoints = self.blengths.sum() + + # self.block_start = blocs + # self.block_end = blocs + blengths + + self.check_integrity() + + def __reduce__(self): + args = (self.length, self.blocs, self.blengths) + return BlockIndex, args + + def __repr__(self) -> str: + output = 'BlockIndex\n' + output += f'Block locations: {repr(self.blocs)}\n' + output += f'Block lengths: {repr(self.blengths)}' + + return output + + @property + def nbytes(self) -> int: + return self.blocs.nbytes + self.blengths.nbytes + + @property + def ngaps(self) -> int: + return self.length - self.npoints + + cdef check_integrity(self): + """ + Check: + - Locations are in ascending order + - No overlapping blocks + - Blocks to not start after end of index, nor extend beyond end + """ + cdef: + 
Py_ssize_t i + ndarray[int32_t, ndim=1] blocs, blengths + + blocs = self.blocs + blengths = self.blengths + + if len(blocs) != len(blengths): + raise ValueError('block bound arrays must be same length') + + for i in range(self.nblocks): + if i > 0: + if blocs[i] <= blocs[i - 1]: + raise ValueError('Locations not in ascending order') + + if i < self.nblocks - 1: + if blocs[i] + blengths[i] > blocs[i + 1]: + raise ValueError(f'Block {i} overlaps') + else: + if blocs[i] + blengths[i] > self.length: + raise ValueError(f'Block {i} extends beyond end') + + # no zero-length blocks + if blengths[i] == 0: + raise ValueError(f'Zero-length block {i}') + + def equals(self, other: object) -> bool: + if not isinstance(other, BlockIndex): + return False + + if self is other: + return True + + same_length = self.length == other.length + same_blocks = (np.array_equal(self.blocs, other.blocs) and + np.array_equal(self.blengths, other.blengths)) + return same_length and same_blocks + + def to_block_index(self): + return self + + cpdef to_int_index(self): + cdef: + int32_t i = 0, j, b + int32_t offset + ndarray[int32_t, ndim=1] indices + + indices = np.empty(self.npoints, dtype=np.int32) + + for b in range(self.nblocks): + offset = self.locbuf[b] + + for j in range(self.lenbuf[b]): + indices[i] = offset + j + i += 1 + + return IntIndex(self.length, indices) + + @property + def indices(self): + return self.to_int_index().indices + + cpdef BlockIndex intersect(self, SparseIndex other): + """ + Intersect two BlockIndex objects + + Returns + ------- + BlockIndex + """ + cdef: + BlockIndex y + ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen, out_bloc, out_blen + Py_ssize_t xi = 0, yi = 0, max_len, result_indexer = 0 + int32_t cur_loc, cur_length, diff + + y = other.to_block_index() + + if self.length != y.length: + raise Exception('Indices must reference same underlying length') + + xloc = self.blocs + xlen = self.blengths + yloc = y.blocs + ylen = y.blengths + + # block may be split, but can't exceed original len / 2 + 1 + max_len = min(self.length, y.length) // 2 + 1 + out_bloc = np.empty(max_len, dtype=np.int32) + out_blen = np.empty(max_len, dtype=np.int32) + + while True: + # we are done (or possibly never began) + if xi >= self.nblocks or yi >= y.nblocks: + break + + # completely symmetric...would like to avoid code dup but oh well + if xloc[xi] >= yloc[yi]: + cur_loc = xloc[xi] + diff = xloc[xi] - yloc[yi] + + if ylen[yi] <= diff: + # have to skip this block + yi += 1 + continue + + if ylen[yi] - diff < xlen[xi]: + # take end of y block, move onward + cur_length = ylen[yi] - diff + yi += 1 + else: + # take end of x block + cur_length = xlen[xi] + xi += 1 + + else: # xloc[xi] < yloc[yi] + cur_loc = yloc[yi] + diff = yloc[yi] - xloc[xi] + + if xlen[xi] <= diff: + # have to skip this block + xi += 1 + continue + + if xlen[xi] - diff < ylen[yi]: + # take end of x block, move onward + cur_length = xlen[xi] - diff + xi += 1 + else: + # take end of y block + cur_length = ylen[yi] + yi += 1 + + out_bloc[result_indexer] = cur_loc + out_blen[result_indexer] = cur_length + result_indexer += 1 + + out_bloc = out_bloc[:result_indexer] + out_blen = out_blen[:result_indexer] + + return BlockIndex(self.length, out_bloc, out_blen) + + cpdef BlockIndex make_union(self, SparseIndex y): + """ + Combine together two BlockIndex objects, accepting indices if contained + in one or the other + + Parameters + ---------- + other : SparseIndex + + Notes + ----- + union is a protected keyword in Cython, hence make_union + + Returns + 
------- + BlockIndex + """ + return BlockUnion(self, y.to_block_index()).result + + cpdef Py_ssize_t lookup(self, Py_ssize_t index): + """ + Return the internal location if value exists on given index. + Return -1 otherwise. + """ + cdef: + Py_ssize_t i, cum_len + ndarray[int32_t, ndim=1] locs, lens + + locs = self.blocs + lens = self.blengths + + if self.nblocks == 0: + return -1 + elif index < locs[0]: + return -1 + + cum_len = 0 + for i in range(self.nblocks): + if index >= locs[i] and index < locs[i] + lens[i]: + return cum_len + index - locs[i] + cum_len += lens[i] + + return -1 + + @cython.wraparound(False) + cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer): + """ + Vectorized lookup, returns ndarray[int32_t] + """ + cdef: + Py_ssize_t n, i, j, ind_val + ndarray[int32_t, ndim=1] locs, lens + ndarray[int32_t, ndim=1] results + + locs = self.blocs + lens = self.blengths + + n = len(indexer) + results = np.empty(n, dtype=np.int32) + results[:] = -1 + + if self.npoints == 0: + return results + + for i in range(n): + ind_val = indexer[i] + if not (ind_val < 0 or self.length <= ind_val): + cum_len = 0 + for j in range(self.nblocks): + if ind_val >= locs[j] and ind_val < locs[j] + lens[j]: + results[i] = cum_len + ind_val - locs[j] + cum_len += lens[j] + return results + + +@cython.internal +cdef class BlockMerge: + """ + Object-oriented approach makes sharing state between recursive functions a + lot easier and reduces code duplication + """ + cdef: + BlockIndex x, y, result + ndarray xstart, xlen, xend, ystart, ylen, yend + int32_t xi, yi # block indices + + def __init__(self, BlockIndex x, BlockIndex y): + self.x = x + self.y = y + + if x.length != y.length: + raise Exception('Indices must reference same underlying length') + + self.xstart = self.x.blocs + self.ystart = self.y.blocs + + self.xend = self.x.blocs + self.x.blengths + self.yend = self.y.blocs + self.y.blengths + + # self.xlen = self.x.blengths + # self.ylen = self.y.blengths + + self.xi = 0 + self.yi = 0 + + self.result = self._make_merged_blocks() + + cdef _make_merged_blocks(self): + raise NotImplementedError + + cdef _set_current_indices(self, int32_t xi, int32_t yi, bint mode): + if mode == 0: + self.xi = xi + self.yi = yi + else: + self.xi = yi + self.yi = xi + + +@cython.internal +cdef class BlockUnion(BlockMerge): + """ + Object-oriented approach makes sharing state between recursive functions a + lot easier and reduces code duplication + """ + + cdef _make_merged_blocks(self): + cdef: + ndarray[int32_t, ndim=1] xstart, xend, ystart + ndarray[int32_t, ndim=1] yend, out_bloc, out_blen + int32_t nstart, nend, diff + Py_ssize_t max_len, result_indexer = 0 + + xstart = self.xstart + xend = self.xend + ystart = self.ystart + yend = self.yend + + max_len = min(self.x.length, self.y.length) // 2 + 1 + out_bloc = np.empty(max_len, dtype=np.int32) + out_blen = np.empty(max_len, dtype=np.int32) + + while True: + # we are done (or possibly never began) + if self.xi >= self.x.nblocks and self.yi >= self.y.nblocks: + break + elif self.yi >= self.y.nblocks: + # through with y, just pass through x blocks + nstart = xstart[self.xi] + nend = xend[self.xi] + self.xi += 1 + elif self.xi >= self.x.nblocks: + # through with x, just pass through y blocks + nstart = ystart[self.yi] + nend = yend[self.yi] + self.yi += 1 + else: + # find end of new block + if xstart[self.xi] < ystart[self.yi]: + nstart = xstart[self.xi] + nend = self._find_next_block_end(0) + else: + nstart = ystart[self.yi] + nend = 
self._find_next_block_end(1) + + out_bloc[result_indexer] = nstart + out_blen[result_indexer] = nend - nstart + result_indexer += 1 + + out_bloc = out_bloc[:result_indexer] + out_blen = out_blen[:result_indexer] + + return BlockIndex(self.x.length, out_bloc, out_blen) + + cdef int32_t _find_next_block_end(self, bint mode) except -1: + """ + Wow, this got complicated in a hurry + + mode 0: block started in index x + mode 1: block started in index y + """ + cdef: + ndarray[int32_t, ndim=1] xstart, xend, ystart, yend + int32_t xi, yi, xnblocks, ynblocks, nend + + if mode != 0 and mode != 1: + raise Exception('Mode must be 0 or 1') + + # so symmetric code will work + if mode == 0: + xstart = self.xstart + xend = self.xend + xi = self.xi + + ystart = self.ystart + yend = self.yend + yi = self.yi + ynblocks = self.y.nblocks + else: + xstart = self.ystart + xend = self.yend + xi = self.yi + + ystart = self.xstart + yend = self.xend + yi = self.xi + ynblocks = self.x.nblocks + + nend = xend[xi] + + # done with y? + if yi == ynblocks: + self._set_current_indices(xi + 1, yi, mode) + return nend + elif nend < ystart[yi]: + # block ends before y block + self._set_current_indices(xi + 1, yi, mode) + return nend + else: + while yi < ynblocks and nend > yend[yi]: + yi += 1 + + self._set_current_indices(xi + 1, yi, mode) + + if yi == ynblocks: + return nend + + if nend < ystart[yi]: + # we're done, return the block end + return nend + else: + # merge blocks, continue searching + # this also catches the case where blocks + return self._find_next_block_end(1 - mode) + + +# ----------------------------------------------------------------------------- +# Sparse arithmetic + +include "sparse_op_helper.pxi" + + +# ----------------------------------------------------------------------------- +# SparseArray mask create operations + +def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value): + cdef: + object value + Py_ssize_t i + Py_ssize_t new_length = len(arr) + ndarray[int8_t, ndim=1] mask + + mask = np.ones(new_length, dtype=np.int8) + + for i in range(new_length): + value = arr[i] + if value == fill_value and type(value) == type(fill_value): + mask[i] = 0 + + return mask.view(dtype=bool) diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in new file mode 100644 index 00000000..e6a2c7b1 --- /dev/null +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -0,0 +1,309 @@ +""" +Template for each `dtype` helper function for sparse ops + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# Sparse op +# ---------------------------------------------------------------------- + +ctypedef fused sparse_t: + float64_t + int64_t + + +cdef inline float64_t __div__(sparse_t a, sparse_t b): + if b == 0: + if a > 0: + return INF + elif a < 0: + return -INF + else: + return NaN + else: + return float(a) / b + + +cdef inline float64_t __truediv__(sparse_t a, sparse_t b): + return __div__(a, b) + + +cdef inline sparse_t __mod__(sparse_t a, sparse_t b): + if b == 0: + if sparse_t is float64_t: + return NaN + else: + return 0 + else: + return a % b + + +cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b): + if b == 0: + if sparse_t is float64_t: + # Match non-sparse Series behavior implemented in mask_zero_div_zero + if a > 0: + return INF + elif a < 0: + return -INF + return NaN + else: + return 0 + else: + return a // b + + +# 
---------------------------------------------------------------------- +# sparse array op +# ---------------------------------------------------------------------- + +{{py: + +# dtype, arith_comp_group, logical_group +dtypes = [('float64', True, False), + ('int64', True, True), + ('uint8', False, True)] +# do not generate arithmetic / comparison template for uint8, +# it should be done in fused types + +def get_op(tup): + assert isinstance(tup, tuple) + assert len(tup) == 4 + + opname, lval, rval, dtype = tup + + ops_dict = {'add': '{0} + {1}', + 'sub': '{0} - {1}', + 'mul': '{0} * {1}', + 'div': '__div__({0}, {1})', + 'mod': '__mod__({0}, {1})', + 'truediv': '__truediv__({0}, {1})', + 'floordiv': '__floordiv__({0}, {1})', + 'pow': '{0} ** {1}', + 'eq': '{0} == {1}', + 'ne': '{0} != {1}', + 'lt': '{0} < {1}', + 'gt': '{0} > {1}', + 'le': '{0} <= {1}', + 'ge': '{0} >= {1}', + + 'and': '{0} & {1}', # logical op + 'or': '{0} | {1}', + 'xor': '{0} ^ {1}'} + + return ops_dict[opname].format(lval, rval) + + +def get_dispatch(dtypes): + + ops_list = ['add', 'sub', 'mul', 'div', 'mod', 'truediv', + 'floordiv', 'pow', + 'eq', 'ne', 'lt', 'gt', 'le', 'ge', + 'and', 'or', 'xor'] + + for opname in ops_list: + for dtype, arith_comp_group, logical_group in dtypes: + + if opname in ('div', 'truediv'): + rdtype = 'float64' + elif opname in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): + # comparison op + rdtype = 'uint8' + elif opname in ('and', 'or', 'xor'): + # logical op + rdtype = 'uint8' + else: + rdtype = dtype + + if opname in ('and', 'or', 'xor'): + if logical_group: + yield opname, dtype, rdtype + else: + if arith_comp_group: + yield opname, dtype, rdtype + +}} + + +{{for opname, dtype, rdtype in get_dispatch(dtypes)}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, + BlockIndex xindex, + {{dtype}}_t xfill, + {{dtype}}_t[:] y_, + BlockIndex yindex, + {{dtype}}_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + {{dtype}}_t[:] x, y + ndarray[{{rdtype}}_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.{{rdtype}}) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
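+    # At each output slot one of three cases applies: both inputs have a
+    # stored value (combine x[xi] with y[yi]), only x does (combine x[xi]
+    # with yfill), or only y does (combine xfill with y[yi]); xbp/ybp track
+    # the position within the current block to know when to advance blocks.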
+ for out_i in range(out_index.npoints): + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}} + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, + IntIndex xindex, + {{dtype}}_t xfill, + {{dtype}}_t[:] y_, + IntIndex yindex, + {{dtype}}_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + int32_t[:] xindices, yindices, out_indices + {{dtype}}_t[:] x, y + ndarray[{{rdtype}}_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.{{rdtype}}) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... 
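+    # Illustrative trace: x stored at indices {0, 3} and y at {3, 7} gives
+    # out_index {0, 3, 7}; slot 0 combines x[0] with yfill, slot 3 combines
+    # the two stored values, and slot 7 combines xfill with y[1].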
+ for out_i in range(out_index.npoints): + if xi == xindex.npoints: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}} + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + else: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} + + +cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x, + SparseIndex xindex, {{dtype}}_t xfill, + {{dtype}}_t[:] y, + SparseIndex yindex, {{dtype}}_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_{{opname}}_{{dtype}}(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_{{opname}}_{{dtype}}(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + +{{endfor}} diff --git a/pandas/_libs/src/headers/portable.h b/pandas/_libs/src/headers/portable.h new file mode 100644 index 00000000..cb8e5ba8 --- /dev/null +++ b/pandas/_libs/src/headers/portable.h @@ -0,0 +1,16 @@ +#ifndef _PANDAS_PORTABLE_H_ +#define _PANDAS_PORTABLE_H_ + +#if defined(_MSC_VER) +#define strcasecmp( s1, s2 ) _stricmp( s1, s2 ) +#endif + +// GH-23516 - works around locale perf issues +// from MUSL libc, MIT Licensed - see LICENSES +#define isdigit_ascii(c) (((unsigned)(c) - '0') < 10u) +#define getdigit_ascii(c, default) (isdigit_ascii(c) ? ((int)((c) - '0')) : default) +#define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5)) +#define toupper_ascii(c) ((((unsigned)(c) - 'a') < 26) ? ((c) & 0x5f) : (c)) +#define tolower_ascii(c) ((((unsigned)(c) - 'A') < 26) ? ((c) | 0x20) : (c)) + +#endif diff --git a/pandas/_libs/src/inline_helper.h b/pandas/_libs/src/inline_helper.h new file mode 100644 index 00000000..40fd4576 --- /dev/null +++ b/pandas/_libs/src/inline_helper.h @@ -0,0 +1,27 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#ifndef PANDAS__LIBS_SRC_INLINE_HELPER_H_ +#define PANDAS__LIBS_SRC_INLINE_HELPER_H_ + +#ifndef PANDAS_INLINE + #if defined(__clang__) + #define PANDAS_INLINE static __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define PANDAS_INLINE static __inline__ + #elif defined(_MSC_VER) + #define PANDAS_INLINE static __inline + #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define PANDAS_INLINE static inline + #else + #define PANDAS_INLINE + #endif // __GNUC__ +#endif // PANDAS_INLINE + +#endif // PANDAS__LIBS_SRC_INLINE_HELPER_H_ diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h new file mode 100644 index 00000000..e17d82d5 --- /dev/null +++ b/pandas/_libs/src/klib/khash.h @@ -0,0 +1,719 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + if (!ret) kh_del(32, h, k); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + + - https://github.com/stefanocasazza/ULib + - https://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. + + 2009-09-26 (0.2.4): + + * Improve portability + + 2008-09-19 (0.2.3): + + * Corrected the example + * Improved interfaces + + 2008-09-11 (0.2.2): + + * Improved speed a little in kh_put() + + 2008-09-10 (0.2.1): + + * Added kh_clear() + * Fixed a compiling error + + 2008-09-02 (0.2.0): + + * Changed to token concatenation which increases flexibility. + + 2008-08-31 (0.1.2): + + * Fixed a bug in kh_get(), which has not been tested previously. 
+ + 2008-08-31 (0.1.1): + + * Added destructor +*/ + + +#ifndef __AC_KHASH_H +#define __AC_KHASH_H + +/*! + @header + + Generic hash table library. + */ + +#define AC_VERSION_KHASH_H "0.2.6" + +#include +#include +#include +#include "../inline_helper.h" + + +// hooks for memory allocator, C-runtime allocator used per default +#ifndef KHASH_MALLOC +#define KHASH_MALLOC malloc +#endif + +#ifndef KHASH_REALLOC +#define KHASH_REALLOC realloc +#endif + +#ifndef KHASH_CALLOC +#define KHASH_CALLOC calloc +#endif + +#ifndef KHASH_FREE +#define KHASH_FREE free +#endif + + +#if UINT_MAX == 0xffffffffu +typedef unsigned int khuint32_t; +typedef signed int khint32_t; +#elif ULONG_MAX == 0xffffffffu +typedef unsigned long khuint32_t; +typedef signed long khint32_t; +#endif + +#if ULONG_MAX == ULLONG_MAX +typedef unsigned long khuint64_t; +typedef signed long khint64_t; +#else +typedef unsigned long long khuint64_t; +typedef signed long long khint64_t; +#endif + +#if UINT_MAX == 0xffffu +typedef unsigned int khuint16_t; +typedef signed int khint16_t; +#elif USHRT_MAX == 0xffffu +typedef unsigned short khuint16_t; +typedef signed short khint16_t; +#endif + +#if UCHAR_MAX == 0xffu +typedef unsigned char khuint8_t; +typedef signed char khint8_t; +#endif + +typedef double khfloat64_t; +typedef float khfloat32_t; + +typedef khuint32_t khuint_t; +typedef khuint_t khiter_t; + +#define __ac_isempty(flag, i) ((flag[i>>5]>>(i&0x1fU))&1) +#define __ac_isdel(flag, i) (0) +#define __ac_iseither(flag, i) __ac_isempty(flag, i) +#define __ac_set_isdel_false(flag, i) (0) +#define __ac_set_isempty_false(flag, i) (flag[i>>5]&=~(1ul<<(i&0x1fU))) +#define __ac_set_isempty_true(flag, i) (flag[i>>5]|=(1ul<<(i&0x1fU))) +#define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i) +#define __ac_set_isdel_true(flag, i) ((void)0) + + +// specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp +khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k){ + const khuint32_t SEED = 0xc70f6907UL; + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + const khuint32_t M_32 = 0x5bd1e995; + const int R_32 = 24; + + // Initialize the hash to a 'random' value + khuint32_t h = SEED ^ 4; + + //handle 4 bytes: + k *= M_32; + k ^= k >> R_32; + k *= M_32; + + h *= M_32; + h ^= k; + + // Do a few final mixes of the hash to ensure the "last few + // bytes" are well-incorporated. (Really needed here?) + h ^= h >> 13; + h *= M_32; + h ^= h >> 15; + return h; +} + +// it is possible to have a special x64-version, which would need less operations, but +// using 32bit version always has also some benefits: +// - one code for 32bit and 64bit builds +// - the same case for 32bit and 64bit builds +// - no performance difference could be measured compared to a possible x64-version + +khuint32_t PANDAS_INLINE murmur2_32_32to32(khuint32_t k1, khuint32_t k2){ + const khuint32_t SEED = 0xc70f6907UL; + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + const khuint32_t M_32 = 0x5bd1e995; + const int R_32 = 24; + + // Initialize the hash to a 'random' value + khuint32_t h = SEED ^ 4; + + //handle first 4 bytes: + k1 *= M_32; + k1 ^= k1 >> R_32; + k1 *= M_32; + + h *= M_32; + h ^= k1; + + //handle second 4 bytes: + k2 *= M_32; + k2 ^= k2 >> R_32; + k2 *= M_32; + + h *= M_32; + h ^= k2; + + // Do a few final mixes of the hash to ensure the "last few + // bytes" are well-incorporated. 
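+    // (This is the usual MurmurHash2 finalization: xor-shift/multiply
+    // steps that help every input bit influence the final hash bits.)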
+ h ^= h >> 13; + h *= M_32; + h ^= h >> 15; + return h; +} + +khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k){ + khuint32_t k1 = (khuint32_t)k; + khuint32_t k2 = (khuint32_t)(k >> 32); + + return murmur2_32_32to32(k1, k2); +} + + +#ifdef KHASH_LINEAR +#define __ac_inc(k, m) 1 +#else +#define __ac_inc(k, m) (murmur2_32to32(k) | 1) & (m) +#endif + +#define __ac_fsize(m) ((m) < 32? 1 : (m)>>5) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +static const double __ac_HASH_UPPER = 0.77; + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + typedef struct { \ + khuint_t n_buckets, size, n_occupied, upper_bound; \ + khuint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; \ + extern kh_##name##_t *kh_init_##name(); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern void kh_resize_##name(kh_##name##_t *h, khuint_t new_n_buckets); \ + extern khuint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khuint_t x); + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + typedef struct { \ + khuint_t n_buckets, size, n_occupied, upper_bound; \ + khuint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)KHASH_CALLOC(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + KHASH_FREE(h->keys); KHASH_FREE(h->flags); \ + KHASH_FREE(h->vals); \ + KHASH_FREE(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khuint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khuint_t inc, k, i, last, mask; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + inc) & mask; \ + if (i == last) return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i)? h->n_buckets : i; \ + } else return 0; \ + } \ + SCOPE void kh_resize_##name(kh_##name##_t *h, khuint_t new_n_buckets) \ + { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. 
*/ \ + khuint32_t *new_flags = 0; \ + khuint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khuint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khuint32_t*)KHASH_MALLOC(__ac_fsize(new_n_buckets) * sizeof(khuint32_t)); \ + memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khuint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khuint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isempty_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khuint_t inc, k, i; \ + k = __hash_func(key); \ + i = k & new_mask; \ + inc = __ac_inc(k, new_mask); \ + while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ + __ac_set_isempty_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + KHASH_FREE(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khuint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + } \ + SCOPE khuint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khuint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ + else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + { \ + khuint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + inc = __ac_inc(k, mask); last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + inc) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, 
x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t *h, khuint_t x) \ + { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +/* --- BEGIN OF HASH FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [khuint32_t] + @return The hash value [khuint_t] + */ +#define kh_int_hash_func(key) (khuint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [khuint64_t] + @return The hash value [khuint_t] + */ +PANDAS_INLINE khuint_t kh_int64_hash_func(khuint64_t key) +{ + return (khuint_t)((key)>>33^(key)^(key)<<11); +} +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) + +/*! @function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +PANDAS_INLINE khuint_t __ac_X31_hash_string(const char *s) +{ + khuint_t h = *s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khuint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +PANDAS_INLINE khuint_t __ac_Wang_hash(khuint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(k) __ac_Wang_hash((khuint_t)key) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*! + @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name(void) + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Reset a hash table without deallocating memory. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khuint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! @function + @abstract Insert a key to the hash table. 
+ @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @param r Extra return code: 0 if the key is present in the hash table; + 1 if the bucket is empty (never used); 2 if the element in + the bucket has been deleted [int*] + @return Iterator to the inserted element [khuint_t] + */ +#define kh_put(name, h, k, r) kh_put_##name(h, k, r) + +/*! @function + @abstract Retrieve a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @return Iterator to the found element, or kh_end(h) is the element is absent [khuint_t] + */ +#define kh_get(name, h, k) kh_get_##name(h, k) + +/*! @function + @abstract Remove a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Iterator to the element to be deleted [khuint_t] + */ +#define kh_del(name, h, k) kh_del_##name(h, k) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khuint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khuint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get value given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khuint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->vals[x]) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) ((h)->vals[x]) + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The start iterator [khuint_t] + */ +#define kh_begin(h) (khuint_t)(0) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The end iterator [khuint_t] + */ +#define kh_end(h) ((h)->n_buckets) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khuint_t] + */ +#define kh_size(h) ((h)->size) + +/*! @function + @abstract Get the number of buckets in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of buckets in the hash table [khuint_t] + */ +#define kh_n_buckets(h) ((h)->n_buckets) + +/* More convenient interfaces */ + +/*! @function + @abstract Instantiate a hash set containing integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +#define KHASH_MAP_INIT_UINT(name, khval_t) \ + KHASH_INIT(name, khuint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! 
@function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_UINT64(name) \ + KHASH_INIT(name, khuint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_UINT64(name, khval_t) \ + KHASH_INIT(name, khuint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 16bit-integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT16(name, khval_t) \ + KHASH_INIT(name, khint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +#define KHASH_MAP_INIT_UINT16(name, khval_t) \ + KHASH_INIT(name, khuint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 8bit-integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT8(name, khval_t) \ + KHASH_INIT(name, khint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +#define KHASH_MAP_INIT_UINT8(name, khval_t) \ + KHASH_INIT(name, khuint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + + + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*! 
@function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + + +#define kh_exist_str(h, k) (kh_exist(h, k)) +#define kh_exist_float64(h, k) (kh_exist(h, k)) +#define kh_exist_uint64(h, k) (kh_exist(h, k)) +#define kh_exist_int64(h, k) (kh_exist(h, k)) +#define kh_exist_float32(h, k) (kh_exist(h, k)) +#define kh_exist_int32(h, k) (kh_exist(h, k)) +#define kh_exist_uint32(h, k) (kh_exist(h, k)) +#define kh_exist_int16(h, k) (kh_exist(h, k)) +#define kh_exist_uint16(h, k) (kh_exist(h, k)) +#define kh_exist_int8(h, k) (kh_exist(h, k)) +#define kh_exist_uint8(h, k) (kh_exist(h, k)) + +KHASH_MAP_INIT_STR(str, size_t) +KHASH_MAP_INIT_INT(int32, size_t) +KHASH_MAP_INIT_UINT(uint32, size_t) +KHASH_MAP_INIT_INT64(int64, size_t) +KHASH_MAP_INIT_UINT64(uint64, size_t) +KHASH_MAP_INIT_INT16(int16, size_t) +KHASH_MAP_INIT_UINT16(uint16, size_t) +KHASH_MAP_INIT_INT8(int8, size_t) +KHASH_MAP_INIT_UINT8(uint8, size_t) + + +#endif /* __AC_KHASH_H */ diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h new file mode 100644 index 00000000..56afea04 --- /dev/null +++ b/pandas/_libs/src/klib/khash_python.h @@ -0,0 +1,446 @@ +#include +#include + + +// use numpy's definitions for complex +#include +typedef npy_complex64 khcomplex64_t; +typedef npy_complex128 khcomplex128_t; + + + +// khash should report usage to tracemalloc +#if PY_VERSION_HEX >= 0x03060000 +#include +#if PY_VERSION_HEX < 0x03070000 +#define PyTraceMalloc_Track _PyTraceMalloc_Track +#define PyTraceMalloc_Untrack _PyTraceMalloc_Untrack +#endif +#else +#define PyTraceMalloc_Track(...) +#define PyTraceMalloc_Untrack(...) +#endif + + +static const int KHASH_TRACE_DOMAIN = 424242; +void *traced_malloc(size_t size){ + void * ptr = malloc(size); + if(ptr!=NULL){ + PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size); + } + return ptr; +} + +void *traced_calloc(size_t num, size_t size){ + void * ptr = calloc(num, size); + if(ptr!=NULL){ + PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, num*size); + } + return ptr; +} + +void *traced_realloc(void* old_ptr, size_t size){ + void * ptr = realloc(old_ptr, size); + if(ptr!=NULL){ + if(old_ptr != ptr){ + PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)old_ptr); + } + PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size); + } + return ptr; +} + +void traced_free(void* ptr){ + if(ptr!=NULL){ + PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)ptr); + } + free(ptr); +} + + +#define KHASH_MALLOC traced_malloc +#define KHASH_REALLOC traced_realloc +#define KHASH_CALLOC traced_calloc +#define KHASH_FREE traced_free +#include "khash.h" + +// Previously we were using the built in cpython hash function for doubles +// python 2.7 https://github.com/python/cpython/blob/2.7/Objects/object.c#L1021 +// python 3.5 https://github.com/python/cpython/blob/3.5/Python/pyhash.c#L85 + +// The python 3 hash function has the invariant hash(x) == hash(int(x)) == hash(decimal(x)) +// and the size of hash may be different by platform / version (long in py2, Py_ssize_t in py3). +// We don't need those invariants because types will be cast before hashing, and if Py_ssize_t +// is 64 bits the truncation causes collision issues. 
Given all that, we use our own +// simple hash, viewing the double bytes as an int64 and using khash's default +// hash for 64 bit integers. +// GH 13436 showed that _Py_HashDouble doesn't work well with khash +// GH 28303 showed, that the simple xoring-version isn't good enough +// See GH 36729 for evaluation of the currently used murmur2-hash version +// An interesting alternative to expensive murmur2-hash would be to change +// the probing strategy and use e.g. the probing strategy from CPython's +// implementation of dicts, which shines for smaller sizes but is more +// predisposed to superlinear running times (see GH 36729 for comparison) + + +khuint64_t PANDAS_INLINE asuint64(double key) { + khuint64_t val; + memcpy(&val, &key, sizeof(double)); + return val; +} + +khuint32_t PANDAS_INLINE asuint32(float key) { + khuint32_t val; + memcpy(&val, &key, sizeof(float)); + return val; +} + +#define ZERO_HASH 0 +#define NAN_HASH 0 + +khuint32_t PANDAS_INLINE kh_float64_hash_func(double val){ + // 0.0 and -0.0 should have the same hash: + if (val == 0.0){ + return ZERO_HASH; + } + // all nans should have the same hash: + if ( val!=val ){ + return NAN_HASH; + } + khuint64_t as_int = asuint64(val); + return murmur2_64to32(as_int); +} + +khuint32_t PANDAS_INLINE kh_float32_hash_func(float val){ + // 0.0 and -0.0 should have the same hash: + if (val == 0.0f){ + return ZERO_HASH; + } + // all nans should have the same hash: + if ( val!=val ){ + return NAN_HASH; + } + khuint32_t as_int = asuint32(val); + return murmur2_32to32(as_int); +} + +#define kh_floats_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a))) + +#define KHASH_MAP_INIT_FLOAT64(name, khval_t) \ + KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_floats_hash_equal) + +KHASH_MAP_INIT_FLOAT64(float64, size_t) + +#define KHASH_MAP_INIT_FLOAT32(name, khval_t) \ + KHASH_INIT(name, khfloat32_t, khval_t, 1, kh_float32_hash_func, kh_floats_hash_equal) + +KHASH_MAP_INIT_FLOAT32(float32, size_t) + +khint32_t PANDAS_INLINE kh_complex128_hash_func(khcomplex128_t val){ + return kh_float64_hash_func(val.real)^kh_float64_hash_func(val.imag); +} +khint32_t PANDAS_INLINE kh_complex64_hash_func(khcomplex64_t val){ + return kh_float32_hash_func(val.real)^kh_float32_hash_func(val.imag); +} + +#define kh_complex_hash_equal(a, b) \ + (kh_floats_hash_equal(a.real, b.real) && kh_floats_hash_equal(a.imag, b.imag)) + + +#define KHASH_MAP_INIT_COMPLEX64(name, khval_t) \ + KHASH_INIT(name, khcomplex64_t, khval_t, 1, kh_complex64_hash_func, kh_complex_hash_equal) + +KHASH_MAP_INIT_COMPLEX64(complex64, size_t) + + +#define KHASH_MAP_INIT_COMPLEX128(name, khval_t) \ + KHASH_INIT(name, khcomplex128_t, khval_t, 1, kh_complex128_hash_func, kh_complex_hash_equal) + +KHASH_MAP_INIT_COMPLEX128(complex128, size_t) + + +#define kh_exist_complex64(h, k) (kh_exist(h, k)) +#define kh_exist_complex128(h, k) (kh_exist(h, k)) + + +// NaN-floats should be in the same equivalency class, see GH 22119 +int PANDAS_INLINE floatobject_cmp(PyFloatObject* a, PyFloatObject* b){ + return ( + Py_IS_NAN(PyFloat_AS_DOUBLE(a)) && + Py_IS_NAN(PyFloat_AS_DOUBLE(b)) + ) + || + ( PyFloat_AS_DOUBLE(a) == PyFloat_AS_DOUBLE(b) ); +} + + +// NaNs should be in the same equivalency class, see GH 41836 +// PyObject_RichCompareBool for complexobjects has a different behavior +// needs to be replaced +int PANDAS_INLINE complexobject_cmp(PyComplexObject* a, PyComplexObject* b){ + return ( + Py_IS_NAN(a->cval.real) && + Py_IS_NAN(b->cval.real) && + Py_IS_NAN(a->cval.imag) && + 
Py_IS_NAN(b->cval.imag) + ) + || + ( + Py_IS_NAN(a->cval.real) && + Py_IS_NAN(b->cval.real) && + a->cval.imag == b->cval.imag + ) + || + ( + a->cval.real == b->cval.real && + Py_IS_NAN(a->cval.imag) && + Py_IS_NAN(b->cval.imag) + ) + || + ( + a->cval.real == b->cval.real && + a->cval.imag == b->cval.imag + ); +} + +int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b); + + +// replacing PyObject_RichCompareBool (NaN!=NaN) with pyobject_cmp (NaN==NaN), +// which treats NaNs as equivalent +// see GH 41836 +int PANDAS_INLINE tupleobject_cmp(PyTupleObject* a, PyTupleObject* b){ + Py_ssize_t i; + + if (Py_SIZE(a) != Py_SIZE(b)) { + return 0; + } + + for (i = 0; i < Py_SIZE(a); ++i) { + if (!pyobject_cmp(PyTuple_GET_ITEM(a, i), PyTuple_GET_ITEM(b, i))) { + return 0; + } + } + return 1; +} + + +int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) { + if (a == b) { + return 1; + } + if (Py_TYPE(a) == Py_TYPE(b)) { + // special handling for some built-in types which could have NaNs + // as we would like to have them equivalent, but the usual + // PyObject_RichCompareBool would return False + if (PyFloat_CheckExact(a)) { + return floatobject_cmp((PyFloatObject*)a, (PyFloatObject*)b); + } + if (PyComplex_CheckExact(a)) { + return complexobject_cmp((PyComplexObject*)a, (PyComplexObject*)b); + } + if (PyTuple_CheckExact(a)) { + return tupleobject_cmp((PyTupleObject*)a, (PyTupleObject*)b); + } + // frozenset isn't yet supported + } + + int result = PyObject_RichCompareBool(a, b, Py_EQ); + if (result < 0) { + PyErr_Clear(); + return 0; + } + return result; +} + + +Py_hash_t PANDAS_INLINE _Pandas_HashDouble(double val) { + //Since Python3.10, nan is no longer has hash 0 + if (Py_IS_NAN(val)) { + return 0; + } +#if PY_VERSION_HEX < 0x030A0000 + return _Py_HashDouble(val); +#else + return _Py_HashDouble(NULL, val); +#endif +} + + +Py_hash_t PANDAS_INLINE floatobject_hash(PyFloatObject* key) { + return _Pandas_HashDouble(PyFloat_AS_DOUBLE(key)); +} + + +#define _PandasHASH_IMAG 1000003UL + +// replaces _Py_HashDouble with _Pandas_HashDouble +Py_hash_t PANDAS_INLINE complexobject_hash(PyComplexObject* key) { + Py_uhash_t realhash = (Py_uhash_t)_Pandas_HashDouble(key->cval.real); + Py_uhash_t imaghash = (Py_uhash_t)_Pandas_HashDouble(key->cval.imag); + if (realhash == (Py_uhash_t)-1 || imaghash == (Py_uhash_t)-1) { + return -1; + } + Py_uhash_t combined = realhash + _PandasHASH_IMAG * imaghash; + if (combined == (Py_uhash_t)-1) { + return -2; + } + return (Py_hash_t)combined; +} + + +khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key); + +//we could use any hashing algorithm, this is the original CPython's for tuples + +#if SIZEOF_PY_UHASH_T > 4 +#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL) +#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL) +#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL) +#define _PandasHASH_XXROTATE(x) ((x << 31) | (x >> 33)) /* Rotate left 31 bits */ +#else +#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL) +#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL) +#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)374761393UL) +#define _PandasHASH_XXROTATE(x) ((x << 13) | (x >> 19)) /* Rotate left 13 bits */ +#endif + +Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) { + Py_ssize_t i, len = Py_SIZE(key); + PyObject **item = key->ob_item; + + Py_uhash_t acc = _PandasHASH_XXPRIME_5; + for (i = 0; i < len; i++) { + Py_uhash_t lane = kh_python_hash_func(item[i]); + if (lane == (Py_uhash_t)-1) { + return -1; + 
} + acc += lane * _PandasHASH_XXPRIME_2; + acc = _PandasHASH_XXROTATE(acc); + acc *= _PandasHASH_XXPRIME_1; + } + + /* Add input length, mangled to keep the historical value of hash(()). */ + acc += len ^ (_PandasHASH_XXPRIME_5 ^ 3527539UL); + + if (acc == (Py_uhash_t)-1) { + return 1546275796; + } + return acc; +} + + +khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key) { + Py_hash_t hash; + // For PyObject_Hash holds: + // hash(0.0) == 0 == hash(-0.0) + // yet for different nan-objects different hash-values + // are possible + if (PyFloat_CheckExact(key)) { + // we cannot use kh_float64_hash_func + // because float(k) == k holds for any int-object k + // and kh_float64_hash_func doesn't respect it + hash = floatobject_hash((PyFloatObject*)key); + } + else if (PyComplex_CheckExact(key)) { + // we cannot use kh_complex128_hash_func + // because complex(k,0) == k holds for any int-object k + // and kh_complex128_hash_func doesn't respect it + hash = complexobject_hash((PyComplexObject*)key); + } + else if (PyTuple_CheckExact(key)) { + hash = tupleobject_hash((PyTupleObject*)key); + } + else { + hash = PyObject_Hash(key); + } + + if (hash == -1) { + PyErr_Clear(); + return 0; + } + #if SIZEOF_PY_HASH_T == 4 + // it is already 32bit value + return hash; + #else + // for 64bit builds, + // we need information of the upper 32bits as well + // see GH 37615 + khuint64_t as_uint = (khuint64_t) hash; + // uints avoid undefined behavior of signed ints + return (as_uint>>32)^as_uint; + #endif +} + + +#define kh_python_hash_equal(a, b) (pyobject_cmp(a, b)) + + +// Python object + +typedef PyObject* kh_pyobject_t; + +#define KHASH_MAP_INIT_PYOBJECT(name, khval_t) \ + KHASH_INIT(name, kh_pyobject_t, khval_t, 1, \ + kh_python_hash_func, kh_python_hash_equal) + +KHASH_MAP_INIT_PYOBJECT(pymap, Py_ssize_t) + +#define KHASH_SET_INIT_PYOBJECT(name) \ + KHASH_INIT(name, kh_pyobject_t, char, 0, \ + kh_python_hash_func, kh_python_hash_equal) + +KHASH_SET_INIT_PYOBJECT(pyset) + +#define kh_exist_pymap(h, k) (kh_exist(h, k)) +#define kh_exist_pyset(h, k) (kh_exist(h, k)) + +KHASH_MAP_INIT_STR(strbox, kh_pyobject_t) + +typedef struct { + kh_str_t *table; + int starts[256]; +} kh_str_starts_t; + +typedef kh_str_starts_t* p_kh_str_starts_t; + +p_kh_str_starts_t PANDAS_INLINE kh_init_str_starts(void) { + kh_str_starts_t *result = (kh_str_starts_t*)KHASH_CALLOC(1, sizeof(kh_str_starts_t)); + result->table = kh_init_str(); + return result; +} + +khuint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t* table, char* key, int* ret) { + khuint_t result = kh_put_str(table->table, key, ret); + if (*ret != 0) { + table->starts[(unsigned char)key[0]] = 1; + } + return result; +} + +khuint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const char* key) { + unsigned char ch = *key; + if (table->starts[ch]) { + if (ch == '\0' || kh_get_str(table->table, key) != table->table->n_buckets) return 1; + } + return 0; +} + +void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) { + kh_destroy_str(table->table); + KHASH_FREE(table); +} + +void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t* table, khuint_t val) { + kh_resize_str(table->table, val); +} + +// utility function: given the number of elements +// returns number of necessary buckets +khuint_t PANDAS_INLINE kh_needed_n_buckets(khuint_t n_elements){ + khuint_t candidate = n_elements; + kroundup32(candidate); + khuint_t upper_bound = (khuint_t)(candidate * __ac_HASH_UPPER + 0.5); + return (upper_bound < n_elements) ? 
2*candidate : candidate; + +} diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h new file mode 100644 index 00000000..d161c4e2 --- /dev/null +++ b/pandas/_libs/src/parse_helper.h @@ -0,0 +1,100 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_ +#define PANDAS__LIBS_SRC_PARSE_HELPER_H_ + +#include +#include "parser/tokenizer.h" + +int to_double(char *item, double *p_value, char sci, char decimal, + int *maybe_int) { + char *p_end = NULL; + int error = 0; + + /* Switch to precise xstrtod GH 31364 */ + *p_value = precise_xstrtod(item, &p_end, decimal, sci, '\0', 1, + &error, maybe_int); + + return (error == 0) && (!*p_end); +} + +int floatify(PyObject *str, double *result, int *maybe_int) { + int status; + char *data; + PyObject *tmp = NULL; + const char sci = 'E'; + const char dec = '.'; + + if (PyBytes_Check(str)) { + data = PyBytes_AS_STRING(str); + } else if (PyUnicode_Check(str)) { + tmp = PyUnicode_AsUTF8String(str); + if (tmp == NULL) { + return -1; + } + data = PyBytes_AS_STRING(tmp); + } else { + PyErr_SetString(PyExc_TypeError, "Invalid object type"); + return -1; + } + + status = to_double(data, result, sci, dec, maybe_int); + + if (!status) { + /* handle inf/-inf infinity/-infinity */ + if (strlen(data) == 3) { + if (0 == strcasecmp(data, "inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 4) { + if (0 == strcasecmp(data, "-inf")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 8) { + if (0 == strcasecmp(data, "infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 9) { + if (0 == strcasecmp(data, "-infinity")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else { + goto parsingerror; + } + } + + Py_XDECREF(tmp); + return 0; + +parsingerror: + PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data); + Py_XDECREF(tmp); + return -1; +} + +#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_ diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c new file mode 100644 index 00000000..2ed0cef3 --- /dev/null +++ b/pandas/_libs/src/parser/io.c @@ -0,0 +1,107 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#include "io.h" + +/* + On-disk FILE, uncompressed +*/ + +void *new_rd_source(PyObject *obj) { + rd_source *rds = (rd_source *)malloc(sizeof(rd_source)); + + if (rds == NULL) { + PyErr_NoMemory(); + return NULL; + } + /* hold on to this object */ + Py_INCREF(obj); + rds->obj = obj; + rds->buffer = NULL; + rds->position = 0; + + return (void *)rds; +} + +/* + + Cleanup callbacks + + */ + +int del_rd_source(void *rds) { + Py_XDECREF(RDS(rds)->obj); + Py_XDECREF(RDS(rds)->buffer); + free(rds); + + return 0; +} + +/* + + IO callbacks + + */ + +void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status, const char *encoding_errors) { + PyGILState_STATE state; + PyObject *result, *func, *args, *tmp; + + void *retval; + + size_t length; + rd_source *src = RDS(source); + state = PyGILState_Ensure(); + + /* delete old object */ + Py_XDECREF(src->buffer); + src->buffer = NULL; + args = Py_BuildValue("(i)", nbytes); + + func = PyObject_GetAttrString(src->obj, "read"); + + /* TODO: does this release the GIL? */ + result = PyObject_CallObject(func, args); + Py_XDECREF(args); + Py_XDECREF(func); + + if (result == NULL) { + PyGILState_Release(state); + *bytes_read = 0; + *status = CALLING_READ_FAILED; + return NULL; + } else if (!PyBytes_Check(result)) { + tmp = PyUnicode_AsEncodedString(result, "utf-8", encoding_errors); + Py_DECREF(result); + if (tmp == NULL) { + PyGILState_Release(state); + return NULL; + } + result = tmp; + } + + length = PySequence_Length(result); + + if (length == 0) + *status = REACHED_EOF; + else + *status = 0; + + /* hang on to the Python object */ + src->buffer = result; + retval = (void *)PyBytes_AsString(result); + + PyGILState_Release(state); + + /* TODO: more error handling */ + *bytes_read = length; + + return retval; +} diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/src/parser/io.h new file mode 100644 index 00000000..f0e8b018 --- /dev/null +++ b/pandas/_libs/src/parser/io.h @@ -0,0 +1,34 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#ifndef PANDAS__LIBS_SRC_PARSER_IO_H_ +#define PANDAS__LIBS_SRC_PARSER_IO_H_ + +#define PY_SSIZE_T_CLEAN +#include +#include "tokenizer.h" + +#define FS(source) ((file_source *)source) + +typedef struct _rd_source { + PyObject *obj; + PyObject *buffer; + size_t position; +} rd_source; + +#define RDS(source) ((rd_source *)source) + +void *new_rd_source(PyObject *obj); + +int del_rd_source(void *src); + +void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status, const char *encoding_errors); + +#endif // PANDAS__LIBS_SRC_PARSER_IO_H_ diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c new file mode 100644 index 00000000..c337c3ea --- /dev/null +++ b/pandas/_libs/src/parser/tokenizer.c @@ -0,0 +1,2085 @@ +/* + +Copyright (c) 2012, Lambda Foundry, Inc., except where noted + +Incorporates components of WarrenWeckesser/textreader, licensed under 3-clause +BSD + +See LICENSE for the license + +*/ + +/* + +Low-level ascii-file processing for pandas. Combines some elements from +Python's built-in csv module and Warren Weckesser's textreader project on +GitHub. See Python Software Foundation License and BSD licenses for these. 
+ +*/ + +#include "tokenizer.h" + +#include +#include +#include + +#include "../headers/portable.h" + +void coliter_setup(coliter_t *self, parser_t *parser, int64_t i, + int64_t start) { + // column i, starting at 0 + self->words = parser->words; + self->col = i; + self->line_start = parser->line_start + start; +} + +static void free_if_not_null(void **ptr) { + TRACE(("free_if_not_null %p\n", *ptr)) + if (*ptr != NULL) { + free(*ptr); + *ptr = NULL; + } +} + +/* + + Parser / tokenizer + +*/ + +static void *grow_buffer(void *buffer, uint64_t length, uint64_t *capacity, + int64_t space, int64_t elsize, int *error) { + uint64_t cap = *capacity; + void *newbuffer = buffer; + + // Can we fit potentially nbytes tokens (+ null terminators) in the stream? + while ((length + space >= cap) && (newbuffer != NULL)) { + cap = cap ? cap << 1 : 2; + buffer = newbuffer; + newbuffer = realloc(newbuffer, elsize * cap); + } + + if (newbuffer == NULL) { + // realloc failed so don't change *capacity, set *error to errno + // and return the last good realloc'd buffer so it can be freed + *error = errno; + newbuffer = buffer; + } else { + // realloc worked, update *capacity and set *error to 0 + // sigh, multiple return values + *capacity = cap; + *error = 0; + } + return newbuffer; +} + +void parser_set_default_options(parser_t *self) { + self->decimal = '.'; + self->sci = 'E'; + + // For tokenization + self->state = START_RECORD; + + self->delimiter = ','; // XXX + self->delim_whitespace = 0; + + self->doublequote = 0; + self->quotechar = '"'; + self->escapechar = 0; + + self->lineterminator = '\0'; /* NUL->standard logic */ + + self->skipinitialspace = 0; + self->quoting = QUOTE_MINIMAL; + self->allow_embedded_newline = 1; + + self->expected_fields = -1; + self->on_bad_lines = ERROR; + + self->commentchar = '#'; + self->thousands = '\0'; + + self->skipset = NULL; + self->skipfunc = NULL; + self->skip_first_N_rows = -1; + self->skip_footer = 0; +} + +parser_t *parser_new() { return (parser_t *)calloc(1, sizeof(parser_t)); } + +int parser_clear_data_buffers(parser_t *self) { + free_if_not_null((void *)&self->stream); + free_if_not_null((void *)&self->words); + free_if_not_null((void *)&self->word_starts); + free_if_not_null((void *)&self->line_start); + free_if_not_null((void *)&self->line_fields); + return 0; +} + +int parser_cleanup(parser_t *self) { + int status = 0; + + // XXX where to put this + free_if_not_null((void *)&self->error_msg); + free_if_not_null((void *)&self->warn_msg); + + if (self->skipset != NULL) { + kh_destroy_int64((kh_int64_t *)self->skipset); + self->skipset = NULL; + } + + if (parser_clear_data_buffers(self) < 0) { + status = -1; + } + + if (self->cb_cleanup != NULL) { + if (self->cb_cleanup(self->source) < 0) { + status = -1; + } + self->cb_cleanup = NULL; + } + + return status; +} + +int parser_init(parser_t *self) { + int64_t sz; + + /* + Initialize data buffers + */ + + self->stream = NULL; + self->words = NULL; + self->word_starts = NULL; + self->line_start = NULL; + self->line_fields = NULL; + self->error_msg = NULL; + self->warn_msg = NULL; + + // token stream + self->stream = malloc(STREAM_INIT_SIZE * sizeof(char)); + if (self->stream == NULL) { + parser_cleanup(self); + return PARSER_OUT_OF_MEMORY; + } + self->stream_cap = STREAM_INIT_SIZE; + self->stream_len = 0; + + // word pointers and metadata + sz = STREAM_INIT_SIZE / 10; + sz = sz ? 
sz : 1; + self->words = malloc(sz * sizeof(char *)); + self->word_starts = malloc(sz * sizeof(int64_t)); + self->max_words_cap = sz; + self->words_cap = sz; + self->words_len = 0; + + // line pointers and metadata + self->line_start = malloc(sz * sizeof(int64_t)); + + self->line_fields = malloc(sz * sizeof(int64_t)); + + self->lines_cap = sz; + self->lines = 0; + self->file_lines = 0; + + if (self->stream == NULL || self->words == NULL || + self->word_starts == NULL || self->line_start == NULL || + self->line_fields == NULL) { + parser_cleanup(self); + + return PARSER_OUT_OF_MEMORY; + } + + /* amount of bytes buffered */ + self->datalen = 0; + self->datapos = 0; + + self->line_start[0] = 0; + self->line_fields[0] = 0; + + self->pword_start = self->stream; + self->word_start = 0; + + self->state = START_RECORD; + + self->error_msg = NULL; + self->warn_msg = NULL; + + self->commentchar = '\0'; + + return 0; +} + +void parser_free(parser_t *self) { + // opposite of parser_init + parser_cleanup(self); +} + +void parser_del(parser_t *self) { + free(self); +} + +static int make_stream_space(parser_t *self, size_t nbytes) { + uint64_t i, cap, length; + int status; + void *orig_ptr, *newptr; + + // Can we fit potentially nbytes tokens (+ null terminators) in the stream? + + /* + TOKEN STREAM + */ + + orig_ptr = (void *)self->stream; + TRACE( + ("\n\nmake_stream_space: nbytes = %zu. grow_buffer(self->stream...)\n", + nbytes)) + self->stream = (char *)grow_buffer((void *)self->stream, self->stream_len, + &self->stream_cap, nbytes * 2, + sizeof(char), &status); + TRACE( + ("make_stream_space: self->stream=%p, self->stream_len = %zu, " + "self->stream_cap=%zu, status=%zu\n", + self->stream, self->stream_len, self->stream_cap, status)) + + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc sets errno when moving buffer? + if (self->stream != orig_ptr) { + self->pword_start = self->stream + self->word_start; + + for (i = 0; i < self->words_len; ++i) { + self->words[i] = self->stream + self->word_starts[i]; + } + } + + /* + WORD VECTORS + */ + + cap = self->words_cap; + + /** + * If we are reading in chunks, we need to be aware of the maximum number + * of words we have seen in previous chunks (self->max_words_cap), so + * that way, we can properly allocate when reading subsequent ones. + * + * Otherwise, we risk a buffer overflow if we mistakenly under-allocate + * just because a recent chunk did not have as many words. 
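+     * For example, if an earlier chunk produced 1,000 words and the current
+     * chunk has only yielded a handful so far, sizing the request from
+     * max_words_cap keeps the capacity at roughly the 1,000-word level
+     * rather than shrinking it to what this chunk happens to contain.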
+ */ + if (self->words_len + nbytes < self->max_words_cap) { + length = self->max_words_cap - nbytes - 1; + } else { + length = self->words_len; + } + + self->words = + (char **)grow_buffer((void *)self->words, length, + &self->words_cap, nbytes, + sizeof(char *), &status); + TRACE( + ("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, " + "%d)\n", + self->words_len, self->words_cap, nbytes, status)) + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc took place + if (cap != self->words_cap) { + TRACE( + ("make_stream_space: cap != self->words_cap, nbytes = %d, " + "self->words_cap=%d\n", + nbytes, self->words_cap)) + newptr = realloc((void *)self->word_starts, + sizeof(int64_t) * self->words_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->word_starts = (int64_t *)newptr; + } + } + + /* + LINE VECTORS + */ + cap = self->lines_cap; + self->line_start = + (int64_t *)grow_buffer((void *)self->line_start, self->lines + 1, + &self->lines_cap, nbytes, + sizeof(int64_t), &status); + TRACE(( + "make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n", + self->lines + 1, self->lines_cap, nbytes, status)) + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc took place + if (cap != self->lines_cap) { + TRACE(("make_stream_space: cap != self->lines_cap, nbytes = %d\n", + nbytes)) + newptr = realloc((void *)self->line_fields, + sizeof(int64_t) * self->lines_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_fields = (int64_t *)newptr; + } + } + + return 0; +} + +static int push_char(parser_t *self, char c) { + TRACE(("push_char: self->stream[%zu] = %x, stream_cap=%zu\n", + self->stream_len + 1, c, self->stream_cap)) + if (self->stream_len >= self->stream_cap) { + TRACE( + ("push_char: ERROR!!! self->stream_len(%d) >= " + "self->stream_cap(%d)\n", + self->stream_len, self->stream_cap)) + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + self->stream[self->stream_len++] = c; + return 0; +} + +int PANDAS_INLINE end_field(parser_t *self) { + // XXX cruft + if (self->words_len >= self->words_cap) { + TRACE( + ("end_field: ERROR!!! self->words_len(%zu) >= " + "self->words_cap(%zu)\n", + self->words_len, self->words_cap)) + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + + // null terminate token + push_char(self, '\0'); + + // set pointer and metadata + self->words[self->words_len] = self->pword_start; + + TRACE(("end_field: Char diff: %d\n", self->pword_start - self->words[0])); + + TRACE(("end_field: Saw word %s at: %d. 
Total: %d\n", self->pword_start, + self->word_start, self->words_len + 1)) + + self->word_starts[self->words_len] = self->word_start; + self->words_len++; + + // increment line field count + self->line_fields[self->lines]++; + + // New field begin in stream + self->pword_start = self->stream + self->stream_len; + self->word_start = self->stream_len; + + return 0; +} + +static void append_warning(parser_t *self, const char *msg) { + int64_t ex_length; + int64_t length = strlen(msg); + void *newptr; + + if (self->warn_msg == NULL) { + self->warn_msg = malloc(length + 1); + snprintf(self->warn_msg, length + 1, "%s", msg); + } else { + ex_length = strlen(self->warn_msg); + newptr = realloc(self->warn_msg, ex_length + length + 1); + if (newptr != NULL) { + self->warn_msg = (char *)newptr; + snprintf(self->warn_msg + ex_length, length + 1, "%s", msg); + } + } +} + +static int end_line(parser_t *self) { + char *msg; + int64_t fields; + int64_t ex_fields = self->expected_fields; + int64_t bufsize = 100; // for error or warning messages + + fields = self->line_fields[self->lines]; + + TRACE(("end_line: Line end, nfields: %d\n", fields)); + + TRACE(("end_line: lines: %d\n", self->lines)); + if (self->lines > 0) { + if (self->expected_fields >= 0) { + ex_fields = self->expected_fields; + } else { + ex_fields = self->line_fields[self->lines - 1]; + } + } + TRACE(("end_line: ex_fields: %d\n", ex_fields)); + + if (self->state == START_FIELD_IN_SKIP_LINE || + self->state == IN_FIELD_IN_SKIP_LINE || + self->state == IN_QUOTED_FIELD_IN_SKIP_LINE || + self->state == QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE) { + TRACE(("end_line: Skipping row %d\n", self->file_lines)); + // increment file line count + self->file_lines++; + + // skip the tokens from this bad line + self->line_start[self->lines] += fields; + + // reset field count + self->line_fields[self->lines] = 0; + return 0; + } + + if (!(self->lines <= self->header_end + 1) && + (fields > ex_fields) && !(self->usecols)) { + // increment file line count + self->file_lines++; + + // skip the tokens from this bad line + self->line_start[self->lines] += fields; + + // reset field count + self->line_fields[self->lines] = 0; + + // file_lines is now the actual file line number (starting at 1) + if (self->on_bad_lines == ERROR) { + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Expected %" PRId64 " fields in line %" PRIu64 ", saw %" + PRId64 "\n", ex_fields, self->file_lines, fields); + + TRACE(("Error at line %d, %d fields\n", self->file_lines, fields)); + + return -1; + } else { + // simply skip bad lines + if (self->on_bad_lines == WARN) { + // pass up error message + msg = malloc(bufsize); + snprintf(msg, bufsize, + "Skipping line %" PRIu64 ": expected %" PRId64 + " fields, saw %" PRId64 "\n", + self->file_lines, ex_fields, fields); + append_warning(self, msg); + free(msg); + } + } + } else { + // missing trailing delimiters + if ((self->lines >= self->header_end + 1) && + fields < ex_fields) { + // might overrun the buffer when closing fields + if (make_stream_space(self, ex_fields - fields) < 0) { + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, "out of memory"); + return -1; + } + + while (fields < ex_fields) { + end_field(self); + fields++; + } + } + + // increment both line counts + self->file_lines++; + self->lines++; + + // good line, set new start point + if (self->lines >= self->lines_cap) { + TRACE(( + "end_line: ERROR!!! 
self->lines(%zu) >= self->lines_cap(%zu)\n", + self->lines, self->lines_cap)) + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - " + "possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + self->line_start[self->lines] = + (self->line_start[self->lines - 1] + fields); + + TRACE( + ("end_line: new line start: %d\n", self->line_start[self->lines])); + + // new line start with 0 fields + self->line_fields[self->lines] = 0; + } + + TRACE(("end_line: Finished line, at %d\n", self->lines)); + + return 0; +} + +int parser_add_skiprow(parser_t *self, int64_t row) { + khiter_t k; + kh_int64_t *set; + int ret = 0; + + if (self->skipset == NULL) { + self->skipset = (void *)kh_init_int64(); + } + + set = (kh_int64_t *)self->skipset; + + k = kh_put_int64(set, row, &ret); + set->keys[k] = row; + + return 0; +} + +int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) { + // self->file_lines is zero based so subtract 1 from nrows + if (nrows > 0) { + self->skip_first_N_rows = nrows - 1; + } + + return 0; +} + +static int parser_buffer_bytes(parser_t *self, size_t nbytes, + const char *encoding_errors) { + int status; + size_t bytes_read; + + status = 0; + self->datapos = 0; + self->data = self->cb_io(self->source, nbytes, &bytes_read, &status, + encoding_errors); + TRACE(( + "parser_buffer_bytes self->cb_io: nbytes=%zu, datalen: %d, status=%d\n", + nbytes, bytes_read, status)); + self->datalen = bytes_read; + + if (status != REACHED_EOF && self->data == NULL) { + int64_t bufsize = 200; + self->error_msg = malloc(bufsize); + + if (status == CALLING_READ_FAILED) { + snprintf(self->error_msg, bufsize, + "Calling read(nbytes) on source failed. " + "Try engine='python'."); + } else { + snprintf(self->error_msg, bufsize, "Unknown error in IO callback"); + } + return -1; + } + + TRACE(("datalen: %d\n", self->datalen)); + + return status; +} + +/* + + Tokenization macros and state machine code + +*/ + +#define PUSH_CHAR(c) \ + TRACE( \ + ("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", \ + c, slen, self->stream_cap, self->stream_len)) \ + if (slen >= self->stream_cap) { \ + TRACE(("PUSH_CHAR: ERROR!!! 
slen(%d) >= stream_cap(%d)\n", slen, \ + self->stream_cap)) \ + int64_t bufsize = 100; \ + self->error_msg = malloc(bufsize); \ + snprintf(self->error_msg, bufsize, \ + "Buffer overflow caught - possible malformed input file.\n");\ + return PARSER_OUT_OF_MEMORY; \ + } \ + *stream++ = c; \ + slen++; + +// This is a little bit of a hack but works for now + +#define END_FIELD() \ + self->stream_len = slen; \ + if (end_field(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; + +#define END_LINE_STATE(STATE) \ + self->stream_len = slen; \ + if (end_line(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; \ + self->state = STATE; \ + if (line_limit > 0 && self->lines == start_lines + line_limit) { \ + goto linelimit; \ + } + +#define END_LINE_AND_FIELD_STATE(STATE) \ + self->stream_len = slen; \ + if (end_line(self) < 0) { \ + goto parsingerror; \ + } \ + if (end_field(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; \ + self->state = STATE; \ + if (line_limit > 0 && self->lines == start_lines + line_limit) { \ + goto linelimit; \ + } + +#define END_LINE() END_LINE_STATE(START_RECORD) + +#define IS_TERMINATOR(c) \ + (c == lineterminator) + +#define IS_QUOTE(c) ((c == self->quotechar && self->quoting != QUOTE_NONE)) + +// don't parse '\r' with a custom line terminator +#define IS_CARRIAGE(c) (c == carriage_symbol) + +#define IS_COMMENT_CHAR(c) (c == comment_symbol) + +#define IS_ESCAPE_CHAR(c) (c == escape_symbol) + +#define IS_SKIPPABLE_SPACE(c) \ + ((!self->delim_whitespace && c == ' ' && self->skipinitialspace)) + +// applied when in a field +#define IS_DELIMITER(c) \ + ((!self->delim_whitespace && c == self->delimiter) || \ + (self->delim_whitespace && isblank(c))) + +#define _TOKEN_CLEANUP() \ + self->stream_len = slen; \ + self->datapos = i; \ + TRACE(("_TOKEN_CLEANUP: datapos: %d, datalen: %d\n", self->datapos, \ + self->datalen)); + +#define CHECK_FOR_BOM() \ + if (*buf == '\xef' && *(buf + 1) == '\xbb' && *(buf + 2) == '\xbf') { \ + buf += 3; \ + self->datapos += 3; \ + } + +int skip_this_line(parser_t *self, int64_t rownum) { + int should_skip; + PyObject *result; + PyGILState_STATE state; + + if (self->skipfunc != NULL) { + state = PyGILState_Ensure(); + result = PyObject_CallFunction(self->skipfunc, "i", rownum); + + // Error occurred. It will be processed + // and caught at the Cython level. + if (result == NULL) { + should_skip = -1; + } else { + should_skip = PyObject_IsTrue(result); + } + + Py_XDECREF(result); + PyGILState_Release(state); + + return should_skip; + } else if (self->skipset != NULL) { + return (kh_get_int64((kh_int64_t *)self->skipset, self->file_lines) != + ((kh_int64_t *)self->skipset)->n_buckets); + } else { + return (rownum <= self->skip_first_N_rows); + } +} + +int tokenize_bytes(parser_t *self, + size_t line_limit, uint64_t start_lines) { + int64_t i; + uint64_t slen; + int should_skip; + char c; + char *stream; + char *buf = self->data + self->datapos; + + const char lineterminator = (self->lineterminator == '\0') ? + '\n' : self->lineterminator; + + // 1000 is something that couldn't fit in "char" + // thus comparing a char to it would always be "false" + const int carriage_symbol = (self->lineterminator == '\0') ? '\r' : 1000; + const int comment_symbol = (self->commentchar != '\0') ? 
+ self->commentchar : 1000; + const int escape_symbol = (self->escapechar != '\0') ? + self->escapechar : 1000; + + if (make_stream_space(self, self->datalen - self->datapos) < 0) { + int64_t bufsize = 100; + self->error_msg = malloc(bufsize); + snprintf(self->error_msg, bufsize, "out of memory"); + return -1; + } + + stream = self->stream + self->stream_len; + slen = self->stream_len; + + TRACE(("%s\n", buf)); + + if (self->file_lines == 0) { + CHECK_FOR_BOM(); + } + + for (i = self->datapos; i < self->datalen; ++i) { + // next character in file + c = *buf++; + + TRACE( + ("tokenize_bytes - Iter: %d Char: 0x%x Line %d field_count %d, " + "state %d\n", + i, c, self->file_lines + 1, self->line_fields[self->lines], + self->state)); + + switch (self->state) { + case START_FIELD_IN_SKIP_LINE: + if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else if (IS_DELIMITER(c)) { + // Do nothing, we're starting a new field again. + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + break; + + case IN_FIELD_IN_SKIP_LINE: + if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_DELIMITER(c)) { + self->state = START_FIELD_IN_SKIP_LINE; + } + break; + + case IN_QUOTED_FIELD_IN_SKIP_LINE: + if (IS_QUOTE(c)) { + if (self->doublequote) { + self->state = QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + } + break; + + case QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE: + if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_DELIMITER(c)) { + self->state = START_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + break; + + case WHITESPACE_LINE: + if (IS_TERMINATOR(c)) { + self->file_lines++; + self->state = START_RECORD; + break; + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + break; + } else if (!self->delim_whitespace) { + if (isblank(c) && c != self->delimiter) { + } else { // backtrack + // use i + 1 because buf has been incremented but not i + do { + --buf; + --i; + } while (i + 1 > self->datapos && !IS_TERMINATOR(*buf)); + + // reached a newline rather than the beginning + if (IS_TERMINATOR(*buf)) { + ++buf; // move pointer to first char after newline + ++i; + } + self->state = START_FIELD; + } + break; + } + // fall through + + case EAT_WHITESPACE: + if (IS_TERMINATOR(c)) { + END_LINE(); + self->state = START_RECORD; + break; + } else if (IS_CARRIAGE(c)) { + self->state = EAT_CRNL; + break; + } else if (IS_COMMENT_CHAR(c)) { + self->state = EAT_COMMENT; + break; + } else if (!isblank(c)) { + self->state = START_FIELD; + // fall through to subsequent state + } else { + // if whitespace char, keep slurping + break; + } + + case START_RECORD: + // start of record + should_skip = skip_this_line(self, self->file_lines); + + if (should_skip == -1) { + goto parsingerror; + } else if (should_skip) { + if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + + if (IS_TERMINATOR(c)) { + END_LINE(); + } + } + break; + } else if (IS_TERMINATOR(c)) { + // \n\r possible? 
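+                // an empty input line: with skip_empty_lines only the raw
+                // line counter is bumped; otherwise END_LINE() records the
+                // row (end_line pads it with empty fields if fewer than the
+                // expected number were seen)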
+ if (self->skip_empty_lines) { + self->file_lines++; + } else { + END_LINE(); + } + break; + } else if (IS_CARRIAGE(c)) { + if (self->skip_empty_lines) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else { + self->state = EAT_CRNL; + } + break; + } else if (IS_COMMENT_CHAR(c)) { + self->state = EAT_LINE_COMMENT; + break; + } else if (isblank(c)) { + if (self->delim_whitespace) { + if (self->skip_empty_lines) { + self->state = WHITESPACE_LINE; + } else { + self->state = EAT_WHITESPACE; + } + break; + } else if (c != self->delimiter && self->skip_empty_lines) { + self->state = WHITESPACE_LINE; + break; + } + // fall through + } + + // normal character - fall through + // to handle as START_FIELD + self->state = START_FIELD; + + case START_FIELD: + // expecting field + if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else if (IS_QUOTE(c)) { + // start quoted field + self->state = IN_QUOTED_FIELD; + } else if (IS_ESCAPE_CHAR(c)) { + // possible escaped character + self->state = ESCAPED_CHAR; + } else if (IS_SKIPPABLE_SPACE(c)) { + // ignore space at start of field + } else if (IS_DELIMITER(c)) { + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + // save empty field + END_FIELD(); + } + } else if (IS_COMMENT_CHAR(c)) { + END_FIELD(); + self->state = EAT_COMMENT; + } else { + // begin new unquoted field + PUSH_CHAR(c); + self->state = IN_FIELD; + } + break; + + case ESCAPED_CHAR: + PUSH_CHAR(c); + self->state = IN_FIELD; + break; + + case EAT_LINE_COMMENT: + if (IS_TERMINATOR(c)) { + self->file_lines++; + self->state = START_RECORD; + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } + break; + + case IN_FIELD: + // in unquoted field + if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else if (IS_ESCAPE_CHAR(c)) { + // possible escaped character + self->state = ESCAPED_CHAR; + } else if (IS_DELIMITER(c)) { + // end of field - end of line not reached yet + END_FIELD(); + + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + self->state = START_FIELD; + } + } else if (IS_COMMENT_CHAR(c)) { + END_FIELD(); + self->state = EAT_COMMENT; + } else { + // normal character - save in field + PUSH_CHAR(c); + } + break; + + case IN_QUOTED_FIELD: + // in quoted field + if (IS_ESCAPE_CHAR(c)) { + // possible escape character + self->state = ESCAPE_IN_QUOTED_FIELD; + } else if (IS_QUOTE(c)) { + if (self->doublequote) { + // double quote - " represented by "" + self->state = QUOTE_IN_QUOTED_FIELD; + } else { + // end of quote part of field + self->state = IN_FIELD; + } + } else { + // normal character - save in field + PUSH_CHAR(c); + } + break; + + case ESCAPE_IN_QUOTED_FIELD: + PUSH_CHAR(c); + self->state = IN_QUOTED_FIELD; + break; + + case QUOTE_IN_QUOTED_FIELD: + // double quote - seen a quote in an quoted field + if (IS_QUOTE(c)) { + // save "" as " + + PUSH_CHAR(c); + self->state = IN_QUOTED_FIELD; + } else if (IS_DELIMITER(c)) { + // end of field - end of line not reached yet + END_FIELD(); + + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + self->state = START_FIELD; + } + } else if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else { + PUSH_CHAR(c); + self->state = IN_FIELD; + } + break; + + case EAT_COMMENT: + if (IS_TERMINATOR(c)) { + 
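+                    // comment text is never pushed to the token stream; the
+                    // line terminator closes both the comment and the record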
END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->state = EAT_CRNL; + } + break; + + // only occurs with non-custom line terminator, + // which is why we directly check for '\n' + case EAT_CRNL: + if (c == '\n') { + END_LINE(); + } else if (IS_DELIMITER(c)) { + if (self->delim_whitespace) { + END_LINE_STATE(EAT_WHITESPACE); + } else { + // Handle \r-delimited files + END_LINE_AND_FIELD_STATE(START_FIELD); + } + } else { + if (self->delim_whitespace) { + /* XXX + * first character of a new record--need to back up and + * reread + * to handle properly... + */ + i--; + buf--; // back up one character (HACK!) + END_LINE_STATE(START_RECORD); + } else { + // \r line terminator + // UGH. we don't actually want + // to consume the token. fix this later + self->stream_len = slen; + if (end_line(self) < 0) { + goto parsingerror; + } + + stream = self->stream + self->stream_len; + slen = self->stream_len; + self->state = START_RECORD; + + --i; + buf--; // let's try this character again (HACK!) + if (line_limit > 0 && + self->lines == start_lines + line_limit) { + goto linelimit; + } + } + } + break; + + // only occurs with non-custom line terminator, + // which is why we directly check for '\n' + case EAT_CRNL_NOP: // inside an ignored comment line + self->state = START_RECORD; + // \r line terminator -- parse this character again + if (c != '\n' && !IS_DELIMITER(c)) { + --i; + --buf; + } + break; + default: + break; + } + } + + _TOKEN_CLEANUP(); + + TRACE(("Finished tokenizing input\n")) + + return 0; + +parsingerror: + i++; + _TOKEN_CLEANUP(); + + return -1; + +linelimit: + i++; + _TOKEN_CLEANUP(); + + return 0; +} + +static int parser_handle_eof(parser_t *self) { + int64_t bufsize = 100; + + TRACE( + ("handling eof, datalen: %d, pstate: %d\n", self->datalen, self->state)) + + if (self->datalen != 0) return -1; + + switch (self->state) { + case START_RECORD: + case WHITESPACE_LINE: + case EAT_CRNL_NOP: + case EAT_LINE_COMMENT: + return 0; + + case ESCAPE_IN_QUOTED_FIELD: + case IN_QUOTED_FIELD: + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "EOF inside string starting at row %" PRIu64, + self->file_lines); + return -1; + + case ESCAPED_CHAR: + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "EOF following escape character"); + return -1; + + case IN_FIELD: + case START_FIELD: + case QUOTE_IN_QUOTED_FIELD: + if (end_field(self) < 0) return -1; + break; + + default: + break; + } + + if (end_line(self) < 0) + return -1; + else + return 0; +} + +int parser_consume_rows(parser_t *self, size_t nrows) { + int64_t offset, word_deletions; + uint64_t char_count, i; + + if (nrows > self->lines) { + nrows = self->lines; + } + + /* do nothing */ + if (nrows == 0) return 0; + + /* cannot guarantee that nrows + 1 has been observed */ + word_deletions = self->line_start[nrows - 1] + self->line_fields[nrows - 1]; + if (word_deletions >= 1) { + char_count = (self->word_starts[word_deletions - 1] + + strlen(self->words[word_deletions - 1]) + 1); + } else { + /* if word_deletions == 0 (i.e. 
this case) then char_count must + * be 0 too, as no data needs to be skipped */ + char_count = 0; + } + + TRACE(("parser_consume_rows: Deleting %d words, %d chars\n", word_deletions, + char_count)); + + /* move stream, only if something to move */ + if (char_count < self->stream_len) { + memmove(self->stream, (self->stream + char_count), + self->stream_len - char_count); + } + /* buffer counts */ + self->stream_len -= char_count; + + /* move token metadata */ + // Note: We should always have words_len < word_deletions, so this + // subtraction will remain appropriately-typed. + for (i = 0; i < self->words_len - word_deletions; ++i) { + offset = i + word_deletions; + + self->words[i] = self->words[offset] - char_count; + self->word_starts[i] = self->word_starts[offset] - char_count; + } + self->words_len -= word_deletions; + + /* move current word pointer to stream */ + self->pword_start -= char_count; + self->word_start -= char_count; + + /* move line metadata */ + // Note: We should always have self->lines - nrows + 1 >= 0, so this + // subtraction will remain appropriately-typed. + for (i = 0; i < self->lines - nrows + 1; ++i) { + offset = i + nrows; + self->line_start[i] = self->line_start[offset] - word_deletions; + self->line_fields[i] = self->line_fields[offset]; + } + self->lines -= nrows; + + return 0; +} + +static size_t _next_pow2(size_t sz) { + size_t result = 1; + while (result < sz) result *= 2; + return result; +} + +int parser_trim_buffers(parser_t *self) { + /* + Free memory + */ + size_t new_cap; + void *newptr; + + uint64_t i; + + /** + * Before we free up space and trim, we should + * save how many words we saw when parsing, if + * it exceeds the maximum number we saw before. + * + * This is important for when we read in chunks, + * so that we can inform subsequent chunk parsing + * as to how many words we could possibly see. + */ + if (self->words_cap > self->max_words_cap) { + self->max_words_cap = self->words_cap; + } + + /* trim words, word_starts */ + new_cap = _next_pow2(self->words_len) + 1; + if (new_cap < self->words_cap) { + TRACE(("parser_trim_buffers: new_cap < self->words_cap\n")); + self->words = realloc(self->words, new_cap * sizeof(char *)); + if (self->words == NULL) { + return PARSER_OUT_OF_MEMORY; + } + self->word_starts = realloc(self->word_starts, + new_cap * sizeof(int64_t)); + if (self->word_starts == NULL) { + return PARSER_OUT_OF_MEMORY; + } + self->words_cap = new_cap; + } + + /* trim stream */ + new_cap = _next_pow2(self->stream_len) + 1; + TRACE( + ("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = " + "%zu\n", + new_cap, self->stream_cap, self->lines_cap)); + if (new_cap < self->stream_cap) { + TRACE( + ("parser_trim_buffers: new_cap < self->stream_cap, calling " + "realloc\n")); + newptr = realloc(self->stream, new_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + // Update the pointers in the self->words array (char **) if + // `realloc` + // moved the `self->stream` buffer. This block mirrors a similar + // block in + // `make_stream_space`. 
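+            // word_starts[] holds each word's integer offset into the stream,
+            // so the cached char* entries (pword_start and words[i]) can be
+            // rebased onto the new buffer address after a moving realloc.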
+ if (self->stream != newptr) { + self->pword_start = (char *)newptr + self->word_start; + + for (i = 0; i < self->words_len; ++i) { + self->words[i] = (char *)newptr + self->word_starts[i]; + } + } + + self->stream = newptr; + self->stream_cap = new_cap; + } + } + + /* trim line_start, line_fields */ + new_cap = _next_pow2(self->lines) + 1; + if (new_cap < self->lines_cap) { + TRACE(("parser_trim_buffers: new_cap < self->lines_cap\n")); + newptr = realloc(self->line_start, + new_cap * sizeof(int64_t)); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_start = newptr; + } + newptr = realloc(self->line_fields, + new_cap * sizeof(int64_t)); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_fields = newptr; + self->lines_cap = new_cap; + } + } + + return 0; +} + +/* + nrows : number of rows to tokenize (or until reach EOF) + all : tokenize all the data vs. certain number of rows + */ + +int _tokenize_helper(parser_t *self, size_t nrows, int all, + const char *encoding_errors) { + int status = 0; + uint64_t start_lines = self->lines; + + if (self->state == FINISHED) { + return 0; + } + + TRACE(( + "_tokenize_helper: Asked to tokenize %d rows, datapos=%d, datalen=%d\n", + nrows, self->datapos, self->datalen)); + + while (1) { + if (!all && self->lines - start_lines >= nrows) break; + + if (self->datapos == self->datalen) { + status = parser_buffer_bytes(self, self->chunksize, + encoding_errors); + + if (status == REACHED_EOF) { + // close out last line + status = parser_handle_eof(self); + self->state = FINISHED; + break; + } else if (status != 0) { + return status; + } + } + + TRACE( + ("_tokenize_helper: Trying to process %d bytes, datalen=%d, " + "datapos= %d\n", + self->datalen - self->datapos, self->datalen, self->datapos)); + + status = tokenize_bytes(self, nrows, start_lines); + + if (status < 0) { + // XXX + TRACE( + ("_tokenize_helper: Status %d returned from tokenize_bytes, " + "breaking\n", + status)); + status = -1; + break; + } + } + TRACE(("leaving tokenize_helper\n")); + return status; +} + +int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) { + int status = _tokenize_helper(self, nrows, 0, encoding_errors); + return status; +} + +int tokenize_all_rows(parser_t *self, const char *encoding_errors) { + int status = _tokenize_helper(self, -1, 1, encoding_errors); + return status; +} + +/* + * Function: to_boolean + * -------------------- + * + * Validate if item should be recognized as a boolean field. + * + * item: const char* representing parsed text + * val : pointer to a uint8_t of boolean representation + * + * If item is determined to be boolean, this method will set + * the appropriate value of val and return 0. A non-zero exit + * status means that item was not inferred to be boolean, and + * leaves the value of *val unmodified. + */ +int to_boolean(const char *item, uint8_t *val) { + if (strcasecmp(item, "TRUE") == 0) { + *val = 1; + return 0; + } else if (strcasecmp(item, "FALSE") == 0) { + *val = 0; + return 0; + } + + return -1; +} + +// --------------------------------------------------------------------------- +// Implementation of xstrtod + +// +// strtod.c +// +// Convert string to double +// +// Copyright (C) 2002 Michael Ringgaard. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. Neither the name of the project nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. +// +// ----------------------------------------------------------------------- +// Modifications by Warren Weckesser, March 2011: +// * Rename strtod() to xstrtod(). +// * Added decimal and sci arguments. +// * Skip trailing spaces. +// * Commented out the other functions. +// Modifications by Richard T Guy, August 2013: +// * Add tsep argument for thousands separator +// + +// pessimistic but quick assessment, +// assuming that each decimal digit requires 4 bits to store +const int max_int_decimal_digits = (sizeof(unsigned int) * 8) / 4; + +double xstrtod(const char *str, char **endptr, char decimal, char sci, + char tsep, int skip_trailing, int *error, int *maybe_int) { + double number; + unsigned int i_number = 0; + int exponent; + int negative; + char *p = (char *)str; + double p10; + int n; + int num_digits; + int num_decimals; + + if (maybe_int != NULL) *maybe_int = 1; + // Skip leading whitespace. + while (isspace_ascii(*p)) p++; + + // Handle optional sign. + negative = 0; + switch (*p) { + case '-': + negative = 1; // Fall through to increment position. + case '+': + p++; + } + + exponent = 0; + num_digits = 0; + num_decimals = 0; + + // Process string of digits. + while (isdigit_ascii(*p) && num_digits <= max_int_decimal_digits) { + i_number = i_number * 10 + (*p - '0'); + p++; + num_digits++; + + p += (tsep != '\0' && *p == tsep); + } + number = i_number; + + if (num_digits > max_int_decimal_digits) { + // process what's left as double + while (isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + + p += (tsep != '\0' && *p == tsep); + } + } + + // Process decimal part. + if (*p == decimal) { + if (maybe_int != NULL) *maybe_int = 0; + p++; + + while (isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + num_decimals++; + } + + exponent -= num_decimals; + } + + if (num_digits == 0) { + *error = ERANGE; + return 0.0; + } + + // Correct for sign. + if (negative) number = -number; + + // Process an exponent string. + if (toupper_ascii(*p) == toupper_ascii(sci)) { + if (maybe_int != NULL) *maybe_int = 0; + + // Handle optional sign. 
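+        // The optional sign and digit string after the exponent marker adjust
+        // `exponent`, which already holds minus the number of fractional
+        // digits consumed above; the combined power of ten is applied by the
+        // squaring loop further down.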
+ negative = 0; + switch (*++p) { + case '-': + negative = 1; // Fall through to increment pos. + case '+': + p++; + } + + // Process string of digits. + num_digits = 0; + n = 0; + while (isdigit_ascii(*p)) { + n = n * 10 + (*p - '0'); + num_digits++; + p++; + } + + if (negative) + exponent -= n; + else + exponent += n; + + // If no digits, after the 'e'/'E', un-consume it + if (num_digits == 0) p--; + } + + if (exponent < DBL_MIN_EXP || exponent > DBL_MAX_EXP) { + *error = ERANGE; + return HUGE_VAL; + } + + // Scale the result. + p10 = 10.; + n = exponent; + if (n < 0) n = -n; + while (n) { + if (n & 1) { + if (exponent < 0) + number /= p10; + else + number *= p10; + } + n >>= 1; + p10 *= p10; + } + + if (number == HUGE_VAL) { + *error = ERANGE; + } + + if (skip_trailing) { + // Skip trailing whitespace. + while (isspace_ascii(*p)) p++; + } + + if (endptr) *endptr = p; + return number; +} + +double precise_xstrtod(const char *str, char **endptr, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) { + double number; + int exponent; + int negative; + char *p = (char *)str; + int num_digits; + int num_decimals; + int max_digits = 17; + int n; + + if (maybe_int != NULL) *maybe_int = 1; + // Cache powers of 10 in memory. + static double e[] = { + 1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, + 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, + 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, + 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, + 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, + 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, + 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169, + 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, + 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, + 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199, + 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209, + 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, + 1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, + 1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, + 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, + 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, + 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, + 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, + 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289, + 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299, + 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308}; + + // Skip leading whitespace. + while (isspace_ascii(*p)) p++; + + // Handle optional sign. 
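+    // At most max_digits (17) significant decimal digits are accumulated
+    // exactly; any further digits only shift the decimal exponent.  The
+    // result is then scaled in one step through the cached powers of ten
+    // above; 17 significant digits are enough to round-trip an IEEE-754
+    // double.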
+ negative = 0; + switch (*p) { + case '-': + negative = 1; // Fall through to increment position. + case '+': + p++; + } + + number = 0.; + exponent = 0; + num_digits = 0; + num_decimals = 0; + + // Process string of digits. + while (isdigit_ascii(*p)) { + if (num_digits < max_digits) { + number = number * 10. + (*p - '0'); + num_digits++; + } else { + ++exponent; + } + + p++; + p += (tsep != '\0' && *p == tsep); + } + + // Process decimal part + if (*p == decimal) { + if (maybe_int != NULL) *maybe_int = 0; + p++; + + while (num_digits < max_digits && isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + num_decimals++; + } + + if (num_digits >= max_digits) // Consume extra decimal digits. + while (isdigit_ascii(*p)) ++p; + + exponent -= num_decimals; + } + + if (num_digits == 0) { + *error = ERANGE; + return 0.0; + } + + // Correct for sign. + if (negative) number = -number; + + // Process an exponent string. + if (toupper_ascii(*p) == toupper_ascii(sci)) { + if (maybe_int != NULL) *maybe_int = 0; + + // Handle optional sign + negative = 0; + switch (*++p) { + case '-': + negative = 1; // Fall through to increment pos. + case '+': + p++; + } + + // Process string of digits. + num_digits = 0; + n = 0; + while (num_digits < max_digits && isdigit_ascii(*p)) { + n = n * 10 + (*p - '0'); + num_digits++; + p++; + } + + if (negative) + exponent -= n; + else + exponent += n; + + // If no digits after the 'e'/'E', un-consume it. + if (num_digits == 0) p--; + } + + if (exponent > 308) { + *error = ERANGE; + return HUGE_VAL; + } else if (exponent > 0) { + number *= e[exponent]; + } else if (exponent < -308) { // Subnormal + if (exponent < -616) { // Prevent invalid array access. + number = 0.; + } else { + number /= e[-308 - exponent]; + number /= e[308]; + } + + } else { + number /= e[-exponent]; + } + + if (number == HUGE_VAL || number == -HUGE_VAL) *error = ERANGE; + + if (skip_trailing) { + // Skip trailing whitespace. + while (isspace_ascii(*p)) p++; + } + + if (endptr) *endptr = p; + return number; +} + +/* copy a decimal number string with `decimal`, `tsep` as decimal point + and thousands separator to an equivalent c-locale decimal string (striping + `tsep`, replacing `decimal` with '.'). The returned memory should be free-d + with a call to `free`. +*/ + +char* _str_copy_decimal_str_c(const char *s, char **endpos, char decimal, + char tsep) { + const char *p = s; + size_t length = strlen(s); + char *s_copy = malloc(length + 1); + char *dst = s_copy; + // Skip leading whitespace. + while (isspace_ascii(*p)) p++; + // Copy Leading sign + if (*p == '+' || *p == '-') { + *dst++ = *p++; + } + // Copy integer part dropping `tsep` + while (isdigit_ascii(*p)) { + *dst++ = *p++; + p += (tsep != '\0' && *p == tsep); + } + // Replace `decimal` with '.' + if (*p == decimal) { + *dst++ = '.'; + p++; + } + // Copy fractional part after decimal (if any) + while (isdigit_ascii(*p)) { + *dst++ = *p++; + } + // Copy exponent if any + if (toupper_ascii(*p) == toupper_ascii('E')) { + *dst++ = *p++; + // Copy leading exponent sign (if any) + if (*p == '+' || *p == '-') { + *dst++ = *p++; + } + // Copy exponent digits + while (isdigit_ascii(*p)) { + *dst++ = *p++; + } + } + *dst++ = '\0'; // terminate + if (endpos != NULL) + *endpos = (char *)p; + return s_copy; +} + + +double round_trip(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int) { + // 'normalize' representation to C-locale; replace decimal with '.' 
and + // remove t(housand)sep. + char *endptr; + char *pc = _str_copy_decimal_str_c(p, &endptr, decimal, tsep); + // This is called from a nogil block in parsers.pyx + // so need to explicitly get GIL before Python calls + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + char *endpc; + double r = PyOS_string_to_double(pc, &endpc, 0); + // PyOS_string_to_double needs to consume the whole string + if (endpc == pc + strlen(pc)) { + if (q != NULL) { + // report endptr from source string (p) + *q = endptr; + } + } else { + *error = -1; + if (q != NULL) { + // p and pc are different len due to tsep removal. Can't report + // how much it has consumed of p. Just rewind to beginning. + *q = (char *)p; // TODO(willayd): this could be undefined behavior + } + } + if (maybe_int != NULL) *maybe_int = 0; + if (PyErr_Occurred() != NULL) *error = -1; + else if (r == Py_HUGE_VAL) *error = (int)Py_HUGE_VAL; + PyErr_Clear(); + + PyGILState_Release(gstate); + free(pc); + if (skip_trailing && q != NULL && *q != p) { + while (isspace_ascii(**q)) { + (*q)++; + } + } + return r; +} + +// End of xstrtod code +// --------------------------------------------------------------------------- + +void uint_state_init(uint_state *self) { + self->seen_sint = 0; + self->seen_uint = 0; + self->seen_null = 0; +} + +int uint64_conflict(uint_state *self) { + return self->seen_uint && (self->seen_sint || self->seen_null); +} + +int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, + int *error, char tsep) { + const char *p = p_item; + int isneg = 0; + int64_t number = 0; + int d; + + // Skip leading spaces. + while (isspace_ascii(*p)) { + ++p; + } + + // Handle sign. + if (*p == '-') { + isneg = 1; + ++p; + } else if (*p == '+') { + p++; + } + + // Check that there is a first digit. + if (!isdigit_ascii(*p)) { + // Error... + *error = ERROR_NO_DIGITS; + return 0; + } + + if (isneg) { + // If number is greater than pre_min, at least one more digit + // can be processed without overflowing. + int dig_pre_min = -(int_min % 10); + int64_t pre_min = int_min / 10; + + // Process the digits. + d = *p; + if (tsep != '\0') { + while (1) { + if (d == tsep) { + d = *++p; + continue; + } else if (!isdigit_ascii(d)) { + break; + } + if ((number > pre_min) || + ((number == pre_min) && (d - '0' <= dig_pre_min))) { + number = number * 10 - (d - '0'); + d = *++p; + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } else { + while (isdigit_ascii(d)) { + if ((number > pre_min) || + ((number == pre_min) && (d - '0' <= dig_pre_min))) { + number = number * 10 - (d - '0'); + d = *++p; + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } + } else { + // If number is less than pre_max, at least one more digit + // can be processed without overflowing. + int64_t pre_max = int_max / 10; + int dig_pre_max = int_max % 10; + + // Process the digits. + d = *p; + if (tsep != '\0') { + while (1) { + if (d == tsep) { + d = *++p; + continue; + } else if (!isdigit_ascii(d)) { + break; + } + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } else { + while (isdigit_ascii(d)) { + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } + } + + // Skip trailing spaces. 
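+    // (Only trailing whitespace is tolerated after the digits; any other
+    //  leftover character fails below with ERROR_INVALID_CHARS.  Both digit
+    //  loops above guard every multiply-add against int_min / int_max, and
+    //  the negative branch accumulates downward, so INT64_MIN itself parses
+    //  without overflow.)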
+ while (isspace_ascii(*p)) { + ++p; + } + + // Did we use up all the characters? + if (*p) { + *error = ERROR_INVALID_CHARS; + return 0; + } + + *error = 0; + return number; +} + +uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, + uint64_t uint_max, int *error, char tsep) { + const char *p = p_item; + uint64_t pre_max = uint_max / 10; + int dig_pre_max = uint_max % 10; + uint64_t number = 0; + int d; + + // Skip leading spaces. + while (isspace_ascii(*p)) { + ++p; + } + + // Handle sign. + if (*p == '-') { + state->seen_sint = 1; + *error = 0; + return 0; + } else if (*p == '+') { + p++; + } + + // Check that there is a first digit. + if (!isdigit_ascii(*p)) { + // Error... + *error = ERROR_NO_DIGITS; + return 0; + } + + // If number is less than pre_max, at least one more digit + // can be processed without overflowing. + // + // Process the digits. + d = *p; + if (tsep != '\0') { + while (1) { + if (d == tsep) { + d = *++p; + continue; + } else if (!isdigit_ascii(d)) { + break; + } + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } else { + while (isdigit_ascii(d)) { + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } + + // Skip trailing spaces. + while (isspace_ascii(*p)) { + ++p; + } + + // Did we use up all the characters? + if (*p) { + *error = ERROR_INVALID_CHARS; + return 0; + } + + if (number > (uint64_t)int_max) { + state->seen_uint = 1; + } + + *error = 0; + return number; +} diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h new file mode 100644 index 00000000..eea9bfd4 --- /dev/null +++ b/pandas/_libs/src/parser/tokenizer.h @@ -0,0 +1,236 @@ +/* + +Copyright (c) 2012, Lambda Foundry, Inc., except where noted + +Incorporates components of WarrenWeckesser/textreader, licensed under 3-clause +BSD + +See LICENSE for the license + +*/ + +#ifndef PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ +#define PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ + +#define PY_SSIZE_T_CLEAN +#include + +#define ERROR_NO_DIGITS 1 +#define ERROR_OVERFLOW 2 +#define ERROR_INVALID_CHARS 3 + +#include +#include "../inline_helper.h" +#include "../headers/portable.h" + +#include "khash.h" + +#define STREAM_INIT_SIZE 32 + +#define REACHED_EOF 1 +#define CALLING_READ_FAILED 2 + + +/* + + C flat file parsing low level code for pandas / NumPy + + */ + +/* + * Common set of error types for the read_rows() and tokenize() + * functions. + */ + +// #define VERBOSE +#if defined(VERBOSE) +#define TRACE(X) printf X; +#else +#define TRACE(X) +#endif // VERBOSE + +#define PARSER_OUT_OF_MEMORY -1 + +/* + * TODO: Might want to couple count_rows() with read_rows() to avoid + * duplication of some file I/O. 
+ */ + +typedef enum { + START_RECORD, + START_FIELD, + ESCAPED_CHAR, + IN_FIELD, + IN_QUOTED_FIELD, + ESCAPE_IN_QUOTED_FIELD, + QUOTE_IN_QUOTED_FIELD, + EAT_CRNL, + EAT_CRNL_NOP, + EAT_WHITESPACE, + EAT_COMMENT, + EAT_LINE_COMMENT, + WHITESPACE_LINE, + START_FIELD_IN_SKIP_LINE, + IN_FIELD_IN_SKIP_LINE, + IN_QUOTED_FIELD_IN_SKIP_LINE, + QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE, + FINISHED +} ParserState; + +typedef enum { + QUOTE_MINIMAL, + QUOTE_ALL, + QUOTE_NONNUMERIC, + QUOTE_NONE +} QuoteStyle; + +typedef enum { + ERROR, + WARN, + SKIP +} BadLineHandleMethod; + +typedef void *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read, + int *status, const char *encoding_errors); +typedef int (*io_cleanup)(void *src); + +typedef struct parser_t { + void *source; + io_callback cb_io; + io_cleanup cb_cleanup; + + int64_t chunksize; // Number of bytes to prepare for each chunk + char *data; // pointer to data to be processed + int64_t datalen; // amount of data available + int64_t datapos; + + // where to write out tokenized data + char *stream; + uint64_t stream_len; + uint64_t stream_cap; + + // Store words in (potentially ragged) matrix for now, hmm + char **words; + int64_t *word_starts; // where we are in the stream + uint64_t words_len; + uint64_t words_cap; + uint64_t max_words_cap; // maximum word cap encountered + + char *pword_start; // pointer to stream start of current field + int64_t word_start; // position start of current field + + int64_t *line_start; // position in words for start of line + int64_t *line_fields; // Number of fields in each line + uint64_t lines; // Number of (good) lines observed + uint64_t file_lines; // Number of lines (including bad or skipped) + uint64_t lines_cap; // Vector capacity + + // Tokenizing stuff + ParserState state; + int doublequote; /* is " represented by ""? */ + char delimiter; /* field separator */ + int delim_whitespace; /* delimit by consuming space/tabs instead */ + char quotechar; /* quote character */ + char escapechar; /* escape character */ + char lineterminator; + int skipinitialspace; /* ignore spaces following delimiter? */ + int quoting; /* style of quoting to write */ + + char commentchar; + int allow_embedded_newline; + + int usecols; // Boolean: 1: usecols provided, 0: none provided + + Py_ssize_t expected_fields; + BadLineHandleMethod on_bad_lines; + + // floating point options + char decimal; + char sci; + + // thousands separator (comma, period) + char thousands; + + int header; // Boolean: 1: has header, 0: no header + int64_t header_start; // header row start + uint64_t header_end; // header row end + + void *skipset; + PyObject *skipfunc; + int64_t skip_first_N_rows; + int64_t skip_footer; + double (*double_converter)(const char *, char **, + char, char, char, int, int *, int *); + + // error handling + char *warn_msg; + char *error_msg; + + int skip_empty_lines; +} parser_t; + +typedef struct coliter_t { + char **words; + int64_t *line_start; + int64_t col; +} coliter_t; + +void coliter_setup(coliter_t *self, parser_t *parser, int64_t i, int64_t start); + +#define COLITER_NEXT(iter, word) \ + do { \ + const int64_t i = *iter.line_start++ + iter.col; \ + word = i >= *iter.line_start ? 
"" : iter.words[i]; \ + } while (0) + +parser_t *parser_new(void); + +int parser_init(parser_t *self); + +int parser_consume_rows(parser_t *self, size_t nrows); + +int parser_trim_buffers(parser_t *self); + +int parser_add_skiprow(parser_t *self, int64_t row); + +int parser_set_skipfirstnrows(parser_t *self, int64_t nrows); + +void parser_free(parser_t *self); + +void parser_del(parser_t *self); + +void parser_set_default_options(parser_t *self); + +int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors); + +int tokenize_all_rows(parser_t *self, const char *encoding_errors); + +// Have parsed / type-converted a chunk of data +// and want to free memory from the token stream + +typedef struct uint_state { + int seen_sint; + int seen_uint; + int seen_null; +} uint_state; + +void uint_state_init(uint_state *self); + +int uint64_conflict(uint_state *self); + +uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, + uint64_t uint_max, int *error, char tsep); +int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, + int *error, char tsep); +double xstrtod(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int); +double precise_xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int); + +// GH-15140 - round_trip requires and acquires the GIL on its own +double round_trip(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int); +int to_boolean(const char *item, uint8_t *val); + +#endif // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h new file mode 100644 index 00000000..5d0b144a --- /dev/null +++ b/pandas/_libs/src/skiplist.h @@ -0,0 +1,300 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Flexibly-sized, index-able skiplist data structure for maintaining a sorted +list of values + +Port of Wes McKinney's Cython version of Raymond Hettinger's original pure +Python recipe (https://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) +*/ + +#ifndef PANDAS__LIBS_SRC_SKIPLIST_H_ +#define PANDAS__LIBS_SRC_SKIPLIST_H_ + +#include +#include +#include +#include +#include "inline_helper.h" + +PANDAS_INLINE float __skiplist_nanf(void) { + const union { + int __i; + float __f; + } __bint = {0x7fc00000UL}; + return __bint.__f; +} +#define PANDAS_NAN ((double)__skiplist_nanf()) + +PANDAS_INLINE double Log2(double val) { return log(val) / log(2.); } + +typedef struct node_t node_t; + +struct node_t { + node_t **next; + int *width; + double value; + int is_nil; + int levels; + int ref_count; +}; + +typedef struct { + node_t *head; + node_t **tmp_chain; + int *tmp_steps; + int size; + int maxlevels; +} skiplist_t; + +PANDAS_INLINE double urand(void) { + return ((double)rand() + 1) / ((double)RAND_MAX + 2); +} + +PANDAS_INLINE int int_min(int a, int b) { return a < b ? 
a : b; } + +PANDAS_INLINE node_t *node_init(double value, int levels) { + node_t *result; + result = (node_t *)malloc(sizeof(node_t)); + if (result) { + result->value = value; + result->levels = levels; + result->is_nil = 0; + result->ref_count = 0; + result->next = (node_t **)malloc(levels * sizeof(node_t *)); + result->width = (int *)malloc(levels * sizeof(int)); + if (!(result->next && result->width) && (levels != 0)) { + free(result->next); + free(result->width); + free(result); + return NULL; + } + } + return result; +} + +// do this ourselves +PANDAS_INLINE void node_incref(node_t *node) { ++(node->ref_count); } + +PANDAS_INLINE void node_decref(node_t *node) { --(node->ref_count); } + +static void node_destroy(node_t *node) { + int i; + if (node) { + if (node->ref_count <= 1) { + for (i = 0; i < node->levels; ++i) { + node_destroy(node->next[i]); + } + free(node->next); + free(node->width); + // printf("Reference count was 1, freeing\n"); + free(node); + } else { + node_decref(node); + } + // pretty sure that freeing the struct above will be enough + } +} + +PANDAS_INLINE void skiplist_destroy(skiplist_t *skp) { + if (skp) { + node_destroy(skp->head); + free(skp->tmp_steps); + free(skp->tmp_chain); + free(skp); + } +} + +PANDAS_INLINE skiplist_t *skiplist_init(int expected_size) { + skiplist_t *result; + node_t *NIL, *head; + int maxlevels, i; + + maxlevels = 1 + Log2((double)expected_size); + result = (skiplist_t *)malloc(sizeof(skiplist_t)); + if (!result) { + return NULL; + } + result->tmp_chain = (node_t **)malloc(maxlevels * sizeof(node_t *)); + result->tmp_steps = (int *)malloc(maxlevels * sizeof(int)); + result->maxlevels = maxlevels; + result->size = 0; + + head = result->head = node_init(PANDAS_NAN, maxlevels); + NIL = node_init(0.0, 0); + + if (!(result->tmp_chain && result->tmp_steps && result->head && NIL)) { + skiplist_destroy(result); + node_destroy(NIL); + return NULL; + } + + node_incref(head); + + NIL->is_nil = 1; + + for (i = 0; i < maxlevels; ++i) { + head->next[i] = NIL; + head->width[i] = 1; + node_incref(NIL); + } + + return result; +} + +// 1 if left < right, 0 if left == right, -1 if left > right +PANDAS_INLINE int _node_cmp(node_t *node, double value) { + if (node->is_nil || node->value > value) { + return -1; + } else if (node->value < value) { + return 1; + } else { + return 0; + } +} + +PANDAS_INLINE double skiplist_get(skiplist_t *skp, int i, int *ret) { + node_t *node; + int level; + + if (i < 0 || i >= skp->size) { + *ret = 0; + return 0; + } + + node = skp->head; + ++i; + for (level = skp->maxlevels - 1; level >= 0; --level) { + while (node->width[level] <= i) { + i -= node->width[level]; + node = node->next[level]; + } + } + + *ret = 1; + return node->value; +} + +// Returns the lowest rank of all elements with value `value`, as opposed to the +// highest rank returned by `skiplist_insert`. +PANDAS_INLINE int skiplist_min_rank(skiplist_t *skp, double value) { + node_t *node; + int level, rank = 0; + + node = skp->head; + for (level = skp->maxlevels - 1; level >= 0; --level) { + while (_node_cmp(node->next[level], value) > 0) { + rank += node->width[level]; + node = node->next[level]; + } + } + + return rank + 1; +} + +// Returns the rank of the inserted element. When there are duplicates, +// `rank` is the highest of the group, i.e. 
the 'max' method of +// https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rank.html +PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) { + node_t *node, *prevnode, *newnode, *next_at_level; + int *steps_at_level; + int size, steps, level, rank = 0; + node_t **chain; + + chain = skp->tmp_chain; + + steps_at_level = skp->tmp_steps; + memset(steps_at_level, 0, skp->maxlevels * sizeof(int)); + + node = skp->head; + + for (level = skp->maxlevels - 1; level >= 0; --level) { + next_at_level = node->next[level]; + while (_node_cmp(next_at_level, value) >= 0) { + steps_at_level[level] += node->width[level]; + rank += node->width[level]; + node = next_at_level; + next_at_level = node->next[level]; + } + chain[level] = node; + } + + size = int_min(skp->maxlevels, 1 - ((int)Log2(urand()))); + + newnode = node_init(value, size); + if (!newnode) { + return -1; + } + steps = 0; + + for (level = 0; level < size; ++level) { + prevnode = chain[level]; + newnode->next[level] = prevnode->next[level]; + + prevnode->next[level] = newnode; + node_incref(newnode); // increment the reference count + + newnode->width[level] = prevnode->width[level] - steps; + prevnode->width[level] = steps + 1; + + steps += steps_at_level[level]; + } + + for (level = size; level < skp->maxlevels; ++level) { + chain[level]->width[level] += 1; + } + + ++(skp->size); + + return rank + 1; +} + +PANDAS_INLINE int skiplist_remove(skiplist_t *skp, double value) { + int level, size; + node_t *node, *prevnode, *tmpnode, *next_at_level; + node_t **chain; + + chain = skp->tmp_chain; + node = skp->head; + + for (level = skp->maxlevels - 1; level >= 0; --level) { + next_at_level = node->next[level]; + while (_node_cmp(next_at_level, value) > 0) { + node = next_at_level; + next_at_level = node->next[level]; + } + chain[level] = node; + } + + if (value != chain[0]->next[0]->value) { + return 0; + } + + size = chain[0]->next[0]->levels; + + for (level = 0; level < size; ++level) { + prevnode = chain[level]; + + tmpnode = prevnode->next[level]; + + prevnode->width[level] += tmpnode->width[level] - 1; + prevnode->next[level] = tmpnode->next[level]; + + tmpnode->next[level] = NULL; + node_destroy(tmpnode); // decrement refcount or free + } + + for (level = size; level < skp->maxlevels; ++level) { + --(chain[level]->width[level]); + } + + --(skp->size); + return 1; +} + +#endif // PANDAS__LIBS_SRC_SKIPLIST_H_ diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h new file mode 100644 index 00000000..71df0c5a --- /dev/null +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -0,0 +1,316 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +/* +Ultra fast JSON encoder and decoder +Developed by Jonas Tarnstrom (jonas@esn.me). + +Encoder notes: +------------------ + +:: Cyclic references :: +Cyclic referenced objects are not detected. +Set JSONObjectEncoder.recursionMax to suitable value or make sure input object +tree doesn't have cyclic references. + +*/ + +#ifndef PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ +#define PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ + +#include +#include + +// Don't output any extra whitespaces when encoding +#define JSON_NO_EXTRA_WHITESPACE + +// Max decimals to encode double floating point numbers with +#ifndef JSON_DOUBLE_MAX_DECIMALS +#define JSON_DOUBLE_MAX_DECIMALS 15 +#endif + +// Max recursion depth, default for encoder +#ifndef JSON_MAX_RECURSION_DEPTH +#define JSON_MAX_RECURSION_DEPTH 1024 +#endif + +// Max recursion depth, default for decoder +#ifndef JSON_MAX_OBJECT_DEPTH +#define JSON_MAX_OBJECT_DEPTH 1024 +#endif + +/* +Dictates and limits how much stack space for buffers UltraJSON will use before resorting to provided heap functions */ +#ifndef JSON_MAX_STACK_BUFFER_SIZE +#define JSON_MAX_STACK_BUFFER_SIZE 131072 +#endif + +#ifdef _WIN32 + +typedef __int64 JSINT64; +typedef unsigned __int64 JSUINT64; + +typedef __int32 JSINT32; +typedef unsigned __int32 JSUINT32; +typedef unsigned __int8 JSUINT8; +typedef unsigned __int16 JSUTF16; +typedef unsigned __int32 JSUTF32; +typedef __int64 JSLONG; + +#define EXPORTFUNCTION __declspec(dllexport) + +#define FASTCALL_MSVC __fastcall + +#define INLINE_PREFIX static __inline + +#else + +#include +typedef int64_t JSINT64; +typedef uint64_t JSUINT64; + +typedef int32_t JSINT32; +typedef uint32_t JSUINT32; + +#define FASTCALL_MSVC + +#define INLINE_PREFIX static inline + +typedef uint8_t JSUINT8; +typedef uint16_t JSUTF16; +typedef uint32_t JSUTF32; + +typedef int64_t JSLONG; + +#define EXPORTFUNCTION +#endif + +#if !(defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)) + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define __LITTLE_ENDIAN__ +#else + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define __BIG_ENDIAN__ +#endif + +#endif + +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +#error "Endianness not supported" +#endif + +enum JSTYPES { + JT_NULL, // NULL + JT_TRUE, // boolean true + JT_FALSE, // boolean false + JT_INT, // 
(JSINT32 (signed 32-bit)) + JT_LONG, // (JSINT64 (signed 64-bit)) + JT_DOUBLE, // (double) + JT_BIGNUM, // integer larger than sys.maxsize + JT_UTF8, // (char 8-bit) + JT_ARRAY, // Array structure + JT_OBJECT, // Key/Value structure + JT_INVALID, // Internal, do not return nor expect + JT_POS_INF, // Positive infinity + JT_NEG_INF, // Negative infinity +}; + +typedef void * JSOBJ; +typedef void * JSITER; + +typedef struct __JSONTypeContext { + int type; + void *encoder; + void *prv; +} JSONTypeContext; + +/* +Function pointer declarations, suitable for implementing UltraJSON */ +typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc); +typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc); +typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc); +typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc); +typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc, + size_t *outLen); +typedef void *(*JSPFN_MALLOC)(size_t size); +typedef void (*JSPFN_FREE)(void *pptr); +typedef void *(*JSPFN_REALLOC)(void *base, size_t size); + +typedef struct __JSONObjectEncoder { + void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); + void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); + const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen); + JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc); + double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); + const char *(*getBigNumStringValue)(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen); + + /* + Begin iteration of an iteratable object (JS_ARRAY or JS_OBJECT) + Implementor should setup iteration state in ti->prv + */ + JSPFN_ITERBEGIN iterBegin; + + /* + Retrieve next object in an iteration. Should return 0 to indicate iteration has reached end or 1 if there are more items. + Implementor is responsible for keeping state of the iteration. Use ti->prv fields for this + */ + JSPFN_ITERNEXT iterNext; + + /* + Ends the iteration of an iteratable object. + Any iteration state stored in ti->prv can be freed here + */ + JSPFN_ITEREND iterEnd; + + /* + Returns a reference to the value object of an iterator + The is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object + */ + JSPFN_ITERGETVALUE iterGetValue; + + /* + Return name of iterator. + The is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object + */ + JSPFN_ITERGETNAME iterGetName; + + /* + Release a value as indicated by setting ti->release = 1 in the previous getValue call. + The ti->prv array should contain the necessary context to release the value + */ + void (*releaseObject)(JSOBJ obj); + + /* Library functions + Set to NULL to use STDLIB malloc,realloc,free */ + JSPFN_MALLOC malloc; + JSPFN_REALLOC realloc; + JSPFN_FREE free; + + /* + Configuration for max recursion, set to 0 to use default (see JSON_MAX_RECURSION_DEPTH)*/ + int recursionMax; + + /* + Configuration for max decimals of double floating point numbers to encode (0-9) */ + int doublePrecision; + + /* + If true output will be ASCII with all characters above 127 encoded as \uXXXX. If false output will be UTF-8 or what ever charset strings are brought as */ + int forceASCII; + + /* + If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e, and \u0026, respectively. If false, no special encoding will be used. 
*/ + int encodeHTMLChars; + + /* + Configuration for spaces of indent */ + int indent; + + /* + Set to an error message if error occurred */ + const char *errorMsg; + JSOBJ errorObj; + + /* Buffer stuff */ + char *start; + char *offset; + char *end; + int heap; + int level; +} JSONObjectEncoder; + +/* +Encode an object structure into JSON. + +Arguments: +obj - An anonymous type representing the object +enc - Function definitions for querying JSOBJ type +buffer - Preallocated buffer to store result in. If NULL function allocates own buffer +cbBuffer - Length of buffer (ignored if buffer is NULL) + +Returns: +Encoded JSON object as a null terminated char string. + +NOTE: +If the supplied buffer wasn't enough to hold the result the function will allocate a new buffer. +Life cycle of the provided buffer must still be handled by caller. + +If the return value doesn't equal the specified buffer caller must release the memory using +JSONObjectEncoder.free or free() as specified when calling this function. +*/ +EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, + char *buffer, size_t cbBuffer); + +typedef struct __JSONObjectDecoder { + JSOBJ (*newString)(void *prv, wchar_t *start, wchar_t *end); + int (*objectAddKey)(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value); + int (*arrayAddItem)(void *prv, JSOBJ obj, JSOBJ value); + JSOBJ (*newTrue)(void *prv); + JSOBJ (*newFalse)(void *prv); + JSOBJ (*newNull)(void *prv); + JSOBJ (*newPosInf)(void *prv); + JSOBJ (*newNegInf)(void *prv); + JSOBJ (*newObject)(void *prv, void *decoder); + JSOBJ (*endObject)(void *prv, JSOBJ obj); + JSOBJ (*newArray)(void *prv, void *decoder); + JSOBJ (*endArray)(void *prv, JSOBJ obj); + JSOBJ (*newInt)(void *prv, JSINT32 value); + JSOBJ (*newLong)(void *prv, JSINT64 value); + JSOBJ (*newUnsignedLong)(void *prv, JSUINT64 value); + JSOBJ (*newDouble)(void *prv, double value); + void (*releaseObject)(void *prv, JSOBJ obj, void *decoder); + JSPFN_MALLOC malloc; + JSPFN_FREE free; + JSPFN_REALLOC realloc; + char *errorStr; + char *errorOffset; + int preciseFloat; + void *prv; +} JSONObjectDecoder; + +EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, + const char *buffer, size_t cbBuffer); +EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t); + +#endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c new file mode 100644 index 00000000..c7779b8b --- /dev/null +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -0,0 +1,1202 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ultrajson.h" + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif +#ifndef NULL +#define NULL 0 +#endif + +struct DecoderState { + char *start; + char *end; + wchar_t *escStart; + wchar_t *escEnd; + int escHeap; + int lastType; + JSUINT32 objDepth; + void *prv; + JSONObjectDecoder *dec; +}; + +JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds); +typedef JSOBJ (*PFN_DECODER)(struct DecoderState *ds); + +static JSOBJ SetError(struct DecoderState *ds, int offset, + const char *message) { + ds->dec->errorOffset = ds->start + offset; + ds->dec->errorStr = (char *)message; + return NULL; +} + +double createDouble(double intNeg, double intValue, double frcValue, + int frcDecimalCount) { + static const double g_pow10[] = {1.0, + 0.1, + 0.01, + 0.001, + 0.0001, + 0.00001, + 0.000001, + 0.0000001, + 0.00000001, + 0.000000001, + 0.0000000001, + 0.00000000001, + 0.000000000001, + 0.0000000000001, + 0.00000000000001, + 0.000000000000001}; + return (intValue + (frcValue * g_pow10[frcDecimalCount])) * intNeg; +} + +JSOBJ FASTCALL_MSVC decodePreciseFloat(struct DecoderState *ds) { + char *end; + double value; + errno = 0; + + value = strtod(ds->start, &end); + + if (errno == ERANGE) { + return SetError(ds, -1, "Range error when decoding numeric as double"); + } + + ds->start = end; + return ds->dec->newDouble(ds->prv, value); +} + +JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { + int intNeg = 1; + JSUINT64 intValue; + JSUINT64 prevIntValue; + int chr; + int decimalCount = 0; + double frcValue = 0.0; + double expNeg; + double expValue; + char *offset = ds->start; + + JSUINT64 overflowLimit = LLONG_MAX; + + if (*(offset) == 'I') { + goto DECODE_INF; + } else if (*(offset) == 'N') { + goto DECODE_NAN; + } else if (*(offset) == '-') { + offset++; + intNeg = -1; + overflowLimit = LLONG_MIN; + if (*(offset) == 'I') { + goto DECODE_INF; + } + } + + // Scan integer part + intValue = 0; + + while (1) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + // PERF: Don't do 64-bit arithmetic here unless we have to + prevIntValue = intValue; + intValue = intValue * 10ULL + 
(JSLONG) (chr - 48); + + if (intNeg == 1 && prevIntValue > intValue) { + return SetError(ds, -1, "Value is too big!"); + } else if (intNeg == -1 && intValue > overflowLimit) { + return SetError(ds, -1, overflowLimit == LLONG_MAX ? + "Value is too big!" : "Value is too small"); + } + + offset++; + break; + } + case '.': { + offset++; + goto DECODE_FRACTION; + break; + } + case 'e': + case 'E': { + offset++; + goto DECODE_EXPONENT; + break; + } + + default: { + goto BREAK_INT_LOOP; + break; + } + } + } + +BREAK_INT_LOOP: + + ds->lastType = JT_INT; + ds->start = offset; + + if (intNeg == 1 && (intValue & 0x8000000000000000ULL) != 0) + return ds->dec->newUnsignedLong(ds->prv, intValue); + else if ((intValue >> 31)) + return ds->dec->newLong(ds->prv, (JSINT64)(intValue * (JSINT64)intNeg)); + else + return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); + +DECODE_FRACTION: + + if (ds->dec->preciseFloat) { + return decodePreciseFloat(ds); + } + + // Scan fraction part + frcValue = 0.0; + for (;;) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) { + frcValue = frcValue * 10.0 + (double)(chr - 48); + decimalCount++; + } + offset++; + break; + } + case 'e': + case 'E': { + offset++; + goto DECODE_EXPONENT; + break; + } + default: { goto BREAK_FRC_LOOP; } + } + } + +BREAK_FRC_LOOP: + // FIXME: Check for arithmetic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble( + ds->prv, + createDouble((double)intNeg, (double)intValue, frcValue, decimalCount)); + +DECODE_EXPONENT: + if (ds->dec->preciseFloat) { + return decodePreciseFloat(ds); + } + + expNeg = 1.0; + + if (*(offset) == '-') { + expNeg = -1.0; + offset++; + } else if (*(offset) == '+') { + expNeg = +1.0; + offset++; + } + + expValue = 0.0; + + for (;;) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + expValue = expValue * 10.0 + (double)(chr - 48); + offset++; + break; + } + default: { goto BREAK_EXP_LOOP; } + } + } + +DECODE_NAN: + offset++; + if (*(offset++) != 'a') goto SET_NAN_ERROR; + if (*(offset++) != 'N') goto SET_NAN_ERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SET_NAN_ERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'NaN'"); + +DECODE_INF: + offset++; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'f') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 't') goto SET_INF_ERROR; + if (*(offset++) != 'y') goto SET_INF_ERROR; + + ds->start = offset; + + if (intNeg == 1) { + ds->lastType = JT_POS_INF; + return ds->dec->newPosInf(ds->prv); + } else { + ds->lastType = JT_NEG_INF; + return ds->dec->newNegInf(ds->prv); + } + +SET_INF_ERROR: + if (intNeg == 1) { + const char *msg = "Unexpected character found when decoding 'Infinity'"; + return SetError(ds, -1, msg); + } else { + const char *msg = "Unexpected character found when decoding '-Infinity'"; + return SetError(ds, -1, msg); + } + + +BREAK_EXP_LOOP: + // FIXME: Check for arithmetic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble( + ds->prv, + 
createDouble((double)intNeg, (double)intValue, frcValue, decimalCount) * + pow(10.0, expValue * expNeg)); +} + +JSOBJ FASTCALL_MSVC decode_true(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'r') goto SETERROR; + if (*(offset++) != 'u') goto SETERROR; + if (*(offset++) != 'e') goto SETERROR; + + ds->lastType = JT_TRUE; + ds->start = offset; + return ds->dec->newTrue(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'true'"); +} + +JSOBJ FASTCALL_MSVC decode_false(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'a') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + if (*(offset++) != 's') goto SETERROR; + if (*(offset++) != 'e') goto SETERROR; + + ds->lastType = JT_FALSE; + ds->start = offset; + return ds->dec->newFalse(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'false'"); +} + +JSOBJ FASTCALL_MSVC decode_null(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'u') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'null'"); +} + +void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) { + char *offset; + + for (offset = ds->start; (ds->end - offset) > 0; offset++) { + switch (*offset) { + case ' ': + case '\t': + case '\r': + case '\n': + break; + + default: + ds->start = offset; + return; + } + } + + if (offset == ds->end) { + ds->start = ds->end; + } +} + +enum DECODESTRINGSTATE { + DS_ISNULL = 0x32, + DS_ISQUOTE, + DS_ISESCAPE, + DS_UTFLENERROR, +}; + +static const JSUINT8 g_decoderLookup[256] = { + /* 0x00 */ DS_ISNULL, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x10 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x20 */ 1, + 1, + DS_ISQUOTE, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x30 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x40 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x50 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + DS_ISESCAPE, + 1, + 1, + 1, + /* 0x60 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x70 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x80 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x90 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xa0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xb0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xc0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xd0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xe0 */ 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + /* 0xf0 */ 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, +}; + +JSOBJ FASTCALL_MSVC decode_string(struct DecoderState *ds) { + JSUTF16 sur[2] = {0}; + int iSur = 
0; + int index; + wchar_t *escOffset; + wchar_t *escStart; + size_t escLen = (ds->escEnd - ds->escStart); + JSUINT8 *inputOffset; + JSUINT8 oct; + JSUTF32 ucs; + ds->lastType = JT_INVALID; + ds->start++; + + if ((size_t)(ds->end - ds->start) > escLen) { + size_t newSize = (ds->end - ds->start); + + if (ds->escHeap) { + if (newSize > (SIZE_MAX / sizeof(wchar_t))) { + return SetError(ds, -1, "Could not reserve memory block"); + } + escStart = (wchar_t *)ds->dec->realloc(ds->escStart, + newSize * sizeof(wchar_t)); + if (!escStart) { + ds->dec->free(ds->escStart); + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = escStart; + } else { + wchar_t *oldStart = ds->escStart; + if (newSize > (SIZE_MAX / sizeof(wchar_t))) { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = + (wchar_t *)ds->dec->malloc(newSize * sizeof(wchar_t)); + if (!ds->escStart) { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escHeap = 1; + memcpy(ds->escStart, oldStart, escLen * sizeof(wchar_t)); + } + + ds->escEnd = ds->escStart + newSize; + } + + escOffset = ds->escStart; + inputOffset = (JSUINT8 *)ds->start; + + for (;;) { + switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) { + case DS_ISNULL: { + return SetError(ds, -1, + "Unmatched ''\"' when when decoding 'string'"); + } + case DS_ISQUOTE: { + ds->lastType = JT_UTF8; + inputOffset++; + ds->start += ((char *)inputOffset - (ds->start)); + return ds->dec->newString(ds->prv, ds->escStart, escOffset); + } + case DS_UTFLENERROR: { + return SetError( + ds, -1, + "Invalid UTF-8 sequence length when decoding 'string'"); + } + case DS_ISESCAPE: + inputOffset++; + switch (*inputOffset) { + case '\\': + *(escOffset++) = L'\\'; + inputOffset++; + continue; + case '\"': + *(escOffset++) = L'\"'; + inputOffset++; + continue; + case '/': + *(escOffset++) = L'/'; + inputOffset++; + continue; + case 'b': + *(escOffset++) = L'\b'; + inputOffset++; + continue; + case 'f': + *(escOffset++) = L'\f'; + inputOffset++; + continue; + case 'n': + *(escOffset++) = L'\n'; + inputOffset++; + continue; + case 'r': + *(escOffset++) = L'\r'; + inputOffset++; + continue; + case 't': + *(escOffset++) = L'\t'; + inputOffset++; + continue; + + case 'u': { + int index; + inputOffset++; + + for (index = 0; index < 4; index++) { + switch (*inputOffset) { + case '\0': + return SetError(ds, -1, + "Unterminated unicode " + "escape sequence when " + "decoding 'string'"); + default: + return SetError(ds, -1, + "Unexpected character in " + "unicode escape sequence " + "when decoding 'string'"); + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + sur[iSur] = (sur[iSur] << 4) + + (JSUTF16)(*inputOffset - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + sur[iSur] = (sur[iSur] << 4) + 10 + + (JSUTF16)(*inputOffset - 'a'); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + sur[iSur] = (sur[iSur] << 4) + 10 + + (JSUTF16)(*inputOffset - 'A'); + break; + } + + inputOffset++; + } + + if (iSur == 0) { + if ((sur[iSur] & 0xfc00) == 0xd800) { + // First of a surrogate pair, continue parsing + iSur++; + break; + } + (*escOffset++) = (wchar_t)sur[iSur]; + iSur = 0; + } else { + // Decode pair + if ((sur[1] & 0xfc00) != 0xdc00) { + return SetError(ds, -1, + "Unpaired high surrogate when " + "decoding 'string'"); + } +#if WCHAR_MAX == 0xffff + (*escOffset++) = (wchar_t)sur[0]; + (*escOffset++) = 
(wchar_t)sur[1]; +#else + (*escOffset++) = + (wchar_t)0x10000 + + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); +#endif + iSur = 0; + } + break; + } + + case '\0': + return SetError(ds, -1, + "Unterminated escape sequence when " + "decoding 'string'"); + default: + return SetError(ds, -1, + "Unrecognized escape sequence when " + "decoding 'string'"); + } + break; + + case 1: { + *(escOffset++) = (wchar_t)(*inputOffset++); + break; + } + + case 2: { + ucs = (*inputOffset++) & 0x1f; + ucs <<= 6; + if (((*inputOffset) & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + ucs |= (*inputOffset++) & 0x3f; + if (ucs < 0x80) + return SetError(ds, -1, + "Overlong 2 byte UTF-8 sequence detected " + "when decoding 'string'"); + *(escOffset++) = (wchar_t)ucs; + break; + } + + case 3: { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x0f; + + for (index = 0; index < 2; index++) { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x800) + return SetError(ds, -1, + "Overlong 3 byte UTF-8 sequence detected " + "when encoding string"); + *(escOffset++) = (wchar_t)ucs; + break; + } + + case 4: { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x07; + + for (index = 0; index < 3; index++) { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x10000) + return SetError(ds, -1, + "Overlong 4 byte UTF-8 sequence detected " + "when decoding 'string'"); + +#if WCHAR_MAX == 0xffff + if (ucs >= 0x10000) { + ucs -= 0x10000; + *(escOffset++) = (wchar_t)(ucs >> 10) + 0xd800; + *(escOffset++) = (wchar_t)(ucs & 0x3ff) + 0xdc00; + } else { + *(escOffset++) = (wchar_t)ucs; + } +#else + *(escOffset++) = (wchar_t)ucs; +#endif + break; + } + } + } +} + +JSOBJ FASTCALL_MSVC decode_array(struct DecoderState *ds) { + JSOBJ itemValue; + JSOBJ newObj; + int len; + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } + + newObj = ds->dec->newArray(ds->prv, ds->dec); + len = 0; + + ds->lastType = JT_INVALID; + ds->start++; + + for (;;) { + SkipWhitespace(ds); + + if ((*ds->start) == ']') { + ds->objDepth--; + if (len == 0) { + ds->start++; + return ds->dec->endArray(ds->prv, newObj); + } + + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding array value (1)"); + } + + itemValue = decode_any(ds); + + if (itemValue == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + if (!ds->dec->arrayAddItem(ds->prv, newObj, itemValue)) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) { + case ']': { + ds->objDepth--; + return ds->dec->endArray(ds->prv, newObj); + } + case ',': + break; + + default: + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding array value (2)"); + } + + len++; + } +} + +JSOBJ FASTCALL_MSVC decode_object(struct DecoderState *ds) { + JSOBJ itemName; + JSOBJ itemValue; + JSOBJ newObj; + + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } + + newObj 
= ds->dec->newObject(ds->prv, ds->dec); + + ds->start++; + + for (;;) { + SkipWhitespace(ds); + + if ((*ds->start) == '}') { + ds->objDepth--; + ds->start++; + return ds->dec->endObject(ds->prv, newObj); + } + + ds->lastType = JT_INVALID; + itemName = decode_any(ds); + + if (itemName == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + if (ds->lastType != JT_UTF8) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return SetError( + ds, -1, + "Key name of object must be 'string' when decoding 'object'"); + } + + SkipWhitespace(ds); + + if (*(ds->start++) != ':') { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return SetError(ds, -1, "No ':' found when decoding object value"); + } + + SkipWhitespace(ds); + + itemValue = decode_any(ds); + + if (itemValue == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return NULL; + } + + if (!ds->dec->objectAddKey(ds->prv, newObj, itemName, itemValue)) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + ds->dec->releaseObject(ds->prv, itemValue, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) { + case '}': { + ds->objDepth--; + return ds->dec->endObject(ds->prv, newObj); + } + case ',': + break; + + default: + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding object value"); + } + } +} + +JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) { + for (;;) { + switch (*ds->start) { + case '\"': + return decode_string(ds); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'I': + case 'N': + case '-': + return decode_numeric(ds); + + case '[': + return decode_array(ds); + case '{': + return decode_object(ds); + case 't': + return decode_true(ds); + case 'f': + return decode_false(ds); + case 'n': + return decode_null(ds); + + case ' ': + case '\t': + case '\r': + case '\n': + // White space + ds->start++; + break; + + default: + return SetError(ds, -1, "Expected object or value"); + } + } +} + +JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, + size_t cbBuffer) { + /* + FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode + escaping doesn't run into the wall each time */ + char *locale; + struct DecoderState ds; + wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; + JSOBJ ret; + + ds.start = (char *)buffer; + ds.end = ds.start + cbBuffer; + + ds.escStart = escBuffer; + ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); + ds.escHeap = 0; + ds.prv = dec->prv; + ds.dec = dec; + ds.dec->errorStr = NULL; + ds.dec->errorOffset = NULL; + ds.objDepth = 0; + + ds.dec = dec; + + locale = setlocale(LC_NUMERIC, NULL); + if (strcmp(locale, "C")) { + locale = strdup(locale); + if (!locale) { + return SetError(&ds, -1, "Could not reserve memory block"); + } + setlocale(LC_NUMERIC, "C"); + ret = decode_any(&ds); + setlocale(LC_NUMERIC, locale); + free(locale); + } else { + ret = decode_any(&ds); + } + + if (ds.escHeap) { + dec->free(ds.escStart); + } + + SkipWhitespace(&ds); + + if (ds.start != ds.end && ret) { + dec->releaseObject(ds.prv, ret, ds.dec); + return SetError(&ds, -1, "Trailing data"); + } + + return ret; +} diff --git 
a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c new file mode 100644 index 00000000..5d907104 --- /dev/null +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -0,0 +1,1200 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "ultrajson.h" + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* +Worst cases being: + +Control characters (ASCII < 32) +0x00 (1 byte) input => \u0000 output (6 bytes) +1 * 6 => 6 (6 bytes required) + +or UTF-16 surrogate pairs +4 bytes input in UTF-8 => \uXXXX\uYYYY (12 bytes). + +4 * 6 => 24 bytes (12 bytes required) + +The extra 2 bytes are for the quotes around the string + +*/ +#define RESERVE_STRING(_len) (2 + ((_len)*6)) + +static const double g_pow10[] = {1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000}; +static const char g_hexChars[] = "0123456789abcdef"; +static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/"; + +/* +FIXME: While this is fine dandy and working it's a magic value mess which +probably only the author understands. 
+Needs a cleanup and more documentation */ + +/* +Table for pure ascii output escaping all characters above 127 to \uXXXX */ +static const JSUINT8 g_asciiOutputTable[256] = { + /* 0x00 */ 0, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 10, + 12, + 14, + 30, + 16, + 18, + 30, + 30, + /* 0x10 */ 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + /* 0x20 */ 1, + 1, + 20, + 1, + 1, + 1, + 29, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 24, + /* 0x30 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 29, + 1, + 29, + 1, + /* 0x40 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x50 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 22, + 1, + 1, + 1, + /* 0x60 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x70 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x80 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x90 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xa0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xb0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xc0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xd0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xe0 */ 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + /* 0xf0 */ 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 5, + 5, + 5, + 5, + 6, + 6, + 1, + 1}; + +static void SetError(JSOBJ obj, JSONObjectEncoder *enc, const char *message) { + enc->errorMsg = message; + enc->errorObj = obj; +} + +/* +FIXME: Keep track of how big these get across several encoder calls and try to +make an estimate +That way we won't run our head into the wall each call */ +void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded) { + size_t curSize = enc->end - enc->start; + size_t newSize = curSize * 2; + size_t offset = enc->offset - enc->start; + + while (newSize < curSize + cbNeeded) { + newSize *= 2; + } + + if (enc->heap) { + enc->start = (char *)enc->realloc(enc->start, newSize); + if (!enc->start) { + SetError(NULL, enc, "Could not reserve memory block"); + return; + } + } else { + char *oldStart = enc->start; + enc->heap = 1; + enc->start = (char *)enc->malloc(newSize); + if (!enc->start) { + SetError(NULL, enc, "Could not reserve memory block"); + return; + } + memcpy(enc->start, oldStart, offset); + } + enc->offset = enc->start + offset; + enc->end = enc->start + newSize; +} + +INLINE_PREFIX void FASTCALL_MSVC +Buffer_AppendShortHexUnchecked(char *outputOffset, unsigned short value) { + *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; + *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8]; + *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4]; + *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0]; +} + +int Buffer_EscapeStringUnvalidated(JSONObjectEncoder *enc, const char *io, + const char *end) { + char *of = (char *)enc->offset; + + for (;;) { + switch (*io) { + case 0x00: { + if (io < end) { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + break; + } else { + enc->offset += (of - enc->offset); + return TRUE; + } + } + case '\"': + (*of++) = '\\'; + (*of++) = '\"'; + break; + case '\\': + (*of++) = '\\'; + (*of++) = '\\'; + break; + case '/': + 
(*of++) = '\\'; + (*of++) = '/'; + break; + case '\b': + (*of++) = '\\'; + (*of++) = 'b'; + break; + case '\f': + (*of++) = '\\'; + (*of++) = 'f'; + break; + case '\n': + (*of++) = '\\'; + (*of++) = 'n'; + break; + case '\r': + (*of++) = '\\'; + (*of++) = 'r'; + break; + case '\t': + (*of++) = '\\'; + (*of++) = 't'; + break; + + case 0x26: // '/' + case 0x3c: // '<' + case 0x3e: // '>' + { + if (enc->encodeHTMLChars) { + // Fall through to \u00XX case below. + } else { + // Same as default case below. + (*of++) = (*io); + break; + } + } + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)]; + break; + } + default: + (*of++) = (*io); + break; + } + io++; + } +} + +int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc, + const char *io, const char *end) { + JSUTF32 ucs; + char *of = (char *)enc->offset; + + for (;;) { + JSUINT8 utflen = g_asciiOutputTable[(unsigned char)*io]; + + switch (utflen) { + case 0: { + if (io < end) { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + io++; + continue; + } else { + enc->offset += (of - enc->offset); + return TRUE; + } + } + + case 1: { + *(of++) = (*io++); + continue; + } + + case 2: { + JSUTF32 in; + JSUTF16 in16; + + if (end - io < 1) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in16, io, sizeof(JSUTF16)); + in = (JSUTF32)in16; + +#ifdef __LITTLE_ENDIAN__ + ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f); +#else + ucs = ((in & 0x1f00) >> 2) | (in & 0x3f); +#endif + + if (ucs < 0x80) { + enc->offset += (of - enc->offset); + SetError(obj, enc, + "Overlong 2 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 2; + break; + } + + case 3: { + JSUTF32 in; + JSUTF16 in16; + JSUINT8 in8; + + if (end - io < 2) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in16, io, sizeof(JSUTF16)); + memcpy(&in8, io + 2, sizeof(JSUINT8)); +#ifdef __LITTLE_ENDIAN__ + in = (JSUTF32)in16; + in |= in8 << 16; + ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) | + ((in & 0x3f0000) >> 16); +#else + in = in16 << 8; + in |= in8; + ucs = + ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + + if (ucs < 0x800) { + enc->offset += (of - enc->offset); + SetError(obj, enc, + "Overlong 3 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 3; + break; + } + case 4: { + JSUTF32 in; + + if (end - io < 3) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in, io, sizeof(JSUTF32)); +#ifdef __LITTLE_ENDIAN__ + ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) | + ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24); +#else + ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) | + ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + if (ucs < 0x10000) { + enc->offset += (of - 
enc->offset); + SetError(obj, enc, + "Overlong 4 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 4; + break; + } + + case 5: + case 6: { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unsupported UTF-8 sequence length when encoding string"); + return FALSE; + } + + case 29: { + if (enc->encodeHTMLChars) { + // Fall through to \u00XX case 30 below. + } else { + // Same as case 1 above. + *(of++) = (*io++); + continue; + } + } + + case 30: { + // \uXXXX encode + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)]; + io++; + continue; + } + case 10: + case 12: + case 14: + case 16: + case 18: + case 20: + case 22: + case 24: { + *(of++) = *((char *)(g_escapeChars + utflen + 0)); + *(of++) = *((char *)(g_escapeChars + utflen + 1)); + io++; + continue; + } + // This can never happen, it's here to make L4 VC++ happy + default: { + ucs = 0; + break; + } + } + + /* + If the character is a UTF8 sequence of length > 1 we end up here */ + if (ucs >= 0x10000) { + ucs -= 0x10000; + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked( + of, (unsigned short)(ucs >> 10) + 0xd800); + of += 4; + + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked( + of, (unsigned short)(ucs & 0x3ff) + 0xdc00); + of += 4; + } else { + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (unsigned short)ucs); + of += 4; + } + } +} + +#define Buffer_Reserve(__enc, __len) \ + if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ + { \ + Buffer_Realloc((__enc), (__len));\ + } \ + +#define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr; + +INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin, + char *end) { + char aux; + while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; +} + +void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc) { + if (enc->indent > 0) Buffer_AppendCharUnchecked(enc, '\n'); +} + +// This function could be refactored to only accept enc as an argument, +// but this is a straight vendor from ujson source +void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value) { + int i; + if (enc->indent > 0) { + while (value-- > 0) + for (i = 0; i < enc->indent; i++) + Buffer_AppendCharUnchecked(enc, ' '); + } +} + +void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) { + char *wstr; + JSUINT32 uvalue = (value < 0) ? -value : value; + wstr = enc->offset; + + // Conversion. Number is reversed. + do { + *wstr++ = (char)(48 + (uvalue % 10)); + } while (uvalue /= 10); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset, wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) { + char *wstr; + JSUINT64 uvalue = (value < 0) ? -value : value; + + wstr = enc->offset; + // Conversion. Number is reversed. 
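+    // Digits are written least-significant first, then strreverse() below
+    // flips them into place, so no pre-pass is needed to count digits.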
+ + do { + *wstr++ = (char)(48 + (uvalue % 10ULL)); + } while (uvalue /= 10ULL); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset, wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, + double value) { + /* if input is beyond the thresholds, revert to exponential */ + const double thres_max = (double)1e16 - 1; + const double thres_min = (double)1e-15; + char precision_str[20]; + int count; + double diff = 0.0; + char *str = enc->offset; + char *wstr = str; + unsigned long long whole; + double tmp; + unsigned long long frac; + int neg; + double pow10; + + if (value == HUGE_VAL || value == -HUGE_VAL) { + SetError(obj, enc, "Invalid Inf value when encoding double"); + return FALSE; + } + + if (!(value == value)) { + SetError(obj, enc, "Invalid Nan value when encoding double"); + return FALSE; + } + + /* we'll work in positive values and deal with the + negative sign issue later */ + neg = 0; + if (value < 0) { + neg = 1; + value = -value; + } + + /* + for very large or small numbers switch back to native sprintf for + exponentials. anyone want to write code to replace this? */ + if (value > thres_max || (value != 0.0 && fabs(value) < thres_min)) { + precision_str[0] = '%'; + precision_str[1] = '.'; +#if defined(_WIN32) && defined(_MSC_VER) + sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%ug", + enc->doublePrecision); + enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, + neg ? -value : value); +#else + snprintf(precision_str + 2, sizeof(precision_str) - 2, "%ug", + enc->doublePrecision); + enc->offset += snprintf(str, enc->end - enc->offset, precision_str, + neg ? -value : value); +#endif + return TRUE; + } + + pow10 = g_pow10[enc->doublePrecision]; + + whole = (unsigned long long)value; + tmp = (value - whole) * pow10; + frac = (unsigned long long)(tmp); + diff = tmp - frac; + + if (diff > 0.5) { + ++frac; + } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + // handle rollover, e.g. + // case 0.99 with prec 1 is 1.0 and case 0.95 with prec is 1.0 as well + if (frac >= pow10) { + frac = 0; + ++whole; + } + + if (enc->doublePrecision == 0) { + diff = value - whole; + + if (diff > 0.5) { + /* greater than 0.5, round up, e.g. 1.6 -> 2 */ + ++whole; + } else if (diff == 0.5 && (whole & 1)) { + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; + } + + // vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 + } else if (frac) { + count = enc->doublePrecision; + // now do fractional part, as an unsigned number + // we know it is not 0 but we can have leading zeros, these + // should be removed + while (!(frac % 10)) { + --count; + frac /= 10; + } + //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 + + // now do fractional part, as an unsigned number + do { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) { + *wstr++ = '0'; + } + // add decimal + *wstr++ = '.'; + } else { + *wstr++ = '0'; + *wstr++ = '.'; + } + + // Do whole part. Take care of sign + // conversion. Number is reversed. 
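+    // The fractional digits and the '.' are already in the buffer in
+    // reversed order; append the reversed whole part plus the sign, and a
+    // single strreverse() below puts the complete number the right way round.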
+ do { + *wstr++ = (char)(48 + (whole % 10)); + } while (whole /= 10); + + if (neg) { + *wstr++ = '-'; + } + strreverse(str, wstr - 1); + enc->offset += (wstr - (enc->offset)); + + return TRUE; +} + +/* +FIXME: +Handle integration functions returning NULL here */ + +/* +FIXME: +Perhaps implement recursion detection */ + +void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, + size_t cbName) { + const char *value; + char *objName; + int count; + JSOBJ iterObj; + size_t szlen; + JSONTypeContext tc; + tc.encoder = enc; + + if (enc->level > enc->recursionMax) { + SetError(obj, enc, "Maximum recursion level reached"); + return; + } + + /* + This reservation must hold + + length of _name as encoded worst case + + maxLength of double to string OR maxLength of JSLONG to string + */ + + Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); + if (enc->errorMsg) { + return; + } + + if (name) { + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) { + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, name, name + cbName)) { + return; + } + } + + Buffer_AppendCharUnchecked(enc, '\"'); + + Buffer_AppendCharUnchecked(enc, ':'); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(enc, ' '); +#endif + } + + enc->beginTypeContext(obj, &tc); + + switch (tc.type) { + case JT_INVALID: { + return; + } + + case JT_ARRAY: { + count = 0; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked(enc, '['); + Buffer_AppendIndentNewlineUnchecked(enc); + + while (enc->iterNext(obj, &tc)) { + if (count > 0) { + Buffer_AppendCharUnchecked(enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(buffer, ' '); +#endif + Buffer_AppendIndentNewlineUnchecked(enc); + } + + iterObj = enc->iterGetValue(obj, &tc); + + enc->level++; + Buffer_AppendIndentUnchecked(enc, enc->level); + encode(iterObj, enc, NULL, 0); + count++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendIndentNewlineUnchecked(enc); + Buffer_AppendIndentUnchecked(enc, enc->level); + Buffer_AppendCharUnchecked(enc, ']'); + break; + } + + case JT_OBJECT: { + count = 0; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked(enc, '{'); + Buffer_AppendIndentNewlineUnchecked(enc); + + while (enc->iterNext(obj, &tc)) { + if (count > 0) { + Buffer_AppendCharUnchecked(enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(enc, ' '); +#endif + Buffer_AppendIndentNewlineUnchecked(enc); + } + + iterObj = enc->iterGetValue(obj, &tc); + objName = enc->iterGetName(obj, &tc, &szlen); + + enc->level++; + Buffer_AppendIndentUnchecked(enc, enc->level); + encode(iterObj, enc, objName, szlen); + count++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendIndentNewlineUnchecked(enc); + Buffer_AppendIndentUnchecked(enc, enc->level); + Buffer_AppendCharUnchecked(enc, '}'); + break; + } + + case JT_LONG: { + Buffer_AppendLongUnchecked(enc, enc->getLongValue(obj, &tc)); + break; + } + + case JT_INT: { + Buffer_AppendIntUnchecked(enc, enc->getIntValue(obj, &tc)); + break; + } + + case JT_TRUE: { + Buffer_AppendCharUnchecked(enc, 't'); + Buffer_AppendCharUnchecked(enc, 'r'); + Buffer_AppendCharUnchecked(enc, 'u'); + Buffer_AppendCharUnchecked(enc, 'e'); + break; + } + + case JT_FALSE: { + Buffer_AppendCharUnchecked(enc, 'f'); + Buffer_AppendCharUnchecked(enc, 'a'); + Buffer_AppendCharUnchecked(enc, 'l'); + Buffer_AppendCharUnchecked(enc, 's'); + Buffer_AppendCharUnchecked(enc, 'e'); + break; + } + + case JT_NULL: { + 
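+            // Emit the literal "null" one byte at a time; the Buffer_Reserve()
+            // call at the top of encode() guarantees room for these short tokens.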
Buffer_AppendCharUnchecked(enc, 'n'); + Buffer_AppendCharUnchecked(enc, 'u'); + Buffer_AppendCharUnchecked(enc, 'l'); + Buffer_AppendCharUnchecked(enc, 'l'); + break; + } + + case JT_DOUBLE: { + if (!Buffer_AppendDoubleUnchecked(obj, enc, + enc->getDoubleValue(obj, &tc))) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + break; + } + + case JT_UTF8: { + value = enc->getStringValue(obj, &tc, &szlen); + Buffer_Reserve(enc, RESERVE_STRING(szlen)); + if (enc->errorMsg) { + enc->endTypeContext(obj, &tc); + return; + } + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } + + Buffer_AppendCharUnchecked(enc, '\"'); + break; + } + + case JT_BIGNUM: { + value = enc->getBigNumStringValue(obj, &tc, &szlen); + + Buffer_Reserve(enc, RESERVE_STRING(szlen)); + if (enc->errorMsg) { + enc->endTypeContext(obj, &tc); + return; + } + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } + + break; + } + } + + enc->endTypeContext(obj, &tc); + enc->level--; +} + +char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, + size_t _cbBuffer) { + char *locale; + enc->malloc = enc->malloc ? enc->malloc : malloc; + enc->free = enc->free ? enc->free : free; + enc->realloc = enc->realloc ? enc->realloc : realloc; + enc->errorMsg = NULL; + enc->errorObj = NULL; + enc->level = 0; + + if (enc->recursionMax < 1) { + enc->recursionMax = JSON_MAX_RECURSION_DEPTH; + } + + if (enc->doublePrecision < 0 || + enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) { + enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; + } + + if (_buffer == NULL) { + _cbBuffer = 32768; + enc->start = (char *)enc->malloc(_cbBuffer); + if (!enc->start) { + SetError(obj, enc, "Could not reserve memory block"); + return NULL; + } + enc->heap = 1; + } else { + enc->start = _buffer; + enc->heap = 0; + } + + enc->end = enc->start + _cbBuffer; + enc->offset = enc->start; + + locale = setlocale(LC_NUMERIC, NULL); + if (strcmp(locale, "C")) { + locale = strdup(locale); + if (!locale) { + SetError(NULL, enc, "Could not reserve memory block"); + return NULL; + } + setlocale(LC_NUMERIC, "C"); + encode(obj, enc, NULL, 0); + setlocale(LC_NUMERIC, locale); + free(locale); + } else { + encode(obj, enc, NULL, 0); + } + + Buffer_Reserve(enc, 1); + if (enc->errorMsg) { + return NULL; + } + Buffer_AppendCharUnchecked(enc, '\0'); + + return enc->start; +} diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c new file mode 100644 index 00000000..c58f25b8 --- /dev/null +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -0,0 +1,601 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#define NO_IMPORT_ARRAY +#define PY_SSIZE_T_CLEAN +#include +#include +#include + +#define PRINTMARK() + +typedef struct __PyObjectDecoder { + JSONObjectDecoder dec; + + void *npyarr; // Numpy context buffer + void *npyarr_addr; // Ref to npyarr ptr to track DECREF calls + npy_intp curdim; // Current array dimension + + PyArray_Descr *dtype; +} PyObjectDecoder; + +typedef struct __NpyArrContext { + PyObject *ret; + PyObject *labels[2]; + PyArray_Dims shape; + + PyObjectDecoder *dec; + + npy_intp i; + npy_intp elsize; + npy_intp elcount; +} NpyArrContext; + +// Numpy handling based on numpy internal code, specifically the function +// PyArray_FromIter. + +// numpy related functions are inter-dependent so declare them all here, +// to ensure the compiler catches any errors + +// standard numpy array handling +JSOBJ Object_npyNewArray(void *prv, void *decoder); +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj); +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value); + +// for more complex dtypes (object and string) fill a standard Python list +// and convert to a numpy array when done. 
+JSOBJ Object_npyNewArrayList(void *prv, void *decoder); +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj); +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value); + +// labelled support, encode keys and values of JS object into separate numpy +// arrays +JSOBJ Object_npyNewObject(void *prv, void *decoder); +JSOBJ Object_npyEndObject(void *prv, JSOBJ obj); +int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value); + +// free the numpy context buffer +void Npy_releaseContext(NpyArrContext *npyarr) { + PRINTMARK(); + if (npyarr) { + if (npyarr->shape.ptr) { + PyObject_Free(npyarr->shape.ptr); + } + if (npyarr->dec) { + npyarr->dec->npyarr = NULL; + npyarr->dec->curdim = 0; + } + Py_XDECREF(npyarr->labels[0]); + Py_XDECREF(npyarr->labels[1]); + Py_XDECREF(npyarr->ret); + PyObject_Free(npyarr); + } +} + +JSOBJ Object_npyNewArray(void *prv, void *_decoder) { + NpyArrContext *npyarr; + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + if (decoder->curdim <= 0) { + // start of array - initialise the context buffer + npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + decoder->npyarr_addr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + return NULL; + } + + npyarr->dec = decoder; + npyarr->labels[0] = npyarr->labels[1] = NULL; + + npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp) * NPY_MAXDIMS); + npyarr->shape.len = 1; + npyarr->ret = NULL; + + npyarr->elsize = 0; + npyarr->elcount = 4; + npyarr->i = 0; + } else { + // starting a new dimension continue the current array (and reshape + // after) + npyarr = (NpyArrContext *)decoder->npyarr; + if (decoder->curdim >= npyarr->shape.len) { + npyarr->shape.len++; + } + } + + npyarr->shape.ptr[decoder->curdim] = 0; + decoder->curdim++; + return npyarr; +} + +PyObject *Npy_returnLabelled(NpyArrContext *npyarr) { + PyObject *ret = npyarr->ret; + npy_intp i; + + if (npyarr->labels[0] || npyarr->labels[1]) { + // finished decoding, build tuple with values and labels + ret = PyTuple_New(npyarr->shape.len + 1); + for (i = 0; i < npyarr->shape.len; i++) { + if (npyarr->labels[i]) { + PyTuple_SET_ITEM(ret, i + 1, npyarr->labels[i]); + npyarr->labels[i] = NULL; + } else { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(ret, i + 1, Py_None); + } + } + PyTuple_SET_ITEM(ret, 0, npyarr->ret); + } + + return ret; +} + +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj) { + PyObject *ret; + char *new_data; + NpyArrContext *npyarr = (NpyArrContext *)obj; + int emptyType = NPY_DEFAULT_TYPE; + npy_intp i; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + ret = npyarr->ret; + i = npyarr->i; + + npyarr->dec->curdim--; + + if (i == 0 || !npyarr->ret) { + // empty array would not have been initialised so do it now. 
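+        // Use the dtype the caller passed to the decoder when one is set;
+        // otherwise keep the NPY_DEFAULT_TYPE fallback chosen above.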
+ if (npyarr->dec->dtype) { + emptyType = npyarr->dec->dtype->type_num; + } + npyarr->ret = ret = + PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); + } else if (npyarr->dec->curdim <= 0) { + // realloc to final size + new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); + if (new_data == NULL) { + PyErr_NoMemory(); + Npy_releaseContext(npyarr); + return NULL; + } + ((PyArrayObject *)ret)->data = (void *)new_data; + // PyArray_BYTES(ret) = new_data; + } + + if (npyarr->dec->curdim <= 0) { + // finished decoding array, reshape if necessary + if (npyarr->shape.len > 1) { + npyarr->ret = PyArray_Newshape((PyArrayObject *)ret, &npyarr->shape, + NPY_ANYORDER); + Py_DECREF(ret); + } + + ret = Npy_returnLabelled(npyarr); + + npyarr->ret = NULL; + Npy_releaseContext(npyarr); + } + + return ret; +} + +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { + PyObject *type; + PyArray_Descr *dtype; + npy_intp i; + char *new_data, *item; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + + i = npyarr->i; + + npyarr->shape.ptr[npyarr->dec->curdim - 1]++; + + if (PyArray_Check((PyObject *)value)) { + // multidimensional array, keep decoding values. + return 1; + } + + if (!npyarr->ret) { + // Array not initialised yet. + // We do it here so we can 'sniff' the data type if none was provided + if (!npyarr->dec->dtype) { + type = PyObject_Type(value); + if (!PyArray_DescrConverter(type, &dtype)) { + Py_DECREF(type); + goto fail; + } + Py_INCREF(dtype); + Py_DECREF(type); + } else { + dtype = PyArray_DescrNew(npyarr->dec->dtype); + } + + // If it's an object or string then fill a Python list and subsequently + // convert. Otherwise we would need to somehow mess about with + // reference counts when renewing memory. + npyarr->elsize = dtype->elsize; + if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) { + Py_XDECREF(dtype); + + if (npyarr->dec->curdim > 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot decode multidimensional arrays with " + "variable length elements to numpy"); + goto fail; + } + npyarr->elcount = 0; + npyarr->ret = PyList_New(0); + if (!npyarr->ret) { + goto fail; + } + ((JSONObjectDecoder *)npyarr->dec)->newArray = + Object_npyNewArrayList; + ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = + Object_npyArrayListAddItem; + ((JSONObjectDecoder *)npyarr->dec)->endArray = + Object_npyEndArrayList; + return Object_npyArrayListAddItem(prv, obj, value); + } + + npyarr->ret = PyArray_NewFromDescr( + &PyArray_Type, dtype, 1, &npyarr->elcount, NULL, NULL, 0, NULL); + + if (!npyarr->ret) { + goto fail; + } + } + + if (i >= npyarr->elcount) { + // Grow PyArray_DATA(ret): + // this is similar for the strategy for PyListObject, but we use + // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... + if (npyarr->elsize == 0) { + PyErr_SetString(PyExc_ValueError, + "Cannot decode multidimensional arrays with " + "variable length elements to numpy"); + goto fail; + } + + npyarr->elcount = (i >> 1) + (i < 4 ? 
4 : 2) + i; + if (npyarr->elcount <= NPY_MAX_INTP / npyarr->elsize) { + new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), + npyarr->elcount * npyarr->elsize); + } else { + PyErr_NoMemory(); + goto fail; + } + ((PyArrayObject *)npyarr->ret)->data = (void *)new_data; + + // PyArray_BYTES(npyarr->ret) = new_data; + } + + PyArray_DIMS(npyarr->ret)[0] = i + 1; + + if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL || + PyArray_SETITEM(npyarr->ret, item, value) == -1) { + goto fail; + } + + Py_DECREF((PyObject *)value); + npyarr->i++; + return 1; + +fail: + + Npy_releaseContext(npyarr); + return 0; +} + +JSOBJ Object_npyNewArrayList(void *prv, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + PyErr_SetString( + PyExc_ValueError, + "nesting not supported for object or variable length dtypes"); + Npy_releaseContext(decoder->npyarr); + return NULL; +} + +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj) { + PyObject *list, *ret; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + // convert decoded list to numpy array + list = (PyObject *)npyarr->ret; + npyarr->ret = PyArray_FROM_O(list); + + ret = Npy_returnLabelled(npyarr); + npyarr->ret = list; + + ((JSONObjectDecoder *)npyarr->dec)->newArray = Object_npyNewArray; + ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; + ((JSONObjectDecoder *)npyarr->dec)->endArray = Object_npyEndArray; + Npy_releaseContext(npyarr); + return ret; +} + +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value) { + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + PyList_Append((PyObject *)npyarr->ret, value); + Py_DECREF((PyObject *)value); + npyarr->elcount++; + return 1; +} + +JSOBJ Object_npyNewObject(void *prv, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + if (decoder->curdim > 1) { + PyErr_SetString(PyExc_ValueError, + "labels only supported up to 2 dimensions"); + return NULL; + } + + return ((JSONObjectDecoder *)decoder)->newArray(prv, decoder); +} + +JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) { + PyObject *list; + npy_intp labelidx; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + labelidx = npyarr->dec->curdim - 1; + + list = npyarr->labels[labelidx]; + if (list) { + npyarr->labels[labelidx] = PyArray_FROM_O(list); + Py_DECREF(list); + } + + return (PyObject *)((JSONObjectDecoder *)npyarr->dec)->endArray(prv, obj); +} + +int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { + PyObject *label, *labels; + npy_intp labelidx; + // add key to label array, value to values array + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + + label = (PyObject *)name; + labelidx = npyarr->dec->curdim - 1; + + if (!npyarr->labels[labelidx]) { + npyarr->labels[labelidx] = PyList_New(0); + } + labels = npyarr->labels[labelidx]; + // only fill label array once, assumes all column labels are the same + // for 2-dimensional arrays. 
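+    // The label list collected here is converted to a numpy array later,
+    // in Object_npyEndObject().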
+ if (PyList_Check(labels) && PyList_GET_SIZE(labels) <= npyarr->elcount) { + PyList_Append(labels, label); + } + + if (((JSONObjectDecoder *)npyarr->dec)->arrayAddItem(prv, obj, value)) { + Py_DECREF(label); + return 1; + } + return 0; +} + +int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { + int ret = PyDict_SetItem(obj, name, value); + Py_DECREF((PyObject *)name); + Py_DECREF((PyObject *)value); + return ret == 0 ? 1 : 0; +} + +int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { + int ret = PyList_Append(obj, value); + Py_DECREF((PyObject *)value); + return ret == 0 ? 1 : 0; +} + +JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { + return PyUnicode_FromWideChar(start, (end - start)); +} + +JSOBJ Object_newTrue(void *prv) { Py_RETURN_TRUE; } + +JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; } + +JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; } + +JSOBJ Object_newPosInf(void *prv) { return PyFloat_FromDouble(Py_HUGE_VAL); } + +JSOBJ Object_newNegInf(void *prv) { return PyFloat_FromDouble(-Py_HUGE_VAL); } + +JSOBJ Object_newObject(void *prv, void *decoder) { return PyDict_New(); } + +JSOBJ Object_endObject(void *prv, JSOBJ obj) { return obj; } + +JSOBJ Object_newArray(void *prv, void *decoder) { return PyList_New(0); } + +JSOBJ Object_endArray(void *prv, JSOBJ obj) { return obj; } + +JSOBJ Object_newInteger(void *prv, JSINT32 value) { + return PyLong_FromLong((long)value); +} + +JSOBJ Object_newLong(void *prv, JSINT64 value) { + return PyLong_FromLongLong(value); +} + +JSOBJ Object_newUnsignedLong(void *prv, JSUINT64 value) { + return PyLong_FromUnsignedLongLong(value); +} + +JSOBJ Object_newDouble(void *prv, double value) { + return PyFloat_FromDouble(value); +} + +static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + if (obj != decoder->npyarr_addr) { + Py_XDECREF(((PyObject *)obj)); + } +} + +static char *g_kwlist[] = {"obj", "precise_float", "numpy", + "labelled", "dtype", NULL}; + +PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { + PyObject *ret; + PyObject *sarg; + PyObject *arg; + PyObject *opreciseFloat = NULL; + JSONObjectDecoder *decoder; + PyObjectDecoder pyDecoder; + PyArray_Descr *dtype = NULL; + int numpy = 0, labelled = 0; + + JSONObjectDecoder dec = { + Object_newString, Object_objectAddKey, Object_arrayAddItem, + Object_newTrue, Object_newFalse, Object_newNull, + Object_newPosInf, Object_newNegInf, Object_newObject, + Object_endObject, Object_newArray, Object_endArray, + Object_newInteger, Object_newLong, Object_newUnsignedLong, + Object_newDouble, + Object_releaseObject, PyObject_Malloc, PyObject_Free, + PyObject_Realloc}; + + dec.preciseFloat = 0; + dec.prv = NULL; + + pyDecoder.dec = dec; + pyDecoder.curdim = 0; + pyDecoder.npyarr = NULL; + pyDecoder.npyarr_addr = NULL; + + decoder = (JSONObjectDecoder *)&pyDecoder; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiiO&", g_kwlist, &arg, + &opreciseFloat, &numpy, &labelled, + PyArray_DescrConverter2, &dtype)) { + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (opreciseFloat && PyObject_IsTrue(opreciseFloat)) { + decoder->preciseFloat = 1; + } + + if (PyBytes_Check(arg)) { + sarg = arg; + } else if (PyUnicode_Check(arg)) { + sarg = PyUnicode_AsUTF8String(arg); + if (sarg == NULL) { + // Exception raised above us by codec according to docs + return NULL; + } + } else { + PyErr_Format(PyExc_TypeError, "Expected 'str' or 'bytes'"); + return NULL; 
+ } + + decoder->errorStr = NULL; + decoder->errorOffset = NULL; + + if (numpy) { + pyDecoder.dtype = dtype; + decoder->newArray = Object_npyNewArray; + decoder->endArray = Object_npyEndArray; + decoder->arrayAddItem = Object_npyArrayAddItem; + + if (labelled) { + decoder->newObject = Object_npyNewObject; + decoder->endObject = Object_npyEndObject; + decoder->objectAddKey = Object_npyObjectAddKey; + } + } + + ret = JSON_DecodeObject(decoder, PyBytes_AS_STRING(sarg), + PyBytes_GET_SIZE(sarg)); + + if (sarg != arg) { + Py_DECREF(sarg); + } + + if (PyErr_Occurred()) { + if (ret) { + Py_DECREF((PyObject *)ret); + } + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (decoder->errorStr) { + /* + FIXME: It's possible to give a much nicer error message here with actual + failing element in input etc*/ + + PyErr_Format(PyExc_ValueError, "%s", decoder->errorStr); + + if (ret) { + Py_DECREF((PyObject *)ret); + } + Npy_releaseContext(pyDecoder.npyarr); + + return NULL; + } + + return ret; +} diff --git a/pandas/_libs/src/ujson/python/date_conversions.c b/pandas/_libs/src/ujson/python/date_conversions.c new file mode 100644 index 00000000..86cb68f8 --- /dev/null +++ b/pandas/_libs/src/ujson/python/date_conversions.c @@ -0,0 +1,163 @@ +/* +Copyright (c) 2020, PyData Development Team +All rights reserved. +Distributed under the terms of the BSD Simplified License. +The full license is in the LICENSE file, distributed with this software. +*/ + +// Conversion routines that are useful for serialization, +// but which don't interact with JSON objects directly + +#include "date_conversions.h" +#include <../../../tslibs/src/datetime/np_datetime.h> +#include <../../../tslibs/src/datetime/np_datetime_strings.h> + +/* + * Function: scaleNanosecToUnit + * ----------------------------- + * + * Scales an integer value representing time in nanoseconds to provided unit. + * + * Mutates the provided value directly. Returns 0 on success, non-zero on error. + */ +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { + switch (unit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + *value /= 1000LL; + break; + case NPY_FR_ms: + *value /= 1000000LL; + break; + case NPY_FR_s: + *value /= 1000000000LL; + break; + default: + return -1; + } + + return 0; +} + +/* Converts the int64_t representation of a datetime to ISO; mutates len */ +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { + npy_datetimestruct dts; + int ret_code; + + pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + // datetime64 is always naive + ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); + if (ret_code != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { + scaleNanosecToUnit(&dt, base); + return dt; +} + +/* Convert PyDatetime To ISO C-string. 
mutates len */ +char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, + size_t *len) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(obj, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + return NULL; + } + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + // Check to see if PyDateTime has a timezone. + // Don't convert to UTC if it doesn't. + int is_tz_aware = 0; + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + if (offset == NULL) { + PyObject_Free(result); + return NULL; + } + is_tz_aware = offset != Py_None; + Py_DECREF(offset); + } + ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); + + if (ret != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + return NULL; + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(dt, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + // TODO(username): is setting errMsg required? + // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + // return NULL; + } + + npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + return NpyDateTimeToEpoch(npy_dt, base); +} + +/* Converts the int64_t representation of a duration to ISO; mutates len */ +char *int64ToIsoDuration(int64_t value, size_t *len) { + pandas_timedeltastruct tds; + int ret_code; + + pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); + + // Max theoretical length of ISO Duration with 64 bit day + // as the largest unit is 70 characters + 1 for a null terminator + char *result = PyObject_Malloc(71); + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + + ret_code = make_iso_8601_timedelta(&tds, result, len); + if (ret_code == -1) { + PyErr_SetString(PyExc_ValueError, + "Could not convert timedelta value to string"); + PyObject_Free(result); + return NULL; + } + + return result; +} diff --git a/pandas/_libs/src/ujson/python/date_conversions.h b/pandas/_libs/src/ujson/python/date_conversions.h new file mode 100644 index 00000000..efd707f0 --- /dev/null +++ b/pandas/_libs/src/ujson/python/date_conversions.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2020, PyData Development Team +All rights reserved. +Distributed under the terms of the BSD Simplified License. +The full license is in the LICENSE file, distributed with this software. +*/ + +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ + +#define PY_SSIZE_T_CLEAN +#include +#include + +// Scales value inplace from nanosecond resolution to unit resolution +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit); + +// Converts an int64 object representing a date to ISO format +// up to precision `base` e.g. 
base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len); + +// TODO(username): this function doesn't do a lot; should augment or +// replace with scaleNanosecToUnit +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base); + +// Converts a Python object representing a Date / Datetime to ISO format +// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, size_t *len); + +// Convert a Python Date/Datetime to Unix epoch with resolution base +npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base); + +char *int64ToIsoDuration(int64_t value, size_t *len); + +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c new file mode 100644 index 00000000..260f1ffb --- /dev/null +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -0,0 +1,2122 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. 
+*/ + +#define PY_SSIZE_T_CLEAN +#include +#include + +#define NO_IMPORT_ARRAY +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#include +#include +#include +#include +#include +#include "date_conversions.h" +#include "datetime.h" + +static PyTypeObject *type_decimal; +static PyTypeObject *cls_dataframe; +static PyTypeObject *cls_series; +static PyTypeObject *cls_index; +static PyTypeObject *cls_nat; +static PyTypeObject *cls_na; +PyObject *cls_timedelta; + +npy_int64 get_nat(void) { return NPY_MIN_INT64; } + +typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti, + size_t *_outLen); + +typedef struct __NpyArrContext { + PyObject *array; + char *dataptr; + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) + npy_intp dim; + npy_intp stride; + npy_intp ndim; + npy_intp index[NPY_MAXDIMS]; + int type_num; + PyArray_GetItemFunc *getitem; + + char **rowLabels; + char **columnLabels; +} NpyArrContext; + +typedef struct __PdBlockContext { + int colIdx; + int ncols; + int transpose; + + NpyArrContext **npyCtxts; // NpyArrContext for each column +} PdBlockContext; + +typedef struct __TypeContext { + JSPFN_ITERBEGIN iterBegin; + JSPFN_ITEREND iterEnd; + JSPFN_ITERNEXT iterNext; + JSPFN_ITERGETNAME iterGetName; + JSPFN_ITERGETVALUE iterGetValue; + PFN_PyTypeToUTF8 PyTypeToUTF8; + PyObject *newObj; + PyObject *dictObj; + Py_ssize_t index; + Py_ssize_t size; + PyObject *itemValue; + PyObject *itemName; + PyObject *attrList; + PyObject *iterator; + + double doubleValue; + JSINT64 longValue; + + char *cStr; + NpyArrContext *npyarr; + PdBlockContext *pdblock; + int transpose; + char **rowLabels; + char **columnLabels; + npy_intp rowLabelsLen; + npy_intp columnLabelsLen; +} TypeContext; + +typedef struct __PyObjectEncoder { + JSONObjectEncoder enc; + + // pass through the NpyArrContext when encoding multi-dimensional arrays + NpyArrContext *npyCtxtPassthru; + + // pass through the PdBlockContext when encoding blocks + PdBlockContext *blkCtxtPassthru; + + // pass-through to encode numpy data directly + int npyType; + void *npyValue; + + int datetimeIso; + NPY_DATETIMEUNIT datetimeUnit; + + // output format style for pandas data types + int outputFormat; + int originalOutputFormat; + + PyObject *defaultHandler; +} PyObjectEncoder; + +#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv)) + +enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES }; + +int PdBlock_iterNext(JSOBJ, JSONTypeContext *); + +void *initObjToJSON(void) { + PyObject *mod_pandas; + PyObject *mod_nattype; + PyObject *mod_natype; + PyObject *mod_decimal = PyImport_ImportModule("decimal"); + type_decimal = + (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal"); + Py_DECREF(mod_decimal); + + PyDateTime_IMPORT; + + mod_pandas = PyImport_ImportModule("pandas"); + if (mod_pandas) { + cls_dataframe = + (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "DataFrame"); + cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index"); + cls_series = + (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series"); + Py_DECREF(mod_pandas); + } + + mod_nattype = PyImport_ImportModule("pandas._libs.tslibs.nattype"); + if (mod_nattype) { + cls_nat = + (PyTypeObject *)PyObject_GetAttrString(mod_nattype, "NaTType"); + Py_DECREF(mod_nattype); + } + + mod_natype = PyImport_ImportModule("pandas._libs.missing"); + if (mod_natype) { + cls_na = (PyTypeObject *)PyObject_GetAttrString(mod_natype, "NAType"); + Py_DECREF(mod_natype); 
+ } + + // GH 31463 + return NULL; +} + +static TypeContext *createTypeContext(void) { + TypeContext *pc; + + pc = PyObject_Malloc(sizeof(TypeContext)); + if (!pc) { + PyErr_NoMemory(); + return NULL; + } + pc->newObj = NULL; + pc->dictObj = NULL; + pc->itemValue = NULL; + pc->itemName = NULL; + pc->attrList = NULL; + pc->index = 0; + pc->size = 0; + pc->longValue = 0; + pc->doubleValue = 0.0; + pc->cStr = NULL; + pc->npyarr = NULL; + pc->pdblock = NULL; + pc->rowLabels = NULL; + pc->columnLabels = NULL; + pc->transpose = 0; + pc->rowLabelsLen = 0; + pc->columnLabelsLen = 0; + + return pc; +} + +static PyObject *get_values(PyObject *obj) { + PyObject *values = NULL; + + if (PyObject_TypeCheck(obj, cls_index) || + PyObject_TypeCheck(obj, cls_series)) { + // The special cases to worry about are dt64tz and category[dt64tz]. + // In both cases we want the UTC-localized datetime64 ndarray, + // without going through and object array of Timestamps. + if (PyObject_HasAttrString(obj, "tz")) { + PyObject *tz = PyObject_GetAttrString(obj, "tz"); + if (tz != Py_None) { + // Go through object array if we have dt64tz, since tz info will + // be lost if values is used directly. + Py_DECREF(tz); + values = PyObject_CallMethod(obj, "__array__", NULL); + return values; + } + Py_DECREF(tz); + } + values = PyObject_GetAttrString(obj, "values"); + if (values == NULL) { + // Clear so we can subsequently try another method + PyErr_Clear(); + } else if (PyObject_HasAttrString(values, "__array__")) { + // We may have gotten a Categorical or Sparse array so call np.array + PyObject *array_values = PyObject_CallMethod(values, "__array__", + NULL); + Py_DECREF(values); + values = array_values; + } else if (!PyArray_CheckExact(values)) { + // Didn't get a numpy array, so keep trying + Py_DECREF(values); + values = NULL; + } + } + + if (values == NULL) { + PyObject *typeRepr = PyObject_Repr((PyObject *)Py_TYPE(obj)); + PyObject *repr; + if (PyObject_HasAttrString(obj, "dtype")) { + PyObject *dtype = PyObject_GetAttrString(obj, "dtype"); + repr = PyObject_Repr(dtype); + Py_DECREF(dtype); + } else { + repr = PyUnicode_FromString(""); + } + + PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet", + repr, typeRepr); + Py_DECREF(repr); + Py_DECREF(typeRepr); + + return NULL; + } + + return values; +} + +static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) { + PyObject *tmp = PyObject_GetAttrString(obj, attr); + PyObject *ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_GetAttrString(tmp, subAttr); + Py_DECREF(tmp); + + return ret; +} + +static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { + PyObject *tmp = PyObject_GetAttrString(obj, attr); + Py_ssize_t ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_Length(tmp); + Py_DECREF(tmp); + + if (ret == -1) { + return 0; + } + + return ret; +} + +static int is_simple_frame(PyObject *obj) { + PyObject *mgr = PyObject_GetAttrString(obj, "_mgr"); + if (!mgr) { + return 0; + } + int ret; + if (PyObject_HasAttrString(mgr, "blocks")) { + ret = (get_attr_length(mgr, "blocks") <= 1); + } else { + ret = 0; + } + + Py_DECREF(mgr); + return ret; +} + +static npy_int64 get_long_attr(PyObject *o, const char *attr) { + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = + (PyLong_Check(value) ? 
PyLong_AsLongLong(value) : PyLong_AsLong(value)); + Py_DECREF(value); + return long_val; +} + +static npy_float64 total_seconds(PyObject *td) { + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; +} + +static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), + size_t *_outLen) { + PyObject *obj = (PyObject *)_obj; + *_outLen = PyBytes_GET_SIZE(obj); + return PyBytes_AS_STRING(obj); +} + +static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), + size_t *_outLen) { + return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen); +} + +/* JSON callback. returns a char* and mutates the pointer to *len */ +static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + return int64ToIso(GET_TC(tc)->longValue, base, len); +} + +/* JSON callback. returns a char* and mutates the pointer to *len */ +static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + return int64ToIsoDuration(GET_TC(tc)->longValue, len); +} + +/* JSON callback */ +static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, + size_t *len) { + if (!PyDate_Check(obj)) { + PyErr_SetString(PyExc_TypeError, "Expected date object"); + return NULL; + } + + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + return PyDateTimeToIso(obj, base, len); +} + +static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { + PyObject *obj = (PyObject *)_obj; + PyObject *str; + PyObject *tmp; + + str = PyObject_CallMethod(obj, "isoformat", NULL); + if (str == NULL) { + *outLen = 0; + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "Failed to convert time"); + } + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + return NULL; + } + if (PyUnicode_Check(str)) { + tmp = str; + str = PyUnicode_AsUTF8String(str); + Py_DECREF(tmp); + } + + GET_TC(tc)->newObj = str; + + *outLen = PyBytes_GET_SIZE(str); + char *outValue = PyBytes_AS_STRING(str); + return outValue; +} + +//============================================================================= +// Numpy array iteration functions +//============================================================================= + +static void NpyArr_freeItemValue(JSOBJ Py_UNUSED(_obj), JSONTypeContext *tc) { + if (GET_TC(tc)->npyarr && + GET_TC(tc)->itemValue != GET_TC(tc)->npyarr->array) { + Py_XDECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } +} + +int NpyArr_iterNextNone(JSOBJ Py_UNUSED(_obj), JSONTypeContext *Py_UNUSED(tc)) { + return 0; +} + +void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyArrayObject *obj; + NpyArrContext *npyarr; + + if (GET_TC(tc)->newObj) { + obj = (PyArrayObject *)GET_TC(tc)->newObj; + } else { + obj = (PyArrayObject *)_obj; + } + + npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + GET_TC(tc)->npyarr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + npyarr->array = (PyObject *)obj; + npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem; + npyarr->dataptr = PyArray_DATA(obj); + npyarr->ndim = PyArray_NDIM(obj) - 1; + npyarr->curdim = 0; + npyarr->type_num = PyArray_DESCR(obj)->type_num; + + if (GET_TC(tc)->transpose) { + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, 
npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; + } else { + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; + } + + npyarr->columnLabels = GET_TC(tc)->columnLabels; + npyarr->rowLabels = GET_TC(tc)->rowLabels; +} + +void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (npyarr) { + NpyArr_freeItemValue(obj, tc); + PyObject_Free(npyarr); + } +} + +void NpyArrPassThru_iterBegin(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc)) {} + +void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + // finished this dimension, reset the data pointer + npyarr->curdim--; + npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; + npyarr->stridedim -= npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->dataptr += npyarr->stride; + + NpyArr_freeItemValue(obj, tc); +} + +int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + + NpyArr_freeItemValue(obj, tc); + + if (PyArray_ISDATETIME(npyarr->array)) { + GET_TC(tc)->itemValue = obj; + Py_INCREF(obj); + ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); + ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + } else { + GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + } + + npyarr->dataptr += npyarr->stride; + npyarr->index[npyarr->stridedim]++; + return 1; +} + +int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->curdim >= npyarr->ndim || + npyarr->index[npyarr->stridedim] >= npyarr->dim) { + // innermost dimension, start retrieving item values + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + return NpyArr_iterNextItem(_obj, tc); + } + + // dig a dimension deeper + npyarr->index[npyarr->stridedim]++; + + npyarr->curdim++; + npyarr->stridedim += npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->index[npyarr->stridedim] = 0; + + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + GET_TC(tc)->itemValue = npyarr->array; + return 1; +} + +JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + + return cStr; +} + +//============================================================================= +// Pandas block iteration functions +// +// Serialises a DataFrame column by column to avoid unnecessary data copies and +// more representative serialisation when dealing with mixed dtypes. 
+// +// Uses a dedicated NpyArrContext for each column. +//============================================================================= + +void PdBlockPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->transpose) { + blkCtxt->colIdx++; + } else { + blkCtxt->colIdx = 0; + } + + NpyArr_freeItemValue(obj, tc); +} + +int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + blkCtxt->colIdx++; + return NpyArr_iterNextItem(obj, tc); +} + +char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { + idx = blkCtxt->colIdx - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = GET_TC(tc)->iterNext != PdBlock_iterNext + ? npyarr->index[npyarr->stridedim - npyarr->inc] - 1 + : npyarr->index[npyarr->stridedim]; + + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = blkCtxt->colIdx; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (blkCtxt->transpose) { + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + } else { + npyarr = blkCtxt->npyCtxts[0]; + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + } + + ((PyObjectEncoder *)tc->encoder)->blkCtxtPassthru = blkCtxt; + GET_TC(tc)->itemValue = obj; + + return 1; +} + +void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->transpose) { + // if transposed we exhaust each column before moving to the next + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + GET_TC(tc)->iterGetName = PdBlock_iterGetName_Transpose; + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + } +} + +void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj, *values, *arrays, *array; + PdBlockContext *blkCtxt; + NpyArrContext *npyarr; + Py_ssize_t i; + + obj = (PyObject *)_obj; + + GET_TC(tc)->iterGetName = GET_TC(tc)->transpose + ? 
PdBlock_iterGetName_Transpose + : PdBlock_iterGetName; + + blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); + if (!blkCtxt) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + GET_TC(tc)->pdblock = blkCtxt; + + blkCtxt->colIdx = 0; + blkCtxt->transpose = GET_TC(tc)->transpose; + blkCtxt->ncols = get_attr_length(obj, "columns"); + + if (blkCtxt->ncols == 0) { + blkCtxt->npyCtxts = NULL; + + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + blkCtxt->npyCtxts = + PyObject_Malloc(sizeof(NpyArrContext *) * blkCtxt->ncols); + if (!blkCtxt->npyCtxts) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + arrays = get_sub_attr(obj, "_mgr", "column_arrays"); + if (!arrays) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + for (i = 0; i < PyObject_Length(arrays); i++) { + array = PyList_GET_ITEM(arrays, i); + if (!array) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto ARR_RET; + } + + // ensure we have a numpy array (i.e. np.asarray) + values = PyObject_CallMethod(array, "__array__", NULL); + if ((!values) || (!PyArray_CheckExact(values))) { + // Didn't get a numpy array + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto ARR_RET; + } + + GET_TC(tc)->newObj = values; + + // init a dedicated context for this column + NpyArr_iterBegin(obj, tc); + npyarr = GET_TC(tc)->npyarr; + + GET_TC(tc)->itemValue = NULL; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL; + + blkCtxt->npyCtxts[i] = npyarr; + GET_TC(tc)->newObj = NULL; + } + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0]; + goto ARR_RET; + +ARR_RET: + Py_DECREF(arrays); +} + +void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt; + NpyArrContext *npyarr; + int i; + + GET_TC(tc)->itemValue = NULL; + npyarr = GET_TC(tc)->npyarr; + + blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt) { + for (i = 0; i < blkCtxt->ncols; i++) { + npyarr = blkCtxt->npyCtxts[i]; + if (npyarr) { + if (npyarr->array) { + Py_DECREF(npyarr->array); + npyarr->array = NULL; + } + + GET_TC(tc)->npyarr = npyarr; + NpyArr_iterEnd(obj, tc); + + blkCtxt->npyCtxts[i] = NULL; + } + } + + if (blkCtxt->npyCtxts) { + PyObject_Free(blkCtxt->npyCtxts); + } + PyObject_Free(blkCtxt); + } +} + +//============================================================================= +// Tuple iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyTuple_GET_SIZE((PyObject *)obj); + GET_TC(tc)->itemValue = NULL; +} + +int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PyObject *item; + + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + item = PyTuple_GET_ITEM(obj, GET_TC(tc)->index); + + GET_TC(tc)->itemValue = item; + GET_TC(tc)->index++; + return 1; +} + +void Tuple_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Set iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= 
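+// Each item returned by PyIter_Next below is a new reference; it is released
+// on the next Set_iterNext call or in Set_iterEnd.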
+void Set_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->itemValue = NULL; + GET_TC(tc)->iterator = PyObject_GetIter(obj); +} + +int Set_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObject *item; + + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + item = PyIter_Next(GET_TC(tc)->iterator); + + if (item == NULL) { + return 0; + } + + GET_TC(tc)->itemValue = item; + return 1; +} + +void Set_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->iterator) { + Py_DECREF(GET_TC(tc)->iterator); + GET_TC(tc)->iterator = NULL; + } +} + +JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Set_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Dir iteration functions +// itemName ref is borrowed from PyObject_Dir (attrList). No refcount +// itemValue ref is from PyObject_GetAttr. Ref counted +//============================================================================= +void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->attrList = PyObject_Dir(obj); + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); +} + +void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + Py_DECREF((PyObject *)GET_TC(tc)->attrList); +} + +int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj = (PyObject *)_obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = GET_TC(tc)->itemName; + PyObject *attr; + PyObject *attrName; + char *attrStr; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = itemValue = NULL; + } + + if (itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = itemName = NULL; + } + + for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { + attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + attr = PyUnicode_AsUTF8String(attrName); + attrStr = PyBytes_AS_STRING(attr); + + if (attrStr[0] == '_') { + Py_DECREF(attr); + continue; + } + + itemValue = PyObject_GetAttr(obj, attrName); + if (itemValue == NULL) { + PyErr_Clear(); + Py_DECREF(attr); + continue; + } + + if (PyCallable_Check(itemValue)) { + Py_DECREF(itemValue); + Py_DECREF(attr); + continue; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + + itemName = attr; + break; + } + + if (itemName == NULL) { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index++; + + return 1; +} + +JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + 
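+// The Dir_* functions above implement the generic fallback wired up at the
+// end of Object_beginTypeContext: an object that is none of the specially
+// handled types, has no toDict() method, and was encoded without a
+// default_handler is serialized as a JSON object built from its public,
+// non-callable attributes (names starting with '_' and callables are
+// skipped).  A rough sketch of the observable behaviour through the
+// encode()/dumps() entry points this extension exports (the class and exact
+// output below are illustrative only):
+//
+//     class Point:
+//         def __init__(self):
+//             self.x = 1
+//             self.y = 2
+//         def norm(self):          # callable, so it is skipped
+//             return 5
+//
+//     dumps(Point())   # -> '{"x":1,"y":2}'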
+//============================================================================= +// List iteration functions +// itemValue is borrowed from object (which is list). No refcounting +//============================================================================= +void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE((PyObject *)obj); +} + +int List_iterNext(JSOBJ obj, JSONTypeContext *tc) { + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index); + GET_TC(tc)->index++; + return 1; +} + +void List_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *List_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// pandas Index iteration functions +//============================================================================= +void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void Index_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas Series iteration functions +//============================================================================= +void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void Series_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + 
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = enc->originalOutputFormat; +} + +JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas DataFrame iteration functions +//============================================================================= +void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series & index + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "columns", sizeof(char) * 8); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + if (is_simple_frame(obj)) { + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + Py_INCREF(obj); + GET_TC(tc)->itemValue = obj; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void DataFrame_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = enc->originalOutputFormat; +} + +JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// Dict iteration functions +// itemName might converted to string (Python_Str). Do refCounting +// itemValue is borrowed from object (which is dict). 
No refCounting +//============================================================================= +void Dict_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; +} + +int Dict_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObject *itemNameTmp; + + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + if (!PyDict_Next((PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, + &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) { + return 0; + } + + if (PyUnicode_Check(GET_TC(tc)->itemName)) { + GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); + } else if (!PyBytes_Check(GET_TC(tc)->itemName)) { + GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); + itemNameTmp = GET_TC(tc)->itemName; + GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); + Py_DECREF(itemNameTmp); + } else { + Py_INCREF(GET_TC(tc)->itemName); + } + return 1; +} + +void Dict_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + Py_DECREF(GET_TC(tc)->dictObj); +} + +JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + +void NpyArr_freeLabels(char **labels, npy_intp len) { + npy_intp i; + + if (labels) { + for (i = 0; i < len; i++) { + PyObject_Free(labels[i]); + } + PyObject_Free(labels); + } +} + +/* + * Function: NpyArr_encodeLabels + * ----------------------------- + * + * Builds an array of "encoded" labels. + * + * labels: PyArrayObject pointer for labels to be "encoded" + * num : number of labels + * + * "encode" is quoted above because we aren't really doing encoding + * For historical reasons this function would actually encode the entire + * array into a separate buffer with a separate call to JSON_Encode + * and would leave it to complex pointer manipulation from there to + * unpack values as needed. To make things simpler and more idiomatic + * this has instead just stringified any input save for datetime values, + * which may need to be represented in various formats. + */ +char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, + npy_intp num) { + // NOTE this function steals a reference to labels. 
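+    // Summary of the contract, as implemented below: on success the return
+    // value is a PyObject_Malloc'd array of `num` NUL-terminated strings that
+    // the caller releases with NpyArr_freeLabels; on failure 0 is returned
+    // with a Python error set.  Datetime-like labels are rendered as ISO-8601
+    // strings when enc->datetimeIso is set, otherwise as epoch integers in
+    // enc->datetimeUnit, NaT becomes the literal "null", and any other label
+    // is stringified via str().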
+ PyObject *item = NULL; + size_t len; + npy_intp i, stride; + char **ret; + char *dataptr, *cLabel; + int type_num; + NPY_DATETIMEUNIT base = enc->datetimeUnit; + + if (!labels) { + return 0; + } + + if (PyArray_SIZE(labels) < num) { + PyErr_SetString( + PyExc_ValueError, + "Label array sizes do not match corresponding data shape"); + Py_DECREF(labels); + return 0; + } + + ret = PyObject_Malloc(sizeof(char *) * num); + if (!ret) { + PyErr_NoMemory(); + Py_DECREF(labels); + return 0; + } + + for (i = 0; i < num; i++) { + ret[i] = NULL; + } + + stride = PyArray_STRIDE(labels, 0); + dataptr = PyArray_DATA(labels); + type_num = PyArray_TYPE(labels); + + for (i = 0; i < num; i++) { + item = PyArray_GETITEM(labels, dataptr); + if (!item) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + int is_datetimelike = 0; + npy_int64 nanosecVal; + if (PyTypeNum_ISDATETIME(type_num)) { + is_datetimelike = 1; + PyArray_VectorUnaryFunc *castfunc = + PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64); + if (!castfunc) { + PyErr_Format(PyExc_ValueError, + "Cannot cast numpy dtype %d to long", + enc->npyType); + } + castfunc(dataptr, &nanosecVal, 1, NULL, NULL); + } else if (PyDate_Check(item) || PyDelta_Check(item)) { + is_datetimelike = 1; + if (PyObject_HasAttrString(item, "value")) { + nanosecVal = get_long_attr(item, "value"); + } else { + if (PyDelta_Check(item)) { + nanosecVal = total_seconds(item) * + 1000000000LL; // nanoseconds per second + } else { + // datetime.* objects don't follow above rules + nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns); + } + } + } + + if (is_datetimelike) { + if (nanosecVal == get_nat()) { + len = 4; + cLabel = PyObject_Malloc(len + 1); + strncpy(cLabel, "null", len + 1); + } else { + if (enc->datetimeIso) { + if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { + cLabel = int64ToIsoDuration(nanosecVal, &len); + } else { + if (type_num == NPY_DATETIME) { + cLabel = int64ToIso(nanosecVal, base, &len); + } else { + cLabel = PyDateTimeToIso(item, base, &len); + } + } + if (cLabel == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + } else { + int size_of_cLabel = 21; // 21 chars for int 64 + cLabel = PyObject_Malloc(size_of_cLabel); + snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, + NpyDateTimeToEpoch(nanosecVal, base)); + len = strlen(cLabel); + } + } + } else { // Fallback to string representation + // Replace item with the string to keep it alive. 
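+            // (Py_SETREF drops the old reference held in `item` and stores
+            //  the new one in a single step; keeping the str() result alive
+            //  keeps the UTF-8 buffer from PyUnicode_AsUTF8 below valid.)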
+ Py_SETREF(item, PyObject_Str(item)); + if (item == NULL) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(item); + len = strlen(cLabel); + } + + // Add 1 to include NULL terminator + ret[i] = PyObject_Malloc(len + 1); + memcpy(ret[i], cLabel, len + 1); + Py_DECREF(item); + + if (is_datetimelike) { + PyObject_Free(cLabel); + } + + if (PyErr_Occurred()) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + if (!ret[i]) { + PyErr_NoMemory(); + ret = 0; + break; + } + + dataptr += stride; + } + + Py_DECREF(labels); + return ret; +} + +void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc) { + PyObject *tmpObj = NULL; + tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL); + if (!PyErr_Occurred()) { + if (tmpObj == NULL) { + PyErr_SetString(PyExc_TypeError, + "Failed to execute default handler"); + } else { + encode(tmpObj, (JSONObjectEncoder *)enc, NULL, 0); + } + } + Py_XDECREF(tmpObj); + return; +} + +void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj, *exc, *toDictFunc, *tmpObj, *values; + TypeContext *pc; + PyObjectEncoder *enc; + double val; + npy_int64 value; + int unit; + + tc->prv = NULL; + + if (!_obj) { + tc->type = JT_INVALID; + return; + } + + obj = (PyObject *)_obj; + enc = (PyObjectEncoder *)tc->encoder; + + if (PyBool_Check(obj)) { + tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; + return; + } else if (obj == Py_None) { + tc->type = JT_NULL; + return; + } + + pc = createTypeContext(); + if (!pc) { + tc->type = JT_INVALID; + return; + } + tc->prv = pc; + + if (PyTypeNum_ISDATETIME(enc->npyType)) { + int64_t longVal; + PyArray_VectorUnaryFunc *castfunc = + PyArray_GetCastFunc(PyArray_DescrFromType(enc->npyType), NPY_INT64); + if (!castfunc) { + PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long", + enc->npyType); + } + castfunc(enc->npyValue, &longVal, 1, NULL, NULL); + if (longVal == get_nat()) { + tc->type = JT_NULL; + } else { + if (enc->datetimeIso) { + if (enc->npyType == NPY_TIMEDELTA) { + pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; + } else { + pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; + } + // Currently no way to pass longVal to iso function, so use + // state management + GET_TC(tc)->longValue = longVal; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base); + tc->type = JT_LONG; + } + } + + // TODO(username): this prevents infinite loop with + // mixed-type DataFrames; + // refactor + enc->npyCtxtPassthru = NULL; + enc->npyType = -1; + return; + } + + if (PyIter_Check(obj) || + (PyArray_Check(obj) && !PyArray_CheckScalar(obj))) { + goto ISITERABLE; + } + + if (PyLong_Check(obj)) { + tc->type = JT_LONG; + int overflow = 0; + GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow); + int err; + err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred(); + + if (overflow) { + tc->type = JT_BIGNUM; + } else if (err) { + goto INVALID; + } + + return; + } else if (PyFloat_Check(obj)) { + val = PyFloat_AS_DOUBLE(obj); + if (npy_isnan(val) || npy_isinf(val)) { + tc->type = JT_NULL; + } else { + GET_TC(tc)->doubleValue = val; + tc->type = JT_DOUBLE; + } + return; + } else if (PyBytes_Check(obj)) { + pc->PyTypeToUTF8 = PyBytesToUTF8; + tc->type = JT_UTF8; + return; + } else if (PyUnicode_Check(obj)) { + pc->PyTypeToUTF8 = PyUnicodeToUTF8; + tc->type = JT_UTF8; + return; + } else if (PyObject_TypeCheck(obj, type_decimal)) 
{ + GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj); + tc->type = JT_DOUBLE; + return; + } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { + if (PyObject_TypeCheck(obj, cls_nat)) { + tc->type = JT_NULL; + return; + } + + if (enc->datetimeIso) { + pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); + tc->type = JT_LONG; + } + return; + } else if (PyTime_Check(obj)) { + pc->PyTypeToUTF8 = PyTimeToJSON; + tc->type = JT_UTF8; + return; + } else if (PyArray_IsScalar(obj, Datetime)) { + if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) { + tc->type = JT_NULL; + return; + } + + if (enc->datetimeIso) { + pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); + tc->type = JT_LONG; + } + return; + } else if (PyDelta_Check(obj)) { + if (PyObject_HasAttrString(obj, "value")) { + value = get_long_attr(obj, "value"); + } else { + value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec + } + + if (value == get_nat()) { + tc->type = JT_NULL; + return; + } else if (enc->datetimeIso) { + pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; + tc->type = JT_UTF8; + } else { + unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + if (scaleNanosecToUnit(&value, unit) != 0) { + // TODO(username): Add some kind of error handling here + } + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto INVALID; + } + + tc->type = JT_LONG; + } + GET_TC(tc)->longValue = value; + return; + } else if (PyArray_IsScalar(obj, Integer)) { + tc->type = JT_LONG; + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), + PyArray_DescrFromType(NPY_INT64)); + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto INVALID; + } + + return; + } else if (PyArray_IsScalar(obj, Bool)) { + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), + PyArray_DescrFromType(NPY_BOOL)); + tc->type = (GET_TC(tc)->longValue) ? 
JT_TRUE : JT_FALSE; + return; + } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) { + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue), + PyArray_DescrFromType(NPY_DOUBLE)); + tc->type = JT_DOUBLE; + return; + } else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) { + PyErr_Format(PyExc_TypeError, + "%R (0d array) is not JSON serializable at the moment", + obj); + goto INVALID; + } else if (PyObject_TypeCheck(obj, cls_na)) { + tc->type = JT_NULL; + return; + } + +ISITERABLE: + + if (PyObject_TypeCheck(obj, cls_index)) { + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = Index_iterBegin; + pc->iterEnd = Index_iterEnd; + pc->iterNext = Index_iterNext; + pc->iterGetValue = Index_iterGetValue; + pc->iterGetName = Index_iterGetName; + return; + } + + pc->newObj = get_values(obj); + if (pc->newObj) { + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + } else { + goto INVALID; + } + + return; + } else if (PyObject_TypeCheck(obj, cls_series)) { + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = Series_iterBegin; + pc->iterEnd = Series_iterEnd; + pc->iterNext = Series_iterNext; + pc->iterGetValue = Series_iterGetValue; + pc->iterGetName = Series_iterGetName; + return; + } + + pc->newObj = get_values(obj); + if (!pc->newObj) { + goto INVALID; + } + + if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { + tc->type = JT_OBJECT; + tmpObj = PyObject_GetAttrString(obj, "index"); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + Py_DECREF(tmpObj); + if (!values) { + goto INVALID; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + if (!pc->columnLabels) { + goto INVALID; + } + } else { + tc->type = JT_ARRAY; + } + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } else if (PyArray_Check(obj)) { + if (enc->npyCtxtPassthru) { + pc->npyarr = enc->npyCtxtPassthru; + tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); + + pc->iterBegin = NpyArrPassThru_iterBegin; + pc->iterNext = NpyArr_iterNext; + pc->iterEnd = NpyArrPassThru_iterEnd; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + + enc->npyCtxtPassthru = NULL; + return; + } + + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } else if (PyObject_TypeCheck(obj, cls_dataframe)) { + if (enc->blkCtxtPassthru) { + pc->pdblock = enc->blkCtxtPassthru; + tc->type = + (pc->pdblock->npyCtxts[0]->columnLabels ? 
JT_OBJECT : JT_ARRAY); + + pc->iterBegin = PdBlockPassThru_iterBegin; + pc->iterEnd = PdBlockPassThru_iterEnd; + pc->iterNext = PdBlock_iterNextItem; + pc->iterGetName = PdBlock_iterGetName; + pc->iterGetValue = NpyArr_iterGetValue; + + enc->blkCtxtPassthru = NULL; + return; + } + + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = DataFrame_iterBegin; + pc->iterEnd = DataFrame_iterEnd; + pc->iterNext = DataFrame_iterNext; + pc->iterGetValue = DataFrame_iterGetValue; + pc->iterGetName = DataFrame_iterGetName; + return; + } + + if (is_simple_frame(obj)) { + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetName = NpyArr_iterGetName; + + pc->newObj = PyObject_GetAttrString(obj, "values"); + if (!pc->newObj) { + goto INVALID; + } + } else { + pc->iterBegin = PdBlock_iterBegin; + pc->iterEnd = PdBlock_iterEnd; + pc->iterNext = PdBlock_iterNext; + pc->iterGetName = PdBlock_iterGetName; + } + pc->iterGetValue = NpyArr_iterGetValue; + + if (enc->outputFormat == VALUES) { + tc->type = JT_ARRAY; + } else if (enc->outputFormat == RECORDS) { + tc->type = JT_ARRAY; + tmpObj = PyObject_GetAttrString(obj, "columns"); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + goto INVALID; + } + pc->columnLabelsLen = PyObject_Size(tmpObj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + Py_DECREF(tmpObj); + if (!pc->columnLabels) { + goto INVALID; + } + } else if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { + tc->type = JT_OBJECT; + tmpObj = (enc->outputFormat == INDEX + ? PyObject_GetAttrString(obj, "index") + : PyObject_GetAttrString(obj, "columns")); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + goto INVALID; + } + pc->rowLabelsLen = PyObject_Size(tmpObj); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->rowLabelsLen); + Py_DECREF(tmpObj); + tmpObj = (enc->outputFormat == INDEX + ? 
PyObject_GetAttrString(obj, "columns") + : PyObject_GetAttrString(obj, "index")); + if (!tmpObj) { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + pc->columnLabelsLen = PyObject_Size(tmpObj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + Py_DECREF(tmpObj); + if (!pc->columnLabels) { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + + if (enc->outputFormat == COLUMNS) { + pc->transpose = 1; + } + } else { + goto INVALID; + } + return; + } else if (PyDict_Check(obj)) { + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = obj; + Py_INCREF(obj); + + return; + } else if (PyList_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = List_iterBegin; + pc->iterEnd = List_iterEnd; + pc->iterNext = List_iterNext; + pc->iterGetValue = List_iterGetValue; + pc->iterGetName = List_iterGetName; + return; + } else if (PyTuple_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = Tuple_iterBegin; + pc->iterEnd = Tuple_iterEnd; + pc->iterNext = Tuple_iterNext; + pc->iterGetValue = Tuple_iterGetValue; + pc->iterGetName = Tuple_iterGetName; + return; + } else if (PyAnySet_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = Set_iterBegin; + pc->iterEnd = Set_iterEnd; + pc->iterNext = Set_iterNext; + pc->iterGetValue = Set_iterGetValue; + pc->iterGetName = Set_iterGetName; + return; + } + + toDictFunc = PyObject_GetAttrString(obj, "toDict"); + + if (toDictFunc) { + PyObject *tuple = PyTuple_New(0); + PyObject *toDictResult = PyObject_Call(toDictFunc, tuple, NULL); + Py_DECREF(tuple); + Py_DECREF(toDictFunc); + + if (toDictResult == NULL) { + PyErr_Clear(); + tc->type = JT_NULL; + return; + } + + if (!PyDict_Check(toDictResult)) { + Py_DECREF(toDictResult); + tc->type = JT_NULL; + return; + } + + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = toDictResult; + return; + } + + PyErr_Clear(); + + if (enc->defaultHandler) { + Object_invokeDefaultHandler(obj, enc); + goto INVALID; + } + + tc->type = JT_OBJECT; + pc->iterBegin = Dir_iterBegin; + pc->iterEnd = Dir_iterEnd; + pc->iterNext = Dir_iterNext; + pc->iterGetValue = Dir_iterGetValue; + pc->iterGetName = Dir_iterGetName; + return; + +INVALID: + tc->type = JT_INVALID; + PyObject_Free(tc->prv); + tc->prv = NULL; + return; +} + +void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (tc->prv) { + Py_XDECREF(GET_TC(tc)->newObj); + GET_TC(tc)->newObj = NULL; + NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen); + GET_TC(tc)->rowLabels = NULL; + NpyArr_freeLabels(GET_TC(tc)->columnLabels, + GET_TC(tc)->columnLabelsLen); + GET_TC(tc)->columnLabels = NULL; + PyObject_Free(GET_TC(tc)->cStr); + GET_TC(tc)->cStr = NULL; + PyObject_Free(tc->prv); + tc->prv = NULL; + } +} + +const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen) { + return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen); +} + +JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return 
GET_TC(tc)->longValue; +} + +double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->doubleValue; +} + +const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen) { + PyObject *repr = PyObject_Str(obj); + const char *str = PyUnicode_AsUTF8AndSize(repr, (Py_ssize_t *)_outLen); + char *bytes = PyObject_Malloc(*_outLen + 1); + memcpy(bytes, str, *_outLen + 1); + GET_TC(tc)->cStr = bytes; + + Py_DECREF(repr); + + return GET_TC(tc)->cStr; +} + +static void Object_releaseObject(JSOBJ _obj) { Py_DECREF((PyObject *)_obj); } + +void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->iterBegin(obj, tc); +} + +int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) { + return GET_TC(tc)->iterNext(obj, tc); +} + +void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->iterEnd(obj, tc); +} + +JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { + return GET_TC(tc)->iterGetValue(obj, tc); +} + +char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { + return GET_TC(tc)->iterGetName(obj, tc, outLen); +} + +PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, + PyObject *kwargs) { + static char *kwlist[] = {"obj", + "ensure_ascii", + "double_precision", + "encode_html_chars", + "orient", + "date_unit", + "iso_dates", + "default_handler", + "indent", + NULL}; + + char buffer[65536]; + char *ret; + PyObject *newobj; + PyObject *oinput = NULL; + PyObject *oensureAscii = NULL; + int idoublePrecision = 10; // default double precision setting + PyObject *oencodeHTMLChars = NULL; + char *sOrient = NULL; + char *sdateFormat = NULL; + PyObject *oisoDates = 0; + PyObject *odefHandler = 0; + int indent = 0; + + PyObjectEncoder pyEncoder = {{ + Object_beginTypeContext, + Object_endTypeContext, + Object_getStringValue, + Object_getLongValue, + NULL, // getIntValue is unused + Object_getDoubleValue, + Object_getBigNumStringValue, + Object_iterBegin, + Object_iterNext, + Object_iterEnd, + Object_iterGetValue, + Object_iterGetName, + Object_releaseObject, + PyObject_Malloc, + PyObject_Realloc, + PyObject_Free, + -1, // recursionMax + idoublePrecision, + 1, // forceAscii + 0, // encodeHTMLChars + 0, // indent + }}; + JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder; + + pyEncoder.npyCtxtPassthru = NULL; + pyEncoder.blkCtxtPassthru = NULL; + pyEncoder.npyType = -1; + pyEncoder.npyValue = NULL; + pyEncoder.datetimeIso = 0; + pyEncoder.datetimeUnit = NPY_FR_ms; + pyEncoder.outputFormat = COLUMNS; + pyEncoder.defaultHandler = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, + &oinput, &oensureAscii, &idoublePrecision, + &oencodeHTMLChars, &sOrient, &sdateFormat, + &oisoDates, &odefHandler, &indent)) { + return NULL; + } + + if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) { + encoder->forceASCII = 0; + } + + if (oencodeHTMLChars != NULL && PyObject_IsTrue(oencodeHTMLChars)) { + encoder->encodeHTMLChars = 1; + } + + if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) { + PyErr_Format( + PyExc_ValueError, + "Invalid value '%d' for option 'double_precision', max is '%u'", + idoublePrecision, JSON_DOUBLE_MAX_DECIMALS); + return NULL; + } + encoder->doublePrecision = idoublePrecision; + + if (sOrient != NULL) { + if (strcmp(sOrient, "records") == 0) { + pyEncoder.outputFormat = RECORDS; + } else if (strcmp(sOrient, "index") == 0) { + pyEncoder.outputFormat = INDEX; + } else if (strcmp(sOrient, "split") == 0) { + pyEncoder.outputFormat 
= SPLIT; + } else if (strcmp(sOrient, "values") == 0) { + pyEncoder.outputFormat = VALUES; + } else if (strcmp(sOrient, "columns") != 0) { + PyErr_Format(PyExc_ValueError, + "Invalid value '%s' for option 'orient'", sOrient); + return NULL; + } + } + + if (sdateFormat != NULL) { + if (strcmp(sdateFormat, "s") == 0) { + pyEncoder.datetimeUnit = NPY_FR_s; + } else if (strcmp(sdateFormat, "ms") == 0) { + pyEncoder.datetimeUnit = NPY_FR_ms; + } else if (strcmp(sdateFormat, "us") == 0) { + pyEncoder.datetimeUnit = NPY_FR_us; + } else if (strcmp(sdateFormat, "ns") == 0) { + pyEncoder.datetimeUnit = NPY_FR_ns; + } else { + PyErr_Format(PyExc_ValueError, + "Invalid value '%s' for option 'date_unit'", + sdateFormat); + return NULL; + } + } + + if (oisoDates != NULL && PyObject_IsTrue(oisoDates)) { + pyEncoder.datetimeIso = 1; + } + + if (odefHandler != NULL && odefHandler != Py_None) { + if (!PyCallable_Check(odefHandler)) { + PyErr_SetString(PyExc_TypeError, "Default handler is not callable"); + return NULL; + } + pyEncoder.defaultHandler = odefHandler; + } + + encoder->indent = indent; + + pyEncoder.originalOutputFormat = pyEncoder.outputFormat; + ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); + if (PyErr_Occurred()) { + return NULL; + } + + if (encoder->errorMsg) { + if (ret != buffer) { + encoder->free(ret); + } + PyErr_Format(PyExc_OverflowError, "%s", encoder->errorMsg); + return NULL; + } + + newobj = PyUnicode_FromString(ret); + + if (ret != buffer) { + encoder->free(ret); + } + + return newobj; +} diff --git a/pandas/_libs/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c new file mode 100644 index 00000000..5d4a5693 --- /dev/null +++ b/pandas/_libs/src/ujson/python/ujson.c @@ -0,0 +1,81 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. 
+ +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include "version.h" +#define PY_SSIZE_T_CLEAN +#include +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#include "numpy/arrayobject.h" + +/* objToJSON */ +PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs); +void *initObjToJSON(void); + +/* JSONToObj */ +PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs); + +#define ENCODER_HELP_TEXT \ + "Use ensure_ascii=false to output UTF-8. Pass in double_precision to " \ + "alter the maximum digit precision of doubles. Set " \ + "encode_html_chars=True to encode < > & as unicode escape sequences." + +static PyMethodDef ujsonMethods[] = { + {"encode", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS, + "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, + {"decode", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS, + "Converts JSON as string to dict object structure. Use precise_float=True " + "to use high precision float decoder."}, + {"dumps", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS, + "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, + {"loads", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS, + "Converts JSON as string to dict object structure. Use precise_float=True " + "to use high precision float decoder."}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static PyModuleDef moduledef = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_libjson", + .m_methods = ujsonMethods +}; + + +PyMODINIT_FUNC PyInit_json(void) { + import_array() + initObjToJSON(); // TODO(username): clean up, maybe via tp_free? + return PyModuleDef_Init(&moduledef); +} diff --git a/pandas/_libs/src/ujson/python/version.h b/pandas/_libs/src/ujson/python/version.h new file mode 100644 index 00000000..15c55309 --- /dev/null +++ b/pandas/_libs/src/ujson/python/version.h @@ -0,0 +1,43 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ + +#define UJSON_VERSION "1.33" + +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ diff --git a/pandas/_libs/testing.pyi b/pandas/_libs/testing.pyi new file mode 100644 index 00000000..01da4969 --- /dev/null +++ b/pandas/_libs/testing.pyi @@ -0,0 +1,12 @@ +def assert_dict_equal(a, b, compare_keys: bool = ...): ... +def assert_almost_equal( + a, + b, + rtol: float = ..., + atol: float = ..., + check_dtype: bool = ..., + obj=..., + lobj=..., + robj=..., + index_values=..., +): ... diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx new file mode 100644 index 00000000..11d8fe6e --- /dev/null +++ b/pandas/_libs/testing.pyx @@ -0,0 +1,212 @@ +import cmath +import math + +import numpy as np + +from numpy cimport import_array + +import_array() + +from pandas._libs.util cimport ( + is_array, + is_complex_object, + is_real_number_object, +) + +from pandas.core.dtypes.common import is_dtype_equal +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, +) + + +cdef bint isiterable(obj): + return hasattr(obj, '__iter__') + + +cdef bint has_length(obj): + return hasattr(obj, '__len__') + + +cdef bint is_dictlike(obj): + return hasattr(obj, 'keys') and hasattr(obj, '__getitem__') + + +cpdef assert_dict_equal(a, b, bint compare_keys=True): + assert is_dictlike(a) and is_dictlike(b), ( + "Cannot compare dict objects, one or both is not dict-like" + ) + + a_keys = frozenset(a.keys()) + b_keys = frozenset(b.keys()) + + if compare_keys: + assert a_keys == b_keys + + for k in a_keys: + assert_almost_equal(a[k], b[k]) + + return True + + +cpdef assert_almost_equal(a, b, + rtol=1.e-5, atol=1.e-8, + bint check_dtype=True, + obj=None, lobj=None, robj=None, index_values=None): + """ + Check that left and right objects are almost equal. + + Parameters + ---------- + a : object + b : object + rtol : float, default 1e-5 + Relative tolerance. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. + + .. versionadded:: 1.1.0 + check_dtype: bool, default True + check dtype if both a and b are np.ndarray. + obj : str, default None + Specify object name being compared, internally used to show + appropriate assertion message. + lobj : str, default None + Specify left object name being compared, internally used to show + appropriate assertion message. + robj : str, default None + Specify right object name being compared, internally used to show + appropriate assertion message. 
+ index_values : ndarray, default None + Specify shared index values of objects being compared, internally used + to show appropriate assertion message. + + .. versionadded:: 1.1.0 + + """ + cdef: + double diff = 0.0 + Py_ssize_t i, na, nb + double fa, fb + bint is_unequal = False, a_is_ndarray, b_is_ndarray + + if lobj is None: + lobj = a + if robj is None: + robj = b + + if isinstance(a, dict) or isinstance(b, dict): + return assert_dict_equal(a, b) + + if isinstance(a, str) or isinstance(b, str): + assert a == b, f"{a} != {b}" + return True + + a_is_ndarray = is_array(a) + b_is_ndarray = is_array(b) + + if obj is None: + if a_is_ndarray or b_is_ndarray: + obj = 'numpy array' + else: + obj = 'Iterable' + + if isiterable(a): + + if not isiterable(b): + from pandas._testing import assert_class_equal + + # classes can't be the same, to raise error + assert_class_equal(a, b, obj=obj) + + assert has_length(a) and has_length(b), ( + f"Can't compare objects without length, one or both is invalid: ({a}, {b})" + ) + + if a_is_ndarray and b_is_ndarray: + na, nb = a.size, b.size + if a.shape != b.shape: + from pandas._testing import raise_assert_detail + raise_assert_detail( + obj, f'{obj} shapes are different', a.shape, b.shape) + + if check_dtype and not is_dtype_equal(a.dtype, b.dtype): + from pandas._testing import assert_attr_equal + assert_attr_equal('dtype', a, b, obj=obj) + + if array_equivalent(a, b, strict_nan=True): + return True + + else: + na, nb = len(a), len(b) + + if na != nb: + from pandas._testing import raise_assert_detail + + # if we have a small diff set, print it + if abs(na - nb) < 10: + r = list(set(a) ^ set(b)) + else: + r = None + + raise_assert_detail(obj, f"{obj} length are different", na, nb, r) + + for i in range(len(a)): + try: + assert_almost_equal(a[i], b[i], rtol=rtol, atol=atol) + except AssertionError: + is_unequal = True + diff += 1 + + if is_unequal: + from pandas._testing import raise_assert_detail + msg = (f"{obj} values are different " + f"({np.round(diff * 100.0 / na, 5)} %)") + raise_assert_detail(obj, msg, lobj, robj, index_values=index_values) + + return True + + elif isiterable(b): + from pandas._testing import assert_class_equal + + # classes can't be the same, to raise error + assert_class_equal(a, b, obj=obj) + + if isna(a) and isna(b): + # TODO: Should require same-dtype NA? 
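+        # (both sides being any NA-like value -- e.g. np.nan on one side and
+        #  None on the other -- is treated as equal here)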
+ # nan / None comparison + return True + + if isna(a) and not isna(b) or not isna(a) and isna(b): + # boolean value of pd.NA is ambigous + raise AssertionError(f"{a} != {b}") + + if a == b: + # object comparison + return True + + if is_real_number_object(a) and is_real_number_object(b): + if array_equivalent(a, b, strict_nan=True): + # inf comparison + return True + + fa, fb = a, b + + if not math.isclose(fa, fb, rel_tol=rtol, abs_tol=atol): + assert False, (f"expected {fb:.5f} but got {fa:.5f}, " + f"with rtol={rtol}, atol={atol}") + return True + + if is_complex_object(a) and is_complex_object(b): + if array_equivalent(a, b, strict_nan=True): + # inf comparison + return True + + if not cmath.isclose(a, b, rel_tol=rtol, abs_tol=atol): + assert False, (f"expected {b:.5f} but got {a:.5f}, " + f"with rtol={rtol}, atol={atol}") + return True + + raise AssertionError(f"{a} != {b}") diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi new file mode 100644 index 00000000..2212f8db --- /dev/null +++ b/pandas/_libs/tslib.pyi @@ -0,0 +1,29 @@ +from datetime import tzinfo + +import numpy as np + +from pandas._typing import npt + +def format_array_from_datetime( + values: npt.NDArray[np.int64], + tz: tzinfo | None = ..., + format: str | None = ..., + na_rep: object = ..., + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.object_]: ... +def array_with_unit_to_datetime( + values: np.ndarray, + unit: str, + errors: str = ..., +) -> tuple[np.ndarray, tzinfo | None]: ... +def array_to_datetime( + values: npt.NDArray[np.object_], + errors: str = ..., + dayfirst: bool = ..., + yearfirst: bool = ..., + utc: bool = ..., + require_iso8601: bool = ..., + allow_mixed: bool = ..., +) -> tuple[np.ndarray, tzinfo | None]: ... + +# returned ndarray may be object dtype or datetime64[ns] diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx new file mode 100644 index 00000000..a24a07b4 --- /dev/null +++ b/pandas/_libs/tslib.pyx @@ -0,0 +1,857 @@ +import warnings + +cimport cython +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + datetime, + import_datetime, + tzinfo, +) + +from pandas.util._exceptions import find_stack_level + +# import datetime C API +import_datetime() + + +cimport numpy as cnp +from numpy cimport ( + float64_t, + int64_t, + ndarray, +) + +import numpy as np + +cnp.import_array() + +import pytz + +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_ns, + check_dts_bounds, + dtstruct_to_dt64, + get_datetime64_value, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, + pydate_to_dt64, + pydatetime_to_dt64, + string_to_dts, +) +from pandas._libs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) + +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.parsing import parse_datetime_string + +from pandas._libs.tslibs.conversion cimport ( + _TSObject, + cast_from_unit, + convert_datetime_to_tsobject, + get_datetime64_nanos, + precision_from_unit, +) +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + c_nat_strings as nat_strings, +) +from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport tz_compare + +from pandas._libs.tslibs import ( + Resolution, + get_resolution, +) +from pandas._libs.tslibs.timestamps import Timestamp + +# Note: this is the only non-tslibs intra-pandas dependency here + +from pandas._libs.missing cimport checknull_with_nat_and_na +from pandas._libs.tslibs.tzconversion cimport 
tz_localize_to_utc_single + + +def _test_parse_iso8601(ts: str): + """ + TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used + only for testing, actual construction uses `convert_str_to_tsobject` + """ + cdef: + _TSObject obj + int out_local = 0, out_tzoffset = 0 + NPY_DATETIMEUNIT out_bestunit + + obj = _TSObject() + + if ts == 'now': + return Timestamp.utcnow() + elif ts == 'today': + return Timestamp.now().normalize() + + string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True) + obj.value = dtstruct_to_dt64(&obj.dts) + check_dts_bounds(&obj.dts) + if out_local == 1: + obj.tzinfo = pytz.FixedOffset(out_tzoffset) + obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) + return Timestamp(obj.value, tz=obj.tzinfo) + else: + return Timestamp(obj.value) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def format_array_from_datetime( + ndarray values, + tzinfo tz=None, + str format=None, + object na_rep=None, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +) -> np.ndarray: + """ + return a np object array of the string formatted values + + Parameters + ---------- + values : a 1-d i8 array + tz : tzinfo or None, default None + format : str or None, default None + a strftime capable string + na_rep : optional, default is None + a nat format + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + np.ndarray[object] + """ + cdef: + int64_t val, ns, N = values.size + bint show_ms = False, show_us = False, show_ns = False + bint basic_format = False, basic_format_day = False + _Timestamp ts + object res + npy_datetimestruct dts + + # Note that `result` (and thus `result_flat`) is C-order and + # `it` iterates C-order as well, so the iteration matches + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + ndarray result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) + object[::1] res_flat = result.ravel() # should NOT be a copy + cnp.flatiter it = cnp.PyArray_IterNew(values) + + if na_rep is None: + na_rep = 'NaT' + + if tz is None: + # if we don't have a format nor tz, then choose + # a format based on precision + basic_format = format is None + if basic_format: + reso_obj = get_resolution(values, tz=tz, reso=reso) + show_ns = reso_obj == Resolution.RESO_NS + show_us = reso_obj == Resolution.RESO_US + show_ms = reso_obj == Resolution.RESO_MS + + elif format == "%Y-%m-%d %H:%M:%S": + # Same format as default, but with hardcoded precision (s) + basic_format = True + show_ns = show_us = show_ms = False + + elif format == "%Y-%m-%d %H:%M:%S.%f": + # Same format as default, but with hardcoded precision (us) + basic_format = show_us = True + show_ns = show_ms = False + + elif format == "%Y-%m-%d": + # Default format for dates + basic_format_day = True + + assert not (basic_format_day and basic_format) + + for i in range(N): + # Analogous to: utc_val = values[i] + val = (cnp.PyArray_ITER_DATA(it))[0] + + if val == NPY_NAT: + res = na_rep + elif basic_format_day: + + pandas_datetime_to_datetimestruct(val, reso, &dts) + res = f'{dts.year}-{dts.month:02d}-{dts.day:02d}' + + elif basic_format: + + pandas_datetime_to_datetimestruct(val, reso, &dts) + res = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} ' + f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}') + + if show_ns: + ns = dts.ps // 1000 + res += f'.{ns + dts.us * 1000:09d}' + elif show_us: + res += f'.{dts.us:06d}' + elif show_ms: + res += f'.{dts.us // 1000:03d}' + + else: + + ts = Timestamp._from_value_and_reso(val, reso=reso, tz=tz) + if format is 
None: + # Use datetime.str, that returns ts.isoformat(sep=' ') + res = str(ts) + else: + + # invalid format string + # requires dates > 1900 + try: + # Note: dispatches to pydatetime + res = ts.strftime(format) + except ValueError: + # Use datetime.str, that returns ts.isoformat(sep=' ') + res = str(ts) + + # Note: we can index result directly instead of using PyArray_MultiIter_DATA + # like we do for the other functions because result is known C-contiguous + # and is the first argument to PyArray_MultiIterNew2. The usual pattern + # does not seem to work with object dtype. + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + res_flat[i] = res + + cnp.PyArray_ITER_NEXT(it) + + return result + + +def array_with_unit_to_datetime( + ndarray values, + str unit, + str errors="coerce" +): + """ + Convert the ndarray to datetime according to the time unit. + + This function converts an array of objects into a numpy array of + datetime64[ns]. It returns the converted array + and also returns the timezone offset + + if errors: + - raise: return converted values or raise OutOfBoundsDatetime + if out of range on the conversion or + ValueError for other conversions (e.g. a string) + - ignore: return non-convertible values as the same unit + - coerce: NaT for non-convertibles + + Parameters + ---------- + values : ndarray + Date-like objects to convert. + unit : str + Time unit to use during conversion. + errors : str, default 'raise' + Error behavior when parsing. + + Returns + ------- + result : ndarray of m8 values + tz : parsed timezone offset or None + """ + cdef: + Py_ssize_t i, j, n=len(values) + int64_t mult + int prec = 0 + ndarray[float64_t] fvalues + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + bint is_raise = errors=='raise' + bint need_to_iterate = True + ndarray[int64_t] iresult + ndarray[object] oresult + ndarray mask + object tz = None + + assert is_ignore or is_coerce or is_raise + + if unit == "ns": + if issubclass(values.dtype.type, (np.integer, np.float_)): + result = values.astype("M8[ns]", copy=False) + else: + result, tz = array_to_datetime( + values.astype(object, copy=False), + errors=errors, + ) + return result, tz + + mult, _ = precision_from_unit(unit) + + if is_raise: + # try a quick conversion to i8/f8 + # if we have nulls that are not type-compat + # then need to iterate + + if values.dtype.kind in ["i", "f", "u"]: + iresult = values.astype("i8", copy=False) + # fill missing values by comparing to NPY_NAT + mask = iresult == NPY_NAT + iresult[mask] = 0 + fvalues = iresult.astype("f8") * mult + need_to_iterate = False + + if not need_to_iterate: + # check the bounds + if (fvalues < Timestamp.min.value).any() or ( + (fvalues > Timestamp.max.value).any() + ): + raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") + + if values.dtype.kind in ["i", "u"]: + result = (iresult * mult).astype("M8[ns]") + + elif values.dtype.kind == "f": + fresult = (values * mult).astype("f8") + fresult[mask] = 0 + if prec: + fresult = round(fresult, prec) + result = fresult.astype("M8[ns]", copy=False) + + iresult = result.view("i8") + iresult[mask] = NPY_NAT + + return result, tz + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + + try: + for i in range(n): + val = values[i] + + if checknull_with_nat_and_na(val): + iresult[i] = NPY_NAT + + elif is_integer_object(val) or is_float_object(val): + + if val != val or val == NPY_NAT: + iresult[i] = NPY_NAT + else: + try: + iresult[i] = 
cast_from_unit(val, unit) + except OverflowError: + if is_raise: + raise OutOfBoundsDatetime( + f"cannot convert input {val} with the unit '{unit}'" + ) + elif is_ignore: + raise AssertionError + iresult[i] = NPY_NAT + + elif isinstance(val, str): + if len(val) == 0 or val in nat_strings: + iresult[i] = NPY_NAT + + else: + try: + iresult[i] = cast_from_unit(float(val), unit) + except ValueError: + if is_raise: + raise ValueError( + f"non convertible value {val} with the unit '{unit}'" + ) + elif is_ignore: + raise AssertionError + iresult[i] = NPY_NAT + except OverflowError: + if is_raise: + raise OutOfBoundsDatetime( + f"cannot convert input {val} with the unit '{unit}'" + ) + elif is_ignore: + raise AssertionError + iresult[i] = NPY_NAT + + else: + + if is_raise: + raise ValueError( + f"unit='{unit}' not valid with non-numerical val='{val}'" + ) + if is_ignore: + raise AssertionError + + iresult[i] = NPY_NAT + + return result, tz + + except AssertionError: + pass + + # we have hit an exception + # and are in ignore mode + # redo as object + + oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) + for i in range(n): + val = values[i] + + if checknull_with_nat_and_na(val): + oresult[i] = NaT + elif is_integer_object(val) or is_float_object(val): + + if val != val or val == NPY_NAT: + oresult[i] = NaT + else: + try: + oresult[i] = Timestamp(cast_from_unit(val, unit)) + except OverflowError: + oresult[i] = val + + elif isinstance(val, str): + if len(val) == 0 or val in nat_strings: + oresult[i] = NaT + + else: + oresult[i] = val + + return oresult, tz + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef array_to_datetime( + ndarray[object] values, + str errors='raise', + bint dayfirst=False, + bint yearfirst=False, + bint utc=False, + bint require_iso8601=False, + bint allow_mixed=False, +): + """ + Converts a 1D array of date-like values to a numpy array of either: + 1) datetime64[ns] data + 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError + is encountered + + Also returns a pytz.FixedOffset if an array of strings with the same + timezone offset is passed and utc=True is not passed. Otherwise, None + is returned + + Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, + strings + + Parameters + ---------- + values : ndarray of object + date-like objects to convert + errors : str, default 'raise' + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + utc : bool, default False + indicator whether the dates should be UTC + require_iso8601 : bool, default False + indicator whether the datetime string should be iso8601 + allow_mixed : bool, default False + Whether to allow mixed datetimes and integers. 
+ + Returns + ------- + np.ndarray + May be datetime64[ns] or object dtype + tzinfo or None + """ + cdef: + Py_ssize_t i, n = len(values) + object val, tz + ndarray[int64_t] iresult + ndarray[object] oresult + npy_datetimestruct dts + NPY_DATETIMEUNIT out_bestunit + bint utc_convert = bool(utc) + bint seen_integer = False + bint seen_string = False + bint seen_datetime = False + bint seen_datetime_offset = False + bint is_raise = errors=='raise' + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + bint is_same_offsets + _TSObject _ts + int64_t value + int out_local = 0, out_tzoffset = 0 + float offset_seconds, tz_offset + set out_tzoffset_vals = set() + bint string_to_dts_failed + datetime py_dt + tzinfo tz_out = None + bint found_tz = False, found_naive = False + + # specify error conditions + assert is_raise or is_ignore or is_coerce + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + + try: + for i in range(n): + val = values[i] + + try: + if checknull_with_nat_and_na(val): + iresult[i] = NPY_NAT + + elif PyDateTime_Check(val): + seen_datetime = True + if val.tzinfo is not None: + found_tz = True + if utc_convert: + _ts = convert_datetime_to_tsobject(val, None) + iresult[i] = _ts.value + elif found_naive: + raise ValueError('Tz-aware datetime.datetime ' + 'cannot be converted to ' + 'datetime64 unless utc=True') + elif tz_out is not None and not tz_compare(tz_out, val.tzinfo): + raise ValueError('Tz-aware datetime.datetime ' + 'cannot be converted to ' + 'datetime64 unless utc=True') + else: + found_tz = True + tz_out = val.tzinfo + _ts = convert_datetime_to_tsobject(val, None) + iresult[i] = _ts.value + + else: + found_naive = True + if found_tz and not utc_convert: + raise ValueError('Cannot mix tz-aware with ' + 'tz-naive values') + if isinstance(val, _Timestamp): + iresult[i] = val.value + else: + iresult[i] = pydatetime_to_dt64(val, &dts) + check_dts_bounds(&dts) + + elif PyDate_Check(val): + seen_datetime = True + iresult[i] = pydate_to_dt64(val, &dts) + check_dts_bounds(&dts) + + elif is_datetime64_object(val): + seen_datetime = True + iresult[i] = get_datetime64_nanos(val) + + elif is_integer_object(val) or is_float_object(val): + # these must be ns unit by-definition + seen_integer = True + + if val != val or val == NPY_NAT: + iresult[i] = NPY_NAT + elif is_raise or is_ignore: + iresult[i] = val + else: + # coerce + # we now need to parse this as if unit='ns' + # we can ONLY accept integers at this point + # if we have previously (or in future accept + # datetimes/strings, then we must coerce) + try: + iresult[i] = cast_from_unit(val, 'ns') + except OverflowError: + iresult[i] = NPY_NAT + + elif isinstance(val, str): + # string + seen_string = True + if type(val) is not str: + # GH#32264 np.str_ object + val = str(val) + + if len(val) == 0 or val in nat_strings: + iresult[i] = NPY_NAT + continue + + string_to_dts_failed = string_to_dts( + val, &dts, &out_bestunit, &out_local, + &out_tzoffset, False + ) + if string_to_dts_failed: + # An error at this point is a _parsing_ error + # specifically _not_ OutOfBoundsDatetime + if _parse_today_now(val, &iresult[i], utc): + continue + elif require_iso8601: + # if requiring iso8601 strings, skip trying + # other formats + if is_coerce: + iresult[i] = NPY_NAT + continue + elif is_raise: + raise ValueError( + f"time data \"{val}\" at position {i} doesn't match format specified" + ) + return values, tz_out + + try: + py_dt = parse_datetime_string(val, + dayfirst=dayfirst, + yearfirst=yearfirst) + # 
If the dateutil parser returned tzinfo, capture it + # to check if all arguments have the same tzinfo + tz = py_dt.utcoffset() + + except (ValueError, OverflowError): + if is_coerce: + iresult[i] = NPY_NAT + continue + raise TypeError(f"invalid string coercion to datetime for \"{val}\" at position {i}") + + if tz is not None: + seen_datetime_offset = True + # dateutil timezone objects cannot be hashed, so + # store the UTC offsets in seconds instead + out_tzoffset_vals.add(tz.total_seconds()) + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') + + _ts = convert_datetime_to_tsobject(py_dt, None) + iresult[i] = _ts.value + if not string_to_dts_failed: + # No error reported by string_to_dts, pick back up + # where we left off + value = dtstruct_to_dt64(&dts) + if out_local == 1: + seen_datetime_offset = True + # Store the out_tzoffset in seconds + # since we store the total_seconds of + # dateutil.tz.tzoffset objects + out_tzoffset_vals.add(out_tzoffset * 60.) + tz = pytz.FixedOffset(out_tzoffset) + value = tz_localize_to_utc_single(value, tz) + out_local = 0 + out_tzoffset = 0 + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') + iresult[i] = value + check_dts_bounds(&dts) + + else: + if is_coerce: + iresult[i] = NPY_NAT + else: + raise TypeError(f"{type(val)} is not convertible to datetime") + + except OutOfBoundsDatetime as ex: + ex.args = (str(ex) + f" present at position {i}", ) + if is_coerce: + iresult[i] = NPY_NAT + continue + elif require_iso8601 and isinstance(val, str): + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if is_raise: + + # Still raise OutOfBoundsDatetime, + # as error message is informative. + raise + + assert is_ignore + return values, tz_out + raise + + except OutOfBoundsDatetime: + if is_raise: + raise + + return ignore_errors_out_of_bounds_fallback(values), tz_out + + except TypeError: + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) + + if seen_datetime and seen_integer: + # we have mixed datetimes & integers + + if is_coerce: + # coerce all of the integers/floats to NaT, preserve + # the datetimes and other convertibles + for i in range(n): + val = values[i] + if is_integer_object(val) or is_float_object(val): + result[i] = NPY_NAT + elif allow_mixed: + pass + elif is_raise: + raise ValueError("mixed datetimes and integers in passed array") + else: + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) + + if seen_datetime_offset and not utc_convert: + # GH#17697 + # 1) If all the offsets are equal, return one offset for + # the parsed dates to (maybe) pass to DatetimeIndex + # 2) If the offsets are different, then force the parsing down the + # object path where an array of datetimes + # (with individual dateutil.tzoffsets) are returned + is_same_offsets = len(out_tzoffset_vals) == 1 + if not is_same_offsets: + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) + else: + tz_offset = out_tzoffset_vals.pop() + tz_out = pytz.FixedOffset(tz_offset / 60.) 
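The branch above (GH#17697) decides whether the parsed offsets can be represented by one fixed-offset timezone. A minimal pure-Python sketch of that rule follows; the sample offsets are assumed for illustration only:

import pytz

def consolidate_offsets(out_tzoffset_vals):
    # One distinct UTC offset (stored in seconds) -> a single fixed-offset tz;
    # mixed offsets -> None, i.e. fall back to object-dtype parsing.
    if len(out_tzoffset_vals) == 1:
        return pytz.FixedOffset(out_tzoffset_vals.pop() / 60.)
    return None

consolidate_offsets({3600.0})            # pytz.FixedOffset(60), i.e. UTC+01:00
consolidate_offsets({3600.0, 7200.0})    # None -> object-dtype fallback path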
+ return result, tz_out + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values): + """ + Fallback for array_to_datetime if an OutOfBoundsDatetime is raised + and errors == "ignore" + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object val + + oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) + + for i in range(n): + val = values[i] + + # set as nan except if its a NaT + if checknull_with_nat_and_na(val): + if isinstance(val, float): + oresult[i] = np.nan + else: + oresult[i] = NaT + elif is_datetime64_object(val): + if get_datetime64_value(val) == NPY_NAT: + oresult[i] = NaT + else: + oresult[i] = val.item() + else: + oresult[i] = val + return oresult + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef _array_to_datetime_object( + ndarray[object] values, + str errors, + bint dayfirst=False, + bint yearfirst=False, +): + """ + Fall back function for array_to_datetime + + Attempts to parse datetime strings with dateutil to return an array + of datetime objects + + Parameters + ---------- + values : ndarray[object] + date-like objects to convert + errors : str + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + + Returns + ------- + np.ndarray[object] + Literal[None] + """ + cdef: + Py_ssize_t i, n = len(values) + object val + bint is_ignore = errors == 'ignore' + bint is_coerce = errors == 'coerce' + bint is_raise = errors == 'raise' + ndarray[object] oresult + npy_datetimestruct dts + + assert is_raise or is_ignore or is_coerce + + oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) + + # We return an object array and only attempt to parse: + # 1) NaT or NaT-like values + # 2) datetime strings, which we return as datetime.datetime + # 3) special strings - "now" & "today" + for i in range(n): + val = values[i] + if checknull_with_nat_and_na(val) or PyDateTime_Check(val): + # GH 25978. No need to parse NaT-like or datetime-like vals + oresult[i] = val + elif isinstance(val, str): + if type(val) is not str: + # GH#32264 np.str_ objects + val = str(val) + + if len(val) == 0 or val in nat_strings: + oresult[i] = 'NaT' + continue + try: + oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, + yearfirst=yearfirst) + pydatetime_to_dt64(oresult[i], &dts) + check_dts_bounds(&dts) + except (ValueError, OverflowError) as ex: + ex.args = (f"{ex} present at position {i}", ) + if is_coerce: + oresult[i] = NaT + continue + if is_raise: + raise + return values, None + else: + if is_raise: + raise + return values, None + return oresult, None + + +cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc): + # We delay this check for as long as possible + # because it catches relatively rare cases + if val == "now": + iresult[0] = Timestamp.utcnow().value + if not utc: + # GH#18705 make sure to_datetime("now") matches Timestamp("now") + warnings.warn( + "The parsing of 'now' in pd.to_datetime without `utc=True` is " + "deprecated. 
In a future version, this will match Timestamp('now') " + "and Timestamp.now()", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return True + elif val == "today": + iresult[0] = Timestamp.today().value + return True + return False diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py new file mode 100644 index 00000000..47143b32 --- /dev/null +++ b/pandas/_libs/tslibs/__init__.py @@ -0,0 +1,81 @@ +__all__ = [ + "dtypes", + "localize_pydatetime", + "NaT", + "NaTType", + "iNaT", + "nat_strings", + "OutOfBoundsDatetime", + "OutOfBoundsTimedelta", + "IncompatibleFrequency", + "Period", + "Resolution", + "Timedelta", + "normalize_i8_timestamps", + "is_date_array_normalized", + "dt64arr_to_periodarr", + "delta_to_nanoseconds", + "ints_to_pydatetime", + "ints_to_pytimedelta", + "get_resolution", + "Timestamp", + "tz_convert_from_utc_single", + "tz_convert_from_utc", + "to_offset", + "Tick", + "BaseOffset", + "tz_compare", + "is_unitless", + "astype_overflowsafe", + "get_unit_from_dtype", + "periods_per_day", + "periods_per_second", + "is_supported_unit", +] + +from pandas._libs.tslibs import dtypes +from pandas._libs.tslibs.conversion import localize_pydatetime +from pandas._libs.tslibs.dtypes import ( + Resolution, + is_supported_unit, + periods_per_day, + periods_per_second, +) +from pandas._libs.tslibs.nattype import ( + NaT, + NaTType, + iNaT, + nat_strings, +) +from pandas._libs.tslibs.np_datetime import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, + astype_overflowsafe, + is_unitless, + py_get_unit_from_dtype as get_unit_from_dtype, +) +from pandas._libs.tslibs.offsets import ( + BaseOffset, + Tick, + to_offset, +) +from pandas._libs.tslibs.period import ( + IncompatibleFrequency, + Period, +) +from pandas._libs.tslibs.timedeltas import ( + Timedelta, + delta_to_nanoseconds, + ints_to_pytimedelta, +) +from pandas._libs.tslibs.timestamps import Timestamp +from pandas._libs.tslibs.timezones import tz_compare +from pandas._libs.tslibs.tzconversion import tz_convert_from_utc_single +from pandas._libs.tslibs.vectorized import ( + dt64arr_to_periodarr, + get_resolution, + ints_to_pydatetime, + is_date_array_normalized, + normalize_i8_timestamps, + tz_convert_from_utc, +) diff --git a/pandas/_libs/tslibs/base.pxd b/pandas/_libs/tslibs/base.pxd new file mode 100644 index 00000000..3bffff7a --- /dev/null +++ b/pandas/_libs/tslibs/base.pxd @@ -0,0 +1,5 @@ +from cpython.datetime cimport datetime + + +cdef class ABCTimestamp(datetime): + pass diff --git a/pandas/_libs/tslibs/base.pyx b/pandas/_libs/tslibs/base.pyx new file mode 100644 index 00000000..1677a8b0 --- /dev/null +++ b/pandas/_libs/tslibs/base.pyx @@ -0,0 +1,12 @@ +""" +We define base classes that will be inherited by Timestamp, Timedelta, etc +in order to allow for fast isinstance checks without circular dependency issues. + +This is analogous to core.dtypes.generic. 
+""" + +from cpython.datetime cimport datetime + + +cdef class ABCTimestamp(datetime): + pass diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd new file mode 100644 index 00000000..341f2176 --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -0,0 +1,20 @@ +from cython cimport Py_ssize_t +from numpy cimport ( + int32_t, + int64_t, +) + +ctypedef (int32_t, int32_t, int32_t) iso_calendar_t + +cdef int dayofweek(int y, int m, int d) nogil +cdef bint is_leapyear(int64_t year) nogil +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil +cpdef int32_t get_week_of_year(int year, int month, int day) nogil +cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil +cpdef int32_t get_day_of_year(int year, int month, int day) nogil +cpdef int get_lastbday(int year, int month) nogil +cpdef int get_firstbday(int year, int month) nogil + +cdef dict c_MONTH_NUMBERS + +cdef int32_t* month_offset diff --git a/pandas/_libs/tslibs/ccalendar.pyi b/pandas/_libs/tslibs/ccalendar.pyi new file mode 100644 index 00000000..993f18a6 --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pyi @@ -0,0 +1,12 @@ +DAYS: list[str] +MONTH_ALIASES: dict[int, str] +MONTH_NUMBERS: dict[str, int] +MONTHS: list[str] +int_to_weekday: dict[int, str] + +def get_firstbday(year: int, month: int) -> int: ... +def get_lastbday(year: int, month: int) -> int: ... +def get_day_of_year(year: int, month: int, day: int) -> int: ... +def get_iso_calendar(year: int, month: int, day: int) -> tuple[int, int, int]: ... +def get_week_of_year(year: int, month: int, day: int) -> int: ... +def get_days_in_month(year: int, month: int) -> int: ... diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx new file mode 100644 index 00000000..00ee15b7 --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -0,0 +1,289 @@ +# cython: boundscheck=False +""" +Cython implementations of functions resembling the stdlib calendar module +""" + +cimport cython +from numpy cimport ( + int32_t, + int64_t, +) + +# ---------------------------------------------------------------------- +# Constants + +# Slightly more performant cython lookups than a 2D table +# The first 12 entries correspond to month lengths for non-leap years. +# The remaining 12 entries give month lengths for leap years +cdef int32_t* days_per_month_array = [ + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, + 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] + +# The first 13 entries give the month days elapsed as of the first of month N +# (or the total number of days in the year for N=13) in non-leap years. +# The remaining 13 entries give the days elapsed in leap years. 
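The month_offset table below encodes these cumulative day counts; as a quick check against the public helpers defined later in this file (the sample years are illustrative):

from pandas._libs.tslibs.ccalendar import get_day_of_year, get_days_in_month

# Non-leap year: month_offset[2] == 59, so March 1 is ordinal day 59 + 1.
assert get_day_of_year(2021, 3, 1) == 60
# Leap year: the second half of the table applies (month_offset[13 + 2] == 60).
assert get_day_of_year(2020, 3, 1) == 61
assert get_days_in_month(2020, 2) == 29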
+cdef int32_t* month_offset = [ + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, + 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] + +# Canonical location for other modules to find name constants +MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', + 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] +# The first blank line is consistent with calendar.month_name in the calendar +# standard library +MONTHS_FULL = ['', 'January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', + 'December'] +MONTH_NUMBERS = {name: num for num, name in enumerate(MONTHS)} +cdef dict c_MONTH_NUMBERS = MONTH_NUMBERS +MONTH_ALIASES = {(num + 1): name for num, name in enumerate(MONTHS)} +MONTH_TO_CAL_NUM = {name: num + 1 for num, name in enumerate(MONTHS)} + +DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] +DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', + 'Saturday', 'Sunday'] +int_to_weekday = {num: name for num, name in enumerate(DAYS)} +weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} + + +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil: + """ + Return the number of days in the given month of the given year. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + days_in_month : int + + Notes + ----- + Assumes that the arguments are valid. Passing a month not between 1 and 12 + risks a segfault. + """ + return days_per_month_array[12 * is_leapyear(year) + month - 1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision +cdef int dayofweek(int y, int m, int d) nogil: + """ + Find the day of week for the date described by the Y/M/D triple y, m, d + using Sakamoto's method, from wikipedia. + + 0 represents Monday. See [1]_. + + Parameters + ---------- + y : int + m : int + d : int + + Returns + ------- + weekday : int + + Notes + ----- + Assumes that y, m, d, represents a valid date. + + See Also + -------- + [1] https://docs.python.org/3/library/calendar.html#calendar.weekday + + [2] https://en.wikipedia.org/wiki/\ + Determination_of_the_day_of_the_week#Sakamoto.27s_methods + """ + cdef: + int day + + y -= m < 3 + day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7 + # convert to python day + return (day + 6) % 7 + + +cdef bint is_leapyear(int64_t year) nogil: + """ + Returns 1 if the given year is a leap year, 0 otherwise. + + Parameters + ---------- + year : int + + Returns + ------- + is_leap : bool + """ + return ((year & 0x3) == 0 and # year % 4 == 0 + ((year % 100) != 0 or (year % 400) == 0)) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_week_of_year(int year, int month, int day) nogil: + """ + Return the ordinal week-of-year for the given day. + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + week_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. 
+ """ + return get_iso_calendar(year, month, day)[1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil: + """ + Return the year, week, and day of year corresponding to ISO 8601 + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + year : int32_t + week : int32_t + day : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. + """ + cdef: + int32_t doy, dow + int32_t iso_year, iso_week + + doy = get_day_of_year(year, month, day) + dow = dayofweek(year, month, day) + + # estimate + iso_week = (doy - 1) - dow + 3 + if iso_week >= 0: + iso_week = iso_week // 7 + 1 + + # verify + if iso_week < 0: + if (iso_week > -2) or (iso_week == -2 and is_leapyear(year - 1)): + iso_week = 53 + else: + iso_week = 52 + elif iso_week == 53: + if 31 - day + dow < 3: + iso_week = 1 + + iso_year = year + if iso_week == 1 and month == 12: + iso_year += 1 + + elif iso_week >= 52 and month == 1: + iso_year -= 1 + + return iso_year, iso_week, dow + 1 + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_day_of_year(int year, int month, int day) nogil: + """ + Return the ordinal day-of-year for the given day. + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + day_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. + """ + cdef: + bint isleap + int32_t mo_off + int day_of_year + + isleap = is_leapyear(year) + + mo_off = month_offset[isleap * 13 + month - 1] + + day_of_year = mo_off + day + return day_of_year + + +# --------------------------------------------------------------------- +# Business Helpers + +cpdef int get_lastbday(int year, int month) nogil: + """ + Find the last day of the month that is a business day. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + last_bday : int + """ + cdef: + int wkday, days_in_month + + wkday = dayofweek(year, month, 1) + days_in_month = get_days_in_month(year, month) + return days_in_month - max(((wkday + days_in_month - 1) % 7) - 4, 0) + + +cpdef int get_firstbday(int year, int month) nogil: + """ + Find the first day of the month that is a business day. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + first_bday : int + """ + cdef: + int first, wkday + + wkday = dayofweek(year, month, 1) + first = 1 + if wkday == 5: # on Saturday + first = 3 + elif wkday == 6: # on Sunday + first = 2 + return first diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd new file mode 100644 index 00000000..637a8499 --- /dev/null +++ b/pandas/_libs/tslibs/conversion.pxd @@ -0,0 +1,39 @@ +from cpython.datetime cimport ( + datetime, + tzinfo, +) +from numpy cimport ( + int32_t, + int64_t, + ndarray, +) + +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + npy_datetimestruct, +) + + +cdef class _TSObject: + cdef readonly: + npy_datetimestruct dts # npy_datetimestruct + int64_t value # numpy dt64 + tzinfo tzinfo + bint fold + + +cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, + bint dayfirst, bint yearfirst, + int32_t nanos=*) + +cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, + int32_t nanos=*, + NPY_DATETIMEUNIT reso=*) + +cdef int64_t get_datetime64_nanos(object val) except? -1 + +cpdef datetime localize_pydatetime(datetime dt, tzinfo tz) +cdef int64_t cast_from_unit(object ts, str unit) except? 
-1 +cpdef (int64_t, int) precision_from_unit(str unit) + +cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso) diff --git a/pandas/_libs/tslibs/conversion.pyi b/pandas/_libs/tslibs/conversion.pyi new file mode 100644 index 00000000..d564d767 --- /dev/null +++ b/pandas/_libs/tslibs/conversion.pyi @@ -0,0 +1,14 @@ +from datetime import ( + datetime, + tzinfo, +) + +import numpy as np + +DT64NS_DTYPE: np.dtype +TD64NS_DTYPE: np.dtype + +def precision_from_unit( + unit: str, +) -> tuple[int, int]: ... # (int64_t, _) +def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ... diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx new file mode 100644 index 00000000..519bc656 --- /dev/null +++ b/pandas/_libs/tslibs/conversion.pyx @@ -0,0 +1,655 @@ +cimport cython + +import warnings + +import numpy as np + +from pandas.util._exceptions import find_stack_level + +cimport numpy as cnp +from cpython.object cimport PyObject +from numpy cimport ( + int32_t, + int64_t, + intp_t, + ndarray, +) + +cnp.import_array() + +import pytz + +# stdlib datetime imports + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + datetime, + import_datetime, + time, + tzinfo, +) + +import_datetime() + +from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.dtypes cimport ( + abbrev_to_npy_unit, + periods_per_second, +) +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_ns, + astype_overflowsafe, + check_dts_bounds, + dtstruct_to_dt64, + get_datetime64_unit, + get_datetime64_value, + get_implementation_bounds, + get_unit_from_dtype, + npy_datetime, + npy_datetimestruct, + npy_datetimestruct_to_datetime, + pandas_datetime_to_datetimestruct, + pydatetime_to_dt64, + pydatetime_to_dtstruct, + string_to_dts, +) + +from pandas._libs.tslibs.np_datetime import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) + +from pandas._libs.tslibs.timezones cimport ( + get_utcoffset, + is_utc, + maybe_get_tz, + tz_compare, + utc_pytz as UTC, +) +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) + +from pandas._libs.tslibs.parsing import parse_datetime_string + +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + c_nat_strings as nat_strings, + checknull_with_nat, +) +from pandas._libs.tslibs.tzconversion cimport ( + Localizer, + tz_localize_to_utc_single, +) + +# ---------------------------------------------------------------------- +# Constants + +DT64NS_DTYPE = np.dtype('M8[ns]') +TD64NS_DTYPE = np.dtype('m8[ns]') + + +# ---------------------------------------------------------------------- +# Unit Conversion Helpers + +cdef inline int64_t cast_from_unit(object ts, str unit) except? -1: + """ + Return a casting of the unit represented to nanoseconds + round the fractional part of a float to our precision, p. + + Parameters + ---------- + ts : int, float, or None + unit : str + + Returns + ------- + int64_t + """ + cdef: + int64_t m + int p + + m, p = precision_from_unit(unit) + + # just give me the unit back + if ts is None: + return m + + # cast the unit, multiply base/frace separately + # to avoid precision issues from float -> int + base = ts + frac = ts - base + if p: + frac = round(frac, p) + return (base * m) + (frac * m) + + +cpdef inline (int64_t, int) precision_from_unit(str unit): + """ + Return a casting of the unit represented to nanoseconds + the precision + to round the fractional part. 
+ + Notes + ----- + The caller is responsible for ensuring that the default value of "ns" + takes the place of None. + """ + cdef: + int64_t m + int p + NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) + + if reso == NPY_DATETIMEUNIT.NPY_FR_Y: + # each 400 years we have 97 leap years, for an average of 97/400=.2425 + # extra days each year. We get 31556952 by writing + # 3600*24*365.2425=31556952 + m = 1_000_000_000 * 31556952 + p = 9 + elif reso == NPY_DATETIMEUNIT.NPY_FR_M: + # 2629746 comes from dividing the "Y" case by 12. + m = 1_000_000_000 * 2629746 + p = 9 + elif reso == NPY_DATETIMEUNIT.NPY_FR_W: + m = 1_000_000_000 * 3600 * 24 * 7 + p = 9 + elif reso == NPY_DATETIMEUNIT.NPY_FR_D: + m = 1_000_000_000 * 3600 * 24 + p = 9 + elif reso == NPY_DATETIMEUNIT.NPY_FR_h: + m = 1_000_000_000 * 3600 + p = 9 + elif reso == NPY_DATETIMEUNIT.NPY_FR_m: + m = 1_000_000_000 * 60 + p = 9 + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + m = 1_000_000_000 + p = 9 + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + m = 1_000_000 + p = 6 + elif reso == NPY_DATETIMEUNIT.NPY_FR_us: + m = 1000 + p = 3 + elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + m = 1 + p = 0 + else: + raise ValueError(f"cannot cast unit {unit}") + return m, p + + +cdef inline int64_t get_datetime64_nanos(object val) except? -1: + """ + Extract the value and unit from a np.datetime64 object, then convert the + value to nanoseconds if necessary. + """ + cdef: + npy_datetimestruct dts + NPY_DATETIMEUNIT unit + npy_datetime ival + + ival = get_datetime64_value(val) + if ival == NPY_NAT: + return NPY_NAT + + unit = get_datetime64_unit(val) + + if unit != NPY_FR_ns: + pandas_datetime_to_datetimestruct(ival, unit, &dts) + check_dts_bounds(&dts) + ival = dtstruct_to_dt64(&dts) + + return ival + + +# ---------------------------------------------------------------------- +# _TSObject Conversion + +# lightweight C object to hold datetime & int64 pair +cdef class _TSObject: + # cdef: + # npy_datetimestruct dts # npy_datetimestruct + # int64_t value # numpy dt64 + # tzinfo tzinfo + # bint fold + + def __cinit__(self): + # GH 25057. As per PEP 495, set fold to 0 by default + self.fold = 0 + + +cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, + bint dayfirst, bint yearfirst, int32_t nanos=0): + """ + Extract datetime and int64 from any of: + - np.int64 (with unit providing a possible modifier) + - np.datetime64 + - a float (with unit providing a possible modifier) + - python int or long object (with unit providing a possible modifier) + - iso8601 string object + - python datetime object + - another timestamp object + + Raises + ------ + OutOfBoundsDatetime : ts cannot be converted within implementation bounds + """ + cdef: + _TSObject obj + + obj = _TSObject() + + if isinstance(ts, str): + return _convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + + if ts is None or ts is NaT: + obj.value = NPY_NAT + elif is_datetime64_object(ts): + obj.value = get_datetime64_nanos(ts) + if obj.value != NPY_NAT: + pandas_datetime_to_datetimestruct(obj.value, NPY_FR_ns, &obj.dts) + elif is_integer_object(ts): + try: + ts = ts + except OverflowError: + # GH#26651 re-raise as OutOfBoundsDatetime + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp {ts}") + if ts == NPY_NAT: + obj.value = NPY_NAT + else: + if unit in ["Y", "M"]: + # GH#47266 cast_from_unit leads to weird results e.g. 
with "Y" + # and 150 we'd get 2120-01-01 09:00:00 + ts = np.datetime64(ts, unit) + return convert_to_tsobject(ts, tz, None, False, False) + + ts = ts * cast_from_unit(None, unit) + obj.value = ts + pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts) + elif is_float_object(ts): + if ts != ts or ts == NPY_NAT: + obj.value = NPY_NAT + else: + if unit in ["Y", "M"]: + if ts == int(ts): + # GH#47266 Avoid cast_from_unit, which would give weird results + # e.g. with "Y" and 150.0 we'd get 2120-01-01 09:00:00 + return convert_to_tsobject(int(ts), tz, unit, False, False) + else: + # GH#47267 it is clear that 2 "M" corresponds to 1970-02-01, + # but not clear what 2.5 "M" corresponds to, so we will + # disallow that case. + warnings.warn( + "Conversion of non-round float with unit={unit} is ambiguous " + "and will raise in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + ts = cast_from_unit(ts, unit) + obj.value = ts + pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts) + elif PyDateTime_Check(ts): + return convert_datetime_to_tsobject(ts, tz, nanos) + elif PyDate_Check(ts): + # Keep the converter same as PyDateTime's + ts = datetime.combine(ts, time()) + return convert_datetime_to_tsobject(ts, tz) + else: + from .period import Period + if isinstance(ts, Period): + raise ValueError("Cannot convert Period to Timestamp " + "unambiguously. Use to_timestamp") + raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to ' + f'Timestamp') + + maybe_localize_tso(obj, tz, NPY_FR_ns) + return obj + + +cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso): + if tz is not None: + _localize_tso(obj, tz, reso) + + if obj.value != NPY_NAT: + # check_overflows needs to run after _localize_tso + check_dts_bounds(&obj.dts, reso) + check_overflows(obj, reso) + + +cdef _TSObject convert_datetime_to_tsobject( + datetime ts, + tzinfo tz, + int32_t nanos=0, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): + """ + Convert a datetime (or Timestamp) input `ts`, along with optional timezone + object `tz` to a _TSObject. + + The optional argument `nanos` allows for cases where datetime input + needs to be supplemented with higher-precision information. 
+ + Parameters + ---------- + ts : datetime or Timestamp + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + nanos : int32_t, default is 0 + nanoseconds supplement the precision of the datetime input ts + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + int64_t pps + + obj.fold = ts.fold + if tz is not None: + tz = maybe_get_tz(tz) + + if ts.tzinfo is not None: + # Convert the current timezone to the passed timezone + ts = ts.astimezone(tz) + pydatetime_to_dtstruct(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + elif not is_utc(tz): + ts = _localize_pydatetime(ts, tz) + pydatetime_to_dtstruct(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # UTC + pydatetime_to_dtstruct(ts, &obj.dts) + obj.tzinfo = tz + else: + pydatetime_to_dtstruct(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + + if isinstance(ts, ABCTimestamp): + obj.dts.ps = ts.nanosecond * 1000 + + if nanos: + obj.dts.ps = nanos * 1000 + + obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) + + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) + pps = periods_per_second(reso) + obj.value -= int(offset.total_seconds() * pps) + + check_dts_bounds(&obj.dts, reso) + check_overflows(obj, reso) + return obj + + +cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, + int tzoffset, tzinfo tz=None): + """ + Convert a datetimestruct `dts`, along with initial timezone offset + `tzoffset` to a _TSObject (with timezone object `tz` - optional). + + Parameters + ---------- + dts: npy_datetimestruct + tzoffset: int + tz : tzinfo or None + timezone for the timezone-aware output. + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + int64_t value # numpy dt64 + datetime dt + Py_ssize_t pos + + value = dtstruct_to_dt64(&dts) + obj.dts = dts + obj.tzinfo = pytz.FixedOffset(tzoffset) + obj.value = tz_localize_to_utc_single(value, obj.tzinfo) + if tz is None: + check_overflows(obj, NPY_FR_ns) + return obj + + cdef: + Localizer info = Localizer(tz, NPY_FR_ns) + + # Infer fold from offset-adjusted obj.value + # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + if info.use_utc: + pass + elif info.use_tzlocal: + info.utc_val_to_local_val(obj.value, &pos, &obj.fold) + elif info.use_dst and not info.use_pytz: + # i.e. dateutil + info.utc_val_to_local_val(obj.value, &pos, &obj.fold) + + # Keep the converter same as PyDateTime's + dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo, fold=obj.fold) + obj = convert_datetime_to_tsobject( + dt, tz, nanos=obj.dts.ps // 1000) + return obj + + +cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, + bint dayfirst=False, + bint yearfirst=False): + """ + Convert a string input `ts`, along with optional timezone object`tz` + to a _TSObject. + + The optional arguments `dayfirst` and `yearfirst` are passed to the + dateutil parser. + + Parameters + ---------- + ts : str + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + unit : str or None + dayfirst : bool, default False + When parsing an ambiguous date string, interpret e.g. "3/4/1975" as + April 3, as opposed to the standard US interpretation March 4. + yearfirst : bool, default False + When parsing an ambiguous date string, interpret e.g. 
"01/05/09" + as "May 9, 2001", as opposed to the default "Jan 5, 2009" + + Returns + ------- + obj : _TSObject + """ + cdef: + npy_datetimestruct dts + int out_local = 0, out_tzoffset = 0, string_to_dts_failed + datetime dt + int64_t ival + NPY_DATETIMEUNIT out_bestunit + + if len(ts) == 0 or ts in nat_strings: + ts = NaT + obj = _TSObject() + obj.value = NPY_NAT + obj.tzinfo = tz + return obj + elif ts == 'now': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns utc + dt = datetime.now(tz) + elif ts == 'today': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns a normalized datetime + dt = datetime.now(tz) + # equiv: datetime.today().replace(tzinfo=tz) + else: + string_to_dts_failed = string_to_dts( + ts, &dts, &out_bestunit, &out_local, + &out_tzoffset, False + ) + if not string_to_dts_failed: + try: + check_dts_bounds(&dts) + if out_local == 1: + return _create_tsobject_tz_using_offset(dts, + out_tzoffset, tz) + else: + ival = dtstruct_to_dt64(&dts) + if tz is not None: + # shift for _localize_tso + ival = tz_localize_to_utc_single(ival, tz, + ambiguous="raise") + + return convert_to_tsobject(ival, tz, None, False, False) + + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to dateutil + # parser will return incorrect result because it will ignore + # nanoseconds + raise + + except ValueError: + # Fall through to parse_datetime_string + pass + + try: + dt = parse_datetime_string(ts, dayfirst=dayfirst, + yearfirst=yearfirst) + except (ValueError, OverflowError): + raise ValueError("could not convert string to Timestamp") + + return convert_datetime_to_tsobject(dt, tz) + + +cdef inline check_overflows(_TSObject obj, NPY_DATETIMEUNIT reso=NPY_FR_ns): + """ + Check that we haven't silently overflowed in timezone conversion + + Parameters + ---------- + obj : _TSObject + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + None + + Raises + ------ + OutOfBoundsDatetime + """ + # GH#12677 + cdef: + npy_datetimestruct lb, ub + + get_implementation_bounds(reso, &lb, &ub) + + if obj.dts.year == lb.year: + if not (obj.value < 0): + from pandas._libs.tslibs.timestamps import Timestamp + fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} " + f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}") + raise OutOfBoundsDatetime( + f"Converting {fmt} underflows past {Timestamp.min}" + ) + elif obj.dts.year == ub.year: + if not (obj.value > 0): + from pandas._libs.tslibs.timestamps import Timestamp + fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} " + f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}") + raise OutOfBoundsDatetime( + f"Converting {fmt} overflows past {Timestamp.max}" + ) + +# ---------------------------------------------------------------------- +# Localization + +cdef inline void _localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso): + """ + Given the UTC nanosecond timestamp in obj.value, find the wall-clock + representation of that timestamp in the given timezone. + + Parameters + ---------- + obj : _TSObject + tz : tzinfo + reso : NPY_DATETIMEUNIT + + Returns + ------- + None + + Notes + ----- + Sets obj.tzinfo inplace, alters obj.dts inplace. 
+ """ + cdef: + int64_t local_val + Py_ssize_t outpos = -1 + Localizer info = Localizer(tz, reso) + + assert obj.tzinfo is None + + if info.use_utc: + pass + elif obj.value == NPY_NAT: + pass + else: + local_val = info.utc_val_to_local_val(obj.value, &outpos, &obj.fold) + + if info.use_pytz: + # infer we went through a pytz path, will have outpos!=-1 + tz = tz._tzinfos[tz._transition_info[outpos]] + + pandas_datetime_to_datetimestruct(local_val, reso, &obj.dts) + + obj.tzinfo = tz + + +cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): + """ + Take a datetime/Timestamp in UTC and localizes to timezone tz. + + NB: Unlike the public version, this treats datetime and Timestamp objects + identically, i.e. discards nanos from Timestamps. + It also assumes that the `tz` input is not None. + """ + try: + # datetime.replace with pytz may be incorrect result + return tz.localize(dt) + except AttributeError: + return dt.replace(tzinfo=tz) + + +cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): + """ + Take a datetime/Timestamp in UTC and localizes to timezone tz. + + Parameters + ---------- + dt : datetime or Timestamp + tz : tzinfo or None + + Returns + ------- + localized : datetime or Timestamp + """ + if tz is None: + return dt + elif isinstance(dt, ABCTimestamp): + return dt.tz_localize(tz) + return _localize_pydatetime(dt, tz) diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd new file mode 100644 index 00000000..35268014 --- /dev/null +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -0,0 +1,103 @@ +from numpy cimport int64_t + +from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT + + +cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit) +cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev) +cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil +cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1 +cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1 + +cdef dict attrname_to_abbrevs + +cdef enum c_FreqGroup: + # Mirrors FreqGroup in the .pyx file + FR_ANN = 1000 + FR_QTR = 2000 + FR_MTH = 3000 + FR_WK = 4000 + FR_BUS = 5000 + FR_DAY = 6000 + FR_HR = 7000 + FR_MIN = 8000 + FR_SEC = 9000 + FR_MS = 10000 + FR_US = 11000 + FR_NS = 12000 + FR_UND = -10000 # undefined + + +cdef enum c_Resolution: + # Mirrors Resolution in the .pyx file + RESO_NS = 0 + RESO_US = 1 + RESO_MS = 2 + RESO_SEC = 3 + RESO_MIN = 4 + RESO_HR = 5 + RESO_DAY = 6 + RESO_MTH = 7 + RESO_QTR = 8 + RESO_YR = 9 + + +cdef enum PeriodDtypeCode: + # Annual freqs with various fiscal year ends. + # eg, 2005 for A_FEB runs Mar 1, 2004 to Feb 28, 2005 + A = 1000 # Default alias + A_DEC = 1000 # Annual - December year end + A_JAN = 1001 # Annual - January year end + A_FEB = 1002 # Annual - February year end + A_MAR = 1003 # Annual - March year end + A_APR = 1004 # Annual - April year end + A_MAY = 1005 # Annual - May year end + A_JUN = 1006 # Annual - June year end + A_JUL = 1007 # Annual - July year end + A_AUG = 1008 # Annual - August year end + A_SEP = 1009 # Annual - September year end + A_OCT = 1010 # Annual - October year end + A_NOV = 1011 # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. 
+ # eg, Q42005 for Q_OCT runs Aug 1, 2005 to Oct 31, 2005 + Q_DEC = 2000 # Quarterly - December year end + Q_JAN = 2001 # Quarterly - January year end + Q_FEB = 2002 # Quarterly - February year end + Q_MAR = 2003 # Quarterly - March year end + Q_APR = 2004 # Quarterly - April year end + Q_MAY = 2005 # Quarterly - May year end + Q_JUN = 2006 # Quarterly - June year end + Q_JUL = 2007 # Quarterly - July year end + Q_AUG = 2008 # Quarterly - August year end + Q_SEP = 2009 # Quarterly - September year end + Q_OCT = 2010 # Quarterly - October year end + Q_NOV = 2011 # Quarterly - November year end + + M = 3000 # Monthly + + W_SUN = 4000 # Weekly - Sunday end of week + W_MON = 4001 # Weekly - Monday end of week + W_TUE = 4002 # Weekly - Tuesday end of week + W_WED = 4003 # Weekly - Wednesday end of week + W_THU = 4004 # Weekly - Thursday end of week + W_FRI = 4005 # Weekly - Friday end of week + W_SAT = 4006 # Weekly - Saturday end of week + + B = 5000 # Business days + D = 6000 # Daily + H = 7000 # Hourly + T = 8000 # Minutely + S = 9000 # Secondly + L = 10000 # Millisecondly + U = 11000 # Microsecondly + N = 12000 # Nanosecondly + + UNDEFINED = -10_000 + + +cdef class PeriodDtypeBase: + cdef readonly: + PeriodDtypeCode _dtype_code + + cpdef int _get_to_timestamp_base(self) diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi new file mode 100644 index 00000000..82f62e16 --- /dev/null +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -0,0 +1,79 @@ +from enum import Enum + +# These are not public API, but are exposed in the .pyi file because they +# are imported in tests. +_attrname_to_abbrevs: dict[str, str] +_period_code_map: dict[str, int] + +def periods_per_day(reso: int) -> int: ... +def periods_per_second(reso: int) -> int: ... +def is_supported_unit(reso: int) -> bool: ... +def npy_unit_to_abbrev(reso: int) -> str: ... + +class PeriodDtypeBase: + _dtype_code: int # PeriodDtypeCode + + # actually __cinit__ + def __new__(cls, code: int): ... + @property + def _freq_group_code(self) -> int: ... + @property + def _resolution_obj(self) -> Resolution: ... + def _get_to_timestamp_base(self) -> int: ... + @property + def _freqstr(self) -> str: ... + +class FreqGroup(Enum): + FR_ANN: int + FR_QTR: int + FR_MTH: int + FR_WK: int + FR_BUS: int + FR_DAY: int + FR_HR: int + FR_MIN: int + FR_SEC: int + FR_MS: int + FR_US: int + FR_NS: int + FR_UND: int + @staticmethod + def from_period_dtype_code(code: int) -> FreqGroup: ... + +class Resolution(Enum): + RESO_NS: int + RESO_US: int + RESO_MS: int + RESO_SEC: int + RESO_MIN: int + RESO_HR: int + RESO_DAY: int + RESO_MTH: int + RESO_QTR: int + RESO_YR: int + def __lt__(self, other: Resolution) -> bool: ... + def __ge__(self, other: Resolution) -> bool: ... + @property + def attrname(self) -> str: ... + @classmethod + def from_attrname(cls, attrname: str) -> Resolution: ... + @classmethod + def get_reso_from_freqstr(cls, freq: str) -> Resolution: ... + @property + def attr_abbrev(self) -> str: ... 
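The Resolution stub above maps attribute names and frequency strings to resolution codes; a short sketch of the behaviour documented in the dtypes.pyx doctests further down (private API, used here purely for illustration):

from pandas._libs.tslibs import Resolution

assert Resolution.from_attrname("second") is Resolution.RESO_SEC
assert Resolution.get_reso_from_freqstr("H") is Resolution.RESO_HR
# Lower codes mean finer resolutions, so comparisons order by granularity.
assert Resolution.RESO_SEC < Resolution.RESO_MIN
assert Resolution.RESO_YR >= Resolution.RESO_QTR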
+ +class NpyDatetimeUnit(Enum): + NPY_FR_Y: int + NPY_FR_M: int + NPY_FR_W: int + NPY_FR_D: int + NPY_FR_h: int + NPY_FR_m: int + NPY_FR_s: int + NPY_FR_ms: int + NPY_FR_us: int + NPY_FR_ns: int + NPY_FR_ps: int + NPY_FR_fs: int + NPY_FR_as: int + NPY_FR_GENERIC: int diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx new file mode 100644 index 00000000..c09ac2a6 --- /dev/null +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -0,0 +1,412 @@ +# period frequency constants corresponding to scikits timeseries +# originals +cimport cython + +from enum import Enum + +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + get_conversion_factor, +) + + +cdef class PeriodDtypeBase: + """ + Similar to an actual dtype, this contains all of the information + describing a PeriodDtype in an integer code. + """ + # cdef readonly: + # PeriodDtypeCode _dtype_code + + def __cinit__(self, PeriodDtypeCode code): + self._dtype_code = code + + def __eq__(self, other): + if not isinstance(other, PeriodDtypeBase): + return False + if not isinstance(self, PeriodDtypeBase): + # cython semantics, this is a reversed op + return False + return self._dtype_code == other._dtype_code + + @property + def _freq_group_code(self) -> int: + # See also: libperiod.get_freq_group + return (self._dtype_code // 1000) * 1000 + + @property + def _resolution_obj(self) -> "Resolution": + fgc = self._freq_group_code + freq_group = FreqGroup(fgc) + abbrev = _reverse_period_code_map[freq_group.value].split("-")[0] + if abbrev == "B": + return Resolution.RESO_DAY + attrname = _abbrev_to_attrnames[abbrev] + return Resolution.from_attrname(attrname) + + @property + def _freqstr(self) -> str: + # Will be passed to to_offset in Period._maybe_convert_freq + return _reverse_period_code_map.get(self._dtype_code) + + cpdef int _get_to_timestamp_base(self): + """ + Return frequency code group used for base of to_timestamp against + frequency code. + + Return day freq code against longer freq than day. + Return second freq code against hour between second. + + Returns + ------- + int + """ + base = self._dtype_code + if base < FR_BUS: + return FR_DAY + elif FR_HR <= base <= FR_SEC: + return FR_SEC + return base + + +_period_code_map = { + # Annual freqs with various fiscal year ends. + # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": PeriodDtypeCode.A_DEC, # Annual - December year end + "A-JAN": PeriodDtypeCode.A_JAN, # Annual - January year end + "A-FEB": PeriodDtypeCode.A_FEB, # Annual - February year end + "A-MAR": PeriodDtypeCode.A_MAR, # Annual - March year end + "A-APR": PeriodDtypeCode.A_APR, # Annual - April year end + "A-MAY": PeriodDtypeCode.A_MAY, # Annual - May year end + "A-JUN": PeriodDtypeCode.A_JUN, # Annual - June year end + "A-JUL": PeriodDtypeCode.A_JUL, # Annual - July year end + "A-AUG": PeriodDtypeCode.A_AUG, # Annual - August year end + "A-SEP": PeriodDtypeCode.A_SEP, # Annual - September year end + "A-OCT": PeriodDtypeCode.A_OCT, # Annual - October year end + "A-NOV": PeriodDtypeCode.A_NOV, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. 
+ # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": PeriodDtypeCode.Q_DEC, # Quarterly - December year end + "Q-JAN": PeriodDtypeCode.Q_JAN, # Quarterly - January year end + "Q-FEB": PeriodDtypeCode.Q_FEB, # Quarterly - February year end + "Q-MAR": PeriodDtypeCode.Q_MAR, # Quarterly - March year end + "Q-APR": PeriodDtypeCode.Q_APR, # Quarterly - April year end + "Q-MAY": PeriodDtypeCode.Q_MAY, # Quarterly - May year end + "Q-JUN": PeriodDtypeCode.Q_JUN, # Quarterly - June year end + "Q-JUL": PeriodDtypeCode.Q_JUL, # Quarterly - July year end + "Q-AUG": PeriodDtypeCode.Q_AUG, # Quarterly - August year end + "Q-SEP": PeriodDtypeCode.Q_SEP, # Quarterly - September year end + "Q-OCT": PeriodDtypeCode.Q_OCT, # Quarterly - October year end + "Q-NOV": PeriodDtypeCode.Q_NOV, # Quarterly - November year end + + "M": PeriodDtypeCode.M, # Monthly + + "W-SUN": PeriodDtypeCode.W_SUN, # Weekly - Sunday end of week + "W-MON": PeriodDtypeCode.W_MON, # Weekly - Monday end of week + "W-TUE": PeriodDtypeCode.W_TUE, # Weekly - Tuesday end of week + "W-WED": PeriodDtypeCode.W_WED, # Weekly - Wednesday end of week + "W-THU": PeriodDtypeCode.W_THU, # Weekly - Thursday end of week + "W-FRI": PeriodDtypeCode.W_FRI, # Weekly - Friday end of week + "W-SAT": PeriodDtypeCode.W_SAT, # Weekly - Saturday end of week + + "B": PeriodDtypeCode.B, # Business days + "D": PeriodDtypeCode.D, # Daily + "H": PeriodDtypeCode.H, # Hourly + "T": PeriodDtypeCode.T, # Minutely + "S": PeriodDtypeCode.S, # Secondly + "L": PeriodDtypeCode.L, # Millisecondly + "U": PeriodDtypeCode.U, # Microsecondly + "N": PeriodDtypeCode.N, # Nanosecondly +} + +_reverse_period_code_map = { + _period_code_map[key]: key for key in _period_code_map} + +# Yearly aliases; careful not to put these in _reverse_period_code_map +_period_code_map.update({"Y" + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith("A-")}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": PeriodDtypeCode.A, # Annual + "W": 4000, # Weekly + "C": 5000, # Custom Business Day +}) + +cdef set _month_names = { + x.split("-")[-1] for x in _period_code_map.keys() if x.startswith("A-") +} + +# Map attribute-name resolutions to resolution abbreviations +_attrname_to_abbrevs = { + "year": "A", + "quarter": "Q", + "month": "M", + "day": "D", + "hour": "H", + "minute": "T", + "second": "S", + "millisecond": "L", + "microsecond": "U", + "nanosecond": "N", +} +cdef dict attrname_to_abbrevs = _attrname_to_abbrevs +cdef dict _abbrev_to_attrnames = {v: k for k, v in attrname_to_abbrevs.items()} + + +class FreqGroup(Enum): + # Mirrors c_FreqGroup in the .pxd file + FR_ANN = c_FreqGroup.FR_ANN + FR_QTR = c_FreqGroup.FR_QTR + FR_MTH = c_FreqGroup.FR_MTH + FR_WK = c_FreqGroup.FR_WK + FR_BUS = c_FreqGroup.FR_BUS + FR_DAY = c_FreqGroup.FR_DAY + FR_HR = c_FreqGroup.FR_HR + FR_MIN = c_FreqGroup.FR_MIN + FR_SEC = c_FreqGroup.FR_SEC + FR_MS = c_FreqGroup.FR_MS + FR_US = c_FreqGroup.FR_US + FR_NS = c_FreqGroup.FR_NS + FR_UND = c_FreqGroup.FR_UND # undefined + + @staticmethod + def from_period_dtype_code(code: int) -> "FreqGroup": + # See also: PeriodDtypeBase._freq_group_code + code = (code // 1000) * 1000 + return FreqGroup(code) + + +class Resolution(Enum): + RESO_NS = c_Resolution.RESO_NS + RESO_US = c_Resolution.RESO_US + RESO_MS = c_Resolution.RESO_MS + RESO_SEC = c_Resolution.RESO_SEC + RESO_MIN = c_Resolution.RESO_MIN + RESO_HR = c_Resolution.RESO_HR + RESO_DAY = c_Resolution.RESO_DAY + RESO_MTH = 
c_Resolution.RESO_MTH + RESO_QTR = c_Resolution.RESO_QTR + RESO_YR = c_Resolution.RESO_YR + + def __lt__(self, other): + return self.value < other.value + + def __ge__(self, other): + return self.value >= other.value + + @property + def attr_abbrev(self) -> str: + # string that we can pass to to_offset + return _attrname_to_abbrevs[self.attrname] + + @property + def attrname(self) -> str: + """ + Return datetime attribute name corresponding to this Resolution. + + Examples + -------- + >>> Resolution.RESO_SEC.attrname + 'second' + """ + return _reso_str_map[self.value] + + @classmethod + def from_attrname(cls, attrname: str) -> "Resolution": + """ + Return resolution str against resolution code. + + Examples + -------- + >>> Resolution.from_attrname('second') + + + >>> Resolution.from_attrname('second') == Resolution.RESO_SEC + True + """ + return cls(_str_reso_map[attrname]) + + @classmethod + def get_reso_from_freqstr(cls, freq: str) -> "Resolution": + """ + Return resolution code against frequency str. + + `freq` is given by the `offset.freqstr` for some DateOffset object. + + Examples + -------- + >>> Resolution.get_reso_from_freqstr('H') + + + >>> Resolution.get_reso_from_freqstr('H') == Resolution.RESO_HR + True + """ + try: + attr_name = _abbrev_to_attrnames[freq] + except KeyError: + # For quarterly and yearly resolutions, we need to chop off + # a month string. + split_freq = freq.split("-") + if len(split_freq) != 2: + raise + if split_freq[1] not in _month_names: + # i.e. we want e.g. "Q-DEC", not "Q-INVALID" + raise + attr_name = _abbrev_to_attrnames[split_freq[0]] + + return cls.from_attrname(attr_name) + + +class NpyDatetimeUnit(Enum): + """ + Python-space analogue to NPY_DATETIMEUNIT. + """ + NPY_FR_Y = NPY_DATETIMEUNIT.NPY_FR_Y + NPY_FR_M = NPY_DATETIMEUNIT.NPY_FR_M + NPY_FR_W = NPY_DATETIMEUNIT.NPY_FR_W + NPY_FR_D = NPY_DATETIMEUNIT.NPY_FR_D + NPY_FR_h = NPY_DATETIMEUNIT.NPY_FR_h + NPY_FR_m = NPY_DATETIMEUNIT.NPY_FR_m + NPY_FR_s = NPY_DATETIMEUNIT.NPY_FR_s + NPY_FR_ms = NPY_DATETIMEUNIT.NPY_FR_ms + NPY_FR_us = NPY_DATETIMEUNIT.NPY_FR_us + NPY_FR_ns = NPY_DATETIMEUNIT.NPY_FR_ns + NPY_FR_ps = NPY_DATETIMEUNIT.NPY_FR_ps + NPY_FR_fs = NPY_DATETIMEUNIT.NPY_FR_fs + NPY_FR_as = NPY_DATETIMEUNIT.NPY_FR_as + NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC + + +def is_supported_unit(NPY_DATETIMEUNIT reso): + return ( + reso == NPY_DATETIMEUNIT.NPY_FR_ns + or reso == NPY_DATETIMEUNIT.NPY_FR_us + or reso == NPY_DATETIMEUNIT.NPY_FR_ms + or reso == NPY_DATETIMEUNIT.NPY_FR_s + ) + + +cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit): + if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # generic -> default to nanoseconds + return "ns" + elif unit == NPY_DATETIMEUNIT.NPY_FR_us: + return "us" + elif unit == NPY_DATETIMEUNIT.NPY_FR_ms: + return "ms" + elif unit == NPY_DATETIMEUNIT.NPY_FR_s: + return "s" + elif unit == NPY_DATETIMEUNIT.NPY_FR_m: + return "m" + elif unit == NPY_DATETIMEUNIT.NPY_FR_h: + return "h" + elif unit == NPY_DATETIMEUNIT.NPY_FR_D: + return "D" + elif unit == NPY_DATETIMEUNIT.NPY_FR_W: + return "W" + elif unit == NPY_DATETIMEUNIT.NPY_FR_M: + return "M" + elif unit == NPY_DATETIMEUNIT.NPY_FR_Y: + return "Y" + + # Checks for not-really-supported units go at the end, as we don't expect + # to see these often + elif unit == NPY_DATETIMEUNIT.NPY_FR_ps: + return "ps" + elif unit == NPY_DATETIMEUNIT.NPY_FR_fs: + return "fs" + elif unit == NPY_DATETIMEUNIT.NPY_FR_as: + return "as" + + else: + raise NotImplementedError(unit) + + +cdef 
NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev): + if abbrev == "Y": + return NPY_DATETIMEUNIT.NPY_FR_Y + elif abbrev == "M": + return NPY_DATETIMEUNIT.NPY_FR_M + elif abbrev == "W": + return NPY_DATETIMEUNIT.NPY_FR_W + elif abbrev == "D" or abbrev == "d": + return NPY_DATETIMEUNIT.NPY_FR_D + elif abbrev == "h": + return NPY_DATETIMEUNIT.NPY_FR_h + elif abbrev == "m": + return NPY_DATETIMEUNIT.NPY_FR_m + elif abbrev == "s": + return NPY_DATETIMEUNIT.NPY_FR_s + elif abbrev == "ms": + return NPY_DATETIMEUNIT.NPY_FR_ms + elif abbrev == "us": + return NPY_DATETIMEUNIT.NPY_FR_us + elif abbrev == "ns": + return NPY_DATETIMEUNIT.NPY_FR_ns + elif abbrev == "ps": + return NPY_DATETIMEUNIT.NPY_FR_ps + elif abbrev == "fs": + return NPY_DATETIMEUNIT.NPY_FR_fs + elif abbrev == "as": + return NPY_DATETIMEUNIT.NPY_FR_as + elif abbrev is None: + return NPY_DATETIMEUNIT.NPY_FR_GENERIC + else: + raise ValueError(f"Unrecognized unit {abbrev}") + + +cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil: + """ + Convert the freq to the corresponding NPY_DATETIMEUNIT to pass + to npy_datetimestruct_to_datetime. + """ + if freq == FR_MTH: + return NPY_DATETIMEUNIT.NPY_FR_M + elif freq == FR_DAY: + return NPY_DATETIMEUNIT.NPY_FR_D + elif freq == FR_HR: + return NPY_DATETIMEUNIT.NPY_FR_h + elif freq == FR_MIN: + return NPY_DATETIMEUNIT.NPY_FR_m + elif freq == FR_SEC: + return NPY_DATETIMEUNIT.NPY_FR_s + elif freq == FR_MS: + return NPY_DATETIMEUNIT.NPY_FR_ms + elif freq == FR_US: + return NPY_DATETIMEUNIT.NPY_FR_us + elif freq == FR_NS: + return NPY_DATETIMEUNIT.NPY_FR_ns + elif freq == FR_UND: + # Default to Day + return NPY_DATETIMEUNIT.NPY_FR_D + + +# TODO: use in _matplotlib.converter? +cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1: + """ + How many of the given time units fit into a single day? + """ + return get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, reso) + + +cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1: + return get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, reso) + + +cdef dict _reso_str_map = { + Resolution.RESO_NS.value: "nanosecond", + Resolution.RESO_US.value: "microsecond", + Resolution.RESO_MS.value: "millisecond", + Resolution.RESO_SEC.value: "second", + Resolution.RESO_MIN.value: "minute", + Resolution.RESO_HR.value: "hour", + Resolution.RESO_DAY.value: "day", + Resolution.RESO_MTH.value: "month", + Resolution.RESO_QTR.value: "quarter", + Resolution.RESO_YR.value: "year", +} + +cdef dict _str_reso_map = {v: k for k, v in _reso_str_map.items()} diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi new file mode 100644 index 00000000..8b4bc1a3 --- /dev/null +++ b/pandas/_libs/tslibs/fields.pyi @@ -0,0 +1,58 @@ +import numpy as np + +from pandas._typing import npt + +def build_field_sarray( + dtindex: npt.NDArray[np.int64], # const int64_t[:] + reso: int, # NPY_DATETIMEUNIT +) -> np.ndarray: ... +def month_position_check(fields, weekdays) -> str | None: ... +def get_date_name_field( + dtindex: npt.NDArray[np.int64], # const int64_t[:] + field: str, + locale: str | None = ..., + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.object_]: ... +def get_start_end_field( + dtindex: npt.NDArray[np.int64], + field: str, + freqstr: str | None = ..., + month_kw: int = ..., + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.bool_]: ... 
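A usage sketch for the accessors stubbed here, operating on the i8 view of a datetime64[ns] array (the dates are illustrative and both helpers are private):

import numpy as np
from pandas._libs.tslibs.fields import get_date_name_field, get_start_end_field

dtindex = np.array(["2022-01-01", "2022-01-31"], dtype="M8[ns]").view("i8")
get_date_name_field(dtindex, "day_name")        # ['Saturday', 'Monday']
get_start_end_field(dtindex, "is_month_start")  # [True, False]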
+def get_date_field( + dtindex: npt.NDArray[np.int64], # const int64_t[:] + field: str, + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.int32]: ... +def get_timedelta_field( + tdindex: npt.NDArray[np.int64], # const int64_t[:] + field: str, + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.int32]: ... +def isleapyear_arr( + years: np.ndarray, +) -> npt.NDArray[np.bool_]: ... +def build_isocalendar_sarray( + dtindex: npt.NDArray[np.int64], # const int64_t[:] + reso: int, # NPY_DATETIMEUNIT +) -> np.ndarray: ... +def _get_locale_names(name_type: str, locale: str | None = ...): ... + +class RoundTo: + @property + def MINUS_INFTY(self) -> int: ... + @property + def PLUS_INFTY(self) -> int: ... + @property + def NEAREST_HALF_EVEN(self) -> int: ... + @property + def NEAREST_HALF_PLUS_INFTY(self) -> int: ... + @property + def NEAREST_HALF_MINUS_INFTY(self) -> int: ... + +def round_nsint64( + values: npt.NDArray[np.int64], + mode: RoundTo, + nanos: int, +) -> npt.NDArray[np.int64]: ... diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx new file mode 100644 index 00000000..71a0f272 --- /dev/null +++ b/pandas/_libs/tslibs/fields.pyx @@ -0,0 +1,774 @@ +""" +Functions for accessing attributes of Timestamp/datetime64/datetime-like +objects and arrays +""" +from locale import LC_TIME + +from _strptime import LocaleTime + +cimport cython +from cython cimport Py_ssize_t + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int8_t, + int32_t, + int64_t, + ndarray, + uint32_t, +) + +cnp.import_array() + +from pandas._config.localization import set_locale + +from pandas._libs.tslibs.ccalendar import ( + DAYS_FULL, + MONTHS_FULL, +) + +from pandas._libs.tslibs.ccalendar cimport ( + dayofweek, + get_day_of_year, + get_days_in_month, + get_firstbday, + get_iso_calendar, + get_lastbday, + get_week_of_year, + is_leapyear, + iso_calendar_t, + month_offset, +) +from pandas._libs.tslibs.nattype cimport NPY_NAT +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_ns, + get_unit_from_dtype, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, + pandas_timedelta_to_timedeltastruct, + pandas_timedeltastruct, +) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def build_field_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso): + """ + Datetime as int64 representation to a structured array of fields + """ + cdef: + Py_ssize_t i, count = len(dtindex) + npy_datetimestruct dts + ndarray[int32_t] years, months, days, hours, minutes, seconds, mus + + sa_dtype = [ + ("Y", "i4"), # year + ("M", "i4"), # month + ("D", "i4"), # day + ("h", "i4"), # hour + ("m", "i4"), # min + ("s", "i4"), # second + ("u", "i4"), # microsecond + ] + + out = np.empty(count, dtype=sa_dtype) + + years = out['Y'] + months = out['M'] + days = out['D'] + hours = out['h'] + minutes = out['m'] + seconds = out['s'] + mus = out['u'] + + for i in range(count): + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + years[i] = dts.year + months[i] = dts.month + days[i] = dts.day + hours[i] = dts.hour + minutes[i] = dts.min + seconds[i] = dts.sec + mus[i] = dts.us + + return out + + +def month_position_check(fields, weekdays) -> str | None: + cdef: + int32_t daysinmonth, y, m, d + bint calendar_end = True + bint business_end = True + bint calendar_start = True + bint business_start = True + bint cal + int32_t[:] years = fields["Y"] + int32_t[:] months = fields["M"] + int32_t[:] days = fields["D"] + + for y, m, d, wd in zip(years, months, days, 
weekdays): + if calendar_start: + calendar_start &= d == 1 + if business_start: + business_start &= d == 1 or (d <= 3 and wd == 0) + + if calendar_end or business_end: + daysinmonth = get_days_in_month(y, m) + cal = d == daysinmonth + if calendar_end: + calendar_end &= cal + if business_end: + business_end &= cal or (daysinmonth - d < 3 and wd == 4) + elif not calendar_start and not business_start: + break + + if calendar_end: + return "ce" + elif business_end: + return "be" + elif calendar_start: + return "cs" + elif business_start: + return "bs" + else: + return None + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_name_field( + const int64_t[:] dtindex, + str field, + object locale=None, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): + """ + Given a int64-based datetime index, return array of strings of date + name based on requested field (e.g. day_name) + """ + cdef: + Py_ssize_t i, count = dtindex.shape[0] + ndarray[object] out, names + npy_datetimestruct dts + int dow + + out = np.empty(count, dtype=object) + + if field == 'day_name': + if locale is None: + names = np.array(DAYS_FULL, dtype=np.object_) + else: + names = np.array(_get_locale_names('f_weekday', locale), + dtype=np.object_) + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = np.nan + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + dow = dayofweek(dts.year, dts.month, dts.day) + out[i] = names[dow].capitalize() + + elif field == 'month_name': + if locale is None: + names = np.array(MONTHS_FULL, dtype=np.object_) + else: + names = np.array(_get_locale_names('f_month', locale), + dtype=np.object_) + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = np.nan + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = names[dts.month].capitalize() + + else: + raise ValueError(f"Field {field} not supported") + + return out + + +cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil: + """ + Analogous to DateOffset.is_on_offset checking for the month part of a date. + """ + if modby == 1: + return True + elif modby == 3: + return (month - compare_month) % 3 == 0 + else: + return month == compare_month + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_start_end_field( + const int64_t[:] dtindex, + str field, + str freqstr=None, + int month_kw=12, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): + """ + Given an int64-based datetime index return array of indicators + of whether timestamps are at the start/end of the month/quarter/year + (defined by frequency). + + Parameters + ---------- + dtindex : ndarray[int64] + field : str + frestr : str or None, default None + month_kw : int, default 12 + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + ndarray[bool] + """ + cdef: + Py_ssize_t i + int count = dtindex.shape[0] + bint is_business = 0 + int end_month = 12 + int start_month = 1 + ndarray[int8_t] out + npy_datetimestruct dts + int compare_month, modby + + out = np.zeros(count, dtype='int8') + + if freqstr: + if freqstr == 'C': + raise ValueError(f"Custom business days is not supported by {field}") + is_business = freqstr[0] == 'B' + + # YearBegin(), BYearBegin() use month = starting month of year. + # QuarterBegin(), BQuarterBegin() use startingMonth = starting + # month of year. Other offsets use month, startingMonth as ending + # month of year. 
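        # Worked example for the branch below (illustrative): a begin-anchored
        # frequency such as "QS-APR" with month_kw=4 gives start_month=4,
        # end_month=3; the end-anchored "Q-APR" with month_kw=4 gives
        # end_month=4, start_month=5; and "AS-JAN" with month_kw=1 gives
        # start_month=1, end_month=12.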
+ + if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( + freqstr[1:3] in ['MS', 'QS', 'AS']): + end_month = 12 if month_kw == 1 else month_kw - 1 + start_month = month_kw + else: + end_month = month_kw + start_month = (end_month % 12) + 1 + else: + end_month = 12 + start_month = 1 + + compare_month = start_month if "start" in field else end_month + if "month" in field: + modby = 1 + elif "quarter" in field: + modby = 3 + else: + modby = 12 + + if field in ["is_month_start", "is_quarter_start", "is_year_start"]: + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + + if _is_on_month(dts.month, compare_month, modby) and ( + dts.day == get_firstbday(dts.year, dts.month)): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + + if _is_on_month(dts.month, compare_month, modby) and dts.day == 1: + out[i] = 1 + + elif field in ["is_month_end", "is_quarter_end", "is_year_end"]: + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + + if _is_on_month(dts.month, compare_month, modby) and ( + dts.day == get_lastbday(dts.year, dts.month)): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + + if _is_on_month(dts.month, compare_month, modby) and ( + dts.day == get_days_in_month(dts.year, dts.month)): + out[i] = 1 + + else: + raise ValueError(f"Field {field} not supported") + + return out.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_field(const int64_t[:] dtindex, str field, NPY_DATETIMEUNIT reso=NPY_FR_ns): + """ + Given a int64-based datetime index, extract the year, month, etc., + field and return an array of these values. + """ + cdef: + Py_ssize_t i, count = len(dtindex) + ndarray[int32_t] out + npy_datetimestruct dts + + out = np.empty(count, dtype='i4') + + if field == 'Y': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.year + return out + + elif field == 'M': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.month + return out + + elif field == 'D': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.day + return out + + elif field == 'h': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.hour + # TODO: can we de-dup with period.pyx s? 
+ return out + + elif field == 'm': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.min + return out + + elif field == 's': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.sec + return out + + elif field == 'us': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.us + return out + + elif field == 'ns': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.ps // 1000 + return out + elif field == 'doy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = get_day_of_year(dts.year, dts.month, dts.day) + return out + + elif field == 'dow': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dayofweek(dts.year, dts.month, dts.day) + return out + + elif field == 'woy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = get_week_of_year(dts.year, dts.month, dts.day) + return out + + elif field == 'q': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = dts.month + out[i] = ((out[i] - 1) // 3) + 1 + return out + + elif field == 'dim': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + out[i] = get_days_in_month(dts.year, dts.month) + return out + elif field == 'is_leap_year': + return isleapyear_arr(get_date_field(dtindex, 'Y', reso=reso)) + + raise ValueError(f"Field {field} not supported") + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_timedelta_field( + const int64_t[:] tdindex, + str field, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): + """ + Given a int64-based timedelta index, extract the days, hrs, sec., + field and return an array of these values. 
+ """ + cdef: + Py_ssize_t i, count = len(tdindex) + ndarray[int32_t] out + pandas_timedeltastruct tds + + out = np.empty(count, dtype='i4') + + if field == 'days': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_timedelta_to_timedeltastruct(tdindex[i], reso, &tds) + out[i] = tds.days + return out + + elif field == 'seconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_timedelta_to_timedeltastruct(tdindex[i], reso, &tds) + out[i] = tds.seconds + return out + + elif field == 'microseconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_timedelta_to_timedeltastruct(tdindex[i], reso, &tds) + out[i] = tds.microseconds + return out + + elif field == 'nanoseconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + pandas_timedelta_to_timedeltastruct(tdindex[i], reso, &tds) + out[i] = tds.nanoseconds + return out + + raise ValueError(f"Field {field} not supported") + + +cpdef isleapyear_arr(ndarray years): + """vectorized version of isleapyear; NaT evaluates as False""" + cdef: + ndarray[int8_t] out + + out = np.zeros(len(years), dtype='int8') + out[np.logical_or(years % 400 == 0, + np.logical_and(years % 4 == 0, + years % 100 > 0))] = 1 + return out.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def build_isocalendar_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso): + """ + Given a int64-based datetime array, return the ISO 8601 year, week, and day + as a structured array. + """ + cdef: + Py_ssize_t i, count = len(dtindex) + npy_datetimestruct dts + ndarray[uint32_t] iso_years, iso_weeks, days + iso_calendar_t ret_val + + sa_dtype = [ + ("year", "u4"), + ("week", "u4"), + ("day", "u4"), + ] + + out = np.empty(count, dtype=sa_dtype) + + iso_years = out["year"] + iso_weeks = out["week"] + days = out["day"] + + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + ret_val = 0, 0, 0 + else: + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) + ret_val = get_iso_calendar(dts.year, dts.month, dts.day) + + iso_years[i] = ret_val[0] + iso_weeks[i] = ret_val[1] + days[i] = ret_val[2] + return out + + +def _get_locale_names(name_type: str, locale: object = None): + """ + Returns an array of localized day or month names. + + Parameters + ---------- + name_type : str + Attribute of LocaleTime() in which to return localized names. + locale : str + + Returns + ------- + list of locale names + """ + with set_locale(locale, LC_TIME): + return getattr(LocaleTime(), name_type) + + +# --------------------------------------------------------------------- +# Rounding + + +class RoundTo: + """ + enumeration defining the available rounding modes + + Attributes + ---------- + MINUS_INFTY + round towards -∞, or floor [2]_ + PLUS_INFTY + round towards +∞, or ceil [3]_ + NEAREST_HALF_EVEN + round to nearest, tie-break half to even [6]_ + NEAREST_HALF_MINUS_INFTY + round to nearest, tie-break half to -∞ [5]_ + NEAREST_HALF_PLUS_INFTY + round to nearest, tie-break half to +∞ [4]_ + + + References + ---------- + .. [1] "Rounding - Wikipedia" + https://en.wikipedia.org/wiki/Rounding + .. [2] "Rounding down" + https://en.wikipedia.org/wiki/Rounding#Rounding_down + .. [3] "Rounding up" + https://en.wikipedia.org/wiki/Rounding#Rounding_up + .. [4] "Round half up" + https://en.wikipedia.org/wiki/Rounding#Round_half_up + .. 
[5] "Round half down" + https://en.wikipedia.org/wiki/Rounding#Round_half_down + .. [6] "Round half to even" + https://en.wikipedia.org/wiki/Rounding#Round_half_to_even + """ + @property + def MINUS_INFTY(self) -> int: + return 0 + + @property + def PLUS_INFTY(self) -> int: + return 1 + + @property + def NEAREST_HALF_EVEN(self) -> int: + return 2 + + @property + def NEAREST_HALF_PLUS_INFTY(self) -> int: + return 3 + + @property + def NEAREST_HALF_MINUS_INFTY(self) -> int: + return 4 + + +cdef inline ndarray[int64_t] _floor_int64(const int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + if value == NPY_NAT: + res = NPY_NAT + else: + res = value - value % unit + result[i] = res + + return result + + +cdef inline ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + + if value == NPY_NAT: + res = NPY_NAT + else: + remainder = value % unit + if remainder == 0: + res = value + else: + res = value + (unit - remainder) + + result[i] = res + + return result + + +cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit): + return _ceil_int64(values - unit // 2, unit) + + +cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit): + return _floor_int64(values + unit // 2, unit) + + +def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray: + """ + Applies rounding mode at given frequency + + Parameters + ---------- + values : np.ndarray[int64_t]` + mode : instance of `RoundTo` enumeration + nanos : np.int64 + Freq to round to, expressed in nanoseconds + + Returns + ------- + np.ndarray[int64_t] + """ + cdef: + int64_t unit = nanos + + if mode == RoundTo.MINUS_INFTY: + return _floor_int64(values, unit) + elif mode == RoundTo.PLUS_INFTY: + return _ceil_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_MINUS_INFTY: + return _rounddown_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_PLUS_INFTY: + return _roundup_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_EVEN: + # for odd unit there is no need of a tie break + if unit % 2: + return _rounddown_int64(values, unit) + quotient, remainder = np.divmod(values, unit) + mask = np.logical_or( + remainder > (unit // 2), + np.logical_and(remainder == (unit // 2), quotient % 2) + ) + quotient[mask] += 1 + return quotient * unit + + # if/elif above should catch all rounding modes defined in enum 'RoundTo': + # if flow of control arrives here, it is a bug + raise ValueError("round_nsint64 called with an unrecognized rounding mode") diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd new file mode 100644 index 00000000..e878fa76 --- /dev/null +++ b/pandas/_libs/tslibs/nattype.pxd @@ -0,0 +1,18 @@ +from cpython.datetime cimport datetime +from numpy cimport int64_t + + +cdef int64_t NPY_NAT + +cdef set c_nat_strings + +cdef class _NaT(datetime): + cdef readonly: + int64_t value + +cdef _NaT c_NaT + + +cdef bint checknull_with_nat(object val) +cdef bint is_dt64nat(object val) +cdef bint is_td64nat(object val) diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi new file mode 100644 index 00000000..e9ae46ce --- /dev/null +++ b/pandas/_libs/tslibs/nattype.pyi @@ -0,0 +1,129 @@ 
+from datetime import ( + datetime, + timedelta, + tzinfo as _tzinfo, +) + +import numpy as np + +from pandas._libs.tslibs.period import Period + +NaT: NaTType +iNaT: int +nat_strings: set[str] + +_NaTComparisonTypes = datetime | timedelta | Period | np.datetime64 | np.timedelta64 + +class _NatComparison: + def __call__(self, other: _NaTComparisonTypes) -> bool: ... + +class NaTType: + value: np.int64 + @property + def asm8(self) -> np.datetime64: ... + def to_datetime64(self) -> np.datetime64: ... + def to_numpy( + self, dtype: np.dtype | str | None = ..., copy: bool = ... + ) -> np.datetime64 | np.timedelta64: ... + @property + def is_leap_year(self) -> bool: ... + @property + def is_month_start(self) -> bool: ... + @property + def is_quarter_start(self) -> bool: ... + @property + def is_year_start(self) -> bool: ... + @property + def is_month_end(self) -> bool: ... + @property + def is_quarter_end(self) -> bool: ... + @property + def is_year_end(self) -> bool: ... + @property + def day_of_year(self) -> float: ... + @property + def dayofyear(self) -> float: ... + @property + def days_in_month(self) -> float: ... + @property + def daysinmonth(self) -> float: ... + @property + def day_of_week(self) -> float: ... + @property + def dayofweek(self) -> float: ... + @property + def week(self) -> float: ... + @property + def weekofyear(self) -> float: ... + def day_name(self) -> float: ... + def month_name(self) -> float: ... + def weekday(self) -> float: ... + def isoweekday(self) -> float: ... + def total_seconds(self) -> float: ... + def today(self, *args, **kwargs) -> NaTType: ... + def now(self, *args, **kwargs) -> NaTType: ... + def to_pydatetime(self) -> NaTType: ... + def date(self) -> NaTType: ... + def round(self) -> NaTType: ... + def floor(self) -> NaTType: ... + def ceil(self) -> NaTType: ... + @property + def tzinfo(self) -> None: ... + @property + def tz(self) -> None: ... + def tz_convert(self, tz: _tzinfo | str | None) -> NaTType: ... + def tz_localize( + self, + tz: _tzinfo | str | None, + ambiguous: str = ..., + nonexistent: str = ..., + ) -> NaTType: ... + def replace( + self, + year: int | None = ..., + month: int | None = ..., + day: int | None = ..., + hour: int | None = ..., + minute: int | None = ..., + second: int | None = ..., + microsecond: int | None = ..., + nanosecond: int | None = ..., + tzinfo: _tzinfo | None = ..., + fold: int | None = ..., + ) -> NaTType: ... + @property + def year(self) -> float: ... + @property + def quarter(self) -> float: ... + @property + def month(self) -> float: ... + @property + def day(self) -> float: ... + @property + def hour(self) -> float: ... + @property + def minute(self) -> float: ... + @property + def second(self) -> float: ... + @property + def millisecond(self) -> float: ... + @property + def microsecond(self) -> float: ... + @property + def nanosecond(self) -> float: ... + # inject Timedelta properties + @property + def days(self) -> float: ... + @property + def microseconds(self) -> float: ... + @property + def nanoseconds(self) -> float: ... + # inject Period properties + @property + def qyear(self) -> float: ... + def __eq__(self, other: object) -> bool: ... + def __ne__(self, other: object) -> bool: ... 
+ __lt__: _NatComparison + __le__: _NatComparison + __gt__: _NatComparison + __ge__: _NatComparison diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx new file mode 100644 index 00000000..0a5f86ba --- /dev/null +++ b/pandas/_libs/tslibs/nattype.pyx @@ -0,0 +1,1245 @@ +import warnings + +from pandas.util._exceptions import find_stack_level + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDelta_Check, + datetime, + import_datetime, + timedelta, +) + +import_datetime() +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, + PyObject_RichCompare, +) + +import numpy as np + +cimport numpy as cnp +from numpy cimport int64_t + +cnp.import_array() + +cimport pandas._libs.tslibs.util as util +from pandas._libs.tslibs.np_datetime cimport ( + get_datetime64_value, + get_timedelta64_value, +) + +# ---------------------------------------------------------------------- +# Constants +nat_strings = {"NaT", "nat", "NAT", "nan", "NaN", "NAN"} +cdef set c_nat_strings = nat_strings + +cdef int64_t NPY_NAT = util.get_nat() +iNaT = NPY_NAT # python-visible constant + +# ---------------------------------------------------------------------- + + +def _make_nan_func(func_name: str, doc: str): + def f(*args, **kwargs): + return np.nan + f.__name__ = func_name + f.__doc__ = doc + return f + + +def _make_nat_func(func_name: str, doc: str): + def f(*args, **kwargs): + return c_NaT + f.__name__ = func_name + f.__doc__ = doc + return f + + +def _make_error_func(func_name: str, cls): + def f(*args, **kwargs): + raise ValueError(f"NaTType does not support {func_name}") + + f.__name__ = func_name + if isinstance(cls, str): + # passed the literal docstring directly + f.__doc__ = cls + elif cls is not None: + f.__doc__ = getattr(cls, func_name).__doc__ + return f + + +cdef _nat_divide_op(self, other): + if PyDelta_Check(other) or util.is_timedelta64_object(other) or other is c_NaT: + return np.nan + if util.is_integer_object(other) or util.is_float_object(other): + return c_NaT + return NotImplemented + + +cdef _nat_rdivide_op(self, other): + if PyDelta_Check(other): + return np.nan + return NotImplemented + + +def __nat_unpickle(*args): + # return constant defined in the module + return c_NaT + +# ---------------------------------------------------------------------- + + +cdef class _NaT(datetime): + # cdef readonly: + # int64_t value + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + + def __richcmp__(_NaT self, object other, int op): + if util.is_datetime64_object(other) or PyDateTime_Check(other): + # We treat NaT as datetime-like for this comparison + return op == Py_NE + + elif util.is_timedelta64_object(other) or PyDelta_Check(other): + # We treat NaT as timedelta-like for this comparison + return op == Py_NE + + elif util.is_array(other): + if other.dtype.kind in "mM": + result = np.empty(other.shape, dtype=np.bool_) + result.fill(op == Py_NE) + elif other.dtype.kind == "O": + result = np.array([PyObject_RichCompare(self, x, op) for x in other]) + elif op == Py_EQ: + result = np.zeros(other.shape, dtype=bool) + elif op == Py_NE: + result = np.ones(other.shape, dtype=bool) + else: + return NotImplemented + return result + + elif PyDate_Check(other): + # GH#39151 don't defer to datetime.date object + if op == Py_EQ: + return False + if op == Py_NE: + return True + warnings.warn( + "Comparison of NaT with datetime.date is deprecated in " + "order to match the standard library behavior. 
" + "In a future version these will be considered non-comparable.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return False + + return NotImplemented + + def __add__(self, other): + if self is not c_NaT: + # TODO(cython3): remove this it moved to __radd__ + # cython __radd__ semantics + self, other = other, self + + if PyDateTime_Check(other): + return c_NaT + elif PyDelta_Check(other): + return c_NaT + elif util.is_datetime64_object(other) or util.is_timedelta64_object(other): + return c_NaT + + elif util.is_integer_object(other): + # For Period compat + return c_NaT + + elif util.is_array(other): + if other.dtype.kind in "mM": + # If we are adding to datetime64, we treat NaT as timedelta + # Either way, result dtype is datetime64 + result = np.empty(other.shape, dtype="datetime64[ns]") + result.fill("NaT") + return result + raise TypeError(f"Cannot add NaT to ndarray with dtype {other.dtype}") + + # Includes Period, DateOffset going through here + return NotImplemented + + def __radd__(self, other): + return self.__add__(other) + + def __sub__(self, other): + # Duplicate some logic from _Timestamp.__sub__ to avoid needing + # to subclass; allows us to @final(_Timestamp.__sub__) + cdef: + bint is_rsub = False + + if self is not c_NaT: + # cython __rsub__ semantics + # TODO(cython3): remove __rsub__ logic from here + self, other = other, self + is_rsub = True + + if PyDateTime_Check(other): + return c_NaT + elif PyDelta_Check(other): + return c_NaT + elif util.is_datetime64_object(other) or util.is_timedelta64_object(other): + return c_NaT + + elif util.is_integer_object(other): + # For Period compat + return c_NaT + + elif util.is_array(other): + if other.dtype.kind == "m": + if not is_rsub: + # NaT - timedelta64 we treat NaT as datetime64, so result + # is datetime64 + result = np.empty(other.shape, dtype="datetime64[ns]") + result.fill("NaT") + return result + + # __rsub__ logic here + # TODO(cython3): remove this, move above code out of ``if not is_rsub`` block + # timedelta64 - NaT we have to treat NaT as timedelta64 + # for this to be meaningful, and the result is timedelta64 + result = np.empty(other.shape, dtype="timedelta64[ns]") + result.fill("NaT") + return result + + elif other.dtype.kind == "M": + # We treat NaT as a datetime, so regardless of whether this is + # NaT - other or other - NaT, the result is timedelta64 + result = np.empty(other.shape, dtype="timedelta64[ns]") + result.fill("NaT") + return result + + raise TypeError( + f"Cannot subtract NaT from ndarray with dtype {other.dtype}" + ) + + # Includes Period, DateOffset going through here + return NotImplemented + + def __rsub__(self, other): + if util.is_array(other): + if other.dtype.kind == "m": + # timedelta64 - NaT we have to treat NaT as timedelta64 + # for this to be meaningful, and the result is timedelta64 + result = np.empty(other.shape, dtype="timedelta64[ns]") + result.fill("NaT") + return result + + elif other.dtype.kind == "M": + # We treat NaT as a datetime, so regardless of whether this is + # NaT - other or other - NaT, the result is timedelta64 + result = np.empty(other.shape, dtype="timedelta64[ns]") + result.fill("NaT") + return result + # other cases are same, swap operands is allowed even though we subtract because this is NaT + return self.__sub__(other) + + def __pos__(self): + return NaT + + def __neg__(self): + return NaT + + def __truediv__(self, other): + return _nat_divide_op(self, other) + + def __floordiv__(self, other): + return _nat_divide_op(self, other) + + def 
__mul__(self, other): + if util.is_integer_object(other) or util.is_float_object(other): + return NaT + return NotImplemented + + @property + def asm8(self) -> np.datetime64: + return np.datetime64(NPY_NAT, "ns") + + def to_datetime64(self) -> np.datetime64: + """ + Return a numpy.datetime64 object with 'ns' precision. + """ + return np.datetime64('NaT', "ns") + + def to_numpy(self, dtype=None, copy=False) -> np.datetime64 | np.timedelta64: + """ + Convert the Timestamp to a NumPy datetime64 or timedelta64. + + .. versionadded:: 0.25.0 + + With the default 'dtype', this is an alias method for `NaT.to_datetime64()`. + + The copy parameter is available here only for compatibility. Its value + will not affect the return value. + + Returns + ------- + numpy.datetime64 or numpy.timedelta64 + + See Also + -------- + DatetimeIndex.to_numpy : Similar method for DatetimeIndex. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.to_numpy() + numpy.datetime64('2020-03-14T15:32:52.192548651') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.to_numpy() + numpy.datetime64('NaT') + + >>> pd.NaT.to_numpy("m8[ns]") + numpy.timedelta64('NaT','ns') + """ + if dtype is not None: + # GH#44460 + dtype = np.dtype(dtype) + if dtype.kind == "M": + return np.datetime64("NaT").astype(dtype) + elif dtype.kind == "m": + return np.timedelta64("NaT").astype(dtype) + else: + raise ValueError( + "NaT.to_numpy dtype must be a datetime64 dtype, timedelta64 " + "dtype, or None." + ) + return self.to_datetime64() + + def __repr__(self) -> str: + return "NaT" + + def __str__(self) -> str: + return "NaT" + + def isoformat(self, sep: str = "T", timespec: str = "auto") -> str: + # This allows Timestamp(ts.isoformat()) to always correctly roundtrip. + return "NaT" + + def __hash__(self) -> int: + return NPY_NAT + + @property + def is_leap_year(self) -> bool: + return False + + @property + def is_month_start(self) -> bool: + return False + + @property + def is_quarter_start(self) -> bool: + return False + + @property + def is_year_start(self) -> bool: + return False + + @property + def is_month_end(self) -> bool: + return False + + @property + def is_quarter_end(self) -> bool: + return False + + @property + def is_year_end(self) -> bool: + return False + + +class NaTType(_NaT): + """ + (N)ot-(A)-(T)ime, the time equivalent of NaN. 
+ """ + + def __new__(cls): + cdef _NaT base + + base = _NaT.__new__(cls, 1, 1, 1) + base.value = NPY_NAT + + return base + + @property + def freq(self): + warnings.warn( + "NaT.freq is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return None + + def __reduce_ex__(self, protocol): + # python 3.6 compat + # https://bugs.python.org/issue28730 + # now __reduce_ex__ is defined and higher priority than __reduce__ + return self.__reduce__() + + def __reduce__(self): + return (__nat_unpickle, (None, )) + + def __rtruediv__(self, other): + return _nat_rdivide_op(self, other) + + def __rfloordiv__(self, other): + return _nat_rdivide_op(self, other) + + def __rmul__(self, other): + if util.is_integer_object(other) or util.is_float_object(other): + return c_NaT + return NotImplemented + + # ---------------------------------------------------------------------- + # inject the Timestamp field properties + # these by definition return np.nan + + year = property(fget=lambda self: np.nan) + quarter = property(fget=lambda self: np.nan) + month = property(fget=lambda self: np.nan) + day = property(fget=lambda self: np.nan) + hour = property(fget=lambda self: np.nan) + minute = property(fget=lambda self: np.nan) + second = property(fget=lambda self: np.nan) + millisecond = property(fget=lambda self: np.nan) + microsecond = property(fget=lambda self: np.nan) + nanosecond = property(fget=lambda self: np.nan) + + week = property(fget=lambda self: np.nan) + dayofyear = property(fget=lambda self: np.nan) + day_of_year = property(fget=lambda self: np.nan) + weekofyear = property(fget=lambda self: np.nan) + days_in_month = property(fget=lambda self: np.nan) + daysinmonth = property(fget=lambda self: np.nan) + dayofweek = property(fget=lambda self: np.nan) + day_of_week = property(fget=lambda self: np.nan) + + # inject Timedelta properties + days = property(fget=lambda self: np.nan) + seconds = property(fget=lambda self: np.nan) + microseconds = property(fget=lambda self: np.nan) + nanoseconds = property(fget=lambda self: np.nan) + + # inject pd.Period properties + qyear = property(fget=lambda self: np.nan) + + # ---------------------------------------------------------------------- + # GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or + # return NaT create functions that raise, for binding to NaTType + # These are the ones that can get their docstrings from datetime. + + # nan methods + weekday = _make_nan_func( + "weekday", + """ + Return the day of the week represented by the date. + + Monday == 0 ... Sunday == 6. + """, + ) + isoweekday = _make_nan_func( + "isoweekday", + """ + Return the day of the week represented by the date. + + Monday == 1 ... Sunday == 7. + """, + ) + total_seconds = _make_nan_func("total_seconds", timedelta.total_seconds.__doc__) + month_name = _make_nan_func( + "month_name", + """ + Return the month name of the Timestamp with specified locale. + + Parameters + ---------- + locale : str, default None (English locale) + Locale determining the language in which to return the month name. + + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.month_name() + 'March' + + Analogous for ``pd.NaT``: + + >>> pd.NaT.month_name() + nan + """, + ) + day_name = _make_nan_func( + "day_name", + """ + Return the day name of the Timestamp with specified locale. 
+ + Parameters + ---------- + locale : str, default None (English locale) + Locale determining the language in which to return the day name. + + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.day_name() + 'Saturday' + + Analogous for ``pd.NaT``: + + >>> pd.NaT.day_name() + nan + """, + ) + # _nat_methods + date = _make_nat_func("date", datetime.date.__doc__) + + utctimetuple = _make_error_func("utctimetuple", datetime) + timetz = _make_error_func("timetz", datetime) + timetuple = _make_error_func("timetuple", datetime) + isocalendar = _make_error_func("isocalendar", datetime) + dst = _make_error_func("dst", datetime) + ctime = _make_error_func("ctime", datetime) + time = _make_error_func("time", datetime) + toordinal = _make_error_func("toordinal", datetime) + tzname = _make_error_func("tzname", datetime) + utcoffset = _make_error_func("utcoffset", datetime) + + # "fromisocalendar" was introduced in 3.8 + fromisocalendar = _make_error_func("fromisocalendar", datetime) + + # ---------------------------------------------------------------------- + # The remaining methods have docstrings copy/pasted from the analogous + # Timestamp methods. + + strftime = _make_error_func( + "strftime", + """ + Return a formatted string of the Timestamp. + + Parameters + ---------- + format : str + Format string to convert Timestamp to string. + See strftime documentation for more information on the format string: + https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.strftime('%Y-%m-%d %X') + '2020-03-14 15:32:52' + """, + ) + + strptime = _make_error_func( + "strptime", + """ + Timestamp.strptime(string, format) + + Function is not implemented. Use pd.to_datetime(). + """, + ) + + utcfromtimestamp = _make_error_func( + "utcfromtimestamp", + """ + Timestamp.utcfromtimestamp(ts) + + Construct a naive UTC datetime from a POSIX timestamp. + + Examples + -------- + >>> pd.Timestamp.utcfromtimestamp(1584199972) + Timestamp('2020-03-14 15:32:52') + """, + ) + fromtimestamp = _make_error_func( + "fromtimestamp", + """ + Timestamp.fromtimestamp(ts) + + Transform timestamp[, tz] to tz's local time from POSIX timestamp. + + Examples + -------- + >>> pd.Timestamp.fromtimestamp(1584199972) + Timestamp('2020-03-14 15:32:52') + + Note that the output may change depending on your local time. + """, + ) + combine = _make_error_func( + "combine", + """ + Timestamp.combine(date, time) + + Combine date, time into datetime with same date and time fields. + + Examples + -------- + >>> from datetime import date, time + >>> pd.Timestamp.combine(date(2020, 3, 14), time(15, 30, 15)) + Timestamp('2020-03-14 15:30:15') + """, + ) + utcnow = _make_error_func( + "utcnow", + """ + Timestamp.utcnow() + + Return a new Timestamp representing UTC day and time. + + Examples + -------- + >>> pd.Timestamp.utcnow() # doctest: +SKIP + Timestamp('2020-11-16 22:50:18.092888+0000', tz='UTC') + """, + ) + + timestamp = _make_error_func( + "timestamp", + """ + Return POSIX timestamp as float. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548') + >>> ts.timestamp() + 1584199972.192548 + """ + ) + + # GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or + # return NaT create functions that raise, for binding to NaTType + astimezone = _make_error_func( + "astimezone", + """ + Convert timezone-aware Timestamp to another time zone. 
+ + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. + + Examples + -------- + Create a timestamp object with UTC timezone: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Change to Tokyo timezone: + + >>> ts.tz_convert(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Can also use ``astimezone``: + + >>> ts.astimezone(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_convert(tz='Asia/Tokyo') + NaT + """, + ) + fromordinal = _make_error_func( + "fromordinal", + """ + Construct a timestamp from a a proleptic Gregorian ordinal. + + Parameters + ---------- + ordinal : int + Date corresponding to a proleptic Gregorian ordinal. + freq : str, DateOffset + Offset to apply to the Timestamp. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for the Timestamp. + + Notes + ----- + By definition there cannot be any tz info on the ordinal itself. + + Examples + -------- + >>> pd.Timestamp.fromordinal(737425) + Timestamp('2020-01-01 00:00:00') + """, + ) + + # _nat_methods + to_pydatetime = _make_nat_func( + "to_pydatetime", + """ + Convert a Timestamp object to a native Python datetime object. + + If warn=True, issue a warning if nanoseconds is nonzero. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548') + >>> ts.to_pydatetime() + datetime.datetime(2020, 3, 14, 15, 32, 52, 192548) + + Analogous for ``pd.NaT``: + + >>> pd.NaT.to_pydatetime() + NaT + """, + ) + + now = _make_nat_func( + "now", + """ + Return new Timestamp object representing current time local to tz. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + + Examples + -------- + >>> pd.Timestamp.now() # doctest: +SKIP + Timestamp('2020-11-16 22:06:16.378782') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.now() + NaT + """, + ) + today = _make_nat_func( + "today", + """ + Return the current time in the local timezone. + + This differs from datetime.today() in that it can be localized to a + passed timezone. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + + Examples + -------- + >>> pd.Timestamp.today() # doctest: +SKIP + Timestamp('2020-11-16 22:37:39.969883') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.today() + NaT + """, + ) + round = _make_nat_func( + "round", + """ + Round the Timestamp to the specified resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the rounding resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. 
+ * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + a new Timestamp rounded to the given resolution of `freq` + + Raises + ------ + ValueError if the freq cannot be converted + + Notes + ----- + If the Timestamp has a timezone, rounding will take place relative to the + local ("wall") time and re-localized to the same timezone. When rounding + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be rounded using multiple frequency units: + + >>> ts.round(freq='H') # hour + Timestamp('2020-03-14 16:00:00') + + >>> ts.round(freq='T') # minute + Timestamp('2020-03-14 15:33:00') + + >>> ts.round(freq='S') # seconds + Timestamp('2020-03-14 15:32:52') + + >>> ts.round(freq='L') # milliseconds + Timestamp('2020-03-14 15:32:52.193000') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.round(freq='5T') + Timestamp('2020-03-14 15:35:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.round(freq='1H30T') + Timestamp('2020-03-14 15:00:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.round() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 01:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.round("H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.round("H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """, + ) + floor = _make_nat_func( + "floor", + """ + Return a new Timestamp floored to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the flooring resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Raises + ------ + ValueError if the freq cannot be converted. + + Notes + ----- + If the Timestamp has a timezone, flooring will take place relative to the + local ("wall") time and re-localized to the same timezone. When flooring + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. 
+ + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be floored using multiple frequency units: + + >>> ts.floor(freq='H') # hour + Timestamp('2020-03-14 15:00:00') + + >>> ts.floor(freq='T') # minute + Timestamp('2020-03-14 15:32:00') + + >>> ts.floor(freq='S') # seconds + Timestamp('2020-03-14 15:32:52') + + >>> ts.floor(freq='N') # nanoseconds + Timestamp('2020-03-14 15:32:52.192548651') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.floor(freq='5T') + Timestamp('2020-03-14 15:30:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.floor(freq='1H30T') + Timestamp('2020-03-14 15:00:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.floor() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 03:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.floor("2H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.floor("2H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """, + ) + ceil = _make_nat_func( + "ceil", + """ + Return a new Timestamp ceiled to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Raises + ------ + ValueError if the freq cannot be converted. + + Notes + ----- + If the Timestamp has a timezone, ceiling will take place relative to the + local ("wall") time and re-localized to the same timezone. When ceiling + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be ceiled using multiple frequency units: + + >>> ts.ceil(freq='H') # hour + Timestamp('2020-03-14 16:00:00') + + >>> ts.ceil(freq='T') # minute + Timestamp('2020-03-14 15:33:00') + + >>> ts.ceil(freq='S') # seconds + Timestamp('2020-03-14 15:32:53') + + >>> ts.ceil(freq='U') # microseconds + Timestamp('2020-03-14 15:32:52.192549') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.ceil(freq='5T') + Timestamp('2020-03-14 15:35:00') + + or a combination of multiple units, like '1H30T' (i.e. 
1 hour and 30 minutes): + + >>> ts.ceil(freq='1H30T') + Timestamp('2020-03-14 16:30:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.ceil() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 01:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.ceil("H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.ceil("H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """, + ) + + tz_convert = _make_nat_func( + "tz_convert", + """ + Convert timezone-aware Timestamp to another time zone. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. + + Examples + -------- + Create a timestamp object with UTC timezone: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Change to Tokyo timezone: + + >>> ts.tz_convert(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Can also use ``astimezone``: + + >>> ts.astimezone(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_convert(tz='Asia/Tokyo') + NaT + """, + ) + tz_localize = _make_nat_func( + "tz_localize", + """ + Localize the Timestamp to a timezone. + + Convert naive Timestamp to local time zone or remove + timezone from timezone-aware Timestamp. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding local time. + + ambiguous : bool, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + The behavior is as follows: + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + localized : Timestamp + + Raises + ------ + TypeError + If the Timestamp is tz-aware and tz is not None. 
+ + Examples + -------- + Create a naive timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651') + + Add 'Europe/Stockholm' as timezone: + + >>> ts.tz_localize(tz='Europe/Stockholm') + Timestamp('2020-03-14 15:32:52.192548651+0100', tz='Europe/Stockholm') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_localize() + NaT + """, + ) + replace = _make_nat_func( + "replace", + """ + Implements datetime.replace, handles nanoseconds. + + Parameters + ---------- + year : int, optional + month : int, optional + day : int, optional + hour : int, optional + minute : int, optional + second : int, optional + microsecond : int, optional + nanosecond : int, optional + tzinfo : tz-convertible, optional + fold : int, optional + + Returns + ------- + Timestamp with fields replaced + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Replace year and the hour: + + >>> ts.replace(year=1999, hour=10) + Timestamp('1999-03-14 10:32:52.192548651+0000', tz='UTC') + + Replace timezone (not a conversion): + + >>> import pytz + >>> ts.replace(tzinfo=pytz.timezone('US/Pacific')) + Timestamp('2020-03-14 15:32:52.192548651-0700', tz='US/Pacific') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.replace(tzinfo=pytz.timezone('US/Pacific')) + NaT + """, + ) + @property + def tz(self) -> None: + return None + + @property + def tzinfo(self) -> None: + return None + + +c_NaT = NaTType() # C-visible +NaT = c_NaT # Python-visible + + +# ---------------------------------------------------------------------- + +cdef inline bint checknull_with_nat(object val): + """ + Utility to check if a value is a nat or not. + """ + return val is None or util.is_nan(val) or val is c_NaT + + +cdef inline bint is_dt64nat(object val): + """ + Is this a np.datetime64 object np.datetime64("NaT"). + """ + if util.is_datetime64_object(val): + return get_datetime64_value(val) == NPY_NAT + return False + + +cdef inline bint is_td64nat(object val): + """ + Is this a np.timedelta64 object np.timedelta64("NaT"). 
+ """ + if util.is_timedelta64_object(val): + return get_timedelta64_value(val) == NPY_NAT + return False diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd new file mode 100644 index 00000000..c1936e34 --- /dev/null +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -0,0 +1,118 @@ +cimport numpy as cnp +from cpython.datetime cimport ( + date, + datetime, +) +from numpy cimport ( + int32_t, + int64_t, +) + + +# TODO(cython3): most of these can be cimported directly from numpy +cdef extern from "numpy/ndarrayobject.h": + ctypedef int64_t npy_timedelta + ctypedef int64_t npy_datetime + +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct PyArray_DatetimeMetaData: + NPY_DATETIMEUNIT base + int64_t num + +cdef extern from "numpy/arrayscalars.h": + ctypedef struct PyDatetimeScalarObject: + # PyObject_HEAD + npy_datetime obval + PyArray_DatetimeMetaData obmeta + + ctypedef struct PyTimedeltaScalarObject: + # PyObject_HEAD + npy_timedelta obval + PyArray_DatetimeMetaData obmeta + +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct npy_datetimestruct: + int64_t year + int32_t month, day, hour, min, sec, us, ps, as + + ctypedef enum NPY_DATETIMEUNIT: + NPY_FR_Y + NPY_FR_M + NPY_FR_W + NPY_FR_D + NPY_FR_B + NPY_FR_h + NPY_FR_m + NPY_FR_s + NPY_FR_ms + NPY_FR_us + NPY_FR_ns + NPY_FR_ps + NPY_FR_fs + NPY_FR_as + NPY_FR_GENERIC + + int64_t NPY_DATETIME_NAT # elswhere we call this NPY_NAT + +cdef extern from "src/datetime/np_datetime.h": + ctypedef struct pandas_timedeltastruct: + int64_t days + int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds + + void pandas_datetime_to_datetimestruct(npy_datetime val, + NPY_DATETIMEUNIT fr, + npy_datetimestruct *result) nogil + + npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, + npy_datetimestruct *d) nogil + + void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + NPY_DATETIMEUNIT fr, + pandas_timedeltastruct *result + ) nogil + +cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 + +cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?) + +cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil + +cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts) +cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts) +cdef int64_t pydate_to_dt64(date val, npy_datetimestruct *dts) +cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) + +cdef npy_datetime get_datetime64_value(object obj) nogil +cdef npy_timedelta get_timedelta64_value(object obj) nogil +cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil + +cdef int string_to_dts( + str val, + npy_datetimestruct* dts, + NPY_DATETIMEUNIT* out_bestunit, + int* out_local, + int* out_tzoffset, + bint want_exc, +) except? -1 + +cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype) + +cpdef cnp.ndarray astype_overflowsafe( + cnp.ndarray values, # ndarray[datetime64[anyunit]] + cnp.dtype dtype, # ndarray[datetime64[anyunit]] + bint copy=*, + bint round_ok=*, +) +cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1 + +cdef bint cmp_dtstructs(npy_datetimestruct* left, npy_datetimestruct* right, int op) +cdef get_implementation_bounds( + NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper +) + +cdef int64_t convert_reso( + int64_t value, + NPY_DATETIMEUNIT from_reso, + NPY_DATETIMEUNIT to_reso, + bint round_ok, +) except? 
-1 diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi new file mode 100644 index 00000000..d80d2637 --- /dev/null +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -0,0 +1,20 @@ +import numpy as np + +from pandas._typing import npt + +class OutOfBoundsDatetime(ValueError): ... +class OutOfBoundsTimedelta(ValueError): ... + +# only exposed for testing +def py_get_unit_from_dtype(dtype: np.dtype): ... +def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ... +def astype_overflowsafe( + arr: np.ndarray, + dtype: np.dtype, + copy: bool = ..., + round_ok: bool = ..., +) -> np.ndarray: ... +def is_unitless(dtype: np.dtype) -> bool: ... +def compare_mismatched_resolutions( + left: np.ndarray, right: np.ndarray, op +) -> npt.NDArray[np.bool_]: ... diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx new file mode 100644 index 00000000..c58a8d4d --- /dev/null +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -0,0 +1,606 @@ +cimport cython +from cpython.datetime cimport ( + PyDateTime_CheckExact, + PyDateTime_DATE_GET_HOUR, + PyDateTime_DATE_GET_MICROSECOND, + PyDateTime_DATE_GET_MINUTE, + PyDateTime_DATE_GET_SECOND, + PyDateTime_GET_DAY, + PyDateTime_GET_MONTH, + PyDateTime_GET_YEAR, + import_datetime, +) +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, +) + +import_datetime() + +import numpy as np + +cimport numpy as cnp + +cnp.import_array() +from numpy cimport ( + int64_t, + ndarray, + uint8_t, +) + +from pandas._libs.tslibs.util cimport get_c_string_buf_and_size + + +cdef extern from "src/datetime/np_datetime.h": + int cmp_npy_datetimestruct(npy_datetimestruct *a, + npy_datetimestruct *b) + + # AS, FS, PS versions exist but are not imported because they are not used. + npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS + npy_datetimestruct _US_MIN_DTS, _US_MAX_DTS + npy_datetimestruct _MS_MIN_DTS, _MS_MAX_DTS + npy_datetimestruct _S_MIN_DTS, _S_MAX_DTS + npy_datetimestruct _M_MIN_DTS, _M_MAX_DTS + + PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype); + +cdef extern from "src/datetime/np_datetime_strings.h": + int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, + int *out_local, int *out_tzoffset) + + +# ---------------------------------------------------------------------- +# numpy object inspection + +cdef inline npy_datetime get_datetime64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy datetime64 object + + Note that to interpret this as a datetime, the corresponding unit is + also needed. That can be found using `get_datetime64_unit`. + """ + return (obj).obval + + +cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy timedelta64 object + """ + return (obj).obval + + +cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: + """ + returns the unit part of the dtype for a numpy datetime64 object. + """ + return (obj).obmeta.base + + +cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype): + # NB: caller is responsible for ensuring this is *some* datetime64 or + # timedelta64 dtype, otherwise we can segfault + cdef: + cnp.PyArray_Descr* descr = dtype + PyArray_DatetimeMetaData meta + meta = get_datetime_metadata_from_dtype(descr) + return meta.base + + +def py_get_unit_from_dtype(dtype): + # for testing get_unit_from_dtype; adds 896 bytes to the .so file. 
+ return get_unit_from_dtype(dtype) + + +def is_unitless(dtype: cnp.dtype) -> bool: + """ + Check if a datetime64 or timedelta64 dtype has no attached unit. + """ + if dtype.type_num not in [cnp.NPY_DATETIME, cnp.NPY_TIMEDELTA]: + raise ValueError("is_unitless dtype must be datetime64 or timedelta64") + cdef: + NPY_DATETIMEUNIT unit = get_unit_from_dtype(dtype) + + return unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC + + +# ---------------------------------------------------------------------- +# Comparison + + +cdef bint cmp_dtstructs( + npy_datetimestruct* left, npy_datetimestruct* right, int op +): + cdef: + int cmp_res + + cmp_res = cmp_npy_datetimestruct(left, right) + if op == Py_EQ: + return cmp_res == 0 + if op == Py_NE: + return cmp_res != 0 + if op == Py_GT: + return cmp_res == 1 + if op == Py_LT: + return cmp_res == -1 + if op == Py_GE: + return cmp_res == 1 or cmp_res == 0 + else: + # i.e. op == Py_LE + return cmp_res == -1 or cmp_res == 0 + + +cdef inline bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: + """ + cmp_scalar is a more performant version of PyObject_RichCompare + typed for int64_t arguments. + """ + if op == Py_EQ: + return lhs == rhs + elif op == Py_NE: + return lhs != rhs + elif op == Py_LT: + return lhs < rhs + elif op == Py_LE: + return lhs <= rhs + elif op == Py_GT: + return lhs > rhs + elif op == Py_GE: + return lhs >= rhs + + +class OutOfBoundsDatetime(ValueError): + """ + Raised when the datetime is outside the range that can be represented. + """ + pass + + +class OutOfBoundsTimedelta(ValueError): + """ + Raised when encountering a timedelta value that cannot be represented. + + Representation should be within a timedelta64[ns]. + """ + # Timedelta analogue to OutOfBoundsDatetime + pass + + +cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper): + if reso == NPY_FR_ns: + upper[0] = _NS_MAX_DTS + lower[0] = _NS_MIN_DTS + elif reso == NPY_FR_us: + upper[0] = _US_MAX_DTS + lower[0] = _US_MIN_DTS + elif reso == NPY_FR_ms: + upper[0] = _MS_MAX_DTS + lower[0] = _MS_MIN_DTS + elif reso == NPY_FR_s: + upper[0] = _S_MAX_DTS + lower[0] = _S_MIN_DTS + elif reso == NPY_FR_m: + upper[0] = _M_MAX_DTS + lower[0] = _M_MIN_DTS + else: + raise NotImplementedError(reso) + + +cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns): + """Raises OutOfBoundsDatetime if the given date is outside the range that + can be represented by nanosecond-resolution 64-bit integers.""" + cdef: + bint error = False + npy_datetimestruct cmp_upper, cmp_lower + + get_implementation_bounds(unit, &cmp_lower, &cmp_upper) + + if cmp_npy_datetimestruct(dts, &cmp_lower) == -1: + error = True + elif cmp_npy_datetimestruct(dts, &cmp_upper) == 1: + error = True + + if error: + fmt = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} ' + f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}') + # TODO: "nanosecond" in the message assumes NPY_FR_ns + raise OutOfBoundsDatetime(f'Out of bounds nanosecond timestamp: {fmt}') + + +# ---------------------------------------------------------------------- +# Conversion + +cdef inline int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil: + """Convenience function to call npy_datetimestruct_to_datetime + with the by-far-most-common frequency NPY_FR_ns""" + return npy_datetimestruct_to_datetime(NPY_FR_ns, dts) + + +# just exposed for testing at the moment +def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit): + cdef: + pandas_timedeltastruct tds + 
pandas_timedelta_to_timedeltastruct(td64, unit, &tds) + return tds # <- returned as a dict to python + + +cdef inline void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts): + if PyDateTime_CheckExact(dt): + dts.year = PyDateTime_GET_YEAR(dt) + else: + # We use dt.year instead of PyDateTime_GET_YEAR because with Timestamp + # we override year such that PyDateTime_GET_YEAR is incorrect. + dts.year = dt.year + + dts.month = PyDateTime_GET_MONTH(dt) + dts.day = PyDateTime_GET_DAY(dt) + dts.hour = PyDateTime_DATE_GET_HOUR(dt) + dts.min = PyDateTime_DATE_GET_MINUTE(dt) + dts.sec = PyDateTime_DATE_GET_SECOND(dt) + dts.us = PyDateTime_DATE_GET_MICROSECOND(dt) + dts.ps = dts.as = 0 + + +cdef inline int64_t pydatetime_to_dt64(datetime val, + npy_datetimestruct *dts): + """ + Note we are assuming that the datetime object is timezone-naive. + """ + pydatetime_to_dtstruct(val, dts) + return dtstruct_to_dt64(dts) + + +cdef inline void pydate_to_dtstruct(date val, npy_datetimestruct *dts): + dts.year = PyDateTime_GET_YEAR(val) + dts.month = PyDateTime_GET_MONTH(val) + dts.day = PyDateTime_GET_DAY(val) + dts.hour = dts.min = dts.sec = dts.us = 0 + dts.ps = dts.as = 0 + return + +cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts): + pydate_to_dtstruct(val, dts) + return dtstruct_to_dt64(dts) + + +cdef inline int string_to_dts( + str val, + npy_datetimestruct* dts, + NPY_DATETIMEUNIT* out_bestunit, + int* out_local, + int* out_tzoffset, + bint want_exc, +) except? -1: + cdef: + Py_ssize_t length + const char* buf + + buf = get_c_string_buf_and_size(val, &length) + return parse_iso_8601_datetime(buf, length, want_exc, + dts, out_bestunit, out_local, out_tzoffset) + + +cpdef ndarray astype_overflowsafe( + ndarray values, + cnp.dtype dtype, + bint copy=True, + bint round_ok=True, +): + """ + Convert an ndarray with datetime64[X] to datetime64[Y] + or timedelta64[X] to timedelta64[Y], + raising on overflow. + """ + if values.descr.type_num == dtype.type_num == cnp.NPY_DATETIME: + # i.e. dtype.kind == "M" + pass + elif values.descr.type_num == dtype.type_num == cnp.NPY_TIMEDELTA: + # i.e. dtype.kind == "m" + pass + else: + raise TypeError( + "astype_overflowsafe values.dtype and dtype must be either " + "both-datetime64 or both-timedelta64." + ) + + cdef: + NPY_DATETIMEUNIT from_unit = get_unit_from_dtype(values.dtype) + NPY_DATETIMEUNIT to_unit = get_unit_from_dtype(dtype) + + if ( + from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC + or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC + ): + # without raising explicitly here, we end up with a SystemError + # built-in function [...] returned a result with an error + raise ValueError( + "datetime64/timedelta64 values and dtype must have a unit specified" + ) + + if from_unit == to_unit: + # Check this before allocating result for perf, might save some memory + if copy: + return values.copy() + return values + + elif from_unit > to_unit: + if round_ok: + # e.g. ns -> us, so there is no risk of overflow, so we can use + # numpy's astype safely. Note there _is_ risk of truncation. 
+ return values.astype(dtype) + else: + iresult2 = astype_round_check(values.view("i8"), from_unit, to_unit) + return iresult2.view(dtype) + + if (values).dtype.byteorder == ">": + # GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap + values = values.astype(values.dtype.newbyteorder("<")) + + cdef: + ndarray i8values = values.view("i8") + + # equiv: result = np.empty((values).shape, dtype="i8") + ndarray iresult = cnp.PyArray_EMPTY( + values.ndim, values.shape, cnp.NPY_INT64, 0 + ) + + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(iresult, i8values) + Py_ssize_t i, N = values.size + int64_t value, new_value + npy_datetimestruct dts + bint is_td = dtype.type_num == cnp.NPY_TIMEDELTA + + for i in range(N): + # Analogous to: item = values[i] + value = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if value == NPY_DATETIME_NAT: + new_value = NPY_DATETIME_NAT + else: + pandas_datetime_to_datetimestruct(value, from_unit, &dts) + + try: + check_dts_bounds(&dts, to_unit) + except OutOfBoundsDatetime as err: + if is_td: + from_abbrev = np.datetime_data(values.dtype)[0] + np_val = np.timedelta64(value, from_abbrev) + msg = ( + "Cannot convert {np_val} to {dtype} without overflow" + .format(np_val=str(np_val), dtype=str(dtype)) + ) + raise OutOfBoundsTimedelta(msg) from err + else: + raise + + new_value = npy_datetimestruct_to_datetime(to_unit, &dts) + + # Analogous to: iresult[i] = new_value + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value + + cnp.PyArray_MultiIter_NEXT(mi) + + return iresult.view(dtype) + + +# TODO: try to upstream this fix to numpy +def compare_mismatched_resolutions(ndarray left, ndarray right, op): + """ + Overflow-safe comparison of timedelta64/datetime64 with mismatched resolutions. + + >>> left = np.array([500], dtype="M8[Y]") + >>> right = np.array([0], dtype="M8[ns]") + >>> left < right # <- wrong! + array([ True]) + """ + + if left.dtype.kind != right.dtype.kind or left.dtype.kind not in ["m", "M"]: + raise ValueError("left and right must both be timedelta64 or both datetime64") + + cdef: + int op_code = op_to_op_code(op) + NPY_DATETIMEUNIT left_unit = get_unit_from_dtype(left.dtype) + NPY_DATETIMEUNIT right_unit = get_unit_from_dtype(right.dtype) + + # equiv: result = np.empty((left).shape, dtype="bool") + ndarray result = cnp.PyArray_EMPTY( + left.ndim, left.shape, cnp.NPY_BOOL, 0 + ) + + ndarray lvalues = left.view("i8") + ndarray rvalues = right.view("i8") + + cnp.broadcast mi = cnp.PyArray_MultiIterNew3(result, lvalues, rvalues) + int64_t lval, rval + bint res_value + + Py_ssize_t i, N = left.size + npy_datetimestruct ldts, rdts + + + for i in range(N): + # Analogous to: lval = lvalues[i] + lval = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + # Analogous to: rval = rvalues[i] + rval = (cnp.PyArray_MultiIter_DATA(mi, 2))[0] + + if lval == NPY_DATETIME_NAT or rval == NPY_DATETIME_NAT: + res_value = op_code == Py_NE + + else: + pandas_datetime_to_datetimestruct(lval, left_unit, &ldts) + pandas_datetime_to_datetimestruct(rval, right_unit, &rdts) + + res_value = cmp_dtstructs(&ldts, &rdts, op_code) + + # Analogous to: result[i] = res_value + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_value + + cnp.PyArray_MultiIter_NEXT(mi) + + return result + + +import operator + + +cdef int op_to_op_code(op): + # TODO: should exist somewhere? 
+ if op is operator.eq: + return Py_EQ + if op is operator.ne: + return Py_NE + if op is operator.le: + return Py_LE + if op is operator.lt: + return Py_LT + if op is operator.ge: + return Py_GE + if op is operator.gt: + return Py_GT + + +cdef ndarray astype_round_check( + ndarray i8values, + NPY_DATETIMEUNIT from_unit, + NPY_DATETIMEUNIT to_unit +): + # cases with from_unit > to_unit, e.g. ns->us, raise if the conversion + # involves truncation, e.g. 1500ns->1us + cdef: + Py_ssize_t i, N = i8values.size + + # equiv: iresult = np.empty((i8values).shape, dtype="i8") + ndarray iresult = cnp.PyArray_EMPTY( + i8values.ndim, i8values.shape, cnp.NPY_INT64, 0 + ) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(iresult, i8values) + + # Note the arguments to_unit, from unit are swapped vs how they + # are passed when going to a higher-frequency reso. + int64_t mult = get_conversion_factor(to_unit, from_unit) + int64_t value, mod + + for i in range(N): + # Analogous to: item = i8values[i] + value = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if value == NPY_DATETIME_NAT: + new_value = NPY_DATETIME_NAT + else: + new_value, mod = divmod(value, mult) + if mod != 0: + # TODO: avoid runtime import + from pandas._libs.tslibs.dtypes import npy_unit_to_abbrev + from_abbrev = npy_unit_to_abbrev(from_unit) + to_abbrev = npy_unit_to_abbrev(to_unit) + raise ValueError( + f"Cannot losslessly cast '{value} {from_abbrev}' to {to_abbrev}" + ) + + # Analogous to: iresult[i] = new_value + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value + + cnp.PyArray_MultiIter_NEXT(mi) + + return iresult + + +@cython.overflowcheck(True) +cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1: + """ + Find the factor by which we need to multiply to convert from from_unit to to_unit. + """ + if ( + from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC + or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC + ): + raise ValueError("unit-less resolutions are not supported") + if from_unit > to_unit: + raise ValueError + + if from_unit == to_unit: + return 1 + + if from_unit == NPY_DATETIMEUNIT.NPY_FR_W: + return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D: + return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h: + return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m: + return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit) + + +cdef int64_t convert_reso( + int64_t value, + NPY_DATETIMEUNIT from_reso, + NPY_DATETIMEUNIT to_reso, + bint round_ok, +) except? -1: + cdef: + int64_t res_value, mult, div, mod + + if from_reso == to_reso: + return value + + elif to_reso < from_reso: + # e.g. 
ns -> us, no risk of overflow, but can be lossy rounding + mult = get_conversion_factor(to_reso, from_reso) + div, mod = divmod(value, mult) + if mod > 0 and not round_ok: + raise ValueError("Cannot losslessly convert units") + + # Note that when mod > 0, we follow np.timedelta64 in always + # rounding down. + res_value = div + + elif ( + from_reso == NPY_FR_Y + or from_reso == NPY_FR_M + or to_reso == NPY_FR_Y + or to_reso == NPY_FR_M + ): + # Converting by multiplying isn't _quite_ right bc the number of + # seconds in a month/year isn't fixed. + res_value = _convert_reso_with_dtstruct(value, from_reso, to_reso) + + else: + # e.g. ns -> us, risk of overflow, but no risk of lossy rounding + mult = get_conversion_factor(from_reso, to_reso) + with cython.overflowcheck(True): + # Note: caller is responsible for re-raising as OutOfBoundsTimedelta + res_value = value * mult + + return res_value + + +cdef int64_t _convert_reso_with_dtstruct( + int64_t value, + NPY_DATETIMEUNIT from_unit, + NPY_DATETIMEUNIT to_unit, +) except? -1: + cdef: + npy_datetimestruct dts + + pandas_datetime_to_datetimestruct(value, from_unit, &dts) + check_dts_bounds(&dts, to_unit) + return npy_datetimestruct_to_datetime(to_unit, &dts) diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd new file mode 100644 index 00000000..215c3f84 --- /dev/null +++ b/pandas/_libs/tslibs/offsets.pxd @@ -0,0 +1,12 @@ +from numpy cimport int64_t + + +cpdef to_offset(object obj) +cdef bint is_offset_object(object obj) +cdef bint is_tick_object(object obj) + +cdef class BaseOffset: + cdef readonly: + int64_t n + bint normalize + dict _cache diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi new file mode 100644 index 00000000..c3d550c7 --- /dev/null +++ b/pandas/_libs/tslibs/offsets.pyi @@ -0,0 +1,281 @@ +from datetime import ( + datetime, + timedelta, +) +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Literal, + TypeVar, + overload, +) + +import numpy as np + +from pandas._typing import npt + +from .timedeltas import Timedelta + +if TYPE_CHECKING: + from pandas.core.indexes.datetimes import DatetimeIndex +_BaseOffsetT = TypeVar("_BaseOffsetT", bound=BaseOffset) +_DatetimeT = TypeVar("_DatetimeT", bound=datetime) +_TimedeltaT = TypeVar("_TimedeltaT", bound=timedelta) + +_relativedelta_kwds: set[str] +prefix_mapping: dict[str, type] + +class ApplyTypeError(TypeError): ... + +class BaseOffset: + n: int + def __init__(self, n: int = ..., normalize: bool = ...) -> None: ... + def __eq__(self, other) -> bool: ... + def __ne__(self, other) -> bool: ... + def __hash__(self) -> int: ... + @property + def kwds(self) -> dict: ... + @property + def base(self) -> BaseOffset: ... + @overload + def __add__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ... + @overload + def __add__(self: _BaseOffsetT, other: BaseOffset) -> _BaseOffsetT: ... + @overload + def __add__(self, other: _DatetimeT) -> _DatetimeT: ... + @overload + def __add__(self, other: _TimedeltaT) -> _TimedeltaT: ... + @overload + def __radd__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ... + @overload + def __radd__(self: _BaseOffsetT, other: BaseOffset) -> _BaseOffsetT: ... + @overload + def __radd__(self, other: _DatetimeT) -> _DatetimeT: ... + @overload + def __radd__(self, other: _TimedeltaT) -> _TimedeltaT: ... + def __sub__(self: _BaseOffsetT, other: BaseOffset) -> _BaseOffsetT: ... 
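+    # --- Editor's illustrative note (not part of the upstream stub) ---
+    # The arithmetic overloads in this class describe how offsets preserve the
+    # operand type: offset + Timestamp -> Timestamp, while applying an offset
+    # to an object-dtype ndarray works elementwise.  A minimal sketch, assuming
+    # a recent pandas; exact reprs may differ between versions:
+    #
+    #   >>> import pandas as pd
+    #   >>> pd.Timestamp("2022-01-31") + pd.offsets.MonthEnd(1)
+    #   Timestamp('2022-02-28 00:00:00')
+    #   >>> pd.offsets.Day(2) + pd.Timestamp("2022-01-01")   # __radd__ path
+    #   Timestamp('2022-01-03 00:00:00')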
+ @overload + def __rsub__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ... + @overload + def __rsub__(self: _BaseOffsetT, other: BaseOffset) -> _BaseOffsetT: ... + @overload + def __rsub__(self, other: _DatetimeT) -> _DatetimeT: ... + @overload + def __rsub__(self, other: _TimedeltaT) -> _TimedeltaT: ... + def __call__(self, other): ... + @overload + def __mul__(self, other: np.ndarray) -> np.ndarray: ... + @overload + def __mul__(self: _BaseOffsetT, other: int) -> _BaseOffsetT: ... + @overload + def __rmul__(self, other: np.ndarray) -> np.ndarray: ... + @overload + def __rmul__(self: _BaseOffsetT, other: int) -> _BaseOffsetT: ... + def __neg__(self: _BaseOffsetT) -> _BaseOffsetT: ... + def copy(self: _BaseOffsetT) -> _BaseOffsetT: ... + @property + def name(self) -> str: ... + @property + def rule_code(self) -> str: ... + @property + def freqstr(self) -> str: ... + def apply_index(self, dtindex: DatetimeIndex) -> DatetimeIndex: ... + def _apply_array(self, dtarr) -> None: ... + def rollback(self, dt: datetime) -> datetime: ... + def rollforward(self, dt: datetime) -> datetime: ... + def is_on_offset(self, dt: datetime) -> bool: ... + def __setstate__(self, state) -> None: ... + def __getstate__(self): ... + @property + def nanos(self) -> int: ... + def onOffset(self, dt: datetime) -> bool: ... + def isAnchored(self) -> bool: ... + def is_anchored(self) -> bool: ... + +def _get_offset(name: str) -> BaseOffset: ... + +class SingleConstructorOffset(BaseOffset): + @classmethod + def _from_name(cls, suffix: None = ...): ... + def __reduce__(self): ... + +@overload +def to_offset(freq: None) -> None: ... +@overload +def to_offset(freq: _BaseOffsetT) -> _BaseOffsetT: ... +@overload +def to_offset(freq: timedelta | str) -> BaseOffset: ... + +class Tick(SingleConstructorOffset): + _reso: int + _prefix: str + _td64_unit: str + def __init__(self, n: int = ..., normalize: bool = ...) -> None: ... + @property + def delta(self) -> Timedelta: ... + @property + def nanos(self) -> int: ... + +def delta_to_tick(delta: timedelta) -> Tick: ... + +class Day(Tick): ... +class Hour(Tick): ... +class Minute(Tick): ... +class Second(Tick): ... +class Milli(Tick): ... +class Micro(Tick): ... +class Nano(Tick): ... + +class RelativeDeltaOffset(BaseOffset): + def __init__(self, n: int = ..., normalize: bool = ..., **kwds: Any) -> None: ... + +class BusinessMixin(SingleConstructorOffset): + def __init__( + self, n: int = ..., normalize: bool = ..., offset: timedelta = ... + ) -> None: ... + +class BusinessDay(BusinessMixin): ... + +class BusinessHour(BusinessMixin): + def __init__( + self, + n: int = ..., + normalize: bool = ..., + start: str | Collection[str] = ..., + end: str | Collection[str] = ..., + offset: timedelta = ..., + ) -> None: ... + +class WeekOfMonthMixin(SingleConstructorOffset): + def __init__( + self, n: int = ..., normalize: bool = ..., weekday: int = ... + ) -> None: ... + +class YearOffset(SingleConstructorOffset): + def __init__( + self, n: int = ..., normalize: bool = ..., month: int | None = ... + ) -> None: ... + +class BYearEnd(YearOffset): ... +class BYearBegin(YearOffset): ... +class YearEnd(YearOffset): ... +class YearBegin(YearOffset): ... + +class QuarterOffset(SingleConstructorOffset): + def __init__( + self, n: int = ..., normalize: bool = ..., startingMonth: int | None = ... + ) -> None: ... + +class BQuarterEnd(QuarterOffset): ... +class BQuarterBegin(QuarterOffset): ... +class QuarterEnd(QuarterOffset): ... +class QuarterBegin(QuarterOffset): ... 
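+# --- Editor's illustrative note (not part of the upstream stub) ---
+# The year/quarter offsets above are anchored: adding one snaps a date to the
+# next period boundary rather than adding a fixed timedelta.  A minimal sketch
+# with the default anchoring (QuarterEnd ends quarters in March/June/Sept/Dec);
+# exact reprs may differ between pandas versions:
+#
+#   >>> import pandas as pd
+#   >>> pd.Timestamp("2022-01-15") + pd.offsets.QuarterEnd()
+#   Timestamp('2022-03-31 00:00:00')
+#   >>> pd.offsets.QuarterEnd().rollback(pd.Timestamp("2022-01-15"))
+#   Timestamp('2021-12-31 00:00:00')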
+class MonthOffset(SingleConstructorOffset): ... +class MonthEnd(MonthOffset): ... +class MonthBegin(MonthOffset): ... +class BusinessMonthEnd(MonthOffset): ... +class BusinessMonthBegin(MonthOffset): ... + +class SemiMonthOffset(SingleConstructorOffset): + def __init__( + self, n: int = ..., normalize: bool = ..., day_of_month: int | None = ... + ) -> None: ... + +class SemiMonthEnd(SemiMonthOffset): ... +class SemiMonthBegin(SemiMonthOffset): ... + +class Week(SingleConstructorOffset): + def __init__( + self, n: int = ..., normalize: bool = ..., weekday: int | None = ... + ) -> None: ... + +class WeekOfMonth(WeekOfMonthMixin): + def __init__( + self, n: int = ..., normalize: bool = ..., week: int = ..., weekday: int = ... + ) -> None: ... + +class LastWeekOfMonth(WeekOfMonthMixin): ... + +class FY5253Mixin(SingleConstructorOffset): + def __init__( + self, + n: int = ..., + normalize: bool = ..., + weekday: int = ..., + startingMonth: int = ..., + variation: Literal["nearest", "last"] = ..., + ) -> None: ... + +class FY5253(FY5253Mixin): ... + +class FY5253Quarter(FY5253Mixin): + def __init__( + self, + n: int = ..., + normalize: bool = ..., + weekday: int = ..., + startingMonth: int = ..., + qtr_with_extra_week: int = ..., + variation: Literal["nearest", "last"] = ..., + ) -> None: ... + +class Easter(SingleConstructorOffset): ... + +class _CustomBusinessMonth(BusinessMixin): + def __init__( + self, + n: int = ..., + normalize: bool = ..., + weekmask: str = ..., + holidays: list | None = ..., + calendar: np.busdaycalendar | None = ..., + offset: timedelta = ..., + ) -> None: ... + +class CustomBusinessDay(BusinessDay): + def __init__( + self, + n: int = ..., + normalize: bool = ..., + weekmask: str = ..., + holidays: list | None = ..., + calendar: np.busdaycalendar | None = ..., + offset: timedelta = ..., + ) -> None: ... + +class CustomBusinessHour(BusinessHour): + def __init__( + self, + n: int = ..., + normalize: bool = ..., + weekmask: str = ..., + holidays: list | None = ..., + calendar: np.busdaycalendar | None = ..., + start: str = ..., + end: str = ..., + offset: timedelta = ..., + ) -> None: ... + +class CustomBusinessMonthEnd(_CustomBusinessMonth): ... +class CustomBusinessMonthBegin(_CustomBusinessMonth): ... +class DateOffset(RelativeDeltaOffset): ... + +BDay = BusinessDay +BMonthEnd = BusinessMonthEnd +BMonthBegin = BusinessMonthBegin +CBMonthEnd = CustomBusinessMonthEnd +CBMonthBegin = CustomBusinessMonthBegin +CDay = CustomBusinessDay + +def roll_qtrday( + other: datetime, n: int, month: int, day_opt: str, modby: int +) -> int: ... + +INVALID_FREQ_ERR_MSG: Literal["Invalid frequency: {0}"] + +def shift_months( + dtindex: npt.NDArray[np.int64], months: int, day_opt: str | None = ... +) -> npt.NDArray[np.int64]: ... 
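+# --- Editor's illustrative note (not part of the upstream stub) ---
+# ``to_offset`` (declared earlier in this stub) parses frequency strings into
+# instances of the offset classes in this module, and BDay/BMonthEnd/... are
+# plain aliases for the corresponding classes.  A minimal sketch; exact reprs
+# may differ between pandas versions:
+#
+#   >>> from pandas.tseries.frequencies import to_offset
+#   >>> to_offset("2BH")
+#   <2 * BusinessHours>
+#   >>> to_offset("15T").nanos
+#   900000000000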
+ +_offset_map: dict[str, BaseOffset] diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx new file mode 100644 index 00000000..242eeffd --- /dev/null +++ b/pandas/_libs/tslibs/offsets.pyx @@ -0,0 +1,4456 @@ +import operator +import re +import time +import warnings + +from pandas.util._exceptions import find_stack_level + +cimport cython +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDelta_Check, + date, + datetime, + import_datetime, + time as dt_time, + timedelta, +) + +import_datetime() + +from dateutil.easter import easter +from dateutil.relativedelta import relativedelta +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int64_t, + ndarray, +) + +cnp.import_array() + +# TODO: formalize having _libs.properties "above" tslibs in the dependency structure + +from pandas._libs.properties import cache_readonly + +from pandas._libs.tslibs cimport util +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) + +from pandas._libs.tslibs.ccalendar import ( + MONTH_ALIASES, + MONTH_TO_CAL_NUM, + int_to_weekday, + weekday_to_int, +) + +from pandas._libs.tslibs.ccalendar cimport ( + dayofweek, + get_days_in_month, + get_firstbday, + get_lastbday, +) +from pandas._libs.tslibs.conversion cimport localize_pydatetime +from pandas._libs.tslibs.dtypes cimport periods_per_day +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + get_unit_from_dtype, + npy_datetimestruct, + npy_datetimestruct_to_datetime, + pandas_datetime_to_datetimestruct, + pydate_to_dtstruct, +) + +from .dtypes cimport PeriodDtypeCode +from .timedeltas cimport ( + _Timedelta, + delta_to_nanoseconds, + is_any_td_scalar, +) + +from .timedeltas import Timedelta + +from .timestamps cimport _Timestamp + +from .timestamps import Timestamp + +# --------------------------------------------------------------------- +# Misc Helpers + +cdef bint is_offset_object(object obj): + return isinstance(obj, BaseOffset) + + +cdef bint is_tick_object(object obj): + return isinstance(obj, Tick) + + +cdef datetime _as_datetime(datetime obj): + if isinstance(obj, _Timestamp): + return obj.to_pydatetime() + return obj + + +cdef bint _is_normalized(datetime dt): + if dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0: + # Regardless of whether dt is datetime vs Timestamp + return False + if isinstance(dt, _Timestamp): + return dt.nanosecond == 0 + return True + + +def apply_wrapper_core(func, self, other) -> ndarray: + result = func(self, other) + result = np.asarray(result) + + if self.normalize: + # TODO: Avoid circular/runtime import + from .vectorized import normalize_i8_timestamps + reso = get_unit_from_dtype(other.dtype) + result = normalize_i8_timestamps(result.view("i8"), None, reso=reso) + + return result + + +def apply_array_wraps(func): + # Note: normally we would use `@functools.wraps(func)`, but this does + # not play nicely with cython class methods + def wrapper(self, other) -> np.ndarray: + # other is a DatetimeArray + result = apply_wrapper_core(func, self, other) + return result + + # do @functools.wraps(func) manually since it doesn't work on cdef funcs + wrapper.__name__ = func.__name__ + wrapper.__doc__ = func.__doc__ + return wrapper + + +def apply_wraps(func): + # Note: normally we would use `@functools.wraps(func)`, but this does + # not play nicely with cython class methods + + def wrapper(self, other): + + 
if other is NaT: + return NaT + elif ( + isinstance(other, BaseOffset) + or PyDelta_Check(other) + or util.is_timedelta64_object(other) + ): + # timedelta path + return func(self, other) + elif is_datetime64_object(other) or PyDate_Check(other): + # PyDate_Check includes date, datetime + other = Timestamp(other) + else: + # This will end up returning NotImplemented back in __add__ + raise ApplyTypeError + + tz = other.tzinfo + nano = other.nanosecond + + if self._adjust_dst: + other = other.tz_localize(None) + + result = func(self, other) + + result = Timestamp(result) + if self._adjust_dst: + result = result.tz_localize(tz) + + if self.normalize: + result = result.normalize() + + # If the offset object does not have a nanoseconds component, + # the result's nanosecond component may be lost. + if not self.normalize and nano != 0 and not hasattr(self, "nanoseconds"): + if result.nanosecond != nano: + if result.tz is not None: + # convert to UTC + value = result.tz_localize(None).value + else: + value = result.value + result = Timestamp(value + nano) + + if tz is not None and result.tzinfo is None: + result = result.tz_localize(tz) + + return result + + # do @functools.wraps(func) manually since it doesn't work on cdef funcs + wrapper.__name__ = func.__name__ + wrapper.__doc__ = func.__doc__ + return wrapper + + +cdef _wrap_timedelta_result(result): + """ + Tick operations dispatch to their Timedelta counterparts. Wrap the result + of these operations in a Tick if possible. + + Parameters + ---------- + result : object + + Returns + ------- + object + """ + if PyDelta_Check(result): + # convert Timedelta back to a Tick + return delta_to_tick(result) + + return result + +# --------------------------------------------------------------------- +# Business Helpers + + +cdef _get_calendar(weekmask, holidays, calendar): + """ + Generate busdaycalendar + """ + if isinstance(calendar, np.busdaycalendar): + if not holidays: + holidays = tuple(calendar.holidays) + elif not isinstance(holidays, tuple): + holidays = tuple(holidays) + else: + # trust that calendar.holidays and holidays are + # consistent + pass + return calendar, holidays + + if holidays is None: + holidays = [] + try: + holidays = holidays + calendar.holidays().tolist() + except AttributeError: + pass + holidays = [_to_dt64D(dt) for dt in holidays] + holidays = tuple(sorted(holidays)) + + kwargs = {'weekmask': weekmask} + if holidays: + kwargs['holidays'] = holidays + + busdaycalendar = np.busdaycalendar(**kwargs) + return busdaycalendar, holidays + + +cdef _to_dt64D(dt): + # Currently + # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') + # numpy.datetime64('2013-05-01T02:00:00.000000+0200') + # Thus astype is needed to cast datetime to datetime64[D] + if getattr(dt, 'tzinfo', None) is not None: + # Get the nanosecond timestamp, + # equiv `Timestamp(dt).value` or `dt.timestamp() * 10**9` + # The `naive` must be the `dt` naive wall time + # instead of the naive absolute time (GH#49441) + naive = dt.replace(tzinfo=None) + dt = np.datetime64(naive, "D") + else: + dt = np.datetime64(dt) + if dt.dtype.name != "datetime64[D]": + dt = dt.astype("datetime64[D]") + return dt + + +# --------------------------------------------------------------------- +# Validation + + +cdef _validate_business_time(t_input): + if isinstance(t_input, str): + try: + t = time.strptime(t_input, '%H:%M') + return dt_time(hour=t.tm_hour, minute=t.tm_min) + except ValueError: + raise ValueError("time data must match '%H:%M' format") + elif 
isinstance(t_input, dt_time): + if t_input.second != 0 or t_input.microsecond != 0: + raise ValueError( + "time data must be specified only with hour and minute") + return t_input + else: + raise ValueError("time data must be string or datetime.time") + + +# --------------------------------------------------------------------- +# Constructor Helpers + +_relativedelta_kwds = {"years", "months", "weeks", "days", "year", "month", + "day", "weekday", "hour", "minute", "second", + "microsecond", "millisecond", "nanosecond", + "nanoseconds", "hours", "minutes", "seconds", + "milliseconds", "microseconds"} + + +cdef _determine_offset(kwds): + # timedelta is used for sub-daily plural offsets and all singular + # offsets, relativedelta is used for plural offsets of daily length or + # more, nanosecond(s) are handled by apply_wraps + kwds_no_nanos = dict( + (k, v) for k, v in kwds.items() + if k not in ('nanosecond', 'nanoseconds') + ) + # TODO: Are nanosecond and nanoseconds allowed somewhere? + + _kwds_use_relativedelta = ('years', 'months', 'weeks', 'days', + 'year', 'month', 'week', 'day', 'weekday', + 'hour', 'minute', 'second', 'microsecond', + 'millisecond') + + use_relativedelta = False + if len(kwds_no_nanos) > 0: + if any(k in _kwds_use_relativedelta for k in kwds_no_nanos): + if "millisecond" in kwds_no_nanos: + raise NotImplementedError( + "Using DateOffset to replace `millisecond` component in " + "datetime object is not supported. Use " + "`microsecond=timestamp.microsecond % 1000 + ms * 1000` " + "instead." + ) + offset = relativedelta(**kwds_no_nanos) + use_relativedelta = True + else: + # sub-daily offset - use timedelta (tz-aware) + offset = timedelta(**kwds_no_nanos) + elif any(nano in kwds for nano in ('nanosecond', 'nanoseconds')): + offset = timedelta(days=0) + else: + # GH 45643/45890: (historically) defaults to 1 day for non-nano + # since datetime.timedelta doesn't handle nanoseconds + offset = timedelta(days=1) + return offset, use_relativedelta + + +# --------------------------------------------------------------------- +# Mixins & Singletons + + +class ApplyTypeError(TypeError): + # sentinel class for catching the apply error to return NotImplemented + pass + + +# --------------------------------------------------------------------- +# Base Classes + +cdef class BaseOffset: + """ + Base class for DateOffset methods that are not overridden by subclasses. + """ + # ensure that reversed-ops with numpy scalars return NotImplemented + __array_priority__ = 1000 + + _day_opt = None + _attributes = tuple(["n", "normalize"]) + _use_relativedelta = False + _adjust_dst = True + _deprecations = frozenset(["isAnchored", "onOffset"]) + + # cdef readonly: + # int64_t n + # bint normalize + # dict _cache + + def __init__(self, n=1, normalize=False): + n = self._validate_n(n) + self.n = n + """ + Number of multiples of the frequency. + + Examples + -------- + >>> pd.offsets.Hour(5).n + 5 + """ + self.normalize = normalize + """ + Return boolean whether the frequency can align with midnight. + + Examples + -------- + >>> pd.offsets.Hour(5).normalize + False + """ + self._cache = {} + + def __eq__(self, other) -> bool: + if isinstance(other, str): + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. 
"infer" + return False + try: + return self._params == other._params + except AttributeError: + # other is not a DateOffset object + return False + + def __ne__(self, other): + return not self == other + + def __hash__(self) -> int: + return hash(self._params) + + @cache_readonly + def _params(self): + """ + Returns a tuple containing all of the attributes needed to evaluate + equality between two DateOffset objects. + """ + d = getattr(self, "__dict__", {}) + all_paras = d.copy() + all_paras["n"] = self.n + all_paras["normalize"] = self.normalize + for attr in self._attributes: + if hasattr(self, attr) and attr not in d: + # cython attributes are not in __dict__ + all_paras[attr] = getattr(self, attr) + + if 'holidays' in all_paras and not all_paras['holidays']: + all_paras.pop('holidays') + exclude = ['kwds', 'name', 'calendar'] + attrs = [(k, v) for k, v in all_paras.items() + if (k not in exclude) and (k[0] != '_')] + attrs = sorted(set(attrs)) + params = tuple([str(type(self))] + attrs) + return params + + @property + def kwds(self) -> dict: + """ + Return a dict of extra parameters for the offset. + + Examples + -------- + >>> pd.DateOffset(5).kwds + {} + + >>> pd.offsets.FY5253Quarter().kwds + {'weekday': 0, + 'startingMonth': 1, + 'qtr_with_extra_week': 1, + 'variation': 'nearest'} + """ + # for backwards-compatibility + kwds = {name: getattr(self, name, None) for name in self._attributes + if name not in ["n", "normalize"]} + return {name: kwds[name] for name in kwds if kwds[name] is not None} + + @property + def base(self): + """ + Returns a copy of the calling offset object with n=1 and all other + attributes equal. + """ + return type(self)(n=1, normalize=self.normalize, **self.kwds) + + def __add__(self, other): + if not isinstance(self, BaseOffset): + # cython semantics; this is __radd__ + # TODO(cython3): remove this, this moved to __radd__ + return other.__add__(self) + + elif util.is_array(other) and other.dtype == object: + return np.array([self + x for x in other]) + + try: + return self._apply(other) + except ApplyTypeError: + return NotImplemented + + def __radd__(self, other): + return self.__add__(other) + + def __sub__(self, other): + if PyDateTime_Check(other): + raise TypeError('Cannot subtract datetime from offset.') + elif type(other) == type(self): + return type(self)(self.n - other.n, normalize=self.normalize, + **self.kwds) + elif not isinstance(self, BaseOffset): + # TODO(cython3): remove, this moved to __rsub__ + # cython semantics, this is __rsub__ + return (-other).__add__(self) + else: + # e.g. PeriodIndex + return NotImplemented + + def __rsub__(self, other): + return (-self).__add__(other) + + def __call__(self, other): + warnings.warn( + "DateOffset.__call__ is deprecated and will be removed in a future " + "version. Use `offset + other` instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._apply(other) + + def apply(self, other): + # GH#44522 + warnings.warn( + f"{type(self).__name__}.apply is deprecated and will be removed " + "in a future version. 
Use `offset + other` instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._apply(other) + + def __mul__(self, other): + if util.is_array(other): + return np.array([self * x for x in other]) + elif is_integer_object(other): + return type(self)(n=other * self.n, normalize=self.normalize, + **self.kwds) + elif not isinstance(self, BaseOffset): + # TODO(cython3): remove this, this moved to __rmul__ + # cython semantics, this is __rmul__ + return other.__mul__(self) + return NotImplemented + + def __rmul__(self, other): + return self.__mul__(other) + + def __neg__(self): + # Note: we are deferring directly to __mul__ instead of __rmul__, as + # that allows us to use methods that can go in a `cdef class` + return self * -1 + + def copy(self): + # Note: we are deferring directly to __mul__ instead of __rmul__, as + # that allows us to use methods that can go in a `cdef class` + """ + Return a copy of the frequency. + + Examples + -------- + >>> freq = pd.DateOffset(1) + >>> freq_copy = freq.copy() + >>> freq is freq_copy + False + """ + return self * 1 + + # ------------------------------------------------------------------ + # Name and Rendering Methods + + def __repr__(self) -> str: + # _output_name used by B(Year|Quarter)(End|Begin) to + # expand "B" -> "Business" + class_name = getattr(self, "_output_name", type(self).__name__) + + if abs(self.n) != 1: + plural = "s" + else: + plural = "" + + n_str = "" + if self.n != 1: + n_str = f"{self.n} * " + + out = f"<{n_str}{class_name}{plural}{self._repr_attrs()}>" + return out + + def _repr_attrs(self) -> str: + exclude = {"n", "inc", "normalize"} + attrs = [] + for attr in sorted(self._attributes): + # _attributes instead of __dict__ because cython attrs are not in __dict__ + if attr.startswith("_") or attr == "kwds" or not hasattr(self, attr): + # DateOffset may not have some of these attributes + continue + elif attr not in exclude: + value = getattr(self, attr) + attrs.append(f"{attr}={value}") + + out = "" + if attrs: + out += ": " + ", ".join(attrs) + return out + + @property + def name(self) -> str: + """ + Return a string representing the base frequency. + + Examples + -------- + >>> pd.offsets.Hour().name + 'H' + + >>> pd.offsets.Hour(5).name + 'H' + """ + return self.rule_code + + @property + def _prefix(self) -> str: + raise NotImplementedError("Prefix not defined") + + @property + def rule_code(self) -> str: + return self._prefix + + @cache_readonly + def freqstr(self) -> str: + """ + Return a string representing the frequency. + + Examples + -------- + >>> pd.DateOffset(5).freqstr + '<5 * DateOffsets>' + + >>> pd.offsets.BusinessHour(2).freqstr + '2BH' + + >>> pd.offsets.Nano().freqstr + 'N' + + >>> pd.offsets.Nano(-3).freqstr + '-3N' + """ + try: + code = self.rule_code + except NotImplementedError: + return str(repr(self)) + + if self.n != 1: + fstr = f"{self.n}{code}" + else: + fstr = code + + try: + if self._offset: + fstr += self._offset_str() + except AttributeError: + # TODO: standardize `_offset` vs `offset` naming convention + pass + + return fstr + + def _offset_str(self) -> str: + return "" + + # ------------------------------------------------------------------ + + def apply_index(self, dtindex): + """ + Vectorized apply of DateOffset to DatetimeIndex. + + .. deprecated:: 1.1.0 + + Use ``offset + dtindex`` instead. 
+ + Parameters + ---------- + index : DatetimeIndex + + Returns + ------- + DatetimeIndex + + Raises + ------ + NotImplementedError + When the specific offset subclass does not have a vectorized + implementation. + """ + warnings.warn("'Offset.apply_index(other)' is deprecated. " + "Use 'offset + other' instead.", FutureWarning) + + res = self._apply_array(dtindex) + return type(dtindex)(res) + + @apply_array_wraps + def _apply_array(self, dtarr): + raise NotImplementedError( + f"DateOffset subclass {type(self).__name__} " + "does not have a vectorized implementation" + ) + + def rollback(self, dt) -> datetime: + """ + Roll provided date backward to next offset only if not on offset. + + Returns + ------- + TimeStamp + Rolled timestamp if not on offset, otherwise unchanged timestamp. + """ + dt = Timestamp(dt) + if not self.is_on_offset(dt): + dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) + return dt + + def rollforward(self, dt) -> datetime: + """ + Roll provided date forward to next offset only if not on offset. + + Returns + ------- + TimeStamp + Rolled timestamp if not on offset, otherwise unchanged timestamp. + """ + dt = Timestamp(dt) + if not self.is_on_offset(dt): + dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) + return dt + + def _get_offset_day(self, other: datetime) -> int: + # subclass must implement `_day_opt`; calling from the base class + # will implicitly assume day_opt = "business_end", see get_day_of_month. + cdef: + npy_datetimestruct dts + pydate_to_dtstruct(other, &dts) + return get_day_of_month(&dts, self._day_opt) + + def is_on_offset(self, dt: datetime) -> bool: + """ + Return boolean whether a timestamp intersects with this frequency. + + Parameters + ---------- + dt : datetime.datetime + Timestamp to check intersections with frequency. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> freq = pd.offsets.Day(1) + >>> freq.is_on_offset(ts) + True + + >>> ts = pd.Timestamp(2022, 8, 6) + >>> ts.day_name() + 'Saturday' + >>> freq = pd.offsets.BusinessDay(1) + >>> freq.is_on_offset(ts) + False + """ + if self.normalize and not _is_normalized(dt): + return False + + # Default (slow) method for determining if some date is a member of the + # date range generated by this offset. Subclasses may have this + # re-implemented in a nicer way. + a = dt + b = (dt + self) - self + return a == b + + # ------------------------------------------------------------------ + + # Staticmethod so we can call from Tick.__init__, will be unnecessary + # once BaseOffset is a cdef class and is inherited by Tick + @staticmethod + def _validate_n(n) -> int: + """ + Require that `n` be an integer. 
+ + Parameters + ---------- + n : int + + Returns + ------- + nint : int + + Raises + ------ + TypeError if `int(n)` raises + ValueError if n != int(n) + """ + if util.is_timedelta64_object(n): + raise TypeError(f'`n` argument must be an integer, got {type(n)}') + try: + nint = int(n) + except (ValueError, TypeError): + raise TypeError(f'`n` argument must be an integer, got {type(n)}') + if n != nint: + raise ValueError(f'`n` argument must be an integer, got {n}') + return nint + + def __setstate__(self, state): + """ + Reconstruct an instance from a pickled state + """ + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self._cache = state.pop("_cache", {}) + # At this point we expect state to be empty + + def __getstate__(self): + """ + Return a pickleable state + """ + state = {} + state["n"] = self.n + state["normalize"] = self.normalize + + # we don't want to actually pickle the calendar object + # as its a np.busyday; we recreate on deserialization + state.pop("calendar", None) + if "kwds" in state: + state["kwds"].pop("calendar", None) + + return state + + @property + def nanos(self): + raise ValueError(f"{self} is a non-fixed frequency") + + def onOffset(self, dt) -> bool: + warnings.warn( + "onOffset is a deprecated, use is_on_offset instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.is_on_offset(dt) + + def isAnchored(self) -> bool: + warnings.warn( + "isAnchored is a deprecated, use is_anchored instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.is_anchored() + + def is_anchored(self) -> bool: + # TODO: Does this make sense for the general case? It would help + # if there were a canonical docstring for what is_anchored means. + """ + Return boolean whether the frequency is a unit frequency (n=1). + + Examples + -------- + >>> pd.DateOffset().is_anchored() + True + >>> pd.DateOffset(2).is_anchored() + False + """ + return self.n == 1 + + # ------------------------------------------------------------------ + + def is_month_start(self, _Timestamp ts): + """ + Return boolean whether a timestamp occurs on the month start. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> freq = pd.offsets.Hour(5) + >>> freq.is_month_start(ts) + True + """ + return ts._get_start_end_field("is_month_start", self) + + def is_month_end(self, _Timestamp ts): + """ + Return boolean whether a timestamp occurs on the month end. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> freq = pd.offsets.Hour(5) + >>> freq.is_month_end(ts) + False + """ + return ts._get_start_end_field("is_month_end", self) + + def is_quarter_start(self, _Timestamp ts): + """ + Return boolean whether a timestamp occurs on the quarter start. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> freq = pd.offsets.Hour(5) + >>> freq.is_quarter_start(ts) + True + """ + return ts._get_start_end_field("is_quarter_start", self) + + def is_quarter_end(self, _Timestamp ts): + """ + Return boolean whether a timestamp occurs on the quarter end. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> freq = pd.offsets.Hour(5) + >>> freq.is_quarter_end(ts) + False + """ + return ts._get_start_end_field("is_quarter_end", self) + + def is_year_start(self, _Timestamp ts): + """ + Return boolean whether a timestamp occurs on the year start. 
+ + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> freq = pd.offsets.Hour(5) + >>> freq.is_year_start(ts) + True + """ + return ts._get_start_end_field("is_year_start", self) + + def is_year_end(self, _Timestamp ts): + """ + Return boolean whether a timestamp occurs on the year end. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> freq = pd.offsets.Hour(5) + >>> freq.is_year_end(ts) + False + """ + return ts._get_start_end_field("is_year_end", self) + + +cdef class SingleConstructorOffset(BaseOffset): + @classmethod + def _from_name(cls, suffix=None): + # default _from_name calls cls with no args + if suffix: + raise ValueError(f"Bad freq suffix {suffix}") + return cls() + + def __reduce__(self): + # This __reduce__ implementation is for all BaseOffset subclasses + # except for RelativeDeltaOffset + # np.busdaycalendar objects do not pickle nicely, but we can reconstruct + # from attributes that do get pickled. + tup = tuple( + getattr(self, attr) if attr != "calendar" else None + for attr in self._attributes + ) + return type(self), tup + + +# --------------------------------------------------------------------- +# Tick Offsets + +cdef class Tick(SingleConstructorOffset): + _adjust_dst = False + _prefix = "undefined" + _td64_unit = "undefined" + _attributes = tuple(["n", "normalize"]) + + def __init__(self, n=1, normalize=False): + n = self._validate_n(n) + self.n = n + self.normalize = False + self._cache = {} + if normalize: + # GH#21427 + raise ValueError( + "Tick offset with `normalize=True` are not allowed." + ) + + # Note: Without making this cpdef, we get AttributeError when calling + # from __mul__ + cpdef Tick _next_higher_resolution(Tick self): + if type(self) is Day: + return Hour(self.n * 24) + if type(self) is Hour: + return Minute(self.n * 60) + if type(self) is Minute: + return Second(self.n * 60) + if type(self) is Second: + return Milli(self.n * 1000) + if type(self) is Milli: + return Micro(self.n * 1000) + if type(self) is Micro: + return Nano(self.n * 1000) + raise ValueError("Could not convert to integer offset at any resolution") + + # -------------------------------------------------------------------- + + def _repr_attrs(self) -> str: + # Since cdef classes have no __dict__, we need to override + return "" + + @property + def delta(self): + return self.n * Timedelta(self._nanos_inc) + + @property + def nanos(self) -> int64_t: + """ + Return an integer of the total number of nanoseconds. + + Raises + ------ + ValueError + If the frequency is non-fixed. + + Examples + -------- + >>> pd.offsets.Hour(5).nanos + 18000000000000 + """ + return self.n * self._nanos_inc + + def is_on_offset(self, dt: datetime) -> bool: + return True + + def is_anchored(self) -> bool: + return False + + # This is identical to BaseOffset.__hash__, but has to be redefined here + # for Python 3, because we've redefined __eq__. + def __hash__(self) -> int: + return hash(self._params) + + # -------------------------------------------------------------------- + # Comparison and Arithmetic Methods + + def __eq__(self, other): + if isinstance(other, str): + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. 
"infer" + return False + return self.delta == other + + def __ne__(self, other): + return not (self == other) + + def __le__(self, other): + return self.delta.__le__(other) + + def __lt__(self, other): + return self.delta.__lt__(other) + + def __ge__(self, other): + return self.delta.__ge__(other) + + def __gt__(self, other): + return self.delta.__gt__(other) + + def __mul__(self, other): + if not isinstance(self, Tick): + # TODO(cython3), remove this, this moved to __rmul__ + # cython semantics, this is __rmul__ + return other.__mul__(self) + if is_float_object(other): + n = other * self.n + # If the new `n` is an integer, we can represent it using the + # same Tick subclass as self, otherwise we need to move up + # to a higher-resolution subclass + if np.isclose(n % 1, 0): + return type(self)(int(n)) + new_self = self._next_higher_resolution() + return new_self * other + return BaseOffset.__mul__(self, other) + + def __rmul__(self, other): + return self.__mul__(other) + + def __truediv__(self, other): + if not isinstance(self, Tick): + # cython semantics mean the args are sometimes swapped + result = other.delta.__rtruediv__(self) + else: + result = self.delta.__truediv__(other) + return _wrap_timedelta_result(result) + + def __rtruediv__(self, other): + result = self.delta.__rtruediv__(other) + return _wrap_timedelta_result(result) + + def __add__(self, other): + if not isinstance(self, Tick): + # cython semantics; this is __radd__ + # TODO(cython3): remove this, this moved to __radd__ + return other.__add__(self) + + if isinstance(other, Tick): + if type(self) == type(other): + return type(self)(self.n + other.n) + else: + return delta_to_tick(self.delta + other.delta) + try: + return self._apply(other) + except ApplyTypeError: + # Includes pd.Period + return NotImplemented + except OverflowError as err: + raise OverflowError( + f"the add operation between {self} and {other} will overflow" + ) from err + + def __radd__(self, other): + return self.__add__(other) + + def _apply(self, other): + # Timestamp can handle tz and nano sec, thus no need to use apply_wraps + if isinstance(other, _Timestamp): + # GH#15126 + return other + self.delta + elif other is NaT: + return NaT + elif is_datetime64_object(other) or PyDate_Check(other): + # PyDate_Check includes date, datetime + return Timestamp(other) + self + + if util.is_timedelta64_object(other) or PyDelta_Check(other): + return other + self.delta + elif isinstance(other, type(self)): + # TODO(2.0): remove once apply deprecation is enforced. + # This is reached in tests that specifically call apply, + # but should not be reached "naturally" because __add__ should + # catch this case first. 
+ return type(self)(self.n + other.n) + + raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") + + # -------------------------------------------------------------------- + # Pickle Methods + + def __setstate__(self, state): + self.n = state["n"] + self.normalize = False + + +cdef class Day(Tick): + _nanos_inc = 24 * 3600 * 1_000_000_000 + _prefix = "D" + _td64_unit = "D" + _period_dtype_code = PeriodDtypeCode.D + _reso = NPY_DATETIMEUNIT.NPY_FR_D + + +cdef class Hour(Tick): + _nanos_inc = 3600 * 1_000_000_000 + _prefix = "H" + _td64_unit = "h" + _period_dtype_code = PeriodDtypeCode.H + _reso = NPY_DATETIMEUNIT.NPY_FR_h + + +cdef class Minute(Tick): + _nanos_inc = 60 * 1_000_000_000 + _prefix = "T" + _td64_unit = "m" + _period_dtype_code = PeriodDtypeCode.T + _reso = NPY_DATETIMEUNIT.NPY_FR_m + + +cdef class Second(Tick): + _nanos_inc = 1_000_000_000 + _prefix = "S" + _td64_unit = "s" + _period_dtype_code = PeriodDtypeCode.S + _reso = NPY_DATETIMEUNIT.NPY_FR_s + + +cdef class Milli(Tick): + _nanos_inc = 1_000_000 + _prefix = "L" + _td64_unit = "ms" + _period_dtype_code = PeriodDtypeCode.L + _reso = NPY_DATETIMEUNIT.NPY_FR_ms + + +cdef class Micro(Tick): + _nanos_inc = 1000 + _prefix = "U" + _td64_unit = "us" + _period_dtype_code = PeriodDtypeCode.U + _reso = NPY_DATETIMEUNIT.NPY_FR_us + + +cdef class Nano(Tick): + _nanos_inc = 1 + _prefix = "N" + _td64_unit = "ns" + _period_dtype_code = PeriodDtypeCode.N + _reso = NPY_DATETIMEUNIT.NPY_FR_ns + + +def delta_to_tick(delta: timedelta) -> Tick: + if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: + # nanoseconds only for pd.Timedelta + if delta.seconds == 0: + return Day(delta.days) + else: + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(seconds / 3600) + elif seconds % 60 == 0: + return Minute(seconds / 60) + else: + return Second(seconds) + else: + nanos = delta_to_nanoseconds(delta) + if nanos % 1_000_000 == 0: + return Milli(nanos // 1_000_000) + elif nanos % 1000 == 0: + return Micro(nanos // 1000) + else: # pragma: no cover + return Nano(nanos) + + +# -------------------------------------------------------------------- + +cdef class RelativeDeltaOffset(BaseOffset): + """ + DateOffset subclass backed by a dateutil relativedelta object. 
+ """ + _attributes = tuple(["n", "normalize"] + list(_relativedelta_kwds)) + _adjust_dst = False + + def __init__(self, n=1, normalize=False, **kwds): + BaseOffset.__init__(self, n, normalize) + + off, use_rd = _determine_offset(kwds) + object.__setattr__(self, "_offset", off) + object.__setattr__(self, "_use_relativedelta", use_rd) + for key in kwds: + val = kwds[key] + object.__setattr__(self, key, val) + + def __getstate__(self): + """ + Return a pickleable state + """ + # RelativeDeltaOffset (technically DateOffset) is the only non-cdef + # class, so the only one with __dict__ + state = self.__dict__.copy() + state["n"] = self.n + state["normalize"] = self.normalize + return state + + def __setstate__(self, state): + """ + Reconstruct an instance from a pickled state + """ + + if "offset" in state: + # Older (<0.22.0) versions have offset attribute instead of _offset + if "_offset" in state: # pragma: no cover + raise AssertionError("Unexpected key `_offset`") + state["_offset"] = state.pop("offset") + state["kwds"]["offset"] = state["_offset"] + + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self._cache = state.pop("_cache", {}) + + self.__dict__.update(state) + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + if self._use_relativedelta: + other = _as_datetime(other) + + if len(self.kwds) > 0: + tzinfo = getattr(other, "tzinfo", None) + if tzinfo is not None and self._use_relativedelta: + # perform calculation in UTC + other = other.replace(tzinfo=None) + + if hasattr(self, "nanoseconds"): + td_nano = Timedelta(nanoseconds=self.nanoseconds) + else: + td_nano = Timedelta(0) + + if self.n > 0: + for i in range(self.n): + other = other + self._offset + td_nano + else: + for i in range(-self.n): + other = other - self._offset - td_nano + + if tzinfo is not None and self._use_relativedelta: + # bring tz back from UTC calculation + other = localize_pydatetime(other, tzinfo) + + return Timestamp(other) + else: + return other + timedelta(self.n) + + @apply_array_wraps + def _apply_array(self, dtarr): + reso = get_unit_from_dtype(dtarr.dtype) + dt64other = np.asarray(dtarr) + kwds = self.kwds + relativedelta_fast = { + "years", + "months", + "weeks", + "days", + "hours", + "minutes", + "seconds", + "microseconds", + } + # relativedelta/_offset path only valid for base DateOffset + if self._use_relativedelta and set(kwds).issubset(relativedelta_fast): + + months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n + if months: + shifted = shift_months(dt64other.view("i8"), months, reso=reso) + dt64other = shifted.view(dtarr.dtype) + + weeks = kwds.get("weeks", 0) * self.n + if weeks: + delta = Timedelta(days=7 * weeks) + td = (<_Timedelta>delta)._as_reso(reso) + dt64other = dt64other + td + + timedelta_kwds = { + k: v + for k, v in kwds.items() + if k in ["days", "hours", "minutes", "seconds", "microseconds"] + } + if timedelta_kwds: + delta = Timedelta(**timedelta_kwds) + td = (<_Timedelta>delta)._as_reso(reso) + dt64other = dt64other + (self.n * td) + return dt64other + elif not self._use_relativedelta and hasattr(self, "_offset"): + # timedelta + num_nano = getattr(self, "nanoseconds", 0) + if num_nano != 0: + rem_nano = Timedelta(nanoseconds=num_nano) + delta = Timedelta((self._offset + rem_nano) * self.n) + else: + delta = Timedelta(self._offset * self.n) + td = (<_Timedelta>delta)._as_reso(reso) + return dt64other + td + else: + # relativedelta with other keywords + kwd = set(kwds) - relativedelta_fast + raise NotImplementedError( + 
"DateOffset with relativedelta " + f"keyword(s) {kwd} not able to be " + "applied vectorized" + ) + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return True + + +class OffsetMeta(type): + """ + Metaclass that allows us to pretend that all BaseOffset subclasses + inherit from DateOffset (which is needed for backward-compatibility). + """ + + @classmethod + def __instancecheck__(cls, obj) -> bool: + return isinstance(obj, BaseOffset) + + @classmethod + def __subclasscheck__(cls, obj) -> bool: + return issubclass(obj, BaseOffset) + + +# TODO: figure out a way to use a metaclass with a cdef class +class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): + """ + Standard kind of date increment used for a date range. + + Works exactly like the keyword argument form of relativedelta. + Note that the positional argument form of relativedelata is not + supported. Use of the keyword n is discouraged-- you would be better + off specifying n in the keywords you use, but regardless it is + there for you. n is needed for DateOffset subclasses. + + DateOffset works as follows. Each offset specify a set of dates + that conform to the DateOffset. For example, Bday defines this + set to be the set of dates that are weekdays (M-F). To test if a + date is in the set of a DateOffset dateOffset we can use the + is_on_offset method: dateOffset.is_on_offset(date). + + If a date is not on a valid date, the rollback and rollforward + methods can be used to roll the date to the nearest valid date + before/after the date. + + DateOffsets can be created to move dates forward a given number of + valid dates. For example, Bday(2) can be added to a date to move + it two business days forward. If the date does not start on a + valid date, first it is moved to a valid date. Thus pseudo code + is: + + def __add__(date): + date = rollback(date) # does nothing if date is valid + return date + + + When a date offset is created for a negative number of periods, + the date is first rolled forward. The pseudo code is: + + def __add__(date): + date = rollforward(date) # does nothing is date is valid + return date + + + Zero presents a problem. Should it roll forward or back? We + arbitrarily have it rollforward: + + date + BDay(0) == BDay.rollforward(date) + + Since 0 is a bit weird, we suggest avoiding its use. + + Besides, adding a DateOffsets specified by the singular form of the date + component can be used to replace certain component of the timestamp. + + Parameters + ---------- + n : int, default 1 + The number of time periods the offset represents. + If specified without a temporal pattern, defaults to n days. + normalize : bool, default False + Whether to round the result of a DateOffset addition down to the + previous midnight. + **kwds + Temporal parameter that add to or replace the offset value. + + Parameters that **add** to the offset (like Timedelta): + + - years + - months + - weeks + - days + - hours + - minutes + - seconds + - milliseconds + - microseconds + - nanoseconds + + Parameters that **replace** the offset value: + + - year + - month + - day + - weekday + - hour + - minute + - second + - microsecond + - nanosecond. + + See Also + -------- + dateutil.relativedelta.relativedelta : The relativedelta type is designed + to be applied to an existing datetime an can replace specific components of + that datetime, or represents an interval of time. 
+ + Examples + -------- + >>> from pandas.tseries.offsets import DateOffset + >>> ts = pd.Timestamp('2017-01-01 09:10:11') + >>> ts + DateOffset(months=3) + Timestamp('2017-04-01 09:10:11') + + >>> ts = pd.Timestamp('2017-01-01 09:10:11') + >>> ts + DateOffset(months=2) + Timestamp('2017-03-01 09:10:11') + >>> ts + DateOffset(day=31) + Timestamp('2017-01-31 09:10:11') + + >>> ts + pd.DateOffset(hour=8) + Timestamp('2017-01-01 08:10:11') + """ + def __setattr__(self, name, value): + raise AttributeError("DateOffset objects are immutable.") + +# -------------------------------------------------------------------- + + +cdef class BusinessMixin(SingleConstructorOffset): + """ + Mixin to business types to provide related functions. + """ + + cdef readonly: + timedelta _offset + # Only Custom subclasses use weekmask, holiday, calendar + object weekmask, holidays, calendar + + def __init__(self, n=1, normalize=False, offset=timedelta(0)): + BaseOffset.__init__(self, n, normalize) + self._offset = offset + + cpdef _init_custom(self, weekmask, holidays, calendar): + """ + Additional __init__ for Custom subclasses. + """ + calendar, holidays = _get_calendar( + weekmask=weekmask, holidays=holidays, calendar=calendar + ) + # Custom offset instances are identified by the + # following two attributes. See DateOffset._params() + # holidays, weekmask + self.weekmask = weekmask + self.holidays = holidays + self.calendar = calendar + + @property + def offset(self): + """ + Alias for self._offset. + """ + # Alias for backward compat + return self._offset + + def _repr_attrs(self) -> str: + if self.offset: + attrs = [f"offset={repr(self.offset)}"] + else: + attrs = [] + out = "" + if attrs: + out += ": " + ", ".join(attrs) + return out + + cpdef __setstate__(self, state): + # We need to use a cdef/cpdef method to set the readonly _offset attribute + if "_offset" in state: + self._offset = state.pop("_offset") + elif "offset" in state: + # Older (<0.22.0) versions have offset attribute instead of _offset + self._offset = state.pop("offset") + + if self._prefix.startswith("C"): + # i.e. this is a Custom class + weekmask = state.pop("weekmask") + holidays = state.pop("holidays") + calendar, holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=None) + self.weekmask = weekmask + self.calendar = calendar + self.holidays = holidays + + BaseOffset.__setstate__(self, state) + + +cdef class BusinessDay(BusinessMixin): + """ + DateOffset subclass representing possibly n business days. 
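+
+    "Business days" are Monday through Friday; ``n`` may be negative to step
+    backwards, and any ``offset`` timedelta is added after the business-day
+    shift. An extra illustrative case (repr assumed):
+
+    >>> pd.Timestamp(2022, 8, 8) + pd.offsets.BusinessDay(-1)
+    Timestamp('2022-08-05 00:00:00')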
+ + Examples + -------- + >>> ts = pd.Timestamp(2022, 8, 5) + >>> ts + pd.offsets.BusinessDay() + Timestamp('2022-08-08 00:00:00') + """ + _period_dtype_code = PeriodDtypeCode.B + _prefix = "B" + _attributes = tuple(["n", "normalize", "offset"]) + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + if "_offset" in state: + self._offset = state.pop("_offset") + elif "offset" in state: + self._offset = state.pop("offset") + self._cache = state.pop("_cache", {}) + + def _offset_str(self) -> str: + def get_str(td): + off_str = "" + if td.days > 0: + off_str += str(td.days) + "D" + if td.seconds > 0: + s = td.seconds + hrs = int(s / 3600) + if hrs != 0: + off_str += str(hrs) + "H" + s -= hrs * 3600 + mts = int(s / 60) + if mts != 0: + off_str += str(mts) + "Min" + s -= mts * 60 + if s != 0: + off_str += str(s) + "s" + if td.microseconds > 0: + off_str += str(td.microseconds) + "us" + return off_str + + if PyDelta_Check(self.offset): + zero = timedelta(0, 0, 0) + if self.offset >= zero: + off_str = "+" + get_str(self.offset) + else: + off_str = "-" + get_str(-self.offset) + return off_str + else: + return "+" + repr(self.offset) + + @apply_wraps + def _apply(self, other): + if PyDateTime_Check(other): + n = self.n + wday = other.weekday() + + # avoid slowness below by operating on weeks first + weeks = n // 5 + if n <= 0 and wday > 4: + # roll forward + n += 1 + + n -= 5 * weeks + + # n is always >= 0 at this point + if n == 0 and wday > 4: + # roll back + days = 4 - wday + elif wday > 4: + # roll forward + days = (7 - wday) + (n - 1) + elif wday + n <= 4: + # shift by n days without leaving the current week + days = n + else: + # shift by n days plus 2 to get past the weekend + days = n + 2 + + result = other + timedelta(days=7 * weeks + days) + if self.offset: + result = result + self.offset + return result + + elif is_any_td_scalar(other): + td = Timedelta(self.offset) + other + return BusinessDay( + self.n, offset=td.to_pytimedelta(), normalize=self.normalize + ) + else: + raise ApplyTypeError( + "Only know how to combine business day with datetime or timedelta." + ) + + @apply_array_wraps + def _apply_array(self, dtarr): + i8other = dtarr.view("i8") + reso = get_unit_from_dtype(dtarr.dtype) + res = _shift_bdays(i8other, self.n, reso=reso) + if self.offset: + res = res.view(dtarr.dtype) + Timedelta(self.offset) + res = res.view("i8") + return res + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return dt.weekday() < 5 + + +cdef class BusinessHour(BusinessMixin): + """ + DateOffset subclass representing possibly n business hours. + + Parameters + ---------- + n : int, default 1 + The number of months represented. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. + start : str, default "09:00" + Start time of your custom business hour in 24h format. + end : str, default: "17:00" + End time of your custom business hour in 24h format. 
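+
+    ``n`` is interpreted as a number of business hours to advance, and
+    ``start``/``end`` may also be equal-length lists describing several
+    opening periods per day. As a rough sketch (repr assumed), two business
+    hours after Friday 16:00 land on Monday 10:00:
+
+    >>> pd.Timestamp(2022, 8, 5, 16) + pd.offsets.BusinessHour(2)
+    Timestamp('2022-08-08 10:00:00')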
+ + Examples + -------- + >>> ts = pd.Timestamp(2022, 8, 5, 16) + >>> ts + pd.offsets.BusinessHour() + Timestamp('2022-08-08 09:00:00') + """ + + _prefix = "BH" + _anchor = 0 + _attributes = tuple(["n", "normalize", "start", "end", "offset"]) + _adjust_dst = False + + cdef readonly: + tuple start, end + + def __init__( + self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0) + ): + BusinessMixin.__init__(self, n, normalize, offset) + + # must be validated here to equality check + if np.ndim(start) == 0: + # i.e. not is_list_like + start = [start] + if not len(start): + raise ValueError("Must include at least 1 start time") + + if np.ndim(end) == 0: + # i.e. not is_list_like + end = [end] + if not len(end): + raise ValueError("Must include at least 1 end time") + + start = np.array([_validate_business_time(x) for x in start]) + end = np.array([_validate_business_time(x) for x in end]) + + # Validation of input + if len(start) != len(end): + raise ValueError("number of starting time and ending time must be the same") + num_openings = len(start) + + # sort starting and ending time by starting time + index = np.argsort(start) + + # convert to tuple so that start and end are hashable + start = tuple(start[index]) + end = tuple(end[index]) + + total_secs = 0 + for i in range(num_openings): + total_secs += self._get_business_hours_by_sec(start[i], end[i]) + total_secs += self._get_business_hours_by_sec( + end[i], start[(i + 1) % num_openings] + ) + if total_secs != 24 * 60 * 60: + raise ValueError( + "invalid starting and ending time(s): " + "opening hours should not touch or overlap with " + "one another" + ) + + self.start = start + self.end = end + + cpdef __setstate__(self, state): + start = state.pop("start") + start = (start,) if np.ndim(start) == 0 else tuple(start) + end = state.pop("end") + end = (end,) if np.ndim(end) == 0 else tuple(end) + self.start = start + self.end = end + + state.pop("kwds", {}) + state.pop("next_bday", None) + BusinessMixin.__setstate__(self, state) + + def _repr_attrs(self) -> str: + out = super()._repr_attrs() + # Use python string formatting to be faster than strftime + hours = ",".join( + f'{st.hour:02d}:{st.minute:02d}-{en.hour:02d}:{en.minute:02d}' + for st, en in zip(self.start, self.end) + ) + attrs = [f"{self._prefix}={hours}"] + out += ": " + ", ".join(attrs) + return out + + def _get_business_hours_by_sec(self, start, end): + """ + Return business hours in a day by seconds. + """ + # create dummy datetime to calculate business hours in a day + dtstart = datetime(2014, 4, 1, start.hour, start.minute) + day = 1 if start < end else 2 + until = datetime(2014, 4, day, end.hour, end.minute) + return int((until - dtstart).total_seconds()) + + def _get_closing_time(self, dt: datetime) -> datetime: + """ + Get the closing time of a business hour interval by its opening time. + + Parameters + ---------- + dt : datetime + Opening time of a business hour interval. + + Returns + ------- + result : datetime + Corresponding closing time. + """ + for i, st in enumerate(self.start): + if st.hour == dt.hour and st.minute == dt.minute: + return dt + timedelta( + seconds=self._get_business_hours_by_sec(st, self.end[i]) + ) + assert False + + @cache_readonly + def next_bday(self): + """ + Used for moving to next business day. 
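+
+        Returns ``BusinessDay(1)`` when ``self.n >= 0`` and ``BusinessDay(-1)``
+        otherwise; the Custom variants instead return a ``CustomBusinessDay``
+        carrying the same ``weekmask``, ``holidays`` and ``calendar``.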
+ """ + if self.n >= 0: + nb_offset = 1 + else: + nb_offset = -1 + if self._prefix.startswith("C"): + # CustomBusinessHour + return CustomBusinessDay( + n=nb_offset, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar, + ) + else: + return BusinessDay(n=nb_offset) + + def _next_opening_time(self, other, sign=1): + """ + If self.n and sign have the same sign, return the earliest opening time + later than or equal to current time. + Otherwise the latest opening time earlier than or equal to current + time. + + Opening time always locates on BusinessDay. + However, closing time may not if business hour extends over midnight. + + Parameters + ---------- + other : datetime + Current time. + sign : int, default 1. + Either 1 or -1. Going forward in time if it has the same sign as + self.n. Going backward in time otherwise. + + Returns + ------- + result : datetime + Next opening time. + """ + earliest_start = self.start[0] + latest_start = self.start[-1] + + if not self.next_bday.is_on_offset(other): + # today is not business day + other = other + sign * self.next_bday + if self.n * sign >= 0: + hour, minute = earliest_start.hour, earliest_start.minute + else: + hour, minute = latest_start.hour, latest_start.minute + else: + if self.n * sign >= 0: + if latest_start < other.time(): + # current time is after latest starting time in today + other = other + sign * self.next_bday + hour, minute = earliest_start.hour, earliest_start.minute + else: + # find earliest starting time no earlier than current time + for st in self.start: + if other.time() <= st: + hour, minute = st.hour, st.minute + break + else: + if other.time() < earliest_start: + # current time is before earliest starting time in today + other = other + sign * self.next_bday + hour, minute = latest_start.hour, latest_start.minute + else: + # find latest starting time no later than current time + for st in reversed(self.start): + if other.time() >= st: + hour, minute = st.hour, st.minute + break + + return datetime(other.year, other.month, other.day, hour, minute) + + def _prev_opening_time(self, other: datetime) -> datetime: + """ + If n is positive, return the latest opening time earlier than or equal + to current time. + Otherwise the earliest opening time later than or equal to current + time. + + Parameters + ---------- + other : datetime + Current time. + + Returns + ------- + result : datetime + Previous opening time. + """ + return self._next_opening_time(other, sign=-1) + + @apply_wraps + def rollback(self, dt: datetime) -> datetime: + """ + Roll provided date backward to next offset only if not on offset. + """ + if not self.is_on_offset(dt): + if self.n >= 0: + dt = self._prev_opening_time(dt) + else: + dt = self._next_opening_time(dt) + return self._get_closing_time(dt) + return dt + + @apply_wraps + def rollforward(self, dt: datetime) -> datetime: + """ + Roll provided date forward to next offset only if not on offset. 
+ """ + if not self.is_on_offset(dt): + if self.n >= 0: + return self._next_opening_time(dt) + else: + return self._prev_opening_time(dt) + return dt + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + # used for detecting edge condition + nanosecond = getattr(other, "nanosecond", 0) + # reset timezone and nanosecond + # other may be a Timestamp, thus not use replace + other = datetime( + other.year, + other.month, + other.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + n = self.n + + # adjust other to reduce number of cases to handle + if n >= 0: + if other.time() in self.end or not self._is_on_offset(other): + other = self._next_opening_time(other) + else: + if other.time() in self.start: + # adjustment to move to previous business day + other = other - timedelta(seconds=1) + if not self._is_on_offset(other): + other = self._next_opening_time(other) + other = self._get_closing_time(other) + + # get total business hours by sec in one business day + businesshours = sum( + self._get_business_hours_by_sec(st, en) + for st, en in zip(self.start, self.end) + ) + + bd, r = divmod(abs(n * 60), businesshours // 60) + if n < 0: + bd, r = -bd, -r + + # adjust by business days first + if bd != 0: + if self._prefix.startswith("C"): + # GH#30593 this is a Custom offset + skip_bd = CustomBusinessDay( + n=bd, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar, + ) + else: + skip_bd = BusinessDay(n=bd) + # midnight business hour may not on BusinessDay + if not self.next_bday.is_on_offset(other): + prev_open = self._prev_opening_time(other) + remain = other - prev_open + other = prev_open + skip_bd + remain + else: + other = other + skip_bd + + # remaining business hours to adjust + bhour_remain = timedelta(minutes=r) + + if n >= 0: + while bhour_remain != timedelta(0): + # business hour left in this business time interval + bhour = ( + self._get_closing_time(self._prev_opening_time(other)) - other + ) + if bhour_remain < bhour: + # finish adjusting if possible + other += bhour_remain + bhour_remain = timedelta(0) + else: + # go to next business time interval + bhour_remain -= bhour + other = self._next_opening_time(other + bhour) + else: + while bhour_remain != timedelta(0): + # business hour left in this business time interval + bhour = self._next_opening_time(other) - other + if ( + bhour_remain > bhour + or bhour_remain == bhour + and nanosecond != 0 + ): + # finish adjusting if possible + other += bhour_remain + bhour_remain = timedelta(0) + else: + # go to next business time interval + bhour_remain -= bhour + other = self._get_closing_time( + self._next_opening_time( + other + bhour - timedelta(seconds=1) + ) + ) + + return other + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + + if dt.tzinfo is not None: + dt = datetime( + dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond + ) + # Valid BH can be on the different BusinessDay during midnight + # Distinguish by the time spent from previous opening time + return self._is_on_offset(dt) + + def _is_on_offset(self, dt: datetime) -> bool: + """ + Slight speedups using calculated values. 
+ """ + # if self.normalize and not _is_normalized(dt): + # return False + # Valid BH can be on the different BusinessDay during midnight + # Distinguish by the time spent from previous opening time + if self.n >= 0: + op = self._prev_opening_time(dt) + else: + op = self._next_opening_time(dt) + span = (dt - op).total_seconds() + businesshours = 0 + for i, st in enumerate(self.start): + if op.hour == st.hour and op.minute == st.minute: + businesshours = self._get_business_hours_by_sec(st, self.end[i]) + if span <= businesshours: + return True + else: + return False + + +cdef class WeekOfMonthMixin(SingleConstructorOffset): + """ + Mixin for methods common to WeekOfMonth and LastWeekOfMonth. + """ + + cdef readonly: + int weekday, week + + def __init__(self, n=1, normalize=False, weekday=0): + BaseOffset.__init__(self, n, normalize) + self.weekday = weekday + + if weekday < 0 or weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {weekday}") + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + compare_day = self._get_offset_day(other) + + months = self.n + months = roll_convention(other.day, months, compare_day) + + shifted = shift_month(other, months, "start") + to_day = self._get_offset_day(shifted) + return _shift_day(shifted, to_day - shifted.day) + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return dt.day == self._get_offset_day(dt) + + @property + def rule_code(self) -> str: + weekday = int_to_weekday.get(self.weekday, "") + if self.week == -1: + # LastWeekOfMonth + return f"{self._prefix}-{weekday}" + return f"{self._prefix}-{self.week + 1}{weekday}" + + +# ---------------------------------------------------------------------- +# Year-Based Offset Classes + +cdef class YearOffset(SingleConstructorOffset): + """ + DateOffset that just needs a month. 
+ """ + _attributes = tuple(["n", "normalize", "month"]) + + # FIXME(cython#4446): python annotation here gives compile-time errors + # _default_month: int + + cdef readonly: + int month + + def __init__(self, n=1, normalize=False, month=None): + BaseOffset.__init__(self, n, normalize) + + month = month if month is not None else self._default_month + self.month = month + + if month < 1 or month > 12: + raise ValueError("Month must go from 1 to 12") + + cpdef __setstate__(self, state): + self.month = state.pop("month") + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self._cache = {} + + @classmethod + def _from_name(cls, suffix=None): + kwargs = {} + if suffix: + kwargs["month"] = MONTH_TO_CAL_NUM[suffix] + return cls(**kwargs) + + @property + def rule_code(self) -> str: + month = MONTH_ALIASES[self.month] + return f"{self._prefix}-{month}" + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return dt.month == self.month and dt.day == self._get_offset_day(dt) + + def _get_offset_day(self, other: datetime) -> int: + # override BaseOffset method to use self.month instead of other.month + cdef: + npy_datetimestruct dts + pydate_to_dtstruct(other, &dts) + dts.month = self.month + return get_day_of_month(&dts, self._day_opt) + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + years = roll_qtrday(other, self.n, self.month, self._day_opt, modby=12) + months = years * 12 + (self.month - other.month) + return shift_month(other, months, self._day_opt) + + @apply_array_wraps + def _apply_array(self, dtarr): + reso = get_unit_from_dtype(dtarr.dtype) + shifted = shift_quarters( + dtarr.view("i8"), self.n, self.month, self._day_opt, modby=12, reso=reso + ) + return shifted + + +cdef class BYearEnd(YearOffset): + """ + DateOffset increments between the last business day of the year. + + Examples + -------- + >>> from pandas.tseries.offsets import BYearEnd + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts - BYearEnd() + Timestamp('2019-12-31 05:01:15') + >>> ts + BYearEnd() + Timestamp('2020-12-31 05:01:15') + >>> ts + BYearEnd(3) + Timestamp('2022-12-30 05:01:15') + >>> ts + BYearEnd(-3) + Timestamp('2017-12-29 05:01:15') + >>> ts + BYearEnd(month=11) + Timestamp('2020-11-30 05:01:15') + """ + + _outputName = "BusinessYearEnd" + _default_month = 12 + _prefix = "BA" + _day_opt = "business_end" + + +cdef class BYearBegin(YearOffset): + """ + DateOffset increments between the first business day of the year. + + Examples + -------- + >>> from pandas.tseries.offsets import BYearBegin + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BYearBegin() + Timestamp('2021-01-01 05:01:15') + >>> ts - BYearBegin() + Timestamp('2020-01-01 05:01:15') + >>> ts + BYearBegin(-1) + Timestamp('2020-01-01 05:01:15') + >>> ts + BYearBegin(2) + Timestamp('2022-01-03 05:01:15') + """ + + _outputName = "BusinessYearBegin" + _default_month = 1 + _prefix = "BAS" + _day_opt = "business_start" + + +cdef class YearEnd(YearOffset): + """ + DateOffset increments between calendar year ends. 
+ + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.YearEnd() + Timestamp('2022-12-31 00:00:00') + """ + + _default_month = 12 + _prefix = "A" + _day_opt = "end" + + cdef readonly: + int _period_dtype_code + + def __init__(self, n=1, normalize=False, month=None): + # Because YearEnd can be the freq for a Period, define its + # _period_dtype_code at construction for performance + YearOffset.__init__(self, n, normalize, month) + self._period_dtype_code = PeriodDtypeCode.A + self.month % 12 + + +cdef class YearBegin(YearOffset): + """ + DateOffset increments between calendar year begin dates. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.YearBegin() + Timestamp('2023-01-01 00:00:00') + """ + + _default_month = 1 + _prefix = "AS" + _day_opt = "start" + + +# ---------------------------------------------------------------------- +# Quarter-Based Offset Classes + +cdef class QuarterOffset(SingleConstructorOffset): + _attributes = tuple(["n", "normalize", "startingMonth"]) + # TODO: Consider combining QuarterOffset and YearOffset __init__ at some + # point. Also apply_index, is_on_offset, rule_code if + # startingMonth vs month attr names are resolved + + # FIXME(cython#4446): python annotation here gives compile-time errors + # _default_starting_month: int + # _from_name_starting_month: int + + cdef readonly: + int startingMonth + + def __init__(self, n=1, normalize=False, startingMonth=None): + BaseOffset.__init__(self, n, normalize) + + if startingMonth is None: + startingMonth = self._default_starting_month + self.startingMonth = startingMonth + + cpdef __setstate__(self, state): + self.startingMonth = state.pop("startingMonth") + self.n = state.pop("n") + self.normalize = state.pop("normalize") + + @classmethod + def _from_name(cls, suffix=None): + kwargs = {} + if suffix: + kwargs["startingMonth"] = MONTH_TO_CAL_NUM[suffix] + else: + if cls._from_name_starting_month is not None: + kwargs["startingMonth"] = cls._from_name_starting_month + return cls(**kwargs) + + @property + def rule_code(self) -> str: + month = MONTH_ALIASES[self.startingMonth] + return f"{self._prefix}-{month}" + + def is_anchored(self) -> bool: + return self.n == 1 and self.startingMonth is not None + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + mod_month = (dt.month - self.startingMonth) % 3 + return mod_month == 0 and dt.day == self._get_offset_day(dt) + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + # months_since: find the calendar quarter containing other.month, + # e.g. if other.month == 8, the calendar quarter is [Jul, Aug, Sep]. + # Then find the month in that quarter containing an is_on_offset date for + # self. `months_since` is the number of months to shift other.month + # to get to this on-offset month. + months_since = other.month % 3 - self.startingMonth % 3 + qtrs = roll_qtrday( + other, self.n, self.startingMonth, day_opt=self._day_opt, modby=3 + ) + months = qtrs * 3 - months_since + return shift_month(other, months, self._day_opt) + + @apply_array_wraps + def _apply_array(self, dtarr): + reso = get_unit_from_dtype(dtarr.dtype) + shifted = shift_quarters( + dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, reso=reso + ) + return shifted + + +cdef class BQuarterEnd(QuarterOffset): + """ + DateOffset increments between the last business day of each Quarter. + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... 
+ startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... + + Examples + -------- + >>> from pandas.tseries.offsets import BQuarterEnd + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BQuarterEnd() + Timestamp('2020-06-30 05:01:15') + >>> ts + BQuarterEnd(2) + Timestamp('2020-09-30 05:01:15') + >>> ts + BQuarterEnd(1, startingMonth=2) + Timestamp('2020-05-29 05:01:15') + >>> ts + BQuarterEnd(startingMonth=2) + Timestamp('2020-05-29 05:01:15') + """ + _output_name = "BusinessQuarterEnd" + _default_starting_month = 3 + _from_name_starting_month = 12 + _prefix = "BQ" + _day_opt = "business_end" + + +cdef class BQuarterBegin(QuarterOffset): + """ + DateOffset increments between the first business day of each Quarter. + + startingMonth = 1 corresponds to dates like 1/01/2007, 4/01/2007, ... + startingMonth = 2 corresponds to dates like 2/01/2007, 5/01/2007, ... + startingMonth = 3 corresponds to dates like 3/01/2007, 6/01/2007, ... + + Examples + -------- + >>> from pandas.tseries.offsets import BQuarterBegin + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BQuarterBegin() + Timestamp('2020-06-01 05:01:15') + >>> ts + BQuarterBegin(2) + Timestamp('2020-09-01 05:01:15') + >>> ts + BQuarterBegin(startingMonth=2) + Timestamp('2020-08-03 05:01:15') + >>> ts + BQuarterBegin(-1) + Timestamp('2020-03-02 05:01:15') + """ + _output_name = "BusinessQuarterBegin" + _default_starting_month = 3 + _from_name_starting_month = 1 + _prefix = "BQS" + _day_opt = "business_start" + + +cdef class QuarterEnd(QuarterOffset): + """ + DateOffset increments between Quarter end dates. + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/31/2007, 6/30/2007, ... + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.QuarterEnd() + Timestamp('2022-03-31 00:00:00') + """ + _default_starting_month = 3 + _prefix = "Q" + _day_opt = "end" + + cdef readonly: + int _period_dtype_code + + def __init__(self, n=1, normalize=False, startingMonth=None): + # Because QuarterEnd can be the freq for a Period, define its + # _period_dtype_code at construction for performance + QuarterOffset.__init__(self, n, normalize, startingMonth) + self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 + + +cdef class QuarterBegin(QuarterOffset): + """ + DateOffset increments between Quarter start dates. + + startingMonth = 1 corresponds to dates like 1/01/2007, 4/01/2007, ... + startingMonth = 2 corresponds to dates like 2/01/2007, 5/01/2007, ... + startingMonth = 3 corresponds to dates like 3/01/2007, 6/01/2007, ... 
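+
+    The default ``startingMonth`` is 3, so quarters are taken to begin in
+    March, June, September and December. With ``startingMonth=1`` a date that
+    is already on a quarter start rolls to the next one (repr assumed):
+
+    >>> pd.Timestamp(2022, 1, 1) + pd.offsets.QuarterBegin(startingMonth=1)
+    Timestamp('2022-04-01 00:00:00')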
+ + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.QuarterBegin() + Timestamp('2022-03-01 00:00:00') + """ + _default_starting_month = 3 + _from_name_starting_month = 1 + _prefix = "QS" + _day_opt = "start" + + +# ---------------------------------------------------------------------- +# Month-Based Offset Classes + +cdef class MonthOffset(SingleConstructorOffset): + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return dt.day == self._get_offset_day(dt) + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + compare_day = self._get_offset_day(other) + n = roll_convention(other.day, self.n, compare_day) + return shift_month(other, n, self._day_opt) + + @apply_array_wraps + def _apply_array(self, dtarr): + reso = get_unit_from_dtype(dtarr.dtype) + shifted = shift_months(dtarr.view("i8"), self.n, self._day_opt, reso=reso) + return shifted + + cpdef __setstate__(self, state): + state.pop("_use_relativedelta", False) + state.pop("offset", None) + state.pop("_offset", None) + state.pop("kwds", {}) + + BaseOffset.__setstate__(self, state) + + +cdef class MonthEnd(MonthOffset): + """ + DateOffset of one month end. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.MonthEnd() + Timestamp('2022-01-31 00:00:00') + """ + _period_dtype_code = PeriodDtypeCode.M + _prefix = "M" + _day_opt = "end" + + +cdef class MonthBegin(MonthOffset): + """ + DateOffset of one month at beginning. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.MonthBegin() + Timestamp('2022-02-01 00:00:00') + """ + _prefix = "MS" + _day_opt = "start" + + +cdef class BusinessMonthEnd(MonthOffset): + """ + DateOffset increments between the last business day of the month. + + Examples + -------- + >>> from pandas.tseries.offsets import BMonthEnd + >>> ts = pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BMonthEnd() + Timestamp('2020-05-29 05:01:15') + >>> ts + BMonthEnd(2) + Timestamp('2020-06-30 05:01:15') + >>> ts + BMonthEnd(-2) + Timestamp('2020-03-31 05:01:15') + """ + _prefix = "BM" + _day_opt = "business_end" + + +cdef class BusinessMonthBegin(MonthOffset): + """ + DateOffset of one month at the first business day. 
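+
+    The anchor is the first weekday of the month, so a month that starts on a
+    weekend is anchored to the following Monday. For instance (repr assumed),
+    May 1st 2022 is a Sunday, so the business month begins on May 2nd:
+
+    >>> pd.Timestamp(2022, 4, 20) + pd.offsets.BMonthBegin()
+    Timestamp('2022-05-02 00:00:00')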
+ + Examples + -------- + >>> from pandas.tseries.offsets import BMonthBegin + >>> ts=pd.Timestamp('2020-05-24 05:01:15') + >>> ts + BMonthBegin() + Timestamp('2020-06-01 05:01:15') + >>> ts + BMonthBegin(2) + Timestamp('2020-07-01 05:01:15') + >>> ts + BMonthBegin(-3) + Timestamp('2020-03-02 05:01:15') + """ + _prefix = "BMS" + _day_opt = "business_start" + + +# --------------------------------------------------------------------- +# Semi-Month Based Offsets + +cdef class SemiMonthOffset(SingleConstructorOffset): + _default_day_of_month = 15 + _min_day_of_month = 2 + _attributes = tuple(["n", "normalize", "day_of_month"]) + + cdef readonly: + int day_of_month + + def __init__(self, n=1, normalize=False, day_of_month=None): + BaseOffset.__init__(self, n, normalize) + + if day_of_month is None: + day_of_month = self._default_day_of_month + + self.day_of_month = int(day_of_month) + if not self._min_day_of_month <= self.day_of_month <= 27: + raise ValueError( + "day_of_month must be " + f"{self._min_day_of_month}<=day_of_month<=27, " + f"got {self.day_of_month}" + ) + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.day_of_month = state.pop("day_of_month") + + @classmethod + def _from_name(cls, suffix=None): + return cls(day_of_month=suffix) + + @property + def rule_code(self) -> str: + suffix = f"-{self.day_of_month}" + return self._prefix + suffix + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + is_start = isinstance(self, SemiMonthBegin) + + # shift `other` to self.day_of_month, incrementing `n` if necessary + n = roll_convention(other.day, self.n, self.day_of_month) + + days_in_month = get_days_in_month(other.year, other.month) + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. + if is_start and (self.n <= 0 and other.day == 1): + n -= 1 + elif (not is_start) and (self.n > 0 and other.day == days_in_month): + n += 1 + + if is_start: + months = n // 2 + n % 2 + to_day = 1 if n % 2 else self.day_of_month + else: + months = n // 2 + to_day = 31 if n % 2 else self.day_of_month + + return shift_month(other, months, to_day) + + @apply_array_wraps + @cython.wraparound(False) + @cython.boundscheck(False) + def _apply_array(self, dtarr): + cdef: + ndarray i8other = dtarr.view("i8") + Py_ssize_t i, count = dtarr.size + int64_t val, res_val + ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + npy_datetimestruct dts + int months, to_day, nadj, n = self.n + int days_in_month, day, anchor_dom = self.day_of_month + bint is_start = isinstance(self, SemiMonthBegin) + NPY_DATETIMEUNIT reso = get_unit_from_dtype(dtarr.dtype) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, i8other) + + with nogil: + for i in range(count): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if val == NPY_NAT: + res_val = NPY_NAT + + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + day = dts.day + + # Adjust so that we are always looking at self.day_of_month, + # incrementing/decrementing n if necessary. 
+ nadj = roll_convention(day, n, anchor_dom) + + days_in_month = get_days_in_month(dts.year, dts.month) + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. + if is_start and (n <= 0 and day == 1): + nadj -= 1 + elif (not is_start) and (n > 0 and day == days_in_month): + nadj += 1 + + if is_start: + # See also: SemiMonthBegin._apply + months = nadj // 2 + nadj % 2 + to_day = 1 if nadj % 2 else anchor_dom + + else: + # See also: SemiMonthEnd._apply + months = nadj // 2 + to_day = 31 if nadj % 2 else anchor_dom + + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) + days_in_month = get_days_in_month(dts.year, dts.month) + dts.day = min(to_day, days_in_month) + + res_val = npy_datetimestruct_to_datetime(reso, &dts) + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return out + + +cdef class SemiMonthEnd(SemiMonthOffset): + """ + Two DateOffset's per month repeating on the last day of the month & day_of_month. + + Parameters + ---------- + n : int + normalize : bool, default False + day_of_month : int, {1, 3,...,27}, default 15 + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.SemiMonthEnd() + Timestamp('2022-01-15 00:00:00') + """ + + _prefix = "SM" + _min_day_of_month = 1 + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + days_in_month = get_days_in_month(dt.year, dt.month) + return dt.day in (self.day_of_month, days_in_month) + + +cdef class SemiMonthBegin(SemiMonthOffset): + """ + Two DateOffset's per month repeating on the first day of the month & day_of_month. + + Parameters + ---------- + n : int + normalize : bool, default False + day_of_month : int, {2, 3,...,27}, default 15 + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.SemiMonthBegin() + Timestamp('2022-01-15 00:00:00') + """ + + _prefix = "SMS" + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return dt.day in (1, self.day_of_month) + + +# --------------------------------------------------------------------- +# Week-Based Offset Classes + + +cdef class Week(SingleConstructorOffset): + """ + Weekly offset. + + Parameters + ---------- + weekday : int or None, default None + Always generate specific day of week. 0 for Monday. 
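+
+    With ``weekday=None`` the offset is a plain 7-day shift; with a concrete
+    ``weekday`` it is anchored and dates roll onto that day of the week. For
+    example (repr assumed), Saturday 2022-01-01 rolls to the following Monday:
+
+    >>> pd.Timestamp(2022, 1, 1) + pd.offsets.Week(weekday=0)
+    Timestamp('2022-01-03 00:00:00')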
+ + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.Week() + Timestamp('2022-01-08 00:00:00') + """ + + _inc = timedelta(weeks=1) + _prefix = "W" + _attributes = tuple(["n", "normalize", "weekday"]) + + cdef readonly: + object weekday # int or None + int _period_dtype_code + + def __init__(self, n=1, normalize=False, weekday=None): + BaseOffset.__init__(self, n, normalize) + self.weekday = weekday + + if self.weekday is not None: + if self.weekday < 0 or self.weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + + self._period_dtype_code = PeriodDtypeCode.W_SUN + (weekday + 1) % 7 + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.weekday = state.pop("weekday") + self._cache = state.pop("_cache", {}) + + def is_anchored(self) -> bool: + return self.n == 1 and self.weekday is not None + + @apply_wraps + def _apply(self, other): + if self.weekday is None: + return other + self.n * self._inc + + if not PyDateTime_Check(other): + raise TypeError( + f"Cannot add {type(other).__name__} to {type(self).__name__}" + ) + + k = self.n + otherDay = other.weekday() + if otherDay != self.weekday: + other = other + timedelta((self.weekday - otherDay) % 7) + if k > 0: + k -= 1 + + return other + timedelta(weeks=k) + + @apply_array_wraps + def _apply_array(self, dtarr): + if self.weekday is None: + td = timedelta(days=7 * self.n) + td64 = np.timedelta64(td, "ns") + return dtarr + td64 + else: + reso = get_unit_from_dtype(dtarr.dtype) + i8other = dtarr.view("i8") + return self._end_apply_index(i8other, reso=reso) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef ndarray _end_apply_index(self, ndarray i8other, NPY_DATETIMEUNIT reso): + """ + Add self to the given DatetimeIndex, specialized for case where + self.weekday is non-null. 
+ + Parameters + ---------- + i8other : const int64_t[:] + reso : NPY_DATETIMEUNIT + + Returns + ------- + ndarray[int64_t] + """ + cdef: + Py_ssize_t i, count = i8other.size + int64_t val, res_val + ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + npy_datetimestruct dts + int wday, days, weeks, n = self.n + int anchor_weekday = self.weekday + int64_t DAY_PERIODS = periods_per_day(reso) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, i8other) + + with nogil: + for i in range(count): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + wday = dayofweek(dts.year, dts.month, dts.day) + + days = 0 + weeks = n + if wday != anchor_weekday: + days = (anchor_weekday - wday) % 7 + if weeks > 0: + weeks -= 1 + + res_val = val + (7 * weeks + days) * DAY_PERIODS + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return out + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + elif self.weekday is None: + return True + return dt.weekday() == self.weekday + + @property + def rule_code(self) -> str: + suffix = "" + if self.weekday is not None: + weekday = int_to_weekday[self.weekday] + suffix = f"-{weekday}" + return self._prefix + suffix + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + weekday = None + else: + weekday = weekday_to_int[suffix] + return cls(weekday=weekday) + + +cdef class WeekOfMonth(WeekOfMonthMixin): + """ + Describes monthly dates like "the Tuesday of the 2nd week of each month". + + Parameters + ---------- + n : int + week : int {0, 1, 2, 3, ...}, default 0 + A specific integer for the week of the month. + e.g. 0 is 1st week of month, 1 is the 2nd week, etc. + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.WeekOfMonth() + Timestamp('2022-01-03 00:00:00') + """ + + _prefix = "WOM" + _attributes = tuple(["n", "normalize", "week", "weekday"]) + + def __init__(self, n=1, normalize=False, week=0, weekday=0): + WeekOfMonthMixin.__init__(self, n, normalize, weekday) + self.week = week + + if self.week < 0 or self.week > 3: + raise ValueError(f"Week must be 0<=week<=3, got {self.week}") + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.weekday = state.pop("weekday") + self.week = state.pop("week") + + def _get_offset_day(self, other: datetime) -> int: + """ + Find the day in the same month as other that has the same + weekday as self.weekday and is the self.week'th such day in the month. + + Parameters + ---------- + other : datetime + + Returns + ------- + day : int + """ + mstart = datetime(other.year, other.month, 1) + wday = mstart.weekday() + shift_days = (self.weekday - wday) % 7 + return 1 + shift_days + self.week * 7 + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") + # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) 
+ week = int(suffix[0]) - 1 + weekday = weekday_to_int[suffix[1:]] + return cls(week=week, weekday=weekday) + + +cdef class LastWeekOfMonth(WeekOfMonthMixin): + """ + Describes monthly dates in last week of month. + + For example "the last Tuesday of each month". + + Parameters + ---------- + n : int, default 1 + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.LastWeekOfMonth() + Timestamp('2022-01-31 00:00:00') + """ + + _prefix = "LWOM" + _attributes = tuple(["n", "normalize", "weekday"]) + + def __init__(self, n=1, normalize=False, weekday=0): + WeekOfMonthMixin.__init__(self, n, normalize, weekday) + self.week = -1 + + if self.n == 0: + raise ValueError("N cannot be 0") + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.weekday = state.pop("weekday") + self.week = -1 + + def _get_offset_day(self, other: datetime) -> int: + """ + Find the day in the same month as other that has the same + weekday as self.weekday and is the last such day in the month. + + Parameters + ---------- + other: datetime + + Returns + ------- + day: int + """ + dim = get_days_in_month(other.year, other.month) + mend = datetime(other.year, other.month, dim) + wday = mend.weekday() + shift_days = (wday - self.weekday) % 7 + return dim - shift_days + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") + weekday = weekday_to_int[suffix] + return cls(weekday=weekday) + + +# --------------------------------------------------------------------- +# Special Offset Classes + +cdef class FY5253Mixin(SingleConstructorOffset): + cdef readonly: + int startingMonth + int weekday + str variation + + def __init__( + self, n=1, normalize=False, weekday=0, startingMonth=1, variation="nearest" + ): + BaseOffset.__init__(self, n, normalize) + self.startingMonth = startingMonth + self.weekday = weekday + self.variation = variation + + if self.n == 0: + raise ValueError("N cannot be 0") + + if self.variation not in ["nearest", "last"]: + raise ValueError(f"{self.variation} is not a valid variation") + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self.weekday = state.pop("weekday") + self.variation = state.pop("variation") + + def is_anchored(self) -> bool: + return ( + self.n == 1 and self.startingMonth is not None and self.weekday is not None + ) + + # -------------------------------------------------------------------- + # Name-related methods + + @property + def rule_code(self) -> str: + prefix = self._prefix + suffix = self.get_rule_code_suffix() + return f"{prefix}-{suffix}" + + def _get_suffix_prefix(self) -> str: + if self.variation == "nearest": + return "N" + else: + return "L" + + def get_rule_code_suffix(self) -> str: + prefix = self._get_suffix_prefix() + month = MONTH_ALIASES[self.startingMonth] + weekday = int_to_weekday[self.weekday] + return f"{prefix}-{month}-{weekday}" + + +cdef class FY5253(FY5253Mixin): + """ + Describes 52-53 week fiscal year. This is also known as a 4-4-5 calendar. + + It is used by companies that desire that their + fiscal year always end on the same day of the week. + + It is a method of managing accounting periods. 
+ It is a common calendar structure for some industries, + such as retail, manufacturing and parking industry. + + For more information see: + https://en.wikipedia.org/wiki/4-4-5_calendar + + The year may either: + + - end on the last X day of the Y month. + - end on the last X day closest to the last day of the Y month. + + X is a specific day of the week. + Y is a certain month of the year + + Parameters + ---------- + n : int + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + + startingMonth : int {1, 2, ... 12}, default 1 + The month in which the fiscal year ends. + + variation : str, default "nearest" + Method of employing 4-4-5 calendar. + + There are two options: + + - "nearest" means year end is **weekday** closest to last day of month in year. + - "last" means year end is final **weekday** of the final month in fiscal year. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.FY5253() + Timestamp('2022-01-31 00:00:00') + """ + + _prefix = "RE" + _attributes = tuple(["n", "normalize", "weekday", "startingMonth", "variation"]) + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + dt = datetime(dt.year, dt.month, dt.day) + year_end = self.get_year_end(dt) + + if self.variation == "nearest": + # We have to check the year end of "this" cal year AND the previous + return year_end == dt or self.get_year_end(shift_month(dt, -1, None)) == dt + else: + return year_end == dt + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + norm = Timestamp(other).normalize() + + n = self.n + prev_year = self.get_year_end(datetime(other.year - 1, self.startingMonth, 1)) + cur_year = self.get_year_end(datetime(other.year, self.startingMonth, 1)) + next_year = self.get_year_end(datetime(other.year + 1, self.startingMonth, 1)) + + prev_year = localize_pydatetime(prev_year, other.tzinfo) + cur_year = localize_pydatetime(cur_year, other.tzinfo) + next_year = localize_pydatetime(next_year, other.tzinfo) + + # Note: next_year.year == other.year + 1, so we will always + # have other < next_year + if norm == prev_year: + n -= 1 + elif norm == cur_year: + pass + elif n > 0: + if norm < prev_year: + n -= 2 + elif prev_year < norm < cur_year: + n -= 1 + elif cur_year < norm < next_year: + pass + else: + if cur_year < norm < next_year: + n += 1 + elif prev_year < norm < cur_year: + pass + elif ( + norm.year == prev_year.year + and norm < prev_year + and prev_year - norm <= timedelta(6) + ): + # GH#14774, error when next_year.year == cur_year.year + # e.g. 
prev_year == datetime(2004, 1, 3), + # other == datetime(2004, 1, 1) + n -= 1 + else: + assert False + + shifted = datetime(other.year + n, self.startingMonth, 1) + result = self.get_year_end(shifted) + result = datetime( + result.year, + result.month, + result.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + return result + + def get_year_end(self, dt: datetime) -> datetime: + assert dt.tzinfo is None + + dim = get_days_in_month(dt.year, self.startingMonth) + target_date = datetime(dt.year, self.startingMonth, dim) + wkday_diff = self.weekday - target_date.weekday() + if wkday_diff == 0: + # year_end is the same for "last" and "nearest" cases + return target_date + + if self.variation == "last": + days_forward = (wkday_diff % 7) - 7 + + # days_forward is always negative, so we always end up + # in the same year as dt + return target_date + timedelta(days=days_forward) + else: + # variation == "nearest": + days_forward = wkday_diff % 7 + if days_forward <= 3: + # The upcoming self.weekday is closer than the previous one + return target_date + timedelta(days_forward) + else: + # The previous self.weekday is closer than the upcoming one + return target_date + timedelta(days_forward - 7) + + @classmethod + def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): + if varion_code == "N": + variation = "nearest" + elif varion_code == "L": + variation = "last" + else: + raise ValueError(f"Unable to parse varion_code: {varion_code}") + + startingMonth = MONTH_TO_CAL_NUM[startingMonth_code] + weekday = weekday_to_int[weekday_code] + + return { + "weekday": weekday, + "startingMonth": startingMonth, + "variation": variation, + } + + @classmethod + def _from_name(cls, *args): + return cls(**cls._parse_suffix(*args)) + + +cdef class FY5253Quarter(FY5253Mixin): + """ + DateOffset increments between business quarter dates for 52-53 week fiscal year. + + Also known as a 4-4-5 calendar. + + It is used by companies that desire that their + fiscal year always end on the same day of the week. + + It is a method of managing accounting periods. + It is a common calendar structure for some industries, + such as retail, manufacturing and parking industry. + + For more information see: + https://en.wikipedia.org/wiki/4-4-5_calendar + + The year may either: + + - end on the last X day of the Y month. + - end on the last X day closest to the last day of the Y month. + + X is a specific day of the week. + Y is a certain month of the year + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... + + Parameters + ---------- + n : int + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + + startingMonth : int {1, 2, ..., 12}, default 1 + The month in which fiscal years end. + + qtr_with_extra_week : int {1, 2, 3, 4}, default 1 + The quarter number that has the leap or 14 week when needed. + + variation : str, default "nearest" + Method of employing 4-4-5 calendar. + + There are two options: + + - "nearest" means year end is **weekday** closest to last day of month in year. + - "last" means year end is final **weekday** of the final month in fiscal year. 
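+
+    Each fiscal quarter spans 13 weeks, except that in a 53-week fiscal year
+    the quarter selected by ``qtr_with_extra_week`` spans 14 weeks. The rule
+    code encodes variation, anchor month, weekday and the extra-week quarter,
+    e.g. (string derived from ``rule_code`` as defined in this module):
+
+    >>> pd.offsets.FY5253Quarter(weekday=4, startingMonth=12,
+    ...                          qtr_with_extra_week=4, variation="last").rule_code
+    'REQ-L-DEC-FRI-4'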
+ + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.FY5253Quarter() + Timestamp('2022-01-31 00:00:00') + """ + + _prefix = "REQ" + _attributes = tuple( + [ + "n", + "normalize", + "weekday", + "startingMonth", + "qtr_with_extra_week", + "variation", + ] + ) + + cdef readonly: + int qtr_with_extra_week + + def __init__( + self, + n=1, + normalize=False, + weekday=0, + startingMonth=1, + qtr_with_extra_week=1, + variation="nearest", + ): + FY5253Mixin.__init__( + self, n, normalize, weekday, startingMonth, variation + ) + self.qtr_with_extra_week = qtr_with_extra_week + + cpdef __setstate__(self, state): + FY5253Mixin.__setstate__(self, state) + self.qtr_with_extra_week = state.pop("qtr_with_extra_week") + + @cache_readonly + def _offset(self): + return FY5253( + startingMonth=self.startingMonth, + weekday=self.weekday, + variation=self.variation, + ) + + def _rollback_to_year(self, other: datetime): + """ + Roll `other` back to the most recent date that was on a fiscal year + end. + + Return the date of that year-end, the number of full quarters + elapsed between that year-end and other, and the remaining Timedelta + since the most recent quarter-end. + + Parameters + ---------- + other : datetime or Timestamp + + Returns + ------- + tuple of + prev_year_end : Timestamp giving most recent fiscal year end + num_qtrs : int + tdelta : Timedelta + """ + num_qtrs = 0 + + norm = Timestamp(other).tz_localize(None) + start = self._offset.rollback(norm) + # Note: start <= norm and self._offset.is_on_offset(start) + + if start < norm: + # roll adjustment + qtr_lens = self.get_weeks(norm) + + # check that qtr_lens is consistent with self._offset addition + end = _shift_day(start, days=7 * sum(qtr_lens)) + assert self._offset.is_on_offset(end), (start, end, qtr_lens) + + tdelta = norm - start + for qlen in qtr_lens: + if qlen * 7 <= tdelta.days: + num_qtrs += 1 + tdelta -= (<_Timedelta>Timedelta(days=qlen * 7))._as_reso(norm._reso) + else: + break + else: + tdelta = Timedelta(0) + + # Note: we always have tdelta.value >= 0 + return start, num_qtrs, tdelta + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + # Note: self.n == 0 is not allowed. + + n = self.n + + prev_year_end, num_qtrs, tdelta = self._rollback_to_year(other) + res = prev_year_end + n += num_qtrs + if self.n <= 0 and tdelta.value > 0: + n += 1 + + # Possible speedup by handling years first. + years = n // 4 + if years: + res += self._offset * years + n -= years * 4 + + # Add an extra day to make *sure* we are getting the quarter lengths + # for the upcoming year, not the previous year + qtr_lens = self.get_weeks(res + Timedelta(days=1)) + + # Note: we always have 0 <= n < 4 + weeks = sum(qtr_lens[:n]) + if weeks: + res = _shift_day(res, days=weeks * 7) + + return res + + def get_weeks(self, dt: datetime): + ret = [13] * 4 + + year_has_extra_week = self.year_has_extra_week(dt) + + if year_has_extra_week: + ret[self.qtr_with_extra_week - 1] = 14 + + return ret + + def year_has_extra_week(self, dt: datetime) -> bool: + # Avoid round-down errors --> normalize to get + # e.g. 
'370D' instead of '360D23H' + norm = Timestamp(dt).normalize().tz_localize(None) + + next_year_end = self._offset.rollforward(norm) + prev_year_end = norm - self._offset + weeks_in_year = (next_year_end - prev_year_end).days / 7 + assert weeks_in_year in [52, 53], weeks_in_year + return weeks_in_year == 53 + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + if self._offset.is_on_offset(dt): + return True + + next_year_end = dt - self._offset + + qtr_lens = self.get_weeks(dt) + + current = next_year_end + for qtr_len in qtr_lens: + current = _shift_day(current, days=qtr_len * 7) + if dt == current: + return True + return False + + @property + def rule_code(self) -> str: + suffix = FY5253Mixin.rule_code.__get__(self) + qtr = self.qtr_with_extra_week + return f"{suffix}-{qtr}" + + @classmethod + def _from_name(cls, *args): + return cls( + **dict(FY5253._parse_suffix(*args[:-1]), qtr_with_extra_week=int(args[-1])) + ) + + +cdef class Easter(SingleConstructorOffset): + """ + DateOffset for the Easter holiday using logic defined in dateutil. + + Right now uses the revised method which is valid in years 1583-4099. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 1, 1) + >>> ts + pd.offsets.Easter() + Timestamp('2022-04-17 00:00:00') + """ + + cpdef __setstate__(self, state): + self.n = state.pop("n") + self.normalize = state.pop("normalize") + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + current_easter = easter(other.year) + current_easter = datetime( + current_easter.year, current_easter.month, current_easter.day + ) + current_easter = localize_pydatetime(current_easter, other.tzinfo) + + n = self.n + if n >= 0 and other < current_easter: + n -= 1 + elif n < 0 and other > current_easter: + n += 1 + # TODO: Why does this handle the 0 case the opposite of others? + + # NOTE: easter returns a datetime.date so we have to convert to type of + # other + new = easter(other.year + n) + new = datetime( + new.year, + new.month, + new.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + return new + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + return date(dt.year, dt.month, dt.day) == easter(dt.year) + + +# ---------------------------------------------------------------------- +# Custom Offset classes + + +cdef class CustomBusinessDay(BusinessDay): + """ + DateOffset subclass representing custom business days excluding holidays. + + Parameters + ---------- + n : int, default 1 + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. + holidays : list + List/array of dates to exclude from the set of valid business days, + passed to ``numpy.busdaycalendar``. 
+ calendar : np.busdaycalendar + offset : timedelta, default timedelta(0) + + Examples + -------- + >>> ts = pd.Timestamp(2022, 8, 5) + >>> ts + pd.offsets.CustomBusinessDay(1) + Timestamp('2022-08-08 00:00:00') + """ + + _prefix = "C" + _attributes = tuple( + ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] + ) + + _apply_array = BaseOffset._apply_array + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + offset=timedelta(0), + ): + BusinessDay.__init__(self, n, normalize, offset) + self._init_custom(weekmask, holidays, calendar) + + cpdef __setstate__(self, state): + self.holidays = state.pop("holidays") + self.weekmask = state.pop("weekmask") + BusinessDay.__setstate__(self, state) + + @apply_wraps + def _apply(self, other): + if self.n <= 0: + roll = "forward" + else: + roll = "backward" + + if PyDateTime_Check(other): + date_in = other + np_dt = np.datetime64(date_in.date()) + + np_incr_dt = np.busday_offset( + np_dt, self.n, roll=roll, busdaycal=self.calendar + ) + + dt_date = np_incr_dt.astype(datetime) + result = datetime.combine(dt_date, date_in.time()) + + if self.offset: + result = result + self.offset + return result + + elif is_any_td_scalar(other): + td = Timedelta(self.offset) + other + return BDay(self.n, offset=td.to_pytimedelta(), normalize=self.normalize) + else: + raise ApplyTypeError( + "Only know how to combine trading day with " + "datetime, datetime64 or timedelta." + ) + + def is_on_offset(self, dt: datetime) -> bool: + if self.normalize and not _is_normalized(dt): + return False + day64 = _to_dt64D(dt) + return np.is_busday(day64, busdaycal=self.calendar) + + +cdef class CustomBusinessHour(BusinessHour): + """ + DateOffset subclass representing possibly n custom business days. + + Parameters + ---------- + n : int, default 1 + The number of months represented. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. + start : str, default "09:00" + Start time of your custom business hour in 24h format. + end : str, default: "17:00" + End time of your custom business hour in 24h format. + + Examples + -------- + >>> ts = pd.Timestamp(2022, 8, 5, 16) + >>> ts + pd.offsets.CustomBusinessHour() + Timestamp('2022-08-08 09:00:00') + """ + + _prefix = "CBH" + _anchor = 0 + _attributes = tuple( + ["n", "normalize", "weekmask", "holidays", "calendar", "start", "end", "offset"] + ) + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + start="09:00", + end="17:00", + offset=timedelta(0), + ): + BusinessHour.__init__(self, n, normalize, start=start, end=end, offset=offset) + self._init_custom(weekmask, holidays, calendar) + + +cdef class _CustomBusinessMonth(BusinessMixin): + """ + DateOffset subclass representing custom business month(s). + + Increments between beginning/end of month dates. + + Parameters + ---------- + n : int, default 1 + The number of months represented. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. + holidays : list + List/array of dates to exclude from the set of valid business days, + passed to ``numpy.busdaycalendar``. 
+ calendar : np.busdaycalendar + Calendar to integrate. + offset : timedelta, default timedelta(0) + Time offset to apply. + """ + + _attributes = tuple( + ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] + ) + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + offset=timedelta(0), + ): + BusinessMixin.__init__(self, n, normalize, offset) + self._init_custom(weekmask, holidays, calendar) + + @cache_readonly + def cbday_roll(self): + """ + Define default roll function to be called in apply method. + """ + cbday_kwds = self.kwds.copy() + cbday_kwds['offset'] = timedelta(0) + + cbday = CustomBusinessDay(n=1, normalize=False, **cbday_kwds) + + if self._prefix.endswith("S"): + # MonthBegin + roll_func = cbday.rollforward + else: + # MonthEnd + roll_func = cbday.rollback + return roll_func + + @cache_readonly + def m_offset(self): + if self._prefix.endswith("S"): + # MonthBegin + moff = MonthBegin(n=1, normalize=False) + else: + # MonthEnd + moff = MonthEnd(n=1, normalize=False) + return moff + + @cache_readonly + def month_roll(self): + """ + Define default roll function to be called in apply method. + """ + if self._prefix.endswith("S"): + # MonthBegin + roll_func = self.m_offset.rollback + else: + # MonthEnd + roll_func = self.m_offset.rollforward + return roll_func + + @apply_wraps + def _apply(self, other: datetime) -> datetime: + # First move to month offset + cur_month_offset_date = self.month_roll(other) + + # Find this custom month offset + compare_date = self.cbday_roll(cur_month_offset_date) + n = roll_convention(other.day, self.n, compare_date.day) + + new = cur_month_offset_date + n * self.m_offset + result = self.cbday_roll(new) + + if self.offset: + result = result + self.offset + return result + + +cdef class CustomBusinessMonthEnd(_CustomBusinessMonth): + _prefix = "CBM" + + +cdef class CustomBusinessMonthBegin(_CustomBusinessMonth): + _prefix = "CBMS" + + +BDay = BusinessDay +BMonthEnd = BusinessMonthEnd +BMonthBegin = BusinessMonthBegin +CBMonthEnd = CustomBusinessMonthEnd +CBMonthBegin = CustomBusinessMonthBegin +CDay = CustomBusinessDay + +# ---------------------------------------------------------------------- +# to_offset helpers + +prefix_mapping = { + offset._prefix: offset + for offset in [ + YearBegin, # 'AS' + YearEnd, # 'A' + BYearBegin, # 'BAS' + BYearEnd, # 'BA' + BusinessDay, # 'B' + BusinessMonthBegin, # 'BMS' + BusinessMonthEnd, # 'BM' + BQuarterEnd, # 'BQ' + BQuarterBegin, # 'BQS' + BusinessHour, # 'BH' + CustomBusinessDay, # 'C' + CustomBusinessMonthEnd, # 'CBM' + CustomBusinessMonthBegin, # 'CBMS' + CustomBusinessHour, # 'CBH' + MonthEnd, # 'M' + MonthBegin, # 'MS' + Nano, # 'N' + SemiMonthEnd, # 'SM' + SemiMonthBegin, # 'SMS' + Week, # 'W' + Second, # 'S' + Minute, # 'T' + Micro, # 'U' + QuarterEnd, # 'Q' + QuarterBegin, # 'QS' + Milli, # 'L' + Hour, # 'H' + Day, # 'D' + WeekOfMonth, # 'WOM' + FY5253, + FY5253Quarter, + ] +} + +# hack to handle WOM-1MON +opattern = re.compile( + r"([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)" +) + +_lite_rule_alias = { + "W": "W-SUN", + "Q": "Q-DEC", + + "A": "A-DEC", # YearEnd(month=12), + "Y": "A-DEC", + "AS": "AS-JAN", # YearBegin(month=1), + "YS": "AS-JAN", + "BA": "BA-DEC", # BYearEnd(month=12), + "BY": "BA-DEC", + "BAS": "BAS-JAN", # BYearBegin(month=1), + "BYS": "BAS-JAN", + + "Min": "T", + "min": "T", + "ms": "L", + "us": "U", + "ns": "N", +} + +_dont_uppercase = {"MS", "ms"} + +INVALID_FREQ_ERR_MSG = "Invalid 
frequency: {0}" + +# TODO: still needed? +# cache of previously seen offsets +_offset_map = {} + + +# TODO: better name? +def _get_offset(name: str) -> BaseOffset: + """ + Return DateOffset object associated with rule name. + + Examples + -------- + _get_offset('EOM') --> BMonthEnd(1) + """ + if name not in _dont_uppercase: + name = name.upper() + name = _lite_rule_alias.get(name, name) + name = _lite_rule_alias.get(name.lower(), name) + else: + name = _lite_rule_alias.get(name, name) + + if name not in _offset_map: + try: + split = name.split("-") + klass = prefix_mapping[split[0]] + # handles case where there's no suffix (and will TypeError if too + # many '-') + offset = klass._from_name(*split[1:]) + except (ValueError, TypeError, KeyError) as err: + # bad prefix or suffix + raise ValueError(INVALID_FREQ_ERR_MSG.format(name)) from err + # cache + _offset_map[name] = offset + + return _offset_map[name] + + +cpdef to_offset(freq): + """ + Return DateOffset object from string or datetime.timedelta object. + + Parameters + ---------- + freq : str, datetime.timedelta, BaseOffset or None + + Returns + ------- + DateOffset or None + + Raises + ------ + ValueError + If freq is an invalid frequency + + See Also + -------- + BaseOffset : Standard kind of date increment used for a date range. + + Examples + -------- + >>> to_offset("5min") + <5 * Minutes> + + >>> to_offset("1D1H") + <25 * Hours> + + >>> to_offset("2W") + <2 * Weeks: weekday=6> + + >>> to_offset("2B") + <2 * BusinessDays> + + >>> to_offset(pd.Timedelta(days=1)) + + + >>> to_offset(Hour()) + + """ + if freq is None: + return None + + if isinstance(freq, BaseOffset): + return freq + + if isinstance(freq, tuple): + raise TypeError( + f"to_offset does not support tuples {freq}, pass as a string instead" + ) + + elif PyDelta_Check(freq): + return delta_to_tick(freq) + + elif isinstance(freq, str): + delta = None + stride_sign = None + + try: + split = opattern.split(freq) + if split[-1] != "" and not split[-1].isspace(): + # the last element must be blank + raise ValueError("last element must be blank") + + tups = zip(split[0::4], split[1::4], split[2::4]) + for n, (sep, stride, name) in enumerate(tups): + if sep != "" and not sep.isspace(): + raise ValueError("separator must be spaces") + prefix = _lite_rule_alias.get(name) or name + if stride_sign is None: + stride_sign = -1 if stride.startswith("-") else 1 + if not stride: + stride = 1 + + if prefix in {"D", "H", "T", "S", "L", "U", "N"}: + # For these prefixes, we have something like "3H" or + # "2.5T", so we can construct a Timedelta with the + # matching unit and get our offset from delta_to_tick + td = Timedelta(1, unit=prefix) + off = delta_to_tick(td) + offset = off * float(stride) + if n != 0: + # If n==0, then stride_sign is already incorporated + # into the offset + offset *= stride_sign + else: + stride = int(stride) + offset = _get_offset(name) + offset = offset * int(np.fabs(stride) * stride_sign) + + if delta is None: + delta = offset + else: + delta = delta + offset + except (ValueError, TypeError) as err: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err + else: + delta = None + + if delta is None: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) + + return delta + + +# ---------------------------------------------------------------------- +# RelativeDelta Arithmetic + +cdef datetime _shift_day(datetime other, int days): + """ + Increment the datetime `other` by the given number of days, retaining + the time-portion of the datetime. 
For tz-naive datetimes this is + equivalent to adding a timedelta. For tz-aware datetimes it is similar to + dateutil's relativedelta.__add__, but handles pytz tzinfo objects. + + Parameters + ---------- + other : datetime or Timestamp + days : int + + Returns + ------- + shifted: datetime or Timestamp + """ + if other.tzinfo is None: + return other + timedelta(days=days) + + tz = other.tzinfo + naive = other.replace(tzinfo=None) + shifted = naive + timedelta(days=days) + return localize_pydatetime(shifted, tz) + + +cdef inline int year_add_months(npy_datetimestruct dts, int months) nogil: + """ + New year number after shifting npy_datetimestruct number of months. + """ + return dts.year + (dts.month + months - 1) // 12 + + +cdef inline int month_add_months(npy_datetimestruct dts, int months) nogil: + """ + New month number after shifting npy_datetimestruct + number of months. + """ + cdef: + int new_month = (dts.month + months) % 12 + return 12 if new_month == 0 else new_month + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef ndarray shift_quarters( + ndarray dtindex, + int quarters, + int q1start_month, + str day_opt, + int modby=3, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): + """ + Given an int64 array representing nanosecond timestamps, shift all elements + by the specified number of quarters using DateOffset semantics. + + Parameters + ---------- + dtindex : int64_t[:] timestamps for input dates + quarters : int number of quarters to shift + q1start_month : int month in which Q1 begins by convention + day_opt : {'start', 'end', 'business_start', 'business_end'} + modby : int (3 for quarters, 12 for years) + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + out : ndarray[int64_t] + """ + if day_opt not in ["start", "end", "business_start", "business_end"]: + raise ValueError("day must be None, 'start', 'end', " + "'business_start', or 'business_end'") + + cdef: + Py_ssize_t count = dtindex.size + ndarray out = cnp.PyArray_EMPTY(dtindex.ndim, dtindex.shape, cnp.NPY_INT64, 0) + Py_ssize_t i + int64_t val, res_val + int months_since, n + npy_datetimestruct dts + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, dtindex) + + with nogil: + for i in range(count): + # Analogous to: val = dtindex[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + n = quarters + + months_since = (dts.month - q1start_month) % modby + n = _roll_qtrday(&dts, n, months_since, day_opt) + + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - months_since) + dts.day = get_day_of_month(&dts, day_opt) + + res_val = npy_datetimestruct_to_datetime(reso, &dts) + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return out + + +@cython.wraparound(False) +@cython.boundscheck(False) +def shift_months( + ndarray dtindex, # int64_t, arbitrary ndim + int months, + str day_opt=None, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): + """ + Given an int64-based datetime index, shift all elements + specified number of months using DateOffset semantics + + day_opt: {None, 'start', 'end', 'business_start', 'business_end'} + * None: day of month + * 'start' 1st day of month + * 'end' last day of month + """ + cdef: + Py_ssize_t i + npy_datetimestruct dts + int count = dtindex.size + ndarray out = cnp.PyArray_EMPTY(dtindex.ndim, dtindex.shape, 
cnp.NPY_INT64, 0) + int months_to_roll + int64_t val, res_val + + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, dtindex) + + if day_opt is not None and day_opt not in { + "start", "end", "business_start", "business_end" + }: + raise ValueError("day must be None, 'start', 'end', " + "'business_start', or 'business_end'") + + if day_opt is None: + # TODO: can we combine this with the non-None case? + with nogil: + for i in range(count): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) + + dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) + res_val = npy_datetimestruct_to_datetime(reso, &dts) + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + else: + with nogil: + for i in range(count): + + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + months_to_roll = months + + months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) + + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + dts.day = get_day_of_month(&dts, day_opt) + + res_val = npy_datetimestruct_to_datetime(reso, &dts) + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return out + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef ndarray _shift_bdays( + ndarray i8other, + int periods, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): + """ + Implementation of BusinessDay.apply_offset. 
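+
+    Shift each element of ``i8other`` by ``periods`` business days,
+    skipping weekends in the same way as ``BusinessDay._apply``.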
+ + Parameters + ---------- + i8other : const int64_t[:] + periods : int + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + ndarray[int64_t] + """ + cdef: + Py_ssize_t i, n = i8other.size + ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + int64_t val, res_val + int wday, nadj, days + npy_datetimestruct dts + int64_t DAY_PERIODS = periods_per_day(reso) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, i8other) + + for i in range(n): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if val == NPY_NAT: + res_val = NPY_NAT + else: + # The rest of this is effectively a copy of BusinessDay.apply + nadj = periods + weeks = nadj // 5 + pandas_datetime_to_datetimestruct(val, reso, &dts) + wday = dayofweek(dts.year, dts.month, dts.day) + + if nadj <= 0 and wday > 4: + # roll forward + nadj += 1 + + nadj -= 5 * weeks + + # nadj is always >= 0 at this point + if nadj == 0 and wday > 4: + # roll back + days = 4 - wday + elif wday > 4: + # roll forward + days = (7 - wday) + (nadj - 1) + elif wday + nadj <= 4: + # shift by n days without leaving the current week + days = nadj + else: + # shift by nadj days plus 2 to get past the weekend + days = nadj + 2 + + res_val = val + (7 * weeks + days) * DAY_PERIODS + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return result + + +def shift_month(stamp: datetime, months: int, day_opt: object = None) -> datetime: + """ + Given a datetime (or Timestamp) `stamp`, an integer `months` and an + option `day_opt`, return a new datetimelike that many months later, + with day determined by `day_opt` using relativedelta semantics. + + Scalar analogue of shift_months. + + Parameters + ---------- + stamp : datetime or Timestamp + months : int + day_opt : None, 'start', 'end', 'business_start', 'business_end', or int + None: returned datetimelike has the same day as the input, or the + last day of the month if the new month is too short + 'start': returned datetimelike has day=1 + 'end': returned datetimelike has day on the last day of the month + 'business_start': returned datetimelike has day on the first + business day of the month + 'business_end': returned datetimelike has day on the last + business day of the month + int: returned datetimelike has day equal to day_opt + + Returns + ------- + shifted : datetime or Timestamp (same as input `stamp`) + """ + cdef: + int year, month, day + int days_in_month, dy + + dy = (stamp.month + months) // 12 + month = (stamp.month + months) % 12 + + if month == 0: + month = 12 + dy -= 1 + year = stamp.year + dy + + if day_opt is None: + days_in_month = get_days_in_month(year, month) + day = min(stamp.day, days_in_month) + elif day_opt == "start": + day = 1 + elif day_opt == "end": + day = get_days_in_month(year, month) + elif day_opt == "business_start": + # first business day of month + day = get_firstbday(year, month) + elif day_opt == "business_end": + # last business day of month + day = get_lastbday(year, month) + elif is_integer_object(day_opt): + days_in_month = get_days_in_month(year, month) + day = min(day_opt, days_in_month) + else: + raise ValueError(day_opt) + return stamp.replace(year=year, month=month, day=day) + + +cdef inline int get_day_of_month(npy_datetimestruct* dts, str day_opt) nogil: + """ + Find the day in `other`'s month that satisfies a DateOffset's is_on_offset + policy, as described by the `day_opt` argument. 
+ + Parameters + ---------- + dts : npy_datetimestruct* + day_opt : {'start', 'end', 'business_start', 'business_end'} + 'start': returns 1 + 'end': returns last day of the month + 'business_start': returns the first business day of the month + 'business_end': returns the last business day of the month + + Returns + ------- + day_of_month : int + + Examples + ------- + >>> other = datetime(2017, 11, 14) + >>> get_day_of_month(other, 'start') + 1 + >>> get_day_of_month(other, 'end') + 30 + + Notes + ----- + Caller is responsible for ensuring one of the four accepted day_opt values + is passed. + """ + + if day_opt == "start": + return 1 + elif day_opt == "end": + return get_days_in_month(dts.year, dts.month) + elif day_opt == "business_start": + # first business day of month + return get_firstbday(dts.year, dts.month) + else: + # i.e. day_opt == "business_end": + # last business day of month + return get_lastbday(dts.year, dts.month) + + +cpdef int roll_convention(int other, int n, int compare) nogil: + """ + Possibly increment or decrement the number of periods to shift + based on rollforward/rollbackward conventions. + + Parameters + ---------- + other : int, generally the day component of a datetime + n : number of periods to increment, before adjusting for rolling + compare : int, generally the day component of a datetime, in the same + month as the datetime form which `other` was taken. + + Returns + ------- + n : int number of periods to increment + """ + if n > 0 and other < compare: + n -= 1 + elif n <= 0 and other > compare: + # as if rolled forward already + n += 1 + return n + + +def roll_qtrday(other: datetime, n: int, month: int, + day_opt: str, modby: int) -> int: + """ + Possibly increment or decrement the number of periods to shift + based on rollforward/rollbackward conventions. + + Parameters + ---------- + other : datetime or Timestamp + n : number of periods to increment, before adjusting for rolling + month : int reference month giving the first month of the year + day_opt : {'start', 'end', 'business_start', 'business_end'} + The convention to use in finding the day in a given month against + which to compare for rollforward/rollbackward decisions. + modby : int 3 for quarters, 12 for years + + Returns + ------- + n : int number of periods to increment + + See Also + -------- + get_day_of_month : Find the day in a month provided an offset. + """ + cdef: + int months_since + npy_datetimestruct dts + + if day_opt not in ["start", "end", "business_start", "business_end"]: + raise ValueError(day_opt) + + pydate_to_dtstruct(other, &dts) + + if modby == 12: + # We care about the month-of-year, not month-of-quarter, so skip mod + months_since = other.month - month + else: + months_since = other.month % modby - month % modby + + return _roll_qtrday(&dts, n, months_since, day_opt) + + +cdef inline int _roll_qtrday(npy_datetimestruct* dts, + int n, + int months_since, + str day_opt) nogil except? 
-1: + """ + See roll_qtrday.__doc__ + """ + + if n > 0: + if months_since < 0 or (months_since == 0 and + dts.day < get_day_of_month(dts, day_opt)): + # pretend to roll back if on same month but + # before compare_day + n -= 1 + else: + if months_since > 0 or (months_since == 0 and + dts.day > get_day_of_month(dts, day_opt)): + # make sure to roll forward, so negate + n += 1 + return n diff --git a/pandas/_libs/tslibs/parsing.pxd b/pandas/_libs/tslibs/parsing.pxd new file mode 100644 index 00000000..25667f00 --- /dev/null +++ b/pandas/_libs/tslibs/parsing.pxd @@ -0,0 +1,3 @@ + +cpdef str get_rule_month(str source) +cpdef quarter_to_myear(int year, int quarter, str freq) diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi new file mode 100644 index 00000000..ce49136e --- /dev/null +++ b/pandas/_libs/tslibs/parsing.pyi @@ -0,0 +1,60 @@ +from datetime import datetime + +import numpy as np + +from pandas._libs.tslibs.offsets import BaseOffset +from pandas._typing import npt + +class DateParseError(ValueError): ... + +def parse_datetime_string( + date_string: str, + dayfirst: bool = ..., + yearfirst: bool = ..., + **kwargs, +) -> datetime: ... +def parse_time_string( + arg: str, + freq: BaseOffset | str | None = ..., + dayfirst: bool | None = ..., + yearfirst: bool | None = ..., +) -> tuple[datetime, str]: ... +def _does_string_look_like_datetime(py_string: str) -> bool: ... +def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ... +def try_parse_dates( + values: npt.NDArray[np.object_], # object[:] + parser=..., + dayfirst: bool = ..., + default: datetime | None = ..., +) -> npt.NDArray[np.object_]: ... +def try_parse_date_and_time( + dates: npt.NDArray[np.object_], # object[:] + times: npt.NDArray[np.object_], # object[:] + date_parser=..., + time_parser=..., + dayfirst: bool = ..., + default: datetime | None = ..., +) -> npt.NDArray[np.object_]: ... +def try_parse_year_month_day( + years: npt.NDArray[np.object_], # object[:] + months: npt.NDArray[np.object_], # object[:] + days: npt.NDArray[np.object_], # object[:] +) -> npt.NDArray[np.object_]: ... +def try_parse_datetime_components( + years: npt.NDArray[np.object_], # object[:] + months: npt.NDArray[np.object_], # object[:] + days: npt.NDArray[np.object_], # object[:] + hours: npt.NDArray[np.object_], # object[:] + minutes: npt.NDArray[np.object_], # object[:] + seconds: npt.NDArray[np.object_], # object[:] +) -> npt.NDArray[np.object_]: ... +def format_is_iso(f: str) -> bool: ... +def guess_datetime_format( + dt_str, + dayfirst: bool | None = ..., +) -> str | None: ... +def concat_date_cols( + date_cols: tuple, + keep_trivial_numbers: bool = ..., +) -> npt.NDArray[np.object_]: ... +def get_rule_month(source: str) -> str: ... diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx new file mode 100644 index 00000000..35f97f19 --- /dev/null +++ b/pandas/_libs/tslibs/parsing.pyx @@ -0,0 +1,1202 @@ +""" +Parsing functions for datetime and datetime-like strings. 
+""" +import re +import time +import warnings + +from pandas.util._exceptions import find_stack_level + +cimport cython +from cpython.datetime cimport ( + datetime, + datetime_new, + import_datetime, +) +from cpython.object cimport PyObject_Str +from cython cimport Py_ssize_t +from libc.string cimport strchr + +import_datetime() + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + PyArray_GETITEM, + PyArray_ITER_DATA, + PyArray_ITER_NEXT, + PyArray_IterNew, + flatiter, + float64_t, +) + +cnp.import_array() + +# dateutil compat + +from dateutil.parser import ( + DEFAULTPARSER, + parse as du_parse, +) +from dateutil.relativedelta import relativedelta +from dateutil.tz import ( + tzlocal as _dateutil_tzlocal, + tzoffset, + tzutc as _dateutil_tzutc, +) + +from pandas._config import get_option + +from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS +from pandas._libs.tslibs.nattype cimport ( + c_NaT as NaT, + c_nat_strings as nat_strings, +) +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + npy_datetimestruct, + string_to_dts, +) +from pandas._libs.tslibs.offsets cimport is_offset_object +from pandas._libs.tslibs.util cimport ( + get_c_string_buf_and_size, + is_array, +) + + +cdef extern from "../src/headers/portable.h": + int getdigit_ascii(char c, int default) nogil + +cdef extern from "../src/parser/tokenizer.h": + double xstrtod(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int) + + +# ---------------------------------------------------------------------- +# Constants + + +class DateParseError(ValueError): + pass + + +_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, + second=0, microsecond=0) + +PARSING_WARNING_MSG = ( + "Parsing dates in {format} format when dayfirst={dayfirst} was specified. " + "This may lead to inconsistently parsed dates! Specify a format " + "to ensure consistent parsing." +) + +cdef: + set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} + +# ---------------------------------------------------------------------- +cdef: + const char* delimiters = " /-." + int MAX_DAYS_IN_MONTH = 31, MAX_MONTH = 12 + + +cdef inline bint _is_delimiter(const char ch): + return strchr(delimiters, ch) != NULL + + +cdef inline int _parse_1digit(const char* s): + cdef int result = 0 + result += getdigit_ascii(s[0], -10) * 1 + return result + + +cdef inline int _parse_2digit(const char* s): + cdef int result = 0 + result += getdigit_ascii(s[0], -10) * 10 + result += getdigit_ascii(s[1], -100) * 1 + return result + + +cdef inline int _parse_4digit(const char* s): + cdef int result = 0 + result += getdigit_ascii(s[0], -10) * 1000 + result += getdigit_ascii(s[1], -100) * 100 + result += getdigit_ascii(s[2], -1000) * 10 + result += getdigit_ascii(s[3], -10000) * 1 + return result + + +cdef inline object _parse_delimited_date(str date_string, bint dayfirst): + """ + Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY. + + At the beginning function tries to parse date in MM/DD/YYYY format, but + if month > 12 - in DD/MM/YYYY (`dayfirst == False`). + With `dayfirst == True` function makes an attempt to parse date in + DD/MM/YYYY, if an attempt is wrong - in DD/MM/YYYY + + For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-. 
+ For MM/YYYY: delimiter can be a space or one of /- + If `date_string` can't be converted to date, then function returns + None, None + + Parameters + ---------- + date_string : str + dayfirst : bool + + Returns: + -------- + datetime or None + str or None + Describing resolution of the parsed string. + """ + cdef: + const char* buf + Py_ssize_t length + int day = 1, month = 1, year + bint can_swap = 0 + + buf = get_c_string_buf_and_size(date_string, &length) + if length == 10 and _is_delimiter(buf[2]) and _is_delimiter(buf[5]): + # parsing MM?DD?YYYY and DD?MM?YYYY dates + month = _parse_2digit(buf) + day = _parse_2digit(buf + 3) + year = _parse_4digit(buf + 6) + reso = 'day' + can_swap = 1 + elif length == 9 and _is_delimiter(buf[1]) and _is_delimiter(buf[4]): + # parsing M?DD?YYYY and D?MM?YYYY dates + month = _parse_1digit(buf) + day = _parse_2digit(buf + 2) + year = _parse_4digit(buf + 5) + reso = 'day' + can_swap = 1 + elif length == 9 and _is_delimiter(buf[2]) and _is_delimiter(buf[4]): + # parsing MM?D?YYYY and DD?M?YYYY dates + month = _parse_2digit(buf) + day = _parse_1digit(buf + 3) + year = _parse_4digit(buf + 5) + reso = 'day' + can_swap = 1 + elif length == 8 and _is_delimiter(buf[1]) and _is_delimiter(buf[3]): + # parsing M?D?YYYY and D?M?YYYY dates + month = _parse_1digit(buf) + day = _parse_1digit(buf + 2) + year = _parse_4digit(buf + 4) + reso = 'day' + can_swap = 1 + elif length == 7 and _is_delimiter(buf[2]): + # parsing MM?YYYY dates + if buf[2] == b'.': + # we cannot reliably tell whether e.g. 10.2010 is a float + # or a date, thus we refuse to parse it here + return None, None + month = _parse_2digit(buf) + year = _parse_4digit(buf + 3) + reso = 'month' + else: + return None, None + + if month < 0 or day < 0 or year < 1000: + # some part is not an integer, so + # date_string can't be converted to date, above format + return None, None + + swapped_day_and_month = False + if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ + and (month <= MAX_MONTH or day <= MAX_MONTH): + if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap: + day, month = month, day + swapped_day_and_month = True + if dayfirst and not swapped_day_and_month: + warnings.warn( + PARSING_WARNING_MSG.format( + format='MM/DD/YYYY', + dayfirst='True', + ), + stacklevel=find_stack_level(), + ) + elif not dayfirst and swapped_day_and_month: + warnings.warn( + PARSING_WARNING_MSG.format( + format='DD/MM/YYYY', + dayfirst='False (the default)', + ), + stacklevel=find_stack_level(), + ) + # In Python <= 3.6.0 there is no range checking for invalid dates + # in C api, thus we call faster C version for 3.6.1 or newer + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso + + raise DateParseError(f"Invalid date specified ({month}/{day})") + + +cdef inline bint does_string_look_like_time(str parse_string): + """ + Checks whether given string is a time: it has to start either from + H:MM or from HH:MM, and hour and minute values must be valid. + + Parameters + ---------- + parse_string : str + + Returns: + -------- + bool + Whether given string is potentially a time. 
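+        For example, "4:33" and "14:07" look like times, whereas "14:61"
+        and "433" do not.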
+ """ + cdef: + const char* buf + Py_ssize_t length + int hour = -1, minute = -1 + + buf = get_c_string_buf_and_size(parse_string, &length) + if length >= 4: + if buf[1] == b':': + # h:MM format + hour = getdigit_ascii(buf[0], -1) + minute = _parse_2digit(buf + 2) + elif buf[2] == b':': + # HH:MM format + hour = _parse_2digit(buf) + minute = _parse_2digit(buf + 3) + + return 0 <= hour <= 23 and 0 <= minute <= 59 + + +def parse_datetime_string( + # NB: This will break with np.str_ (GH#32264) even though + # isinstance(npstrobj, str) evaluates to True, so caller must ensure + # the argument is *exactly* 'str' + str date_string, + bint dayfirst=False, + bint yearfirst=False, + **kwargs, +) -> datetime: + """ + Parse datetime string, only returns datetime. + Also cares special handling matching time patterns. + + Returns + ------- + datetime + """ + + cdef: + datetime dt + + if not _does_string_look_like_datetime(date_string): + raise ValueError(f'Given date string {date_string} not likely a datetime') + + if does_string_look_like_time(date_string): + # use current datetime as default, not pass _DEFAULT_DATETIME + dt = du_parse(date_string, dayfirst=dayfirst, + yearfirst=yearfirst, **kwargs) + return dt + + dt, _ = _parse_delimited_date(date_string, dayfirst) + if dt is not None: + return dt + + # Handling special case strings today & now + if date_string == "now": + dt = datetime.now() + return dt + elif date_string == "today": + dt = datetime.today() + return dt + + try: + dt, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq=None) + return dt + except DateParseError: + raise + except ValueError: + pass + + try: + dt = du_parse(date_string, default=_DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) + except TypeError: + # following may be raised from dateutil + # TypeError: 'NoneType' object is not iterable + raise ValueError(f'Given date string {date_string} not likely a datetime') + + return dt + + +def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): + """ + Try hard to parse datetime string, leveraging dateutil plus some extra + goodies like quarter recognition. + + Parameters + ---------- + arg : str + freq : str or DateOffset, default None + Helps with interpreting time string if supplied + dayfirst : bool, default None + If None uses default from print_config + yearfirst : bool, default None + If None uses default from print_config + + Returns + ------- + datetime + str + Describing resolution of parsed string. + """ + if type(arg) is not str: + # GH#45580 np.str_ satisfies isinstance(obj, str) but if we annotate + # arg as "str" this raises here + if not isinstance(arg, np.str_): + raise TypeError( + "Argument 'arg' has incorrect type " + f"(expected str, got {type(arg).__name__})" + ) + arg = str(arg) + + if is_offset_object(freq): + freq = freq.rule_code + + if dayfirst is None: + dayfirst = get_option("display.date_dayfirst") + if yearfirst is None: + yearfirst = get_option("display.date_yearfirst") + + res = parse_datetime_string_with_reso(arg, freq=freq, + dayfirst=dayfirst, + yearfirst=yearfirst) + return res + + +cdef parse_datetime_string_with_reso( + str date_string, str freq=None, bint dayfirst=False, bint yearfirst=False, +): + """ + Parse datetime string and try to identify its resolution. + + Returns + ------- + datetime + str + Inferred resolution of the parsed string. 
+ + Raises + ------ + ValueError : preliminary check suggests string is not datetime + DateParseError : error within dateutil + """ + cdef: + object parsed, reso + bint string_to_dts_failed + npy_datetimestruct dts + NPY_DATETIMEUNIT out_bestunit + int out_local + int out_tzoffset + + if not _does_string_look_like_datetime(date_string): + raise ValueError(f'Given date string {date_string} not likely a datetime') + + parsed, reso = _parse_delimited_date(date_string, dayfirst) + if parsed is not None: + return parsed, reso + + # Try iso8601 first, as it handles nanoseconds + # TODO: does this render some/all of parse_delimited_date redundant? + string_to_dts_failed = string_to_dts( + date_string, &dts, &out_bestunit, &out_local, + &out_tzoffset, False + ) + if not string_to_dts_failed: + if dts.ps != 0 or out_local: + # TODO: the not-out_local case we could do without Timestamp; + # avoid circular import + from pandas import Timestamp + parsed = Timestamp(date_string) + else: + parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us) + reso = { + NPY_DATETIMEUNIT.NPY_FR_Y: "year", + NPY_DATETIMEUNIT.NPY_FR_M: "month", + NPY_DATETIMEUNIT.NPY_FR_D: "day", + NPY_DATETIMEUNIT.NPY_FR_h: "hour", + NPY_DATETIMEUNIT.NPY_FR_m: "minute", + NPY_DATETIMEUNIT.NPY_FR_s: "second", + NPY_DATETIMEUNIT.NPY_FR_ms: "millisecond", + NPY_DATETIMEUNIT.NPY_FR_us: "microsecond", + NPY_DATETIMEUNIT.NPY_FR_ns: "nanosecond", + }[out_bestunit] + return parsed, reso + + try: + return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) + except DateParseError: + raise + except ValueError: + pass + + try: + parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=yearfirst, + ignoretz=False) + except (ValueError, OverflowError) as err: + # TODO: allow raise of errors within instead + raise DateParseError(err) + if parsed is None: + raise DateParseError(f"Could not parse {date_string}") + return parsed, reso + + +cpdef bint _does_string_look_like_datetime(str py_string): + """ + Checks whether given string is a datetime: it has to start with '0' or + be greater than 1000. + + Parameters + ---------- + py_string: str + + Returns + ------- + bool + Whether given string is potentially a datetime. + """ + cdef: + const char *buf + char *endptr = NULL + Py_ssize_t length = -1 + double converted_date + char first + int error = 0 + + buf = get_c_string_buf_and_size(py_string, &length) + if length >= 1: + first = buf[0] + if first == b'0': + # Strings starting with 0 are more consistent with a + # date-like string than a number + return True + elif py_string in _not_datelike_strings: + return False + else: + # xstrtod with such parameters copies behavior of python `float` + # cast; for example, " 35.e-1 " is valid string for this cast so, + # for correctly xstrtod call necessary to pass these params: + # b'.' - a dot is used as separator, b'e' - an exponential form of + # a float number can be used, b'\0' - not to use a thousand + # separator, 1 - skip extra spaces before and after, + converted_date = xstrtod(buf, &endptr, + b'.', b'e', b'\0', 1, &error, NULL) + # if there were no errors and the whole line was parsed, then ... 
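+            # ... the string is treated as date-like only if that number is
+            # >= 1000, i.e. it could plausibly be a year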
+ if error == 0 and endptr == buf + length: + return converted_date >= 1000 + + return True + + +cdef inline object _parse_dateabbr_string(object date_string, datetime default, + str freq=None): + cdef: + object ret + # year initialized to prevent compiler warnings + int year = -1, quarter = -1, month, mnum + Py_ssize_t date_len + + # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 + assert isinstance(date_string, str) + + if date_string in nat_strings: + return NaT, '' + + date_string = date_string.upper() + date_len = len(date_string) + + if date_len == 4: + # parse year only like 2000 + try: + ret = default.replace(year=int(date_string)) + return ret, 'year' + except ValueError: + pass + + try: + if 4 <= date_len <= 7: + i = date_string.index('Q', 1, 6) + if i == 1: + quarter = int(date_string[0]) + if date_len == 4 or (date_len == 5 + and date_string[i + 1] == '-'): + # r'(\d)Q-?(\d\d)') + year = 2000 + int(date_string[-2:]) + elif date_len == 6 or (date_len == 7 + and date_string[i + 1] == '-'): + # r'(\d)Q-?(\d\d\d\d)') + year = int(date_string[-4:]) + else: + raise ValueError + elif i == 2 or i == 3: + # r'(\d\d)-?Q(\d)' + if date_len == 4 or (date_len == 5 + and date_string[i - 1] == '-'): + quarter = int(date_string[-1]) + year = 2000 + int(date_string[:2]) + else: + raise ValueError + elif i == 4 or i == 5: + if date_len == 6 or (date_len == 7 + and date_string[i - 1] == '-'): + # r'(\d\d\d\d)-?Q(\d)' + quarter = int(date_string[-1]) + year = int(date_string[:4]) + else: + raise ValueError + + if not (1 <= quarter <= 4): + raise DateParseError(f'Incorrect quarterly string is given, ' + f'quarter must be ' + f'between 1 and 4: {date_string}') + + try: + # GH#1228 + year, month = quarter_to_myear(year, quarter, freq) + except KeyError: + raise DateParseError("Unable to retrieve month " + "information from given " + f"freq: {freq}") + + ret = default.replace(year=year, month=month) + return ret, 'quarter' + + except DateParseError: + raise + except ValueError: + pass + + if date_len == 6 and freq == 'M': + year = int(date_string[:4]) + month = int(date_string[4:6]) + try: + ret = default.replace(year=year, month=month) + return ret, 'month' + except ValueError: + pass + + for pat in ['%Y-%m', '%b %Y', '%b-%Y']: + try: + ret = datetime.strptime(date_string, pat) + return ret, 'month' + except ValueError: + pass + + raise ValueError(f'Unable to parse {date_string}') + + +cpdef quarter_to_myear(int year, int quarter, str freq): + """ + A quarterly frequency defines a "year" which may not coincide with + the calendar-year. Find the calendar-year and calendar-month associated + with the given year and quarter under the `freq`-derived calendar. 
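+
+    For example, under ``freq='Q-MAR'`` (fiscal year ending in March) the
+    first quarter of 2020 starts in April 2019, so the result is ``(2019, 4)``.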
+ + Parameters + ---------- + year : int + quarter : int + freq : str or None + + Returns + ------- + year : int + month : int + + See Also + -------- + Period.qyear + """ + if quarter <= 0 or quarter > 4: + raise ValueError("Quarter must be 1 <= q <= 4") + + if freq is not None: + mnum = c_MONTH_NUMBERS[get_rule_month(freq)] + 1 + month = (mnum + (quarter - 1) * 3) % 12 + 1 + if month > mnum: + year -= 1 + else: + month = (quarter - 1) * 3 + 1 + + return year, month + + +cdef dateutil_parse( + str timestr, + object default, + bint ignoretz=False, + bint dayfirst=False, + bint yearfirst=False, +): + """ lifted from dateutil to get resolution""" + + cdef: + str attr + datetime ret + object res + object reso = None + dict repl = {} + + res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) + + if res is None: + raise ValueError(f"Unknown datetime string format, unable to parse: {timestr}") + + for attr in ["year", "month", "day", "hour", + "minute", "second", "microsecond"]: + value = getattr(res, attr) + if value is not None: + repl[attr] = value + reso = attr + + if reso is None: + raise ValueError(f"Unable to parse datetime string: {timestr}") + + if reso == 'microsecond': + if repl['microsecond'] == 0: + reso = 'second' + elif repl['microsecond'] % 1000 == 0: + reso = 'millisecond' + + ret = default.replace(**repl) + if res.weekday is not None and not res.day: + ret = ret + relativedelta.relativedelta(weekday=res.weekday) + if not ignoretz: + if res.tzname and res.tzname in time.tzname: + ret = ret.replace(tzinfo=_dateutil_tzlocal()) + elif res.tzoffset == 0: + ret = ret.replace(tzinfo=_dateutil_tzutc()) + elif res.tzoffset: + ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset)) + return ret, reso + + +# ---------------------------------------------------------------------- +# Parsing for type-inference + + +def try_parse_dates( + object[:] values, parser=None, bint dayfirst=False, default=None, +) -> np.ndarray: + cdef: + Py_ssize_t i, n + object[::1] result + + n = len(values) + result = np.empty(n, dtype='O') + + if parser is None: + if default is None: # GH2618 + date = datetime.now() + default = datetime(date.year, date.month, 1) + + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + + # EAFP here + try: + for i in range(n): + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) + except Exception: + # Since parser is user-defined, we can't guess what it might raise + return values + else: + parse_date = parser + + for i in range(n): + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) + + return result.base # .base to access underlying ndarray + + +def try_parse_date_and_time( + object[:] dates, + object[:] times, + date_parser=None, + time_parser=None, + bint dayfirst=False, + default=None, +) -> np.ndarray: + cdef: + Py_ssize_t i, n + object[::1] result + + n = len(dates) + # TODO(cython3): Use len instead of `shape[0]` + if times.shape[0] != n: + raise ValueError('Length of dates and times must be equal') + result = np.empty(n, dtype='O') + + if date_parser is None: + if default is None: # GH2618 + date = datetime.now() + default = datetime(date.year, date.month, 1) + + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + + else: + parse_date = date_parser + + if time_parser is None: + parse_time = lambda x: du_parse(x) + + else: + parse_time = time_parser + + for i in range(n): + d = parse_date(str(dates[i])) + t = parse_time(str(times[i])) 
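+        # combine the parsed date and the parsed time into a single datetime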
+ result[i] = datetime(d.year, d.month, d.day, + t.hour, t.minute, t.second) + + return result.base # .base to access underlying ndarray + + +def try_parse_year_month_day( + object[:] years, object[:] months, object[:] days +) -> np.ndarray: + cdef: + Py_ssize_t i, n + object[::1] result + + n = len(years) + # TODO(cython3): Use len instead of `shape[0]` + if months.shape[0] != n or days.shape[0] != n: + raise ValueError('Length of years/months/days must all be equal') + result = np.empty(n, dtype='O') + + for i in range(n): + result[i] = datetime(int(years[i]), int(months[i]), int(days[i])) + + return result.base # .base to access underlying ndarray + + +def try_parse_datetime_components(object[:] years, + object[:] months, + object[:] days, + object[:] hours, + object[:] minutes, + object[:] seconds) -> np.ndarray: + + cdef: + Py_ssize_t i, n + object[::1] result + int secs + double float_secs + double micros + + n = len(years) + # TODO(cython3): Use len instead of `shape[0]` + if ( + months.shape[0] != n + or days.shape[0] != n + or hours.shape[0] != n + or minutes.shape[0] != n + or seconds.shape[0] != n + ): + raise ValueError('Length of all datetime components must be equal') + result = np.empty(n, dtype='O') + + for i in range(n): + float_secs = float(seconds[i]) + secs = int(float_secs) + + micros = float_secs - secs + if micros > 0: + micros = micros * 1000000 + + result[i] = datetime(int(years[i]), int(months[i]), int(days[i]), + int(hours[i]), int(minutes[i]), secs, + int(micros)) + + return result.base # .base to access underlying ndarray + + +# ---------------------------------------------------------------------- +# Miscellaneous + + +# Class copied verbatim from https://github.com/dateutil/dateutil/pull/732 +# +# We use this class to parse and tokenize date strings. However, as it is +# a private class in the dateutil library, relying on backwards compatibility +# is not practical. In fact, using this class issues warnings (xref gh-21322). +# Thus, we port the class over so that both issues are resolved. +# +# Copyright (c) 2017 - dateutil contributors +class _timelex: + def __init__(self, instream): + if getattr(instream, 'decode', None) is not None: + instream = instream.decode() + + if isinstance(instream, str): + self.stream = instream + elif getattr(instream, 'read', None) is None: + raise TypeError( + 'Parser must be a string or character stream, not ' + f'{type(instream).__name__}') + else: + self.stream = instream.read() + + def get_tokens(self): + """ + This function breaks the time string into lexical units (tokens), which + can be parsed by the parser. Lexical units are demarcated by changes in + the character set, so any continuous string of letters is considered + one unit, any continuous string of numbers is considered one unit. + The main complication arises from the fact that dots ('.') can be used + both as separators (e.g. "Sep.20.2009") or decimal points (e.g. + "4:30:21.447"). As such, it is necessary to read the full context of + any dot-separated strings before breaking it into tokens; as such, this + function maintains a "token stack", for when the ambiguous context + demands that multiple tokens be parsed at once. + """ + cdef: + Py_ssize_t n + + stream = self.stream.replace('\x00', '') + + # TODO: Change \s --> \s+ (this doesn't match existing behavior) + # TODO: change the punctuation block to punc+ (does not match existing) + # TODO: can we merge the two digit patterns? + tokens = re.findall(r"\s|" + r"(? 
bint: + """ + Does format match the iso8601 set that can be handled by the C parser? + Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different + but must be consistent. Leading 0s in dates and times are optional. + """ + iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}'.format + excluded_formats = ['%Y%m%d', '%Y%m', '%Y'] + + for date_sep in [' ', '/', '\\', '-', '.', '']: + for time_sep in [' ', 'T']: + for micro_or_tz in ['', '%z', '%Z', '.%f', '.%f%z', '.%f%Z']: + if (iso_template(date_sep=date_sep, + time_sep=time_sep, + micro_or_tz=micro_or_tz, + ).startswith(f) and f not in excluded_formats): + return True + return False + + +def guess_datetime_format(dt_str, bint dayfirst=False): + """ + Guess the datetime format of a given datetime string. + + Parameters + ---------- + dt_str : str + Datetime string to guess the format of. + dayfirst : bool, default False + If True parses dates with the day first, eg 20/01/2005 + Warning: dayfirst=True is not strict, but will prefer to parse + with day first (this is a known bug). + + Returns + ------- + ret : datetime format string (for `strftime` or `strptime`) + """ + + if not isinstance(dt_str, str): + return None + + day_attribute_and_format = (('day',), '%d', 2) + + # attr name, format, padding (if any) + datetime_attrs_to_format = [ + (('year', 'month', 'day'), '%Y%m%d', 0), + (('year',), '%Y', 0), + (('month',), '%B', 0), + (('month',), '%b', 0), + (('month',), '%m', 2), + day_attribute_and_format, + (('hour',), '%H', 2), + (('minute',), '%M', 2), + (('second',), '%S', 2), + (('microsecond',), '%f', 6), + (('second', 'microsecond'), '%S.%f', 0), + (('tzinfo',), '%z', 0), + (('tzinfo',), '%Z', 0), + (('day_of_week',), '%a', 0), + (('day_of_week',), '%A', 0), + (('meridiem',), '%p', 0), + ] + + if dayfirst: + datetime_attrs_to_format.remove(day_attribute_and_format) + datetime_attrs_to_format.insert(0, day_attribute_and_format) + + try: + parsed_datetime = du_parse(dt_str, dayfirst=dayfirst) + except (ValueError, OverflowError): + # In case the datetime can't be parsed, its format cannot be guessed + return None + + if parsed_datetime is None: + return None + + # _DATEUTIL_LEXER_SPLIT from dateutil will never raise here + tokens = _DATEUTIL_LEXER_SPLIT(dt_str) + + # Normalize offset part of tokens. + # There are multiple formats for the timezone offset. + # To pass the comparison condition between the output of `strftime` and + # joined tokens, which is carried out at the final step of the function, + # the offset part of the tokens must match the '%z' format like '+0900' + # instead of ‘+09:00’. + if parsed_datetime.tzinfo is not None: + offset_index = None + if len(tokens) > 0 and tokens[-1] == 'Z': + # the last 'Z' means zero offset + offset_index = -1 + elif len(tokens) > 1 and tokens[-2] in ('+', '-'): + # ex. [..., '+', '0900'] + offset_index = -2 + elif len(tokens) > 3 and tokens[-4] in ('+', '-'): + # ex. [..., '+', '09', ':', '00'] + offset_index = -4 + + if offset_index is not None: + # If the input string has a timezone offset like '+0900', + # the offset is separated into two tokens, ex. ['+', '0900’]. + # This separation will prevent subsequent processing + # from correctly parsing the time zone format. + # So in addition to the format nomalization, we rejoin them here. 
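+            # ``offset_index + 1 or None`` keeps every token when the offset is
+            # the single trailing 'Z' (offset_index == -1) and otherwise drops
+            # the now-redundant pieces after the rejoined offset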
+ tokens[offset_index] = parsed_datetime.strftime("%z") + tokens = tokens[:offset_index + 1 or None] + + format_guess = [None] * len(tokens) + found_attrs = set() + + for attrs, attr_format, padding in datetime_attrs_to_format: + # If a given attribute has been placed in the format string, skip + # over other formats for that same underlying attribute (IE, month + # can be represented in multiple different ways) + if set(attrs) & found_attrs: + continue + + if parsed_datetime.tzinfo is None and attr_format in ("%Z", "%z"): + continue + + parsed_formatted = parsed_datetime.strftime(attr_format) + for i, token_format in enumerate(format_guess): + token_filled = tokens[i].zfill(padding) + if token_format is None and token_filled == parsed_formatted: + format_guess[i] = attr_format + tokens[i] = token_filled + found_attrs.update(attrs) + break + + # Only consider it a valid guess if we have a year, month and day + if len({'year', 'month', 'day'} & found_attrs) != 3: + return None + + output_format = [] + for i, guess in enumerate(format_guess): + if guess is not None: + # Either fill in the format placeholder (like %Y) + output_format.append(guess) + else: + # Or just the token separate (IE, the dashes in "01-01-2013") + try: + # If the token is numeric, then we likely didn't parse it + # properly, so our guess is wrong + float(tokens[i]) + return None + except ValueError: + pass + + output_format.append(tokens[i]) + + guessed_format = ''.join(output_format) + + # rebuild string, capturing any inferred padding + dt_str = ''.join(tokens) + if parsed_datetime.strftime(guessed_format) == dt_str: + return guessed_format + else: + return None + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers): + """ + Convert `item` to str. + + Parameters + ---------- + item : object + keep_trivial_numbers : bool + if True, then conversion (to string from integer/float zero) + is not performed + + Returns + ------- + str or int or float + """ + cdef: + float64_t float_item + + if keep_trivial_numbers: + if isinstance(item, int): + if item == 0: + return item + elif isinstance(item, float): + float_item = item + if float_item == 0.0 or float_item != float_item: + return item + + if not isinstance(item, str): + item = PyObject_Str(item) + + return item + + +@cython.wraparound(False) +@cython.boundscheck(False) +def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndarray: + """ + Concatenates elements from numpy arrays in `date_cols` into strings. 
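+
+    When more than one column is given, each output row joins the
+    corresponding entries with a single space.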
+ + Parameters + ---------- + date_cols : tuple[ndarray] + keep_trivial_numbers : bool, default True + if True and len(date_cols) == 1, then + conversion (to string from integer/float zero) is not performed + + Returns + ------- + arr_of_rows : ndarray[object] + + Examples + -------- + >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object) + >>> times=np.array(['11:20', '10:45'], dtype=object) + >>> result = concat_date_cols((dates, times)) + >>> result + array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object) + """ + cdef: + Py_ssize_t rows_count = 0, col_count = len(date_cols) + Py_ssize_t col_idx, row_idx + list list_to_join + cnp.ndarray[object] iters + object[::1] iters_view + flatiter it + cnp.ndarray[object] result + object[::1] result_view + + if col_count == 0: + return np.zeros(0, dtype=object) + + if not all(is_array(array) for array in date_cols): + raise ValueError("not all elements from date_cols are numpy arrays") + + rows_count = min(len(array) for array in date_cols) + result = np.zeros(rows_count, dtype=object) + result_view = result + + if col_count == 1: + array = date_cols[0] + it = PyArray_IterNew(array) + for row_idx in range(rows_count): + item = PyArray_GETITEM(array, PyArray_ITER_DATA(it)) + result_view[row_idx] = convert_to_unicode(item, + keep_trivial_numbers) + PyArray_ITER_NEXT(it) + else: + # create fixed size list - more efficient memory allocation + list_to_join = [None] * col_count + iters = np.zeros(col_count, dtype=object) + + # create memoryview of iters ndarray, that will contain some + # flatiter's for each array in `date_cols` - more efficient indexing + iters_view = iters + for col_idx, array in enumerate(date_cols): + iters_view[col_idx] = PyArray_IterNew(array) + + # array elements that are on the same line are converted to one string + for row_idx in range(rows_count): + for col_idx, array in enumerate(date_cols): + # this cast is needed, because we did not find a way + # to efficiently store `flatiter` type objects in ndarray + it = iters_view[col_idx] + item = PyArray_GETITEM(array, PyArray_ITER_DATA(it)) + list_to_join[col_idx] = convert_to_unicode(item, False) + PyArray_ITER_NEXT(it) + result_view[row_idx] = " ".join(list_to_join) + + return result + + +cpdef str get_rule_month(str source): + """ + Return starting month of given freq, default is December. + + Parameters + ---------- + source : str + Derived from `freq.rule_code` or `freq.freqstr`. 
+ + Returns + ------- + rule_month: str + + Examples + -------- + >>> get_rule_month('D') + 'DEC' + + >>> get_rule_month('A-JAN') + 'JAN' + """ + source = source.upper() + if "-" not in source: + return "DEC" + else: + return source.split("-")[1] diff --git a/pandas/_libs/tslibs/period.pxd b/pandas/_libs/tslibs/period.pxd new file mode 100644 index 00000000..46c6e52c --- /dev/null +++ b/pandas/_libs/tslibs/period.pxd @@ -0,0 +1,7 @@ +from numpy cimport int64_t + +from .np_datetime cimport npy_datetimestruct + + +cdef bint is_period_object(object obj) +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil diff --git a/pandas/_libs/tslibs/period.pyi b/pandas/_libs/tslibs/period.pyi new file mode 100644 index 00000000..5ad91964 --- /dev/null +++ b/pandas/_libs/tslibs/period.pyi @@ -0,0 +1,129 @@ +from datetime import timedelta +from typing import Literal + +import numpy as np + +from pandas._libs.tslibs.nattype import NaTType +from pandas._libs.tslibs.offsets import BaseOffset +from pandas._libs.tslibs.timestamps import Timestamp +from pandas._typing import ( + Frequency, + Timezone, + npt, +) + +INVALID_FREQ_ERR_MSG: str +DIFFERENT_FREQ: str + +class IncompatibleFrequency(ValueError): ... + +def periodarr_to_dt64arr( + periodarr: npt.NDArray[np.int64], # const int64_t[:] + freq: int, +) -> npt.NDArray[np.int64]: ... +def period_asfreq_arr( + arr: npt.NDArray[np.int64], + freq1: int, + freq2: int, + end: bool, +) -> npt.NDArray[np.int64]: ... +def get_period_field_arr( + field: str, + arr: npt.NDArray[np.int64], # const int64_t[:] + freq: int, +) -> npt.NDArray[np.int64]: ... +def from_ordinals( + values: npt.NDArray[np.int64], # const int64_t[:] + freq: timedelta | BaseOffset | str, +) -> npt.NDArray[np.int64]: ... +def extract_ordinals( + values: npt.NDArray[np.object_], + freq: Frequency | int, +) -> npt.NDArray[np.int64]: ... +def extract_freq( + values: npt.NDArray[np.object_], +) -> BaseOffset: ... + +# exposed for tests +def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ... +def period_ordinal( + y: int, m: int, d: int, h: int, min: int, s: int, us: int, ps: int, freq: int +) -> int: ... +def freq_to_dtype_code(freq: BaseOffset) -> int: ... +def validate_end_alias(how: str) -> Literal["E", "S"]: ... + +class PeriodMixin: + @property + def end_time(self) -> Timestamp: ... + @property + def start_time(self) -> Timestamp: ... + def _require_matching_freq(self, other, base: bool = ...) -> None: ... + +class Period(PeriodMixin): + ordinal: int # int64_t + freq: BaseOffset + + # error: "__new__" must return a class instance (got "Union[Period, NaTType]") + def __new__( # type: ignore[misc] + cls, + value=..., + freq: int | str | BaseOffset | None = ..., + ordinal: int | None = ..., + year: int | None = ..., + month: int | None = ..., + quarter: int | None = ..., + day: int | None = ..., + hour: int | None = ..., + minute: int | None = ..., + second: int | None = ..., + ) -> Period | NaTType: ... + @classmethod + def _maybe_convert_freq(cls, freq) -> BaseOffset: ... + @classmethod + def _from_ordinal(cls, ordinal: int, freq) -> Period: ... + @classmethod + def now(cls, freq: BaseOffset = ...) -> Period: ... + def strftime(self, fmt: str) -> str: ... + def to_timestamp( + self, + freq: str | BaseOffset | None = ..., + how: str = ..., + tz: Timezone | None = ..., + ) -> Timestamp: ... + def asfreq(self, freq: str | BaseOffset, how: str = ...) -> Period: ... + @property + def freqstr(self) -> str: ... + @property + def is_leap_year(self) -> bool: ... 
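+    # Illustrative usage (not part of the upstream stub):
+    #   pd.Period("2020", freq="A").is_leap_year  # -> True
+    # The integer field accessors below mirror the properties implemented
+    # in pandas/_libs/tslibs/period.pyx.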
+ @property + def daysinmonth(self) -> int: ... + @property + def days_in_month(self) -> int: ... + @property + def qyear(self) -> int: ... + @property + def quarter(self) -> int: ... + @property + def day_of_year(self) -> int: ... + @property + def weekday(self) -> int: ... + @property + def day_of_week(self) -> int: ... + @property + def week(self) -> int: ... + @property + def weekofyear(self) -> int: ... + @property + def second(self) -> int: ... + @property + def minute(self) -> int: ... + @property + def hour(self) -> int: ... + @property + def day(self) -> int: ... + @property + def month(self) -> int: ... + @property + def year(self) -> int: ... + def __sub__(self, other) -> Period | BaseOffset: ... + def __add__(self, other) -> Period: ... diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx new file mode 100644 index 00000000..a9d607ca --- /dev/null +++ b/pandas/_libs/tslibs/period.pyx @@ -0,0 +1,2656 @@ +import warnings + +from pandas.util._exceptions import find_stack_level + +cimport numpy as cnp +from cpython.object cimport ( + Py_EQ, + Py_NE, + PyObject, + PyObject_RichCompare, + PyObject_RichCompareBool, +) +from numpy cimport ( + int32_t, + int64_t, + ndarray, +) + +import numpy as np + +cnp.import_array() + +cimport cython +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDelta_Check, + datetime, + import_datetime, +) +from libc.stdlib cimport ( + free, + malloc, +) +from libc.string cimport ( + memset, + strlen, +) +from libc.time cimport ( + strftime, + tm, +) + +# import datetime C API +import_datetime() + +cimport pandas._libs.tslibs.util as util +from pandas._libs.missing cimport C_NA +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_D, + NPY_FR_us, + astype_overflowsafe, + check_dts_bounds, + get_timedelta64_value, + npy_datetimestruct, + npy_datetimestruct_to_datetime, + pandas_datetime_to_datetimestruct, +) + +from pandas._libs.tslibs.timestamps import Timestamp + +from pandas._libs.tslibs.ccalendar cimport ( + dayofweek, + get_day_of_year, + get_days_in_month, + get_week_of_year, + is_leapyear, +) +from pandas._libs.tslibs.timedeltas cimport ( + delta_to_nanoseconds, + is_any_td_scalar, +) + +from pandas._libs.tslibs.conversion import DT64NS_DTYPE + +from pandas._libs.tslibs.dtypes cimport ( + FR_ANN, + FR_BUS, + FR_DAY, + FR_HR, + FR_MIN, + FR_MS, + FR_MTH, + FR_NS, + FR_QTR, + FR_SEC, + FR_UND, + FR_US, + FR_WK, + PeriodDtypeBase, + attrname_to_abbrevs, + freq_group_code_to_npy_unit, +) +from pandas._libs.tslibs.parsing cimport quarter_to_myear + +from pandas._libs.tslibs.parsing import parse_time_string + +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + c_nat_strings as nat_strings, + checknull_with_nat, +) +from pandas._libs.tslibs.offsets cimport ( + BaseOffset, + is_offset_object, + is_tick_object, + to_offset, +) + +from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG + +cdef: + enum: + INT32_MIN = -2_147_483_648LL + + +ctypedef struct asfreq_info: + int64_t intraday_conversion_factor + int is_end + int to_end + int from_end + +ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil + + +cdef extern from *: + """ + // must use npy typedef b/c int64_t is aliased in cython-generated c + // unclear why we need LL for that row. 
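+    // Entry [i][j] (with i <= j) is the number of j-units per i-unit, where
+    // the unit order is day, hour, minute, second, ms, us, ns; the lower
+    // triangle is unused (see get_daytime_conversion_factor below).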
+ // see https://github.com/pandas-dev/pandas/pull/34416/ + static npy_int64 daytime_conversion_factor_matrix[7][7] = { + {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, + {0LL, 1LL, 60LL, 3600LL, 3600000LL, 3600000000LL, 3600000000000LL}, + {0, 0, 1, 60, 60000, 60000000, 60000000000}, + {0, 0, 0, 1, 1000, 1000000, 1000000000}, + {0, 0, 0, 0, 1, 1000, 1000000}, + {0, 0, 0, 0, 0, 1, 1000}, + {0, 0, 0, 0, 0, 0, 1}}; + """ + int64_t daytime_conversion_factor_matrix[7][7] + + +cdef int max_value(int left, int right) nogil: + if left > right: + return left + return right + + +cdef int min_value(int left, int right) nogil: + if left < right: + return left + return right + + +cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil: + cdef: + int row = min_value(from_index, to_index) + int col = max_value(from_index, to_index) + # row or col < 6 means frequency strictly lower than Daily, which + # do not use daytime_conversion_factors + if row < 6: + return 0 + elif col < 6: + return 0 + return daytime_conversion_factor_matrix[row - 6][col - 6] + + +cdef int64_t nofunc(int64_t ordinal, asfreq_info *af_info) nogil: + return INT32_MIN + + +cdef int64_t no_op(int64_t ordinal, asfreq_info *af_info) nogil: + return ordinal + + +cdef freq_conv_func get_asfreq_func(int from_freq, int to_freq) nogil: + cdef: + int from_group = get_freq_group(from_freq) + int to_group = get_freq_group(to_freq) + + if from_group == FR_UND: + from_group = FR_DAY + + if from_group == FR_BUS: + if to_group == FR_ANN: + return asfreq_BtoA + elif to_group == FR_QTR: + return asfreq_BtoQ + elif to_group == FR_MTH: + return asfreq_BtoM + elif to_group == FR_WK: + return asfreq_BtoW + elif to_group == FR_BUS: + return no_op + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_BtoDT + else: + return nofunc + + elif to_group == FR_BUS: + if from_group == FR_ANN: + return asfreq_AtoB + elif from_group == FR_QTR: + return asfreq_QtoB + elif from_group == FR_MTH: + return asfreq_MtoB + elif from_group == FR_WK: + return asfreq_WtoB + elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_DTtoB + else: + return nofunc + + elif from_group == FR_ANN: + if to_group == FR_ANN: + return asfreq_AtoA + elif to_group == FR_QTR: + return asfreq_AtoQ + elif to_group == FR_MTH: + return asfreq_AtoM + elif to_group == FR_WK: + return asfreq_AtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_AtoDT + else: + return nofunc + + elif from_group == FR_QTR: + if to_group == FR_ANN: + return asfreq_QtoA + elif to_group == FR_QTR: + return asfreq_QtoQ + elif to_group == FR_MTH: + return asfreq_QtoM + elif to_group == FR_WK: + return asfreq_QtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_QtoDT + else: + return nofunc + + elif from_group == FR_MTH: + if to_group == FR_ANN: + return asfreq_MtoA + elif to_group == FR_QTR: + return asfreq_MtoQ + elif to_group == FR_MTH: + return no_op + elif to_group == FR_WK: + return asfreq_MtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_MtoDT + else: + return nofunc + + elif from_group == FR_WK: + if to_group == FR_ANN: + return asfreq_WtoA + elif to_group == FR_QTR: + return asfreq_WtoQ + elif to_group == FR_MTH: + return asfreq_WtoM + elif to_group == FR_WK: + return asfreq_WtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_WtoDT + else: + return 
nofunc + + elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + if to_group == FR_ANN: + return asfreq_DTtoA + elif to_group == FR_QTR: + return asfreq_DTtoQ + elif to_group == FR_MTH: + return asfreq_DTtoM + elif to_group == FR_WK: + return asfreq_DTtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + if from_group > to_group: + return downsample_daytime + else: + return upsample_daytime + + else: + return nofunc + + else: + return nofunc + + +# -------------------------------------------------------------------- +# Frequency Conversion Helpers + +cdef int64_t DtoB_weekday(int64_t unix_date) nogil: + return ((unix_date + 4) // 7) * 5 + ((unix_date + 4) % 7) - 4 + + +cdef int64_t DtoB(npy_datetimestruct *dts, int roll_back, + int64_t unix_date) nogil: + # calculate the current week (counting from 1970-01-01) treating + # sunday as last day of a week + cdef: + int day_of_week = dayofweek(dts.year, dts.month, dts.day) + + if roll_back == 1: + if day_of_week > 4: + # change to friday before weekend + unix_date -= (day_of_week - 4) + else: + if day_of_week > 4: + # change to Monday after weekend + unix_date += (7 - day_of_week) + + return DtoB_weekday(unix_date) + + +cdef inline int64_t upsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: + if af_info.is_end: + return (ordinal + 1) * af_info.intraday_conversion_factor - 1 + else: + return ordinal * af_info.intraday_conversion_factor + + +cdef inline int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: + return ordinal // af_info.intraday_conversion_factor + + +cdef inline int64_t transform_via_day(int64_t ordinal, + asfreq_info *af_info, + freq_conv_func first_func, + freq_conv_func second_func) nogil: + cdef: + int64_t result + + result = first_func(ordinal, af_info) + result = second_func(result, af_info) + return result + + +# -------------------------------------------------------------------- +# Conversion _to_ Daily Freq + +cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date + npy_datetimestruct dts + + ordinal += af_info.is_end + + dts.year = ordinal + 1970 + dts.month = 1 + adjust_dts_for_month(&dts, af_info.from_end) + + unix_date = unix_date_from_ymd(dts.year, dts.month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date + npy_datetimestruct dts + + ordinal += af_info.is_end + + dts.year = ordinal // 4 + 1970 + dts.month = (ordinal % 4) * 3 + 1 + adjust_dts_for_month(&dts, af_info.from_end) + + unix_date = unix_date_from_ymd(dts.year, dts.month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date + int year, month + + ordinal += af_info.is_end + + year = ordinal // 12 + 1970 + month = ordinal % 12 + 1 + + unix_date = unix_date_from_ymd(year, month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef int64_t asfreq_WtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal = (ordinal * 7 + af_info.from_end - 4 + + (7 - 1) * (af_info.is_end - 1)) + return upsample_daytime(ordinal, af_info) + + +# -------------------------------------------------------------------- +# Conversion _to_ BusinessDay Freq + +cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + 
npy_datetimestruct dts + int64_t unix_date = asfreq_AtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_QtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_MtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_WtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_WtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_DTtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = downsample_daytime(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + # This usage defines roll_back the opposite way from the others + roll_back = 1 - af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Daily Freq + +cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + npy_datetimestruct dts + + ordinal = downsample_daytime(ordinal, af_info) + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, &dts) + return dts_to_year_ordinal(&dts, af_info.to_end) + + +cdef int DtoQ_yq(int64_t ordinal, asfreq_info *af_info, npy_datetimestruct* dts) nogil: + cdef: + int quarter + + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, dts) + adjust_dts_for_qtr(dts, af_info.to_end) + + quarter = month_to_quarter(dts.month) + return quarter + + +cdef int64_t asfreq_DTtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int quarter + npy_datetimestruct dts + + ordinal = downsample_daytime(ordinal, af_info) + + quarter = DtoQ_yq(ordinal, af_info, &dts) + return ((dts.year - 1970) * 4 + quarter - 1) + + +cdef int64_t asfreq_DTtoM(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + npy_datetimestruct dts + + ordinal = downsample_daytime(ordinal, af_info) + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, &dts) + return dts_to_month_ordinal(&dts) + + +cdef int64_t asfreq_DTtoW(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal = downsample_daytime(ordinal, af_info) + return unix_date_to_week(ordinal, af_info.to_end) + + +cdef int64_t unix_date_to_week(int64_t unix_date, int to_end) nogil: + return (unix_date + 3 - to_end) // 7 + 1 + + +# -------------------------------------------------------------------- +# Conversion _from_ BusinessDay Freq + +cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal = ((ordinal + 3) // 5) * 7 + (ordinal + 3) % 5 - 3 + return upsample_daytime(ordinal, af_info) + + +cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + 
return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Annual Freq + +cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_AtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_AtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_AtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_AtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Quarterly Freq + +cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_QtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_QtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_QtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_QtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Monthly Freq + +cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_MtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_MtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_MtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Weekly Freq + +cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_WtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_WtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_WtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_WtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- + +@cython.cdivision +cdef char* c_strftime(npy_datetimestruct *dts, char *fmt): + """ + Generate a nice string representation of the period + object, originally from DateObject_strftime + + Parameters + ---------- + dts : npy_datetimestruct* + fmt : char* + + Returns + ------- + result : char* + """ + cdef: + tm c_date + char *result + int result_len = 
strlen(fmt) + 50 + + c_date.tm_sec = dts.sec + c_date.tm_min = dts.min + c_date.tm_hour = dts.hour + c_date.tm_mday = dts.day + c_date.tm_mon = dts.month - 1 + c_date.tm_year = dts.year - 1900 + c_date.tm_wday = (dayofweek(dts.year, dts.month, dts.day) + 1) % 7 + c_date.tm_yday = get_day_of_year(dts.year, dts.month, dts.day) - 1 + c_date.tm_isdst = -1 + + result = malloc(result_len * sizeof(char)) + + strftime(result, result_len, fmt, &c_date) + + return result + + +# ---------------------------------------------------------------------- +# Conversion between date_info and npy_datetimestruct + +cdef inline int get_freq_group(int freq) nogil: + # See also FreqGroup.get_freq_group + return (freq // 1000) * 1000 + + +cdef inline int get_freq_group_index(int freq) nogil: + return freq // 1000 + + +cdef void adjust_dts_for_month(npy_datetimestruct* dts, int from_end) nogil: + if from_end != 12: + dts.month += from_end + if dts.month > 12: + dts.month -= 12 + else: + dts.year -= 1 + + +cdef void adjust_dts_for_qtr(npy_datetimestruct* dts, int to_end) nogil: + if to_end != 12: + dts.month -= to_end + if dts.month <= 0: + dts.month += 12 + else: + dts.year += 1 + + +# Find the unix_date (days elapsed since datetime(1970, 1, 1) +# for the given year/month/day. +# Assumes GREGORIAN_CALENDAR */ +cdef int64_t unix_date_from_ymd(int year, int month, int day) nogil: + # Calculate the absolute date + cdef: + npy_datetimestruct dts + int64_t unix_date + + memset(&dts, 0, sizeof(npy_datetimestruct)) + dts.year = year + dts.month = month + dts.day = day + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts) + return unix_date + + +cdef inline int64_t dts_to_month_ordinal(npy_datetimestruct* dts) nogil: + # AKA: use npy_datetimestruct_to_datetime(NPY_FR_M, &dts) + return ((dts.year - 1970) * 12 + dts.month - 1) + + +cdef inline int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) nogil: + cdef: + int64_t result + + result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) + if dts.month > to_end: + return result + 1 + else: + return result + + +cdef inline int64_t dts_to_qtr_ordinal(npy_datetimestruct* dts, int to_end) nogil: + cdef: + int quarter + + adjust_dts_for_qtr(dts, to_end) + quarter = month_to_quarter(dts.month) + return ((dts.year - 1970) * 4 + quarter - 1) + + +cdef inline int get_anchor_month(int freq, int freq_group) nogil: + cdef: + int fmonth + fmonth = freq - freq_group + if fmonth == 0: + fmonth = 12 + return fmonth + + +# specifically _dont_ use cdvision or else ordinals near -1 are assigned to +# incorrect dates GH#19643 +@cython.cdivision(False) +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil: + """ + Generate an ordinal in period space + + Parameters + ---------- + dts: npy_datetimestruct* + freq : int + + Returns + ------- + period_ordinal : int64_t + """ + cdef: + int64_t unix_date + int freq_group, fmonth, mdiff + NPY_DATETIMEUNIT unit + + freq_group = get_freq_group(freq) + + if freq_group == FR_ANN: + fmonth = get_anchor_month(freq, freq_group) + return dts_to_year_ordinal(dts, fmonth) + + elif freq_group == FR_QTR: + fmonth = get_anchor_month(freq, freq_group) + return dts_to_qtr_ordinal(dts, fmonth) + + elif freq_group == FR_WK: + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + return unix_date_to_week(unix_date, freq - FR_WK) + + elif freq == FR_BUS: + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + return DtoB(dts, 0, unix_date) + + unit = freq_group_code_to_npy_unit(freq) + return 
npy_datetimestruct_to_datetime(unit, dts) + + +cdef void get_date_info(int64_t ordinal, int freq, npy_datetimestruct *dts) nogil: + cdef: + int64_t unix_date, nanos + npy_datetimestruct dts2 + + unix_date = get_unix_date(ordinal, freq) + nanos = get_time_nanos(freq, unix_date, ordinal) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts) + + pandas_datetime_to_datetimestruct(nanos, NPY_DATETIMEUNIT.NPY_FR_ns, &dts2) + dts.hour = dts2.hour + dts.min = dts2.min + dts.sec = dts2.sec + dts.us = dts2.us + dts.ps = dts2.ps + + +cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil: + """ + Returns the proleptic Gregorian ordinal of the date, as an integer. + This corresponds to the number of days since Jan., 1st, 1970 AD. + When the instance has a frequency less than daily, the proleptic date + is calculated for the last day of the period. + + Parameters + ---------- + period_ordinal : int64_t + freq : int + + Returns + ------- + unix_date : int64_t number of days since datetime(1970, 1, 1) + """ + cdef: + asfreq_info af_info + freq_conv_func toDaily = NULL + + if freq == FR_DAY: + return period_ordinal + + toDaily = get_asfreq_func(freq, FR_DAY) + get_asfreq_info(freq, FR_DAY, True, &af_info) + return toDaily(period_ordinal, &af_info) + + +@cython.cdivision +cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil: + """ + Find the number of nanoseconds after midnight on the given unix_date + that the ordinal represents in the given frequency. + + Parameters + ---------- + freq : int + unix_date : int64_t + ordinal : int64_t + + Returns + ------- + int64_t + """ + cdef: + int64_t sub, factor + int64_t nanos_in_day = 24 * 3600 * 10**9 + + freq = get_freq_group(freq) + + if freq <= FR_DAY: + return 0 + + elif freq == FR_NS: + factor = 1 + + elif freq == FR_US: + factor = 10**3 + + elif freq == FR_MS: + factor = 10**6 + + elif freq == FR_SEC: + factor = 10 **9 + + elif freq == FR_MIN: + factor = 10**9 * 60 + + else: + # We must have freq == FR_HR + factor = 10**9 * 3600 + + sub = ordinal - unix_date * (nanos_in_day / factor) + return sub * factor + + +cdef int get_yq(int64_t ordinal, int freq, npy_datetimestruct* dts): + """ + Find the year and quarter of a Period with the given ordinal and frequency + + Parameters + ---------- + ordinal : int64_t + freq : int + dts : *npy_datetimestruct + + Returns + ------- + quarter : int + describes the implied quarterly frequency associated with `freq` + + Notes + ----- + Sets dts.year in-place. + """ + cdef: + asfreq_info af_info + int qtr_freq + int64_t unix_date + int quarter + + unix_date = get_unix_date(ordinal, freq) + + if get_freq_group(freq) == FR_QTR: + qtr_freq = freq + else: + qtr_freq = FR_QTR + + get_asfreq_info(FR_DAY, qtr_freq, True, &af_info) + + quarter = DtoQ_yq(unix_date, &af_info, dts) + return quarter + + +cdef inline int month_to_quarter(int month) nogil: + return (month - 1) // 3 + 1 + + +# ---------------------------------------------------------------------- +# Period logic + +@cython.wraparound(False) +@cython.boundscheck(False) +def periodarr_to_dt64arr(const int64_t[:] periodarr, int freq): + """ + Convert array to datetime64 values from a set of ordinals corresponding to + periods per period convention. + """ + cdef: + int64_t[::1] out + Py_ssize_t i, N + + if freq < 6000: # i.e. 
FR_DAY, hard-code to avoid need to cast + N = len(periodarr) + out = np.empty(N, dtype="i8") + + # We get here with freqs that do not correspond to a datetime64 unit + for i in range(N): + out[i] = period_ordinal_to_dt64(periodarr[i], freq) + + return out.base # .base to access underlying np.ndarray + + else: + # Short-circuit for performance + if freq == FR_NS: + # TODO: copy? + return periodarr.base + + if freq == FR_US: + dta = periodarr.base.view("M8[us]") + elif freq == FR_MS: + dta = periodarr.base.view("M8[ms]") + elif freq == FR_SEC: + dta = periodarr.base.view("M8[s]") + elif freq == FR_MIN: + dta = periodarr.base.view("M8[m]") + elif freq == FR_HR: + dta = periodarr.base.view("M8[h]") + elif freq == FR_DAY: + dta = periodarr.base.view("M8[D]") + return astype_overflowsafe(dta, dtype=DT64NS_DTYPE) + + +cdef void get_asfreq_info(int from_freq, int to_freq, + bint is_end, asfreq_info *af_info) nogil: + """ + Construct the `asfreq_info` object used to convert an ordinal from + `from_freq` to `to_freq`. + + Parameters + ---------- + from_freq : int + to_freq int + is_end : bool + af_info : *asfreq_info + """ + cdef: + int from_group = get_freq_group(from_freq) + int to_group = get_freq_group(to_freq) + + af_info.is_end = is_end + + af_info.intraday_conversion_factor = get_daytime_conversion_factor( + get_freq_group_index(max_value(from_group, FR_DAY)), + get_freq_group_index(max_value(to_group, FR_DAY))) + + if from_group == FR_WK: + af_info.from_end = calc_week_end(from_freq, from_group) + elif from_group == FR_ANN: + af_info.from_end = calc_a_year_end(from_freq, from_group) + elif from_group == FR_QTR: + af_info.from_end = calc_a_year_end(from_freq, from_group) + + if to_group == FR_WK: + af_info.to_end = calc_week_end(to_freq, to_group) + elif to_group == FR_ANN: + af_info.to_end = calc_a_year_end(to_freq, to_group) + elif to_group == FR_QTR: + af_info.to_end = calc_a_year_end(to_freq, to_group) + + +@cython.cdivision +cdef int calc_a_year_end(int freq, int group) nogil: + cdef: + int result = (freq - group) % 12 + if result == 0: + return 12 + else: + return result + + +cdef inline int calc_week_end(int freq, int group) nogil: + return freq - group + + +cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): + """ + Convert period ordinal from one frequency to another, and if upsampling, + choose to use start ('S') or end ('E') of period. + """ + cdef: + int64_t retval + + _period_asfreq(&ordinal, &retval, 1, freq1, freq2, end) + return retval + + +@cython.wraparound(False) +@cython.boundscheck(False) +def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): + """ + Convert int64-array of period ordinals from one frequency to another, and + if upsampling, choose to use start ('S') or end ('E') of period. 
+ """ + cdef: + Py_ssize_t n = len(arr) + Py_ssize_t increment = arr.strides[0] // 8 + ndarray[int64_t] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_INT64, 0) + + _period_asfreq( + cnp.PyArray_DATA(arr), + cnp.PyArray_DATA(result), + n, + freq1, + freq2, + end, + increment, + ) + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef void _period_asfreq( + int64_t* ordinals, + int64_t* out, + Py_ssize_t length, + int freq1, + int freq2, + bint end, + Py_ssize_t increment=1, +): + """See period_asfreq.__doc__""" + cdef: + Py_ssize_t i + freq_conv_func func + asfreq_info af_info + int64_t val + + if length == 1 and ordinals[0] == NPY_NAT: + # fastpath avoid calling get_asfreq_func + out[0] = NPY_NAT + return + + func = get_asfreq_func(freq1, freq2) + get_asfreq_info(freq1, freq2, end, &af_info) + + for i in range(length): + val = ordinals[i * increment] + if val != NPY_NAT: + val = func(val, &af_info) + out[i] = val + + +cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, + int s, int us, int ps, int freq): + """ + Find the ordinal representation of the given datetime components at the + frequency `freq`. + + Parameters + ---------- + y : int + m : int + d : int + h : int + min : int + s : int + us : int + ps : int + + Returns + ------- + ordinal : int64_t + """ + cdef: + npy_datetimestruct dts + dts.year = y + dts.month = m + dts.day = d + dts.hour = h + dts.min = min + dts.sec = s + dts.us = us + dts.ps = ps + return get_period_ordinal(&dts, freq) + + +cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: + cdef: + npy_datetimestruct dts + + if ordinal == NPY_NAT: + return NPY_NAT + + get_date_info(ordinal, freq, &dts) + + check_dts_bounds(&dts) + return npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) + + +cdef str period_format(int64_t value, int freq, object fmt=None): + cdef: + int freq_group + + if value == NPY_NAT: + return "NaT" + + if isinstance(fmt, str): + fmt = fmt.encode("utf-8") + + if fmt is None: + freq_group = get_freq_group(freq) + if freq_group == FR_ANN: + fmt = b'%Y' + elif freq_group == FR_QTR: + fmt = b'%FQ%q' + elif freq_group == FR_MTH: + fmt = b'%Y-%m' + elif freq_group == FR_WK: + left = period_asfreq(value, freq, FR_DAY, 0) + right = period_asfreq(value, freq, FR_DAY, 1) + return f"{period_format(left, FR_DAY)}/{period_format(right, FR_DAY)}" + elif freq_group == FR_BUS or freq_group == FR_DAY: + fmt = b'%Y-%m-%d' + elif freq_group == FR_HR: + fmt = b'%Y-%m-%d %H:00' + elif freq_group == FR_MIN: + fmt = b'%Y-%m-%d %H:%M' + elif freq_group == FR_SEC: + fmt = b'%Y-%m-%d %H:%M:%S' + elif freq_group == FR_MS: + fmt = b'%Y-%m-%d %H:%M:%S.%l' + elif freq_group == FR_US: + fmt = b'%Y-%m-%d %H:%M:%S.%u' + elif freq_group == FR_NS: + fmt = b'%Y-%m-%d %H:%M:%S.%n' + else: + raise ValueError(f"Unknown freq: {freq}") + + return _period_strftime(value, freq, fmt) + + +cdef list extra_fmts = [(b"%q", b"^`AB`^"), + (b"%f", b"^`CD`^"), + (b"%F", b"^`EF`^"), + (b"%l", b"^`GH`^"), + (b"%u", b"^`IJ`^"), + (b"%n", b"^`KL`^")] + +cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", + "^`GH`^", "^`IJ`^", "^`KL`^"] + +cdef str _period_strftime(int64_t value, int freq, bytes fmt): + cdef: + Py_ssize_t i + npy_datetimestruct dts + char *formatted + bytes pat, brepl + list found_pat = [False] * len(extra_fmts) + int quarter + int32_t us, ps + str result, repl + + get_date_info(value, freq, &dts) + + # Find our additional directives in the pattern and replace them with + # placeholders that are not 
processed by c_strftime + for i in range(len(extra_fmts)): + pat = extra_fmts[i][0] + brepl = extra_fmts[i][1] + if pat in fmt: + fmt = fmt.replace(pat, brepl) + found_pat[i] = True + + # Execute c_strftime to process the usual datetime directives + formatted = c_strftime(&dts, fmt) + + result = util.char_to_string(formatted) + free(formatted) + + # Now we will fill the placeholders corresponding to our additional directives + + # First prepare the contents + # Save these to local vars as dts can be modified by get_yq below + us = dts.us + ps = dts.ps + if any(found_pat[0:3]): + # Note: this modifies `dts` in-place so that year becomes fiscal year + # However it looses the us and ps + quarter = get_yq(value, freq, &dts) + else: + quarter = 0 + + # Now do the filling per se + for i in range(len(extra_fmts)): + if found_pat[i]: + + if i == 0: # %q, 1-digit quarter. + repl = f"{quarter}" + elif i == 1: # %f, 2-digit 'Fiscal' year + repl = f"{(dts.year % 100):02d}" + elif i == 2: # %F, 'Fiscal' year with a century + repl = str(dts.year) + elif i == 3: # %l, milliseconds + repl = f"{(us // 1_000):03d}" + elif i == 4: # %u, microseconds + repl = f"{(us):06d}" + elif i == 5: # %n, nanoseconds + repl = f"{((us * 1000) + (ps // 1000)):09d}" + + result = result.replace(str_extra_fmts[i], repl) + + return result + + +# ---------------------------------------------------------------------- +# period accessors + +ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN + + +cdef int pyear(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.year + + +cdef int pqyear(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + + get_yq(ordinal, freq, &dts) + return dts.year + + +cdef int pquarter(int64_t ordinal, int freq): + cdef: + int quarter + npy_datetimestruct dts + quarter = get_yq(ordinal, freq, &dts) + return quarter + + +cdef int pmonth(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.month + + +cdef int pday(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.day + + +cdef int pweekday(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dayofweek(dts.year, dts.month, dts.day) + + +cdef int pday_of_year(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return get_day_of_year(dts.year, dts.month, dts.day) + + +cdef int pweek(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return get_week_of_year(dts.year, dts.month, dts.day) + + +cdef int phour(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.hour + + +cdef int pminute(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.min + + +cdef int psecond(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.sec + + +cdef int pdays_in_month(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return get_days_in_month(dts.year, dts.month) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_period_field_arr(str field, const int64_t[:] arr, int freq): + cdef: + Py_ssize_t i, sz + int64_t[::1] out + accessor f + + func = _get_accessor_func(field) + if func is NULL: + raise 
ValueError(f"Unrecognized field name: {field}") + + sz = len(arr) + out = np.empty(sz, dtype=np.int64) + + for i in range(sz): + if arr[i] == NPY_NAT: + out[i] = -1 + continue + out[i] = func(arr[i], freq) + + return out.base # .base to access underlying np.ndarray + + +cdef accessor _get_accessor_func(str field): + if field == "year": + return pyear + elif field == "qyear": + return pqyear + elif field == "quarter": + return pquarter + elif field == "month": + return pmonth + elif field == "day": + return pday + elif field == "hour": + return phour + elif field == "minute": + return pminute + elif field == "second": + return psecond + elif field == "week": + return pweek + elif field == "day_of_year": + return pday_of_year + elif field == "weekday" or field == "day_of_week": + return pweekday + elif field == "days_in_month": + return pdays_in_month + return NULL + + +@cython.wraparound(False) +@cython.boundscheck(False) +def from_ordinals(const int64_t[:] values, freq): + cdef: + Py_ssize_t i, n = len(values) + int64_t[::1] result = np.empty(len(values), dtype="i8") + int64_t val + + freq = to_offset(freq) + if not isinstance(freq, BaseOffset): + raise ValueError("freq not specified and cannot be inferred") + + for i in range(n): + val = values[i] + if val == NPY_NAT: + result[i] = NPY_NAT + else: + result[i] = Period(val, freq=freq).ordinal + + return result.base + + +@cython.wraparound(False) +@cython.boundscheck(False) +def extract_ordinals(ndarray values, freq) -> np.ndarray: + # values is object-dtype, may be 2D + + cdef: + Py_ssize_t i, n = values.size + int64_t ordinal + ndarray ordinals = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_INT64, 0) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values) + object p + + if values.descr.type_num != cnp.NPY_OBJECT: + # if we don't raise here, we'll segfault later! + raise TypeError("extract_ordinals values must be object-dtype") + + freqstr = Period._maybe_convert_freq(freq).freqstr + + for i in range(n): + # Analogous to: p = values[i] + p = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + ordinal = _extract_ordinal(p, freqstr, freq) + + # Analogous to: ordinals[i] = ordinal + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ordinal + + cnp.PyArray_MultiIter_NEXT(mi) + + return ordinals + + +cdef inline int64_t _extract_ordinal(object item, str freqstr, freq) except? -1: + """ + See extract_ordinals. + """ + cdef: + int64_t ordinal + + if checknull_with_nat(item) or item is C_NA: + ordinal = NPY_NAT + elif util.is_integer_object(item): + if item == NPY_NAT: + ordinal = NPY_NAT + else: + raise TypeError(item) + else: + try: + ordinal = item.ordinal + + if item.freqstr != freqstr: + msg = DIFFERENT_FREQ.format(cls="PeriodIndex", + own_freq=freqstr, + other_freq=item.freqstr) + raise IncompatibleFrequency(msg) + + except AttributeError: + item = Period(item, freq=freq) + if item is NaT: + # input may contain NaT-like string + ordinal = NPY_NAT + else: + ordinal = item.ordinal + + return ordinal + + +def extract_freq(ndarray[object] values) -> BaseOffset: + # TODO: Change type to const object[:] when Cython supports that. 
+ + cdef: + Py_ssize_t i, n = len(values) + object value + + for i in range(n): + value = values[i] + + if is_period_object(value): + return value.freq + + raise ValueError('freq not specified and cannot be inferred') + +# ----------------------------------------------------------------------- +# period helpers + + +DIFFERENT_FREQ = ("Input has different freq={other_freq} " + "from {cls}(freq={own_freq})") + + +class IncompatibleFrequency(ValueError): + pass + + +cdef class PeriodMixin: + # Methods shared between Period and PeriodArray + + @property + def start_time(self) -> Timestamp: + """ + Get the Timestamp for the start of the period. + + Returns + ------- + Timestamp + + See Also + -------- + Period.end_time : Return the end Timestamp. + Period.dayofyear : Return the day of year. + Period.daysinmonth : Return the days in that month. + Period.dayofweek : Return the day of the week. + + Examples + -------- + >>> period = pd.Period('2012-1-1', freq='D') + >>> period + Period('2012-01-01', 'D') + + >>> period.start_time + Timestamp('2012-01-01 00:00:00') + + >>> period.end_time + Timestamp('2012-01-01 23:59:59.999999999') + """ + return self.to_timestamp(how="start") + + @property + def end_time(self) -> Timestamp: + """ + Get the Timestamp for the end of the period. + + Returns + ------- + Timestamp + + See Also + -------- + Period.start_time : Return the start Timestamp. + Period.dayofyear : Return the day of year. + Period.daysinmonth : Return the days in that month. + Period.dayofweek : Return the day of the week. + """ + return self.to_timestamp(how="end") + + def _require_matching_freq(self, other, base=False): + # See also arrays.period.raise_on_incompatible + if is_offset_object(other): + other_freq = other + else: + other_freq = other.freq + + if base: + condition = self.freq.base != other_freq.base + else: + condition = self.freq != other_freq + + if condition: + msg = DIFFERENT_FREQ.format( + cls=type(self).__name__, + own_freq=self.freqstr, + other_freq=other_freq.freqstr, + ) + raise IncompatibleFrequency(msg) + + +cdef class _Period(PeriodMixin): + + cdef readonly: + int64_t ordinal + PeriodDtypeBase _dtype + BaseOffset freq + + # higher than np.ndarray, np.matrix, np.timedelta64 + __array_priority__ = 100 + + dayofweek = _Period.day_of_week + dayofyear = _Period.day_of_year + + def __cinit__(self, int64_t ordinal, BaseOffset freq): + self.ordinal = ordinal + self.freq = freq + # Note: this is more performant than PeriodDtype.from_date_offset(freq) + # because from_date_offset cannot be made a cdef method (until cython + # supported cdef classmethods) + self._dtype = PeriodDtypeBase(freq._period_dtype_code) + + @classmethod + def _maybe_convert_freq(cls, object freq) -> BaseOffset: + """ + Internally we allow integer and tuple representations (for now) that + are not recognized by to_offset, so we convert them here. Also, a + Period's freq attribute must have `freq.n > 0`, which we check for here. + + Returns + ------- + DateOffset + """ + if isinstance(freq, int): + # We already have a dtype code + dtype = PeriodDtypeBase(freq) + freq = dtype._freqstr + + freq = to_offset(freq) + + if freq.n <= 0: + raise ValueError("Frequency must be positive, because it " + f"represents span: {freq.freqstr}") + + return freq + + @classmethod + def _from_ordinal(cls, ordinal: int64_t, freq) -> "Period": + """ + Fast creation from an ordinal and freq that are already validated! 
+ """ + if ordinal == NPY_NAT: + return NaT + else: + freq = cls._maybe_convert_freq(freq) + self = _Period.__new__(cls, ordinal, freq) + return self + + def __richcmp__(self, other, op): + if is_period_object(other): + if other.freq != self.freq: + if op == Py_EQ: + return False + elif op == Py_NE: + return True + self._require_matching_freq(other) + return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) + elif other is NaT: + return op == Py_NE + elif util.is_array(other): + # GH#44285 + if cnp.PyArray_IsZeroDim(other): + return PyObject_RichCompare(self, other.item(), op) + else: + # in particular ndarray[object]; see test_pi_cmp_period + return np.array([PyObject_RichCompare(self, x, op) for x in other]) + return NotImplemented + + def __hash__(self): + return hash((self.ordinal, self.freqstr)) + + def _add_timedeltalike_scalar(self, other) -> "Period": + cdef: + int64_t inc + + if not is_tick_object(self.freq): + raise IncompatibleFrequency("Input cannot be converted to " + f"Period(freq={self.freqstr})") + + if util.is_timedelta64_object(other) and get_timedelta64_value(other) == NPY_NAT: + # i.e. np.timedelta64("nat") + return NaT + + try: + inc = delta_to_nanoseconds(other, reso=self.freq._reso, round_ok=False) + except ValueError as err: + raise IncompatibleFrequency("Input cannot be converted to " + f"Period(freq={self.freqstr})") from err + # TODO: overflow-check here + ordinal = self.ordinal + inc + return Period(ordinal=ordinal, freq=self.freq) + + def _add_offset(self, other) -> "Period": + # Non-Tick DateOffset other + cdef: + int64_t ordinal + + self._require_matching_freq(other, base=True) + + ordinal = self.ordinal + other.n + return Period(ordinal=ordinal, freq=self.freq) + + def __add__(self, other): + if not is_period_object(self): + # cython semantics; this is analogous to a call to __radd__ + # TODO(cython3): remove this + if self is NaT: + return NaT + return other.__add__(self) + + if is_any_td_scalar(other): + return self._add_timedeltalike_scalar(other) + elif is_offset_object(other): + return self._add_offset(other) + elif other is NaT: + return NaT + elif util.is_integer_object(other): + ordinal = self.ordinal + other * self.freq.n + return Period(ordinal=ordinal, freq=self.freq) + + elif is_period_object(other): + # can't add datetime-like + # GH#17983; can't just return NotImplemented bc we get a RecursionError + # when called via np.add.reduce see TestNumpyReductions.test_add + # in npdev build + sname = type(self).__name__ + oname = type(other).__name__ + raise TypeError(f"unsupported operand type(s) for +: '{sname}' " + f"and '{oname}'") + + return NotImplemented + + def __radd__(self, other): + return self.__add__(other) + + def __sub__(self, other): + if not is_period_object(self): + # cython semantics; this is like a call to __rsub__ + # TODO(cython3): remove this + if self is NaT: + return NaT + return NotImplemented + + elif ( + is_any_td_scalar(other) + or is_offset_object(other) + or util.is_integer_object(other) + ): + return self + (-other) + elif is_period_object(other): + self._require_matching_freq(other) + # GH 23915 - mul by base freq since __add__ is agnostic of n + return (self.ordinal - other.ordinal) * self.freq.base + elif other is NaT: + return NaT + + return NotImplemented + + def __rsub__(self, other): + if other is NaT: + return NaT + return NotImplemented + + def asfreq(self, freq, how='E') -> "Period": + """ + Convert Period to desired frequency, at the start or end of the interval. 
+ + Parameters + ---------- + freq : str, BaseOffset + The desired frequency. + how : {'E', 'S', 'end', 'start'}, default 'end' + Start or end of the timespan. + + Returns + ------- + resampled : Period + """ + freq = self._maybe_convert_freq(freq) + how = validate_end_alias(how) + base1 = self._dtype._dtype_code + base2 = freq_to_dtype_code(freq) + + # self.n can't be negative or 0 + end = how == 'E' + if end: + ordinal = self.ordinal + self.freq.n - 1 + else: + ordinal = self.ordinal + ordinal = period_asfreq(ordinal, base1, base2, end) + + return Period(ordinal=ordinal, freq=freq) + + def to_timestamp(self, freq=None, how='start', tz=None) -> Timestamp: + """ + Return the Timestamp representation of the Period. + + Uses the target frequency specified at the part of the period specified + by `how`, which is either `Start` or `Finish`. + + Parameters + ---------- + freq : str or DateOffset + Target frequency. Default is 'D' if self.freq is week or + longer and 'S' otherwise. + how : str, default 'S' (start) + One of 'S', 'E'. Can be aliased as case insensitive + 'Start', 'Finish', 'Begin', 'End'. + + Returns + ------- + Timestamp + """ + if tz is not None: + # GH#34522 + warnings.warn( + "Period.to_timestamp `tz` argument is deprecated and will " + "be removed in a future version. Use " + "`per.to_timestamp(...).tz_localize(tz)` instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + how = validate_end_alias(how) + + end = how == 'E' + if end: + if freq == "B" or self.freq == "B": + # roll forward to ensure we land on B date + adjust = np.timedelta64(1, "D") - np.timedelta64(1, "ns") + return self.to_timestamp(how="start") + adjust + endpoint = (self + self.freq).to_timestamp(how='start') + return endpoint - np.timedelta64(1, "ns") + + if freq is None: + freq = self._dtype._get_to_timestamp_base() + base = freq + else: + freq = self._maybe_convert_freq(freq) + base = freq._period_dtype_code + + val = self.asfreq(freq, how) + + dt64 = period_ordinal_to_dt64(val.ordinal, base) + return Timestamp(dt64, tz=tz) + + @property + def year(self) -> int: + """ + Return the year this Period falls on. + """ + base = self._dtype._dtype_code + return pyear(self.ordinal, base) + + @property + def month(self) -> int: + """ + Return the month this Period falls on. + """ + base = self._dtype._dtype_code + return pmonth(self.ordinal, base) + + @property + def day(self) -> int: + """ + Get day of the month that a Period falls on. + + Returns + ------- + int + + See Also + -------- + Period.dayofweek : Get the day of the week. + Period.dayofyear : Get the day of the year. + + Examples + -------- + >>> p = pd.Period("2018-03-11", freq='H') + >>> p.day + 11 + """ + base = self._dtype._dtype_code + return pday(self.ordinal, base) + + @property + def hour(self) -> int: + """ + Get the hour of the day component of the Period. + + Returns + ------- + int + The hour as an integer, between 0 and 23. + + See Also + -------- + Period.second : Get the second component of the Period. + Period.minute : Get the minute component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.hour + 13 + + Period longer than a day + + >>> p = pd.Period("2018-03-11", freq="M") + >>> p.hour + 0 + """ + base = self._dtype._dtype_code + return phour(self.ordinal, base) + + @property + def minute(self) -> int: + """ + Get minute of the hour component of the Period. + + Returns + ------- + int + The minute as an integer, between 0 and 59. 
+ + See Also + -------- + Period.hour : Get the hour component of the Period. + Period.second : Get the second component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.minute + 3 + """ + base = self._dtype._dtype_code + return pminute(self.ordinal, base) + + @property + def second(self) -> int: + """ + Get the second component of the Period. + + Returns + ------- + int + The second of the Period (ranges from 0 to 59). + + See Also + -------- + Period.hour : Get the hour component of the Period. + Period.minute : Get the minute component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.second + 12 + """ + base = self._dtype._dtype_code + return psecond(self.ordinal, base) + + @property + def weekofyear(self) -> int: + """ + Get the week of the year on the given Period. + + Returns + ------- + int + + See Also + -------- + Period.dayofweek : Get the day component of the Period. + Period.weekday : Get the day component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11", "H") + >>> p.weekofyear + 10 + + >>> p = pd.Period("2018-02-01", "D") + >>> p.weekofyear + 5 + + >>> p = pd.Period("2018-01-06", "D") + >>> p.weekofyear + 1 + """ + base = self._dtype._dtype_code + return pweek(self.ordinal, base) + + @property + def week(self) -> int: + """ + Get the week of the year on the given Period. + + Returns + ------- + int + + See Also + -------- + Period.dayofweek : Get the day component of the Period. + Period.weekday : Get the day component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11", "H") + >>> p.week + 10 + + >>> p = pd.Period("2018-02-01", "D") + >>> p.week + 5 + + >>> p = pd.Period("2018-01-06", "D") + >>> p.week + 1 + """ + return self.weekofyear + + @property + def day_of_week(self) -> int: + """ + Day of the week the period lies in, with Monday=0 and Sunday=6. + + If the period frequency is lower than daily (e.g. hourly), and the + period spans over multiple days, the day at the start of the period is + used. + + If the frequency is higher than daily (e.g. monthly), the last day + of the period is used. + + Returns + ------- + int + Day of the week. + + See Also + -------- + Period.day_of_week : Day of the week the period lies in. + Period.weekday : Alias of Period.day_of_week. + Period.day : Day of the month. + Period.dayofyear : Day of the year. + + Examples + -------- + >>> per = pd.Period('2017-12-31 22:00', 'H') + >>> per.day_of_week + 6 + + For periods that span over multiple days, the day at the beginning of + the period is returned. + + >>> per = pd.Period('2017-12-31 22:00', '4H') + >>> per.day_of_week + 6 + >>> per.start_time.day_of_week + 6 + + For periods with a frequency higher than days, the last day of the + period is returned. + + >>> per = pd.Period('2018-01', 'M') + >>> per.day_of_week + 2 + >>> per.end_time.day_of_week + 2 + """ + base = self._dtype._dtype_code + return pweekday(self.ordinal, base) + + @property + def weekday(self) -> int: + """ + Day of the week the period lies in, with Monday=0 and Sunday=6. + + If the period frequency is lower than daily (e.g. hourly), and the + period spans over multiple days, the day at the start of the period is + used. + + If the frequency is higher than daily (e.g. monthly), the last day + of the period is used. + + Returns + ------- + int + Day of the week. + + See Also + -------- + Period.dayofweek : Day of the week the period lies in. + Period.weekday : Alias of Period.dayofweek. 
+ Period.day : Day of the month. + Period.dayofyear : Day of the year. + + Examples + -------- + >>> per = pd.Period('2017-12-31 22:00', 'H') + >>> per.dayofweek + 6 + + For periods that span over multiple days, the day at the beginning of + the period is returned. + + >>> per = pd.Period('2017-12-31 22:00', '4H') + >>> per.dayofweek + 6 + >>> per.start_time.dayofweek + 6 + + For periods with a frequency higher than days, the last day of the + period is returned. + + >>> per = pd.Period('2018-01', 'M') + >>> per.dayofweek + 2 + >>> per.end_time.dayofweek + 2 + """ + # Docstring is a duplicate from dayofweek. Reusing docstrings with + # Appender doesn't work for properties in Cython files, and setting + # the __doc__ attribute is also not possible. + return self.dayofweek + + @property + def day_of_year(self) -> int: + """ + Return the day of the year. + + This attribute returns the day of the year on which the particular + date occurs. The return value ranges between 1 to 365 for regular + years and 1 to 366 for leap years. + + Returns + ------- + int + The day of year. + + See Also + -------- + Period.day : Return the day of the month. + Period.day_of_week : Return the day of week. + PeriodIndex.day_of_year : Return the day of year of all indexes. + + Examples + -------- + >>> period = pd.Period("2015-10-23", freq='H') + >>> period.day_of_year + 296 + >>> period = pd.Period("2012-12-31", freq='D') + >>> period.day_of_year + 366 + >>> period = pd.Period("2013-01-01", freq='D') + >>> period.day_of_year + 1 + """ + base = self._dtype._dtype_code + return pday_of_year(self.ordinal, base) + + @property + def quarter(self) -> int: + """ + Return the quarter this Period falls on. + """ + base = self._dtype._dtype_code + return pquarter(self.ordinal, base) + + @property + def qyear(self) -> int: + """ + Fiscal year the Period lies in according to its starting-quarter. + + The `year` and the `qyear` of the period will be the same if the fiscal + and calendar years are the same. When they are not, the fiscal year + can be different from the calendar year of the period. + + Returns + ------- + int + The fiscal year of the period. + + See Also + -------- + Period.year : Return the calendar year of the period. + + Examples + -------- + If the natural and fiscal year are the same, `qyear` and `year` will + be the same. + + >>> per = pd.Period('2018Q1', freq='Q') + >>> per.qyear + 2018 + >>> per.year + 2018 + + If the fiscal year starts in April (`Q-MAR`), the first quarter of + 2018 will start in April 2017. `year` will then be 2017, but `qyear` + will be the fiscal year, 2018. + + >>> per = pd.Period('2018Q1', freq='Q-MAR') + >>> per.start_time + Timestamp('2017-04-01 00:00:00') + >>> per.qyear + 2018 + >>> per.year + 2017 + """ + base = self._dtype._dtype_code + return pqyear(self.ordinal, base) + + @property + def days_in_month(self) -> int: + """ + Get the total number of days in the month that this period falls on. + + Returns + ------- + int + + See Also + -------- + Period.daysinmonth : Gets the number of days in the month. + DatetimeIndex.daysinmonth : Gets the number of days in the month. + calendar.monthrange : Returns a tuple containing weekday + (0-6 ~ Mon-Sun) and number of days (28-31). 
+ + Examples + -------- + >>> p = pd.Period('2018-2-17') + >>> p.days_in_month + 28 + + >>> pd.Period('2018-03-01').days_in_month + 31 + + Handles the leap year case as well: + + >>> p = pd.Period('2016-2-17') + >>> p.days_in_month + 29 + """ + base = self._dtype._dtype_code + return pdays_in_month(self.ordinal, base) + + @property + def daysinmonth(self) -> int: + """ + Get the total number of days of the month that this period falls on. + + Returns + ------- + int + + See Also + -------- + Period.days_in_month : Return the days of the month. + Period.dayofyear : Return the day of the year. + + Examples + -------- + >>> p = pd.Period("2018-03-11", freq='H') + >>> p.daysinmonth + 31 + """ + return self.days_in_month + + @property + def is_leap_year(self) -> bool: + """ + Return True if the period's year is in a leap year. + """ + return bool(is_leapyear(self.year)) + + @classmethod + def now(cls, freq=None): + """ + Return the period of now's date. + """ + return Period(datetime.now(), freq=freq) + + @property + def freqstr(self) -> str: + """ + Return a string representation of the frequency. + """ + return self.freq.freqstr + + def __repr__(self) -> str: + base = self._dtype._dtype_code + formatted = period_format(self.ordinal, base) + return f"Period('{formatted}', '{self.freqstr}')" + + def __str__(self) -> str: + """ + Return a string representation for a particular DataFrame + """ + base = self._dtype._dtype_code + formatted = period_format(self.ordinal, base) + value = str(formatted) + return value + + def __setstate__(self, state): + self.freq = state[1] + self.ordinal = state[2] + + def __reduce__(self): + object_state = None, self.freq, self.ordinal + return (Period, object_state) + + def strftime(self, fmt: str) -> str: + r""" + Returns a formatted string representation of the :class:`Period`. + + ``fmt`` must be a string containing one or several directives. + The method recognizes the same directives as the :func:`time.strftime` + function of the standard Python distribution, as well as the specific + additional directives ``%f``, ``%F``, ``%q``, ``%l``, ``%u``, ``%n``. + (formatting & docs originally from scikits.timeries). + + +-----------+--------------------------------+-------+ + | Directive | Meaning | Notes | + +===========+================================+=======+ + | ``%a`` | Locale's abbreviated weekday | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%A`` | Locale's full weekday name. | | + +-----------+--------------------------------+-------+ + | ``%b`` | Locale's abbreviated month | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%B`` | Locale's full month name. | | + +-----------+--------------------------------+-------+ + | ``%c`` | Locale's appropriate date and | | + | | time representation. | | + +-----------+--------------------------------+-------+ + | ``%d`` | Day of the month as a decimal | | + | | number [01,31]. | | + +-----------+--------------------------------+-------+ + | ``%f`` | 'Fiscal' year without a | \(1) | + | | century as a decimal number | | + | | [00,99] | | + +-----------+--------------------------------+-------+ + | ``%F`` | 'Fiscal' year with a century | \(2) | + | | as a decimal number | | + +-----------+--------------------------------+-------+ + | ``%H`` | Hour (24-hour clock) as a | | + | | decimal number [00,23]. | | + +-----------+--------------------------------+-------+ + | ``%I`` | Hour (12-hour clock) as a | | + | | decimal number [01,12]. 
| | + +-----------+--------------------------------+-------+ + | ``%j`` | Day of the year as a decimal | | + | | number [001,366]. | | + +-----------+--------------------------------+-------+ + | ``%m`` | Month as a decimal number | | + | | [01,12]. | | + +-----------+--------------------------------+-------+ + | ``%M`` | Minute as a decimal number | | + | | [00,59]. | | + +-----------+--------------------------------+-------+ + | ``%p`` | Locale's equivalent of either | \(3) | + | | AM or PM. | | + +-----------+--------------------------------+-------+ + | ``%q`` | Quarter as a decimal number | | + | | [1,4] | | + +-----------+--------------------------------+-------+ + | ``%S`` | Second as a decimal number | \(4) | + | | [00,61]. | | + +-----------+--------------------------------+-------+ + | ``%l`` | Millisecond as a decimal number| | + | | [000,999]. | | + +-----------+--------------------------------+-------+ + | ``%u`` | Microsecond as a decimal number| | + | | [000000,999999]. | | + +-----------+--------------------------------+-------+ + | ``%n`` | Nanosecond as a decimal number | | + | | [000000000,999999999]. | | + +-----------+--------------------------------+-------+ + | ``%U`` | Week number of the year | \(5) | + | | (Sunday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Sunday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%w`` | Weekday as a decimal number | | + | | [0(Sunday),6]. | | + +-----------+--------------------------------+-------+ + | ``%W`` | Week number of the year | \(5) | + | | (Monday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Monday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%x`` | Locale's appropriate date | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%X`` | Locale's appropriate time | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%y`` | Year without century as a | | + | | decimal number [00,99]. | | + +-----------+--------------------------------+-------+ + | ``%Y`` | Year with century as a decimal | | + | | number. | | + +-----------+--------------------------------+-------+ + | ``%Z`` | Time zone name (no characters | | + | | if no time zone exists). | | + +-----------+--------------------------------+-------+ + | ``%%`` | A literal ``'%'`` character. | | + +-----------+--------------------------------+-------+ + + Notes + ----- + + (1) + The ``%f`` directive is the same as ``%y`` if the frequency is + not quarterly. + Otherwise, it corresponds to the 'fiscal' year, as defined by + the :attr:`qyear` attribute. + + (2) + The ``%F`` directive is the same as ``%Y`` if the frequency is + not quarterly. + Otherwise, it corresponds to the 'fiscal' year, as defined by + the :attr:`qyear` attribute. + + (3) + The ``%p`` directive only affects the output hour field + if the ``%I`` directive is used to parse the hour. + + (4) + The range really is ``0`` to ``61``; this accounts for leap + seconds and the (very rare) double leap seconds. + + (5) + The ``%U`` and ``%W`` directives are only used in calculations + when the day of the week and the year are specified. 
+ + Examples + -------- + + >>> a = Period(freq='Q-JUL', year=2006, quarter=1) + >>> a.strftime('%F-Q%q') + '2006-Q1' + >>> # Output the last month in the quarter of this date + >>> a.strftime('%b-%Y') + 'Oct-2005' + >>> + >>> a = Period(freq='D', year=2001, month=1, day=1) + >>> a.strftime('%d-%b-%Y') + '01-Jan-2001' + >>> a.strftime('%b. %d, %Y was a %A') + 'Jan. 01, 2001 was a Monday' + """ + base = self._dtype._dtype_code + return period_format(self.ordinal, base, fmt) + + +class Period(_Period): + """ + Represents a period of time. + + Parameters + ---------- + value : Period or str, default None + The time period represented (e.g., '4Q2005'). This represents neither + the start or the end of the period, but rather the entire period itself. + freq : str, default None + One of pandas period strings or corresponding objects. Accepted + strings are listed in the :ref:`offset alias section ` in the user docs. + ordinal : int, default None + The period offset from the proleptic Gregorian epoch. + year : int, default None + Year value of the period. + month : int, default 1 + Month value of the period. + quarter : int, default None + Quarter value of the period. + day : int, default 1 + Day value of the period. + hour : int, default 0 + Hour value of the period. + minute : int, default 0 + Minute value of the period. + second : int, default 0 + Second value of the period. + + Examples + -------- + >>> period = pd.Period('2012-1-1', freq='D') + >>> period + Period('2012-01-01', 'D') + """ + + def __new__(cls, value=None, freq=None, ordinal=None, + year=None, month=None, quarter=None, day=None, + hour=None, minute=None, second=None): + # freq points to a tuple (base, mult); base is one of the defined + # periods such as A, Q, etc. Every five minutes would be, e.g., + # ('T', 5) but may be passed in as a string like '5T' + + # ordinal is the period offset from the gregorian proleptic epoch + cdef _Period self + + if freq is not None: + freq = cls._maybe_convert_freq(freq) + nanosecond = 0 + + if ordinal is not None and value is not None: + raise ValueError("Only value or ordinal but not both should be " + "given but not both") + elif ordinal is not None: + if not util.is_integer_object(ordinal): + raise ValueError("Ordinal must be an integer") + if freq is None: + raise ValueError('Must supply freq for ordinal value') + + elif value is None: + if (year is None and month is None and + quarter is None and day is None and + hour is None and minute is None and second is None): + ordinal = NPY_NAT + else: + if freq is None: + raise ValueError("If value is None, freq cannot be None") + + # set defaults + month = 1 if month is None else month + day = 1 if day is None else day + hour = 0 if hour is None else hour + minute = 0 if minute is None else minute + second = 0 if second is None else second + + ordinal = _ordinal_from_fields(year, month, quarter, day, + hour, minute, second, freq) + + elif is_period_object(value): + other = value + if freq is None or freq._period_dtype_code == other.freq._period_dtype_code: + ordinal = other.ordinal + freq = other.freq + else: + converted = other.asfreq(freq) + ordinal = converted.ordinal + + elif checknull_with_nat(value) or (isinstance(value, str) and + value in nat_strings): + # explicit str check is necessary to avoid raising incorrectly + # if we have a non-hashable value. 
+ ordinal = NPY_NAT + + elif isinstance(value, str) or util.is_integer_object(value): + if util.is_integer_object(value): + if value == NPY_NAT: + value = "NaT" + + value = str(value) + value = value.upper() + dt, reso = parse_time_string(value, freq) + try: + ts = Timestamp(value) + except ValueError: + nanosecond = 0 + else: + nanosecond = ts.nanosecond + if nanosecond != 0: + reso = 'nanosecond' + if dt is NaT: + ordinal = NPY_NAT + + if freq is None: + try: + freq = attrname_to_abbrevs[reso] + except KeyError: + raise ValueError(f"Invalid frequency or could not " + f"infer: {reso}") + freq = to_offset(freq) + + elif PyDateTime_Check(value): + dt = value + if freq is None: + raise ValueError('Must supply freq for datetime value') + if isinstance(dt, Timestamp): + nanosecond = dt.nanosecond + elif util.is_datetime64_object(value): + dt = Timestamp(value) + if freq is None: + raise ValueError('Must supply freq for datetime value') + nanosecond = dt.nanosecond + elif PyDate_Check(value): + dt = datetime(year=value.year, month=value.month, day=value.day) + if freq is None: + raise ValueError('Must supply freq for datetime value') + else: + msg = "Value must be Period, string, integer, or datetime" + raise ValueError(msg) + + if ordinal is None: + base = freq_to_dtype_code(freq) + ordinal = period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.microsecond, 1000*nanosecond, base) + + return cls._from_ordinal(ordinal, freq) + + +cdef bint is_period_object(object obj): + return isinstance(obj, _Period) + + +cpdef int freq_to_dtype_code(BaseOffset freq) except? -1: + try: + return freq._period_dtype_code + except AttributeError as err: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err + + +cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, + int hour, int minute, int second, + BaseOffset freq): + base = freq_to_dtype_code(freq) + if quarter is not None: + year, month = quarter_to_myear(year, quarter, freq.freqstr) + + return period_ordinal(year, month, day, hour, + minute, second, 0, 0, base) + + +def validate_end_alias(how: str) -> str: # Literal["E", "S"] + how_dict = {'S': 'S', 'E': 'E', + 'START': 'S', 'FINISH': 'E', + 'BEGIN': 'S', 'END': 'E'} + how = how_dict.get(str(how).upper()) + if how not in {'S', 'E'}: + raise ValueError('How must be one of S or E') + return how diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c new file mode 100644 index 00000000..2bac6c72 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -0,0 +1,1093 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. 
See NUMPY_LICENSE.txt + +*/ + +#define NO_IMPORT + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +#include +#include +#include +#include "np_datetime.h" + + +const npy_datetimestruct _AS_MIN_DTS = { + 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; +const npy_datetimestruct _FS_MIN_DTS = { + 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; +const npy_datetimestruct _PS_MIN_DTS = { + 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; +const npy_datetimestruct _NS_MIN_DTS = { + 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; +const npy_datetimestruct _US_MIN_DTS = { + -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; +const npy_datetimestruct _MS_MIN_DTS = { + -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; +const npy_datetimestruct _S_MIN_DTS = { + -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; +const npy_datetimestruct _M_MIN_DTS = { + -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; + +const npy_datetimestruct _AS_MAX_DTS = { + 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; +const npy_datetimestruct _FS_MAX_DTS = { + 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; +const npy_datetimestruct _PS_MAX_DTS = { + 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; +const npy_datetimestruct _NS_MAX_DTS = { + 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; +const npy_datetimestruct _US_MAX_DTS = { + 294247, 1, 10, 4, 0, 54, 775807, 0, 0}; +const npy_datetimestruct _MS_MAX_DTS = { + 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; +const npy_datetimestruct _S_MAX_DTS = { + 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; +const npy_datetimestruct _M_MAX_DTS = { + 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; + + +const int days_per_month_table[2][12] = { + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; + +/* + * Returns 1 if the given year is a leap year, 0 otherwise. + */ +int is_leapyear(npy_int64 year) { + return (year & 0x3) == 0 && /* year % 4 == 0 */ + ((year % 100) != 0 || (year % 400) == 0); +} + +/* + * Adjusts a datetimestruct based on a minutes offset. Assumes + * the current values are valid.g + */ +void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { + int isleap; + + /* MINUTES */ + dts->min += minutes; + while (dts->min < 0) { + dts->min += 60; + dts->hour--; + } + while (dts->min >= 60) { + dts->min -= 60; + dts->hour++; + } + + /* HOURS */ + while (dts->hour < 0) { + dts->hour += 24; + dts->day--; + } + while (dts->hour >= 24) { + dts->hour -= 24; + dts->day++; + } + + /* DAYS */ + if (dts->day < 1) { + dts->month--; + if (dts->month < 1) { + dts->year--; + dts->month = 12; + } + isleap = is_leapyear(dts->year); + dts->day += days_per_month_table[isleap][dts->month - 1]; + } else if (dts->day > 28) { + isleap = is_leapyear(dts->year); + if (dts->day > days_per_month_table[isleap][dts->month - 1]) { + dts->day -= days_per_month_table[isleap][dts->month - 1]; + dts->month++; + if (dts->month > 12) { + dts->year++; + dts->month = 1; + } + } + } +} + +/* + * Calculates the days offset from the 1970 epoch. + */ +npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { + int i, month; + npy_int64 year, days = 0; + const int *month_lengths; + + year = dts->year - 1970; + days = year * 365; + + /* Adjust for leap years */ + if (days >= 0) { + /* + * 1968 is the closest leap year before 1970. + * Exclude the current year, so add 1. 
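+         * As a worked example, for dts->year = 2001: year = 31 and
+         * days = 31 * 365 = 11315 at this point; the corrections below
+         * add 32 / 4 = 8, subtract 100 / 100 = 1 and add 400 / 400 = 1,
+         * giving 11323 days, which matches the eight leap days that
+         * fall in 1972-2000.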
+ */ + year += 1; + /* Add one day for each 4 years */ + days += year / 4; + /* 1900 is the closest previous year divisible by 100 */ + year += 68; + /* Subtract one day for each 100 years */ + days -= year / 100; + /* 1600 is the closest previous year divisible by 400 */ + year += 300; + /* Add one day for each 400 years */ + days += year / 400; + } else { + /* + * 1972 is the closest later year after 1970. + * Include the current year, so subtract 2. + */ + year -= 2; + /* Subtract one day for each 4 years */ + days += year / 4; + /* 2000 is the closest later year divisible by 100 */ + year -= 28; + /* Add one day for each 100 years */ + days -= year / 100; + /* 2000 is also the closest later year divisible by 400 */ + /* Subtract one day for each 400 years */ + days += year / 400; + } + + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + month = dts->month - 1; + + /* Add the months */ + for (i = 0; i < month; ++i) { + days += month_lengths[i]; + } + + /* Add the days */ + days += dts->day - 1; + + return days; +} + +/* + * Modifies '*days_' to be the day offset within the year, + * and returns the year. + */ +static npy_int64 days_to_yearsdays(npy_int64 *days_) { + const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); + /* Adjust so it's relative to the year 2000 (divisible by 400) */ + npy_int64 days = (*days_) - (365 * 30 + 7); + npy_int64 year; + + /* Break down the 400 year cycle to get the year and day within the year */ + if (days >= 0) { + year = 400 * (days / days_per_400years); + days = days % days_per_400years; + } else { + year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); + days = days % days_per_400years; + if (days < 0) { + days += days_per_400years; + } + } + + /* Work out the year/day within the 400 year cycle */ + if (days >= 366) { + year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); + days = (days - 1) % (100 * 365 + 25 - 1); + if (days >= 365) { + year += 4 * ((days + 1) / (4 * 365 + 1)); + days = (days + 1) % (4 * 365 + 1); + if (days >= 366) { + year += (days - 1) / 365; + days = (days - 1) % 365; + } + } + } + + *days_ = days; + return year + 2000; +} + +/* + * Adjusts a datetimestruct based on a seconds offset. Assumes + * the current values are valid. + */ +NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts, + int seconds) { + int minutes; + + dts->sec += seconds; + if (dts->sec < 0) { + minutes = dts->sec / 60; + dts->sec = dts->sec % 60; + if (dts->sec < 0) { + --minutes; + dts->sec += 60; + } + add_minutes_to_datetimestruct(dts, minutes); + } else if (dts->sec >= 60) { + minutes = dts->sec / 60; + dts->sec = dts->sec % 60; + add_minutes_to_datetimestruct(dts, minutes); + } +} + +/* + * Fills in the year, month, day in 'dts' based on the days + * offset from 1970. 
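+ * For example, days = 59 produces 1970-03-01, and days = -1 produces
+ * 1969-12-31.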
+ */ +static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { + const int *month_lengths; + int i; + + dts->year = days_to_yearsdays(&days); + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + + for (i = 0; i < 12; ++i) { + if (days < month_lengths[i]) { + dts->month = i + 1; + dts->day = days + 1; + return; + } else { + days -= month_lengths[i]; + } + } +} + +/* + * Compares two npy_datetimestruct objects chronologically + */ +int cmp_npy_datetimestruct(const npy_datetimestruct *a, + const npy_datetimestruct *b) { + if (a->year > b->year) { + return 1; + } else if (a->year < b->year) { + return -1; + } + + if (a->month > b->month) { + return 1; + } else if (a->month < b->month) { + return -1; + } + + if (a->day > b->day) { + return 1; + } else if (a->day < b->day) { + return -1; + } + + if (a->hour > b->hour) { + return 1; + } else if (a->hour < b->hour) { + return -1; + } + + if (a->min > b->min) { + return 1; + } else if (a->min < b->min) { + return -1; + } + + if (a->sec > b->sec) { + return 1; + } else if (a->sec < b->sec) { + return -1; + } + + if (a->us > b->us) { + return 1; + } else if (a->us < b->us) { + return -1; + } + + if (a->ps > b->ps) { + return 1; + } else if (a->ps < b->ps) { + return -1; + } + + if (a->as > b->as) { + return 1; + } else if (a->as < b->as) { + return -1; + } + + return 0; +} +/* +* Returns the offset from utc of the timezone as a timedelta. +* The caller is responsible for ensuring that the tzinfo +* attribute exists on the datetime object. +* +* If the passed object is timezone naive, Py_None is returned. +* If extraction of the offset fails, NULL is returned. +* +* NOTE: This function is not vendored from numpy. +*/ +PyObject *extract_utc_offset(PyObject *obj) { + PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); + if (tmp == NULL) { + return NULL; + } + if (tmp != Py_None) { + PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); + if (offset == NULL) { + Py_DECREF(tmp); + return NULL; + } + return offset; + } + return tmp; +} + +/* + * + * Converts a Python datetime.datetime or datetime.date + * object into a NumPy npy_datetimestruct. Uses tzinfo (if present) + * to convert to UTC time. + * + * The following implementation just asks for attributes, and thus + * supports datetime duck typing. The tzinfo time zone conversion + * requires this style of access as well. + * + * Returns -1 on error, 0 on success, and 1 (with no error set) + * if obj doesn't have the needed date or datetime attributes. + */ +int convert_pydatetime_to_datetimestruct(PyObject *dtobj, + npy_datetimestruct *out) { + // Assumes that obj is a valid datetime object + PyObject *tmp; + PyObject *obj = (PyObject*)dtobj; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); + out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); + out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); + + // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use + // PyDateTime_Check here, and less verbose attribute lookups. 
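+    // The attribute-based lookups above also accept duck-typed objects.
+    // A plain datetime.date(2020, 1, 5), for example, exposes no "hour"
+    // attribute, so the checks below return success with only the year,
+    // month and day fields filled in.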
+ + /* Check for time attributes (if not there, return success as a date) */ + if (!PyObject_HasAttrString(obj, "hour") || + !PyObject_HasAttrString(obj, "minute") || + !PyObject_HasAttrString(obj, "second") || + !PyObject_HasAttrString(obj, "microsecond")) { + return 0; + } + + out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); + out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); + out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); + out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); + + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + /* Apply the time zone offset if datetime obj is tz-aware */ + if (offset != NULL) { + if (offset == Py_None) { + Py_DECREF(offset); + return 0; + } + PyObject *tmp_int; + int seconds_offset, minutes_offset; + /* + * The timedelta should have a function "total_seconds" + * which contains the value we want. + */ + tmp = PyObject_CallMethod(offset, "total_seconds", ""); + Py_DECREF(offset); + if (tmp == NULL) { + return -1; + } + tmp_int = PyNumber_Long(tmp); + if (tmp_int == NULL) { + Py_DECREF(tmp); + return -1; + } + seconds_offset = PyLong_AsLong(tmp_int); + if (seconds_offset == -1 && PyErr_Occurred()) { + Py_DECREF(tmp_int); + Py_DECREF(tmp); + return -1; + } + Py_DECREF(tmp_int); + Py_DECREF(tmp); + + /* Convert to a minutes offset and apply it */ + minutes_offset = seconds_offset / 60; + + add_minutes_to_datetimestruct(out, -minutes_offset); + } + } + + return 0; +} + + +/* + * Converts a datetime from a datetimestruct to a datetime based + * on a metadata unit. The date is assumed to be valid. + */ +npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts) { + npy_datetime ret; + + if (base == NPY_FR_Y) { + /* Truncate to the year */ + ret = dts->year - 1970; + } else if (base == NPY_FR_M) { + /* Truncate to the month */ + ret = 12 * (dts->year - 1970) + (dts->month - 1); + } else { + /* Otherwise calculate the number of days to start */ + npy_int64 days = get_datetimestruct_days(dts); + + switch (base) { + case NPY_FR_W: + /* Truncate to weeks */ + if (days >= 0) { + ret = days / 7; + } else { + ret = (days - 6) / 7; + } + break; + case NPY_FR_D: + ret = days; + break; + case NPY_FR_h: + ret = days * 24 + dts->hour; + break; + case NPY_FR_m: + ret = (days * 24 + dts->hour) * 60 + dts->min; + break; + case NPY_FR_s: + ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; + break; + case NPY_FR_ms: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000 + + dts->us / 1000; + break; + case NPY_FR_us: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us; + break; + case NPY_FR_ns: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000 + + dts->ps / 1000; + break; + case NPY_FR_ps: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps; + break; + case NPY_FR_fs: + /* only 2.6 hours */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000 + + dts->as / 1000; + break; + case NPY_FR_as: + /* only 9.2 secs */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000000 + + dts->as; + break; + default: + /* Something got corrupted */ + PyErr_SetString( + PyExc_ValueError, + "NumPy 
datetime metadata with corrupt unit value"); + return -1; + } + } + return ret; +} + +/* + * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ + * + * Computes the python `ret, d = divmod(d, unit)`. + * + * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch + * for subsequent calls to this command - it is able to deduce that `*d >= 0`. + */ +npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { + assert(unit > 0); + npy_int64 div = *d / unit; + npy_int64 mod = *d % unit; + if (mod < 0) { + mod += unit; + div -= 1; + } + assert(mod >= 0); + *d = mod; + return div; +} + +/* + * Converts a datetime based on the given metadata into a datetimestruct + */ +void pandas_datetime_to_datetimestruct(npy_datetime dt, + NPY_DATETIMEUNIT base, + npy_datetimestruct *out) { + npy_int64 perday; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->year = 1970; + out->month = 1; + out->day = 1; + + /* + * Note that care must be taken with the / and % operators + * for negative values. + */ + switch (base) { + case NPY_FR_Y: + out->year = 1970 + dt; + break; + + case NPY_FR_M: + out->year = 1970 + extract_unit(&dt, 12); + out->month = dt + 1; + break; + + case NPY_FR_W: + /* A week is 7 days */ + set_datetimestruct_days(dt * 7, out); + break; + + case NPY_FR_D: + set_datetimestruct_days(dt, out); + break; + + case NPY_FR_h: + perday = 24LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = dt; + break; + + case NPY_FR_m: + perday = 24LL * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60); + out->min = (int)dt; + break; + + case NPY_FR_s: + perday = 24LL * 60 * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60 * 60); + out->min = (int)extract_unit(&dt, 60); + out->sec = (int)dt; + break; + + case NPY_FR_ms: + perday = 24LL * 60 * 60 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 60); + out->sec = (int)extract_unit(&dt, 1000LL); + out->us = (int)(dt * 1000); + break; + + case NPY_FR_us: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000); + out->us = (int)dt; + break; + + case NPY_FR_ns: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_ps: + perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_fs: + /* entire range is only +- 2.6 hours */ + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60 * 60); + if (out->hour < 0) { + 
out->year = 1969; + out->month = 12; + out->day = 31; + out->hour += 24; + assert(out->hour >= 0); + } + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000); + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL); + out->as = (int)(dt * 1000); + break; + + case NPY_FR_as: + /* entire range is only +- 9.2 seconds */ + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 1000); + if (out->sec < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour = 23; + out->min = 59; + out->sec += 60; + assert(out->sec >= 0); + } + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL * 1000); + out->as = (int)dt; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted with invalid " + "base unit"); + } +} + +/* + * Converts a timedelta from a timedeltastruct to a timedelta based + * on a metadata unit. The timedelta is assumed to be valid. + * + * Returns 0 on success, -1 on failure. + */ +void pandas_timedelta_to_timedeltastruct(npy_timedelta td, + NPY_DATETIMEUNIT base, + pandas_timedeltastruct *out) { + npy_int64 frac; + npy_int64 sfrac; + npy_int64 ifrac; + int sign; + npy_int64 per_day; + npy_int64 per_sec; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(pandas_timedeltastruct)); + + switch (base) { + case NPY_FR_ns: + + per_day = 86400000000000LL; + per_sec = 1000LL * 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac -= out->ms * 1000LL * 1000LL; + out->us = ifrac / 1000LL; + ifrac -= out->us * 1000LL; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_us: + + per_day = 86400000000LL; + per_sec = 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } 
else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / 1000LL; + ifrac -= out->ms * 1000LL; + out->us = ifrac / 1L; + ifrac -= out->us * 1L; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_ms: + + per_day = 86400000LL; + per_sec = 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_s: + // special case where we can simplify many expressions bc per_sec=1 + + per_day = 86400LL; + per_sec = 1L; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = 0; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_m: + + out->days = td / 1440LL; + td -= out->days * 1440LL; + out->hrs = td / 60LL; + td -= out->hrs * 60LL; + out->min = td; + + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_h: + out->days = td / 24LL; + td -= out->days * 24LL; + out->hrs = td; + + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_D: + out->days = td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_W: + out->days = 7 * td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy timedelta metadata is corrupted with " + "invalid base unit"); + } + + out->seconds = out->hrs * 3600 + out->min * 60 + 
out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; +} + + +/* + * This function returns a pointer to the DateTimeMetaData + * contained within the provided datetime dtype. + * + * Copied near-verbatim from numpy/core/src/multiarray/datetime.c + */ +PyArray_DatetimeMetaData +get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { + return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); +} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h new file mode 100644 index 00000000..6ab915e5 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -0,0 +1,102 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt + +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +typedef struct { + npy_int64 days; + npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; +} pandas_timedeltastruct; + +extern const npy_datetimestruct _AS_MIN_DTS; +extern const npy_datetimestruct _AS_MAX_DTS; +extern const npy_datetimestruct _FS_MIN_DTS; +extern const npy_datetimestruct _FS_MAX_DTS; +extern const npy_datetimestruct _PS_MIN_DTS; +extern const npy_datetimestruct _PS_MAX_DTS; +extern const npy_datetimestruct _NS_MIN_DTS; +extern const npy_datetimestruct _NS_MAX_DTS; +extern const npy_datetimestruct _US_MIN_DTS; +extern const npy_datetimestruct _US_MAX_DTS; +extern const npy_datetimestruct _MS_MIN_DTS; +extern const npy_datetimestruct _MS_MAX_DTS; +extern const npy_datetimestruct _S_MIN_DTS; +extern const npy_datetimestruct _S_MAX_DTS; +extern const npy_datetimestruct _M_MIN_DTS; +extern const npy_datetimestruct _M_MAX_DTS; + +// stuff pandas needs +// ---------------------------------------------------------------------------- + +PyObject *extract_utc_offset(PyObject *obj); + +int convert_pydatetime_to_datetimestruct(PyObject *dtobj, + npy_datetimestruct *out); + +npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts); + +void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, + npy_datetimestruct *result); + +void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + NPY_DATETIMEUNIT fr, + pandas_timedeltastruct *result); + +extern const int days_per_month_table[2][12]; + +// stuff numpy-derived code needs in header +// ---------------------------------------------------------------------------- + +int is_leapyear(npy_int64 year); + +/* + * Calculates the days offset from the 1970 epoch. + */ +npy_int64 +get_datetimestruct_days(const npy_datetimestruct *dts); + + +/* + * Compares two npy_datetimestruct objects chronologically + */ +int cmp_npy_datetimestruct(const npy_datetimestruct *a, + const npy_datetimestruct *b); + + +/* + * Adjusts a datetimestruct based on a minutes offset. Assumes + * the current values are valid. + */ +void +add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes); + +/* + * This function returns the DateTimeMetaData + * contained within the provided datetime dtype. 
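+ * For a datetime64[ns] dtype, for instance, the returned metadata has
+ * base == NPY_FR_ns and num == 1.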
+ */ +PyArray_DatetimeMetaData get_datetime_metadata_from_dtype( + PyArray_Descr *dtype); + + +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c new file mode 100644 index 00000000..cfbaed01 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -0,0 +1,969 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Written by Mark Wiebe (mwwiebe@gmail.com) +Copyright (c) 2011 by Enthought, Inc. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +See NUMPY_LICENSE.txt for the license. + +This file implements string parsing and creation for NumPy datetime. + +*/ + +#define PY_SSIZE_T_CLEAN +#define NO_IMPORT + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +#include + +#include +#include +#include + +#include "np_datetime.h" +#include "np_datetime_strings.h" + + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + ':' separator between hours, minutes, and seconds is optional. When + * omitted, each component must be 2 digits if it appears. (GH-10041) + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to 1 if the parsed time contains timezone, + * to 0 otherwise. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ +int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, + int *out_local, int *out_tzoffset) { + int year_leap = 0; + int i, numdigits; + const char *substr; + int sublen; + NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; + + /* If year-month-day are separated by a valid separator, + * months/days without leading zeroes will be parsed + * (though not iso8601). If the components aren't separated, + * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are + * forbidden here (but parsed as YYMMDD elsewhere). + */ + int has_ymd_sep = 0; + char ymd_sep = '\0'; + char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; + int valid_ymd_sep_len = sizeof(valid_ymd_sep); + + /* hour-minute-second may or may not separated by ':'. If not, then + * each component must be 2 digits. 
*/ + int has_hms_sep = 0; + int hour_was_2_digits = 0; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + substr = str; + sublen = len; + + /* Skip leading whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + /* Leading '-' sign for negative year */ + if (*substr == '-') { + ++substr; + --sublen; + } + + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE YEAR (4 digits) */ + out->year = 0; + if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && + isdigit(substr[2]) && isdigit(substr[3])) { + out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + + 10 * (substr[2] - '0') + (substr[3] - '0'); + + substr += 4; + sublen -= 4; + } + + /* Negate the year if necessary */ + if (str[0] == '-') { + out->year = -out->year; + } + /* Check whether it's a leap-year */ + year_leap = is_leapyear(out->year); + + /* Next character must be a separator, start of month, or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + bestunit = NPY_FR_Y; + goto finish; + } + + if (!isdigit(*substr)) { + for (i = 0; i < valid_ymd_sep_len; ++i) { + if (*substr == valid_ymd_sep[i]) { + break; + } + } + if (i == valid_ymd_sep_len) { + goto parse_error; + } + has_ymd_sep = 1; + ymd_sep = valid_ymd_sep[i]; + ++substr; + --sublen; + /* Cannot have trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } + + /* PARSE THE MONTH */ + /* First digit required */ + out->month = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->month = 10 * out->month + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->month < 1 || out->month > 12) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be the separator, start of day, or end of string */ + if (sublen == 0) { + bestunit = NPY_FR_M; + /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. 
*/ + if (!has_ymd_sep) { + goto parse_error; + } + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if (has_ymd_sep) { + /* Must have separator, but cannot be trailing */ + if (*substr != ymd_sep || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + } + + /* PARSE THE DAY */ + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->day = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->day = 10 * out->day + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->day < 1 || + out->day > days_per_month_table[year_leap][out->month - 1]) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be a 'T', ' ', or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + bestunit = NPY_FR_D; + goto finish; + } + + if ((*substr != 'T' && *substr != ' ') || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + + /* PARSE THE HOURS */ + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->hour = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional */ + if (isdigit(*substr)) { + hour_was_2_digits = 1; + out->hour = 10 * out->hour + (*substr - '0'); + ++substr; + --sublen; + if (out->hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", + str); + } + goto error; + } + } + + /* Next character must be a ':' or the end of the string */ + if (sublen == 0) { + if (!hour_was_2_digits) { + goto parse_error; + } + bestunit = NPY_FR_h; + goto finish; + } + + if (*substr == ':') { + has_hms_sep = 1; + ++substr; + --sublen; + /* Cannot have a trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } else if (!isdigit(*substr)) { + if (!hour_was_2_digits) { + goto parse_error; + } + goto parse_timezone; + } + + /* PARSE THE MINUTES */ + /* First digit required */ + out->min = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->min = 10 * out->min + (*substr - '0'); + ++substr; + --sublen; + if (out->min >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + if (sublen == 0) { + bestunit = NPY_FR_m; + goto finish; + } + + /* If we make it through this condition block, then the next + * character is a digit. */ + if (has_hms_sep && *substr == ':') { + ++substr; + --sublen; + /* Cannot have a trailing ':' */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } else if (!has_hms_sep && isdigit(*substr)) { + } else { + goto parse_timezone; + } + + /* PARSE THE SECONDS */ + /* First digit required */ + out->sec = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->sec = 10 * out->sec + (*substr - '0'); + ++substr; + --sublen; + if (out->sec >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Seconds out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + /* Next character may be a '.' 
indicating fractional seconds */ + if (sublen > 0 && *substr == '.') { + ++substr; + --sublen; + } else { + bestunit = NPY_FR_s; + goto parse_timezone; + } + + /* PARSE THE MICROSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->us *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->us += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_us; + } else { + bestunit = NPY_FR_ms; + } + goto parse_timezone; + } + + /* PARSE THE PICOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->ps *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->ps += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_ps; + } else { + bestunit = NPY_FR_ns; + } + goto parse_timezone; + } + + /* PARSE THE ATTOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->as *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->as += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (numdigits > 3) { + bestunit = NPY_FR_as; + } else { + bestunit = NPY_FR_fs; + } + +parse_timezone: + /* trim any whitespace between time/timezone */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + if (sublen == 0) { + // Unlike NumPy, treating no time zone as naive + goto finish; + } + + /* UTC specifier */ + if (*substr == 'Z') { + /* "Z" should be equivalent to tz offset "+00:00" */ + if (out_local != NULL) { + *out_local = 1; + } + + if (out_tzoffset != NULL) { + *out_tzoffset = 0; + } + + if (sublen == 1) { + goto finish; + } else { + ++substr; + --sublen; + } + } else if (*substr == '-' || *substr == '+') { + /* Time zone offset */ + int offset_neg = 0, offset_hour = 0, offset_minute = 0; + + /* + * Since "local" means local with respect to the current + * machine, we say this is non-local. 
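+         * For example, "2001-01-02T03:04+05:30" is reported with
+         * *out_tzoffset = 330, while 'out' keeps the wall time 03:04
+         * rather than being converted to UTC.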
+ */ + + if (*substr == '-') { + offset_neg = 1; + } + ++substr; + --sublen; + + /* The hours offset */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_hour = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + + /* The minutes offset is optional */ + if (sublen > 0) { + /* Optional ':' */ + if (*substr == ':') { + ++substr; + --sublen; + } + + /* The minutes offset (at the end of the string) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_minute >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_minute = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + } + + /* Apply the time zone offset */ + if (offset_neg) { + offset_hour = -offset_hour; + offset_minute = -offset_minute; + } + if (out_local != NULL) { + *out_local = 1; + // Unlike NumPy, do not change internal value to local time + *out_tzoffset = 60 * offset_hour + offset_minute; + } + } + + /* Skip trailing whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + if (sublen != 0) { + goto parse_error; + } + +finish: + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } + return 0; + +parse_error: + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", str, + (int)(substr - str)); + } + return -1; + +error: + return -1; +} + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { + int len = 0; + + switch (base) { + /* Generic units can only be used to represent NaT */ + /* return 4;*/ + case NPY_FR_as: + len += 3; /* "###" */ + case NPY_FR_fs: + len += 3; /* "###" */ + case NPY_FR_ps: + len += 3; /* "###" */ + case NPY_FR_ns: + len += 3; /* "###" */ + case NPY_FR_us: + len += 3; /* "###" */ + case NPY_FR_ms: + len += 4; /* ".###" */ + case NPY_FR_s: + len += 3; /* ":##" */ + case NPY_FR_m: + len += 3; /* ":##" */ + case NPY_FR_h: + len += 3; /* "T##" */ + case NPY_FR_D: + case NPY_FR_W: + len += 3; /* "-##" */ + case NPY_FR_M: + len += 3; /* "-##" */ + case NPY_FR_Y: + len += 21; /* 64-bit year */ + break; + default: + len += 3; /* handle the now defunct NPY_FR_B */ + break; + } + + if (base >= NPY_FR_h) { + if (local) { + len += 5; /* "+####" or "-####" */ + } else { + len += 1; /* "Z" */ + } + } + + len += 1; /* NULL terminator */ + + return len; +} + + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). If the string fits in + * the space exactly, it leaves out the NULL terminator and returns success. + * + * The differences from ISO 8601 are the 'NaT' string, and + * the number of year digits is >= 4 instead of strictly 4. + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. 
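+ *
+ * With base NPY_FR_s and a non-zero 'utc' flag, for example, a typical
+ * result is "2001-01-02T03:04:05Z"; with base NPY_FR_D it is simply
+ * "2001-01-02".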
+ * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + int utc, NPY_DATETIMEUNIT base) { + char *substr = outstr; + int sublen = outlen; + int tmplen; + + /* + * Print weeks with the same precision as days. + * + * TODO: Could print weeks with YYYY-Www format if the week + * epoch is a Monday. + */ + if (base == NPY_FR_W) { + base = NPY_FR_D; + } + +/* YEAR */ +/* + * Can't use PyOS_snprintf, because it always produces a '\0' + * character at the end, and NumPy string types are permitted + * to have data all the way to the end of the buffer. + */ +#ifdef _WIN32 + tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#else + tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#endif // _WIN32 + /* If it ran out of space or there isn't space for the NULL terminator */ + if (tmplen < 0 || tmplen > sublen) { + goto string_too_short; + } + substr += tmplen; + sublen -= tmplen; + + /* Stop if the unit is years */ + if (base == NPY_FR_Y) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* MONTH */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->month / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->month % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is months */ + if (base == NPY_FR_M) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* DAY */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->day / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->day % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is days */ + if (base == NPY_FR_D) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* HOUR */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'T'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->hour / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->hour % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is hours */ + if (base == NPY_FR_h) { + goto add_time_zone; + } + + /* MINUTE */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->min / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->min % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is minutes */ + if (base == NPY_FR_m) { + goto add_time_zone; + } + + /* SECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->sec / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->sec % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is seconds */ + if (base == NPY_FR_s) { + goto add_time_zone; + } + + /* MILLISECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '.'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 100000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->us / 10000) % 10 + '0'); + if (sublen < 4) { + goto string_too_short; + } + substr[3] = 
(char)((dts->us / 1000) % 10 + '0'); + substr += 4; + sublen -= 4; + + /* Stop if the unit is milliseconds */ + if (base == NPY_FR_ms) { + goto add_time_zone; + } + + /* MICROSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->us / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->us % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is microseconds */ + if (base == NPY_FR_us) { + goto add_time_zone; + } + + /* NANOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->ps / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is nanoseconds */ + if (base == NPY_FR_ns) { + goto add_time_zone; + } + + /* PICOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->ps % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is picoseconds */ + if (base == NPY_FR_ps) { + goto add_time_zone; + } + + /* FEMTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->as / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is femtoseconds */ + if (base == NPY_FR_fs) { + goto add_time_zone; + } + + /* ATTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->as % 10 + '0'); + substr += 3; + sublen -= 3; + +add_time_zone: + /* UTC "Zulu" time */ + if (utc) { + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'Z'; + substr += 1; + sublen -= 1; + } + /* Add a NULL terminator, and return */ + if (sublen > 0) { + substr[0] = '\0'; + } + + return 0; + +string_too_short: + PyErr_Format(PyExc_RuntimeError, + "The string provided for NumPy ISO datetime formatting " + "was too short, with length %d", + outlen); + return -1; +} + + +int make_iso_8601_timedelta(pandas_timedeltastruct *tds, + char *outstr, size_t *outlen) { + *outlen = 0; + *outlen += snprintf(outstr, 60, // NOLINT + "P%" NPY_INT64_FMT + "DT%" NPY_INT32_FMT + "H%" NPY_INT32_FMT + "M%" NPY_INT32_FMT, + tds->days, tds->hrs, tds->min, tds->sec); + outstr += *outlen; + + if (tds->ns != 0) { + *outlen += snprintf(outstr, 12, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us, tds->ns); + } else if (tds->us != 0) { + *outlen += snprintf(outstr, 9, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us); + } else if (tds->ms != 0) { + *outlen += snprintf(outstr, 6, // NOLINT + ".%03" NPY_INT32_FMT "S", tds->ms); + } else { + *outlen += snprintf(outstr, 2, // NOLINT + "%s", "S"); + } + + return 0; +} diff --git 
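make_iso_8601_datetime and make_iso_8601_timedelta emit the fixed-width ISO shapes that pandas uses elsewhere, e.g. when writing ISO dates to JSON. A hedged illustration of the same shapes as seen from the public API (output strings are indicative only; the exact precision of the JSON form depends on options such as date_unit):

import pandas as pd

pd.Timedelta(days=1, hours=2, minutes=3, seconds=4, microseconds=5).isoformat()
# 'P1DT2H3M4.000005S' -- the same P..DT..H..M..S layout written by make_iso_8601_timedelta above
pd.Series(pd.to_datetime(["2021-01-02 03:04:05.123"])).to_json(date_format="iso")
# e.g. '{"0":"2021-01-02T03:04:05.123Z"}' (illustrative)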
a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h new file mode 100644 index 00000000..511d9a40 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -0,0 +1,93 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Written by Mark Wiebe (mwwiebe@gmail.com) +Copyright (c) 2011 by Enthought, Inc. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +See NUMPY_LICENSE.txt for the license. + +This file implements string parsing and creation for NumPy datetime. + +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + The date "20100312" is parsed as the year 20100312, not as + * equivalent to "2010-03-12". The '-' in the dates are not optional. + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets whether returned value contains timezone. 0 for UTC, 1 for local time. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ +int +parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, + int *out_local, + int *out_tzoffset); + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +int +get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +int +make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + int utc, NPY_DATETIMEUNIT base); + +/* + * Converts an pandas_timedeltastruct to an ISO 8601 string. + * + * Mutates outlen to provide size of (non-NULL terminated) string. 
+ * + * Currently has no error handling + */ +int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr, + size_t *outlen); +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ diff --git a/pandas/_libs/tslibs/strptime.pyi b/pandas/_libs/tslibs/strptime.pyi new file mode 100644 index 00000000..8e1acb2f --- /dev/null +++ b/pandas/_libs/tslibs/strptime.pyi @@ -0,0 +1,12 @@ +import numpy as np + +from pandas._typing import npt + +def array_strptime( + values: npt.NDArray[np.object_], + fmt: str | None, + exact: bool = ..., + errors: str = ..., +) -> tuple[np.ndarray, np.ndarray]: ... + +# first ndarray is M8[ns], second is object ndarray of tzinfo | None diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx new file mode 100644 index 00000000..7aaeefc3 --- /dev/null +++ b/pandas/_libs/tslibs/strptime.pyx @@ -0,0 +1,538 @@ +"""Strptime-related classes and functions. +""" +import calendar +import locale +import re +import time + +from cpython.datetime cimport ( + date, + tzinfo, +) + +from _thread import allocate_lock as _thread_allocate_lock + +import numpy as np +import pytz + +from numpy cimport ( + int64_t, + ndarray, +) + +from pandas._libs.missing cimport checknull_with_nat_and_na +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_nat_strings as nat_strings, +) +from pandas._libs.tslibs.np_datetime cimport ( + check_dts_bounds, + dtstruct_to_dt64, + npy_datetimestruct, +) + + +cdef dict _parse_code_table = {'y': 0, + 'Y': 1, + 'm': 2, + 'B': 3, + 'b': 4, + 'd': 5, + 'H': 6, + 'I': 7, + 'M': 8, + 'S': 9, + 'f': 10, + 'A': 11, + 'a': 12, + 'w': 13, + 'j': 14, + 'U': 15, + 'W': 16, + 'Z': 17, + 'p': 18, # an additional key, only with I + 'z': 19, + 'G': 20, + 'V': 21, + 'u': 22} + + +def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='raise'): + """ + Calculates the datetime structs represented by the passed array of strings + + Parameters + ---------- + values : ndarray of string-like objects + fmt : string-like regex + exact : matches must be exact if True, search if False + errors : string specifying error handling, {'raise', 'ignore', 'coerce'} + """ + + cdef: + Py_ssize_t i, n = len(values) + npy_datetimestruct dts + int64_t[::1] iresult + object[::1] result_timezone + int year, month, day, minute, hour, second, weekday, julian + int week_of_year, week_of_year_start, parse_code, ordinal + int iso_week, iso_year + int64_t us, ns + object val, group_key, ampm, found, timezone + dict found_key + bint is_raise = errors=='raise' + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + + assert is_raise or is_ignore or is_coerce + + if fmt is not None: + if '%W' in fmt or '%U' in fmt: + if '%Y' not in fmt and '%y' not in fmt: + raise ValueError("Cannot use '%W' or '%U' without day and year") + if '%A' not in fmt and '%a' not in fmt and '%w' not in fmt: + raise ValueError("Cannot use '%W' or '%U' without day and year") + elif '%Z' in fmt and '%z' in fmt: + raise ValueError("Cannot parse both %Z and %z") + + global _TimeRE_cache, _regex_cache + with _cache_lock: + if _getlang() != _TimeRE_cache.locale_time.lang: + _TimeRE_cache = TimeRE() + _regex_cache.clear() + if len(_regex_cache) > _CACHE_MAX_SIZE: + _regex_cache.clear() + locale_time = _TimeRE_cache.locale_time + format_regex = _regex_cache.get(fmt) + if not format_regex: + try: + format_regex = _TimeRE_cache.compile(fmt) + # KeyError raised when a bad format is found; can be specified as + # \\, in which case it was a stray % but with a 
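The format validation above rejects week-number directives that cannot identify a unique day, as well as mixing %Z with %z. A brief sketch of the resulting behaviour through pd.to_datetime, which uses array_strptime when an explicit format is given (not part of the patch):

import pandas as pd

pd.to_datetime("2021 05 1", format="%Y %W %w")   # year + week of year + weekday: accepted
try:
    pd.to_datetime("2021 05", format="%Y %W")    # week number but no weekday directive
except ValueError as err:
    print(err)   # Cannot use '%W' or '%U' without day and year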
space after it + except KeyError, err: + bad_directive = err.args[0] + if bad_directive == "\\": + bad_directive = "%" + del err + raise ValueError(f"'{bad_directive}' is a bad directive " + f"in format '{fmt}'") + # IndexError only occurs when the format string is "%" + except IndexError: + raise ValueError(f"stray % in format '{fmt}'") + _regex_cache[fmt] = format_regex + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + result_timezone = np.empty(n, dtype='object') + + dts.us = dts.ps = dts.as = 0 + + for i in range(n): + val = values[i] + if isinstance(val, str): + if val in nat_strings: + iresult[i] = NPY_NAT + continue + else: + if checknull_with_nat_and_na(val): + iresult[i] = NPY_NAT + continue + else: + val = str(val) + + # exact matching + if exact: + found = format_regex.match(val) + if not found: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError(f"time data '{val}' does not match " + f"format '{fmt}' (match)") + if len(val) != found.end(): + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError(f"unconverted data remains: {val[found.end():]}") + + # search + else: + found = format_regex.search(val) + if not found: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError(f"time data {repr(val)} does not match format " + f"{repr(fmt)} (search)") + + iso_year = -1 + year = 1900 + month = day = 1 + hour = minute = second = ns = us = 0 + timezone = None + # Default to -1 to signify that values not known; not critical to have, + # though + iso_week = week_of_year = -1 + week_of_year_start = -1 + # weekday and julian defaulted to -1 so as to signal need to calculate + # values + weekday = julian = -1 + found_dict = found.groupdict() + for group_key in found_dict.iterkeys(): + # Directives not explicitly handled below: + # c, x, X + # handled by making out of other directives + # U, W + # worthless without day of the week + parse_code = _parse_code_table[group_key] + + if parse_code == 0: + year = int(found_dict['y']) + # Open Group specification for strptime() states that a %y + # value in the range of [00, 68] is in the century 2000, while + # [69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 + elif parse_code == 1: + year = int(found_dict['Y']) + elif parse_code == 2: + month = int(found_dict['m']) + # elif group_key == 'B': + elif parse_code == 3: + month = locale_time.f_month.index(found_dict['B'].lower()) + # elif group_key == 'b': + elif parse_code == 4: + month = locale_time.a_month.index(found_dict['b'].lower()) + # elif group_key == 'd': + elif parse_code == 5: + day = int(found_dict['d']) + # elif group_key == 'H': + elif parse_code == 6: + hour = int(found_dict['H']) + elif parse_code == 7: + hour = int(found_dict['I']) + ampm = found_dict.get('p', '').lower() + # If there was no AM/PM indicator, we'll treat this like AM + if ampm in ('', locale_time.am_pm[0]): + # We're in AM so the hour is correct unless we're + # looking at 12 midnight. + # 12 midnight == 12 AM == hour 0 + if hour == 12: + hour = 0 + elif ampm == locale_time.am_pm[1]: + # We're in PM so we need to add 12 to the hour unless + # we're looking at 12 noon. 
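The exact/search distinction above corresponds to the public exact keyword of pd.to_datetime: with exact=True the whole string must be consumed, with exact=False a substring match suffices. Illustrative only (assumes pandas 1.5.x as pd):

import pandas as pd

pd.to_datetime("2021-01-02 junk", format="%Y-%m-%d", exact=False)   # Timestamp('2021-01-02'), via regex search
try:
    pd.to_datetime("2021-01-02 junk", format="%Y-%m-%d")            # exact=True is the default
except ValueError as err:
    print(err)   # unconverted data remains: ...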
+ # 12 noon == 12 PM == hour 12 + if hour != 12: + hour += 12 + elif parse_code == 8: + minute = int(found_dict['M']) + elif parse_code == 9: + second = int(found_dict['S']) + elif parse_code == 10: + s = found_dict['f'] + # Pad to always return nanoseconds + s += "0" * (9 - len(s)) + us = long(s) + ns = us % 1000 + us = us // 1000 + elif parse_code == 11: + weekday = locale_time.f_weekday.index(found_dict['A'].lower()) + elif parse_code == 12: + weekday = locale_time.a_weekday.index(found_dict['a'].lower()) + elif parse_code == 13: + weekday = int(found_dict['w']) + if weekday == 0: + weekday = 6 + else: + weekday -= 1 + elif parse_code == 14: + julian = int(found_dict['j']) + elif parse_code == 15 or parse_code == 16: + week_of_year = int(found_dict[group_key]) + if group_key == 'U': + # U starts week on Sunday. + week_of_year_start = 6 + else: + # W starts week on Monday. + week_of_year_start = 0 + elif parse_code == 17: + timezone = pytz.timezone(found_dict['Z']) + elif parse_code == 19: + timezone = parse_timezone_directive(found_dict['z']) + elif parse_code == 20: + iso_year = int(found_dict['G']) + elif parse_code == 21: + iso_week = int(found_dict['V']) + elif parse_code == 22: + weekday = int(found_dict['u']) + weekday -= 1 + + # don't assume default values for ISO week/year + if iso_year != -1: + if iso_week == -1 or weekday == -1: + raise ValueError("ISO year directive '%G' must be used with " + "the ISO week directive '%V' and a weekday " + "directive '%A', '%a', '%w', or '%u'.") + if julian != -1: + raise ValueError("Day of the year directive '%j' is not " + "compatible with ISO year directive '%G'. " + "Use '%Y' instead.") + elif year != -1 and week_of_year == -1 and iso_week != -1: + if weekday == -1: + raise ValueError("ISO week directive '%V' must be used with " + "the ISO year directive '%G' and a weekday " + "directive '%A', '%a', '%w', or '%u'.") + else: + raise ValueError("ISO week directive '%V' is incompatible with " + "the year directive '%Y'. Use the ISO year " + "'%G' instead.") + + # If we know the wk of the year and what day of that wk, we can figure + # out the Julian day of the year. + if julian == -1 and weekday != -1: + if week_of_year != -1: + week_starts_Mon = week_of_year_start == 0 + julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, + week_starts_Mon) + elif iso_year != -1 and iso_week != -1: + year, julian = _calc_julian_from_V(iso_year, iso_week, + weekday + 1) + # Cannot pre-calculate date() since can change in Julian + # calculation and thus could have different value for the day of the wk + # calculation. + try: + if julian == -1: + # Need to add 1 to result since first day of the year is 1, not + # 0. + ordinal = date(year, month, day).toordinal() + julian = ordinal - date(year, 1, 1).toordinal() + 1 + else: + # Assume that if they bothered to include Julian day it will + # be accurate. 
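Two behaviours handled just above are easy to check from Python: the 12-hour clock (%I/%p, where 12 AM maps to hour 0) and the nanosecond-capable %f (padded to nine digits in parse_code 10). A small sketch, not part of the patch:

import pandas as pd

assert pd.to_datetime("12:30 AM", format="%I:%M %p").hour == 0    # 12 midnight -> hour 0
assert pd.to_datetime("12:30 PM", format="%I:%M %p").hour == 12   # 12 noon stays 12
assert pd.to_datetime("2021-01-02 03:04:05.123456789",
                      format="%Y-%m-%d %H:%M:%S.%f").nanosecond == 789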
+ datetime_result = date.fromordinal( + (julian - 1) + date(year, 1, 1).toordinal()) + year = datetime_result.year + month = datetime_result.month + day = datetime_result.day + except ValueError: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise + if weekday == -1: + weekday = date(year, month, day).weekday() + + dts.year = year + dts.month = month + dts.day = day + dts.hour = hour + dts.min = minute + dts.sec = second + dts.us = us + dts.ps = ns * 1000 + + iresult[i] = dtstruct_to_dt64(&dts) + try: + check_dts_bounds(&dts) + except ValueError: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise + + result_timezone[i] = timezone + + return result, result_timezone.base + + +""" +TimeRE, _calc_julian_from_U_or_W are vendored +from the standard library, see +https://github.com/python/cpython/blob/main/Lib/_strptime.py +The original module-level docstring follows. + +Strptime-related classes and functions. +CLASSES: + LocaleTime -- Discovers and stores locale-specific time information + TimeRE -- Creates regexes for pattern matching a string of text containing + time information +FUNCTIONS: + _getlang -- Figure out what language is being used for the locale + strptime -- Calculates the time struct represented by the passed-in string +""" + +from _strptime import ( + LocaleTime, + TimeRE as _TimeRE, + _getlang, +) + + +class TimeRE(_TimeRE): + """ + Handle conversion from format directives to regexes. + + Creates regexes for pattern matching a string of text containing + time information + """ + + def __init__(self, locale_time=None): + """ + Create keys/values. + + Order of execution is important for dependency reasons. + """ + self._Z = None + super().__init__(locale_time=locale_time) + # GH 48767: Overrides for cpython's TimeRE + # 1) Parse up to nanos instead of micros + self.update({"f": r"(?P[0-9]{1,9})"}), + + def __getitem__(self, key): + if key == "Z": + # lazy computation + if self._Z is None: + self._Z = self.__seqToRE(pytz.all_timezones, 'Z') + # Note: handling Z is the key difference vs using the stdlib + # _strptime.TimeRE. test_to_datetime_parse_tzname_or_tzoffset with + # fmt='%Y-%m-%d %H:%M:%S %Z' fails with the stdlib version. + return self._Z + return super().__getitem__(key) + + +_cache_lock = _thread_allocate_lock() +# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock +# first! +_TimeRE_cache = TimeRE() +_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache +_regex_cache = {} + + +cdef int _calc_julian_from_U_or_W(int year, int week_of_year, + int day_of_week, int week_starts_Mon): + """ + Calculate the Julian day based on the year, week of the year, and day of + the week, with week_start_day representing whether the week of the year + assumes the week starts on Sunday or Monday (6 or 0). + + Parameters + ---------- + year : int + the year + week_of_year : int + week taken from format U or W + week_starts_Mon : int + represents whether the week of the year + assumes the week starts on Sunday or Monday (6 or 0) + + Returns + ------- + int + converted julian day + """ + + cdef: + int first_weekday, week_0_length, days_to_week + + first_weekday = date(year, 1, 1).weekday() + # If we are dealing with the %U directive (week starts on Sunday), it's + # easier to just shift the view to Sunday being the first day of the + # week. 
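As a worked example of the week-number arithmetic in _calc_julian_from_U_or_W below: 1 Jan 2021 is a Friday (weekday 4), so for %W week_0_length = (7 - 4) % 7 = 3, and week 1 with a Monday weekday gives julian = 1 + 3 + 0 = 4, i.e. 4 Jan 2021. A hedged check through the public API (not part of the patch):

import pandas as pd

assert pd.to_datetime("2021 01 1", format="%Y %W %w") == pd.Timestamp("2021-01-04")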
+ if not week_starts_Mon: + first_weekday = (first_weekday + 1) % 7 + day_of_week = (day_of_week + 1) % 7 + + # Need to watch out for a week 0 (when the first day of the year is not + # the same as that specified by %U or %W). + week_0_length = (7 - first_weekday) % 7 + if week_of_year == 0: + return 1 + day_of_week - first_weekday + else: + days_to_week = week_0_length + (7 * (week_of_year - 1)) + return 1 + days_to_week + day_of_week + + +cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): + """ + Calculate the Julian day based on the ISO 8601 year, week, and weekday. + + ISO weeks start on Mondays, with week 01 being the week containing 4 Jan. + ISO week days range from 1 (Monday) to 7 (Sunday). + + Parameters + ---------- + iso_year : int + the year taken from format %G + iso_week : int + the week taken from format %V + iso_weekday : int + weekday taken from format %u + + Returns + ------- + (int, int) + the iso year and the Gregorian ordinal date / julian date + """ + + cdef: + int correction, ordinal + + correction = date(iso_year, 1, 4).isoweekday() + 3 + ordinal = (iso_week * 7) + iso_weekday - correction + # ordinal may be negative or 0 now, which means the date is in the previous + # calendar year + if ordinal < 1: + ordinal += date(iso_year, 1, 1).toordinal() + iso_year -= 1 + ordinal -= date(iso_year, 1, 1).toordinal() + return iso_year, ordinal + + +cdef tzinfo parse_timezone_directive(str z): + """ + Parse the '%z' directive and return a pytz.FixedOffset + + Parameters + ---------- + z : string of the UTC offset + + Returns + ------- + pytz.FixedOffset + + Notes + ----- + This is essentially similar to the cpython implementation + https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479 + """ + + cdef: + int gmtoff_fraction, hours, minutes, seconds, pad_number, microseconds + int total_minutes + object gmtoff_remainder, gmtoff_remainder_padding + + if z == 'Z': + return pytz.FixedOffset(0) + if z[3] == ':': + z = z[:3] + z[4:] + if len(z) > 5: + if z[5] != ':': + raise ValueError(f"Inconsistent use of : in {z}") + z = z[:5] + z[6:] + hours = int(z[1:3]) + minutes = int(z[3:5]) + seconds = int(z[5:7] or 0) + + # Pad to always return microseconds. + gmtoff_remainder = z[8:] + pad_number = 6 - len(gmtoff_remainder) + gmtoff_remainder_padding = "0" * pad_number + microseconds = int(gmtoff_remainder + gmtoff_remainder_padding) + + total_minutes = ((hours * 60) + minutes + (seconds // 60) + + (microseconds // 60_000_000)) + total_minutes = -total_minutes if z.startswith("-") else total_minutes + return pytz.FixedOffset(total_minutes) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd new file mode 100644 index 00000000..3251e10a --- /dev/null +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -0,0 +1,27 @@ +from cpython.datetime cimport timedelta +from numpy cimport int64_t + +from .np_datetime cimport NPY_DATETIMEUNIT + + +# Exposed for tslib, not intended for outside use. +cpdef int64_t delta_to_nanoseconds( + delta, NPY_DATETIMEUNIT reso=*, bint round_ok=* +) except? 
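The parse_timezone_directive helper above reduces a %z match to a whole number of minutes and returns a pytz.FixedOffset, with a literal 'Z' mapped to FixedOffset(0). A minimal illustration via pd.to_datetime (assumes pandas 1.5.x as pd):

import pandas as pd

ts = pd.to_datetime("2021-01-02 03:04:05 +05:30", format="%Y-%m-%d %H:%M:%S %z")
assert ts.utcoffset() == pd.Timedelta(minutes=330)   # the offset is carried as fixed minutes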
-1 +cdef convert_to_timedelta64(object ts, str unit) +cdef bint is_any_td_scalar(object obj) +cdef object ensure_td64ns(object ts) + + +cdef class _Timedelta(timedelta): + cdef readonly: + int64_t value # nanoseconds + bint _is_populated # are my components populated + int64_t _d, _h, _m, _s, _ms, _us, _ns + NPY_DATETIMEUNIT _reso + + cpdef timedelta to_pytimedelta(_Timedelta self) + cdef bint _has_ns(self) + cdef _ensure_components(_Timedelta self) + cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op) + cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=*) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi new file mode 100644 index 00000000..1fb2bf1b --- /dev/null +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -0,0 +1,156 @@ +from datetime import timedelta +from typing import ( + ClassVar, + Literal, + TypeVar, + overload, +) + +import numpy as np + +from pandas._libs.tslibs import ( + NaTType, + Tick, +) +from pandas._typing import npt + +# This should be kept consistent with the keys in the dict timedelta_abbrevs +# in pandas/_libs/tslibs/timedeltas.pyx +UnitChoices = Literal[ + "Y", + "y", + "M", + "W", + "w", + "D", + "d", + "days", + "day", + "hours", + "hour", + "hr", + "h", + "m", + "minute", + "min", + "minutes", + "t", + "s", + "seconds", + "sec", + "second", + "ms", + "milliseconds", + "millisecond", + "milli", + "millis", + "l", + "us", + "microseconds", + "microsecond", + "µs", + "micro", + "micros", + "u", + "ns", + "nanoseconds", + "nano", + "nanos", + "nanosecond", + "n", +] +_S = TypeVar("_S", bound=timedelta) + +def ints_to_pytimedelta( + arr: npt.NDArray[np.timedelta64], + box: bool = ..., +) -> npt.NDArray[np.object_]: ... +def array_to_timedelta64( + values: npt.NDArray[np.object_], + unit: str | None = ..., + errors: str = ..., +) -> np.ndarray: ... # np.ndarray[m8ns] +def parse_timedelta_unit(unit: str | None) -> UnitChoices: ... +def delta_to_nanoseconds( + delta: np.timedelta64 | timedelta | Tick, + reso: int = ..., # NPY_DATETIMEUNIT + round_ok: bool = ..., +) -> int: ... + +class Timedelta(timedelta): + min: ClassVar[Timedelta] + max: ClassVar[Timedelta] + resolution: ClassVar[Timedelta] + value: int # np.int64 + # error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]") + def __new__( # type: ignore[misc] + cls: type[_S], + value=..., + unit: str | None = ..., + **kwargs: float | np.integer | np.floating, + ) -> _S | NaTType: ... + @classmethod + def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ... + @property + def days(self) -> int: ... + @property + def seconds(self) -> int: ... + @property + def microseconds(self) -> int: ... + def total_seconds(self) -> float: ... + def to_pytimedelta(self) -> timedelta: ... + def to_timedelta64(self) -> np.timedelta64: ... + @property + def asm8(self) -> np.timedelta64: ... + # TODO: round/floor/ceil could return NaT? + def round(self: _S, freq: str) -> _S: ... + def floor(self: _S, freq: str) -> _S: ... + def ceil(self: _S, freq: str) -> _S: ... + @property + def resolution_string(self) -> str: ... + def __add__(self, other: timedelta) -> Timedelta: ... + def __radd__(self, other: timedelta) -> Timedelta: ... + def __sub__(self, other: timedelta) -> Timedelta: ... + def __rsub__(self, other: timedelta) -> Timedelta: ... + def __neg__(self) -> Timedelta: ... + def __pos__(self) -> Timedelta: ... + def __abs__(self) -> Timedelta: ... + def __mul__(self, other: float) -> Timedelta: ... 
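The UnitChoices literal mirrors the alias table in timedeltas.pyx, so several spellings construct the same Timedelta. A quick illustrative check (not part of the patch):

import pandas as pd

assert pd.Timedelta(1, unit="hr") == pd.Timedelta(1, unit="h") == pd.Timedelta("1 hour")
assert pd.Timedelta(90, unit="s") == pd.Timedelta(minutes=1, seconds=30)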
+ def __rmul__(self, other: float) -> Timedelta: ... + # error: Signature of "__floordiv__" incompatible with supertype "timedelta" + @overload # type: ignore[override] + def __floordiv__(self, other: timedelta) -> int: ... + @overload + def __floordiv__(self, other: float) -> Timedelta: ... + @overload + def __floordiv__( + self, other: npt.NDArray[np.timedelta64] + ) -> npt.NDArray[np.intp]: ... + @overload + def __floordiv__( + self, other: npt.NDArray[np.number] + ) -> npt.NDArray[np.timedelta64] | Timedelta: ... + @overload + def __rfloordiv__(self, other: timedelta | str) -> int: ... + @overload + def __rfloordiv__(self, other: None | NaTType) -> NaTType: ... + @overload + def __rfloordiv__(self, other: np.ndarray) -> npt.NDArray[np.timedelta64]: ... + @overload + def __truediv__(self, other: timedelta) -> float: ... + @overload + def __truediv__(self, other: float) -> Timedelta: ... + def __mod__(self, other: timedelta) -> Timedelta: ... + def __divmod__(self, other: timedelta) -> tuple[int, Timedelta]: ... + def __le__(self, other: timedelta) -> bool: ... + def __lt__(self, other: timedelta) -> bool: ... + def __ge__(self, other: timedelta) -> bool: ... + def __gt__(self, other: timedelta) -> bool: ... + def __hash__(self) -> int: ... + def isoformat(self) -> str: ... + def to_numpy(self) -> np.timedelta64: ... + @property + def freq(self) -> None: ... + @property + def is_populated(self) -> bool: ... + def _as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ... diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx new file mode 100644 index 00000000..0b33af11 --- /dev/null +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -0,0 +1,2042 @@ +import collections +import warnings + +from pandas.util._exceptions import find_stack_level + +cimport cython +from cpython.object cimport ( + Py_EQ, + Py_NE, + PyObject, + PyObject_RichCompare, +) + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int64_t, + ndarray, +) + +cnp.import_array() + +from cpython.datetime cimport ( + PyDateTime_Check, + PyDelta_Check, + import_datetime, + timedelta, +) + +import_datetime() + + +cimport pandas._libs.tslibs.util as util +from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.conversion cimport ( + cast_from_unit, + precision_from_unit, +) +from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + c_nat_strings as nat_strings, + checknull_with_nat, +) +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_ns, + cmp_dtstructs, + cmp_scalar, + convert_reso, + get_conversion_factor, + get_datetime64_unit, + get_timedelta64_value, + get_unit_from_dtype, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, + pandas_timedelta_to_timedeltastruct, + pandas_timedeltastruct, +) + +from pandas._libs.tslibs.np_datetime import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) + +from pandas._libs.tslibs.offsets cimport is_tick_object +from pandas._libs.tslibs.util cimport ( + is_array, + is_datetime64_object, + is_float_object, + is_integer_object, + is_timedelta64_object, +) + +from pandas._libs.tslibs.fields import ( + RoundTo, + round_nsint64, +) + +# ---------------------------------------------------------------------- +# Constants + +# components named tuple +Components = collections.namedtuple( + "Components", + [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + ], +) + +# This 
should be kept consistent with UnitChoices in pandas/_libs/tslibs/timedeltas.pyi +cdef dict timedelta_abbrevs = { + "Y": "Y", + "y": "Y", + "M": "M", + "W": "W", + "w": "W", + "D": "D", + "d": "D", + "days": "D", + "day": "D", + "hours": "h", + "hour": "h", + "hr": "h", + "h": "h", + "m": "m", + "minute": "m", + "min": "m", + "minutes": "m", + "t": "m", + "s": "s", + "seconds": "s", + "sec": "s", + "second": "s", + "ms": "ms", + "milliseconds": "ms", + "millisecond": "ms", + "milli": "ms", + "millis": "ms", + "l": "ms", + "us": "us", + "microseconds": "us", + "microsecond": "us", + "µs": "us", + "micro": "us", + "micros": "us", + "u": "us", + "ns": "ns", + "nanoseconds": "ns", + "nano": "ns", + "nanos": "ns", + "nanosecond": "ns", + "n": "ns", +} + +_no_input = object() + + +# ---------------------------------------------------------------------- +# API + +@cython.boundscheck(False) +@cython.wraparound(False) +def ints_to_pytimedelta(ndarray m8values, box=False): + """ + convert an i8 repr to an ndarray of timedelta or Timedelta (if box == + True) + + Parameters + ---------- + arr : ndarray[timedelta64] + box : bool, default False + + Returns + ------- + result : ndarray[object] + array of Timedelta or timedeltas objects + """ + cdef: + NPY_DATETIMEUNIT reso = get_unit_from_dtype(m8values.dtype) + Py_ssize_t i, n = m8values.size + int64_t value + object res_val + + # Note that `result` (and thus `result_flat`) is C-order and + # `it` iterates C-order as well, so the iteration matches + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0) + object[::1] res_flat = result.ravel() # should NOT be a copy + + ndarray arr = m8values.view("i8") + cnp.flatiter it = cnp.PyArray_IterNew(arr) + + for i in range(n): + # Analogous to: value = arr[i] + value = (cnp.PyArray_ITER_DATA(it))[0] + + if value == NPY_NAT: + res_val = NaT + else: + if box: + res_val = _timedelta_from_value_and_reso(Timedelta, value, reso=reso) + elif reso == NPY_DATETIMEUNIT.NPY_FR_ns: + res_val = timedelta(microseconds=int(value) / 1000) + elif reso == NPY_DATETIMEUNIT.NPY_FR_us: + res_val = timedelta(microseconds=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + res_val = timedelta(milliseconds=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + res_val = timedelta(seconds=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_m: + res_val = timedelta(minutes=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_h: + res_val = timedelta(hours=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_D: + res_val = timedelta(days=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_W: + res_val = timedelta(weeks=value) + else: + # Month, Year, NPY_FR_GENERIC, pico, femto, atto + raise NotImplementedError(reso) + + # Note: we can index result directly instead of using PyArray_MultiIter_DATA + # like we do for the other functions because result is known C-contiguous + # and is the first argument to PyArray_MultiIterNew2. The usual pattern + # does not seem to work with object dtype. + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + res_flat[i] = res_val + + cnp.PyArray_ITER_NEXT(it) + + return result + + +# ---------------------------------------------------------------------- + + +cpdef int64_t delta_to_nanoseconds( + delta, + NPY_DATETIMEUNIT reso=NPY_FR_ns, + bint round_ok=True, +) except? 
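Conversions like the one above are what the public object-dtype round-trips go through; a small, hedged sketch of that surface (the box=False path hands back plain datetime.timedelta objects):

import pandas as pd

tdi = pd.to_timedelta(["1 days", "2 hours"])
tdi.to_pytimedelta()                  # numpy object array of datetime.timedelta
pd.Series(tdi).dt.to_pytimedelta()    # the same conversion via the .dt accessor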
-1: + # Note: this will raise on timedelta64 with Y or M unit + + cdef: + NPY_DATETIMEUNIT in_reso + int64_t n + + if is_tick_object(delta): + n = delta.n + in_reso = delta._reso + + elif isinstance(delta, _Timedelta): + n = delta.value + in_reso = delta._reso + + elif is_timedelta64_object(delta): + in_reso = get_datetime64_unit(delta) + if in_reso == NPY_DATETIMEUNIT.NPY_FR_Y or in_reso == NPY_DATETIMEUNIT.NPY_FR_M: + raise ValueError( + "delta_to_nanoseconds does not support Y or M units, " + "as their duration in nanoseconds is ambiguous." + ) + n = get_timedelta64_value(delta) + + elif PyDelta_Check(delta): + in_reso = NPY_DATETIMEUNIT.NPY_FR_us + try: + n = ( + delta.days * 24 * 3600 * 1_000_000 + + delta.seconds * 1_000_000 + + delta.microseconds + ) + except OverflowError as err: + raise OutOfBoundsTimedelta(*err.args) from err + + else: + raise TypeError(type(delta)) + + try: + return convert_reso(n, in_reso, reso, round_ok=round_ok) + except (OutOfBoundsDatetime, OverflowError) as err: + # Catch OutOfBoundsDatetime bc convert_reso can call check_dts_bounds + # for Y/M-resolution cases + unit_str = npy_unit_to_abbrev(reso) + raise OutOfBoundsTimedelta( + f"Cannot cast {str(delta)} to unit={unit_str} without overflow." + ) from err + + +@cython.overflowcheck(True) +cdef object ensure_td64ns(object ts): + """ + Overflow-safe implementation of td64.astype("m8[ns]") + + Parameters + ---------- + ts : np.timedelta64 + + Returns + ------- + np.timedelta64[ns] + """ + cdef: + NPY_DATETIMEUNIT td64_unit + int64_t td64_value, mult + str unitstr + + td64_unit = get_datetime64_unit(ts) + if ( + td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns + and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC + ): + unitstr = npy_unit_to_abbrev(td64_unit) + + td64_value = get_timedelta64_value(ts) + + mult = precision_from_unit(unitstr)[0] + try: + # NB: cython#1381 this cannot be *= + td64_value = td64_value * mult + except OverflowError as err: + raise OutOfBoundsTimedelta(ts) from err + + return np.timedelta64(td64_value, "ns") + + return ts + + +cdef convert_to_timedelta64(object ts, str unit): + """ + Convert an incoming object to a timedelta64 if possible. 
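The PyDelta branch above first expresses a stdlib timedelta in microseconds and then performs a checked cast to the target resolution. A tiny numeric check of that arithmetic, not part of the patch:

from datetime import timedelta

import pandas as pd

td = timedelta(days=1, seconds=2, microseconds=3)
# days*86_400_000_000 us + seconds*1_000_000 us + us, then *1_000 to reach nanoseconds
assert pd.Timedelta(td).value == (1 * 86_400_000_000 + 2 * 1_000_000 + 3) * 1_000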
+ Before calling, unit must be standardized to avoid repeated unit conversion + + Handle these types of objects: + - timedelta/Timedelta + - timedelta64 + - an offset + - np.int64 (with unit providing a possible modifier) + - None/NaT + + Return an ns based int64 + """ + # Caller is responsible for checking unit not in ["Y", "y", "M"] + + if checknull_with_nat(ts): + return np.timedelta64(NPY_NAT, "ns") + elif isinstance(ts, _Timedelta): + # already in the proper format + if ts._reso != NPY_FR_ns: + raise NotImplementedError + ts = np.timedelta64(ts.value, "ns") + elif is_timedelta64_object(ts): + ts = ensure_td64ns(ts) + elif is_integer_object(ts): + if ts == NPY_NAT: + return np.timedelta64(NPY_NAT, "ns") + else: + ts = _maybe_cast_from_unit(ts, unit) + elif is_float_object(ts): + ts = _maybe_cast_from_unit(ts, unit) + elif isinstance(ts, str): + if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"): + ts = parse_iso_format_string(ts) + else: + ts = parse_timedelta_string(ts) + ts = np.timedelta64(ts, "ns") + elif is_tick_object(ts): + ts = np.timedelta64(ts.nanos, "ns") + + if PyDelta_Check(ts): + ts = np.timedelta64(delta_to_nanoseconds(ts), "ns") + elif not is_timedelta64_object(ts): + raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}") + return ts.astype("timedelta64[ns]") + + +cdef _maybe_cast_from_unit(ts, str unit): + # caller is responsible for checking + # assert unit not in ["Y", "y", "M"] + try: + ts = cast_from_unit(ts, unit) + except OverflowError as err: + raise OutOfBoundsTimedelta( + f"Cannot cast {ts} from {unit} to 'ns' without overflow." + ) from err + + ts = np.timedelta64(ts, "ns") + return ts + + +@cython.boundscheck(False) +@cython.wraparound(False) +def array_to_timedelta64( + ndarray values, str unit=None, str errors="raise" +) -> ndarray: + # values is object-dtype, may be 2D + """ + Convert an ndarray to an array of timedeltas. If errors == 'coerce', + coerce non-convertible objects to NaT. Otherwise, raise. + + Returns + ------- + np.ndarray[timedelta64ns] + """ + # Caller is responsible for checking + assert unit not in ["Y", "y", "M"] + + cdef: + Py_ssize_t i, n = values.size + ndarray result = np.empty((values).shape, dtype="m8[ns]") + object item + int64_t ival + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values) + cnp.flatiter it + + if values.descr.type_num != cnp.NPY_OBJECT: + # raise here otherwise we segfault below + raise TypeError("array_to_timedelta64 'values' must have object dtype") + + if errors not in {'ignore', 'raise', 'coerce'}: + raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}") + + if unit is not None and errors != "coerce": + it = cnp.PyArray_IterNew(values) + for i in range(n): + # Analogous to: item = values[i] + item = cnp.PyArray_GETITEM(values, cnp.PyArray_ITER_DATA(it)) + if isinstance(item, str): + raise ValueError( + "unit must not be specified if the input contains a str" + ) + cnp.PyArray_ITER_NEXT(it) + + # Usually, we have all strings. If so, we hit the fast path. + # If this path fails, we try conversion a different way, and + # this is where all of the error handling will take place. 
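The error handling described above is what makes errors='coerce' turn unparseable elements into NaT instead of raising. Illustrative usage (assumes pandas 1.5.x as pd):

import pandas as pd

pd.to_timedelta(["1 days", "00:02:03", "not a timedelta"], errors="coerce")
# the third element becomes NaT rather than raising a ValueError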
+ try: + for i in range(n): + # Analogous to: item = values[i] + item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + ival = _item_to_timedelta64_fastpath(item) + + # Analogous to: iresult[i] = ival + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival + + cnp.PyArray_MultiIter_NEXT(mi) + + except (TypeError, ValueError): + cnp.PyArray_MultiIter_RESET(mi) + + parsed_unit = parse_timedelta_unit(unit or 'ns') + for i in range(n): + item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + ival = _item_to_timedelta64(item, parsed_unit, errors) + + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival + + cnp.PyArray_MultiIter_NEXT(mi) + + return result + + +cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1: + """ + See array_to_timedelta64. + """ + if item is NaT: + # we allow this check in the fast-path because NaT is a C-object + # so this is an inexpensive check + return NPY_NAT + else: + return parse_timedelta_string(item) + + +cdef inline int64_t _item_to_timedelta64(object item, str parsed_unit, str errors) except? -1: + """ + See array_to_timedelta64. + """ + try: + return get_timedelta64_value(convert_to_timedelta64(item, parsed_unit)) + except ValueError as err: + if errors == "coerce": + return NPY_NAT + elif "unit abbreviation w/o a number" in str(err): + # re-raise with more pertinent message + msg = f"Could not convert '{item}' to NumPy timedelta" + raise ValueError(msg) from err + else: + raise + + +cdef inline int64_t parse_timedelta_string(str ts) except? -1: + """ + Parse a regular format timedelta string. Return an int64_t (in ns) + or raise a ValueError on an invalid parse. + """ + + cdef: + unicode c + bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0 + object current_unit = None + int64_t result = 0, m = 0, r + list number = [], frac = [], unit = [] + + # neg : tracks if we have a leading negative for the value + # have_dot : tracks if we are processing a dot (either post hhmmss or + # inside an expression) + # have_value : track if we have at least 1 leading unit + # have_hhmmss : tracks if we have a regular format hh:mm:ss + + if len(ts) == 0 or ts in nat_strings: + return NPY_NAT + + for c in ts: + + # skip whitespace / commas + if c == ' ' or c == ',': + pass + + # positive signs are ignored + elif c == '+': + pass + + # neg + elif c == '-': + + if neg or have_value or have_hhmmss: + raise ValueError("only leading negative signs are allowed") + + neg = 1 + + # number (ascii codes) + elif ord(c) >= 48 and ord(c) <= 57: + + if have_dot: + + # we found a dot, but now its just a fraction + if len(unit): + number.append(c) + have_dot = 0 + else: + frac.append(c) + + elif not len(unit): + number.append(c) + + else: + r = timedelta_from_spec(number, frac, unit) + unit, number, frac = [], [c], [] + + result += timedelta_as_neg(r, neg) + + # hh:mm:ss. 
+ elif c == ':': + + # we flip this off if we have a leading value + if have_value: + neg = 0 + + # we are in the pattern hh:mm:ss pattern + if len(number): + if current_unit is None: + current_unit = 'h' + m = 1000000000 * 3600 + elif current_unit == 'h': + current_unit = 'm' + m = 1000000000 * 60 + elif current_unit == 'm': + current_unit = 's' + m = 1000000000 + r = int(''.join(number)) * m + result += timedelta_as_neg(r, neg) + have_hhmmss = 1 + else: + raise ValueError(f"expecting hh:mm:ss format, received: {ts}") + + unit, number = [], [] + + # after the decimal point + elif c == '.': + + if len(number) and current_unit is not None: + + # by definition we had something like + # so we need to evaluate the final field from a + # hh:mm:ss (so current_unit is 'm') + if current_unit != 'm': + raise ValueError("expected hh:mm:ss format before .") + m = 1000000000 + r = int(''.join(number)) * m + result += timedelta_as_neg(r, neg) + have_value = 1 + unit, number, frac = [], [], [] + + have_dot = 1 + + # unit + else: + unit.append(c) + have_value = 1 + have_dot = 0 + + # we had a dot, but we have a fractional + # value since we have an unit + if have_dot and len(unit): + r = timedelta_from_spec(number, frac, unit) + result += timedelta_as_neg(r, neg) + + # we have a dot as part of a regular format + # e.g. hh:mm:ss.fffffff + elif have_dot: + + if ((len(number) or len(frac)) and not len(unit) + and current_unit is None): + raise ValueError("no units specified") + + if len(frac) > 0 and len(frac) <= 3: + m = 10**(3 -len(frac)) * 1000 * 1000 + elif len(frac) > 3 and len(frac) <= 6: + m = 10**(6 -len(frac)) * 1000 + elif len(frac) > 6 and len(frac) <= 9: + m = 10**(9 -len(frac)) + else: + m = 1 + frac = frac[:9] + r = int(''.join(frac)) * m + result += timedelta_as_neg(r, neg) + + # we have a regular format + # we must have seconds at this point (hence the unit is still 'm') + elif current_unit is not None: + if current_unit != 'm': + raise ValueError("expected hh:mm:ss format") + m = 1000000000 + r = int(''.join(number)) * m + result += timedelta_as_neg(r, neg) + + # we have a last abbreviation + elif len(unit): + if len(number): + r = timedelta_from_spec(number, frac, unit) + result += timedelta_as_neg(r, neg) + else: + raise ValueError("unit abbreviation w/o a number") + + # we only have symbols and no numbers + elif len(number) == 0: + raise ValueError("symbols w/o a number") + + # treat as nanoseconds + # but only if we don't have anything else + else: + if have_value: + raise ValueError("have leftover units") + if len(number): + r = timedelta_from_spec(number, frac, 'ns') + result += timedelta_as_neg(r, neg) + + return result + + +cdef inline int64_t timedelta_as_neg(int64_t value, bint neg): + """ + + Parameters + ---------- + value : int64_t of the timedelta value + neg : bool if the a negative value + """ + if neg: + return -value + return value + + +cdef inline timedelta_from_spec(object number, object frac, object unit): + """ + + Parameters + ---------- + number : a list of number digits + frac : a list of frac digits + unit : a list of unit characters + """ + cdef: + str n + + unit = ''.join(unit) + if unit in ["M", "Y", "y"]: + warnings.warn( + "Units 'M', 'Y' and 'y' do not represent unambiguous " + "timedelta values and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if unit == 'M': + # To parse ISO 8601 string, 'M' should be treated as minute, + # not month + unit = 'm' + unit = parse_timedelta_unit(unit) + + n = ''.join(number) + 
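parse_timedelta_string therefore accepts both "number unit" terms and an hh:mm:ss[.fff] tail, with bare unit abbreviations allowed. A couple of illustrative inputs (not part of the patch):

import pandas as pd

pd.Timedelta("1 days 02:03:04.000005")                                # mixed form
assert pd.Timedelta("2h30min") == pd.Timedelta(hours=2, minutes=30)   # abbreviation form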
'.' + ''.join(frac) + return cast_from_unit(float(n), unit) + + +cpdef inline str parse_timedelta_unit(str unit): + """ + Parameters + ---------- + unit : str or None + + Returns + ------- + str + Canonical unit string. + + Raises + ------ + ValueError : on non-parseable input + """ + if unit is None: + return "ns" + elif unit == "M": + return unit + try: + return timedelta_abbrevs[unit.lower()] + except KeyError: + raise ValueError(f"invalid unit abbreviation: {unit}") + +# ---------------------------------------------------------------------- +# Timedelta ops utilities + +cdef bint _validate_ops_compat(other): + # return True if we are compat with operating + if checknull_with_nat(other): + return True + elif is_any_td_scalar(other): + return True + elif isinstance(other, str): + return True + return False + + +def _op_unary_method(func, name): + def f(self): + new_value = func(self.value) + return _timedelta_from_value_and_reso(Timedelta, new_value, self._reso) + f.__name__ = name + return f + + +def _binary_op_method_timedeltalike(op, name): + # define a binary operation that only works if the other argument is + # timedelta like or an array of timedeltalike + def f(self, other): + if other is NaT: + return NaT + + elif is_datetime64_object(other) or ( + PyDateTime_Check(other) and not isinstance(other, ABCTimestamp) + ): + # this case is for a datetime object that is specifically + # *not* a Timestamp, as the Timestamp case will be + # handled after `_validate_ops_compat` returns False below + from pandas._libs.tslibs.timestamps import Timestamp + return op(self, Timestamp(other)) + # We are implicitly requiring the canonical behavior to be + # defined by Timestamp methods. + + elif is_array(other): + if other.ndim == 0: + # see also: item_from_zerodim + item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) + return f(self, item) + + elif other.dtype.kind in ['m', 'M']: + return op(self.to_timedelta64(), other) + elif other.dtype.kind == 'O': + return np.array([op(self, x) for x in other]) + else: + return NotImplemented + + elif not _validate_ops_compat(other): + # Includes any of our non-cython classes + return NotImplemented + + try: + other = Timedelta(other) + except ValueError: + # failed to parse as timedelta + return NotImplemented + + if other is NaT: + # e.g. if original other was timedelta64('NaT') + return NaT + + # We allow silent casting to the lower resolution if and only + # if it is lossless. + try: + if self._reso < other._reso: + other = (<_Timedelta>other)._as_reso(self._reso, round_ok=False) + elif self._reso > other._reso: + self = (<_Timedelta>self)._as_reso(other._reso, round_ok=False) + except ValueError as err: + raise ValueError( + "Timedelta addition/subtraction with mismatched resolutions is not " + "allowed when casting to the lower resolution would require " + "lossy rounding." + ) from err + + res = op(self.value, other.value) + if res == NPY_NAT: + # e.g. test_implementation_limits + # TODO: more generally could do an overflowcheck in op? + return NaT + + return _timedelta_from_value_and_reso(Timedelta, res, reso=self._reso) + + f.__name__ = name + return f + + +# ---------------------------------------------------------------------- +# Timedelta Construction + +cdef inline int64_t parse_iso_format_string(str ts) except? 
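One consequence of the dispatch above: a plain datetime.datetime operand is promoted to Timestamp before the arithmetic, so the result is a Timestamp rather than a stdlib datetime. A short sketch (not part of the patch):

from datetime import datetime

import pandas as pd

pd.Timedelta(days=1) + datetime(2021, 1, 1)   # Timestamp('2021-01-02 00:00:00')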
-1: + """ + Extracts and cleanses the appropriate values from a match object with + groups for each component of an ISO 8601 duration + + Parameters + ---------- + ts: str + ISO 8601 Duration formatted string + + Returns + ------- + ns: int64_t + Precision in nanoseconds of matched ISO 8601 duration + + Raises + ------ + ValueError + If ``ts`` cannot be parsed + """ + + cdef: + unicode c + int64_t result = 0, r + int p = 0, sign = 1 + object dec_unit = 'ms', err_msg + bint have_dot = 0, have_value = 0, neg = 0 + list number = [], unit = [] + + err_msg = f"Invalid ISO 8601 Duration format - {ts}" + + if ts[0] == "-": + sign = -1 + ts = ts[1:] + + for c in ts: + # number (ascii codes) + if 48 <= ord(c) <= 57: + + have_value = 1 + if have_dot: + if p == 3 and dec_unit != 'ns': + unit.append(dec_unit) + if dec_unit == 'ms': + dec_unit = 'us' + elif dec_unit == 'us': + dec_unit = 'ns' + p = 0 + p += 1 + + if not len(unit): + number.append(c) + else: + r = timedelta_from_spec(number, '0', unit) + result += timedelta_as_neg(r, neg) + + neg = 0 + unit, number = [], [c] + else: + if c == 'P' or c == 'T': + pass # ignore marking characters P and T + elif c == '-': + if neg or have_value: + raise ValueError(err_msg) + else: + neg = 1 + elif c == "+": + pass + elif c in ['W', 'D', 'H', 'M']: + if c in ['H', 'M'] and len(number) > 2: + raise ValueError(err_msg) + if c == 'M': + c = 'min' + unit.append(c) + r = timedelta_from_spec(number, '0', unit) + result += timedelta_as_neg(r, neg) + + neg = 0 + unit, number = [], [] + elif c == '.': + # append any seconds + if len(number): + r = timedelta_from_spec(number, '0', 'S') + result += timedelta_as_neg(r, neg) + unit, number = [], [] + have_dot = 1 + elif c == 'S': + if have_dot: # ms, us, or ns + if not len(number) or p > 3: + raise ValueError(err_msg) + # pad to 3 digits as required + pad = 3 - p + while pad > 0: + number.append('0') + pad -= 1 + + r = timedelta_from_spec(number, '0', dec_unit) + result += timedelta_as_neg(r, neg) + else: # seconds + r = timedelta_from_spec(number, '0', 'S') + result += timedelta_as_neg(r, neg) + else: + raise ValueError(err_msg) + + if not have_value: + # Received string only - never parsed any values + raise ValueError(err_msg) + + return sign*result + + +cdef _to_py_int_float(v): + # Note: This used to be defined inside Timedelta.__new__ + # but cython will not allow `cdef` functions to be defined dynamically. + if is_integer_object(v): + return int(v) + elif is_float_object(v): + return float(v) + raise TypeError(f"Invalid type {type(v)}. Must be int or float.") + + +def _timedelta_unpickle(value, reso): + return _timedelta_from_value_and_reso(Timedelta, value, reso) + + +cdef _timedelta_from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): + # Could make this a classmethod if/when cython supports cdef classmethods + cdef: + _Timedelta td_base + + # For millisecond and second resos, we cannot actually pass int(value) because + # many cases would fall outside of the pytimedelta implementation bounds. + # We pass 0 instead, and override seconds, microseconds, days. + # In principle we could pass 0 for ns and us too. 
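parse_iso_format_string backs the handling of strings beginning with "P" (see convert_to_timedelta64 above), which is why the public constructor also accepts ISO 8601 duration strings. A minimal equivalence check (assumes pandas 1.5.x as pd):

import pandas as pd

assert pd.Timedelta("P1DT2H3M4.000005S") == pd.Timedelta("1 days 02:03:04.000005")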
+ if reso == NPY_FR_ns: + td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000) + elif reso == NPY_DATETIMEUNIT.NPY_FR_us: + td_base = _Timedelta.__new__(cls, microseconds=int(value)) + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + td_base = _Timedelta.__new__(cls, milliseconds=0) + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + td_base = _Timedelta.__new__(cls, seconds=0) + # Other resolutions are disabled but could potentially be implemented here: + # elif reso == NPY_DATETIMEUNIT.NPY_FR_m: + # td_base = _Timedelta.__new__(Timedelta, minutes=int(value)) + # elif reso == NPY_DATETIMEUNIT.NPY_FR_h: + # td_base = _Timedelta.__new__(Timedelta, hours=int(value)) + # elif reso == NPY_DATETIMEUNIT.NPY_FR_D: + # td_base = _Timedelta.__new__(Timedelta, days=int(value)) + else: + raise NotImplementedError( + "Only resolutions 's', 'ms', 'us', 'ns' are supported." + ) + + + td_base.value = value + td_base._is_populated = 0 + td_base._reso = reso + return td_base + + +class MinMaxReso: + """ + We need to define min/max/resolution on both the Timedelta _instance_ + and Timedelta class. On an instance, these depend on the object's _reso. + On the class, we default to the values we would get with nanosecond _reso. + """ + def __init__(self, name): + self._name = name + + def __get__(self, obj, type=None): + if self._name == "min": + val = np.iinfo(np.int64).min + 1 + elif self._name == "max": + val = np.iinfo(np.int64).max + else: + assert self._name == "resolution" + val = 1 + + if obj is None: + # i.e. this is on the class, default to nanos + return Timedelta(val) + else: + return Timedelta._from_value_and_reso(val, obj._reso) + + def __set__(self, obj, value): + raise AttributeError(f"{self._name} is not settable.") + + +# Similar to Timestamp/datetime, this is a construction requirement for +# timedeltas that we need to do object instantiation in python. This will +# serve as a C extension type that shadows the Python class, where we do any +# heavy lifting. +cdef class _Timedelta(timedelta): + # cdef readonly: + # int64_t value # nanoseconds + # bint _is_populated # are my components populated + # int64_t _d, _h, _m, _s, _ms, _us, _ns + # NPY_DATETIMEUNIT _reso + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + min = MinMaxReso("min") + max = MinMaxReso("max") + resolution = MinMaxReso("resolution") + + @property + def days(self) -> int: # TODO(cython3): make cdef property + # NB: using the python C-API PyDateTime_DELTA_GET_DAYS will fail + # (or be incorrect) + self._ensure_components() + return self._d + + @property + def seconds(self) -> int: # TODO(cython3): make cdef property + # NB: using the python C-API PyDateTime_DELTA_GET_SECONDS will fail + # (or be incorrect) + self._ensure_components() + return self._h * 3600 + self._m * 60 + self._s + + @property + def microseconds(self) -> int: # TODO(cython3): make cdef property + # NB: using the python C-API PyDateTime_DELTA_GET_MICROSECONDS will fail + # (or be incorrect) + self._ensure_components() + return self._ms * 1000 + self._us + + def total_seconds(self) -> float: + """Total seconds in the duration.""" + # We need to override bc we overrided days/seconds/microseconds + # TODO: add nanos/1e9? + return self.days * 24 * 3600 + self.seconds + self.microseconds / 1_000_000 + + @property + def freq(self) -> None: + """ + Freq property. + + .. deprecated:: 1.5.0 + This argument is deprecated. 
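Because of MinMaxReso, the class-level Timedelta.min/max/resolution correspond to the nanosecond int64 bounds. For reference (values as documented for nanosecond resolution; not part of the patch):

import pandas as pd

pd.Timedelta.resolution   # Timedelta('0 days 00:00:00.000000001'), i.e. one nanosecond
pd.Timedelta.max          # Timedelta('106751 days 23:47:16.854775807')   (int64 max, in ns)
pd.Timedelta.min          # Timedelta('-106751 days +00:12:43.145224193') (int64 min + 1, in ns)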
+ """ + # GH#46430 + warnings.warn( + "Timedelta.freq is deprecated and will be removed in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) + return None + + @property + def is_populated(self) -> bool: + """ + Is_populated property. + + .. deprecated:: 1.5.0 + This argument is deprecated. + """ + # GH#46430 + warnings.warn( + "Timedelta.is_populated is deprecated and will be removed in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._is_populated + + def __hash__(_Timedelta self): + if self._has_ns(): + # Note: this does *not* satisfy the invariance + # td1 == td2 \\Rightarrow hash(td1) == hash(td2) + # if td1 and td2 have different _resos. timedelta64 also has this + # non-invariant behavior. + # see GH#44504 + return hash(self.value) + else: + return timedelta.__hash__(self) + + def __richcmp__(_Timedelta self, object other, int op): + cdef: + _Timedelta ots + + if isinstance(other, _Timedelta): + ots = other + elif is_any_td_scalar(other): + ots = Timedelta(other) + # TODO: watch out for overflows + + elif other is NaT: + return op == Py_NE + + elif util.is_array(other): + if other.dtype.kind == "m": + return PyObject_RichCompare(self.asm8, other, op) + elif other.dtype.kind == "O": + # operate element-wise + return np.array( + [PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) + if op == Py_EQ: + return np.zeros(other.shape, dtype=bool) + elif op == Py_NE: + return np.ones(other.shape, dtype=bool) + return NotImplemented # let other raise TypeError + + else: + return NotImplemented + + if self._reso == ots._reso: + return cmp_scalar(self.value, ots.value, op) + return self._compare_mismatched_resos(ots, op) + + # TODO: re-use/share with Timestamp + cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op): + # Can't just dispatch to numpy as they silently overflow and get it wrong + cdef: + npy_datetimestruct dts_self + npy_datetimestruct dts_other + + # dispatch to the datetimestruct utils instead of writing new ones! + pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self) + pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other) + return cmp_dtstructs(&dts_self, &dts_other, op) + + cdef bint _has_ns(self): + if self._reso == NPY_FR_ns: + return self.value % 1000 != 0 + elif self._reso < NPY_FR_ns: + # i.e. seconds, millisecond, microsecond + return False + else: + raise NotImplementedError(self._reso) + + cdef _ensure_components(_Timedelta self): + """ + compute the components + """ + if self._is_populated: + return + + cdef: + pandas_timedeltastruct tds + + pandas_timedelta_to_timedeltastruct(self.value, self._reso, &tds) + self._d = tds.days + self._h = tds.hrs + self._m = tds.min + self._s = tds.sec + self._ms = tds.ms + self._us = tds.us + self._ns = tds.ns + self._seconds = tds.seconds + self._microseconds = tds.microseconds + + self._is_populated = 1 + + cpdef timedelta to_pytimedelta(_Timedelta self): + """ + Convert a pandas Timedelta object into a python ``datetime.timedelta`` object. + + Timedelta objects are internally saved as numpy datetime64[ns] dtype. + Use to_pytimedelta() to convert to object dtype. + + Returns + ------- + datetime.timedelta or numpy.array of datetime.timedelta + + See Also + -------- + to_timedelta : Convert argument to Timedelta type. + + Notes + ----- + Any nanosecond resolution will be lost. 
+ """ + if self._reso == NPY_FR_ns: + return timedelta(microseconds=int(self.value) / 1000) + + # TODO(@WillAyd): is this the right way to use components? + self._ensure_components() + return timedelta( + days=self._d, seconds=self._seconds, microseconds=self._microseconds + ) + + def to_timedelta64(self) -> np.timedelta64: + """ + Return a numpy.timedelta64 object with 'ns' precision. + """ + cdef: + str abbrev = npy_unit_to_abbrev(self._reso) + # TODO: way to create a np.timedelta64 obj with the reso directly + # instead of having to get the abbrev? + return np.timedelta64(self.value, abbrev) + + def to_numpy(self, dtype=None, copy=False) -> np.timedelta64: + """ + Convert the Timedelta to a NumPy timedelta64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timedelta.to_timedelta64()`. The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. + + Returns + ------- + numpy.timedelta64 + + See Also + -------- + Series.to_numpy : Similar method for Series. + """ + if dtype is not None or copy is not False: + raise ValueError( + "Timedelta.to_numpy dtype and copy arguments are ignored" + ) + return self.to_timedelta64() + + def view(self, dtype): + """ + Array view compatibility. + """ + return np.timedelta64(self.value).view(dtype) + + @property + def components(self): + """ + Return a components namedtuple-like. + """ + self._ensure_components() + # return the named tuple + return Components(self._d, self._h, self._m, self._s, + self._ms, self._us, self._ns) + + @property + def delta(self): + """ + Return the timedelta in nanoseconds (ns), for internal compatibility. + + .. deprecated:: 1.5.0 + This argument is deprecated. + + Returns + ------- + int + Timedelta in nanoseconds. + + Examples + -------- + >>> td = pd.Timedelta('1 days 42 ns') + >>> td.delta + 86400000000042 + + >>> td = pd.Timedelta('3 s') + >>> td.delta + 3000000000 + + >>> td = pd.Timedelta('3 ms 5 us') + >>> td.delta + 3005000 + + >>> td = pd.Timedelta(42, unit='ns') + >>> td.delta + 42 + """ + # Deprecated GH#46476 + warnings.warn( + "Timedelta.delta is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.value + + @property + def asm8(self) -> np.timedelta64: + """ + Return a numpy timedelta64 array scalar view. + + Provides access to the array scalar view (i.e. a combination of the + value and the units) associated with the numpy.timedelta64().view(), + including a 64-bit integer representation of the timedelta in + nanoseconds (Python int compatible). + + Returns + ------- + numpy timedelta64 array scalar view + Array scalar view of the timedelta in nanoseconds. + + Examples + -------- + >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + >>> td.asm8 + numpy.timedelta64(86520000003042,'ns') + + >>> td = pd.Timedelta('2 min 3 s') + >>> td.asm8 + numpy.timedelta64(123000000000,'ns') + + >>> td = pd.Timedelta('3 ms 5 us') + >>> td.asm8 + numpy.timedelta64(3005000,'ns') + + >>> td = pd.Timedelta(42, unit='ns') + >>> td.asm8 + numpy.timedelta64(42,'ns') + """ + return self.to_timedelta64() + + @property + def resolution_string(self) -> str: + """ + Return a string representing the lowest timedelta resolution. + + Each timedelta has a defined resolution that represents the lowest OR + most granular level of precision. 
Each level of resolution is + represented by a short string as defined below: + + Resolution: Return value + + * Days: 'D' + * Hours: 'H' + * Minutes: 'T' + * Seconds: 'S' + * Milliseconds: 'L' + * Microseconds: 'U' + * Nanoseconds: 'N' + + Returns + ------- + str + Timedelta resolution. + + Examples + -------- + >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + >>> td.resolution_string + 'N' + + >>> td = pd.Timedelta('1 days 2 min 3 us') + >>> td.resolution_string + 'U' + + >>> td = pd.Timedelta('2 min 3 s') + >>> td.resolution_string + 'S' + + >>> td = pd.Timedelta(36, unit='us') + >>> td.resolution_string + 'U' + """ + self._ensure_components() + if self._ns: + return "N" + elif self._us: + return "U" + elif self._ms: + return "L" + elif self._s: + return "S" + elif self._m: + return "T" + elif self._h: + return "H" + else: + return "D" + + @property + def nanoseconds(self): + """ + Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. + + Returns + ------- + int + Number of nanoseconds. + + See Also + -------- + Timedelta.components : Return all attributes with assigned values + (i.e. days, hours, minutes, seconds, milliseconds, microseconds, + nanoseconds). + + Examples + -------- + **Using string input** + + >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + + >>> td.nanoseconds + 42 + + **Using integer input** + + >>> td = pd.Timedelta(42, unit='ns') + >>> td.nanoseconds + 42 + """ + self._ensure_components() + return self._ns + + def _repr_base(self, format=None) -> str: + """ + + Parameters + ---------- + format : None|all|sub_day|long + + Returns + ------- + converted : string of a Timedelta + + """ + cdef: + str sign, fmt + dict comp_dict + object subs + + self._ensure_components() + + if self._d < 0: + sign = " +" + else: + sign = " " + + if format == 'all': + fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}." + "{milliseconds:03}{microseconds:03}{nanoseconds:03}") + else: + # if we have a partial day + subs = (self._h or self._m or self._s or + self._ms or self._us or self._ns) + + if self._ms or self._us or self._ns: + seconds_fmt = "{seconds:02}.{milliseconds:03}{microseconds:03}" + if self._ns: + # GH#9309 + seconds_fmt += "{nanoseconds:03}" + else: + seconds_fmt = "{seconds:02}" + + if format == 'sub_day' and not self._d: + fmt = "{hours:02}:{minutes:02}:" + seconds_fmt + elif subs or format == 'long': + fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt + else: + fmt = "{days} days" + + comp_dict = self.components._asdict() + comp_dict['sign'] = sign + + return fmt.format(**comp_dict) + + def __repr__(self) -> str: + repr_based = self._repr_base(format='long') + return f"Timedelta('{repr_based}')" + + def __str__(self) -> str: + return self._repr_base(format='long') + + def __bool__(self) -> bool: + return self.value != 0 + + def isoformat(self) -> str: + """ + Format the Timedelta as ISO 8601 Duration. + + ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the + values. See https://en.wikipedia.org/wiki/ISO_8601#Durations. + + Returns + ------- + str + + See Also + -------- + Timestamp.isoformat : Function is used to convert the given + Timestamp object into the ISO format. + + Notes + ----- + The longest component is days, whose value may be larger than + 365. + Every component is always included, even if its value is 0. + Pandas uses nanosecond precision, so up to 9 decimal places may + be included in the seconds component. + Trailing 0's are removed from the seconds component after the decimal. 
+ We do not 0 pad components, so it's `...T5H...`, not `...T05H...` + + Examples + -------- + >>> td = pd.Timedelta(days=6, minutes=50, seconds=3, + ... milliseconds=10, microseconds=10, nanoseconds=12) + + >>> td.isoformat() + 'P6DT0H50M3.010010012S' + >>> pd.Timedelta(hours=1, seconds=10).isoformat() + 'P0DT1H0M10S' + >>> pd.Timedelta(days=500.5).isoformat() + 'P500DT12H0M0S' + """ + components = self.components + seconds = (f'{components.seconds}.' + f'{components.milliseconds:0>3}' + f'{components.microseconds:0>3}' + f'{components.nanoseconds:0>3}') + # Trim unnecessary 0s, 1.000000000 -> 1 + seconds = seconds.rstrip('0').rstrip('.') + tpl = (f'P{components.days}DT{components.hours}' + f'H{components.minutes}M{seconds}S') + return tpl + + # ---------------------------------------------------------------- + # Constructors + + @classmethod + def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): + # exposing as classmethod for testing + return _timedelta_from_value_and_reso(cls, value, reso) + + def _as_unit(self, str unit, bint round_ok=True): + dtype = np.dtype(f"m8[{unit}]") + reso = get_unit_from_dtype(dtype) + try: + return self._as_reso(reso, round_ok=round_ok) + except OverflowError as err: + raise OutOfBoundsTimedelta( + f"Cannot cast {self} to unit='{unit}' without overflow." + ) from err + + @cython.cdivision(False) + cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True): + cdef: + int64_t value, mult, div, mod + + if reso == self._reso: + return self + + value = convert_reso(self.value, self._reso, reso, round_ok=round_ok) + return type(self)._from_value_and_reso(value, reso=reso) + + +# Python front end to C extension type _Timedelta +# This serves as the box for timedelta64 + +class Timedelta(_Timedelta): + """ + Represents a duration, the difference between two dates or times. + + Timedelta is the pandas equivalent of python's ``datetime.timedelta`` + and is interchangeable with it in most cases. + + Parameters + ---------- + value : Timedelta, timedelta, np.timedelta64, str, or int + unit : str, default 'ns' + Denote the unit of the input, if input is an integer. + + Possible values: + + * 'W', 'D', 'T', 'S', 'L', 'U', or 'N' + * 'days' or 'day' + * 'hours', 'hour', 'hr', or 'h' + * 'minutes', 'minute', 'min', or 'm' + * 'seconds', 'second', or 'sec' + * 'milliseconds', 'millisecond', 'millis', or 'milli' + * 'microseconds', 'microsecond', 'micros', or 'micro' + * 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'. + + **kwargs + Available kwargs: {days, seconds, microseconds, + milliseconds, minutes, hours, weeks}. + Values for construction in compat with datetime.timedelta. + Numpy ints and floats will be coerced to python ints and floats. + + Notes + ----- + The constructor may take in either both values of value and unit or + kwargs as above. Either one of them must be used during initialization + + The ``.value`` attribute is always in ns. + + If the precision is higher than nanoseconds, the precision of the duration is + truncated to nanoseconds. 
+ + Examples + -------- + Here we initialize Timedelta object with both value and unit + + >>> td = pd.Timedelta(1, "d") + >>> td + Timedelta('1 days 00:00:00') + + Here we initialize the Timedelta object with kwargs + + >>> td2 = pd.Timedelta(days=1) + >>> td2 + Timedelta('1 days 00:00:00') + + We see that either way we get the same result + """ + + _req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds", + "milliseconds", "microseconds", "nanoseconds"} + + def __new__(cls, object value=_no_input, unit=None, **kwargs): + cdef _Timedelta td_base + + if value is _no_input: + if not len(kwargs): + raise ValueError("cannot construct a Timedelta without a " + "value/unit or descriptive keywords " + "(days,seconds....)") + + kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} + + unsupported_kwargs = set(kwargs) + unsupported_kwargs.difference_update(cls._req_any_kwargs_new) + if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs): + raise ValueError( + "cannot construct a Timedelta from the passed arguments, " + "allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]" + ) + + # GH43764, convert any input to nanoseconds first and then + # create the timestamp. This ensures that any potential + # nanosecond contributions from kwargs parsed as floats + # are taken into consideration. + seconds = int(( + ( + (kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24 + + kwargs.get('hours', 0) + ) * 3600 + + kwargs.get('minutes', 0) * 60 + + kwargs.get('seconds', 0) + ) * 1_000_000_000 + ) + + value = np.timedelta64( + int(kwargs.get('nanoseconds', 0)) + + int(kwargs.get('microseconds', 0) * 1_000) + + int(kwargs.get('milliseconds', 0) * 1_000_000) + + seconds + ) + + if unit in {'Y', 'y', 'M'}: + raise ValueError( + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." 
+ ) + + # GH 30543 if pd.Timedelta already passed, return it + # check that only value is passed + if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0: + return value + elif isinstance(value, _Timedelta): + value = value.value + elif isinstance(value, str): + if unit is not None: + raise ValueError("unit must not be specified if the value is a str") + if (len(value) > 0 and value[0] == 'P') or ( + len(value) > 1 and value[:2] == '-P' + ): + value = parse_iso_format_string(value) + else: + value = parse_timedelta_string(value) + value = np.timedelta64(value) + elif PyDelta_Check(value): + value = convert_to_timedelta64(value, 'ns') + elif is_timedelta64_object(value): + value = ensure_td64ns(value) + elif is_tick_object(value): + value = np.timedelta64(value.nanos, 'ns') + elif is_integer_object(value) or is_float_object(value): + # unit=None is de-facto 'ns' + unit = parse_timedelta_unit(unit) + value = convert_to_timedelta64(value, unit) + elif checknull_with_nat(value): + return NaT + else: + raise ValueError( + "Value must be Timedelta, string, integer, " + f"float, timedelta or convertible, not {type(value).__name__}" + ) + + if is_timedelta64_object(value): + value = value.view('i8') + + # nat + if value == NPY_NAT: + return NaT + + return _timedelta_from_value_and_reso(cls, value, NPY_FR_ns) + + def __setstate__(self, state): + if len(state) == 1: + # older pickle, only supported nanosecond + value = state[0] + reso = NPY_FR_ns + else: + value, reso = state + self.value = value + self._reso = reso + + def __reduce__(self): + object_state = self.value, self._reso + return (_timedelta_unpickle, object_state) + + @cython.cdivision(True) + def _round(self, freq, mode): + cdef: + int64_t result, unit, remainder + ndarray[int64_t] arr + + from pandas._libs.tslibs.offsets import to_offset + + to_offset(freq).nanos # raises on non-fixed freq + unit = delta_to_nanoseconds(to_offset(freq), self._reso) + + arr = np.array([self.value], dtype="i8") + result = round_nsint64(arr, mode, unit)[0] + return Timedelta._from_value_and_reso(result, self._reso) + + def round(self, freq): + """ + Round the Timedelta to the specified resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the rounding resolution. + + Returns + ------- + a new Timedelta rounded to the given resolution of `freq` + + Raises + ------ + ValueError if the freq cannot be converted + """ + return self._round(freq, RoundTo.NEAREST_HALF_EVEN) + + def floor(self, freq): + """ + Return a new Timedelta floored to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the flooring resolution. + """ + return self._round(freq, RoundTo.MINUS_INFTY) + + def ceil(self, freq): + """ + Return a new Timedelta ceiled to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the ceiling resolution. + """ + return self._round(freq, RoundTo.PLUS_INFTY) + + # ---------------------------------------------------------------- + # Arithmetic Methods + # TODO: Can some of these be defined in the cython class? 
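+    # The factory helpers below (defined earlier in this module) wrap the
+    # corresponding integer operations on .value, e.g.
+    #   -Timedelta("1 days")                    -> Timedelta('-1 days +00:00:00')
+    #   Timedelta("1 h") + Timedelta("30 min")  -> Timedelta('0 days 01:30:00')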
+ + __neg__ = _op_unary_method(lambda x: -x, '__neg__') + __pos__ = _op_unary_method(lambda x: x, '__pos__') + __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + + __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') + __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') + __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') + + def __mul__(self, other): + if is_integer_object(other) or is_float_object(other): + if util.is_nan(other): + # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT + return NaT + + return _timedelta_from_value_and_reso( + Timedelta, + (other * self.value), + reso=self._reso, + ) + + elif is_array(other): + if other.ndim == 0: + # see also: item_from_zerodim + item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) + return self.__mul__(item) + return other * self.to_timedelta64() + + return NotImplemented + + __rmul__ = __mul__ + + def __truediv__(self, other): + cdef: + int64_t new_value + + if _should_cast_to_timedelta(other): + # We interpret NaT as timedelta64("NaT") + other = Timedelta(other) + if other is NaT: + return np.nan + if other._reso != self._reso: + raise ValueError( + "division between Timedeltas with mismatched resolutions " + "are not supported. Explicitly cast to matching resolutions " + "before dividing." + ) + return self.value / float(other.value) + + elif is_integer_object(other) or is_float_object(other): + # integers or floats + if util.is_nan(other): + return NaT + return Timedelta._from_value_and_reso( + (self.value / other), self._reso + ) + + elif is_array(other): + if other.ndim == 0: + # see also: item_from_zerodim + item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) + return self.__truediv__(item) + return self.to_timedelta64() / other + + return NotImplemented + + def __rtruediv__(self, other): + if _should_cast_to_timedelta(other): + # We interpret NaT as timedelta64("NaT") + other = Timedelta(other) + if other is NaT: + return np.nan + if self._reso != other._reso: + raise ValueError( + "division between Timedeltas with mismatched resolutions " + "are not supported. Explicitly cast to matching resolutions " + "before dividing." + ) + return float(other.value) / self.value + + elif is_array(other): + if other.ndim == 0: + # see also: item_from_zerodim + item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) + return self.__rtruediv__(item) + elif other.dtype.kind == "O": + # GH#31869 + return np.array([x / self for x in other]) + + # TODO: if other.dtype.kind == "m" and other.dtype != self.asm8.dtype + # then should disallow for consistency with scalar behavior; requires + # deprecation cycle. (or changing scalar behavior) + return other / self.to_timedelta64() + + return NotImplemented + + def __floordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if _should_cast_to_timedelta(other): + # We interpret NaT as timedelta64("NaT") + other = Timedelta(other) + if other is NaT: + return np.nan + if self._reso != other._reso: + raise ValueError( + "floordivision between Timedeltas with mismatched resolutions " + "are not supported. Explicitly cast to matching resolutions " + "before dividing." 
+ ) + return self.value // other.value + + elif is_integer_object(other) or is_float_object(other): + if util.is_nan(other): + return NaT + return type(self)._from_value_and_reso(self.value // other, self._reso) + + elif is_array(other): + if other.ndim == 0: + # see also: item_from_zerodim + item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) + return self.__floordiv__(item) + + if other.dtype.kind == 'm': + # also timedelta-like + if self._reso != NPY_FR_ns: + raise NotImplementedError + return _broadcast_floordiv_td64(self.value, other, _floordiv) + elif other.dtype.kind in ['i', 'u', 'f']: + if other.ndim == 0: + return self // other.item() + else: + return self.to_timedelta64() // other + + raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') + + return NotImplemented + + def __rfloordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if _should_cast_to_timedelta(other): + # We interpret NaT as timedelta64("NaT") + other = Timedelta(other) + if other is NaT: + return np.nan + if self._reso != other._reso: + raise ValueError( + "floordivision between Timedeltas with mismatched resolutions " + "are not supported. Explicitly cast to matching resolutions " + "before dividing." + ) + return other.value // self.value + + elif is_array(other): + if other.ndim == 0: + # see also: item_from_zerodim + item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) + return self.__rfloordiv__(item) + + if other.dtype.kind == 'm': + # also timedelta-like + if self._reso != NPY_FR_ns: + raise NotImplementedError + return _broadcast_floordiv_td64(self.value, other, _rfloordiv) + + # Includes integer array // Timedelta, disallowed in GH#19761 + raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') + + return NotImplemented + + def __mod__(self, other): + # Naive implementation, room for optimization + return self.__divmod__(other)[1] + + def __rmod__(self, other): + # Naive implementation, room for optimization + return self.__rdivmod__(other)[1] + + def __divmod__(self, other): + # Naive implementation, room for optimization + div = self // other + return div, self - div * other + + def __rdivmod__(self, other): + # Naive implementation, room for optimization + div = other // self + return div, other - div * self + + +cdef bint is_any_td_scalar(object obj): + """ + Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))` + + Parameters + ---------- + obj : object + + Returns + ------- + bool + """ + return ( + PyDelta_Check(obj) or is_timedelta64_object(obj) or is_tick_object(obj) + ) + + +cdef bint _should_cast_to_timedelta(object obj): + """ + Should we treat this object as a Timedelta for the purpose of a binary op + """ + return ( + is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str) + ) + + +cdef _floordiv(int64_t value, right): + return value // right + + +cdef _rfloordiv(int64_t value, right): + # analogous to referencing operator.div, but there is no operator.rfloordiv + return right // value + + +cdef _broadcast_floordiv_td64( + int64_t value, + ndarray other, + object (*operation)(int64_t value, object right) +): + """ + Boilerplate code shared by Timedelta.__floordiv__ and + Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. 
+ + Parameters + ---------- + value : int64_t; `self.value` from a Timedelta object + other : object + operation : function, either _floordiv or _rfloordiv + + Returns + ------- + result : varies based on `other` + """ + # assumes other.dtype.kind == 'm', i.e. other is timedelta-like + # assumes other.ndim != 0 + + # We need to watch out for np.timedelta64('NaT'). + mask = other.view('i8') == NPY_NAT + + res = operation(value, other.astype('m8[ns]', copy=False).astype('i8')) + + if mask.any(): + res = res.astype('f8') + res[mask] = np.nan + return res diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd new file mode 100644 index 00000000..0ecb2682 --- /dev/null +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -0,0 +1,40 @@ +from cpython.datetime cimport ( + datetime, + tzinfo, +) +from numpy cimport int64_t + +from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + npy_datetimestruct, +) +from pandas._libs.tslibs.offsets cimport BaseOffset + + +cdef _Timestamp create_timestamp_from_ts(int64_t value, + npy_datetimestruct dts, + tzinfo tz, + BaseOffset freq, + bint fold, + NPY_DATETIMEUNIT reso=*) + + +cdef class _Timestamp(ABCTimestamp): + cdef readonly: + int64_t value, nanosecond, year + BaseOffset _freq + NPY_DATETIMEUNIT _reso + + cdef bint _get_start_end_field(self, str field, freq) + cdef _get_date_name_field(self, str field, object locale) + cdef int64_t _maybe_convert_value_to_local(self) + cdef bint _can_compare(self, datetime other) + cpdef to_datetime64(self) + cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1 + cpdef void _set_freq(self, freq) + cdef _warn_on_field_deprecation(_Timestamp self, freq, str field) + cdef bint _compare_mismatched_resos(_Timestamp self, _Timestamp other, int op) + cdef _Timestamp _as_reso(_Timestamp self, NPY_DATETIMEUNIT reso, bint round_ok=*) diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi new file mode 100644 index 00000000..e4be7fda --- /dev/null +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -0,0 +1,224 @@ +from datetime import ( + date as _date, + datetime, + time as _time, + timedelta, + tzinfo as _tzinfo, +) +from time import struct_time +from typing import ( + ClassVar, + TypeVar, + overload, +) + +import numpy as np + +from pandas._libs.tslibs import ( + BaseOffset, + NaTType, + Period, + Tick, + Timedelta, +) + +_DatetimeT = TypeVar("_DatetimeT", bound=datetime) + +def integer_op_not_supported(obj: object) -> TypeError: ... + +class Timestamp(datetime): + min: ClassVar[Timestamp] + max: ClassVar[Timestamp] + + resolution: ClassVar[Timedelta] + value: int # np.int64 + # error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]") + def __new__( # type: ignore[misc] + cls: type[_DatetimeT], + ts_input: np.integer | float | str | _date | datetime | np.datetime64 = ..., + freq: int | None | str | BaseOffset = ..., + tz: str | _tzinfo | None | int = ..., + unit: str | int | None = ..., + year: int | None = ..., + month: int | None = ..., + day: int | None = ..., + hour: int | None = ..., + minute: int | None = ..., + second: int | None = ..., + microsecond: int | None = ..., + nanosecond: int | None = ..., + tzinfo: _tzinfo | None = ..., + *, + fold: int | None = ..., + ) -> _DatetimeT | NaTType: ... + def _set_freq(self, freq: BaseOffset | None) -> None: ... 
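+    # _from_value_and_reso is the low-level constructor used by the Cython
+    # implementation: an int64 payload interpreted in the given numpy
+    # datetime-unit code, plus an optional tz.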
+ @classmethod + def _from_value_and_reso( + cls, value: int, reso: int, tz: _tzinfo | None + ) -> Timestamp: ... + @property + def year(self) -> int: ... + @property + def month(self) -> int: ... + @property + def day(self) -> int: ... + @property + def hour(self) -> int: ... + @property + def minute(self) -> int: ... + @property + def second(self) -> int: ... + @property + def microsecond(self) -> int: ... + @property + def tzinfo(self) -> _tzinfo | None: ... + @property + def tz(self) -> _tzinfo | None: ... + @property + def fold(self) -> int: ... + @classmethod + def fromtimestamp( + cls: type[_DatetimeT], ts: float, tz: _tzinfo | None = ... + ) -> _DatetimeT: ... + @classmethod + def utcfromtimestamp(cls: type[_DatetimeT], ts: float) -> _DatetimeT: ... + @classmethod + def today(cls: type[_DatetimeT], tz: _tzinfo | str | None = ...) -> _DatetimeT: ... + @classmethod + def fromordinal( + cls: type[_DatetimeT], + ordinal: int, + freq: str | BaseOffset | None = ..., + tz: _tzinfo | str | None = ..., + ) -> _DatetimeT: ... + @classmethod + def now(cls: type[_DatetimeT], tz: _tzinfo | str | None = ...) -> _DatetimeT: ... + @classmethod + def utcnow(cls: type[_DatetimeT]) -> _DatetimeT: ... + # error: Signature of "combine" incompatible with supertype "datetime" + @classmethod + def combine( # type: ignore[override] + cls, date: _date, time: _time + ) -> datetime: ... + @classmethod + def fromisoformat(cls: type[_DatetimeT], date_string: str) -> _DatetimeT: ... + def strftime(self, format: str) -> str: ... + def __format__(self, fmt: str) -> str: ... + def toordinal(self) -> int: ... + def timetuple(self) -> struct_time: ... + def timestamp(self) -> float: ... + def utctimetuple(self) -> struct_time: ... + def date(self) -> _date: ... + def time(self) -> _time: ... + def timetz(self) -> _time: ... + # LSP violation: nanosecond is not present in datetime.datetime.replace + # and has positional args following it + def replace( # type: ignore[override] + self: _DatetimeT, + year: int | None = ..., + month: int | None = ..., + day: int | None = ..., + hour: int | None = ..., + minute: int | None = ..., + second: int | None = ..., + microsecond: int | None = ..., + nanosecond: int | None = ..., + tzinfo: _tzinfo | type[object] | None = ..., + fold: int | None = ..., + ) -> _DatetimeT: ... + # LSP violation: datetime.datetime.astimezone has a default value for tz + def astimezone( # type: ignore[override] + self: _DatetimeT, tz: _tzinfo | None + ) -> _DatetimeT: ... + def ctime(self) -> str: ... + def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ... + @classmethod + def strptime(cls, date_string: str, format: str) -> datetime: ... + def utcoffset(self) -> timedelta | None: ... + def tzname(self) -> str | None: ... + def dst(self) -> timedelta | None: ... + def __le__(self, other: datetime) -> bool: ... # type: ignore[override] + def __lt__(self, other: datetime) -> bool: ... # type: ignore[override] + def __ge__(self, other: datetime) -> bool: ... # type: ignore[override] + def __gt__(self, other: datetime) -> bool: ... # type: ignore[override] + # error: Signature of "__add__" incompatible with supertype "date"/"datetime" + @overload # type: ignore[override] + def __add__(self, other: np.ndarray) -> np.ndarray: ... + @overload + def __add__( + self: _DatetimeT, other: timedelta | np.timedelta64 | Tick + ) -> _DatetimeT: ... + def __radd__(self: _DatetimeT, other: timedelta) -> _DatetimeT: ... 
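+    # Subtraction is overloaded: Timestamp - datetime-like returns a Timedelta,
+    # while Timestamp - timedelta-like returns another Timestamp.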
+ @overload # type: ignore[override] + def __sub__(self, other: datetime) -> Timedelta: ... + @overload + def __sub__( + self: _DatetimeT, other: timedelta | np.timedelta64 | Tick + ) -> _DatetimeT: ... + def __hash__(self) -> int: ... + def weekday(self) -> int: ... + def isoweekday(self) -> int: ... + def isocalendar(self) -> tuple[int, int, int]: ... + @property + def is_leap_year(self) -> bool: ... + @property + def is_month_start(self) -> bool: ... + @property + def is_quarter_start(self) -> bool: ... + @property + def is_year_start(self) -> bool: ... + @property + def is_month_end(self) -> bool: ... + @property + def is_quarter_end(self) -> bool: ... + @property + def is_year_end(self) -> bool: ... + def to_pydatetime(self, warn: bool = ...) -> datetime: ... + def to_datetime64(self) -> np.datetime64: ... + def to_period(self, freq: BaseOffset | str | None = ...) -> Period: ... + def to_julian_date(self) -> np.float64: ... + @property + def asm8(self) -> np.datetime64: ... + def tz_convert(self: _DatetimeT, tz: _tzinfo | str | None) -> _DatetimeT: ... + # TODO: could return NaT? + def tz_localize( + self: _DatetimeT, + tz: _tzinfo | str | None, + ambiguous: str = ..., + nonexistent: str = ..., + ) -> _DatetimeT: ... + def normalize(self: _DatetimeT) -> _DatetimeT: ... + # TODO: round/floor/ceil could return NaT? + def round( + self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ... + ) -> _DatetimeT: ... + def floor( + self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ... + ) -> _DatetimeT: ... + def ceil( + self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ... + ) -> _DatetimeT: ... + def day_name(self, locale: str | None = ...) -> str: ... + def month_name(self, locale: str | None = ...) -> str: ... + @property + def day_of_week(self) -> int: ... + @property + def dayofweek(self) -> int: ... + @property + def day_of_year(self) -> int: ... + @property + def dayofyear(self) -> int: ... + @property + def quarter(self) -> int: ... + @property + def week(self) -> int: ... + def to_numpy( + self, dtype: np.dtype | None = ..., copy: bool = ... + ) -> np.datetime64: ... + @property + def _date_repr(self) -> str: ... + @property + def days_in_month(self) -> int: ... + @property + def daysinmonth(self) -> int: ... + def _as_unit(self, unit: str, round_ok: bool = ...) -> Timestamp: ... diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx new file mode 100644 index 00000000..a574b648 --- /dev/null +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -0,0 +1,2420 @@ +""" +_Timestamp is a c-defined subclass of datetime.datetime + +_Timestamp is PITA. Because we inherit from datetime, which has very specific +construction requirements, we need to do object instantiation in python +(see Timestamp class below). This will serve as a C extension type that +shadows the python class, where we do any heavy lifting. 
+""" +import warnings + +cimport cython + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int8_t, + int64_t, + ndarray, + uint8_t, +) + +cnp.import_array() + +from cpython.datetime cimport ( # alias bc `tzinfo` is a kwarg below + PyDate_Check, + PyDateTime_Check, + PyDelta_Check, + PyTZInfo_Check, + datetime, + import_datetime, + time, + tzinfo as tzinfo_type, +) +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, + PyObject_RichCompare, + PyObject_RichCompareBool, +) + +import_datetime() + +from pandas._libs.tslibs cimport ccalendar +from pandas._libs.tslibs.base cimport ABCTimestamp + +from pandas.util._exceptions import find_stack_level + +from pandas._libs.tslibs.conversion cimport ( + _TSObject, + convert_datetime_to_tsobject, + convert_to_tsobject, + maybe_localize_tso, +) +from pandas._libs.tslibs.dtypes cimport ( + npy_unit_to_abbrev, + periods_per_day, + periods_per_second, +) +from pandas._libs.tslibs.util cimport ( + is_array, + is_datetime64_object, + is_float_object, + is_integer_object, + is_timedelta64_object, +) + +from pandas._libs.tslibs.fields import ( + RoundTo, + get_date_name_field, + get_start_end_field, + round_nsint64, +) + +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_ns, + cmp_dtstructs, + cmp_scalar, + convert_reso, + get_conversion_factor, + get_datetime64_unit, + get_datetime64_value, + get_unit_from_dtype, + npy_datetimestruct, + npy_datetimestruct_to_datetime, + pandas_datetime_to_datetimestruct, + pydatetime_to_dtstruct, +) + +from pandas._libs.tslibs.np_datetime import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) + +from pandas._libs.tslibs.offsets cimport ( + BaseOffset, + is_offset_object, + to_offset, +) +from pandas._libs.tslibs.timedeltas cimport ( + _Timedelta, + delta_to_nanoseconds, + ensure_td64ns, + is_any_td_scalar, +) + +from pandas._libs.tslibs.timedeltas import Timedelta + +from pandas._libs.tslibs.timezones cimport ( + get_timezone, + is_utc, + maybe_get_tz, + treat_tz_as_pytz, + tz_compare, + utc_pytz as UTC, +) +from pandas._libs.tslibs.tzconversion cimport ( + tz_convert_from_utc_single, + tz_localize_to_utc_single, +) + +# ---------------------------------------------------------------------- +# Constants +_zero_time = time(0, 0) +_no_input = object() + +# ---------------------------------------------------------------------- + + +cdef inline _Timestamp create_timestamp_from_ts( + int64_t value, + npy_datetimestruct dts, + tzinfo tz, + BaseOffset freq, + bint fold, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): + """ convenience routine to construct a Timestamp from its parts """ + cdef: + _Timestamp ts_base + int64_t pass_year = dts.year + + # We pass year=1970/1972 here and set year below because with non-nanosecond + # resolution we may have datetimes outside of the stdlib pydatetime + # implementation bounds, which would raise. + # NB: this means the C-API macro PyDateTime_GET_YEAR is unreliable. 
+ if 1 <= pass_year <= 9999: + # we are in-bounds for pydatetime + pass + elif ccalendar.is_leapyear(dts.year): + pass_year = 1972 + else: + pass_year = 1970 + + ts_base = _Timestamp.__new__(Timestamp, pass_year, dts.month, + dts.day, dts.hour, dts.min, + dts.sec, dts.us, tz, fold=fold) + + ts_base.value = value + ts_base._freq = freq + ts_base.year = dts.year + ts_base.nanosecond = dts.ps // 1000 + ts_base._reso = reso + + return ts_base + + +def _unpickle_timestamp(value, freq, tz, reso=NPY_FR_ns): + # GH#41949 dont warn on unpickle if we have a freq + ts = Timestamp._from_value_and_reso(value, reso, tz) + ts._set_freq(freq) + return ts + + +# ---------------------------------------------------------------------- + +def integer_op_not_supported(obj): + # GH#22535 add/sub of integers and int-arrays is no longer allowed + # Note we return rather than raise the exception so we can raise in + # the caller; mypy finds this more palatable. + cls = type(obj).__name__ + + # GH#30886 using an fstring raises SystemError + int_addsub_msg = ( + f"Addition/subtraction of integers and integer-arrays with {cls} is " + "no longer supported. Instead of adding/subtracting `n`, " + "use `n * obj.freq`" + ) + return TypeError(int_addsub_msg) + + +class MinMaxReso: + """ + We need to define min/max/resolution on both the Timestamp _instance_ + and Timestamp class. On an instance, these depend on the object's _reso. + On the class, we default to the values we would get with nanosecond _reso. + + See also: timedeltas.MinMaxReso + """ + def __init__(self, name): + self._name = name + + def __get__(self, obj, type=None): + cls = Timestamp + if self._name == "min": + val = np.iinfo(np.int64).min + 1 + elif self._name == "max": + val = np.iinfo(np.int64).max + else: + assert self._name == "resolution" + val = 1 + cls = Timedelta + + if obj is None: + # i.e. this is on the class, default to nanos + return cls(val) + elif self._name == "resolution": + return Timedelta._from_value_and_reso(val, obj._reso) + else: + return Timestamp._from_value_and_reso(val, obj._reso, tz=None) + + def __set__(self, obj, value): + raise AttributeError(f"{self._name} is not settable.") + + +# ---------------------------------------------------------------------- + +cdef class _Timestamp(ABCTimestamp): + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + dayofweek = _Timestamp.day_of_week + dayofyear = _Timestamp.day_of_year + + min = MinMaxReso("min") + max = MinMaxReso("max") + resolution = MinMaxReso("resolution") # GH#21336, GH#21365 + + cpdef void _set_freq(self, freq): + # set the ._freq attribute without going through the constructor, + # which would issue a warning + # Caller is responsible for validation + self._freq = freq + + @property + def freq(self): + warnings.warn( + "Timestamp.freq is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._freq + + # ----------------------------------------------------------------- + # Constructors + + @classmethod + def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso, tzinfo tz): + cdef: + npy_datetimestruct dts + _TSObject obj = _TSObject() + + if value == NPY_NAT: + return NaT + + if reso < NPY_DATETIMEUNIT.NPY_FR_s or reso > NPY_DATETIMEUNIT.NPY_FR_ns: + raise NotImplementedError( + "Only resolutions 's', 'ms', 'us', 'ns' are supported." 
+ ) + + obj.value = value + pandas_datetime_to_datetimestruct(value, reso, &obj.dts) + maybe_localize_tso(obj, tz, reso) + + return create_timestamp_from_ts( + value, obj.dts, tz=obj.tzinfo, freq=None, fold=obj.fold, reso=reso + ) + + @classmethod + def _from_dt64(cls, dt64: np.datetime64): + # construct a Timestamp from a np.datetime64 object, keeping the + # resolution of the input. + # This is herely mainly so we can incrementally implement non-nano + # (e.g. only tznaive at first) + cdef: + npy_datetimestruct dts + int64_t value + NPY_DATETIMEUNIT reso + + reso = get_datetime64_unit(dt64) + value = get_datetime64_value(dt64) + return cls._from_value_and_reso(value, reso, None) + + # ----------------------------------------------------------------- + + def __hash__(_Timestamp self): + if self.nanosecond: + return hash(self.value) + if not (1 <= self.year <= 9999): + # out of bounds for pydatetime + return hash(self.value) + if self.fold: + return datetime.__hash__(self.replace(fold=0)) + return datetime.__hash__(self) + + def __richcmp__(_Timestamp self, object other, int op): + cdef: + _Timestamp ots + int ndim + + if isinstance(other, _Timestamp): + ots = other + elif other is NaT: + return op == Py_NE + elif is_datetime64_object(other): + ots = _Timestamp._from_dt64(other) + elif PyDateTime_Check(other): + if self.nanosecond == 0: + val = self.to_pydatetime() + return PyObject_RichCompareBool(val, other, op) + + try: + ots = type(self)(other) + except ValueError: + return self._compare_outside_nanorange(other, op) + + elif is_array(other): + # avoid recursion error GH#15183 + if other.dtype.kind == "M": + if self.tz is None: + return PyObject_RichCompare(self.asm8, other, op) + elif op == Py_NE: + return np.ones(other.shape, dtype=np.bool_) + elif op == Py_EQ: + return np.zeros(other.shape, dtype=np.bool_) + raise TypeError( + "Cannot compare tz-naive and tz-aware timestamps" + ) + elif other.dtype.kind == "O": + # Operate element-wise + return np.array( + [PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) + elif op == Py_NE: + return np.ones(other.shape, dtype=np.bool_) + elif op == Py_EQ: + return np.zeros(other.shape, dtype=np.bool_) + return NotImplemented + + elif PyDate_Check(other): + # returning NotImplemented defers to the `date` implementation + # which incorrectly drops tz and normalizes to midnight + # before comparing + # We follow the stdlib datetime behavior of never being equal + warnings.warn( + "Comparison of Timestamp with datetime.date is deprecated in " + "order to match the standard library behavior. " + "In a future version these will be considered non-comparable. " + "Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return NotImplemented + else: + return NotImplemented + + if not self._can_compare(ots): + if op == Py_NE or op == Py_EQ: + return NotImplemented + raise TypeError( + "Cannot compare tz-naive and tz-aware timestamps" + ) + if self._reso == ots._reso: + return cmp_scalar(self.value, ots.value, op) + return self._compare_mismatched_resos(ots, op) + + # TODO: copied from Timedelta; try to de-duplicate + cdef inline bint _compare_mismatched_resos(self, _Timestamp other, int op): + # Can't just dispatch to numpy as they silently overflow and get it wrong + cdef: + npy_datetimestruct dts_self + npy_datetimestruct dts_other + + # dispatch to the datetimestruct utils instead of writing new ones! 
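+        # e.g. a second-resolution stamp in year 5000 converted naively to
+        # nanoseconds would overflow int64; comparing the broken-out
+        # datetimestructs avoids that conversion entirely.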
+ pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self) + pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other) + return cmp_dtstructs(&dts_self, &dts_other, op) + + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1: + cdef: + datetime dtval = self.to_pydatetime(warn=False) + + if not self._can_compare(other): + return NotImplemented + + if self.nanosecond == 0: + return PyObject_RichCompareBool(dtval, other, op) + + # otherwise we have dtval < self + if op == Py_NE: + return True + if op == Py_EQ: + return False + if op == Py_LE or op == Py_LT: + return self.year <= other.year + if op == Py_GE or op == Py_GT: + return self.year >= other.year + + cdef bint _can_compare(self, datetime other): + if self.tzinfo is not None: + return other.tzinfo is not None + return other.tzinfo is None + + @cython.overflowcheck(True) + def __add__(self, other): + cdef: + int64_t nanos = 0 + + if is_any_td_scalar(other): + if is_timedelta64_object(other): + other_reso = get_datetime64_unit(other) + if ( + other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC + ): + # TODO: deprecate allowing this? We only get here + # with test_timedelta_add_timestamp_interval + other = np.timedelta64(other.view("i8"), "ns") + elif ( + other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M + ): + # TODO: deprecate allowing these? or handle more like the + # corresponding DateOffsets? + # TODO: no tests get here + other = ensure_td64ns(other) + + if isinstance(other, _Timedelta): + # TODO: share this with __sub__, Timedelta.__add__ + # We allow silent casting to the lower resolution if and only + # if it is lossless. See also Timestamp.__sub__ + # and Timedelta.__add__ + try: + if self._reso < other._reso: + other = (<_Timedelta>other)._as_reso(self._reso, round_ok=False) + elif self._reso > other._reso: + self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) + except ValueError as err: + raise ValueError( + "Timestamp addition with mismatched resolutions is not " + "allowed when casting to the lower resolution would require " + "lossy rounding." + ) from err + + try: + nanos = delta_to_nanoseconds( + other, reso=self._reso, round_ok=False + ) + except OutOfBoundsTimedelta: + raise + except ValueError as err: + raise ValueError( + "Addition between Timestamp and Timedelta with mismatched " + "resolutions is not allowed when casting to the lower " + "resolution would require lossy rounding." 
+ ) from err + + try: + new_value = self.value + nanos + except OverflowError: + # Use Python ints + # Hit in test_tdi_add_overflow + new_value = int(self.value) + int(nanos) + + try: + result = type(self)._from_value_and_reso( + new_value, reso=self._reso, tz=self.tzinfo + ) + except OverflowError as err: + # TODO: don't hard-code nanosecond here + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {new_value}" + ) from err + + if result is not NaT: + result._set_freq(self._freq) # avoid warning in constructor + return result + + elif is_integer_object(other): + raise integer_op_not_supported(self) + + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + raise integer_op_not_supported(self) + if other.dtype.kind == "m": + if self.tz is None: + return self.asm8 + other + return np.asarray( + [self + other[n] for n in range(len(other))], + dtype=object, + ) + + elif not isinstance(self, _Timestamp): + # cython semantics, args have been switched and this is __radd__ + # TODO(cython3): remove this it moved to __radd__ + return other.__add__(self) + return NotImplemented + + def __radd__(self, other): + # Have to duplicate checks to avoid infinite recursion due to NotImplemented + if is_any_td_scalar(other) or is_integer_object(other) or is_array(other): + return self.__add__(other) + return NotImplemented + + def __sub__(self, other): + if other is NaT: + return NaT + + elif is_any_td_scalar(other) or is_integer_object(other): + neg_other = -other + return self + neg_other + + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + raise integer_op_not_supported(self) + if other.dtype.kind == "m": + if self.tz is None: + return self.asm8 - other + return np.asarray( + [self - other[n] for n in range(len(other))], + dtype=object, + ) + return NotImplemented + + # coerce if necessary if we are a Timestamp-like + if (PyDateTime_Check(self) + and (PyDateTime_Check(other) or is_datetime64_object(other))): + # both_timestamps is to determine whether Timedelta(self - other) + # should raise the OOB error, or fall back returning a timedelta. + # TODO(cython3): clean out the bits that moved to __rsub__ + both_timestamps = (isinstance(other, _Timestamp) and + isinstance(self, _Timestamp)) + if isinstance(self, _Timestamp): + other = type(self)(other) + else: + self = type(other)(self) + + if (self.tzinfo is None) ^ (other.tzinfo is None): + raise TypeError( + "Cannot subtract tz-naive and tz-aware datetime-like objects." + ) + + # We allow silent casting to the lower resolution if and only + # if it is lossless. + try: + if self._reso < other._reso: + other = (<_Timestamp>other)._as_reso(self._reso, round_ok=False) + elif self._reso > other._reso: + self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) + except ValueError as err: + raise ValueError( + "Timestamp subtraction with mismatched resolutions is not " + "allowed when casting to the lower resolution would require " + "lossy rounding." + ) from err + + # scalar Timestamp/datetime - Timestamp/datetime -> yields a + # Timedelta + try: + res_value = self.value - other.value + return Timedelta._from_value_and_reso(res_value, self._reso) + except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err: + if isinstance(other, _Timestamp): + if both_timestamps: + raise OutOfBoundsDatetime( + "Result is too large for pandas.Timedelta. Convert inputs " + "to datetime.datetime with 'Timestamp.to_pydatetime()' " + "before subtracting." 
+ ) from err + # We get here in stata tests, fall back to stdlib datetime + # method and return stdlib timedelta object + pass + elif is_datetime64_object(self): + # GH#28286 cython semantics for __rsub__, `other` is actually + # the Timestamp + # TODO(cython3): remove this, this moved to __rsub__ + return type(other)(self) - other + + return NotImplemented + + def __rsub__(self, other): + if PyDateTime_Check(other): + try: + return type(self)(other) - self + except (OverflowError, OutOfBoundsDatetime) as err: + # We get here in stata tests, fall back to stdlib datetime + # method and return stdlib timedelta object + pass + elif is_datetime64_object(other): + return type(self)(other) - self + return NotImplemented + + # ----------------------------------------------------------------- + + cdef int64_t _maybe_convert_value_to_local(self): + """Convert UTC i8 value to local i8 value if tz exists""" + cdef: + int64_t val + tzinfo own_tz = self.tzinfo + npy_datetimestruct dts + + if own_tz is not None and not is_utc(own_tz): + pydatetime_to_dtstruct(self, &dts) + val = npy_datetimestruct_to_datetime(self._reso, &dts) + self.nanosecond + else: + val = self.value + return val + + @cython.boundscheck(False) + cdef bint _get_start_end_field(self, str field, freq): + cdef: + int64_t val + dict kwds + ndarray[uint8_t, cast=True] out + int month_kw + + if freq: + kwds = freq.kwds + month_kw = kwds.get('startingMonth', kwds.get('month', 12)) + freqstr = self._freqstr + else: + month_kw = 12 + freqstr = None + + val = self._maybe_convert_value_to_local() + + out = get_start_end_field(np.array([val], dtype=np.int64), + field, freqstr, month_kw, self._reso) + return out[0] + + cdef _warn_on_field_deprecation(self, freq, str field): + """ + Warn if the removal of .freq change the value of start/end properties. + """ + cdef: + bint needs = False + + if freq is not None: + kwds = freq.kwds + month_kw = kwds.get("startingMonth", kwds.get("month", 12)) + freqstr = self._freqstr + if month_kw != 12: + needs = True + if freqstr.startswith("B"): + needs = True + + if needs: + warnings.warn( + "Timestamp.freq is deprecated and will be removed in a future " + "version. When you have a freq, use " + f"freq.{field}(timestamp) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + @property + def is_month_start(self) -> bool: + """ + Return True if date is first day of month. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_month_start + False + + >>> ts = pd.Timestamp(2020, 1, 1) + >>> ts.is_month_start + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.day == 1 + self._warn_on_field_deprecation(self._freq, "is_month_start") + return self._get_start_end_field("is_month_start", self._freq) + + @property + def is_month_end(self) -> bool: + """ + Return True if date is last day of month. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_month_end + False + + >>> ts = pd.Timestamp(2020, 12, 31) + >>> ts.is_month_end + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.day == self.days_in_month + self._warn_on_field_deprecation(self._freq, "is_month_end") + return self._get_start_end_field("is_month_end", self._freq) + + @property + def is_quarter_start(self) -> bool: + """ + Return True if date is first day of the quarter. 
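+
+        For calendar quarters these are January 1, April 1, July 1 and
+        October 1.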
+ + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_quarter_start + False + + >>> ts = pd.Timestamp(2020, 4, 1) + >>> ts.is_quarter_start + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.day == 1 and self.month % 3 == 1 + self._warn_on_field_deprecation(self._freq, "is_quarter_start") + return self._get_start_end_field("is_quarter_start", self._freq) + + @property + def is_quarter_end(self) -> bool: + """ + Return True if date is last day of the quarter. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_quarter_end + False + + >>> ts = pd.Timestamp(2020, 3, 31) + >>> ts.is_quarter_end + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return (self.month % 3) == 0 and self.day == self.days_in_month + self._warn_on_field_deprecation(self._freq, "is_quarter_end") + return self._get_start_end_field("is_quarter_end", self._freq) + + @property + def is_year_start(self) -> bool: + """ + Return True if date is first day of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_year_start + False + + >>> ts = pd.Timestamp(2020, 1, 1) + >>> ts.is_year_start + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.day == self.month == 1 + self._warn_on_field_deprecation(self._freq, "is_year_start") + return self._get_start_end_field("is_year_start", self._freq) + + @property + def is_year_end(self) -> bool: + """ + Return True if date is last day of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_year_end + False + + >>> ts = pd.Timestamp(2020, 12, 31) + >>> ts.is_year_end + True + """ + if self._freq is None: + # fast-path for non-business frequencies + return self.month == 12 and self.day == 31 + self._warn_on_field_deprecation(self._freq, "is_year_end") + return self._get_start_end_field("is_year_end", self._freq) + + @cython.boundscheck(False) + cdef _get_date_name_field(self, str field, object locale): + cdef: + int64_t val + object[::1] out + + val = self._maybe_convert_value_to_local() + + out = get_date_name_field(np.array([val], dtype=np.int64), + field, locale=locale, reso=self._reso) + return out[0] + + def day_name(self, locale=None) -> str: + """ + Return the day name of the Timestamp with specified locale. + + Parameters + ---------- + locale : str, default None (English locale) + Locale determining the language in which to return the day name. + + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.day_name() + 'Saturday' + + Analogous for ``pd.NaT``: + + >>> pd.NaT.day_name() + nan + """ + return self._get_date_name_field("day_name", locale) + + def month_name(self, locale=None) -> str: + """ + Return the month name of the Timestamp with specified locale. + + Parameters + ---------- + locale : str, default None (English locale) + Locale determining the language in which to return the month name. + + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.month_name() + 'March' + + Analogous for ``pd.NaT``: + + >>> pd.NaT.month_name() + nan + """ + return self._get_date_name_field("month_name", locale) + + @property + def is_leap_year(self) -> bool: + """ + Return True if year is a leap year. 
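+
+        A leap year has 366 days; February 29 only exists in leap years.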
+ + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.is_leap_year + True + """ + return bool(ccalendar.is_leapyear(self.year)) + + @property + def day_of_week(self) -> int: + """ + Return day of the week. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.day_of_week + 5 + """ + return self.weekday() + + @property + def day_of_year(self) -> int: + """ + Return the day of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.day_of_year + 74 + """ + return ccalendar.get_day_of_year(self.year, self.month, self.day) + + @property + def quarter(self) -> int: + """ + Return the quarter of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.quarter + 1 + """ + return ((self.month - 1) // 3) + 1 + + @property + def week(self) -> int: + """ + Return the week number of the year. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.week + 11 + """ + return ccalendar.get_week_of_year(self.year, self.month, self.day) + + @property + def days_in_month(self) -> int: + """ + Return the number of days in the month. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14) + >>> ts.days_in_month + 31 + """ + return ccalendar.get_days_in_month(self.year, self.month) + + # ----------------------------------------------------------------- + # Transformation Methods + + def normalize(self) -> "Timestamp": + """ + Normalize Timestamp to midnight, preserving tz information. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14, 15, 30) + >>> ts.normalize() + Timestamp('2020-03-14 00:00:00') + """ + cdef: + local_val = self._maybe_convert_value_to_local() + int64_t normalized + int64_t ppd = periods_per_day(self._reso) + _Timestamp ts + + normalized = normalize_i8_stamp(local_val, ppd) + ts = type(self)._from_value_and_reso(normalized, reso=self._reso, tz=None) + return ts.tz_localize(self.tzinfo) + + # ----------------------------------------------------------------- + # Pickle Methods + + def __reduce_ex__(self, protocol): + # python 3.6 compat + # https://bugs.python.org/issue28730 + # now __reduce_ex__ is defined and higher priority than __reduce__ + return self.__reduce__() + + def __setstate__(self, state): + self.value = state[0] + self._freq = state[1] + self.tzinfo = state[2] + + if len(state) == 3: + # pre-non-nano pickle + # TODO: no tests get here 2022-05-10 + reso = NPY_FR_ns + else: + reso = state[4] + self._reso = reso + + def __reduce__(self): + object_state = self.value, self._freq, self.tzinfo, self._reso + return (_unpickle_timestamp, object_state) + + # ----------------------------------------------------------------- + # Rendering Methods + + def isoformat(self, sep: str = "T", timespec: str = "auto") -> str: + """ + Return the time formatted according to ISO 8601. + + The full format looks like 'YYYY-MM-DD HH:MM:SS.mmmmmmnnn'. + By default, the fractional part is omitted if self.microsecond == 0 + and self.nanosecond == 0. + + If self.tzinfo is not None, the UTC offset is also attached, + giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmmnnn+HH:MM'. + + Parameters + ---------- + sep : str, default 'T' + String used as the separator between the date and time. + + timespec : str, default 'auto' + Specifies the number of additional terms of the time to include. + The valid values are 'auto', 'hours', 'minutes', 'seconds', + 'milliseconds', 'microseconds', and 'nanoseconds'.
+ + Returns + ------- + str + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.isoformat() + '2020-03-14T15:32:52.192548651' + >>> ts.isoformat(timespec='microseconds') + '2020-03-14T15:32:52.192548' + """ + base_ts = "microseconds" if timespec == "nanoseconds" else timespec + base = super(_Timestamp, self).isoformat(sep=sep, timespec=base_ts) + # We need to replace the fake year 1970 with our real year + base = f"{self.year}-" + base.split("-", 1)[1] + + if self.nanosecond == 0 and timespec != "nanoseconds": + return base + + if self.tzinfo is not None: + base1, base2 = base[:-6], base[-6:] + else: + base1, base2 = base, "" + + if timespec == "nanoseconds" or (timespec == "auto" and self.nanosecond): + if self.microsecond: + base1 += f"{self.nanosecond:03d}" + else: + base1 += f".{self.nanosecond:09d}" + + return base1 + base2 + + def __repr__(self) -> str: + stamp = self._repr_base + zone = None + + try: + stamp += self.strftime('%z') + except ValueError: + year2000 = self.replace(year=2000) + stamp += year2000.strftime('%z') + + if self.tzinfo: + zone = get_timezone(self.tzinfo) + try: + stamp += zone.strftime(' %%Z') + except AttributeError: + # e.g. tzlocal has no `strftime` + pass + + tz = f", tz='{zone}'" if zone is not None else "" + freq = "" if self._freq is None else f", freq='{self._freqstr}'" + + return f"Timestamp('{stamp}'{tz}{freq})" + + @property + def _repr_base(self) -> str: + return f"{self._date_repr} {self._time_repr}" + + @property + def _date_repr(self) -> str: + # Ideal here would be self.strftime("%Y-%m-%d"), but + # the datetime strftime() methods require year >= 1900 and is slower + return f'{self.year}-{self.month:02d}-{self.day:02d}' + + @property + def _time_repr(self) -> str: + result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' + + if self.nanosecond != 0: + result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' + elif self.microsecond != 0: + result += f'.{self.microsecond:06d}' + + return result + + @property + def _short_repr(self) -> str: + # format a Timestamp with only _date_repr if possible + # otherwise _repr_base + if (self.hour == 0 and + self.minute == 0 and + self.second == 0 and + self.microsecond == 0 and + self.nanosecond == 0): + return self._date_repr + return self._repr_base + + # ----------------------------------------------------------------- + # Conversion Methods + + @cython.cdivision(False) + cdef _Timestamp _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True): + cdef: + int64_t value, mult, div, mod + + if reso == self._reso: + return self + + value = convert_reso(self.value, self._reso, reso, round_ok=round_ok) + return type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo) + + def _as_unit(self, str unit, bint round_ok=True): + dtype = np.dtype(f"M8[{unit}]") + reso = get_unit_from_dtype(dtype) + try: + return self._as_reso(reso, round_ok=round_ok) + except OverflowError as err: + raise OutOfBoundsDatetime( + f"Cannot cast {self} to unit='{unit}' without overflow." + ) from err + + @property + def asm8(self) -> np.datetime64: + """ + Return numpy datetime64 format in nanoseconds. + + Examples + -------- + >>> ts = pd.Timestamp(2020, 3, 14, 15) + >>> ts.asm8 + numpy.datetime64('2020-03-14T15:00:00.000000000') + """ + return self.to_datetime64() + + def timestamp(self): + """ + Return POSIX timestamp as float. 
+ + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548') + >>> ts.timestamp() + 1584199972.192548 + """ + # GH 17329 + # Note: Naive timestamps will not match datetime.stdlib + + denom = periods_per_second(self._reso) + + return round(self.value / denom, 6) + + cpdef datetime to_pydatetime(_Timestamp self, bint warn=True): + """ + Convert a Timestamp object to a native Python datetime object. + + If warn=True, issue a warning if nanoseconds is nonzero. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548') + >>> ts.to_pydatetime() + datetime.datetime(2020, 3, 14, 15, 32, 52, 192548) + + Analogous for ``pd.NaT``: + + >>> pd.NaT.to_pydatetime() + NaT + """ + if self.nanosecond != 0 and warn: + warnings.warn("Discarding nonzero nanoseconds in conversion.", + UserWarning, stacklevel=find_stack_level()) + + return datetime(self.year, self.month, self.day, + self.hour, self.minute, self.second, + self.microsecond, self.tzinfo, fold=self.fold) + + cpdef to_datetime64(self): + """ + Return a numpy.datetime64 object with 'ns' precision. + """ + # TODO: find a way to construct dt64 directly from _reso + abbrev = npy_unit_to_abbrev(self._reso) + return np.datetime64(self.value, abbrev) + + def to_numpy(self, dtype=None, copy=False) -> np.datetime64: + """ + Convert the Timestamp to a NumPy datetime64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timestamp.to_datetime64()`. The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. + + Returns + ------- + numpy.datetime64 + + See Also + -------- + DatetimeIndex.to_numpy : Similar method for DatetimeIndex. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.to_numpy() + numpy.datetime64('2020-03-14T15:32:52.192548651') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.to_numpy() + numpy.datetime64('NaT') + """ + if dtype is not None or copy is not False: + raise ValueError( + "Timestamp.to_numpy dtype and copy arguments are ignored." + ) + return self.to_datetime64() + + def to_period(self, freq=None): + """ + Return an period of which this timestamp is an observation. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> # Year end frequency + >>> ts.to_period(freq='Y') + Period('2020', 'A-DEC') + + >>> # Month end frequency + >>> ts.to_period(freq='M') + Period('2020-03', 'M') + + >>> # Weekly frequency + >>> ts.to_period(freq='W') + Period('2020-03-09/2020-03-15', 'W-SUN') + + >>> # Quarter end frequency + >>> ts.to_period(freq='Q') + Period('2020Q1', 'Q-DEC') + """ + from pandas import Period + + if self.tz is not None: + # GH#21333 + warnings.warn( + "Converting to Period representation will drop timezone information.", + UserWarning, + stacklevel=find_stack_level(), + ) + + if freq is None: + freq = self._freq + warnings.warn( + "In a future version, calling 'Timestamp.to_period()' without " + "passing a 'freq' will raise an exception.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return Period(self, freq=freq) + + +# ---------------------------------------------------------------------- + +# Python front end to C extension type _Timestamp +# This serves as the box for datetime64 + + +class Timestamp(_Timestamp): + """ + Pandas replacement for python datetime.datetime object. + + Timestamp is the pandas equivalent of python's Datetime + and is interchangeable with it in most cases. 
It's the type used + for the entries that make up a DatetimeIndex, and other timeseries + oriented data structures in pandas. + + Parameters + ---------- + ts_input : datetime-like, str, int, float + Value to be converted to Timestamp. + freq : str, DateOffset + Offset which Timestamp will have. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will have. + unit : str + Unit used for conversion if ts_input is of type int or float. The + valid values are 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For + example, 's' means seconds and 'ms' means milliseconds. + year, month, day : int + hour, minute, second, microsecond : int, optional, default 0 + nanosecond : int, optional, default 0 + tzinfo : datetime.tzinfo, optional, default None + fold : {0, 1}, default None, keyword-only + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time. + + .. versionadded:: 1.1.0 + + Notes + ----- + There are essentially three calling conventions for the constructor. The + primary form accepts four parameters. They can be passed by position or + keyword. + + The other two forms mimic the parameters from ``datetime.datetime``. They + can be passed by either position or keyword, but not both mixed together. + + Examples + -------- + Using the primary calling convention: + + This converts a datetime-like string + + >>> pd.Timestamp('2017-01-01T12') + Timestamp('2017-01-01 12:00:00') + + This converts a float representing a Unix epoch in units of seconds + + >>> pd.Timestamp(1513393355.5, unit='s') + Timestamp('2017-12-16 03:02:35.500000') + + This converts an int representing a Unix-epoch in units of seconds + and for a particular timezone + + >>> pd.Timestamp(1513393355, unit='s', tz='US/Pacific') + Timestamp('2017-12-15 19:02:35-0800', tz='US/Pacific') + + Using the other two forms that mimic the API for ``datetime.datetime``: + + >>> pd.Timestamp(2017, 1, 1, 12) + Timestamp('2017-01-01 12:00:00') + + >>> pd.Timestamp(year=2017, month=1, day=1, hour=12) + Timestamp('2017-01-01 12:00:00') + """ + + @classmethod + def fromordinal(cls, ordinal, freq=None, tz=None): + """ + Construct a timestamp from a a proleptic Gregorian ordinal. + + Parameters + ---------- + ordinal : int + Date corresponding to a proleptic Gregorian ordinal. + freq : str, DateOffset + Offset to apply to the Timestamp. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for the Timestamp. + + Notes + ----- + By definition there cannot be any tz info on the ordinal itself. + + Examples + -------- + >>> pd.Timestamp.fromordinal(737425) + Timestamp('2020-01-01 00:00:00') + """ + return cls(datetime.fromordinal(ordinal), + freq=freq, tz=tz) + + @classmethod + def now(cls, tz=None): + """ + Return new Timestamp object representing current time local to tz. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + + Examples + -------- + >>> pd.Timestamp.now() # doctest: +SKIP + Timestamp('2020-11-16 22:06:16.378782') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.now() + NaT + """ + if isinstance(tz, str): + tz = maybe_get_tz(tz) + return cls(datetime.now(tz)) + + @classmethod + def today(cls, tz=None): + """ + Return the current time in the local timezone. + + This differs from datetime.today() in that it can be localized to a + passed timezone. 
+ + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + + Examples + -------- + >>> pd.Timestamp.today() # doctest: +SKIP + Timestamp('2020-11-16 22:37:39.969883') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.today() + NaT + """ + return cls.now(tz) + + @classmethod + def utcnow(cls): + """ + Timestamp.utcnow() + + Return a new Timestamp representing UTC day and time. + + Examples + -------- + >>> pd.Timestamp.utcnow() # doctest: +SKIP + Timestamp('2020-11-16 22:50:18.092888+0000', tz='UTC') + """ + return cls.now(UTC) + + @classmethod + def utcfromtimestamp(cls, ts): + """ + Timestamp.utcfromtimestamp(ts) + + Construct a naive UTC datetime from a POSIX timestamp. + + Examples + -------- + >>> pd.Timestamp.utcfromtimestamp(1584199972) + Timestamp('2020-03-14 15:32:52') + """ + # GH#22451 + warnings.warn( + "The behavior of Timestamp.utcfromtimestamp is deprecated, in a " + "future version will return a timezone-aware Timestamp with UTC " + "timezone. To keep the old behavior, use " + "Timestamp.utcfromtimestamp(ts).tz_localize(None). " + "To get the future behavior, use Timestamp.fromtimestamp(ts, 'UTC')", + FutureWarning, + stacklevel=find_stack_level(), + ) + return cls(datetime.utcfromtimestamp(ts)) + + @classmethod + def fromtimestamp(cls, ts, tz=None): + """ + Timestamp.fromtimestamp(ts) + + Transform timestamp[, tz] to tz's local time from POSIX timestamp. + + Examples + -------- + >>> pd.Timestamp.fromtimestamp(1584199972) + Timestamp('2020-03-14 15:32:52') + + Note that the output may change depending on your local time. + """ + tz = maybe_get_tz(tz) + return cls(datetime.fromtimestamp(ts, tz)) + + def strftime(self, format): + """ + Return a formatted string of the Timestamp. + + Parameters + ---------- + format : str + Format string to convert Timestamp to string. + See strftime documentation for more information on the format string: + https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. + + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts.strftime('%Y-%m-%d %X') + '2020-03-14 15:32:52' + """ + return datetime.strftime(self, format) + + # Issue 25016. + @classmethod + def strptime(cls, date_string, format): + """ + Timestamp.strptime(string, format) + + Function is not implemented. Use pd.to_datetime(). + """ + raise NotImplementedError( + "Timestamp.strptime() is not implemented. " + "Use to_datetime() to parse date strings." + ) + + @classmethod + def combine(cls, date, time): + """ + Timestamp.combine(date, time) + + Combine date, time into datetime with same date and time fields. + + Examples + -------- + >>> from datetime import date, time + >>> pd.Timestamp.combine(date(2020, 3, 14), time(15, 30, 15)) + Timestamp('2020-03-14 15:30:15') + """ + return cls(datetime.combine(date, time)) + + def __new__( + cls, + object ts_input=_no_input, + object freq=None, + tz=None, + unit=None, + year=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + microsecond=None, + nanosecond=None, + tzinfo_type tzinfo=None, + *, + fold=None, + ): + # The parameter list folds together legacy parameter names (the first + # four) and positional and keyword parameter names from pydatetime. + # + # There are three calling forms: + # + # - In the legacy form, the first parameter, ts_input, is required + # and may be datetime-like, str, int, or float. The second + # parameter, offset, is optional and may be str or DateOffset. 
+ # + # - ints in the first, second, and third arguments indicate + # pydatetime positional arguments. Only the first 8 arguments + # (standing in for year, month, day, hour, minute, second, + # microsecond, tzinfo) may be non-None. As a shortcut, we just + # check that the second argument is an int. + # + # - Nones for the first four (legacy) arguments indicate pydatetime + # keyword arguments. year, month, and day are required. As a + # shortcut, we just check that the first argument was not passed. + # + # Mixing pydatetime positional and keyword arguments is forbidden! + + cdef: + _TSObject ts + tzinfo_type tzobj + + _date_attributes = [year, month, day, hour, minute, second, + microsecond, nanosecond] + + if tzinfo is not None: + # GH#17690 tzinfo must be a datetime.tzinfo object, ensured + # by the cython annotation. + if tz is not None: + if (is_integer_object(tz) + and is_integer_object(ts_input) + and is_integer_object(freq) + ): + # GH#31929 e.g. Timestamp(2019, 3, 4, 5, 6, tzinfo=foo) + # TODO(GH#45307): this will still be fragile to + # mixed-and-matched positional/keyword arguments + ts_input = datetime( + ts_input, + freq, + tz, + unit or 0, + year or 0, + month or 0, + day or 0, + fold=fold or 0, + ) + nanosecond = hour + tz = tzinfo + return cls(ts_input, nanosecond=nanosecond, tz=tz) + + raise ValueError('Can provide at most one of tz, tzinfo') + + # User passed tzinfo instead of tz; avoid silently ignoring + tz, tzinfo = tzinfo, None + + # Allow fold only for unambiguous input + if fold is not None: + if fold not in [0, 1]: + raise ValueError( + "Valid values for the fold argument are None, 0, or 1." + ) + + if (ts_input is not _no_input and not ( + PyDateTime_Check(ts_input) and + getattr(ts_input, 'tzinfo', None) is None)): + raise ValueError( + "Cannot pass fold with possibly unambiguous input: int, " + "float, numpy.datetime64, str, or timezone-aware " + "datetime-like. Pass naive datetime-like or build " + "Timestamp from components." + ) + + if tz is not None and PyTZInfo_Check(tz) and treat_tz_as_pytz(tz): + raise ValueError( + "pytz timezones do not support fold. Please use dateutil " + "timezones." + ) + + if hasattr(ts_input, 'fold'): + ts_input = ts_input.replace(fold=fold) + + # GH 30543 if pd.Timestamp already passed, return it + # check that only ts_input is passed + # checking verbosely, because cython doesn't optimize + # list comprehensions (as of cython 0.29.x) + if (isinstance(ts_input, _Timestamp) and freq is None and + tz is None and unit is None and year is None and + month is None and day is None and hour is None and + minute is None and second is None and + microsecond is None and nanosecond is None and + tzinfo is None): + return ts_input + elif isinstance(ts_input, str): + # User passed a date string to parse. + # Check that the user didn't also pass a date attribute kwarg. 
+ if any(arg is not None for arg in _date_attributes): + raise ValueError( + "Cannot pass a date attribute keyword " + "argument when passing a date string" + ) + + elif ts_input is _no_input: + # GH 31200 + # When year, month or day is not given, we call the datetime + # constructor to make sure we get the same error message + # since Timestamp inherits datetime + datetime_kwargs = { + "hour": hour or 0, + "minute": minute or 0, + "second": second or 0, + "microsecond": microsecond or 0, + "fold": fold or 0 + } + if year is not None: + datetime_kwargs["year"] = year + if month is not None: + datetime_kwargs["month"] = month + if day is not None: + datetime_kwargs["day"] = day + + ts_input = datetime(**datetime_kwargs) + + elif is_integer_object(freq): + # User passed positional arguments: + # Timestamp(year, month, day[, hour[, minute[, second[, + # microsecond[, nanosecond[, tzinfo]]]]]]) + ts_input = datetime(ts_input, freq, tz, unit or 0, + year or 0, month or 0, day or 0, fold=fold or 0) + nanosecond = hour + tz = minute + freq = None + unit = None + + if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: + raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " + "the tz parameter. Use tz_convert instead.") + + tzobj = maybe_get_tz(tz) + if tzobj is not None and is_datetime64_object(ts_input): + # GH#24559, GH#42288 In the future we will treat datetime64 as + # wall-time (consistent with DatetimeIndex) + warnings.warn( + "In a future version, when passing a np.datetime64 object and " + "a timezone to Timestamp, the datetime64 will be interpreted " + "as a wall time, not a UTC time. To interpret as a UTC time, " + "use `Timestamp(dt64).tz_localize('UTC').tz_convert(tz)`", + FutureWarning, + stacklevel=find_stack_level(), + ) + # Once this deprecation is enforced, we can do + # return Timestamp(ts_input).tz_localize(tzobj) + ts = convert_to_tsobject(ts_input, tzobj, unit, 0, 0, nanosecond or 0) + + if ts.value == NPY_NAT: + return NaT + + if freq is None: + # GH 22311: Try to extract the frequency of a given Timestamp input + freq = getattr(ts_input, '_freq', None) + else: + warnings.warn( + "The 'freq' argument in Timestamp is deprecated and will be " + "removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if not is_offset_object(freq): + freq = to_offset(freq) + + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) + + def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): + cdef: + int64_t nanos + + to_offset(freq).nanos # raises on non-fixed freq + nanos = delta_to_nanoseconds(to_offset(freq), self._reso) + + if self.tz is not None: + value = self.tz_localize(None).value + else: + value = self.value + + value = np.array([value], dtype=np.int64) + + # Will only ever contain 1 element for timestamp + r = round_nsint64(value, mode, nanos)[0] + result = Timestamp._from_value_and_reso(r, self._reso, None) + if self.tz is not None: + result = result.tz_localize( + self.tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + return result + + def round(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Round the Timestamp to the specified resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the rounding resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). 
+ * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + a new Timestamp rounded to the given resolution of `freq` + + Raises + ------ + ValueError if the freq cannot be converted + + Notes + ----- + If the Timestamp has a timezone, rounding will take place relative to the + local ("wall") time and re-localized to the same timezone. When rounding + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be rounded using multiple frequency units: + + >>> ts.round(freq='H') # hour + Timestamp('2020-03-14 16:00:00') + + >>> ts.round(freq='T') # minute + Timestamp('2020-03-14 15:33:00') + + >>> ts.round(freq='S') # seconds + Timestamp('2020-03-14 15:32:52') + + >>> ts.round(freq='L') # milliseconds + Timestamp('2020-03-14 15:32:52.193000') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.round(freq='5T') + Timestamp('2020-03-14 15:35:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.round(freq='1H30T') + Timestamp('2020-03-14 15:00:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.round() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 01:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.round("H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.round("H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """ + return self._round( + freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent + ) + + def floor(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Return a new Timestamp floored to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the flooring resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. 
+ * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Raises + ------ + ValueError if the freq cannot be converted. + + Notes + ----- + If the Timestamp has a timezone, flooring will take place relative to the + local ("wall") time and re-localized to the same timezone. When flooring + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be floored using multiple frequency units: + + >>> ts.floor(freq='H') # hour + Timestamp('2020-03-14 15:00:00') + + >>> ts.floor(freq='T') # minute + Timestamp('2020-03-14 15:32:00') + + >>> ts.floor(freq='S') # seconds + Timestamp('2020-03-14 15:32:52') + + >>> ts.floor(freq='N') # nanoseconds + Timestamp('2020-03-14 15:32:52.192548651') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.floor(freq='5T') + Timestamp('2020-03-14 15:30:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.floor(freq='1H30T') + Timestamp('2020-03-14 15:00:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.floor() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 03:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.floor("2H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.floor("2H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """ + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) + + def ceil(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Return a new Timestamp ceiled to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Raises + ------ + ValueError if the freq cannot be converted. + + Notes + ----- + If the Timestamp has a timezone, ceiling will take place relative to the + local ("wall") time and re-localized to the same timezone. When ceiling + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. 
+ + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + + A timestamp can be ceiled using multiple frequency units: + + >>> ts.ceil(freq='H') # hour + Timestamp('2020-03-14 16:00:00') + + >>> ts.ceil(freq='T') # minute + Timestamp('2020-03-14 15:33:00') + + >>> ts.ceil(freq='S') # seconds + Timestamp('2020-03-14 15:32:53') + + >>> ts.ceil(freq='U') # microseconds + Timestamp('2020-03-14 15:32:52.192549') + + ``freq`` can also be a multiple of a single unit, like '5T' (i.e. 5 minutes): + + >>> ts.ceil(freq='5T') + Timestamp('2020-03-14 15:35:00') + + or a combination of multiple units, like '1H30T' (i.e. 1 hour and 30 minutes): + + >>> ts.ceil(freq='1H30T') + Timestamp('2020-03-14 16:30:00') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.ceil() + NaT + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> ts_tz = pd.Timestamp("2021-10-31 01:30:00").tz_localize("Europe/Amsterdam") + + >>> ts_tz.ceil("H", ambiguous=False) + Timestamp('2021-10-31 02:00:00+0100', tz='Europe/Amsterdam') + + >>> ts_tz.ceil("H", ambiguous=True) + Timestamp('2021-10-31 02:00:00+0200', tz='Europe/Amsterdam') + """ + return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) + + @property + def tz(self): + """ + Alias for tzinfo. + + Examples + -------- + >>> ts = pd.Timestamp(1584226800, unit='s', tz='Europe/Stockholm') + >>> ts.tz + <DstTzInfo 'Europe/Stockholm' CET+1:00:00 STD> + """ + return self.tzinfo + + @tz.setter + def tz(self, value): + # GH 3746: Prevent localizing or converting the index by setting tz + raise AttributeError( + "Cannot directly set timezone. " + "Use tz_localize() or tz_convert() as appropriate" + ) + + @property + def _freqstr(self): + return getattr(self._freq, "freqstr", self._freq) + + @property + def freqstr(self): + """ + Return the frequency string associated with this Timestamp, if any. + """ + warnings.warn( + "Timestamp.freqstr is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._freqstr + + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): + """ + Localize the Timestamp to a timezone. + + Convert naive Timestamp to local time zone or remove + timezone from timezone-aware Timestamp. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding local time. + + ambiguous : bool, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + The behavior is as follows: + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time.
+ * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + localized : Timestamp + + Raises + ------ + TypeError + If the Timestamp is tz-aware and tz is not None. + + Examples + -------- + Create a naive timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651') + + Add 'Europe/Stockholm' as timezone: + + >>> ts.tz_localize(tz='Europe/Stockholm') + Timestamp('2020-03-14 15:32:52.192548651+0100', tz='Europe/Stockholm') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_localize() + NaT + """ + if ambiguous == 'infer': + raise ValueError('Cannot infer offset with only one time.') + + nonexistent_options = ('raise', 'NaT', 'shift_forward', 'shift_backward') + if nonexistent not in nonexistent_options and not PyDelta_Check(nonexistent): + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or a timedelta object" + ) + + if self.tzinfo is None: + # tz naive, localize + tz = maybe_get_tz(tz) + if not isinstance(ambiguous, str): + ambiguous = [ambiguous] + value = tz_localize_to_utc_single(self.value, tz, + ambiguous=ambiguous, + nonexistent=nonexistent, + reso=self._reso) + elif tz is None: + # reset tz + value = tz_convert_from_utc_single(self.value, self.tz, reso=self._reso) + + else: + raise TypeError( + "Cannot localize tz-aware Timestamp, use tz_convert for conversions" + ) + + out = type(self)._from_value_and_reso(value, self._reso, tz=tz) + if out is not NaT: + out._set_freq(self._freq) # avoid warning in constructor + return out + + def tz_convert(self, tz): + """ + Convert timezone-aware Timestamp to another time zone. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. + + Examples + -------- + Create a timestamp object with UTC timezone: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Change to Tokyo timezone: + + >>> ts.tz_convert(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Can also use ``astimezone``: + + >>> ts.astimezone(tz='Asia/Tokyo') + Timestamp('2020-03-15 00:32:52.192548651+0900', tz='Asia/Tokyo') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.tz_convert(tz='Asia/Tokyo') + NaT + """ + if self.tzinfo is None: + # tz naive, use tz_localize + raise TypeError( + "Cannot convert tz-naive Timestamp, use tz_localize to localize" + ) + else: + # Same UTC timestamp, different time zone + tz = maybe_get_tz(tz) + out = type(self)._from_value_and_reso(self.value, reso=self._reso, tz=tz) + if out is not NaT: + out._set_freq(self._freq) # avoid warning in constructor + return out + + astimezone = tz_convert + + def replace( + self, + year=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + microsecond=None, + nanosecond=None, + tzinfo=object, + fold=None, + ): + """ + Implements datetime.replace, handles nanoseconds. 
+ + Parameters + ---------- + year : int, optional + month : int, optional + day : int, optional + hour : int, optional + minute : int, optional + second : int, optional + microsecond : int, optional + nanosecond : int, optional + tzinfo : tz-convertible, optional + fold : int, optional + + Returns + ------- + Timestamp with fields replaced + + Examples + -------- + Create a timestamp object: + + >>> ts = pd.Timestamp('2020-03-14T15:32:52.192548651', tz='UTC') + >>> ts + Timestamp('2020-03-14 15:32:52.192548651+0000', tz='UTC') + + Replace year and the hour: + + >>> ts.replace(year=1999, hour=10) + Timestamp('1999-03-14 10:32:52.192548651+0000', tz='UTC') + + Replace timezone (not a conversion): + + >>> import pytz + >>> ts.replace(tzinfo=pytz.timezone('US/Pacific')) + Timestamp('2020-03-14 15:32:52.192548651-0700', tz='US/Pacific') + + Analogous for ``pd.NaT``: + + >>> pd.NaT.replace(tzinfo=pytz.timezone('US/Pacific')) + NaT + """ + + cdef: + npy_datetimestruct dts + int64_t value + object k, v + datetime ts_input + tzinfo_type tzobj + + # set to naive if needed + tzobj = self.tzinfo + value = self.value + + # GH 37610. Preserve fold when replacing. + if fold is None: + fold = self.fold + + if tzobj is not None: + value = tz_convert_from_utc_single(value, tzobj, reso=self._reso) + + # setup components + pandas_datetime_to_datetimestruct(value, self._reso, &dts) + dts.ps = self.nanosecond * 1000 + + # replace + def validate(k, v): + """ validate integers """ + if not is_integer_object(v): + raise ValueError( + f"value must be an integer, received {type(v)} for {k}" + ) + return v + + if year is not None: + dts.year = validate('year', year) + if month is not None: + dts.month = validate('month', month) + if day is not None: + dts.day = validate('day', day) + if hour is not None: + dts.hour = validate('hour', hour) + if minute is not None: + dts.min = validate('minute', minute) + if second is not None: + dts.sec = validate('second', second) + if microsecond is not None: + dts.us = validate('microsecond', microsecond) + if nanosecond is not None: + dts.ps = validate('nanosecond', nanosecond) * 1000 + if tzinfo is not object: + tzobj = tzinfo + + # reconstruct & check bounds + if tzobj is not None and treat_tz_as_pytz(tzobj): + # replacing across a DST boundary may induce a new tzinfo object + # see GH#18319 + ts_input = tzobj.localize(datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, + dts.us), + is_dst=not bool(fold)) + tzobj = ts_input.tzinfo + else: + kwargs = {'year': dts.year, 'month': dts.month, 'day': dts.day, + 'hour': dts.hour, 'minute': dts.min, 'second': dts.sec, + 'microsecond': dts.us, 'tzinfo': tzobj, + 'fold': fold} + ts_input = datetime(**kwargs) + + ts = convert_datetime_to_tsobject( + ts_input, tzobj, nanos=dts.ps // 1000, reso=self._reso + ) + return create_timestamp_from_ts( + ts.value, dts, tzobj, self._freq, fold, reso=self._reso + ) + + def to_julian_date(self) -> np.float64: + """ + Convert TimeStamp to a Julian Date. + + 0 Julian date is noon January 1, 4713 BC. 
+ + Examples + -------- + >>> ts = pd.Timestamp('2020-03-14T15:32:52') + >>> ts.to_julian_date() + 2458923.147824074 + """ + year = self.year + month = self.month + day = self.day + if month <= 2: + year -= 1 + month += 12 + return (day + + np.fix((153 * month - 457) / 5) + + 365 * year + + np.floor(year / 4) - + np.floor(year / 100) + + np.floor(year / 400) + + 1721118.5 + + (self.hour + + self.minute / 60.0 + + self.second / 3600.0 + + self.microsecond / 3600.0 / 1e+6 + + self.nanosecond / 3600.0 / 1e+9 + ) / 24.0) + + def isoweekday(self): + """ + Return the day of the week represented by the date. + + Monday == 1 ... Sunday == 7. + """ + # same as super().isoweekday(), but that breaks because of how + # we have overriden year, see note in create_timestamp_from_ts + return self.weekday() + 1 + + def weekday(self): + """ + Return the day of the week represented by the date. + + Monday == 0 ... Sunday == 6. + """ + # same as super().weekday(), but that breaks because of how + # we have overriden year, see note in create_timestamp_from_ts + return ccalendar.dayofweek(self.year, self.month, self.day) + + +# Aliases +Timestamp.weekofyear = Timestamp.week +Timestamp.daysinmonth = Timestamp.days_in_month + + +# ---------------------------------------------------------------------- +# Scalar analogues to functions in vectorized.pyx + + +@cython.cdivision(False) +cdef inline int64_t normalize_i8_stamp(int64_t local_val, int64_t ppd) nogil: + """ + Round the localized nanosecond timestamp down to the previous midnight. + + Parameters + ---------- + local_val : int64_t + ppd : int64_t + Periods per day in the Timestamp's resolution. + + Returns + ------- + int64_t + """ + return local_val - (local_val % ppd) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd new file mode 100644 index 00000000..c1a4e2bd --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pxd @@ -0,0 +1,23 @@ +from cpython.datetime cimport ( + datetime, + timedelta, + tzinfo, +) + + +cdef tzinfo utc_pytz + +cpdef bint is_utc(tzinfo tz) +cdef bint is_tzlocal(tzinfo tz) +cdef bint is_zoneinfo(tzinfo tz) + +cdef bint treat_tz_as_pytz(tzinfo tz) + +cpdef bint tz_compare(tzinfo start, tzinfo end) +cpdef object get_timezone(tzinfo tz) +cpdef tzinfo maybe_get_tz(object tz) + +cdef timedelta get_utcoffset(tzinfo tz, datetime obj) +cpdef bint is_fixed_offset(tzinfo tz) + +cdef object get_dst_info(tzinfo tz) diff --git a/pandas/_libs/tslibs/timezones.pyi b/pandas/_libs/tslibs/timezones.pyi new file mode 100644 index 00000000..4e9f0c6a --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pyi @@ -0,0 +1,21 @@ +from datetime import ( + datetime, + tzinfo, +) +from typing import Callable + +import numpy as np + +# imported from dateutil.tz +dateutil_gettz: Callable[[str], tzinfo] + +def tz_standardize(tz: tzinfo) -> tzinfo: ... +def tz_compare(start: tzinfo | None, end: tzinfo | None) -> bool: ... +def infer_tzinfo( + start: datetime | None, + end: datetime | None, +) -> tzinfo | None: ... +def maybe_get_tz(tz: str | int | np.int64 | tzinfo | None) -> tzinfo | None: ... +def get_timezone(tz: tzinfo) -> tzinfo | str: ... +def is_utc(tz: tzinfo | None) -> bool: ... +def is_fixed_offset(tz: tzinfo) -> bool: ... 
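The `timezones.pxd` / `timezones.pyi` stubs above declare the timezone helpers shared across the tslibs extension modules. As a rough usage sketch of the Python-visible helpers, assuming a pandas 1.5.x install with pytz and dateutil available (`pandas._libs.tslibs.timezones` is an internal module, so this is illustrative rather than a supported API):

from dateutil import tz as dateutil_tz
import pytz

from pandas._libs.tslibs import timezones

# Every recognised UTC implementation is treated as the same zone.
print(timezones.is_utc(pytz.UTC))                           # True
print(timezones.tz_compare(pytz.UTC, dateutil_tz.tzutc()))  # True

# maybe_get_tz turns a string into a tzinfo object; get_timezone returns a
# serialisable key for it (the zone name for pytz zones).
paris = timezones.maybe_get_tz("Europe/Paris")
print(timezones.get_timezone(paris))                        # 'Europe/Paris'

# Offset strings of the form 'UTC+01:00' become fixed-offset timezones.
fixed = timezones.maybe_get_tz("UTC+01:00")
print(timezones.is_fixed_offset(fixed))                     # True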
diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx new file mode 100644 index 00000000..abf8bbc5 --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pyx @@ -0,0 +1,450 @@ +from datetime import ( + timedelta, + timezone, +) + +from pandas.compat._optional import import_optional_dependency + +try: + # py39+ + import zoneinfo + from zoneinfo import ZoneInfo +except ImportError: + zoneinfo = None + ZoneInfo = None + +from cpython.datetime cimport ( + datetime, + timedelta, + tzinfo, +) + +# dateutil compat + +from dateutil.tz import ( + gettz as dateutil_gettz, + tzfile as _dateutil_tzfile, + tzlocal as _dateutil_tzlocal, + tzutc as _dateutil_tzutc, +) +import pytz +from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo + +UTC = pytz.utc + + +import numpy as np + +cimport numpy as cnp +from numpy cimport int64_t + +cnp.import_array() + +# ---------------------------------------------------------------------- +from pandas._libs.tslibs.util cimport ( + get_nat, + is_integer_object, +) + + +cdef int64_t NPY_NAT = get_nat() +cdef tzinfo utc_stdlib = timezone.utc +cdef tzinfo utc_pytz = UTC +cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc() + +cdef tzinfo utc_zoneinfo = None + + +# ---------------------------------------------------------------------- + +cdef inline bint is_utc_zoneinfo(tzinfo tz): + # Workaround for cases with missing tzdata + # https://github.com/pandas-dev/pandas/pull/46425#discussion_r830633025 + if tz is None or zoneinfo is None: + return False + + global utc_zoneinfo + if utc_zoneinfo is None: + try: + utc_zoneinfo = ZoneInfo("UTC") + except zoneinfo.ZoneInfoNotFoundError: + return False + # Warn if tzdata is too old, even if there is a system tzdata to alert + # users about the mismatch between local/system tzdata + import_optional_dependency("tzdata", errors="warn", min_version="2022.1") + + return tz is utc_zoneinfo + + +cpdef inline bint is_utc(tzinfo tz): + return ( + tz is utc_pytz + or tz is utc_stdlib + or isinstance(tz, _dateutil_tzutc) + or tz is utc_dateutil_str + or is_utc_zoneinfo(tz) + ) + + +cdef inline bint is_zoneinfo(tzinfo tz): + if ZoneInfo is None: + return False + return isinstance(tz, ZoneInfo) + + +cdef inline bint is_tzlocal(tzinfo tz): + return isinstance(tz, _dateutil_tzlocal) + + +cdef inline bint treat_tz_as_pytz(tzinfo tz): + return (hasattr(tz, '_utc_transition_times') and + hasattr(tz, '_transition_info')) + + +cdef inline bint treat_tz_as_dateutil(tzinfo tz): + return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') + + +# Returns str or tzinfo object +cpdef inline object get_timezone(tzinfo tz): + """ + We need to do several things here: + 1) Distinguish between pytz and dateutil timezones + 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone* + but a different tz object) + 3) Provide something to serialize when we're storing a datetime object + in pytables. + + We return a string prefaced with dateutil if it's a dateutil tz, else just + the tz name. It needs to be a string so that we can serialize it with + UJSON/pytables. maybe_get_tz (below) is the inverse of this process. + """ + if tz is None: + raise TypeError("tz argument cannot be None") + if is_utc(tz): + return tz + else: + if treat_tz_as_dateutil(tz): + if '.tar.gz' in tz._filename: + raise ValueError( + 'Bad tz filename. Dateutil on python 3 on windows has a ' + 'bug which causes tzfile._filename to be the same for all ' + 'timezone files. 
Please construct dateutil timezones ' + 'implicitly by passing a string like "dateutil/Europe' + '/London" when you construct your pandas objects instead ' + 'of passing a timezone object. See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil/' + tz._filename + else: + # tz is a pytz timezone or unknown. + try: + zone = tz.zone + if zone is None: + return tz + return zone + except AttributeError: + return tz + + +cpdef inline tzinfo maybe_get_tz(object tz): + """ + (Maybe) Construct a timezone object from a string. If tz is a string, use + it to construct a timezone object. Otherwise, just return tz. + """ + if isinstance(tz, str): + if tz == 'tzlocal()': + tz = _dateutil_tzlocal() + elif tz.startswith('dateutil/'): + zone = tz[9:] + tz = dateutil_gettz(zone) + # On Python 3 on Windows, the filename is not always set correctly. + if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: + tz._filename = zone + elif tz[0] in {'-', '+'}: + hours = int(tz[0:3]) + minutes = int(tz[0] + tz[4:6]) + tz = timezone(timedelta(hours=hours, minutes=minutes)) + elif tz[0:4] in {'UTC-', 'UTC+'}: + hours = int(tz[3:6]) + minutes = int(tz[3] + tz[7:9]) + tz = timezone(timedelta(hours=hours, minutes=minutes)) + else: + tz = pytz.timezone(tz) + elif is_integer_object(tz): + tz = pytz.FixedOffset(tz / 60) + elif isinstance(tz, tzinfo): + pass + elif tz is None: + pass + else: + raise TypeError(type(tz)) + return tz + + +def _p_tz_cache_key(tz: tzinfo): + """ + Python interface for cache function to facilitate testing. + """ + return tz_cache_key(tz) + + +# Timezone data caches, key is the pytz string or dateutil file name. +dst_cache = {} + + +cdef inline object tz_cache_key(tzinfo tz): + """ + Return the key in the cache for the timezone info object or None + if unknown. + + The key is currently the tz string for pytz timezones, the filename for + dateutil timezones. + + Notes + ----- + This cannot just be the hash of a timezone object. Unfortunately, the + hashes of two dateutil tz objects which represent the same timezone are + not equal (even though the tz objects will compare equal and represent + the same tz file). Also, pytz objects are not always hashable so we use + str(tz) instead. + """ + if isinstance(tz, _pytz_BaseTzInfo): + return tz.zone + elif isinstance(tz, _dateutil_tzfile): + if '.tar.gz' in tz._filename: + raise ValueError('Bad tz filename. Dateutil on python 3 on ' + 'windows has a bug which causes tzfile._filename ' + 'to be the same for all timezone files. Please ' + 'construct dateutil timezones implicitly by ' + 'passing a string like "dateutil/Europe/London" ' + 'when you construct your pandas objects instead ' + 'of passing a timezone object. 
See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil' + tz._filename + else: + return None + + +# ---------------------------------------------------------------------- +# UTC Offsets + + +cdef timedelta get_utcoffset(tzinfo tz, datetime obj): + try: + return tz._utcoffset + except AttributeError: + return tz.utcoffset(obj) + + +cpdef inline bint is_fixed_offset(tzinfo tz): + if treat_tz_as_dateutil(tz): + if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: + return 1 + else: + return 0 + elif treat_tz_as_pytz(tz): + if (len(tz._transition_info) == 0 + and len(tz._utc_transition_times) == 0): + return 1 + else: + return 0 + elif is_zoneinfo(tz): + return 0 + # This also implicitly accepts datetime.timezone objects which are + # considered fixed + return 1 + + +cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz): + """ + Transition times in dateutil timezones are stored in local non-dst + time. This code converts them to UTC. It's the reverse of the code + in dateutil.tz.tzfile.__init__. + """ + new_trans = list(tz._trans_list) + last_std_offset = 0 + for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)): + if not tti.isdst: + last_std_offset = tti.offset + new_trans[i] = trans - last_std_offset + return new_trans + + +cdef int64_t[::1] unbox_utcoffsets(object transinfo): + cdef: + Py_ssize_t i, sz + int64_t[::1] arr + + sz = len(transinfo) + arr = np.empty(sz, dtype='i8') + + for i in range(sz): + arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000 + + return arr + + +# ---------------------------------------------------------------------- +# Daylight Savings + + +cdef object get_dst_info(tzinfo tz): + """ + Returns + ------- + ndarray[int64_t] + Nanosecond UTC times of DST transitions. + ndarray[int64_t] + Nanosecond UTC offsets corresponding to DST transitions. + str + Describing the type of tzinfo object. + """ + cache_key = tz_cache_key(tz) + if cache_key is None: + # e.g. pytz.FixedOffset, matplotlib.dates._UTC, + # psycopg2.tz.FixedOffsetTimezone + num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000 + # If we have e.g. ZoneInfo here, the get_utcoffset call will return None, + # so the total_seconds() call will raise AttributeError. + return (np.array([NPY_NAT + 1], dtype=np.int64), + np.array([num], dtype=np.int64), + "unknown") + + if cache_key not in dst_cache: + if treat_tz_as_pytz(tz): + trans = np.array(tz._utc_transition_times, dtype='M8[ns]') + trans = trans.view('i8') + if tz._utc_transition_times[0].year == 1: + trans[0] = NPY_NAT + 1 + deltas = unbox_utcoffsets(tz._transition_info) + typ = 'pytz' + + elif treat_tz_as_dateutil(tz): + if len(tz._trans_list): + # get utc trans times + trans_list = _get_utc_trans_times_from_dateutil_tz(tz) + trans = np.hstack([ + np.array([0], dtype='M8[s]'), # place holder for 1st item + np.array(trans_list, dtype='M8[s]')]).astype( + 'M8[ns]') # all trans listed + trans = trans.view('i8') + trans[0] = NPY_NAT + 1 + + # deltas + deltas = np.array([v.offset for v in ( + tz._ttinfo_before,) + tz._trans_idx], dtype='i8') + deltas *= 1_000_000_000 + typ = 'dateutil' + + elif is_fixed_offset(tz): + trans = np.array([NPY_NAT + 1], dtype=np.int64) + deltas = np.array([tz._ttinfo_std.offset], + dtype='i8') * 1_000_000_000 + typ = 'fixed' + else: + # 2018-07-12 this is not reached in the tests, and this case + # is not handled in any of the functions that call + # get_dst_info. 
If this case _were_ hit the calling + # functions would then hit an IndexError because they assume + # `deltas` is non-empty. + # (under the just-deleted code that returned empty arrays) + raise AssertionError("dateutil tzinfo is not a FixedOffset " + "and has an empty `_trans_list`.", tz) + else: + # static tzinfo, we can get here with pytz.StaticTZInfo + # which are not caught by treat_tz_as_pytz + trans = np.array([NPY_NAT + 1], dtype=np.int64) + num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000 + deltas = np.array([num], dtype=np.int64) + typ = "static" + + dst_cache[cache_key] = (trans, deltas, typ) + + return dst_cache[cache_key] + + +def infer_tzinfo(datetime start, datetime end): + if start is not None and end is not None: + tz = start.tzinfo + if not tz_compare(tz, end.tzinfo): + raise AssertionError(f'Inputs must both have the same timezone, ' + f'{tz} != {end.tzinfo}') + elif start is not None: + tz = start.tzinfo + elif end is not None: + tz = end.tzinfo + else: + tz = None + return tz + + +cpdef bint tz_compare(tzinfo start, tzinfo end): + """ + Compare string representations of timezones + + The same timezone can be represented as different instances of + timezones. For example + `` and + `` are essentially same + timezones but aren't evaluated such, but the string representation + for both of these is `'Europe/Paris'`. + + This exists only to add a notion of equality to pytz-style zones + that is compatible with the notion of equality expected of tzinfo + subclasses. + + Parameters + ---------- + start : tzinfo + end : tzinfo + + Returns: + ------- + bool + """ + # GH 18523 + if is_utc(start): + # GH#38851 consider pytz/dateutil/stdlib UTCs as equivalent + return is_utc(end) + elif is_utc(end): + # Ensure we don't treat tzlocal as equal to UTC when running in UTC + return False + elif start is None or end is None: + return start is None and end is None + return get_timezone(start) == get_timezone(end) + + +def tz_standardize(tz: tzinfo) -> tzinfo: + """ + If the passed tz is a pytz timezone object, "normalize" it to the a + consistent version + + Parameters + ---------- + tz : tzinfo + + Returns + ------- + tzinfo + + Examples + -------- + >>> from datetime import datetime + >>> from pytz import timezone + >>> tz = timezone('US/Pacific').normalize( + ... datetime(2014, 1, 1, tzinfo=pytz.utc) + ... ).tzinfo + >>> tz + + >>> tz_standardize(tz) + + + >>> tz = timezone('US/Pacific') + >>> tz + + >>> tz_standardize(tz) + + """ + if treat_tz_as_pytz(tz): + return pytz.timezone(str(tz)) + return tz diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd new file mode 100644 index 00000000..13735fb5 --- /dev/null +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -0,0 +1,39 @@ +from cpython.datetime cimport tzinfo +from numpy cimport ( + int64_t, + intp_t, + ndarray, +) + +from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT + + +cpdef int64_t tz_convert_from_utc_single( + int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=* +) except? -1 +cdef int64_t tz_localize_to_utc_single( + int64_t val, + tzinfo tz, + object ambiguous=*, + object nonexistent=*, + NPY_DATETIMEUNIT reso=*, +) except? 
-1 + + +cdef class Localizer: + cdef: + tzinfo tz + NPY_DATETIMEUNIT _reso + bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz + ndarray trans + Py_ssize_t ntrans + const int64_t[::1] deltas + int64_t delta + int64_t* tdata + + cdef inline int64_t utc_val_to_local_val( + self, + int64_t utc_val, + Py_ssize_t* pos, + bint* fold=?, + ) except? -1 diff --git a/pandas/_libs/tslibs/tzconversion.pyi b/pandas/_libs/tslibs/tzconversion.pyi new file mode 100644 index 00000000..fab73f96 --- /dev/null +++ b/pandas/_libs/tslibs/tzconversion.pyi @@ -0,0 +1,21 @@ +from datetime import ( + timedelta, + tzinfo, +) +from typing import Iterable + +import numpy as np + +from pandas._typing import npt + +# tz_convert_from_utc_single exposed for testing +def tz_convert_from_utc_single( + val: np.int64, tz: tzinfo, reso: int = ... +) -> np.int64: ... +def tz_localize_to_utc( + vals: npt.NDArray[np.int64], + tz: tzinfo | None, + ambiguous: str | bool | Iterable[bool] | None = ..., + nonexistent: str | timedelta | np.timedelta64 | None = ..., + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.int64]: ... diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx new file mode 100644 index 00000000..4487136a --- /dev/null +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -0,0 +1,725 @@ +""" +timezone conversion +""" +cimport cython +from cpython.datetime cimport ( + PyDelta_Check, + datetime, + datetime_new, + import_datetime, + timedelta, + tzinfo, +) +from cython cimport Py_ssize_t + +import_datetime() + +import numpy as np +import pytz + +cimport numpy as cnp +from numpy cimport ( + int64_t, + intp_t, + ndarray, + uint8_t, +) + +cnp.import_array() + +from pandas._libs.tslibs.dtypes cimport ( + periods_per_day, + periods_per_second, +) +from pandas._libs.tslibs.nattype cimport NPY_NAT +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, +) +from pandas._libs.tslibs.timezones cimport ( + get_dst_info, + is_fixed_offset, + is_tzlocal, + is_utc, + is_zoneinfo, + utc_pytz, +) + + +cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64) + + +@cython.freelist(16) +@cython.final +cdef class Localizer: + # cdef: + # tzinfo tz + # NPY_DATETIMEUNIT _reso + # bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz + # ndarray trans + # Py_ssize_t ntrans + # const int64_t[::1] deltas + # int64_t delta + # int64_t* tdata + + @cython.initializedcheck(False) + @cython.boundscheck(False) + def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT reso): + self.tz = tz + self._reso = reso + self.use_utc = self.use_tzlocal = self.use_fixed = False + self.use_dst = self.use_pytz = False + self.ntrans = -1 # placeholder + self.delta = -1 # placeholder + self.deltas = _deltas_placeholder + self.tdata = NULL + + if is_utc(tz) or tz is None: + self.use_utc = True + + elif is_tzlocal(tz) or is_zoneinfo(tz): + self.use_tzlocal = True + + else: + trans, deltas, typ = get_dst_info(tz) + if reso != NPY_DATETIMEUNIT.NPY_FR_ns: + # NB: using floordiv here is implicitly assuming we will + # never see trans or deltas that are not an integer number + # of seconds. 
+ # TODO: avoid these np.array calls + if reso == NPY_DATETIMEUNIT.NPY_FR_us: + trans = np.array(trans) // 1_000 + deltas = np.array(deltas) // 1_000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + trans = np.array(trans) // 1_000_000 + deltas = np.array(deltas) // 1_000_000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + trans = np.array(trans) // 1_000_000_000 + deltas = np.array(deltas) // 1_000_000_000 + else: + raise NotImplementedError(reso) + + self.trans = trans + self.ntrans = self.trans.shape[0] + self.deltas = deltas + + if typ != "pytz" and typ != "dateutil": + # static/fixed; in this case we know that len(delta) == 1 + self.use_fixed = True + self.delta = deltas[0] + else: + self.use_dst = True + if typ == "pytz": + self.use_pytz = True + self.tdata = cnp.PyArray_DATA(trans) + + @cython.boundscheck(False) + cdef inline int64_t utc_val_to_local_val( + self, int64_t utc_val, Py_ssize_t* pos, bint* fold=NULL + ) except? -1: + if self.use_utc: + return utc_val + elif self.use_tzlocal: + return utc_val + _tz_localize_using_tzinfo_api( + utc_val, self.tz, to_utc=False, reso=self._reso, fold=fold + ) + elif self.use_fixed: + return utc_val + self.delta + else: + pos[0] = bisect_right_i8(self.tdata, utc_val, self.ntrans) - 1 + if fold is not NULL: + fold[0] = _infer_dateutil_fold( + utc_val, self.trans, self.deltas, pos[0] + ) + + return utc_val + self.deltas[pos[0]] + + +cdef int64_t tz_localize_to_utc_single( + int64_t val, + tzinfo tz, + object ambiguous=None, + object nonexistent=None, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, +) except? -1: + """See tz_localize_to_utc.__doc__""" + cdef: + int64_t delta + int64_t[::1] deltas + + if val == NPY_NAT: + return val + + elif is_utc(tz) or tz is None: + # TODO: test with non-nano + return val + + elif is_tzlocal(tz) or is_zoneinfo(tz): + return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, reso=reso) + + elif is_fixed_offset(tz): + _, deltas, _ = get_dst_info(tz) + delta = deltas[0] + # TODO: de-duplicate with Localizer.__init__ + if reso != NPY_DATETIMEUNIT.NPY_FR_ns: + if reso == NPY_DATETIMEUNIT.NPY_FR_us: + delta = delta // 1000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + delta = delta // 1_000_000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + delta = delta // 1_000_000_000 + + return val - delta + + else: + return tz_localize_to_utc( + np.array([val], dtype="i8"), + tz, + ambiguous=ambiguous, + nonexistent=nonexistent, + reso=reso, + )[0] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def tz_localize_to_utc( + ndarray[int64_t] vals, + tzinfo tz, + object ambiguous=None, + object nonexistent=None, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): + """ + Localize tzinfo-naive i8 to given time zone (using pytz). If + there are ambiguities in the values, raise AmbiguousTimeError. + + Parameters + ---------- + vals : ndarray[int64_t] + tz : tzinfo or None + ambiguous : str, bool, or arraylike + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. 
+ + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for ambiguous + times, but the array must have the same length as vals) + - bool if True, treat all vals as DST. If False, treat them as non-DST + - 'NaT' will return NaT where there are ambiguous times + + nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \ +timedelta-like} + How to handle non-existent times when converting wall times to UTC + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + localized : ndarray[int64_t] + """ + cdef: + ndarray[uint8_t, cast=True] ambiguous_array + Py_ssize_t i, idx, pos, n = vals.shape[0] + Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right + int64_t v, left, right, val, new_local, remaining_mins + int64_t first_delta, delta + int64_t shift_delta = 0 + ndarray[int64_t] result_a, result_b, dst_hours + int64_t[::1] result + npy_datetimestruct dts + bint infer_dst = False, is_dst = False, fill = False + bint shift_forward = False, shift_backward = False + bint fill_nonexist = False + str stamp + Localizer info = Localizer(tz, reso=reso) + int64_t pph = periods_per_day(reso) // 24 + + # Vectorized version of DstTzInfo.localize + if info.use_utc: + return vals.copy() + + result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) + + if info.use_tzlocal: + for i in range(n): + v = vals[i] + if v == NPY_NAT: + result[i] = NPY_NAT + else: + result[i] = v - _tz_localize_using_tzinfo_api( + v, tz, to_utc=True, reso=reso + ) + return result.base # to return underlying ndarray + + elif info.use_fixed: + delta = info.delta + for i in range(n): + v = vals[i] + if v == NPY_NAT: + result[i] = NPY_NAT + else: + result[i] = v - delta + return result.base # to return underlying ndarray + + # silence false-positive compiler warning + ambiguous_array = np.empty(0, dtype=bool) + if isinstance(ambiguous, str): + if ambiguous == 'infer': + infer_dst = True + elif ambiguous == 'NaT': + fill = True + elif isinstance(ambiguous, bool): + is_dst = True + if ambiguous: + ambiguous_array = np.ones(len(vals), dtype=bool) + else: + ambiguous_array = np.zeros(len(vals), dtype=bool) + elif hasattr(ambiguous, '__iter__'): + is_dst = True + if len(ambiguous) != len(vals): + raise ValueError("Length of ambiguous bool-array must be " + "the same size as vals") + ambiguous_array = np.asarray(ambiguous, dtype=bool) + + if nonexistent == 'NaT': + fill_nonexist = True + elif nonexistent == 'shift_forward': + shift_forward = True + elif nonexistent == 'shift_backward': + shift_backward = True + elif PyDelta_Check(nonexistent): + from .timedeltas import delta_to_nanoseconds + shift_delta = delta_to_nanoseconds(nonexistent, reso=reso) + elif nonexistent not in ('raise', None): + msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " + "shift_backwards} or a timedelta object") + raise ValueError(msg) + + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) + result_a, result_b =_get_utc_bounds( + vals, info.tdata, info.ntrans, info.deltas, reso=reso + ) + + # silence false-positive compiler warning + dst_hours = np.empty(0, dtype=np.int64) + if infer_dst: + dst_hours = _get_dst_hours(vals, result_a, result_b, reso=reso) + + # Pre-compute delta_idx_offset that will be used if we go down non-existent + # paths. 
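The ambiguous and nonexistent options documented above are reachable through the public tz_localize API, which routes into this function. A short usage sketch (public pandas API only, shown as an illustration rather than a direct call into this routine):

    import pandas as pd

    # 2018-10-28 02:30 occurs twice in Europe/Berlin (fall-back transition)
    ambiguous_wall = pd.DatetimeIndex(["2018-10-28 02:30:00"])
    dst_side = ambiguous_wall.tz_localize("Europe/Berlin", ambiguous=True)    # UTC+02:00
    std_side = ambiguous_wall.tz_localize("Europe/Berlin", ambiguous=False)   # UTC+01:00

    # 2015-03-29 02:30 does not exist in Europe/Berlin (spring-forward transition)
    missing_wall = pd.DatetimeIndex(["2015-03-29 02:30:00"])
    shifted = missing_wall.tz_localize("Europe/Berlin", nonexistent="shift_forward")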
+ # Shift the delta_idx by if the UTC offset of + # the target tz is greater than 0 and we're moving forward + # or vice versa + first_delta = info.deltas[0] + if (shift_forward or shift_delta > 0) and first_delta > 0: + delta_idx_offset = 1 + elif (shift_backward or shift_delta < 0) and first_delta < 0: + delta_idx_offset = 1 + else: + delta_idx_offset = 0 + + for i in range(n): + val = vals[i] + left = result_a[i] + right = result_b[i] + if val == NPY_NAT: + # TODO: test with non-nano + result[i] = val + elif left != NPY_NAT and right != NPY_NAT: + if left == right: + # TODO: test with non-nano + result[i] = left + else: + if infer_dst and dst_hours[i] != NPY_NAT: + # TODO: test with non-nano + result[i] = dst_hours[i] + elif is_dst: + if ambiguous_array[i]: + result[i] = left + else: + result[i] = right + elif fill: + # TODO: test with non-nano; parametrize test_dt_round_tz_ambiguous + result[i] = NPY_NAT + else: + stamp = _render_tstamp(val, reso=reso) + raise pytz.AmbiguousTimeError( + f"Cannot infer dst time from {stamp}, try using the " + "'ambiguous' argument" + ) + elif left != NPY_NAT: + result[i] = left + elif right != NPY_NAT: + # TODO: test with non-nano + result[i] = right + else: + # Handle nonexistent times + if shift_forward or shift_backward or shift_delta != 0: + # Shift the nonexistent time to the closest existing time + remaining_mins = val % pph + if shift_delta != 0: + # Validate that we don't relocalize on another nonexistent + # time + if -1 < shift_delta + remaining_mins < pph: + raise ValueError( + "The provided timedelta will relocalize on a " + f"nonexistent time: {nonexistent}" + ) + new_local = val + shift_delta + elif shift_forward: + new_local = val + (pph - remaining_mins) + else: + # Subtract 1 since the beginning hour is _inclusive_ of + # nonexistent times + new_local = val - remaining_mins - 1 + + delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) + + delta_idx = delta_idx - delta_idx_offset + result[i] = new_local - info.deltas[delta_idx] + elif fill_nonexist: + result[i] = NPY_NAT + else: + stamp = _render_tstamp(val, reso=reso) + raise pytz.NonExistentTimeError(stamp) + + return result.base # .base to get underlying ndarray + + +cdef inline Py_ssize_t bisect_right_i8(int64_t *data, + int64_t val, Py_ssize_t n): + # Caller is responsible for checking n > 0 + # This looks very similar to local_search_right in the ndarray.searchsorted + # implementation. + cdef: + Py_ssize_t pivot, left = 0, right = n + + # edge cases + if val > data[n - 1]: + return n + + # Caller is responsible for ensuring 'val >= data[0]'. This is + # ensured by the fact that 'data' comes from get_dst_info where data[0] + # is *always* NPY_NAT+1. If that ever changes, we will need to restore + # the following disabled check. 
+ # if val < data[0]: + # return 0 + + while left < right: + pivot = left + (right - left) // 2 + + if data[pivot] <= val: + left = pivot + 1 + else: + right = pivot + + return left + + +cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT reso): + """ Helper function to render exception messages""" + from pandas._libs.tslibs.timestamps import Timestamp + ts = Timestamp._from_value_and_reso(val, reso, None) + return str(ts) + + +cdef _get_utc_bounds( + ndarray vals, + int64_t* tdata, + Py_ssize_t ntrans, + const int64_t[::1] deltas, + NPY_DATETIMEUNIT reso, +): + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) + + cdef: + ndarray result_a, result_b + Py_ssize_t i, n = vals.size + int64_t val, v_left, v_right + Py_ssize_t isl, isr, pos_left, pos_right + int64_t ppd = periods_per_day(reso) + + result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) + result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) + + for i in range(n): + # This loops resembles the "Find the two best possibilities" block + # in pytz's DstTZInfo.localize method. + result_a[i] = NPY_NAT + result_b[i] = NPY_NAT + + val = vals[i] + if val == NPY_NAT: + continue + + # TODO: be careful of overflow in val-ppd + isl = bisect_right_i8(tdata, val - ppd, ntrans) - 1 + if isl < 0: + isl = 0 + + v_left = val - deltas[isl] + pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 + # timestamp falls to the left side of the DST transition + if v_left + deltas[pos_left] == val: + result_a[i] = v_left + + # TODO: be careful of overflow in val+ppd + isr = bisect_right_i8(tdata, val + ppd, ntrans) - 1 + if isr < 0: + isr = 0 + + v_right = val - deltas[isr] + pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 + # timestamp falls to the right side of the DST transition + if v_right + deltas[pos_right] == val: + result_b[i] = v_right + + return result_a, result_b + + +@cython.boundscheck(False) +cdef ndarray[int64_t] _get_dst_hours( + # vals, reso only needed here to potential render an exception message + const int64_t[:] vals, + ndarray[int64_t] result_a, + ndarray[int64_t] result_b, + NPY_DATETIMEUNIT reso, +): + cdef: + Py_ssize_t i, n = vals.shape[0] + ndarray[uint8_t, cast=True] mismatch + ndarray[int64_t] delta, dst_hours + ndarray[intp_t] switch_idxs, trans_idx, grp, a_idx, b_idx, one_diff + list trans_grp + intp_t switch_idx + int64_t left, right + + dst_hours = cnp.PyArray_EMPTY(result_a.ndim, result_a.shape, cnp.NPY_INT64, 0) + dst_hours[:] = NPY_NAT + + mismatch = cnp.PyArray_ZEROS(result_a.ndim, result_a.shape, cnp.NPY_BOOL, 0) + + for i in range(n): + left = result_a[i] + right = result_b[i] + + # Get the ambiguous hours (given the above, these are the hours + # where result_a != result_b and neither of them are NAT) + if left != right and left != NPY_NAT and right != NPY_NAT: + mismatch[i] = 1 + + trans_idx = mismatch.nonzero()[0] + + if trans_idx.size == 1: + # see test_tz_localize_to_utc_ambiguous_infer + stamp = _render_tstamp(vals[trans_idx[0]], reso=reso) + raise pytz.AmbiguousTimeError( + f"Cannot infer dst time from {stamp} as there " + "are no repeated times" + ) + + # Split the array into contiguous chunks (where the difference between + # indices is 1). These are effectively dst transitions in different + # years which is useful for checking that there is not an ambiguous + # transition in an individual year. 
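The splitting step described above can be reproduced with plain NumPy. A minimal sketch (illustration only) of breaking an index array into runs of consecutive positions, which is what the code below does with np.diff and np.array_split:

    import numpy as np

    trans_idx = np.array([3, 4, 5, 20, 21])             # two separate ambiguous runs
    breaks = np.where(np.diff(trans_idx) != 1)[0] + 1   # positions where a new run starts
    groups = np.array_split(trans_idx, breaks)
    assert [g.tolist() for g in groups] == [[3, 4, 5], [20, 21]]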
+ if trans_idx.size > 0: + one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1 + trans_grp = np.array_split(trans_idx, one_diff) + + # Iterate through each day, if there are no hours where the + # delta is negative (indicates a repeat of hour) the switch + # cannot be inferred + for grp in trans_grp: + + delta = np.diff(result_a[grp]) + if grp.size == 1 or np.all(delta > 0): + # see test_tz_localize_to_utc_ambiguous_infer + stamp = _render_tstamp(vals[grp[0]], reso=reso) + raise pytz.AmbiguousTimeError(stamp) + + # Find the index for the switch and pull from a for dst and b + # for standard + switch_idxs = (delta <= 0).nonzero()[0] + if switch_idxs.size > 1: + # see test_tz_localize_to_utc_ambiguous_infer + raise pytz.AmbiguousTimeError( + f"There are {switch_idxs.size} dst switches when " + "there should only be 1." + ) + + switch_idx = switch_idxs[0] + 1 + # Pull the only index and adjust + a_idx = grp[:switch_idx] + b_idx = grp[switch_idx:] + dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) + + return dst_hours + + +# ---------------------------------------------------------------------- +# Timezone Conversion + +cpdef int64_t tz_convert_from_utc_single( + int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns +) except? -1: + """ + Convert the val (in i8) from UTC to tz + + This is a single value version of tz_convert_from_utc. + + Parameters + ---------- + utc_val : int64 + tz : tzinfo + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + converted: int64 + """ + cdef: + Localizer info = Localizer(tz, reso=reso) + Py_ssize_t pos + + # Note: caller is responsible for ensuring utc_val != NPY_NAT + return info.utc_val_to_local_val(utc_val, &pos) + + +# OSError may be thrown by tzlocal on windows at or close to 1970-01-01 +# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 +cdef int64_t _tz_localize_using_tzinfo_api( + int64_t val, + tzinfo tz, + bint to_utc=True, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, + bint* fold=NULL, +) except? -1: + """ + Convert the i8 representation of a datetime from a general-case timezone to + UTC, or vice-versa using the datetime/tzinfo API. + + Private, not intended for use outside of tslibs.tzconversion. + + Parameters + ---------- + val : int64_t + tz : tzinfo + to_utc : bint + True if converting _to_ UTC, False if going the other direction. + reso : NPY_DATETIMEUNIT + fold : bint*, default NULL + pointer to fold: whether datetime ends up in a fold or not + after adjustment. + Only passed with to_utc=False. + + Returns + ------- + delta : int64_t + Value to add when converting from utc, subtract when converting to utc. 
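For the to_utc=False direction this amounts to asking the tzinfo object itself for the wall-clock offset instead of consulting precomputed transition tables. A minimal pure-Python sketch of that idea (illustration only; wall_offset_ns is a hypothetical helper, and zoneinfo assumes Python 3.9+):

    from datetime import datetime, timezone
    from zoneinfo import ZoneInfo

    def wall_offset_ns(utc_dt, tz):
        # offset to *add* to a UTC value to obtain local wall time, in nanoseconds
        local = utc_dt.replace(tzinfo=timezone.utc).astimezone(tz)
        return int(local.utcoffset().total_seconds() * 1_000_000_000)

    # Europe/Paris is UTC+02:00 in July
    assert wall_offset_ns(datetime(2022, 7, 1, 12), ZoneInfo("Europe/Paris")) == 7_200_000_000_000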
+ + Notes + ----- + Sets fold by pointer + """ + cdef: + npy_datetimestruct dts + datetime dt + int64_t delta + timedelta td + int64_t pps = periods_per_second(reso) + + pandas_datetime_to_datetimestruct(val, reso, &dts) + + # datetime_new is cython-optimized constructor + if not to_utc: + # tz.utcoffset only makes sense if datetime + # is _wall time_, so if val is a UTC timestamp convert to wall time + dt = _astimezone(dts, tz) + + if fold is not NULL: + # NB: fold is only passed with to_utc=False + fold[0] = dt.fold + else: + dt = datetime_new(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, None) + + td = tz.utcoffset(dt) + delta = int(td.total_seconds() * pps) + return delta + + +cdef datetime _astimezone(npy_datetimestruct dts, tzinfo tz): + """ + Optimized equivalent to: + + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, utc_pytz) + dt = dt.astimezone(tz) + + Derived from the datetime.astimezone implementation at + https://github.com/python/cpython/blob/main/Modules/_datetimemodule.c#L6187 + + NB: we are assuming tz is not None. + """ + cdef: + datetime result + + result = datetime_new(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + return tz.fromutc(result) + + +# NB: relies on dateutil internals, subject to change. +@cython.boundscheck(False) +@cython.wraparound(False) +cdef bint _infer_dateutil_fold( + int64_t value, + const int64_t[::1] trans, + const int64_t[::1] deltas, + Py_ssize_t pos, +): + """ + Infer _TSObject fold property from value by assuming 0 and then setting + to 1 if necessary. + + Parameters + ---------- + value : int64_t + trans : ndarray[int64_t] + ndarray of offset transition points in nanoseconds since epoch. + deltas : int64_t[:] + array of offsets corresponding to transition points in trans. + pos : Py_ssize_t + Position of the last transition point before taking fold into account. + + Returns + ------- + bint + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + References + ---------- + .. [1] "PEP 495 - Local Time Disambiguation" + https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + """ + cdef: + bint fold = 0 + int64_t fold_delta + + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if value - fold_delta < trans[pos]: + fold = 1 + + return fold diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd new file mode 100644 index 00000000..492b7d51 --- /dev/null +++ b/pandas/_libs/tslibs/util.pxd @@ -0,0 +1,221 @@ + +from cpython.object cimport PyTypeObject + + +cdef extern from *: + """ + PyObject* char_to_string(const char* data) { + return PyUnicode_FromString(data); + } + """ + object char_to_string(const char* data) + + +cdef extern from "Python.h": + # Note: importing extern-style allows us to declare these as nogil + # functions, whereas `from cpython cimport` does not. + bint PyBool_Check(object obj) nogil + bint PyFloat_Check(object obj) nogil + bint PyComplex_Check(object obj) nogil + bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + + # Note that following functions can potentially raise an exception, + # thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can + # potentially allocate memory inside in unlikely case of when underlying + # unicode object was stored as non-utf8 and utf8 wasn't requested before. 
+ const char* PyUnicode_AsUTF8AndSize(object obj, + Py_ssize_t* length) except NULL + +from numpy cimport ( + float64_t, + int64_t, +) + + +cdef extern from "numpy/arrayobject.h": + PyTypeObject PyFloatingArrType_Type + +cdef extern from "numpy/ndarrayobject.h": + PyTypeObject PyTimedeltaArrType_Type + PyTypeObject PyDatetimeArrType_Type + PyTypeObject PyComplexFloatingArrType_Type + PyTypeObject PyBoolArrType_Type + + bint PyArray_IsIntegerScalar(obj) nogil + bint PyArray_Check(obj) nogil + +cdef extern from "numpy/npy_common.h": + int64_t NPY_MIN_INT64 + + +cdef inline int64_t get_nat(): + return NPY_MIN_INT64 + + +# -------------------------------------------------------------------- +# Type Checking + +cdef inline bint is_integer_object(object obj) nogil: + """ + Cython equivalent of + + `isinstance(val, (int, long, np.integer)) and not isinstance(val, bool)` + + Parameters + ---------- + val : object + + Returns + ------- + is_integer : bool + + Notes + ----- + This counts np.timedelta64 objects as integers. + """ + return (not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) + and not is_timedelta64_object(obj)) + + +cdef inline bint is_float_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (float, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_float : bool + """ + return (PyFloat_Check(obj) or + (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) + + +cdef inline bint is_complex_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (complex, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_complex : bool + """ + return (PyComplex_Check(obj) or + PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) + + +cdef inline bint is_bool_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (bool, np.bool_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_bool : bool + """ + return (PyBool_Check(obj) or + PyObject_TypeCheck(obj, &PyBoolArrType_Type)) + + +cdef inline bint is_real_number_object(object obj) nogil: + return is_bool_object(obj) or is_integer_object(obj) or is_float_object(obj) + + +cdef inline bint is_timedelta64_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, np.timedelta64)` + + Parameters + ---------- + val : object + + Returns + ------- + is_timedelta64 : bool + """ + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) + + +cdef inline bint is_datetime64_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, np.datetime64)` + + Parameters + ---------- + val : object + + Returns + ------- + is_datetime64 : bool + """ + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + + +cdef inline bint is_array(object val): + """ + Cython equivalent of `isinstance(val, np.ndarray)` + + Parameters + ---------- + val : object + + Returns + ------- + is_ndarray : bool + """ + return PyArray_Check(val) + + +cdef inline bint is_nan(object val): + """ + Check if val is a Not-A-Number float or complex, including + float('NaN') and np.nan. 
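The test relies on the IEEE 754 rule that NaN is the only value that compares unequal to itself. A tiny pure-Python illustration (not part of the patch; is_nan_like is a hypothetical stand-in):

    import numpy as np

    def is_nan_like(val):
        # NaN (float or complex) is the only numeric value for which val != val
        return isinstance(val, (float, complex, np.floating, np.complexfloating)) and val != val

    assert is_nan_like(float("nan"))
    assert is_nan_like(np.nan)
    assert not is_nan_like(1.5)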
+ + Parameters + ---------- + val : object + + Returns + ------- + is_nan : bool + """ + cdef float64_t fval + if is_float_object(val): + fval = val + return fval != fval + return is_complex_object(val) and val != val + + +cdef inline const char* get_c_string_buf_and_size(str py_string, + Py_ssize_t *length) except NULL: + """ + Extract internal char* buffer of unicode or bytes object `py_string` with + getting length of this internal buffer saved in `length`. + + Notes + ----- + Python object owns memory, thus returned char* must not be freed. + `length` can be NULL if getting buffer length is not needed. + + Parameters + ---------- + py_string : str + length : Py_ssize_t* + + Returns + ------- + buf : const char* + """ + return PyUnicode_AsUTF8AndSize(py_string, length) + + +cdef inline const char* get_c_string(str py_string) except NULL: + return get_c_string_buf_and_size(py_string, NULL) diff --git a/pandas/_libs/tslibs/vectorized.pyi b/pandas/_libs/tslibs/vectorized.pyi new file mode 100644 index 00000000..d24541ae --- /dev/null +++ b/pandas/_libs/tslibs/vectorized.pyi @@ -0,0 +1,46 @@ +""" +For cython types that cannot be represented precisely, closest-available +python equivalents are used, and the precise types kept as adjacent comments. +""" +from datetime import tzinfo + +import numpy as np + +from pandas._libs.tslibs.dtypes import Resolution +from pandas._libs.tslibs.offsets import BaseOffset +from pandas._typing import npt + +def dt64arr_to_periodarr( + stamps: npt.NDArray[np.int64], + freq: int, + tz: tzinfo | None, + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.int64]: ... +def is_date_array_normalized( + stamps: npt.NDArray[np.int64], + tz: tzinfo | None, + reso: int, # NPY_DATETIMEUNIT +) -> bool: ... +def normalize_i8_timestamps( + stamps: npt.NDArray[np.int64], + tz: tzinfo | None, + reso: int, # NPY_DATETIMEUNIT +) -> npt.NDArray[np.int64]: ... +def get_resolution( + stamps: npt.NDArray[np.int64], + tz: tzinfo | None = ..., + reso: int = ..., # NPY_DATETIMEUNIT +) -> Resolution: ... +def ints_to_pydatetime( + arr: npt.NDArray[np.int64], + tz: tzinfo | None = ..., + freq: BaseOffset | None = ..., + fold: bool = ..., + box: str = ..., + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.object_]: ... +def tz_convert_from_utc( + stamps: npt.NDArray[np.int64], + tz: tzinfo | None, + reso: int = ..., # NPY_DATETIMEUNIT +) -> npt.NDArray[np.int64]: ... 
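These stubs describe internal array-level helpers; their effect is easiest to see through the public DatetimeIndex methods that ultimately call into them. A short usage sketch (public API only, as an illustration rather than a direct use of these functions):

    import pandas as pd

    idx = pd.date_range("2022-03-27", periods=3, freq="H", tz="UTC")

    local = idx.tz_convert("Europe/Paris")   # vectorized UTC -> wall-time conversion
    boxed = local.to_pydatetime()            # boxed into datetime.datetime objects
    midnights = local.normalize()            # floor each stamp to local midnight

    print(local.resolution)                  # coarsest unit present, here 'hour'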
diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx new file mode 100644 index 00000000..b63b4cf1 --- /dev/null +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -0,0 +1,393 @@ +cimport cython +from cpython.datetime cimport ( + date, + datetime, + time, + tzinfo, +) + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + int64_t, + intp_t, + ndarray, +) + +cnp.import_array() + +from .dtypes import Resolution + +from .dtypes cimport ( + c_Resolution, + periods_per_day, +) +from .nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) +from .np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_ns, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, +) +from .offsets cimport BaseOffset +from .period cimport get_period_ordinal +from .timestamps cimport create_timestamp_from_ts +from .timezones cimport is_utc +from .tzconversion cimport Localizer + + +@cython.boundscheck(False) +@cython.wraparound(False) +def tz_convert_from_utc(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ns): + # stamps is int64_t, arbitrary ndim + """ + Convert the values (in i8) from UTC to tz + + Parameters + ---------- + stamps : ndarray[int64] + tz : tzinfo + + Returns + ------- + ndarray[int64] + """ + cdef: + Localizer info = Localizer(tz, reso=reso) + int64_t utc_val, local_val + Py_ssize_t pos, i, n = stamps.size + + ndarray result + cnp.broadcast mi + + if tz is None or is_utc(tz) or stamps.size == 0: + # Much faster than going through the "standard" pattern below + return stamps.copy() + + result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0) + mi = cnp.PyArray_MultiIterNew2(result, stamps) + + for i in range(n): + # Analogous to: utc_val = stamps[i] + utc_val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if utc_val == NPY_NAT: + local_val = NPY_NAT + else: + local_val = info.utc_val_to_local_val(utc_val, &pos) + + # Analogous to: result[i] = local_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = local_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return result + + +# ------------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +def ints_to_pydatetime( + ndarray stamps, + tzinfo tz=None, + BaseOffset freq=None, + bint fold=False, + str box="datetime", + NPY_DATETIMEUNIT reso=NPY_FR_ns, +) -> np.ndarray: + # stamps is int64, arbitrary ndim + """ + Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. + + Parameters + ---------- + stamps : array of i8 + tz : str, optional + convert to this timezone + freq : BaseOffset, optional + freq to convert + fold : bint, default is 0 + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + .. 
versionadded:: 1.1.0 + box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' + * If datetime, convert to datetime.datetime + * If date, convert to datetime.date + * If time, convert to datetime.time + * If Timestamp, convert to pandas.Timestamp + + reso : NPY_DATETIMEUNIT, default NPY_FR_ns + + Returns + ------- + ndarray[object] of type specified by box + """ + cdef: + Localizer info = Localizer(tz, reso=reso) + int64_t utc_val, local_val + Py_ssize_t i, n = stamps.size + Py_ssize_t pos = -1 # unused, avoid not-initialized warning + + npy_datetimestruct dts + tzinfo new_tz + bint use_date = False, use_time = False, use_ts = False, use_pydt = False + object res_val + + # Note that `result` (and thus `result_flat`) is C-order and + # `it` iterates C-order as well, so the iteration matches + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_OBJECT, 0) + object[::1] res_flat = result.ravel() # should NOT be a copy + cnp.flatiter it = cnp.PyArray_IterNew(stamps) + + if box == "date": + assert (tz is None), "tz should be None when converting to date" + use_date = True + elif box == "timestamp": + use_ts = True + elif box == "time": + use_time = True + elif box == "datetime": + use_pydt = True + else: + raise ValueError( + "box must be one of 'datetime', 'date', 'time' or 'timestamp'" + ) + + for i in range(n): + # Analogous to: utc_val = stamps[i] + utc_val = (cnp.PyArray_ITER_DATA(it))[0] + + new_tz = tz + + if utc_val == NPY_NAT: + res_val = NaT + + else: + + local_val = info.utc_val_to_local_val(utc_val, &pos) + if info.use_pytz: + # find right representation of dst etc in pytz timezone + new_tz = tz._tzinfos[tz._transition_info[pos]] + + pandas_datetime_to_datetimestruct(local_val, reso, &dts) + + if use_ts: + res_val = create_timestamp_from_ts( + utc_val, dts, new_tz, freq, fold, reso=reso + ) + elif use_pydt: + res_val = datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, + new_tz, fold=fold, + ) + elif use_date: + res_val = date(dts.year, dts.month, dts.day) + else: + res_val = time(dts.hour, dts.min, dts.sec, dts.us, new_tz, fold=fold) + + # Note: we can index result directly instead of using PyArray_MultiIter_DATA + # like we do for the other functions because result is known C-contiguous + # and is the first argument to PyArray_MultiIterNew2. The usual pattern + # does not seem to work with object dtype. 
+ # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + res_flat[i] = res_val + + cnp.PyArray_ITER_NEXT(it) + + return result + + +# ------------------------------------------------------------------------- + + +cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts): + if dts.ps != 0: + return c_Resolution.RESO_NS + elif dts.us != 0: + if dts.us % 1000 == 0: + return c_Resolution.RESO_MS + return c_Resolution.RESO_US + elif dts.sec != 0: + return c_Resolution.RESO_SEC + elif dts.min != 0: + return c_Resolution.RESO_MIN + elif dts.hour != 0: + return c_Resolution.RESO_HR + return c_Resolution.RESO_DAY + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_resolution( + ndarray stamps, tzinfo tz=None, NPY_DATETIMEUNIT reso=NPY_FR_ns +) -> Resolution: + # stamps is int64_t, any ndim + cdef: + Localizer info = Localizer(tz, reso=reso) + int64_t utc_val, local_val + Py_ssize_t i, n = stamps.size + Py_ssize_t pos = -1 # unused, avoid not-initialized warning + cnp.flatiter it = cnp.PyArray_IterNew(stamps) + + npy_datetimestruct dts + c_Resolution pd_reso = c_Resolution.RESO_DAY, curr_reso + + for i in range(n): + # Analogous to: utc_val = stamps[i] + utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it)) + + if utc_val == NPY_NAT: + pass + else: + local_val = info.utc_val_to_local_val(utc_val, &pos) + + pandas_datetime_to_datetimestruct(local_val, reso, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < pd_reso: + pd_reso = curr_reso + + cnp.PyArray_ITER_NEXT(it) + + return Resolution(pd_reso) + + +# ------------------------------------------------------------------------- + + +@cython.cdivision(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso): + # stamps is int64_t, arbitrary ndim + """ + Normalize each of the (nanosecond) timezone aware timestamps in the given + array by rounding down to the beginning of the day (i.e. midnight). + This is midnight for timezone, `tz`. + + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + reso : NPY_DATETIMEUNIT + + Returns + ------- + result : int64 ndarray of converted of normalized nanosecond timestamps + """ + cdef: + Localizer info = Localizer(tz, reso=reso) + int64_t utc_val, local_val, res_val + Py_ssize_t i, n = stamps.size + Py_ssize_t pos = -1 # unused, avoid not-initialized warning + + ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps) + int64_t ppd = periods_per_day(reso) + + for i in range(n): + # Analogous to: utc_val = stamps[i] + utc_val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if utc_val == NPY_NAT: + res_val = NPY_NAT + else: + local_val = info.utc_val_to_local_val(utc_val, &pos) + res_val = local_val - (local_val % ppd) + + # Analogous to: result[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) -> bool: + # stamps is int64_t, arbitrary ndim + """ + Check if all of the given (nanosecond) timestamps are normalized to + midnight, i.e. hour == minute == second == 0. If the optional timezone + `tz` is not None, then this is midnight for this timezone. 
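The check reduces to asking whether each local value is an exact multiple of one day in the working unit. A minimal sketch of that test in nanoseconds (illustration only, assuming the values have already been shifted to local wall time):

    import numpy as np

    NS_PER_DAY = 86_400 * 1_000_000_000

    def all_midnight(local_ns):
        # normalized <=> zero remainder when divided by one day's worth of nanoseconds
        return bool((local_ns % NS_PER_DAY == 0).all())

    assert all_midnight(np.array([0, NS_PER_DAY, 5 * NS_PER_DAY]))
    assert not all_midnight(np.array([3_600_000_000_000]))  # 01:00 is not midnight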
+ + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + reso : NPY_DATETIMEUNIT + + Returns + ------- + is_normalized : bool True if all stamps are normalized + """ + cdef: + Localizer info = Localizer(tz, reso=reso) + int64_t utc_val, local_val + Py_ssize_t i, n = stamps.size + Py_ssize_t pos = -1 # unused, avoid not-initialized warning + cnp.flatiter it = cnp.PyArray_IterNew(stamps) + int64_t ppd = periods_per_day(reso) + + for i in range(n): + # Analogous to: utc_val = stamps[i] + utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it)) + + local_val = info.utc_val_to_local_val(utc_val, &pos) + + if local_val % ppd != 0: + return False + + cnp.PyArray_ITER_NEXT(it) + + return True + + +# ------------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +def dt64arr_to_periodarr( + ndarray stamps, int freq, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ns +): + # stamps is int64_t, arbitrary ndim + cdef: + Localizer info = Localizer(tz, reso=reso) + Py_ssize_t i, n = stamps.size + Py_ssize_t pos = -1 # unused, avoid not-initialized warning + int64_t utc_val, local_val, res_val + + npy_datetimestruct dts + ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps) + + for i in range(n): + # Analogous to: utc_val = stamps[i] + utc_val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if utc_val == NPY_NAT: + res_val = NPY_NAT + else: + local_val = info.utc_val_to_local_val(utc_val, &pos) + pandas_datetime_to_datetimestruct(local_val, reso, &dts) + res_val = get_period_ordinal(&dts, freq) + + # Analogous to: result[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return result diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd new file mode 100644 index 00000000..18009a1a --- /dev/null +++ b/pandas/_libs/util.pxd @@ -0,0 +1,17 @@ +cimport numpy as cnp +from libc.stdint cimport ( + INT8_MAX, + INT8_MIN, + INT16_MAX, + INT16_MIN, + INT32_MAX, + INT32_MIN, + INT64_MAX, + INT64_MIN, + UINT8_MAX, + UINT16_MAX, + UINT32_MAX, + UINT64_MAX, +) + +from pandas._libs.tslibs.util cimport * diff --git a/pandas/_libs/window/__init__.py b/pandas/_libs/window/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/_libs/window/aggregations.pyi b/pandas/_libs/window/aggregations.pyi new file mode 100644 index 00000000..b926a7cb --- /dev/null +++ b/pandas/_libs/window/aggregations.pyi @@ -0,0 +1,127 @@ +from typing import ( + Any, + Callable, + Literal, +) + +import numpy as np + +from pandas._typing import ( + WindowingRankType, + npt, +) + +def roll_sum( + values: np.ndarray, # const float64_t[:] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_mean( + values: np.ndarray, # const float64_t[:] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_var( + values: np.ndarray, # const float64_t[:] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t + ddof: int = ..., +) -> np.ndarray: ... 
# np.ndarray[float] +def roll_skew( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_kurt( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_median_c( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_max( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_min( + values: np.ndarray, # np.ndarray[np.float64] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t +) -> np.ndarray: ... # np.ndarray[float] +def roll_quantile( + values: np.ndarray, # const float64_t[:] + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t + quantile: float, # float64_t + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"], +) -> np.ndarray: ... # np.ndarray[float] +def roll_rank( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + minp: int, + percentile: bool, + method: WindowingRankType, + ascending: bool, +) -> np.ndarray: ... # np.ndarray[float] +def roll_apply( + obj: object, + start: np.ndarray, # np.ndarray[np.int64] + end: np.ndarray, # np.ndarray[np.int64] + minp: int, # int64_t + function: Callable[..., Any], + raw: bool, + args: tuple[Any, ...], + kwargs: dict[str, Any], +) -> npt.NDArray[np.float64]: ... +def roll_weighted_sum( + values: np.ndarray, # const float64_t[:] + weights: np.ndarray, # const float64_t[:] + minp: int, +) -> np.ndarray: ... # np.ndarray[np.float64] +def roll_weighted_mean( + values: np.ndarray, # const float64_t[:] + weights: np.ndarray, # const float64_t[:] + minp: int, +) -> np.ndarray: ... # np.ndarray[np.float64] +def roll_weighted_var( + values: np.ndarray, # const float64_t[:] + weights: np.ndarray, # const float64_t[:] + minp: int, # int64_t + ddof: int, # unsigned int +) -> np.ndarray: ... # np.ndarray[np.float64] +def ewm( + vals: np.ndarray, # const float64_t[:] + start: np.ndarray, # const int64_t[:] + end: np.ndarray, # const int64_t[:] + minp: int, + com: float, # float64_t + adjust: bool, + ignore_na: bool, + deltas: np.ndarray, # const float64_t[:] + normalize: bool, +) -> np.ndarray: ... # np.ndarray[np.float64] +def ewmcov( + input_x: np.ndarray, # const float64_t[:] + start: np.ndarray, # const int64_t[:] + end: np.ndarray, # const int64_t[:] + minp: int, + input_y: np.ndarray, # const float64_t[:] + com: float, # float64_t + adjust: bool, + ignore_na: bool, + bias: bool, +) -> np.ndarray: ... 
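These signatures back the public rolling and exponentially weighted reductions. A brief usage sketch (public pandas API only, as an illustration of what eventually dispatches to these routines):

    import pandas as pd

    s = pd.Series([1.0, 2.0, float("nan"), 4.0, 5.0])

    r = s.rolling(window=3, min_periods=2)
    sums = r.sum()         # backed by roll_sum
    means = r.mean()       # backed by roll_mean
    var = r.var(ddof=1)    # backed by roll_var

    ewma = s.ewm(com=0.5, adjust=True, ignore_na=False).mean()  # backed by ewm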
# np.ndarray[np.float64] diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx new file mode 100644 index 00000000..68c05f2b --- /dev/null +++ b/pandas/_libs/window/aggregations.pyx @@ -0,0 +1,1940 @@ +# cython: boundscheck=False, wraparound=False, cdivision=True + +cimport cython +from libc.math cimport ( + round, + signbit, + sqrt, +) +from libcpp.deque cimport deque + +from pandas._libs.algos cimport TiebreakEnumType + +import numpy as np + +cimport numpy as cnp +from numpy cimport ( + float32_t, + float64_t, + int64_t, + ndarray, +) + +cnp.import_array() + +from pandas._libs.algos import is_monotonic + +from pandas._libs.dtypes cimport numeric_t + + +cdef extern from "../src/skiplist.h": + ctypedef struct node_t: + node_t **next + int *width + double value + int is_nil + int levels + int ref_count + + ctypedef struct skiplist_t: + node_t *head + node_t **tmp_chain + int *tmp_steps + int size + int maxlevels + + skiplist_t* skiplist_init(int) nogil + void skiplist_destroy(skiplist_t*) nogil + double skiplist_get(skiplist_t*, int, int*) nogil + int skiplist_insert(skiplist_t*, double) nogil + int skiplist_remove(skiplist_t*, double) nogil + int skiplist_rank(skiplist_t*, double) nogil + int skiplist_min_rank(skiplist_t*, double) nogil + +cdef: + float32_t MINfloat32 = np.NINF + float64_t MINfloat64 = np.NINF + + float32_t MAXfloat32 = np.inf + float64_t MAXfloat64 = np.inf + + float64_t NaN = np.NaN + +cdef bint is_monotonic_increasing_start_end_bounds( + ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end +): + return is_monotonic(start, False)[0] and is_monotonic(end, False)[0] + +# ---------------------------------------------------------------------- +# Rolling sum + + +cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x, + int64_t num_consecutive_same_value, float64_t prev_value + ) nogil: + cdef: + float64_t result + + if nobs == 0 == minp: + result = 0 + elif nobs >= minp: + if num_consecutive_same_value >= nobs: + result = prev_value * nobs + else: + result = sum_x + else: + result = NaN + + return result + + +cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x, + float64_t *compensation, int64_t *num_consecutive_same_value, + float64_t *prev_value) nogil: + """ add a value from the sum calc using Kahan summation """ + + cdef: + float64_t y, t + + # Not NaN + if val == val: + nobs[0] = nobs[0] + 1 + y = val - compensation[0] + t = sum_x[0] + y + compensation[0] = t - sum_x[0] - y + sum_x[0] = t + + # GH#42064, record num of same values to remove floating point artifacts + if val == prev_value[0]: + num_consecutive_same_value[0] += 1 + else: + # reset to 1 (include current value itself) + num_consecutive_same_value[0] = 1 + prev_value[0] = val + + +cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x, + float64_t *compensation) nogil: + """ remove a value from the sum calc using Kahan summation """ + + cdef: + float64_t y, t + + # Not NaN + if val == val: + nobs[0] = nobs[0] - 1 + y = - val - compensation[0] + t = sum_x[0] + y + compensation[0] = t - sum_x[0] - y + sum_x[0] = t + + +def roll_sum(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + Py_ssize_t i, j + float64_t sum_x, compensation_add, compensation_remove, prev_value + int64_t s, e, num_consecutive_same_value + int64_t nobs = 0, N = len(start) + ndarray[float64_t] output + bint is_monotonic_increasing_bounds + + is_monotonic_increasing_bounds = 
is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + + with nogil: + + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + # setup + prev_value = values[s] + num_consecutive_same_value = 0 + sum_x = compensation_add = compensation_remove = 0 + nobs = 0 + for j in range(s, e): + add_sum(values[j], &nobs, &sum_x, &compensation_add, + &num_consecutive_same_value, &prev_value) + + else: + + # calculate deletes + for j in range(start[i - 1], s): + remove_sum(values[j], &nobs, &sum_x, &compensation_remove) + + # calculate adds + for j in range(end[i - 1], e): + add_sum(values[j], &nobs, &sum_x, &compensation_add, + &num_consecutive_same_value, &prev_value) + + output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value) + + if not is_monotonic_increasing_bounds: + nobs = 0 + sum_x = 0.0 + compensation_remove = 0.0 + + return output + + +# ---------------------------------------------------------------------- +# Rolling mean + + +cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct, + float64_t sum_x, int64_t num_consecutive_same_value, + float64_t prev_value) nogil: + cdef: + float64_t result + + if nobs >= minp and nobs > 0: + result = sum_x / nobs + if num_consecutive_same_value >= nobs: + result = prev_value + elif neg_ct == 0 and result < 0: + # all positive + result = 0 + elif neg_ct == nobs and result > 0: + # all negative + result = 0 + else: + pass + else: + result = NaN + return result + + +cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, + Py_ssize_t *neg_ct, float64_t *compensation, + int64_t *num_consecutive_same_value, float64_t *prev_value) nogil: + """ add a value from the mean calc using Kahan summation """ + cdef: + float64_t y, t + + # Not NaN + if val == val: + nobs[0] = nobs[0] + 1 + y = val - compensation[0] + t = sum_x[0] + y + compensation[0] = t - sum_x[0] - y + sum_x[0] = t + if signbit(val): + neg_ct[0] = neg_ct[0] + 1 + + # GH#42064, record num of same values to remove floating point artifacts + if val == prev_value[0]: + num_consecutive_same_value[0] += 1 + else: + # reset to 1 (include current value itself) + num_consecutive_same_value[0] = 1 + prev_value[0] = val + + +cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, + Py_ssize_t *neg_ct, float64_t *compensation) nogil: + """ remove a value from the mean calc using Kahan summation """ + cdef: + float64_t y, t + + if val == val: + nobs[0] = nobs[0] - 1 + y = - val - compensation[0] + t = sum_x[0] + y + compensation[0] = t - sum_x[0] - y + sum_x[0] = t + if signbit(val): + neg_ct[0] = neg_ct[0] - 1 + + +def roll_mean(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + float64_t val, compensation_add, compensation_remove, sum_x, prev_value + int64_t s, e, num_consecutive_same_value + Py_ssize_t nobs, i, j, neg_ct, N = len(start) + ndarray[float64_t] output + bint is_monotonic_increasing_bounds + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + + with nogil: + + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + # setup + compensation_add = compensation_remove = sum_x = 0 + nobs = neg_ct = 0 + prev_value = values[s] + num_consecutive_same_value = 0 + for j in range(s, e): + 
val = values[j] + add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, + &num_consecutive_same_value, &prev_value) + + else: + + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove) + + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, + &num_consecutive_same_value, &prev_value) + + output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value) + + if not is_monotonic_increasing_bounds: + nobs = 0 + neg_ct = 0 + sum_x = 0.0 + compensation_remove = 0.0 + return output + +# ---------------------------------------------------------------------- +# Rolling variance + + +cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, + float64_t ssqdm_x, int64_t num_consecutive_same_value) nogil: + cdef: + float64_t result + + # Variance is unchanged if no observation is added or removed + if (nobs >= minp) and (nobs > ddof): + + # pathological case & repeatedly same values case + if nobs == 1 or num_consecutive_same_value >= nobs: + result = 0 + else: + result = ssqdm_x / (nobs - ddof) + else: + result = NaN + + return result + + +cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x, float64_t *compensation, + int64_t *num_consecutive_same_value, float64_t *prev_value) nogil: + """ add a value from the var calc """ + cdef: + float64_t delta, prev_mean, y, t + + # GH#21813, if msvc 2017 bug is resolved, we should be OK with != instead of `isnan` + if val != val: + return + + nobs[0] = nobs[0] + 1 + + # GH#42064, record num of same values to remove floating point artifacts + if val == prev_value[0]: + num_consecutive_same_value[0] += 1 + else: + # reset to 1 (include current value itself) + num_consecutive_same_value[0] = 1 + prev_value[0] = val + + # Welford's method for the online variance-calculation + # using Kahan summation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + prev_mean = mean_x[0] - compensation[0] + y = val - compensation[0] + t = y - mean_x[0] + compensation[0] = t + mean_x[0] - y + delta = t + if nobs[0]: + mean_x[0] = mean_x[0] + delta / nobs[0] + else: + mean_x[0] = 0 + ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0]) + + +cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x, float64_t *compensation) nogil: + """ remove a value from the var calc """ + cdef: + float64_t delta, prev_mean, y, t + if val == val: + nobs[0] = nobs[0] - 1 + if nobs[0]: + # Welford's method for the online variance-calculation + # using Kahan summation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + prev_mean = mean_x[0] - compensation[0] + y = val - compensation[0] + t = y - mean_x[0] + compensation[0] = t + mean_x[0] - y + delta = t + mean_x[0] = mean_x[0] - delta / nobs[0] + ssqdm_x[0] = ssqdm_x[0] - (val - prev_mean) * (val - mean_x[0]) + else: + mean_x[0] = 0 + ssqdm_x[0] = 0 + + +def roll_var(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int ddof=1) -> np.ndarray: + """ + Numerically stable implementation using Welford's method. 
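The add_var/remove_var helpers above implement Welford's online update, with Kahan compensation layered on top. A minimal pure-Python sketch of the core update, with the compensation terms omitted for brevity (illustration only):

    def welford_add(nobs, mean, ssqdm, val):
        # add one observation; returns the updated (nobs, mean, ssqdm)
        nobs += 1
        delta = val - mean
        mean += delta / nobs
        ssqdm += delta * (val - mean)   # note: uses the *new* mean
        return nobs, mean, ssqdm

    nobs = mean = ssqdm = 0.0
    for x in [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]:
        nobs, mean, ssqdm = welford_add(nobs, mean, ssqdm, x)

    variance = ssqdm / (nobs - 1)       # ddof=1, the roll_var default
    assert abs(variance - 32.0 / 7.0) < 1e-12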
+ """ + cdef: + float64_t mean_x, ssqdm_x, nobs, compensation_add, + float64_t compensation_remove, prev_value + int64_t s, e, num_consecutive_same_value + Py_ssize_t i, j, N = len(start) + ndarray[float64_t] output + bint is_monotonic_increasing_bounds + + minp = max(minp, 1) + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + + with nogil: + + for i in range(0, N): + + s = start[i] + e = end[i] + + # Over the first window, observations can only be added + # never removed + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + prev_value = values[s] + num_consecutive_same_value = 0 + + mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0 + for j in range(s, e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add, + &num_consecutive_same_value, &prev_value) + + else: + + # After the first window, observations can both be added + # and removed + + # calculate deletes + for j in range(start[i - 1], s): + remove_var(values[j], &nobs, &mean_x, &ssqdm_x, + &compensation_remove) + + # calculate adds + for j in range(end[i - 1], e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add, + &num_consecutive_same_value, &prev_value) + + output[i] = calc_var(minp, ddof, nobs, ssqdm_x, num_consecutive_same_value) + + if not is_monotonic_increasing_bounds: + nobs = 0.0 + mean_x = 0.0 + ssqdm_x = 0.0 + compensation_remove = 0.0 + + return output + +# ---------------------------------------------------------------------- +# Rolling skewness + + +cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, float64_t xxx, + int64_t num_consecutive_same_value + ) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, R + + if nobs >= minp: + dnobs = nobs + A = x / dnobs + B = xx / dnobs - A * A + C = xxx / dnobs - A * A * A - 3 * A * B + + if nobs < 3: + result = NaN + # GH 42064 46431 + # uniform case, force result to be 0 + elif num_consecutive_same_value >= nobs: + result = 0.0 + # #18044: with uniform distribution, floating issue will + # cause B != 0. and cause the result is a very + # large number. + # + # in core/nanops.py nanskew/nankurt call the function + # _zero_out_fperr(m2) to fix floating error. 
+ # if the variance is less than 1e-14, it could be + # treat as zero, here we follow the original + # skew/kurt behaviour to check B <= 1e-14 + elif B <= 1e-14: + result = NaN + else: + R = sqrt(B) + result = ((sqrt(dnobs * (dnobs - 1.)) * C) / + ((dnobs - 2) * R * R * R)) + else: + result = NaN + + return result + + +cdef inline void add_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx, + int64_t *num_consecutive_same_value, + float64_t *prev_value, + ) nogil: + """ add a value from the skew calc """ + cdef: + float64_t y, t + + # Not NaN + if val == val: + nobs[0] = nobs[0] + 1 + + y = val - compensation_x[0] + t = x[0] + y + compensation_x[0] = t - x[0] - y + x[0] = t + y = val * val - compensation_xx[0] + t = xx[0] + y + compensation_xx[0] = t - xx[0] - y + xx[0] = t + y = val * val * val - compensation_xxx[0] + t = xxx[0] + y + compensation_xxx[0] = t - xxx[0] - y + xxx[0] = t + + # GH#42064, record num of same values to remove floating point artifacts + if val == prev_value[0]: + num_consecutive_same_value[0] += 1 + else: + # reset to 1 (include current value itself) + num_consecutive_same_value[0] = 1 + prev_value[0] = val + + +cdef inline void remove_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx) nogil: + """ remove a value from the skew calc """ + cdef: + float64_t y, t + + # Not NaN + if val == val: + nobs[0] = nobs[0] - 1 + + y = - val - compensation_x[0] + t = x[0] + y + compensation_x[0] = t - x[0] - y + x[0] = t + y = - val * val - compensation_xx[0] + t = xx[0] + y + compensation_xx[0] = t - xx[0] - y + xx[0] = t + y = - val * val * val - compensation_xxx[0] + t = xxx[0] + y + compensation_xxx[0] = t - xxx[0] - y + xxx[0] = t + + +def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + Py_ssize_t i, j + float64_t val, prev, min_val, mean_val, sum_val = 0 + float64_t compensation_xxx_add, compensation_xxx_remove + float64_t compensation_xx_add, compensation_xx_remove + float64_t compensation_x_add, compensation_x_remove + float64_t x, xx, xxx + float64_t prev_value + int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0 + int64_t s, e, num_consecutive_same_value + ndarray[float64_t] output, mean_array, values_copy + bint is_monotonic_increasing_bounds + + minp = max(minp, 3) + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + min_val = np.nanmin(values) + values_copy = np.copy(values) + + with nogil: + for i in range(0, V): + val = values_copy[i] + if val == val: + nobs_mean += 1 + sum_val += val + mean_val = sum_val / nobs_mean + # Other cases would lead to imprecision for smallest values + if min_val - mean_val > -1e5: + mean_val = round(mean_val) + for i in range(0, V): + values_copy[i] = values_copy[i] - mean_val + + for i in range(0, N): + + s = start[i] + e = end[i] + + # Over the first window, observations can only be added + # never removed + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + prev_value = values[s] + num_consecutive_same_value = 0 + + compensation_xxx_add = compensation_xxx_remove = 0 + compensation_xx_add = compensation_xx_remove = 0 + compensation_x_add = compensation_x_remove = 0 + x = xx = xxx = 0 + nobs = 0 + for 
j in range(s, e): + val = values_copy[j] + add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add, + &compensation_xx_add, &compensation_xxx_add, + &num_consecutive_same_value, &prev_value) + + else: + + # After the first window, observations can both be added + # and removed + # calculate deletes + for j in range(start[i - 1], s): + val = values_copy[j] + remove_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_remove, + &compensation_xx_remove, &compensation_xxx_remove) + + # calculate adds + for j in range(end[i - 1], e): + val = values_copy[j] + add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add, + &compensation_xx_add, &compensation_xxx_add, + &num_consecutive_same_value, &prev_value) + + output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value) + + if not is_monotonic_increasing_bounds: + nobs = 0 + x = 0.0 + xx = 0.0 + xxx = 0.0 + + return output + +# ---------------------------------------------------------------------- +# Rolling kurtosis + + +cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx, float64_t xxxx, + int64_t num_consecutive_same_value, + ) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, D, R, K + + if nobs >= minp: + if nobs < 4: + result = NaN + # GH 42064 46431 + # uniform case, force result to be -3. + elif num_consecutive_same_value >= nobs: + result = -3. + else: + dnobs = nobs + A = x / dnobs + R = A * A + B = xx / dnobs - R + R = R * A + C = xxx / dnobs - R - 3 * A * B + R = R * A + D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A + + # #18044: with uniform distribution, floating issue will + # cause B != 0. and cause the result is a very + # large number. + # + # in core/nanops.py nanskew/nankurt call the function + # _zero_out_fperr(m2) to fix floating error. + # if the variance is less than 1e-14, it could be + # treat as zero, here we follow the original + # skew/kurt behaviour to check B <= 1e-14 + if B <= 1e-14: + result = NaN + else: + K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2) + result = K / ((dnobs - 2.) 
* (dnobs - 3.)) + else: + result = NaN + + return result + + +cdef inline void add_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx, + float64_t *compensation_xxxx, + int64_t *num_consecutive_same_value, + float64_t *prev_value + ) nogil: + """ add a value from the kurotic calc """ + cdef: + float64_t y, t + + # Not NaN + if val == val: + nobs[0] = nobs[0] + 1 + + y = val - compensation_x[0] + t = x[0] + y + compensation_x[0] = t - x[0] - y + x[0] = t + y = val * val - compensation_xx[0] + t = xx[0] + y + compensation_xx[0] = t - xx[0] - y + xx[0] = t + y = val * val * val - compensation_xxx[0] + t = xxx[0] + y + compensation_xxx[0] = t - xxx[0] - y + xxx[0] = t + y = val * val * val * val - compensation_xxxx[0] + t = xxxx[0] + y + compensation_xxxx[0] = t - xxxx[0] - y + xxxx[0] = t + + # GH#42064, record num of same values to remove floating point artifacts + if val == prev_value[0]: + num_consecutive_same_value[0] += 1 + else: + # reset to 1 (include current value itself) + num_consecutive_same_value[0] = 1 + prev_value[0] = val + + +cdef inline void remove_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx, + float64_t *compensation_xxxx) nogil: + """ remove a value from the kurotic calc """ + cdef: + float64_t y, t + + # Not NaN + if val == val: + nobs[0] = nobs[0] - 1 + + y = - val - compensation_x[0] + t = x[0] + y + compensation_x[0] = t - x[0] - y + x[0] = t + y = - val * val - compensation_xx[0] + t = xx[0] + y + compensation_xx[0] = t - xx[0] - y + xx[0] = t + y = - val * val * val - compensation_xxx[0] + t = xxx[0] + y + compensation_xxx[0] = t - xxx[0] - y + xxx[0] = t + y = - val * val * val * val - compensation_xxxx[0] + t = xxxx[0] + y + compensation_xxxx[0] = t - xxxx[0] - y + xxxx[0] = t + + +def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + Py_ssize_t i, j + float64_t val, prev, mean_val, min_val, sum_val = 0 + float64_t compensation_xxxx_add, compensation_xxxx_remove + float64_t compensation_xxx_remove, compensation_xxx_add + float64_t compensation_xx_remove, compensation_xx_add + float64_t compensation_x_remove, compensation_x_add + float64_t x, xx, xxx, xxxx + float64_t prev_value + int64_t nobs, s, e, num_consecutive_same_value + int64_t N = len(start), V = len(values), nobs_mean = 0 + ndarray[float64_t] output, values_copy + bint is_monotonic_increasing_bounds + + minp = max(minp, 4) + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + output = np.empty(N, dtype=np.float64) + values_copy = np.copy(values) + min_val = np.nanmin(values) + + with nogil: + for i in range(0, V): + val = values_copy[i] + if val == val: + nobs_mean += 1 + sum_val += val + mean_val = sum_val / nobs_mean + # Other cases would lead to imprecision for smallest values + if min_val - mean_val > -1e4: + mean_val = round(mean_val) + for i in range(0, V): + values_copy[i] = values_copy[i] - mean_val + + for i in range(0, N): + + s = start[i] + e = end[i] + + # Over the first window, observations can only be added + # never removed + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + prev_value = values[s] + num_consecutive_same_value = 0 + + compensation_xxxx_add = 
compensation_xxxx_remove = 0 + compensation_xxx_remove = compensation_xxx_add = 0 + compensation_xx_remove = compensation_xx_add = 0 + compensation_x_remove = compensation_x_add = 0 + x = xx = xxx = xxxx = 0 + nobs = 0 + for j in range(s, e): + add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, + &compensation_x_add, &compensation_xx_add, + &compensation_xxx_add, &compensation_xxxx_add, + &num_consecutive_same_value, &prev_value) + + else: + + # After the first window, observations can both be added + # and removed + # calculate deletes + for j in range(start[i - 1], s): + remove_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, + &compensation_x_remove, &compensation_xx_remove, + &compensation_xxx_remove, &compensation_xxxx_remove) + + # calculate adds + for j in range(end[i - 1], e): + add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, + &compensation_x_add, &compensation_xx_add, + &compensation_xxx_add, &compensation_xxxx_add, + &num_consecutive_same_value, &prev_value) + + output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value) + + if not is_monotonic_increasing_bounds: + nobs = 0 + x = 0.0 + xx = 0.0 + xxx = 0.0 + xxxx = 0.0 + + return output + + +# ---------------------------------------------------------------------- +# Rolling median, min, max + + +def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + cdef: + Py_ssize_t i, j + bint err = False, is_monotonic_increasing_bounds + int midpoint, ret = 0 + int64_t nobs = 0, N = len(start), s, e, win + float64_t val, res, prev + skiplist_t *sl + ndarray[float64_t] output + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + + # we use the Fixed/Variable Indexer here as the + # actual skiplist ops outweigh any window computation costs + output = np.empty(N, dtype=np.float64) + + if (end - start).max() == 0: + output[:] = NaN + return output + win = (end - start).max() + sl = skiplist_init(win) + if sl == NULL: + raise MemoryError("skiplist_init failed") + + with nogil: + + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + + if i != 0: + skiplist_destroy(sl) + sl = skiplist_init(win) + nobs = 0 + # setup + for j in range(s, e): + val = values[j] + if val == val: + nobs += 1 + err = skiplist_insert(sl, val) == -1 + if err: + break + + else: + + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + if val == val: + nobs += 1 + err = skiplist_insert(sl, val) == -1 + if err: + break + + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + if val == val: + skiplist_remove(sl, val) + nobs -= 1 + if nobs >= minp: + midpoint = (nobs / 2) + if nobs % 2: + res = skiplist_get(sl, midpoint, &ret) + else: + res = (skiplist_get(sl, midpoint, &ret) + + skiplist_get(sl, (midpoint - 1), &ret)) / 2 + if ret == 0: + res = NaN + else: + res = NaN + + output[i] = res + + if not is_monotonic_increasing_bounds: + nobs = 0 + skiplist_destroy(sl) + sl = skiplist_init(win) + + skiplist_destroy(sl) + if err: + raise MemoryError("skiplist_insert failed") + return output + + +# ---------------------------------------------------------------------- + +# Moving maximum / minimum code taken from Bottleneck under the terms +# of its Simplified BSD license +# https://github.com/pydata/bottleneck + + +cdef inline numeric_t init_mm(numeric_t ai, Py_ssize_t *nobs, bint is_max) nogil: + + if numeric_t in cython.floating: + if 
ai == ai: + nobs[0] = nobs[0] + 1 + elif is_max: + if numeric_t == cython.float: + ai = MINfloat32 + else: + ai = MINfloat64 + else: + if numeric_t == cython.float: + ai = MAXfloat32 + else: + ai = MAXfloat64 + + else: + nobs[0] = nobs[0] + 1 + + return ai + + +cdef inline void remove_mm(numeric_t aold, Py_ssize_t *nobs) nogil: + """ remove a value from the mm calc """ + if numeric_t in cython.floating and aold == aold: + nobs[0] = nobs[0] - 1 + + +cdef inline numeric_t calc_mm(int64_t minp, Py_ssize_t nobs, + numeric_t value) nogil: + cdef: + numeric_t result + + if numeric_t in cython.floating: + if nobs >= minp: + result = value + else: + result = NaN + else: + result = value + + return result + + +def roll_max(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + """ + Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + + Parameters + ---------- + values : np.ndarray[np.float64] + window : int, size of rolling window + minp : if number of observations in window + is below this, output a NaN + index : ndarray, optional + index for window computation + closed : 'right', 'left', 'both', 'neither' + make the interval closed on the right, left, + both or neither endpoints + + Returns + ------- + np.ndarray[float] + """ + return _roll_min_max(values, start, end, minp, is_max=1) + + +def roll_min(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp) -> np.ndarray: + """ + Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. + + Parameters + ---------- + values : np.ndarray[np.float64] + window : int, size of rolling window + minp : if number of observations in window + is below this, output a NaN + index : ndarray, optional + index for window computation + + Returns + ------- + np.ndarray[float] + """ + return _roll_min_max(values, start, end, minp, is_max=0) + + +cdef _roll_min_max(ndarray[numeric_t] values, + ndarray[int64_t] starti, + ndarray[int64_t] endi, + int64_t minp, + bint is_max): + cdef: + numeric_t ai + int64_t curr_win_size, start + Py_ssize_t i, k, nobs = 0, N = len(starti) + deque Q[int64_t] # min/max always the front + deque W[int64_t] # track the whole window for nobs compute + ndarray[float64_t, ndim=1] output + + output = np.empty(N, dtype=np.float64) + Q = deque[int64_t]() + W = deque[int64_t]() + + with nogil: + + # This is using a modified version of the C++ code in this + # SO post: https://stackoverflow.com/a/12239580 + # The original impl didn't deal with variable window sizes + # So the code was optimized for that + + # first window's size + curr_win_size = endi[0] - starti[0] + # GH 32865 + # Anchor output index to values index to provide custom + # BaseIndexer support + for i in range(N): + + curr_win_size = endi[i] - starti[i] + if i == 0: + start = starti[i] + else: + start = endi[i - 1] + + for k in range(start, endi[i]): + ai = init_mm(values[k], &nobs, is_max) + # Discard previous entries if we find new min or max + if is_max: + while not Q.empty() and ((ai >= values[Q.back()]) or + values[Q.back()] != values[Q.back()]): + Q.pop_back() + else: + while not Q.empty() and ((ai <= values[Q.back()]) or + values[Q.back()] != values[Q.back()]): + Q.pop_back() + Q.push_back(k) + W.push_back(k) + + # Discard entries outside and left of current window + while not Q.empty() and Q.front() <= starti[i] - 1: + Q.pop_front() + while not W.empty() and W.front() <= starti[i] - 1: + remove_mm(values[W.front()], &nobs) + W.pop_front() + + # Save output 
based on index in input value array + if not Q.empty() and curr_win_size > 0: + output[i] = calc_mm(minp, nobs, values[Q.front()]) + else: + output[i] = NaN + + return output + + +cdef enum InterpolationType: + LINEAR, + LOWER, + HIGHER, + NEAREST, + MIDPOINT + + +interpolation_types = { + 'linear': LINEAR, + 'lower': LOWER, + 'higher': HIGHER, + 'nearest': NEAREST, + 'midpoint': MIDPOINT, +} + + +def roll_quantile(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, + float64_t quantile, str interpolation) -> np.ndarray: + """ + O(N log(window)) implementation using skip list + """ + cdef: + Py_ssize_t i, j, s, e, N = len(start), idx + int ret = 0 + int64_t nobs = 0, win + float64_t val, prev, midpoint, idx_with_fraction + float64_t vlow, vhigh + skiplist_t *skiplist + InterpolationType interpolation_type + ndarray[float64_t] output + + if quantile <= 0.0 or quantile >= 1.0: + raise ValueError(f"quantile value {quantile} not in [0, 1]") + + try: + interpolation_type = interpolation_types[interpolation] + except KeyError: + raise ValueError(f"Interpolation '{interpolation}' is not supported") + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + # we use the Fixed/Variable Indexer here as the + # actual skiplist ops outweigh any window computation costs + output = np.empty(N, dtype=np.float64) + + win = (end - start).max() + if win == 0: + output[:] = NaN + return output + skiplist = skiplist_init(win) + if skiplist == NULL: + raise MemoryError("skiplist_init failed") + + with nogil: + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + if i != 0: + nobs = 0 + skiplist_destroy(skiplist) + skiplist = skiplist_init(win) + + # setup + for j in range(s, e): + val = values[j] + if val == val: + nobs += 1 + skiplist_insert(skiplist, val) + + else: + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + if val == val: + nobs += 1 + skiplist_insert(skiplist, val) + + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + if val == val: + skiplist_remove(skiplist, val) + nobs -= 1 + if nobs >= minp: + if nobs == 1: + # Single value in skip list + output[i] = skiplist_get(skiplist, 0, &ret) + else: + idx_with_fraction = quantile * (nobs - 1) + idx = idx_with_fraction + + if idx_with_fraction == idx: + # no need to interpolate + output[i] = skiplist_get(skiplist, idx, &ret) + continue + + if interpolation_type == LINEAR: + vlow = skiplist_get(skiplist, idx, &ret) + vhigh = skiplist_get(skiplist, idx + 1, &ret) + output[i] = ((vlow + (vhigh - vlow) * + (idx_with_fraction - idx))) + elif interpolation_type == LOWER: + output[i] = skiplist_get(skiplist, idx, &ret) + elif interpolation_type == HIGHER: + output[i] = skiplist_get(skiplist, idx + 1, &ret) + elif interpolation_type == NEAREST: + # the same behaviour as round() + if idx_with_fraction - idx == 0.5: + if idx % 2 == 0: + output[i] = skiplist_get(skiplist, idx, &ret) + else: + output[i] = skiplist_get( + skiplist, idx + 1, &ret) + elif idx_with_fraction - idx < 0.5: + output[i] = skiplist_get(skiplist, idx, &ret) + else: + output[i] = skiplist_get(skiplist, idx + 1, &ret) + elif interpolation_type == MIDPOINT: + vlow = skiplist_get(skiplist, idx, &ret) + vhigh = skiplist_get(skiplist, idx + 1, &ret) + output[i] = (vlow + vhigh) / 2 + + if ret == 0: + output[i] = NaN + else: + output[i] = NaN + + skiplist_destroy(skiplist) + + return output + + 
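A rough pure-Python sketch of the order-statistic interpolation used by roll_quantile above, with a plain sorted list standing in for the skiplist (the function name and signature here are illustrative only, not part of pandas):

    import numpy as np

    def quantile_from_sorted(sorted_vals, q, interpolation="linear"):
        # mirrors idx_with_fraction = quantile * (nobs - 1) in roll_quantile
        nobs = len(sorted_vals)
        if nobs == 0:
            return np.nan
        if nobs == 1:
            return sorted_vals[0]
        pos = q * (nobs - 1)
        idx = int(pos)
        frac = pos - idx
        if frac == 0:
            # lands exactly on an order statistic: no interpolation needed
            return sorted_vals[idx]
        vlow, vhigh = sorted_vals[idx], sorted_vals[idx + 1]
        if interpolation == "linear":
            return vlow + (vhigh - vlow) * frac
        if interpolation == "lower":
            return vlow
        if interpolation == "higher":
            return vhigh
        if interpolation == "midpoint":
            return (vlow + vhigh) / 2
        # "nearest": exact .5 ties go to the even index, matching round()
        if frac < 0.5 or (frac == 0.5 and idx % 2 == 0):
            return vlow
        return vhigh

    # quantile_from_sorted([1.0, 2.0, 4.0, 8.0], 0.5) -> 3.0 (linear)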
+rolling_rank_tiebreakers = { + "average": TiebreakEnumType.TIEBREAK_AVERAGE, + "min": TiebreakEnumType.TIEBREAK_MIN, + "max": TiebreakEnumType.TIEBREAK_MAX, +} + + +def roll_rank(const float64_t[:] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, bint percentile, + str method, bint ascending) -> np.ndarray: + """ + O(N log(window)) implementation using skip list + + derived from roll_quantile + """ + cdef: + Py_ssize_t i, j, s, e, N = len(start), idx + float64_t rank_min = 0, rank = 0 + int64_t nobs = 0, win + float64_t val + skiplist_t *skiplist + float64_t[::1] output + TiebreakEnumType rank_type + + try: + rank_type = rolling_rank_tiebreakers[method] + except KeyError: + raise ValueError(f"Method '{method}' is not supported") + + is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( + start, end + ) + # we use the Fixed/Variable Indexer here as the + # actual skiplist ops outweigh any window computation costs + output = np.empty(N, dtype=np.float64) + + win = (end - start).max() + if win == 0: + output[:] = NaN + return np.asarray(output) + skiplist = skiplist_init(win) + if skiplist == NULL: + raise MemoryError("skiplist_init failed") + + with nogil: + for i in range(N): + s = start[i] + e = end[i] + + if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + if i != 0: + nobs = 0 + skiplist_destroy(skiplist) + skiplist = skiplist_init(win) + + # setup + for j in range(s, e): + val = values[j] if ascending else -values[j] + if val == val: + nobs += 1 + rank = skiplist_insert(skiplist, val) + if rank == -1: + raise MemoryError("skiplist_insert failed") + if rank_type == TiebreakEnumType.TIEBREAK_AVERAGE: + # The average rank of `val` is the sum of the ranks of all + # instances of `val` in the skip list divided by the number + # of instances. The sum of consecutive integers from 1 to N + # is N * (N + 1) / 2. + # The sum of the ranks is the sum of integers from the + # lowest rank to the highest rank, which is the sum of + # integers from 1 to the highest rank minus the sum of + # integers from 1 to one less than the lowest rank. 
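+                        # Worked example (illustrative): if the ties for `val`
+                        # occupy ranks 3 through 5, then rank = 5, rank_min = 3
+                        # and the average is
+                        # (5*6/2 - 2*3/2) / (5 - 3 + 1) = (15 - 3) / 3 = 4,
+                        # i.e. the mean of ranks 3, 4 and 5.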
+ rank_min = skiplist_min_rank(skiplist, val) + rank = (((rank * (rank + 1) / 2) + - ((rank_min - 1) * rank_min / 2)) + / (rank - rank_min + 1)) + elif rank_type == TiebreakEnumType.TIEBREAK_MIN: + rank = skiplist_min_rank(skiplist, val) + else: + rank = NaN + + else: + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] if ascending else -values[j] + if val == val: + skiplist_remove(skiplist, val) + nobs -= 1 + + # calculate adds + for j in range(end[i - 1], e): + val = values[j] if ascending else -values[j] + if val == val: + nobs += 1 + rank = skiplist_insert(skiplist, val) + if rank == -1: + raise MemoryError("skiplist_insert failed") + if rank_type == TiebreakEnumType.TIEBREAK_AVERAGE: + rank_min = skiplist_min_rank(skiplist, val) + rank = (((rank * (rank + 1) / 2) + - ((rank_min - 1) * rank_min / 2)) + / (rank - rank_min + 1)) + elif rank_type == TiebreakEnumType.TIEBREAK_MIN: + rank = skiplist_min_rank(skiplist, val) + else: + rank = NaN + if nobs >= minp: + output[i] = rank / nobs if percentile else rank + else: + output[i] = NaN + + skiplist_destroy(skiplist) + + return np.asarray(output) + + +def roll_apply(object obj, + ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, + object function, bint raw, + tuple args, dict kwargs) -> np.ndarray: + cdef: + ndarray[float64_t] output, counts + ndarray[float64_t, cast=True] arr + Py_ssize_t i, s, e, N = len(start), n = len(obj) + + if n == 0: + return np.array([], dtype=np.float64) + + arr = np.asarray(obj) + + # ndarray input + if raw and not arr.flags.c_contiguous: + arr = arr.copy('C') + + counts = roll_sum(np.isfinite(arr).astype(float), start, end, minp) + + output = np.empty(N, dtype=np.float64) + + for i in range(N): + + s = start[i] + e = end[i] + + if counts[i] >= minp: + if raw: + output[i] = function(arr[s:e], *args, **kwargs) + else: + output[i] = function(obj.iloc[s:e], *args, **kwargs) + else: + output[i] = NaN + + return output + + +# ---------------------------------------------------------------------- +# Rolling sum and mean for weighted window + + +def roll_weighted_sum( + const float64_t[:] values, const float64_t[:] weights, int minp +) -> np.ndarray: + return _roll_weighted_sum_mean(values, weights, minp, avg=0) + + +def roll_weighted_mean( + const float64_t[:] values, const float64_t[:] weights, int minp +) -> np.ndarray: + return _roll_weighted_sum_mean(values, weights, minp, avg=1) + + +cdef float64_t[:] _roll_weighted_sum_mean(const float64_t[:] values, + const float64_t[:] weights, + int minp, bint avg): + """ + Assume len(weights) << len(values) + """ + cdef: + float64_t[:] output, tot_wgt, counts + Py_ssize_t in_i, win_i, win_n, in_n + float64_t val_in, val_win, c, w + + in_n = len(values) + win_n = len(weights) + + output = np.zeros(in_n, dtype=np.float64) + counts = np.zeros(in_n, dtype=np.float64) + if avg: + tot_wgt = np.zeros(in_n, dtype=np.float64) + + elif minp > in_n: + minp = in_n + 1 + + minp = max(minp, 1) + + with nogil: + if avg: + for win_i in range(win_n): + val_win = weights[win_i] + if val_win != val_win: + continue + + for in_i in range(in_n - (win_n - win_i) + 1): + val_in = values[in_i] + if val_in == val_in: + output[in_i + (win_n - win_i) - 1] += val_in * val_win + counts[in_i + (win_n - win_i) - 1] += 1 + tot_wgt[in_i + (win_n - win_i) - 1] += val_win + + for in_i in range(in_n): + c = counts[in_i] + if c < minp: + output[in_i] = NaN + else: + w = tot_wgt[in_i] + if w == 0: + output[in_i] = NaN + else: + output[in_i] /= tot_wgt[in_i] + + else: + for win_i 
in range(win_n): + val_win = weights[win_i] + if val_win != val_win: + continue + + for in_i in range(in_n - (win_n - win_i) + 1): + val_in = values[in_i] + + if val_in == val_in: + output[in_i + (win_n - win_i) - 1] += val_in * val_win + counts[in_i + (win_n - win_i) - 1] += 1 + + for in_i in range(in_n): + c = counts[in_i] + if c < minp: + output[in_i] = NaN + + return output + + +# ---------------------------------------------------------------------- +# Rolling var for weighted window + + +cdef inline float64_t calc_weighted_var(float64_t t, + float64_t sum_w, + Py_ssize_t win_n, + unsigned int ddof, + float64_t nobs, + int64_t minp) nogil: + """ + Calculate weighted variance for a window using West's method. + + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + win_n: Py_ssize_t + window size + ddof: unsigned int + delta degrees of freedom + nobs: float64_t + number of observations + minp: int64_t + minimum number of observations + + Returns + ------- + result : float64_t + weighted variance of the window + """ + + cdef: + float64_t result + + # Variance is unchanged if no observation is added or removed + if (nobs >= minp) and (nobs > ddof): + + # pathological case + if nobs == 1: + result = 0 + else: + result = t * win_n / ((win_n - ddof) * sum_w) + if result < 0: + result = 0 + else: + result = NaN + + return result + + +cdef inline void add_weighted_var(float64_t val, + float64_t w, + float64_t *t, + float64_t *sum_w, + float64_t *mean, + float64_t *nobs) nogil: + """ + Update weighted mean, sum of weights and sum of weighted squared + differences to include value and weight pair in weighted variance + calculation using West's method. + + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + val: float64_t + window values + w: float64_t + window weights + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + mean: float64_t + weighted mean + nobs: float64_t + number of observations + """ + + cdef: + float64_t temp, q, r + + if val != val: + return + + nobs[0] = nobs[0] + 1 + + q = val - mean[0] + temp = sum_w[0] + w + r = q * w / temp + + mean[0] = mean[0] + r + t[0] = t[0] + r * sum_w[0] * q + sum_w[0] = temp + + +cdef inline void remove_weighted_var(float64_t val, + float64_t w, + float64_t *t, + float64_t *sum_w, + float64_t *mean, + float64_t *nobs) nogil: + """ + Update weighted mean, sum of weights and sum of weighted squared + differences to remove value and weight pair from weighted variance + calculation using West's method. + + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + val: float64_t + window values + w: float64_t + window weights + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + mean: float64_t + weighted mean + nobs: float64_t + number of observations + """ + + cdef: + float64_t temp, q, r + + if val == val: + nobs[0] = nobs[0] - 1 + + if nobs[0]: + q = val - mean[0] + temp = sum_w[0] - w + r = q * w / temp + + mean[0] = mean[0] - r + t[0] = t[0] - r * sum_w[0] * q + sum_w[0] = temp + + else: + t[0] = 0 + sum_w[0] = 0 + mean[0] = 0 + + +def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights, + int64_t minp, unsigned int ddof): + """ + Calculates weighted rolling variance using West's online algorithm. 
+ + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + values: float64_t[:] + values to roll window over + weights: float64_t[:] + array of weights whose length is window size + minp: int64_t + minimum number of observations to calculate + variance of a window + ddof: unsigned int + the divisor used in variance calculations + is the window size - ddof + + Returns + ------- + output: float64_t[:] + weighted variances of windows + """ + + cdef: + float64_t t = 0, sum_w = 0, mean = 0, nobs = 0 + float64_t val, pre_val, w, pre_w + Py_ssize_t i, n, win_n + float64_t[:] output + + n = len(values) + win_n = len(weights) + output = np.empty(n, dtype=np.float64) + + with nogil: + + for i in range(min(win_n, n)): + add_weighted_var(values[i], weights[i], &t, + &sum_w, &mean, &nobs) + + output[i] = calc_weighted_var(t, sum_w, win_n, + ddof, nobs, minp) + + for i in range(win_n, n): + val = values[i] + pre_val = values[i - win_n] + + w = weights[i % win_n] + pre_w = weights[(i - win_n) % win_n] + + if val == val: + if pre_val == pre_val: + remove_weighted_var(pre_val, pre_w, &t, + &sum_w, &mean, &nobs) + + add_weighted_var(val, w, &t, &sum_w, &mean, &nobs) + + elif pre_val == pre_val: + remove_weighted_var(pre_val, pre_w, &t, + &sum_w, &mean, &nobs) + + output[i] = calc_weighted_var(t, sum_w, win_n, + ddof, nobs, minp) + + return output + + +# ---------------------------------------------------------------------- +# Exponentially weighted moving + +def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, + int minp, float64_t com, bint adjust, bint ignore_na, + const float64_t[:] deltas=None, bint normalize=True) -> np.ndarray: + """ + Compute exponentially-weighted moving average or sum using center-of-mass. + + Parameters + ---------- + vals : ndarray (float64 type) + start: ndarray (int64 type) + end: ndarray (int64 type) + minp : int + com : float64 + adjust : bool + ignore_na : bool + deltas : ndarray (float64 type), optional. If None, implicitly assumes equally + spaced points (used when `times` is not passed) + normalize : bool, optional. + If True, calculate the mean. If False, calculate the sum. + + Returns + ------- + np.ndarray[float64_t] + """ + + cdef: + Py_ssize_t i, j, s, e, nobs, win_size, N = len(vals), M = len(start) + const float64_t[:] sub_vals + const float64_t[:] sub_deltas=None + ndarray[float64_t] sub_output, output = np.empty(N, dtype=np.float64) + float64_t alpha, old_wt_factor, new_wt, weighted, old_wt, cur + bint is_observation, use_deltas + + if N == 0: + return output + + use_deltas = deltas is not None + + alpha = 1. / (1. + com) + old_wt_factor = 1. - alpha + new_wt = 1. if adjust else alpha + + for j in range(M): + s = start[j] + e = end[j] + sub_vals = vals[s:e] + # note that len(deltas) = len(vals) - 1 and deltas[i] is to be used in + # conjunction with vals[i+1] + if use_deltas: + sub_deltas = deltas[s:e - 1] + win_size = len(sub_vals) + sub_output = np.empty(win_size, dtype=np.float64) + + weighted = sub_vals[0] + is_observation = weighted == weighted + nobs = int(is_observation) + sub_output[0] = weighted if nobs >= minp else NaN + old_wt = 1. 
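+        # The nogil loop below applies the standard EWMA recurrence with
+        # alpha = 1 / (1 + com).  With adjust=True, `old_wt` accumulates the
+        # decayed weight of past observations (the decay factor is raised to
+        # the corresponding delta when `deltas` is supplied), so for
+        # normalize=True:
+        #   weighted_t = (old_wt * weighted_{t-1} + new_wt * x_t) / (old_wt + new_wt)
+        # after which old_wt += new_wt.  With adjust=False, new_wt = alpha and
+        # old_wt is reset to 1 each step, which reduces to
+        #   weighted_t = (1 - alpha) * weighted_{t-1} + alpha * x_t.
+        # With normalize=False the same decay is applied but values are summed,
+        # giving an exponentially weighted sum instead of a mean.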
+ + with nogil: + for i in range(1, win_size): + cur = sub_vals[i] + is_observation = cur == cur + nobs += is_observation + if weighted == weighted: + + if is_observation or not ignore_na: + if normalize: + if use_deltas: + old_wt *= old_wt_factor ** sub_deltas[i - 1] + else: + old_wt *= old_wt_factor + else: + weighted = old_wt_factor * weighted + if is_observation: + if normalize: + # avoid numerical errors on constant series + if weighted != cur: + weighted = old_wt * weighted + new_wt * cur + weighted /= (old_wt + new_wt) + if adjust: + old_wt += new_wt + else: + old_wt = 1. + else: + weighted += cur + elif is_observation: + weighted = cur + + sub_output[i] = weighted if nobs >= minp else NaN + + output[s:e] = sub_output + + return output + + +def ewmcov(const float64_t[:] input_x, const int64_t[:] start, const int64_t[:] end, + int minp, const float64_t[:] input_y, float64_t com, bint adjust, + bint ignore_na, bint bias) -> np.ndarray: + """ + Compute exponentially-weighted moving variance using center-of-mass. + + Parameters + ---------- + input_x : ndarray (float64 type) + start: ndarray (int64 type) + end: ndarray (int64 type) + minp : int + input_y : ndarray (float64 type) + com : float64 + adjust : bool + ignore_na : bool + bias : bool + + Returns + ------- + np.ndarray[float64_t] + """ + + cdef: + Py_ssize_t i, j, s, e, win_size, nobs + Py_ssize_t N = len(input_x), M = len(input_y), L = len(start) + float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + float64_t numerator, denominator + const float64_t[:] sub_x_vals, sub_y_vals + ndarray[float64_t] sub_out, output = np.empty(N, dtype=np.float64) + bint is_observation + + if M != N: + raise ValueError(f"arrays are of different lengths ({N} and {M})") + + if N == 0: + return output + + alpha = 1. / (1. + com) + old_wt_factor = 1. - alpha + new_wt = 1. if adjust else alpha + + for j in range(L): + s = start[j] + e = end[j] + sub_x_vals = input_x[s:e] + sub_y_vals = input_y[s:e] + win_size = len(sub_x_vals) + sub_out = np.empty(win_size, dtype=np.float64) + + mean_x = sub_x_vals[0] + mean_y = sub_y_vals[0] + is_observation = (mean_x == mean_x) and (mean_y == mean_y) + nobs = int(is_observation) + if not is_observation: + mean_x = NaN + mean_y = NaN + sub_out[0] = (0. if bias else NaN) if nobs >= minp else NaN + cov = 0. + sum_wt = 1. + sum_wt2 = 1. + old_wt = 1. + + with nogil: + for i in range(1, win_size): + cur_x = sub_x_vals[i] + cur_y = sub_y_vals[i] + is_observation = (cur_x == cur_x) and (cur_y == cur_y) + nobs += is_observation + if mean_x == mean_x: + if is_observation or not ignore_na: + sum_wt *= old_wt_factor + sum_wt2 *= (old_wt_factor * old_wt_factor) + old_wt *= old_wt_factor + if is_observation: + old_mean_x = mean_x + old_mean_y = mean_y + + # avoid numerical errors on constant series + if mean_x != cur_x: + mean_x = ((old_wt * old_mean_x) + + (new_wt * cur_x)) / (old_wt + new_wt) + + # avoid numerical errors on constant series + if mean_y != cur_y: + mean_y = ((old_wt * old_mean_y) + + (new_wt * cur_y)) / (old_wt + new_wt) + cov = ((old_wt * (cov + ((old_mean_x - mean_x) * + (old_mean_y - mean_y)))) + + (new_wt * ((cur_x - mean_x) * + (cur_y - mean_y)))) / (old_wt + new_wt) + sum_wt += new_wt + sum_wt2 += (new_wt * new_wt) + old_wt += new_wt + if not adjust: + sum_wt /= old_wt + sum_wt2 /= (old_wt * old_wt) + old_wt = 1. 
+ elif is_observation: + mean_x = cur_x + mean_y = cur_y + + if nobs >= minp: + if not bias: + numerator = sum_wt * sum_wt + denominator = numerator - sum_wt2 + if denominator > 0: + sub_out[i] = (numerator / denominator) * cov + else: + sub_out[i] = NaN + else: + sub_out[i] = cov + else: + sub_out[i] = NaN + + output[s:e] = sub_out + + return output diff --git a/pandas/_libs/window/indexers.pyi b/pandas/_libs/window/indexers.pyi new file mode 100644 index 00000000..c9bc64be --- /dev/null +++ b/pandas/_libs/window/indexers.pyi @@ -0,0 +1,12 @@ +import numpy as np + +from pandas._typing import npt + +def calculate_variable_window_bounds( + num_values: int, # int64_t + window_size: int, # int64_t + min_periods, + center: bool, + closed: str | None, + index: np.ndarray, # const int64_t[:] +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ... diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx new file mode 100644 index 00000000..465865de --- /dev/null +++ b/pandas/_libs/window/indexers.pyx @@ -0,0 +1,149 @@ +# cython: boundscheck=False, wraparound=False, cdivision=True + +import numpy as np + +from numpy cimport ( + int64_t, + ndarray, +) + +# Cython routines for window indexers + + +def calculate_variable_window_bounds( + int64_t num_values, + int64_t window_size, + object min_periods, # unused but here to match get_window_bounds signature + bint center, + str closed, + const int64_t[:] index +): + """ + Calculate window boundaries for rolling windows from a time offset. + + Parameters + ---------- + num_values : int64 + total number of values + + window_size : int64 + window size calculated from the offset + + min_periods : object + ignored, exists for compatibility + + center : bint + center the rolling window on the current observation + + closed : str + string of side of the window that should be closed + + index : ndarray[int64] + time series index to roll over + + Returns + ------- + (ndarray[int64], ndarray[int64]) + """ + cdef: + bint left_closed = False + bint right_closed = False + ndarray[int64_t, ndim=1] start, end + int64_t start_bound, end_bound, index_growth_sign = 1 + Py_ssize_t i, j + + if num_values <= 0: + return np.empty(0, dtype='int64'), np.empty(0, dtype='int64') + + # default is 'right' + if closed is None: + closed = 'right' + + if closed in ['right', 'both']: + right_closed = True + + if closed in ['left', 'both']: + left_closed = True + + # GH 43997: + # If the forward and the backward facing windows + # would result in a fraction of 1/2 a nanosecond + # we need to make both interval ends inclusive. 
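+    # (e.g. a centered window of 5 nanoseconds nominally spans
+    # [t - 2.5ns, t + 2.5ns]; since the bounds are integral nanoseconds,
+    # both endpoints are treated as closed, so the window covers the five
+    # stamps t-2 .. t+2 when observations are 1ns apart)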
+ if center and window_size % 2 == 1: + right_closed = True + left_closed = True + + if index[num_values - 1] < index[0]: + index_growth_sign = -1 + + start = np.empty(num_values, dtype='int64') + start.fill(-1) + end = np.empty(num_values, dtype='int64') + end.fill(-1) + + start[0] = 0 + + # right endpoint is closed + if right_closed: + end[0] = 1 + # right endpoint is open + else: + end[0] = 0 + if center: + end_bound = index[0] + index_growth_sign * window_size / 2 + for j in range(0, num_values): + if (index[j] - end_bound) * index_growth_sign < 0: + end[0] = j + 1 + elif (index[j] - end_bound) * index_growth_sign == 0 and right_closed: + end[0] = j + 1 + elif (index[j] - end_bound) * index_growth_sign >= 0: + end[0] = j + break + + with nogil: + + # start is start of slice interval (including) + # end is end of slice interval (not including) + for i in range(1, num_values): + if center: + end_bound = index[i] + index_growth_sign * window_size / 2 + start_bound = index[i] - index_growth_sign * window_size / 2 + else: + end_bound = index[i] + start_bound = index[i] - index_growth_sign * window_size + + # left endpoint is closed + if left_closed: + start_bound -= 1 * index_growth_sign + + # advance the start bound until we are + # within the constraint + start[i] = i + for j in range(start[i - 1], i): + if (index[j] - start_bound) * index_growth_sign > 0: + start[i] = j + break + + # for centered window advance the end bound until we are + # outside the constraint + if center: + for j in range(end[i - 1], num_values + 1): + if j == num_values: + end[i] = j + elif ((index[j] - end_bound) * index_growth_sign == 0 and + right_closed): + end[i] = j + 1 + elif (index[j] - end_bound) * index_growth_sign >= 0: + end[i] = j + break + # end bound is previous end + # or current index + elif (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: + end[i] = i + 1 + else: + end[i] = end[i - 1] + + # right endpoint is open + if not right_closed and not center: + end[i] -= 1 + return start, end diff --git a/pandas/_libs/writers.pyi b/pandas/_libs/writers.pyi new file mode 100644 index 00000000..611c0c7c --- /dev/null +++ b/pandas/_libs/writers.pyi @@ -0,0 +1,21 @@ +import numpy as np + +from pandas._typing import ArrayLike + +def write_csv_rows( + data: list[ArrayLike], + data_index: np.ndarray, + nlevels: int, + cols: np.ndarray, + writer: object, # _csv.writer +) -> None: ... +def convert_json_to_lines(arr: str) -> str: ... +def max_len_string_array( + arr: np.ndarray, # pandas_string[:] +) -> int: ... +def word_len(val: object) -> int: ... +def string_array_replace_from_nan_rep( + arr: np.ndarray, # np.ndarray[object, ndim=1] + nan_rep: object, + replace: object = ..., +) -> None: ... diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx new file mode 100644 index 00000000..cd42b08a --- /dev/null +++ b/pandas/_libs/writers.pyx @@ -0,0 +1,175 @@ +cimport cython +import numpy as np + +from cpython cimport ( + PyBytes_GET_SIZE, + PyUnicode_GET_LENGTH, +) +from numpy cimport ( + ndarray, + uint8_t, +) + +ctypedef fused pandas_string: + str + bytes + + +@cython.boundscheck(False) +@cython.wraparound(False) +def write_csv_rows( + list data, + ndarray data_index, + Py_ssize_t nlevels, + ndarray cols, + object writer +) -> None: + """ + Write the given data to the writer object, pre-allocating where possible + for performance improvements. 
+ + Parameters + ---------- + data : list[ArrayLike] + data_index : ndarray + nlevels : int + cols : ndarray + writer : _csv.writer + """ + # In crude testing, N>100 yields little marginal improvement + cdef: + Py_ssize_t i, j = 0, k = len(data_index), N = 100, ncols = len(cols) + list rows + + # pre-allocate rows + rows = [[None] * (nlevels + ncols) for _ in range(N)] + + if nlevels == 1: + for j in range(k): + row = rows[j % N] + row[0] = data_index[j] + for i in range(ncols): + row[1 + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + elif nlevels > 1: + for j in range(k): + row = rows[j % N] + row[:nlevels] = list(data_index[j]) + for i in range(ncols): + row[nlevels + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + else: + for j in range(k): + row = rows[j % N] + for i in range(ncols): + row[i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + + if j >= 0 and (j < N - 1 or (j % N) != N - 1): + writer.writerows(rows[:((j + 1) % N)]) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def convert_json_to_lines(arr: str) -> str: + """ + replace comma separated json with line feeds, paying special attention + to quotes & brackets + """ + cdef: + Py_ssize_t i = 0, num_open_brackets_seen = 0, length + bint in_quotes = False, is_escaping = False + ndarray[uint8_t, ndim=1] narr + unsigned char val, newline, comma, left_bracket, right_bracket, quote + unsigned char backslash + + newline = ord('\n') + comma = ord(',') + left_bracket = ord('{') + right_bracket = ord('}') + quote = ord('"') + backslash = ord('\\') + + narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() + length = narr.shape[0] + for i in range(length): + val = narr[i] + if val == quote and i > 0 and not is_escaping: + in_quotes = ~in_quotes + if val == backslash or is_escaping: + is_escaping = ~is_escaping + if val == comma: # commas that should be \n + if num_open_brackets_seen == 0 and not in_quotes: + narr[i] = newline + elif val == left_bracket: + if not in_quotes: + num_open_brackets_seen += 1 + elif val == right_bracket: + if not in_quotes: + num_open_brackets_seen -= 1 + + return narr.tobytes().decode('utf-8') + '\n' # GH:36888 + + +# stata, pytables +@cython.boundscheck(False) +@cython.wraparound(False) +def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: + """ + Return the maximum size of elements in a 1-dim string array. + """ + cdef: + Py_ssize_t i, m = 0, wlen = 0, length = arr.shape[0] + pandas_string val + + for i in range(length): + val = arr[i] + wlen = word_len(val) + + if wlen > m: + m = wlen + + return m + + +cpdef inline Py_ssize_t word_len(object val): + """ + Return the maximum length of a string or bytes value. + """ + cdef: + Py_ssize_t wlen = 0 + + if isinstance(val, str): + wlen = PyUnicode_GET_LENGTH(val) + elif isinstance(val, bytes): + wlen = PyBytes_GET_SIZE(val) + + return wlen + +# ------------------------------------------------------------------ +# PyTables Helpers + + +@cython.boundscheck(False) +@cython.wraparound(False) +def string_array_replace_from_nan_rep( + ndarray[object, ndim=1] arr, + object nan_rep, + object replace=np.nan +) -> None: + """ + Replace the values in the array with 'replacement' if + they are 'nan_rep'. Return the same array. 
+ """ + cdef: + Py_ssize_t length = len(arr), i = 0 + + for i in range(length): + if arr[i] == nan_rep: + arr[i] = replace diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py new file mode 100644 index 00000000..e0e1b49a --- /dev/null +++ b/pandas/_testing/__init__.py @@ -0,0 +1,1161 @@ +from __future__ import annotations + +import collections +from datetime import datetime +from decimal import Decimal +import operator +import os +import re +import string +from sys import byteorder +from typing import ( + TYPE_CHECKING, + Callable, + ContextManager, + Counter, + Iterable, +) +import warnings + +import numpy as np + +from pandas._config.localization import ( + can_set_locale, + get_locales, + set_locale, +) + +from pandas._typing import Dtype +from pandas.compat import pa_version_under1p01 + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, + is_sequence, + is_unsigned_integer_dtype, + pandas_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + bdate_range, +) +from pandas._testing._io import ( + close, + network, + round_trip_localpath, + round_trip_pathlib, + round_trip_pickle, + write_to_compressed, +) +from pandas._testing._random import ( + randbool, + rands, + rands_array, +) +from pandas._testing._warnings import ( + assert_produces_warning, + maybe_produces_warning, +) +from pandas._testing.asserters import ( + assert_almost_equal, + assert_attr_equal, + assert_categorical_equal, + assert_class_equal, + assert_contains_all, + assert_copy, + assert_datetime_array_equal, + assert_dict_equal, + assert_equal, + assert_extension_array_equal, + assert_frame_equal, + assert_index_equal, + assert_indexing_slices_equivalent, + assert_interval_array_equal, + assert_is_sorted, + assert_is_valid_plot_return_object, + assert_metadata_equivalent, + assert_numpy_array_equal, + assert_period_array_equal, + assert_series_equal, + assert_sp_array_equal, + assert_timedelta_array_equal, + raise_assert_detail, +) +from pandas._testing.compat import ( + get_dtype, + get_obj, +) +from pandas._testing.contexts import ( + RNGContext, + decompress_file, + ensure_clean, + ensure_clean_dir, + ensure_safe_environment_variables, + set_timezone, + use_numexpr, + with_csv_dialect, +) +from pandas.core.api import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.arrays import ( + BaseMaskedArray, + ExtensionArray, + PandasArray, +) +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.construction import extract_array + +if TYPE_CHECKING: + from pandas import ( + PeriodIndex, + TimedeltaIndex, + ) + +_N = 30 +_K = 4 + +UNSIGNED_INT_NUMPY_DTYPES: list[Dtype] = ["uint8", "uint16", "uint32", "uint64"] +UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"] +SIGNED_INT_NUMPY_DTYPES: list[Dtype] = [int, "int8", "int16", "int32", "int64"] +SIGNED_INT_EA_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"] +ALL_INT_NUMPY_DTYPES = UNSIGNED_INT_NUMPY_DTYPES + SIGNED_INT_NUMPY_DTYPES +ALL_INT_EA_DTYPES = UNSIGNED_INT_EA_DTYPES + SIGNED_INT_EA_DTYPES + +FLOAT_NUMPY_DTYPES: list[Dtype] = [float, "float32", "float64"] +FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"] +COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"] +STRING_DTYPES: list[Dtype] = [str, "str", "U"] + +DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"] 
+TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"] + +BOOL_DTYPES: list[Dtype] = [bool, "bool"] +BYTES_DTYPES: list[Dtype] = [bytes, "bytes"] +OBJECT_DTYPES: list[Dtype] = [object, "object"] + +ALL_REAL_NUMPY_DTYPES = FLOAT_NUMPY_DTYPES + ALL_INT_NUMPY_DTYPES +ALL_NUMPY_DTYPES = ( + ALL_REAL_NUMPY_DTYPES + + COMPLEX_DTYPES + + STRING_DTYPES + + DATETIME64_DTYPES + + TIMEDELTA64_DTYPES + + BOOL_DTYPES + + OBJECT_DTYPES + + BYTES_DTYPES +) + +NARROW_NP_DTYPES = [ + np.float16, + np.float32, + np.int8, + np.int16, + np.int32, + np.uint8, + np.uint16, + np.uint32, +] + +ENDIAN = {"little": "<", "big": ">"}[byteorder] + +NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")] +NP_NAT_OBJECTS = [ + cls("NaT", unit) + for cls in [np.datetime64, np.timedelta64] + for unit in [ + "Y", + "M", + "W", + "D", + "h", + "m", + "s", + "ms", + "us", + "ns", + "ps", + "fs", + "as", + ] +] + +if not pa_version_under1p01: + import pyarrow as pa + + UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()] + SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()] + ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES + + FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()] + STRING_PYARROW_DTYPES = [pa.string(), pa.utf8()] + + TIME_PYARROW_DTYPES = [ + pa.time32("s"), + pa.time32("ms"), + pa.time64("us"), + pa.time64("ns"), + ] + DATE_PYARROW_DTYPES = [pa.date32(), pa.date64()] + DATETIME_PYARROW_DTYPES = [ + pa.timestamp(unit=unit, tz=tz) + for unit in ["s", "ms", "us", "ns"] + for tz in [None, "UTC", "US/Pacific", "US/Eastern"] + ] + TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]] + + BOOL_PYARROW_DTYPES = [pa.bool_()] + + # TODO: Add container like pyarrow types: + # https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions + ALL_PYARROW_DTYPES = ( + ALL_INT_PYARROW_DTYPES + + FLOAT_PYARROW_DTYPES + + TIME_PYARROW_DTYPES + + DATE_PYARROW_DTYPES + + DATETIME_PYARROW_DTYPES + + TIMEDELTA_PYARROW_DTYPES + + BOOL_PYARROW_DTYPES + ) + + +EMPTY_STRING_PATTERN = re.compile("^$") + +# set testing_mode +_testing_mode_warnings = (DeprecationWarning, ResourceWarning) + + +def set_testing_mode() -> None: + # set the testing mode filters + testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") + if "deprecate" in testing_mode: + for category in _testing_mode_warnings: + warnings.simplefilter("always", category) + + +def reset_testing_mode() -> None: + # reset the testing mode filters + testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") + if "deprecate" in testing_mode: + for category in _testing_mode_warnings: + warnings.simplefilter("ignore", category) + + +set_testing_mode() + + +def reset_display_options() -> None: + """ + Reset the display options for printing and representing objects. + """ + pd.reset_option("^display.", silent=True) + + +# ----------------------------------------------------------------------------- +# Comparators + + +def equalContents(arr1, arr2) -> bool: + """ + Checks if the set of unique elements of arr1 and arr2 are equivalent. + """ + return frozenset(arr1) == frozenset(arr2) + + +def box_expected(expected, box_cls, transpose=True): + """ + Helper function to wrap the expected output of a test in a given box_class. 
+ + Parameters + ---------- + expected : np.ndarray, Index, Series + box_cls : {Index, Series, DataFrame} + + Returns + ------- + subclass of box_cls + """ + if box_cls is pd.array: + if isinstance(expected, RangeIndex): + # pd.array would return an IntegerArray + expected = PandasArray(np.asarray(expected._values)) + else: + expected = pd.array(expected) + elif box_cls is Index: + expected = Index._with_infer(expected) + elif box_cls is Series: + expected = Series(expected) + elif box_cls is DataFrame: + expected = Series(expected).to_frame() + if transpose: + # for vector operations, we need a DataFrame to be a single-row, + # not a single-column, in order to operate against non-DataFrame + # vectors of the same length. But convert to two rows to avoid + # single-row special cases in datetime arithmetic + expected = expected.T + expected = pd.concat([expected] * 2, ignore_index=True) + elif box_cls is np.ndarray or box_cls is np.array: + expected = np.array(expected) + elif box_cls is to_array: + expected = to_array(expected) + else: + raise NotImplementedError(box_cls) + return expected + + +def to_array(obj): + """ + Similar to pd.array, but does not cast numpy dtypes to nullable dtypes. + """ + # temporary implementation until we get pd.array in place + dtype = getattr(obj, "dtype", None) + + if dtype is None: + return np.asarray(obj) + + return extract_array(obj, extract_numpy=True) + + +# ----------------------------------------------------------------------------- +# Others + + +def getCols(k) -> str: + return string.ascii_uppercase[:k] + + +# make index +def makeStringIndex(k=10, name=None) -> Index: + return Index(rands_array(nchars=10, size=k), name=name) + + +def makeCategoricalIndex(k=10, n=3, name=None, **kwargs) -> CategoricalIndex: + """make a length k index or n categories""" + x = rands_array(nchars=4, size=n, replace=False) + return CategoricalIndex( + Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs + ) + + +def makeIntervalIndex(k=10, name=None, **kwargs) -> IntervalIndex: + """make a length k IntervalIndex""" + x = np.linspace(0, 100, num=(k + 1)) + return IntervalIndex.from_breaks(x, name=name, **kwargs) + + +def makeBoolIndex(k=10, name=None) -> Index: + if k == 1: + return Index([True], name=name) + elif k == 2: + return Index([False, True], name=name) + return Index([False, True] + [False] * (k - 2), name=name) + + +def makeNumericIndex(k=10, name=None, *, dtype) -> NumericIndex: + dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) + + if is_integer_dtype(dtype): + values = np.arange(k, dtype=dtype) + if is_unsigned_integer_dtype(dtype): + values += 2 ** (dtype.itemsize * 8 - 1) + elif is_float_dtype(dtype): + values = np.random.random_sample(k) - np.random.random_sample(1) + values.sort() + values = values * (10 ** np.random.randint(0, 9)) + else: + raise NotImplementedError(f"wrong dtype {dtype}") + + return NumericIndex(values, dtype=dtype, name=name) + + +def makeIntIndex(k=10, name=None) -> Int64Index: + base_idx = makeNumericIndex(k, name=name, dtype="int64") + return Int64Index(base_idx) + + +def makeUIntIndex(k=10, name=None) -> UInt64Index: + base_idx = makeNumericIndex(k, name=name, dtype="uint64") + return UInt64Index(base_idx) + + +def makeRangeIndex(k=10, name=None, **kwargs) -> RangeIndex: + return RangeIndex(0, k, 1, name=name, **kwargs) + + +def makeFloatIndex(k=10, name=None) -> Float64Index: + base_idx = makeNumericIndex(k, name=name, dtype="float64") + return Float64Index(base_idx) + + +def 
makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex: + dt = datetime(2000, 1, 1) + dr = bdate_range(dt, periods=k, freq=freq, name=name) + return DatetimeIndex(dr, name=name, **kwargs) + + +def makeTimedeltaIndex(k: int = 10, freq="D", name=None, **kwargs) -> TimedeltaIndex: + return pd.timedelta_range(start="1 day", periods=k, freq=freq, name=name, **kwargs) + + +def makePeriodIndex(k: int = 10, name=None, **kwargs) -> PeriodIndex: + dt = datetime(2000, 1, 1) + return pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) + + +def makeMultiIndex(k=10, names=None, **kwargs): + N = (k // 2) + 1 + rng = range(N) + mi = MultiIndex.from_product([("foo", "bar"), rng], names=names, **kwargs) + assert len(mi) >= k # GH#38795 + return mi[:k] + + +def index_subclass_makers_generator(): + make_index_funcs = [ + makeDateIndex, + makePeriodIndex, + makeTimedeltaIndex, + makeRangeIndex, + makeIntervalIndex, + makeCategoricalIndex, + makeMultiIndex, + ] + yield from make_index_funcs + + +def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]: + """ + Generator which can be iterated over to get instances of all the classes + which represent time-series. + + Parameters + ---------- + k: length of each of the index instances + """ + make_index_funcs: list[Callable[..., Index]] = [ + makeDateIndex, + makePeriodIndex, + makeTimedeltaIndex, + ] + for make_index_func in make_index_funcs: + yield make_index_func(k=k) + + +# make series +def make_rand_series(name=None, dtype=np.float64) -> Series: + index = makeStringIndex(_N) + data = np.random.randn(_N) + with np.errstate(invalid="ignore"): + data = data.astype(dtype, copy=False) + return Series(data, index=index, name=name) + + +def makeFloatSeries(name=None) -> Series: + return make_rand_series(name=name) + + +def makeStringSeries(name=None) -> Series: + return make_rand_series(name=name) + + +def makeObjectSeries(name=None) -> Series: + data = makeStringIndex(_N) + data = Index(data, dtype=object) + index = makeStringIndex(_N) + return Series(data, index=index, name=name) + + +def getSeriesData() -> dict[str, Series]: + index = makeStringIndex(_N) + return {c: Series(np.random.randn(_N), index=index) for c in getCols(_K)} + + +def makeTimeSeries(nper=None, freq="B", name=None) -> Series: + if nper is None: + nper = _N + return Series( + np.random.randn(nper), index=makeDateIndex(nper, freq=freq), name=name + ) + + +def makePeriodSeries(nper=None, name=None) -> Series: + if nper is None: + nper = _N + return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name) + + +def getTimeSeriesData(nper=None, freq="B") -> dict[str, Series]: + return {c: makeTimeSeries(nper, freq) for c in getCols(_K)} + + +def getPeriodData(nper=None) -> dict[str, Series]: + return {c: makePeriodSeries(nper) for c in getCols(_K)} + + +# make frame +def makeTimeDataFrame(nper=None, freq="B") -> DataFrame: + data = getTimeSeriesData(nper, freq) + return DataFrame(data) + + +def makeDataFrame() -> DataFrame: + data = getSeriesData() + return DataFrame(data) + + +def getMixedTypeDict(): + index = Index(["a", "b", "c", "d", "e"]) + + data = { + "A": [0.0, 1.0, 2.0, 3.0, 4.0], + "B": [0.0, 1.0, 0.0, 1.0, 0.0], + "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], + "D": bdate_range("1/1/2009", periods=5), + } + + return index, data + + +def makeMixedDataFrame() -> DataFrame: + return DataFrame(getMixedTypeDict()[1]) + + +def makePeriodFrame(nper=None) -> DataFrame: + data = getPeriodData(nper) + return DataFrame(data) + + +def 
makeCustomIndex( + nentries, + nlevels, + prefix="#", + names: bool | str | list[str] | None = False, + ndupe_l=None, + idx_type=None, +) -> Index: + """ + Create an index/multindex with given dimensions, levels, names, etc' + + nentries - number of entries in index + nlevels - number of levels (> 1 produces multindex) + prefix - a string prefix for labels + names - (Optional), bool or list of strings. if True will use default + names, if false will use no names, if a list is given, the name of + each level in the index will be taken from the list. + ndupe_l - (Optional), list of ints, the number of rows for which the + label will repeated at the corresponding level, you can specify just + the first few, the rest will use the default ndupe_l of 1. + len(ndupe_l) <= nlevels. + idx_type - "i"/"f"/"s"/"dt"/"p"/"td". + If idx_type is not None, `idx_nlevels` must be 1. + "i"/"f" creates an integer/float index, + "s" creates a string + "dt" create a datetime index. + "td" create a datetime index. + + if unspecified, string labels will be generated. + """ + if ndupe_l is None: + ndupe_l = [1] * nlevels + assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels + assert names is None or names is False or names is True or len(names) is nlevels + assert idx_type is None or ( + idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1 + ) + + if names is True: + # build default names + names = [prefix + str(i) for i in range(nlevels)] + if names is False: + # pass None to index constructor for no name + names = None + + # make singleton case uniform + if isinstance(names, str) and nlevels == 1: + names = [names] + + # specific 1D index type requested? + idx_func_dict: dict[str, Callable[..., Index]] = { + "i": makeIntIndex, + "f": makeFloatIndex, + "s": makeStringIndex, + "dt": makeDateIndex, + "td": makeTimedeltaIndex, + "p": makePeriodIndex, + } + idx_func = idx_func_dict.get(idx_type) + if idx_func: + idx = idx_func(nentries) + # but we need to fill in the name + if names: + idx.name = names[0] + return idx + elif idx_type is not None: + raise ValueError( + f"{repr(idx_type)} is not a legal value for `idx_type`, " + "use 'i'/'f'/'s'/'dt'/'p'/'td'." + ) + + if len(ndupe_l) < nlevels: + ndupe_l.extend([1] * (nlevels - len(ndupe_l))) + assert len(ndupe_l) == nlevels + + assert all(x > 0 for x in ndupe_l) + + list_of_lists = [] + for i in range(nlevels): + + def keyfunc(x): + import re + + numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_") + return [int(num) for num in numeric_tuple] + + # build a list of lists to create the index from + div_factor = nentries // ndupe_l[i] + 1 + + # Deprecated since version 3.9: collections.Counter now supports []. See PEP 585 + # and Generic Alias Type. + cnt: Counter[str] = collections.Counter() + for j in range(div_factor): + label = f"{prefix}_l{i}_g{j}" + cnt[label] = ndupe_l[i] + # cute Counter trick + result = sorted(cnt.elements(), key=keyfunc)[:nentries] + list_of_lists.append(result) + + tuples = list(zip(*list_of_lists)) + + # convert tuples to index + if nentries == 1: + # we have a single level of tuples, i.e. 
a regular Index + name = None if names is None else names[0] + index = Index(tuples[0], name=name) + elif nlevels == 1: + name = None if names is None else names[0] + index = Index((x[0] for x in tuples), name=name) + else: + index = MultiIndex.from_tuples(tuples, names=names) + return index + + +def makeCustomDataframe( + nrows, + ncols, + c_idx_names=True, + r_idx_names=True, + c_idx_nlevels=1, + r_idx_nlevels=1, + data_gen_f=None, + c_ndupe_l=None, + r_ndupe_l=None, + dtype=None, + c_idx_type=None, + r_idx_type=None, +) -> DataFrame: + """ + Create a DataFrame using supplied parameters. + + Parameters + ---------- + nrows, ncols - number of data rows/cols + c_idx_names, idx_names - False/True/list of strings, yields No names , + default names or uses the provided names for the levels of the + corresponding index. You can provide a single string when + c_idx_nlevels ==1. + c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex + r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex + data_gen_f - a function f(row,col) which return the data value + at that position, the default generator used yields values of the form + "RxCy" based on position. + c_ndupe_l, r_ndupe_l - list of integers, determines the number + of duplicates for each label at a given level of the corresponding + index. The default `None` value produces a multiplicity of 1 across + all levels, i.e. a unique index. Will accept a partial list of length + N < idx_nlevels, for just the first N levels. If ndupe doesn't divide + nrows/ncol, the last label might have lower multiplicity. + dtype - passed to the DataFrame constructor as is, in case you wish to + have more control in conjunction with a custom `data_gen_f` + r_idx_type, c_idx_type - "i"/"f"/"s"/"dt"/"td". + If idx_type is not None, `idx_nlevels` must be 1. + "i"/"f" creates an integer/float index, + "s" creates a string index + "dt" create a datetime index. + "td" create a timedelta index. + + if unspecified, string labels will be generated. + + Examples + -------- + # 5 row, 3 columns, default names on both, single index on both axis + >> makeCustomDataframe(5,3) + + # make the data a random int between 1 and 100 + >> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100)) + + # 2-level multiindex on rows with each label duplicated + # twice on first level, default names on both axis, single + # index on both axis + >> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2]) + + # DatetimeIndex on row, index with unicode labels on columns + # no names on either axis + >> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False, + r_idx_type="dt",c_idx_type="u") + + # 4-level multindex on rows with names provided, 2-level multindex + # on columns with default labels and default names. 
+ >> a=makeCustomDataframe(5,3,r_idx_nlevels=4, + r_idx_names=["FEE","FIH","FOH","FUM"], + c_idx_nlevels=2) + + >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) + """ + assert c_idx_nlevels > 0 + assert r_idx_nlevels > 0 + assert r_idx_type is None or ( + r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1 + ) + assert c_idx_type is None or ( + c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1 + ) + + columns = makeCustomIndex( + ncols, + nlevels=c_idx_nlevels, + prefix="C", + names=c_idx_names, + ndupe_l=c_ndupe_l, + idx_type=c_idx_type, + ) + index = makeCustomIndex( + nrows, + nlevels=r_idx_nlevels, + prefix="R", + names=r_idx_names, + ndupe_l=r_ndupe_l, + idx_type=r_idx_type, + ) + + # by default, generate data based on location + if data_gen_f is None: + data_gen_f = lambda r, c: f"R{r}C{c}" + + data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] + + return DataFrame(data, index, columns, dtype=dtype) + + +def _create_missing_idx(nrows, ncols, density, random_state=None): + if random_state is None: + random_state = np.random + else: + random_state = np.random.RandomState(random_state) + + # below is cribbed from scipy.sparse + size = round((1 - density) * nrows * ncols) + # generate a few more to ensure unique values + min_rows = 5 + fac = 1.02 + extra_size = min(size + min_rows, fac * size) + + def _gen_unique_rand(rng, _extra_size): + ind = rng.rand(int(_extra_size)) + return np.unique(np.floor(ind * nrows * ncols))[:size] + + ind = _gen_unique_rand(random_state, extra_size) + while ind.size < size: + extra_size *= 1.05 + ind = _gen_unique_rand(random_state, extra_size) + + j = np.floor(ind * 1.0 / nrows).astype(int) + i = (ind - j * nrows).astype(int) + return i.tolist(), j.tolist() + + +def makeMissingDataframe(density=0.9, random_state=None) -> DataFrame: + df = makeDataFrame() + i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state) + df.values[i, j] = np.nan + return df + + +class SubclassedSeries(Series): + _metadata = ["testattr", "name"] + + @property + def _constructor(self): + # For testing, those properties return a generic callable, and not + # the actual class. In this case that is equivalent, but it is to + # ensure we don't rely on the property returning a class + # See https://github.com/pandas-dev/pandas/pull/46018 and + # https://github.com/pandas-dev/pandas/issues/32638 and linked issues + return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) + + @property + def _constructor_expanddim(self): + return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) + + +class SubclassedDataFrame(DataFrame): + _metadata = ["testattr"] + + @property + def _constructor(self): + return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) + + @property + def _constructor_sliced(self): + return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) + + +class SubclassedCategorical(Categorical): + @property + def _constructor(self): + return SubclassedCategorical + + +def _make_skipna_wrapper(alternative, skipna_alternative=None): + """ + Create a function for calling on an array. + + Parameters + ---------- + alternative : function + The function to be called on the array with no NaNs. + Only used when 'skipna_alternative' is None. 
+ skipna_alternative : function + The function to be called on the original array + + Returns + ------- + function + """ + if skipna_alternative: + + def skipna_wrapper(x): + return skipna_alternative(x.values) + + else: + + def skipna_wrapper(x): + nona = x.dropna() + if len(nona) == 0: + return np.nan + return alternative(nona) + + return skipna_wrapper + + +def convert_rows_list_to_csv_str(rows_list: list[str]) -> str: + """ + Convert list of CSV rows to single CSV-formatted string for current OS. + + This method is used for creating expected value of to_csv() method. + + Parameters + ---------- + rows_list : List[str] + Each element represents the row of csv. + + Returns + ------- + str + Expected output of to_csv() in current OS. + """ + sep = os.linesep + return sep.join(rows_list) + sep + + +def external_error_raised(expected_exception: type[Exception]) -> ContextManager: + """ + Helper function to mark pytest.raises that have an external error message. + + Parameters + ---------- + expected_exception : Exception + Expected error to raise. + + Returns + ------- + Callable + Regular `pytest.raises` function with `match` equal to `None`. + """ + import pytest + + return pytest.raises(expected_exception, match=None) # noqa: PDF010 + + +cython_table = pd.core.common._cython_table.items() + + +def get_cython_table_params(ndframe, func_names_and_expected): + """ + Combine frame, functions from com._cython_table + keys and expected result. + + Parameters + ---------- + ndframe : DataFrame or Series + func_names_and_expected : Sequence of two items + The first item is a name of a NDFrame method ('sum', 'prod') etc. + The second item is the expected return value. + + Returns + ------- + list + List of three items (DataFrame, function, expected result) + """ + results = [] + for func_name, expected in func_names_and_expected: + results.append((ndframe, func_name, expected)) + results += [ + (ndframe, func, expected) + for func, name in cython_table + if name == func_name + ] + return results + + +def get_op_from_name(op_name: str) -> Callable: + """ + The operator function for a given op name. + + Parameters + ---------- + op_name : str + The op name, in form of "add" or "__add__". + + Returns + ------- + function + A function performing the operation. + """ + short_opname = op_name.strip("_") + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op + + +# ----------------------------------------------------------------------------- +# Indexing test helpers + + +def getitem(x): + return x + + +def setitem(x): + return x + + +def loc(x): + return x.loc + + +def iloc(x): + return x.iloc + + +def at(x): + return x.at + + +def iat(x): + return x.iat + + +# ----------------------------------------------------------------------------- + + +def shares_memory(left, right) -> bool: + """ + Pandas-compat for np.shares_memory. + """ + if isinstance(left, np.ndarray) and isinstance(right, np.ndarray): + return np.shares_memory(left, right) + elif isinstance(left, np.ndarray): + # Call with reversed args to get to unpacking logic below. 
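+        # (e.g. shares_memory(np.arange(3), some_series) re-enters as
+        # shares_memory(some_series, np.arange(3)), so the Index/Series and
+        # ExtensionArray branches below can unpack the non-ndarray argument)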
+ return shares_memory(right, left) + + if isinstance(left, RangeIndex): + return False + if isinstance(left, MultiIndex): + return shares_memory(left._codes, right) + if isinstance(left, (Index, Series)): + return shares_memory(left._values, right) + + if isinstance(left, NDArrayBackedExtensionArray): + return shares_memory(left._ndarray, right) + if isinstance(left, pd.core.arrays.SparseArray): + return shares_memory(left.sp_values, right) + if isinstance(left, pd.core.arrays.IntervalArray): + return shares_memory(left._left, right) or shares_memory(left._right, right) + + if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]": + # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 + if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]": + # error: "ExtensionArray" has no attribute "_data" + left_pa_data = left._data # type: ignore[attr-defined] + # error: "ExtensionArray" has no attribute "_data" + right_pa_data = right._data # type: ignore[attr-defined] + left_buf1 = left_pa_data.chunk(0).buffers()[1] + right_buf1 = right_pa_data.chunk(0).buffers()[1] + return left_buf1 == right_buf1 + + if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray): + # By convention, we'll say these share memory if they share *either* + # the _data or the _mask + return np.shares_memory(left._data, right._data) or np.shares_memory( + left._mask, right._mask + ) + + if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1: + arr = left._mgr.arrays[0] + return shares_memory(arr, right) + + raise NotImplementedError(type(left), type(right)) + + +__all__ = [ + "ALL_INT_EA_DTYPES", + "ALL_INT_NUMPY_DTYPES", + "ALL_NUMPY_DTYPES", + "ALL_REAL_NUMPY_DTYPES", + "all_timeseries_index_generator", + "assert_almost_equal", + "assert_attr_equal", + "assert_categorical_equal", + "assert_class_equal", + "assert_contains_all", + "assert_copy", + "assert_datetime_array_equal", + "assert_dict_equal", + "assert_equal", + "assert_extension_array_equal", + "assert_frame_equal", + "assert_index_equal", + "assert_indexing_slices_equivalent", + "assert_interval_array_equal", + "assert_is_sorted", + "assert_is_valid_plot_return_object", + "assert_metadata_equivalent", + "assert_numpy_array_equal", + "assert_period_array_equal", + "assert_produces_warning", + "assert_series_equal", + "assert_sp_array_equal", + "assert_timedelta_array_equal", + "at", + "BOOL_DTYPES", + "box_expected", + "BYTES_DTYPES", + "can_set_locale", + "close", + "COMPLEX_DTYPES", + "convert_rows_list_to_csv_str", + "DATETIME64_DTYPES", + "decompress_file", + "EMPTY_STRING_PATTERN", + "ENDIAN", + "ensure_clean", + "ensure_clean_dir", + "ensure_safe_environment_variables", + "equalContents", + "external_error_raised", + "FLOAT_EA_DTYPES", + "FLOAT_NUMPY_DTYPES", + "getCols", + "get_cython_table_params", + "get_dtype", + "getitem", + "get_locales", + "getMixedTypeDict", + "get_obj", + "get_op_from_name", + "getPeriodData", + "getSeriesData", + "getTimeSeriesData", + "iat", + "iloc", + "index_subclass_makers_generator", + "loc", + "makeBoolIndex", + "makeCategoricalIndex", + "makeCustomDataframe", + "makeCustomIndex", + "makeDataFrame", + "makeDateIndex", + "makeFloatIndex", + "makeFloatSeries", + "makeIntervalIndex", + "makeIntIndex", + "makeMissingDataframe", + "makeMixedDataFrame", + "makeMultiIndex", + "makeNumericIndex", + "makeObjectSeries", + "makePeriodFrame", + "makePeriodIndex", + "makePeriodSeries", + "make_rand_series", + "makeRangeIndex", + "makeStringIndex", + 
"makeStringSeries", + "makeTimeDataFrame", + "makeTimedeltaIndex", + "makeTimeSeries", + "makeUIntIndex", + "maybe_produces_warning", + "NARROW_NP_DTYPES", + "network", + "NP_NAT_OBJECTS", + "NULL_OBJECTS", + "OBJECT_DTYPES", + "raise_assert_detail", + "randbool", + "rands", + "reset_display_options", + "reset_testing_mode", + "RNGContext", + "round_trip_localpath", + "round_trip_pathlib", + "round_trip_pickle", + "setitem", + "set_locale", + "set_testing_mode", + "set_timezone", + "shares_memory", + "SIGNED_INT_EA_DTYPES", + "SIGNED_INT_NUMPY_DTYPES", + "STRING_DTYPES", + "SubclassedCategorical", + "SubclassedDataFrame", + "SubclassedSeries", + "TIMEDELTA64_DTYPES", + "to_array", + "UNSIGNED_INT_EA_DTYPES", + "UNSIGNED_INT_NUMPY_DTYPES", + "use_numexpr", + "with_csv_dialect", + "write_to_compressed", +] diff --git a/pandas/_testing/_hypothesis.py b/pandas/_testing/_hypothesis.py new file mode 100644 index 00000000..5256a303 --- /dev/null +++ b/pandas/_testing/_hypothesis.py @@ -0,0 +1,89 @@ +""" +Hypothesis data generator helpers. +""" +from datetime import datetime + +from hypothesis import strategies as st +from hypothesis.extra.dateutil import timezones as dateutil_timezones +from hypothesis.extra.pytz import timezones as pytz_timezones + +from pandas.compat import is_platform_windows + +import pandas as pd + +from pandas.tseries.offsets import ( + BMonthBegin, + BMonthEnd, + BQuarterBegin, + BQuarterEnd, + BYearBegin, + BYearEnd, + MonthBegin, + MonthEnd, + QuarterBegin, + QuarterEnd, + YearBegin, + YearEnd, +) + +OPTIONAL_INTS = st.lists(st.one_of(st.integers(), st.none()), max_size=10, min_size=3) + +OPTIONAL_FLOATS = st.lists(st.one_of(st.floats(), st.none()), max_size=10, min_size=3) + +OPTIONAL_TEXT = st.lists(st.one_of(st.none(), st.text()), max_size=10, min_size=3) + +OPTIONAL_DICTS = st.lists( + st.one_of(st.none(), st.dictionaries(st.text(), st.integers())), + max_size=10, + min_size=3, +) + +OPTIONAL_LISTS = st.lists( + st.one_of(st.none(), st.lists(st.text(), max_size=10, min_size=3)), + max_size=10, + min_size=3, +) + +OPTIONAL_ONE_OF_ALL = st.one_of( + OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT +) + +if is_platform_windows(): + DATETIME_NO_TZ = st.datetimes(min_value=datetime(1900, 1, 1)) +else: + DATETIME_NO_TZ = st.datetimes() + +DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes( + min_value=pd.Timestamp(1900, 1, 1).to_pydatetime(), + max_value=pd.Timestamp(1900, 1, 1).to_pydatetime(), + timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()), +) + +DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes( + min_value=pd.Timestamp.min.to_pydatetime(warn=False), + max_value=pd.Timestamp.max.to_pydatetime(warn=False), +) + +INT_NEG_999_TO_POS_999 = st.integers(-999, 999) + +# The strategy for each type is registered in conftest.py, as they don't carry +# enough runtime information (e.g. type hints) to infer how to build them. 
+YQM_OFFSET = st.one_of( + *map( + st.from_type, + [ + MonthBegin, + MonthEnd, + BMonthBegin, + BMonthEnd, + QuarterBegin, + QuarterEnd, + BQuarterBegin, + BQuarterEnd, + YearBegin, + YearEnd, + BYearBegin, + BYearEnd, + ], + ) +) diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py new file mode 100644 index 00000000..d1acdff8 --- /dev/null +++ b/pandas/_testing/_io.py @@ -0,0 +1,437 @@ +from __future__ import annotations + +import bz2 +from functools import wraps +import gzip +import io +import socket +import tarfile +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) +import zipfile + +from pandas._typing import ( + FilePath, + ReadPickleBuffer, +) +from pandas.compat import get_lzma_file +from pandas.compat._optional import import_optional_dependency + +import pandas as pd +from pandas._testing._random import rands +from pandas._testing.contexts import ensure_clean + +from pandas.io.common import urlopen + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + +# skip tests on exceptions with these messages +_network_error_messages = ( + # 'urlopen error timed out', + # 'timeout: timed out', + # 'socket.timeout: timed out', + "timed out", + "Server Hangup", + "HTTP Error 503: Service Unavailable", + "502: Proxy Error", + "HTTP Error 502: internal error", + "HTTP Error 502", + "HTTP Error 503", + "HTTP Error 403", + "HTTP Error 400", + "Temporary failure in name resolution", + "Name or service not known", + "Connection refused", + "certificate verify", +) + +# or this e.errno/e.reason.errno +_network_errno_vals = ( + 101, # Network is unreachable + 111, # Connection refused + 110, # Connection timed out + 104, # Connection reset Error + 54, # Connection reset by peer + 60, # urllib.error.URLError: [Errno 60] Connection timed out +) + +# Both of the above shouldn't mask real issues such as 404's +# or refused connections (changed DNS). +# But some tests (test_data yahoo) contact incredibly flakey +# servers. + +# and conditionally raise on exception types in _get_default_network_errors + + +def _get_default_network_errors(): + # Lazy import for http.client & urllib.error + # because it imports many things from the stdlib + import http.client + import urllib.error + + return ( + OSError, + http.client.HTTPException, + TimeoutError, + urllib.error.URLError, + socket.timeout, + ) + + +def optional_args(decorator): + """ + allows a decorator to take optional positional and keyword arguments. + Assumes that taking a single, callable, positional argument means that + it is decorating a function, i.e. something like this:: + + @my_decorator + def function(): pass + + Calls decorator with decorator(f, *args, **kwargs) + """ + + @wraps(decorator) + def wrapper(*args, **kwargs): + def dec(f): + return decorator(f, *args, **kwargs) + + is_decorating = not kwargs and len(args) == 1 and callable(args[0]) + if is_decorating: + f = args[0] + args = () + return dec(f) + else: + return dec + + return wrapper + + +@optional_args +def network( + t, + url="https://www.google.com", + raise_on_error=False, + check_before_test=False, + error_classes=None, + skip_errnos=_network_errno_vals, + _skip_on_messages=_network_error_messages, +): + """ + Label a test as requiring network connection and, if an error is + encountered, only raise if it does not find a network connection. + + In comparison to ``network``, this assumes an added contract to your test: + you must assert that, under normal conditions, your test will ONLY fail if + it does not have network connectivity. 
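+
+    For instance, a test satisfying this contract might look like the
+    following sketch (``test_remote_read`` and the URL are illustrative
+    names only, not part of the test suite)::
+
+        @network
+        def test_remote_read():
+            df = pd.read_csv("https://example.com/some.csv")
+            assert not df.empty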
+ + You can call this in 3 ways: as a standard decorator, with keyword + arguments, or with a positional argument that is the url to check. + + Parameters + ---------- + t : callable + The test requiring network connectivity. + url : path + The url to test via ``pandas.io.common.urlopen`` to check + for connectivity. Defaults to 'https://www.google.com'. + raise_on_error : bool + If True, never catches errors. + check_before_test : bool + If True, checks connectivity before running the test case. + error_classes : tuple or Exception + error classes to ignore. If not in ``error_classes``, raises the error. + defaults to OSError. Be careful about changing the error classes here. + skip_errnos : iterable of int + Any exception that has .errno or .reason.erno set to one + of these values will be skipped with an appropriate + message. + _skip_on_messages: iterable of string + any exception e for which one of the strings is + a substring of str(e) will be skipped with an appropriate + message. Intended to suppress errors where an errno isn't available. + + Notes + ----- + * ``raise_on_error`` supersedes ``check_before_test`` + + Returns + ------- + t : callable + The decorated test ``t``, with checks for connectivity errors. + + Example + ------- + + Tests decorated with @network will fail if it's possible to make a network + connection to another URL (defaults to google.com):: + + >>> from pandas import _testing as tm + >>> @tm.network + ... def test_network(): + ... with pd.io.common.urlopen("rabbit://bonanza.com"): + ... pass + >>> test_network() # doctest: +SKIP + Traceback + ... + URLError: + + You can specify alternative URLs:: + + >>> @tm.network("https://www.yahoo.com") + ... def test_something_with_yahoo(): + ... raise OSError("Failure Message") + >>> test_something_with_yahoo() # doctest: +SKIP + Traceback (most recent call last): + ... + OSError: Failure Message + + If you set check_before_test, it will check the url first and not run the + test on failure:: + + >>> @tm.network("failing://url.blaher", check_before_test=True) + ... def test_something(): + ... print("I ran!") + ... raise ValueError("Failure") + >>> test_something() # doctest: +SKIP + Traceback (most recent call last): + ... + + Errors not related to networking will always be raised. + """ + import pytest + + if error_classes is None: + error_classes = _get_default_network_errors() + + t.network = True + + @wraps(t) + def wrapper(*args, **kwargs): + if ( + check_before_test + and not raise_on_error + and not can_connect(url, error_classes) + ): + pytest.skip( + f"May not have network connectivity because cannot connect to {url}" + ) + try: + return t(*args, **kwargs) + except Exception as err: + errno = getattr(err, "errno", None) + if not errno and hasattr(errno, "reason"): + # error: "Exception" has no attribute "reason" + errno = getattr(err.reason, "errno", None) # type: ignore[attr-defined] + + if errno in skip_errnos: + pytest.skip(f"Skipping test due to known errno and error {err}") + + e_str = str(err) + + if any(m.lower() in e_str.lower() for m in _skip_on_messages): + pytest.skip( + f"Skipping test because exception message is known and error {err}" + ) + + if not isinstance(err, error_classes) or raise_on_error: + raise + else: + pytest.skip( + f"Skipping test due to lack of connectivity and error {err}" + ) + + return wrapper + + +def can_connect(url, error_classes=None) -> bool: + """ + Try to connect to the given url. 
True if succeeds, False if OSError + raised + + Parameters + ---------- + url : basestring + The URL to try to connect to + + Returns + ------- + connectable : bool + Return True if no OSError (unable to connect) or URLError (bad url) was + raised + """ + if error_classes is None: + error_classes = _get_default_network_errors() + + try: + with urlopen(url, timeout=20) as response: + # Timeout just in case rate-limiting is applied + if response.status != 200: + return False + except error_classes: + return False + else: + return True + + +# ------------------------------------------------------------------ +# File-IO + + +def round_trip_pickle( + obj: Any, path: FilePath | ReadPickleBuffer | None = None +) -> DataFrame | Series: + """ + Pickle an object and then read it again. + + Parameters + ---------- + obj : any object + The object to pickle and then re-read. + path : str, path object or file-like object, default None + The path where the pickled object is written and then read. + + Returns + ------- + pandas object + The original object that was pickled and then re-read. + """ + _path = path + if _path is None: + _path = f"__{rands(10)}__.pickle" + with ensure_clean(_path) as temp_path: + pd.to_pickle(obj, temp_path) + return pd.read_pickle(temp_path) + + +def round_trip_pathlib(writer, reader, path: str | None = None): + """ + Write an object to file specified by a pathlib.Path and read it back + + Parameters + ---------- + writer : callable bound to pandas object + IO writing function (e.g. DataFrame.to_csv ) + reader : callable + IO reading function (e.g. pd.read_csv ) + path : str, default None + The path where the object is written and then read. + + Returns + ------- + pandas object + The original object that was serialized and then re-read. + """ + import pytest + + Path = pytest.importorskip("pathlib").Path + if path is None: + path = "___pathlib___" + with ensure_clean(path) as path: + writer(Path(path)) + obj = reader(Path(path)) + return obj + + +def round_trip_localpath(writer, reader, path: str | None = None): + """ + Write an object to file specified by a py.path LocalPath and read it back. + + Parameters + ---------- + writer : callable bound to pandas object + IO writing function (e.g. DataFrame.to_csv ) + reader : callable + IO reading function (e.g. pd.read_csv ) + path : str, default None + The path where the object is written and then read. + + Returns + ------- + pandas object + The original object that was serialized and then re-read. + """ + import pytest + + LocalPath = pytest.importorskip("py.path").local + if path is None: + path = "___localpath___" + with ensure_clean(path) as path: + writer(LocalPath(path)) + obj = reader(LocalPath(path)) + return obj + + +def write_to_compressed(compression, path, data, dest="test"): + """ + Write data to a compressed file. + + Parameters + ---------- + compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd'} + The compression type to use. + path : str + The file path to write the data. + data : str + The data to write. + dest : str, default "test" + The destination file (for ZIP only) + + Raises + ------ + ValueError : An invalid compression value was passed in. + """ + args: tuple[Any, ...] 
= (data,) + mode = "wb" + method = "write" + compress_method: Callable + + if compression == "zip": + compress_method = zipfile.ZipFile + mode = "w" + args = (dest, data) + method = "writestr" + elif compression == "tar": + compress_method = tarfile.TarFile + mode = "w" + file = tarfile.TarInfo(name=dest) + bytes = io.BytesIO(data) + file.size = len(data) + args = (file, bytes) + method = "addfile" + elif compression == "gzip": + compress_method = gzip.GzipFile + elif compression == "bz2": + compress_method = bz2.BZ2File + elif compression == "zstd": + compress_method = import_optional_dependency("zstandard").open + elif compression == "xz": + compress_method = get_lzma_file() + else: + raise ValueError(f"Unrecognized compression type: {compression}") + + with compress_method(path, mode=mode) as f: + getattr(f, method)(*args) + + +# ------------------------------------------------------------------ +# Plotting + + +def close(fignum=None) -> None: + from matplotlib.pyplot import ( + close as _close, + get_fignums, + ) + + if fignum is None: + for fignum in get_fignums(): + _close(fignum) + else: + _close(fignum) diff --git a/pandas/_testing/_random.py b/pandas/_testing/_random.py new file mode 100644 index 00000000..880fffea --- /dev/null +++ b/pandas/_testing/_random.py @@ -0,0 +1,36 @@ +import string + +import numpy as np + + +def randbool(size=(), p: float = 0.5): + return np.random.rand(*size) <= p + + +RANDS_CHARS = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1)) +RANDU_CHARS = np.array( + list("".join(map(chr, range(1488, 1488 + 26))) + string.digits), + dtype=(np.unicode_, 1), +) + + +def rands_array(nchars, size, dtype="O", replace=True) -> np.ndarray: + """ + Generate an array of byte strings. + """ + retval = ( + np.random.choice(RANDS_CHARS, size=nchars * np.prod(size), replace=replace) + .view((np.str_, nchars)) + .reshape(size) + ) + return retval.astype(dtype) + + +def rands(nchars) -> str: + """ + Generate one random byte string. + + See `rands_array` if you want to create an array of random strings. + + """ + return "".join(np.random.choice(RANDS_CHARS, nchars)) diff --git a/pandas/_testing/_warnings.py b/pandas/_testing/_warnings.py new file mode 100644 index 00000000..a5b0d1e1 --- /dev/null +++ b/pandas/_testing/_warnings.py @@ -0,0 +1,220 @@ +from __future__ import annotations + +from contextlib import ( + contextmanager, + nullcontext, +) +import re +import sys +from typing import ( + Literal, + Sequence, + Type, + cast, +) +import warnings + + +@contextmanager +def assert_produces_warning( + expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None = Warning, + filter_level: Literal[ + "error", "ignore", "always", "default", "module", "once" + ] = "always", + check_stacklevel: bool = True, + raise_on_extra_warnings: bool = True, + match: str | None = None, +): + """ + Context manager for running code expected to either raise a specific warning, + multiple specific warnings, or not raise any warnings. Verifies that the code + raises the expected warning(s), and that it does not raise any other unexpected + warnings. It is basically a wrapper around ``warnings.catch_warnings``. + + Parameters + ---------- + expected_warning : {Warning, False, tuple[Warning, ...], None}, default Warning + The type of Exception raised. ``exception.Warning`` is the base + class for all warnings. To raise multiple types of exceptions, + pass them as a tuple. To check that no warning is returned, + specify ``False`` or ``None``. 
+ filter_level : str or None, default "always" + Specifies whether warnings are ignored, displayed, or turned + into errors. + Valid values are: + + * "error" - turns matching warnings into exceptions + * "ignore" - discard the warning + * "always" - always emit a warning + * "default" - print the warning the first time it is generated + from each location + * "module" - print the warning the first time it is generated + from each module + * "once" - print the warning the first time it is generated + + check_stacklevel : bool, default True + If True, displays the line that called the function containing + the warning to show were the function is called. Otherwise, the + line that implements the function is displayed. + raise_on_extra_warnings : bool, default True + Whether extra warnings not of the type `expected_warning` should + cause the test to fail. + match : str, optional + Match warning message. + + Examples + -------- + >>> import warnings + >>> with assert_produces_warning(): + ... warnings.warn(UserWarning()) + ... + >>> with assert_produces_warning(False): + ... warnings.warn(RuntimeWarning()) + ... + Traceback (most recent call last): + ... + AssertionError: Caused unexpected warning(s): ['RuntimeWarning']. + >>> with assert_produces_warning(UserWarning): + ... warnings.warn(RuntimeWarning()) + Traceback (most recent call last): + ... + AssertionError: Did not see expected warning of class 'UserWarning'. + + ..warn:: This is *not* thread-safe. + """ + __tracebackhide__ = True + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter(filter_level) + yield w + + if expected_warning: + expected_warning = cast(Type[Warning], expected_warning) + _assert_caught_expected_warning( + caught_warnings=w, + expected_warning=expected_warning, + match=match, + check_stacklevel=check_stacklevel, + ) + + if raise_on_extra_warnings: + _assert_caught_no_extra_warnings( + caught_warnings=w, + expected_warning=expected_warning, + ) + + +def maybe_produces_warning(warning: type[Warning], condition: bool, **kwargs): + """ + Return a context manager that possibly checks a warning based on the condition + """ + if condition: + return assert_produces_warning(warning, **kwargs) + else: + return nullcontext() + + +def _assert_caught_expected_warning( + *, + caught_warnings: Sequence[warnings.WarningMessage], + expected_warning: type[Warning], + match: str | None, + check_stacklevel: bool, +) -> None: + """Assert that there was the expected warning among the caught warnings.""" + saw_warning = False + matched_message = False + unmatched_messages = [] + + for actual_warning in caught_warnings: + if issubclass(actual_warning.category, expected_warning): + saw_warning = True + + if check_stacklevel: + _assert_raised_with_correct_stacklevel(actual_warning) + + if match is not None: + if re.search(match, str(actual_warning.message)): + matched_message = True + else: + unmatched_messages.append(actual_warning.message) + + if not saw_warning: + raise AssertionError( + f"Did not see expected warning of class " + f"{repr(expected_warning.__name__)}" + ) + + if match and not matched_message: + raise AssertionError( + f"Did not see warning {repr(expected_warning.__name__)} " + f"matching '{match}'. The emitted warning messages are " + f"{unmatched_messages}" + ) + + +def _assert_caught_no_extra_warnings( + *, + caught_warnings: Sequence[warnings.WarningMessage], + expected_warning: type[Warning] | bool | tuple[type[Warning], ...] 
| None,
+) -> None:
+    """Assert that no extra warnings apart from the expected ones are caught."""
+    extra_warnings = []
+
+    for actual_warning in caught_warnings:
+        if _is_unexpected_warning(actual_warning, expected_warning):
+            # GH#38630 pytest.filterwarnings does not suppress these.
+            if actual_warning.category == ResourceWarning:
+                # GH 44732: Don't make the CI flaky by filtering SSL-related
+                # ResourceWarning from dependencies
+                unclosed_ssl = (
+                    "unclosed transport <asyncio.sslproto._SSLProtocolTransport"
+                )
+                if re.compile(unclosed_ssl).search(str(actual_warning.message)):
+                    continue
+                # GH 44844: Matplotlib leaves font files open during the entire
+                # process upon import. Don't let that interfere with the test suite.
+                if any("matplotlib" in mod for mod in sys.modules):
+                    continue
+
+            extra_warnings.append(
+                (
+                    actual_warning.category.__name__,
+                    actual_warning.message,
+                    actual_warning.filename,
+                    actual_warning.lineno,
+                )
+            )
+
+    if extra_warnings:
+        raise AssertionError(f"Caused unexpected warning(s): {repr(extra_warnings)}")
+
+
+def _is_unexpected_warning(
+    actual_warning: warnings.WarningMessage,
+    expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
+) -> bool:
+    """Check if the actual warning issued is unexpected."""
+    if actual_warning and not expected_warning:
+        return True
+    expected_warning = cast(Type[Warning], expected_warning)
+    return bool(not issubclass(actual_warning.category, expected_warning))
+
+
+def _assert_raised_with_correct_stacklevel(
+    actual_warning: warnings.WarningMessage,
+) -> None:
+    from inspect import (
+        getframeinfo,
+        stack,
+    )
+
+    caller = getframeinfo(stack()[4][0])
+    msg = (
+        "Warning not set with correct stacklevel. "
+        f"File where warning is raised: {actual_warning.filename} != "
+        f"{caller.filename}. Warning message: {actual_warning.message}"
+    )
+    assert actual_warning.filename == caller.filename, msg
diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
new file mode 100644
index 00000000..6c3b8960
--- /dev/null
+++ b/pandas/_testing/asserters.py
@@ -0,0 +1,1495 @@
+from __future__ import annotations
+
+from typing import (
+    Literal,
+    cast,
+)
+import warnings
+
+import numpy as np
+
+from pandas._libs.lib import (
+    NoDefault,
+    no_default,
+)
+from pandas._libs.missing import is_matching_na
+from pandas._libs.sparse import SparseIndex
+import pandas._libs.testing as _testing
+from pandas.util._exceptions import find_stack_level
+
+from pandas.core.dtypes.common import (
+    is_bool,
+    is_categorical_dtype,
+    is_extension_array_dtype,
+    is_interval_dtype,
+    is_number,
+    is_numeric_dtype,
+    needs_i8_conversion,
+)
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    PandasDtype,
+)
+from pandas.core.dtypes.missing import array_equivalent
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    IntervalIndex,
+    MultiIndex,
+    PeriodIndex,
+    RangeIndex,
+    Series,
+    TimedeltaIndex,
+)
+from pandas.core.algorithms import take_nd
+from pandas.core.arrays import (
+    DatetimeArray,
+    ExtensionArray,
+    IntervalArray,
+    PeriodArray,
+    TimedeltaArray,
+)
+from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
+from pandas.core.arrays.string_ import StringDtype
+from pandas.core.indexes.api import safe_sort_index
+
+from pandas.io.formats.printing import pprint_thing
+
+
+def assert_almost_equal(
+    left,
+    right,
+    check_dtype: bool | Literal["equiv"] = "equiv",
+    check_less_precise: bool | int | NoDefault = no_default,
+    rtol: float = 1.0e-5,
+    atol: float = 1.0e-8,
+    **kwargs,
+) -> None:
+    """
+    Check that the left and right objects are approximately equal.
+
+    By approximately equal, we refer to objects that are numbers or that
+    contain numbers which may be equivalent to specific levels of precision.
+
+    Parameters
+    ----------
+    left : object
+    right : object
+    check_dtype : bool or {'equiv'}, default 'equiv'
+        Check dtype if both a and b are the same type. If 'equiv' is passed in,
+        then `RangeIndex` and `Int64Index` are also considered equivalent
+        when doing type checking.
+    check_less_precise : bool or int, default False
+        Specify comparison precision. 5 digits (False) or 3 digits (True)
+        after decimal points are compared.
If int, then specify the number + of digits to compare. + + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + + .. deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + rtol : float, default 1e-5 + Relative tolerance. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. + + .. versionadded:: 1.1.0 + """ + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. " + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + rtol = atol = _get_tol_from_less_precise(check_less_precise) + + if isinstance(left, Index): + assert_index_equal( + left, + right, + check_exact=False, + exact=check_dtype, + rtol=rtol, + atol=atol, + **kwargs, + ) + + elif isinstance(left, Series): + assert_series_equal( + left, + right, + check_exact=False, + check_dtype=check_dtype, + rtol=rtol, + atol=atol, + **kwargs, + ) + + elif isinstance(left, DataFrame): + assert_frame_equal( + left, + right, + check_exact=False, + check_dtype=check_dtype, + rtol=rtol, + atol=atol, + **kwargs, + ) + + else: + # Other sequences. + if check_dtype: + if is_number(left) and is_number(right): + # Do not compare numeric classes, like np.float64 and float. + pass + elif is_bool(left) and is_bool(right): + # Do not compare bool classes, like np.bool_ and bool. + pass + else: + if isinstance(left, np.ndarray) or isinstance(right, np.ndarray): + obj = "numpy array" + else: + obj = "Input" + assert_class_equal(left, right, obj=obj) + + # if we have "equiv", this becomes True + _testing.assert_almost_equal( + left, right, check_dtype=bool(check_dtype), rtol=rtol, atol=atol, **kwargs + ) + + +def _get_tol_from_less_precise(check_less_precise: bool | int) -> float: + """ + Return the tolerance equivalent to the deprecated `check_less_precise` + parameter. + + Parameters + ---------- + check_less_precise : bool or int + + Returns + ------- + float + Tolerance to be used as relative/absolute tolerance. + + Examples + -------- + >>> # Using check_less_precise as a bool: + >>> _get_tol_from_less_precise(False) + 5e-06 + >>> _get_tol_from_less_precise(True) + 0.0005 + >>> # Using check_less_precise as an int representing the decimal + >>> # tolerance intended: + >>> _get_tol_from_less_precise(2) + 0.005 + >>> _get_tol_from_less_precise(8) + 5e-09 + """ + if isinstance(check_less_precise, bool): + if check_less_precise: + # 3-digit tolerance + return 0.5e-3 + else: + # 5-digit tolerance + return 0.5e-5 + else: + # Equivalent to setting checking_less_precise= + return 0.5 * 10**-check_less_precise + + +def _check_isinstance(left, right, cls): + """ + Helper method for our assert_* methods that ensures that + the two objects being compared have the right type before + proceeding with the comparison. + + Parameters + ---------- + left : The first object being compared. + right : The second object being compared. + cls : The class type to check against. + + Raises + ------ + AssertionError : Either `left` or `right` is not an instance of `cls`. 
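+
+    Examples
+    --------
+    A failing check reports the offending type, e.g. (illustrative only)::
+
+        _check_isinstance(pd.Series([1]), pd.DataFrame({"a": [1]}), pd.Series)
+        # AssertionError: Series Expected type <class '...Series'>,
+        # found <class '...DataFrame'> instead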
+ """ + cls_name = cls.__name__ + + if not isinstance(left, cls): + raise AssertionError( + f"{cls_name} Expected type {cls}, found {type(left)} instead" + ) + if not isinstance(right, cls): + raise AssertionError( + f"{cls_name} Expected type {cls}, found {type(right)} instead" + ) + + +def assert_dict_equal(left, right, compare_keys: bool = True) -> None: + + _check_isinstance(left, right, dict) + _testing.assert_dict_equal(left, right, compare_keys=compare_keys) + + +def assert_index_equal( + left: Index, + right: Index, + exact: bool | str = "equiv", + check_names: bool = True, + check_less_precise: bool | int | NoDefault = no_default, + check_exact: bool = True, + check_categorical: bool = True, + check_order: bool = True, + rtol: float = 1.0e-5, + atol: float = 1.0e-8, + obj: str = "Index", +) -> None: + """ + Check that left and right Index are equal. + + Parameters + ---------- + left : Index + right : Index + exact : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. If 'equiv', then RangeIndex can be substituted for + Int64Index as well. + check_names : bool, default True + Whether to check the names attribute. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + + .. deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + check_exact : bool, default True + Whether to compare number exactly. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_order : bool, default True + Whether to compare the order of index entries as well as their values. + If True, both indexes must contain the same elements, in the same order. + If False, both indexes must contain the same elements, but in any order. + + .. versionadded:: 1.2.0 + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + obj : str, default 'Index' + Specify object name being compared, internally used to show appropriate + assertion message. 
+ + Examples + -------- + >>> from pandas import testing as tm + >>> a = pd.Index([1, 2, 3]) + >>> b = pd.Index([1, 2, 3]) + >>> tm.assert_index_equal(a, b) + """ + __tracebackhide__ = True + + def _check_types(left, right, obj="Index") -> None: + if not exact: + return + + assert_class_equal(left, right, exact=exact, obj=obj) + assert_attr_equal("inferred_type", left, right, obj=obj) + + # Skip exact dtype checking when `check_categorical` is False + if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype): + if check_categorical: + assert_attr_equal("dtype", left, right, obj=obj) + assert_index_equal(left.categories, right.categories, exact=exact) + return + + assert_attr_equal("dtype", left, right, obj=obj) + + def _get_ilevel_values(index, level): + # accept level number only + unique = index.levels[level] + level_codes = index.codes[level] + filled = take_nd(unique._values, level_codes, fill_value=unique._na_value) + return unique._shallow_copy(filled, name=index.names[level]) + + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. " + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + rtol = atol = _get_tol_from_less_precise(check_less_precise) + + # instance validation + _check_isinstance(left, right, Index) + + # class / dtype comparison + _check_types(left, right, obj=obj) + + # level comparison + if left.nlevels != right.nlevels: + msg1 = f"{obj} levels are different" + msg2 = f"{left.nlevels}, {left}" + msg3 = f"{right.nlevels}, {right}" + raise_assert_detail(obj, msg1, msg2, msg3) + + # length comparison + if len(left) != len(right): + msg1 = f"{obj} length are different" + msg2 = f"{len(left)}, {left}" + msg3 = f"{len(right)}, {right}" + raise_assert_detail(obj, msg1, msg2, msg3) + + # If order doesn't matter then sort the index entries + if not check_order: + left = safe_sort_index(left) + right = safe_sort_index(right) + + # MultiIndex special comparison for little-friendly error messages + if left.nlevels > 1: + left = cast(MultiIndex, left) + right = cast(MultiIndex, right) + + for level in range(left.nlevels): + # cannot use get_level_values here because it can change dtype + llevel = _get_ilevel_values(left, level) + rlevel = _get_ilevel_values(right, level) + + lobj = f"MultiIndex level [{level}]" + assert_index_equal( + llevel, + rlevel, + exact=exact, + check_names=check_names, + check_exact=check_exact, + rtol=rtol, + atol=atol, + obj=lobj, + ) + # get_level_values may change dtype + _check_types(left.levels[level], right.levels[level], obj=obj) + + # skip exact index checking when `check_categorical` is False + if check_exact and check_categorical: + if not left.equals(right): + mismatch = left._values != right._values + + if is_extension_array_dtype(mismatch): + mismatch = cast("ExtensionArray", mismatch).fillna(True) + + diff = np.sum(mismatch.astype(int)) * 100.0 / len(left) + msg = f"{obj} values are different ({np.round(diff, 5)} %)" + raise_assert_detail(obj, msg, left, right) + else: + + # if we have "equiv", this becomes True + exact_bool = bool(exact) + _testing.assert_almost_equal( + left.values, + right.values, + rtol=rtol, + atol=atol, + check_dtype=exact_bool, + obj=obj, + lobj=left, + robj=right, + ) + + # metadata comparison + if check_names: + assert_attr_equal("names", left, right, obj=obj) + if isinstance(left, PeriodIndex) or 
isinstance(right, PeriodIndex): + assert_attr_equal("freq", left, right, obj=obj) + if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex): + assert_interval_array_equal(left._values, right._values) + + if check_categorical: + if is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype): + assert_categorical_equal(left._values, right._values, obj=f"{obj} category") + + +def assert_class_equal(left, right, exact: bool | str = True, obj="Input") -> None: + """ + Checks classes are equal. + """ + from pandas.core.indexes.numeric import NumericIndex + + __tracebackhide__ = True + + def repr_class(x): + if isinstance(x, Index): + # return Index as it is to include values in the error message + return x + + return type(x).__name__ + + if type(left) == type(right): + return + + if exact == "equiv": + # accept equivalence of NumericIndex (sub-)classes + if isinstance(left, NumericIndex) and isinstance(right, NumericIndex): + return + + msg = f"{obj} classes are different" + raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) + + +def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None: + """ + Check attributes are equal. Both objects must have attribute. + + Parameters + ---------- + attr : str + Attribute name being compared. + left : object + right : object + obj : str, default 'Attributes' + Specify object name being compared, internally used to show appropriate + assertion message + """ + __tracebackhide__ = True + + left_attr = getattr(left, attr) + right_attr = getattr(right, attr) + + if left_attr is right_attr or is_matching_na(left_attr, right_attr): + # e.g. both np.nan, both NaT, both pd.NA, ... + return None + + try: + result = left_attr == right_attr + except TypeError: + # datetimetz on rhs may raise TypeError + result = False + if (left_attr is pd.NA) ^ (right_attr is pd.NA): + result = False + elif not isinstance(result, bool): + result = result.all() + + if not result: + msg = f'Attribute "{attr}" are different' + raise_assert_detail(obj, msg, left_attr, right_attr) + return None + + +def assert_is_valid_plot_return_object(objs) -> None: + import matplotlib.pyplot as plt + + if isinstance(objs, (Series, np.ndarray)): + for el in objs.ravel(): + msg = ( + "one of 'objs' is not a matplotlib Axes instance, " + f"type encountered {repr(type(el).__name__)}" + ) + assert isinstance(el, (plt.Axes, dict)), msg + else: + msg = ( + "objs is neither an ndarray of Artist instances nor a single " + "ArtistArtist instance, tuple, or dict, 'objs' is a " + f"{repr(type(objs).__name__)}" + ) + assert isinstance(objs, (plt.Artist, tuple, dict)), msg + + +def assert_is_sorted(seq) -> None: + """Assert that the sequence is sorted.""" + if isinstance(seq, (Index, Series)): + seq = seq.values + # sorting does not change precisions + assert_numpy_array_equal(seq, np.sort(np.array(seq))) + + +def assert_categorical_equal( + left, right, check_dtype=True, check_category_order=True, obj="Categorical" +) -> None: + """ + Test that Categoricals are equivalent. + + Parameters + ---------- + left : Categorical + right : Categorical + check_dtype : bool, default True + Check that integer dtype of the codes are the same. + check_category_order : bool, default True + Whether the order of the categories should be compared, which + implies identical integer codes. If False, only the resulting + values are compared. The ordered attribute is + checked regardless. 
+ obj : str, default 'Categorical' + Specify object name being compared, internally used to show appropriate + assertion message. + """ + _check_isinstance(left, right, Categorical) + + exact: bool | str + if isinstance(left.categories, RangeIndex) or isinstance( + right.categories, RangeIndex + ): + exact = "equiv" + else: + # We still want to require exact matches for NumericIndex + exact = True + + if check_category_order: + assert_index_equal( + left.categories, right.categories, obj=f"{obj}.categories", exact=exact + ) + assert_numpy_array_equal( + left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes" + ) + else: + try: + lc = left.categories.sort_values() + rc = right.categories.sort_values() + except TypeError: + # e.g. '<' not supported between instances of 'int' and 'str' + lc, rc = left.categories, right.categories + assert_index_equal(lc, rc, obj=f"{obj}.categories", exact=exact) + assert_index_equal( + left.categories.take(left.codes), + right.categories.take(right.codes), + obj=f"{obj}.values", + exact=exact, + ) + + assert_attr_equal("ordered", left, right, obj=obj) + + +def assert_interval_array_equal( + left, right, exact="equiv", obj="IntervalArray" +) -> None: + """ + Test that two IntervalArrays are equivalent. + + Parameters + ---------- + left, right : IntervalArray + The IntervalArrays to compare. + exact : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. If 'equiv', then RangeIndex can be substituted for + Int64Index as well. + obj : str, default 'IntervalArray' + Specify object name being compared, internally used to show appropriate + assertion message + """ + _check_isinstance(left, right, IntervalArray) + + kwargs = {} + if left._left.dtype.kind in ["m", "M"]: + # We have a DatetimeArray or TimedeltaArray + kwargs["check_freq"] = False + + assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs) + assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs) + + assert_attr_equal("closed", left, right, obj=obj) + + +def assert_period_array_equal(left, right, obj="PeriodArray") -> None: + _check_isinstance(left, right, PeriodArray) + + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + assert_attr_equal("freq", left, right, obj=obj) + + +def assert_datetime_array_equal( + left, right, obj="DatetimeArray", check_freq=True +) -> None: + __tracebackhide__ = True + _check_isinstance(left, right, DatetimeArray) + + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + if check_freq: + assert_attr_equal("freq", left, right, obj=obj) + assert_attr_equal("tz", left, right, obj=obj) + + +def assert_timedelta_array_equal( + left, right, obj="TimedeltaArray", check_freq=True +) -> None: + __tracebackhide__ = True + _check_isinstance(left, right, TimedeltaArray) + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + if check_freq: + assert_attr_equal("freq", left, right, obj=obj) + + +def raise_assert_detail(obj, message, left, right, diff=None, index_values=None): + __tracebackhide__ = True + + msg = f"""{obj} are different + +{message}""" + + if isinstance(index_values, np.ndarray): + msg += f"\n[index]: {pprint_thing(index_values)}" + + if isinstance(left, np.ndarray): + left = pprint_thing(left) + elif ( + isinstance(left, CategoricalDtype) + or isinstance(left, PandasDtype) + or isinstance(left, StringDtype) + ): + left = repr(left) + + if isinstance(right, np.ndarray): + right = pprint_thing(right) + elif ( + 
isinstance(right, CategoricalDtype) + or isinstance(right, PandasDtype) + or isinstance(right, StringDtype) + ): + right = repr(right) + + msg += f""" +[left]: {left} +[right]: {right}""" + + if diff is not None: + msg += f"\n[diff]: {diff}" + + raise AssertionError(msg) + + +def assert_numpy_array_equal( + left, + right, + strict_nan=False, + check_dtype: bool | Literal["equiv"] = True, + err_msg=None, + check_same=None, + obj="numpy array", + index_values=None, +) -> None: + """ + Check that 'np.ndarray' is equivalent. + + Parameters + ---------- + left, right : numpy.ndarray or iterable + The two arrays to be compared. + strict_nan : bool, default False + If True, consider NaN and None to be different. + check_dtype : bool, default True + Check dtype if both a and b are np.ndarray. + err_msg : str, default None + If provided, used as assertion message. + check_same : None|'copy'|'same', default None + Ensure left and right refer/do not refer to the same memory area. + obj : str, default 'numpy array' + Specify object name being compared, internally used to show appropriate + assertion message. + index_values : numpy.ndarray, default None + optional index (shared by both left and right), used in output. + """ + __tracebackhide__ = True + + # instance validation + # Show a detailed error message when classes are different + assert_class_equal(left, right, obj=obj) + # both classes must be an np.ndarray + _check_isinstance(left, right, np.ndarray) + + def _get_base(obj): + return obj.base if getattr(obj, "base", None) is not None else obj + + left_base = _get_base(left) + right_base = _get_base(right) + + if check_same == "same": + if left_base is not right_base: + raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}") + elif check_same == "copy": + if left_base is right_base: + raise AssertionError(f"{repr(left_base)} is {repr(right_base)}") + + def _raise(left, right, err_msg): + if err_msg is None: + if left.shape != right.shape: + raise_assert_detail( + obj, f"{obj} shapes are different", left.shape, right.shape + ) + + diff = 0 + for left_arr, right_arr in zip(left, right): + # count up differences + if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan): + diff += 1 + + diff = diff * 100.0 / left.size + msg = f"{obj} values are different ({np.round(diff, 5)} %)" + raise_assert_detail(obj, msg, left, right, index_values=index_values) + + raise AssertionError(err_msg) + + # compare shape and values + if not array_equivalent(left, right, strict_nan=strict_nan): + _raise(left, right, err_msg) + + if check_dtype: + if isinstance(left, np.ndarray) and isinstance(right, np.ndarray): + assert_attr_equal("dtype", left, right, obj=obj) + + +def assert_extension_array_equal( + left, + right, + check_dtype: bool | Literal["equiv"] = True, + index_values=None, + check_less_precise=no_default, + check_exact=False, + rtol: float = 1.0e-5, + atol: float = 1.0e-8, +) -> None: + """ + Check that left and right ExtensionArrays are equal. + + Parameters + ---------- + left, right : ExtensionArray + The two arrays to compare. + check_dtype : bool, default True + Whether to check if the ExtensionArray dtypes are identical. + index_values : numpy.ndarray, default None + Optional index (shared by both left and right), used in output. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + + .. 
deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + check_exact : bool, default False + Whether to compare number exactly. + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + + Notes + ----- + Missing values are checked separately from valid values. + A mask of missing values is computed for each and checked to match. + The remaining all-valid values are cast to object dtype and checked. + + Examples + -------- + >>> from pandas import testing as tm + >>> a = pd.Series([1, 2, 3, 4]) + >>> b, c = a.array, a.array + >>> tm.assert_extension_array_equal(b, c) + """ + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. " + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + rtol = atol = _get_tol_from_less_precise(check_less_precise) + + assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" + assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" + if check_dtype: + assert_attr_equal("dtype", left, right, obj="ExtensionArray") + + if ( + isinstance(left, DatetimeLikeArrayMixin) + and isinstance(right, DatetimeLikeArrayMixin) + and type(right) == type(left) + ): + # Avoid slow object-dtype comparisons + # np.asarray for case where we have a np.MaskedArray + assert_numpy_array_equal( + np.asarray(left.asi8), np.asarray(right.asi8), index_values=index_values + ) + return + + left_na = np.asarray(left.isna()) + right_na = np.asarray(right.isna()) + assert_numpy_array_equal( + left_na, right_na, obj="ExtensionArray NA mask", index_values=index_values + ) + + left_valid = left[~left_na].to_numpy(dtype=object) + right_valid = right[~right_na].to_numpy(dtype=object) + if check_exact: + assert_numpy_array_equal( + left_valid, right_valid, obj="ExtensionArray", index_values=index_values + ) + else: + _testing.assert_almost_equal( + left_valid, + right_valid, + check_dtype=bool(check_dtype), + rtol=rtol, + atol=atol, + obj="ExtensionArray", + index_values=index_values, + ) + + +# This could be refactored to use the NDFrame.equals method +def assert_series_equal( + left, + right, + check_dtype: bool | Literal["equiv"] = True, + check_index_type: bool | Literal["equiv"] = "equiv", + check_series_type=True, + check_less_precise: bool | int | NoDefault = no_default, + check_names=True, + check_exact=False, + check_datetimelike_compat=False, + check_categorical=True, + check_category_order=True, + check_freq=True, + check_flags=True, + rtol=1.0e-5, + atol=1.0e-8, + obj="Series", + *, + check_index=True, + check_like=False, +) -> None: + """ + Check that left and right Series are equal. + + Parameters + ---------- + left : Series + right : Series + check_dtype : bool, default True + Whether to check the Series dtype is identical. + check_index_type : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. + check_series_type : bool, default True + Whether to check the Series class is identical. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. 
+ 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + + .. deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + check_names : bool, default True + Whether to check the Series and Index names attribute. + check_exact : bool, default False + Whether to compare number exactly. + check_datetimelike_compat : bool, default False + Compare datetime-like which is comparable ignoring dtype. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_category_order : bool, default True + Whether to compare category order of internal Categoricals. + + .. versionadded:: 1.0.2 + check_freq : bool, default True + Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. + + .. versionadded:: 1.1.0 + check_flags : bool, default True + Whether to check the `flags` attribute. + + .. versionadded:: 1.2.0 + + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + obj : str, default 'Series' + Specify object name being compared, internally used to show appropriate + assertion message. + check_index : bool, default True + Whether to check index equivalence. If False, then compare only values. + + .. versionadded:: 1.3.0 + check_like : bool, default False + If True, ignore the order of the index. Must be False if check_index is False. + Note: same labels must be with the same data. + + .. versionadded:: 1.5.0 + + Examples + -------- + >>> from pandas import testing as tm + >>> a = pd.Series([1, 2, 3, 4]) + >>> b = pd.Series([1, 2, 3, 4]) + >>> tm.assert_series_equal(a, b) + """ + __tracebackhide__ = True + + if not check_index and check_like: + raise ValueError("check_like must be False if check_index is False") + + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. 
" + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + rtol = atol = _get_tol_from_less_precise(check_less_precise) + + # instance validation + _check_isinstance(left, right, Series) + + if check_series_type: + assert_class_equal(left, right, obj=obj) + + # length comparison + if len(left) != len(right): + msg1 = f"{len(left)}, {left.index}" + msg2 = f"{len(right)}, {right.index}" + raise_assert_detail(obj, "Series length are different", msg1, msg2) + + if check_flags: + assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" + + if check_index: + # GH #38183 + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + check_order=not check_like, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) + + if check_like: + left, right = left.reindex_like(right), right + + if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)): + lidx = left.index + ridx = right.index + assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq) + + if check_dtype: + # We want to skip exact dtype checking when `check_categorical` + # is False. We'll still raise if only one is a `Categorical`, + # regardless of `check_categorical` + if ( + isinstance(left.dtype, CategoricalDtype) + and isinstance(right.dtype, CategoricalDtype) + and not check_categorical + ): + pass + else: + assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") + + if check_exact and is_numeric_dtype(left.dtype) and is_numeric_dtype(right.dtype): + left_values = left._values + right_values = right._values + # Only check exact if dtype is numeric + if isinstance(left_values, ExtensionArray) and isinstance( + right_values, ExtensionArray + ): + assert_extension_array_equal( + left_values, + right_values, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + else: + assert_numpy_array_equal( + left_values, + right_values, + check_dtype=check_dtype, + obj=str(obj), + index_values=np.asarray(left.index), + ) + elif check_datetimelike_compat and ( + needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype) + ): + # we want to check only if we have compat dtypes + # e.g. integer and M|m are NOT compat, but we can simply check + # the values in that case + + # datetimelike may have different objects (e.g. datetime.datetime + # vs Timestamp) but will compare equal + if not Index(left._values).equals(Index(right._values)): + msg = ( + f"[datetimelike_compat=True] {left._values} " + f"is not equal to {right._values}." 
+ ) + raise AssertionError(msg) + elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype): + assert_interval_array_equal(left.array, right.array) + elif isinstance(left.dtype, CategoricalDtype) or isinstance( + right.dtype, CategoricalDtype + ): + _testing.assert_almost_equal( + left._values, + right._values, + rtol=rtol, + atol=atol, + check_dtype=bool(check_dtype), + obj=str(obj), + index_values=np.asarray(left.index), + ) + elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype(right.dtype): + assert_extension_array_equal( + left._values, + right._values, + rtol=rtol, + atol=atol, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + elif is_extension_array_dtype_and_needs_i8_conversion( + left.dtype, right.dtype + ) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype): + assert_extension_array_equal( + left._values, + right._values, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype): + # DatetimeArray or TimedeltaArray + assert_extension_array_equal( + left._values, + right._values, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + else: + _testing.assert_almost_equal( + left._values, + right._values, + rtol=rtol, + atol=atol, + check_dtype=bool(check_dtype), + obj=str(obj), + index_values=np.asarray(left.index), + ) + + # metadata comparison + if check_names: + assert_attr_equal("name", left, right, obj=obj) + + if check_categorical: + if isinstance(left.dtype, CategoricalDtype) or isinstance( + right.dtype, CategoricalDtype + ): + assert_categorical_equal( + left._values, + right._values, + obj=f"{obj} category", + check_category_order=check_category_order, + ) + + +# This could be refactored to use the NDFrame.equals method +def assert_frame_equal( + left, + right, + check_dtype: bool | Literal["equiv"] = True, + check_index_type: bool | Literal["equiv"] = "equiv", + check_column_type="equiv", + check_frame_type=True, + check_less_precise=no_default, + check_names=True, + by_blocks=False, + check_exact=False, + check_datetimelike_compat=False, + check_categorical=True, + check_like=False, + check_freq=True, + check_flags=True, + rtol=1.0e-5, + atol=1.0e-8, + obj="DataFrame", +) -> None: + """ + Check that left and right DataFrame are equal. + + This function is intended to compare two DataFrames and output any + differences. It is mostly intended for use in unit tests. + Additional parameters allow varying the strictness of the + equality checks performed. + + Parameters + ---------- + left : DataFrame + First DataFrame to compare. + right : DataFrame + Second DataFrame to compare. + check_dtype : bool, default True + Whether to check the DataFrame dtype is identical. + check_index_type : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. + check_column_type : bool or {'equiv'}, default 'equiv' + Whether to check the columns class, dtype and inferred_type + are identical. Is passed as the ``exact`` argument of + :func:`assert_index_equal`. + check_frame_type : bool, default True + Whether to check the DataFrame class is identical. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. 
+ + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + + .. deprecated:: 1.1.0 + Use `rtol` and `atol` instead to define relative/absolute + tolerance, respectively. Similar to :func:`math.isclose`. + check_names : bool, default True + Whether to check that the `names` attribute for both the `index` + and `column` attributes of the DataFrame is identical. + by_blocks : bool, default False + Specify how to compare internal data. If False, compare by columns. + If True, compare by blocks. + check_exact : bool, default False + Whether to compare number exactly. + check_datetimelike_compat : bool, default False + Compare datetime-like which is comparable ignoring dtype. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_like : bool, default False + If True, ignore the order of index & columns. + Note: index labels must match their respective rows + (same as in columns) - same labels must be with the same data. + check_freq : bool, default True + Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. + + .. versionadded:: 1.1.0 + check_flags : bool, default True + Whether to check the `flags` attribute. + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + + .. versionadded:: 1.1.0 + obj : str, default 'DataFrame' + Specify object name being compared, internally used to show appropriate + assertion message. + + See Also + -------- + assert_series_equal : Equivalent method for asserting Series equality. + DataFrame.equals : Check DataFrame equality. + + Examples + -------- + This example shows comparing two DataFrames that are equal + but with columns of differing dtypes. + + >>> from pandas.testing import assert_frame_equal + >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) + + df1 equals itself. + + >>> assert_frame_equal(df1, df1) + + df1 differs from df2 as column 'b' is of a different type. + + >>> assert_frame_equal(df1, df2) + Traceback (most recent call last): + ... + AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different + + Attribute "dtype" are different + [left]: int64 + [right]: float64 + + Ignore differing dtypes in columns with check_dtype. + + >>> assert_frame_equal(df1, df2, check_dtype=False) + """ + __tracebackhide__ = True + + if check_less_precise is not no_default: + warnings.warn( + "The 'check_less_precise' keyword in testing.assert_*_equal " + "is deprecated and will be removed in a future version. 
" + "You can stop passing 'check_less_precise' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + rtol = atol = _get_tol_from_less_precise(check_less_precise) + + # instance validation + _check_isinstance(left, right, DataFrame) + + if check_frame_type: + assert isinstance(left, type(right)) + # assert_class_equal(left, right, obj=obj) + + # shape comparison + if left.shape != right.shape: + raise_assert_detail( + obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}" + ) + + if check_flags: + assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" + + # index comparison + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + check_order=not check_like, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) + + # column comparison + assert_index_equal( + left.columns, + right.columns, + exact=check_column_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + check_order=not check_like, + rtol=rtol, + atol=atol, + obj=f"{obj}.columns", + ) + + if check_like: + left, right = left.reindex_like(right), right + + # compare by blocks + if by_blocks: + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() + for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): + assert dtype in lblocks + assert dtype in rblocks + assert_frame_equal( + lblocks[dtype], rblocks[dtype], check_dtype=check_dtype, obj=obj + ) + + # compare by columns + else: + for i, col in enumerate(left.columns): + # We have already checked that columns match, so we can do + # fast location-based lookups + lcol = left._ixs(i, axis=1) + rcol = right._ixs(i, axis=1) + + # GH #38183 + # use check_index=False, because we do not want to run + # assert_index_equal for each column, + # as we already checked it for the whole dataframe before. + assert_series_equal( + lcol, + rcol, + check_dtype=check_dtype, + check_index_type=check_index_type, + check_exact=check_exact, + check_names=check_names, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + check_freq=check_freq, + obj=f'{obj}.iloc[:, {i}] (column name="{col}")', + rtol=rtol, + atol=atol, + check_index=False, + check_flags=False, + ) + + +def assert_equal(left, right, **kwargs) -> None: + """ + Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. + + Parameters + ---------- + left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray + The two items to be compared. + **kwargs + All keyword arguments are passed through to the underlying assert method. 
+ """ + __tracebackhide__ = True + + if isinstance(left, Index): + assert_index_equal(left, right, **kwargs) + if isinstance(left, (DatetimeIndex, TimedeltaIndex)): + assert left.freq == right.freq, (left.freq, right.freq) + elif isinstance(left, Series): + assert_series_equal(left, right, **kwargs) + elif isinstance(left, DataFrame): + assert_frame_equal(left, right, **kwargs) + elif isinstance(left, IntervalArray): + assert_interval_array_equal(left, right, **kwargs) + elif isinstance(left, PeriodArray): + assert_period_array_equal(left, right, **kwargs) + elif isinstance(left, DatetimeArray): + assert_datetime_array_equal(left, right, **kwargs) + elif isinstance(left, TimedeltaArray): + assert_timedelta_array_equal(left, right, **kwargs) + elif isinstance(left, ExtensionArray): + assert_extension_array_equal(left, right, **kwargs) + elif isinstance(left, np.ndarray): + assert_numpy_array_equal(left, right, **kwargs) + elif isinstance(left, str): + assert kwargs == {} + assert left == right + else: + assert kwargs == {} + assert_almost_equal(left, right) + + +def assert_sp_array_equal(left, right) -> None: + """ + Check that the left and right SparseArray are equal. + + Parameters + ---------- + left : SparseArray + right : SparseArray + """ + _check_isinstance(left, right, pd.arrays.SparseArray) + + assert_numpy_array_equal(left.sp_values, right.sp_values) + + # SparseIndex comparison + assert isinstance(left.sp_index, SparseIndex) + assert isinstance(right.sp_index, SparseIndex) + + left_index = left.sp_index + right_index = right.sp_index + + if not left_index.equals(right_index): + raise_assert_detail( + "SparseArray.index", "index are not equal", left_index, right_index + ) + else: + # Just ensure a + pass + + assert_attr_equal("fill_value", left, right) + assert_attr_equal("dtype", left, right) + assert_numpy_array_equal(left.to_dense(), right.to_dense()) + + +def assert_contains_all(iterable, dic) -> None: + for k in iterable: + assert k in dic, f"Did not contain item: {repr(k)}" + + +def assert_copy(iter1, iter2, **eql_kwargs) -> None: + """ + iter1, iter2: iterables that produce elements + comparable with assert_almost_equal + + Checks that the elements are equal, but not + the same object. (Does not check that items + in sequences are also not the same object) + """ + for elem1, elem2 in zip(iter1, iter2): + assert_almost_equal(elem1, elem2, **eql_kwargs) + msg = ( + f"Expected object {repr(type(elem1))} and object {repr(type(elem2))} to be " + "different objects, but they were the same object." + ) + assert elem1 is not elem2, msg + + +def is_extension_array_dtype_and_needs_i8_conversion(left_dtype, right_dtype) -> bool: + """ + Checks that we have the combination of an ExtensionArraydtype and + a dtype that should be converted to int64 + + Returns + ------- + bool + + Related to issue #37609 + """ + return is_extension_array_dtype(left_dtype) and needs_i8_conversion(right_dtype) + + +def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -> None: + """ + Check that ser.iloc[i_slc] matches ser.loc[l_slc] and, if applicable, + ser[l_slc]. + """ + expected = ser.iloc[i_slc] + + assert_series_equal(ser.loc[l_slc], expected) + + if not ser.index.is_integer(): + # For integer indices, .loc and plain getitem are position-based. + assert_series_equal(ser[l_slc], expected) + + +def assert_metadata_equivalent(left, right) -> None: + """ + Check that ._metadata attributes are equivalent. 
+ """ + for attr in left._metadata: + val = getattr(left, attr, None) + if right is None: + assert val is None + else: + assert val == getattr(right, attr, None) diff --git a/pandas/_testing/compat.py b/pandas/_testing/compat.py new file mode 100644 index 00000000..e2ac8f77 --- /dev/null +++ b/pandas/_testing/compat.py @@ -0,0 +1,23 @@ +""" +Helpers for sharing tests between DataFrame/Series +""" + +from pandas import DataFrame + + +def get_dtype(obj): + if isinstance(obj, DataFrame): + # Note: we are assuming only one column + return obj.dtypes.iat[0] + else: + return obj.dtype + + +def get_obj(df: DataFrame, klass): + """ + For sharing tests using frame_or_series, either return the DataFrame + unchanged or return it's first column as a Series. + """ + if klass is DataFrame: + return df + return df._ixs(0, axis=1) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py new file mode 100644 index 00000000..e64adb06 --- /dev/null +++ b/pandas/_testing/contexts.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +from contextlib import contextmanager +import os +from pathlib import Path +from shutil import rmtree +import tempfile +from typing import ( + IO, + Any, + Iterator, +) +import uuid + +import numpy as np + +from pandas import set_option + +from pandas.io.common import get_handle + + +@contextmanager +def decompress_file(path, compression) -> Iterator[IO[bytes]]: + """ + Open a compressed file and return a file object. + + Parameters + ---------- + path : str + The path where the file is read from. + + compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd', None} + Name of the decompression to use + + Returns + ------- + file object + """ + with get_handle(path, "rb", compression=compression, is_text=False) as handle: + yield handle.handle + + +@contextmanager +def set_timezone(tz: str) -> Iterator[None]: + """ + Context manager for temporarily setting a timezone. + + Parameters + ---------- + tz : str + A string representing a valid timezone. + + Examples + -------- + >>> from datetime import datetime + >>> from dateutil.tz import tzlocal + >>> tzlocal().tzname(datetime(2021, 1, 1)) # doctest: +SKIP + 'IST' + + >>> with set_timezone('US/Eastern'): + ... tzlocal().tzname(datetime(2021, 1, 1)) + ... + 'EST' + """ + import os + import time + + def setTZ(tz): + if tz is None: + try: + del os.environ["TZ"] + except KeyError: + pass + else: + os.environ["TZ"] = tz + time.tzset() + + orig_tz = os.environ.get("TZ") + setTZ(tz) + try: + yield + finally: + setTZ(orig_tz) + + +@contextmanager +def ensure_clean(filename=None, return_filelike: bool = False, **kwargs: Any): + """ + Gets a temporary path and agrees to remove on close. + + This implementation does not use tempfile.mkstemp to avoid having a file handle. + If the code using the returned path wants to delete the file itself, windows + requires that no program has a file handle to it. + + Parameters + ---------- + filename : str (optional) + suffix of the created file. + return_filelike : bool (default False) + if True, returns a file-like which is *always* cleaned. Necessary for + savefig and other functions which want to append extensions. + **kwargs + Additional keywords are passed to open(). 
+ + """ + folder = Path(tempfile.gettempdir()) + + if filename is None: + filename = "" + filename = str(uuid.uuid4()) + filename + path = folder / filename + + path.touch() + + handle_or_str: str | IO = str(path) + if return_filelike: + kwargs.setdefault("mode", "w+b") + handle_or_str = open(path, **kwargs) + + try: + yield handle_or_str + finally: + if not isinstance(handle_or_str, str): + handle_or_str.close() + if path.is_file(): + path.unlink() + + +@contextmanager +def ensure_clean_dir() -> Iterator[str]: + """ + Get a temporary directory path and agrees to remove on close. + + Yields + ------ + Temporary directory path + """ + directory_name = tempfile.mkdtemp(suffix="") + try: + yield directory_name + finally: + try: + rmtree(directory_name) + except OSError: + pass + + +@contextmanager +def ensure_safe_environment_variables() -> Iterator[None]: + """ + Get a context manager to safely set environment variables + + All changes will be undone on close, hence environment variables set + within this contextmanager will neither persist nor change global state. + """ + saved_environ = dict(os.environ) + try: + yield + finally: + os.environ.clear() + os.environ.update(saved_environ) + + +@contextmanager +def with_csv_dialect(name, **kwargs) -> Iterator[None]: + """ + Context manager to temporarily register a CSV dialect for parsing CSV. + + Parameters + ---------- + name : str + The name of the dialect. + kwargs : mapping + The parameters for the dialect. + + Raises + ------ + ValueError : the name of the dialect conflicts with a builtin one. + + See Also + -------- + csv : Python's CSV library. + """ + import csv + + _BUILTIN_DIALECTS = {"excel", "excel-tab", "unix"} + + if name in _BUILTIN_DIALECTS: + raise ValueError("Cannot override builtin dialect.") + + csv.register_dialect(name, **kwargs) + try: + yield + finally: + csv.unregister_dialect(name) + + +@contextmanager +def use_numexpr(use, min_elements=None) -> Iterator[None]: + from pandas.core.computation import expressions as expr + + if min_elements is None: + min_elements = expr._MIN_ELEMENTS + + olduse = expr.USE_NUMEXPR + oldmin = expr._MIN_ELEMENTS + set_option("compute.use_numexpr", use) + expr._MIN_ELEMENTS = min_elements + try: + yield + finally: + expr._MIN_ELEMENTS = oldmin + set_option("compute.use_numexpr", olduse) + + +class RNGContext: + """ + Context manager to set the numpy random number generator speed. Returns + to the original value upon exiting the context manager. 
+ + Parameters + ---------- + seed : int + Seed for numpy.random.seed + + Examples + -------- + with RNGContext(42): + np.random.randn() + """ + + def __init__(self, seed) -> None: + self.seed = seed + + def __enter__(self) -> None: + + self.start_state = np.random.get_state() + np.random.seed(self.seed) + + def __exit__(self, exc_type, exc_value, traceback) -> None: + + np.random.set_state(self.start_state) diff --git a/pandas/_typing.py b/pandas/_typing.py new file mode 100644 index 00000000..03fb5fcb --- /dev/null +++ b/pandas/_typing.py @@ -0,0 +1,340 @@ +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, + tzinfo, +) +from os import PathLike +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Hashable, + Iterator, + List, + Literal, + Mapping, + Optional, + Protocol, + Sequence, + Tuple, + Type as type_t, + TypeVar, + Union, +) + +import numpy as np + +# To prevent import cycles place any internal imports in the branch below +# and use a string literal forward reference to it in subsequent types +# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles +if TYPE_CHECKING: + import numpy.typing as npt + + from pandas._libs import ( + NaTType, + Period, + Timedelta, + Timestamp, + ) + from pandas._libs.tslibs import BaseOffset + + from pandas.core.dtypes.dtypes import ExtensionDtype + + from pandas import Interval + from pandas.core.arrays.base import ExtensionArray + from pandas.core.frame import DataFrame + from pandas.core.generic import NDFrame + from pandas.core.groupby.generic import ( + DataFrameGroupBy, + GroupBy, + SeriesGroupBy, + ) + from pandas.core.indexes.base import Index + from pandas.core.internals import ( + ArrayManager, + BlockManager, + SingleArrayManager, + SingleBlockManager, + ) + from pandas.core.resample import Resampler + from pandas.core.series import Series + from pandas.core.window.rolling import BaseWindow + + from pandas.io.formats.format import EngFormatter + + ScalarLike_co = Union[ + int, + float, + complex, + str, + bytes, + np.generic, + ] + + # numpy compatible types + NumpyValueArrayLike = Union[ScalarLike_co, npt.ArrayLike] + # Name "npt._ArrayLikeInt_co" is not defined [name-defined] + NumpySorter = Optional[npt._ArrayLikeInt_co] # type: ignore[name-defined] + +else: + npt: Any = None + +HashableT = TypeVar("HashableT", bound=Hashable) + +# array-like + +ArrayLike = Union["ExtensionArray", np.ndarray] +AnyArrayLike = Union[ArrayLike, "Index", "Series"] + +# scalars + +PythonScalar = Union[str, float, bool] +DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] +PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] +Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime] +IntStrT = TypeVar("IntStrT", int, str) + + +# timestamp and timedelta convertible types + +TimestampConvertibleTypes = Union[ + "Timestamp", datetime, np.datetime64, np.int64, float, str +] +TimedeltaConvertibleTypes = Union[ + "Timedelta", timedelta, np.timedelta64, np.int64, float, str +] +Timezone = Union[str, tzinfo] + +# NDFrameT is stricter and ensures that the same subclass of NDFrame always is +# used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a +# Series is passed into a function, a Series is always returned and if a DataFrame is +# passed in, a DataFrame is always returned. 
+NDFrameT = TypeVar("NDFrameT", bound="NDFrame") + +NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index") + +Axis = Union[str, int] +IndexLabel = Union[Hashable, Sequence[Hashable]] +Level = Hashable +Shape = Tuple[int, ...] +Suffixes = Tuple[Optional[str], Optional[str]] +Ordered = Optional[bool] +JSONSerializable = Optional[Union[PythonScalar, List, Dict]] +Frequency = Union[str, "BaseOffset"] +Axes = Union[AnyArrayLike, List, range] + +RandomState = Union[ + int, + ArrayLike, + np.random.Generator, + np.random.BitGenerator, + np.random.RandomState, +] + +# dtypes +NpDtype = Union[str, np.dtype, type_t[Union[str, complex, bool, object]]] +Dtype = Union["ExtensionDtype", NpDtype] +AstypeArg = Union["ExtensionDtype", "npt.DTypeLike"] +# DtypeArg specifies all allowable dtypes in a functions its dtype argument +DtypeArg = Union[Dtype, Dict[Hashable, Dtype]] +DtypeObj = Union[np.dtype, "ExtensionDtype"] + +# converters +ConvertersArg = Dict[Hashable, Callable[[Dtype], Dtype]] + +# parse_dates +ParseDatesArg = Union[ + bool, List[Hashable], List[List[Hashable]], Dict[Hashable, List[Hashable]] +] + +# For functions like rename that convert one label to another +Renamer = Union[Mapping[Any, Hashable], Callable[[Any], Hashable]] + +# to maintain type information across generic functions and parametrization +T = TypeVar("T") + +# used in decorators to preserve the signature of the function it decorates +# see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators +FuncType = Callable[..., Any] +F = TypeVar("F", bound=FuncType) + +# types of vectorized key functions for DataFrame::sort_values and +# DataFrame::sort_index, among others +ValueKeyFunc = Optional[Callable[["Series"], Union["Series", AnyArrayLike]]] +IndexKeyFunc = Optional[Callable[["Index"], Union["Index", AnyArrayLike]]] + +# types of `func` kwarg for DataFrame.aggregate and Series.aggregate +AggFuncTypeBase = Union[Callable, str] +AggFuncTypeDict = Dict[Hashable, Union[AggFuncTypeBase, List[AggFuncTypeBase]]] +AggFuncType = Union[ + AggFuncTypeBase, + List[AggFuncTypeBase], + AggFuncTypeDict, +] +AggObjType = Union[ + "Series", + "DataFrame", + "GroupBy", + "SeriesGroupBy", + "DataFrameGroupBy", + "BaseWindow", + "Resampler", +] + +PythonFuncType = Callable[[Any], Any] + +# filenames and file-like-objects +AnyStr_cov = TypeVar("AnyStr_cov", str, bytes, covariant=True) +AnyStr_con = TypeVar("AnyStr_con", str, bytes, contravariant=True) + + +class BaseBuffer(Protocol): + @property + def mode(self) -> str: + # for _get_filepath_or_buffer + ... + + def fileno(self) -> int: + # for _MMapWrapper + ... + + def seek(self, __offset: int, __whence: int = ...) -> int: + # with one argument: gzip.GzipFile, bz2.BZ2File + # with two arguments: zip.ZipFile, read_sas + ... + + def seekable(self) -> bool: + # for bz2.BZ2File + ... + + def tell(self) -> int: + # for zip.ZipFile, read_stata, to_stata + ... + + +class ReadBuffer(BaseBuffer, Protocol[AnyStr_cov]): + def read(self, __n: int | None = ...) -> AnyStr_cov: + # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File + ... + + +class WriteBuffer(BaseBuffer, Protocol[AnyStr_con]): + def write(self, __b: AnyStr_con) -> Any: + # for gzip.GzipFile, bz2.BZ2File + ... + + def flush(self) -> Any: + # for gzip.GzipFile, bz2.BZ2File + ... + + +class ReadPickleBuffer(ReadBuffer[bytes], Protocol): + def readline(self) -> AnyStr_cov: + ... + + +class WriteExcelBuffer(WriteBuffer[bytes], Protocol): + def truncate(self, size: int | None = ...) -> int: + ... 
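As an illustration of what the buffer Protocol classes above (BaseBuffer, ReadBuffer, WriteBuffer) describe, the following sketch, which is not part of the imported pandas/_typing.py, uses only standard-library buffers and public pandas I/O calls:

from io import BytesIO, StringIO

import pandas as pd

# Ordinary in-memory buffers already satisfy these protocols structurally:
# BytesIO provides read/readline/seek/tell (ReadBuffer[bytes]), and StringIO
# provides write/flush (WriteBuffer[str]), so both can be passed wherever
# pandas I/O functions are annotated with these types.
df = pd.read_csv(BytesIO(b"a,b\n1,2\n3,4\n"))

out = StringIO()
df.to_csv(out, index=False)
print(out.getvalue())

Because these are structural Protocols rather than concrete base classes, any user-defined object exposing the right methods also works without inheriting from anything in pandas.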
+ + +class ReadCsvBuffer(ReadBuffer[AnyStr_cov], Protocol): + def __iter__(self) -> Iterator[AnyStr_cov]: + # for engine=python + ... + + def readline(self) -> AnyStr_cov: + # for engine=python + ... + + @property + def closed(self) -> bool: + # for enine=pyarrow + ... + + +FilePath = Union[str, "PathLike[str]"] + +# for arbitrary kwargs passed during reading/writing files +StorageOptions = Optional[Dict[str, Any]] + + +# compression keywords and compression +CompressionDict = Dict[str, Any] +CompressionOptions = Optional[ + Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd", "tar"], CompressionDict] +] + +# types in DataFrameFormatter +FormattersType = Union[ + List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] +] +ColspaceType = Mapping[Hashable, Union[str, int]] +FloatFormatType = Union[str, Callable, "EngFormatter"] +ColspaceArgType = Union[ + str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]] +] + +# Arguments for fillna() +FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"] + +# internals +Manager = Union[ + "ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager" +] +SingleManager = Union["SingleArrayManager", "SingleBlockManager"] +Manager2D = Union["ArrayManager", "BlockManager"] + +# indexing +# PositionalIndexer -> valid 1D positional indexer, e.g. can pass +# to ndarray.__getitem__ +# ScalarIndexer is for a single value as the index +# SequenceIndexer is for list like or slices (but not tuples) +# PositionalIndexerTuple is extends the PositionalIndexer for 2D arrays +# These are used in various __getitem__ overloads +# TODO(typing#684): add Ellipsis, see +# https://github.com/python/typing/issues/684#issuecomment-548203158 +# https://bugs.python.org/issue41810 +# Using List[int] here rather than Sequence[int] to disallow tuples. +ScalarIndexer = Union[int, np.integer] +SequenceIndexer = Union[slice, List[int], np.ndarray] +PositionalIndexer = Union[ScalarIndexer, SequenceIndexer] +PositionalIndexerTuple = Tuple[PositionalIndexer, PositionalIndexer] +PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple] +if TYPE_CHECKING: + TakeIndexer = Union[Sequence[int], Sequence[np.integer], npt.NDArray[np.integer]] +else: + TakeIndexer = Any + +# Shared by functions such as drop and astype +IgnoreRaise = Literal["ignore", "raise"] + +# Windowing rank methods +WindowingRankType = Literal["average", "min", "max"] + +# read_csv engines +CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] + +# read_xml parsers +XMLParsers = Literal["lxml", "etree"] + +# Interval closed type +IntervalLeftRight = Literal["left", "right"] +IntervalClosedType = Union[IntervalLeftRight, Literal["both", "neither"]] + +# datetime and NaTType +DatetimeNaTType = Union[datetime, "NaTType"] +DateTimeErrorChoices = Union[IgnoreRaise, Literal["coerce"]] + +# sort_index +SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"] +NaPosition = Literal["first", "last"] + +# quantile interpolation +QuantileInterpolation = Literal["linear", "lower", "higher", "midpoint", "nearest"] + +# plotting +PlottingOrientation = Literal["horizontal", "vertical"] diff --git a/pandas/_version.py b/pandas/_version.py new file mode 100644 index 00000000..0d5430bd --- /dev/null +++ b/pandas/_version.py @@ -0,0 +1,560 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). 
Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = " (tag: v1.5.3)" + git_full = "2e218d10984e9919f0296931d92ea851c6a6faf5" + git_date = "2023-01-19 10:26:43 +0700" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "pandas-" + cfg.versionfile_source = "pandas/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print(f"unable to find command, tried {commands}") + return None, None + stdout = p.communicate()[0].strip().decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + f = open(versionfile_abs) + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "main". 
+ tags = {r for r in refs if re.search(r"\d", r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for i in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py new file mode 100644 index 00000000..9d4f7212 --- /dev/null +++ b/pandas/api/__init__.py @@ -0,0 +1,14 @@ +""" public toolkit API """ +from pandas.api import ( + extensions, + indexers, + interchange, + types, +) + +__all__ = [ + "interchange", + "extensions", + "indexers", + "types", +] diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py new file mode 100644 index 00000000..ea5f1ba9 --- /dev/null +++ b/pandas/api/extensions/__init__.py @@ -0,0 +1,33 @@ +""" +Public API for extending pandas objects. +""" + +from pandas._libs.lib import no_default + +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) + +from pandas.core.accessor import ( + register_dataframe_accessor, + register_index_accessor, + register_series_accessor, +) +from pandas.core.algorithms import take +from pandas.core.arrays import ( + ExtensionArray, + ExtensionScalarOpsMixin, +) + +__all__ = [ + "no_default", + "ExtensionDtype", + "register_extension_dtype", + "register_dataframe_accessor", + "register_index_accessor", + "register_series_accessor", + "take", + "ExtensionArray", + "ExtensionScalarOpsMixin", +] diff --git a/pandas/api/indexers/__init__.py b/pandas/api/indexers/__init__.py new file mode 100644 index 00000000..78357f11 --- /dev/null +++ b/pandas/api/indexers/__init__.py @@ -0,0 +1,17 @@ +""" +Public API for Rolling Window Indexers. +""" + +from pandas.core.indexers import check_array_indexer +from pandas.core.indexers.objects import ( + BaseIndexer, + FixedForwardWindowIndexer, + VariableOffsetWindowIndexer, +) + +__all__ = [ + "check_array_indexer", + "BaseIndexer", + "FixedForwardWindowIndexer", + "VariableOffsetWindowIndexer", +] diff --git a/pandas/api/interchange/__init__.py b/pandas/api/interchange/__init__.py new file mode 100644 index 00000000..2f3a73bc --- /dev/null +++ b/pandas/api/interchange/__init__.py @@ -0,0 +1,8 @@ +""" +Public API for DataFrame interchange protocol. +""" + +from pandas.core.interchange.dataframe_protocol import DataFrame +from pandas.core.interchange.from_dataframe import from_dataframe + +__all__ = ["from_dataframe", "DataFrame"] diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py new file mode 100644 index 00000000..fb1abdd5 --- /dev/null +++ b/pandas/api/types/__init__.py @@ -0,0 +1,23 @@ +""" +Public toolkit API. 
+""" + +from pandas._libs.lib import infer_dtype + +from pandas.core.dtypes.api import * # noqa: F401, F403 +from pandas.core.dtypes.concat import union_categoricals +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) + +__all__ = [ + "infer_dtype", + "union_categoricals", + "CategoricalDtype", + "DatetimeTZDtype", + "IntervalDtype", + "PeriodDtype", +] diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py new file mode 100644 index 00000000..3a8e80a6 --- /dev/null +++ b/pandas/arrays/__init__.py @@ -0,0 +1,36 @@ +""" +All of pandas' ExtensionArrays. + +See :ref:`extending.extension-types` for more. +""" +from pandas.core.arrays import ( + ArrowExtensionArray, + ArrowStringArray, + BooleanArray, + Categorical, + DatetimeArray, + FloatingArray, + IntegerArray, + IntervalArray, + PandasArray, + PeriodArray, + SparseArray, + StringArray, + TimedeltaArray, +) + +__all__ = [ + "ArrowExtensionArray", + "ArrowStringArray", + "BooleanArray", + "Categorical", + "DatetimeArray", + "FloatingArray", + "IntegerArray", + "IntervalArray", + "PandasArray", + "PeriodArray", + "SparseArray", + "StringArray", + "TimedeltaArray", +] diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py new file mode 100644 index 00000000..80f66c94 --- /dev/null +++ b/pandas/compat/__init__.py @@ -0,0 +1,166 @@ +""" +compat +====== + +Cross-compatible functions for different versions of Python. + +Other items: +* platform checker +""" +from __future__ import annotations + +import os +import platform +import sys +from typing import TYPE_CHECKING + +from pandas._typing import F +from pandas.compat.numpy import ( + is_numpy_dev, + np_version_under1p21, +) +from pandas.compat.pyarrow import ( + pa_version_under1p01, + pa_version_under2p0, + pa_version_under3p0, + pa_version_under4p0, + pa_version_under5p0, + pa_version_under6p0, + pa_version_under7p0, + pa_version_under8p0, + pa_version_under9p0, +) + +if TYPE_CHECKING: + import lzma + +PY39 = sys.version_info >= (3, 9) +PY310 = sys.version_info >= (3, 10) +PY311 = sys.version_info >= (3, 11) +PYPY = platform.python_implementation() == "PyPy" +IS64 = sys.maxsize > 2**32 + + +def set_function_name(f: F, name: str, cls) -> F: + """ + Bind the name/qualname attributes of the function. + """ + f.__name__ = name + f.__qualname__ = f"{cls.__name__}.{name}" + f.__module__ = cls.__module__ + return f + + +def is_platform_little_endian() -> bool: + """ + Checking if the running platform is little endian. + + Returns + ------- + bool + True if the running platform is little endian. + """ + return sys.byteorder == "little" + + +def is_platform_windows() -> bool: + """ + Checking if the running platform is windows. + + Returns + ------- + bool + True if the running platform is windows. + """ + return sys.platform in ["win32", "cygwin"] + + +def is_platform_linux() -> bool: + """ + Checking if the running platform is linux. + + Returns + ------- + bool + True if the running platform is linux. + """ + return sys.platform == "linux" + + +def is_platform_mac() -> bool: + """ + Checking if the running platform is mac. + + Returns + ------- + bool + True if the running platform is mac. + """ + return sys.platform == "darwin" + + +def is_platform_arm() -> bool: + """ + Checking if the running platform use ARM architecture. + + Returns + ------- + bool + True if the running platform uses ARM architecture. 
+ """ + return platform.machine() in ("arm64", "aarch64") or platform.machine().startswith( + "armv" + ) + + +def is_ci_environment() -> bool: + """ + Checking if running in a continuous integration environment by checking + the PANDAS_CI environment variable. + + Returns + ------- + bool + True if the running in a continuous integration environment. + """ + return os.environ.get("PANDAS_CI", "0") == "1" + + +def get_lzma_file() -> type[lzma.LZMAFile]: + """ + Importing the `LZMAFile` class from the `lzma` module. + + Returns + ------- + class + The `LZMAFile` class from the `lzma` module. + + Raises + ------ + RuntimeError + If the `lzma` module was not imported correctly, or didn't exist. + """ + try: + import lzma + except ImportError: + raise RuntimeError( + "lzma module not available. " + "A Python re-install with the proper dependencies, " + "might be required to solve this issue." + ) + return lzma.LZMAFile + + +__all__ = [ + "is_numpy_dev", + "np_version_under1p21", + "pa_version_under1p01", + "pa_version_under2p0", + "pa_version_under3p0", + "pa_version_under4p0", + "pa_version_under5p0", + "pa_version_under6p0", + "pa_version_under7p0", + "pa_version_under8p0", + "pa_version_under9p0", +] diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py new file mode 100644 index 00000000..3caa9275 --- /dev/null +++ b/pandas/compat/_optional.py @@ -0,0 +1,173 @@ +from __future__ import annotations + +import importlib +import sys +import types +import warnings + +from pandas.util._exceptions import find_stack_level + +from pandas.util.version import Version + +# Update install.rst when updating versions! + +VERSIONS = { + "bs4": "4.9.3", + "blosc": "1.21.0", + "bottleneck": "1.3.2", + "brotli": "0.7.0", + "fastparquet": "0.4.0", + "fsspec": "2021.07.0", + "html5lib": "1.1", + "hypothesis": "6.13.0", + "gcsfs": "2021.07.0", + "jinja2": "3.0.0", + "lxml.etree": "4.6.3", + "matplotlib": "3.3.2", + "numba": "0.53.1", + "numexpr": "2.7.3", + "odfpy": "1.4.1", + "openpyxl": "3.0.7", + "pandas_gbq": "0.15.0", + "psycopg2": "2.8.6", # (dt dec pq3 ext lo64) + "pymysql": "1.0.2", + "pyarrow": "1.0.1", + "pyreadstat": "1.1.2", + "pytest": "6.0", + "pyxlsb": "1.0.8", + "s3fs": "2021.08.0", + "scipy": "1.7.1", + "snappy": "0.6.0", + "sqlalchemy": "1.4.16", + "tables": "3.6.1", + "tabulate": "0.8.9", + "xarray": "0.19.0", + "xlrd": "2.0.1", + "xlwt": "1.3.0", + "xlsxwriter": "1.4.3", + "zstandard": "0.15.2", + "tzdata": "2022.1", +} + +# A mapping from import name to package name (on PyPI) for packages where +# these two names are different. 
+ +INSTALL_MAPPING = { + "bs4": "beautifulsoup4", + "bottleneck": "Bottleneck", + "brotli": "brotlipy", + "jinja2": "Jinja2", + "lxml.etree": "lxml", + "odf": "odfpy", + "pandas_gbq": "pandas-gbq", + "snappy": "python-snappy", + "sqlalchemy": "SQLAlchemy", + "tables": "pytables", +} + + +def get_version(module: types.ModuleType) -> str: + version = getattr(module, "__version__", None) + if version is None: + # xlrd uses a capitalized attribute name + version = getattr(module, "__VERSION__", None) + + if version is None: + if module.__name__ == "brotli": + # brotli doesn't contain attributes to confirm it's version + return "" + if module.__name__ == "snappy": + # snappy doesn't contain attributes to confirm it's version + # See https://github.com/andrix/python-snappy/pull/119 + return "" + raise ImportError(f"Can't determine version for {module.__name__}") + if module.__name__ == "psycopg2": + # psycopg2 appends " (dt dec pq3 ext lo64)" to it's version + version = version.split()[0] + return version + + +def import_optional_dependency( + name: str, + extra: str = "", + errors: str = "raise", + min_version: str | None = None, +): + """ + Import an optional dependency. + + By default, if a dependency is missing an ImportError with a nice + message will be raised. If a dependency is present, but too old, + we raise. + + Parameters + ---------- + name : str + The module name. + extra : str + Additional text to include in the ImportError message. + errors : str {'raise', 'warn', 'ignore'} + What to do when a dependency is not found or its version is too old. + + * raise : Raise an ImportError + * warn : Only applicable when a module's version is to old. + Warns that the version is too old and returns None + * ignore: If the module is not installed, return None, otherwise, + return the module, even if the version is too old. + It's expected that users validate the version locally when + using ``errors="ignore"`` (see. ``io/html.py``) + min_version : str, default None + Specify a minimum version that is different from the global pandas + minimum version required. + Returns + ------- + maybe_module : Optional[ModuleType] + The imported module, when found and the version is correct. + None is returned when the package is not found and `errors` + is False, or when the package's version is too old and `errors` + is ``'warn'``. + """ + + assert errors in {"warn", "raise", "ignore"} + + package_name = INSTALL_MAPPING.get(name) + install_name = package_name if package_name is not None else name + + msg = ( + f"Missing optional dependency '{install_name}'. {extra} " + f"Use pip or conda to install {install_name}." + ) + try: + module = importlib.import_module(name) + except ImportError: + if errors == "raise": + raise ImportError(msg) + else: + return None + + # Handle submodules: if we have submodule, grab parent module from sys.modules + parent = name.split(".")[0] + if parent != name: + install_name = parent + module_to_get = sys.modules[install_name] + else: + module_to_get = module + minimum_version = min_version if min_version is not None else VERSIONS.get(parent) + if minimum_version: + version = get_version(module_to_get) + if version and Version(version) < Version(minimum_version): + msg = ( + f"Pandas requires version '{minimum_version}' or newer of '{parent}' " + f"(version '{version}' currently installed)." 
+ ) + if errors == "warn": + warnings.warn( + msg, + UserWarning, + stacklevel=find_stack_level(), + ) + return None + elif errors == "raise": + raise ImportError(msg) + + return module diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py new file mode 100644 index 00000000..5bec8e5f --- /dev/null +++ b/pandas/compat/chainmap.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import ( + ChainMap, + TypeVar, +) + +_KT = TypeVar("_KT") +_VT = TypeVar("_VT") + + +class DeepChainMap(ChainMap[_KT, _VT]): + """ + Variant of ChainMap that allows direct updates to inner scopes. + + Only works when all passed mapping are mutable. + """ + + def __setitem__(self, key: _KT, value: _VT) -> None: + for mapping in self.maps: + if key in mapping: + mapping[key] = value + return + self.maps[0][key] = value + + def __delitem__(self, key: _KT) -> None: + """ + Raises + ------ + KeyError + If `key` doesn't exist. + """ + for mapping in self.maps: + if key in mapping: + del mapping[key] + return + raise KeyError(key) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py new file mode 100644 index 00000000..6f31358d --- /dev/null +++ b/pandas/compat/numpy/__init__.py @@ -0,0 +1,34 @@ +""" support numpy compatibility across versions """ +import numpy as np + +from pandas.util.version import Version + +# numpy versioning +_np_version = np.__version__ +_nlv = Version(_np_version) +np_version_under1p21 = _nlv < Version("1.21") +np_version_under1p22 = _nlv < Version("1.22") +np_version_gte1p22 = _nlv >= Version("1.22") +np_version_gte1p24 = _nlv >= Version("1.24") +is_numpy_dev = _nlv.dev is not None +_min_numpy_ver = "1.20.3" + +if is_numpy_dev or not np_version_under1p22: + np_percentile_argname = "method" +else: + np_percentile_argname = "interpolation" + + +if _nlv < Version(_min_numpy_ver): + raise ImportError( + f"this version of pandas is incompatible with numpy < {_min_numpy_ver}\n" + f"your numpy version is {_np_version}.\n" + f"Please upgrade numpy to >= {_min_numpy_ver} to use this pandas version" + ) + + +__all__ = [ + "np", + "_np_version", + "is_numpy_dev", +] diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py new file mode 100644 index 00000000..140d4178 --- /dev/null +++ b/pandas/compat/numpy/function.py @@ -0,0 +1,433 @@ +""" +For compatibility with numpy libraries, pandas functions or methods have to +accept '*args' and '**kwargs' parameters to accommodate numpy arguments that +are not actually used or respected in the pandas implementation. + +To ensure that users do not abuse these parameters, validation is performed in +'validators.py' to make sure that any extra parameters passed correspond ONLY +to those in the numpy signature. Part of that validation includes whether or +not the user attempted to pass in non-default values for these extraneous +parameters. As we want to discourage users from relying on these parameters +when calling the pandas implementation, we want them only to pass in the +default values for these parameters. + +This module provides a set of commonly used default arguments for functions and +methods that are spread throughout the codebase. This module will make it +easier to adjust to future upstream changes in the analogous numpy signatures. 
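+
+For example, ``validate_argmin`` below is constructed with the numpy default
+``{"out": None}``: forwarding ``out=None`` through the pandas method passes
+validation, while any non-default value for ``out`` raises.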
+""" +from __future__ import annotations + +from typing import ( + Any, + TypeVar, + overload, +) + +from numpy import ndarray + +from pandas._libs.lib import ( + is_bool, + is_integer, +) +from pandas._typing import Axis +from pandas.errors import UnsupportedFunctionCall +from pandas.util._validators import ( + validate_args, + validate_args_and_kwargs, + validate_kwargs, +) + +AxisNoneT = TypeVar("AxisNoneT", Axis, None) + + +class CompatValidator: + def __init__( + self, + defaults, + fname=None, + method: str | None = None, + max_fname_arg_count=None, + ) -> None: + self.fname = fname + self.method = method + self.defaults = defaults + self.max_fname_arg_count = max_fname_arg_count + + def __call__( + self, + args, + kwargs, + fname=None, + max_fname_arg_count=None, + method: str | None = None, + ) -> None: + if args or kwargs: + fname = self.fname if fname is None else fname + max_fname_arg_count = ( + self.max_fname_arg_count + if max_fname_arg_count is None + else max_fname_arg_count + ) + method = self.method if method is None else method + + if method == "args": + validate_args(fname, args, max_fname_arg_count, self.defaults) + elif method == "kwargs": + validate_kwargs(fname, kwargs, self.defaults) + elif method == "both": + validate_args_and_kwargs( + fname, args, kwargs, max_fname_arg_count, self.defaults + ) + else: + raise ValueError(f"invalid validation method '{method}'") + + +ARGMINMAX_DEFAULTS = {"out": None} +validate_argmin = CompatValidator( + ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1 +) +validate_argmax = CompatValidator( + ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1 +) + + +def process_skipna(skipna: bool | ndarray | None, args) -> tuple[bool, Any]: + if isinstance(skipna, ndarray) or skipna is None: + args = (skipna,) + args + skipna = True + + return skipna, args + + +def validate_argmin_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool: + """ + If 'Series.argmin' is called via the 'numpy' library, the third parameter + in its signature is 'out', which takes either an ndarray or 'None', so + check if the 'skipna' parameter is either an instance of ndarray or is + None, since 'skipna' itself should be a boolean + """ + skipna, args = process_skipna(skipna, args) + validate_argmin(args, kwargs) + return skipna + + +def validate_argmax_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool: + """ + If 'Series.argmax' is called via the 'numpy' library, the third parameter + in its signature is 'out', which takes either an ndarray or 'None', so + check if the 'skipna' parameter is either an instance of ndarray or is + None, since 'skipna' itself should be a boolean + """ + skipna, args = process_skipna(skipna, args) + validate_argmax(args, kwargs) + return skipna + + +ARGSORT_DEFAULTS: dict[str, int | str | None] = {} +ARGSORT_DEFAULTS["axis"] = -1 +ARGSORT_DEFAULTS["kind"] = "quicksort" +ARGSORT_DEFAULTS["order"] = None +ARGSORT_DEFAULTS["kind"] = None + + +validate_argsort = CompatValidator( + ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both" +) + +# two different signatures of argsort, this second validation for when the +# `kind` param is supported +ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {} +ARGSORT_DEFAULTS_KIND["axis"] = -1 +ARGSORT_DEFAULTS_KIND["order"] = None +validate_argsort_kind = CompatValidator( + ARGSORT_DEFAULTS_KIND, fname="argsort", max_fname_arg_count=0, method="both" +) + + +def validate_argsort_with_ascending(ascending: bool | int | 
None, args, kwargs) -> bool: + """ + If 'Categorical.argsort' is called via the 'numpy' library, the first + parameter in its signature is 'axis', which takes either an integer or + 'None', so check if the 'ascending' parameter has either integer type or is + None, since 'ascending' itself should be a boolean + """ + if is_integer(ascending) or ascending is None: + args = (ascending,) + args + ascending = True + + validate_argsort_kind(args, kwargs, max_fname_arg_count=3) + # error: Incompatible return value type (got "int", expected "bool") + return ascending # type: ignore[return-value] + + +CLIP_DEFAULTS: dict[str, Any] = {"out": None} +validate_clip = CompatValidator( + CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3 +) + + +@overload +def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None: + ... + + +@overload +def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT: + ... + + +def validate_clip_with_axis( + axis: ndarray | AxisNoneT, args, kwargs +) -> AxisNoneT | None: + """ + If 'NDFrame.clip' is called via the numpy library, the third parameter in + its signature is 'out', which can takes an ndarray, so check if the 'axis' + parameter is an instance of ndarray, since 'axis' itself should either be + an integer or None + """ + if isinstance(axis, ndarray): + args = (axis,) + args + # error: Incompatible types in assignment (expression has type "None", + # variable has type "Union[ndarray[Any, Any], str, int]") + axis = None # type: ignore[assignment] + + validate_clip(args, kwargs) + # error: Incompatible return value type (got "Union[ndarray[Any, Any], + # str, int]", expected "Union[str, int, None]") + return axis # type: ignore[return-value] + + +CUM_FUNC_DEFAULTS: dict[str, Any] = {} +CUM_FUNC_DEFAULTS["dtype"] = None +CUM_FUNC_DEFAULTS["out"] = None +validate_cum_func = CompatValidator( + CUM_FUNC_DEFAULTS, method="both", max_fname_arg_count=1 +) +validate_cumsum = CompatValidator( + CUM_FUNC_DEFAULTS, fname="cumsum", method="both", max_fname_arg_count=1 +) + + +def validate_cum_func_with_skipna(skipna, args, kwargs, name) -> bool: + """ + If this function is called via the 'numpy' library, the third parameter in + its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so + check if the 'skipna' parameter is a boolean or not + """ + if not is_bool(skipna): + args = (skipna,) + args + skipna = True + + validate_cum_func(args, kwargs, fname=name) + return skipna + + +ALLANY_DEFAULTS: dict[str, bool | None] = {} +ALLANY_DEFAULTS["dtype"] = None +ALLANY_DEFAULTS["out"] = None +ALLANY_DEFAULTS["keepdims"] = False +ALLANY_DEFAULTS["axis"] = None +validate_all = CompatValidator( + ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1 +) +validate_any = CompatValidator( + ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1 +) + +LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False} +validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs") + +MINMAX_DEFAULTS = {"axis": None, "out": None, "keepdims": False} +validate_min = CompatValidator( + MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1 +) +validate_max = CompatValidator( + MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1 +) + +RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"} +validate_reshape = CompatValidator( + RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1 +) + +REPEAT_DEFAULTS: dict[str, Any] = {"axis": None} +validate_repeat = CompatValidator( + 
REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1 +) + +ROUND_DEFAULTS: dict[str, Any] = {"out": None} +validate_round = CompatValidator( + ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1 +) + +SORT_DEFAULTS: dict[str, int | str | None] = {} +SORT_DEFAULTS["axis"] = -1 +SORT_DEFAULTS["kind"] = "quicksort" +SORT_DEFAULTS["order"] = None +validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs") + +STAT_FUNC_DEFAULTS: dict[str, Any | None] = {} +STAT_FUNC_DEFAULTS["dtype"] = None +STAT_FUNC_DEFAULTS["out"] = None + +SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +SUM_DEFAULTS["axis"] = None +SUM_DEFAULTS["keepdims"] = False +SUM_DEFAULTS["initial"] = None + +PROD_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +PROD_DEFAULTS["axis"] = None +PROD_DEFAULTS["keepdims"] = False +PROD_DEFAULTS["initial"] = None + +MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +MEDIAN_DEFAULTS["overwrite_input"] = False +MEDIAN_DEFAULTS["keepdims"] = False + +STAT_FUNC_DEFAULTS["keepdims"] = False + +validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs") +validate_sum = CompatValidator( + SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1 +) +validate_prod = CompatValidator( + PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1 +) +validate_mean = CompatValidator( + STAT_FUNC_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1 +) +validate_median = CompatValidator( + MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1 +) + +STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {} +STAT_DDOF_FUNC_DEFAULTS["dtype"] = None +STAT_DDOF_FUNC_DEFAULTS["out"] = None +STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False +validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs") + +TAKE_DEFAULTS: dict[str, str | None] = {} +TAKE_DEFAULTS["out"] = None +TAKE_DEFAULTS["mode"] = "raise" +validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs") + + +def validate_take_with_convert(convert: ndarray | bool | None, args, kwargs) -> bool: + """ + If this function is called via the 'numpy' library, the third parameter in + its signature is 'axis', which takes either an ndarray or 'None', so check + if the 'convert' parameter is either an instance of ndarray or is None + """ + if isinstance(convert, ndarray) or convert is None: + args = (convert,) + args + convert = True + + validate_take(args, kwargs, max_fname_arg_count=3, method="both") + return convert + + +TRANSPOSE_DEFAULTS = {"axes": None} +validate_transpose = CompatValidator( + TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0 +) + + +def validate_window_func(name, args, kwargs) -> None: + numpy_args = ("axis", "dtype", "out") + msg = ( + f"numpy operations are not valid with window objects. " + f"Use .{name}() directly instead " + ) + + if len(args) > 0: + raise UnsupportedFunctionCall(msg) + + for arg in numpy_args: + if arg in kwargs: + raise UnsupportedFunctionCall(msg) + + +def validate_rolling_func(name, args, kwargs) -> None: + numpy_args = ("axis", "dtype", "out") + msg = ( + f"numpy operations are not valid with window objects. " + f"Use .rolling(...).{name}() instead " + ) + + if len(args) > 0: + raise UnsupportedFunctionCall(msg) + + for arg in numpy_args: + if arg in kwargs: + raise UnsupportedFunctionCall(msg) + + +def validate_expanding_func(name, args, kwargs) -> None: + numpy_args = ("axis", "dtype", "out") + msg = ( + f"numpy operations are not valid with window objects. 
" + f"Use .expanding(...).{name}() instead " + ) + + if len(args) > 0: + raise UnsupportedFunctionCall(msg) + + for arg in numpy_args: + if arg in kwargs: + raise UnsupportedFunctionCall(msg) + + +def validate_groupby_func(name, args, kwargs, allowed=None) -> None: + """ + 'args' and 'kwargs' should be empty, except for allowed kwargs because all + of their necessary parameters are explicitly listed in the function + signature + """ + if allowed is None: + allowed = [] + + kwargs = set(kwargs) - set(allowed) + + if len(args) + len(kwargs) > 0: + raise UnsupportedFunctionCall( + "numpy operations are not valid with groupby. " + f"Use .groupby(...).{name}() instead" + ) + + +RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var") + + +def validate_resampler_func(method: str, args, kwargs) -> None: + """ + 'args' and 'kwargs' should be empty because all of their necessary + parameters are explicitly listed in the function signature + """ + if len(args) + len(kwargs) > 0: + if method in RESAMPLER_NUMPY_OPS: + raise UnsupportedFunctionCall( + "numpy operations are not valid with resample. " + f"Use .resample(...).{method}() instead" + ) + else: + raise TypeError("too many arguments passed in") + + +def validate_minmax_axis(axis: int | None, ndim: int = 1) -> None: + """ + Ensure that the axis argument passed to min, max, argmin, or argmax is zero + or None, as otherwise it will be incorrectly ignored. + + Parameters + ---------- + axis : int or None + ndim : int, default 1 + + Raises + ------ + ValueError + """ + if axis is None: + return + if axis >= ndim or (axis < 0 and ndim + axis < 0): + raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})") diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py new file mode 100644 index 00000000..813e8de7 --- /dev/null +++ b/pandas/compat/pickle_compat.py @@ -0,0 +1,306 @@ +""" +Support pre-0.12 series pickle compatibility. +""" +from __future__ import annotations + +import contextlib +import copy +import io +import pickle as pkl +from typing import ( + TYPE_CHECKING, + Iterator, +) +import warnings + +import numpy as np + +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.tslibs import BaseOffset + +from pandas import Index +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.internals import BlockManager + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + + +def load_reduce(self): + stack = self.stack + args = stack.pop() + func = stack[-1] + + try: + stack[-1] = func(*args) + return + except TypeError as err: + + # If we have a deprecated function, + # try to replace and try again. + + msg = "_reconstruct: First argument must be a sub-type of ndarray" + + if msg in str(err): + try: + cls = args[0] + stack[-1] = object.__new__(cls) + return + except TypeError: + pass + elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset): + # TypeError: object.__new__(Day) is not safe, use Day.__new__() + cls = args[0] + stack[-1] = cls.__new__(*args) + return + elif args and issubclass(args[0], PeriodArray): + cls = args[0] + stack[-1] = NDArrayBacked.__new__(*args) + return + + raise + + +_sparse_msg = """\ + +Loading a saved '{cls}' as a {new} with sparse values. +'{cls}' is now removed. You should re-save this dataset in its new format. 
+""" + + +class _LoadSparseSeries: + # To load a SparseSeries as a Series[Sparse] + + # https://github.com/python/mypy/issues/1020 + # error: Incompatible return type for "__new__" (returns "Series", but must return + # a subtype of "_LoadSparseSeries") + def __new__(cls) -> Series: # type: ignore[misc] + from pandas import Series + + warnings.warn( + _sparse_msg.format(cls="SparseSeries", new="Series"), + FutureWarning, + stacklevel=6, + ) + + return Series(dtype=object) + + +class _LoadSparseFrame: + # To load a SparseDataFrame as a DataFrame[Sparse] + + # https://github.com/python/mypy/issues/1020 + # error: Incompatible return type for "__new__" (returns "DataFrame", but must + # return a subtype of "_LoadSparseFrame") + def __new__(cls) -> DataFrame: # type: ignore[misc] + from pandas import DataFrame + + warnings.warn( + _sparse_msg.format(cls="SparseDataFrame", new="DataFrame"), + FutureWarning, + stacklevel=6, + ) + + return DataFrame() + + +# If classes are moved, provide compat here. +_class_locations_map = { + ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"), + # 15477 + ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"), + ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"), + ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"), + # 10890 + ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"), + ("pandas.sparse.series", "SparseTimeSeries"): ( + "pandas.core.sparse.series", + "SparseSeries", + ), + # 12588, extensions moving + ("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"), + ("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"), + # 18543 moving period + ("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"), + ("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"), + # 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype + ("pandas.tslib", "__nat_unpickle"): ( + "pandas._libs.tslibs.nattype", + "__nat_unpickle", + ), + ("pandas._libs.tslib", "__nat_unpickle"): ( + "pandas._libs.tslibs.nattype", + "__nat_unpickle", + ), + # 15998 top-level dirs moving + ("pandas.sparse.array", "SparseArray"): ( + "pandas.core.arrays.sparse", + "SparseArray", + ), + ("pandas.sparse.series", "SparseSeries"): ( + "pandas.compat.pickle_compat", + "_LoadSparseSeries", + ), + ("pandas.sparse.frame", "SparseDataFrame"): ( + "pandas.core.sparse.frame", + "_LoadSparseFrame", + ), + ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"), + ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"), + ("pandas.indexes.numeric", "Int64Index"): ( + "pandas.core.indexes.numeric", + "Int64Index", + ), + ("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"), + ("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"), + ("pandas.tseries.index", "_new_DatetimeIndex"): ( + "pandas.core.indexes.datetimes", + "_new_DatetimeIndex", + ), + ("pandas.tseries.index", "DatetimeIndex"): ( + "pandas.core.indexes.datetimes", + "DatetimeIndex", + ), + ("pandas.tseries.period", "PeriodIndex"): ( + "pandas.core.indexes.period", + "PeriodIndex", + ), + # 19269, arrays moving + ("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"), + # 19939, add timedeltaindex, float64index compat from 15998 move + ("pandas.tseries.tdi", "TimedeltaIndex"): ( + "pandas.core.indexes.timedeltas", + "TimedeltaIndex", + ), + 
("pandas.indexes.numeric", "Float64Index"): ( + "pandas.core.indexes.numeric", + "Float64Index", + ), + ("pandas.core.sparse.series", "SparseSeries"): ( + "pandas.compat.pickle_compat", + "_LoadSparseSeries", + ), + ("pandas.core.sparse.frame", "SparseDataFrame"): ( + "pandas.compat.pickle_compat", + "_LoadSparseFrame", + ), +} + + +# our Unpickler sub-class to override methods and some dispatcher +# functions for compat and uses a non-public class of the pickle module. + + +class Unpickler(pkl._Unpickler): + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + return super().find_class(module, name) + + +Unpickler.dispatch = copy.copy(Unpickler.dispatch) +Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce + + +def load_newobj(self): + args = self.stack.pop() + cls = self.stack[-1] + + # compat + if issubclass(cls, Index): + obj = object.__new__(cls) + elif issubclass(cls, DatetimeArray) and not args: + arr = np.array([], dtype="M8[ns]") + obj = cls.__new__(cls, arr, arr.dtype) + elif issubclass(cls, TimedeltaArray) and not args: + arr = np.array([], dtype="m8[ns]") + obj = cls.__new__(cls, arr, arr.dtype) + elif cls is BlockManager and not args: + obj = cls.__new__(cls, (), [], None, False) + else: + obj = cls.__new__(cls, *args) + + self.stack[-1] = obj + + +Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj + + +def load_newobj_ex(self): + kwargs = self.stack.pop() + args = self.stack.pop() + cls = self.stack.pop() + + # compat + if issubclass(cls, Index): + obj = object.__new__(cls) + else: + obj = cls.__new__(cls, *args, **kwargs) + self.append(obj) + + +try: + Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex +except (AttributeError, KeyError): + pass + + +def load(fh, encoding: str | None = None, is_verbose: bool = False): + """ + Load a pickle, with a provided encoding, + + Parameters + ---------- + fh : a filelike object + encoding : an optional encoding + is_verbose : show exception output + """ + try: + fh.seek(0) + if encoding is not None: + up = Unpickler(fh, encoding=encoding) + else: + up = Unpickler(fh) + # "Unpickler" has no attribute "is_verbose" [attr-defined] + up.is_verbose = is_verbose # type: ignore[attr-defined] + + return up.load() + except (ValueError, TypeError): + raise + + +def loads( + bytes_object: bytes, + *, + fix_imports: bool = True, + encoding: str = "ASCII", + errors: str = "strict", +): + """ + Analogous to pickle._loads. + """ + fd = io.BytesIO(bytes_object) + return Unpickler( + fd, fix_imports=fix_imports, encoding=encoding, errors=errors + ).load() + + +@contextlib.contextmanager +def patch_pickle() -> Iterator[None]: + """ + Temporarily patch pickle to use our unpickler. 
+ """ + orig_loads = pkl.loads + try: + setattr(pkl, "loads", loads) + yield + finally: + setattr(pkl, "loads", orig_loads) diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py new file mode 100644 index 00000000..6965865a --- /dev/null +++ b/pandas/compat/pyarrow.py @@ -0,0 +1,30 @@ +""" support pyarrow compatibility across versions """ + +from __future__ import annotations + +from pandas.util.version import Version + +try: + import pyarrow as pa + + _pa_version = pa.__version__ + _palv = Version(_pa_version) + pa_version_under1p01 = _palv < Version("1.0.1") + pa_version_under2p0 = _palv < Version("2.0.0") + pa_version_under3p0 = _palv < Version("3.0.0") + pa_version_under4p0 = _palv < Version("4.0.0") + pa_version_under5p0 = _palv < Version("5.0.0") + pa_version_under6p0 = _palv < Version("6.0.0") + pa_version_under7p0 = _palv < Version("7.0.0") + pa_version_under8p0 = _palv < Version("8.0.0") + pa_version_under9p0 = _palv < Version("9.0.0") +except ImportError: + pa_version_under1p01 = True + pa_version_under2p0 = True + pa_version_under3p0 = True + pa_version_under4p0 = True + pa_version_under5p0 = True + pa_version_under6p0 = True + pa_version_under7p0 = True + pa_version_under8p0 = True + pa_version_under9p0 = True diff --git a/pandas/conftest.py b/pandas/conftest.py new file mode 100644 index 00000000..cf735bf5 --- /dev/null +++ b/pandas/conftest.py @@ -0,0 +1,1881 @@ +""" +This file is very long and growing, but it was decided to not split it yet, as +it's still manageable (2020-03-17, ~1.1k LoC). See gh-31989 + +Instead of splitting it was decided to define sections here: +- Configuration / Settings +- Autouse fixtures +- Common arguments +- Missing values & co. +- Classes +- Indices +- Series' +- DataFrames +- Operators & Operations +- Data sets/files +- Time zones +- Dtypes +- Misc +""" + +from collections import abc +from datetime import ( + date, + datetime, + time, + timedelta, + timezone, +) +from decimal import Decimal +import operator +import os +from typing import Callable + +from dateutil.tz import ( + tzlocal, + tzutc, +) +import hypothesis +from hypothesis import strategies as st +import numpy as np +import pytest +from pytz import ( + FixedOffset, + utc, +) + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, +) + +import pandas as pd +from pandas import ( + DataFrame, + Interval, + Period, + Series, + Timedelta, + Timestamp, +) +import pandas._testing as tm +from pandas.core import ops +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) + +try: + import pyarrow as pa +except ImportError: + has_pyarrow = False +else: + del pa + has_pyarrow = True + +zoneinfo = None +if pd.compat.PY39: + # Import "zoneinfo" could not be resolved (reportMissingImports) + import zoneinfo # type: ignore[no-redef] + + # Although zoneinfo can be imported in Py39, it is effectively + # "not available" without tzdata/IANA tz data. 
+ # We will set zoneinfo to not found in this case + try: + zoneinfo.ZoneInfo("UTC") # type: ignore[attr-defined] + except zoneinfo.ZoneInfoNotFoundError: # type: ignore[attr-defined] + zoneinfo = None + +# Until https://github.com/numpy/numpy/issues/19078 is sorted out, just suppress +suppress_npdev_promotion_warning = pytest.mark.filterwarnings( + "ignore:Promotion of numbers and bools:FutureWarning" +) + +# ---------------------------------------------------------------- +# Configuration / Settings +# ---------------------------------------------------------------- +# pytest + + +def pytest_addoption(parser) -> None: + parser.addoption("--skip-slow", action="store_true", help="skip slow tests") + parser.addoption("--skip-network", action="store_true", help="skip network tests") + parser.addoption("--skip-db", action="store_true", help="skip db tests") + parser.addoption( + "--run-high-memory", action="store_true", help="run high memory tests" + ) + parser.addoption("--only-slow", action="store_true", help="run only slow tests") + parser.addoption( + "--strict-data-files", + action="store_true", + help="Fail if a test is skipped for missing data file.", + ) + + +def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None: + """Ignore doctest warning. + + Parameters + ---------- + item : pytest.Item + pytest test item. + path : str + Module path to Python object, e.g. "pandas.core.frame.DataFrame.append". A + warning will be filtered when item.name ends with in given path. So it is + sufficient to specify e.g. "DataFrame.append". + message : str + Message to be filtered. + """ + if item.name.endswith(path): + item.add_marker(pytest.mark.filterwarnings(f"ignore:{message}")) + + +def pytest_collection_modifyitems(items, config): + skip_slow = config.getoption("--skip-slow") + only_slow = config.getoption("--only-slow") + skip_network = config.getoption("--skip-network") + skip_db = config.getoption("--skip-db") + + marks = [ + (pytest.mark.slow, "slow", skip_slow, "--skip-slow"), + (pytest.mark.network, "network", skip_network, "--network"), + (pytest.mark.db, "db", skip_db, "--skip-db"), + ] + + # Warnings from doctests that can be ignored; place reason in comment above. 
+ # Each entry specifies (path, message) - see the ignore_doctest_warning function + ignored_doctest_warnings = [ + # Deprecations where the docstring will emit a warning + ("DataFrame.append", "The frame.append method is deprecated"), + ("Series.append", "The series.append method is deprecated"), + ("dtypes.common.is_categorical", "is_categorical is deprecated"), + ("Categorical.replace", "Categorical.replace is deprecated"), + ("dtypes.common.is_extension_type", "'is_extension_type' is deprecated"), + ("Index.is_mixed", "Index.is_mixed is deprecated"), + ("MultiIndex._is_lexsorted", "MultiIndex.is_lexsorted is deprecated"), + # Docstring divides by zero to show behavior difference + ("missing.mask_zero_div_zero", "divide by zero encountered"), + # Docstring demonstrates the call raises a warning + ("_validators.validate_axis_style_args", "Use named arguments"), + ] + + for item in items: + if config.getoption("--doctest-modules") or config.getoption( + "--doctest-cython", default=False + ): + # autouse=True for the add_doctest_imports can lead to expensive teardowns + # since doctest_namespace is a session fixture + item.add_marker(pytest.mark.usefixtures("add_doctest_imports")) + + for path, message in ignored_doctest_warnings: + ignore_doctest_warning(item, path, message) + + # mark all tests in the pandas/tests/frame directory with "arraymanager" + if "/frame/" in item.nodeid: + item.add_marker(pytest.mark.arraymanager) + item.add_marker(suppress_npdev_promotion_warning) + + for (mark, kwd, skip_if_found, arg_name) in marks: + if kwd in item.keywords: + # If we're skipping, no need to actually add the marker or look for + # other markers + if skip_if_found: + item.add_marker(pytest.mark.skip(f"skipping due to {arg_name}")) + break + + item.add_marker(mark) + + if only_slow and "slow" not in item.keywords: + item.add_marker(pytest.mark.skip("skipping due to --only-slow")) + + +# Hypothesis +hypothesis.settings.register_profile( + "ci", + # Hypothesis timing checks are tuned for scalars by default, so we bump + # them from 200ms to 500ms per test case as the global default. If this + # is too short for a specific test, (a) try to make it faster, and (b) + # if it really is slow add `@settings(deadline=...)` with a working value, + # or `deadline=None` to entirely disable timeouts for that test. + # 2022-02-09: Changed deadline from 500 -> None. 
Deadline leads to + # non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969) + deadline=None, + suppress_health_check=(hypothesis.HealthCheck.too_slow,), +) +hypothesis.settings.load_profile("ci") + +# Registering these strategies makes them globally available via st.from_type, +# which is use for offsets in tests/tseries/offsets/test_offsets_properties.py +for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()) + ) + +for name in "YearBegin YearEnd BYearBegin BYearEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-5, 5), + normalize=st.booleans(), + month=st.integers(min_value=1, max_value=12), + ), + ) + +for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-24, 24), + normalize=st.booleans(), + startingMonth=st.integers(min_value=1, max_value=12), + ), + ) + + +@pytest.fixture +def add_doctest_imports(doctest_namespace) -> None: + """ + Make `np` and `pd` names available for doctests. + """ + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + + +# ---------------------------------------------------------------- +# Autouse fixtures +# ---------------------------------------------------------------- +@pytest.fixture(autouse=True) +def configure_tests() -> None: + """ + Configure settings for all tests and test modules. + """ + pd.set_option("chained_assignment", "raise") + + +# ---------------------------------------------------------------- +# Common arguments +# ---------------------------------------------------------------- +@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis={repr(x)}") +def axis(request): + """ + Fixture for returning the axis numbers of a DataFrame. + """ + return request.param + + +axis_frame = axis + + +@pytest.fixture(params=[1, "columns"], ids=lambda x: f"axis={repr(x)}") +def axis_1(request): + """ + Fixture for returning aliases of axis 1 of a DataFrame. + """ + return request.param + + +@pytest.fixture(params=[True, False, None]) +def observed(request): + """ + Pass in the observed keyword to groupby for [True, False] + This indicates whether categoricals should return values for + values which are not in the grouper [False / None], or only values which + appear in the grouper [True]. [None] is supported for future compatibility + if we decide to change the default (and would need to warn if this + parameter is not passed). + """ + return request.param + + +@pytest.fixture(params=[True, False, None]) +def ordered(request): + """ + Boolean 'ordered' parameter for Categorical. + """ + return request.param + + +@pytest.fixture(params=["first", "last", False]) +def keep(request): + """ + Valid values for the 'keep' parameter used in + .duplicated or .drop_duplicates + """ + return request.param + + +@pytest.fixture(params=["both", "neither", "left", "right"]) +def inclusive_endpoints_fixture(request): + """ + Fixture for trying all interval 'inclusive' parameters. + """ + return request.param + + +@pytest.fixture(params=["left", "right", "both", "neither"]) +def closed(request): + """ + Fixture for trying all interval closed parameters. 
+ """ + return request.param + + +@pytest.fixture(params=["left", "right", "both", "neither"]) +def other_closed(request): + """ + Secondary closed fixture to allow parametrizing over all pairs of closed. + """ + return request.param + + +@pytest.fixture( + params=[ + None, + "gzip", + "bz2", + "zip", + "xz", + "tar", + pytest.param("zstd", marks=td.skip_if_no("zstandard")), + ] +) +def compression(request): + """ + Fixture for trying common compression types in compression tests. + """ + return request.param + + +@pytest.fixture( + params=[ + "gzip", + "bz2", + "zip", + "xz", + "tar", + pytest.param("zstd", marks=td.skip_if_no("zstandard")), + ] +) +def compression_only(request): + """ + Fixture for trying common compression types in compression tests excluding + uncompressed case. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def writable(request): + """ + Fixture that an array is writable. + """ + return request.param + + +@pytest.fixture(params=["inner", "outer", "left", "right"]) +def join_type(request): + """ + Fixture for trying all types of join operations. + """ + return request.param + + +@pytest.fixture(params=["nlargest", "nsmallest"]) +def nselect_method(request): + """ + Fixture for trying all nselect methods. + """ + return request.param + + +# ---------------------------------------------------------------- +# Missing values & co. +# ---------------------------------------------------------------- +@pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__) +def nulls_fixture(request): + """ + Fixture for each null type in pandas. + """ + return request.param + + +nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture + + +@pytest.fixture(params=[None, np.nan, pd.NaT]) +def unique_nulls_fixture(request): + """ + Fixture for each null type in pandas, each null type exactly once. + """ + return request.param + + +# Generate cartesian product of unique_nulls_fixture: +unique_nulls_fixture2 = unique_nulls_fixture + + +@pytest.fixture(params=tm.NP_NAT_OBJECTS, ids=lambda x: type(x).__name__) +def np_nat_fixture(request): + """ + Fixture for each NaT type in numpy. + """ + return request.param + + +# Generate cartesian product of np_nat_fixture: +np_nat_fixture2 = np_nat_fixture + + +# ---------------------------------------------------------------- +# Classes +# ---------------------------------------------------------------- + + +@pytest.fixture(params=[DataFrame, Series]) +def frame_or_series(request): + """ + Fixture to parametrize over DataFrame and Series. 
+ """ + return request.param + + +# error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]" +@pytest.fixture( + params=[Index, Series], ids=["index", "series"] # type: ignore[list-item] +) +def index_or_series(request): + """ + Fixture to parametrize over Index and Series, made necessary by a mypy + bug, giving an error: + + List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]" + + See GH#29725 + """ + return request.param + + +# Generate cartesian product of index_or_series fixture: +index_or_series2 = index_or_series + + +@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"]) +def index_or_series_or_array(request): + """ + Fixture to parametrize over Index, Series, and ExtensionArray + """ + return request.param + + +@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__) +def box_with_array(request): + """ + Fixture to test behavior for Index, Series, DataFrame, and pandas Array + classes + """ + return request.param + + +box_with_array2 = box_with_array + + +@pytest.fixture +def dict_subclass(): + """ + Fixture for a dictionary subclass. + """ + + class TestSubDict(dict): + def __init__(self, *args, **kwargs) -> None: + dict.__init__(self, *args, **kwargs) + + return TestSubDict + + +@pytest.fixture +def non_dict_mapping_subclass(): + """ + Fixture for a non-mapping dictionary subclass. + """ + + class TestNonDictMapping(abc.Mapping): + def __init__(self, underlying_dict) -> None: + self._data = underlying_dict + + def __getitem__(self, key): + return self._data.__getitem__(key) + + def __iter__(self): + return self._data.__iter__() + + def __len__(self): + return self._data.__len__() + + return TestNonDictMapping + + +# ---------------------------------------------------------------- +# Indices +# ---------------------------------------------------------------- +@pytest.fixture +def multiindex_year_month_day_dataframe_random_data(): + """ + DataFrame with 3 level MultiIndex (year, month, day) covering + first 100 business days from 2000-01-01 with random data + """ + tdf = tm.makeTimeDataFrame(100) + ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() + # use Int64Index, to make sure things work + ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels]) + ymd.index.set_names(["year", "month", "day"], inplace=True) + return ymd + + +@pytest.fixture +def lexsorted_two_level_string_multiindex() -> MultiIndex: + """ + 2-level MultiIndex, lexsorted, with string names. 
+ """ + return MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + + +@pytest.fixture +def multiindex_dataframe_random_data( + lexsorted_two_level_string_multiindex, +) -> DataFrame: + """DataFrame with 2 level MultiIndex with random data""" + index = lexsorted_two_level_string_multiindex + return DataFrame( + np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp") + ) + + +def _create_multiindex(): + """ + MultiIndex used to test the general functionality of this object + """ + + # See Also: tests.multi.conftest.idx + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + return MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + + +def _create_mi_with_dt64tz_level(): + """ + MultiIndex with a level that is a tzaware DatetimeIndex. + """ + # GH#8367 round trip with pickle + return MultiIndex.from_product( + [[1, 2], ["a", "b"], pd.date_range("20130101", periods=3, tz="US/Eastern")], + names=["one", "two", "three"], + ) + + +indices_dict = { + "string": tm.makeStringIndex(100), + "datetime": tm.makeDateIndex(100), + "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), + "period": tm.makePeriodIndex(100), + "timedelta": tm.makeTimedeltaIndex(100), + "int": tm.makeIntIndex(100), + "uint": tm.makeUIntIndex(100), + "range": tm.makeRangeIndex(100), + "float": tm.makeFloatIndex(100), + "complex64": tm.makeFloatIndex(100).astype("complex64"), + "complex128": tm.makeFloatIndex(100).astype("complex128"), + "num_int64": tm.makeNumericIndex(100, dtype="int64"), + "num_int32": tm.makeNumericIndex(100, dtype="int32"), + "num_int16": tm.makeNumericIndex(100, dtype="int16"), + "num_int8": tm.makeNumericIndex(100, dtype="int8"), + "num_uint64": tm.makeNumericIndex(100, dtype="uint64"), + "num_uint32": tm.makeNumericIndex(100, dtype="uint32"), + "num_uint16": tm.makeNumericIndex(100, dtype="uint16"), + "num_uint8": tm.makeNumericIndex(100, dtype="uint8"), + "num_float64": tm.makeNumericIndex(100, dtype="float64"), + "num_float32": tm.makeNumericIndex(100, dtype="float32"), + "bool-object": tm.makeBoolIndex(10).astype(object), + "bool-dtype": Index(np.random.randn(10) < 0), + "categorical": tm.makeCategoricalIndex(100), + "interval": tm.makeIntervalIndex(100), + "empty": Index([]), + "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), + "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(), + "multi": _create_multiindex(), + "repeats": Index([0, 0, 1, 1, 2, 2]), + "nullable_int": Index(np.arange(100), dtype="Int64"), + "nullable_uint": Index(np.arange(100), dtype="UInt16"), + "nullable_float": Index(np.arange(100), dtype="Float32"), + "nullable_bool": Index(np.arange(100).astype(bool), dtype="boolean"), + "string-python": Index(pd.array(tm.makeStringIndex(100), dtype="string[python]")), +} +if has_pyarrow: + idx = Index(pd.array(tm.makeStringIndex(100), dtype="string[pyarrow]")) + indices_dict["string-pyarrow"] = idx + + +@pytest.fixture(params=indices_dict.keys()) +def index(request): + """ + Fixture for many "simple" kinds of indices. + + These indices are unlikely to cover corner cases, e.g. + - no names + - no NaTs/NaNs + - no values near implementation bounds + - ... 
+ """ + # copy to avoid mutation, e.g. setting .name + return indices_dict[request.param].copy() + + +# Needed to generate cartesian product of indices +index_fixture2 = index + + +@pytest.fixture( + params=[ + key for key in indices_dict if not isinstance(indices_dict[key], MultiIndex) + ] +) +def index_flat(request): + """ + index fixture, but excluding MultiIndex cases. + """ + key = request.param + return indices_dict[key].copy() + + +# Alias so we can test with cartesian product of index_flat +index_flat2 = index_flat + + +@pytest.fixture( + params=[ + key + for key in indices_dict + if not ( + key in ["int", "uint", "range", "empty", "repeats", "bool-dtype"] + or key.startswith("num_") + ) + and not isinstance(indices_dict[key], MultiIndex) + ] +) +def index_with_missing(request): + """ + Fixture for indices with missing values. + + Integer-dtype and empty cases are excluded because they cannot hold missing + values. + + MultiIndex is excluded because isna() is not defined for MultiIndex. + """ + + # GH 35538. Use deep copy to avoid illusive bug on np-dev + # GHA pipeline that writes into indices_dict despite copy + ind = indices_dict[request.param].copy(deep=True) + vals = ind.values + if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: + # For setting missing values in the top level of MultiIndex + vals = ind.tolist() + vals[0] = (None,) + vals[0][1:] + vals[-1] = (None,) + vals[-1][1:] + return MultiIndex.from_tuples(vals) + else: + vals[0] = None + vals[-1] = None + return type(ind)(vals) + + +# ---------------------------------------------------------------- +# Series' +# ---------------------------------------------------------------- +@pytest.fixture +def string_series() -> Series: + """ + Fixture for Series of floats with Index of unique strings + """ + s = tm.makeStringSeries() + s.name = "series" + return s + + +@pytest.fixture +def object_series() -> Series: + """ + Fixture for Series of dtype object with Index of unique strings + """ + s = tm.makeObjectSeries() + s.name = "objects" + return s + + +@pytest.fixture +def datetime_series() -> Series: + """ + Fixture for Series of floats with DatetimeIndex + """ + s = tm.makeTimeSeries() + s.name = "ts" + return s + + +def _create_series(index): + """Helper for the _series dict""" + size = len(index) + data = np.random.randn(size) + return Series(data, index=index, name="a") + + +_series = { + f"series-with-{index_id}-index": _create_series(index) + for index_id, index in indices_dict.items() +} + + +@pytest.fixture +def series_with_simple_index(index) -> Series: + """ + Fixture for tests on series with changing types of indices. + """ + return _create_series(index) + + +@pytest.fixture +def series_with_multilevel_index() -> Series: + """ + Fixture with a Series with a 2-level MultiIndex. + """ + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + data = np.random.randn(8) + ser = Series(data, index=index) + ser[3] = np.NaN + return ser + + +_narrow_series = { + f"{dtype.__name__}-series": tm.make_rand_series(name="a", dtype=dtype) + for dtype in tm.NARROW_NP_DTYPES +} + + +_index_or_series_objs = {**indices_dict, **_series, **_narrow_series} + + +@pytest.fixture(params=_index_or_series_objs.keys()) +def index_or_series_obj(request): + """ + Fixture for tests on indexes, series and series with a narrow dtype + copy to avoid mutation, e.g. 
setting .name + """ + return _index_or_series_objs[request.param].copy(deep=True) + + +# ---------------------------------------------------------------- +# DataFrames +# ---------------------------------------------------------------- +@pytest.fixture +def int_frame() -> DataFrame: + """ + Fixture for DataFrame of ints with index of unique strings + + Columns are ['A', 'B', 'C', 'D'] + + A B C D + vpBeWjM651 1 0 1 0 + 5JyxmrP1En -1 0 0 0 + qEDaoD49U2 -1 1 0 0 + m66TkTfsFe 0 0 0 0 + EHPaNzEUFm -1 0 -1 0 + fpRJCevQhi 2 0 0 0 + OlQvnmfi3Q 0 0 -2 0 + ... .. .. .. .. + uB1FPlz4uP 0 0 0 1 + EcSe6yNzCU 0 0 -1 0 + L50VudaiI8 -1 1 -2 0 + y3bpw4nwIp 0 -1 0 0 + H0RdLLwrCT 1 1 0 0 + rY82K0vMwm 0 0 0 0 + 1OPIUjnkjk 2 0 0 0 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getSeriesData()).astype("int64") + + +@pytest.fixture +def datetime_frame() -> DataFrame: + """ + Fixture for DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D'] + + A B C D + 2000-01-03 -1.122153 0.468535 0.122226 1.693711 + 2000-01-04 0.189378 0.486100 0.007864 -1.216052 + 2000-01-05 0.041401 -0.835752 -0.035279 -0.414357 + 2000-01-06 0.430050 0.894352 0.090719 0.036939 + 2000-01-07 -0.620982 -0.668211 -0.706153 1.466335 + 2000-01-10 -0.752633 0.328434 -0.815325 0.699674 + 2000-01-11 -2.236969 0.615737 -0.829076 -1.196106 + ... ... ... ... ... + 2000-02-03 1.642618 -0.579288 0.046005 1.385249 + 2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351 + 2000-02-07 -2.656149 -0.601387 1.410148 0.444150 + 2000-02-08 -1.201881 -1.289040 0.772992 -1.445300 + 2000-02-09 1.377373 0.398619 1.008453 -0.928207 + 2000-02-10 0.473194 -0.636677 0.984058 0.511519 + 2000-02-11 -0.965556 0.408313 -1.312844 -0.381948 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getTimeSeriesData()) + + +@pytest.fixture +def float_frame() -> DataFrame: + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 + qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 + tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 + wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 + M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 + QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 + r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 + ... ... ... ... ... + IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 + lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 + qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 + yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 + 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 + eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 + xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getSeriesData()) + + +@pytest.fixture +def mixed_type_frame() -> DataFrame: + """ + Fixture for DataFrame of float/int/string columns with RangeIndex + Columns are ['a', 'b', 'c', 'float32', 'int32']. + """ + return DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + "float32": np.array([1.0] * 10, dtype="float32"), + "int32": np.array([1] * 10, dtype="int32"), + }, + index=np.arange(10), + ) + + +@pytest.fixture +def rand_series_with_duplicate_datetimeindex() -> Series: + """ + Fixture for Series with a DatetimeIndex that has duplicates. 
+ """ + dates = [ + datetime(2000, 1, 2), + datetime(2000, 1, 2), + datetime(2000, 1, 2), + datetime(2000, 1, 3), + datetime(2000, 1, 3), + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 4), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + ] + + return Series(np.random.randn(len(dates)), index=dates) + + +# ---------------------------------------------------------------- +# Scalars +# ---------------------------------------------------------------- +@pytest.fixture( + params=[ + (Interval(left=0, right=5), IntervalDtype("int64", "right")), + (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")), + (Period("2012-01", freq="M"), "period[M]"), + (Period("2012-02-01", freq="D"), "period[D]"), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + (Timedelta(seconds=500), "timedelta64[ns]"), + ] +) +def ea_scalar_and_dtype(request): + return request.param + + +# ---------------------------------------------------------------- +# Operators & Operations +# ---------------------------------------------------------------- +_all_arithmetic_operators = [ + "__add__", + "__radd__", + "__sub__", + "__rsub__", + "__mul__", + "__rmul__", + "__floordiv__", + "__rfloordiv__", + "__truediv__", + "__rtruediv__", + "__pow__", + "__rpow__", + "__mod__", + "__rmod__", +] + + +@pytest.fixture(params=_all_arithmetic_operators) +def all_arithmetic_operators(request): + """ + Fixture for dunder names for common arithmetic operations. + """ + return request.param + + +@pytest.fixture( + params=[ + operator.add, + ops.radd, + operator.sub, + ops.rsub, + operator.mul, + ops.rmul, + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + operator.pow, + ops.rpow, + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge, + operator.and_, + ops.rand_, + operator.xor, + ops.rxor, + operator.or_, + ops.ror_, + ] +) +def all_binary_operators(request): + """ + Fixture for operator and roperator arithmetic, comparison, and logical ops. + """ + return request.param + + +@pytest.fixture( + params=[ + operator.add, + ops.radd, + operator.sub, + ops.rsub, + operator.mul, + ops.rmul, + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + operator.pow, + ops.rpow, + ] +) +def all_arithmetic_functions(request): + """ + Fixture for operator and roperator arithmetic functions. + + Notes + ----- + This includes divmod and rdivmod, whereas all_arithmetic_operators + does not. + """ + return request.param + + +_all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + "prod", + "std", + "var", + "median", + "kurt", + "skew", +] + + +@pytest.fixture(params=_all_numeric_reductions) +def all_numeric_reductions(request): + """ + Fixture for numeric reduction names. + """ + return request.param + + +_all_boolean_reductions = ["all", "any"] + + +@pytest.fixture(params=_all_boolean_reductions) +def all_boolean_reductions(request): + """ + Fixture for boolean reduction names. + """ + return request.param + + +_all_reductions = _all_numeric_reductions + _all_boolean_reductions + + +@pytest.fixture(params=_all_reductions) +def all_reductions(request): + """ + Fixture for all (boolean + numeric) reduction names. 
+ """ + return request.param + + +@pytest.fixture( + params=[ + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ] +) +def comparison_op(request): + """ + Fixture for operator module comparison functions. + """ + return request.param + + +@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"]) +def compare_operators_no_eq_ne(request): + """ + Fixture for dunder names for compare operations except == and != + + * >= + * > + * < + * <= + """ + return request.param + + +@pytest.fixture( + params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"] +) +def all_logical_operators(request): + """ + Fixture for dunder names for common logical operations + + * | + * & + * ^ + """ + return request.param + + +# ---------------------------------------------------------------- +# Data sets/files +# ---------------------------------------------------------------- +@pytest.fixture +def strict_data_files(pytestconfig): + """ + Returns the configuration for the test setting `--strict-data-files`. + """ + return pytestconfig.getoption("--strict-data-files") + + +@pytest.fixture +def datapath(strict_data_files: str) -> Callable[..., str]: + """ + Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --strict-data-files option is set. + """ + BASE_PATH = os.path.join(os.path.dirname(__file__), "tests") + + def deco(*args): + path = os.path.join(BASE_PATH, *args) + if not os.path.exists(path): + if strict_data_files: + raise ValueError( + f"Could not find file {path} and --strict-data-files is set." + ) + else: + pytest.skip(f"Could not find {path}.") + return path + + return deco + + +@pytest.fixture +def iris(datapath) -> DataFrame: + """ + The iris dataset as a DataFrame. + """ + return pd.read_csv(datapath("io", "data", "csv", "iris.csv")) + + +# ---------------------------------------------------------------- +# Time zones +# ---------------------------------------------------------------- +TIMEZONES = [ + None, + "UTC", + "US/Eastern", + "Asia/Tokyo", + "dateutil/US/Pacific", + "dateutil/Asia/Singapore", + "+01:15", + "-02:15", + "UTC+01:15", + "UTC-02:15", + tzutc(), + tzlocal(), + FixedOffset(300), + FixedOffset(0), + FixedOffset(-300), + timezone.utc, + timezone(timedelta(hours=1)), + timezone(timedelta(hours=-1), name="foo"), +] +if zoneinfo is not None: + TIMEZONES.extend([zoneinfo.ZoneInfo("US/Pacific"), zoneinfo.ZoneInfo("UTC")]) +TIMEZONE_IDS = [repr(i) for i in TIMEZONES] + + +@td.parametrize_fixture_doc(str(TIMEZONE_IDS)) +@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS) +def tz_naive_fixture(request): + """ + Fixture for trying timezones including default (None): {0} + """ + return request.param + + +@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:])) +@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:]) +def tz_aware_fixture(request): + """ + Fixture for trying explicit timezones: {0} + """ + return request.param + + +# Generate cartesian product of tz_aware_fixture: +tz_aware_fixture2 = tz_aware_fixture + + +_UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc] +if zoneinfo is not None: + _UTCS.append(zoneinfo.ZoneInfo("UTC")) + + +@pytest.fixture(params=_UTCS) +def utc_fixture(request): + """ + Fixture to provide variants of UTC timezone strings and tzinfo objects. 
+ """ + return request.param + + +utc_fixture2 = utc_fixture + + +# ---------------------------------------------------------------- +# Dtypes +# ---------------------------------------------------------------- +@pytest.fixture(params=tm.STRING_DTYPES) +def string_dtype(request): + """ + Parametrized fixture for string dtypes. + + * str + * 'str' + * 'U' + """ + return request.param + + +@pytest.fixture( + params=[ + "string[python]", + pytest.param( + "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0") + ), + ] +) +def nullable_string_dtype(request): + """ + Parametrized fixture for string dtypes. + + * 'string[python]' + * 'string[pyarrow]' + """ + return request.param + + +@pytest.fixture( + params=[ + "python", + pytest.param("pyarrow", marks=td.skip_if_no("pyarrow", min_version="1.0.0")), + ] +) +def string_storage(request): + """ + Parametrized fixture for pd.options.mode.string_storage. + + * 'python' + * 'pyarrow' + """ + return request.param + + +# Alias so we can test with cartesian product of string_storage +string_storage2 = string_storage + + +@pytest.fixture(params=tm.BYTES_DTYPES) +def bytes_dtype(request): + """ + Parametrized fixture for bytes dtypes. + + * bytes + * 'bytes' + """ + return request.param + + +@pytest.fixture(params=tm.OBJECT_DTYPES) +def object_dtype(request): + """ + Parametrized fixture for object dtypes. + + * object + * 'object' + """ + return request.param + + +@pytest.fixture( + params=[ + "object", + "string[python]", + pytest.param( + "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0") + ), + ] +) +def any_string_dtype(request): + """ + Parametrized fixture for string dtypes. + * 'object' + * 'string[python]' + * 'string[pyarrow]' + """ + return request.param + + +@pytest.fixture(params=tm.DATETIME64_DTYPES) +def datetime64_dtype(request): + """ + Parametrized fixture for datetime64 dtypes. + + * 'datetime64[ns]' + * 'M8[ns]' + """ + return request.param + + +@pytest.fixture(params=tm.TIMEDELTA64_DTYPES) +def timedelta64_dtype(request): + """ + Parametrized fixture for timedelta64 dtypes. + + * 'timedelta64[ns]' + * 'm8[ns]' + """ + return request.param + + +@pytest.fixture +def fixed_now_ts() -> Timestamp: + """ + Fixture emits fixed Timestamp.now() + """ + return Timestamp( + year=2021, month=1, day=1, hour=12, minute=4, second=13, microsecond=22 + ) + + +@pytest.fixture(params=tm.FLOAT_NUMPY_DTYPES) +def float_numpy_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float + * 'float32' + * 'float64' + """ + return request.param + + +@pytest.fixture(params=tm.FLOAT_EA_DTYPES) +def float_ea_dtype(request): + """ + Parameterized fixture for float dtypes. + + * 'Float32' + * 'Float64' + """ + return request.param + + +@pytest.fixture(params=tm.FLOAT_NUMPY_DTYPES + tm.FLOAT_EA_DTYPES) +def any_float_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float + * 'float32' + * 'float64' + * 'Float32' + * 'Float64' + """ + return request.param + + +@pytest.fixture(params=tm.COMPLEX_DTYPES) +def complex_dtype(request): + """ + Parameterized fixture for complex dtypes. + + * complex + * 'complex64' + * 'complex128' + """ + return request.param + + +@pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES) +def any_signed_int_numpy_dtype(request): + """ + Parameterized fixture for signed integer dtypes. 
+ + * int + * 'int8' + * 'int16' + * 'int32' + * 'int64' + """ + return request.param + + +@pytest.fixture(params=tm.UNSIGNED_INT_NUMPY_DTYPES) +def any_unsigned_int_numpy_dtype(request): + """ + Parameterized fixture for unsigned integer dtypes. + + * 'uint8' + * 'uint16' + * 'uint32' + * 'uint64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_NUMPY_DTYPES) +def any_int_numpy_dtype(request): + """ + Parameterized fixture for any integer dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_EA_DTYPES) +def any_int_ea_dtype(request): + """ + Parameterized fixture for any nullable integer dtype. + + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_NUMPY_DTYPES + tm.ALL_INT_EA_DTYPES) +def any_int_dtype(request): + """ + Parameterized fixture for any nullable integer dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES) +def any_numeric_ea_dtype(request): + """ + Parameterized fixture for any nullable integer dtype and + any float ea dtypes. + + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + * 'Float32' + * 'Float64' + """ + return request.param + + +@pytest.fixture(params=tm.SIGNED_INT_EA_DTYPES) +def any_signed_int_ea_dtype(request): + """ + Parameterized fixture for any signed nullable integer dtype. + + * 'Int8' + * 'Int16' + * 'Int32' + * 'Int64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_REAL_NUMPY_DTYPES) +def any_real_numpy_dtype(request): + """ + Parameterized fixture for any (purely) real numeric dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + """ + return request.param + + +@pytest.fixture(params=tm.ALL_NUMPY_DTYPES) +def any_numpy_dtype(request): + """ + Parameterized fixture for all numpy dtypes. + + * bool + * 'bool' + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + * complex + * 'complex64' + * 'complex128' + * str + * 'str' + * 'U' + * bytes + * 'bytes' + * 'datetime64[ns]' + * 'M8[ns]' + * 'timedelta64[ns]' + * 'm8[ns]' + * object + * 'object' + """ + return request.param + + +@pytest.fixture( + params=tm.ALL_REAL_NUMPY_DTYPES + + tm.COMPLEX_DTYPES + + tm.ALL_INT_EA_DTYPES + + tm.FLOAT_EA_DTYPES +) +def any_numeric_dtype(request): + """ + Parameterized fixture for all numeric dtypes. 
+ + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + * complex + * 'complex64' + * 'complex128' + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + * 'Float32' + * 'Float64' + """ + return request.param + + +# categoricals are handled separately +_any_skipna_inferred_dtype = [ + ("string", ["a", np.nan, "c"]), + ("string", ["a", pd.NA, "c"]), + ("mixed", ["a", pd.NaT, "c"]), # pd.NaT not considered valid by is_string_array + ("bytes", [b"a", np.nan, b"c"]), + ("empty", [np.nan, np.nan, np.nan]), + ("empty", []), + ("mixed-integer", ["a", np.nan, 2]), + ("mixed", ["a", np.nan, 2.0]), + ("floating", [1.0, np.nan, 2.0]), + ("integer", [1, np.nan, 2]), + ("mixed-integer-float", [1, np.nan, 2.0]), + ("decimal", [Decimal(1), np.nan, Decimal(2)]), + ("boolean", [True, np.nan, False]), + ("boolean", [True, pd.NA, False]), + ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]), + ("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]), + ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), + # The following two dtypes are commented out due to GH 23554 + # ('complex', [1 + 1j, np.nan, 2 + 2j]), + # ('timedelta64', [np.timedelta64(1, 'D'), + # np.nan, np.timedelta64(2, 'D')]), + ("timedelta", [timedelta(1), np.nan, timedelta(2)]), + ("time", [time(1), np.nan, time(2)]), + ("period", [Period(2013), pd.NaT, Period(2018)]), + ("interval", [Interval(0, 1), np.nan, Interval(0, 2)]), +] +ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id + + +@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids) +def any_skipna_inferred_dtype(request): + """ + Fixture for all inferred dtypes from _libs.lib.infer_dtype + + The covered (inferred) types are: + * 'string' + * 'empty' + * 'bytes' + * 'mixed' + * 'mixed-integer' + * 'mixed-integer-float' + * 'floating' + * 'integer' + * 'decimal' + * 'boolean' + * 'datetime64' + * 'datetime' + * 'date' + * 'timedelta' + * 'time' + * 'period' + * 'interval' + + Returns + ------- + inferred_dtype : str + The string for the inferred dtype from _libs.lib.infer_dtype + values : np.ndarray + An array of object dtype that will be inferred to have + `inferred_dtype` + + Examples + -------- + >>> import pandas._libs.lib as lib + >>> + >>> def test_something(any_skipna_inferred_dtype): + ... inferred_dtype, values = any_skipna_inferred_dtype + ... # will pass + ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype + """ + inferred_dtype, values = request.param + values = np.array(values, dtype=object) # object dtype to avoid casting + + # correctness of inference tested in tests/dtypes/test_inference.py + return inferred_dtype, values + + +# ---------------------------------------------------------------- +# Misc +# ---------------------------------------------------------------- +@pytest.fixture +def ip(): + """ + Get an instance of IPython.InteractiveShell. + + Will raise a skip if IPython is not installed. 
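+
+    Examples
+    --------
+    A minimal sketch (hypothetical test name); the body only runs when
+    IPython is importable, otherwise the fixture skips the test:
+
+    >>> def test_ipython_namespace(ip):
+    ...     # run a cell in the in-memory shell and inspect its namespace
+    ...     ip.run_cell("import pandas as pd; df = pd.DataFrame()")
+    ...     assert "df" in ip.user_ns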
+ """ + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.interactiveshell import InteractiveShell + + # GH#35711 make sure sqlite history file handle is not leaked + from traitlets.config import Config # isort:skip + + c = Config() + c.HistoryManager.hist_file = ":memory:" + + return InteractiveShell(config=c) + + +@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) +def spmatrix(request): + """ + Yields scipy sparse matrix classes. + """ + from scipy import sparse + + return getattr(sparse, request.param + "_matrix") + + +@pytest.fixture( + params=[ + getattr(pd.offsets, o) + for o in pd.offsets.__all__ + if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) and o != "Tick" + ] +) +def tick_classes(request): + """ + Fixture for Tick based datetime offsets available for a time series. + """ + return request.param + + +@pytest.fixture(params=[None, lambda x: x]) +def sort_by_key(request): + """ + Simple fixture for testing keys in sorting methods. + Tests None (no key) and the identity key. + """ + return request.param + + +@pytest.fixture() +def fsspectest(): + pytest.importorskip("fsspec") + from fsspec import register_implementation + from fsspec.implementations.memory import MemoryFileSystem + from fsspec.registry import _registry as registry + + class TestMemoryFS(MemoryFileSystem): + protocol = "testmem" + test = [None] + + def __init__(self, **kwargs) -> None: + self.test[0] = kwargs.pop("test", None) + super().__init__(**kwargs) + + register_implementation("testmem", TestMemoryFS, clobber=True) + yield TestMemoryFS() + registry.pop("testmem", None) + TestMemoryFS.test[0] = None + TestMemoryFS.store.clear() + + +@pytest.fixture( + params=[ + ("foo", None, None), + ("Egon", "Venkman", None), + ("NCC1701D", "NCC1701D", "NCC1701D"), + # possibly-matching NAs + (np.nan, np.nan, np.nan), + (np.nan, pd.NaT, None), + (np.nan, pd.NA, None), + (pd.NA, pd.NA, pd.NA), + ] +) +def names(request): + """ + A 3-tuple of names, the first two for operands, the last for a result. + """ + return request.param + + +@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc]) +def indexer_sli(request): + """ + Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.loc, tm.iloc]) +def indexer_li(request): + """ + Parametrize over loc.__getitem__, iloc.__getitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.setitem, tm.iloc]) +def indexer_si(request): + """ + Parametrize over __setitem__, iloc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.setitem, tm.loc]) +def indexer_sl(request): + """ + Parametrize over __setitem__, loc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.at, tm.loc]) +def indexer_al(request): + """ + Parametrize over at.__setitem__, loc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.iat, tm.iloc]) +def indexer_ial(request): + """ + Parametrize over iat.__setitem__, iloc.__setitem__ + """ + return request.param + + +@pytest.fixture +def using_array_manager(): + """ + Fixture to check if the array manager is being used. + """ + return pd.options.mode.data_manager == "array" + + +@pytest.fixture +def using_copy_on_write() -> bool: + """ + Fixture to check if Copy-on-Write is enabled. 
+ """ + return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block" diff --git a/pandas/core/__init__.py b/pandas/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/_numba/__init__.py b/pandas/core/_numba/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py new file mode 100644 index 00000000..13d8b52b --- /dev/null +++ b/pandas/core/_numba/executor.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import functools +from typing import ( + TYPE_CHECKING, + Callable, +) + +import numpy as np + +from pandas._typing import Scalar +from pandas.compat._optional import import_optional_dependency + + +@functools.lru_cache(maxsize=None) +def generate_shared_aggregator( + func: Callable[..., Scalar], + nopython: bool, + nogil: bool, + parallel: bool, +): + """ + Generate a Numba function that loops over the columns 2D object and applies + a 1D numba kernel over each column. + + Parameters + ---------- + func : function + aggregation function to be applied to each column + nopython : bool + nopython to be passed into numba.jit + nogil : bool + nogil to be passed into numba.jit + parallel : bool + parallel to be passed into numba.jit + + Returns + ------- + Numba function + """ + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def column_looper( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, + *args, + ): + result = np.empty((len(start), values.shape[1]), dtype=np.float64) + for i in numba.prange(values.shape[1]): + result[:, i] = func(values[:, i], start, end, min_periods, *args) + return result + + return column_looper diff --git a/pandas/core/_numba/kernels/__init__.py b/pandas/core/_numba/kernels/__init__.py new file mode 100644 index 00000000..219ff023 --- /dev/null +++ b/pandas/core/_numba/kernels/__init__.py @@ -0,0 +1,6 @@ +from pandas.core._numba.kernels.mean_ import sliding_mean +from pandas.core._numba.kernels.min_max_ import sliding_min_max +from pandas.core._numba.kernels.sum_ import sliding_sum +from pandas.core._numba.kernels.var_ import sliding_var + +__all__ = ["sliding_mean", "sliding_sum", "sliding_var", "sliding_min_max"] diff --git a/pandas/core/_numba/kernels/mean_.py b/pandas/core/_numba/kernels/mean_.py new file mode 100644 index 00000000..725989e0 --- /dev/null +++ b/pandas/core/_numba/kernels/mean_.py @@ -0,0 +1,150 @@ +""" +Numba 1D mean kernels that can be shared by +* Dataframe / Series +* groupby +* rolling / expanding + +Mirrors pandas/_libs/window/aggregation.pyx +""" +from __future__ import annotations + +import numba +import numpy as np + +from pandas.core._numba.kernels.shared import is_monotonic_increasing + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def add_mean( + val: float, + nobs: int, + sum_x: float, + neg_ct: int, + compensation: float, + num_consecutive_same_value: int, + prev_value: float, +) -> tuple[int, float, int, float, int, float]: + if not np.isnan(val): + nobs += 1 + y = val - compensation + t = sum_x + y + compensation = t - sum_x - y + sum_x = t + if val < 0: + neg_ct += 1 + + if val == prev_value: + num_consecutive_same_value += 1 + else: + num_consecutive_same_value = 1 + prev_value = val + + return nobs, sum_x, neg_ct, compensation, num_consecutive_same_value, prev_value + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def 
remove_mean( + val: float, nobs: int, sum_x: float, neg_ct: int, compensation: float +) -> tuple[int, float, int, float]: + if not np.isnan(val): + nobs -= 1 + y = -val - compensation + t = sum_x + y + compensation = t - sum_x - y + sum_x = t + if val < 0: + neg_ct -= 1 + return nobs, sum_x, neg_ct, compensation + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def sliding_mean( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, +) -> np.ndarray: + N = len(start) + nobs = 0 + sum_x = 0.0 + neg_ct = 0 + compensation_add = 0.0 + compensation_remove = 0.0 + + is_monotonic_increasing_bounds = is_monotonic_increasing( + start + ) and is_monotonic_increasing(end) + + output = np.empty(N, dtype=np.float64) + + for i in range(N): + s = start[i] + e = end[i] + if i == 0 or not is_monotonic_increasing_bounds: + prev_value = values[s] + num_consecutive_same_value = 0 + + for j in range(s, e): + val = values[j] + ( + nobs, + sum_x, + neg_ct, + compensation_add, + num_consecutive_same_value, + prev_value, + ) = add_mean( + val, + nobs, + sum_x, + neg_ct, + compensation_add, + num_consecutive_same_value, + prev_value, + ) + else: + for j in range(start[i - 1], s): + val = values[j] + nobs, sum_x, neg_ct, compensation_remove = remove_mean( + val, nobs, sum_x, neg_ct, compensation_remove + ) + + for j in range(end[i - 1], e): + val = values[j] + ( + nobs, + sum_x, + neg_ct, + compensation_add, + num_consecutive_same_value, + prev_value, + ) = add_mean( + val, + nobs, + sum_x, + neg_ct, + compensation_add, + num_consecutive_same_value, + prev_value, + ) + + if nobs >= min_periods and nobs > 0: + result = sum_x / nobs + if num_consecutive_same_value >= nobs: + result = prev_value + elif neg_ct == 0 and result < 0: + result = 0 + elif neg_ct == nobs and result > 0: + result = 0 + else: + result = np.nan + + output[i] = result + + if not is_monotonic_increasing_bounds: + nobs = 0 + sum_x = 0.0 + neg_ct = 0 + compensation_remove = 0.0 + + return output diff --git a/pandas/core/_numba/kernels/min_max_.py b/pandas/core/_numba/kernels/min_max_.py new file mode 100644 index 00000000..4f237fc1 --- /dev/null +++ b/pandas/core/_numba/kernels/min_max_.py @@ -0,0 +1,70 @@ +""" +Numba 1D min/max kernels that can be shared by +* Dataframe / Series +* groupby +* rolling / expanding + +Mirrors pandas/_libs/window/aggregation.pyx +""" +from __future__ import annotations + +import numba +import numpy as np + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def sliding_min_max( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, + is_max: bool, +) -> np.ndarray: + N = len(start) + nobs = 0 + output = np.empty(N, dtype=np.float64) + # Use deque once numba supports it + # https://github.com/numba/numba/issues/7417 + Q: list = [] + W: list = [] + for i in range(N): + + curr_win_size = end[i] - start[i] + if i == 0: + st = start[i] + else: + st = end[i - 1] + + for k in range(st, end[i]): + ai = values[k] + if not np.isnan(ai): + nobs += 1 + elif is_max: + ai = -np.inf + else: + ai = np.inf + # Discard previous entries if we find new min or max + if is_max: + while Q and ((ai >= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]): + Q.pop() + else: + while Q and ((ai <= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]): + Q.pop() + Q.append(k) + W.append(k) + + # Discard entries outside and left of current window + while Q and Q[0] <= start[i] - 1: + Q.pop(0) + while W and W[0] <= start[i] - 1: + if not np.isnan(values[W[0]]): + nobs -= 1 + W.pop(0) + + 
# Save output based on index in input value array + if Q and curr_win_size > 0 and nobs >= min_periods: + output[i] = values[Q[0]] + else: + output[i] = np.nan + + return output diff --git a/pandas/core/_numba/kernels/shared.py b/pandas/core/_numba/kernels/shared.py new file mode 100644 index 00000000..6e6bcef5 --- /dev/null +++ b/pandas/core/_numba/kernels/shared.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import numba +import numpy as np + + +@numba.jit( + # error: Any? not callable + numba.boolean(numba.int64[:]), # type: ignore[misc] + nopython=True, + nogil=True, + parallel=False, +) +def is_monotonic_increasing(bounds: np.ndarray) -> bool: + """Check if int64 values are monotonically increasing.""" + n = len(bounds) + if n < 2: + return True + prev = bounds[0] + for i in range(1, n): + cur = bounds[i] + if cur < prev: + return False + prev = cur + return True diff --git a/pandas/core/_numba/kernels/sum_.py b/pandas/core/_numba/kernels/sum_.py new file mode 100644 index 00000000..05689718 --- /dev/null +++ b/pandas/core/_numba/kernels/sum_.py @@ -0,0 +1,138 @@ +""" +Numba 1D sum kernels that can be shared by +* Dataframe / Series +* groupby +* rolling / expanding + +Mirrors pandas/_libs/window/aggregation.pyx +""" +from __future__ import annotations + +import numba +import numpy as np + +from pandas.core._numba.kernels.shared import is_monotonic_increasing + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def add_sum( + val: float, + nobs: int, + sum_x: float, + compensation: float, + num_consecutive_same_value: int, + prev_value: float, +) -> tuple[int, float, float, int, float]: + if not np.isnan(val): + nobs += 1 + y = val - compensation + t = sum_x + y + compensation = t - sum_x - y + sum_x = t + + if val == prev_value: + num_consecutive_same_value += 1 + else: + num_consecutive_same_value = 1 + prev_value = val + + return nobs, sum_x, compensation, num_consecutive_same_value, prev_value + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def remove_sum( + val: float, nobs: int, sum_x: float, compensation: float +) -> tuple[int, float, float]: + if not np.isnan(val): + nobs -= 1 + y = -val - compensation + t = sum_x + y + compensation = t - sum_x - y + sum_x = t + return nobs, sum_x, compensation + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def sliding_sum( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, +) -> np.ndarray: + N = len(start) + nobs = 0 + sum_x = 0.0 + compensation_add = 0.0 + compensation_remove = 0.0 + + is_monotonic_increasing_bounds = is_monotonic_increasing( + start + ) and is_monotonic_increasing(end) + + output = np.empty(N, dtype=np.float64) + + for i in range(N): + s = start[i] + e = end[i] + if i == 0 or not is_monotonic_increasing_bounds: + prev_value = values[s] + num_consecutive_same_value = 0 + + for j in range(s, e): + val = values[j] + ( + nobs, + sum_x, + compensation_add, + num_consecutive_same_value, + prev_value, + ) = add_sum( + val, + nobs, + sum_x, + compensation_add, + num_consecutive_same_value, + prev_value, + ) + else: + for j in range(start[i - 1], s): + val = values[j] + nobs, sum_x, compensation_remove = remove_sum( + val, nobs, sum_x, compensation_remove + ) + + for j in range(end[i - 1], e): + val = values[j] + ( + nobs, + sum_x, + compensation_add, + num_consecutive_same_value, + prev_value, + ) = add_sum( + val, + nobs, + sum_x, + compensation_add, + num_consecutive_same_value, + prev_value, + ) + + if nobs == 0 == min_periods: + result = 0.0 + elif nobs 
>= min_periods: + if num_consecutive_same_value >= nobs: + result = prev_value * nobs + else: + result = sum_x + else: + result = np.nan + + output[i] = result + + if not is_monotonic_increasing_bounds: + nobs = 0 + sum_x = 0.0 + compensation_remove = 0.0 + + return output diff --git a/pandas/core/_numba/kernels/var_.py b/pandas/core/_numba/kernels/var_.py new file mode 100644 index 00000000..b1c72832 --- /dev/null +++ b/pandas/core/_numba/kernels/var_.py @@ -0,0 +1,159 @@ +""" +Numba 1D var kernels that can be shared by +* Dataframe / Series +* groupby +* rolling / expanding + +Mirrors pandas/_libs/window/aggregation.pyx +""" +from __future__ import annotations + +import numba +import numpy as np + +from pandas.core._numba.kernels.shared import is_monotonic_increasing + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def add_var( + val: float, + nobs: int, + mean_x: float, + ssqdm_x: float, + compensation: float, + num_consecutive_same_value: int, + prev_value: float, +) -> tuple[int, float, float, float, int, float]: + if not np.isnan(val): + + if val == prev_value: + num_consecutive_same_value += 1 + else: + num_consecutive_same_value = 1 + prev_value = val + + nobs += 1 + prev_mean = mean_x - compensation + y = val - compensation + t = y - mean_x + compensation = t + mean_x - y + delta = t + if nobs: + mean_x += delta / nobs + else: + mean_x = 0 + ssqdm_x += (val - prev_mean) * (val - mean_x) + return nobs, mean_x, ssqdm_x, compensation, num_consecutive_same_value, prev_value + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def remove_var( + val: float, nobs: int, mean_x: float, ssqdm_x: float, compensation: float +) -> tuple[int, float, float, float]: + if not np.isnan(val): + nobs -= 1 + if nobs: + prev_mean = mean_x - compensation + y = val - compensation + t = y - mean_x + compensation = t + mean_x - y + delta = t + mean_x -= delta / nobs + ssqdm_x -= (val - prev_mean) * (val - mean_x) + else: + mean_x = 0 + ssqdm_x = 0 + return nobs, mean_x, ssqdm_x, compensation + + +@numba.jit(nopython=True, nogil=True, parallel=False) +def sliding_var( + values: np.ndarray, + start: np.ndarray, + end: np.ndarray, + min_periods: int, + ddof: int = 1, +) -> np.ndarray: + N = len(start) + nobs = 0 + mean_x = 0.0 + ssqdm_x = 0.0 + compensation_add = 0.0 + compensation_remove = 0.0 + + min_periods = max(min_periods, 1) + is_monotonic_increasing_bounds = is_monotonic_increasing( + start + ) and is_monotonic_increasing(end) + + output = np.empty(N, dtype=np.float64) + + for i in range(N): + s = start[i] + e = end[i] + if i == 0 or not is_monotonic_increasing_bounds: + + prev_value = values[s] + num_consecutive_same_value = 0 + + for j in range(s, e): + val = values[j] + ( + nobs, + mean_x, + ssqdm_x, + compensation_add, + num_consecutive_same_value, + prev_value, + ) = add_var( + val, + nobs, + mean_x, + ssqdm_x, + compensation_add, + num_consecutive_same_value, + prev_value, + ) + else: + for j in range(start[i - 1], s): + val = values[j] + nobs, mean_x, ssqdm_x, compensation_remove = remove_var( + val, nobs, mean_x, ssqdm_x, compensation_remove + ) + + for j in range(end[i - 1], e): + val = values[j] + ( + nobs, + mean_x, + ssqdm_x, + compensation_add, + num_consecutive_same_value, + prev_value, + ) = add_var( + val, + nobs, + mean_x, + ssqdm_x, + compensation_add, + num_consecutive_same_value, + prev_value, + ) + + if nobs >= min_periods and nobs > ddof: + if nobs == 1 or num_consecutive_same_value >= nobs: + result = 0.0 + else: + result = ssqdm_x / (nobs - ddof) + else: + 
result = np.nan + + output[i] = result + + if not is_monotonic_increasing_bounds: + nobs = 0 + mean_x = 0.0 + ssqdm_x = 0.0 + compensation_remove = 0.0 + + return output diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py new file mode 100644 index 00000000..07fa5799 --- /dev/null +++ b/pandas/core/accessor.py @@ -0,0 +1,298 @@ +""" + +accessor.py contains base classes for implementing accessor properties +that can be mixed into or pinned onto other pandas classes. + +""" +from __future__ import annotations + +import warnings + +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + + +class DirNamesMixin: + _accessors: set[str] = set() + _hidden_attrs: frozenset[str] = frozenset() + + def _dir_deletions(self) -> set[str]: + """ + Delete unwanted __dir__ for this object. + """ + return self._accessors | self._hidden_attrs + + def _dir_additions(self) -> set[str]: + """ + Add additional __dir__ for this object. + """ + return {accessor for accessor in self._accessors if hasattr(self, accessor)} + + def __dir__(self) -> list[str]: + """ + Provide method name lookup and completion. + + Notes + ----- + Only provide 'public' methods. + """ + rv = set(super().__dir__()) + rv = (rv - self._dir_deletions()) | self._dir_additions() + return sorted(rv) + + +class PandasDelegate: + """ + Abstract base class for delegating methods/properties. + """ + + def _delegate_property_get(self, name, *args, **kwargs): + raise TypeError(f"You cannot access the property {name}") + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise TypeError(f"The property {name} cannot be set") + + def _delegate_method(self, name, *args, **kwargs): + raise TypeError(f"You cannot call method {name}") + + @classmethod + def _add_delegate_accessors( + cls, delegate, accessors, typ: str, overwrite: bool = False + ): + """ + Add accessors to cls from the delegate class. + + Parameters + ---------- + cls + Class to add the methods/properties to. + delegate + Class to get methods/properties and doc-strings. + accessors : list of str + List of accessors to add. + typ : {'property', 'method'} + overwrite : bool, default False + Overwrite the method/property in the target class if it exists. + """ + + def _create_delegator_property(name): + def _getter(self): + return self._delegate_property_get(name) + + def _setter(self, new_values): + return self._delegate_property_set(name, new_values) + + _getter.__name__ = name + _setter.__name__ = name + + return property( + fget=_getter, fset=_setter, doc=getattr(delegate, name).__doc__ + ) + + def _create_delegator_method(name): + def f(self, *args, **kwargs): + return self._delegate_method(name, *args, **kwargs) + + f.__name__ = name + f.__doc__ = getattr(delegate, name).__doc__ + + return f + + for name in accessors: + + if typ == "property": + f = _create_delegator_property(name) + else: + f = _create_delegator_method(name) + + # don't overwrite existing methods/properties + if overwrite or not hasattr(cls, name): + setattr(cls, name, f) + + +def delegate_names(delegate, accessors, typ: str, overwrite: bool = False): + """ + Add delegated names to a class using a class decorator. This provides + an alternative usage to directly calling `_add_delegate_accessors` + below a class definition. + + Parameters + ---------- + delegate : object + The class to get methods/properties & doc-strings. + accessors : Sequence[str] + List of accessor to add. 
+ typ : {'property', 'method'} + overwrite : bool, default False + Overwrite the method/property in the target class if it exists. + + Returns + ------- + callable + A class decorator. + + Examples + -------- + @delegate_names(Categorical, ["categories", "ordered"], "property") + class CategoricalAccessor(PandasDelegate): + [...] + """ + + def add_delegate_accessors(cls): + cls._add_delegate_accessors(delegate, accessors, typ, overwrite=overwrite) + return cls + + return add_delegate_accessors + + +# Ported with modifications from xarray +# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py +# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors +# 2. We use a UserWarning instead of a custom Warning + + +class CachedAccessor: + """ + Custom property-like object. + + A descriptor for caching accessors. + + Parameters + ---------- + name : str + Namespace that will be accessed under, e.g. ``df.foo``. + accessor : cls + Class with the extension methods. + + Notes + ----- + For accessor, The class's __init__ method assumes that one of + ``Series``, ``DataFrame`` or ``Index`` as the + single argument ``data``. + """ + + def __init__(self, name: str, accessor) -> None: + self._name = name + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + # we're accessing the attribute of the class, i.e., Dataset.geo + return self._accessor + accessor_obj = self._accessor(obj) + # Replace the property with the accessor object. Inspired by: + # https://www.pydanny.com/cached-property.html + # We need to use object.__setattr__ because we overwrite __setattr__ on + # NDFrame + object.__setattr__(obj, self._name, accessor_obj) + return accessor_obj + + +@doc(klass="", others="") +def _register_accessor(name, cls): + """ + Register a custom accessor on {klass} objects. + + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + + Returns + ------- + callable + A class decorator. + + See Also + -------- + register_dataframe_accessor : Register a custom accessor on DataFrame objects. + register_series_accessor : Register a custom accessor on Series objects. + register_index_accessor : Register a custom accessor on Index objects. + + Notes + ----- + When accessed, your accessor will be initialized with the pandas object + the user is interacting with. So the signature must be + + .. code-block:: python + + def __init__(self, pandas_object): # noqa: E999 + ... + + For consistency with pandas methods, you should raise an ``AttributeError`` + if the data passed to your accessor has an incorrect dtype. + + >>> pd.Series(['a', 'b']).dt + Traceback (most recent call last): + ... + AttributeError: Can only use .dt accessor with datetimelike values + + Examples + -------- + In your library code:: + + import pandas as pd + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor: + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + + Back in an interactive IPython session: + + .. 
code-block:: ipython + + In [1]: ds = pd.DataFrame({{"longitude": np.linspace(0, 10), + ...: "latitude": np.linspace(0, 20)}}) + In [2]: ds.geo.center + Out[2]: (5.0, 10.0) + In [3]: ds.geo.plot() # plots data on a map + """ + + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + f"registration of accessor {repr(accessor)} under name " + f"{repr(name)} for type {repr(cls)} is overriding a preexisting " + f"attribute with the same name.", + UserWarning, + stacklevel=find_stack_level(), + ) + setattr(cls, name, CachedAccessor(name, accessor)) + cls._accessors.add(name) + return accessor + + return decorator + + +@doc(_register_accessor, klass="DataFrame") +def register_dataframe_accessor(name): + from pandas import DataFrame + + return _register_accessor(name, DataFrame) + + +@doc(_register_accessor, klass="Series") +def register_series_accessor(name): + from pandas import Series + + return _register_accessor(name, Series) + + +@doc(_register_accessor, klass="Index") +def register_index_accessor(name): + from pandas import Index + + return _register_accessor(name, Index) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py new file mode 100644 index 00000000..9f67b8d4 --- /dev/null +++ b/pandas/core/algorithms.py @@ -0,0 +1,2002 @@ +""" +Generic data algorithms. This module is experimental at the moment and not +intended for public consumption +""" +from __future__ import annotations + +import inspect +import operator +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Hashable, + Literal, + Sequence, + cast, + final, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import ( + algos, + hashtable as htable, + iNaT, + lib, +) +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + DtypeObj, + IndexLabel, + TakeIndexer, + npt, +) +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike, + infer_dtype_from_array, + sanitize_to_nanoseconds, +) +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_object, + ensure_platform_int, + is_array_like, + is_bool_dtype, + is_categorical_dtype, + is_complex_dtype, + is_datetime64_dtype, + is_extension_array_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_signed_integer_dtype, + is_timedelta64_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.dtypes import ( + BaseMaskedDtype, + ExtensionDtype, + PandasDtype, +) +from pandas.core.dtypes.generic import ( + ABCDatetimeArray, + ABCExtensionArray, + ABCIndex, + ABCMultiIndex, + ABCRangeIndex, + ABCSeries, + ABCTimedeltaArray, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) + +from pandas.core.array_algos.take import take_nd +from pandas.core.construction import ( + array as pd_array, + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import validate_indices + +if TYPE_CHECKING: + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + ) + from pandas.core.arrays import ( + BaseMaskedArray, + ExtensionArray, + ) + + +# --------------- # +# dtype access # +# --------------- # +def _ensure_data(values: ArrayLike) -> np.ndarray: + """ + routine to ensure that our data is of the correct + input dtype 
for lower-level routines + + This will coerce: + - ints -> int64 + - uint -> uint64 + - bool -> uint8 + - datetimelike -> i8 + - datetime64tz -> i8 (in local tz) + - categorical -> codes + + Parameters + ---------- + values : np.ndarray or ExtensionArray + + Returns + ------- + np.ndarray + """ + + if not isinstance(values, ABCMultiIndex): + # extract_array would raise + values = extract_array(values, extract_numpy=True) + + if is_object_dtype(values.dtype): + return ensure_object(np.asarray(values)) + + elif isinstance(values.dtype, BaseMaskedDtype): + # i.e. BooleanArray, FloatingArray, IntegerArray + values = cast("BaseMaskedArray", values) + if not values._hasna: + # No pd.NAs -> We can avoid an object-dtype cast (and copy) GH#41816 + # recurse to avoid re-implementing logic for eg bool->uint8 + return _ensure_data(values._data) + return np.asarray(values) + + elif is_categorical_dtype(values.dtype): + # NB: cases that go through here should NOT be using _reconstruct_data + # on the back-end. + values = cast("Categorical", values) + return values.codes + + elif is_bool_dtype(values.dtype): + if isinstance(values, np.ndarray): + # i.e. actually dtype == np.dtype("bool") + return np.asarray(values).view("uint8") + else: + # e.g. Sparse[bool, False] # TODO: no test cases get here + return np.asarray(values).astype("uint8", copy=False) + + elif is_integer_dtype(values.dtype): + return np.asarray(values) + + elif is_float_dtype(values.dtype): + # Note: checking `values.dtype == "float128"` raises on Windows and 32bit + # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype, dtype[Any]]" + # has no attribute "itemsize" + if values.dtype.itemsize in [2, 12, 16]: # type: ignore[union-attr] + # we dont (yet) have float128 hashtable support + return ensure_float64(values) + return np.asarray(values) + + elif is_complex_dtype(values.dtype): + return cast(np.ndarray, values) + + # datetimelike + elif needs_i8_conversion(values.dtype): + if isinstance(values, np.ndarray): + values = sanitize_to_nanoseconds(values) + npvalues = values.view("i8") + npvalues = cast(np.ndarray, npvalues) + return npvalues + + # we have failed, return object + values = np.asarray(values, dtype=object) + return ensure_object(values) + + +def _reconstruct_data( + values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike +) -> ArrayLike: + """ + reverse of _ensure_data + + Parameters + ---------- + values : np.ndarray or ExtensionArray + dtype : np.dtype or ExtensionDtype + original : AnyArrayLike + + Returns + ------- + ExtensionArray or np.ndarray + """ + if isinstance(values, ABCExtensionArray) and values.dtype == dtype: + # Catch DatetimeArray/TimedeltaArray + return values + + if not isinstance(dtype, np.dtype): + # i.e. 
ExtensionDtype; note we have ruled out above the possibility + # that values.dtype == dtype + cls = dtype.construct_array_type() + + values = cls._from_sequence(values, dtype=dtype) + + else: + if is_datetime64_dtype(dtype): + dtype = np.dtype("datetime64[ns]") + elif is_timedelta64_dtype(dtype): + dtype = np.dtype("timedelta64[ns]") + + values = values.astype(dtype, copy=False) + + return values + + +def _ensure_arraylike(values) -> ArrayLike: + """ + ensure that we are arraylike if not already + """ + if not is_array_like(values): + inferred = lib.infer_dtype(values, skipna=False) + if inferred in ["mixed", "string", "mixed-integer"]: + # "mixed-integer" to ensure we do not cast ["ss", 42] to str GH#22160 + if isinstance(values, tuple): + values = list(values) + values = construct_1d_object_array_from_listlike(values) + else: + values = np.asarray(values) + return values + + +_hashtables = { + "complex128": htable.Complex128HashTable, + "complex64": htable.Complex64HashTable, + "float64": htable.Float64HashTable, + "float32": htable.Float32HashTable, + "uint64": htable.UInt64HashTable, + "uint32": htable.UInt32HashTable, + "uint16": htable.UInt16HashTable, + "uint8": htable.UInt8HashTable, + "int64": htable.Int64HashTable, + "int32": htable.Int32HashTable, + "int16": htable.Int16HashTable, + "int8": htable.Int8HashTable, + "string": htable.StringHashTable, + "object": htable.PyObjectHashTable, +} + + +def _get_hashtable_algo(values: np.ndarray): + """ + Parameters + ---------- + values : np.ndarray + + Returns + ------- + htable : HashTable subclass + values : ndarray + """ + values = _ensure_data(values) + + ndtype = _check_object_for_strings(values) + htable = _hashtables[ndtype] + return htable, values + + +def _check_object_for_strings(values: np.ndarray) -> str: + """ + Check if we can use string hashtable instead of object hashtable. + + Parameters + ---------- + values : ndarray + + Returns + ------- + str + """ + ndtype = values.dtype.name + if ndtype == "object": + + # it's cheaper to use a String Hash Table than Object; we infer + # including nulls because that is the only difference between + # StringHashTable and ObjectHashtable + if lib.infer_dtype(values, skipna=False) in ["string"]: + ndtype = "string" + return ndtype + + +# --------------- # +# top-level algos # +# --------------- # + + +def unique(values): + """ + Return unique values based on a hash table. + + Uniques are returned in order of appearance. This does NOT sort. + + Significantly faster than numpy.unique for long enough sequences. + Includes NA values. + + Parameters + ---------- + values : 1d array-like + + Returns + ------- + numpy.ndarray or ExtensionArray + + The return can be: + + * Index : when the input is an Index + * Categorical : when the input is a Categorical dtype + * ndarray : when the input is a Series/ndarray + + Return numpy.ndarray or ExtensionArray. + + See Also + -------- + Index.unique : Return unique values from an Index. + Series.unique : Return unique values of Series object. + + Examples + -------- + >>> pd.unique(pd.Series([2, 1, 3, 3])) + array([2, 1, 3]) + + >>> pd.unique(pd.Series([2] + [1] * 5)) + array([2, 1]) + + >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])) + array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') + + >>> pd.unique( + ... pd.Series( + ... [ + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... ] + ... ) + ... 
) + + ['2016-01-01 00:00:00-05:00'] + Length: 1, dtype: datetime64[ns, US/Eastern] + + >>> pd.unique( + ... pd.Index( + ... [ + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... pd.Timestamp("20160101", tz="US/Eastern"), + ... ] + ... ) + ... ) + DatetimeIndex(['2016-01-01 00:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', + freq=None) + + >>> pd.unique(list("baabc")) + array(['b', 'a', 'c'], dtype=object) + + An unordered Categorical will return categories in the + order of appearance. + + >>> pd.unique(pd.Series(pd.Categorical(list("baabc")))) + ['b', 'a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc")))) + ['b', 'a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + An ordered Categorical preserves the category ordering. + + >>> pd.unique( + ... pd.Series( + ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) + ... ) + ... ) + ['b', 'a', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] + + An array of tuples + + >>> pd.unique([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]) + array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) + """ + return unique_with_mask(values) + + +def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None): + """See algorithms.unique for docs. Takes a mask for masked arrays.""" + values = _ensure_arraylike(values) + + if is_extension_array_dtype(values.dtype): + # Dispatch to extension dtype's unique. + return values.unique() + + original = values + htable, values = _get_hashtable_algo(values) + + table = htable(len(values)) + if mask is None: + uniques = table.unique(values) + uniques = _reconstruct_data(uniques, original.dtype, original) + return uniques + + else: + uniques, mask = table.unique(values, mask=mask) + uniques = _reconstruct_data(uniques, original.dtype, original) + assert mask is not None # for mypy + return uniques, mask.astype("bool") + + +unique1d = unique + + +def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]: + """ + Compute the isin boolean array. + + Parameters + ---------- + comps : array-like + values : array-like + + Returns + ------- + ndarray[bool] + Same length as `comps`. + """ + if not is_list_like(comps): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(comps).__name__}]" + ) + if not is_list_like(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(values).__name__}]" + ) + + if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)): + orig_values = values + values = _ensure_arraylike(list(values)) + + if ( + len(values) > 0 + and is_numeric_dtype(values) + and not is_signed_integer_dtype(comps) + ): + # GH#46485 Use object to avoid upcast to float64 later + # TODO: Share with _find_common_type_compat + values = construct_1d_object_array_from_listlike(list(orig_values)) + + elif isinstance(values, ABCMultiIndex): + # Avoid raising in extract_array + values = np.array(values) + else: + values = extract_array(values, extract_numpy=True, extract_range=True) + + comps_array = _ensure_arraylike(comps) + comps_array = extract_array(comps_array, extract_numpy=True) + if not isinstance(comps_array, np.ndarray): + # i.e. 
Extension Array + return comps_array.isin(values) + + elif needs_i8_conversion(comps_array.dtype): + # Dispatch to DatetimeLikeArrayMixin.isin + return pd_array(comps_array).isin(values) + elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps_array.dtype): + # e.g. comps_array are integers and values are datetime64s + return np.zeros(comps_array.shape, dtype=bool) + # TODO: not quite right ... Sparse/Categorical + elif needs_i8_conversion(values.dtype): + return isin(comps_array, values.astype(object)) + + elif isinstance(values.dtype, ExtensionDtype): + return isin(np.asarray(comps_array), np.asarray(values)) + + # GH16012 + # Ensure np.in1d doesn't get object types or it *may* throw an exception + # Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array), + # in1d is faster for small sizes + if ( + len(comps_array) > 1_000_000 + and len(values) <= 26 + and not is_object_dtype(comps_array) + ): + # If the values include nan we need to check for nan explicitly + # since np.nan it not equal to np.nan + if isna(values).any(): + + def f(c, v): + return np.logical_or(np.in1d(c, v), np.isnan(c)) + + else: + f = np.in1d + + else: + common = np.find_common_type([values.dtype, comps_array.dtype], []) + values = values.astype(common, copy=False) + comps_array = comps_array.astype(common, copy=False) + f = htable.ismember + + return f(comps_array, values) + + +def factorize_array( + values: np.ndarray, + na_sentinel: int | None = -1, + size_hint: int | None = None, + na_value: object = None, + mask: npt.NDArray[np.bool_] | None = None, +) -> tuple[npt.NDArray[np.intp], np.ndarray]: + """ + Factorize a numpy array to codes and uniques. + + This doesn't do any coercion of types or unboxing before factorization. + + Parameters + ---------- + values : ndarray + na_sentinel : int, default -1 + size_hint : int, optional + Passed through to the hashtable's 'get_labels' method + na_value : object, optional + A value in `values` to consider missing. Note: only use this + parameter when you know that you don't have any values pandas would + consider missing in the array (NaN for float data, iNaT for + datetimes, etc.). + mask : ndarray[bool], optional + If not None, the mask is used as indicator for missing values + (True = missing, False = valid) instead of `na_value` or + condition "val != val". + + Returns + ------- + codes : ndarray[np.intp] + uniques : ndarray + """ + ignore_na = na_sentinel is not None + if not ignore_na: + na_sentinel = -1 + + original = values + if values.dtype.kind in ["m", "M"]: + # _get_hashtable_algo will cast dt64/td64 to i8 via _ensure_data, so we + # need to do the same to na_value. We are assuming here that the passed + # na_value is an appropriately-typed NaT. + # e.g. test_where_datetimelike_categorical + na_value = iNaT + + hash_klass, values = _get_hashtable_algo(values) + + table = hash_klass(size_hint or len(values)) + uniques, codes = table.factorize( + values, + na_sentinel=na_sentinel, + na_value=na_value, + mask=mask, + ignore_na=ignore_na, + ) + + # re-cast e.g. i8->dt64/td64, uint8->bool + uniques = _reconstruct_data(uniques, original.dtype, original) + + codes = ensure_platform_int(codes) + return codes, uniques + + +@doc( + values=dedent( + """\ + values : sequence + A 1-D sequence. Sequences that aren't pandas objects are + coerced to ndarrays before factorization. + """ + ), + sort=dedent( + """\ + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. 
+ """ + ), + size_hint=dedent( + """\ + size_hint : int, optional + Hint to the hashtable sizer. + """ + ), +) +def factorize( + values, + sort: bool = False, + na_sentinel: int | None | lib.NoDefault = lib.no_default, + use_na_sentinel: bool | lib.NoDefault = lib.no_default, + size_hint: int | None = None, +) -> tuple[np.ndarray, np.ndarray | Index]: + """ + Encode the object as an enumerated type or categorical variable. + + This method is useful for obtaining a numeric representation of an + array when all that matters is identifying distinct values. `factorize` + is available as both a top-level function :func:`pandas.factorize`, + and as a method :meth:`Series.factorize` and :meth:`Index.factorize`. + + Parameters + ---------- + {values}{sort} + na_sentinel : int or None, default -1 + Value to mark "not found". If None, will not drop the NaN + from the uniques of the values. + + .. deprecated:: 1.5.0 + The na_sentinel argument is deprecated and + will be removed in a future version of pandas. Specify use_na_sentinel as + either True or False. + + .. versionchanged:: 1.1.2 + + use_na_sentinel : bool, default True + If True, the sentinel -1 will be used for NaN values. If False, + NaN values will be encoded as non-negative integers and will not drop the + NaN from the uniques of the values. + + .. versionadded:: 1.5.0 + {size_hint}\ + + Returns + ------- + codes : ndarray + An integer ndarray that's an indexer into `uniques`. + ``uniques.take(codes)`` will have the same values as `values`. + uniques : ndarray, Index, or Categorical + The unique valid values. When `values` is Categorical, `uniques` + is a Categorical. When `values` is some other pandas object, an + `Index` is returned. Otherwise, a 1-D ndarray is returned. + + .. note:: + + Even if there's a missing value in `values`, `uniques` will + *not* contain an entry for it. + + See Also + -------- + cut : Discretize continuous-valued array. + unique : Find the unique value in an array. + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + These examples all show factorize as a top-level method like + ``pd.factorize(values)``. The results are identical for methods like + :meth:`Series.factorize`. + + >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b']) + >>> codes + array([0, 0, 1, 2, 0]...) + >>> uniques + array(['b', 'a', 'c'], dtype=object) + + With ``sort=True``, the `uniques` will be sorted, and `codes` will be + shuffled so that the relationship is the maintained. + + >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True) + >>> codes + array([1, 1, 0, 2, 1]...) + >>> uniques + array(['a', 'b', 'c'], dtype=object) + + When ``use_na_sentinel=True`` (the default), missing values are indicated in + the `codes` with the sentinel value ``-1`` and missing values are not + included in `uniques`. + + >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b']) + >>> codes + array([ 0, -1, 1, 2, 0]...) + >>> uniques + array(['b', 'a', 'c'], dtype=object) + + Thus far, we've only factorized lists (which are internally coerced to + NumPy arrays). When factorizing pandas objects, the type of `uniques` + will differ. For Categoricals, a `Categorical` is returned. + + >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c']) + >>> codes, uniques = pd.factorize(cat) + >>> codes + array([0, 0, 1]...) 
+ >>> uniques + ['a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + Notice that ``'b'`` is in ``uniques.categories``, despite not being + present in ``cat.values``. + + For all other pandas objects, an Index of the appropriate type is + returned. + + >>> cat = pd.Series(['a', 'a', 'c']) + >>> codes, uniques = pd.factorize(cat) + >>> codes + array([0, 0, 1]...) + >>> uniques + Index(['a', 'c'], dtype='object') + + If NaN is in the values, and we want to include NaN in the uniques of the + values, it can be achieved by setting ``use_na_sentinel=False``. + + >>> values = np.array([1, 2, 1, np.nan]) + >>> codes, uniques = pd.factorize(values) # default: use_na_sentinel=True + >>> codes + array([ 0, 1, 0, -1]) + >>> uniques + array([1., 2.]) + + >>> codes, uniques = pd.factorize(values, use_na_sentinel=False) + >>> codes + array([0, 1, 0, 2]) + >>> uniques + array([ 1., 2., nan]) + """ + # Implementation notes: This method is responsible for 3 things + # 1.) coercing data to array-like (ndarray, Index, extension array) + # 2.) factorizing codes and uniques + # 3.) Maybe boxing the uniques in an Index + # + # Step 2 is dispatched to extension types (like Categorical). They are + # responsible only for factorization. All data coercion, sorting and boxing + # should happen here. + + # GH#46910 deprecated na_sentinel in favor of use_na_sentinel: + # na_sentinel=None corresponds to use_na_sentinel=False + # na_sentinel=-1 correspond to use_na_sentinel=True + # Other na_sentinel values will not be supported when the deprecation is enforced. + na_sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel) + if isinstance(values, ABCRangeIndex): + return values.factorize(sort=sort) + + values = _ensure_arraylike(values) + original = values + if not isinstance(values, ABCMultiIndex): + values = extract_array(values, extract_numpy=True) + + # GH35667, if na_sentinel=None, we will not dropna NaNs from the uniques + # of values, assign na_sentinel=-1 to replace code value for NaN. + dropna = na_sentinel is not None + + if ( + isinstance(values, (ABCDatetimeArray, ABCTimedeltaArray)) + and values.freq is not None + ): + # The presence of 'freq' means we can fast-path sorting and know there + # aren't NAs + codes, uniques = values.factorize(sort=sort) + return _re_wrap_factorize(original, uniques, codes) + + elif not isinstance(values.dtype, np.dtype): + if ( + na_sentinel == -1 or na_sentinel is None + ) and "use_na_sentinel" in inspect.signature(values.factorize).parameters: + # Avoid using catch_warnings when possible + # GH#46910 - TimelikeOps has deprecated signature + codes, uniques = values.factorize( # type: ignore[call-arg] + use_na_sentinel=na_sentinel is not None + ) + else: + na_sentinel_arg = -1 if na_sentinel is None else na_sentinel + with warnings.catch_warnings(): + # We've already warned above + warnings.filterwarnings("ignore", ".*use_na_sentinel.*", FutureWarning) + codes, uniques = values.factorize(na_sentinel=na_sentinel_arg) + + else: + values = np.asarray(values) # convert DTA/TDA/MultiIndex + # TODO: pass na_sentinel=na_sentinel to factorize_array. When sort is True and + # na_sentinel is None we append NA on the end because safe_sort does not + # handle null values in uniques. + if na_sentinel is None and sort: + na_sentinel_arg = -1 + elif na_sentinel is None: + na_sentinel_arg = None + else: + na_sentinel_arg = na_sentinel + + if not dropna and not sort and is_object_dtype(values): + # factorize can now handle differentiating various types of null values. 
+ # These can only occur when the array has object dtype. + # However, for backwards compatibility we only use the null for the + # provided dtype. This may be revisited in the future, see GH#48476. + null_mask = isna(values) + if null_mask.any(): + na_value = na_value_for_dtype(values.dtype, compat=False) + # Don't modify (potentially user-provided) array + values = np.where(null_mask, na_value, values) + + codes, uniques = factorize_array( + values, + na_sentinel=na_sentinel_arg, + size_hint=size_hint, + ) + + if sort and len(uniques) > 0: + if na_sentinel is None: + # TODO: Can remove when na_sentinel=na_sentinel as in TODO above + na_sentinel = -1 + uniques, codes = safe_sort( + uniques, codes, na_sentinel=na_sentinel, assume_unique=True, verify=False + ) + + if not dropna and sort: + # TODO: Can remove entire block when na_sentinel=na_sentinel as in TODO above + if na_sentinel is None: + na_sentinel_arg = -1 + else: + na_sentinel_arg = na_sentinel + code_is_na = codes == na_sentinel_arg + if code_is_na.any(): + # na_value is set based on the dtype of uniques, and compat set to False is + # because we do not want na_value to be 0 for integers + na_value = na_value_for_dtype(uniques.dtype, compat=False) + uniques = np.append(uniques, [na_value]) + codes = np.where(code_is_na, len(uniques) - 1, codes) + + uniques = _reconstruct_data(uniques, original.dtype, original) + + return _re_wrap_factorize(original, uniques, codes) + + +def resolve_na_sentinel( + na_sentinel: int | None | lib.NoDefault, + use_na_sentinel: bool | lib.NoDefault, +) -> int | None: + """ + Determine value of na_sentinel for factorize methods. + + See GH#46910 for details on the deprecation. + + Parameters + ---------- + na_sentinel : int, None, or lib.no_default + Value passed to the method. + use_na_sentinel : bool or lib.no_default + Value passed to the method. + + Returns + ------- + Resolved value of na_sentinel. + """ + if na_sentinel is not lib.no_default and use_na_sentinel is not lib.no_default: + raise ValueError( + "Cannot specify both `na_sentinel` and `use_na_sentile`; " + f"got `na_sentinel={na_sentinel}` and `use_na_sentinel={use_na_sentinel}`" + ) + if na_sentinel is lib.no_default: + result = -1 if use_na_sentinel is lib.no_default or use_na_sentinel else None + else: + if na_sentinel is None: + msg = ( + "Specifying `na_sentinel=None` is deprecated, specify " + "`use_na_sentinel=False` instead." + ) + elif na_sentinel == -1: + msg = ( + "Specifying `na_sentinel=-1` is deprecated, specify " + "`use_na_sentinel=True` instead." + ) + else: + msg = ( + "Specifying the specific value to use for `na_sentinel` is " + "deprecated and will be removed in a future version of pandas. " + "Specify `use_na_sentinel=True` to use the sentinel value -1, and " + "`use_na_sentinel=False` to encode NaN values." + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + result = na_sentinel + return result + + +def _re_wrap_factorize(original, uniques, codes: np.ndarray): + """ + Wrap factorize results in Series or Index depending on original type. 
+ """ + if isinstance(original, ABCIndex): + uniques = ensure_wrapped_if_datetimelike(uniques) + uniques = original._shallow_copy(uniques, name=None) + elif isinstance(original, ABCSeries): + from pandas import Index + + uniques = Index(uniques) + + return codes, uniques + + +def value_counts( + values, + sort: bool = True, + ascending: bool = False, + normalize: bool = False, + bins=None, + dropna: bool = True, +) -> Series: + """ + Compute a histogram of the counts of non-null values. + + Parameters + ---------- + values : ndarray (1-d) + sort : bool, default True + Sort by values + ascending : bool, default False + Sort in ascending order + normalize: bool, default False + If True then compute a relative histogram + bins : integer, optional + Rather than count values, group them into half-open bins, + convenience for pd.cut, only works with numeric data + dropna : bool, default True + Don't include counts of NaN + + Returns + ------- + Series + """ + from pandas import ( + Index, + Series, + ) + + name = getattr(values, "name", None) + + if bins is not None: + from pandas.core.reshape.tile import cut + + values = Series(values) + try: + ii = cut(values, bins, include_lowest=True) + except TypeError as err: + raise TypeError("bins argument only works with numeric data.") from err + + # count, remove nulls (from the index), and but the bins + result = ii.value_counts(dropna=dropna) + result = result[result.index.notna()] + result.index = result.index.astype("interval") + result = result.sort_index() + + # if we are dropna and we have NO values + if dropna and (result._values == 0).all(): + result = result.iloc[0:0] + + # normalizing is by len of all (regardless of dropna) + counts = np.array([len(ii)]) + + else: + + if is_extension_array_dtype(values): + + # handle Categorical and sparse, + result = Series(values)._values.value_counts(dropna=dropna) + result.name = name + counts = result._values + + else: + values = _ensure_arraylike(values) + keys, counts = value_counts_arraylike(values, dropna) + + # For backwards compatibility, we let Index do its normal type + # inference, _except_ for if if infers from object to bool. + idx = Index._with_infer(keys) + if idx.dtype == bool and keys.dtype == object: + idx = idx.astype(object) + + result = Series(counts, index=idx, name=name) + + if sort: + result = result.sort_values(ascending=ascending) + + if normalize: + result = result / counts.sum() + + return result + + +# Called once from SparseArray, otherwise could be private +def value_counts_arraylike( + values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = None +) -> tuple[ArrayLike, npt.NDArray[np.int64]]: + """ + Parameters + ---------- + values : np.ndarray + dropna : bool + mask : np.ndarray[bool] or None, default None + + Returns + ------- + uniques : np.ndarray + counts : np.ndarray[np.int64] + """ + original = values + values = _ensure_data(values) + + keys, counts = htable.value_count(values, dropna, mask=mask) + + if needs_i8_conversion(original.dtype): + # datetime, timedelta, or period + + if dropna: + mask = keys != iNaT + keys, counts = keys[mask], counts[mask] + + res_keys = _reconstruct_data(keys, original.dtype, original) + return res_keys, counts + + +def duplicated( + values: ArrayLike, keep: Literal["first", "last", False] = "first" +) -> npt.NDArray[np.bool_]: + """ + Return boolean ndarray denoting duplicate values. + + Parameters + ---------- + values : nd.array, ExtensionArray or Series + Array over which to check for duplicate values. 
+ keep : {'first', 'last', False}, default 'first' + - ``first`` : Mark duplicates as ``True`` except for the first + occurrence. + - ``last`` : Mark duplicates as ``True`` except for the last + occurrence. + - False : Mark all duplicates as ``True``. + + Returns + ------- + duplicated : ndarray[bool] + """ + values = _ensure_data(values) + return htable.duplicated(values, keep=keep) + + +def mode( + values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None +) -> ArrayLike: + """ + Returns the mode(s) of an array. + + Parameters + ---------- + values : array-like + Array over which to check for duplicate values. + dropna : bool, default True + Don't consider counts of NaN/NaT. + + Returns + ------- + np.ndarray or ExtensionArray + """ + values = _ensure_arraylike(values) + original = values + + if needs_i8_conversion(values.dtype): + # Got here with ndarray; dispatch to DatetimeArray/TimedeltaArray. + values = ensure_wrapped_if_datetimelike(values) + values = cast("ExtensionArray", values) + return values._mode(dropna=dropna) + + values = _ensure_data(values) + + npresult = htable.mode(values, dropna=dropna, mask=mask) + try: + npresult = np.sort(npresult) + except TypeError as err: + warnings.warn( + f"Unable to sort modes: {err}", + stacklevel=find_stack_level(), + ) + + result = _reconstruct_data(npresult, original.dtype, original) + return result + + +def rank( + values: ArrayLike, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, +) -> npt.NDArray[np.float64]: + """ + Rank the values along a given axis. + + Parameters + ---------- + values : np.ndarray or ExtensionArray + Array whose values will be ranked. The number of dimensions in this + array must not exceed 2. + axis : int, default 0 + Axis over which to perform rankings. + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + The method by which tiebreaks are broken during the ranking. + na_option : {'keep', 'top'}, default 'keep' + The method by which NaNs are placed in the ranking. + - ``keep``: rank each NaN value with a NaN ranking + - ``top``: replace each NaN with either +/- inf so that they + there are ranked at the top + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to the display the returned rankings in integer form + (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1). + """ + is_datetimelike = needs_i8_conversion(values.dtype) + values = _ensure_data(values) + + if values.ndim == 1: + ranks = algos.rank_1d( + values, + is_datetimelike=is_datetimelike, + ties_method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + elif values.ndim == 2: + ranks = algos.rank_2d( + values, + axis=axis, + is_datetimelike=is_datetimelike, + ties_method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + else: + raise TypeError("Array with ndim > 2 are not supported.") + + return ranks + + +def checked_add_with_arr( + arr: npt.NDArray[np.int64], + b: int | npt.NDArray[np.int64], + arr_mask: npt.NDArray[np.bool_] | None = None, + b_mask: npt.NDArray[np.bool_] | None = None, +) -> npt.NDArray[np.int64]: + """ + Perform array addition that checks for underflow and overflow. + + Performs the addition of an int64 array and an int64 integer (or array) + but checks that they do not result in overflow first. 
For elements that + are indicated to be NaN, whether or not there is overflow for that element + is automatically ignored. + + Parameters + ---------- + arr : np.ndarray[int64] addend. + b : array or scalar addend. + arr_mask : np.ndarray[bool] or None, default None + array indicating which elements to exclude from checking + b_mask : np.ndarray[bool] or None, default None + array or scalar indicating which element(s) to exclude from checking + + Returns + ------- + sum : An array for elements x + b for each element x in arr if b is + a scalar or an array for elements x + y for each element pair + (x, y) in (arr, b). + + Raises + ------ + OverflowError if any x + y exceeds the maximum or minimum int64 value. + """ + # For performance reasons, we broadcast 'b' to the new array 'b2' + # so that it has the same size as 'arr'. + b2 = np.broadcast_to(b, arr.shape) + if b_mask is not None: + # We do the same broadcasting for b_mask as well. + b2_mask = np.broadcast_to(b_mask, arr.shape) + else: + b2_mask = None + + # For elements that are NaN, regardless of their value, we should + # ignore whether they overflow or not when doing the checked add. + if arr_mask is not None and b2_mask is not None: + not_nan = np.logical_not(arr_mask | b2_mask) + elif arr_mask is not None: + not_nan = np.logical_not(arr_mask) + elif b_mask is not None: + # error: Argument 1 to "__call__" of "_UFunc_Nin1_Nout1" has + # incompatible type "Optional[ndarray[Any, dtype[bool_]]]"; + # expected "Union[_SupportsArray[dtype[Any]], _NestedSequence + # [_SupportsArray[dtype[Any]]], bool, int, float, complex, str + # , bytes, _NestedSequence[Union[bool, int, float, complex, str + # , bytes]]]" + not_nan = np.logical_not(b2_mask) # type: ignore[arg-type] + else: + not_nan = np.empty(arr.shape, dtype=bool) + not_nan.fill(True) + + # gh-14324: For each element in 'arr' and its corresponding element + # in 'b2', we check the sign of the element in 'b2'. If it is positive, + # we then check whether its sum with the element in 'arr' exceeds + # np.iinfo(np.int64).max. If so, we have an overflow error. If it + # it is negative, we then check whether its sum with the element in + # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow + # error as well. 
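+    # Worked example of the check below: with arr = [np.iinfo(np.int64).max]
+    # and b = 1, every element of b2 is positive and i8max - b2 < arr
+    # evaluates to True, so the addition would wrap around and an
+    # OverflowError is raised instead.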
+ i8max = lib.i8max + i8min = iNaT + + mask1 = b2 > 0 + mask2 = b2 < 0 + + if not mask1.any(): + to_raise = ((i8min - b2 > arr) & not_nan).any() + elif not mask2.any(): + to_raise = ((i8max - b2 < arr) & not_nan).any() + else: + to_raise = ((i8max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or ( + (i8min - b2[mask2] > arr[mask2]) & not_nan[mask2] + ).any() + + if to_raise: + raise OverflowError("Overflow in int64 addition") + + result = arr + b + if arr_mask is not None or b2_mask is not None: + np.putmask(result, ~not_nan, iNaT) + + return result + + +# --------------- # +# select n # +# --------------- # + + +class SelectN: + def __init__(self, obj, n: int, keep: str) -> None: + self.obj = obj + self.n = n + self.keep = keep + + if self.keep not in ("first", "last", "all"): + raise ValueError('keep must be either "first", "last" or "all"') + + def compute(self, method: str) -> DataFrame | Series: + raise NotImplementedError + + @final + def nlargest(self): + return self.compute("nlargest") + + @final + def nsmallest(self): + return self.compute("nsmallest") + + @final + @staticmethod + def is_valid_dtype_n_method(dtype: DtypeObj) -> bool: + """ + Helper function to determine if dtype is valid for + nsmallest/nlargest methods + """ + return ( + is_numeric_dtype(dtype) and not is_complex_dtype(dtype) + ) or needs_i8_conversion(dtype) + + +class SelectNSeries(SelectN): + """ + Implement n largest/smallest for Series + + Parameters + ---------- + obj : Series + n : int + keep : {'first', 'last'}, default 'first' + + Returns + ------- + nordered : Series + """ + + def compute(self, method: str) -> Series: + + from pandas.core.reshape.concat import concat + + n = self.n + dtype = self.obj.dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError(f"Cannot use method '{method}' with dtype {dtype}") + + if n <= 0: + return self.obj[[]] + + dropped = self.obj.dropna() + nan_index = self.obj.drop(dropped.index) + + # slow method + if n >= len(self.obj): + ascending = method == "nsmallest" + return self.obj.sort_values(ascending=ascending).head(n) + + # fast method + new_dtype = dropped.dtype + arr = _ensure_data(dropped.values) + if method == "nlargest": + arr = -arr + if is_integer_dtype(new_dtype): + # GH 21426: ensure reverse ordering at boundaries + arr -= 1 + + elif is_bool_dtype(new_dtype): + # GH 26154: ensure False is smaller than True + arr = 1 - (-arr) + + if self.keep == "last": + arr = arr[::-1] + + nbase = n + narr = len(arr) + n = min(n, narr) + + # arr passed into kth_smallest must be contiguous. 
We copy + # here because kth_smallest will modify its input + kth_val = algos.kth_smallest(arr.copy(order="C"), n - 1) + (ns,) = np.nonzero(arr <= kth_val) + inds = ns[arr[ns].argsort(kind="mergesort")] + + if self.keep != "all": + inds = inds[:n] + findex = nbase + else: + if len(inds) < nbase and len(nan_index) + len(inds) >= nbase: + findex = len(nan_index) + len(inds) + else: + findex = len(inds) + + if self.keep == "last": + # reverse indices + inds = narr - 1 - inds + + return concat([dropped.iloc[inds], nan_index]).iloc[:findex] + + +class SelectNFrame(SelectN): + """ + Implement n largest/smallest for DataFrame + + Parameters + ---------- + obj : DataFrame + n : int + keep : {'first', 'last'}, default 'first' + columns : list or str + + Returns + ------- + nordered : DataFrame + """ + + def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> None: + super().__init__(obj, n, keep) + if not is_list_like(columns) or isinstance(columns, tuple): + columns = [columns] + + columns = cast(Sequence[Hashable], columns) + columns = list(columns) + self.columns = columns + + def compute(self, method: str) -> DataFrame: + + from pandas.core.api import Int64Index + + n = self.n + frame = self.obj + columns = self.columns + + for column in columns: + dtype = frame[column].dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError( + f"Column {repr(column)} has dtype {dtype}, " + f"cannot use method {repr(method)} with this dtype" + ) + + def get_indexer(current_indexer, other_indexer): + """ + Helper function to concat `current_indexer` and `other_indexer` + depending on `method` + """ + if method == "nsmallest": + return current_indexer.append(other_indexer) + else: + return other_indexer.append(current_indexer) + + # Below we save and reset the index in case index contains duplicates + original_index = frame.index + cur_frame = frame = frame.reset_index(drop=True) + cur_n = n + indexer = Int64Index([]) + + for i, column in enumerate(columns): + # For each column we apply method to cur_frame[column]. + # If it's the last column or if we have the number of + # results desired we are done. + # Otherwise there are duplicates of the largest/smallest + # value and we need to look at the rest of the columns + # to determine which of the rows with the largest/smallest + # value in the column to keep. + series = cur_frame[column] + is_last_column = len(columns) - 1 == i + values = getattr(series, method)( + cur_n, keep=self.keep if is_last_column else "all" + ) + + if is_last_column or len(values) <= cur_n: + indexer = get_indexer(indexer, values.index) + break + + # Now find all values which are equal to + # the (nsmallest: largest)/(nlargest: smallest) + # from our series. + border_value = values == values[values.index[-1]] + + # Some of these values are among the top-n + # some aren't. + unsafe_values = values[border_value] + + # These values are definitely among the top-n + safe_values = values[~border_value] + indexer = get_indexer(indexer, safe_values.index) + + # Go on and separate the unsafe_values on the remaining + # columns. + cur_frame = cur_frame.loc[unsafe_values.index] + cur_n = n - len(indexer) + + frame = frame.take(indexer) + + # Restore the index on frame + frame.index = original_index.take(indexer) + + # If there is only one column, the frame is already sorted. 
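+        # Otherwise a final stable (mergesort) sort over all of the requested
+        # columns puts the rows in the right order.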
+ if len(columns) == 1: + return frame + + ascending = method == "nsmallest" + + return frame.sort_values(columns, ascending=ascending, kind="mergesort") + + +# ---- # +# take # +# ---- # + + +def take( + arr, + indices: TakeIndexer, + axis: int = 0, + allow_fill: bool = False, + fill_value=None, +): + """ + Take elements from an array. + + Parameters + ---------- + arr : array-like or scalar value + Non array-likes (sequences/scalars without a dtype) are coerced + to an ndarray. + indices : sequence of int or one-dimensional np.ndarray of int + Indices to be taken. + axis : int, default 0 + The axis over which to select values. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type (``self.dtype.na_value``) is used. + + For multi-dimensional `arr`, each *element* is filled with + `fill_value`. + + Returns + ------- + ndarray or ExtensionArray + Same type as the input. + + Raises + ------ + IndexError + When `indices` is out of bounds for the array. + ValueError + When the indexer contains negative values other than ``-1`` + and `allow_fill` is True. + + Notes + ----- + When `allow_fill` is False, `indices` may be whatever dimensionality + is accepted by NumPy for `arr`. + + When `allow_fill` is True, `indices` should be 1-D. + + See Also + -------- + numpy.take : Take elements from an array along an axis. + + Examples + -------- + >>> import pandas as pd + + With the default ``allow_fill=False``, negative numbers indicate + positional indices from the right. + + >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1]) + array([10, 10, 30]) + + Setting ``allow_fill=True`` will place `fill_value` in those positions. + + >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True) + array([10., 10., nan]) + + >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True, + ... fill_value=-10) + array([ 10, 10, -10]) + """ + if not is_array_like(arr): + arr = np.asarray(arr) + + indices = np.asarray(indices, dtype=np.intp) + + if allow_fill: + # Pandas style, -1 means NA + validate_indices(indices, arr.shape[axis]) + result = take_nd( + arr, indices, axis=axis, allow_fill=True, fill_value=fill_value + ) + else: + # NumPy style + result = arr.take(indices, axis=axis) + return result + + +# ------------ # +# searchsorted # +# ------------ # + + +def searchsorted( + arr: ArrayLike, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, +) -> npt.NDArray[np.intp] | np.intp: + """ + Find indices where elements should be inserted to maintain order. + + .. versionadded:: 0.25.0 + + Find the indices into a sorted array `arr` (a) such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `arr` would be preserved. 
+ + Assuming that `arr` is sorted: + + ====== ================================ + `side` returned index `i` satisfies + ====== ================================ + left ``arr[i-1] < value <= self[i]`` + right ``arr[i-1] <= value < self[i]`` + ====== ================================ + + Parameters + ---------- + arr: np.ndarray, ExtensionArray, Series + Input array. If `sorter` is None, then it must be sorted in + ascending order, otherwise `sorter` must be an array of indices + that sort it. + value : array-like or scalar + Values to insert into `arr`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array-like, optional + Optional array of integer indices that sort array a into ascending + order. They are typically the result of argsort. + + Returns + ------- + array of ints or int + If value is array-like, array of insertion points. + If value is scalar, a single integer. + + See Also + -------- + numpy.searchsorted : Similar method from NumPy. + """ + if sorter is not None: + sorter = ensure_platform_int(sorter) + + if ( + isinstance(arr, np.ndarray) + and is_integer_dtype(arr.dtype) + and (is_integer(value) or is_integer_dtype(value)) + ): + # if `arr` and `value` have different dtypes, `arr` would be + # recast by numpy, causing a slow search. + # Before searching below, we therefore try to give `value` the + # same dtype as `arr`, while guarding against integer overflows. + iinfo = np.iinfo(arr.dtype.type) + value_arr = np.array([value]) if is_scalar(value) else np.array(value) + if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): + # value within bounds, so no overflow, so can convert value dtype + # to dtype of arr + dtype = arr.dtype + else: + dtype = value_arr.dtype + + if is_scalar(value): + # We know that value is int + value = cast(int, dtype.type(value)) + else: + value = pd_array(cast(ArrayLike, value), dtype=dtype) + else: + # E.g. if `arr` is an array with dtype='datetime64[ns]' + # and `value` is a pd.Timestamp, we may need to convert value + arr = ensure_wrapped_if_datetimelike(arr) + + # Argument 1 to "searchsorted" of "ndarray" has incompatible type + # "Union[NumpyValueArrayLike, ExtensionArray]"; expected "NumpyValueArrayLike" + return arr.searchsorted(value, side=side, sorter=sorter) # type: ignore[arg-type] + + +# ---- # +# diff # +# ---- # + +_diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} + + +def diff(arr, n: int, axis: int = 0): + """ + difference of n between self, + analogous to s-s.shift(n) + + Parameters + ---------- + arr : ndarray or ExtensionArray + n : int + number of periods + axis : {0, 1} + axis to shift on + stacklevel : int, default 3 + The stacklevel for the lost dtype warning. + + Returns + ------- + shifted + """ + + n = int(n) + na = np.nan + dtype = arr.dtype + + is_bool = is_bool_dtype(dtype) + if is_bool: + op = operator.xor + else: + op = operator.sub + + if isinstance(dtype, PandasDtype): + # PandasArray cannot necessarily hold shifted versions of itself. + arr = arr.to_numpy() + dtype = arr.dtype + + if not isinstance(dtype, np.dtype): + # i.e ExtensionDtype + if hasattr(arr, f"__{op.__name__}__"): + if axis != 0: + raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}") + return op(arr, arr.shift(n)) + else: + warnings.warn( + "dtype lost in 'diff()'. 
In the future this will raise a " + "TypeError. Convert to a suitable dtype prior to calling 'diff'.", + FutureWarning, + stacklevel=find_stack_level(), + ) + arr = np.asarray(arr) + dtype = arr.dtype + + is_timedelta = False + if needs_i8_conversion(arr.dtype): + dtype = np.int64 + arr = arr.view("i8") + na = iNaT + is_timedelta = True + + elif is_bool: + # We have to cast in order to be able to hold np.nan + dtype = np.object_ + + elif is_integer_dtype(dtype): + # We have to cast in order to be able to hold np.nan + + # int8, int16 are incompatible with float64, + # see https://github.com/cython/cython/issues/2646 + if arr.dtype.name in ["int8", "int16"]: + dtype = np.float32 + else: + dtype = np.float64 + + orig_ndim = arr.ndim + if orig_ndim == 1: + # reshape so we can always use algos.diff_2d + arr = arr.reshape(-1, 1) + # TODO: require axis == 0 + + dtype = np.dtype(dtype) + out_arr = np.empty(arr.shape, dtype=dtype) + + na_indexer = [slice(None)] * 2 + na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None) + out_arr[tuple(na_indexer)] = na + + if arr.dtype.name in _diff_special: + # TODO: can diff_2d dtype specialization troubles be fixed by defining + # out_arr inside diff_2d? + algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta) + else: + # To keep mypy happy, _res_indexer is a list while res_indexer is + # a tuple, ditto for lag_indexer. + _res_indexer = [slice(None)] * 2 + _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) + res_indexer = tuple(_res_indexer) + + _lag_indexer = [slice(None)] * 2 + _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) + lag_indexer = tuple(_lag_indexer) + + out_arr[res_indexer] = op(arr[res_indexer], arr[lag_indexer]) + + if is_timedelta: + out_arr = out_arr.view("timedelta64[ns]") + + if orig_ndim == 1: + out_arr = out_arr[:, 0] + return out_arr + + +# -------------------------------------------------------------------- +# Helper functions + +# Note: safe_sort is in algorithms.py instead of sorting.py because it is +# low-dependency, is used in this module, and used private methods from +# this module. +def safe_sort( + values, + codes=None, + na_sentinel: int = -1, + assume_unique: bool = False, + verify: bool = True, +) -> np.ndarray | MultiIndex | tuple[np.ndarray | MultiIndex, np.ndarray]: + """ + Sort ``values`` and reorder corresponding ``codes``. + + ``values`` should be unique if ``codes`` is not None. + Safe for use with mixed types (int, str), orders ints before strs. + + Parameters + ---------- + values : list-like + Sequence; must be unique if ``codes`` is not None. + codes : list_like, optional + Indices to ``values``. All out of bound indices are treated as + "not found" and will be masked with ``na_sentinel``. + na_sentinel : int, default -1 + Value in ``codes`` to mark "not found". + Ignored when ``codes`` is None. + assume_unique : bool, default False + When True, ``values`` are assumed to be unique, which can speed up + the calculation. Ignored when ``codes`` is None. + verify : bool, default True + Check if codes are out of bound for the values and put out of bound + codes equal to na_sentinel. If ``verify=False``, it is assumed there + are no out of bound codes. Ignored when ``codes`` is None. + + .. versionadded:: 0.25.0 + + Returns + ------- + ordered : ndarray or MultiIndex + Sorted ``values`` + new_codes : ndarray + Reordered ``codes``; returned when ``codes`` is not None. 
+ + Raises + ------ + TypeError + * If ``values`` is not list-like or if ``codes`` is neither None + nor list-like + * If ``values`` cannot be sorted + ValueError + * If ``codes`` is not None and ``values`` contain duplicates. + """ + if not is_list_like(values): + raise TypeError( + "Only list-like objects are allowed to be passed to safe_sort as values" + ) + original_values = values + is_mi = isinstance(original_values, ABCMultiIndex) + + if not isinstance(values, (np.ndarray, ABCExtensionArray)): + # don't convert to string types + dtype, _ = infer_dtype_from_array(values) + # error: Argument "dtype" to "asarray" has incompatible type "Union[dtype[Any], + # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + values = np.asarray(values, dtype=dtype) # type: ignore[arg-type] + + sorter = None + ordered: np.ndarray | MultiIndex + + if ( + not is_extension_array_dtype(values) + and lib.infer_dtype(values, skipna=False) == "mixed-integer" + ): + ordered = _sort_mixed(values) + else: + try: + sorter = values.argsort() + if is_mi: + # Operate on original object instead of casted array (MultiIndex) + ordered = original_values.take(sorter) + else: + ordered = values.take(sorter) + except TypeError: + # Previous sorters failed or were not applicable, try `_sort_mixed` + # which would work, but which fails for special case of 1d arrays + # with tuples. + if values.size and isinstance(values[0], tuple): + ordered = _sort_tuples(values, original_values) + else: + ordered = _sort_mixed(values) + + # codes: + + if codes is None: + return ordered + + if not is_list_like(codes): + raise TypeError( + "Only list-like objects or None are allowed to " + "be passed to safe_sort as codes" + ) + codes = ensure_platform_int(np.asarray(codes)) + + if not assume_unique and not len(unique(values)) == len(values): + raise ValueError("values should be unique if codes is not None") + + if sorter is None: + # mixed types + hash_klass, values = _get_hashtable_algo(values) + t = hash_klass(len(values)) + t.map_locations(values) + sorter = ensure_platform_int(t.lookup(ordered)) + + if na_sentinel == -1: + # take_nd is faster, but only works for na_sentinels of -1 + order2 = sorter.argsort() + new_codes = take_nd(order2, codes, fill_value=-1) + if verify: + mask = (codes < -len(values)) | (codes >= len(values)) + else: + mask = None + else: + reverse_indexer = np.empty(len(sorter), dtype=np.int_) + reverse_indexer.put(sorter, np.arange(len(sorter))) + # Out of bound indices will be masked with `na_sentinel` next, so we + # may deal with them here without performance loss using `mode='wrap'` + new_codes = reverse_indexer.take(codes, mode="wrap") + + mask = codes == na_sentinel + if verify: + mask = mask | (codes < -len(values)) | (codes >= len(values)) + + if mask is not None: + np.putmask(new_codes, mask, na_sentinel) + + return ordered, ensure_platform_int(new_codes) + + +def _sort_mixed(values) -> np.ndarray: + """order ints before strings in 1d arrays, safe in py3""" + str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) + none_pos = np.array([x is None for x in values], dtype=bool) + nums = np.sort(values[~str_pos & ~none_pos]) + strs = np.sort(values[str_pos]) + return np.concatenate( + [nums, np.asarray(strs, dtype=object), np.array(values[none_pos])] + ) + + +@overload +def _sort_tuples(values: np.ndarray, original_values: np.ndarray) -> np.ndarray: + ... 
+ + +@overload +def _sort_tuples(values: np.ndarray, original_values: MultiIndex) -> MultiIndex: + ... + + +def _sort_tuples( + values: np.ndarray, original_values: np.ndarray | MultiIndex +) -> np.ndarray | MultiIndex: + """ + Convert array of tuples (1d) to array or array (2d). + We need to keep the columns separately as they contain different types and + nans (can't use `np.sort` as it may fail when str and nan are mixed in a + column as types cannot be compared). + We have to apply the indexer to the original values to keep the dtypes in + case of MultiIndexes + """ + from pandas.core.internals.construction import to_arrays + from pandas.core.sorting import lexsort_indexer + + arrays, _ = to_arrays(values, None) + indexer = lexsort_indexer(arrays, orders=True) + return original_values[indexer] + + +def union_with_duplicates(lvals: ArrayLike, rvals: ArrayLike) -> ArrayLike: + """ + Extracts the union from lvals and rvals with respect to duplicates and nans in + both arrays. + + Parameters + ---------- + lvals: np.ndarray or ExtensionArray + left values which is ordered in front. + rvals: np.ndarray or ExtensionArray + right values ordered after lvals. + + Returns + ------- + np.ndarray or ExtensionArray + Containing the unsorted union of both arrays. + + Notes + ----- + Caller is responsible for ensuring lvals.dtype == rvals.dtype. + """ + indexer = [] + l_count = value_counts(lvals, dropna=False) + r_count = value_counts(rvals, dropna=False) + l_count, r_count = l_count.align(r_count, fill_value=0) + unique_array = unique(concat_compat([lvals, rvals])) + unique_array = ensure_wrapped_if_datetimelike(unique_array) + + for i, value in enumerate(unique_array): + indexer += [i] * int(max(l_count.at[value], r_count.at[value])) + return unique_array.take(indexer) diff --git a/pandas/core/api.py b/pandas/core/api.py new file mode 100644 index 00000000..3d2547fc --- /dev/null +++ b/pandas/core/api.py @@ -0,0 +1,148 @@ +from pandas._libs import ( + NaT, + Period, + Timedelta, + Timestamp, +) +from pandas._libs.missing import NA + +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import ( + isna, + isnull, + notna, + notnull, +) + +from pandas.core.algorithms import ( + factorize, + unique, + value_counts, +) +from pandas.core.arrays import Categorical +from pandas.core.arrays.arrow import ArrowDtype +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) +from pandas.core.arrays.string_ import StringDtype +from pandas.core.construction import array +from pandas.core.flags import Flags +from pandas.core.groupby import ( + Grouper, + NamedAgg, +) +from pandas.core.indexes.api import ( + CategoricalIndex, + DatetimeIndex, + Float64Index, + Index, + Int64Index, + IntervalIndex, + MultiIndex, + NumericIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, + UInt64Index, +) +from pandas.core.indexes.datetimes import ( + bdate_range, + date_range, +) +from pandas.core.indexes.interval import ( + Interval, + interval_range, +) +from pandas.core.indexes.period import period_range +from pandas.core.indexes.timedeltas import timedelta_range +from pandas.core.indexing import IndexSlice +from pandas.core.series import Series +from pandas.core.tools.datetimes import to_datetime +from 
pandas.core.tools.numeric import to_numeric +from pandas.core.tools.timedeltas import to_timedelta + +from pandas.io.formats.format import set_eng_float_format +from pandas.tseries.offsets import DateOffset + +# DataFrame needs to be imported after NamedAgg to avoid a circular import +from pandas.core.frame import DataFrame # isort:skip + +__all__ = [ + "array", + "ArrowDtype", + "bdate_range", + "BooleanDtype", + "Categorical", + "CategoricalDtype", + "CategoricalIndex", + "DataFrame", + "DateOffset", + "date_range", + "DatetimeIndex", + "DatetimeTZDtype", + "factorize", + "Flags", + "Float32Dtype", + "Float64Dtype", + "Float64Index", + "Grouper", + "Index", + "IndexSlice", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "Int64Index", + "Int8Dtype", + "Interval", + "IntervalDtype", + "IntervalIndex", + "interval_range", + "isna", + "isnull", + "MultiIndex", + "NA", + "NamedAgg", + "NaT", + "notna", + "notnull", + "NumericIndex", + "Period", + "PeriodDtype", + "PeriodIndex", + "period_range", + "RangeIndex", + "Series", + "set_eng_float_format", + "StringDtype", + "Timedelta", + "TimedeltaIndex", + "timedelta_range", + "Timestamp", + "to_datetime", + "to_numeric", + "to_timedelta", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "UInt64Index", + "UInt8Dtype", + "unique", + "value_counts", +] diff --git a/pandas/core/apply.py b/pandas/core/apply.py new file mode 100644 index 00000000..4987a18a --- /dev/null +++ b/pandas/core/apply.py @@ -0,0 +1,1603 @@ +from __future__ import annotations + +import abc +from collections import defaultdict +from functools import partial +import inspect +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + DefaultDict, + Dict, + Hashable, + Iterable, + Iterator, + List, + Sequence, + cast, +) +import warnings + +import numpy as np + +from pandas._config import option_context + +from pandas._libs import lib +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + AggFuncTypeDict, + AggObjType, + Axis, + NDFrameT, + npt, +) +from pandas.errors import ( + DataError, + SpecificationError, +) +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import ( + find_stack_level, + rewrite_warning, +) + +from pandas.core.dtypes.cast import is_nested_object +from pandas.core.dtypes.common import ( + is_dict_like, + is_extension_array_dtype, + is_list_like, + is_sequence, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCNDFrame, + ABCSeries, +) + +from pandas.core.algorithms import safe_sort +from pandas.core.base import SelectionMixin +import pandas.core.common as com +from pandas.core.construction import ( + create_series_with_explicit_dtype, + ensure_wrapped_if_datetimelike, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Index, + Series, + ) + from pandas.core.groupby import GroupBy + from pandas.core.resample import Resampler + from pandas.core.window.rolling import BaseWindow + + +ResType = Dict[int, Any] + + +def frame_apply( + obj: DataFrame, + func: AggFuncType, + axis: Axis = 0, + raw: bool = False, + result_type: str | None = None, + args=None, + kwargs=None, +) -> FrameApply: + """construct and return a row or column based frame apply object""" + axis = obj._get_axis_number(axis) + klass: type[FrameApply] + if axis == 0: + klass = FrameRowApply + elif axis == 1: + klass = FrameColumnApply + + return klass( + obj, + func, + raw=raw, + result_type=result_type, + args=args, + kwargs=kwargs, + ) + + +class Apply(metaclass=abc.ABCMeta): + axis: int + + def __init__( + self, + 
obj: AggObjType, + func, + raw: bool, + result_type: str | None, + args, + kwargs, + ) -> None: + self.obj = obj + self.raw = raw + self.args = args or () + self.kwargs = kwargs or {} + + if result_type not in [None, "reduce", "broadcast", "expand"]: + raise ValueError( + "invalid value for result_type, must be one " + "of {None, 'reduce', 'broadcast', 'expand'}" + ) + + self.result_type = result_type + + # curry if needed + if ( + (kwargs or args) + and not isinstance(func, (np.ufunc, str)) + and not is_list_like(func) + ): + + def f(x): + return func(x, *args, **kwargs) + + else: + f = func + + self.orig_f: AggFuncType = func + self.f: AggFuncType = f + + @abc.abstractmethod + def apply(self) -> DataFrame | Series: + pass + + def agg(self) -> DataFrame | Series | None: + """ + Provide an implementation for the aggregators. + + Returns + ------- + Result of aggregation, or None if agg cannot be performed by + this method. + """ + obj = self.obj + arg = self.f + args = self.args + kwargs = self.kwargs + + if isinstance(arg, str): + return self.apply_str() + + if is_dict_like(arg): + return self.agg_dict_like() + elif is_list_like(arg): + # we require a list, but not a 'str' + return self.agg_list_like() + + if callable(arg): + f = com.get_cython_func(arg) + if f and not args and not kwargs: + # GH#50538 + old_msg = "The default value of numeric_only" + new_msg = ( + f"The operation {arg} failed on a column. If any error is " + f"raised, this will raise an exception in a future version " + f"of pandas. Drop these columns to avoid this warning." + ) + with rewrite_warning(old_msg, FutureWarning, new_msg): + return getattr(obj, f)() + + # caller can react + return None + + def transform(self) -> DataFrame | Series: + """ + Transform a DataFrame or Series. + + Returns + ------- + DataFrame or Series + Result of applying ``func`` along the given axis of the + Series or DataFrame. + + Raises + ------ + ValueError + If the transform function fails or does not transform. 
+ """ + obj = self.obj + func = self.orig_f + axis = self.axis + args = self.args + kwargs = self.kwargs + + is_series = obj.ndim == 1 + + if obj._get_axis_number(axis) == 1: + assert not is_series + return obj.T.transform(func, 0, *args, **kwargs).T + + if is_list_like(func) and not is_dict_like(func): + func = cast(List[AggFuncTypeBase], func) + # Convert func equivalent dict + if is_series: + func = {com.get_callable_name(v) or v: v for v in func} + else: + func = {col: func for col in obj} + + if is_dict_like(func): + func = cast(AggFuncTypeDict, func) + return self.transform_dict_like(func) + + # func is either str or callable + func = cast(AggFuncTypeBase, func) + try: + result = self.transform_str_or_callable(func) + except TypeError: + raise + except Exception as err: + raise ValueError("Transform function failed") from err + + # Functions that transform may return empty Series/DataFrame + # when the dtype is not appropriate + if ( + isinstance(result, (ABCSeries, ABCDataFrame)) + and result.empty + and not obj.empty + ): + raise ValueError("Transform function failed") + # error: Argument 1 to "__get__" of "AxisProperty" has incompatible type + # "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy, + # DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame, + # Series]" + if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals( + obj.index # type:ignore[arg-type] + ): + raise ValueError("Function did not transform") + + return result + + def transform_dict_like(self, func): + """ + Compute transform in the case of a dict-like func + """ + from pandas.core.reshape.concat import concat + + obj = self.obj + args = self.args + kwargs = self.kwargs + + # transform is currently only for Series/DataFrame + assert isinstance(obj, ABCNDFrame) + + if len(func) == 0: + raise ValueError("No transform functions were provided") + + func = self.normalize_dictlike_arg("transform", obj, func) + + results: dict[Hashable, DataFrame | Series] = {} + failed_names = [] + all_type_errors = True + for name, how in func.items(): + colg = obj._gotitem(name, ndim=1) + try: + results[name] = colg.transform(how, 0, *args, **kwargs) + except Exception as err: + if str(err) in { + "Function did not transform", + "No transform functions were provided", + }: + raise err + else: + if not isinstance(err, TypeError): + all_type_errors = False + failed_names.append(name) + # combine results + if not results: + klass = TypeError if all_type_errors else ValueError + raise klass("Transform function failed") + if len(failed_names) > 0: + warnings.warn( + f"{failed_names} did not transform successfully. If any error is " + f"raised, this will raise in a future version of pandas. " + f"Drop these columns/ops to avoid this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return concat(results, axis=1) + + def transform_str_or_callable(self, func) -> DataFrame | Series: + """ + Compute transform in the case of a string or callable func + """ + obj = self.obj + args = self.args + kwargs = self.kwargs + + if isinstance(func, str): + return self._try_aggregate_string_function(obj, func, *args, **kwargs) + + if not args and not kwargs: + f = com.get_cython_func(func) + if f: + old_msg = "The default value of numeric_only" + new_msg = ( + f"The operation {func} failed on a column. If any error is " + f"raised, this will raise an exception in a future version " + f"of pandas. Drop these columns to avoid this warning." 
+ ) + with rewrite_warning(old_msg, FutureWarning, new_msg): + return getattr(obj, f)() + + # Two possible ways to use a UDF - apply or call directly + try: + return obj.apply(func, args=args, **kwargs) + except Exception: + return func(obj, *args, **kwargs) + + def agg_list_like(self) -> DataFrame | Series: + """ + Compute aggregation in the case of a list-like argument. + + Returns + ------- + Result of aggregation. + """ + from pandas.core.reshape.concat import concat + + obj = self.obj + arg = cast(List[AggFuncTypeBase], self.f) + + if getattr(obj, "axis", 0) == 1: + raise NotImplementedError("axis other than 0 is not supported") + + if not isinstance(obj, SelectionMixin): + # i.e. obj is Series or DataFrame + selected_obj = obj + elif obj._selected_obj.ndim == 1: + # For SeriesGroupBy this matches _obj_with_exclusions + selected_obj = obj._selected_obj + else: + selected_obj = obj._obj_with_exclusions + + results = [] + keys = [] + failed_names = [] + + depr_nuisance_columns_msg = ( + "{} did not aggregate successfully. If any error is " + "raised this will raise in a future version of pandas. " + "Drop these columns/ops to avoid this warning." + ) + + # degenerate case + if selected_obj.ndim == 1: + for a in arg: + colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) + try: + new_res = colg.aggregate(a) + + except TypeError: + failed_names.append(com.get_callable_name(a) or a) + else: + results.append(new_res) + + # make sure we find a good name + name = com.get_callable_name(a) or a + keys.append(name) + + # multiples + else: + indices = [] + for index, col in enumerate(selected_obj): + colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) + try: + # Capture and suppress any warnings emitted by us in the call + # to agg below, but pass through any warnings that were + # generated otherwise. + # This is necessary because of https://bugs.python.org/issue29672 + # See GH #43741 for more details + with warnings.catch_warnings(record=True) as record: + new_res = colg.aggregate(arg) + if len(record) > 0: + match = re.compile(depr_nuisance_columns_msg.format(".*")) + for warning in record: + if re.match(match, str(warning.message)): + failed_names.append(col) + else: + warnings.warn_explicit( + message=warning.message, + category=warning.category, + filename=warning.filename, + lineno=warning.lineno, + ) + + except (TypeError, DataError): + failed_names.append(col) + except ValueError as err: + # cannot aggregate + if "Must produce aggregated value" in str(err): + # raised directly in _aggregate_named + failed_names.append(col) + elif "no results" in str(err): + # reached in test_frame_apply.test_nuiscance_columns + # where the colg.aggregate(arg) ends up going through + # the selected_obj.ndim == 1 branch above with arg == ["sum"] + # on a datetime64[ns] column + failed_names.append(col) + else: + raise + else: + results.append(new_res) + indices.append(index) + + keys = selected_obj.columns.take(indices) + + # if we are empty + if not len(results): + raise ValueError("no results") + + if len(failed_names) > 0: + warnings.warn( + depr_nuisance_columns_msg.format(failed_names), + FutureWarning, + stacklevel=find_stack_level(), + ) + + try: + concatenated = concat(results, keys=keys, axis=1, sort=False) + except TypeError as err: + # we are concatting non-NDFrame objects, + # e.g. 
a list of scalars + from pandas import Series + + result = Series(results, index=keys, name=obj.name) + if is_nested_object(result): + raise ValueError( + "cannot combine transform and aggregation operations" + ) from err + return result + else: + # Concat uses the first index to determine the final indexing order. + # The union of a shorter first index with the other indices causes + # the index sorting to be different from the order of the aggregating + # functions. Reindex if this is the case. + index_size = concatenated.index.size + full_ordered_index = next( + result.index for result in results if result.index.size == index_size + ) + return concatenated.reindex(full_ordered_index, copy=False) + + def agg_dict_like(self) -> DataFrame | Series: + """ + Compute aggregation in the case of a dict-like argument. + + Returns + ------- + Result of aggregation. + """ + from pandas import Index + from pandas.core.reshape.concat import concat + + obj = self.obj + arg = cast(AggFuncTypeDict, self.f) + + if getattr(obj, "axis", 0) == 1: + raise NotImplementedError("axis other than 0 is not supported") + + if not isinstance(obj, SelectionMixin): + # i.e. obj is Series or DataFrame + selected_obj = obj + selection = None + else: + selected_obj = obj._selected_obj + selection = obj._selection + + arg = self.normalize_dictlike_arg("agg", selected_obj, arg) + + if selected_obj.ndim == 1: + # key only used for output + colg = obj._gotitem(selection, ndim=1) + results = {key: colg.agg(how) for key, how in arg.items()} + else: + # key used for column selection and output + results = { + key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items() + } + + # set the final keys + keys = list(arg.keys()) + + # Avoid making two isinstance calls in all and any below + is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()] + + # combine results + if all(is_ndframe): + keys_to_use: Iterable[Hashable] + keys_to_use = [k for k in keys if not results[k].empty] + # Have to check, if at least one DataFrame is not empty. + keys_to_use = keys_to_use if keys_to_use != [] else keys + if selected_obj.ndim == 2: + # keys are columns, so we can preserve names + ktu = Index(keys_to_use) + ktu._set_names(selected_obj.columns.names) + keys_to_use = ktu + + axis = 0 if isinstance(obj, ABCSeries) else 1 + result = concat( + {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use + ) + elif any(is_ndframe): + # There is a mix of NDFrames and scalars + raise ValueError( + "cannot perform both aggregation " + "and transformation operations " + "simultaneously" + ) + else: + from pandas import Series + + # we have a dict of scalars + # GH 36212 use name only if obj is a series + if obj.ndim == 1: + obj = cast("Series", obj) + name = obj.name + else: + name = None + + result = Series(results, name=name) + + return result + + def apply_str(self) -> DataFrame | Series: + """ + Compute apply in case of a string. + + Returns + ------- + result: Series or DataFrame + """ + # Caller is responsible for checking isinstance(self.f, str) + f = cast(str, self.f) + + obj = self.obj + + # Support for `frame.transform('method')` + # Some methods (shift, etc.) require the axis argument, others + # don't, so inspect and insert if necessary. 
+ func = getattr(obj, f, None) + if callable(func): + sig = inspect.getfullargspec(func) + arg_names = (*sig.args, *sig.kwonlyargs) + if self.axis != 0 and ( + "axis" not in arg_names or f in ("corrwith", "mad", "skew") + ): + raise ValueError(f"Operation {f} does not support axis=1") + elif "axis" in arg_names: + self.kwargs["axis"] = self.axis + elif self.axis != 0: + raise ValueError(f"Operation {f} does not support axis=1") + return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs) + + def apply_multiple(self) -> DataFrame | Series: + """ + Compute apply in case of a list-like or dict-like. + + Returns + ------- + result: Series, DataFrame, or None + Result when self.f is a list-like or dict-like, None otherwise. + """ + return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs) + + def normalize_dictlike_arg( + self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict + ) -> AggFuncTypeDict: + """ + Handler for dict-like argument. + + Ensures that necessary columns exist if obj is a DataFrame, and + that a nested renamer is not passed. Also normalizes to all lists + when values consists of a mix of list and non-lists. + """ + assert how in ("apply", "agg", "transform") + + # Can't use func.values(); wouldn't work for a Series + if ( + how == "agg" + and isinstance(obj, ABCSeries) + and any(is_list_like(v) for _, v in func.items()) + ) or (any(is_dict_like(v) for _, v in func.items())): + # GH 15931 - deprecation of renaming keys + raise SpecificationError("nested renamer is not supported") + + if obj.ndim != 1: + # Check for missing columns on a frame + cols = set(func.keys()) - set(obj.columns) + if len(cols) > 0: + cols_sorted = list(safe_sort(list(cols))) + raise KeyError(f"Column(s) {cols_sorted} do not exist") + + aggregator_types = (list, tuple, dict) + + # if we have a dict of any non-scalars + # eg. 
{'A' : ['mean']}, normalize all to + # be list-likes + # Cannot use func.values() because arg may be a Series + if any(isinstance(x, aggregator_types) for _, x in func.items()): + new_func: AggFuncTypeDict = {} + for k, v in func.items(): + if not isinstance(v, aggregator_types): + new_func[k] = [v] + else: + new_func[k] = v + func = new_func + return func + + def _try_aggregate_string_function(self, obj, arg: str, *args, **kwargs): + """ + if arg is a string, then try to operate on it: + - try to find a function (or attribute) on ourselves + - try to find a numpy function + - raise + """ + assert isinstance(arg, str) + + f = getattr(obj, arg, None) + if f is not None: + if callable(f): + return f(*args, **kwargs) + + # people may try to aggregate on a non-callable attribute + # but don't let them think they can pass args to it + assert len(args) == 0 + assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0 + return f + + f = getattr(np, arg, None) + if f is not None and hasattr(obj, "__array__"): + # in particular exclude Window + return f(obj, *args, **kwargs) + + raise AttributeError( + f"'{arg}' is not a valid function for '{type(obj).__name__}' object" + ) + + +class NDFrameApply(Apply): + """ + Methods shared by FrameApply and SeriesApply but + not GroupByApply or ResamplerWindowApply + """ + + @property + def index(self) -> Index: + # error: Argument 1 to "__get__" of "AxisProperty" has incompatible type + # "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy, + # DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame, + # Series]" + return self.obj.index # type:ignore[arg-type] + + @property + def agg_axis(self) -> Index: + return self.obj._get_agg_axis(self.axis) + + +class FrameApply(NDFrameApply): + obj: DataFrame + + # --------------------------------------------------------------- + # Abstract Methods + + @property + @abc.abstractmethod + def result_index(self) -> Index: + pass + + @property + @abc.abstractmethod + def result_columns(self) -> Index: + pass + + @property + @abc.abstractmethod + def series_generator(self) -> Iterator[Series]: + pass + + @abc.abstractmethod + def wrap_results_for_axis( + self, results: ResType, res_index: Index + ) -> DataFrame | Series: + pass + + # --------------------------------------------------------------- + + @property + def res_columns(self) -> Index: + return self.result_columns + + @property + def columns(self) -> Index: + return self.obj.columns + + @cache_readonly + def values(self): + return self.obj.values + + @cache_readonly + def dtypes(self) -> Series: + return self.obj.dtypes + + def apply(self) -> DataFrame | Series: + """compute the results""" + # dispatch to agg + if is_list_like(self.f): + return self.apply_multiple() + + # all empty + if len(self.columns) == 0 and len(self.index) == 0: + return self.apply_empty_result() + + # string dispatch + if isinstance(self.f, str): + return self.apply_str() + + # ufunc + elif isinstance(self.f, np.ufunc): + with np.errstate(all="ignore"): + results = self.obj._mgr.apply("apply", func=self.f) + # _constructor will retain self.index and self.columns + return self.obj._constructor(data=results) + + # broadcasting + if self.result_type == "broadcast": + return self.apply_broadcast(self.obj) + + # one axis empty + elif not all(self.obj.shape): + return self.apply_empty_result() + + # raw + elif self.raw: + return self.apply_raw() + + return self.apply_standard() + + def agg(self): + obj = self.obj + axis = self.axis + + # TODO: Avoid having to change state + 
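+        # For axis=1 the frame is transposed so the shared axis=0 aggregation
+        # machinery can be reused; ``obj``/``axis`` are restored in the
+        # ``finally`` block and the result is transposed back afterwards.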
self.obj = self.obj if self.axis == 0 else self.obj.T + self.axis = 0 + + result = None + try: + result = super().agg() + except TypeError as err: + exc = TypeError( + "DataFrame constructor called with " + f"incompatible data and dtype: {err}" + ) + raise exc from err + finally: + self.obj = obj + self.axis = axis + + if axis == 1: + result = result.T if result is not None else result + + if result is None: + result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs) + + return result + + def apply_empty_result(self): + """ + we have an empty result; at least 1 axis is 0 + + we will try to apply the function to an empty + series in order to see if this is a reduction function + """ + assert callable(self.f) + + # we are not asked to reduce or infer reduction + # so just return a copy of the existing object + if self.result_type not in ["reduce", None]: + return self.obj.copy() + + # we may need to infer + should_reduce = self.result_type == "reduce" + + from pandas import Series + + if not should_reduce: + try: + if self.axis == 0: + r = self.f(Series([], dtype=np.float64)) + else: + r = self.f(Series(index=self.columns, dtype=np.float64)) + except Exception: + pass + else: + should_reduce = not isinstance(r, Series) + + if should_reduce: + if len(self.agg_axis): + r = self.f(Series([], dtype=np.float64)) + else: + r = np.nan + + return self.obj._constructor_sliced(r, index=self.agg_axis) + else: + return self.obj.copy() + + def apply_raw(self): + """apply to the values as a numpy array""" + + def wrap_function(func): + """ + Wrap user supplied function to work around numpy issue. + + see https://github.com/numpy/numpy/issues/8352 + """ + + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + if isinstance(result, str): + result = np.array(result, dtype=object) + return result + + return wrapper + + result = np.apply_along_axis(wrap_function(self.f), self.axis, self.values) + + # TODO: mixed type case + if result.ndim == 2: + return self.obj._constructor(result, index=self.index, columns=self.columns) + else: + return self.obj._constructor_sliced(result, index=self.agg_axis) + + def apply_broadcast(self, target: DataFrame) -> DataFrame: + assert callable(self.f) + + result_values = np.empty_like(target.values) + + # axis which we want to compare compliance + result_compare = target.shape[0] + + for i, col in enumerate(target.columns): + res = self.f(target[col]) + ares = np.asarray(res).ndim + + # must be a scalar or 1d + if ares > 1: + raise ValueError("too many dims to broadcast") + elif ares == 1: + + # must match return dim + if result_compare != len(res): + raise ValueError("cannot broadcast result") + + result_values[:, i] = res + + # we *always* preserve the original index / columns + result = self.obj._constructor( + result_values, index=target.index, columns=target.columns + ) + return result + + def apply_standard(self): + results, res_index = self.apply_series_generator() + + # wrap results + return self.wrap_results(results, res_index) + + def apply_series_generator(self) -> tuple[ResType, Index]: + assert callable(self.f) + + series_gen = self.series_generator + res_index = self.result_index + + results = {} + + with option_context("mode.chained_assignment", None): + for i, v in enumerate(series_gen): + # ignore SettingWithCopy here in case the user mutates + results[i] = self.f(v) + if isinstance(results[i], ABCSeries): + # If we have a view on v, we need to make a copy because + # series_generator will swap out the underlying data + results[i] = 
results[i].copy(deep=False) + + return results, res_index + + def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series: + from pandas import Series + + # see if we can infer the results + if len(results) > 0 and 0 in results and is_sequence(results[0]): + return self.wrap_results_for_axis(results, res_index) + + # dict of scalars + + # the default dtype of an empty Series will be `object`, but this + # code can be hit by df.mean() where the result should have dtype + # float64 even if it's an empty Series. + constructor_sliced = self.obj._constructor_sliced + if constructor_sliced is Series: + result = create_series_with_explicit_dtype( + results, dtype_if_empty=np.float64 + ) + else: + result = constructor_sliced(results) + result.index = res_index + + return result + + def apply_str(self) -> DataFrame | Series: + # Caller is responsible for checking isinstance(self.f, str) + # TODO: GH#39993 - Avoid special-casing by replacing with lambda + if self.f == "size": + # Special-cased because DataFrame.size returns a single scalar + obj = self.obj + value = obj.shape[self.axis] + return obj._constructor_sliced(value, index=self.agg_axis) + return super().apply_str() + + +class FrameRowApply(FrameApply): + axis = 0 + + def apply_broadcast(self, target: DataFrame) -> DataFrame: + return super().apply_broadcast(target) + + @property + def series_generator(self): + return (self.obj._ixs(i, axis=1) for i in range(len(self.columns))) + + @property + def result_index(self) -> Index: + return self.columns + + @property + def result_columns(self) -> Index: + return self.index + + def wrap_results_for_axis( + self, results: ResType, res_index: Index + ) -> DataFrame | Series: + """return the results for the rows""" + + if self.result_type == "reduce": + # e.g. test_apply_dict GH#8735 + res = self.obj._constructor_sliced(results) + res.index = res_index + return res + + elif self.result_type is None and all( + isinstance(x, dict) for x in results.values() + ): + # Our operation was a to_dict op e.g. + # test_apply_dict GH#8735, test_apply_reduce_to_dict GH#25196 #37544 + res = self.obj._constructor_sliced(results) + res.index = res_index + return res + + try: + result = self.obj._constructor(data=results) + except ValueError as err: + if "All arrays must be of the same length" in str(err): + # e.g. result = [[2, 3], [1.5], ['foo', 'bar']] + # see test_agg_listlike_result GH#29587 + res = self.obj._constructor_sliced(results) + res.index = res_index + return res + else: + raise + + if not isinstance(results[0], ABCSeries): + if len(result.index) == len(self.res_columns): + result.index = self.res_columns + + if len(result.columns) == len(res_index): + result.columns = res_index + + return result + + +class FrameColumnApply(FrameApply): + axis = 1 + + def apply_broadcast(self, target: DataFrame) -> DataFrame: + result = super().apply_broadcast(target.T) + return result.T + + @property + def series_generator(self): + values = self.values + values = ensure_wrapped_if_datetimelike(values) + assert len(values) > 0 + + # We create one Series object, and will swap out the data inside + # of it. Kids: don't do this at home. 
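+        # Roughly (a simplified sketch of the alternative, using only names
+        # already defined here), the naive approach would be
+        #
+        #   for i in range(len(self.obj)):
+        #       yield self.obj._ixs(i, axis=0)   # builds a new Series per row
+        #
+        # whereas below a single Series is reused and only its block values
+        # and ``_name`` are swapped for each row of ``values``.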
+ ser = self.obj._ixs(0, axis=0) + mgr = ser._mgr + + if is_extension_array_dtype(ser.dtype): + # values will be incorrect for this block + # TODO(EA2D): special case would be unnecessary with 2D EAs + obj = self.obj + for i in range(len(obj)): + yield obj._ixs(i, axis=0) + + else: + for (arr, name) in zip(values, self.index): + # GH#35462 re-pin mgr in case setitem changed it + ser._mgr = mgr + mgr.set_values(arr) + object.__setattr__(ser, "_name", name) + yield ser + + @property + def result_index(self) -> Index: + return self.index + + @property + def result_columns(self) -> Index: + return self.columns + + def wrap_results_for_axis( + self, results: ResType, res_index: Index + ) -> DataFrame | Series: + """return the results for the columns""" + result: DataFrame | Series + + # we have requested to expand + if self.result_type == "expand": + result = self.infer_to_same_shape(results, res_index) + + # we have a non-series and don't want inference + elif not isinstance(results[0], ABCSeries): + result = self.obj._constructor_sliced(results) + result.index = res_index + + # we may want to infer results + else: + result = self.infer_to_same_shape(results, res_index) + + return result + + def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: + """infer the results to the same shape as the input object""" + result = self.obj._constructor(data=results) + result = result.T + + # set the index + result.index = res_index + + # infer dtypes + result = result.infer_objects() + + return result + + +class SeriesApply(NDFrameApply): + obj: Series + axis = 0 + + def __init__( + self, + obj: Series, + func: AggFuncType, + convert_dtype: bool, + args, + kwargs, + ) -> None: + self.convert_dtype = convert_dtype + + super().__init__( + obj, + func, + raw=False, + result_type=None, + args=args, + kwargs=kwargs, + ) + + def apply(self) -> DataFrame | Series: + obj = self.obj + + if len(obj) == 0: + return self.apply_empty_result() + + # dispatch to agg + if is_list_like(self.f): + return self.apply_multiple() + + if isinstance(self.f, str): + # if we are a string, try to dispatch + return self.apply_str() + + # self.f is Callable + return self.apply_standard() + + def agg(self): + result = super().agg() + if result is None: + f = self.f + kwargs = self.kwargs + + # string, list-like, and dict-like are entirely handled in super + assert callable(f) + + # we can be called from an inner function which + # passes this meta-data + kwargs.pop("_level", None) + + # try a regular apply, this evaluates lambdas + # row-by-row; however if the lambda is expected a Series + # expression, e.g.: lambda x: x-x.quantile(0.25) + # this will fail, so we can try a vectorized evaluation + + # we cannot FIRST try the vectorized evaluation, because + # then .agg and .apply would have different semantics if the + # operation is actually defined on the Series, e.g. 
str + try: + result = self.obj.apply(f) + except (ValueError, AttributeError, TypeError): + result = f(self.obj) + + return result + + def apply_empty_result(self) -> Series: + obj = self.obj + return obj._constructor(dtype=obj.dtype, index=obj.index).__finalize__( + obj, method="apply" + ) + + def apply_standard(self) -> DataFrame | Series: + # caller is responsible for ensuring that f is Callable + f = cast(Callable, self.f) + obj = self.obj + + with np.errstate(all="ignore"): + if isinstance(f, np.ufunc): + return f(obj) + + # row-wise access + if is_extension_array_dtype(obj.dtype) and hasattr(obj._values, "map"): + # GH#23179 some EAs do not have `map` + mapped = obj._values.map(f) + else: + values = obj.astype(object)._values + mapped = lib.map_infer( + values, + f, + convert=self.convert_dtype, + ) + + if len(mapped) and isinstance(mapped[0], ABCSeries): + # GH#43986 Need to do list(mapped) in order to get treated as nested + # See also GH#25959 regarding EA support + return obj._constructor_expanddim(list(mapped), index=obj.index) + else: + return obj._constructor(mapped, index=obj.index).__finalize__( + obj, method="apply" + ) + + +class GroupByApply(Apply): + def __init__( + self, + obj: GroupBy[NDFrameT], + func: AggFuncType, + args, + kwargs, + ) -> None: + kwargs = kwargs.copy() + self.axis = obj.obj._get_axis_number(kwargs.get("axis", 0)) + super().__init__( + obj, + func, + raw=False, + result_type=None, + args=args, + kwargs=kwargs, + ) + + def apply(self): + raise NotImplementedError + + def transform(self): + raise NotImplementedError + + +class ResamplerWindowApply(Apply): + axis = 0 + obj: Resampler | BaseWindow + + def __init__( + self, + obj: Resampler | BaseWindow, + func: AggFuncType, + args, + kwargs, + ) -> None: + super().__init__( + obj, + func, + raw=False, + result_type=None, + args=args, + kwargs=kwargs, + ) + + def apply(self): + raise NotImplementedError + + def transform(self): + raise NotImplementedError + + +def reconstruct_func( + func: AggFuncType | None, **kwargs +) -> tuple[bool, AggFuncType | None, list[str] | None, npt.NDArray[np.intp] | None]: + """ + This is the internal function to reconstruct func given if there is relabeling + or not and also normalize the keyword to get new order of columns. + + If named aggregation is applied, `func` will be None, and kwargs contains the + column and aggregation function information to be parsed; + If named aggregation is not applied, `func` is either string (e.g. 'min') or + Callable, or list of them (e.g. ['min', np.max]), or the dictionary of column name + and str/Callable/list of them (e.g. {'A': 'min'}, or {'A': [np.min, lambda x: x]}) + + If relabeling is True, will return relabeling, reconstructed func, column + names, and the reconstructed order of columns. + If relabeling is False, the columns and order will be None. + + Parameters + ---------- + func: agg function (e.g. 'min' or Callable) or list of agg functions + (e.g. ['min', np.max]) or dictionary (e.g. {'A': ['min', np.max]}). 
+    **kwargs: dict, kwargs used in is_multi_agg_with_relabel and
+        normalize_keyword_aggregation function for relabelling
+
+    Returns
+    -------
+    relabelling: bool, if there is relabelling or not
+    func: normalized and mangled func
+    columns: list of column names
+    order: array of columns indices
+
+    Examples
+    --------
+    >>> reconstruct_func(None, **{"foo": ("col", "min")})
+    (True, defaultdict(<class 'list'>, {'col': ['min']}), ('foo',), array([0]))
+
+    >>> reconstruct_func("min")
+    (False, 'min', None, None)
+    """
+    relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
+    columns: list[str] | None = None
+    order: npt.NDArray[np.intp] | None = None
+
+    if not relabeling:
+        if isinstance(func, list) and len(func) > len(set(func)):
+
+            # GH 28426 will raise error if duplicated function names are used and
+            # there is no reassigned name
+            raise SpecificationError(
+                "Function names must be unique if there is no new column names "
+                "assigned"
+            )
+        elif func is None:
+            # nicer error message
+            raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).'")
+
+    if relabeling:
+        func, columns, order = normalize_keyword_aggregation(kwargs)
+
+    return relabeling, func, columns, order
+
+
+def is_multi_agg_with_relabel(**kwargs) -> bool:
+    """
+    Check whether kwargs passed to .agg look like multi-agg with relabeling.
+
+    Parameters
+    ----------
+    **kwargs : dict
+
+    Returns
+    -------
+    bool
+
+    Examples
+    --------
+    >>> is_multi_agg_with_relabel(a="max")
+    False
+    >>> is_multi_agg_with_relabel(a_max=("a", "max"), a_min=("a", "min"))
+    True
+    >>> is_multi_agg_with_relabel()
+    False
+    """
+    return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) and (
+        len(kwargs) > 0
+    )
+
+
+def normalize_keyword_aggregation(
+    kwargs: dict,
+) -> tuple[dict, list[str], npt.NDArray[np.intp]]:
+    """
+    Normalize user-provided "named aggregation" kwargs.
+    Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
+    to the old Dict[str, List[scalar]].
+
+    Parameters
+    ----------
+    kwargs : dict
+
+    Returns
+    -------
+    aggspec : dict
+        The transformed kwargs.
+    columns : List[str]
+        The user-provided keys.
+    col_idx_order : List[int]
+        List of columns indices.
+
+    Examples
+    --------
+    >>> normalize_keyword_aggregation({"output": ("input", "sum")})
+    (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]))
+    """
+    from pandas.core.indexes.base import Index
+
+    # Normalize the aggregation functions as Mapping[column, List[func]],
+    # process normally, then fixup the names.
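+    # As a sketch with illustrative column/key names, two named aggregations
+    # on the same column would come out roughly as:
+    #
+    #   >>> normalize_keyword_aggregation({"lo": ("a", "min"), "hi": ("a", "max")})
+    #   (defaultdict(<class 'list'>, {'a': ['min', 'max']}), ('lo', 'hi'), array([0, 1]))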
+    # TODO: aggspec type: typing.Dict[str, List[AggScalar]]
+    # May be hitting https://github.com/python/mypy/issues/5958
+    # saying it doesn't have an attribute __name__
+    aggspec: DefaultDict = defaultdict(list)
+    order = []
+    columns, pairs = list(zip(*kwargs.items()))
+
+    for column, aggfunc in pairs:
+        aggspec[column].append(aggfunc)
+        order.append((column, com.get_callable_name(aggfunc) or aggfunc))
+
+    # uniquify aggfunc name if duplicated in order list
+    uniquified_order = _make_unique_kwarg_list(order)
+
+    # GH 25719, due to aggspec will change the order of assigned columns in aggregation
+    # uniquified_aggspec will store uniquified order list and will compare it with order
+    # based on index
+    aggspec_order = [
+        (column, com.get_callable_name(aggfunc) or aggfunc)
+        for column, aggfuncs in aggspec.items()
+        for aggfunc in aggfuncs
+    ]
+    uniquified_aggspec = _make_unique_kwarg_list(aggspec_order)
+
+    # get the new index of columns by comparison
+    col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
+    return aggspec, columns, col_idx_order
+
+
+def _make_unique_kwarg_list(
+    seq: Sequence[tuple[Any, Any]]
+) -> Sequence[tuple[Any, Any]]:
+    """
+    Uniquify aggfunc name of the pairs in the order list
+
+    Examples:
+    --------
+    >>> kwarg_list = [('a', '<lambda>'), ('a', '<lambda>'), ('b', '<lambda>')]
+    >>> _make_unique_kwarg_list(kwarg_list)
+    [('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')]
+    """
+    return [
+        (pair[0], "_".join([pair[1], str(seq[:i].count(pair))]))
+        if seq.count(pair) > 1
+        else pair
+        for i, pair in enumerate(seq)
+    ]
+
+
+def relabel_result(
+    result: DataFrame | Series,
+    func: dict[str, list[Callable | str]],
+    columns: Iterable[Hashable],
+    order: Iterable[int],
+) -> dict[Hashable, Series]:
+    """
+    Internal function to reorder result if relabelling is True for
+    dataframe.agg, and return the reordered result in dict.
+
+    Parameters:
+    ----------
+    result: Result from aggregation
+    func: Dict of (column name, funcs)
+    columns: New columns name for relabelling
+    order: New order for relabelling
+
+    Examples:
+    ---------
+    >>> result = DataFrame({"A": [np.nan, 2, np.nan],
+    ...       "C": [6, np.nan, np.nan], "B": [np.nan, 4, 2.5]})  # doctest: +SKIP
+    >>> funcs = {"A": ["max"], "C": ["max"], "B": ["mean", "min"]}
+    >>> columns = ("foo", "aab", "bar", "dat")
+    >>> order = [0, 1, 2, 3]
+    >>> _relabel_result(result, func, columns, order)  # doctest: +SKIP
+    dict(A=Series([2.0, NaN, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
+         C=Series([NaN, 6.0, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
+         B=Series([NaN, NaN, 2.5, 4.0], index=["foo", "aab", "bar", "dat"]))
+    """
+    from pandas.core.indexes.base import Index
+
+    reordered_indexes = [
+        pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
+    ]
+    reordered_result_in_dict: dict[Hashable, Series] = {}
+    idx = 0
+
+    reorder_mask = not isinstance(result, ABCSeries) and len(result.columns) > 1
+    for col, fun in func.items():
+        s = result[col].dropna()
+
+        # In the `_aggregate`, the callable names are obtained and used in `result`, and
+        # these names are ordered alphabetically. e.g.
+        #           C2   C1
+        # <lambda>   1  NaN
+        # amax     NaN  4.0
+        # max      NaN  4.0
+        # sum     18.0  6.0
+        # Therefore, the order of functions for each column could be shuffled
+        # accordingly so need to get the callable name if it is not parsed names, and
+        # reorder the aggregated result for each column.
+        # e.g. if df.agg(c1=("C2", sum), c2=("C2", lambda x: min(x))), correct order is
if df.agg(c1=("C2", sum), c2=("C2", lambda x: min(x))), correct order is + # [sum, ], but in `result`, it will be [, sum], and we need to + # reorder so that aggregated values map to their functions regarding the order. + + # However there is only one column being used for aggregation, not need to + # reorder since the index is not sorted, and keep as is in `funcs`, e.g. + # A + # min 1.0 + # mean 1.5 + # mean 1.5 + if reorder_mask: + fun = [ + com.get_callable_name(f) if not isinstance(f, str) else f for f in fun + ] + col_idx_order = Index(s.index).get_indexer(fun) + s = s[col_idx_order] + + # assign the new user-provided "named aggregation" as index names, and reindex + # it based on the whole user-provided names. + s.index = reordered_indexes[idx : idx + len(fun)] + reordered_result_in_dict[col] = s.reindex(columns, copy=False) + idx = idx + len(fun) + return reordered_result_in_dict + + +# TODO: Can't use, because mypy doesn't like us setting __name__ +# error: "partial[Any]" has no attribute "__name__" +# the type is: +# typing.Sequence[Callable[..., ScalarResult]] +# -> typing.Sequence[Callable[..., ScalarResult]]: + + +def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]: + """ + Possibly mangle a list of aggfuncs. + + Parameters + ---------- + aggfuncs : Sequence + + Returns + ------- + mangled: list-like + A new AggSpec sequence, where lambdas have been converted + to have unique names. + + Notes + ----- + If just one aggfunc is passed, the name will not be mangled. + """ + if len(aggfuncs) <= 1: + # don't mangle for .agg([lambda x: .]) + return aggfuncs + i = 0 + mangled_aggfuncs = [] + for aggfunc in aggfuncs: + if com.get_callable_name(aggfunc) == "": + aggfunc = partial(aggfunc) + aggfunc.__name__ = f"" + i += 1 + mangled_aggfuncs.append(aggfunc) + + return mangled_aggfuncs + + +def maybe_mangle_lambdas(agg_spec: Any) -> Any: + """ + Make new lambdas with unique names. + + Parameters + ---------- + agg_spec : Any + An argument to GroupBy.agg. + Non-dict-like `agg_spec` are pass through as is. + For dict-like `agg_spec` a new spec is returned + with name-mangled lambdas. + + Returns + ------- + mangled : Any + Same type as the input. + + Examples + -------- + >>> maybe_mangle_lambdas('sum') + 'sum' + >>> maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP + [, + .f(*args, **kwargs)>] + """ + is_dict = is_dict_like(agg_spec) + if not (is_dict or is_list_like(agg_spec)): + return agg_spec + mangled_aggspec = type(agg_spec)() # dict or OrderedDict + + if is_dict: + for key, aggfuncs in agg_spec.items(): + if is_list_like(aggfuncs) and not is_dict_like(aggfuncs): + mangled_aggfuncs = _managle_lambda_list(aggfuncs) + else: + mangled_aggfuncs = aggfuncs + + mangled_aggspec[key] = mangled_aggfuncs + else: + mangled_aggspec = _managle_lambda_list(agg_spec) + + return mangled_aggspec + + +def validate_func_kwargs( + kwargs: dict, +) -> tuple[list[str], list[str | Callable[..., Any]]]: + """ + Validates types of user-provided "named aggregation" kwargs. + `TypeError` is raised if aggfunc is not `str` or callable. + + Parameters + ---------- + kwargs : dict + + Returns + ------- + columns : List[str] + List of user-provied keys. + func : List[Union[str, callable[...,Any]]] + List of user-provided aggfuncs + + Examples + -------- + >>> validate_func_kwargs({'one': 'min', 'two': 'max'}) + (['one', 'two'], ['min', 'max']) + """ + tuple_given_message = "func is expected but received {} in **kwargs." 
+ columns = list(kwargs) + func = [] + for col_func in kwargs.values(): + if not (isinstance(col_func, str) or callable(col_func)): + raise TypeError(tuple_given_message.format(type(col_func).__name__)) + func.append(col_func) + if not columns: + no_arg_message = "Must provide 'func' or named aggregation **kwargs." + raise TypeError(no_arg_message) + return columns, func diff --git a/pandas/core/array_algos/__init__.py b/pandas/core/array_algos/__init__.py new file mode 100644 index 00000000..a7655a01 --- /dev/null +++ b/pandas/core/array_algos/__init__.py @@ -0,0 +1,9 @@ +""" +core.array_algos is for algorithms that operate on ndarray and ExtensionArray. +These should: + +- Assume that any Index, Series, or DataFrame objects have already been unwrapped. +- Assume that any list arguments have already been cast to ndarray/EA. +- Not depend on Index, Series, or DataFrame, nor import any of these. +- May dispatch to ExtensionArray methods, but should not import from core.arrays. +""" diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py new file mode 100644 index 00000000..3e59a267 --- /dev/null +++ b/pandas/core/array_algos/masked_reductions.py @@ -0,0 +1,149 @@ +""" +masked_reductions.py is for reduction algorithms using a mask-based approach +for missing values. +""" +from __future__ import annotations + +from typing import Callable + +import numpy as np + +from pandas._libs import missing as libmissing +from pandas._typing import npt + +from pandas.core.nanops import check_below_min_count + + +def _sumprod( + func: Callable, + values: np.ndarray, + mask: npt.NDArray[np.bool_], + *, + skipna: bool = True, + min_count: int = 0, + axis: int | None = None, +): + """ + Sum or product for 1D masked array. + + Parameters + ---------- + func : np.sum or np.prod + values : np.ndarray + Numpy array with the values (can be of any dtype that support the + operation). + mask : np.ndarray[bool] + Boolean numpy array (True values indicate missing values). + skipna : bool, default True + Whether to skip NA. + min_count : int, default 0 + The required number of valid values to perform the operation. If fewer than + ``min_count`` non-NA values are present the result will be NA. + axis : int, optional, default None + """ + if not skipna: + if mask.any(axis=axis) or check_below_min_count(values.shape, None, min_count): + return libmissing.NA + else: + return func(values, axis=axis) + else: + if check_below_min_count(values.shape, mask, min_count) and ( + axis is None or values.ndim == 1 + ): + return libmissing.NA + + return func(values, where=~mask, axis=axis) + + +def sum( + values: np.ndarray, + mask: npt.NDArray[np.bool_], + *, + skipna: bool = True, + min_count: int = 0, + axis: int | None = None, +): + return _sumprod( + np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis + ) + + +def prod( + values: np.ndarray, + mask: npt.NDArray[np.bool_], + *, + skipna: bool = True, + min_count: int = 0, + axis: int | None = None, +): + return _sumprod( + np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis + ) + + +def _minmax( + func: Callable, + values: np.ndarray, + mask: npt.NDArray[np.bool_], + *, + skipna: bool = True, + axis: int | None = None, +): + """ + Reduction for 1D masked array. + + Parameters + ---------- + func : np.min or np.max + values : np.ndarray + Numpy array with the values (can be of any dtype that support the + operation). 
+ mask : np.ndarray[bool] + Boolean numpy array (True values indicate missing values). + skipna : bool, default True + Whether to skip NA. + axis : int, optional, default None + """ + if not skipna: + if mask.any() or not values.size: + # min/max with empty array raise in numpy, pandas returns NA + return libmissing.NA + else: + return func(values) + else: + subset = values[~mask] + if subset.size: + return func(subset) + else: + # min/max with empty array raise in numpy, pandas returns NA + return libmissing.NA + + +def min( + values: np.ndarray, + mask: npt.NDArray[np.bool_], + *, + skipna: bool = True, + axis: int | None = None, +): + return _minmax(np.min, values=values, mask=mask, skipna=skipna, axis=axis) + + +def max( + values: np.ndarray, + mask: npt.NDArray[np.bool_], + *, + skipna: bool = True, + axis: int | None = None, +): + return _minmax(np.max, values=values, mask=mask, skipna=skipna, axis=axis) + + +# TODO: axis kwarg +def mean(values: np.ndarray, mask: npt.NDArray[np.bool_], skipna: bool = True): + if not values.size or mask.all(): + return libmissing.NA + _sum = _sumprod(np.sum, values=values, mask=mask, skipna=skipna) + count = np.count_nonzero(~mask) + mean_value = _sum / count + return mean_value diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py new file mode 100644 index 00000000..17622e78 --- /dev/null +++ b/pandas/core/array_algos/putmask.py @@ -0,0 +1,146 @@ +""" +EA-compatible analogue to np.putmask +""" +from __future__ import annotations + +from typing import Any + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + ArrayLike, + npt, +) +from pandas.compat import np_version_under1p21 + +from pandas.core.dtypes.cast import infer_dtype_from +from pandas.core.dtypes.common import is_list_like + +from pandas.core.arrays import ExtensionArray + + +def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None: + """ + ExtensionArray-compatible implementation of np.putmask. The main + difference is we do not handle repeating or truncating like numpy. + + Parameters + ---------- + values: np.ndarray or ExtensionArray + mask : np.ndarray[bool] + We assume extract_bool_array has already been called. + value : Any + """ + + if ( + not isinstance(values, np.ndarray) + or (values.dtype == object and not lib.is_scalar(value)) + # GH#43424: np.putmask raises TypeError if we cannot cast between types with + # rule = "safe", a stricter guarantee we may not have here + or ( + isinstance(value, np.ndarray) and not np.can_cast(value.dtype, values.dtype) + ) + ): + # GH#19266 using np.putmask gives unexpected results with listlike value + # along with object dtype + if is_list_like(value) and len(value) == len(values): + values[mask] = value[mask] + else: + values[mask] = value + else: + # GH#37833 np.putmask is more performant than __setitem__ + np.putmask(values, mask, value) + + +def putmask_without_repeat( + values: np.ndarray, mask: npt.NDArray[np.bool_], new: Any +) -> None: + """ + np.putmask will truncate or repeat if `new` is a listlike with + len(new) != len(values). We require an exact match. 
+ + Parameters + ---------- + values : np.ndarray + mask : np.ndarray[bool] + new : Any + """ + if np_version_under1p21: + new = setitem_datetimelike_compat(values, mask.sum(), new) + + if getattr(new, "ndim", 0) >= 1: + new = new.astype(values.dtype, copy=False) + + # TODO: this prob needs some better checking for 2D cases + nlocs = mask.sum() + if nlocs > 0 and is_list_like(new) and getattr(new, "ndim", 1) == 1: + shape = np.shape(new) + # np.shape compat for if setitem_datetimelike_compat + # changed arraylike to list e.g. test_where_dt64_2d + if nlocs == shape[-1]: + # GH#30567 + # If length of ``new`` is less than the length of ``values``, + # `np.putmask` would first repeat the ``new`` array and then + # assign the masked values hence produces incorrect result. + # `np.place` on the other hand uses the ``new`` values at it is + # to place in the masked locations of ``values`` + np.place(values, mask, new) + # i.e. values[mask] = new + elif mask.shape[-1] == shape[-1] or shape[-1] == 1: + np.putmask(values, mask, new) + else: + raise ValueError("cannot assign mismatch length to masked array") + else: + np.putmask(values, mask, new) + + +def validate_putmask( + values: ArrayLike, mask: np.ndarray +) -> tuple[npt.NDArray[np.bool_], bool]: + """ + Validate mask and check if this putmask operation is a no-op. + """ + mask = extract_bool_array(mask) + if mask.shape != values.shape: + raise ValueError("putmask: mask and data must be the same size") + + noop = not mask.any() + return mask, noop + + +def extract_bool_array(mask: ArrayLike) -> npt.NDArray[np.bool_]: + """ + If we have a SparseArray or BooleanArray, convert it to ndarray[bool]. + """ + if isinstance(mask, ExtensionArray): + # We could have BooleanArray, Sparse[bool], ... + # Except for BooleanArray, this is equivalent to just + # np.asarray(mask, dtype=bool) + mask = mask.to_numpy(dtype=bool, na_value=False) + + mask = np.asarray(mask, dtype=bool) + return mask + + +def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other): + """ + Parameters + ---------- + values : np.ndarray + num_set : int + For putmask, this is mask.sum() + other : Any + """ + if values.dtype == object: + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + + if isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]: + # https://github.com/numpy/numpy/issues/12550 + # timedelta64 will incorrectly cast to int + if not is_list_like(other): + other = [other] * num_set + else: + other = list(other) + + return other diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py new file mode 100644 index 00000000..d3d9cb1b --- /dev/null +++ b/pandas/core/array_algos/quantile.py @@ -0,0 +1,224 @@ +from __future__ import annotations + +import numpy as np + +from pandas._typing import ( + ArrayLike, + Scalar, + npt, +) +from pandas.compat.numpy import np_percentile_argname + +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) + + +def quantile_compat( + values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str +) -> ArrayLike: + """ + Compute the quantiles of the given values for each quantile in `qs`. 
+ + Parameters + ---------- + values : np.ndarray or ExtensionArray + qs : np.ndarray[float64] + interpolation : str + + Returns + ------- + np.ndarray or ExtensionArray + """ + if isinstance(values, np.ndarray): + fill_value = na_value_for_dtype(values.dtype, compat=False) + mask = isna(values) + return quantile_with_mask(values, mask, fill_value, qs, interpolation) + else: + return values._quantile(qs, interpolation) + + +def quantile_with_mask( + values: np.ndarray, + mask: npt.NDArray[np.bool_], + fill_value, + qs: npt.NDArray[np.float64], + interpolation: str, +) -> np.ndarray: + """ + Compute the quantiles of the given values for each quantile in `qs`. + + Parameters + ---------- + values : np.ndarray + For ExtensionArray, this is _values_for_factorize()[0] + mask : np.ndarray[bool] + mask = isna(values) + For ExtensionArray, this is computed before calling _value_for_factorize + fill_value : Scalar + The value to interpret fill NA entries with + For ExtensionArray, this is _values_for_factorize()[1] + qs : np.ndarray[float64] + interpolation : str + Type of interpolation + + Returns + ------- + np.ndarray + + Notes + ----- + Assumes values is already 2D. For ExtensionArray this means np.atleast_2d + has been called on _values_for_factorize()[0] + + Quantile is computed along axis=1. + """ + assert values.shape == mask.shape + if values.ndim == 1: + # unsqueeze, operate, re-squeeze + values = np.atleast_2d(values) + mask = np.atleast_2d(mask) + res_values = quantile_with_mask(values, mask, fill_value, qs, interpolation) + return res_values[0] + + assert values.ndim == 2 + + is_empty = values.shape[1] == 0 + + if is_empty: + # create the array of na_values + # 2d len(values) * len(qs) + flat = np.array([fill_value] * len(qs)) + result = np.repeat(flat, len(values)).reshape(len(values), len(qs)) + else: + result = _nanpercentile( + values, + qs * 100.0, + na_value=fill_value, + mask=mask, + interpolation=interpolation, + ) + + result = np.array(result, copy=False) + result = result.T + + return result + + +def _nanpercentile_1d( + values: np.ndarray, + mask: npt.NDArray[np.bool_], + qs: npt.NDArray[np.float64], + na_value: Scalar, + interpolation: str, +) -> Scalar | np.ndarray: + """ + Wrapper for np.percentile that skips missing values, specialized to + 1-dimensional case. + + Parameters + ---------- + values : array over which to find quantiles + mask : ndarray[bool] + locations in values that should be considered missing + qs : np.ndarray[float64] of quantile indices to find + na_value : scalar + value to return for empty or all-null values + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + # mask is Union[ExtensionArray, ndarray] + values = values[~mask] + + if len(values) == 0: + # Can't pass dtype=values.dtype here bc we might have na_value=np.nan + # with values.dtype=int64 see test_quantile_empty + # equiv: 'np.array([na_value] * len(qs))' but much faster + return np.full(len(qs), na_value) + + return np.percentile( + values, + qs, + # error: No overload variant of "percentile" matches argument + # types "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]" + # , "Dict[str, str]" [call-overload] + **{np_percentile_argname: interpolation}, # type: ignore[call-overload] + ) + + +def _nanpercentile( + values: np.ndarray, + qs: npt.NDArray[np.float64], + *, + na_value, + mask: npt.NDArray[np.bool_], + interpolation: str, +): + """ + Wrapper for np.percentile that skips missing values. 
+ + Parameters + ---------- + values : np.ndarray[ndim=2] over which to find quantiles + qs : np.ndarray[float64] of quantile indices to find + na_value : scalar + value to return for empty or all-null values + mask : np.ndarray[bool] + locations in values that should be considered missing + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + + if values.dtype.kind in ["m", "M"]: + # need to cast to integer to avoid rounding errors in numpy + result = _nanpercentile( + values.view("i8"), + qs=qs, + na_value=na_value.view("i8"), + mask=mask, + interpolation=interpolation, + ) + + # Note: we have to do `astype` and not view because in general we + # have float result at this point, not i8 + return result.astype(values.dtype) + + if mask.any(): + # Caller is responsible for ensuring mask shape match + assert mask.shape == values.shape + result = [ + _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation) + for (val, m) in zip(list(values), list(mask)) + ] + if values.dtype.kind == "f": + # preserve itemsize + result = np.array(result, dtype=values.dtype, copy=False).T + else: + result = np.array(result, copy=False).T + if ( + result.dtype != values.dtype + and not mask.all() + and (result == result.astype(values.dtype, copy=False)).all() + ): + # mask.all() will never get cast back to int + # e.g. values id integer dtype and result is floating dtype, + # only cast back to integer dtype if result values are all-integer. + result = result.astype(values.dtype, copy=False) + return result + else: + return np.percentile( + values, + qs, + axis=1, + # error: No overload variant of "percentile" matches argument types + # "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]", + # "int", "Dict[str, str]" [call-overload] + **{np_percentile_argname: interpolation}, # type: ignore[call-overload] + ) diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py new file mode 100644 index 00000000..466eeb76 --- /dev/null +++ b/pandas/core/array_algos/replace.py @@ -0,0 +1,161 @@ +""" +Methods used by Block.replace and related methods. +""" +from __future__ import annotations + +import operator +import re +from typing import ( + Any, + Pattern, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + Scalar, + npt, +) + +from pandas.core.dtypes.common import ( + is_datetimelike_v_numeric, + is_numeric_v_string_like, + is_re, + is_re_compilable, + is_scalar, +) +from pandas.core.dtypes.missing import isna + + +def should_use_regex(regex: bool, to_replace: Any) -> bool: + """ + Decide whether to treat `to_replace` as a regular expression. + """ + if is_re(to_replace): + regex = True + + regex = regex and is_re_compilable(to_replace) + + # Don't use regex if the pattern is empty. + regex = regex and re.compile(to_replace).pattern != "" + return regex + + +def compare_or_regex_search( + a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: npt.NDArray[np.bool_] +) -> ArrayLike | bool: + """ + Compare two array-like inputs of the same shape or two scalar values + + Calls operator.eq or re.search, depending on regex argument. If regex is + True, perform an element-wise regex matching. 
+ + Parameters + ---------- + a : array-like + b : scalar or regex pattern + regex : bool + mask : np.ndarray[bool] + + Returns + ------- + mask : array-like of bool + """ + if isna(b): + return ~mask + + def _check_comparison_types( + result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern + ): + """ + Raises an error if the two arrays (a,b) cannot be compared. + Otherwise, returns the comparison result as expected. + """ + if is_scalar(result) and isinstance(a, np.ndarray): + type_names = [type(a).__name__, type(b).__name__] + + type_names[0] = f"ndarray(dtype={a.dtype})" + + raise TypeError( + f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" + ) + + if not regex or not should_use_regex(regex, b): + # TODO: should use missing.mask_missing? + op = lambda x: operator.eq(x, b) + else: + op = np.vectorize( + lambda x: bool(re.search(b, x)) + if isinstance(x, str) and isinstance(b, (str, Pattern)) + else False + ) + + # GH#32621 use mask to avoid comparing to NAs + if isinstance(a, np.ndarray): + a = a[mask] + + if is_numeric_v_string_like(a, b): + # GH#29553 avoid deprecation warnings from numpy + return np.zeros(a.shape, dtype=bool) + + elif is_datetimelike_v_numeric(a, b): + # GH#29553 avoid deprecation warnings from numpy + _check_comparison_types(False, a, b) + return False + + result = op(a) + + if isinstance(result, np.ndarray) and mask is not None: + # The shape of the mask can differ to that of the result + # since we may compare only a subset of a's or b's elements + tmp = np.zeros(mask.shape, dtype=np.bool_) + np.place(tmp, mask, result) + result = tmp + + _check_comparison_types(result, a, b) + return result + + +def replace_regex( + values: ArrayLike, rx: re.Pattern, value, mask: npt.NDArray[np.bool_] | None +) -> None: + """ + Parameters + ---------- + values : ArrayLike + Object dtype. + rx : re.Pattern + value : Any + mask : np.ndarray[bool], optional + + Notes + ----- + Alters values in-place. 
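+
+    For instance (a minimal illustrative sketch), with
+    ``values = np.array(["bar", "foo"], dtype=object)``,
+    ``rx = re.compile("ba.")``, ``value = "new"`` and ``mask = None``,
+    ``values`` is mutated in place to ``["new", "foo"]``.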
+ """ + + # deal with replacing values with objects (strings) that match but + # whose replacement is not a string (numeric, nan, object) + if isna(value) or not isinstance(value, str): + + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return value if rx.search(s) is not None else s + else: + return s + + else: + # value is guaranteed to be a string here, s can be either a string + # or null if it's null it gets returned + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return rx.sub(value, s) + else: + return s + + f = np.vectorize(re_replacer, otypes=[np.object_]) + + if mask is None: + values[:] = f(values) + else: + values[mask] = f(values[mask]) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py new file mode 100644 index 00000000..188725f0 --- /dev/null +++ b/pandas/core/array_algos/take.py @@ -0,0 +1,584 @@ +from __future__ import annotations + +import functools +from typing import ( + TYPE_CHECKING, + cast, + overload, +) + +import numpy as np + +from pandas._libs import ( + algos as libalgos, + lib, +) +from pandas._typing import ( + ArrayLike, + npt, +) + +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_1d_only_ea_obj, +) +from pandas.core.dtypes.missing import na_value_for_dtype + +from pandas.core.construction import ensure_wrapped_if_datetimelike + +if TYPE_CHECKING: + from pandas.core.arrays._mixins import NDArrayBackedExtensionArray + from pandas.core.arrays.base import ExtensionArray + + +@overload +def take_nd( + arr: np.ndarray, + indexer, + axis: int = ..., + fill_value=..., + allow_fill: bool = ..., +) -> np.ndarray: + ... + + +@overload +def take_nd( + arr: ExtensionArray, + indexer, + axis: int = ..., + fill_value=..., + allow_fill: bool = ..., +) -> ArrayLike: + ... + + +def take_nd( + arr: ArrayLike, + indexer, + axis: int = 0, + fill_value=lib.no_default, + allow_fill: bool = True, +) -> ArrayLike: + + """ + Specialized Cython take which sets NaN values in one pass + + This dispatches to ``take`` defined on ExtensionArrays. It does not + currently dispatch to ``SparseArray.take`` for sparse ``arr``. + + Note: this function assumes that the indexer is a valid(ated) indexer with + no out of bound indices. + + Parameters + ---------- + arr : np.ndarray or ExtensionArray + Input array. + indexer : ndarray + 1-D array of indices to take, subarrays corresponding to -1 value + indices are filed with fill_value + axis : int, default 0 + Axis to take from + fill_value : any, default np.nan + Fill value to replace -1 values with + allow_fill : bool, default True + If False, indexer is assumed to contain no -1 values so no filling + will be done. This short-circuits computation of a mask. Result is + undefined if allow_fill == False and -1 is present in indexer. + + Returns + ------- + subarray : np.ndarray or ExtensionArray + May be the same type as the input, or cast to an ndarray. + """ + if fill_value is lib.no_default: + fill_value = na_value_for_dtype(arr.dtype, compat=False) + elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM": + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if arr.dtype != dtype: + # EA.take is strict about returning a new object of the same type + # so for that case cast upfront + arr = arr.astype(dtype) + + if not isinstance(arr, np.ndarray): + # i.e. ExtensionArray, + # includes for EA to catch DatetimeArray, TimedeltaArray + if not is_1d_only_ea_obj(arr): + # i.e. 
DatetimeArray, TimedeltaArray + arr = cast("NDArrayBackedExtensionArray", arr) + return arr.take( + indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis + ) + + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + + arr = np.asarray(arr) + return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill) + + +def _take_nd_ndarray( + arr: np.ndarray, + indexer: npt.NDArray[np.intp] | None, + axis: int, + fill_value, + allow_fill: bool, +) -> np.ndarray: + + if indexer is None: + indexer = np.arange(arr.shape[axis], dtype=np.intp) + dtype, fill_value = arr.dtype, arr.dtype.type() + else: + indexer = ensure_platform_int(indexer) + + dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value( + arr, indexer, fill_value, allow_fill + ) + + flip_order = False + if arr.ndim == 2 and arr.flags.f_contiguous: + flip_order = True + + if flip_order: + arr = arr.T + axis = arr.ndim - axis - 1 + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + out_shape_ = list(arr.shape) + out_shape_[axis] = len(indexer) + out_shape = tuple(out_shape_) + if arr.flags.f_contiguous and axis == arr.ndim - 1: + # minor tweak that can make an order-of-magnitude difference + # for dataframes initialized directly from 2-d ndarrays + # (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its + # f-contiguous transpose) + out = np.empty(out_shape, dtype=dtype, order="F") + else: + out = np.empty(out_shape, dtype=dtype) + + func = _get_take_nd_function( + arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info + ) + func(arr, indexer, out, fill_value) + + if flip_order: + out = out.T + return out + + +def take_1d( + arr: ArrayLike, + indexer: npt.NDArray[np.intp], + fill_value=None, + allow_fill: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> ArrayLike: + """ + Specialized version for 1D arrays. Differences compared to `take_nd`: + + - Assumes input array has already been converted to numpy array / EA + - Assumes indexer is already guaranteed to be intp dtype ndarray + - Only works for 1D arrays + + To ensure the lowest possible overhead. + + Note: similarly to `take_nd`, this function assumes that the indexer is + a valid(ated) indexer with no out of bound indices. + + Parameters + ---------- + arr : np.ndarray or ExtensionArray + Input array. + indexer : ndarray + 1-D array of indices to take (validated indices, intp dtype). + fill_value : any, default np.nan + Fill value to replace -1 values with + allow_fill : bool, default True + If False, indexer is assumed to contain no -1 values so no filling + will be done. This short-circuits computation of a mask. Result is + undefined if allow_fill == False and -1 is present in indexer. + mask : np.ndarray, optional, default None + If `allow_fill` is True, and the mask (where indexer == -1) is already + known, it can be passed to avoid recomputation. 
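+
+    Notes
+    -----
+    A minimal sketch of the intended behaviour (illustrative): taking
+    ``np.array([0, -1], dtype=np.intp)`` from ``np.array([1, 2, 3])`` with the
+    default ``fill_value`` promotes the result to float64 and returns
+    ``array([ 1., nan])``.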
+ """ + if not isinstance(arr, np.ndarray): + # ExtensionArray -> dispatch to their method + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + + if not allow_fill: + return arr.take(indexer) + + dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value( + arr, indexer, fill_value, True, mask + ) + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + out = np.empty(indexer.shape, dtype=dtype) + + func = _get_take_nd_function( + arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info + ) + func(arr, indexer, out, fill_value) + + return out + + +def take_2d_multi( + arr: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + fill_value=np.nan, +) -> np.ndarray: + """ + Specialized Cython take which sets NaN values in one pass. + """ + # This is only called from one place in DataFrame._reindex_multi, + # so we know indexer is well-behaved. + assert indexer is not None + assert indexer[0] is not None + assert indexer[1] is not None + + row_idx, col_idx = indexer + + row_idx = ensure_platform_int(row_idx) + col_idx = ensure_platform_int(col_idx) + indexer = row_idx, col_idx + mask_info = None + + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + # check if promotion is actually required based on indexer + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + mask_info = (row_mask, col_mask), (row_needs, col_needs) + + if not (row_needs or col_needs): + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) + dtype, fill_value = arr.dtype, arr.dtype.type() + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + out_shape = len(row_idx), len(col_idx) + out = np.empty(out_shape, dtype=dtype) + + func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None) + if func is None and arr.dtype != out.dtype: + func = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None) + if func is not None: + func = _convert_wrapper(func, out.dtype) + + if func is not None: + func(arr, indexer, out=out, fill_value=fill_value) + else: + # test_reindex_multi + _take_2d_multi_object( + arr, indexer, out, fill_value=fill_value, mask_info=mask_info + ) + + return out + + +@functools.lru_cache(maxsize=128) +def _get_take_nd_function_cached( + ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int +): + """ + Part of _get_take_nd_function below that doesn't need `mask_info` and thus + can be cached (mask_info potentially contains a numpy ndarray which is not + hashable and thus cannot be used as argument for cached function). + """ + tup = (arr_dtype.name, out_dtype.name) + if ndim == 1: + func = _take_1d_dict.get(tup, None) + elif ndim == 2: + if axis == 0: + func = _take_2d_axis0_dict.get(tup, None) + else: + func = _take_2d_axis1_dict.get(tup, None) + if func is not None: + return func + + # We get here with string, uint, float16, and complex dtypes that could + # potentially be handled in algos_take_helper. 
+ # Also a couple with (M8[ns], object) and (m8[ns], object) + tup = (out_dtype.name, out_dtype.name) + if ndim == 1: + func = _take_1d_dict.get(tup, None) + elif ndim == 2: + if axis == 0: + func = _take_2d_axis0_dict.get(tup, None) + else: + func = _take_2d_axis1_dict.get(tup, None) + if func is not None: + func = _convert_wrapper(func, out_dtype) + return func + + return None + + +def _get_take_nd_function( + ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int = 0, mask_info=None +): + """ + Get the appropriate "take" implementation for the given dimension, axis + and dtypes. + """ + func = None + if ndim <= 2: + # for this part we don't need `mask_info` -> use the cached algo lookup + func = _get_take_nd_function_cached(ndim, arr_dtype, out_dtype, axis) + + if func is None: + + def func(arr, indexer, out, fill_value=np.nan): + indexer = ensure_platform_int(indexer) + _take_nd_object( + arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info + ) + + return func + + +def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None): + def wrapper( + arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan + ): + if arr_dtype is not None: + arr = arr.view(arr_dtype) + if out_dtype is not None: + out = out.view(out_dtype) + if fill_wrap is not None: + fill_value = fill_wrap(fill_value) + f(arr, indexer, out, fill_value=fill_value) + + return wrapper + + +def _convert_wrapper(f, conv_dtype): + def wrapper( + arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan + ): + if conv_dtype == object: + # GH#39755 avoid casting dt64/td64 to integers + arr = ensure_wrapped_if_datetimelike(arr) + arr = arr.astype(conv_dtype) + f(arr, indexer, out, fill_value=fill_value) + + return wrapper + + +_take_1d_dict = { + ("int8", "int8"): libalgos.take_1d_int8_int8, + ("int8", "int32"): libalgos.take_1d_int8_int32, + ("int8", "int64"): libalgos.take_1d_int8_int64, + ("int8", "float64"): libalgos.take_1d_int8_float64, + ("int16", "int16"): libalgos.take_1d_int16_int16, + ("int16", "int32"): libalgos.take_1d_int16_int32, + ("int16", "int64"): libalgos.take_1d_int16_int64, + ("int16", "float64"): libalgos.take_1d_int16_float64, + ("int32", "int32"): libalgos.take_1d_int32_int32, + ("int32", "int64"): libalgos.take_1d_int32_int64, + ("int32", "float64"): libalgos.take_1d_int32_float64, + ("int64", "int64"): libalgos.take_1d_int64_int64, + ("int64", "float64"): libalgos.take_1d_int64_float64, + ("float32", "float32"): libalgos.take_1d_float32_float32, + ("float32", "float64"): libalgos.take_1d_float32_float64, + ("float64", "float64"): libalgos.take_1d_float64_float64, + ("object", "object"): libalgos.take_1d_object_object, + ("bool", "bool"): _view_wrapper(libalgos.take_1d_bool_bool, np.uint8, np.uint8), + ("bool", "object"): _view_wrapper(libalgos.take_1d_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + libalgos.take_1d_int64_int64, np.int64, np.int64, np.int64 + ), + ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper( + libalgos.take_1d_int64_int64, np.int64, np.int64, np.int64 + ), +} + +_take_2d_axis0_dict = { + ("int8", "int8"): libalgos.take_2d_axis0_int8_int8, + ("int8", "int32"): libalgos.take_2d_axis0_int8_int32, + ("int8", "int64"): libalgos.take_2d_axis0_int8_int64, + ("int8", "float64"): libalgos.take_2d_axis0_int8_float64, + ("int16", "int16"): libalgos.take_2d_axis0_int16_int16, + ("int16", "int32"): libalgos.take_2d_axis0_int16_int32, + ("int16", "int64"): libalgos.take_2d_axis0_int16_int64, + 
("int16", "float64"): libalgos.take_2d_axis0_int16_float64, + ("int32", "int32"): libalgos.take_2d_axis0_int32_int32, + ("int32", "int64"): libalgos.take_2d_axis0_int32_int64, + ("int32", "float64"): libalgos.take_2d_axis0_int32_float64, + ("int64", "int64"): libalgos.take_2d_axis0_int64_int64, + ("int64", "float64"): libalgos.take_2d_axis0_int64_float64, + ("float32", "float32"): libalgos.take_2d_axis0_float32_float32, + ("float32", "float64"): libalgos.take_2d_axis0_float32_float64, + ("float64", "float64"): libalgos.take_2d_axis0_float64_float64, + ("object", "object"): libalgos.take_2d_axis0_object_object, + ("bool", "bool"): _view_wrapper( + libalgos.take_2d_axis0_bool_bool, np.uint8, np.uint8 + ), + ("bool", "object"): _view_wrapper( + libalgos.take_2d_axis0_bool_object, np.uint8, None + ), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + libalgos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), + ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper( + libalgos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + +_take_2d_axis1_dict = { + ("int8", "int8"): libalgos.take_2d_axis1_int8_int8, + ("int8", "int32"): libalgos.take_2d_axis1_int8_int32, + ("int8", "int64"): libalgos.take_2d_axis1_int8_int64, + ("int8", "float64"): libalgos.take_2d_axis1_int8_float64, + ("int16", "int16"): libalgos.take_2d_axis1_int16_int16, + ("int16", "int32"): libalgos.take_2d_axis1_int16_int32, + ("int16", "int64"): libalgos.take_2d_axis1_int16_int64, + ("int16", "float64"): libalgos.take_2d_axis1_int16_float64, + ("int32", "int32"): libalgos.take_2d_axis1_int32_int32, + ("int32", "int64"): libalgos.take_2d_axis1_int32_int64, + ("int32", "float64"): libalgos.take_2d_axis1_int32_float64, + ("int64", "int64"): libalgos.take_2d_axis1_int64_int64, + ("int64", "float64"): libalgos.take_2d_axis1_int64_float64, + ("float32", "float32"): libalgos.take_2d_axis1_float32_float32, + ("float32", "float64"): libalgos.take_2d_axis1_float32_float64, + ("float64", "float64"): libalgos.take_2d_axis1_float64_float64, + ("object", "object"): libalgos.take_2d_axis1_object_object, + ("bool", "bool"): _view_wrapper( + libalgos.take_2d_axis1_bool_bool, np.uint8, np.uint8 + ), + ("bool", "object"): _view_wrapper( + libalgos.take_2d_axis1_bool_object, np.uint8, None + ), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + libalgos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), + ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper( + libalgos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + +_take_2d_multi_dict = { + ("int8", "int8"): libalgos.take_2d_multi_int8_int8, + ("int8", "int32"): libalgos.take_2d_multi_int8_int32, + ("int8", "int64"): libalgos.take_2d_multi_int8_int64, + ("int8", "float64"): libalgos.take_2d_multi_int8_float64, + ("int16", "int16"): libalgos.take_2d_multi_int16_int16, + ("int16", "int32"): libalgos.take_2d_multi_int16_int32, + ("int16", "int64"): libalgos.take_2d_multi_int16_int64, + ("int16", "float64"): libalgos.take_2d_multi_int16_float64, + ("int32", "int32"): libalgos.take_2d_multi_int32_int32, + ("int32", "int64"): libalgos.take_2d_multi_int32_int64, + ("int32", "float64"): libalgos.take_2d_multi_int32_float64, + ("int64", "int64"): libalgos.take_2d_multi_int64_int64, + ("int64", "float64"): libalgos.take_2d_multi_int64_float64, + ("float32", "float32"): libalgos.take_2d_multi_float32_float32, + ("float32", "float64"): libalgos.take_2d_multi_float32_float64, + ("float64", "float64"): 
libalgos.take_2d_multi_float64_float64, + ("object", "object"): libalgos.take_2d_multi_object_object, + ("bool", "bool"): _view_wrapper( + libalgos.take_2d_multi_bool_bool, np.uint8, np.uint8 + ), + ("bool", "object"): _view_wrapper( + libalgos.take_2d_multi_bool_object, np.uint8, None + ), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), + ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper( + libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + + +def _take_nd_object( + arr: np.ndarray, + indexer: npt.NDArray[np.intp], + out: np.ndarray, + axis: int, + fill_value, + mask_info, +): + if mask_info is not None: + mask, needs_masking = mask_info + else: + mask = indexer == -1 + needs_masking = mask.any() + if arr.dtype != out.dtype: + arr = arr.astype(out.dtype) + if arr.shape[axis] > 0: + arr.take(indexer, axis=axis, out=out) + if needs_masking: + outindexer = [slice(None)] * arr.ndim + outindexer[axis] = mask + out[tuple(outindexer)] = fill_value + + +def _take_2d_multi_object( + arr: np.ndarray, + indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]], + out: np.ndarray, + fill_value, + mask_info, +) -> None: + # this is not ideal, performance-wise, but it's better than raising + # an exception (best to optimize in Cython to avoid getting here) + row_idx, col_idx = indexer # both np.intp + if mask_info is not None: + (row_mask, col_mask), (row_needs, col_needs) = mask_info + else: + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + if fill_value is not None: + if row_needs: + out[row_mask, :] = fill_value + if col_needs: + out[:, col_mask] = fill_value + for i in range(len(row_idx)): + u_ = row_idx[i] + for j in range(len(col_idx)): + v = col_idx[j] + out[i, j] = arr[u_, v] + + +def _take_preprocess_indexer_and_fill_value( + arr: np.ndarray, + indexer: npt.NDArray[np.intp], + fill_value, + allow_fill: bool, + mask: npt.NDArray[np.bool_] | None = None, +): + mask_info: tuple[np.ndarray | None, bool] | None = None + + if not allow_fill: + dtype, fill_value = arr.dtype, arr.dtype.type() + mask_info = None, False + else: + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + # check if promotion is actually required based on indexer + if mask is not None: + needs_masking = True + else: + mask = indexer == -1 + needs_masking = bool(mask.any()) + mask_info = mask, needs_masking + if not needs_masking: + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) + dtype, fill_value = arr.dtype, arr.dtype.type() + + return dtype, fill_value, mask_info diff --git a/pandas/core/array_algos/transforms.py b/pandas/core/array_algos/transforms.py new file mode 100644 index 00000000..93b029c2 --- /dev/null +++ b/pandas/core/array_algos/transforms.py @@ -0,0 +1,40 @@ +""" +transforms.py is for shape-preserving functions. 
+""" + +from __future__ import annotations + +import numpy as np + + +def shift(values: np.ndarray, periods: int, axis: int, fill_value) -> np.ndarray: + new_values = values + + if periods == 0 or values.size == 0: + return new_values.copy() + + # make sure array sent to np.roll is c_contiguous + f_ordered = values.flags.f_contiguous + if f_ordered: + new_values = new_values.T + axis = new_values.ndim - axis - 1 + + if new_values.size: + new_values = np.roll( + new_values, + np.intp(periods), + axis=axis, + ) + + axis_indexer = [slice(None)] * values.ndim + if periods > 0: + axis_indexer[axis] = slice(None, periods) + else: + axis_indexer[axis] = slice(periods, None) + new_values[tuple(axis_indexer)] = fill_value + + # restore original order + if f_ordered: + new_values = new_values.T + + return new_values diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py new file mode 100644 index 00000000..280a599d --- /dev/null +++ b/pandas/core/arraylike.py @@ -0,0 +1,533 @@ +""" +Methods that can be shared by many array-like classes or subclasses: + Series + Index + ExtensionArray +""" +from __future__ import annotations + +import operator +from typing import Any +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.generic import ABCNDFrame + +from pandas.core import roperator +from pandas.core.construction import extract_array +from pandas.core.ops.common import unpack_zerodim_and_defer + +REDUCTION_ALIASES = { + "maximum": "max", + "minimum": "min", + "add": "sum", + "multiply": "prod", +} + + +class OpsMixin: + # ------------------------------------------------------------- + # Comparisons + + def _cmp_method(self, other, op): + return NotImplemented + + @unpack_zerodim_and_defer("__eq__") + def __eq__(self, other): + return self._cmp_method(other, operator.eq) + + @unpack_zerodim_and_defer("__ne__") + def __ne__(self, other): + return self._cmp_method(other, operator.ne) + + @unpack_zerodim_and_defer("__lt__") + def __lt__(self, other): + return self._cmp_method(other, operator.lt) + + @unpack_zerodim_and_defer("__le__") + def __le__(self, other): + return self._cmp_method(other, operator.le) + + @unpack_zerodim_and_defer("__gt__") + def __gt__(self, other): + return self._cmp_method(other, operator.gt) + + @unpack_zerodim_and_defer("__ge__") + def __ge__(self, other): + return self._cmp_method(other, operator.ge) + + # ------------------------------------------------------------- + # Logical Methods + + def _logical_method(self, other, op): + return NotImplemented + + @unpack_zerodim_and_defer("__and__") + def __and__(self, other): + return self._logical_method(other, operator.and_) + + @unpack_zerodim_and_defer("__rand__") + def __rand__(self, other): + return self._logical_method(other, roperator.rand_) + + @unpack_zerodim_and_defer("__or__") + def __or__(self, other): + return self._logical_method(other, operator.or_) + + @unpack_zerodim_and_defer("__ror__") + def __ror__(self, other): + return self._logical_method(other, roperator.ror_) + + @unpack_zerodim_and_defer("__xor__") + def __xor__(self, other): + return self._logical_method(other, operator.xor) + + @unpack_zerodim_and_defer("__rxor__") + def __rxor__(self, other): + return self._logical_method(other, roperator.rxor) + + # ------------------------------------------------------------- + # Arithmetic Methods + + def _arith_method(self, other, op): + 
return NotImplemented + + @unpack_zerodim_and_defer("__add__") + def __add__(self, other): + return self._arith_method(other, operator.add) + + @unpack_zerodim_and_defer("__radd__") + def __radd__(self, other): + return self._arith_method(other, roperator.radd) + + @unpack_zerodim_and_defer("__sub__") + def __sub__(self, other): + return self._arith_method(other, operator.sub) + + @unpack_zerodim_and_defer("__rsub__") + def __rsub__(self, other): + return self._arith_method(other, roperator.rsub) + + @unpack_zerodim_and_defer("__mul__") + def __mul__(self, other): + return self._arith_method(other, operator.mul) + + @unpack_zerodim_and_defer("__rmul__") + def __rmul__(self, other): + return self._arith_method(other, roperator.rmul) + + @unpack_zerodim_and_defer("__truediv__") + def __truediv__(self, other): + return self._arith_method(other, operator.truediv) + + @unpack_zerodim_and_defer("__rtruediv__") + def __rtruediv__(self, other): + return self._arith_method(other, roperator.rtruediv) + + @unpack_zerodim_and_defer("__floordiv__") + def __floordiv__(self, other): + return self._arith_method(other, operator.floordiv) + + @unpack_zerodim_and_defer("__rfloordiv") + def __rfloordiv__(self, other): + return self._arith_method(other, roperator.rfloordiv) + + @unpack_zerodim_and_defer("__mod__") + def __mod__(self, other): + return self._arith_method(other, operator.mod) + + @unpack_zerodim_and_defer("__rmod__") + def __rmod__(self, other): + return self._arith_method(other, roperator.rmod) + + @unpack_zerodim_and_defer("__divmod__") + def __divmod__(self, other): + return self._arith_method(other, divmod) + + @unpack_zerodim_and_defer("__rdivmod__") + def __rdivmod__(self, other): + return self._arith_method(other, roperator.rdivmod) + + @unpack_zerodim_and_defer("__pow__") + def __pow__(self, other): + return self._arith_method(other, operator.pow) + + @unpack_zerodim_and_defer("__rpow__") + def __rpow__(self, other): + return self._arith_method(other, roperator.rpow) + + +# ----------------------------------------------------------------------------- +# Helpers to implement __array_ufunc__ + + +def _is_aligned(frame, other): + """ + Helper to check if a DataFrame is aligned with another DataFrame or Series. + """ + from pandas import DataFrame + + if isinstance(other, DataFrame): + return frame._indexed_same(other) + else: + # Series -> match index + return frame.columns.equals(other.index) + + +def _maybe_fallback(ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any): + """ + In the future DataFrame, inputs to ufuncs will be aligned before applying + the ufunc, but for now we ignore the index but raise a warning if behaviour + would change in the future. + This helper detects the case where a warning is needed and then fallbacks + to applying the ufunc on arrays to avoid alignment. 
+ + See https://github.com/pandas-dev/pandas/pull/39239 + """ + from pandas import DataFrame + from pandas.core.generic import NDFrame + + n_alignable = sum(isinstance(x, NDFrame) for x in inputs) + n_frames = sum(isinstance(x, DataFrame) for x in inputs) + + if n_alignable >= 2 and n_frames >= 1: + # if there are 2 alignable inputs (Series or DataFrame), of which at least 1 + # is a DataFrame -> we would have had no alignment before -> warn that this + # will align in the future + + # the first frame is what determines the output index/columns in pandas < 1.2 + first_frame = next(x for x in inputs if isinstance(x, DataFrame)) + + # check if the objects are aligned or not + non_aligned = sum( + not _is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame) + ) + + # if at least one is not aligned -> warn and fallback to array behaviour + if non_aligned: + warnings.warn( + "Calling a ufunc on non-aligned DataFrames (or DataFrame/Series " + "combination). Currently, the indices are ignored and the result " + "takes the index/columns of the first DataFrame. In the future , " + "the DataFrames/Series will be aligned before applying the ufunc.\n" + "Convert one of the arguments to a NumPy array " + "(eg 'ufunc(df1, np.asarray(df2)') to keep the current behaviour, " + "or align manually (eg 'df1, df2 = df1.align(df2)') before passing to " + "the ufunc to obtain the future behaviour and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # keep the first dataframe of the inputs, other DataFrame/Series is + # converted to array for fallback behaviour + new_inputs = [] + for x in inputs: + if x is first_frame: + new_inputs.append(x) + elif isinstance(x, NDFrame): + new_inputs.append(np.asarray(x)) + else: + new_inputs.append(x) + + # call the ufunc on those transformed inputs + return getattr(ufunc, method)(*new_inputs, **kwargs) + + # signal that we didn't fallback / execute the ufunc yet + return NotImplemented + + +def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any): + """ + Compatibility with numpy ufuncs. + + See also + -------- + numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__ + """ + from pandas.core.generic import NDFrame + from pandas.core.internals import BlockManager + + cls = type(self) + + kwargs = _standardize_out_kwarg(**kwargs) + + # for backwards compatibility check and potentially fallback for non-aligned frames + result = _maybe_fallback(ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + # for binary ops, use our custom dunder methods + result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + # Determine if we should defer. + no_defer = ( + np.ndarray.__array_ufunc__, + cls.__array_ufunc__, + ) + + for item in inputs: + higher_priority = ( + hasattr(item, "__array_priority__") + and item.__array_priority__ > self.__array_priority__ + ) + has_array_ufunc = ( + hasattr(item, "__array_ufunc__") + and type(item).__array_ufunc__ not in no_defer + and not isinstance(item, self._HANDLED_TYPES) + ) + if higher_priority or has_array_ufunc: + return NotImplemented + + # align all the inputs. + types = tuple(type(x) for x in inputs) + alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)] + + if len(alignable) > 1: + # This triggers alignment. 
+ # At the moment, there aren't any ufuncs with more than two inputs + # so this ends up just being x1.index | x2.index, but we write + # it to handle *args. + + if len(set(types)) > 1: + # We currently don't handle ufunc(DataFrame, Series) + # well. Previously this raised an internal ValueError. We might + # support it someday, so raise a NotImplementedError. + raise NotImplementedError( + "Cannot apply ufunc {} to mixed DataFrame and Series " + "inputs.".format(ufunc) + ) + axes = self.axes + for obj in alignable[1:]: + # this relies on the fact that we aren't handling mixed + # series / frame ufuncs. + for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)): + axes[i] = ax1.union(ax2) + + reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes)) + inputs = tuple( + x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x + for x, t in zip(inputs, types) + ) + else: + reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes)) + + if self.ndim == 1: + names = [getattr(x, "name") for x in inputs if hasattr(x, "name")] + name = names[0] if len(set(names)) == 1 else None + reconstruct_kwargs = {"name": name} + else: + reconstruct_kwargs = {} + + def reconstruct(result): + if ufunc.nout > 1: + # np.modf, np.frexp, np.divmod + return tuple(_reconstruct(x) for x in result) + + return _reconstruct(result) + + def _reconstruct(result): + if lib.is_scalar(result): + return result + + if result.ndim != self.ndim: + if method == "outer": + if self.ndim == 2: + # we already deprecated for Series + msg = ( + "outer method for ufunc {} is not implemented on " + "pandas objects. Returning an ndarray, but in the " + "future this will raise a 'NotImplementedError'. " + "Consider explicitly converting the DataFrame " + "to an array with '.to_numpy()' first." + ) + warnings.warn( + msg.format(ufunc), FutureWarning, stacklevel=find_stack_level() + ) + return result + raise NotImplementedError + return result + if isinstance(result, BlockManager): + # we went through BlockManager.apply e.g. np.sqrt + result = self._constructor(result, **reconstruct_kwargs, copy=False) + else: + # we converted an array, lost our axes + result = self._constructor( + result, **reconstruct_axes, **reconstruct_kwargs, copy=False + ) + # TODO: When we support multiple values in __finalize__, this + # should pass alignable to `__finalize__` instead of self. + # Then `np.add(a, b)` would consider attrs from both a and b + # when a and b are NDFrames. + if len(alignable) == 1: + result = result.__finalize__(self) + return result + + if "out" in kwargs: + # e.g. test_multiindex_get_loc + result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) + return reconstruct(result) + + if method == "reduce": + # e.g. test.series.test_ufunc.test_reduce + result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + + # We still get here with kwargs `axis` for e.g. np.maximum.accumulate + # and `dtype` and `keepdims` for np.ptp + + if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1): + # Just give up on preserving types in the complex case. + # In theory we could preserve them for them. + # * nout>1 is doable if BlockManager.apply took nout and + # returned a Tuple[BlockManager]. + # * len(inputs) > 1 is doable when we know that we have + # aligned blocks / dtypes. + + # e.g. 
my_ufunc, modf, logaddexp, heaviside, subtract, add + inputs = tuple(np.asarray(x) for x in inputs) + # Note: we can't use default_array_ufunc here bc reindexing means + # that `self` may not be among `inputs` + result = getattr(ufunc, method)(*inputs, **kwargs) + elif self.ndim == 1: + # ufunc(series, ...) + inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) + else: + # ufunc(dataframe) + if method == "__call__" and not kwargs: + # for np.(..) calls + # kwargs cannot necessarily be handled block-by-block, so only + # take this path if there are no kwargs + mgr = inputs[0]._mgr + result = mgr.apply(getattr(ufunc, method)) + else: + # otherwise specific ufunc methods (eg np..accumulate(..)) + # Those can have an axis keyword and thus can't be called block-by-block + result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) + # e.g. np.negative (only one reached), with "where" and "out" in kwargs + + result = reconstruct(result) + return result + + +def _standardize_out_kwarg(**kwargs) -> dict: + """ + If kwargs contain "out1" and "out2", replace that with a tuple "out" + + np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or + `out1=out1, out2=out2)` + """ + if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs: + out1 = kwargs.pop("out1") + out2 = kwargs.pop("out2") + out = (out1, out2) + kwargs["out"] = out + return kwargs + + +def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + If we have an `out` keyword, then call the ufunc without `out` and then + set the result into the given `out`. + """ + + # Note: we assume _standardize_out_kwarg has already been called. + out = kwargs.pop("out") + where = kwargs.pop("where", None) + + result = getattr(ufunc, method)(*inputs, **kwargs) + + if result is NotImplemented: + return NotImplemented + + if isinstance(result, tuple): + # i.e. np.divmod, np.modf, np.frexp + if not isinstance(out, tuple) or len(out) != len(result): + raise NotImplementedError + + for arr, res in zip(out, result): + _assign_where(arr, res, where) + + return out + + if isinstance(out, tuple): + if len(out) == 1: + out = out[0] + else: + raise NotImplementedError + + _assign_where(out, result, where) + return out + + +def _assign_where(out, result, where) -> None: + """ + Set a ufunc result into 'out', masking with a 'where' argument if necessary. + """ + if where is None: + # no 'where' arg passed to ufunc + out[:] = result + else: + np.putmask(out, where, result) + + +def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Fallback to the behavior we would get if we did not define __array_ufunc__. + + Notes + ----- + We are assuming that `self` is among `inputs`. + """ + if not any(x is self for x in inputs): + raise NotImplementedError + + new_inputs = [x if x is not self else np.asarray(x) for x in inputs] + + return getattr(ufunc, method)(*new_inputs, **kwargs) + + +def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Dispatch ufunc reductions to self's reduction methods. + """ + assert method == "reduce" + + if len(inputs) != 1 or inputs[0] is not self: + return NotImplemented + + if ufunc.__name__ not in REDUCTION_ALIASES: + return NotImplemented + + method_name = REDUCTION_ALIASES[ufunc.__name__] + + # NB: we are assuming that min/max represent minimum/maximum methods, + # which would not be accurate for e.g. 
Timestamp.min + if not hasattr(self, method_name): + return NotImplemented + + if self.ndim > 1: + if isinstance(self, ABCNDFrame): + # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA + kwargs["numeric_only"] = False + + if "axis" not in kwargs: + # For DataFrame reductions we don't want the default axis=0 + # Note: np.min is not a ufunc, but uses array_function_dispatch, + # so calls DataFrame.min (without ever getting here) with the np.min + # default of axis=None, which DataFrame.min catches and changes to axis=0. + # np.minimum.reduce(df) gets here bc axis is not in kwargs, + # so we set axis=0 to match the behaviorof np.minimum.reduce(df.values) + kwargs["axis"] = 0 + + # By default, numpy's reductions do not skip NaNs, so we have to + # pass skipna=False + return getattr(self, method_name)(skipna=False, **kwargs) diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py new file mode 100644 index 00000000..79be8760 --- /dev/null +++ b/pandas/core/arrays/__init__.py @@ -0,0 +1,43 @@ +from pandas.core.arrays.arrow import ArrowExtensionArray +from pandas.core.arrays.base import ( + ExtensionArray, + ExtensionOpsMixin, + ExtensionScalarOpsMixin, +) +from pandas.core.arrays.boolean import BooleanArray +from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays.datetimes import DatetimeArray +from pandas.core.arrays.floating import FloatingArray +from pandas.core.arrays.integer import IntegerArray +from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays.masked import BaseMaskedArray +from pandas.core.arrays.numpy_ import PandasArray +from pandas.core.arrays.period import ( + PeriodArray, + period_array, +) +from pandas.core.arrays.sparse import SparseArray +from pandas.core.arrays.string_ import StringArray +from pandas.core.arrays.string_arrow import ArrowStringArray +from pandas.core.arrays.timedeltas import TimedeltaArray + +__all__ = [ + "ArrowExtensionArray", + "ExtensionArray", + "ExtensionOpsMixin", + "ExtensionScalarOpsMixin", + "ArrowStringArray", + "BaseMaskedArray", + "BooleanArray", + "Categorical", + "DatetimeArray", + "FloatingArray", + "IntegerArray", + "IntervalArray", + "PandasArray", + "PeriodArray", + "period_array", + "SparseArray", + "StringArray", + "TimedeltaArray", +] diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py new file mode 100644 index 00000000..f17d3430 --- /dev/null +++ b/pandas/core/arrays/_mixins.py @@ -0,0 +1,514 @@ +from __future__ import annotations + +from functools import wraps +from typing import ( + TYPE_CHECKING, + Any, + Literal, + Sequence, + TypeVar, + cast, + overload, +) + +import numpy as np + +from pandas._libs import lib +from pandas._libs.arrays import NDArrayBacked +from pandas._typing import ( + ArrayLike, + Dtype, + F, + PositionalIndexer2D, + PositionalIndexerTuple, + ScalarIndexer, + SequenceIndexer, + Shape, + TakeIndexer, + npt, + type_t, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import doc +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, + validate_insert_loc, +) + +from pandas.core.dtypes.common import ( + is_dtype_equal, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import array_equivalent + +from pandas.core import missing +from pandas.core.algorithms import ( + take, + unique, + value_counts, +) +from pandas.core.array_algos.quantile import 
quantile_with_mask +from pandas.core.array_algos.transforms import shift +from pandas.core.arrays.base import ExtensionArray +from pandas.core.construction import extract_array +from pandas.core.indexers import check_array_indexer +from pandas.core.sorting import nargminmax + +NDArrayBackedExtensionArrayT = TypeVar( + "NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray" +) + +if TYPE_CHECKING: + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + from pandas import Series + + +def ravel_compat(meth: F) -> F: + """ + Decorator to ravel a 2D array before passing it to a cython operation, + then reshape the result to our own shape. + """ + + @wraps(meth) + def method(self, *args, **kwargs): + if self.ndim == 1: + return meth(self, *args, **kwargs) + + flags = self._ndarray.flags + flat = self.ravel("K") + result = meth(flat, *args, **kwargs) + order = "F" if flags.f_contiguous else "C" + return result.reshape(self.shape, order=order) + + return cast(F, method) + + +class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): + """ + ExtensionArray that is backed by a single NumPy ndarray. + """ + + _ndarray: np.ndarray + + # scalar used to denote NA value inside our self._ndarray, e.g. -1 + # for Categorical, iNaT for Period. Outside of object dtype, + # self.isna() should be exactly locations in self._ndarray with + # _internal_fill_value. + _internal_fill_value: Any + + def _box_func(self, x): + """ + Wrap numpy type in our dtype.type if necessary. + """ + return x + + def _validate_scalar(self, value): + # used by NDArrayBackedExtensionIndex.insert + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + + def view(self, dtype: Dtype | None = None) -> ArrayLike: + # We handle datetime64, datetime64tz, timedelta64, and period + # dtypes here. Everything else we pass through to the underlying + # ndarray. 
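+ # Dispatch below: dtype None/self.dtype re-wraps the same backing data;
+ # a plain type object is passed straight to ndarray.view; period and
+ # tz-aware dtypes re-wrap the i8 view in the matching extension array,
+ # as do "M8[ns]"/"m8[ns]"; anything else falls through to ndarray.view.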
+ if dtype is None or dtype is self.dtype: + return self._from_backing_data(self._ndarray) + + if isinstance(dtype, type): + # we sometimes pass non-dtype objects, e.g np.ndarray; + # pass those through to the underlying ndarray + return self._ndarray.view(dtype) + + dtype = pandas_dtype(dtype) + arr = self._ndarray + + if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)): + cls = dtype.construct_array_type() + return cls(arr.view("i8"), dtype=dtype) + elif dtype == "M8[ns]": + from pandas.core.arrays import DatetimeArray + + return DatetimeArray(arr.view("i8"), dtype=dtype) + elif dtype == "m8[ns]": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray(arr.view("i8"), dtype=dtype) + + # error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible + # type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None, + # type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, + # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" + return arr.view(dtype=dtype) # type: ignore[arg-type] + + def take( + self: NDArrayBackedExtensionArrayT, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: Any = None, + axis: int = 0, + ) -> NDArrayBackedExtensionArrayT: + if allow_fill: + fill_value = self._validate_scalar(fill_value) + + new_data = take( + self._ndarray, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis, + ) + return self._from_backing_data(new_data) + + # ------------------------------------------------------------------------ + + def equals(self, other) -> bool: + if type(self) is not type(other): + return False + if not is_dtype_equal(self.dtype, other.dtype): + return False + return bool(array_equivalent(self._ndarray, other._ndarray)) + + @classmethod + def _from_factorized(cls, values, original): + assert values.dtype == original._ndarray.dtype + return original._from_backing_data(values) + + def _values_for_argsort(self) -> np.ndarray: + return self._ndarray + + def _values_for_factorize(self): + return self._ndarray, self._internal_fill_value + + # Signature of "argmin" incompatible with supertype "ExtensionArray" + def argmin(self, axis: int = 0, skipna: bool = True): # type: ignore[override] + # override base class by adding axis keyword + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return nargminmax(self, "argmin", axis=axis) + + # Signature of "argmax" incompatible with supertype "ExtensionArray" + def argmax(self, axis: int = 0, skipna: bool = True): # type: ignore[override] + # override base class by adding axis keyword + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return nargminmax(self, "argmax", axis=axis) + + def unique(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT: + new_data = unique(self._ndarray) + return self._from_backing_data(new_data) + + @classmethod + @doc(ExtensionArray._concat_same_type) + def _concat_same_type( + cls: type[NDArrayBackedExtensionArrayT], + to_concat: Sequence[NDArrayBackedExtensionArrayT], + axis: int = 0, + ) -> NDArrayBackedExtensionArrayT: + dtypes = {str(x.dtype) for x in to_concat} + if len(dtypes) != 1: + raise ValueError("to_concat must have the same dtype (tz)", dtypes) + + new_values = [x._ndarray for x in to_concat] + new_arr = np.concatenate(new_values, axis=axis) + return to_concat[0]._from_backing_data(new_arr) + + @doc(ExtensionArray.searchsorted) + def searchsorted( + self, + value: 
NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + # TODO(2.0): use _validate_setitem_value once dt64tz mismatched-timezone + # deprecation is enforced + npvalue = self._validate_searchsorted_value(value) + return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter) + + def _validate_searchsorted_value( + self, value: NumpyValueArrayLike | ExtensionArray + ) -> NumpyValueArrayLike: + # TODO(2.0): after deprecation in datetimelikearraymixin is enforced, + # we can remove this and use _validate_setitem_value directly + if isinstance(value, ExtensionArray): + return value.to_numpy() + else: + return value + + @doc(ExtensionArray.shift) + def shift(self, periods=1, fill_value=None, axis=0): + + fill_value = self._validate_shift_value(fill_value) + new_values = shift(self._ndarray, periods, axis, fill_value) + + return self._from_backing_data(new_values) + + def _validate_shift_value(self, fill_value): + # TODO(2.0): after deprecation in datetimelikearraymixin is enforced, + # we can remove this and use validate_fill_value directly + return self._validate_scalar(fill_value) + + def __setitem__(self, key, value) -> None: + key = check_array_indexer(self, key) + value = self._validate_setitem_value(value) + self._ndarray[key] = value + + def _validate_setitem_value(self, value): + return value + + @overload + def __getitem__(self, key: ScalarIndexer) -> Any: + ... + + @overload + def __getitem__( + self: NDArrayBackedExtensionArrayT, + key: SequenceIndexer | PositionalIndexerTuple, + ) -> NDArrayBackedExtensionArrayT: + ... + + def __getitem__( + self: NDArrayBackedExtensionArrayT, + key: PositionalIndexer2D, + ) -> NDArrayBackedExtensionArrayT | Any: + if lib.is_integer(key): + # fast-path + result = self._ndarray[key] + if self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + # error: Incompatible types in assignment (expression has type "ExtensionArray", + # variable has type "Union[int, slice, ndarray]") + key = extract_array(key, extract_numpy=True) # type: ignore[assignment] + key = check_array_indexer(self, key) + result = self._ndarray[key] + if lib.is_scalar(result): + return self._box_func(result) + + result = self._from_backing_data(result) + return result + + def _fill_mask_inplace( + self, method: str, limit, mask: npt.NDArray[np.bool_] + ) -> None: + # (for now) when self.ndim == 2, we assume axis=0 + func = missing.get_fill_func(method, ndim=self.ndim) + func(self._ndarray.T, limit=limit, mask=mask.T) + return + + @doc(ExtensionArray.fillna) + def fillna( + self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None + ) -> NDArrayBackedExtensionArrayT: + value, method = validate_fillna_kwargs( + value, method, validate_scalar_dict_value=False + ) + + mask = self.isna() + # error: Argument 2 to "check_value_size" has incompatible type + # "ExtensionArray"; expected "ndarray" + value = missing.check_value_size( + value, mask, len(self) # type: ignore[arg-type] + ) + + if mask.any(): + if method is not None: + # TODO: check value is None + # (for now) when self.ndim == 2, we assume axis=0 + func = missing.get_fill_func(method, ndim=self.ndim) + npvalues = self._ndarray.T.copy() + func(npvalues, limit=limit, mask=mask.T) + npvalues = npvalues.T + + # TODO: PandasArray didn't used to copy, need tests for this + new_values = self._from_backing_data(npvalues) + else: + # fill with value + new_values = self.copy() + 
new_values[mask] = value + else: + # We validate the fill_value even if there is nothing to fill + if value is not None: + self._validate_setitem_value(value) + + new_values = self.copy() + return new_values + + # ------------------------------------------------------------------------ + # Reductions + + def _wrap_reduction_result(self, axis: int | None, result): + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + # ------------------------------------------------------------------------ + # __array_function__ methods + + def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: + """ + Analogue to np.putmask(self, mask, value) + + Parameters + ---------- + mask : np.ndarray[bool] + value : scalar or listlike + + Raises + ------ + TypeError + If value cannot be cast to self.dtype. + """ + value = self._validate_setitem_value(value) + + np.putmask(self._ndarray, mask, value) + + def _where( + self: NDArrayBackedExtensionArrayT, mask: npt.NDArray[np.bool_], value + ) -> NDArrayBackedExtensionArrayT: + """ + Analogue to np.where(mask, self, value) + + Parameters + ---------- + mask : np.ndarray[bool] + value : scalar or listlike + + Raises + ------ + TypeError + If value cannot be cast to self.dtype. + """ + value = self._validate_setitem_value(value) + + res_values = np.where(mask, self._ndarray, value) + return self._from_backing_data(res_values) + + # ------------------------------------------------------------------------ + # Index compat methods + + def insert( + self: NDArrayBackedExtensionArrayT, loc: int, item + ) -> NDArrayBackedExtensionArrayT: + """ + Make new ExtensionArray inserting new item at location. Follows + Python list.append semantics for negative values. + + Parameters + ---------- + loc : int + item : object + + Returns + ------- + type(self) + """ + loc = validate_insert_loc(loc, len(self)) + + code = self._validate_scalar(item) + + new_vals = np.concatenate( + ( + self._ndarray[:loc], + np.asarray([code], dtype=self._ndarray.dtype), + self._ndarray[loc:], + ) + ) + return self._from_backing_data(new_vals) + + # ------------------------------------------------------------------------ + # Additional array methods + # These are not part of the EA API, but we implement them because + # pandas assumes they're there. + + def value_counts(self, dropna: bool = True) -> Series: + """ + Return a Series containing counts of unique values. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NA values. + + Returns + ------- + Series + """ + if self.ndim != 1: + raise NotImplementedError + + from pandas import ( + Index, + Series, + ) + + if dropna: + # error: Unsupported operand type for ~ ("ExtensionArray") + values = self[~self.isna()]._ndarray # type: ignore[operator] + else: + values = self._ndarray + + result = value_counts(values, sort=False, dropna=dropna) + + index_arr = self._from_backing_data(np.asarray(result.index._data)) + index = Index(index_arr, name=result.index.name) + return Series(result._values, index=index, name=result.name) + + def _quantile( + self: NDArrayBackedExtensionArrayT, + qs: npt.NDArray[np.float64], + interpolation: str, + ) -> NDArrayBackedExtensionArrayT: + # TODO: disable for Categorical if not ordered? 
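+ # Flow: build the NA mask from isna(), pass it together with
+ # _internal_fill_value to quantile_with_mask, then cast the raw result
+ # and re-wrap it via _from_backing_data.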
+ + mask = np.asarray(self.isna()) + arr = self._ndarray + fill_value = self._internal_fill_value + + res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation) + + res_values = self._cast_quantile_result(res_values) + return self._from_backing_data(res_values) + + # TODO: see if we can share this with other dispatch-wrapping methods + def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray: + """ + Cast the result of quantile_with_mask to an appropriate dtype + to pass to _from_backing_data in _quantile. + """ + return res_values + + # ------------------------------------------------------------------------ + # numpy-like methods + + @classmethod + def _empty( + cls: type_t[NDArrayBackedExtensionArrayT], shape: Shape, dtype: ExtensionDtype + ) -> NDArrayBackedExtensionArrayT: + """ + Analogous to np.empty(shape, dtype=dtype) + + Parameters + ---------- + shape : tuple[int] + dtype : ExtensionDtype + """ + # The base implementation uses a naive approach to find the dtype + # for the backing ndarray + arr = cls._from_sequence([], dtype=dtype) + backing = np.empty(shape, dtype=arr._ndarray.dtype) + return arr._from_backing_data(backing) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py new file mode 100644 index 00000000..3bef3e59 --- /dev/null +++ b/pandas/core/arrays/_ranges.py @@ -0,0 +1,194 @@ +""" +Helper functions to generate range-like data for DatetimeArray +(and possibly TimedeltaArray/PeriodArray) +""" +from __future__ import annotations + +import numpy as np + +from pandas._libs.lib import i8max +from pandas._libs.tslibs import ( + BaseOffset, + OutOfBoundsDatetime, + Timedelta, + Timestamp, + iNaT, +) +from pandas._typing import npt + + +def generate_regular_range( + start: Timestamp | Timedelta | None, + end: Timestamp | Timedelta | None, + periods: int | None, + freq: BaseOffset, +) -> npt.NDArray[np.intp]: + """ + Generate a range of dates or timestamps with the spans between dates + described by the given `freq` DateOffset. + + Parameters + ---------- + start : Timedelta, Timestamp or None + First point of produced date range. + end : Timedelta, Timestamp or None + Last point of produced date range. + periods : int or None + Number of periods in produced date range. + freq : Tick + Describes space between dates in produced date range. + + Returns + ------- + ndarray[np.int64] Representing nanoseconds. + """ + istart = start.value if start is not None else None + iend = end.value if end is not None else None + stride = freq.nanos + + if periods is None and istart is not None and iend is not None: + b = istart + # cannot just use e = Timestamp(end) + 1 because arange breaks when + # stride is too large, see GH10887 + e = b + (iend - b) // stride * stride + stride // 2 + 1 + elif istart is not None and periods is not None: + b = istart + e = _generate_range_overflow_safe(b, periods, stride, side="start") + elif iend is not None and periods is not None: + e = iend + stride + b = _generate_range_overflow_safe(e, periods, stride, side="end") + else: + raise ValueError( + "at least 'start' or 'end' should be specified if a 'period' is given." + ) + + with np.errstate(over="raise"): + # If the range is sufficiently large, np.arange may overflow + # and incorrectly return an empty array if not caught. 
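+ # (2**63 nanoseconds is only about 292 years, so endpoints near the
+ # int64 bounds can overflow inside np.arange's arithmetic)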
+ try: + values = np.arange(b, e, stride, dtype=np.int64) + except FloatingPointError: + xdr = [b] + while xdr[-1] != e: + xdr.append(xdr[-1] + stride) + values = np.array(xdr[:-1], dtype=np.int64) + return values + + +def _generate_range_overflow_safe( + endpoint: int, periods: int, stride: int, side: str = "start" +) -> int: + """ + Calculate the second endpoint for passing to np.arange, checking + to avoid an integer overflow. Catch OverflowError and re-raise + as OutOfBoundsDatetime. + + Parameters + ---------- + endpoint : int + nanosecond timestamp of the known endpoint of the desired range + periods : int + number of periods in the desired range + stride : int + nanoseconds between periods in the desired range + side : {'start', 'end'} + which end of the range `endpoint` refers to + + Returns + ------- + other_end : int + + Raises + ------ + OutOfBoundsDatetime + """ + # GH#14187 raise instead of incorrectly wrapping around + assert side in ["start", "end"] + + i64max = np.uint64(i8max) + msg = f"Cannot generate range with {side}={endpoint} and periods={periods}" + + with np.errstate(over="raise"): + # if periods * strides cannot be multiplied within the *uint64* bounds, + # we cannot salvage the operation by recursing, so raise + try: + addend = np.uint64(periods) * np.uint64(np.abs(stride)) + except FloatingPointError as err: + raise OutOfBoundsDatetime(msg) from err + + if np.abs(addend) <= i64max: + # relatively easy case without casting concerns + return _generate_range_overflow_safe_signed(endpoint, periods, stride, side) + + elif (endpoint > 0 and side == "start" and stride > 0) or ( + endpoint < 0 and side == "end" and stride > 0 + ): + # no chance of not-overflowing + raise OutOfBoundsDatetime(msg) + + elif side == "end" and endpoint > i64max and endpoint - stride <= i64max: + # in _generate_regular_range we added `stride` thereby overflowing + # the bounds. Adjust to fix this. + return _generate_range_overflow_safe( + endpoint - stride, periods - 1, stride, side + ) + + # split into smaller pieces + mid_periods = periods // 2 + remaining = periods - mid_periods + assert 0 < remaining < periods, (remaining, periods, endpoint, stride) + + midpoint = _generate_range_overflow_safe(endpoint, mid_periods, stride, side) + return _generate_range_overflow_safe(midpoint, remaining, stride, side) + + +def _generate_range_overflow_safe_signed( + endpoint: int, periods: int, stride: int, side: str +) -> int: + """ + A special case for _generate_range_overflow_safe where `periods * stride` + can be calculated without overflowing int64 bounds. + """ + assert side in ["start", "end"] + if side == "end": + stride *= -1 + + with np.errstate(over="raise"): + addend = np.int64(periods) * np.int64(stride) + try: + # easy case with no overflows + result = np.int64(endpoint) + addend + if result == iNaT: + # Putting this into a DatetimeArray/TimedeltaArray + # would incorrectly be interpreted as NaT + raise OverflowError + # error: Incompatible return value type (got "signedinteger[_64Bit]", + # expected "int") + return result # type: ignore[return-value] + except (FloatingPointError, OverflowError): + # with endpoint negative and addend positive we risk + # FloatingPointError; with reversed signed we risk OverflowError + pass + + # if stride and endpoint had opposite signs, then endpoint + addend + # should never overflow. 
so they must have the same signs + assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0) + + if stride > 0: + # watch out for very special case in which we just slightly + # exceed implementation bounds, but when passing the result to + # np.arange will get a result slightly within the bounds + + # error: Incompatible types in assignment (expression has type + # "unsignedinteger[_64Bit]", variable has type "signedinteger[_64Bit]") + result = np.uint64(endpoint) + np.uint64(addend) # type: ignore[assignment] + i64max = np.uint64(i8max) + assert result > i64max + if result <= i64max + np.uint64(stride): + # error: Incompatible return value type (got "unsignedinteger", expected + # "int") + return result # type: ignore[return-value] + + raise OutOfBoundsDatetime( + f"Cannot generate range with {side}={endpoint} and periods={periods}" + ) diff --git a/pandas/core/arrays/arrow/__init__.py b/pandas/core/arrays/arrow/__init__.py new file mode 100644 index 00000000..e7fa6fae --- /dev/null +++ b/pandas/core/arrays/arrow/__init__.py @@ -0,0 +1,4 @@ +from pandas.core.arrays.arrow.array import ArrowExtensionArray +from pandas.core.arrays.arrow.dtype import ArrowDtype + +__all__ = ["ArrowDtype", "ArrowExtensionArray"] diff --git a/pandas/core/arrays/arrow/_arrow_utils.py b/pandas/core/arrays/arrow/_arrow_utils.py new file mode 100644 index 00000000..6e6ef6a2 --- /dev/null +++ b/pandas/core/arrays/arrow/_arrow_utils.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +import warnings + +import numpy as np +import pyarrow + +from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level + + +def fallback_performancewarning(version: str | None = None) -> None: + """ + Raise a PerformanceWarning for falling back to ExtensionArray's + non-pyarrow method + """ + msg = "Falling back on a non-pyarrow code path which may decrease performance." + if version is not None: + msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning." + warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) + + +def pyarrow_array_to_numpy_and_mask( + arr, dtype: np.dtype +) -> tuple[np.ndarray, np.ndarray]: + """ + Convert a primitive pyarrow.Array to a numpy array and boolean mask based + on the buffers of the Array. + + At the moment pyarrow.BooleanArray is not supported. + + Parameters + ---------- + arr : pyarrow.Array + dtype : numpy.dtype + + Returns + ------- + (data, mask) + Tuple of two numpy arrays with the raw data (with specified dtype) and + a boolean mask (validity mask, so False means missing) + """ + dtype = np.dtype(dtype) + + buflist = arr.buffers() + # Since Arrow buffers might contain padding and the data might be offset, + # the buffer gets sliced here before handing it to numpy. 
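+ # For primitive arrays, buffers() is [validity_bitmap, data], so
+ # buflist[1] below is the data buffer and buflist[0] the null bitmap.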
+ # See also https://github.com/pandas-dev/pandas/issues/40896 + offset = arr.offset * dtype.itemsize + length = len(arr) * dtype.itemsize + data_buf = buflist[1][offset : offset + length] + data = np.frombuffer(data_buf, dtype=dtype) + bitmask = buflist[0] + if bitmask is not None: + mask = pyarrow.BooleanArray.from_buffers( + pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset + ) + mask = np.asarray(mask) + else: + mask = np.ones(len(arr), dtype=bool) + return data, mask diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py new file mode 100644 index 00000000..4dfd8942 --- /dev/null +++ b/pandas/core/arrays/arrow/array.py @@ -0,0 +1,1086 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + TypeVar, +) + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + Dtype, + PositionalIndexer, + TakeIndexer, + npt, +) +from pandas.compat import ( + pa_version_under1p01, + pa_version_under2p0, + pa_version_under3p0, + pa_version_under4p0, + pa_version_under5p0, + pa_version_under6p0, + pa_version_under7p0, +) +from pandas.util._decorators import ( + deprecate_nonkeyword_arguments, + doc, +) + +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_integer, + is_integer_dtype, + is_scalar, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.algorithms import resolve_na_sentinel +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays.base import ExtensionArray +from pandas.core.indexers import ( + check_array_indexer, + unpack_tuple_and_ellipses, + validate_indices, +) + +if not pa_version_under1p01: + import pyarrow as pa + import pyarrow.compute as pc + + from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning + from pandas.core.arrays.arrow.dtype import ArrowDtype + + ARROW_CMP_FUNCS = { + "eq": pc.equal, + "ne": pc.not_equal, + "lt": pc.less, + "gt": pc.greater, + "le": pc.less_equal, + "ge": pc.greater_equal, + } + + ARROW_LOGICAL_FUNCS = { + "and": NotImplemented if pa_version_under2p0 else pc.and_kleene, + "rand": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.and_kleene(y, x), + "or": NotImplemented if pa_version_under2p0 else pc.or_kleene, + "ror": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.or_kleene(y, x), + "xor": NotImplemented if pa_version_under2p0 else pc.xor, + "rxor": NotImplemented if pa_version_under2p0 else lambda x, y: pc.xor(y, x), + } + + def cast_for_truediv( + arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar + ) -> pa.ChunkedArray: + # Ensure int / int -> float mirroring Python/Numpy behavior + # as pc.divide_checked(int, int) -> int + if pa.types.is_integer(arrow_array.type) and pa.types.is_integer( + pa_object.type + ): + return arrow_array.cast(pa.float64()) + return arrow_array + + def floordiv_compat( + left: pa.ChunkedArray | pa.Array | pa.Scalar, + right: pa.ChunkedArray | pa.Array | pa.Scalar, + ) -> pa.ChunkedArray: + # Ensure int // int -> int mirroring Python/Numpy behavior + # as pc.floor(pc.divide_checked(int, int)) -> float + result = pc.floor(pc.divide_checked(left, right)) + if pa.types.is_integer(left.type) and pa.types.is_integer(right.type): + result = result.cast(left.type) + return result + + ARROW_ARITHMETIC_FUNCS = { + "add": NotImplemented if pa_version_under2p0 else pc.add_checked, + "radd": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.add_checked(y, x), + "sub": NotImplemented if pa_version_under2p0 else 
pc.subtract_checked, + "rsub": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.subtract_checked(y, x), + "mul": NotImplemented if pa_version_under2p0 else pc.multiply_checked, + "rmul": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.multiply_checked(y, x), + "truediv": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.divide_checked(cast_for_truediv(x, y), y), + "rtruediv": NotImplemented + if pa_version_under2p0 + else lambda x, y: pc.divide_checked(y, cast_for_truediv(x, y)), + "floordiv": NotImplemented + if pa_version_under2p0 + else lambda x, y: floordiv_compat(x, y), + "rfloordiv": NotImplemented + if pa_version_under2p0 + else lambda x, y: floordiv_compat(y, x), + "mod": NotImplemented, + "rmod": NotImplemented, + "divmod": NotImplemented, + "rdivmod": NotImplemented, + "pow": NotImplemented if pa_version_under4p0 else pc.power_checked, + "rpow": NotImplemented + if pa_version_under4p0 + else lambda x, y: pc.power_checked(y, x), + } + +if TYPE_CHECKING: + from pandas import Series + +ArrowExtensionArrayT = TypeVar("ArrowExtensionArrayT", bound="ArrowExtensionArray") + + +def to_pyarrow_type( + dtype: ArrowDtype | pa.DataType | Dtype | None, +) -> pa.DataType | None: + """ + Convert dtype to a pyarrow type instance. + """ + if isinstance(dtype, ArrowDtype): + pa_dtype = dtype.pyarrow_dtype + elif isinstance(dtype, pa.DataType): + pa_dtype = dtype + elif dtype: + # Accepts python types too + pa_dtype = pa.from_numpy_dtype(dtype) + else: + pa_dtype = None + return pa_dtype + + +class ArrowExtensionArray(OpsMixin, ExtensionArray): + """ + Pandas ExtensionArray backed by a PyArrow ChunkedArray. + + .. warning:: + + ArrowExtensionArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : pyarrow.Array or pyarrow.ChunkedArray + + Attributes + ---------- + None + + Methods + ------- + None + + Returns + ------- + ArrowExtensionArray + + Notes + ----- + Most methods are implemented using `pyarrow compute functions. `__ + Some methods may either raise an exception or raise a ``PerformanceWarning`` if an + associated compute function is not available based on the installed version of PyArrow. + + Please install the latest version of PyArrow to enable the best functionality and avoid + potential bugs in prior versions of PyArrow. + + Examples + -------- + Create an ArrowExtensionArray with :func:`pandas.array`: + + >>> pd.array([1, 1, None], dtype="int64[pyarrow]") + + [1, 1, ] + Length: 3, dtype: int64[pyarrow] + """ # noqa: E501 (http link too long) + + _data: pa.ChunkedArray + _dtype: ArrowDtype + + def __init__(self, values: pa.Array | pa.ChunkedArray) -> None: + if pa_version_under1p01: + msg = "pyarrow>=1.0.0 is required for PyArrow backed ArrowExtensionArray." + raise ImportError(msg) + if isinstance(values, pa.Array): + self._data = pa.chunked_array([values]) + elif isinstance(values, pa.ChunkedArray): + self._data = values + else: + raise ValueError( + f"Unsupported type '{type(values)}' for ArrowExtensionArray" + ) + self._dtype = ArrowDtype(self._data.type) + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): + """ + Construct a new ExtensionArray from a sequence of scalars. 
+ """ + pa_dtype = to_pyarrow_type(dtype) + is_cls = isinstance(scalars, cls) + if is_cls or isinstance(scalars, (pa.Array, pa.ChunkedArray)): + if is_cls: + scalars = scalars._data + if pa_dtype: + scalars = scalars.cast(pa_dtype) + return cls(scalars) + else: + return cls( + pa.chunked_array(pa.array(scalars, type=pa_dtype, from_pandas=True)) + ) + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype: Dtype | None = None, copy=False + ): + """ + Construct a new ExtensionArray from a sequence of strings. + """ + pa_type = to_pyarrow_type(dtype) + if pa_type is None: + # Let pyarrow try to infer or raise + scalars = strings + elif pa.types.is_timestamp(pa_type): + from pandas.core.tools.datetimes import to_datetime + + scalars = to_datetime(strings, errors="raise") + elif pa.types.is_date(pa_type): + from pandas.core.tools.datetimes import to_datetime + + scalars = to_datetime(strings, errors="raise").date + elif pa.types.is_duration(pa_type): + from pandas.core.tools.timedeltas import to_timedelta + + scalars = to_timedelta(strings, errors="raise") + elif pa.types.is_time(pa_type): + from pandas.core.tools.times import to_time + + # "coerce" to allow "null times" (None) to not raise + scalars = to_time(strings, errors="coerce") + elif pa.types.is_boolean(pa_type): + from pandas.core.arrays import BooleanArray + + scalars = BooleanArray._from_sequence_of_strings(strings).to_numpy() + elif ( + pa.types.is_integer(pa_type) + or pa.types.is_floating(pa_type) + or pa.types.is_decimal(pa_type) + ): + from pandas.core.tools.numeric import to_numeric + + scalars = to_numeric(strings, errors="raise") + else: + raise NotImplementedError( + f"Converting strings to {pa_type} is not implemented." + ) + return cls._from_sequence(scalars, dtype=pa_type, copy=copy) + + def __getitem__(self, item: PositionalIndexer): + """Select a subset of self. + + Parameters + ---------- + item : int, slice, or ndarray + * int: The position in 'self' to get. + * slice: A slice object, where 'start', 'stop', and 'step' are + integers or None + * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + + Returns + ------- + item : scalar or ExtensionArray + + Notes + ----- + For scalar ``item``, return a scalar value suitable for the array's + type. This should be an instance of ``self.dtype.type``. + For slice ``key``, return an instance of ``ExtensionArray``, even + if the slice is length 0 or 1. + For a boolean mask, return an instance of ``ExtensionArray``, filtered + to the values where ``item`` is True. + """ + item = check_array_indexer(self, item) + + if isinstance(item, np.ndarray): + if not len(item): + # Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string] + if self._dtype.name == "string" and self._dtype.storage == "pyarrow": + pa_dtype = pa.string() + else: + pa_dtype = self._dtype.pyarrow_dtype + return type(self)(pa.chunked_array([], type=pa_dtype)) + elif is_integer_dtype(item.dtype): + return self.take(item) + elif is_bool_dtype(item.dtype): + return type(self)(self._data.filter(item)) + else: + raise IndexError( + "Only integers, slices and integer or " + "boolean arrays are valid indices." + ) + elif isinstance(item, tuple): + item = unpack_tuple_and_ellipses(item) + + # error: Non-overlapping identity check (left operand type: + # "Union[Union[int, integer[Any]], Union[slice, List[int], + # ndarray[Any, Any]]]", right operand type: "ellipsis") + if item is Ellipsis: # type: ignore[comparison-overlap] + # TODO: should be handled by pyarrow? 
+ item = slice(None) + + if is_scalar(item) and not is_integer(item): + # e.g. "foo" or 2.5 + # exception message copied from numpy + raise IndexError( + r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis " + r"(`None`) and integer or boolean arrays are valid indices" + ) + # We are not an array indexer, so maybe e.g. a slice or integer + # indexer. We dispatch to pyarrow. + value = self._data[item] + if isinstance(value, pa.ChunkedArray): + return type(self)(value) + else: + scalar = value.as_py() + if scalar is None: + return self._dtype.na_value + else: + return scalar + + def __arrow_array__(self, type=None): + """Convert myself to a pyarrow ChunkedArray.""" + return self._data + + def __invert__(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: + if pa_version_under2p0: + raise NotImplementedError("__invert__ not implement for pyarrow < 2.0") + return type(self)(pc.invert(self._data)) + + def __neg__(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: + return type(self)(pc.negate_checked(self._data)) + + def __pos__(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: + return type(self)(self._data) + + def __abs__(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: + return type(self)(pc.abs_checked(self._data)) + + def _cmp_method(self, other, op): + from pandas.arrays import BooleanArray + + pc_func = ARROW_CMP_FUNCS[op.__name__] + if isinstance(other, ArrowExtensionArray): + result = pc_func(self._data, other._data) + elif isinstance(other, (np.ndarray, list)): + result = pc_func(self._data, other) + elif is_scalar(other): + try: + result = pc_func(self._data, pa.scalar(other)) + except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid): + mask = isna(self) | isna(other) + valid = ~mask + result = np.zeros(len(self), dtype="bool") + result[valid] = op(np.array(self)[valid], other) + return BooleanArray(result, mask) + else: + raise NotImplementedError( + f"{op.__name__} not implemented for {type(other)}" + ) + + if pa_version_under2p0: + result = result.to_pandas().values + else: + result = result.to_numpy() + return BooleanArray._from_sequence(result) + + def _evaluate_op_method(self, other, op, arrow_funcs): + pc_func = arrow_funcs[op.__name__] + if pc_func is NotImplemented: + raise NotImplementedError(f"{op.__name__} not implemented.") + if isinstance(other, ArrowExtensionArray): + result = pc_func(self._data, other._data) + elif isinstance(other, (np.ndarray, list)): + result = pc_func(self._data, pa.array(other, from_pandas=True)) + elif is_scalar(other): + result = pc_func(self._data, pa.scalar(other)) + else: + raise NotImplementedError( + f"{op.__name__} not implemented for {type(other)}" + ) + return type(self)(result) + + def _logical_method(self, other, op): + return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS) + + def _arith_method(self, other, op): + return self._evaluate_op_method(other, op, ARROW_ARITHMETIC_FUNCS) + + def equals(self, other) -> bool: + if not isinstance(other, ArrowExtensionArray): + return False + # I'm told that pyarrow makes __eq__ behave like pandas' equals; + # TODO: is this documented somewhere? + return self._data == other._data + + @property + def dtype(self) -> ArrowDtype: + """ + An instance of 'ExtensionDtype'. + """ + return self._dtype + + @property + def nbytes(self) -> int: + """ + The number of bytes needed to store this object in memory. + """ + return self._data.nbytes + + def __len__(self) -> int: + """ + Length of this array. 
+ + Returns + ------- + length : int + """ + return len(self._data) + + @property + def _hasna(self) -> bool: + return self._data.null_count > 0 + + def isna(self) -> npt.NDArray[np.bool_]: + """ + Boolean NumPy array indicating if each value is missing. + + This should return a 1-D array the same length as 'self'. + """ + if pa_version_under2p0: + return self._data.is_null().to_pandas().values + else: + return self._data.is_null().to_numpy() + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def argsort( + self, + ascending: bool = True, + kind: str = "quicksort", + na_position: str = "last", + *args, + **kwargs, + ) -> np.ndarray: + order = "ascending" if ascending else "descending" + null_placement = {"last": "at_end", "first": "at_start"}.get(na_position, None) + if null_placement is None or pa_version_under7p0: + # Although pc.array_sort_indices exists in version 6 + # there's a bug that affects the pa.ChunkedArray backing + # https://issues.apache.org/jira/browse/ARROW-12042 + fallback_performancewarning("7") + return super().argsort( + ascending=ascending, kind=kind, na_position=na_position + ) + + result = pc.array_sort_indices( + self._data, order=order, null_placement=null_placement + ) + if pa_version_under2p0: + np_result = result.to_pandas().values + else: + np_result = result.to_numpy() + return np_result.astype(np.intp, copy=False) + + def _argmin_max(self, skipna: bool, method: str) -> int: + if self._data.length() in (0, self._data.null_count) or ( + self._hasna and not skipna + ): + # For empty or all null, pyarrow returns -1 but pandas expects TypeError + # For skipna=False and data w/ null, pandas expects NotImplementedError + # let ExtensionArray.arg{max|min} raise + return getattr(super(), f"arg{method}")(skipna=skipna) + + if pa_version_under6p0: + raise NotImplementedError( + f"arg{method} only implemented for pyarrow version >= 6.0" + ) + + value = getattr(pc, method)(self._data, skip_nulls=skipna) + return pc.index(self._data, value).as_py() + + def argmin(self, skipna: bool = True) -> int: + return self._argmin_max(skipna, "min") + + def argmax(self, skipna: bool = True) -> int: + return self._argmin_max(skipna, "max") + + def copy(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: + """ + Return a shallow copy of the array. + + Underlying ChunkedArray is immutable, so a deep copy is unnecessary. + + Returns + ------- + type(self) + """ + return type(self)(self._data) + + def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: + """ + Return ArrowExtensionArray without NA values. + + Returns + ------- + ArrowExtensionArray + """ + if pa_version_under6p0: + fallback_performancewarning(version="6") + return super().dropna() + else: + return type(self)(pc.drop_null(self._data)) + + def isin(self, values) -> npt.NDArray[np.bool_]: + if pa_version_under2p0: + fallback_performancewarning(version="2") + return super().isin(values) + + # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True + # for null values, so we short-circuit to return all False array. 
+ if not len(values): + return np.zeros(len(self), dtype=bool) + + kwargs = {} + if pa_version_under3p0: + # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises + # with unexpected keyword argument in pyarrow 3.0.0+ + kwargs["skip_null"] = True + + result = pc.is_in( + self._data, value_set=pa.array(values, from_pandas=True), **kwargs + ) + # pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls + # to False + return np.array(result, dtype=np.bool_) + + def _values_for_factorize(self) -> tuple[np.ndarray, Any]: + """ + Return an array and missing value suitable for factorization. + + Returns + ------- + values : ndarray + na_value : pd.NA + + Notes + ----- + The values returned by this method are also used in + :func:`pandas.util.hash_pandas_object`. + """ + if pa_version_under2p0: + values = self._data.to_pandas().values + else: + values = self._data.to_numpy() + return values, self.dtype.na_value + + @doc(ExtensionArray.factorize) + def factorize( + self, + na_sentinel: int | lib.NoDefault = lib.no_default, + use_na_sentinel: bool | lib.NoDefault = lib.no_default, + ) -> tuple[np.ndarray, ExtensionArray]: + resolved_na_sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel) + if pa_version_under4p0: + encoded = self._data.dictionary_encode() + else: + null_encoding = "mask" if resolved_na_sentinel is not None else "encode" + encoded = self._data.dictionary_encode(null_encoding=null_encoding) + indices = pa.chunked_array( + [c.indices for c in encoded.chunks], type=encoded.type.index_type + ).to_pandas() + if indices.dtype.kind == "f": + indices[np.isnan(indices)] = ( + resolved_na_sentinel if resolved_na_sentinel is not None else -1 + ) + indices = indices.astype(np.int64, copy=False) + + if encoded.num_chunks: + uniques = type(self)(encoded.chunk(0).dictionary) + if resolved_na_sentinel is None and pa_version_under4p0: + # TODO: share logic with BaseMaskedArray.factorize + # Insert na with the proper code + na_mask = indices.values == -1 + na_index = na_mask.argmax() + if na_mask[na_index]: + na_code = 0 if na_index == 0 else indices[:na_index].max() + 1 + uniques = uniques.insert(na_code, self.dtype.na_value) + indices[indices >= na_code] += 1 + indices[indices == -1] = na_code + else: + uniques = type(self)(pa.array([], type=encoded.type.value_type)) + + return indices.values, uniques + + def reshape(self, *args, **kwargs): + raise NotImplementedError( + f"{type(self)} does not support reshape " + f"as backed by a 1D pyarrow.ChunkedArray." + ) + + def take( + self, + indices: TakeIndexer, + allow_fill: bool = False, + fill_value: Any = None, + ) -> ArrowExtensionArray: + """ + Take elements from an array. + + Parameters + ---------- + indices : sequence of int or one-dimensional np.ndarray of int + Indices to be taken. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. 
+ + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + Returns + ------- + ExtensionArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + + See Also + -------- + numpy.take + api.extensions.take + + Notes + ----- + ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, + ``iloc``, when `indices` is a sequence of values. Additionally, + it's called by :meth:`Series.reindex`, or any other method + that causes realignment, with a `fill_value`. + """ + # TODO: Remove once we got rid of the (indices < 0) check + if not is_array_like(indices): + indices_array = np.asanyarray(indices) + else: + # error: Incompatible types in assignment (expression has type + # "Sequence[int]", variable has type "ndarray") + indices_array = indices # type: ignore[assignment] + + if len(self._data) == 0 and (indices_array >= 0).any(): + raise IndexError("cannot do a non-empty take") + if indices_array.size > 0 and indices_array.max() >= len(self._data): + raise IndexError("out of bounds value in 'indices'.") + + if allow_fill: + fill_mask = indices_array < 0 + if fill_mask.any(): + validate_indices(indices_array, len(self._data)) + # TODO(ARROW-9433): Treat negative indices as NULL + indices_array = pa.array(indices_array, mask=fill_mask) + result = self._data.take(indices_array) + if isna(fill_value): + return type(self)(result) + # TODO: ArrowNotImplementedError: Function fill_null has no + # kernel matching input types (array[string], scalar[string]) + result = type(self)(result) + result[fill_mask] = fill_value + return result + # return type(self)(pc.fill_null(result, pa.scalar(fill_value))) + else: + # Nothing to fill + return type(self)(self._data.take(indices)) + else: # allow_fill=False + # TODO(ARROW-9432): Treat negative indices as indices from the right. + if (indices_array < 0).any(): + # Don't modify in-place + indices_array = np.copy(indices_array) + indices_array[indices_array < 0] += len(self._data) + return type(self)(self._data.take(indices_array)) + + def unique(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: + """ + Compute the ArrowExtensionArray of unique values. + + Returns + ------- + ArrowExtensionArray + """ + if pa_version_under2p0: + fallback_performancewarning(version="2") + return super().unique() + else: + return type(self)(pc.unique(self._data)) + + def value_counts(self, dropna: bool = True) -> Series: + """ + Return a Series containing counts of each unique value. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of missing values. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + from pandas import ( + Index, + Series, + ) + + vc = self._data.value_counts() + + values = vc.field(0) + counts = vc.field(1) + if dropna and self._data.null_count > 0: + mask = values.is_valid() + values = values.filter(mask) + counts = counts.filter(mask) + + # No missing values so we can adhere to the interface and return a numpy array. 
+ counts = np.array(counts) + + index = Index(type(self)(values)) + + return Series(counts, index=index).astype("Int64") + + @classmethod + def _concat_same_type( + cls: type[ArrowExtensionArrayT], to_concat + ) -> ArrowExtensionArrayT: + """ + Concatenate multiple ArrowExtensionArrays. + + Parameters + ---------- + to_concat : sequence of ArrowExtensionArrays + + Returns + ------- + ArrowExtensionArray + """ + chunks = [array for ea in to_concat for array in ea._data.iterchunks()] + arr = pa.chunked_array(chunks) + return cls(arr) + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + """ + Return a scalar result of performing the reduction operation. + + Parameters + ---------- + name : str + Name of the function, supported values are: + { any, all, min, max, sum, mean, median, prod, + std, var, sem, kurt, skew }. + skipna : bool, default True + If True, skip NaN values. + **kwargs + Additional keyword arguments passed to the reduction function. + Currently, `ddof` is the only supported kwarg. + + Returns + ------- + scalar + + Raises + ------ + TypeError : subclass does not define reductions + """ + if name == "sem": + + def pyarrow_meth(data, skipna, **kwargs): + numerator = pc.stddev(data, skip_nulls=skipna, **kwargs) + denominator = pc.sqrt_checked( + pc.subtract_checked( + pc.count(self._data, skip_nulls=skipna), kwargs["ddof"] + ) + ) + return pc.divide_checked(numerator, denominator) + + else: + pyarrow_name = { + "median": "approximate_median", + "prod": "product", + "std": "stddev", + "var": "variance", + }.get(name, name) + # error: Incompatible types in assignment + # (expression has type "Optional[Any]", variable has type + # "Callable[[Any, Any, KwArg(Any)], Any]") + pyarrow_meth = getattr(pc, pyarrow_name, None) # type: ignore[assignment] + if pyarrow_meth is None: + # Let ExtensionArray._reduce raise the TypeError + return super()._reduce(name, skipna=skipna, **kwargs) + try: + result = pyarrow_meth(self._data, skip_nulls=skipna, **kwargs) + except (AttributeError, NotImplementedError, TypeError) as err: + msg = ( + f"'{type(self).__name__}' with dtype {self.dtype} " + f"does not support reduction '{name}' with pyarrow " + f"version {pa.__version__}. '{name}' may be supported by " + f"upgrading pyarrow." + ) + raise TypeError(msg) from err + if pc.is_null(result).as_py(): + return self.dtype.na_value + return result.as_py() + + def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + """Set one or more values inplace. + + Parameters + ---------- + key : int, ndarray, or slice + When called from, e.g. ``Series.__setitem__``, ``key`` will be + one of + + * scalar int + * ndarray of integers. + * boolean ndarray + * slice object + + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + value or values to be set of ``key``. + + Returns + ------- + None + """ + key = check_array_indexer(self, key) + indices = self._indexing_key_to_indices(key) + value = self._maybe_convert_setitem_value(value) + + argsort = np.argsort(indices) + indices = indices[argsort] + + if is_scalar(value): + value = np.broadcast_to(value, len(self)) + elif len(indices) != len(value): + raise ValueError("Length of indexer and values mismatch") + else: + value = np.asarray(value)[argsort] + + self._data = self._set_via_chunk_iteration(indices=indices, value=value) + + def _indexing_key_to_indices( + self, key: int | slice | np.ndarray + ) -> npt.NDArray[np.intp]: + """ + Convert indexing key for self into positional indices. 
+ + Parameters + ---------- + key : int | slice | np.ndarray + + Returns + ------- + npt.NDArray[np.intp] + """ + n = len(self) + if isinstance(key, slice): + indices = np.arange(n)[key] + elif is_integer(key): + # error: Invalid index type "List[Union[int, ndarray[Any, Any]]]" + # for "ndarray[Any, dtype[signedinteger[Any]]]"; expected type + # "Union[SupportsIndex, _SupportsArray[dtype[Union[bool_, + # integer[Any]]]], _NestedSequence[_SupportsArray[dtype[Union + # [bool_, integer[Any]]]]], _NestedSequence[Union[bool, int]] + # , Tuple[Union[SupportsIndex, _SupportsArray[dtype[Union[bool_ + # , integer[Any]]]], _NestedSequence[_SupportsArray[dtype[Union + # [bool_, integer[Any]]]]], _NestedSequence[Union[bool, int]]], ...]]" + indices = np.arange(n)[[key]] # type: ignore[index] + elif is_bool_dtype(key): + key = np.asarray(key) + if len(key) != n: + raise ValueError("Length of indexer and values mismatch") + indices = key.nonzero()[0] + else: + key = np.asarray(key) + indices = np.arange(n)[key] + return indices + + # TODO: redefine _rank using pc.rank with pyarrow 9.0 + + def _quantile( + self: ArrowExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str + ) -> ArrowExtensionArrayT: + """ + Compute the quantiles of self for each quantile in `qs`. + + Parameters + ---------- + qs : np.ndarray[float64] + interpolation: str + + Returns + ------- + same type as self + """ + if pa_version_under4p0: + raise NotImplementedError( + "quantile only supported for pyarrow version >= 4.0" + ) + result = pc.quantile(self._data, q=qs, interpolation=interpolation) + return type(self)(result) + + def _mode(self: ArrowExtensionArrayT, dropna: bool = True) -> ArrowExtensionArrayT: + """ + Returns the mode(s) of the ExtensionArray. + + Always returns `ExtensionArray` even if only one value. + + Parameters + ---------- + dropna : bool, default True + Don't consider counts of NA values. + Not implemented by pyarrow. + + Returns + ------- + same type as self + Sorted, if possible. + """ + if pa_version_under6p0: + raise NotImplementedError("mode only supported for pyarrow version >= 6.0") + modes = pc.mode(self._data, pc.count_distinct(self._data).as_py()) + values = modes.field(0) + counts = modes.field(1) + # counts sorted descending i.e counts[0] = max + mask = pc.equal(counts, counts[0]) + most_common = values.filter(mask) + return type(self)(most_common) + + def _maybe_convert_setitem_value(self, value): + """Maybe convert value to be pyarrow compatible.""" + # TODO: Make more robust like ArrowStringArray._maybe_convert_setitem_value + return value + + def _set_via_chunk_iteration( + self, indices: npt.NDArray[np.intp], value: npt.NDArray[Any] + ) -> pa.ChunkedArray: + """ + Loop through the array chunks and set the new values while + leaving the chunking layout unchanged. + + Parameters + ---------- + indices : npt.NDArray[np.intp] + Position indices for the underlying ChunkedArray. + + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + value or values to be set of ``key``. + + Notes + ----- + Assumes that indices is sorted. Caller is responsible for sorting. 
+ """ + new_data = [] + stop = 0 + for chunk in self._data.iterchunks(): + start, stop = stop, stop + len(chunk) + if len(indices) == 0 or stop <= indices[0]: + new_data.append(chunk) + else: + n = int(np.searchsorted(indices, stop, side="left")) + c_ind = indices[:n] - start + indices = indices[n:] + n = len(c_ind) + c_value, value = value[:n], value[n:] + new_data.append(self._replace_with_indices(chunk, c_ind, c_value)) + return pa.chunked_array(new_data) + + @classmethod + def _replace_with_indices( + cls, + chunk: pa.Array, + indices: npt.NDArray[np.intp], + value: npt.NDArray[Any], + ) -> pa.Array: + """ + Replace items selected with a set of positional indices. + + Analogous to pyarrow.compute.replace_with_mask, except that replacement + positions are identified via indices rather than a mask. + + Parameters + ---------- + chunk : pa.Array + indices : npt.NDArray[np.intp] + value : npt.NDArray[Any] + Replacement value(s). + + Returns + ------- + pa.Array + """ + n = len(indices) + + if n == 0: + return chunk + + start, stop = indices[[0, -1]] + + if (stop - start) == (n - 1): + # fast path for a contiguous set of indices + arrays = [ + chunk[:start], + pa.array(value, type=chunk.type, from_pandas=True), + chunk[stop + 1 :], + ] + arrays = [arr for arr in arrays if len(arr)] + if len(arrays) == 1: + return arrays[0] + return pa.concat_arrays(arrays) + + mask = np.zeros(len(chunk), dtype=np.bool_) + mask[indices] = True + + if pa_version_under5p0: + arr = chunk.to_numpy(zero_copy_only=False) + arr[mask] = value + return pa.array(arr, type=chunk.type) + + if isna(value).all(): + return pc.if_else(mask, None, chunk) + + return pc.replace_with_mask(chunk, mask, value) diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py new file mode 100644 index 00000000..48e2c5bd --- /dev/null +++ b/pandas/core/arrays/arrow/dtype.py @@ -0,0 +1,204 @@ +from __future__ import annotations + +import re + +import numpy as np + +from pandas._typing import DtypeObj +from pandas.compat import pa_version_under1p01 +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.base import ( + StorageExtensionDtype, + register_extension_dtype, +) + +if not pa_version_under1p01: + import pyarrow as pa + + +@register_extension_dtype +class ArrowDtype(StorageExtensionDtype): + """ + An ExtensionDtype for PyArrow data types. + + .. warning:: + + ArrowDtype is considered experimental. The implementation and + parts of the API may change without warning. + + While most ``dtype`` arguments can accept the "string" + constructor, e.g. ``"int64[pyarrow]"``, ArrowDtype is useful + if the data type contains parameters like ``pyarrow.timestamp``. + + Parameters + ---------- + pyarrow_dtype : pa.DataType + An instance of a `pyarrow.DataType `__. + + Attributes + ---------- + pyarrow_dtype + + Methods + ------- + None + + Returns + ------- + ArrowDtype + + Examples + -------- + >>> import pyarrow as pa + >>> pd.ArrowDtype(pa.int64()) + int64[pyarrow] + + Types with parameters must be constructed with ArrowDtype. 
+ + >>> pd.ArrowDtype(pa.timestamp("s", tz="America/New_York")) + timestamp[s, tz=America/New_York][pyarrow] + >>> pd.ArrowDtype(pa.list_(pa.int64())) + list[pyarrow] + """ # noqa: E501 + + _metadata = ("storage", "pyarrow_dtype") # type: ignore[assignment] + + def __init__(self, pyarrow_dtype: pa.DataType) -> None: + super().__init__("pyarrow") + if pa_version_under1p01: + raise ImportError("pyarrow>=1.0.1 is required for ArrowDtype") + if not isinstance(pyarrow_dtype, pa.DataType): + raise ValueError( + f"pyarrow_dtype ({pyarrow_dtype}) must be an instance " + f"of a pyarrow.DataType. Got {type(pyarrow_dtype)} instead." + ) + self.pyarrow_dtype = pyarrow_dtype + + def __repr__(self) -> str: + return self.name + + @property + def type(self): + """ + Returns pyarrow.DataType. + """ + return type(self.pyarrow_dtype) + + @property + def name(self) -> str: # type: ignore[override] + """ + A string identifying the data type. + """ + return f"{str(self.pyarrow_dtype)}[{self.storage}]" + + @cache_readonly + def numpy_dtype(self) -> np.dtype: + """Return an instance of the related numpy dtype""" + try: + return np.dtype(self.pyarrow_dtype.to_pandas_dtype()) + except (NotImplementedError, TypeError): + return np.dtype(object) + + @cache_readonly + def kind(self) -> str: + return self.numpy_dtype.kind + + @cache_readonly + def itemsize(self) -> int: + """Return the number of bytes in this dtype""" + return self.numpy_dtype.itemsize + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays.arrow import ArrowExtensionArray + + return ArrowExtensionArray + + @classmethod + def construct_from_string(cls, string: str) -> ArrowDtype: + """ + Construct this type from a string. + + Parameters + ---------- + string : str + string should follow the format f"{pyarrow_type}[pyarrow]" + e.g. int64[pyarrow] + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + if not string.endswith("[pyarrow]"): + raise TypeError(f"'{string}' must end with '[pyarrow]'") + if string == "string[pyarrow]": + # Ensure Registry.find skips ArrowDtype to use StringDtype instead + raise TypeError("string[pyarrow] should be constructed by StringDtype") + base_type = string.split("[pyarrow]")[0] + try: + pa_dtype = pa.type_for_alias(base_type) + except ValueError as err: + has_parameters = re.search(r"\[.*\]", base_type) + if has_parameters: + raise NotImplementedError( + "Passing pyarrow type specific parameters " + f"({has_parameters.group()}) in the string is not supported. " + "Please construct an ArrowDtype object with a pyarrow_dtype " + "instance with specific parameters." + ) from err + raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err + return cls(pa_dtype) + + @property + def _is_numeric(self) -> bool: + """ + Whether columns with this dtype should be considered numeric. + """ + # TODO: pa.types.is_boolean? + return ( + pa.types.is_integer(self.pyarrow_dtype) + or pa.types.is_floating(self.pyarrow_dtype) + or pa.types.is_decimal(self.pyarrow_dtype) + ) + + @property + def _is_boolean(self) -> bool: + """ + Whether this dtype should be considered boolean. + """ + return pa.types.is_boolean(self.pyarrow_dtype) + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + # We unwrap any masked dtypes, find the common dtype we would use + # for that, then re-mask the result. 
+ # Mirrors BaseMaskedDtype + from pandas.core.dtypes.cast import find_common_type + + new_dtype = find_common_type( + [ + dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype + for dtype in dtypes + ] + ) + if not isinstance(new_dtype, np.dtype): + return None + try: + pa_dtype = pa.from_numpy_dtype(new_dtype) + return type(self)(pa_dtype) + except NotImplementedError: + return None + + def __from_arrow__(self, array: pa.Array | pa.ChunkedArray): + """ + Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray. + """ + array_class = self.construct_array_type() + return array_class(array) diff --git a/pandas/core/arrays/arrow/extension_types.py b/pandas/core/arrays/arrow/extension_types.py new file mode 100644 index 00000000..c9badb2b --- /dev/null +++ b/pandas/core/arrays/arrow/extension_types.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import json + +import pyarrow + +from pandas._typing import IntervalClosedType + +from pandas.core.arrays.interval import VALID_CLOSED + + +class ArrowPeriodType(pyarrow.ExtensionType): + def __init__(self, freq) -> None: + # attributes need to be set first before calling + # super init (as that calls serialize) + self._freq = freq + pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period") + + @property + def freq(self): + return self._freq + + def __arrow_ext_serialize__(self) -> bytes: + metadata = {"freq": self.freq} + return json.dumps(metadata).encode() + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized) -> ArrowPeriodType: + metadata = json.loads(serialized.decode()) + return ArrowPeriodType(metadata["freq"]) + + def __eq__(self, other): + if isinstance(other, pyarrow.BaseExtensionType): + return type(self) == type(other) and self.freq == other.freq + else: + return NotImplemented + + def __hash__(self) -> int: + return hash((str(self), self.freq)) + + def to_pandas_dtype(self): + import pandas as pd + + return pd.PeriodDtype(freq=self.freq) + + +# register the type with a dummy instance +_period_type = ArrowPeriodType("D") +pyarrow.register_extension_type(_period_type) + + +class ArrowIntervalType(pyarrow.ExtensionType): + def __init__(self, subtype, closed: IntervalClosedType) -> None: + # attributes need to be set first before calling + # super init (as that calls serialize) + assert closed in VALID_CLOSED + self._closed: IntervalClosedType = closed + if not isinstance(subtype, pyarrow.DataType): + subtype = pyarrow.type_for_alias(str(subtype)) + self._subtype = subtype + + storage_type = pyarrow.struct([("left", subtype), ("right", subtype)]) + pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval") + + @property + def subtype(self): + return self._subtype + + @property + def closed(self) -> IntervalClosedType: + return self._closed + + def __arrow_ext_serialize__(self) -> bytes: + metadata = {"subtype": str(self.subtype), "closed": self.closed} + return json.dumps(metadata).encode() + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized) -> ArrowIntervalType: + metadata = json.loads(serialized.decode()) + subtype = pyarrow.type_for_alias(metadata["subtype"]) + closed = metadata["closed"] + return ArrowIntervalType(subtype, closed) + + def __eq__(self, other): + if isinstance(other, pyarrow.BaseExtensionType): + return ( + type(self) == type(other) + and self.subtype == other.subtype + and self.closed == other.closed + ) + else: + return NotImplemented + + def __hash__(self) -> int: + return hash((str(self), str(self.subtype), 
self.closed)) + + def to_pandas_dtype(self): + import pandas as pd + + return pd.IntervalDtype(self.subtype.to_pandas_dtype(), self.closed) + + +# register the type with a dummy instance +_interval_type = ArrowIntervalType(pyarrow.int64(), "left") +pyarrow.register_extension_type(_interval_type) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py new file mode 100644 index 00000000..be44c7e4 --- /dev/null +++ b/pandas/core/arrays/base.py @@ -0,0 +1,1863 @@ +""" +An interface for extending pandas with custom arrays. + +.. warning:: + + This is an experimental API and subject to breaking changes + without warning. +""" +from __future__ import annotations + +import inspect +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Iterator, + Literal, + Sequence, + TypeVar, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + ArrayLike, + AstypeArg, + Dtype, + FillnaOptions, + PositionalIndexer, + ScalarIndexer, + SequenceIndexer, + Shape, + TakeIndexer, + npt, +) +from pandas.compat import set_function_name +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, + deprecate_nonkeyword_arguments, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, + validate_insert_loc, +) + +from pandas.core.dtypes.cast import maybe_cast_to_extension_array +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_list_like, + is_scalar, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import ( + arraylike, + missing, + roperator, +) +from pandas.core.algorithms import ( + factorize_array, + isin, + mode, + rank, + resolve_na_sentinel, + unique, +) +from pandas.core.array_algos.quantile import quantile_with_mask +from pandas.core.sorting import ( + nargminmax, + nargsort, +) + +if TYPE_CHECKING: + + class ExtensionArraySupportsAnyAll("ExtensionArray"): + def any(self, *, skipna: bool = True) -> bool: + pass + + def all(self, *, skipna: bool = True) -> bool: + pass + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + +_extension_array_shared_docs: dict[str, str] = {} + +ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray") + + +class ExtensionArray: + """ + Abstract base class for custom 1-D array types. + + pandas will recognize instances of this class as proper arrays + with a custom type and will not attempt to coerce them to objects. They + may be stored directly inside a :class:`DataFrame` or :class:`Series`. 
+ + Attributes + ---------- + dtype + nbytes + ndim + shape + + Methods + ------- + argsort + astype + copy + dropna + factorize + fillna + equals + insert + isin + isna + ravel + repeat + searchsorted + shift + take + tolist + unique + view + _concat_same_type + _formatter + _from_factorized + _from_sequence + _from_sequence_of_strings + _reduce + _values_for_argsort + _values_for_factorize + + Notes + ----- + The interface includes the following abstract methods that must be + implemented by subclasses: + + * _from_sequence + * _from_factorized + * __getitem__ + * __len__ + * __eq__ + * dtype + * nbytes + * isna + * take + * copy + * _concat_same_type + + A default repr displaying the type, (truncated) data, length, + and dtype is provided. It can be customized or replaced by + by overriding: + + * __repr__ : A default repr for the ExtensionArray. + * _formatter : Print scalars inside a Series or DataFrame. + + Some methods require casting the ExtensionArray to an ndarray of Python + objects with ``self.astype(object)``, which may be expensive. When + performance is a concern, we highly recommend overriding the following + methods: + + * fillna + * dropna + * unique + * factorize / _values_for_factorize + * argsort, argmax, argmin / _values_for_argsort + * searchsorted + + The remaining methods implemented on this class should be performant, + as they only compose abstract methods. Still, a more efficient + implementation may be available, and these methods can be overridden. + + One can implement methods to handle array reductions. + + * _reduce + + One can implement methods to handle parsing from strings that will be used + in methods such as ``pandas.io.parsers.read_csv``. + + * _from_sequence_of_strings + + This class does not inherit from 'abc.ABCMeta' for performance reasons. + Methods and properties required by the interface raise + ``pandas.errors.AbstractMethodError`` and no ``register`` method is + provided for registering virtual subclasses. + + ExtensionArrays are limited to 1 dimension. + + They may be backed by none, one, or many NumPy arrays. For example, + ``pandas.Categorical`` is an extension array backed by two arrays, + one for codes and one for categories. An array of IPv6 address may + be backed by a NumPy structured array with two fields, one for the + lower 64 bits and one for the upper 64 bits. Or they may be backed + by some other storage type, like Python lists. Pandas makes no + assumptions on how the data are stored, just that it can be converted + to a NumPy array. + The ExtensionArray interface does not impose any rules on how this data + is stored. However, currently, the backing data cannot be stored in + attributes called ``.values`` or ``._values`` to ensure full compatibility + with pandas internals. But other names as ``.data``, ``._data``, + ``._items``, ... can be freely used. + + If implementing NumPy's ``__array_ufunc__`` interface, pandas expects + that + + 1. You defer by returning ``NotImplemented`` when any Series are present + in `inputs`. Pandas will extract the arrays and call the ufunc again. + 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. + Pandas inspect this to determine whether the ufunc is valid for the + types present. + + See :ref:`extending.extension.ufunc` for more. + + By default, ExtensionArrays are not hashable. Immutable subclasses may + override this behavior. + """ + + # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. + # Don't override this. 
+ _typ = "extension" + + # ------------------------------------------------------------------------ + # Constructors + # ------------------------------------------------------------------------ + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): + """ + Construct a new ExtensionArray from a sequence of scalars. + + Parameters + ---------- + scalars : Sequence + Each element will be an instance of the scalar type for this + array, ``cls.dtype.type`` or be converted into this type in this method. + dtype : dtype, optional + Construct for this particular dtype. This should be a Dtype + compatible with the ExtensionArray. + copy : bool, default False + If True, copy the underlying data. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(cls) + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype: Dtype | None = None, copy=False + ): + """ + Construct a new ExtensionArray from a sequence of strings. + + Parameters + ---------- + strings : Sequence + Each element will be an instance of the scalar type for this + array, ``cls.dtype.type``. + dtype : dtype, optional + Construct for this particular dtype. This should be a Dtype + compatible with the ExtensionArray. + copy : bool, default False + If True, copy the underlying data. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(cls) + + @classmethod + def _from_factorized(cls, values, original): + """ + Reconstruct an ExtensionArray after factorization. + + Parameters + ---------- + values : ndarray + An integer ndarray with the factorized values. + original : ExtensionArray + The original ExtensionArray that factorize was called on. + + See Also + -------- + factorize : Top-level factorize method that dispatches here. + ExtensionArray.factorize : Encode the extension array as an enumerated type. + """ + raise AbstractMethodError(cls) + + # ------------------------------------------------------------------------ + # Must be a Sequence + # ------------------------------------------------------------------------ + @overload + def __getitem__(self, item: ScalarIndexer) -> Any: + ... + + @overload + def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT: + ... + + def __getitem__( + self: ExtensionArrayT, item: PositionalIndexer + ) -> ExtensionArrayT | Any: + """ + Select a subset of self. + + Parameters + ---------- + item : int, slice, or ndarray + * int: The position in 'self' to get. + + * slice: A slice object, where 'start', 'stop', and 'step' are + integers or None + + * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + + * list[int]: A list of int + + Returns + ------- + item : scalar or ExtensionArray + + Notes + ----- + For scalar ``item``, return a scalar value suitable for the array's + type. This should be an instance of ``self.dtype.type``. + + For slice ``key``, return an instance of ``ExtensionArray``, even + if the slice is length 0 or 1. + + For a boolean mask, return an instance of ``ExtensionArray``, filtered + to the values where ``item`` is True. + """ + raise AbstractMethodError(self) + + def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + """ + Set one or more values inplace. + + This method is not required to satisfy the pandas extension array + interface. + + Parameters + ---------- + key : int, ndarray, or slice + When called from, e.g. ``Series.__setitem__``, ``key`` will be + one of + + * scalar int + * ndarray of integers. 
+ * boolean ndarray + * slice object + + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + value or values to be set of ``key``. + + Returns + ------- + None + """ + # Some notes to the ExtensionArray implementor who may have ended up + # here. While this method is not required for the interface, if you + # *do* choose to implement __setitem__, then some semantics should be + # observed: + # + # * Setting multiple values : ExtensionArrays should support setting + # multiple values at once, 'key' will be a sequence of integers and + # 'value' will be a same-length sequence. + # + # * Broadcasting : For a sequence 'key' and a scalar 'value', + # each position in 'key' should be set to 'value'. + # + # * Coercion : Most users will expect basic coercion to work. For + # example, a string like '2018-01-01' is coerced to a datetime + # when setting on a datetime64ns array. In general, if the + # __init__ method coerces that value, then so should __setitem__ + # Note, also, that Series/DataFrame.where internally use __setitem__ + # on a copy of the data. + raise NotImplementedError(f"{type(self)} does not implement __setitem__.") + + def __len__(self) -> int: + """ + Length of this array + + Returns + ------- + length : int + """ + raise AbstractMethodError(self) + + def __iter__(self) -> Iterator[Any]: + """ + Iterate over elements of the array. + """ + # This needs to be implemented so that pandas recognizes extension + # arrays as list-like. The default implementation makes successive + # calls to ``__getitem__``, which may be slower than necessary. + for i in range(len(self)): + yield self[i] + + def __contains__(self, item: object) -> bool | np.bool_: + """ + Return for `item in self`. + """ + # GH37867 + # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA] + # would raise a TypeError. The implementation below works around that. + if is_scalar(item) and isna(item): + if not self._can_hold_na: + return False + elif item is self.dtype.na_value or isinstance(item, self.dtype.type): + return self._hasna + else: + return False + else: + # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no + # attribute "any" + return (item == self).any() # type: ignore[union-attr] + + # error: Signature of "__eq__" incompatible with supertype "object" + def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] + """ + Return for `self == other` (element-wise equality). + """ + # Implementer note: this should return a boolean numpy ndarray or + # a boolean ExtensionArray. + # When `other` is one of Series, Index, or DataFrame, this method should + # return NotImplemented (to ensure that those objects are responsible for + # first unpacking the arrays, and then dispatch the operation to the + # underlying arrays) + raise AbstractMethodError(self) + + # error: Signature of "__ne__" incompatible with supertype "object" + def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] + """ + Return for `self != other` (element-wise in-equality). 
+ """ + return ~(self == other) + + def __init_subclass__(cls, **kwargs) -> None: + factorize = getattr(cls, "factorize") + if ( + "use_na_sentinel" not in inspect.signature(factorize).parameters + # TimelikeOps uses old factorize args to ensure we don't break things + and cls.__name__ not in ("TimelikeOps", "DatetimeArray", "TimedeltaArray") + ): + # See GH#46910 for details on the deprecation + name = cls.__name__ + warnings.warn( + f"The `na_sentinel` argument of `{name}.factorize` is deprecated. " + f"In the future, pandas will use the `use_na_sentinel` argument " + f"instead. Add this argument to `{name}.factorize` to be compatible " + f"with future versions of pandas and silence this warning.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: object = lib.no_default, + ) -> np.ndarray: + """ + Convert to a NumPy ndarray. + + .. versionadded:: 1.0.0 + + This is similar to :meth:`numpy.asarray`, but may provide additional control + over how the conversion is done. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. + copy : bool, default False + Whether to ensure that the returned value is a not a view on + another array. Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that + a copy is made, even if not strictly necessary. + na_value : Any, optional + The value to use for missing values. The default value depends + on `dtype` and the type of the array. + + Returns + ------- + numpy.ndarray + """ + result = np.asarray(self, dtype=dtype) + if copy or na_value is not lib.no_default: + result = result.copy() + if na_value is not lib.no_default: + result[self.isna()] = na_value + return result + + # ------------------------------------------------------------------------ + # Required attributes + # ------------------------------------------------------------------------ + + @property + def dtype(self) -> ExtensionDtype: + """ + An instance of 'ExtensionDtype'. + """ + raise AbstractMethodError(self) + + @property + def shape(self) -> Shape: + """ + Return a tuple of the array dimensions. + """ + return (len(self),) + + @property + def size(self) -> int: + """ + The number of elements in the array. + """ + # error: Incompatible return value type (got "signedinteger[_64Bit]", + # expected "int") [return-value] + return np.prod(self.shape) # type: ignore[return-value] + + @property + def ndim(self) -> int: + """ + Extension Arrays are only allowed to be 1-dimensional. + """ + return 1 + + @property + def nbytes(self) -> int: + """ + The number of bytes needed to store this object in memory. + """ + # If this is expensive to compute, return an approximate lower bound + # on the number of bytes needed. + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + # Additional Methods + # ------------------------------------------------------------------------ + + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + """ + Cast to a NumPy array or ExtensionArray with 'dtype'. 
+ + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : np.ndarray or ExtensionArray + An ExtensionArray if dtype is ExtensionDtype, + Otherwise a NumPy ndarray with 'dtype' for its dtype. + """ + + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, self.dtype): + if not copy: + return self + else: + return self.copy() + + if isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() + return cls._from_sequence(self, dtype=dtype, copy=copy) + + return np.array(self, dtype=dtype, copy=copy) + + def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: + """ + A 1-D array indicating if each value is missing. + + Returns + ------- + na_values : Union[np.ndarray, ExtensionArray] + In most cases, this should return a NumPy ndarray. For + exceptional cases like ``SparseArray``, where returning + an ndarray would be expensive, an ExtensionArray may be + returned. + + Notes + ----- + If returning an ExtensionArray, then + + * ``na_values._is_boolean`` should be True + * `na_values` should implement :func:`ExtensionArray._reduce` + * ``na_values.any`` and ``na_values.all`` should be implemented + """ + raise AbstractMethodError(self) + + @property + def _hasna(self) -> bool: + # GH#22680 + """ + Equivalent to `self.isna().any()`. + + Some ExtensionArray subclasses may be able to optimize this check. + """ + return bool(self.isna().any()) + + def _values_for_argsort(self) -> np.ndarray: + """ + Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + + See Also + -------- + ExtensionArray.argsort : Return the indices that would sort this array. + + Notes + ----- + The caller is responsible for *not* modifying these values in-place, so + it is safe for implementors to give views on `self`. + + Functions that use this (e.g. ExtensionArray.argsort) should ignore + entries with missing values in the original array (according to `self.isna()`). + This means that the corresponding entries in the returned array don't need to + be modified to sort correctly. + """ + # Note: this is used in `ExtensionArray.argsort/argmin/argmax`. + return np.array(self) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def argsort( + self, + ascending: bool = True, + kind: str = "quicksort", + na_position: str = "last", + *args, + **kwargs, + ) -> np.ndarray: + """ + Return the indices that would sort this array. + + Parameters + ---------- + ascending : bool, default True + Whether the indices should result in an ascending + or descending sort. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + Sorting algorithm. + *args, **kwargs: + Passed through to :func:`numpy.argsort`. + + Returns + ------- + np.ndarray[np.intp] + Array of indices that sort ``self``. If NaN values are contained, + NaN values are placed at the end. + + See Also + -------- + numpy.argsort : Sorting implementation used internally. + """ + # Implementor note: You have two places to override the behavior of + # argsort. + # 1. _values_for_argsort : construct the values passed to np.argsort + # 2. argsort : total control over sorting. 
In case of overriding this, + # it is recommended to also override argmax/argmin + ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) + + values = self._values_for_argsort() + return nargsort( + values, + kind=kind, + ascending=ascending, + na_position=na_position, + mask=np.asarray(self.isna()), + ) + + def argmin(self, skipna: bool = True) -> int: + """ + Return the index of minimum value. + + In case of multiple occurrences of the minimum value, the index + corresponding to the first occurrence is returned. + + Parameters + ---------- + skipna : bool, default True + + Returns + ------- + int + + See Also + -------- + ExtensionArray.argmax + """ + # Implementor note: You have two places to override the behavior of + # argmin. + # 1. _values_for_argsort : construct the values used in nargminmax + # 2. argmin itself : total control over sorting. + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return nargminmax(self, "argmin") + + def argmax(self, skipna: bool = True) -> int: + """ + Return the index of maximum value. + + In case of multiple occurrences of the maximum value, the index + corresponding to the first occurrence is returned. + + Parameters + ---------- + skipna : bool, default True + + Returns + ------- + int + + See Also + -------- + ExtensionArray.argmin + """ + # Implementor note: You have two places to override the behavior of + # argmax. + # 1. _values_for_argsort : construct the values used in nargminmax + # 2. argmax itself : total control over sorting. + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return nargminmax(self, "argmax") + + def fillna( + self: ExtensionArrayT, + value: object | ArrayLike | None = None, + method: FillnaOptions | None = None, + limit: int | None = None, + ) -> ExtensionArrayT: + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, array-like + If a scalar value is passed it is used to fill all missing values. + Alternatively, an array-like 'value' can be given. It's expected + that the array-like have the same length as 'self'. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap. + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. + + Returns + ------- + ExtensionArray + With NA/NaN filled. 
+ """ + value, method = validate_fillna_kwargs(value, method) + + mask = self.isna() + # error: Argument 2 to "check_value_size" has incompatible type + # "ExtensionArray"; expected "ndarray" + value = missing.check_value_size( + value, mask, len(self) # type: ignore[arg-type] + ) + + if mask.any(): + if method is not None: + func = missing.get_fill_func(method) + npvalues = self.astype(object) + func(npvalues, limit=limit, mask=mask) + new_values = self._from_sequence(npvalues, dtype=self.dtype) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values + + def dropna(self: ExtensionArrayT) -> ExtensionArrayT: + """ + Return ExtensionArray without NA values. + + Returns + ------- + valid : ExtensionArray + """ + # error: Unsupported operand type for ~ ("ExtensionArray") + return self[~self.isna()] # type: ignore[operator] + + def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: + """ + Shift values by desired number. + + Newly introduced missing values are filled with + ``self.dtype.na_value``. + + Parameters + ---------- + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. + + fill_value : object, optional + The scalar value to use for newly introduced missing values. + The default is ``self.dtype.na_value``. + + Returns + ------- + ExtensionArray + Shifted. + + Notes + ----- + If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is + returned. + + If ``periods > len(self)``, then an array of size + len(self) is returned, with all values filled with + ``self.dtype.na_value``. + """ + # Note: this implementation assumes that `self.dtype.na_value` can be + # stored in an instance of your ExtensionArray with `self.dtype`. + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + empty = self._from_sequence( + [fill_value] * min(abs(periods), len(self)), dtype=self.dtype + ) + if periods > 0: + a = empty + b = self[:-periods] + else: + a = self[abs(periods) :] + b = empty + return self._concat_same_type([a, b]) + + def unique(self: ExtensionArrayT) -> ExtensionArrayT: + """ + Compute the ExtensionArray of unique values. + + Returns + ------- + uniques : ExtensionArray + """ + uniques = unique(self.astype(object)) + return self._from_sequence(uniques, dtype=self.dtype) + + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + """ + Find indices where elements should be inserted to maintain order. + + Find the indices into a sorted array `self` (a) such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `self` would be preserved. + + Assuming that `self` is sorted: + + ====== ================================ + `side` returned index `i` satisfies + ====== ================================ + left ``self[i-1] < value <= self[i]`` + right ``self[i-1] <= value < self[i]`` + ====== ================================ + + Parameters + ---------- + value : array-like, list or scalar + Value(s) to insert into `self`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). 
+ sorter : 1-D array-like, optional + Optional array of integer indices that sort array a into ascending + order. They are typically the result of argsort. + + Returns + ------- + array of ints or int + If value is array-like, array of insertion points. + If value is scalar, a single integer. + + See Also + -------- + numpy.searchsorted : Similar method from NumPy. + """ + # Note: the base tests provided by pandas only test the basics. + # We do not test + # 1. Values outside the range of the `data_for_sorting` fixture + # 2. Values between the values in the `data_for_sorting` fixture + # 3. Missing values. + arr = self.astype(object) + if isinstance(value, ExtensionArray): + value = value.astype(object) + return arr.searchsorted(value, side=side, sorter=sorter) + + def equals(self, other: object) -> bool: + """ + Return if another array is equivalent to this array. + + Equivalent means that both arrays have the same shape and dtype, and + all values compare equal. Missing values in the same location are + considered equal (in contrast with normal equality). + + Parameters + ---------- + other : ExtensionArray + Array to compare to this Array. + + Returns + ------- + boolean + Whether the arrays are equivalent. + """ + if type(self) != type(other): + return False + other = cast(ExtensionArray, other) + if not is_dtype_equal(self.dtype, other.dtype): + return False + elif len(self) != len(other): + return False + else: + equal_values = self == other + if isinstance(equal_values, ExtensionArray): + # boolean array with NA -> fill with False + equal_values = equal_values.fillna(False) + # error: Unsupported left operand type for & ("ExtensionArray") + equal_na = self.isna() & other.isna() # type: ignore[operator] + return bool((equal_values | equal_na).all()) + + def isin(self, values) -> npt.NDArray[np.bool_]: + """ + Pointwise comparison for set containment in the given values. + + Roughly equivalent to `np.array([x in values for x in self])` + + Parameters + ---------- + values : Sequence + + Returns + ------- + np.ndarray[bool] + """ + return isin(np.asarray(self), values) + + def _values_for_factorize(self) -> tuple[np.ndarray, Any]: + """ + Return an array and missing value suitable for factorization. + + Returns + ------- + values : ndarray + + An array suitable for factorization. This should maintain order + and be a supported dtype (Float64, Int64, UInt64, String, Object). + By default, the extension array is cast to object dtype. + na_value : object + The value in `values` to consider missing. This will be treated + as NA in the factorization routines, so it will be coded as + `na_sentinel` and not included in `uniques`. By default, + ``np.nan`` is used. + + Notes + ----- + The values returned by this method are also used in + :func:`pandas.util.hash_pandas_object`. + """ + return self.astype(object), np.nan + + def factorize( + self, + na_sentinel: int | lib.NoDefault = lib.no_default, + use_na_sentinel: bool | lib.NoDefault = lib.no_default, + ) -> tuple[np.ndarray, ExtensionArray]: + """ + Encode the extension array as an enumerated type. + + Parameters + ---------- + na_sentinel : int, default -1 + Value to use in the `codes` array to indicate missing values. + + .. deprecated:: 1.5.0 + The na_sentinel argument is deprecated and + will be removed in a future version of pandas. Specify use_na_sentinel + as either True or False. + + use_na_sentinel : bool, default True + If True, the sentinel -1 will be used for NaN values. 
If False, + NaN values will be encoded as non-negative integers and will not drop the + NaN from the uniques of the values. + + .. versionadded:: 1.5.0 + + Returns + ------- + codes : ndarray + An integer NumPy array that's an indexer into the original + ExtensionArray. + uniques : ExtensionArray + An ExtensionArray containing the unique values of `self`. + + .. note:: + + uniques will *not* contain an entry for the NA value of + the ExtensionArray if there are any missing values present + in `self`. + + See Also + -------- + factorize : Top-level factorize method that dispatches here. + + Notes + ----- + :meth:`pandas.factorize` offers a `sort` keyword as well. + """ + # Implementer note: There are two ways to override the behavior of + # pandas.factorize + # 1. _values_for_factorize and _from_factorize. + # Specify the values passed to pandas' internal factorization + # routines, and how to convert from those values back to the + # original ExtensionArray. + # 2. ExtensionArray.factorize. + # Complete control over factorization. + resolved_na_sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel) + arr, na_value = self._values_for_factorize() + + codes, uniques = factorize_array( + arr, na_sentinel=resolved_na_sentinel, na_value=na_value + ) + + uniques_ea = self._from_factorized(uniques, self) + return codes, uniques_ea + + _extension_array_shared_docs[ + "repeat" + ] = """ + Repeat elements of a %(klass)s. + + Returns a new %(klass)s where each element of the current %(klass)s + is repeated consecutively a given number of times. + + Parameters + ---------- + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + %(klass)s. + axis : None + Must be ``None``. Has no effect but is accepted for compatibility + with numpy. + + Returns + ------- + repeated_array : %(klass)s + Newly created %(klass)s with repeated elements. + + See Also + -------- + Series.repeat : Equivalent function for Series. + Index.repeat : Equivalent function for Index. + numpy.repeat : Similar method for :class:`numpy.ndarray`. + ExtensionArray.take : Take arbitrary positions. + + Examples + -------- + >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> cat.repeat(2) + ['a', 'a', 'b', 'b', 'c', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> cat.repeat([1, 2, 3]) + ['a', 'b', 'b', 'c', 'c', 'c'] + Categories (3, object): ['a', 'b', 'c'] + """ + + @Substitution(klass="ExtensionArray") + @Appender(_extension_array_shared_docs["repeat"]) + def repeat( + self: ExtensionArrayT, repeats: int | Sequence[int], axis: int | None = None + ) -> ExtensionArrayT: + nv.validate_repeat((), {"axis": axis}) + ind = np.arange(len(self)).repeat(repeats) + return self.take(ind) + + # ------------------------------------------------------------------------ + # Indexing methods + # ------------------------------------------------------------------------ + + def take( + self: ExtensionArrayT, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: Any = None, + ) -> ExtensionArrayT: + """ + Take elements from an array. + + Parameters + ---------- + indices : sequence of int or one-dimensional np.ndarray of int + Indices to be taken. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). 
This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. + + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + Returns + ------- + ExtensionArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + + See Also + -------- + numpy.take : Take elements from an array along an axis. + api.extensions.take : Take elements from an array. + + Notes + ----- + ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, + ``iloc``, when `indices` is a sequence of values. Additionally, + it's called by :meth:`Series.reindex`, or any other method + that causes realignment, with a `fill_value`. + + Examples + -------- + Here's an example implementation, which relies on casting the + extension array to object dtype. This uses the helper method + :func:`pandas.api.extensions.take`. + + .. code-block:: python + + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + + # If the ExtensionArray is backed by an ndarray, then + # just pass that here instead of coercing to object. + data = self.astype(object) + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + # fill value should always be translated from the scalar + # type for the array, to the physical storage type for + # the data, before passing to take. + + result = take(data, indices, fill_value=fill_value, + allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + """ + # Implementer note: The `fill_value` parameter should be a user-facing + # value, an instance of self.dtype.type. When passed `fill_value=None`, + # the default of `self.dtype.na_value` should be used. + # This may differ from the physical storage type your ExtensionArray + # uses. In this case, your implementation is responsible for casting + # the user-facing type to the storage type, before using + # pandas.api.extensions.take + raise AbstractMethodError(self) + + def copy(self: ExtensionArrayT) -> ExtensionArrayT: + """ + Return a copy of the array. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(self) + + def view(self, dtype: Dtype | None = None) -> ArrayLike: + """ + Return a view on the array. + + Parameters + ---------- + dtype : str, np.dtype, or ExtensionDtype, optional + Default None. + + Returns + ------- + ExtensionArray or np.ndarray + A view on the :class:`ExtensionArray`'s data. + """ + # NB: + # - This must return a *new* object referencing the same data, not self. + # - The only case that *must* be implemented is with dtype=None, + # giving a view with the same dtype as self. 
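+ # Editor's sketch (illustrative only, not part of the upstream source):
+ # a subclass backed by a single ndarray -- assumed here to live in a
+ # hypothetical ``self._ndarray`` attribute -- could support dtype-changing
+ # views roughly as follows, returning a new wrapper over the same buffer:
+ #
+ #     def view(self, dtype=None):
+ #         if dtype is None:
+ #             return type(self)(self._ndarray, dtype=self.dtype)
+ #         return self._ndarray.view(dtype)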
+ if dtype is not None: + raise NotImplementedError(dtype) + return self[:] + + # ------------------------------------------------------------------------ + # Printing + # ------------------------------------------------------------------------ + + def __repr__(self) -> str: + if self.ndim > 1: + return self._repr_2d() + + from pandas.io.formats.printing import format_object_summary + + # the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + data = format_object_summary( + self, self._formatter(), indent_for_name=False + ).rstrip(", \n") + class_name = f"<{type(self).__name__}>\n" + return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" + + def _repr_2d(self) -> str: + from pandas.io.formats.printing import format_object_summary + + # the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + lines = [ + format_object_summary(x, self._formatter(), indent_for_name=False).rstrip( + ", \n" + ) + for x in self + ] + data = ",\n".join(lines) + class_name = f"<{type(self).__name__}>" + return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" + + def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: + """ + Formatting function for scalar values. + + This is used in the default '__repr__'. The returned formatting + function receives instances of your scalar type. + + Parameters + ---------- + boxed : bool, default False + An indicated for whether or not your array is being printed + within a Series, DataFrame, or Index (True), or just by + itself (False). This may be useful if you want scalar values + to appear differently within a Series versus on its own (e.g. + quoted or not). + + Returns + ------- + Callable[[Any], str] + A callable that gets instances of the scalar type and + returns a string. By default, :func:`repr` is used + when ``boxed=False`` and :func:`str` is used when + ``boxed=True``. + """ + if boxed: + return str + return repr + + # ------------------------------------------------------------------------ + # Reshaping + # ------------------------------------------------------------------------ + + def transpose(self, *axes: int) -> ExtensionArray: + """ + Return a transposed view on this array. + + Because ExtensionArrays are always 1D, this is a no-op. It is included + for compatibility with np.ndarray. + """ + return self[:] + + @property + def T(self) -> ExtensionArray: + return self.transpose() + + def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray: + """ + Return a flattened view on this array. + + Parameters + ---------- + order : {None, 'C', 'F', 'A', 'K'}, default 'C' + + Returns + ------- + ExtensionArray + + Notes + ----- + - Because ExtensionArrays are 1D-only, this is a no-op. + - The "order" argument is ignored, is for compatibility with NumPy. + """ + return self + + @classmethod + def _concat_same_type( + cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT] + ) -> ExtensionArrayT: + """ + Concatenate multiple array of this dtype. + + Parameters + ---------- + to_concat : sequence of this type + + Returns + ------- + ExtensionArray + """ + # Implementer note: this method will only be called with a sequence of + # ExtensionArrays of this class and with the same dtype as self. 
This + # should allow "easy" concatenation (no upcasting needed), and result + # in a new ExtensionArray of the same dtype. + # Note: this strict behaviour is only guaranteed starting with pandas 1.1 + raise AbstractMethodError(cls) + + # The _can_hold_na attribute is set to True so that pandas internals + # will use the ExtensionDtype.na_value as the NA value in operations + # such as take(), reindex(), shift(), etc. In addition, those results + # will then be of the ExtensionArray subclass rather than an array + # of objects + @cache_readonly + def _can_hold_na(self) -> bool: + return self.dtype._can_hold_na + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + """ + Return a scalar result of performing the reduction operation. + + Parameters + ---------- + name : str + Name of the function, supported values are: + { any, all, min, max, sum, mean, median, prod, + std, var, sem, kurt, skew }. + skipna : bool, default True + If True, skip NaN values. + **kwargs + Additional keyword arguments passed to the reduction function. + Currently, `ddof` is the only supported kwarg. + + Returns + ------- + scalar + + Raises + ------ + TypeError : subclass does not define reductions + """ + meth = getattr(self, name, None) + if meth is None: + raise TypeError( + f"'{type(self).__name__}' with dtype {self.dtype} " + f"does not support reduction '{name}'" + ) + return meth(skipna=skipna, **kwargs) + + # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 + # Incompatible types in assignment (expression has type "None", base class + # "object" defined the type as "Callable[[object], int]") + __hash__: ClassVar[None] # type: ignore[assignment] + + # ------------------------------------------------------------------------ + # Non-Optimized Default Methods; in the case of the private methods here, + # these are not guaranteed to be stable across pandas versions. + + def tolist(self) -> list: + """ + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + Returns + ------- + list + """ + if self.ndim > 1: + return [x.tolist() for x in self] + return list(self) + + def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT: + indexer = np.delete(np.arange(len(self)), loc) + return self.take(indexer) + + def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT: + """ + Insert an item at the given position. + + Parameters + ---------- + loc : int + item : scalar-like + + Returns + ------- + same type as self + + Notes + ----- + This method should be both type and dtype-preserving. If the item + cannot be held in an array of this type/dtype, either ValueError or + TypeError should be raised. + + The default implementation relies on _from_sequence to raise on invalid + items. + """ + loc = validate_insert_loc(loc, len(self)) + + item_arr = type(self)._from_sequence([item], dtype=self.dtype) + + return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]]) + + def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: + """ + Analogue to np.putmask(self, mask, value) + + Parameters + ---------- + mask : np.ndarray[bool] + value : scalar or listlike + If listlike, must be arraylike with same length as self. + + Returns + ------- + None + + Notes + ----- + Unlike np.putmask, we do not repeat listlike values with mismatched length. + 'value' should either be a scalar or an arraylike with the same length + as self. 
+ """ + if is_list_like(value): + val = value[mask] + else: + val = value + + self[mask] = val + + def _where( + self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value + ) -> ExtensionArrayT: + """ + Analogue to np.where(mask, self, value) + + Parameters + ---------- + mask : np.ndarray[bool] + value : scalar or listlike + + Returns + ------- + same type as self + """ + result = self.copy() + + if is_list_like(value): + val = value[~mask] + else: + val = value + + result[~mask] = val + return result + + def _fill_mask_inplace( + self, method: str, limit, mask: npt.NDArray[np.bool_] + ) -> None: + """ + Replace values in locations specified by 'mask' using pad or backfill. + + See also + -------- + ExtensionArray.fillna + """ + func = missing.get_fill_func(method) + npvalues = self.astype(object) + # NB: if we don't copy mask here, it may be altered inplace, which + # would mess up the `self[mask] = ...` below. + func(npvalues, limit=limit, mask=mask.copy()) + new_values = self._from_sequence(npvalues, dtype=self.dtype) + self[mask] = new_values[mask] + return + + def _rank( + self, + *, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + """ + See Series.rank.__doc__. + """ + if axis != 0: + raise NotImplementedError + + # TODO: we only have tests that get here with dt64 and td64 + # TODO: all tests that get here use the defaults for all the kwds + return rank( + self, + axis=axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + + @classmethod + def _empty(cls, shape: Shape, dtype: ExtensionDtype): + """ + Create an ExtensionArray with the given shape and dtype. + + See also + -------- + ExtensionDtype.empty + ExtensionDtype.empty is the 'official' public version of this API. + """ + # Implementer note: while ExtensionDtype.empty is the public way to + # call this method, it is still required to implement this `_empty` + # method as well (it is called internally in pandas) + obj = cls._from_sequence([], dtype=dtype) + + taker = np.broadcast_to(np.intp(-1), shape) + result = obj.take(taker, allow_fill=True) + if not isinstance(result, cls) or dtype != result.dtype: + raise NotImplementedError( + f"Default 'empty' implementation is invalid for dtype='{dtype}'" + ) + return result + + def _quantile( + self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str + ) -> ExtensionArrayT: + """ + Compute the quantiles of self for each quantile in `qs`. + + Parameters + ---------- + qs : np.ndarray[float64] + interpolation: str + + Returns + ------- + same type as self + """ + mask = np.asarray(self.isna()) + arr = np.asarray(self) + fill_value = np.nan + + res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation) + return type(self)._from_sequence(res_values) + + def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT: + """ + Returns the mode(s) of the ExtensionArray. + + Always returns `ExtensionArray` even if only one value. + + Parameters + ---------- + dropna : bool, default True + Don't consider counts of NA values. + + Returns + ------- + same type as self + Sorted, if possible. 
+ """ + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray[Any, Any]]", expected "ExtensionArrayT") + return mode(self, dropna=dropna) # type: ignore[return-value] + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + if any( + isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs + ): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) + + +class ExtensionOpsMixin: + """ + A base class for linking the operators to their dunder names. + + .. note:: + + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. + """ + + @classmethod + def _create_arithmetic_method(cls, op): + raise AbstractMethodError(cls) + + @classmethod + def _add_arithmetic_ops(cls): + setattr(cls, "__add__", cls._create_arithmetic_method(operator.add)) + setattr(cls, "__radd__", cls._create_arithmetic_method(roperator.radd)) + setattr(cls, "__sub__", cls._create_arithmetic_method(operator.sub)) + setattr(cls, "__rsub__", cls._create_arithmetic_method(roperator.rsub)) + setattr(cls, "__mul__", cls._create_arithmetic_method(operator.mul)) + setattr(cls, "__rmul__", cls._create_arithmetic_method(roperator.rmul)) + setattr(cls, "__pow__", cls._create_arithmetic_method(operator.pow)) + setattr(cls, "__rpow__", cls._create_arithmetic_method(roperator.rpow)) + setattr(cls, "__mod__", cls._create_arithmetic_method(operator.mod)) + setattr(cls, "__rmod__", cls._create_arithmetic_method(roperator.rmod)) + setattr(cls, "__floordiv__", cls._create_arithmetic_method(operator.floordiv)) + setattr( + cls, "__rfloordiv__", cls._create_arithmetic_method(roperator.rfloordiv) + ) + setattr(cls, "__truediv__", cls._create_arithmetic_method(operator.truediv)) + setattr(cls, "__rtruediv__", cls._create_arithmetic_method(roperator.rtruediv)) + setattr(cls, "__divmod__", cls._create_arithmetic_method(divmod)) + setattr(cls, "__rdivmod__", cls._create_arithmetic_method(roperator.rdivmod)) + + @classmethod + def _create_comparison_method(cls, op): + raise AbstractMethodError(cls) + + @classmethod + def _add_comparison_ops(cls): + setattr(cls, "__eq__", cls._create_comparison_method(operator.eq)) + setattr(cls, "__ne__", cls._create_comparison_method(operator.ne)) + setattr(cls, "__lt__", cls._create_comparison_method(operator.lt)) + setattr(cls, "__gt__", cls._create_comparison_method(operator.gt)) + setattr(cls, "__le__", cls._create_comparison_method(operator.le)) + setattr(cls, "__ge__", cls._create_comparison_method(operator.ge)) + + @classmethod + def _create_logical_method(cls, op): + raise AbstractMethodError(cls) + + @classmethod + def _add_logical_ops(cls): + setattr(cls, "__and__", cls._create_logical_method(operator.and_)) + setattr(cls, "__rand__", cls._create_logical_method(roperator.rand_)) + setattr(cls, "__or__", cls._create_logical_method(operator.or_)) + setattr(cls, "__ror__", cls._create_logical_method(roperator.ror_)) + setattr(cls, "__xor__", 
cls._create_logical_method(operator.xor)) + setattr(cls, "__rxor__", cls._create_logical_method(roperator.rxor)) + + +class ExtensionScalarOpsMixin(ExtensionOpsMixin): + """ + A mixin for defining ops on an ExtensionArray. + + It is assumed that the underlying scalar objects have the operators + already defined. + + Notes + ----- + If you have defined a subclass MyExtensionArray(ExtensionArray), then + use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to + get the arithmetic operators. After the definition of MyExtensionArray, + insert the lines + + MyExtensionArray._add_arithmetic_ops() + MyExtensionArray._add_comparison_ops() + + to link the operators to your class. + + .. note:: + + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. + """ + + @classmethod + def _create_method(cls, op, coerce_to_dtype=True, result_dtype=None): + """ + A class method that returns a method that will correspond to an + operator for an ExtensionArray subclass, by dispatching to the + relevant operator defined on the individual elements of the + ExtensionArray. + + Parameters + ---------- + op : function + An operator that takes arguments op(a, b) + coerce_to_dtype : bool, default True + boolean indicating whether to attempt to convert + the result to the underlying ExtensionArray dtype. + If it's not possible to create a new ExtensionArray with the + values, an ndarray is returned instead. + + Returns + ------- + Callable[[Any, Any], Union[ndarray, ExtensionArray]] + A method that can be bound to a class. When used, the method + receives the two arguments, one of which is the instance of + this class, and should return an ExtensionArray or an ndarray. + + Returning an ndarray may be necessary when the result of the + `op` cannot be stored in the ExtensionArray. The dtype of the + ndarray uses NumPy's normal inference rules. + + Examples + -------- + Given an ExtensionArray subclass called MyExtensionArray, use + + __add__ = cls._create_method(operator.add) + + in the class definition of MyExtensionArray to create the operator + for addition, that will be based on the operator implementation + of the underlying elements of the ExtensionArray + """ + + def _binop(self, other): + def convert_values(param): + if isinstance(param, ExtensionArray) or is_list_like(param): + ovalues = param + else: # Assume its an object + ovalues = [param] * len(self) + return ovalues + + if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)): + # rely on pandas to unbox and dispatch to us + return NotImplemented + + lvalues = self + rvalues = convert_values(other) + + # If the operator is not defined for the underlying objects, + # a TypeError should be raised + res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] + + def _maybe_convert(arr): + if coerce_to_dtype: + # https://github.com/pandas-dev/pandas/issues/22850 + # We catch all regular exceptions here, and fall back + # to an ndarray. 
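+ # Editor's note (illustrative, not upstream code): if _from_sequence
+ # cannot hold the computed values, maybe_cast_to_extension_array returns
+ # something other than an instance of this class, and the branch below
+ # falls back to a plain object ndarray via np.asarray.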
+ res = maybe_cast_to_extension_array(type(self), arr) + if not isinstance(res, type(self)): + # exception raised in _from_sequence; ensure we have ndarray + res = np.asarray(arr) + else: + res = np.asarray(arr, dtype=result_dtype) + return res + + if op.__name__ in {"divmod", "rdivmod"}: + a, b = zip(*res) + return _maybe_convert(a), _maybe_convert(b) + + return _maybe_convert(res) + + op_name = f"__{op.__name__}__" + return set_function_name(_binop, op_name, cls) + + @classmethod + def _create_arithmetic_method(cls, op): + return cls._create_method(op) + + @classmethod + def _create_comparison_method(cls, op): + return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py new file mode 100644 index 00000000..35b9de3f --- /dev/null +++ b/pandas/core/arrays/boolean.py @@ -0,0 +1,380 @@ +from __future__ import annotations + +import numbers +from typing import ( + TYPE_CHECKING, + cast, +) + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + Dtype, + DtypeObj, + type_t, +) + +from pandas.core.dtypes.common import ( + is_list_like, + is_numeric_dtype, +) +from pandas.core.dtypes.dtypes import register_extension_dtype +from pandas.core.dtypes.missing import isna + +from pandas.core import ops +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) + +if TYPE_CHECKING: + import pyarrow + + from pandas._typing import npt + + +@register_extension_dtype +class BooleanDtype(BaseMaskedDtype): + """ + Extension dtype for boolean data. + + .. versionadded:: 1.0.0 + + .. warning:: + + BooleanDtype is considered experimental. The implementation and + parts of the API may change without warning. + + Attributes + ---------- + None + + Methods + ------- + None + + Examples + -------- + >>> pd.BooleanDtype() + BooleanDtype + """ + + name = "boolean" + + # https://github.com/python/mypy/issues/4125 + # error: Signature of "type" incompatible with supertype "BaseMaskedDtype" + @property + def type(self) -> type: # type: ignore[override] + return np.bool_ + + @property + def kind(self) -> str: + return "b" + + @property + def numpy_dtype(self) -> np.dtype: + return np.dtype("bool") + + @classmethod + def construct_array_type(cls) -> type_t[BooleanArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return BooleanArray + + def __repr__(self) -> str: + return "BooleanDtype" + + @property + def _is_boolean(self) -> bool: + return True + + @property + def _is_numeric(self) -> bool: + return True + + def __from_arrow__( + self, array: pyarrow.Array | pyarrow.ChunkedArray + ) -> BooleanArray: + """ + Construct BooleanArray from pyarrow Array/ChunkedArray. 
+ """ + import pyarrow + + if array.type != pyarrow.bool_(): + raise TypeError(f"Expected array of boolean type, got {array.type} instead") + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + # pyarrow.ChunkedArray + chunks = array.chunks + + results = [] + for arr in chunks: + buflist = arr.buffers() + data = pyarrow.BooleanArray.from_buffers( + arr.type, len(arr), [None, buflist[1]], offset=arr.offset + ).to_numpy(zero_copy_only=False) + if arr.null_count != 0: + mask = pyarrow.BooleanArray.from_buffers( + arr.type, len(arr), [None, buflist[0]], offset=arr.offset + ).to_numpy(zero_copy_only=False) + mask = ~mask + else: + mask = np.zeros(len(arr), dtype=bool) + + bool_arr = BooleanArray(data, mask) + results.append(bool_arr) + + if not results: + return BooleanArray( + np.array([], dtype=np.bool_), np.array([], dtype=np.bool_) + ) + else: + return BooleanArray._concat_same_type(results) + + +def coerce_to_array( + values, mask=None, copy: bool = False +) -> tuple[np.ndarray, np.ndarray]: + """ + Coerce the input values array to numpy arrays with a mask. + + Parameters + ---------- + values : 1D list-like + mask : bool 1D array, optional + copy : bool, default False + if True, copy the input + + Returns + ------- + tuple of (values, mask) + """ + if isinstance(values, BooleanArray): + if mask is not None: + raise ValueError("cannot pass mask for BooleanArray input") + values, mask = values._data, values._mask + if copy: + values = values.copy() + mask = mask.copy() + return values, mask + + mask_values = None + if isinstance(values, np.ndarray) and values.dtype == np.bool_: + if copy: + values = values.copy() + elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype): + mask_values = isna(values) + + values_bool = np.zeros(len(values), dtype=bool) + values_bool[~mask_values] = values[~mask_values].astype(bool) + + if not np.all( + values_bool[~mask_values].astype(values.dtype) == values[~mask_values] + ): + raise TypeError("Need to pass bool-like values") + + values = values_bool + else: + values_object = np.asarray(values, dtype=object) + + inferred_dtype = lib.infer_dtype(values_object, skipna=True) + integer_like = ("floating", "integer", "mixed-integer-float") + if inferred_dtype not in ("boolean", "empty") + integer_like: + raise TypeError("Need to pass bool-like values") + + # mypy does not narrow the type of mask_values to npt.NDArray[np.bool_] + # within this branch, it assumes it can also be None + mask_values = cast("npt.NDArray[np.bool_]", isna(values_object)) + values = np.zeros(len(values), dtype=bool) + values[~mask_values] = values_object[~mask_values].astype(bool) + + # if the values were integer-like, validate it were actually 0/1's + if (inferred_dtype in integer_like) and not ( + np.all( + values[~mask_values].astype(float) + == values_object[~mask_values].astype(float) + ) + ): + raise TypeError("Need to pass bool-like values") + + if mask is None and mask_values is None: + mask = np.zeros(values.shape, dtype=bool) + elif mask is None: + mask = mask_values + else: + if isinstance(mask, np.ndarray) and mask.dtype == np.bool_: + if mask_values is not None: + mask = mask | mask_values + else: + if copy: + mask = mask.copy() + else: + mask = np.array(mask, dtype=bool) + if mask_values is not None: + mask = mask | mask_values + + if values.shape != mask.shape: + raise ValueError("values.shape and mask.shape must match") + + return values, mask + + +class BooleanArray(BaseMaskedArray): + """ + Array of boolean (True/False) data with missing 
values. + + This is a pandas Extension array for boolean data, under the hood + represented by 2 numpy arrays: a boolean array with the data and + a boolean array with the mask (True indicating missing). + + BooleanArray implements Kleene logic (sometimes called three-value + logic) for logical operations. See :ref:`boolean.kleene` for more. + + To construct an BooleanArray from generic array-like input, use + :func:`pandas.array` specifying ``dtype="boolean"`` (see examples + below). + + .. versionadded:: 1.0.0 + + .. warning:: + + BooleanArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : numpy.ndarray + A 1-d boolean-dtype array with the data. + mask : numpy.ndarray + A 1-d boolean-dtype array indicating missing values (True + indicates missing). + copy : bool, default False + Whether to copy the `values` and `mask` arrays. + + Attributes + ---------- + None + + Methods + ------- + None + + Returns + ------- + BooleanArray + + Examples + -------- + Create an BooleanArray with :func:`pandas.array`: + + >>> pd.array([True, False, None], dtype="boolean") + + [True, False, ] + Length: 3, dtype: boolean + """ + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value = False + # Fill values used for any/all + _truthy_value = True + _falsey_value = False + _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} + _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} + + def __init__( + self, values: np.ndarray, mask: np.ndarray, copy: bool = False + ) -> None: + if not (isinstance(values, np.ndarray) and values.dtype == np.bool_): + raise TypeError( + "values should be boolean numpy array. Use " + "the 'pd.array' function instead" + ) + self._dtype = BooleanDtype() + super().__init__(values, mask, copy=copy) + + @property + def dtype(self) -> BooleanDtype: + return self._dtype + + @classmethod + def _from_sequence_of_strings( + cls, + strings: list[str], + *, + dtype: Dtype | None = None, + copy: bool = False, + true_values: list[str] | None = None, + false_values: list[str] | None = None, + ) -> BooleanArray: + true_values_union = cls._TRUE_VALUES.union(true_values or []) + false_values_union = cls._FALSE_VALUES.union(false_values or []) + + def map_string(s): + if isna(s): + return s + elif s in true_values_union: + return True + elif s in false_values_union: + return False + else: + raise ValueError(f"{s} cannot be cast to bool") + + scalars = [map_string(x) for x in strings] + return cls._from_sequence(scalars, dtype=dtype, copy=copy) + + _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_) + + @classmethod + def _coerce_to_array( + cls, value, *, dtype: DtypeObj, copy: bool = False + ) -> tuple[np.ndarray, np.ndarray]: + if dtype: + assert dtype == "boolean" + return coerce_to_array(value, copy=copy) + + def _logical_method(self, other, op): + + assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} + other_is_scalar = lib.is_scalar(other) + mask = None + + if isinstance(other, BooleanArray): + other, mask = other._data, other._mask + elif is_list_like(other): + other = np.asarray(other, dtype="bool") + if other.ndim > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + other, mask = coerce_to_array(other, copy=False) + elif isinstance(other, np.bool_): + other = other.item() + + if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other): + raise TypeError( + "'other' should be pandas.NA or a bool. 
" + f"Got {type(other).__name__} instead." + ) + + if not other_is_scalar and len(self) != len(other): + raise ValueError("Lengths must match") + + if op.__name__ in {"or_", "ror_"}: + result, mask = ops.kleene_or(self._data, other, self._mask, mask) + elif op.__name__ in {"and_", "rand_"}: + result, mask = ops.kleene_and(self._data, other, self._mask, mask) + else: + # i.e. xor, rxor + result, mask = ops.kleene_xor(self._data, other, self._mask, mask) + + # i.e. BooleanArray + return self._maybe_mask_result(result, mask) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py new file mode 100644 index 00000000..7219573a --- /dev/null +++ b/pandas/core/arrays/categorical.py @@ -0,0 +1,3008 @@ +from __future__ import annotations + +from csv import QUOTE_NONNUMERIC +from functools import partial +import operator +from shutil import get_terminal_size +from typing import ( + TYPE_CHECKING, + Hashable, + Literal, + Sequence, + TypeVar, + Union, + cast, + overload, +) +from warnings import ( + catch_warnings, + simplefilter, + warn, +) + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + NaT, + algos as libalgos, + lib, +) +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.lib import ( + NoDefault, + no_default, +) +from pandas._typing import ( + ArrayLike, + AstypeArg, + Dtype, + NpDtype, + Ordered, + Shape, + npt, + type_t, +) +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + deprecate_kwarg, + deprecate_nonkeyword_arguments, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import coerce_indexer_dtype +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_platform_int, + is_categorical_dtype, + is_datetime64_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_hashable, + is_integer_dtype, + is_list_like, + is_scalar, + is_timedelta64_dtype, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + ExtensionDtype, +) +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) + +from pandas.core import ( + arraylike, + ops, +) +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) +import pandas.core.algorithms as algorithms +from pandas.core.algorithms import ( + factorize, + take_nd, + unique1d, +) +from pandas.core.arrays._mixins import ( + NDArrayBackedExtensionArray, + ravel_compat, +) +from pandas.core.base import ( + ExtensionArray, + NoNewAttributesMixin, + PandasObject, +) +import pandas.core.common as com +from pandas.core.construction import ( + extract_array, + sanitize_array, +) +from pandas.core.ops.common import unpack_zerodim_and_defer +from pandas.core.sorting import nargsort +from pandas.core.strings.object_array import ObjectStringArrayMixin + +from pandas.io.formats import console + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Index, + Series, + ) + + +CategoricalT = TypeVar("CategoricalT", bound="Categorical") + + +def _cat_compare_op(op): + opname = f"__{op.__name__}__" + fill_value = True if op is operator.ne else False + + @unpack_zerodim_and_defer(opname) + def func(self, other): + hashable = is_hashable(other) + if is_list_like(other) and len(other) != len(self) and not hashable: + # in hashable case we may have a tuple that is itself a category + 
raise ValueError("Lengths must match.") + + if not self.ordered: + if opname in ["__lt__", "__gt__", "__le__", "__ge__"]: + raise TypeError( + "Unordered Categoricals can only compare equality or not" + ) + if isinstance(other, Categorical): + # Two Categoricals can only be compared if the categories are + # the same (maybe up to ordering, depending on ordered) + + msg = "Categoricals can only be compared if 'categories' are the same." + if not self._categories_match_up_to_permutation(other): + raise TypeError(msg) + + if not self.ordered and not self.categories.equals(other.categories): + # both unordered and different order + other_codes = recode_for_categories( + other.codes, other.categories, self.categories, copy=False + ) + else: + other_codes = other._codes + + ret = op(self._codes, other_codes) + mask = (self._codes == -1) | (other_codes == -1) + if mask.any(): + ret[mask] = fill_value + return ret + + if hashable: + if other in self.categories: + i = self._unbox_scalar(other) + ret = op(self._codes, i) + + if opname not in {"__eq__", "__ge__", "__gt__"}: + # GH#29820 performance trick; get_loc will always give i>=0, + # so in the cases (__ne__, __le__, __lt__) the setting + # here is a no-op, so can be skipped. + mask = self._codes == -1 + ret[mask] = fill_value + return ret + else: + return ops.invalid_comparison(self, other, op) + else: + # allow categorical vs object dtype array comparisons for equality + # these are only positional comparisons + if opname not in ["__eq__", "__ne__"]: + raise TypeError( + f"Cannot compare a Categorical for op {opname} with " + f"type {type(other)}.\nIf you want to compare values, " + "use 'np.asarray(cat) other'." + ) + + if isinstance(other, ExtensionArray) and needs_i8_conversion(other.dtype): + # We would return NotImplemented here, but that messes up + # ExtensionIndex's wrapped methods + return op(other, self) + return getattr(np.array(self), opname)(np.array(other)) + + func.__name__ = opname + + return func + + +def contains(cat, key, container) -> bool: + """ + Helper for membership check for ``key`` in ``cat``. + + This is a helper method for :method:`__contains__` + and :class:`CategoricalIndex.__contains__`. + + Returns True if ``key`` is in ``cat.categories`` and the + location of ``key`` in ``categories`` is in ``container``. + + Parameters + ---------- + cat : :class:`Categorical`or :class:`categoricalIndex` + key : a hashable object + The key to check membership for. + container : Container (e.g. list-like or mapping) + The container to check for membership in. + + Returns + ------- + is_in : bool + True if ``key`` is in ``self.categories`` and location of + ``key`` in ``categories`` is in ``container``, else False. + + Notes + ----- + This method does not check for NaN values. Do that separately + before calling this method. + """ + hash(key) + + # get location of key in categories. + # If a KeyError, the key isn't in categories, so logically + # can't be in container either. + try: + loc = cat.categories.get_loc(key) + except (KeyError, TypeError): + return False + + # loc is the location of key in categories, but also the *value* + # for key in container. So, `key` may be in categories, + # but still not in `container`. Example ('b' in categories, + # but not in values): + # 'b' in Categorical(['a'], categories=['a', 'b']) # False + if is_scalar(loc): + return loc in container + else: + # if categories is an IntervalIndex, loc is an array. 
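+ # Editor's note (illustrative, not upstream code): for example, with
+ # overlapping IntervalIndex categories a point key can match several
+ # intervals, so ``get_loc`` yields an array of candidate positions and
+ # membership holds if any of them is present in ``container``.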
+ return any(loc_ in container for loc_ in loc) + + +class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin): + """ + Represent a categorical variable in classic R / S-plus fashion. + + `Categoricals` can only take on only a limited, and usually fixed, number + of possible values (`categories`). In contrast to statistical categorical + variables, a `Categorical` might have an order, but numerical operations + (additions, divisions, ...) are not possible. + + All values of the `Categorical` are either in `categories` or `np.nan`. + Assigning values outside of `categories` will raise a `ValueError`. Order + is defined by the order of the `categories`, not lexical order of the + values. + + Parameters + ---------- + values : list-like + The values of the categorical. If categories are given, values not in + categories will be replaced with NaN. + categories : Index-like (unique), optional + The unique categories for this categorical. If not given, the + categories are assumed to be the unique values of `values` (sorted, if + possible, otherwise in the order in which they appear). + ordered : bool, default False + Whether or not this categorical is treated as a ordered categorical. + If True, the resulting categorical will be ordered. + An ordered categorical respects, when sorted, the order of its + `categories` attribute (which in turn is the `categories` argument, if + provided). + dtype : CategoricalDtype + An instance of ``CategoricalDtype`` to use for this categorical. + + Attributes + ---------- + categories : Index + The categories of this categorical + codes : ndarray + The codes (integer positions, which point to the categories) of this + categorical, read only. + ordered : bool + Whether or not this Categorical is ordered. + dtype : CategoricalDtype + The instance of ``CategoricalDtype`` storing the ``categories`` + and ``ordered``. + + Methods + ------- + from_codes + __array__ + + Raises + ------ + ValueError + If the categories do not validate. + TypeError + If an explicit ``ordered=True`` is given but no `categories` and the + `values` are not sortable. + + See Also + -------- + CategoricalDtype : Type for categorical data. + CategoricalIndex : An Index with an underlying ``Categorical``. + + Notes + ----- + See the `user guide + `__ + for more. + + Examples + -------- + >>> pd.Categorical([1, 2, 3, 1, 2, 3]) + [1, 2, 3, 1, 2, 3] + Categories (3, int64): [1, 2, 3] + + >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) + ['a', 'b', 'c', 'a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + Missing values are not included as a category. + + >>> c = pd.Categorical([1, 2, 3, 1, 2, 3, np.nan]) + >>> c + [1, 2, 3, 1, 2, 3, NaN] + Categories (3, int64): [1, 2, 3] + + However, their presence is indicated in the `codes` attribute + by code `-1`. + + >>> c.codes + array([ 0, 1, 2, 0, 1, 2, -1], dtype=int8) + + Ordered `Categoricals` can be sorted according to the custom order + of the categories and can have a min and max value. + + >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], ordered=True, + ... 
categories=['c', 'b', 'a']) + >>> c + ['a', 'b', 'c', 'a', 'b', 'c'] + Categories (3, object): ['c' < 'b' < 'a'] + >>> c.min() + 'c' + """ + + # For comparisons, so that numpy uses our implementation if the compare + # ops, which raise + __array_priority__ = 1000 + # tolist is not actually deprecated, just suppressed in the __dir__ + _hidden_attrs = PandasObject._hidden_attrs | frozenset(["tolist"]) + _typ = "categorical" + + _dtype: CategoricalDtype + + def __init__( + self, + values, + categories=None, + ordered=None, + dtype: Dtype | None = None, + fastpath: bool = False, + copy: bool = True, + ) -> None: + + dtype = CategoricalDtype._from_values_or_dtype( + values, categories, ordered, dtype + ) + # At this point, dtype is always a CategoricalDtype, but + # we may have dtype.categories be None, and we need to + # infer categories in a factorization step further below + + if fastpath: + codes = coerce_indexer_dtype(values, dtype.categories) + dtype = CategoricalDtype(ordered=False).update_dtype(dtype) + super().__init__(codes, dtype) + return + + if not is_list_like(values): + # GH#38433 + warn( + "Allowing scalars in the Categorical constructor is deprecated " + "and will raise in a future version. Use `[value]` instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + values = [values] + + # null_mask indicates missing values we want to exclude from inference. + # This means: only missing values in list-likes (not arrays/ndframes). + null_mask = np.array(False) + + # sanitize input + if is_categorical_dtype(values): + if dtype.categories is None: + dtype = CategoricalDtype(values.categories, dtype.ordered) + elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)): + values = com.convert_to_list_like(values) + if isinstance(values, list) and len(values) == 0: + # By convention, empty lists result in object dtype: + values = np.array([], dtype=object) + elif isinstance(values, np.ndarray): + if values.ndim > 1: + # preempt sanitize_array from raising ValueError + raise NotImplementedError( + "> 1 ndim Categorical are not supported at this time" + ) + values = sanitize_array(values, None) + else: + # i.e. must be a list + arr = sanitize_array(values, None) + null_mask = isna(arr) + if null_mask.any(): + # We remove null values here, then below will re-insert + # them, grep "full_codes" + arr_list = [values[idx] for idx in np.where(~null_mask)[0]] + + # GH#44900 Do not cast to float if we have only missing values + if arr_list or arr.dtype == "object": + sanitize_dtype = None + else: + sanitize_dtype = arr.dtype + + arr = sanitize_array(arr_list, None, dtype=sanitize_dtype) + values = arr + + if dtype.categories is None: + try: + codes, categories = factorize(values, sort=True) + except TypeError as err: + codes, categories = factorize(values, sort=False) + if dtype.ordered: + # raise, as we don't have a sortable data structure and so + # the user should give us one by specifying categories + raise TypeError( + "'values' is not ordered, please " + "explicitly specify the categories order " + "by passing in a categories argument." 
+ ) from err + + # we're inferring from values + dtype = CategoricalDtype(categories, dtype.ordered) + + elif is_categorical_dtype(values.dtype): + old_codes = extract_array(values)._codes + codes = recode_for_categories( + old_codes, values.dtype.categories, dtype.categories, copy=copy + ) + + else: + codes = _get_codes_for_values(values, dtype.categories) + + if null_mask.any(): + # Reinsert -1 placeholders for previously removed missing values + full_codes = -np.ones(null_mask.shape, dtype=codes.dtype) + full_codes[~null_mask] = codes + codes = full_codes + + dtype = CategoricalDtype(ordered=False).update_dtype(dtype) + arr = coerce_indexer_dtype(codes, dtype.categories) + super().__init__(arr, dtype) + + @property + def dtype(self) -> CategoricalDtype: + """ + The :class:`~pandas.api.types.CategoricalDtype` for this instance. + """ + return self._dtype + + @property + def _internal_fill_value(self) -> int: + # using the specific numpy integer instead of python int to get + # the correct dtype back from _quantile in the all-NA case + dtype = self._ndarray.dtype + return dtype.type(-1) + + @property + def _constructor(self) -> type[Categorical]: + return Categorical + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): + return Categorical(scalars, dtype=dtype, copy=copy) + + @overload + def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: + ... + + @overload + def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: + ... + + @overload + def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: + ... + + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + """ + Coerce this type to another dtype + + Parameters + ---------- + dtype : numpy dtype or pandas type + copy : bool, default True + By default, astype always returns a newly allocated object. + If copy is set to False and dtype is categorical, the original + object is returned. + """ + dtype = pandas_dtype(dtype) + if self.dtype is dtype: + result = self.copy() if copy else self + + elif is_categorical_dtype(dtype): + dtype = cast("Union[str, CategoricalDtype]", dtype) + + # GH 10696/18593/18630 + dtype = self.dtype.update_dtype(dtype) + self = self.copy() if copy else self + result = self._set_dtype(dtype) + + elif isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) + + elif is_integer_dtype(dtype) and self.isna().any(): + raise ValueError("Cannot convert float NaN to integer") + + elif len(self.codes) == 0 or len(self.categories) == 0: + result = np.array( + self, + dtype=dtype, + copy=copy, + ) + + else: + # GH8628 (PERF): astype category codes instead of astyping array + new_cats = self.categories._values + + try: + new_cats = new_cats.astype(dtype=dtype, copy=copy) + fill_value = self.categories._na_value + if not is_valid_na_for_dtype(fill_value, dtype): + fill_value = lib.item_from_zerodim( + np.array(self.categories._na_value).astype(dtype) + ) + except ( + TypeError, # downstream error msg for CategoricalIndex is misleading + ValueError, + ): + msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}" + raise ValueError(msg) + + result = take_nd( + new_cats, ensure_platform_int(self._codes), fill_value=fill_value + ) + + return result + + def to_list(self): + """ + Alias for tolist. + """ + return self.tolist() + + @classmethod + def _from_inferred_categories( + cls, inferred_categories, inferred_codes, dtype, true_values=None + ): + """ + Construct a Categorical from inferred values. 
+ + For inferred categories (`dtype` is None) the categories are sorted. + For explicit `dtype`, the `inferred_categories` are cast to the + appropriate type. + + Parameters + ---------- + inferred_categories : Index + inferred_codes : Index + dtype : CategoricalDtype or 'category' + true_values : list, optional + If none are provided, the default ones are + "True", "TRUE", and "true." + + Returns + ------- + Categorical + """ + from pandas import ( + Index, + to_datetime, + to_numeric, + to_timedelta, + ) + + cats = Index(inferred_categories) + known_categories = ( + isinstance(dtype, CategoricalDtype) and dtype.categories is not None + ) + + if known_categories: + # Convert to a specialized type with `dtype` if specified. + if dtype.categories.is_numeric(): + cats = to_numeric(inferred_categories, errors="coerce") + elif is_datetime64_dtype(dtype.categories): + cats = to_datetime(inferred_categories, errors="coerce") + elif is_timedelta64_dtype(dtype.categories): + cats = to_timedelta(inferred_categories, errors="coerce") + elif dtype.categories.is_boolean(): + if true_values is None: + true_values = ["True", "TRUE", "true"] + + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Index") + cats = cats.isin(true_values) # type: ignore[assignment] + + if known_categories: + # Recode from observation order to dtype.categories order. + categories = dtype.categories + codes = recode_for_categories(inferred_codes, cats, categories) + elif not cats.is_monotonic_increasing: + # Sort categories and recode for unknown categories. + unsorted = cats.copy() + categories = cats.sort_values() + + codes = recode_for_categories(inferred_codes, unsorted, categories) + dtype = CategoricalDtype(categories, ordered=False) + else: + dtype = CategoricalDtype(cats, ordered=False) + codes = inferred_codes + + return cls(codes, dtype=dtype, fastpath=True) + + @classmethod + def from_codes( + cls, codes, categories=None, ordered=None, dtype: Dtype | None = None + ) -> Categorical: + """ + Make a Categorical type from codes and categories or dtype. + + This constructor is useful if you already have codes and + categories/dtype and so do not need the (computation intensive) + factorization step, which is usually done on the constructor. + + If your data does not follow this convention, please use the normal + constructor. + + Parameters + ---------- + codes : array-like of int + An integer array, where each integer points to a category in + categories or dtype.categories, or else is -1 for NaN. + categories : index-like, optional + The categories for the categorical. Items need to be unique. + If the categories are not given here, then they must be provided + in `dtype`. + ordered : bool, optional + Whether or not this categorical is treated as an ordered + categorical. If not given here or in `dtype`, the resulting + categorical will be unordered. + dtype : CategoricalDtype or "category", optional + If :class:`CategoricalDtype`, cannot be used together with + `categories` or `ordered`. + + Returns + ------- + Categorical + + Examples + -------- + >>> dtype = pd.CategoricalDtype(['a', 'b'], ordered=True) + >>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype) + ['a', 'b', 'a', 'b'] + Categories (2, object): ['a' < 'b'] + """ + dtype = CategoricalDtype._from_values_or_dtype( + categories=categories, ordered=ordered, dtype=dtype + ) + if dtype.categories is None: + msg = ( + "The categories must be provided in 'categories' or " + "'dtype'. Both were None." 
+ ) + raise ValueError(msg) + + if is_extension_array_dtype(codes) and is_integer_dtype(codes): + # Avoid the implicit conversion of Int to object + if isna(codes).any(): + raise ValueError("codes cannot contain NA values") + codes = codes.to_numpy(dtype=np.int64) + else: + codes = np.asarray(codes) + if len(codes) and not is_integer_dtype(codes): + raise ValueError("codes need to be array-like integers") + + if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1): + raise ValueError("codes need to be between -1 and len(categories)-1") + + return cls(codes, dtype=dtype, fastpath=True) + + # ------------------------------------------------------------------ + # Categories/Codes/Ordered + + @property + def categories(self) -> Index: + """ + The categories of this categorical. + + Setting assigns new values to each category (effectively a rename of + each individual category). + + The assigned value has to be a list-like object. All items must be + unique and the number of items in the new categories must be the same + as the number of items in the old categories. + + Assigning to `categories` is a inplace operation! + + Raises + ------ + ValueError + If the new categories do not validate as categories or if the + number of new categories is unequal the number of old categories + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. + set_categories : Set the categories to the specified ones. + """ + return self.dtype.categories + + @categories.setter + def categories(self, categories) -> None: + warn( + "Setting categories in-place is deprecated and will raise in a " + "future version. Use rename_categories instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + self._set_categories(categories) + + @property + def ordered(self) -> Ordered: + """ + Whether the categories have an ordered relationship. + """ + return self.dtype.ordered + + @property + def codes(self) -> np.ndarray: + """ + The category codes of this categorical. + + Codes are an array of integers which are the positions of the actual + values in the categories array. + + There is no setter, use the other categorical methods and the normal item + setter to change values in the categorical. + + Returns + ------- + ndarray[int] + A non-writable view of the `codes` array. + """ + v = self._codes.view() + v.flags.writeable = False + return v + + def _set_categories(self, categories, fastpath=False): + """ + Sets new categories inplace + + Parameters + ---------- + fastpath : bool, default False + Don't perform validation of the categories for uniqueness or nulls + + Examples + -------- + >>> c = pd.Categorical(['a', 'b']) + >>> c + ['a', 'b'] + Categories (2, object): ['a', 'b'] + + >>> c._set_categories(pd.Index(['a', 'c'])) + >>> c + ['a', 'c'] + Categories (2, object): ['a', 'c'] + """ + if fastpath: + new_dtype = CategoricalDtype._from_fastpath(categories, self.ordered) + else: + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if ( + not fastpath + and self.dtype.categories is not None + and len(new_dtype.categories) != len(self.dtype.categories) + ): + raise ValueError( + "new categories need to have the same number of " + "items as the old categories!" 
+ ) + + super().__init__(self._ndarray, new_dtype) + + def _set_dtype(self, dtype: CategoricalDtype) -> Categorical: + """ + Internal method for directly updating the CategoricalDtype + + Parameters + ---------- + dtype : CategoricalDtype + + Notes + ----- + We don't do any validation here. It's assumed that the dtype is + a (valid) instance of `CategoricalDtype`. + """ + codes = recode_for_categories(self.codes, self.categories, dtype.categories) + return type(self)(codes, dtype=dtype, fastpath=True) + + @overload + def set_ordered( + self, value, *, inplace: NoDefault | Literal[False] = ... + ) -> Categorical: + ... + + @overload + def set_ordered(self, value, *, inplace: Literal[True]) -> None: + ... + + @overload + def set_ordered(self, value, *, inplace: bool) -> Categorical | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) + def set_ordered( + self, value, inplace: bool | NoDefault = no_default + ) -> Categorical | None: + """ + Set the ordered attribute to the boolean value. + + Parameters + ---------- + value : bool + Set whether this categorical is ordered (True) or not (False). + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to the value. + + .. deprecated:: 1.5.0 + + """ + if inplace is not no_default: + warn( + "The `inplace` parameter in pandas.Categorical." + "set_ordered is deprecated and will be removed in " + "a future version. setting ordered-ness on categories will always " + "return a new Categorical object.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + inplace = validate_bool_kwarg(inplace, "inplace") + new_dtype = CategoricalDtype(self.categories, ordered=value) + cat = self if inplace else self.copy() + NDArrayBacked.__init__(cat, cat._ndarray, new_dtype) + if not inplace: + return cat + return None + + @overload + def as_ordered(self, *, inplace: NoDefault | Literal[False] = ...) -> Categorical: + ... + + @overload + def as_ordered(self, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def as_ordered(self, inplace: bool | NoDefault = no_default) -> Categorical | None: + """ + Set the Categorical to be ordered. + + Parameters + ---------- + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to True. + + .. deprecated:: 1.5.0 + + Returns + ------- + Categorical or None + Ordered Categorical or None if ``inplace=True``. + """ + if inplace is not no_default: + inplace = validate_bool_kwarg(inplace, "inplace") + return self.set_ordered(True, inplace=inplace) + + @overload + def as_unordered(self, *, inplace: NoDefault | Literal[False] = ...) -> Categorical: + ... + + @overload + def as_unordered(self, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def as_unordered( + self, inplace: bool | NoDefault = no_default + ) -> Categorical | None: + """ + Set the Categorical to be unordered. + + Parameters + ---------- + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to False. + + .. deprecated:: 1.5.0 + + Returns + ------- + Categorical or None + Unordered Categorical or None if ``inplace=True``. 
+ """ + if inplace is not no_default: + inplace = validate_bool_kwarg(inplace, "inplace") + return self.set_ordered(False, inplace=inplace) + + def set_categories( + self, new_categories, ordered=None, rename=False, inplace=no_default + ): + """ + Set the categories to the specified new_categories. + + `new_categories` can include new categories (which will result in + unused categories) or remove old categories (which results in values + set to NaN). If `rename==True`, the categories will simple be renamed + (less or more items than in old categories will result in values set to + NaN or in unused categories respectively). + + This method can be used to perform more than one action of adding, + removing, and reordering simultaneously and is therefore faster than + performing the individual steps via the more specialised methods. + + On the other hand this methods does not do checks (e.g., whether the + old categories are included in the new categories on a reorder), which + can result in surprising changes, for example when using special string + dtypes, which does not considers a S1 string equal to a single char + python string. + + Parameters + ---------- + new_categories : Index-like + The categories in new order. + ordered : bool, default False + Whether or not the categorical is treated as a ordered categorical. + If not given, do not change the ordered information. + rename : bool, default False + Whether or not the new_categories should be considered as a rename + of the old categories or as reordered categories. + inplace : bool, default False + Whether or not to reorder the categories in-place or return a copy + of this categorical with reordered categories. + + .. deprecated:: 1.3.0 + + Returns + ------- + Categorical with reordered categories or None if inplace. + + Raises + ------ + ValueError + If new_categories does not validate as categories + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. + """ + if inplace is not no_default: + warn( + "The `inplace` parameter in pandas.Categorical." + "set_categories is deprecated and will be removed in " + "a future version. Removing unused categories will always " + "return a new Categorical object.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + inplace = validate_bool_kwarg(inplace, "inplace") + if ordered is None: + ordered = self.dtype.ordered + new_dtype = CategoricalDtype(new_categories, ordered=ordered) + + cat = self if inplace else self.copy() + if rename: + if cat.dtype.categories is not None and len(new_dtype.categories) < len( + cat.dtype.categories + ): + # remove all _codes which are larger and set to -1/NaN + cat._codes[cat._codes >= len(new_dtype.categories)] = -1 + codes = cat._codes + else: + codes = recode_for_categories( + cat.codes, cat.categories, new_dtype.categories + ) + NDArrayBacked.__init__(cat, codes, new_dtype) + + if not inplace: + return cat + + @overload + def rename_categories( + self, new_categories, *, inplace: Literal[False] | NoDefault = ... + ) -> Categorical: + ... + + @overload + def rename_categories(self, new_categories, *, inplace: Literal[True]) -> None: + ... 
+ + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "new_categories"] + ) + def rename_categories( + self, new_categories, inplace: bool | NoDefault = no_default + ) -> Categorical | None: + """ + Rename categories. + + Parameters + ---------- + new_categories : list-like, dict-like or callable + + New categories which will replace old categories. + + * list-like: all items must be unique and the number of items in + the new categories must match the existing number of categories. + + * dict-like: specifies a mapping from + old categories to new. Categories not contained in the mapping + are passed through and extra categories in the mapping are + ignored. + + * callable : a callable that is called on all items in the old + categories and whose return values comprise the new categories. + + inplace : bool, default False + Whether or not to rename the categories inplace or return a copy of + this categorical with renamed categories. + + .. deprecated:: 1.3.0 + + Returns + ------- + cat : Categorical or None + Categorical with removed categories or None if ``inplace=True``. + + Raises + ------ + ValueError + If new categories are list-like and do not have the same number of + items than the current categories or do not validate as categories + + See Also + -------- + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. + set_categories : Set the categories to the specified ones. + + Examples + -------- + >>> c = pd.Categorical(['a', 'a', 'b']) + >>> c.rename_categories([0, 1]) + [0, 0, 1] + Categories (2, int64): [0, 1] + + For dict-like ``new_categories``, extra keys are ignored and + categories not in the dictionary are passed through + + >>> c.rename_categories({'a': 'A', 'c': 'C'}) + ['A', 'A', 'b'] + Categories (2, object): ['A', 'b'] + + You may also provide a callable to create the new categories + + >>> c.rename_categories(lambda x: x.upper()) + ['A', 'A', 'B'] + Categories (2, object): ['A', 'B'] + """ + if inplace is not no_default: + warn( + "The `inplace` parameter in pandas.Categorical." + "rename_categories is deprecated and will be removed in " + "a future version. Removing unused categories will always " + "return a new Categorical object.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + inplace = validate_bool_kwarg(inplace, "inplace") + cat = self if inplace else self.copy() + + if is_dict_like(new_categories): + new_categories = [new_categories.get(item, item) for item in cat.categories] + elif callable(new_categories): + new_categories = [new_categories(item) for item in cat.categories] + + cat._set_categories(new_categories) + if not inplace: + return cat + return None + + def reorder_categories(self, new_categories, ordered=None, inplace=no_default): + """ + Reorder categories as specified in new_categories. + + `new_categories` need to include all old categories and no new category + items. + + Parameters + ---------- + new_categories : Index-like + The categories in new order. + ordered : bool, optional + Whether or not the categorical is treated as a ordered categorical. + If not given, do not change the ordered information. + inplace : bool, default False + Whether or not to reorder the categories inplace or return a copy of + this categorical with reordered categories. + + .. 
deprecated:: 1.3.0 + + Returns + ------- + cat : Categorical or None + Categorical with removed categories or None if ``inplace=True``. + + Raises + ------ + ValueError + If the new categories do not contain all old category items or any + new ones + + See Also + -------- + rename_categories : Rename categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. + set_categories : Set the categories to the specified ones. + """ + if inplace is not no_default: + warn( + "The `inplace` parameter in pandas.Categorical." + "reorder_categories is deprecated and will be removed in " + "a future version. Reordering categories will always " + "return a new Categorical object.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + inplace = validate_bool_kwarg(inplace, "inplace") + if set(self.dtype.categories) != set(new_categories): + raise ValueError( + "items in new_categories are not the same as in old categories" + ) + + with catch_warnings(): + simplefilter("ignore") + return self.set_categories(new_categories, ordered=ordered, inplace=inplace) + + @overload + def add_categories( + self, new_categories, *, inplace: Literal[False] | NoDefault = ... + ) -> Categorical: + ... + + @overload + def add_categories(self, new_categories, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "new_categories"] + ) + def add_categories( + self, new_categories, inplace: bool | NoDefault = no_default + ) -> Categorical | None: + """ + Add new categories. + + `new_categories` will be included at the last/highest place in the + categories and will be unused directly after this call. + + Parameters + ---------- + new_categories : category or list-like of category + The new categories to be included. + inplace : bool, default False + Whether or not to add the categories inplace or return a copy of + this categorical with added categories. + + .. deprecated:: 1.3.0 + + Returns + ------- + cat : Categorical or None + Categorical with new categories added or None if ``inplace=True``. + + Raises + ------ + ValueError + If the new categories include old categories or do not validate as + categories + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. + set_categories : Set the categories to the specified ones. + + Examples + -------- + >>> c = pd.Categorical(['c', 'b', 'c']) + >>> c + ['c', 'b', 'c'] + Categories (2, object): ['b', 'c'] + + >>> c.add_categories(['d', 'a']) + ['c', 'b', 'c'] + Categories (4, object): ['b', 'c', 'd', 'a'] + """ + if inplace is not no_default: + warn( + "The `inplace` parameter in pandas.Categorical." + "add_categories is deprecated and will be removed in " + "a future version. 
Removing unused categories will always " + "return a new Categorical object.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + inplace = validate_bool_kwarg(inplace, "inplace") + if not is_list_like(new_categories): + new_categories = [new_categories] + already_included = set(new_categories) & set(self.dtype.categories) + if len(already_included) != 0: + raise ValueError( + f"new categories must not include old categories: {already_included}" + ) + new_categories = list(self.dtype.categories) + list(new_categories) + new_dtype = CategoricalDtype(new_categories, self.ordered) + + cat = self if inplace else self.copy() + codes = coerce_indexer_dtype(cat._ndarray, new_dtype.categories) + NDArrayBacked.__init__(cat, codes, new_dtype) + if not inplace: + return cat + return None + + def remove_categories(self, removals, inplace=no_default): + """ + Remove the specified categories. + + `removals` must be included in the old categories. Values which were in + the removed categories will be set to NaN + + Parameters + ---------- + removals : category or list of categories + The categories which should be removed. + inplace : bool, default False + Whether or not to remove the categories inplace or return a copy of + this categorical with removed categories. + + .. deprecated:: 1.3.0 + + Returns + ------- + cat : Categorical or None + Categorical with removed categories or None if ``inplace=True``. + + Raises + ------ + ValueError + If the removals are not contained in the categories + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_unused_categories : Remove categories which are not used. + set_categories : Set the categories to the specified ones. + + Examples + -------- + >>> c = pd.Categorical(['a', 'c', 'b', 'c', 'd']) + >>> c + ['a', 'c', 'b', 'c', 'd'] + Categories (4, object): ['a', 'b', 'c', 'd'] + + >>> c.remove_categories(['d', 'a']) + [NaN, 'c', 'b', 'c', NaN] + Categories (2, object): ['b', 'c'] + """ + if inplace is not no_default: + warn( + "The `inplace` parameter in pandas.Categorical." + "remove_categories is deprecated and will be removed in " + "a future version. Removing unused categories will always " + "return a new Categorical object.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + inplace = validate_bool_kwarg(inplace, "inplace") + if not is_list_like(removals): + removals = [removals] + + removal_set = set(removals) + not_included = removal_set - set(self.dtype.categories) + new_categories = [c for c in self.dtype.categories if c not in removal_set] + + # GH 10156 + if any(isna(removals)): + not_included = {x for x in not_included if notna(x)} + new_categories = [x for x in new_categories if notna(x)] + + if len(not_included) != 0: + raise ValueError(f"removals must all be in old categories: {not_included}") + + with catch_warnings(): + simplefilter("ignore") + return self.set_categories( + new_categories, ordered=self.ordered, rename=False, inplace=inplace + ) + + @overload + def remove_unused_categories( + self, *, inplace: Literal[False] | NoDefault = ... + ) -> Categorical: + ... + + @overload + def remove_unused_categories(self, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def remove_unused_categories( + self, inplace: bool | NoDefault = no_default + ) -> Categorical | None: + """ + Remove categories which are not used. 
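# Minimal sketch of reorder_categories as documented above (same category set,
# new order; ordered=True also makes the result ordered). Illustrative only.
import pandas as pd

c = pd.Categorical(["a", "b", "a", "c"])
print(c.reorder_categories(["c", "b", "a"], ordered=True))
# ['a', 'b', 'a', 'c']
# Categories (3, object): ['c' < 'b' < 'a']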
+ + Parameters + ---------- + inplace : bool, default False + Whether or not to drop unused categories inplace or return a copy of + this categorical with unused categories dropped. + + .. deprecated:: 1.2.0 + + Returns + ------- + cat : Categorical or None + Categorical with unused categories dropped or None if ``inplace=True``. + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + set_categories : Set the categories to the specified ones. + + Examples + -------- + >>> c = pd.Categorical(['a', 'c', 'b', 'c', 'd']) + >>> c + ['a', 'c', 'b', 'c', 'd'] + Categories (4, object): ['a', 'b', 'c', 'd'] + + >>> c[2] = 'a' + >>> c[4] = 'c' + >>> c + ['a', 'c', 'a', 'c', 'c'] + Categories (4, object): ['a', 'b', 'c', 'd'] + + >>> c.remove_unused_categories() + ['a', 'c', 'a', 'c', 'c'] + Categories (2, object): ['a', 'c'] + """ + if inplace is not no_default: + warn( + "The `inplace` parameter in pandas.Categorical." + "remove_unused_categories is deprecated and " + "will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + inplace = validate_bool_kwarg(inplace, "inplace") + cat = self if inplace else self.copy() + idx, inv = np.unique(cat._codes, return_inverse=True) + + if idx.size != 0 and idx[0] == -1: # na sentinel + idx, inv = idx[1:], inv - 1 + + new_categories = cat.dtype.categories.take(idx) + new_dtype = CategoricalDtype._from_fastpath( + new_categories, ordered=self.ordered + ) + new_codes = coerce_indexer_dtype(inv, new_dtype.categories) + NDArrayBacked.__init__(cat, new_codes, new_dtype) + if not inplace: + return cat + return None + + # ------------------------------------------------------------------ + + def map(self, mapper): + """ + Map categories using an input mapping or function. + + Maps the categories to new categories. If the mapping correspondence is + one-to-one the result is a :class:`~pandas.Categorical` which has the + same order property as the original, otherwise a :class:`~pandas.Index` + is returned. NaN values are unaffected. + + If a `dict` or :class:`~pandas.Series` is used any unmapped category is + mapped to `NaN`. Note that if this happens an :class:`~pandas.Index` + will be returned. + + Parameters + ---------- + mapper : function, dict, or Series + Mapping correspondence. + + Returns + ------- + pandas.Categorical or pandas.Index + Mapped categorical. + + See Also + -------- + CategoricalIndex.map : Apply a mapping correspondence on a + :class:`~pandas.CategoricalIndex`. + Index.map : Apply a mapping correspondence on an + :class:`~pandas.Index`. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. + Series.apply : Apply more complex functions on a + :class:`~pandas.Series`. 
+ + Examples + -------- + >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> cat.map(lambda x: x.upper()) + ['A', 'B', 'C'] + Categories (3, object): ['A', 'B', 'C'] + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}) + ['first', 'second', 'third'] + Categories (3, object): ['first', 'second', 'third'] + + If the mapping is one-to-one the ordering of the categories is + preserved: + + >>> cat = pd.Categorical(['a', 'b', 'c'], ordered=True) + >>> cat + ['a', 'b', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] + >>> cat.map({'a': 3, 'b': 2, 'c': 1}) + [3, 2, 1] + Categories (3, int64): [3 < 2 < 1] + + If the mapping is not one-to-one an :class:`~pandas.Index` is returned: + + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}) + Index(['first', 'second', 'first'], dtype='object') + + If a `dict` is used, all unmapped categories are mapped to `NaN` and + the result is an :class:`~pandas.Index`: + + >>> cat.map({'a': 'first', 'b': 'second'}) + Index(['first', 'second', nan], dtype='object') + """ + new_categories = self.categories.map(mapper) + try: + return self.from_codes( + self._codes.copy(), categories=new_categories, ordered=self.ordered + ) + except ValueError: + # NA values are represented in self._codes with -1 + # np.take causes NA values to take final element in new_categories + if np.any(self._codes == -1): + new_categories = new_categories.insert(len(new_categories), np.nan) + return np.take(new_categories, self._codes) + + __eq__ = _cat_compare_op(operator.eq) + __ne__ = _cat_compare_op(operator.ne) + __lt__ = _cat_compare_op(operator.lt) + __gt__ = _cat_compare_op(operator.gt) + __le__ = _cat_compare_op(operator.le) + __ge__ = _cat_compare_op(operator.ge) + + # ------------------------------------------------------------- + # Validators; ideally these can be de-duplicated + + def _validate_setitem_value(self, value): + if not is_hashable(value): + # wrap scalars and hashable-listlikes in list + return self._validate_listlike(value) + else: + return self._validate_scalar(value) + + _validate_searchsorted_value = _validate_setitem_value + + def _validate_scalar(self, fill_value): + """ + Convert a user-facing fill_value to a representation to use with our + underlying ndarray, raising TypeError if this is not possible. + + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : int + + Raises + ------ + TypeError + """ + + if is_valid_na_for_dtype(fill_value, self.categories.dtype): + fill_value = -1 + elif fill_value in self.categories: + fill_value = self._unbox_scalar(fill_value) + else: + raise TypeError( + "Cannot setitem on a Categorical with a new " + f"category ({fill_value}), set the categories first" + ) from None + return fill_value + + # ------------------------------------------------------------- + + @ravel_compat + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + """ + The numpy array interface. + + Returns + ------- + numpy.array + A numpy array of either the specified dtype or, + if dtype==None (default), the same dtype as + categorical.categories.dtype. + """ + ret = take_nd(self.categories._values, self._codes) + if dtype and not is_dtype_equal(dtype, self.categories.dtype): + return np.asarray(ret, dtype) + # When we're a Categorical[ExtensionArray], like Interval, + # we need to ensure __array__ gets all the way to an + # ndarray. 
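# Illustrative sketch of the setitem validation above (_validate_scalar /
# _validate_listlike): only existing categories or NA-like values may be
# assigned into a Categorical.
import numpy as np
import pandas as pd

c = pd.Categorical(["a", "b", "a"])
c[0] = "b"         # fine: "b" is an existing category
c[1] = np.nan      # fine: missing values are always allowed
try:
    c[2] = "z"     # not an existing category -> TypeError
except TypeError as err:
    print(err)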
+ return np.asarray(ret) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + # e.g. test_numpy_ufuncs_out + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + if method == "reduce": + # e.g. TestCategoricalAnalytics::test_min_max_ordered + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + # for all other cases, raise for now (similarly as what happens in + # Series.__array_prepare__) + raise TypeError( + f"Object with dtype {self.dtype} cannot perform " + f"the numpy op {ufunc.__name__}" + ) + + def __setstate__(self, state) -> None: + """Necessary for making this object picklable""" + if not isinstance(state, dict): + return super().__setstate__(state) + + if "_dtype" not in state: + state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"]) + + if "_codes" in state and "_ndarray" not in state: + # backward compat, changed what is property vs attribute + state["_ndarray"] = state.pop("_codes") + + super().__setstate__(state) + + @property + def nbytes(self) -> int: + return self._codes.nbytes + self.dtype.categories.values.nbytes + + def memory_usage(self, deep: bool = False) -> int: + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep) + + def isna(self) -> np.ndarray: + """ + Detect missing values + + Missing values (-1 in .codes) are detected. + + Returns + ------- + np.ndarray[bool] of whether my values are null + + See Also + -------- + isna : Top-level isna. + isnull : Alias of isna. + Categorical.notna : Boolean inverse of Categorical.isna. + + """ + return self._codes == -1 + + isnull = isna + + def notna(self) -> np.ndarray: + """ + Inverse of isna + + Both missing values (-1 in .codes) and NA as a category are detected as + null. + + Returns + ------- + np.ndarray[bool] of whether my values are not null + + See Also + -------- + notna : Top-level notna. + notnull : Alias of notna. + Categorical.isna : Boolean inverse of Categorical.notna. + + """ + return ~self.isna() + + notnull = notna + + def value_counts(self, dropna: bool = True) -> Series: + """ + Return a Series containing counts of each category. + + Every category will have an entry, even those with a count of 0. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN. 
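# Illustrative sketch of __array__ and isna as defined above: np.asarray
# materialises the category values, with the -1 code surfacing as NaN.
import numpy as np
import pandas as pd

c = pd.Categorical(["a", None, "b"])
print(np.asarray(c))   # ['a' nan 'b'] (object dtype)
print(c.isna())        # [False  True False]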
+ + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + from pandas import ( + CategoricalIndex, + Series, + ) + + code, cat = self._codes, self.categories + ncat, mask = (len(cat), code >= 0) + ix, clean = np.arange(ncat), mask.all() + + if dropna or clean: + obs = code if clean else code[mask] + count = np.bincount(obs, minlength=ncat or 0) + else: + count = np.bincount(np.where(mask, code, ncat)) + ix = np.append(ix, -1) + + ix = coerce_indexer_dtype(ix, self.dtype.categories) + ix = self._from_backing_data(ix) + + return Series(count, index=CategoricalIndex(ix), dtype="int64") + + # error: Argument 2 of "_empty" is incompatible with supertype + # "NDArrayBackedExtensionArray"; supertype defines the argument type as + # "ExtensionDtype" + @classmethod + def _empty( # type: ignore[override] + cls: type_t[Categorical], shape: Shape, dtype: CategoricalDtype + ) -> Categorical: + """ + Analogous to np.empty(shape, dtype=dtype) + + Parameters + ---------- + shape : tuple[int] + dtype : CategoricalDtype + """ + arr = cls._from_sequence([], dtype=dtype) + + # We have to use np.zeros instead of np.empty otherwise the resulting + # ndarray may contain codes not supported by this dtype, in which + # case repr(result) could segfault. + backing = np.zeros(shape, dtype=arr._ndarray.dtype) + + return arr._from_backing_data(backing) + + def _internal_get_values(self): + """ + Return the values. + + For internal compatibility with pandas formatting. + + Returns + ------- + np.ndarray or Index + A numpy array of the same dtype as categorical.categories.dtype or + Index if datetime / periods. + """ + # if we are a datetime and period index, return Index to keep metadata + if needs_i8_conversion(self.categories.dtype): + return self.categories.take(self._codes, fill_value=NaT) + elif is_integer_dtype(self.categories) and -1 in self._codes: + return self.categories.astype("object").take(self._codes, fill_value=np.nan) + return np.array(self) + + def check_for_ordered(self, op) -> None: + """assert that we are ordered""" + if not self.ordered: + raise TypeError( + f"Categorical is not ordered for operation {op}\n" + "you can use .as_ordered() to change the " + "Categorical to an ordered one\n" + ) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def argsort(self, ascending=True, kind="quicksort", **kwargs): + """ + Return the indices that would sort the Categorical. + + .. versionchanged:: 0.25.0 + + Changed to sort missing values at the end. + + Parameters + ---------- + ascending : bool, default True + Whether the indices should result in an ascending + or descending sort. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + Sorting algorithm. + **kwargs: + passed through to :func:`numpy.argsort`. + + Returns + ------- + np.ndarray[np.intp] + + See Also + -------- + numpy.ndarray.argsort + + Notes + ----- + While an ordering is applied to the category values, arg-sorting + in this context refers more to organizing and grouping together + based on matching category values. Thus, this function can be + called on an unordered Categorical instance unlike the functions + 'Categorical.min' and 'Categorical.max'. + + Examples + -------- + >>> pd.Categorical(['b', 'b', 'a', 'c']).argsort() + array([2, 0, 1, 3]) + + >>> cat = pd.Categorical(['b', 'b', 'a', 'c'], + ... categories=['c', 'b', 'a'], + ... 
ordered=True) + >>> cat.argsort() + array([3, 0, 1, 2]) + + Missing values are placed at the end + + >>> cat = pd.Categorical([2, None, 1]) + >>> cat.argsort() + array([2, 0, 1]) + """ + return super().argsort(ascending=ascending, kind=kind, **kwargs) + + @overload + def sort_values( + self, + *, + inplace: Literal[False] = ..., + ascending: bool = ..., + na_position: str = ..., + ) -> Categorical: + ... + + @overload + def sort_values( + self, *, inplace: Literal[True], ascending: bool = ..., na_position: str = ... + ) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def sort_values( + self, inplace: bool = False, ascending: bool = True, na_position: str = "last" + ) -> Categorical | None: + """ + Sort the Categorical by category value returning a new + Categorical by default. + + While an ordering is applied to the category values, sorting in this + context refers more to organizing and grouping together based on + matching category values. Thus, this function can be called on an + unordered Categorical instance unlike the functions 'Categorical.min' + and 'Categorical.max'. + + Parameters + ---------- + inplace : bool, default False + Do operation in place. + ascending : bool, default True + Order ascending. Passing False orders descending. The + ordering parameter provides the method by which the + category values are organized. + na_position : {'first', 'last'} (optional, default='last') + 'first' puts NaNs at the beginning + 'last' puts NaNs at the end + + Returns + ------- + Categorical or None + + See Also + -------- + Categorical.sort + Series.sort_values + + Examples + -------- + >>> c = pd.Categorical([1, 2, 2, 1, 5]) + >>> c + [1, 2, 2, 1, 5] + Categories (3, int64): [1, 2, 5] + >>> c.sort_values() + [1, 1, 2, 2, 5] + Categories (3, int64): [1, 2, 5] + >>> c.sort_values(ascending=False) + [5, 2, 2, 1, 1] + Categories (3, int64): [1, 2, 5] + + Inplace sorting can be done as well: + + >>> c.sort_values(inplace=True) + >>> c + [1, 1, 2, 2, 5] + Categories (3, int64): [1, 2, 5] + >>> + >>> c = pd.Categorical([1, 2, 2, 1, 5]) + + 'sort_values' behaviour with NaNs. Note that 'na_position' + is independent of the 'ascending' parameter: + + >>> c = pd.Categorical([np.nan, 2, 2, np.nan, 5]) + >>> c + [NaN, 2, 2, NaN, 5] + Categories (2, int64): [2, 5] + >>> c.sort_values() + [2, 2, 5, NaN, NaN] + Categories (2, int64): [2, 5] + >>> c.sort_values(ascending=False) + [5, 2, 2, NaN, NaN] + Categories (2, int64): [2, 5] + >>> c.sort_values(na_position='first') + [NaN, NaN, 2, 2, 5] + Categories (2, int64): [2, 5] + >>> c.sort_values(ascending=False, na_position='first') + [NaN, NaN, 5, 2, 2] + Categories (2, int64): [2, 5] + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if na_position not in ["last", "first"]: + raise ValueError(f"invalid na_position: {repr(na_position)}") + + sorted_idx = nargsort(self, ascending=ascending, na_position=na_position) + + if not inplace: + codes = self._codes[sorted_idx] + return self._from_backing_data(codes) + self._codes[:] = self._codes[sorted_idx] + return None + + def _rank( + self, + *, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + """ + See Series.rank.__doc__. 
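# Illustrative sketch of Categorical.value_counts (defined earlier in this
# class): every category gets a row, even with a count of zero, and
# dropna=False adds a row for missing values.
import numpy as np
import pandas as pd

c = pd.Categorical(["a", "a", np.nan], categories=["a", "b"])
print(c.value_counts())              # a -> 2, b -> 0
print(c.value_counts(dropna=False))  # a -> 2, b -> 0, NaN -> 1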
+ """ + if axis != 0: + raise NotImplementedError + vff = self._values_for_rank() + return algorithms.rank( + vff, + axis=axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + + def _values_for_rank(self): + """ + For correctly ranking ordered categorical data. See GH#15420 + + Ordered categorical data should be ranked on the basis of + codes with -1 translated to NaN. + + Returns + ------- + numpy.array + + """ + from pandas import Series + + if self.ordered: + values = self.codes + mask = values == -1 + if mask.any(): + values = values.astype("float64") + values[mask] = np.nan + elif self.categories.is_numeric(): + values = np.array(self) + else: + # reorder the categories (so rank can use the float codes) + # instead of passing an object array to rank + values = np.array( + self.rename_categories(Series(self.categories).rank().values) + ) + return values + + def to_dense(self) -> np.ndarray: + """ + Return my 'dense' representation + + For internal compatibility with numpy arrays. + + Returns + ------- + dense : array + """ + warn( + "Categorical.to_dense is deprecated and will be removed in " + "a future version. Use np.asarray(cat) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return np.asarray(self) + + # ------------------------------------------------------------------ + # NDArrayBackedExtensionArray compat + + @property + def _codes(self) -> np.ndarray: + return self._ndarray + + @_codes.setter + def _codes(self, value: np.ndarray): + warn( + "Setting the codes on a Categorical is deprecated and will raise in " + "a future version. Create a new Categorical object instead", + FutureWarning, + stacklevel=find_stack_level(), + ) # GH#40606 + NDArrayBacked.__init__(self, value, self.dtype) + + def _box_func(self, i: int): + if i == -1: + return np.NaN + return self.categories[i] + + def _unbox_scalar(self, key) -> int: + # searchsorted is very performance sensitive. By converting codes + # to same dtype as self.codes, we get much faster performance. + code = self.categories.get_loc(key) + code = self._ndarray.dtype.type(code) + return code + + # ------------------------------------------------------------------ + + def take_nd( + self, indexer, allow_fill: bool = False, fill_value=None + ) -> Categorical: + # GH#27745 deprecate alias that other EAs dont have + warn( + "Categorical.take_nd is deprecated, use Categorical.take instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value) + + def __iter__(self): + """ + Returns an Iterator over the values of this Categorical. + """ + if self.ndim == 1: + return iter(self._internal_get_values().tolist()) + else: + return (self[n] for n in range(len(self))) + + def __contains__(self, key) -> bool: + """ + Returns True if `key` is in this Categorical. + """ + # if key is a NaN, check if any NaN is in self. + if is_valid_na_for_dtype(key, self.categories.dtype): + return bool(self.isna().any()) + + return contains(self, key, container=self._codes) + + # ------------------------------------------------------------------ + # Rendering Methods + + def _formatter(self, boxed: bool = False): + # Defer to CategoricalFormatter's formatter. 
+ return None + + def _tidy_repr(self, max_vals: int = 10, footer: bool = True) -> str: + """ + a short repr displaying only max_vals and an optional (but default + footer) + """ + num = max_vals // 2 + head = self[:num]._get_repr(length=False, footer=False) + tail = self[-(max_vals - num) :]._get_repr(length=False, footer=False) + + result = f"{head[:-1]}, ..., {tail[1:]}" + if footer: + result = f"{result}\n{self._repr_footer()}" + + return str(result) + + def _repr_categories(self) -> list[str]: + """ + return the base repr for the categories + """ + max_categories = ( + 10 + if get_option("display.max_categories") == 0 + else get_option("display.max_categories") + ) + from pandas.io.formats import format as fmt + + format_array = partial( + fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC + ) + if len(self.categories) > max_categories: + num = max_categories // 2 + head = format_array(self.categories[:num]) + tail = format_array(self.categories[-num:]) + category_strs = head + ["..."] + tail + else: + category_strs = format_array(self.categories) + + # Strip all leading spaces, which format_array adds for columns... + category_strs = [x.strip() for x in category_strs] + return category_strs + + def _repr_categories_info(self) -> str: + """ + Returns a string representation of the footer. + """ + category_strs = self._repr_categories() + dtype = str(self.categories.dtype) + levheader = f"Categories ({len(self.categories)}, {dtype}): " + width, height = get_terminal_size() + max_width = get_option("display.width") or width + if console.in_ipython_frontend(): + # 0 = no breaks + max_width = 0 + levstring = "" + start = True + cur_col_len = len(levheader) # header + sep_len, sep = (3, " < ") if self.ordered else (2, ", ") + linesep = sep.rstrip() + "\n" # remove whitespace + for val in category_strs: + if max_width != 0 and cur_col_len + sep_len + len(val) > max_width: + levstring += linesep + (" " * (len(levheader) + 1)) + cur_col_len = len(levheader) + 1 # header + a whitespace + elif not start: + levstring += sep + cur_col_len += len(val) + levstring += val + start = False + # replace to simple save space by + return levheader + "[" + levstring.replace(" < ... < ", " ... ") + "]" + + def _repr_footer(self) -> str: + info = self._repr_categories_info() + return f"Length: {len(self)}\n{info}" + + def _get_repr(self, length: bool = True, na_rep="NaN", footer: bool = True) -> str: + from pandas.io.formats import format as fmt + + formatter = fmt.CategoricalFormatter( + self, length=length, na_rep=na_rep, footer=footer + ) + result = formatter.to_string() + return str(result) + + def __repr__(self) -> str: + """ + String representation. 
+ """ + _maxlen = 10 + if len(self._codes) > _maxlen: + result = self._tidy_repr(_maxlen) + elif len(self._codes) > 0: + result = self._get_repr(length=len(self) > _maxlen) + else: + msg = self._get_repr(length=False, footer=True).replace("\n", ", ") + result = f"[], {msg}" + + return result + + # ------------------------------------------------------------------ + + def _validate_listlike(self, value): + # NB: here we assume scalar-like tuples have already been excluded + value = extract_array(value, extract_numpy=True) + + # require identical categories set + if isinstance(value, Categorical): + if not is_dtype_equal(self.dtype, value.dtype): + raise TypeError( + "Cannot set a Categorical with another, " + "without identical categories" + ) + # is_dtype_equal implies categories_match_up_to_permutation + value = self._encode_with_my_categories(value) + return value._codes + + from pandas import Index + + # tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914 + to_add = Index._with_infer(value, tupleize_cols=False).difference( + self.categories + ) + + # no assignments of values not in categories, but it's always ok to set + # something to np.nan + if len(to_add) and not isna(to_add).all(): + raise TypeError( + "Cannot setitem on a Categorical with a new " + "category, set the categories first" + ) + + codes = self.categories.get_indexer(value) + return codes.astype(self._ndarray.dtype, copy=False) + + def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]: + """ + Compute the inverse of a categorical, returning + a dict of categories -> indexers. + + *This is an internal function* + + Returns + ------- + Dict[Hashable, np.ndarray[np.intp]] + dict of categories -> indexers + + Examples + -------- + >>> c = pd.Categorical(list('aabca')) + >>> c + ['a', 'a', 'b', 'c', 'a'] + Categories (3, object): ['a', 'b', 'c'] + >>> c.categories + Index(['a', 'b', 'c'], dtype='object') + >>> c.codes + array([0, 0, 1, 2, 0], dtype=int8) + >>> c._reverse_indexer() + {'a': array([0, 1, 4]), 'b': array([2]), 'c': array([3])} + + """ + categories = self.categories + r, counts = libalgos.groupsort_indexer( + ensure_platform_int(self.codes), categories.size + ) + counts = ensure_int64(counts).cumsum() + _result = (r[start:end] for start, end in zip(counts, counts[1:])) + return dict(zip(categories, _result)) + + # ------------------------------------------------------------------ + # Reductions + + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def min(self, *, skipna=True, **kwargs): + """ + The minimum value of the object. + + Only ordered `Categoricals` have a minimum! + + .. versionchanged:: 1.0.0 + + Returns an NA value on empty arrays + + Raises + ------ + TypeError + If the `Categorical` is not `ordered`. + + Returns + ------- + min : the minimum of this `Categorical` + """ + nv.validate_minmax_axis(kwargs.get("axis", 0)) + nv.validate_min((), kwargs) + self.check_for_ordered("min") + + if not len(self._codes): + return self.dtype.na_value + + good = self._codes != -1 + if not good.all(): + if skipna and good.any(): + pointer = self._codes[good].min() + else: + return np.nan + else: + pointer = self._codes.min() + return self._wrap_reduction_result(None, pointer) + + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def max(self, *, skipna=True, **kwargs): + """ + The maximum value of the object. + + Only ordered `Categoricals` have a maximum! + + .. 
versionchanged:: 1.0.0 + + Returns an NA value on empty arrays + + Raises + ------ + TypeError + If the `Categorical` is not `ordered`. + + Returns + ------- + max : the maximum of this `Categorical` + """ + nv.validate_minmax_axis(kwargs.get("axis", 0)) + nv.validate_max((), kwargs) + self.check_for_ordered("max") + + if not len(self._codes): + return self.dtype.na_value + + good = self._codes != -1 + if not good.all(): + if skipna and good.any(): + pointer = self._codes[good].max() + else: + return np.nan + else: + pointer = self._codes.max() + return self._wrap_reduction_result(None, pointer) + + def mode(self, dropna: bool = True) -> Categorical: + """ + Returns the mode(s) of the Categorical. + + Always returns `Categorical` even if only one value. + + Parameters + ---------- + dropna : bool, default True + Don't consider counts of NaN/NaT. + + Returns + ------- + modes : `Categorical` (sorted) + """ + warn( + "Categorical.mode is deprecated and will be removed in a future version. " + "Use Series.mode instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._mode(dropna=dropna) + + def _mode(self, dropna: bool = True) -> Categorical: + codes = self._codes + mask = None + if dropna: + mask = self.isna() + + res_codes = algorithms.mode(codes, mask=mask) + res_codes = cast(np.ndarray, res_codes) + assert res_codes.dtype == codes.dtype + res = self._from_backing_data(res_codes) + return res + + # ------------------------------------------------------------------ + # ExtensionArray Interface + + def unique(self): + """ + Return the ``Categorical`` which ``categories`` and ``codes`` are + unique. + + .. versionchanged:: 1.3.0 + + Previously, unused categories were dropped from the new categories. + + Returns + ------- + Categorical + + See Also + -------- + pandas.unique + CategoricalIndex.unique + Series.unique : Return unique values of Series object. + + Examples + -------- + >>> pd.Categorical(list("baabc")).unique() + ['b', 'a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> pd.Categorical(list("baab"), categories=list("abc"), ordered=True).unique() + ['b', 'a'] + Categories (3, object): ['a' < 'b' < 'c'] + """ + unique_codes = unique1d(self.codes) + return self._from_backing_data(unique_codes) + + def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray: + # make sure we have correct itemsize for resulting codes + assert res_values.dtype == self._ndarray.dtype + return res_values + + def equals(self, other: object) -> bool: + """ + Returns True if categorical arrays are equal. 
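# Illustrative sketch of min/max as documented above: only ordered
# categoricals have them, and skipna controls how missing values propagate.
import numpy as np
import pandas as pd

c = pd.Categorical(["b", "a", np.nan], categories=["a", "b"], ordered=True)
print(c.min())              # 'a'
print(c.max())              # 'b'
print(c.min(skipna=False))  # nan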
+ + Parameters + ---------- + other : `Categorical` + + Returns + ------- + bool + """ + if not isinstance(other, Categorical): + return False + elif self._categories_match_up_to_permutation(other): + other = self._encode_with_my_categories(other) + return np.array_equal(self._codes, other._codes) + return False + + @classmethod + def _concat_same_type( + cls: type[CategoricalT], to_concat: Sequence[CategoricalT], axis: int = 0 + ) -> CategoricalT: + from pandas.core.dtypes.concat import union_categoricals + + first = to_concat[0] + if axis >= first.ndim: + raise ValueError( + f"axis {axis} is out of bounds for array of dimension {first.ndim}" + ) + + if axis == 1: + # Flatten, concatenate then reshape + if not all(x.ndim == 2 for x in to_concat): + raise ValueError + + # pass correctly-shaped to union_categoricals + tc_flat = [] + for obj in to_concat: + tc_flat.extend([obj[:, i] for i in range(obj.shape[1])]) + + res_flat = cls._concat_same_type(tc_flat, axis=0) + + result = res_flat.reshape(len(first), -1, order="F") + return result + + result = union_categoricals(to_concat) + return result + + # ------------------------------------------------------------------ + + def _encode_with_my_categories(self, other: Categorical) -> Categorical: + """ + Re-encode another categorical using this Categorical's categories. + + Notes + ----- + This assumes we have already checked + self._categories_match_up_to_permutation(other). + """ + # Indexing on codes is more efficient if categories are the same, + # so we can apply some optimizations based on the degree of + # dtype-matching. + codes = recode_for_categories( + other.codes, other.categories, self.categories, copy=False + ) + return self._from_backing_data(codes) + + def _categories_match_up_to_permutation(self, other: Categorical) -> bool: + """ + Returns True if categoricals are the same dtype + same categories, and same ordered + + Parameters + ---------- + other : Categorical + + Returns + ------- + bool + """ + return hash(self.dtype) == hash(other.dtype) + + def is_dtype_equal(self, other) -> bool: + warn( + "Categorical.is_dtype_equal is deprecated and will be removed " + "in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) + try: + return self._categories_match_up_to_permutation(other) + except (AttributeError, TypeError): + return False + + def describe(self) -> DataFrame: + """ + Describes this Categorical + + Returns + ------- + description: `DataFrame` + A dataframe with frequency and counts by category. + """ + counts = self.value_counts(dropna=False) + freqs = counts / counts.sum() + + from pandas import Index + from pandas.core.reshape.concat import concat + + result = concat([counts, freqs], axis=1) + result.columns = Index(["counts", "freqs"]) + result.index.name = "categories" + + return result + + def isin(self, values) -> npt.NDArray[np.bool_]: + """ + Check whether `values` are contained in Categorical. + + Return a boolean NumPy Array showing whether each element in + the Categorical matches an element in the passed sequence of + `values` exactly. + + Parameters + ---------- + values : set or list-like + The sequence of values to test. Passing in a single string will + raise a ``TypeError``. Instead, turn a single string into a + list of one element. + + Returns + ------- + np.ndarray[bool] + + Raises + ------ + TypeError + * If `values` is not a set or list-like + + See Also + -------- + pandas.Series.isin : Equivalent method on Series. 
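# Illustrative sketch of equals as documented above: the values must match,
# but unordered categories only need to match up to a permutation.
import pandas as pd

a = pd.Categorical(["a", "b"], categories=["a", "b"])
b = pd.Categorical(["a", "b"], categories=["b", "a"])
print(a.equals(b))   # True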
+ + Examples + -------- + >>> s = pd.Categorical(['lama', 'cow', 'lama', 'beetle', 'lama', + ... 'hippo']) + >>> s.isin(['cow', 'lama']) + array([ True, True, True, False, True, False]) + + Passing a single string as ``s.isin('lama')`` will raise an error. Use + a list of one element instead: + + >>> s.isin(['lama']) + array([ True, False, True, False, True, False]) + """ + if not is_list_like(values): + values_type = type(values).__name__ + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{values_type}]" + ) + values = sanitize_array(values, None, None) + null_mask = np.asarray(isna(values)) + code_values = self.categories.get_indexer(values) + code_values = code_values[null_mask | (code_values >= 0)] + return algorithms.isin(self.codes, code_values) + + @overload + def replace( + self, to_replace, value, *, inplace: Literal[False] = ... + ) -> Categorical: + ... + + @overload + def replace(self, to_replace, value, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) + def replace(self, to_replace, value, inplace: bool = False) -> Categorical | None: + """ + Replaces all instances of one value with another + + Parameters + ---------- + to_replace: object + The value to be replaced + + value: object + The value to replace it with + + inplace: bool + Whether the operation is done in-place + + Returns + ------- + None if inplace is True, otherwise the new Categorical after replacement + + + Examples + -------- + >>> s = pd.Categorical([1, 2, 1, 3]) + >>> s.replace(1, 3) + [3, 2, 3, 3] + Categories (2, int64): [2, 3] + """ + # GH#44929 deprecation + warn( + "Categorical.replace is deprecated and will be removed in a future " + "version. 
Use Series.replace directly instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._replace(to_replace=to_replace, value=value, inplace=inplace) + + def _replace(self, *, to_replace, value, inplace: bool = False): + inplace = validate_bool_kwarg(inplace, "inplace") + cat = self if inplace else self.copy() + + # build a dict of (to replace -> value) pairs + if is_list_like(to_replace): + # if to_replace is list-like and value is scalar + replace_dict = {replace_value: value for replace_value in to_replace} + else: + # if both to_replace and value are scalar + replace_dict = {to_replace: value} + + # other cases, like if both to_replace and value are list-like or if + # to_replace is a dict, are handled separately in NDFrame + for replace_value, new_value in replace_dict.items(): + if new_value == replace_value: + continue + if replace_value in cat.categories: + if isna(new_value): + with catch_warnings(): + simplefilter("ignore") + cat.remove_categories(replace_value, inplace=True) + continue + + categories = cat.categories.tolist() + index = categories.index(replace_value) + + if new_value in cat.categories: + value_index = categories.index(new_value) + cat._codes[cat._codes == index] = value_index + with catch_warnings(): + simplefilter("ignore") + cat.remove_categories(replace_value, inplace=True) + else: + categories[index] = new_value + with catch_warnings(): + simplefilter("ignore") + cat.rename_categories(categories, inplace=True) + if not inplace: + return cat + + # ------------------------------------------------------------------------ + # String methods interface + def _str_map( + self, f, na_value=np.nan, dtype=np.dtype("object"), convert: bool = True + ): + # Optimization to apply the callable `f` to the categories once + # and rebuild the result by `take`ing from the result with the codes. + # Returns the same type as the object-dtype implementation though. + from pandas.core.arrays import PandasArray + + categories = self.categories + codes = self.codes + result = PandasArray(categories.to_numpy())._str_map(f, na_value, dtype) + return take_nd(result, codes, fill_value=na_value) + + def _str_get_dummies(self, sep="|"): + # sep may not be in categories. Just bail on this. + from pandas.core.arrays import PandasArray + + return PandasArray(self.astype(str))._str_get_dummies(sep) + + +# The Series.cat accessor + + +@delegate_names( + delegate=Categorical, accessors=["categories", "ordered"], typ="property" +) +@delegate_names( + delegate=Categorical, + accessors=[ + "rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", + "as_unordered", + ], + typ="method", +) +class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): + """ + Accessor object for categorical properties of the Series values. + + Be aware that assigning to `categories` is a inplace operation, while all + methods return new categorical data per default (but can be called with + `inplace=True`). 
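# Illustrative sketch of the _str_map optimisation described above: string
# methods on a categorical Series are computed once per category and then
# broadcast back through the codes.
import pandas as pd

s = pd.Series(["a", "b", "a", "b"], dtype="category")
print(s.str.upper())   # same result as object dtype, one call per category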
+ + Parameters + ---------- + data : Series or CategoricalIndex + + Examples + -------- + >>> s = pd.Series(list("abbccc")).astype("category") + >>> s + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['a', 'b', 'c'] + + >>> s.cat.categories + Index(['a', 'b', 'c'], dtype='object') + + >>> s.cat.rename_categories(list("cba")) + 0 c + 1 b + 2 b + 3 a + 4 a + 5 a + dtype: category + Categories (3, object): ['c', 'b', 'a'] + + >>> s.cat.reorder_categories(list("cba")) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['c', 'b', 'a'] + + >>> s.cat.add_categories(["d", "e"]) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] + + >>> s.cat.remove_categories(["a", "c"]) + 0 NaN + 1 b + 2 b + 3 NaN + 4 NaN + 5 NaN + dtype: category + Categories (1, object): ['b'] + + >>> s1 = s.cat.add_categories(["d", "e"]) + >>> s1.cat.remove_unused_categories() + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['a', 'b', 'c'] + + >>> s.cat.set_categories(list("abcde")) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] + + >>> s.cat.as_ordered() + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['a' < 'b' < 'c'] + + >>> s.cat.as_unordered() + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): ['a', 'b', 'c'] + """ + + def __init__(self, data) -> None: + self._validate(data) + self._parent = data.values + self._index = data.index + self._name = data.name + self._freeze() + + @staticmethod + def _validate(data): + if not is_categorical_dtype(data.dtype): + raise AttributeError("Can only use .cat accessor with a 'category' dtype") + + def _delegate_property_get(self, name): + return getattr(self._parent, name) + + def _delegate_property_set(self, name, new_values): + return setattr(self._parent, name, new_values) + + @property + def codes(self) -> Series: + """ + Return Series of codes as well as the index. + """ + from pandas import Series + + return Series(self._parent.codes, index=self._index) + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + + method = getattr(self._parent, name) + res = method(*args, **kwargs) + if res is not None: + return Series(res, index=self._index, name=self._name) + + +# utility routines + + +def _get_codes_for_values(values, categories: Index) -> np.ndarray: + """ + utility routine to turn values into codes given the specified categories + + If `values` is known to be a Categorical, use recode_for_categories instead. + """ + if values.ndim > 1: + flat = values.ravel() + codes = _get_codes_for_values(flat, categories) + return codes.reshape(values.shape) + + codes = categories.get_indexer_for(values) + return coerce_indexer_dtype(codes, categories) + + +def recode_for_categories( + codes: np.ndarray, old_categories, new_categories, copy: bool = True +) -> np.ndarray: + """ + Convert a set of codes for to a new set of categories + + Parameters + ---------- + codes : np.ndarray + old_categories, new_categories : Index + copy: bool, default True + Whether to copy if the codes are unchanged. 
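# Illustrative sketch of the Series.cat.codes accessor defined above: a Series
# of the integer codes, aligned on the original index.
import pandas as pd

s = pd.Series(["b", "a", "b"], dtype="category", index=[10, 20, 30])
print(s.cat.codes)
# 10    1
# 20    0
# 30    1
# dtype: int8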
+ + Returns + ------- + new_codes : np.ndarray[np.int64] + + Examples + -------- + >>> old_cat = pd.Index(['b', 'a', 'c']) + >>> new_cat = pd.Index(['a', 'b']) + >>> codes = np.array([0, 1, 1, 2]) + >>> recode_for_categories(codes, old_cat, new_cat) + array([ 1, 0, 0, -1], dtype=int8) + """ + if len(old_categories) == 0: + # All null anyway, so just retain the nulls + if copy: + return codes.copy() + return codes + elif new_categories.equals(old_categories): + # Same categories, so no need to actually recode + if copy: + return codes.copy() + return codes + + indexer = coerce_indexer_dtype( + new_categories.get_indexer(old_categories), new_categories + ) + new_codes = take_nd(indexer, codes, fill_value=-1) + return new_codes + + +def factorize_from_iterable(values) -> tuple[np.ndarray, Index]: + """ + Factorize an input `values` into `categories` and `codes`. Preserves + categorical dtype in `categories`. + + Parameters + ---------- + values : list-like + + Returns + ------- + codes : ndarray + categories : Index + If `values` has a categorical dtype, then `categories` is + a CategoricalIndex keeping the categories and order of `values`. + """ + from pandas import CategoricalIndex + + if not is_list_like(values): + raise TypeError("Input must be list-like") + + categories: Index + if is_categorical_dtype(values): + values = extract_array(values) + # The Categorical we want to build has the same categories + # as values but its codes are by def [0, ..., len(n_categories) - 1] + cat_codes = np.arange(len(values.categories), dtype=values.codes.dtype) + cat = Categorical.from_codes(cat_codes, dtype=values.dtype) + + categories = CategoricalIndex(cat) + codes = values.codes + else: + # The value of ordered is irrelevant since we don't use cat as such, + # but only the resulting categories, the order of which is independent + # from ordered. Set ordered to False as default. See GH #15457 + cat = Categorical(values, ordered=False) + categories = cat.categories + codes = cat.codes + return codes, categories + + +def factorize_from_iterables(iterables) -> tuple[list[np.ndarray], list[Index]]: + """ + A higher-level wrapper over `factorize_from_iterable`. + + Parameters + ---------- + iterables : list-like of list-likes + + Returns + ------- + codes : list of ndarrays + categories : list of Indexes + + Notes + ----- + See `factorize_from_iterable` for more info. + """ + if len(iterables) == 0: + # For consistency, it should return two empty lists. 
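# Illustrative sketch of factorize_from_iterable as documented above; note the
# import path is a pandas-internal module, so it may change between versions.
import pandas as pd
from pandas.core.arrays.categorical import factorize_from_iterable

codes, categories = factorize_from_iterable(["b", "a", "b"])
print(codes)        # [1 0 1]
print(categories)   # Index(['a', 'b'], dtype='object')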
+ return [], [] + + codes, categories = zip(*(factorize_from_iterable(it) for it in iterables)) + return list(codes), list(categories) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py new file mode 100644 index 00000000..471ecc59 --- /dev/null +++ b/pandas/core/arrays/datetimelike.py @@ -0,0 +1,2286 @@ +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, +) +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Literal, + Sequence, + TypeVar, + Union, + cast, + final, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import ( + algos, + lib, +) +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.tslibs import ( + BaseOffset, + IncompatibleFrequency, + NaT, + NaTType, + Period, + Resolution, + Tick, + Timestamp, + delta_to_nanoseconds, + get_unit_from_dtype, + iNaT, + ints_to_pydatetime, + ints_to_pytimedelta, + to_offset, +) +from pandas._libs.tslibs.fields import ( + RoundTo, + round_nsint64, +) +from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions +from pandas._libs.tslibs.timestamps import integer_op_not_supported +from pandas._typing import ( + ArrayLike, + DatetimeLikeScalar, + Dtype, + DtypeObj, + NpDtype, + PositionalIndexer2D, + PositionalIndexerTuple, + ScalarIndexer, + SequenceIndexer, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import ( + AbstractMethodError, + NullFrequencyError, + PerformanceWarning, +) +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_all_strings, + is_categorical_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_period_dtype, + is_string_dtype, + is_timedelta64_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, +) +from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCMultiIndex, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, +) + +from pandas.core import ( + nanops, + ops, +) +from pandas.core.algorithms import ( + checked_add_with_arr, + isin, + mode, + unique1d, +) +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays._mixins import ( + NDArrayBackedExtensionArray, + ravel_compat, +) +from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.integer import IntegerArray +import pandas.core.common as com +from pandas.core.construction import ( + array as pd_array, + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import ( + check_array_indexer, + check_setitem_lengths, +) +from pandas.core.ops.common import unpack_zerodim_and_defer +from pandas.core.ops.invalid import ( + invalid_comparison, + make_invalid_op, +) + +from pandas.tseries import frequencies + +if TYPE_CHECKING: + + from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, + ) + +DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType] +DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") + + +class InvalidComparison(Exception): + """ + Raised by _validate_comparison_value to indicate to caller it should + return invalid_comparison. 
+ """ + + pass + + +class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray): + """ + Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray + + Assumes that __new__/__init__ defines: + _data + _freq + + and that the inheriting class has methods: + _generate_range + """ + + # _infer_matches -> which infer_dtype strings are close enough to our own + _infer_matches: tuple[str, ...] + _is_recognized_dtype: Callable[[DtypeObj], bool] + _recognized_scalars: tuple[type, ...] + _ndarray: np.ndarray + + @cache_readonly + def _can_hold_na(self) -> bool: + return True + + def __init__(self, data, dtype: Dtype | None = None, freq=None, copy=False) -> None: + raise AbstractMethodError(self) + + @property + def _scalar_type(self) -> type[DatetimeLikeScalar]: + """ + The scalar associated with this datelike + + * PeriodArray : Period + * DatetimeArray : Timestamp + * TimedeltaArray : Timedelta + """ + raise AbstractMethodError(self) + + def _scalar_from_string(self, value: str) -> DTScalarOrNaT: + """ + Construct a scalar type from a string. + + Parameters + ---------- + value : str + + Returns + ------- + Period, Timestamp, or Timedelta, or NaT + Whatever the type of ``self._scalar_type`` is. + + Notes + ----- + This should call ``self._check_compatible_with`` before + unboxing the result. + """ + raise AbstractMethodError(self) + + def _unbox_scalar( + self, value: DTScalarOrNaT, setitem: bool = False + ) -> np.int64 | np.datetime64 | np.timedelta64: + """ + Unbox the integer value of a scalar `value`. + + Parameters + ---------- + value : Period, Timestamp, Timedelta, or NaT + Depending on subclass. + setitem : bool, default False + Whether to check compatibility with setitem strictness. + + Returns + ------- + int + + Examples + -------- + >>> self._unbox_scalar(Timedelta("10s")) # doctest: +SKIP + 10000000000 + """ + raise AbstractMethodError(self) + + def _check_compatible_with( + self, other: DTScalarOrNaT, setitem: bool = False + ) -> None: + """ + Verify that `self` and `other` are compatible. + + * DatetimeArray verifies that the timezones (if any) match + * PeriodArray verifies that the freq matches + * Timedelta has no verification + + In each case, NaT is considered compatible. + + Parameters + ---------- + other + setitem : bool, default False + For __setitem__ we may have stricter compatibility restrictions than + for comparisons. + + Raises + ------ + Exception + """ + raise AbstractMethodError(self) + + # ------------------------------------------------------------------ + # NDArrayBackedExtensionArray compat + + @cache_readonly + def _data(self) -> np.ndarray: + return self._ndarray + + # ------------------------------------------------------------------ + + def _box_func(self, x): + """ + box function to get object from internal representation + """ + raise AbstractMethodError(self) + + def _box_values(self, values) -> np.ndarray: + """ + apply box func to passed values + """ + return lib.map_infer(values, self._box_func, convert=False) + + def __iter__(self): + if self.ndim > 1: + return (self[n] for n in range(len(self))) + else: + return (self._box_func(v) for v in self.asi8) + + @property + def asi8(self) -> npt.NDArray[np.int64]: + """ + Integer representation of the values. + + Returns + ------- + ndarray + An ndarray with int64 dtype. 
+ """ + # do not cache or you'll create a memory leak + return self._ndarray.view("i8") + + # ---------------------------------------------------------------- + # Rendering Methods + + def _format_native_types( + self, *, na_rep="NaT", date_format=None + ) -> npt.NDArray[np.object_]: + """ + Helper method for astype when converting to strings. + + Returns + ------- + ndarray[str] + """ + raise AbstractMethodError(self) + + def _formatter(self, boxed: bool = False): + # TODO: Remove Datetime & DatetimeTZ formatters. + return "'{}'".format + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + # used for Timedelta/DatetimeArray, overwritten by PeriodArray + if is_object_dtype(dtype): + return np.array(list(self), dtype=object) + return self._ndarray + + @overload + def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT: + ... + + @overload + def __getitem__( + self: DatetimeLikeArrayT, + item: SequenceIndexer | PositionalIndexerTuple, + ) -> DatetimeLikeArrayT: + ... + + def __getitem__( + self: DatetimeLikeArrayT, key: PositionalIndexer2D + ) -> DatetimeLikeArrayT | DTScalarOrNaT: + """ + This getitem defers to the underlying array, which by-definition can + only handle list-likes, slices, and integer scalars + """ + # Use cast as we know we will get back a DatetimeLikeArray or DTScalar, + # but skip evaluating the Union at runtime for performance + # (see https://github.com/pandas-dev/pandas/pull/44624) + result = cast( + "Union[DatetimeLikeArrayT, DTScalarOrNaT]", super().__getitem__(key) + ) + if lib.is_scalar(result): + return result + else: + # At this point we know the result is an array. + result = cast(DatetimeLikeArrayT, result) + result._freq = self._get_getitem_freq(key) + return result + + def _get_getitem_freq(self, key) -> BaseOffset | None: + """ + Find the `freq` attribute to assign to the result of a __getitem__ lookup. + """ + is_period = is_period_dtype(self.dtype) + if is_period: + freq = self.freq + elif self.ndim != 1: + freq = None + else: + key = check_array_indexer(self, key) # maybe ndarray[bool] -> slice + freq = None + if isinstance(key, slice): + if self.freq is not None and key.step is not None: + freq = key.step * self.freq + else: + freq = self.freq + elif key is Ellipsis: + # GH#21282 indexing with Ellipsis is similar to a full slice, + # should preserve `freq` attribute + freq = self.freq + elif com.is_bool_indexer(key): + new_key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + if isinstance(new_key, slice): + return self._get_getitem_freq(new_key) + return freq + + # error: Argument 1 of "__setitem__" is incompatible with supertype + # "ExtensionArray"; supertype defines the argument type as "Union[int, + # ndarray]" + def __setitem__( # type: ignore[override] + self, + key: int | Sequence[int] | Sequence[bool] | slice, + value: NaTType | Any | Sequence[Any], + ) -> None: + # I'm fudging the types a bit here. "Any" above really depends + # on type(self). For PeriodArray, it's Period (or stuff coercible + # to a period in from_sequence). For DatetimeArray, it's Timestamp... + # I don't know if mypy can do that, possibly with Generics. + # https://mypy.readthedocs.io/en/latest/generics.html + + no_op = check_setitem_lengths(key, value, self) + + # Calling super() before the no_op short-circuit means that we raise + # on invalid 'value' even if this is a no-op, e.g. wrong-dtype empty array. 
+ super().__setitem__(key, value) + + if no_op: + return + + self._maybe_clear_freq() + + def _maybe_clear_freq(self): + # inplace operations like __setitem__ may invalidate the freq of + # DatetimeArray and TimedeltaArray + pass + + def astype(self, dtype, copy: bool = True): + # Some notes on cases we don't have to handle here in the base class: + # 1. PeriodArray.astype handles period -> period + # 2. DatetimeArray.astype handles conversion between tz. + # 3. DatetimeArray.astype handles datetime -> period + dtype = pandas_dtype(dtype) + + if is_object_dtype(dtype): + if self.dtype.kind == "M": + self = cast("DatetimeArray", self) + # *much* faster than self._box_values + # for e.g. test_get_loc_tuple_monotonic_above_size_cutoff + i8data = self.asi8 + converted = ints_to_pydatetime( + i8data, + tz=self.tz, + freq=self.freq, + box="timestamp", + reso=self._reso, + ) + return converted + + elif self.dtype.kind == "m": + return ints_to_pytimedelta(self._ndarray, box=True) + + return self._box_values(self.asi8.ravel()).reshape(self.shape) + + elif isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) + elif is_string_dtype(dtype): + return self._format_native_types() + elif is_integer_dtype(dtype): + # we deliberately ignore int32 vs. int64 here. + # See https://github.com/pandas-dev/pandas/issues/24381 for more. + values = self.asi8 + + if is_unsigned_integer_dtype(dtype): + # Again, we ignore int32 vs. int64 + values = values.view("uint64") + if dtype != np.uint64: + # GH#45034 + warnings.warn( + f"The behavior of .astype from {self.dtype} to {dtype} is " + "deprecated. In a future version, this astype will return " + "exactly the specified dtype instead of uint64, and will " + "raise if that conversion overflows.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif (self.asi8 < 0).any(): + # GH#45034 + warnings.warn( + f"The behavior of .astype from {self.dtype} to {dtype} is " + "deprecated. In a future version, this astype will " + "raise if the conversion overflows, as it did in this " + "case with negative int64 values.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif dtype != np.int64: + # GH#45034 + warnings.warn( + f"The behavior of .astype from {self.dtype} to {dtype} is " + "deprecated. In a future version, this astype will return " + "exactly the specified dtype instead of int64, and will " + "raise if that conversion overflows.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if copy: + values = values.copy() + return values + elif ( + is_datetime_or_timedelta_dtype(dtype) + and not is_dtype_equal(self.dtype, dtype) + ) or is_float_dtype(dtype): + # disallow conversion between datetime/timedelta, + # and conversions for any datetimelike to float + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) + else: + return np.asarray(self, dtype=dtype) + + @overload + def view(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT: + ... + + @overload + def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray: + ... + + @overload + def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray: + ... + + @overload + def view(self, dtype: Dtype | None = ...) -> ArrayLike: + ... + + def view(self, dtype: Dtype | None = None) -> ArrayLike: + # we need to explicitly call super() method as long as the `@overload`s + # are present in this file. 
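+ # e.g. self.view("i8") hands back the underlying data as an int64
+ # ndarray, which is how _round and _mode below read the raw values.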
+ return super().view(dtype) + + # ------------------------------------------------------------------ + # ExtensionArray Interface + + @classmethod + def _concat_same_type( + cls: type[DatetimeLikeArrayT], + to_concat: Sequence[DatetimeLikeArrayT], + axis: int = 0, + ) -> DatetimeLikeArrayT: + new_obj = super()._concat_same_type(to_concat, axis) + + obj = to_concat[0] + dtype = obj.dtype + + new_freq = None + if is_period_dtype(dtype): + new_freq = obj.freq + elif axis == 0: + # GH 3232: If the concat result is evenly spaced, we can retain the + # original frequency + to_concat = [x for x in to_concat if len(x)] + + if obj.freq is not None and all(x.freq == obj.freq for x in to_concat): + pairs = zip(to_concat[:-1], to_concat[1:]) + if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs): + new_freq = obj.freq + + new_obj._freq = new_freq + return new_obj + + def copy(self: DatetimeLikeArrayT, order="C") -> DatetimeLikeArrayT: + # error: Unexpected keyword argument "order" for "copy" + new_obj = super().copy(order=order) # type: ignore[call-arg] + new_obj._freq = self.freq + return new_obj + + # ------------------------------------------------------------------ + # Validation Methods + # TODO: try to de-duplicate these, ensure identical behavior + + def _validate_comparison_value(self, other): + if isinstance(other, str): + try: + # GH#18435 strings get a pass from tzawareness compat + other = self._scalar_from_string(other) + except (ValueError, IncompatibleFrequency): + # failed to parse as Timestamp/Timedelta/Period + raise InvalidComparison(other) + + if isinstance(other, self._recognized_scalars) or other is NaT: + other = self._scalar_type(other) + try: + self._check_compatible_with(other) + except (TypeError, IncompatibleFrequency) as err: + # e.g. 
tzawareness mismatch + raise InvalidComparison(other) from err + + elif not is_list_like(other): + raise InvalidComparison(other) + + elif len(other) != len(self): + raise ValueError("Lengths must match") + + else: + try: + other = self._validate_listlike(other, allow_object=True) + self._check_compatible_with(other) + except (TypeError, IncompatibleFrequency) as err: + if is_object_dtype(getattr(other, "dtype", None)): + # We will have to operate element-wise + pass + else: + raise InvalidComparison(other) from err + + return other + + def _validate_shift_value(self, fill_value): + # TODO(2.0): once this deprecation is enforced, use _validate_scalar + if is_valid_na_for_dtype(fill_value, self.dtype): + fill_value = NaT + elif isinstance(fill_value, self._recognized_scalars): + fill_value = self._scalar_type(fill_value) + else: + new_fill: DatetimeLikeScalar + + # only warn if we're not going to raise + if self._scalar_type is Period and lib.is_integer(fill_value): + # kludge for #31971 since Period(integer) tries to cast to str + new_fill = Period._from_ordinal(fill_value, freq=self.freq) + else: + new_fill = self._scalar_type(fill_value) + + # stacklevel here is chosen to be correct when called from + # DataFrame.shift or Series.shift + warnings.warn( + f"Passing {type(fill_value)} to shift is deprecated and " + "will raise in a future version, pass " + f"{self._scalar_type.__name__} instead.", + FutureWarning, + # There is no way to hard-code the level since this might be + # reached directly or called from the Index or Block method + stacklevel=find_stack_level(), + ) + fill_value = new_fill + + return self._unbox(fill_value, setitem=True) + + def _validate_scalar( + self, + value, + *, + allow_listlike: bool = False, + setitem: bool = True, + unbox: bool = True, + ): + """ + Validate that the input value can be cast to our scalar_type. + + Parameters + ---------- + value : object + allow_listlike: bool, default False + When raising an exception, whether the message should say + listlike inputs are allowed. + setitem : bool, default True + Whether to check compatibility with setitem strictness. + unbox : bool, default True + Whether to unbox the result before returning. Note: unbox=False + skips the setitem compatibility check. + + Returns + ------- + self._scalar_type or NaT + """ + if isinstance(value, self._scalar_type): + pass + + elif isinstance(value, str): + # NB: Careful about tzawareness + try: + value = self._scalar_from_string(value) + except ValueError as err: + msg = self._validation_error_message(value, allow_listlike) + raise TypeError(msg) from err + + elif is_valid_na_for_dtype(value, self.dtype): + # GH#18295 + value = NaT + + elif isna(value): + # if we are dt64tz and value is dt64("NaT"), dont cast to NaT, + # or else we'll fail to raise in _unbox_scalar + msg = self._validation_error_message(value, allow_listlike) + raise TypeError(msg) + + elif isinstance(value, self._recognized_scalars): + value = self._scalar_type(value) + + else: + msg = self._validation_error_message(value, allow_listlike) + raise TypeError(msg) + + if not unbox: + # NB: In general NDArrayBackedExtensionArray will unbox here; + # this option exists to prevent a performance hit in + # TimedeltaIndex.get_loc + return value + return self._unbox_scalar(value, setitem=setitem) + + def _validation_error_message(self, value, allow_listlike: bool = False) -> str: + """ + Construct an exception message on validation error. 
+ + Some methods allow only scalar inputs, while others allow either scalar + or listlike. + + Parameters + ---------- + allow_listlike: bool, default False + + Returns + ------- + str + """ + if allow_listlike: + msg = ( + f"value should be a '{self._scalar_type.__name__}', 'NaT', " + f"or array of those. Got '{type(value).__name__}' instead." + ) + else: + msg = ( + f"value should be a '{self._scalar_type.__name__}' or 'NaT'. " + f"Got '{type(value).__name__}' instead." + ) + return msg + + def _validate_listlike(self, value, allow_object: bool = False): + if isinstance(value, type(self)): + return value + + if isinstance(value, list) and len(value) == 0: + # We treat empty list as our own dtype. + return type(self)._from_sequence([], dtype=self.dtype) + + if hasattr(value, "dtype") and value.dtype == object: + # `array` below won't do inference if value is an Index or Series. + # so do so here. in the Index case, inferred_type may be cached. + if lib.infer_dtype(value) in self._infer_matches: + try: + value = type(self)._from_sequence(value) + except (ValueError, TypeError): + if allow_object: + return value + msg = self._validation_error_message(value, True) + raise TypeError(msg) + + # Do type inference if necessary up front (after unpacking PandasArray) + # e.g. we passed PeriodIndex.values and got an ndarray of Periods + value = extract_array(value, extract_numpy=True) + value = pd_array(value) + value = extract_array(value, extract_numpy=True) + + if is_all_strings(value): + # We got a StringArray + try: + # TODO: Could use from_sequence_of_strings if implemented + # Note: passing dtype is necessary for PeriodArray tests + value = type(self)._from_sequence(value, dtype=self.dtype) + except ValueError: + pass + + if is_categorical_dtype(value.dtype): + # e.g. we have a Categorical holding self.dtype + if is_dtype_equal(value.categories.dtype, self.dtype): + # TODO: do we need equal dtype or just comparable? + value = value._internal_get_values() + value = extract_array(value, extract_numpy=True) + + if allow_object and is_object_dtype(value.dtype): + pass + + elif not type(self)._is_recognized_dtype(value.dtype): + msg = self._validation_error_message(value, True) + raise TypeError(msg) + + return value + + def _validate_searchsorted_value(self, value): + if not is_list_like(value): + return self._validate_scalar(value, allow_listlike=True, setitem=False) + else: + value = self._validate_listlike(value) + + return self._unbox(value) + + def _validate_setitem_value(self, value): + if is_list_like(value): + value = self._validate_listlike(value) + else: + return self._validate_scalar(value, allow_listlike=True) + + return self._unbox(value, setitem=True) + + def _unbox( + self, other, setitem: bool = False + ) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray: + """ + Unbox either a scalar with _unbox_scalar or an instance of our own type. + """ + if lib.is_scalar(other): + other = self._unbox_scalar(other, setitem=setitem) + else: + # same type as self + self._check_compatible_with(other, setitem=setitem) + other = other._ndarray + return other + + # ------------------------------------------------------------------ + # Additional array methods + # These are not part of the EA API, but we implement them because + # pandas assumes they're there. + + @ravel_compat + def map(self, mapper): + # TODO(GH-23179): Add ExtensionArray.map + # Need to figure out if we want ExtensionArray.map first. 
+ # If so, then we can refactor IndexOpsMixin._map_values to + # a standalone function and call from here.. + # Else, just rewrite _map_infer_values to do the right thing. + from pandas import Index + + return Index(self).map(mapper).array + + def isin(self, values) -> npt.NDArray[np.bool_]: + """ + Compute boolean array of whether each value is found in the + passed set of values. + + Parameters + ---------- + values : set or sequence of values + + Returns + ------- + ndarray[bool] + """ + if not hasattr(values, "dtype"): + values = np.asarray(values) + + if values.dtype.kind in ["f", "i", "u", "c"]: + # TODO: de-duplicate with equals, validate_comparison_value + return np.zeros(self.shape, dtype=bool) + + if not isinstance(values, type(self)): + inferable = [ + "timedelta", + "timedelta64", + "datetime", + "datetime64", + "date", + "period", + ] + if values.dtype == object: + inferred = lib.infer_dtype(values, skipna=False) + if inferred not in inferable: + if inferred == "string": + pass + + elif "mixed" in inferred: + return isin(self.astype(object), values) + else: + return np.zeros(self.shape, dtype=bool) + + try: + values = type(self)._from_sequence(values) + except ValueError: + return isin(self.astype(object), values) + + try: + self._check_compatible_with(values) + except (TypeError, ValueError): + # Includes tzawareness mismatch and IncompatibleFrequencyError + return np.zeros(self.shape, dtype=bool) + + return isin(self.asi8, values.asi8) + + # ------------------------------------------------------------------ + # Null Handling + + def isna(self) -> npt.NDArray[np.bool_]: + return self._isnan + + @property # NB: override with cache_readonly in immutable subclasses + def _isnan(self) -> npt.NDArray[np.bool_]: + """ + return if each value is nan + """ + return self.asi8 == iNaT + + @property # NB: override with cache_readonly in immutable subclasses + def _hasna(self) -> bool: + """ + return if I have any nans; enables various perf speedups + """ + return bool(self._isnan.any()) + + def _maybe_mask_results( + self, result: np.ndarray, fill_value=iNaT, convert=None + ) -> np.ndarray: + """ + Parameters + ---------- + result : np.ndarray + fill_value : object, default iNaT + convert : str, dtype or None + + Returns + ------- + result : ndarray with values replace by the fill_value + + mask the result if needed, convert to the provided dtype if its not + None + + This is an internal routine. + """ + if self._hasna: + if convert: + result = result.astype(convert) + if fill_value is None: + fill_value = np.nan + np.putmask(result, self._isnan, fill_value) + return result + + # ------------------------------------------------------------------ + # Frequency Properties/Methods + + @property + def freq(self): + """ + Return the frequency object if it is set, otherwise None. + """ + return self._freq + + @freq.setter + def freq(self, value) -> None: + if value is not None: + value = to_offset(value) + self._validate_frequency(self, value) + + if self.ndim > 1: + raise ValueError("Cannot set freq with ndim > 1") + + self._freq = value + + @property + def freqstr(self) -> str | None: + """ + Return the frequency object as a string if its set, otherwise None. + """ + if self.freq is None: + return None + return self.freq.freqstr + + @property # NB: override with cache_readonly in immutable subclasses + def inferred_freq(self) -> str | None: + """ + Tries to return a string representing a frequency generated by infer_freq. + + Returns None if it can't autodetect the frequency. 
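+
+ Examples
+ --------
+ Illustrative sketch, shown through DatetimeIndex, which exposes the
+ same property:
+
+ >>> pd.DatetimeIndex(["2020-01-01", "2020-01-02", "2020-01-03"]).inferred_freq # doctest: +SKIP
+ 'D'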
+ """ + if self.ndim != 1: + return None + try: + return frequencies.infer_freq(self) + except ValueError: + return None + + @property # NB: override with cache_readonly in immutable subclasses + def _resolution_obj(self) -> Resolution | None: + freqstr = self.freqstr + if freqstr is None: + return None + try: + return Resolution.get_reso_from_freqstr(freqstr) + except KeyError: + return None + + @property # NB: override with cache_readonly in immutable subclasses + def resolution(self) -> str: + """ + Returns day, hour, minute, second, millisecond or microsecond + """ + # error: Item "None" of "Optional[Any]" has no attribute "attrname" + return self._resolution_obj.attrname # type: ignore[union-attr] + + @classmethod + def _validate_frequency(cls, index, freq, **kwargs): + """ + Validate that a frequency is compatible with the values of a given + Datetime Array/Index or Timedelta Array/Index + + Parameters + ---------- + index : DatetimeIndex or TimedeltaIndex + The index on which to determine if the given frequency is valid + freq : DateOffset + The frequency to validate + """ + # TODO: this is not applicable to PeriodArray, move to correct Mixin + inferred = index.inferred_freq + if index.size == 0 or inferred == freq.freqstr: + return None + + try: + on_freq = cls._generate_range( + start=index[0], end=None, periods=len(index), freq=freq, **kwargs + ) + if not np.array_equal(index.asi8, on_freq.asi8): + raise ValueError + except ValueError as e: + if "non-fixed" in str(e): + # non-fixed frequencies are not meaningful for timedelta64; + # we retain that error message + raise e + # GH#11587 the main way this is reached is if the `np.array_equal` + # check above is False. This can also be reached if index[0] + # is `NaT`, in which case the call to `cls._generate_range` will + # raise a ValueError, which we re-raise with a more targeted + # message. 
+ raise ValueError( + f"Inferred frequency {inferred} from passed values " + f"does not conform to passed frequency {freq.freqstr}" + ) from e + + @classmethod + def _generate_range( + cls: type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs + ) -> DatetimeLikeArrayT: + raise AbstractMethodError(cls) + + # monotonicity/uniqueness properties are called via frequencies.infer_freq, + # see GH#23789 + + @property + def _is_monotonic_increasing(self) -> bool: + return algos.is_monotonic(self.asi8, timelike=True)[0] + + @property + def _is_monotonic_decreasing(self) -> bool: + return algos.is_monotonic(self.asi8, timelike=True)[1] + + @property + def _is_unique(self) -> bool: + return len(unique1d(self.asi8.ravel("K"))) == self.size + + # ------------------------------------------------------------------ + # Arithmetic Methods + + def _cmp_method(self, other, op): + if self.ndim > 1 and getattr(other, "shape", None) == self.shape: + # TODO: handle 2D-like listlikes + return op(self.ravel(), other.ravel()).reshape(self.shape) + + try: + other = self._validate_comparison_value(other) + except InvalidComparison: + return invalid_comparison(self, other, op) + + dtype = getattr(other, "dtype", None) + if is_object_dtype(dtype): + # We have to use comp_method_OBJECT_ARRAY instead of numpy + # comparison otherwise it would fail to raise when + # comparing tz-aware and tz-naive + with np.errstate(all="ignore"): + result = ops.comp_method_OBJECT_ARRAY( + op, np.asarray(self.astype(object)), other + ) + return result + + if other is NaT: + if op is operator.ne: + result = np.ones(self.shape, dtype=bool) + else: + result = np.zeros(self.shape, dtype=bool) + return result + + if not is_period_dtype(self.dtype): + self = cast(TimelikeOps, self) + if self._reso != other._reso: + if not isinstance(other, type(self)): + # i.e. 
Timedelta/Timestamp, cast to ndarray and let + # compare_mismatched_resolutions handle broadcasting + other_arr = np.array(other.asm8) + else: + other_arr = other._ndarray + return compare_mismatched_resolutions(self._ndarray, other_arr, op) + + other_vals = self._unbox(other) + # GH#37462 comparison on i8 values is almost 2x faster than M8/m8 + result = op(self._ndarray.view("i8"), other_vals.view("i8")) + + o_mask = isna(other) + mask = self._isnan | o_mask + if mask.any(): + nat_result = op is operator.ne + np.putmask(result, mask, nat_result) + + return result + + # pow is invalid for all three subclasses; TimedeltaArray will override + # the multiplication and division ops + __pow__ = make_invalid_op("__pow__") + __rpow__ = make_invalid_op("__rpow__") + __mul__ = make_invalid_op("__mul__") + __rmul__ = make_invalid_op("__rmul__") + __truediv__ = make_invalid_op("__truediv__") + __rtruediv__ = make_invalid_op("__rtruediv__") + __floordiv__ = make_invalid_op("__floordiv__") + __rfloordiv__ = make_invalid_op("__rfloordiv__") + __mod__ = make_invalid_op("__mod__") + __rmod__ = make_invalid_op("__rmod__") + __divmod__ = make_invalid_op("__divmod__") + __rdivmod__ = make_invalid_op("__rdivmod__") + + @final + def _add_datetimelike_scalar(self, other) -> DatetimeArray: + if not is_timedelta64_dtype(self.dtype): + raise TypeError( + f"cannot add {type(self).__name__} and {type(other).__name__}" + ) + + self = cast("TimedeltaArray", self) + + from pandas.core.arrays import DatetimeArray + from pandas.core.arrays.datetimes import tz_to_dtype + + assert other is not NaT + other = Timestamp(other) + if other is NaT: + # In this case we specifically interpret NaT as a datetime, not + # the timedelta interpretation we would get by returning self + NaT + result = self._ndarray + NaT.to_datetime64().astype(f"M8[{self._unit}]") + # Preserve our resolution + return DatetimeArray._simple_new(result, dtype=result.dtype) + + if self._reso != other._reso: + raise NotImplementedError( + "Addition between TimedeltaArray and Timestamp with mis-matched " + "resolutions is not yet supported." 
+ ) + + i8 = self.asi8 + result = checked_add_with_arr(i8, other.value, arr_mask=self._isnan) + dtype = tz_to_dtype(tz=other.tz, unit=self._unit) + res_values = result.view(f"M8[{self._unit}]") + return DatetimeArray._simple_new(res_values, dtype=dtype, freq=self.freq) + + @final + def _add_datetime_arraylike(self, other) -> DatetimeArray: + if not is_timedelta64_dtype(self.dtype): + raise TypeError( + f"cannot add {type(self).__name__} and {type(other).__name__}" + ) + + # At this point we have already checked that other.dtype is datetime64 + other = ensure_wrapped_if_datetimelike(other) + # defer to DatetimeArray.__add__ + return other + self + + @final + def _sub_datetimelike_scalar(self, other: datetime | np.datetime64): + if self.dtype.kind != "M": + raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}") + + self = cast("DatetimeArray", self) + # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] + + # error: Non-overlapping identity check (left operand type: "Union[datetime, + # datetime64]", right operand type: "NaTType") [comparison-overlap] + assert other is not NaT # type: ignore[comparison-overlap] + other = Timestamp(other) + # error: Non-overlapping identity check (left operand type: "Timestamp", + # right operand type: "NaTType") + if other is NaT: # type: ignore[comparison-overlap] + return self - NaT + + try: + self._assert_tzawareness_compat(other) + except TypeError as err: + new_message = str(err).replace("compare", "subtract") + raise type(err)(new_message) from err + + i8 = self.asi8 + result = checked_add_with_arr(i8, -other.value, arr_mask=self._isnan) + return result.view("timedelta64[ns]") + + @final + def _sub_datetime_arraylike(self, other): + if self.dtype.kind != "M": + raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}") + + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + + self = cast("DatetimeArray", self) + other = ensure_wrapped_if_datetimelike(other) + + try: + self._assert_tzawareness_compat(other) + except TypeError as err: + new_message = str(err).replace("compare", "subtract") + raise type(err)(new_message) from err + + self_i8 = self.asi8 + other_i8 = other.asi8 + new_values = checked_add_with_arr( + self_i8, -other_i8, arr_mask=self._isnan, b_mask=other._isnan + ) + return new_values.view("timedelta64[ns]") + + @final + def _sub_period(self, other: Period) -> npt.NDArray[np.object_]: + if not is_period_dtype(self.dtype): + raise TypeError(f"cannot subtract Period from a {type(self).__name__}") + + # If the operation is well-defined, we return an object-dtype ndarray + # of DateOffsets. 
Null entries are filled with pd.NaT + self._check_compatible_with(other) + new_i8_data = checked_add_with_arr( + self.asi8, -other.ordinal, arr_mask=self._isnan + ) + new_data = np.array([self.freq.base * x for x in new_i8_data]) + + if self._hasna: + new_data[self._isnan] = NaT + + return new_data + + @final + def _add_period(self, other: Period) -> PeriodArray: + if not is_timedelta64_dtype(self.dtype): + raise TypeError(f"cannot add Period to a {type(self).__name__}") + + # We will wrap in a PeriodArray and defer to the reversed operation + from pandas.core.arrays.period import PeriodArray + + i8vals = np.broadcast_to(other.ordinal, self.shape) + parr = PeriodArray(i8vals, freq=other.freq) + return parr + self + + def _add_offset(self, offset): + raise AbstractMethodError(self) + + def _add_timedeltalike_scalar(self, other): + """ + Add a delta of a timedeltalike + + Returns + ------- + Same type as self + """ + if isna(other): + # i.e np.timedelta64("NaT"), not recognized by delta_to_nanoseconds + new_values = np.empty(self.shape, dtype="i8").view(self._ndarray.dtype) + new_values.fill(iNaT) + return type(self)._simple_new(new_values, dtype=self.dtype) + + # PeriodArray overrides, so we only get here with DTA/TDA + # error: "DatetimeLikeArrayMixin" has no attribute "_reso" + inc = delta_to_nanoseconds(other, reso=self._reso) # type: ignore[attr-defined] + + new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan) + new_values = new_values.view(self._ndarray.dtype) + + new_freq = None + if isinstance(self.freq, Tick) or is_period_dtype(self.dtype): + # adding a scalar preserves freq + new_freq = self.freq + + # error: Unexpected keyword argument "freq" for "_simple_new" of "NDArrayBacked" + return type(self)._simple_new( # type: ignore[call-arg] + new_values, dtype=self.dtype, freq=new_freq + ) + + def _add_timedelta_arraylike( + self, other: TimedeltaArray | npt.NDArray[np.timedelta64] + ): + """ + Add a delta of a TimedeltaIndex + + Returns + ------- + Same type as self + """ + # overridden by PeriodArray + + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + + other = ensure_wrapped_if_datetimelike(other) + other = cast("TimedeltaArray", other) + + self_i8 = self.asi8 + other_i8 = other.asi8 + new_values = checked_add_with_arr( + self_i8, other_i8, arr_mask=self._isnan, b_mask=other._isnan + ) + return type(self)(new_values, dtype=self.dtype) + + @final + def _add_nat(self): + """ + Add pd.NaT to self + """ + if is_period_dtype(self.dtype): + raise TypeError( + f"Cannot add {type(self).__name__} and {type(NaT).__name__}" + ) + self = cast("TimedeltaArray | DatetimeArray", self) + + # GH#19124 pd.NaT is treated like a timedelta for both timedelta + # and datetime dtypes + result = np.empty(self.shape, dtype=np.int64) + result.fill(iNaT) + result = result.view(self._ndarray.dtype) # preserve reso + return type(self)._simple_new(result, dtype=self.dtype, freq=None) + + @final + def _sub_nat(self): + """ + Subtract pd.NaT from self + """ + # GH#19124 Timedelta - datetime is not in general well-defined. + # We make an exception for pd.NaT, which in this case quacks + # like a timedelta. + # For datetime64 dtypes by convention we treat NaT as a datetime, so + # this subtraction returns a timedelta64 dtype. + # For period dtype, timedelta64 is a close-enough return dtype. 
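+ # The result below is therefore all-NaT: an int64 buffer filled with
+ # iNaT and reinterpreted as timedelta64[ns].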
+ result = np.empty(self.shape, dtype=np.int64) + result.fill(iNaT) + return result.view("timedelta64[ns]") + + @final + def _sub_period_array(self, other: PeriodArray) -> npt.NDArray[np.object_]: + if not is_period_dtype(self.dtype): + raise TypeError( + f"cannot subtract {other.dtype}-dtype from {type(self).__name__}" + ) + + self = cast("PeriodArray", self) + self._require_matching_freq(other) + + new_i8_values = checked_add_with_arr( + self.asi8, -other.asi8, arr_mask=self._isnan, b_mask=other._isnan + ) + + new_values = np.array([self.freq.base * x for x in new_i8_values]) + if self._hasna or other._hasna: + mask = self._isnan | other._isnan + new_values[mask] = NaT + return new_values + + @final + def _addsub_object_array(self, other: np.ndarray, op): + """ + Add or subtract array-like of DateOffset objects + + Parameters + ---------- + other : np.ndarray[object] + op : {operator.add, operator.sub} + + Returns + ------- + result : same class as self + """ + assert op in [operator.add, operator.sub] + if len(other) == 1 and self.ndim == 1: + # If both 1D then broadcasting is unambiguous + return op(self, other[0]) + + warnings.warn( + "Adding/subtracting object-dtype array to " + f"{type(self).__name__} not vectorized.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + + # Caller is responsible for broadcasting if necessary + assert self.shape == other.shape, (self.shape, other.shape) + + with warnings.catch_warnings(): + # filter out warnings about Timestamp.freq + warnings.filterwarnings("ignore", category=FutureWarning) + res_values = op(self.astype("O"), np.asarray(other)) + + result = pd_array(res_values.ravel()) + result = extract_array(result, extract_numpy=True).reshape(self.shape) + return result + + def _time_shift( + self: DatetimeLikeArrayT, periods: int, freq=None + ) -> DatetimeLikeArrayT: + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or str + Frequency increment to shift by. + """ + if freq is not None and freq != self.freq: + if isinstance(freq, str): + freq = to_offset(freq) + offset = periods * freq + return self + offset + + if periods == 0 or len(self) == 0: + # GH#14811 empty case + return self.copy() + + if self.freq is None: + raise NullFrequencyError("Cannot shift with no freq") + + start = self[0] + periods * self.freq + end = self[-1] + periods * self.freq + + # Note: in the DatetimeTZ case, _generate_range will infer the + # appropriate timezone from `start` and `end`, so tz does not need + # to be passed explicitly. 
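+ # e.g. a daily-freq array spanning Jan 1-3 shifted by periods=2 becomes
+ # a daily-freq array spanning Jan 3-5, with the same number of elements.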
+ return self._generate_range(start=start, end=end, periods=None, freq=self.freq) + + @unpack_zerodim_and_defer("__add__") + def __add__(self, other): + other_dtype = getattr(other, "dtype", None) + + # scalar others + if other is NaT: + result = self._add_nat() + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + result = self._add_timedeltalike_scalar(other) + elif isinstance(other, BaseOffset): + # specifically _not_ a Tick + result = self._add_offset(other) + elif isinstance(other, (datetime, np.datetime64)): + result = self._add_datetimelike_scalar(other) + elif isinstance(other, Period) and is_timedelta64_dtype(self.dtype): + result = self._add_period(other) + elif lib.is_integer(other): + # This check must come after the check for np.timedelta64 + # as is_integer returns True for these + if not is_period_dtype(self.dtype): + raise integer_op_not_supported(self) + result = cast("PeriodArray", self)._addsub_int_array_or_scalar( + other * self.freq.n, operator.add + ) + + # array-like others + elif is_timedelta64_dtype(other_dtype): + # TimedeltaIndex, ndarray[timedelta64] + result = self._add_timedelta_arraylike(other) + elif is_object_dtype(other_dtype): + # e.g. Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.add) + elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype): + # DatetimeIndex, ndarray[datetime64] + return self._add_datetime_arraylike(other) + elif is_integer_dtype(other_dtype): + if not is_period_dtype(self.dtype): + raise integer_op_not_supported(self) + result = cast("PeriodArray", self)._addsub_int_array_or_scalar( + other * self.freq.n, operator.add + ) + else: + # Includes Categorical, other ExtensionArrays + # For PeriodDtype, if self is a TimedeltaArray and other is a + # PeriodArray with a timedelta-like (i.e. Tick) freq, this + # operation is valid. Defer to the PeriodArray implementation. + # In remaining cases, this will end up raising TypeError. + return NotImplemented + + if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype): + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray(result) + return result + + def __radd__(self, other): + # alias for __add__ + return self.__add__(other) + + @unpack_zerodim_and_defer("__sub__") + def __sub__(self, other): + + other_dtype = getattr(other, "dtype", None) + + # scalar others + if other is NaT: + result = self._sub_nat() + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + result = self._add_timedeltalike_scalar(-other) + elif isinstance(other, BaseOffset): + # specifically _not_ a Tick + result = self._add_offset(-other) + elif isinstance(other, (datetime, np.datetime64)): + result = self._sub_datetimelike_scalar(other) + elif lib.is_integer(other): + # This check must come after the check for np.timedelta64 + # as is_integer returns True for these + if not is_period_dtype(self.dtype): + raise integer_op_not_supported(self) + result = cast("PeriodArray", self)._addsub_int_array_or_scalar( + other * self.freq.n, operator.sub + ) + + elif isinstance(other, Period): + result = self._sub_period(other) + + # array-like others + elif is_timedelta64_dtype(other_dtype): + # TimedeltaIndex, ndarray[timedelta64] + result = self._add_timedelta_arraylike(-other) + elif is_object_dtype(other_dtype): + # e.g. 
Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.sub) + elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype): + # DatetimeIndex, ndarray[datetime64] + result = self._sub_datetime_arraylike(other) + elif is_period_dtype(other_dtype): + # PeriodIndex + result = self._sub_period_array(other) + elif is_integer_dtype(other_dtype): + if not is_period_dtype(self.dtype): + raise integer_op_not_supported(self) + result = cast("PeriodArray", self)._addsub_int_array_or_scalar( + other * self.freq.n, operator.sub + ) + else: + # Includes ExtensionArrays, float_dtype + return NotImplemented + + if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype): + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray(result) + return result + + def __rsub__(self, other): + other_dtype = getattr(other, "dtype", None) + + if is_datetime64_any_dtype(other_dtype) and is_timedelta64_dtype(self.dtype): + # ndarray[datetime64] cannot be subtracted from self, so + # we need to wrap in DatetimeArray/Index and flip the operation + if lib.is_scalar(other): + # i.e. np.datetime64 object + return Timestamp(other) - self + if not isinstance(other, DatetimeLikeArrayMixin): + # Avoid down-casting DatetimeIndex + from pandas.core.arrays import DatetimeArray + + other = DatetimeArray(other) + return other - self + elif ( + is_datetime64_any_dtype(self.dtype) + and hasattr(other, "dtype") + and not is_datetime64_any_dtype(other.dtype) + ): + # GH#19959 datetime - datetime is well-defined as timedelta, + # but any other type - datetime is not well-defined. + raise TypeError( + f"cannot subtract {type(self).__name__} from {type(other).__name__}" + ) + elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other_dtype): + # TODO: Can we simplify/generalize these cases at all? + raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}") + elif is_timedelta64_dtype(self.dtype): + self = cast("TimedeltaArray", self) + return (-self) + other + + # We get here with e.g. datetime objects + return -(self - other) + + def __iadd__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: + result = self + other + self[:] = result[:] + + if not is_period_dtype(self.dtype): + # restore freq, which is invalidated by setitem + self._freq = result.freq + return self + + def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: + result = self - other + self[:] = result[:] + + if not is_period_dtype(self.dtype): + # restore freq, which is invalidated by setitem + self._freq = result.freq + return self + + # -------------------------------------------------------------- + # Reductions + + def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + """ + Return the minimum value of the Array or minimum along + an axis. + + See Also + -------- + numpy.ndarray.min + Index.min : Return the minimum value in an Index. + Series.min : Return the minimum value in a Series. 
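+
+ Examples
+ --------
+ Illustrative sketch only:
+
+ >>> pd.array(pd.to_datetime(["2020-01-02", "2020-01-01"])).min() # doctest: +SKIP
+ Timestamp('2020-01-01 00:00:00')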
+ """ + nv.validate_min((), kwargs) + nv.validate_minmax_axis(axis, self.ndim) + + if is_period_dtype(self.dtype): + # pass datetime64 values to nanops to get correct NaT semantics + result = nanops.nanmin( + self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna + ) + if result is NaT: + return NaT + result = result.view("i8") + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + """ + Return the maximum value of the Array or maximum along + an axis. + + See Also + -------- + numpy.ndarray.max + Index.max : Return the maximum value in an Index. + Series.max : Return the maximum value in a Series. + """ + nv.validate_max((), kwargs) + nv.validate_minmax_axis(axis, self.ndim) + + if is_period_dtype(self.dtype): + # pass datetime64 values to nanops to get correct NaT semantics + result = nanops.nanmax( + self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna + ) + if result is NaT: + return result + result = result.view("i8") + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def mean(self, *, skipna: bool = True, axis: int | None = 0): + """ + Return the mean value of the Array. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + skipna : bool, default True + Whether to ignore any NaT elements. + axis : int, optional, default 0 + + Returns + ------- + scalar + Timestamp or Timedelta. + + See Also + -------- + numpy.ndarray.mean : Returns the average of array elements along a given axis. + Series.mean : Return the mean value in a Series. + + Notes + ----- + mean is only defined for Datetime and Timedelta dtypes, not for Period. + """ + if is_period_dtype(self.dtype): + # See discussion in GH#24757 + raise TypeError( + f"mean is not implemented for {type(self).__name__} since the " + "meaning is ambiguous. An alternative is " + "obj.to_timestamp(how='start').mean()" + ) + + result = nanops.nanmean( + self._ndarray, axis=axis, skipna=skipna, mask=self.isna() + ) + return self._wrap_reduction_result(axis, result) + + def median(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + nv.validate_median((), kwargs) + + if axis is not None and abs(axis) >= self.ndim: + raise ValueError("abs(axis) must be less than ndim") + + if is_period_dtype(self.dtype): + # pass datetime64 values to nanops to get correct NaT semantics + result = nanops.nanmedian( + self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna + ) + result = result.view("i8") + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def _mode(self, dropna: bool = True): + mask = None + if dropna: + mask = self.isna() + + i8modes = mode(self.view("i8"), mask=mask) + npmodes = i8modes.view(self._ndarray.dtype) + npmodes = cast(np.ndarray, npmodes) + return self._from_backing_data(npmodes) + + +class DatelikeOps(DatetimeLikeArrayMixin): + """ + Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. 
+ """ + + @Substitution( + URL="https://docs.python.org/3/library/datetime.html" + "#strftime-and-strptime-behavior" + ) + def strftime(self, date_format: str) -> npt.NDArray[np.object_]: + """ + Convert to Index using specified date_format. + + Return an Index of formatted strings specified by date_format, which + supports the same string format as the python standard library. Details + of the string format can be found in `python string format + doc <%(URL)s>`__. + + Formats supported by the C `strftime` API but not by the python string format + doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be + preferably replaced with their supported equivalents (such as `"%%H:%%M"`, + `"%%I:%%M:%%S %%p"`). + + Note that `PeriodIndex` support additional directives, detailed in + `Period.strftime`. + + Parameters + ---------- + date_format : str + Date format string (e.g. "%%Y-%%m-%%d"). + + Returns + ------- + ndarray[object] + NumPy ndarray of formatted strings. + + See Also + -------- + to_datetime : Convert the given argument to datetime. + DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. + DatetimeIndex.round : Round the DatetimeIndex to the specified freq. + DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. + Timestamp.strftime : Format a single Timestamp. + Period.strftime : Format a single Period. + + Examples + -------- + >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), + ... periods=3, freq='s') + >>> rng.strftime('%%B %%d, %%Y, %%r') + Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', + 'March 10, 2018, 09:00:02 AM'], + dtype='object') + """ + result = self._format_native_types(date_format=date_format, na_rep=np.nan) + return result.astype(object, copy=False) + + +_round_doc = """ + Perform {op} operation on the data to the specified `freq`. + + Parameters + ---------- + freq : str or Offset + The frequency level to {op} the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See + :ref:`frequency aliases ` for + a list of possible `freq` values. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Raises + ------ + ValueError if the `freq` cannot be converted. + + Notes + ----- + If the timestamps have a timezone, {op}ing will take place relative to the + local ("wall") time and re-localized to the same timezone. 
When {op}ing + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + **DatetimeIndex** + + >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') + >>> rng + DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', + '2018-01-01 12:01:00'], + dtype='datetime64[ns]', freq='T') + """ + +_round_example = """>>> rng.round('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.round("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.floor("2H", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + + >>> rng_tz.floor("2H", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + """ + +_floor_example = """>>> rng.floor('H') + DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.floor("H") + 0 2018-01-01 11:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.floor("2H", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + + >>> rng_tz.floor("2H", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + """ + +_ceil_example = """>>> rng.ceil('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 13:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.ceil("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 13:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. + + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.ceil("H", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + + >>> rng_tz.ceil("H", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + """ + + +TimelikeOpsT = TypeVar("TimelikeOpsT", bound="TimelikeOps") + + +class TimelikeOps(DatetimeLikeArrayMixin): + """ + Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. 
+ """ + + _default_dtype: np.dtype + + def __init__(self, values, dtype=None, freq=lib.no_default, copy: bool = False): + values = extract_array(values, extract_numpy=True) + if isinstance(values, IntegerArray): + values = values.to_numpy("int64", na_value=iNaT) + + inferred_freq = getattr(values, "_freq", None) + explicit_none = freq is None + freq = freq if freq is not lib.no_default else None + + if isinstance(values, type(self)): + if explicit_none: + # don't inherit from values + pass + elif freq is None: + freq = values.freq + elif freq and values.freq: + freq = to_offset(freq) + freq, _ = validate_inferred_freq(freq, values.freq, False) + + if dtype is not None: + dtype = pandas_dtype(dtype) + if not is_dtype_equal(dtype, values.dtype): + # TODO: we only have tests for this for DTA, not TDA (2022-07-01) + raise TypeError( + f"dtype={dtype} does not match data dtype {values.dtype}" + ) + + dtype = values.dtype + values = values._ndarray + + elif dtype is None: + dtype = self._default_dtype + + if not isinstance(values, np.ndarray): + raise ValueError( + f"Unexpected type '{type(values).__name__}'. 'values' must be a " + f"{type(self).__name__}, ndarray, or Series or Index " + "containing one of those." + ) + if values.ndim not in [1, 2]: + raise ValueError("Only 1-dimensional input arrays are supported.") + + if values.dtype == "i8": + # for compat with datetime/timedelta/period shared methods, + # we can sometimes get here with int64 values. These represent + # nanosecond UTC (or tz-naive) unix timestamps + values = values.view(self._default_dtype) + + dtype = self._validate_dtype(values, dtype) + + if freq == "infer": + raise ValueError( + f"Frequency inference not allowed in {type(self).__name__}.__init__. " + "Use 'pd.array()' instead." + ) + + if copy: + values = values.copy() + if freq: + freq = to_offset(freq) + + NDArrayBacked.__init__(self, values=values, dtype=dtype) + self._freq = freq + + if inferred_freq is None and freq is not None: + type(self)._validate_frequency(self, freq) + + @classmethod + def _validate_dtype(cls, values, dtype): + raise AbstractMethodError(cls) + + # -------------------------------------------------------------- + + @cache_readonly + def _reso(self) -> int: + return get_unit_from_dtype(self._ndarray.dtype) + + @cache_readonly + def _unit(self) -> str: + # e.g. 
"ns", "us", "ms" + # error: Argument 1 to "dtype_to_unit" has incompatible type + # "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]" + return dtype_to_unit(self.dtype) # type: ignore[arg-type] + + # -------------------------------------------------------------- + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + if ( + ufunc in [np.isnan, np.isinf, np.isfinite] + and len(inputs) == 1 + and inputs[0] is self + ): + # numpy 1.18 changed isinf and isnan to not raise on dt64/td64 + return getattr(ufunc, method)(self._ndarray, **kwargs) + + return super().__array_ufunc__(ufunc, method, *inputs, **kwargs) + + def _round(self, freq, mode, ambiguous, nonexistent): + # round the local times + if is_datetime64tz_dtype(self.dtype): + # operate on naive timestamps, then convert back to aware + self = cast("DatetimeArray", self) + naive = self.tz_localize(None) + result = naive._round(freq, mode, ambiguous, nonexistent) + return result.tz_localize( + self.tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + + values = self.view("i8") + values = cast(np.ndarray, values) + nanos = to_offset(freq).nanos # raises on non-fixed frequencies + nanos = delta_to_nanoseconds(to_offset(freq), self._reso) + result_i8 = round_nsint64(values, mode, nanos) + result = self._maybe_mask_results(result_i8, fill_value=iNaT) + result = result.view(self._ndarray.dtype) + return self._simple_new(result, dtype=self.dtype) + + @Appender((_round_doc + _round_example).format(op="round")) + def round(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent) + + @Appender((_round_doc + _floor_example).format(op="floor")) + def floor(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) + + @Appender((_round_doc + _ceil_example).format(op="ceil")) + def ceil(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) + + # -------------------------------------------------------------- + # Reductions + + def any(self, *, axis: int | None = None, skipna: bool = True) -> bool: + # GH#34479 discussion of desired behavior long-term + return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) + + def all(self, *, axis: int | None = None, skipna: bool = True) -> bool: + # GH#34479 discussion of desired behavior long-term + return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) + + # -------------------------------------------------------------- + # Frequency Methods + + def _maybe_clear_freq(self) -> None: + self._freq = None + + def _with_freq(self, freq): + """ + Helper to get a view on the same data, with a new freq. + + Parameters + ---------- + freq : DateOffset, None, or "infer" + + Returns + ------- + Same type as self + """ + # GH#29843 + if freq is None: + # Always valid + pass + elif len(self) == 0 and isinstance(freq, BaseOffset): + # Always valid. In the TimedeltaArray case, we assume this + # is a Tick offset. 
+ pass + else: + # As an internal method, we can ensure this assertion always holds + assert freq == "infer" + freq = to_offset(self.inferred_freq) + + arr = self.view() + arr._freq = freq + return arr + + # -------------------------------------------------------------- + + # GH#46910 - Keep old signature to test we don't break things for EA library authors + def factorize( # type:ignore[override] + self, + na_sentinel: int = -1, + sort: bool = False, + ): + if self.freq is not None: + # We must be unique, so can short-circuit (and retain freq) + codes = np.arange(len(self), dtype=np.intp) + uniques = self.copy() # TODO: copy or view? + if sort and self.freq.n < 0: + codes = codes[::-1] + uniques = uniques[::-1] + return codes, uniques + # FIXME: shouldn't get here; we are ignoring sort + return super().factorize(na_sentinel=na_sentinel) + + +# ------------------------------------------------------------------- +# Shared Constructor Helpers + + +def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str): + if not hasattr(data, "dtype"): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. generator + data = list(data) + data = np.asarray(data) + copy = False + elif isinstance(data, ABCMultiIndex): + raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.") + else: + data = extract_array(data, extract_numpy=True) + + if isinstance(data, IntegerArray): + data = data.to_numpy("int64", na_value=iNaT) + copy = False + elif not isinstance(data, (np.ndarray, ExtensionArray)): + # GH#24539 e.g. xarray, dask object + data = np.asarray(data) + + elif isinstance(data, ABCCategorical): + # GH#18664 preserve tz in going DTI->Categorical->DTI + # TODO: cases where we need to do another pass through maybe_convert_dtype, + # e.g. the categories are timedelta64s + data = data.categories.take(data.codes, fill_value=NaT)._values + copy = False + + return data, copy + + +@overload +def validate_periods(periods: None) -> None: + ... + + +@overload +def validate_periods(periods: float) -> int: + ... + + +def validate_periods(periods: float | None) -> int | None: + """ + If a `periods` argument is passed to the Datetime/Timedelta Array/Index + constructor, cast it to an integer. + + Parameters + ---------- + periods : None, float, int + + Returns + ------- + periods : None or int + + Raises + ------ + TypeError + if periods is None, float, or int + """ + if periods is not None: + if lib.is_float(periods): + periods = int(periods) + elif not lib.is_integer(periods): + raise TypeError(f"periods must be a number, got {periods}") + # error: Incompatible return value type (got "Optional[float]", + # expected "Optional[int]") + return periods # type: ignore[return-value] + + +def validate_inferred_freq( + freq, inferred_freq, freq_infer +) -> tuple[BaseOffset | None, bool]: + """ + If the user passes a freq and another freq is inferred from passed data, + require that they match. + + Parameters + ---------- + freq : DateOffset or None + inferred_freq : DateOffset or None + freq_infer : bool + + Returns + ------- + freq : DateOffset or None + freq_infer : bool + + Notes + ----- + We assume at this point that `maybe_infer_freq` has been called, so + `freq` is either a DateOffset object or None. 
+ """ + if inferred_freq is not None: + if freq is not None and freq != inferred_freq: + raise ValueError( + f"Inferred frequency {inferred_freq} from passed " + "values does not conform to passed frequency " + f"{freq.freqstr}" + ) + elif freq is None: + freq = inferred_freq + freq_infer = False + + return freq, freq_infer + + +def maybe_infer_freq(freq): + """ + Comparing a DateOffset to the string "infer" raises, so we need to + be careful about comparisons. Make a dummy variable `freq_infer` to + signify the case where the given freq is "infer" and set freq to None + to avoid comparison trouble later on. + + Parameters + ---------- + freq : {DateOffset, None, str} + + Returns + ------- + freq : {DateOffset, None} + freq_infer : bool + Whether we should inherit the freq of passed data. + """ + freq_infer = False + if not isinstance(freq, BaseOffset): + # if a passed freq is None, don't infer automatically + if freq != "infer": + freq = to_offset(freq) + else: + freq_infer = True + freq = None + return freq, freq_infer + + +def dtype_to_unit(dtype: DatetimeTZDtype | np.dtype) -> str: + """ + Return the unit str corresponding to the dtype's resolution. + + Parameters + ---------- + dtype : DatetimeTZDtype or np.dtype + If np.dtype, we assume it is a datetime64 dtype. + + Returns + ------- + str + """ + if isinstance(dtype, DatetimeTZDtype): + return dtype.unit + return np.datetime_data(dtype)[0] diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py new file mode 100644 index 00000000..f1ddba0c --- /dev/null +++ b/pandas/core/arrays/datetimes.py @@ -0,0 +1,2586 @@ +from __future__ import annotations + +from datetime import ( + datetime, + time, + timedelta, + tzinfo, +) +from typing import ( + TYPE_CHECKING, + Literal, + cast, +) +import warnings + +import numpy as np + +from pandas._libs import ( + lib, + tslib, +) +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + NaTType, + Resolution, + Timestamp, + astype_overflowsafe, + fields, + get_resolution, + get_unit_from_dtype, + ints_to_pydatetime, + is_date_array_normalized, + is_supported_unit, + is_unitless, + normalize_i8_timestamps, + timezones, + to_offset, + tz_convert_from_utc, + tzconversion, +) +from pandas._typing import npt +from pandas.errors import ( + OutOfBoundsDatetime, + PerformanceWarning, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_inclusive + +from pandas.core.dtypes.astype import astype_dt64_to_dt64tz +from pandas.core.dtypes.common import ( + DT64NS_DTYPE, + INT64_DTYPE, + is_bool_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_object_dtype, + is_period_dtype, + is_sparse, + is_string_dtype, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna + +from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays._ranges import generate_regular_range +import pandas.core.common as com + +from pandas.tseries.frequencies import get_period_alias +from pandas.tseries.offsets import ( + BDay, + Day, + Tick, +) + +if TYPE_CHECKING: + + from pandas import DataFrame + from pandas.core.arrays import ( + PeriodArray, + TimedeltaArray, + ) + +_midnight = time(0, 0) + + +def tz_to_dtype(tz: tzinfo | None, unit: str = "ns"): + """ + Return a datetime64[ns] dtype appropriate for the given timezone. 
+ + Parameters + ---------- + tz : tzinfo or None + unit : str, default "ns" + + Returns + ------- + np.dtype or Datetime64TZDType + """ + if tz is None: + return np.dtype(f"M8[{unit}]") + else: + return DatetimeTZDtype(tz=tz, unit=unit) + + +def _field_accessor(name: str, field: str, docstring=None): + def f(self): + values = self._local_timestamps() + + if field in self._bool_ops: + result: np.ndarray + + if field.endswith(("start", "end")): + freq = self.freq + month_kw = 12 + if freq: + kwds = freq.kwds + month_kw = kwds.get("startingMonth", kwds.get("month", 12)) + + result = fields.get_start_end_field( + values, field, self.freqstr, month_kw, reso=self._reso + ) + else: + result = fields.get_date_field(values, field, reso=self._reso) + + # these return a boolean by-definition + return result + + if field in self._object_ops: + result = fields.get_date_name_field(values, field, reso=self._reso) + result = self._maybe_mask_results(result, fill_value=None) + + else: + result = fields.get_date_field(values, field, reso=self._reso) + result = self._maybe_mask_results( + result, fill_value=None, convert="float64" + ) + + return result + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + +class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): + """ + Pandas ExtensionArray for tz-naive or tz-aware datetime data. + + .. warning:: + + DatetimeArray is currently experimental, and its API may change + without warning. In particular, :attr:`DatetimeArray.dtype` is + expected to change to always be an instance of an ``ExtensionDtype`` + subclass. + + Parameters + ---------- + values : Series, Index, DatetimeArray, ndarray + The datetime data. + + For DatetimeArray `values` (or a Series or Index boxing one), + `dtype` and `freq` will be extracted from `values`. + + dtype : numpy.dtype or DatetimeTZDtype + Note that the only NumPy dtype allowed is 'datetime64[ns]'. + freq : str or Offset, optional + The frequency. + copy : bool, default False + Whether to copy the underlying array of values. 
+ + Attributes + ---------- + None + + Methods + ------- + None + """ + + _typ = "datetimearray" + _internal_fill_value = np.datetime64("NaT", "ns") + _recognized_scalars = (datetime, np.datetime64) + _is_recognized_dtype = is_datetime64_any_dtype + _infer_matches = ("datetime", "datetime64", "date") + + @property + def _scalar_type(self) -> type[Timestamp]: + return Timestamp + + # define my properties & methods for delegation + _bool_ops: list[str] = [ + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ] + _object_ops: list[str] = ["freq", "tz"] + _field_ops: list[str] = [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "weekofyear", + "week", + "weekday", + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "days_in_month", + "daysinmonth", + "microsecond", + "nanosecond", + ] + _other_ops: list[str] = ["date", "time", "timetz"] + _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _other_ops + _datetimelike_methods: list[str] = [ + "to_period", + "tz_localize", + "tz_convert", + "normalize", + "strftime", + "round", + "floor", + "ceil", + "month_name", + "day_name", + ] + + # ndim is inherited from ExtensionArray, must exist to ensure + # Timestamp.__richcmp__(DateTimeArray) operates pointwise + + # ensure that operations with numpy arrays defer to our implementation + __array_priority__ = 1000 + + # ----------------------------------------------------------------- + # Constructors + + _dtype: np.dtype | DatetimeTZDtype + _freq: BaseOffset | None = None + _default_dtype = DT64NS_DTYPE # used in TimeLikeOps.__init__ + + @classmethod + def _validate_dtype(cls, values, dtype): + # used in TimeLikeOps.__init__ + _validate_dt64_dtype(values.dtype) + dtype = _validate_dt64_dtype(dtype) + return dtype + + # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" + @classmethod + def _simple_new( # type: ignore[override] + cls, + values: np.ndarray, + freq: BaseOffset | None = None, + dtype=DT64NS_DTYPE, + ) -> DatetimeArray: + assert isinstance(values, np.ndarray) + assert dtype.kind == "M" + if isinstance(dtype, np.dtype): + assert dtype == values.dtype + assert not is_unitless(dtype) + else: + # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC], + # then values.dtype should be M8[us]. 
+ assert dtype._reso == get_unit_from_dtype(values.dtype) + + result = super()._simple_new(values, dtype) + result._freq = freq + return result + + @classmethod + def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False): + return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy) + + @classmethod + def _from_sequence_not_strict( + cls, + data, + dtype=None, + copy: bool = False, + tz=None, + freq: str | BaseOffset | lib.NoDefault | None = lib.no_default, + dayfirst: bool = False, + yearfirst: bool = False, + ambiguous="raise", + ): + explicit_none = freq is None + freq = freq if freq is not lib.no_default else None + + freq, freq_infer = dtl.maybe_infer_freq(freq) + + subarr, tz, inferred_freq = _sequence_to_dt64ns( + data, + dtype=dtype, + copy=copy, + tz=tz, + dayfirst=dayfirst, + yearfirst=yearfirst, + ambiguous=ambiguous, + ) + + freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) + if explicit_none: + freq = None + + dtype = tz_to_dtype(tz) + result = cls._simple_new(subarr, freq=freq, dtype=dtype) + + if inferred_freq is None and freq is not None: + # this condition precludes `freq_infer` + cls._validate_frequency(result, freq, ambiguous=ambiguous) + + elif freq_infer: + # Set _freq directly to bypass duplicative _validate_frequency + # check. + result._freq = to_offset(result.inferred_freq) + + return result + + @classmethod + def _generate_range( + cls, + start, + end, + periods, + freq, + tz=None, + normalize=False, + ambiguous="raise", + nonexistent="raise", + inclusive="both", + ): + + periods = dtl.validate_periods(periods) + if freq is None and any(x is None for x in [periods, start, end]): + raise ValueError("Must provide freq argument if no data is supplied") + + if com.count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the four parameters: start, end, periods, " + "and freq, exactly three must be specified" + ) + freq = to_offset(freq) + + if start is not None: + start = Timestamp(start) + + if end is not None: + end = Timestamp(end) + + if start is NaT or end is NaT: + raise ValueError("Neither `start` nor `end` can be NaT") + + left_inclusive, right_inclusive = validate_inclusive(inclusive) + start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) + tz = _infer_tz_from_endpoints(start, end, tz) + + if tz is not None: + # Localize the start and end arguments + start_tz = None if start is None else start.tz + end_tz = None if end is None else end.tz + start = _maybe_localize_point( + start, start_tz, start, freq, tz, ambiguous, nonexistent + ) + end = _maybe_localize_point( + end, end_tz, end, freq, tz, ambiguous, nonexistent + ) + if freq is not None: + # We break Day arithmetic (fixed 24 hour) here and opt for + # Day to mean calendar day (23/24/25 hour). Therefore, strip + # tz info from start and day to avoid DST arithmetic + if isinstance(freq, Day): + if start is not None: + start = start.tz_localize(None) + if end is not None: + end = end.tz_localize(None) + + if isinstance(freq, Tick): + i8values = generate_regular_range(start, end, periods, freq) + else: + xdr = generate_range(start=start, end=end, periods=periods, offset=freq) + i8values = np.array([x.value for x in xdr], dtype=np.int64) + + endpoint_tz = start.tz if start is not None else end.tz + + if tz is not None and endpoint_tz is None: + + if not timezones.is_utc(tz): + # short-circuit tz_localize_to_utc which would make + # an unnecessary copy with UTC but be a no-op. 
+ i8values = tzconversion.tz_localize_to_utc( + i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + + # i8values is localized datetime64 array -> have to convert + # start/end as well to compare + if start is not None: + start = start.tz_localize(tz, ambiguous, nonexistent) + if end is not None: + end = end.tz_localize(tz, ambiguous, nonexistent) + else: + # Create a linearly spaced date_range in local time + # Nanosecond-granularity timestamps aren't always correctly + # representable with doubles, so we limit the range that we + # pass to np.linspace as much as possible + i8values = ( + np.linspace(0, end.value - start.value, periods, dtype="int64") + + start.value + ) + if i8values.dtype != "i8": + # 2022-01-09 I (brock) am not sure if it is possible for this + # to overflow and cast to e.g. f8, but if it does we need to cast + i8values = i8values.astype("i8") + + if start == end: + if not left_inclusive and not right_inclusive: + i8values = i8values[1:-1] + else: + start_i8 = Timestamp(start).value + end_i8 = Timestamp(end).value + if not left_inclusive or not right_inclusive: + if not left_inclusive and len(i8values) and i8values[0] == start_i8: + i8values = i8values[1:] + if not right_inclusive and len(i8values) and i8values[-1] == end_i8: + i8values = i8values[:-1] + + dt64_values = i8values.view("datetime64[ns]") + dtype = tz_to_dtype(tz) + return cls._simple_new(dt64_values, freq=freq, dtype=dtype) + + # ----------------------------------------------------------------- + # DatetimeLike Interface + + def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: + if not isinstance(value, self._scalar_type) and value is not NaT: + raise ValueError("'value' should be a Timestamp.") + self._check_compatible_with(value, setitem=setitem) + return value.asm8 + + def _scalar_from_string(self, value) -> Timestamp | NaTType: + return Timestamp(value, tz=self.tz) + + def _check_compatible_with(self, other, setitem: bool = False): + if other is NaT: + return + self._assert_tzawareness_compat(other) + if setitem: + # Stricter check for setitem vs comparison methods + if self.tz is not None and not timezones.tz_compare(self.tz, other.tz): + # TODO(2.0): remove this check. GH#37605 + warnings.warn( + "Setitem-like behavior with mismatched timezones is deprecated " + "and will change in a future version. Instead of raising " + "(or for Index, Series, and DataFrame methods, coercing to " + "object dtype), the value being set (or passed as a " + "fill_value, or inserted) will be cast to the existing " + "DatetimeArray/DatetimeIndex/Series/DataFrame column's " + "timezone. To retain the old behavior, explicitly cast to " + "object dtype before the operation.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise ValueError(f"Timezones don't match. 
'{self.tz}' != '{other.tz}'") + + # ----------------------------------------------------------------- + # Descriptive Properties + + def _box_func(self, x: np.datetime64) -> Timestamp | NaTType: + # GH#42228 + value = x.view("i8") + ts = Timestamp._from_value_and_reso(value, reso=self._reso, tz=self.tz) + # Non-overlapping identity check (left operand type: "Timestamp", + # right operand type: "NaTType") + if ts is not NaT: # type: ignore[comparison-overlap] + # GH#41586 + # do this instead of passing to the constructor to avoid FutureWarning + ts._set_freq(self.freq) + return ts + + @property + # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype" + # incompatible with return type "ExtensionDtype" in supertype + # "ExtensionArray" + def dtype(self) -> np.dtype | DatetimeTZDtype: # type: ignore[override] + """ + The dtype for the DatetimeArray. + + .. warning:: + + A future version of pandas will change dtype to never be a + ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will + always be an instance of an ``ExtensionDtype`` subclass. + + Returns + ------- + numpy.dtype or DatetimeTZDtype + If the values are tz-naive, then ``np.dtype('datetime64[ns]')`` + is returned. + + If the values are tz-aware, then the ``DatetimeTZDtype`` + is returned. + """ + return self._dtype + + @property + def tz(self) -> tzinfo | None: + """ + Return the timezone. + + Returns + ------- + datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None + Returns None when the array is tz-naive. + """ + # GH 18595 + return getattr(self.dtype, "tz", None) + + @tz.setter + def tz(self, value): + # GH 3746: Prevent localizing or converting the index by setting tz + raise AttributeError( + "Cannot directly set timezone. Use tz_localize() " + "or tz_convert() as appropriate" + ) + + @property + def tzinfo(self) -> tzinfo | None: + """ + Alias for tz attribute + """ + return self.tz + + @property # NB: override with cache_readonly in immutable subclasses + def is_normalized(self) -> bool: + """ + Returns True if all of the dates are at midnight ("no time") + """ + return is_date_array_normalized(self.asi8, self.tz, reso=self._reso) + + @property # NB: override with cache_readonly in immutable subclasses + def _resolution_obj(self) -> Resolution: + return get_resolution(self.asi8, self.tz, reso=self._reso) + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + def __array__(self, dtype=None) -> np.ndarray: + if dtype is None and self.tz: + # The default for tz-aware is object, to preserve tz info + dtype = object + + return super().__array__(dtype=dtype) + + def __iter__(self): + """ + Return an iterator over the boxed values + + Yields + ------ + tstamp : Timestamp + """ + if self.ndim > 1: + for i in range(len(self)): + yield self[i] + else: + # convert in chunks of 10k for efficiency + data = self.asi8 + length = len(self) + chunksize = 10000 + chunks = (length // chunksize) + 1 + + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, length) + converted = ints_to_pydatetime( + data[start_i:end_i], + tz=self.tz, + freq=self.freq, + box="timestamp", + reso=self._reso, + ) + yield from converted + + def astype(self, dtype, copy: bool = True): + # We handle + # --> datetime + # --> period + # DatetimeLikeArrayMixin Super handles the rest. 
+ dtype = pandas_dtype(dtype) + + if is_dtype_equal(dtype, self.dtype): + if copy: + return self.copy() + return self + + elif ( + self.tz is None + and is_datetime64_dtype(dtype) + and not is_unitless(dtype) + and is_supported_unit(get_unit_from_dtype(dtype)) + ): + # unit conversion e.g. datetime64[s] + res_values = astype_overflowsafe(self._ndarray, dtype, copy=True) + return type(self)._simple_new(res_values, dtype=res_values.dtype) + # TODO: preserve freq? + + elif is_datetime64_ns_dtype(dtype): + return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False) + + elif self.tz is not None and isinstance(dtype, DatetimeTZDtype): + # tzaware unit conversion e.g. datetime64[s, UTC] + np_dtype = np.dtype(dtype.str) + res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy) + return type(self)._simple_new(res_values, dtype=dtype) + # TODO: preserve freq? + + elif ( + self.tz is None + and is_datetime64_dtype(dtype) + and dtype != self.dtype + and is_unitless(dtype) + ): + # TODO(2.0): just fall through to dtl.DatetimeLikeArrayMixin.astype + warnings.warn( + "Passing unit-less datetime64 dtype to .astype is deprecated " + "and will raise in a future version. Pass 'datetime64[ns]' instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + # unit conversion e.g. datetime64[s] + return self._ndarray.astype(dtype) + + elif is_period_dtype(dtype): + return self.to_period(freq=dtype.freq) + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) + + # ----------------------------------------------------------------- + # Rendering Methods + + def _format_native_types( + self, *, na_rep="NaT", date_format=None, **kwargs + ) -> npt.NDArray[np.object_]: + from pandas.io.formats.format import get_format_datetime64_from_values + + fmt = get_format_datetime64_from_values(self, date_format) + + return tslib.format_array_from_datetime( + self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._reso + ) + + # ----------------------------------------------------------------- + # Comparison Methods + + def _has_same_tz(self, other) -> bool: + + # vzone shouldn't be None if value is non-datetime like + if isinstance(other, np.datetime64): + # convert to Timestamp as np.datetime64 doesn't have tz attr + other = Timestamp(other) + + if not hasattr(other, "tzinfo"): + return False + other_tz = other.tzinfo + return timezones.tz_compare(self.tzinfo, other_tz) + + def _assert_tzawareness_compat(self, other) -> None: + # adapted from _Timestamp._assert_tzawareness_compat + other_tz = getattr(other, "tzinfo", None) + other_dtype = getattr(other, "dtype", None) + + if is_datetime64tz_dtype(other_dtype): + # Get tzinfo from Series dtype + other_tz = other.dtype.tz + if other is NaT: + # pd.NaT quacks both aware and naive + pass + elif self.tz is None: + if other_tz is not None: + raise TypeError( + "Cannot compare tz-naive and tz-aware datetime-like objects." 
+ ) + elif other_tz is None: + raise TypeError( + "Cannot compare tz-naive and tz-aware datetime-like objects" + ) + + # ----------------------------------------------------------------- + # Arithmetic Methods + + def _add_offset(self, offset) -> DatetimeArray: + + assert not isinstance(offset, Tick) + + if self.tz is not None: + values = self.tz_localize(None) + else: + values = self + + try: + result = offset._apply_array(values).view(values.dtype) + except NotImplementedError: + warnings.warn( + "Non-vectorized DateOffset being applied to Series or DatetimeIndex.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + result = self.astype("O") + offset + result = type(self)._from_sequence(result) + if not len(self): + # GH#30336 _from_sequence won't be able to infer self.tz + return result.tz_localize(self.tz) + + else: + result = DatetimeArray._simple_new(result, dtype=result.dtype) + if self.tz is not None: + # FIXME: tz_localize with non-nano + result = result.tz_localize(self.tz) + + return result + + # ----------------------------------------------------------------- + # Timezone Conversion and Localization Methods + + def _local_timestamps(self) -> npt.NDArray[np.int64]: + """ + Convert to an i8 (unix-like nanosecond timestamp) representation + while keeping the local timezone and not using UTC. + This is used to calculate time-of-day information as if the timestamps + were timezone-naive. + """ + if self.tz is None or timezones.is_utc(self.tz): + # Avoid the copy that would be made in tzconversion + return self.asi8 + return tz_convert_from_utc(self.asi8, self.tz, reso=self._reso) + + def tz_convert(self, tz) -> DatetimeArray: + """ + Convert tz-aware Datetime Array/Index from one time zone to another. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time. Corresponding timestamps would be converted + to this time zone of the Datetime Array/Index. A `tz` of None will + convert to UTC and remove the timezone information. + + Returns + ------- + Array or Index + + Raises + ------ + TypeError + If Datetime Array/Index is tz-naive. + + See Also + -------- + DatetimeIndex.tz : A timezone that has a variable offset from UTC. + DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a + given time zone, or remove timezone from a tz-aware DatetimeIndex. + + Examples + -------- + With the `tz` parameter, we can change the DatetimeIndex + to other time zones: + + >>> dti = pd.date_range(start='2014-08-01 09:00', + ... freq='H', periods=3, tz='Europe/Berlin') + + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='H') + + >>> dti.tz_convert('US/Central') + DatetimeIndex(['2014-08-01 02:00:00-05:00', + '2014-08-01 03:00:00-05:00', + '2014-08-01 04:00:00-05:00'], + dtype='datetime64[ns, US/Central]', freq='H') + + With the ``tz=None``, we can remove the timezone (after converting + to UTC if necessary): + + >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H', + ... 
periods=3, tz='Europe/Berlin') + + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='H') + + >>> dti.tz_convert(None) + DatetimeIndex(['2014-08-01 07:00:00', + '2014-08-01 08:00:00', + '2014-08-01 09:00:00'], + dtype='datetime64[ns]', freq='H') + """ + tz = timezones.maybe_get_tz(tz) + + if self.tz is None: + # tz naive, use tz_localize + raise TypeError( + "Cannot convert tz-naive timestamps, use tz_localize to localize" + ) + + # No conversion since timestamps are all UTC to begin with + dtype = tz_to_dtype(tz, unit=self._unit) + return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) + + @dtl.ravel_compat + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray: + """ + Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. + + This method takes a time zone (tz) naive Datetime Array/Index object + and makes this time zone aware. It does not move the time to another + time zone. + + This method can also be used to do the inverse -- to create a time + zone unaware object from an aware object. To that end, pass `tz=None`. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone to convert timestamps to. Passing ``None`` will + remove the time zone information preserving local time. + ambiguous : 'infer', 'NaT', bool array, default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + + nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + Same type as self + Array/Index converted to the specified time zone. + + Raises + ------ + TypeError + If the Datetime Array/Index is tz-aware and tz is not None. + + See Also + -------- + DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from + one time zone to another. 
+ + Examples + -------- + >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) + >>> tz_naive + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq='D') + + Localize DatetimeIndex in US/Eastern time zone: + + >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') + >>> tz_aware + DatetimeIndex(['2018-03-01 09:00:00-05:00', + '2018-03-02 09:00:00-05:00', + '2018-03-03 09:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + + With the ``tz=None``, we can remove the time zone information + while keeping the local time (not converted to UTC): + + >>> tz_aware.tz_localize(None) + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq=None) + + Be careful with DST changes. When there is sequential data, pandas can + infer the DST time: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.dt.tz_localize('CET', ambiguous='infer') + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[ns, CET] + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[ns, CET] + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backwards'`. + + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ... 
'2015-03-29 03:30:00'])) + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + """ + nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta + ): + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object" + ) + + if self.tz is not None: + if tz is None: + new_dates = tz_convert_from_utc(self.asi8, self.tz) + else: + raise TypeError("Already tz-aware, use tz_convert to convert.") + else: + tz = timezones.maybe_get_tz(tz) + # Convert to UTC + + new_dates = tzconversion.tz_localize_to_utc( + self.asi8, + tz, + ambiguous=ambiguous, + nonexistent=nonexistent, + reso=self._reso, + ) + new_dates = new_dates.view(f"M8[{self._unit}]") + dtype = tz_to_dtype(tz, unit=self._unit) + + freq = None + if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])): + # we can preserve freq + # TODO: Also for fixed-offsets + freq = self.freq + elif tz is None and self.tz is None: + # no-op + freq = self.freq + return self._simple_new(new_dates, dtype=dtype, freq=freq) + + # ---------------------------------------------------------------- + # Conversion Methods - Vectorized analogues of Timestamp methods + + def to_pydatetime(self) -> npt.NDArray[np.object_]: + """ + Return an ndarray of datetime.datetime objects. + + Returns + ------- + datetimes : ndarray[object] + """ + return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._reso) + + def normalize(self) -> DatetimeArray: + """ + Convert times to midnight. + + The time component of the date-time is converted to midnight i.e. + 00:00:00. This is useful in cases, when the time does not matter. + Length is unaltered. The timezones are unaffected. + + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on Datetime Array/Index. + + Returns + ------- + DatetimeArray, DatetimeIndex or Series + The same type as the original data. Series will have the same + name and index. DatetimeIndex will have the same name. + + See Also + -------- + floor : Floor the datetimes to the specified freq. + ceil : Ceil the datetimes to the specified freq. + round : Round the datetimes to the specified freq. + + Examples + -------- + >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H', + ... 
periods=3, tz='Asia/Calcutta') + >>> idx + DatetimeIndex(['2014-08-01 10:00:00+05:30', + '2014-08-01 11:00:00+05:30', + '2014-08-01 12:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq='H') + >>> idx.normalize() + DatetimeIndex(['2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq=None) + """ + new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._reso) + dt64_values = new_values.view(self._ndarray.dtype) + + dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype) + dta = dta._with_freq("infer") + if self.tz is not None: + dta = dta.tz_localize(self.tz) + return dta + + def to_period(self, freq=None) -> PeriodArray: + """ + Cast to PeriodArray/Index at a particular frequency. + + Converts DatetimeArray/Index to PeriodArray/Index. + + Parameters + ---------- + freq : str or Offset, optional + One of pandas' :ref:`offset strings ` + or an Offset object. Will be inferred by default. + + Returns + ------- + PeriodArray/Index + + Raises + ------ + ValueError + When converting a DatetimeArray/Index with non-regular values, + so that a frequency cannot be inferred. + + See Also + -------- + PeriodIndex: Immutable ndarray holding ordinal values. + DatetimeIndex.to_pydatetime: Return DatetimeIndex as object. + + Examples + -------- + >>> df = pd.DataFrame({"y": [1, 2, 3]}, + ... index=pd.to_datetime(["2000-03-31 00:00:00", + ... "2000-05-31 00:00:00", + ... "2000-08-31 00:00:00"])) + >>> df.index.to_period("M") + PeriodIndex(['2000-03', '2000-05', '2000-08'], + dtype='period[M]') + + Infer the daily frequency + + >>> idx = pd.date_range("2017-01-01", periods=2) + >>> idx.to_period() + PeriodIndex(['2017-01-01', '2017-01-02'], + dtype='period[D]') + """ + from pandas.core.arrays import PeriodArray + + if self.tz is not None: + warnings.warn( + "Converting to PeriodArray/Index representation " + "will drop timezone information.", + UserWarning, + stacklevel=find_stack_level(), + ) + + if freq is None: + freq = self.freqstr or self.inferred_freq + + if freq is None: + raise ValueError( + "You must pass a freq argument as current index has none." + ) + + res = get_period_alias(freq) + + # https://github.com/pandas-dev/pandas/issues/33358 + if res is None: + res = freq + + freq = res + + return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz) + + def to_perioddelta(self, freq) -> TimedeltaArray: + """ + Calculate deltas between self values and self converted to Periods at a freq. + + Used for vectorized offsets. + + Parameters + ---------- + freq : Period frequency + + Returns + ------- + TimedeltaArray/Index + """ + # Deprecaation GH#34853 + warnings.warn( + "to_perioddelta is deprecated and will be removed in a " + "future version. " + "Use `dtindex - dtindex.to_period(freq).to_timestamp()` instead.", + FutureWarning, + # stacklevel chosen to be correct for when called from DatetimeIndex + stacklevel=find_stack_level(), + ) + from pandas.core.arrays.timedeltas import TimedeltaArray + + if self._ndarray.dtype != "M8[ns]": + raise NotImplementedError("Only supported for nanosecond resolution.") + + i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8 + m8delta = i8delta.view("m8[ns]") + return TimedeltaArray(m8delta) + + # ----------------------------------------------------------------- + # Properties - Vectorized Timestamp Properties/Methods + + def month_name(self, locale=None) -> npt.NDArray[np.object_]: + """ + Return the month names with specified locale. 
+ + Parameters + ---------- + locale : str, optional + Locale determining the language in which to return the month name. + Default is English locale. + + Returns + ------- + Series or Index + Series or Index of month names. + + Examples + -------- + >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3)) + >>> s + 0 2018-01-31 + 1 2018-02-28 + 2 2018-03-31 + dtype: datetime64[ns] + >>> s.dt.month_name() + 0 January + 1 February + 2 March + dtype: object + + >>> idx = pd.date_range(start='2018-01', freq='M', periods=3) + >>> idx + DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], + dtype='datetime64[ns]', freq='M') + >>> idx.month_name() + Index(['January', 'February', 'March'], dtype='object') + """ + values = self._local_timestamps() + + result = fields.get_date_name_field( + values, "month_name", locale=locale, reso=self._reso + ) + result = self._maybe_mask_results(result, fill_value=None) + return result + + def day_name(self, locale=None) -> npt.NDArray[np.object_]: + """ + Return the day names with specified locale. + + Parameters + ---------- + locale : str, optional + Locale determining the language in which to return the day name. + Default is English locale. + + Returns + ------- + Series or Index + Series or Index of day names. + + Examples + -------- + >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3)) + >>> s + 0 2018-01-01 + 1 2018-01-02 + 2 2018-01-03 + dtype: datetime64[ns] + >>> s.dt.day_name() + 0 Monday + 1 Tuesday + 2 Wednesday + dtype: object + + >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3) + >>> idx + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], + dtype='datetime64[ns]', freq='D') + >>> idx.day_name() + Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object') + """ + values = self._local_timestamps() + + result = fields.get_date_name_field( + values, "day_name", locale=locale, reso=self._reso + ) + result = self._maybe_mask_results(result, fill_value=None) + return result + + @property + def time(self) -> npt.NDArray[np.object_]: + """ + Returns numpy array of :class:`datetime.time` objects. + + The time part of the Timestamps. + """ + # If the Timestamps have a timezone that is not UTC, + # convert them into their i8 representation while + # keeping their timezone and not using UTC + timestamps = self._local_timestamps() + + return ints_to_pydatetime(timestamps, box="time", reso=self._reso) + + @property + def timetz(self) -> npt.NDArray[np.object_]: + """ + Returns numpy array of :class:`datetime.time` objects with timezones. + + The time part of the Timestamps. + """ + return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._reso) + + @property + def date(self) -> npt.NDArray[np.object_]: + """ + Returns numpy array of python :class:`datetime.date` objects. + + Namely, the date part of Timestamps without time and + timezone information. + """ + # If the Timestamps have a timezone that is not UTC, + # convert them into their i8 representation while + # keeping their timezone and not using UTC + timestamps = self._local_timestamps() + + return ints_to_pydatetime(timestamps, box="date", reso=self._reso) + + def isocalendar(self) -> DataFrame: + """ + Calculate year, week, and day according to the ISO 8601 standard. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + With columns year, week and day. + + See Also + -------- + Timestamp.isocalendar : Function return a 3-tuple containing ISO year, + week number, and weekday for the given Timestamp object. 
+ datetime.date.isocalendar : Return a named tuple object with + three components: year, week and weekday. + + Examples + -------- + >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4) + >>> idx.isocalendar() + year week day + 2019-12-29 2019 52 7 + 2019-12-30 2020 1 1 + 2019-12-31 2020 1 2 + 2020-01-01 2020 1 3 + >>> idx.isocalendar().week + 2019-12-29 52 + 2019-12-30 1 + 2019-12-31 1 + 2020-01-01 1 + Freq: D, Name: week, dtype: UInt32 + """ + from pandas import DataFrame + + values = self._local_timestamps() + sarray = fields.build_isocalendar_sarray(values, reso=self._reso) + iso_calendar_df = DataFrame( + sarray, columns=["year", "week", "day"], dtype="UInt32" + ) + if self._hasna: + iso_calendar_df.iloc[self._isnan] = None + return iso_calendar_df + + @property + def weekofyear(self): + """ + The week ordinal of the year. + + .. deprecated:: 1.1.0 + + weekofyear and week have been deprecated. + Please use DatetimeIndex.isocalendar().week instead. + """ + warnings.warn( + "weekofyear and week have been deprecated, please use " + "DatetimeIndex.isocalendar().week instead, which returns " + "a Series. To exactly reproduce the behavior of week and " + "weekofyear and return an Index, you may call " + "pd.Int64Index(idx.isocalendar().week)", + FutureWarning, + stacklevel=find_stack_level(), + ) + week_series = self.isocalendar().week + if week_series.hasnans: + return week_series.to_numpy(dtype="float64", na_value=np.nan) + return week_series.to_numpy(dtype="int64") + + week = weekofyear + + year = _field_accessor( + "year", + "Y", + """ + The year of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="Y") + ... ) + >>> datetime_series + 0 2000-12-31 + 1 2001-12-31 + 2 2002-12-31 + dtype: datetime64[ns] + >>> datetime_series.dt.year + 0 2000 + 1 2001 + 2 2002 + dtype: int64 + """, + ) + month = _field_accessor( + "month", + "M", + """ + The month as January=1, December=12. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="M") + ... ) + >>> datetime_series + 0 2000-01-31 + 1 2000-02-29 + 2 2000-03-31 + dtype: datetime64[ns] + >>> datetime_series.dt.month + 0 1 + 1 2 + 2 3 + dtype: int64 + """, + ) + day = _field_accessor( + "day", + "D", + """ + The day of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="D") + ... ) + >>> datetime_series + 0 2000-01-01 + 1 2000-01-02 + 2 2000-01-03 + dtype: datetime64[ns] + >>> datetime_series.dt.day + 0 1 + 1 2 + 2 3 + dtype: int64 + """, + ) + hour = _field_accessor( + "hour", + "h", + """ + The hours of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="h") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: datetime64[ns] + >>> datetime_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + minute = _field_accessor( + "minute", + "m", + """ + The minutes of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="T") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:01:00 + 2 2000-01-01 00:02:00 + dtype: datetime64[ns] + >>> datetime_series.dt.minute + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + second = _field_accessor( + "second", + "s", + """ + The seconds of the datetime. 
+ + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="s") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: datetime64[ns] + >>> datetime_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + microsecond = _field_accessor( + "microsecond", + "us", + """ + The microseconds of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="us") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00.000000 + 1 2000-01-01 00:00:00.000001 + 2 2000-01-01 00:00:00.000002 + dtype: datetime64[ns] + >>> datetime_series.dt.microsecond + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + nanosecond = _field_accessor( + "nanosecond", + "ns", + """ + The nanoseconds of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="ns") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00.000000000 + 1 2000-01-01 00:00:00.000000001 + 2 2000-01-01 00:00:00.000000002 + dtype: datetime64[ns] + >>> datetime_series.dt.nanosecond + 0 0 + 1 1 + 2 2 + dtype: int64 + """, + ) + _dayofweek_doc = """ + The day of the week with Monday=0, Sunday=6. + + Return the day of the week. It is assumed the week starts on + Monday, which is denoted by 0 and ends on Sunday which is denoted + by 6. This method is available on both Series with datetime + values (using the `dt` accessor) or DatetimeIndex. + + Returns + ------- + Series or Index + Containing integers indicating the day number. + + See Also + -------- + Series.dt.dayofweek : Alias. + Series.dt.weekday : Alias. + Series.dt.day_name : Returns the name of the day of the week. + + Examples + -------- + >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series() + >>> s.dt.dayofweek + 2016-12-31 5 + 2017-01-01 6 + 2017-01-02 0 + 2017-01-03 1 + 2017-01-04 2 + 2017-01-05 3 + 2017-01-06 4 + 2017-01-07 5 + 2017-01-08 6 + Freq: D, dtype: int64 + """ + day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc) + dayofweek = day_of_week + weekday = day_of_week + + day_of_year = _field_accessor( + "dayofyear", + "doy", + """ + The ordinal day of the year. + """, + ) + dayofyear = day_of_year + quarter = _field_accessor( + "quarter", + "q", + """ + The quarter of the date. + """, + ) + days_in_month = _field_accessor( + "days_in_month", + "dim", + """ + The number of days in the month. + """, + ) + daysinmonth = days_in_month + _is_month_doc = """ + Indicates whether the date is the {first_or_last} day of the month. + + Returns + ------- + Series or array + For Series, returns a Series with boolean values. + For DatetimeIndex, returns a boolean array. + + See Also + -------- + is_month_start : Return a boolean indicating whether the date + is the first day of the month. + is_month_end : Return a boolean indicating whether the date + is the last day of the month. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. 
+ + >>> s = pd.Series(pd.date_range("2018-02-27", periods=3)) + >>> s + 0 2018-02-27 + 1 2018-02-28 + 2 2018-03-01 + dtype: datetime64[ns] + >>> s.dt.is_month_start + 0 False + 1 False + 2 True + dtype: bool + >>> s.dt.is_month_end + 0 False + 1 True + 2 False + dtype: bool + + >>> idx = pd.date_range("2018-02-27", periods=3) + >>> idx.is_month_start + array([False, False, True]) + >>> idx.is_month_end + array([False, True, False]) + """ + is_month_start = _field_accessor( + "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first") + ) + + is_month_end = _field_accessor( + "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last") + ) + + is_quarter_start = _field_accessor( + "is_quarter_start", + "is_quarter_start", + """ + Indicator for whether the date is the first day of a quarter. + + Returns + ------- + is_quarter_start : Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + quarter : Return the quarter of the date. + is_quarter_end : Similar property for indicating the quarter start. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30", + ... periods=4)}) + >>> df.assign(quarter=df.dates.dt.quarter, + ... is_quarter_start=df.dates.dt.is_quarter_start) + dates quarter is_quarter_start + 0 2017-03-30 1 False + 1 2017-03-31 1 False + 2 2017-04-01 2 True + 3 2017-04-02 2 False + + >>> idx = pd.date_range('2017-03-30', periods=4) + >>> idx + DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_quarter_start + array([False, False, True, False]) + """, + ) + is_quarter_end = _field_accessor( + "is_quarter_end", + "is_quarter_end", + """ + Indicator for whether the date is the last day of a quarter. + + Returns + ------- + is_quarter_end : Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + quarter : Return the quarter of the date. + is_quarter_start : Similar property indicating the quarter start. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30", + ... periods=4)}) + >>> df.assign(quarter=df.dates.dt.quarter, + ... is_quarter_end=df.dates.dt.is_quarter_end) + dates quarter is_quarter_end + 0 2017-03-30 1 False + 1 2017-03-31 1 True + 2 2017-04-01 2 False + 3 2017-04-02 2 False + + >>> idx = pd.date_range('2017-03-30', periods=4) + >>> idx + DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_quarter_end + array([False, True, False, False]) + """, + ) + is_year_start = _field_accessor( + "is_year_start", + "is_year_start", + """ + Indicate whether the date is the first day of a year. + + Returns + ------- + Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + is_year_end : Similar property indicating the last day of the year. 
+ + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3)) + >>> dates + 0 2017-12-30 + 1 2017-12-31 + 2 2018-01-01 + dtype: datetime64[ns] + + >>> dates.dt.is_year_start + 0 False + 1 False + 2 True + dtype: bool + + >>> idx = pd.date_range("2017-12-30", periods=3) + >>> idx + DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_year_start + array([False, False, True]) + """, + ) + is_year_end = _field_accessor( + "is_year_end", + "is_year_end", + """ + Indicate whether the date is the last day of the year. + + Returns + ------- + Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + is_year_start : Similar property indicating the start of the year. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3)) + >>> dates + 0 2017-12-30 + 1 2017-12-31 + 2 2018-01-01 + dtype: datetime64[ns] + + >>> dates.dt.is_year_end + 0 False + 1 True + 2 False + dtype: bool + + >>> idx = pd.date_range("2017-12-30", periods=3) + >>> idx + DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_year_end + array([False, True, False]) + """, + ) + is_leap_year = _field_accessor( + "is_leap_year", + "is_leap_year", + """ + Boolean indicator if the date belongs to a leap year. + + A leap year is a year, which has 366 days (instead of 365) including + 29th of February as an intercalary day. + Leap years are years which are multiples of four with the exception + of years divisible by 100 but not by 400. + + Returns + ------- + Series or ndarray + Booleans indicating if dates belong to a leap year. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y") + >>> idx + DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'], + dtype='datetime64[ns]', freq='A-DEC') + >>> idx.is_leap_year + array([ True, False, False]) + + >>> dates_series = pd.Series(idx) + >>> dates_series + 0 2012-12-31 + 1 2013-12-31 + 2 2014-12-31 + dtype: datetime64[ns] + >>> dates_series.dt.is_leap_year + 0 True + 1 False + 2 False + dtype: bool + """, + ) + + def to_julian_date(self) -> npt.NDArray[np.float64]: + """ + Convert Datetime Array to float64 ndarray of Julian Dates. + 0 Julian date is noon January 1, 4713 BC. 
+ https://en.wikipedia.org/wiki/Julian_day + """ + + # http://mysite.verizon.net/aesir_research/date/jdalg2.htm + year = np.asarray(self.year) + month = np.asarray(self.month) + day = np.asarray(self.day) + testarr = month < 3 + year[testarr] -= 1 + month[testarr] += 12 + return ( + day + + np.fix((153 * month - 457) / 5) + + 365 * year + + np.floor(year / 4) + - np.floor(year / 100) + + np.floor(year / 400) + + 1_721_118.5 + + ( + self.hour + + self.minute / 60 + + self.second / 3600 + + self.microsecond / 3600 / 10**6 + + self.nanosecond / 3600 / 10**9 + ) + / 24 + ) + + # ----------------------------------------------------------------- + # Reductions + + def std( + self, + axis=None, + dtype=None, + out=None, + ddof: int = 1, + keepdims: bool = False, + skipna: bool = True, + ): + """ + Return sample standard deviation over requested axis. + + Normalized by N-1 by default. This can be changed using the ddof argument + + Parameters + ---------- + axis : int optional, default None + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to `None`. + ddof : int, default 1 + Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result will be + NA. + + Returns + ------- + Timedelta + """ + # Because std is translation-invariant, we can get self.std + # by calculating (self - Timestamp(0)).std, and we can do it + # without creating a copy by using a view on self._ndarray + from pandas.core.arrays import TimedeltaArray + + # Find the td64 dtype with the same resolution as our dt64 dtype + dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64") + dtype = np.dtype(dtype_str) + + tda = TimedeltaArray._simple_new(self._ndarray.view(dtype), dtype=dtype) + + return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna) + + +# ------------------------------------------------------------------- +# Constructor Helpers + + +def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray: + """ + Parse/convert the passed data to either DatetimeArray or np.ndarray[object]. + """ + result, tz, freq = _sequence_to_dt64ns( + data, + allow_mixed=True, + require_iso8601=require_iso8601, + ) + + dtype = tz_to_dtype(tz) + dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype) + return dta + + +def _sequence_to_dt64ns( + data, + dtype=None, + copy: bool = False, + tz=None, + dayfirst: bool = False, + yearfirst: bool = False, + ambiguous="raise", + *, + allow_mixed: bool = False, + require_iso8601: bool = False, +): + """ + Parameters + ---------- + data : list-like + dtype : dtype, str, or None, default None + copy : bool, default False + tz : tzinfo, str, or None, default None + dayfirst : bool, default False + yearfirst : bool, default False + ambiguous : str, bool, or arraylike, default 'raise' + See pandas._libs.tslibs.tzconversion.tz_localize_to_utc. + allow_mixed : bool, default False + Interpret integers as timestamps when datetime objects are also present. + require_iso8601 : bool, default False + Only consider ISO-8601 formats when parsing strings. + + Returns + ------- + result : numpy.ndarray + The sequence converted to a numpy array with dtype ``datetime64[ns]``. + tz : tzinfo or None + Either the user-provided tzinfo or one inferred from the data. + inferred_freq : Tick or None + The inferred frequency of the sequence. 
+ + Raises + ------ + TypeError : PeriodDType data is passed + """ + + inferred_freq = None + + dtype = _validate_dt64_dtype(dtype) + tz = timezones.maybe_get_tz(tz) + + # if dtype has an embedded tz, capture it + tz = validate_tz_from_dtype(dtype, tz) + + data, copy = dtl.ensure_arraylike_for_datetimelike( + data, copy, cls_name="DatetimeArray" + ) + + if isinstance(data, DatetimeArray): + inferred_freq = data.freq + + # By this point we are assured to have either a numpy array or Index + data, copy = maybe_convert_dtype(data, copy, tz=tz) + data_dtype = getattr(data, "dtype", None) + + if ( + is_object_dtype(data_dtype) + or is_string_dtype(data_dtype) + or is_sparse(data_dtype) + ): + # TODO: We do not have tests specific to string-dtypes, + # also complex or categorical or other extension + copy = False + if lib.infer_dtype(data, skipna=False) == "integer": + data = data.astype(np.int64) + else: + # data comes back here as either i8 to denote UTC timestamps + # or M8[ns] to denote wall times + data, inferred_tz = objects_to_datetime64ns( + data, + dayfirst=dayfirst, + yearfirst=yearfirst, + allow_object=False, + allow_mixed=allow_mixed, + require_iso8601=require_iso8601, + ) + if tz and inferred_tz: + # two timezones: convert to intended from base UTC repr + if data.dtype == "i8": + # GH#42505 + # by convention, these are _already_ UTC, e.g + return data.view(DT64NS_DTYPE), tz, None + + if timezones.is_utc(tz): + # Fastpath, avoid copy made in tzconversion + utc_vals = data.view("i8") + else: + utc_vals = tz_convert_from_utc(data.view("i8"), tz) + data = utc_vals.view(DT64NS_DTYPE) + elif inferred_tz: + tz = inferred_tz + + data_dtype = data.dtype + + # `data` may have originally been a Categorical[datetime64[ns, tz]], + # so we need to handle these types. + if is_datetime64tz_dtype(data_dtype): + # DatetimeArray -> ndarray + tz = _maybe_infer_tz(tz, data.tz) + result = data._ndarray + + elif is_datetime64_dtype(data_dtype): + # tz-naive DatetimeArray or ndarray[datetime64] + data = getattr(data, "_ndarray", data) + if data.dtype != DT64NS_DTYPE: + data = astype_overflowsafe(data, dtype=DT64NS_DTYPE) + copy = False + + if tz is not None: + # Convert tz-naive to UTC + tz = timezones.maybe_get_tz(tz) + # TODO: if tz is UTC, are there situations where we *don't* want a + # copy? tz_localize_to_utc always makes one. + data = tzconversion.tz_localize_to_utc( + data.view("i8"), tz, ambiguous=ambiguous + ) + data = data.view(DT64NS_DTYPE) + + assert data.dtype == DT64NS_DTYPE, data.dtype + result = data + + else: + # must be integer dtype otherwise + # assume this data are epoch timestamps + if tz: + tz = timezones.maybe_get_tz(tz) + + if data.dtype != INT64_DTYPE: + data = data.astype(np.int64, copy=False) + result = data.view(DT64NS_DTYPE) + + if copy: + result = result.copy() + + assert isinstance(result, np.ndarray), type(result) + assert result.dtype == "M8[ns]", result.dtype + + # We have to call this again after possibly inferring a tz above + validate_tz_from_dtype(dtype, tz) + + return result, tz, inferred_freq + + +def objects_to_datetime64ns( + data: np.ndarray, + dayfirst, + yearfirst, + utc=False, + errors="raise", + require_iso8601: bool = False, + allow_object: bool = False, + allow_mixed: bool = False, +): + """ + Convert data to array of timestamps. + + Parameters + ---------- + data : np.ndarray[object] + dayfirst : bool + yearfirst : bool + utc : bool, default False + Whether to convert timezone-aware timestamps to UTC. 
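objects_to_datetime64ns is the machinery behind pd.to_datetime; an illustrative sketch of how the utc flag documented here surfaces publicly (assuming a standard pandas install):

    import pandas as pd

    # Strings with different UTC offsets; utc=True converts them all to one
    # tz-aware UTC result instead of falling back to object dtype.
    vals = ["2020-01-01 00:00:00+01:00", "2020-01-01 00:00:00+05:00"]
    print(pd.to_datetime(vals, utc=True))
    # expected: DatetimeIndex(['2019-12-31 23:00:00+00:00', '2019-12-31 19:00:00+00:00'],
    #                         dtype='datetime64[ns, UTC]', freq=None)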
+ errors : {'raise', 'ignore', 'coerce'} + require_iso8601 : bool, default False + allow_object : bool + Whether to return an object-dtype ndarray instead of raising if the + data contains more than one timezone. + allow_mixed : bool, default False + Interpret integers as timestamps when datetime objects are also present. + + Returns + ------- + result : ndarray + np.int64 dtype if returned values represent UTC timestamps + np.datetime64[ns] if returned values represent wall times + object if mixed timezones + inferred_tz : tzinfo or None + + Raises + ------ + ValueError : if data cannot be converted to datetimes + """ + assert errors in ["raise", "ignore", "coerce"] + + # if str-dtype, convert + data = np.array(data, copy=False, dtype=np.object_) + + flags = data.flags + order: Literal["F", "C"] = "F" if flags.f_contiguous else "C" + try: + result, tz_parsed = tslib.array_to_datetime( + data.ravel("K"), + errors=errors, + utc=utc, + dayfirst=dayfirst, + yearfirst=yearfirst, + require_iso8601=require_iso8601, + allow_mixed=allow_mixed, + ) + result = result.reshape(data.shape, order=order) + except OverflowError as err: + # Exception is raised when a part of date is greater than 32 bit signed int + raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err + + if tz_parsed is not None: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + # Return i8 values to denote unix timestamps + return result.view("i8"), tz_parsed + elif is_datetime64_dtype(result): + # returning M8[ns] denotes wall-times; since tz is None + # the distinction is a thin one + return result, tz_parsed + elif is_object_dtype(result): + # GH#23675 when called via `pd.to_datetime`, returning an object-dtype + # array is allowed. When called via `pd.DatetimeIndex`, we can + # only accept datetime64 dtype, so raise TypeError if object-dtype + # is returned, as that indicates the values can be recognized as + # datetimes but they have conflicting timezones/awareness + if allow_object: + return result, tz_parsed + raise TypeError(result) + else: # pragma: no cover + # GH#23675 this TypeError should never be hit, whereas the TypeError + # in the object-dtype branch above is reachable. + raise TypeError(result) + + +def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): + """ + Convert data based on dtype conventions, issuing deprecation warnings + or errors where appropriate. + + Parameters + ---------- + data : np.ndarray or pd.Index + copy : bool + tz : tzinfo or None, default None + + Returns + ------- + data : np.ndarray or pd.Index + copy : bool + + Raises + ------ + TypeError : PeriodDType data is passed + """ + if not hasattr(data, "dtype"): + # e.g. collections.deque + return data, copy + + if is_float_dtype(data.dtype): + # Note: we must cast to datetime64[ns] here in order to treat these + # as wall-times instead of UTC timestamps. + data = data.astype(DT64NS_DTYPE) + copy = False + if ( + tz is not None + and len(data) > 0 + and not timezones.is_utc(timezones.maybe_get_tz(tz)) + ): + # GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes + warnings.warn( + "The behavior of DatetimeArray._from_sequence with a timezone-aware " + "dtype and floating-dtype data is deprecated. In a future version, " + "this data will be interpreted as nanosecond UTC timestamps " + "instead of wall-times, matching the behavior with integer dtypes. " + "To retain the old behavior, explicitly cast to 'datetime64[ns]' " + "before passing the data to pandas. 
To get the future behavior, " + "first cast to 'int64'.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): + # GH#29794 enforcing deprecation introduced in GH#23539 + raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]") + elif is_period_dtype(data.dtype): + # Note: without explicitly raising here, PeriodIndex + # test_setops.test_join_does_not_recur fails + raise TypeError( + "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead" + ) + + elif is_extension_array_dtype(data.dtype) and not is_datetime64tz_dtype(data.dtype): + # TODO: We have no tests for these + data = np.array(data, dtype=np.object_) + copy = False + + return data, copy + + +# ------------------------------------------------------------------- +# Validation and Inference + + +def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None: + """ + If a timezone is inferred from data, check that it is compatible with + the user-provided timezone, if any. + + Parameters + ---------- + tz : tzinfo or None + inferred_tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if both timezones are present but do not match + """ + if tz is None: + tz = inferred_tz + elif inferred_tz is None: + pass + elif not timezones.tz_compare(tz, inferred_tz): + raise TypeError( + f"data is already tz-aware {inferred_tz}, unable to " + f"set specified tz: {tz}" + ) + return tz + + +def _validate_dt64_dtype(dtype): + """ + Check that a dtype, if passed, represents either a numpy datetime64[ns] + dtype or a pandas DatetimeTZDtype. + + Parameters + ---------- + dtype : object + + Returns + ------- + dtype : None, numpy.dtype, or DatetimeTZDtype + + Raises + ------ + ValueError : invalid dtype + + Notes + ----- + Unlike validate_tz_from_dtype, this does _not_ allow non-existent + tz errors to go through + """ + if dtype is not None: + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("M8")): + # no precision, disallowed GH#24806 + msg = ( + "Passing in 'datetime64' dtype with no precision is not allowed. " + "Please pass in 'datetime64[ns]' instead." + ) + raise ValueError(msg) + + if (isinstance(dtype, np.dtype) and dtype != DT64NS_DTYPE) or not isinstance( + dtype, (np.dtype, DatetimeTZDtype) + ): + raise ValueError( + f"Unexpected value for 'dtype': '{dtype}'. " + "Must be 'datetime64[ns]' or DatetimeTZDtype'." + ) + + if getattr(dtype, "tz", None): + # https://github.com/pandas-dev/pandas/issues/18595 + # Ensure that we have a standard timezone for pytz objects. + # Without this, things like adding an array of timedeltas and + # a tz-aware Timestamp (with a tz specific to its datetime) will + # be incorrect(ish?) for the array as a whole + dtype = cast(DatetimeTZDtype, dtype) + dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) + + return dtype + + +def validate_tz_from_dtype(dtype, tz: tzinfo | None) -> tzinfo | None: + """ + If the given dtype is a DatetimeTZDtype, extract the implied + tzinfo object from it and check that it does not conflict with the given + tz. 
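An illustrative sketch of the tz/dtype interaction that validate_tz_from_dtype enforces, seen through the public DatetimeIndex constructor (assuming a standard pandas install):

    import pandas as pd

    # A tz-aware dtype alone is enough to localize the result.
    idx = pd.DatetimeIndex(["2020-01-01"], dtype="datetime64[ns, UTC]")
    print(idx.tz)    # expected: UTC

    # A conflicting tz= together with a tz-aware dtype should raise the
    # "cannot supply both a tz and a dtype with a tz" error.
    try:
        pd.DatetimeIndex(["2020-01-01"], tz="US/Eastern", dtype="datetime64[ns, UTC]")
    except ValueError as err:
        print(err)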
+ + Parameters + ---------- + dtype : dtype, str + tz : None, tzinfo + + Returns + ------- + tz : consensus tzinfo + + Raises + ------ + ValueError : on tzinfo mismatch + """ + if dtype is not None: + if isinstance(dtype, str): + try: + dtype = DatetimeTZDtype.construct_from_string(dtype) + except TypeError: + # Things like `datetime64[ns]`, which is OK for the + # constructors, but also nonsense, which should be validated + # but not by us. We *do* allow non-existent tz errors to + # go through + pass + dtz = getattr(dtype, "tz", None) + if dtz is not None: + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError("cannot supply both a tz and a dtype with a tz") + tz = dtz + + if tz is not None and is_datetime64_dtype(dtype): + # We also need to check for the case where the user passed a + # tz-naive dtype (i.e. datetime64[ns]) + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError( + "cannot supply both a tz and a " + "timezone-naive dtype (i.e. datetime64[ns])" + ) + + return tz + + +def _infer_tz_from_endpoints( + start: Timestamp, end: Timestamp, tz: tzinfo | None +) -> tzinfo | None: + """ + If a timezone is not explicitly given via `tz`, see if one can + be inferred from the `start` and `end` endpoints. If more than one + of these inputs provides a timezone, require that they all agree. + + Parameters + ---------- + start : Timestamp + end : Timestamp + tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if start and end timezones do not agree + """ + try: + inferred_tz = timezones.infer_tzinfo(start, end) + except AssertionError as err: + # infer_tzinfo raises AssertionError if passed mismatched timezones + raise TypeError( + "Start and end cannot both be tz-aware with different timezones" + ) from err + + inferred_tz = timezones.maybe_get_tz(inferred_tz) + tz = timezones.maybe_get_tz(tz) + + if tz is not None and inferred_tz is not None: + if not timezones.tz_compare(inferred_tz, tz): + raise AssertionError("Inferred time zone not equal to passed time zone") + + elif inferred_tz is not None: + tz = inferred_tz + + return tz + + +def _maybe_normalize_endpoints( + start: Timestamp | None, end: Timestamp | None, normalize: bool +): + _normalized = True + + if start is not None: + if normalize: + start = start.normalize() + _normalized = True + else: + _normalized = _normalized and start.time() == _midnight + + if end is not None: + if normalize: + end = end.normalize() + _normalized = True + else: + _normalized = _normalized and end.time() == _midnight + + return start, end, _normalized + + +def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent): + """ + Localize a start or end Timestamp to the timezone of the corresponding + start or end Timestamp + + Parameters + ---------- + ts : start or end Timestamp to potentially localize + is_none : argument that should be None + is_not_none : argument that should not be None + freq : Tick, DateOffset, or None + tz : str, timezone object or None + ambiguous: str, localization behavior for ambiguous times + nonexistent: str, localization behavior for nonexistent times + + Returns + ------- + ts : Timestamp + """ + # Make sure start and end are timezone localized if: + # 1) freq = a Timedelta-like frequency (Tick) + # 2) freq = None i.e. 
generating a linspaced range + if is_none is None and is_not_none is not None: + # Note: We can't ambiguous='infer' a singular ambiguous time; however, + # we have historically defaulted ambiguous=False + ambiguous = ambiguous if ambiguous != "infer" else False + localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None} + if isinstance(freq, Tick) or freq is None: + localize_args["tz"] = tz + ts = ts.tz_localize(**localize_args) + return ts + + +def generate_range(start=None, end=None, periods=None, offset=BDay()): + """ + Generates a sequence of dates corresponding to the specified time + offset. Similar to dateutil.rrule except uses pandas DateOffset + objects to represent time increments. + + Parameters + ---------- + start : datetime, (default None) + end : datetime, (default None) + periods : int, (default None) + offset : DateOffset, (default BDay()) + + Notes + ----- + * This method is faster for generating weekdays than dateutil.rrule + * At least two of (start, end, periods) must be specified. + * If both start and end are specified, the returned dates will + satisfy start <= date <= end. + + Returns + ------- + dates : generator object + """ + offset = to_offset(offset) + + start = Timestamp(start) + start = start if start is not NaT else None + end = Timestamp(end) + end = end if end is not NaT else None + + if start and not offset.is_on_offset(start): + start = offset.rollforward(start) + + elif end and not offset.is_on_offset(end): + end = offset.rollback(end) + + if periods is None and end < start and offset.n >= 0: + end = None + periods = 0 + + if end is None: + end = start + (periods - 1) * offset + + if start is None: + start = end - (periods - 1) * offset + + cur = start + if offset.n >= 0: + while cur <= end: + yield cur + + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + + # faster than cur + offset + next_date = offset._apply(cur) + if next_date <= cur: + raise ValueError(f"Offset {offset} did not increment date") + cur = next_date + else: + while cur >= end: + yield cur + + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + + # faster than cur + offset + next_date = offset._apply(cur) + if next_date >= cur: + raise ValueError(f"Offset {offset} did not decrement date") + cur = next_date diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py new file mode 100644 index 00000000..0c14fac5 --- /dev/null +++ b/pandas/core/arrays/floating.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import numpy as np + +from pandas.core.dtypes.common import is_float_dtype +from pandas.core.dtypes.dtypes import register_extension_dtype + +from pandas.core.arrays.numeric import ( + NumericArray, + NumericDtype, +) + + +class FloatingDtype(NumericDtype): + """ + An ExtensionDtype to hold a single size of floating dtype. + + These specific implementations are subclasses of the non-public + FloatingDtype. For example we have Float32Dtype to represent float32. + + The attributes name & type are set when these subclasses are created. + """ + + _default_np_dtype = np.dtype(np.float64) + _checker = is_float_dtype + + @classmethod + def construct_array_type(cls) -> type[FloatingArray]: + """ + Return the array type associated with this dtype. 
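The generate_range helper above is the low-level engine behind offset-based ranges such as pd.bdate_range; a minimal usage sketch (assuming a standard pandas install):

    import pandas as pd

    # 2023-01-01 is a Sunday, so the BDay offset rolls the start forward to Monday.
    print(pd.bdate_range("2023-01-01", periods=3))
    # expected: DatetimeIndex(['2023-01-02', '2023-01-03', '2023-01-04'],
    #                         dtype='datetime64[ns]', freq='B')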
+ + Returns + ------- + type + """ + return FloatingArray + + @classmethod + def _str_to_dtype_mapping(cls): + return FLOAT_STR_TO_DTYPE + + @classmethod + def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray: + """ + Safely cast the values to the given dtype. + + "safe" in this context means the casting is lossless. + """ + # This is really only here for compatibility with IntegerDtype + # Here for compat with IntegerDtype + return values.astype(dtype, copy=copy) + + +class FloatingArray(NumericArray): + """ + Array of floating (optional missing) values. + + .. versionadded:: 1.2.0 + + .. warning:: + + FloatingArray is currently experimental, and its API or internal + implementation may change without warning. Especially the behaviour + regarding NaN (distinct from NA missing values) is subject to change. + + We represent a FloatingArray with 2 numpy arrays: + + - data: contains a numpy float array of the appropriate dtype + - mask: a boolean array holding a mask on the data, True is missing + + To construct an FloatingArray from generic array-like input, use + :func:`pandas.array` with one of the float dtypes (see examples). + + See :ref:`integer_na` for more. + + Parameters + ---------- + values : numpy.ndarray + A 1-d float-dtype array. + mask : numpy.ndarray + A 1-d boolean-dtype array indicating missing values. + copy : bool, default False + Whether to copy the `values` and `mask`. + + Attributes + ---------- + None + + Methods + ------- + None + + Returns + ------- + FloatingArray + + Examples + -------- + Create an FloatingArray with :func:`pandas.array`: + + >>> pd.array([0.1, None, 0.3], dtype=pd.Float32Dtype()) + + [0.1, , 0.3] + Length: 3, dtype: Float32 + + String aliases for the dtypes are also available. They are capitalized. + + >>> pd.array([0.1, None, 0.3], dtype="Float32") + + [0.1, , 0.3] + Length: 3, dtype: Float32 + """ + + _dtype_cls = FloatingDtype + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value = np.nan + # Fill values used for any/all + _truthy_value = 1.0 + _falsey_value = 0.0 + + +_dtype_docstring = """ +An ExtensionDtype for {dtype} data. + +This dtype uses ``pd.NA`` as missing value indicator. + +Attributes +---------- +None + +Methods +------- +None +""" + +# create the Dtype + + +@register_extension_dtype +class Float32Dtype(FloatingDtype): + type = np.float32 + name = "Float32" + __doc__ = _dtype_docstring.format(dtype="float32") + + +@register_extension_dtype +class Float64Dtype(FloatingDtype): + type = np.float64 + name = "Float64" + __doc__ = _dtype_docstring.format(dtype="float64") + + +FLOAT_STR_TO_DTYPE = { + "float32": Float32Dtype(), + "float64": Float64Dtype(), +} diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py new file mode 100644 index 00000000..24e5fa1b --- /dev/null +++ b/pandas/core/arrays/integer.py @@ -0,0 +1,224 @@ +from __future__ import annotations + +import numpy as np + +from pandas.core.dtypes.base import register_extension_dtype +from pandas.core.dtypes.common import is_integer_dtype + +from pandas.core.arrays.numeric import ( + NumericArray, + NumericDtype, +) + + +class IntegerDtype(NumericDtype): + """ + An ExtensionDtype to hold a single size & kind of integer dtype. + + These specific implementations are subclasses of the non-public + IntegerDtype. For example we have Int8Dtype to represent signed int 8s. + + The attributes name & type are set when these subclasses are created. 
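The masked layout described above (separate data and mask arrays) is what lets the nullable float and integer dtypes propagate pd.NA through arithmetic; an illustrative sketch (assuming a standard pandas install):

    import pandas as pd

    a = pd.array([0.1, None, 0.3], dtype="Float64")
    print(a + 1)
    # expected: <FloatingArray> [1.1, <NA>, 1.3]  (Length: 3, dtype: Float64)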
+ """ + + _default_np_dtype = np.dtype(np.int64) + _checker = is_integer_dtype + + @classmethod + def construct_array_type(cls) -> type[IntegerArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return IntegerArray + + @classmethod + def _str_to_dtype_mapping(cls): + return INT_STR_TO_DTYPE + + @classmethod + def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray: + """ + Safely cast the values to the given dtype. + + "safe" in this context means the casting is lossless. e.g. if 'values' + has a floating dtype, each value must be an integer. + """ + try: + return values.astype(dtype, casting="safe", copy=copy) + except TypeError as err: + casted = values.astype(dtype, copy=copy) + if (casted == values).all(): + return casted + + raise TypeError( + f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}" + ) from err + + +class IntegerArray(NumericArray): + """ + Array of integer (optional missing) values. + + .. versionchanged:: 1.0.0 + + Now uses :attr:`pandas.NA` as the missing value rather + than :attr:`numpy.nan`. + + .. warning:: + + IntegerArray is currently experimental, and its API or internal + implementation may change without warning. + + We represent an IntegerArray with 2 numpy arrays: + + - data: contains a numpy integer array of the appropriate dtype + - mask: a boolean array holding a mask on the data, True is missing + + To construct an IntegerArray from generic array-like input, use + :func:`pandas.array` with one of the integer dtypes (see examples). + + See :ref:`integer_na` for more. + + Parameters + ---------- + values : numpy.ndarray + A 1-d integer-dtype array. + mask : numpy.ndarray + A 1-d boolean-dtype array indicating missing values. + copy : bool, default False + Whether to copy the `values` and `mask`. + + Attributes + ---------- + None + + Methods + ------- + None + + Returns + ------- + IntegerArray + + Examples + -------- + Create an IntegerArray with :func:`pandas.array`. + + >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype()) + >>> int_array + + [1, , 3] + Length: 3, dtype: Int32 + + String aliases for the dtypes are also available. They are capitalized. + + >>> pd.array([1, None, 3], dtype='Int32') + + [1, , 3] + Length: 3, dtype: Int32 + + >>> pd.array([1, None, 3], dtype='UInt16') + + [1, , 3] + Length: 3, dtype: UInt16 + """ + + _dtype_cls = IntegerDtype + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value = 1 + # Fill values used for any/all + _truthy_value = 1 + _falsey_value = 0 + + +_dtype_docstring = """ +An ExtensionDtype for {dtype} integer data. + +.. versionchanged:: 1.0.0 + + Now uses :attr:`pandas.NA` as its missing value, + rather than :attr:`numpy.nan`. 
+ +Attributes +---------- +None + +Methods +------- +None +""" + +# create the Dtype + + +@register_extension_dtype +class Int8Dtype(IntegerDtype): + type = np.int8 + name = "Int8" + __doc__ = _dtype_docstring.format(dtype="int8") + + +@register_extension_dtype +class Int16Dtype(IntegerDtype): + type = np.int16 + name = "Int16" + __doc__ = _dtype_docstring.format(dtype="int16") + + +@register_extension_dtype +class Int32Dtype(IntegerDtype): + type = np.int32 + name = "Int32" + __doc__ = _dtype_docstring.format(dtype="int32") + + +@register_extension_dtype +class Int64Dtype(IntegerDtype): + type = np.int64 + name = "Int64" + __doc__ = _dtype_docstring.format(dtype="int64") + + +@register_extension_dtype +class UInt8Dtype(IntegerDtype): + type = np.uint8 + name = "UInt8" + __doc__ = _dtype_docstring.format(dtype="uint8") + + +@register_extension_dtype +class UInt16Dtype(IntegerDtype): + type = np.uint16 + name = "UInt16" + __doc__ = _dtype_docstring.format(dtype="uint16") + + +@register_extension_dtype +class UInt32Dtype(IntegerDtype): + type = np.uint32 + name = "UInt32" + __doc__ = _dtype_docstring.format(dtype="uint32") + + +@register_extension_dtype +class UInt64Dtype(IntegerDtype): + type = np.uint64 + name = "UInt64" + __doc__ = _dtype_docstring.format(dtype="uint64") + + +INT_STR_TO_DTYPE: dict[str, IntegerDtype] = { + "int8": Int8Dtype(), + "int16": Int16Dtype(), + "int32": Int32Dtype(), + "int64": Int64Dtype(), + "uint8": UInt8Dtype(), + "uint16": UInt16Dtype(), + "uint32": UInt32Dtype(), + "uint64": UInt64Dtype(), +} diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py new file mode 100644 index 00000000..ea5c6d52 --- /dev/null +++ b/pandas/core/arrays/interval.py @@ -0,0 +1,1748 @@ +from __future__ import annotations + +import operator +from operator import ( + le, + lt, +) +import textwrap +from typing import ( + TYPE_CHECKING, + Literal, + Sequence, + TypeVar, + Union, + cast, + overload, +) + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import lib +from pandas._libs.interval import ( + VALID_CLOSED, + Interval, + IntervalMixin, + intervals_to_interval_bounds, +) +from pandas._libs.missing import NA +from pandas._typing import ( + ArrayLike, + Dtype, + IntervalClosedType, + NpDtype, + PositionalIndexer, + ScalarIndexer, + SequenceIndexer, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import IntCastingNaNError +from pandas.util._decorators import ( + Appender, + deprecate_nonkeyword_arguments, +) + +from pandas.core.dtypes.cast import LossySetitemError +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_interval_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import IntervalDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCIntervalIndex, + ABCPeriodIndex, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) + +from pandas.core.algorithms import ( + isin, + take, + unique, + value_counts, +) +from pandas.core.arrays.base import ( + ExtensionArray, + _extension_array_shared_docs, +) +import pandas.core.common as com +from pandas.core.construction import ( + array as pd_array, + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import check_array_indexer +from pandas.core.indexes.base import ensure_index +from 
pandas.core.ops import ( + invalid_comparison, + unpack_zerodim_and_defer, +) + +if TYPE_CHECKING: + from pandas import ( + Index, + Series, + ) + + +IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") +IntervalOrNA = Union[Interval, float] + +_interval_shared_docs: dict[str, str] = {} + +_shared_docs_kwargs = { + "klass": "IntervalArray", + "qualname": "arrays.IntervalArray", + "name": "", +} + + +_interval_shared_docs[ + "class" +] = """ +%(summary)s + +.. versionadded:: %(versionadded)s + +Parameters +---------- +data : array-like (1-dimensional) + Array-like containing Interval objects from which to build the + %(klass)s. +closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both or + neither. +dtype : dtype or None, default None + If None, dtype will be inferred. +copy : bool, default False + Copy the input data. +%(name)s\ +verify_integrity : bool, default True + Verify that the %(klass)s is valid. + +Attributes +---------- +left +right +closed +mid +length +is_empty +is_non_overlapping_monotonic +%(extra_attributes)s\ + +Methods +------- +from_arrays +from_tuples +from_breaks +contains +overlaps +set_closed +to_tuples +%(extra_methods)s\ + +See Also +-------- +Index : The base pandas Index type. +Interval : A bounded slice-like interval; the elements of an %(klass)s. +interval_range : Function to create a fixed frequency IntervalIndex. +cut : Bin values into discrete Intervals. +qcut : Bin values into equal-sized Intervals based on rank or sample quantiles. + +Notes +----- +See the `user guide +`__ +for more. + +%(examples)s\ +""" + + +@Appender( + _interval_shared_docs["class"] + % { + "klass": "IntervalArray", + "summary": "Pandas array for interval data that are closed on the same side.", + "versionadded": "0.24.0", + "name": "", + "extra_attributes": "", + "extra_methods": "", + "examples": textwrap.dedent( + """\ + Examples + -------- + A new ``IntervalArray`` can be constructed directly from an array-like of + ``Interval`` objects: + + >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) + + [(0, 1], (1, 5]] + Length: 2, dtype: interval[int64, right] + + It may also be constructed using one of the constructor + methods: :meth:`IntervalArray.from_arrays`, + :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. + """ + ), + } +) +class IntervalArray(IntervalMixin, ExtensionArray): + can_hold_na = True + _na_value = _fill_value = np.nan + + @property + def ndim(self) -> Literal[1]: + return 1 + + # To make mypy recognize the fields + _left: np.ndarray + _right: np.ndarray + _dtype: IntervalDtype + + # --------------------------------------------------------------------- + # Constructors + + def __new__( + cls: type[IntervalArrayT], + data, + closed=None, + dtype: Dtype | None = None, + copy: bool = False, + verify_integrity: bool = True, + ): + + data = extract_array(data, extract_numpy=True) + + if isinstance(data, cls): + left = data._left + right = data._right + closed = closed or data.closed + else: + + # don't allow scalars + if is_scalar(data): + msg = ( + f"{cls.__name__}(...) 
must be called with a collection " + f"of some kind, {data} was passed" + ) + raise TypeError(msg) + + # might need to convert empty or purely na data + data = _maybe_convert_platform_interval(data) + left, right, infer_closed = intervals_to_interval_bounds( + data, validate_closed=closed is None + ) + if left.dtype == object: + left = lib.maybe_convert_objects(left) + right = lib.maybe_convert_objects(right) + closed = closed or infer_closed + + return cls._simple_new( + left, + right, + closed, + copy=copy, + dtype=dtype, + verify_integrity=verify_integrity, + ) + + @classmethod + def _simple_new( + cls: type[IntervalArrayT], + left, + right, + closed: IntervalClosedType | None = None, + copy: bool = False, + dtype: Dtype | None = None, + verify_integrity: bool = True, + ) -> IntervalArrayT: + result = IntervalMixin.__new__(cls) + + if closed is None and isinstance(dtype, IntervalDtype): + closed = dtype.closed + + closed = closed or "right" + left = ensure_index(left, copy=copy) + right = ensure_index(right, copy=copy) + + if dtype is not None: + # GH 19262: dtype must be an IntervalDtype to override inferred + dtype = pandas_dtype(dtype) + if is_interval_dtype(dtype): + dtype = cast(IntervalDtype, dtype) + if dtype.subtype is not None: + left = left.astype(dtype.subtype) + right = right.astype(dtype.subtype) + else: + msg = f"dtype must be an IntervalDtype, got {dtype}" + raise TypeError(msg) + + if dtype.closed is None: + # possibly loading an old pickle + dtype = IntervalDtype(dtype.subtype, closed) + elif closed != dtype.closed: + raise ValueError("closed keyword does not match dtype.closed") + + # coerce dtypes to match if needed + if is_float_dtype(left) and is_integer_dtype(right): + right = right.astype(left.dtype) + elif is_float_dtype(right) and is_integer_dtype(left): + left = left.astype(right.dtype) + + if type(left) != type(right): + msg = ( + f"must not have differing left [{type(left).__name__}] and " + f"right [{type(right).__name__}] types" + ) + raise ValueError(msg) + elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalArray" + ) + raise TypeError(msg) + elif isinstance(left, ABCPeriodIndex): + msg = "Period dtypes are not supported, use a PeriodIndex instead" + raise ValueError(msg) + elif isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz): + msg = ( + "left and right must have the same time zone, got " + f"'{left.tz}' and '{right.tz}'" + ) + raise ValueError(msg) + + # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray + left = ensure_wrapped_if_datetimelike(left) + left = extract_array(left, extract_numpy=True) + right = ensure_wrapped_if_datetimelike(right) + right = extract_array(right, extract_numpy=True) + + lbase = getattr(left, "_ndarray", left).base + rbase = getattr(right, "_ndarray", right).base + if lbase is not None and lbase is rbase: + # If these share data, then setitem could corrupt our IA + right = right.copy() + + dtype = IntervalDtype(left.dtype, closed=closed) + result._dtype = dtype + + result._left = left + result._right = right + if verify_integrity: + result._validate() + return result + + @classmethod + def _from_sequence( + cls: type[IntervalArrayT], + scalars, + *, + dtype: Dtype | None = None, + copy: bool = False, + ) -> IntervalArrayT: + return cls(scalars, dtype=dtype, copy=copy) + + @classmethod + def _from_factorized( + cls: type[IntervalArrayT], values: np.ndarray, original: 
IntervalArrayT + ) -> IntervalArrayT: + if len(values) == 0: + # An empty array returns object-dtype here. We can't create + # a new IA from an (empty) object-dtype array, so turn it into the + # correct dtype. + values = values.astype(original.dtype.subtype) + return cls(values, closed=original.closed) + + _interval_shared_docs["from_breaks"] = textwrap.dedent( + """ + Construct an %(klass)s from an array of splits. + + Parameters + ---------- + breaks : array-like (1-dimensional) + Left and right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither.\ + %(name)s + copy : bool, default False + Copy the data. + dtype : dtype or None, default None + If None, dtype will be inferred. + + Returns + ------- + %(klass)s + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct from a left and right array. + %(klass)s.from_tuples : Construct from a sequence of tuples. + + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_breaks"] + % { + "klass": "IntervalArray", + "name": "", + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3]) + + [(0, 1], (1, 2], (2, 3]] + Length: 3, dtype: interval[int64, right] + """ + ), + } + ) + def from_breaks( + cls: type[IntervalArrayT], + breaks, + closed: IntervalClosedType | None = "right", + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalArrayT: + breaks = _maybe_convert_platform_interval(breaks) + + return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype) + + _interval_shared_docs["from_arrays"] = textwrap.dedent( + """ + Construct from two arrays defining the left and right bounds. + + Parameters + ---------- + left : array-like (1-dimensional) + Left bounds for each interval. + right : array-like (1-dimensional) + Right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither.\ + %(name)s + copy : bool, default False + Copy the data. + dtype : dtype, optional + If None, dtype will be inferred. + + Returns + ------- + %(klass)s + + Raises + ------ + ValueError + When a value is missing in only one of `left` or `right`. + When a value in `left` is greater than the corresponding value + in `right`. + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_breaks : Construct an %(klass)s from an array of + splits. + %(klass)s.from_tuples : Construct an %(klass)s from an + array-like of tuples. + + Notes + ----- + Each element of `left` must be less than or equal to the `right` + element at the same position. If an element is missing, it must be + missing in both `left` and `right`. A TypeError is raised when + using an unsupported type for `left` or `right`. At the moment, + 'category', 'object', and 'string' subtypes are not supported. 
+ + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_arrays"] + % { + "klass": "IntervalArray", + "name": "", + "examples": textwrap.dedent( + """\ + >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3]) + + [(0, 1], (1, 2], (2, 3]] + Length: 3, dtype: interval[int64, right] + """ + ), + } + ) + def from_arrays( + cls: type[IntervalArrayT], + left, + right, + closed: IntervalClosedType | None = "right", + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalArrayT: + left = _maybe_convert_platform_interval(left) + right = _maybe_convert_platform_interval(right) + + return cls._simple_new( + left, right, closed, copy=copy, dtype=dtype, verify_integrity=True + ) + + _interval_shared_docs["from_tuples"] = textwrap.dedent( + """ + Construct an %(klass)s from an array-like of tuples. + + Parameters + ---------- + data : array-like (1-dimensional) + Array of tuples. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither.\ + %(name)s + copy : bool, default False + By-default copy the data, this is compat only and ignored. + dtype : dtype or None, default None + If None, dtype will be inferred. + + Returns + ------- + %(klass)s + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct an %(klass)s from a left and + right array. + %(klass)s.from_breaks : Construct an %(klass)s from an array of + splits. + + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_tuples"] + % { + "klass": "IntervalArray", + "name": "", + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]) + + [(0, 1], (1, 2]] + Length: 2, dtype: interval[int64, right] + """ + ), + } + ) + def from_tuples( + cls: type[IntervalArrayT], + data, + closed="right", + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalArrayT: + if len(data): + left, right = [], [] + else: + # ensure that empty data keeps input dtype + left = right = data + + for d in data: + if isna(d): + lhs = rhs = np.nan + else: + name = cls.__name__ + try: + # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] + lhs, rhs = d + except ValueError as err: + msg = f"{name}.from_tuples requires tuples of length 2, got {d}" + raise ValueError(msg) from err + except TypeError as err: + msg = f"{name}.from_tuples received an invalid item, {d}" + raise TypeError(msg) from err + left.append(lhs) + right.append(rhs) + + return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) + + def _validate(self): + """ + Verify that the IntervalArray is valid. 
+ + Checks that + + * closed is valid + * left and right match lengths + * left and right have the same missing values + * left is always below right + """ + if self.closed not in VALID_CLOSED: + msg = f"invalid option for 'closed': {self.closed}" + raise ValueError(msg) + if len(self._left) != len(self._right): + msg = "left and right must have the same length" + raise ValueError(msg) + left_mask = notna(self._left) + right_mask = notna(self._right) + if not (left_mask == right_mask).all(): + msg = ( + "missing values must be missing in the same " + "location both left and right sides" + ) + raise ValueError(msg) + if not (self._left[left_mask] <= self._right[left_mask]).all(): + msg = "left side of interval must be <= right side" + raise ValueError(msg) + + def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: + """ + Return a new IntervalArray with the replacement attributes + + Parameters + ---------- + left : Index + Values to be used for the left-side of the intervals. + right : Index + Values to be used for the right-side of the intervals. + """ + return self._simple_new(left, right, closed=self.closed, verify_integrity=False) + + # --------------------------------------------------------------------- + # Descriptive + + @property + def dtype(self) -> IntervalDtype: + return self._dtype + + @property + def nbytes(self) -> int: + return self.left.nbytes + self.right.nbytes + + @property + def size(self) -> int: + # Avoid materializing self.values + return self.left.size + + # --------------------------------------------------------------------- + # EA Interface + + def __iter__(self): + return iter(np.asarray(self)) + + def __len__(self) -> int: + return len(self._left) + + @overload + def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA: + ... + + @overload + def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT: + ... 
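The __getitem__ overloads above encode that a scalar key returns a single Interval (or the NA value) while slices and array-like keys return a new IntervalArray; a minimal sketch (assuming a standard pandas install):

    import pandas as pd

    arr = pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
    print(arr[0])     # expected: Interval(0, 1, closed='right')
    print(arr[1:])    # expected: <IntervalArray> [(1, 2], (2, 3]]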
+ + def __getitem__( + self: IntervalArrayT, key: PositionalIndexer + ) -> IntervalArrayT | IntervalOrNA: + key = check_array_indexer(self, key) + left = self._left[key] + right = self._right[key] + + if not isinstance(left, (np.ndarray, ExtensionArray)): + # scalar + if is_scalar(left) and isna(left): + return self._fill_value + return Interval(left, right, self.closed) + if np.ndim(left) > 1: + # GH#30588 multi-dimensional indexer disallowed + raise ValueError("multi-dimensional indexing not allowed") + return self._shallow_copy(left, right) + + def __setitem__(self, key, value) -> None: + value_left, value_right = self._validate_setitem_value(value) + key = check_array_indexer(self, key) + + self._left[key] = value_left + self._right[key] = value_right + + def _cmp_method(self, other, op): + # ensure pandas array for list-like and eliminate non-interval scalars + if is_list_like(other): + if len(self) != len(other): + raise ValueError("Lengths must match to compare") + other = pd_array(other) + elif not isinstance(other, Interval): + # non-interval scalar -> no matches + if other is NA: + # GH#31882 + from pandas.core.arrays import BooleanArray + + arr = np.empty(self.shape, dtype=bool) + mask = np.ones(self.shape, dtype=bool) + return BooleanArray(arr, mask) + return invalid_comparison(self, other, op) + + # determine the dtype of the elements we want to compare + if isinstance(other, Interval): + other_dtype = pandas_dtype("interval") + elif not is_categorical_dtype(other.dtype): + other_dtype = other.dtype + else: + # for categorical defer to categories for dtype + other_dtype = other.categories.dtype + + # extract intervals if we have interval categories with matching closed + if is_interval_dtype(other_dtype): + if self.closed != other.categories.closed: + return invalid_comparison(self, other, op) + + other = other.categories.take( + other.codes, allow_fill=True, fill_value=other.categories._na_value + ) + + # interval-like -> need same closed and matching endpoints + if is_interval_dtype(other_dtype): + if self.closed != other.closed: + return invalid_comparison(self, other, op) + elif not isinstance(other, Interval): + other = type(self)(other) + + if op is operator.eq: + return (self._left == other.left) & (self._right == other.right) + elif op is operator.ne: + return (self._left != other.left) | (self._right != other.right) + elif op is operator.gt: + return (self._left > other.left) | ( + (self._left == other.left) & (self._right > other.right) + ) + elif op is operator.ge: + return (self == other) | (self > other) + elif op is operator.lt: + return (self._left < other.left) | ( + (self._left == other.left) & (self._right < other.right) + ) + else: + # operator.lt + return (self == other) | (self < other) + + # non-interval/non-object dtype -> no matches + if not is_object_dtype(other_dtype): + return invalid_comparison(self, other, op) + + # object dtype -> iteratively check for intervals + result = np.zeros(len(self), dtype=bool) + for i, obj in enumerate(other): + try: + result[i] = op(self[i], obj) + except TypeError: + if obj is NA: + # comparison with np.nan returns NA + # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092 + result = result.astype(object) + result[i] = NA + else: + raise + return result + + @unpack_zerodim_and_defer("__eq__") + def __eq__(self, other): + return self._cmp_method(other, operator.eq) + + @unpack_zerodim_and_defer("__ne__") + def __ne__(self, other): + return self._cmp_method(other, operator.ne) + + 
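A short illustrative sketch of the elementwise comparison semantics implemented by _cmp_method above: equality requires matching endpoints and the same closed side (assuming a standard pandas install):

    import pandas as pd

    arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3)])
    print(arr == pd.Interval(0, 1))                   # expected: array([ True, False])
    # A different closed side never compares equal.
    print(arr == pd.Interval(0, 1, closed="left"))    # expected: array([False, False])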
@unpack_zerodim_and_defer("__gt__") + def __gt__(self, other): + return self._cmp_method(other, operator.gt) + + @unpack_zerodim_and_defer("__ge__") + def __ge__(self, other): + return self._cmp_method(other, operator.ge) + + @unpack_zerodim_and_defer("__lt__") + def __lt__(self, other): + return self._cmp_method(other, operator.lt) + + @unpack_zerodim_and_defer("__le__") + def __le__(self, other): + return self._cmp_method(other, operator.le) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def argsort( + self, + ascending: bool = True, + kind: str = "quicksort", + na_position: str = "last", + *args, + **kwargs, + ) -> np.ndarray: + ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) + + if ascending and kind == "quicksort" and na_position == "last": + return np.lexsort((self.right, self.left)) + + # TODO: other cases we can use lexsort for? much more performant. + return super().argsort( + ascending=ascending, kind=kind, na_position=na_position, **kwargs + ) + + def min(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: + nv.validate_minmax_axis(axis, self.ndim) + + if not len(self): + return self._na_value + + mask = self.isna() + if mask.any(): + if not skipna: + return self._na_value + obj = self[~mask] + else: + obj = self + + indexer = obj.argsort()[0] + return obj[indexer] + + def max(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: + nv.validate_minmax_axis(axis, self.ndim) + + if not len(self): + return self._na_value + + mask = self.isna() + if mask.any(): + if not skipna: + return self._na_value + obj = self[~mask] + else: + obj = self + + indexer = obj.argsort()[-1] + return obj[indexer] + + def fillna( + self: IntervalArrayT, value=None, method=None, limit=None + ) -> IntervalArrayT: + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, dict, Series + If a scalar value is passed it is used to fill all missing values. + Alternatively, a Series or dict can be used to fill in different + values for each index. The value should not be a list. The + value(s) passed should be either Interval objects or NA/NaN. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + (Not implemented yet for IntervalArray) + Method to use for filling holes in reindexed Series + limit : int, default None + (Not implemented yet for IntervalArray) + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. + + Returns + ------- + filled : IntervalArray with NA/NaN filled + """ + if method is not None: + raise TypeError("Filling by method is not supported for IntervalArray.") + if limit is not None: + raise TypeError("limit is not supported for IntervalArray.") + + value_left, value_right = self._validate_scalar(value) + + left = self.left.fillna(value=value_left) + right = self.right.fillna(value=value_right) + return self._shallow_copy(left, right) + + def astype(self, dtype, copy: bool = True): + """ + Cast to an ExtensionArray or NumPy array with dtype 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + + copy : bool, default True + Whether to copy the data, even if not necessary. 
If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ExtensionArray or ndarray + ExtensionArray or NumPy ndarray with 'dtype' for its dtype. + """ + from pandas import Index + + if dtype is not None: + dtype = pandas_dtype(dtype) + + if is_interval_dtype(dtype): + if dtype == self.dtype: + return self.copy() if copy else self + + # need to cast to different subtype + try: + # We need to use Index rules for astype to prevent casting + # np.nan entries to int subtypes + new_left = Index(self._left, copy=False).astype(dtype.subtype) + new_right = Index(self._right, copy=False).astype(dtype.subtype) + except IntCastingNaNError: + # e.g test_subtype_integer + raise + except (TypeError, ValueError) as err: + # e.g. test_subtype_integer_errors f8->u8 can be lossy + # and raises ValueError + msg = ( + f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" + ) + raise TypeError(msg) from err + return self._shallow_copy(new_left, new_right) + else: + try: + return super().astype(dtype, copy=copy) + except (TypeError, ValueError) as err: + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) from err + + def equals(self, other) -> bool: + if type(self) != type(other): + return False + + return bool( + self.closed == other.closed + and self.left.equals(other.left) + and self.right.equals(other.right) + ) + + @classmethod + def _concat_same_type( + cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT] + ) -> IntervalArrayT: + """ + Concatenate multiple IntervalArray + + Parameters + ---------- + to_concat : sequence of IntervalArray + + Returns + ------- + IntervalArray + """ + closed_set = {interval.closed for interval in to_concat} + if len(closed_set) != 1: + raise ValueError("Intervals must all be closed on the same side.") + closed = closed_set.pop() + + left = np.concatenate([interval.left for interval in to_concat]) + right = np.concatenate([interval.right for interval in to_concat]) + return cls._simple_new(left, right, closed=closed, copy=False) + + def copy(self: IntervalArrayT) -> IntervalArrayT: + """ + Return a copy of the array. + + Returns + ------- + IntervalArray + """ + left = self._left.copy() + right = self._right.copy() + closed = self.closed + # TODO: Could skip verify_integrity here. + return type(self).from_arrays(left, right, closed=closed) + + def isna(self) -> np.ndarray: + return isna(self._left) + + def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + # ExtensionArray.shift doesn't work for two reasons + # 1. IntervalArray.dtype.na_value may not be correct for the dtype. + # 2. IntervalArray._from_sequence only accepts NaN for missing values, + # not other values like NaT + + empty_len = min(abs(periods), len(self)) + if isna(fill_value): + from pandas import Index + + fill_value = Index(self._left, copy=False)._na_value + empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) + else: + empty = self._from_sequence([fill_value] * empty_len) + + if periods > 0: + a = empty + b = self[:-periods] + else: + a = self[abs(periods) :] + b = empty + return self._concat_same_type([a, b]) + + def take( + self: IntervalArrayT, + indices, + *, + allow_fill: bool = False, + fill_value=None, + axis=None, + **kwargs, + ) -> IntervalArrayT: + """ + Take elements from the IntervalArray. 
+ + Parameters + ---------- + indices : sequence of integers + Indices to be taken. + + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : Interval or NA, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. + + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + axis : any, default None + Present for compat with IntervalIndex; does nothing. + + Returns + ------- + IntervalArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + """ + nv.validate_take((), kwargs) + + fill_left = fill_right = fill_value + if allow_fill: + fill_left, fill_right = self._validate_scalar(fill_value) + + left_take = take( + self._left, indices, allow_fill=allow_fill, fill_value=fill_left + ) + right_take = take( + self._right, indices, allow_fill=allow_fill, fill_value=fill_right + ) + + return self._shallow_copy(left_take, right_take) + + def _validate_listlike(self, value): + # list-like of intervals + try: + array = IntervalArray(value) + self._check_closed_matches(array, name="value") + value_left, value_right = array.left, array.right + except TypeError as err: + # wrong type: not interval or NA + msg = f"'value' should be an interval type, got {type(value)} instead." + raise TypeError(msg) from err + + try: + self.left._validate_fill_value(value_left) + except (LossySetitemError, TypeError) as err: + msg = ( + "'value' should be a compatible interval type, " + f"got {type(value)} instead." + ) + raise TypeError(msg) from err + + return value_left, value_right + + def _validate_scalar(self, value): + if isinstance(value, Interval): + self._check_closed_matches(value, name="value") + left, right = value.left, value.right + # TODO: check subdtype match like _validate_setitem_value? + elif is_valid_na_for_dtype(value, self.left.dtype): + # GH#18295 + left = right = self.left._na_value + else: + raise TypeError( + "can only insert Interval objects and NA into an IntervalArray" + ) + return left, right + + def _validate_setitem_value(self, value): + + if is_valid_na_for_dtype(value, self.left.dtype): + # na value: need special casing to set directly on numpy arrays + value = self.left._na_value + if is_integer_dtype(self.dtype.subtype): + # can't set NaN on a numpy integer array + # GH#45484 TypeError, not ValueError, matches what we get with + # non-NA un-holdable value. 
+ raise TypeError("Cannot set float NaN to integer-backed IntervalArray") + value_left, value_right = value, value + + elif isinstance(value, Interval): + # scalar interval + self._check_closed_matches(value, name="value") + value_left, value_right = value.left, value.right + self.left._validate_fill_value(value_left) + self.left._validate_fill_value(value_right) + + else: + return self._validate_listlike(value) + + return value_left, value_right + + def value_counts(self, dropna: bool = True) -> Series: + """ + Returns a Series containing counts of each interval. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + # TODO: implement this is a non-naive way! + return value_counts(np.asarray(self), dropna=dropna) + + # --------------------------------------------------------------------- + # Rendering Methods + + def _format_data(self) -> str: + + # TODO: integrate with categorical and make generic + # name argument is unused here; just for compat with base / categorical + n = len(self) + max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10) + + formatter = str + + if n == 0: + summary = "[]" + elif n == 1: + first = formatter(self[0]) + summary = f"[{first}]" + elif n == 2: + first = formatter(self[0]) + last = formatter(self[-1]) + summary = f"[{first}, {last}]" + else: + + if n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in self[:n]] + tail = [formatter(x) for x in self[-n:]] + head_str = ", ".join(head) + tail_str = ", ".join(tail) + summary = f"[{head_str} ... {tail_str}]" + else: + tail = [formatter(x) for x in self] + tail_str = ", ".join(tail) + summary = f"[{tail_str}]" + + return summary + + def __repr__(self) -> str: + # the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + data = self._format_data() + class_name = f"<{type(self).__name__}>\n" + + template = f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" + return template + + def _format_space(self) -> str: + space = " " * (len(type(self).__name__) + 1) + return f"\n{space}" + + # --------------------------------------------------------------------- + # Vectorized Interval Properties/Attributes + + @property + def left(self): + """ + Return the left endpoints of each Interval in the IntervalArray as an Index. + """ + from pandas import Index + + return Index(self._left, copy=False) + + @property + def right(self): + """ + Return the right endpoints of each Interval in the IntervalArray as an Index. + """ + from pandas import Index + + return Index(self._right, copy=False) + + @property + def length(self) -> Index: + """ + Return an Index with entries denoting the length of each Interval. + """ + return self.right - self.left + + @property + def mid(self) -> Index: + """ + Return the midpoint of each Interval in the IntervalArray as an Index. + """ + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * self.length + + _interval_shared_docs["overlaps"] = textwrap.dedent( + """ + Check elementwise if an Interval overlaps the values in the %(klass)s. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. 
+
+        Parameters
+        ----------
+        other : %(klass)s
+            Interval to check against for an overlap.
+
+        Returns
+        -------
+        ndarray
+            Boolean array positionally indicating where an overlap occurs.
+
+        See Also
+        --------
+        Interval.overlaps : Check whether two Interval objects overlap.
+
+        Examples
+        --------
+        %(examples)s
+        >>> intervals.overlaps(pd.Interval(0.5, 1.5))
+        array([ True,  True, False])
+
+        Intervals that share closed endpoints overlap:
+
+        >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
+        array([ True,  True,  True])
+
+        Intervals that only have an open endpoint in common do not overlap:
+
+        >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
+        array([False,  True, False])
+        """
+    )
+
+    @Appender(
+        _interval_shared_docs["overlaps"]
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
+                """\
+        >>> data = [(0, 1), (1, 3), (2, 4)]
+        >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
+        >>> intervals
+        <IntervalArray>
+        [(0, 1], (1, 3], (2, 4]]
+        Length: 3, dtype: interval[int64, right]
+        """
+            ),
+        }
+    )
+    def overlaps(self, other):
+        if isinstance(other, (IntervalArray, ABCIntervalIndex)):
+            raise NotImplementedError
+        elif not isinstance(other, Interval):
+            msg = f"`other` must be Interval-like, got {type(other).__name__}"
+            raise TypeError(msg)
+
+        # equality is okay if both endpoints are closed (overlap at a point)
+        op1 = le if (self.closed_left and other.closed_right) else lt
+        op2 = le if (other.closed_left and self.closed_right) else lt
+
+        # overlaps is equivalent to the negation of the two intervals being disjoint:
+        #   disjoint = (A.left > B.right) or (B.left > A.right)
+        # (simplifying the negation allows this to be done in fewer operations)
+        return op1(self.left, other.right) & op2(other.left, self.right)
+
+    # ---------------------------------------------------------------------
+
+    @property
+    def closed(self) -> IntervalClosedType:
+        """
+        String describing the inclusive side of the intervals.
+
+        Either ``left``, ``right``, ``both`` or ``neither``.
+        """
+        return self.dtype.closed
+
+    _interval_shared_docs["set_closed"] = textwrap.dedent(
+        """
+        Return an identical %(klass)s closed on the specified side.
+
+        Parameters
+        ----------
+        closed : {'left', 'right', 'both', 'neither'}
+            Whether the intervals are closed on the left-side, right-side, both
+            or neither.
+
+        Returns
+        -------
+        new_index : %(klass)s
+
+        %(examples)s\
+        """
+    )
+
+    @Appender(
+        _interval_shared_docs["set_closed"]
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
+        >>> index
+        <IntervalArray>
+        [(0, 1], (1, 2], (2, 3]]
+        Length: 3, dtype: interval[int64, right]
+        >>> index.set_closed('both')
+        <IntervalArray>
+        [[0, 1], [1, 2], [2, 3]]
+        Length: 3, dtype: interval[int64, both]
+        """
+            ),
+        }
+    )
+    def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
+        if closed not in VALID_CLOSED:
+            msg = f"invalid option for 'closed': {closed}"
+            raise ValueError(msg)
+
+        return type(self)._simple_new(
+            left=self._left, right=self._right, closed=closed, verify_integrity=False
+        )
+
+    _interval_shared_docs[
+        "is_non_overlapping_monotonic"
+    ] = """
+        Return a boolean indicating whether the %(klass)s is non-overlapping and monotonic.
+
+        Non-overlapping means no Intervals share points, and monotonic means
+        either monotonic increasing or monotonic decreasing.
+ """ + + # https://github.com/python/mypy/issues/1362 + # Mypy does not support decorated properties + @property # type: ignore[misc] + @Appender( + _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs + ) + def is_non_overlapping_monotonic(self) -> bool: + # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) + # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) + # we already require left <= right + + # strict inequality for closed == 'both'; equality implies overlapping + # at a point when both sides of intervals are included + if self.closed == "both": + return bool( + (self._right[:-1] < self._left[1:]).all() + or (self._left[:-1] > self._right[1:]).all() + ) + + # non-strict inequality when closed != 'both'; at least one side is + # not included in the intervals, so equality does not imply overlapping + return bool( + (self._right[:-1] <= self._left[1:]).all() + or (self._left[:-1] >= self._right[1:]).all() + ) + + # --------------------------------------------------------------------- + # Conversion + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + """ + Return the IntervalArray's data as a numpy array of Interval + objects (with dtype='object') + """ + left = self._left + right = self._right + mask = self.isna() + closed = self.closed + + result = np.empty(len(left), dtype=object) + for i in range(len(left)): + if mask[i]: + result[i] = np.nan + else: + result[i] = Interval(left[i], right[i], closed) + return result + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. + """ + import pyarrow + + from pandas.core.arrays.arrow.extension_types import ArrowIntervalType + + try: + subtype = pyarrow.from_numpy_dtype(self.dtype.subtype) + except TypeError as err: + raise TypeError( + f"Conversion to arrow with subtype '{self.dtype.subtype}' " + "is not supported" + ) from err + interval_type = ArrowIntervalType(subtype, self.closed) + storage_array = pyarrow.StructArray.from_arrays( + [ + pyarrow.array(self._left, type=subtype, from_pandas=True), + pyarrow.array(self._right, type=subtype, from_pandas=True), + ], + names=["left", "right"], + ) + mask = self.isna() + if mask.any(): + # if there are missing values, set validity bitmap also on the array level + null_bitmap = pyarrow.array(~mask).buffers()[1] + storage_array = pyarrow.StructArray.from_buffers( + storage_array.type, + len(storage_array), + [null_bitmap], + children=[storage_array.field(0), storage_array.field(1)], + ) + + if type is not None: + if type.equals(interval_type.storage_type): + return storage_array + elif isinstance(type, ArrowIntervalType): + # ensure we have the same subtype and closed attributes + if not type.equals(interval_type): + raise TypeError( + "Not supported to convert IntervalArray to type with " + f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) " + f"and 'closed' ({self.closed} vs {type.closed}) attributes" + ) + else: + raise TypeError( + f"Not supported to convert IntervalArray to '{type}' type" + ) + + return pyarrow.ExtensionArray.from_storage(interval_type, storage_array) + + _interval_shared_docs[ + "to_tuples" + ] = """ + Return an %(return_type)s of tuples of the form (left, right). + + Parameters + ---------- + na_tuple : bool, default True + Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA + value itself if False, ``nan``. 
+ + Returns + ------- + tuples: %(return_type)s + %(examples)s\ + """ + + @Appender( + _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""} + ) + def to_tuples(self, na_tuple=True) -> np.ndarray: + tuples = com.asarray_tuplesafe(zip(self._left, self._right)) + if not na_tuple: + # GH 18756 + tuples = np.where(~self.isna(), tuples, np.nan) + return tuples + + # --------------------------------------------------------------------- + + def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: + value_left, value_right = self._validate_setitem_value(value) + + if isinstance(self._left, np.ndarray): + np.putmask(self._left, mask, value_left) + np.putmask(self._right, mask, value_right) + else: + self._left._putmask(mask, value_left) + self._right._putmask(mask, value_right) + + def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT: + """ + Return a new IntervalArray inserting new item at location. Follows + Python numpy.insert semantics for negative values. Only Interval + objects and NA can be inserted into an IntervalIndex + + Parameters + ---------- + loc : int + item : Interval + + Returns + ------- + IntervalArray + """ + left_insert, right_insert = self._validate_scalar(item) + + new_left = self.left.insert(loc, left_insert) + new_right = self.right.insert(loc, right_insert) + + return self._shallow_copy(new_left, new_right) + + def delete(self: IntervalArrayT, loc) -> IntervalArrayT: + if isinstance(self._left, np.ndarray): + new_left = np.delete(self._left, loc) + new_right = np.delete(self._right, loc) + else: + new_left = self._left.delete(loc) + new_right = self._right.delete(loc) + return self._shallow_copy(left=new_left, right=new_right) + + @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) + def repeat( + self: IntervalArrayT, + repeats: int | Sequence[int], + axis: int | None = None, + ) -> IntervalArrayT: + nv.validate_repeat((), {"axis": axis}) + left_repeat = self.left.repeat(repeats) + right_repeat = self.right.repeat(repeats) + return self._shallow_copy(left=left_repeat, right=right_repeat) + + _interval_shared_docs["contains"] = textwrap.dedent( + """ + Check elementwise if the Intervals contain the value. + + Return a boolean mask whether the value is contained in the Intervals + of the %(klass)s. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + other : scalar + The value to check whether it is contained in the Intervals. + + Returns + ------- + boolean array + + See Also + -------- + Interval.contains : Check whether Interval object contains value. + %(klass)s.overlaps : Check if an Interval overlaps the values in the + %(klass)s. 
+ + Examples + -------- + %(examples)s + >>> intervals.contains(0.5) + array([ True, False, False]) + """ + ) + + @Appender( + _interval_shared_docs["contains"] + % { + "klass": "IntervalArray", + "examples": textwrap.dedent( + """\ + >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)]) + >>> intervals + + [(0, 1], (1, 3], (2, 4]] + Length: 3, dtype: interval[int64, right] + """ + ), + } + ) + def contains(self, other): + if isinstance(other, Interval): + raise NotImplementedError("contains not implemented for two intervals") + + return (self._left < other if self.open_left else self._left <= other) & ( + other < self._right if self.open_right else other <= self._right + ) + + def isin(self, values) -> npt.NDArray[np.bool_]: + if not hasattr(values, "dtype"): + values = np.array(values) + values = extract_array(values, extract_numpy=True) + + if is_interval_dtype(values.dtype): + if self.closed != values.closed: + # not comparable -> no overlap + return np.zeros(self.shape, dtype=bool) + + if is_dtype_equal(self.dtype, values.dtype): + # GH#38353 instead of casting to object, operating on a + # complex128 ndarray is much more performant. + left = self._combined.view("complex128") + right = values._combined.view("complex128") + # error: Argument 1 to "in1d" has incompatible type + # "Union[ExtensionArray, ndarray[Any, Any], + # ndarray[Any, dtype[Any]]]"; expected + # "Union[_SupportsArray[dtype[Any]], + # _NestedSequence[_SupportsArray[dtype[Any]]], bool, + # int, float, complex, str, bytes, _NestedSequence[ + # Union[bool, int, float, complex, str, bytes]]]" + return np.in1d(left, right) # type: ignore[arg-type] + + elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion( + values.left.dtype + ): + # not comparable -> no overlap + return np.zeros(self.shape, dtype=bool) + + return isin(self.astype(object), values.astype(object)) + + @property + def _combined(self) -> ArrayLike: + left = self.left._values.reshape(-1, 1) + right = self.right._values.reshape(-1, 1) + if needs_i8_conversion(left.dtype): + comb = left._concat_same_type([left, right], axis=1) + else: + comb = np.concatenate([left, right], axis=1) + return comb + + def _from_combined(self, combined: np.ndarray) -> IntervalArray: + """ + Create a new IntervalArray with our dtype from a 1D complex128 ndarray. + """ + nc = combined.view("i8").reshape(-1, 2) + + dtype = self._left.dtype + if needs_i8_conversion(dtype): + # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" + new_left = type(self._left)._from_sequence( # type: ignore[attr-defined] + nc[:, 0], dtype=dtype + ) + # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" + new_right = type(self._right)._from_sequence( # type: ignore[attr-defined] + nc[:, 1], dtype=dtype + ) + else: + new_left = nc[:, 0].view(dtype) + new_right = nc[:, 1].view(dtype) + return self._shallow_copy(left=new_left, right=new_right) + + def unique(self) -> IntervalArray: + # No overload variant of "__getitem__" of "ExtensionArray" matches argument + # type "Tuple[slice, int]" + nc = unique( + self._combined.view("complex128")[:, 0] # type: ignore[call-overload] + ) + nc = nc[:, None] + return self._from_combined(nc) + + +def _maybe_convert_platform_interval(values) -> ArrayLike: + """ + Try to do platform conversion, with special casing for IntervalArray. + Wrapper around maybe_convert_platform that alters the default return + dtype in certain cases to be compatible with IntervalArray. 
For example, + empty lists return with integer dtype instead of object dtype, which is + prohibited for IntervalArray. + + Parameters + ---------- + values : array-like + + Returns + ------- + array + """ + if isinstance(values, (list, tuple)) and len(values) == 0: + # GH 19016 + # empty lists/tuples get object dtype by default, but this is + # prohibited for IntervalArray, so coerce to integer instead + return np.array([], dtype=np.int64) + elif not is_list_like(values) or isinstance(values, ABCDataFrame): + # This will raise later, but we avoid passing to maybe_convert_platform + return values + elif is_categorical_dtype(values): + values = np.asarray(values) + elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)): + # TODO: should we just cast these to list? + return values + else: + values = extract_array(values, extract_numpy=True) + + if not hasattr(values, "dtype"): + return np.asarray(values) + return values diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py new file mode 100644 index 00000000..5cdd632d --- /dev/null +++ b/pandas/core/arrays/masked.py @@ -0,0 +1,1294 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Literal, + Sequence, + TypeVar, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + ArrayLike, + AstypeArg, + DtypeObj, + NpDtype, + PositionalIndexer, + Scalar, + ScalarIndexer, + SequenceIndexer, + Shape, + npt, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import doc +from pandas.util._validators import validate_fillna_kwargs + +from pandas.core.dtypes.astype import astype_nansafe +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.common import ( + is_bool, + is_bool_dtype, + is_datetime64_dtype, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import BaseMaskedDtype +from pandas.core.dtypes.inference import is_array_like +from pandas.core.dtypes.missing import ( + array_equivalent, + is_valid_na_for_dtype, + isna, + notna, +) + +from pandas.core import ( + algorithms as algos, + arraylike, + missing, + nanops, + ops, +) +from pandas.core.algorithms import ( + factorize_array, + isin, + take, +) +from pandas.core.array_algos import masked_reductions +from pandas.core.array_algos.quantile import quantile_with_mask +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ExtensionArray +from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.indexers import check_array_indexer +from pandas.core.ops import invalid_comparison + +if TYPE_CHECKING: + from pandas import Series + from pandas.core.arrays import BooleanArray + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + +from pandas.compat.numpy import function as nv + +BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray") + + +class BaseMaskedArray(OpsMixin, ExtensionArray): + """ + Base class for masked arrays (which use _data and _mask to store the data). 
+ + numpy based + """ + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value: Scalar + # our underlying data and mask are each ndarrays + _data: np.ndarray + _mask: npt.NDArray[np.bool_] + + # Fill values used for any/all + _truthy_value = Scalar # bool(_truthy_value) = True + _falsey_value = Scalar # bool(_falsey_value) = False + + def __init__( + self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False + ) -> None: + # values is supposed to already be validated in the subclass + if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_): + raise TypeError( + "mask should be boolean numpy array. Use " + "the 'pd.array' function instead" + ) + if values.shape != mask.shape: + raise ValueError("values.shape must match mask.shape") + + if copy: + values = values.copy() + mask = mask.copy() + + self._data = values + self._mask = mask + + @classmethod + def _from_sequence( + cls: type[BaseMaskedArrayT], scalars, *, dtype=None, copy: bool = False + ) -> BaseMaskedArrayT: + values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy) + return cls(values, mask) + + @property + def dtype(self) -> BaseMaskedDtype: + raise AbstractMethodError(self) + + @overload + def __getitem__(self, item: ScalarIndexer) -> Any: + ... + + @overload + def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArrayT: + ... + + def __getitem__( + self: BaseMaskedArrayT, item: PositionalIndexer + ) -> BaseMaskedArrayT | Any: + item = check_array_indexer(self, item) + + newmask = self._mask[item] + if is_bool(newmask): + # This is a scalar indexing + if newmask: + return self.dtype.na_value + return self._data[item] + + return type(self)(self._data[item], newmask) + + @doc(ExtensionArray.fillna) + def fillna( + self: BaseMaskedArrayT, value=None, method=None, limit=None + ) -> BaseMaskedArrayT: + value, method = validate_fillna_kwargs(value, method) + + mask = self._mask + + if is_array_like(value): + if len(value) != len(self): + raise ValueError( + f"Length of 'value' does not match. Got ({len(value)}) " + f" expected {len(self)}" + ) + value = value[mask] + + if mask.any(): + if method is not None: + func = missing.get_fill_func(method, ndim=self.ndim) + npvalues = self._data.copy().T + new_mask = mask.copy().T + func(npvalues, limit=limit, mask=new_mask) + return type(self)(npvalues.T, new_mask.T) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values + + @classmethod + def _coerce_to_array( + cls, values, *, dtype: DtypeObj, copy: bool = False + ) -> tuple[np.ndarray, np.ndarray]: + raise AbstractMethodError(cls) + + def _validate_setitem_value(self, value): + """ + Check if we have a scalar that we can cast losslessly. + + Raises + ------ + TypeError + """ + kind = self.dtype.kind + # TODO: get this all from np_can_hold_element? + if kind == "b": + if lib.is_bool(value): + return value + + elif kind == "f": + if lib.is_integer(value) or lib.is_float(value): + return value + + else: + if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()): + return value + # TODO: unsigned checks + + # Note: without the "str" here, the f-string rendering raises in + # py38 builds. 
+ raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}") + + def __setitem__(self, key, value) -> None: + key = check_array_indexer(self, key) + + if is_scalar(value): + if is_valid_na_for_dtype(value, self.dtype): + self._mask[key] = True + else: + value = self._validate_setitem_value(value) + self._data[key] = value + self._mask[key] = False + return + + value, mask = self._coerce_to_array(value, dtype=self.dtype) + + self._data[key] = value + self._mask[key] = mask + + def __iter__(self): + if self.ndim == 1: + for i in range(len(self)): + if self._mask[i]: + yield self.dtype.na_value + else: + yield self._data[i] + else: + for i in range(len(self)): + yield self[i] + + def __len__(self) -> int: + return len(self._data) + + @property + def shape(self) -> Shape: + return self._data.shape + + @property + def ndim(self) -> int: + return self._data.ndim + + def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT: + data = self._data.swapaxes(axis1, axis2) + mask = self._mask.swapaxes(axis1, axis2) + return type(self)(data, mask) + + def delete(self: BaseMaskedArrayT, loc, axis: int = 0) -> BaseMaskedArrayT: + data = np.delete(self._data, loc, axis=axis) + mask = np.delete(self._mask, loc, axis=axis) + return type(self)(data, mask) + + def reshape(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT: + data = self._data.reshape(*args, **kwargs) + mask = self._mask.reshape(*args, **kwargs) + return type(self)(data, mask) + + def ravel(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT: + # TODO: need to make sure we have the same order for data/mask + data = self._data.ravel(*args, **kwargs) + mask = self._mask.ravel(*args, **kwargs) + return type(self)(data, mask) + + @property + def T(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + return type(self)(self._data.T, self._mask.T) + + def round(self, decimals: int = 0, *args, **kwargs): + """ + Round each value in the array a to the given number of decimals. + + Parameters + ---------- + decimals : int, default 0 + Number of decimal places to round to. If decimals is negative, + it specifies the number of positions to the left of the decimal point. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + NumericArray + Rounded values of the NumericArray. + + See Also + -------- + numpy.around : Round values of an np.array. + DataFrame.round : Round values of a DataFrame. + Series.round : Round values of a Series. + """ + nv.validate_round(args, kwargs) + values = np.round(self._data, decimals=decimals, **kwargs) + + # Usually we'll get same type as self, but ndarray[bool] casts to float + return self._maybe_mask_result(values, self._mask.copy()) + + # ------------------------------------------------------------------ + # Unary Methods + + def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + return type(self)(~self._data, self._mask.copy()) + + def __neg__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + return type(self)(-self._data, self._mask.copy()) + + def __pos__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + return self.copy() + + def __abs__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + return type(self)(abs(self._data), self._mask.copy()) + + # ------------------------------------------------------------------ + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: object = lib.no_default, + ) -> np.ndarray: + """ + Convert to a NumPy Array. 
+
+        By default converts to an object-dtype NumPy array. Specify the `dtype` and
+        `na_value` keywords to customize the conversion.
+
+        Parameters
+        ----------
+        dtype : dtype, default object
+            The numpy dtype to convert to.
+        copy : bool, default False
+            Whether to ensure that the returned value is not a view on
+            the array. Note that ``copy=False`` does not *ensure* that
+            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
+            a copy is made, even if not strictly necessary. This is typically
+            only possible when no missing values are present and `dtype`
+            is the equivalent numpy dtype.
+        na_value : scalar, optional
+             Scalar missing value indicator to use in numpy array. Defaults
+             to the native missing value indicator of this array (pd.NA).
+
+        Returns
+        -------
+        numpy.ndarray
+
+        Examples
+        --------
+        An object-dtype is the default result
+
+        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
+        >>> a.to_numpy()
+        array([True, False, <NA>], dtype=object)
+
+        When no missing values are present, an equivalent dtype can be used.
+
+        >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
+        array([ True, False])
+        >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
+        array([1, 2])
+
+        However, requesting such dtype will raise a ValueError if
+        missing values are present and the default missing value :attr:`NA`
+        is used.
+
+        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
+        >>> a
+        <BooleanArray>
+        [True, False, <NA>]
+        Length: 3, dtype: boolean
+
+        >>> a.to_numpy(dtype="bool")
+        Traceback (most recent call last):
+        ...
+        ValueError: cannot convert to bool numpy array in presence of missing values
+
+        Specify a valid `na_value` instead
+
+        >>> a.to_numpy(dtype="bool", na_value=False)
+        array([ True, False, False])
+        """
+        if na_value is lib.no_default:
+            na_value = libmissing.NA
+        if dtype is None:
+            dtype = object
+        if self._hasna:
+            if (
+                not is_object_dtype(dtype)
+                and not is_string_dtype(dtype)
+                and na_value is libmissing.NA
+            ):
+                raise ValueError(
+                    f"cannot convert to '{dtype}'-dtype NumPy array "
+                    "with missing values. Specify an appropriate 'na_value' "
+                    "for this dtype."
+                )
+            # don't pass copy to astype -> always need a copy since we are mutating
+            data = self._data.astype(dtype)
+            data[self._mask] = na_value
+        else:
+            data = self._data.astype(dtype, copy=copy)
+        return data
+
+    @overload
+    def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
+        ...
+
+    @overload
+    def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
+        ...
+
+    @overload
+    def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
+        ...
+ + def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + dtype = pandas_dtype(dtype) + + if is_dtype_equal(dtype, self.dtype): + if copy: + return self.copy() + return self + + # if we are astyping to another nullable masked dtype, we can fastpath + if isinstance(dtype, BaseMaskedDtype): + # TODO deal with NaNs for FloatingArray case + data = self._data.astype(dtype.numpy_dtype, copy=copy) + # mask is copied depending on whether the data was copied, and + # not directly depending on the `copy` keyword + mask = self._mask if data is self._data else self._mask.copy() + cls = dtype.construct_array_type() + return cls(data, mask, copy=False) + + if isinstance(dtype, ExtensionDtype): + eacls = dtype.construct_array_type() + return eacls._from_sequence(self, dtype=dtype, copy=copy) + + na_value: float | np.datetime64 | lib.NoDefault + + # coerce + if is_float_dtype(dtype): + # In astype, we consider dtype=float to also mean na_value=np.nan + na_value = np.nan + elif is_datetime64_dtype(dtype): + na_value = np.datetime64("NaT") + else: + na_value = lib.no_default + + # to_numpy will also raise, but we get somewhat nicer exception messages here + if is_integer_dtype(dtype) and self._hasna: + raise ValueError("cannot convert NA to integer") + if is_bool_dtype(dtype) and self._hasna: + # careful: astype_nansafe converts np.nan to True + raise ValueError("cannot convert float NaN to bool") + + data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy) + if self.dtype.kind == "f": + # TODO: make this consistent between IntegerArray/FloatingArray, + # see test_astype_str + return astype_nansafe(data, dtype, copy=False) + return data + + __array_priority__ = 1000 # higher than ndarray so ops dispatch to us + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + """ + the array interface, return my values + We return an object array here to preserve our scalar values + """ + return self.to_numpy(dtype=dtype) + + _HANDLED_TYPES: tuple[type, ...] + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # For MaskedArray inputs, we apply the ufunc to ._data + # and mask the result. + + out = kwargs.get("out", ()) + + for x in inputs + out: + if not isinstance(x, self._HANDLED_TYPES + (BaseMaskedArray,)): + return NotImplemented + + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + # e.g. test_ufunc_with_out + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + mask = np.zeros(len(self), dtype=bool) + inputs2 = [] + for x in inputs: + if isinstance(x, BaseMaskedArray): + mask |= x._mask + inputs2.append(x._data) + else: + inputs2.append(x) + + def reconstruct(x): + # we don't worry about scalar `x` here, since we + # raise for reduce up above. + from pandas.core.arrays import ( + BooleanArray, + FloatingArray, + IntegerArray, + ) + + if is_bool_dtype(x.dtype): + m = mask.copy() + return BooleanArray(x, m) + elif is_integer_dtype(x.dtype): + m = mask.copy() + return IntegerArray(x, m) + elif is_float_dtype(x.dtype): + m = mask.copy() + if x.dtype == np.float16: + # reached in e.g. 
np.sqrt on BooleanArray + # we don't support float16 + x = x.astype(np.float32) + return FloatingArray(x, m) + else: + x[mask] = np.nan + return x + + result = getattr(ufunc, method)(*inputs2, **kwargs) + if ufunc.nout > 1: + # e.g. np.divmod + return tuple(reconstruct(x) for x in result) + elif method == "reduce": + # e.g. np.add.reduce; test_ufunc_reduce_raises + if self._mask.any(): + return self._na_value + return result + else: + return reconstruct(result) + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. + """ + import pyarrow as pa + + return pa.array(self._data, mask=self._mask, type=type) + + @property + def _hasna(self) -> bool: + # Note: this is expensive right now! The hope is that we can + # make this faster by having an optional mask, but not have to change + # source code using it.. + + # error: Incompatible return value type (got "bool_", expected "bool") + return self._mask.any() # type: ignore[return-value] + + def _propagate_mask( + self, mask: npt.NDArray[np.bool_] | None, other + ) -> npt.NDArray[np.bool_]: + if mask is None: + mask = self._mask.copy() # TODO: need test for BooleanArray needing a copy + if other is libmissing.NA: + # GH#45421 don't alter inplace + mask = mask | True + else: + mask = self._mask | mask + return mask + + def _arith_method(self, other, op): + op_name = op.__name__ + omask = None + + if isinstance(other, BaseMaskedArray): + other, omask = other._data, other._mask + + elif is_list_like(other): + if not isinstance(other, ExtensionArray): + other = np.asarray(other) + if other.ndim > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + + # We wrap the non-masked arithmetic logic used for numpy dtypes + # in Series/Index arithmetic ops. + other = ops.maybe_prepare_scalar_for_op(other, (len(self),)) + pd_op = ops.get_array_op(op) + other = ensure_wrapped_if_datetimelike(other) + + if op_name in {"pow", "rpow"} and isinstance(other, np.bool_): + # Avoid DeprecationWarning: In future, it will be an error + # for 'np.bool_' scalars to be interpreted as an index + # e.g. test_array_scalar_like_equivalence + other = bool(other) + + mask = self._propagate_mask(omask, other) + + if other is libmissing.NA: + result = np.ones_like(self._data) + if self.dtype.kind == "b": + if op_name in { + "floordiv", + "rfloordiv", + "pow", + "rpow", + "truediv", + "rtruediv", + }: + # GH#41165 Try to match non-masked Series behavior + # This is still imperfect GH#46043 + raise NotImplementedError( + f"operator '{op_name}' not implemented for bool dtypes" + ) + elif op_name in {"mod", "rmod"}: + dtype = "int8" + else: + dtype = "bool" + result = result.astype(dtype) + elif "truediv" in op_name and self.dtype.kind != "f": + # The actual data here doesn't matter since the mask + # will be all-True, but since this is division, we want + # to end up with floating dtype. + result = result.astype(np.float64) + else: + # Make sure we do this before the "pow" mask checks + # to get an expected exception message on shape mismatch. + if self.dtype.kind in ["i", "u"] and op_name in ["floordiv", "mod"]: + # TODO(GH#30188) ATM we don't match the behavior of non-masked + # types with respect to floordiv-by-zero + pd_op = op + + with np.errstate(all="ignore"): + result = pd_op(self._data, other) + + if op_name == "pow": + # 1 ** x is 1. + mask = np.where((self._data == 1) & ~self._mask, False, mask) + # x ** 0 is 1. 
+ if omask is not None: + mask = np.where((other == 0) & ~omask, False, mask) + elif other is not libmissing.NA: + mask = np.where(other == 0, False, mask) + + elif op_name == "rpow": + # 1 ** x is 1. + if omask is not None: + mask = np.where((other == 1) & ~omask, False, mask) + elif other is not libmissing.NA: + mask = np.where(other == 1, False, mask) + # x ** 0 is 1. + mask = np.where((self._data == 0) & ~self._mask, False, mask) + + return self._maybe_mask_result(result, mask) + + _logical_method = _arith_method + + def _cmp_method(self, other, op) -> BooleanArray: + from pandas.core.arrays import BooleanArray + + mask = None + + if isinstance(other, BaseMaskedArray): + other, mask = other._data, other._mask + + elif is_list_like(other): + other = np.asarray(other) + if other.ndim > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + if len(self) != len(other): + raise ValueError("Lengths must match to compare") + + if other is libmissing.NA: + # numpy does not handle pd.NA well as "other" scalar (it returns + # a scalar False instead of an array) + # This may be fixed by NA.__array_ufunc__. Revisit this check + # once that's implemented. + result = np.zeros(self._data.shape, dtype="bool") + mask = np.ones(self._data.shape, dtype="bool") + else: + with warnings.catch_warnings(): + # numpy may show a FutureWarning: + # elementwise comparison failed; returning scalar instead, + # but in the future will perform elementwise comparison + # before returning NotImplemented. We fall back to the correct + # behavior today, so that should be fine to ignore. + warnings.filterwarnings("ignore", "elementwise", FutureWarning) + with np.errstate(all="ignore"): + method = getattr(self._data, f"__{op.__name__}__") + result = method(other) + + if result is NotImplemented: + result = invalid_comparison(self._data, other, op) + + mask = self._propagate_mask(mask, other) + return BooleanArray(result, mask, copy=False) + + def _maybe_mask_result(self, result, mask): + """ + Parameters + ---------- + result : array-like or tuple[array-like] + mask : array-like bool + """ + if isinstance(result, tuple): + # i.e. divmod + div, mod = result + return ( + self._maybe_mask_result(div, mask), + self._maybe_mask_result(mod, mask), + ) + + if is_float_dtype(result.dtype): + from pandas.core.arrays import FloatingArray + + return FloatingArray(result, mask, copy=False) + + elif is_bool_dtype(result.dtype): + from pandas.core.arrays import BooleanArray + + return BooleanArray(result, mask, copy=False) + + elif result.dtype == "timedelta64[ns]": + # e.g. 
test_numeric_arr_mul_tdscalar_numexpr_path + from pandas.core.arrays import TimedeltaArray + + if not isinstance(result, TimedeltaArray): + result = TimedeltaArray._simple_new(result) + + result[mask] = result.dtype.type("NaT") + return result + + elif is_integer_dtype(result.dtype): + from pandas.core.arrays import IntegerArray + + return IntegerArray(result, mask, copy=False) + + else: + result[mask] = np.nan + return result + + def isna(self) -> np.ndarray: + return self._mask.copy() + + @property + def _na_value(self): + return self.dtype.na_value + + @property + def nbytes(self) -> int: + return self._data.nbytes + self._mask.nbytes + + @classmethod + def _concat_same_type( + cls: type[BaseMaskedArrayT], + to_concat: Sequence[BaseMaskedArrayT], + axis: int = 0, + ) -> BaseMaskedArrayT: + data = np.concatenate([x._data for x in to_concat], axis=axis) + mask = np.concatenate([x._mask for x in to_concat], axis=axis) + return cls(data, mask) + + def take( + self: BaseMaskedArrayT, + indexer, + *, + allow_fill: bool = False, + fill_value: Scalar | None = None, + axis: int = 0, + ) -> BaseMaskedArrayT: + # we always fill with 1 internally + # to avoid upcasting + data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value + result = take( + self._data, + indexer, + fill_value=data_fill_value, + allow_fill=allow_fill, + axis=axis, + ) + + mask = take( + self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis + ) + + # if we are filling + # we only fill where the indexer is null + # not existing missing values + # TODO(jreback) what if we have a non-na float as a fill value? + if allow_fill and notna(fill_value): + fill_mask = np.asarray(indexer) == -1 + result[fill_mask] = fill_value + mask = mask ^ fill_mask + + return type(self)(result, mask, copy=False) + + # error: Return type "BooleanArray" of "isin" incompatible with return type + # "ndarray" in supertype "ExtensionArray" + def isin(self, values) -> BooleanArray: # type: ignore[override] + + from pandas.core.arrays import BooleanArray + + # algorithms.isin will eventually convert values to an ndarray, so no extra + # cost to doing it here first + values_arr = np.asarray(values) + result = isin(self._data, values_arr) + + if self._hasna: + values_have_NA = is_object_dtype(values_arr.dtype) and any( + val is self.dtype.na_value for val in values_arr + ) + + # For now, NA does not propagate so set result according to presence of NA, + # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion + result[self._mask] = values_have_NA + + mask = np.zeros(self._data.shape, dtype=bool) + return BooleanArray(result, mask, copy=False) + + def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + data, mask = self._data, self._mask + data = data.copy() + mask = mask.copy() + return type(self)(data, mask, copy=False) + + def unique(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + """ + Compute the BaseMaskedArray of unique values. + + Returns + ------- + uniques : BaseMaskedArray + """ + uniques, mask = algos.unique_with_mask(self._data, self._mask) + return type(self)(uniques, mask, copy=False) + + @doc(ExtensionArray.searchsorted) + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + if self._hasna: + raise ValueError( + "searchsorted requires array to be sorted, which is impossible " + "with NAs present." 
+ ) + if isinstance(value, ExtensionArray): + value = value.astype(object) + # Base class searchsorted would cast to object, which is *much* slower. + return self._data.searchsorted(value, side=side, sorter=sorter) + + @doc(ExtensionArray.factorize) + def factorize( + self, + na_sentinel: int | lib.NoDefault = lib.no_default, + use_na_sentinel: bool | lib.NoDefault = lib.no_default, + ) -> tuple[np.ndarray, ExtensionArray]: + resolved_na_sentinel = algos.resolve_na_sentinel(na_sentinel, use_na_sentinel) + arr = self._data + mask = self._mask + + # Pass non-None na_sentinel; recode and add NA to uniques if necessary below + na_sentinel_arg = -1 if resolved_na_sentinel is None else resolved_na_sentinel + codes, uniques = factorize_array(arr, na_sentinel=na_sentinel_arg, mask=mask) + + # check that factorize_array correctly preserves dtype. + assert uniques.dtype == self.dtype.numpy_dtype, (uniques.dtype, self.dtype) + + has_na = mask.any() + if resolved_na_sentinel is not None or not has_na: + size = len(uniques) + else: + # Make room for an NA value + size = len(uniques) + 1 + uniques_mask = np.zeros(size, dtype=bool) + if resolved_na_sentinel is None and has_na: + na_index = mask.argmax() + # Insert na with the proper code + if na_index == 0: + na_code = np.intp(0) + else: + # mypy error: Slice index must be an integer or None + # https://github.com/python/mypy/issues/2410 + na_code = codes[:na_index].max() + 1 # type: ignore[misc] + codes[codes >= na_code] += 1 + codes[codes == -1] = na_code + # dummy value for uniques; not used since uniques_mask will be True + uniques = np.insert(uniques, na_code, 0) + uniques_mask[na_code] = True + uniques_ea = type(self)(uniques, uniques_mask) + + return codes, uniques_ea + + @doc(ExtensionArray._values_for_argsort) + def _values_for_argsort(self) -> np.ndarray: + return self._data + + def value_counts(self, dropna: bool = True) -> Series: + """ + Returns a Series containing counts of each unique value. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of missing values. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + from pandas import ( + Index, + Series, + ) + from pandas.arrays import IntegerArray + + if dropna: + keys, counts = algos.value_counts_arraylike( + self._data, dropna=True, mask=self._mask + ) + res = Series(counts, index=keys) + res.index = res.index.astype(self.dtype) + res = res.astype("Int64") + return res + + # compute counts on the data with no nans + data = self._data[~self._mask] + value_counts = Index(data).value_counts() + + index = value_counts.index + + # if we want nans, count the mask + if dropna: + counts = value_counts._values + else: + counts = np.empty(len(value_counts) + 1, dtype="int64") + counts[:-1] = value_counts + counts[-1] = self._mask.sum() + + index = index.insert(len(index), self.dtype.na_value) + + index = index.astype(self.dtype) + + mask = np.zeros(len(counts), dtype="bool") + counts_array = IntegerArray(counts, mask) + + return Series(counts_array, index=index) + + @doc(ExtensionArray.equals) + def equals(self, other) -> bool: + if type(self) != type(other): + return False + if other.dtype != self.dtype: + return False + + # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT + # equal. 
+ if not np.array_equal(self._mask, other._mask): + return False + + left = self._data[~self._mask] + right = other._data[~other._mask] + return array_equivalent(left, right, dtype_equal=True) + + def _quantile( + self, qs: npt.NDArray[np.float64], interpolation: str + ) -> BaseMaskedArray: + """ + Dispatch to quantile_with_mask, needed because we do not have + _from_factorized. + + Notes + ----- + We assume that all impacted cases are 1D-only. + """ + res = quantile_with_mask( + self._data, + mask=self._mask, + # TODO(GH#40932): na_value_for_dtype(self.dtype.numpy_dtype) + # instead of np.nan + fill_value=np.nan, + qs=qs, + interpolation=interpolation, + ) + + if self._hasna: + # Our result mask is all-False unless we are all-NA, in which + # case it is all-True. + if self.ndim == 2: + # I think this should be out_mask=self.isna().all(axis=1) + # but am holding off until we have tests + raise NotImplementedError + elif self.isna().all(): + out_mask = np.ones(res.shape, dtype=bool) + + if is_integer_dtype(self.dtype): + # We try to maintain int dtype if possible for not all-na case + # as well + res = np.zeros(res.shape, dtype=self.dtype.numpy_dtype) + else: + out_mask = np.zeros(res.shape, dtype=bool) + else: + out_mask = np.zeros(res.shape, dtype=bool) + return self._maybe_mask_result(res, mask=out_mask) + + # ------------------------------------------------------------------ + # Reductions + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + if name in {"any", "all", "min", "max", "sum", "prod"}: + return getattr(self, name)(skipna=skipna, **kwargs) + + data = self._data + mask = self._mask + + if name in {"mean"}: + op = getattr(masked_reductions, name) + result = op(data, mask, skipna=skipna, **kwargs) + return result + + # coerce to a nan-aware float if needed + # (we explicitly use NaN within reductions) + if self._hasna: + data = self.to_numpy("float64", na_value=np.nan) + + # median, var, std, skew, kurt, idxmin, idxmax + op = getattr(nanops, "nan" + name) + result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) + + if np.isnan(result): + return libmissing.NA + + return result + + def _wrap_reduction_result(self, name: str, result, skipna, **kwargs): + if isinstance(result, np.ndarray): + axis = kwargs["axis"] + if skipna: + # we only retain mask for all-NA rows/columns + mask = self._mask.all(axis=axis) + else: + mask = self._mask.any(axis=axis) + + return self._maybe_mask_result(result, mask) + return result + + def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): + nv.validate_sum((), kwargs) + + # TODO: do this in validate_sum? 
+ if "out" in kwargs: + # np.sum; test_floating_array_numpy_sum + if kwargs["out"] is not None: + raise NotImplementedError + kwargs.pop("out") + + result = masked_reductions.sum( + self._data, + self._mask, + skipna=skipna, + min_count=min_count, + axis=axis, + ) + return self._wrap_reduction_result( + "sum", result, skipna=skipna, axis=axis, **kwargs + ) + + def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): + nv.validate_prod((), kwargs) + result = masked_reductions.prod( + self._data, + self._mask, + skipna=skipna, + min_count=min_count, + axis=axis, + ) + return self._wrap_reduction_result( + "prod", result, skipna=skipna, axis=axis, **kwargs + ) + + def min(self, *, skipna=True, axis: int | None = 0, **kwargs): + nv.validate_min((), kwargs) + return masked_reductions.min( + self._data, + self._mask, + skipna=skipna, + axis=axis, + ) + + def max(self, *, skipna=True, axis: int | None = 0, **kwargs): + nv.validate_max((), kwargs) + return masked_reductions.max( + self._data, + self._mask, + skipna=skipna, + axis=axis, + ) + + def any(self, *, skipna: bool = True, **kwargs): + """ + Return whether any element is truthy. + + Returns False unless there is at least one element that is truthy. + By default, NAs are skipped. If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic ` + is used as for logical operations. + + .. versionchanged:: 1.4.0 + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be False, as for an empty array. + If `skipna` is False, the result will still be True if there is + at least one element that is truthy, otherwise NA will be returned + if there are NA's present. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.any : Numpy version of this method. + BaseMaskedArray.all : Return whether all elements are truthy. 
+
+        Examples
+        --------
+        The result indicates whether any element is truthy (and by default
+        skips NAs):
+
+        >>> pd.array([True, False, True]).any()
+        True
+        >>> pd.array([True, False, pd.NA]).any()
+        True
+        >>> pd.array([False, False, pd.NA]).any()
+        False
+        >>> pd.array([], dtype="boolean").any()
+        False
+        >>> pd.array([pd.NA], dtype="boolean").any()
+        False
+        >>> pd.array([pd.NA], dtype="Float64").any()
+        False
+
+        With ``skipna=False``, the result can be NA if this is logically
+        required (whether ``pd.NA`` is True or False influences the result):
+
+        >>> pd.array([True, False, pd.NA]).any(skipna=False)
+        True
+        >>> pd.array([1, 0, pd.NA]).any(skipna=False)
+        True
+        >>> pd.array([False, False, pd.NA]).any(skipna=False)
+        <NA>
+        >>> pd.array([0, 0, pd.NA]).any(skipna=False)
+        <NA>
+        """
+        kwargs.pop("axis", None)
+        nv.validate_any((), kwargs)
+
+        values = self._data.copy()
+        # error: Argument 3 to "putmask" has incompatible type "object";
+        # expected "Union[_SupportsArray[dtype[Any]],
+        # _NestedSequence[_SupportsArray[dtype[Any]]],
+        # bool, int, float, complex, str, bytes,
+        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
+        np.putmask(values, self._mask, self._falsey_value)  # type: ignore[arg-type]
+        result = values.any()
+        if skipna:
+            return result
+        else:
+            if result or len(self) == 0 or not self._mask.any():
+                return result
+            else:
+                return self.dtype.na_value
+
+    def all(self, *, skipna: bool = True, **kwargs):
+        """
+        Return whether all elements are truthy.
+
+        Returns True unless there is at least one element that is falsey.
+        By default, NAs are skipped. If ``skipna=False`` is specified and
+        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
+        is used as for logical operations.
+
+        .. versionchanged:: 1.4.0
+
+        Parameters
+        ----------
+        skipna : bool, default True
+            Exclude NA values. If the entire array is NA and `skipna` is
+            True, then the result will be True, as for an empty array.
+            If `skipna` is False, the result will still be False if there is
+            at least one element that is falsey, otherwise NA will be returned
+            if there are NA's present.
+        **kwargs : any, default None
+            Additional keywords have no effect but might be accepted for
+            compatibility with NumPy.
+
+        Returns
+        -------
+        bool or :attr:`pandas.NA`
+
+        See Also
+        --------
+        numpy.all : Numpy version of this method.
+        BooleanArray.any : Return whether any element is truthy.
+ + Examples + -------- + The result indicates whether all elements are truthy (and by default + skips NAs): + + >>> pd.array([True, True, pd.NA]).all() + True + >>> pd.array([1, 1, pd.NA]).all() + True + >>> pd.array([True, False, pd.NA]).all() + False + >>> pd.array([], dtype="boolean").all() + True + >>> pd.array([pd.NA], dtype="boolean").all() + True + >>> pd.array([pd.NA], dtype="Float64").all() + True + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, True, pd.NA]).all(skipna=False) + + >>> pd.array([1, 1, pd.NA]).all(skipna=False) + + >>> pd.array([True, False, pd.NA]).all(skipna=False) + False + >>> pd.array([1, 0, pd.NA]).all(skipna=False) + False + """ + kwargs.pop("axis", None) + nv.validate_all((), kwargs) + + values = self._data.copy() + # error: Argument 3 to "putmask" has incompatible type "object"; + # expected "Union[_SupportsArray[dtype[Any]], + # _NestedSequence[_SupportsArray[dtype[Any]]], + # bool, int, float, complex, str, bytes, + # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]" + np.putmask(values, self._mask, self._truthy_value) # type: ignore[arg-type] + result = values.all() + + if skipna: + return result + else: + if not result or len(self) == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py new file mode 100644 index 00000000..b32cbdcb --- /dev/null +++ b/pandas/core/arrays/numeric.py @@ -0,0 +1,272 @@ +from __future__ import annotations + +import numbers +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Mapping, + TypeVar, +) + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + Dtype, + DtypeObj, + npt, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_string_dtype, + pandas_dtype, +) + +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) + +if TYPE_CHECKING: + import pyarrow + + +T = TypeVar("T", bound="NumericArray") + + +class NumericDtype(BaseMaskedDtype): + _default_np_dtype: np.dtype + _checker: Callable[[Any], bool] # is_foo_dtype + + def __repr__(self) -> str: + return f"{self.name}Dtype()" + + @cache_readonly + def is_signed_integer(self) -> bool: + return self.kind == "i" + + @cache_readonly + def is_unsigned_integer(self) -> bool: + return self.kind == "u" + + @property + def _is_numeric(self) -> bool: + return True + + def __from_arrow__( + self, array: pyarrow.Array | pyarrow.ChunkedArray + ) -> BaseMaskedArray: + """ + Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray. 
+ """ + import pyarrow + + from pandas.core.arrays.arrow._arrow_utils import ( + pyarrow_array_to_numpy_and_mask, + ) + + array_class = self.construct_array_type() + + pyarrow_type = pyarrow.from_numpy_dtype(self.type) + if not array.type.equals(pyarrow_type): + # test_from_arrow_type_error raise for string, but allow + # through itemsize conversion GH#31896 + rt_dtype = pandas_dtype(array.type.to_pandas_dtype()) + if rt_dtype.kind not in ["i", "u", "f"]: + # Could allow "c" or potentially disallow float<->int conversion, + # but at the moment we specifically test that uint<->int works + raise TypeError( + f"Expected array of {self} type, got {array.type} instead" + ) + + array = array.cast(pyarrow_type) + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + # pyarrow.ChunkedArray + chunks = array.chunks + + results = [] + for arr in chunks: + data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.numpy_dtype) + num_arr = array_class(data.copy(), ~mask, copy=False) + results.append(num_arr) + + if not results: + return array_class( + np.array([], dtype=self.numpy_dtype), np.array([], dtype=np.bool_) + ) + elif len(results) == 1: + # avoid additional copy in _concat_same_type + return results[0] + else: + return array_class._concat_same_type(results) + + @classmethod + def _str_to_dtype_mapping(cls) -> Mapping[str, NumericDtype]: + raise AbstractMethodError(cls) + + @classmethod + def _standardize_dtype(cls, dtype: NumericDtype | str | np.dtype) -> NumericDtype: + """ + Convert a string representation or a numpy dtype to NumericDtype. + """ + if isinstance(dtype, str) and (dtype.startswith(("Int", "UInt", "Float"))): + # Avoid DeprecationWarning from NumPy about np.dtype("Int64") + # https://github.com/numpy/numpy/pull/7476 + dtype = dtype.lower() + + if not isinstance(dtype, NumericDtype): + mapping = cls._str_to_dtype_mapping() + try: + dtype = mapping[str(np.dtype(dtype))] + except KeyError as err: + raise ValueError(f"invalid dtype specified {dtype}") from err + return dtype + + @classmethod + def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray: + """ + Safely cast the values to the given dtype. + + "safe" in this context means the casting is lossless. 
+ """ + raise AbstractMethodError(cls) + + +def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype): + checker = dtype_cls._checker + + inferred_type = None + + if dtype is None and hasattr(values, "dtype"): + if checker(values.dtype): + dtype = values.dtype + + if dtype is not None: + dtype = dtype_cls._standardize_dtype(dtype) + + cls = dtype_cls.construct_array_type() + if isinstance(values, cls): + values, mask = values._data, values._mask + if dtype is not None: + values = values.astype(dtype.numpy_dtype, copy=False) + + if copy: + values = values.copy() + mask = mask.copy() + return values, mask, dtype, inferred_type + + values = np.array(values, copy=copy) + inferred_type = None + if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): + inferred_type = lib.infer_dtype(values, skipna=True) + if inferred_type == "empty": + pass + elif inferred_type == "boolean": + name = dtype_cls.__name__.strip("_") + raise TypeError(f"{values.dtype} cannot be converted to {name}") + + elif is_bool_dtype(values) and checker(dtype): + values = np.array(values, dtype=default_dtype, copy=copy) + + elif not (is_integer_dtype(values) or is_float_dtype(values)): + name = dtype_cls.__name__.strip("_") + raise TypeError(f"{values.dtype} cannot be converted to {name}") + + if values.ndim != 1: + raise TypeError("values must be a 1D list-like") + + if mask is None: + mask = libmissing.is_numeric_na(values) + else: + assert len(mask) == len(values) + + if mask.ndim != 1: + raise TypeError("mask must be a 1D list-like") + + # infer dtype if needed + if dtype is None: + dtype = default_dtype + else: + dtype = dtype.type + + # we copy as need to coerce here + if mask.any(): + values = values.copy() + values[mask] = cls._internal_fill_value + if inferred_type in ("string", "unicode"): + # casts from str are always safe since they raise + # a ValueError if the str cannot be parsed into a float + values = values.astype(dtype, copy=copy) + else: + values = dtype_cls._safe_cast(values, dtype, copy=False) + + return values, mask, dtype, inferred_type + + +class NumericArray(BaseMaskedArray): + """ + Base class for IntegerArray and FloatingArray. + """ + + _dtype_cls: type[NumericDtype] + + def __init__( + self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False + ) -> None: + checker = self._dtype_cls._checker + if not (isinstance(values, np.ndarray) and checker(values.dtype)): + descr = ( + "floating" + if self._dtype_cls.kind == "f" # type: ignore[comparison-overlap] + else "integer" + ) + raise TypeError( + f"values should be {descr} numpy array. 
Use " + "the 'pd.array' function instead" + ) + if values.dtype == np.float16: + # If we don't raise here, then accessing self.dtype would raise + raise TypeError("FloatingArray does not support np.float16 dtype.") + + super().__init__(values, mask, copy=copy) + + @cache_readonly + def dtype(self) -> NumericDtype: + mapping = self._dtype_cls._str_to_dtype_mapping() + return mapping[str(self._data.dtype)] + + @classmethod + def _coerce_to_array( + cls, value, *, dtype: DtypeObj, copy: bool = False + ) -> tuple[np.ndarray, np.ndarray]: + dtype_cls = cls._dtype_cls + default_dtype = dtype_cls._default_np_dtype + mask = None + values, mask, _, _ = _coerce_to_data_and_mask( + value, mask, dtype, copy, dtype_cls, default_dtype + ) + return values, mask + + @classmethod + def _from_sequence_of_strings( + cls: type[T], strings, *, dtype: Dtype | None = None, copy: bool = False + ) -> T: + from pandas.core.tools.numeric import to_numeric + + scalars = to_numeric(strings, errors="raise") + return cls._from_sequence(scalars, dtype=dtype, copy=copy) + + _HANDLED_TYPES = (np.ndarray, numbers.Number) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py new file mode 100644 index 00000000..36c67d2f --- /dev/null +++ b/pandas/core/arrays/numpy_.py @@ -0,0 +1,435 @@ +from __future__ import annotations + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + Dtype, + NpDtype, + Scalar, + npt, +) +from pandas.compat.numpy import function as nv + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas.core.dtypes.dtypes import PandasDtype +from pandas.core.dtypes.missing import isna + +from pandas.core import ( + arraylike, + nanops, + ops, +) +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.strings.object_array import ObjectStringArrayMixin + + +class PandasArray( + OpsMixin, + NDArrayBackedExtensionArray, + ObjectStringArrayMixin, +): + """ + A pandas ExtensionArray for NumPy data. + + This is mostly for internal compatibility, and is not especially + useful on its own. + + Parameters + ---------- + values : ndarray + The NumPy ndarray to wrap. Must be 1-dimensional. + copy : bool, default False + Whether to copy `values`. + + Attributes + ---------- + None + + Methods + ------- + None + """ + + # If you're wondering why pd.Series(cls) doesn't put the array in an + # ExtensionBlock, search for `ABCPandasArray`. We check for + # that _typ to ensure that users don't unnecessarily use EAs inside + # pandas internals, which turns off things like block consolidation. + _typ = "npy_extension" + __array_priority__ = 1000 + _ndarray: np.ndarray + _dtype: PandasDtype + _internal_fill_value = np.nan + + # ------------------------------------------------------------------------ + # Constructors + + def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None: + if isinstance(values, type(self)): + values = values._ndarray + if not isinstance(values, np.ndarray): + raise ValueError( + f"'values' must be a NumPy array, not {type(values).__name__}" + ) + + if values.ndim == 0: + # Technically we support 2, but do not advertise that fact. 
+ raise ValueError("PandasArray must be 1-dimensional.") + + if copy: + values = values.copy() + + dtype = PandasDtype(values.dtype) + super().__init__(values, dtype) + + @classmethod + def _from_sequence( + cls, scalars, *, dtype: Dtype | None = None, copy: bool = False + ) -> PandasArray: + if isinstance(dtype, PandasDtype): + dtype = dtype._dtype + + # error: Argument "dtype" to "asarray" has incompatible type + # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object], + # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type] + if ( + result.ndim > 1 + and not hasattr(scalars, "dtype") + and (dtype is None or dtype == object) + ): + # e.g. list-of-tuples + result = construct_1d_object_array_from_listlike(scalars) + + if copy and result is scalars: + result = result.copy() + return cls(result) + + def _from_backing_data(self, arr: np.ndarray) -> PandasArray: + return type(self)(arr) + + # ------------------------------------------------------------------------ + # Data + + @property + def dtype(self) -> PandasDtype: + return self._dtype + + # ------------------------------------------------------------------------ + # NumPy Array Interface + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + return np.asarray(self._ndarray, dtype=dtype) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # Lightly modified version of + # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html + # The primary modification is not boxing scalar return values + # in PandasArray, since pandas' ExtensionArrays are 1-d. + out = kwargs.get("out", ()) + + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + # e.g. test_ufunc_unary + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + # e.g. tests.series.test_ufunc.TestNumpyReductions + return result + + # Defer to the implementation of the ufunc on unwrapped values. + inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs) + if out: + kwargs["out"] = tuple( + x._ndarray if isinstance(x, PandasArray) else x for x in out + ) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if ufunc.nout > 1: + # multiple return values; re-box array-like results + return tuple(type(self)(x) for x in result) + elif method == "at": + # no return value + return None + elif method == "reduce": + if isinstance(result, np.ndarray): + # e.g. test_np_reduce_2d + return type(self)(result) + + # e.g. 
test_np_max_nested_tuples + return result + else: + # one return value; re-box array-like results + return type(self)(result) + + # ------------------------------------------------------------------------ + # Pandas ExtensionArray Interface + + def isna(self) -> np.ndarray: + return isna(self._ndarray) + + def _validate_scalar(self, fill_value): + if fill_value is None: + # Primarily for subclasses + fill_value = self.dtype.na_value + return fill_value + + def _values_for_factorize(self) -> tuple[np.ndarray, float | None]: + if self.dtype.kind in ["i", "u", "b"]: + fv = None + else: + fv = np.nan + return self._ndarray, fv + + # ------------------------------------------------------------------------ + # Reductions + + def any( + self, + *, + axis: int | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_any((), {"out": out, "keepdims": keepdims}) + result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def all( + self, + *, + axis: int | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_all((), {"out": out, "keepdims": keepdims}) + result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_min((), kwargs) + result = nanops.nanmin( + values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_max((), kwargs) + result = nanops.nanmax( + values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def sum( + self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: + nv.validate_sum((), kwargs) + result = nanops.nansum( + self._ndarray, axis=axis, skipna=skipna, min_count=min_count + ) + return self._wrap_reduction_result(axis, result) + + def prod( + self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: + nv.validate_prod((), kwargs) + result = nanops.nanprod( + self._ndarray, axis=axis, skipna=skipna, min_count=min_count + ) + return self._wrap_reduction_result(axis, result) + + def mean( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims}) + result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def median( + self, + *, + axis: int | None = None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_median( + (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims} + ) + result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def std( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + ddof=1, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" + ) + result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) + + def var( + self, + *, + 
axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + ddof=1, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var" + ) + result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) + + def sem( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + ddof=1, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem" + ) + result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) + + def kurt( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt" + ) + result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + def skew( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew" + ) + result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) + + # ------------------------------------------------------------------------ + # Additional Methods + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: object = lib.no_default, + ) -> np.ndarray: + result = np.asarray(self._ndarray, dtype=dtype) + + if (copy or na_value is not lib.no_default) and result is self._ndarray: + result = result.copy() + + if na_value is not lib.no_default: + result[self.isna()] = na_value + + return result + + # ------------------------------------------------------------------------ + # Ops + + def __invert__(self) -> PandasArray: + return type(self)(~self._ndarray) + + def __neg__(self) -> PandasArray: + return type(self)(-self._ndarray) + + def __pos__(self) -> PandasArray: + return type(self)(+self._ndarray) + + def __abs__(self) -> PandasArray: + return type(self)(abs(self._ndarray)) + + def _cmp_method(self, other, op): + if isinstance(other, PandasArray): + other = other._ndarray + + other = ops.maybe_prepare_scalar_for_op(other, (len(self),)) + pd_op = ops.get_array_op(op) + other = ensure_wrapped_if_datetimelike(other) + with np.errstate(all="ignore"): + result = pd_op(self._ndarray, other) + + if op is divmod or op is ops.rdivmod: + a, b = result + if isinstance(a, np.ndarray): + # for e.g. op vs TimedeltaArray, we may already + # have an ExtensionArray, in which case we do not wrap + return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b) + return a, b + + if isinstance(result, np.ndarray): + # for e.g. 
multiplication vs TimedeltaArray, we may already + # have an ExtensionArray, in which case we do not wrap + return self._wrap_ndarray_result(result) + return result + + _arith_method = _cmp_method + + def _wrap_ndarray_result(self, result: np.ndarray): + # If we have timedelta64[ns] result, return a TimedeltaArray instead + # of a PandasArray + if result.dtype == "timedelta64[ns]": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray._simple_new(result) + return type(self)(result) + + # ------------------------------------------------------------------------ + # String methods interface + _str_na_value = np.nan diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py new file mode 100644 index 00000000..e6ea7b08 --- /dev/null +++ b/pandas/core/arrays/period.py @@ -0,0 +1,1182 @@ +from __future__ import annotations + +from datetime import timedelta +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Literal, + Sequence, + TypeVar, + overload, +) + +import numpy as np + +from pandas._libs import ( + algos as libalgos, + lib, +) +from pandas._libs.arrays import NDArrayBacked +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + NaTType, + Timedelta, + astype_overflowsafe, + dt64arr_to_periodarr as c_dt64arr_to_periodarr, + get_unit_from_dtype, + iNaT, + parsing, + period as libperiod, + to_offset, +) +from pandas._libs.tslibs.dtypes import FreqGroup +from pandas._libs.tslibs.fields import isleapyear_arr +from pandas._libs.tslibs.offsets import ( + Tick, + delta_to_tick, +) +from pandas._libs.tslibs.period import ( + DIFFERENT_FREQ, + IncompatibleFrequency, + Period, + get_period_field_arr, + period_asfreq_arr, +) +from pandas._typing import ( + AnyArrayLike, + Dtype, + NpDtype, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) + +from pandas.core.dtypes.common import ( + ensure_object, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_period_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCPeriodIndex, + ABCSeries, + ABCTimedeltaArray, +) +from pandas.core.dtypes.missing import isna + +import pandas.core.algorithms as algos +from pandas.core.arrays import datetimelike as dtl +import pandas.core.common as com + +if TYPE_CHECKING: + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + + from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, + ) + from pandas.core.arrays.base import ExtensionArray + + +BaseOffsetT = TypeVar("BaseOffsetT", bound=BaseOffset) + + +_shared_doc_kwargs = { + "klass": "PeriodArray", +} + + +def _field_accessor(name: str, docstring=None): + def f(self): + base = self.freq._period_dtype_code + result = get_period_field_arr(name, self.asi8, base) + return result + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + +class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): + """ + Pandas ExtensionArray for storing Period data. + + Users should use :func:`~pandas.period_array` to create new instances. + Alternatively, :func:`~pandas.array` can be used to create new instances + from a sequence of Period scalars. + + Parameters + ---------- + values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex] + The data to store. 
These should be arrays that can be directly + converted to ordinals without inference or copy (PeriodArray, + ndarray[int64]), or a box around such an array (Series[period], + PeriodIndex). + dtype : PeriodDtype, optional + A PeriodDtype instance from which to extract a `freq`. If both + `freq` and `dtype` are specified, then the frequencies must match. + freq : str or DateOffset + The `freq` to use for the array. Mostly applicable when `values` + is an ndarray of integers, when `freq` is required. When `values` + is a PeriodArray (or box around), it's checked that ``values.freq`` + matches `freq`. + copy : bool, default False + Whether to copy the ordinals before storing. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + Period: Represents a period of time. + PeriodIndex : Immutable Index for period data. + period_range: Create a fixed-frequency PeriodArray. + array: Construct a pandas array. + + Notes + ----- + There are two components to a PeriodArray + + - ordinals : integer ndarray + - freq : pd.tseries.offsets.Offset + + The values are physically stored as a 1-D ndarray of integers. These are + called "ordinals" and represent some kind of offset from a base. + + The `freq` indicates the span covered by each element of the array. + All elements in the PeriodArray have the same `freq`. + """ + + # array priority higher than numpy scalars + __array_priority__ = 1000 + _typ = "periodarray" # ABCPeriodArray + _internal_fill_value = np.int64(iNaT) + _recognized_scalars = (Period,) + _is_recognized_dtype = is_period_dtype + _infer_matches = ("period",) + + @property + def _scalar_type(self) -> type[Period]: + return Period + + # Names others delegate to us + _other_ops: list[str] = [] + _bool_ops: list[str] = ["is_leap_year"] + _object_ops: list[str] = ["start_time", "end_time", "freq"] + _field_ops: list[str] = [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "weekofyear", + "weekday", + "week", + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "qyear", + "days_in_month", + "daysinmonth", + ] + _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"] + + _dtype: PeriodDtype + + # -------------------------------------------------------------------- + # Constructors + + def __init__( + self, values, dtype: Dtype | None = None, freq=None, copy: bool = False + ) -> None: + freq = validate_dtype_freq(dtype, freq) + + if freq is not None: + freq = Period._maybe_convert_freq(freq) + + if isinstance(values, ABCSeries): + values = values._values + if not isinstance(values, type(self)): + raise TypeError("Incorrect dtype") + + elif isinstance(values, ABCPeriodIndex): + values = values._values + + if isinstance(values, type(self)): + if freq is not None and freq != values.freq: + raise raise_on_incompatible(values, freq) + values, freq = values._ndarray, values.freq + + values = np.array(values, dtype="int64", copy=copy) + if freq is None: + raise ValueError("freq is not specified and cannot be inferred") + NDArrayBacked.__init__(self, values, PeriodDtype(freq)) + + # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" + @classmethod + def _simple_new( # type: ignore[override] + cls, + values: np.ndarray, + freq: BaseOffset | None = None, + dtype: Dtype | None = None, + ) -> PeriodArray: + # alias for PeriodArray.__init__ + assertion_msg = "Should be numpy array of type i8" + assert isinstance(values, 
np.ndarray) and values.dtype == "i8", assertion_msg + return cls(values, freq=freq, dtype=dtype) + + @classmethod + def _from_sequence( + cls: type[PeriodArray], + scalars: Sequence[Period | None] | AnyArrayLike, + *, + dtype: Dtype | None = None, + copy: bool = False, + ) -> PeriodArray: + if dtype and isinstance(dtype, PeriodDtype): + freq = dtype.freq + else: + freq = None + + if isinstance(scalars, cls): + validate_dtype_freq(scalars.dtype, freq) + if copy: + scalars = scalars.copy() + return scalars + + periods = np.asarray(scalars, dtype=object) + + freq = freq or libperiod.extract_freq(periods) + ordinals = libperiod.extract_ordinals(periods, freq) + return cls(ordinals, freq=freq) + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype: Dtype | None = None, copy: bool = False + ) -> PeriodArray: + return cls._from_sequence(strings, dtype=dtype, copy=copy) + + @classmethod + def _from_datetime64(cls, data, freq, tz=None) -> PeriodArray: + """ + Construct a PeriodArray from a datetime64 array + + Parameters + ---------- + data : ndarray[datetime64[ns], datetime64[ns, tz]] + freq : str or Tick + tz : tzinfo, optional + + Returns + ------- + PeriodArray[freq] + """ + data, freq = dt64arr_to_periodarr(data, freq, tz) + return cls(data, freq=freq) + + @classmethod + def _generate_range(cls, start, end, periods, freq, fields): + periods = dtl.validate_periods(periods) + + if freq is not None: + freq = Period._maybe_convert_freq(freq) + + field_count = len(fields) + if start is not None or end is not None: + if field_count > 0: + raise ValueError( + "Can either instantiate from fields or endpoints, but not both" + ) + subarr, freq = _get_ordinal_range(start, end, periods, freq) + elif field_count > 0: + subarr, freq = _range_from_fields(freq=freq, **fields) + else: + raise ValueError("Not enough parameters to construct Period range") + + return subarr, freq + + # ----------------------------------------------------------------- + # DatetimeLike Interface + + # error: Argument 1 of "_unbox_scalar" is incompatible with supertype + # "DatetimeLikeArrayMixin"; supertype defines the argument type as + # "Union[Union[Period, Any, Timedelta], NaTType]" + def _unbox_scalar( # type: ignore[override] + self, + value: Period | NaTType, + setitem: bool = False, + ) -> np.int64: + if value is NaT: + # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value" + return np.int64(value.value) # type: ignore[union-attr] + elif isinstance(value, self._scalar_type): + self._check_compatible_with(value, setitem=setitem) + return np.int64(value.ordinal) + else: + raise ValueError(f"'value' should be a Period. Got '{value}' instead.") + + def _scalar_from_string(self, value: str) -> Period: + return Period(value, freq=self.freq) + + def _check_compatible_with(self, other, setitem: bool = False): + if other is NaT: + return + self._require_matching_freq(other) + + # -------------------------------------------------------------------- + # Data / Attributes + + @cache_readonly + def dtype(self) -> PeriodDtype: + return self._dtype + + # error: Read-only property cannot override read-write property + @property # type: ignore[misc] + def freq(self) -> BaseOffset: + """ + Return the frequency object for this PeriodArray. 
+ """ + return self.dtype.freq + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + if dtype == "i8": + return self.asi8 + elif dtype == bool: + return ~self._isnan + + # This will raise TypeError for non-object dtypes + return np.array(list(self), dtype=object) + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. + """ + import pyarrow + + from pandas.core.arrays.arrow.extension_types import ArrowPeriodType + + if type is not None: + if pyarrow.types.is_integer(type): + return pyarrow.array(self._ndarray, mask=self.isna(), type=type) + elif isinstance(type, ArrowPeriodType): + # ensure we have the same freq + if self.freqstr != type.freq: + raise TypeError( + "Not supported to convert PeriodArray to array with different " + f"'freq' ({self.freqstr} vs {type.freq})" + ) + else: + raise TypeError( + f"Not supported to convert PeriodArray to '{type}' type" + ) + + period_type = ArrowPeriodType(self.freqstr) + storage_array = pyarrow.array(self._ndarray, mask=self.isna(), type="int64") + return pyarrow.ExtensionArray.from_storage(period_type, storage_array) + + # -------------------------------------------------------------------- + # Vectorized analogues of Period properties + + year = _field_accessor( + "year", + """ + The year of the period. + """, + ) + month = _field_accessor( + "month", + """ + The month as January=1, December=12. + """, + ) + day = _field_accessor( + "day", + """ + The days of the period. + """, + ) + hour = _field_accessor( + "hour", + """ + The hour of the period. + """, + ) + minute = _field_accessor( + "minute", + """ + The minute of the period. + """, + ) + second = _field_accessor( + "second", + """ + The second of the period. + """, + ) + weekofyear = _field_accessor( + "week", + """ + The week ordinal of the year. + """, + ) + week = weekofyear + day_of_week = _field_accessor( + "day_of_week", + """ + The day of the week with Monday=0, Sunday=6. + """, + ) + dayofweek = day_of_week + weekday = dayofweek + dayofyear = day_of_year = _field_accessor( + "day_of_year", + """ + The ordinal day of the year. + """, + ) + quarter = _field_accessor( + "quarter", + """ + The quarter of the date. + """, + ) + qyear = _field_accessor("qyear") + days_in_month = _field_accessor( + "days_in_month", + """ + The number of days in the month. + """, + ) + daysinmonth = days_in_month + + @property + def is_leap_year(self) -> np.ndarray: + """ + Logical indicating if the date belongs to a leap year. + """ + return isleapyear_arr(np.asarray(self.year)) + + def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: + """ + Cast to DatetimeArray/Index. + + Parameters + ---------- + freq : str or DateOffset, optional + Target frequency. The default is 'D' for week or longer, + 'S' otherwise. + how : {'s', 'e', 'start', 'end'} + Whether to use the start or end of the time period being converted. 
+ + Returns + ------- + DatetimeArray/Index + """ + from pandas.core.arrays import DatetimeArray + + how = libperiod.validate_end_alias(how) + + end = how == "E" + if end: + if freq == "B" or self.freq == "B": + # roll forward to ensure we land on B date + adjust = Timedelta(1, "D") - Timedelta(1, "ns") + return self.to_timestamp(how="start") + adjust + else: + adjust = Timedelta(1, "ns") + return (self + self.freq).to_timestamp(how="start") - adjust + + if freq is None: + freq = self._dtype._get_to_timestamp_base() + base = freq + else: + freq = Period._maybe_convert_freq(freq) + base = freq._period_dtype_code + + new_parr = self.asfreq(freq, how=how) + + new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base) + dta = DatetimeArray(new_data) + + if self.freq.name == "B": + # See if we can retain BDay instead of Day in cases where + # len(self) is too small for infer_freq to distinguish between them + diffs = libalgos.unique_deltas(self.asi8) + if len(diffs) == 1: + diff = diffs[0] + if diff == self.freq.n: + dta._freq = self.freq + elif diff == 1: + dta._freq = self.freq.base + # TODO: other cases? + return dta + else: + return dta._with_freq("infer") + + # -------------------------------------------------------------------- + + def _time_shift(self, periods: int, freq=None) -> PeriodArray: + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or str + Frequency increment to shift by. + """ + if freq is not None: + raise TypeError( + "`freq` argument is not supported for " + f"{type(self).__name__}._time_shift" + ) + return self + periods + + def _box_func(self, x) -> Period | NaTType: + return Period._from_ordinal(ordinal=x, freq=self.freq) + + @doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex") + def asfreq(self, freq=None, how: str = "E") -> PeriodArray: + """ + Convert the {klass} to the specified frequency `freq`. + + Equivalent to applying :meth:`pandas.Period.asfreq` with the given arguments + to each :class:`~pandas.Period` in this {klass}. + + Parameters + ---------- + freq : str + A frequency. + how : str {{'E', 'S'}}, default 'E' + Whether the elements should be aligned to the end + or start within a period. + + * 'E', 'END', or 'FINISH' for end, + * 'S', 'START', or 'BEGIN' for start. + + January 31st ('END') vs. January 1st ('START') for example. + + Returns + ------- + {klass} + The transformed {klass} with the new frequency. + + See Also + -------- + {other}.asfreq: Convert each Period in a {other_name} to the given frequency. + Period.asfreq : Convert a :class:`~pandas.Period` object to the given frequency.
+ + Examples + -------- + >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A') + >>> pidx + PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'], + dtype='period[A-DEC]') + + >>> pidx.asfreq('M') + PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12', + '2015-12'], dtype='period[M]') + + >>> pidx.asfreq('M', how='S') + PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01', + '2015-01'], dtype='period[M]') + """ + how = libperiod.validate_end_alias(how) + + freq = Period._maybe_convert_freq(freq) + + base1 = self._dtype._dtype_code + base2 = freq._period_dtype_code + + asi8 = self.asi8 + # self.freq.n can't be negative or 0 + end = how == "E" + if end: + ordinal = asi8 + self.freq.n - 1 + else: + ordinal = asi8 + + new_data = period_asfreq_arr(ordinal, base1, base2, end) + + if self._hasna: + new_data[self._isnan] = iNaT + + return type(self)(new_data, freq=freq) + + # ------------------------------------------------------------------ + # Rendering Methods + + def _formatter(self, boxed: bool = False): + if boxed: + return str + return "'{}'".format + + @dtl.ravel_compat + def _format_native_types( + self, *, na_rep="NaT", date_format=None, **kwargs + ) -> npt.NDArray[np.object_]: + """ + actually format my specific types + """ + values = self.astype(object) + + # Create the formatter function + if date_format: + formatter = lambda per: per.strftime(date_format) + else: + # Uses `_Period.str` which in turn uses `format_period` + formatter = lambda per: str(per) + + # Apply the formatter to all values in the array, possibly with a mask + if self._hasna: + mask = self._isnan + values[mask] = na_rep + imask = ~mask + values[imask] = np.array([formatter(per) for per in values[imask]]) + else: + values = np.array([formatter(per) for per in values]) + return values + + # ------------------------------------------------------------------ + + def astype(self, dtype, copy: bool = True): + # We handle Period[T] -> Period[U] + # Our parent handles everything else. + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + else: + return self.copy() + if is_period_dtype(dtype): + return self.asfreq(dtype.freq) + + if is_datetime64_any_dtype(dtype): + # GH#45038 match PeriodIndex behavior. 
+ tz = getattr(dtype, "tz", None) + return self.to_timestamp().tz_localize(tz) + + return super().astype(dtype, copy=copy) + + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + npvalue = self._validate_searchsorted_value(value).view("M8[ns]") + + # Cast to M8 to get datetime-like NaT placement + m8arr = self._ndarray.view("M8[ns]") + return m8arr.searchsorted(npvalue, side=side, sorter=sorter) + + def fillna(self, value=None, method=None, limit=None) -> PeriodArray: + if method is not None: + # view as dt64 so we get treated as timelike in core.missing + dta = self.view("M8[ns]") + result = dta.fillna(value=value, method=method, limit=limit) + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray[Any, Any]]", expected "PeriodArray") + return result.view(self.dtype) # type: ignore[return-value] + return super().fillna(value=value, method=method, limit=limit) + + def _quantile( + self: PeriodArray, + qs: npt.NDArray[np.float64], + interpolation: str, + ) -> PeriodArray: + # dispatch to DatetimeArray implementation + dtres = self.view("M8[ns]")._quantile(qs, interpolation) + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray[Any, Any]]", expected "PeriodArray") + return dtres.view(self.dtype) # type: ignore[return-value] + + # ------------------------------------------------------------------ + # Arithmetic Methods + + def _addsub_int_array_or_scalar( + self, other: np.ndarray | int, op: Callable[[Any, Any], Any] + ) -> PeriodArray: + """ + Add or subtract array of integers; equivalent to applying + `_time_shift` pointwise. + + Parameters + ---------- + other : np.ndarray[int64] or int + op : {operator.add, operator.sub} + + Returns + ------- + result : PeriodArray + """ + assert op in [operator.add, operator.sub] + if op is operator.sub: + other = -other + res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan) + return type(self)(res_values, freq=self.freq) + + def _add_offset(self, other: BaseOffset): + assert not isinstance(other, Tick) + + self._require_matching_freq(other, base=True) + return self._addsub_int_array_or_scalar(other.n, operator.add) + + # TODO: can we de-duplicate with Period._add_timedeltalike_scalar? + def _add_timedeltalike_scalar(self, other): + """ + Parameters + ---------- + other : timedelta, Tick, np.timedelta64 + + Returns + ------- + PeriodArray + """ + if not isinstance(self.freq, Tick): + # We cannot add timedelta-like to non-tick PeriodArray + raise raise_on_incompatible(self, other) + + if isna(other): + # i.e. np.timedelta64("NaT") + return super()._add_timedeltalike_scalar(other) + + td = np.asarray(Timedelta(other).asm8) + return self._add_timedelta_arraylike(td) + + def _add_timedelta_arraylike( + self, other: TimedeltaArray | npt.NDArray[np.timedelta64] + ) -> PeriodArray: + """ + Parameters + ---------- + other : TimedeltaArray or ndarray[timedelta64] + + Returns + ------- + PeriodArray + """ + freq = self.freq + if not isinstance(freq, Tick): + # We cannot add timedelta-like to non-tick PeriodArray + raise TypeError( + f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}" + ) + + dtype = np.dtype(f"m8[{freq._td64_unit}]") + + try: + delta = astype_overflowsafe( + np.asarray(other), dtype=dtype, copy=False, round_ok=False + ) + except ValueError as err: + # e.g. 
if we have minutes freq and try to add 30s + # "Cannot losslessly convert units" + raise IncompatibleFrequency( + "Cannot add/subtract timedelta-like from PeriodArray that is " + "not an integer multiple of the PeriodArray's freq." + ) from err + + b_mask = np.isnat(delta) + + res_values = algos.checked_add_with_arr( + self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask + ) + np.putmask(res_values, self._isnan | b_mask, iNaT) + return type(self)(res_values, freq=self.freq) + + def _check_timedeltalike_freq_compat(self, other): + """ + Arithmetic operations with timedelta-like scalars or array `other` + are only valid if `other` is an integer multiple of `self.freq`. + If the operation is valid, find that integer multiple. Otherwise, + raise because the operation is invalid. + + Parameters + ---------- + other : timedelta, np.timedelta64, Tick, + ndarray[timedelta64], TimedeltaArray, TimedeltaIndex + + Returns + ------- + multiple : int or ndarray[int64] + + Raises + ------ + IncompatibleFrequency + """ + assert isinstance(self.freq, Tick) # checked by calling function + + dtype = np.dtype(f"m8[{self.freq._td64_unit}]") + + if isinstance(other, (timedelta, np.timedelta64, Tick)): + td = np.asarray(Timedelta(other).asm8) + else: + td = np.asarray(other) + + try: + delta = astype_overflowsafe(td, dtype=dtype, copy=False, round_ok=False) + except ValueError as err: + raise raise_on_incompatible(self, other) from err + + delta = delta.view("i8") + return lib.item_from_zerodim(delta) + + +def raise_on_incompatible(left, right): + """ + Helper function to render a consistent error message when raising + IncompatibleFrequency. + + Parameters + ---------- + left : PeriodArray + right : None, DateOffset, Period, ndarray, or timedelta-like + + Returns + ------- + IncompatibleFrequency + Exception to be raised by the caller. + """ + # GH#24283 error message format depends on whether right is scalar + if isinstance(right, (np.ndarray, ABCTimedeltaArray)) or right is None: + other_freq = None + elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, BaseOffset)): + other_freq = right.freqstr + else: + other_freq = delta_to_tick(Timedelta(right)).freqstr + + msg = DIFFERENT_FREQ.format( + cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq + ) + return IncompatibleFrequency(msg) + + +# ------------------------------------------------------------------- +# Constructor Helpers + + +def period_array( + data: Sequence[Period | str | None] | AnyArrayLike, + freq: str | Tick | None = None, + copy: bool = False, +) -> PeriodArray: + """ + Construct a new PeriodArray from a sequence of Period scalars. + + Parameters + ---------- + data : Sequence of Period objects + A sequence of Period objects. These are required to all have + the same ``freq.`` Missing values can be indicated by ``None`` + or ``pandas.NaT``. + freq : str, Tick, or Offset + The frequency of every element of the array. This can be specified + to avoid inferring the `freq` from `data`. + copy : bool, default False + Whether to ensure a copy of the data is made. + + Returns + ------- + PeriodArray + + See Also + -------- + PeriodArray + pandas.PeriodIndex + + Examples + -------- + >>> period_array([pd.Period('2017', freq='A'), + ... pd.Period('2018', freq='A')]) + + ['2017', '2018'] + Length: 2, dtype: period[A-DEC] + + >>> period_array([pd.Period('2017', freq='A'), + ... pd.Period('2018', freq='A'), + ... 
pd.NaT]) + + ['2017', '2018', 'NaT'] + Length: 3, dtype: period[A-DEC] + + Integers that look like years are handled + + >>> period_array([2000, 2001, 2002], freq='D') + + ['2000-01-01', '2001-01-01', '2002-01-01'] + Length: 3, dtype: period[D] + + Datetime-like strings may also be passed + + >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') + + ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] + Length: 4, dtype: period[Q-DEC] + """ + data_dtype = getattr(data, "dtype", None) + + if is_datetime64_dtype(data_dtype): + return PeriodArray._from_datetime64(data, freq) + if is_period_dtype(data_dtype): + return PeriodArray(data, freq=freq) + + # other iterable of some kind + if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)): + data = list(data) + + arrdata = np.asarray(data) + + dtype: PeriodDtype | None + if freq: + dtype = PeriodDtype(freq) + else: + dtype = None + + if is_float_dtype(arrdata) and len(arrdata) > 0: + raise TypeError("PeriodIndex does not allow floating point in construction") + + if is_integer_dtype(arrdata.dtype): + arr = arrdata.astype(np.int64, copy=False) + # error: Argument 2 to "from_ordinals" has incompatible type "Union[str, + # Tick, None]"; expected "Union[timedelta, BaseOffset, str]" + ordinals = libperiod.from_ordinals(arr, freq) # type: ignore[arg-type] + return PeriodArray(ordinals, dtype=dtype) + + data = ensure_object(arrdata) + + return PeriodArray._from_sequence(data, dtype=dtype) + + +@overload +def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT: + ... + + +@overload +def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset: + ... + + +def validate_dtype_freq( + dtype, freq: BaseOffsetT | timedelta | str | None +) -> BaseOffsetT: + """ + If both a dtype and a freq are available, ensure they match. If only + dtype is available, extract the implied freq. + + Parameters + ---------- + dtype : dtype + freq : DateOffset or None + + Returns + ------- + freq : DateOffset + + Raises + ------ + ValueError : non-period dtype + IncompatibleFrequency : mismatch between dtype and freq + """ + if freq is not None: + # error: Incompatible types in assignment (expression has type + # "BaseOffset", variable has type "Union[BaseOffsetT, timedelta, + # str, None]") + freq = to_offset(freq) # type: ignore[assignment] + + if dtype is not None: + dtype = pandas_dtype(dtype) + if not is_period_dtype(dtype): + raise ValueError("dtype must be PeriodDtype") + if freq is None: + freq = dtype.freq + elif freq != dtype.freq: + raise IncompatibleFrequency("specified freq and dtype are different") + # error: Incompatible return value type (got "Union[BaseOffset, Any, None]", + # expected "BaseOffset") + return freq # type: ignore[return-value] + + +def dt64arr_to_periodarr( + data, freq, tz=None +) -> tuple[npt.NDArray[np.int64], BaseOffset]: + """ + Convert an datetime-like array to values Period ordinals. + + Parameters + ---------- + data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]] + freq : Optional[Union[str, Tick]] + Must match the `freq` on the `data` if `data` is a DatetimeIndex + or Series. + tz : Optional[tzinfo] + + Returns + ------- + ordinals : ndarray[int64] + freq : Tick + The frequency extracted from the Series or DatetimeIndex if that's + used. 
+ + """ + if not isinstance(data.dtype, np.dtype) or data.dtype.kind != "M": + raise ValueError(f"Wrong dtype: {data.dtype}") + + if freq is None: + if isinstance(data, ABCIndex): + data, freq = data._values, data.freq + elif isinstance(data, ABCSeries): + data, freq = data._values, data.dt.freq + + elif isinstance(data, (ABCIndex, ABCSeries)): + data = data._values + + reso = get_unit_from_dtype(data.dtype) + freq = Period._maybe_convert_freq(freq) + base = freq._period_dtype_code + return c_dt64arr_to_periodarr(data.view("i8"), base, tz, reso=reso), freq + + +def _get_ordinal_range(start, end, periods, freq, mult=1): + if com.count_not_none(start, end, periods) != 2: + raise ValueError( + "Of the three parameters: start, end, and periods, " + "exactly two must be specified" + ) + + if freq is not None: + freq = to_offset(freq) + mult = freq.n + + if start is not None: + start = Period(start, freq) + if end is not None: + end = Period(end, freq) + + is_start_per = isinstance(start, Period) + is_end_per = isinstance(end, Period) + + if is_start_per and is_end_per and start.freq != end.freq: + raise ValueError("start and end must have same freq") + if start is NaT or end is NaT: + raise ValueError("start and end must not be NaT") + + if freq is None: + if is_start_per: + freq = start.freq + elif is_end_per: + freq = end.freq + else: # pragma: no cover + raise ValueError("Could not infer freq from start/end") + + if periods is not None: + periods = periods * mult + if start is None: + data = np.arange( + end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64 + ) + else: + data = np.arange( + start.ordinal, start.ordinal + periods, mult, dtype=np.int64 + ) + else: + data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) + + return data, freq + + +def _range_from_fields( + year=None, + month=None, + quarter=None, + day=None, + hour=None, + minute=None, + second=None, + freq=None, +) -> tuple[np.ndarray, BaseOffset]: + if hour is None: + hour = 0 + if minute is None: + minute = 0 + if second is None: + second = 0 + if day is None: + day = 1 + + ordinals = [] + + if quarter is not None: + if freq is None: + freq = to_offset("Q") + base = FreqGroup.FR_QTR.value + else: + freq = to_offset(freq) + base = libperiod.freq_to_dtype_code(freq) + if base != FreqGroup.FR_QTR.value: + raise AssertionError("base must equal FR_QTR") + + freqstr = freq.freqstr + year, quarter = _make_field_arrays(year, quarter) + for y, q in zip(year, quarter): + y, m = parsing.quarter_to_myear(y, q, freqstr) + val = libperiod.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base) + ordinals.append(val) + else: + freq = to_offset(freq) + base = libperiod.freq_to_dtype_code(freq) + arrays = _make_field_arrays(year, month, day, hour, minute, second) + for y, mth, d, h, mn, s in zip(*arrays): + ordinals.append(libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)) + + return np.array(ordinals, dtype=np.int64), freq + + +def _make_field_arrays(*fields) -> list[np.ndarray]: + length = None + for x in fields: + if isinstance(x, (list, np.ndarray, ABCSeries)): + if length is not None and len(x) != length: + raise ValueError("Mismatched Period array lengths") + elif length is None: + length = len(x) + + # error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected + # "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int, + # integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]" + return [ + np.asarray(x) + if isinstance(x, (np.ndarray, list, 
ABCSeries)) + else np.repeat(x, length) # type: ignore[arg-type] + for x in fields + ] diff --git a/pandas/core/arrays/sparse/__init__.py b/pandas/core/arrays/sparse/__init__.py new file mode 100644 index 00000000..56dbc6df --- /dev/null +++ b/pandas/core/arrays/sparse/__init__.py @@ -0,0 +1,21 @@ +from pandas.core.arrays.sparse.accessor import ( + SparseAccessor, + SparseFrameAccessor, +) +from pandas.core.arrays.sparse.array import ( + BlockIndex, + IntIndex, + SparseArray, + make_sparse_index, +) +from pandas.core.arrays.sparse.dtype import SparseDtype + +__all__ = [ + "BlockIndex", + "IntIndex", + "make_sparse_index", + "SparseAccessor", + "SparseArray", + "SparseDtype", + "SparseFrameAccessor", +] diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py new file mode 100644 index 00000000..f1e4412f --- /dev/null +++ b/pandas/core/arrays/sparse/accessor.py @@ -0,0 +1,396 @@ +"""Sparse accessor""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.cast import find_common_type + +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) +from pandas.core.arrays.sparse.array import SparseArray +from pandas.core.arrays.sparse.dtype import SparseDtype + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + + +class BaseAccessor: + _validation_msg = "Can only use the '.sparse' accessor with Sparse data." + + def __init__(self, data=None) -> None: + self._parent = data + self._validate(data) + + def _validate(self, data): + raise NotImplementedError + + +@delegate_names( + SparseArray, ["npoints", "density", "fill_value", "sp_values"], typ="property" +) +class SparseAccessor(BaseAccessor, PandasDelegate): + """ + Accessor for sparse data in a Series. + """ + + def _validate(self, data): + if not isinstance(data.dtype, SparseDtype): + raise AttributeError(self._validation_msg) + + def _delegate_property_get(self, name, *args, **kwargs): + return getattr(self._parent.array, name) + + def _delegate_method(self, name, *args, **kwargs): + if name == "from_coo": + return self.from_coo(*args, **kwargs) + elif name == "to_coo": + return self.to_coo(*args, **kwargs) + else: + raise ValueError + + @classmethod + def from_coo(cls, A, dense_index=False) -> Series: + """ + Create a Series with sparse values from a scipy.sparse.coo_matrix. + + Parameters + ---------- + A : scipy.sparse.coo_matrix + dense_index : bool, default False + If False (default), the SparseSeries index consists of only the + coords of the non-null entries of the original coo_matrix. + If True, the SparseSeries index consists of the full sorted + (row, col) coordinates of the coo_matrix. + + Returns + ------- + s : Series + A Series with sparse values. + + Examples + -------- + >>> from scipy import sparse + + >>> A = sparse.coo_matrix( + ... ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4) + ...
) + >>> A + <3x4 sparse matrix of type '<class 'numpy.float64'>' + with 3 stored elements in COOrdinate format> + + >>> A.todense() + matrix([[0., 0., 1., 2.], + [3., 0., 0., 0.], + [0., 0., 0., 0.]]) + + >>> ss = pd.Series.sparse.from_coo(A) + >>> ss + 0 2 1.0 + 3 2.0 + 1 0 3.0 + dtype: Sparse[float64, nan] + """ + from pandas import Series + from pandas.core.arrays.sparse.scipy_sparse import coo_to_sparse_series + + result = coo_to_sparse_series(A, dense_index=dense_index) + result = Series(result.array, index=result.index, copy=False) + + return result + + def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): + """ + Create a scipy.sparse.coo_matrix from a Series with MultiIndex. + + Use row_levels and column_levels to determine the row and column + coordinates respectively. row_levels and column_levels are the names + (labels) or numbers of the levels. {row_levels, column_levels} must be + a partition of the MultiIndex level names (or numbers). + + Parameters + ---------- + row_levels : tuple/list + column_levels : tuple/list + sort_labels : bool, default False + Sort the row and column labels before forming the sparse matrix. + When `row_levels` and/or `column_levels` refer to a single level, + set to `True` for a faster execution. + + Returns + ------- + y : scipy.sparse.coo_matrix + rows : list (row labels) + columns : list (column labels) + + Examples + -------- + >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) + >>> s.index = pd.MultiIndex.from_tuples( + ... [ + ... (1, 2, "a", 0), + ... (1, 2, "a", 1), + ... (1, 1, "b", 0), + ... (1, 1, "b", 1), + ... (2, 1, "b", 0), + ... (2, 1, "b", 1) + ... ], + ... names=["A", "B", "C", "D"], + ... ) + >>> s + A B C D + 1 2 a 0 3.0 + 1 NaN + 1 b 0 1.0 + 1 3.0 + 2 1 b 0 NaN + 1 NaN + dtype: float64 + + >>> ss = s.astype("Sparse") + >>> ss + A B C D + 1 2 a 0 3.0 + 1 NaN + 1 b 0 1.0 + 1 3.0 + 2 1 b 0 NaN + 1 NaN + dtype: Sparse[float64, nan] + + >>> A, rows, columns = ss.sparse.to_coo( + ... row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True + ... ) + >>> A + <3x4 sparse matrix of type '<class 'numpy.float64'>' + with 3 stored elements in COOrdinate format> + >>> A.todense() + matrix([[0., 0., 1., 3.], + [3., 0., 0., 0.], + [0., 0., 0., 0.]]) + + >>> rows + [(1, 1), (1, 2), (2, 1)] + >>> columns + [('a', 0), ('a', 1), ('b', 0), ('b', 1)] + """ + from pandas.core.arrays.sparse.scipy_sparse import sparse_series_to_coo + + A, rows, columns = sparse_series_to_coo( + self._parent, row_levels, column_levels, sort_labels=sort_labels + ) + return A, rows, columns + + def to_dense(self) -> Series: + """ + Convert a Series from sparse values to dense. + + .. versionadded:: 0.25.0 + + Returns + ------- + Series: + A Series with the same values, stored as a dense array. + + Examples + -------- + >>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0])) + >>> series + 0 0 + 1 1 + 2 0 + dtype: Sparse[int64, 0] + + >>> series.sparse.to_dense() + 0 0 + 1 1 + 2 0 + dtype: int64 + """ + from pandas import Series + + return Series( + self._parent.array.to_dense(), + index=self._parent.index, + name=self._parent.name, + ) + + +class SparseFrameAccessor(BaseAccessor, PandasDelegate): + """ + DataFrame accessor for sparse data. + + .. versionadded:: 0.25.0 + """ + + def _validate(self, data): + dtypes = data.dtypes + if not all(isinstance(t, SparseDtype) for t in dtypes): + raise AttributeError(self._validation_msg) + + @classmethod + def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame: + """ + Create a new DataFrame from a scipy sparse matrix. + + ..
versionadded:: 0.25.0 + + Parameters + ---------- + data : scipy.sparse.spmatrix + Must be convertible to csc format. + index, columns : Index, optional + Row and column labels to use for the resulting DataFrame. + Defaults to a RangeIndex. + + Returns + ------- + DataFrame + Each column of the DataFrame is stored as a + :class:`arrays.SparseArray`. + + Examples + -------- + >>> import scipy.sparse + >>> mat = scipy.sparse.eye(3) + >>> pd.DataFrame.sparse.from_spmatrix(mat) + 0 1 2 + 0 1.0 0.0 0.0 + 1 0.0 1.0 0.0 + 2 0.0 0.0 1.0 + """ + from pandas._libs.sparse import IntIndex + + from pandas import DataFrame + + data = data.tocsc() + index, columns = cls._prep_index(data, index, columns) + n_rows, n_columns = data.shape + # We need to make sure indices are sorted, as we create + # IntIndex with no input validation (i.e. check_integrity=False ). + # Indices may already be sorted in scipy in which case this adds + # a small overhead. + data.sort_indices() + indices = data.indices + indptr = data.indptr + array_data = data.data + dtype = SparseDtype(array_data.dtype, 0) + arrays = [] + for i in range(n_columns): + sl = slice(indptr[i], indptr[i + 1]) + idx = IntIndex(n_rows, indices[sl], check_integrity=False) + arr = SparseArray._simple_new(array_data[sl], idx, dtype) + arrays.append(arr) + return DataFrame._from_arrays( + arrays, columns=columns, index=index, verify_integrity=False + ) + + def to_dense(self) -> DataFrame: + """ + Convert a DataFrame with sparse values to dense. + + .. versionadded:: 0.25.0 + + Returns + ------- + DataFrame + A DataFrame with the same values stored as dense arrays. + + Examples + -------- + >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])}) + >>> df.sparse.to_dense() + A + 0 0 + 1 1 + 2 0 + """ + from pandas import DataFrame + + data = {k: v.array.to_dense() for k, v in self._parent.items()} + return DataFrame(data, index=self._parent.index, columns=self._parent.columns) + + def to_coo(self): + """ + Return the contents of the frame as a sparse SciPy COO matrix. + + .. versionadded:: 0.25.0 + + Returns + ------- + coo_matrix : scipy.sparse.spmatrix + If the caller is heterogeneous and contains booleans or objects, + the result will be of dtype=object. See Notes. + + Notes + ----- + The dtype will be the lowest-common-denominator type (implicit + upcasting); that is to say if the dtypes (even of numeric types) + are mixed, the one that accommodates all will be chosen. + + e.g. If the dtypes are float16 and float32, dtype will be upcast to + float32. By numpy.find_common_type convention, mixing int64 and + uint64 will result in a float64 dtype. + """ + import_optional_dependency("scipy") + from scipy.sparse import coo_matrix + + dtype = find_common_type(self._parent.dtypes.to_list()) + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + + cols, rows, data = [], [], [] + for col, (_, ser) in enumerate(self._parent.items()): + sp_arr = ser.array + if sp_arr.fill_value != 0: + raise ValueError("fill value must be 0 when converting to COO matrix") + + row = sp_arr.sp_index.indices + cols.append(np.repeat(col, len(row))) + rows.append(row) + data.append(sp_arr.sp_values.astype(dtype, copy=False)) + + cols = np.concatenate(cols) + rows = np.concatenate(rows) + data = np.concatenate(data) + return coo_matrix((data, (rows, cols)), shape=self._parent.shape) + + @property + def density(self) -> float: + """ + Ratio of non-sparse points to total (dense) data points.
+ """ + tmp = np.mean([column.array.density for _, column in self._parent.items()]) + # error: Expression of type "floating" cannot be assigned to return type "float" + return tmp # pyright: ignore[reportGeneralTypeIssues] + + @staticmethod + def _prep_index(data, index, columns): + from pandas.core.indexes.api import ( + default_index, + ensure_index, + ) + + N, K = data.shape + if index is None: + index = default_index(N) + else: + index = ensure_index(index) + if columns is None: + columns = default_index(K) + else: + columns = ensure_index(columns) + + if len(columns) != K: + raise ValueError(f"Column length mismatch: {len(columns)} vs. {K}") + if len(index) != N: + raise ValueError(f"Index length mismatch: {len(index)} vs. {N}") + return index, columns diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py new file mode 100644 index 00000000..62ae6163 --- /dev/null +++ b/pandas/core/arrays/sparse/array.py @@ -0,0 +1,1959 @@ +""" +SparseArray data structure +""" +from __future__ import annotations + +from collections import abc +import numbers +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Literal, + Sequence, + TypeVar, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import lib +import pandas._libs.sparse as splib +from pandas._libs.sparse import ( + BlockIndex, + IntIndex, + SparseIndex, +) +from pandas._libs.tslibs import NaT +from pandas._typing import ( + ArrayLike, + AstypeArg, + Dtype, + NpDtype, + PositionalIndexer, + Scalar, + ScalarIndexer, + SequenceIndexer, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import ( + validate_bool_kwarg, + validate_insert_loc, +) + +from pandas.core.dtypes.astype import astype_nansafe +from pandas.core.dtypes.cast import ( + construct_1d_arraylike_from_scalar, + find_common_type, + maybe_box_datetimelike, +) +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_integer, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, +) + +from pandas.core import arraylike +import pandas.core.algorithms as algos +from pandas.core.array_algos.quantile import quantile_with_mask +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.sparse.dtype import SparseDtype +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.construction import ( + extract_array, + sanitize_array, +) +from pandas.core.indexers import ( + check_array_indexer, + unpack_tuple_and_ellipses, +) +from pandas.core.missing import interpolate_2d +from pandas.core.nanops import check_below_min_count +import pandas.core.ops as ops + +import pandas.io.formats.printing as printing + +# See https://github.com/python/typing/issues/684 +if TYPE_CHECKING: + from enum import Enum + + class ellipsis(Enum): + Ellipsis = "..." 
+ + Ellipsis = ellipsis.Ellipsis + + from scipy.sparse import spmatrix + + from pandas._typing import ( + FillnaOptions, + NumpySorter, + ) + + SparseIndexKind = Literal["integer", "block"] + + from pandas import Series + +else: + ellipsis = type(Ellipsis) + + +# ---------------------------------------------------------------------------- +# Array + +SparseArrayT = TypeVar("SparseArrayT", bound="SparseArray") + +_sparray_doc_kwargs = {"klass": "SparseArray"} + + +def _get_fill(arr: SparseArray) -> np.ndarray: + """ + Create a 0-dim ndarray containing the fill value + + Parameters + ---------- + arr : SparseArray + + Returns + ------- + fill_value : ndarray + 0-dim ndarray with just the fill value. + + Notes + ----- + coerce fill_value to arr dtype if possible + int64 SparseArray can have NaN as fill_value if there is no missing + """ + try: + return np.asarray(arr.fill_value, dtype=arr.dtype.subtype) + except ValueError: + return np.asarray(arr.fill_value) + + +def _sparse_array_op( + left: SparseArray, right: SparseArray, op: Callable, name: str +) -> SparseArray: + """ + Perform a binary operation between two arrays. + + Parameters + ---------- + left : Union[SparseArray, ndarray] + right : Union[SparseArray, ndarray] + op : Callable + The binary operation to perform + name str + Name of the callable. + + Returns + ------- + SparseArray + """ + if name.startswith("__"): + # For lookups in _libs.sparse we need non-dunder op name + name = name[2:-2] + + # dtype used to find corresponding sparse method + ltype = left.dtype.subtype + rtype = right.dtype.subtype + + if not is_dtype_equal(ltype, rtype): + subtype = find_common_type([ltype, rtype]) + ltype = SparseDtype(subtype, left.fill_value) + rtype = SparseDtype(subtype, right.fill_value) + + left = left.astype(ltype, copy=False) + right = right.astype(rtype, copy=False) + dtype = ltype.subtype + else: + dtype = ltype + + # dtype the result must have + result_dtype = None + + if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0: + with np.errstate(all="ignore"): + result = op(left.to_dense(), right.to_dense()) + fill = op(_get_fill(left), _get_fill(right)) + + if left.sp_index.ngaps == 0: + index = left.sp_index + else: + index = right.sp_index + elif left.sp_index.equals(right.sp_index): + with np.errstate(all="ignore"): + result = op(left.sp_values, right.sp_values) + fill = op(_get_fill(left), _get_fill(right)) + index = left.sp_index + else: + if name[0] == "r": + left, right = right, left + name = name[1:] + + if name in ("and", "or", "xor") and dtype == "bool": + opname = f"sparse_{name}_uint8" + # to make template simple, cast here + left_sp_values = left.sp_values.view(np.uint8) + right_sp_values = right.sp_values.view(np.uint8) + result_dtype = bool + else: + opname = f"sparse_{name}_{dtype}" + left_sp_values = left.sp_values + right_sp_values = right.sp_values + + if ( + name in ["floordiv", "mod"] + and (right == 0).any() + and left.dtype.kind in ["i", "u"] + ): + # Match the non-Sparse Series behavior + opname = f"sparse_{name}_float64" + left_sp_values = left_sp_values.astype("float64") + right_sp_values = right_sp_values.astype("float64") + + sparse_op = getattr(splib, opname) + + with np.errstate(all="ignore"): + result, index, fill = sparse_op( + left_sp_values, + left.sp_index, + left.fill_value, + right_sp_values, + right.sp_index, + right.fill_value, + ) + + if name == "divmod": + # result is a 2-tuple + # error: Incompatible return value type (got "Tuple[SparseArray, + # SparseArray]", expected "SparseArray") + 
return ( # type: ignore[return-value] + _wrap_result(name, result[0], index, fill[0], dtype=result_dtype), + _wrap_result(name, result[1], index, fill[1], dtype=result_dtype), + ) + + if result_dtype is None: + result_dtype = result.dtype + + return _wrap_result(name, result, index, fill, dtype=result_dtype) + + +def _wrap_result( + name: str, data, sparse_index, fill_value, dtype: Dtype | None = None +) -> SparseArray: + """ + wrap op result to have correct dtype + """ + if name.startswith("__"): + # e.g. __eq__ --> eq + name = name[2:-2] + + if name in ("eq", "ne", "lt", "gt", "le", "ge"): + dtype = bool + + fill_value = lib.item_from_zerodim(fill_value) + + if is_bool_dtype(dtype): + # fill_value may be np.bool_ + fill_value = bool(fill_value) + return SparseArray( + data, sparse_index=sparse_index, fill_value=fill_value, dtype=dtype + ) + + +class SparseArray(OpsMixin, PandasObject, ExtensionArray): + """ + An ExtensionArray for storing sparse data. + + Parameters + ---------- + data : array-like or scalar + A dense array of values to store in the SparseArray. This may contain + `fill_value`. + sparse_index : SparseIndex, optional + index : Index + + .. deprecated:: 1.4.0 + Use a function like `np.full` to construct an array with the desired + repeats of the scalar value instead. + + fill_value : scalar, optional + Elements in data that are ``fill_value`` are not stored in the + SparseArray. For memory savings, this should be the most common value + in `data`. By default, `fill_value` depends on the dtype of `data`: + + =========== ========== + data.dtype na_value + =========== ========== + float ``np.nan`` + int ``0`` + bool False + datetime64 ``pd.NaT`` + timedelta64 ``pd.NaT`` + =========== ========== + + The fill value is potentially specified in three ways. In order of + precedence, these are + + 1. The `fill_value` argument + 2. ``dtype.fill_value`` if `fill_value` is None and `dtype` is + a ``SparseDtype`` + 3. ``data.dtype.fill_value`` if `fill_value` is None and `dtype` + is not a ``SparseDtype`` and `data` is a ``SparseArray``. + + kind : str + Can be 'integer' or 'block', default is 'integer'. + The type of storage for sparse locations. + + * 'block': Stores a `block` and `block_length` for each + contiguous *span* of sparse values. This is best when + sparse data tends to be clumped together, with large + regions of ``fill-value`` values between sparse values. + * 'integer': uses an integer to store the location of + each sparse value. + + dtype : np.dtype or SparseDtype, optional + The dtype to use for the SparseArray. For numpy dtypes, this + determines the dtype of ``self.sp_values``. For SparseDtype, + this determines ``self.sp_values`` and ``self.fill_value``. + copy : bool, default False + Whether to explicitly copy the incoming `data` array. 
+ + Attributes + ---------- + None + + Methods + ------- + None + + Examples + -------- + >>> from pandas.arrays import SparseArray + >>> arr = SparseArray([0, 0, 1, 2]) + >>> arr + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + """ + + _subtyp = "sparse_array" # register ABCSparseArray + _hidden_attrs = PandasObject._hidden_attrs | frozenset(["get_values"]) + _sparse_index: SparseIndex + _sparse_values: np.ndarray + _dtype: SparseDtype + + def __init__( + self, + data, + sparse_index=None, + index=None, + fill_value=None, + kind: SparseIndexKind = "integer", + dtype: Dtype | None = None, + copy: bool = False, + ) -> None: + + if fill_value is None and isinstance(dtype, SparseDtype): + fill_value = dtype.fill_value + + if isinstance(data, type(self)): + # disable normal inference on dtype, sparse_index, & fill_value + if sparse_index is None: + sparse_index = data.sp_index + if fill_value is None: + fill_value = data.fill_value + if dtype is None: + dtype = data.dtype + # TODO: make kind=None, and use data.kind? + data = data.sp_values + + # Handle use-provided dtype + if isinstance(dtype, str): + # Two options: dtype='int', regular numpy dtype + # or dtype='Sparse[int]', a sparse dtype + try: + dtype = SparseDtype.construct_from_string(dtype) + except TypeError: + dtype = pandas_dtype(dtype) + + if isinstance(dtype, SparseDtype): + if fill_value is None: + fill_value = dtype.fill_value + dtype = dtype.subtype + + if index is not None: + warnings.warn( + "The index argument has been deprecated and will be " + "removed in a future version. Use a function like np.full " + "to construct an array with the desired repeats of the " + "scalar value instead.\n\n", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if index is not None and not is_scalar(data): + raise Exception("must only pass scalars with an index") + + if is_scalar(data): + if index is not None and data is None: + data = np.nan + + if index is not None: + npoints = len(index) + elif sparse_index is None: + npoints = 1 + else: + npoints = sparse_index.length + + data = construct_1d_arraylike_from_scalar(data, npoints, dtype=None) + dtype = data.dtype + + if dtype is not None: + dtype = pandas_dtype(dtype) + + # TODO: disentangle the fill_value dtype inference from + # dtype inference + if data is None: + # TODO: What should the empty dtype be? Object or float? + + # error: Argument "dtype" to "array" has incompatible type + # "Union[ExtensionDtype, dtype[Any], None]"; expected "Union[dtype[Any], + # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, + # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" + data = np.array([], dtype=dtype) # type: ignore[arg-type] + + if not is_array_like(data): + try: + # probably shared code in sanitize_series + + data = sanitize_array(data, index=None) + except ValueError: + # NumPy may raise a ValueError on data like [1, []] + # we retry with object dtype here. + if dtype is None: + dtype = object + data = np.atleast_1d(np.asarray(data, dtype=dtype)) + else: + raise + + if copy: + # TODO: avoid double copy when dtype forces cast. 
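+ # (note: the ``np.asarray(data, dtype=dtype)`` calls further below can copy
+ # again when the requested dtype differs, which is what the TODO refers to)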
+ data = data.copy() + + if fill_value is None: + fill_value_dtype = data.dtype if dtype is None else dtype + if fill_value_dtype is None: + fill_value = np.nan + else: + fill_value = na_value_for_dtype(fill_value_dtype) + + if isinstance(data, type(self)) and sparse_index is None: + sparse_index = data._sparse_index + # error: Argument "dtype" to "asarray" has incompatible type + # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected + # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], + # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, + # Any]]]" + sparse_values = np.asarray( + data.sp_values, dtype=dtype # type: ignore[arg-type] + ) + elif sparse_index is None: + data = extract_array(data, extract_numpy=True) + if not isinstance(data, np.ndarray): + # EA + if is_datetime64tz_dtype(data.dtype): + warnings.warn( + f"Creating SparseArray from {data.dtype} data " + "loses timezone information. Cast to object before " + "sparse to retain timezone information.", + UserWarning, + stacklevel=find_stack_level(), + ) + data = np.asarray(data, dtype="datetime64[ns]") + if fill_value is NaT: + fill_value = np.datetime64("NaT", "ns") + data = np.asarray(data) + sparse_values, sparse_index, fill_value = make_sparse( + # error: Argument "dtype" to "make_sparse" has incompatible type + # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected + # "Union[str, dtype[Any], None]" + data, + kind=kind, + fill_value=fill_value, + dtype=dtype, # type: ignore[arg-type] + ) + else: + # error: Argument "dtype" to "asarray" has incompatible type + # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected + # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], + # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, + # Any]]]" + sparse_values = np.asarray(data, dtype=dtype) # type: ignore[arg-type] + if len(sparse_values) != sparse_index.npoints: + raise AssertionError( + f"Non array-like type {type(sparse_values)} must " + "have the same length as the index" + ) + self._sparse_index = sparse_index + self._sparse_values = sparse_values + self._dtype = SparseDtype(sparse_values.dtype, fill_value) + + @classmethod + def _simple_new( + cls: type[SparseArrayT], + sparse_array: np.ndarray, + sparse_index: SparseIndex, + dtype: SparseDtype, + ) -> SparseArrayT: + new = object.__new__(cls) + new._sparse_index = sparse_index + new._sparse_values = sparse_array + new._dtype = dtype + return new + + @classmethod + def from_spmatrix(cls: type[SparseArrayT], data: spmatrix) -> SparseArrayT: + """ + Create a SparseArray from a scipy.sparse matrix. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + data : scipy.sparse.sp_matrix + This should be a SciPy sparse matrix where the size + of the second dimension is 1. In other words, a + sparse matrix with a single column. + + Returns + ------- + SparseArray + + Examples + -------- + >>> import scipy.sparse + >>> mat = scipy.sparse.coo_matrix((4, 1)) + >>> pd.arrays.SparseArray.from_spmatrix(mat) + [0.0, 0.0, 0.0, 0.0] + Fill: 0.0 + IntIndex + Indices: array([], dtype=int32) + """ + length, ncol = data.shape + + if ncol != 1: + raise ValueError(f"'data' must have a single column, not '{ncol}'") + + # our sparse index classes require that the positions be strictly + # increasing. So we need to sort loc, and arr accordingly. 
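+ # For an (N, 1) matrix, CSC with sorted indices gives the row positions of
+ # the stored values in strictly increasing order, as required by the
+ # ``IntIndex`` constructed below.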
+ data = data.tocsc() + data.sort_indices() + arr = data.data + idx = data.indices + + zero = np.array(0, dtype=arr.dtype).item() + dtype = SparseDtype(arr.dtype, zero) + index = IntIndex(length, idx) + + return cls._simple_new(arr, index, dtype) + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + fill_value = self.fill_value + + if self.sp_index.ngaps == 0: + # Compat for na dtype and int values. + return self.sp_values + if dtype is None: + # Can NumPy represent this type? + # If not, `np.result_type` will raise. We catch that + # and return object. + if is_datetime64_any_dtype(self.sp_values.dtype): + # However, we *do* special-case the common case of + # a datetime64 with pandas NaT. + if fill_value is NaT: + # Can't put pd.NaT in a datetime64[ns] + fill_value = np.datetime64("NaT") + try: + dtype = np.result_type(self.sp_values.dtype, type(fill_value)) + except TypeError: + dtype = object + + out = np.full(self.shape, fill_value, dtype=dtype) + out[self.sp_index.indices] = self.sp_values + return out + + def __setitem__(self, key, value): + # I suppose we could allow setting of non-fill_value elements. + # TODO(SparseArray.__setitem__): remove special cases in + # ExtensionBlock.where + msg = "SparseArray does not support item assignment via setitem" + raise TypeError(msg) + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False): + return cls(scalars, dtype=dtype) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values, dtype=original.dtype) + + # ------------------------------------------------------------------------ + # Data + # ------------------------------------------------------------------------ + @property + def sp_index(self) -> SparseIndex: + """ + The SparseIndex containing the location of non- ``fill_value`` points. + """ + return self._sparse_index + + @property + def sp_values(self) -> np.ndarray: + """ + An ndarray containing the non- ``fill_value`` values. + + Examples + -------- + >>> s = SparseArray([0, 0, 1, 0, 2], fill_value=0) + >>> s.sp_values + array([1, 2]) + """ + return self._sparse_values + + @property + def dtype(self) -> SparseDtype: + return self._dtype + + @property + def fill_value(self): + """ + Elements in `data` that are `fill_value` are not stored. + + For memory savings, this should be the most common value in the array. + """ + return self.dtype.fill_value + + @fill_value.setter + def fill_value(self, value): + self._dtype = SparseDtype(self.dtype.subtype, value) + + @property + def kind(self) -> SparseIndexKind: + """ + The kind of sparse index for this array. One of {'integer', 'block'}. + """ + if isinstance(self.sp_index, IntIndex): + return "integer" + else: + return "block" + + @property + def _valid_sp_values(self) -> np.ndarray: + sp_vals = self.sp_values + mask = notna(sp_vals) + return sp_vals[mask] + + def __len__(self) -> int: + return self.sp_index.length + + @property + def _null_fill_value(self) -> bool: + return self._dtype._is_na_fill_value + + def _fill_value_matches(self, fill_value) -> bool: + if self._null_fill_value: + return isna(fill_value) + else: + return self.fill_value == fill_value + + @property + def nbytes(self) -> int: + return self.sp_values.nbytes + self.sp_index.nbytes + + @property + def density(self) -> float: + """ + The percent of non- ``fill_value`` points, as decimal. 
+ + Examples + -------- + >>> s = SparseArray([0, 0, 1, 1, 1], fill_value=0) + >>> s.density + 0.6 + """ + return self.sp_index.npoints / self.sp_index.length + + @property + def npoints(self) -> int: + """ + The number of non- ``fill_value`` points. + + Examples + -------- + >>> s = SparseArray([0, 0, 1, 1, 1], fill_value=0) + >>> s.npoints + 3 + """ + return self.sp_index.npoints + + def isna(self): + # If null fill value, we want SparseDtype[bool, true] + # to preserve the same memory usage. + dtype = SparseDtype(bool, self._null_fill_value) + if self._null_fill_value: + return type(self)._simple_new(isna(self.sp_values), self.sp_index, dtype) + mask = np.full(len(self), False, dtype=np.bool_) + mask[self.sp_index.indices] = isna(self.sp_values) + return type(self)(mask, fill_value=False, dtype=dtype) + + def fillna( + self: SparseArrayT, + value=None, + method: FillnaOptions | None = None, + limit: int | None = None, + ) -> SparseArrayT: + """ + Fill missing values with `value`. + + Parameters + ---------- + value : scalar, optional + method : str, optional + + .. warning:: + + Using 'method' will result in high memory use, + as all `fill_value` methods will be converted to + an in-memory ndarray + + limit : int, optional + + Returns + ------- + SparseArray + + Notes + ----- + When `value` is specified, the result's ``fill_value`` depends on + ``self.fill_value``. The goal is to maintain low-memory use. + + If ``self.fill_value`` is NA, the result dtype will be + ``SparseDtype(self.dtype, fill_value=value)``. This will preserve + amount of memory used before and after filling. + + When ``self.fill_value`` is not NA, the result dtype will be + ``self.dtype``. Again, this preserves the amount of memory used. + """ + if (method is None and value is None) or ( + method is not None and value is not None + ): + raise ValueError("Must specify one of 'method' or 'value'.") + + elif method is not None: + msg = "fillna with 'method' requires high memory usage." + warnings.warn( + msg, + PerformanceWarning, + stacklevel=find_stack_level(), + ) + new_values = np.asarray(self) + # interpolate_2d modifies new_values inplace + interpolate_2d(new_values, method=method, limit=limit) + return type(self)(new_values, fill_value=self.fill_value) + + else: + new_values = np.where(isna(self.sp_values), value, self.sp_values) + + if self._null_fill_value: + # This is essentially just updating the dtype. + new_dtype = SparseDtype(self.dtype.subtype, fill_value=value) + else: + new_dtype = self.dtype + + return self._simple_new(new_values, self._sparse_index, new_dtype) + + def shift(self: SparseArrayT, periods: int = 1, fill_value=None) -> SparseArrayT: + + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + subtype = np.result_type(fill_value, self.dtype.subtype) + + if subtype != self.dtype.subtype: + # just coerce up front + arr = self.astype(SparseDtype(subtype, self.fill_value)) + else: + arr = self + + empty = self._from_sequence( + [fill_value] * min(abs(periods), len(self)), dtype=arr.dtype + ) + + if periods > 0: + a = empty + b = arr[:-periods] + else: + a = arr[abs(periods) :] + b = empty + return arr._concat_same_type([a, b]) + + def _first_fill_value_loc(self): + """ + Get the location of the first fill value. 
+ + Returns + ------- + int + """ + if len(self) == 0 or self.sp_index.npoints == len(self): + return -1 + + indices = self.sp_index.indices + if not len(indices) or indices[0] > 0: + return 0 + + # a number larger than 1 should be appended to + # the last in case of fill value only appears + # in the tail of array + diff = np.r_[np.diff(indices), 2] + return indices[(diff > 1).argmax()] + 1 + + def unique(self: SparseArrayT) -> SparseArrayT: + uniques = algos.unique(self.sp_values) + if len(self.sp_values) != len(self): + fill_loc = self._first_fill_value_loc() + # Inorder to align the behavior of pd.unique or + # pd.Series.unique, we should keep the original + # order, here we use unique again to find the + # insertion place. Since the length of sp_values + # is not large, maybe minor performance hurt + # is worthwhile to the correctness. + insert_loc = len(algos.unique(self.sp_values[:fill_loc])) + uniques = np.insert(uniques, insert_loc, self.fill_value) + return type(self)._from_sequence(uniques, dtype=self.dtype) + + def _values_for_factorize(self): + # Still override this for hash_pandas_object + return np.asarray(self), self.fill_value + + def factorize( + self, + na_sentinel: int | lib.NoDefault = lib.no_default, + use_na_sentinel: bool | lib.NoDefault = lib.no_default, + ) -> tuple[np.ndarray, SparseArray]: + # Currently, ExtensionArray.factorize -> Tuple[ndarray, EA] + # The sparsity on this is backwards from what Sparse would want. Want + # ExtensionArray.factorize -> Tuple[EA, EA] + # Given that we have to return a dense array of codes, why bother + # implementing an efficient factorize? + codes, uniques = algos.factorize( + np.asarray(self), na_sentinel=na_sentinel, use_na_sentinel=use_na_sentinel + ) + if na_sentinel is lib.no_default: + na_sentinel = -1 + if use_na_sentinel is lib.no_default or use_na_sentinel: + codes[codes == -1] = na_sentinel + uniques_sp = SparseArray(uniques, dtype=self.dtype) + return codes, uniques_sp + + def value_counts(self, dropna: bool = True) -> Series: + """ + Returns a Series containing counts of unique values. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN, even if NaN is in sp_values. 
+ + Returns + ------- + counts : Series + """ + from pandas import ( + Index, + Series, + ) + + keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna) + fcounts = self.sp_index.ngaps + if fcounts > 0 and (not self._null_fill_value or not dropna): + mask = isna(keys) if self._null_fill_value else keys == self.fill_value + if mask.any(): + counts[mask] += fcounts + else: + # error: Argument 1 to "insert" has incompatible type "Union[ + # ExtensionArray,ndarray[Any, Any]]"; expected "Union[ + # _SupportsArray[dtype[Any]], Sequence[_SupportsArray[dtype + # [Any]]], Sequence[Sequence[_SupportsArray[dtype[Any]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]], Sequence + # [Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]]]]" + keys = np.insert(keys, 0, self.fill_value) # type: ignore[arg-type] + counts = np.insert(counts, 0, fcounts) + + if not isinstance(keys, ABCIndex): + index = Index(keys) + else: + index = keys + return Series(counts, index=index) + + def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str): + + if self._null_fill_value or self.sp_index.ngaps == 0: + # We can avoid densifying + npvalues = self.sp_values + mask = np.zeros(npvalues.shape, dtype=bool) + else: + npvalues = self.to_numpy() + mask = self.isna() + + fill_value = na_value_for_dtype(npvalues.dtype, compat=False) + res_values = quantile_with_mask( + npvalues, + mask, + fill_value, + qs, + interpolation, + ) + + # Special case: the returned array isn't _really_ sparse, so we don't + # wrap it in a SparseArray + return res_values + + # -------- + # Indexing + # -------- + @overload + def __getitem__(self, key: ScalarIndexer) -> Any: + ... + + @overload + def __getitem__( + self: SparseArrayT, + key: SequenceIndexer | tuple[int | ellipsis, ...], + ) -> SparseArrayT: + ... 
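+ # Rough summary of the dispatch implemented below (illustrative only):
+ #   arr[2]          -> scalar, via _get_val_at
+ #   arr[1:4]        -> SparseArray; contiguous slices avoid densifying
+ #   arr[[0, 2]]     -> SparseArray, via take
+ #   arr[bool_mask]  -> SparseArray, via take on the selected positions
+ #   arr[...]        -> rejected, like other non-integer scalar keys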
+ + def __getitem__( + self: SparseArrayT, + key: PositionalIndexer | tuple[int | ellipsis, ...], + ) -> SparseArrayT | Any: + + if isinstance(key, tuple): + key = unpack_tuple_and_ellipses(key) + # Non-overlapping identity check (left operand type: + # "Union[Union[Union[int, integer[Any]], Union[slice, List[int], + # ndarray[Any, Any]]], Tuple[Union[int, ellipsis], ...]]", + # right operand type: "ellipsis") + if key is Ellipsis: # type: ignore[comparison-overlap] + raise ValueError("Cannot slice with Ellipsis") + + if is_integer(key): + return self._get_val_at(key) + elif isinstance(key, tuple): + # error: Invalid index type "Tuple[Union[int, ellipsis], ...]" + # for "ndarray[Any, Any]"; expected type + # "Union[SupportsIndex, _SupportsArray[dtype[Union[bool_, + # integer[Any]]]], _NestedSequence[_SupportsArray[dtype[ + # Union[bool_, integer[Any]]]]], _NestedSequence[Union[ + # bool, int]], Tuple[Union[SupportsIndex, _SupportsArray[ + # dtype[Union[bool_, integer[Any]]]], _NestedSequence[ + # _SupportsArray[dtype[Union[bool_, integer[Any]]]]], + # _NestedSequence[Union[bool, int]]], ...]]" + data_slice = self.to_dense()[key] # type: ignore[index] + elif isinstance(key, slice): + + # Avoid densifying when handling contiguous slices + if key.step is None or key.step == 1: + start = 0 if key.start is None else key.start + if start < 0: + start += len(self) + + end = len(self) if key.stop is None else key.stop + if end < 0: + end += len(self) + + indices = self.sp_index.indices + keep_inds = np.flatnonzero((indices >= start) & (indices < end)) + sp_vals = self.sp_values[keep_inds] + + sp_index = indices[keep_inds].copy() + + # If we've sliced to not include the start of the array, all our indices + # should be shifted. NB: here we are careful to also not shift by a + # negative value for a case like [0, 1][-100:] where the start index + # should be treated like 0 + if start > 0: + sp_index -= start + + # Length of our result should match applying this slice to a range + # of the length of our original array + new_len = len(range(len(self))[key]) + new_sp_index = make_sparse_index(new_len, sp_index, self.kind) + return type(self)._simple_new(sp_vals, new_sp_index, self.dtype) + else: + indices = np.arange(len(self), dtype=np.int32)[key] + return self.take(indices) + + elif not is_list_like(key): + # e.g. 
"foo" or 2.5 + # exception message copied from numpy + raise IndexError( + r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis " + r"(`None`) and integer or boolean arrays are valid indices" + ) + + else: + if isinstance(key, SparseArray): + # NOTE: If we guarantee that SparseDType(bool) + # has only fill_value - true, false or nan + # (see GH PR 44955) + # we can apply mask very fast: + if is_bool_dtype(key): + if isna(key.fill_value): + return self.take(key.sp_index.indices[key.sp_values]) + if not key.fill_value: + return self.take(key.sp_index.indices) + n = len(self) + mask = np.full(n, True, dtype=np.bool_) + mask[key.sp_index.indices] = False + return self.take(np.arange(n)[mask]) + else: + key = np.asarray(key) + + key = check_array_indexer(self, key) + + if com.is_bool_indexer(key): + # mypy doesn't know we have an array here + key = cast(np.ndarray, key) + return self.take(np.arange(len(key), dtype=np.int32)[key]) + elif hasattr(key, "__len__"): + return self.take(key) + else: + raise ValueError(f"Cannot slice with '{key}'") + + return type(self)(data_slice, kind=self.kind) + + def _get_val_at(self, loc): + loc = validate_insert_loc(loc, len(self)) + + sp_loc = self.sp_index.lookup(loc) + if sp_loc == -1: + return self.fill_value + else: + val = self.sp_values[sp_loc] + val = maybe_box_datetimelike(val, self.sp_values.dtype) + return val + + def take( + self: SparseArrayT, indices, *, allow_fill: bool = False, fill_value=None + ) -> SparseArrayT: + if is_scalar(indices): + raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.") + indices = np.asarray(indices, dtype=np.int32) + + dtype = None + if indices.size == 0: + result = np.array([], dtype="object") + dtype = self.dtype + elif allow_fill: + result = self._take_with_fill(indices, fill_value=fill_value) + else: + return self._take_without_fill(indices) + + return type(self)( + result, fill_value=self.fill_value, kind=self.kind, dtype=dtype + ) + + def _take_with_fill(self, indices, fill_value=None) -> np.ndarray: + if fill_value is None: + fill_value = self.dtype.na_value + + if indices.min() < -1: + raise ValueError( + "Invalid value in 'indices'. Must be between -1 " + "and the length of the array." + ) + + if indices.max() >= len(self): + raise IndexError("out of bounds value in 'indices'.") + + if len(self) == 0: + # Empty... Allow taking only if all empty + if (indices == -1).all(): + dtype = np.result_type(self.sp_values, type(fill_value)) + taken = np.empty_like(indices, dtype=dtype) + taken.fill(fill_value) + return taken + else: + raise IndexError("cannot do a non-empty take from an empty axes.") + + # sp_indexer may be -1 for two reasons + # 1.) we took for an index of -1 (new) + # 2.) we took a value that was self.fill_value (old) + sp_indexer = self.sp_index.lookup_array(indices) + new_fill_indices = indices == -1 + old_fill_indices = (sp_indexer == -1) & ~new_fill_indices + + if self.sp_index.npoints == 0 and old_fill_indices.all(): + # We've looked up all valid points on an all-sparse array. + taken = np.full( + sp_indexer.shape, fill_value=self.fill_value, dtype=self.dtype.subtype + ) + + elif self.sp_index.npoints == 0: + # Avoid taking from the empty self.sp_values + _dtype = np.result_type(self.dtype.subtype, type(fill_value)) + taken = np.full(sp_indexer.shape, fill_value=fill_value, dtype=_dtype) + else: + taken = self.sp_values.take(sp_indexer) + + # Fill in two steps. + # Old fill values + # New fill values + # potentially coercing to a new dtype at each stage. 
+ + m0 = sp_indexer[old_fill_indices] < 0 + m1 = sp_indexer[new_fill_indices] < 0 + + result_type = taken.dtype + + if m0.any(): + result_type = np.result_type(result_type, type(self.fill_value)) + taken = taken.astype(result_type) + taken[old_fill_indices] = self.fill_value + + if m1.any(): + result_type = np.result_type(result_type, type(fill_value)) + taken = taken.astype(result_type) + taken[new_fill_indices] = fill_value + + return taken + + def _take_without_fill(self: SparseArrayT, indices) -> SparseArrayT: + to_shift = indices < 0 + + n = len(self) + + if (indices.max() >= n) or (indices.min() < -n): + if n == 0: + raise IndexError("cannot do a non-empty take from an empty axes.") + else: + raise IndexError("out of bounds value in 'indices'.") + + if to_shift.any(): + indices = indices.copy() + indices[to_shift] += n + + sp_indexer = self.sp_index.lookup_array(indices) + value_mask = sp_indexer != -1 + new_sp_values = self.sp_values[sp_indexer[value_mask]] + + value_indices = np.flatnonzero(value_mask).astype(np.int32, copy=False) + + new_sp_index = make_sparse_index(len(indices), value_indices, kind=self.kind) + return type(self)._simple_new(new_sp_values, new_sp_index, dtype=self.dtype) + + def searchsorted( + self, + v: ArrayLike | object, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + + msg = "searchsorted requires high memory usage." + warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) + if not is_scalar(v): + v = np.asarray(v) + v = np.asarray(v) + return np.asarray(self, dtype=self.dtype.subtype).searchsorted(v, side, sorter) + + def copy(self: SparseArrayT) -> SparseArrayT: + values = self.sp_values.copy() + return self._simple_new(values, self.sp_index, self.dtype) + + @classmethod + def _concat_same_type( + cls: type[SparseArrayT], to_concat: Sequence[SparseArrayT] + ) -> SparseArrayT: + fill_value = to_concat[0].fill_value + + values = [] + length = 0 + + if to_concat: + sp_kind = to_concat[0].kind + else: + sp_kind = "integer" + + sp_index: SparseIndex + if sp_kind == "integer": + indices = [] + + for arr in to_concat: + int_idx = arr.sp_index.indices.copy() + int_idx += length # TODO: wraparound + length += arr.sp_index.length + + values.append(arr.sp_values) + indices.append(int_idx) + + data = np.concatenate(values) + indices_arr = np.concatenate(indices) + # error: Argument 2 to "IntIndex" has incompatible type + # "ndarray[Any, dtype[signedinteger[_32Bit]]]"; + # expected "Sequence[int]" + sp_index = IntIndex(length, indices_arr) # type: ignore[arg-type] + + else: + # when concatenating block indices, we don't claim that you'll + # get an identical index as concatenating the values and then + # creating a new index. We don't want to spend the time trying + # to merge blocks across arrays in `to_concat`, so the resulting + # BlockIndex may have more blocks. + blengths = [] + blocs = [] + + for arr in to_concat: + block_idx = arr.sp_index.to_block_index() + + values.append(arr.sp_values) + blocs.append(block_idx.blocs.copy() + length) + blengths.append(block_idx.blengths) + length += arr.sp_index.length + + data = np.concatenate(values) + blocs_arr = np.concatenate(blocs) + blengths_arr = np.concatenate(blengths) + + sp_index = BlockIndex(length, blocs_arr, blengths_arr) + + return cls(data, sparse_index=sp_index, fill_value=fill_value) + + def astype(self, dtype: AstypeArg | None = None, copy: bool = True): + """ + Change the dtype of a SparseArray. 
+ + The output will always be a SparseArray. To convert to a dense + ndarray with a certain dtype, use :meth:`numpy.asarray`. + + Parameters + ---------- + dtype : np.dtype or ExtensionDtype + For SparseDtype, this changes the dtype of + ``self.sp_values`` and the ``self.fill_value``. + + For other dtypes, this only changes the dtype of + ``self.sp_values``. + + copy : bool, default True + Whether to ensure a copy is made, even if not necessary. + + Returns + ------- + SparseArray + + Examples + -------- + >>> arr = pd.arrays.SparseArray([0, 0, 1, 2]) + >>> arr + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + + >>> arr.astype(SparseDtype(np.dtype('int32'))) + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + + Using a NumPy dtype with a different kind (e.g. float) will coerce + just ``self.sp_values``. + + >>> arr.astype(SparseDtype(np.dtype('float64'))) + ... # doctest: +NORMALIZE_WHITESPACE + [nan, nan, 1.0, 2.0] + Fill: nan + IntIndex + Indices: array([2, 3], dtype=int32) + + Using a SparseDtype, you can also change the fill value as well. + + >>> arr.astype(SparseDtype("float64", fill_value=0.0)) + ... # doctest: +NORMALIZE_WHITESPACE + [0.0, 0.0, 1.0, 2.0] + Fill: 0.0 + IntIndex + Indices: array([2, 3], dtype=int32) + """ + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + else: + return self.copy() + + future_dtype = pandas_dtype(dtype) + if not isinstance(future_dtype, SparseDtype): + # GH#34457 + warnings.warn( + "The behavior of .astype from SparseDtype to a non-sparse dtype " + "is deprecated. In a future version, this will return a non-sparse " + "array with the requested dtype. To retain the old behavior, use " + "`obj.astype(SparseDtype(dtype))`", + FutureWarning, + stacklevel=find_stack_level(), + ) + + dtype = self.dtype.update_dtype(dtype) + subtype = pandas_dtype(dtype._subtype_with_str) + sp_values = astype_nansafe(self.sp_values, subtype, copy=copy) + + # error: Argument 1 to "_simple_new" of "SparseArray" has incompatible type + # "ExtensionArray"; expected "ndarray" + return self._simple_new( + sp_values, self.sp_index, dtype # type: ignore[arg-type] + ) + + def map(self: SparseArrayT, mapper) -> SparseArrayT: + """ + Map categories using an input mapping or function. + + Parameters + ---------- + mapper : dict, Series, callable + The correspondence from old values to new. + + Returns + ------- + SparseArray + The output array will have the same density as the input. + The output fill value will be the result of applying the + mapping to ``self.fill_value`` + + Examples + -------- + >>> arr = pd.arrays.SparseArray([0, 1, 2]) + >>> arr.map(lambda x: x + 10) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + + >>> arr.map({0: 10, 1: 11, 2: 12}) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + + >>> arr.map(pd.Series([10, 11, 12], index=[0, 1, 2])) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + """ + # this is used in apply. + # We get hit since we're an "is_extension_type" but regular extension + # types are not hit. This may be worth adding to the interface. 
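+ # The mapper is applied separately to ``fill_value`` and to the stored
+ # ``sp_values``, so the sparse index (and hence the density) is preserved.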
+ if isinstance(mapper, ABCSeries): + mapper = mapper.to_dict() + + if isinstance(mapper, abc.Mapping): + fill_value = mapper.get(self.fill_value, self.fill_value) + sp_values = [mapper.get(x, None) for x in self.sp_values] + else: + fill_value = mapper(self.fill_value) + sp_values = [mapper(x) for x in self.sp_values] + + return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value) + + def to_dense(self) -> np.ndarray: + """ + Convert SparseArray to a NumPy array. + + Returns + ------- + arr : NumPy array + """ + return np.asarray(self, dtype=self.sp_values.dtype) + + def _where(self, mask, value): + # NB: may not preserve dtype, e.g. result may be Sparse[float64] + # while self is Sparse[int64] + naive_implementation = np.where(mask, self, value) + dtype = SparseDtype(naive_implementation.dtype, fill_value=self.fill_value) + result = type(self)._from_sequence(naive_implementation, dtype=dtype) + return result + + # ------------------------------------------------------------------------ + # IO + # ------------------------------------------------------------------------ + def __setstate__(self, state) -> None: + """Necessary for making this object picklable""" + if isinstance(state, tuple): + # Compat for pandas < 0.24.0 + nd_state, (fill_value, sp_index) = state + sparse_values = np.array([]) + sparse_values.__setstate__(nd_state) + + self._sparse_values = sparse_values + self._sparse_index = sp_index + self._dtype = SparseDtype(sparse_values.dtype, fill_value) + else: + self.__dict__.update(state) + + def nonzero(self) -> tuple[npt.NDArray[np.int32]]: + if self.fill_value == 0: + return (self.sp_index.indices,) + else: + return (self.sp_index.indices[self.sp_values != 0],) + + # ------------------------------------------------------------------------ + # Reductions + # ------------------------------------------------------------------------ + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + method = getattr(self, name, None) + + if method is None: + raise TypeError(f"cannot perform {name} with type {self.dtype}") + + if skipna: + arr = self + else: + arr = self.dropna() + + return getattr(arr, name)(**kwargs) + + def all(self, axis=None, *args, **kwargs): + """ + Tests whether all elements evaluate True + + Returns + ------- + all : bool + + See Also + -------- + numpy.all + """ + nv.validate_all(args, kwargs) + + values = self.sp_values + + if len(values) != len(self) and not np.all(self.fill_value): + return False + + return values.all() + + def any(self, axis=0, *args, **kwargs): + """ + Tests whether at least one of elements evaluate True + + Returns + ------- + any : bool + + See Also + -------- + numpy.any + """ + nv.validate_any(args, kwargs) + + values = self.sp_values + + if len(values) != len(self) and np.any(self.fill_value): + return True + + return values.any().item() + + def sum( + self, axis: int = 0, min_count: int = 0, skipna: bool = True, *args, **kwargs + ) -> Scalar: + """ + Sum of non-NA/null values + + Parameters + ---------- + axis : int, default 0 + Not Used. NumPy compatibility. + min_count : int, default 0 + The required number of valid values to perform the summation. If fewer + than ``min_count`` valid values are present, the result will be the missing + value indicator for subarray type. + *args, **kwargs + Not Used. NumPy compatibility. 
+ + Returns + ------- + scalar + """ + nv.validate_sum(args, kwargs) + valid_vals = self._valid_sp_values + sp_sum = valid_vals.sum() + has_na = self.sp_index.ngaps > 0 and not self._null_fill_value + + if has_na and not skipna: + return na_value_for_dtype(self.dtype.subtype, compat=False) + + if self._null_fill_value: + if check_below_min_count(valid_vals.shape, None, min_count): + return na_value_for_dtype(self.dtype.subtype, compat=False) + return sp_sum + else: + nsparse = self.sp_index.ngaps + if check_below_min_count(valid_vals.shape, None, min_count - nsparse): + return na_value_for_dtype(self.dtype.subtype, compat=False) + return sp_sum + self.fill_value * nsparse + + def cumsum(self, axis: int = 0, *args, **kwargs) -> SparseArray: + """ + Cumulative sum of non-NA/null values. + + When performing the cumulative summation, any non-NA/null values will + be skipped. The resulting SparseArray will preserve the locations of + NaN values, but the fill value will be `np.nan` regardless. + + Parameters + ---------- + axis : int or None + Axis over which to perform the cumulative summation. If None, + perform cumulative summation over flattened array. + + Returns + ------- + cumsum : SparseArray + """ + nv.validate_cumsum(args, kwargs) + + if axis is not None and axis >= self.ndim: # Mimic ndarray behaviour. + raise ValueError(f"axis(={axis}) out of bounds") + + if not self._null_fill_value: + return SparseArray(self.to_dense()).cumsum() + + return SparseArray( + self.sp_values.cumsum(), + sparse_index=self.sp_index, + fill_value=self.fill_value, + ) + + def mean(self, axis=0, *args, **kwargs): + """ + Mean of non-NA/null values + + Returns + ------- + mean : float + """ + nv.validate_mean(args, kwargs) + valid_vals = self._valid_sp_values + sp_sum = valid_vals.sum() + ct = len(valid_vals) + + if self._null_fill_value: + return sp_sum / ct + else: + nsparse = self.sp_index.ngaps + return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) + + def max(self, *, axis: int | None = None, skipna: bool = True): + """ + Max of array values, ignoring NA values if specified. + + Parameters + ---------- + axis : int, default 0 + Not Used. NumPy compatibility. + skipna : bool, default True + Whether to ignore NA values. + + Returns + ------- + scalar + """ + nv.validate_minmax_axis(axis, self.ndim) + return self._min_max("max", skipna=skipna) + + def min(self, *, axis: int | None = None, skipna: bool = True): + """ + Min of array values, ignoring NA values if specified. + + Parameters + ---------- + axis : int, default 0 + Not Used. NumPy compatibility. + skipna : bool, default True + Whether to ignore NA values. 
+ + Returns + ------- + scalar + """ + nv.validate_minmax_axis(axis, self.ndim) + return self._min_max("min", skipna=skipna) + + def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar: + """ + Min/max of non-NA/null values + + Parameters + ---------- + kind : {"min", "max"} + skipna : bool + + Returns + ------- + scalar + """ + valid_vals = self._valid_sp_values + has_nonnull_fill_vals = not self._null_fill_value and self.sp_index.ngaps > 0 + + if len(valid_vals) > 0: + sp_min_max = getattr(valid_vals, kind)() + + # If a non-null fill value is currently present, it might be the min/max + if has_nonnull_fill_vals: + func = max if kind == "max" else min + return func(sp_min_max, self.fill_value) + elif skipna: + return sp_min_max + elif self.sp_index.ngaps == 0: + # No NAs present + return sp_min_max + else: + return na_value_for_dtype(self.dtype.subtype, compat=False) + elif has_nonnull_fill_vals: + return self.fill_value + else: + return na_value_for_dtype(self.dtype.subtype, compat=False) + + def _argmin_argmax(self, kind: Literal["argmin", "argmax"]) -> int: + + values = self._sparse_values + index = self._sparse_index.indices + mask = np.asarray(isna(values)) + func = np.argmax if kind == "argmax" else np.argmin + + idx = np.arange(values.shape[0]) + non_nans = values[~mask] + non_nan_idx = idx[~mask] + + _candidate = non_nan_idx[func(non_nans)] + candidate = index[_candidate] + + if isna(self.fill_value): + return candidate + if kind == "argmin" and self[candidate] < self.fill_value: + return candidate + if kind == "argmax" and self[candidate] > self.fill_value: + return candidate + _loc = self._first_fill_value_loc() + if _loc == -1: + # fill_value doesn't exist + return candidate + else: + return _loc + + def argmax(self, skipna: bool = True) -> int: + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return self._argmin_argmax("argmax") + + def argmin(self, skipna: bool = True) -> int: + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return self._argmin_argmax("argmin") + + # ------------------------------------------------------------------------ + # Ufuncs + # ------------------------------------------------------------------------ + + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + out = kwargs.get("out", ()) + + for x in inputs + out: + if not isinstance(x, self._HANDLED_TYPES + (SparseArray,)): + return NotImplemented + + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + # e.g. tests.arrays.sparse.test_arithmetics.test_ndarray_inplace + res = arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + return res + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + # e.g. tests.series.test_ufunc.TestNumpyReductions + return result + + if len(inputs) == 1: + # No alignment necessary. + sp_values = getattr(ufunc, method)(self.sp_values, **kwargs) + fill_value = getattr(ufunc, method)(self.fill_value, **kwargs) + + if ufunc.nout > 1: + # multiple outputs. e.g. 
modf + arrays = tuple( + self._simple_new( + sp_value, self.sp_index, SparseDtype(sp_value.dtype, fv) + ) + for sp_value, fv in zip(sp_values, fill_value) + ) + return arrays + elif method == "reduce": + # e.g. reductions + return sp_values + + return self._simple_new( + sp_values, self.sp_index, SparseDtype(sp_values.dtype, fill_value) + ) + + new_inputs = tuple(np.asarray(x) for x in inputs) + result = getattr(ufunc, method)(*new_inputs, **kwargs) + if out: + if len(out) == 1: + out = out[0] + return out + + if ufunc.nout > 1: + return tuple(type(self)(x) for x in result) + elif method == "at": + # no return value + return None + else: + return type(self)(result) + + # ------------------------------------------------------------------------ + # Ops + # ------------------------------------------------------------------------ + + def _arith_method(self, other, op): + op_name = op.__name__ + + if isinstance(other, SparseArray): + return _sparse_array_op(self, other, op, op_name) + + elif is_scalar(other): + with np.errstate(all="ignore"): + fill = op(_get_fill(self), np.asarray(other)) + result = op(self.sp_values, other) + + if op_name == "divmod": + left, right = result + lfill, rfill = fill + return ( + _wrap_result(op_name, left, self.sp_index, lfill), + _wrap_result(op_name, right, self.sp_index, rfill), + ) + + return _wrap_result(op_name, result, self.sp_index, fill) + + else: + other = np.asarray(other) + with np.errstate(all="ignore"): + if len(self) != len(other): + raise AssertionError( + f"length mismatch: {len(self)} vs. {len(other)}" + ) + if not isinstance(other, SparseArray): + dtype = getattr(other, "dtype", None) + other = SparseArray(other, fill_value=self.fill_value, dtype=dtype) + return _sparse_array_op(self, other, op, op_name) + + def _cmp_method(self, other, op) -> SparseArray: + if not is_scalar(other) and not isinstance(other, type(self)): + # convert list-like to ndarray + other = np.asarray(other) + + if isinstance(other, np.ndarray): + # TODO: make this more flexible than just ndarray... 
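+ # Wrapping the ndarray as a SparseArray (reusing our fill_value) lets the
+ # comparison go through _sparse_array_op, the same path as the SparseArray
+ # case below.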
+ other = SparseArray(other, fill_value=self.fill_value) + + if isinstance(other, SparseArray): + if len(self) != len(other): + raise ValueError( + f"operands have mismatched length {len(self)} and {len(other)}" + ) + + op_name = op.__name__.strip("_") + return _sparse_array_op(self, other, op, op_name) + else: + # scalar + with np.errstate(all="ignore"): + fill_value = op(self.fill_value, other) + result = np.full(len(self), fill_value, dtype=np.bool_) + result[self.sp_index.indices] = op(self.sp_values, other) + + return type(self)( + result, + fill_value=fill_value, + dtype=np.bool_, + ) + + _logical_method = _cmp_method + + def _unary_method(self, op) -> SparseArray: + fill_value = op(np.array(self.fill_value)).item() + dtype = SparseDtype(self.dtype.subtype, fill_value) + # NOTE: if fill_value doesn't change + # we just have to apply op to sp_values + if isna(self.fill_value) or fill_value == self.fill_value: + values = op(self.sp_values) + return type(self)._simple_new(values, self.sp_index, self.dtype) + # In the other case we have to recalc indexes + return type(self)(op(self.to_dense()), dtype=dtype) + + def __pos__(self) -> SparseArray: + return self._unary_method(operator.pos) + + def __neg__(self) -> SparseArray: + return self._unary_method(operator.neg) + + def __invert__(self) -> SparseArray: + return self._unary_method(operator.invert) + + def __abs__(self) -> SparseArray: + return self._unary_method(operator.abs) + + # ---------- + # Formatting + # ----------- + def __repr__(self) -> str: + pp_str = printing.pprint_thing(self) + pp_fill = printing.pprint_thing(self.fill_value) + pp_index = printing.pprint_thing(self.sp_index) + return f"{pp_str}\nFill: {pp_fill}\n{pp_index}" + + def _formatter(self, boxed=False): + # Defer to the formatter from the GenericArrayFormatter calling us. + # This will infer the correct formatter from the dtype of the values. + return None + + +def make_sparse( + arr: np.ndarray, + kind: SparseIndexKind = "block", + fill_value=None, + dtype: NpDtype | None = None, +): + """ + Convert ndarray to sparse format + + Parameters + ---------- + arr : ndarray + kind : {'block', 'integer'} + fill_value : NaN or another value + dtype : np.dtype, optional + copy : bool, default False + + Returns + ------- + (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar) + """ + assert isinstance(arr, np.ndarray) + + if arr.ndim > 1: + raise TypeError("expected dimension <= 1 data") + + if fill_value is None: + fill_value = na_value_for_dtype(arr.dtype) + + if isna(fill_value): + mask = notna(arr) + else: + # cast to object comparison to be safe + if is_string_dtype(arr.dtype): + arr = arr.astype(object) + + if is_object_dtype(arr.dtype): + # element-wise equality check method in numpy doesn't treat + # each element type, eg. 0, 0.0, and False are treated as + # same. So we have to check the both of its type and value. 
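+ # e.g. with an integer fill_value of 0, object values 0.0 and False compare
+ # equal to 0 but have a different type, so they are still stored explicitly
+ # rather than treated as fill.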
+ mask = splib.make_mask_object_ndarray(arr, fill_value) + else: + mask = arr != fill_value + + length = len(arr) + if length != len(mask): + # the arr is a SparseArray + indices = mask.sp_index.indices + else: + indices = mask.nonzero()[0].astype(np.int32) + + index = make_sparse_index(length, indices, kind) + sparsified_values = arr[mask] + if dtype is not None: + # error: Argument "dtype" to "astype_nansafe" has incompatible type "Union[str, + # dtype[Any]]"; expected "Union[dtype[Any], ExtensionDtype]" + sparsified_values = astype_nansafe( + sparsified_values, dtype=dtype # type: ignore[arg-type] + ) + # TODO: copy + return sparsified_values, index, fill_value + + +@overload +def make_sparse_index(length: int, indices, kind: Literal["block"]) -> BlockIndex: + ... + + +@overload +def make_sparse_index(length: int, indices, kind: Literal["integer"]) -> IntIndex: + ... + + +def make_sparse_index(length: int, indices, kind: SparseIndexKind) -> SparseIndex: + index: SparseIndex + if kind == "block": + locs, lens = splib.get_blocks(indices) + index = BlockIndex(length, locs, lens) + elif kind == "integer": + index = IntIndex(length, indices) + else: # pragma: no cover + raise ValueError("must be block or integer type") + return index diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py new file mode 100644 index 00000000..eaed6257 --- /dev/null +++ b/pandas/core/arrays/sparse/dtype.py @@ -0,0 +1,416 @@ +"""Sparse Dtype""" +from __future__ import annotations + +import re +from typing import ( + TYPE_CHECKING, + Any, +) +import warnings + +import numpy as np + +from pandas._typing import ( + Dtype, + DtypeObj, + type_t, +) +from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.astype import astype_nansafe +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) + +if TYPE_CHECKING: + from pandas.core.arrays.sparse.array import SparseArray + + +@register_extension_dtype +class SparseDtype(ExtensionDtype): + """ + Dtype for data stored in :class:`SparseArray`. + + This dtype implements the pandas ExtensionDtype interface. + + Parameters + ---------- + dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64 + The dtype of the underlying array storing the non-fill value values. + fill_value : scalar, optional + The scalar value not stored in the SparseArray. By default, this + depends on `dtype`. + + =========== ========== + dtype na_value + =========== ========== + float ``np.nan`` + int ``0`` + bool ``False`` + datetime64 ``pd.NaT`` + timedelta64 ``pd.NaT`` + =========== ========== + + The default value may be overridden by specifying a `fill_value`. + + Attributes + ---------- + None + + Methods + ------- + None + """ + + # We include `_is_na_fill_value` in the metadata to avoid hash collisions + # between SparseDtype(float, 0.0) and SparseDtype(float, nan). + # Without is_na_fill_value in the comparison, those would be equal since + # hash(nan) is (sometimes?) 0. 
+ _metadata = ("_dtype", "_fill_value", "_is_na_fill_value") + + def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None: + + if isinstance(dtype, type(self)): + if fill_value is None: + fill_value = dtype.fill_value + dtype = dtype.subtype + + dtype = pandas_dtype(dtype) + if is_string_dtype(dtype): + dtype = np.dtype("object") + + if fill_value is None: + fill_value = na_value_for_dtype(dtype) + + self._dtype = dtype + self._fill_value = fill_value + self._check_fill_value() + + def __hash__(self) -> int: + # Python3 doesn't inherit __hash__ when a base class overrides + # __eq__, so we explicitly do it here. + return super().__hash__() + + def __eq__(self, other: Any) -> bool: + # We have to override __eq__ to handle NA values in _metadata. + # The base class does simple == checks, which fail for NA. + if isinstance(other, str): + try: + other = self.construct_from_string(other) + except TypeError: + return False + + if isinstance(other, type(self)): + subtype = self.subtype == other.subtype + if self._is_na_fill_value: + # this case is complicated by two things: + # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan) + # SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT) + # i.e. we want to treat any floating-point NaN as equal, but + # not a floating-point NaN and a datetime NaT. + fill_value = ( + other._is_na_fill_value + and isinstance(self.fill_value, type(other.fill_value)) + or isinstance(other.fill_value, type(self.fill_value)) + ) + else: + fill_value = self.fill_value == other.fill_value + + return subtype and fill_value + return False + + @property + def fill_value(self): + """ + The fill value of the array. + + Converting the SparseArray to a dense ndarray will fill the + array with this value. + + .. warning:: + + It's possible to end up with a SparseArray that has ``fill_value`` + values in ``sp_values``. This can occur, for example, when setting + ``SparseArray.fill_value`` directly. + """ + return self._fill_value + + def _check_fill_value(self): + if not is_scalar(self._fill_value): + raise ValueError( + f"fill_value must be a scalar. Got {self._fill_value} instead" + ) + # TODO: Right now we can use Sparse boolean array + # with any fill_value. Here was an attempt + # to allow only 3 value: True, False or nan + # but plenty test has failed. + # see pull 44955 + # if self._is_boolean and not ( + # is_bool(self._fill_value) or isna(self._fill_value) + # ): + # raise ValueError( + # "fill_value must be True, False or nan " + # f"for boolean type. Got {self._fill_value} instead" + # ) + + @property + def _is_na_fill_value(self) -> bool: + return isna(self.fill_value) + + @property + def _is_numeric(self) -> bool: + return not is_object_dtype(self.subtype) + + @property + def _is_boolean(self) -> bool: + return is_bool_dtype(self.subtype) + + @property + def kind(self) -> str: + """ + The sparse kind. Either 'integer', or 'block'. + """ + return self.subtype.kind + + @property + def type(self): + return self.subtype.type + + @property + def subtype(self): + return self._dtype + + @property + def name(self) -> str: + return f"Sparse[{self.subtype.name}, {repr(self.fill_value)}]" + + def __repr__(self) -> str: + return self.name + + @classmethod + def construct_array_type(cls) -> type_t[SparseArray]: + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + from pandas.core.arrays.sparse.array import SparseArray + + return SparseArray + + @classmethod + def construct_from_string(cls, string: str) -> SparseDtype: + """ + Construct a SparseDtype from a string form. + + Parameters + ---------- + string : str + Can take the following forms. + + string dtype + ================ ============================ + 'int' SparseDtype[np.int64, 0] + 'Sparse' SparseDtype[np.float64, nan] + 'Sparse[int]' SparseDtype[np.int64, 0] + 'Sparse[int, 0]' SparseDtype[np.int64, 0] + ================ ============================ + + It is not possible to specify non-default fill values + with a string. An argument like ``'Sparse[int, 1]'`` + will raise a ``TypeError`` because the default fill value + for integers is 0. + + Returns + ------- + SparseDtype + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + msg = f"Cannot construct a 'SparseDtype' from '{string}'" + if string.startswith("Sparse"): + try: + sub_type, has_fill_value = cls._parse_subtype(string) + except ValueError as err: + raise TypeError(msg) from err + else: + result = SparseDtype(sub_type) + msg = ( + f"Cannot construct a 'SparseDtype' from '{string}'.\n\nIt " + "looks like the fill_value in the string is not " + "the default for the dtype. Non-default fill_values " + "are not supported. Use the 'SparseDtype()' " + "constructor instead." + ) + if has_fill_value and str(result) != string: + raise TypeError(msg) + return result + else: + raise TypeError(msg) + + @staticmethod + def _parse_subtype(dtype: str) -> tuple[str, bool]: + """ + Parse a string to get the subtype + + Parameters + ---------- + dtype : str + A string like + + * Sparse[subtype] + * Sparse[subtype, fill_value] + + Returns + ------- + subtype : str + + Raises + ------ + ValueError + When the subtype cannot be extracted. + """ + xpr = re.compile(r"Sparse\[(?P[^,]*)(, )?(?P.*?)?\]$") + m = xpr.match(dtype) + has_fill_value = False + if m: + subtype = m.groupdict()["subtype"] + has_fill_value = bool(m.groupdict()["fill_value"]) + elif dtype == "Sparse": + subtype = "float64" + else: + raise ValueError(f"Cannot parse {dtype}") + return subtype, has_fill_value + + @classmethod + def is_dtype(cls, dtype: object) -> bool: + dtype = getattr(dtype, "dtype", dtype) + if isinstance(dtype, str) and dtype.startswith("Sparse"): + sub_type, _ = cls._parse_subtype(dtype) + dtype = np.dtype(sub_type) + elif isinstance(dtype, cls): + return True + return isinstance(dtype, np.dtype) or dtype == "Sparse" + + def update_dtype(self, dtype) -> SparseDtype: + """ + Convert the SparseDtype to a new dtype. + + This takes care of converting the ``fill_value``. + + Parameters + ---------- + dtype : Union[str, numpy.dtype, SparseDtype] + The new dtype to use. + + * For a SparseDtype, it is simply returned + * For a NumPy dtype (or str), the current fill value + is converted to the new dtype, and a SparseDtype + with `dtype` and the new fill value is returned. + + Returns + ------- + SparseDtype + A new SparseDtype with the correct `dtype` and fill value + for that `dtype`. + + Raises + ------ + ValueError + When the current fill value cannot be converted to the + new `dtype` (e.g. trying to convert ``np.nan`` to an + integer dtype). 
+ + + Examples + -------- + >>> SparseDtype(int, 0).update_dtype(float) + Sparse[float64, 0.0] + + >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan)) + Sparse[float64, nan] + """ + cls = type(self) + dtype = pandas_dtype(dtype) + + if not isinstance(dtype, cls): + if not isinstance(dtype, np.dtype): + raise TypeError("sparse arrays of extension dtypes not supported") + + fvarr = astype_nansafe(np.array(self.fill_value), dtype) + # NB: not fv_0d.item(), as that casts dt64->int + fill_value = fvarr[0] + dtype = cls(dtype, fill_value=fill_value) + + return dtype + + @property + def _subtype_with_str(self): + """ + Whether the SparseDtype's subtype should be considered ``str``. + + Typically, pandas will store string data in an object-dtype array. + When converting values to a dtype, e.g. in ``.astype``, we need to + be more specific, we need the actual underlying type. + + Returns + ------- + >>> SparseDtype(int, 1)._subtype_with_str + dtype('int64') + + >>> SparseDtype(object, 1)._subtype_with_str + dtype('O') + + >>> dtype = SparseDtype(str, '') + >>> dtype.subtype + dtype('O') + + >>> dtype._subtype_with_str + + """ + if isinstance(self.fill_value, str): + return type(self.fill_value) + return self.subtype + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + # TODO for now only handle SparseDtypes and numpy dtypes => extend + # with other compatible extension dtypes + if any( + isinstance(x, ExtensionDtype) and not isinstance(x, SparseDtype) + for x in dtypes + ): + return None + + fill_values = [x.fill_value for x in dtypes if isinstance(x, SparseDtype)] + fill_value = fill_values[0] + + # np.nan isn't a singleton, so we may end up with multiple + # NaNs here, so we ignore the all NA case too. + if not (len(set(fill_values)) == 1 or isna(fill_values).all()): + warnings.warn( + "Concatenating sparse arrays with multiple fill " + f"values: '{fill_values}'. Picking the first and " + "converting the rest.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + + np_dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes] + return SparseDtype(np.find_common_type(np_dtypes, []), fill_value=fill_value) diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py new file mode 100644 index 00000000..88e1778d --- /dev/null +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -0,0 +1,211 @@ +""" +Interaction with scipy.sparse matrices. + +Currently only includes to_coo helpers. 
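To make the PerformanceWarning path in _get_common_dtype concrete, a small sketch (behaviour as I read the code: the first fill_value is kept):

import pandas as pd

a = pd.Series(pd.arrays.SparseArray([0, 0, 1], fill_value=0))
b = pd.Series(pd.arrays.SparseArray([1, 1, 2], fill_value=1))
out = pd.concat([a, b])   # warns about concatenating with multiple fill values
print(out.dtype)          # Sparse[int64, 0]: fill_value taken from the first array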
+""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Iterable, +) + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + IndexLabel, + npt, +) + +from pandas.core.dtypes.missing import notna + +from pandas.core.algorithms import factorize +from pandas.core.indexes.api import MultiIndex +from pandas.core.series import Series + +if TYPE_CHECKING: + import scipy.sparse + + +def _check_is_partition(parts: Iterable, whole: Iterable): + whole = set(whole) + parts = [set(x) for x in parts] + if set.intersection(*parts) != set(): + raise ValueError("Is not a partition because intersection is not null.") + if set.union(*parts) != whole: + raise ValueError("Is not a partition because union is not the whole.") + + +def _levels_to_axis( + ss, + levels: tuple[int] | list[int], + valid_ilocs: npt.NDArray[np.intp], + sort_labels: bool = False, +) -> tuple[npt.NDArray[np.intp], list[IndexLabel]]: + """ + For a MultiIndexed sparse Series `ss`, return `ax_coords` and `ax_labels`, + where `ax_coords` are the coordinates along one of the two axes of the + destination sparse matrix, and `ax_labels` are the labels from `ss`' Index + which correspond to these coordinates. + + Parameters + ---------- + ss : Series + levels : tuple/list + valid_ilocs : numpy.ndarray + Array of integer positions of valid values for the sparse matrix in ss. + sort_labels : bool, default False + Sort the axis labels before forming the sparse matrix. When `levels` + refers to a single level, set to True for a faster execution. + + Returns + ------- + ax_coords : numpy.ndarray (axis coordinates) + ax_labels : list (axis labels) + """ + # Since the labels are sorted in `Index.levels`, when we wish to sort and + # there is only one level of the MultiIndex for this axis, the desired + # output can be obtained in the following simpler, more efficient way. + if sort_labels and len(levels) == 1: + ax_coords = ss.index.codes[levels[0]][valid_ilocs] + ax_labels = ss.index.levels[levels[0]] + + else: + levels_values = lib.fast_zip( + [ss.index.get_level_values(lvl).values for lvl in levels] + ) + codes, ax_labels = factorize(levels_values, sort=sort_labels) + ax_coords = codes[valid_ilocs] + + ax_labels = ax_labels.tolist() + return ax_coords, ax_labels + + +def _to_ijv( + ss, + row_levels: tuple[int] | list[int] = (0,), + column_levels: tuple[int] | list[int] = (1,), + sort_labels: bool = False, +) -> tuple[ + np.ndarray, + npt.NDArray[np.intp], + npt.NDArray[np.intp], + list[IndexLabel], + list[IndexLabel], +]: + """ + For an arbitrary MultiIndexed sparse Series return (v, i, j, ilabels, + jlabels) where (v, (i, j)) is suitable for passing to scipy.sparse.coo + constructor, and ilabels and jlabels are the row and column labels + respectively. + + Parameters + ---------- + ss : Series + row_levels : tuple/list + column_levels : tuple/list + sort_labels : bool, default False + Sort the row and column labels before forming the sparse matrix. + When `row_levels` and/or `column_levels` refer to a single level, + set to `True` for a faster execution. + + Returns + ------- + values : numpy.ndarray + Valid values to populate a sparse matrix, extracted from + ss. 
+ i_coords : numpy.ndarray (row coordinates of the values) + j_coords : numpy.ndarray (column coordinates of the values) + i_labels : list (row labels) + j_labels : list (column labels) + """ + # index and column levels must be a partition of the index + _check_is_partition([row_levels, column_levels], range(ss.index.nlevels)) + # From the sparse Series, get the integer indices and data for valid sparse + # entries. + sp_vals = ss.array.sp_values + na_mask = notna(sp_vals) + values = sp_vals[na_mask] + valid_ilocs = ss.array.sp_index.indices[na_mask] + + i_coords, i_labels = _levels_to_axis( + ss, row_levels, valid_ilocs, sort_labels=sort_labels + ) + + j_coords, j_labels = _levels_to_axis( + ss, column_levels, valid_ilocs, sort_labels=sort_labels + ) + + return values, i_coords, j_coords, i_labels, j_labels + + +def sparse_series_to_coo( + ss: Series, + row_levels: Iterable[int] = (0,), + column_levels: Iterable[int] = (1,), + sort_labels: bool = False, +) -> tuple[scipy.sparse.coo_matrix, list[IndexLabel], list[IndexLabel]]: + """ + Convert a sparse Series to a scipy.sparse.coo_matrix using index + levels row_levels, column_levels as the row and column + labels respectively. Returns the sparse_matrix, row and column labels. + """ + import scipy.sparse + + if ss.index.nlevels < 2: + raise ValueError("to_coo requires MultiIndex with nlevels >= 2.") + if not ss.index.is_unique: + raise ValueError( + "Duplicate index entries are not allowed in to_coo transformation." + ) + + # to keep things simple, only rely on integer indexing (not labels) + row_levels = [ss.index._get_level_number(x) for x in row_levels] + column_levels = [ss.index._get_level_number(x) for x in column_levels] + + v, i, j, rows, columns = _to_ijv( + ss, row_levels=row_levels, column_levels=column_levels, sort_labels=sort_labels + ) + sparse_matrix = scipy.sparse.coo_matrix( + (v, (i, j)), shape=(len(rows), len(columns)) + ) + return sparse_matrix, rows, columns + + +def coo_to_sparse_series( + A: scipy.sparse.coo_matrix, dense_index: bool = False +) -> Series: + """ + Convert a scipy.sparse.coo_matrix to a SparseSeries. + + Parameters + ---------- + A : scipy.sparse.coo_matrix + dense_index : bool, default False + + Returns + ------- + Series + + Raises + ------ + TypeError if A is not a coo_matrix + """ + from pandas import SparseDtype + + try: + ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) + except AttributeError as err: + raise TypeError( + f"Expected coo_matrix. Got {type(A).__name__} instead." + ) from err + ser = ser.sort_index() + ser = ser.astype(SparseDtype(ser.dtype)) + if dense_index: + # is there a better constructor method to use here? 
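The inverse direction is exposed as Series.sparse.from_coo, which wraps coo_to_sparse_series above; a sketch (requires scipy):

import scipy.sparse
import pandas as pd

A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([0, 1, 1], [0, 0, 1])), shape=(3, 2))
ser = pd.Series.sparse.from_coo(A)                     # indexed by the (row, col) pairs present
full = pd.Series.sparse.from_coo(A, dense_index=True)  # reindexed to the full row x col product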
+ i = range(A.shape[0]) + j = range(A.shape[1]) + ind = MultiIndex.from_product([i, j]) + ser = ser.reindex(ind) + return ser diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py new file mode 100644 index 00000000..0e2df9f7 --- /dev/null +++ b/pandas/core/arrays/string_.py @@ -0,0 +1,578 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._libs.arrays import NDArrayBacked +from pandas._typing import ( + Dtype, + Scalar, + npt, + type_t, +) +from pandas.compat import pa_version_under1p01 +from pandas.compat.numpy import function as nv + +from pandas.core.dtypes.base import ( + ExtensionDtype, + StorageExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_dtype_equal, + is_integer_dtype, + is_object_dtype, + is_string_dtype, + pandas_dtype, +) + +from pandas.core import ops +from pandas.core.array_algos import masked_reductions +from pandas.core.arrays import ( + ExtensionArray, + FloatingArray, + IntegerArray, +) +from pandas.core.arrays.floating import FloatingDtype +from pandas.core.arrays.integer import IntegerDtype +from pandas.core.arrays.numpy_ import PandasArray +from pandas.core.construction import extract_array +from pandas.core.indexers import check_array_indexer +from pandas.core.missing import isna + +if TYPE_CHECKING: + import pyarrow + + from pandas import Series + + +@register_extension_dtype +class StringDtype(StorageExtensionDtype): + """ + Extension dtype for string data. + + .. versionadded:: 1.0.0 + + .. warning:: + + StringDtype is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + storage : {"python", "pyarrow"}, optional + If not given, the value of ``pd.options.mode.string_storage``. + + Attributes + ---------- + None + + Methods + ------- + None + + Examples + -------- + >>> pd.StringDtype() + string[python] + + >>> pd.StringDtype(storage="pyarrow") + string[pyarrow] + """ + + name = "string" + + #: StringDtype().na_value uses pandas.NA + @property + def na_value(self) -> libmissing.NAType: + return libmissing.NA + + _metadata = ("storage",) + + def __init__(self, storage=None) -> None: + if storage is None: + storage = get_option("mode.string_storage") + if storage not in {"python", "pyarrow"}: + raise ValueError( + f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." + ) + if storage == "pyarrow" and pa_version_under1p01: + raise ImportError( + "pyarrow>=1.0.0 is required for PyArrow backed StringArray." + ) + self.storage = storage + + @property + def type(self) -> type[str]: + return str + + @classmethod + def construct_from_string(cls, string): + """ + Construct a StringDtype from a string. + + Parameters + ---------- + string : str + The type of the name. The storage type will be taking from `string`. + Valid options and their storage types are + + ========================== ============================================== + string result storage + ========================== ============================================== + ``'string'`` pd.options.mode.string_storage, default python + ``'string[python]'`` python + ``'string[pyarrow]'`` pyarrow + ========================== ============================================== + + Returns + ------- + StringDtype + + Raise + ----- + TypeError + If the string is not a valid option. 
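A brief sketch of how the storage parameter is normally selected in practice (the "string[pyarrow]" spelling additionally needs pyarrow >= 1.0.0 installed):

import pandas as pd

pd.array(["a", "b", None], dtype="string")            # storage taken from mode.string_storage
pd.array(["a", "b", None], dtype="string[python]")    # explicit python-backed StringArray
# pd.array(["a", "b", None], dtype="string[pyarrow]") # ArrowStringArray, needs pyarrow >= 1.0.0
print(pd.StringDtype("python").na_value)              # <NA>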
+ """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + if string == "string": + return cls() + elif string == "string[python]": + return cls(storage="python") + elif string == "string[pyarrow]": + return cls(storage="pyarrow") + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + # https://github.com/pandas-dev/pandas/issues/36126 + # error: Signature of "construct_array_type" incompatible with supertype + # "ExtensionDtype" + def construct_array_type( # type: ignore[override] + self, + ) -> type_t[BaseStringArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays.string_arrow import ArrowStringArray + + if self.storage == "python": + return StringArray + else: + return ArrowStringArray + + def __from_arrow__( + self, array: pyarrow.Array | pyarrow.ChunkedArray + ) -> BaseStringArray: + """ + Construct StringArray from pyarrow Array/ChunkedArray. + """ + if self.storage == "pyarrow": + from pandas.core.arrays.string_arrow import ArrowStringArray + + return ArrowStringArray(array) + else: + + import pyarrow + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + # pyarrow.ChunkedArray + chunks = array.chunks + + results = [] + for arr in chunks: + # using _from_sequence to ensure None is converted to NA + str_arr = StringArray._from_sequence(np.array(arr)) + results.append(str_arr) + + if results: + return StringArray._concat_same_type(results) + else: + return StringArray(np.array([], dtype="object")) + + +class BaseStringArray(ExtensionArray): + """ + Mixin class for StringArray, ArrowStringArray. + """ + + pass + + +class StringArray(BaseStringArray, PandasArray): + """ + Extension array for string data. + + .. versionadded:: 1.0.0 + + .. warning:: + + StringArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : array-like + The array of data. + + .. warning:: + + Currently, this expects an object-dtype ndarray + where the elements are Python strings + or nan-likes (``None``, ``np.nan``, ``NA``). + This may change without warning in the future. Use + :meth:`pandas.array` with ``dtype="string"`` for a stable way of + creating a `StringArray` from any sequence. + + .. versionchanged:: 1.5.0 + + StringArray now accepts array-likes containing + nan-likes(``None``, ``np.nan``) for the ``values`` parameter + in addition to strings and :attr:`pandas.NA` + + copy : bool, default False + Whether to copy the array of data. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + array + The recommended function for creating a StringArray. + Series.str + The string methods are available on Series backed by + a StringArray. + + Notes + ----- + StringArray returns a BooleanArray for comparison methods. + + Examples + -------- + >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string") + + ['This is', 'some text', , 'data.'] + Length: 4, dtype: string + + Unlike arrays instantiated with ``dtype="object"``, ``StringArray`` + will convert the values to strings. + + >>> pd.array(['1', 1], dtype="object") + + ['1', 1] + Length: 2, dtype: object + >>> pd.array(['1', 1], dtype="string") + + ['1', '1'] + Length: 2, dtype: string + + However, instantiating StringArrays directly with non-strings will raise an error. 
+ + For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`: + + >>> pd.array(["a", None, "c"], dtype="string") == "a" + + [True, , False] + Length: 3, dtype: boolean + """ + + # undo the PandasArray hack + _typ = "extension" + + def __init__(self, values, copy=False) -> None: + values = extract_array(values) + + super().__init__(values, copy=copy) + if not isinstance(values, type(self)): + self._validate() + NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python")) + + def _validate(self): + """Validate that we only store NA or strings.""" + if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True): + raise ValueError("StringArray requires a sequence of strings or pandas.NA") + if self._ndarray.dtype != "object": + raise ValueError( + "StringArray requires a sequence of strings or pandas.NA. Got " + f"'{self._ndarray.dtype}' dtype instead." + ) + # Check to see if need to convert Na values to pd.NA + if self._ndarray.ndim > 2: + # Ravel if ndims > 2 b/c no cythonized version available + lib.convert_nans_to_NA(self._ndarray.ravel("K")) + else: + lib.convert_nans_to_NA(self._ndarray) + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): + if dtype and not (isinstance(dtype, str) and dtype == "string"): + dtype = pandas_dtype(dtype) + assert isinstance(dtype, StringDtype) and dtype.storage == "python" + + from pandas.core.arrays.masked import BaseMaskedArray + + if isinstance(scalars, BaseMaskedArray): + # avoid costly conversion to object dtype + na_values = scalars._mask + result = scalars._data + result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) + result[na_values] = libmissing.NA + + else: + # convert non-na-likes to str, and nan-likes to StringDtype().na_value + result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy) + + # Manually creating new array avoids the validation step in the __init__, so is + # faster. Refactor need for validation? + new_string_array = cls.__new__(cls) + NDArrayBacked.__init__(new_string_array, result, StringDtype(storage="python")) + + return new_string_array + + @classmethod + def _from_sequence_of_strings( + cls, strings, *, dtype: Dtype | None = None, copy=False + ): + return cls._from_sequence(strings, dtype=dtype, copy=copy) + + @classmethod + def _empty(cls, shape, dtype) -> StringArray: + values = np.empty(shape, dtype=object) + values[:] = libmissing.NA + return cls(values).astype(dtype, copy=False) + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. + """ + import pyarrow as pa + + if type is None: + type = pa.string() + + values = self._ndarray.copy() + values[self.isna()] = None + return pa.array(values, type=type, from_pandas=True) + + def _values_for_factorize(self): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = None + return arr, None + + def __setitem__(self, key, value): + value = extract_array(value, extract_numpy=True) + if isinstance(value, type(self)): + # extract_array doesn't extract PandasArray subclasses + value = value._ndarray + + key = check_array_indexer(self, key) + scalar_key = lib.is_scalar(key) + scalar_value = lib.is_scalar(value) + if scalar_key and not scalar_value: + raise ValueError("setting an array element with a sequence.") + + # validate new items + if scalar_value: + if isna(value): + value = libmissing.NA + elif not isinstance(value, str): + raise ValueError( + f"Cannot set non-string value '{value}' into a StringArray." 
+ ) + else: + if not is_array_like(value): + value = np.asarray(value, dtype=object) + if len(value) and not lib.is_string_array(value, skipna=True): + raise ValueError("Must provide strings.") + + value[isna(value)] = libmissing.NA + + super().__setitem__(key, value) + + def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: + # the super() method NDArrayBackedExtensionArray._putmask uses + # np.putmask which doesn't properly handle None/pd.NA, so using the + # base class implementation that uses __setitem__ + ExtensionArray._putmask(self, mask, value) + + def astype(self, dtype, copy: bool = True): + dtype = pandas_dtype(dtype) + + if is_dtype_equal(dtype, self.dtype): + if copy: + return self.copy() + return self + + elif isinstance(dtype, IntegerDtype): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = 0 + values = arr.astype(dtype.numpy_dtype) + return IntegerArray(values, mask, copy=False) + elif isinstance(dtype, FloatingDtype): + arr = self.copy() + mask = self.isna() + arr[mask] = "0" + values = arr.astype(dtype.numpy_dtype) + return FloatingArray(values, mask, copy=False) + elif isinstance(dtype, ExtensionDtype): + return super().astype(dtype, copy=copy) + elif np.issubdtype(dtype, np.floating): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = 0 + values = arr.astype(dtype) + values[mask] = np.nan + return values + + return super().astype(dtype, copy) + + def _reduce( + self, name: str, *, skipna: bool = True, axis: int | None = 0, **kwargs + ): + if name in ["min", "max"]: + return getattr(self, name)(skipna=skipna, axis=axis) + + raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + + def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_min((), kwargs) + result = masked_reductions.min( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_max((), kwargs) + result = masked_reductions.max( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def value_counts(self, dropna: bool = True) -> Series: + from pandas import value_counts + + result = value_counts(self._ndarray, dropna=dropna).astype("Int64") + result.index = result.index.astype(self.dtype) + return result + + def memory_usage(self, deep: bool = False) -> int: + result = self._ndarray.nbytes + if deep: + return result + lib.memory_usage_of_objects(self._ndarray) + return result + + def _cmp_method(self, other, op): + from pandas.arrays import BooleanArray + + if isinstance(other, StringArray): + other = other._ndarray + + mask = isna(self) | isna(other) + valid = ~mask + + if not lib.is_scalar(other): + if len(other) != len(self): + # prevent improper broadcasting when other is 2D + raise ValueError( + f"Lengths of operands do not match: {len(self)} != {len(other)}" + ) + + other = np.asarray(other) + other = other[valid] + + if op.__name__ in ops.ARITHMETIC_BINOPS: + result = np.empty_like(self._ndarray, dtype="object") + result[mask] = libmissing.NA + result[valid] = op(self._ndarray[valid], other) + return StringArray(result) + else: + # logical + result = np.zeros(len(self._ndarray), dtype="bool") + result[valid] = op(self._ndarray[valid], other) + return BooleanArray(result, mask) + + _arith_method = _cmp_method + + # ------------------------------------------------------------------------ + # String methods interface + # error: 
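The astype branches above dispatch to the nullable integer/float arrays; a sketch of the observable behaviour:

import pandas as pd

s = pd.Series(["1", "2", None], dtype="string")
print(s.astype("Int64"))     # 1, 2, <NA>  (IntegerArray, missing values preserved)
print(s.astype("Float64"))   # 1.0, 2.0, <NA>  (FloatingArray)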
Incompatible types in assignment (expression has type "NAType", + # base class "PandasArray" defined the type as "float") + _str_na_value = libmissing.NA # type: ignore[assignment] + + def _str_map( + self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True + ): + from pandas.arrays import BooleanArray + + if dtype is None: + dtype = StringDtype(storage="python") + if na_value is None: + na_value = self.dtype.na_value + + mask = isna(self) + arr = np.asarray(self) + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: type[IntegerArray] | type[BooleanArray] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + result = lib.map_infer_mask( + arr, + f, + mask.view("uint8"), + convert=False, + na_value=na_value, + # error: Argument 1 to "dtype" has incompatible type + # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected + # "Type[object]" + dtype=np.dtype(dtype), # type: ignore[arg-type] + ) + + if not na_value_is_na: + mask[:] = False + + return constructor(result, mask) + + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # i.e. StringDtype + result = lib.map_infer_mask( + arr, f, mask.view("uint8"), convert=False, na_value=na_value + ) + return StringArray(result) + else: + # This is when the result type is object. We reach this when + # -> We know the result type is truly object (e.g. .encode returns bytes + # or .findall returns a list). + # -> We don't know the result type. E.g. `.get` can return anything. + return lib.map_infer_mask(arr, f, mask.view("uint8")) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py new file mode 100644 index 00000000..ed71e263 --- /dev/null +++ b/pandas/core/arrays/string_arrow.py @@ -0,0 +1,479 @@ +from __future__ import annotations + +from collections.abc import Callable # noqa: PDF001 +import re +from typing import Union + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + Dtype, + NpDtype, + Scalar, + npt, +) +from pandas.compat import ( + pa_version_under1p01, + pa_version_under2p0, + pa_version_under3p0, + pa_version_under4p0, +) + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_dtype_equal, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.arrays.arrow import ArrowExtensionArray +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.integer import Int64Dtype +from pandas.core.arrays.numeric import NumericDtype +from pandas.core.arrays.string_ import ( + BaseStringArray, + StringDtype, +) +from pandas.core.strings.object_array import ObjectStringArrayMixin + +if not pa_version_under1p01: + import pyarrow as pa + import pyarrow.compute as pc + + from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning + +ArrowStringScalarOrNAT = Union[str, libmissing.NAType] + + +def _chk_pyarrow_available() -> None: + if pa_version_under1p01: + msg = "pyarrow>=1.0.0 is required for PyArrow backed ArrowExtensionArray." + raise ImportError(msg) + + +# TODO: Inherit directly from BaseStringArrayMethods. 
Currently we inherit from +# ObjectStringArrayMixin because we want to have the object-dtype based methods as +# fallback for the ones that pyarrow doesn't yet support + + +class ArrowStringArray(ArrowExtensionArray, BaseStringArray, ObjectStringArrayMixin): + """ + Extension array for string data in a ``pyarrow.ChunkedArray``. + + .. versionadded:: 1.2.0 + + .. warning:: + + ArrowStringArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : pyarrow.Array or pyarrow.ChunkedArray + The array of data. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + array + The recommended function for creating a ArrowStringArray. + Series.str + The string methods are available on Series backed by + a ArrowStringArray. + + Notes + ----- + ArrowStringArray returns a BooleanArray for comparison methods. + + Examples + -------- + >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string[pyarrow]") + + ['This is', 'some text', , 'data.'] + Length: 4, dtype: string + """ + + # error: Incompatible types in assignment (expression has type "StringDtype", + # base class "ArrowExtensionArray" defined the type as "ArrowDtype") + _dtype: StringDtype # type: ignore[assignment] + + def __init__(self, values) -> None: + super().__init__(values) + self._dtype = StringDtype(storage="pyarrow") + + if not pa.types.is_string(self._data.type): + raise ValueError( + "ArrowStringArray requires a PyArrow (chunked) array of string type" + ) + + @classmethod + def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False): + from pandas.core.arrays.masked import BaseMaskedArray + + _chk_pyarrow_available() + + if dtype and not (isinstance(dtype, str) and dtype == "string"): + dtype = pandas_dtype(dtype) + assert isinstance(dtype, StringDtype) and dtype.storage == "pyarrow" + + if isinstance(scalars, BaseMaskedArray): + # avoid costly conversion to object dtype in ensure_string_array and + # numerical issues with Float32Dtype + na_values = scalars._mask + result = scalars._data + result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) + return cls(pa.array(result, mask=na_values, type=pa.string())) + + # convert non-na-likes to str + result = lib.ensure_string_array(scalars, copy=copy) + return cls(pa.array(result, type=pa.string(), from_pandas=True)) + + @classmethod + def _from_sequence_of_strings( + cls, strings, dtype: Dtype | None = None, copy: bool = False + ): + return cls._from_sequence(strings, dtype=dtype, copy=copy) + + @property + def dtype(self) -> StringDtype: # type: ignore[override] + """ + An instance of 'string[pyarrow]'. + """ + return self._dtype + + def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: + """Correctly construct numpy arrays when passed to `np.asarray()`.""" + return self.to_numpy(dtype=dtype) + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value=lib.no_default, + ) -> np.ndarray: + """ + Convert to a NumPy ndarray. 
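A minimal sketch of constructing the pyarrow-backed variant (requires pyarrow >= 1.0.0 at runtime):

import pandas as pd

arr = pd.array(["This is", "some text", None, "data."], dtype="string[pyarrow]")
print(arr.dtype)                     # string[pyarrow]
print(arr.to_numpy(na_value=None))   # object ndarray, nulls materialised as None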
+ """ + # TODO: copy argument is ignored + + result = np.array(self._data, dtype=dtype) + if self._data.null_count > 0: + if na_value is lib.no_default: + if dtype and np.issubdtype(dtype, np.floating): + return result + na_value = self._dtype.na_value + mask = self.isna() + result[mask] = na_value + return result + + def insert(self, loc: int, item) -> ArrowStringArray: + if not isinstance(item, str) and item is not libmissing.NA: + raise TypeError("Scalar must be NA or str") + return super().insert(loc, item) + + def _maybe_convert_setitem_value(self, value): + """Maybe convert value to be pyarrow compatible.""" + if is_scalar(value): + if isna(value): + value = None + elif not isinstance(value, str): + raise ValueError("Scalar must be NA or str") + else: + value = np.array(value, dtype=object, copy=True) + value[isna(value)] = None + for v in value: + if not (v is None or isinstance(v, str)): + raise ValueError("Scalar must be NA or str") + return value + + def isin(self, values) -> npt.NDArray[np.bool_]: + if pa_version_under2p0: + fallback_performancewarning(version="2") + return super().isin(values) + + value_set = [ + pa_scalar.as_py() + for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values] + if pa_scalar.type in (pa.string(), pa.null()) + ] + + # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True + # for null values, so we short-circuit to return all False array. + if not len(value_set): + return np.zeros(len(self), dtype=bool) + + kwargs = {} + if pa_version_under3p0: + # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises + # with unexpected keyword argument in pyarrow 3.0.0+ + kwargs["skip_null"] = True + + result = pc.is_in(self._data, value_set=pa.array(value_set), **kwargs) + # pyarrow 2.0.0 returned nulls, so we explicily specify dtype to convert nulls + # to False + return np.array(result, dtype=np.bool_) + + def astype(self, dtype, copy: bool = True): + dtype = pandas_dtype(dtype) + + if is_dtype_equal(dtype, self.dtype): + if copy: + return self.copy() + return self + + elif isinstance(dtype, NumericDtype): + data = self._data.cast(pa.from_numpy_dtype(dtype.numpy_dtype)) + return dtype.__from_arrow__(data) + + return super().astype(dtype, copy=copy) + + # ------------------------------------------------------------------------ + # String methods interface + + # error: Incompatible types in assignment (expression has type "NAType", + # base class "ObjectStringArrayMixin" defined the type as "float") + _str_na_value = libmissing.NA # type: ignore[assignment] + + def _str_map( + self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True + ): + # TODO: de-duplicate with StringArray method. This method is moreless copy and + # paste. 
+ + from pandas.arrays import ( + BooleanArray, + IntegerArray, + ) + + if dtype is None: + dtype = self.dtype + if na_value is None: + na_value = self.dtype.na_value + + mask = isna(self) + arr = np.asarray(self) + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: type[IntegerArray] | type[BooleanArray] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + result = lib.map_infer_mask( + arr, + f, + mask.view("uint8"), + convert=False, + na_value=na_value, + # error: Argument 1 to "dtype" has incompatible type + # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected + # "Type[object]" + dtype=np.dtype(dtype), # type: ignore[arg-type] + ) + + if not na_value_is_na: + mask[:] = False + + return constructor(result, mask) + + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # i.e. StringDtype + result = lib.map_infer_mask( + arr, f, mask.view("uint8"), convert=False, na_value=na_value + ) + result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True) + return type(self)(result) + else: + # This is when the result type is object. We reach this when + # -> We know the result type is truly object (e.g. .encode returns bytes + # or .findall returns a list). + # -> We don't know the result type. E.g. `.get` can return anything. + return lib.map_infer_mask(arr, f, mask.view("uint8")) + + def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True): + if flags: + fallback_performancewarning() + return super()._str_contains(pat, case, flags, na, regex) + + if regex: + if pa_version_under4p0 or case is False: + fallback_performancewarning(version="4") + return super()._str_contains(pat, case, flags, na, regex) + else: + result = pc.match_substring_regex(self._data, pat) + else: + if case: + result = pc.match_substring(self._data, pat) + else: + result = pc.match_substring(pc.utf8_upper(self._data), pat.upper()) + result = BooleanDtype().__from_arrow__(result) + if not isna(na): + result[isna(result)] = bool(na) + return result + + def _str_startswith(self, pat: str, na=None): + if pa_version_under4p0: + fallback_performancewarning(version="4") + return super()._str_startswith(pat, na) + + pat = "^" + re.escape(pat) + return self._str_contains(pat, na=na, regex=True) + + def _str_endswith(self, pat: str, na=None): + if pa_version_under4p0: + fallback_performancewarning(version="4") + return super()._str_endswith(pat, na) + + pat = re.escape(pat) + "$" + return self._str_contains(pat, na=na, regex=True) + + def _str_replace( + self, + pat: str | re.Pattern, + repl: str | Callable, + n: int = -1, + case: bool = True, + flags: int = 0, + regex: bool = True, + ): + if ( + pa_version_under4p0 + or isinstance(pat, re.Pattern) + or callable(repl) + or not case + or flags + ): + fallback_performancewarning(version="4") + return super()._str_replace(pat, repl, n, case, flags, regex) + + func = pc.replace_substring_regex if regex else pc.replace_substring + result = func(self._data, pattern=pat, replacement=repl, max_replacements=n) + return type(self)(result) + + def _str_match( + self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None + ): + if pa_version_under4p0: + fallback_performancewarning(version="4") + return super()._str_match(pat, case, flags, na) + + if not pat.startswith("^"): + pat = "^" + pat + return self._str_contains(pat, case, flags, na, regex=True) + + def _str_fullmatch( + self, pat, 
case: bool = True, flags: int = 0, na: Scalar | None = None + ): + if pa_version_under4p0: + fallback_performancewarning(version="4") + return super()._str_fullmatch(pat, case, flags, na) + + if not pat.endswith("$") or pat.endswith("//$"): + pat = pat + "$" + return self._str_match(pat, case, flags, na) + + def _str_isalnum(self): + result = pc.utf8_is_alnum(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isalpha(self): + result = pc.utf8_is_alpha(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isdecimal(self): + result = pc.utf8_is_decimal(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isdigit(self): + result = pc.utf8_is_digit(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_islower(self): + result = pc.utf8_is_lower(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isnumeric(self): + result = pc.utf8_is_numeric(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isspace(self): + if pa_version_under2p0: + fallback_performancewarning(version="2") + return super()._str_isspace() + + result = pc.utf8_is_space(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_istitle(self): + result = pc.utf8_is_title(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_isupper(self): + result = pc.utf8_is_upper(self._data) + return BooleanDtype().__from_arrow__(result) + + def _str_len(self): + if pa_version_under4p0: + fallback_performancewarning(version="4") + return super()._str_len() + + result = pc.utf8_length(self._data) + return Int64Dtype().__from_arrow__(result) + + def _str_lower(self): + return type(self)(pc.utf8_lower(self._data)) + + def _str_upper(self): + return type(self)(pc.utf8_upper(self._data)) + + def _str_strip(self, to_strip=None): + if pa_version_under4p0: + fallback_performancewarning(version="4") + return super()._str_strip(to_strip) + + if to_strip is None: + result = pc.utf8_trim_whitespace(self._data) + else: + result = pc.utf8_trim(self._data, characters=to_strip) + return type(self)(result) + + def _str_lstrip(self, to_strip=None): + if pa_version_under4p0: + fallback_performancewarning(version="4") + return super()._str_lstrip(to_strip) + + if to_strip is None: + result = pc.utf8_ltrim_whitespace(self._data) + else: + result = pc.utf8_ltrim(self._data, characters=to_strip) + return type(self)(result) + + def _str_rstrip(self, to_strip=None): + if pa_version_under4p0: + fallback_performancewarning(version="4") + return super()._str_rstrip(to_strip) + + if to_strip is None: + result = pc.utf8_rtrim_whitespace(self._data) + else: + result = pc.utf8_rtrim(self._data, characters=to_strip) + return type(self)(result) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py new file mode 100644 index 00000000..4011f298 --- /dev/null +++ b/pandas/core/arrays/timedeltas.py @@ -0,0 +1,1021 @@ +from __future__ import annotations + +from datetime import timedelta +from typing import ( + TYPE_CHECKING, + cast, +) + +import numpy as np + +from pandas._libs import ( + lib, + tslibs, +) +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + NaTType, + Tick, + Timedelta, + astype_overflowsafe, + iNaT, + periods_per_second, + to_offset, +) +from pandas._libs.tslibs.conversion import precision_from_unit +from pandas._libs.tslibs.fields import get_timedelta_field +from pandas._libs.tslibs.timedeltas import ( + array_to_timedelta64, + ints_to_pytimedelta, + parse_timedelta_unit, +) +from 
pandas._typing import ( + DtypeObj, + NpDtype, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.util._validators import validate_endpoints + +from pandas.core.dtypes.astype import astype_td64_unit_conversion +from pandas.core.dtypes.common import ( + TD64NS_DTYPE, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import nanops +from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays._ranges import generate_regular_range +import pandas.core.common as com +from pandas.core.ops.common import unpack_zerodim_and_defer + +if TYPE_CHECKING: + from pandas import DataFrame + + +def _field_accessor(name: str, alias: str, docstring: str): + def f(self) -> np.ndarray: + values = self.asi8 + result = get_timedelta_field(values, alias, reso=self._reso) + if self._hasna: + result = self._maybe_mask_results( + result, fill_value=None, convert="float64" + ) + + return result + + f.__name__ = name + f.__doc__ = f"\n{docstring}\n" + return property(f) + + +class TimedeltaArray(dtl.TimelikeOps): + """ + Pandas ExtensionArray for timedelta data. + + .. warning:: + + TimedeltaArray is currently experimental, and its API may change + without warning. In particular, :attr:`TimedeltaArray.dtype` is + expected to change to be an instance of an ``ExtensionDtype`` + subclass. + + Parameters + ---------- + values : array-like + The timedelta data. + + dtype : numpy.dtype + Currently, only ``numpy.dtype("timedelta64[ns]")`` is accepted. + freq : Offset, optional + copy : bool, default False + Whether to copy the underlying array of data. + + Attributes + ---------- + None + + Methods + ------- + None + """ + + _typ = "timedeltaarray" + _internal_fill_value = np.timedelta64("NaT", "ns") + _recognized_scalars = (timedelta, np.timedelta64, Tick) + _is_recognized_dtype = is_timedelta64_dtype + _infer_matches = ("timedelta", "timedelta64") + + @property + def _scalar_type(self) -> type[Timedelta]: + return Timedelta + + __array_priority__ = 1000 + # define my properties & methods for delegation + _other_ops: list[str] = [] + _bool_ops: list[str] = [] + _object_ops: list[str] = ["freq"] + _field_ops: list[str] = ["days", "seconds", "microseconds", "nanoseconds"] + _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _datetimelike_methods: list[str] = [ + "to_pytimedelta", + "total_seconds", + "round", + "floor", + "ceil", + ] + + # Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray) + # operates pointwise. + + def _box_func(self, x: np.timedelta64) -> Timedelta | NaTType: + y = x.view("i8") + if y == NaT.value: + return NaT + return Timedelta._from_value_and_reso(y, reso=self._reso) + + @property + # error: Return type "dtype" of "dtype" incompatible with return type + # "ExtensionDtype" in supertype "ExtensionArray" + def dtype(self) -> np.dtype: # type: ignore[override] + """ + The dtype for the TimedeltaArray. + + .. warning:: + + A future version of pandas will change dtype to be an instance + of a :class:`pandas.api.extensions.ExtensionDtype` subclass, + not a ``numpy.dtype``. 
+ + Returns + ------- + numpy.dtype + """ + return self._ndarray.dtype + + # ---------------------------------------------------------------- + # Constructors + + _freq = None + _default_dtype = TD64NS_DTYPE # used in TimeLikeOps.__init__ + + @classmethod + def _validate_dtype(cls, values, dtype): + # used in TimeLikeOps.__init__ + _validate_td64_dtype(values.dtype) + dtype = _validate_td64_dtype(dtype) + return dtype + + # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" + @classmethod + def _simple_new( # type: ignore[override] + cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=TD64NS_DTYPE + ) -> TimedeltaArray: + # Require td64 dtype, not unit-less, matching values.dtype + assert isinstance(dtype, np.dtype) and dtype.kind == "m" + assert not tslibs.is_unitless(dtype) + assert isinstance(values, np.ndarray), type(values) + assert dtype == values.dtype + + result = super()._simple_new(values=values, dtype=dtype) + result._freq = freq + return result + + @classmethod + def _from_sequence( + cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False + ) -> TimedeltaArray: + if dtype: + _validate_td64_dtype(dtype) + + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) + freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) + + return cls._simple_new(data, dtype=data.dtype, freq=freq) + + @classmethod + def _from_sequence_not_strict( + cls, + data, + dtype=TD64NS_DTYPE, + copy: bool = False, + freq=lib.no_default, + unit=None, + ) -> TimedeltaArray: + if dtype: + _validate_td64_dtype(dtype) + + assert unit not in ["Y", "y", "M"] # caller is responsible for checking + + explicit_none = freq is None + freq = freq if freq is not lib.no_default else None + + freq, freq_infer = dtl.maybe_infer_freq(freq) + + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) + freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) + if explicit_none: + freq = None + + result = cls._simple_new(data, dtype=data.dtype, freq=freq) + + if inferred_freq is None and freq is not None: + # this condition precludes `freq_infer` + cls._validate_frequency(result, freq) + + elif freq_infer: + # Set _freq directly to bypass duplicative _validate_frequency + # check. 
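TimedeltaArray is normally obtained through TimedeltaIndex or Series rather than built via these constructors directly; a short sketch:

import pandas as pd

tdi = pd.TimedeltaIndex(["1 days", "2 days", pd.NaT])
arr = tdi.array                         # TimedeltaArray, dtype timedelta64[ns]
print(type(arr).__name__, arr.dtype)
print(pd.timedelta_range("1 day", periods=3, freq="12H").array.freq)  # propagated freq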
+ result._freq = to_offset(result.inferred_freq) + + return result + + @classmethod + def _generate_range(cls, start, end, periods, freq, closed=None): + + periods = dtl.validate_periods(periods) + if freq is None and any(x is None for x in [periods, start, end]): + raise ValueError("Must provide freq argument if no data is supplied") + + if com.count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the four parameters: start, end, periods, " + "and freq, exactly three must be specified" + ) + + if start is not None: + start = Timedelta(start) + + if end is not None: + end = Timedelta(end) + + left_closed, right_closed = validate_endpoints(closed) + + if freq is not None: + index = generate_regular_range(start, end, periods, freq) + else: + index = np.linspace(start.value, end.value, periods).astype("i8") + + if not left_closed: + index = index[1:] + if not right_closed: + index = index[:-1] + + td64values = index.view("m8[ns]") + return cls._simple_new(td64values, dtype=td64values.dtype, freq=freq) + + # ---------------------------------------------------------------- + # DatetimeLike Interface + + def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: + if not isinstance(value, self._scalar_type) and value is not NaT: + raise ValueError("'value' should be a Timedelta.") + self._check_compatible_with(value, setitem=setitem) + return np.timedelta64(value.value, "ns") + + def _scalar_from_string(self, value) -> Timedelta | NaTType: + return Timedelta(value) + + def _check_compatible_with(self, other, setitem: bool = False) -> None: + # we don't have anything to validate. + pass + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + def astype(self, dtype, copy: bool = True): + # We handle + # --> timedelta64[ns] + # --> timedelta64 + # DatetimeLikeArrayMixin super call handles other cases + dtype = pandas_dtype(dtype) + + if dtype.kind == "m": + return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy) + + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) + + def __iter__(self): + if self.ndim > 1: + for i in range(len(self)): + yield self[i] + else: + # convert in chunks of 10k for efficiency + data = self._ndarray + length = len(self) + chunksize = 10000 + chunks = (length // chunksize) + 1 + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, length) + converted = ints_to_pytimedelta(data[start_i:end_i], box=True) + yield from converted + + # ---------------------------------------------------------------- + # Reductions + + def sum( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + keepdims: bool = False, + initial=None, + skipna: bool = True, + min_count: int = 0, + ): + nv.validate_sum( + (), {"dtype": dtype, "out": out, "keepdims": keepdims, "initial": initial} + ) + + result = nanops.nansum( + self._ndarray, axis=axis, skipna=skipna, min_count=min_count + ) + return self._wrap_reduction_result(axis, result) + + def std( + self, + *, + axis: int | None = None, + dtype: NpDtype | None = None, + out=None, + ddof: int = 1, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" + ) + + result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + # 
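A sketch of the reductions defined above as seen from user code:

import pandas as pd

tdi = pd.timedelta_range("1 day", periods=4, freq="D")
print(tdi.array.sum())        # Timedelta('10 days 00:00:00')
print(pd.Series(tdi).std())   # sample standard deviation, returned as a Timedelta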
---------------------------------------------------------------- + # Rendering Methods + + def _formatter(self, boxed: bool = False): + from pandas.io.formats.format import get_format_timedelta64 + + return get_format_timedelta64(self, box=True) + + def _format_native_types( + self, *, na_rep="NaT", date_format=None, **kwargs + ) -> npt.NDArray[np.object_]: + from pandas.io.formats.format import get_format_timedelta64 + + # Relies on TimeDelta._repr_base + formatter = get_format_timedelta64(self._ndarray, na_rep) + # equiv: np.array([formatter(x) for x in self._ndarray]) + # but independent of dimension + return np.frompyfunc(formatter, 1, 1)(self._ndarray) + + # ---------------------------------------------------------------- + # Arithmetic Methods + + def _add_offset(self, other): + assert not isinstance(other, Tick) + raise TypeError( + f"cannot add the type {type(other).__name__} to a {type(self).__name__}" + ) + + @unpack_zerodim_and_defer("__mul__") + def __mul__(self, other) -> TimedeltaArray: + if is_scalar(other): + # numpy will accept float and int, raise TypeError for others + result = self._ndarray * other + freq = None + if self.freq is not None and not isna(other): + freq = self.freq * other + return type(self)._simple_new(result, dtype=result.dtype, freq=freq) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + if len(other) != len(self) and not is_timedelta64_dtype(other.dtype): + # Exclude timedelta64 here so we correctly raise TypeError + # for that instead of ValueError + raise ValueError("Cannot multiply with unequal lengths") + + if is_object_dtype(other.dtype): + # this multiplication will succeed only if all elements of other + # are int or float scalars, so we will end up with + # timedelta64[ns]-dtyped result + arr = self._ndarray + result = [arr[n] * other[n] for n in range(len(self))] + result = np.array(result) + return type(self)._simple_new(result, dtype=result.dtype) + + # numpy will accept float or int dtype, raise TypeError for others + result = self._ndarray * other + return type(self)._simple_new(result, dtype=result.dtype) + + __rmul__ = __mul__ + + @unpack_zerodim_and_defer("__truediv__") + def __truediv__(self, other): + # timedelta / X is well-defined for timedelta-like or numeric X + + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + # mypy assumes that __new__ returns an instance of the class + # github.com/python/mypy/issues/1020 + if cast("Timedelta | NaTType", other) is NaT: + # specifically timedelta64-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # otherwise, dispatch to Timedelta implementation + return self._ndarray / other + + elif lib.is_scalar(other): + # assume it is numeric + result = self._ndarray / other + freq = None + if self.freq is not None: + # Tick division is not implemented, so operate on Timedelta + freq = self.freq.delta / other + freq = to_offset(freq) + return type(self)._simple_new(result, dtype=result.dtype, freq=freq) + + if not hasattr(other, "dtype"): + # e.g. 
list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + # let numpy handle it + return self._ndarray / other + + elif is_object_dtype(other.dtype): + # We operate on raveled arrays to avoid problems in inference + # on NaT + # TODO: tests with non-nano + srav = self.ravel() + orav = other.ravel() + result_list = [srav[n] / orav[n] for n in range(len(srav))] + result = np.array(result_list).reshape(self.shape) + + # We need to do dtype inference in order to keep DataFrame ops + # behavior consistent with Series behavior + inferred = lib.infer_dtype(result, skipna=False) + if inferred == "timedelta": + flat = result.ravel() + result = type(self)._from_sequence(flat).reshape(result.shape) + elif inferred == "floating": + result = result.astype(float) + elif inferred == "datetime": + # GH#39750 this occurs when result is all-NaT, in which case + # we want to interpret these NaTs as td64. + # We construct an all-td64NaT result. + # error: Incompatible types in assignment (expression has type + # "TimedeltaArray", variable has type "ndarray[Any, + # dtype[floating[_64Bit]]]") + result = self * np.nan # type: ignore[assignment] + + return result + + else: + result = self._ndarray / other + return type(self)._simple_new(result, dtype=result.dtype) + + @unpack_zerodim_and_defer("__rtruediv__") + def __rtruediv__(self, other): + # X / timedelta is defined only for timedelta-like X + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + # mypy assumes that __new__ returns an instance of the class + # github.com/python/mypy/issues/1020 + if cast("Timedelta | NaTType", other) is NaT: + # specifically timedelta64-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # otherwise, dispatch to Timedelta implementation + return other / self._ndarray + + elif lib.is_scalar(other): + raise TypeError( + f"Cannot divide {type(other).__name__} by {type(self).__name__}" + ) + + if not hasattr(other, "dtype"): + # e.g. list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + # let numpy handle it + return other / self._ndarray + + elif is_object_dtype(other.dtype): + # Note: unlike in __truediv__, we do not _need_ to do type + # inference on the result. It does not raise, a numeric array + # is returned. 
GH#23829 + result_list = [other[n] / self[n] for n in range(len(self))] + return np.array(result_list) + + else: + raise TypeError( + f"Cannot divide {other.dtype} data by {type(self).__name__}" + ) + + @unpack_zerodim_and_defer("__floordiv__") + def __floordiv__(self, other): + + if is_scalar(other): + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + # mypy assumes that __new__ returns an instance of the class + # github.com/python/mypy/issues/1020 + if cast("Timedelta | NaTType", other) is NaT: + # treat this specifically as timedelta-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # dispatch to Timedelta implementation + return other.__rfloordiv__(self._ndarray) + + # at this point we should only have numeric scalars; anything + # else will raise + result = self._ndarray // other + freq = None + if self.freq is not None: + # Note: freq gets division, not floor-division + freq = self.freq / other + if freq.nanos == 0 and self.freq.nanos != 0: + # e.g. if self.freq is Nano(1) then dividing by 2 + # rounds down to zero + freq = None + return type(self)(result, freq=freq) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + other = type(self)(other) + + # numpy timedelta64 does not natively support floordiv, so operate + # on the i8 values + result = self.asi8 // other.asi8 + mask = self._isnan | other._isnan + if mask.any(): + result = result.astype(np.float64) + np.putmask(result, mask, np.nan) + return result + + elif is_object_dtype(other.dtype): + # error: Incompatible types in assignment (expression has type + # "List[Any]", variable has type "ndarray") + srav = self.ravel() + orav = other.ravel() + res_list = [srav[n] // orav[n] for n in range(len(srav))] + result_flat = np.asarray(res_list) + inferred = lib.infer_dtype(result_flat, skipna=False) + + result = result_flat.reshape(self.shape) + + if inferred == "timedelta": + result, _ = sequence_to_td64ns(result) + return type(self)(result) + if inferred == "datetime": + # GH#39750 occurs when result is all-NaT, which in this + # case should be interpreted as td64nat. 
This can only + # occur when self is all-td64nat + return self * np.nan + return result + + elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype): + result = self._ndarray // other + return type(self)(result) + + else: + dtype = getattr(other, "dtype", type(other).__name__) + raise TypeError(f"Cannot divide {dtype} by {type(self).__name__}") + + @unpack_zerodim_and_defer("__rfloordiv__") + def __rfloordiv__(self, other): + + if is_scalar(other): + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + # mypy assumes that __new__ returns an instance of the class + # github.com/python/mypy/issues/1020 + if cast("Timedelta | NaTType", other) is NaT: + # treat this specifically as timedelta-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # dispatch to Timedelta implementation + return other.__floordiv__(self._ndarray) + + raise TypeError( + f"Cannot divide {type(other).__name__} by {type(self).__name__}" + ) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + other = type(self)(other) + # numpy timedelta64 does not natively support floordiv, so operate + # on the i8 values + result = other.asi8 // self.asi8 + mask = self._isnan | other._isnan + if mask.any(): + result = result.astype(np.float64) + np.putmask(result, mask, np.nan) + return result + + elif is_object_dtype(other.dtype): + result_list = [other[n] // self[n] for n in range(len(self))] + result = np.array(result_list) + return result + + else: + dtype = getattr(other, "dtype", type(other).__name__) + raise TypeError(f"Cannot divide {dtype} by {type(self).__name__}") + + @unpack_zerodim_and_defer("__mod__") + def __mod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + return self - (self // other) * other + + @unpack_zerodim_and_defer("__rmod__") + def __rmod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + return other - (other // self) * self + + @unpack_zerodim_and_defer("__divmod__") + def __divmod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + + res1 = self // other + res2 = self - res1 * other + return res1, res2 + + @unpack_zerodim_and_defer("__rdivmod__") + def __rdivmod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, self._recognized_scalars): + other = Timedelta(other) + + res1 = other // self + res2 = other - res1 * self + return res1, res2 + + def __neg__(self) -> TimedeltaArray: + if self.freq is not None: + return type(self)(-self._ndarray, freq=-self.freq) + return type(self)(-self._ndarray) + + def __pos__(self) -> TimedeltaArray: + return type(self)(self._ndarray.copy(), freq=self.freq) + + def __abs__(self) -> TimedeltaArray: + # Note: freq is not preserved + return type(self)(np.abs(self._ndarray)) + + # ---------------------------------------------------------------- + # Conversion Methods - Vectorized analogues of Timedelta methods + + def total_seconds(self) -> npt.NDArray[np.float64]: + """ + Return total duration of each element expressed in seconds. 
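A sketch of the division and modulo behaviour implemented above, taken against a Timedelta scalar:

import pandas as pd

tdi = pd.timedelta_range("1 hour", periods=3, freq="H")
print(tdi / pd.Timedelta("30 min"))    # floats: 2.0, 4.0, 6.0
print(tdi // pd.Timedelta("45 min"))   # integers: 1, 2, 4
print(tdi % pd.Timedelta("45 min"))    # timedelta remainders: 15 min, 30 min, 0 min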
+ + This method is available directly on TimedeltaArray, TimedeltaIndex + and on Series containing timedelta values under the ``.dt`` namespace. + + Returns + ------- + seconds : [ndarray, Float64Index, Series] + When the calling object is a TimedeltaArray, the return type + is ndarray. When the calling object is a TimedeltaIndex, + the return type is a Float64Index. When the calling object + is a Series, the return type is Series of type `float64` whose + index is the same as the original. + + See Also + -------- + datetime.timedelta.total_seconds : Standard library version + of this method. + TimedeltaIndex.components : Return a DataFrame with components of + each Timedelta. + + Examples + -------- + **Series** + + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d')) + >>> s + 0 0 days + 1 1 days + 2 2 days + 3 3 days + 4 4 days + dtype: timedelta64[ns] + + >>> s.dt.total_seconds() + 0 0.0 + 1 86400.0 + 2 172800.0 + 3 259200.0 + 4 345600.0 + dtype: float64 + + **TimedeltaIndex** + + >>> idx = pd.to_timedelta(np.arange(5), unit='d') + >>> idx + TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq=None) + + >>> idx.total_seconds() + Float64Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], + dtype='float64') + """ + pps = periods_per_second(self._reso) + return self._maybe_mask_results(self.asi8 / pps, fill_value=None) + + def to_pytimedelta(self) -> npt.NDArray[np.object_]: + """ + Return an ndarray of datetime.timedelta objects. + + Returns + ------- + timedeltas : ndarray[object] + """ + return ints_to_pytimedelta(self._ndarray) + + days = _field_accessor("days", "days", "Number of days for each element.") + seconds = _field_accessor( + "seconds", + "seconds", + "Number of seconds (>= 0 and less than 1 day) for each element.", + ) + microseconds = _field_accessor( + "microseconds", + "microseconds", + "Number of microseconds (>= 0 and less than 1 second) for each element.", + ) + nanoseconds = _field_accessor( + "nanoseconds", + "nanoseconds", + "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.", + ) + + @property + def components(self) -> DataFrame: + """ + Return a DataFrame of the individual resolution components of the Timedeltas. + + The components (days, hours, minutes seconds, milliseconds, microseconds, + nanoseconds) are returned as columns in a DataFrame. + + Returns + ------- + DataFrame + """ + from pandas import DataFrame + + columns = [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + ] + hasnans = self._hasna + if hasnans: + + def f(x): + if isna(x): + return [np.nan] * len(columns) + return x.components + + else: + + def f(x): + return x.components + + result = DataFrame([f(x) for x in self], columns=columns) + if not hasnans: + result = result.astype("int64") + return result + + +# --------------------------------------------------------------------- +# Constructor Helpers + + +def sequence_to_td64ns( + data, copy: bool = False, unit=None, errors="raise" +) -> tuple[np.ndarray, Tick | None]: + """ + Parameters + ---------- + data : list-like + copy : bool, default False + unit : str, optional + The timedelta unit to treat integers as multiples of. For numeric + data this defaults to ``'ns'``. + Must be un-specified if the data contains a str and ``errors=="raise"``. + errors : {"raise", "coerce", "ignore"}, default "raise" + How to handle elements that cannot be converted to timedelta64[ns]. + See ``pandas.to_timedelta`` for details. 
+ + Returns + ------- + converted : numpy.ndarray + The sequence converted to a numpy array with dtype ``timedelta64[ns]``. + inferred_freq : Tick or None + The inferred frequency of the sequence. + + Raises + ------ + ValueError : Data cannot be converted to timedelta64[ns]. + + Notes + ----- + Unlike `pandas.to_timedelta`, if setting ``errors=ignore`` will not cause + errors to be ignored; they are caught and subsequently ignored at a + higher level. + """ + assert unit not in ["Y", "y", "M"] # caller is responsible for checking + + inferred_freq = None + if unit is not None: + unit = parse_timedelta_unit(unit) + + data, copy = dtl.ensure_arraylike_for_datetimelike( + data, copy, cls_name="TimedeltaArray" + ) + + if isinstance(data, TimedeltaArray): + inferred_freq = data.freq + + # Convert whatever we have into timedelta64[ns] dtype + if is_object_dtype(data.dtype) or is_string_dtype(data.dtype): + # no need to make a copy, need to convert if string-dtyped + data = _objects_to_td64ns(data, unit=unit, errors=errors) + copy = False + + elif is_integer_dtype(data.dtype): + # treat as multiples of the given unit + data, copy_made = ints_to_td64ns(data, unit=unit) + copy = copy and not copy_made + + elif is_float_dtype(data.dtype): + # cast the unit, multiply base/frac separately + # to avoid precision issues from float -> int + mask = np.isnan(data) + # The next few lines are effectively a vectorized 'cast_from_unit' + m, p = precision_from_unit(unit or "ns") + base = data.astype(np.int64) + frac = data - base + if p: + frac = np.round(frac, p) + data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]") + data[mask] = iNaT + copy = False + + elif is_timedelta64_dtype(data.dtype): + if data.dtype != TD64NS_DTYPE: + # non-nano unit + data = astype_overflowsafe(data, dtype=TD64NS_DTYPE) + copy = False + + else: + # This includes datetime64-dtype, see GH#23539, GH#29794 + raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]") + + data = np.array(data, copy=copy) + + assert data.dtype == "m8[ns]", data + return data, inferred_freq + + +def ints_to_td64ns(data, unit="ns"): + """ + Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating + the integers as multiples of the given timedelta unit. + + Parameters + ---------- + data : numpy.ndarray with integer-dtype + unit : str, default "ns" + The timedelta unit to treat integers as multiples of. + + Returns + ------- + numpy.ndarray : timedelta64[ns] array converted from data + bool : whether a copy was made + """ + copy_made = False + unit = unit if unit is not None else "ns" + + if data.dtype != np.int64: + # converting to int64 makes a copy, so we can avoid + # re-copying later + data = data.astype(np.int64) + copy_made = True + + if unit != "ns": + dtype_str = f"timedelta64[{unit}]" + data = data.view(dtype_str) + + data = astype_overflowsafe(data, dtype=TD64NS_DTYPE) + + # the astype conversion makes a copy, so we can avoid re-copying later + copy_made = True + + else: + data = data.view("timedelta64[ns]") + + return data, copy_made + + +def _objects_to_td64ns(data, unit=None, errors="raise"): + """ + Convert a object-dtyped or string-dtyped array into an + timedelta64[ns]-dtyped array. + + Parameters + ---------- + data : ndarray or Index + unit : str, default "ns" + The timedelta unit to treat integers as multiples of. + Must not be specified if the data contains a str. 
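# --- editorial illustration, not part of the imported pandas source ---
# Sketch of the float branch of sequence_to_td64ns above: whole and
# fractional parts are scaled to nanoseconds separately, which loses less
# precision than multiplying the raw float64 values in one step.  The
# constants below correspond to unit "s" and are hard-coded for the example.
import numpy as np

data = np.array([1.5, 2.25, -0.75])          # seconds as float64
m, p = 1_000_000_000, 9                      # ns per second, decimals kept

base = data.astype(np.int64)                 # whole seconds (truncated)
frac = np.round(data - base, p)              # fractional seconds
as_ns = base * m + (frac * m).astype(np.int64)
td64 = as_ns.view("timedelta64[ns]")         # 1.5s, 2.25s and -0.75s
# --- end of editorial illustration ---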
+ errors : {"raise", "coerce", "ignore"}, default "raise" + How to handle elements that cannot be converted to timedelta64[ns]. + See ``pandas.to_timedelta`` for details. + + Returns + ------- + numpy.ndarray : timedelta64[ns] array converted from data + + Raises + ------ + ValueError : Data cannot be converted to timedelta64[ns]. + + Notes + ----- + Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause + errors to be ignored; they are caught and subsequently ignored at a + higher level. + """ + # coerce Index to np.ndarray, converting string-dtype if necessary + values = np.array(data, dtype=np.object_, copy=False) + + result = array_to_timedelta64(values, unit=unit, errors=errors) + return result.view("timedelta64[ns]") + + +def _validate_td64_dtype(dtype) -> DtypeObj: + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("timedelta64")): + # no precision disallowed GH#24806 + msg = ( + "Passing in 'timedelta' dtype with no precision is not allowed. " + "Please pass in 'timedelta64[ns]' instead." + ) + raise ValueError(msg) + + if not is_dtype_equal(dtype, TD64NS_DTYPE): + raise ValueError(f"dtype {dtype} cannot be converted to timedelta64[ns]") + + return dtype diff --git a/pandas/core/base.py b/pandas/core/base.py new file mode 100644 index 00000000..b04d4348 --- /dev/null +++ b/pandas/core/base.py @@ -0,0 +1,1334 @@ +""" +Base and utility classes for pandas objects. +""" + +from __future__ import annotations + +import textwrap +from typing import ( + TYPE_CHECKING, + Any, + Generic, + Hashable, + Literal, + TypeVar, + cast, + final, + overload, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import ( + ArrayLike, + DtypeObj, + IndexLabel, + NDFrameT, + Shape, + npt, +) +from pandas.compat import PYPY +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dict_like, + is_extension_array_dtype, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + remove_na_arraylike, +) + +from pandas.core import ( + algorithms, + nanops, + ops, +) +from pandas.core.accessor import DirNamesMixin +from pandas.core.algorithms import ( + duplicated, + unique1d, + value_counts, +) +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ExtensionArray +from pandas.core.construction import ( + create_series_with_explicit_dtype, + ensure_wrapped_if_datetimelike, + extract_array, +) + +if TYPE_CHECKING: + + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ScalarLike_co, + ) + + from pandas import ( + Categorical, + Series, + ) + + +_shared_docs: dict[str, str] = {} +_indexops_doc_kwargs = { + "klass": "IndexOpsMixin", + "inplace": "", + "unique": "IndexOpsMixin", + "duplicated": "IndexOpsMixin", +} + +_T = TypeVar("_T", bound="IndexOpsMixin") + + +class PandasObject(DirNamesMixin): + """ + Baseclass for various pandas objects. + """ + + # results from calls to methods decorated with cache_readonly get added to _cache + _cache: dict[str, Any] + + @property + def _constructor(self): + """ + Class constructor (for this class it's just `__class__`. 
+ """ + return type(self) + + def __repr__(self) -> str: + """ + Return a string representation for a particular object. + """ + # Should be overwritten by base classes + return object.__repr__(self) + + def _reset_cache(self, key: str | None = None) -> None: + """ + Reset cached properties. If ``key`` is passed, only clears that key. + """ + if not hasattr(self, "_cache"): + return + if key is None: + self._cache.clear() + else: + self._cache.pop(key, None) + + def __sizeof__(self) -> int: + """ + Generates the total memory usage for an object that returns + either a value or Series of values + """ + memory_usage = getattr(self, "memory_usage", None) + if memory_usage: + mem = memory_usage(deep=True) + return int(mem if is_scalar(mem) else mem.sum()) + + # no memory_usage attribute, so fall back to object's 'sizeof' + return super().__sizeof__() + + +class NoNewAttributesMixin: + """ + Mixin which prevents adding new attributes. + + Prevents additional attributes via xxx.attribute = "something" after a + call to `self.__freeze()`. Mainly used to prevent the user from using + wrong attributes on an accessor (`Series.cat/.str/.dt`). + + If you really want to add a new attribute at a later time, you need to use + `object.__setattr__(self, key, value)`. + """ + + def _freeze(self): + """ + Prevents setting additional attributes. + """ + object.__setattr__(self, "__frozen", True) + + # prevent adding any attribute via s.xxx.new_attribute = ... + def __setattr__(self, key: str, value) -> None: + # _cache is used by a decorator + # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key) + # because + # 1.) getattr is false for attributes that raise errors + # 2.) cls.__dict__ doesn't traverse into base classes + if getattr(self, "__frozen", False) and not ( + key == "_cache" + or key in type(self).__dict__ + or getattr(self, key, None) is not None + ): + raise AttributeError(f"You cannot add any new attribute '{key}'") + object.__setattr__(self, key, value) + + +class SelectionMixin(Generic[NDFrameT]): + """ + mixin implementing the selection & aggregation interface on a group-like + object sub-classes need to define: obj, exclusions + """ + + obj: NDFrameT + _selection: IndexLabel | None = None + exclusions: frozenset[Hashable] + _internal_names = ["_cache", "__setstate__"] + _internal_names_set = set(_internal_names) + + @final + @property + def _selection_list(self): + if not isinstance( + self._selection, (list, tuple, ABCSeries, ABCIndex, np.ndarray) + ): + return [self._selection] + return self._selection + + @cache_readonly + def _selected_obj(self): + if self._selection is None or isinstance(self.obj, ABCSeries): + return self.obj + else: + return self.obj[self._selection] + + @final + @cache_readonly + def ndim(self) -> int: + return self._selected_obj.ndim + + @final + @cache_readonly + def _obj_with_exclusions(self): + if self._selection is not None and isinstance(self.obj, ABCDataFrame): + return self.obj[self._selection_list] + + if len(self.exclusions) > 0: + # equivalent to `self.obj.drop(self.exclusions, axis=1) + # but this avoids consolidating and making a copy + # TODO: following GH#45287 can we now use .drop directly without + # making a copy? 
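# --- editorial illustration, not part of the imported pandas source ---
# Rough sketch of the freeze mechanism in NoNewAttributesMixin above; the
# subclass and attribute names are invented for the example.
from pandas.core.base import NoNewAttributesMixin

class FrozenDemo(NoNewAttributesMixin):
    def __init__(self):
        self.existing = 1          # set before freezing, so this is allowed
        self._freeze()

demo = FrozenDemo()
demo.existing = 2                  # pre-existing attribute: still writable
try:
    demo.brand_new = 3             # new attribute after _freeze()
except AttributeError as err:
    print(err)                     # You cannot add any new attribute 'brand_new'
# --- end of editorial illustration ---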
+ return self.obj._drop_axis(self.exclusions, axis=1, only_slice=True) + else: + return self.obj + + def __getitem__(self, key): + if self._selection is not None: + raise IndexError(f"Column(s) {self._selection} already selected") + + if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)): + if len(self.obj.columns.intersection(key)) != len(set(key)): + bad_keys = list(set(key).difference(self.obj.columns)) + raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}") + return self._gotitem(list(key), ndim=2) + + elif not getattr(self, "as_index", False): + if key not in self.obj.columns: + raise KeyError(f"Column not found: {key}") + return self._gotitem(key, ndim=2) + + else: + if key not in self.obj: + raise KeyError(f"Column not found: {key}") + subset = self.obj[key] + ndim = subset.ndim + return self._gotitem(key, ndim=ndim, subset=subset) + + def _gotitem(self, key, ndim: int, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : str / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + raise AbstractMethodError(self) + + def aggregate(self, func, *args, **kwargs): + raise AbstractMethodError(self) + + agg = aggregate + + +class IndexOpsMixin(OpsMixin): + """ + Common ops mixin to support a unified interface / docs for Series / Index + """ + + # ndarray compatibility + __array_priority__ = 1000 + _hidden_attrs: frozenset[str] = frozenset( + ["tolist"] # tolist is not deprecated, just suppressed in the __dir__ + ) + + @property + def dtype(self) -> DtypeObj: + # must be defined here as a property for mypy + raise AbstractMethodError(self) + + @property + def _values(self) -> ExtensionArray | np.ndarray: + # must be defined here as a property for mypy + raise AbstractMethodError(self) + + def transpose(self: _T, *args, **kwargs) -> _T: + """ + Return the transpose, which is by definition self. + + Returns + ------- + %(klass)s + """ + nv.validate_transpose(args, kwargs) + return self + + T = property( + transpose, + doc=""" + Return the transpose, which is by definition self. + """, + ) + + @property + def shape(self) -> Shape: + """ + Return a tuple of the shape of the underlying data. + """ + return self._values.shape + + def __len__(self) -> int: + # We need this defined here for mypy + raise AbstractMethodError(self) + + @property + def ndim(self) -> Literal[1]: + """ + Number of dimensions of the underlying data, by definition 1. + """ + return 1 + + def item(self): + """ + Return the first element of the underlying data as a Python scalar. + + Returns + ------- + scalar + The first element of %(klass)s. + + Raises + ------ + ValueError + If the data is not length-1. + """ + if len(self) == 1: + return next(iter(self)) + raise ValueError("can only convert an array of size 1 to a Python scalar") + + @property + def nbytes(self) -> int: + """ + Return the number of bytes in the underlying data. + """ + return self._values.nbytes + + @property + def size(self) -> int: + """ + Return the number of elements in the underlying data. + """ + return len(self._values) + + @property + def array(self) -> ExtensionArray: + """ + The ExtensionArray of the data backing this Series or Index. + + Returns + ------- + ExtensionArray + An ExtensionArray of the values stored within. For extension + types, this is the actual array. For NumPy native types, this + is a thin (no copy) wrapper around :class:`numpy.ndarray`. 
+ + ``.array`` differs ``.values`` which may require converting the + data to a different form. + + See Also + -------- + Index.to_numpy : Similar method that always returns a NumPy array. + Series.to_numpy : Similar method that always returns a NumPy array. + + Notes + ----- + This table lays out the different array types for each extension + dtype within pandas. + + ================== ============================= + dtype array type + ================== ============================= + category Categorical + period PeriodArray + interval IntervalArray + IntegerNA IntegerArray + string StringArray + boolean BooleanArray + datetime64[ns, tz] DatetimeArray + ================== ============================= + + For any 3rd-party extension types, the array type will be an + ExtensionArray. + + For all remaining dtypes ``.array`` will be a + :class:`arrays.NumpyExtensionArray` wrapping the actual ndarray + stored within. If you absolutely need a NumPy array (possibly with + copying / coercing data), then use :meth:`Series.to_numpy` instead. + + Examples + -------- + For regular NumPy types like int, and float, a PandasArray + is returned. + + >>> pd.Series([1, 2, 3]).array + + [1, 2, 3] + Length: 3, dtype: int64 + + For extension types, like Categorical, the actual ExtensionArray + is returned + + >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser.array + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] + """ + raise AbstractMethodError(self) + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: object = lib.no_default, + **kwargs, + ) -> np.ndarray: + """ + A NumPy ndarray representing the values in this Series or Index. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. + copy : bool, default False + Whether to ensure that the returned value is not a view on + another array. Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that + a copy is made, even if not strictly necessary. + na_value : Any, optional + The value to use for missing values. The default value depends + on `dtype` and the type of the array. + + .. versionadded:: 1.0.0 + + **kwargs + Additional keywords passed through to the ``to_numpy`` method + of the underlying array (for extension arrays). + + .. versionadded:: 1.0.0 + + Returns + ------- + numpy.ndarray + + See Also + -------- + Series.array : Get the actual data stored within. + Index.array : Get the actual data stored within. + DataFrame.to_numpy : Similar method for DataFrame. + + Notes + ----- + The returned array will be the same up to equality (values equal + in `self` will be equal in the returned array; likewise for values + that are not equal). When `self` contains an ExtensionArray, the + dtype may be different. For example, for a category-dtype Series, + ``to_numpy()`` will return a NumPy array and the categorical dtype + will be lost. + + For NumPy dtypes, this will be a reference to the actual data stored + in this Series or Index (assuming ``copy=False``). Modifying the result + in place will modify the data stored in the Series or Index (not that + we recommend doing that). + + For extension types, ``to_numpy()`` *may* require copying data and + coercing the result to a NumPy type (possibly object), which may be + expensive. When you need a no-copy reference to the underlying data, + :attr:`Series.array` should be used instead. 
+ + This table lays out the different dtypes and default return types of + ``to_numpy()`` for various dtypes within pandas. + + ================== ================================ + dtype array type + ================== ================================ + category[T] ndarray[T] (same dtype as input) + period ndarray[object] (Periods) + interval ndarray[object] (Intervals) + IntegerNA ndarray[object] + datetime64[ns] datetime64[ns] + datetime64[ns, tz] ndarray[object] (Timestamps) + ================== ================================ + + Examples + -------- + >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser.to_numpy() + array(['a', 'b', 'a'], dtype=object) + + Specify the `dtype` to control how datetime-aware data is represented. + Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp` + objects, each with the correct ``tz``. + + >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> ser.to_numpy(dtype=object) + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET')], + dtype=object) + + Or ``dtype='datetime64[ns]'`` to return an ndarray of native + datetime64 values. The values are converted to UTC and the timezone + info is dropped. + + >>> ser.to_numpy(dtype="datetime64[ns]") + ... # doctest: +ELLIPSIS + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'], + dtype='datetime64[ns]') + """ + if is_extension_array_dtype(self.dtype): + return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs) + elif kwargs: + bad_keys = list(kwargs.keys())[0] + raise TypeError( + f"to_numpy() got an unexpected keyword argument '{bad_keys}'" + ) + + result = np.asarray(self._values, dtype=dtype) + # TODO(GH-24345): Avoid potential double copy + if copy or na_value is not lib.no_default: + result = result.copy() + if na_value is not lib.no_default: + result[np.asanyarray(self.isna())] = na_value + return result + + @property + def empty(self) -> bool: + return not self.size + + def max(self, axis=None, skipna: bool = True, *args, **kwargs): + """ + Return the maximum value of the Index. + + Parameters + ---------- + axis : int, optional + For compatibility with NumPy. Only 0 or None are allowed. + skipna : bool, default True + Exclude NA/null values when showing the result. + *args, **kwargs + Additional arguments and keywords for compatibility with NumPy. + + Returns + ------- + scalar + Maximum value. + + See Also + -------- + Index.min : Return the minimum value in an Index. + Series.max : Return the maximum value in a Series. + DataFrame.max : Return the maximum values in a DataFrame. + + Examples + -------- + >>> idx = pd.Index([3, 2, 1]) + >>> idx.max() + 3 + + >>> idx = pd.Index(['c', 'b', 'a']) + >>> idx.max() + 'c' + + For a MultiIndex, the maximum is determined lexicographically. + + >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)]) + >>> idx.max() + ('b', 2) + """ + nv.validate_minmax_axis(axis) + nv.validate_max(args, kwargs) + return nanops.nanmax(self._values, skipna=skipna) + + @doc(op="max", oppose="min", value="largest") + def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + """ + Return int position of the {value} value in the Series. + + If the {op}imum is achieved in multiple locations, + the first row position is returned. + + Parameters + ---------- + axis : {{None}} + Unused. Parameter needed for compatibility with DataFrame. + skipna : bool, default True + Exclude NA/null values when showing the result. 
+ *args, **kwargs + Additional arguments and keywords for compatibility with NumPy. + + Returns + ------- + int + Row position of the {op}imum value. + + See Also + -------- + Series.arg{op} : Return position of the {op}imum value. + Series.arg{oppose} : Return position of the {oppose}imum value. + numpy.ndarray.arg{op} : Equivalent method for numpy arrays. + Series.idxmax : Return index label of the maximum values. + Series.idxmin : Return index label of the minimum values. + + Examples + -------- + Consider dataset containing cereal calories + + >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0, + ... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}}) + >>> s + Corn Flakes 100.0 + Almond Delight 110.0 + Cinnamon Toast Crunch 120.0 + Cocoa Puff 110.0 + dtype: float64 + + >>> s.argmax() + 2 + >>> s.argmin() + 0 + + The maximum cereal calories is the third element and + the minimum cereal calories is the first element, + since series is zero-indexed. + """ + delegate = self._values + nv.validate_minmax_axis(axis) + skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) + + if isinstance(delegate, ExtensionArray): + if not skipna and delegate.isna().any(): + return -1 + else: + return delegate.argmax() + else: + # error: Incompatible return value type (got "Union[int, ndarray]", expected + # "int") + return nanops.nanargmax( # type: ignore[return-value] + delegate, skipna=skipna + ) + + def min(self, axis=None, skipna: bool = True, *args, **kwargs): + """ + Return the minimum value of the Index. + + Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series. + skipna : bool, default True + Exclude NA/null values when showing the result. + *args, **kwargs + Additional arguments and keywords for compatibility with NumPy. + + Returns + ------- + scalar + Minimum value. + + See Also + -------- + Index.max : Return the maximum value of the object. + Series.min : Return the minimum value in a Series. + DataFrame.min : Return the minimum values in a DataFrame. + + Examples + -------- + >>> idx = pd.Index([3, 2, 1]) + >>> idx.min() + 1 + + >>> idx = pd.Index(['c', 'b', 'a']) + >>> idx.min() + 'a' + + For a MultiIndex, the minimum is determined lexicographically. + + >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)]) + >>> idx.min() + ('a', 1) + """ + nv.validate_minmax_axis(axis) + nv.validate_min(args, kwargs) + return nanops.nanmin(self._values, skipna=skipna) + + @doc(argmax, op="min", oppose="max", value="smallest") + def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int: + delegate = self._values + nv.validate_minmax_axis(axis) + skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) + + if isinstance(delegate, ExtensionArray): + if not skipna and delegate.isna().any(): + return -1 + else: + return delegate.argmin() + else: + # error: Incompatible return value type (got "Union[int, ndarray]", expected + # "int") + return nanops.nanargmin( # type: ignore[return-value] + delegate, skipna=skipna + ) + + def tolist(self): + """ + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + Returns + ------- + list + + See Also + -------- + numpy.ndarray.tolist : Return the array as an a.ndim-levels deep + nested list of Python scalars. + """ + return self._values.tolist() + + to_list = tolist + + def __iter__(self): + """ + Return an iterator of the values. 
+ + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + Returns + ------- + iterator + """ + # We are explicitly making element iterators. + if not isinstance(self._values, np.ndarray): + # Check type instead of dtype to catch DTA/TDA + return iter(self._values) + else: + return map(self._values.item, range(self._values.size)) + + @cache_readonly + def hasnans(self) -> bool: + """ + Return True if there are any NaNs. + + Enables various performance speedups. + """ + # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" + # has no attribute "any" + return bool(isna(self).any()) # type: ignore[union-attr] + + def isna(self) -> npt.NDArray[np.bool_]: + return isna(self._values) + + def _reduce( + self, + op, + name: str, + *, + axis=0, + skipna=True, + numeric_only=None, + filter_type=None, + **kwds, + ): + """ + Perform the reduction type operation if we can. + """ + func = getattr(self, name, None) + if func is None: + raise TypeError( + f"{type(self).__name__} cannot perform the operation {name}" + ) + return func(skipna=skipna, **kwds) + + @final + def _map_values(self, mapper, na_action=None): + """ + An internal function that maps values using the input + correspondence (which can be a dict, Series, or function). + + Parameters + ---------- + mapper : function, dict, or Series + The input correspondence object + na_action : {None, 'ignore'} + If 'ignore', propagate NA values, without passing them to the + mapping function + + Returns + ------- + Union[Index, MultiIndex], inferred + The output of the mapping function applied to the index. + If the function returns a tuple with more than one element + a MultiIndex will be returned. + """ + # we can fastpath dict/Series to an efficient map + # as we know that we are not going to have to yield + # python types + if is_dict_like(mapper): + if isinstance(mapper, dict) and hasattr(mapper, "__missing__"): + # If a dictionary subclass defines a default value method, + # convert mapper to a lookup function (GH #15999). + dict_with_default = mapper + mapper = lambda x: dict_with_default[x] + else: + # Dictionary does not have a default. Thus it's safe to + # convert to an Series for efficiency. + # we specify the keys here to handle the + # possibility that they are tuples + + # The return value of mapping with an empty mapper is + # expected to be pd.Series(np.nan, ...). 
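# --- editorial illustration, not part of the imported pandas source ---
# The ndarray branch of __iter__ above goes through ndarray.item, so plain
# Python scalars come back instead of numpy scalars:
import pandas as pd

ser = pd.Series([1, 2, 3])
first = next(iter(ser))
assert type(first) is int          # not numpy.int64
# --- end of editorial illustration ---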
As np.nan is + # of dtype float64 the return value of this method should + # be float64 as well + mapper = create_series_with_explicit_dtype( + mapper, dtype_if_empty=np.float64 + ) + + if isinstance(mapper, ABCSeries): + if na_action not in (None, "ignore"): + msg = ( + "na_action must either be 'ignore' or None, " + f"{na_action} was passed" + ) + raise ValueError(msg) + + if na_action == "ignore": + mapper = mapper[mapper.index.notna()] + + # Since values were input this means we came from either + # a dict or a series and mapper should be an index + if is_categorical_dtype(self.dtype): + # use the built in categorical series mapper which saves + # time by mapping the categories instead of all values + + cat = cast("Categorical", self._values) + return cat.map(mapper) + + values = self._values + + indexer = mapper.index.get_indexer(values) + new_values = algorithms.take_nd(mapper._values, indexer) + + return new_values + + # we must convert to python types + if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"): + # GH#23179 some EAs do not have `map` + values = self._values + if na_action is not None: + raise NotImplementedError + map_f = lambda values, f: values.map(f) + else: + values = self._values.astype(object) + if na_action == "ignore": + map_f = lambda values, f: lib.map_infer_mask( + values, f, isna(values).view(np.uint8) + ) + elif na_action is None: + map_f = lib.map_infer + else: + msg = ( + "na_action must either be 'ignore' or None, " + f"{na_action} was passed" + ) + raise ValueError(msg) + + # mapper is a function + new_values = map_f(values, mapper) + + return new_values + + def value_counts( + self, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + bins=None, + dropna: bool = True, + ) -> Series: + """ + Return a Series containing counts of unique values. + + The resulting object will be in descending order so that the + first element is the most frequently-occurring element. + Excludes NA values by default. + + Parameters + ---------- + normalize : bool, default False + If True then the object returned will contain the relative + frequencies of the unique values. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + bins : int, optional + Rather than count values, group them into half-open bins, + a convenience for ``pd.cut``, only works with numeric data. + dropna : bool, default True + Don't include counts of NaN. + + Returns + ------- + Series + + See Also + -------- + Series.count: Number of non-NA elements in a Series. + DataFrame.count: Number of non-NA elements in a DataFrame. + DataFrame.value_counts: Equivalent method on DataFrames. + + Examples + -------- + >>> index = pd.Index([3, 1, 2, 3, 4, np.nan]) + >>> index.value_counts() + 3.0 2 + 1.0 1 + 2.0 1 + 4.0 1 + dtype: int64 + + With `normalize` set to `True`, returns the relative frequency by + dividing all values by the sum of values. + + >>> s = pd.Series([3, 1, 2, 3, 4, np.nan]) + >>> s.value_counts(normalize=True) + 3.0 0.4 + 1.0 0.2 + 2.0 0.2 + 4.0 0.2 + dtype: float64 + + **bins** + + Bins can be useful for going from a continuous variable to a + categorical variable; instead of counting unique + apparitions of values, divide the index in the specified + number of half-open bins. + + >>> s.value_counts(bins=3) + (0.996, 2.0] 2 + (2.0, 3.0] 2 + (3.0, 4.0] 1 + dtype: int64 + + **dropna** + + With `dropna` set to `False` we can also see NaN index values. 
+ + >>> s.value_counts(dropna=False) + 3.0 2 + 1.0 1 + 2.0 1 + 4.0 1 + NaN 1 + dtype: int64 + """ + return value_counts( + self, + sort=sort, + ascending=ascending, + normalize=normalize, + bins=bins, + dropna=dropna, + ) + + def unique(self): + values = self._values + + if not isinstance(values, np.ndarray): + result: ArrayLike = values.unique() + if ( + isinstance(self.dtype, np.dtype) and self.dtype.kind in ["m", "M"] + ) and isinstance(self, ABCSeries): + # GH#31182 Series._values returns EA + # unpack numpy datetime for backward-compat + result = np.asarray(result) + else: + result = unique1d(values) + + return result + + def nunique(self, dropna: bool = True) -> int: + """ + Return number of unique elements in the object. + + Excludes NA values by default. + + Parameters + ---------- + dropna : bool, default True + Don't include NaN in the count. + + Returns + ------- + int + + See Also + -------- + DataFrame.nunique: Method nunique for DataFrame. + Series.count: Count non-NA/null observations in the Series. + + Examples + -------- + >>> s = pd.Series([1, 3, 5, 7, 7]) + >>> s + 0 1 + 1 3 + 2 5 + 3 7 + 4 7 + dtype: int64 + + >>> s.nunique() + 4 + """ + uniqs = self.unique() + if dropna: + uniqs = remove_na_arraylike(uniqs) + return len(uniqs) + + @property + def is_unique(self) -> bool: + """ + Return boolean if values in the object are unique. + + Returns + ------- + bool + """ + return self.nunique(dropna=False) == len(self) + + @property + def is_monotonic(self) -> bool: + """ + Return boolean if values in the object are monotonically increasing. + + .. deprecated:: 1.5.0 + is_monotonic is deprecated and will be removed in a future version. + Use is_monotonic_increasing instead. + + Returns + ------- + bool + """ + warnings.warn( + "is_monotonic is deprecated and will be removed in a future version. " + "Use is_monotonic_increasing instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.is_monotonic_increasing + + @property + def is_monotonic_increasing(self) -> bool: + """ + Return boolean if values in the object are monotonically increasing. + + Returns + ------- + bool + """ + from pandas import Index + + return Index(self).is_monotonic_increasing + + @property + def is_monotonic_decreasing(self) -> bool: + """ + Return boolean if values in the object are monotonically decreasing. + + Returns + ------- + bool + """ + from pandas import Index + + return Index(self).is_monotonic_decreasing + + def _memory_usage(self, deep: bool = False) -> int: + """ + Memory usage of the values. + + Parameters + ---------- + deep : bool, default False + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption. + + Returns + ------- + bytes used + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of the + array. 
+ + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False or if used on PyPy + """ + if hasattr(self.array, "memory_usage"): + # https://github.com/python/mypy/issues/1424 + # error: "ExtensionArray" has no attribute "memory_usage" + return self.array.memory_usage(deep=deep) # type: ignore[attr-defined] + + v = self.array.nbytes + if deep and is_object_dtype(self) and not PYPY: + values = cast(np.ndarray, self._values) + v += lib.memory_usage_of_objects(values) + return v + + @doc( + algorithms.factorize, + values="", + order="", + size_hint="", + sort=textwrap.dedent( + """\ + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. + """ + ), + ) + def factorize( + self, + sort: bool = False, + na_sentinel: int | lib.NoDefault = lib.no_default, + use_na_sentinel: bool | lib.NoDefault = lib.no_default, + ): + return algorithms.factorize( + self, sort=sort, na_sentinel=na_sentinel, use_na_sentinel=use_na_sentinel + ) + + _shared_docs[ + "searchsorted" + ] = """ + Find indices where elements should be inserted to maintain order. + + Find the indices into a sorted {klass} `self` such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `self` would be preserved. + + .. note:: + + The {klass} *must* be monotonically sorted, otherwise + wrong locations will likely be returned. Pandas does *not* + check this for you. + + Parameters + ---------- + value : array-like or scalar + Values to insert into `self`. + side : {{'left', 'right'}}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array-like, optional + Optional array of integer indices that sort `self` into ascending + order. They are typically the result of ``np.argsort``. + + Returns + ------- + int or array of int + A scalar or array of insertion points with the + same shape as `value`. + + See Also + -------- + sort_values : Sort by the values along either axis. + numpy.searchsorted : Similar method from NumPy. + + Notes + ----- + Binary search is used to find the required insertion points. + + Examples + -------- + >>> ser = pd.Series([1, 2, 3]) + >>> ser + 0 1 + 1 2 + 2 3 + dtype: int64 + + >>> ser.searchsorted(4) + 3 + + >>> ser.searchsorted([0, 4]) + array([0, 3]) + + >>> ser.searchsorted([1, 3], side='left') + array([0, 2]) + + >>> ser.searchsorted([1, 3], side='right') + array([1, 3]) + + >>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000'])) + >>> ser + 0 2000-03-11 + 1 2000-03-12 + 2 2000-03-13 + dtype: datetime64[ns] + + >>> ser.searchsorted('3/14/2000') + 3 + + >>> ser = pd.Categorical( + ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True + ... 
) + >>> ser + ['apple', 'bread', 'bread', 'cheese', 'milk'] + Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk'] + + >>> ser.searchsorted('bread') + 1 + + >>> ser.searchsorted(['bread'], side='right') + array([3]) + + If the values are not monotonically sorted, wrong locations + may be returned: + + >>> ser = pd.Series([2, 1, 3]) + >>> ser + 0 2 + 1 1 + 2 3 + dtype: int64 + + >>> ser.searchsorted(1) # doctest: +SKIP + 0 # wrong result, correct would be 1 + """ + + # This overload is needed so that the call to searchsorted in + # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result + + @overload + # The following ignore is also present in numpy/__init__.pyi + # Possibly a mypy bug?? + # error: Overloaded function signatures 1 and 2 overlap with incompatible + # return types [misc] + def searchsorted( # type: ignore[misc] + self, + value: ScalarLike_co, + side: Literal["left", "right"] = ..., + sorter: NumpySorter = ..., + ) -> np.intp: + ... + + @overload + def searchsorted( + self, + value: npt.ArrayLike | ExtensionArray, + side: Literal["left", "right"] = ..., + sorter: NumpySorter = ..., + ) -> npt.NDArray[np.intp]: + ... + + @doc(_shared_docs["searchsorted"], klass="Index") + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + + values = self._values + if not isinstance(values, np.ndarray): + # Going through EA.searchsorted directly improves performance GH#38083 + return values.searchsorted(value, side=side, sorter=sorter) + + return algorithms.searchsorted( + values, + value, + side=side, + sorter=sorter, + ) + + def drop_duplicates(self, keep="first"): + duplicated = self._duplicated(keep=keep) + # error: Value of type "IndexOpsMixin" is not indexable + return self[~duplicated] # type: ignore[index] + + @final + def _duplicated( + self, keep: Literal["first", "last", False] = "first" + ) -> npt.NDArray[np.bool_]: + return duplicated(self._values, keep=keep) + + def _arith_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape) + rvalues = ensure_wrapped_if_datetimelike(rvalues) + + with np.errstate(all="ignore"): + result = ops.arithmetic_op(lvalues, rvalues, op) + + return self._construct_result(result, name=res_name) + + def _construct_result(self, result, name): + """ + Construct an appropriately-wrapped result from the ArrayLike result + of an arithmetic-like operation. + """ + raise AbstractMethodError(self) diff --git a/pandas/core/common.py b/pandas/core/common.py new file mode 100644 index 00000000..641ddba0 --- /dev/null +++ b/pandas/core/common.py @@ -0,0 +1,710 @@ +""" +Misc tools for implementing data structures + +Note: pandas.core.common is *not* part of the public API. 
+""" +from __future__ import annotations + +import builtins +from collections import ( + abc, + defaultdict, +) +import contextlib +from functools import partial +import inspect +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Collection, + Hashable, + Iterable, + Iterator, + Sequence, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + NpDtype, + RandomState, + T, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_extension_array_dtype, + is_integer, +) +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.inference import iterable_not_string +from pandas.core.dtypes.missing import isna + +if TYPE_CHECKING: + from pandas import Index + + +def flatten(line): + """ + Flatten an arbitrarily nested sequence. + + Parameters + ---------- + line : sequence + The non string sequence to flatten + + Notes + ----- + This doesn't consider strings sequences. + + Returns + ------- + flattened : generator + """ + for element in line: + if iterable_not_string(element): + yield from flatten(element) + else: + yield element + + +def consensus_name_attr(objs): + name = objs[0].name + for obj in objs[1:]: + try: + if obj.name != name: + name = None + except ValueError: + name = None + return name + + +def is_bool_indexer(key: Any) -> bool: + """ + Check whether `key` is a valid boolean indexer. + + Parameters + ---------- + key : Any + Only list-likes may be considered boolean indexers. + All other types are not considered a boolean indexer. + For array-like input, boolean ndarrays or ExtensionArrays + with ``_is_boolean`` set are considered boolean indexers. + + Returns + ------- + bool + Whether `key` is a valid boolean indexer. + + Raises + ------ + ValueError + When the array is an object-dtype ndarray or ExtensionArray + and contains missing values. + + See Also + -------- + check_array_indexer : Check that `key` is a valid array to index, + and convert to an ndarray. + """ + if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( + is_array_like(key) and is_extension_array_dtype(key.dtype) + ): + if key.dtype == np.object_: + key_array = np.asarray(key) + + if not lib.is_bool_array(key_array): + na_msg = "Cannot mask with non-boolean array containing NA / NaN values" + if lib.infer_dtype(key_array) == "boolean" and isna(key_array).any(): + # Don't raise on e.g. ["A", "B", np.nan], see + # test_loc_getitem_list_of_labels_categoricalindex_with_na + raise ValueError(na_msg) + return False + return True + elif is_bool_dtype(key.dtype): + return True + elif isinstance(key, list): + # check if np.array(key).dtype would be bool + if len(key) > 0: + if type(key) is not list: + # GH#42461 cython will raise TypeError if we pass a subclass + key = list(key) + return lib.is_bool_list(key) + + return False + + +def cast_scalar_indexer(val, warn_float: bool = False): + """ + To avoid numpy DeprecationWarnings, cast float to integer where valid. + + Parameters + ---------- + val : scalar + warn_float : bool, default False + If True, issue deprecation warning for a float indexer. 
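# --- editorial illustration, not part of the imported pandas source ---
# Behaviour of is_bool_indexer above on a few representative inputs; this
# deliberately imports the internal pandas.core.common module added here.
import numpy as np
import pandas.core.common as com

print(com.is_bool_indexer([True, False, True]))        # True
print(com.is_bool_indexer(np.array([1, 0, 1])))        # False: not bool dtype
try:
    com.is_bool_indexer(np.array([True, np.nan], dtype=object))
except ValueError as err:
    print(err)    # cannot mask with a non-boolean array containing NA / NaN
# --- end of editorial illustration ---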
+ + Returns + ------- + outval : scalar + """ + # assumes lib.is_scalar(val) + if lib.is_float(val) and val.is_integer(): + if warn_float: + warnings.warn( + "Indexing with a float is deprecated, and will raise an IndexError " + "in pandas 2.0. You can manually convert to an integer key instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return int(val) + return val + + +def not_none(*args): + """ + Returns a generator consisting of the arguments that are not None. + """ + return (arg for arg in args if arg is not None) + + +def any_none(*args) -> bool: + """ + Returns a boolean indicating if any argument is None. + """ + return any(arg is None for arg in args) + + +def all_none(*args) -> bool: + """ + Returns a boolean indicating if all arguments are None. + """ + return all(arg is None for arg in args) + + +def any_not_none(*args) -> bool: + """ + Returns a boolean indicating if any argument is not None. + """ + return any(arg is not None for arg in args) + + +def all_not_none(*args) -> bool: + """ + Returns a boolean indicating if all arguments are not None. + """ + return all(arg is not None for arg in args) + + +def count_not_none(*args) -> int: + """ + Returns the count of arguments that are not None. + """ + return sum(x is not None for x in args) + + +@overload +def asarray_tuplesafe( + values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ... +) -> np.ndarray: + # ExtensionArray can only be returned when values is an Index, all other iterables + # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type + # signature, so instead we special-case some common types. + ... + + +@overload +def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike: + ... + + +def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike: + + if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")): + values = list(values) + elif isinstance(values, ABCIndex): + return values._values + + if isinstance(values, list) and dtype in [np.object_, object]: + return construct_1d_object_array_from_listlike(values) + + try: + with warnings.catch_warnings(): + # Can remove warning filter once NumPy 1.24 is min version + warnings.simplefilter("ignore", np.VisibleDeprecationWarning) + result = np.asarray(values, dtype=dtype) + except ValueError: + # Using try/except since it's more performant than checking is_list_like + # over each element + # error: Argument 1 to "construct_1d_object_array_from_listlike" + # has incompatible type "Iterable[Any]"; expected "Sized" + return construct_1d_object_array_from_listlike(values) # type: ignore[arg-type] + + if issubclass(result.dtype.type, str): + result = np.asarray(values, dtype=object) + + if result.ndim == 2: + # Avoid building an array of arrays: + values = [tuple(x) for x in values] + result = construct_1d_object_array_from_listlike(values) + + return result + + +def index_labels_to_array( + labels: np.ndarray | Iterable, dtype: NpDtype | None = None +) -> np.ndarray: + """ + Transform label or iterable of labels to array, for use in Index. + + Parameters + ---------- + dtype : dtype + If specified, use as dtype of the resulting array, otherwise infer. 
+ + Returns + ------- + array + """ + if isinstance(labels, (str, tuple)): + labels = [labels] + + if not isinstance(labels, (list, np.ndarray)): + try: + labels = list(labels) + except TypeError: # non-iterable + labels = [labels] + + labels = asarray_tuplesafe(labels, dtype=dtype) + + return labels + + +def maybe_make_list(obj): + if obj is not None and not isinstance(obj, (tuple, list)): + return [obj] + return obj + + +def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T: + """ + If obj is Iterable but not list-like, consume into list. + """ + if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized): + return list(obj) + obj = cast(Collection, obj) + return obj + + +def is_null_slice(obj) -> bool: + """ + We have a null slice. + """ + return ( + isinstance(obj, slice) + and obj.start is None + and obj.stop is None + and obj.step is None + ) + + +def is_true_slices(line) -> list[bool]: + """ + Find non-trivial slices in "line": return a list of booleans with same length. + """ + return [isinstance(k, slice) and not is_null_slice(k) for k in line] + + +# TODO: used only once in indexing; belongs elsewhere? +def is_full_slice(obj, line: int) -> bool: + """ + We have a full length slice. + """ + return ( + isinstance(obj, slice) + and obj.start == 0 + and obj.stop == line + and obj.step is None + ) + + +def get_callable_name(obj): + # typical case has name + if hasattr(obj, "__name__"): + return getattr(obj, "__name__") + # some objects don't; could recurse + if isinstance(obj, partial): + return get_callable_name(obj.func) + # fall back to class name + if callable(obj): + return type(obj).__name__ + # everything failed (probably because the argument + # wasn't actually callable); we return None + # instead of the empty string in this case to allow + # distinguishing between no name and a name of '' + return None + + +def apply_if_callable(maybe_callable, obj, **kwargs): + """ + Evaluate possibly callable input using obj and kwargs if it is callable, + otherwise return as it is. + + Parameters + ---------- + maybe_callable : possibly a callable + obj : NDFrame + **kwargs + """ + if callable(maybe_callable): + return maybe_callable(obj, **kwargs) + + return maybe_callable + + +def standardize_mapping(into): + """ + Helper function to standardize a supplied mapping. + + Parameters + ---------- + into : instance or subclass of collections.abc.Mapping + Must be a class, an initialized collections.defaultdict, + or an instance of a collections.abc.Mapping subclass. + + Returns + ------- + mapping : a collections.abc.Mapping subclass or other constructor + a callable object that can accept an iterator to create + the desired Mapping. + + See Also + -------- + DataFrame.to_dict + Series.to_dict + """ + if not inspect.isclass(into): + if isinstance(into, defaultdict): + return partial(defaultdict, into.default_factory) + into = type(into) + if not issubclass(into, abc.Mapping): + raise TypeError(f"unsupported type: {into}") + elif into == defaultdict: + raise TypeError("to_dict() only accepts initialized defaultdicts") + return into + + +@overload +def random_state(state: np.random.Generator) -> np.random.Generator: + ... + + +@overload +def random_state( + state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None, +) -> np.random.RandomState: + ... + + +def random_state(state: RandomState | None = None): + """ + Helper function for processing random_state arguments. 
+ + Parameters + ---------- + state : int, array-like, BitGenerator, Generator, np.random.RandomState, None. + If receives an int, array-like, or BitGenerator, passes to + np.random.RandomState() as seed. + If receives an np.random RandomState or Generator, just returns that unchanged. + If receives `None`, returns np.random. + If receives anything else, raises an informative ValueError. + + .. versionchanged:: 1.1.0 + + array-like and BitGenerator object now passed to np.random.RandomState() + as seed + + Default None. + + Returns + ------- + np.random.RandomState or np.random.Generator. If state is None, returns np.random + + """ + if ( + is_integer(state) + or is_array_like(state) + or isinstance(state, np.random.BitGenerator) + ): + # error: Argument 1 to "RandomState" has incompatible type "Optional[Union[int, + # Union[ExtensionArray, ndarray[Any, Any]], Generator, RandomState]]"; expected + # "Union[None, Union[Union[_SupportsArray[dtype[Union[bool_, integer[Any]]]], + # Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]], + # Sequence[Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_, + # integer[Any]]]]]]], + # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_, + # integer[Any]]]]]]]]], Union[bool, int, Sequence[Union[bool, int]], + # Sequence[Sequence[Union[bool, int]]], Sequence[Sequence[Sequence[Union[bool, + # int]]]], Sequence[Sequence[Sequence[Sequence[Union[bool, int]]]]]]], + # BitGenerator]" + return np.random.RandomState(state) # type: ignore[arg-type] + elif isinstance(state, np.random.RandomState): + return state + elif isinstance(state, np.random.Generator): + return state + elif state is None: + return np.random + else: + raise ValueError( + "random_state must be an integer, array-like, a BitGenerator, Generator, " + "a numpy RandomState, or None" + ) + + +def pipe( + obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs +) -> T: + """ + Apply a function ``func`` to object ``obj`` either by passing obj as the + first argument to the function or, in the case that the func is a tuple, + interpret the first element of the tuple as a function and pass the obj to + that function as a keyword argument whose key is the value of the second + element of the tuple. + + Parameters + ---------- + func : callable or tuple of (callable, str) + Function to apply to this object or, alternatively, a + ``(callable, data_keyword)`` tuple where ``data_keyword`` is a + string indicating the keyword of ``callable`` that expects the + object. + *args : iterable, optional + Positional arguments passed into ``func``. + **kwargs : dict, optional + A dictionary of keyword arguments passed into ``func``. + + Returns + ------- + object : the return type of ``func``. + """ + if isinstance(func, tuple): + func, target = func + if target in kwargs: + msg = f"{target} is both the pipe target and a keyword argument" + raise ValueError(msg) + kwargs[target] = obj + return func(*args, **kwargs) + else: + return func(obj, *args, **kwargs) + + +def get_rename_function(mapper): + """ + Returns a function that will map names/labels, dependent if mapper + is a dict, Series or just a function. 
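# --- editorial illustration, not part of the imported pandas source ---
# How the random_state helper above normalises the accepted seed types;
# again this uses the internal pandas.core.common module directly.
import numpy as np
import pandas.core.common as com

assert isinstance(com.random_state(42), np.random.RandomState)   # seeded
gen = np.random.default_rng(0)
assert com.random_state(gen) is gen         # Generator passed through as-is
assert com.random_state(None) is np.random  # None means the np.random module
# --- end of editorial illustration ---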
+ """ + + def f(x): + if x in mapper: + return mapper[x] + else: + return x + + return f if isinstance(mapper, (abc.Mapping, ABCSeries)) else mapper + + +def convert_to_list_like( + values: Hashable | Iterable | AnyArrayLike, +) -> list | AnyArrayLike: + """ + Convert list-like or scalar input to list-like. List, numpy and pandas array-like + inputs are returned unmodified whereas others are converted to list. + """ + if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)): + return values + elif isinstance(values, abc.Iterable) and not isinstance(values, str): + return list(values) + + return [values] + + +@contextlib.contextmanager +def temp_setattr(obj, attr: str, value) -> Iterator[None]: + """Temporarily set attribute on an object. + + Args: + obj: Object whose attribute will be modified. + attr: Attribute to modify. + value: Value to temporarily set attribute to. + + Yields: + obj with modified attribute. + """ + old_value = getattr(obj, attr) + setattr(obj, attr, value) + try: + yield obj + finally: + setattr(obj, attr, old_value) + + +def require_length_match(data, index: Index) -> None: + """ + Check the length of data matches the length of the index. + """ + if len(data) != len(index): + raise ValueError( + "Length of values " + f"({len(data)}) " + "does not match length of index " + f"({len(index)})" + ) + + +# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0, +# whereas np.min and np.max (which directly call obj.min and obj.max) +# default to axis=None. +_builtin_table = { + builtins.sum: np.sum, + builtins.max: np.maximum.reduce, + builtins.min: np.minimum.reduce, +} + +_cython_table = { + builtins.sum: "sum", + builtins.max: "max", + builtins.min: "min", + np.all: "all", + np.any: "any", + np.sum: "sum", + np.nansum: "sum", + np.mean: "mean", + np.nanmean: "mean", + np.prod: "prod", + np.nanprod: "prod", + np.std: "std", + np.nanstd: "std", + np.var: "var", + np.nanvar: "var", + np.median: "median", + np.nanmedian: "median", + np.max: "max", + np.nanmax: "max", + np.min: "min", + np.nanmin: "min", + np.cumprod: "cumprod", + np.nancumprod: "cumprod", + np.cumsum: "cumsum", + np.nancumsum: "cumsum", +} + + +def get_cython_func(arg: Callable) -> str | None: + """ + if we define an internal function for this argument, return it + """ + return _cython_table.get(arg) + + +def is_builtin_func(arg): + """ + if we define a builtin function for this argument, return it, + otherwise return the arg + """ + return _builtin_table.get(arg, arg) + + +def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]: + """ + If a name is missing then replace it by level_n, where n is the count + + .. versionadded:: 1.4.0 + + Parameters + ---------- + names : list-like + list of column names or None values. + + Returns + ------- + list + list of column names with the None values replaced. + """ + return [f"level_{i}" if name is None else name for i, name in enumerate(names)] + + +def resolve_numeric_only(numeric_only: bool | None | lib.NoDefault) -> bool: + """Determine the Boolean value of numeric_only. + + See GH#46560 for details on the deprecation. + + Parameters + ---------- + numeric_only : bool, None, or lib.no_default + Value passed to the method. + + Returns + ------- + Resolved value of numeric_only. 
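# --- editorial illustration, not part of the imported pandas source ---
# Usage sketch for the temp_setattr context manager above: the original
# attribute value is restored on exit, even if the body raises.  The object
# and attribute are made up for the example.
from types import SimpleNamespace
import pandas.core.common as com

obj = SimpleNamespace(flag=False)
with com.temp_setattr(obj, "flag", True):
    assert obj.flag is True
assert obj.flag is False                   # restored by the finally block
# --- end of editorial illustration ---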
+ """ + if numeric_only is lib.no_default: + # Methods that behave like numeric_only=True and only got the numeric_only + # arg in 1.5.0 default to lib.no_default + result = True + elif numeric_only is None: + # Methods that had the numeric_only arg prior to 1.5.0 and try all columns + # first default to None + result = False + else: + result = numeric_only + return result + + +def deprecate_numeric_only_default( + cls: type, name: str, deprecate_none: bool = False +) -> None: + """Emit FutureWarning message for deprecation of numeric_only. + + See GH#46560 for details on the deprecation. + + Parameters + ---------- + cls : type + pandas type that is generating the warning. + name : str + Name of the method that is generating the warning. + deprecate_none : bool, default False + Whether to also warn about the deprecation of specifying ``numeric_only=None``. + """ + if name in ["all", "any"]: + arg_name = "bool_only" + else: + arg_name = "numeric_only" + + msg = ( + f"The default value of {arg_name} in {cls.__name__}.{name} is " + "deprecated. In a future version, it will default to False. " + ) + if deprecate_none: + msg += f"In addition, specifying '{arg_name}=None' is deprecated. " + msg += ( + f"Select only valid columns or specify the value of {arg_name} to silence " + "this warning." + ) + + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) diff --git a/pandas/core/computation/__init__.py b/pandas/core/computation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py new file mode 100644 index 00000000..2e7a0f84 --- /dev/null +++ b/pandas/core/computation/align.py @@ -0,0 +1,214 @@ +""" +Core eval alignment algorithms. +""" +from __future__ import annotations + +from functools import ( + partial, + wraps, +) +from typing import ( + TYPE_CHECKING, + Callable, + Sequence, +) +import warnings + +import numpy as np + +from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.computation.common import result_type_many + +if TYPE_CHECKING: + from pandas._typing import F + + from pandas.core.generic import NDFrame + from pandas.core.indexes.api import Index + + +def _align_core_single_unary_op( + term, +) -> tuple[partial | type[NDFrame], dict[str, Index] | None]: + + typ: partial | type[NDFrame] + axes: dict[str, Index] | None = None + + if isinstance(term.value, np.ndarray): + typ = partial(np.asanyarray, dtype=term.value.dtype) + else: + typ = type(term.value) + if hasattr(term.value, "axes"): + axes = _zip_axes_from_type(typ, term.value.axes) + + return typ, axes + + +def _zip_axes_from_type( + typ: type[NDFrame], new_axes: Sequence[Index] +) -> dict[str, Index]: + return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)} + + +def _any_pandas_objects(terms) -> bool: + """ + Check a sequence of terms for instances of PandasObject. 
+ """ + return any(isinstance(term.value, PandasObject) for term in terms) + + +def _filter_special_cases(f) -> Callable[[F], F]: + @wraps(f) + def wrapper(terms): + # single unary operand + if len(terms) == 1: + return _align_core_single_unary_op(terms[0]) + + term_values = (term.value for term in terms) + + # we don't have any pandas objects + if not _any_pandas_objects(terms): + return result_type_many(*term_values), None + + return f(terms) + + return wrapper + + +@_filter_special_cases +def _align_core(terms): + term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")] + term_dims = [terms[i].value.ndim for i in term_index] + + from pandas import Series + + ndims = Series(dict(zip(term_index, term_dims))) + + # initial axes are the axes of the largest-axis'd term + biggest = terms[ndims.idxmax()].value + typ = biggest._constructor + axes = biggest.axes + naxes = len(axes) + gt_than_one_axis = naxes > 1 + + for value in (terms[i].value for i in term_index): + is_series = isinstance(value, ABCSeries) + is_series_and_gt_one_axis = is_series and gt_than_one_axis + + for axis, items in enumerate(value.axes): + if is_series_and_gt_one_axis: + ax, itm = naxes - 1, value.index + else: + ax, itm = axis, items + + if not axes[ax].is_(itm): + axes[ax] = axes[ax].join(itm, how="outer") + + for i, ndim in ndims.items(): + for axis, items in zip(range(ndim), axes): + ti = terms[i].value + + if hasattr(ti, "reindex"): + transpose = isinstance(ti, ABCSeries) and naxes > 1 + reindexer = axes[naxes - 1] if transpose else items + + term_axis_size = len(ti.axes[axis]) + reindexer_size = len(reindexer) + + ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) + if ordm >= 1 and reindexer_size >= 10000: + w = ( + f"Alignment difference on axis {axis} is larger " + f"than an order of magnitude on term {repr(terms[i].name)}, " + f"by more than {ordm:.4g}; performance may suffer." + ) + warnings.warn( + w, category=PerformanceWarning, stacklevel=find_stack_level() + ) + + f = partial(ti.reindex, reindexer, axis=axis, copy=False) + + terms[i].update(f()) + + terms[i].update(terms[i].value.values) + + return typ, _zip_axes_from_type(typ, axes) + + +def align_terms(terms): + """ + Align a set of terms. + """ + try: + # flatten the parse tree (a nested list, really) + terms = list(com.flatten(terms)) + except TypeError: + # can't iterate so it must just be a constant or single variable + if isinstance(terms.value, (ABCSeries, ABCDataFrame)): + typ = type(terms.value) + return typ, _zip_axes_from_type(typ, terms.value.axes) + return np.result_type(terms.type), None + + # if all resolved variables are numeric scalars + if all(term.is_scalar for term in terms): + return result_type_many(*(term.value for term in terms)).type, None + + # perform the main alignment + typ, axes = _align_core(terms) + return typ, axes + + +def reconstruct_object(typ, obj, axes, dtype): + """ + Reconstruct an object given its type, raw value, and possibly empty + (None) axes. + + Parameters + ---------- + typ : object + A type + obj : object + The value to use in the type constructor + axes : dict + The axes to use to construct the resulting pandas object + + Returns + ------- + ret : typ + An object of type ``typ`` with the value `obj` and possible axes + `axes`. 
+ """ + try: + typ = typ.type + except AttributeError: + pass + + res_t = np.result_type(obj.dtype, dtype) + + if not isinstance(typ, partial) and issubclass(typ, PandasObject): + return typ(obj, dtype=res_t, **axes) + + # special case for pathological things like ~True/~False + if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_: + ret_value = res_t.type(obj) + else: + ret_value = typ(obj).astype(res_t) + # The condition is to distinguish 0-dim array (returned in case of + # scalar) and 1 element array + # e.g. np.array(0) and np.array([0]) + if ( + len(obj.shape) == 1 + and len(obj) == 1 + and not isinstance(ret_value, np.ndarray) + ): + ret_value = np.array([ret_value]).astype(res_t) + + return ret_value diff --git a/pandas/core/computation/api.py b/pandas/core/computation/api.py new file mode 100644 index 00000000..bd3be5b3 --- /dev/null +++ b/pandas/core/computation/api.py @@ -0,0 +1,2 @@ +__all__ = ["eval"] +from pandas.core.computation.eval import eval diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py new file mode 100644 index 00000000..3221b158 --- /dev/null +++ b/pandas/core/computation/check.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from pandas.compat._optional import import_optional_dependency + +ne = import_optional_dependency("numexpr", errors="warn") +NUMEXPR_INSTALLED = ne is not None +if NUMEXPR_INSTALLED: + NUMEXPR_VERSION = ne.__version__ +else: + NUMEXPR_VERSION = None + +__all__ = ["NUMEXPR_INSTALLED", "NUMEXPR_VERSION"] diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py new file mode 100644 index 00000000..a1ac3dfa --- /dev/null +++ b/pandas/core/computation/common.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from functools import reduce + +import numpy as np + +from pandas._config import get_option + + +def ensure_decoded(s) -> str: + """ + If we have bytes, decode them to unicode. + """ + if isinstance(s, (np.bytes_, bytes)): + s = s.decode(get_option("display.encoding")) + return s + + +def result_type_many(*arrays_and_dtypes): + """ + Wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32) + argument limit. + """ + try: + return np.result_type(*arrays_and_dtypes) + except ValueError: + # we have > NPY_MAXARGS terms in our expression + return reduce(np.result_type, arrays_and_dtypes) diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py new file mode 100644 index 00000000..2ea5a536 --- /dev/null +++ b/pandas/core/computation/engines.py @@ -0,0 +1,144 @@ +""" +Engine classes for :func:`~pandas.eval` +""" +from __future__ import annotations + +import abc +from typing import TYPE_CHECKING + +from pandas.errors import NumExprClobberingError + +from pandas.core.computation.align import ( + align_terms, + reconstruct_object, +) +from pandas.core.computation.ops import ( + MATHOPS, + REDUCTIONS, +) + +import pandas.io.formats.printing as printing + +if TYPE_CHECKING: + from pandas.core.computation.expr import Expr + +_ne_builtins = frozenset(MATHOPS + REDUCTIONS) + + +def _check_ne_builtin_clash(expr: Expr) -> None: + """ + Attempt to prevent foot-shooting in a helpful way. 
+ + Parameters + ---------- + expr : Expr + Terms can contain + """ + names = expr.names + overlap = names & _ne_builtins + + if overlap: + s = ", ".join([repr(x) for x in overlap]) + raise NumExprClobberingError( + f'Variables in expression "{expr}" overlap with builtins: ({s})' + ) + + +class AbstractEngine(metaclass=abc.ABCMeta): + """Object serving as a base class for all engines.""" + + has_neg_frac = False + + def __init__(self, expr) -> None: + self.expr = expr + self.aligned_axes = None + self.result_type = None + + def convert(self) -> str: + """ + Convert an expression for evaluation. + + Defaults to return the expression as a string. + """ + return printing.pprint_thing(self.expr) + + def evaluate(self) -> object: + """ + Run the engine on the expression. + + This method performs alignment which is necessary no matter what engine + is being used, thus its implementation is in the base class. + + Returns + ------- + object + The result of the passed expression. + """ + if not self._is_aligned: + self.result_type, self.aligned_axes = align_terms(self.expr.terms) + + # make sure no names in resolvers and locals/globals clash + res = self._evaluate() + return reconstruct_object( + self.result_type, res, self.aligned_axes, self.expr.terms.return_type + ) + + @property + def _is_aligned(self) -> bool: + return self.aligned_axes is not None and self.result_type is not None + + @abc.abstractmethod + def _evaluate(self): + """ + Return an evaluated expression. + + Parameters + ---------- + env : Scope + The local and global environment in which to evaluate an + expression. + + Notes + ----- + Must be implemented by subclasses. + """ + pass + + +class NumExprEngine(AbstractEngine): + """NumExpr engine class""" + + has_neg_frac = True + + def _evaluate(self): + import numexpr as ne + + # convert the expression to a valid numexpr expression + s = self.convert() + + env = self.expr.env + scope = env.full_scope + _check_ne_builtin_clash(self.expr) + return ne.evaluate(s, local_dict=scope) + + +class PythonEngine(AbstractEngine): + """ + Evaluate an expression in Python space. + + Mostly for testing purposes. + """ + + has_neg_frac = False + + def evaluate(self): + return self.expr() + + def _evaluate(self) -> None: + pass + + +ENGINES: dict[str, type[AbstractEngine]] = { + "numexpr": NumExprEngine, + "python": PythonEngine, +} diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py new file mode 100644 index 00000000..f833b59a --- /dev/null +++ b/pandas/core/computation/eval.py @@ -0,0 +1,414 @@ +""" +Top level ``eval`` module. +""" +from __future__ import annotations + +import tokenize +from typing import TYPE_CHECKING +import warnings + +from pandas._libs.lib import no_default +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.computation.engines import ENGINES +from pandas.core.computation.expr import ( + PARSERS, + Expr, +) +from pandas.core.computation.parsing import tokenize_string +from pandas.core.computation.scope import ensure_scope +from pandas.core.generic import NDFrame + +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas.core.computation.ops import BinOp + + +def _check_engine(engine: str | None) -> str: + """ + Make sure a valid engine is passed. + + Parameters + ---------- + engine : str + String to validate. + + Raises + ------ + KeyError + * If an invalid engine is passed. + ImportError + * If numexpr was requested but doesn't exist. 
+ + Returns + ------- + str + Engine name. + """ + from pandas.core.computation.check import NUMEXPR_INSTALLED + from pandas.core.computation.expressions import USE_NUMEXPR + + if engine is None: + engine = "numexpr" if USE_NUMEXPR else "python" + + if engine not in ENGINES: + valid_engines = list(ENGINES.keys()) + raise KeyError( + f"Invalid engine '{engine}' passed, valid engines are {valid_engines}" + ) + + # TODO: validate this in a more general way (thinking of future engines + # that won't necessarily be import-able) + # Could potentially be done on engine instantiation + if engine == "numexpr" and not NUMEXPR_INSTALLED: + raise ImportError( + "'numexpr' is not installed or an unsupported version. Cannot use " + "engine='numexpr' for query/eval if 'numexpr' is not installed" + ) + + return engine + + +def _check_parser(parser: str): + """ + Make sure a valid parser is passed. + + Parameters + ---------- + parser : str + + Raises + ------ + KeyError + * If an invalid parser is passed + """ + if parser not in PARSERS: + raise KeyError( + f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}" + ) + + +def _check_resolvers(resolvers): + if resolvers is not None: + for resolver in resolvers: + if not hasattr(resolver, "__getitem__"): + name = type(resolver).__name__ + raise TypeError( + f"Resolver of type '{name}' does not " + "implement the __getitem__ method" + ) + + +def _check_expression(expr): + """ + Make sure an expression is not an empty string + + Parameters + ---------- + expr : object + An object that can be converted to a string + + Raises + ------ + ValueError + * If expr is an empty string + """ + if not expr: + raise ValueError("expr cannot be an empty string") + + +def _convert_expression(expr) -> str: + """ + Convert an object to an expression. + + This function converts an object to an expression (a unicode string) and + checks to make sure it isn't empty after conversion. This is used to + convert operators to their string representation for recursive calls to + :func:`~pandas.eval`. + + Parameters + ---------- + expr : object + The object to be converted to a string. + + Returns + ------- + str + The string representation of an object. + + Raises + ------ + ValueError + * If the expression is empty. + """ + s = pprint_thing(expr) + _check_expression(s) + return s + + +def _check_for_locals(expr: str, stack_level: int, parser: str): + + at_top_of_stack = stack_level == 0 + not_pandas_parser = parser != "pandas" + + if not_pandas_parser: + msg = "The '@' prefix is only supported by the pandas parser" + elif at_top_of_stack: + msg = ( + "The '@' prefix is not allowed in top-level eval calls.\n" + "please refer to your variables by name without the '@' prefix." + ) + + if at_top_of_stack or not_pandas_parser: + for toknum, tokval in tokenize_string(expr): + if toknum == tokenize.OP and tokval == "@": + raise SyntaxError(msg) + + +def eval( + expr: str | BinOp, # we leave BinOp out of the docstr bc it isn't for users + parser: str = "pandas", + engine: str | None = None, + truediv=no_default, + local_dict=None, + global_dict=None, + resolvers=(), + level=0, + target=None, + inplace=False, +): + """ + Evaluate a Python expression as a string using various backends. + + The following arithmetic operations are supported: ``+``, ``-``, ``*``, + ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following + boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not). 
+ Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`, + :keyword:`or`, and :keyword:`not` with the same semantics as the + corresponding bitwise operators. :class:`~pandas.Series` and + :class:`~pandas.DataFrame` objects are supported and behave as they would + with plain ol' Python evaluation. + + Parameters + ---------- + expr : str + The expression to evaluate. This string cannot contain any Python + `statements + `__, + only Python `expressions + `__. + parser : {'pandas', 'python'}, default 'pandas' + The parser to use to construct the syntax tree from the expression. The + default of ``'pandas'`` parses code slightly different than standard + Python. Alternatively, you can parse an expression using the + ``'python'`` parser to retain strict Python semantics. See the + :ref:`enhancing performance ` documentation for + more details. + engine : {'python', 'numexpr'}, default 'numexpr' + + The engine used to evaluate the expression. Supported engines are + + - None : tries to use ``numexpr``, falls back to ``python`` + - ``'numexpr'`` : This default engine evaluates pandas objects using + numexpr for large speed ups in complex expressions with large frames. + - ``'python'`` : Performs operations as if you had ``eval``'d in top + level python. This engine is generally not that useful. + + More backends may be available in the future. + + truediv : bool, optional + Whether to use true division, like in Python >= 3. + + .. deprecated:: 1.0.0 + + local_dict : dict or None, optional + A dictionary of local variables, taken from locals() by default. + global_dict : dict or None, optional + A dictionary of global variables, taken from globals() by default. + resolvers : list of dict-like or None, optional + A list of objects implementing the ``__getitem__`` special method that + you can use to inject an additional collection of namespaces to use for + variable lookup. For example, this is used in the + :meth:`~DataFrame.query` method to inject the + ``DataFrame.index`` and ``DataFrame.columns`` + variables that refer to their respective :class:`~pandas.DataFrame` + instance attributes. + level : int, optional + The number of prior stack frames to traverse and add to the current + scope. Most users will **not** need to change this parameter. + target : object, optional, default None + This is the target object for assignment. It is used when there is + variable assignment in the expression. If so, then `target` must + support item assignment with string keys, and if a copy is being + returned, it must also support `.copy()`. + inplace : bool, default False + If `target` is provided, and the expression mutates `target`, whether + to modify `target` inplace. Otherwise, return a copy of `target` with + the mutation. + + Returns + ------- + ndarray, numeric scalar, DataFrame, Series, or None + The completion value of evaluating the given code or None if ``inplace=True``. + + Raises + ------ + ValueError + There are many instances where such an error can be raised: + + - `target=None`, but the expression is multiline. + - The expression is multiline, but not all them have item assignment. + An example of such an arrangement is this: + + a = b + 1 + a + 2 + + Here, there are expressions on different lines, making it multiline, + but the last line has no variable assigned to the output of `a + 2`. + - `inplace=True`, but the expression is missing item assignment. + - Item assignment is provided, but the `target` does not support + string item assignment. 
+ - Item assignment is provided and `inplace=False`, but the `target` + does not support the `.copy()` method + + See Also + -------- + DataFrame.query : Evaluates a boolean expression to query the columns + of a frame. + DataFrame.eval : Evaluate a string describing operations on + DataFrame columns. + + Notes + ----- + The ``dtype`` of any objects involved in an arithmetic ``%`` operation are + recursively cast to ``float64``. + + See the :ref:`enhancing performance ` documentation for + more details. + + Examples + -------- + >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]}) + >>> df + animal age + 0 dog 10 + 1 pig 20 + + We can add a new column using ``pd.eval``: + + >>> pd.eval("double_age = df.age * 2", target=df) + animal age double_age + 0 dog 10 20 + 1 pig 20 40 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + if truediv is not no_default: + warnings.warn( + ( + "The `truediv` parameter in pd.eval is deprecated and " + "will be removed in a future version." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + + exprs: list[str | BinOp] + if isinstance(expr, str): + _check_expression(expr) + exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""] + else: + # ops.BinOp; for internal compat, not intended to be passed by users + exprs = [expr] + multi_line = len(exprs) > 1 + + if multi_line and target is None: + raise ValueError( + "multi-line expressions are only valid in the " + "context of data, use DataFrame.eval" + ) + engine = _check_engine(engine) + _check_parser(parser) + _check_resolvers(resolvers) + + ret = None + first_expr = True + target_modified = False + + for expr in exprs: + expr = _convert_expression(expr) + _check_for_locals(expr, level, parser) + + # get our (possibly passed-in) scope + env = ensure_scope( + level + 1, + global_dict=global_dict, + local_dict=local_dict, + resolvers=resolvers, + target=target, + ) + + parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) + + # construct the engine and evaluate the parsed expression + eng = ENGINES[engine] + eng_inst = eng(parsed_expr) + ret = eng_inst.evaluate() + + if parsed_expr.assigner is None: + if multi_line: + raise ValueError( + "Multi-line expressions are only valid " + "if all expressions contain an assignment" + ) + elif inplace: + raise ValueError("Cannot operate inplace if there is no assignment") + + # assign if needed + assigner = parsed_expr.assigner + if env.target is not None and assigner is not None: + target_modified = True + + # if returning a copy, copy only on the first assignment + if not inplace and first_expr: + try: + target = env.target.copy() + except AttributeError as err: + raise ValueError("Cannot return a copy of the target") from err + else: + target = env.target + + # TypeError is most commonly raised (e.g. int, list), but you + # get IndexError if you try to do this assignment on np.ndarray. + # we will ignore numpy warnings here; e.g. if trying + # to use a non-numeric indexer + try: + with warnings.catch_warnings(record=True): + # TODO: Filter the warnings we actually care about here. 
+ if inplace and isinstance(target, NDFrame): + target.loc[:, assigner] = ret + else: + target[assigner] = ret + except (TypeError, IndexError) as err: + raise ValueError("Cannot assign expression output to target") from err + + if not resolvers: + resolvers = ({assigner: ret},) + else: + # existing resolver needs updated to handle + # case of mutating existing column in copy + for resolver in resolvers: + if assigner in resolver: + resolver[assigner] = ret + break + else: + resolvers += ({assigner: ret},) + + ret = None + first_expr = False + + # We want to exclude `inplace=None` as being False. + if inplace is False: + return target if target_modified else ret diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py new file mode 100644 index 00000000..ff3f259f --- /dev/null +++ b/pandas/core/computation/expr.py @@ -0,0 +1,844 @@ +""" +:func:`~pandas.eval` parsers. +""" +from __future__ import annotations + +import ast +from functools import ( + partial, + reduce, +) +from keyword import iskeyword +import tokenize +from typing import ( + Callable, + TypeVar, +) + +import numpy as np + +from pandas.compat import PY39 +from pandas.errors import UndefinedVariableError + +import pandas.core.common as com +from pandas.core.computation.ops import ( + ARITH_OPS_SYMS, + BOOL_OPS_SYMS, + CMP_OPS_SYMS, + LOCAL_TAG, + MATHOPS, + REDUCTIONS, + UNARY_OPS_SYMS, + BinOp, + Constant, + Div, + FuncNode, + Op, + Term, + UnaryOp, + is_term, +) +from pandas.core.computation.parsing import ( + clean_backtick_quoted_toks, + tokenize_string, +) +from pandas.core.computation.scope import Scope + +import pandas.io.formats.printing as printing + + +def _rewrite_assign(tok: tuple[int, str]) -> tuple[int, str]: + """ + Rewrite the assignment operator for PyTables expressions that use ``=`` + as a substitute for ``==``. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + tuple of int, str + Either the input or token or the replacement values + """ + toknum, tokval = tok + return toknum, "==" if tokval == "=" else tokval + + +def _replace_booleans(tok: tuple[int, str]) -> tuple[int, str]: + """ + Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise + precedence is changed to boolean precedence. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + tuple of int, str + Either the input or token or the replacement values + """ + toknum, tokval = tok + if toknum == tokenize.OP: + if tokval == "&": + return tokenize.NAME, "and" + elif tokval == "|": + return tokenize.NAME, "or" + return toknum, tokval + return toknum, tokval + + +def _replace_locals(tok: tuple[int, str]) -> tuple[int, str]: + """ + Replace local variables with a syntactically valid name. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + tuple of int, str + Either the input or token or the replacement values + + Notes + ----- + This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as + ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_`` + is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it. + """ + toknum, tokval = tok + if toknum == tokenize.OP and tokval == "@": + return tokenize.OP, LOCAL_TAG + return toknum, tokval + + +def _compose2(f, g): + """ + Compose 2 callables. 
+ """ + return lambda *args, **kwargs: f(g(*args, **kwargs)) + + +def _compose(*funcs): + """ + Compose 2 or more callables. + """ + assert len(funcs) > 1, "At least 2 callables must be passed to compose" + return reduce(_compose2, funcs) + + +def _preparse( + source: str, + f=_compose( + _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks + ), +) -> str: + """ + Compose a collection of tokenization functions. + + Parameters + ---------- + source : str + A Python source code string + f : callable + This takes a tuple of (toknum, tokval) as its argument and returns a + tuple with the same structure but possibly different elements. Defaults + to the composition of ``_rewrite_assign``, ``_replace_booleans``, and + ``_replace_locals``. + + Returns + ------- + str + Valid Python source code + + Notes + ----- + The `f` parameter can be any callable that takes *and* returns input of the + form ``(toknum, tokval)``, where ``toknum`` is one of the constants from + the ``tokenize`` module and ``tokval`` is a string. + """ + assert callable(f), "f must be callable" + return tokenize.untokenize(f(x) for x in tokenize_string(source)) + + +def _is_type(t): + """ + Factory for a type checking function of type ``t`` or tuple of types. + """ + return lambda x: isinstance(x.value, t) + + +_is_list = _is_type(list) +_is_str = _is_type(str) + + +# partition all AST nodes +_all_nodes = frozenset( + node + for node in (getattr(ast, name) for name in dir(ast)) + if isinstance(node, type) and issubclass(node, ast.AST) +) + + +def _filter_nodes(superclass, all_nodes=_all_nodes): + """ + Filter out AST nodes that are subclasses of ``superclass``. + """ + node_names = (node.__name__ for node in all_nodes if issubclass(node, superclass)) + return frozenset(node_names) + + +_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes)) +_mod_nodes = _filter_nodes(ast.mod) +_stmt_nodes = _filter_nodes(ast.stmt) +_expr_nodes = _filter_nodes(ast.expr) +_expr_context_nodes = _filter_nodes(ast.expr_context) +_boolop_nodes = _filter_nodes(ast.boolop) +_operator_nodes = _filter_nodes(ast.operator) +_unary_op_nodes = _filter_nodes(ast.unaryop) +_cmp_op_nodes = _filter_nodes(ast.cmpop) +_comprehension_nodes = _filter_nodes(ast.comprehension) +_handler_nodes = _filter_nodes(ast.excepthandler) +_arguments_nodes = _filter_nodes(ast.arguments) +_keyword_nodes = _filter_nodes(ast.keyword) +_alias_nodes = _filter_nodes(ast.alias) + +if not PY39: + _slice_nodes = _filter_nodes(ast.slice) + + +# nodes that we don't support directly but are needed for parsing +_hacked_nodes = frozenset(["Assign", "Module", "Expr"]) + + +_unsupported_expr_nodes = frozenset( + [ + "Yield", + "GeneratorExp", + "IfExp", + "DictComp", + "SetComp", + "Repr", + "Lambda", + "Set", + "AST", + "Is", + "IsNot", + ] +) + +# these nodes are low priority or won't ever be supported (e.g., AST) +_unsupported_nodes = ( + _stmt_nodes + | _mod_nodes + | _handler_nodes + | _arguments_nodes + | _keyword_nodes + | _alias_nodes + | _expr_context_nodes + | _unsupported_expr_nodes +) - _hacked_nodes + +# we're adding a different assignment in some cases to be equality comparison +# and we don't want `stmt` and friends in their so get only the class whose +# names are capitalized +_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes +intersection = _unsupported_nodes & _base_supported_nodes +_msg = f"cannot both support and not support {intersection}" +assert not intersection, _msg + + +def 
_node_not_implemented(node_name: str) -> Callable[..., None]: + """ + Return a function that raises a NotImplementedError with a passed node name. + """ + + def f(self, *args, **kwargs): + raise NotImplementedError(f"'{node_name}' nodes are not implemented") + + return f + + +# should be bound by BaseExprVisitor but that creates a circular dependency: +# _T is used in disallow, but disallow is used to define BaseExprVisitor +# https://github.com/microsoft/pyright/issues/2315 +_T = TypeVar("_T") + + +def disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]: + """ + Decorator to disallow certain nodes from parsing. Raises a + NotImplementedError instead. + + Returns + ------- + callable + """ + + def disallowed(cls: type[_T]) -> type[_T]: + # error: "Type[_T]" has no attribute "unsupported_nodes" + cls.unsupported_nodes = () # type: ignore[attr-defined] + for node in nodes: + new_method = _node_not_implemented(node) + name = f"visit_{node}" + # error: "Type[_T]" has no attribute "unsupported_nodes" + cls.unsupported_nodes += (name,) # type: ignore[attr-defined] + setattr(cls, name, new_method) + return cls + + return disallowed + + +def _op_maker(op_class, op_symbol): + """ + Return a function to create an op class with its symbol already passed. + + Returns + ------- + callable + """ + + def f(self, node, *args, **kwargs): + """ + Return a partial function with an Op subclass with an operator already passed. + + Returns + ------- + callable + """ + return partial(op_class, op_symbol, *args, **kwargs) + + return f + + +_op_classes = {"binary": BinOp, "unary": UnaryOp} + + +def add_ops(op_classes): + """ + Decorator to add default implementation of ops. + """ + + def f(cls): + for op_attr_name, op_class in op_classes.items(): + ops = getattr(cls, f"{op_attr_name}_ops") + ops_map = getattr(cls, f"{op_attr_name}_op_nodes_map") + for op in ops: + op_node = ops_map[op] + if op_node is not None: + made_op = _op_maker(op_class, op) + setattr(cls, f"visit_{op_node}", made_op) + return cls + + return f + + +@disallow(_unsupported_nodes) +@add_ops(_op_classes) +class BaseExprVisitor(ast.NodeVisitor): + """ + Custom ast walker. Parsers of other engines should subclass this class + if necessary. + + Parameters + ---------- + env : Scope + engine : str + parser : str + preparser : callable + """ + + const_type: type[Term] = Constant + term_type = Term + + binary_ops = CMP_OPS_SYMS + BOOL_OPS_SYMS + ARITH_OPS_SYMS + binary_op_nodes = ( + "Gt", + "Lt", + "GtE", + "LtE", + "Eq", + "NotEq", + "In", + "NotIn", + "BitAnd", + "BitOr", + "And", + "Or", + "Add", + "Sub", + "Mult", + None, + "Pow", + "FloorDiv", + "Mod", + ) + binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes)) + + unary_ops = UNARY_OPS_SYMS + unary_op_nodes = "UAdd", "USub", "Invert", "Not" + unary_op_nodes_map = {k: v for k, v in zip(unary_ops, unary_op_nodes)} + + rewrite_map = { + ast.Eq: ast.In, + ast.NotEq: ast.NotIn, + ast.In: ast.In, + ast.NotIn: ast.NotIn, + } + + unsupported_nodes: tuple[str, ...] 
+ + def __init__(self, env, engine, parser, preparser=_preparse) -> None: + self.env = env + self.engine = engine + self.parser = parser + self.preparser = preparser + self.assigner = None + + def visit(self, node, **kwargs): + if isinstance(node, str): + clean = self.preparser(node) + try: + node = ast.fix_missing_locations(ast.parse(clean)) + except SyntaxError as e: + if any(iskeyword(x) for x in clean.split()): + e.msg = "Python keyword not valid identifier in numexpr query" + raise e + + method = "visit_" + type(node).__name__ + visitor = getattr(self, method) + return visitor(node, **kwargs) + + def visit_Module(self, node, **kwargs): + if len(node.body) != 1: + raise SyntaxError("only a single expression is allowed") + expr = node.body[0] + return self.visit(expr, **kwargs) + + def visit_Expr(self, node, **kwargs): + return self.visit(node.value, **kwargs) + + def _rewrite_membership_op(self, node, left, right): + # the kind of the operator (is actually an instance) + op_instance = node.op + op_type = type(op_instance) + + # must be two terms and the comparison operator must be ==/!=/in/not in + if is_term(left) and is_term(right) and op_type in self.rewrite_map: + + left_list, right_list = map(_is_list, (left, right)) + left_str, right_str = map(_is_str, (left, right)) + + # if there are any strings or lists in the expression + if left_list or right_list or left_str or right_str: + op_instance = self.rewrite_map[op_type]() + + # pop the string variable out of locals and replace it with a list + # of one string, kind of a hack + if right_str: + name = self.env.add_tmp([right.value]) + right = self.term_type(name, self.env) + + if left_str: + name = self.env.add_tmp([left.value]) + left = self.term_type(name, self.env) + + op = self.visit(op_instance) + return op, op_instance, left, right + + def _maybe_transform_eq_ne(self, node, left=None, right=None): + if left is None: + left = self.visit(node.left, side="left") + if right is None: + right = self.visit(node.right, side="right") + op, op_class, left, right = self._rewrite_membership_op(node, left, right) + return op, op_class, left, right + + def _maybe_downcast_constants(self, left, right): + f32 = np.dtype(np.float32) + if ( + left.is_scalar + and hasattr(left, "value") + and not right.is_scalar + and right.return_type == f32 + ): + # right is a float32 array, left is a scalar + name = self.env.add_tmp(np.float32(left.value)) + left = self.term_type(name, self.env) + if ( + right.is_scalar + and hasattr(right, "value") + and not left.is_scalar + and left.return_type == f32 + ): + # left is a float32 array, right is a scalar + name = self.env.add_tmp(np.float32(right.value)) + right = self.term_type(name, self.env) + + return left, right + + def _maybe_eval(self, binop, eval_in_python): + # eval `in` and `not in` (for now) in "partial" python space + # things that can be evaluated in "eval" space will be turned into + # temporary variables. 
for example, + # [1,2] in a + 2 * b + # in that case a + 2 * b will be evaluated using numexpr, and the "in" + # call will be evaluated using isin (in python space) + return binop.evaluate( + self.env, self.engine, self.parser, self.term_type, eval_in_python + ) + + def _maybe_evaluate_binop( + self, + op, + op_class, + lhs, + rhs, + eval_in_python=("in", "not in"), + maybe_eval_in_python=("==", "!=", "<", ">", "<=", ">="), + ): + res = op(lhs, rhs) + + if res.has_invalid_return_type: + raise TypeError( + f"unsupported operand type(s) for {res.op}: " + f"'{lhs.type}' and '{rhs.type}'" + ) + + if self.engine != "pytables" and ( + res.op in CMP_OPS_SYMS + and getattr(lhs, "is_datetime", False) + or getattr(rhs, "is_datetime", False) + ): + # all date ops must be done in python bc numexpr doesn't work + # well with NaT + return self._maybe_eval(res, self.binary_ops) + + if res.op in eval_in_python: + # "in"/"not in" ops are always evaluated in python + return self._maybe_eval(res, eval_in_python) + elif self.engine != "pytables": + if ( + getattr(lhs, "return_type", None) == object + or getattr(rhs, "return_type", None) == object + ): + # evaluate "==" and "!=" in python if either of our operands + # has an object return type + return self._maybe_eval(res, eval_in_python + maybe_eval_in_python) + return res + + def visit_BinOp(self, node, **kwargs): + op, op_class, left, right = self._maybe_transform_eq_ne(node) + left, right = self._maybe_downcast_constants(left, right) + return self._maybe_evaluate_binop(op, op_class, left, right) + + def visit_Div(self, node, **kwargs): + return lambda lhs, rhs: Div(lhs, rhs) + + def visit_UnaryOp(self, node, **kwargs): + op = self.visit(node.op) + operand = self.visit(node.operand) + return op(operand) + + def visit_Name(self, node, **kwargs): + return self.term_type(node.id, self.env, **kwargs) + + def visit_NameConstant(self, node, **kwargs) -> Term: + return self.const_type(node.value, self.env) + + def visit_Num(self, node, **kwargs) -> Term: + return self.const_type(node.n, self.env) + + def visit_Constant(self, node, **kwargs) -> Term: + return self.const_type(node.n, self.env) + + def visit_Str(self, node, **kwargs): + name = self.env.add_tmp(node.s) + return self.term_type(name, self.env) + + def visit_List(self, node, **kwargs): + name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts]) + return self.term_type(name, self.env) + + visit_Tuple = visit_List + + def visit_Index(self, node, **kwargs): + """df.index[4]""" + return self.visit(node.value) + + def visit_Subscript(self, node, **kwargs): + from pandas import eval as pd_eval + + value = self.visit(node.value) + slobj = self.visit(node.slice) + result = pd_eval( + slobj, local_dict=self.env, engine=self.engine, parser=self.parser + ) + try: + # a Term instance + v = value.value[result] + except AttributeError: + # an Op instance + lhs = pd_eval( + value, local_dict=self.env, engine=self.engine, parser=self.parser + ) + v = lhs[result] + name = self.env.add_tmp(v) + return self.term_type(name, env=self.env) + + def visit_Slice(self, node, **kwargs): + """df.index[slice(4,6)]""" + lower = node.lower + if lower is not None: + lower = self.visit(lower).value + upper = node.upper + if upper is not None: + upper = self.visit(upper).value + step = node.step + if step is not None: + step = self.visit(step).value + + return slice(lower, upper, step) + + def visit_Assign(self, node, **kwargs): + """ + support a single assignment node, like + + c = a + b + + set the assigner at the top 
level, must be a Name node which + might or might not exist in the resolvers + + """ + if len(node.targets) != 1: + raise SyntaxError("can only assign a single expression") + if not isinstance(node.targets[0], ast.Name): + raise SyntaxError("left hand side of an assignment must be a single name") + if self.env.target is None: + raise ValueError("cannot assign without a target object") + + try: + assigner = self.visit(node.targets[0], **kwargs) + except UndefinedVariableError: + assigner = node.targets[0].id + + self.assigner = getattr(assigner, "name", assigner) + if self.assigner is None: + raise SyntaxError( + "left hand side of an assignment must be a single resolvable name" + ) + + return self.visit(node.value, **kwargs) + + def visit_Attribute(self, node, **kwargs): + attr = node.attr + value = node.value + + ctx = node.ctx + if isinstance(ctx, ast.Load): + # resolve the value + resolved = self.visit(value).value + try: + v = getattr(resolved, attr) + name = self.env.add_tmp(v) + return self.term_type(name, self.env) + except AttributeError: + # something like datetime.datetime where scope is overridden + if isinstance(value, ast.Name) and value.id == attr: + return resolved + raise + + raise ValueError(f"Invalid Attribute context {type(ctx).__name__}") + + def visit_Call(self, node, side=None, **kwargs): + + if isinstance(node.func, ast.Attribute) and node.func.attr != "__call__": + res = self.visit_Attribute(node.func) + elif not isinstance(node.func, ast.Name): + raise TypeError("Only named functions are supported") + else: + try: + res = self.visit(node.func) + except UndefinedVariableError: + # Check if this is a supported function name + try: + res = FuncNode(node.func.id) + except ValueError: + # Raise original error + raise + + if res is None: + # error: "expr" has no attribute "id" + raise ValueError( + f"Invalid function call {node.func.id}" # type: ignore[attr-defined] + ) + if hasattr(res, "value"): + res = res.value + + if isinstance(res, FuncNode): + + new_args = [self.visit(arg) for arg in node.args] + + if node.keywords: + raise TypeError( + f'Function "{res.name}" does not support keyword arguments' + ) + + return res(*new_args) + + else: + + new_args = [self.visit(arg).value for arg in node.args] + + for key in node.keywords: + if not isinstance(key, ast.keyword): + # error: "expr" has no attribute "id" + raise ValueError( + "keyword error in function call " # type: ignore[attr-defined] + f"'{node.func.id}'" + ) + + if key.arg: + kwargs[key.arg] = self.visit(key.value).value + + name = self.env.add_tmp(res(*new_args, **kwargs)) + return self.term_type(name=name, env=self.env) + + def translate_In(self, op): + return op + + def visit_Compare(self, node, **kwargs): + ops = node.ops + comps = node.comparators + + # base case: we have something like a CMP b + if len(comps) == 1: + op = self.translate_In(ops[0]) + binop = ast.BinOp(op=op, left=node.left, right=comps[0]) + return self.visit(binop) + + # recursive case: we have a chained comparison, a CMP b CMP c, etc. 
+ left = node.left + values = [] + for op, comp in zip(ops, comps): + new_node = self.visit( + ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)]) + ) + left = comp + values.append(new_node) + return self.visit(ast.BoolOp(op=ast.And(), values=values)) + + def _try_visit_binop(self, bop): + if isinstance(bop, (Op, Term)): + return bop + return self.visit(bop) + + def visit_BoolOp(self, node, **kwargs): + def visitor(x, y): + lhs = self._try_visit_binop(x) + rhs = self._try_visit_binop(y) + + op, op_class, lhs, rhs = self._maybe_transform_eq_ne(node, lhs, rhs) + return self._maybe_evaluate_binop(op, node.op, lhs, rhs) + + operands = node.values + return reduce(visitor, operands) + + +_python_not_supported = frozenset(["Dict", "BoolOp", "In", "NotIn"]) +_numexpr_supported_calls = frozenset(REDUCTIONS + MATHOPS) + + +@disallow( + (_unsupported_nodes | _python_not_supported) + - (_boolop_nodes | frozenset(["BoolOp", "Attribute", "In", "NotIn", "Tuple"])) +) +class PandasExprVisitor(BaseExprVisitor): + def __init__( + self, + env, + engine, + parser, + preparser=partial( + _preparse, + f=_compose(_replace_locals, _replace_booleans, clean_backtick_quoted_toks), + ), + ) -> None: + super().__init__(env, engine, parser, preparser) + + +@disallow(_unsupported_nodes | _python_not_supported | frozenset(["Not"])) +class PythonExprVisitor(BaseExprVisitor): + def __init__( + self, env, engine, parser, preparser=lambda source, f=None: source + ) -> None: + super().__init__(env, engine, parser, preparser=preparser) + + +class Expr: + """ + Object encapsulating an expression. + + Parameters + ---------- + expr : str + engine : str, optional, default 'numexpr' + parser : str, optional, default 'pandas' + env : Scope, optional, default None + level : int, optional, default 2 + """ + + env: Scope + engine: str + parser: str + + def __init__( + self, + expr, + engine: str = "numexpr", + parser: str = "pandas", + env: Scope | None = None, + level: int = 0, + ) -> None: + self.expr = expr + self.env = env or Scope(level=level + 1) + self.engine = engine + self.parser = parser + self._visitor = PARSERS[parser](self.env, self.engine, self.parser) + self.terms = self.parse() + + @property + def assigner(self): + return getattr(self._visitor, "assigner", None) + + def __call__(self): + return self.terms(self.env) + + def __repr__(self) -> str: + return printing.pprint_thing(self.terms) + + def __len__(self) -> int: + return len(self.expr) + + def parse(self): + """ + Parse an expression. + """ + return self._visitor.visit(self.expr) + + @property + def names(self): + """ + Get the names in an expression. 
+ """ + if is_term(self.terms): + return frozenset([self.terms.name]) + return frozenset(term.name for term in com.flatten(self.terms)) + + +PARSERS = {"python": PythonExprVisitor, "pandas": PandasExprVisitor} diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py new file mode 100644 index 00000000..afb4d0d5 --- /dev/null +++ b/pandas/core/computation/expressions.py @@ -0,0 +1,286 @@ +""" +Expressions +----------- + +Offer fast expression evaluation through numexpr + +""" +from __future__ import annotations + +import operator +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._typing import FuncType +from pandas.util._exceptions import find_stack_level + +from pandas.core.computation.check import NUMEXPR_INSTALLED +from pandas.core.ops import roperator + +if NUMEXPR_INSTALLED: + import numexpr as ne + +_TEST_MODE: bool | None = None +_TEST_RESULT: list[bool] = [] +USE_NUMEXPR = NUMEXPR_INSTALLED +_evaluate: FuncType | None = None +_where: FuncType | None = None + +# the set of dtypes that we will allow pass to numexpr +_ALLOWED_DTYPES = { + "evaluate": {"int64", "int32", "float64", "float32", "bool"}, + "where": {"int64", "float64", "bool"}, +} + +# the minimum prod shape that we will use numexpr +_MIN_ELEMENTS = 1_000_000 + + +def set_use_numexpr(v=True) -> None: + # set/unset to use numexpr + global USE_NUMEXPR + if NUMEXPR_INSTALLED: + USE_NUMEXPR = v + + # choose what we are going to do + global _evaluate, _where + + _evaluate = _evaluate_numexpr if USE_NUMEXPR else _evaluate_standard + _where = _where_numexpr if USE_NUMEXPR else _where_standard + + +def set_numexpr_threads(n=None) -> None: + # if we are using numexpr, set the threads to n + # otherwise reset + if NUMEXPR_INSTALLED and USE_NUMEXPR: + if n is None: + n = ne.detect_number_of_cores() + ne.set_num_threads(n) + + +def _evaluate_standard(op, op_str, a, b): + """ + Standard evaluation. 
+ """ + if _TEST_MODE: + _store_test_result(False) + return op(a, b) + + +def _can_use_numexpr(op, op_str, a, b, dtype_check): + """return a boolean if we WILL be using numexpr""" + if op_str is not None: + + # required min elements (otherwise we are adding overhead) + if a.size > _MIN_ELEMENTS: + # check for dtype compatibility + dtypes: set[str] = set() + for o in [a, b]: + # ndarray and Series Case + if hasattr(o, "dtype"): + dtypes |= {o.dtype.name} + + # allowed are a superset + if not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes: + return True + + return False + + +def _evaluate_numexpr(op, op_str, a, b): + result = None + + if _can_use_numexpr(op, op_str, a, b, "evaluate"): + is_reversed = op.__name__.strip("_").startswith("r") + if is_reversed: + # we were originally called by a reversed op method + a, b = b, a + + a_value = a + b_value = b + + try: + result = ne.evaluate( + f"a_value {op_str} b_value", + local_dict={"a_value": a_value, "b_value": b_value}, + casting="safe", + ) + except TypeError: + # numexpr raises eg for array ** array with integers + # (https://github.com/pydata/numexpr/issues/379) + pass + except NotImplementedError: + if _bool_arith_fallback(op_str, a, b): + pass + else: + raise + + if is_reversed: + # reverse order to original for fallback + a, b = b, a + + if _TEST_MODE: + _store_test_result(result is not None) + + if result is None: + result = _evaluate_standard(op, op_str, a, b) + + return result + + +_op_str_mapping = { + operator.add: "+", + roperator.radd: "+", + operator.mul: "*", + roperator.rmul: "*", + operator.sub: "-", + roperator.rsub: "-", + operator.truediv: "/", + roperator.rtruediv: "/", + # floordiv not supported by numexpr 2.x + operator.floordiv: None, + roperator.rfloordiv: None, + # we require Python semantics for mod of negative for backwards compatibility + # see https://github.com/pydata/numexpr/issues/365 + # so sticking with unaccelerated for now GH#36552 + operator.mod: None, + roperator.rmod: None, + operator.pow: "**", + roperator.rpow: "**", + operator.eq: "==", + operator.ne: "!=", + operator.le: "<=", + operator.lt: "<", + operator.ge: ">=", + operator.gt: ">", + operator.and_: "&", + roperator.rand_: "&", + operator.or_: "|", + roperator.ror_: "|", + operator.xor: "^", + roperator.rxor: "^", + divmod: None, + roperator.rdivmod: None, +} + + +def _where_standard(cond, a, b): + # Caller is responsible for extracting ndarray if necessary + return np.where(cond, a, b) + + +def _where_numexpr(cond, a, b): + # Caller is responsible for extracting ndarray if necessary + result = None + + if _can_use_numexpr(None, "where", a, b, "where"): + + result = ne.evaluate( + "where(cond_value, a_value, b_value)", + local_dict={"cond_value": cond, "a_value": a, "b_value": b}, + casting="safe", + ) + + if result is None: + result = _where_standard(cond, a, b) + + return result + + +# turn myself on +set_use_numexpr(get_option("compute.use_numexpr")) + + +def _has_bool_dtype(x): + try: + return x.dtype == bool + except AttributeError: + return isinstance(x, (bool, np.bool_)) + + +_BOOL_OP_UNSUPPORTED = {"+": "|", "*": "&", "-": "^"} + + +def _bool_arith_fallback(op_str, a, b): + """ + Check if we should fallback to the python `_evaluate_standard` in case + of an unsupported operation by numexpr, which is the case for some + boolean ops. 
+ """ + if _has_bool_dtype(a) and _has_bool_dtype(b): + if op_str in _BOOL_OP_UNSUPPORTED: + warnings.warn( + f"evaluating in Python space because the {repr(op_str)} " + "operator is not supported by numexpr for the bool dtype, " + f"use {repr(_BOOL_OP_UNSUPPORTED[op_str])} instead.", + stacklevel=find_stack_level(), + ) + return True + return False + + +def evaluate(op, a, b, use_numexpr: bool = True): + """ + Evaluate and return the expression of the op on a and b. + + Parameters + ---------- + op : the actual operand + a : left operand + b : right operand + use_numexpr : bool, default True + Whether to try to use numexpr. + """ + op_str = _op_str_mapping[op] + if op_str is not None: + if use_numexpr: + # error: "None" not callable + return _evaluate(op, op_str, a, b) # type: ignore[misc] + return _evaluate_standard(op, op_str, a, b) + + +def where(cond, a, b, use_numexpr=True): + """ + Evaluate the where condition cond on a and b. + + Parameters + ---------- + cond : np.ndarray[bool] + a : return if cond is True + b : return if cond is False + use_numexpr : bool, default True + Whether to try to use numexpr. + """ + assert _where is not None + return _where(cond, a, b) if use_numexpr else _where_standard(cond, a, b) + + +def set_test_mode(v: bool = True) -> None: + """ + Keeps track of whether numexpr was used. + + Stores an additional ``True`` for every successful use of evaluate with + numexpr since the last ``get_test_result``. + """ + global _TEST_MODE, _TEST_RESULT + _TEST_MODE = v + _TEST_RESULT = [] + + +def _store_test_result(used_numexpr: bool) -> None: + global _TEST_RESULT + if used_numexpr: + _TEST_RESULT.append(used_numexpr) + + +def get_test_result() -> list[bool]: + """ + Get test result and reset test_results. + """ + global _TEST_RESULT + res = _TEST_RESULT + _TEST_RESULT = [] + return res diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py new file mode 100644 index 00000000..cb7b33e4 --- /dev/null +++ b/pandas/core/computation/ops.py @@ -0,0 +1,619 @@ +""" +Operator classes for eval. 
+""" + +from __future__ import annotations + +from datetime import datetime +from functools import partial +import operator +from typing import ( + Callable, + Iterable, + Literal, +) + +import numpy as np + +from pandas._libs.tslibs import Timestamp + +from pandas.core.dtypes.common import ( + is_list_like, + is_scalar, +) + +import pandas.core.common as com +from pandas.core.computation.common import ( + ensure_decoded, + result_type_many, +) +from pandas.core.computation.scope import DEFAULT_GLOBALS + +from pandas.io.formats.printing import ( + pprint_thing, + pprint_thing_encoded, +) + +REDUCTIONS = ("sum", "prod") + +_unary_math_ops = ( + "sin", + "cos", + "exp", + "log", + "expm1", + "log1p", + "sqrt", + "sinh", + "cosh", + "tanh", + "arcsin", + "arccos", + "arctan", + "arccosh", + "arcsinh", + "arctanh", + "abs", + "log10", + "floor", + "ceil", +) +_binary_math_ops = ("arctan2",) + +MATHOPS = _unary_math_ops + _binary_math_ops + + +LOCAL_TAG = "__pd_eval_local_" + + +class Term: + def __new__(cls, name, env, side=None, encoding=None): + klass = Constant if not isinstance(name, str) else cls + # error: Argument 2 for "super" not an instance of argument 1 + supr_new = super(Term, klass).__new__ # type: ignore[misc] + return supr_new(klass) + + is_local: bool + + def __init__(self, name, env, side=None, encoding=None) -> None: + # name is a str for Term, but may be something else for subclasses + self._name = name + self.env = env + self.side = side + tname = str(name) + self.is_local = tname.startswith(LOCAL_TAG) or tname in DEFAULT_GLOBALS + self._value = self._resolve_name() + self.encoding = encoding + + @property + def local_name(self) -> str: + return self.name.replace(LOCAL_TAG, "") + + def __repr__(self) -> str: + return pprint_thing(self.name) + + def __call__(self, *args, **kwargs): + return self.value + + def evaluate(self, *args, **kwargs) -> Term: + return self + + def _resolve_name(self): + local_name = str(self.local_name) + is_local = self.is_local + if local_name in self.env.scope and isinstance( + self.env.scope[local_name], type + ): + is_local = False + + res = self.env.resolve(local_name, is_local=is_local) + self.update(res) + + if hasattr(res, "ndim") and res.ndim > 2: + raise NotImplementedError( + "N-dimensional objects, where N > 2, are not supported with eval" + ) + return res + + def update(self, value) -> None: + """ + search order for local (i.e., @variable) variables: + + scope, key_variable + [('locals', 'local_name'), + ('globals', 'local_name'), + ('locals', 'key'), + ('globals', 'key')] + """ + key = self.name + + # if it's a variable name (otherwise a constant) + if isinstance(key, str): + self.env.swapkey(self.local_name, key, new_value=value) + + self.value = value + + @property + def is_scalar(self) -> bool: + return is_scalar(self._value) + + @property + def type(self): + try: + # potentially very slow for large, mixed dtype frames + return self._value.values.dtype + except AttributeError: + try: + # ndarray + return self._value.dtype + except AttributeError: + # scalar + return type(self._value) + + return_type = type + + @property + def raw(self) -> str: + return f"{type(self).__name__}(name={repr(self.name)}, type={self.type})" + + @property + def is_datetime(self) -> bool: + try: + t = self.type.type + except AttributeError: + t = self.type + + return issubclass(t, (datetime, np.datetime64)) + + @property + def value(self): + return self._value + + @value.setter + def value(self, new_value): + self._value = new_value + + @property + def 
name(self): + return self._name + + @property + def ndim(self) -> int: + return self._value.ndim + + +class Constant(Term): + def __init__(self, value, env, side=None, encoding=None) -> None: + super().__init__(value, env, side=side, encoding=encoding) + + def _resolve_name(self): + return self._name + + @property + def name(self): + return self.value + + def __repr__(self) -> str: + # in python 2 str() of float + # can truncate shorter than repr() + return repr(self.name) + + +_bool_op_map = {"not": "~", "and": "&", "or": "|"} + + +class Op: + """ + Hold an operator of arbitrary arity. + """ + + op: str + + def __init__(self, op: str, operands: Iterable[Term | Op], encoding=None) -> None: + self.op = _bool_op_map.get(op, op) + self.operands = operands + self.encoding = encoding + + def __iter__(self): + return iter(self.operands) + + def __repr__(self) -> str: + """ + Print a generic n-ary operator and its operands using infix notation. + """ + # recurse over the operands + parened = (f"({pprint_thing(opr)})" for opr in self.operands) + return pprint_thing(f" {self.op} ".join(parened)) + + @property + def return_type(self): + # clobber types to bool if the op is a boolean operator + if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS): + return np.bool_ + return result_type_many(*(term.type for term in com.flatten(self))) + + @property + def has_invalid_return_type(self) -> bool: + types = self.operand_types + obj_dtype_set = frozenset([np.dtype("object")]) + return self.return_type == object and types - obj_dtype_set + + @property + def operand_types(self): + return frozenset(term.type for term in com.flatten(self)) + + @property + def is_scalar(self) -> bool: + return all(operand.is_scalar for operand in self.operands) + + @property + def is_datetime(self) -> bool: + try: + t = self.return_type.type + except AttributeError: + t = self.return_type + + return issubclass(t, (datetime, np.datetime64)) + + +def _in(x, y): + """ + Compute the vectorized membership of ``x in y`` if possible, otherwise + use Python. + """ + try: + return x.isin(y) + except AttributeError: + if is_list_like(x): + try: + return y.isin(x) + except AttributeError: + pass + return x in y + + +def _not_in(x, y): + """ + Compute the vectorized membership of ``x not in y`` if possible, + otherwise use Python. 
+ """ + try: + return ~x.isin(y) + except AttributeError: + if is_list_like(x): + try: + return ~y.isin(x) + except AttributeError: + pass + return x not in y + + +CMP_OPS_SYMS = (">", "<", ">=", "<=", "==", "!=", "in", "not in") +_cmp_ops_funcs = ( + operator.gt, + operator.lt, + operator.ge, + operator.le, + operator.eq, + operator.ne, + _in, + _not_in, +) +_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs)) + +BOOL_OPS_SYMS = ("&", "|", "and", "or") +_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_) +_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs)) + +ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%") +_arith_ops_funcs = ( + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.pow, + operator.floordiv, + operator.mod, +) +_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs)) + +SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%") +_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod) +_special_case_arith_ops_dict = dict( + zip(SPECIAL_CASE_ARITH_OPS_SYMS, _special_case_arith_ops_funcs) +) + +_binary_ops_dict = {} + +for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict): + _binary_ops_dict.update(d) + + +def _cast_inplace(terms, acceptable_dtypes, dtype): + """ + Cast an expression inplace. + + Parameters + ---------- + terms : Op + The expression that should cast. + acceptable_dtypes : list of acceptable numpy.dtype + Will not cast if term's dtype in this list. + dtype : str or numpy.dtype + The dtype to cast to. + """ + dt = np.dtype(dtype) + for term in terms: + if term.type in acceptable_dtypes: + continue + + try: + new_value = term.value.astype(dt) + except AttributeError: + new_value = dt.type(term.value) + term.update(new_value) + + +def is_term(obj) -> bool: + return isinstance(obj, Term) + + +class BinOp(Op): + """ + Hold a binary operator and its operands. + + Parameters + ---------- + op : str + lhs : Term or Op + rhs : Term or Op + """ + + def __init__(self, op: str, lhs, rhs) -> None: + super().__init__(op, (lhs, rhs)) + self.lhs = lhs + self.rhs = rhs + + self._disallow_scalar_only_bool_ops() + + self.convert_values() + + try: + self.func = _binary_ops_dict[op] + except KeyError as err: + # has to be made a list for python3 + keys = list(_binary_ops_dict.keys()) + raise ValueError( + f"Invalid binary operator {repr(op)}, valid operators are {keys}" + ) from err + + def __call__(self, env): + """ + Recursively evaluate an expression in Python space. + + Parameters + ---------- + env : Scope + + Returns + ------- + object + The result of an evaluated expression. + """ + # recurse over the left/right nodes + left = self.lhs(env) + right = self.rhs(env) + + return self.func(left, right) + + def evaluate(self, env, engine: str, parser, term_type, eval_in_python): + """ + Evaluate a binary operation *before* being passed to the engine. 
+ + Parameters + ---------- + env : Scope + engine : str + parser : str + term_type : type + eval_in_python : list + + Returns + ------- + term_type + The "pre-evaluated" expression as an instance of ``term_type`` + """ + if engine == "python": + res = self(env) + else: + # recurse over the left/right nodes + + left = self.lhs.evaluate( + env, + engine=engine, + parser=parser, + term_type=term_type, + eval_in_python=eval_in_python, + ) + + right = self.rhs.evaluate( + env, + engine=engine, + parser=parser, + term_type=term_type, + eval_in_python=eval_in_python, + ) + + # base cases + if self.op in eval_in_python: + res = self.func(left.value, right.value) + else: + from pandas.core.computation.eval import eval + + res = eval(self, local_dict=env, engine=engine, parser=parser) + + name = env.add_tmp(res) + return term_type(name, env=env) + + def convert_values(self) -> None: + """ + Convert datetimes to a comparable value in an expression. + """ + + def stringify(value): + encoder: Callable + if self.encoding is not None: + encoder = partial(pprint_thing_encoded, encoding=self.encoding) + else: + encoder = pprint_thing + return encoder(value) + + lhs, rhs = self.lhs, self.rhs + + if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar: + v = rhs.value + if isinstance(v, (int, float)): + v = stringify(v) + v = Timestamp(ensure_decoded(v)) + if v.tz is not None: + v = v.tz_convert("UTC") + self.rhs.update(v) + + if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar: + v = lhs.value + if isinstance(v, (int, float)): + v = stringify(v) + v = Timestamp(ensure_decoded(v)) + if v.tz is not None: + v = v.tz_convert("UTC") + self.lhs.update(v) + + def _disallow_scalar_only_bool_ops(self): + rhs = self.rhs + lhs = self.lhs + + # GH#24883 unwrap dtype if necessary to ensure we have a type object + rhs_rt = rhs.return_type + rhs_rt = getattr(rhs_rt, "type", rhs_rt) + lhs_rt = lhs.return_type + lhs_rt = getattr(lhs_rt, "type", lhs_rt) + if ( + (lhs.is_scalar or rhs.is_scalar) + and self.op in _bool_ops_dict + and ( + not ( + issubclass(rhs_rt, (bool, np.bool_)) + and issubclass(lhs_rt, (bool, np.bool_)) + ) + ) + ): + raise NotImplementedError("cannot evaluate scalar only bool ops") + + +def isnumeric(dtype) -> bool: + return issubclass(np.dtype(dtype).type, np.number) + + +class Div(BinOp): + """ + Div operator to special case casting. + + Parameters + ---------- + lhs, rhs : Term or Op + The Terms or Ops in the ``/`` expression. + """ + + def __init__(self, lhs, rhs) -> None: + super().__init__("/", lhs, rhs) + + if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type): + raise TypeError( + f"unsupported operand type(s) for {self.op}: " + f"'{lhs.return_type}' and '{rhs.return_type}'" + ) + + # do not upcast float32s to float64 un-necessarily + acceptable_dtypes = [np.float32, np.float_] + _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_) + + +UNARY_OPS_SYMS = ("+", "-", "~", "not") +_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert) +_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs)) + + +class UnaryOp(Op): + """ + Hold a unary operator and its operands. + + Parameters + ---------- + op : str + The token used to represent the operator. + operand : Term or Op + The Term or Op operand to the operator. + + Raises + ------ + ValueError + * If no function associated with the passed operator token is found. 
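As a sketch of the behaviour that convert_values enables, a string literal compared against a datetime column is parsed into a Timestamp (and tz-aware values are normalized to UTC) before the comparison runs; the example below uses only the public query API and made-up data:

import pandas as pd

df = pd.DataFrame(
    {"ts": pd.date_range("2012-01-25", periods=10, freq="D"), "v": range(10)}
)
# The right-hand string is converted to a Timestamp by convert_values
# before the '>=' comparison is evaluated.
df.query("ts >= '2012-02-01'")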
+ """ + + def __init__(self, op: Literal["+", "-", "~", "not"], operand) -> None: + super().__init__(op, (operand,)) + self.operand = operand + + try: + self.func = _unary_ops_dict[op] + except KeyError as err: + raise ValueError( + f"Invalid unary operator {repr(op)}, " + f"valid operators are {UNARY_OPS_SYMS}" + ) from err + + def __call__(self, env) -> MathCall: + operand = self.operand(env) + # error: Cannot call function of unknown type + return self.func(operand) # type: ignore[operator] + + def __repr__(self) -> str: + return pprint_thing(f"{self.op}({self.operand})") + + @property + def return_type(self) -> np.dtype: + operand = self.operand + if operand.return_type == np.dtype("bool"): + return np.dtype("bool") + if isinstance(operand, Op) and ( + operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict + ): + return np.dtype("bool") + return np.dtype("int") + + +class MathCall(Op): + def __init__(self, func, args) -> None: + super().__init__(func.name, args) + self.func = func + + def __call__(self, env): + # error: "Op" not callable + operands = [op(env) for op in self.operands] # type: ignore[operator] + with np.errstate(all="ignore"): + return self.func.func(*operands) + + def __repr__(self) -> str: + operands = map(str, self.operands) + return pprint_thing(f"{self.op}({','.join(operands)})") + + +class FuncNode: + def __init__(self, name: str) -> None: + if name not in MATHOPS: + raise ValueError(f'"{name}" is not a supported function') + self.name = name + self.func = getattr(np, name) + + def __call__(self, *args): + return MathCall(self, args) diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py new file mode 100644 index 00000000..89d1f213 --- /dev/null +++ b/pandas/core/computation/parsing.py @@ -0,0 +1,195 @@ +""" +:func:`~pandas.eval` source string parsing functions +""" +from __future__ import annotations + +from io import StringIO +from keyword import iskeyword +import token +import tokenize +from typing import ( + Hashable, + Iterator, +) + +# A token value Python's tokenizer probably will never use. +BACKTICK_QUOTED_STRING = 100 + + +def create_valid_python_identifier(name: str) -> str: + """ + Create valid Python identifiers from any string. + + Check if name contains any special characters. If it contains any + special characters, the special characters will be replaced by + a special string and a prefix is added. + + Raises + ------ + SyntaxError + If the returned name is not a Python valid identifier, raise an exception. + This can happen if there is a hashtag in the name, as the tokenizer will + than terminate and not find the backtick. + But also for characters that fall out of the range of (U+0001..U+007F). + """ + if name.isidentifier() and not iskeyword(name): + return name + + # Create a dict with the special characters and their replacement string. + # EXACT_TOKEN_TYPES contains these special characters + # token.tok_name contains a readable description of the replacement string. + special_characters_replacements = { + char: f"_{token.tok_name[tokval]}_" + for char, tokval in (tokenize.EXACT_TOKEN_TYPES.items()) + } + special_characters_replacements.update( + { + " ": "_", + "?": "_QUESTIONMARK_", + "!": "_EXCLAMATIONMARK_", + "$": "_DOLLARSIGN_", + "€": "_EUROSIGN_", + "°": "_DEGREESIGN_", + # Including quotes works, but there are exceptions. + "'": "_SINGLEQUOTE_", + '"': "_DOUBLEQUOTE_", + # Currently not possible. Terminates parser and won't find backtick. 
+ # "#": "_HASH_", + } + ) + + name = "".join([special_characters_replacements.get(char, char) for char in name]) + name = "BACKTICK_QUOTED_STRING_" + name + + if not name.isidentifier(): + raise SyntaxError(f"Could not convert '{name}' to a valid Python identifier.") + + return name + + +def clean_backtick_quoted_toks(tok: tuple[int, str]) -> tuple[int, str]: + """ + Clean up a column name if surrounded by backticks. + + Backtick quoted string are indicated by a certain tokval value. If a string + is a backtick quoted token it will processed by + :func:`_create_valid_python_identifier` so that the parser can find this + string when the query is executed. + In this case the tok will get the NAME tokval. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + tok : Tuple[int, str] + Either the input or token or the replacement values + """ + toknum, tokval = tok + if toknum == BACKTICK_QUOTED_STRING: + return tokenize.NAME, create_valid_python_identifier(tokval) + return toknum, tokval + + +def clean_column_name(name: Hashable) -> Hashable: + """ + Function to emulate the cleaning of a backtick quoted name. + + The purpose for this function is to see what happens to the name of + identifier if it goes to the process of being parsed a Python code + inside a backtick quoted string and than being cleaned + (removed of any special characters). + + Parameters + ---------- + name : hashable + Name to be cleaned. + + Returns + ------- + name : hashable + Returns the name after tokenizing and cleaning. + + Notes + ----- + For some cases, a name cannot be converted to a valid Python identifier. + In that case :func:`tokenize_string` raises a SyntaxError. + In that case, we just return the name unmodified. + + If this name was used in the query string (this makes the query call impossible) + an error will be raised by :func:`tokenize_backtick_quoted_string` instead, + which is not caught and propagates to the user level. + """ + try: + tokenized = tokenize_string(f"`{name}`") + tokval = next(tokenized)[1] + return create_valid_python_identifier(tokval) + except SyntaxError: + return name + + +def tokenize_backtick_quoted_string( + token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int +) -> tuple[int, str]: + """ + Creates a token from a backtick quoted string. + + Moves the token_generator forwards till right after the next backtick. + + Parameters + ---------- + token_generator : Iterator[tokenize.TokenInfo] + The generator that yields the tokens of the source string (Tuple[int, str]). + The generator is at the first token after the backtick (`) + + source : str + The Python source code string. + + string_start : int + This is the start of backtick quoted string inside the source string. + + Returns + ------- + tok: Tuple[int, str] + The token that represents the backtick quoted string. + The integer is equal to BACKTICK_QUOTED_STRING (100). + """ + for _, tokval, start, _, _ in token_generator: + if tokval == "`": + string_end = start[1] + break + + return BACKTICK_QUOTED_STRING, source[string_start:string_end] + + +def tokenize_string(source: str) -> Iterator[tuple[int, str]]: + """ + Tokenize a Python source code string. + + Parameters + ---------- + source : str + The Python source code string. + + Returns + ------- + tok_generator : Iterator[Tuple[int, str]] + An iterator yielding all tokens with only toknum and tokval (Tuple[ing, str]). 
+ """ + line_reader = StringIO(source).readline + token_generator = tokenize.generate_tokens(line_reader) + + # Loop over all tokens till a backtick (`) is found. + # Then, take all tokens till the next backtick to form a backtick quoted string + for toknum, tokval, start, _, _ in token_generator: + if tokval == "`": + try: + yield tokenize_backtick_quoted_string( + token_generator, source, string_start=start[1] + 1 + ) + except Exception as err: + raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err + else: + yield toknum, tokval diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py new file mode 100644 index 00000000..5fbc26c0 --- /dev/null +++ b/pandas/core/computation/pytables.py @@ -0,0 +1,656 @@ +""" manage PyTables query interface via Expressions """ +from __future__ import annotations + +import ast +from functools import partial +from typing import ( + TYPE_CHECKING, + Any, +) + +import numpy as np + +from pandas._libs.tslibs import ( + Timedelta, + Timestamp, +) +from pandas._typing import npt +from pandas.errors import UndefinedVariableError + +from pandas.core.dtypes.common import is_list_like + +import pandas.core.common as com +from pandas.core.computation import ( + expr, + ops, + scope as _scope, +) +from pandas.core.computation.common import ensure_decoded +from pandas.core.computation.expr import BaseExprVisitor +from pandas.core.computation.ops import is_term +from pandas.core.construction import extract_array +from pandas.core.indexes.base import Index + +from pandas.io.formats.printing import ( + pprint_thing, + pprint_thing_encoded, +) + +if TYPE_CHECKING: + from pandas.compat.chainmap import DeepChainMap + + +class PyTablesScope(_scope.Scope): + __slots__ = ("queryables",) + + queryables: dict[str, Any] + + def __init__( + self, + level: int, + global_dict=None, + local_dict=None, + queryables: dict[str, Any] | None = None, + ) -> None: + super().__init__(level + 1, global_dict=global_dict, local_dict=local_dict) + self.queryables = queryables or {} + + +class Term(ops.Term): + env: PyTablesScope + + def __new__(cls, name, env, side=None, encoding=None): + if isinstance(name, str): + klass = cls + else: + klass = Constant + return object.__new__(klass) + + def __init__(self, name, env: PyTablesScope, side=None, encoding=None) -> None: + super().__init__(name, env, side=side, encoding=encoding) + + def _resolve_name(self): + # must be a queryables + if self.side == "left": + # Note: The behavior of __new__ ensures that self.name is a str here + if self.name not in self.env.queryables: + raise NameError(f"name {repr(self.name)} is not defined") + return self.name + + # resolve the rhs (and allow it to be None) + try: + return self.env.resolve(self.name, is_local=False) + except UndefinedVariableError: + return self.name + + # read-only property overwriting read/write property + @property # type: ignore[misc] + def value(self): + return self._value + + +class Constant(Term): + def __init__(self, value, env: PyTablesScope, side=None, encoding=None) -> None: + assert isinstance(env, PyTablesScope), type(env) + super().__init__(value, env, side=side, encoding=encoding) + + def _resolve_name(self): + return self._name + + +class BinOp(ops.BinOp): + + _max_selectors = 31 + + op: str + queryables: dict[str, Any] + condition: str | None + + def __init__(self, op: str, lhs, rhs, queryables: dict[str, Any], encoding) -> None: + super().__init__(op, lhs, rhs) + self.queryables = queryables + self.encoding = encoding + 
self.condition = None + + def _disallow_scalar_only_bool_ops(self): + pass + + def prune(self, klass): + def pr(left, right): + """create and return a new specialized BinOp from myself""" + if left is None: + return right + elif right is None: + return left + + k = klass + if isinstance(left, ConditionBinOp): + if isinstance(right, ConditionBinOp): + k = JointConditionBinOp + elif isinstance(left, k): + return left + elif isinstance(right, k): + return right + + elif isinstance(left, FilterBinOp): + if isinstance(right, FilterBinOp): + k = JointFilterBinOp + elif isinstance(left, k): + return left + elif isinstance(right, k): + return right + + return k( + self.op, left, right, queryables=self.queryables, encoding=self.encoding + ).evaluate() + + left, right = self.lhs, self.rhs + + if is_term(left) and is_term(right): + res = pr(left.value, right.value) + elif not is_term(left) and is_term(right): + res = pr(left.prune(klass), right.value) + elif is_term(left) and not is_term(right): + res = pr(left.value, right.prune(klass)) + elif not (is_term(left) or is_term(right)): + res = pr(left.prune(klass), right.prune(klass)) + + return res + + def conform(self, rhs): + """inplace conform rhs""" + if not is_list_like(rhs): + rhs = [rhs] + if isinstance(rhs, np.ndarray): + rhs = rhs.ravel() + return rhs + + @property + def is_valid(self) -> bool: + """return True if this is a valid field""" + return self.lhs in self.queryables + + @property + def is_in_table(self) -> bool: + """ + return True if this is a valid column name for generation (e.g. an + actual column in the table) + """ + return self.queryables.get(self.lhs) is not None + + @property + def kind(self): + """the kind of my field""" + return getattr(self.queryables.get(self.lhs), "kind", None) + + @property + def meta(self): + """the meta of my field""" + return getattr(self.queryables.get(self.lhs), "meta", None) + + @property + def metadata(self): + """the metadata of my field""" + return getattr(self.queryables.get(self.lhs), "metadata", None) + + def generate(self, v) -> str: + """create and return the op string for this TermValue""" + val = v.tostring(self.encoding) + return f"({self.lhs} {self.op} {val})" + + def convert_value(self, v) -> TermValue: + """ + convert the expression that is in the term to something that is + accepted by pytables + """ + + def stringify(value): + if self.encoding is not None: + return pprint_thing_encoded(value, encoding=self.encoding) + return pprint_thing(value) + + kind = ensure_decoded(self.kind) + meta = ensure_decoded(self.meta) + if kind == "datetime64" or kind == "datetime": + if isinstance(v, (int, float)): + v = stringify(v) + v = ensure_decoded(v) + v = Timestamp(v) + if v.tz is not None: + v = v.tz_convert("UTC") + return TermValue(v, v.value, kind) + elif kind == "timedelta64" or kind == "timedelta": + if isinstance(v, str): + v = Timedelta(v).value + else: + v = Timedelta(v, unit="s").value + return TermValue(int(v), v, kind) + elif meta == "category": + metadata = extract_array(self.metadata, extract_numpy=True) + result: npt.NDArray[np.intp] | np.intp | int + if v not in metadata: + result = -1 + else: + result = metadata.searchsorted(v, side="left") + return TermValue(result, result, "integer") + elif kind == "integer": + v = int(float(v)) + return TermValue(v, v, kind) + elif kind == "float": + v = float(v) + return TermValue(v, v, kind) + elif kind == "bool": + if isinstance(v, str): + v = not v.strip().lower() in [ + "false", + "f", + "no", + "n", + "none", + "0", + "[]", + "{}", + 
"", + ] + else: + v = bool(v) + return TermValue(v, v, kind) + elif isinstance(v, str): + # string quoting + return TermValue(v, stringify(v), "string") + else: + raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column") + + def convert_values(self): + pass + + +class FilterBinOp(BinOp): + filter: tuple[Any, Any, Index] | None = None + + def __repr__(self) -> str: + if self.filter is None: + return "Filter: Not Initialized" + return pprint_thing(f"[Filter : [{self.filter[0]}] -> [{self.filter[1]}]") + + def invert(self): + """invert the filter""" + if self.filter is not None: + self.filter = ( + self.filter[0], + self.generate_filter_op(invert=True), + self.filter[2], + ) + return self + + def format(self): + """return the actual filter format""" + return [self.filter] + + def evaluate(self): + + if not self.is_valid: + raise ValueError(f"query term is not valid [{self}]") + + rhs = self.conform(self.rhs) + values = list(rhs) + + if self.is_in_table: + + # if too many values to create the expression, use a filter instead + if self.op in ["==", "!="] and len(values) > self._max_selectors: + + filter_op = self.generate_filter_op() + self.filter = (self.lhs, filter_op, Index(values)) + + return self + return None + + # equality conditions + if self.op in ["==", "!="]: + + filter_op = self.generate_filter_op() + self.filter = (self.lhs, filter_op, Index(values)) + + else: + raise TypeError( + f"passing a filterable condition to a non-table indexer [{self}]" + ) + + return self + + def generate_filter_op(self, invert: bool = False): + if (self.op == "!=" and not invert) or (self.op == "==" and invert): + return lambda axis, vals: ~axis.isin(vals) + else: + return lambda axis, vals: axis.isin(vals) + + +class JointFilterBinOp(FilterBinOp): + def format(self): + raise NotImplementedError("unable to collapse Joint Filters") + + def evaluate(self): + return self + + +class ConditionBinOp(BinOp): + def __repr__(self) -> str: + return pprint_thing(f"[Condition : [{self.condition}]]") + + def invert(self): + """invert the condition""" + # if self.condition is not None: + # self.condition = "~(%s)" % self.condition + # return self + raise NotImplementedError( + "cannot use an invert condition when passing to numexpr" + ) + + def format(self): + """return the actual ne format""" + return self.condition + + def evaluate(self): + + if not self.is_valid: + raise ValueError(f"query term is not valid [{self}]") + + # convert values if we are in the table + if not self.is_in_table: + return None + + rhs = self.conform(self.rhs) + values = [self.convert_value(v) for v in rhs] + + # equality conditions + if self.op in ["==", "!="]: + + # too many values to create the expression? 
+ if len(values) <= self._max_selectors: + vs = [self.generate(v) for v in values] + self.condition = f"({' | '.join(vs)})" + + # use a filter after reading + else: + return None + else: + self.condition = self.generate(values[0]) + + return self + + +class JointConditionBinOp(ConditionBinOp): + def evaluate(self): + self.condition = f"({self.lhs.condition} {self.op} {self.rhs.condition})" + return self + + +class UnaryOp(ops.UnaryOp): + def prune(self, klass): + + if self.op != "~": + raise NotImplementedError("UnaryOp only support invert type ops") + + operand = self.operand + operand = operand.prune(klass) + + if operand is not None and ( + issubclass(klass, ConditionBinOp) + and operand.condition is not None + or not issubclass(klass, ConditionBinOp) + and issubclass(klass, FilterBinOp) + and operand.filter is not None + ): + return operand.invert() + return None + + +class PyTablesExprVisitor(BaseExprVisitor): + const_type = Constant + term_type = Term + + def __init__(self, env, engine, parser, **kwargs) -> None: + super().__init__(env, engine, parser) + for bin_op in self.binary_ops: + bin_node = self.binary_op_nodes_map[bin_op] + setattr( + self, + f"visit_{bin_node}", + lambda node, bin_op=bin_op: partial(BinOp, bin_op, **kwargs), + ) + + def visit_UnaryOp(self, node, **kwargs): + if isinstance(node.op, (ast.Not, ast.Invert)): + return UnaryOp("~", self.visit(node.operand)) + elif isinstance(node.op, ast.USub): + return self.const_type(-self.visit(node.operand).value, self.env) + elif isinstance(node.op, ast.UAdd): + raise NotImplementedError("Unary addition not supported") + + def visit_Index(self, node, **kwargs): + return self.visit(node.value).value + + def visit_Assign(self, node, **kwargs): + cmpr = ast.Compare( + ops=[ast.Eq()], left=node.targets[0], comparators=[node.value] + ) + return self.visit(cmpr) + + def visit_Subscript(self, node, **kwargs): + # only allow simple subscripts + + value = self.visit(node.value) + slobj = self.visit(node.slice) + try: + value = value.value + except AttributeError: + pass + + if isinstance(slobj, Term): + # In py39 np.ndarray lookups with Term containing int raise + slobj = slobj.value + + try: + return self.const_type(value[slobj], self.env) + except TypeError as err: + raise ValueError( + f"cannot subscript {repr(value)} with {repr(slobj)}" + ) from err + + def visit_Attribute(self, node, **kwargs): + attr = node.attr + value = node.value + + ctx = type(node.ctx) + if ctx == ast.Load: + # resolve the value + resolved = self.visit(value) + + # try to get the value to see if we are another expression + try: + resolved = resolved.value + except (AttributeError): + pass + + try: + return self.term_type(getattr(resolved, attr), self.env) + except AttributeError: + + # something like datetime.datetime where scope is overridden + if isinstance(value, ast.Name) and value.id == attr: + return resolved + + raise ValueError(f"Invalid Attribute context {ctx.__name__}") + + def translate_In(self, op): + return ast.Eq() if isinstance(op, ast.In) else op + + def _rewrite_membership_op(self, node, left, right): + return self.visit(node.op), node.op, left, right + + +def _validate_where(w): + """ + Validate that the where statement is of the right type. + + The type may either be String, Expr, or list-like of Exprs. + + Parameters + ---------- + w : String term expression, Expr, or list-like of Exprs. + + Returns + ------- + where : The original where clause if the check was successful. 
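The condition and filter nodes above are built from the ``where=`` strings accepted by HDFStore queries. A minimal sketch of the user-facing side, assuming PyTables is installed and using a hypothetical file name:

import pandas as pd

df = pd.DataFrame(
    {"A": range(5), "B": list("abcde")},
    index=pd.date_range("2020-01-01", periods=5),
)
with pd.HDFStore("demo.h5") as store:  # hypothetical path
    store.put("df", df, format="table", data_columns=True)
    # The where string is parsed by PyTablesExpr into ConditionBinOp /
    # FilterBinOp nodes like the ones defined above.
    subset = store.select("df", where="index >= '2020-01-03' & A > 1")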
+ + Raises + ------ + TypeError : An invalid data type was passed in for w (e.g. dict). + """ + if not (isinstance(w, (PyTablesExpr, str)) or is_list_like(w)): + raise TypeError( + "where must be passed as a string, PyTablesExpr, " + "or list-like of PyTablesExpr" + ) + + return w + + +class PyTablesExpr(expr.Expr): + """ + Hold a pytables-like expression, comprised of possibly multiple 'terms'. + + Parameters + ---------- + where : string term expression, PyTablesExpr, or list-like of PyTablesExprs + queryables : a "kinds" map (dict of column name -> kind), or None if column + is non-indexable + encoding : an encoding that will encode the query terms + + Returns + ------- + a PyTablesExpr object + + Examples + -------- + 'index>=date' + "columns=['A', 'D']" + 'columns=A' + 'columns==A' + "~(columns=['A','B'])" + 'index>df.index[3] & string="bar"' + '(index>df.index[3] & index<=df.index[6]) | string="bar"' + "ts>=Timestamp('2012-02-01')" + "major_axis>=20130101" + """ + + _visitor: PyTablesExprVisitor | None + env: PyTablesScope + expr: str + + def __init__( + self, + where, + queryables: dict[str, Any] | None = None, + encoding=None, + scope_level: int = 0, + ) -> None: + + where = _validate_where(where) + + self.encoding = encoding + self.condition = None + self.filter = None + self.terms = None + self._visitor = None + + # capture the environment if needed + local_dict: DeepChainMap[Any, Any] | None = None + + if isinstance(where, PyTablesExpr): + local_dict = where.env.scope + _where = where.expr + + elif is_list_like(where): + where = list(where) + for idx, w in enumerate(where): + if isinstance(w, PyTablesExpr): + local_dict = w.env.scope + else: + w = _validate_where(w) + where[idx] = w + _where = " & ".join([f"({w})" for w in com.flatten(where)]) + else: + # _validate_where ensures we otherwise have a string + _where = where + + self.expr = _where + self.env = PyTablesScope(scope_level + 1, local_dict=local_dict) + + if queryables is not None and isinstance(self.expr, str): + self.env.queryables.update(queryables) + self._visitor = PyTablesExprVisitor( + self.env, + queryables=queryables, + parser="pytables", + engine="pytables", + encoding=encoding, + ) + self.terms = self.parse() + + def __repr__(self) -> str: + if self.terms is not None: + return pprint_thing(self.terms) + return pprint_thing(self.expr) + + def evaluate(self): + """create and return the numexpr condition and filter""" + try: + self.condition = self.terms.prune(ConditionBinOp) + except AttributeError as err: + raise ValueError( + f"cannot process expression [{self.expr}], [{self}] " + "is not a valid condition" + ) from err + try: + self.filter = self.terms.prune(FilterBinOp) + except AttributeError as err: + raise ValueError( + f"cannot process expression [{self.expr}], [{self}] " + "is not a valid filter" + ) from err + + return self.condition, self.filter + + +class TermValue: + """hold a term value the we use to construct a condition/filter""" + + def __init__(self, value, converted, kind: str) -> None: + assert isinstance(kind, str), kind + self.value = value + self.converted = converted + self.kind = kind + + def tostring(self, encoding) -> str: + """quote the string if not encoded else encode and return""" + if self.kind == "string": + if encoding is not None: + return str(self.converted) + return f'"{self.converted}"' + elif self.kind == "float": + # python 2 str(float) is not always + # round-trippable so use repr() + return repr(self.converted) + return str(self.converted) + + +def maybe_expression(s) 
-> bool: + """loose checking if s is a pytables-acceptable expression""" + if not isinstance(s, str): + return False + ops = PyTablesExprVisitor.binary_ops + PyTablesExprVisitor.unary_ops + ("=",) + + # make sure we have an op at least + return any(op in s for op in ops) diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py new file mode 100644 index 00000000..5188b446 --- /dev/null +++ b/pandas/core/computation/scope.py @@ -0,0 +1,330 @@ +""" +Module for scope operations +""" +from __future__ import annotations + +import datetime +import inspect +from io import StringIO +import itertools +import pprint +import struct +import sys + +import numpy as np + +from pandas._libs.tslibs import Timestamp +from pandas.compat.chainmap import DeepChainMap +from pandas.errors import UndefinedVariableError + + +def ensure_scope( + level: int, global_dict=None, local_dict=None, resolvers=(), target=None, **kwargs +) -> Scope: + """Ensure that we are grabbing the correct scope.""" + return Scope( + level + 1, + global_dict=global_dict, + local_dict=local_dict, + resolvers=resolvers, + target=target, + ) + + +def _replacer(x) -> str: + """ + Replace a number with its hexadecimal representation. Used to tag + temporary variables with their calling scope's id. + """ + # get the hex repr of the binary char and remove 0x and pad by pad_size + # zeros + try: + hexin = ord(x) + except TypeError: + # bytes literals masquerade as ints when iterating in py3 + hexin = x + + return hex(hexin) + + +def _raw_hex_id(obj) -> str: + """Return the padded hexadecimal id of ``obj``.""" + # interpret as a pointer since that's what really what id returns + packed = struct.pack("@P", id(obj)) + return "".join([_replacer(x) for x in packed]) + + +DEFAULT_GLOBALS = { + "Timestamp": Timestamp, + "datetime": datetime.datetime, + "True": True, + "False": False, + "list": list, + "tuple": tuple, + "inf": np.inf, + "Inf": np.inf, +} + + +def _get_pretty_string(obj) -> str: + """ + Return a prettier version of obj. + + Parameters + ---------- + obj : object + Object to pretty print + + Returns + ------- + str + Pretty print object repr + """ + sio = StringIO() + pprint.pprint(obj, stream=sio) + return sio.getvalue() + + +class Scope: + """ + Object to hold scope, with a few bells to deal with some custom syntax + and contexts added by pandas. 
+ + Parameters + ---------- + level : int + global_dict : dict or None, optional, default None + local_dict : dict or Scope or None, optional, default None + resolvers : list-like or None, optional, default None + target : object + + Attributes + ---------- + level : int + scope : DeepChainMap + target : object + temps : dict + """ + + __slots__ = ["level", "scope", "target", "resolvers", "temps"] + level: int + scope: DeepChainMap + resolvers: DeepChainMap + temps: dict + + def __init__( + self, level: int, global_dict=None, local_dict=None, resolvers=(), target=None + ) -> None: + self.level = level + 1 + + # shallow copy because we don't want to keep filling this up with what + # was there before if there are multiple calls to Scope/_ensure_scope + self.scope = DeepChainMap(DEFAULT_GLOBALS.copy()) + self.target = target + + if isinstance(local_dict, Scope): + self.scope.update(local_dict.scope) + if local_dict.target is not None: + self.target = local_dict.target + self._update(local_dict.level) + + frame = sys._getframe(self.level) + + try: + # shallow copy here because we don't want to replace what's in + # scope when we align terms (alignment accesses the underlying + # numpy array of pandas objects) + scope_global = self.scope.new_child( + (global_dict if global_dict is not None else frame.f_globals).copy() + ) + self.scope = DeepChainMap(scope_global) + if not isinstance(local_dict, Scope): + scope_local = self.scope.new_child( + (local_dict if local_dict is not None else frame.f_locals).copy() + ) + self.scope = DeepChainMap(scope_local) + finally: + del frame + + # assumes that resolvers are going from outermost scope to inner + if isinstance(local_dict, Scope): + resolvers += tuple(local_dict.resolvers.maps) + self.resolvers = DeepChainMap(*resolvers) + self.temps = {} + + def __repr__(self) -> str: + scope_keys = _get_pretty_string(list(self.scope.keys())) + res_keys = _get_pretty_string(list(self.resolvers.keys())) + return f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})" + + @property + def has_resolvers(self) -> bool: + """ + Return whether we have any extra scope. + + For example, DataFrames pass Their columns as resolvers during calls to + ``DataFrame.eval()`` and ``DataFrame.query()``. + + Returns + ------- + hr : bool + """ + return bool(len(self.resolvers)) + + def resolve(self, key: str, is_local: bool): + """ + Resolve a variable name in a possibly local context. + + Parameters + ---------- + key : str + A variable name + is_local : bool + Flag indicating whether the variable is local or not (prefixed with + the '@' symbol) + + Returns + ------- + value : object + The value of a particular variable + """ + try: + # only look for locals in outer scope + if is_local: + return self.scope[key] + + # not a local variable so check in resolvers if we have them + if self.has_resolvers: + return self.resolvers[key] + + # if we're here that means that we have no locals and we also have + # no resolvers + assert not is_local and not self.has_resolvers + return self.scope[key] + except KeyError: + try: + # last ditch effort we look in temporaries + # these are created when parsing indexing expressions + # e.g., df[df > 0] + return self.temps[key] + except KeyError as err: + raise UndefinedVariableError(key, is_local) from err + + def swapkey(self, old_key: str, new_key: str, new_value=None) -> None: + """ + Replace a variable name, with a potentially new value. 
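In practice this resolution order is what DataFrame.query and DataFrame.eval rely on: bare names are satisfied by the frame's column resolvers, while names prefixed with '@' are looked up as locals captured from the calling frame. A minimal sketch:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
limit = 1
# 'a' is found through the DataFrame's resolvers (its columns); '@limit'
# is resolved from the local scope captured by Scope above.
df.query("a > @limit")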
+ + Parameters + ---------- + old_key : str + Current variable name to replace + new_key : str + New variable name to replace `old_key` with + new_value : object + Value to be replaced along with the possible renaming + """ + if self.has_resolvers: + maps = self.resolvers.maps + self.scope.maps + else: + maps = self.scope.maps + + maps.append(self.temps) + + for mapping in maps: + if old_key in mapping: + mapping[new_key] = new_value + return + + def _get_vars(self, stack, scopes: list[str]) -> None: + """ + Get specifically scoped variables from a list of stack frames. + + Parameters + ---------- + stack : list + A list of stack frames as returned by ``inspect.stack()`` + scopes : sequence of strings + A sequence containing valid stack frame attribute names that + evaluate to a dictionary. For example, ('locals', 'globals') + """ + variables = itertools.product(scopes, stack) + for scope, (frame, _, _, _, _, _) in variables: + try: + d = getattr(frame, "f_" + scope) + self.scope = DeepChainMap(self.scope.new_child(d)) + finally: + # won't remove it, but DECREF it + # in Py3 this probably isn't necessary since frame won't be + # scope after the loop + del frame + + def _update(self, level: int) -> None: + """ + Update the current scope by going back `level` levels. + + Parameters + ---------- + level : int + """ + sl = level + 1 + + # add sl frames to the scope starting with the + # most distant and overwriting with more current + # makes sure that we can capture variable scope + stack = inspect.stack() + + try: + self._get_vars(stack[:sl], scopes=["locals"]) + finally: + del stack[:], stack + + def add_tmp(self, value) -> str: + """ + Add a temporary variable to the scope. + + Parameters + ---------- + value : object + An arbitrary object to be assigned to a temporary variable. + + Returns + ------- + str + The name of the temporary variable created. + """ + name = f"{type(value).__name__}_{self.ntemps}_{_raw_hex_id(self)}" + + # add to inner most scope + assert name not in self.temps + self.temps[name] = value + assert name in self.temps + + # only increment if the variable gets put in the scope + return name + + @property + def ntemps(self) -> int: + """The number of temporary variables in this scope""" + return len(self.temps) + + @property + def full_scope(self) -> DeepChainMap: + """ + Return the full scope for use with passing to engines transparently + as a mapping. + + Returns + ------- + vars : DeepChainMap + All variables in this scope. + """ + # error: Unsupported operand types for + ("List[Dict[Any, Any]]" and + # "List[Mapping[Any, Any]]") + # error: Unsupported operand types for + ("List[Dict[Any, Any]]" and + # "List[Mapping[str, Any]]") + maps = ( + [self.temps] + + self.resolvers.maps # type: ignore[operator] + + self.scope.maps # type: ignore[operator] + ) + return DeepChainMap(*maps) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py new file mode 100644 index 00000000..e6beab0b --- /dev/null +++ b/pandas/core/config_init.py @@ -0,0 +1,991 @@ +""" +This module is imported from the pandas package __init__.py file +in order to ensure that the core.config options registered here will +be available as soon as the user loads the package. if register_option +is invoked inside specific modules, they will not be registered until that +module is imported, which may or may not be a problem. + +If you need to make sure options are available even before a certain +module is imported, register them here rather than in the module. 
+ +""" +from __future__ import annotations + +import os +from typing import Callable +import warnings + +import pandas._config.config as cf +from pandas._config.config import ( + is_bool, + is_callable, + is_instance_factory, + is_int, + is_nonnegative_int, + is_one_of_factory, + is_str, + is_text, +) + +from pandas.util._exceptions import find_stack_level + +# compute + +use_bottleneck_doc = """ +: bool + Use the bottleneck library to accelerate if it is installed, + the default is True + Valid values: False,True +""" + + +def use_bottleneck_cb(key) -> None: + from pandas.core import nanops + + nanops.set_use_bottleneck(cf.get_option(key)) + + +use_numexpr_doc = """ +: bool + Use the numexpr library to accelerate computation if it is installed, + the default is True + Valid values: False,True +""" + + +def use_numexpr_cb(key) -> None: + from pandas.core.computation import expressions + + expressions.set_use_numexpr(cf.get_option(key)) + + +use_numba_doc = """ +: bool + Use the numba engine option for select operations if it is installed, + the default is False + Valid values: False,True +""" + + +def use_numba_cb(key) -> None: + from pandas.core.util import numba_ + + numba_.set_use_numba(cf.get_option(key)) + + +with cf.config_prefix("compute"): + cf.register_option( + "use_bottleneck", + True, + use_bottleneck_doc, + validator=is_bool, + cb=use_bottleneck_cb, + ) + cf.register_option( + "use_numexpr", True, use_numexpr_doc, validator=is_bool, cb=use_numexpr_cb + ) + cf.register_option( + "use_numba", False, use_numba_doc, validator=is_bool, cb=use_numba_cb + ) +# +# options from the "display" namespace + +pc_precision_doc = """ +: int + Floating point output precision in terms of number of places after the + decimal, for regular formatting as well as scientific notation. Similar + to ``precision`` in :meth:`numpy.set_printoptions`. +""" + +pc_colspace_doc = """ +: int + Default space for DataFrame columns. +""" + +pc_max_rows_doc = """ +: int + If max_rows is exceeded, switch to truncate view. Depending on + `large_repr`, objects are either centrally truncated or printed as + a summary view. 'None' value means unlimited. + + In case python/IPython is running in a terminal and `large_repr` + equals 'truncate' this can be set to 0 and pandas will auto-detect + the height of the terminal and print a truncated object which fits + the screen height. The IPython notebook, IPython qtconsole, or + IDLE do not run in a terminal and hence it is not possible to do + correct auto-detection. +""" + +pc_min_rows_doc = """ +: int + The numbers of rows to show in a truncated view (when `max_rows` is + exceeded). Ignored when `max_rows` is set to None or 0. When set to + None, follows the value of `max_rows`. +""" + +pc_max_cols_doc = """ +: int + If max_cols is exceeded, switch to truncate view. Depending on + `large_repr`, objects are either centrally truncated or printed as + a summary view. 'None' value means unlimited. + + In case python/IPython is running in a terminal and `large_repr` + equals 'truncate' this can be set to 0 and pandas will auto-detect + the width of the terminal and print a truncated object which fits + the screen width. The IPython notebook, IPython qtconsole, or IDLE + do not run in a terminal and hence it is not possible to do + correct auto-detection. +""" + +pc_max_categories_doc = """ +: int + This sets the maximum number of categories pandas should output when + printing out a `Categorical` or a Series of dtype "category". 
+""" + +pc_max_info_cols_doc = """ +: int + max_info_columns is used in DataFrame.info method to decide if + per column information will be printed. +""" + +pc_nb_repr_h_doc = """ +: boolean + When True, IPython notebook will use html representation for + pandas objects (if it is available). +""" + +pc_pprint_nest_depth = """ +: int + Controls the number of nested levels to process when pretty-printing +""" + +pc_multi_sparse_doc = """ +: boolean + "sparsify" MultiIndex display (don't display repeated + elements in outer levels within groups) +""" + +float_format_doc = """ +: callable + The callable should accept a floating point number and return + a string with the desired format of the number. This is used + in some places like SeriesFormatter. + See formats.format.EngFormatter for an example. +""" + +max_colwidth_doc = """ +: int or None + The maximum width in characters of a column in the repr of + a pandas data structure. When the column overflows, a "..." + placeholder is embedded in the output. A 'None' value means unlimited. +""" + +colheader_justify_doc = """ +: 'left'/'right' + Controls the justification of column headers. used by DataFrameFormatter. +""" + +pc_expand_repr_doc = """ +: boolean + Whether to print out the full DataFrame repr for wide DataFrames across + multiple lines, `max_columns` is still respected, but the output will + wrap-around across multiple "pages" if its width exceeds `display.width`. +""" + +pc_show_dimensions_doc = """ +: boolean or 'truncate' + Whether to print out dimensions at the end of DataFrame repr. + If 'truncate' is specified, only print out the dimensions if the + frame is truncated (e.g. not display all rows and/or columns) +""" + +pc_east_asian_width_doc = """ +: boolean + Whether to use the Unicode East Asian Width to calculate the display text + width. + Enabling this may affect to the performance (default: False) +""" + +pc_ambiguous_as_wide_doc = """ +: boolean + Whether to handle Unicode characters belong to Ambiguous as Wide (width=2) + (default: False) +""" + +pc_latex_repr_doc = """ +: boolean + Whether to produce a latex DataFrame representation for jupyter + environments that support it. + (default: False) +""" + +pc_table_schema_doc = """ +: boolean + Whether to publish a Table Schema representation for frontends + that support it. + (default: False) +""" + +pc_html_border_doc = """ +: int + A ``border=value`` attribute is inserted in the ```` tag + for the DataFrame HTML repr. +""" + +pc_html_use_mathjax_doc = """\ +: boolean + When True, Jupyter notebook will process table contents using MathJax, + rendering mathematical expressions enclosed by the dollar symbol. + (default: True) +""" + +pc_max_dir_items = """\ +: int + The number of items that will be added to `dir(...)`. 'None' value means + unlimited. Because dir is cached, changing this option will not immediately + affect already existing dataframes until a column is deleted or added. + + This is for instance used to suggest columns from a dataframe to tab + completion. +""" + +pc_width_doc = """ +: int + Width of the display in characters. In case python/IPython is running in + a terminal this can be set to None and pandas will correctly auto-detect + the width. + Note that the IPython notebook, IPython qtconsole, or IDLE do not run in a + terminal and hence it is not possible to correctly detect the width. 
+""" + +pc_chop_threshold_doc = """ +: float or None + if set to a float value, all float values smaller than the given threshold + will be displayed as exactly 0 by repr and friends. +""" + +pc_max_seq_items = """ +: int or None + When pretty-printing a long sequence, no more then `max_seq_items` + will be printed. If items are omitted, they will be denoted by the + addition of "..." to the resulting string. + + If set to None, the number of items to be printed is unlimited. +""" + +pc_max_info_rows_doc = """ +: int or None + df.info() will usually show null-counts for each column. + For large frames this can be quite slow. max_info_rows and max_info_cols + limit this null check only to frames with smaller dimensions than + specified. +""" + +pc_large_repr_doc = """ +: 'truncate'/'info' + For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can + show a truncated table (the default from 0.13), or switch to the view from + df.info() (the behaviour in earlier versions of pandas). +""" + +pc_memory_usage_doc = """ +: bool, string or None + This specifies if the memory usage of a DataFrame should be displayed when + df.info() is called. Valid values True,False,'deep' +""" + +pc_latex_escape = """ +: bool + This specifies if the to_latex method of a Dataframe uses escapes special + characters. + Valid values: False,True +""" + +pc_latex_longtable = """ +:bool + This specifies if the to_latex method of a Dataframe uses the longtable + format. + Valid values: False,True +""" + +pc_latex_multicolumn = """ +: bool + This specifies if the to_latex method of a Dataframe uses multicolumns + to pretty-print MultiIndex columns. + Valid values: False,True +""" + +pc_latex_multicolumn_format = """ +: string + This specifies the format for multicolumn headers. + Can be surrounded with '|'. + Valid values: 'l', 'c', 'r', 'p{}' +""" + +pc_latex_multirow = """ +: bool + This specifies if the to_latex method of a Dataframe uses multirows + to pretty-print MultiIndex rows. + Valid values: False,True +""" + + +def table_schema_cb(key) -> None: + from pandas.io.formats.printing import enable_data_resource_formatter + + enable_data_resource_formatter(cf.get_option(key)) + + +def is_terminal() -> bool: + """ + Detect if Python is running in a terminal. + + Returns True if Python is running in a terminal or False if not. + """ + try: + # error: Name 'get_ipython' is not defined + ip = get_ipython() # type: ignore[name-defined] + except NameError: # assume standard Python interpreter in a terminal + return True + else: + if hasattr(ip, "kernel"): # IPython as a Jupyter kernel + return False + else: # IPython in a terminal + return True + + +with cf.config_prefix("display"): + cf.register_option("precision", 6, pc_precision_doc, validator=is_nonnegative_int) + cf.register_option( + "float_format", + None, + float_format_doc, + validator=is_one_of_factory([None, is_callable]), + ) + + def _deprecate_column_space(key): + warnings.warn( + "column_space is deprecated and will be removed " + "in a future version. Use df.to_string(col_space=...) 
" + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + cf.register_option("column_space", 12, validator=is_int, cb=_deprecate_column_space) + cf.register_option( + "max_info_rows", + 1690785, + pc_max_info_rows_doc, + validator=is_instance_factory((int, type(None))), + ) + cf.register_option("max_rows", 60, pc_max_rows_doc, validator=is_nonnegative_int) + cf.register_option( + "min_rows", + 10, + pc_min_rows_doc, + validator=is_instance_factory([type(None), int]), + ) + cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int) + + def _deprecate_negative_int_max_colwidth(key): + value = cf.get_option(key) + if value is not None and value < 0: + warnings.warn( + "Passing a negative integer is deprecated in version 1.0 and " + "will not be supported in future version. Instead, use None " + "to not limit the column width.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + cf.register_option( + # TODO(2.0): change `validator=is_nonnegative_int` see GH#31569 + "max_colwidth", + 50, + max_colwidth_doc, + validator=is_instance_factory([type(None), int]), + cb=_deprecate_negative_int_max_colwidth, + ) + if is_terminal(): + max_cols = 0 # automatically determine optimal number of columns + else: + max_cols = 20 # cannot determine optimal number of columns + cf.register_option( + "max_columns", max_cols, pc_max_cols_doc, validator=is_nonnegative_int + ) + cf.register_option( + "large_repr", + "truncate", + pc_large_repr_doc, + validator=is_one_of_factory(["truncate", "info"]), + ) + cf.register_option("max_info_columns", 100, pc_max_info_cols_doc, validator=is_int) + cf.register_option( + "colheader_justify", "right", colheader_justify_doc, validator=is_text + ) + cf.register_option("notebook_repr_html", True, pc_nb_repr_h_doc, validator=is_bool) + cf.register_option("pprint_nest_depth", 3, pc_pprint_nest_depth, validator=is_int) + cf.register_option("multi_sparse", True, pc_multi_sparse_doc, validator=is_bool) + cf.register_option("expand_frame_repr", True, pc_expand_repr_doc) + cf.register_option( + "show_dimensions", + "truncate", + pc_show_dimensions_doc, + validator=is_one_of_factory([True, False, "truncate"]), + ) + cf.register_option("chop_threshold", None, pc_chop_threshold_doc) + cf.register_option("max_seq_items", 100, pc_max_seq_items) + cf.register_option( + "width", 80, pc_width_doc, validator=is_instance_factory([type(None), int]) + ) + cf.register_option( + "memory_usage", + True, + pc_memory_usage_doc, + validator=is_one_of_factory([None, True, False, "deep"]), + ) + cf.register_option( + "unicode.east_asian_width", False, pc_east_asian_width_doc, validator=is_bool + ) + cf.register_option( + "unicode.ambiguous_as_wide", False, pc_east_asian_width_doc, validator=is_bool + ) + cf.register_option("latex.repr", False, pc_latex_repr_doc, validator=is_bool) + cf.register_option("latex.escape", True, pc_latex_escape, validator=is_bool) + cf.register_option("latex.longtable", False, pc_latex_longtable, validator=is_bool) + cf.register_option( + "latex.multicolumn", True, pc_latex_multicolumn, validator=is_bool + ) + cf.register_option( + "latex.multicolumn_format", "l", pc_latex_multicolumn, validator=is_text + ) + cf.register_option("latex.multirow", False, pc_latex_multirow, validator=is_bool) + cf.register_option( + "html.table_schema", + False, + pc_table_schema_doc, + validator=is_bool, + cb=table_schema_cb, + ) + cf.register_option("html.border", 1, pc_html_border_doc, validator=is_int) + cf.register_option( + "html.use_mathjax", 
True, pc_html_use_mathjax_doc, validator=is_bool + ) + cf.register_option( + "max_dir_items", 100, pc_max_dir_items, validator=is_nonnegative_int + ) + +tc_sim_interactive_doc = """ +: boolean + Whether to simulate interactive mode for purposes of testing +""" + +with cf.config_prefix("mode"): + cf.register_option("sim_interactive", False, tc_sim_interactive_doc) + +use_inf_as_null_doc = """ +: boolean + use_inf_as_null had been deprecated and will be removed in a future + version. Use `use_inf_as_na` instead. +""" + +use_inf_as_na_doc = """ +: boolean + True means treat None, NaN, INF, -INF as NA (old way), + False means None and NaN are null, but INF, -INF are not NA + (new way). +""" + +# We don't want to start importing everything at the global context level +# or we'll hit circular deps. + + +def use_inf_as_na_cb(key) -> None: + from pandas.core.dtypes.missing import _use_inf_as_na + + _use_inf_as_na(key) + + +with cf.config_prefix("mode"): + cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb) + cf.register_option( + "use_inf_as_null", False, use_inf_as_null_doc, cb=use_inf_as_na_cb + ) + + +cf.deprecate_option( + "mode.use_inf_as_null", msg=use_inf_as_null_doc, rkey="mode.use_inf_as_na" +) + + +data_manager_doc = """ +: string + Internal data manager type; can be "block" or "array". Defaults to "block", + unless overridden by the 'PANDAS_DATA_MANAGER' environment variable (needs + to be set before pandas is imported). +""" + + +with cf.config_prefix("mode"): + cf.register_option( + "data_manager", + # Get the default from an environment variable, if set, otherwise defaults + # to "block". This environment variable can be set for testing. + os.environ.get("PANDAS_DATA_MANAGER", "block"), + data_manager_doc, + validator=is_one_of_factory(["block", "array"]), + ) + + +# TODO better name? +copy_on_write_doc = """ +: bool + Use new copy-view behaviour using Copy-on-Write. Defaults to False, + unless overridden by the 'PANDAS_COPY_ON_WRITE' environment variable + (if set to "1" for True, needs to be set before pandas is imported). +""" + + +with cf.config_prefix("mode"): + cf.register_option( + "copy_on_write", + # Get the default from an environment variable, if set, otherwise defaults + # to False. This environment variable can be set for testing. + os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1", + copy_on_write_doc, + validator=is_bool, + ) + + +# user warnings +chained_assignment = """ +: string + Raise an exception, warn, or no action if trying to use chained assignment, + The default is warn +""" + +with cf.config_prefix("mode"): + cf.register_option( + "chained_assignment", + "warn", + chained_assignment, + validator=is_one_of_factory([None, "warn", "raise"]), + ) + + +string_storage_doc = """ +: string + The default storage for StringDtype. +""" + +with cf.config_prefix("mode"): + cf.register_option( + "string_storage", + "python", + string_storage_doc, + validator=is_one_of_factory(["python", "pyarrow"]), + ) + +# Set up the io.excel specific reader configuration. +reader_engine_doc = """ +: string + The default Excel reader engine for '{ext}' files. Available options: + auto, {others}. 
+""" + +_xls_options = ["xlrd"] +_xlsm_options = ["xlrd", "openpyxl"] +_xlsx_options = ["xlrd", "openpyxl"] +_ods_options = ["odf"] +_xlsb_options = ["pyxlsb"] + + +with cf.config_prefix("io.excel.xls"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="xls", others=", ".join(_xls_options)), + validator=is_one_of_factory(_xls_options + ["auto"]), + ) + +with cf.config_prefix("io.excel.xlsm"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="xlsm", others=", ".join(_xlsm_options)), + validator=is_one_of_factory(_xlsm_options + ["auto"]), + ) + + +with cf.config_prefix("io.excel.xlsx"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="xlsx", others=", ".join(_xlsx_options)), + validator=is_one_of_factory(_xlsx_options + ["auto"]), + ) + + +with cf.config_prefix("io.excel.ods"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="ods", others=", ".join(_ods_options)), + validator=is_one_of_factory(_ods_options + ["auto"]), + ) + +with cf.config_prefix("io.excel.xlsb"): + cf.register_option( + "reader", + "auto", + reader_engine_doc.format(ext="xlsb", others=", ".join(_xlsb_options)), + validator=is_one_of_factory(_xlsb_options + ["auto"]), + ) + +# Set up the io.excel specific writer configuration. +writer_engine_doc = """ +: string + The default Excel writer engine for '{ext}' files. Available options: + auto, {others}. +""" + +_xls_options = ["xlwt"] +_xlsm_options = ["openpyxl"] +_xlsx_options = ["openpyxl", "xlsxwriter"] +_ods_options = ["odf"] + + +with cf.config_prefix("io.excel.xls"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="xls", others=", ".join(_xls_options)), + validator=str, + ) +cf.deprecate_option( + "io.excel.xls.writer", + msg="As the xlwt package is no longer maintained, the xlwt engine will be " + "removed in a future version of pandas. This is the only engine in pandas that " + "supports writing in the xls format. Install openpyxl and write to an " + "xlsx file instead.", +) + +with cf.config_prefix("io.excel.xlsm"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="xlsm", others=", ".join(_xlsm_options)), + validator=str, + ) + + +with cf.config_prefix("io.excel.xlsx"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="xlsx", others=", ".join(_xlsx_options)), + validator=str, + ) + + +with cf.config_prefix("io.excel.ods"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="ods", others=", ".join(_ods_options)), + validator=str, + ) + + +# Set up the io.parquet specific configuration. +parquet_engine_doc = """ +: string + The default parquet reader/writer engine. Available options: + 'auto', 'pyarrow', 'fastparquet', the default is 'auto' +""" + +with cf.config_prefix("io.parquet"): + cf.register_option( + "engine", + "auto", + parquet_engine_doc, + validator=is_one_of_factory(["auto", "pyarrow", "fastparquet"]), + ) + + +# Set up the io.sql specific configuration. +sql_engine_doc = """ +: string + The default sql reader/writer engine. Available options: + 'auto', 'sqlalchemy', the default is 'auto' +""" + +with cf.config_prefix("io.sql"): + cf.register_option( + "engine", + "auto", + sql_engine_doc, + validator=is_one_of_factory(["auto", "sqlalchemy"]), + ) + +# -------- +# Plotting +# --------- + +plotting_backend_doc = """ +: str + The plotting backend to use. The default value is "matplotlib", the + backend provided with pandas. 
Other backends can be specified by + providing the name of the module that implements the backend. +""" + + +def register_plotting_backend_cb(key) -> None: + if key == "matplotlib": + # We defer matplotlib validation, since it's the default + return + from pandas.plotting._core import _get_plot_backend + + _get_plot_backend(key) + + +with cf.config_prefix("plotting"): + cf.register_option( + "backend", + defval="matplotlib", + doc=plotting_backend_doc, + validator=register_plotting_backend_cb, + ) + + +register_converter_doc = """ +: bool or 'auto'. + Whether to register converters with matplotlib's units registry for + dates, times, datetimes, and Periods. Toggling to False will remove + the converters, restoring any converters that pandas overwrote. +""" + + +def register_converter_cb(key) -> None: + from pandas.plotting import ( + deregister_matplotlib_converters, + register_matplotlib_converters, + ) + + if cf.get_option(key): + register_matplotlib_converters() + else: + deregister_matplotlib_converters() + + +with cf.config_prefix("plotting.matplotlib"): + cf.register_option( + "register_converters", + "auto", + register_converter_doc, + validator=is_one_of_factory(["auto", True, False]), + cb=register_converter_cb, + ) + +# ------ +# Styler +# ------ + +styler_sparse_index_doc = """ +: bool + Whether to sparsify the display of a hierarchical index. Setting to False will + display each explicit level element in a hierarchical key for each row. +""" + +styler_sparse_columns_doc = """ +: bool + Whether to sparsify the display of hierarchical columns. Setting to False will + display each explicit level element in a hierarchical key for each column. +""" + +styler_render_repr = """ +: str + Determine which output to use in Jupyter Notebook in {"html", "latex"}. +""" + +styler_max_elements = """ +: int + The maximum number of data-cell (", indent) + + if self.fmt.header: + self._write_col_header(indent + self.indent_delta) + + if self.show_row_idx_names: + self._write_row_header(indent + self.indent_delta) + + self.write("", indent) + + def _get_formatted_values(self) -> dict[int, list[str]]: + with option_context("display.max_colwidth", None): + fmt_values = {i: self.fmt.format_col(i) for i in range(self.ncols)} + return fmt_values + + def _write_body(self, indent: int) -> None: + self.write("", indent) + fmt_values = self._get_formatted_values() + + # write values + if self.fmt.index and isinstance(self.frame.index, MultiIndex): + self._write_hierarchical_rows(fmt_values, indent + self.indent_delta) + else: + self._write_regular_rows(fmt_values, indent + self.indent_delta) + + self.write("", indent) + + def _write_regular_rows( + self, fmt_values: Mapping[int, list[str]], indent: int + ) -> None: + is_truncated_horizontally = self.fmt.is_truncated_horizontally + is_truncated_vertically = self.fmt.is_truncated_vertically + + nrows = len(self.fmt.tr_frame) + + if self.fmt.index: + fmt = self.fmt._get_formatter("__index__") + if fmt is not None: + index_values = self.fmt.tr_frame.index.map(fmt) + else: + index_values = self.fmt.tr_frame.index.format() + + row: list[str] = [] + for i in range(nrows): + + if is_truncated_vertically and i == (self.fmt.tr_row_num): + str_sep_row = ["..."] * len(row) + self.write_tr( + str_sep_row, + indent, + self.indent_delta, + tags=None, + nindex_levels=self.row_levels, + ) + + row = [] + if self.fmt.index: + row.append(index_values[i]) + # see gh-22579 + # Column misalignment also occurs for + # a standard index when the columns index is named. 
+ # Add blank cell before data cells. + elif self.show_col_idx_names: + row.append("") + row.extend(fmt_values[j][i] for j in range(self.ncols)) + + if is_truncated_horizontally: + dot_col_ix = self.fmt.tr_col_num + self.row_levels + row.insert(dot_col_ix, "...") + self.write_tr( + row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels + ) + + def _write_hierarchical_rows( + self, fmt_values: Mapping[int, list[str]], indent: int + ) -> None: + template = 'rowspan="{span}" valign="top"' + + is_truncated_horizontally = self.fmt.is_truncated_horizontally + is_truncated_vertically = self.fmt.is_truncated_vertically + frame = self.fmt.tr_frame + nrows = len(frame) + + assert isinstance(frame.index, MultiIndex) + idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) + idx_values = list(zip(*idx_values)) + + if self.fmt.sparsify: + # GH3547 + sentinel = lib.no_default + levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False) + + level_lengths = get_level_lengths(levels, sentinel) + inner_lvl = len(level_lengths) - 1 + if is_truncated_vertically: + # Insert ... row and adjust idx_values and + # level_lengths to take this into account. + ins_row = self.fmt.tr_row_num + inserted = False + for lnum, records in enumerate(level_lengths): + rec_new = {} + for tag, span in list(records.items()): + if tag >= ins_row: + rec_new[tag + 1] = span + elif tag + span > ins_row: + rec_new[tag] = span + 1 + + # GH 14882 - Make sure insertion done once + if not inserted: + dot_row = list(idx_values[ins_row - 1]) + dot_row[-1] = "..." + idx_values.insert(ins_row, tuple(dot_row)) + inserted = True + else: + dot_row = list(idx_values[ins_row]) + dot_row[inner_lvl - lnum] = "..." + idx_values[ins_row] = tuple(dot_row) + else: + rec_new[tag] = span + # If ins_row lies between tags, all cols idx cols + # receive ... + if tag + span == ins_row: + rec_new[ins_row] = 1 + if lnum == 0: + idx_values.insert( + ins_row, tuple(["..."] * len(level_lengths)) + ) + + # GH 14882 - Place ... in correct level + elif inserted: + dot_row = list(idx_values[ins_row]) + dot_row[inner_lvl - lnum] = "..." + idx_values[ins_row] = tuple(dot_row) + level_lengths[lnum] = rec_new + + level_lengths[inner_lvl][ins_row] = 1 + for ix_col in range(len(fmt_values)): + fmt_values[ix_col].insert(ins_row, "...") + nrows += 1 + + for i in range(nrows): + row = [] + tags = {} + + sparse_offset = 0 + j = 0 + for records, v in zip(level_lengths, idx_values[i]): + if i in records: + if records[i] > 1: + tags[j] = template.format(span=records[i]) + else: + sparse_offset += 1 + continue + + j += 1 + row.append(v) + + row.extend(fmt_values[j][i] for j in range(self.ncols)) + if is_truncated_horizontally: + row.insert( + self.row_levels - sparse_offset + self.fmt.tr_col_num, "..." 
+ ) + self.write_tr( + row, + indent, + self.indent_delta, + tags=tags, + nindex_levels=len(levels) - sparse_offset, + ) + else: + row = [] + for i in range(len(frame)): + if is_truncated_vertically and i == (self.fmt.tr_row_num): + str_sep_row = ["..."] * len(row) + self.write_tr( + str_sep_row, + indent, + self.indent_delta, + tags=None, + nindex_levels=self.row_levels, + ) + + idx_values = list( + zip(*frame.index.format(sparsify=False, adjoin=False, names=False)) + ) + row = [] + row.extend(idx_values[i]) + row.extend(fmt_values[j][i] for j in range(self.ncols)) + if is_truncated_horizontally: + row.insert(self.row_levels + self.fmt.tr_col_num, "...") + self.write_tr( + row, + indent, + self.indent_delta, + tags=None, + nindex_levels=frame.index.nlevels, + ) + + +class NotebookFormatter(HTMLFormatter): + """ + Internal class for formatting output data in html for display in Jupyter + Notebooks. This class is intended for functionality specific to + DataFrame._repr_html_() and DataFrame.to_html(notebook=True) + """ + + def _get_formatted_values(self) -> dict[int, list[str]]: + return {i: self.fmt.format_col(i) for i in range(self.ncols)} + + def _get_columns_formatted_values(self) -> list[str]: + return self.columns.format() + + def write_style(self) -> None: + # We use the "scoped" attribute here so that the desired + # style properties for the data frame are not then applied + # throughout the entire notebook. + template_first = """\ + """ + template_select = """\ + .dataframe %s { + %s: %s; + }""" + element_props = [ + ("tbody tr th:only-of-type", "vertical-align", "middle"), + ("tbody tr th", "vertical-align", "top"), + ] + if isinstance(self.columns, MultiIndex): + element_props.append(("thead tr th", "text-align", "left")) + if self.show_row_idx_names: + element_props.append( + ("thead tr:last-of-type th", "text-align", "right") + ) + else: + element_props.append(("thead th", "text-align", "right")) + template_mid = "\n\n".join(map(lambda t: template_select % t, element_props)) + template = dedent("\n".join((template_first, template_mid, template_last))) + self.write(template) + + def render(self) -> list[str]: + self.write("
    ") + self.write_style() + super().render() + self.write("
    ") + return self.elements diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py new file mode 100644 index 00000000..e0f6e01a --- /dev/null +++ b/pandas/io/formats/info.py @@ -0,0 +1,1116 @@ +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +import sys +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Iterable, + Iterator, + Mapping, + Sequence, +) + +from pandas._config import get_option + +from pandas._typing import ( + Dtype, + WriteBuffer, +) + +from pandas.io.formats import format as fmt +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Index, + Series, + ) + + +frame_max_cols_sub = dedent( + """\ + max_cols : int, optional + When to switch from the verbose to the truncated output. If the + DataFrame has more than `max_cols` columns, the truncated output + is used. By default, the setting in + ``pandas.options.display.max_info_columns`` is used.""" +) + + +show_counts_sub = dedent( + """\ + show_counts : bool, optional + Whether to show the non-null counts. By default, this is shown + only if the DataFrame is smaller than + ``pandas.options.display.max_info_rows`` and + ``pandas.options.display.max_info_columns``. A value of True always + shows the counts, and False never shows the counts.""" +) + +null_counts_sub = dedent( + """ + null_counts : bool, optional + .. deprecated:: 1.2.0 + Use show_counts instead.""" +) + + +frame_examples_sub = dedent( + """\ + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] + >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, + ... "float_col": float_values}) + >>> df + int_col text_col float_col + 0 1 alpha 0.00 + 1 2 beta 0.25 + 2 3 gamma 0.50 + 3 4 delta 0.75 + 4 5 epsilon 1.00 + + Prints information of all columns: + + >>> df.info(verbose=True) + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Prints a summary of columns count and its dtypes but not per column + information: + + >>> df.info(verbose=False) + + RangeIndex: 5 entries, 0 to 4 + Columns: 3 entries, int_col to float_col + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Pipe output of DataFrame.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> df.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", + ... encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big DataFrames and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) + >>> df = pd.DataFrame({ + ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) + ... 
}) + >>> df.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 22.9+ MB + + >>> df.info(memory_usage='deep') + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 165.9 MB""" +) + + +frame_see_also_sub = dedent( + """\ + DataFrame.describe: Generate descriptive statistics of DataFrame + columns. + DataFrame.memory_usage: Memory usage of DataFrame columns.""" +) + + +frame_sub_kwargs = { + "klass": "DataFrame", + "type_sub": " and columns", + "max_cols_sub": frame_max_cols_sub, + "show_counts_sub": show_counts_sub, + "null_counts_sub": null_counts_sub, + "examples_sub": frame_examples_sub, + "see_also_sub": frame_see_also_sub, + "version_added_sub": "", +} + + +series_examples_sub = dedent( + """\ + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] + >>> s = pd.Series(text_values, index=int_values) + >>> s.info() + + Int64Index: 5 entries, 1 to 5 + Series name: None + Non-Null Count Dtype + -------------- ----- + 5 non-null object + dtypes: object(1) + memory usage: 80.0+ bytes + + Prints a summary excluding information about its values: + + >>> s.info(verbose=False) + + Int64Index: 5 entries, 1 to 5 + dtypes: object(1) + memory usage: 80.0+ bytes + + Pipe output of Series.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> s.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", + ... encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big Series and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) + >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6)) + >>> s.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Series name: None + Non-Null Count Dtype + -------------- ----- + 1000000 non-null object + dtypes: object(1) + memory usage: 7.6+ MB + + >>> s.info(memory_usage='deep') + + RangeIndex: 1000000 entries, 0 to 999999 + Series name: None + Non-Null Count Dtype + -------------- ----- + 1000000 non-null object + dtypes: object(1) + memory usage: 55.3 MB""" +) + + +series_see_also_sub = dedent( + """\ + Series.describe: Generate descriptive statistics of Series. + Series.memory_usage: Memory usage of Series.""" +) + + +series_sub_kwargs = { + "klass": "Series", + "type_sub": "", + "max_cols_sub": "", + "show_counts_sub": show_counts_sub, + "null_counts_sub": "", + "examples_sub": series_examples_sub, + "see_also_sub": series_see_also_sub, + "version_added_sub": "\n.. versionadded:: 1.4.0\n", +} + + +INFO_DOCSTRING = dedent( + """ + Print a concise summary of a {klass}. + + This method prints information about a {klass} including + the index dtype{type_sub}, non-null values and memory usage. + {version_added_sub}\ + + Parameters + ---------- + verbose : bool, optional + Whether to print the full summary. By default, the setting in + ``pandas.options.display.max_info_columns`` is followed. 
+ buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output.\ + {max_cols_sub} + memory_usage : bool, str, optional + Specifies whether total memory usage of the {klass} + elements (including the index) should be displayed. By default, + this follows the ``pandas.options.display.memory_usage`` setting. + + True always show memory usage. False never shows memory usage. + A value of 'deep' is equivalent to "True with deep introspection". + Memory usage is shown in human-readable units (base-2 + representation). Without deep introspection a memory estimation is + made based in column dtype and number of rows assuming values + consume the same memory amount for corresponding dtypes. With deep + memory introspection, a real memory usage calculation is performed + at the cost of computational resources. See the + :ref:`Frequently Asked Questions ` for more + details. + {show_counts_sub}{null_counts_sub} + + Returns + ------- + None + This method prints a summary of a {klass} and returns None. + + See Also + -------- + {see_also_sub} + + Examples + -------- + {examples_sub} + """ +) + + +def _put_str(s: str | Dtype, space: int) -> str: + """ + Make string of specified length, padding to the right if necessary. + + Parameters + ---------- + s : Union[str, Dtype] + String to be formatted. + space : int + Length to force string to be of. + + Returns + ------- + str + String coerced to given length. + + Examples + -------- + >>> pd.io.formats.info._put_str("panda", 6) + 'panda ' + >>> pd.io.formats.info._put_str("panda", 4) + 'pand' + """ + return str(s)[:space].ljust(space) + + +def _sizeof_fmt(num: float, size_qualifier: str) -> str: + """ + Return size in human readable format. + + Parameters + ---------- + num : int + Size in bytes. + size_qualifier : str + Either empty, or '+' (if lower bound). + + Returns + ------- + str + Size in human readable format. + + Examples + -------- + >>> _sizeof_fmt(23028, '') + '22.5 KB' + + >>> _sizeof_fmt(23028, '+') + '22.5+ KB' + """ + for x in ["bytes", "KB", "MB", "GB", "TB"]: + if num < 1024.0: + return f"{num:3.1f}{size_qualifier} {x}" + num /= 1024.0 + return f"{num:3.1f}{size_qualifier} PB" + + +def _initialize_memory_usage( + memory_usage: bool | str | None = None, +) -> bool | str: + """Get memory usage based on inputs and display options.""" + if memory_usage is None: + memory_usage = get_option("display.memory_usage") + return memory_usage + + +class BaseInfo(ABC): + """ + Base class for DataFrameInfo and SeriesInfo. + + Parameters + ---------- + data : DataFrame or Series + Either dataframe or series. + memory_usage : bool or str, optional + If "deep", introspect the data deeply by interrogating object dtypes + for system-level memory consumption, and include it in the returned + values. + """ + + data: DataFrame | Series + memory_usage: bool | str + + @property + @abstractmethod + def dtypes(self) -> Iterable[Dtype]: + """ + Dtypes. + + Returns + ------- + dtypes : sequence + Dtype of each of the DataFrame's columns (or one series column). + """ + + @property + @abstractmethod + def dtype_counts(self) -> Mapping[str, int]: + """Mapping dtype - number of counts.""" + + @property + @abstractmethod + def non_null_counts(self) -> Sequence[int]: + """Sequence of non-null counts for all columns or column (if series).""" + + @property + @abstractmethod + def memory_usage_bytes(self) -> int: + """ + Memory usage in bytes. 
+ + Returns + ------- + memory_usage_bytes : int + Object's total memory usage in bytes. + """ + + @property + def memory_usage_string(self) -> str: + """Memory usage in a form of human readable string.""" + return f"{_sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)}\n" + + @property + def size_qualifier(self) -> str: + size_qualifier = "" + if self.memory_usage: + if self.memory_usage != "deep": + # size_qualifier is just a best effort; not guaranteed to catch + # all cases (e.g., it misses categorical data even with object + # categories) + if ( + "object" in self.dtype_counts + or self.data.index._is_memory_usage_qualified() + ): + size_qualifier = "+" + return size_qualifier + + @abstractmethod + def render( + self, + *, + buf: WriteBuffer[str] | None, + max_cols: int | None, + verbose: bool | None, + show_counts: bool | None, + ) -> None: + pass + + +class DataFrameInfo(BaseInfo): + """ + Class storing dataframe-specific info. + """ + + def __init__( + self, + data: DataFrame, + memory_usage: bool | str | None = None, + ) -> None: + self.data: DataFrame = data + self.memory_usage = _initialize_memory_usage(memory_usage) + + @property + def dtype_counts(self) -> Mapping[str, int]: + return _get_dataframe_dtype_counts(self.data) + + @property + def dtypes(self) -> Iterable[Dtype]: + """ + Dtypes. + + Returns + ------- + dtypes + Dtype of each of the DataFrame's columns. + """ + return self.data.dtypes + + @property + def ids(self) -> Index: + """ + Column names. + + Returns + ------- + ids : Index + DataFrame's column names. + """ + return self.data.columns + + @property + def col_count(self) -> int: + """Number of columns to be summarized.""" + return len(self.ids) + + @property + def non_null_counts(self) -> Sequence[int]: + """Sequence of non-null counts for all columns or column (if series).""" + return self.data.count() + + @property + def memory_usage_bytes(self) -> int: + if self.memory_usage == "deep": + deep = True + else: + deep = False + return self.data.memory_usage(index=True, deep=deep).sum() + + def render( + self, + *, + buf: WriteBuffer[str] | None, + max_cols: int | None, + verbose: bool | None, + show_counts: bool | None, + ) -> None: + printer = DataFrameInfoPrinter( + info=self, + max_cols=max_cols, + verbose=verbose, + show_counts=show_counts, + ) + printer.to_buffer(buf) + + +class SeriesInfo(BaseInfo): + """ + Class storing series-specific info. + """ + + def __init__( + self, + data: Series, + memory_usage: bool | str | None = None, + ) -> None: + self.data: Series = data + self.memory_usage = _initialize_memory_usage(memory_usage) + + def render( + self, + *, + buf: WriteBuffer[str] | None = None, + max_cols: int | None = None, + verbose: bool | None = None, + show_counts: bool | None = None, + ) -> None: + if max_cols is not None: + raise ValueError( + "Argument `max_cols` can only be passed " + "in DataFrame.info, not Series.info" + ) + printer = SeriesInfoPrinter( + info=self, + verbose=verbose, + show_counts=show_counts, + ) + printer.to_buffer(buf) + + @property + def non_null_counts(self) -> Sequence[int]: + return [self.data.count()] + + @property + def dtypes(self) -> Iterable[Dtype]: + return [self.data.dtypes] + + @property + def dtype_counts(self) -> Mapping[str, int]: + from pandas.core.frame import DataFrame + + return _get_dataframe_dtype_counts(DataFrame(self.data)) + + @property + def memory_usage_bytes(self) -> int: + """Memory usage in bytes. + + Returns + ------- + memory_usage_bytes : int + Object's total memory usage in bytes. 
+ """ + if self.memory_usage == "deep": + deep = True + else: + deep = False + return self.data.memory_usage(index=True, deep=deep) + + +class InfoPrinterAbstract: + """ + Class for printing dataframe or series info. + """ + + def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None: + """Save dataframe info into buffer.""" + table_builder = self._create_table_builder() + lines = table_builder.get_lines() + if buf is None: # pragma: no cover + buf = sys.stdout + fmt.buffer_put_lines(buf, lines) + + @abstractmethod + def _create_table_builder(self) -> TableBuilderAbstract: + """Create instance of table builder.""" + + +class DataFrameInfoPrinter(InfoPrinterAbstract): + """ + Class for printing dataframe info. + + Parameters + ---------- + info : DataFrameInfo + Instance of DataFrameInfo. + max_cols : int, optional + When to switch from the verbose to the truncated output. + verbose : bool, optional + Whether to print the full summary. + show_counts : bool, optional + Whether to show the non-null counts. + """ + + def __init__( + self, + info: DataFrameInfo, + max_cols: int | None = None, + verbose: bool | None = None, + show_counts: bool | None = None, + ) -> None: + self.info = info + self.data = info.data + self.verbose = verbose + self.max_cols = self._initialize_max_cols(max_cols) + self.show_counts = self._initialize_show_counts(show_counts) + + @property + def max_rows(self) -> int: + """Maximum info rows to be displayed.""" + return get_option("display.max_info_rows", len(self.data) + 1) + + @property + def exceeds_info_cols(self) -> bool: + """Check if number of columns to be summarized does not exceed maximum.""" + return bool(self.col_count > self.max_cols) + + @property + def exceeds_info_rows(self) -> bool: + """Check if number of rows to be summarized does not exceed maximum.""" + return bool(len(self.data) > self.max_rows) + + @property + def col_count(self) -> int: + """Number of columns to be summarized.""" + return self.info.col_count + + def _initialize_max_cols(self, max_cols: int | None) -> int: + if max_cols is None: + return get_option("display.max_info_columns", self.col_count + 1) + return max_cols + + def _initialize_show_counts(self, show_counts: bool | None) -> bool: + if show_counts is None: + return bool(not self.exceeds_info_cols and not self.exceeds_info_rows) + else: + return show_counts + + def _create_table_builder(self) -> DataFrameTableBuilder: + """ + Create instance of table builder based on verbosity and display settings. + """ + if self.verbose: + return DataFrameTableBuilderVerbose( + info=self.info, + with_counts=self.show_counts, + ) + elif self.verbose is False: # specifically set to False, not necessarily None + return DataFrameTableBuilderNonVerbose(info=self.info) + else: + if self.exceeds_info_cols: + return DataFrameTableBuilderNonVerbose(info=self.info) + else: + return DataFrameTableBuilderVerbose( + info=self.info, + with_counts=self.show_counts, + ) + + +class SeriesInfoPrinter(InfoPrinterAbstract): + """Class for printing series info. + + Parameters + ---------- + info : SeriesInfo + Instance of SeriesInfo. + verbose : bool, optional + Whether to print the full summary. + show_counts : bool, optional + Whether to show the non-null counts. 
+ """ + + def __init__( + self, + info: SeriesInfo, + verbose: bool | None = None, + show_counts: bool | None = None, + ) -> None: + self.info = info + self.data = info.data + self.verbose = verbose + self.show_counts = self._initialize_show_counts(show_counts) + + def _create_table_builder(self) -> SeriesTableBuilder: + """ + Create instance of table builder based on verbosity. + """ + if self.verbose or self.verbose is None: + return SeriesTableBuilderVerbose( + info=self.info, + with_counts=self.show_counts, + ) + else: + return SeriesTableBuilderNonVerbose(info=self.info) + + def _initialize_show_counts(self, show_counts: bool | None) -> bool: + if show_counts is None: + return True + else: + return show_counts + + +class TableBuilderAbstract(ABC): + """ + Abstract builder for info table. + """ + + _lines: list[str] + info: BaseInfo + + @abstractmethod + def get_lines(self) -> list[str]: + """Product in a form of list of lines (strings).""" + + @property + def data(self) -> DataFrame | Series: + return self.info.data + + @property + def dtypes(self) -> Iterable[Dtype]: + """Dtypes of each of the DataFrame's columns.""" + return self.info.dtypes + + @property + def dtype_counts(self) -> Mapping[str, int]: + """Mapping dtype - number of counts.""" + return self.info.dtype_counts + + @property + def display_memory_usage(self) -> bool: + """Whether to display memory usage.""" + return bool(self.info.memory_usage) + + @property + def memory_usage_string(self) -> str: + """Memory usage string with proper size qualifier.""" + return self.info.memory_usage_string + + @property + def non_null_counts(self) -> Sequence[int]: + return self.info.non_null_counts + + def add_object_type_line(self) -> None: + """Add line with string representation of dataframe to the table.""" + self._lines.append(str(type(self.data))) + + def add_index_range_line(self) -> None: + """Add line with range of indices to the table.""" + self._lines.append(self.data.index._summary()) + + def add_dtypes_line(self) -> None: + """Add summary line with dtypes present in dataframe.""" + collected_dtypes = [ + f"{key}({val:d})" for key, val in sorted(self.dtype_counts.items()) + ] + self._lines.append(f"dtypes: {', '.join(collected_dtypes)}") + + +class DataFrameTableBuilder(TableBuilderAbstract): + """ + Abstract builder for dataframe info table. + + Parameters + ---------- + info : DataFrameInfo. + Instance of DataFrameInfo. 
+ """ + + def __init__(self, *, info: DataFrameInfo) -> None: + self.info: DataFrameInfo = info + + def get_lines(self) -> list[str]: + self._lines = [] + if self.col_count == 0: + self._fill_empty_info() + else: + self._fill_non_empty_info() + return self._lines + + def _fill_empty_info(self) -> None: + """Add lines to the info table, pertaining to empty dataframe.""" + self.add_object_type_line() + self.add_index_range_line() + self._lines.append(f"Empty {type(self.data).__name__}\n") + + @abstractmethod + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty dataframe.""" + + @property + def data(self) -> DataFrame: + """DataFrame.""" + return self.info.data + + @property + def ids(self) -> Index: + """Dataframe columns.""" + return self.info.ids + + @property + def col_count(self) -> int: + """Number of dataframe columns to be summarized.""" + return self.info.col_count + + def add_memory_usage_line(self) -> None: + """Add line containing memory usage.""" + self._lines.append(f"memory usage: {self.memory_usage_string}") + + +class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder): + """ + Dataframe info table builder for non-verbose output. + """ + + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty dataframe.""" + self.add_object_type_line() + self.add_index_range_line() + self.add_columns_summary_line() + self.add_dtypes_line() + if self.display_memory_usage: + self.add_memory_usage_line() + + def add_columns_summary_line(self) -> None: + self._lines.append(self.ids._summary(name="Columns")) + + +class TableBuilderVerboseMixin(TableBuilderAbstract): + """ + Mixin for verbose info output. + """ + + SPACING: str = " " * 2 + strrows: Sequence[Sequence[str]] + gross_column_widths: Sequence[int] + with_counts: bool + + @property + @abstractmethod + def headers(self) -> Sequence[str]: + """Headers names of the columns in verbose table.""" + + @property + def header_column_widths(self) -> Sequence[int]: + """Widths of header columns (only titles).""" + return [len(col) for col in self.headers] + + def _get_gross_column_widths(self) -> Sequence[int]: + """Get widths of columns containing both headers and actual content.""" + body_column_widths = self._get_body_column_widths() + return [ + max(*widths) + for widths in zip(self.header_column_widths, body_column_widths) + ] + + def _get_body_column_widths(self) -> Sequence[int]: + """Get widths of table content columns.""" + strcols: Sequence[Sequence[str]] = list(zip(*self.strrows)) + return [max(len(x) for x in col) for col in strcols] + + def _gen_rows(self) -> Iterator[Sequence[str]]: + """ + Generator function yielding rows content. + + Each element represents a row comprising a sequence of strings. 
+ """ + if self.with_counts: + return self._gen_rows_with_counts() + else: + return self._gen_rows_without_counts() + + @abstractmethod + def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data with counts.""" + + @abstractmethod + def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data without counts.""" + + def add_header_line(self) -> None: + header_line = self.SPACING.join( + [ + _put_str(header, col_width) + for header, col_width in zip(self.headers, self.gross_column_widths) + ] + ) + self._lines.append(header_line) + + def add_separator_line(self) -> None: + separator_line = self.SPACING.join( + [ + _put_str("-" * header_colwidth, gross_colwidth) + for header_colwidth, gross_colwidth in zip( + self.header_column_widths, self.gross_column_widths + ) + ] + ) + self._lines.append(separator_line) + + def add_body_lines(self) -> None: + for row in self.strrows: + body_line = self.SPACING.join( + [ + _put_str(col, gross_colwidth) + for col, gross_colwidth in zip(row, self.gross_column_widths) + ] + ) + self._lines.append(body_line) + + def _gen_non_null_counts(self) -> Iterator[str]: + """Iterator with string representation of non-null counts.""" + for count in self.non_null_counts: + yield f"{count} non-null" + + def _gen_dtypes(self) -> Iterator[str]: + """Iterator with string representation of column dtypes.""" + for dtype in self.dtypes: + yield pprint_thing(dtype) + + +class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin): + """ + Dataframe info table builder for verbose output. + """ + + def __init__( + self, + *, + info: DataFrameInfo, + with_counts: bool, + ) -> None: + self.info = info + self.with_counts = with_counts + self.strrows: Sequence[Sequence[str]] = list(self._gen_rows()) + self.gross_column_widths: Sequence[int] = self._get_gross_column_widths() + + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty dataframe.""" + self.add_object_type_line() + self.add_index_range_line() + self.add_columns_summary_line() + self.add_header_line() + self.add_separator_line() + self.add_body_lines() + self.add_dtypes_line() + if self.display_memory_usage: + self.add_memory_usage_line() + + @property + def headers(self) -> Sequence[str]: + """Headers names of the columns in verbose table.""" + if self.with_counts: + return [" # ", "Column", "Non-Null Count", "Dtype"] + return [" # ", "Column", "Dtype"] + + def add_columns_summary_line(self) -> None: + self._lines.append(f"Data columns (total {self.col_count} columns):") + + def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data without counts.""" + yield from zip( + self._gen_line_numbers(), + self._gen_columns(), + self._gen_dtypes(), + ) + + def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data with counts.""" + yield from zip( + self._gen_line_numbers(), + self._gen_columns(), + self._gen_non_null_counts(), + self._gen_dtypes(), + ) + + def _gen_line_numbers(self) -> Iterator[str]: + """Iterator with string representation of column numbers.""" + for i, _ in enumerate(self.ids): + yield f" {i}" + + def _gen_columns(self) -> Iterator[str]: + """Iterator with string representation of column names.""" + for col in self.ids: + yield pprint_thing(col) + + +class SeriesTableBuilder(TableBuilderAbstract): + """ + Abstract builder 
for series info table. + + Parameters + ---------- + info : SeriesInfo. + Instance of SeriesInfo. + """ + + def __init__(self, *, info: SeriesInfo) -> None: + self.info: SeriesInfo = info + + def get_lines(self) -> list[str]: + self._lines = [] + self._fill_non_empty_info() + return self._lines + + @property + def data(self) -> Series: + """Series.""" + return self.info.data + + def add_memory_usage_line(self) -> None: + """Add line containing memory usage.""" + self._lines.append(f"memory usage: {self.memory_usage_string}") + + @abstractmethod + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty series.""" + + +class SeriesTableBuilderNonVerbose(SeriesTableBuilder): + """ + Series info table builder for non-verbose output. + """ + + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty series.""" + self.add_object_type_line() + self.add_index_range_line() + self.add_dtypes_line() + if self.display_memory_usage: + self.add_memory_usage_line() + + +class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin): + """ + Series info table builder for verbose output. + """ + + def __init__( + self, + *, + info: SeriesInfo, + with_counts: bool, + ) -> None: + self.info = info + self.with_counts = with_counts + self.strrows: Sequence[Sequence[str]] = list(self._gen_rows()) + self.gross_column_widths: Sequence[int] = self._get_gross_column_widths() + + def _fill_non_empty_info(self) -> None: + """Add lines to the info table, pertaining to non-empty series.""" + self.add_object_type_line() + self.add_index_range_line() + self.add_series_name_line() + self.add_header_line() + self.add_separator_line() + self.add_body_lines() + self.add_dtypes_line() + if self.display_memory_usage: + self.add_memory_usage_line() + + def add_series_name_line(self) -> None: + self._lines.append(f"Series name: {self.data.name}") + + @property + def headers(self) -> Sequence[str]: + """Headers names of the columns in verbose table.""" + if self.with_counts: + return ["Non-Null Count", "Dtype"] + return ["Dtype"] + + def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data without counts.""" + yield from self._gen_dtypes() + + def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: + """Iterator with string representation of body data with counts.""" + yield from zip( + self._gen_non_null_counts(), + self._gen_dtypes(), + ) + + +def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: + """ + Create mapping between datatypes and their number of occurrences. + """ + # groupby dtype.name to collect e.g. Categorical columns + return df.dtypes.value_counts().groupby(lambda x: x.name).sum() diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py new file mode 100644 index 00000000..6bf4412b --- /dev/null +++ b/pandas/io/formats/latex.py @@ -0,0 +1,832 @@ +""" +Module for formatting output data in Latex. +""" +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Iterator, + Sequence, +) + +import numpy as np + +from pandas.core.dtypes.generic import ABCMultiIndex + +if TYPE_CHECKING: + from pandas.io.formats.format import DataFrameFormatter + + +def _split_into_full_short_caption( + caption: str | tuple[str, str] | None +) -> tuple[str, str]: + """Extract full and short captions from caption string/tuple. 
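This caption splitting backs the tuple form of the caption argument accepted by DataFrame.to_latex; a small sketch:

    import pandas as pd

    df = pd.DataFrame({"a": [1]})
    # A (full_caption, short_caption) tuple ends up as \caption[short]{full}.
    print(df.to_latex(caption=("A rather long caption", "short"), label="tab:x"))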
+ + Parameters + ---------- + caption : str or tuple, optional + Either table caption string or tuple (full_caption, short_caption). + If string is provided, then it is treated as table full caption, + while short_caption is considered an empty string. + + Returns + ------- + full_caption, short_caption : tuple + Tuple of full_caption, short_caption strings. + """ + if caption: + if isinstance(caption, str): + full_caption = caption + short_caption = "" + else: + try: + full_caption, short_caption = caption + except ValueError as err: + msg = "caption must be either a string or a tuple of two strings" + raise ValueError(msg) from err + else: + full_caption = "" + short_caption = "" + return full_caption, short_caption + + +class RowStringConverter(ABC): + r"""Converter for dataframe rows into LaTeX strings. + + Parameters + ---------- + formatter : `DataFrameFormatter` + Instance of `DataFrameFormatter`. + multicolumn: bool, optional + Whether to use \multicolumn macro. + multicolumn_format: str, optional + Multicolumn format. + multirow: bool, optional + Whether to use \multirow macro. + + """ + + def __init__( + self, + formatter: DataFrameFormatter, + multicolumn: bool = False, + multicolumn_format: str | None = None, + multirow: bool = False, + ) -> None: + self.fmt = formatter + self.frame = self.fmt.frame + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.clinebuf: list[list[int]] = [] + self.strcols = self._get_strcols() + self.strrows = list(zip(*self.strcols)) + + def get_strrow(self, row_num: int) -> str: + """Get string representation of the row.""" + row = self.strrows[row_num] + + is_multicol = ( + row_num < self.column_levels and self.fmt.header and self.multicolumn + ) + + is_multirow = ( + row_num >= self.header_levels + and self.fmt.index + and self.multirow + and self.index_levels > 1 + ) + + is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1 + + crow = self._preprocess_row(row) + + if is_multicol: + crow = self._format_multicolumn(crow) + if is_multirow: + crow = self._format_multirow(crow, row_num) + + lst = [] + lst.append(" & ".join(crow)) + lst.append(" \\\\") + if is_cline_maybe_required: + cline = self._compose_cline(row_num, len(self.strcols)) + lst.append(cline) + return "".join(lst) + + @property + def _header_row_num(self) -> int: + """Number of rows in header.""" + return self.header_levels if self.fmt.header else 0 + + @property + def index_levels(self) -> int: + """Integer number of levels in index.""" + return self.frame.index.nlevels + + @property + def column_levels(self) -> int: + return self.frame.columns.nlevels + + @property + def header_levels(self) -> int: + nlevels = self.column_levels + if self.fmt.has_index_names and self.fmt.show_index_names: + nlevels += 1 + return nlevels + + def _get_strcols(self) -> list[list[str]]: + """String representation of the columns.""" + if self.fmt.frame.empty: + strcols = [[self._empty_info_line]] + else: + strcols = self.fmt.get_strcols() + + # reestablish the MultiIndex that has been joined by get_strcols() + if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): + out = self.frame.index.format( + adjoin=False, + sparsify=self.fmt.sparsify, + names=self.fmt.has_index_names, + na_rep=self.fmt.na_rep, + ) + + # index.format will sparsify repeated entries with empty strings + # so pad these with some empty space + def pad_empties(x): + for pad in reversed(x): + if pad: + break + return [x[0]] + [i if i else " " * 
len(pad) for i in x[1:]] + + gen = (pad_empties(i) for i in out) + + # Add empty spaces for each column level + clevels = self.frame.columns.nlevels + out = [[" " * len(i[-1])] * clevels + i for i in gen] + + # Add the column names to the last index column + cnames = self.frame.columns.names + if any(cnames): + new_names = [i if i else "{}" for i in cnames] + out[self.frame.index.nlevels - 1][:clevels] = new_names + + # Get rid of old multiindex column and add new ones + strcols = out + strcols[1:] + return strcols + + @property + def _empty_info_line(self): + return ( + f"Empty {type(self.frame).__name__}\n" + f"Columns: {self.frame.columns}\n" + f"Index: {self.frame.index}" + ) + + def _preprocess_row(self, row: Sequence[str]) -> list[str]: + """Preprocess elements of the row.""" + if self.fmt.escape: + crow = _escape_symbols(row) + else: + crow = [x if x else "{}" for x in row] + if self.fmt.bold_rows and self.fmt.index: + crow = _convert_to_bold(crow, self.index_levels) + return crow + + def _format_multicolumn(self, row: list[str]) -> list[str]: + r""" + Combine columns belonging to a group to a single multicolumn entry + according to self.multicolumn_format + + e.g.: + a & & & b & c & + will become + \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} + """ + row2 = row[: self.index_levels] + ncol = 1 + coltext = "" + + def append_col(): + # write multicolumn if needed + if ncol > 1: + row2.append( + f"\\multicolumn{{{ncol:d}}}{{{self.multicolumn_format}}}" + f"{{{coltext.strip()}}}" + ) + # don't modify where not needed + else: + row2.append(coltext) + + for c in row[self.index_levels :]: + # if next col has text, write the previous + if c.strip(): + if coltext: + append_col() + coltext = c + ncol = 1 + # if not, add it to the previous multicolumn + else: + ncol += 1 + # write last column name + if coltext: + append_col() + return row2 + + def _format_multirow(self, row: list[str], i: int) -> list[str]: + r""" + Check following rows, whether row should be a multirow + + e.g.: becomes: + a & 0 & \multirow{2}{*}{a} & 0 & + & 1 & & 1 & + b & 0 & \cline{1-2} + b & 0 & + """ + for j in range(self.index_levels): + if row[j].strip(): + nrow = 1 + for r in self.strrows[i + 1 :]: + if not r[j].strip(): + nrow += 1 + else: + break + if nrow > 1: + # overwrite non-multirow entry + row[j] = f"\\multirow{{{nrow:d}}}{{*}}{{{row[j].strip()}}}" + # save when to end the current block with \cline + self.clinebuf.append([i + nrow - 1, j + 1]) + return row + + def _compose_cline(self, i: int, icol: int) -> str: + """ + Create clines after multirow-blocks are finished. 
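The multicolumn/multirow handling above (and the \cline bookkeeping that follows) is what DataFrame.to_latex exposes through its multicolumn, multicolumn_format and multirow arguments; a rough illustration:

    import pandas as pd

    df = pd.DataFrame(
        [[1, 2], [3, 4]],
        index=pd.MultiIndex.from_product([["r"], ["r1", "r2"]]),
        columns=pd.MultiIndex.from_product([["c"], ["c1", "c2"]]),
    )
    # Grouped column headers become \multicolumn{...}; repeated index labels
    # become \multirow{...} followed by a \cline.
    print(df.to_latex(multicolumn=True, multicolumn_format="c", multirow=True))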
+ """ + lst = [] + for cl in self.clinebuf: + if cl[0] == i: + lst.append(f"\n\\cline{{{cl[1]:d}-{icol:d}}}") + # remove entries that have been written to buffer + self.clinebuf = [x for x in self.clinebuf if x[0] != i] + return "".join(lst) + + +class RowStringIterator(RowStringConverter): + """Iterator over rows of the header or the body of the table.""" + + @abstractmethod + def __iter__(self) -> Iterator[str]: + """Iterate over LaTeX string representations of rows.""" + + +class RowHeaderIterator(RowStringIterator): + """Iterator for the table header rows.""" + + def __iter__(self) -> Iterator[str]: + for row_num in range(len(self.strrows)): + if row_num < self._header_row_num: + yield self.get_strrow(row_num) + + +class RowBodyIterator(RowStringIterator): + """Iterator for the table body rows.""" + + def __iter__(self) -> Iterator[str]: + for row_num in range(len(self.strrows)): + if row_num >= self._header_row_num: + yield self.get_strrow(row_num) + + +class TableBuilderAbstract(ABC): + """ + Abstract table builder producing string representation of LaTeX table. + + Parameters + ---------- + formatter : `DataFrameFormatter` + Instance of `DataFrameFormatter`. + column_format: str, optional + Column format, for example, 'rcl' for three columns. + multicolumn: bool, optional + Use multicolumn to enhance MultiIndex columns. + multicolumn_format: str, optional + The alignment for multicolumns, similar to column_format. + multirow: bool, optional + Use multirow to enhance MultiIndex rows. + caption: str, optional + Table caption. + short_caption: str, optional + Table short caption. + label: str, optional + LaTeX label. + position: str, optional + Float placement specifier, for example, 'htb'. + """ + + def __init__( + self, + formatter: DataFrameFormatter, + column_format: str | None = None, + multicolumn: bool = False, + multicolumn_format: str | None = None, + multirow: bool = False, + caption: str | None = None, + short_caption: str | None = None, + label: str | None = None, + position: str | None = None, + ) -> None: + self.fmt = formatter + self.column_format = column_format + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption = caption + self.short_caption = short_caption + self.label = label + self.position = position + + def get_result(self) -> str: + """String representation of LaTeX table.""" + elements = [ + self.env_begin, + self.top_separator, + self.header, + self.middle_separator, + self.env_body, + self.bottom_separator, + self.env_end, + ] + result = "\n".join([item for item in elements if item]) + trailing_newline = "\n" + result += trailing_newline + return result + + @property + @abstractmethod + def env_begin(self) -> str: + """Beginning of the environment.""" + + @property + @abstractmethod + def top_separator(self) -> str: + """Top level separator.""" + + @property + @abstractmethod + def header(self) -> str: + """Header lines.""" + + @property + @abstractmethod + def middle_separator(self) -> str: + """Middle level separator.""" + + @property + @abstractmethod + def env_body(self) -> str: + """Environment body.""" + + @property + @abstractmethod + def bottom_separator(self) -> str: + """Bottom level separator.""" + + @property + @abstractmethod + def env_end(self) -> str: + """End of the environment.""" + + +class GenericTableBuilder(TableBuilderAbstract): + """Table builder producing string representation of LaTeX table.""" + + @property + def header(self) -> str: + iterator = 
self._create_row_iterator(over="header") + return "\n".join(list(iterator)) + + @property + def top_separator(self) -> str: + return "\\toprule" + + @property + def middle_separator(self) -> str: + return "\\midrule" if self._is_separator_required() else "" + + @property + def env_body(self) -> str: + iterator = self._create_row_iterator(over="body") + return "\n".join(list(iterator)) + + def _is_separator_required(self) -> bool: + return bool(self.header and self.env_body) + + @property + def _position_macro(self) -> str: + r"""Position macro, extracted from self.position, like [h].""" + return f"[{self.position}]" if self.position else "" + + @property + def _caption_macro(self) -> str: + r"""Caption macro, extracted from self.caption. + + With short caption: + \caption[short_caption]{caption_string}. + + Without short caption: + \caption{caption_string}. + """ + if self.caption: + return "".join( + [ + r"\caption", + f"[{self.short_caption}]" if self.short_caption else "", + f"{{{self.caption}}}", + ] + ) + return "" + + @property + def _label_macro(self) -> str: + r"""Label macro, extracted from self.label, like \label{ref}.""" + return f"\\label{{{self.label}}}" if self.label else "" + + def _create_row_iterator(self, over: str) -> RowStringIterator: + """Create iterator over header or body of the table. + + Parameters + ---------- + over : {'body', 'header'} + Over what to iterate. + + Returns + ------- + RowStringIterator + Iterator over body or header. + """ + iterator_kind = self._select_iterator(over) + return iterator_kind( + formatter=self.fmt, + multicolumn=self.multicolumn, + multicolumn_format=self.multicolumn_format, + multirow=self.multirow, + ) + + def _select_iterator(self, over: str) -> type[RowStringIterator]: + """Select proper iterator over table rows.""" + if over == "header": + return RowHeaderIterator + elif over == "body": + return RowBodyIterator + else: + msg = f"'over' must be either 'header' or 'body', but {over} was provided" + raise ValueError(msg) + + +class LongTableBuilder(GenericTableBuilder): + """Concrete table builder for longtable. + + >>> from pandas.io.formats import format as fmt + >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = LongTableBuilder(formatter, caption='a long table', + ... 
label='tab:long', column_format='lrl') + >>> table = builder.get_result() + >>> print(table) + \\begin{longtable}{lrl} + \\caption{a long table} + \\label{tab:long}\\\\ + \\toprule + {} & a & b \\\\ + \\midrule + \\endfirsthead + \\caption[]{a long table} \\\\ + \\toprule + {} & a & b \\\\ + \\midrule + \\endhead + \\midrule + \\multicolumn{3}{r}{{Continued on next page}} \\\\ + \\midrule + \\endfoot + + \\bottomrule + \\endlastfoot + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\end{longtable} + + """ + + @property + def env_begin(self) -> str: + first_row = ( + f"\\begin{{longtable}}{self._position_macro}{{{self.column_format}}}" + ) + elements = [first_row, f"{self._caption_and_label()}"] + return "\n".join([item for item in elements if item]) + + def _caption_and_label(self) -> str: + if self.caption or self.label: + double_backslash = "\\\\" + elements = [f"{self._caption_macro}", f"{self._label_macro}"] + caption_and_label = "\n".join([item for item in elements if item]) + caption_and_label += double_backslash + return caption_and_label + else: + return "" + + @property + def middle_separator(self) -> str: + iterator = self._create_row_iterator(over="header") + + # the content between \endfirsthead and \endhead commands + # mitigates repeated List of Tables entries in the final LaTeX + # document when dealing with longtable environments; GH #34360 + elements = [ + "\\midrule", + "\\endfirsthead", + f"\\caption[]{{{self.caption}}} \\\\" if self.caption else "", + self.top_separator, + self.header, + "\\midrule", + "\\endhead", + "\\midrule", + f"\\multicolumn{{{len(iterator.strcols)}}}{{r}}" + "{{Continued on next page}} \\\\", + "\\midrule", + "\\endfoot\n", + "\\bottomrule", + "\\endlastfoot", + ] + if self._is_separator_required(): + return "\n".join(elements) + return "" + + @property + def bottom_separator(self) -> str: + return "" + + @property + def env_end(self) -> str: + return "\\end{longtable}" + + +class RegularTableBuilder(GenericTableBuilder): + """Concrete table builder for regular table. + + >>> from pandas.io.formats import format as fmt + >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = RegularTableBuilder(formatter, caption='caption', label='lab', + ... column_format='lrc') + >>> table = builder.get_result() + >>> print(table) + \\begin{table} + \\centering + \\caption{caption} + \\label{lab} + \\begin{tabular}{lrc} + \\toprule + {} & a & b \\\\ + \\midrule + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\bottomrule + \\end{tabular} + \\end{table} + + """ + + @property + def env_begin(self) -> str: + elements = [ + f"\\begin{{table}}{self._position_macro}", + "\\centering", + f"{self._caption_macro}", + f"{self._label_macro}", + f"\\begin{{tabular}}{{{self.column_format}}}", + ] + return "\n".join([item for item in elements if item]) + + @property + def bottom_separator(self) -> str: + return "\\bottomrule" + + @property + def env_end(self) -> str: + return "\n".join(["\\end{tabular}", "\\end{table}"]) + + +class TabularBuilder(GenericTableBuilder): + """Concrete table builder for tabular environment. 
+ + >>> from pandas.io.formats import format as fmt + >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = TabularBuilder(formatter, column_format='lrc') + >>> table = builder.get_result() + >>> print(table) + \\begin{tabular}{lrc} + \\toprule + {} & a & b \\\\ + \\midrule + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\bottomrule + \\end{tabular} + + """ + + @property + def env_begin(self) -> str: + return f"\\begin{{tabular}}{{{self.column_format}}}" + + @property + def bottom_separator(self) -> str: + return "\\bottomrule" + + @property + def env_end(self) -> str: + return "\\end{tabular}" + + +class LatexFormatter: + r""" + Used to render a DataFrame to a LaTeX tabular/longtable environment output. + + Parameters + ---------- + formatter : `DataFrameFormatter` + longtable : bool, default False + Use longtable environment. + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' for 3 columns + multicolumn : bool, default False + Use \multicolumn to enhance MultiIndex columns. + multicolumn_format : str, default 'l' + The alignment for multicolumns, similar to `column_format` + multirow : bool, default False + Use \multirow to enhance MultiIndex rows. + caption : str or tuple, optional + Tuple (full_caption, short_caption), + which results in \caption[short_caption]{full_caption}; + if a single string is passed, no short caption will be set. + label : str, optional + The LaTeX label to be placed inside ``\label{}`` in the output. + position : str, optional + The LaTeX positional argument for tables, to be placed after + ``\begin{}`` in the output. + + See Also + -------- + HTMLFormatter + """ + + def __init__( + self, + formatter: DataFrameFormatter, + longtable: bool = False, + column_format: str | None = None, + multicolumn: bool = False, + multicolumn_format: str | None = None, + multirow: bool = False, + caption: str | tuple[str, str] | None = None, + label: str | None = None, + position: str | None = None, + ) -> None: + self.fmt = formatter + self.frame = self.fmt.frame + self.longtable = longtable + self.column_format = column_format + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption, self.short_caption = _split_into_full_short_caption(caption) + self.label = label + self.position = position + + def to_string(self) -> str: + """ + Render a DataFrame to a LaTeX tabular, longtable, or table/tabular + environment output. + """ + return self.builder.get_result() + + @property + def builder(self) -> TableBuilderAbstract: + """Concrete table builder. 
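Which of the three builders above is used is decided by the _select_builder logic further below; an illustrative sketch from the DataFrame.to_latex side:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
    df.to_latex()                                       # TabularBuilder: bare tabular
    df.to_latex(caption="A caption", label="tab:demo")  # RegularTableBuilder: table + tabular
    df.to_latex(longtable=True)                         # LongTableBuilder: longtable environment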
+ + Returns + ------- + TableBuilder + """ + builder = self._select_builder() + return builder( + formatter=self.fmt, + column_format=self.column_format, + multicolumn=self.multicolumn, + multicolumn_format=self.multicolumn_format, + multirow=self.multirow, + caption=self.caption, + short_caption=self.short_caption, + label=self.label, + position=self.position, + ) + + def _select_builder(self) -> type[TableBuilderAbstract]: + """Select proper table builder.""" + if self.longtable: + return LongTableBuilder + if any([self.caption, self.label, self.position]): + return RegularTableBuilder + return TabularBuilder + + @property + def column_format(self) -> str | None: + """Column format.""" + return self._column_format + + @column_format.setter + def column_format(self, input_column_format: str | None) -> None: + """Setter for column format.""" + if input_column_format is None: + self._column_format = ( + self._get_index_format() + self._get_column_format_based_on_dtypes() + ) + elif not isinstance(input_column_format, str): + raise ValueError( + f"column_format must be str or unicode, " + f"not {type(input_column_format)}" + ) + else: + self._column_format = input_column_format + + def _get_column_format_based_on_dtypes(self) -> str: + """Get column format based on data type. + + Right alignment for numbers and left - for strings. + """ + + def get_col_type(dtype): + if issubclass(dtype.type, np.number): + return "r" + return "l" + + dtypes = self.frame.dtypes._values + return "".join(map(get_col_type, dtypes)) + + def _get_index_format(self) -> str: + """Get index column format.""" + return "l" * self.frame.index.nlevels if self.fmt.index else "" + + +def _escape_symbols(row: Sequence[str]) -> list[str]: + """Carry out string replacements for special symbols. + + Parameters + ---------- + row : list + List of string, that may contain special symbols. + + Returns + ------- + list + list of strings with the special symbols replaced. + """ + return [ + ( + x.replace("\\", "\\textbackslash ") + .replace("_", "\\_") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~", "\\textasciitilde ") + .replace("^", "\\textasciicircum ") + .replace("&", "\\&") + if (x and x != "{}") + else "{}" + ) + for x in row + ] + + +def _convert_to_bold(crow: Sequence[str], ilevels: int) -> list[str]: + """Convert elements in ``crow`` to bold.""" + return [ + f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x + for j, x in enumerate(crow) + ] + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py new file mode 100644 index 00000000..abb341f6 --- /dev/null +++ b/pandas/io/formats/printing.py @@ -0,0 +1,515 @@ +""" +Printing tools. +""" +from __future__ import annotations + +import sys +from typing import ( + Any, + Callable, + Dict, + Iterable, + Mapping, + Sequence, + TypeVar, + Union, +) + +from pandas._config import get_option + +from pandas.core.dtypes.inference import is_sequence + +EscapeChars = Union[Mapping[str, str], Iterable[str]] +_KT = TypeVar("_KT") +_VT = TypeVar("_VT") + + +def adjoin(space: int, *lists: list[str], **kwargs) -> str: + """ + Glues together two sets of strings using the amount of space requested. + The idea is to prettify. + + ---------- + space : int + number of spaces for padding + lists : str + list of str which being joined + strlen : callable + function used to calculate the length of each str. 
Needed for unicode + handling. + justfunc : callable + function used to justify str. Needed for unicode handling. + """ + strlen = kwargs.pop("strlen", len) + justfunc = kwargs.pop("justfunc", justify) + + out_lines = [] + newLists = [] + lengths = [max(map(strlen, x)) + space for x in lists[:-1]] + # not the last one + lengths.append(max(map(len, lists[-1]))) + maxLen = max(map(len, lists)) + for i, lst in enumerate(lists): + nl = justfunc(lst, lengths[i], mode="left") + nl.extend([" " * lengths[i]] * (maxLen - len(lst))) + newLists.append(nl) + toJoin = zip(*newLists) + for lines in toJoin: + out_lines.append("".join(lines)) + return "\n".join(out_lines) + + +def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]: + """ + Perform ljust, center, rjust against string or list-like + """ + if mode == "left": + return [x.ljust(max_len) for x in texts] + elif mode == "center": + return [x.center(max_len) for x in texts] + else: + return [x.rjust(max_len) for x in texts] + + +# Unicode consolidation +# --------------------- +# +# pprinting utility functions for generating Unicode text or +# bytes(3.x)/str(2.x) representations of objects. +# Try to use these as much as possible rather than rolling your own. +# +# When to use +# ----------- +# +# 1) If you're writing code internal to pandas (no I/O directly involved), +# use pprint_thing(). +# +# It will always return unicode text which can handled by other +# parts of the package without breakage. +# +# 2) if you need to write something out to file, use +# pprint_thing_encoded(encoding). +# +# If no encoding is specified, it defaults to utf-8. Since encoding pure +# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're +# working with straight ascii. + + +def _pprint_seq( + seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds +) -> str: + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather than calling this directly. + + bounds length of printed sequence, depending on options + """ + if isinstance(seq, set): + fmt = "{{{body}}}" + else: + fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})" + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + s = iter(seq) + # handle sets, no slicing + r = [ + pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) + for i in range(min(nitems, len(seq))) + ] + body = ", ".join(r) + + if nitems < len(seq): + body += ", ..." + elif isinstance(seq, tuple) and len(seq) == 1: + body += "," + + return fmt.format(body=body) + + +def _pprint_dict( + seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds +) -> str: + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather than calling this directly. 
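A quick sketch of what ``adjoin`` and ``justify`` produce (the module path is the one added by this patch; expected results are shown as comments):

    from pandas.io.formats.printing import adjoin, justify

    # justify pads each string to the requested width.
    justify(["a", "bb"], max_len=4, mode="left")    # ['a   ', 'bb  ']

    # adjoin justifies each column, pads every column except the last with
    # `space` extra characters, and joins the columns row by row.
    print(adjoin(2, ["a", "bb"], ["ccc", "ddd"]))
    # a   ccc
    # bb  ddd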
+ """ + fmt = "{{{things}}}" + pairs = [] + + pfmt = "{key}: {val}" + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + for k, v in list(seq.items())[:nitems]: + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), + ) + ) + + if nitems < len(seq): + return fmt.format(things=", ".join(pairs) + ", ...") + else: + return fmt.format(things=", ".join(pairs)) + + +def pprint_thing( + thing: Any, + _nest_lvl: int = 0, + escape_chars: EscapeChars | None = None, + default_escapes: bool = False, + quote_strings: bool = False, + max_seq_items: int | None = None, +) -> str: + """ + This function is the sanctioned way of converting objects + to a string representation and properly handles nested sequences. + + Parameters + ---------- + thing : anything to be formatted + _nest_lvl : internal use only. pprint_thing() is mutually-recursive + with pprint_sequence, this argument is used to keep track of the + current nesting level, and limit it. + escape_chars : list or dict, optional + Characters to escape. If a dict is passed the values are the + replacements + default_escapes : bool, default False + Whether the input escape characters replaces or adds to the defaults + max_seq_items : int or None, default None + Pass through to other pretty printers to limit sequence printing + + Returns + ------- + str + """ + + def as_escaped_string( + thing: Any, escape_chars: EscapeChars | None = escape_chars + ) -> str: + translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"} + if isinstance(escape_chars, dict): + if default_escapes: + translate.update(escape_chars) + else: + translate = escape_chars + escape_chars = list(escape_chars.keys()) + else: + escape_chars = escape_chars or () + + result = str(thing) + for c in escape_chars: + result = result.replace(c, translate[c]) + return result + + if hasattr(thing, "__next__"): + return str(thing) + elif isinstance(thing, dict) and _nest_lvl < get_option( + "display.pprint_nest_depth" + ): + result = _pprint_dict( + thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items + ) + elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"): + result = _pprint_seq( + thing, + _nest_lvl, + escape_chars=escape_chars, + quote_strings=quote_strings, + max_seq_items=max_seq_items, + ) + elif isinstance(thing, str) and quote_strings: + result = f"'{as_escaped_string(thing)}'" + else: + result = as_escaped_string(thing) + + return result + + +def pprint_thing_encoded( + object, encoding: str = "utf-8", errors: str = "replace" +) -> bytes: + value = pprint_thing(object) # get unicode representation of object + return value.encode(encoding, errors) + + +def enable_data_resource_formatter(enable: bool) -> None: + if "IPython" not in sys.modules: + # definitely not in IPython + return + from IPython import get_ipython + + ip = get_ipython() + if ip is None: + # still not in IPython + return + + formatters = ip.display_formatter.formatters + mimetype = "application/vnd.dataresource+json" + + if enable: + if mimetype not in formatters: + # define tableschema formatter + from IPython.core.formatters import BaseFormatter + from traitlets import ObjectName + + class TableSchemaFormatter(BaseFormatter): + print_method = ObjectName("_repr_data_resource_") + # Incompatible types in assignment (expression has type + # "Tuple[Type[Dict[Any, Any]]]", base class 
"BaseFormatter" + # defined the type as "Type[str]") + _return_type = (dict,) # type: ignore[assignment] + + # register it: + formatters[mimetype] = TableSchemaFormatter() + # enable it if it's been disabled: + formatters[mimetype].enabled = True + else: + # unregister tableschema mime-type + if mimetype in formatters: + formatters[mimetype].enabled = False + + +def default_pprint(thing: Any, max_seq_items: int | None = None) -> str: + return pprint_thing( + thing, + escape_chars=("\t", "\r", "\n"), + quote_strings=True, + max_seq_items=max_seq_items, + ) + + +def format_object_summary( + obj, + formatter: Callable, + is_justify: bool = True, + name: str | None = None, + indent_for_name: bool = True, + line_break_each_value: bool = False, +) -> str: + """ + Return the formatted obj as a unicode string + + Parameters + ---------- + obj : object + must be iterable and support __getitem__ + formatter : callable + string formatter for an element + is_justify : bool + should justify the display + name : name, optional + defaults to the class name of the obj + indent_for_name : bool, default True + Whether subsequent lines should be indented to + align with the name. + line_break_each_value : bool, default False + If True, inserts a line break for each value of ``obj``. + If False, only break lines when the a line of values gets wider + than the display width. + + .. versionadded:: 0.25.0 + + Returns + ------- + summary string + """ + from pandas.io.formats.console import get_console_size + from pandas.io.formats.format import get_adjustment + + display_width, _ = get_console_size() + if display_width is None: + display_width = get_option("display.width") or 80 + if name is None: + name = type(obj).__name__ + + if indent_for_name: + name_len = len(name) + space1 = f'\n{(" " * (name_len + 1))}' + space2 = f'\n{(" " * (name_len + 2))}' + else: + space1 = "\n" + space2 = "\n " # space for the opening '[' + + n = len(obj) + if line_break_each_value: + # If we want to vertically align on each value of obj, we need to + # separate values by a line break and indent the values + sep = ",\n " + " " * len(name) + else: + sep = "," + max_seq_items = get_option("display.max_seq_items") or n + + # are we a truncated display + is_truncated = n > max_seq_items + + # adj can optionally handle unicode eastern asian width + adj = get_adjustment() + + def _extend_line( + s: str, line: str, value: str, display_width: int, next_line_prefix: str + ) -> tuple[str, str]: + + if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width: + s += line.rstrip() + line = next_line_prefix + line += value + return s, line + + def best_len(values: list[str]) -> int: + if values: + return max(adj.len(x) for x in values) + else: + return 0 + + close = ", " + + if n == 0: + summary = f"[]{close}" + elif n == 1 and not line_break_each_value: + first = formatter(obj[0]) + summary = f"[{first}]{close}" + elif n == 2 and not line_break_each_value: + first = formatter(obj[0]) + last = formatter(obj[-1]) + summary = f"[{first}, {last}]{close}" + else: + + if max_seq_items == 1: + # If max_seq_items=1 show only last element + head = [] + tail = [formatter(x) for x in obj[-1:]] + elif n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in obj[:n]] + tail = [formatter(x) for x in obj[-n:]] + else: + head = [] + tail = [formatter(x) for x in obj] + + # adjust all values to max length if needed + if is_justify: + if line_break_each_value: + # Justify each string in the values of head and tail, so the + # 
strings will right align when head and tail are stacked + # vertically. + head, tail = _justify(head, tail) + elif is_truncated or not ( + len(", ".join(head)) < display_width + and len(", ".join(tail)) < display_width + ): + # Each string in head and tail should align with each other + max_length = max(best_len(head), best_len(tail)) + head = [x.rjust(max_length) for x in head] + tail = [x.rjust(max_length) for x in tail] + # If we are not truncated and we are only a single + # line, then don't justify + + if line_break_each_value: + # Now head and tail are of type List[Tuple[str]]. Below we + # convert them into List[str], so there will be one string per + # value. Also truncate items horizontally if wider than + # max_space + max_space = display_width - len(space2) + value = tail[0] + for max_items in reversed(range(1, len(value) + 1)): + pprinted_seq = _pprint_seq(value, max_seq_items=max_items) + if len(pprinted_seq) < max_space: + break + head = [_pprint_seq(x, max_seq_items=max_items) for x in head] + tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail] + + summary = "" + line = space2 + + for max_items in range(len(head)): + word = head[max_items] + sep + " " + summary, line = _extend_line(summary, line, word, display_width, space2) + + if is_truncated: + # remove trailing space of last line + summary += line.rstrip() + space2 + "..." + line = space2 + + for max_items in range(len(tail) - 1): + word = tail[max_items] + sep + " " + summary, line = _extend_line(summary, line, word, display_width, space2) + + # last value: no sep added + 1 space of width used for trailing ',' + summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2) + summary += line + + # right now close is either '' or ', ' + # Now we want to include the ']', but not the maybe space. + close = "]" + close.rstrip(" ") + summary += close + + if len(summary) > (display_width) or line_break_each_value: + summary += space1 + else: # one row + summary += " " + + # remove initial space + summary = "[" + summary[len(space2) :] + + return summary + + +def _justify( + head: list[Sequence[str]], tail: list[Sequence[str]] +) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]: + """ + Justify items in head and tail, so they are right-aligned when stacked. + + Parameters + ---------- + head : list-like of list-likes of strings + tail : list-like of list-likes of strings + + Returns + ------- + tuple of list of tuples of strings + Same as head and tail, but items are right aligned when stacked + vertically. + + Examples + -------- + >>> _justify([['a', 'b']], [['abc', 'abcd']]) + ([(' a', ' b')], [('abc', 'abcd')]) + """ + combined = head + tail + + # For each position for the sequences in ``combined``, + # find the length of the largest string. 
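``format_object_summary`` is what gives ``Index``-like objects their wrapped, optionally truncated repr; the truncation threshold is the ``display.max_seq_items`` option. A small illustration:

    import pandas as pd

    idx = pd.Index([i ** 2 for i in range(1000)])

    with pd.option_context("display.max_seq_items", 10):
        # Only a handful of leading and trailing values are rendered, with
        # '...' between them; lines are wrapped to display.width by the
        # _extend_line helper above.
        print(idx)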
+ max_length = [0] * len(combined[0]) + for inner_seq in combined: + length = [len(item) for item in inner_seq] + max_length = [max(x, y) for x, y in zip(max_length, length)] + + # justify each item in each list-like in head and tail using max_length + head = [ + tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head + ] + tail = [ + tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail + ] + # https://github.com/python/mypy/issues/4975 + # error: Incompatible return value type (got "Tuple[List[Sequence[str]], + # List[Sequence[str]]]", expected "Tuple[List[Tuple[str, ...]], + # List[Tuple[str, ...]]]") + return head, tail # type: ignore[return-value] + + +class PrettyDict(Dict[_KT, _VT]): + """Dict extension to support abbreviated __repr__""" + + def __repr__(self) -> str: + return pprint_thing(self) diff --git a/pandas/io/formats/string.py b/pandas/io/formats/string.py new file mode 100644 index 00000000..071afc05 --- /dev/null +++ b/pandas/io/formats/string.py @@ -0,0 +1,212 @@ +""" +Module for formatting output data in console (to string). +""" +from __future__ import annotations + +from shutil import get_terminal_size +from typing import ( + TYPE_CHECKING, + Iterable, +) + +import numpy as np + +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas.io.formats.format import DataFrameFormatter + + +class StringFormatter: + """Formatter for string representation of a dataframe.""" + + def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None: + self.fmt = fmt + self.adj = fmt.adj + self.frame = fmt.frame + self.line_width = line_width + + def to_string(self) -> str: + text = self._get_string_representation() + if self.fmt.should_show_dimensions: + text = "".join([text, self.fmt.dimensions_info]) + return text + + def _get_strcols(self) -> list[list[str]]: + strcols = self.fmt.get_strcols() + if self.fmt.is_truncated: + strcols = self._insert_dot_separators(strcols) + return strcols + + def _get_string_representation(self) -> str: + if self.fmt.frame.empty: + return self._empty_info_line + + strcols = self._get_strcols() + + if self.line_width is None: + # no need to wrap around just print the whole frame + return self.adj.adjoin(1, *strcols) + + if self._need_to_wrap_around: + return self._join_multiline(strcols) + + return self._fit_strcols_to_terminal_width(strcols) + + @property + def _empty_info_line(self) -> str: + return ( + f"Empty {type(self.frame).__name__}\n" + f"Columns: {pprint_thing(self.frame.columns)}\n" + f"Index: {pprint_thing(self.frame.index)}" + ) + + @property + def _need_to_wrap_around(self) -> bool: + return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0) + + def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]: + str_index = self.fmt._get_formatted_index(self.fmt.tr_frame) + index_length = len(str_index) + + if self.fmt.is_truncated_horizontally: + strcols = self._insert_dot_separator_horizontal(strcols, index_length) + + if self.fmt.is_truncated_vertically: + strcols = self._insert_dot_separator_vertical(strcols, index_length) + + return strcols + + @property + def _adjusted_tr_col_num(self) -> int: + return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num + + def _insert_dot_separator_horizontal( + self, strcols: list[list[str]], index_length: int + ) -> list[list[str]]: + strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length) + return strcols + + def _insert_dot_separator_vertical( + self, 
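``PrettyDict`` simply reroutes ``__repr__`` through ``pprint_thing``, so large mappings are abbreviated the same way other pandas containers are; a sketch:

    import pandas as pd
    from pandas.io.formats.printing import PrettyDict

    d = PrettyDict({i: chr(97 + i) for i in range(26)})

    with pd.option_context("display.max_seq_items", 4):
        # Only the first few key/value pairs are shown, followed by ', ...'
        print(d)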
strcols: list[list[str]], index_length: int + ) -> list[list[str]]: + n_header_rows = index_length - len(self.fmt.tr_frame) + row_num = self.fmt.tr_row_num + for ix, col in enumerate(strcols): + cwidth = self.adj.len(col[row_num]) + + if self.fmt.is_truncated_horizontally: + is_dot_col = ix == self._adjusted_tr_col_num + else: + is_dot_col = False + + if cwidth > 3 or is_dot_col: + dots = "..." + else: + dots = ".." + + if ix == 0 and self.fmt.index: + dot_mode = "left" + elif is_dot_col: + cwidth = 4 + dot_mode = "right" + else: + dot_mode = "right" + + dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0] + col.insert(row_num + n_header_rows, dot_str) + return strcols + + def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str: + lwidth = self.line_width + adjoin_width = 1 + strcols = list(strcols_input) + + if self.fmt.index: + idx = strcols.pop(0) + lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width + + col_widths = [ + np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0 + for col in strcols + ] + + assert lwidth is not None + col_bins = _binify(col_widths, lwidth) + nbins = len(col_bins) + + if self.fmt.is_truncated_vertically: + assert self.fmt.max_rows_fitted is not None + nrows = self.fmt.max_rows_fitted + 1 + else: + nrows = len(self.frame) + + str_lst = [] + start = 0 + for i, end in enumerate(col_bins): + row = strcols[start:end] + if self.fmt.index: + row.insert(0, idx) + if nbins > 1: + if end <= len(strcols) and i < nbins - 1: + row.append([" \\"] + [" "] * (nrows - 1)) + else: + row.append([" "] * nrows) + str_lst.append(self.adj.adjoin(adjoin_width, *row)) + start = end + return "\n\n".join(str_lst) + + def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str: + from pandas import Series + + lines = self.adj.adjoin(1, *strcols).split("\n") + max_len = Series(lines).str.len().max() + # plus truncate dot col + width, _ = get_terminal_size() + dif = max_len - width + # '+ 1' to avoid too wide repr (GH PR #17023) + adj_dif = dif + 1 + col_lens = Series([Series(ele).apply(len).max() for ele in strcols]) + n_cols = len(col_lens) + counter = 0 + while adj_dif > 0 and n_cols > 1: + counter += 1 + mid = round(n_cols / 2) + mid_ix = col_lens.index[mid] + col_len = col_lens[mid_ix] + # adjoin adds one + adj_dif -= col_len + 1 + col_lens = col_lens.drop(mid_ix) + n_cols = len(col_lens) + + # subtract index column + max_cols_fitted = n_cols - self.fmt.index + # GH-21180. Ensure that we print at least two. + max_cols_fitted = max(max_cols_fitted, 2) + self.fmt.max_cols_fitted = max_cols_fitted + + # Call again _truncate to cut frame appropriately + # and then generate string representation + self.fmt.truncate() + strcols = self._get_strcols() + return self.adj.adjoin(1, *strcols) + + +def _binify(cols: list[int], line_width: int) -> list[int]: + adjoin_width = 1 + bins = [] + curr_width = 0 + i_last_column = len(cols) - 1 + for i, w in enumerate(cols): + w_adjoined = w + adjoin_width + curr_width += w_adjoined + if i_last_column == i: + wrap = curr_width + 1 > line_width and i > 0 + else: + wrap = curr_width + 2 > line_width and i > 0 + if wrap: + bins.append(i) + curr_width = w_adjoined + + bins.append(len(cols)) + return bins diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py new file mode 100644 index 00000000..59c586e0 --- /dev/null +++ b/pandas/io/formats/style.py @@ -0,0 +1,4258 @@ +""" +Module for applying conditional formatting to DataFrames and Series. 
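``_binify`` and ``_join_multiline`` above implement the column wrapping used when a line width is supplied; the public entry point is ``DataFrame.to_string(line_width=...)``. An illustrative sketch:

    import pandas as pd

    df = pd.DataFrame({f"col{i}": range(3) for i in range(12)})

    # Columns are packed into bins no wider than line_width (via _binify) and
    # the bins are printed one below another; blocks that continue on the next
    # bin end with a trailing '\'.
    print(df.to_string(line_width=40))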
+""" +from __future__ import annotations + +from contextlib import contextmanager +import copy +from functools import partial +import operator +from typing import ( + Any, + Callable, + Hashable, + Sequence, + overload, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import lib +from pandas._typing import ( + Axis, + FilePath, + IndexLabel, + Level, + QuantileInterpolation, + Scalar, + StorageOptions, + WriteBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import ( + Substitution, + doc, +) +from pandas.util._exceptions import find_stack_level + +import pandas as pd +from pandas import ( + IndexSlice, + RangeIndex, +) +import pandas.core.common as com +from pandas.core.frame import ( + DataFrame, + Series, +) +from pandas.core.generic import NDFrame +from pandas.core.shared_docs import _shared_docs + +from pandas.io.formats.format import save_to_buffer + +jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") + +from pandas.io.formats.style_render import ( + CSSProperties, + CSSStyles, + ExtFormatter, + StylerRenderer, + Subset, + Tooltips, + format_table_styles, + maybe_convert_css_to_tuples, + non_reducing_slice, + refactor_levels, +) + +try: + import matplotlib as mpl + import matplotlib.pyplot as plt + + has_mpl = True +except ImportError: + has_mpl = False + no_mpl_message = "{0} requires matplotlib." + + +@contextmanager +def _mpl(func: Callable): + if has_mpl: + yield plt, mpl + else: + raise ImportError(no_mpl_message.format(func.__name__)) + + +#### +# Shared Doc Strings + +subset = """subset : label, array-like, IndexSlice, optional + A valid 2d input to `DataFrame.loc[]`, or, in the case of a 1d input + or single key, to `DataFrame.loc[:, ]` where the columns are + prioritised, to limit ``data`` to *before* applying the function.""" + +props = """props : str, default None + CSS properties to use for highlighting. If ``props`` is given, ``color`` + is not used.""" + +color = """color : str, default 'yellow' + Background color to use for highlighting.""" + +buf = """buf : str, path object, file-like object, optional + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``write()`` function. If ``None``, the result is + returned as a string.""" + +encoding = """encoding : str, optional + Character encoding setting for file output (and meta tags if available). + Defaults to ``pandas.options.styler.render.encoding`` value of "utf-8".""" + +# +### + + +class Styler(StylerRenderer): + r""" + Helps style a DataFrame or Series according to the data with HTML and CSS. + + Parameters + ---------- + data : Series or DataFrame + Data to be styled - either a Series or DataFrame. + precision : int, optional + Precision to round floats to. If not given defaults to + ``pandas.options.styler.format.precision``. + + .. versionchanged:: 1.4.0 + table_styles : list-like, default None + List of {selector: (attr, value)} dicts; see Notes. + uuid : str, default None + A unique identifier to avoid CSS collisions; generated automatically. + caption : str, tuple, default None + String caption to attach to the table. Tuple only used for LaTeX dual captions. + table_attributes : str, default None + Items that show up in the opening ``
    ) elements that will be rendered before + trimming will occur over columns, rows or both if needed. +""" + +styler_max_rows = """ +: int, optional + The maximum number of rows that will be rendered. May still be reduced to + satsify ``max_elements``, which takes precedence. +""" + +styler_max_columns = """ +: int, optional + The maximum number of columns that will be rendered. May still be reduced to + satsify ``max_elements``, which takes precedence. +""" + +styler_precision = """ +: int + The precision for floats and complex numbers. +""" + +styler_decimal = """ +: str + The character representation for the decimal separator for floats and complex. +""" + +styler_thousands = """ +: str, optional + The character representation for thousands separator for floats, int and complex. +""" + +styler_na_rep = """ +: str, optional + The string representation for values identified as missing. +""" + +styler_escape = """ +: str, optional + Whether to escape certain characters according to the given context; html or latex. +""" + +styler_formatter = """ +: str, callable, dict, optional + A formatter object to be used as default within ``Styler.format``. +""" + +styler_multirow_align = """ +: {"c", "t", "b"} + The specifier for vertical alignment of sparsified LaTeX multirows. +""" + +styler_multicol_align = r""" +: {"r", "c", "l", "naive-l", "naive-r"} + The specifier for horizontal alignment of sparsified LaTeX multicolumns. Pipe + decorators can also be added to non-naive values to draw vertical + rules, e.g. "\|r" will draw a rule on the left side of right aligned merged cells. +""" + +styler_hrules = """ +: bool + Whether to add horizontal rules on top and bottom and below the headers. +""" + +styler_environment = """ +: str + The environment to replace ``\\begin{table}``. If "longtable" is used results + in a specific longtable environment format. +""" + +styler_encoding = """ +: str + The encoding used for output HTML and LaTeX files. +""" + +styler_mathjax = """ +: bool + If False will render special CSS classes to table attributes that indicate Mathjax + will not be used in Jupyter Notebook. 
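These docstrings describe options registered under the ``styler`` prefix (see the registration block that follows); once registered they are set through the normal pandas options machinery. A small, illustrative example using option names confirmed by the registrations below:

    import pandas as pd

    # Trim very large tables before rendering and tweak default number formatting.
    pd.set_option("styler.render.max_elements", 100_000)
    pd.set_option("styler.format.precision", 3)
    pd.set_option("styler.format.thousands", ",")

    df = pd.DataFrame({"x": [1234.5678, 2345.6789]})
    html = df.style.to_html()   # picks up the styler.* defaults set above

    pd.reset_option("styler.format.precision")  # back to the registered default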
+""" + +with cf.config_prefix("styler"): + cf.register_option("sparse.index", True, styler_sparse_index_doc, validator=is_bool) + + cf.register_option( + "sparse.columns", True, styler_sparse_columns_doc, validator=is_bool + ) + + cf.register_option( + "render.repr", + "html", + styler_render_repr, + validator=is_one_of_factory(["html", "latex"]), + ) + + cf.register_option( + "render.max_elements", + 2**18, + styler_max_elements, + validator=is_nonnegative_int, + ) + + cf.register_option( + "render.max_rows", + None, + styler_max_rows, + validator=is_nonnegative_int, + ) + + cf.register_option( + "render.max_columns", + None, + styler_max_columns, + validator=is_nonnegative_int, + ) + + cf.register_option("render.encoding", "utf-8", styler_encoding, validator=is_str) + + cf.register_option("format.decimal", ".", styler_decimal, validator=is_str) + + cf.register_option( + "format.precision", 6, styler_precision, validator=is_nonnegative_int + ) + + cf.register_option( + "format.thousands", + None, + styler_thousands, + validator=is_instance_factory([type(None), str]), + ) + + cf.register_option( + "format.na_rep", + None, + styler_na_rep, + validator=is_instance_factory([type(None), str]), + ) + + cf.register_option( + "format.escape", + None, + styler_escape, + validator=is_one_of_factory([None, "html", "latex"]), + ) + + cf.register_option( + "format.formatter", + None, + styler_formatter, + validator=is_instance_factory([type(None), dict, Callable, str]), + ) + + cf.register_option("html.mathjax", True, styler_mathjax, validator=is_bool) + + cf.register_option( + "latex.multirow_align", + "c", + styler_multirow_align, + validator=is_one_of_factory(["c", "t", "b", "naive"]), + ) + + val_mca = ["r", "|r|", "|r", "r|", "c", "|c|", "|c", "c|", "l", "|l|", "|l", "l|"] + val_mca += ["naive-l", "naive-r"] + cf.register_option( + "latex.multicol_align", + "r", + styler_multicol_align, + validator=is_one_of_factory(val_mca), + ) + + cf.register_option("latex.hrules", False, styler_hrules, validator=is_bool) + + cf.register_option( + "latex.environment", + None, + styler_environment, + validator=is_instance_factory([type(None), str]), + ) diff --git a/pandas/core/construction.py b/pandas/core/construction.py new file mode 100644 index 00000000..4b63d492 --- /dev/null +++ b/pandas/core/construction.py @@ -0,0 +1,917 @@ +""" +Constructor functions intended to be shared by pd.array, Series.__init__, +and Index.__new__. + +These should not depend on core.internals. 
+""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Optional, + Sequence, + Union, + cast, + overload, +) +import warnings + +import numpy as np +import numpy.ma as ma + +from pandas._libs import lib +from pandas._libs.tslibs.period import Period +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Dtype, + DtypeObj, + T, +) +from pandas.errors import IntCastingNaNError +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.base import ( + ExtensionDtype, + _registry as registry, +) +from pandas.core.dtypes.cast import ( + construct_1d_arraylike_from_scalar, + construct_1d_object_array_from_listlike, + maybe_cast_to_datetime, + maybe_cast_to_integer_array, + maybe_convert_platform, + maybe_infer_to_datetimelike, + maybe_upcast, + sanitize_to_nanoseconds, +) +from pandas.core.dtypes.common import ( + is_datetime64_ns_dtype, + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_timedelta64_ns_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + PandasDtype, +) +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCPandasArray, + ABCRangeIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +import pandas.core.common as com + +if TYPE_CHECKING: + from pandas import ( + ExtensionArray, + Index, + Series, + ) + + +def array( + data: Sequence[object] | AnyArrayLike, + dtype: Dtype | None = None, + copy: bool = True, +) -> ExtensionArray: + """ + Create an array. + + Parameters + ---------- + data : Sequence of objects + The scalars inside `data` should be instances of the + scalar type for `dtype`. It's expected that `data` + represents a 1-dimensional array of data. + + When `data` is an Index or Series, the underlying array + will be extracted from `data`. + + dtype : str, np.dtype, or ExtensionDtype, optional + The dtype to use for the array. This may be a NumPy + dtype or an extension type registered with pandas using + :meth:`pandas.api.extensions.register_extension_dtype`. + + If not specified, there are two possibilities: + + 1. When `data` is a :class:`Series`, :class:`Index`, or + :class:`ExtensionArray`, the `dtype` will be taken + from the data. + 2. Otherwise, pandas will attempt to infer the `dtype` + from the data. + + Note that when `data` is a NumPy array, ``data.dtype`` is + *not* used for inferring the array type. This is because + NumPy cannot represent all the types of data that can be + held in extension arrays. + + Currently, pandas will infer an extension dtype for sequences of + + ============================== ======================================= + Scalar Type Array Type + ============================== ======================================= + :class:`pandas.Interval` :class:`pandas.arrays.IntervalArray` + :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` + :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray` + :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray` + :class:`int` :class:`pandas.arrays.IntegerArray` + :class:`float` :class:`pandas.arrays.FloatingArray` + :class:`str` :class:`pandas.arrays.StringArray` or + :class:`pandas.arrays.ArrowStringArray` + :class:`bool` :class:`pandas.arrays.BooleanArray` + ============================== ======================================= + + The ExtensionArray created when the scalar type is :class:`str` is determined by + ``pd.options.mode.string_storage`` if the dtype is not explicitly given. 
+ + For all other cases, NumPy's usual inference rules will be used. + + .. versionchanged:: 1.0.0 + + Pandas infers nullable-integer dtype for integer data, + string dtype for string data, and nullable-boolean dtype + for boolean data. + + .. versionchanged:: 1.2.0 + + Pandas now also infers nullable-floating dtype for float-like + input data + + copy : bool, default True + Whether to copy the data, even if not necessary. Depending + on the type of `data`, creating the new array may require + copying data, even if ``copy=False``. + + Returns + ------- + ExtensionArray + The newly created array. + + Raises + ------ + ValueError + When `data` is not 1-dimensional. + + See Also + -------- + numpy.array : Construct a NumPy array. + Series : Construct a pandas Series. + Index : Construct a pandas Index. + arrays.PandasArray : ExtensionArray wrapping a NumPy array. + Series.array : Extract the array stored within a Series. + + Notes + ----- + Omitting the `dtype` argument means pandas will attempt to infer the + best array type from the values in the data. As new array types are + added by pandas and 3rd party libraries, the "best" array type may + change. We recommend specifying `dtype` to ensure that + + 1. the correct array type for the data is returned + 2. the returned array type doesn't change as new extension types + are added by pandas and third-party libraries + + Additionally, if the underlying memory representation of the returned + array matters, we recommend specifying the `dtype` as a concrete object + rather than a string alias or allowing it to be inferred. For example, + a future version of pandas or a 3rd-party library may include a + dedicated ExtensionArray for string data. In this event, the following + would no longer return a :class:`arrays.PandasArray` backed by a NumPy + array. + + >>> pd.array(['a', 'b'], dtype=str) + + ['a', 'b'] + Length: 2, dtype: str32 + + This would instead return the new ExtensionArray dedicated for string + data. If you really need the new array to be backed by a NumPy array, + specify that in the dtype. + + >>> pd.array(['a', 'b'], dtype=np.dtype(" + ['a', 'b'] + Length: 2, dtype: str32 + + Finally, Pandas has arrays that mostly overlap with NumPy + + * :class:`arrays.DatetimeArray` + * :class:`arrays.TimedeltaArray` + + When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is + passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray`` + rather than a ``PandasArray``. This is for symmetry with the case of + timezone-aware data, which NumPy does not natively support. + + >>> pd.array(['2015', '2016'], dtype='datetime64[ns]') + + ['2015-01-01 00:00:00', '2016-01-01 00:00:00'] + Length: 2, dtype: datetime64[ns] + + >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]') + + ['0 days 01:00:00', '0 days 02:00:00'] + Length: 2, dtype: timedelta64[ns] + + Examples + -------- + If a dtype is not specified, pandas will infer the best dtype from the values. + See the description of `dtype` for the types pandas infers for. + + >>> pd.array([1, 2]) + + [1, 2] + Length: 2, dtype: Int64 + + >>> pd.array([1, 2, np.nan]) + + [1, 2, ] + Length: 3, dtype: Int64 + + >>> pd.array([1.1, 2.2]) + + [1.1, 2.2] + Length: 2, dtype: Float64 + + >>> pd.array(["a", None, "c"]) + + ['a', , 'c'] + Length: 3, dtype: string + + >>> with pd.option_context("string_storage", "pyarrow"): + ... arr = pd.array(["a", None, "c"]) + ... 
+ >>> arr + + ['a', , 'c'] + Length: 3, dtype: string + + >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) + + ['2000-01-01', '2000-01-01'] + Length: 2, dtype: period[D] + + You can use the string alias for `dtype` + + >>> pd.array(['a', 'b', 'a'], dtype='category') + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] + + Or specify the actual dtype + + >>> pd.array(['a', 'b', 'a'], + ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) + ['a', 'b', 'a'] + Categories (3, object): ['a' < 'b' < 'c'] + + If pandas does not infer a dedicated extension type a + :class:`arrays.PandasArray` is returned. + + >>> pd.array([1 + 1j, 3 + 2j]) + + [(1+1j), (3+2j)] + Length: 2, dtype: complex128 + + As mentioned in the "Notes" section, new extension types may be added + in the future (by pandas or 3rd party libraries), causing the return + value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype` + as a NumPy dtype if you need to ensure there's no future change in + behavior. + + >>> pd.array([1, 2], dtype=np.dtype("int32")) + + [1, 2] + Length: 2, dtype: int32 + + `data` must be 1-dimensional. A ValueError is raised when the input + has the wrong dimensionality. + + >>> pd.array(1) + Traceback (most recent call last): + ... + ValueError: Cannot pass scalar '1' to 'pandas.array'. + """ + from pandas.core.arrays import ( + BooleanArray, + DatetimeArray, + ExtensionArray, + FloatingArray, + IntegerArray, + IntervalArray, + PandasArray, + PeriodArray, + TimedeltaArray, + ) + from pandas.core.arrays.string_ import StringDtype + + if lib.is_scalar(data): + msg = f"Cannot pass scalar '{data}' to 'pandas.array'." + raise ValueError(msg) + + if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)): + # Note: we exclude np.ndarray here, will do type inference on it + dtype = data.dtype + + data = extract_array(data, extract_numpy=True) + + # this returns None for not-found dtypes. + if isinstance(dtype, str): + dtype = registry.find(dtype) or dtype + + if is_extension_array_dtype(dtype): + cls = cast(ExtensionDtype, dtype).construct_array_type() + return cls._from_sequence(data, dtype=dtype, copy=copy) + + if dtype is None: + inferred_dtype = lib.infer_dtype(data, skipna=True) + if inferred_dtype == "period": + period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data) + return PeriodArray._from_sequence(period_data, copy=copy) + + elif inferred_dtype == "interval": + return IntervalArray(data, copy=copy) + + elif inferred_dtype.startswith("datetime"): + # datetime, datetime64 + try: + return DatetimeArray._from_sequence(data, copy=copy) + except ValueError: + # Mixture of timezones, fall back to PandasArray + pass + + elif inferred_dtype.startswith("timedelta"): + # timedelta, timedelta64 + return TimedeltaArray._from_sequence(data, copy=copy) + + elif inferred_dtype == "string": + # StringArray/ArrowStringArray depending on pd.options.mode.string_storage + return StringDtype().construct_array_type()._from_sequence(data, copy=copy) + + elif inferred_dtype == "integer": + return IntegerArray._from_sequence(data, copy=copy) + + elif ( + inferred_dtype in ("floating", "mixed-integer-float") + and getattr(data, "dtype", None) != np.float16 + ): + # GH#44715 Exclude np.float16 bc FloatingArray does not support it; + # we will fall back to PandasArray. 
+ return FloatingArray._from_sequence(data, copy=copy) + + elif inferred_dtype == "boolean": + return BooleanArray._from_sequence(data, copy=copy) + + # Pandas overrides NumPy for + # 1. datetime64[ns] + # 2. timedelta64[ns] + # so that a DatetimeArray is returned. + if is_datetime64_ns_dtype(dtype): + return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) + elif is_timedelta64_ns_dtype(dtype): + return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) + + return PandasArray._from_sequence(data, dtype=dtype, copy=copy) + + +@overload +def extract_array( + obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ... +) -> ArrayLike: + ... + + +@overload +def extract_array( + obj: T, extract_numpy: bool = ..., extract_range: bool = ... +) -> T | ArrayLike: + ... + + +def extract_array( + obj: T, extract_numpy: bool = False, extract_range: bool = False +) -> T | ArrayLike: + """ + Extract the ndarray or ExtensionArray from a Series or Index. + + For all other types, `obj` is just returned as is. + + Parameters + ---------- + obj : object + For Series / Index, the underlying ExtensionArray is unboxed. + + extract_numpy : bool, default False + Whether to extract the ndarray from a PandasArray. + + extract_range : bool, default False + If we have a RangeIndex, return range._values if True + (which is a materialized integer ndarray), otherwise return unchanged. + + Returns + ------- + arr : object + + Examples + -------- + >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category')) + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + Other objects like lists, arrays, and DataFrames are just passed through. + + >>> extract_array([1, 2, 3]) + [1, 2, 3] + + For an ndarray-backed Series / Index the ndarray is returned. + + >>> extract_array(pd.Series([1, 2, 3])) + array([1, 2, 3]) + + To extract all the way down to the ndarray, pass ``extract_numpy=True``. + + >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True) + array([1, 2, 3]) + """ + if isinstance(obj, (ABCIndex, ABCSeries)): + if isinstance(obj, ABCRangeIndex): + if extract_range: + return obj._values + # https://github.com/python/mypy/issues/1081 + # error: Incompatible return value type (got "RangeIndex", expected + # "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]") + return obj # type: ignore[return-value] + + return obj._values + + elif extract_numpy and isinstance(obj, ABCPandasArray): + return obj.to_numpy() + + return obj + + +def ensure_wrapped_if_datetimelike(arr): + """ + Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray. + """ + if isinstance(arr, np.ndarray): + if arr.dtype.kind == "M": + from pandas.core.arrays import DatetimeArray + + return DatetimeArray._from_sequence(arr) + + elif arr.dtype.kind == "m": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray._from_sequence(arr) + + return arr + + +def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: + """ + Convert numpy MaskedArray to ensure mask is softened. 
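``ensure_wrapped_if_datetimelike`` is a thin guard used throughout construction; a minimal sketch of what it returns (comments show the result types):

    import numpy as np
    from pandas.core.construction import ensure_wrapped_if_datetimelike

    dt = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    td = np.array([1, 2], dtype="timedelta64[ns]")
    fl = np.array([1.0, 2.0])

    ensure_wrapped_if_datetimelike(dt)   # DatetimeArray
    ensure_wrapped_if_datetimelike(td)   # TimedeltaArray
    ensure_wrapped_if_datetimelike(fl)   # plain ndarray, returned unchanged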
+ """ + mask = ma.getmaskarray(data) + if mask.any(): + data, fill_value = maybe_upcast(data, copy=True) + data.soften_mask() # set hardmask False if it was True + data[mask] = fill_value + else: + data = data.copy() + return data + + +def sanitize_array( + data, + index: Index | None, + dtype: DtypeObj | None = None, + copy: bool = False, + raise_cast_failure: bool = True, + *, + allow_2d: bool = False, +) -> ArrayLike: + """ + Sanitize input data to an ndarray or ExtensionArray, copy if specified, + coerce to the dtype if specified. + + Parameters + ---------- + data : Any + index : Index or None, default None + dtype : np.dtype, ExtensionDtype, or None, default None + copy : bool, default False + raise_cast_failure : bool, default True + allow_2d : bool, default False + If False, raise if we have a 2D Arraylike. + + Returns + ------- + np.ndarray or ExtensionArray + + Notes + ----- + raise_cast_failure=False is only intended to be True when called from the + DataFrame constructor, as the dtype keyword there may be interpreted as only + applying to a subset of columns, see GH#24435. + """ + if isinstance(data, ma.MaskedArray): + data = sanitize_masked_array(data) + + if isinstance(dtype, PandasDtype): + # Avoid ending up with a PandasArray + dtype = dtype.numpy_dtype + + # extract ndarray or ExtensionArray, ensure we have no PandasArray + data = extract_array(data, extract_numpy=True, extract_range=True) + + if isinstance(data, np.ndarray) and data.ndim == 0: + if dtype is None: + dtype = data.dtype + data = lib.item_from_zerodim(data) + elif isinstance(data, range): + # GH#16804 + data = range_to_ndarray(data) + copy = False + + if not is_list_like(data): + if index is None: + raise ValueError("index must be specified when data is not list-like") + data = construct_1d_arraylike_from_scalar(data, len(index), dtype) + return data + + # GH#846 + if isinstance(data, np.ndarray): + if isinstance(data, np.matrix): + data = data.A + + if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype): + # possibility of nan -> garbage + try: + # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int + # casting aligning with IntCastingNaNError below + with np.errstate(invalid="ignore"): + subarr = _try_cast(data, dtype, copy, True) + except IntCastingNaNError: + warnings.warn( + "In a future version, passing float-dtype values containing NaN " + "and an integer dtype will raise IntCastingNaNError " + "(subclass of ValueError) instead of silently ignoring the " + "passed dtype. To retain the old behavior, call Series(arr) or " + "DataFrame(arr) without passing a dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) + subarr = np.array(data, copy=copy) + except ValueError: + if not raise_cast_failure: + # i.e. called via DataFrame constructor + warnings.warn( + "In a future version, passing float-dtype values and an " + "integer dtype to DataFrame will retain floating dtype " + "if they cannot be cast losslessly (matching Series behavior). " + "To retain the old behavior, use DataFrame(data).astype(dtype)", + FutureWarning, + stacklevel=find_stack_level(), + ) + # GH#40110 until the deprecation is enforced, we _dont_ + # ignore the dtype for DataFrame, and _do_ cast even though + # it is lossy. + dtype = cast(np.dtype, dtype) + return np.array(data, dtype=dtype, copy=copy) + + # We ignore the dtype arg and return floating values, + # e.g. test_constructor_floating_data_int_dtype + # TODO: where is the discussion that documents the reason for this? 
+ subarr = np.array(data, copy=copy) + else: + # we will try to copy by-definition here + subarr = _try_cast(data, dtype, copy, raise_cast_failure) + + elif isinstance(data, ABCExtensionArray): + # it is already ensured above this is not a PandasArray + subarr = data + + if dtype is not None: + subarr = subarr.astype(dtype, copy=copy) + elif copy: + subarr = subarr.copy() + + else: + if isinstance(data, (set, frozenset)): + # Raise only for unordered sets, e.g., not for dict_keys + raise TypeError(f"'{type(data).__name__}' type is unordered") + + # materialize e.g. generators, convert e.g. tuples, abc.ValueView + if hasattr(data, "__array__"): + # e.g. dask array GH#38645 + data = np.array(data, copy=copy) + else: + data = list(data) + + if dtype is not None or len(data) == 0: + try: + subarr = _try_cast(data, dtype, copy, raise_cast_failure) + except ValueError: + if is_integer_dtype(dtype): + casted = np.array(data, copy=False) + if casted.dtype.kind == "f": + # GH#40110 match the behavior we have if we passed + # a ndarray[float] to begin with + return sanitize_array( + casted, + index, + dtype, + copy=False, + raise_cast_failure=raise_cast_failure, + allow_2d=allow_2d, + ) + else: + raise + else: + raise + else: + subarr = maybe_convert_platform(data) + if subarr.dtype == object: + subarr = cast(np.ndarray, subarr) + subarr = maybe_infer_to_datetimelike(subarr) + + subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d) + + if isinstance(subarr, np.ndarray): + # at this point we should have dtype be None or subarr.dtype == dtype + dtype = cast(np.dtype, dtype) + subarr = _sanitize_str_dtypes(subarr, data, dtype, copy) + + return subarr + + +def range_to_ndarray(rng: range) -> np.ndarray: + """ + Cast a range object to ndarray. + """ + # GH#30171 perf avoid realizing range as a list in np.array + try: + arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64") + except OverflowError: + # GH#30173 handling for ranges that overflow int64 + if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0): + try: + arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64") + except OverflowError: + arr = construct_1d_object_array_from_listlike(list(rng)) + else: + arr = construct_1d_object_array_from_listlike(list(rng)) + return arr + + +def _sanitize_ndim( + result: ArrayLike, + data, + dtype: DtypeObj | None, + index: Index | None, + *, + allow_2d: bool = False, +) -> ArrayLike: + """ + Ensure we have a 1-dimensional result array. + """ + if getattr(result, "ndim", 0) == 0: + raise ValueError("result should be arraylike with ndim > 0") + + elif result.ndim == 1: + # the result that we want + result = _maybe_repeat(result, index) + + elif result.ndim > 1: + if isinstance(data, np.ndarray): + if allow_2d: + return result + raise ValueError("Data must be 1-dimensional") + if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype): + # i.e. PandasDtype("O") + + result = com.asarray_tuplesafe(data, dtype=np.dtype("object")) + cls = dtype.construct_array_type() + result = cls._from_sequence(result, dtype=dtype) + else: + # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type + # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str, + # dtype[Any], None]" + result = com.asarray_tuplesafe(data, dtype=dtype) # type: ignore[arg-type] + return result + + +def _sanitize_str_dtypes( + result: np.ndarray, data, dtype: np.dtype | None, copy: bool +) -> np.ndarray: + """ + Ensure we have a dtype that is supported by pandas. 
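Two of the branches above are easy to see from the public constructors themselves (illustrative sketch):

    import pandas as pd

    # range objects take the dedicated fast path (range_to_ndarray) and are
    # never materialised as a Python list:
    s = pd.Series(range(5))          # int64 values 0..4

    # unordered sets are rejected outright:
    try:
        pd.Series({"a", "b", "c"})
    except TypeError as err:
        print(err)                   # 'set' type is unordered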
+ """ + + # This is to prevent mixed-type Series getting all casted to + # NumPy string type, e.g. NaN --> '-1#IND'. + if issubclass(result.dtype.type, str): + # GH#16605 + # If not empty convert the data to dtype + # GH#19853: If data is a scalar, result has already the result + if not lib.is_scalar(data): + if not np.all(isna(data)): + data = np.array(data, dtype=dtype, copy=False) + result = np.array(data, dtype=object, copy=copy) + return result + + +def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike: + """ + If we have a length-1 array and an index describing how long we expect + the result to be, repeat the array. + """ + if index is not None: + if 1 == len(arr) != len(index): + arr = arr.repeat(len(index)) + return arr + + +def _try_cast( + arr: list | np.ndarray, + dtype: DtypeObj | None, + copy: bool, + raise_cast_failure: bool, +) -> ArrayLike: + """ + Convert input to numpy ndarray and optionally cast to a given dtype. + + Parameters + ---------- + arr : ndarray or list + Excludes: ExtensionArray, Series, Index. + dtype : np.dtype, ExtensionDtype or None + copy : bool + If False, don't copy the data if not needed. + raise_cast_failure : bool + If True, and if a dtype is specified, raise errors during casting. + Otherwise an object array is returned. + + Returns + ------- + np.ndarray or ExtensionArray + """ + is_ndarray = isinstance(arr, np.ndarray) + + if dtype is None: + # perf shortcut as this is the most common case + if is_ndarray: + arr = cast(np.ndarray, arr) + if arr.dtype != object: + return sanitize_to_nanoseconds(arr, copy=copy) + + out = maybe_infer_to_datetimelike(arr) + if out is arr and copy: + out = out.copy() + return out + + else: + # i.e. list + varr = np.array(arr, copy=False) + # filter out cases that we _dont_ want to go through + # maybe_infer_to_datetimelike + if varr.dtype != object or varr.size == 0: + return varr + return maybe_infer_to_datetimelike(varr) + + elif isinstance(dtype, ExtensionDtype): + # create an extension array from its dtype + if isinstance(dtype, DatetimeTZDtype): + # We can't go through _from_sequence because it handles dt64naive + # data differently; _from_sequence treats naive as wall times, + # while maybe_cast_to_datetime treats it as UTC + # see test_maybe_promote_any_numpy_dtype_with_datetimetz + # TODO(2.0): with deprecations enforced, should be able to remove + # special case. + return maybe_cast_to_datetime(arr, dtype) + # TODO: copy? + + array_type = dtype.construct_array_type()._from_sequence + subarr = array_type(arr, dtype=dtype, copy=copy) + return subarr + + elif is_object_dtype(dtype): + if not is_ndarray: + subarr = construct_1d_object_array_from_listlike(arr) + return subarr + return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) + + elif dtype.kind == "U": + # TODO: test cases with arr.dtype.kind in ["m", "M"] + if is_ndarray: + arr = cast(np.ndarray, arr) + shape = arr.shape + if arr.ndim > 1: + arr = arr.ravel() + else: + shape = (len(arr),) + return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape( + shape + ) + + elif dtype.kind in ["m", "M"]: + return maybe_cast_to_datetime(arr, dtype) + + try: + # GH#15832: Check if we are requesting a numeric dtype and + # that we can convert the data to the requested dtype. + if is_integer_dtype(dtype): + # this will raise if we have e.g. 
floats + + subarr = maybe_cast_to_integer_array(arr, dtype) + else: + # 4 tests fail if we move this to a try/except/else; see + # test_constructor_compound_dtypes, test_constructor_cast_failure + # test_constructor_dict_cast2, test_loc_setitem_dtype + subarr = np.array(arr, dtype=dtype, copy=copy) + + except (ValueError, TypeError): + if raise_cast_failure: + raise + else: + # we only get here with raise_cast_failure False, which means + # called via the DataFrame constructor + # GH#24435 + warnings.warn( + f"Could not cast to {dtype}, falling back to object. This " + "behavior is deprecated. In a future version, when a dtype is " + "passed to 'DataFrame', either all columns will be cast to that " + "dtype, or a TypeError will be raised.", + FutureWarning, + stacklevel=find_stack_level(), + ) + subarr = np.array(arr, dtype=object, copy=copy) + return subarr + + +def is_empty_data(data: Any) -> bool: + """ + Utility to check if a Series is instantiated with empty data, + which does not contain dtype information. + + Parameters + ---------- + data : array-like, Iterable, dict, or scalar value + Contains data stored in Series. + + Returns + ------- + bool + """ + is_none = data is None + is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype") + is_simple_empty = is_list_like_without_dtype and not data + return is_none or is_simple_empty + + +def create_series_with_explicit_dtype( + data: Any = None, + index: ArrayLike | Index | None = None, + dtype: Dtype | None = None, + name: str | None = None, + copy: bool = False, + fastpath: bool = False, + dtype_if_empty: Dtype = object, +) -> Series: + """ + Helper to pass an explicit dtype when instantiating an empty Series. + + This silences a DeprecationWarning described in GitHub-17261. + + Parameters + ---------- + data : Mirrored from Series.__init__ + index : Mirrored from Series.__init__ + dtype : Mirrored from Series.__init__ + name : Mirrored from Series.__init__ + copy : Mirrored from Series.__init__ + fastpath : Mirrored from Series.__init__ + dtype_if_empty : str, numpy.dtype, or ExtensionDtype + This dtype will be passed explicitly if an empty Series will + be instantiated. + + Returns + ------- + Series + """ + from pandas.core.series import Series + + if is_empty_data(data) and dtype is None: + dtype = dtype_if_empty + return Series( + data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath + ) diff --git a/pandas/core/describe.py b/pandas/core/describe.py new file mode 100644 index 00000000..ce2fa950 --- /dev/null +++ b/pandas/core/describe.py @@ -0,0 +1,429 @@ +""" +Module responsible for execution of NDFrame.describe() method. + +Method NDFrame.describe() delegates actual execution to function describe_ndframe(). 
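``is_empty_data`` and ``create_series_with_explicit_dtype`` (defined just above, before the describe module begins) exist mainly so internal callers can build empty Series without triggering the empty-Series default-dtype deprecation; a sketch:

    from pandas.core.construction import (
        create_series_with_explicit_dtype,
        is_empty_data,
    )

    is_empty_data(None)        # True
    is_empty_data([])          # True
    is_empty_data([1, 2])      # False

    # Roughly equivalent to pd.Series(dtype="float64") for empty input, but the
    # dtype is passed explicitly so no DeprecationWarning is emitted.
    s = create_series_with_explicit_dtype(data=None, dtype_if_empty="float64")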
+""" +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Sequence, + cast, +) +import warnings + +import numpy as np + +from pandas._libs.tslibs import Timestamp +from pandas._typing import ( + DtypeObj, + NDFrameT, + npt, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_percentile + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_complex_dtype, + is_datetime64_any_dtype, + is_extension_array_dtype, + is_numeric_dtype, + is_timedelta64_dtype, +) + +import pandas as pd +from pandas.core.reshape.concat import concat + +from pandas.io.formats.format import format_percentiles + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + + +def describe_ndframe( + *, + obj: NDFrameT, + include: str | Sequence[str] | None, + exclude: str | Sequence[str] | None, + datetime_is_numeric: bool, + percentiles: Sequence[float] | np.ndarray | None, +) -> NDFrameT: + """Describe series or dataframe. + + Called from pandas.core.generic.NDFrame.describe() + + Parameters + ---------- + obj: DataFrame or Series + Either dataframe or series to be described. + include : 'all', list-like of dtypes or None (default), optional + A white list of data types to include in the result. Ignored for ``Series``. + exclude : list-like of dtypes or None (default), optional, + A black list of data types to omit from the result. Ignored for ``Series``. + datetime_is_numeric : bool, default False + Whether to treat datetime dtypes as numeric. + percentiles : list-like of numbers, optional + The percentiles to include in the output. All should fall between 0 and 1. + The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and + 75th percentiles. + + Returns + ------- + Dataframe or series description. + """ + percentiles = refine_percentiles(percentiles) + + describer: NDFrameDescriberAbstract + if obj.ndim == 1: + describer = SeriesDescriber( + obj=cast("Series", obj), + datetime_is_numeric=datetime_is_numeric, + ) + else: + describer = DataFrameDescriber( + obj=cast("DataFrame", obj), + include=include, + exclude=exclude, + datetime_is_numeric=datetime_is_numeric, + ) + + result = describer.describe(percentiles=percentiles) + return cast(NDFrameT, result) + + +class NDFrameDescriberAbstract(ABC): + """Abstract class for describing dataframe or series. + + Parameters + ---------- + obj : Series or DataFrame + Object to be described. + datetime_is_numeric : bool + Whether to treat datetime dtypes as numeric. + """ + + def __init__(self, obj: DataFrame | Series, datetime_is_numeric: bool) -> None: + self.obj = obj + self.datetime_is_numeric = datetime_is_numeric + + @abstractmethod + def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame | Series: + """Do describe either series or dataframe. + + Parameters + ---------- + percentiles : list-like of numbers + The percentiles to include in the output. + """ + + +class SeriesDescriber(NDFrameDescriberAbstract): + """Class responsible for creating series description.""" + + obj: Series + + def describe(self, percentiles: Sequence[float] | np.ndarray) -> Series: + describe_func = select_describe_func( + self.obj, + self.datetime_is_numeric, + ) + return describe_func(self.obj, percentiles) + + +class DataFrameDescriber(NDFrameDescriberAbstract): + """Class responsible for creating dataobj description. 
+ + Parameters + ---------- + obj : DataFrame + DataFrame to be described. + include : 'all', list-like of dtypes or None + A white list of data types to include in the result. + exclude : list-like of dtypes or None + A black list of data types to omit from the result. + datetime_is_numeric : bool + Whether to treat datetime dtypes as numeric. + """ + + def __init__( + self, + obj: DataFrame, + *, + include: str | Sequence[str] | None, + exclude: str | Sequence[str] | None, + datetime_is_numeric: bool, + ) -> None: + self.include = include + self.exclude = exclude + + if obj.ndim == 2 and obj.columns.size == 0: + raise ValueError("Cannot describe a DataFrame without columns") + + super().__init__(obj, datetime_is_numeric=datetime_is_numeric) + + def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame: + data = self._select_data() + + ldesc: list[Series] = [] + for _, series in data.items(): + describe_func = select_describe_func(series, self.datetime_is_numeric) + ldesc.append(describe_func(series, percentiles)) + + col_names = reorder_columns(ldesc) + d = concat( + [x.reindex(col_names, copy=False) for x in ldesc], + axis=1, + sort=False, + ) + d.columns = data.columns.copy() + return d + + def _select_data(self): + """Select columns to be described.""" + if (self.include is None) and (self.exclude is None): + # when some numerics are found, keep only numerics + default_include: list[npt.DTypeLike] = [np.number] + if self.datetime_is_numeric: + default_include.append("datetime") + data = self.obj.select_dtypes(include=default_include) + if len(data.columns) == 0: + data = self.obj + elif self.include == "all": + if self.exclude is not None: + msg = "exclude must be None when include is 'all'" + raise ValueError(msg) + data = self.obj + else: + data = self.obj.select_dtypes( + include=self.include, + exclude=self.exclude, + ) + return data + + +def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]: + """Set a convenient order for rows for display.""" + names: list[Hashable] = [] + ldesc_indexes = sorted((x.index for x in ldesc), key=len) + for idxnames in ldesc_indexes: + for name in idxnames: + if name not in names: + names.append(name) + return names + + +def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: + """Describe series containing numerical data. + + Parameters + ---------- + series : Series + Series to be described. + percentiles : list-like of numbers + The percentiles to include in the output. + """ + from pandas import Series + + formatted_percentiles = format_percentiles(percentiles) + + stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"] + d = ( + [series.count(), series.mean(), series.std(), series.min()] + + series.quantile(percentiles).tolist() + + [series.max()] + ) + # GH#48340 - always return float on non-complex numeric data + dtype: DtypeObj | None + if is_extension_array_dtype(series): + dtype = pd.Float64Dtype() + elif is_numeric_dtype(series) and not is_complex_dtype(series): + dtype = np.dtype("float") + else: + dtype = None + return Series(d, index=stat_index, name=series.name, dtype=dtype) + + +def describe_categorical_1d( + data: Series, + percentiles_ignored: Sequence[float], +) -> Series: + """Describe series containing categorical data. + + Parameters + ---------- + data : Series + Series to be described. + percentiles_ignored : list-like of numbers + Ignored, but in place to unify interface. 
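# Annotation, not part of the imported source: the row layout produced by
# describe_numeric_1d() above for the default percentiles (pandas 1.5.3).
import pandas as pd

desc = pd.Series([1.0, 2.0, 3.0, 4.0]).describe()
assert list(desc.index) == ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]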
+ """ + names = ["count", "unique", "top", "freq"] + objcounts = data.value_counts() + count_unique = len(objcounts[objcounts != 0]) + if count_unique > 0: + top, freq = objcounts.index[0], objcounts.iloc[0] + dtype = None + else: + # If the DataFrame is empty, set 'top' and 'freq' to None + # to maintain output shape consistency + top, freq = np.nan, np.nan + dtype = "object" + + result = [data.count(), count_unique, top, freq] + + from pandas import Series + + return Series(result, index=names, name=data.name, dtype=dtype) + + +def describe_timestamp_as_categorical_1d( + data: Series, + percentiles_ignored: Sequence[float], +) -> Series: + """Describe series containing timestamp data treated as categorical. + + Parameters + ---------- + data : Series + Series to be described. + percentiles_ignored : list-like of numbers + Ignored, but in place to unify interface. + """ + names = ["count", "unique"] + objcounts = data.value_counts() + count_unique = len(objcounts[objcounts != 0]) + result = [data.count(), count_unique] + dtype = None + if count_unique > 0: + top, freq = objcounts.index[0], objcounts.iloc[0] + tz = data.dt.tz + asint = data.dropna().values.view("i8") + top = Timestamp(top) + if top.tzinfo is not None and tz is not None: + # Don't tz_localize(None) if key is already tz-aware + top = top.tz_convert(tz) + else: + top = top.tz_localize(tz) + names += ["top", "freq", "first", "last"] + result += [ + top, + freq, + Timestamp(asint.min(), tz=tz), + Timestamp(asint.max(), tz=tz), + ] + + # If the DataFrame is empty, set 'top' and 'freq' to None + # to maintain output shape consistency + else: + names += ["top", "freq"] + result += [np.nan, np.nan] + dtype = "object" + + from pandas import Series + + return Series(result, index=names, name=data.name, dtype=dtype) + + +def describe_timestamp_1d(data: Series, percentiles: Sequence[float]) -> Series: + """Describe series containing datetime64 dtype. + + Parameters + ---------- + data : Series + Series to be described. + percentiles : list-like of numbers + The percentiles to include in the output. + """ + # GH-30164 + from pandas import Series + + formatted_percentiles = format_percentiles(percentiles) + + stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"] + d = ( + [data.count(), data.mean(), data.min()] + + data.quantile(percentiles).tolist() + + [data.max()] + ) + return Series(d, index=stat_index, name=data.name) + + +def select_describe_func( + data: Series, + datetime_is_numeric: bool, +) -> Callable: + """Select proper function for describing series based on data type. + + Parameters + ---------- + data : Series + Series to be described. + datetime_is_numeric : bool + Whether to treat datetime dtypes as numeric. + """ + if is_bool_dtype(data.dtype): + return describe_categorical_1d + elif is_numeric_dtype(data): + return describe_numeric_1d + elif is_datetime64_any_dtype(data.dtype): + if datetime_is_numeric: + return describe_timestamp_1d + else: + warnings.warn( + "Treating datetime data as categorical rather than numeric in " + "`.describe` is deprecated and will be removed in a future " + "version of pandas. 
Specify `datetime_is_numeric=True` to " + "silence this warning and adopt the future behavior now.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return describe_timestamp_as_categorical_1d + elif is_timedelta64_dtype(data.dtype): + return describe_numeric_1d + else: + return describe_categorical_1d + + +def refine_percentiles( + percentiles: Sequence[float] | np.ndarray | None, +) -> np.ndarray[Any, np.dtype[np.float64]]: + """ + Ensure that percentiles are unique and sorted. + + Parameters + ---------- + percentiles : list-like of numbers, optional + The percentiles to include in the output. + """ + if percentiles is None: + return np.array([0.25, 0.5, 0.75]) + + # explicit conversion of `percentiles` to list + percentiles = list(percentiles) + + # get them all to be in [0, 1] + validate_percentile(percentiles) + + # median should always be included + if 0.5 not in percentiles: + percentiles.append(0.5) + + percentiles = np.asarray(percentiles) + + # sort and check for duplicates + unique_pcts = np.unique(percentiles) + assert percentiles is not None + if len(unique_pcts) < len(percentiles): + raise ValueError("percentiles cannot contain duplicates") + + return unique_pcts diff --git a/pandas/core/dtypes/__init__.py b/pandas/core/dtypes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py new file mode 100644 index 00000000..e6a59bf1 --- /dev/null +++ b/pandas/core/dtypes/api.py @@ -0,0 +1,87 @@ +from pandas.core.dtypes.common import ( + is_array_like, + is_bool, + is_bool_dtype, + is_categorical, + is_categorical_dtype, + is_complex, + is_complex_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_extension_type, + is_file_like, + is_float, + is_float_dtype, + is_hashable, + is_int64_dtype, + is_integer, + is_integer_dtype, + is_interval, + is_interval_dtype, + is_iterator, + is_list_like, + is_named_tuple, + is_number, + is_numeric_dtype, + is_object_dtype, + is_period_dtype, + is_re, + is_re_compilable, + is_scalar, + is_signed_integer_dtype, + is_sparse, + is_string_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) + +__all__ = [ + "is_array_like", + "is_bool", + "is_bool_dtype", + "is_categorical", + "is_categorical_dtype", + "is_complex", + "is_complex_dtype", + "is_datetime64_any_dtype", + "is_datetime64_dtype", + "is_datetime64_ns_dtype", + "is_datetime64tz_dtype", + "is_dict_like", + "is_dtype_equal", + "is_extension_array_dtype", + "is_extension_type", + "is_file_like", + "is_float", + "is_float_dtype", + "is_hashable", + "is_int64_dtype", + "is_integer", + "is_integer_dtype", + "is_interval", + "is_interval_dtype", + "is_iterator", + "is_list_like", + "is_named_tuple", + "is_number", + "is_numeric_dtype", + "is_object_dtype", + "is_period_dtype", + "is_re", + "is_re_compilable", + "is_scalar", + "is_signed_integer_dtype", + "is_sparse", + "is_string_dtype", + "is_timedelta64_dtype", + "is_timedelta64_ns_dtype", + "is_unsigned_integer_dtype", + "pandas_dtype", +] diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py new file mode 100644 index 00000000..7fb58468 --- /dev/null +++ b/pandas/core/dtypes/astype.py @@ -0,0 +1,418 @@ +""" +Functions for implementing 'astype' methods according to pandas conventions, +particularly ones that differ from numpy. 
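# Annotation, not part of the imported source: a sketch of refine_percentiles()
# defined above. The import path is internal to pandas 1.5.3 and may change.
from pandas.core.describe import refine_percentiles

assert list(refine_percentiles(None)) == [0.25, 0.5, 0.75]      # default set
assert list(refine_percentiles([0.9, 0.1])) == [0.1, 0.5, 0.9]  # 0.5 added, result sorted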
+""" +from __future__ import annotations + +import inspect +from typing import ( + TYPE_CHECKING, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import is_unitless +from pandas._libs.tslibs.timedeltas import array_to_timedelta64 +from pandas._typing import ( + ArrayLike, + DtypeObj, + IgnoreRaise, +) +from pandas.errors import IntCastingNaNError +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_integer_dtype, + is_object_dtype, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, + PandasDtype, +) +from pandas.core.dtypes.missing import isna + +if TYPE_CHECKING: + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + ) + + +_dtype_obj = np.dtype(object) + + +@overload +def astype_nansafe( + arr: np.ndarray, dtype: np.dtype, copy: bool = ..., skipna: bool = ... +) -> np.ndarray: + ... + + +@overload +def astype_nansafe( + arr: np.ndarray, dtype: ExtensionDtype, copy: bool = ..., skipna: bool = ... +) -> ExtensionArray: + ... + + +def astype_nansafe( + arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False +) -> ArrayLike: + """ + Cast the elements of an array to a given dtype a nan-safe manner. + + Parameters + ---------- + arr : ndarray + dtype : np.dtype or ExtensionDtype + copy : bool, default True + If False, a view will be attempted but may fail, if + e.g. the item sizes don't align. + skipna: bool, default False + Whether or not we should skip NaN when casting as a string-type. + + Raises + ------ + ValueError + The dtype was a datetime64/timedelta64 dtype, but it had no unit. 
+ """ + + # We get here with 0-dim from sparse + arr = np.atleast_1d(arr) + + # dispatch on extension dtype if needed + if isinstance(dtype, ExtensionDtype): + return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) + + elif not isinstance(dtype, np.dtype): # pragma: no cover + raise ValueError("dtype must be np.dtype or ExtensionDtype") + + if arr.dtype.kind in ["m", "M"] and ( + issubclass(dtype.type, str) or dtype == _dtype_obj + ): + from pandas.core.construction import ensure_wrapped_if_datetimelike + + arr = ensure_wrapped_if_datetimelike(arr) + return arr.astype(dtype, copy=copy) + + if issubclass(dtype.type, str): + shape = arr.shape + if arr.ndim > 1: + arr = arr.ravel() + return lib.ensure_string_array( + arr, skipna=skipna, convert_na_value=False + ).reshape(shape) + + elif is_datetime64_dtype(arr.dtype): + if dtype == np.int64: + if isna(arr).any(): + raise ValueError("Cannot convert NaT values to integer") + return arr.view(dtype) + + # allow frequency conversions + if dtype.kind == "M": + return arr.astype(dtype) + + raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]") + + elif is_timedelta64_dtype(arr.dtype): + if dtype == np.int64: + if isna(arr).any(): + raise ValueError("Cannot convert NaT values to integer") + return arr.view(dtype) + + elif dtype.kind == "m": + return astype_td64_unit_conversion(arr, dtype, copy=copy) + + raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") + + elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype): + return _astype_float_to_int_nansafe(arr, dtype, copy) + + elif is_object_dtype(arr.dtype): + + # if we have a datetime/timedelta array of objects + # then coerce to a proper dtype and recall astype_nansafe + + if is_datetime64_dtype(dtype): + from pandas import to_datetime + + return astype_nansafe( + to_datetime(arr.ravel()).values.reshape(arr.shape), + dtype, + copy=copy, + ) + elif is_timedelta64_dtype(dtype): + # bc we know arr.dtype == object, this is equivalent to + # `np.asarray(to_timedelta(arr))`, but using a lower-level API that + # does not require a circular import. + return array_to_timedelta64(arr).view("m8[ns]").astype(dtype, copy=False) + + if dtype.name in ("datetime64", "timedelta64"): + msg = ( + f"The '{dtype.name}' dtype has no unit. Please pass in " + f"'{dtype.name}[ns]' instead." + ) + raise ValueError(msg) + + if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype): + # Explicit copy, or required since NumPy can't view from / to object. + return arr.astype(dtype, copy=True) + + return arr.astype(dtype, copy=copy) + + +def _astype_float_to_int_nansafe( + values: np.ndarray, dtype: np.dtype, copy: bool +) -> np.ndarray: + """ + astype with a check preventing converting NaN to an meaningless integer value. + """ + if not np.isfinite(values).all(): + raise IntCastingNaNError( + "Cannot convert non-finite values (NA or inf) to integer" + ) + if dtype.kind == "u": + # GH#45151 + if not (values >= 0).all(): + raise ValueError(f"Cannot losslessly cast from {values.dtype} to {dtype}") + return values.astype(dtype, copy=copy) + + +def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike: + """ + Cast array (ndarray or ExtensionArray) to the new dtype. 
+ + Parameters + ---------- + values : ndarray or ExtensionArray + dtype : dtype object + copy : bool, default False + copy if indicated + + Returns + ------- + ndarray or ExtensionArray + """ + if ( + values.dtype.kind in ["m", "M"] + and dtype.kind in ["i", "u"] + and isinstance(dtype, np.dtype) + and dtype.itemsize != 8 + ): + # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced + msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]" + raise TypeError(msg) + + if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype): + return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True) + + if is_dtype_equal(values.dtype, dtype): + if copy: + return values.copy() + return values + + if not isinstance(values, np.ndarray): + # i.e. ExtensionArray + values = values.astype(dtype, copy=copy) + + else: + values = astype_nansafe(values, dtype, copy=copy) + + # in pandas we don't store numpy str dtypes, so convert to object + if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str): + values = np.array(values, dtype=object) + + return values + + +def astype_array_safe( + values: ArrayLike, dtype, copy: bool = False, errors: IgnoreRaise = "raise" +) -> ArrayLike: + """ + Cast array (ndarray or ExtensionArray) to the new dtype. + + This basically is the implementation for DataFrame/Series.astype and + includes all custom logic for pandas (NaN-safety, converting str to object, + not allowing ) + + Parameters + ---------- + values : ndarray or ExtensionArray + dtype : str, dtype convertible + copy : bool, default False + copy if indicated + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + Returns + ------- + ndarray or ExtensionArray + """ + errors_legal_values = ("raise", "ignore") + + if errors not in errors_legal_values: + invalid_arg = ( + "Expected value of kwarg 'errors' to be one of " + f"{list(errors_legal_values)}. Supplied value is '{errors}'" + ) + raise ValueError(invalid_arg) + + if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): + msg = ( + f"Expected an instance of {dtype.__name__}, " + "but got the class instead. Try instantiating 'dtype'." + ) + raise TypeError(msg) + + dtype = pandas_dtype(dtype) + if isinstance(dtype, PandasDtype): + # Ensure we don't end up with a PandasArray + dtype = dtype.numpy_dtype + + if ( + is_datetime64_dtype(values.dtype) + # need to do np.dtype check instead of is_datetime64_dtype + # otherwise pyright complains + and isinstance(dtype, np.dtype) + and dtype.kind == "M" + and not is_unitless(dtype) + and not is_dtype_equal(dtype, values.dtype) + ): + # unit conversion, we would re-cast to nanosecond, so this is + # effectively just a copy (regardless of copy kwd) + # TODO(2.0): remove special-case + return values.copy() + + try: + new_values = astype_array(values, dtype, copy=copy) + except (ValueError, TypeError): + # e.g. astype_nansafe can fail on object-dtype of strings + # trying to convert to float + if errors == "ignore": + new_values = values + else: + raise + + return new_values + + +def astype_td64_unit_conversion( + values: np.ndarray, dtype: np.dtype, copy: bool +) -> np.ndarray: + """ + By pandas convention, converting to non-nano timedelta64 + returns an int64-dtyped array with ints representing multiples + of the desired timedelta unit. This is essentially division. 
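# Annotation, not part of the imported source: the errors= handling implemented
# by astype_array_safe() above, seen through Series.astype (pandas 1.5.3).
import pandas as pd

ser = pd.Series(["a", "1.5"])
try:
    ser.astype(float)            # errors="raise" is the default
except ValueError as err:
    print(err)                   # could not convert string to float: 'a'

out = ser.astype(float, errors="ignore")   # failure suppressed, original returned
assert out.dtype == object and out.equals(ser)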
+ + Parameters + ---------- + values : np.ndarray[timedelta64[ns]] + dtype : np.dtype + timedelta64 with unit not-necessarily nano + copy : bool + + Returns + ------- + np.ndarray + """ + if is_dtype_equal(values.dtype, dtype): + if copy: + return values.copy() + return values + + # otherwise we are converting to non-nano + result = values.astype(dtype, copy=False) # avoid double-copying + result = result.astype(np.float64) + + mask = isna(values) + np.putmask(result, mask, np.nan) + return result + + +def astype_dt64_to_dt64tz( + values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False +) -> DatetimeArray: + # GH#33401 we have inconsistent behaviors between + # Datetimeindex[naive].astype(tzaware) + # Series[dt64].astype(tzaware) + # This collects them in one place to prevent further fragmentation. + + from pandas.core.construction import ensure_wrapped_if_datetimelike + + values = ensure_wrapped_if_datetimelike(values) + values = cast("DatetimeArray", values) + aware = isinstance(dtype, DatetimeTZDtype) + + if via_utc: + # Series.astype behavior + + # caller is responsible for checking this + assert values.tz is None and aware + dtype = cast(DatetimeTZDtype, dtype) + + if copy: + # this should be the only copy + values = values.copy() + + warnings.warn( + "Using .astype to convert from timezone-naive dtype to " + "timezone-aware dtype is deprecated and will raise in a " + "future version. Use ser.dt.tz_localize instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # GH#33401 this doesn't match DatetimeArray.astype, which + # goes through the `not via_utc` path + return values.tz_localize("UTC").tz_convert(dtype.tz) + + else: + # DatetimeArray/DatetimeIndex.astype behavior + if values.tz is None and aware: + dtype = cast(DatetimeTZDtype, dtype) + warnings.warn( + "Using .astype to convert from timezone-naive dtype to " + "timezone-aware dtype is deprecated and will raise in a " + "future version. Use obj.tz_localize instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return values.tz_localize(dtype.tz) + + elif aware: + # GH#18951: datetime64_tz dtype but not equal means different tz + dtype = cast(DatetimeTZDtype, dtype) + result = values.tz_convert(dtype.tz) + if copy: + result = result.copy() + return result + + elif values.tz is not None: + warnings.warn( + "Using .astype to convert from timezone-aware dtype to " + "timezone-naive dtype is deprecated and will raise in a " + "future version. Use obj.tz_localize(None) or " + "obj.tz_convert('UTC').tz_localize(None) instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + + result = values.tz_convert("UTC").tz_localize(None) + if copy: + result = result.copy() + return result + + raise NotImplementedError("dtype_equal case should be handled elsewhere") diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py new file mode 100644 index 00000000..5ec2aaab --- /dev/null +++ b/pandas/core/dtypes/base.py @@ -0,0 +1,528 @@ +""" +Extend pandas with custom array types. 
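# Annotation, not part of the imported source: the deprecation enforced by
# astype_dt64_to_dt64tz() above for naive -> aware conversion (pandas 1.5.3).
import pandas as pd

naive = pd.Series(pd.to_datetime(["2021-01-01", "2021-06-01"]))
# Deprecated spelling; goes through the via_utc=True path and emits a FutureWarning:
#     naive.astype("datetime64[ns, UTC]")
# Recommended spelling per the warning text:
aware = naive.dt.tz_localize("UTC")
print(aware.dtype)   # datetime64[ns, UTC]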
+""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + TypeVar, + cast, + overload, +) + +import numpy as np + +from pandas._libs import missing as libmissing +from pandas._libs.hashtable import object_hash +from pandas._typing import ( + DtypeObj, + Shape, + npt, + type_t, +) +from pandas.errors import AbstractMethodError + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas.core.arrays import ExtensionArray + + # To parameterize on same ExtensionDtype + ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound="ExtensionDtype") + + +class ExtensionDtype: + """ + A custom data type, to be paired with an ExtensionArray. + + See Also + -------- + extensions.register_extension_dtype: Register an ExtensionType + with pandas as class decorator. + extensions.ExtensionArray: Abstract base class for custom 1-D array types. + + Notes + ----- + The interface includes the following abstract methods that must + be implemented by subclasses: + + * type + * name + * construct_array_type + + The following attributes and methods influence the behavior of the dtype in + pandas operations + + * _is_numeric + * _is_boolean + * _get_common_dtype + + The `na_value` class attribute can be used to set the default NA value + for this type. :attr:`numpy.nan` is used by default. + + ExtensionDtypes are required to be hashable. The base class provides + a default implementation, which relies on the ``_metadata`` class + attribute. ``_metadata`` should be a tuple containing the strings + that define your data type. For example, with ``PeriodDtype`` that's + the ``freq`` attribute. + + **If you have a parametrized dtype you should set the ``_metadata`` + class property**. + + Ideally, the attributes in ``_metadata`` will match the + parameters to your ``ExtensionDtype.__init__`` (if any). If any of + the attributes in ``_metadata`` don't implement the standard + ``__eq__`` or ``__hash__``, the default implementations here will not + work. + + For interaction with Apache Arrow (pyarrow), a ``__from_arrow__`` method + can be implemented: this method receives a pyarrow Array or ChunkedArray + as only argument and is expected to return the appropriate pandas + ExtensionArray for this dtype and the passed values:: + + class ExtensionDtype: + + def __from_arrow__( + self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] + ) -> ExtensionArray: + ... + + This class does not inherit from 'abc.ABCMeta' for performance reasons. + Methods and properties required by the interface raise + ``pandas.errors.AbstractMethodError`` and no ``register`` method is + provided for registering virtual subclasses. + """ + + _metadata: tuple[str, ...] = () + + def __str__(self) -> str: + return self.name + + def __eq__(self, other: Any) -> bool: + """ + Check whether 'other' is equal to self. + + By default, 'other' is considered equal if either + + * it's a string matching 'self.name'. + * it's an instance of this type and all of the attributes + in ``self._metadata`` are equal between `self` and `other`. 
+ + Parameters + ---------- + other : Any + + Returns + ------- + bool + """ + if isinstance(other, str): + try: + other = self.construct_from_string(other) + except TypeError: + return False + if isinstance(other, type(self)): + return all( + getattr(self, attr) == getattr(other, attr) for attr in self._metadata + ) + return False + + def __hash__(self) -> int: + # for python>=3.10, different nan objects have different hashes + # we need to avoid that und thus use hash function with old behavior + return object_hash(tuple(getattr(self, attr) for attr in self._metadata)) + + def __ne__(self, other: Any) -> bool: + return not self.__eq__(other) + + @property + def na_value(self) -> object: + """ + Default NA value to use for this type. + + This is used in e.g. ExtensionArray.take. This should be the + user-facing "boxed" version of the NA value, not the physical NA value + for storage. e.g. for JSONArray, this is an empty dictionary. + """ + return np.nan + + @property + def type(self) -> type_t[Any]: + """ + The scalar type for the array, e.g. ``int`` + + It's expected ``ExtensionArray[item]`` returns an instance + of ``ExtensionDtype.type`` for scalar ``item``, assuming + that value is valid (not NA). NA values do not need to be + instances of `type`. + """ + raise AbstractMethodError(self) + + @property + def kind(self) -> str: + """ + A character code (one of 'biufcmMOSUV'), default 'O' + + This should match the NumPy dtype used when the array is + converted to an ndarray, which is probably 'O' for object if + the extension type cannot be represented as a built-in NumPy + type. + + See Also + -------- + numpy.dtype.kind + """ + return "O" + + @property + def name(self) -> str: + """ + A string identifying the data type. + + Will be used for display in, e.g. ``Series.dtype`` + """ + raise AbstractMethodError(self) + + @property + def names(self) -> list[str] | None: + """ + Ordered list of field names, or None if there are no fields. + + This is for compatibility with NumPy arrays, and may be removed in the + future. + """ + return None + + @classmethod + def construct_array_type(cls) -> type_t[ExtensionArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + raise AbstractMethodError(cls) + + def empty(self, shape: Shape) -> type_t[ExtensionArray]: + """ + Construct an ExtensionArray of this dtype with the given shape. + + Analogous to numpy.empty. + + Parameters + ---------- + shape : int or tuple[int] + + Returns + ------- + ExtensionArray + """ + cls = self.construct_array_type() + return cls._empty(shape, dtype=self) + + @classmethod + def construct_from_string( + cls: type_t[ExtensionDtypeT], string: str + ) -> ExtensionDtypeT: + r""" + Construct this type from a string. + + This is useful mainly for data types that accept parameters. + For example, a period dtype accepts a frequency parameter that + can be set as ``period[H]`` (where H means hourly frequency). + + By default, in the abstract class, just the name of the type is + expected. But subclasses can overwrite this method to accept + parameters. + + Parameters + ---------- + string : str + The name of the type, for example ``category``. + + Returns + ------- + ExtensionDtype + Instance of the dtype. + + Raises + ------ + TypeError + If a class cannot be constructed from this 'string'. + + Examples + -------- + For extension dtypes with arguments the following may be an + adequate implementation. + + >>> @classmethod + ... def construct_from_string(cls, string): + ... 
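# Annotation, not part of the imported source: construct_from_string() on two
# parametrized built-in dtypes (pandas 1.5.3).
import pandas as pd

assert pd.PeriodDtype.construct_from_string("period[M]") == pd.PeriodDtype(freq="M")
assert pd.DatetimeTZDtype.construct_from_string("datetime64[ns, UTC]") == pd.DatetimeTZDtype(tz="UTC")
try:
    pd.DatetimeTZDtype.construct_from_string("not-a-dtype")
except TypeError as err:         # this TypeError is what is_dtype()/registry lookups rely on
    print(err)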
pattern = re.compile(r"^my_type\[(?P.+)\]$") + ... match = pattern.match(string) + ... if match: + ... return cls(**match.groupdict()) + ... else: + ... raise TypeError( + ... f"Cannot construct a '{cls.__name__}' from '{string}'" + ... ) + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + # error: Non-overlapping equality check (left operand type: "str", right + # operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap] + assert isinstance(cls.name, str), (cls, type(cls.name)) + if string != cls.name: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + return cls() + + @classmethod + def is_dtype(cls, dtype: object) -> bool: + """ + Check if we match 'dtype'. + + Parameters + ---------- + dtype : object + The object to check. + + Returns + ------- + bool + + Notes + ----- + The default implementation is True if + + 1. ``cls.construct_from_string(dtype)`` is an instance + of ``cls``. + 2. ``dtype`` is an object and is an instance of ``cls`` + 3. ``dtype`` has a ``dtype`` attribute, and any of the above + conditions is true for ``dtype.dtype``. + """ + dtype = getattr(dtype, "dtype", dtype) + + if isinstance(dtype, (ABCSeries, ABCIndex, ABCDataFrame, np.dtype)): + # https://github.com/pandas-dev/pandas/issues/22960 + # avoid passing data to `construct_from_string`. This could + # cause a FutureWarning from numpy about failing elementwise + # comparison from, e.g., comparing DataFrame == 'category'. + return False + elif dtype is None: + return False + elif isinstance(dtype, cls): + return True + if isinstance(dtype, str): + try: + return cls.construct_from_string(dtype) is not None + except TypeError: + return False + return False + + @property + def _is_numeric(self) -> bool: + """ + Whether columns with this dtype should be considered numeric. + + By default ExtensionDtypes are assumed to be non-numeric. + They'll be excluded from operations that exclude non-numeric + columns, like (groupby) reductions, plotting, etc. + """ + return False + + @property + def _is_boolean(self) -> bool: + """ + Whether this dtype should be considered boolean. + + By default, ExtensionDtypes are assumed to be non-numeric. + Setting this to True will affect the behavior of several places, + e.g. + + * is_bool + * boolean indexing + + Returns + ------- + bool + """ + return False + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + """ + Return the common dtype, if one exists. + + Used in `find_common_type` implementation. This is for example used + to determine the resulting dtype in a concat operation. + + If no common dtype exists, return None (which gives the other dtypes + the chance to determine a common dtype). If all dtypes in the list + return None, then the common dtype will be "object" dtype (this means + it is never needed to return "object" dtype from this method itself). + + Parameters + ---------- + dtypes : list of dtypes + The dtypes for which to determine a common dtype. This is a list + of np.dtype or ExtensionDtype instances. + + Returns + ------- + Common dtype (np.dtype or ExtensionDtype) or None + """ + if len(set(dtypes)) == 1: + # only itself + return self + else: + return None + + @property + def _can_hold_na(self) -> bool: + """ + Can arrays of this dtype hold NA values? 
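# Annotation, not part of the imported source: the is_dtype() resolution rules
# documented above, shown with CategoricalDtype (pandas 1.5.3).
import numpy as np
import pandas as pd

ser = pd.Series(["a", "b"], dtype="category")
assert pd.CategoricalDtype.is_dtype(ser)              # looks at ser.dtype
assert pd.CategoricalDtype.is_dtype("category")       # string via construct_from_string
assert not pd.CategoricalDtype.is_dtype("int64")
assert not pd.CategoricalDtype.is_dtype(np.dtype("int64"))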
+ """ + return True + + +class StorageExtensionDtype(ExtensionDtype): + """ExtensionDtype that may be backed by more than one implementation.""" + + name: str + _metadata = ("storage",) + + def __init__(self, storage=None) -> None: + self.storage = storage + + def __repr__(self) -> str: + return f"{self.name}[{self.storage}]" + + def __str__(self): + return self.name + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str) and other == self.name: + return True + return super().__eq__(other) + + def __hash__(self) -> int: + # custom __eq__ so have to override __hash__ + return super().__hash__() + + @property + def na_value(self) -> libmissing.NAType: + return libmissing.NA + + +def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]: + """ + Register an ExtensionType with pandas as class decorator. + + This enables operations like ``.astype(name)`` for the name + of the ExtensionDtype. + + Returns + ------- + callable + A class decorator. + + Examples + -------- + >>> from pandas.api.extensions import register_extension_dtype, ExtensionDtype + >>> @register_extension_dtype + ... class MyExtensionDtype(ExtensionDtype): + ... name = "myextension" + """ + _registry.register(cls) + return cls + + +class Registry: + """ + Registry for dtype inference. + + The registry allows one to map a string repr of a extension + dtype to an extension dtype. The string alias can be used in several + places, including + + * Series and Index constructors + * :meth:`pandas.array` + * :meth:`pandas.Series.astype` + + Multiple extension types can be registered. + These are tried in order. + """ + + def __init__(self) -> None: + self.dtypes: list[type_t[ExtensionDtype]] = [] + + def register(self, dtype: type_t[ExtensionDtype]) -> None: + """ + Parameters + ---------- + dtype : ExtensionDtype class + """ + if not issubclass(dtype, ExtensionDtype): + raise ValueError("can only register pandas extension dtypes") + + self.dtypes.append(dtype) + + @overload + def find(self, dtype: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]: + ... + + @overload + def find(self, dtype: ExtensionDtypeT) -> ExtensionDtypeT: + ... + + @overload + def find(self, dtype: str) -> ExtensionDtype | None: + ... + + @overload + def find( + self, dtype: npt.DTypeLike + ) -> type_t[ExtensionDtype] | ExtensionDtype | None: + ... + + def find( + self, dtype: type_t[ExtensionDtype] | ExtensionDtype | npt.DTypeLike + ) -> type_t[ExtensionDtype] | ExtensionDtype | None: + """ + Parameters + ---------- + dtype : ExtensionDtype class or instance or str or numpy dtype or python type + + Returns + ------- + return the first matching dtype, otherwise return None + """ + if not isinstance(dtype, str): + dtype_type: type_t + if not isinstance(dtype, type): + dtype_type = type(dtype) + else: + dtype_type = dtype + if issubclass(dtype_type, ExtensionDtype): + # cast needed here as mypy doesn't know we have figured + # out it is an ExtensionDtype or type_t[ExtensionDtype] + return cast("ExtensionDtype | type_t[ExtensionDtype]", dtype) + + return None + + for dtype_type in self.dtypes: + try: + return dtype_type.construct_from_string(dtype) + except TypeError: + pass + + return None + + +_registry = Registry() diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py new file mode 100644 index 00000000..f7df0b8a --- /dev/null +++ b/pandas/core/dtypes/cast.py @@ -0,0 +1,2105 @@ +""" +Routines for casting. 
+""" + +from __future__ import annotations + +from datetime import ( + date, + datetime, + timedelta, +) +import functools +from typing import ( + TYPE_CHECKING, + Any, + Sized, + TypeVar, + cast, + overload, +) +import warnings + +from dateutil.parser import ParserError +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import ( + NaT, + OutOfBoundsDatetime, + OutOfBoundsTimedelta, + Timedelta, + Timestamp, + astype_overflowsafe, +) +from pandas._libs.tslibs.timedeltas import array_to_timedelta64 +from pandas._typing import ( + ArrayLike, + Dtype, + DtypeObj, + Scalar, +) +from pandas.errors import IntCastingNaNError +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.astype import astype_nansafe +from pandas.core.dtypes.common import ( + DT64NS_DTYPE, + TD64NS_DTYPE, + ensure_int8, + ensure_int16, + ensure_int32, + ensure_int64, + ensure_object, + ensure_str, + is_bool, + is_bool_dtype, + is_complex, + is_complex_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.inference import is_list_like +from pandas.core.dtypes.missing import ( + array_equivalent, + is_valid_na_for_dtype, + isna, + na_value_for_dtype, + notna, +) + +if TYPE_CHECKING: + + from pandas import Index + from pandas.core.arrays import ( + Categorical, + DatetimeArray, + ExtensionArray, + IntervalArray, + PeriodArray, + TimedeltaArray, + ) + + +_int8_max = np.iinfo(np.int8).max +_int16_max = np.iinfo(np.int16).max +_int32_max = np.iinfo(np.int32).max +_int64_max = np.iinfo(np.int64).max + +_dtype_obj = np.dtype(object) + +NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray) + + +def maybe_convert_platform( + values: list | tuple | range | np.ndarray | ExtensionArray, +) -> ArrayLike: + """try to do platform conversion, allow ndarray or list here""" + arr: ArrayLike + + if isinstance(values, (list, tuple, range)): + arr = construct_1d_object_array_from_listlike(values) + else: + # The caller is responsible for ensuring that we have np.ndarray + # or ExtensionArray here. + arr = values + + if arr.dtype == _dtype_obj: + arr = cast(np.ndarray, arr) + arr = lib.maybe_convert_objects(arr) + + return arr + + +def is_nested_object(obj) -> bool: + """ + return a boolean if we have a nested object, e.g. a Series with 1 or + more Series elements + + This may not be necessarily be performant. + + """ + return bool( + isinstance(obj, ABCSeries) + and is_object_dtype(obj.dtype) + and any(isinstance(v, ABCSeries) for v in obj._values) + ) + + +def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar: + """ + Cast scalar to Timestamp or Timedelta if scalar is datetime-like + and dtype is not object. 
+ + Parameters + ---------- + value : scalar + dtype : Dtype, optional + + Returns + ------- + scalar + """ + if dtype == _dtype_obj: + pass + elif isinstance(value, (np.datetime64, datetime)): + value = Timestamp(value) + elif isinstance(value, (np.timedelta64, timedelta)): + value = Timedelta(value) + + return value + + +def maybe_box_native(value: Scalar) -> Scalar: + """ + If passed a scalar cast the scalar to a python native type. + + Parameters + ---------- + value : scalar or Series + + Returns + ------- + scalar or Series + """ + if is_float(value): + # error: Argument 1 to "float" has incompatible type + # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; + # expected "Union[SupportsFloat, _SupportsIndex, str]" + value = float(value) # type: ignore[arg-type] + elif is_integer(value): + # error: Argument 1 to "int" has incompatible type + # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; + # expected "Union[str, SupportsInt, _SupportsIndex, _SupportsTrunc]" + value = int(value) # type: ignore[arg-type] + elif is_bool(value): + value = bool(value) + elif isinstance(value, (np.datetime64, np.timedelta64)): + value = maybe_box_datetimelike(value) + return value + + +def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: + """ + Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting + into a numpy array. Failing to unbox would risk dropping nanoseconds. + + Notes + ----- + Caller is responsible for checking dtype.kind in ["m", "M"] + """ + if is_valid_na_for_dtype(value, dtype): + # GH#36541: can't fill array directly with pd.NaT + # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT) + # ValueError: cannot convert float NaN to integer + value = dtype.type("NaT", "ns") + elif isinstance(value, Timestamp): + if value.tz is None: + value = value.to_datetime64() + elif not isinstance(dtype, DatetimeTZDtype): + raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype") + elif isinstance(value, Timedelta): + value = value.to_timedelta64() + + _disallow_mismatched_datetimelike(value, dtype) + return value + + +def _disallow_mismatched_datetimelike(value, dtype: DtypeObj): + """ + numpy allows np.array(dt64values, dtype="timedelta64[ns]") and + vice-versa, but we do not want to allow this, so we need to + check explicitly + """ + vdtype = getattr(value, "dtype", None) + if vdtype is None: + return + elif (vdtype.kind == "m" and dtype.kind == "M") or ( + vdtype.kind == "M" and dtype.kind == "m" + ): + raise TypeError(f"Cannot cast {repr(value)} to {dtype}") + + +@overload +def maybe_downcast_to_dtype(result: np.ndarray, dtype: str | np.dtype) -> np.ndarray: + ... + + +@overload +def maybe_downcast_to_dtype(result: ExtensionArray, dtype: str | np.dtype) -> ArrayLike: + ... + + +def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike: + """ + try to cast to the specified dtype (e.g. 
convert back to bool/int + or could be an astype of float64->float32 + """ + do_round = False + + if isinstance(dtype, str): + if dtype == "infer": + inferred_type = lib.infer_dtype(result, skipna=False) + if inferred_type == "boolean": + dtype = "bool" + elif inferred_type == "integer": + dtype = "int64" + elif inferred_type == "datetime64": + dtype = "datetime64[ns]" + elif inferred_type in ["timedelta", "timedelta64"]: + dtype = "timedelta64[ns]" + + # try to upcast here + elif inferred_type == "floating": + dtype = "int64" + if issubclass(result.dtype.type, np.number): + do_round = True + + else: + # TODO: complex? what if result is already non-object? + dtype = "object" + + dtype = np.dtype(dtype) + + if not isinstance(dtype, np.dtype): + # enforce our signature annotation + raise TypeError(dtype) # pragma: no cover + + converted = maybe_downcast_numeric(result, dtype, do_round) + if converted is not result: + return converted + + # a datetimelike + # GH12821, iNaT is cast to float + if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]: + result = result.astype(dtype) + + elif dtype.kind == "m" and result.dtype == _dtype_obj: + # test_where_downcast_to_td64 + result = cast(np.ndarray, result) + result = array_to_timedelta64(result) + + elif dtype == np.dtype("M8[ns]") and result.dtype == _dtype_obj: + return np.asarray(maybe_cast_to_datetime(result, dtype=dtype)) + + return result + + +@overload +def maybe_downcast_numeric( + result: np.ndarray, dtype: np.dtype, do_round: bool = False +) -> np.ndarray: + ... + + +@overload +def maybe_downcast_numeric( + result: ExtensionArray, dtype: DtypeObj, do_round: bool = False +) -> ArrayLike: + ... + + +def maybe_downcast_numeric( + result: ArrayLike, dtype: DtypeObj, do_round: bool = False +) -> ArrayLike: + """ + Subset of maybe_downcast_to_dtype restricted to numeric dtypes. + + Parameters + ---------- + result : ndarray or ExtensionArray + dtype : np.dtype or ExtensionDtype + do_round : bool + + Returns + ------- + ndarray or ExtensionArray + """ + if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype): + # e.g. SparseDtype has no itemsize attr + return result + + def trans(x): + if do_round: + return x.round() + return x + + if dtype.kind == result.dtype.kind: + # don't allow upcasts here (except if empty) + if result.dtype.itemsize <= dtype.itemsize and result.size: + return result + + if is_bool_dtype(dtype) or is_integer_dtype(dtype): + + if not result.size: + # if we don't have any elements, just astype it + return trans(result).astype(dtype) + + # do a test on the first element, if it fails then we are done + r = result.ravel() + arr = np.array([r[0]]) + + if isna(arr).any(): + # if we have any nulls, then we are done + return result + + elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)): + # a comparable, e.g. 
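# Annotation, not part of the imported source: a sketch of maybe_downcast_to_dtype()
# with dtype="infer", as traced from the code above (internal to pandas 1.5.3).
import numpy as np
from pandas.core.dtypes.cast import maybe_downcast_to_dtype

out = maybe_downcast_to_dtype(np.array([1.0, 2.0, 3.0]), "infer")
assert out.dtype == np.dtype("int64")     # whole floats round-trip, so they are downcast

out = maybe_downcast_to_dtype(np.array([1.5, 2.5]), "infer")
assert out.dtype == np.dtype("float64")   # lossy downcasts are rejected, input returned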
a Decimal may slip in here + return result + + if ( + issubclass(result.dtype.type, (np.object_, np.number)) + and notna(result).all() + ): + new_result = trans(result).astype(dtype) + if new_result.dtype.kind == "O" or result.dtype.kind == "O": + # np.allclose may raise TypeError on object-dtype + if (new_result == result).all(): + return new_result + else: + if np.allclose(new_result, result, rtol=0): + return new_result + + elif ( + issubclass(dtype.type, np.floating) + and not is_bool_dtype(result.dtype) + and not is_string_dtype(result.dtype) + ): + new_result = result.astype(dtype) + + # Adjust tolerances based on floating point size + size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16} + + atol = size_tols.get(new_result.dtype.itemsize, 0.0) + + # Check downcast float values are still equal within 7 digits when + # converting from float64 to float32 + if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol): + return new_result + + elif dtype.kind == result.dtype.kind == "c": + new_result = result.astype(dtype) + + if array_equivalent(new_result, result): + # TODO: use tolerance like we do for float? + return new_result + + return result + + +def maybe_cast_pointwise_result( + result: ArrayLike, + dtype: DtypeObj, + numeric_only: bool = False, + same_dtype: bool = True, +) -> ArrayLike: + """ + Try casting result of a pointwise operation back to the original dtype if + appropriate. + + Parameters + ---------- + result : array-like + Result to cast. + dtype : np.dtype or ExtensionDtype + Input Series from which result was calculated. + numeric_only : bool, default False + Whether to cast only numerics or datetimes as well. + same_dtype : bool, default True + Specify dtype when calling _from_sequence + + Returns + ------- + result : array-like + result maybe casted to the dtype. + """ + + assert not is_scalar(result) + + if isinstance(dtype, ExtensionDtype): + if not isinstance(dtype, (CategoricalDtype, DatetimeTZDtype)): + # TODO: avoid this special-casing + # We have to special case categorical so as not to upcast + # things like counts back to categorical + + cls = dtype.construct_array_type() + if same_dtype: + result = maybe_cast_to_extension_array(cls, result, dtype=dtype) + else: + result = maybe_cast_to_extension_array(cls, result) + + elif (numeric_only and is_numeric_dtype(dtype)) or not numeric_only: + result = maybe_downcast_to_dtype(result, dtype) + + return result + + +def maybe_cast_to_extension_array( + cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None +) -> ArrayLike: + """ + Call to `_from_sequence` that returns the object unchanged on Exception. + + Parameters + ---------- + cls : class, subclass of ExtensionArray + obj : arraylike + Values to pass to cls._from_sequence + dtype : ExtensionDtype, optional + + Returns + ------- + ExtensionArray or obj + """ + from pandas.core.arrays.string_ import BaseStringArray + + assert isinstance(cls, type), f"must pass a type: {cls}" + assertion_msg = f"must pass a subclass of ExtensionArray: {cls}" + assert issubclass(cls, ABCExtensionArray), assertion_msg + + # Everything can be converted to StringArrays, but we may not want to convert + if issubclass(cls, BaseStringArray) and lib.infer_dtype(obj) != "string": + return obj + + try: + result = cls._from_sequence(obj, dtype=dtype) + except Exception: + # We can't predict what downstream EA constructors may raise + result = obj + return result + + +@overload +def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype: + ... 
+ + +@overload +def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype: + ... + + +def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj: + """ + If we have a dtype that cannot hold NA values, find the best match that can. + """ + if isinstance(dtype, ExtensionDtype): + if dtype._can_hold_na: + return dtype + elif isinstance(dtype, IntervalDtype): + # TODO(GH#45349): don't special-case IntervalDtype, allow + # overriding instead of returning object below. + return IntervalDtype(np.float64, closed=dtype.closed) + return _dtype_obj + elif dtype.kind == "b": + return _dtype_obj + elif dtype.kind in ["i", "u"]: + return np.dtype(np.float64) + return dtype + + +def maybe_promote(dtype: np.dtype, fill_value=np.nan): + """ + Find the minimal dtype that can hold both the given dtype and fill_value. + + Parameters + ---------- + dtype : np.dtype + fill_value : scalar, default np.nan + + Returns + ------- + dtype + Upcasted from dtype argument if necessary. + fill_value + Upcasted from fill_value argument if necessary. + + Raises + ------ + ValueError + If fill_value is a non-scalar and dtype is not object. + """ + # TODO(2.0): need to directly use the non-cached version as long as we + # possibly raise a deprecation warning for datetime dtype + if dtype.kind == "M": + return _maybe_promote(dtype, fill_value) + # for performance, we are using a cached version of the actual implementation + # of the function in _maybe_promote. However, this doesn't always work (in case + # of non-hashable arguments), so we fallback to the actual implementation if needed + try: + # error: Argument 3 to "__call__" of "_lru_cache_wrapper" has incompatible type + # "Type[Any]"; expected "Hashable" [arg-type] + return _maybe_promote_cached( + dtype, fill_value, type(fill_value) # type: ignore[arg-type] + ) + except TypeError: + # if fill_value is not hashable (required for caching) + return _maybe_promote(dtype, fill_value) + + +@functools.lru_cache(maxsize=128) +def _maybe_promote_cached(dtype, fill_value, fill_value_type): + # The cached version of _maybe_promote below + # This also use fill_value_type as (unused) argument to use this in the + # cache lookup -> to differentiate 1 and True + return _maybe_promote(dtype, fill_value) + + +def _maybe_promote(dtype: np.dtype, fill_value=np.nan): + # The actual implementation of the function, use `maybe_promote` above for + # a cached version. + if not is_scalar(fill_value): + # with object dtype there is nothing to promote, and the user can + # pass pretty much any weird fill_value they like + if not is_object_dtype(dtype): + # with object dtype there is nothing to promote, and the user can + # pass pretty much any weird fill_value they like + raise ValueError("fill_value must be a scalar") + dtype = _dtype_obj + return dtype, fill_value + + kinds = ["i", "u", "f", "c", "m", "M"] + if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds: + dtype = ensure_dtype_can_hold_na(dtype) + fv = na_value_for_dtype(dtype) + return dtype, fv + + elif isinstance(dtype, CategoricalDtype): + if fill_value in dtype.categories or isna(fill_value): + return dtype, fill_value + else: + return object, ensure_object(fill_value) + + elif isna(fill_value): + dtype = _dtype_obj + if fill_value is None: + # but we retain e.g. 
pd.NA + fill_value = np.nan + return dtype, fill_value + + # returns tuple of (dtype, fill_value) + if issubclass(dtype.type, np.datetime64): + inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True) + if inferred == dtype: + return dtype, fv + + # TODO(2.0): once this deprecation is enforced, this whole case + # becomes equivalent to: + # dta = DatetimeArray._from_sequence([], dtype="M8[ns]") + # try: + # fv = dta._validate_setitem_value(fill_value) + # return dta.dtype, fv + # except (ValueError, TypeError): + # return _dtype_obj, fill_value + if isinstance(fill_value, date) and not isinstance(fill_value, datetime): + # deprecate casting of date object to match infer_dtype_from_scalar + # and DatetimeArray._validate_setitem_value + try: + fv = Timestamp(fill_value).to_datetime64() + except OutOfBoundsDatetime: + pass + else: + warnings.warn( + "Using a `date` object for fill_value with `datetime64[ns]` " + "dtype is deprecated. In a future version, this will be cast " + "to object dtype. Pass `fill_value=Timestamp(date_obj)` instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return dtype, fv + elif isinstance(fill_value, str): + try: + # explicitly wrap in str to convert np.str_ + fv = Timestamp(str(fill_value)) + except (ValueError, TypeError): + pass + else: + if isna(fv) or fv.tz is None: + return dtype, fv.asm8 + + return np.dtype("object"), fill_value + + elif issubclass(dtype.type, np.timedelta64): + inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True) + if inferred == dtype: + return dtype, fv + + return np.dtype("object"), fill_value + + elif is_float(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, np.integer): + dtype = np.dtype(np.float64) + + elif dtype.kind == "f": + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # e.g. mst is np.float64 and dtype is np.float32 + dtype = mst + + elif dtype.kind == "c": + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + + elif is_bool(fill_value): + if not issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif is_integer(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, np.integer): + if not np.can_cast(fill_value, dtype): + # upcast to prevent overflow + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + if dtype.kind == "f": + # Case where we disagree with numpy + dtype = np.dtype(np.object_) + + elif is_complex(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, (np.integer, np.floating)): + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + + elif dtype.kind == "c": + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # e.g. mst is np.complex128 and dtype is np.complex64 + dtype = mst + + else: + dtype = np.dtype(np.object_) + + # in case we have a string that looked like a number + if issubclass(dtype.type, (bytes, str)): + dtype = np.dtype(np.object_) + + fill_value = _ensure_dtype_type(fill_value, dtype) + return dtype, fill_value + + +def _ensure_dtype_type(value, dtype: np.dtype): + """ + Ensure that the given value is an instance of the given dtype. + + e.g. if out dtype is np.complex64_, we should have an instance of that + as opposed to a python complex object. 
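# Annotation, not part of the imported source: maybe_promote() above finds the
# smallest dtype/fill_value pair that can hold both inputs (internal to pandas 1.5.3).
import numpy as np
from pandas.core.dtypes.cast import maybe_promote

assert maybe_promote(np.dtype("int64"), np.nan)[0] == np.dtype("float64")
assert maybe_promote(np.dtype("bool"), np.nan)[0] == np.dtype("object")
assert maybe_promote(np.dtype("uint8"), 1000)[0] == np.dtype("uint16")   # upcast to avoid overflow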
+ + Parameters + ---------- + value : object + dtype : np.dtype + + Returns + ------- + object + """ + # Start with exceptions in which we do _not_ cast to numpy types + + if dtype == _dtype_obj: + return value + + # Note: before we get here we have already excluded isna(value) + return dtype.type(value) + + +def infer_dtype_from(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]: + """ + Interpret the dtype from a scalar or array. + + Parameters + ---------- + val : object + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, scalar/array belongs to pandas extension types is inferred as + object + """ + if not is_list_like(val): + return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) + return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) + + +def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]: + """ + Interpret the dtype from a scalar. + + Parameters + ---------- + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, scalar belongs to pandas extension types is inferred as + object + """ + dtype: DtypeObj = _dtype_obj + + # a 1-element ndarray + if isinstance(val, np.ndarray): + if val.ndim != 0: + msg = "invalid ndarray passed to infer_dtype_from_scalar" + raise ValueError(msg) + + dtype = val.dtype + val = lib.item_from_zerodim(val) + + elif isinstance(val, str): + + # If we create an empty array using a string to infer + # the dtype, NumPy will only allocate one character per entry + # so this is kind of bad. Alternately we could use np.repeat + # instead of np.empty (but then you still don't want things + # coming out as np.str_! + + dtype = _dtype_obj + + elif isinstance(val, (np.datetime64, datetime)): + try: + val = Timestamp(val) + except OutOfBoundsDatetime: + return _dtype_obj, val + + # error: Non-overlapping identity check (left operand type: "Timestamp", + # right operand type: "NaTType") + if val is NaT or val.tz is None: # type: ignore[comparison-overlap] + dtype = np.dtype("M8[ns]") + val = val.to_datetime64() + else: + if pandas_dtype: + dtype = DatetimeTZDtype(unit="ns", tz=val.tz) + else: + # return datetimetz as object + return _dtype_obj, val + + elif isinstance(val, (np.timedelta64, timedelta)): + try: + val = Timedelta(val) + except (OutOfBoundsTimedelta, OverflowError): + dtype = _dtype_obj + else: + dtype = np.dtype("m8[ns]") + val = np.timedelta64(val.value, "ns") + + elif is_bool(val): + dtype = np.dtype(np.bool_) + + elif is_integer(val): + if isinstance(val, np.integer): + dtype = np.dtype(type(val)) + else: + dtype = np.dtype(np.int64) + + try: + np.array(val, dtype=dtype) + except OverflowError: + dtype = np.array(val).dtype + + elif is_float(val): + if isinstance(val, np.floating): + dtype = np.dtype(type(val)) + else: + dtype = np.dtype(np.float64) + + elif is_complex(val): + dtype = np.dtype(np.complex_) + + elif pandas_dtype: + if lib.is_period(val): + dtype = PeriodDtype(freq=val.freq) + elif lib.is_interval(val): + subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0] + dtype = IntervalDtype(subtype=subtype, closed=val.closed) + + return dtype, val + + +def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]: + """ + Convert datetimelike-keyed dicts to a Timestamp-keyed dict. 
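# Annotation, not part of the imported source: infer_dtype_from_scalar() above,
# with and without pandas extension types (internal to pandas 1.5.3).
import numpy as np
import pandas as pd
from pandas.core.dtypes.cast import infer_dtype_from_scalar

assert infer_dtype_from_scalar(1)[0] == np.dtype("int64")
assert infer_dtype_from_scalar(1.5)[0] == np.dtype("float64")
assert infer_dtype_from_scalar("x")[0] == np.dtype(object)
ts = pd.Timestamp("2021-01-01", tz="UTC")
assert infer_dtype_from_scalar(ts, pandas_dtype=True)[0] == pd.DatetimeTZDtype(tz="UTC")
assert infer_dtype_from_scalar(ts)[0] == np.dtype(object)   # extension types fall back to object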
+
+    Parameters
+    ----------
+    d: dict-like object
+
+    Returns
+    -------
+    dict
+    """
+    return {maybe_box_datetimelike(key): value for key, value in d.items()}
+
+
+def infer_dtype_from_array(
+    arr, pandas_dtype: bool = False
+) -> tuple[DtypeObj, ArrayLike]:
+    """
+    Infer the dtype from an array.
+
+    Parameters
+    ----------
+    arr : array
+    pandas_dtype : bool, default False
+        whether to infer dtype including pandas extension types.
+        If False, arrays belonging to pandas extension types
+        are inferred as object
+
+    Returns
+    -------
+    tuple (numpy-compat/pandas-compat dtype, array)
+
+    Notes
+    -----
+    if pandas_dtype=False, these infer to numpy dtypes
+    exactly with the exception that mixed / object dtypes
+    are not coerced by stringifying or conversion
+
+    if pandas_dtype=True, datetime64tz-aware/categorical
+    types will retain their character.
+
+    Examples
+    --------
+    >>> np.asarray([1, '1'])
+    array(['1', '1'], dtype='<U21')
+
+    >>> infer_dtype_from_array([1, '1'])
+    (dtype('O'), [1, '1'])
+    """
+    if isinstance(arr, np.ndarray):
+        return arr.dtype, arr
+
+    if not is_list_like(arr):
+        raise TypeError("'arr' must be list-like")
+
+    if pandas_dtype and is_extension_array_dtype(arr):
+        return arr.dtype, arr
+
+    elif isinstance(arr, ABCSeries):
+        return arr.dtype, np.asarray(arr)
+
+    # don't force numpy coerce with nan's
+    inferred = lib.infer_dtype(arr, skipna=False)
+    if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
+        return (np.dtype(np.object_), arr)
+
+    arr = np.asarray(arr)
+    return arr.dtype, arr
+
+
+def _maybe_infer_dtype_type(element):
+    """
+    Try to infer an object's dtype, for use in arithmetic ops.
+
+    Uses `element.dtype` if that's available.
+    Objects implementing the iterator protocol are cast to a NumPy array,
+    and from there the array's type is used.
+
+    Parameters
+    ----------
+    element : object
+        Possibly has a `.dtype` attribute, and possibly the iterator
+        protocol.
+
+    Returns
+    -------
+    tipo : type
+
+    Examples
+    --------
+    >>> from collections import namedtuple
+    >>> Foo = namedtuple("Foo", "dtype")
+    >>> _maybe_infer_dtype_type(Foo(np.dtype("i8")))
+    dtype('int64')
+    """
+    tipo = None
+    if hasattr(element, "dtype"):
+        tipo = element.dtype
+    elif is_list_like(element):
+        element = np.asarray(element)
+        tipo = element.dtype
+    return tipo
+
+
+def maybe_upcast(
+    values: NumpyArrayT,
+    fill_value: Scalar = np.nan,
+    copy: bool = False,
+) -> tuple[NumpyArrayT, Scalar]:
+    """
+    Provide explicit type promotion and coercion.
+
+    Parameters
+    ----------
+    values : np.ndarray
+        The array that we may want to upcast.
+    fill_value : what we want to fill with
+    copy : bool, default False
+        If True always make a copy even if no upcast is required.
+
+    Returns
+    -------
+    values : np.ndarray
+        the original array, possibly upcast
+    fill_value :
+        the fill value, possibly upcast
+    """
+    new_dtype, fill_value = maybe_promote(values.dtype, fill_value)
+    # We get a copy in all cases _except_ (values.dtype == new_dtype and not copy)
+    upcast_values = values.astype(new_dtype, copy=copy)
+
+    # error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]],
+    # Union[Union[str, int, float, bool], Union[Period, Timestamp, Timedelta, Any]]]",
+    # expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period,
+    # Timestamp, Timedelta, Any]]]")
+    return upcast_values, fill_value  # type: ignore[return-value]
+
+
+def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
+    """
+    Change string like dtypes to object for
+    ``DataFrame.select_dtypes()``.
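maybe_upcast ties those promotion rules to an actual array; a small sketch, again assuming the internal import path pandas.core.dtypes.cast:

import numpy as np
from pandas.core.dtypes.cast import maybe_upcast  # internal helper, illustrative only

arr = np.array([1, 2, 3], dtype="int64")
# int64 cannot hold np.nan, so the values are upcast to float64 and the
# (unchanged) fill value is returned alongside them
values, fill = maybe_upcast(arr, fill_value=np.nan)
values.dtype   # dtype('float64')
fill           # nan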
+ """ + # error: Argument 1 to has incompatible type "Type[generic]"; expected + # "Union[dtype[Any], ExtensionDtype, None]" + # error: Argument 2 to has incompatible type "Type[generic]"; expected + # "Union[dtype[Any], ExtensionDtype, None]" + non_string_dtypes = dtype_set - { + np.dtype("S").type, # type: ignore[arg-type] + np.dtype(" np.ndarray: + """coerce the indexer input array to the smallest dtype possible""" + length = len(categories) + if length < _int8_max: + return ensure_int8(indexer) + elif length < _int16_max: + return ensure_int16(indexer) + elif length < _int32_max: + return ensure_int32(indexer) + return ensure_int64(indexer) + + +def soft_convert_objects( + values: np.ndarray, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + period: bool = True, + copy: bool = True, +) -> ArrayLike: + """ + Try to coerce datetime, timedelta, and numeric object-dtype columns + to inferred dtype. + + Parameters + ---------- + values : np.ndarray[object] + datetime : bool, default True + numeric: bool, default True + timedelta : bool, default True + period : bool, default True + copy : bool, default True + + Returns + ------- + np.ndarray or ExtensionArray + """ + validate_bool_kwarg(datetime, "datetime") + validate_bool_kwarg(numeric, "numeric") + validate_bool_kwarg(timedelta, "timedelta") + validate_bool_kwarg(copy, "copy") + + conversion_count = sum((datetime, numeric, timedelta)) + if conversion_count == 0: + raise ValueError("At least one of datetime, numeric or timedelta must be True.") + + # Soft conversions + if datetime or timedelta: + # GH 20380, when datetime is beyond year 2262, hence outside + # bound of nanosecond-resolution 64-bit integers. + try: + converted = lib.maybe_convert_objects( + values, + convert_datetime=datetime, + convert_timedelta=timedelta, + convert_period=period, + ) + except (OutOfBoundsDatetime, ValueError): + return values + if converted is not values: + return converted + + if numeric and is_object_dtype(values.dtype): + converted, _ = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) + + # If all NaNs, then do not-alter + values = converted if not isna(converted).all() else values + values = values.copy() if copy else values + + return values + + +def convert_dtypes( + input_array: ArrayLike, + convert_string: bool = True, + convert_integer: bool = True, + convert_boolean: bool = True, + convert_floating: bool = True, +) -> DtypeObj: + """ + Convert objects to best possible type, and optionally, + to types supporting ``pd.NA``. + + Parameters + ---------- + input_array : ExtensionArray or np.ndarray + convert_string : bool, default True + Whether object dtypes should be converted to ``StringDtype()``. + convert_integer : bool, default True + Whether, if possible, conversion can be done to integer extension types. + convert_boolean : bool, defaults True + Whether object dtypes should be converted to ``BooleanDtypes()``. + convert_floating : bool, defaults True + Whether, if possible, conversion can be done to floating extension types. + If `convert_integer` is also True, preference will be give to integer + dtypes if the floats can be faithfully casted to integers. 
+ + Returns + ------- + np.dtype, or ExtensionDtype + """ + inferred_dtype: str | DtypeObj + + if ( + convert_string or convert_integer or convert_boolean or convert_floating + ) and isinstance(input_array, np.ndarray): + + if is_object_dtype(input_array.dtype): + inferred_dtype = lib.infer_dtype(input_array) + else: + inferred_dtype = input_array.dtype + + if is_string_dtype(inferred_dtype): + if not convert_string or inferred_dtype == "bytes": + return input_array.dtype + else: + return pandas_dtype("string") + + if convert_integer: + target_int_dtype = pandas_dtype("Int64") + + if is_integer_dtype(input_array.dtype): + from pandas.core.arrays.integer import INT_STR_TO_DTYPE + + inferred_dtype = INT_STR_TO_DTYPE.get( + input_array.dtype.name, target_int_dtype + ) + elif is_numeric_dtype(input_array.dtype): + # TODO: de-dup with maybe_cast_to_integer_array? + arr = input_array[notna(input_array)] + if (arr.astype(int) == arr).all(): + inferred_dtype = target_int_dtype + else: + inferred_dtype = input_array.dtype + + if convert_floating: + if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( + input_array.dtype + ): + from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE + + inferred_float_dtype: DtypeObj = FLOAT_STR_TO_DTYPE.get( + input_array.dtype.name, pandas_dtype("Float64") + ) + # if we could also convert to integer, check if all floats + # are actually integers + if convert_integer: + # TODO: de-dup with maybe_cast_to_integer_array? + arr = input_array[notna(input_array)] + if (arr.astype(int) == arr).all(): + inferred_dtype = pandas_dtype("Int64") + else: + inferred_dtype = inferred_float_dtype + else: + inferred_dtype = inferred_float_dtype + + if convert_boolean: + if is_bool_dtype(input_array.dtype): + inferred_dtype = pandas_dtype("boolean") + elif isinstance(inferred_dtype, str) and inferred_dtype == "boolean": + inferred_dtype = pandas_dtype("boolean") + + if isinstance(inferred_dtype, str): + # If we couldn't do anything else, then we retain the dtype + inferred_dtype = input_array.dtype + + else: + return input_array.dtype + + # error: Incompatible return value type (got "Union[str, Union[dtype[Any], + # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]") + return inferred_dtype # type: ignore[return-value] + + +def maybe_infer_to_datetimelike( + value: np.ndarray, +) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray: + """ + we might have a array (or single object) that is datetime like, + and no dtype is passed don't change the value unless we find a + datetime/timedelta set + + this is pretty strict in that a datetime/timedelta is REQUIRED + in addition to possible nulls/string likes + + Parameters + ---------- + value : np.ndarray[object] + + Returns + ------- + np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray + + """ + if not isinstance(value, np.ndarray) or value.dtype != object: + # Caller is responsible for passing only ndarray[object] + raise TypeError(type(value)) # pragma: no cover + + v = np.array(value, copy=False) + + shape = v.shape + if v.ndim != 1: + v = v.ravel() + + if not len(v): + return value + + def try_datetime(v: np.ndarray) -> ArrayLike: + # Coerce to datetime64, datetime64tz, or in corner cases + # object[datetimes] + from pandas.core.arrays.datetimes import sequence_to_datetimes + + try: + # GH#19671 we pass require_iso8601 to be relatively strict + # when parsing strings. 
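This helper backs the public Series.convert_dtypes / DataFrame.convert_dtypes; a short example of the observable behaviour (public API):

import numpy as np
import pandas as pd

s = pd.Series([1, 2, np.nan])
s.dtype                    # dtype('float64')
# the floats are really integers, so the nullable Int64 dtype is chosen
s.convert_dtypes().dtype   # Int64

pd.Series(["a", "b"]).convert_dtypes().dtype   # string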
+ dta = sequence_to_datetimes(v, require_iso8601=True) + except (ValueError, TypeError): + # e.g. is not convertible to datetime + return v.reshape(shape) + else: + # GH#19761 we may have mixed timezones, in which cast 'dta' is + # an ndarray[object]. Only 1 test + # relies on this behavior, see GH#40111 + return dta.reshape(shape) + + def try_timedelta(v: np.ndarray) -> np.ndarray: + # safe coerce to timedelta64 + + # will try first with a string & object conversion + try: + # bc we know v.dtype == object, this is equivalent to + # `np.asarray(to_timedelta(v))`, but using a lower-level API that + # does not require a circular import. + td_values = array_to_timedelta64(v).view("m8[ns]") + except (ValueError, OverflowError): + return v.reshape(shape) + else: + return td_values.reshape(shape) + + inferred_type, seen_str = lib.infer_datetimelike_array(ensure_object(v)) + if inferred_type in ["period", "interval"]: + # Incompatible return value type (got "Union[ExtensionArray, ndarray]", + # expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray, + # IntervalArray]") + return lib.maybe_convert_objects( # type: ignore[return-value] + v, convert_period=True, convert_interval=True + ) + + if inferred_type == "datetime": + # error: Incompatible types in assignment (expression has type "ExtensionArray", + # variable has type "Union[ndarray, List[Any]]") + value = try_datetime(v) # type: ignore[assignment] + elif inferred_type == "timedelta": + value = try_timedelta(v) + elif inferred_type == "nat": + + # if all NaT, return as datetime + if isna(v).all(): + # error: Incompatible types in assignment (expression has type + # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") + value = try_datetime(v) # type: ignore[assignment] + else: + + # We have at least a NaT and a string + # try timedelta first to avoid spurious datetime conversions + # e.g. '00:00:01' is a timedelta but technically is also a datetime + value = try_timedelta(v) + if lib.infer_dtype(value, skipna=False) in ["mixed"]: + # cannot skip missing values, as NaT implies that the string + # is actually a datetime + + # error: Incompatible types in assignment (expression has type + # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") + value = try_datetime(v) # type: ignore[assignment] + + if value.dtype.kind in ["m", "M"] and seen_str: + # TODO(2.0): enforcing this deprecation should close GH#40111 + warnings.warn( + f"Inferring {value.dtype} from data containing strings is deprecated " + "and will be removed in a future version. To retain the old behavior " + f"explicitly pass Series(data, dtype={value.dtype})", + FutureWarning, + stacklevel=find_stack_level(), + ) + return value + + +def maybe_cast_to_datetime( + value: ExtensionArray | np.ndarray | list, dtype: DtypeObj | None +) -> ExtensionArray | np.ndarray: + """ + try to cast the array/value to a datetimelike dtype, converting float + nan to iNaT + + We allow a list *only* when dtype is not None. 
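The inference above is what makes object-dtype input "become" datetimelike when no dtype is given; it is observable through the public constructors:

import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# an object array holding only datetimes is inferred as datetime64[ns]
pd.Series(np.array([datetime(2020, 1, 1), datetime(2020, 1, 2)], dtype=object)).dtype
# dtype('<M8[ns]')

# likewise for timedeltas
pd.Series(np.array([timedelta(days=1), timedelta(days=2)], dtype=object)).dtype
# dtype('<m8[ns]')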
+ """ + from pandas.core.arrays.datetimes import sequence_to_datetimes + from pandas.core.arrays.timedeltas import TimedeltaArray + + if not is_list_like(value): + raise TypeError("value must be listlike") + + if is_timedelta64_dtype(dtype): + # TODO: _from_sequence would raise ValueError in cases where + # _ensure_nanosecond_dtype raises TypeError + dtype = cast(np.dtype, dtype) + dtype = _ensure_nanosecond_dtype(dtype) + res = TimedeltaArray._from_sequence(value, dtype=dtype) + return res + + if dtype is not None: + is_datetime64 = is_datetime64_dtype(dtype) + is_datetime64tz = is_datetime64tz_dtype(dtype) + + vdtype = getattr(value, "dtype", None) + + if is_datetime64 or is_datetime64tz: + dtype = _ensure_nanosecond_dtype(dtype) + + value = np.array(value, copy=False) + + # we have an array of datetime or timedeltas & nulls + if value.size or not is_dtype_equal(value.dtype, dtype): + _disallow_mismatched_datetimelike(value, dtype) + + try: + if is_datetime64: + dta = sequence_to_datetimes(value) + # GH 25843: Remove tz information since the dtype + # didn't specify one + + if dta.tz is not None: + warnings.warn( + "Data is timezone-aware. Converting " + "timezone-aware data to timezone-naive by " + "passing dtype='datetime64[ns]' to " + "DataFrame or Series is deprecated and will " + "raise in a future version. Use " + "`pd.Series(values).dt.tz_localize(None)` " + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # equiv: dta.view(dtype) + # Note: NOT equivalent to dta.astype(dtype) + dta = dta.tz_localize(None) + + value = dta + elif is_datetime64tz: + dtype = cast(DatetimeTZDtype, dtype) + # The string check can be removed once issue #13712 + # is solved. String data that is passed with a + # datetime64tz is assumed to be naive which should + # be localized to the timezone. + is_dt_string = is_string_dtype(value.dtype) + dta = sequence_to_datetimes(value) + if dta.tz is not None: + value = dta.astype(dtype, copy=False) + elif is_dt_string: + # Strings here are naive, so directly localize + # equiv: dta.astype(dtype) # though deprecated + + value = dta.tz_localize(dtype.tz) + else: + # Numeric values are UTC at this point, + # so localize and convert + # equiv: Series(dta).astype(dtype) # though deprecated + if getattr(vdtype, "kind", None) == "M": + # GH#24559, GH#33401 deprecate behavior inconsistent + # with DatetimeArray/DatetimeIndex + warnings.warn( + "In a future version, constructing a Series " + "from datetime64[ns] data and a " + "DatetimeTZDtype will interpret the data " + "as wall-times instead of " + "UTC times, matching the behavior of " + "DatetimeIndex. To treat the data as UTC " + "times, use pd.Series(data).dt" + ".tz_localize('UTC').tz_convert(dtype.tz) " + "or pd.Series(data.view('int64'), dtype=dtype)", + FutureWarning, + stacklevel=find_stack_level(), + ) + + value = dta.tz_localize("UTC").tz_convert(dtype.tz) + except OutOfBoundsDatetime: + raise + except ParserError: + # Note: this is dateutil's ParserError, not ours. + pass + + elif getattr(vdtype, "kind", None) in ["m", "M"]: + # we are already datetimelike and want to coerce to non-datetimelike; + # astype_nansafe will raise for anything other than object, then upcast. 
+            #  see test_datetimelike_values_with_object_dtype
+            # error: Argument 2 to "astype_nansafe" has incompatible type
+            # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
+            return astype_nansafe(value, dtype)  # type: ignore[arg-type]
+
+    elif isinstance(value, np.ndarray):
+        if value.dtype.kind in ["M", "m"]:
+            # catch a datetime/timedelta that is not of ns variety
+            # and no coercion specified
+            value = sanitize_to_nanoseconds(value)
+
+        elif value.dtype == _dtype_obj:
+            value = maybe_infer_to_datetimelike(value)
+
+    elif isinstance(value, list):
+        # we only get here with dtype=None, which we do not allow
+        raise ValueError(
+            "maybe_cast_to_datetime allows a list *only* if dtype is not None"
+        )
+
+    # at this point we have converted or raised in all cases where we had a list
+    return cast(ArrayLike, value)
+
+
+def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarray:
+    """
+    Safely convert non-nanosecond datetime64 or timedelta64 values to nanosecond.
+    """
+    dtype = values.dtype
+    if dtype.kind == "M" and dtype != DT64NS_DTYPE:
+        values = astype_overflowsafe(values, dtype=DT64NS_DTYPE)
+
+    elif dtype.kind == "m" and dtype != TD64NS_DTYPE:
+        values = astype_overflowsafe(values, dtype=TD64NS_DTYPE)
+
+    elif copy:
+        values = values.copy()
+
+    return values
+
+
+def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
+    """
+    Convert dtypes with granularity less than nanosecond to nanosecond
+
+    >>> _ensure_nanosecond_dtype(np.dtype("M8[s]"))
+    dtype('<M8[ns]')
+
+    >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]"))
+    Traceback (most recent call last):
+        ...
+    TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]]
+    """
+    msg = (
+        f"The '{dtype.name}' dtype has no unit. "
+        f"Please pass in '{dtype.name}[ns]' instead."
+    )
+
+    # unpack e.g. SparseDtype
+    dtype = getattr(dtype, "subtype", dtype)
+
+    if not isinstance(dtype, np.dtype):
+        # i.e. datetime64tz
+        pass
+
+    elif dtype.kind == "M" and dtype != DT64NS_DTYPE:
+        # pandas supports dtype whose granularity is less than [ns]
+        # e.g., [ps], [fs], [as]
+        if dtype <= np.dtype("M8[ns]"):
+            if dtype.name == "datetime64":
+                raise ValueError(msg)
+            dtype = DT64NS_DTYPE
+        else:
+            raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]")
+
+    elif dtype.kind == "m" and dtype != TD64NS_DTYPE:
+        # pandas supports dtype whose granularity is less than [ns]
+        # e.g., [ps], [fs], [as]
+        if dtype <= np.dtype("m8[ns]"):
+            if dtype.name == "timedelta64":
+                raise ValueError(msg)
+            dtype = TD64NS_DTYPE
+        else:
+            raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
+    return dtype
+
+
+# TODO: other value-dependent functions to standardize here include
+# dtypes.concat.cast_to_common_type and Index._find_common_type_compat
+def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
+    """
+    Find the type/dtype for the result of an operation between these objects.
+
+    This is similar to find_common_type, but looks at the objects instead
+    of just their dtypes. This can be useful in particular when one of the
+    objects does not have a `dtype`.
+
+    Parameters
+    ----------
+    left : np.ndarray or ExtensionArray
+    right : Any
+
+    Returns
+    -------
+    np.dtype or ExtensionDtype
+
+    See also
+    --------
+    find_common_type
+    numpy.result_type
+    """
+    new_dtype: DtypeObj
+
+    if (
+        isinstance(left, np.ndarray)
+        and left.dtype.kind in ["i", "u", "c"]
+        and (lib.is_integer(right) or lib.is_float(right))
+    ):
+        # e.g.
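Two quick consequences of the nanosecond normalisation above, shown through public API (pandas 1.5 behaviour):

import numpy as np
import pandas as pd

# non-nanosecond datetime64 input is converted to datetime64[ns]
arr = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[s]")
pd.Series(arr).dtype   # dtype('<M8[ns]')

# a unit-less dtype is rejected with the message built above
pd.Series(arr, dtype="datetime64")
# ValueError: The 'datetime64' dtype has no unit. Please pass in 'datetime64[ns]' instead.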
with int8 dtype and right=512, we want to end up with + # np.int16, whereas infer_dtype_from(512) gives np.int64, + # which will make us upcast too far. + if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f": + right = int(right) + + new_dtype = np.result_type(left, right) + + elif is_valid_na_for_dtype(right, left.dtype): + # e.g. IntervalDtype[int] and None/np.nan + new_dtype = ensure_dtype_can_hold_na(left.dtype) + + else: + dtype, _ = infer_dtype_from(right, pandas_dtype=True) + + new_dtype = find_common_type([left.dtype, dtype]) + + return new_dtype + + +def common_dtype_categorical_compat( + objs: list[Index | ArrayLike], dtype: DtypeObj +) -> DtypeObj: + """ + Update the result of find_common_type to account for NAs in a Categorical. + + Parameters + ---------- + objs : list[np.ndarray | ExtensionArray | Index] + dtype : np.dtype or ExtensionDtype + + Returns + ------- + np.dtype or ExtensionDtype + """ + # GH#38240 + + # TODO: more generally, could do `not can_hold_na(dtype)` + if isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]: + + for obj in objs: + # We don't want to accientally allow e.g. "categorical" str here + obj_dtype = getattr(obj, "dtype", None) + if isinstance(obj_dtype, CategoricalDtype): + if isinstance(obj, ABCIndex): + # This check may already be cached + hasnas = obj.hasnans + else: + # Categorical + hasnas = cast("Categorical", obj)._hasna + + if hasnas: + # see test_union_int_categorical_with_nan + dtype = np.dtype(np.float64) + break + return dtype + + +@overload +def find_common_type(types: list[np.dtype]) -> np.dtype: + ... + + +@overload +def find_common_type(types: list[ExtensionDtype]) -> DtypeObj: + ... + + +@overload +def find_common_type(types: list[DtypeObj]) -> DtypeObj: + ... + + +def find_common_type(types): + """ + Find a common data type among the given dtypes. 
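A sketch of find_result_type, assuming the internal import path pandas.core.dtypes.cast; the integer branch leans on NumPy's value-based promotion, so the scalar's magnitude, not its Python type, decides the width:

import numpy as np
from pandas.core.dtypes.cast import find_result_type  # internal helper, illustrative only

left = np.array([1, 2, 3], dtype="int8")
find_result_type(left, 512)      # dtype('int16') -- widened just enough for the scalar
find_result_type(left, np.nan)   # dtype('float64') -- NA needs a dtype that can hold it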
+ + Parameters + ---------- + types : list of dtypes + + Returns + ------- + pandas extension or numpy dtype + + See Also + -------- + numpy.find_common_type + + """ + if not types: + raise ValueError("no types given") + + first = types[0] + + # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2) + # => object + if lib.dtypes_all_equal(list(types)): + return first + + # get unique types (dict.fromkeys is used as order-preserving set()) + types = list(dict.fromkeys(types).keys()) + + if any(isinstance(t, ExtensionDtype) for t in types): + for t in types: + if isinstance(t, ExtensionDtype): + res = t._get_common_dtype(types) + if res is not None: + return res + return np.dtype("object") + + # take lowest unit + if all(is_datetime64_dtype(t) for t in types): + return np.dtype("datetime64[ns]") + if all(is_timedelta64_dtype(t) for t in types): + return np.dtype("timedelta64[ns]") + + # don't mix bool / int or float or complex + # this is different from numpy, which casts bool with float/int as int + has_bools = any(is_bool_dtype(t) for t in types) + if has_bools: + for t in types: + if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t): + return np.dtype("object") + + return np.find_common_type(types, []) + + +def construct_2d_arraylike_from_scalar( + value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool +) -> np.ndarray: + + shape = (length, width) + + if dtype.kind in ["m", "M"]: + value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype) + elif dtype == _dtype_obj: + if isinstance(value, (np.timedelta64, np.datetime64)): + # calling np.array below would cast to pytimedelta/pydatetime + out = np.empty(shape, dtype=object) + out.fill(value) + return out + + # Attempt to coerce to a numpy array + try: + arr = np.array(value, dtype=dtype, copy=copy) + except (ValueError, TypeError) as err: + raise TypeError( + f"DataFrame constructor called with incompatible data and dtype: {err}" + ) from err + + if arr.ndim != 0: + raise ValueError("DataFrame constructor not properly called!") + + return np.full(shape, arr) + + +def construct_1d_arraylike_from_scalar( + value: Scalar, length: int, dtype: DtypeObj | None +) -> ArrayLike: + """ + create a np.ndarray / pandas type of specified shape and dtype + filled with values + + Parameters + ---------- + value : scalar value + length : int + dtype : pandas_dtype or np.dtype + + Returns + ------- + np.ndarray / pandas type of length, filled with value + + """ + + if dtype is None: + try: + dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True) + except OutOfBoundsDatetime: + dtype = _dtype_obj + + if isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() + seq = [] if length == 0 else [value] + subarr = cls._from_sequence(seq, dtype=dtype).repeat(length) + + else: + + if length and is_integer_dtype(dtype) and isna(value): + # coerce if we have nan for an integer dtype + dtype = np.dtype("float64") + elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): + # we need to coerce to object dtype to avoid + # to allow numpy to take our string as a scalar value + dtype = np.dtype("object") + if not isna(value): + value = ensure_str(value) + elif dtype.kind in ["M", "m"]: + value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype) + + subarr = np.empty(length, dtype=dtype) + if length: + # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes + subarr.fill(value) + + return subarr + + +def _maybe_unbox_datetimelike_tz_deprecation(value: Scalar, dtype: DtypeObj): + """ + 
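find_common_type in practice, assuming the internal import path pandas.core.dtypes.cast; the bool handling is a deliberate difference from NumPy, as the comment above notes:

import numpy as np
from pandas.core.dtypes.cast import find_common_type  # internal helper, illustrative only

find_common_type([np.dtype("int64"), np.dtype("float32")])   # dtype('float64')

# pandas refuses to mix bool with numeric dtypes and falls back to object,
# whereas NumPy would promote to the numeric dtype
find_common_type([np.dtype("bool"), np.dtype("int64")])      # dtype('O')
np.promote_types(np.dtype("bool"), np.dtype("int64"))        # dtype('int64'), for comparison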
Wrap _maybe_unbox_datetimelike with a check for a timezone-aware Timestamp + along with a timezone-naive datetime64 dtype, which is deprecated. + """ + # Caller is responsible for checking dtype.kind in ["m", "M"] + + if isinstance(value, datetime): + # we dont want to box dt64, in particular datetime64("NaT") + value = maybe_box_datetimelike(value, dtype) + + try: + value = _maybe_unbox_datetimelike(value, dtype) + except TypeError: + if ( + isinstance(value, Timestamp) + and value.tzinfo is not None + and isinstance(dtype, np.dtype) + and dtype.kind == "M" + ): + warnings.warn( + "Data is timezone-aware. Converting " + "timezone-aware data to timezone-naive by " + "passing dtype='datetime64[ns]' to " + "DataFrame or Series is deprecated and will " + "raise in a future version. Use " + "`pd.Series(values).dt.tz_localize(None)` " + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + new_value = value.tz_localize(None) + return _maybe_unbox_datetimelike(new_value, dtype) + else: + raise + return value + + +def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: + """ + Transform any list-like object in a 1-dimensional numpy array of object + dtype. + + Parameters + ---------- + values : any iterable which has a len() + + Raises + ------ + TypeError + * If `values` does not have a len() + + Returns + ------- + 1-dimensional numpy array of dtype object + """ + # numpy will try to interpret nested lists as further dimensions, hence + # making a 1D array that contains list-likes is a bit tricky: + result = np.empty(len(values), dtype="object") + result[:] = values + return result + + +def maybe_cast_to_integer_array( + arr: list | np.ndarray, dtype: np.dtype, copy: bool = False +) -> np.ndarray: + """ + Takes any dtype and returns the casted version, raising for when data is + incompatible with integer/unsigned integer dtypes. + + Parameters + ---------- + arr : np.ndarray or list + The array to cast. + dtype : np.dtype + The integer dtype to cast the array to. + copy: bool, default False + Whether to make a copy of the array before returning. + + Returns + ------- + ndarray + Array of integer or unsigned integer dtype. + + Raises + ------ + OverflowError : the dtype is incompatible with the data + ValueError : loss of precision has occurred during casting + + Examples + -------- + If you try to coerce negative values to unsigned integers, it raises: + + >>> pd.Series([-1], dtype="uint64") + Traceback (most recent call last): + ... + OverflowError: Trying to coerce negative values to unsigned integers + + Also, if you try to coerce float values to integers, it raises: + + >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64")) + Traceback (most recent call last): + ... + ValueError: Trying to coerce float values to integers + """ + assert is_integer_dtype(dtype) + + try: + if not isinstance(arr, np.ndarray): + casted = np.array(arr, dtype=dtype, copy=copy) + else: + casted = arr.astype(dtype, copy=copy) + except OverflowError as err: + raise OverflowError( + "The elements provided in the data cannot all be " + f"casted to the dtype {dtype}" + ) from err + + if np.array_equal(arr, casted): + return casted + + # We do this casting to allow for proper + # data and dtype checking. + # + # We didn't do this earlier because NumPy + # doesn't handle `uint64` correctly. 
+ arr = np.asarray(arr) + + if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): + raise OverflowError("Trying to coerce negative values to unsigned integers") + + if is_float_dtype(arr.dtype): + if not np.isfinite(arr).all(): + raise IntCastingNaNError( + "Cannot convert non-finite values (NA or inf) to integer" + ) + raise ValueError("Trying to coerce float values to integers") + if is_object_dtype(arr.dtype): + raise ValueError("Trying to coerce float values to integers") + + if casted.dtype < arr.dtype: + # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows + warnings.warn( + f"Values are too large to be losslessly cast to {dtype}. " + "In a future version this will raise OverflowError. To retain the " + f"old behavior, use pd.Series(values).astype({dtype})", + FutureWarning, + stacklevel=find_stack_level(), + ) + return casted + + if arr.dtype.kind in ["m", "M"]: + # test_constructor_maskedarray_nonfloat + warnings.warn( + f"Constructing Series or DataFrame from {arr.dtype} values and " + f"dtype={dtype} is deprecated and will raise in a future version. " + "Use values.view(dtype) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return casted + + # No known cases that get here, but raising explicitly to cover our bases. + raise ValueError(f"values cannot be losslessly cast to {dtype}") + + +def can_hold_element(arr: ArrayLike, element: Any) -> bool: + """ + Can we do an inplace setitem with this element in an array with this dtype? + + Parameters + ---------- + arr : np.ndarray or ExtensionArray + element : Any + + Returns + ------- + bool + """ + dtype = arr.dtype + if not isinstance(dtype, np.dtype) or dtype.kind in ["m", "M"]: + if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)): + # np.dtype here catches datetime64ns and timedelta64ns; we assume + # in this case that we have DatetimeArray/TimedeltaArray + arr = cast( + "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr + ) + try: + arr._validate_setitem_value(element) + return True + except (ValueError, TypeError): + # TODO(2.0): stop catching ValueError for tzaware, see + # _catch_deprecated_value_error + return False + + # This is technically incorrect, but maintains the behavior of + # ExtensionBlock._can_hold_element + return True + + try: + np_can_hold_element(dtype, element) + return True + except (TypeError, LossySetitemError): + return False + + +def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: + """ + Raise if we cannot losslessly set this element into an ndarray with this dtype. + + Specifically about places where we disagree with numpy. i.e. there are + cases where numpy will raise in doing the setitem that we do not check + for here, e.g. setting str "X" into a numeric ndarray. + + Returns + ------- + Any + The element, potentially cast to the dtype. + + Raises + ------ + ValueError : If we cannot losslessly store this element with this dtype. + """ + if dtype == _dtype_obj: + return element + + tipo = _maybe_infer_dtype_type(element) + + if dtype.kind in ["i", "u"]: + if isinstance(element, range): + if _dtype_can_hold_range(element, dtype): + return element + raise LossySetitemError + + elif is_integer(element) or (is_float(element) and element.is_integer()): + # e.g. test_setitem_series_int8 if we have a python int 1 + # tipo may be np.int32, despite the fact that it will fit + # in smaller int dtypes. 
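can_hold_element / np_can_hold_element in action, assuming the internal import path pandas.core.dtypes.cast; the commented results follow from the integer branch above:

import numpy as np
from pandas.core.dtypes.cast import can_hold_element  # internal helper, illustrative only

arr = np.array([1, 2, 3], dtype="int8")
can_hold_element(arr, 100)    # True  -- fits within int8
can_hold_element(arr, 1000)   # False -- would overflow, caller has to upcast first
can_hold_element(arr, 1.5)    # False -- cannot be stored losslessly in an integer array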
+ info = np.iinfo(dtype) + if info.min <= element <= info.max: + return dtype.type(element) + raise LossySetitemError + + if tipo is not None: + if tipo.kind not in ["i", "u"]: + if isinstance(element, np.ndarray) and element.dtype.kind == "f": + # If all can be losslessly cast to integers, then we can hold them + with np.errstate(invalid="ignore"): + # We check afterwards if cast was losslessly, so no need to show + # the warning + casted = element.astype(dtype) + comp = casted == element + if comp.all(): + # Return the casted values bc they can be passed to + # np.putmask, whereas the raw values cannot. + # see TestSetitemFloatNDarrayIntoIntegerSeries + return casted + raise LossySetitemError + + # Anything other than integer we cannot hold + raise LossySetitemError + elif ( + dtype.kind == "u" + and isinstance(element, np.ndarray) + and element.dtype.kind == "i" + ): + # see test_where_uint64 + casted = element.astype(dtype) + if (casted == element).all(): + # TODO: faster to check (element >=0).all()? potential + # itemsize issues there? + return casted + raise LossySetitemError + elif dtype.itemsize < tipo.itemsize: + raise LossySetitemError + elif not isinstance(tipo, np.dtype): + # i.e. nullable IntegerDtype; we can put this into an ndarray + # losslessly iff it has no NAs + if element._hasna: + raise LossySetitemError + return element + + return element + + raise LossySetitemError + + elif dtype.kind == "f": + if lib.is_integer(element) or lib.is_float(element): + casted = dtype.type(element) + if np.isnan(casted) or casted == element: + return casted + # otherwise e.g. overflow see TestCoercionFloat32 + raise LossySetitemError + + if tipo is not None: + # TODO: itemsize check? + if tipo.kind not in ["f", "i", "u"]: + # Anything other than float/integer we cannot hold + raise LossySetitemError + elif not isinstance(tipo, np.dtype): + # i.e. nullable IntegerDtype or FloatingDtype; + # we can put this into an ndarray losslessly iff it has no NAs + if element._hasna: + raise LossySetitemError + return element + elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind: + if isinstance(element, np.ndarray): + # e.g. TestDataFrameIndexingWhere::test_where_alignment + casted = element.astype(dtype) + # TODO(np>=1.20): we can just use np.array_equal with equal_nan + if array_equivalent(casted, element): + return casted + raise LossySetitemError + + return element + + raise LossySetitemError + + elif dtype.kind == "c": + if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element): + if np.isnan(element): + # see test_where_complex GH#6345 + return dtype.type(element) + + casted = dtype.type(element) + if casted == element: + return casted + # otherwise e.g. overflow see test_32878_complex_itemsize + raise LossySetitemError + + if tipo is not None: + if tipo.kind in ["c", "f", "i", "u"]: + return element + raise LossySetitemError + raise LossySetitemError + + elif dtype.kind == "b": + if tipo is not None: + if tipo.kind == "b": + if not isinstance(tipo, np.dtype): + # i.e. we have a BooleanArray + if element._hasna: + # i.e. there are pd.NA elements + raise LossySetitemError + return element + raise LossySetitemError + if lib.is_bool(element): + return element + raise LossySetitemError + + elif dtype.kind == "S": + # TODO: test tests.frame.methods.test_replace tests get here, + # need more targeted tests. 
xref phofl has a PR about this + if tipo is not None: + if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize: + return element + raise LossySetitemError + if isinstance(element, bytes) and len(element) <= dtype.itemsize: + return element + raise LossySetitemError + + raise NotImplementedError(dtype) + + +def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: + """ + _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), + but in many cases a range can be held by a smaller integer dtype. + Check if this is one of those cases. + """ + if not len(rng): + return True + return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) + + +class LossySetitemError(Exception): + """ + Raised when trying to do a __setitem__ on an np.ndarray that is not lossless. + """ + + pass diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py new file mode 100644 index 00000000..9355e81f --- /dev/null +++ b/pandas/core/dtypes/common.py @@ -0,0 +1,1885 @@ +""" +Common type operations. +""" +from __future__ import annotations + +from typing import ( + Any, + Callable, +) +import warnings + +import numpy as np + +from pandas._libs import ( + Interval, + Period, + algos, + lib, +) +from pandas._libs.tslibs import conversion +from pandas._typing import ( + ArrayLike, + DtypeObj, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.base import _registry as registry +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCIndex, +) +from pandas.core.dtypes.inference import ( + is_array_like, + is_bool, + is_complex, + is_dataclass, + is_decimal, + is_dict_like, + is_file_like, + is_float, + is_hashable, + is_integer, + is_interval, + is_iterator, + is_list_like, + is_named_tuple, + is_nested_list_like, + is_number, + is_re, + is_re_compilable, + is_scalar, + is_sequence, +) + +DT64NS_DTYPE = conversion.DT64NS_DTYPE +TD64NS_DTYPE = conversion.TD64NS_DTYPE +INT64_DTYPE = np.dtype(np.int64) + +# oh the troubles to reduce import time +_is_scipy_sparse = None + +ensure_float64 = algos.ensure_float64 + + +def ensure_float(arr): + """ + Ensure that an array object has a float dtype if possible. + + Parameters + ---------- + arr : array-like + The array whose data type we want to enforce as float. + + Returns + ------- + float_arr : The original array cast to the float dtype if + possible. Otherwise, the original array is returned. + """ + if is_extension_array_dtype(arr.dtype): + if is_float_dtype(arr.dtype): + arr = arr.to_numpy(dtype=arr.dtype.numpy_dtype, na_value=np.nan) + else: + arr = arr.to_numpy(dtype="float64", na_value=np.nan) + elif issubclass(arr.dtype.type, (np.integer, np.bool_)): + arr = arr.astype(float) + return arr + + +ensure_int64 = algos.ensure_int64 +ensure_int32 = algos.ensure_int32 +ensure_int16 = algos.ensure_int16 +ensure_int8 = algos.ensure_int8 +ensure_platform_int = algos.ensure_platform_int +ensure_object = algos.ensure_object +ensure_uint64 = algos.ensure_uint64 + + +def ensure_str(value: bytes | Any) -> str: + """ + Ensure that bytes and non-strings get converted into ``str`` objects. + """ + if isinstance(value, bytes): + value = value.decode("utf-8") + elif not isinstance(value, str): + value = str(value) + return value + + +def ensure_python_int(value: int | np.integer) -> int: + """ + Ensure that a value is a python int. 
+ + Parameters + ---------- + value: int or numpy.integer + + Returns + ------- + int + + Raises + ------ + TypeError: if the value isn't an int or can't be converted to one. + """ + if not (is_integer(value) or is_float(value)): + if not is_scalar(value): + raise TypeError( + f"Value needs to be a scalar value, was type {type(value).__name__}" + ) + raise TypeError(f"Wrong type {type(value)} for value {value}") + try: + new_value = int(value) + assert new_value == value + except (TypeError, ValueError, AssertionError) as err: + raise TypeError(f"Wrong type {type(value)} for value {value}") from err + return new_value + + +def classes(*klasses) -> Callable: + """Evaluate if the tipo is a subclass of the klasses.""" + return lambda tipo: issubclass(tipo, klasses) + + +def classes_and_not_datetimelike(*klasses) -> Callable: + """ + Evaluate if the tipo is a subclass of the klasses + and not a datetimelike. + """ + return lambda tipo: ( + issubclass(tipo, klasses) + and not issubclass(tipo, (np.datetime64, np.timedelta64)) + ) + + +def is_object_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the object dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the object dtype. + + Examples + -------- + >>> is_object_dtype(object) + True + >>> is_object_dtype(int) + False + >>> is_object_dtype(np.array([], dtype=object)) + True + >>> is_object_dtype(np.array([], dtype=int)) + False + >>> is_object_dtype([1, 2, 3]) + False + """ + return _is_dtype_type(arr_or_dtype, classes(np.object_)) + + +def is_sparse(arr) -> bool: + """ + Check whether an array-like is a 1-D pandas sparse array. + + Check that the one-dimensional array-like is a pandas sparse array. + Returns True if it is a pandas sparse array, not another type of + sparse array. + + Parameters + ---------- + arr : array-like + Array-like to check. + + Returns + ------- + bool + Whether or not the array-like is a pandas sparse array. + + Examples + -------- + Returns `True` if the parameter is a 1-D pandas sparse array. + + >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0])) + True + >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0]))) + True + + Returns `False` if the parameter is not sparse. + + >>> is_sparse(np.array([0, 0, 1, 0])) + False + >>> is_sparse(pd.Series([0, 1, 0, 0])) + False + + Returns `False` if the parameter is not a pandas sparse array. + + >>> from scipy.sparse import bsr_matrix + >>> is_sparse(bsr_matrix([0, 1, 0, 0])) + False + + Returns `False` if the parameter has more than one dimension. + """ + from pandas.core.arrays.sparse import SparseDtype + + dtype = getattr(arr, "dtype", arr) + return isinstance(dtype, SparseDtype) + + +def is_scipy_sparse(arr) -> bool: + """ + Check whether an array-like is a scipy.sparse.spmatrix instance. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean + Whether or not the array-like is a scipy.sparse.spmatrix instance. + + Notes + ----- + If scipy is not installed, this function will always return False. 
+ + Examples + -------- + >>> from scipy.sparse import bsr_matrix + >>> is_scipy_sparse(bsr_matrix([1, 2, 3])) + True + >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3])) + False + """ + global _is_scipy_sparse + + if _is_scipy_sparse is None: + try: + from scipy.sparse import issparse as _is_scipy_sparse + except ImportError: + _is_scipy_sparse = lambda _: False + + assert _is_scipy_sparse is not None + return _is_scipy_sparse(arr) + + +def is_categorical(arr) -> bool: + """ + Check whether an array-like is a Categorical instance. + + .. deprecated:: 1.1.0 + Use ``is_categorical_dtype`` instead. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean + Whether or not the array-like is of a Categorical instance. + + Examples + -------- + >>> is_categorical([1, 2, 3]) + False + + Categoricals, Series Categoricals, and CategoricalIndex will return True. + + >>> cat = pd.Categorical([1, 2, 3]) + >>> is_categorical(cat) + True + >>> is_categorical(pd.Series(cat)) + True + >>> is_categorical(pd.CategoricalIndex([1, 2, 3])) + True + """ + warnings.warn( + "is_categorical is deprecated and will be removed in a future version. " + "Use is_categorical_dtype instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) + + +def is_datetime64_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the datetime64 dtype. + + Examples + -------- + >>> is_datetime64_dtype(object) + False + >>> is_datetime64_dtype(np.datetime64) + True + >>> is_datetime64_dtype(np.array([], dtype=int)) + False + >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_datetime64_dtype([1, 2, 3]) + False + """ + if isinstance(arr_or_dtype, np.dtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) + + +def is_datetime64tz_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of a DatetimeTZDtype dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of a DatetimeTZDtype dtype. + + Examples + -------- + >>> is_datetime64tz_dtype(object) + False + >>> is_datetime64tz_dtype([1, 2, 3]) + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive + False + >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_datetime64tz_dtype(dtype) + True + >>> is_datetime64tz_dtype(s) + True + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + + if arr_or_dtype is None: + return False + return DatetimeTZDtype.is_dtype(arr_or_dtype) + + +def is_timedelta64_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the timedelta64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the timedelta64 dtype. 
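Most of these predicates are re-exported publicly under pandas.api.types, so they can be exercised without touching internal modules; for example:

import numpy as np
import pandas as pd
from pandas.api import types as ptypes

ptypes.is_object_dtype(np.array([], dtype=object))            # True
ptypes.is_datetime64_dtype(np.dtype("datetime64[ns]"))        # True
ptypes.is_datetime64tz_dtype(pd.DatetimeIndex([], tz="UTC"))  # True
ptypes.is_sparse(pd.arrays.SparseArray([0, 1, 0]))            # True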
+ + Examples + -------- + >>> is_timedelta64_dtype(object) + False + >>> is_timedelta64_dtype(np.timedelta64) + True + >>> is_timedelta64_dtype([1, 2, 3]) + False + >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + True + >>> is_timedelta64_dtype('0 days') + False + """ + if isinstance(arr_or_dtype, np.dtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "m" + + return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) + + +def is_period_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Period dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the Period dtype. + + Examples + -------- + >>> is_period_dtype(object) + False + >>> is_period_dtype(PeriodDtype(freq="D")) + True + >>> is_period_dtype([1, 2, 3]) + False + >>> is_period_dtype(pd.Period("2017-01-01")) + False + >>> is_period_dtype(pd.PeriodIndex([], freq="A")) + True + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.type is Period + + if arr_or_dtype is None: + return False + return PeriodDtype.is_dtype(arr_or_dtype) + + +def is_interval_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Interval dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the Interval dtype. + + Examples + -------- + >>> is_interval_dtype(object) + False + >>> is_interval_dtype(IntervalDtype()) + True + >>> is_interval_dtype([1, 2, 3]) + False + >>> + >>> interval = pd.Interval(1, 2, closed="right") + >>> is_interval_dtype(interval) + False + >>> is_interval_dtype(pd.IntervalIndex([interval])) + True + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.type is Interval + + if arr_or_dtype is None: + return False + return IntervalDtype.is_dtype(arr_or_dtype) + + +def is_categorical_dtype(arr_or_dtype) -> bool: + """ + Check whether an array-like or dtype is of the Categorical dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype to check. + + Returns + ------- + boolean + Whether or not the array-like or dtype is of the Categorical dtype. + + Examples + -------- + >>> is_categorical_dtype(object) + False + >>> is_categorical_dtype(CategoricalDtype()) + True + >>> is_categorical_dtype([1, 2, 3]) + False + >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) + True + >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + True + """ + if isinstance(arr_or_dtype, ExtensionDtype): + # GH#33400 fastpath for dtype object + return arr_or_dtype.name == "category" + + if arr_or_dtype is None: + return False + return CategoricalDtype.is_dtype(arr_or_dtype) + + +def is_string_or_object_np_dtype(dtype: np.dtype) -> bool: + """ + Faster alternative to is_string_dtype, assumes we have a np.dtype object. + """ + return dtype == object or dtype.kind in "SU" + + +def is_string_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the string dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the string dtype. 
+ + Examples + -------- + >>> is_string_dtype(str) + True + >>> is_string_dtype(object) + True + >>> is_string_dtype(int) + False + >>> + >>> is_string_dtype(np.array(['a', 'b'])) + True + >>> is_string_dtype(pd.Series([1, 2])) + False + """ + # TODO: gh-15585: consider making the checks stricter. + def condition(dtype) -> bool: + return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) + + def is_excluded_dtype(dtype) -> bool: + """ + These have kind = "O" but aren't string dtypes so need to be explicitly excluded + """ + return isinstance(dtype, (PeriodDtype, IntervalDtype, CategoricalDtype)) + + return _is_dtype(arr_or_dtype, condition) + + +def is_dtype_equal(source, target) -> bool: + """ + Check if two dtypes are equal. + + Parameters + ---------- + source : The first dtype to compare + target : The second dtype to compare + + Returns + ------- + boolean + Whether or not the two dtypes are equal. + + Examples + -------- + >>> is_dtype_equal(int, float) + False + >>> is_dtype_equal("int", int) + True + >>> is_dtype_equal(object, "category") + False + >>> is_dtype_equal(CategoricalDtype(), "category") + True + >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") + False + """ + if isinstance(target, str): + if not isinstance(source, str): + # GH#38516 ensure we get the same behavior from + # is_dtype_equal(CDT, "category") and CDT == "category" + try: + src = get_dtype(source) + if isinstance(src, ExtensionDtype): + return src == target + except (TypeError, AttributeError, ImportError): + return False + elif isinstance(source, str): + return is_dtype_equal(target, source) + + try: + source = get_dtype(source) + target = get_dtype(target) + return source == target + except (TypeError, AttributeError, ImportError): + + # invalid comparison + # object == category will hit this + return False + + +def is_any_int_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of an integer dtype. + + In this function, timedelta64 instances are also considered "any-integer" + type objects and will return True. + + This function is internal and should not be exposed in the public API. + + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of an integer dtype. + + Examples + -------- + >>> is_any_int_dtype(str) + False + >>> is_any_int_dtype(int) + True + >>> is_any_int_dtype(float) + False + >>> is_any_int_dtype(np.uint64) + True + >>> is_any_int_dtype(np.datetime64) + False + >>> is_any_int_dtype(np.timedelta64) + True + >>> is_any_int_dtype(np.array(['a', 'b'])) + False + >>> is_any_int_dtype(pd.Series([1, 2])) + True + >>> is_any_int_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_any_int_dtype(pd.Index([1, 2.])) # float + False + """ + return _is_dtype_type(arr_or_dtype, classes(np.integer, np.timedelta64)) + + +def is_integer_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of an integer dtype. + + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. + + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. 
+ + Returns + ------- + boolean + Whether or not the array or dtype is of an integer dtype and + not an instance of timedelta64. + + Examples + -------- + >>> is_integer_dtype(str) + False + >>> is_integer_dtype(int) + True + >>> is_integer_dtype(float) + False + >>> is_integer_dtype(np.uint64) + True + >>> is_integer_dtype('int8') + True + >>> is_integer_dtype('Int8') + True + >>> is_integer_dtype(pd.Int8Dtype) + True + >>> is_integer_dtype(np.datetime64) + False + >>> is_integer_dtype(np.timedelta64) + False + >>> is_integer_dtype(np.array(['a', 'b'])) + False + >>> is_integer_dtype(pd.Series([1, 2])) + True + >>> is_integer_dtype(np.array([], dtype=np.timedelta64)) + False + >>> is_integer_dtype(pd.Index([1, 2.])) # float + False + """ + return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.integer)) + + +def is_signed_integer_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a signed integer dtype. + + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. + + The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered + as integer by this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a signed integer dtype + and not an instance of timedelta64. + + Examples + -------- + >>> is_signed_integer_dtype(str) + False + >>> is_signed_integer_dtype(int) + True + >>> is_signed_integer_dtype(float) + False + >>> is_signed_integer_dtype(np.uint64) # unsigned + False + >>> is_signed_integer_dtype('int8') + True + >>> is_signed_integer_dtype('Int8') + True + >>> is_signed_integer_dtype(pd.Int8Dtype) + True + >>> is_signed_integer_dtype(np.datetime64) + False + >>> is_signed_integer_dtype(np.timedelta64) + False + >>> is_signed_integer_dtype(np.array(['a', 'b'])) + False + >>> is_signed_integer_dtype(pd.Series([1, 2])) + True + >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) + False + >>> is_signed_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.signedinteger)) + + +def is_unsigned_integer_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of an unsigned integer dtype. + + The nullable Integer dtypes (e.g. pandas.UInt64Dtype) are also + considered as integer by this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of an unsigned integer dtype. 
+ + Examples + -------- + >>> is_unsigned_integer_dtype(str) + False + >>> is_unsigned_integer_dtype(int) # signed + False + >>> is_unsigned_integer_dtype(float) + False + >>> is_unsigned_integer_dtype(np.uint64) + True + >>> is_unsigned_integer_dtype('uint8') + True + >>> is_unsigned_integer_dtype('UInt8') + True + >>> is_unsigned_integer_dtype(pd.UInt8Dtype) + True + >>> is_unsigned_integer_dtype(np.array(['a', 'b'])) + False + >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed + False + >>> is_unsigned_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) + True + """ + return _is_dtype_type( + arr_or_dtype, classes_and_not_datetimelike(np.unsignedinteger) + ) + + +def is_int64_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the int64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the int64 dtype. + + Notes + ----- + Depending on system architecture, the return value of `is_int64_dtype( + int)` will be True if the OS uses 64-bit integers and False if the OS + uses 32-bit integers. + + Examples + -------- + >>> is_int64_dtype(str) + False + >>> is_int64_dtype(np.int32) + False + >>> is_int64_dtype(np.int64) + True + >>> is_int64_dtype('int8') + False + >>> is_int64_dtype('Int8') + False + >>> is_int64_dtype(pd.Int64Dtype) + True + >>> is_int64_dtype(float) + False + >>> is_int64_dtype(np.uint64) # unsigned + False + >>> is_int64_dtype(np.array(['a', 'b'])) + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.int64)) + True + >>> is_int64_dtype(pd.Index([1, 2.])) # float + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + return _is_dtype_type(arr_or_dtype, classes(np.int64)) + + +def is_datetime64_any_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + bool + Whether or not the array or dtype is of the datetime64 dtype. + + Examples + -------- + >>> is_datetime64_any_dtype(str) + False + >>> is_datetime64_any_dtype(int) + False + >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive + True + >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_any_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_any_dtype(np.array([1, 2])) + False + >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]")) + True + >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + True + """ + if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): + # GH#33400 fastpath for dtype object + return arr_or_dtype.kind == "M" + + if arr_or_dtype is None: + return False + return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) + + +def is_datetime64_ns_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the datetime64[ns] dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + bool + Whether or not the array or dtype is of the datetime64[ns] dtype. 
+ + Examples + -------- + >>> is_datetime64_ns_dtype(str) + False + >>> is_datetime64_ns_dtype(int) + False + >>> is_datetime64_ns_dtype(np.datetime64) # no unit + False + >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_ns_dtype(np.array([1, 2])) + False + >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64")) # no unit + False + >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) # wrong unit + False + >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) + True + """ + if arr_or_dtype is None: + return False + try: + tipo = get_dtype(arr_or_dtype) + except TypeError: + if is_datetime64tz_dtype(arr_or_dtype): + tipo = get_dtype(arr_or_dtype.dtype) + else: + return False + return tipo == DT64NS_DTYPE or ( + isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns" + ) + + +def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the timedelta64[ns] dtype. + + This is a very specific dtype, so generic ones like `np.timedelta64` + will return False if passed into this function. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the timedelta64[ns] dtype. + + Examples + -------- + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency + False + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + False + """ + return _is_dtype(arr_or_dtype, lambda dtype: dtype == TD64NS_DTYPE) + + +def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of + a timedelta64 or datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a timedelta64, + or datetime64 dtype. + + Examples + -------- + >>> is_datetime_or_timedelta_dtype(str) + False + >>> is_datetime_or_timedelta_dtype(int) + False + >>> is_datetime_or_timedelta_dtype(np.datetime64) + True + >>> is_datetime_or_timedelta_dtype(np.timedelta64) + True + >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + False + >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + False + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + True + """ + return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_numeric_v_string_like(a: ArrayLike, b) -> bool: + """ + Check if we are comparing a string-like object to a numeric ndarray. + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a string-like object to a numeric array. 
+ + Examples + -------- + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False + """ + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b") + is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b") + is_a_string_array = is_a_array and a.dtype.kind in ("S", "U") + is_b_string_array = is_b_array and b.dtype.kind in ("S", "U") + + is_b_scalar_string_like = not is_b_array and isinstance(b, str) + + return ( + (is_a_numeric_array and is_b_scalar_string_like) + or (is_a_numeric_array and is_b_string_array) + or (is_b_numeric_array and is_a_string_array) + ) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_datetimelike_v_numeric(a, b) -> bool: + """ + Check if we are comparing a datetime-like object to a numeric object. + By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a datetime-like to a numeric object. + + Examples + -------- + >>> from datetime import datetime + >>> dt = np.datetime64(datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + if not hasattr(a, "dtype"): + a = np.asarray(a) + if not hasattr(b, "dtype"): + b = np.asarray(b) + + def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float). + """ + return is_integer_dtype(x) or is_float_dtype(x) + + return (needs_i8_conversion(a) and is_numeric(b)) or ( + needs_i8_conversion(b) and is_numeric(a) + ) + + +def needs_i8_conversion(arr_or_dtype) -> bool: + """ + Check whether the array or dtype should be converted to int64. + + An array-like or dtype "needs" such a conversion if the array-like + or dtype is of a datetime-like dtype + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype should be converted to int64. 
+ + Examples + -------- + >>> needs_i8_conversion(str) + False + >>> needs_i8_conversion(np.int64) + False + >>> needs_i8_conversion(np.datetime64) + True + >>> needs_i8_conversion(np.array(['a', 'b'])) + False + >>> needs_i8_conversion(pd.Series([1, 2])) + False + >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + True + >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + """ + if arr_or_dtype is None: + return False + if isinstance(arr_or_dtype, np.dtype): + return arr_or_dtype.kind in ["m", "M"] + elif isinstance(arr_or_dtype, ExtensionDtype): + return isinstance(arr_or_dtype, (PeriodDtype, DatetimeTZDtype)) + + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + if isinstance(dtype, np.dtype): + return dtype.kind in ["m", "M"] + return isinstance(dtype, (PeriodDtype, DatetimeTZDtype)) + + +def is_numeric_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a numeric dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a numeric dtype. + + Examples + -------- + >>> is_numeric_dtype(str) + False + >>> is_numeric_dtype(int) + True + >>> is_numeric_dtype(float) + True + >>> is_numeric_dtype(np.uint64) + True + >>> is_numeric_dtype(np.datetime64) + False + >>> is_numeric_dtype(np.timedelta64) + False + >>> is_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_numeric_dtype(pd.Series([1, 2])) + True + >>> is_numeric_dtype(pd.Index([1, 2.])) + True + >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) + False + """ + return _is_dtype_type( + arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) + ) + + +def is_float_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a float dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a float dtype. + + Examples + -------- + >>> is_float_dtype(str) + False + >>> is_float_dtype(int) + False + >>> is_float_dtype(float) + True + >>> is_float_dtype(np.array(['a', 'b'])) + False + >>> is_float_dtype(pd.Series([1, 2])) + False + >>> is_float_dtype(pd.Index([1, 2.])) + True + """ + return _is_dtype_type(arr_or_dtype, classes(np.floating)) + + +def is_bool_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a boolean dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a boolean dtype. + + Notes + ----- + An ExtensionArray is considered boolean when the ``_is_boolean`` + attribute is set to True. 
+ + Examples + -------- + >>> is_bool_dtype(str) + False + >>> is_bool_dtype(int) + False + >>> is_bool_dtype(bool) + True + >>> is_bool_dtype(np.bool_) + True + >>> is_bool_dtype(np.array(['a', 'b'])) + False + >>> is_bool_dtype(pd.Series([1, 2])) + False + >>> is_bool_dtype(np.array([True, False])) + True + >>> is_bool_dtype(pd.Categorical([True, False])) + True + >>> is_bool_dtype(pd.arrays.SparseArray([True, False])) + True + """ + if arr_or_dtype is None: + return False + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + + if isinstance(dtype, CategoricalDtype): + arr_or_dtype = dtype.categories + # now we use the special definition for Index + + if isinstance(arr_or_dtype, ABCIndex): + # Allow Index[object] that is all-bools or Index["boolean"] + return arr_or_dtype.inferred_type == "boolean" + elif isinstance(dtype, ExtensionDtype): + return getattr(dtype, "_is_boolean", False) + + return issubclass(dtype.type, np.bool_) + + +def is_extension_type(arr) -> bool: + """ + Check whether an array-like is of a pandas extension class instance. + + .. deprecated:: 1.0.0 + Use ``is_extension_array_dtype`` instead. + + Extension classes include categoricals, pandas sparse objects (i.e. + classes represented within the pandas library and not ones external + to it like scipy sparse matrices), and datetime-like arrays. + + Parameters + ---------- + arr : array-like, scalar + The array-like to check. + + Returns + ------- + boolean + Whether or not the array-like is of a pandas extension class instance. + + Examples + -------- + >>> is_extension_type([1, 2, 3]) + False + >>> is_extension_type(np.array([1, 2, 3])) + False + >>> + >>> cat = pd.Categorical([1, 2, 3]) + >>> + >>> is_extension_type(cat) + True + >>> is_extension_type(pd.Series(cat)) + True + >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) + True + >>> from scipy.sparse import bsr_matrix + >>> is_extension_type(bsr_matrix([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_extension_type(s) + True + """ + warnings.warn( + "'is_extension_type' is deprecated and will be removed in a future " + "version. Use 'is_extension_array_dtype' instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if is_categorical_dtype(arr): + return True + elif is_sparse(arr): + return True + elif is_datetime64tz_dtype(arr): + return True + return False + + +def is_1d_only_ea_obj(obj: Any) -> bool: + """ + ExtensionArray that does not support 2D, or more specifically that does + not use HybridBlock. + """ + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, + ) + + return isinstance(obj, ExtensionArray) and not isinstance( + obj, (DatetimeArray, TimedeltaArray, PeriodArray) + ) + + +def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool: + """ + Analogue to is_extension_array_dtype but excluding DatetimeTZDtype. + """ + # Note: if other EA dtypes are ever held in HybridBlock, exclude those + # here too. + # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype + # to exclude ArrowTimestampUSDtype + return isinstance(dtype, ExtensionDtype) and not isinstance( + dtype, (DatetimeTZDtype, PeriodDtype) + ) + + +def is_extension_array_dtype(arr_or_dtype) -> bool: + """ + Check if an object is a pandas extension array type. 
+ + See the :ref:`Use Guide ` for more. + + Parameters + ---------- + arr_or_dtype : object + For array-like input, the ``.dtype`` attribute will + be extracted. + + Returns + ------- + bool + Whether the `arr_or_dtype` is an extension array type. + + Notes + ----- + This checks whether an object implements the pandas extension + array interface. In pandas, this includes: + + * Categorical + * Sparse + * Interval + * Period + * DatetimeArray + * TimedeltaArray + + Third-party libraries may implement arrays or types satisfying + this interface as well. + + Examples + -------- + >>> from pandas.api.types import is_extension_array_dtype + >>> arr = pd.Categorical(['a', 'b']) + >>> is_extension_array_dtype(arr) + True + >>> is_extension_array_dtype(arr.dtype) + True + + >>> arr = np.array(['a', 'b']) + >>> is_extension_array_dtype(arr.dtype) + False + """ + dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype) + if isinstance(dtype, ExtensionDtype): + return True + elif isinstance(dtype, np.dtype): + return False + else: + return registry.find(dtype) is not None + + +def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool: + """ + Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype. + + Notes + ----- + Checks only for dtype objects, not dtype-castable strings or types. + """ + return isinstance(dtype, ExtensionDtype) or ( + isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"] + ) + + +def is_complex_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a complex dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a complex dtype. + + Examples + -------- + >>> is_complex_dtype(str) + False + >>> is_complex_dtype(int) + False + >>> is_complex_dtype(np.complex_) + True + >>> is_complex_dtype(np.array(['a', 'b'])) + False + >>> is_complex_dtype(pd.Series([1, 2])) + False + >>> is_complex_dtype(np.array([1 + 1j, 5])) + True + """ + return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) + + +def _is_dtype(arr_or_dtype, condition) -> bool: + """ + Return true if the condition is satisfied for the arr_or_dtype. + + Parameters + ---------- + arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType + The array-like or dtype object whose dtype we want to extract. + condition : callable[Union[np.dtype, ExtensionDtype]] + + Returns + ------- + bool + + """ + if arr_or_dtype is None: + return False + try: + dtype = get_dtype(arr_or_dtype) + except (TypeError, ValueError): + return False + return condition(dtype) + + +def get_dtype(arr_or_dtype) -> DtypeObj: + """ + Get the dtype instance associated with an array + or dtype object. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype object whose dtype we want to extract. + + Returns + ------- + obj_dtype : The extract dtype instance from the + passed in array or dtype object. + + Raises + ------ + TypeError : The passed in object is None. 
+ """ + if arr_or_dtype is None: + raise TypeError("Cannot deduce dtype from null object") + + # fastpath + elif isinstance(arr_or_dtype, np.dtype): + return arr_or_dtype + elif isinstance(arr_or_dtype, type): + return np.dtype(arr_or_dtype) + + # if we have an array-like + elif hasattr(arr_or_dtype, "dtype"): + arr_or_dtype = arr_or_dtype.dtype + + return pandas_dtype(arr_or_dtype) + + +def _is_dtype_type(arr_or_dtype, condition) -> bool: + """ + Return true if the condition is satisfied for the arr_or_dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array-like or dtype object whose dtype we want to extract. + condition : callable[Union[np.dtype, ExtensionDtypeType]] + + Returns + ------- + bool : if the condition is satisfied for the arr_or_dtype + """ + if arr_or_dtype is None: + return condition(type(None)) + + # fastpath + if isinstance(arr_or_dtype, np.dtype): + return condition(arr_or_dtype.type) + elif isinstance(arr_or_dtype, type): + if issubclass(arr_or_dtype, ExtensionDtype): + arr_or_dtype = arr_or_dtype.type + return condition(np.dtype(arr_or_dtype).type) + + # if we have an array-like + if hasattr(arr_or_dtype, "dtype"): + arr_or_dtype = arr_or_dtype.dtype + + # we are not possibly a dtype + elif is_list_like(arr_or_dtype): + return condition(type(None)) + + try: + tipo = pandas_dtype(arr_or_dtype).type + except (TypeError, ValueError): + if is_scalar(arr_or_dtype): + return condition(type(None)) + + return False + + return condition(tipo) + + +def infer_dtype_from_object(dtype) -> type: + """ + Get a numpy dtype.type-style object for a dtype object. + + This methods also includes handling of the datetime64[ns] and + datetime64[ns, TZ] objects. + + If no dtype can be found, we return ``object``. + + Parameters + ---------- + dtype : dtype, type + The dtype object whose numpy dtype.type-style + object we want to extract. + + Returns + ------- + type + """ + if isinstance(dtype, type) and issubclass(dtype, np.generic): + # Type object from a dtype + + return dtype + elif isinstance(dtype, (np.dtype, ExtensionDtype)): + # dtype object + try: + _validate_date_like_dtype(dtype) + except TypeError: + # Should still pass if we don't have a date-like + pass + return dtype.type + + try: + dtype = pandas_dtype(dtype) + except TypeError: + pass + + if is_extension_array_dtype(dtype): + return dtype.type + elif isinstance(dtype, str): + + # TODO(jreback) + # should deprecate these + if dtype in ["datetimetz", "datetime64tz"]: + return DatetimeTZDtype.type + elif dtype in ["period"]: + raise NotImplementedError + + if dtype in ["datetime", "timedelta"]: + dtype += "64" + try: + return infer_dtype_from_object(getattr(np, dtype)) + except (AttributeError, TypeError): + # Handles cases like get_dtype(int) i.e., + # Python objects that are valid dtypes + # (unlike user-defined types, in general) + # + # TypeError handles the float16 type code of 'e' + # further handle internal types + pass + + return infer_dtype_from_object(np.dtype(dtype)) + + +def _validate_date_like_dtype(dtype) -> None: + """ + Check whether the dtype is a date-like dtype. Raises an error if invalid. + + Parameters + ---------- + dtype : dtype, type + The dtype to check. + + Raises + ------ + TypeError : The dtype could not be casted to a date-like dtype. + ValueError : The dtype is an illegal date-like dtype (e.g. 
the + frequency provided is too specific) + """ + try: + typ = np.datetime_data(dtype)[0] + except ValueError as e: + raise TypeError(e) from e + if typ not in ["generic", "ns"]: + raise ValueError( + f"{repr(dtype.name)} is too specific of a frequency, " + f"try passing {repr(dtype.type.__name__)}" + ) + + +def validate_all_hashable(*args, error_name: str | None = None) -> None: + """ + Return None if all args are hashable, else raise a TypeError. + + Parameters + ---------- + *args + Arguments to validate. + error_name : str, optional + The name to use if error + + Raises + ------ + TypeError : If an argument is not hashable + + Returns + ------- + None + """ + if not all(is_hashable(arg) for arg in args): + if error_name: + raise TypeError(f"{error_name} must be a hashable type") + else: + raise TypeError("All elements must be hashable") + + +def pandas_dtype(dtype) -> DtypeObj: + """ + Convert input into a pandas only dtype object or a numpy dtype object. + + Parameters + ---------- + dtype : object to be converted + + Returns + ------- + np.dtype or a pandas dtype + + Raises + ------ + TypeError if not a dtype + """ + # short-circuit + if isinstance(dtype, np.ndarray): + return dtype.dtype + elif isinstance(dtype, (np.dtype, ExtensionDtype)): + return dtype + + # registered extension types + result = registry.find(dtype) + if result is not None: + return result + + # try a numpy dtype + # raise a consistent TypeError if failed + try: + npdtype = np.dtype(dtype) + except SyntaxError as err: + # np.dtype uses `eval` which can raise SyntaxError + raise TypeError(f"data type '{dtype}' not understood") from err + + # Any invalid dtype (such as pd.Timestamp) should raise an error. + # np.dtype(invalid_type).kind = 0 for such objects. However, this will + # also catch some valid dtypes such as object, np.object_ and 'object' + # which we safeguard against by catching them earlier and returning + # np.dtype(valid_dtype) before this condition is evaluated. + if is_hashable(dtype) and dtype in [object, np.object_, "object", "O"]: + # check hashability to avoid errors/DeprecationWarning when we get + # here and `dtype` is an array + return npdtype + elif npdtype.kind == "O": + raise TypeError(f"dtype '{dtype}' not understood") + + return npdtype + + +def is_all_strings(value: ArrayLike) -> bool: + """ + Check if this is an array of strings that we should try parsing. + + Includes object-dtype ndarray containing all-strings, StringArray, + and Categorical with all-string categories. + Does not include numpy string dtypes. 
+ """ + dtype = value.dtype + + if isinstance(dtype, np.dtype): + return ( + dtype == np.dtype("object") + and lib.infer_dtype(value, skipna=False) == "string" + ) + elif isinstance(dtype, CategoricalDtype): + return dtype.categories.inferred_type == "string" + return dtype == "string" + + +__all__ = [ + "classes", + "classes_and_not_datetimelike", + "DT64NS_DTYPE", + "ensure_float", + "ensure_float64", + "ensure_python_int", + "ensure_str", + "get_dtype", + "infer_dtype_from_object", + "INT64_DTYPE", + "is_1d_only_ea_dtype", + "is_1d_only_ea_obj", + "is_all_strings", + "is_any_int_dtype", + "is_array_like", + "is_bool", + "is_bool_dtype", + "is_categorical", + "is_categorical_dtype", + "is_complex", + "is_complex_dtype", + "is_dataclass", + "is_datetime64_any_dtype", + "is_datetime64_dtype", + "is_datetime64_ns_dtype", + "is_datetime64tz_dtype", + "is_datetimelike_v_numeric", + "is_datetime_or_timedelta_dtype", + "is_decimal", + "is_dict_like", + "is_dtype_equal", + "is_ea_or_datetimelike_dtype", + "is_extension_array_dtype", + "is_extension_type", + "is_file_like", + "is_float_dtype", + "is_int64_dtype", + "is_integer_dtype", + "is_interval", + "is_interval_dtype", + "is_iterator", + "is_named_tuple", + "is_nested_list_like", + "is_number", + "is_numeric_dtype", + "is_numeric_v_string_like", + "is_object_dtype", + "is_period_dtype", + "is_re", + "is_re_compilable", + "is_scipy_sparse", + "is_sequence", + "is_signed_integer_dtype", + "is_sparse", + "is_string_dtype", + "is_string_or_object_np_dtype", + "is_timedelta64_dtype", + "is_timedelta64_ns_dtype", + "is_unsigned_integer_dtype", + "needs_i8_conversion", + "pandas_dtype", + "TD64NS_DTYPE", + "validate_all_hashable", +] diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py new file mode 100644 index 00000000..5fb63e09 --- /dev/null +++ b/pandas/core/dtypes/concat.py @@ -0,0 +1,365 @@ +""" +Utility functions related to concat. +""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + cast, +) +import warnings + +import numpy as np + +from pandas._typing import ( + ArrayLike, + DtypeObj, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.astype import astype_array +from pandas.core.dtypes.cast import ( + common_dtype_categorical_compat, + find_common_type, +) +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_sparse, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, +) +from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, + ABCExtensionArray, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas.core.arrays import Categorical + from pandas.core.arrays.sparse import SparseArray + + +def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: + """ + Helper function for `arr.astype(common_dtype)` but handling all special + cases. + """ + if is_dtype_equal(arr.dtype, dtype): + return arr + + if is_sparse(arr) and not is_sparse(dtype): + # TODO(2.0): remove special case once SparseArray.astype deprecation + # is enforced. 
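+        # A concrete way to picture the problem described below (hypothetical
+        # values, assumed for illustration only): with arr = SparseArray([0, 1],
+        # fill_value=0) and dtype = np.dtype("float64"), calling arr.astype(dtype)
+        # yields a Sparse[float64] array rather than a dense float64 ndarray,
+        # which is why the code densifies before casting.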
+ # problem case: SparseArray.astype(dtype) doesn't follow the specified + # dtype exactly, but converts this to Sparse[dtype] -> first manually + # convert to dense array + + # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type + # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _ + # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any, + # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict, + # Tuple[Any, Any]]]" [arg-type] + arr = cast("SparseArray", arr) + return arr.to_dense().astype(dtype, copy=False) # type: ignore[arg-type] + + # astype_array includes ensure_wrapped_if_datetimelike + return astype_array(arr, dtype=dtype, copy=False) + + +def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False): + """ + provide concatenation of an array of arrays each of which is a single + 'normalized' dtypes (in that for example, if it's object, then it is a + non-datetimelike and provide a combined dtype for the resulting array that + preserves the overall dtype if possible) + + Parameters + ---------- + to_concat : array of arrays + axis : axis to provide concatenation + ea_compat_axis : bool, default False + For ExtensionArray compat, behave as if axis == 1 when determining + whether to drop empty arrays. + + Returns + ------- + a single array, preserving the combined dtypes + """ + # filter empty arrays + # 1-d dtypes always are included here + def is_nonempty(x) -> bool: + if x.ndim <= axis: + return True + return x.shape[axis] > 0 + + # If all arrays are empty, there's nothing to convert, just short-cut to + # the concatenation, #3121. + # + # Creating an empty array directly is tempting, but the winnings would be + # marginal given that it would still require shape & dtype calculation and + # np.concatenate which has them both implemented is compiled. + non_empties = [x for x in to_concat if is_nonempty(x)] + if non_empties and axis == 0 and not ea_compat_axis: + # ea_compat_axis see GH#39574 + to_concat = non_empties + + dtypes = {obj.dtype for obj in to_concat} + kinds = {obj.dtype.kind for obj in to_concat} + contains_datetime = any( + isinstance(dtype, (np.dtype, DatetimeTZDtype)) and dtype.kind in ["m", "M"] + for dtype in dtypes + ) or any(isinstance(obj, ABCExtensionArray) and obj.ndim > 1 for obj in to_concat) + + all_empty = not len(non_empties) + single_dtype = len({x.dtype for x in to_concat}) == 1 + any_ea = any(isinstance(x.dtype, ExtensionDtype) for x in to_concat) + + if contains_datetime: + return _concat_datetime(to_concat, axis=axis) + + if any_ea: + # we ignore axis here, as internally concatting with EAs is always + # for axis=0 + if not single_dtype: + target_dtype = find_common_type([x.dtype for x in to_concat]) + target_dtype = common_dtype_categorical_compat(to_concat, target_dtype) + to_concat = [cast_to_common_type(arr, target_dtype) for arr in to_concat] + + if isinstance(to_concat[0], ABCExtensionArray): + # TODO: what about EA-backed Index? 
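+            # Illustrative example of this branch (hypothetical inputs, not
+            # taken from the surrounding code): concatenating two Categorical
+            # arrays that share a dtype lands here, so the result is built via
+            # Categorical._concat_same_type and stays categorical instead of
+            # being coerced to object by np.concatenate below.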
+ cls = type(to_concat[0]) + return cls._concat_same_type(to_concat) + else: + return np.concatenate(to_concat) + + elif all_empty: + # we have all empties, but may need to coerce the result dtype to + # object if we have non-numeric type operands (numpy would otherwise + # cast this to float) + if len(kinds) != 1: + + if not len(kinds - {"i", "u", "f"}) or not len(kinds - {"b", "i", "u"}): + # let numpy coerce + pass + else: + # coerce to object + to_concat = [x.astype("object") for x in to_concat] + kinds = {"o"} + + result = np.concatenate(to_concat, axis=axis) + if "b" in kinds and result.dtype.kind in ["i", "u", "f"]: + # GH#39817 + warnings.warn( + "Behavior when concatenating bool-dtype and numeric-dtype arrays is " + "deprecated; in a future version these will cast to object dtype " + "(instead of coercing bools to numeric values). To retain the old " + "behavior, explicitly cast bool-dtype arrays to numeric dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return result + + +def union_categoricals( + to_union, sort_categories: bool = False, ignore_order: bool = False +) -> Categorical: + """ + Combine list-like of Categorical-like, unioning categories. + + All categories must have the same dtype. + + Parameters + ---------- + to_union : list-like + Categorical, CategoricalIndex, or Series with dtype='category'. + sort_categories : bool, default False + If true, resulting categories will be lexsorted, otherwise + they will be ordered as they appear in the data. + ignore_order : bool, default False + If true, the ordered attribute of the Categoricals will be ignored. + Results in an unordered categorical. + + Returns + ------- + Categorical + + Raises + ------ + TypeError + - all inputs do not have the same dtype + - all inputs do not have the same ordered property + - all inputs are ordered and their categories are not identical + - sort_categories=True and Categoricals are ordered + ValueError + Empty list of categoricals passed + + Notes + ----- + To learn more about categories, see `link + `__ + + Examples + -------- + If you want to combine categoricals that do not necessarily have + the same categories, `union_categoricals` will combine a list-like + of categoricals. The new categories will be the union of the + categories being combined. + + >>> a = pd.Categorical(["b", "c"]) + >>> b = pd.Categorical(["a", "b"]) + >>> pd.api.types.union_categoricals([a, b]) + ['b', 'c', 'a', 'b'] + Categories (3, object): ['b', 'c', 'a'] + + By default, the resulting categories will be ordered as they appear + in the `categories` of the data. If you want the categories to be + lexsorted, use `sort_categories=True` argument. + + >>> pd.api.types.union_categoricals([a, b], sort_categories=True) + ['b', 'c', 'a', 'b'] + Categories (3, object): ['a', 'b', 'c'] + + `union_categoricals` also works with the case of combining two + categoricals of the same categories and order information (e.g. what + you could also `append` for). + + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "a"], ordered=True) + >>> pd.api.types.union_categoricals([a, b]) + ['a', 'b', 'a', 'b', 'a'] + Categories (2, object): ['a' < 'b'] + + Raises `TypeError` because the categories are ordered and not identical. + + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "c"], ordered=True) + >>> pd.api.types.union_categoricals([a, b]) + Traceback (most recent call last): + ... 
+ TypeError: to union ordered Categoricals, all categories must be the same + + New in version 0.20.0 + + Ordered categoricals with different categories or orderings can be + combined by using the `ignore_ordered=True` argument. + + >>> a = pd.Categorical(["a", "b", "c"], ordered=True) + >>> b = pd.Categorical(["c", "b", "a"], ordered=True) + >>> pd.api.types.union_categoricals([a, b], ignore_order=True) + ['a', 'b', 'c', 'c', 'b', 'a'] + Categories (3, object): ['a', 'b', 'c'] + + `union_categoricals` also works with a `CategoricalIndex`, or `Series` + containing categorical data, but note that the resulting array will + always be a plain `Categorical` + + >>> a = pd.Series(["b", "c"], dtype='category') + >>> b = pd.Series(["a", "b"], dtype='category') + >>> pd.api.types.union_categoricals([a, b]) + ['b', 'c', 'a', 'b'] + Categories (3, object): ['b', 'c', 'a'] + """ + from pandas import Categorical + from pandas.core.arrays.categorical import recode_for_categories + + if len(to_union) == 0: + raise ValueError("No Categoricals to union") + + def _maybe_unwrap(x): + if isinstance(x, (ABCCategoricalIndex, ABCSeries)): + return x._values + elif isinstance(x, Categorical): + return x + else: + raise TypeError("all components to combine must be Categorical") + + to_union = [_maybe_unwrap(x) for x in to_union] + first = to_union[0] + + if not all( + is_dtype_equal(other.categories.dtype, first.categories.dtype) + for other in to_union[1:] + ): + raise TypeError("dtype of categories must be the same") + + ordered = False + if all(first._categories_match_up_to_permutation(other) for other in to_union[1:]): + # identical categories - fastpath + categories = first.categories + ordered = first.ordered + + all_codes = [first._encode_with_my_categories(x)._codes for x in to_union] + new_codes = np.concatenate(all_codes) + + if sort_categories and not ignore_order and ordered: + raise TypeError("Cannot use sort_categories=True with ordered Categoricals") + + if sort_categories and not categories.is_monotonic_increasing: + categories = categories.sort_values() + indexer = categories.get_indexer(first.categories) + + from pandas.core.algorithms import take_nd + + new_codes = take_nd(indexer, new_codes, fill_value=-1) + elif ignore_order or all(not c.ordered for c in to_union): + # different categories - union and recode + cats = first.categories.append([c.categories for c in to_union[1:]]) + categories = cats.unique() + if sort_categories: + categories = categories.sort_values() + + new_codes = [ + recode_for_categories(c.codes, c.categories, categories) for c in to_union + ] + new_codes = np.concatenate(new_codes) + else: + # ordered - to show a proper error message + if all(c.ordered for c in to_union): + msg = "to union ordered Categoricals, all categories must be the same" + raise TypeError(msg) + else: + raise TypeError("Categorical.ordered must be the same") + + if ignore_order: + ordered = False + + return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True) + + +def _concatenate_2d(to_concat, axis: int): + # coerce to 2d if needed & concatenate + if axis == 1: + to_concat = [np.atleast_2d(x) for x in to_concat] + return np.concatenate(to_concat, axis=axis) + + +def _concat_datetime(to_concat, axis=0): + """ + provide concatenation of an datetimelike array of arrays each of which is a + single M8[ns], datetime64[ns, tz] or m8[ns] dtype + + Parameters + ---------- + to_concat : array of arrays + axis : axis to provide concatenation + + Returns + ------- + a single array, 
preserving the combined dtypes + """ + from pandas.core.construction import ensure_wrapped_if_datetimelike + + to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat] + + single_dtype = len({x.dtype for x in to_concat}) == 1 + + # multiple types, need to coerce to object + if not single_dtype: + # ensure_wrapped_if_datetimelike ensures that astype(object) wraps + # in Timestamp/Timedelta + return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis) + + result = type(to_concat[0])._concat_same_type(to_concat, axis=axis) + return result diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py new file mode 100644 index 00000000..e2570e6b --- /dev/null +++ b/pandas/core/dtypes/dtypes.py @@ -0,0 +1,1487 @@ +""" +Define extension dtypes. +""" +from __future__ import annotations + +import re +from typing import ( + TYPE_CHECKING, + Any, + MutableMapping, + cast, +) + +import numpy as np +import pytz + +from pandas._libs import missing as libmissing +from pandas._libs.interval import Interval +from pandas._libs.properties import cache_readonly +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + NaTType, + Period, + Timestamp, + dtypes, + timezones, + to_offset, + tz_compare, +) +from pandas._typing import ( + Dtype, + DtypeObj, + Ordered, + npt, + type_t, +) + +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, + ABCIndex, +) +from pandas.core.dtypes.inference import ( + is_bool, + is_list_like, +) + +if TYPE_CHECKING: + from datetime import tzinfo + + import pyarrow + + from pandas import ( + Categorical, + Index, + ) + from pandas.core.arrays import ( + BaseMaskedArray, + DatetimeArray, + IntervalArray, + PandasArray, + PeriodArray, + ) + +str_type = str + + +class PandasExtensionDtype(ExtensionDtype): + """ + A np.dtype duck-typed class, suitable for holding a custom dtype. + + THIS IS NOT A REAL NUMPY DTYPE + """ + + type: Any + kind: Any + # The Any type annotations above are here only because mypy seems to have a + # problem dealing with multiple inheritance from PandasExtensionDtype + # and ExtensionDtype's @properties in the subclasses below. The kind and + # type variables in those subclasses are explicitly typed below. + subdtype = None + str: str_type + num = 100 + shape: tuple[int, ...] = () + itemsize = 8 + base: DtypeObj | None = None + isbuiltin = 0 + isnative = 0 + _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} + + def __repr__(self) -> str_type: + """ + Return a string representation for a particular object. + """ + return str(self) + + def __hash__(self) -> int: + raise NotImplementedError("sub-classes should implement an __hash__ method") + + def __getstate__(self) -> dict[str_type, Any]: + # pickle support; we don't want to pickle the cache + return {k: getattr(self, k, None) for k in self._metadata} + + @classmethod + def reset_cache(cls) -> None: + """clear the cache""" + cls._cache_dtypes = {} + + +class CategoricalDtypeType(type): + """ + the type of CategoricalDtype, this metaclass determines subclass ability + """ + + pass + + +@register_extension_dtype +class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): + """ + Type for categorical data with the categories and orderedness. + + Parameters + ---------- + categories : sequence, optional + Must be unique, and must not contain any nulls. + The categories are stored in an Index, + and if an index is provided the dtype of that index will be used. 
+    ordered : bool or None, default False
+        Whether or not this categorical is treated as an ordered categorical.
+        None can be used to maintain the ordered value of existing categoricals when
+        used in operations that combine categoricals, e.g. astype, and will resolve to
+        False if there is no existing ordered to maintain.
+
+    Attributes
+    ----------
+    categories
+    ordered
+
+    Methods
+    -------
+    None
+
+    See Also
+    --------
+    Categorical : Represent a categorical variable in classic R / S-plus fashion.
+
+    Notes
+    -----
+    This class is useful for specifying the type of a ``Categorical``
+    independent of the values. See :ref:`categorical.categoricaldtype`
+    for more.
+
+    Examples
+    --------
+    >>> t = pd.CategoricalDtype(categories=['b', 'a'], ordered=True)
+    >>> pd.Series(['a', 'b', 'a', 'c'], dtype=t)
+    0      a
+    1      b
+    2      a
+    3    NaN
+    dtype: category
+    Categories (2, object): ['b' < 'a']
+
+    An empty CategoricalDtype with a specific dtype can be created
+    by providing an empty index. As follows,
+
+    >>> pd.CategoricalDtype(pd.DatetimeIndex([])).categories.dtype
+    dtype('<M8[ns]')
+    """
+
+    # TODO: Document public vs. private API
+    name = "category"
+    type: type[CategoricalDtypeType] = CategoricalDtypeType
+    kind: str_type = "O"
+    str = "|O08"
+    base = np.dtype("O")
+    _metadata = ("categories", "ordered")
+    _cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
+
+    def __init__(self, categories=None, ordered: Ordered = False) -> None:
+        self._finalize(categories, ordered, fastpath=False)
+
+    @classmethod
+    def _from_fastpath(
+        cls, categories=None, ordered: bool | None = None
+    ) -> CategoricalDtype:
+        self = cls.__new__(cls)
+        self._finalize(categories, ordered, fastpath=True)
+        return self
+
+    @classmethod
+    def _from_categorical_dtype(
+        cls, dtype: CategoricalDtype, categories=None, ordered: Ordered = None
+    ) -> CategoricalDtype:
+        if categories is ordered is None:
+            return dtype
+        if categories is None:
+            categories = dtype.categories
+        if ordered is None:
+            ordered = dtype.ordered
+        return cls(categories, ordered)
+
+    @classmethod
+    def _from_values_or_dtype(
+        cls,
+        values=None,
+        categories=None,
+        ordered: bool | None = None,
+        dtype: Dtype | None = None,
+    ) -> CategoricalDtype:
+        """
+        Construct dtype from the input parameters used in :class:`Categorical`.
+
+        This constructor method specifically does not do the factorization
+        step, if that is needed to find the categories. This constructor may
+        therefore return ``CategoricalDtype(categories=None, ordered=None)``,
+        which may not be useful. Additional steps may therefore have to be
+        taken to create the final dtype.
+
+        The return dtype is specified from the inputs in this prioritized
+        order:
+        1. if dtype is a CategoricalDtype, return dtype
+        2. if dtype is the string 'category', create a CategoricalDtype from
+           the supplied categories and ordered parameters, and return that.
+        3. if values is a categorical, use value.dtype, but override it with
+           categories and ordered if either/both of those are not None.
+        4. if dtype is None and values is not a categorical, construct the
+           dtype from categories and ordered, even if either of those is None.
+
+        Parameters
+        ----------
+        values : list-like, optional
+            The list-like must be 1-dimensional.
+        categories : list-like, optional
+            Categories for the CategoricalDtype.
+        ordered : bool, optional
+            Designating if the categories are ordered.
+        dtype : CategoricalDtype or the string "category", optional
+            If ``CategoricalDtype``, cannot be used together with
+            `categories` or `ordered`.
+
+        Returns
+        -------
+        CategoricalDtype
+
+        Examples
+        --------
+        >>> pd.CategoricalDtype._from_values_or_dtype()
+        CategoricalDtype(categories=None, ordered=None)
+        >>> pd.CategoricalDtype._from_values_or_dtype(
+        ...     categories=['a', 'b'], ordered=True
+        ...
) + CategoricalDtype(categories=['a', 'b'], ordered=True) + >>> dtype1 = pd.CategoricalDtype(['a', 'b'], ordered=True) + >>> dtype2 = pd.CategoricalDtype(['x', 'y'], ordered=False) + >>> c = pd.Categorical([0, 1], dtype=dtype1, fastpath=True) + >>> pd.CategoricalDtype._from_values_or_dtype( + ... c, ['x', 'y'], ordered=True, dtype=dtype2 + ... ) + Traceback (most recent call last): + ... + ValueError: Cannot specify `categories` or `ordered` together with + `dtype`. + + The supplied dtype takes precedence over values' dtype: + + >>> pd.CategoricalDtype._from_values_or_dtype(c, dtype=dtype2) + CategoricalDtype(categories=['x', 'y'], ordered=False) + """ + + if dtype is not None: + # The dtype argument takes precedence over values.dtype (if any) + if isinstance(dtype, str): + if dtype == "category": + dtype = CategoricalDtype(categories, ordered) + else: + raise ValueError(f"Unknown dtype {repr(dtype)}") + elif categories is not None or ordered is not None: + raise ValueError( + "Cannot specify `categories` or `ordered` together with `dtype`." + ) + elif not isinstance(dtype, CategoricalDtype): + raise ValueError(f"Cannot not construct CategoricalDtype from {dtype}") + elif cls.is_dtype(values): + # If no "dtype" was passed, use the one from "values", but honor + # the "ordered" and "categories" arguments + dtype = values.dtype._from_categorical_dtype( + values.dtype, categories, ordered + ) + else: + # If dtype=None and values is not categorical, create a new dtype. + # Note: This could potentially have categories=None and + # ordered=None. + dtype = CategoricalDtype(categories, ordered) + + return cast(CategoricalDtype, dtype) + + @classmethod + def construct_from_string(cls, string: str_type) -> CategoricalDtype: + """ + Construct a CategoricalDtype from a string. + + Parameters + ---------- + string : str + Must be the string "category" in order to be successfully constructed. + + Returns + ------- + CategoricalDtype + Instance of the dtype. + + Raises + ------ + TypeError + If a CategoricalDtype cannot be constructed from the input. + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + if string != cls.name: + raise TypeError(f"Cannot construct a 'CategoricalDtype' from '{string}'") + + # need ordered=None to ensure that operations specifying dtype="category" don't + # override the ordered value for existing categoricals + return cls(ordered=None) + + def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None: + + if ordered is not None: + self.validate_ordered(ordered) + + if categories is not None: + categories = self.validate_categories(categories, fastpath=fastpath) + + self._categories = categories + self._ordered = ordered + + def __setstate__(self, state: MutableMapping[str_type, Any]) -> None: + # for pickle compat. 
__get_state__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) + self._categories = state.pop("categories", None) + self._ordered = state.pop("ordered", False) + + def __hash__(self) -> int: + # _hash_categories returns a uint64, so use the negative + # space for when we have unknown categories to avoid a conflict + if self.categories is None: + if self.ordered: + return -1 + else: + return -2 + # We *do* want to include the real self.ordered here + return int(self._hash_categories) + + def __eq__(self, other: Any) -> bool: + """ + Rules for CDT equality: + 1) Any CDT is equal to the string 'category' + 2) Any CDT is equal to itself + 3) Any CDT is equal to a CDT with categories=None regardless of ordered + 4) A CDT with ordered=True is only equal to another CDT with + ordered=True and identical categories in the same order + 5) A CDT with ordered={False, None} is only equal to another CDT with + ordered={False, None} and identical categories, but same order is + not required. There is no distinction between False/None. + 6) Any other comparison returns False + """ + if isinstance(other, str): + return other == self.name + elif other is self: + return True + elif not (hasattr(other, "ordered") and hasattr(other, "categories")): + return False + elif self.categories is None or other.categories is None: + # For non-fully-initialized dtypes, these are only equal to + # - the string "category" (handled above) + # - other CategoricalDtype with categories=None + return self.categories is other.categories + elif self.ordered or other.ordered: + # At least one has ordered=True; equal if both have ordered=True + # and the same values for categories in the same order. + return (self.ordered == other.ordered) and self.categories.equals( + other.categories + ) + else: + # Neither has ordered=True; equal if both have the same categories, + # but same order is not necessary. There is no distinction between + # ordered=False and ordered=None: CDT(., False) and CDT(., None) + # will be equal if they have the same categories. + left = self.categories + right = other.categories + + # GH#36280 the ordering of checks here is for performance + if not left.dtype == right.dtype: + return False + + if len(left) != len(right): + return False + + if self.categories.equals(other.categories): + # Check and see if they happen to be identical categories + return True + + if left.dtype != object: + # Faster than calculating hash + indexer = left.get_indexer(right) + # Because left and right have the same length and are unique, + # `indexer` not having any -1s implies that there is a + # bijection between `left` and `right`. + return (indexer != -1).all() + + # With object-dtype we need a comparison that identifies + # e.g. 
int(2) as distinct from float(2) + return hash(self) == hash(other) + + def __repr__(self) -> str_type: + if self.categories is None: + data = "None" + else: + data = self.categories._format_data(name=type(self).__name__) + if data is None: + # self.categories is RangeIndex + data = str(self.categories._range) + data = data.rstrip(", ") + return f"CategoricalDtype(categories={data}, ordered={self.ordered})" + + @cache_readonly + def _hash_categories(self) -> int: + from pandas.core.util.hashing import ( + combine_hash_arrays, + hash_array, + hash_tuples, + ) + + categories = self.categories + ordered = self.ordered + + if len(categories) and isinstance(categories[0], tuple): + # assumes if any individual category is a tuple, then all our. ATM + # I don't really want to support just some of the categories being + # tuples. + cat_list = list(categories) # breaks if a np.array of categories + cat_array = hash_tuples(cat_list) + else: + if categories.dtype == "O" and len({type(x) for x in categories}) != 1: + # TODO: hash_array doesn't handle mixed types. It casts + # everything to a str first, which means we treat + # {'1', '2'} the same as {'1', 2} + # find a better solution + hashed = hash((tuple(categories), ordered)) + return hashed + + if DatetimeTZDtype.is_dtype(categories.dtype): + # Avoid future warning. + categories = categories.view("datetime64[ns]") + + cat_array = hash_array(np.asarray(categories), categorize=False) + if ordered: + cat_array = np.vstack( + [cat_array, np.arange(len(cat_array), dtype=cat_array.dtype)] + ) + else: + cat_array = np.array([cat_array]) + combined_hashed = combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) + return np.bitwise_xor.reduce(combined_hashed) + + @classmethod + def construct_array_type(cls) -> type_t[Categorical]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas import Categorical + + return Categorical + + @staticmethod + def validate_ordered(ordered: Ordered) -> None: + """ + Validates that we have a valid ordered parameter. If + it is not a boolean, a TypeError will be raised. + + Parameters + ---------- + ordered : object + The parameter to be verified. + + Raises + ------ + TypeError + If 'ordered' is not a boolean. 
+ """ + if not is_bool(ordered): + raise TypeError("'ordered' must either be 'True' or 'False'") + + @staticmethod + def validate_categories(categories, fastpath: bool = False) -> Index: + """ + Validates that we have good categories + + Parameters + ---------- + categories : array-like + fastpath : bool + Whether to skip nan and uniqueness checks + + Returns + ------- + categories : Index + """ + from pandas.core.indexes.base import Index + + if not fastpath and not is_list_like(categories): + raise TypeError( + f"Parameter 'categories' must be list-like, was {repr(categories)}" + ) + elif not isinstance(categories, ABCIndex): + categories = Index._with_infer(categories, tupleize_cols=False) + + if not fastpath: + + if categories.hasnans: + raise ValueError("Categorical categories cannot be null") + + if not categories.is_unique: + raise ValueError("Categorical categories must be unique") + + if isinstance(categories, ABCCategoricalIndex): + categories = categories.categories + + return categories + + def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype: + """ + Returns a CategoricalDtype with categories and ordered taken from dtype + if specified, otherwise falling back to self if unspecified + + Parameters + ---------- + dtype : CategoricalDtype + + Returns + ------- + new_dtype : CategoricalDtype + """ + if isinstance(dtype, str) and dtype == "category": + # dtype='category' should not change anything + return self + elif not self.is_dtype(dtype): + raise ValueError( + f"a CategoricalDtype must be passed to perform an update, " + f"got {repr(dtype)}" + ) + else: + # from here on, dtype is a CategoricalDtype + dtype = cast(CategoricalDtype, dtype) + + # update categories/ordered unless they've been explicitly passed as None + new_categories = ( + dtype.categories if dtype.categories is not None else self.categories + ) + new_ordered = dtype.ordered if dtype.ordered is not None else self.ordered + + return CategoricalDtype(new_categories, new_ordered) + + @property + def categories(self) -> Index: + """ + An ``Index`` containing the unique categories allowed. + """ + return self._categories + + @property + def ordered(self) -> Ordered: + """ + Whether the categories have an ordered relationship. + """ + return self._ordered + + @property + def _is_boolean(self) -> bool: + from pandas.core.dtypes.common import is_bool_dtype + + return is_bool_dtype(self.categories) + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + from pandas.core.arrays.sparse import SparseDtype + + # check if we have all categorical dtype with identical categories + if all(isinstance(x, CategoricalDtype) for x in dtypes): + first = dtypes[0] + if all(first == other for other in dtypes[1:]): + return first + + # special case non-initialized categorical + # TODO we should figure out the expected return value in general + non_init_cats = [ + isinstance(x, CategoricalDtype) and x.categories is None for x in dtypes + ] + if all(non_init_cats): + return self + elif any(non_init_cats): + return None + + # categorical is aware of Sparse -> extract sparse subdtypes + dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes] + # extract the categories' dtype + non_cat_dtypes = [ + x.categories.dtype if isinstance(x, CategoricalDtype) else x for x in dtypes + ] + # TODO should categorical always give an answer? 
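+        # As a rough illustration (hypothetical dtypes, assumed for this
+        # comment only): combining CategoricalDtype(['a', 'b']) with
+        # np.dtype("float64") reduces the categorical side to its categories'
+        # dtype (object here), and find_common_type then returns object, so
+        # mixed categorical/non-categorical inputs do not stay categorical.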
+        from pandas.core.dtypes.cast import find_common_type
+
+        return find_common_type(non_cat_dtypes)
+
+
+@register_extension_dtype
+class DatetimeTZDtype(PandasExtensionDtype):
+    """
+    An ExtensionDtype for timezone-aware datetime data.
+
+    **This is not an actual numpy dtype**, but a duck type.
+
+    Parameters
+    ----------
+    unit : str, default "ns"
+        The precision of the datetime data. Currently limited
+        to ``"ns"``.
+    tz : str, int, or datetime.tzinfo
+        The timezone.
+
+    Attributes
+    ----------
+    unit
+    tz
+
+    Methods
+    -------
+    None
+
+    Raises
+    ------
+    pytz.UnknownTimeZoneError
+        When the requested timezone cannot be found.
+
+    Examples
+    --------
+    >>> pd.DatetimeTZDtype(tz='UTC')
+    datetime64[ns, UTC]
+
+    >>> pd.DatetimeTZDtype(tz='dateutil/US/Central')
+    datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
+    """
+
+    type: type[Timestamp] = Timestamp
+    kind: str_type = "M"
+    num = 101
+    base = np.dtype("M8[ns]")  # TODO: depend on reso?
+    _metadata = ("unit", "tz")
+    _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
+    _cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
+
+    @property
+    def na_value(self) -> NaTType:
+        return NaT
+
+    @cache_readonly
+    def str(self):
+        return f"|M8[{self._unit}]"
+
+    def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None) -> None:
+        if isinstance(unit, DatetimeTZDtype):
+            # error: "str" has no attribute "tz"
+            unit, tz = unit.unit, unit.tz  # type: ignore[attr-defined]
+
+        if unit != "ns":
+            if isinstance(unit, str) and tz is None:
+                # maybe a string like datetime64[ns, tz], which we support for
+                # now.
+                result = type(self).construct_from_string(unit)
+                unit = result.unit
+                tz = result.tz
+                msg = (
+                    f"Passing a dtype alias like 'datetime64[ns, {tz}]' "
+                    "to DatetimeTZDtype is no longer supported. Use "
+                    "'DatetimeTZDtype.construct_from_string()' instead."
+                )
+                raise ValueError(msg)
+            if unit not in ["s", "ms", "us", "ns"]:
+                raise ValueError("DatetimeTZDtype only supports s, ms, us, ns units")
+
+        if tz:
+            tz = timezones.maybe_get_tz(tz)
+            tz = timezones.tz_standardize(tz)
+        elif tz is not None:
+            raise pytz.UnknownTimeZoneError(tz)
+        if tz is None:
+            raise TypeError("A 'tz' is required.")
+
+        self._unit = unit
+        self._tz = tz
+
+    @cache_readonly
+    def _reso(self) -> int:
+        """
+        The NPY_DATETIMEUNIT corresponding to this dtype's resolution.
+        """
+        reso = {
+            "s": dtypes.NpyDatetimeUnit.NPY_FR_s,
+            "ms": dtypes.NpyDatetimeUnit.NPY_FR_ms,
+            "us": dtypes.NpyDatetimeUnit.NPY_FR_us,
+            "ns": dtypes.NpyDatetimeUnit.NPY_FR_ns,
+        }[self._unit]
+        return reso.value
+
+    @property
+    def unit(self) -> str_type:
+        """
+        The precision of the datetime data.
+        """
+        return self._unit
+
+    @property
+    def tz(self) -> tzinfo:
+        """
+        The timezone.
+        """
+        return self._tz
+
+    @classmethod
+    def construct_array_type(cls) -> type_t[DatetimeArray]:
+        """
+        Return the array type associated with this dtype.
+
+        Returns
+        -------
+        type
+        """
+        from pandas.core.arrays import DatetimeArray
+
+        return DatetimeArray
+
+    @classmethod
+    def construct_from_string(cls, string: str_type) -> DatetimeTZDtype:
+        """
+        Construct a DatetimeTZDtype from a string.
+
+        Parameters
+        ----------
+        string : str
+            The string alias for this DatetimeTZDtype.
+            Should be formatted like ``datetime64[ns, <tz>]``,
+            where ``<tz>`` is the timezone name.
+ + Examples + -------- + >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') + datetime64[ns, UTC] + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + + msg = f"Cannot construct a 'DatetimeTZDtype' from '{string}'" + match = cls._match.match(string) + if match: + d = match.groupdict() + try: + return cls(unit=d["unit"], tz=d["tz"]) + except (KeyError, TypeError, ValueError) as err: + # KeyError if maybe_get_tz tries and fails to get a + # pytz timezone (actually pytz.UnknownTimeZoneError). + # TypeError if we pass a nonsense tz; + # ValueError if we pass a unit other than "ns" + raise TypeError(msg) from err + raise TypeError(msg) + + def __str__(self) -> str_type: + return f"datetime64[{self.unit}, {self.tz}]" + + @property + def name(self) -> str_type: + """A string representation of the dtype.""" + return str(self) + + def __hash__(self) -> int: + # make myself hashable + # TODO: update this. + return hash(str(self)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + if other.startswith("M8["): + other = "datetime64[" + other[3:] + return other == self.name + + return ( + isinstance(other, DatetimeTZDtype) + and self.unit == other.unit + and tz_compare(self.tz, other.tz) + ) + + def __setstate__(self, state) -> None: + # for pickle compat. __get_state__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) + self._tz = state["tz"] + self._unit = state["unit"] + + +@register_extension_dtype +class PeriodDtype(dtypes.PeriodDtypeBase, PandasExtensionDtype): + """ + An ExtensionDtype for Period data. + + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + freq : str or DateOffset + The frequency of this PeriodDtype. + + Attributes + ---------- + freq + + Methods + ------- + None + + Examples + -------- + >>> pd.PeriodDtype(freq='D') + period[D] + + >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd()) + period[M] + """ + + type: type[Period] = Period + kind: str_type = "O" + str = "|O08" + base = np.dtype("O") + num = 102 + _metadata = ("freq",) + _match = re.compile(r"(P|p)eriod\[(?P.+)\]") + _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} + + def __new__(cls, freq=None): + """ + Parameters + ---------- + freq : frequency + """ + if isinstance(freq, PeriodDtype): + return freq + + elif freq is None: + # empty constructor for pickle compat + # -10_000 corresponds to PeriodDtypeCode.UNDEFINED + u = dtypes.PeriodDtypeBase.__new__(cls, -10_000) + u._freq = None + return u + + if not isinstance(freq, BaseOffset): + freq = cls._parse_dtype_strict(freq) + + try: + return cls._cache_dtypes[freq.freqstr] + except KeyError: + dtype_code = freq._period_dtype_code + u = dtypes.PeriodDtypeBase.__new__(cls, dtype_code) + u._freq = freq + cls._cache_dtypes[freq.freqstr] = u + return u + + def __reduce__(self): + return type(self), (self.freq,) + + @property + def freq(self): + """ + The frequency object of this PeriodDtype. + """ + return self._freq + + @classmethod + def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset: + if isinstance(freq, str): # note: freq is already of type str! 
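+            # For illustration (hypothetical inputs, not part of the original
+            # comment): a spec such as "period[D]" is stripped to "D" by the
+            # match below before being handed to to_offset, while a bare alias
+            # like "D" skips that step and goes straight to to_offset, which
+            # returns the corresponding Day offset in both cases.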
+ if freq.startswith("period[") or freq.startswith("Period["): + m = cls._match.search(freq) + if m is not None: + freq = m.group("freq") + + freq_offset = to_offset(freq) + if freq_offset is not None: + return freq_offset + + raise ValueError("could not construct PeriodDtype") + + @classmethod + def construct_from_string(cls, string: str_type) -> PeriodDtype: + """ + Strict construction from a string, raise a TypeError if not + possible + """ + if ( + isinstance(string, str) + and (string.startswith("period[") or string.startswith("Period[")) + or isinstance(string, BaseOffset) + ): + # do not parse string like U as period[U] + # avoid tuple to be regarded as freq + try: + return cls(freq=string) + except ValueError: + pass + if isinstance(string, str): + msg = f"Cannot construct a 'PeriodDtype' from '{string}'" + else: + msg = f"'construct_from_string' expects a string, got {type(string)}" + raise TypeError(msg) + + def __str__(self) -> str_type: + return self.name + + @property + def name(self) -> str_type: + return f"period[{self.freq.freqstr}]" + + @property + def na_value(self) -> NaTType: + return NaT + + def __hash__(self) -> int: + # make myself hashable + return hash(str(self)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + return other in [self.name, self.name.title()] + + elif isinstance(other, PeriodDtype): + + # For freqs that can be held by a PeriodDtype, this check is + # equivalent to (and much faster than) self.freq == other.freq + sfreq = self.freq + ofreq = other.freq + return ( + sfreq.n == ofreq.n + and sfreq._period_dtype_code == ofreq._period_dtype_code + ) + + return False + + def __ne__(self, other: Any) -> bool: + return not self.__eq__(other) + + def __setstate__(self, state) -> None: + # for pickle compat. __getstate__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) + self._freq = state["freq"] + + @classmethod + def is_dtype(cls, dtype: object) -> bool: + """ + Return a boolean if we if the passed type is an actual dtype that we + can match (via string or type) + """ + if isinstance(dtype, str): + # PeriodDtype can be instantiated from freq string like "U", + # but doesn't regard freq str like "U" as dtype. + if dtype.startswith("period[") or dtype.startswith("Period["): + try: + if cls._parse_dtype_strict(dtype) is not None: + return True + else: + return False + except ValueError: + return False + else: + return False + return super().is_dtype(dtype) + + @classmethod + def construct_array_type(cls) -> type_t[PeriodArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays import PeriodArray + + return PeriodArray + + def __from_arrow__( + self, array: pyarrow.Array | pyarrow.ChunkedArray + ) -> PeriodArray: + """ + Construct PeriodArray from pyarrow Array/ChunkedArray. 
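A sketch of how PeriodDtype instances are parsed and cached (editorial; assumes pandas 1.5 as ``pd``). The dtype-string pattern uses a single named group, ``freq``, and only the ``period[...]`` / ``Period[...]`` spellings are accepted by construct_from_string, so a bare frequency alias such as "M" is never mistaken for a dtype string.

import pandas as pd

dtype = pd.PeriodDtype("M")              # freq may be a string or a DateOffset
print(dtype)                             # period[M]

# Instances are cached per frequency string, so repeated construction
# returns the same object.
print(pd.PeriodDtype("M") is dtype)      # True

# The dtype-string form goes through construct_from_string:
print(pd.api.types.pandas_dtype("period[M]") == dtype)   # True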
+ """ + import pyarrow + + from pandas.core.arrays import PeriodArray + from pandas.core.arrays.arrow._arrow_utils import ( + pyarrow_array_to_numpy_and_mask, + ) + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + chunks = array.chunks + + results = [] + for arr in chunks: + data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=np.dtype(np.int64)) + parr = PeriodArray(data.copy(), freq=self.freq, copy=False) + # error: Invalid index type "ndarray[Any, dtype[bool_]]" for "PeriodArray"; + # expected type "Union[int, Sequence[int], Sequence[bool], slice]" + parr[~mask] = NaT # type: ignore[index] + results.append(parr) + + if not results: + return PeriodArray(np.array([], dtype="int64"), freq=self.freq, copy=False) + return PeriodArray._concat_same_type(results) + + +@register_extension_dtype +class IntervalDtype(PandasExtensionDtype): + """ + An ExtensionDtype for Interval data. + + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + subtype : str, np.dtype + The dtype of the Interval bounds. + + Attributes + ---------- + subtype + + Methods + ------- + None + + Examples + -------- + >>> pd.IntervalDtype(subtype='int64', closed='both') + interval[int64, both] + """ + + name = "interval" + kind: str_type = "O" + str = "|O08" + base = np.dtype("O") + num = 103 + _metadata = ( + "subtype", + "closed", + ) + + _match = re.compile( + r"(I|i)nterval\[(?P[^,]+(\[.+\])?)" + r"(, (?P(right|left|both|neither)))?\]" + ) + + _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} + + def __new__(cls, subtype=None, closed: str_type | None = None): + from pandas.core.dtypes.common import ( + is_string_dtype, + pandas_dtype, + ) + + if closed is not None and closed not in {"right", "left", "both", "neither"}: + raise ValueError("closed must be one of 'right', 'left', 'both', 'neither'") + + if isinstance(subtype, IntervalDtype): + if closed is not None and closed != subtype.closed: + raise ValueError( + "dtype.closed and 'closed' do not match. " + "Try IntervalDtype(dtype.subtype, closed) instead." 
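A minimal sketch of IntervalDtype in use (editorial; outputs are what pandas 1.5 is expected to print). The parsing pattern captures the named groups ``subtype`` and ``closed`` from strings such as ``interval[int64, right]``.

import pandas as pd

dtype = pd.IntervalDtype("int64", closed="right")
print(dtype)                                     # interval[int64, right]

# Interval-backed containers carry the dtype automatically.
print(pd.interval_range(start=0, end=3).dtype)                 # interval[int64, right]
print(pd.api.types.pandas_dtype("interval[float64, left]"))    # interval[float64, left]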
+ ) + return subtype + elif subtype is None: + # we are called as an empty constructor + # generally for pickle compat + u = object.__new__(cls) + u._subtype = None + u._closed = closed + return u + elif isinstance(subtype, str) and subtype.lower() == "interval": + subtype = None + else: + if isinstance(subtype, str): + m = cls._match.search(subtype) + if m is not None: + gd = m.groupdict() + subtype = gd["subtype"] + if gd.get("closed", None) is not None: + if closed is not None: + if closed != gd["closed"]: + raise ValueError( + "'closed' keyword does not match value " + "specified in dtype string" + ) + closed = gd["closed"] + + try: + subtype = pandas_dtype(subtype) + except TypeError as err: + raise TypeError("could not construct IntervalDtype") from err + + if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalDtype" + ) + raise TypeError(msg) + + key = str(subtype) + str(closed) + try: + return cls._cache_dtypes[key] + except KeyError: + u = object.__new__(cls) + u._subtype = subtype + u._closed = closed + cls._cache_dtypes[key] = u + return u + + @cache_readonly + def _can_hold_na(self) -> bool: + subtype = self._subtype + if subtype is None: + # partially-initialized + raise NotImplementedError( + "_can_hold_na is not defined for partially-initialized IntervalDtype" + ) + if subtype.kind in ["i", "u"]: + return False + return True + + @property + def closed(self): + return self._closed + + @property + def subtype(self): + """ + The dtype of the Interval bounds. + """ + return self._subtype + + @classmethod + def construct_array_type(cls) -> type[IntervalArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays import IntervalArray + + return IntervalArray + + @classmethod + def construct_from_string(cls, string: str_type) -> IntervalDtype: + """ + attempt to construct this type from a string, raise a TypeError + if its not possible + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + + if string.lower() == "interval" or cls._match.search(string) is not None: + return cls(string) + + msg = ( + f"Cannot construct a 'IntervalDtype' from '{string}'.\n\n" + "Incorrectly formatted string passed to constructor. " + "Valid formats include Interval or Interval[dtype] " + "where dtype is numeric, datetime, or timedelta" + ) + raise TypeError(msg) + + @property + def type(self) -> type[Interval]: + return Interval + + def __str__(self) -> str_type: + if self.subtype is None: + return "interval" + if self.closed is None: + # Only partially initialized GH#38394 + return f"interval[{self.subtype}]" + return f"interval[{self.subtype}, {self.closed}]" + + def __hash__(self) -> int: + # make myself hashable + return hash(str(self)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + return other.lower() in (self.name.lower(), str(self).lower()) + elif not isinstance(other, IntervalDtype): + return False + elif self.subtype is None or other.subtype is None: + # None should match any subtype + return True + elif self.closed != other.closed: + return False + else: + from pandas.core.dtypes.common import is_dtype_equal + + return is_dtype_equal(self.subtype, other.subtype) + + def __setstate__(self, state) -> None: + # for pickle compat. 
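A sketch of the equality and validation rules implemented above (editorial, assuming pandas 1.5 as ``pd``): string comparison is case-insensitive, the bare name "interval" matches any IntervalDtype, and a ``closed`` keyword that contradicts the dtype string is rejected.

import pandas as pd

dtype = pd.IntervalDtype("int64", closed="left")
print(dtype == "interval")                                  # True: generic name matches
print(dtype == "Interval[int64, left]")                     # True: case-insensitive
print(dtype == pd.IntervalDtype("int64", closed="right"))   # False: closed differs

try:
    pd.IntervalDtype("interval[int64, right]", closed="left")
except ValueError as err:
    print(err)   # 'closed' keyword does not match value specified in dtype string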
__get_state__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) + self._subtype = state["subtype"] + + # backward-compat older pickles won't have "closed" key + self._closed = state.pop("closed", None) + + @classmethod + def is_dtype(cls, dtype: object) -> bool: + """ + Return a boolean if we if the passed type is an actual dtype that we + can match (via string or type) + """ + if isinstance(dtype, str): + if dtype.lower().startswith("interval"): + try: + if cls.construct_from_string(dtype) is not None: + return True + else: + return False + except (ValueError, TypeError): + return False + else: + return False + return super().is_dtype(dtype) + + def __from_arrow__( + self, array: pyarrow.Array | pyarrow.ChunkedArray + ) -> IntervalArray: + """ + Construct IntervalArray from pyarrow Array/ChunkedArray. + """ + import pyarrow + + from pandas.core.arrays import IntervalArray + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + chunks = array.chunks + + results = [] + for arr in chunks: + if isinstance(arr, pyarrow.ExtensionArray): + arr = arr.storage + left = np.asarray(arr.field("left"), dtype=self.subtype) + right = np.asarray(arr.field("right"), dtype=self.subtype) + iarr = IntervalArray.from_arrays(left, right, closed=self.closed) + results.append(iarr) + + if not results: + return IntervalArray.from_arrays( + np.array([], dtype=self.subtype), + np.array([], dtype=self.subtype), + closed=self.closed, + ) + return IntervalArray._concat_same_type(results) + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + if not all(isinstance(x, IntervalDtype) for x in dtypes): + return None + + closed = cast("IntervalDtype", dtypes[0]).closed + if not all(cast("IntervalDtype", x).closed == closed for x in dtypes): + return np.dtype(object) + + from pandas.core.dtypes.cast import find_common_type + + common = find_common_type([cast("IntervalDtype", x).subtype for x in dtypes]) + if common == object: + return np.dtype(object) + return IntervalDtype(common, closed=closed) + + +class PandasDtype(ExtensionDtype): + """ + A Pandas ExtensionDtype for NumPy dtypes. + + This is mostly for internal compatibility, and is not especially + useful on its own. + + Parameters + ---------- + dtype : object + Object to be converted to a NumPy data type object. + + See Also + -------- + numpy.dtype + """ + + _metadata = ("_dtype",) + + def __init__(self, dtype: npt.DTypeLike | PandasDtype | None) -> None: + if isinstance(dtype, PandasDtype): + # make constructor univalent + dtype = dtype.numpy_dtype + self._dtype = np.dtype(dtype) + + def __repr__(self) -> str: + return f"PandasDtype({repr(self.name)})" + + @property + def numpy_dtype(self) -> np.dtype: + """ + The NumPy dtype this PandasDtype wraps. + """ + return self._dtype + + @property + def name(self) -> str: + """ + A bit-width name for this data-type. + """ + return self._dtype.name + + @property + def type(self) -> type[np.generic]: + """ + The type object used to instantiate a scalar of this NumPy data-type. + """ + return self._dtype.type + + @property + def _is_numeric(self) -> bool: + # exclude object, str, unicode, void. 
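An editorial sketch of what ``_get_common_dtype`` above means in practice when concatenating interval data; the printed dtypes are assumptions about pandas 1.5 behaviour, not guaranteed output.

import pandas as pd

left = pd.Series(pd.interval_range(0, 2))        # interval[int64, right]
right = pd.Series(pd.interval_range(0.0, 2.0))   # interval[float64, right]

# Same "closed": the subtypes are unified via find_common_type.
print(pd.concat([left, right], ignore_index=True).dtype)    # interval[float64, right]

# Different "closed": the result falls back to object dtype.
other = pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2], closed="left"))
print(pd.concat([left, other], ignore_index=True).dtype)    # object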
+ return self.kind in set("biufc") + + @property + def _is_boolean(self) -> bool: + return self.kind == "b" + + @classmethod + def construct_from_string(cls, string: str) -> PandasDtype: + try: + dtype = np.dtype(string) + except TypeError as err: + if not isinstance(string, str): + msg = f"'construct_from_string' expects a string, got {type(string)}" + else: + msg = f"Cannot construct a 'PandasDtype' from '{string}'" + raise TypeError(msg) from err + return cls(dtype) + + @classmethod + def construct_array_type(cls) -> type_t[PandasArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays import PandasArray + + return PandasArray + + @property + def kind(self) -> str: + """ + A character code (one of 'biufcmMOSUV') identifying the general kind of data. + """ + return self._dtype.kind + + @property + def itemsize(self) -> int: + """ + The element size of this data-type object. + """ + return self._dtype.itemsize + + +class BaseMaskedDtype(ExtensionDtype): + """ + Base class for dtypes for BaseMaskedArray subclasses. + """ + + name: str + base = None + type: type + + @property + def na_value(self) -> libmissing.NAType: + return libmissing.NA + + @cache_readonly + def numpy_dtype(self) -> np.dtype: + """Return an instance of our numpy dtype""" + return np.dtype(self.type) + + @cache_readonly + def kind(self) -> str: + return self.numpy_dtype.kind + + @cache_readonly + def itemsize(self) -> int: + """Return the number of bytes in this dtype""" + return self.numpy_dtype.itemsize + + @classmethod + def construct_array_type(cls) -> type_t[BaseMaskedArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + raise NotImplementedError + + @classmethod + def from_numpy_dtype(cls, dtype: np.dtype) -> BaseMaskedDtype: + """ + Construct the MaskedDtype corresponding to the given numpy dtype. + """ + if dtype.kind == "b": + from pandas.core.arrays.boolean import BooleanDtype + + return BooleanDtype() + elif dtype.kind in ["i", "u"]: + from pandas.core.arrays.integer import INT_STR_TO_DTYPE + + return INT_STR_TO_DTYPE[dtype.name] + elif dtype.kind == "f": + from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE + + return FLOAT_STR_TO_DTYPE[dtype.name] + else: + raise NotImplementedError(dtype) + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + # We unwrap any masked dtypes, find the common dtype we would use + # for that, then re-mask the result. + from pandas.core.dtypes.cast import find_common_type + + new_dtype = find_common_type( + [ + dtype.numpy_dtype if isinstance(dtype, BaseMaskedDtype) else dtype + for dtype in dtypes + ] + ) + if not isinstance(new_dtype, np.dtype): + # If we ever support e.g. 
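A sketch of the masked-dtype machinery above (editorial; the import from ``pandas.core.dtypes.dtypes`` is the private module added by this patch, and the concat result is an assumption about pandas 1.5).

import numpy as np
import pandas as pd
from pandas.core.dtypes.dtypes import BaseMaskedDtype   # private, shown for illustration

# Every nullable dtype wraps a plain numpy dtype and uses pd.NA as its sentinel.
print(BaseMaskedDtype.from_numpy_dtype(np.dtype("int32")))   # Int32
print(pd.Int32Dtype().na_value is pd.NA)                     # True

# _get_common_dtype unwraps to numpy, finds the common type, then re-wraps.
mixed = pd.concat(
    [pd.Series([1, None], dtype="Int64"), pd.Series([1.5], dtype="Float32")],
    ignore_index=True,
)
print(mixed.dtype)                                           # Float64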
Masked[DatetimeArray] then this will change + return None + try: + return type(self).from_numpy_dtype(new_dtype) + except (KeyError, NotImplementedError): + return None diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py new file mode 100644 index 00000000..a6634cca --- /dev/null +++ b/pandas/core/dtypes/generic.py @@ -0,0 +1,166 @@ +""" define generic base classes for pandas objects """ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Type, + cast, +) + +if TYPE_CHECKING: + from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Float64Index, + Index, + Int64Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + UInt64Index, + ) + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PandasArray, + PeriodArray, + TimedeltaArray, + ) + from pandas.core.generic import NDFrame + + +# define abstract base classes to enable isinstance type checking on our +# objects +def create_pandas_abc_type(name, attr, comp): + def _check(inst): + return getattr(inst, attr, "_typ") in comp + + # https://github.com/python/mypy/issues/1006 + # error: 'classmethod' used with a non-method + @classmethod # type: ignore[misc] + def _instancecheck(cls, inst) -> bool: + return _check(inst) and not isinstance(inst, type) + + @classmethod # type: ignore[misc] + def _subclasscheck(cls, inst) -> bool: + # Raise instead of returning False + # This is consistent with default __subclasscheck__ behavior + if not isinstance(inst, type): + raise TypeError("issubclass() arg 1 must be a class") + + return _check(inst) + + dct = {"__instancecheck__": _instancecheck, "__subclasscheck__": _subclasscheck} + meta = type("ABCBase", (type,), dct) + return meta(name, (), dct) + + +ABCInt64Index = cast( + "Type[Int64Index]", + create_pandas_abc_type("ABCInt64Index", "_typ", ("int64index",)), +) +ABCUInt64Index = cast( + "Type[UInt64Index]", + create_pandas_abc_type("ABCUInt64Index", "_typ", ("uint64index",)), +) +ABCRangeIndex = cast( + "Type[RangeIndex]", + create_pandas_abc_type("ABCRangeIndex", "_typ", ("rangeindex",)), +) +ABCFloat64Index = cast( + "Type[Float64Index]", + create_pandas_abc_type("ABCFloat64Index", "_typ", ("float64index",)), +) +ABCMultiIndex = cast( + "Type[MultiIndex]", + create_pandas_abc_type("ABCMultiIndex", "_typ", ("multiindex",)), +) +ABCDatetimeIndex = cast( + "Type[DatetimeIndex]", + create_pandas_abc_type("ABCDatetimeIndex", "_typ", ("datetimeindex",)), +) +ABCTimedeltaIndex = cast( + "Type[TimedeltaIndex]", + create_pandas_abc_type("ABCTimedeltaIndex", "_typ", ("timedeltaindex",)), +) +ABCPeriodIndex = cast( + "Type[PeriodIndex]", + create_pandas_abc_type("ABCPeriodIndex", "_typ", ("periodindex",)), +) +ABCCategoricalIndex = cast( + "Type[CategoricalIndex]", + create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex",)), +) +ABCIntervalIndex = cast( + "Type[IntervalIndex]", + create_pandas_abc_type("ABCIntervalIndex", "_typ", ("intervalindex",)), +) +ABCIndex = cast( + "Type[Index]", + create_pandas_abc_type( + "ABCIndex", + "_typ", + { + "index", + "int64index", + "rangeindex", + "float64index", + "uint64index", + "numericindex", + "multiindex", + "datetimeindex", + "timedeltaindex", + "periodindex", + "categoricalindex", + "intervalindex", + }, + ), +) + + +ABCNDFrame = cast( + "Type[NDFrame]", + create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe")), +) +ABCSeries = cast( + "Type[Series]", + create_pandas_abc_type("ABCSeries", 
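An editorial sketch of the ``_typ`` duck-typing that the factory above relies on; it assumes pandas 1.5 is importable and simply exercises the generated ABCs.

import pandas as pd
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

# The ABCs look at a "_typ" attribute rather than the real class hierarchy,
# so internal modules can isinstance-check without importing DataFrame/Series.
print(isinstance(pd.DataFrame(), ABCDataFrame))        # True
print(isinstance(pd.Series(dtype=float), ABCSeries))   # True

# Any instance exposing the right "_typ" passes: duck typing, not inheritance.
class Fake:
    _typ = "dataframe"

print(isinstance(Fake(), ABCDataFrame))                # True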
"_typ", ("series",)), +) +ABCDataFrame = cast( + "Type[DataFrame]", create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",)) +) + +ABCCategorical = cast( + "Type[Categorical]", + create_pandas_abc_type("ABCCategorical", "_typ", ("categorical")), +) +ABCDatetimeArray = cast( + "Type[DatetimeArray]", + create_pandas_abc_type("ABCDatetimeArray", "_typ", ("datetimearray")), +) +ABCTimedeltaArray = cast( + "Type[TimedeltaArray]", + create_pandas_abc_type("ABCTimedeltaArray", "_typ", ("timedeltaarray")), +) +ABCPeriodArray = cast( + "Type[PeriodArray]", + create_pandas_abc_type("ABCPeriodArray", "_typ", ("periodarray",)), +) +ABCExtensionArray = cast( + "Type[ExtensionArray]", + create_pandas_abc_type( + "ABCExtensionArray", + "_typ", + # Note: IntervalArray and SparseArray are included bc they have _typ="extension" + {"extension", "categorical", "periodarray", "datetimearray", "timedeltaarray"}, + ), +) +ABCPandasArray = cast( + "Type[PandasArray]", + create_pandas_abc_type("ABCPandasArray", "_typ", ("npy_extension",)), +) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py new file mode 100644 index 00000000..893e4a9b --- /dev/null +++ b/pandas/core/dtypes/inference.py @@ -0,0 +1,466 @@ +""" basic inference routines """ + +from __future__ import annotations + +from collections import abc +from numbers import Number +import re +from typing import Pattern +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ArrayLike +from pandas.util._exceptions import find_stack_level + +is_bool = lib.is_bool + +is_integer = lib.is_integer + +is_float = lib.is_float + +is_complex = lib.is_complex + +is_scalar = lib.is_scalar + +is_decimal = lib.is_decimal + +is_interval = lib.is_interval + +is_list_like = lib.is_list_like + +is_iterator = lib.is_iterator + + +def is_number(obj) -> bool: + """ + Check if the object is a number. + + Returns True when the object is a number, and False if is not. + + Parameters + ---------- + obj : any type + The object to check if is a number. + + Returns + ------- + is_number : bool + Whether `obj` is a number or not. + + See Also + -------- + api.types.is_integer: Checks a subgroup of numbers. + + Examples + -------- + >>> from pandas.api.types import is_number + >>> is_number(1) + True + >>> is_number(7.15) + True + + Booleans are valid because they are int subclass. + + >>> is_number(False) + True + + >>> is_number("foo") + False + >>> is_number("5") + False + """ + return isinstance(obj, (Number, np.number)) + + +def iterable_not_string(obj) -> bool: + """ + Check if the object is an iterable but not a string. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_iter_not_string : bool + Whether `obj` is a non-string iterable. + + Examples + -------- + >>> iterable_not_string([1, 2, 3]) + True + >>> iterable_not_string("foo") + False + >>> iterable_not_string(1) + False + """ + return isinstance(obj, abc.Iterable) and not isinstance(obj, str) + + +def is_file_like(obj) -> bool: + """ + Check if the object is a file-like object. + + For objects to be considered file-like, they must + be an iterator AND have either a `read` and/or `write` + method as an attribute. + + Note: file-like objects must be iterable, but + iterable objects need not be file-like. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_file_like : bool + Whether `obj` has file-like properties. 
+ + Examples + -------- + >>> import io + >>> buffer = io.StringIO("data") + >>> is_file_like(buffer) + True + >>> is_file_like([1, 2, 3]) + False + """ + if not (hasattr(obj, "read") or hasattr(obj, "write")): + return False + + return bool(hasattr(obj, "__iter__")) + + +def is_re(obj) -> bool: + """ + Check if the object is a regex pattern instance. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_regex : bool + Whether `obj` is a regex pattern. + + Examples + -------- + >>> is_re(re.compile(".*")) + True + >>> is_re("foo") + False + """ + return isinstance(obj, Pattern) + + +def is_re_compilable(obj) -> bool: + """ + Check if the object can be compiled into a regex pattern instance. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_regex_compilable : bool + Whether `obj` can be compiled as a regex pattern. + + Examples + -------- + >>> is_re_compilable(".*") + True + >>> is_re_compilable(1) + False + """ + try: + re.compile(obj) + except TypeError: + return False + else: + return True + + +def is_array_like(obj) -> bool: + """ + Check if the object is array-like. + + For an object to be considered array-like, it must be list-like and + have a `dtype` attribute. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_array_like : bool + Whether `obj` has array-like properties. + + Examples + -------- + >>> is_array_like(np.array([1, 2, 3])) + True + >>> is_array_like(pd.Series(["a", "b"])) + True + >>> is_array_like(pd.Index(["2016-01-01"])) + True + >>> is_array_like([1, 2, 3]) + False + >>> is_array_like(("a", "b")) + False + """ + return is_list_like(obj) and hasattr(obj, "dtype") + + +def is_nested_list_like(obj) -> bool: + """ + Check if the object is list-like, and that all of its elements + are also list-like. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_list_like : bool + Whether `obj` has list-like properties. + + Examples + -------- + >>> is_nested_list_like([[1, 2, 3]]) + True + >>> is_nested_list_like([{1, 2, 3}, {1, 2, 3}]) + True + >>> is_nested_list_like(["foo"]) + False + >>> is_nested_list_like([]) + False + >>> is_nested_list_like([[1, 2, 3], 1]) + False + + Notes + ----- + This won't reliably detect whether a consumable iterator (e. g. + a generator) is a nested-list-like without consuming the iterator. + To avoid consuming it, we always return False if the outer container + doesn't define `__len__`. + + See Also + -------- + is_list_like + """ + return ( + is_list_like(obj) + and hasattr(obj, "__len__") + and len(obj) > 0 + and all(is_list_like(item) for item in obj) + ) + + +def is_dict_like(obj) -> bool: + """ + Check if the object is dict-like. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_dict_like : bool + Whether `obj` has dict-like properties. + + Examples + -------- + >>> is_dict_like({1: 2}) + True + >>> is_dict_like([1, 2, 3]) + False + >>> is_dict_like(dict) + False + >>> is_dict_like(dict()) + True + """ + dict_like_attrs = ("__getitem__", "keys", "__contains__") + return ( + all(hasattr(obj, attr) for attr in dict_like_attrs) + # [GH 25196] exclude classes + and not isinstance(obj, type) + ) + + +def is_named_tuple(obj) -> bool: + """ + Check if the object is a named tuple. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_named_tuple : bool + Whether `obj` is a named tuple. 
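A short editorial sketch of the duck-typed inference helpers above; ``DuckMapping`` and ``ReadableNotIterable`` are made-up names used only to show which attributes the checks look for (assumes pandas 1.5).

from pandas.api.types import is_dict_like, is_file_like

class DuckMapping:
    def __getitem__(self, key): ...
    def keys(self): ...
    def __contains__(self, key): ...

print(is_dict_like(DuckMapping()))   # True: has __getitem__, keys, __contains__
print(is_dict_like(DuckMapping))     # False: classes themselves are excluded

class ReadableNotIterable:
    def read(self):
        return ""

# read()/write() alone is not enough; the object must also be iterable.
print(is_file_like(ReadableNotIterable()))   # False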
+ + Examples + -------- + >>> from collections import namedtuple + >>> Point = namedtuple("Point", ["x", "y"]) + >>> p = Point(1, 2) + >>> + >>> is_named_tuple(p) + True + >>> is_named_tuple((1, 2)) + False + """ + return isinstance(obj, abc.Sequence) and hasattr(obj, "_fields") + + +def is_hashable(obj) -> bool: + """ + Return True if hash(obj) will succeed, False otherwise. + + Some types will pass a test against collections.abc.Hashable but fail when + they are actually hashed with hash(). + + Distinguish between these and other types by trying the call to hash() and + seeing if they raise TypeError. + + Returns + ------- + bool + + Examples + -------- + >>> import collections + >>> a = ([],) + >>> isinstance(a, collections.abc.Hashable) + True + >>> is_hashable(a) + False + """ + # Unfortunately, we can't use isinstance(obj, collections.abc.Hashable), + # which can be faster than calling hash. That is because numpy scalars + # fail this test. + + # Reconsider this decision once this numpy bug is fixed: + # https://github.com/numpy/numpy/issues/5562 + + try: + hash(obj) + except TypeError: + return False + else: + return True + + +def is_sequence(obj) -> bool: + """ + Check if the object is a sequence of objects. + String types are not included as sequences here. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_sequence : bool + Whether `obj` is a sequence of objects. + + Examples + -------- + >>> l = [1, 2, 3] + >>> + >>> is_sequence(l) + True + >>> is_sequence(iter(l)) + False + """ + try: + iter(obj) # Can iterate over it. + len(obj) # Has a length associated with it. + return not isinstance(obj, (str, bytes)) + except (TypeError, AttributeError): + return False + + +def is_dataclass(item): + """ + Checks if the object is a data-class instance + + Parameters + ---------- + item : object + + Returns + -------- + is_dataclass : bool + True if the item is an instance of a data-class, + will return false if you pass the data class itself + + Examples + -------- + >>> from dataclasses import dataclass + >>> @dataclass + ... class Point: + ... x: int + ... y: int + + >>> is_dataclass(Point) + False + >>> is_dataclass(Point(0,2)) + True + + """ + try: + from dataclasses import is_dataclass + + return is_dataclass(item) and not isinstance(item, type) + except ImportError: + return False + + +def is_inferred_bool_dtype(arr: ArrayLike) -> bool: + """ + Check if this is a ndarray[bool] or an ndarray[object] of bool objects. + + Parameters + ---------- + arr : np.ndarray or ExtensionArray + + Returns + ------- + bool + + Notes + ----- + This does not include the special treatment is_bool_dtype uses for + Categorical. + """ + if not isinstance(arr, np.ndarray): + return False + + dtype = arr.dtype + if dtype == np.dtype(bool): + return True + elif dtype == np.dtype("object"): + result = lib.is_bool_array(arr) + if result: + # GH#46188 + warnings.warn( + "In a future version, object-dtype columns with all-bool values " + "will not be included in reductions with bool_only=True. 
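An editorial sketch of ``is_inferred_bool_dtype`` above (a private helper; the import path is the module introduced by this patch, and outputs are assumptions for pandas 1.5).

import numpy as np
from pandas.core.dtypes.inference import is_inferred_bool_dtype   # private helper

print(is_inferred_bool_dtype(np.array([True, False])))   # True: real bool dtype
print(is_inferred_bool_dtype(np.array([1, 0])))          # False: integer dtype

# Object-dtype arrays are inspected element-wise; an all-bool object array is
# still treated as boolean, but emits the FutureWarning defined above (GH#46188).
print(is_inferred_bool_dtype(np.array([True, False], dtype=object)))   # True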
" + "Explicitly cast to bool dtype instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return result + + return False diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py new file mode 100644 index 00000000..e7f57ae0 --- /dev/null +++ b/pandas/core/dtypes/missing.py @@ -0,0 +1,783 @@ +""" +missing types & inference +""" +from __future__ import annotations + +from decimal import Decimal +from functools import partial +from typing import ( + TYPE_CHECKING, + overload, +) + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import lib +import pandas._libs.missing as libmissing +from pandas._libs.tslibs import ( + NaT, + Period, + iNaT, +) + +from pandas.core.dtypes.common import ( + DT64NS_DTYPE, + TD64NS_DTYPE, + ensure_object, + is_bool_dtype, + is_categorical_dtype, + is_complex_dtype, + is_datetimelike_v_numeric, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_string_or_object_np_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCExtensionArray, + ABCIndex, + ABCMultiIndex, + ABCSeries, +) +from pandas.core.dtypes.inference import is_list_like + +if TYPE_CHECKING: + from pandas._typing import ( + ArrayLike, + DtypeObj, + NDFrame, + NDFrameT, + Scalar, + npt, + ) + + from pandas.core.indexes.base import Index + + +isposinf_scalar = libmissing.isposinf_scalar +isneginf_scalar = libmissing.isneginf_scalar + +nan_checker = np.isnan +INF_AS_NA = False +_dtype_object = np.dtype("object") +_dtype_str = np.dtype(str) + + +@overload +def isna(obj: Scalar) -> bool: + ... + + +@overload +def isna( + obj: ArrayLike | Index | list, +) -> npt.NDArray[np.bool_]: + ... + + +@overload +def isna(obj: NDFrameT) -> NDFrameT: + ... + + +# handle unions +@overload +def isna(obj: NDFrameT | ArrayLike | Index | list) -> NDFrameT | npt.NDArray[np.bool_]: + ... + + +@overload +def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: + ... + + +def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: + """ + Detect missing values for an array-like object. + + This function takes a scalar or array-like object and indicates + whether values are missing (``NaN`` in numeric arrays, ``None`` or ``NaN`` + in object arrays, ``NaT`` in datetimelike). + + Parameters + ---------- + obj : scalar or array-like + Object to check for null or missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is missing. + + See Also + -------- + notna : Boolean inverse of pandas.isna. + Series.isna : Detect missing values in a Series. + DataFrame.isna : Detect missing values in a DataFrame. + Index.isna : Detect missing values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.isna('dog') + False + + >>> pd.isna(pd.NA) + True + + >>> pd.isna(np.nan) + True + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.isna(array) + array([[False, True, False], + [False, False, True]]) + + For indexes, an ndarray of booleans is returned. 
+ + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, + ... "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[ns]', freq=None) + >>> pd.isna(index) + array([False, False, True, False]) + + For Series and DataFrame, the same type is returned, containing booleans. + + >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.isna(df) + 0 1 2 + 0 False False False + 1 False True False + + >>> pd.isna(df[1]) + 0 False + 1 True + Name: 1, dtype: bool + """ + return _isna(obj) + + +isnull = isna + + +def _isna(obj, inf_as_na: bool = False): + """ + Detect missing values, treating None, NaN or NA as null. Infinite + values will also be treated as null if inf_as_na is True. + + Parameters + ---------- + obj: ndarray or object value + Input array or scalar value. + inf_as_na: bool + Whether to treat infinity as null. + + Returns + ------- + boolean ndarray or boolean + """ + if is_scalar(obj): + return libmissing.checknull(obj, inf_as_na=inf_as_na) + elif isinstance(obj, ABCMultiIndex): + raise NotImplementedError("isna is not defined for MultiIndex") + elif isinstance(obj, type): + return False + elif isinstance(obj, (np.ndarray, ABCExtensionArray)): + return _isna_array(obj, inf_as_na=inf_as_na) + elif isinstance(obj, ABCIndex): + # Try to use cached isna, which also short-circuits for integer dtypes + # and avoids materializing RangeIndex._values + if not obj._can_hold_na: + return obj.isna() + return _isna_array(obj._values, inf_as_na=inf_as_na) + + elif isinstance(obj, ABCSeries): + result = _isna_array(obj._values, inf_as_na=inf_as_na) + # box + result = obj._constructor(result, index=obj.index, name=obj.name, copy=False) + return result + elif isinstance(obj, ABCDataFrame): + return obj.isna() + elif isinstance(obj, list): + return _isna_array(np.asarray(obj, dtype=object), inf_as_na=inf_as_na) + elif hasattr(obj, "__array__"): + return _isna_array(np.asarray(obj), inf_as_na=inf_as_na) + else: + return False + + +def _use_inf_as_na(key): + """ + Option change callback for na/inf behaviour. + + Choose which replacement for numpy.isnan / -numpy.isfinite is used. + + Parameters + ---------- + flag: bool + True means treat None, NaN, INF, -INF as null (old way), + False means None and NaN are null, but INF, -INF are not null + (new way). + + Notes + ----- + This approach to setting global module values is discussed and + approved here: + + * https://stackoverflow.com/questions/4859217/ + programmatically-creating-variables-in-python/4859312#4859312 + """ + inf_as_na = get_option(key) + globals()["_isna"] = partial(_isna, inf_as_na=inf_as_na) + if inf_as_na: + globals()["nan_checker"] = lambda x: ~np.isfinite(x) + globals()["INF_AS_NA"] = True + else: + globals()["nan_checker"] = np.isnan + globals()["INF_AS_NA"] = False + + +def _isna_array(values: ArrayLike, inf_as_na: bool = False): + """ + Return an array indicating which values of the input array are NaN / NA. + + Parameters + ---------- + obj: ndarray or ExtensionArray + The input array whose elements are to be checked. + inf_as_na: bool + Whether or not to treat infinite values as NA. + + Returns + ------- + array-like + Array of boolean values denoting the NA status of each element. + """ + dtype = values.dtype + + if not isinstance(values, np.ndarray): + # i.e. 
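A sketch of how the ``_use_inf_as_na`` callback above changes behaviour at runtime (editorial; assumes pandas 1.5 and the ``mode.use_inf_as_na`` option, with indicative outputs).

import numpy as np
import pandas as pd

print(pd.isna(np.inf))                   # False: infinities are not NA by default

with pd.option_context("mode.use_inf_as_na", True):
    # The option callback rebinds the module-level _isna / nan_checker helpers.
    print(pd.isna(np.inf))                                       # True
    print(pd.isna(pd.Series([1.0, np.inf, np.nan])).tolist())    # [False, True, True]

print(pd.isna(np.inf))                   # False again once the option is restored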
ExtensionArray + if inf_as_na and is_categorical_dtype(dtype): + result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na) + else: + # error: Incompatible types in assignment (expression has type + # "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has + # type "ndarray[Any, dtype[bool_]]") + result = values.isna() # type: ignore[assignment] + elif is_string_or_object_np_dtype(values.dtype): + result = _isna_string_dtype(values, inf_as_na=inf_as_na) + elif needs_i8_conversion(dtype): + # this is the NaT pattern + result = values.view("i8") == iNaT + else: + if inf_as_na: + result = ~np.isfinite(values) + else: + result = np.isnan(values) + + return result + + +def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]: + # Working around NumPy ticket 1542 + dtype = values.dtype + + if dtype.kind in ("S", "U"): + result = np.zeros(values.shape, dtype=bool) + else: + + if values.ndim == 1: + result = libmissing.isnaobj(values, inf_as_na=inf_as_na) + elif values.ndim == 2: + result = libmissing.isnaobj2d(values, inf_as_na=inf_as_na) + else: + # 0-D, reached via e.g. mask_missing + result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na) + result = result.reshape(values.shape) + + return result + + +@overload +def notna(obj: Scalar) -> bool: + ... + + +@overload +def notna( + obj: ArrayLike | Index | list, +) -> npt.NDArray[np.bool_]: + ... + + +@overload +def notna(obj: NDFrameT) -> NDFrameT: + ... + + +# handle unions +@overload +def notna(obj: NDFrameT | ArrayLike | Index | list) -> NDFrameT | npt.NDArray[np.bool_]: + ... + + +@overload +def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: + ... + + +def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: + """ + Detect non-missing values for an array-like object. + + This function takes a scalar or array-like object and indicates + whether values are valid (not missing, which is ``NaN`` in numeric + arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). + + Parameters + ---------- + obj : array-like or object value + Object to check for *not* null or *non*-missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is valid. + + See Also + -------- + isna : Boolean inverse of pandas.notna. + Series.notna : Detect valid values in a Series. + DataFrame.notna : Detect valid values in a DataFrame. + Index.notna : Detect valid values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.notna('dog') + True + + >>> pd.notna(pd.NA) + False + + >>> pd.notna(np.nan) + False + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.notna(array) + array([[ True, False, True], + [ True, True, False]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, + ... "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[ns]', freq=None) + >>> pd.notna(index) + array([ True, True, False, True]) + + For Series and DataFrame, the same type is returned, containing booleans. 
+ + >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) + >>> df + 0 1 2 + 0 ant bee cat + 1 dog None fly + >>> pd.notna(df) + 0 1 2 + 0 True True True + 1 True False True + + >>> pd.notna(df[1]) + 0 True + 1 False + Name: 1, dtype: bool + """ + res = isna(obj) + if isinstance(res, bool): + return not res + return ~res + + +notnull = notna + + +def isna_compat(arr, fill_value=np.nan) -> bool: + """ + Parameters + ---------- + arr: a numpy array + fill_value: fill value, default to np.nan + + Returns + ------- + True if we can fill using this fill_value + """ + if isna(fill_value): + dtype = arr.dtype + return not (is_bool_dtype(dtype) or is_integer_dtype(dtype)) + return True + + +def array_equivalent( + left, + right, + strict_nan: bool = False, + dtype_equal: bool = False, +) -> bool: + """ + True if two arrays, left and right, have equal non-NaN elements, and NaNs + in corresponding locations. False otherwise. It is assumed that left and + right are NumPy arrays of the same dtype. The behavior of this function + (particularly with respect to NaNs) is not defined if the dtypes are + different. + + Parameters + ---------- + left, right : ndarrays + strict_nan : bool, default False + If True, consider NaN and None to be different. + dtype_equal : bool, default False + Whether `left` and `right` are known to have the same dtype + according to `is_dtype_equal`. Some methods like `BlockManager.equals`. + require that the dtypes match. Setting this to ``True`` can improve + performance, but will give different results for arrays that are + equal but different dtypes. + + Returns + ------- + b : bool + Returns True if the arrays are equivalent. + + Examples + -------- + >>> array_equivalent( + ... np.array([1, 2, np.nan]), + ... np.array([1, 2, np.nan])) + True + >>> array_equivalent( + ... np.array([1, np.nan, 2]), + ... np.array([1, 2, np.nan])) + False + """ + left, right = np.asarray(left), np.asarray(right) + + # shape compat + if left.shape != right.shape: + return False + + if dtype_equal: + # fastpath when we require that the dtypes match (Block.equals) + if left.dtype.kind in ["f", "c"]: + return _array_equivalent_float(left, right) + elif is_datetimelike_v_numeric(left.dtype, right.dtype): + return False + elif needs_i8_conversion(left.dtype): + return _array_equivalent_datetimelike(left, right) + elif is_string_or_object_np_dtype(left.dtype): + # TODO: fastpath for pandas' StringDtype + return _array_equivalent_object(left, right, strict_nan) + else: + return np.array_equal(left, right) + + # Slow path when we allow comparing different dtypes. + # Object arrays can contain None, NaN and NaT. + # string dtypes must be come to this path for NumPy 1.7.1 compat + if left.dtype.kind in "OSU" or right.dtype.kind in "OSU": + # Note: `in "OSU"` is non-trivially faster than `in ["O", "S", "U"]` + # or `in ("O", "S", "U")` + return _array_equivalent_object(left, right, strict_nan) + + # NaNs can occur in float and complex arrays. 
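An editorial sketch of ``strict_nan`` in ``array_equivalent`` above (a private helper; the import path and outputs are assumptions for pandas 1.5).

import numpy as np
from pandas.core.dtypes.missing import array_equivalent   # private helper

print(array_equivalent(np.array([1.0, np.nan]), np.array([1.0, np.nan])))   # True

# For object arrays, None and NaN are interchangeable unless strict_nan=True.
a = np.array([1, None], dtype=object)
b = np.array([1, np.nan], dtype=object)
print(array_equivalent(a, b))                    # True
print(array_equivalent(a, b, strict_nan=True))   # False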
+ if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype): + if not (left.size and right.size): + return True + return ((left == right) | (isna(left) & isna(right))).all() + + elif is_datetimelike_v_numeric(left, right): + # GH#29553 avoid numpy deprecation warning + return False + + elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype): + # datetime64, timedelta64, Period + if not is_dtype_equal(left.dtype, right.dtype): + return False + + left = left.view("i8") + right = right.view("i8") + + # if we have structured dtypes, compare first + if ( + left.dtype.type is np.void or right.dtype.type is np.void + ) and left.dtype != right.dtype: + return False + + return np.array_equal(left, right) + + +def _array_equivalent_float(left, right) -> bool: + return bool(((left == right) | (np.isnan(left) & np.isnan(right))).all()) + + +def _array_equivalent_datetimelike(left, right): + return np.array_equal(left.view("i8"), right.view("i8")) + + +def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bool): + if not strict_nan: + # isna considers NaN and None to be equivalent. + + if left.flags["F_CONTIGUOUS"] and right.flags["F_CONTIGUOUS"]: + # we can improve performance by doing a copy-free ravel + # e.g. in frame_methods.Equals.time_frame_nonunique_equal + # if we transposed the frames + left = left.ravel("K") + right = right.ravel("K") + + return lib.array_equivalent_object( + ensure_object(left.ravel()), ensure_object(right.ravel()) + ) + + for left_value, right_value in zip(left, right): + if left_value is NaT and right_value is not NaT: + return False + + elif left_value is libmissing.NA and right_value is not libmissing.NA: + return False + + elif isinstance(left_value, float) and np.isnan(left_value): + if not isinstance(right_value, float) or not np.isnan(right_value): + return False + else: + try: + if np.any(np.asarray(left_value != right_value)): + return False + except TypeError as err: + if "boolean value of NA is ambiguous" in str(err): + return False + raise + except ValueError: + # numpy can raise a ValueError if left and right cannot be + # compared (e.g. nested arrays) + return False + return True + + +def array_equals(left: ArrayLike, right: ArrayLike) -> bool: + """ + ExtensionArray-compatible implementation of array_equivalent. + """ + if not is_dtype_equal(left.dtype, right.dtype): + return False + elif isinstance(left, ABCExtensionArray): + return left.equals(right) + else: + return array_equivalent(left, right, dtype_equal=True) + + +def infer_fill_value(val): + """ + infer the fill value for the nan/NaT from the provided + scalar/ndarray/list-like if we are a NaT, return the correct dtyped + element to provide proper block construction + """ + if not is_list_like(val): + val = [val] + val = np.array(val, copy=False) + if needs_i8_conversion(val.dtype): + return np.array("NaT", dtype=val.dtype) + elif is_object_dtype(val.dtype): + dtype = lib.infer_dtype(ensure_object(val), skipna=False) + if dtype in ["datetime", "datetime64"]: + return np.array("NaT", dtype=DT64NS_DTYPE) + elif dtype in ["timedelta", "timedelta64"]: + return np.array("NaT", dtype=TD64NS_DTYPE) + return np.nan + + +def maybe_fill(arr: np.ndarray) -> np.ndarray: + """ + Fill numpy.ndarray with NaN, unless we have a integer or boolean dtype. 
+ """ + if arr.dtype.kind not in ("u", "i", "b"): + arr.fill(np.nan) + return arr + + +def na_value_for_dtype(dtype: DtypeObj, compat: bool = True): + """ + Return a dtype compat na value + + Parameters + ---------- + dtype : string / dtype + compat : bool, default True + + Returns + ------- + np.dtype or a pandas dtype + + Examples + -------- + >>> na_value_for_dtype(np.dtype('int64')) + 0 + >>> na_value_for_dtype(np.dtype('int64'), compat=False) + nan + >>> na_value_for_dtype(np.dtype('float64')) + nan + >>> na_value_for_dtype(np.dtype('bool')) + False + >>> na_value_for_dtype(np.dtype('datetime64[ns]')) + numpy.datetime64('NaT') + """ + + if isinstance(dtype, ExtensionDtype): + return dtype.na_value + elif needs_i8_conversion(dtype): + return dtype.type("NaT", "ns") + elif is_float_dtype(dtype): + return np.nan + elif is_integer_dtype(dtype): + if compat: + return 0 + return np.nan + elif is_bool_dtype(dtype): + if compat: + return False + return np.nan + return np.nan + + +def remove_na_arraylike(arr): + """ + Return array-like containing only true/non-NaN values, possibly empty. + """ + if is_extension_array_dtype(arr): + return arr[notna(arr)] + else: + return arr[notna(np.asarray(arr))] + + +def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool: + """ + isna check that excludes incompatible dtypes + + Parameters + ---------- + obj : object + dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype + + Returns + ------- + bool + """ + if not lib.is_scalar(obj) or not isna(obj): + return False + elif dtype.kind == "M": + if isinstance(dtype, np.dtype): + # i.e. not tzaware + return not isinstance(obj, (np.timedelta64, Decimal)) + # we have to rule out tznaive dt64("NaT") + return not isinstance(obj, (np.timedelta64, np.datetime64, Decimal)) + elif dtype.kind == "m": + return not isinstance(obj, (np.datetime64, Decimal)) + elif dtype.kind in ["i", "u", "f", "c"]: + # Numeric + return obj is not NaT and not isinstance(obj, (np.datetime64, np.timedelta64)) + elif dtype.kind == "b": + # We allow pd.NA, None, np.nan in BooleanArray (same as IntervalDtype) + return lib.is_float(obj) or obj is None or obj is libmissing.NA + + elif dtype == _dtype_str: + # numpy string dtypes to avoid float np.nan + return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal, float)) + + elif dtype == _dtype_object: + # This is needed for Categorical, but is kind of weird + return True + + elif isinstance(dtype, PeriodDtype): + return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal)) + + elif isinstance(dtype, IntervalDtype): + return lib.is_float(obj) or obj is None or obj is libmissing.NA + + elif isinstance(dtype, CategoricalDtype): + return is_valid_na_for_dtype(obj, dtype.categories.dtype) + + # fallback, default to allowing NaN, None, NA, NaT + return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal)) + + +def isna_all(arr: ArrayLike) -> bool: + """ + Optimized equivalent to isna(arr).all() + """ + total_len = len(arr) + + # Usually it's enough to check but a small fraction of values to see if + # a block is NOT null, chunks should help in such cases. 
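A sketch of ``is_valid_na_for_dtype`` above (editorial; a private helper, with outputs that are assumptions for pandas 1.5): an NA scalar only counts if it is compatible with the target dtype.

import numpy as np
import pandas as pd
from pandas.core.dtypes.missing import is_valid_na_for_dtype   # private helper

print(is_valid_na_for_dtype(np.nan, np.dtype("float64")))   # True
print(is_valid_na_for_dtype(pd.NaT, np.dtype("float64")))   # False: NaT is not a numeric NA
print(is_valid_na_for_dtype(pd.NaT, np.dtype("M8[ns]")))    # True: NaT fits datetime64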
+ # parameters 1000 and 40 were chosen arbitrarily + chunk_len = max(total_len // 40, 1000) + + dtype = arr.dtype + if dtype.kind == "f" and isinstance(dtype, np.dtype): + checker = nan_checker + + elif ( + (isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]) + or isinstance(dtype, DatetimeTZDtype) + or dtype.type is Period + ): + # error: Incompatible types in assignment (expression has type + # "Callable[[Any], Any]", variable has type "ufunc") + checker = lambda x: np.asarray(x.view("i8")) == iNaT # type: ignore[assignment] + + else: + # error: Incompatible types in assignment (expression has type "Callable[[Any], + # Any]", variable has type "ufunc") + checker = lambda x: _isna_array( # type: ignore[assignment] + x, inf_as_na=INF_AS_NA + ) + + return all( + checker(arr[i : i + chunk_len]).all() for i in range(0, total_len, chunk_len) + ) diff --git a/pandas/core/flags.py b/pandas/core/flags.py new file mode 100644 index 00000000..f07c6917 --- /dev/null +++ b/pandas/core/flags.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +import weakref + + +class Flags: + """ + Flags that apply to pandas objects. + + .. versionadded:: 1.2.0 + + Parameters + ---------- + obj : Series or DataFrame + The object these flags are associated with. + allows_duplicate_labels : bool, default True + Whether to allow duplicate labels in this object. By default, + duplicate labels are permitted. Setting this to ``False`` will + cause an :class:`errors.DuplicateLabelError` to be raised when + `index` (or columns for DataFrame) is not unique, or any + subsequent operation on introduces duplicates. + See :ref:`duplicates.disallow` for more. + + .. warning:: + + This is an experimental feature. Currently, many methods fail to + propagate the ``allows_duplicate_labels`` value. In future versions + it is expected that every method taking or returning one or more + DataFrame or Series objects will propagate ``allows_duplicate_labels``. + + Notes + ----- + Attributes can be set in two ways + + >>> df = pd.DataFrame() + >>> df.flags + + >>> df.flags.allows_duplicate_labels = False + >>> df.flags + + + >>> df.flags['allows_duplicate_labels'] = True + >>> df.flags + + """ + + _keys = {"allows_duplicate_labels"} + + def __init__(self, obj, *, allows_duplicate_labels) -> None: + self._allows_duplicate_labels = allows_duplicate_labels + self._obj = weakref.ref(obj) + + @property + def allows_duplicate_labels(self) -> bool: + """ + Whether this object allows duplicate labels. + + Setting ``allows_duplicate_labels=False`` ensures that the + index (and columns of a DataFrame) are unique. Most methods + that accept and return a Series or DataFrame will propagate + the value of ``allows_duplicate_labels``. + + See :ref:`duplicates` for more. + + See Also + -------- + DataFrame.attrs : Set global metadata on this object. + DataFrame.set_flags : Set global flags on this object. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2]}, index=['a', 'a']) + >>> df.flags.allows_duplicate_labels + True + >>> df.flags.allows_duplicate_labels = False + Traceback (most recent call last): + ... + pandas.errors.DuplicateLabelError: Index has duplicates. 
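A runnable sketch of the Flags behaviour documented above (editorial; assumes pandas >= 1.2 as ``pd``, with indicative output in the comments).

import pandas as pd

df = pd.DataFrame({"A": [1, 2]}, index=["a", "b"])
print(df.flags)                           # <Flags(allows_duplicate_labels=True)>

# Flags can be set attribute-style or dict-style.
df.flags["allows_duplicate_labels"] = False
print(df.flags.allows_duplicate_labels)   # False

# Turning the flag off is refused when labels are already duplicated.
dup = pd.DataFrame({"A": [1, 2]}, index=["a", "a"])
try:
    dup.flags.allows_duplicate_labels = False
except pd.errors.DuplicateLabelError as err:
    print("refused:", err)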
+ positions + label + a [0, 1] + """ + return self._allows_duplicate_labels + + @allows_duplicate_labels.setter + def allows_duplicate_labels(self, value: bool) -> None: + value = bool(value) + obj = self._obj() + if obj is None: + raise ValueError("This flag's object has been deleted.") + + if not value: + for ax in obj.axes: + ax._maybe_check_unique() + + self._allows_duplicate_labels = value + + def __getitem__(self, key): + if key not in self._keys: + raise KeyError(key) + + return getattr(self, key) + + def __setitem__(self, key, value) -> None: + if key not in self._keys: + raise ValueError(f"Unknown flag {key}. Must be one of {self._keys}") + setattr(self, key, value) + + def __repr__(self) -> str: + return f"" + + def __eq__(self, other): + if isinstance(other, type(self)): + return self.allows_duplicate_labels == other.allows_duplicate_labels + return False diff --git a/pandas/core/frame.py b/pandas/core/frame.py new file mode 100644 index 00000000..9e0f1363 --- /dev/null +++ b/pandas/core/frame.py @@ -0,0 +1,12030 @@ +""" +DataFrame +--------- +An efficient 2D container for potentially mixed-type time series or other +labeled data series. + +Similar to its R counterpart, data.frame, except providing automatic data +alignment and a host of useful data manipulation methods having to do with the +labeling information +""" +from __future__ import annotations + +import collections +from collections import abc +import datetime +import functools +from io import StringIO +import itertools +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterable, + Iterator, + Literal, + Mapping, + Sequence, + cast, + overload, +) +import warnings + +import numpy as np +import numpy.ma as ma + +from pandas._config import get_option + +from pandas._libs import ( + algos as libalgos, + lib, + properties, +) +from pandas._libs.hashtable import duplicated +from pandas._libs.lib import ( + NoDefault, + no_default, +) +from pandas._typing import ( + AggFuncType, + AnyArrayLike, + ArrayLike, + Axes, + Axis, + ColspaceArgType, + CompressionOptions, + Dtype, + DtypeObj, + FilePath, + FillnaOptions, + FloatFormatType, + FormattersType, + Frequency, + IgnoreRaise, + IndexKeyFunc, + IndexLabel, + Level, + NaPosition, + PythonFuncType, + QuantileInterpolation, + ReadBuffer, + Renamer, + Scalar, + SortKind, + StorageOptions, + Suffixes, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + ValueKeyFunc, + WriteBuffer, + npt, +) +from pandas.compat._optional import import_optional_dependency +from pandas.compat.numpy import ( + function as nv, + np_percentile_argname, +) +from pandas.errors import InvalidIndexError +from pandas.util._decorators import ( + Appender, + Substitution, + deprecate_kwarg, + deprecate_nonkeyword_arguments, + doc, + rewrite_axis_style_signature, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import ( + validate_ascending, + validate_axis_style_args, + validate_bool_kwarg, + validate_percentile, +) + +from pandas.core.dtypes.cast import ( + LossySetitemError, + can_hold_element, + construct_1d_arraylike_from_scalar, + construct_2d_arraylike_from_scalar, + find_common_type, + infer_dtype_from_scalar, + invalidate_string_dtypes, + maybe_box_native, + maybe_downcast_to_dtype, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + infer_dtype_from_object, + is_1d_only_ea_dtype, + is_bool_dtype, + is_dataclass, + is_datetime64_any_dtype, + is_dict_like, + is_dtype_equal, + 
is_extension_array_dtype, + is_float, + is_float_dtype, + is_hashable, + is_integer, + is_integer_dtype, + is_iterator, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_sequence, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core import ( + algorithms, + common as com, + nanops, + ops, +) +from pandas.core.accessor import CachedAccessor +from pandas.core.apply import ( + reconstruct_func, + relabel_result, +) +from pandas.core.array_algos.take import take_2d_multi +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays.sparse import SparseFrameAccessor +from pandas.core.construction import ( + extract_array, + sanitize_array, + sanitize_masked_array, +) +from pandas.core.generic import NDFrame +from pandas.core.indexers import check_key_length +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + PeriodIndex, + default_index, + ensure_index, + ensure_index_from_sequences, +) +from pandas.core.indexes.multi import ( + MultiIndex, + maybe_droplevels, +) +from pandas.core.indexing import ( + check_bool_indexer, + check_deprecated_indexers, + convert_to_index_sliceable, +) +from pandas.core.internals import ( + ArrayManager, + BlockManager, +) +from pandas.core.internals.construction import ( + arrays_to_mgr, + dataclasses_to_dicts, + dict_to_mgr, + mgr_to_mgr, + ndarray_to_mgr, + nested_data_to_arrays, + rec_array_to_mgr, + reorder_arrays, + to_arrays, + treat_as_nested, +) +from pandas.core.reshape.melt import melt +from pandas.core.series import Series +from pandas.core.shared_docs import _shared_docs +from pandas.core.sorting import ( + get_group_index, + lexsort_indexer, + nargsort, +) + +from pandas.io.common import get_handle +from pandas.io.formats import ( + console, + format as fmt, +) +from pandas.io.formats.info import ( + INFO_DOCSTRING, + DataFrameInfo, + frame_sub_kwargs, +) +import pandas.plotting + +if TYPE_CHECKING: + + from pandas.core.groupby.generic import DataFrameGroupBy + from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg + from pandas.core.internals import SingleDataManager + from pandas.core.resample import Resampler + + from pandas.io.formats.style import Styler + +# --------------------------------------------------------------------- +# Docstring templates + +_shared_doc_kwargs = { + "axes": "index, columns", + "klass": "DataFrame", + "axes_single_arg": "{0 or 'index', 1 or 'columns'}", + "axis": """axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row.""", + "inplace": """ + inplace : bool, default False + Whether to modify the DataFrame rather than creating a new one.""", + "optional_by": """ + by : str or list of str + Name or list of names to sort by. + + - if `axis` is 0 or `'index'` then `by` may contain index + levels and/or column labels. + - if `axis` is 1 or `'columns'` then `by` may contain column + levels and/or index labels.""", + "optional_labels": """labels : array-like, optional + New labels / index to conform the axis specified by 'axis' to.""", + "optional_axis": """axis : int or str, optional + Axis to target. 
Can be either the axis name ('index', 'columns') + or number (0, 1).""", + "replace_iloc": """ + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", +} + +_numeric_only_doc = """numeric_only : bool or None, default None + Include only float, int, boolean data. If None, will attempt to use + everything, then use only numeric data +""" + +_merge_doc = """ +Merge DataFrame or named Series objects with a database-style join. + +A named Series object is treated as a DataFrame with a single named column. + +The join is done on columns or indexes. If joining columns on +columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes +on indexes or indexes on a column or columns, the index will be passed on. +When performing a cross merge, no column specifications to merge on are +allowed. + +.. warning:: + + If both key columns contain rows where the key is a null value, those + rows will be matched against each other. This is different from usual SQL + join behaviour and can lead to unexpected results. + +Parameters +----------%s +right : DataFrame or named Series + Object to merge with. +how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner' + Type of merge to be performed. + + * left: use only keys from left frame, similar to a SQL left outer join; + preserve key order. + * right: use only keys from right frame, similar to a SQL right outer join; + preserve key order. + * outer: use union of keys from both frames, similar to a SQL full outer + join; sort keys lexicographically. + * inner: use intersection of keys from both frames, similar to a SQL inner + join; preserve the order of the left keys. + * cross: creates the cartesian product from both frames, preserves the order + of the left keys. + + .. versionadded:: 1.2.0 + +on : label or list + Column or index level names to join on. These must be found in both + DataFrames. If `on` is None and not merging on indexes then this defaults + to the intersection of the columns in both DataFrames. +left_on : label or list, or array-like + Column or index level names to join on in the left DataFrame. Can also + be an array or list of arrays of the length of the left DataFrame. + These arrays are treated as if they are columns. +right_on : label or list, or array-like + Column or index level names to join on in the right DataFrame. Can also + be an array or list of arrays of the length of the right DataFrame. + These arrays are treated as if they are columns. +left_index : bool, default False + Use the index from the left DataFrame as the join key(s). If it is a + MultiIndex, the number of keys in the other DataFrame (either the index + or a number of columns) must match the number of levels. +right_index : bool, default False + Use the index from the right DataFrame as the join key. Same caveats as + left_index. +sort : bool, default False + Sort the join keys lexicographically in the result DataFrame. If False, + the order of the join keys depends on the join type (how keyword). +suffixes : list-like, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. +copy : bool, default True + If False, avoid copy if possible. 
+indicator : bool or str, default False + If True, adds a column to the output DataFrame called "_merge" with + information on the source of each row. The column can be given a different + name by providing a string argument. The column will have a Categorical + type with the value of "left_only" for observations whose merge key only + appears in the left DataFrame, "right_only" for observations + whose merge key only appears in the right DataFrame, and "both" + if the observation's merge key is found in both DataFrames. + +validate : str, optional + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": check if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": check if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": check if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + +Returns +------- +DataFrame + A DataFrame of the two merged objects. + +See Also +-------- +merge_ordered : Merge with optional filling/interpolation. +merge_asof : Merge on nearest keys. +DataFrame.join : Similar method using indices. + +Notes +----- +Support for specifying index levels as the `on`, `left_on`, and +`right_on` parameters was added in version 0.23.0 +Support for merging named Series objects was added in version 0.24.0 + +Examples +-------- +>>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], +... 'value': [1, 2, 3, 5]}) +>>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], +... 'value': [5, 6, 7, 8]}) +>>> df1 + lkey value +0 foo 1 +1 bar 2 +2 baz 3 +3 foo 5 +>>> df2 + rkey value +0 foo 5 +1 bar 6 +2 baz 7 +3 foo 8 + +Merge df1 and df2 on the lkey and rkey columns. The value columns have +the default suffixes, _x and _y, appended. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey') + lkey value_x rkey value_y +0 foo 1 foo 5 +1 foo 1 foo 8 +2 foo 5 foo 5 +3 foo 5 foo 8 +4 bar 2 bar 6 +5 baz 3 baz 7 + +Merge DataFrames df1 and df2 with specified left and right suffixes +appended to any overlapping columns. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', +... suffixes=('_left', '_right')) + lkey value_left rkey value_right +0 foo 1 foo 5 +1 foo 1 foo 8 +2 foo 5 foo 5 +3 foo 5 foo 8 +4 bar 2 bar 6 +5 baz 3 baz 7 + +Merge DataFrames df1 and df2, but raise an exception if the DataFrames have +any overlapping columns. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) +Traceback (most recent call last): +... +ValueError: columns overlap but no suffix specified: + Index(['value'], dtype='object') + +>>> df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]}) +>>> df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) +>>> df1 + a b +0 foo 1 +1 bar 2 +>>> df2 + a c +0 foo 3 +1 baz 4 + +>>> df1.merge(df2, how='inner', on='a') + a b c +0 foo 1 3 + +>>> df1.merge(df2, how='left', on='a') + a b c +0 foo 1 3.0 +1 bar 2 NaN + +>>> df1 = pd.DataFrame({'left': ['foo', 'bar']}) +>>> df2 = pd.DataFrame({'right': [7, 8]}) +>>> df1 + left +0 foo +1 bar +>>> df2 + right +0 7 +1 8 + +>>> df1.merge(df2, how='cross') + left right +0 foo 7 +1 foo 8 +2 bar 7 +3 bar 8 +""" + + +# ----------------------------------------------------------------------- +# DataFrame class + + +class DataFrame(NDFrame, OpsMixin): + """ + Two-dimensional, size-mutable, potentially heterogeneous tabular data. + + Data structure also contains labeled axes (rows and columns). + Arithmetic operations align on both row and column labels. 
Can be + thought of as a dict-like container for Series objects. The primary + pandas data structure. + + Parameters + ---------- + data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame + Dict can contain Series, arrays, constants, dataclass or list-like objects. If + data is a dict, column order follows insertion-order. If a dict contains Series + which have an index defined, it is aligned by its index. + + .. versionchanged:: 0.25.0 + If data is a list of dicts, column order follows insertion-order. + + index : Index or array-like + Index to use for resulting frame. Will default to RangeIndex if + no indexing information part of input data and no index provided. + columns : Index or array-like + Column labels to use for resulting frame when data does not have them, + defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels, + will perform column selection instead. + dtype : dtype, default None + Data type to force. Only a single dtype is allowed. If None, infer. + copy : bool or None, default None + Copy data from inputs. + For dict data, the default of None behaves like ``copy=True``. For DataFrame + or 2d ndarray input, the default of None behaves like ``copy=False``. + If data is a dict containing one or more Series (possibly of different dtypes), + ``copy=False`` will ensure that these inputs are not copied. + + .. versionchanged:: 1.3.0 + + See Also + -------- + DataFrame.from_records : Constructor from tuples, also record arrays. + DataFrame.from_dict : From dicts of Series, arrays, or dicts. + read_csv : Read a comma-separated values (csv) file into DataFrame. + read_table : Read general delimited file into DataFrame. + read_clipboard : Read text from clipboard into DataFrame. + + Notes + ----- + Please reference the :ref:`User Guide ` for more information. + + Examples + -------- + Constructing DataFrame from a dictionary. + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = pd.DataFrame(data=d) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + Notice that the inferred dtype is int64. + + >>> df.dtypes + col1 int64 + col2 int64 + dtype: object + + To enforce a single dtype: + + >>> df = pd.DataFrame(data=d, dtype=np.int8) + >>> df.dtypes + col1 int8 + col2 int8 + dtype: object + + Constructing DataFrame from a dictionary including Series: + + >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])} + >>> pd.DataFrame(data=d, index=[0, 1, 2, 3]) + col1 col2 + 0 0 NaN + 1 1 NaN + 2 2 2.0 + 3 3 3.0 + + Constructing DataFrame from numpy ndarray: + + >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ... columns=['a', 'b', 'c']) + >>> df2 + a b c + 0 1 2 3 + 1 4 5 6 + 2 7 8 9 + + Constructing DataFrame from a numpy ndarray that has labeled columns: + + >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], + ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]) + >>> df3 = pd.DataFrame(data, columns=['c', 'a']) + ... 
+ >>> df3 + c a + 0 3 1 + 1 6 4 + 2 9 7 + + Constructing DataFrame from dataclass: + + >>> from dataclasses import make_dataclass + >>> Point = make_dataclass("Point", [("x", int), ("y", int)]) + >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)]) + x y + 0 0 0 + 1 0 3 + 2 2 3 + """ + + _internal_names_set = {"columns", "index"} | NDFrame._internal_names_set + _typ = "dataframe" + _HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray) + _accessors: set[str] = {"sparse"} + _hidden_attrs: frozenset[str] = NDFrame._hidden_attrs | frozenset([]) + _mgr: BlockManager | ArrayManager + + @property + def _constructor(self) -> Callable[..., DataFrame]: + return DataFrame + + _constructor_sliced: Callable[..., Series] = Series + + # ---------------------------------------------------------------------- + # Constructors + + def __init__( + self, + data=None, + index: Axes | None = None, + columns: Axes | None = None, + dtype: Dtype | None = None, + copy: bool | None = None, + ) -> None: + + if data is None: + data = {} + if dtype is not None: + dtype = self._validate_dtype(dtype) + + if isinstance(data, DataFrame): + data = data._mgr + + if isinstance(data, (BlockManager, ArrayManager)): + # first check if a Manager is passed without any other arguments + # -> use fastpath (without checking Manager type) + if index is None and columns is None and dtype is None and not copy: + # GH#33357 fastpath + NDFrame.__init__(self, data) + return + + manager = get_option("mode.data_manager") + + # GH47215 + if index is not None and isinstance(index, set): + raise ValueError("index cannot be a set") + if columns is not None and isinstance(columns, set): + raise ValueError("columns cannot be a set") + + if copy is None: + if isinstance(data, dict): + # retain pre-GH#38939 default behavior + copy = True + elif ( + manager == "array" + and isinstance(data, (np.ndarray, ExtensionArray)) + and data.ndim == 2 + ): + # INFO(ArrayManager) by default copy the 2D input array to get + # contiguous 1D arrays + copy = True + else: + copy = False + + if isinstance(data, (BlockManager, ArrayManager)): + mgr = self._init_mgr( + data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy + ) + + elif isinstance(data, dict): + # GH#38939 de facto copy defaults to False only in non-dict cases + mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager) + elif isinstance(data, ma.MaskedArray): + import numpy.ma.mrecords as mrecords + + # masked recarray + if isinstance(data, mrecords.MaskedRecords): + mgr = rec_array_to_mgr( + data, + index, + columns, + dtype, + copy, + typ=manager, + ) + warnings.warn( + "Support for MaskedRecords is deprecated and will be " + "removed in a future version. Pass " + "{name: data[name] for name in data.dtype.names} instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # a masked array + else: + data = sanitize_masked_array(data) + mgr = ndarray_to_mgr( + data, + index, + columns, + dtype=dtype, + copy=copy, + typ=manager, + ) + + elif isinstance(data, (np.ndarray, Series, Index, ExtensionArray)): + if data.dtype.names: + # i.e. numpy structured array + data = cast(np.ndarray, data) + mgr = rec_array_to_mgr( + data, + index, + columns, + dtype, + copy, + typ=manager, + ) + elif getattr(data, "name", None) is not None: + # i.e. 
Series/Index with non-None name + mgr = dict_to_mgr( + # error: Item "ndarray" of "Union[ndarray, Series, Index]" has no + # attribute "name" + {data.name: data}, # type: ignore[union-attr] + index, + columns, + dtype=dtype, + typ=manager, + ) + else: + mgr = ndarray_to_mgr( + data, + index, + columns, + dtype=dtype, + copy=copy, + typ=manager, + ) + + # For data is list-like, or Iterable (will consume into list) + elif is_list_like(data): + if not isinstance(data, (abc.Sequence, ExtensionArray)): + if hasattr(data, "__array__"): + # GH#44616 big perf improvement for e.g. pytorch tensor + data = np.asarray(data) + else: + data = list(data) + if len(data) > 0: + if is_dataclass(data[0]): + data = dataclasses_to_dicts(data) + if not isinstance(data, np.ndarray) and treat_as_nested(data): + # exclude ndarray as we may have cast it a few lines above + if columns is not None: + columns = ensure_index(columns) + arrays, columns, index = nested_data_to_arrays( + # error: Argument 3 to "nested_data_to_arrays" has incompatible + # type "Optional[Collection[Any]]"; expected "Optional[Index]" + data, + columns, + index, # type: ignore[arg-type] + dtype, + ) + mgr = arrays_to_mgr( + arrays, + columns, + index, + dtype=dtype, + typ=manager, + ) + else: + mgr = ndarray_to_mgr( + data, + index, + columns, + dtype=dtype, + copy=copy, + typ=manager, + ) + else: + mgr = dict_to_mgr( + {}, + index, + columns, + dtype=dtype, + typ=manager, + ) + # For data is scalar + else: + if index is None or columns is None: + raise ValueError("DataFrame constructor not properly called!") + + index = ensure_index(index) + columns = ensure_index(columns) + + if not dtype: + dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True) + + # For data is a scalar extension dtype + if isinstance(dtype, ExtensionDtype): + # TODO(EA2D): special case not needed with 2D EAs + + values = [ + construct_1d_arraylike_from_scalar(data, len(index), dtype) + for _ in range(len(columns)) + ] + mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager) + else: + arr2d = construct_2d_arraylike_from_scalar( + data, + len(index), + len(columns), + dtype, + copy, + ) + + mgr = ndarray_to_mgr( + arr2d, + index, + columns, + dtype=arr2d.dtype, + copy=False, + typ=manager, + ) + + # ensure correct Manager type according to settings + mgr = mgr_to_mgr(mgr, typ=manager) + + NDFrame.__init__(self, mgr) + + # ---------------------------------------------------------------------- + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> DataFrameXchg: + """ + Return the dataframe interchange object implementing the interchange protocol. + + Parameters + ---------- + nan_as_null : bool, default False + Whether to tell the DataFrame to overwrite null values in the data + with ``NaN`` (or ``NaT``). + allow_copy : bool, default True + Whether to allow memory copying when exporting. If set to False + it would cause non-zero-copy exports to fail. + + Returns + ------- + DataFrame interchange object + The object which consuming library can use to ingress the dataframe. + + Notes + ----- + Details on the interchange protocol: + https://data-apis.org/dataframe-protocol/latest/index.html + + `nan_as_null` currently has no effect; once support for nullable extension + dtypes is added, this value should be propagated to columns. 
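For illustration, a minimal sketch of consuming the returned interchange object (``num_columns`` is defined by the interchange protocol rather than by this method):

>>> df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.0]})
>>> interchange_object = df.__dataframe__()
>>> n_cols = interchange_object.num_columns()  # 2: one entry per DataFrame column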
+ """ + + from pandas.core.interchange.dataframe import PandasDataFrameXchg + + return PandasDataFrameXchg(self, nan_as_null, allow_copy) + + # ---------------------------------------------------------------------- + + @property + def axes(self) -> list[Index]: + """ + Return a list representing the axes of the DataFrame. + + It has the row axis labels and column axis labels as the only members. + They are returned in that order. + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.axes + [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], + dtype='object')] + """ + return [self.index, self.columns] + + @property + def shape(self) -> tuple[int, int]: + """ + Return a tuple representing the dimensionality of the DataFrame. + + See Also + -------- + ndarray.shape : Tuple of array dimensions. + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.shape + (2, 2) + + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4], + ... 'col3': [5, 6]}) + >>> df.shape + (2, 3) + """ + return len(self.index), len(self.columns) + + @property + def _is_homogeneous_type(self) -> bool: + """ + Whether all the columns in a DataFrame have the same type. + + Returns + ------- + bool + + See Also + -------- + Index._is_homogeneous_type : Whether the object has a single + dtype. + MultiIndex._is_homogeneous_type : Whether all the levels of a + MultiIndex have the same dtype. + + Examples + -------- + >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous_type + True + >>> DataFrame({"A": [1, 2], "B": [3.0, 4.0]})._is_homogeneous_type + False + + Items with the same type but different sizes are considered + different types. + + >>> DataFrame({ + ... "A": np.array([1, 2], dtype=np.int32), + ... "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type + False + """ + if isinstance(self._mgr, ArrayManager): + return len({arr.dtype for arr in self._mgr.arrays}) == 1 + if self._mgr.any_extension_types: + return len({block.dtype for block in self._mgr.blocks}) == 1 + else: + return not self._is_mixed_type + + @property + def _can_fast_transpose(self) -> bool: + """ + Can we transpose this DataFrame without creating any new array objects. + """ + if isinstance(self._mgr, ArrayManager): + return False + blocks = self._mgr.blocks + if len(blocks) != 1: + return False + + dtype = blocks[0].dtype + # TODO(EA2D) special case would be unnecessary with 2D EAs + return not is_1d_only_ea_dtype(dtype) + + # error: Return type "Union[ndarray, DatetimeArray, TimedeltaArray]" of + # "_values" incompatible with return type "ndarray" in supertype "NDFrame" + @property + def _values( # type: ignore[override] + self, + ) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray: + """ + Analogue to ._values that may return a 2D ExtensionArray. 
+ """ + self._consolidate_inplace() + + mgr = self._mgr + + if isinstance(mgr, ArrayManager): + if len(mgr.arrays) == 1 and not is_1d_only_ea_dtype(mgr.arrays[0].dtype): + # error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]" + # has no attribute "reshape" + return mgr.arrays[0].reshape(-1, 1) # type: ignore[union-attr] + return self.values + + blocks = mgr.blocks + if len(blocks) != 1: + return self.values + + arr = blocks[0].values + if arr.ndim == 1: + # non-2D ExtensionArray + return self.values + + # more generally, whatever we allow in NDArrayBackedExtensionBlock + arr = cast("np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray", arr) + return arr.T + + # ---------------------------------------------------------------------- + # Rendering Methods + + def _repr_fits_vertical_(self) -> bool: + """ + Check length against max_rows. + """ + max_rows = get_option("display.max_rows") + return len(self) <= max_rows + + def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: + """ + Check if full repr fits in horizontal boundaries imposed by the display + options width and max_columns. + + In case of non-interactive session, no boundaries apply. + + `ignore_width` is here so ipynb+HTML output can behave the way + users expect. display.max_columns remains in effect. + GH3541, GH3573 + """ + width, height = console.get_console_size() + max_columns = get_option("display.max_columns") + nb_columns = len(self.columns) + + # exceed max columns + if (max_columns and nb_columns > max_columns) or ( + (not ignore_width) and width and nb_columns > (width // 2) + ): + return False + + # used by repr_html under IPython notebook or scripts ignore terminal + # dims + if ignore_width or width is None or not console.in_interactive_session(): + return True + + if get_option("display.width") is not None or console.in_ipython_frontend(): + # check at least the column row for excessive width + max_rows = 1 + else: + max_rows = get_option("display.max_rows") + + # when auto-detecting, so width=None and not in ipython front end + # check whether repr fits horizontal by actually checking + # the width of the rendered repr + buf = StringIO() + + # only care about the stuff we'll actually print out + # and to_string on entire frame may be expensive + d = self + + if max_rows is not None: # unlimited rows + # min of two, where one may be None + d = d.iloc[: min(max_rows, len(d))] + else: + return True + + d.to_string(buf=buf) + value = buf.getvalue() + repr_width = max(len(line) for line in value.split("\n")) + + return repr_width < width + + def _info_repr(self) -> bool: + """ + True if the repr should show the info view. + """ + info_repr_option = get_option("display.large_repr") == "info" + return info_repr_option and not ( + self._repr_fits_horizontal_() and self._repr_fits_vertical_() + ) + + def __repr__(self) -> str: + """ + Return a string representation for a particular DataFrame. + """ + if self._info_repr(): + buf = StringIO() + self.info(buf=buf) + return buf.getvalue() + + repr_params = fmt.get_dataframe_repr_params() + return self.to_string(**repr_params) + + def _repr_html_(self) -> str | None: + """ + Return a html representation for a particular DataFrame. + + Mainly for IPython notebook. + """ + if self._info_repr(): + buf = StringIO() + self.info(buf=buf) + # need to escape the , should be the first line. + val = buf.getvalue().replace("<", r"<", 1) + val = val.replace(">", r">", 1) + return "
    " + val + "
    " + + if get_option("display.notebook_repr_html"): + max_rows = get_option("display.max_rows") + min_rows = get_option("display.min_rows") + max_cols = get_option("display.max_columns") + show_dimensions = get_option("display.show_dimensions") + + formatter = fmt.DataFrameFormatter( + self, + columns=None, + col_space=None, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + justify=None, + index_names=True, + header=True, + index=True, + bold_rows=True, + escape=True, + max_rows=max_rows, + min_rows=min_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=".", + ) + return fmt.DataFrameRenderer(formatter).to_html(notebook=True) + else: + return None + + @overload + def to_string( + self, + buf: None = ..., + columns: Sequence[str] | None = ..., + col_space: int | list[int] | dict[Hashable, int] | None = ..., + header: bool | Sequence[str] = ..., + index: bool = ..., + na_rep: str = ..., + formatters: fmt.FormattersType | None = ..., + float_format: fmt.FloatFormatType | None = ..., + sparsify: bool | None = ..., + index_names: bool = ..., + justify: str | None = ..., + max_rows: int | None = ..., + max_cols: int | None = ..., + show_dimensions: bool = ..., + decimal: str = ..., + line_width: int | None = ..., + min_rows: int | None = ..., + max_colwidth: int | None = ..., + encoding: str | None = ..., + ) -> str: + ... + + @overload + def to_string( + self, + buf: FilePath | WriteBuffer[str], + columns: Sequence[str] | None = ..., + col_space: int | list[int] | dict[Hashable, int] | None = ..., + header: bool | Sequence[str] = ..., + index: bool = ..., + na_rep: str = ..., + formatters: fmt.FormattersType | None = ..., + float_format: fmt.FloatFormatType | None = ..., + sparsify: bool | None = ..., + index_names: bool = ..., + justify: str | None = ..., + max_rows: int | None = ..., + max_cols: int | None = ..., + show_dimensions: bool = ..., + decimal: str = ..., + line_width: int | None = ..., + min_rows: int | None = ..., + max_colwidth: int | None = ..., + encoding: str | None = ..., + ) -> None: + ... + + @Substitution( + header_type="bool or sequence of str", + header="Write out the column names. If a list of strings " + "is given, it is assumed to be aliases for the " + "column names", + col_space_type="int, list or dict of int", + col_space="The minimum width of each column. If a list of ints is given " + "every integers corresponds with one column. If a dict is given, the key " + "references the column, while the value defines the space to use.", + ) + @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) + def to_string( + self, + buf: FilePath | WriteBuffer[str] | None = None, + columns: Sequence[str] | None = None, + col_space: int | list[int] | dict[Hashable, int] | None = None, + header: bool | Sequence[str] = True, + index: bool = True, + na_rep: str = "NaN", + formatters: fmt.FormattersType | None = None, + float_format: fmt.FloatFormatType | None = None, + sparsify: bool | None = None, + index_names: bool = True, + justify: str | None = None, + max_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool = False, + decimal: str = ".", + line_width: int | None = None, + min_rows: int | None = None, + max_colwidth: int | None = None, + encoding: str | None = None, + ) -> str | None: + """ + Render a DataFrame to a console-friendly tabular output. + %(shared_params)s + line_width : int, optional + Width to wrap a line in characters. 
+ min_rows : int, optional + The number of rows to display in the console in a truncated repr + (when number of rows is above `max_rows`). + max_colwidth : int, optional + Max width to truncate each column in characters. By default, no limit. + + .. versionadded:: 1.0.0 + encoding : str, default "utf-8" + Set character encoding. + + .. versionadded:: 1.0 + %(returns)s + See Also + -------- + to_html : Convert DataFrame to HTML. + + Examples + -------- + >>> d = {'col1': [1, 2, 3], 'col2': [4, 5, 6]} + >>> df = pd.DataFrame(d) + >>> print(df.to_string()) + col1 col2 + 0 1 4 + 1 2 5 + 2 3 6 + """ + from pandas import option_context + + with option_context("display.max_colwidth", max_colwidth): + formatter = fmt.DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + justify=justify, + index_names=index_names, + header=header, + index=index, + min_rows=min_rows, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=decimal, + ) + return fmt.DataFrameRenderer(formatter).to_string( + buf=buf, + encoding=encoding, + line_width=line_width, + ) + + # ---------------------------------------------------------------------- + + @property + def style(self) -> Styler: + """ + Returns a Styler object. + + Contains methods for building a styled HTML representation of the DataFrame. + + See Also + -------- + io.formats.style.Styler : Helps style a DataFrame or Series according to the + data with HTML and CSS. + """ + from pandas.io.formats.style import Styler + + return Styler(self) + + _shared_docs[ + "items" + ] = r""" + Iterate over (column name, Series) pairs. + + Iterates over the DataFrame columns, returning a tuple with + the column name and the content as a Series. + + Yields + ------ + label : object + The column names for the DataFrame being iterated over. + content : Series + The column entries belonging to each label, as a Series. + + See Also + -------- + DataFrame.iterrows : Iterate over DataFrame rows as + (index, Series) pairs. + DataFrame.itertuples : Iterate over DataFrame rows as namedtuples + of the values. + + Examples + -------- + >>> df = pd.DataFrame({'species': ['bear', 'bear', 'marsupial'], + ... 'population': [1864, 22000, 80000]}, + ... index=['panda', 'polar', 'koala']) + >>> df + species population + panda bear 1864 + polar bear 22000 + koala marsupial 80000 + >>> for label, content in df.items(): + ... print(f'label: {label}') + ... print(f'content: {content}', sep='\n') + ... + label: species + content: + panda bear + polar bear + koala marsupial + Name: species, dtype: object + label: population + content: + panda 1864 + polar 22000 + koala 80000 + Name: population, dtype: int64 + """ + + @Appender(_shared_docs["items"]) + def items(self) -> Iterable[tuple[Hashable, Series]]: + if self.columns.is_unique and hasattr(self, "_item_cache"): + for k in self.columns: + yield k, self._get_item_cache(k) + else: + for i, k in enumerate(self.columns): + yield k, self._ixs(i, axis=1) + + _shared_docs[ + "iteritems" + ] = r""" + Iterate over (column name, Series) pairs. + + .. deprecated:: 1.5.0 + iteritems is deprecated and will be removed in a future version. + Use .items instead. + + Iterates over the DataFrame columns, returning a tuple with + the column name and the content as a Series. + + Yields + ------ + label : object + The column names for the DataFrame being iterated over. 
+ content : Series + The column entries belonging to each label, as a Series. + + See Also + -------- + DataFrame.iter : Recommended alternative. + DataFrame.iterrows : Iterate over DataFrame rows as + (index, Series) pairs. + DataFrame.itertuples : Iterate over DataFrame rows as namedtuples + of the values. + """ + + @Appender(_shared_docs["iteritems"]) + def iteritems(self) -> Iterable[tuple[Hashable, Series]]: + warnings.warn( + "iteritems is deprecated and will be removed in a future version. " + "Use .items instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + yield from self.items() + + def iterrows(self) -> Iterable[tuple[Hashable, Series]]: + """ + Iterate over DataFrame rows as (index, Series) pairs. + + Yields + ------ + index : label or tuple of label + The index of the row. A tuple for a `MultiIndex`. + data : Series + The data of the row as a Series. + + See Also + -------- + DataFrame.itertuples : Iterate over DataFrame rows as namedtuples of the values. + DataFrame.items : Iterate over (column name, Series) pairs. + + Notes + ----- + 1. Because ``iterrows`` returns a Series for each row, + it does **not** preserve dtypes across the rows (dtypes are + preserved across columns for DataFrames). For example, + + >>> df = pd.DataFrame([[1, 1.5]], columns=['int', 'float']) + >>> row = next(df.iterrows())[1] + >>> row + int 1.0 + float 1.5 + Name: 0, dtype: float64 + >>> print(row['int'].dtype) + float64 + >>> print(df['int'].dtype) + int64 + + To preserve dtypes while iterating over the rows, it is better + to use :meth:`itertuples` which returns namedtuples of the values + and which is generally faster than ``iterrows``. + + 2. You should **never modify** something you are iterating over. + This is not guaranteed to work in all cases. Depending on the + data types, the iterator returns a copy and not a view, and writing + to it will have no effect. + """ + columns = self.columns + klass = self._constructor_sliced + for k, v in zip(self.index, self.values): + s = klass(v, index=columns, name=k).__finalize__(self) + yield k, s + + def itertuples( + self, index: bool = True, name: str | None = "Pandas" + ) -> Iterable[tuple[Any, ...]]: + """ + Iterate over DataFrame rows as namedtuples. + + Parameters + ---------- + index : bool, default True + If True, return the index as the first element of the tuple. + name : str or None, default "Pandas" + The name of the returned namedtuples or None to return regular + tuples. + + Returns + ------- + iterator + An object to iterate over namedtuples for each row in the + DataFrame with the first field possibly being the index and + following fields being the column values. + + See Also + -------- + DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) + pairs. + DataFrame.items : Iterate over (column name, Series) pairs. + + Notes + ----- + The column names will be renamed to positional names if they are + invalid Python identifiers, repeated, or start with an underscore. + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]}, + ... index=['dog', 'hawk']) + >>> df + num_legs num_wings + dog 4 0 + hawk 2 2 + >>> for row in df.itertuples(): + ... print(row) + ... + Pandas(Index='dog', num_legs=4, num_wings=0) + Pandas(Index='hawk', num_legs=2, num_wings=2) + + By setting the `index` parameter to False we can remove the index + as the first element of the tuple: + + >>> for row in df.itertuples(index=False): + ... print(row) + ... 
+ Pandas(num_legs=4, num_wings=0) + Pandas(num_legs=2, num_wings=2) + + With the `name` parameter set we set a custom name for the yielded + namedtuples: + + >>> for row in df.itertuples(name='Animal'): + ... print(row) + ... + Animal(Index='dog', num_legs=4, num_wings=0) + Animal(Index='hawk', num_legs=2, num_wings=2) + """ + arrays = [] + fields = list(self.columns) + if index: + arrays.append(self.index) + fields.insert(0, "Index") + + # use integer indexing because of possible duplicate column names + arrays.extend(self.iloc[:, k] for k in range(len(self.columns))) + + if name is not None: + # https://github.com/python/mypy/issues/9046 + # error: namedtuple() expects a string literal as the first argument + itertuple = collections.namedtuple( # type: ignore[misc] + name, fields, rename=True + ) + return map(itertuple._make, zip(*arrays)) + + # fallback to regular tuples + return zip(*arrays) + + def __len__(self) -> int: + """ + Returns length of info axis, but here we use the index. + """ + return len(self.index) + + @overload + def dot(self, other: Series) -> Series: + ... + + @overload + def dot(self, other: DataFrame | Index | ArrayLike) -> DataFrame: + ... + + def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: + """ + Compute the matrix multiplication between the DataFrame and other. + + This method computes the matrix product between the DataFrame and the + values of an other Series, DataFrame or a numpy array. + + It can also be called using ``self @ other`` in Python >= 3.5. + + Parameters + ---------- + other : Series, DataFrame or array-like + The other object to compute the matrix product with. + + Returns + ------- + Series or DataFrame + If other is a Series, return the matrix product between self and + other as a Series. If other is a DataFrame or a numpy.array, return + the matrix product of self and other in a DataFrame of a np.array. + + See Also + -------- + Series.dot: Similar method for Series. + + Notes + ----- + The dimensions of DataFrame and other must be compatible in order to + compute the matrix multiplication. In addition, the column names of + DataFrame and the index of other must contain the same values, as they + will be aligned prior to the multiplication. + + The dot method for Series computes the inner product, instead of the + matrix product here. + + Examples + -------- + Here we multiply a DataFrame with a Series. + + >>> df = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + >>> s = pd.Series([1, 1, 2, 1]) + >>> df.dot(s) + 0 -4 + 1 5 + dtype: int64 + + Here we multiply a DataFrame with another DataFrame. + + >>> other = pd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]]) + >>> df.dot(other) + 0 1 + 0 1 4 + 1 2 2 + + Note that the dot method give the same result as @ + + >>> df @ other + 0 1 + 0 1 4 + 1 2 2 + + The dot method works also if other is an np.array. + + >>> arr = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]]) + >>> df.dot(arr) + 0 1 + 0 1 4 + 1 2 2 + + Note how shuffling of the objects does not change the result. 
+ + >>> s2 = s.reindex([1, 0, 2, 3]) + >>> df.dot(s2) + 0 -4 + 1 5 + dtype: int64 + """ + if isinstance(other, (Series, DataFrame)): + common = self.columns.union(other.index) + if len(common) > len(self.columns) or len(common) > len(other.index): + raise ValueError("matrices are not aligned") + + left = self.reindex(columns=common, copy=False) + right = other.reindex(index=common, copy=False) + lvals = left.values + rvals = right._values + else: + left = self + lvals = self.values + rvals = np.asarray(other) + if lvals.shape[1] != rvals.shape[0]: + raise ValueError( + f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" + ) + + if isinstance(other, DataFrame): + return self._constructor( + np.dot(lvals, rvals), index=left.index, columns=other.columns + ) + elif isinstance(other, Series): + return self._constructor_sliced(np.dot(lvals, rvals), index=left.index) + elif isinstance(rvals, (np.ndarray, Index)): + result = np.dot(lvals, rvals) + if result.ndim == 2: + return self._constructor(result, index=left.index) + else: + return self._constructor_sliced(result, index=left.index) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + @overload + def __matmul__(self, other: Series) -> Series: + ... + + @overload + def __matmul__(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: + ... + + def __matmul__(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(other) + + def __rmatmul__(self, other) -> DataFrame: + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + try: + return self.T.dot(np.transpose(other)).T + except ValueError as err: + if "shape mismatch" not in str(err): + raise + # GH#21581 give exception message for original shapes + msg = f"shapes {np.shape(other)} and {self.shape} not aligned" + raise ValueError(msg) from err + + # ---------------------------------------------------------------------- + # IO methods (to / from other formats) + + @classmethod + def from_dict( + cls, + data: dict, + orient: str = "columns", + dtype: Dtype | None = None, + columns: Axes | None = None, + ) -> DataFrame: + """ + Construct DataFrame from dict of array-like or dicts. + + Creates DataFrame object from dictionary by columns or by index + allowing dtype specification. + + Parameters + ---------- + data : dict + Of the form {field : array-like} or {field : dict}. + orient : {'columns', 'index', 'tight'}, default 'columns' + The "orientation" of the data. If the keys of the passed dict + should be the columns of the resulting DataFrame, pass 'columns' + (default). Otherwise if the keys should be rows, pass 'index'. + If 'tight', assume a dict with keys ['index', 'columns', 'data', + 'index_names', 'column_names']. + + .. versionadded:: 1.4.0 + 'tight' as an allowed value for the ``orient`` argument + + dtype : dtype, default None + Data type to force, otherwise infer. + columns : list, default None + Column labels to use when ``orient='index'``. Raises a ValueError + if used with ``orient='columns'`` or ``orient='tight'``. + + Returns + ------- + DataFrame + + See Also + -------- + DataFrame.from_records : DataFrame from structured ndarray, sequence + of tuples or dicts, or DataFrame. + DataFrame : DataFrame object creation using constructor. + DataFrame.to_dict : Convert the DataFrame to a dictionary. 
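The 'tight' orientation round-trips with :meth:`DataFrame.to_dict`; a minimal sketch:

>>> df = pd.DataFrame({'A': [1, 2]}, index=['x', 'y'])
>>> restored = pd.DataFrame.from_dict(df.to_dict(orient='tight'), orient='tight')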
+ + Examples + -------- + By default the keys of the dict become the DataFrame columns: + + >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']} + >>> pd.DataFrame.from_dict(data) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + + Specify ``orient='index'`` to create the DataFrame using dictionary + keys as rows: + + >>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']} + >>> pd.DataFrame.from_dict(data, orient='index') + 0 1 2 3 + row_1 3 2 1 0 + row_2 a b c d + + When using the 'index' orientation, the column names can be + specified manually: + + >>> pd.DataFrame.from_dict(data, orient='index', + ... columns=['A', 'B', 'C', 'D']) + A B C D + row_1 3 2 1 0 + row_2 a b c d + + Specify ``orient='tight'`` to create the DataFrame using a 'tight' + format: + + >>> data = {'index': [('a', 'b'), ('a', 'c')], + ... 'columns': [('x', 1), ('y', 2)], + ... 'data': [[1, 3], [2, 4]], + ... 'index_names': ['n1', 'n2'], + ... 'column_names': ['z1', 'z2']} + >>> pd.DataFrame.from_dict(data, orient='tight') + z1 x y + z2 1 2 + n1 n2 + a b 1 3 + c 2 4 + """ + index = None + orient = orient.lower() + if orient == "index": + if len(data) > 0: + # TODO speed up Series case + if isinstance(list(data.values())[0], (Series, dict)): + data = _from_nested_dict(data) + else: + index = list(data.keys()) + # error: Incompatible types in assignment (expression has type + # "List[Any]", variable has type "Dict[Any, Any]") + data = list(data.values()) # type: ignore[assignment] + elif orient == "columns" or orient == "tight": + if columns is not None: + raise ValueError(f"cannot use columns parameter with orient='{orient}'") + else: # pragma: no cover + raise ValueError( + f"Expected 'index', 'columns' or 'tight' for orient parameter. " + f"Got '{orient}' instead" + ) + + if orient != "tight": + return cls(data, index=index, columns=columns, dtype=dtype) + else: + realdata = data["data"] + + def create_index(indexlist, namelist): + index: Index + if len(namelist) > 1: + index = MultiIndex.from_tuples(indexlist, names=namelist) + else: + index = Index(indexlist, name=namelist[0]) + return index + + index = create_index(data["index"], data["index_names"]) + columns = create_index(data["columns"], data["column_names"]) + return cls(realdata, index=index, columns=columns, dtype=dtype) + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: object = lib.no_default, + ) -> np.ndarray: + """ + Convert the DataFrame to a NumPy array. + + By default, the dtype of the returned array will be the common NumPy + dtype of all types in the DataFrame. For example, if the dtypes are + ``float16`` and ``float32``, the results dtype will be ``float32``. + This may require copying data and coercing values, which may be + expensive. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. + copy : bool, default False + Whether to ensure that the returned value is not a view on + another array. Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that + a copy is made, even if not strictly necessary. + na_value : Any, optional + The value to use for missing values. The default value depends + on `dtype` and the dtypes of the DataFrame columns. + + .. versionadded:: 1.1.0 + + Returns + ------- + numpy.ndarray + + See Also + -------- + Series.to_numpy : Similar method for Series. 
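``na_value`` fills missing entries during the conversion; a minimal sketch:

>>> filled = pd.DataFrame({"A": [1.0, None]}).to_numpy(na_value=0.0)  # NaN becomes 0.0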
+ + Examples + -------- + >>> pd.DataFrame({"A": [1, 2], "B": [3, 4]}).to_numpy() + array([[1, 3], + [2, 4]]) + + With heterogeneous data, the lowest common type will have to + be used. + + >>> df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]}) + >>> df.to_numpy() + array([[1. , 3. ], + [2. , 4.5]]) + + For a mix of numeric and non-numeric types, the output array will + have object dtype. + + >>> df['C'] = pd.date_range('2000', periods=2) + >>> df.to_numpy() + array([[1, 3.0, Timestamp('2000-01-01 00:00:00')], + [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object) + """ + self._consolidate_inplace() + if dtype is not None: + dtype = np.dtype(dtype) + result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value) + if result.dtype is not dtype: + result = np.array(result, dtype=dtype, copy=False) + + return result + + @overload + def to_dict( + self, + orient: Literal["dict", "list", "series", "split", "tight", "index"] = ..., + into: type[dict] = ..., + ) -> dict: + ... + + @overload + def to_dict(self, orient: Literal["records"], into: type[dict] = ...) -> list[dict]: + ... + + def to_dict( + self, + orient: Literal[ + "dict", "list", "series", "split", "tight", "records", "index" + ] = "dict", + into: type[dict] = dict, + ) -> dict | list[dict]: + """ + Convert the DataFrame to a dictionary. + + The type of the key-value pairs can be customized with the parameters + (see below). + + Parameters + ---------- + orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'} + Determines the type of the values of the dictionary. + + - 'dict' (default) : dict like {column -> {index -> value}} + - 'list' : dict like {column -> [values]} + - 'series' : dict like {column -> Series(values)} + - 'split' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values]} + - 'tight' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values], + 'index_names' -> [index.names], 'column_names' -> [column.names]} + - 'records' : list like + [{column -> value}, ... , {column -> value}] + - 'index' : dict like {index -> {column -> value}} + + Abbreviations are allowed. `s` indicates `series` and `sp` + indicates `split`. + + .. versionadded:: 1.4.0 + 'tight' as an allowed value for the ``orient`` argument + + into : class, default dict + The collections.abc.Mapping subclass used for all Mappings + in the return value. Can be the actual class or an empty + instance of the mapping type you want. If you want a + collections.defaultdict, you must pass it initialized. + + Returns + ------- + dict, list or collections.abc.Mapping + Return a collections.abc.Mapping object representing the DataFrame. + The resulting transformation depends on the `orient` parameter. + + See Also + -------- + DataFrame.from_dict: Create a DataFrame from a dictionary. + DataFrame.to_json: Convert a DataFrame to JSON format. + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], + ... 'col2': [0.5, 0.75]}, + ... index=['row1', 'row2']) + >>> df + col1 col2 + row1 1 0.50 + row2 2 0.75 + >>> df.to_dict() + {'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}} + + You can specify the return orientation. 
+ + >>> df.to_dict('series') + {'col1': row1 1 + row2 2 + Name: col1, dtype: int64, + 'col2': row1 0.50 + row2 0.75 + Name: col2, dtype: float64} + + >>> df.to_dict('split') + {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], + 'data': [[1, 0.5], [2, 0.75]]} + + >>> df.to_dict('records') + [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}] + + >>> df.to_dict('index') + {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}} + + >>> df.to_dict('tight') + {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], + 'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]} + + You can also specify the mapping type. + + >>> from collections import OrderedDict, defaultdict + >>> df.to_dict(into=OrderedDict) + OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])), + ('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))]) + + If you want a `defaultdict`, you need to initialize it: + + >>> dd = defaultdict(list) + >>> df.to_dict('records', into=dd) + [defaultdict(, {'col1': 1, 'col2': 0.5}), + defaultdict(, {'col1': 2, 'col2': 0.75})] + """ + if not self.columns.is_unique: + warnings.warn( + "DataFrame columns are not unique, some columns will be omitted.", + UserWarning, + stacklevel=find_stack_level(), + ) + # GH16122 + into_c = com.standardize_mapping(into) + + # error: Incompatible types in assignment (expression has type "str", + # variable has type "Literal['dict', 'list', 'series', 'split', 'tight', + # 'records', 'index']") + orient = orient.lower() # type: ignore[assignment] + # GH32515 + if orient.startswith(("d", "l", "s", "r", "i")) and orient not in { + "dict", + "list", + "series", + "split", + "records", + "index", + }: + warnings.warn( + "Using short name for 'orient' is deprecated. Only the " + "options: ('dict', list, 'series', 'split', 'records', 'index') " + "will be used in a future version. 
Use one of the above " + "to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if orient.startswith("d"): + orient = "dict" + elif orient.startswith("l"): + orient = "list" + elif orient.startswith("sp"): + orient = "split" + elif orient.startswith("s"): + orient = "series" + elif orient.startswith("r"): + orient = "records" + elif orient.startswith("i"): + orient = "index" + + if orient == "dict": + return into_c((k, v.to_dict(into)) for k, v in self.items()) + + elif orient == "list": + return into_c( + (k, list(map(maybe_box_native, v.tolist()))) for k, v in self.items() + ) + + elif orient == "split": + return into_c( + ( + ("index", self.index.tolist()), + ("columns", self.columns.tolist()), + ( + "data", + [ + list(map(maybe_box_native, t)) + for t in self.itertuples(index=False, name=None) + ], + ), + ) + ) + + elif orient == "tight": + return into_c( + ( + ("index", self.index.tolist()), + ("columns", self.columns.tolist()), + ( + "data", + [ + list(map(maybe_box_native, t)) + for t in self.itertuples(index=False, name=None) + ], + ), + ("index_names", list(self.index.names)), + ("column_names", list(self.columns.names)), + ) + ) + + elif orient == "series": + return into_c((k, v) for k, v in self.items()) + + elif orient == "records": + columns = self.columns.tolist() + rows = ( + dict(zip(columns, row)) + for row in self.itertuples(index=False, name=None) + ) + return [ + into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows + ] + + elif orient == "index": + if not self.index.is_unique: + raise ValueError("DataFrame index must be unique for orient='index'.") + return into_c( + (t[0], dict(zip(self.columns, map(maybe_box_native, t[1:])))) + for t in self.itertuples(name=None) + ) + + else: + raise ValueError(f"orient '{orient}' not understood") + + def to_gbq( + self, + destination_table: str, + project_id: str | None = None, + chunksize: int | None = None, + reauth: bool = False, + if_exists: str = "fail", + auth_local_webserver: bool = True, + table_schema: list[dict[str, str]] | None = None, + location: str | None = None, + progress_bar: bool = True, + credentials=None, + ) -> None: + """ + Write a DataFrame to a Google BigQuery table. + + This function requires the `pandas-gbq package + `__. + + See the `How to authenticate with Google BigQuery + `__ + guide for authentication instructions. + + Parameters + ---------- + destination_table : str + Name of table to be written, in the form ``dataset.tablename``. + project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. + chunksize : int, optional + Number of rows to be inserted in each chunk from the dataframe. + Set to ``None`` to load the whole dataframe at once. + reauth : bool, default False + Force Google BigQuery to re-authenticate the user. This is useful + if multiple accounts are used. + if_exists : str, default 'fail' + Behavior when the destination table exists. Value can be one of: + + ``'fail'`` + If table exists raise pandas_gbq.gbq.TableCreationError. + ``'replace'`` + If table exists, drop it, recreate it, and insert data. + ``'append'`` + If table exists, insert data. Create if does not exist. + auth_local_webserver : bool, default True + Use the `local webserver flow`_ instead of the `console flow`_ + when getting user credentials. + + .. 
_local webserver flow: + https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. _console flow: + https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + + *New in version 0.2.0 of pandas-gbq*. + + .. versionchanged:: 1.5.0 + Default value is changed to ``True``. Google has deprecated the + ``auth_local_webserver = False`` `"out of band" (copy-paste) + flow + `_. + table_schema : list of dicts, optional + List of BigQuery table fields to which according DataFrame + columns conform to, e.g. ``[{'name': 'col1', 'type': + 'STRING'},...]``. If schema is not provided, it will be + generated according to dtypes of DataFrame columns. See + BigQuery API documentation on available names of a field. + + *New in version 0.3.1 of pandas-gbq*. + location : str, optional + Location where the load job should run. See the `BigQuery locations + documentation + `__ for a + list of available locations. The location must match that of the + target dataset. + + *New in version 0.5.0 of pandas-gbq*. + progress_bar : bool, default True + Use the library `tqdm` to show the progress bar for the upload, + chunk by chunk. + + *New in version 0.5.0 of pandas-gbq*. + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to + override default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service + Account :class:`google.oauth2.service_account.Credentials` + directly. + + *New in version 0.8.0 of pandas-gbq*. + + See Also + -------- + pandas_gbq.to_gbq : This function in the pandas-gbq library. + read_gbq : Read a DataFrame from Google BigQuery. + """ + from pandas.io import gbq + + gbq.to_gbq( + self, + destination_table, + project_id=project_id, + chunksize=chunksize, + reauth=reauth, + if_exists=if_exists, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, + location=location, + progress_bar=progress_bar, + credentials=credentials, + ) + + @classmethod + def from_records( + cls, + data, + index=None, + exclude=None, + columns=None, + coerce_float: bool = False, + nrows: int | None = None, + ) -> DataFrame: + """ + Convert structured or record ndarray to DataFrame. + + Creates a DataFrame object from a structured ndarray, sequence of + tuples or dicts, or DataFrame. + + Parameters + ---------- + data : structured ndarray, sequence of tuples or dicts, or DataFrame + Structured input data. + index : str, list of fields, array-like + Field of array to use as the index, alternately a specific set of + input labels to use. + exclude : sequence, default None + Columns or fields to exclude. + columns : sequence, default None + Column names to use. If the passed data do not have names + associated with them, this argument provides names for the + columns. Otherwise this argument indicates the order of the columns + in the result (any names not found in the data will become all-NA + columns). + coerce_float : bool, default False + Attempt to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + nrows : int, default None + Number of rows to read if data is an iterator. + + Returns + ------- + DataFrame + + See Also + -------- + DataFrame.from_dict : DataFrame from dict of array-like or dicts. 
+ DataFrame : DataFrame object creation using constructor. + + Examples + -------- + Data can be provided as a structured ndarray: + + >>> data = np.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')], + ... dtype=[('col_1', 'i4'), ('col_2', 'U1')]) + >>> pd.DataFrame.from_records(data) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + + Data can be provided as a list of dicts: + + >>> data = [{'col_1': 3, 'col_2': 'a'}, + ... {'col_1': 2, 'col_2': 'b'}, + ... {'col_1': 1, 'col_2': 'c'}, + ... {'col_1': 0, 'col_2': 'd'}] + >>> pd.DataFrame.from_records(data) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + + Data can be provided as a list of tuples with corresponding columns: + + >>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')] + >>> pd.DataFrame.from_records(data, columns=['col_1', 'col_2']) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + """ + result_index = None + + # Make a copy of the input columns so we can modify it + if columns is not None: + columns = ensure_index(columns) + + def maybe_reorder( + arrays: list[ArrayLike], arr_columns: Index, columns: Index, index + ) -> tuple[list[ArrayLike], Index, Index | None]: + """ + If our desired 'columns' do not match the data's pre-existing 'arr_columns', + we re-order our arrays. This is like a pre-emptive (cheap) reindex. + """ + if len(arrays): + length = len(arrays[0]) + else: + length = 0 + + result_index = None + if len(arrays) == 0 and index is None and length == 0: + # for backward compat use an object Index instead of RangeIndex + result_index = Index([]) + + arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length) + return arrays, arr_columns, result_index + + if is_iterator(data): + if nrows == 0: + return cls() + + try: + first_row = next(data) + except StopIteration: + return cls(index=index, columns=columns) + + dtype = None + if hasattr(first_row, "dtype") and first_row.dtype.names: + dtype = first_row.dtype + + values = [first_row] + + if nrows is None: + values += data + else: + values.extend(itertools.islice(data, nrows - 1)) + + if dtype is not None: + data = np.array(values, dtype=dtype) + else: + data = values + + if isinstance(data, dict): + if columns is None: + columns = arr_columns = ensure_index(sorted(data)) + arrays = [data[k] for k in columns] + else: + arrays = [] + arr_columns_list = [] + for k, v in data.items(): + if k in columns: + arr_columns_list.append(k) + arrays.append(v) + + arr_columns = Index(arr_columns_list) + arrays, arr_columns, result_index = maybe_reorder( + arrays, arr_columns, columns, index + ) + + elif isinstance(data, (np.ndarray, DataFrame)): + arrays, columns = to_arrays(data, columns) + arr_columns = columns + else: + arrays, arr_columns = to_arrays(data, columns) + if coerce_float: + for i, arr in enumerate(arrays): + if arr.dtype == object: + # error: Argument 1 to "maybe_convert_objects" has + # incompatible type "Union[ExtensionArray, ndarray]"; + # expected "ndarray" + arrays[i] = lib.maybe_convert_objects( + arr, # type: ignore[arg-type] + try_float=True, + ) + + arr_columns = ensure_index(arr_columns) + if columns is None: + columns = arr_columns + else: + arrays, arr_columns, result_index = maybe_reorder( + arrays, arr_columns, columns, index + ) + + if exclude is None: + exclude = set() + else: + exclude = set(exclude) + + if index is not None: + if isinstance(index, str) or not hasattr(index, "__iter__"): + i = columns.get_loc(index) + exclude.add(index) + if len(arrays) > 0: + result_index = Index(arrays[i], name=index) + else: + result_index = Index([], 
name=index) + else: + try: + index_data = [arrays[arr_columns.get_loc(field)] for field in index] + except (KeyError, TypeError): + # raised by get_loc, see GH#29258 + result_index = index + else: + result_index = ensure_index_from_sequences(index_data, names=index) + exclude.update(index) + + if any(exclude): + arr_exclude = [x for x in exclude if x in arr_columns] + to_remove = [arr_columns.get_loc(col) for col in arr_exclude] + arrays = [v for i, v in enumerate(arrays) if i not in to_remove] + + columns = columns.drop(exclude) + + manager = get_option("mode.data_manager") + mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) + + return cls(mgr) + + def to_records( + self, index: bool = True, column_dtypes=None, index_dtypes=None + ) -> np.recarray: + """ + Convert DataFrame to a NumPy record array. + + Index will be included as the first field of the record array if + requested. + + Parameters + ---------- + index : bool, default True + Include index in resulting record array, stored in 'index' + field or using the index label, if set. + column_dtypes : str, type, dict, default None + If a string or type, the data type to store all columns. If + a dictionary, a mapping of column names and indices (zero-indexed) + to specific data types. + index_dtypes : str, type, dict, default None + If a string or type, the data type to store all index levels. If + a dictionary, a mapping of index level names and indices + (zero-indexed) to specific data types. + + This mapping is applied only if `index=True`. + + Returns + ------- + numpy.recarray + NumPy ndarray with the DataFrame labels as fields and each row + of the DataFrame as entries. + + See Also + -------- + DataFrame.from_records: Convert structured or record ndarray + to DataFrame. + numpy.recarray: An ndarray that allows field access using + attributes, analogous to typed columns in a + spreadsheet. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2], 'B': [0.5, 0.75]}, + ... index=['a', 'b']) + >>> df + A B + a 1 0.50 + b 2 0.75 + >>> df.to_records() + rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)], + dtype=[('index', 'O'), ('A', '>> df.index = df.index.rename("I") + >>> df.to_records() + rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)], + dtype=[('I', 'O'), ('A', '>> df.to_records(index=False) + rec.array([(1, 0.5 ), (2, 0.75)], + dtype=[('A', '>> df.to_records(column_dtypes={"A": "int32"}) + rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)], + dtype=[('I', 'O'), ('A', '>> df.to_records(index_dtypes=">> index_dtypes = f">> df.to_records(index_dtypes=index_dtypes) + rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)], + dtype=[('I', 'S1'), ('A', ' DataFrame: + """ + Create DataFrame from a list of arrays corresponding to the columns. + + Parameters + ---------- + arrays : list-like of arrays + Each array in the list corresponds to one column, in order. + columns : list-like, Index + The column names for the resulting DataFrame. + index : list-like, Index + The rows labels for the resulting DataFrame. + dtype : dtype, optional + Optional dtype to enforce for all arrays. + verify_integrity : bool, default True + Validate and homogenize all input. If set to False, it is assumed + that all elements of `arrays` are actual arrays how they will be + stored in a block (numpy ndarray or ExtensionArray), have the same + length as and are aligned with the index, and that `columns` and + `index` are ensured to be an Index object. 
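# Illustrative usage sketch (not part of the imported patch): to_records with
# explicit column and index dtypes, then back to a DataFrame via from_records.
import pandas as pd

df = pd.DataFrame({"A": [1, 2], "B": [0.5, 0.75]},
                  index=pd.Index(["a", "b"], name="I"))

rec = df.to_records(column_dtypes={"A": "int32"}, index_dtypes="<S1")
# rec.dtype fields: ('I', 'S1'), ('A', '<i4'), ('B', '<f8')

restored = pd.DataFrame.from_records(rec, index="I")  # round trip on the 'I' field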
+ + Returns + ------- + DataFrame + """ + if dtype is not None: + dtype = pandas_dtype(dtype) + + manager = get_option("mode.data_manager") + columns = ensure_index(columns) + if len(columns) != len(arrays): + raise ValueError("len(columns) must match len(arrays)") + mgr = arrays_to_mgr( + arrays, + columns, + index, + dtype=dtype, + verify_integrity=verify_integrity, + typ=manager, + ) + return cls(mgr) + + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path", + ) + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "path"]) + def to_stata( + self, + path: FilePath | WriteBuffer[bytes], + convert_dates: dict[Hashable, str] | None = None, + write_index: bool = True, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + version: int | None = 114, + convert_strl: Sequence[Hashable] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + *, + value_labels: dict[Hashable, dict[float, str]] | None = None, + ) -> None: + """ + Export DataFrame object to Stata dta format. + + Writes the DataFrame to a Stata dataset file. + "dta" files contain a Stata dataset. + + Parameters + ---------- + path : str, path object, or buffer + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. + + .. versionchanged:: 1.0.0 + + Previously this was "fname" + + convert_dates : dict + Dictionary mapping columns containing datetime types to stata + internal format to use when writing the dates. Options are 'tc', + 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer + or a name. Datetime columns that do not have a conversion type + specified will be converted to 'tc'. Raises NotImplementedError if + a datetime column has timezone information. + write_index : bool + Write the index to Stata dataset. + byteorder : str + Can be ">", "<", "little", or "big". default is `sys.byteorder`. + time_stamp : datetime + A datetime to use as file creation date. Default is the current + time. + data_label : str, optional + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict + Dictionary containing columns as keys and variable labels as + values. Each label must be 80 characters or smaller. + version : {{114, 117, 118, 119, None}}, default 114 + Version to use in the output dta file. Set to None to let pandas + decide between 118 or 119 formats depending on the number of + columns in the frame. Version 114 can be read by Stata 10 and + later. Version 117 can be read by Stata 13 or later. Version 118 + is supported in Stata 14 and later. Version 119 is supported in + Stata 15 and later. Version 114 limits string variables to 244 + characters or fewer while versions 117 and later allow strings + with lengths up to 2,000,000 characters. Versions 118 and 119 + support Unicode characters, and version 119 supports more than + 32,767 variables. + + Version 119 should usually only be used when the number of + variables exceeds the capacity of dta format 118. Exporting + smaller datasets in format 119 may have unintended consequences, + and, as of November 2020, Stata SE cannot read version 119 files. + + .. versionchanged:: 1.0.0 + + Added support for formats 118 and 119. 
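# Illustrative usage sketch (not part of the imported patch): writing a Stata
# .dta file with date conversion, variable labels and the Unicode-capable
# format 118. The output path is a hypothetical placeholder.
import pandas as pd

df = pd.DataFrame(
    {
        "animal": ["falcon", "parrot"],
        "speed": [350, 18],
        "observed": pd.to_datetime(["2020-01-01", "2020-06-01"]),
    }
)

df.to_stata(
    "animals.dta",                                   # hypothetical output path
    convert_dates={"observed": "td"},                # store as Stata daily dates
    variable_labels={"speed": "Top speed in km/h"},  # each label <= 80 characters
    version=118,                                     # Stata 14+, Unicode strings
    write_index=False,
)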
+ + convert_strl : list, optional + List of column names to convert to string columns to Stata StrL + format. Only available if version is 117. Storing strings in the + StrL format can produce smaller dta files if strings have more than + 8 characters and values are repeated. + {compression_options} + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + .. versionadded:: 1.2.0 + + value_labels : dict of dicts + Dictionary containing columns as keys and dictionaries of column value + to labels as values. Labels for a single variable must be 32,000 + characters or smaller. + + .. versionadded:: 1.4.0 + + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + * Column dtype is not representable in Stata + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + See Also + -------- + read_stata : Import Stata data files. + io.stata.StataWriter : Low-level writer for Stata data files. + io.stata.StataWriter117 : Low-level writer for version 117 files. + + Examples + -------- + >>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', + ... 'parrot'], + ... 'speed': [350, 18, 361, 15]}}) + >>> df.to_stata('animals.dta') # doctest: +SKIP + """ + if version not in (114, 117, 118, 119, None): + raise ValueError("Only formats 114, 117, 118 and 119 are supported.") + if version == 114: + if convert_strl is not None: + raise ValueError("strl is not supported in format 114") + from pandas.io.stata import StataWriter as statawriter + elif version == 117: + # mypy: Name 'statawriter' already defined (possibly by an import) + from pandas.io.stata import ( # type: ignore[no-redef] + StataWriter117 as statawriter, + ) + else: # versions 118 and 119 + # mypy: Name 'statawriter' already defined (possibly by an import) + from pandas.io.stata import ( # type: ignore[no-redef] + StataWriterUTF8 as statawriter, + ) + + kwargs: dict[str, Any] = {} + if version is None or version >= 117: + # strl conversion is only supported >= 117 + kwargs["convert_strl"] = convert_strl + if version is None or version >= 118: + # Specifying the version is only supported for UTF8 (118 or 119) + kwargs["version"] = version + + writer = statawriter( + path, + self, + convert_dates=convert_dates, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + write_index=write_index, + variable_labels=variable_labels, + compression=compression, + storage_options=storage_options, + value_labels=value_labels, + **kwargs, + ) + writer.write_file() + + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None: + """ + Write a DataFrame to the binary Feather format. + + Parameters + ---------- + path : str, path object, file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. If a string or a path, + it will be used as Root Directory path when writing a partitioned dataset. + **kwargs : + Additional keywords passed to :func:`pyarrow.feather.write_feather`. + Starting with pyarrow 0.17, this includes the `compression`, + `compression_level`, `chunksize` and `version` keywords. + + .. versionadded:: 1.1.0 + + Notes + ----- + This function writes the dataframe as a `feather file + `_. Requires a default + index. 
For saving the DataFrame with your custom index use a method that + supports custom indices e.g. `to_parquet`. + """ + from pandas.io.feather_format import to_feather + + to_feather(self, path, **kwargs) + + @doc( + Series.to_markdown, + klass=_shared_doc_kwargs["klass"], + storage_options=_shared_docs["storage_options"], + examples="""Examples + -------- + >>> df = pd.DataFrame( + ... data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]} + ... ) + >>> print(df.to_markdown()) + | | animal_1 | animal_2 | + |---:|:-----------|:-----------| + | 0 | elk | dog | + | 1 | pig | quetzal | + + Output markdown with a tabulate option. + + >>> print(df.to_markdown(tablefmt="grid")) + +----+------------+------------+ + | | animal_1 | animal_2 | + +====+============+============+ + | 0 | elk | dog | + +----+------------+------------+ + | 1 | pig | quetzal | + +----+------------+------------+""", + ) + def to_markdown( + self, + buf: FilePath | WriteBuffer[str] | None = None, + mode: str = "wt", + index: bool = True, + storage_options: StorageOptions = None, + **kwargs, + ) -> str | None: + if "showindex" in kwargs: + warnings.warn( + "'showindex' is deprecated. Only 'index' will be used " + "in a future version. Use 'index' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + kwargs.setdefault("headers", "keys") + kwargs.setdefault("tablefmt", "pipe") + kwargs.setdefault("showindex", index) + tabulate = import_optional_dependency("tabulate") + result = tabulate.tabulate(self, **kwargs) + if buf is None: + return result + + with get_handle(buf, mode, storage_options=storage_options) as handles: + handles.handle.write(result) + return None + + @overload + def to_parquet( + self, + path: None = ..., + engine: str = ..., + compression: str | None = ..., + index: bool | None = ..., + partition_cols: list[str] | None = ..., + storage_options: StorageOptions = ..., + **kwargs, + ) -> bytes: + ... + + @overload + def to_parquet( + self, + path: FilePath | WriteBuffer[bytes], + engine: str = ..., + compression: str | None = ..., + index: bool | None = ..., + partition_cols: list[str] | None = ..., + storage_options: StorageOptions = ..., + **kwargs, + ) -> None: + ... + + @doc(storage_options=_shared_docs["storage_options"]) + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + def to_parquet( + self, + path: FilePath | WriteBuffer[bytes] | None = None, + engine: str = "auto", + compression: str | None = "snappy", + index: bool | None = None, + partition_cols: list[str] | None = None, + storage_options: StorageOptions = None, + **kwargs, + ) -> bytes | None: + """ + Write a DataFrame to the binary parquet format. + + This function writes the dataframe as a `parquet file + `_. You can choose different parquet + backends, and have the option of compression. See + :ref:`the user guide ` for more details. + + Parameters + ---------- + path : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. If None, the result is + returned as bytes. If a string or path, it will be used as Root Directory + path when writing a partitioned dataset. + + .. versionchanged:: 1.2.0 + + Previously this was "fname" + + engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. 
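# Illustrative usage sketch (not part of the imported patch): Feather requires a
# default RangeIndex, so a custom index is moved into a regular column before
# writing. Requires the optional pyarrow dependency; the path is a placeholder.
import pandas as pd

df = pd.DataFrame({"value": [1, 2, 3]},
                  index=pd.Index(["a", "b", "c"], name="key"))

df.reset_index().to_feather("frame.feather")                 # hypothetical path
restored = pd.read_feather("frame.feather").set_index("key")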
The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + compression : {{'snappy', 'gzip', 'brotli', None}}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. + index : bool, default None + If ``True``, include the dataframe's index(es) in the file output. + If ``False``, they will not be written to the file. + If ``None``, similar to ``True`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + partition_cols : list, optional, default None + Column names by which to partition the dataset. + Columns are partitioned in the order they are given. + Must be None if path is not a string. + {storage_options} + + .. versionadded:: 1.2.0 + + **kwargs + Additional arguments passed to the parquet library. See + :ref:`pandas io ` for more details. + + Returns + ------- + bytes if no path argument is provided else None + + See Also + -------- + read_parquet : Read a parquet file. + DataFrame.to_orc : Write an orc file. + DataFrame.to_csv : Write a csv file. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_hdf : Write to hdf. + + Notes + ----- + This function requires either the `fastparquet + `_ or `pyarrow + `_ library. + + Examples + -------- + >>> df = pd.DataFrame(data={{'col1': [1, 2], 'col2': [3, 4]}}) + >>> df.to_parquet('df.parquet.gzip', + ... compression='gzip') # doctest: +SKIP + >>> pd.read_parquet('df.parquet.gzip') # doctest: +SKIP + col1 col2 + 0 1 3 + 1 2 4 + + If you want to get a buffer to the parquet content you can use a io.BytesIO + object, as long as you don't use partition_cols, which creates multiple files. + + >>> import io + >>> f = io.BytesIO() + >>> df.to_parquet(f) + >>> f.seek(0) + 0 + >>> content = f.read() + """ + from pandas.io.parquet import to_parquet + + return to_parquet( + self, + path, + engine, + compression=compression, + index=index, + partition_cols=partition_cols, + storage_options=storage_options, + **kwargs, + ) + + def to_orc( + self, + path: FilePath | WriteBuffer[bytes] | None = None, + *, + engine: Literal["pyarrow"] = "pyarrow", + index: bool | None = None, + engine_kwargs: dict[str, Any] | None = None, + ) -> bytes | None: + """ + Write a DataFrame to the ORC format. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + path : str, file-like object or None, default None + If a string, it will be used as Root Directory path + when writing a partitioned dataset. By file-like object, + we refer to objects with a write() method, such as a file handle + (e.g. via builtin open function). If path is None, + a bytes object is returned. + engine : str, default 'pyarrow' + ORC library to use. Pyarrow must be >= 7.0.0. + index : bool, optional + If ``True``, include the dataframe's index(es) in the file output. + If ``False``, they will not be written to the file. + If ``None``, similar to ``infer`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + engine_kwargs : dict[str, Any] or None, default None + Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. 
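# Illustrative usage sketch (not part of the imported patch): writing a
# partitioned parquet dataset. partition_cols requires a directory path, so the
# in-memory BytesIO form from the docstring does not apply here. Requires
# pyarrow or fastparquet; the directory name is a hypothetical placeholder.
import pandas as pd

df = pd.DataFrame({"year": [2021, 2021, 2022], "value": [1.0, 2.0, 3.0]})

df.to_parquet(
    "dataset",                  # hypothetical root directory
    partition_cols=["year"],    # one subdirectory per distinct year
    compression="snappy",
    index=False,
)
restored = pd.read_parquet("dataset")  # reads the partitioned dataset back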
+ + Returns + ------- + bytes if no path argument is provided else None + + Raises + ------ + NotImplementedError + Dtype of one or more columns is category, unsigned integers, interval, + period or sparse. + ValueError + engine is not pyarrow. + + See Also + -------- + read_orc : Read a ORC file. + DataFrame.to_parquet : Write a parquet file. + DataFrame.to_csv : Write a csv file. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_hdf : Write to hdf. + + Notes + ----- + * Before using this function you should read the :ref:`user guide about + ORC ` and :ref:`install optional dependencies `. + * This function requires `pyarrow `_ + library. + * For supported dtypes please refer to `supported ORC features in Arrow + `__. + * Currently timezones in datetime columns are not preserved when a + dataframe is converted into ORC files. + + Examples + -------- + >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [4, 3]}) + >>> df.to_orc('df.orc') # doctest: +SKIP + >>> pd.read_orc('df.orc') # doctest: +SKIP + col1 col2 + 0 1 4 + 1 2 3 + + If you want to get a buffer to the orc content you can write it to io.BytesIO + >>> import io + >>> b = io.BytesIO(df.to_orc()) # doctest: +SKIP + >>> b.seek(0) # doctest: +SKIP + 0 + >>> content = b.read() # doctest: +SKIP + """ + from pandas.io.orc import to_orc + + return to_orc( + self, path, engine=engine, index=index, engine_kwargs=engine_kwargs + ) + + @overload + def to_html( + self, + buf: FilePath | WriteBuffer[str], + columns: Sequence[Level] | None = ..., + col_space: ColspaceArgType | None = ..., + header: bool | Sequence[str] = ..., + index: bool = ..., + na_rep: str = ..., + formatters: FormattersType | None = ..., + float_format: FloatFormatType | None = ..., + sparsify: bool | None = ..., + index_names: bool = ..., + justify: str | None = ..., + max_rows: int | None = ..., + max_cols: int | None = ..., + show_dimensions: bool | str = ..., + decimal: str = ..., + bold_rows: bool = ..., + classes: str | list | tuple | None = ..., + escape: bool = ..., + notebook: bool = ..., + border: int | bool | None = ..., + table_id: str | None = ..., + render_links: bool = ..., + encoding: str | None = ..., + ) -> None: + ... + + @overload + def to_html( + self, + buf: None = ..., + columns: Sequence[Level] | None = ..., + col_space: ColspaceArgType | None = ..., + header: bool | Sequence[str] = ..., + index: bool = ..., + na_rep: str = ..., + formatters: FormattersType | None = ..., + float_format: FloatFormatType | None = ..., + sparsify: bool | None = ..., + index_names: bool = ..., + justify: str | None = ..., + max_rows: int | None = ..., + max_cols: int | None = ..., + show_dimensions: bool | str = ..., + decimal: str = ..., + bold_rows: bool = ..., + classes: str | list | tuple | None = ..., + escape: bool = ..., + notebook: bool = ..., + border: int | bool | None = ..., + table_id: str | None = ..., + render_links: bool = ..., + encoding: str | None = ..., + ) -> str: + ... + + @Substitution( + header_type="bool", + header="Whether to print column labels, default True", + col_space_type="str or int, list or dict of int or str", + col_space="The minimum width of each column in CSS length " + "units. An int is assumed to be px units.\n\n" + " .. 
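# Illustrative usage sketch (not part of the imported patch): an in-memory ORC
# round trip, since calling to_orc with no path returns the encoded bytes.
# Requires pyarrow >= 7.0.0.
import io
import pandas as pd

df = pd.DataFrame({"col1": [1, 2], "col2": [4, 3]})

orc_bytes = df.to_orc()                       # no path -> bytes are returned
restored = pd.read_orc(io.BytesIO(orc_bytes))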
versionadded:: 0.25.0\n" + " Ability to use str", + ) + @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) + def to_html( + self, + buf: FilePath | WriteBuffer[str] | None = None, + columns: Sequence[Level] | None = None, + col_space: ColspaceArgType | None = None, + header: bool | Sequence[str] = True, + index: bool = True, + na_rep: str = "NaN", + formatters: FormattersType | None = None, + float_format: FloatFormatType | None = None, + sparsify: bool | None = None, + index_names: bool = True, + justify: str | None = None, + max_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool | str = False, + decimal: str = ".", + bold_rows: bool = True, + classes: str | list | tuple | None = None, + escape: bool = True, + notebook: bool = False, + border: int | bool | None = None, + table_id: str | None = None, + render_links: bool = False, + encoding: str | None = None, + ) -> str | None: + """ + Render a DataFrame as an HTML table. + %(shared_params)s + bold_rows : bool, default True + Make the row labels bold in the output. + classes : str or list or tuple, default None + CSS class(es) to apply to the resulting html table. + escape : bool, default True + Convert the characters <, >, and & to HTML-safe sequences. + notebook : {True, False}, default False + Whether the generated HTML is for IPython Notebook. + border : int + A ``border=border`` attribute is included in the opening + `` tag. Default ``pd.options.display.html.border``. + table_id : str, optional + A css id is included in the opening `
    ` tag if specified. + render_links : bool, default False + Convert URLs to HTML links. + encoding : str, default "utf-8" + Set character encoding. + + .. versionadded:: 1.0 + %(returns)s + See Also + -------- + to_string : Convert DataFrame to a string. + """ + if justify is not None and justify not in fmt._VALID_JUSTIFY_PARAMETERS: + raise ValueError("Invalid value for justify parameter") + + formatter = fmt.DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + header=header, + index=index, + formatters=formatters, + float_format=float_format, + bold_rows=bold_rows, + sparsify=sparsify, + justify=justify, + index_names=index_names, + escape=escape, + decimal=decimal, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + ) + # TODO: a generic formatter wld b in DataFrameFormatter + return fmt.DataFrameRenderer(formatter).to_html( + buf=buf, + classes=classes, + notebook=notebook, + border=border, + encoding=encoding, + table_id=table_id, + render_links=render_links, + ) + + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path_or_buffer", + ) + def to_xml( + self, + path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + index: bool = True, + root_name: str | None = "data", + row_name: str | None = "row", + na_rep: str | None = None, + attr_cols: list[str] | None = None, + elem_cols: list[str] | None = None, + namespaces: dict[str | None, str] | None = None, + prefix: str | None = None, + encoding: str = "utf-8", + xml_declaration: bool | None = True, + pretty_print: bool | None = True, + parser: str | None = "lxml", + stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + ) -> str | None: + """ + Render a DataFrame to an XML document. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + path_or_buffer : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a ``write()`` function. If None, the result is returned + as a string. + index : bool, default True + Whether to include index in XML document. + root_name : str, default 'data' + The name of root element in XML document. + row_name : str, default 'row' + The name of row element in XML document. + na_rep : str, optional + Missing data representation. + attr_cols : list-like, optional + List of columns to write as attributes in row element. + Hierarchical columns will be flattened with underscore + delimiting the different levels. + elem_cols : list-like, optional + List of columns to write as children in row element. By default, + all columns output as children of row element. Hierarchical + columns will be flattened with underscore delimiting the + different levels. + namespaces : dict, optional + All namespaces to be defined in root element. Keys of dict + should be prefix names and values of dict corresponding URIs. + Default namespaces should be given empty string key. For + example, :: + + namespaces = {{"": "https://example.com"}} + + prefix : str, optional + Namespace prefix to be used for every element and/or attribute + in document. This should be one of the keys in ``namespaces`` + dict. + encoding : str, default 'utf-8' + Encoding of the resulting document. + xml_declaration : bool, default True + Whether to include the XML declaration at start of document. 
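# Illustrative usage sketch (not part of the imported patch): rendering a frame
# to an HTML string with CSS classes, a table id and clickable links.
import pandas as pd

df = pd.DataFrame({"project": ["pandas"], "url": ["https://pandas.pydata.org"]})

html = df.to_html(
    classes=["table", "table-striped"],  # CSS classes on the table tag
    table_id="projects",                 # id attribute on the table tag
    render_links=True,                   # turn the URL column into links
    index=False,
    border=0,
)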
+ pretty_print : bool, default True + Whether output should be pretty printed with indentation and + line breaks. + parser : {{'lxml','etree'}}, default 'lxml' + Parser module to use for building of tree. Only 'lxml' and + 'etree' are supported. With 'lxml', the ability to use XSLT + stylesheet is supported. + stylesheet : str, path object or file-like object, optional + A URL, file-like object, or a raw string containing an XSLT + script used to transform the raw XML output. Script should use + layout of elements and attributes from original output. This + argument requires ``lxml`` to be installed. Only XSLT 1.0 + scripts and not later versions is currently supported. + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + Returns + ------- + None or str + If ``io`` is None, returns the resulting XML format as a + string. Otherwise returns None. + + See Also + -------- + to_json : Convert the pandas object to a JSON string. + to_html : Convert DataFrame to a html. + + Examples + -------- + >>> df = pd.DataFrame({{'shape': ['square', 'circle', 'triangle'], + ... 'degrees': [360, 360, 180], + ... 'sides': [4, np.nan, 3]}}) + + >>> df.to_xml() # doctest: +SKIP + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + + + + >>> df.to_xml(attr_cols=[ + ... 'index', 'shape', 'degrees', 'sides' + ... ]) # doctest: +SKIP + + + + + + + + >>> df.to_xml(namespaces={{"doc": "https://example.com"}}, + ... prefix="doc") # doctest: +SKIP + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + + + """ + + from pandas.io.formats.xml import ( + EtreeXMLFormatter, + LxmlXMLFormatter, + ) + + lxml = import_optional_dependency("lxml.etree", errors="ignore") + + TreeBuilder: type[EtreeXMLFormatter] | type[LxmlXMLFormatter] + + if parser == "lxml": + if lxml is not None: + TreeBuilder = LxmlXMLFormatter + else: + raise ImportError( + "lxml not found, please install or use the etree parser." + ) + + elif parser == "etree": + TreeBuilder = EtreeXMLFormatter + + else: + raise ValueError("Values for parser can only be lxml or etree.") + + xml_formatter = TreeBuilder( + self, + path_or_buffer=path_or_buffer, + index=index, + root_name=root_name, + row_name=row_name, + na_rep=na_rep, + attr_cols=attr_cols, + elem_cols=elem_cols, + namespaces=namespaces, + prefix=prefix, + encoding=encoding, + xml_declaration=xml_declaration, + pretty_print=pretty_print, + stylesheet=stylesheet, + compression=compression, + storage_options=storage_options, + ) + + return xml_formatter.write_output() + + # ---------------------------------------------------------------------- + @doc(INFO_DOCSTRING, **frame_sub_kwargs) + def info( + self, + verbose: bool | None = None, + buf: WriteBuffer[str] | None = None, + max_cols: int | None = None, + memory_usage: bool | str | None = None, + show_counts: bool | None = None, + null_counts: bool | None = None, + ) -> None: + if null_counts is not None: + if show_counts is not None: + raise ValueError("null_counts used with show_counts. Use show_counts.") + warnings.warn( + "null_counts is deprecated. 
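# Illustrative usage sketch (not part of the imported patch): the same frame
# written as element-centric and as attribute-centric XML. With no path the
# document is returned as a string; parser='etree' avoids the optional lxml
# dependency (but gives up XSLT support).
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {"shape": ["square", "circle", "triangle"],
     "degrees": [360, 360, 180],
     "sides": [4, np.nan, 3]}
)

as_elements = df.to_xml(parser="etree")            # one child element per column
as_attributes = df.to_xml(
    attr_cols=["index", "shape", "degrees", "sides"],  # columns become row attributes
    parser="etree",
)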
Use show_counts instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + show_counts = null_counts + info = DataFrameInfo( + data=self, + memory_usage=memory_usage, + ) + info.render( + buf=buf, + max_cols=max_cols, + verbose=verbose, + show_counts=show_counts, + ) + + def memory_usage(self, index: bool = True, deep: bool = False) -> Series: + """ + Return the memory usage of each column in bytes. + + The memory usage can optionally include the contribution of + the index and elements of `object` dtype. + + This value is displayed in `DataFrame.info` by default. This can be + suppressed by setting ``pandas.options.display.memory_usage`` to False. + + Parameters + ---------- + index : bool, default True + Specifies whether to include the memory usage of the DataFrame's + index in returned Series. If ``index=True``, the memory usage of + the index is the first item in the output. + deep : bool, default False + If True, introspect the data deeply by interrogating + `object` dtypes for system-level memory consumption, and include + it in the returned values. + + Returns + ------- + Series + A Series whose index is the original column names and whose values + is the memory usage of each column in bytes. + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of an + ndarray. + Series.memory_usage : Bytes consumed by a Series. + Categorical : Memory-efficient array for string values with + many repeated values. + DataFrame.info : Concise summary of a DataFrame. + + Notes + ----- + See the :ref:`Frequently Asked Questions ` for more + details. + + Examples + -------- + >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool'] + >>> data = dict([(t, np.ones(shape=5000, dtype=int).astype(t)) + ... for t in dtypes]) + >>> df = pd.DataFrame(data) + >>> df.head() + int64 float64 complex128 object bool + 0 1 1.0 1.0+0.0j 1 True + 1 1 1.0 1.0+0.0j 1 True + 2 1 1.0 1.0+0.0j 1 True + 3 1 1.0 1.0+0.0j 1 True + 4 1 1.0 1.0+0.0j 1 True + + >>> df.memory_usage() + Index 128 + int64 40000 + float64 40000 + complex128 80000 + object 40000 + bool 5000 + dtype: int64 + + >>> df.memory_usage(index=False) + int64 40000 + float64 40000 + complex128 80000 + object 40000 + bool 5000 + dtype: int64 + + The memory footprint of `object` dtype columns is ignored by default: + + >>> df.memory_usage(deep=True) + Index 128 + int64 40000 + float64 40000 + complex128 80000 + object 180000 + bool 5000 + dtype: int64 + + Use a Categorical for efficient storage of an object-dtype column with + many repeated values. + + >>> df['object'].astype('category').memory_usage(deep=True) + 5244 + """ + result = self._constructor_sliced( + [c.memory_usage(index=False, deep=deep) for col, c in self.items()], + index=self.columns, + dtype=np.intp, + ) + if index: + index_memory_usage = self._constructor_sliced( + self.index.memory_usage(deep=deep), index=["Index"] + ) + result = index_memory_usage._append(result) + return result + + def transpose(self, *args, copy: bool = False) -> DataFrame: + """ + Transpose index and columns. + + Reflect the DataFrame over its main diagonal by writing rows as columns + and vice-versa. The property :attr:`.T` is an accessor to the method + :meth:`transpose`. + + Parameters + ---------- + *args : tuple, optional + Accepted for compatibility with NumPy. + copy : bool, default False + Whether to copy the data after transposing, even for DataFrames + with a single dtype. 
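# Illustrative usage sketch (not part of the imported patch): show_counts
# replaces the deprecated null_counts argument, and memory_usage="deep"
# reports the true footprint of object columns.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, None], "b": ["x", "y", None]})

df.info(show_counts=True, memory_usage="deep")  # prints non-null counts per column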
+ + Note that a copy is always required for mixed dtype DataFrames, + or for DataFrames with any extension types. + + Returns + ------- + DataFrame + The transposed DataFrame. + + See Also + -------- + numpy.transpose : Permute the dimensions of a given array. + + Notes + ----- + Transposing a DataFrame with mixed dtypes will result in a homogeneous + DataFrame with the `object` dtype. In such a case, a copy of the data + is always made. + + Examples + -------- + **Square DataFrame with homogeneous dtype** + + >>> d1 = {'col1': [1, 2], 'col2': [3, 4]} + >>> df1 = pd.DataFrame(data=d1) + >>> df1 + col1 col2 + 0 1 3 + 1 2 4 + + >>> df1_transposed = df1.T # or df1.transpose() + >>> df1_transposed + 0 1 + col1 1 2 + col2 3 4 + + When the dtype is homogeneous in the original DataFrame, we get a + transposed DataFrame with the same dtype: + + >>> df1.dtypes + col1 int64 + col2 int64 + dtype: object + >>> df1_transposed.dtypes + 0 int64 + 1 int64 + dtype: object + + **Non-square DataFrame with mixed dtypes** + + >>> d2 = {'name': ['Alice', 'Bob'], + ... 'score': [9.5, 8], + ... 'employed': [False, True], + ... 'kids': [0, 0]} + >>> df2 = pd.DataFrame(data=d2) + >>> df2 + name score employed kids + 0 Alice 9.5 False 0 + 1 Bob 8.0 True 0 + + >>> df2_transposed = df2.T # or df2.transpose() + >>> df2_transposed + 0 1 + name Alice Bob + score 9.5 8.0 + employed False True + kids 0 0 + + When the DataFrame has mixed dtypes, we get a transposed DataFrame with + the `object` dtype: + + >>> df2.dtypes + name object + score float64 + employed bool + kids int64 + dtype: object + >>> df2_transposed.dtypes + 0 object + 1 object + dtype: object + """ + nv.validate_transpose(args, {}) + # construct the args + + dtypes = list(self.dtypes) + + if self._can_fast_transpose: + # Note: tests pass without this, but this improves perf quite a bit. + new_vals = self._values.T + if copy: + new_vals = new_vals.copy() + + result = self._constructor(new_vals, index=self.columns, columns=self.index) + + elif ( + self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]) + ): + # We have EAs with the same dtype. We can preserve that dtype in transpose. 
+ dtype = dtypes[0] + arr_type = dtype.construct_array_type() + values = self.values + + new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] + result = type(self)._from_arrays( + new_values, index=self.columns, columns=self.index + ) + + else: + new_arr = self.values.T + if copy: + new_arr = new_arr.copy() + result = self._constructor(new_arr, index=self.columns, columns=self.index) + + return result.__finalize__(self, method="transpose") + + @property + def T(self) -> DataFrame: + return self.transpose() + + # ---------------------------------------------------------------------- + # Indexing Methods + + def _ixs(self, i: int, axis: int = 0) -> Series: + """ + Parameters + ---------- + i : int + axis : int + + Returns + ------- + Series + """ + # irow + if axis == 0: + new_mgr = self._mgr.fast_xs(i) + + # if we are a copy, mark as such + copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None + result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__( + self + ) + result._set_is_copy(self, copy=copy) + return result + + # icol + else: + label = self.columns[i] + + col_mgr = self._mgr.iget(i) + result = self._box_col_values(col_mgr, i) + + # this is a cached value, mark it so + result._set_as_cached(label, self) + return result + + def _get_column_array(self, i: int) -> ArrayLike: + """ + Get the values of the i'th column (ndarray or ExtensionArray, as stored + in the Block) + + Warning! The returned array is a view but doesn't handle Copy-on-Write, + so this should be used with caution (for read-only purposes). + """ + return self._mgr.iget_values(i) + + def _iter_column_arrays(self) -> Iterator[ArrayLike]: + """ + Iterate over the arrays of all columns in order. + This returns the values as stored in the Block (ndarray or ExtensionArray). + + Warning! The returned array is a view but doesn't handle Copy-on-Write, + so this should be used with caution (for read-only purposes). + """ + for i in range(len(self.columns)): + yield self._get_column_array(i) + + def __getitem__(self, key): + check_deprecated_indexers(key) + key = lib.item_from_zerodim(key) + key = com.apply_if_callable(key, self) + + if is_hashable(key) and not is_iterator(key): + # is_iterator to exclude generator e.g. test_getitem_listlike + # shortcut if the key is in columns + is_mi = isinstance(self.columns, MultiIndex) + # GH#45316 Return view if key is not duplicated + # Only use drop_duplicates with duplicates for performance + if not is_mi and ( + self.columns.is_unique + and key in self.columns + or key in self.columns.drop_duplicates(keep=False) + ): + return self._get_item_cache(key) + + elif is_mi and self.columns.is_unique and key in self.columns: + return self._getitem_multilevel(key) + # Do we have a slicer (on rows)? + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + if isinstance(indexer, np.ndarray): + indexer = lib.maybe_indices_to_slice( + indexer.astype(np.intp, copy=False), len(self) + ) + if isinstance(indexer, np.ndarray): + # GH#43223 If we can not convert, use take + return self.take(indexer, axis=0) + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing + return self._slice(indexer, axis=0) + + # Do we have a (boolean) DataFrame? + if isinstance(key, DataFrame): + return self.where(key) + + # Do we have a (boolean) 1d indexer? 
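# Illustrative usage sketch (not part of the imported patch): when every column
# shares one extension dtype, the homogeneous-EA branch above lets transpose
# keep that dtype instead of falling back to object.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, None]}, dtype="Int64")

transposed = df.T   # both resulting columns keep the nullable Int64 dtype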
+ if com.is_bool_indexer(key): + return self._getitem_bool_array(key) + + # We are left with two options: a single key, and a collection of keys, + # We interpret tuples as collections only for non-MultiIndex + is_single_key = isinstance(key, tuple) or not is_list_like(key) + + if is_single_key: + if self.columns.nlevels > 1: + return self._getitem_multilevel(key) + indexer = self.columns.get_loc(key) + if is_integer(indexer): + indexer = [indexer] + else: + if is_iterator(key): + key = list(key) + indexer = self.columns._get_indexer_strict(key, "columns")[1] + + # take() does not accept boolean indexers + if getattr(indexer, "dtype", None) == bool: + indexer = np.where(indexer)[0] + + data = self._take_with_is_copy(indexer, axis=1) + + if is_single_key: + # What does looking for a single key in a non-unique index return? + # The behavior is inconsistent. It returns a Series, except when + # - the key itself is repeated (test on data.shape, #9519), or + # - we have a MultiIndex on columns (test on self.columns, #21309) + if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex): + # GH#26490 using data[key] can cause RecursionError + return data._get_item_cache(key) + + return data + + def _getitem_bool_array(self, key): + # also raises Exception if object array with NA values + # warning here just in case -- previously __setitem__ was + # reindexing but __getitem__ was not; it seems more reasonable to + # go with the __setitem__ behavior since that is more consistent + # with all other indexing behavior + if isinstance(key, Series) and not key.index.equals(self.index): + warnings.warn( + "Boolean Series key will be reindexed to match DataFrame index.", + UserWarning, + stacklevel=find_stack_level(), + ) + elif len(key) != len(self.index): + raise ValueError( + f"Item wrong length {len(key)} instead of {len(self.index)}." + ) + + # check_bool_indexer will throw exception if Series key cannot + # be reindexed to match DataFrame rows + key = check_bool_indexer(self.index, key) + indexer = key.nonzero()[0] + return self._take_with_is_copy(indexer, axis=0) + + def _getitem_multilevel(self, key): + # self.columns is a MultiIndex + loc = self.columns.get_loc(key) + if isinstance(loc, (slice, np.ndarray)): + new_columns = self.columns[loc] + result_columns = maybe_droplevels(new_columns, key) + if self._is_mixed_type: + result = self.reindex(columns=new_columns) + result.columns = result_columns + else: + new_values = self.values[:, loc] + result = self._constructor( + new_values, index=self.index, columns=result_columns + ) + result = result.__finalize__(self) + + # If there is only one column being returned, and its name is + # either an empty string, or a tuple with an empty string as its + # first element, then treat the empty string as a placeholder + # and return the column as if the user had provided that empty + # string in the key. If the result is a Series, exclude the + # implied empty string from its name. + if len(result.columns) == 1: + top = result.columns[0] + if isinstance(top, tuple): + top = top[0] + if top == "": + result = result[""] + if isinstance(result, Series): + result = self._constructor_sliced( + result, index=self.index, name=key + ) + + result._set_is_copy(self) + return result + else: + # loc is neither a slice nor ndarray, so must be an int + return self._ixs(loc, axis=1) + + def _get_value(self, index, col, takeable: bool = False) -> Scalar: + """ + Quickly retrieve single value at passed column and index. 
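# Illustrative usage sketch (not part of the imported patch): __getitem__ with a
# partial key on MultiIndex columns returns a sub-frame with the matched level
# dropped, while a boolean Series key selects rows after alignment to the index.
import pandas as pd

df = pd.DataFrame(
    [[1, 2, 3], [4, 5, 6]],
    columns=pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "x")]),
)

sub = df["a"]                        # columns 'x' and 'y'; top level 'a' is dropped
rows = df[pd.Series([True, False])]  # boolean key keeps only the first row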
+ + Parameters + ---------- + index : row label + col : column label + takeable : interpret the index/col as indexers, default False + + Returns + ------- + scalar + + Notes + ----- + Assumes that both `self.index._index_as_unique` and + `self.columns._index_as_unique`; Caller is responsible for checking. + """ + if takeable: + series = self._ixs(col, axis=1) + return series._values[index] + + series = self._get_item_cache(col) + engine = self.index._engine + + if not isinstance(self.index, MultiIndex): + # CategoricalIndex: Trying to use the engine fastpath may give incorrect + # results if our categories are integers that dont match our codes + # IntervalIndex: IntervalTree has no get_loc + row = self.index.get_loc(index) + return series._values[row] + + # For MultiIndex going through engine effectively restricts us to + # same-length tuples; see test_get_set_value_no_partial_indexing + loc = engine.get_loc(index) + return series._values[loc] + + def isetitem(self, loc, value) -> None: + """ + Set the given value in the column with position 'loc'. + + This is a positional analogue to __setitem__. + + Parameters + ---------- + loc : int or sequence of ints + value : scalar or arraylike + + Notes + ----- + Unlike `frame.iloc[:, i] = value`, `frame.isetitem(loc, value)` will + _never_ try to set the values in place, but will always insert a new + array. + + In cases where `frame.columns` is unique, this is equivalent to + `frame[frame.columns[i]] = value`. + """ + arraylike = self._sanitize_column(value) + self._iset_item_mgr(loc, arraylike, inplace=False) + + def __setitem__(self, key, value): + key = com.apply_if_callable(key, self) + + # see if we can slice the rows + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing + return self._setitem_slice(indexer, value) + + if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: + self._setitem_frame(key, value) + elif isinstance(key, (Series, np.ndarray, list, Index)): + self._setitem_array(key, value) + elif isinstance(value, DataFrame): + self._set_item_frame_value(key, value) + elif ( + is_list_like(value) + and not self.columns.is_unique + and 1 < len(self.columns.get_indexer_for([key])) == len(value) + ): + # Column to set is duplicated + self._setitem_array([key], value) + else: + # set column + self._set_item(key, value) + + def _setitem_slice(self, key: slice, value): + # NB: we can't just use self.loc[key] = value because that + # operates on labels and we need to operate positional for + # backwards-compat, xref GH#31469 + self._check_setitem_copy() + self.iloc[key] = value + + def _setitem_array(self, key, value): + # also raises Exception if object array with NA values + if com.is_bool_indexer(key): + # bool indexer is indexing along rows + if len(key) != len(self.index): + raise ValueError( + f"Item wrong length {len(key)} instead of {len(self.index)}!" 
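# Illustrative usage sketch (not part of the imported patch): isetitem always
# inserts a new array at the given column position, whereas iloc assignment may
# write into the existing array in place and therefore keep its dtype.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

df.isetitem(0, [1.5, 2.5])   # column 0 is replaced; its dtype becomes float64
df.iloc[:, 1] = [5, 6]       # may be written into the existing int64 array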
+ ) + key = check_bool_indexer(self.index, key) + indexer = key.nonzero()[0] + self._check_setitem_copy() + if isinstance(value, DataFrame): + # GH#39931 reindex since iloc does not align + value = value.reindex(self.index.take(indexer)) + self.iloc[indexer] = value + + else: + # Note: unlike self.iloc[:, indexer] = value, this will + # never try to overwrite values inplace + + if isinstance(value, DataFrame): + check_key_length(self.columns, key, value) + for k1, k2 in zip(key, value.columns): + self[k1] = value[k2] + + elif not is_list_like(value): + for col in key: + self[col] = value + + elif isinstance(value, np.ndarray) and value.ndim == 2: + self._iset_not_inplace(key, value) + + elif np.ndim(value) > 1: + # list of lists + value = DataFrame(value).values + return self._setitem_array(key, value) + + else: + self._iset_not_inplace(key, value) + + def _iset_not_inplace(self, key, value): + # GH#39510 when setting with df[key] = obj with a list-like key and + # list-like value, we iterate over those listlikes and set columns + # one at a time. This is different from dispatching to + # `self.loc[:, key]= value` because loc.__setitem__ may overwrite + # data inplace, whereas this will insert new arrays. + + def igetitem(obj, i: int): + # Note: we catch DataFrame obj before getting here, but + # hypothetically would return obj.iloc[:, i] + if isinstance(obj, np.ndarray): + return obj[..., i] + else: + return obj[i] + + if self.columns.is_unique: + if np.shape(value)[-1] != len(key): + raise ValueError("Columns must be same length as key") + + for i, col in enumerate(key): + self[col] = igetitem(value, i) + + else: + + ilocs = self.columns.get_indexer_non_unique(key)[0] + if (ilocs < 0).any(): + # key entries not in self.columns + raise NotImplementedError + + if np.shape(value)[-1] != len(ilocs): + raise ValueError("Columns must be same length as key") + + assert np.ndim(value) <= 2 + + orig_columns = self.columns + + # Using self.iloc[:, i] = ... may set values inplace, which + # by convention we do not do in __setitem__ + try: + self.columns = Index(range(len(self.columns))) + for i, iloc in enumerate(ilocs): + self[iloc] = igetitem(value, i) + finally: + self.columns = orig_columns + + def _setitem_frame(self, key, value): + # support boolean setting with DataFrame input, e.g. 
+ # df[df > df2] = 0 + if isinstance(key, np.ndarray): + if key.shape != self.shape: + raise ValueError("Array conditional must be same shape as self") + key = self._constructor(key, **self._construct_axes_dict()) + + if key.size and not is_bool_dtype(key.values): + raise TypeError( + "Must pass DataFrame or 2-d ndarray with boolean values only" + ) + + self._check_inplace_setting(value) + self._check_setitem_copy() + self._where(-key, value, inplace=True) + + def _set_item_frame_value(self, key, value: DataFrame) -> None: + self._ensure_valid_index(value) + + # align columns + if key in self.columns: + loc = self.columns.get_loc(key) + cols = self.columns[loc] + len_cols = 1 if is_scalar(cols) else len(cols) + if len_cols != len(value.columns): + raise ValueError("Columns must be same length as key") + + # align right-hand-side columns if self.columns + # is multi-index and self[key] is a sub-frame + if isinstance(self.columns, MultiIndex) and isinstance( + loc, (slice, Series, np.ndarray, Index) + ): + cols_droplevel = maybe_droplevels(cols, key) + if len(cols_droplevel) and not cols_droplevel.equals(value.columns): + value = value.reindex(cols_droplevel, axis=1) + + for col, col_droplevel in zip(cols, cols_droplevel): + self[col] = value[col_droplevel] + return + + if is_scalar(cols): + self[cols] = value[value.columns[0]] + return + + # now align rows + arraylike = _reindex_for_setitem(value, self.index) + self._set_item_mgr(key, arraylike) + return + + if len(value.columns) != 1: + raise ValueError( + "Cannot set a DataFrame with multiple columns to the single " + f"column {key}" + ) + + self[key] = value[value.columns[0]] + + def _iset_item_mgr( + self, loc: int | slice | np.ndarray, value, inplace: bool = False + ) -> None: + # when called from _set_item_mgr loc can be anything returned from get_loc + self._mgr.iset(loc, value, inplace=inplace) + self._clear_item_cache() + + def _set_item_mgr(self, key, value: ArrayLike) -> None: + try: + loc = self._info_axis.get_loc(key) + except KeyError: + # This item wasn't present, just insert at end + self._mgr.insert(len(self._info_axis), key, value) + else: + self._iset_item_mgr(loc, value) + + # check if we are modifying a copy + # try to set first as we want an invalid + # value exception to occur first + if len(self): + self._check_setitem_copy() + + def _iset_item(self, loc: int, value) -> None: + arraylike = self._sanitize_column(value) + self._iset_item_mgr(loc, arraylike, inplace=True) + + # check if we are modifying a copy + # try to set first as we want an invalid + # value exception to occur first + if len(self): + self._check_setitem_copy() + + def _set_item(self, key, value) -> None: + """ + Add series to DataFrame in specified column. + + If series is a numpy-array (not a Series/TimeSeries), it must be the + same length as the DataFrames index or an error will be thrown. + + Series/TimeSeries will be conformed to the DataFrames index to + ensure homogeneity. 
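# Illustrative usage sketch (not part of the imported patch): setting with a
# boolean DataFrame key routes through the masked-assignment path above; the
# mask must have the same shape as the frame.
import pandas as pd

df = pd.DataFrame({"a": [1, -2, 3], "b": [-4, 5, -6]})

df[df < 0] = 0   # every negative entry is replaced with 0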
+ """ + value = self._sanitize_column(value) + + if ( + key in self.columns + and value.ndim == 1 + and not is_extension_array_dtype(value) + ): + # broadcast across multiple columns if necessary + if not self.columns.is_unique or isinstance(self.columns, MultiIndex): + existing_piece = self[key] + if isinstance(existing_piece, DataFrame): + value = np.tile(value, (len(existing_piece.columns), 1)).T + + self._set_item_mgr(key, value) + + def _set_value( + self, index: IndexLabel, col, value: Scalar, takeable: bool = False + ) -> None: + """ + Put single value at passed column and index. + + Parameters + ---------- + index : Label + row label + col : Label + column label + value : scalar + takeable : bool, default False + Sets whether or not index/col interpreted as indexers + """ + try: + if takeable: + icol = col + iindex = cast(int, index) + else: + icol = self.columns.get_loc(col) + iindex = self.index.get_loc(index) + self._mgr.column_setitem(icol, iindex, value, inplace=True) + self._clear_item_cache() + + except (KeyError, TypeError, ValueError, LossySetitemError): + # get_loc might raise a KeyError for missing labels (falling back + # to (i)loc will do expansion of the index) + # column_setitem will do validation that may raise TypeError, + # ValueError, or LossySetitemError + # set using a non-recursive method & reset the cache + if takeable: + self.iloc[index, col] = value + else: + self.loc[index, col] = value + self._item_cache.pop(col, None) + + except InvalidIndexError as ii_err: + # GH48729: Seems like you are trying to assign a value to a + # row when only scalar options are permitted + raise InvalidIndexError( + f"You can only assign a scalar value not a {type(value)}" + ) from ii_err + + def _ensure_valid_index(self, value) -> None: + """ + Ensure that if we don't have an index, that we can create one from the + passed value. + """ + # GH5632, make sure that we are a Series convertible + if not len(self.index) and is_list_like(value) and len(value): + if not isinstance(value, DataFrame): + try: + value = Series(value) + except (ValueError, NotImplementedError, TypeError) as err: + raise ValueError( + "Cannot set a frame with no defined index " + "and a value that cannot be converted to a Series" + ) from err + + # GH31368 preserve name of index + index_copy = value.index.copy() + if self.index.name is not None: + index_copy.name = self.index.name + + self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan) + + def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: + """ + Provide boxed values for a column. + """ + # Lookup in columns so that if e.g. a str datetime was passed + # we attach the Timestamp object as the name. 
+ name = self.columns[loc] + klass = self._constructor_sliced + # We get index=self.index bc values is a SingleDataManager + return klass(values, name=name, fastpath=True).__finalize__(self) + + # ---------------------------------------------------------------------- + # Lookup Caching + + def _clear_item_cache(self) -> None: + self._item_cache.clear() + + def _get_item_cache(self, item: Hashable) -> Series: + """Return the cached item, item represents a label indexer.""" + cache = self._item_cache + res = cache.get(item) + if res is None: + # All places that call _get_item_cache have unique columns, + # pending resolution of GH#33047 + + loc = self.columns.get_loc(item) + res = self._ixs(loc, axis=1) + + cache[item] = res + + # for a chain + res._is_copy = self._is_copy + return res + + def _reset_cacher(self) -> None: + # no-op for DataFrame + pass + + def _maybe_cache_changed(self, item, value: Series, inplace: bool) -> None: + """ + The object has called back to us saying maybe it has changed. + """ + loc = self._info_axis.get_loc(item) + arraylike = value._values + + old = self._ixs(loc, axis=1) + if old._values is value._values and inplace: + # GH#46149 avoid making unnecessary copies/block-splitting + return + + self._mgr.iset(loc, arraylike, inplace=inplace) + + # ---------------------------------------------------------------------- + # Unsorted + + @overload + def query(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> DataFrame: + ... + + @overload + def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: + ... + + @overload + def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "expr"]) + def query(self, expr: str, inplace: bool = False, **kwargs) -> DataFrame | None: + """ + Query the columns of a DataFrame with a boolean expression. + + Parameters + ---------- + expr : str + The query string to evaluate. + + You can refer to variables + in the environment by prefixing them with an '@' character like + ``@a + b``. + + You can refer to column names that are not valid Python variable names + by surrounding them in backticks. Thus, column names containing spaces + or punctuations (besides underscores) or starting with digits must be + surrounded by backticks. (For example, a column named "Area (cm^2)" would + be referenced as ```Area (cm^2)```). Column names which are Python keywords + (like "list", "for", "import", etc) cannot be used. + + For example, if one of your columns is called ``a a`` and you want + to sum it with ``b``, your query should be ```a a` + b``. + + .. versionadded:: 0.25.0 + Backtick quoting introduced. + + .. versionadded:: 1.0.0 + Expanding functionality of backtick quoting for more than only spaces. + + inplace : bool + Whether to modify the DataFrame rather than creating a new one. + **kwargs + See the documentation for :func:`eval` for complete details + on the keyword arguments accepted by :meth:`DataFrame.query`. + + Returns + ------- + DataFrame or None + DataFrame resulting from the provided query expression or + None if ``inplace=True``. + + See Also + -------- + eval : Evaluate a string describing operations on + DataFrame columns. + DataFrame.eval : Evaluate a string describing operations on + DataFrame columns. 
+ + Notes + ----- + The result of the evaluation of this expression is first passed to + :attr:`DataFrame.loc` and if that fails because of a + multidimensional key (e.g., a DataFrame) then the result will be passed + to :meth:`DataFrame.__getitem__`. + + This method uses the top-level :func:`eval` function to + evaluate the passed query. + + The :meth:`~pandas.DataFrame.query` method uses a slightly + modified Python syntax by default. For example, the ``&`` and ``|`` + (bitwise) operators have the precedence of their boolean cousins, + :keyword:`and` and :keyword:`or`. This *is* syntactically valid Python, + however the semantics are different. + + You can change the semantics of the expression by passing the keyword + argument ``parser='python'``. This enforces the same semantics as + evaluation in Python space. Likewise, you can pass ``engine='python'`` + to evaluate an expression using Python itself as a backend. This is not + recommended as it is inefficient compared to using ``numexpr`` as the + engine. + + The :attr:`DataFrame.index` and + :attr:`DataFrame.columns` attributes of the + :class:`~pandas.DataFrame` instance are placed in the query namespace + by default, which allows you to treat both the index and columns of the + frame as a column in the frame. + The identifier ``index`` is used for the frame index; you can also + use the name of the index to identify it in a query. Please note that + Python keywords may not be used as identifiers. + + For further details and examples see the ``query`` documentation in + :ref:`indexing `. + + *Backtick quoted variables* + + Backtick quoted variables are parsed as literal Python code and + are converted internally to a Python valid identifier. + This can lead to the following problems. + + During parsing a number of disallowed characters inside the backtick + quoted string are replaced by strings that are allowed as a Python identifier. + These characters include all operators in Python, the space character, the + question mark, the exclamation mark, the dollar sign, and the euro sign. + For other characters that fall outside the ASCII range (U+0001..U+007F) + and those that are not further specified in PEP 3131, + the query parser will raise an error. + This excludes whitespace different than the space character, + but also the hashtag (as it is used for comments) and the backtick + itself (backtick can also not be escaped). + + In a special case, quotes that make a pair around a backtick can + confuse the parser. + For example, ```it's` > `that's``` will raise an error, + as it forms a quoted string (``'s > `that'``) with a backtick inside. + + See also the Python documentation about lexical analysis + (https://docs.python.org/3/reference/lexical_analysis.html) + in combination with the source code in :mod:`pandas.core.computation.parsing`. + + Examples + -------- + >>> df = pd.DataFrame({'A': range(1, 6), + ... 'B': range(10, 0, -2), + ... 'C C': range(10, 5, -1)}) + >>> df + A B C C + 0 1 10 10 + 1 2 8 9 + 2 3 6 8 + 3 4 4 7 + 4 5 2 6 + >>> df.query('A > B') + A B C C + 4 5 2 6 + + The previous expression is equivalent to + + >>> df[df.A > df.B] + A B C C + 4 5 2 6 + + For columns with spaces in their name, you can use backtick quoting. 
+ + >>> df.query('B == `C C`') + A B C C + 0 1 10 10 + + The previous expression is equivalent to + + >>> df[df.B == df['C C']] + A B C C + 0 1 10 10 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if not isinstance(expr, str): + msg = f"expr must be a string to be evaluated, {type(expr)} given" + raise ValueError(msg) + kwargs["level"] = kwargs.pop("level", 0) + 2 + kwargs["target"] = None + res = self.eval(expr, **kwargs) + + try: + result = self.loc[res] + except ValueError: + # when res is multi-dimensional loc raises, but this is sometimes a + # valid query + result = self[res] + + if inplace: + self._update_inplace(result) + return None + else: + return result + + @overload + def eval(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> Any: + ... + + @overload + def eval(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "expr"]) + def eval(self, expr: str, inplace: bool = False, **kwargs) -> Any | None: + """ + Evaluate a string describing operations on DataFrame columns. + + Operates on columns only, not specific rows or elements. This allows + `eval` to run arbitrary code, which can make you vulnerable to code + injection if you pass user input to this function. + + Parameters + ---------- + expr : str + The expression string to evaluate. + inplace : bool, default False + If the expression contains an assignment, whether to perform the + operation inplace and mutate the existing DataFrame. Otherwise, + a new DataFrame is returned. + **kwargs + See the documentation for :func:`eval` for complete details + on the keyword arguments accepted by + :meth:`~pandas.DataFrame.query`. + + Returns + ------- + ndarray, scalar, pandas object, or None + The result of the evaluation or None if ``inplace=True``. + + See Also + -------- + DataFrame.query : Evaluates a boolean expression to query the columns + of a frame. + DataFrame.assign : Can evaluate an expression or function to create new + values for a column. + eval : Evaluate a Python expression as a string using various + backends. + + Notes + ----- + For more details see the API documentation for :func:`~eval`. + For detailed examples see :ref:`enhancing performance with eval + `. + + Examples + -------- + >>> df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + >>> df.eval('A + B') + 0 11 + 1 10 + 2 9 + 3 8 + 4 7 + dtype: int64 + + Assignment is allowed though by default the original DataFrame is not + modified. + + >>> df.eval('C = A + B') + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + + Use ``inplace=True`` to modify the original DataFrame. + + >>> df.eval('C = A + B', inplace=True) + >>> df + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + + Multiple columns can be assigned to using multi-line expressions: + + >>> df.eval( + ... ''' + ... C = A + B + ... D = A - B + ... ''' + ... 
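# Editor's illustration of the query() behaviour documented above; a minimal
# sketch, not part of the imported pandas source. The frame, column names and
# the local variable `threshold` are invented for the example.
import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3], "B": [3, 2, 1], "C C": [10, 20, 30]})
threshold = 1
# '@' pulls a variable from the calling environment; backticks quote the
# column name containing a space.
subset = df.query("A > @threshold and B < `C C`")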
) + A B C D + 0 1 10 11 -9 + 1 2 8 10 -6 + 2 3 6 9 -3 + 3 4 4 8 0 + 4 5 2 7 3 + """ + from pandas.core.computation.eval import eval as _eval + + inplace = validate_bool_kwarg(inplace, "inplace") + kwargs["level"] = kwargs.pop("level", 0) + 2 + index_resolvers = self._get_index_resolvers() + column_resolvers = self._get_cleaned_column_resolvers() + resolvers = column_resolvers, index_resolvers + if "target" not in kwargs: + kwargs["target"] = self + kwargs["resolvers"] = tuple(kwargs.get("resolvers", ())) + resolvers + + return _eval(expr, inplace=inplace, **kwargs) + + def select_dtypes(self, include=None, exclude=None) -> DataFrame: + """ + Return a subset of the DataFrame's columns based on the column dtypes. + + Parameters + ---------- + include, exclude : scalar or list-like + A selection of dtypes or strings to be included/excluded. At least + one of these parameters must be supplied. + + Returns + ------- + DataFrame + The subset of the frame including the dtypes in ``include`` and + excluding the dtypes in ``exclude``. + + Raises + ------ + ValueError + * If both of ``include`` and ``exclude`` are empty + * If ``include`` and ``exclude`` have overlapping elements + * If any kind of string dtype is passed in. + + See Also + -------- + DataFrame.dtypes: Return Series with the data type of each column. + + Notes + ----- + * To select all *numeric* types, use ``np.number`` or ``'number'`` + * To select strings you must use the ``object`` dtype, but note that + this will return *all* object dtype columns + * See the `numpy dtype hierarchy + `__ + * To select datetimes, use ``np.datetime64``, ``'datetime'`` or + ``'datetime64'`` + * To select timedeltas, use ``np.timedelta64``, ``'timedelta'`` or + ``'timedelta64'`` + * To select Pandas categorical dtypes, use ``'category'`` + * To select Pandas datetimetz dtypes, use ``'datetimetz'`` (new in + 0.20.0) or ``'datetime64[ns, tz]'`` + + Examples + -------- + >>> df = pd.DataFrame({'a': [1, 2] * 3, + ... 'b': [True, False] * 3, + ... 
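# Editor's sketch of DataFrame.eval assignment, matching the docstring above;
# not part of the imported source. Column names are illustrative only.
import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]})
# Multi-line expressions may assign several columns; inplace=True mutates df,
# otherwise the modified copy is returned and df is left untouched.
df.eval("C = A + B\nD = A - B", inplace=True)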
'c': [1.0, 2.0] * 3}) + >>> df + a b c + 0 1 True 1.0 + 1 2 False 2.0 + 2 1 True 1.0 + 3 2 False 2.0 + 4 1 True 1.0 + 5 2 False 2.0 + + >>> df.select_dtypes(include='bool') + b + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + + >>> df.select_dtypes(include=['float64']) + c + 0 1.0 + 1 2.0 + 2 1.0 + 3 2.0 + 4 1.0 + 5 2.0 + + >>> df.select_dtypes(exclude=['int64']) + b c + 0 True 1.0 + 1 False 2.0 + 2 True 1.0 + 3 False 2.0 + 4 True 1.0 + 5 False 2.0 + """ + if not is_list_like(include): + include = (include,) if include is not None else () + if not is_list_like(exclude): + exclude = (exclude,) if exclude is not None else () + + selection = (frozenset(include), frozenset(exclude)) + + if not any(selection): + raise ValueError("at least one of include or exclude must be nonempty") + + # convert the myriad valid dtypes object to a single representation + def check_int_infer_dtype(dtypes): + converted_dtypes: list[type] = [] + for dtype in dtypes: + # Numpy maps int to different types (int32, in64) on Windows and Linux + # see https://github.com/numpy/numpy/issues/9464 + if (isinstance(dtype, str) and dtype == "int") or (dtype is int): + converted_dtypes.append(np.int32) + converted_dtypes.append(np.int64) + elif dtype == "float" or dtype is float: + # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20 + converted_dtypes.extend([np.float64, np.float32]) + else: + converted_dtypes.append(infer_dtype_from_object(dtype)) + return frozenset(converted_dtypes) + + include = check_int_infer_dtype(include) + exclude = check_int_infer_dtype(exclude) + + for dtypes in (include, exclude): + invalidate_string_dtypes(dtypes) + + # can't both include AND exclude! + if not include.isdisjoint(exclude): + raise ValueError(f"include and exclude overlap on {(include & exclude)}") + + def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool: + # GH 46870: BooleanDtype._is_numeric == True but should be excluded + return issubclass(dtype.type, tuple(dtypes_set)) or ( + np.number in dtypes_set + and getattr(dtype, "_is_numeric", False) + and not is_bool_dtype(dtype) + ) + + def predicate(arr: ArrayLike) -> bool: + dtype = arr.dtype + if include: + if not dtype_predicate(dtype, include): + return False + + if exclude: + if dtype_predicate(dtype, exclude): + return False + + return True + + mgr = self._mgr._get_data_subset(predicate).copy(deep=None) + return type(self)(mgr).__finalize__(self) + + def insert( + self, + loc: int, + column: Hashable, + value: Scalar | AnyArrayLike, + allow_duplicates: bool | lib.NoDefault = lib.no_default, + ) -> None: + """ + Insert column into DataFrame at specified location. + + Raises a ValueError if `column` is already contained in the DataFrame, + unless `allow_duplicates` is set to True. + + Parameters + ---------- + loc : int + Insertion index. Must verify 0 <= loc <= len(columns). + column : str, number, or hashable object + Label of the inserted column. + value : Scalar, Series, or array-like + allow_duplicates : bool, optional, default lib.no_default + + See Also + -------- + Index.insert : Insert new item by index. 
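# Editor's sketch of select_dtypes (not part of the imported source); the
# example frame is invented. np.number selects int and float columns but,
# per the Notes above, not booleans.
import numpy as np
import pandas as pd

df = pd.DataFrame({"i": [1, 2], "f": [1.5, 2.5], "s": ["a", "b"], "flag": [True, False]})
numeric_only = df.select_dtypes(include=np.number)   # columns 'i' and 'f'
no_strings = df.select_dtypes(exclude=["object"])    # everything except 's'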
+ + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + >>> df.insert(1, "newcol", [99, 99]) + >>> df + col1 newcol col2 + 0 1 99 3 + 1 2 99 4 + >>> df.insert(0, "col1", [100, 100], allow_duplicates=True) + >>> df + col1 col1 newcol col2 + 0 100 1 99 3 + 1 100 2 99 4 + + Notice that pandas uses index alignment in case of `value` from type `Series`: + + >>> df.insert(0, "col0", pd.Series([5, 6], index=[1, 2])) + >>> df + col0 col1 col1 newcol col2 + 0 NaN 100 1 99 3 + 1 5.0 100 2 99 4 + """ + if allow_duplicates is lib.no_default: + allow_duplicates = False + if allow_duplicates and not self.flags.allows_duplicate_labels: + raise ValueError( + "Cannot specify 'allow_duplicates=True' when " + "'self.flags.allows_duplicate_labels' is False." + ) + if not allow_duplicates and column in self.columns: + # Should this be a different kind of error?? + raise ValueError(f"cannot insert {column}, already exists") + if not isinstance(loc, int): + raise TypeError("loc must be int") + + value = self._sanitize_column(value) + self._mgr.insert(loc, column, value) + + def assign(self, **kwargs) -> DataFrame: + r""" + Assign new columns to a DataFrame. + + Returns a new object with all original columns in addition to new ones. + Existing columns that are re-assigned will be overwritten. + + Parameters + ---------- + **kwargs : dict of {str: callable or Series} + The column names are keywords. If the values are + callable, they are computed on the DataFrame and + assigned to the new columns. The callable must not + change input DataFrame (though pandas doesn't check it). + If the values are not callable, (e.g. a Series, scalar, or array), + they are simply assigned. + + Returns + ------- + DataFrame + A new DataFrame with the new columns in addition to + all the existing columns. + + Notes + ----- + Assigning multiple columns within the same ``assign`` is possible. + Later items in '\*\*kwargs' may refer to newly created or modified + columns in 'df'; items are computed and assigned into 'df' in order. + + Examples + -------- + >>> df = pd.DataFrame({'temp_c': [17.0, 25.0]}, + ... index=['Portland', 'Berkeley']) + >>> df + temp_c + Portland 17.0 + Berkeley 25.0 + + Where the value is a callable, evaluated on `df`: + + >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) + temp_c temp_f + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + Alternatively, the same behavior can be achieved by directly + referencing an existing Series or sequence: + + >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) + temp_c temp_f + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + You can create multiple columns within the same assign where one + of the columns depends on another one defined within the same assign: + + >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, + ... temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) + temp_c temp_f temp_k + Portland 17.0 62.6 290.15 + Berkeley 25.0 77.0 298.15 + """ + data = self.copy() + + for k, v in kwargs.items(): + data[k] = com.apply_if_callable(v, data) + return data + + def _sanitize_column(self, value) -> ArrayLike: + """ + Ensures new columns (which go into the BlockManager as new blocks) are + always copied and converted into an array. 
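# Editor's sketch combining insert() and assign() from the code above; not
# part of the imported source, and the column names are illustrative.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
df.insert(1, "a_plus_b", df["a"] + df["b"])        # positional, mutates df in place
df2 = df.assign(ratio=lambda d: d["a"] / d["b"])   # returns a new frame, df unchanged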
+ + Parameters + ---------- + value : scalar, Series, or array-like + + Returns + ------- + numpy.ndarray or ExtensionArray + """ + self._ensure_valid_index(value) + + # We can get there through isetitem with a DataFrame + # or through loc single_block_path + if isinstance(value, DataFrame): + return _reindex_for_setitem(value, self.index) + elif is_dict_like(value): + return _reindex_for_setitem(Series(value), self.index) + + if is_list_like(value): + com.require_length_match(value, self.index) + return sanitize_array(value, self.index, copy=True, allow_2d=True) + + @property + def _series(self): + return { + item: Series( + self._mgr.iget(idx), index=self.index, name=item, fastpath=True + ) + for idx, item in enumerate(self.columns) + } + + def lookup( + self, row_labels: Sequence[IndexLabel], col_labels: Sequence[IndexLabel] + ) -> np.ndarray: + """ + Label-based "fancy indexing" function for DataFrame. + + .. deprecated:: 1.2.0 + DataFrame.lookup is deprecated, + use pandas.factorize and NumPy indexing instead. + For further details see + :ref:`Looking up values by index/column labels `. + + Given equal-length arrays of row and column labels, return an + array of the values corresponding to each (row, col) pair. + + Parameters + ---------- + row_labels : sequence + The row labels to use for lookup. + col_labels : sequence + The column labels to use for lookup. + + Returns + ------- + numpy.ndarray + The found values. + """ + msg = ( + "The 'lookup' method is deprecated and will be " + "removed in a future version. " + "You can use DataFrame.melt and DataFrame.loc " + "as a substitute." + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + n = len(row_labels) + if n != len(col_labels): + raise ValueError("Row labels must have same size as column labels") + if not (self.index.is_unique and self.columns.is_unique): + # GH#33041 + raise ValueError("DataFrame.lookup requires unique index and columns") + + thresh = 1000 + if not self._is_mixed_type or n > thresh: + values = self.values + ridx = self.index.get_indexer(row_labels) + cidx = self.columns.get_indexer(col_labels) + if (ridx == -1).any(): + raise KeyError("One or more row labels was not found") + if (cidx == -1).any(): + raise KeyError("One or more column labels was not found") + flat_index = ridx * len(self.columns) + cidx + result = values.flat[flat_index] + else: + result = np.empty(n, dtype="O") + for i, (r, c) in enumerate(zip(row_labels, col_labels)): + result[i] = self._get_value(r, c) + + if is_object_dtype(result): + result = lib.maybe_convert_objects(result) + + return result + + # ---------------------------------------------------------------------- + # Reindexing and alignment + + def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy): + frame = self + + columns = axes["columns"] + if columns is not None: + frame = frame._reindex_columns( + columns, method, copy, level, fill_value, limit, tolerance + ) + + index = axes["index"] + if index is not None: + frame = frame._reindex_index( + index, method, copy, level, fill_value, limit, tolerance + ) + + return frame + + def _reindex_index( + self, + new_index, + method, + copy: bool, + level: Level, + fill_value=np.nan, + limit=None, + tolerance=None, + ): + new_index, indexer = self.index.reindex( + new_index, method=method, level=level, limit=limit, tolerance=tolerance + ) + return self._reindex_with_indexers( + {0: [new_index, indexer]}, + copy=copy, + fill_value=fill_value, + allow_dups=False, + ) + + def _reindex_columns( 
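# Editor's sketch of an indexer-based substitute for the deprecated
# DataFrame.lookup shown above; not part of the imported source. The data
# and labels are invented for the example.
import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["a", "b", "c"])
rows, cols = ["a", "c"], ["y", "x"]
ridx = df.index.get_indexer(rows)        # positional row indices
cidx = df.columns.get_indexer(cols)      # positional column indices
values = df.to_numpy()[ridx, cidx]       # array([4, 3]), one value per (row, col) pair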
+ self, + new_columns, + method, + copy: bool, + level: Level, + fill_value=None, + limit=None, + tolerance=None, + ): + new_columns, indexer = self.columns.reindex( + new_columns, method=method, level=level, limit=limit, tolerance=tolerance + ) + return self._reindex_with_indexers( + {1: [new_columns, indexer]}, + copy=copy, + fill_value=fill_value, + allow_dups=False, + ) + + def _reindex_multi( + self, axes: dict[str, Index], copy: bool, fill_value + ) -> DataFrame: + """ + We are guaranteed non-Nones in the axes. + """ + + new_index, row_indexer = self.index.reindex(axes["index"]) + new_columns, col_indexer = self.columns.reindex(axes["columns"]) + + if row_indexer is not None and col_indexer is not None: + # Fastpath. By doing two 'take's at once we avoid making an + # unnecessary copy. + # We only get here with `not self._is_mixed_type`, which (almost) + # ensures that self.values is cheap. It may be worth making this + # condition more specific. + indexer = row_indexer, col_indexer + new_values = take_2d_multi(self.values, indexer, fill_value=fill_value) + return self._constructor(new_values, index=new_index, columns=new_columns) + else: + return self._reindex_with_indexers( + {0: [new_index, row_indexer], 1: [new_columns, col_indexer]}, + copy=copy, + fill_value=fill_value, + ) + + @doc(NDFrame.align, **_shared_doc_kwargs) + def align( + self, + other: DataFrame, + join: Literal["outer", "inner", "left", "right"] = "outer", + axis: Axis | None = None, + level: Level = None, + copy: bool = True, + fill_value=None, + method: FillnaOptions | None = None, + limit: int | None = None, + fill_axis: Axis = 0, + broadcast_axis: Axis | None = None, + ) -> DataFrame: + return super().align( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + broadcast_axis=broadcast_axis, + ) + + @overload + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[False] | lib.NoDefault = ..., + copy: bool | lib.NoDefault = ..., + ) -> DataFrame: + ... + + @overload + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[True], + copy: bool | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: bool | lib.NoDefault = ..., + copy: bool | lib.NoDefault = ..., + ) -> DataFrame | None: + ... + + # error: Signature of "set_axis" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + @Appender( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + Change the row labels. + + >>> df.set_axis(['a', 'b', 'c'], axis='index') + A B + a 1 4 + b 2 5 + c 3 6 + + Change the column labels. + + >>> df.set_axis(['I', 'II'], axis='columns') + I II + 0 1 4 + 1 2 5 + 2 3 6 + + Now, update the labels without copying the underlying data. 
+ + >>> df.set_axis(['i', 'ii'], axis='columns', copy=False) + i ii + 0 1 4 + 1 2 5 + 2 3 6 + """ + ) + @Substitution( + **_shared_doc_kwargs, + extended_summary_sub=" column or", + axis_description_sub=", and 1 identifies the columns", + see_also_sub=" or columns", + ) + @Appender(NDFrame.set_axis.__doc__) + def set_axis( + self, + labels, + axis: Axis = 0, + inplace: bool | lib.NoDefault = lib.no_default, + *, + copy: bool | lib.NoDefault = lib.no_default, + ): + return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy) + + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.reindex.__doc__) + @rewrite_axis_style_signature( + "labels", + [ + ("method", None), + ("copy", None), + ("level", None), + ("fill_value", np.nan), + ("limit", None), + ("tolerance", None), + ], + ) + def reindex(self, *args, **kwargs) -> DataFrame: + axes = validate_axis_style_args(self, args, kwargs, "labels", "reindex") + kwargs.update(axes) + # Pop these, since the values are in `kwargs` under different names + kwargs.pop("axis", None) + kwargs.pop("labels", None) + return super().reindex(**kwargs) + + @overload + def drop( + self, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level = ..., + inplace: Literal[True], + errors: IgnoreRaise = ..., + ) -> None: + ... + + @overload + def drop( + self, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level = ..., + inplace: Literal[False] = ..., + errors: IgnoreRaise = ..., + ) -> DataFrame: + ... + + @overload + def drop( + self, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level = ..., + inplace: bool = ..., + errors: IgnoreRaise = ..., + ) -> DataFrame | None: + ... + + # error: Signature of "drop" incompatible with supertype "NDFrame" + # github.com/python/mypy/issues/12387 + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + def drop( # type: ignore[override] + self, + labels: IndexLabel = None, + axis: Axis = 0, + index: IndexLabel = None, + columns: IndexLabel = None, + level: Level = None, + inplace: bool = False, + errors: IgnoreRaise = "raise", + ) -> DataFrame | None: + """ + Drop specified labels from rows or columns. + + Remove rows or columns by specifying label names and corresponding + axis, or by specifying directly index or column names. When using a + multi-index, labels on different levels can be removed by specifying + the level. See the `user guide ` + for more information about the now unused levels. + + Parameters + ---------- + labels : single label or list-like + Index or column labels to drop. A tuple will be used as a single + label and not treated as a list-like. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Whether to drop labels from the index (0 or 'index') or + columns (1 or 'columns'). + index : single label or list-like + Alternative to specifying axis (``labels, axis=0`` + is equivalent to ``index=labels``). + columns : single label or list-like + Alternative to specifying axis (``labels, axis=1`` + is equivalent to ``columns=labels``). + level : int or level name, optional + For MultiIndex, level from which the labels will be removed. + inplace : bool, default False + If False, return a copy. Otherwise, do operation + inplace and return None. + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and only existing labels are + dropped. 
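# Editor's sketch of set_axis() and reindex() as used above; not part of the
# imported source. Labels and fill values are illustrative.
import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
relabelled = df.set_axis(["x", "y", "z"], axis="index")      # returns a relabelled copy
wider = df.reindex(columns=["A", "B", "C"], fill_value=0)    # new column 'C' filled with 0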
+ + Returns + ------- + DataFrame or None + DataFrame without the removed index or column labels or + None if ``inplace=True``. + + Raises + ------ + KeyError + If any of the labels is not found in the selected axis. + + See Also + -------- + DataFrame.loc : Label-location based indexer for selection by label. + DataFrame.dropna : Return DataFrame with labels on given axis omitted + where (all or any) data are missing. + DataFrame.drop_duplicates : Return DataFrame with duplicate rows + removed, optionally only considering certain columns. + Series.drop : Return Series with specified index labels removed. + + Examples + -------- + >>> df = pd.DataFrame(np.arange(12).reshape(3, 4), + ... columns=['A', 'B', 'C', 'D']) + >>> df + A B C D + 0 0 1 2 3 + 1 4 5 6 7 + 2 8 9 10 11 + + Drop columns + + >>> df.drop(['B', 'C'], axis=1) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + >>> df.drop(columns=['B', 'C']) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + Drop a row by index + + >>> df.drop([0, 1]) + A B C D + 2 8 9 10 11 + + Drop columns and/or rows of MultiIndex DataFrame + + >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + ... ['speed', 'weight', 'length']], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + >>> df = pd.DataFrame(index=midx, columns=['big', 'small'], + ... data=[[45, 30], [200, 100], [1.5, 1], [30, 20], + ... [250, 150], [1.5, 0.8], [320, 250], + ... [1, 0.8], [0.3, 0.2]]) + >>> df + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + length 1.5 1.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + length 1.5 0.8 + falcon speed 320.0 250.0 + weight 1.0 0.8 + length 0.3 0.2 + + Drop a specific index combination from the MultiIndex + DataFrame, i.e., drop the combination ``'falcon'`` and + ``'weight'``, which deletes only the corresponding row + + >>> df.drop(index=('falcon', 'weight')) + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + length 1.5 1.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + length 1.5 0.8 + falcon speed 320.0 250.0 + length 0.3 0.2 + + >>> df.drop(index='cow', columns='small') + big + lama speed 45.0 + weight 200.0 + length 1.5 + falcon speed 320.0 + weight 1.0 + length 0.3 + + >>> df.drop(index='length', level=1) + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + falcon speed 320.0 250.0 + weight 1.0 0.8 + """ + return super().drop( + labels=labels, + axis=axis, + index=index, + columns=columns, + level=level, + inplace=inplace, + errors=errors, + ) + + @overload + def rename( + self, + mapper: Renamer | None = ..., + *, + index: Renamer | None = ..., + columns: Renamer | None = ..., + axis: Axis | None = ..., + copy: bool | None = ..., + inplace: Literal[True], + level: Level = ..., + errors: IgnoreRaise = ..., + ) -> None: + ... + + @overload + def rename( + self, + mapper: Renamer | None = ..., + *, + index: Renamer | None = ..., + columns: Renamer | None = ..., + axis: Axis | None = ..., + copy: bool | None = ..., + inplace: Literal[False] = ..., + level: Level = ..., + errors: IgnoreRaise = ..., + ) -> DataFrame: + ... + + @overload + def rename( + self, + mapper: Renamer | None = ..., + *, + index: Renamer | None = ..., + columns: Renamer | None = ..., + axis: Axis | None = ..., + copy: bool | None = ..., + inplace: bool = ..., + level: Level = ..., + errors: IgnoreRaise = ..., + ) -> DataFrame | None: + ... 
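# Editor's sketch of drop() usage per the docstring above; not part of the
# imported source. Frame and labels are invented.
import pandas as pd

df = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
df.drop(columns=["B"])                          # drop a column by label
df.drop(index=[0])                              # drop a row by label
df.drop(columns=["missing"], errors="ignore")   # silently skip absent labels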
+ + def rename( + self, + mapper: Renamer | None = None, + *, + index: Renamer | None = None, + columns: Renamer | None = None, + axis: Axis | None = None, + copy: bool | None = None, + inplace: bool = False, + level: Level = None, + errors: IgnoreRaise = "ignore", + ) -> DataFrame | None: + """ + Alter axes labels. + + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. Extra labels listed don't throw an + error. + + See the :ref:`user guide ` for more. + + Parameters + ---------- + mapper : dict-like or function + Dict-like or function transformations to apply to + that axis' values. Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index`` and + ``columns``. + index : dict-like or function + Alternative to specifying axis (``mapper, axis=0`` + is equivalent to ``index=mapper``). + columns : dict-like or function + Alternative to specifying axis (``mapper, axis=1`` + is equivalent to ``columns=mapper``). + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis to target with ``mapper``. Can be either the axis name + ('index', 'columns') or number (0, 1). The default is 'index'. + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Whether to modify the DataFrame rather than creating a new one. + If True then value of copy is ignored. + level : int or level name, default None + In case of a MultiIndex, only rename labels in the specified + level. + errors : {'ignore', 'raise'}, default 'ignore' + If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, + or `columns` contains labels that are not present in the Index + being transformed. + If 'ignore', existing keys will be renamed and extra keys will be + ignored. + + Returns + ------- + DataFrame or None + DataFrame with the renamed axis labels or None if ``inplace=True``. + + Raises + ------ + KeyError + If any of the labels is not found in the selected axis and + "errors='raise'". + + See Also + -------- + DataFrame.rename_axis : Set the name of the axis. + + Examples + -------- + ``DataFrame.rename`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. 
+ + Rename columns using a mapping: + + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.rename(columns={"A": "a", "B": "c"}) + a c + 0 1 4 + 1 2 5 + 2 3 6 + + Rename index using a mapping: + + >>> df.rename(index={0: "x", 1: "y", 2: "z"}) + A B + x 1 4 + y 2 5 + z 3 6 + + Cast index labels to a different type: + + >>> df.index + RangeIndex(start=0, stop=3, step=1) + >>> df.rename(index=str).index + Index(['0', '1', '2'], dtype='object') + + >>> df.rename(columns={"A": "a", "B": "b", "C": "c"}, errors="raise") + Traceback (most recent call last): + KeyError: ['C'] not found in axis + + Using axis-style parameters: + + >>> df.rename(str.lower, axis='columns') + a b + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename({1: 2, 2: 4}, axis='index') + A B + 0 1 4 + 2 2 5 + 4 3 6 + """ + return super()._rename( + mapper=mapper, + index=index, + columns=columns, + axis=axis, + copy=copy, + inplace=inplace, + level=level, + errors=errors, + ) + + @overload + def fillna( + self, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[False] = ..., + limit: int | None = ..., + downcast: dict | None = ..., + ) -> DataFrame: + ... + + @overload + def fillna( + self, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[True], + limit: int | None = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def fillna( + self, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: bool = ..., + limit: int | None = ..., + downcast: dict | None = ..., + ) -> DataFrame | None: + ... + + # error: Signature of "fillna" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) + @doc(NDFrame.fillna, **_shared_doc_kwargs) + def fillna( # type: ignore[override] + self, + value: Hashable | Mapping | Series | DataFrame = None, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, + ) -> DataFrame | None: + return super().fillna( + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + def pop(self, item: Hashable) -> Series: + """ + Return item and drop from frame. Raise KeyError if not found. + + Parameters + ---------- + item : label + Label of column to be popped. + + Returns + ------- + Series + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey', 'mammal', np.nan)], + ... columns=('name', 'class', 'max_speed')) + >>> df + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + >>> df.pop('class') + 0 bird + 1 bird + 2 mammal + 3 mammal + Name: class, dtype: object + + >>> df + name max_speed + 0 falcon 389.0 + 1 parrot 24.0 + 2 lion 80.5 + 3 monkey NaN + """ + return super().pop(item=item) + + # error: Signature of "replace" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def replace( + self, + to_replace=..., + value=..., + *, + inplace: Literal[False] = ..., + limit: int | None = ..., + regex: bool = ..., + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., + ) -> DataFrame: + ... 
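# Editor's sketch touching rename(), fillna() and pop() from the code above;
# not part of the imported source. Values are illustrative only.
import numpy as np
import pandas as pd

df = pd.DataFrame({"A": [1.0, np.nan], "B": [np.nan, 2.0]})
df = df.rename(columns={"A": "a", "B": "b"})    # keyword form keeps the intent clear
filled = df.fillna({"a": 0.0, "b": -1.0})       # per-column fill values via a dict
col_a = df.pop("a")                             # removes 'a' from df and returns it as a Series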
+ + @overload + def replace( + self, + to_replace=..., + value=..., + *, + inplace: Literal[True], + limit: int | None = ..., + regex: bool = ..., + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., + ) -> None: + ... + + # error: Signature of "replace" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "to_replace", "value"] + ) + @doc(NDFrame.replace, **_shared_doc_kwargs) + def replace( # type: ignore[override] + self, + to_replace=None, + value=lib.no_default, + inplace: bool = False, + limit: int | None = None, + regex: bool = False, + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default, + ) -> DataFrame | None: + return super().replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, + method=method, + ) + + def _replace_columnwise( + self, mapping: dict[Hashable, tuple[Any, Any]], inplace: bool, regex + ): + """ + Dispatch to Series.replace column-wise. + + Parameters + ---------- + mapping : dict + of the form {col: (target, value)} + inplace : bool + regex : bool or same types as `to_replace` in DataFrame.replace + + Returns + ------- + DataFrame or None + """ + # Operate column-wise + res = self if inplace else self.copy() + ax = self.columns + + for i in range(len(ax)): + if ax[i] in mapping: + ser = self.iloc[:, i] + + target, value = mapping[ax[i]] + newobj = ser.replace(target, value, regex=regex) + + res._iset_item(i, newobj) + + if inplace: + return + return res.__finalize__(self) + + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) + def shift( + self, + periods: int = 1, + freq: Frequency | None = None, + axis: Axis = 0, + fill_value: Hashable = lib.no_default, + ) -> DataFrame: + axis = self._get_axis_number(axis) + + ncols = len(self.columns) + if ( + axis == 1 + and periods != 0 + and freq is None + and fill_value is lib.no_default + and ncols > 0 + ): + # We will infer fill_value to match the closest column + + # Use a column that we know is valid for our column's dtype GH#38434 + label = self.columns[0] + + if periods > 0: + result = self.iloc[:, :-periods] + for col in range(min(ncols, abs(periods))): + # TODO(EA2D): doing this in a loop unnecessary with 2D EAs + # Define filler inside loop so we get a copy + filler = self.iloc[:, 0].shift(len(self)) + result.insert(0, label, filler, allow_duplicates=True) + else: + result = self.iloc[:, -periods:] + for col in range(min(ncols, abs(periods))): + # Define filler inside loop so we get a copy + filler = self.iloc[:, -1].shift(len(self)) + result.insert( + len(result.columns), label, filler, allow_duplicates=True + ) + + result.columns = self.columns.copy() + return result + elif ( + axis == 1 + and periods != 0 + and fill_value is not lib.no_default + and ncols > 0 + ): + arrays = self._mgr.arrays + if len(arrays) > 1 or ( + # If we only have one block and we know that we can't + # keep the same dtype (i.e. the _can_hold_element check) + # then we can go through the reindex_indexer path + # (and avoid casting logic in the Block method). + # The exception to this (until 2.0) is datetimelike + # dtypes with integers, which cast. 
+ not can_hold_element(arrays[0], fill_value) + # TODO(2.0): remove special case for integer-with-datetimelike + # once deprecation is enforced + and not ( + lib.is_integer(fill_value) and needs_i8_conversion(arrays[0].dtype) + ) + ): + # GH#35488 we need to watch out for multi-block cases + # We only get here with fill_value not-lib.no_default + nper = abs(periods) + nper = min(nper, ncols) + if periods > 0: + indexer = np.array( + [-1] * nper + list(range(ncols - periods)), dtype=np.intp + ) + else: + indexer = np.array( + list(range(nper, ncols)) + [-1] * nper, dtype=np.intp + ) + mgr = self._mgr.reindex_indexer( + self.columns, + indexer, + axis=0, + fill_value=fill_value, + allow_dups=True, + ) + res_df = self._constructor(mgr) + return res_df.__finalize__(self, method="shift") + + return super().shift( + periods=periods, freq=freq, axis=axis, fill_value=fill_value + ) + + @overload + def set_index( + self, + keys, + *, + drop: bool = ..., + append: bool = ..., + inplace: Literal[False] = ..., + verify_integrity: bool = ..., + ) -> DataFrame: + ... + + @overload + def set_index( + self, + keys, + *, + drop: bool = ..., + append: bool = ..., + inplace: Literal[True], + verify_integrity: bool = ..., + ) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "keys"]) + def set_index( + self, + keys, + drop: bool = True, + append: bool = False, + inplace: bool = False, + verify_integrity: bool = False, + ) -> DataFrame | None: + """ + Set the DataFrame index using existing columns. + + Set the DataFrame index (row labels) using one or more existing + columns or arrays (of the correct length). The index can replace the + existing index or expand on it. + + Parameters + ---------- + keys : label or array-like or list of labels/arrays + This parameter can be either a single column key, a single array of + the same length as the calling DataFrame, or a list containing an + arbitrary combination of column keys and arrays. Here, "array" + encompasses :class:`Series`, :class:`Index`, ``np.ndarray``, and + instances of :class:`~collections.abc.Iterator`. + drop : bool, default True + Delete columns to be used as the new index. + append : bool, default False + Whether to append columns to existing index. + inplace : bool, default False + Whether to modify the DataFrame rather than creating a new one. + verify_integrity : bool, default False + Check the new index for duplicates. Otherwise defer the check until + necessary. Setting to False will improve the performance of this + method. + + Returns + ------- + DataFrame or None + Changed row labels or None if ``inplace=True``. + + See Also + -------- + DataFrame.reset_index : Opposite of set_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. + + Examples + -------- + >>> df = pd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ... 
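# Editor's sketch of replace() and of shift() along the column axis with an
# explicit fill_value, the case handled by the block above; not part of the
# imported source.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
df.replace({"a": {1: 10}})                 # nested dict: {column: {old: new}}
df.shift(periods=1, axis=1, fill_value=0)  # columns move right; first column becomes 0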
'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 + + Set the index to become the 'month' column: + + >>> df.set_index('month') + year sale + month + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 + + Create a MultiIndex using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + Create a MultiIndex using an Index and a column: + + >>> df.set_index([pd.Index([1, 2, 3, 4]), 'year']) + month sale + year + 1 2012 1 55 + 2 2014 4 40 + 3 2013 7 84 + 4 2014 10 31 + + Create a MultiIndex using two Series: + + >>> s = pd.Series([1, 2, 3, 4]) + >>> df.set_index([s, s**2]) + month year sale + 1 1 1 2012 55 + 2 4 4 2014 40 + 3 9 7 2013 84 + 4 16 10 2014 31 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + self._check_inplace_and_allows_duplicate_labels(inplace) + if not isinstance(keys, list): + keys = [keys] + + err_msg = ( + 'The parameter "keys" may be a column key, one-dimensional ' + "array, or a list containing only valid column keys and " + "one-dimensional arrays." + ) + + missing: list[Hashable] = [] + for col in keys: + if isinstance(col, (Index, Series, np.ndarray, list, abc.Iterator)): + # arrays are fine as long as they are one-dimensional + # iterators get converted to list below + if getattr(col, "ndim", 1) != 1: + raise ValueError(err_msg) + else: + # everything else gets tried as a key; see GH 24969 + try: + found = col in self.columns + except TypeError as err: + raise TypeError( + f"{err_msg}. Received column of type {type(col)}" + ) from err + else: + if not found: + missing.append(col) + + if missing: + raise KeyError(f"None of {missing} are in the columns") + + if inplace: + frame = self + else: + frame = self.copy() + + arrays = [] + names: list[Hashable] = [] + if append: + names = list(self.index.names) + if isinstance(self.index, MultiIndex): + for i in range(self.index.nlevels): + arrays.append(self.index._get_level_values(i)) + else: + arrays.append(self.index) + + to_remove: list[Hashable] = [] + for col in keys: + if isinstance(col, MultiIndex): + for n in range(col.nlevels): + arrays.append(col._get_level_values(n)) + names.extend(col.names) + elif isinstance(col, (Index, Series)): + # if Index then not MultiIndex (treated above) + + # error: Argument 1 to "append" of "list" has incompatible type + # "Union[Index, Series]"; expected "Index" + arrays.append(col) # type:ignore[arg-type] + names.append(col.name) + elif isinstance(col, (list, np.ndarray)): + # error: Argument 1 to "append" of "list" has incompatible type + # "Union[List[Any], ndarray]"; expected "Index" + arrays.append(col) # type: ignore[arg-type] + names.append(None) + elif isinstance(col, abc.Iterator): + # error: Argument 1 to "append" of "list" has incompatible type + # "List[Any]"; expected "Index" + arrays.append(list(col)) # type: ignore[arg-type] + names.append(None) + # from here, col can only be a column label + else: + arrays.append(frame[col]._values) + names.append(col) + if drop: + to_remove.append(col) + + if len(arrays[-1]) != len(self): + # check newest element against length of calling frame, since + # ensure_index_from_sequences would not raise for append=False. 
+ raise ValueError( + f"Length mismatch: Expected {len(self)} rows, " + f"received array of length {len(arrays[-1])}" + ) + + index = ensure_index_from_sequences(arrays, names) + + if verify_integrity and not index.is_unique: + duplicates = index[index.duplicated()].unique() + raise ValueError(f"Index has duplicate keys: {duplicates}") + + # use set to handle duplicate column names gracefully in case of drop + for c in set(to_remove): + del frame[c] + + # clear up memory usage + index._cleanup() + + frame.index = index + + if not inplace: + return frame + return None + + @overload + def reset_index( + self, + level: IndexLabel = ..., + *, + drop: bool = ..., + inplace: Literal[False] = ..., + col_level: Hashable = ..., + col_fill: Hashable = ..., + allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, + ) -> DataFrame: + ... + + @overload + def reset_index( + self, + level: IndexLabel = ..., + *, + drop: bool = ..., + inplace: Literal[True], + col_level: Hashable = ..., + col_fill: Hashable = ..., + allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, + ) -> None: + ... + + @overload + def reset_index( + self, + level: IndexLabel = ..., + *, + drop: bool = ..., + inplace: bool = ..., + col_level: Hashable = ..., + col_fill: Hashable = ..., + allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, + ) -> DataFrame | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) + def reset_index( + self, + level: IndexLabel = None, + drop: bool = False, + inplace: bool = False, + col_level: Hashable = 0, + col_fill: Hashable = "", + allow_duplicates: bool | lib.NoDefault = lib.no_default, + names: Hashable | Sequence[Hashable] = None, + ) -> DataFrame | None: + """ + Reset the index, or a level of it. + + Reset the index of the DataFrame, and use the default one instead. + If the DataFrame has a MultiIndex, this method can remove one or more + levels. + + Parameters + ---------- + level : int, str, tuple, or list, default None + Only remove the given levels from the index. Removes all levels by + default. + drop : bool, default False + Do not try to insert index into dataframe columns. This resets + the index to the default integer index. + inplace : bool, default False + Whether to modify the DataFrame rather than creating a new one. + col_level : int or str, default 0 + If the columns have multiple levels, determines which level the + labels are inserted into. By default it is inserted into the first + level. + col_fill : object, default '' + If the columns have multiple levels, determines how the other + levels are named. If None then the index name is repeated. + allow_duplicates : bool, optional, default lib.no_default + Allow duplicate column labels to be created. + + .. versionadded:: 1.5.0 + + names : int, str or 1-dimensional list, default None + Using the given string, rename the DataFrame column which contains the + index data. If the DataFrame has a MultiIndex, this has to be a list or + tuple with length equal to the number of levels. + + .. versionadded:: 1.5.0 + + Returns + ------- + DataFrame or None + DataFrame with the new index or None if ``inplace=True``. + + See Also + -------- + DataFrame.set_index : Opposite of reset_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. 
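# Editor's sketch of set_index() per the docstring above; not part of the
# imported source. Column names are illustrative.
import pandas as pd

df = pd.DataFrame({"year": [2020, 2021], "month": [1, 2], "sale": [10, 20]})
df.set_index(["year", "month"])          # MultiIndex built from two columns
df.set_index("year", drop=False)         # keep the column while also using it as the index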
+ + Examples + -------- + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df + class max_speed + falcon bird 389.0 + parrot bird 24.0 + lion mammal 80.5 + monkey mammal NaN + + When we reset the index, the old index is added as a column, and a + new sequential index is used: + + >>> df.reset_index() + index class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + We can use the `drop` parameter to avoid the old index being added as + a column: + + >>> df.reset_index(drop=True) + class max_speed + 0 bird 389.0 + 1 bird 24.0 + 2 mammal 80.5 + 3 mammal NaN + + You can also use `reset_index` with `MultiIndex`. + + >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), + ... ('bird', 'parrot'), + ... ('mammal', 'lion'), + ... ('mammal', 'monkey')], + ... names=['class', 'name']) + >>> columns = pd.MultiIndex.from_tuples([('speed', 'max'), + ... ('species', 'type')]) + >>> df = pd.DataFrame([(389.0, 'fly'), + ... ( 24.0, 'fly'), + ... ( 80.5, 'run'), + ... (np.nan, 'jump')], + ... index=index, + ... columns=columns) + >>> df + speed species + max type + class name + bird falcon 389.0 fly + parrot 24.0 fly + mammal lion 80.5 run + monkey NaN jump + + Using the `names` parameter, choose a name for the index column: + + >>> df.reset_index(names=['classes', 'names']) + classes names speed species + max type + 0 bird falcon 389.0 fly + 1 bird parrot 24.0 fly + 2 mammal lion 80.5 run + 3 mammal monkey NaN jump + + If the index has multiple levels, we can reset a subset of them: + + >>> df.reset_index(level='class') + class speed species + max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + If we are not dropping the index, by default, it is placed in the top + level. 
We can place it in another level: + + >>> df.reset_index(level='class', col_level=1) + speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + When the index is inserted under another level, we can specify under + which one with the parameter `col_fill`: + + >>> df.reset_index(level='class', col_level=1, col_fill='species') + species speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + If we specify a nonexistent level for `col_fill`, it is created: + + >>> df.reset_index(level='class', col_level=1, col_fill='genus') + genus speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + """ + inplace = validate_bool_kwarg(inplace, "inplace") + self._check_inplace_and_allows_duplicate_labels(inplace) + if inplace: + new_obj = self + else: + new_obj = self.copy(deep=None) + if allow_duplicates is not lib.no_default: + allow_duplicates = validate_bool_kwarg(allow_duplicates, "allow_duplicates") + + new_index = default_index(len(new_obj)) + if level is not None: + if not isinstance(level, (tuple, list)): + level = [level] + level = [self.index._get_level_number(lev) for lev in level] + if len(level) < self.index.nlevels: + new_index = self.index.droplevel(level) + + if not drop: + to_insert: Iterable[tuple[Any, Any | None]] + + default = "index" if "index" not in self else "level_0" + names = self.index._get_default_index_names(names, default) + + if isinstance(self.index, MultiIndex): + to_insert = zip(self.index.levels, self.index.codes) + else: + to_insert = ((self.index, None),) + + multi_col = isinstance(self.columns, MultiIndex) + for i, (lev, lab) in reversed(list(enumerate(to_insert))): + if level is not None and i not in level: + continue + name = names[i] + if multi_col: + col_name = list(name) if isinstance(name, tuple) else [name] + if col_fill is None: + if len(col_name) not in (1, self.columns.nlevels): + raise ValueError( + "col_fill=None is incompatible " + f"with incomplete column name {name}" + ) + col_fill = col_name[0] + + lev_num = self.columns._get_level_number(col_level) + name_lst = [col_fill] * lev_num + col_name + missing = self.columns.nlevels - len(name_lst) + name_lst += [col_fill] * missing + name = tuple(name_lst) + + # to ndarray and maybe infer different dtype + level_values = lev._values + if level_values.dtype == np.object_: + level_values = lib.maybe_convert_objects(level_values) + + if lab is not None: + # if we have the codes, extract the values with a mask + level_values = algorithms.take( + level_values, lab, allow_fill=True, fill_value=lev._na_value + ) + + new_obj.insert( + 0, + name, + level_values, + allow_duplicates=allow_duplicates, + ) + + new_obj.index = new_index + if not inplace: + return new_obj + + return None + + # ---------------------------------------------------------------------- + # Reindex-based selection methods + + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) + def isna(self) -> DataFrame: + result = self._constructor(self._mgr.isna(func=isna)) + return result.__finalize__(self, method="isna") + + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) + def isnull(self) -> DataFrame: + """ + DataFrame.isnull is an alias for DataFrame.isna. 
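# Editor's sketch of reset_index(), including the `names` argument added in
# 1.5.0 and documented above; not part of the imported source.
import pandas as pd

df = pd.DataFrame({"v": [1, 2]}, index=pd.Index(["a", "b"], name="key"))
df.reset_index()                    # 'key' becomes an ordinary column
df.reset_index(drop=True)           # discard the old index instead of inserting it
df.reset_index(names=["the_key"])   # rename the inserted column (new in 1.5.0)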
+ """ + return self.isna() + + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) + def notna(self) -> DataFrame: + return ~self.isna() + + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) + def notnull(self) -> DataFrame: + """ + DataFrame.notnull is an alias for DataFrame.notna. + """ + return ~self.isna() + + @overload + def dropna( + self, + *, + axis: Axis = ..., + how: str | NoDefault = ..., + thresh: int | NoDefault = ..., + subset: IndexLabel = ..., + inplace: Literal[False] = ..., + ) -> DataFrame: + ... + + @overload + def dropna( + self, + *, + axis: Axis = ..., + how: str | NoDefault = ..., + thresh: int | NoDefault = ..., + subset: IndexLabel = ..., + inplace: Literal[True], + ) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def dropna( + self, + axis: Axis = 0, + how: str | NoDefault = no_default, + thresh: int | NoDefault = no_default, + subset: IndexLabel = None, + inplace: bool = False, + ) -> DataFrame | None: + """ + Remove missing values. + + See the :ref:`User Guide ` for more on which values are + considered missing, and how to work with missing data. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + Determine if rows or columns which contain missing values are + removed. + + * 0, or 'index' : Drop rows which contain missing values. + * 1, or 'columns' : Drop columns which contain missing value. + + .. versionchanged:: 1.0.0 + + Pass tuple or list to drop on multiple axes. + Only a single axis is allowed. + + how : {'any', 'all'}, default 'any' + Determine if row or column is removed from DataFrame, when we have + at least one NA or all NA. + + * 'any' : If any NA values are present, drop that row or column. + * 'all' : If all values are NA, drop that row or column. + + thresh : int, optional + Require that many non-NA values. Cannot be combined with how. + subset : column label or sequence of labels, optional + Labels along other axis to consider, e.g. if you are dropping rows + these would be a list of columns to include. + inplace : bool, default False + Whether to modify the DataFrame rather than creating a new one. + + Returns + ------- + DataFrame or None + DataFrame with NA entries dropped from it or None if ``inplace=True``. + + See Also + -------- + DataFrame.isna: Indicate missing values. + DataFrame.notna : Indicate existing (non-missing) values. + DataFrame.fillna : Replace missing values. + Series.dropna : Drop missing values. + Index.dropna : Drop missing indices. + + Examples + -------- + >>> df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'], + ... "toy": [np.nan, 'Batmobile', 'Bullwhip'], + ... "born": [pd.NaT, pd.Timestamp("1940-04-25"), + ... pd.NaT]}) + >>> df + name toy born + 0 Alfred NaN NaT + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Drop the rows where at least one element is missing. + + >>> df.dropna() + name toy born + 1 Batman Batmobile 1940-04-25 + + Drop the columns where at least one element is missing. + + >>> df.dropna(axis='columns') + name + 0 Alfred + 1 Batman + 2 Catwoman + + Drop the rows where all elements are missing. + + >>> df.dropna(how='all') + name toy born + 0 Alfred NaN NaT + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Keep only the rows with at least 2 non-NA values. + + >>> df.dropna(thresh=2) + name toy born + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Define in which columns to look for missing values. 
+ + >>> df.dropna(subset=['name', 'toy']) + name toy born + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Keep the DataFrame with valid entries in the same variable. + + >>> df.dropna(inplace=True) + >>> df + name toy born + 1 Batman Batmobile 1940-04-25 + """ + if (how is not no_default) and (thresh is not no_default): + raise TypeError( + "You cannot set both the how and thresh arguments at the same time." + ) + + if how is no_default: + how = "any" + + inplace = validate_bool_kwarg(inplace, "inplace") + if isinstance(axis, (tuple, list)): + # GH20987 + raise TypeError("supplying multiple axes to axis is no longer supported.") + + axis = self._get_axis_number(axis) + agg_axis = 1 - axis + + agg_obj = self + if subset is not None: + # subset needs to be list + if not is_list_like(subset): + subset = [subset] + ax = self._get_axis(agg_axis) + indices = ax.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(np.array(subset)[check].tolist()) + agg_obj = self.take(indices, axis=agg_axis) + + if thresh is not no_default: + count = agg_obj.count(axis=agg_axis) + mask = count >= thresh + elif how == "any": + # faster equivalent to 'agg_obj.count(agg_axis) == self.shape[agg_axis]' + mask = notna(agg_obj).all(axis=agg_axis, bool_only=False) + elif how == "all": + # faster equivalent to 'agg_obj.count(agg_axis) > 0' + mask = notna(agg_obj).any(axis=agg_axis, bool_only=False) + else: + raise ValueError(f"invalid how option: {how}") + + if np.all(mask): + result = self.copy() + else: + result = self.loc(axis=axis)[mask] + + if not inplace: + return result + self._update_inplace(result) + return None + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"]) + def drop_duplicates( + self, + subset: Hashable | Sequence[Hashable] | None = None, + keep: Literal["first", "last", False] = "first", + inplace: bool = False, + ignore_index: bool = False, + ) -> DataFrame | None: + """ + Return DataFrame with duplicate rows removed. + + Considering certain columns is optional. Indexes, including time indexes + are ignored. + + Parameters + ---------- + subset : column label or sequence of labels, optional + Only consider certain columns for identifying duplicates, by + default use all of the columns. + keep : {'first', 'last', False}, default 'first' + Determines which duplicates (if any) to keep. + - ``first`` : Drop duplicates except for the first occurrence. + - ``last`` : Drop duplicates except for the last occurrence. + - False : Drop all duplicates. + inplace : bool, default False + Whether to modify the DataFrame rather than creating a new one. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + Returns + ------- + DataFrame or None + DataFrame with duplicates removed or None if ``inplace=True``. + + See Also + -------- + DataFrame.value_counts: Count unique combinations of columns. + + Examples + -------- + Consider dataset containing ramen rating. + + >>> df = pd.DataFrame({ + ... 'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'], + ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], + ... 'rating': [4, 4, 3.5, 15, 5] + ... }) + >>> df + brand style rating + 0 Yum Yum cup 4.0 + 1 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 3 Indomie pack 15.0 + 4 Indomie pack 5.0 + + By default, it removes duplicate rows based on all columns. 
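# Editor's sketch of dropna() per the docstring above; not part of the
# imported source. The NA layout is invented.
import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, np.nan, 3.0], "b": [np.nan, np.nan, 3.0]})
df.dropna()             # drop rows containing any NA
df.dropna(how="all")    # drop rows that are entirely NA
df.dropna(thresh=2)     # keep rows with at least two non-NA values (exclusive with how)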
+ + >>> df.drop_duplicates() + brand style rating + 0 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 3 Indomie pack 15.0 + 4 Indomie pack 5.0 + + To remove duplicates on specific column(s), use ``subset``. + + >>> df.drop_duplicates(subset=['brand']) + brand style rating + 0 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + + To remove duplicates and keep last occurrences, use ``keep``. + + >>> df.drop_duplicates(subset=['brand', 'style'], keep='last') + brand style rating + 1 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 4 Indomie pack 5.0 + """ + if self.empty: + return self.copy() + + inplace = validate_bool_kwarg(inplace, "inplace") + ignore_index = validate_bool_kwarg(ignore_index, "ignore_index") + duplicated = self.duplicated(subset, keep=keep) + + result = self[-duplicated] + if ignore_index: + result.index = default_index(len(result)) + + if inplace: + self._update_inplace(result) + return None + else: + return result + + def duplicated( + self, + subset: Hashable | Sequence[Hashable] | None = None, + keep: Literal["first", "last", False] = "first", + ) -> Series: + """ + Return boolean Series denoting duplicate rows. + + Considering certain columns is optional. + + Parameters + ---------- + subset : column label or sequence of labels, optional + Only consider certain columns for identifying duplicates, by + default use all of the columns. + keep : {'first', 'last', False}, default 'first' + Determines which duplicates (if any) to mark. + + - ``first`` : Mark duplicates as ``True`` except for the first occurrence. + - ``last`` : Mark duplicates as ``True`` except for the last occurrence. + - False : Mark all duplicates as ``True``. + + Returns + ------- + Series + Boolean series for each duplicated rows. + + See Also + -------- + Index.duplicated : Equivalent method on index. + Series.duplicated : Equivalent method on Series. + Series.drop_duplicates : Remove duplicate values from Series. + DataFrame.drop_duplicates : Remove duplicate values from DataFrame. + + Examples + -------- + Consider dataset containing ramen rating. + + >>> df = pd.DataFrame({ + ... 'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'], + ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], + ... 'rating': [4, 4, 3.5, 15, 5] + ... }) + >>> df + brand style rating + 0 Yum Yum cup 4.0 + 1 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 3 Indomie pack 15.0 + 4 Indomie pack 5.0 + + By default, for each set of duplicated values, the first occurrence + is set on False and all others on True. + + >>> df.duplicated() + 0 False + 1 True + 2 False + 3 False + 4 False + dtype: bool + + By using 'last', the last occurrence of each set of duplicated values + is set on False and all others on True. + + >>> df.duplicated(keep='last') + 0 True + 1 False + 2 False + 3 False + 4 False + dtype: bool + + By setting ``keep`` on False, all duplicates are True. + + >>> df.duplicated(keep=False) + 0 True + 1 True + 2 False + 3 False + 4 False + dtype: bool + + To find duplicates on specific column(s), use ``subset``. 
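# Editor's sketch of duplicated() and drop_duplicates() from the code above;
# not part of the imported source.
import pandas as pd

df = pd.DataFrame({"k": ["a", "a", "b"], "v": [1, 1, 2]})
mask = df.duplicated()                                                      # first occurrence marked False
deduped = df.drop_duplicates(subset=["k"], keep="last", ignore_index=True)  # relabel 0..n-1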
+ + >>> df.duplicated(subset=['brand']) + 0 False + 1 True + 2 False + 3 True + 4 True + dtype: bool + """ + + if self.empty: + return self._constructor_sliced(dtype=bool) + + def f(vals) -> tuple[np.ndarray, int]: + labels, shape = algorithms.factorize(vals, size_hint=len(self)) + return labels.astype("i8", copy=False), len(shape) + + if subset is None: + # https://github.com/pandas-dev/pandas/issues/28770 + # Incompatible types in assignment (expression has type "Index", variable + # has type "Sequence[Any]") + subset = self.columns # type: ignore[assignment] + elif ( + not np.iterable(subset) + or isinstance(subset, str) + or isinstance(subset, tuple) + and subset in self.columns + ): + subset = (subset,) + + # needed for mypy since can't narrow types using np.iterable + subset = cast(Sequence, subset) + + # Verify all columns in subset exist in the queried dataframe + # Otherwise, raise a KeyError, same as if you try to __getitem__ with a + # key that doesn't exist. + diff = set(subset) - set(self.columns) + if diff: + raise KeyError(Index(diff)) + + if len(subset) == 1 and self.columns.is_unique: + # GH#45236 This is faster than get_group_index below + result = self[subset[0]].duplicated(keep) + result.name = None + else: + vals = (col.values for name, col in self.items() if name in subset) + labels, shape = map(list, zip(*map(f, vals))) + + ids = get_group_index( + labels, + # error: Argument 1 to "tuple" has incompatible type "List[_T]"; + # expected "Iterable[int]" + tuple(shape), # type: ignore[arg-type] + sort=False, + xnull=False, + ) + result = self._constructor_sliced(duplicated(ids, keep), index=self.index) + return result.__finalize__(self, method="duplicated") + + # ---------------------------------------------------------------------- + # Sorting + # error: Signature of "sort_values" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def sort_values( + self, + by: IndexLabel, + *, + axis: Axis = ..., + ascending=..., + inplace: Literal[False] = ..., + kind: str = ..., + na_position: str = ..., + ignore_index: bool = ..., + key: ValueKeyFunc = ..., + ) -> DataFrame: + ... + + @overload + def sort_values( + self, + by: IndexLabel, + *, + axis: Axis = ..., + ascending=..., + inplace: Literal[True], + kind: str = ..., + na_position: str = ..., + ignore_index: bool = ..., + key: ValueKeyFunc = ..., + ) -> None: + ... + + # TODO: Just move the sort_values doc here. + # error: Signature of "sort_values" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "by"]) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.sort_values.__doc__) + def sort_values( # type: ignore[override] + self, + by: IndexLabel, + axis: Axis = 0, + ascending: bool | list[bool] | tuple[bool, ...] 
= True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool = False, + key: ValueKeyFunc = None, + ) -> DataFrame | None: + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + ascending = validate_ascending(ascending) + if not isinstance(by, list): + by = [by] + # error: Argument 1 to "len" has incompatible type "Union[bool, List[bool]]"; + # expected "Sized" + if is_sequence(ascending) and ( + len(by) != len(ascending) # type: ignore[arg-type] + ): + # error: Argument 1 to "len" has incompatible type "Union[bool, + # List[bool]]"; expected "Sized" + raise ValueError( + f"Length of ascending ({len(ascending)})" # type: ignore[arg-type] + f" != length of by ({len(by)})" + ) + if len(by) > 1: + + keys = [self._get_label_or_level_values(x, axis=axis) for x in by] + + # need to rewrap columns in Series to apply key function + if key is not None: + # error: List comprehension has incompatible type List[Series]; + # expected List[ndarray] + keys = [ + Series(k, name=name) # type: ignore[misc] + for (k, name) in zip(keys, by) + ] + + indexer = lexsort_indexer( + keys, orders=ascending, na_position=na_position, key=key + ) + elif len(by): + # len(by) == 1 + + by = by[0] + k = self._get_label_or_level_values(by, axis=axis) + + # need to rewrap column in Series to apply key function + if key is not None: + # error: Incompatible types in assignment (expression has type + # "Series", variable has type "ndarray") + k = Series(k, name=by) # type: ignore[assignment] + + if isinstance(ascending, (tuple, list)): + ascending = ascending[0] + + indexer = nargsort( + k, kind=kind, ascending=ascending, na_position=na_position, key=key + ) + else: + return self.copy() + + new_data = self._mgr.take( + indexer, axis=self._get_block_manager_axis(axis), verify=False + ) + + if ignore_index: + new_data.set_axis( + self._get_block_manager_axis(axis), default_index(len(indexer)) + ) + + result = self._constructor(new_data) + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="sort_values") + + @overload + def sort_index( + self, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool | Sequence[bool] = ..., + inplace: Literal[True], + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool = ..., + ignore_index: bool = ..., + key: IndexKeyFunc = ..., + ) -> None: + ... + + @overload + def sort_index( + self, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool | Sequence[bool] = ..., + inplace: Literal[False] = ..., + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool = ..., + ignore_index: bool = ..., + key: IndexKeyFunc = ..., + ) -> DataFrame: + ... + + @overload + def sort_index( + self, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool | Sequence[bool] = ..., + inplace: bool = ..., + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool = ..., + ignore_index: bool = ..., + key: IndexKeyFunc = ..., + ) -> DataFrame | None: + ... 
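A short sketch of the ``key`` path handled in ``sort_values`` above (hypothetical data): each ``by`` column is rewrapped in a Series before the key callable runs, so vectorized string methods apply directly.

    import pandas as pd

    frame = pd.DataFrame({"name": ["banana", "Apple", "cherry"], "qty": [3, 1, 2]})
    # Case-insensitive ordering: the key receives the 'name' column as a Series.
    out = frame.sort_values("name", key=lambda s: s.str.lower())
    # expected order of 'name': Apple, banana, cherry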
+ + # error: Signature of "sort_index" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def sort_index( # type: ignore[override] + self, + axis: Axis = 0, + level: IndexLabel = None, + ascending: bool | Sequence[bool] = True, + inplace: bool = False, + kind: SortKind = "quicksort", + na_position: NaPosition = "last", + sort_remaining: bool = True, + ignore_index: bool = False, + key: IndexKeyFunc = None, + ) -> DataFrame | None: + """ + Sort object by labels (along an axis). + + Returns a new DataFrame sorted by label if `inplace` argument is + ``False``, otherwise updates the original DataFrame and returns None. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis along which to sort. The value 0 identifies the rows, + and 1 identifies the columns. + level : int or level name or list of ints or list of level names + If not None, sort on values in specified index level(s). + ascending : bool or list-like of bools, default True + Sort ascending vs. descending. When the index is a MultiIndex the + sort direction can be controlled for each level individually. + inplace : bool, default False + Whether to modify the DataFrame rather than creating a new one. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. `mergesort` and `stable` are the only stable algorithms. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + Puts NaNs at the beginning if `first`; `last` puts NaNs at the end. + Not implemented for MultiIndex. + sort_remaining : bool, default True + If True and sorting by level and index is multilevel, sort by other + levels too (in order) after sorting by specified level. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + key : callable, optional + If not None, apply the key function to the index values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. It should expect an + ``Index`` and return an ``Index`` of the same shape. For MultiIndex + inputs, the key is applied *per level*. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame or None + The original DataFrame sorted by the labels or None if ``inplace=True``. + + See Also + -------- + Series.sort_index : Sort Series by the index. + DataFrame.sort_values : Sort DataFrame by the value. + Series.sort_values : Sort Series by the value. + + Examples + -------- + >>> df = pd.DataFrame([1, 2, 3, 4, 5], index=[100, 29, 234, 1, 150], + ... columns=['A']) + >>> df.sort_index() + A + 1 4 + 29 2 + 100 1 + 150 5 + 234 3 + + By default, it sorts in ascending order, to sort in descending order, + use ``ascending=False`` + + >>> df.sort_index(ascending=False) + A + 234 3 + 150 5 + 100 1 + 29 2 + 1 4 + + A key function can be specified which is applied to the index before + sorting. For a ``MultiIndex`` this is applied to each level separately. 
+ + >>> df = pd.DataFrame({"a": [1, 2, 3, 4]}, index=['A', 'b', 'C', 'd']) + >>> df.sort_index(key=lambda x: x.str.lower()) + a + A 1 + b 2 + C 3 + d 4 + """ + return super().sort_index( + axis=axis, + level=level, + ascending=ascending, + inplace=inplace, + kind=kind, + na_position=na_position, + sort_remaining=sort_remaining, + ignore_index=ignore_index, + key=key, + ) + + def value_counts( + self, + subset: Sequence[Hashable] | None = None, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + dropna: bool = True, + ) -> Series: + """ + Return a Series containing counts of unique rows in the DataFrame. + + .. versionadded:: 1.1.0 + + Parameters + ---------- + subset : list-like, optional + Columns to use when counting unique combinations. + normalize : bool, default False + Return proportions rather than frequencies. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + dropna : bool, default True + Don’t include counts of rows that contain NA values. + + .. versionadded:: 1.3.0 + + Returns + ------- + Series + + See Also + -------- + Series.value_counts: Equivalent method on Series. + + Notes + ----- + The returned Series will have a MultiIndex with one level per input + column. By default, rows that contain any NA values are omitted from + the result. By default, the resulting Series will be in descending + order so that the first element is the most frequently-occurring row. + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6], + ... 'num_wings': [2, 0, 0, 0]}, + ... index=['falcon', 'dog', 'cat', 'ant']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + cat 4 0 + ant 6 0 + + >>> df.value_counts() + num_legs num_wings + 4 0 2 + 2 2 1 + 6 0 1 + dtype: int64 + + >>> df.value_counts(sort=False) + num_legs num_wings + 2 2 1 + 4 0 2 + 6 0 1 + dtype: int64 + + >>> df.value_counts(ascending=True) + num_legs num_wings + 2 2 1 + 6 0 1 + 4 0 2 + dtype: int64 + + >>> df.value_counts(normalize=True) + num_legs num_wings + 4 0 0.50 + 2 2 0.25 + 6 0 0.25 + dtype: float64 + + With `dropna` set to `False` we can also count rows with NA values. + + >>> df = pd.DataFrame({'first_name': ['John', 'Anne', 'John', 'Beth'], + ... 'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']}) + >>> df + first_name middle_name + 0 John Smith + 1 Anne + 2 John + 3 Beth Louise + + >>> df.value_counts() + first_name middle_name + Beth Louise 1 + John Smith 1 + dtype: int64 + + >>> df.value_counts(dropna=False) + first_name middle_name + Anne NaN 1 + Beth Louise 1 + John Smith 1 + NaN 1 + dtype: int64 + """ + if subset is None: + subset = self.columns.tolist() + + counts = self.groupby(subset, dropna=dropna).grouper.size() + + if sort: + counts = counts.sort_values(ascending=ascending) + if normalize: + counts /= counts.sum() + + # Force MultiIndex for single column + if len(subset) == 1: + counts.index = MultiIndex.from_arrays( + [counts.index], names=[counts.index.name] + ) + + return counts + + def nlargest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame: + """ + Return the first `n` rows ordered by `columns` in descending order. + + Return the first `n` rows with the largest values in `columns`, in + descending order. The columns that are not specified are returned as + well, but not used for ordering. + + This method is equivalent to + ``df.sort_values(columns, ascending=False).head(n)``, but more + performant. + + Parameters + ---------- + n : int + Number of rows to return. 
+ columns : label or list of labels + Column label(s) to order by. + keep : {'first', 'last', 'all'}, default 'first' + Where there are duplicate values: + + - ``first`` : prioritize the first occurrence(s) + - ``last`` : prioritize the last occurrence(s) + - ``all`` : do not drop any duplicates, even it means + selecting more than `n` items. + + Returns + ------- + DataFrame + The first `n` rows ordered by the given columns in descending + order. + + See Also + -------- + DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in + ascending order. + DataFrame.sort_values : Sort DataFrame by the values. + DataFrame.head : Return the first `n` rows without re-ordering. + + Notes + ----- + This function cannot be used with all column types. For example, when + specifying columns with `object` or `category` dtypes, ``TypeError`` is + raised. + + Examples + -------- + >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000, + ... 434000, 434000, 337000, 11300, + ... 11300, 11300], + ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128, + ... 17036, 182, 38, 311], + ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN", + ... "IS", "NR", "TV", "AI"]}, + ... index=["Italy", "France", "Malta", + ... "Maldives", "Brunei", "Iceland", + ... "Nauru", "Tuvalu", "Anguilla"]) + >>> df + population GDP alpha-2 + Italy 59000000 1937894 IT + France 65000000 2583560 FR + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN + Iceland 337000 17036 IS + Nauru 11300 182 NR + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + + In the following example, we will use ``nlargest`` to select the three + rows having the largest values in column "population". + + >>> df.nlargest(3, 'population') + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Malta 434000 12011 MT + + When using ``keep='last'``, ties are resolved in reverse order: + + >>> df.nlargest(3, 'population', keep='last') + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Brunei 434000 12128 BN + + When using ``keep='all'``, all duplicate items are maintained: + + >>> df.nlargest(3, 'population', keep='all') + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN + + To order by the largest values in column "population" and then "GDP", + we can specify multiple columns like in the next example. + + >>> df.nlargest(3, ['population', 'GDP']) + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Brunei 434000 12128 BN + """ + return algorithms.SelectNFrame(self, n=n, keep=keep, columns=columns).nlargest() + + def nsmallest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame: + """ + Return the first `n` rows ordered by `columns` in ascending order. + + Return the first `n` rows with the smallest values in `columns`, in + ascending order. The columns that are not specified are returned as + well, but not used for ordering. + + This method is equivalent to + ``df.sort_values(columns, ascending=True).head(n)``, but more + performant. + + Parameters + ---------- + n : int + Number of items to retrieve. + columns : list or str + Column name or names to order by. + keep : {'first', 'last', 'all'}, default 'first' + Where there are duplicate values: + + - ``first`` : take the first occurrence. + - ``last`` : take the last occurrence. + - ``all`` : do not drop any duplicates, even it means + selecting more than `n` items. 
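A quick illustration of the ``sort_values(...).head(n)`` equivalence noted above, as a hedged sketch on made-up data:

    import pandas as pd

    frame = pd.DataFrame({"x": [5, 1, 4, 2, 3]})
    assert frame.nsmallest(2, "x").equals(frame.sort_values("x").head(2))
    # both select the rows where x == 1 and x == 2, in that order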
+ + Returns + ------- + DataFrame + + See Also + -------- + DataFrame.nlargest : Return the first `n` rows ordered by `columns` in + descending order. + DataFrame.sort_values : Sort DataFrame by the values. + DataFrame.head : Return the first `n` rows without re-ordering. + + Examples + -------- + >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000, + ... 434000, 434000, 337000, 337000, + ... 11300, 11300], + ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128, + ... 17036, 182, 38, 311], + ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN", + ... "IS", "NR", "TV", "AI"]}, + ... index=["Italy", "France", "Malta", + ... "Maldives", "Brunei", "Iceland", + ... "Nauru", "Tuvalu", "Anguilla"]) + >>> df + population GDP alpha-2 + Italy 59000000 1937894 IT + France 65000000 2583560 FR + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN + Iceland 337000 17036 IS + Nauru 337000 182 NR + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + + In the following example, we will use ``nsmallest`` to select the + three rows having the smallest values in column "population". + + >>> df.nsmallest(3, 'population') + population GDP alpha-2 + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + Iceland 337000 17036 IS + + When using ``keep='last'``, ties are resolved in reverse order: + + >>> df.nsmallest(3, 'population', keep='last') + population GDP alpha-2 + Anguilla 11300 311 AI + Tuvalu 11300 38 TV + Nauru 337000 182 NR + + When using ``keep='all'``, all duplicate items are maintained: + + >>> df.nsmallest(3, 'population', keep='all') + population GDP alpha-2 + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + Iceland 337000 17036 IS + Nauru 337000 182 NR + + To order by the smallest values in column "population" and then "GDP", we can + specify multiple columns like in the next example. + + >>> df.nsmallest(3, ['population', 'GDP']) + population GDP alpha-2 + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + Nauru 337000 182 NR + """ + return algorithms.SelectNFrame( + self, n=n, keep=keep, columns=columns + ).nsmallest() + + @doc( + Series.swaplevel, + klass=_shared_doc_kwargs["klass"], + extra_params=dedent( + """axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to swap levels on. 0 or 'index' for row-wise, 1 or + 'columns' for column-wise.""" + ), + examples=dedent( + """\ + Examples + -------- + >>> df = pd.DataFrame( + ... {"Grade": ["A", "B", "A", "C"]}, + ... index=[ + ... ["Final exam", "Final exam", "Coursework", "Coursework"], + ... ["History", "Geography", "History", "Geography"], + ... ["January", "February", "March", "April"], + ... ], + ... ) + >>> df + Grade + Final exam History January A + Geography February B + Coursework History March A + Geography April C + + In the following example, we will swap the levels of the indices. + Here, we will swap the levels column-wise, but levels can be swapped row-wise + in a similar manner. Note that column-wise is the default behaviour. + By not supplying any arguments for i and j, we swap the last and second to + last indices. + + >>> df.swaplevel() + Grade + Final exam January History A + February Geography B + Coursework March History A + April Geography C + + By supplying one argument, we can choose which index to swap the last + index with. We can for example swap the first index with the last one as + follows. 
+ + >>> df.swaplevel(0) + Grade + January History Final exam A + February Geography Final exam B + March History Coursework A + April Geography Coursework C + + We can also define explicitly which indices we want to swap by supplying values + for both i and j. Here, we for example swap the first and second indices. + + >>> df.swaplevel(0, 1) + Grade + History Final exam January A + Geography Final exam February B + History Coursework March A + Geography Coursework April C""" + ), + ) + def swaplevel(self, i: Axis = -2, j: Axis = -1, axis: Axis = 0) -> DataFrame: + result = self.copy() + + axis = self._get_axis_number(axis) + + if not isinstance(result._get_axis(axis), MultiIndex): # pragma: no cover + raise TypeError("Can only swap levels on a hierarchical axis.") + + if axis == 0: + assert isinstance(result.index, MultiIndex) + result.index = result.index.swaplevel(i, j) + else: + assert isinstance(result.columns, MultiIndex) + result.columns = result.columns.swaplevel(i, j) + return result + + def reorder_levels(self, order: Sequence[Axis], axis: Axis = 0) -> DataFrame: + """ + Rearrange index levels using input order. May not drop or duplicate levels. + + Parameters + ---------- + order : list of int or list of str + List representing new level order. Reference level by number + (position) or by key (label). + axis : {0 or 'index', 1 or 'columns'}, default 0 + Where to reorder levels. + + Returns + ------- + DataFrame + + Examples + -------- + >>> data = { + ... "class": ["Mammals", "Mammals", "Reptiles"], + ... "diet": ["Omnivore", "Carnivore", "Carnivore"], + ... "species": ["Humans", "Dogs", "Snakes"], + ... } + >>> df = pd.DataFrame(data, columns=["class", "diet", "species"]) + >>> df = df.set_index(["class", "diet"]) + >>> df + species + class diet + Mammals Omnivore Humans + Carnivore Dogs + Reptiles Carnivore Snakes + + Let's reorder the levels of the index: + + >>> df.reorder_levels(["diet", "class"]) + species + diet class + Omnivore Mammals Humans + Carnivore Mammals Dogs + Reptiles Snakes + """ + axis = self._get_axis_number(axis) + if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover + raise TypeError("Can only reorder levels on a hierarchical axis.") + + result = self.copy() + + if axis == 0: + assert isinstance(result.index, MultiIndex) + result.index = result.index.reorder_levels(order) + else: + assert isinstance(result.columns, MultiIndex) + result.columns = result.columns.reorder_levels(order) + return result + + # ---------------------------------------------------------------------- + # Arithmetic Methods + + def _cmp_method(self, other, op): + axis = 1 # only relevant for Series other case + + self, other = ops.align_method_FRAME(self, other, axis, flex=False, level=None) + + # See GH#4537 for discussion of scalar op behavior + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + def _arith_method(self, other, op): + if ops.should_reindex_frame_op(self, other, op, 1, 1, None, None): + return ops.frame_arith_method_with_reindex(self, other, op) + + axis = 1 # only relevant for Series other case + other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],)) + + self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + _logical_method = _arith_method + + def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): + """ + Evaluate the frame operation 
func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. + + Parameters + ---------- + right : scalar, Series, or DataFrame + func : arithmetic or comparison operator + axis : {None, 0, 1} + + Returns + ------- + DataFrame + """ + # Get the appropriate array-op to apply to each column/block's values. + array_op = ops.get_array_op(func) + + right = lib.item_from_zerodim(right) + if not is_list_like(right): + # i.e. scalar, faster than checking np.ndim(right) == 0 + with np.errstate(all="ignore"): + bm = self._mgr.apply(array_op, right=right) + return self._constructor(bm) + + elif isinstance(right, DataFrame): + assert self.index.equals(right.index) + assert self.columns.equals(right.columns) + # TODO: The previous assertion `assert right._indexed_same(self)` + # fails in cases with empty columns reached via + # _frame_arith_method_with_reindex + + # TODO operate_blockwise expects a manager of the same type + with np.errstate(all="ignore"): + bm = self._mgr.operate_blockwise( + # error: Argument 1 to "operate_blockwise" of "ArrayManager" has + # incompatible type "Union[ArrayManager, BlockManager]"; expected + # "ArrayManager" + # error: Argument 1 to "operate_blockwise" of "BlockManager" has + # incompatible type "Union[ArrayManager, BlockManager]"; expected + # "BlockManager" + right._mgr, # type: ignore[arg-type] + array_op, + ) + return self._constructor(bm) + + elif isinstance(right, Series) and axis == 1: + # axis=1 means we want to operate row-by-row + assert right.index.equals(self.columns) + + right = right._values + # maybe_align_as_frame ensures we do not have an ndarray here + assert not isinstance(right, np.ndarray) + + with np.errstate(all="ignore"): + arrays = [ + array_op(_left, _right) + for _left, _right in zip(self._iter_column_arrays(), right) + ] + + elif isinstance(right, Series): + assert right.index.equals(self.index) # Handle other cases later + right = right._values + + with np.errstate(all="ignore"): + arrays = [array_op(left, right) for left in self._iter_column_arrays()] + + else: + # Remaining cases have less-obvious dispatch rules + raise NotImplementedError(right) + + return type(self)._from_arrays( + arrays, self.columns, self.index, verify_integrity=False + ) + + def _combine_frame(self, other: DataFrame, func, fill_value=None): + # at this point we have `self._indexed_same(other)` + + if fill_value is None: + # since _arith_op may be called in a loop, avoid function call + # overhead if possible by doing this check once + _arith_op = func + + else: + + def _arith_op(left, right): + # for the mixed_type case where we iterate over columns, + # _arith_op(left, right) is equivalent to + # left._binop(right, func, fill_value=fill_value) + left, right = ops.fill_binop(left, right, fill_value) + return func(left, right) + + new_data = self._dispatch_frame_op(other, _arith_op) + return new_data + + def _construct_result(self, result) -> DataFrame: + """ + Wrap the result of an arithmetic, comparison, or logical operation. 
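A user-level sketch of the ``Series``-with-``axis == 1`` branch dispatched above (hypothetical frames): the Series is aligned against the frame's columns and the operation is applied row by row.

    import pandas as pd

    frame = pd.DataFrame({"a": [1, 2], "b": [10, 20]})
    row = pd.Series({"a": 1, "b": 100})
    out = frame - row
    # every row has 1 subtracted in column 'a' and 100 subtracted in column 'b'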
+ + Parameters + ---------- + result : DataFrame + + Returns + ------- + DataFrame + """ + out = self._constructor(result, copy=False) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + out.columns = self.columns + out.index = self.index + return out + + def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: + # Naive implementation, room for optimization + div = self // other + mod = self - div * other + return div, mod + + def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: + # Naive implementation, room for optimization + div = other // self + mod = other - div * self + return div, mod + + # ---------------------------------------------------------------------- + # Combination-Related + + @doc( + _shared_docs["compare"], + """ +Returns +------- +DataFrame + DataFrame that shows the differences stacked side by side. + + The resulting index will be a MultiIndex with 'self' and 'other' + stacked alternately at the inner level. + +Raises +------ +ValueError + When the two DataFrames don't have identical labels or shape. + +See Also +-------- +Series.compare : Compare with another Series and show differences. +DataFrame.equals : Test whether two objects contain the same elements. + +Notes +----- +Matching NaNs will not appear as a difference. + +Can only compare identically-labeled +(i.e. same shape, identical row and column labels) DataFrames + +Examples +-------- +>>> df = pd.DataFrame( +... {{ +... "col1": ["a", "a", "b", "b", "a"], +... "col2": [1.0, 2.0, 3.0, np.nan, 5.0], +... "col3": [1.0, 2.0, 3.0, 4.0, 5.0] +... }}, +... columns=["col1", "col2", "col3"], +... ) +>>> df + col1 col2 col3 +0 a 1.0 1.0 +1 a 2.0 2.0 +2 b 3.0 3.0 +3 b NaN 4.0 +4 a 5.0 5.0 + +>>> df2 = df.copy() +>>> df2.loc[0, 'col1'] = 'c' +>>> df2.loc[2, 'col3'] = 4.0 +>>> df2 + col1 col2 col3 +0 c 1.0 1.0 +1 a 2.0 2.0 +2 b 3.0 4.0 +3 b NaN 4.0 +4 a 5.0 5.0 + +Align the differences on columns + +>>> df.compare(df2) + col1 col3 + self other self other +0 a c NaN NaN +2 NaN NaN 3.0 4.0 + +Assign result_names + +>>> df.compare(df2, result_names=("left", "right")) + col1 col3 + left right left right +0 a c NaN NaN +2 NaN NaN 3.0 4.0 + +Stack the differences on rows + +>>> df.compare(df2, align_axis=0) + col1 col3 +0 self a NaN + other c NaN +2 self NaN 3.0 + other NaN 4.0 + +Keep the equal values + +>>> df.compare(df2, keep_equal=True) + col1 col3 + self other self other +0 a c 1.0 1.0 +2 b b 3.0 4.0 + +Keep all original rows and columns + +>>> df.compare(df2, keep_shape=True) + col1 col2 col3 + self other self other self other +0 a c NaN NaN NaN NaN +1 NaN NaN NaN NaN NaN NaN +2 NaN NaN NaN NaN 3.0 4.0 +3 NaN NaN NaN NaN NaN NaN +4 NaN NaN NaN NaN NaN NaN + +Keep all original rows and columns and also all original values + +>>> df.compare(df2, keep_shape=True, keep_equal=True) + col1 col2 col3 + self other self other self other +0 a c 1.0 1.0 1.0 1.0 +1 a a 2.0 2.0 2.0 2.0 +2 b b 3.0 3.0 3.0 4.0 +3 b b NaN NaN 4.0 4.0 +4 a a 5.0 5.0 5.0 5.0 +""", + klass=_shared_doc_kwargs["klass"], + ) + def compare( + self, + other: DataFrame, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names: Suffixes = ("self", "other"), + ) -> DataFrame: + return super().compare( + other=other, + align_axis=align_axis, + keep_shape=keep_shape, + keep_equal=keep_equal, + result_names=result_names, + ) + + def combine( + self, + other: DataFrame, + func: Callable[[Series, Series], Series | Hashable], + fill_value=None, + overwrite: bool = True, + ) -> 
DataFrame: + """ + Perform column-wise combine with another DataFrame. + + Combines a DataFrame with `other` DataFrame using `func` + to element-wise combine columns. The row and column indexes of the + resulting DataFrame will be the union of the two. + + Parameters + ---------- + other : DataFrame + The DataFrame to merge column-wise. + func : function + Function that takes two series as inputs and return a Series or a + scalar. Used to merge the two dataframes column by columns. + fill_value : scalar value, default None + The value to fill NaNs with prior to passing any column to the + merge func. + overwrite : bool, default True + If True, columns in `self` that do not exist in `other` will be + overwritten with NaNs. + + Returns + ------- + DataFrame + Combination of the provided DataFrames. + + See Also + -------- + DataFrame.combine_first : Combine two DataFrame objects and default to + non-null values in frame calling the method. + + Examples + -------- + Combine using a simple function that chooses the smaller column. + + >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller) + A B + 0 0 3 + 1 0 3 + + Example using a true element-wise combine function. + + >>> df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1.combine(df2, np.minimum) + A B + 0 1 2 + 1 0 3 + + Using `fill_value` fills Nones prior to passing the column to the + merge function. + + >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1.combine(df2, take_smaller, fill_value=-5) + A B + 0 0 -5.0 + 1 0 4.0 + + However, if the same element in both dataframes is None, that None + is preserved + + >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]}) + >>> df1.combine(df2, take_smaller, fill_value=-5) + A B + 0 0 -5.0 + 1 0 3.0 + + Example that demonstrates the use of `overwrite` and behavior when + the axis differ between the dataframes. + + >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]}) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2]) + >>> df1.combine(df2, take_smaller) + A B C + 0 NaN NaN NaN + 1 NaN 3.0 -10.0 + 2 NaN 3.0 1.0 + + >>> df1.combine(df2, take_smaller, overwrite=False) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 -10.0 + 2 NaN 3.0 1.0 + + Demonstrating the preference of the passed in dataframe. 
+ + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2]) + >>> df2.combine(df1, take_smaller) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 NaN + 2 NaN 3.0 NaN + + >>> df2.combine(df1, take_smaller, overwrite=False) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 + """ + other_idxlen = len(other.index) # save for compare + + this, other = self.align(other, copy=False) + new_index = this.index + + if other.empty and len(new_index) == len(self.index): + return self.copy() + + if self.empty and len(other) == other_idxlen: + return other.copy() + + # sorts if possible + new_columns = this.columns.union(other.columns) + do_fill = fill_value is not None + result = {} + for col in new_columns: + series = this[col] + otherSeries = other[col] + + this_dtype = series.dtype + other_dtype = otherSeries.dtype + + this_mask = isna(series) + other_mask = isna(otherSeries) + + # don't overwrite columns unnecessarily + # DO propagate if this column is not in the intersection + if not overwrite and other_mask.all(): + result[col] = this[col].copy() + continue + + if do_fill: + series = series.copy() + otherSeries = otherSeries.copy() + series[this_mask] = fill_value + otherSeries[other_mask] = fill_value + + if col not in self.columns: + # If self DataFrame does not have col in other DataFrame, + # try to promote series, which is all NaN, as other_dtype. + new_dtype = other_dtype + try: + series = series.astype(new_dtype, copy=False) + except ValueError: + # e.g. new_dtype is integer types + pass + else: + # if we have different dtypes, possibly promote + new_dtype = find_common_type([this_dtype, other_dtype]) + series = series.astype(new_dtype, copy=False) + otherSeries = otherSeries.astype(new_dtype, copy=False) + + arr = func(series, otherSeries) + if isinstance(new_dtype, np.dtype): + # if new_dtype is an EA Dtype, then `func` is expected to return + # the correct dtype without any additional casting + # error: No overload variant of "maybe_downcast_to_dtype" matches + # argument types "Union[Series, Hashable]", "dtype[Any]" + arr = maybe_downcast_to_dtype( # type: ignore[call-overload] + arr, new_dtype + ) + + result[col] = arr + + # convert_objects just in case + return self._constructor(result, index=new_index, columns=new_columns) + + def combine_first(self, other: DataFrame) -> DataFrame: + """ + Update null elements with value in the same location in `other`. + + Combine two DataFrame objects by filling null values in one DataFrame + with non-null values from other DataFrame. The row and column indexes + of the resulting DataFrame will be the union of the two. The resulting + dataframe contains the 'first' dataframe values and overrides the + second one values where both first.loc[index, col] and + second.loc[index, col] are not missing values, upon calling + first.combine_first(second). + + Parameters + ---------- + other : DataFrame + Provided DataFrame to use to fill null values. + + Returns + ------- + DataFrame + The result of combining the provided DataFrame with the other object. + + See Also + -------- + DataFrame.combine : Perform series-wise operation on two DataFrames + using a given function. 
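A small sketch of the dtype handling implemented below (hypothetical frames): when no values actually need filling, ``combine_first`` keeps the calling frame's values and its original integer dtype rather than upcasting to float.

    import pandas as pd

    left = pd.DataFrame({"A": [1, 2]})    # int64, no missing values
    right = pd.DataFrame({"A": [10, 20]})
    out = left.combine_first(right)
    # out equals `left` and out["A"].dtype is expected to remain int64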
+ + Examples + -------- + >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1.combine_first(df2) + A B + 0 1.0 3.0 + 1 0.0 4.0 + + Null values still persist if the location of that null value + does not exist in `other` + + >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [4, None]}) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2]) + >>> df1.combine_first(df2) + A B C + 0 NaN 4.0 NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 + """ + import pandas.core.computation.expressions as expressions + + def combiner(x, y): + mask = extract_array(isna(x)) + + x_values = extract_array(x, extract_numpy=True) + y_values = extract_array(y, extract_numpy=True) + + # If the column y in other DataFrame is not in first DataFrame, + # just return y_values. + if y.name not in self.columns: + return y_values + + return expressions.where(mask, y_values, x_values) + + combined = self.combine(other, combiner, overwrite=False) + + dtypes = { + col: find_common_type([self.dtypes[col], other.dtypes[col]]) + for col in self.columns.intersection(other.columns) + if not is_dtype_equal(combined.dtypes[col], self.dtypes[col]) + } + + if dtypes: + combined = combined.astype(dtypes) + + return combined + + def update( + self, + other, + join: str = "left", + overwrite: bool = True, + filter_func=None, + errors: str = "ignore", + ) -> None: + """ + Modify in place using non-NA values from another DataFrame. + + Aligns on indices. There is no return value. + + Parameters + ---------- + other : DataFrame, or object coercible into a DataFrame + Should have at least one matching index/column label + with the original DataFrame. If a Series is passed, + its name attribute must be set, and that will be + used as the column name to align with the original DataFrame. + join : {'left'}, default 'left' + Only left join is implemented, keeping the index and columns of the + original object. + overwrite : bool, default True + How to handle non-NA values for overlapping keys: + + * True: overwrite original DataFrame's values + with values from `other`. + * False: only update values that are NA in + the original DataFrame. + + filter_func : callable(1d-array) -> bool 1d-array, optional + Can choose to replace values other than NA. Return True for values + that should be updated. + errors : {'raise', 'ignore'}, default 'ignore' + If 'raise', will raise a ValueError if the DataFrame and `other` + both contain non-NA data in the same place. + + Returns + ------- + None : method directly changes calling object + + Raises + ------ + ValueError + * When `errors='raise'` and there's overlapping non-NA data. + * When `errors` is not either `'ignore'` or `'raise'` + NotImplementedError + * If `join != 'left'` + + See Also + -------- + dict.update : Similar method for dictionaries. + DataFrame.merge : For column(s)-on-column(s) operations. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], + ... 'B': [400, 500, 600]}) + >>> new_df = pd.DataFrame({'B': [4, 5, 6], + ... 'C': [7, 8, 9]}) + >>> df.update(new_df) + >>> df + A B + 0 1 4 + 1 2 5 + 2 3 6 + + The DataFrame's length does not increase as a result of the update, + only values at matching index/column labels are updated. + + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']}) + >>> df.update(new_df) + >>> df + A B + 0 a d + 1 b e + 2 c f + + For Series, its name attribute must be set. 
+ + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2]) + >>> df.update(new_column) + >>> df + A B + 0 a d + 1 b y + 2 c e + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2]) + >>> df.update(new_df) + >>> df + A B + 0 a x + 1 b d + 2 c e + + If `other` contains NaNs the corresponding values are not updated + in the original dataframe. + + >>> df = pd.DataFrame({'A': [1, 2, 3], + ... 'B': [400, 500, 600]}) + >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]}) + >>> df.update(new_df) + >>> df + A B + 0 1 4.0 + 1 2 500.0 + 2 3 6.0 + """ + import pandas.core.computation.expressions as expressions + + # TODO: Support other joins + if join != "left": # pragma: no cover + raise NotImplementedError("Only left join is supported") + if errors not in ["ignore", "raise"]: + raise ValueError("The parameter errors must be either 'ignore' or 'raise'") + + if not isinstance(other, DataFrame): + other = DataFrame(other) + + other = other.reindex_like(self) + + for col in self.columns: + this = self[col]._values + that = other[col]._values + if filter_func is not None: + with np.errstate(all="ignore"): + mask = ~filter_func(this) | isna(that) + else: + if errors == "raise": + mask_this = notna(that) + mask_that = notna(this) + if any(mask_this & mask_that): + raise ValueError("Data overlaps.") + + if overwrite: + mask = isna(that) + else: + mask = notna(this) + + # don't overwrite columns unnecessarily + if mask.all(): + continue + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "In a future version, `df.iloc") + self.loc[:, col] = expressions.where(mask, this, that) + + # ---------------------------------------------------------------------- + # Data reshaping + @Appender( + """ +Examples +-------- +>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', +... 'Parrot', 'Parrot'], +... 'Max Speed': [380., 370., 24., 26.]}) +>>> df + Animal Max Speed +0 Falcon 380.0 +1 Falcon 370.0 +2 Parrot 24.0 +3 Parrot 26.0 +>>> df.groupby(['Animal']).mean() + Max Speed +Animal +Falcon 375.0 +Parrot 25.0 + +**Hierarchical Indexes** + +We can groupby different levels of a hierarchical index +using the `level` parameter: + +>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], +... ['Captive', 'Wild', 'Captive', 'Wild']] +>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) +>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]}, +... index=index) +>>> df + Max Speed +Animal Type +Falcon Captive 390.0 + Wild 350.0 +Parrot Captive 30.0 + Wild 20.0 +>>> df.groupby(level=0).mean() + Max Speed +Animal +Falcon 370.0 +Parrot 25.0 +>>> df.groupby(level="Type").mean() + Max Speed +Type +Captive 210.0 +Wild 185.0 + +We can also choose to include NA in group keys or not by setting +`dropna` parameter, the default setting is `True`. 
+ +>>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]] +>>> df = pd.DataFrame(l, columns=["a", "b", "c"]) + +>>> df.groupby(by=["b"]).sum() + a c +b +1.0 2 3 +2.0 2 5 + +>>> df.groupby(by=["b"], dropna=False).sum() + a c +b +1.0 2 3 +2.0 2 5 +NaN 1 4 + +>>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]] +>>> df = pd.DataFrame(l, columns=["a", "b", "c"]) + +>>> df.groupby(by="a").sum() + b c +a +a 13.0 13.0 +b 12.3 123.0 + +>>> df.groupby(by="a", dropna=False).sum() + b c +a +a 13.0 13.0 +b 12.3 123.0 +NaN 12.3 33.0 + +When using ``.apply()``, use ``group_keys`` to include or exclude the group keys. +The ``group_keys`` argument defaults to ``True`` (include). + +>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', +... 'Parrot', 'Parrot'], +... 'Max Speed': [380., 370., 24., 26.]}) +>>> df.groupby("Animal", group_keys=True).apply(lambda x: x) + Animal Max Speed +Animal +Falcon 0 Falcon 380.0 + 1 Falcon 370.0 +Parrot 2 Parrot 24.0 + 3 Parrot 26.0 + +>>> df.groupby("Animal", group_keys=False).apply(lambda x: x) + Animal Max Speed +0 Falcon 380.0 +1 Falcon 370.0 +2 Parrot 24.0 +3 Parrot 26.0 +""" + ) + @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) + def groupby( + self, + by=None, + axis: Axis = 0, + level: IndexLabel | None = None, + as_index: bool = True, + sort: bool = True, + group_keys: bool | lib.NoDefault = no_default, + squeeze: bool | lib.NoDefault = no_default, + observed: bool = False, + dropna: bool = True, + ) -> DataFrameGroupBy: + from pandas.core.groupby.generic import DataFrameGroupBy + + if squeeze is not no_default: + warnings.warn( + ( + "The `squeeze` parameter is deprecated and " + "will be removed in a future version." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + squeeze = False + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + axis = self._get_axis_number(axis) + + return DataFrameGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, + observed=observed, + dropna=dropna, + ) + + _shared_docs[ + "pivot" + ] = """ + Return reshaped DataFrame organized by given index / column values. + + Reshape data (produce a "pivot" table) based on column values. Uses + unique values from specified `index` / `columns` to form axes of the + resulting DataFrame. This function does not support data + aggregation, multiple values will result in a MultiIndex in the + columns. See the :ref:`User Guide ` for more on reshaping. + + Parameters + ----------%s + index : str or object or a list of str, optional + Column to use to make new frame's index. If None, uses + existing index. + + .. versionchanged:: 1.1.0 + Also accept list of index names. + + columns : str or object or a list of str + Column to use to make new frame's columns. + + .. versionchanged:: 1.1.0 + Also accept list of columns names. + + values : str, object or a list of the previous, optional + Column(s) to use for populating new frame's values. If not + specified, all remaining columns will be used and the result will + have hierarchically indexed columns. + + Returns + ------- + DataFrame + Returns reshaped DataFrame. + + Raises + ------ + ValueError: + When there are any `index`, `columns` combinations with multiple + values. `DataFrame.pivot_table` when you need to aggregate. + + See Also + -------- + DataFrame.pivot_table : Generalization of pivot that can handle + duplicate values for one index/column pair. 
+ DataFrame.unstack : Pivot based on the index values instead of a + column. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Notes + ----- + For finer-tuned control, see hierarchical indexing documentation along + with the related stack/unstack methods. + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', + ... 'two'], + ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + ... 'baz': [1, 2, 3, 4, 5, 6], + ... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + >>> df + foo bar baz zoo + 0 one A 1 x + 1 one B 2 y + 2 one C 3 z + 3 two A 4 q + 4 two B 5 w + 5 two C 6 t + + >>> df.pivot(index='foo', columns='bar', values='baz') + bar A B C + foo + one 1 2 3 + two 4 5 6 + + >>> df.pivot(index='foo', columns='bar')['baz'] + bar A B C + foo + one 1 2 3 + two 4 5 6 + + >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) + baz zoo + bar A B C A B C + foo + one 1 2 3 x y z + two 4 5 6 q w t + + You could also assign a list of column names or a list of index names. + + >>> df = pd.DataFrame({ + ... "lev1": [1, 1, 1, 2, 2, 2], + ... "lev2": [1, 1, 2, 1, 1, 2], + ... "lev3": [1, 2, 1, 2, 1, 2], + ... "lev4": [1, 2, 3, 4, 5, 6], + ... "values": [0, 1, 2, 3, 4, 5]}) + >>> df + lev1 lev2 lev3 lev4 values + 0 1 1 1 1 0 + 1 1 1 2 2 1 + 2 1 2 1 3 2 + 3 2 1 2 4 3 + 4 2 1 1 5 4 + 5 2 2 2 6 5 + + >>> df.pivot(index="lev1", columns=["lev2", "lev3"],values="values") + lev2 1 2 + lev3 1 2 1 2 + lev1 + 1 0.0 1.0 2.0 NaN + 2 4.0 3.0 NaN 5.0 + + >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"],values="values") + lev3 1 2 + lev1 lev2 + 1 1 0.0 1.0 + 2 2.0 NaN + 2 1 4.0 3.0 + 2 NaN 5.0 + + A ValueError is raised if there are any duplicates. + + >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], + ... "bar": ['A', 'A', 'B', 'C'], + ... "baz": [1, 2, 3, 4]}) + >>> df + foo bar baz + 0 one A 1 + 1 one A 2 + 2 two B 3 + 3 two C 4 + + Notice that the first two rows are the same for our `index` + and `columns` arguments. + + >>> df.pivot(index='foo', columns='bar', values='baz') + Traceback (most recent call last): + ... + ValueError: Index contains duplicate entries, cannot reshape + """ + + @Substitution("") + @Appender(_shared_docs["pivot"]) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def pivot(self, index=None, columns=None, values=None) -> DataFrame: + from pandas.core.reshape.pivot import pivot + + return pivot(self, index=index, columns=columns, values=values) + + _shared_docs[ + "pivot_table" + ] = """ + Create a spreadsheet-style pivot table as a DataFrame. + + The levels in the pivot table will be stored in MultiIndex objects + (hierarchical indexes) on the index and columns of the result DataFrame. + + Parameters + ----------%s + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. If an array is passed, + it is being used as the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is being used as the same manner as column values. 
+ aggfunc : function, list of functions, dict, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + If dict is passed, the key is column to aggregate and value + is function or list of functions. + fill_value : scalar, default None + Value to replace missing values with (in the resulting pivot table, + after aggregation). + margins : bool, default False + Add all row / columns (e.g. for subtotal / grand totals). + dropna : bool, default True + Do not include columns whose entries are all NaN. If True, + rows with a NaN value in any column will be omitted before + computing margins. + margins_name : str, default 'All' + Name of the row / column that will contain the totals + when margins is True. + observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionchanged:: 0.25.0 + + sort : bool, default True + Specifies if the result should be sorted. + + .. versionadded:: 1.3.0 + + Returns + ------- + DataFrame + An Excel style pivot table. + + See Also + -------- + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.melt: Unpivot a DataFrame from wide to long format, + optionally leaving identifiers set. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) + >>> df + A B C D E + 0 foo one small 1 2 + 1 foo one large 2 4 + 2 foo one large 2 5 + 3 foo two small 3 5 + 4 foo two small 3 6 + 5 bar one large 4 6 + 6 bar one small 5 8 + 7 bar two small 6 9 + 8 bar two large 7 9 + + This first example aggregates values by taking the sum. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + We can also fill missing values using the `fill_value` parameter. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum, fill_value=0) + >>> table + C large small + A B + bar one 4 5 + two 7 6 + foo one 4 1 + two 0 6 + + The next example aggregates by taking the mean across multiple columns. + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': np.mean, + ... 'E': np.mean}) + >>> table + D E + A C + bar large 5.500000 7.500000 + small 5.500000 8.500000 + foo large 2.000000 4.500000 + small 2.333333 4.333333 + + We can also calculate multiple types of aggregations for any given + value column. + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': np.mean, + ... 
'E': [min, max, np.mean]}) + >>> table + D E + mean max mean min + A C + bar large 5.500000 9 7.500000 6 + small 5.500000 9 8.500000 8 + foo large 2.000000 5 4.500000 4 + small 2.333333 6 4.333333 2 + """ + + @Substitution("") + @Appender(_shared_docs["pivot_table"]) + def pivot_table( + self, + values=None, + index=None, + columns=None, + aggfunc="mean", + fill_value=None, + margins=False, + dropna=True, + margins_name="All", + observed=False, + sort=True, + ) -> DataFrame: + from pandas.core.reshape.pivot import pivot_table + + return pivot_table( + self, + values=values, + index=index, + columns=columns, + aggfunc=aggfunc, + fill_value=fill_value, + margins=margins, + dropna=dropna, + margins_name=margins_name, + observed=observed, + sort=sort, + ) + + def stack(self, level: Level = -1, dropna: bool = True): + """ + Stack the prescribed level(s) from columns to index. + + Return a reshaped DataFrame or Series having a multi-level + index with one or more new inner-most levels compared to the current + DataFrame. The new inner-most levels are created by pivoting the + columns of the current dataframe: + + - if the columns have a single level, the output is a Series; + - if the columns have multiple levels, the new index + level(s) is (are) taken from the prescribed level(s) and + the output is a DataFrame. + + Parameters + ---------- + level : int, str, list, default -1 + Level(s) to stack from the column axis onto the index + axis, defined as one index or label, or a list of indices + or labels. + dropna : bool, default True + Whether to drop rows in the resulting Frame/Series with + missing values. Stacking a column level onto the index + axis can create combinations of index and column values + that are missing from the original dataframe. See Examples + section. + + Returns + ------- + DataFrame or Series + Stacked dataframe or series. + + See Also + -------- + DataFrame.unstack : Unstack prescribed level(s) from index axis + onto column axis. + DataFrame.pivot : Reshape dataframe from long format to wide + format. + DataFrame.pivot_table : Create a spreadsheet-style pivot table + as a DataFrame. + + Notes + ----- + The function is named by analogy with a collection of books + being reorganized from being side by side on a horizontal + position (the columns of the dataframe) to being stacked + vertically on top of each other (in the index of the + dataframe). + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + **Single level columns** + + >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], + ... index=['cat', 'dog'], + ... columns=['weight', 'height']) + + Stacking a dataframe with a single level column axis returns a Series: + + >>> df_single_level_cols + weight height + cat 0 1 + dog 2 3 + >>> df_single_level_cols.stack() + cat weight 0 + height 1 + dog weight 2 + height 3 + dtype: int64 + + **Multi level columns: simple case** + + >>> multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'), + ... ('weight', 'pounds')]) + >>> df_multi_level_cols1 = pd.DataFrame([[1, 2], [2, 4]], + ... index=['cat', 'dog'], + ... columns=multicol1) + + Stacking a dataframe with a multi-level column axis: + + >>> df_multi_level_cols1 + weight + kg pounds + cat 1 2 + dog 2 4 + >>> df_multi_level_cols1.stack() + weight + cat kg 1 + pounds 2 + dog kg 2 + pounds 4 + + **Missing values** + + >>> multicol2 = pd.MultiIndex.from_tuples([('weight', 'kg'), + ... ('height', 'm')]) + >>> df_multi_level_cols2 = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], + ... 
index=['cat', 'dog'], + ... columns=multicol2) + + It is common to have missing values when stacking a dataframe + with multi-level columns, as the stacked dataframe typically + has more values than the original dataframe. Missing values + are filled with NaNs: + + >>> df_multi_level_cols2 + weight height + kg m + cat 1.0 2.0 + dog 3.0 4.0 + >>> df_multi_level_cols2.stack() + height weight + cat kg NaN 1.0 + m 2.0 NaN + dog kg NaN 3.0 + m 4.0 NaN + + **Prescribing the level(s) to be stacked** + + The first parameter controls which level or levels are stacked: + + >>> df_multi_level_cols2.stack(0) + kg m + cat height NaN 2.0 + weight 1.0 NaN + dog height NaN 4.0 + weight 3.0 NaN + >>> df_multi_level_cols2.stack([0, 1]) + cat height m 2.0 + weight kg 1.0 + dog height m 4.0 + weight kg 3.0 + dtype: float64 + + **Dropping missing values** + + >>> df_multi_level_cols3 = pd.DataFrame([[None, 1.0], [2.0, 3.0]], + ... index=['cat', 'dog'], + ... columns=multicol2) + + Note that rows where all values are missing are dropped by + default but this behaviour can be controlled via the dropna + keyword parameter: + + >>> df_multi_level_cols3 + weight height + kg m + cat NaN 1.0 + dog 2.0 3.0 + >>> df_multi_level_cols3.stack(dropna=False) + height weight + cat kg NaN NaN + m 1.0 NaN + dog kg NaN 2.0 + m 3.0 NaN + >>> df_multi_level_cols3.stack(dropna=True) + height weight + cat m 1.0 NaN + dog kg NaN 2.0 + m 3.0 NaN + """ + from pandas.core.reshape.reshape import ( + stack, + stack_multiple, + ) + + if isinstance(level, (tuple, list)): + result = stack_multiple(self, level, dropna=dropna) + else: + result = stack(self, level, dropna=dropna) + + return result.__finalize__(self, method="stack") + + def explode( + self, + column: IndexLabel, + ignore_index: bool = False, + ) -> DataFrame: + """ + Transform each element of a list-like to a row, replicating index values. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + column : IndexLabel + Column(s) to explode. + For multiple columns, specify a non-empty list with each element + be str or tuple, and all specified columns their list-like data + on same row of the frame must have matching length. + + .. versionadded:: 1.3.0 + Multi-column explode + + ignore_index : bool, default False + If True, the resulting index will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + Exploded lists to rows of the subset columns; + index will be duplicated for these rows. + + Raises + ------ + ValueError : + * If columns of the frame are not unique. + * If specified columns to explode is empty list. + * If specified columns to explode have not matching count of + elements rowwise in the frame. + + See Also + -------- + DataFrame.unstack : Pivot a level of the (necessarily hierarchical) + index labels. + DataFrame.melt : Unpivot a DataFrame from wide format to long format. + Series.explode : Explode a DataFrame from list-like columns to long format. + + Notes + ----- + This routine will explode list-likes including lists, tuples, sets, + Series, and np.ndarray. The result dtype of the subset rows will + be object. Scalars will be returned unchanged, and empty list-likes will + result in a np.nan for that row. In addition, the ordering of rows in the + output will be non-deterministic when exploding sets. + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]], + ... 'B': 1, + ... 
'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]}) + >>> df + A B C + 0 [0, 1, 2] 1 [a, b, c] + 1 foo 1 NaN + 2 [] 1 [] + 3 [3, 4] 1 [d, e] + + Single-column explode. + + >>> df.explode('A') + A B C + 0 0 1 [a, b, c] + 0 1 1 [a, b, c] + 0 2 1 [a, b, c] + 1 foo 1 NaN + 2 NaN 1 [] + 3 3 1 [d, e] + 3 4 1 [d, e] + + Multi-column explode. + + >>> df.explode(list('AC')) + A B C + 0 0 1 a + 0 1 1 b + 0 2 1 c + 1 foo 1 NaN + 2 NaN 1 NaN + 3 3 1 d + 3 4 1 e + """ + if not self.columns.is_unique: + raise ValueError("columns must be unique") + + columns: list[Hashable] + if is_scalar(column) or isinstance(column, tuple): + columns = [column] + elif isinstance(column, list) and all( + is_scalar(c) or isinstance(c, tuple) for c in column + ): + if not column: + raise ValueError("column must be nonempty") + if len(column) > len(set(column)): + raise ValueError("column must be unique") + columns = column + else: + raise ValueError("column must be a scalar, tuple, or list thereof") + + df = self.reset_index(drop=True) + if len(columns) == 1: + result = df[columns[0]].explode() + else: + mylen = lambda x: len(x) if is_list_like(x) else -1 + counts0 = self[columns[0]].apply(mylen) + for c in columns[1:]: + if not all(counts0 == self[c].apply(mylen)): + raise ValueError("columns must have matching element counts") + result = DataFrame({c: df[c].explode() for c in columns}) + result = df.drop(columns, axis=1).join(result) + if ignore_index: + result.index = default_index(len(result)) + else: + result.index = self.index.take(result.index) + result = result.reindex(columns=self.columns, copy=False) + + return result.__finalize__(self, method="explode") + + def unstack(self, level: Level = -1, fill_value=None): + """ + Pivot a level of the (necessarily hierarchical) index labels. + + Returns a DataFrame having a new level of column labels whose inner-most level + consists of the pivoted index labels. + + If the index is not a MultiIndex, the output will be a Series + (the analogue of stack when the columns are not a MultiIndex). + + Parameters + ---------- + level : int, str, or list of these, default -1 (last level) + Level(s) of index to unstack, can pass level name. + fill_value : int, str or dict + Replace NaN with this value if the unstack produces missing values. + + Returns + ------- + Series or DataFrame + + See Also + -------- + DataFrame.pivot : Pivot a table based on column values. + DataFrame.stack : Pivot a level of the column labels (inverse operation + from `unstack`). + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), + ... 
('two', 'a'), ('two', 'b')]) + >>> s = pd.Series(np.arange(1.0, 5.0), index=index) + >>> s + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 + dtype: float64 + + >>> s.unstack(level=-1) + a b + one 1.0 2.0 + two 3.0 4.0 + + >>> s.unstack(level=0) + one two + a 1.0 3.0 + b 2.0 4.0 + + >>> df = s.unstack(level=0) + >>> df.unstack() + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 + dtype: float64 + """ + from pandas.core.reshape.reshape import unstack + + result = unstack(self, level, fill_value) + + return result.__finalize__(self, method="unstack") + + @Appender(_shared_docs["melt"] % {"caller": "df.melt(", "other": "melt"}) + def melt( + self, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level: Level = None, + ignore_index: bool = True, + ) -> DataFrame: + + return melt( + self, + id_vars=id_vars, + value_vars=value_vars, + var_name=var_name, + value_name=value_name, + col_level=col_level, + ignore_index=ignore_index, + ).__finalize__(self, method="melt") + + # ---------------------------------------------------------------------- + # Time series-related + + @doc( + Series.diff, + klass="DataFrame", + extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n " + "Take difference over rows (0) or columns (1).\n", + other_klass="Series", + examples=dedent( + """ + Difference with previous row + + >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], + ... 'b': [1, 1, 2, 3, 5, 8], + ... 'c': [1, 4, 9, 16, 25, 36]}) + >>> df + a b c + 0 1 1 1 + 1 2 1 4 + 2 3 2 9 + 3 4 3 16 + 4 5 5 25 + 5 6 8 36 + + >>> df.diff() + a b c + 0 NaN NaN NaN + 1 1.0 0.0 3.0 + 2 1.0 1.0 5.0 + 3 1.0 1.0 7.0 + 4 1.0 2.0 9.0 + 5 1.0 3.0 11.0 + + Difference with previous column + + >>> df.diff(axis=1) + a b c + 0 NaN 0 0 + 1 NaN -1 3 + 2 NaN -1 7 + 3 NaN -1 13 + 4 NaN 0 20 + 5 NaN 2 28 + + Difference with 3rd previous row + + >>> df.diff(periods=3) + a b c + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 NaN NaN NaN + 3 3.0 2.0 15.0 + 4 3.0 4.0 21.0 + 5 3.0 6.0 27.0 + + Difference with following row + + >>> df.diff(periods=-1) + a b c + 0 -1.0 0.0 -3.0 + 1 -1.0 -1.0 -5.0 + 2 -1.0 -1.0 -7.0 + 3 -1.0 -2.0 -9.0 + 4 -1.0 -3.0 -11.0 + 5 NaN NaN NaN + + Overflow in input dtype + + >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) + >>> df.diff() + a + 0 NaN + 1 255.0""" + ), + ) + def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: + if not lib.is_integer(periods): + if not ( + is_float(periods) + # error: "int" has no attribute "is_integer" + and periods.is_integer() # type: ignore[attr-defined] + ): + raise ValueError("periods must be an integer") + periods = int(periods) + + axis = self._get_axis_number(axis) + if axis == 1 and periods != 0: + return self - self.shift(periods, axis=axis) + + new_data = self._mgr.diff(n=periods, axis=axis) + return self._constructor(new_data).__finalize__(self, "diff") + + # ---------------------------------------------------------------------- + # Function application + + def _gotitem( + self, + key: IndexLabel, + ndim: int, + subset: DataFrame | Series | None = None, + ) -> DataFrame | Series: + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + if subset is None: + subset = self + elif subset.ndim == 1: # is Series + return subset + + # TODO: _shallow_copy(subset)? 
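+        # Illustrative note (not part of the upstream source): _gotitem is
+        # the selection hook used by the agg/apply machinery.  For a
+        # dict-style call such as df.agg({"A": "sum"}), the caller asks for
+        # the one-dimensional slice of column "A" here before applying the
+        # function, so ``subset[key]`` below yields a Series for a single
+        # label and a DataFrame for a list of labels.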
+ return subset[key] + + _agg_summary_and_see_also_doc = dedent( + """ + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to + ``numpy.mean(arr_2d, axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + + See Also + -------- + DataFrame.apply : Perform any type of operations. + DataFrame.transform : Perform transformation type operations. + core.groupby.GroupBy : Perform operations over groups. + core.resample.Resampler : Perform operations over resampled bins. + core.window.Rolling : Perform operations over rolling window. + core.window.Expanding : Perform operations over expanding window. + core.window.ExponentialMovingWindow : Perform operation over exponential weighted + window. + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> df = pd.DataFrame([[1, 2, 3], + ... [4, 5, 6], + ... [7, 8, 9], + ... [np.nan, np.nan, np.nan]], + ... columns=['A', 'B', 'C']) + + Aggregate these functions over the rows. + + >>> df.agg(['sum', 'min']) + A B C + sum 12.0 15.0 18.0 + min 1.0 2.0 3.0 + + Different aggregations per column. + + >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) + A B + sum 12.0 NaN + min 1.0 2.0 + max NaN 8.0 + + Aggregate different functions over the columns and rename the index of the resulting + DataFrame. + + >>> df.agg(x=('A', max), y=('B', 'min'), z=('C', np.mean)) + A B C + x 7.0 NaN NaN + y NaN 2.0 NaN + z NaN NaN 6.0 + + Aggregate over the columns. + + >>> df.agg("mean", axis="columns") + 0 2.0 + 1 5.0 + 2 8.0 + 3 NaN + dtype: float64 + """ + ) + + @doc( + _shared_docs["aggregate"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + see_also=_agg_summary_and_see_also_doc, + examples=_agg_examples_doc, + ) + def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + from pandas.core.apply import frame_apply + + axis = self._get_axis_number(axis) + + relabeling, func, columns, order = reconstruct_func(func, **kwargs) + + op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) + result = op.agg() + + if relabeling: + # This is to keep the order to columns occurrence unchanged, and also + # keep the order of new columns occurrence unchanged + + # For the return values of reconstruct_func, if relabeling is + # False, columns and order will be None. + assert columns is not None + assert order is not None + + result_in_dict = relabel_result(result, func, columns, order) + result = DataFrame(result_in_dict, index=columns) + + return result + + agg = aggregate + + # error: Signature of "any" incompatible with supertype "NDFrame" [override] + @overload # type: ignore[override] + def any( + self, + *, + axis: Axis = ..., + bool_only: bool | None = ..., + skipna: bool = ..., + level: None = ..., + **kwargs, + ) -> Series: + ... + + @overload + def any( + self, + *, + axis: Axis = ..., + bool_only: bool | None = ..., + skipna: bool = ..., + level: Level, + **kwargs, + ) -> DataFrame | Series: + ... + + @doc(NDFrame.any, **_shared_doc_kwargs) + def any( + self, + axis: Axis = 0, + bool_only: bool | None = None, + skipna: bool = True, + level: Level = None, + **kwargs, + ) -> DataFrame | Series: + ... 
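+    # A small usage sketch (illustrative only, not part of the upstream
+    # source): ``transform`` below must return an object with the same
+    # length/index as its input, whereas ``agg`` reduces it, e.g.
+    #
+    #   df = pd.DataFrame({"A": [1, 2, 3]})
+    #   df.transform(lambda x: x + 1)   # same shape -> A: 2, 3, 4
+    #   df.agg("sum")                   # reduction  -> A: 6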
+ + @doc( + _shared_docs["transform"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + ) + def transform( + self, func: AggFuncType, axis: Axis = 0, *args, **kwargs + ) -> DataFrame: + from pandas.core.apply import frame_apply + + op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) + result = op.transform() + assert isinstance(result, DataFrame) + return result + + def apply( + self, + func: AggFuncType, + axis: Axis = 0, + raw: bool = False, + result_type: Literal["expand", "reduce", "broadcast"] | None = None, + args=(), + **kwargs, + ): + """ + Apply a function along an axis of the DataFrame. + + Objects passed to the function are Series objects whose index is + either the DataFrame's index (``axis=0``) or the DataFrame's columns + (``axis=1``). By default (``result_type=None``), the final return type + is inferred from the return type of the applied function. Otherwise, + it depends on the `result_type` argument. + + Parameters + ---------- + func : function + Function to apply to each column or row. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis along which the function is applied: + + * 0 or 'index': apply function to each column. + * 1 or 'columns': apply function to each row. + + raw : bool, default False + Determines if row or column is passed as a Series or ndarray object: + + * ``False`` : passes each row or column as a Series to the + function. + * ``True`` : the passed function will receive ndarray objects + instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + + result_type : {'expand', 'reduce', 'broadcast', None}, default None + These only act when ``axis=1`` (columns): + + * 'expand' : list-like results will be turned into columns. + * 'reduce' : returns a Series if possible rather than expanding + list-like results. This is the opposite of 'expand'. + * 'broadcast' : results will be broadcast to the original shape + of the DataFrame, the original index and columns will be + retained. + + The default behaviour (None) depends on the return value of the + applied function: list-like results will be returned as a Series + of those. However if the apply function returns a Series these + are expanded to columns. + args : tuple + Positional arguments to pass to `func` in addition to the + array/series. + **kwargs + Additional keyword arguments to pass as keywords arguments to + `func`. + + Returns + ------- + Series or DataFrame + Result of applying ``func`` along the given axis of the + DataFrame. + + See Also + -------- + DataFrame.applymap: For elementwise operations. + DataFrame.aggregate: Only perform aggregating type operations. + DataFrame.transform: Only perform transforming type operations. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. 
+ + Examples + -------- + >>> df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B']) + >>> df + A B + 0 4 9 + 1 4 9 + 2 4 9 + + Using a numpy universal function (in this case the same as + ``np.sqrt(df)``): + + >>> df.apply(np.sqrt) + A B + 0 2.0 3.0 + 1 2.0 3.0 + 2 2.0 3.0 + + Using a reducing function on either axis + + >>> df.apply(np.sum, axis=0) + A 12 + B 27 + dtype: int64 + + >>> df.apply(np.sum, axis=1) + 0 13 + 1 13 + 2 13 + dtype: int64 + + Returning a list-like will result in a Series + + >>> df.apply(lambda x: [1, 2], axis=1) + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + dtype: object + + Passing ``result_type='expand'`` will expand list-like results + to columns of a Dataframe + + >>> df.apply(lambda x: [1, 2], axis=1, result_type='expand') + 0 1 + 0 1 2 + 1 1 2 + 2 1 2 + + Returning a Series inside the function is similar to passing + ``result_type='expand'``. The resulting column names + will be the Series index. + + >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1) + foo bar + 0 1 2 + 1 1 2 + 2 1 2 + + Passing ``result_type='broadcast'`` will ensure the same shape + result, whether list-like or scalar is returned by the function, + and broadcast it along the axis. The resulting column names will + be the originals. + + >>> df.apply(lambda x: [1, 2], axis=1, result_type='broadcast') + A B + 0 1 2 + 1 1 2 + 2 1 2 + """ + from pandas.core.apply import frame_apply + + op = frame_apply( + self, + func=func, + axis=axis, + raw=raw, + result_type=result_type, + args=args, + kwargs=kwargs, + ) + return op.apply().__finalize__(self, method="apply") + + def applymap( + self, func: PythonFuncType, na_action: str | None = None, **kwargs + ) -> DataFrame: + """ + Apply a function to a Dataframe elementwise. + + This method applies a function that accepts and returns a scalar + to every element of a DataFrame. + + Parameters + ---------- + func : callable + Python function, returns a single value from a single value. + na_action : {None, 'ignore'}, default None + If ‘ignore’, propagate NaN values, without passing them to func. + + .. versionadded:: 1.2 + + **kwargs + Additional keyword arguments to pass as keywords arguments to + `func`. + + .. versionadded:: 1.3.0 + + Returns + ------- + DataFrame + Transformed DataFrame. + + See Also + -------- + DataFrame.apply : Apply a function along input axis of DataFrame. + + Examples + -------- + >>> df = pd.DataFrame([[1, 2.12], [3.356, 4.567]]) + >>> df + 0 1 + 0 1.000 2.120 + 1 3.356 4.567 + + >>> df.applymap(lambda x: len(str(x))) + 0 1 + 0 3 4 + 1 5 5 + + Like Series.map, NA values can be ignored: + + >>> df_copy = df.copy() + >>> df_copy.iloc[0, 0] = pd.NA + >>> df_copy.applymap(lambda x: len(str(x)), na_action='ignore') + 0 1 + 0 NaN 4 + 1 5.0 5 + + Note that a vectorized version of `func` often exists, which will + be much faster. You could square each number elementwise. + + >>> df.applymap(lambda x: x**2) + 0 1 + 0 1.000000 4.494400 + 1 11.262736 20.857489 + + But it's better to avoid applymap in that case. + + >>> df ** 2 + 0 1 + 0 1.000000 4.494400 + 1 11.262736 20.857489 + """ + if na_action not in {"ignore", None}: + raise ValueError( + f"na_action must be 'ignore' or None. 
Got {repr(na_action)}" + ) + ignore_na = na_action == "ignore" + func = functools.partial(func, **kwargs) + + # if we have a dtype == 'M8[ns]', provide boxed values + def infer(x): + if x.empty: + return lib.map_infer(x, func, ignore_na=ignore_na) + return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na) + + return self.apply(infer).__finalize__(self, "applymap") + + # ---------------------------------------------------------------------- + # Merging / joining methods + + def append( + self, + other, + ignore_index: bool = False, + verify_integrity: bool = False, + sort: bool = False, + ) -> DataFrame: + """ + Append rows of `other` to the end of caller, returning a new object. + + .. deprecated:: 1.4.0 + Use :func:`concat` instead. For further details see + :ref:`whatsnew_140.deprecations.frame_series_append` + + Columns in `other` that are not in the caller are added as new columns. + + Parameters + ---------- + other : DataFrame or Series/dict-like object, or list of these + The data to append. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + verify_integrity : bool, default False + If True, raise ValueError on creating index with duplicates. + sort : bool, default False + Sort columns if the columns of `self` and `other` are not aligned. + + .. versionchanged:: 1.0.0 + + Changed to not sort by default. + + Returns + ------- + DataFrame + A new DataFrame consisting of the rows of caller and the rows of `other`. + + See Also + -------- + concat : General function to concatenate DataFrame or Series objects. + + Notes + ----- + If a list of dict/series is passed and the keys are all contained in + the DataFrame's index, the order of the columns in the resulting + DataFrame will be unchanged. + + Iteratively appending rows to a DataFrame can be more computationally + intensive than a single concatenate. A better solution is to append + those rows to a list and then concatenate the list with the original + DataFrame all at once. + + Examples + -------- + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'), index=['x', 'y']) + >>> df + A B + x 1 2 + y 3 4 + >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'), index=['x', 'y']) + >>> df.append(df2) + A B + x 1 2 + y 3 4 + x 5 6 + y 7 8 + + With `ignore_index` set to True: + + >>> df.append(df2, ignore_index=True) + A B + 0 1 2 + 1 3 4 + 2 5 6 + 3 7 8 + + The following, while not recommended methods for generating DataFrames, + show two ways to generate a DataFrame from multiple data sources. + + Less efficient: + + >>> df = pd.DataFrame(columns=['A']) + >>> for i in range(5): + ... df = df.append({'A': i}, ignore_index=True) + >>> df + A + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 + + More efficient: + + >>> pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)], + ... ignore_index=True) + A + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 + """ + warnings.warn( + "The frame.append method is deprecated " + "and will be removed from pandas in a future version. 
" + "Use pandas.concat instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return self._append(other, ignore_index, verify_integrity, sort) + + def _append( + self, + other, + ignore_index: bool = False, + verify_integrity: bool = False, + sort: bool = False, + ) -> DataFrame: + if isinstance(other, (Series, dict)): + if isinstance(other, dict): + if not ignore_index: + raise TypeError("Can only append a dict if ignore_index=True") + other = Series(other) + if other.name is None and not ignore_index: + raise TypeError( + "Can only append a Series if ignore_index=True " + "or if the Series has a name" + ) + + index = Index([other.name], name=self.index.name) + row_df = other.to_frame().T + # infer_objects is needed for + # test_append_empty_frame_to_series_with_dateutil_tz + other = row_df.infer_objects().rename_axis(index.names, copy=False) + elif isinstance(other, list): + if not other: + pass + elif not isinstance(other[0], DataFrame): + other = DataFrame(other) + if self.index.name is not None and not ignore_index: + other.index.name = self.index.name + + from pandas.core.reshape.concat import concat + + if isinstance(other, (list, tuple)): + to_concat = [self, *other] + else: + to_concat = [self, other] + + result = concat( + to_concat, + ignore_index=ignore_index, + verify_integrity=verify_integrity, + sort=sort, + ) + return result.__finalize__(self, method="append") + + def join( + self, + other: DataFrame | Series | list[DataFrame | Series], + on: IndexLabel | None = None, + how: str = "left", + lsuffix: str = "", + rsuffix: str = "", + sort: bool = False, + validate: str | None = None, + ) -> DataFrame: + """ + Join columns of another DataFrame. + + Join columns with `other` DataFrame either on index or on a key + column. Efficiently join multiple DataFrame objects by index at once by + passing a list. + + Parameters + ---------- + other : DataFrame, Series, or a list containing any combination of them + Index should be similar to one of the columns in this one. If a + Series is passed, its name attribute must be set, and that will be + used as the column name in the resulting joined DataFrame. + on : str, list of str, or array-like, optional + Column or index level name(s) in the caller to join on the index + in `other`, otherwise joins index-on-index. If multiple + values given, the `other` DataFrame must have a MultiIndex. Can + pass an array as the join key if it is not already contained in + the calling DataFrame. Like an Excel VLOOKUP operation. + how : {'left', 'right', 'outer', 'inner'}, default 'left' + How to handle the operation of the two objects. + + * left: use calling frame's index (or column if on is specified) + * right: use `other`'s index. + * outer: form union of calling frame's index (or column if on is + specified) with `other`'s index, and sort it. + lexicographically. + * inner: form intersection of calling frame's index (or column if + on is specified) with `other`'s index, preserving the order + of the calling's one. + * cross: creates the cartesian product from both frames, preserves the order + of the left keys. + + .. versionadded:: 1.2.0 + + lsuffix : str, default '' + Suffix to use from left frame's overlapping columns. + rsuffix : str, default '' + Suffix to use from right frame's overlapping columns. + sort : bool, default False + Order result DataFrame lexicographically by the join key. If False, + the order of the join key depends on the join type (how keyword). 
+ validate : str, optional + If specified, checks if join is of specified type. + * "one_to_one" or "1:1": check if join keys are unique in both left + and right datasets. + * "one_to_many" or "1:m": check if join keys are unique in left dataset. + * "many_to_one" or "m:1": check if join keys are unique in right dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + .. versionadded:: 1.5.0 + + Returns + ------- + DataFrame + A dataframe containing columns from both the caller and `other`. + + See Also + -------- + DataFrame.merge : For column(s)-on-column(s) operations. + + Notes + ----- + Parameters `on`, `lsuffix`, and `rsuffix` are not supported when + passing a list of `DataFrame` objects. + + Support for specifying index levels as the `on` parameter was added + in version 0.23.0. + + Examples + -------- + >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], + ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) + + >>> df + key A + 0 K0 A0 + 1 K1 A1 + 2 K2 A2 + 3 K3 A3 + 4 K4 A4 + 5 K5 A5 + + >>> other = pd.DataFrame({'key': ['K0', 'K1', 'K2'], + ... 'B': ['B0', 'B1', 'B2']}) + + >>> other + key B + 0 K0 B0 + 1 K1 B1 + 2 K2 B2 + + Join DataFrames using their indexes. + + >>> df.join(other, lsuffix='_caller', rsuffix='_other') + key_caller A key_other B + 0 K0 A0 K0 B0 + 1 K1 A1 K1 B1 + 2 K2 A2 K2 B2 + 3 K3 A3 NaN NaN + 4 K4 A4 NaN NaN + 5 K5 A5 NaN NaN + + If we want to join using the key columns, we need to set key to be + the index in both `df` and `other`. The joined DataFrame will have + key as its index. + + >>> df.set_index('key').join(other.set_index('key')) + A B + key + K0 A0 B0 + K1 A1 B1 + K2 A2 B2 + K3 A3 NaN + K4 A4 NaN + K5 A5 NaN + + Another option to join using the key columns is to use the `on` + parameter. DataFrame.join always uses `other`'s index but we can use + any column in `df`. This method preserves the original DataFrame's + index in the result. + + >>> df.join(other.set_index('key'), on='key') + key A B + 0 K0 A0 B0 + 1 K1 A1 B1 + 2 K2 A2 B2 + 3 K3 A3 NaN + 4 K4 A4 NaN + 5 K5 A5 NaN + + Using non-unique key values shows how they are matched. + + >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K1', 'K3', 'K0', 'K1'], + ... 
'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) + + >>> df + key A + 0 K0 A0 + 1 K1 A1 + 2 K1 A2 + 3 K3 A3 + 4 K0 A4 + 5 K1 A5 + + >>> df.join(other.set_index('key'), on='key', validate='m:1') + key A B + 0 K0 A0 B0 + 1 K1 A1 B1 + 2 K1 A2 B1 + 3 K3 A3 NaN + 4 K0 A4 B0 + 5 K1 A5 B1 + """ + return self._join_compat( + other, + on=on, + how=how, + lsuffix=lsuffix, + rsuffix=rsuffix, + sort=sort, + validate=validate, + ) + + def _join_compat( + self, + other: DataFrame | Series | Iterable[DataFrame | Series], + on: IndexLabel | None = None, + how: str = "left", + lsuffix: str = "", + rsuffix: str = "", + sort: bool = False, + validate: str | None = None, + ): + from pandas.core.reshape.concat import concat + from pandas.core.reshape.merge import merge + + if isinstance(other, Series): + if other.name is None: + raise ValueError("Other Series must have a name") + other = DataFrame({other.name: other}) + + if isinstance(other, DataFrame): + if how == "cross": + return merge( + self, + other, + how=how, + on=on, + suffixes=(lsuffix, rsuffix), + sort=sort, + validate=validate, + ) + return merge( + self, + other, + left_on=on, + how=how, + left_index=on is None, + right_index=True, + suffixes=(lsuffix, rsuffix), + sort=sort, + validate=validate, + ) + else: + if on is not None: + raise ValueError( + "Joining multiple DataFrames only supported for joining on index" + ) + + if rsuffix or lsuffix: + raise ValueError( + "Suffixes not supported when joining multiple DataFrames" + ) + + # Mypy thinks the RHS is a + # "Union[DataFrame, Series, Iterable[Union[DataFrame, Series]]]" whereas + # the LHS is an "Iterable[DataFrame]", but in reality both types are + # "Iterable[Union[DataFrame, Series]]" due to the if statements + frames = [cast("DataFrame | Series", self)] + list(other) + + can_concat = all(df.index.is_unique for df in frames) + + # join indexes only using concat + if can_concat: + if how == "left": + res = concat( + frames, axis=1, join="outer", verify_integrity=True, sort=sort + ) + return res.reindex(self.index, copy=False) + else: + return concat( + frames, axis=1, join=how, verify_integrity=True, sort=sort + ) + + joined = frames[0] + + for frame in frames[1:]: + joined = merge( + joined, + frame, + how=how, + left_index=True, + right_index=True, + validate=validate, + ) + + return joined + + @Substitution("") + @Appender(_merge_doc, indents=2) + def merge( + self, + right: DataFrame | Series, + how: str = "inner", + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + indicator: bool = False, + validate: str | None = None, + ) -> DataFrame: + from pandas.core.reshape.merge import merge + + return merge( + self, + right, + how=how, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + copy=copy, + indicator=indicator, + validate=validate, + ) + + def round( + self, decimals: int | dict[IndexLabel, int] | Series = 0, *args, **kwargs + ) -> DataFrame: + """ + Round a DataFrame to a variable number of decimal places. + + Parameters + ---------- + decimals : int, dict, Series + Number of decimal places to round each column to. If an int is + given, round each column to the same number of places. + Otherwise dict and Series round to variable numbers of places. 
+ Column names should be in the keys if `decimals` is a + dict-like, or in the index if `decimals` is a Series. Any + columns not included in `decimals` will be left as is. Elements + of `decimals` which are not columns of the input will be + ignored. + *args + Additional keywords have no effect but might be accepted for + compatibility with numpy. + **kwargs + Additional keywords have no effect but might be accepted for + compatibility with numpy. + + Returns + ------- + DataFrame + A DataFrame with the affected columns rounded to the specified + number of decimal places. + + See Also + -------- + numpy.around : Round a numpy array to the given number of decimals. + Series.round : Round a Series to the given number of decimals. + + Examples + -------- + >>> df = pd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)], + ... columns=['dogs', 'cats']) + >>> df + dogs cats + 0 0.21 0.32 + 1 0.01 0.67 + 2 0.66 0.03 + 3 0.21 0.18 + + By providing an integer each column is rounded to the same number + of decimal places + + >>> df.round(1) + dogs cats + 0 0.2 0.3 + 1 0.0 0.7 + 2 0.7 0.0 + 3 0.2 0.2 + + With a dict, the number of places for specific columns can be + specified with the column names as key and the number of decimal + places as value + + >>> df.round({'dogs': 1, 'cats': 0}) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + + Using a Series, the number of places for specific columns can be + specified with the column names as index and the number of + decimal places as value + + >>> decimals = pd.Series([0, 1], index=['cats', 'dogs']) + >>> df.round(decimals) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + """ + from pandas.core.reshape.concat import concat + + def _dict_round(df: DataFrame, decimals): + for col, vals in df.items(): + try: + yield _series_round(vals, decimals[col]) + except KeyError: + yield vals + + def _series_round(ser: Series, decimals: int): + if is_integer_dtype(ser.dtype) or is_float_dtype(ser.dtype): + return ser.round(decimals) + return ser + + nv.validate_round(args, kwargs) + + if isinstance(decimals, (dict, Series)): + if isinstance(decimals, Series) and not decimals.index.is_unique: + raise ValueError("Index of decimals must be unique") + if is_dict_like(decimals) and not all( + is_integer(value) for _, value in decimals.items() + ): + raise TypeError("Values in decimals must be integers") + new_cols = list(_dict_round(self, decimals)) + elif is_integer(decimals): + # Dispatch to Series.round + new_cols = [_series_round(v, decimals) for _, v in self.items()] + else: + raise TypeError("decimals must be an integer, a dict-like or a Series") + + if len(new_cols) > 0: + return self._constructor( + concat(new_cols, axis=1), index=self.index, columns=self.columns + ).__finalize__(self, method="round") + else: + return self + + # ---------------------------------------------------------------------- + # Statistical methods, etc. + + def corr( + self, + method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson", + min_periods: int = 1, + numeric_only: bool | lib.NoDefault = lib.no_default, + ) -> DataFrame: + """ + Compute pairwise correlation of columns, excluding NA/null values. + + Parameters + ---------- + method : {'pearson', 'kendall', 'spearman'} or callable + Method of correlation: + + * pearson : standard correlation coefficient + * kendall : Kendall Tau correlation coefficient + * spearman : Spearman rank correlation + * callable: callable with input two 1d ndarrays + and returning a float. 
Note that the returned matrix from corr + will have 1 along the diagonals and will be symmetric + regardless of the callable's behavior. + min_periods : int, optional + Minimum number of observations required per pair of columns + to have a valid result. Currently only available for Pearson + and Spearman correlation. + numeric_only : bool, default True + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + .. deprecated:: 1.5.0 + The default value of ``numeric_only`` will be ``False`` in a future + version of pandas. + + Returns + ------- + DataFrame + Correlation matrix. + + See Also + -------- + DataFrame.corrwith : Compute pairwise correlation with another + DataFrame or Series. + Series.corr : Compute the correlation between two Series. + + Notes + ----- + Pearson, Kendall and Spearman correlation are currently computed using pairwise complete observations. + + * `Pearson correlation coefficient `_ + * `Kendall rank correlation coefficient `_ + * `Spearman's rank correlation coefficient `_ + + Examples + -------- + >>> def histogram_intersection(a, b): + ... v = np.minimum(a, b).sum().round(decimals=1) + ... return v + >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], + ... columns=['dogs', 'cats']) + >>> df.corr(method=histogram_intersection) + dogs cats + dogs 1.0 0.3 + cats 0.3 1.0 + + >>> df = pd.DataFrame([(1, 1), (2, np.nan), (np.nan, 3), (4, 4)], + ... columns=['dogs', 'cats']) + >>> df.corr(min_periods=3) + dogs cats + dogs 1.0 NaN + cats NaN 1.0 + """ # noqa:E501 + numeric_only_bool = com.resolve_numeric_only(numeric_only) + data = self._get_numeric_data() if numeric_only_bool else self + if numeric_only is lib.no_default and len(data.columns) < len(self.columns): + com.deprecate_numeric_only_default(type(self), "corr") + + cols = data.columns + idx = cols.copy() + mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False) + + if method == "pearson": + correl = libalgos.nancorr(mat, minp=min_periods) + elif method == "spearman": + correl = libalgos.nancorr_spearman(mat, minp=min_periods) + elif method == "kendall" or callable(method): + if min_periods is None: + min_periods = 1 + mat = mat.T + corrf = nanops.get_corr_func(method) + K = len(cols) + correl = np.empty((K, K), dtype=float) + mask = np.isfinite(mat) + for i, ac in enumerate(mat): + for j, bc in enumerate(mat): + if i > j: + continue + + valid = mask[i] & mask[j] + if valid.sum() < min_periods: + c = np.nan + elif i == j: + c = 1.0 + elif not valid.all(): + c = corrf(ac[valid], bc[valid]) + else: + c = corrf(ac, bc) + correl[i, j] = c + correl[j, i] = c + else: + raise ValueError( + "method must be either 'pearson', " + "'spearman', 'kendall', or a callable, " + f"'{method}' was supplied" + ) + + return self._constructor(correl, index=idx, columns=cols) + + def cov( + self, + min_periods: int | None = None, + ddof: int | None = 1, + numeric_only: bool | lib.NoDefault = lib.no_default, + ) -> DataFrame: + """ + Compute pairwise covariance of columns, excluding NA/null values. + + Compute the pairwise covariance among the series of a DataFrame. + The returned data frame is the `covariance matrix + `__ of the columns + of the DataFrame. + + Both NA and null values are automatically excluded from the + calculation. (See the note below about bias from missing values.) + A threshold can be set for the minimum number of + observations for each value created. Comparisons with observations + below this threshold will be returned as ``NaN``. 
+ + This method is generally used for the analysis of time series data to + understand the relationship between different measures + across time. + + Parameters + ---------- + min_periods : int, optional + Minimum number of observations required per pair of columns + to have a valid result. + + ddof : int, default 1 + Delta degrees of freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + + .. versionadded:: 1.1.0 + + numeric_only : bool, default True + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + .. deprecated:: 1.5.0 + The default value of ``numeric_only`` will be ``False`` in a future + version of pandas. + + Returns + ------- + DataFrame + The covariance matrix of the series of the DataFrame. + + See Also + -------- + Series.cov : Compute covariance with another Series. + core.window.ewm.ExponentialMovingWindow.cov : Exponential weighted sample + covariance. + core.window.expanding.Expanding.cov : Expanding sample covariance. + core.window.rolling.Rolling.cov : Rolling sample covariance. + + Notes + ----- + Returns the covariance matrix of the DataFrame's time series. + The covariance is normalized by N-ddof. + + For DataFrames that have Series that are missing data (assuming that + data is `missing at random + `__) + the returned covariance matrix will be an unbiased estimate + of the variance and covariance between the member Series. + + However, for many applications this estimate may not be acceptable + because the estimate covariance matrix is not guaranteed to be positive + semi-definite. This could lead to estimate correlations having + absolute values which are greater than one, and/or a non-invertible + covariance matrix. See `Estimation of covariance matrices + `__ for more details. + + Examples + -------- + >>> df = pd.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)], + ... columns=['dogs', 'cats']) + >>> df.cov() + dogs cats + dogs 0.666667 -1.000000 + cats -1.000000 1.666667 + + >>> np.random.seed(42) + >>> df = pd.DataFrame(np.random.randn(1000, 5), + ... columns=['a', 'b', 'c', 'd', 'e']) + >>> df.cov() + a b c d e + a 0.998438 -0.020161 0.059277 -0.008943 0.014144 + b -0.020161 1.059352 -0.008543 -0.024738 0.009826 + c 0.059277 -0.008543 1.010670 -0.001486 -0.000271 + d -0.008943 -0.024738 -0.001486 0.921297 -0.013692 + e 0.014144 0.009826 -0.000271 -0.013692 0.977795 + + **Minimum number of periods** + + This method also supports an optional ``min_periods`` keyword + that specifies the required minimum number of non-NA observations for + each column pair in order to have a valid result: + + >>> np.random.seed(42) + >>> df = pd.DataFrame(np.random.randn(20, 3), + ... 
columns=['a', 'b', 'c']) + >>> df.loc[df.index[:5], 'a'] = np.nan + >>> df.loc[df.index[5:10], 'b'] = np.nan + >>> df.cov(min_periods=12) + a b c + a 0.316741 NaN -0.150812 + b NaN 1.248003 0.191417 + c -0.150812 0.191417 0.895202 + """ + numeric_only_bool = com.resolve_numeric_only(numeric_only) + data = self._get_numeric_data() if numeric_only_bool else self + if numeric_only is lib.no_default and len(data.columns) < len(self.columns): + com.deprecate_numeric_only_default(type(self), "cov") + + cols = data.columns + idx = cols.copy() + mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False) + + if notna(mat).all(): + if min_periods is not None and min_periods > len(mat): + base_cov = np.empty((mat.shape[1], mat.shape[1])) + base_cov.fill(np.nan) + else: + base_cov = np.cov(mat.T, ddof=ddof) + base_cov = base_cov.reshape((len(cols), len(cols))) + else: + base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods) + + return self._constructor(base_cov, index=idx, columns=cols) + + def corrwith( + self, + other: DataFrame | Series, + axis: Axis = 0, + drop: bool = False, + method: Literal["pearson", "kendall", "spearman"] + | Callable[[np.ndarray, np.ndarray], float] = "pearson", + numeric_only: bool | lib.NoDefault = lib.no_default, + ) -> Series: + """ + Compute pairwise correlation. + + Pairwise correlation is computed between rows or columns of + DataFrame with rows or columns of Series or DataFrame. DataFrames + are first aligned along both axes before computing the + correlations. + + Parameters + ---------- + other : DataFrame, Series + Object with which to compute correlations. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. 0 or 'index' to compute row-wise, 1 or 'columns' for + column-wise. + drop : bool, default False + Drop missing indices from result. + method : {'pearson', 'kendall', 'spearman'} or callable + Method of correlation: + + * pearson : standard correlation coefficient + * kendall : Kendall Tau correlation coefficient + * spearman : Spearman rank correlation + * callable: callable with input two 1d ndarrays + and returning a float. + + numeric_only : bool, default True + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + .. deprecated:: 1.5.0 + The default value of ``numeric_only`` will be ``False`` in a future + version of pandas. + + Returns + ------- + Series + Pairwise correlations. + + See Also + -------- + DataFrame.corr : Compute pairwise correlation of columns. 
+ + Examples + -------- + >>> index = ["a", "b", "c", "d", "e"] + >>> columns = ["one", "two", "three", "four"] + >>> df1 = pd.DataFrame(np.arange(20).reshape(5, 4), index=index, columns=columns) + >>> df2 = pd.DataFrame(np.arange(16).reshape(4, 4), index=index[:4], columns=columns) + >>> df1.corrwith(df2) + one 1.0 + two 1.0 + three 1.0 + four 1.0 + dtype: float64 + + >>> df2.corrwith(df1, axis=1) + a 1.0 + b 1.0 + c 1.0 + d 1.0 + e NaN + dtype: float64 + """ # noqa:E501 + axis = self._get_axis_number(axis) + numeric_only_bool = com.resolve_numeric_only(numeric_only) + this = self._get_numeric_data() if numeric_only_bool else self + if numeric_only is lib.no_default and len(this.columns) < len(self.columns): + com.deprecate_numeric_only_default(type(self), "corrwith") + + if isinstance(other, Series): + return this.apply(lambda x: other.corr(x, method=method), axis=axis) + + if numeric_only_bool: + other = other._get_numeric_data() + left, right = this.align(other, join="inner", copy=False) + + if axis == 1: + left = left.T + right = right.T + + if method == "pearson": + # mask missing values + left = left + right * 0 + right = right + left * 0 + + # demeaned data + ldem = left - left.mean(numeric_only=numeric_only_bool) + rdem = right - right.mean(numeric_only=numeric_only_bool) + + num = (ldem * rdem).sum() + dom = ( + (left.count() - 1) + * left.std(numeric_only=numeric_only_bool) + * right.std(numeric_only=numeric_only_bool) + ) + + correl = num / dom + + elif method in ["kendall", "spearman"] or callable(method): + + def c(x): + return nanops.nancorr(x[0], x[1], method=method) + + correl = self._constructor_sliced( + map(c, zip(left.values.T, right.values.T)), index=left.columns + ) + + else: + raise ValueError( + f"Invalid method {method} was passed, " + "valid methods are: 'pearson', 'kendall', " + "'spearman', or callable" + ) + + if not drop: + # Find non-matching labels along the given axis + # and append missing correlations (GH 22375) + raxis = 1 if axis == 0 else 0 + result_index = this._get_axis(raxis).union(other._get_axis(raxis)) + idx_diff = result_index.difference(correl.index) + + if len(idx_diff) > 0: + correl = correl._append( + Series([np.nan] * len(idx_diff), index=idx_diff) + ) + + return correl + + # ---------------------------------------------------------------------- + # ndarray-like stats methods + + def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False): + """ + Count non-NA cells for each column or row. + + The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending + on `pandas.options.mode.use_inf_as_na`) are considered NA. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index' counts are generated for each column. + If 1 or 'columns' counts are generated for each row. + level : int or str, optional + If the axis is a `MultiIndex` (hierarchical), count along a + particular `level`, collapsing into a `DataFrame`. + A `str` specifies the level name. + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + Returns + ------- + Series or DataFrame + For each column/row the number of non-NA/null entries. + If `level` is specified returns a `DataFrame`. + + See Also + -------- + Series.count: Number of non-NA elements in a Series. + DataFrame.value_counts: Count unique combinations of columns. + DataFrame.shape: Number of DataFrame rows and columns (including NA + elements). + DataFrame.isna: Boolean same-sized DataFrame showing places of NA + elements. 
+ + Examples + -------- + Constructing DataFrame from a dictionary: + + >>> df = pd.DataFrame({"Person": + ... ["John", "Myla", "Lewis", "John", "Myla"], + ... "Age": [24., np.nan, 21., 33, 26], + ... "Single": [False, True, True, True, False]}) + >>> df + Person Age Single + 0 John 24.0 False + 1 Myla NaN True + 2 Lewis 21.0 True + 3 John 33.0 True + 4 Myla 26.0 False + + Notice the uncounted NA values: + + >>> df.count() + Person 5 + Age 4 + Single 5 + dtype: int64 + + Counts for each **row**: + + >>> df.count(axis='columns') + 0 3 + 1 2 + 2 3 + 3 3 + 4 3 + dtype: int64 + """ + axis = self._get_axis_number(axis) + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. df.count(level=1) should use df.groupby(level=1).count().", + FutureWarning, + stacklevel=find_stack_level(), + ) + res = self._count_level(level, axis=axis, numeric_only=numeric_only) + return res.__finalize__(self, method="count") + + if numeric_only: + frame = self._get_numeric_data() + else: + frame = self + + # GH #423 + if len(frame._get_axis(axis)) == 0: + result = self._constructor_sliced(0, index=frame._get_agg_axis(axis)) + else: + if frame._is_mixed_type or frame._mgr.any_extension_types: + # the or any_extension_types is really only hit for single- + # column frames with an extension array + result = notna(frame).sum(axis=axis) + else: + # GH13407 + series_counts = notna(frame).sum(axis=axis) + counts = series_counts.values + result = self._constructor_sliced( + counts, index=frame._get_agg_axis(axis) + ) + + return result.astype("int64").__finalize__(self, method="count") + + def _count_level(self, level: Level, axis: int = 0, numeric_only: bool = False): + if numeric_only: + frame = self._get_numeric_data() + else: + frame = self + + count_axis = frame._get_axis(axis) + agg_axis = frame._get_agg_axis(axis) + + if not isinstance(count_axis, MultiIndex): + raise TypeError( + f"Can only count levels on hierarchical {self._get_axis_name(axis)}." 
+ ) + + # Mask NaNs: Mask rows or columns where the index level is NaN, and all + # values in the DataFrame that are NaN + if frame._is_mixed_type: + # Since we have mixed types, calling notna(frame.values) might + # upcast everything to object + values_mask = notna(frame).values + else: + # But use the speedup when we have homogeneous dtypes + values_mask = notna(frame.values) + + index_mask = notna(count_axis.get_level_values(level=level)) + if axis == 1: + mask = index_mask & values_mask + else: + mask = index_mask.reshape(-1, 1) & values_mask + + if isinstance(level, int): + level_number = level + else: + level_number = count_axis._get_level_number(level) + + level_name = count_axis._names[level_number] + level_index = count_axis.levels[level_number]._rename(name=level_name) + level_codes = ensure_platform_int(count_axis.codes[level_number]) + counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis) + + if axis == 1: + result = self._constructor(counts, index=agg_axis, columns=level_index) + else: + result = self._constructor(counts, index=level_index, columns=agg_axis) + + return result + + def _reduce( + self, + op, + name: str, + *, + axis: Axis = 0, + skipna: bool = True, + numeric_only: bool | None = None, + filter_type=None, + **kwds, + ): + assert filter_type is None or filter_type == "bool", filter_type + out_dtype = "bool" if filter_type == "bool" else None + + if numeric_only is None and name in ["mean", "median"]: + own_dtypes = [arr.dtype for arr in self._mgr.arrays] + + dtype_is_dt = np.array( + [is_datetime64_any_dtype(dtype) for dtype in own_dtypes], + dtype=bool, + ) + if dtype_is_dt.any(): + warnings.warn( + "DataFrame.mean and DataFrame.median with numeric_only=None " + "will include datetime64 and datetime64tz columns in a " + "future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # Non-copy equivalent to + # dt64_cols = self.dtypes.apply(is_datetime64_any_dtype) + # cols = self.columns[~dt64_cols] + # self = self[cols] + predicate = lambda x: not is_datetime64_any_dtype(x.dtype) + mgr = self._mgr._get_data_subset(predicate) + self = type(self)(mgr) + + # TODO: Make other agg func handle axis=None properly GH#21597 + axis = self._get_axis_number(axis) + labels = self._get_agg_axis(axis) + assert axis in [0, 1] + + def func(values: np.ndarray): + # We only use this in the case that operates on self.values + return op(values, axis=axis, skipna=skipna, **kwds) + + def blk_func(values, axis=1): + if isinstance(values, ExtensionArray): + if not is_1d_only_ea_dtype(values.dtype) and not isinstance( + self._mgr, ArrayManager + ): + return values._reduce(name, axis=1, skipna=skipna, **kwds) + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, axis=axis, skipna=skipna, **kwds) + + def _get_data() -> DataFrame: + if filter_type is None: + data = self._get_numeric_data() + else: + # GH#25101, GH#24434 + assert filter_type == "bool" + data = self._get_bool_data() + return data + + numeric_only_bool = com.resolve_numeric_only(numeric_only) + if numeric_only is not None or axis == 0: + # For numeric_only non-None and axis non-None, we know + # which blocks to use and no try/except is needed. 
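+            # Concretely (an illustrative sketch, not upstream logic): under
+            # the legacy default numeric_only=None a mixed frame such as
+            #   pd.DataFrame({"a": [1, 2], "b": ["x", "y"]}).mean()
+            # quietly drops the object column "b" (emitting a FutureWarning)
+            # and returns only ``a    1.5``; that nuisance-column path is
+            # what the deprecation handling further below deals with.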
+ # For numeric_only=None only the case with axis==0 and no object + # dtypes are unambiguous can be handled with BlockManager.reduce + # Case with EAs see GH#35881 + df = self + if numeric_only_bool: + df = _get_data() + if axis == 1: + df = df.T + axis = 0 + + ignore_failures = numeric_only is None + + # After possibly _get_data and transposing, we are now in the + # simple case where we can use BlockManager.reduce + res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) + out = df._constructor(res).iloc[0] + if out_dtype is not None: + out = out.astype(out_dtype) + if axis == 0 and len(self) == 0 and name in ["sum", "prod"]: + # Even if we are object dtype, follow numpy and return + # float64, see test_apply_funcs_over_empty + out = out.astype(np.float64) + + if numeric_only is None and out.shape[0] != df.shape[1]: + # columns have been dropped GH#41480 + com.deprecate_numeric_only_default( + type(self), name, deprecate_none=True + ) + + return out + + assert numeric_only is None + + data = self + values = data.values + + try: + result = func(values) + + except TypeError: + # e.g. in nanops trying to convert strs to float + + data = _get_data() + labels = data._get_agg_axis(axis) + + values = data.values + with np.errstate(all="ignore"): + result = func(values) + + # columns have been dropped GH#41480 + arg_name = "numeric_only" + if name in ["all", "any"]: + arg_name = "bool_only" + warnings.warn( + "Dropping of nuisance columns in DataFrame reductions " + f"(with '{arg_name}=None') is deprecated; in a future " + "version this will raise TypeError. Select only valid " + "columns before calling the reduction.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if hasattr(result, "dtype"): + if filter_type == "bool" and notna(result).all(): + result = result.astype(np.bool_) + elif filter_type is None and is_object_dtype(result.dtype): + try: + result = result.astype(np.float64) + except (ValueError, TypeError): + # try to coerce to the original dtypes item by item if we can + pass + + result = self._constructor_sliced(result, index=labels) + return result + + def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: + """ + Special case for _reduce to try to avoid a potentially-expensive transpose. + + Apply the reduction block-wise along axis=1 and then reduce the resulting + 1D arrays. + """ + if name == "all": + result = np.ones(len(self), dtype=bool) + ufunc = np.logical_and + elif name == "any": + result = np.zeros(len(self), dtype=bool) + # error: Incompatible types in assignment + # (expression has type "_UFunc_Nin2_Nout1[Literal['logical_or'], + # Literal[20], Literal[False]]", variable has type + # "_UFunc_Nin2_Nout1[Literal['logical_and'], Literal[20], + # Literal[True]]") + ufunc = np.logical_or # type: ignore[assignment] + else: + raise NotImplementedError(name) + + for arr in self._mgr.arrays: + middle = func(arr, axis=0, skipna=skipna) + result = ufunc(result, middle) + + res_ser = self._constructor_sliced(result, index=self.index) + return res_ser + + def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: + """ + Count number of distinct elements in specified axis. + + Return Series with number of distinct elements. Can ignore NaN + values. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for + column-wise. + dropna : bool, default True + Don't include NaN in the counts. 
+ + Returns + ------- + Series + + See Also + -------- + Series.nunique: Method nunique for Series. + DataFrame.count: Count non-NA cells for each column or row. + + Examples + -------- + >>> df = pd.DataFrame({'A': [4, 5, 6], 'B': [4, 1, 1]}) + >>> df.nunique() + A 3 + B 2 + dtype: int64 + + >>> df.nunique(axis=1) + 0 1 + 1 2 + 2 2 + dtype: int64 + """ + return self.apply(Series.nunique, axis=axis, dropna=dropna) + + @doc(_shared_docs["idxmin"], numeric_only_default="False") + def idxmin( + self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False + ) -> Series: + axis = self._get_axis_number(axis) + if numeric_only: + data = self._get_numeric_data() + else: + data = self + + res = data._reduce( + nanops.nanargmin, "argmin", axis=axis, skipna=skipna, numeric_only=False + ) + indices = res._values + + # indices will always be np.ndarray since axis is not None and + # values is a 2d array for DataFrame + # error: Item "int" of "Union[int, Any]" has no attribute "__iter__" + assert isinstance(indices, np.ndarray) # for mypy + + index = data._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis)) + return final_result.__finalize__(self, method="idxmin") + + @doc(_shared_docs["idxmax"], numeric_only_default="False") + def idxmax( + self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False + ) -> Series: + + axis = self._get_axis_number(axis) + if numeric_only: + data = self._get_numeric_data() + else: + data = self + + res = data._reduce( + nanops.nanargmax, "argmax", axis=axis, skipna=skipna, numeric_only=False + ) + indices = res._values + + # indices will always be np.ndarray since axis is not None and + # values is a 2d array for DataFrame + # error: Item "int" of "Union[int, Any]" has no attribute "__iter__" + assert isinstance(indices, np.ndarray) # for mypy + + index = data._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis)) + return final_result.__finalize__(self, method="idxmax") + + def _get_agg_axis(self, axis_num: int) -> Index: + """ + Let's be explicit about this. + """ + if axis_num == 0: + return self.columns + elif axis_num == 1: + return self.index + else: + raise ValueError(f"Axis must be 0 or 1 (got {repr(axis_num)})") + + def mode( + self, axis: Axis = 0, numeric_only: bool = False, dropna: bool = True + ) -> DataFrame: + """ + Get the mode(s) of each element along the selected axis. + + The mode of a set of values is the value that appears most often. + It can be multiple values. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to iterate over while searching for the mode: + + * 0 or 'index' : get mode of each column + * 1 or 'columns' : get mode of each row. + + numeric_only : bool, default False + If True, only apply to numeric columns. + dropna : bool, default True + Don't consider counts of NaN/NaT. + + Returns + ------- + DataFrame + The modes of each column or row. + + See Also + -------- + Series.mode : Return the highest frequency value in a Series. + Series.value_counts : Return the counts of values in a Series. + + Examples + -------- + >>> df = pd.DataFrame([('bird', 2, 2), + ... ('mammal', 4, np.nan), + ... ('arthropod', 8, 0), + ... ('bird', 2, np.nan)], + ... index=('falcon', 'horse', 'spider', 'ostrich'), + ... 
columns=('species', 'legs', 'wings')) + >>> df + species legs wings + falcon bird 2 2.0 + horse mammal 4 NaN + spider arthropod 8 0.0 + ostrich bird 2 NaN + + By default, missing values are not considered, and the mode of wings + are both 0 and 2. Because the resulting DataFrame has two rows, + the second row of ``species`` and ``legs`` contains ``NaN``. + + >>> df.mode() + species legs wings + 0 bird 2.0 0.0 + 1 NaN NaN 2.0 + + Setting ``dropna=False`` ``NaN`` values are considered and they can be + the mode (like for wings). + + >>> df.mode(dropna=False) + species legs wings + 0 bird 2 NaN + + Setting ``numeric_only=True``, only the mode of numeric columns is + computed, and columns of other types are ignored. + + >>> df.mode(numeric_only=True) + legs wings + 0 2.0 0.0 + 1 NaN 2.0 + + To compute the mode over columns and not rows, use the axis parameter: + + >>> df.mode(axis='columns', numeric_only=True) + 0 1 + falcon 2.0 NaN + horse 4.0 NaN + spider 0.0 8.0 + ostrich 2.0 NaN + """ + data = self if not numeric_only else self._get_numeric_data() + + def f(s): + return s.mode(dropna=dropna) + + data = data.apply(f, axis=axis) + # Ensure index is type stable (should always use int index) + if data.empty: + data.index = default_index(0) + + return data + + @overload + def quantile( + self, + q: float = ..., + axis: Axis = ..., + numeric_only: bool | lib.NoDefault = ..., + interpolation: QuantileInterpolation = ..., + ) -> Series: + ... + + @overload + def quantile( + self, + q: AnyArrayLike | Sequence[float], + axis: Axis = ..., + numeric_only: bool | lib.NoDefault = ..., + interpolation: QuantileInterpolation = ..., + ) -> Series | DataFrame: + ... + + @overload + def quantile( + self, + q: float | AnyArrayLike | Sequence[float] = ..., + axis: Axis = ..., + numeric_only: bool | lib.NoDefault = ..., + interpolation: QuantileInterpolation = ..., + ) -> Series | DataFrame: + ... + + def quantile( + self, + q: float | AnyArrayLike | Sequence[float] = 0.5, + axis: Axis = 0, + numeric_only: bool | lib.NoDefault = no_default, + interpolation: QuantileInterpolation = "linear", + method: Literal["single", "table"] = "single", + ) -> Series | DataFrame: + """ + Return values at the given quantile over requested axis. + + Parameters + ---------- + q : float or array-like, default 0.5 (50% quantile) + Value between 0 <= q <= 1, the quantile(s) to compute. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Equals 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + numeric_only : bool, default True + If False, the quantile of datetime and timedelta data will be + computed as well. + + .. deprecated:: 1.5.0 + The default value of ``numeric_only`` will be ``False`` in a future + version of pandas. + + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + method : {'single', 'table'}, default 'single' + Whether to compute quantiles per-column ('single') or over all columns + ('table'). When 'table', the only allowed interpolation methods are + 'nearest', 'lower', and 'higher'. 
+ + Returns + ------- + Series or DataFrame + + If ``q`` is an array, a DataFrame will be returned where the + index is ``q``, the columns are the columns of self, and the + values are the quantiles. + If ``q`` is a float, a Series will be returned where the + index is the columns of self and the values are the quantiles. + + See Also + -------- + core.window.rolling.Rolling.quantile: Rolling quantile. + numpy.percentile: Numpy function to compute the percentile. + + Examples + -------- + >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), + ... columns=['a', 'b']) + >>> df.quantile(.1) + a 1.3 + b 3.7 + Name: 0.1, dtype: float64 + >>> df.quantile([.1, .5]) + a b + 0.1 1.3 3.7 + 0.5 2.5 55.0 + + Specifying `method='table'` will compute the quantile over all columns. + + >>> df.quantile(.1, method="table", interpolation="nearest") + a 1 + b 1 + Name: 0.1, dtype: int64 + >>> df.quantile([.1, .5], method="table", interpolation="nearest") + a b + 0.1 1 1 + 0.5 3 100 + + Specifying `numeric_only=False` will also compute the quantile of + datetime and timedelta data. + + >>> df = pd.DataFrame({'A': [1, 2], + ... 'B': [pd.Timestamp('2010'), + ... pd.Timestamp('2011')], + ... 'C': [pd.Timedelta('1 days'), + ... pd.Timedelta('2 days')]}) + >>> df.quantile(0.5, numeric_only=False) + A 1.5 + B 2010-07-02 12:00:00 + C 1 days 12:00:00 + Name: 0.5, dtype: object + """ + validate_percentile(q) + axis = self._get_axis_number(axis) + any_not_numeric = any(not is_numeric_dtype(x) for x in self.dtypes) + if numeric_only is no_default and any_not_numeric: + com.deprecate_numeric_only_default(type(self), "quantile") + numeric_only = com.resolve_numeric_only(numeric_only) + + if not is_list_like(q): + # BlockManager.quantile expects listlike, so we wrap and unwrap here + # error: List item 0 has incompatible type "Union[float, Union[Union[ + # ExtensionArray, ndarray[Any, Any]], Index, Series], Sequence[float]]"; + # expected "float" + res_df = self.quantile( # type: ignore[call-overload] + [q], + axis=axis, + numeric_only=numeric_only, + interpolation=interpolation, + method=method, + ) + if method == "single": + res = res_df.iloc[0] + else: + # cannot directly iloc over sparse arrays + res = res_df.T.iloc[:, 0] + if axis == 1 and len(self) == 0: + # GH#41544 try to get an appropriate dtype + dtype = find_common_type(list(self.dtypes)) + if needs_i8_conversion(dtype): + return res.astype(dtype) + return res + + q = Index(q, dtype=np.float64) + data = self._get_numeric_data() if numeric_only else self + + if axis == 1: + data = data.T + + if len(data.columns) == 0: + # GH#23925 _get_numeric_data may have dropped all columns + cols = Index([], name=self.columns.name) + + dtype = np.float64 + if axis == 1: + # GH#41544 try to get an appropriate dtype + cdtype = find_common_type(list(self.dtypes)) + if needs_i8_conversion(cdtype): + dtype = cdtype + + res = self._constructor([], index=q, columns=cols, dtype=dtype) + return res.__finalize__(self, method="quantile") + + valid_method = {"single", "table"} + if method not in valid_method: + raise ValueError( + f"Invalid method: {method}. Method must be in {valid_method}." 
+ ) + if method == "single": + # error: Argument "qs" to "quantile" of "BlockManager" has incompatible type + # "Index"; expected "Float64Index" + res = data._mgr.quantile( + qs=q, axis=1, interpolation=interpolation # type: ignore[arg-type] + ) + elif method == "table": + valid_interpolation = {"nearest", "lower", "higher"} + if interpolation not in valid_interpolation: + raise ValueError( + f"Invalid interpolation: {interpolation}. " + f"Interpolation must be in {valid_interpolation}" + ) + # handle degenerate case + if len(data) == 0: + if data.ndim == 2: + dtype = find_common_type(list(self.dtypes)) + else: + dtype = self.dtype + return self._constructor([], index=q, columns=data.columns, dtype=dtype) + + q_idx = np.quantile( # type: ignore[call-overload] + np.arange(len(data)), q, **{np_percentile_argname: interpolation} + ) + + by = data.columns + if len(by) > 1: + keys = [data._get_label_or_level_values(x) for x in by] + indexer = lexsort_indexer(keys) + else: + by = by[0] + k = data._get_label_or_level_values(by) # type: ignore[arg-type] + indexer = nargsort(k) + + res = data._mgr.take(indexer[q_idx], verify=False) + res.axes[1] = q + + result = self._constructor(res) + return result.__finalize__(self, method="quantile") + + @doc(NDFrame.asfreq, **_shared_doc_kwargs) + def asfreq( + self, + freq: Frequency, + method: FillnaOptions | None = None, + how: str | None = None, + normalize: bool = False, + fill_value: Hashable = None, + ) -> DataFrame: + return super().asfreq( + freq=freq, + method=method, + how=how, + normalize=normalize, + fill_value=fill_value, + ) + + @doc(NDFrame.resample, **_shared_doc_kwargs) + def resample( + self, + rule, + axis: Axis = 0, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + loffset=None, + base: int | None = None, + on: Level = None, + level: Level = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + group_keys: bool | lib.NoDefault = no_default, + ) -> Resampler: + return super().resample( + rule=rule, + axis=axis, + closed=closed, + label=label, + convention=convention, + kind=kind, + loffset=loffset, + base=base, + on=on, + level=level, + origin=origin, + offset=offset, + group_keys=group_keys, + ) + + def to_timestamp( + self, + freq: Frequency | None = None, + how: str = "start", + axis: Axis = 0, + copy: bool = True, + ) -> DataFrame: + """ + Cast to DatetimeIndex of timestamps, at *beginning* of period. + + Parameters + ---------- + freq : str, default frequency of PeriodIndex + Desired frequency. + how : {'s', 'e', 'start', 'end'} + Convention for converting period to timestamp; start of period + vs. end. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to convert (the index by default). + copy : bool, default True + If False then underlying input data is not copied. + + Returns + ------- + DataFrame with DatetimeIndex + """ + new_obj = self.copy(deep=copy) + + axis_name = self._get_axis_name(axis) + old_ax = getattr(self, axis_name) + if not isinstance(old_ax, PeriodIndex): + raise TypeError(f"unsupported Type {type(old_ax).__name__}") + + new_ax = old_ax.to_timestamp(freq=freq, how=how) + + setattr(new_obj, axis_name, new_ax) + return new_obj + + def to_period( + self, freq: Frequency | None = None, axis: Axis = 0, copy: bool = True + ) -> DataFrame: + """ + Convert DataFrame from DatetimeIndex to PeriodIndex. 
+ + Convert DataFrame from DatetimeIndex to PeriodIndex with desired + frequency (inferred from index if not passed). + + Parameters + ---------- + freq : str, default + Frequency of the PeriodIndex. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to convert (the index by default). + copy : bool, default True + If False then underlying input data is not copied. + + Returns + ------- + DataFrame with PeriodIndex + + Examples + -------- + >>> idx = pd.to_datetime( + ... [ + ... "2001-03-31 00:00:00", + ... "2002-05-31 00:00:00", + ... "2003-08-31 00:00:00", + ... ] + ... ) + + >>> idx + DatetimeIndex(['2001-03-31', '2002-05-31', '2003-08-31'], + dtype='datetime64[ns]', freq=None) + + >>> idx.to_period("M") + PeriodIndex(['2001-03', '2002-05', '2003-08'], dtype='period[M]') + + For the yearly frequency + + >>> idx.to_period("Y") + PeriodIndex(['2001', '2002', '2003'], dtype='period[A-DEC]') + """ + new_obj = self.copy(deep=copy) + + axis_name = self._get_axis_name(axis) + old_ax = getattr(self, axis_name) + if not isinstance(old_ax, DatetimeIndex): + raise TypeError(f"unsupported Type {type(old_ax).__name__}") + + new_ax = old_ax.to_period(freq=freq) + + setattr(new_obj, axis_name, new_ax) + return new_obj + + def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: + """ + Whether each element in the DataFrame is contained in values. + + Parameters + ---------- + values : iterable, Series, DataFrame or dict + The result will only be true at a location if all the + labels match. If `values` is a Series, that's the index. If + `values` is a dict, the keys must be the column names, + which must match. If `values` is a DataFrame, + then both the index and column labels must match. + + Returns + ------- + DataFrame + DataFrame of booleans showing whether each element in the DataFrame + is contained in values. + + See Also + -------- + DataFrame.eq: Equality test for DataFrame. + Series.isin: Equivalent method on Series. + Series.str.contains: Test if pattern or regex is contained within a + string of a Series or Index. + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, + ... index=['falcon', 'dog']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + + When ``values`` is a list check whether every value in the DataFrame + is present in the list (which animals have 0 or 2 legs or wings) + + >>> df.isin([0, 2]) + num_legs num_wings + falcon True True + dog False True + + To check if ``values`` is *not* in the DataFrame, use the ``~`` operator: + + >>> ~df.isin([0, 2]) + num_legs num_wings + falcon False False + dog True False + + When ``values`` is a dict, we can pass values to check for each + column separately: + + >>> df.isin({'num_wings': [0, 3]}) + num_legs num_wings + falcon False False + dog False True + + When ``values`` is a Series or DataFrame the index and column must + match. Note that 'falcon' does not match based on the number of legs + in other. + + >>> other = pd.DataFrame({'num_legs': [8, 3], 'num_wings': [0, 2]}, + ... 
index=['spider', 'falcon']) + >>> df.isin(other) + num_legs num_wings + falcon False True + dog False False + """ + if isinstance(values, dict): + from pandas.core.reshape.concat import concat + + values = collections.defaultdict(list, values) + result = concat( + ( + self.iloc[:, [i]].isin(values[col]) + for i, col in enumerate(self.columns) + ), + axis=1, + ) + elif isinstance(values, Series): + if not values.index.is_unique: + raise ValueError("cannot compute isin with a duplicate axis.") + result = self.eq(values.reindex_like(self), axis="index") + elif isinstance(values, DataFrame): + if not (values.columns.is_unique and values.index.is_unique): + raise ValueError("cannot compute isin with a duplicate axis.") + result = self.eq(values.reindex_like(self)) + else: + if not is_list_like(values): + raise TypeError( + "only list-like or dict-like objects are allowed " + "to be passed to DataFrame.isin(), " + f"you passed a '{type(values).__name__}'" + ) + # error: Argument 2 to "isin" has incompatible type "Union[Sequence[Any], + # Mapping[Any, Any]]"; expected "Union[Union[ExtensionArray, + # ndarray[Any, Any]], Index, Series]" + result = self._constructor( + algorithms.isin( + self.values.ravel(), values # type: ignore[arg-type] + ).reshape(self.shape), + self.index, + self.columns, + ) + return result.__finalize__(self, method="isin") + + # ---------------------------------------------------------------------- + # Add index and columns + _AXIS_ORDERS = ["index", "columns"] + _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = { + **NDFrame._AXIS_TO_AXIS_NUMBER, + 1: 1, + "columns": 1, + } + _AXIS_LEN = len(_AXIS_ORDERS) + _info_axis_number = 1 + _info_axis_name = "columns" + + index = properties.AxisProperty( + axis=1, doc="The index (row labels) of the DataFrame." + ) + columns = properties.AxisProperty(axis=0, doc="The column labels of the DataFrame.") + + @property + def _AXIS_NUMBERS(self) -> dict[str, int]: + """.. deprecated:: 1.1.0""" + super()._AXIS_NUMBERS + return {"index": 0, "columns": 1} + + @property + def _AXIS_NAMES(self) -> dict[int, str]: + """.. deprecated:: 1.1.0""" + super()._AXIS_NAMES + return {0: "index", 1: "columns"} + + # ---------------------------------------------------------------------- + # Add plotting methods to DataFrame + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) + hist = pandas.plotting.hist_frame + boxplot = pandas.plotting.boxplot_frame + sparse = CachedAccessor("sparse", SparseFrameAccessor) + + # ---------------------------------------------------------------------- + # Internal Interface Methods + + def _to_dict_of_blocks(self, copy: bool = True): + """ + Return a dict of dtype -> Constructor Types that + each is a homogeneous dtype. + + Internal ONLY - only works for BlockManager + """ + mgr = self._mgr + # convert to BlockManager if needed -> this way support ArrayManager as well + mgr = mgr_to_mgr(mgr, "block") + mgr = cast(BlockManager, mgr) + return { + k: self._constructor(v).__finalize__(self) + for k, v, in mgr.to_dict(copy=copy).items() + } + + @property + def values(self) -> np.ndarray: + """ + Return a Numpy representation of the DataFrame. + + .. warning:: + + We recommend using :meth:`DataFrame.to_numpy` instead. + + Only the values in the DataFrame will be returned, the axes labels + will be removed. + + Returns + ------- + numpy.ndarray + The values of the DataFrame. + + See Also + -------- + DataFrame.to_numpy : Recommended alternative to this method. + DataFrame.index : Retrieve the index labels. 
+ DataFrame.columns : Retrieving the column names. + + Notes + ----- + The dtype will be a lower-common-denominator dtype (implicit + upcasting); that is to say if the dtypes (even of numeric types) + are mixed, the one that accommodates all will be chosen. Use this + with care if you are not dealing with the blocks. + + e.g. If the dtypes are float16 and float32, dtype will be upcast to + float32. If dtypes are int32 and uint8, dtype will be upcast to + int32. By :func:`numpy.find_common_type` convention, mixing int64 + and uint64 will result in a float64 dtype. + + Examples + -------- + A DataFrame where all columns are the same type (e.g., int64) results + in an array of the same type. + + >>> df = pd.DataFrame({'age': [ 3, 29], + ... 'height': [94, 170], + ... 'weight': [31, 115]}) + >>> df + age height weight + 0 3 94 31 + 1 29 170 115 + >>> df.dtypes + age int64 + height int64 + weight int64 + dtype: object + >>> df.values + array([[ 3, 94, 31], + [ 29, 170, 115]]) + + A DataFrame with mixed type columns(e.g., str/object, int64, float32) + results in an ndarray of the broadest type that accommodates these + mixed types (e.g., object). + + >>> df2 = pd.DataFrame([('parrot', 24.0, 'second'), + ... ('lion', 80.5, 1), + ... ('monkey', np.nan, None)], + ... columns=('name', 'max_speed', 'rank')) + >>> df2.dtypes + name object + max_speed float64 + rank object + dtype: object + >>> df2.values + array([['parrot', 24.0, 'second'], + ['lion', 80.5, 1], + ['monkey', nan, None]], dtype=object) + """ + self._consolidate_inplace() + return self._mgr.as_array() + + @overload + def ffill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[False] = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> DataFrame: + ... + + @overload + def ffill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[True], + limit: None | int = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def ffill( + self, + *, + axis: None | Axis = ..., + inplace: bool = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> DataFrame | None: + ... + + # error: Signature of "ffill" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def ffill( # type: ignore[override] + self, + axis: None | Axis = None, + inplace: bool = False, + limit: None | int = None, + downcast: dict | None = None, + ) -> DataFrame | None: + return super().ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) + + @overload + def bfill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[False] = ..., + limit: None | int = ..., + downcast=..., + ) -> DataFrame: + ... + + @overload + def bfill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[True], + limit: None | int = ..., + downcast=..., + ) -> None: + ... + + @overload + def bfill( + self, + *, + axis: None | Axis = ..., + inplace: bool = ..., + limit: None | int = ..., + downcast=..., + ) -> DataFrame | None: + ... 
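+    # The three ``@overload`` stubs above exist only for static type checkers:
+    # ``inplace=False`` returns a DataFrame, ``inplace=True`` returns None, and a
+    # plain ``bool`` yields ``DataFrame | None``. The runtime definition below does
+    # no work of its own and simply delegates to ``NDFrame.bfill``; the
+    # ``deprecate_nonkeyword_arguments`` decorator warns if anything other than
+    # ``self`` is passed positionally.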
+ + # error: Signature of "bfill" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def bfill( # type: ignore[override] + self, + axis: None | Axis = None, + inplace: bool = False, + limit: None | int = None, + downcast=None, + ) -> DataFrame | None: + return super().bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "lower", "upper"] + ) + def clip( + self: DataFrame, + lower: float | None = None, + upper: float | None = None, + axis: Axis | None = None, + inplace: bool = False, + *args, + **kwargs, + ) -> DataFrame | None: + return super().clip(lower, upper, axis, inplace, *args, **kwargs) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) + def interpolate( + self: DataFrame, + method: str = "linear", + axis: Axis = 0, + limit: int | None = None, + inplace: bool = False, + limit_direction: str | None = None, + limit_area: str | None = None, + downcast: str | None = None, + **kwargs, + ) -> DataFrame | None: + return super().interpolate( + method, + axis, + limit, + inplace, + limit_direction, + limit_area, + downcast, + **kwargs, + ) + + @overload + def where( + self, + cond, + other=..., + *, + inplace: Literal[False] = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> DataFrame: + ... + + @overload + def where( + self, + cond, + other=..., + *, + inplace: Literal[True], + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def where( + self, + cond, + other=..., + *, + inplace: bool = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> DataFrame | None: + ... + + # error: Signature of "where" incompatible with supertype "NDFrame" + @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + def where( # type: ignore[override] + self, + cond, + other=lib.no_default, + inplace: bool = False, + axis: Axis | None = None, + level: Level = None, + errors: IgnoreRaise | lib.NoDefault = "raise", + try_cast: bool | lib.NoDefault = lib.no_default, + ) -> DataFrame | None: + return super().where( + cond, + other, + inplace=inplace, + axis=axis, + level=level, + try_cast=try_cast, + ) + + @overload + def mask( + self, + cond, + other=..., + *, + inplace: Literal[False] = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> DataFrame: + ... + + @overload + def mask( + self, + cond, + other=..., + *, + inplace: Literal[True], + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def mask( + self, + cond, + other=..., + *, + inplace: bool = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> DataFrame | None: + ... 
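+    # Same typing pattern as ``ffill``/``bfill`` above: the ``@overload`` stubs only
+    # narrow the return type based on the ``inplace`` literal. Note that ``errors``
+    # is marked deprecated via ``deprecate_kwarg(..., new_arg_name=None)`` and,
+    # although still accepted here, is not forwarded to ``NDFrame.mask`` below.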
+ + # error: Signature of "mask" incompatible with supertype "NDFrame" + @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + def mask( # type: ignore[override] + self, + cond, + other=np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level = None, + errors: IgnoreRaise | lib.NoDefault = "raise", + try_cast: bool | lib.NoDefault = lib.no_default, + ) -> DataFrame | None: + return super().mask( + cond, + other, + inplace=inplace, + axis=axis, + level=level, + try_cast=try_cast, + ) + + +DataFrame._add_numeric_operations() + +ops.add_flex_arithmetic_methods(DataFrame) + + +def _from_nested_dict(data) -> collections.defaultdict: + new_data: collections.defaultdict = collections.defaultdict(dict) + for index, s in data.items(): + for col, v in s.items(): + new_data[col][index] = v + return new_data + + +def _reindex_for_setitem(value: DataFrame | Series, index: Index) -> ArrayLike: + # reindex if necessary + + if value.index.equals(index) or not len(index): + return value._values.copy() + + # GH#4107 + try: + reindexed_value = value.reindex(index)._values + except ValueError as err: + # raised in MultiIndex.from_tuples, see test_insert_error_msmgs + if not value.index.is_unique: + # duplicate axis + raise err + + raise TypeError( + "incompatible index of inserted column with frame index" + ) from err + return reindexed_value diff --git a/pandas/core/generic.py b/pandas/core/generic.py new file mode 100644 index 00000000..958bba2d --- /dev/null +++ b/pandas/core/generic.py @@ -0,0 +1,12926 @@ +# pyright: reportPropertyTypeMismatch=false +from __future__ import annotations + +import collections +from datetime import timedelta +import functools +import gc +import json +import operator +import pickle +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Hashable, + Literal, + Mapping, + NoReturn, + Sequence, + Type, + cast, + final, + overload, +) +import warnings +import weakref + +import numpy as np + +from pandas._config import config + +from pandas._libs import lib +from pandas._libs.tslibs import ( + Period, + Tick, + Timestamp, + to_offset, +) +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Axis, + ColspaceArgType, + CompressionOptions, + Dtype, + DtypeArg, + DtypeObj, + FilePath, + FillnaOptions, + FloatFormatType, + FormattersType, + Frequency, + IgnoreRaise, + IndexKeyFunc, + IndexLabel, + IntervalClosedType, + JSONSerializable, + Level, + Manager, + NaPosition, + NDFrameT, + RandomState, + Renamer, + SortKind, + StorageOptions, + Suffixes, + T, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + ValueKeyFunc, + WriteBuffer, + npt, +) +from pandas.compat._optional import import_optional_dependency +from pandas.compat.numpy import function as nv +from pandas.errors import ( + AbstractMethodError, + InvalidIndexError, + SettingWithCopyError, + SettingWithCopyWarning, +) +from pandas.util._decorators import ( + deprecate_kwarg, + deprecate_nonkeyword_arguments, + doc, + rewrite_axis_style_signature, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import ( + validate_ascending, + validate_bool_kwarg, + validate_fillna_kwargs, + validate_inclusive, +) + +from pandas.core.dtypes.common import ( + ensure_object, + ensure_platform_int, + ensure_str, + is_bool, + is_bool_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, 
+ is_float, + is_list_like, + is_number, + is_numeric_dtype, + is_re_compilable, + is_scalar, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.inference import ( + is_hashable, + is_nested_list_like, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core import ( + algorithms as algos, + arraylike, + common as com, + indexing, + missing, + nanops, + sample, +) +from pandas.core.array_algos.replace import should_use_regex +from pandas.core.arrays import ExtensionArray +from pandas.core.base import PandasObject +from pandas.core.construction import ( + create_series_with_explicit_dtype, + extract_array, +) +from pandas.core.describe import describe_ndframe +from pandas.core.flags import Flags +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + MultiIndex, + PeriodIndex, + RangeIndex, + default_index, + ensure_index, +) +from pandas.core.internals import ( + ArrayManager, + BlockManager, + SingleArrayManager, +) +from pandas.core.internals.construction import mgr_to_mgr +from pandas.core.missing import find_valid_index +from pandas.core.ops import align_method_FRAME +from pandas.core.reshape.concat import concat +from pandas.core.shared_docs import _shared_docs +from pandas.core.sorting import get_indexer_indexer +from pandas.core.window import ( + Expanding, + ExponentialMovingWindow, + Rolling, + Window, +) + +from pandas.io.formats import format as fmt +from pandas.io.formats.format import ( + DataFrameFormatter, + DataFrameRenderer, +) +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + + from pandas._libs.tslibs import BaseOffset + + from pandas.core.frame import DataFrame + from pandas.core.indexers.objects import BaseIndexer + from pandas.core.resample import Resampler + from pandas.core.series import Series + + from pandas.io.pytables import HDFStore + + +# goal is to be able to define the docs close to function, while still being +# able to share +_shared_docs = {**_shared_docs} +_shared_doc_kwargs = { + "axes": "keywords for axes", + "klass": "Series/DataFrame", + "axes_single_arg": "int or labels for object", + "args_transpose": "axes to permute (int or label for object)", + "inplace": """ + inplace : bool, default False + If True, performs operation inplace and returns None.""", + "optional_by": """ + by : str or list of str + Name or list of names to sort by""", + "replace_iloc": """ + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", +} + + +bool_t = bool # Need alias because NDFrame has def bool: + + +class NDFrame(PandasObject, indexing.IndexingMixin): + """ + N-dimensional analogue of DataFrame. 
Store multi-dimensional in a + size-mutable, labeled data structure + + Parameters + ---------- + data : BlockManager + axes : list + copy : bool, default False + """ + + _internal_names: list[str] = [ + "_mgr", + "_cacher", + "_item_cache", + "_cache", + "_is_copy", + "_subtyp", + "_name", + "_default_kind", + "_default_fill_value", + "_metadata", + "__array_struct__", + "__array_interface__", + "_flags", + ] + _internal_names_set: set[str] = set(_internal_names) + _accessors: set[str] = set() + _hidden_attrs: frozenset[str] = frozenset( + ["_AXIS_NAMES", "_AXIS_NUMBERS", "get_values", "tshift"] + ) + _metadata: list[str] = [] + _is_copy: weakref.ReferenceType[NDFrame] | None = None + _mgr: Manager + _attrs: dict[Hashable, Any] + _typ: str + + # ---------------------------------------------------------------------- + # Constructors + + def __init__( + self, + data: Manager, + copy: bool_t = False, + attrs: Mapping[Hashable, Any] | None = None, + ) -> None: + # copy kwarg is retained for mypy compat, is not used + + object.__setattr__(self, "_is_copy", None) + object.__setattr__(self, "_mgr", data) + object.__setattr__(self, "_item_cache", {}) + if attrs is None: + attrs = {} + else: + attrs = dict(attrs) + object.__setattr__(self, "_attrs", attrs) + object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) + + @classmethod + def _init_mgr( + cls, + mgr: Manager, + axes, + dtype: Dtype | None = None, + copy: bool_t = False, + ) -> Manager: + """passed a manager and a axes dict""" + for a, axe in axes.items(): + if axe is not None: + axe = ensure_index(axe) + bm_axis = cls._get_block_manager_axis(a) + mgr = mgr.reindex_axis(axe, axis=bm_axis) + + # make a copy if explicitly requested + if copy: + mgr = mgr.copy() + if dtype is not None: + # avoid further copies if we can + if ( + isinstance(mgr, BlockManager) + and len(mgr.blocks) == 1 + and is_dtype_equal(mgr.blocks[0].values.dtype, dtype) + ): + pass + else: + mgr = mgr.astype(dtype=dtype) + return mgr + + def _as_manager(self: NDFrameT, typ: str, copy: bool_t = True) -> NDFrameT: + """ + Private helper function to create a DataFrame with specific manager. + + Parameters + ---------- + typ : {"block", "array"} + copy : bool, default True + Only controls whether the conversion from Block->ArrayManager + copies the 1D arrays (to ensure proper/contiguous memory layout). + + Returns + ------- + DataFrame + New DataFrame using specified manager type. Is not guaranteed + to be a copy or not. + """ + new_mgr: Manager + new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy) + # fastpath of passing a manager doesn't check the option/manager class + return self._constructor(new_mgr).__finalize__(self) + + # ---------------------------------------------------------------------- + # attrs and flags + + @property + def attrs(self) -> dict[Hashable, Any]: + """ + Dictionary of global attributes of this dataset. + + .. warning:: + + attrs is experimental and may change without warning. + + See Also + -------- + DataFrame.flags : Global flags applying to this object. + """ + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: Mapping[Hashable, Any]) -> None: + self._attrs = dict(value) + + @final + @property + def flags(self) -> Flags: + """ + Get the properties associated with this pandas object. + + The available flags are + + * :attr:`Flags.allows_duplicate_labels` + + See Also + -------- + Flags : Flags that apply to pandas objects. 
+ DataFrame.attrs : Global metadata applying to this dataset. + + Notes + ----- + "Flags" differ from "metadata". Flags reflect properties of the + pandas object (the Series or DataFrame). Metadata refer to properties + of the dataset, and should be stored in :attr:`DataFrame.attrs`. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2]}) + >>> df.flags + + + Flags can be get or set using ``.`` + + >>> df.flags.allows_duplicate_labels + True + >>> df.flags.allows_duplicate_labels = False + + Or by slicing with a key + + >>> df.flags["allows_duplicate_labels"] + False + >>> df.flags["allows_duplicate_labels"] = True + """ + return self._flags + + @final + def set_flags( + self: NDFrameT, + *, + copy: bool_t = False, + allows_duplicate_labels: bool_t | None = None, + ) -> NDFrameT: + """ + Return a new object with updated flags. + + Parameters + ---------- + allows_duplicate_labels : bool, optional + Whether the returned object allows duplicate labels. + + Returns + ------- + Series or DataFrame + The same type as the caller. + + See Also + -------- + DataFrame.attrs : Global metadata applying to this dataset. + DataFrame.flags : Global flags applying to this object. + + Notes + ----- + This method returns a new object that's a view on the same data + as the input. Mutating the input or the output values will be reflected + in the other. + + This method is intended to be used in method chains. + + "Flags" differ from "metadata". Flags reflect properties of the + pandas object (the Series or DataFrame). Metadata refer to properties + of the dataset, and should be stored in :attr:`DataFrame.attrs`. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2]}) + >>> df.flags.allows_duplicate_labels + True + >>> df2 = df.set_flags(allows_duplicate_labels=False) + >>> df2.flags.allows_duplicate_labels + False + """ + df = self.copy(deep=copy) + if allows_duplicate_labels is not None: + df.flags["allows_duplicate_labels"] = allows_duplicate_labels + return df + + @final + @classmethod + def _validate_dtype(cls, dtype) -> DtypeObj | None: + """validate the passed dtype""" + if dtype is not None: + dtype = pandas_dtype(dtype) + + # a compound dtype + if dtype.kind == "V": + raise NotImplementedError( + "compound dtypes are not implemented " + f"in the {cls.__name__} constructor" + ) + + return dtype + + # ---------------------------------------------------------------------- + # Construction + + @property + def _constructor(self: NDFrameT) -> Callable[..., NDFrameT]: + """ + Used when a manipulation result has the same dimensions as the + original. + """ + raise AbstractMethodError(self) + + # ---------------------------------------------------------------------- + # Internals + + @final + @property + def _data(self): + # GH#33054 retained because some downstream packages uses this, + # e.g. fastparquet + return self._mgr + + # ---------------------------------------------------------------------- + # Axis + _stat_axis_number = 0 + _stat_axis_name = "index" + _AXIS_ORDERS: list[str] + _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {0: 0, "index": 0, "rows": 0} + _info_axis_number: int + _info_axis_name: str + _AXIS_LEN: int + + @property + def _AXIS_NUMBERS(self) -> dict[str, int]: + """.. deprecated:: 1.1.0""" + warnings.warn( + "_AXIS_NUMBERS has been deprecated.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return {"index": 0} + + @property + def _AXIS_NAMES(self) -> dict[int, str]: + """.. 
deprecated:: 1.1.0""" + level = self.ndim + 1 + warnings.warn( + "_AXIS_NAMES has been deprecated.", FutureWarning, stacklevel=level + ) + return {0: "index"} + + @final + def _construct_axes_dict(self, axes=None, **kwargs): + """Return an axes dictionary for myself.""" + d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} + d.update(kwargs) + return d + + @final + @classmethod + def _construct_axes_from_arguments( + cls, args, kwargs, require_all: bool_t = False, sentinel=None + ): + """ + Construct and returns axes if supplied in args/kwargs. + + If require_all, raise if all axis arguments are not supplied + return a tuple of (axes, kwargs). + + sentinel specifies the default parameter when an axis is not + supplied; useful to distinguish when a user explicitly passes None + in scenarios where None has special meaning. + """ + # construct the args + args = list(args) + for a in cls._AXIS_ORDERS: + + # look for a argument by position + if a not in kwargs: + try: + kwargs[a] = args.pop(0) + except IndexError as err: + if require_all: + raise TypeError( + "not enough/duplicate arguments specified!" + ) from err + + axes = {a: kwargs.pop(a, sentinel) for a in cls._AXIS_ORDERS} + return axes, kwargs + + @final + @classmethod + def _get_axis_number(cls, axis: Axis) -> int: + try: + return cls._AXIS_TO_AXIS_NUMBER[axis] + except KeyError: + raise ValueError(f"No axis named {axis} for object type {cls.__name__}") + + @final + @classmethod + def _get_axis_name(cls, axis: Axis) -> str: + axis_number = cls._get_axis_number(axis) + return cls._AXIS_ORDERS[axis_number] + + @final + def _get_axis(self, axis: Axis) -> Index: + axis_number = self._get_axis_number(axis) + assert axis_number in {0, 1} + return self.index if axis_number == 0 else self.columns + + @final + @classmethod + def _get_block_manager_axis(cls, axis: Axis) -> int: + """Map the axis to the block_manager axis.""" + axis = cls._get_axis_number(axis) + ndim = cls._AXIS_LEN + if ndim == 2: + # i.e. DataFrame + return 1 - axis + return axis + + @final + def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]: + # index or columns + axis_index = getattr(self, axis) + d = {} + prefix = axis[0] + + for i, name in enumerate(axis_index.names): + if name is not None: + key = level = name + else: + # prefix with 'i' or 'c' depending on the input axis + # e.g., you must do ilevel_0 for the 0th level of an unnamed + # multiiindex + key = f"{prefix}level_{i}" + level = i + + level_values = axis_index.get_level_values(level) + s = level_values.to_series() + s.index = axis_index + d[key] = s + + # put the index/columns itself in the dict + if isinstance(axis_index, MultiIndex): + dindex = axis_index + else: + dindex = axis_index.to_series() + + d[axis] = dindex + return d + + @final + def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]: + from pandas.core.computation.parsing import clean_column_name + + d: dict[str, Series | MultiIndex] = {} + for axis_name in self._AXIS_ORDERS: + d.update(self._get_axis_resolvers(axis_name)) + + return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)} + + @final + def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: + """ + Return the special character free column resolvers of a dataframe. + + Column names with special characters are 'cleaned up' so that they can + be referred to by backtick quoting. + Used in :meth:`DataFrame.eval`. 
+ """ + from pandas.core.computation.parsing import clean_column_name + + if isinstance(self, ABCSeries): + return {clean_column_name(self.name): self} + + return { + clean_column_name(k): v for k, v in self.items() if not isinstance(k, int) + } + + @property + def _info_axis(self) -> Index: + return getattr(self, self._info_axis_name) + + @property + def _stat_axis(self) -> Index: + return getattr(self, self._stat_axis_name) + + @property + def shape(self) -> tuple[int, ...]: + """ + Return a tuple of axis dimensions + """ + return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) + + @property + def axes(self) -> list[Index]: + """ + Return index label(s) of the internal NDFrame + """ + # we do it this way because if we have reversed axes, then + # the block manager shows then reversed + return [self._get_axis(a) for a in self._AXIS_ORDERS] + + @property + def ndim(self) -> int: + """ + Return an int representing the number of axes / array dimensions. + + Return 1 if Series. Otherwise return 2 if DataFrame. + + See Also + -------- + ndarray.ndim : Number of array dimensions. + + Examples + -------- + >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s.ndim + 1 + + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.ndim + 2 + """ + return self._mgr.ndim + + @property + def size(self) -> int: + """ + Return an int representing the number of elements in this object. + + Return the number of rows if Series. Otherwise return the number of + rows times number of columns if DataFrame. + + See Also + -------- + ndarray.size : Number of elements in the array. + + Examples + -------- + >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s.size + 3 + + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.size + 4 + """ + # error: Incompatible return value type (got "signedinteger[_64Bit]", + # expected "int") [return-value] + return np.prod(self.shape) # type: ignore[return-value] + + @overload + def set_axis( + self: NDFrameT, + labels, + *, + axis: Axis = ..., + inplace: Literal[False] | lib.NoDefault = ..., + copy: bool_t | lib.NoDefault = ..., + ) -> NDFrameT: + ... + + @overload + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[True], + copy: bool_t | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def set_axis( + self: NDFrameT, + labels, + *, + axis: Axis = ..., + inplace: bool_t | lib.NoDefault = ..., + copy: bool_t | lib.NoDefault = ..., + ) -> NDFrameT | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + def set_axis( + self: NDFrameT, + labels, + axis: Axis = 0, + inplace: bool_t | lib.NoDefault = lib.no_default, + *, + copy: bool_t | lib.NoDefault = lib.no_default, + ) -> NDFrameT | None: + """ + Assign desired index to given axis. + + Indexes for%(extended_summary_sub)s row labels can be changed by assigning + a list-like or Index. + + Parameters + ---------- + labels : list-like, Index + The values for the new index. + + axis : %(axes_single_arg)s, default 0 + The axis to update. The value 0 identifies the rows. For `Series` + this parameter is unused and defaults to 0. + + inplace : bool, default False + Whether to return a new %(klass)s instance. + + .. deprecated:: 1.5.0 + + copy : bool, default True + Whether to make a copy of the underlying data. + + .. versionadded:: 1.5.0 + + Returns + ------- + renamed : %(klass)s or None + An object of type %(klass)s or None if ``inplace=True``. 
+ + See Also + -------- + %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. + """ + if inplace is not lib.no_default: + warnings.warn( + f"{type(self).__name__}.set_axis 'inplace' keyword is deprecated " + "and will be removed in a future version. Use " + "`obj = obj.set_axis(..., copy=False)` instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + if inplace: + if copy is True: + raise ValueError("Cannot specify both inplace=True and copy=True") + copy = False + elif copy is lib.no_default: + copy = True + + self._check_inplace_and_allows_duplicate_labels(inplace) + return self._set_axis_nocheck(labels, axis, inplace, copy=copy) + + @final + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): + if inplace: + setattr(self, self._get_axis_name(axis), labels) + else: + # With copy=False, we create a new object but don't copy the + # underlying data. + obj = self.copy(deep=copy) + setattr(obj, obj._get_axis_name(axis), labels) + return obj + + def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: + labels = ensure_index(labels) + self._mgr.set_axis(axis, labels) + self._clear_item_cache() + + @final + def swapaxes( + self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t = True + ) -> NDFrameT: + """ + Interchange axes and swap values axes appropriately. + + Returns + ------- + y : same as input + """ + i = self._get_axis_number(axis1) + j = self._get_axis_number(axis2) + + if i == j: + if copy: + return self.copy() + return self + + mapping = {i: j, j: i} + + new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)) + new_values = self.values.swapaxes(i, j) + if copy: + new_values = new_values.copy() + + return self._constructor( + new_values, + *new_axes, + ).__finalize__(self, method="swapaxes") + + @final + @doc(klass=_shared_doc_kwargs["klass"]) + def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT: + """ + Return {klass} with requested index / column level(s) removed. + + Parameters + ---------- + level : int, str, or list-like + If a string is given, must be the name of a level + If list-like, elements must be names or positional indexes + of levels. + + axis : {{0 or 'index', 1 or 'columns'}}, default 0 + Axis along which the level(s) is removed: + + * 0 or 'index': remove level(s) in column. + * 1 or 'columns': remove level(s) in row. + + For `Series` this parameter is unused and defaults to 0. + + Returns + ------- + {klass} + {klass} with requested index / column level(s) removed. + + Examples + -------- + >>> df = pd.DataFrame([ + ... [1, 2, 3, 4], + ... [5, 6, 7, 8], + ... [9, 10, 11, 12] + ... ]).set_index([0, 1]).rename_axis(['a', 'b']) + + >>> df.columns = pd.MultiIndex.from_tuples([ + ... ('c', 'e'), ('d', 'f') + ... ], names=['level_1', 'level_2']) + + >>> df + level_1 c d + level_2 e f + a b + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + + >>> df.droplevel('a') + level_1 c d + level_2 e f + b + 2 3 4 + 6 7 8 + 10 11 12 + + >>> df.droplevel('level_2', axis=1) + level_1 c d + a b + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + """ + labels = self._get_axis(axis) + new_labels = labels.droplevel(level) + return self.set_axis(new_labels, axis=axis) + + def pop(self, item: Hashable) -> Series | Any: + result = self[item] + del self[item] + + return result + + @final + def squeeze(self, axis=None): + """ + Squeeze 1 dimensional axis objects into scalars. + + Series or DataFrames with a single element are squeezed to a scalar. 
+ DataFrames with a single column or a single row are squeezed to a + Series. Otherwise the object is unchanged. + + This method is most useful when you don't know if your + object is a Series or DataFrame, but you do know it has just a single + column. In that case you can safely call `squeeze` to ensure you have a + Series. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns', None}, default None + A specific axis to squeeze. By default, all length-1 axes are + squeezed. For `Series` this parameter is unused and defaults to `None`. + + Returns + ------- + DataFrame, Series, or scalar + The projection after squeezing `axis` or all the axes. + + See Also + -------- + Series.iloc : Integer-location based indexing for selecting scalars. + DataFrame.iloc : Integer-location based indexing for selecting Series. + Series.to_frame : Inverse of DataFrame.squeeze for a + single-column DataFrame. + + Examples + -------- + >>> primes = pd.Series([2, 3, 5, 7]) + + Slicing might produce a Series with a single value: + + >>> even_primes = primes[primes % 2 == 0] + >>> even_primes + 0 2 + dtype: int64 + + >>> even_primes.squeeze() + 2 + + Squeezing objects with more than one value in every axis does nothing: + + >>> odd_primes = primes[primes % 2 == 1] + >>> odd_primes + 1 3 + 2 5 + 3 7 + dtype: int64 + + >>> odd_primes.squeeze() + 1 3 + 2 5 + 3 7 + dtype: int64 + + Squeezing is even more effective when used with DataFrames. + + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) + >>> df + a b + 0 1 2 + 1 3 4 + + Slicing a single column will produce a DataFrame with the columns + having only one value: + + >>> df_a = df[['a']] + >>> df_a + a + 0 1 + 1 3 + + So the columns can be squeezed down, resulting in a Series: + + >>> df_a.squeeze('columns') + 0 1 + 1 3 + Name: a, dtype: int64 + + Slicing a single row from a single column will produce a single + scalar DataFrame: + + >>> df_0a = df.loc[df.index < 1, ['a']] + >>> df_0a + a + 0 1 + + Squeezing the rows produces a single scalar Series: + + >>> df_0a.squeeze('rows') + a 1 + Name: 0, dtype: int64 + + Squeezing all axes will project directly into a scalar: + + >>> df_0a.squeeze() + 1 + """ + axis = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),) + return self.iloc[ + tuple( + 0 if i in axis and len(a) == 1 else slice(None) + for i, a in enumerate(self.axes) + ) + ] + + # ---------------------------------------------------------------------- + # Rename + + def _rename( + self: NDFrameT, + mapper: Renamer | None = None, + *, + index: Renamer | None = None, + columns: Renamer | None = None, + axis: Axis | None = None, + copy: bool_t | None = None, + inplace: bool_t = False, + level: Level | None = None, + errors: str = "ignore", + ) -> NDFrameT | None: + # called by Series.rename and DataFrame.rename + + if mapper is None and index is None and columns is None: + raise TypeError("must pass an index to rename") + + if index is not None or columns is not None: + if axis is not None: + raise TypeError( + "Cannot specify both 'axis' and any of 'index' or 'columns'" + ) + elif mapper is not None: + raise TypeError( + "Cannot specify both 'mapper' and any of 'index' or 'columns'" + ) + else: + # use the mapper argument + if axis and self._get_axis_number(axis) == 1: + columns = mapper + else: + index = mapper + + self._check_inplace_and_allows_duplicate_labels(inplace) + result = self if inplace else self.copy(deep=copy) + + for axis_no, replacements in enumerate((index, columns)): + if replacements is None: + 
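+                # no mapping was supplied for this axis; leave its labels untouched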
continue + + ax = self._get_axis(axis_no) + f = com.get_rename_function(replacements) + + if level is not None: + level = ax._get_level_number(level) + + # GH 13473 + if not callable(replacements): + if ax._is_multi and level is not None: + indexer = ax.get_level_values(level).get_indexer_for(replacements) + else: + indexer = ax.get_indexer_for(replacements) + + if errors == "raise" and len(indexer[indexer == -1]): + missing_labels = [ + label + for index, label in enumerate(replacements) + if indexer[index] == -1 + ] + raise KeyError(f"{missing_labels} not found in axis") + + new_index = ax._transform_index(f, level=level) + result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False) + result._clear_item_cache() + + if inplace: + self._update_inplace(result) + return None + else: + return result.__finalize__(self, method="rename") + + @overload + def rename_axis( + self: NDFrameT, + mapper: IndexLabel | lib.NoDefault = ..., + *, + inplace: Literal[False] = ..., + **kwargs, + ) -> NDFrameT: + ... + + @overload + def rename_axis( + self, + mapper: IndexLabel | lib.NoDefault = ..., + *, + inplace: Literal[True], + **kwargs, + ) -> None: + ... + + @overload + def rename_axis( + self: NDFrameT, + mapper: IndexLabel | lib.NoDefault = ..., + *, + inplace: bool_t = ..., + **kwargs, + ) -> NDFrameT | None: + ... + + @rewrite_axis_style_signature("mapper", [("copy", True)]) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "mapper"]) + def rename_axis( + self: NDFrameT, + mapper: IndexLabel | lib.NoDefault = lib.no_default, + inplace: bool_t = False, + **kwargs, + ) -> NDFrameT | None: + """ + Set the name of the axis for the index or columns. + + Parameters + ---------- + mapper : scalar, list-like, optional + Value to set the axis name attribute. + index, columns : scalar, list-like, dict-like or function, optional + A scalar, list-like, dict-like or functions transformations to + apply to that axis' values. + Note that the ``columns`` parameter is not allowed if the + object is a Series. This parameter only apply for DataFrame + type objects. + + Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index`` + and/or ``columns``. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to rename. For `Series` this parameter is unused and defaults to 0. + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Modifies the object directly, instead of creating a new Series + or DataFrame. + + Returns + ------- + Series, DataFrame, or None + The same type as the caller or None if ``inplace=True``. + + See Also + -------- + Series.rename : Alter Series index labels or name. + DataFrame.rename : Alter DataFrame index labels or name. + Index.rename : Set new names on index. + + Notes + ----- + ``DataFrame.rename_axis`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + The first calling convention will only modify the names of + the index and/or the names of the Index object that is the columns. + In this case, the parameter ``copy`` is ignored. + + The second calling convention will modify the names of the + corresponding index if mapper is a list or a scalar. + However, if mapper is dict-like or a function, it will use the + deprecated behavior of modifying the axis *labels*. + + We *highly* recommend using keyword arguments to clarify your + intent. 
+ + Examples + -------- + **Series** + + >>> s = pd.Series(["dog", "cat", "monkey"]) + >>> s + 0 dog + 1 cat + 2 monkey + dtype: object + >>> s.rename_axis("animal") + animal + 0 dog + 1 cat + 2 monkey + dtype: object + + **DataFrame** + + >>> df = pd.DataFrame({"num_legs": [4, 4, 2], + ... "num_arms": [0, 0, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs num_arms + dog 4 0 + cat 4 0 + monkey 2 2 + >>> df = df.rename_axis("animal") + >>> df + num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + >>> df = df.rename_axis("limbs", axis="columns") + >>> df + limbs num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + + **MultiIndex** + + >>> df.index = pd.MultiIndex.from_product([['mammal'], + ... ['dog', 'cat', 'monkey']], + ... names=['type', 'name']) + >>> df + limbs num_legs num_arms + type name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + + >>> df.rename_axis(index={'type': 'class'}) + limbs num_legs num_arms + class name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + + >>> df.rename_axis(columns=str.upper) + LIMBS num_legs num_arms + type name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + """ + kwargs["inplace"] = inplace + axes, kwargs = self._construct_axes_from_arguments( + (), kwargs, sentinel=lib.no_default + ) + copy = kwargs.pop("copy", True) + inplace = kwargs.pop("inplace", False) + axis = kwargs.pop("axis", 0) + if axis is not None: + axis = self._get_axis_number(axis) + + if kwargs: + raise TypeError( + "rename_axis() got an unexpected keyword " + f'argument "{list(kwargs.keys())[0]}"' + ) + + inplace = validate_bool_kwarg(inplace, "inplace") + + if mapper is not lib.no_default: + # Use v0.23 behavior if a scalar or list + non_mapper = is_scalar(mapper) or ( + is_list_like(mapper) and not is_dict_like(mapper) + ) + if non_mapper: + return self._set_axis_name(mapper, axis=axis, inplace=inplace) + else: + raise ValueError("Use `.rename` to alter labels with a mapper.") + else: + # Use new behavior. Means that index and/or columns + # is specified + result = self if inplace else self.copy(deep=copy) + + for axis in range(self._AXIS_LEN): + v = axes.get(self._get_axis_name(axis)) + if v is lib.no_default: + continue + non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) + if non_mapper: + newnames = v + else: + f = com.get_rename_function(v) + curnames = self._get_axis(axis).names + newnames = [f(name) for name in curnames] + result._set_axis_name(newnames, axis=axis, inplace=True) + if not inplace: + return result + return None + + @final + def _set_axis_name(self, name, axis=0, inplace=False): + """ + Set the name(s) of the axis. + + Parameters + ---------- + name : str or list of str + Name(s) to set. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to set the label. The value 0 or 'index' specifies index, + and the value 1 or 'columns' specifies columns. + inplace : bool, default False + If `True`, do operation inplace and return None. + + Returns + ------- + Series, DataFrame, or None + The same type as the caller or `None` if `inplace` is `True`. + + See Also + -------- + DataFrame.rename : Alter the axis labels of :class:`DataFrame`. + Series.rename : Alter the index labels or set the index name + of :class:`Series`. + Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`. + + Examples + -------- + >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, + ... 
["dog", "cat", "monkey"]) + >>> df + num_legs + dog 4 + cat 4 + monkey 2 + >>> df._set_axis_name("animal") + num_legs + animal + dog 4 + cat 4 + monkey 2 + >>> df.index = pd.MultiIndex.from_product( + ... [["mammal"], ['dog', 'cat', 'monkey']]) + >>> df._set_axis_name(["type", "name"]) + num_legs + type name + mammal dog 4 + cat 4 + monkey 2 + """ + axis = self._get_axis_number(axis) + idx = self._get_axis(axis).set_names(name) + + inplace = validate_bool_kwarg(inplace, "inplace") + renamed = self if inplace else self.copy() + if axis == 0: + renamed.index = idx + else: + renamed.columns = idx + + if not inplace: + return renamed + + # ---------------------------------------------------------------------- + # Comparison Methods + + @final + def _indexed_same(self, other) -> bool_t: + return all( + self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS + ) + + @final + def equals(self, other: object) -> bool_t: + """ + Test whether two objects contain the same elements. + + This function allows two Series or DataFrames to be compared against + each other to see if they have the same shape and elements. NaNs in + the same location are considered equal. + + The row/column index do not need to have the same type, as long + as the values are considered equal. Corresponding columns must be of + the same dtype. + + Parameters + ---------- + other : Series or DataFrame + The other Series or DataFrame to be compared with the first. + + Returns + ------- + bool + True if all elements are the same in both objects, False + otherwise. + + See Also + -------- + Series.eq : Compare two Series objects of the same length + and return a Series where each element is True if the element + in each Series is equal, False otherwise. + DataFrame.eq : Compare two DataFrame objects of the same shape and + return a DataFrame where each element is True if the respective + element in each DataFrame is equal, False otherwise. + testing.assert_series_equal : Raises an AssertionError if left and + right are not equal. Provides an easy interface to ignore + inequality in dtypes, indexes and precision among others. + testing.assert_frame_equal : Like assert_series_equal, but targets + DataFrames. + numpy.array_equal : Return True if two arrays have the same shape + and elements, False otherwise. + + Examples + -------- + >>> df = pd.DataFrame({1: [10], 2: [20]}) + >>> df + 1 2 + 0 10 20 + + DataFrames df and exactly_equal have the same types and values for + their elements and column labels, which will return True. + + >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]}) + >>> exactly_equal + 1 2 + 0 10 20 + >>> df.equals(exactly_equal) + True + + DataFrames df and different_column_type have the same element + types and values, but have different types for the column labels, + which will still return True. + + >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]}) + >>> different_column_type + 1.0 2.0 + 0 10 20 + >>> df.equals(different_column_type) + True + + DataFrames df and different_data_type have different types for the + same values for their elements, and will return False even though + their column labels are the same values and types. 
+ + >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]}) + >>> different_data_type + 1 2 + 0 10.0 20.0 + >>> df.equals(different_data_type) + False + """ + if not (isinstance(other, type(self)) or isinstance(self, type(other))): + return False + other = cast(NDFrame, other) + return self._mgr.equals(other._mgr) + + # ------------------------------------------------------------------------- + # Unary Methods + + @final + def __neg__(self: NDFrameT) -> NDFrameT: + def blk_func(values: ArrayLike): + if is_bool_dtype(values.dtype): + # error: Argument 1 to "inv" has incompatible type "Union + # [ExtensionArray, ndarray[Any, Any]]"; expected + # "_SupportsInversion[ndarray[Any, dtype[bool_]]]" + return operator.inv(values) # type: ignore[arg-type] + else: + # error: Argument 1 to "neg" has incompatible type "Union + # [ExtensionArray, ndarray[Any, Any]]"; expected + # "_SupportsNeg[ndarray[Any, dtype[Any]]]" + return operator.neg(values) # type: ignore[arg-type] + + new_data = self._mgr.apply(blk_func) + res = self._constructor(new_data) + return res.__finalize__(self, method="__neg__") + + @final + def __pos__(self: NDFrameT) -> NDFrameT: + def blk_func(values: ArrayLike): + if is_bool_dtype(values.dtype): + return values.copy() + else: + # error: Argument 1 to "pos" has incompatible type "Union + # [ExtensionArray, ndarray[Any, Any]]"; expected + # "_SupportsPos[ndarray[Any, dtype[Any]]]" + return operator.pos(values) # type: ignore[arg-type] + + new_data = self._mgr.apply(blk_func) + res = self._constructor(new_data) + return res.__finalize__(self, method="__pos__") + + @final + def __invert__(self: NDFrameT) -> NDFrameT: + if not self.size: + # inv fails with 0 len + return self + + new_data = self._mgr.apply(operator.invert) + return self._constructor(new_data).__finalize__(self, method="__invert__") + + @final + def __nonzero__(self) -> NoReturn: + raise ValueError( + f"The truth value of a {type(self).__name__} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ) + + __bool__ = __nonzero__ + + @final + def bool(self) -> bool_t: + """ + Return the bool of a single element Series or DataFrame. + + This must be a boolean scalar value, either True or False. It will raise a + ValueError if the Series or DataFrame does not have exactly 1 element, or that + element is not boolean (integer values 0 and 1 will also raise an exception). + + Returns + ------- + bool + The value in the Series or DataFrame. + + See Also + -------- + Series.astype : Change the data type of a Series, including to boolean. + DataFrame.astype : Change the data type of a DataFrame, including to boolean. + numpy.bool_ : NumPy boolean data type, used by pandas for boolean values. + + Examples + -------- + The method will only work for single element objects with a boolean value: + + >>> pd.Series([True]).bool() + True + >>> pd.Series([False]).bool() + False + + >>> pd.DataFrame({'col': [True]}).bool() + True + >>> pd.DataFrame({'col': [False]}).bool() + False + """ + v = self.squeeze() + if isinstance(v, (bool, np.bool_)): + return bool(v) + elif is_scalar(v): + raise ValueError( + "bool cannot act on a non-boolean single element " + f"{type(self).__name__}" + ) + + self.__nonzero__() + # for mypy (__nonzero__ raises) + return True + + @final + def abs(self: NDFrameT) -> NDFrameT: + """ + Return a Series/DataFrame with absolute numeric value of each element. + + This function only applies to elements that are all numeric. 
+ + Returns + ------- + abs + Series/DataFrame containing the absolute value of each element. + + See Also + -------- + numpy.absolute : Calculate the absolute value element-wise. + + Notes + ----- + For ``complex`` inputs, ``1.2 + 1j``, the absolute value is + :math:`\\sqrt{ a^2 + b^2 }`. + + Examples + -------- + Absolute numeric values in a Series. + + >>> s = pd.Series([-1.10, 2, -3.33, 4]) + >>> s.abs() + 0 1.10 + 1 2.00 + 2 3.33 + 3 4.00 + dtype: float64 + + Absolute numeric values in a Series with complex numbers. + + >>> s = pd.Series([1.2 + 1j]) + >>> s.abs() + 0 1.56205 + dtype: float64 + + Absolute numeric values in a Series with a Timedelta element. + + >>> s = pd.Series([pd.Timedelta('1 days')]) + >>> s.abs() + 0 1 days + dtype: timedelta64[ns] + + Select rows with data closest to certain value using argsort (from + `StackOverflow `__). + + >>> df = pd.DataFrame({ + ... 'a': [4, 5, 6, 7], + ... 'b': [10, 20, 30, 40], + ... 'c': [100, 50, -30, -50] + ... }) + >>> df + a b c + 0 4 10 100 + 1 5 20 50 + 2 6 30 -30 + 3 7 40 -50 + >>> df.loc[(df.c - 43).abs().argsort()] + a b c + 1 5 20 50 + 0 4 10 100 + 2 6 30 -30 + 3 7 40 -50 + """ + res_mgr = self._mgr.apply(np.abs) + return self._constructor(res_mgr).__finalize__(self, name="abs") + + @final + def __abs__(self: NDFrameT) -> NDFrameT: + return self.abs() + + @final + def __round__(self: NDFrameT, decimals: int = 0) -> NDFrameT: + return self.round(decimals).__finalize__(self, method="__round__") + + # ------------------------------------------------------------------------- + # Label or Level Combination Helpers + # + # A collection of helper methods for DataFrame/Series operations that + # accept a combination of column/index labels and levels. All such + # operations should utilize/extend these methods when possible so that we + # have consistent precedence and validation logic throughout the library. + + @final + def _is_level_reference(self, key: Level, axis=0) -> bool_t: + """ + Test whether a key is a level reference for a given axis. + + To be considered a level reference, `key` must be a string that: + - (axis=0): Matches the name of an index level and does NOT match + a column label. + - (axis=1): Matches the name of a column level and does NOT match + an index label. + + Parameters + ---------- + key : Hashable + Potential level name for the given axis + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + is_level : bool + """ + axis = self._get_axis_number(axis) + + return ( + key is not None + and is_hashable(key) + and key in self.axes[axis].names + and not self._is_label_reference(key, axis=axis) + ) + + @final + def _is_label_reference(self, key: Level, axis=0) -> bool_t: + """ + Test whether a key is a label reference for a given axis. + + To be considered a label reference, `key` must be a string that: + - (axis=0): Matches a column label + - (axis=1): Matches an index label + + Parameters + ---------- + key : Hashable + Potential label name, i.e. Index entry. 
+ axis : int, default 0 + Axis perpendicular to the axis that labels are associated with + (0 means search for column labels, 1 means search for index labels) + + Returns + ------- + is_label: bool + """ + axis = self._get_axis_number(axis) + other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) + + return ( + key is not None + and is_hashable(key) + and any(key in self.axes[ax] for ax in other_axes) + ) + + @final + def _is_label_or_level_reference(self, key: Level, axis: int = 0) -> bool_t: + """ + Test whether a key is a label or level reference for a given axis. + + To be considered either a label or a level reference, `key` must be a + string that: + - (axis=0): Matches a column label or an index level + - (axis=1): Matches an index label or a column level + + Parameters + ---------- + key : Hashable + Potential label or level name + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + bool + """ + return self._is_level_reference(key, axis=axis) or self._is_label_reference( + key, axis=axis + ) + + @final + def _check_label_or_level_ambiguity(self, key: Level, axis: int = 0) -> None: + """ + Check whether `key` is ambiguous. + + By ambiguous, we mean that it matches both a level of the input + `axis` and a label of the other axis. + + Parameters + ---------- + key : Hashable + Label or level name. + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns). + + Raises + ------ + ValueError: `key` is ambiguous + """ + + axis = self._get_axis_number(axis) + other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) + + if ( + key is not None + and is_hashable(key) + and key in self.axes[axis].names + and any(key in self.axes[ax] for ax in other_axes) + ): + + # Build an informative and grammatical warning + level_article, level_type = ( + ("an", "index") if axis == 0 else ("a", "column") + ) + + label_article, label_type = ( + ("a", "column") if axis == 0 else ("an", "index") + ) + + msg = ( + f"'{key}' is both {level_article} {level_type} level and " + f"{label_article} {label_type} label, which is ambiguous." + ) + raise ValueError(msg) + + @final + def _get_label_or_level_values(self, key: Level, axis: int = 0) -> ArrayLike: + """ + Return a 1-D array of values associated with `key`, a label or level + from the given `axis`. + + Retrieval logic: + - (axis=0): Return column values if `key` matches a column label. + Otherwise return index level values if `key` matches an index + level. + - (axis=1): Return row values if `key` matches an index label. + Otherwise return column level values if 'key' matches a column + level + + Parameters + ---------- + key : Hashable + Label or level name. + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + np.ndarray or ExtensionArray + + Raises + ------ + KeyError + if `key` matches neither a label nor a level + ValueError + if `key` matches multiple labels + FutureWarning + if `key` is ambiguous. 
This will become an ambiguity error in a + future version + """ + axis = self._get_axis_number(axis) + other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis] + + if self._is_label_reference(key, axis=axis): + self._check_label_or_level_ambiguity(key, axis=axis) + values = self.xs(key, axis=other_axes[0])._values + elif self._is_level_reference(key, axis=axis): + # error: Incompatible types in assignment (expression has type "Union[ + # ExtensionArray, ndarray[Any, Any]]", variable has type "ndarray[Any, + # Any]") + values = ( + self.axes[axis] + .get_level_values(key) # type: ignore[assignment] + ._values + ) + else: + raise KeyError(key) + + # Check for duplicates + if values.ndim > 1: + + if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex): + multi_message = ( + "\n" + "For a multi-index, the label must be a " + "tuple with elements corresponding to each level." + ) + else: + multi_message = "" + + label_axis_name = "column" if axis == 0 else "index" + raise ValueError( + f"The {label_axis_name} label '{key}' is not unique.{multi_message}" + ) + + return values + + @final + def _drop_labels_or_levels(self, keys, axis: int = 0): + """ + Drop labels and/or levels for the given `axis`. + + For each key in `keys`: + - (axis=0): If key matches a column label then drop the column. + Otherwise if key matches an index level then drop the level. + - (axis=1): If key matches an index label then drop the row. + Otherwise if key matches a column level then drop the level. + + Parameters + ---------- + keys : str or list of str + labels or levels to drop + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + dropped: DataFrame + + Raises + ------ + ValueError + if any `keys` match neither a label nor a level + """ + axis = self._get_axis_number(axis) + + # Validate keys + keys = com.maybe_make_list(keys) + invalid_keys = [ + k for k in keys if not self._is_label_or_level_reference(k, axis=axis) + ] + + if invalid_keys: + raise ValueError( + "The following keys are not valid labels or " + f"levels for axis {axis}: {invalid_keys}" + ) + + # Compute levels and labels to drop + levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)] + + labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)] + + # Perform copy upfront and then use inplace operations below. + # This ensures that we always perform exactly one copy. + # ``copy`` and/or ``inplace`` options could be added in the future. 
+ dropped = self.copy() + + if axis == 0: + # Handle dropping index levels + if levels_to_drop: + dropped.reset_index(levels_to_drop, drop=True, inplace=True) + + # Handle dropping columns labels + if labels_to_drop: + dropped.drop(labels_to_drop, axis=1, inplace=True) + else: + # Handle dropping column levels + if levels_to_drop: + if isinstance(dropped.columns, MultiIndex): + # Drop the specified levels from the MultiIndex + dropped.columns = dropped.columns.droplevel(levels_to_drop) + else: + # Drop the last level of Index by replacing with + # a RangeIndex + dropped.columns = RangeIndex(dropped.columns.size) + + # Handle dropping index labels + if labels_to_drop: + dropped.drop(labels_to_drop, axis=0, inplace=True) + + return dropped + + # ---------------------------------------------------------------------- + # Iteration + + # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 + # Incompatible types in assignment (expression has type "None", base class + # "object" defined the type as "Callable[[object], int]") + __hash__: ClassVar[None] # type: ignore[assignment] + + def __iter__(self): + """ + Iterate over info axis. + + Returns + ------- + iterator + Info axis as iterator. + """ + return iter(self._info_axis) + + # can we get a better explanation of this? + def keys(self) -> Index: + """ + Get the 'info axis' (see Indexing for more). + + This is index for Series, columns for DataFrame. + + Returns + ------- + Index + Info axis. + """ + return self._info_axis + + def items(self): + """ + Iterate over (label, values) on info axis + + This is index for Series and columns for DataFrame. + + Returns + ------- + Generator + """ + for h in self._info_axis: + yield h, self[h] + + def __len__(self) -> int: + """Returns length of info axis""" + return len(self._info_axis) + + @final + def __contains__(self, key) -> bool_t: + """True if the key is in the info axis""" + return key in self._info_axis + + @property + def empty(self) -> bool_t: + """ + Indicator whether Series/DataFrame is empty. + + True if Series/DataFrame is entirely empty (no items), meaning any of the + axes are of length 0. + + Returns + ------- + bool + If Series/DataFrame is empty, return True, if not return False. + + See Also + -------- + Series.dropna : Return series without null values. + DataFrame.dropna : Return DataFrame with labels on given axis omitted + where (all or any) data are missing. + + Notes + ----- + If Series/DataFrame contains only NaNs, it is still not considered empty. See + the example below. + + Examples + -------- + An example of an actual empty DataFrame. Notice the index is empty: + + >>> df_empty = pd.DataFrame({'A' : []}) + >>> df_empty + Empty DataFrame + Columns: [A] + Index: [] + >>> df_empty.empty + True + + If we only have NaNs in our DataFrame, it is not considered empty! 
We + will need to drop the NaNs to make the DataFrame empty: + + >>> df = pd.DataFrame({'A' : [np.nan]}) + >>> df + A + 0 NaN + >>> df.empty + False + >>> df.dropna().empty + True + + >>> ser_empty = pd.Series({'A' : []}) + >>> ser_empty + A [] + dtype: object + >>> ser_empty.empty + False + >>> ser_empty = pd.Series() + >>> ser_empty.empty + True + """ + return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS) + + # ---------------------------------------------------------------------- + # Array Interface + + # This is also set in IndexOpsMixin + # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented + __array_priority__: int = 1000 + + def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: + return np.asarray(self._values, dtype=dtype) + + def __array_wrap__( + self, + result: np.ndarray, + context: tuple[Callable, tuple[Any, ...], int] | None = None, + ): + """ + Gets called after a ufunc and other functions. + + Parameters + ---------- + result: np.ndarray + The result of the ufunc or other function called on the NumPy array + returned by __array__ + context: tuple of (func, tuple, int) + This parameter is returned by ufuncs as a 3-element tuple: (name of the + ufunc, arguments of the ufunc, domain of the ufunc), but is not set by + other numpy functions.q + + Notes + ----- + Series implements __array_ufunc_ so this not called for ufunc on Series. + """ + # Note: at time of dask 2022.01.0, this is still used by dask + warnings.warn( + "The __array_wrap__ method of DataFrame and Series will be removed in " + "a future version", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + res = lib.item_from_zerodim(result) + if is_scalar(res): + # e.g. we get here with np.ptp(series) + # ptp also requires the item_from_zerodim + return res + d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) + return self._constructor(res, **d).__finalize__(self, method="__array_wrap__") + + @final + def __array_ufunc__( + self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any + ): + return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs) + + # ---------------------------------------------------------------------- + # Picklability + + @final + def __getstate__(self) -> dict[str, Any]: + meta = {k: getattr(self, k, None) for k in self._metadata} + return { + "_mgr": self._mgr, + "_typ": self._typ, + "_metadata": self._metadata, + "attrs": self.attrs, + "_flags": {k: self.flags[k] for k in self.flags._keys}, + **meta, + } + + @final + def __setstate__(self, state) -> None: + if isinstance(state, BlockManager): + self._mgr = state + elif isinstance(state, dict): + if "_data" in state and "_mgr" not in state: + # compat for older pickles + state["_mgr"] = state.pop("_data") + typ = state.get("_typ") + if typ is not None: + attrs = state.get("_attrs", {}) + object.__setattr__(self, "_attrs", attrs) + flags = state.get("_flags", {"allows_duplicate_labels": True}) + object.__setattr__(self, "_flags", Flags(self, **flags)) + + # set in the order of internal names + # to avoid definitional recursion + # e.g. 
say fill_value needing _mgr to be + # defined + meta = set(self._internal_names + self._metadata) + for k in list(meta): + if k in state and k != "_flags": + v = state[k] + object.__setattr__(self, k, v) + + for k, v in state.items(): + if k not in meta: + object.__setattr__(self, k, v) + + else: + raise NotImplementedError("Pre-0.12 pickles are no longer supported") + elif len(state) == 2: + raise NotImplementedError("Pre-0.12 pickles are no longer supported") + + self._item_cache: dict[Hashable, Series] = {} + + # ---------------------------------------------------------------------- + # Rendering Methods + + def __repr__(self) -> str: + # string representation based upon iterating over self + # (since, by definition, `PandasContainers` are iterable) + prepr = f"[{','.join(map(pprint_thing, self))}]" + return f"{type(self).__name__}({prepr})" + + @final + def _repr_latex_(self): + """ + Returns a LaTeX representation for a particular object. + Mainly for use with nbconvert (jupyter notebook conversion to pdf). + """ + if config.get_option("display.latex.repr"): + return self.to_latex() + else: + return None + + @final + def _repr_data_resource_(self): + """ + Not a real Jupyter special repr method, but we use the same + naming convention. + """ + if config.get_option("display.html.table_schema"): + data = self.head(config.get_option("display.max_rows")) + + as_json = data.to_json(orient="table") + as_json = cast(str, as_json) + return json.loads(as_json, object_pairs_hook=collections.OrderedDict) + + # ---------------------------------------------------------------------- + # I/O Methods + + @final + @deprecate_kwarg(old_arg_name="verbose", new_arg_name=None) + @deprecate_kwarg(old_arg_name="encoding", new_arg_name=None) + @doc( + klass="object", + storage_options=_shared_docs["storage_options"], + storage_options_versionadded="1.2.0", + ) + def to_excel( + self, + excel_writer, + sheet_name: str = "Sheet1", + na_rep: str = "", + float_format: str | None = None, + columns: Sequence[Hashable] | None = None, + header: Sequence[Hashable] | bool_t = True, + index: bool_t = True, + index_label: IndexLabel = None, + startrow: int = 0, + startcol: int = 0, + engine: str | None = None, + merge_cells: bool_t = True, + encoding: lib.NoDefault = lib.no_default, + inf_rep: str = "inf", + verbose: lib.NoDefault = lib.no_default, + freeze_panes: tuple[int, int] | None = None, + storage_options: StorageOptions = None, + ) -> None: + """ + Write {klass} to an Excel sheet. + + To write a single {klass} to an Excel .xlsx file it is only necessary to + specify a target file name. To write to multiple sheets it is necessary to + create an `ExcelWriter` object with a target file name, and specify a sheet + in the file to write to. + + Multiple sheets may be written to by specifying unique `sheet_name`. + With all data written to the file it is necessary to save the changes. + Note that creating an `ExcelWriter` object with a file name that already + exists will result in the contents of the existing file being erased. + + Parameters + ---------- + excel_writer : path-like, file-like, or ExcelWriter object + File path or existing ExcelWriter. + sheet_name : str, default 'Sheet1' + Name of sheet which will contain DataFrame. + na_rep : str, default '' + Missing data representation. + float_format : str, optional + Format string for floating point numbers. For example + ``float_format="%.2f"`` will format 0.1234 to 0.12. + columns : sequence or list of str, optional + Columns to write. 
+ header : bool or list of str, default True + Write out the column names. If a list of string is given it is + assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + index_label : str or sequence, optional + Column label for index column(s) if desired. If not specified, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. + startrow : int, default 0 + Upper left cell row to dump data frame. + startcol : int, default 0 + Upper left cell column to dump data frame. + engine : str, optional + Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this + via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and + ``io.excel.xlsm.writer``. + + .. deprecated:: 1.2.0 + + As the `xlwt `__ package is no longer + maintained, the ``xlwt`` engine will be removed in a future version + of pandas. + + merge_cells : bool, default True + Write MultiIndex and Hierarchical Rows as merged cells. + encoding : str, optional + Encoding of the resulting excel file. Only necessary for xlwt, + other writers support unicode natively. + + .. deprecated:: 1.5.0 + + This keyword was not used. + + inf_rep : str, default 'inf' + Representation for infinity (there is no native representation for + infinity in Excel). + verbose : bool, default True + Display more information in the error logs. + + .. deprecated:: 1.5.0 + + This keyword was not used. + + freeze_panes : tuple of int (length 2), optional + Specifies the one-based bottommost row and rightmost column that + is to be frozen. + {storage_options} + + .. versionadded:: {storage_options_versionadded} + + See Also + -------- + to_csv : Write DataFrame to a comma-separated values (csv) file. + ExcelWriter : Class for writing DataFrame objects into excel sheets. + read_excel : Read an Excel file into a pandas DataFrame. + read_csv : Read a comma-separated values (csv) file into DataFrame. + io.formats.style.Styler.to_excel : Add styles to Excel sheet. + + Notes + ----- + For compatibility with :meth:`~DataFrame.to_csv`, + to_excel serializes lists and dicts to strings before writing. + + Once a workbook has been saved it is not possible to write further + data without rewriting the whole workbook. + + Examples + -------- + + Create, write to and save a workbook: + + >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + >>> df1.to_excel("output.xlsx") # doctest: +SKIP + + To specify the sheet name: + + >>> df1.to_excel("output.xlsx", + ... sheet_name='Sheet_name_1') # doctest: +SKIP + + If you wish to write to more than one sheet in the workbook, it is + necessary to specify an ExcelWriter object: + + >>> df2 = df1.copy() + >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP + ... df1.to_excel(writer, sheet_name='Sheet_name_1') + ... df2.to_excel(writer, sheet_name='Sheet_name_2') + + ExcelWriter can also be used to append to an existing Excel file: + + >>> with pd.ExcelWriter('output.xlsx', + ... mode='a') as writer: # doctest: +SKIP + ... 
df.to_excel(writer, sheet_name='Sheet_name_3') + + To set the library that is used to write the Excel file, + you can pass the `engine` keyword (the default engine is + automatically chosen depending on the file extension): + + >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP + """ + + df = self if isinstance(self, ABCDataFrame) else self.to_frame() + + from pandas.io.formats.excel import ExcelFormatter + + formatter = ExcelFormatter( + df, + na_rep=na_rep, + cols=columns, + header=header, + float_format=float_format, + index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep, + ) + formatter.write( + excel_writer, + sheet_name=sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + engine=engine, + storage_options=storage_options, + ) + + @final + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path_or_buf", + ) + def to_json( + self, + path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + orient: str | None = None, + date_format: str | None = None, + double_precision: int = 10, + force_ascii: bool_t = True, + date_unit: str = "ms", + default_handler: Callable[[Any], JSONSerializable] | None = None, + lines: bool_t = False, + compression: CompressionOptions = "infer", + index: bool_t = True, + indent: int | None = None, + storage_options: StorageOptions = None, + ) -> str | None: + """ + Convert the object to a JSON string. + + Note NaN's and None will be converted to null and datetime objects + will be converted to UNIX timestamps. + + Parameters + ---------- + path_or_buf : str, path object, file-like object, or None, default None + String, path object (implementing os.PathLike[str]), or file-like + object implementing a write() function. If None, the result is + returned as a string. + orient : str + Indication of expected JSON string format. + + * Series: + + - default is 'index' + - allowed values are: {{'split', 'records', 'index', 'table'}}. + + * DataFrame: + + - default is 'columns' + - allowed values are: {{'split', 'records', 'index', 'columns', + 'values', 'table'}}. + + * The format of the JSON string: + + - 'split' : dict like {{'index' -> [index], 'columns' -> [columns], + 'data' -> [values]}} + - 'records' : list like [{{column -> value}}, ... , {{column -> value}}] + - 'index' : dict like {{index -> {{column -> value}}}} + - 'columns' : dict like {{column -> {{index -> value}}}} + - 'values' : just the values array + - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}} + + Describing the data, where data component is like ``orient='records'``. + + date_format : {{None, 'epoch', 'iso'}} + Type of date conversion. 'epoch' = epoch milliseconds, + 'iso' = ISO8601. The default depends on the `orient`. For + ``orient='table'``, the default is 'iso'. For all other orients, + the default is 'epoch'. + double_precision : int, default 10 + The number of decimal places to use when encoding + floating point values. + force_ascii : bool, default True + Force encoded string to be ASCII. + date_unit : str, default 'ms' (milliseconds) + The time unit to encode to, governs timestamp and ISO8601 + precision. One of 's', 'ms', 'us', 'ns' for second, millisecond, + microsecond, and nanosecond respectively. + default_handler : callable, default None + Handler to call if object cannot otherwise be converted to a + suitable format for JSON. 
Should receive a single argument which is + the object to convert and return a serialisable object. + lines : bool, default False + If 'orient' is 'records' write out line-delimited json format. Will + throw ValueError if incorrect 'orient' since others are not + list-like. + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + index : bool, default True + Whether to include the index values in the JSON string. Not + including the index (``index=False``) is only supported when + orient is 'split' or 'table'. + indent : int, optional + Length of whitespace used to indent each record. + + .. versionadded:: 1.0.0 + + {storage_options} + + .. versionadded:: 1.2.0 + + Returns + ------- + None or str + If path_or_buf is None, returns the resulting json format as a + string. Otherwise returns None. + + See Also + -------- + read_json : Convert a JSON string to pandas object. + + Notes + ----- + The behavior of ``indent=0`` varies from the stdlib, which does not + indent the output but does insert newlines. Currently, ``indent=0`` + and the default ``indent=None`` are equivalent in pandas, though this + may change in a future release. + + ``orient='table'`` contains a 'pandas_version' field under 'schema'. + This stores the version of `pandas` used in the latest revision of the + schema. + + Examples + -------- + >>> import json + >>> df = pd.DataFrame( + ... [["a", "b"], ["c", "d"]], + ... index=["row 1", "row 2"], + ... columns=["col 1", "col 2"], + ... ) + + >>> result = df.to_json(orient="split") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + {{ + "columns": [ + "col 1", + "col 2" + ], + "index": [ + "row 1", + "row 2" + ], + "data": [ + [ + "a", + "b" + ], + [ + "c", + "d" + ] + ] + }} + + Encoding/decoding a Dataframe using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. 
+ + >>> result = df.to_json(orient="records") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + [ + {{ + "col 1": "a", + "col 2": "b" + }}, + {{ + "col 1": "c", + "col 2": "d" + }} + ] + + Encoding/decoding a Dataframe using ``'index'`` formatted JSON: + + >>> result = df.to_json(orient="index") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + {{ + "row 1": {{ + "col 1": "a", + "col 2": "b" + }}, + "row 2": {{ + "col 1": "c", + "col 2": "d" + }} + }} + + Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: + + >>> result = df.to_json(orient="columns") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + {{ + "col 1": {{ + "row 1": "a", + "row 2": "c" + }}, + "col 2": {{ + "row 1": "b", + "row 2": "d" + }} + }} + + Encoding/decoding a Dataframe using ``'values'`` formatted JSON: + + >>> result = df.to_json(orient="values") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + [ + [ + "a", + "b" + ], + [ + "c", + "d" + ] + ] + + Encoding with Table Schema: + + >>> result = df.to_json(orient="table") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + {{ + "schema": {{ + "fields": [ + {{ + "name": "index", + "type": "string" + }}, + {{ + "name": "col 1", + "type": "string" + }}, + {{ + "name": "col 2", + "type": "string" + }} + ], + "primaryKey": [ + "index" + ], + "pandas_version": "1.4.0" + }}, + "data": [ + {{ + "index": "row 1", + "col 1": "a", + "col 2": "b" + }}, + {{ + "index": "row 2", + "col 1": "c", + "col 2": "d" + }} + ] + }} + """ + from pandas.io import json + + if date_format is None and orient == "table": + date_format = "iso" + elif date_format is None: + date_format = "epoch" + + config.is_nonnegative_int(indent) + indent = indent or 0 + + return json.to_json( + path_or_buf=path_or_buf, + obj=self, + orient=orient, + date_format=date_format, + double_precision=double_precision, + force_ascii=force_ascii, + date_unit=date_unit, + default_handler=default_handler, + lines=lines, + compression=compression, + index=index, + indent=indent, + storage_options=storage_options, + ) + + @final + def to_hdf( + self, + path_or_buf: FilePath | HDFStore, + key: str, + mode: str = "a", + complevel: int | None = None, + complib: str | None = None, + append: bool_t = False, + format: str | None = None, + index: bool_t = True, + min_itemsize: int | dict[str, int] | None = None, + nan_rep=None, + dropna: bool_t | None = None, + data_columns: Literal[True] | list[str] | None = None, + errors: str = "strict", + encoding: str = "UTF-8", + ) -> None: + """ + Write the contained data to an HDF5 file using HDFStore. + + Hierarchical Data Format (HDF) is self-describing, allowing an + application to interpret the structure and contents of a file with + no outside information. One HDF file can hold a mix of related objects + which can be accessed as a group or as individual objects. + + In order to add another DataFrame or Series to an existing HDF file + please use append mode and a different a key. + + .. warning:: + + One can store a subclass of ``DataFrame`` or ``Series`` to HDF5, + but the type of the subclass is lost upon storing. + + For more information see the :ref:`user guide `. + + Parameters + ---------- + path_or_buf : str or pandas.HDFStore + File path or HDFStore object. + key : str + Identifier for the group in the store. 
+ mode : {'a', 'w', 'r+'}, default 'a' + Mode to open file: + + - 'w': write, a new file is created (an existing file with + the same name would be deleted). + - 'a': append, an existing file is opened for reading and + writing, and if the file does not exist it is created. + - 'r+': similar to 'a', but the file must already exist. + complevel : {0-9}, default None + Specifies a compression level for data. + A value of 0 or None disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. + append : bool, default False + For Table formats, append the input data to the existing. + format : {'fixed', 'table', None}, default 'fixed' + Possible values: + + - 'fixed': Fixed format. Fast writing/reading. Not-appendable, + nor searchable. + - 'table': Table format. Write as a PyTables Table structure + which may perform worse but allow more flexible operations + like searching / selecting subsets of the data. + - If None, pd.get_option('io.hdf.default_format') is checked, + followed by fallback to "fixed". + index : bool, default True + Write DataFrame index as a column. + min_itemsize : dict or int, optional + Map column names to minimum string sizes for columns. + nan_rep : Any, optional + How to represent null values as str. + Not allowed with append=True. + dropna : bool, default False, optional + Remove missing values. + data_columns : list of columns or True, optional + List of columns to create as indexed data columns for on-disk + queries, or True to use all columns. By default only the axes + of the object are indexed. See + :ref:`Query via data columns`. for + more information. + Applicable only to format='table'. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + encoding : str, default "UTF-8" + + See Also + -------- + read_hdf : Read from HDF file. + DataFrame.to_orc : Write a DataFrame to the binary orc format. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + DataFrame.to_sql : Write to a SQL table. + DataFrame.to_feather : Write out feather-format for DataFrames. + DataFrame.to_csv : Write out to a csv file. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + ... 
index=['a', 'b', 'c']) # doctest: +SKIP + >>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP + + We can add another object to the same file: + + >>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP + >>> s.to_hdf('data.h5', key='s') # doctest: +SKIP + + Reading from HDF file: + + >>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP + A B + a 1 4 + b 2 5 + c 3 6 + >>> pd.read_hdf('data.h5', 's') # doctest: +SKIP + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + """ + from pandas.io import pytables + + # Argument 3 to "to_hdf" has incompatible type "NDFrame"; expected + # "Union[DataFrame, Series]" [arg-type] + pytables.to_hdf( + path_or_buf, + key, + self, # type: ignore[arg-type] + mode=mode, + complevel=complevel, + complib=complib, + append=append, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + dropna=dropna, + data_columns=data_columns, + errors=errors, + encoding=encoding, + ) + + @final + def to_sql( + self, + name: str, + con, + schema: str | None = None, + if_exists: str = "fail", + index: bool_t = True, + index_label: IndexLabel = None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, + method: str | None = None, + ) -> int | None: + """ + Write records stored in a DataFrame to a SQL database. + + Databases supported by SQLAlchemy [1]_ are supported. Tables can be + newly created, appended to, or overwritten. + + Parameters + ---------- + name : str + Name of SQL table. + con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. Legacy support is provided for sqlite3.Connection objects. The user + is responsible for engine disposal and connection closure for the SQLAlchemy + connectable See `here \ + `_. + + schema : str, optional + Specify the schema (if database flavor supports this). If None, use + default schema. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + How to behave if the table already exists. + + * fail: Raise a ValueError. + * replace: Drop the table before inserting new values. + * append: Insert new values to the existing table. + + index : bool, default True + Write DataFrame index as a column. Uses `index_label` as the column + name in the table. + index_label : str or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + chunksize : int, optional + Specify the number of rows in each batch to be written at a time. + By default, all rows will be written at once. + dtype : dict or scalar, optional + Specifying the datatype for columns. If a dictionary is used, the + keys should be the column names and the values should be the + SQLAlchemy types or strings for the sqlite3 legacy mode. If a + scalar is provided, it will be applied to all columns. + method : {None, 'multi', callable}, optional + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + + Returns + ------- + None or int + Number of rows affected by to_sql. None is returned if the callable + passed into ``method`` does not return an integer number of rows. 
+ + The number of returned rows affected is the sum of the ``rowcount`` + attribute of ``sqlite3.Cursor`` or SQLAlchemy connectable which may not + reflect the exact number of written rows as stipulated in the + `sqlite3 `__ or + `SQLAlchemy `__. + + .. versionadded:: 1.4.0 + + Raises + ------ + ValueError + When the table already exists and `if_exists` is 'fail' (the + default). + + See Also + -------- + read_sql : Read a DataFrame from a table. + + Notes + ----- + Timezone aware datetime columns will be written as + ``Timestamp with timezone`` type with SQLAlchemy if supported by the + database. Otherwise, the datetimes will be stored as timezone unaware + timestamps local to the original timezone. + + References + ---------- + .. [1] https://docs.sqlalchemy.org + .. [2] https://www.python.org/dev/peps/pep-0249/ + + Examples + -------- + Create an in-memory SQLite database. + + >>> from sqlalchemy import create_engine + >>> engine = create_engine('sqlite://', echo=False) + + Create a table from scratch with 3 rows. + + >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']}) + >>> df + name + 0 User 1 + 1 User 2 + 2 User 3 + + >>> df.to_sql('users', con=engine) + 3 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] + + An `sqlalchemy.engine.Connection` can also be passed to `con`: + + >>> with engine.begin() as connection: + ... df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) + ... df1.to_sql('users', con=connection, if_exists='append') + 2 + + This is allowed to support operations that require that the same + DBAPI connection is used for the entire operation. + + >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) + >>> df2.to_sql('users', con=engine, if_exists='append') + 2 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), + (0, 'User 4'), (1, 'User 5'), (0, 'User 6'), + (1, 'User 7')] + + Overwrite the table with just ``df2``. + + >>> df2.to_sql('users', con=engine, if_exists='replace', + ... index_label='id') + 2 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 6'), (1, 'User 7')] + + Specify the dtype (especially useful for integers with missing values). + Notice that while pandas is forced to store the data as floating point, + the database supports nullable integers. When fetching the data with + Python, we get back integer scalars. + + >>> df = pd.DataFrame({"A": [1, None, 2]}) + >>> df + A + 0 1.0 + 1 NaN + 2 2.0 + + >>> from sqlalchemy.types import Integer + >>> df.to_sql('integers', con=engine, index=False, + ... dtype={"A": Integer()}) + 3 + + >>> engine.execute("SELECT * FROM integers").fetchall() + [(1,), (None,), (2,)] + """ # noqa:E501 + from pandas.io import sql + + return sql.to_sql( + self, + name, + con, + schema=schema, + if_exists=if_exists, + index=index, + index_label=index_label, + chunksize=chunksize, + dtype=dtype, + method=method, + ) + + @final + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path", + ) + def to_pickle( + self, + path: FilePath | WriteBuffer[bytes], + compression: CompressionOptions = "infer", + protocol: int = pickle.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, + ) -> None: + """ + Pickle (serialize) object to file. + + Parameters + ---------- + path : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. 
File path where + the pickled object will be stored. + {compression_options} + protocol : int + Int which indicates which protocol should be used by the pickler, + default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible + values are 0, 1, 2, 3, 4, 5. A negative value for the protocol + parameter is equivalent to setting its value to HIGHEST_PROTOCOL. + + .. [1] https://docs.python.org/3/library/pickle.html. + + {storage_options} + + .. versionadded:: 1.2.0 + + See Also + -------- + read_pickle : Load pickled pandas object (or any object) from file. + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_sql : Write DataFrame to a SQL database. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Examples + -------- + >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP + >>> original_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> original_df.to_pickle("./dummy.pkl") # doctest: +SKIP + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP + >>> unpickled_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + """ # noqa: E501 + from pandas.io.pickle import to_pickle + + to_pickle( + self, + path, + compression=compression, + protocol=protocol, + storage_options=storage_options, + ) + + @final + def to_clipboard( + self, excel: bool_t = True, sep: str | None = None, **kwargs + ) -> None: + r""" + Copy object to the system clipboard. + + Write a text representation of object to the system clipboard. + This can be pasted into Excel, for example. + + Parameters + ---------- + excel : bool, default True + Produce output in a csv format for easy pasting into excel. + + - True, use the provided separator for csv pasting. + - False, write a string representation of the object to the clipboard. + + sep : str, default ``'\t'`` + Field delimiter. + **kwargs + These parameters will be passed to DataFrame.to_csv. + + See Also + -------- + DataFrame.to_csv : Write a DataFrame to a comma-separated values + (csv) file. + read_clipboard : Read text from clipboard and pass to read_csv. + + Notes + ----- + Requirements for your platform. + + - Linux : `xclip`, or `xsel` (with `PyQt4` modules) + - Windows : none + - macOS : none + + This method uses the processes developed for the package `pyperclip`. A + solution to render any output string format is given in the examples. + + Examples + -------- + Copy the contents of a DataFrame to the clipboard. + + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) + + >>> df.to_clipboard(sep=',') # doctest: +SKIP + ... # Wrote the following to the system clipboard: + ... # ,A,B,C + ... # 0,1,2,3 + ... # 1,4,5,6 + + We can omit the index by passing the keyword `index` and setting + it to false. + + >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP + ... # Wrote the following to the system clipboard: + ... # A,B,C + ... # 1,2,3 + ... # 4,5,6 + + Using the original `pyperclip` package for any string output format. + + .. code-block:: python + + import pyperclip + html = df.style.to_html() + pyperclip.copy(html) + """ + from pandas.io import clipboards + + clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs) + + @final + def to_xarray(self): + """ + Return an xarray object from the pandas object. + + Returns + ------- + xarray.DataArray or xarray.Dataset + Data in the pandas structure converted to Dataset if the object is + a DataFrame, or a DataArray if the object is a Series. 
+ + See Also + -------- + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Notes + ----- + See the `xarray docs `__ + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), + ... ('parrot', 'bird', 24.0, 2), + ... ('lion', 'mammal', 80.5, 4), + ... ('monkey', 'mammal', np.nan, 4)], + ... columns=['name', 'class', 'max_speed', + ... 'num_legs']) + >>> df + name class max_speed num_legs + 0 falcon bird 389.0 2 + 1 parrot bird 24.0 2 + 2 lion mammal 80.5 4 + 3 monkey mammal NaN 4 + + >>> df.to_xarray() + + Dimensions: (index: 4) + Coordinates: + * index (index) int64 0 1 2 3 + Data variables: + name (index) object 'falcon' 'parrot' 'lion' 'monkey' + class (index) object 'bird' 'bird' 'mammal' 'mammal' + max_speed (index) float64 389.0 24.0 80.5 nan + num_legs (index) int64 2 2 4 4 + + >>> df['max_speed'].to_xarray() + + array([389. , 24. , 80.5, nan]) + Coordinates: + * index (index) int64 0 1 2 3 + + >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', + ... '2018-01-02', '2018-01-02']) + >>> df_multiindex = pd.DataFrame({'date': dates, + ... 'animal': ['falcon', 'parrot', + ... 'falcon', 'parrot'], + ... 'speed': [350, 18, 361, 15]}) + >>> df_multiindex = df_multiindex.set_index(['date', 'animal']) + + >>> df_multiindex + speed + date animal + 2018-01-01 falcon 350 + parrot 18 + 2018-01-02 falcon 361 + parrot 15 + + >>> df_multiindex.to_xarray() + + Dimensions: (date: 2, animal: 2) + Coordinates: + * date (date) datetime64[ns] 2018-01-01 2018-01-02 + * animal (animal) object 'falcon' 'parrot' + Data variables: + speed (date, animal) int64 350 18 361 15 + """ + xarray = import_optional_dependency("xarray") + + if self.ndim == 1: + return xarray.DataArray.from_series(self) + else: + return xarray.Dataset.from_dataframe(self) + + @overload + def to_latex( + self, + buf: None = ..., + columns: Sequence[Hashable] | None = ..., + col_space: ColspaceArgType | None = ..., + header: bool_t | Sequence[str] = ..., + index: bool_t = ..., + na_rep: str = ..., + formatters: FormattersType | None = ..., + float_format: FloatFormatType | None = ..., + sparsify: bool_t | None = ..., + index_names: bool_t = ..., + bold_rows: bool_t = ..., + column_format: str | None = ..., + longtable: bool_t | None = ..., + escape: bool_t | None = ..., + encoding: str | None = ..., + decimal: str = ..., + multicolumn: bool_t | None = ..., + multicolumn_format: str | None = ..., + multirow: bool_t | None = ..., + caption: str | tuple[str, str] | None = ..., + label: str | None = ..., + position: str | None = ..., + ) -> str: + ... + + @overload + def to_latex( + self, + buf: FilePath | WriteBuffer[str], + columns: Sequence[Hashable] | None = ..., + col_space: ColspaceArgType | None = ..., + header: bool_t | Sequence[str] = ..., + index: bool_t = ..., + na_rep: str = ..., + formatters: FormattersType | None = ..., + float_format: FloatFormatType | None = ..., + sparsify: bool_t | None = ..., + index_names: bool_t = ..., + bold_rows: bool_t = ..., + column_format: str | None = ..., + longtable: bool_t | None = ..., + escape: bool_t | None = ..., + encoding: str | None = ..., + decimal: str = ..., + multicolumn: bool_t | None = ..., + multicolumn_format: str | None = ..., + multirow: bool_t | None = ..., + caption: str | tuple[str, str] | None = ..., + label: str | None = ..., + position: str | None = ..., + ) -> None: + ... 
+ + @final + @doc(returns=fmt.return_docstring) + def to_latex( + self, + buf: FilePath | WriteBuffer[str] | None = None, + columns: Sequence[Hashable] | None = None, + col_space: ColspaceArgType | None = None, + header: bool_t | Sequence[str] = True, + index: bool_t = True, + na_rep: str = "NaN", + formatters: FormattersType | None = None, + float_format: FloatFormatType | None = None, + sparsify: bool_t | None = None, + index_names: bool_t = True, + bold_rows: bool_t = False, + column_format: str | None = None, + longtable: bool_t | None = None, + escape: bool_t | None = None, + encoding: str | None = None, + decimal: str = ".", + multicolumn: bool_t | None = None, + multicolumn_format: str | None = None, + multirow: bool_t | None = None, + caption: str | tuple[str, str] | None = None, + label: str | None = None, + position: str | None = None, + ) -> str | None: + r""" + Render object to a LaTeX tabular, longtable, or nested table. + + Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted + into a main LaTeX document or read from an external file + with ``\input{{table.tex}}``. + + .. versionchanged:: 1.0.0 + Added caption and label arguments. + + .. versionchanged:: 1.2.0 + Added position argument, changed meaning of caption argument. + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : list of label, optional + The subset of columns to write. Writes all columns by default. + col_space : int, optional + The minimum width of each column. + header : bool or list of str, default True + Write out the column names. If a list of strings is given, + it is assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + na_rep : str, default 'NaN' + Missing data representation. + formatters : list of functions or dict of {{str: function}}, optional + Formatter functions to apply to columns' elements by position or + name. The result of each function must be a unicode string. + List must be of length equal to the number of columns. + float_format : one-parameter function or str, optional, default None + Formatter for floating point numbers. For example + ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will + both result in 0.1234 being formatted as 0.12. + sparsify : bool, optional + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. By default, the value will be + read from the config module. + index_names : bool, default True + Prints the names of the indexes. + bold_rows : bool, default False + Make the row labels bold in the output. + column_format : str, optional + The columns format as specified in `LaTeX table format + `__ e.g. 'rcl' for 3 + columns. By default, 'l' will be used for all columns except + columns of numbers, which default to 'r'. + longtable : bool, optional + By default, the value will be read from the pandas config + module. Use a longtable environment instead of tabular. Requires + adding a \usepackage{{longtable}} to your LaTeX preamble. + escape : bool, optional + By default, the value will be read from the pandas config + module. When set to False prevents from escaping latex special + characters in column names. + encoding : str, optional + A string representing the encoding to use in the output file, + defaults to 'utf-8'. + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. 
+ multicolumn : bool, default True + Use \multicolumn to enhance MultiIndex columns. + The default will be read from the config module. + multicolumn_format : str, default 'l' + The alignment for multicolumns, similar to `column_format` + The default will be read from the config module. + multirow : bool, default False + Use \multirow to enhance MultiIndex rows. Requires adding a + \usepackage{{multirow}} to your LaTeX preamble. Will print + centered labels (instead of top-aligned) across the contained + rows, separating groups via clines. The default will be read + from the pandas config module. + caption : str or tuple, optional + Tuple (full_caption, short_caption), + which results in ``\caption[short_caption]{{full_caption}}``; + if a single string is passed, no short caption will be set. + + .. versionadded:: 1.0.0 + + .. versionchanged:: 1.2.0 + Optionally allow caption to be a tuple ``(full_caption, short_caption)``. + + label : str, optional + The LaTeX label to be placed inside ``\label{{}}`` in the output. + This is used with ``\ref{{}}`` in the main ``.tex`` file. + + .. versionadded:: 1.0.0 + position : str, optional + The LaTeX positional argument for tables, to be placed after + ``\begin{{}}`` in the output. + + .. versionadded:: 1.2.0 + {returns} + See Also + -------- + io.formats.style.Styler.to_latex : Render a DataFrame to LaTeX + with conditional formatting. + DataFrame.to_string : Render a DataFrame to a console-friendly + tabular output. + DataFrame.to_html : Render a DataFrame as an HTML table. + + Examples + -------- + >>> df = pd.DataFrame(dict(name=['Raphael', 'Donatello'], + ... mask=['red', 'purple'], + ... weapon=['sai', 'bo staff'])) + >>> print(df.to_latex(index=False)) # doctest: +SKIP + \begin{{tabular}}{{lll}} + \toprule + name & mask & weapon \\ + \midrule + Raphael & red & sai \\ + Donatello & purple & bo staff \\ + \bottomrule + \end{{tabular}} + """ + msg = ( + "In future versions `DataFrame.to_latex` is expected to utilise the base " + "implementation of `Styler.to_latex` for formatting and rendering. " + "The arguments signature may therefore change. It is recommended instead " + "to use `DataFrame.style.to_latex` which also contains additional " + "functionality." 
+ ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + # Get defaults from the pandas config + if self.ndim == 1: + self = self.to_frame() + if longtable is None: + longtable = config.get_option("display.latex.longtable") + if escape is None: + escape = config.get_option("display.latex.escape") + if multicolumn is None: + multicolumn = config.get_option("display.latex.multicolumn") + if multicolumn_format is None: + multicolumn_format = config.get_option("display.latex.multicolumn_format") + if multirow is None: + multirow = config.get_option("display.latex.multirow") + + self = cast("DataFrame", self) + formatter = DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + header=header, + index=index, + formatters=formatters, + float_format=float_format, + bold_rows=bold_rows, + sparsify=sparsify, + index_names=index_names, + escape=escape, + decimal=decimal, + ) + return DataFrameRenderer(formatter).to_latex( + buf=buf, + column_format=column_format, + longtable=longtable, + encoding=encoding, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow, + caption=caption, + label=label, + position=position, + ) + + @overload + def to_csv( + self, + path_or_buf: None = ..., + sep: str = ..., + na_rep: str = ..., + float_format: str | Callable | None = ..., + columns: Sequence[Hashable] | None = ..., + header: bool_t | list[str] = ..., + index: bool_t = ..., + index_label: IndexLabel | None = ..., + mode: str = ..., + encoding: str | None = ..., + compression: CompressionOptions = ..., + quoting: int | None = ..., + quotechar: str = ..., + lineterminator: str | None = ..., + chunksize: int | None = ..., + date_format: str | None = ..., + doublequote: bool_t = ..., + escapechar: str | None = ..., + decimal: str = ..., + errors: str = ..., + storage_options: StorageOptions = ..., + ) -> str: + ... + + @overload + def to_csv( + self, + path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], + sep: str = ..., + na_rep: str = ..., + float_format: str | Callable | None = ..., + columns: Sequence[Hashable] | None = ..., + header: bool_t | list[str] = ..., + index: bool_t = ..., + index_label: IndexLabel | None = ..., + mode: str = ..., + encoding: str | None = ..., + compression: CompressionOptions = ..., + quoting: int | None = ..., + quotechar: str = ..., + lineterminator: str | None = ..., + chunksize: int | None = ..., + date_format: str | None = ..., + doublequote: bool_t = ..., + escapechar: str | None = ..., + decimal: str = ..., + errors: str = ..., + storage_options: StorageOptions = ..., + ) -> None: + ... 
+ + @final + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path_or_buf", + ) + @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator") + def to_csv( + self, + path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + sep: str = ",", + na_rep: str = "", + float_format: str | Callable | None = None, + columns: Sequence[Hashable] | None = None, + header: bool_t | list[str] = True, + index: bool_t = True, + index_label: IndexLabel | None = None, + mode: str = "w", + encoding: str | None = None, + compression: CompressionOptions = "infer", + quoting: int | None = None, + quotechar: str = '"', + lineterminator: str | None = None, + chunksize: int | None = None, + date_format: str | None = None, + doublequote: bool_t = True, + escapechar: str | None = None, + decimal: str = ".", + errors: str = "strict", + storage_options: StorageOptions = None, + ) -> str | None: + r""" + Write object to a comma-separated values (csv) file. + + Parameters + ---------- + path_or_buf : str, path object, file-like object, or None, default None + String, path object (implementing os.PathLike[str]), or file-like + object implementing a write() function. If None, the result is + returned as a string. If a non-binary file object is passed, it should + be opened with `newline=''`, disabling universal newlines. If a binary + file object is passed, `mode` might need to contain a `'b'`. + + .. versionchanged:: 1.2.0 + + Support for binary file objects was introduced. + + sep : str, default ',' + String of length 1. Field delimiter for the output file. + na_rep : str, default '' + Missing data representation. + float_format : str, Callable, default None + Format string for floating point numbers. If a Callable is given, it takes + precedence over other numeric formatting parameters, like decimal. + columns : sequence, optional + Columns to write. + header : bool or list of str, default True + Write out the column names. If a list of strings is given it is + assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + index_label : str or sequence, or False, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the object uses MultiIndex. If + False do not print fields for index names. Use index_label=False + for easier importing in R. + mode : str, default 'w' + Python write mode. The available write modes are the same as + :py:func:`open`. + encoding : str, optional + A string representing the encoding to use in the output file, + defaults to 'utf-8'. `encoding` is not supported if `path_or_buf` + is a non-binary file object. + {compression_options} + + .. versionchanged:: 1.0.0 + + May now be a dict with key 'method' as compression mode + and other entries as additional compression options if + compression mode is 'zip'. + + .. versionchanged:: 1.1.0 + + Passing compression options as keys in dict is + supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'. + + .. versionchanged:: 1.2.0 + + Compression is supported for binary file objects. + + .. versionchanged:: 1.2.0 + + Previous versions forwarded dict entries for 'gzip' to + `gzip.open` instead of `gzip.GzipFile` which prevented + setting `mtime`. + + quoting : optional constant from csv module + Defaults to csv.QUOTE_MINIMAL. 
If you have set a `float_format` + then floats are converted to strings and thus csv.QUOTE_NONNUMERIC + will treat them as non-numeric. + quotechar : str, default '\"' + String of length 1. Character used to quote fields. + lineterminator : str, optional + The newline character or character sequence to use in the output + file. Defaults to `os.linesep`, which depends on the OS in which + this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.). + + .. versionchanged:: 1.5.0 + + Previously was line_terminator, changed for consistency with + read_csv and the standard library 'csv' module. + + chunksize : int or None + Rows to write at a time. + date_format : str, default None + Format string for datetime objects. + doublequote : bool, default True + Control quoting of `quotechar` inside a field. + escapechar : str, default None + String of length 1. Character used to escape `sep` and `quotechar` + when appropriate. + decimal : str, default '.' + Character recognized as decimal separator. E.g. use ',' for + European data. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + + .. versionadded:: 1.1.0 + + {storage_options} + + .. versionadded:: 1.2.0 + + Returns + ------- + None or str + If path_or_buf is None, returns the resulting csv format as a + string. Otherwise returns None. + + See Also + -------- + read_csv : Load a CSV file into a DataFrame. + to_excel : Write DataFrame to an Excel file. + + Examples + -------- + >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'], + ... 'mask': ['red', 'purple'], + ... 'weapon': ['sai', 'bo staff']}}) + >>> df.to_csv(index=False) + 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' + + Create 'out.zip' containing 'out.csv' + + >>> compression_opts = dict(method='zip', + ... archive_name='out.csv') # doctest: +SKIP + >>> df.to_csv('out.zip', index=False, + ... compression=compression_opts) # doctest: +SKIP + + To write a csv file to a new folder or nested folder you will first + need to create it using either Pathlib or os: + + >>> from pathlib import Path # doctest: +SKIP + >>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP + >>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP + >>> df.to_csv(filepath) # doctest: +SKIP + + >>> import os # doctest: +SKIP + >>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP + >>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP + """ + df = self if isinstance(self, ABCDataFrame) else self.to_frame() + + formatter = DataFrameFormatter( + frame=df, + header=header, + index=index, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + ) + + return DataFrameRenderer(formatter).to_csv( + path_or_buf, + lineterminator=lineterminator, + sep=sep, + encoding=encoding, + errors=errors, + compression=compression, + quoting=quoting, + columns=columns, + index_label=index_label, + mode=mode, + chunksize=chunksize, + quotechar=quotechar, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, + storage_options=storage_options, + ) + + # ---------------------------------------------------------------------- + # Lookup Caching + + def _reset_cacher(self) -> None: + """ + Reset the cacher. 
+ """ + raise AbstractMethodError(self) + + def _maybe_update_cacher( + self, + clear: bool_t = False, + verify_is_copy: bool_t = True, + inplace: bool_t = False, + ) -> None: + """ + See if we need to update our parent cacher if clear, then clear our + cache. + + Parameters + ---------- + clear : bool, default False + Clear the item cache. + verify_is_copy : bool, default True + Provide is_copy checks. + """ + + if verify_is_copy: + self._check_setitem_copy(t="referent") + + if clear: + self._clear_item_cache() + + def _clear_item_cache(self) -> None: + raise AbstractMethodError(self) + + # ---------------------------------------------------------------------- + # Indexing Methods + + def take( + self: NDFrameT, indices, axis=0, is_copy: bool_t | None = None, **kwargs + ) -> NDFrameT: + """ + Return the elements in the given *positional* indices along an axis. + + This means that we are not indexing according to actual values in + the index attribute of the object. We are indexing according to the + actual position of the element in the object. + + Parameters + ---------- + indices : array-like + An array of ints indicating which positions to take. + axis : {0 or 'index', 1 or 'columns', None}, default 0 + The axis on which to select elements. ``0`` means that we are + selecting rows, ``1`` means that we are selecting columns. + For `Series` this parameter is unused and defaults to 0. + is_copy : bool + Before pandas 1.0, ``is_copy=False`` can be specified to ensure + that the return value is an actual copy. Starting with pandas 1.0, + ``take`` always returns a copy, and the keyword is therefore + deprecated. + + .. deprecated:: 1.0.0 + **kwargs + For compatibility with :meth:`numpy.take`. Has no effect on the + output. + + Returns + ------- + taken : same type as caller + An array-like containing the elements taken from the object. + + See Also + -------- + DataFrame.loc : Select a subset of a DataFrame by labels. + DataFrame.iloc : Select a subset of a DataFrame by positions. + numpy.take : Take elements from an array along an axis. + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey', 'mammal', np.nan)], + ... columns=['name', 'class', 'max_speed'], + ... index=[0, 2, 3, 1]) + >>> df + name class max_speed + 0 falcon bird 389.0 + 2 parrot bird 24.0 + 3 lion mammal 80.5 + 1 monkey mammal NaN + + Take elements at positions 0 and 3 along the axis 0 (default). + + Note how the actual indices selected (0 and 1) do not correspond to + our selected indices 0 and 3. That's because we are selecting the 0th + and 3rd rows, not rows whose indices equal 0 and 3. + + >>> df.take([0, 3]) + name class max_speed + 0 falcon bird 389.0 + 1 monkey mammal NaN + + Take elements at indices 1 and 2 along the axis 1 (column selection). + + >>> df.take([1, 2], axis=1) + class max_speed + 0 bird 389.0 + 2 bird 24.0 + 3 mammal 80.5 + 1 mammal NaN + + We may take elements using negative integers for positive indices, + starting from the end of the object, just like with Python lists. + + >>> df.take([-1, -2]) + name class max_speed + 1 monkey mammal NaN + 3 lion mammal 80.5 + """ + if is_copy is not None: + warnings.warn( + "is_copy is deprecated and will be removed in a future version. 
" + "'take' always returns a copy, so there is no need to specify this.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + nv.validate_take((), kwargs) + + return self._take(indices, axis) + + def _take( + self: NDFrameT, + indices, + axis=0, + convert_indices: bool_t = True, + ) -> NDFrameT: + """ + Internal version of the `take` allowing specification of additional args. + + See the docstring of `take` for full explanation of the parameters. + """ + self._consolidate_inplace() + + new_data = self._mgr.take( + indices, + axis=self._get_block_manager_axis(axis), + verify=True, + convert_indices=convert_indices, + ) + return self._constructor(new_data).__finalize__(self, method="take") + + def _take_with_is_copy(self: NDFrameT, indices, axis=0) -> NDFrameT: + """ + Internal version of the `take` method that sets the `_is_copy` + attribute to keep track of the parent dataframe (using in indexing + for the SettingWithCopyWarning). + + See the docstring of `take` for full explanation of the parameters. + """ + result = self._take(indices=indices, axis=axis) + # Maybe set copy if we didn't actually change the index. + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + return result + + @final + def xs( + self: NDFrameT, + key: IndexLabel, + axis: Axis = 0, + level: IndexLabel = None, + drop_level: bool_t = True, + ) -> NDFrameT: + """ + Return cross-section from the Series/DataFrame. + + This method takes a `key` argument to select data at a particular + level of a MultiIndex. + + Parameters + ---------- + key : label or tuple of label + Label contained in the index, or partially in a MultiIndex. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis to retrieve cross-section on. + level : object, defaults to first n levels (n=1 or len(key)) + In case of a key partially contained in a MultiIndex, indicate + which levels are used. Levels can be referred by label or position. + drop_level : bool, default True + If False, returns object with same levels as self. + + Returns + ------- + Series or DataFrame + Cross-section from the original Series or DataFrame + corresponding to the selected index levels. + + See Also + -------- + DataFrame.loc : Access a group of rows and columns + by label(s) or a boolean array. + DataFrame.iloc : Purely integer-location based indexing + for selection by position. + + Notes + ----- + `xs` can not be used to set values. + + MultiIndex Slicers is a generic way to get/set values on + any level or levels. + It is a superset of `xs` functionality, see + :ref:`MultiIndex Slicers `. + + Examples + -------- + >>> d = {'num_legs': [4, 4, 2, 2], + ... 'num_wings': [0, 0, 2, 2], + ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], + ... 'animal': ['cat', 'dog', 'bat', 'penguin'], + ... 
'locomotion': ['walks', 'walks', 'flies', 'walks']} + >>> df = pd.DataFrame(data=d) + >>> df = df.set_index(['class', 'animal', 'locomotion']) + >>> df + num_legs num_wings + class animal locomotion + mammal cat walks 4 0 + dog walks 4 0 + bat flies 2 2 + bird penguin walks 2 2 + + Get values at specified index + + >>> df.xs('mammal') + num_legs num_wings + animal locomotion + cat walks 4 0 + dog walks 4 0 + bat flies 2 2 + + Get values at several indexes + + >>> df.xs(('mammal', 'dog')) + num_legs num_wings + locomotion + walks 4 0 + + Get values at specified index and level + + >>> df.xs('cat', level=1) + num_legs num_wings + class locomotion + mammal walks 4 0 + + Get values at several indexes and levels + + >>> df.xs(('bird', 'walks'), + ... level=[0, 'locomotion']) + num_legs num_wings + animal + penguin 2 2 + + Get values at specified column and axis + + >>> df.xs('num_wings', axis=1) + class animal locomotion + mammal cat walks 0 + dog walks 0 + bat flies 2 + bird penguin walks 2 + Name: num_wings, dtype: int64 + """ + axis = self._get_axis_number(axis) + labels = self._get_axis(axis) + + if isinstance(key, list): + warnings.warn( + "Passing lists as key for xs is deprecated and will be removed in a " + "future version. Pass key as a tuple instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if level is not None: + if not isinstance(labels, MultiIndex): + raise TypeError("Index must be a MultiIndex") + loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) + + # create the tuple of the indexer + _indexer = [slice(None)] * self.ndim + _indexer[axis] = loc + indexer = tuple(_indexer) + + result = self.iloc[indexer] + setattr(result, result._get_axis_name(axis), new_ax) + return result + + if axis == 1: + if drop_level: + return self[key] + index = self.columns + else: + index = self.index + + self._consolidate_inplace() + + if isinstance(index, MultiIndex): + loc, new_index = index._get_loc_level(key, level=0) + if not drop_level: + if lib.is_integer(loc): + new_index = index[loc : loc + 1] + else: + new_index = index[loc] + else: + loc = index.get_loc(key) + + if isinstance(loc, np.ndarray): + if loc.dtype == np.bool_: + (inds,) = loc.nonzero() + return self._take_with_is_copy(inds, axis=axis) + else: + return self._take_with_is_copy(loc, axis=axis) + + if not is_scalar(loc): + new_index = index[loc] + + if is_scalar(loc) and axis == 0: + # In this case loc should be an integer + if self.ndim == 1: + # if we encounter an array-like and we only have 1 dim + # that means that their are list/ndarrays inside the Series! + # so just return them (GH 6394) + return self._values[loc] + + new_mgr = self._mgr.fast_xs(loc) + + result = self._constructor_sliced( + new_mgr, name=self.index[loc] + ).__finalize__(self) + elif is_scalar(loc): + result = self.iloc[:, slice(loc, loc + 1)] + elif axis == 1: + result = self.iloc[:, loc] + else: + result = self.iloc[loc] + result.index = new_index + + # this could be a view + # but only in a single-dtyped view sliceable case + result._set_is_copy(self, copy=not result._is_view) + return result + + def __getitem__(self, item): + raise AbstractMethodError(self) + + def _slice(self: NDFrameT, slobj: slice, axis=0) -> NDFrameT: + """ + Construct a slice of this container. + + Slicing with this method is *always* positional. 
+ """ + assert isinstance(slobj, slice), type(slobj) + axis = self._get_block_manager_axis(axis) + result = self._constructor(self._mgr.get_slice(slobj, axis=axis)) + result = result.__finalize__(self) + + # this could be a view + # but only in a single-dtyped view sliceable case + is_copy = axis != 0 or result._is_view + result._set_is_copy(self, copy=is_copy) + return result + + @final + def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None: + if not copy: + self._is_copy = None + else: + assert ref is not None + self._is_copy = weakref.ref(ref) + + def _check_is_chained_assignment_possible(self) -> bool_t: + """ + Check if we are a view, have a cacher, and are of mixed type. + If so, then force a setitem_copy check. + + Should be called just near setting a value + + Will return a boolean if it we are a view and are cached, but a + single-dtype meaning that the cacher should be updated following + setting. + """ + if self._is_copy: + self._check_setitem_copy(t="referent") + return False + + @final + def _check_setitem_copy(self, t="setting", force=False): + """ + + Parameters + ---------- + t : str, the type of setting error + force : bool, default False + If True, then force showing an error. + + validate if we are doing a setitem on a chained copy. + + It is technically possible to figure out that we are setting on + a copy even WITH a multi-dtyped pandas object. In other words, some + blocks may be views while other are not. Currently _is_view will ALWAYS + return False for multi-blocks to avoid having to handle this case. + + df = DataFrame(np.arange(0,9), columns=['count']) + df['group'] = 'b' + + # This technically need not raise SettingWithCopy if both are view + # (which is not generally guaranteed but is usually True. However, + # this is in general not a good practice and we recommend using .loc. 
+ df.iloc[0:5]['group'] = 'a' + + """ + if ( + config.get_option("mode.copy_on_write") + and config.get_option("mode.data_manager") == "block" + ): + return + + # return early if the check is not needed + if not (force or self._is_copy): + return + + value = config.get_option("mode.chained_assignment") + if value is None: + return + + # see if the copy is not actually referred; if so, then dissolve + # the copy weakref + if self._is_copy is not None and not isinstance(self._is_copy, str): + r = self._is_copy() + if not gc.get_referents(r) or (r is not None and r.shape == self.shape): + self._is_copy = None + return + + # a custom message + if isinstance(self._is_copy, str): + t = self._is_copy + + elif t == "referent": + t = ( + "\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + else: + t = ( + "\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame.\n" + "Try using .loc[row_indexer,col_indexer] = value " + "instead\n\nSee the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + if value == "raise": + raise SettingWithCopyError(t) + elif value == "warn": + warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level()) + + def __delitem__(self, key) -> None: + """ + Delete item + """ + deleted = False + + maybe_shortcut = False + if self.ndim == 2 and isinstance(self.columns, MultiIndex): + try: + # By using engine's __contains__ we effectively + # restrict to same-length tuples + maybe_shortcut = key not in self.columns._engine + except TypeError: + pass + + if maybe_shortcut: + # Allow shorthand to delete all columns whose first len(key) + # elements match key: + if not isinstance(key, tuple): + key = (key,) + for col in self.columns: + if isinstance(col, tuple) and col[: len(key)] == key: + del self[col] + deleted = True + if not deleted: + # If the above loop ran and didn't delete anything because + # there was no match, this call should raise the appropriate + # exception: + loc = self.axes[-1].get_loc(key) + self._mgr = self._mgr.idelete(loc) + + # delete from the caches + try: + del self._item_cache[key] + except KeyError: + pass + + # ---------------------------------------------------------------------- + # Unsorted + + @final + def _check_inplace_and_allows_duplicate_labels(self, inplace): + if inplace and not self.flags.allows_duplicate_labels: + raise ValueError( + "Cannot specify 'inplace=True' when " + "'self.flags.allows_duplicate_labels' is False." + ) + + @final + def get(self, key, default=None): + """ + Get item from object for given key (ex: DataFrame column). + + Returns default value if not found. + + Parameters + ---------- + key : object + + Returns + ------- + value : same type as items contained in object + + Examples + -------- + >>> df = pd.DataFrame( + ... [ + ... [24.3, 75.7, "high"], + ... [31, 87.8, "high"], + ... [22, 71.6, "medium"], + ... [35, 95, "medium"], + ... ], + ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"], + ... index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"), + ... 
) + + >>> df + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + >>> df.get(["temp_celsius", "windspeed"]) + temp_celsius windspeed + 2014-02-12 24.3 high + 2014-02-13 31.0 high + 2014-02-14 22.0 medium + 2014-02-15 35.0 medium + + If the key isn't found, the default value will be used. + + >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value") + 'default_value' + """ + try: + return self[key] + except (KeyError, ValueError, IndexError): + return default + + @final + @property + def _is_view(self) -> bool_t: + """Return boolean indicating if self is view of another array""" + return self._mgr.is_view + + @final + def reindex_like( + self: NDFrameT, + other, + method: str | None = None, + copy: bool_t = True, + limit=None, + tolerance=None, + ) -> NDFrameT: + """ + Return an object with matching indices as other object. + + Conform the object to the same index on all axes. Optional + filling logic, placing NaN in locations having no value + in the previous index. A new object is produced unless the + new index is equivalent to the current one and copy=False. + + Parameters + ---------- + other : Object of the same data type + Its row and column indices are used to define the new indices + of this object. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. + + * None (default): don't fill gaps + * pad / ffill: propagate last valid observation forward to next + valid + * backfill / bfill: use next valid observation to fill gap + * nearest: use nearest valid observations to fill gap. + + copy : bool, default True + Return a new object, even if the passed indexes are the same. + limit : int, default None + Maximum number of consecutive labels to fill for inexact matches. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + Series or DataFrame + Same type as caller, but with changed indices on each axis. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + + Notes + ----- + Same as calling + ``.reindex(index=other.index, columns=other.columns,...)``. + + Examples + -------- + >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], + ... [31, 87.8, 'high'], + ... [22, 71.6, 'medium'], + ... [35, 95, 'medium']], + ... columns=['temp_celsius', 'temp_fahrenheit', + ... 'windspeed'], + ... index=pd.date_range(start='2014-02-12', + ... end='2014-02-15', freq='D')) + + >>> df1 + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + >>> df2 = pd.DataFrame([[28, 'low'], + ... [30, 'low'], + ... [35.1, 'medium']], + ... columns=['temp_celsius', 'windspeed'], + ... 
index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', + ... '2014-02-15'])) + + >>> df2 + temp_celsius windspeed + 2014-02-12 28.0 low + 2014-02-13 30.0 low + 2014-02-15 35.1 medium + + >>> df2.reindex_like(df1) + temp_celsius temp_fahrenheit windspeed + 2014-02-12 28.0 NaN low + 2014-02-13 30.0 NaN low + 2014-02-14 NaN NaN NaN + 2014-02-15 35.1 NaN medium + """ + d = other._construct_axes_dict( + axes=self._AXIS_ORDERS, + method=method, + copy=copy, + limit=limit, + tolerance=tolerance, + ) + + return self.reindex(**d) + + @overload + def drop( + self, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level | None = ..., + inplace: Literal[True], + errors: IgnoreRaise = ..., + ) -> None: + ... + + @overload + def drop( + self: NDFrameT, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level | None = ..., + inplace: Literal[False] = ..., + errors: IgnoreRaise = ..., + ) -> NDFrameT: + ... + + @overload + def drop( + self: NDFrameT, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level | None = ..., + inplace: bool_t = ..., + errors: IgnoreRaise = ..., + ) -> NDFrameT | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + def drop( + self: NDFrameT, + labels: IndexLabel = None, + axis: Axis = 0, + index: IndexLabel = None, + columns: IndexLabel = None, + level: Level | None = None, + inplace: bool_t = False, + errors: IgnoreRaise = "raise", + ) -> NDFrameT | None: + + inplace = validate_bool_kwarg(inplace, "inplace") + + if labels is not None: + if index is not None or columns is not None: + raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") + axis_name = self._get_axis_name(axis) + axes = {axis_name: labels} + elif index is not None or columns is not None: + axes, _ = self._construct_axes_from_arguments((index, columns), {}) + else: + raise ValueError( + "Need to specify at least one of 'labels', 'index' or 'columns'" + ) + + obj = self + + for axis, labels in axes.items(): + if labels is not None: + obj = obj._drop_axis(labels, axis, level=level, errors=errors) + + if inplace: + self._update_inplace(obj) + else: + return obj + + @final + def _drop_axis( + self: NDFrameT, + labels, + axis, + level=None, + errors: IgnoreRaise = "raise", + only_slice: bool_t = False, + ) -> NDFrameT: + """ + Drop labels from specified axis. Used in the ``drop`` method + internally. + + Parameters + ---------- + labels : single label or list-like + axis : int or axis name + level : int or level name, default None + For MultiIndex + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. + only_slice : bool, default False + Whether indexing along columns should be view-only. 
+ + """ + axis_num = self._get_axis_number(axis) + axis = self._get_axis(axis) + + if axis.is_unique: + if level is not None: + if not isinstance(axis, MultiIndex): + raise AssertionError("axis must be a MultiIndex") + new_axis = axis.drop(labels, level=level, errors=errors) + else: + new_axis = axis.drop(labels, errors=errors) + indexer = axis.get_indexer(new_axis) + + # Case for non-unique axis + else: + is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple) + labels = ensure_object(com.index_labels_to_array(labels)) + if level is not None: + if not isinstance(axis, MultiIndex): + raise AssertionError("axis must be a MultiIndex") + mask = ~axis.get_level_values(level).isin(labels) + + # GH 18561 MultiIndex.drop should raise if label is absent + if errors == "raise" and mask.all(): + raise KeyError(f"{labels} not found in axis") + elif ( + isinstance(axis, MultiIndex) + and labels.dtype == "object" + and not is_tuple_labels + ): + # Set level to zero in case of MultiIndex and label is string, + # because isin can't handle strings for MultiIndexes GH#36293 + # In case of tuples we get dtype object but have to use isin GH#42771 + mask = ~axis.get_level_values(0).isin(labels) + else: + mask = ~axis.isin(labels) + # Check if label doesn't exist along axis + labels_missing = (axis.get_indexer_for(labels) == -1).any() + if errors == "raise" and labels_missing: + raise KeyError(f"{labels} not found in axis") + + if is_extension_array_dtype(mask.dtype): + # GH#45860 + mask = mask.to_numpy(dtype=bool) + + indexer = mask.nonzero()[0] + new_axis = axis.take(indexer) + + bm_axis = self.ndim - axis_num - 1 + new_mgr = self._mgr.reindex_indexer( + new_axis, + indexer, + axis=bm_axis, + allow_dups=True, + only_slice=only_slice, + ) + result = self._constructor(new_mgr) + if self.ndim == 1: + result.name = self.name + + return result.__finalize__(self) + + @final + def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: + """ + Replace self internals with result. + + Parameters + ---------- + result : same type as self + verify_is_copy : bool, default True + Provide is_copy checks. + """ + # NOTE: This does *not* call __finalize__ and that's an explicit + # decision that we may revisit in the future. + self._reset_cache() + self._clear_item_cache() + self._mgr = result._mgr + self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True) + + @final + def add_prefix(self: NDFrameT, prefix: str) -> NDFrameT: + """ + Prefix labels with string `prefix`. + + For Series, the row labels are prefixed. + For DataFrame, the column labels are prefixed. + + Parameters + ---------- + prefix : str + The string to add before each label. + + Returns + ------- + Series or DataFrame + New Series or DataFrame with updated labels. + + See Also + -------- + Series.add_suffix: Suffix row labels with string `suffix`. + DataFrame.add_suffix: Suffix column labels with string `suffix`. 
+ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.add_prefix('item_') + item_0 1 + item_1 2 + item_2 3 + item_3 4 + dtype: int64 + + >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) + >>> df + A B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + + >>> df.add_prefix('col_') + col_A col_B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + """ + f = functools.partial("{prefix}{}".format, prefix=prefix) + + mapper = {self._info_axis_name: f} + # error: Incompatible return value type (got "Optional[NDFrameT]", + # expected "NDFrameT") + # error: Argument 1 to "rename" of "NDFrame" has incompatible type + # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" + return self._rename(**mapper) # type: ignore[return-value, arg-type] + + @final + def add_suffix(self: NDFrameT, suffix: str) -> NDFrameT: + """ + Suffix labels with string `suffix`. + + For Series, the row labels are suffixed. + For DataFrame, the column labels are suffixed. + + Parameters + ---------- + suffix : str + The string to add after each label. + + Returns + ------- + Series or DataFrame + New Series or DataFrame with updated labels. + + See Also + -------- + Series.add_prefix: Prefix row labels with string `prefix`. + DataFrame.add_prefix: Prefix column labels with string `prefix`. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.add_suffix('_item') + 0_item 1 + 1_item 2 + 2_item 3 + 3_item 4 + dtype: int64 + + >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) + >>> df + A B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + + >>> df.add_suffix('_col') + A_col B_col + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + """ + f = functools.partial("{}{suffix}".format, suffix=suffix) + + mapper = {self._info_axis_name: f} + # error: Incompatible return value type (got "Optional[NDFrameT]", + # expected "NDFrameT") + # error: Argument 1 to "rename" of "NDFrame" has incompatible type + # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" + return self._rename(**mapper) # type: ignore[return-value, arg-type] + + @overload + def sort_values( + self: NDFrameT, + *, + axis: Axis = ..., + ascending=..., + inplace: Literal[False] = ..., + kind: str = ..., + na_position: str = ..., + ignore_index: bool_t = ..., + key: ValueKeyFunc = ..., + ) -> NDFrameT: + ... + + @overload + def sort_values( + self, + *, + axis: Axis = ..., + ascending=..., + inplace: Literal[True], + kind: str = ..., + na_position: str = ..., + ignore_index: bool_t = ..., + key: ValueKeyFunc = ..., + ) -> None: + ... + + @overload + def sort_values( + self: NDFrameT, + *, + axis: Axis = ..., + ascending=..., + inplace: bool_t = ..., + kind: str = ..., + na_position: str = ..., + ignore_index: bool_t = ..., + key: ValueKeyFunc = ..., + ) -> NDFrameT | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def sort_values( + self: NDFrameT, + axis: Axis = 0, + ascending=True, + inplace: bool_t = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool_t = False, + key: ValueKeyFunc = None, + ) -> NDFrameT | None: + """ + Sort by the values along either axis. + + Parameters + ----------%(optional_by)s + axis : %(axes_single_arg)s, default 0 + Axis to be sorted. + ascending : bool or list of bool, default True + Sort ascending vs. descending. Specify list for multiple sort + orders. If this is a list of bools, must match the length of + the by. 
+ inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. `mergesort` and `stable` are the only stable algorithms. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + Puts NaNs at the beginning if `first`; `last` puts NaNs at the + end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + key : callable, optional + Apply the key function to the values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. It should expect a + ``Series`` and return a Series with the same shape as the input. + It will be applied to each column in `by` independently. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame or None + DataFrame with sorted values or None if ``inplace=True``. + + See Also + -------- + DataFrame.sort_index : Sort a DataFrame by the index. + Series.sort_values : Similar method for a Series. + + Examples + -------- + >>> df = pd.DataFrame({ + ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], + ... 'col2': [2, 1, 9, 8, 7, 4], + ... 'col3': [0, 1, 9, 4, 2, 3], + ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] + ... }) + >>> df + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 3 NaN 8 4 D + 4 D 7 2 e + 5 C 4 3 F + + Sort by col1 + + >>> df.sort_values(by=['col1']) + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 5 C 4 3 F + 4 D 7 2 e + 3 NaN 8 4 D + + Sort by multiple columns + + >>> df.sort_values(by=['col1', 'col2']) + col1 col2 col3 col4 + 1 A 1 1 B + 0 A 2 0 a + 2 B 9 9 c + 5 C 4 3 F + 4 D 7 2 e + 3 NaN 8 4 D + + Sort Descending + + >>> df.sort_values(by='col1', ascending=False) + col1 col2 col3 col4 + 4 D 7 2 e + 5 C 4 3 F + 2 B 9 9 c + 0 A 2 0 a + 1 A 1 1 B + 3 NaN 8 4 D + + Putting NAs first + + >>> df.sort_values(by='col1', ascending=False, na_position='first') + col1 col2 col3 col4 + 3 NaN 8 4 D + 4 D 7 2 e + 5 C 4 3 F + 2 B 9 9 c + 0 A 2 0 a + 1 A 1 1 B + + Sorting with a key function + + >>> df.sort_values(by='col4', key=lambda col: col.str.lower()) + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 3 NaN 8 4 D + 4 D 7 2 e + 5 C 4 3 F + + Natural sort with the key argument, + using the `natsort ` package. + + >>> df = pd.DataFrame({ + ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], + ... "value": [10, 20, 30, 40, 50] + ... }) + >>> df + time value + 0 0hr 10 + 1 128hr 20 + 2 72hr 30 + 3 48hr 40 + 4 96hr 50 + >>> from natsort import index_natsorted + >>> df.sort_values( + ... by="time", + ... key=lambda x: np.argsort(index_natsorted(df["time"])) + ... ) + time value + 0 0hr 10 + 3 48hr 40 + 2 72hr 30 + 4 96hr 50 + 1 128hr 20 + """ + raise AbstractMethodError(self) + + @overload + def sort_index( + self, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool_t | Sequence[bool_t] = ..., + inplace: Literal[True], + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool_t = ..., + ignore_index: bool_t = ..., + key: IndexKeyFunc = ..., + ) -> None: + ... 
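The stacked ``@overload`` stubs for ``sort_values`` above and ``sort_index`` below encode the usual ``inplace`` contract: ``Literal[True]`` maps to a ``None`` return, ``Literal[False]`` to the frame type, and a plain ``bool`` to the union of both. A stripped-down sketch of that typing pattern, using a hypothetical ``mutate`` helper rather than pandas internals:

from __future__ import annotations

from typing import Literal, overload


@overload
def mutate(data: list[int], inplace: Literal[True]) -> None:
    ...


@overload
def mutate(data: list[int], inplace: Literal[False] = ...) -> list[int]:
    ...


@overload
def mutate(data: list[int], inplace: bool) -> list[int] | None:
    ...


def mutate(data: list[int], inplace: bool = False) -> list[int] | None:
    # Sort in place and return None (the inplace=True branch), or leave the
    # input untouched and return a new sorted list (inplace=False).
    if inplace:
        data.sort()
        return None
    return sorted(data)

A type checker can then infer that ``mutate(values, inplace=True)`` is ``None`` while ``mutate(values)`` is ``list[int]``, which is exactly the distinction these pandas stubs capture.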
+ + @overload + def sort_index( + self: NDFrameT, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool_t | Sequence[bool_t] = ..., + inplace: Literal[False] = ..., + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool_t = ..., + ignore_index: bool_t = ..., + key: IndexKeyFunc = ..., + ) -> NDFrameT: + ... + + @overload + def sort_index( + self: NDFrameT, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool_t | Sequence[bool_t] = ..., + inplace: bool_t = ..., + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool_t = ..., + ignore_index: bool_t = ..., + key: IndexKeyFunc = ..., + ) -> NDFrameT | None: + ... + + def sort_index( + self: NDFrameT, + axis: Axis = 0, + level: IndexLabel = None, + ascending: bool_t | Sequence[bool_t] = True, + inplace: bool_t = False, + kind: SortKind = "quicksort", + na_position: NaPosition = "last", + sort_remaining: bool_t = True, + ignore_index: bool_t = False, + key: IndexKeyFunc = None, + ) -> NDFrameT | None: + + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + ascending = validate_ascending(ascending) + + target = self._get_axis(axis) + + indexer = get_indexer_indexer( + target, level, ascending, kind, na_position, sort_remaining, key + ) + + if indexer is None: + if inplace: + result = self + else: + result = self.copy() + + if ignore_index: + result.index = default_index(len(self)) + if inplace: + return None + else: + return result + + baxis = self._get_block_manager_axis(axis) + new_data = self._mgr.take(indexer, axis=baxis, verify=False) + + # reconstruct axis if needed + new_data.set_axis(baxis, new_data.axes[baxis]._sort_levels_monotonic()) + + if ignore_index: + axis = 1 if isinstance(self, ABCDataFrame) else 0 + new_data.set_axis(axis, default_index(len(indexer))) + + result = self._constructor(new_data) + + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="sort_index") + + @doc( + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], + optional_labels="", + optional_axis="", + ) + def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT: + """ + Conform {klass} to new index with optional filling logic. + + Places NA/NaN in locations having no value in the previous index. A new object + is produced unless the new index is equivalent to the current one and + ``copy=False``. + + Parameters + ---------- + {optional_labels} + {axes} : array-like, optional + New labels / index to conform to, should be specified using + keywords. Preferably an Index object to avoid duplicating data. + {optional_axis} + method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. + + * None (default): don't fill gaps + * pad / ffill: Propagate last valid observation forward to next + valid. + * backfill / bfill: Use next valid observation to fill gap. + * nearest: Use nearest valid observations to fill gap. + + copy : bool, default True + Return a new object, even if the passed indexes are the same. + level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : scalar, default np.NaN + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value. 
+ limit : int, default None + Maximum number of consecutive elements to forward or backward fill. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations most + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + {klass} with changed index. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex_like : Change to same indices as other DataFrame. + + Examples + -------- + ``DataFrame.reindex`` supports two calling conventions + + * ``(index=index_labels, columns=column_labels, ...)`` + * ``(labels, axis={{'index', 'columns'}}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. + + Create a dataframe with some fictional data. + + >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] + >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301], + ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}}, + ... index=index) + >>> df + http_status response_time + Firefox 200 0.04 + Chrome 200 0.02 + Safari 404 0.07 + IE10 404 0.08 + Konqueror 301 1.00 + + Create a new index and reindex the dataframe. By default + values in the new index that do not have corresponding + records in the dataframe are assigned ``NaN``. + + >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', + ... 'Chrome'] + >>> df.reindex(new_index) + http_status response_time + Safari 404.0 0.07 + Iceweasel NaN NaN + Comodo Dragon NaN NaN + IE10 404.0 0.08 + Chrome 200.0 0.02 + + We can fill in the missing values by passing a value to + the keyword ``fill_value``. Because the index is not monotonically + increasing or decreasing, we cannot use arguments to the keyword + ``method`` to fill the ``NaN`` values. + + >>> df.reindex(new_index, fill_value=0) + http_status response_time + Safari 404 0.07 + Iceweasel 0 0.00 + Comodo Dragon 0 0.00 + IE10 404 0.08 + Chrome 200 0.02 + + >>> df.reindex(new_index, fill_value='missing') + http_status response_time + Safari 404 0.07 + Iceweasel missing missing + Comodo Dragon missing missing + IE10 404 0.08 + Chrome 200 0.02 + + We can also reindex the columns. + + >>> df.reindex(columns=['http_status', 'user_agent']) + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + Or we can use "axis-style" keyword arguments + + >>> df.reindex(['http_status', 'user_agent'], axis="columns") + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + To further illustrate the filling functionality in + ``reindex``, we will create a dataframe with a + monotonically increasing index (for example, a sequence + of dates). + + >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') + >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}}, + ... index=date_index) + >>> df2 + prices + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + + Suppose we decide to expand the dataframe to cover a wider + date range. 
+ + >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') + >>> df2.reindex(date_index2) + prices + 2009-12-29 NaN + 2009-12-30 NaN + 2009-12-31 NaN + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + The index entries that did not have a value in the original data frame + (for example, '2009-12-29') are by default filled with ``NaN``. + If desired, we can fill in the missing values using one of several + options. + + For example, to back-propagate the last valid value to fill the ``NaN`` + values, pass ``bfill`` as an argument to the ``method`` keyword. + + >>> df2.reindex(date_index2, method='bfill') + prices + 2009-12-29 100.0 + 2009-12-30 100.0 + 2009-12-31 100.0 + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + Please note that the ``NaN`` value present in the original dataframe + (at index value 2010-01-03) will not be filled by any of the + value propagation schemes. This is because filling while reindexing + does not look at dataframe values, but only compares the original and + desired indexes. If you do want to fill in the ``NaN`` values present + in the original dataframe, use the ``fillna()`` method. + + See the :ref:`user guide ` for more. + """ + # TODO: Decide if we care about having different examples for different + # kinds + + # construct the args + axes, kwargs = self._construct_axes_from_arguments(args, kwargs) + method = missing.clean_reindex_fill_method(kwargs.pop("method", None)) + level = kwargs.pop("level", None) + copy = kwargs.pop("copy", None) + limit = kwargs.pop("limit", None) + tolerance = kwargs.pop("tolerance", None) + fill_value = kwargs.pop("fill_value", None) + + # Series.reindex doesn't use / need the axis kwarg + # We pop and ignore it here, to make writing Series/Frame generic code + # easier + kwargs.pop("axis", None) + + if kwargs: + raise TypeError( + "reindex() got an unexpected keyword " + f'argument "{list(kwargs.keys())[0]}"' + ) + + self._consolidate_inplace() + + # if all axes that are requested to reindex are equal, then only copy + # if indicated must have index names equal here as well as values + if all( + self._get_axis(axis).identical(ax) + for axis, ax in axes.items() + if ax is not None + ): + return self.copy(deep=copy) + + # check if we are a multi reindex + if self._needs_reindex_multi(axes, method, level): + return self._reindex_multi(axes, copy, fill_value) + + # perform the reindex on the axes + return self._reindex_axes( + axes, level, limit, tolerance, method, fill_value, copy + ).__finalize__(self, method="reindex") + + def _reindex_axes( + self: NDFrameT, axes, level, limit, tolerance, method, fill_value, copy + ) -> NDFrameT: + """Perform the reindex for all the axes.""" + obj = self + for a in self._AXIS_ORDERS: + labels = axes[a] + if labels is None: + continue + + ax = self._get_axis(a) + new_index, indexer = ax.reindex( + labels, level=level, limit=limit, tolerance=tolerance, method=method + ) + + axis = self._get_axis_number(a) + obj = obj._reindex_with_indexers( + {axis: [new_index, indexer]}, + fill_value=fill_value, + copy=copy, + allow_dups=False, + ) + # If we've made a copy once, no need to make another one + copy = False + + return obj + + def _needs_reindex_multi(self, axes, method, level) -> bool_t: + """Check if we do need a multi reindex.""" + return ( + (com.count_not_none(*axes.values()) == self._AXIS_LEN) + and method is None + and 
level is None + and not self._is_mixed_type + ) + + def _reindex_multi(self, axes, copy, fill_value): + raise AbstractMethodError(self) + + @final + def _reindex_with_indexers( + self: NDFrameT, + reindexers, + fill_value=None, + copy: bool_t = False, + allow_dups: bool_t = False, + ) -> NDFrameT: + """allow_dups indicates an internal call here""" + # reindex doing multiple operations on different axes if indicated + new_data = self._mgr + for axis in sorted(reindexers.keys()): + index, indexer = reindexers[axis] + baxis = self._get_block_manager_axis(axis) + + if index is None: + continue + + index = ensure_index(index) + if indexer is not None: + indexer = ensure_platform_int(indexer) + + # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi) + new_data = new_data.reindex_indexer( + index, + indexer, + axis=baxis, + fill_value=fill_value, + allow_dups=allow_dups, + copy=copy, + ) + # If we've made a copy once, no need to make another one + copy = False + + if copy and new_data is self._mgr: + new_data = new_data.copy() + + return self._constructor(new_data).__finalize__(self) + + def filter( + self: NDFrameT, + items=None, + like: str | None = None, + regex: str | None = None, + axis=None, + ) -> NDFrameT: + """ + Subset the dataframe rows or columns according to the specified index labels. + + Note that this routine does not filter a dataframe on its + contents. The filter is applied to the labels of the index. + + Parameters + ---------- + items : list-like + Keep labels from axis which are in items. + like : str + Keep labels from axis for which "like in label == True". + regex : str (regular expression) + Keep labels from axis for which re.search(regex, label) == True. + axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + The axis to filter on, expressed either as an index (int) + or axis name (str). By default this is the info axis, 'columns' for + DataFrame. For `Series` this parameter is unused and defaults to `None`. + + Returns + ------- + same type as input object + + See Also + -------- + DataFrame.loc : Access a group of rows and columns + by label(s) or a boolean array. + + Notes + ----- + The ``items``, ``like``, and ``regex`` parameters are + enforced to be mutually exclusive. + + ``axis`` defaults to the info axis that is used when indexing + with ``[]``. + + Examples + -------- + >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), + ... index=['mouse', 'rabbit'], + ... 
columns=['one', 'two', 'three']) + >>> df + one two three + mouse 1 2 3 + rabbit 4 5 6 + + >>> # select columns by name + >>> df.filter(items=['one', 'three']) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select columns by regular expression + >>> df.filter(regex='e$', axis=1) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select rows containing 'bbi' + >>> df.filter(like='bbi', axis=0) + one two three + rabbit 4 5 6 + """ + nkw = com.count_not_none(items, like, regex) + if nkw > 1: + raise TypeError( + "Keyword arguments `items`, `like`, or `regex` " + "are mutually exclusive" + ) + + if axis is None: + axis = self._info_axis_name + labels = self._get_axis(axis) + + if items is not None: + name = self._get_axis_name(axis) + return self.reindex(**{name: [r for r in items if r in labels]}) + elif like: + + def f(x) -> bool_t: + assert like is not None # needed for mypy + return like in ensure_str(x) + + values = labels.map(f) + return self.loc(axis=axis)[values] + elif regex: + + def f(x) -> bool_t: + return matcher.search(ensure_str(x)) is not None + + matcher = re.compile(regex) + values = labels.map(f) + return self.loc(axis=axis)[values] + else: + raise TypeError("Must pass either `items`, `like`, or `regex`") + + @final + def head(self: NDFrameT, n: int = 5) -> NDFrameT: + """ + Return the first `n` rows. + + This function returns the first `n` rows for the object based + on position. It is useful for quickly testing if your object + has the right type of data in it. + + For negative values of `n`, this function returns all rows except + the last `|n|` rows, equivalent to ``df[:n]``. + + If n is larger than the number of rows, this function returns all rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + same type as caller + The first `n` rows of the caller object. + + See Also + -------- + DataFrame.tail: Returns the last `n` rows. + + Examples + -------- + >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', + ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) + >>> df + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the first 5 lines + + >>> df.head() + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + + Viewing the first `n` lines (three in this case) + + >>> df.head(3) + animal + 0 alligator + 1 bee + 2 falcon + + For negative values of `n` + + >>> df.head(-3) + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + """ + return self.iloc[:n] + + @final + def tail(self: NDFrameT, n: int = 5) -> NDFrameT: + """ + Return the last `n` rows. + + This function returns last `n` rows from the object based on + position. It is useful for quickly verifying data, for example, + after sorting or appending rows. + + For negative values of `n`, this function returns all rows except + the first `|n|` rows, equivalent to ``df[|n|:]``. + + If n is larger than the number of rows, this function returns all rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + type of caller + The last `n` rows of the caller object. + + See Also + -------- + DataFrame.head : The first `n` rows of the caller object. + + Examples + -------- + >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', + ... 
'monkey', 'parrot', 'shark', 'whale', 'zebra']}) + >>> df + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the last 5 lines + + >>> df.tail() + animal + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the last `n` lines (three in this case) + + >>> df.tail(3) + animal + 6 shark + 7 whale + 8 zebra + + For negative values of `n` + + >>> df.tail(-3) + animal + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + """ + if n == 0: + return self.iloc[0:0] + return self.iloc[-n:] + + @final + def sample( + self: NDFrameT, + n: int | None = None, + frac: float | None = None, + replace: bool_t = False, + weights=None, + random_state: RandomState | None = None, + axis: Axis | None = None, + ignore_index: bool_t = False, + ) -> NDFrameT: + """ + Return a random sample of items from an axis of object. + + You can use `random_state` for reproducibility. + + Parameters + ---------- + n : int, optional + Number of items from axis to return. Cannot be used with `frac`. + Default = 1 if `frac` = None. + frac : float, optional + Fraction of axis items to return. Cannot be used with `n`. + replace : bool, default False + Allow or disallow sampling of the same row more than once. + weights : str or ndarray-like, optional + Default 'None' results in equal probability weighting. + If passed a Series, will align with target object on index. Index + values in weights not found in sampled object will be ignored and + index values in sampled object not in weights will be assigned + weights of zero. + If called on a DataFrame, will accept the name of a column + when axis = 0. + Unless weights are a Series, weights must be same length as axis + being sampled. + If weights do not sum to 1, they will be normalized to sum to 1. + Missing values in the weights column will be treated as zero. + Infinite values not allowed. + random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional + If int, array-like, or BitGenerator, seed for random number generator. + If np.random.RandomState or np.random.Generator, use as given. + + .. versionchanged:: 1.1.0 + + array-like and BitGenerator object now passed to np.random.RandomState() + as seed + + .. versionchanged:: 1.4.0 + + np.random.Generator objects now accepted + + axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + Axis to sample. Accepts axis number or name. Default is stat axis + for given data type. For `Series` this parameter is unused and defaults to `None`. + ignore_index : bool, default False + If True, the resulting index will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.3.0 + + Returns + ------- + Series or DataFrame + A new object of same type as caller containing `n` items randomly + sampled from the caller object. + + See Also + -------- + DataFrameGroupBy.sample: Generates random samples from each group of a + DataFrame object. + SeriesGroupBy.sample: Generates random samples from each group of a + Series object. + numpy.random.choice: Generates a random sample from a given 1-D numpy + array. + + Notes + ----- + If `frac` > 1, `replacement` should be set to `True`. + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0], + ... 'num_wings': [2, 0, 0, 0], + ... 'num_specimen_seen': [10, 2, 1, 8]}, + ... 
index=['falcon', 'dog', 'spider', 'fish']) + >>> df + num_legs num_wings num_specimen_seen + falcon 2 2 10 + dog 4 0 2 + spider 8 0 1 + fish 0 0 8 + + Extract 3 random elements from the ``Series`` ``df['num_legs']``: + Note that we use `random_state` to ensure the reproducibility of + the examples. + + >>> df['num_legs'].sample(n=3, random_state=1) + fish 0 + spider 8 + falcon 2 + Name: num_legs, dtype: int64 + + A random 50% sample of the ``DataFrame`` with replacement: + + >>> df.sample(frac=0.5, replace=True, random_state=1) + num_legs num_wings num_specimen_seen + dog 4 0 2 + fish 0 0 8 + + An upsample sample of the ``DataFrame`` with replacement: + Note that `replace` parameter has to be `True` for `frac` parameter > 1. + + >>> df.sample(frac=2, replace=True, random_state=1) + num_legs num_wings num_specimen_seen + dog 4 0 2 + fish 0 0 8 + falcon 2 2 10 + falcon 2 2 10 + fish 0 0 8 + dog 4 0 2 + fish 0 0 8 + dog 4 0 2 + + Using a DataFrame column as weights. Rows with larger value in the + `num_specimen_seen` column are more likely to be sampled. + + >>> df.sample(n=2, weights='num_specimen_seen', random_state=1) + num_legs num_wings num_specimen_seen + falcon 2 2 10 + fish 0 0 8 + """ # noqa:E501 + if axis is None: + axis = self._stat_axis_number + + axis = self._get_axis_number(axis) + obj_len = self.shape[axis] + + # Process random_state argument + rs = com.random_state(random_state) + + size = sample.process_sampling_size(n, frac, replace) + if size is None: + assert frac is not None + size = round(frac * obj_len) + + if weights is not None: + weights = sample.preprocess_weights(self, weights, axis) + + sampled_indices = sample.sample(obj_len, size, replace, weights, rs) + result = self.take(sampled_indices, axis=axis) + + if ignore_index: + result.index = default_index(len(result)) + + return result + + @final + @doc(klass=_shared_doc_kwargs["klass"]) + def pipe( + self, + func: Callable[..., T] | tuple[Callable[..., T], str], + *args, + **kwargs, + ) -> T: + r""" + Apply chainable functions that expect Series or DataFrames. + + Parameters + ---------- + func : function + Function to apply to the {klass}. + ``args``, and ``kwargs`` are passed into ``func``. + Alternatively a ``(callable, data_keyword)`` tuple where + ``data_keyword`` is a string indicating the keyword of + ``callable`` that expects the {klass}. + args : iterable, optional + Positional arguments passed into ``func``. + kwargs : mapping, optional + A dictionary of keyword arguments passed into ``func``. + + Returns + ------- + object : the return type of ``func``. + + See Also + -------- + DataFrame.apply : Apply a function along input axis of DataFrame. + DataFrame.applymap : Apply a function elementwise on a whole DataFrame. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. + + Notes + ----- + Use ``.pipe`` when chaining together functions that expect + Series, DataFrames or GroupBy objects. Instead of writing + + >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP + + You can write + + >>> (df.pipe(h) + ... .pipe(g, arg1=a) + ... .pipe(func, arg2=b, arg3=c) + ... ) # doctest: +SKIP + + If you have a function that takes the data as (say) the second + argument, pass a tuple indicating which keyword expects the + data. For example, suppose ``f`` takes its data as ``arg2``: + + >>> (df.pipe(h) + ... .pipe(g, arg1=a) + ... .pipe((func, 'arg2'), arg1=a, arg3=c) + ... 
) # doctest: +SKIP + """ + return com.pipe(self, func, *args, **kwargs) + + # ---------------------------------------------------------------------- + # Attribute access + + @final + def __finalize__( + self: NDFrameT, other, method: str | None = None, **kwargs + ) -> NDFrameT: + """ + Propagate metadata from other to self. + + Parameters + ---------- + other : the object from which to get the attributes that we are going + to propagate + method : str, optional + A passed method name providing context on where ``__finalize__`` + was called. + + .. warning:: + + The value passed as `method` are not currently considered + stable across pandas releases. + """ + if isinstance(other, NDFrame): + for name in other.attrs: + self.attrs[name] = other.attrs[name] + + self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels + # For subclasses using _metadata. + for name in set(self._metadata) & set(other._metadata): + assert isinstance(name, str) + object.__setattr__(self, name, getattr(other, name, None)) + + if method == "concat": + attrs = other.objs[0].attrs + check_attrs = all(objs.attrs == attrs for objs in other.objs[1:]) + if check_attrs: + for name in attrs: + self.attrs[name] = attrs[name] + + allows_duplicate_labels = all( + x.flags.allows_duplicate_labels for x in other.objs + ) + self.flags.allows_duplicate_labels = allows_duplicate_labels + + return self + + def __getattr__(self, name: str): + """ + After regular attribute access, try looking up the name + This allows simpler access to columns for interactive use. + """ + # Note: obj.x will always call obj.__getattribute__('x') prior to + # calling obj.__getattr__('x'). + if ( + name not in self._internal_names_set + and name not in self._metadata + and name not in self._accessors + and self._info_axis._can_hold_identifiers_and_holds_name(name) + ): + return self[name] + return object.__getattribute__(self, name) + + def __setattr__(self, name: str, value) -> None: + """ + After regular attribute access, try setting the name + This allows simpler access to columns for interactive use. + """ + # first try regular attribute access via __getattribute__, so that + # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify + # the same attribute. + + try: + object.__getattribute__(self, name) + return object.__setattr__(self, name, value) + except AttributeError: + pass + + # if this fails, go on to more involved attribute setting + # (note that this matches __getattr__, above). + if name in self._internal_names_set: + object.__setattr__(self, name, value) + elif name in self._metadata: + object.__setattr__(self, name, value) + else: + try: + existing = getattr(self, name) + if isinstance(existing, Index): + object.__setattr__(self, name, value) + elif name in self._info_axis: + self[name] = value + else: + object.__setattr__(self, name, value) + except (AttributeError, TypeError): + if isinstance(self, ABCDataFrame) and (is_list_like(value)): + warnings.warn( + "Pandas doesn't allow columns to be " + "created via a new attribute name - see " + "https://pandas.pydata.org/pandas-docs/" + "stable/indexing.html#attribute-access", + stacklevel=find_stack_level(), + ) + object.__setattr__(self, name, value) + + @final + def _dir_additions(self) -> set[str]: + """ + add the string-like attributes from the info_axis. + If info_axis is a MultiIndex, its first level values are used. 
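A usage sketch (not part of the imported source) of the column-as-attribute access that ``__getattr__`` provides and that ``_dir_additions`` exposes to tab completion; the frame and the ``speed`` column are made-up names:

>>> # illustrative only, assumes ``import pandas as pd``
>>> df = pd.DataFrame({'speed': [1, 2]})
>>> df.speed
0    1
1    2
Name: speed, dtype: int64

Assignment through an attribute only works for columns that already exist; creating a new column must go through ``df['new_col'] = ...``, which is what the warning emitted in ``__setattr__`` above points at.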
+ """ + additions = super()._dir_additions() + if self._info_axis._can_hold_strings: + additions.update(self._info_axis._dir_additions_for_owner) + return additions + + # ---------------------------------------------------------------------- + # Consolidation of internals + + @final + def _protect_consolidate(self, f): + """ + Consolidate _mgr -- if the blocks have changed, then clear the + cache + """ + if isinstance(self._mgr, (ArrayManager, SingleArrayManager)): + return f() + blocks_before = len(self._mgr.blocks) + result = f() + if len(self._mgr.blocks) != blocks_before: + self._clear_item_cache() + return result + + @final + def _consolidate_inplace(self) -> None: + """Consolidate data in place and return None""" + + def f(): + self._mgr = self._mgr.consolidate() + + self._protect_consolidate(f) + + @final + def _consolidate(self): + """ + Compute NDFrame with "consolidated" internals (data of each dtype + grouped together in a single ndarray). + + Returns + ------- + consolidated : same type as caller + """ + f = lambda: self._mgr.consolidate() + cons_data = self._protect_consolidate(f) + return self._constructor(cons_data).__finalize__(self) + + @final + @property + def _is_mixed_type(self) -> bool_t: + if self._mgr.is_single_block: + return False + + if self._mgr.any_extension_types: + # Even if they have the same dtype, we can't consolidate them, + # so we pretend this is "mixed'" + return True + + return self.dtypes.nunique() > 1 + + @final + def _check_inplace_setting(self, value) -> bool_t: + """check whether we allow in-place setting with this type of value""" + if self._is_mixed_type and not self._mgr.is_numeric_mixed_type: + + # allow an actual np.nan thru + if is_float(value) and np.isnan(value): + return True + + raise TypeError( + "Cannot do inplace boolean setting on " + "mixed-types with a non np.nan value" + ) + + return True + + @final + def _get_numeric_data(self: NDFrameT) -> NDFrameT: + return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) + + @final + def _get_bool_data(self): + return self._constructor(self._mgr.get_bool_data()).__finalize__(self) + + # ---------------------------------------------------------------------- + # Internal Interface Methods + + @property + def values(self): + raise AbstractMethodError(self) + + @property + def _values(self) -> np.ndarray: + """internal implementation""" + raise AbstractMethodError(self) + + @property + def dtypes(self): + """ + Return the dtypes in the DataFrame. + + This returns a Series with the data type of each column. + The result's index is the original DataFrame's columns. Columns + with mixed types are stored with the ``object`` dtype. See + :ref:`the User Guide ` for more. + + Returns + ------- + pandas.Series + The data type of each column. + + Examples + -------- + >>> df = pd.DataFrame({'float': [1.0], + ... 'int': [1], + ... 'datetime': [pd.Timestamp('20180310')], + ... 'string': ['foo']}) + >>> df.dtypes + float float64 + int int64 + datetime datetime64[ns] + string object + dtype: object + """ + data = self._mgr.get_dtypes() + return self._constructor_sliced(data, index=self._info_axis, dtype=np.object_) + + def astype( + self: NDFrameT, dtype, copy: bool_t = True, errors: IgnoreRaise = "raise" + ) -> NDFrameT: + """ + Cast a pandas object to a specified dtype ``dtype``. + + Parameters + ---------- + dtype : data type, or dict of column name -> data type + Use a numpy.dtype or Python type to cast entire pandas object to + the same type. 
Alternatively, use {col: dtype, ...}, where col is a + column label and dtype is a numpy.dtype or Python type to cast one + or more of the DataFrame's columns to column-specific types. + copy : bool, default True + Return a copy when ``copy=True`` (be very careful setting + ``copy=False`` as changes to values then may propagate to other + pandas objects). + errors : {'raise', 'ignore'}, default 'raise' + Control raising of exceptions on invalid data for provided dtype. + + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object. + + Returns + ------- + casted : same type as caller + + See Also + -------- + to_datetime : Convert argument to datetime. + to_timedelta : Convert argument to timedelta. + to_numeric : Convert argument to a numeric type. + numpy.ndarray.astype : Cast a numpy array to a specified type. + + Notes + ----- + .. deprecated:: 1.3.0 + + Using ``astype`` to convert from timezone-naive dtype to + timezone-aware dtype is deprecated and will raise in a + future version. Use :meth:`Series.dt.tz_localize` instead. + + Examples + -------- + Create a DataFrame: + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = pd.DataFrame(data=d) + >>> df.dtypes + col1 int64 + col2 int64 + dtype: object + + Cast all columns to int32: + + >>> df.astype('int32').dtypes + col1 int32 + col2 int32 + dtype: object + + Cast col1 to int32 using a dictionary: + + >>> df.astype({'col1': 'int32'}).dtypes + col1 int32 + col2 int64 + dtype: object + + Create a series: + + >>> ser = pd.Series([1, 2], dtype='int32') + >>> ser + 0 1 + 1 2 + dtype: int32 + >>> ser.astype('int64') + 0 1 + 1 2 + dtype: int64 + + Convert to categorical type: + + >>> ser.astype('category') + 0 1 + 1 2 + dtype: category + Categories (2, int64): [1, 2] + + Convert to ordered categorical type with custom ordering: + + >>> from pandas.api.types import CategoricalDtype + >>> cat_dtype = CategoricalDtype( + ... categories=[2, 1], ordered=True) + >>> ser.astype(cat_dtype) + 0 1 + 1 2 + dtype: category + Categories (2, int64): [2 < 1] + + Note that using ``copy=False`` and changing data on a new + pandas object may propagate changes: + + >>> s1 = pd.Series([1, 2]) + >>> s2 = s1.astype('int64', copy=False) + >>> s2[0] = 10 + >>> s1 # note that s1[0] has changed too + 0 10 + 1 2 + dtype: int64 + + Create a series of dates: + + >>> ser_date = pd.Series(pd.date_range('20200101', periods=3)) + >>> ser_date + 0 2020-01-01 + 1 2020-01-02 + 2 2020-01-03 + dtype: datetime64[ns] + """ + if is_dict_like(dtype): + if self.ndim == 1: # i.e. Series + if len(dtype) > 1 or self.name not in dtype: + raise KeyError( + "Only the Series name can be used for " + "the key in Series dtype mappings." + ) + new_type = dtype[self.name] + return self.astype(new_type, copy, errors) + + # GH#44417 cast to Series so we can use .iat below, which will be + # robust in case we + from pandas import Series + + dtype_ser = Series(dtype, dtype=object) + + for col_name in dtype_ser.index: + if col_name not in self: + raise KeyError( + "Only a column name can be used for the " + "key in a dtype mappings argument. " + f"'{col_name}' not found in columns." 
+ ) + + dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False) + + results = [] + for i, (col_name, col) in enumerate(self.items()): + cdt = dtype_ser.iat[i] + if isna(cdt): + res_col = col.copy() if copy else col + else: + res_col = col.astype(dtype=cdt, copy=copy, errors=errors) + results.append(res_col) + + elif is_extension_array_dtype(dtype) and self.ndim > 1: + # GH 18099/22869: columnwise conversion to extension dtype + # GH 24704: use iloc to handle duplicate column names + # TODO(EA2D): special case not needed with 2D EAs + results = [ + self.iloc[:, i].astype(dtype, copy=copy) + for i in range(len(self.columns)) + ] + + else: + # else, only a single dtype is given + new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) + return self._constructor(new_data).__finalize__(self, method="astype") + + # GH 33113: handle empty frame or series + if not results: + return self.copy() + + # GH 19920: retain column metadata after concat + result = concat(results, axis=1, copy=False) + # GH#40810 retain subclass + # error: Incompatible types in assignment + # (expression has type "NDFrameT", variable has type "DataFrame") + result = self._constructor(result) # type: ignore[assignment] + result.columns = self.columns + result = result.__finalize__(self, method="astype") + # https://github.com/python/mypy/issues/8354 + return cast(NDFrameT, result) + + @final + def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT: + """ + Make a copy of this object's indices and data. + + When ``deep=True`` (default), a new object will be created with a + copy of the calling object's data and indices. Modifications to + the data or indices of the copy will not be reflected in the + original object (see notes below). + + When ``deep=False``, a new object will be created without copying + the calling object's data or index (only references to the data + and index are copied). Any changes to the data of the original + will be reflected in the shallow copy (and vice versa). + + Parameters + ---------- + deep : bool, default True + Make a deep copy, including a copy of the data and the indices. + With ``deep=False`` neither the indices nor the data are copied. + + Returns + ------- + copy : Series or DataFrame + Object type matches caller. + + Notes + ----- + When ``deep=True``, data is copied but actual Python objects + will not be copied recursively, only the reference to the object. + This is in contrast to `copy.deepcopy` in the Standard Library, + which recursively copies object data (see examples below). + + While ``Index`` objects are copied when ``deep=True``, the underlying + numpy array is not copied for performance reasons. Since ``Index`` is + immutable, the underlying data can be safely shared and a copy + is not needed. + + Since pandas is not thread safe, see the + :ref:`gotchas ` when copying in a threading + environment. + + Examples + -------- + >>> s = pd.Series([1, 2], index=["a", "b"]) + >>> s + a 1 + b 2 + dtype: int64 + + >>> s_copy = s.copy() + >>> s_copy + a 1 + b 2 + dtype: int64 + + **Shallow copy versus default (deep) copy:** + + >>> s = pd.Series([1, 2], index=["a", "b"]) + >>> deep = s.copy() + >>> shallow = s.copy(deep=False) + + Shallow copy shares data and index with original. + + >>> s is shallow + False + >>> s.values is shallow.values and s.index is shallow.index + True + + Deep copy has own copy of data and index. 
+ + >>> s is deep + False + >>> s.values is deep.values or s.index is deep.index + False + + Updates to the data shared by shallow copy and original is reflected + in both; deep copy remains unchanged. + + >>> s[0] = 3 + >>> shallow[1] = 4 + >>> s + a 3 + b 4 + dtype: int64 + >>> shallow + a 3 + b 4 + dtype: int64 + >>> deep + a 1 + b 2 + dtype: int64 + + Note that when copying an object containing Python objects, a deep copy + will copy the data, but will not do so recursively. Updating a nested + data object will be reflected in the deep copy. + + >>> s = pd.Series([[1, 2], [3, 4]]) + >>> deep = s.copy() + >>> s[0][0] = 10 + >>> s + 0 [10, 2] + 1 [3, 4] + dtype: object + >>> deep + 0 [10, 2] + 1 [3, 4] + dtype: object + """ + data = self._mgr.copy(deep=deep) + self._clear_item_cache() + return self._constructor(data).__finalize__(self, method="copy") + + @final + def __copy__(self: NDFrameT, deep: bool_t = True) -> NDFrameT: + return self.copy(deep=deep) + + @final + def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT: + """ + Parameters + ---------- + memo, default None + Standard signature. Unused + """ + return self.copy(deep=True) + + @final + def _convert( + self: NDFrameT, + datetime: bool_t = False, + numeric: bool_t = False, + timedelta: bool_t = False, + ) -> NDFrameT: + """ + Attempt to infer better dtype for object columns. + + Parameters + ---------- + datetime : bool, default False + If True, convert to date where possible. + numeric : bool, default False + If True, attempt to convert to numbers (including strings), with + unconvertible values becoming NaN. + timedelta : bool, default False + If True, convert to timedelta where possible. + + Returns + ------- + converted : same as input object + """ + validate_bool_kwarg(datetime, "datetime") + validate_bool_kwarg(numeric, "numeric") + validate_bool_kwarg(timedelta, "timedelta") + return self._constructor( + self._mgr.convert( + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + copy=True, + ) + ).__finalize__(self) + + @final + def infer_objects(self: NDFrameT) -> NDFrameT: + """ + Attempt to infer better dtypes for object columns. + + Attempts soft conversion of object-dtyped + columns, leaving non-object and unconvertible + columns unchanged. The inference rules are the + same as during normal Series/DataFrame construction. + + Returns + ------- + converted : same type as input object + + See Also + -------- + to_datetime : Convert argument to datetime. + to_timedelta : Convert argument to timedelta. + to_numeric : Convert argument to numeric type. + convert_dtypes : Convert argument to best possible dtype. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]}) + >>> df = df.iloc[1:] + >>> df + A + 1 1 + 2 2 + 3 3 + + >>> df.dtypes + A object + dtype: object + + >>> df.infer_objects().dtypes + A int64 + dtype: object + """ + # numeric=False necessary to only soft convert; + # python objects will still be converted to + # native numpy numeric types + return self._constructor( + self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True) + ).__finalize__(self, method="infer_objects") + + @final + def convert_dtypes( + self: NDFrameT, + infer_objects: bool_t = True, + convert_string: bool_t = True, + convert_integer: bool_t = True, + convert_boolean: bool_t = True, + convert_floating: bool_t = True, + ) -> NDFrameT: + """ + Convert columns to best possible dtypes using dtypes supporting ``pd.NA``. + + .. 
versionadded:: 1.0.0 + + Parameters + ---------- + infer_objects : bool, default True + Whether object dtypes should be converted to the best possible types. + convert_string : bool, default True + Whether object dtypes should be converted to ``StringDtype()``. + convert_integer : bool, default True + Whether, if possible, conversion can be done to integer extension types. + convert_boolean : bool, default True + Whether object dtypes should be converted to ``BooleanDtype()``. + convert_floating : bool, default True + Whether, if possible, conversion can be done to floating extension types. + If `convert_integer` is also True, preference will be given to integer + dtypes if the floats can be faithfully cast to integers. + + .. versionadded:: 1.2.0 + + Returns + ------- + Series or DataFrame + Copy of input object with new dtype. + + See Also + -------- + infer_objects : Infer dtypes of objects. + to_datetime : Convert argument to datetime. + to_timedelta : Convert argument to timedelta. + to_numeric : Convert argument to a numeric type. + + Notes + ----- + By default, ``convert_dtypes`` will attempt to convert a Series (or each + Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options + ``convert_string``, ``convert_integer``, ``convert_boolean`` and + ``convert_floating``, it is possible to turn off individual conversions + to ``StringDtype``, the integer extension types, ``BooleanDtype`` + or floating extension types, respectively. + + For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference + rules as during normal Series/DataFrame construction. Then, if possible, + convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer + or floating extension type, otherwise leave as ``object``. + + If the dtype is integer, convert to an appropriate integer extension type. + + If the dtype is numeric, and consists of all integers, convert to an + appropriate integer extension type. Otherwise, convert to an + appropriate floating extension type. + + .. versionchanged:: 1.2 + Starting with pandas 1.2, this method also converts float columns + to the nullable floating extension type. + + In the future, as new dtypes are added that support ``pd.NA``, the results + of this method will change to support those new dtypes. + + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), + ... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")), + ... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")), + ... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")), + ... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")), + ... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")), + ... } + ... ) + + Start with a DataFrame with default dtypes. + + >>> df + a b c d e f + 0 1 x True h 10.0 NaN + 1 2 y False i NaN 100.5 + 2 3 z NaN NaN 20.0 200.0 + + >>> df.dtypes + a int32 + b object + c object + d object + e float64 + f float64 + dtype: object + + Convert the DataFrame to use best possible dtypes. + + >>> dfn = df.convert_dtypes() + >>> dfn + a b c d e f + 0 1 x True h 10 <NA> + 1 2 y False i <NA> 100.5 + 2 3 z <NA> <NA> 20 200.0 + + >>> dfn.dtypes + a Int32 + b string + c boolean + d string + e Int64 + f Float64 + dtype: object + + Start with a Series of strings and missing data represented by ``np.nan``. + + >>> s = pd.Series(["a", "b", np.nan]) + >>> s + 0 a + 1 b + 2 NaN + dtype: object + + Obtain a Series with dtype ``StringDtype``.
+ + >>> s.convert_dtypes() + 0 a + 1 b + 2 <NA> + dtype: string + """ + if self.ndim == 1: + return self._convert_dtypes( + infer_objects, + convert_string, + convert_integer, + convert_boolean, + convert_floating, + ) + else: + results = [ + col._convert_dtypes( + infer_objects, + convert_string, + convert_integer, + convert_boolean, + convert_floating, + ) + for col_name, col in self.items() + ] + if len(results) > 0: + result = concat(results, axis=1, copy=False, keys=self.columns) + cons = cast(Type["DataFrame"], self._constructor) + result = cons(result) + result = result.__finalize__(self, method="convert_dtypes") + # https://github.com/python/mypy/issues/8354 + return cast(NDFrameT, result) + else: + return self.copy() + + # ---------------------------------------------------------------------- + # Filling NA's + + @overload + def fillna( + self: NDFrameT, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[False] = ..., + limit: int | None = ..., + downcast: dict | None = ..., + ) -> NDFrameT: + ... + + @overload + def fillna( + self, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[True], + limit: int | None = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def fillna( + self: NDFrameT, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: bool_t = ..., + limit: int | None = ..., + downcast: dict | None = ..., + ) -> NDFrameT | None: + ... + + @doc(**_shared_doc_kwargs) + def fillna( + self: NDFrameT, + value: Hashable | Mapping | Series | DataFrame = None, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool_t = False, + limit: int | None = None, + downcast: dict | None = None, + ) -> NDFrameT | None: + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, dict, Series, or DataFrame + Value to use to fill holes (e.g. 0), alternately a + dict/Series/DataFrame of values specifying which value to use for + each index (for a Series) or column (for a DataFrame). Values not + in the dict/Series/DataFrame will not be filled. This value cannot + be a list. + method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use next valid observation to fill gap. + axis : {axes_single_arg} + Axis along which to fill missing values. For `Series` + this parameter is unused and defaults to 0. + inplace : bool, default False + If True, fill in-place. Note: this will modify any + other views on this object (e.g., a no-copy slice for a column in a + DataFrame). + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + downcast : dict, default is None + A dict of item->dtype of what to downcast if possible, + or the string 'infer' which will try to downcast to an appropriate + equal type (e.g. float64 to int64 if possible).
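A small sketch (not from the imported source) of the ``downcast`` parameter just described, on made-up data; ``downcast='infer'`` narrows the filled result when every value is integral:

>>> # illustrative only, assumes ``import numpy as np`` and ``import pandas as pd``
>>> pd.Series([1.0, np.nan]).fillna(0, downcast='infer')
0    1
1    0
dtype: int64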
+ + Returns + ------- + {klass} or None + Object with missing values filled or None if ``inplace=True``. + + See Also + -------- + interpolate : Fill NaN values using interpolation. + reindex : Conform object to new index. + asfreq : Convert TimeSeries to specified frequency. + + Examples + -------- + >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], + ... [3, 4, np.nan, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [np.nan, 3, np.nan, 4]], + ... columns=list("ABCD")) + >>> df + A B C D + 0 NaN 2.0 NaN 0.0 + 1 3.0 4.0 NaN 1.0 + 2 NaN NaN NaN NaN + 3 NaN 3.0 NaN 4.0 + + Replace all NaN elements with 0s. + + >>> df.fillna(0) + A B C D + 0 0.0 2.0 0.0 0.0 + 1 3.0 4.0 0.0 1.0 + 2 0.0 0.0 0.0 0.0 + 3 0.0 3.0 0.0 4.0 + + We can also propagate non-null values forward or backward. + + >>> df.fillna(method="ffill") + A B C D + 0 NaN 2.0 NaN 0.0 + 1 3.0 4.0 NaN 1.0 + 2 3.0 4.0 NaN 1.0 + 3 3.0 3.0 NaN 4.0 + + Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1, + 2, and 3 respectively. + + >>> values = {{"A": 0, "B": 1, "C": 2, "D": 3}} + >>> df.fillna(value=values) + A B C D + 0 0.0 2.0 2.0 0.0 + 1 3.0 4.0 2.0 1.0 + 2 0.0 1.0 2.0 3.0 + 3 0.0 3.0 2.0 4.0 + + Only replace the first NaN element. + + >>> df.fillna(value=values, limit=1) + A B C D + 0 0.0 2.0 2.0 0.0 + 1 3.0 4.0 NaN 1.0 + 2 NaN 1.0 NaN 3.0 + 3 NaN 3.0 NaN 4.0 + + When filling using a DataFrame, replacement happens along + the same column names and same indices + + >>> df2 = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCE")) + >>> df.fillna(df2) + A B C D + 0 0.0 2.0 0.0 0.0 + 1 3.0 4.0 0.0 1.0 + 2 0.0 0.0 0.0 NaN + 3 0.0 3.0 0.0 4.0 + + Note that column D is not affected since it is not present in df2. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + value, method = validate_fillna_kwargs(value, method) + + self._consolidate_inplace() + + # set the default here, so functions examining the signaure + # can detect if something was set (e.g. 
in groupby) (GH9221) + if axis is None: + axis = 0 + axis = self._get_axis_number(axis) + + if value is None: + if not self._mgr.is_single_block and axis == 1: + if inplace: + raise NotImplementedError() + result = self.T.fillna(method=method, limit=limit).T + + return result + + new_data = self._mgr.interpolate( + method=method, + axis=axis, + limit=limit, + inplace=inplace, + downcast=downcast, + ) + else: + if self.ndim == 1: + if isinstance(value, (dict, ABCSeries)): + if not len(value): + # test_fillna_nonscalar + if inplace: + return None + return self.copy() + value = create_series_with_explicit_dtype( + value, dtype_if_empty=object + ) + value = value.reindex(self.index, copy=False) + value = value._values + elif not is_list_like(value): + pass + else: + raise TypeError( + '"value" parameter must be a scalar, dict ' + "or Series, but you passed a " + f'"{type(value).__name__}"' + ) + + new_data = self._mgr.fillna( + value=value, limit=limit, inplace=inplace, downcast=downcast + ) + + elif isinstance(value, (dict, ABCSeries)): + if axis == 1: + raise NotImplementedError( + "Currently only can fill " + "with dict/Series column " + "by column" + ) + + result = self if inplace else self.copy() + is_dict = isinstance(downcast, dict) + for k, v in value.items(): + if k not in result: + continue + + # error: Item "None" of "Optional[Dict[Any, Any]]" has no + # attribute "get" + downcast_k = ( + downcast + if not is_dict + else downcast.get(k) # type: ignore[union-attr] + ) + + res_k = result[k].fillna(v, limit=limit, downcast=downcast_k) + + if not inplace: + result[k] = res_k + else: + # We can write into our existing column(s) iff dtype + # was preserved. + if isinstance(res_k, ABCSeries): + # i.e. 'k' only shows up once in self.columns + if res_k.dtype == result[k].dtype: + result.loc[:, k] = res_k + else: + # Different dtype -> no way to do inplace. 
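# When a dict/Series ``value`` is filled column by column (the loop above), a
# filled column that comes back with a different dtype than the original one
# (for example a float column filled with a string becomes object) cannot be
# written back in place, so the column object is swapped out wholesale below.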
+ result[k] = res_k + else: + # see test_fillna_dict_inplace_nonunique_columns + locs = result.columns.get_loc(k) + if isinstance(locs, slice): + locs = np.arange(self.shape[1])[locs] + elif ( + isinstance(locs, np.ndarray) and locs.dtype.kind == "b" + ): + locs = locs.nonzero()[0] + elif not ( + isinstance(locs, np.ndarray) and locs.dtype.kind == "i" + ): + # Should never be reached, but let's cover our bases + raise NotImplementedError( + "Unexpected get_loc result, please report a bug at " + "https://github.com/pandas-dev/pandas" + ) + + for i, loc in enumerate(locs): + res_loc = res_k.iloc[:, i] + target = self.iloc[:, loc] + + if res_loc.dtype == target.dtype: + result.iloc[:, loc] = res_loc + else: + result.isetitem(loc, res_loc) + + return result if not inplace else None + + elif not is_list_like(value): + if axis == 1: + + result = self.T.fillna(value=value, limit=limit).T + + # error: Incompatible types in assignment (expression has type + # "NDFrameT", variable has type "Union[ArrayManager, + # SingleArrayManager, BlockManager, SingleBlockManager]") + new_data = result # type: ignore[assignment] + else: + + new_data = self._mgr.fillna( + value=value, limit=limit, inplace=inplace, downcast=downcast + ) + elif isinstance(value, ABCDataFrame) and self.ndim == 2: + + new_data = self.where(self.notna(), value)._mgr + else: + raise ValueError(f"invalid fill value with a {type(value)}") + + result = self._constructor(new_data) + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="fillna") + + @overload + def ffill( + self: NDFrameT, + *, + axis: None | Axis = ..., + inplace: Literal[False] = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> NDFrameT: + ... + + @overload + def ffill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[True], + limit: None | int = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def ffill( + self: NDFrameT, + *, + axis: None | Axis = ..., + inplace: bool_t = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> NDFrameT | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @doc(klass=_shared_doc_kwargs["klass"]) + def ffill( + self: NDFrameT, + axis: None | Axis = None, + inplace: bool_t = False, + limit: None | int = None, + downcast: dict | None = None, + ) -> NDFrameT | None: + """ + Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. + + Returns + ------- + {klass} or None + Object with missing values filled or None if ``inplace=True``. + """ + return self.fillna( + method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast + ) + + pad = ffill + + @overload + def bfill( + self: NDFrameT, + *, + axis: None | Axis = ..., + inplace: Literal[False] = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> NDFrameT: + ... + + @overload + def bfill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[True], + limit: None | int = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def bfill( + self: NDFrameT, + *, + axis: None | Axis = ..., + inplace: bool_t = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> NDFrameT | None: + ... 
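``ffill`` (above) and ``bfill`` (below) delegate to ``fillna`` and carry no Examples section of their own; a minimal usage sketch (not part of the imported source), on made-up data:

>>> # illustrative only, assumes ``import numpy as np`` and ``import pandas as pd``
>>> s = pd.Series([1.0, np.nan, np.nan, 4.0])
>>> s.ffill()
0    1.0
1    1.0
2    1.0
3    4.0
dtype: float64
>>> s.bfill()
0    1.0
1    4.0
2    4.0
3    4.0
dtype: float64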
+ + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @doc(klass=_shared_doc_kwargs["klass"]) + def bfill( + self: NDFrameT, + axis: None | Axis = None, + inplace: bool_t = False, + limit: None | int = None, + downcast: dict | None = None, + ) -> NDFrameT | None: + """ + Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. + + Returns + ------- + {klass} or None + Object with missing values filled or None if ``inplace=True``. + """ + return self.fillna( + method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast + ) + + backfill = bfill + + @overload + def replace( + self: NDFrameT, + to_replace=..., + value=..., + *, + inplace: Literal[False] = ..., + limit: int | None = ..., + regex: bool_t = ..., + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., + ) -> NDFrameT: + ... + + @overload + def replace( + self, + to_replace=..., + value=..., + *, + inplace: Literal[True], + limit: int | None = ..., + regex: bool_t = ..., + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def replace( + self: NDFrameT, + to_replace=..., + value=..., + *, + inplace: bool_t = ..., + limit: int | None = ..., + regex: bool_t = ..., + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., + ) -> NDFrameT | None: + ... + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "to_replace", "value"] + ) + @doc( + _shared_docs["replace"], + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], + ) + def replace( + self: NDFrameT, + to_replace=None, + value=lib.no_default, + inplace: bool_t = False, + limit: int | None = None, + regex: bool_t = False, + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default, + ) -> NDFrameT | None: + if not ( + is_scalar(to_replace) + or is_re_compilable(to_replace) + or is_list_like(to_replace) + ): + raise TypeError( + "Expecting 'to_replace' to be either a scalar, array-like, " + "dict or None, got invalid type " + f"{repr(type(to_replace).__name__)}" + ) + + inplace = validate_bool_kwarg(inplace, "inplace") + if not is_bool(regex) and to_replace is not None: + raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") + + self._consolidate_inplace() + + if value is lib.no_default or method is not lib.no_default: + # GH#36984 if the user explicitly passes value=None we want to + # respect that. We have the corner case where the user explicitly + # passes value=None *and* a method, which we interpret as meaning + # they want the (documented) default behavior. + if method is lib.no_default: + # TODO: get this to show up as the default in the docs? 
+ method = "pad" + + # passing a single value that is scalar like + # when value is None (GH5319), for compat + if not is_dict_like(to_replace) and not is_dict_like(regex): + to_replace = [to_replace] + + if isinstance(to_replace, (tuple, list)): + if isinstance(self, ABCDataFrame): + from pandas import Series + + result = self.apply( + Series._replace_single, + args=(to_replace, method, inplace, limit), + ) + if inplace: + return None + return result + return self._replace_single(to_replace, method, inplace, limit) + + if not is_dict_like(to_replace): + if not is_dict_like(regex): + raise TypeError( + 'If "to_replace" and "value" are both None ' + 'and "to_replace" is not a list, then ' + "regex must be a mapping" + ) + to_replace = regex + regex = True + + items = list(to_replace.items()) + if items: + keys, values = zip(*items) + else: + keys, values = ([], []) + + are_mappings = [is_dict_like(v) for v in values] + + if any(are_mappings): + if not all(are_mappings): + raise TypeError( + "If a nested mapping is passed, all values " + "of the top level mapping must be mappings" + ) + # passed a nested dict/Series + to_rep_dict = {} + value_dict = {} + + for k, v in items: + keys, values = list(zip(*v.items())) or ([], []) + + to_rep_dict[k] = list(keys) + value_dict[k] = list(values) + + to_replace, value = to_rep_dict, value_dict + else: + to_replace, value = keys, values + + return self.replace( + to_replace, value, inplace=inplace, limit=limit, regex=regex + ) + else: + + # need a non-zero len on all axes + if not self.size: + if inplace: + return None + return self.copy() + + if is_dict_like(to_replace): + if is_dict_like(value): # {'A' : NA} -> {'A' : 0} + # Note: Checking below for `in foo.keys()` instead of + # `in foo` is needed for when we have a Series and not dict + mapping = { + col: (to_replace[col], value[col]) + for col in to_replace.keys() + if col in value.keys() and col in self + } + return self._replace_columnwise(mapping, inplace, regex) + + # {'A': NA} -> 0 + elif not is_list_like(value): + # Operate column-wise + if self.ndim == 1: + raise ValueError( + "Series.replace cannot use dict-like to_replace " + "and non-None value" + ) + mapping = { + col: (to_rep, value) for col, to_rep in to_replace.items() + } + return self._replace_columnwise(mapping, inplace, regex) + else: + raise TypeError("value argument must be scalar, dict, or Series") + + elif is_list_like(to_replace): + if not is_list_like(value): + # e.g. to_replace = [NA, ''] and value is 0, + # so we replace NA with 0 and then replace '' with 0 + value = [value] * len(to_replace) + + # e.g. we have to_replace = [NA, ''] and value = [0, 'missing'] + if len(to_replace) != len(value): + raise ValueError( + f"Replacement lists must match in length. 
" + f"Expecting {len(to_replace)} got {len(value)} " + ) + new_data = self._mgr.replace_list( + src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex, + ) + + elif to_replace is None: + if not ( + is_re_compilable(regex) + or is_list_like(regex) + or is_dict_like(regex) + ): + raise TypeError( + f"'regex' must be a string or a compiled regular expression " + f"or a list or dict of strings or regular expressions, " + f"you passed a {repr(type(regex).__name__)}" + ) + return self.replace( + regex, value, inplace=inplace, limit=limit, regex=True + ) + else: + + # dest iterable dict-like + if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} + # Operate column-wise + if self.ndim == 1: + raise ValueError( + "Series.replace cannot use dict-value and " + "non-None to_replace" + ) + mapping = {col: (to_replace, val) for col, val in value.items()} + return self._replace_columnwise(mapping, inplace, regex) + + elif not is_list_like(value): # NA -> 0 + regex = should_use_regex(regex, to_replace) + if regex: + new_data = self._mgr.replace_regex( + to_replace=to_replace, + value=value, + inplace=inplace, + ) + else: + new_data = self._mgr.replace( + to_replace=to_replace, value=value, inplace=inplace + ) + else: + raise TypeError( + f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' + ) + + result = self._constructor(new_data) + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="replace") + + def interpolate( + self: NDFrameT, + method: str = "linear", + axis: Axis = 0, + limit: int | None = None, + inplace: bool_t = False, + limit_direction: str | None = None, + limit_area: str | None = None, + downcast: str | None = None, + **kwargs, + ) -> NDFrameT | None: + """ + Fill NaN values using an interpolation method. + + Please note that only ``method='linear'`` is supported for + DataFrame/Series with a MultiIndex. + + Parameters + ---------- + method : str, default 'linear' + Interpolation technique to use. One of: + + * 'linear': Ignore the index and treat the values as equally + spaced. This is the only method supported on MultiIndexes. + * 'time': Works on daily and higher resolution data to interpolate + given length of interval. + * 'index', 'values': use the actual numerical values of the index. + * 'pad': Fill in NaNs using existing values. + * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline', + 'barycentric', 'polynomial': Passed to + `scipy.interpolate.interp1d`. These methods use the numerical + values of the index. Both 'polynomial' and 'spline' require that + you also specify an `order` (int), e.g. + ``df.interpolate(method='polynomial', order=5)``. + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', + 'cubicspline': Wrappers around the SciPy interpolation methods of + similar names. See `Notes`. + * 'from_derivatives': Refers to + `scipy.interpolate.BPoly.from_derivatives` which + replaces 'piecewise_polynomial' interpolation method in + scipy 0.18. + + axis : {{0 or 'index', 1 or 'columns', None}}, default None + Axis to interpolate along. For `Series` this parameter is unused + and defaults to 0. + limit : int, optional + Maximum number of consecutive NaNs to fill. Must be greater than + 0. + inplace : bool, default False + Update the data in place if possible. + limit_direction : {{'forward', 'backward', 'both'}}, Optional + Consecutive NaNs will be filled in this direction. + + If limit is specified: + * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. 
+ * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be + 'backwards'. + + If 'limit' is not specified: + * If 'method' is 'backfill' or 'bfill', the default is 'backward' + * else the default is 'forward' + + .. versionchanged:: 1.1.0 + raises ValueError if `limit_direction` is 'forward' or 'both' and + method is 'backfill' or 'bfill'. + raises ValueError if `limit_direction` is 'backward' or 'both' and + method is 'pad' or 'ffill'. + + limit_area : {{`None`, 'inside', 'outside'}}, default None + If limit is specified, consecutive NaNs will be filled with this + restriction. + + * ``None``: No fill restriction. + * 'inside': Only fill NaNs surrounded by valid values + (interpolate). + * 'outside': Only fill NaNs outside valid values (extrapolate). + + downcast : optional, 'infer' or None, defaults to None + Downcast dtypes if possible. + ``**kwargs`` : optional + Keyword arguments to pass on to the interpolating function. + + Returns + ------- + Series or DataFrame or None + Returns the same object type as the caller, interpolated at + some or all ``NaN`` values or None if ``inplace=True``. + + See Also + -------- + fillna : Fill missing values using different methods. + scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials + (Akima interpolator). + scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the + Bernstein basis. + scipy.interpolate.interp1d : Interpolate a 1-D function. + scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh + interpolator). + scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic + interpolation. + scipy.interpolate.CubicSpline : Cubic spline data interpolator. + + Notes + ----- + The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' + methods are wrappers around the respective SciPy implementations of + similar names. These use the actual numerical values of the index. + For more information on their behavior, see the + `SciPy documentation + `__. + + Examples + -------- + Filling in ``NaN`` in a :class:`~pandas.Series` via linear + interpolation. + + >>> s = pd.Series([0, 1, np.nan, 3]) + >>> s + 0 0.0 + 1 1.0 + 2 NaN + 3 3.0 + dtype: float64 + >>> s.interpolate() + 0 0.0 + 1 1.0 + 2 2.0 + 3 3.0 + dtype: float64 + + Filling in ``NaN`` in a Series by padding, but filling at most two + consecutive ``NaN`` at a time. + + >>> s = pd.Series([np.nan, "single_one", np.nan, + ... "fill_two_more", np.nan, np.nan, np.nan, + ... 4.71, np.nan]) + >>> s + 0 NaN + 1 single_one + 2 NaN + 3 fill_two_more + 4 NaN + 5 NaN + 6 NaN + 7 4.71 + 8 NaN + dtype: object + >>> s.interpolate(method='pad', limit=2) + 0 NaN + 1 single_one + 2 single_one + 3 fill_two_more + 4 fill_two_more + 5 fill_two_more + 6 NaN + 7 4.71 + 8 4.71 + dtype: object + + Filling in ``NaN`` in a Series via polynomial interpolation or splines: + Both 'polynomial' and 'spline' methods require that you also specify + an ``order`` (int). + + >>> s = pd.Series([0, 2, np.nan, 8]) + >>> s.interpolate(method='polynomial', order=2) + 0 0.000000 + 1 2.000000 + 2 4.666667 + 3 8.000000 + dtype: float64 + + Fill the DataFrame forward (that is, going down) along each column + using linear interpolation. + + Note how the last entry in column 'a' is interpolated differently, + because there is no entry after it to use for interpolation. + Note how the first entry in column 'b' remains ``NaN``, because there + is no entry before it to use for interpolation. + + >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), + ... 
(np.nan, 2.0, np.nan, np.nan), + ... (2.0, 3.0, np.nan, 9.0), + ... (np.nan, 4.0, -4.0, 16.0)], + ... columns=list('abcd')) + >>> df + a b c d + 0 0.0 NaN -1.0 1.0 + 1 NaN 2.0 NaN NaN + 2 2.0 3.0 NaN 9.0 + 3 NaN 4.0 -4.0 16.0 + >>> df.interpolate(method='linear', limit_direction='forward', axis=0) + a b c d + 0 0.0 NaN -1.0 1.0 + 1 1.0 2.0 -2.0 5.0 + 2 2.0 3.0 -3.0 9.0 + 3 2.0 4.0 -4.0 16.0 + + Using polynomial interpolation. + + >>> df['d'].interpolate(method='polynomial', order=2) + 0 1.0 + 1 4.0 + 2 9.0 + 3 16.0 + Name: d, dtype: float64 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + axis = self._get_axis_number(axis) + + fillna_methods = ["ffill", "bfill", "pad", "backfill"] + should_transpose = axis == 1 and method not in fillna_methods + + obj = self.T if should_transpose else self + + if obj.empty: + return self.copy() + + if method not in fillna_methods: + axis = self._info_axis_number + + if isinstance(obj.index, MultiIndex) and method != "linear": + raise ValueError( + "Only `method=linear` interpolation is supported on MultiIndexes." + ) + + # Set `limit_direction` depending on `method` + if limit_direction is None: + limit_direction = ( + "backward" if method in ("backfill", "bfill") else "forward" + ) + else: + if method in ("pad", "ffill") and limit_direction != "forward": + raise ValueError( + f"`limit_direction` must be 'forward' for method `{method}`" + ) + if method in ("backfill", "bfill") and limit_direction != "backward": + raise ValueError( + f"`limit_direction` must be 'backward' for method `{method}`" + ) + + if obj.ndim == 2 and np.all(obj.dtypes == np.dtype("object")): + raise TypeError( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + + # create/use the index + if method == "linear": + # prior default + index = Index(np.arange(len(obj.index))) + else: + index = obj.index + methods = {"index", "values", "nearest", "time"} + is_numeric_or_datetime = ( + is_numeric_dtype(index.dtype) + or is_datetime64_any_dtype(index.dtype) + or is_timedelta64_dtype(index.dtype) + ) + if method not in methods and not is_numeric_or_datetime: + raise ValueError( + "Index column must be numeric or datetime type when " + f"using {method} method other than linear. " + "Try setting a numeric or datetime index column before " + "interpolating." + ) + + if isna(index).any(): + raise NotImplementedError( + "Interpolation with NaNs in the index " + "has not been implemented. Try filling " + "those NaNs before interpolating." + ) + new_data = obj._mgr.interpolate( + method=method, + axis=axis, + index=index, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + inplace=inplace, + downcast=downcast, + **kwargs, + ) + + result = self._constructor(new_data) + if should_transpose: + result = result.T + if inplace: + return self._update_inplace(result) + else: + return result.__finalize__(self, method="interpolate") + + # ---------------------------------------------------------------------- + # Timeseries methods Methods + + @final + def asof(self, where, subset=None): + """ + Return the last row(s) without any NaNs before `where`. + + The last row (for each element in `where`, if list) without any + NaN is taken. 
+ In case of a :class:`~pandas.DataFrame`, the last row without NaN + considering only the subset of columns (if not `None`) + + If there is no good value, NaN is returned for a Series or + a Series of NaN values for a DataFrame + + Parameters + ---------- + where : date or array-like of dates + Date(s) before which the last row(s) are returned. + subset : str or array-like of str, default `None` + For DataFrame, if not `None`, only use these columns to + check for NaNs. + + Returns + ------- + scalar, Series, or DataFrame + + The return can be: + + * scalar : when `self` is a Series and `where` is a scalar + * Series: when `self` is a Series and `where` is an array-like, + or when `self` is a DataFrame and `where` is a scalar + * DataFrame : when `self` is a DataFrame and `where` is an + array-like + + Return scalar, Series, or DataFrame. + + See Also + -------- + merge_asof : Perform an asof merge. Similar to left join. + + Notes + ----- + Dates are assumed to be sorted. Raises if this is not the case. + + Examples + -------- + A Series and a scalar `where`. + + >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) + >>> s + 10 1.0 + 20 2.0 + 30 NaN + 40 4.0 + dtype: float64 + + >>> s.asof(20) + 2.0 + + For a sequence `where`, a Series is returned. The first value is + NaN, because the first element of `where` is before the first + index value. + + >>> s.asof([5, 20]) + 5 NaN + 20 2.0 + dtype: float64 + + Missing values are not considered. The following is ``2.0``, not + NaN, even though NaN is at the index location for ``30``. + + >>> s.asof(30) + 2.0 + + Take all columns into consideration + + >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], + ... 'b': [None, None, None, None, 500]}, + ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', + ... '2018-02-27 09:02:00', + ... '2018-02-27 09:03:00', + ... '2018-02-27 09:04:00', + ... '2018-02-27 09:05:00'])) + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30'])) + a b + 2018-02-27 09:03:30 NaN NaN + 2018-02-27 09:04:30 NaN NaN + + Take a single column into consideration + + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30']), + ... subset=['a']) + a b + 2018-02-27 09:03:30 30 NaN + 2018-02-27 09:04:30 40 NaN + """ + if isinstance(where, str): + where = Timestamp(where) + + if not self.index.is_monotonic_increasing: + raise ValueError("asof requires a sorted index") + + is_series = isinstance(self, ABCSeries) + if is_series: + if subset is not None: + raise ValueError("subset is not valid for Series") + else: + if subset is None: + subset = self.columns + if not is_list_like(subset): + subset = [subset] + + is_list = is_list_like(where) + if not is_list: + start = self.index[0] + if isinstance(self.index, PeriodIndex): + where = Period(where, freq=self.index.freq) + + if where < start: + if not is_series: + return self._constructor_sliced( + index=self.columns, name=where, dtype=np.float64 + ) + return np.nan + + # It's always much faster to use a *while* loop here for + # Series than pre-computing all the NAs. However a + # *while* loop is extremely expensive for DataFrame + # so we later pre-compute all the NAs and use the same + # code path whether *where* is a scalar or list. 
+ # See PR: https://github.com/pandas-dev/pandas/pull/14476 + if is_series: + loc = self.index.searchsorted(where, side="right") + if loc > 0: + loc -= 1 + + values = self._values + while loc > 0 and isna(values[loc]): + loc -= 1 + return values[loc] + + if not isinstance(where, Index): + where = Index(where) if is_list else Index([where]) + + nulls = self.isna() if is_series else self[subset].isna().any(axis=1) + if nulls.all(): + if is_series: + self = cast("Series", self) + return self._constructor(np.nan, index=where, name=self.name) + elif is_list: + self = cast("DataFrame", self) + return self._constructor(np.nan, index=where, columns=self.columns) + else: + self = cast("DataFrame", self) + return self._constructor_sliced( + np.nan, index=self.columns, name=where[0] + ) + + locs = self.index.asof_locs(where, ~(nulls._values)) + + # mask the missing + missing = locs == -1 + data = self.take(locs) + data.index = where + if missing.any(): + # GH#16063 only do this setting when necessary, otherwise + # we'd cast e.g. bools to floats + data.loc[missing] = np.nan + return data if is_list else data.iloc[-1] + + # ---------------------------------------------------------------------- + # Action Methods + + @doc(klass=_shared_doc_kwargs["klass"]) + def isna(self: NDFrameT) -> NDFrameT: + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as None or :attr:`numpy.NaN`, gets mapped to True + values. + Everything else gets mapped to False values. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + + Returns + ------- + {klass} + Mask of bool values for each element in {klass} that + indicates whether an element is an NA value. + + See Also + -------- + {klass}.isnull : Alias of isna. + {klass}.notna : Boolean inverse of isna. + {klass}.dropna : Omit axes labels with missing values. + isna : Top-level isna. + + Examples + -------- + Show which entries in a DataFrame are NA. + + >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN], + ... born=[pd.NaT, pd.Timestamp('1939-05-27'), + ... pd.Timestamp('1940-04-25')], + ... name=['Alfred', 'Batman', ''], + ... toy=[None, 'Batmobile', 'Joker'])) + >>> df + age born name toy + 0 5.0 NaT Alfred None + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + Show which entries in a Series are NA. + + >>> ser = pd.Series([5, 6, np.NaN]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.isna() + 0 False + 1 False + 2 True + dtype: bool + """ + return isna(self).__finalize__(self, method="isna") + + @doc(isna, klass=_shared_doc_kwargs["klass"]) + def isnull(self: NDFrameT) -> NDFrameT: + return isna(self).__finalize__(self, method="isnull") + + @doc(klass=_shared_doc_kwargs["klass"]) + def notna(self: NDFrameT) -> NDFrameT: + """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to True. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + NA values, such as None or :attr:`numpy.NaN`, get mapped to False + values. + + Returns + ------- + {klass} + Mask of bool values for each element in {klass} that + indicates whether an element is not an NA value. 
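A sketch (not from the imported source) of the ``use_inf_as_na`` option mentioned in the description above, on made-up data:

>>> # illustrative only, assumes ``import numpy as np`` and ``import pandas as pd``
>>> with pd.option_context('mode.use_inf_as_na', True):
...     print(pd.Series([1.0, np.inf]).notna())
0     True
1    False
dtype: bool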
+ + See Also + -------- + {klass}.notnull : Alias of notna. + {klass}.isna : Boolean inverse of notna. + {klass}.dropna : Omit axes labels with missing values. + notna : Top-level notna. + + Examples + -------- + Show which entries in a DataFrame are not NA. + + >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN], + ... born=[pd.NaT, pd.Timestamp('1939-05-27'), + ... pd.Timestamp('1940-04-25')], + ... name=['Alfred', 'Batman', ''], + ... toy=[None, 'Batmobile', 'Joker'])) + >>> df + age born name toy + 0 5.0 NaT Alfred None + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.notna() + age born name toy + 0 True False True False + 1 True True True True + 2 False True True True + + Show which entries in a Series are not NA. + + >>> ser = pd.Series([5, 6, np.NaN]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.notna() + 0 True + 1 True + 2 False + dtype: bool + """ + return notna(self).__finalize__(self, method="notna") + + @doc(notna, klass=_shared_doc_kwargs["klass"]) + def notnull(self: NDFrameT) -> NDFrameT: + return notna(self).__finalize__(self, method="notnull") + + @final + def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): + if (lower is not None and np.any(isna(lower))) or ( + upper is not None and np.any(isna(upper)) + ): + raise ValueError("Cannot use an NA value as a clip threshold") + + result = self + mask = isna(self._values) + + with np.errstate(all="ignore"): + if upper is not None: + subset = self <= upper + result = result.where(subset, upper, axis=None, inplace=False) + if lower is not None: + subset = self >= lower + result = result.where(subset, lower, axis=None, inplace=False) + + if np.any(mask): + result[mask] = np.nan + + if inplace: + return self._update_inplace(result) + else: + return result + + @final + def _clip_with_one_bound(self, threshold, method, axis, inplace): + + if axis is not None: + axis = self._get_axis_number(axis) + + # method is self.le for upper bound and self.ge for lower bound + if is_scalar(threshold) and is_number(threshold): + if method.__name__ == "le": + return self._clip_with_scalar(None, threshold, inplace=inplace) + return self._clip_with_scalar(threshold, None, inplace=inplace) + + # GH #15390 + # In order for where method to work, the threshold must + # be transformed to NDFrame from other array like structure. + if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): + if isinstance(self, ABCSeries): + threshold = self._constructor(threshold, index=self.index) + else: + threshold = align_method_FRAME(self, threshold, axis, flex=None)[1] + + # GH 40420 + # Treat missing thresholds as no bounds, not clipping the values + if is_list_like(threshold): + fill_value = np.inf if method.__name__ == "le" else -np.inf + threshold_inf = threshold.fillna(fill_value) + else: + threshold_inf = threshold + + subset = method(threshold_inf, axis=axis) | isna(self) + + # GH 40420 + return self.where(subset, threshold, axis=axis, inplace=inplace) + + def clip( + self: NDFrameT, + lower=None, + upper=None, + axis: Axis | None = None, + inplace: bool_t = False, + *args, + **kwargs, + ) -> NDFrameT | None: + """ + Trim values at input threshold(s). + + Assigns values outside boundary to boundary values. Thresholds + can be singular values or array like, and in the latter case + the clipping is performed element-wise in the specified axis. + + Parameters + ---------- + lower : float or array-like, default None + Minimum threshold value. All values below this + threshold will be set to it. 
A missing + threshold (e.g `NA`) will not clip the value. + upper : float or array-like, default None + Maximum threshold value. All values above this + threshold will be set to it. A missing + threshold (e.g `NA`) will not clip the value. + axis : {{0 or 'index', 1 or 'columns', None}}, default None + Align object with lower and upper along the given axis. + For `Series` this parameter is unused and defaults to `None`. + inplace : bool, default False + Whether to perform the operation in place on the data. + *args, **kwargs + Additional keywords have no effect but might be accepted + for compatibility with numpy. + + Returns + ------- + Series or DataFrame or None + Same type as calling object with the values outside the + clip boundaries replaced or None if ``inplace=True``. + + See Also + -------- + Series.clip : Trim values at input threshold in series. + DataFrame.clip : Trim values at input threshold in dataframe. + numpy.clip : Clip (limit) the values in an array. + + Examples + -------- + >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]} + >>> df = pd.DataFrame(data) + >>> df + col_0 col_1 + 0 9 -2 + 1 -3 -7 + 2 0 6 + 3 -1 8 + 4 5 -5 + + Clips per column using lower and upper thresholds: + + >>> df.clip(-4, 6) + col_0 col_1 + 0 6 -2 + 1 -3 -4 + 2 0 6 + 3 -1 6 + 4 5 -4 + + Clips using specific lower and upper thresholds per column element: + + >>> t = pd.Series([2, -4, -1, 6, 3]) + >>> t + 0 2 + 1 -4 + 2 -1 + 3 6 + 4 3 + dtype: int64 + + >>> df.clip(t, t + 4, axis=0) + col_0 col_1 + 0 6 2 + 1 -3 -4 + 2 0 3 + 3 6 8 + 4 5 3 + + Clips using specific lower threshold per column element, with missing values: + + >>> t = pd.Series([2, -4, np.NaN, 6, 3]) + >>> t + 0 2.0 + 1 -4.0 + 2 NaN + 3 6.0 + 4 3.0 + dtype: float64 + + >>> df.clip(t, axis=0) + col_0 col_1 + 0 9 2 + 1 -3 -4 + 2 0 6 + 3 6 8 + 4 5 3 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + axis = nv.validate_clip_with_axis(axis, args, kwargs) + if axis is not None: + axis = self._get_axis_number(axis) + + # GH 17276 + # numpy doesn't like NaN as a clip value + # so ignore + # GH 19992 + # numpy doesn't drop a list-like bound containing NaN + isna_lower = isna(lower) + if not is_list_like(lower): + if np.any(isna_lower): + lower = None + elif np.all(isna_lower): + lower = None + isna_upper = isna(upper) + if not is_list_like(upper): + if np.any(isna_upper): + upper = None + elif np.all(isna_upper): + upper = None + + # GH 2747 (arguments were reversed) + if ( + lower is not None + and upper is not None + and is_scalar(lower) + and is_scalar(upper) + ): + lower, upper = min(lower, upper), max(lower, upper) + + # fast-path for scalars + if (lower is None or (is_scalar(lower) and is_number(lower))) and ( + upper is None or (is_scalar(upper) and is_number(upper)) + ): + return self._clip_with_scalar(lower, upper, inplace=inplace) + + result = self + if lower is not None: + result = result._clip_with_one_bound( + lower, method=self.ge, axis=axis, inplace=inplace + ) + if upper is not None: + if inplace: + result = self + result = result._clip_with_one_bound( + upper, method=self.le, axis=axis, inplace=inplace + ) + + return result + + @doc(**_shared_doc_kwargs) + def asfreq( + self: NDFrameT, + freq: Frequency, + method: FillnaOptions | None = None, + how: str | None = None, + normalize: bool_t = False, + fill_value: Hashable = None, + ) -> NDFrameT: + """ + Convert time series to specified frequency. + + Returns the original data conformed to a new index with the specified + frequency. 
+ + If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index + is the result of transforming the original index with + :meth:`PeriodIndex.asfreq ` (so the original index + will map one-to-one to the new index). + + Otherwise, the new index will be equivalent to ``pd.date_range(start, end, + freq=freq)`` where ``start`` and ``end`` are, respectively, the first and + last entries in the original index (see :func:`pandas.date_range`). The + values corresponding to any timesteps in the new index which were not present + in the original index will be null (``NaN``), unless a method for filling + such unknowns is provided (see the ``method`` parameter below). + + The :meth:`resample` method is more appropriate if an operation on each group of + timesteps (such as an aggregate) is necessary to represent the data at the new + frequency. + + Parameters + ---------- + freq : DateOffset or str + Frequency DateOffset or string. + method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None + Method to use for filling holes in reindexed Series (note this + does not fill NaNs that already were present): + + * 'pad' / 'ffill': propagate last valid observation forward to next + valid + * 'backfill' / 'bfill': use NEXT valid observation to fill. + how : {{'start', 'end'}}, default end + For PeriodIndex only (see PeriodIndex.asfreq). + normalize : bool, default False + Whether to reset output index to midnight. + fill_value : scalar, optional + Value to use for missing values, applied during upsampling (note + this does not fill NaNs that already were present). + + Returns + ------- + {klass} + {klass} object reindexed to the specified frequency. + + See Also + -------- + reindex : Conform DataFrame to new index with optional filling logic. + + Notes + ----- + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + Start by creating a series with 4 one minute timestamps. + + >>> index = pd.date_range('1/1/2000', periods=4, freq='T') + >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) + >>> df = pd.DataFrame({{'s': series}}) + >>> df + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:01:00 NaN + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:03:00 3.0 + + Upsample the series into 30 second bins. + + >>> df.asfreq(freq='30S') + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 NaN + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 NaN + 2000-01-01 00:03:00 3.0 + + Upsample again, providing a ``fill value``. + + >>> df.asfreq(freq='30S', fill_value=9.0) + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 9.0 + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 9.0 + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 9.0 + 2000-01-01 00:03:00 3.0 + + Upsample again, providing a ``method``. + + >>> df.asfreq(freq='30S', method='bfill') + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 2.0 + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 3.0 + 2000-01-01 00:03:00 3.0 + """ + from pandas.core.resample import asfreq + + return asfreq( + self, + freq, + method=method, + how=how, + normalize=normalize, + fill_value=fill_value, + ) + + @final + def at_time(self: NDFrameT, time, asof: bool_t = False, axis=None) -> NDFrameT: + """ + Select values at particular time of day (e.g., 9:30AM). + + Parameters + ---------- + time : datetime.time or str + axis : {0 or 'index', 1 or 'columns'}, default 0 + For `Series` this parameter is unused and defaults to 0. 
+ + Returns + ------- + Series or DataFrame + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + between_time : Select values between particular times of the day. + first : Select initial periods of time series based on a date offset. + last : Select final periods of time series based on a date offset. + DatetimeIndex.indexer_at_time : Get just the index locations for + values at particular time of the day. + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 00:00:00 1 + 2018-04-09 12:00:00 2 + 2018-04-10 00:00:00 3 + 2018-04-10 12:00:00 4 + + >>> ts.at_time('12:00') + A + 2018-04-09 12:00:00 2 + 2018-04-10 12:00:00 4 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + + index = self._get_axis(axis) + + if not isinstance(index, DatetimeIndex): + raise TypeError("Index must be DatetimeIndex") + + indexer = index.indexer_at_time(time, asof=asof) + return self._take_with_is_copy(indexer, axis=axis) + + @final + def between_time( + self: NDFrameT, + start_time, + end_time, + include_start: bool_t | lib.NoDefault = lib.no_default, + include_end: bool_t | lib.NoDefault = lib.no_default, + inclusive: IntervalClosedType | None = None, + axis=None, + ) -> NDFrameT: + """ + Select values between particular times of the day (e.g., 9:00-9:30 AM). + + By setting ``start_time`` to be later than ``end_time``, + you can get the times that are *not* between the two times. + + Parameters + ---------- + start_time : datetime.time or str + Initial time as a time filter limit. + end_time : datetime.time or str + End time as a time filter limit. + include_start : bool, default True + Whether the start time needs to be included in the result. + + .. deprecated:: 1.4.0 + Arguments `include_start` and `include_end` have been deprecated + to standardize boundary inputs. Use `inclusive` instead, to set + each bound as closed or open. + include_end : bool, default True + Whether the end time needs to be included in the result. + + .. deprecated:: 1.4.0 + Arguments `include_start` and `include_end` have been deprecated + to standardize boundary inputs. Use `inclusive` instead, to set + each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; whether to set each bound as closed or open. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Determine range time on index or columns value. + For `Series` this parameter is unused and defaults to 0. + + Returns + ------- + Series or DataFrame + Data from the original object filtered to the specified dates range. + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + at_time : Select values at a particular time of the day. + first : Select initial periods of time series based on a date offset. + last : Select final periods of time series based on a date offset. + DatetimeIndex.indexer_between_time : Get just the index locations for + values between particular times of the day. 
+ + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 00:00:00 1 + 2018-04-10 00:20:00 2 + 2018-04-11 00:40:00 3 + 2018-04-12 01:00:00 4 + + >>> ts.between_time('0:15', '0:45') + A + 2018-04-10 00:20:00 2 + 2018-04-11 00:40:00 3 + + You get the times that are *not* between two times by setting + ``start_time`` later than ``end_time``: + + >>> ts.between_time('0:45', '0:15') + A + 2018-04-09 00:00:00 1 + 2018-04-12 01:00:00 4 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + + index = self._get_axis(axis) + if not isinstance(index, DatetimeIndex): + raise TypeError("Index must be DatetimeIndex") + + old_include_arg_used = (include_start != lib.no_default) or ( + include_end != lib.no_default + ) + + if old_include_arg_used and inclusive is not None: + raise ValueError( + "Deprecated arguments `include_start` and `include_end` " + "cannot be passed if `inclusive` has been given." + ) + # If any of the deprecated arguments ('include_start', 'include_end') + # have been passed + elif old_include_arg_used: + warnings.warn( + "`include_start` and `include_end` are deprecated in " + "favour of `inclusive`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + left = True if include_start is lib.no_default else include_start + right = True if include_end is lib.no_default else include_end + + inc_dict: dict[tuple[bool_t, bool_t], IntervalClosedType] = { + (True, True): "both", + (True, False): "left", + (False, True): "right", + (False, False): "neither", + } + inclusive = inc_dict[(left, right)] + elif inclusive is None: + # On arg removal inclusive can default to "both" + inclusive = "both" + left_inclusive, right_inclusive = validate_inclusive(inclusive) + indexer = index.indexer_between_time( + start_time, + end_time, + include_start=left_inclusive, + include_end=right_inclusive, + ) + return self._take_with_is_copy(indexer, axis=axis) + + @doc(**_shared_doc_kwargs) + def resample( + self, + rule, + axis: Axis = 0, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + loffset=None, + base: int | None = None, + on: Level = None, + level: Level = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + group_keys: bool_t | lib.NoDefault = lib.no_default, + ) -> Resampler: + """ + Resample time-series data. + + Convenience method for frequency conversion and resampling of time series. + The object must have a datetime-like index (`DatetimeIndex`, `PeriodIndex`, + or `TimedeltaIndex`), or the caller must pass the label of a datetime-like + series/index to the ``on``/``level`` keyword parameter. + + Parameters + ---------- + rule : DateOffset, Timedelta or str + The offset string or object representing target conversion. + axis : {{0 or 'index', 1 or 'columns'}}, default 0 + Which axis to use for up- or down-sampling. For `Series` this parameter + is unused and defaults to 0. Must be + `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. + closed : {{'right', 'left'}}, default None + Which side of bin interval is closed. The default is 'left' + for all frequency offsets except for 'M', 'A', 'Q', 'BM', + 'BA', 'BQ', and 'W' which all have a default of 'right'. + label : {{'right', 'left'}}, default None + Which bin edge label to label bucket with. 
The default is 'left' + for all frequency offsets except for 'M', 'A', 'Q', 'BM', + 'BA', 'BQ', and 'W' which all have a default of 'right'. + convention : {{'start', 'end', 's', 'e'}}, default 'start' + For `PeriodIndex` only, controls whether to use the start or + end of `rule`. + kind : {{'timestamp', 'period'}}, optional, default None + Pass 'timestamp' to convert the resulting index to a + `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. + By default the input representation is retained. + loffset : timedelta, default None + Adjust the resampled time labels. + + .. deprecated:: 1.1.0 + You should add the loffset to the `df.index` after the resample. + See below. + + base : int, default 0 + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0. + + .. deprecated:: 1.1.0 + The new arguments that you should use are 'offset' or 'origin'. + + on : str, optional + For a DataFrame, column to use instead of index for resampling. + Column must be datetime-like. + level : str or int, optional + For a MultiIndex, level (name or number) to use for + resampling. `level` must be datetime-like. + origin : Timestamp or str, default 'start_day' + The timestamp on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + If string, must be one of the following: + + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + + .. versionadded:: 1.1.0 + + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + + .. versionadded:: 1.3.0 + + offset : Timedelta or str, default is None + An offset timedelta added to the origin. + + .. versionadded:: 1.1.0 + + group_keys : bool, optional + Whether to include the group keys in the result index when using + ``.apply()`` on the resampled object. Not specifying ``group_keys`` + will retain values-dependent behavior from pandas 1.4 + and earlier (see :ref:`pandas 1.5.0 Release notes + ` + for examples). In a future version of pandas, the behavior will + default to the same as specifying ``group_keys=False``. + + .. versionadded:: 1.5.0 + + Returns + ------- + pandas.core.Resampler + :class:`~pandas.core.Resampler` object. + + See Also + -------- + Series.resample : Resample a Series. + DataFrame.resample : Resample a DataFrame. + groupby : Group {klass} by mapping, function, label, or list of labels. + asfreq : Reindex a {klass} with the given frequency without grouping. + + Notes + ----- + See the `user guide + `__ + for more. + + To learn more about the offset strings, please see `this link + `__. + + Examples + -------- + Start by creating a series with 9 one minute timestamps. + + >>> index = pd.date_range('1/1/2000', periods=9, freq='T') + >>> series = pd.Series(range(9), index=index) + >>> series + 2000-01-01 00:00:00 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:02:00 2 + 2000-01-01 00:03:00 3 + 2000-01-01 00:04:00 4 + 2000-01-01 00:05:00 5 + 2000-01-01 00:06:00 6 + 2000-01-01 00:07:00 7 + 2000-01-01 00:08:00 8 + Freq: T, dtype: int64 + + Downsample the series into 3 minute bins and sum the values + of the timestamps falling into a bin. 
+ + >>> series.resample('3T').sum() + 2000-01-01 00:00:00 3 + 2000-01-01 00:03:00 12 + 2000-01-01 00:06:00 21 + Freq: 3T, dtype: int64 + + Downsample the series into 3 minute bins as above, but label each + bin using the right edge instead of the left. Please note that the + value in the bucket used as the label is not included in the bucket, + which it labels. For example, in the original series the + bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed + value in the resampled bucket with the label ``2000-01-01 00:03:00`` + does not include 3 (if it did, the summed value would be 6, not 3). + To include this value close the right side of the bin interval as + illustrated in the example below this one. + + >>> series.resample('3T', label='right').sum() + 2000-01-01 00:03:00 3 + 2000-01-01 00:06:00 12 + 2000-01-01 00:09:00 21 + Freq: 3T, dtype: int64 + + Downsample the series into 3 minute bins as above, but close the right + side of the bin interval. + + >>> series.resample('3T', label='right', closed='right').sum() + 2000-01-01 00:00:00 0 + 2000-01-01 00:03:00 6 + 2000-01-01 00:06:00 15 + 2000-01-01 00:09:00 15 + Freq: 3T, dtype: int64 + + Upsample the series into 30 second bins. + + >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 1.0 + 2000-01-01 00:01:30 NaN + 2000-01-01 00:02:00 2.0 + Freq: 30S, dtype: float64 + + Upsample the series into 30 second bins and fill the ``NaN`` + values using the ``ffill`` method. + + >>> series.resample('30S').ffill()[0:5] + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 1 + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: int64 + + Upsample the series into 30 second bins and fill the + ``NaN`` values using the ``bfill`` method. + + >>> series.resample('30S').bfill()[0:5] + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 1 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 2 + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: int64 + + Pass a custom function via ``apply`` + + >>> def custom_resampler(arraylike): + ... return np.sum(arraylike) + 5 + ... + >>> series.resample('3T').apply(custom_resampler) + 2000-01-01 00:00:00 8 + 2000-01-01 00:03:00 17 + 2000-01-01 00:06:00 26 + Freq: 3T, dtype: int64 + + For a Series with a PeriodIndex, the keyword `convention` can be + used to control whether to use the start or end of `rule`. + + Resample a year by quarter using 'start' `convention`. Values are + assigned to the first quarter of the period. + + >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', + ... freq='A', + ... periods=2)) + >>> s + 2012 1 + 2013 2 + Freq: A-DEC, dtype: int64 + >>> s.resample('Q', convention='start').asfreq() + 2012Q1 1.0 + 2012Q2 NaN + 2012Q3 NaN + 2012Q4 NaN + 2013Q1 2.0 + 2013Q2 NaN + 2013Q3 NaN + 2013Q4 NaN + Freq: Q-DEC, dtype: float64 + + Resample quarters by month using 'end' `convention`. Values are + assigned to the last month of the period. + + >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', + ... freq='Q', + ... periods=4)) + >>> q + 2018Q1 1 + 2018Q2 2 + 2018Q3 3 + 2018Q4 4 + Freq: Q-DEC, dtype: int64 + >>> q.resample('M', convention='end').asfreq() + 2018-03 1.0 + 2018-04 NaN + 2018-05 NaN + 2018-06 2.0 + 2018-07 NaN + 2018-08 NaN + 2018-09 3.0 + 2018-10 NaN + 2018-11 NaN + 2018-12 4.0 + Freq: M, dtype: float64 + + For DataFrame objects, the keyword `on` can be used to specify the + column instead of the index for resampling. 
+ + >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} + >>> df = pd.DataFrame(d) + >>> df['week_starting'] = pd.date_range('01/01/2018', + ... periods=8, + ... freq='W') + >>> df + price volume week_starting + 0 10 50 2018-01-07 + 1 11 60 2018-01-14 + 2 9 40 2018-01-21 + 3 13 100 2018-01-28 + 4 14 50 2018-02-04 + 5 18 100 2018-02-11 + 6 17 40 2018-02-18 + 7 19 50 2018-02-25 + >>> df.resample('M', on='week_starting').mean() + price volume + week_starting + 2018-01-31 10.75 62.5 + 2018-02-28 17.00 60.0 + + For a DataFrame with MultiIndex, the keyword `level` can be used to + specify on which level the resampling needs to take place. + + >>> days = pd.date_range('1/1/2000', periods=4, freq='D') + >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} + >>> df2 = pd.DataFrame( + ... d2, + ... index=pd.MultiIndex.from_product( + ... [days, ['morning', 'afternoon']] + ... ) + ... ) + >>> df2 + price volume + 2000-01-01 morning 10 50 + afternoon 11 60 + 2000-01-02 morning 9 40 + afternoon 13 100 + 2000-01-03 morning 14 50 + afternoon 18 100 + 2000-01-04 morning 17 40 + afternoon 19 50 + >>> df2.resample('D', level=0).sum() + price volume + 2000-01-01 21 110 + 2000-01-02 22 140 + 2000-01-03 32 150 + 2000-01-04 36 90 + + If you want to adjust the start of the bins based on a fixed timestamp: + + >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' + >>> rng = pd.date_range(start, end, freq='7min') + >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) + >>> ts + 2000-10-01 23:30:00 0 + 2000-10-01 23:37:00 3 + 2000-10-01 23:44:00 6 + 2000-10-01 23:51:00 9 + 2000-10-01 23:58:00 12 + 2000-10-02 00:05:00 15 + 2000-10-02 00:12:00 18 + 2000-10-02 00:19:00 21 + 2000-10-02 00:26:00 24 + Freq: 7T, dtype: int64 + + >>> ts.resample('17min').sum() + 2000-10-01 23:14:00 0 + 2000-10-01 23:31:00 9 + 2000-10-01 23:48:00 21 + 2000-10-02 00:05:00 54 + 2000-10-02 00:22:00 24 + Freq: 17T, dtype: int64 + + >>> ts.resample('17min', origin='epoch').sum() + 2000-10-01 23:18:00 0 + 2000-10-01 23:35:00 18 + 2000-10-01 23:52:00 27 + 2000-10-02 00:09:00 39 + 2000-10-02 00:26:00 24 + Freq: 17T, dtype: int64 + + >>> ts.resample('17min', origin='2000-01-01').sum() + 2000-10-01 23:24:00 3 + 2000-10-01 23:41:00 15 + 2000-10-01 23:58:00 45 + 2000-10-02 00:15:00 45 + Freq: 17T, dtype: int64 + + If you want to adjust the start of the bins with an `offset` Timedelta, the two + following lines are equivalent: + + >>> ts.resample('17min', origin='start').sum() + 2000-10-01 23:30:00 9 + 2000-10-01 23:47:00 21 + 2000-10-02 00:04:00 54 + 2000-10-02 00:21:00 24 + Freq: 17T, dtype: int64 + + >>> ts.resample('17min', offset='23h30min').sum() + 2000-10-01 23:30:00 9 + 2000-10-01 23:47:00 21 + 2000-10-02 00:04:00 54 + 2000-10-02 00:21:00 24 + Freq: 17T, dtype: int64 + + If you want to take the largest Timestamp as the end of the bins: + + >>> ts.resample('17min', origin='end').sum() + 2000-10-01 23:35:00 0 + 2000-10-01 23:52:00 18 + 2000-10-02 00:09:00 27 + 2000-10-02 00:26:00 63 + Freq: 17T, dtype: int64 + + In contrast with the `start_day`, you can use `end_day` to take the ceiling + midnight of the largest Timestamp as the end of the bins and drop the bins + not containing data: + + >>> ts.resample('17min', origin='end_day').sum() + 2000-10-01 23:38:00 3 + 2000-10-01 23:55:00 15 + 2000-10-02 00:12:00 45 + 2000-10-02 00:29:00 45 + Freq: 17T, dtype: int64 + + To replace the use of the deprecated `base` argument, you can now use 
`offset`, + in this example it is equivalent to have `base=2`: + + >>> ts.resample('17min', offset='2min').sum() + 2000-10-01 23:16:00 0 + 2000-10-01 23:33:00 9 + 2000-10-01 23:50:00 36 + 2000-10-02 00:07:00 39 + 2000-10-02 00:24:00 24 + Freq: 17T, dtype: int64 + + To replace the use of the deprecated `loffset` argument: + + >>> from pandas.tseries.frequencies import to_offset + >>> loffset = '19min' + >>> ts_out = ts.resample('17min').sum() + >>> ts_out.index = ts_out.index + to_offset(loffset) + >>> ts_out + 2000-10-01 23:33:00 0 + 2000-10-01 23:50:00 9 + 2000-10-02 00:07:00 21 + 2000-10-02 00:24:00 54 + 2000-10-02 00:41:00 24 + Freq: 17T, dtype: int64 + """ + from pandas.core.resample import get_resampler + + axis = self._get_axis_number(axis) + return get_resampler( + self, + freq=rule, + label=label, + closed=closed, + axis=axis, + kind=kind, + loffset=loffset, + convention=convention, + base=base, + key=on, + level=level, + origin=origin, + offset=offset, + group_keys=group_keys, + ) + + @final + def first(self: NDFrameT, offset) -> NDFrameT: + """ + Select initial periods of time series data based on a date offset. + + When having a DataFrame with dates as index, this function can + select the first few rows based on a date offset. + + Parameters + ---------- + offset : str, DateOffset or dateutil.relativedelta + The offset length of the data that will be selected. For instance, + '1M' will display all the rows having their index within the first month. + + Returns + ------- + Series or DataFrame + A subset of the caller. + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + last : Select final periods of time series based on a date offset. + at_time : Select values at a particular time of the day. + between_time : Select values between particular times of the day. + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 1 + 2018-04-11 2 + 2018-04-13 3 + 2018-04-15 4 + + Get the rows for the first 3 days: + + >>> ts.first('3D') + A + 2018-04-09 1 + 2018-04-11 2 + + Notice the data for 3 first calendar days were returned, not the first + 3 days observed in the dataset, and therefore data for 2018-04-13 was + not returned. + """ + if not isinstance(self.index, DatetimeIndex): + raise TypeError("'first' only supports a DatetimeIndex index") + + if len(self.index) == 0: + return self + + offset = to_offset(offset) + if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): + # GH#29623 if first value is end of period, remove offset with n = 1 + # before adding the real offset + end_date = end = self.index[0] - offset.base + offset + else: + end_date = end = self.index[0] + offset + + # Tick-like, e.g. 3 weeks + if isinstance(offset, Tick) and end_date in self.index: + end = self.index.searchsorted(end_date, side="left") + return self.iloc[:end] + + return self.loc[:end] + + @final + def last(self: NDFrameT, offset) -> NDFrameT: + """ + Select final periods of time series data based on a date offset. + + For a DataFrame with a sorted DatetimeIndex, this function + selects the last few rows based on a date offset. + + Parameters + ---------- + offset : str, DateOffset, dateutil.relativedelta + The offset length of the data that will be selected. For instance, + '3D' will display all the rows having their index within the last 3 days. + + Returns + ------- + Series or DataFrame + A subset of the caller. 
+ + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + first : Select initial periods of time series based on a date offset. + at_time : Select values at a particular time of the day. + between_time : Select values between particular times of the day. + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 1 + 2018-04-11 2 + 2018-04-13 3 + 2018-04-15 4 + + Get the rows for the last 3 days: + + >>> ts.last('3D') + A + 2018-04-13 3 + 2018-04-15 4 + + Notice the data for 3 last calendar days were returned, not the last + 3 observed days in the dataset, and therefore data for 2018-04-11 was + not returned. + """ + if not isinstance(self.index, DatetimeIndex): + raise TypeError("'last' only supports a DatetimeIndex index") + + if len(self.index) == 0: + return self + + offset = to_offset(offset) + + start_date = self.index[-1] - offset + start = self.index.searchsorted(start_date, side="right") + return self.iloc[start:] + + @final + def rank( + self: NDFrameT, + axis=0, + method: str = "average", + numeric_only: bool_t | None | lib.NoDefault = lib.no_default, + na_option: str = "keep", + ascending: bool_t = True, + pct: bool_t = False, + ) -> NDFrameT: + """ + Compute numerical data ranks (1 through n) along axis. + + By default, equal values are assigned a rank that is the average of the + ranks of those values. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + Index to direct ranking. + For `Series` this parameter is unused and defaults to 0. + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + How to rank the group of records that have the same value (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups. + + numeric_only : bool, optional + For DataFrame objects, rank only numeric columns if set to True. + na_option : {'keep', 'top', 'bottom'}, default 'keep' + How to rank NaN values: + + * keep: assign NaN rank to NaN values + * top: assign lowest rank to NaN values + * bottom: assign highest rank to NaN values + + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. + + Returns + ------- + same type as caller + Return a Series or DataFrame with data ranks as values. + + See Also + -------- + core.groupby.GroupBy.rank : Rank of values within each group. + + Examples + -------- + >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', + ... 'spider', 'snake'], + ... 'Number_legs': [4, 2, 4, 8, np.nan]}) + >>> df + Animal Number_legs + 0 cat 4.0 + 1 penguin 2.0 + 2 dog 4.0 + 3 spider 8.0 + 4 snake NaN + + Ties are assigned the mean of the ranks (by default) for the group. + + >>> s = pd.Series(range(5), index=list("abcde")) + >>> s["d"] = s["b"] + >>> s.rank() + a 1.0 + b 2.5 + c 4.0 + d 2.5 + e 5.0 + dtype: float64 + + The following example shows how the method behaves with the above + parameters: + + * default_rank: this is the default behaviour obtained without using + any parameter. 
+ * max_rank: setting ``method = 'max'`` the records that have the + same values are ranked using the highest rank (e.g.: since 'cat' + and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.) + * NA_bottom: choosing ``na_option = 'bottom'``, if there are records + with NaN values they are placed at the bottom of the ranking. + * pct_rank: when setting ``pct = True``, the ranking is expressed as + percentile rank. + + >>> df['default_rank'] = df['Number_legs'].rank() + >>> df['max_rank'] = df['Number_legs'].rank(method='max') + >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') + >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) + >>> df + Animal Number_legs default_rank max_rank NA_bottom pct_rank + 0 cat 4.0 2.5 3.0 2.5 0.625 + 1 penguin 2.0 1.0 1.0 1.0 0.250 + 2 dog 4.0 2.5 3.0 2.5 0.625 + 3 spider 8.0 4.0 4.0 4.0 1.000 + 4 snake NaN NaN NaN 5.0 NaN + """ + warned = False + if numeric_only is None: + # GH#45036 + warnings.warn( + f"'numeric_only=None' in {type(self).__name__}.rank is deprecated " + "and will raise in a future version. Pass either 'True' or " + "'False'. 'False' will be the default.", + FutureWarning, + stacklevel=find_stack_level(), + ) + warned = True + elif numeric_only is lib.no_default: + numeric_only = None + + axis = self._get_axis_number(axis) + + if na_option not in {"keep", "top", "bottom"}: + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + raise ValueError(msg) + + def ranker(data): + if data.ndim == 2: + # i.e. DataFrame, we cast to ndarray + values = data.values + else: + # i.e. Series, can dispatch to EA + values = data._values + + if isinstance(values, ExtensionArray): + ranks = values._rank( + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + else: + ranks = algos.rank( + values, + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + + ranks_obj = self._constructor(ranks, **data._construct_axes_dict()) + return ranks_obj.__finalize__(self, method="rank") + + # if numeric_only is None, and we can't get anything, we try with + # numeric_only=True + if numeric_only is None: + try: + return ranker(self) + except TypeError: + numeric_only = True + if not warned: + # Only warn here if we didn't already issue a warning above + # GH#45036 + warnings.warn( + f"Dropping of nuisance columns in {type(self).__name__}.rank " + "is deprecated; in a future version this will raise TypeError. 
" + "Select only valid columns before calling rank.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if numeric_only: + if self.ndim == 1 and not is_numeric_dtype(self.dtype): + # GH#47500 + warnings.warn( + f"Calling Series.rank with numeric_only={numeric_only} and dtype " + f"{self.dtype} is deprecated and will raise a TypeError in a " + "future version of pandas", + category=FutureWarning, + stacklevel=find_stack_level(), + ) + data = self._get_numeric_data() + else: + data = self + + return ranker(data) + + @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) + def compare( + self, + other, + align_axis: Axis = 1, + keep_shape: bool_t = False, + keep_equal: bool_t = False, + result_names: Suffixes = ("self", "other"), + ): + from pandas.core.reshape.concat import concat + + if type(self) is not type(other): + cls_self, cls_other = type(self).__name__, type(other).__name__ + raise TypeError( + f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'" + ) + + mask = ~((self == other) | (self.isna() & other.isna())) + + if not keep_equal: + self = self.where(mask) + other = other.where(mask) + + if not keep_shape: + if isinstance(self, ABCDataFrame): + cmask = mask.any() + rmask = mask.any(axis=1) + self = self.loc[rmask, cmask] + other = other.loc[rmask, cmask] + else: + self = self[mask] + other = other[mask] + if not isinstance(result_names, tuple): + raise TypeError( + f"Passing 'result_names' as a {type(result_names)} is not " + "supported. Provide 'result_names' as a tuple instead." + ) + + if align_axis in (1, "columns"): # This is needed for Series + axis = 1 + else: + axis = self._get_axis_number(align_axis) + + diff = concat([self, other], axis=axis, keys=result_names) + + if axis >= self.ndim: + # No need to reorganize data if stacking on new axis + # This currently applies for stacking two Series on columns + return diff + + ax = diff._get_axis(axis) + ax_names = np.array(ax.names) + + # set index names to positions to avoid confusion + ax.names = np.arange(len(ax_names)) + + # bring self-other to inner level + order = list(range(1, ax.nlevels)) + [0] + if isinstance(diff, ABCDataFrame): + diff = diff.reorder_levels(order, axis=axis) + else: + diff = diff.reorder_levels(order) + + # restore the index names in order + diff._get_axis(axis=axis).names = ax_names[order] + + # reorder axis to keep things organized + indices = ( + np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten() + ) + diff = diff.take(indices, axis=axis) + + return diff + + @doc(**_shared_doc_kwargs) + def align( + self: NDFrameT, + other: NDFrameT, + join: Literal["outer", "inner", "left", "right"] = "outer", + axis: Axis | None = None, + level: Level = None, + copy: bool_t = True, + fill_value: Hashable = None, + method: FillnaOptions | None = None, + limit: int | None = None, + fill_axis: Axis = 0, + broadcast_axis: Axis | None = None, + ) -> NDFrameT: + """ + Align two objects on their axes with the specified join method. + + Join method is specified for each axis Index. + + Parameters + ---------- + other : DataFrame or Series + join : {{'outer', 'inner', 'left', 'right'}}, default 'outer' + axis : allowed axis of the other object, default None + Align on index (0), columns (1), or both (None). + level : int or level name, default None + Broadcast across a level, matching Index values on the + passed MultiIndex level. + copy : bool, default True + Always returns new objects. 
If copy=False and no reindexing is + required then original objects are returned. + fill_value : scalar, default np.NaN + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value. + method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None + Method to use for filling holes in reindexed Series: + + - pad / ffill: propagate last valid observation forward to next valid. + - backfill / bfill: use NEXT valid observation to fill gap. + + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + fill_axis : {axes_single_arg}, default 0 + Filling axis, method and limit. + broadcast_axis : {axes_single_arg}, default None + Broadcast values along this axis, if aligning two objects of + different dimensions. + + Returns + ------- + (left, right) : ({klass}, type of other) + Aligned objects. + + Examples + -------- + >>> df = pd.DataFrame( + ... [[1, 2, 3, 4], [6, 7, 8, 9]], columns=["D", "B", "E", "A"], index=[1, 2] + ... ) + >>> other = pd.DataFrame( + ... [[10, 20, 30, 40], [60, 70, 80, 90], [600, 700, 800, 900]], + ... columns=["A", "B", "C", "D"], + ... index=[2, 3, 4], + ... ) + >>> df + D B E A + 1 1 2 3 4 + 2 6 7 8 9 + >>> other + A B C D + 2 10 20 30 40 + 3 60 70 80 90 + 4 600 700 800 900 + + Align on columns: + + >>> left, right = df.align(other, join="outer", axis=1) + >>> left + A B C D E + 1 4 2 NaN 1 3 + 2 9 7 NaN 6 8 + >>> right + A B C D E + 2 10 20 30 40 NaN + 3 60 70 80 90 NaN + 4 600 700 800 900 NaN + + We can also align on the index: + + >>> left, right = df.align(other, join="outer", axis=0) + >>> left + D B E A + 1 1.0 2.0 3.0 4.0 + 2 6.0 7.0 8.0 9.0 + 3 NaN NaN NaN NaN + 4 NaN NaN NaN NaN + >>> right + A B C D + 1 NaN NaN NaN NaN + 2 10.0 20.0 30.0 40.0 + 3 60.0 70.0 80.0 90.0 + 4 600.0 700.0 800.0 900.0 + + Finally, the default `axis=None` will align on both index and columns: + + >>> left, right = df.align(other, join="outer", axis=None) + >>> left + A B C D E + 1 4.0 2.0 NaN 1.0 3.0 + 2 9.0 7.0 NaN 6.0 8.0 + 3 NaN NaN NaN NaN NaN + 4 NaN NaN NaN NaN NaN + >>> right + A B C D E + 1 NaN NaN NaN NaN NaN + 2 10.0 20.0 30.0 40.0 NaN + 3 60.0 70.0 80.0 90.0 NaN + 4 600.0 700.0 800.0 900.0 NaN + """ + + method = missing.clean_fill_method(method) + + if broadcast_axis == 1 and self.ndim != other.ndim: + if isinstance(self, ABCSeries): + # this means other is a DataFrame, and we need to broadcast + # self + cons = self._constructor_expanddim + df = cons( + {c: self for c in other.columns}, **other._construct_axes_dict() + ) + return df._align_frame( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + elif isinstance(other, ABCSeries): + # this means self is a DataFrame, and we need to broadcast + # other + cons = other._constructor_expanddim + df = cons( + {c: other for c in self.columns}, **self._construct_axes_dict() + ) + return self._align_frame( + df, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + + if axis is not None: + axis = self._get_axis_number(axis) + if isinstance(other, ABCDataFrame): + return 
self._align_frame( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + elif isinstance(other, ABCSeries): + return self._align_series( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + @final + def _align_frame( + self, + other, + join="outer", + axis=None, + level=None, + copy: bool_t = True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + ): + # defaults + join_index, join_columns = None, None + ilidx, iridx = None, None + clidx, cridx = None, None + + is_series = isinstance(self, ABCSeries) + + if (axis is None or axis == 0) and not self.index.equals(other.index): + join_index, ilidx, iridx = self.index.join( + other.index, how=join, level=level, return_indexers=True + ) + + if ( + (axis is None or axis == 1) + and not is_series + and not self.columns.equals(other.columns) + ): + join_columns, clidx, cridx = self.columns.join( + other.columns, how=join, level=level, return_indexers=True + ) + + if is_series: + reindexers = {0: [join_index, ilidx]} + else: + reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]} + + left = self._reindex_with_indexers( + reindexers, copy=copy, fill_value=fill_value, allow_dups=True + ) + # other must be always DataFrame + right = other._reindex_with_indexers( + {0: [join_index, iridx], 1: [join_columns, cridx]}, + copy=copy, + fill_value=fill_value, + allow_dups=True, + ) + + if method is not None: + _left = left.fillna(method=method, axis=fill_axis, limit=limit) + assert _left is not None # needed for mypy + left = _left + right = right.fillna(method=method, axis=fill_axis, limit=limit) + + # if DatetimeIndex have different tz, convert to UTC + left, right = _align_as_utc(left, right, join_index) + + return ( + left.__finalize__(self), + right.__finalize__(other), + ) + + @final + def _align_series( + self, + other, + join="outer", + axis=None, + level=None, + copy: bool_t = True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + ): + + is_series = isinstance(self, ABCSeries) + + if (not is_series and axis is None) or axis not in [None, 0, 1]: + raise ValueError("Must specify axis=0 or 1") + + if is_series and axis == 1: + raise ValueError("cannot align series to a series other than axis 0") + + # series/series compat, other must always be a Series + if not axis: + + # equal + if self.index.equals(other.index): + join_index, lidx, ridx = None, None, None + else: + join_index, lidx, ridx = self.index.join( + other.index, how=join, level=level, return_indexers=True + ) + + if is_series: + left = self._reindex_indexer(join_index, lidx, copy) + elif lidx is None or join_index is None: + left = self.copy() if copy else self + else: + left = self._constructor( + self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) + ) + + right = other._reindex_indexer(join_index, ridx, copy) + + else: + + # one has > 1 ndim + fdata = self._mgr + join_index = self.axes[1] + lidx, ridx = None, None + if not join_index.equals(other.index): + join_index, lidx, ridx = join_index.join( + other.index, how=join, level=level, return_indexers=True + ) + + if lidx is not None: + bm_axis = self._get_block_manager_axis(1) + fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis) + + if copy and fdata is self._mgr: + fdata = fdata.copy() + + left = 
self._constructor(fdata) + + if ridx is None: + right = other + else: + right = other.reindex(join_index, level=level) + + # fill + fill_na = notna(fill_value) or (method is not None) + if fill_na: + left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) + right = right.fillna(fill_value, method=method, limit=limit) + + # if DatetimeIndex have different tz, convert to UTC + if is_series or (not is_series and axis == 0): + left, right = _align_as_utc(left, right, join_index) + + return ( + left.__finalize__(self), + right.__finalize__(other), + ) + + @final + def _where( + self, + cond, + other=lib.no_default, + inplace=False, + axis=None, + level=None, + ): + """ + Equivalent to public method `where`, except that `other` is not + applied as a function even if callable. Used in __setitem__. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + if axis is not None: + axis = self._get_axis_number(axis) + + # align the cond to same shape as myself + cond = com.apply_if_callable(cond, self) + if isinstance(cond, NDFrame): + cond, _ = cond.align(self, join="right", broadcast_axis=1, copy=False) + else: + if not hasattr(cond, "shape"): + cond = np.asanyarray(cond) + if cond.shape != self.shape: + raise ValueError("Array conditional must be same shape as self") + cond = self._constructor(cond, **self._construct_axes_dict()) + + # make sure we are boolean + fill_value = bool(inplace) + cond = cond.fillna(fill_value) + + msg = "Boolean array expected for the condition, not {dtype}" + + if not cond.empty: + if not isinstance(cond, ABCDataFrame): + # This is a single-dimensional object. + if not is_bool_dtype(cond): + raise ValueError(msg.format(dtype=cond.dtype)) + else: + for dt in cond.dtypes: + if not is_bool_dtype(dt): + raise ValueError(msg.format(dtype=dt)) + else: + # GH#21947 we have an empty DataFrame/Series, could be object-dtype + cond = cond.astype(bool) + + cond = -cond if inplace else cond + cond = cond.reindex(self._info_axis, axis=self._info_axis_number, copy=False) + + # try to align with other + if isinstance(other, NDFrame): + + # align with me + if other.ndim <= self.ndim: + + _, other = self.align( + other, + join="left", + axis=axis, + level=level, + fill_value=None, + copy=False, + ) + + # if we are NOT aligned, raise as we cannot where index + if axis is None and not other._indexed_same(self): + raise InvalidIndexError + + elif other.ndim < self.ndim: + # TODO(EA2D): avoid object-dtype cast in EA case GH#38729 + other = other._values + if axis == 0: + other = np.reshape(other, (-1, 1)) + elif axis == 1: + other = np.reshape(other, (1, -1)) + + other = np.broadcast_to(other, self.shape) + + # slice me out of the other + else: + raise NotImplementedError( + "cannot align with a higher dimensional NDFrame" + ) + + elif not isinstance(other, (MultiIndex, NDFrame)): + # mainly just catching Index here + other = extract_array(other, extract_numpy=True) + + if isinstance(other, (np.ndarray, ExtensionArray)): + + if other.shape != self.shape: + if self.ndim != 1: + # In the ndim == 1 case we may have + # other length 1, which we treat as scalar (GH#2745, GH#4192) + # or len(other) == icond.sum(), which we treat like + # __setitem__ (GH#3235) + raise ValueError( + "other must be the same shape as self when an ndarray" + ) + + # we are the same shape, so create an actual object for alignment + else: + other = self._constructor(other, **self._construct_axes_dict()) + + if axis is None: + axis = 0 + + if self.ndim == getattr(other, "ndim", 0): + align = True + 
else: + align = self._get_axis_number(axis) == 1 + + if inplace: + # we may have different type blocks come out of putmask, so + # reconstruct the block manager + + self._check_inplace_setting(other) + new_data = self._mgr.putmask(mask=cond, new=other, align=align) + result = self._constructor(new_data) + return self._update_inplace(result) + + else: + new_data = self._mgr.where( + other=other, + cond=cond, + align=align, + ) + result = self._constructor(new_data) + return result.__finalize__(self) + + @overload + def where( + self: NDFrameT, + cond, + other=..., + *, + inplace: Literal[False] = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool_t | lib.NoDefault = ..., + ) -> NDFrameT: + ... + + @overload + def where( + self, + cond, + other=..., + *, + inplace: Literal[True], + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool_t | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def where( + self: NDFrameT, + cond, + other=..., + *, + inplace: bool_t = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool_t | lib.NoDefault = ..., + ) -> NDFrameT | None: + ... + + @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + @doc( + klass=_shared_doc_kwargs["klass"], + cond="True", + cond_rev="False", + name="where", + name_other="mask", + ) + def where( + self: NDFrameT, + cond, + other=np.nan, + inplace: bool_t = False, + axis: Axis | None = None, + level: Level = None, + errors: IgnoreRaise | lib.NoDefault = "raise", + try_cast: bool_t | lib.NoDefault = lib.no_default, + ) -> NDFrameT | None: + """ + Replace values where the condition is {cond_rev}. + + Parameters + ---------- + cond : bool {klass}, array-like, or callable + Where `cond` is {cond}, keep the original value. Where + {cond_rev}, replace with corresponding value from `other`. + If `cond` is callable, it is computed on the {klass} and + should return boolean {klass} or array. The callable must + not change input {klass} (though pandas doesn't check it). + other : scalar, {klass}, or callable + Entries where `cond` is {cond_rev} are replaced with + corresponding value from `other`. + If other is callable, it is computed on the {klass} and + should return scalar or {klass}. The callable must not + change input {klass} (though pandas doesn't check it). + inplace : bool, default False + Whether to perform the operation in place on the data. + axis : int, default None + Alignment axis if needed. For `Series` this parameter is + unused and defaults to 0. + level : int, default None + Alignment level if needed. + errors : str, {{'raise', 'ignore'}}, default 'raise' + Note that currently this parameter won't affect + the results and will always coerce to a suitable dtype. + + - 'raise' : allow exceptions to be raised. + - 'ignore' : suppress exceptions. On error return original object. + + .. deprecated:: 1.5.0 + This argument had no effect. + + try_cast : bool, default None + Try to cast the result back to the input type (if possible). + + .. deprecated:: 1.3.0 + Manually cast back if necessary. + + Returns + ------- + Same type as caller or None if ``inplace=True``. + + See Also + -------- + :func:`DataFrame.{name_other}` : Return an object of same shape as + self. + + Notes + ----- + The {name} method is an application of the if-then idiom. 
For each + element in the calling DataFrame, if ``cond`` is ``{cond}`` the + element is used; otherwise the corresponding element from the DataFrame + ``other`` is used. If the axis of ``other`` does not align with axis of + ``cond`` {klass}, the misaligned index positions will be filled with + {cond_rev}. + + The signature for :func:`DataFrame.where` differs from + :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to + ``np.where(m, df1, df2)``. + + For further details and examples see the ``{name}`` documentation in + :ref:`indexing `. + + The dtype of the object takes precedence. The fill value is casted to + the object's dtype, if this can be done losslessly. + + Examples + -------- + >>> s = pd.Series(range(5)) + >>> s.where(s > 0) + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + 4 4.0 + dtype: float64 + >>> s.mask(s > 0) + 0 0.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + >>> s = pd.Series(range(5)) + >>> t = pd.Series([True, False]) + >>> s.where(t, 99) + 0 0 + 1 99 + 2 99 + 3 99 + 4 99 + dtype: int64 + >>> s.mask(t, 99) + 0 99 + 1 1 + 2 99 + 3 99 + 4 99 + dtype: int64 + + >>> s.where(s > 1, 10) + 0 10 + 1 10 + 2 2 + 3 3 + 4 4 + dtype: int64 + >>> s.mask(s > 1, 10) + 0 0 + 1 1 + 2 10 + 3 10 + 4 10 + dtype: int64 + + >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) + >>> df + A B + 0 0 1 + 1 2 3 + 2 4 5 + 3 6 7 + 4 8 9 + >>> m = df % 3 == 0 + >>> df.where(m, -df) + A B + 0 0 -1 + 1 -2 3 + 2 -4 -5 + 3 6 -7 + 4 -8 9 + >>> df.where(m, -df) == np.where(m, df, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + >>> df.where(m, -df) == df.mask(~m, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + """ + other = com.apply_if_callable(other, self) + + if try_cast is not lib.no_default: + warnings.warn( + "try_cast keyword is deprecated and will be removed in a " + "future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return self._where(cond, other, inplace, axis, level) + + @overload + def mask( + self: NDFrameT, + cond, + other=..., + *, + inplace: Literal[False] = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool_t | lib.NoDefault = ..., + ) -> NDFrameT: + ... + + @overload + def mask( + self, + cond, + other=..., + *, + inplace: Literal[True], + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool_t | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def mask( + self: NDFrameT, + cond, + other=..., + *, + inplace: bool_t = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool_t | lib.NoDefault = ..., + ) -> NDFrameT | None: + ... 
+ + @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + @doc( + where, + klass=_shared_doc_kwargs["klass"], + cond="False", + cond_rev="True", + name="mask", + name_other="where", + ) + def mask( + self: NDFrameT, + cond, + other=np.nan, + inplace: bool_t = False, + axis: Axis | None = None, + level: Level = None, + errors: IgnoreRaise | lib.NoDefault = "raise", + try_cast: bool_t | lib.NoDefault = lib.no_default, + ) -> NDFrameT | None: + + inplace = validate_bool_kwarg(inplace, "inplace") + cond = com.apply_if_callable(cond, self) + + if try_cast is not lib.no_default: + warnings.warn( + "try_cast keyword is deprecated and will be removed in a " + "future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # see gh-21891 + if not hasattr(cond, "__invert__"): + cond = np.array(cond) + + return self.where( + ~cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + ) + + @doc(klass=_shared_doc_kwargs["klass"]) + def shift( + self: NDFrameT, + periods: int = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = None, + ) -> NDFrameT: + """ + Shift index by desired number of periods with an optional time `freq`. + + When `freq` is not passed, shift the index without realigning the data. + If `freq` is passed (in this case, the index must be date or datetime, + or it will raise a `NotImplementedError`), the index will be + increased using the periods and the `freq`. `freq` can be inferred + when specified as "infer" as long as either freq or inferred_freq + attribute is set in the index. + + Parameters + ---------- + periods : int + Number of periods to shift. Can be positive or negative. + freq : DateOffset, tseries.offsets, timedelta, or str, optional + Offset to use from the tseries module or time rule (e.g. 'EOM'). + If `freq` is specified then the index values are shifted but the + data is not realigned. That is, use `freq` if you would like to + extend the index when shifting and preserve the original data. + If `freq` is specified as "infer" then it will be inferred from + the freq or inferred_freq attributes of the index. If neither of + those attributes exist, a ValueError is thrown. + axis : {{0 or 'index', 1 or 'columns', None}}, default None + Shift direction. For `Series` this parameter is unused and defaults to 0. + fill_value : object, optional + The scalar value to use for newly introduced missing values. + the default depends on the dtype of `self`. + For numeric data, ``np.nan`` is used. + For datetime, timedelta, or period data, etc. :attr:`NaT` is used. + For extension dtypes, ``self.dtype.na_value`` is used. + + .. versionchanged:: 1.1.0 + + Returns + ------- + {klass} + Copy of input object, shifted. + + See Also + -------- + Index.shift : Shift values of Index. + DatetimeIndex.shift : Shift values of DatetimeIndex. + PeriodIndex.shift : Shift values of PeriodIndex. + tshift : Shift the time index, using the index's frequency if + available. + + Examples + -------- + >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45], + ... "Col2": [13, 23, 18, 33, 48], + ... "Col3": [17, 27, 22, 37, 52]}}, + ... 
index=pd.date_range("2020-01-01", "2020-01-05")) + >>> df + Col1 Col2 Col3 + 2020-01-01 10 13 17 + 2020-01-02 20 23 27 + 2020-01-03 15 18 22 + 2020-01-04 30 33 37 + 2020-01-05 45 48 52 + + >>> df.shift(periods=3) + Col1 Col2 Col3 + 2020-01-01 NaN NaN NaN + 2020-01-02 NaN NaN NaN + 2020-01-03 NaN NaN NaN + 2020-01-04 10.0 13.0 17.0 + 2020-01-05 20.0 23.0 27.0 + + >>> df.shift(periods=1, axis="columns") + Col1 Col2 Col3 + 2020-01-01 NaN 10 13 + 2020-01-02 NaN 20 23 + 2020-01-03 NaN 15 18 + 2020-01-04 NaN 30 33 + 2020-01-05 NaN 45 48 + + >>> df.shift(periods=3, fill_value=0) + Col1 Col2 Col3 + 2020-01-01 0 0 0 + 2020-01-02 0 0 0 + 2020-01-03 0 0 0 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + + >>> df.shift(periods=3, freq="D") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + + >>> df.shift(periods=3, freq="infer") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + """ + if periods == 0: + return self.copy() + + if freq is None: + # when freq is None, data is shifted, index is not + axis = self._get_axis_number(axis) + new_data = self._mgr.shift( + periods=periods, axis=axis, fill_value=fill_value + ) + return self._constructor(new_data).__finalize__(self, method="shift") + + # when freq is given, index is shifted, data is not + index = self._get_axis(axis) + + if freq == "infer": + freq = getattr(index, "freq", None) + + if freq is None: + freq = getattr(index, "inferred_freq", None) + + if freq is None: + msg = "Freq was not set in the index hence cannot be inferred" + raise ValueError(msg) + + elif isinstance(freq, str): + freq = to_offset(freq) + + if isinstance(index, PeriodIndex): + orig_freq = to_offset(index.freq) + if freq != orig_freq: + assert orig_freq is not None # for mypy + raise ValueError( + f"Given freq {freq.rule_code} does not match " + f"PeriodIndex freq {orig_freq.rule_code}" + ) + new_ax = index.shift(periods) + else: + new_ax = index.shift(periods, freq) + + result = self.set_axis(new_ax, axis=axis) + return result.__finalize__(self, method="shift") + + @final + def slice_shift(self: NDFrameT, periods: int = 1, axis=0) -> NDFrameT: + """ + Equivalent to `shift` without copying data. + + .. deprecated:: 1.2.0 + slice_shift is deprecated, + use DataFrame/Series.shift instead. + + The shifted data will not include the dropped periods and the + shifted axis will be smaller than the original. + + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative. + axis : {0 or 'index', 1 or 'columns', None}, default 0 + For `Series` this parameter is unused and defaults to 0. + + Returns + ------- + shifted : same type as caller + + Notes + ----- + While the `slice_shift` is faster than `shift`, you may pay for it + later during alignment. + """ + + msg = ( + "The 'slice_shift' method is deprecated " + "and will be removed in a future version. " + "You can use DataFrame/Series.shift instead." 
+ ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + if periods == 0: + return self + + if periods > 0: + vslicer = slice(None, -periods) + islicer = slice(periods, None) + else: + vslicer = slice(-periods, None) + islicer = slice(None, periods) + + new_obj = self._slice(vslicer, axis=axis) + shifted_axis = self._get_axis(axis)[islicer] + new_obj = new_obj.set_axis(shifted_axis, axis=axis, copy=False) + return new_obj.__finalize__(self, method="slice_shift") + + @final + def tshift(self: NDFrameT, periods: int = 1, freq=None, axis: Axis = 0) -> NDFrameT: + """ + Shift the time index, using the index's frequency if available. + + .. deprecated:: 1.1.0 + Use `shift` instead. + + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative. + freq : DateOffset, timedelta, or str, default None + Increment to use from the tseries module + or time rule expressed as a string (e.g. 'EOM'). + axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0 + Corresponds to the axis that contains the Index. + For `Series` this parameter is unused and defaults to 0. + + Returns + ------- + shifted : Series/DataFrame + + Notes + ----- + If freq is not specified then tries to use the freq or inferred_freq + attributes of the index. If neither of those attributes exist, a + ValueError is thrown + """ + warnings.warn( + ( + "tshift is deprecated and will be removed in a future version. " + "Please use shift instead." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + + if freq is None: + freq = "infer" + + return self.shift(periods, freq, axis) + + def truncate( + self: NDFrameT, before=None, after=None, axis=None, copy: bool_t = True + ) -> NDFrameT: + """ + Truncate a Series or DataFrame before and after some index value. + + This is a useful shorthand for boolean indexing based on index + values above or below certain thresholds. + + Parameters + ---------- + before : date, str, int + Truncate all rows before this index value. + after : date, str, int + Truncate all rows after this index value. + axis : {0 or 'index', 1 or 'columns'}, optional + Axis to truncate. Truncates the index (rows) by default. + For `Series` this parameter is unused and defaults to 0. + copy : bool, default is True, + Return a copy of the truncated section. + + Returns + ------- + type of caller + The truncated Series or DataFrame. + + See Also + -------- + DataFrame.loc : Select a subset of a DataFrame by label. + DataFrame.iloc : Select a subset of a DataFrame by position. + + Notes + ----- + If the index being truncated contains only datetime values, + `before` and `after` may be specified as strings instead of + Timestamps. + + Examples + -------- + >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], + ... 'B': ['f', 'g', 'h', 'i', 'j'], + ... 'C': ['k', 'l', 'm', 'n', 'o']}, + ... index=[1, 2, 3, 4, 5]) + >>> df + A B C + 1 a f k + 2 b g l + 3 c h m + 4 d i n + 5 e j o + + >>> df.truncate(before=2, after=4) + A B C + 2 b g l + 3 c h m + 4 d i n + + The columns of a DataFrame can be truncated. + + >>> df.truncate(before="A", after="B", axis="columns") + A B + 1 a f + 2 b g + 3 c h + 4 d i + 5 e j + + For Series, only rows can be truncated. + + >>> df['A'].truncate(before=2, after=4) + 2 b + 3 c + 4 d + Name: A, dtype: object + + The index values in ``truncate`` can be datetimes or string + dates. 
+ + >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s') + >>> df = pd.DataFrame(index=dates, data={'A': 1}) + >>> df.tail() + A + 2016-01-31 23:59:56 1 + 2016-01-31 23:59:57 1 + 2016-01-31 23:59:58 1 + 2016-01-31 23:59:59 1 + 2016-02-01 00:00:00 1 + + >>> df.truncate(before=pd.Timestamp('2016-01-05'), + ... after=pd.Timestamp('2016-01-10')).tail() + A + 2016-01-09 23:59:56 1 + 2016-01-09 23:59:57 1 + 2016-01-09 23:59:58 1 + 2016-01-09 23:59:59 1 + 2016-01-10 00:00:00 1 + + Because the index is a DatetimeIndex containing only dates, we can + specify `before` and `after` as strings. They will be coerced to + Timestamps before truncation. + + >>> df.truncate('2016-01-05', '2016-01-10').tail() + A + 2016-01-09 23:59:56 1 + 2016-01-09 23:59:57 1 + 2016-01-09 23:59:58 1 + 2016-01-09 23:59:59 1 + 2016-01-10 00:00:00 1 + + Note that ``truncate`` assumes a 0 value for any unspecified time + component (midnight). This differs from partial string slicing, which + returns any partially matching dates. + + >>> df.loc['2016-01-05':'2016-01-10', :].tail() + A + 2016-01-10 23:59:55 1 + 2016-01-10 23:59:56 1 + 2016-01-10 23:59:57 1 + 2016-01-10 23:59:58 1 + 2016-01-10 23:59:59 1 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + # GH 17935 + # Check that index is sorted + if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing: + raise ValueError("truncate requires a sorted index") + + # if we have a date index, convert to dates, otherwise + # treat like a slice + if ax._is_all_dates: + from pandas.core.tools.datetimes import to_datetime + + before = to_datetime(before) + after = to_datetime(after) + + if before is not None and after is not None and before > after: + raise ValueError(f"Truncate: {after} must be after {before}") + + if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1: + before, after = after, before + + slicer = [slice(None, None)] * self._AXIS_LEN + slicer[axis] = slice(before, after) + result = self.loc[tuple(slicer)] + + if isinstance(ax, MultiIndex): + setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) + + if copy: + result = result.copy() + + return result + + @final + @doc(klass=_shared_doc_kwargs["klass"]) + def tz_convert( + self: NDFrameT, tz, axis=0, level=None, copy: bool_t = True + ) -> NDFrameT: + """ + Convert tz-aware axis to target time zone. + + Parameters + ---------- + tz : str or tzinfo object + axis : the axis to convert + level : int, str, default None + If axis is a MultiIndex, convert a specific level. Otherwise + must be None. + copy : bool, default True + Also make a copy of the underlying data. + + Returns + ------- + {klass} + Object with time zone converted axis. + + Raises + ------ + TypeError + If the axis is tz-naive. 
+ """ + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + def _tz_convert(ax, tz): + if not hasattr(ax, "tz_convert"): + if len(ax) > 0: + ax_name = self._get_axis_name(axis) + raise TypeError( + f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" + ) + else: + ax = DatetimeIndex([], tz=tz) + else: + ax = ax.tz_convert(tz) + return ax + + # if a level is given it must be a MultiIndex level or + # equivalent to the axis name + if isinstance(ax, MultiIndex): + level = ax._get_level_number(level) + new_level = _tz_convert(ax.levels[level], tz) + ax = ax.set_levels(new_level, level=level) + else: + if level not in (None, 0, ax.name): + raise ValueError(f"The level {level} is not valid") + ax = _tz_convert(ax, tz) + + result = self.copy(deep=copy) + result = result.set_axis(ax, axis=axis, copy=False) + return result.__finalize__(self, method="tz_convert") + + @final + @doc(klass=_shared_doc_kwargs["klass"]) + def tz_localize( + self: NDFrameT, + tz, + axis=0, + level=None, + copy: bool_t = True, + ambiguous="raise", + nonexistent: str = "raise", + ) -> NDFrameT: + """ + Localize tz-naive index of a Series or DataFrame to target time zone. + + This operation localizes the Index. To localize the values in a + timezone-naive Series, use :meth:`Series.dt.tz_localize`. + + Parameters + ---------- + tz : str or tzinfo + axis : the axis to localize + level : int, str, default None + If axis is a MultiIndex, localize a specific level. Otherwise + must be None. + copy : bool, default True + Also make a copy of the underlying data. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + nonexistent : str, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. Valid values are: + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise a NonExistentTimeError if there are + nonexistent times. + + Returns + ------- + {klass} + Same type as the input. + + Raises + ------ + TypeError + If the TimeSeries is tz-aware and tz is not None. + + Examples + -------- + Localize local times: + + >>> s = pd.Series([1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00'])) + >>> s.tz_localize('CET') + 2018-09-15 01:30:00+02:00 1 + dtype: int64 + + Be careful with DST changes. When there is sequential data, pandas + can infer the DST time: + + >>> s = pd.Series(range(7), + ... index=pd.DatetimeIndex(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ...
'2018-10-28 03:30:00'])) + >>> s.tz_localize('CET', ambiguous='infer') + 2018-10-28 01:30:00+02:00 0 + 2018-10-28 02:00:00+02:00 1 + 2018-10-28 02:30:00+02:00 2 + 2018-10-28 02:00:00+01:00 3 + 2018-10-28 02:30:00+01:00 4 + 2018-10-28 03:00:00+01:00 5 + 2018-10-28 03:30:00+01:00 6 + dtype: int64 + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.Series(range(3), + ... index=pd.DatetimeIndex(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) + 2018-10-28 01:20:00+02:00 0 + 2018-10-28 02:36:00+02:00 1 + 2018-10-28 03:46:00+01:00 2 + dtype: int64 + + If the DST transition causes nonexistent times, you can shift these + dates forward or backward with a timedelta object or `'shift_forward'` + or `'shift_backward'`. + + >>> s = pd.Series(range(2), + ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'])) + >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 2015-03-29 03:00:00+02:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 2015-03-29 01:59:59.999999999+01:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) + 2015-03-29 03:30:00+02:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + """ + nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta + ): + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object" + ) + + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + def _tz_localize(ax, tz, ambiguous, nonexistent): + if not hasattr(ax, "tz_localize"): + if len(ax) > 0: + ax_name = self._get_axis_name(axis) + raise TypeError( + f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" + ) + else: + ax = DatetimeIndex([], tz=tz) + else: + ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent) + return ax + + # if a level is given it must be a MultiIndex level or + # equivalent to the axis name + if isinstance(ax, MultiIndex): + level = ax._get_level_number(level) + new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent) + ax = ax.set_levels(new_level, level=level) + else: + if level not in (None, 0, ax.name): + raise ValueError(f"The level {level} is not valid") + ax = _tz_localize(ax, tz, ambiguous, nonexistent) + + result = self.copy(deep=copy) + result = result.set_axis(ax, axis=axis, copy=False) + return result.__finalize__(self, method="tz_localize") + + # ---------------------------------------------------------------------- + # Numeric Methods + + @final + def describe( + self: NDFrameT, + percentiles=None, + include=None, + exclude=None, + datetime_is_numeric: bool_t = False, + ) -> NDFrameT: + """ + Generate descriptive statistics. + + Descriptive statistics include those that summarize the central + tendency, dispersion and shape of a + dataset's distribution, excluding ``NaN`` values. + + Analyzes both numeric and object series, as well + as ``DataFrame`` column sets of mixed data types. The output + will vary depending on what is provided. Refer to the notes + below for more detail. 
+ + Parameters + ---------- + percentiles : list-like of numbers, optional + The percentiles to include in the output. All should + fall between 0 and 1. The default is + ``[.25, .5, .75]``, which returns the 25th, 50th, and + 75th percentiles. + include : 'all', list-like of dtypes or None (default), optional + A white list of data types to include in the result. Ignored + for ``Series``. Here are the options: + + - 'all' : All columns of the input will be included in the output. + - A list-like of dtypes : Limits the results to the + provided data types. + To limit the result to numeric types submit + ``numpy.number``. To limit it instead to object columns submit + the ``numpy.object`` data type. Strings + can also be used in the style of + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To + select pandas categorical columns, use ``'category'`` + - None (default) : The result will include all numeric columns. + exclude : list-like of dtypes or None (default), optional, + A black list of data types to omit from the result. Ignored + for ``Series``. Here are the options: + + - A list-like of dtypes : Excludes the provided data types + from the result. To exclude numeric types submit + ``numpy.number``. To exclude object columns submit the data + type ``numpy.object``. Strings can also be used in the style of + ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To + exclude pandas categorical columns, use ``'category'`` + - None (default) : The result will exclude nothing. + datetime_is_numeric : bool, default False + Whether to treat datetime dtypes as numeric. This affects statistics + calculated for the column. For DataFrame input, this also + controls whether datetime columns are included by default. + + .. versionadded:: 1.1.0 + + Returns + ------- + Series or DataFrame + Summary statistics of the Series or Dataframe provided. + + See Also + -------- + DataFrame.count: Count number of non-NA/null observations. + DataFrame.max: Maximum of the values in the object. + DataFrame.min: Minimum of the values in the object. + DataFrame.mean: Mean of the values. + DataFrame.std: Standard deviation of the observations. + DataFrame.select_dtypes: Subset of a DataFrame including/excluding + columns based on their dtype. + + Notes + ----- + For numeric data, the result's index will include ``count``, + ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and + upper percentiles. By default the lower percentile is ``25`` and the + upper percentile is ``75``. The ``50`` percentile is the + same as the median. + + For object data (e.g. strings or timestamps), the result's index + will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` + is the most common value. The ``freq`` is the most common value's + frequency. Timestamps also include the ``first`` and ``last`` items. + + If multiple object values have the highest count, then the + ``count`` and ``top`` results will be arbitrarily chosen from + among those with the highest count. + + For mixed data types provided via a ``DataFrame``, the default is to + return only an analysis of numeric columns. If the dataframe consists + only of object and categorical data without any numeric columns, the + default is to return an analysis of both the object and categorical + columns. If ``include='all'`` is provided as an option, the result + will include a union of attributes of each type. + + The `include` and `exclude` parameters can be used to limit + which columns in a ``DataFrame`` are analyzed for the output. 
+ The parameters are ignored when analyzing a ``Series``. + + Examples + -------- + Describing a numeric ``Series``. + + >>> s = pd.Series([1, 2, 3]) + >>> s.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + dtype: float64 + + Describing a categorical ``Series``. + + >>> s = pd.Series(['a', 'a', 'b', 'c']) + >>> s.describe() + count 4 + unique 3 + top a + freq 2 + dtype: object + + Describing a timestamp ``Series``. + + >>> s = pd.Series([ + ... np.datetime64("2000-01-01"), + ... np.datetime64("2010-01-01"), + ... np.datetime64("2010-01-01") + ... ]) + >>> s.describe(datetime_is_numeric=True) + count 3 + mean 2006-09-01 08:00:00 + min 2000-01-01 00:00:00 + 25% 2004-12-31 12:00:00 + 50% 2010-01-01 00:00:00 + 75% 2010-01-01 00:00:00 + max 2010-01-01 00:00:00 + dtype: object + + Describing a ``DataFrame``. By default only numeric fields + are returned. + + >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']), + ... 'numeric': [1, 2, 3], + ... 'object': ['a', 'b', 'c'] + ... }) + >>> df.describe() + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Describing all columns of a ``DataFrame`` regardless of data type. + + >>> df.describe(include='all') # doctest: +SKIP + categorical numeric object + count 3 3.0 3 + unique 3 NaN 3 + top f NaN a + freq 1 NaN 1 + mean NaN 2.0 NaN + std NaN 1.0 NaN + min NaN 1.0 NaN + 25% NaN 1.5 NaN + 50% NaN 2.0 NaN + 75% NaN 2.5 NaN + max NaN 3.0 NaN + + Describing a column from a ``DataFrame`` by accessing it as + an attribute. + + >>> df.numeric.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + Name: numeric, dtype: float64 + + Including only numeric columns in a ``DataFrame`` description. + + >>> df.describe(include=[np.number]) + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Including only string columns in a ``DataFrame`` description. + + >>> df.describe(include=[object]) # doctest: +SKIP + object + count 3 + unique 3 + top a + freq 1 + + Including only categorical columns from a ``DataFrame`` description. + + >>> df.describe(include=['category']) + categorical + count 3 + unique 3 + top d + freq 1 + + Excluding numeric columns from a ``DataFrame`` description. + + >>> df.describe(exclude=[np.number]) # doctest: +SKIP + categorical object + count 3 3 + unique 3 3 + top f a + freq 1 1 + + Excluding object columns from a ``DataFrame`` description. + + >>> df.describe(exclude=[object]) # doctest: +SKIP + categorical numeric + count 3 3.0 + unique 3 NaN + top f NaN + freq 1 NaN + mean NaN 2.0 + std NaN 1.0 + min NaN 1.0 + 25% NaN 1.5 + 50% NaN 2.0 + 75% NaN 2.5 + max NaN 3.0 + """ + return describe_ndframe( + obj=self, + include=include, + exclude=exclude, + datetime_is_numeric=datetime_is_numeric, + percentiles=percentiles, + ) + + @final + def pct_change( + self: NDFrameT, + periods=1, + fill_method="pad", + limit=None, + freq=None, + **kwargs, + ) -> NDFrameT: + """ + Percentage change between the current and a prior element. + + Computes the percentage change from the immediately previous row by + default. This is useful in comparing the percentage of change in a time + series of elements. + + Parameters + ---------- + periods : int, default 1 + Periods to shift for forming percent change. + fill_method : str, default 'pad' + How to handle NAs **before** computing percent changes. 
+ limit : int, default None + The number of consecutive NAs to fill before stopping. + freq : DateOffset, timedelta, or str, optional + Increment to use from time series API (e.g. 'M' or BDay()). + **kwargs + Additional keyword arguments are passed into + `DataFrame.shift` or `Series.shift`. + + Returns + ------- + chg : Series or DataFrame + The same type as the calling object. + + See Also + -------- + Series.diff : Compute the difference of two elements in a Series. + DataFrame.diff : Compute the difference of two elements in a DataFrame. + Series.shift : Shift the index by some number of periods. + DataFrame.shift : Shift the index by some number of periods. + + Examples + -------- + **Series** + + >>> s = pd.Series([90, 91, 85]) + >>> s + 0 90 + 1 91 + 2 85 + dtype: int64 + + >>> s.pct_change() + 0 NaN + 1 0.011111 + 2 -0.065934 + dtype: float64 + + >>> s.pct_change(periods=2) + 0 NaN + 1 NaN + 2 -0.055556 + dtype: float64 + + See the percentage change in a Series where filling NAs with last + valid observation forward to next valid. + + >>> s = pd.Series([90, 91, None, 85]) + >>> s + 0 90.0 + 1 91.0 + 2 NaN + 3 85.0 + dtype: float64 + + >>> s.pct_change(fill_method='ffill') + 0 NaN + 1 0.011111 + 2 0.000000 + 3 -0.065934 + dtype: float64 + + **DataFrame** + + Percentage change in French franc, Deutsche Mark, and Italian lira from + 1980-01-01 to 1980-03-01. + + >>> df = pd.DataFrame({ + ... 'FR': [4.0405, 4.0963, 4.3149], + ... 'GR': [1.7246, 1.7482, 1.8519], + ... 'IT': [804.74, 810.01, 860.13]}, + ... index=['1980-01-01', '1980-02-01', '1980-03-01']) + >>> df + FR GR IT + 1980-01-01 4.0405 1.7246 804.74 + 1980-02-01 4.0963 1.7482 810.01 + 1980-03-01 4.3149 1.8519 860.13 + + >>> df.pct_change() + FR GR IT + 1980-01-01 NaN NaN NaN + 1980-02-01 0.013810 0.013684 0.006549 + 1980-03-01 0.053365 0.059318 0.061876 + + Percentage of change in GOOG and APPL stock volume. Shows computing + the percentage change between columns. + + >>> df = pd.DataFrame({ + ... '2016': [1769950, 30586265], + ... '2015': [1500923, 40912316], + ... '2014': [1371819, 41403351]}, + ... 
index=['GOOG', 'APPL']) + >>> df + 2016 2015 2014 + GOOG 1769950 1500923 1371819 + APPL 30586265 40912316 41403351 + + >>> df.pct_change(axis='columns', periods=-1) + 2016 2015 2014 + GOOG 0.179241 0.094112 NaN + APPL -0.252395 -0.011860 NaN + """ + axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name)) + if fill_method is None: + data = self + else: + _data = self.fillna(method=fill_method, axis=axis, limit=limit) + assert _data is not None # needed for mypy + data = _data + + shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs) + # Unsupported left operand type for / ("NDFrameT") + rs = data / shifted - 1 # type: ignore[operator] + if freq is not None: + # Shift method is implemented differently when freq is not None + # We want to restore the original index + rs = rs.loc[~rs.index.duplicated()] + rs = rs.reindex_like(data) + return rs.__finalize__(self, method="pct_change") + + @final + def _agg_by_level( + self, + name: str, + axis: Axis = 0, + level: Level = 0, + skipna: bool_t = True, + **kwargs, + ): + if axis is None: + raise ValueError("Must specify 'axis' when aggregating by level.") + grouped = self.groupby(level=level, axis=axis, sort=False) + if hasattr(grouped, name) and skipna: + return getattr(grouped, name)(**kwargs) + axis = self._get_axis_number(axis) + method = getattr(type(self), name) + applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs) + return grouped.aggregate(applyf) + + @final + def _logical_func( + self, + name: str, + func, + axis: Axis = 0, + bool_only: bool_t | None = None, + skipna: bool_t = True, + level: Level | None = None, + **kwargs, + ) -> Series | bool_t: + nv.validate_logical_func((), kwargs, fname=name) + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. df.any(level=1) should use df.groupby(level=1).any()", + FutureWarning, + stacklevel=find_stack_level(), + ) + if bool_only is not None: + raise NotImplementedError( + "Option bool_only is not implemented with option level." 
+ ) + return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) + + if self.ndim > 1 and axis is None: + # Reduce along one dimension then the other, to simplify DataFrame._reduce + res = self._logical_func( + name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs + ) + return res._logical_func(name, func, skipna=skipna, **kwargs) + + if ( + self.ndim > 1 + and axis == 1 + and len(self._mgr.arrays) > 1 + # TODO(EA2D): special-case not needed + and all(x.ndim == 2 for x in self._mgr.arrays) + and bool_only is not None + and not kwargs + ): + # Fastpath avoiding potentially expensive transpose + obj = self + if bool_only: + obj = self._get_bool_data() + return obj._reduce_axis1(name, func, skipna=skipna) + + return self._reduce( + func, + name=name, + axis=axis, + skipna=skipna, + numeric_only=bool_only, + filter_type="bool", + ) + + def any( + self, + axis: Axis = 0, + bool_only: bool_t | None = None, + skipna: bool_t = True, + level: Level | None = None, + **kwargs, + ) -> DataFrame | Series | bool_t: + return self._logical_func( + "any", nanops.nanany, axis, bool_only, skipna, level, **kwargs + ) + + def all( + self, + axis: Axis = 0, + bool_only: bool_t | None = None, + skipna: bool_t = True, + level: Level | None = None, + **kwargs, + ) -> Series | bool_t: + return self._logical_func( + "all", nanops.nanall, axis, bool_only, skipna, level, **kwargs + ) + + @final + def _accum_func( + self, + name: str, + func, + axis: Axis | None = None, + skipna: bool_t = True, + *args, + **kwargs, + ): + skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + if axis == 1: + return self.T._accum_func( + name, func, axis=0, skipna=skipna, *args, **kwargs + ).T + + def block_accum_func(blk_values): + values = blk_values.T if hasattr(blk_values, "T") else blk_values + + result = nanops.na_accum_func(values, func, skipna=skipna) + + result = result.T if hasattr(result, "T") else result + return result + + result = self._mgr.apply(block_accum_func) + + return self._constructor(result).__finalize__(self, method=name) + + def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): + return self._accum_func( + "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs + ) + + def cummin(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): + return self._accum_func( + "cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs + ) + + def cumsum(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): + return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs) + + def cumprod(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): + return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs) + + @final + def _stat_function_ddof( + self, + name: str, + func, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ddof: int = 1, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + nv.validate_stat_ddof_func((), kwargs, fname=name) + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + if axis is None: + axis = self._stat_axis_number + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.var(level=1) should use df.groupby(level=1).var().", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._agg_by_level( + name, axis=axis, level=level, skipna=skipna, ddof=ddof + ) + return self._reduce( + func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof + ) + + def sem( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ddof: int = 1, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function_ddof( + "sem", nanops.nansem, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + def var( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ddof: int = 1, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function_ddof( + "var", nanops.nanvar, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + def std( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ddof: int = 1, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function_ddof( + "std", nanops.nanstd, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + @final + def _stat_function( + self, + name: str, + func, + axis: Axis | None | lib.NoDefault = None, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ): + if name == "median": + nv.validate_median((), kwargs) + else: + nv.validate_stat_func((), kwargs, fname=name) + + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + + if axis is None and level is None and self.ndim > 1: + # user must have explicitly passed axis=None + # GH#21597 + warnings.warn( + f"In a future version, DataFrame.{name}(axis=None) will return a " + f"scalar {name} over the entire DataFrame. To retain the old " + f"behavior, use 'frame.{name}(axis=0)' or just 'frame.{name}()'", + FutureWarning, + stacklevel=find_stack_level(), + ) + if axis is lib.no_default: + axis = None + + if axis is None: + axis = self._stat_axis_number + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.median(level=1) should use df.groupby(level=1).median().", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._agg_by_level( + name, axis=axis, level=level, skipna=skipna, numeric_only=numeric_only + ) + return self._reduce( + func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only + ) + + def min( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ): + return self._stat_function( + "min", + nanops.nanmin, + axis, + skipna, + level, + numeric_only, + **kwargs, + ) + + def max( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ): + return self._stat_function( + "max", + nanops.nanmax, + axis, + skipna, + level, + numeric_only, + **kwargs, + ) + + def mean( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function( + "mean", nanops.nanmean, axis, skipna, level, numeric_only, **kwargs + ) + + def median( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function( + "median", nanops.nanmedian, axis, skipna, level, numeric_only, **kwargs + ) + + def skew( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function( + "skew", nanops.nanskew, axis, skipna, level, numeric_only, **kwargs + ) + + def kurt( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + **kwargs, + ) -> Series | float: + return self._stat_function( + "kurt", nanops.nankurt, axis, skipna, level, numeric_only, **kwargs + ) + + kurtosis = kurt + + @final + def _min_count_stat_function( + self, + name: str, + func, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + min_count: int = 0, + **kwargs, + ): + if name == "sum": + nv.validate_sum((), kwargs) + elif name == "prod": + nv.validate_prod((), kwargs) + else: + nv.validate_stat_func((), kwargs, fname=name) + + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + + if axis is None: + axis = self._stat_axis_number + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.sum(level=1) should use df.groupby(level=1).sum().", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._agg_by_level( + name, + axis=axis, + level=level, + skipna=skipna, + min_count=min_count, + numeric_only=numeric_only, + ) + + return self._reduce( + func, + name=name, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + ) + + def sum( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + min_count=0, + **kwargs, + ): + return self._min_count_stat_function( + "sum", nanops.nansum, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + def prod( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + numeric_only: bool_t | None = None, + min_count: int = 0, + **kwargs, + ): + return self._min_count_stat_function( + "prod", + nanops.nanprod, + axis, + skipna, + level, + numeric_only, + min_count, + **kwargs, + ) + + product = prod + + def mad( + self, + axis: Axis | None = None, + skipna: bool_t = True, + level: Level | None = None, + ) -> Series | float: + """ + {desc} + + .. deprecated:: 1.5.0 + mad is deprecated. + + Parameters + ---------- + axis : {axis_descr} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values when computing the result. + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. + + Returns + ------- + {name1} or {name2} (if level specified)\ + {see_also}\ + {examples} + """ + msg = ( + "The 'mad' method is deprecated and will be removed in a future version. " + "To compute the same result, you may do `(df - df.mean()).abs().mean()`." + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + if not is_bool(skipna): + warnings.warn( + "Passing None for skipna is deprecated and will raise in a future" + "version. Pass True instead. Only boolean values will be allowed " + "in the future.", + FutureWarning, + stacklevel=find_stack_level(), + ) + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. 
df.mad(level=1) should use df.groupby(level=1).mad()", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna) + + data = self._get_numeric_data() + if axis == 0: + # error: Unsupported operand types for - ("NDFrame" and "float") + demeaned = data - data.mean(axis=0) # type: ignore[operator] + else: + demeaned = data.sub(data.mean(axis=1), axis=0) + return np.abs(demeaned).mean(axis=axis, skipna=skipna) + + @classmethod + def _add_numeric_operations(cls): + """ + Add the operations to the cls; evaluate the doc strings again + """ + axis_descr, name1, name2 = _doc_params(cls) + + @deprecate_nonkeyword_arguments( + version=None, + allowed_args=["self"], + name="DataFrame.any and Series.any", + ) + @doc( + _bool_doc, + desc=_any_desc, + name1=name1, + name2=name2, + axis_descr=axis_descr, + see_also=_any_see_also, + examples=_any_examples, + empty_value=False, + ) + def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs) + + setattr(cls, "any", any) + + @doc( + _bool_doc, + desc=_all_desc, + name1=name1, + name2=name2, + axis_descr=axis_descr, + see_also=_all_see_also, + examples=_all_examples, + empty_value=True, + ) + def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs) + + setattr(cls, "all", all) + + # error: Argument 1 to "doc" has incompatible type "Optional[str]"; expected + # "Union[str, Callable[..., Any]]" + @doc( + NDFrame.mad.__doc__, # type: ignore[arg-type] + desc="Return the mean absolute deviation of the values " + "over the requested axis.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + see_also="", + examples="", + ) + def mad(self, axis=None, skipna=True, level=None): + return NDFrame.mad(self, axis, skipna, level) + + setattr(cls, "mad", mad) + + @doc( + _num_ddof_doc, + desc="Return unbiased standard error of the mean over requested " + "axis.\n\nNormalized by N-1 by default. This can be changed " + "using the ddof argument", + name1=name1, + name2=name2, + axis_descr=axis_descr, + notes="", + examples="", + ) + def sem( + self, + axis=None, + skipna=True, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs) + + setattr(cls, "sem", sem) + + @doc( + _num_ddof_doc, + desc="Return unbiased variance over requested axis.\n\nNormalized by " + "N-1 by default. This can be changed using the ddof argument.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + notes="", + examples=_var_examples, + ) + def var( + self, + axis=None, + skipna=True, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, **kwargs) + + setattr(cls, "var", var) + + @doc( + _num_ddof_doc, + desc="Return sample standard deviation over requested axis." + "\n\nNormalized by N-1 by default. 
This can be changed using the " + "ddof argument.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + notes=_std_notes, + examples=_std_examples, + ) + def std( + self, + axis=None, + skipna=True, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs) + + setattr(cls, "std", std) + + @doc( + _cnum_doc, + desc="minimum", + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name="min", + examples=_cummin_examples, + ) + def cummin(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cummin(self, axis, skipna, *args, **kwargs) + + setattr(cls, "cummin", cummin) + + @doc( + _cnum_doc, + desc="maximum", + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name="max", + examples=_cummax_examples, + ) + def cummax(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cummax(self, axis, skipna, *args, **kwargs) + + setattr(cls, "cummax", cummax) + + @doc( + _cnum_doc, + desc="sum", + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name="sum", + examples=_cumsum_examples, + ) + def cumsum(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) + + setattr(cls, "cumsum", cumsum) + + @doc( + _cnum_doc, + desc="product", + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name="prod", + examples=_cumprod_examples, + ) + def cumprod(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) + + setattr(cls, "cumprod", cumprod) + + @doc( + _num_doc, + desc="Return the sum of the values over the requested axis.\n\n" + "This is equivalent to the method ``numpy.sum``.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count=_min_count_stub, + see_also=_stat_func_see_also, + examples=_sum_examples, + ) + def sum( + self, + axis=None, + skipna=True, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + return NDFrame.sum( + self, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + setattr(cls, "sum", sum) + + @doc( + _num_doc, + desc="Return the product of the values over the requested axis.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count=_min_count_stub, + see_also=_stat_func_see_also, + examples=_prod_examples, + ) + def prod( + self, + axis=None, + skipna=True, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + return NDFrame.prod( + self, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + setattr(cls, "prod", prod) + cls.product = prod + + @doc( + _num_doc, + desc="Return the mean of the values over the requested axis.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def mean( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "mean", mean) + + @doc( + _num_doc, + desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def skew( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "skew", skew) + + @doc( + _num_doc, + desc="Return 
unbiased kurtosis over requested axis.\n\n" + "Kurtosis obtained using Fisher's definition of\n" + "kurtosis (kurtosis of normal == 0.0). Normalized " + "by N-1.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def kurt( + self, + axis: Axis | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "kurt", kurt) + cls.kurtosis = kurt + + @doc( + _num_doc, + desc="Return the median of the values over the requested axis.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def median( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "median", median) + + @doc( + _num_doc, + desc="Return the maximum of the values over the requested axis.\n\n" + "If you want the *index* of the maximum, use ``idxmax``. This is " + "the equivalent of the ``numpy.ndarray`` method ``argmax``.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also=_stat_func_see_also, + examples=_max_examples, + ) + def max( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "max", max) + + @doc( + _num_doc, + desc="Return the minimum of the values over the requested axis.\n\n" + "If you want the *index* of the minimum, use ``idxmin``. This is " + "the equivalent of the ``numpy.ndarray`` method ``argmin``.", + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also=_stat_func_see_also, + examples=_min_examples, + ) + def min( + self, + axis: int | None | lib.NoDefault = lib.no_default, + skipna=True, + level=None, + numeric_only=None, + **kwargs, + ): + return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs) + + setattr(cls, "min", min) + + @final + @doc(Rolling) + def rolling( + self, + window: int | timedelta | BaseOffset | BaseIndexer, + min_periods: int | None = None, + center: bool_t = False, + win_type: str | None = None, + on: str | None = None, + axis: Axis = 0, + closed: str | None = None, + step: int | None = None, + method: str = "single", + ) -> Window | Rolling: + axis = self._get_axis_number(axis) + + if win_type is not None: + return Window( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + step=step, + method=method, + ) + + return Rolling( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + step=step, + method=method, + ) + + @final + @doc(Expanding) + def expanding( + self, + min_periods: int = 1, + center: bool_t | None = None, + axis: Axis = 0, + method: str = "single", + ) -> Expanding: + axis = self._get_axis_number(axis) + if center is not None: + warnings.warn( + "The `center` argument on `expanding` will be removed in the future.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + center = False + + return Expanding( + self, min_periods=min_periods, center=center, axis=axis, method=method + ) + + @final + @doc(ExponentialMovingWindow) + def ewm( + self, + com: float | None = None, + span: 
float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool_t = True, + ignore_na: bool_t = False, + axis: Axis = 0, + times: str | np.ndarray | DataFrame | Series | None = None, + method: str = "single", + ) -> ExponentialMovingWindow: + axis = self._get_axis_number(axis) + return ExponentialMovingWindow( + self, + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na, + axis=axis, + times=times, + method=method, + ) + + # ---------------------------------------------------------------------- + # Arithmetic Methods + + @final + def _inplace_method(self, other, op): + """ + Wrap arithmetic method to operate inplace. + """ + result = op(self, other) + + if ( + self.ndim == 1 + and result._indexed_same(self) + and is_dtype_equal(result.dtype, self.dtype) + ): + # GH#36498 this inplace op can _actually_ be inplace. + self._values[:] = result._values + return self + + # Delete cacher + self._reset_cacher() + + # this makes sure that we are aligned like the input + # we are updating inplace so we want to ignore is_copy + self._update_inplace( + result.reindex_like(self, copy=False), verify_is_copy=False + ) + return self + + def __iadd__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for + ("Type[NDFrame]") + return self._inplace_method(other, type(self).__add__) # type: ignore[operator] + + def __isub__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for - ("Type[NDFrame]") + return self._inplace_method(other, type(self).__sub__) # type: ignore[operator] + + def __imul__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for * ("Type[NDFrame]") + return self._inplace_method(other, type(self).__mul__) # type: ignore[operator] + + def __itruediv__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for / ("Type[NDFrame]") + return self._inplace_method( + other, type(self).__truediv__ # type: ignore[operator] + ) + + def __ifloordiv__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for // ("Type[NDFrame]") + return self._inplace_method( + other, type(self).__floordiv__ # type: ignore[operator] + ) + + def __imod__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for % ("Type[NDFrame]") + return self._inplace_method(other, type(self).__mod__) # type: ignore[operator] + + def __ipow__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for ** ("Type[NDFrame]") + return self._inplace_method(other, type(self).__pow__) # type: ignore[operator] + + def __iand__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for & ("Type[NDFrame]") + return self._inplace_method(other, type(self).__and__) # type: ignore[operator] + + def __ior__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for | ("Type[NDFrame]") + return self._inplace_method(other, type(self).__or__) # type: ignore[operator] + + def __ixor__(self: NDFrameT, other) -> NDFrameT: + # error: Unsupported left operand type for ^ ("Type[NDFrame]") + return self._inplace_method(other, type(self).__xor__) # type: ignore[operator] + + # ---------------------------------------------------------------------- + # Misc methods + + @final + def _find_valid_index(self, *, how: str) -> Hashable | None: + """ + Retrieves the index of the first valid value. 
+ + Parameters + ---------- + how : {'first', 'last'} + Use this parameter to change between the first or last valid index. + + Returns + ------- + idx_first_valid : type of index + """ + idxpos = find_valid_index(self._values, how=how) + if idxpos is None: + return None + return self.index[idxpos] + + @final + @doc(position="first", klass=_shared_doc_kwargs["klass"]) + def first_valid_index(self) -> Hashable | None: + """ + Return index for {position} non-NA value or None, if no non-NA value is found. + + Returns + ------- + scalar : type of index + + Notes + ----- + If all elements are non-NA/null, returns None. + Also returns None for empty {klass}. + """ + return self._find_valid_index(how="first") + + @final + @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) + def last_valid_index(self) -> Hashable | None: + return self._find_valid_index(how="last") + + +def _doc_params(cls): + """Return a tuple of the doc params.""" + axis_descr = ( + f"{{{', '.join([f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS)])}}}" + ) + name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" + name2 = cls.__name__ + return axis_descr, name, name2 + + +_num_doc = """ +{desc} + +Parameters +---------- +axis : {axis_descr} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. +skipna : bool, default True + Exclude NA/null values when computing the result. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. + + .. deprecated:: 1.3.0 + The level keyword is deprecated. Use groupby instead. +numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. + + .. deprecated:: 1.5.0 + Specifying ``numeric_only=None`` is deprecated. The default value will be + ``False`` in a future version of pandas. + +{min_count}\ +**kwargs + Additional keyword arguments to be passed to the function. + +Returns +------- +{name1} or {name2} (if level specified)\ +{see_also}\ +{examples} +""" + +_num_ddof_doc = """ +{desc} + +Parameters +---------- +axis : {axis_descr} + For `Series` this parameter is unused and defaults to 0. +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. + + .. deprecated:: 1.3.0 + The level keyword is deprecated. Use groupby instead. +ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. +numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. + + .. deprecated:: 1.5.0 + Specifying ``numeric_only=None`` is deprecated. The default value will be + ``False`` in a future version of pandas. + +Returns +------- +{name1} or {name2} (if level specified) \ +{notes}\ +{examples} +""" + +_std_notes = """ + +Notes +----- +To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the +default `ddof=1`)""" + +_std_examples = """ + +Examples +-------- +>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], +... 'age': [21, 25, 62, 43], +... 'height': [1.61, 1.87, 1.49, 2.01]} +... 
).set_index('person_id') +>>> df + age height +person_id +0 21 1.61 +1 25 1.87 +2 62 1.49 +3 43 2.01 + +The standard deviation of the columns can be found as follows: + +>>> df.std() +age 18.786076 +height 0.237417 + +Alternatively, `ddof=0` can be set to normalize by N instead of N-1: + +>>> df.std(ddof=0) +age 16.269219 +height 0.205609""" + +_var_examples = """ + +Examples +-------- +>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], +... 'age': [21, 25, 62, 43], +... 'height': [1.61, 1.87, 1.49, 2.01]} +... ).set_index('person_id') +>>> df + age height +person_id +0 21 1.61 +1 25 1.87 +2 62 1.49 +3 43 2.01 + +>>> df.var() +age 352.916667 +height 0.056367 + +Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1: + +>>> df.var(ddof=0) +age 264.687500 +height 0.042275""" + +_bool_doc = """ +{desc} + +Parameters +---------- +axis : {{0 or 'index', 1 or 'columns', None}}, default 0 + Indicate which axis or axes should be reduced. For `Series` this parameter + is unused and defaults to 0. + + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. + +bool_only : bool, default None + Include only boolean columns. If None, will attempt to use everything, + then use only boolean data. Not implemented for Series. +skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be {empty_value}, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. + + .. deprecated:: 1.3.0 + The level keyword is deprecated. Use groupby instead. +**kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + +Returns +------- +{name1} or {name2} + If level is specified, then, {name2} is returned; otherwise, {name1} + is returned. + +{see_also} +{examples}""" + +_all_desc = """\ +Return whether all elements are True, potentially over an axis. + +Returns True unless there at least one element within a series or +along a Dataframe axis that is False or equivalent (e.g. zero or +empty).""" + +_all_examples = """\ +Examples +-------- +**Series** + +>>> pd.Series([True, True]).all() +True +>>> pd.Series([True, False]).all() +False +>>> pd.Series([], dtype="float64").all() +True +>>> pd.Series([np.nan]).all() +True +>>> pd.Series([np.nan]).all(skipna=False) +True + +**DataFrames** + +Create a dataframe from a dictionary. + +>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]}) +>>> df + col1 col2 +0 True True +1 True False + +Default behaviour checks if values in each column all return True. + +>>> df.all() +col1 True +col2 False +dtype: bool + +Specify ``axis='columns'`` to check if values in each row all return True. + +>>> df.all(axis='columns') +0 True +1 False +dtype: bool + +Or ``axis=None`` for whether every value is True. + +>>> df.all(axis=None) +False +""" + +_all_see_also = """\ +See Also +-------- +Series.all : Return True if all elements are True. +DataFrame.any : Return True if one (or more) elements are True. +""" + +_cnum_doc = """ +Return cumulative {desc} over a DataFrame or Series axis. + +Returns a DataFrame or Series of the same size containing the cumulative +{desc}. 
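The ``ddof`` argument described in `_num_ddof_doc` above is just the divisor switch ``N - ddof``; the ``std``/``var`` doctest values can be reproduced by hand from the ``age`` column. A quick check (assuming ``import pandas as pd``):

>>> ages = pd.Series([21, 25, 62, 43])
>>> sq_dev = ((ages - ages.mean()) ** 2).sum()
>>> sq_dev / (len(ages) - 1)   # ddof=1 (the default): divide by N - 1, cf. df.var() above
352.9166666666667
>>> sq_dev / len(ages)         # ddof=0: divide by N, cf. df.var(ddof=0) above
264.6875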
+ +Parameters +---------- +axis : {{0 or 'index', 1 or 'columns'}}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +*args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + +Returns +------- +{name1} or {name2} + Return cumulative {desc} of {name1} or {name2}. + +See Also +-------- +core.window.expanding.Expanding.{accum_func_name} : Similar functionality + but ignores ``NaN`` values. +{name2}.{accum_func_name} : Return the {desc} over + {name2} axis. +{name2}.cummax : Return cumulative maximum over {name2} axis. +{name2}.cummin : Return cumulative minimum over {name2} axis. +{name2}.cumsum : Return cumulative sum over {name2} axis. +{name2}.cumprod : Return cumulative product over {name2} axis. + +{examples}""" + +_cummin_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cummin() +0 2.0 +1 NaN +2 2.0 +3 -1.0 +4 -1.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cummin(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the minimum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cummin() + A B +0 2.0 1.0 +1 2.0 NaN +2 1.0 0.0 + +To iterate over columns and find the minimum in each row, +use ``axis=1`` + +>>> df.cummin(axis=1) + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 +""" + +_cumsum_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cumsum() +0 2.0 +1 NaN +2 7.0 +3 6.0 +4 6.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cumsum(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the sum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cumsum() + A B +0 2.0 1.0 +1 5.0 NaN +2 6.0 1.0 + +To iterate over columns and find the sum in each row, +use ``axis=1`` + +>>> df.cumsum(axis=1) + A B +0 2.0 3.0 +1 3.0 NaN +2 1.0 1.0 +""" + +_cumprod_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cumprod() +0 2.0 +1 NaN +2 10.0 +3 -10.0 +4 -0.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cumprod(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the product +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 
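The See Also entries in `_cnum_doc` above point at the expanding-window equivalents; the practical difference is only how missing values propagate. An illustrative comparison using the ``cumsum`` doctest data (assuming ``import numpy as np`` and ``import pandas as pd``):

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s.expanding().sum()   # same running totals as s.cumsum() above, but the NaN slot is filled
0    2.0
1    2.0
2    7.0
3    6.0
4    6.0
dtype: float64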
+ +>>> df.cumprod() + A B +0 2.0 1.0 +1 6.0 NaN +2 6.0 0.0 + +To iterate over columns and find the product in each row, +use ``axis=1`` + +>>> df.cumprod(axis=1) + A B +0 2.0 2.0 +1 3.0 NaN +2 1.0 0.0 +""" + +_cummax_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cummax() +0 2.0 +1 NaN +2 5.0 +3 5.0 +4 5.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cummax(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the maximum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cummax() + A B +0 2.0 1.0 +1 3.0 NaN +2 3.0 1.0 + +To iterate over columns and find the maximum in each row, +use ``axis=1`` + +>>> df.cummax(axis=1) + A B +0 2.0 2.0 +1 3.0 NaN +2 1.0 1.0 +""" + +_any_see_also = """\ +See Also +-------- +numpy.any : Numpy version of this method. +Series.any : Return whether any element is True. +Series.all : Return whether all elements are True. +DataFrame.any : Return whether any element is True over requested axis. +DataFrame.all : Return whether all elements are True over requested axis. +""" + +_any_desc = """\ +Return whether any element is True, potentially over an axis. + +Returns False unless there is at least one element within a series or +along a Dataframe axis that is True or equivalent (e.g. non-zero or +non-empty).""" + +_any_examples = """\ +Examples +-------- +**Series** + +For Series input, the output is a scalar indicating whether any element +is True. + +>>> pd.Series([False, False]).any() +False +>>> pd.Series([True, False]).any() +True +>>> pd.Series([], dtype="float64").any() +False +>>> pd.Series([np.nan]).any() +False +>>> pd.Series([np.nan]).any(skipna=False) +True + +**DataFrame** + +Whether each column contains at least one True element (the default). + +>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) +>>> df + A B C +0 1 0 0 +1 2 2 0 + +>>> df.any() +A True +B True +C False +dtype: bool + +Aggregating over the columns. + +>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) +>>> df + A B +0 True 1 +1 False 2 + +>>> df.any(axis='columns') +0 True +1 True +dtype: bool + +>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) +>>> df + A B +0 True 1 +1 False 0 + +>>> df.any(axis='columns') +0 True +1 False +dtype: bool + +Aggregating over the entire DataFrame with ``axis=None``. + +>>> df.any(axis=None) +True + +`any` for an empty DataFrame is an empty Series. + +>>> pd.DataFrame([]).any() +Series([], dtype: bool) +""" + +_shared_docs[ + "stat_func_example" +] = """ + +Examples +-------- +>>> idx = pd.MultiIndex.from_arrays([ +... ['warm', 'warm', 'cold', 'cold'], +... ['dog', 'falcon', 'fish', 'spider']], +... names=['blooded', 'animal']) +>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx) +>>> s +blooded animal +warm dog 4 + falcon 2 +cold fish 0 + spider 8 +Name: legs, dtype: int64 + +>>> s.{stat_func}() +{default_output}""" + +_sum_examples = _shared_docs["stat_func_example"].format( + stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8 +) + +_sum_examples += """ + +By default, the sum of an empty or all-NA Series is ``0``. 
+ +>>> pd.Series([], dtype="float64").sum() # min_count=0 is the default +0.0 + +This can be controlled with the ``min_count`` parameter. For example, if +you'd like the sum of an empty series to be NaN, pass ``min_count=1``. + +>>> pd.Series([], dtype="float64").sum(min_count=1) +nan + +Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and +empty series identically. + +>>> pd.Series([np.nan]).sum() +0.0 + +>>> pd.Series([np.nan]).sum(min_count=1) +nan""" + +_max_examples: str = _shared_docs["stat_func_example"].format( + stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8 +) + +_min_examples: str = _shared_docs["stat_func_example"].format( + stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0 +) + +_stat_func_see_also = """ + +See Also +-------- +Series.sum : Return the sum. +Series.min : Return the minimum. +Series.max : Return the maximum. +Series.idxmin : Return the index of the minimum. +Series.idxmax : Return the index of the maximum. +DataFrame.sum : Return the sum over the requested axis. +DataFrame.min : Return the minimum over the requested axis. +DataFrame.max : Return the maximum over the requested axis. +DataFrame.idxmin : Return the index of the minimum over the requested axis. +DataFrame.idxmax : Return the index of the maximum over the requested axis.""" + +_prod_examples = """ + +Examples +-------- +By default, the product of an empty or all-NA Series is ``1`` + +>>> pd.Series([], dtype="float64").prod() +1.0 + +This can be controlled with the ``min_count`` parameter + +>>> pd.Series([], dtype="float64").prod(min_count=1) +nan + +Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and +empty series identically. + +>>> pd.Series([np.nan]).prod() +1.0 + +>>> pd.Series([np.nan]).prod(min_count=1) +nan""" + +_min_count_stub = """\ +min_count : int, default 0 + The required number of valid values to perform the operation. If fewer than + ``min_count`` non-NA values are present the result will be NA. +""" + + +def _align_as_utc( + left: NDFrameT, right: NDFrameT, join_index: Index | None +) -> tuple[NDFrameT, NDFrameT]: + """ + If we are aligning timezone-aware DatetimeIndexes and the timezones + do not match, convert both to UTC. + """ + if is_datetime64tz_dtype(left.index.dtype): + if left.index.tz != right.index.tz: + if join_index is not None: + # GH#33671 ensure we don't change the index on + # our original Series (NB: by default deep=False) + left = left.copy() + right = right.copy() + left.index = join_index + right.index = join_index + + return left, right diff --git a/pandas/core/groupby/__init__.py b/pandas/core/groupby/__init__.py new file mode 100644 index 00000000..8248f378 --- /dev/null +++ b/pandas/core/groupby/__init__.py @@ -0,0 +1,15 @@ +from pandas.core.groupby.generic import ( + DataFrameGroupBy, + NamedAgg, + SeriesGroupBy, +) +from pandas.core.groupby.groupby import GroupBy +from pandas.core.groupby.grouper import Grouper + +__all__ = [ + "DataFrameGroupBy", + "NamedAgg", + "SeriesGroupBy", + "GroupBy", + "Grouper", +] diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py new file mode 100644 index 00000000..ad1f36e0 --- /dev/null +++ b/pandas/core/groupby/base.py @@ -0,0 +1,166 @@ +""" +Provide basic components for groupby. These definitions +hold the allowlist of methods that are exposed on the +SeriesGroupBy and the DataFrameGroupBy objects. 
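`_min_count_stub` above is shared by ``sum``/``prod``, and the behaviour extends beyond the empty and all-NA doctests to any Series with fewer valid values than ``min_count``. A small sketch (assuming ``import numpy as np`` and ``import pandas as pd``):

>>> s = pd.Series([1.0, np.nan])
>>> s.sum(min_count=2)   # only one non-NA value present, so the result is NA
nan
>>> s.sum(min_count=1)
1.0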
+""" +from __future__ import annotations + +import dataclasses +from typing import ( + Hashable, + Literal, +) + + +@dataclasses.dataclass(order=True, frozen=True) +class OutputKey: + label: Hashable + position: int + + +# special case to prevent duplicate plots when catching exceptions when +# forwarding methods from NDFrames +plotting_methods = frozenset(["plot", "hist"]) + +common_apply_allowlist = ( + frozenset( + [ + "quantile", + "fillna", + "mad", + "take", + "idxmax", + "idxmin", + "tshift", + "skew", + "corr", + "cov", + "diff", + ] + ) + | plotting_methods +) + +series_apply_allowlist: frozenset[str] = ( + common_apply_allowlist + | frozenset( + {"nlargest", "nsmallest", "is_monotonic_increasing", "is_monotonic_decreasing"} + ) +) | frozenset(["dtype", "unique"]) + +dataframe_apply_allowlist: frozenset[str] = common_apply_allowlist | frozenset( + ["dtypes", "corrwith"] +) + +# cythonized transformations or canned "agg+broadcast", which do not +# require postprocessing of the result by transform. +cythonized_kernels = frozenset(["cumprod", "cumsum", "shift", "cummin", "cummax"]) + +# List of aggregation/reduction functions. +# These map each group to a single numeric value +reduction_kernels = frozenset( + [ + "all", + "any", + "corrwith", + "count", + "first", + "idxmax", + "idxmin", + "last", + "mad", + "max", + "mean", + "median", + "min", + "nth", + "nunique", + "prod", + # as long as `quantile`'s signature accepts only + # a single quantile value, it's a reduction. + # GH#27526 might change that. + "quantile", + "sem", + "size", + "skew", + "std", + "sum", + "var", + ] +) + +# List of transformation functions. +# a transformation is a function that, for each group, +# produces a result that has the same shape as the group. + + +# TODO(2.0) Remove after pad/backfill deprecation enforced +def maybe_normalize_deprecated_kernels(kernel) -> Literal["bfill", "ffill"]: + if kernel == "backfill": + kernel = "bfill" + elif kernel == "pad": + kernel = "ffill" + return kernel + + +transformation_kernels = frozenset( + [ + "backfill", + "bfill", + "cumcount", + "cummax", + "cummin", + "cumprod", + "cumsum", + "diff", + "ffill", + "fillna", + "ngroup", + "pad", + "pct_change", + "rank", + "shift", + "tshift", + ] +) + +# these are all the public methods on Grouper which don't belong +# in either of the above lists +groupby_other_methods = frozenset( + [ + "agg", + "aggregate", + "apply", + "boxplot", + # corr and cov return ngroups*ncolumns rows, so they + # are neither a transformation nor a reduction + "corr", + "cov", + "describe", + "dtypes", + "expanding", + "ewm", + "filter", + "get_group", + "groups", + "head", + "hist", + "indices", + "ndim", + "ngroups", + "ohlc", + "pipe", + "plot", + "resample", + "rolling", + "tail", + "take", + "transform", + "sample", + "value_counts", + ] +) +# Valid values of `name` for `groupby.transform(name)` +# NOTE: do NOT edit this directly. New additions should be inserted +# into the appropriate list above. 
+transform_kernel_allowlist = reduction_kernels | transformation_kernels diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py new file mode 100644 index 00000000..a9ad2401 --- /dev/null +++ b/pandas/core/groupby/categorical.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from pandas.core.algorithms import unique1d +from pandas.core.arrays.categorical import ( + Categorical, + CategoricalDtype, + recode_for_categories, +) + +if TYPE_CHECKING: + from pandas.core.indexes.api import CategoricalIndex + + +def recode_for_groupby( + c: Categorical, sort: bool, observed: bool +) -> tuple[Categorical, Categorical | None]: + """ + Code the categories to ensure we can groupby for categoricals. + + If observed=True, we return a new Categorical with the observed + categories only. + + If sort=False, return a copy of self, coded with categories as + returned by .unique(), followed by any categories not appearing in + the data. If sort=True, return self. + + This method is needed solely to ensure the categorical index of the + GroupBy result has categories in the order of appearance in the data + (GH-8868). + + Parameters + ---------- + c : Categorical + sort : bool + The value of the sort parameter groupby was called with. + observed : bool + Account only for the observed values + + Returns + ------- + Categorical + If sort=False, the new categories are set to the order of + appearance in codes (unless ordered=True, in which case the + original order is preserved), followed by any unrepresented + categories in the original order. + Categorical or None + If we are observed, return the original categorical, otherwise None + """ + # we only care about observed values + if observed: + # In cases with c.ordered, this is equivalent to + # return c.remove_unused_categories(), c + + unique_codes = unique1d(c.codes) + + take_codes = unique_codes[unique_codes != -1] + if c.ordered: + take_codes = np.sort(take_codes) + + # we recode according to the uniques + categories = c.categories.take(take_codes) + codes = recode_for_categories(c.codes, c.categories, categories) + + # return a new categorical that maps our new codes + # and categories + dtype = CategoricalDtype(categories, ordered=c.ordered) + return Categorical(codes, dtype=dtype, fastpath=True), c + + # Already sorted according to c.categories; all is fine + if sort: + return c, None + + # sort=False should order groups in as-encountered order (GH-8868) + cat = c.unique() + + # See GH-38140 for block below + # exclude nan from indexer for categories + take_codes = cat.codes[cat.codes != -1] + if cat.ordered: + take_codes = np.sort(take_codes) + cat = cat.set_categories(cat.categories.take(take_codes)) + + # But for groupby to work, all categories should be present, + # including those missing from the data (GH-13179), which .unique() + # above dropped + cat = cat.add_categories(c.categories[~c.categories.isin(cat.categories)]) + + return c.reorder_categories(cat.categories), None + + +def recode_from_groupby( + c: Categorical, sort: bool, ci: CategoricalIndex +) -> CategoricalIndex: + """ + Reverse the codes_to_groupby to account for sort / observed. + + Parameters + ---------- + c : Categorical + sort : bool + The value of the sort parameter groupby was called with. 
+ ci : CategoricalIndex + The codes / categories to recode + + Returns + ------- + CategoricalIndex + """ + # we re-order to the original category orderings + if sort: + # error: "CategoricalIndex" has no attribute "set_categories" + return ci.set_categories(c.categories) # type: ignore[attr-defined] + + # we are not sorting, so add unobserved to the end + new_cats = c.categories[~c.categories.isin(ci.categories)] + # error: "CategoricalIndex" has no attribute "add_categories" + return ci.add_categories(new_cats) # type: ignore[attr-defined] diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py new file mode 100644 index 00000000..7e6e138f --- /dev/null +++ b/pandas/core/groupby/generic.py @@ -0,0 +1,1942 @@ +""" +Define the SeriesGroupBy and DataFrameGroupBy +classes that hold the groupby interfaces (and some implementations). + +These are user facing as the result of the ``df.groupby(...)`` operations, +which here returns a DataFrameGroupBy object. +""" +from __future__ import annotations + +from collections import abc +from functools import partial +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterable, + Mapping, + NamedTuple, + Sequence, + TypeVar, + Union, + cast, +) +import warnings + +import numpy as np + +from pandas._libs import ( + Interval, + lib, + reduction as libreduction, +) +from pandas._typing import ( + ArrayLike, + Manager, + Manager2D, + SingleManager, +) +from pandas.errors import SpecificationError +from pandas.util._decorators import ( + Appender, + Substitution, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_int64, + is_bool, + is_categorical_dtype, + is_dict_like, + is_integer_dtype, + is_interval_dtype, + is_scalar, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core import ( + algorithms, + nanops, +) +from pandas.core.apply import ( + GroupByApply, + maybe_mangle_lambdas, + reconstruct_func, + validate_func_kwargs, +) +from pandas.core.arrays.categorical import Categorical +import pandas.core.common as com +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.frame import DataFrame +from pandas.core.groupby import base +from pandas.core.groupby.groupby import ( + GroupBy, + _agg_template, + _apply_docs, + _transform_template, + warn_dropping_nuisance_columns_deprecated, +) +from pandas.core.groupby.grouper import get_grouper +from pandas.core.indexes.api import ( + Index, + MultiIndex, + all_indexes_same, +) +from pandas.core.indexes.category import CategoricalIndex +from pandas.core.series import Series +from pandas.core.shared_docs import _shared_docs +from pandas.core.util.numba_ import maybe_use_numba + +from pandas.plotting import boxplot_frame_groupby + +if TYPE_CHECKING: + from pandas.core.generic import NDFrame + +# TODO(typing) the return value on this callable should be any *scalar*. +AggScalar = Union[str, Callable[..., Any]] +# TODO: validate types on ScalarResult and move to _typing +# Blocked from using by https://github.com/python/mypy/issues/1484 +# See note at _mangle_lambda_list +ScalarResult = TypeVar("ScalarResult") + + +class NamedAgg(NamedTuple): + column: Hashable + aggfunc: AggScalar + + +def generate_property(name: str, klass: type[DataFrame | Series]): + """ + Create a property for a GroupBy subclass to dispatch to DataFrame/Series. 
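For the categorical recoding helpers above: with ``sort=False`` and ``observed=False``, `recode_for_groupby` reorders categories to order of appearance and appends the unobserved ones, returning ``None`` as the second element. An illustrative sketch against the internal module (outputs indicative of pandas 1.5):

>>> import pandas as pd
>>> from pandas.core.groupby.categorical import recode_for_groupby
>>> c = pd.Categorical(["b", "a", "b"], categories=["a", "b", "c"])
>>> grouping, original = recode_for_groupby(c, sort=False, observed=False)
>>> list(grouping.categories)
['b', 'a', 'c']
>>> original is None
True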
+ + Parameters + ---------- + name : str + klass : {DataFrame, Series} + + Returns + ------- + property + """ + + def prop(self): + return self._make_wrapper(name) + + parent_method = getattr(klass, name) + prop.__doc__ = parent_method.__doc__ or "" + prop.__name__ = name + return property(prop) + + +def pin_allowlisted_properties( + klass: type[DataFrame | Series], allowlist: frozenset[str] +): + """ + Create GroupBy member defs for DataFrame/Series names in a allowlist. + + Parameters + ---------- + klass : DataFrame or Series class + class where members are defined. + allowlist : frozenset[str] + Set of names of klass methods to be constructed + + Returns + ------- + class decorator + + Notes + ----- + Since we don't want to override methods explicitly defined in the + base class, any such name is skipped. + """ + + def pinner(cls): + for name in allowlist: + if hasattr(cls, name): + # don't override anything that was explicitly defined + # in the base class + continue + + prop = generate_property(name, klass) + setattr(cls, name, prop) + + return cls + + return pinner + + +@pin_allowlisted_properties(Series, base.series_apply_allowlist) +class SeriesGroupBy(GroupBy[Series]): + _apply_allowlist = base.series_apply_allowlist + + def _wrap_agged_manager(self, mgr: Manager) -> Series: + if mgr.ndim == 1: + mgr = cast(SingleManager, mgr) + single = mgr + else: + mgr = cast(Manager2D, mgr) + single = mgr.iget(0) + ser = self.obj._constructor(single, name=self.obj.name) + # NB: caller is responsible for setting ser.index + return ser + + def _get_data_to_aggregate(self) -> SingleManager: + ser = self._obj_with_exclusions + single = ser._mgr + return single + + def _iterate_slices(self) -> Iterable[Series]: + yield self._selected_obj + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.groupby([1, 1, 2, 2]).min() + 1 1 + 2 3 + dtype: int64 + + >>> s.groupby([1, 1, 2, 2]).agg('min') + 1 1 + 2 3 + dtype: int64 + + >>> s.groupby([1, 1, 2, 2]).agg(['min', 'max']) + min max + 1 1 2 + 2 3 4 + + The output column names can be controlled by passing + the desired column names and aggregations as keyword arguments. + + >>> s.groupby([1, 1, 2, 2]).agg( + ... minimum='min', + ... maximum='max', + ... ) + minimum maximum + 1 1 2 + 2 3 4 + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the aggregating function. 
+ + >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min()) + 1 1.0 + 2 3.0 + dtype: float64 + """ + ) + + @Appender( + _apply_docs["template"].format( + input="series", examples=_apply_docs["series_examples"] + ) + ) + def apply(self, func, *args, **kwargs) -> Series: + return super().apply(func, *args, **kwargs) + + @doc(_agg_template, examples=_agg_examples_doc, klass="Series") + def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): + + if maybe_use_numba(engine): + with self._group_selection_context(): + data = self._selected_obj + result = self._aggregate_with_numba( + data.to_frame(), func, *args, engine_kwargs=engine_kwargs, **kwargs + ) + index = self.grouper.result_index + return self.obj._constructor(result.ravel(), index=index, name=data.name) + + relabeling = func is None + columns = None + if relabeling: + columns, func = validate_func_kwargs(kwargs) + kwargs = {} + + if isinstance(func, str): + return getattr(self, func)(*args, **kwargs) + + elif isinstance(func, abc.Iterable): + # Catch instances of lists / tuples + # but not the class list / tuple itself. + func = maybe_mangle_lambdas(func) + ret = self._aggregate_multiple_funcs(func) + if relabeling: + # columns is not narrowed by mypy from relabeling flag + assert columns is not None # for mypy + ret.columns = columns + return ret + + else: + cyfunc = com.get_cython_func(func) + if cyfunc and not args and not kwargs: + return getattr(self, cyfunc)() + + if self.grouper.nkeys > 1: + return self._python_agg_general(func, *args, **kwargs) + + try: + return self._python_agg_general(func, *args, **kwargs) + except KeyError: + # TODO: KeyError is raised in _python_agg_general, + # see test_groupby.test_basic + result = self._aggregate_named(func, *args, **kwargs) + + # result is a dict whose keys are the elements of result_index + index = self.grouper.result_index + return create_series_with_explicit_dtype( + result, index=index, dtype_if_empty=object + ) + + agg = aggregate + + def _aggregate_multiple_funcs(self, arg) -> DataFrame: + if isinstance(arg, dict): + + # show the deprecation, but only if we + # have not shown a higher level one + # GH 15931 + raise SpecificationError("nested renamer is not supported") + + elif any(isinstance(x, (tuple, list)) for x in arg): + arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] + + # indicated column order + columns = next(zip(*arg)) + else: + # list of functions / function names + columns = [] + for f in arg: + columns.append(com.get_callable_name(f) or f) + + arg = zip(columns, arg) + + results: dict[base.OutputKey, DataFrame | Series] = {} + for idx, (name, func) in enumerate(arg): + + key = base.OutputKey(label=name, position=idx) + results[key] = self.aggregate(func) + + if any(isinstance(x, DataFrame) for x in results.values()): + from pandas import concat + + res_df = concat( + results.values(), axis=1, keys=[key.label for key in results.keys()] + ) + return res_df + + indexed_output = {key.position: val for key, val in results.items()} + output = self.obj._constructor_expanddim(indexed_output, index=None) + output.columns = Index(key.label for key in results) + + output = self._reindex_output(output) + return output + + def _indexed_output_to_ndframe( + self, output: Mapping[base.OutputKey, ArrayLike] + ) -> Series: + """ + Wrap the dict result of a GroupBy aggregation into a Series. 
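`_aggregate_multiple_funcs` above rejects dict arguments outright, which is what users hit when passing a renaming dict to ``SeriesGroupBy.agg``. A sketch of that error path (assuming ``import pandas as pd``):

>>> s = pd.Series([1, 2, 3, 4])
>>> s.groupby([1, 1, 2, 2]).agg({"a": "min"})
Traceback (most recent call last):
    ...
pandas.errors.SpecificationError: nested renamer is not supported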
+ """ + assert len(output) == 1 + values = next(iter(output.values())) + result = self.obj._constructor(values) + result.name = self.obj.name + return result + + def _wrap_applied_output( + self, + data: Series, + values: list[Any], + not_indexed_same: bool = False, + override_group_keys: bool = False, + ) -> DataFrame | Series: + """ + Wrap the output of SeriesGroupBy.apply into the expected result. + + Parameters + ---------- + data : Series + Input data for groupby operation. + values : List[Any] + Applied output for each group. + not_indexed_same : bool, default False + Whether the applied outputs are not indexed the same as the group axes. + + Returns + ------- + DataFrame or Series + """ + if len(values) == 0: + # GH #6265 + return self.obj._constructor( + [], + name=self.obj.name, + index=self.grouper.result_index, + dtype=data.dtype, + ) + assert values is not None + + if isinstance(values[0], dict): + # GH #823 #24880 + index = self.grouper.result_index + res_df = self.obj._constructor_expanddim(values, index=index) + res_df = self._reindex_output(res_df) + # if self.observed is False, + # keep all-NaN rows created while re-indexing + res_ser = res_df.stack(dropna=self.observed) + res_ser.name = self.obj.name + return res_ser + elif isinstance(values[0], (Series, DataFrame)): + result = self._concat_objects( + values, + not_indexed_same=not_indexed_same, + override_group_keys=override_group_keys, + ) + if isinstance(result, Series): + result.name = self.obj.name + return result + else: + # GH #6265 #24880 + result = self.obj._constructor( + data=values, index=self.grouper.result_index, name=self.obj.name + ) + return self._reindex_output(result) + + def _aggregate_named(self, func, *args, **kwargs): + # Note: this is very similar to _aggregate_series_pure_python, + # but that does not pin group.name + result = {} + initialized = False + + for name, group in self: + object.__setattr__(group, "name", name) + + output = func(group, *args, **kwargs) + output = libreduction.extract_result(output) + if not initialized: + # We only do this validation on the first iteration + libreduction.check_result_array(output, group.dtype) + initialized = True + result[name] = output + + return result + + @Substitution(klass="Series") + @Appender(_transform_template) + def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + return self._transform( + func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + def _cython_transform( + self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + ): + assert axis == 0 # handled by caller + + obj = self._selected_obj + + try: + result = self.grouper._cython_operation( + "transform", obj._values, how, axis, **kwargs + ) + except NotImplementedError as err: + raise TypeError(f"{how} is not supported for {obj.dtype} dtype") from err + + return obj._constructor(result, index=self.obj.index, name=obj.name) + + def _transform_general(self, func: Callable, *args, **kwargs) -> Series: + """ + Transform with a callable func`. 
+ """ + assert callable(func) + klass = type(self.obj) + + results = [] + for name, group in self.grouper.get_iterator( + self._selected_obj, axis=self.axis + ): + # this setattr is needed for test_transform_lambda_with_datetimetz + object.__setattr__(group, "name", name) + res = func(group, *args, **kwargs) + + results.append(klass(res, index=group.index)) + + # check for empty "results" to avoid concat ValueError + if results: + from pandas.core.reshape.concat import concat + + concatenated = concat(results) + result = self._set_result_index_ordered(concatenated) + else: + result = self.obj._constructor(dtype=np.float64) + + result.name = self.obj.name + return result + + def filter(self, func, dropna: bool = True, *args, **kwargs): + """ + Return a copy of a Series excluding elements from groups that + do not satisfy the boolean criterion specified by func. + + Parameters + ---------- + func : function + To apply to each group. Should return True or False. + dropna : Drop groups that do not pass the filter. True by default; + if False, groups that evaluate False are filled with NaNs. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + ... 'foo', 'bar'], + ... 'B' : [1, 2, 3, 4, 5, 6], + ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) + >>> grouped = df.groupby('A') + >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.) + 1 2 + 3 4 + 5 6 + Name: B, dtype: int64 + + Returns + ------- + filtered : Series + """ + if isinstance(func, str): + wrapper = lambda x: getattr(x, func)(*args, **kwargs) + else: + wrapper = lambda x: func(x, *args, **kwargs) + + # Interpret np.nan as False. + def true_and_notna(x) -> bool: + b = wrapper(x) + return b and notna(b) + + try: + indices = [ + self._get_index(name) for name, group in self if true_and_notna(group) + ] + except (ValueError, TypeError) as err: + raise TypeError("the filter must return a boolean result") from err + + filtered = self._apply_filter(indices, dropna) + return filtered + + def nunique(self, dropna: bool = True) -> Series: + """ + Return number of unique elements in the group. + + Returns + ------- + Series + Number of unique values within each group. 
+ """ + ids, _, _ = self.grouper.group_info + + val = self.obj._values + + codes, _ = algorithms.factorize(val, sort=False) + sorter = np.lexsort((codes, ids)) + codes = codes[sorter] + ids = ids[sorter] + + # group boundaries are where group ids change + # unique observations are where sorted values change + idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] + inc = np.r_[1, codes[1:] != codes[:-1]] + + # 1st item of each group is a new unique observation + mask = codes == -1 + if dropna: + inc[idx] = 1 + inc[mask] = 0 + else: + inc[mask & np.r_[False, mask[:-1]]] = 0 + inc[idx] = 1 + + out = np.add.reduceat(inc, idx).astype("int64", copy=False) + if len(ids): + # NaN/NaT group exists if the head of ids is -1, + # so remove it from res and exclude its index from idx + if ids[0] == -1: + res = out[1:] + idx = idx[np.flatnonzero(idx)] + else: + res = out + else: + res = out[1:] + ri = self.grouper.result_index + + # we might have duplications among the bins + if len(res) != len(ri): + res, out = np.zeros(len(ri), dtype=out.dtype), res + res[ids[idx]] = out + + result = self.obj._constructor(res, index=ri, name=self.obj.name) + return self._reindex_output(result, fill_value=0) + + @doc(Series.describe) + def describe(self, **kwargs): + return super().describe(**kwargs) + + def value_counts( + self, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + bins=None, + dropna: bool = True, + ) -> Series: + + from pandas.core.reshape.merge import get_join_indexers + from pandas.core.reshape.tile import cut + + ids, _, _ = self.grouper.group_info + val = self.obj._values + + names = self.grouper.names + [self.obj.name] + + if is_categorical_dtype(val.dtype) or ( + bins is not None and not np.iterable(bins) + ): + # scalar bins cannot be done at top level + # in a backward compatible way + # GH38672 relates to categorical dtype + ser = self.apply( + Series.value_counts, + normalize=normalize, + sort=sort, + ascending=ascending, + bins=bins, + ) + ser.index.names = names + return ser + + # groupby removes null keys from groupings + mask = ids != -1 + ids, val = ids[mask], val[mask] + + if bins is None: + lab, lev = algorithms.factorize(val, sort=True) + llab = lambda lab, inc: lab[inc] + else: + + # lab is a Categorical with categories an IntervalIndex + lab = cut(Series(val), bins, include_lowest=True) + # error: "ndarray" has no attribute "cat" + lev = lab.cat.categories # type: ignore[attr-defined] + # error: No overload variant of "take" of "_ArrayOrScalarCommon" matches + # argument types "Any", "bool", "Union[Any, float]" + lab = lev.take( # type: ignore[call-overload] + # error: "ndarray" has no attribute "cat" + lab.cat.codes, # type: ignore[attr-defined] + allow_fill=True, + # error: Item "ndarray" of "Union[ndarray, Index]" has no attribute + # "_na_value" + fill_value=lev._na_value, # type: ignore[union-attr] + ) + llab = lambda lab, inc: lab[inc]._multiindex.codes[-1] + + if is_interval_dtype(lab.dtype): + # TODO: should we do this inside II? 
+ lab_interval = cast(Interval, lab) + + sorter = np.lexsort((lab_interval.left, lab_interval.right, ids)) + else: + sorter = np.lexsort((lab, ids)) + + ids, lab = ids[sorter], lab[sorter] + + # group boundaries are where group ids change + idchanges = 1 + np.nonzero(ids[1:] != ids[:-1])[0] + idx = np.r_[0, idchanges] + if not len(ids): + idx = idchanges + + # new values are where sorted labels change + lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1)) + inc = np.r_[True, lchanges] + if not len(val): + inc = lchanges + inc[idx] = True # group boundaries are also new values + out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts + + # num. of times each group should be repeated + rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) + + # multi-index components + codes = self.grouper.reconstructed_codes + # error: Incompatible types in assignment (expression has type + # "List[ndarray[Any, dtype[_SCT]]]", + # variable has type "List[ndarray[Any, dtype[signedinteger[Any]]]]") + codes = [ # type: ignore[assignment] + rep(level_codes) for level_codes in codes + ] + [llab(lab, inc)] + # error: List item 0 has incompatible type "Union[ndarray[Any, Any], Index]"; + # expected "Index" + levels = [ping.group_index for ping in self.grouper.groupings] + [ + lev # type: ignore[list-item] + ] + + if dropna: + mask = codes[-1] != -1 + if mask.all(): + dropna = False + else: + out, codes = out[mask], [level_codes[mask] for level_codes in codes] + + if normalize: + out = out.astype("float") + d = np.diff(np.r_[idx, len(ids)]) + if dropna: + m = ids[lab == -1] + np.add.at(d, m, -1) + acc = rep(d)[mask] + else: + acc = rep(d) + out /= acc + + if sort and bins is None: + cat = ids[inc][mask] if dropna else ids[inc] + sorter = np.lexsort((out if ascending else -out, cat)) + out, codes[-1] = out[sorter], codes[-1][sorter] + + if bins is not None: + # for compat. with libgroupby.value_counts need to ensure every + # bin is present at every index level, null filled with zeros + diff = np.zeros(len(out), dtype="bool") + for level_codes in codes[:-1]: + diff |= np.r_[True, level_codes[1:] != level_codes[:-1]] + + ncat, nbin = diff.sum(), len(levels[-1]) + + left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)] + + right = [diff.cumsum() - 1, codes[-1]] + + _, idx = get_join_indexers(left, right, sort=False, how="left") + out = np.where(idx != -1, out[idx], 0) + + if sort: + sorter = np.lexsort((out if ascending else -out, left[0])) + out, left[-1] = out[sorter], left[-1][sorter] + + # build the multi-index w/ full levels + def build_codes(lev_codes: np.ndarray) -> np.ndarray: + return np.repeat(lev_codes[diff], nbin) + + codes = [build_codes(lev_codes) for lev_codes in codes[:-1]] + codes.append(left[-1]) + + mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False) + + if is_integer_dtype(out.dtype): + out = ensure_int64(out) + return self.obj._constructor(out, index=mi, name=self.obj.name) + + @doc(Series.nlargest) + def nlargest(self, n: int = 5, keep: str = "first") -> Series: + f = partial(Series.nlargest, n=n, keep=keep) + data = self._obj_with_exclusions + # Don't change behavior if result index happens to be the same, i.e. + # already ordered and n >= all group sizes. 
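The ``SeriesGroupBy.value_counts`` implementation above builds counts directly from codes rather than applying ``Series.value_counts`` per group (except for the categorical / scalar-bins fallback). An illustrative sketch of the ``normalize`` path (assuming ``import pandas as pd``; float formatting indicative):

>>> s = pd.Series(["a", "a", "b", "b", "b"], index=[1, 1, 1, 2, 2])
>>> s.groupby(level=0).value_counts(normalize=True)
1  a    0.666667
   b    0.333333
2  b    1.000000
dtype: float64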
+ result = self._python_apply_general(f, data, not_indexed_same=True) + return result + + @doc(Series.nsmallest) + def nsmallest(self, n: int = 5, keep: str = "first") -> Series: + f = partial(Series.nsmallest, n=n, keep=keep) + data = self._obj_with_exclusions + # Don't change behavior if result index happens to be the same, i.e. + # already ordered and n >= all group sizes. + result = self._python_apply_general(f, data, not_indexed_same=True) + return result + + +@pin_allowlisted_properties(DataFrame, base.dataframe_apply_allowlist) +class DataFrameGroupBy(GroupBy[DataFrame]): + + _apply_allowlist = base.dataframe_apply_allowlist + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "A": [1, 1, 2, 2], + ... "B": [1, 2, 3, 4], + ... "C": [0.362838, 0.227877, 1.267767, -0.562860], + ... } + ... ) + + >>> df + A B C + 0 1 1 0.362838 + 1 1 2 0.227877 + 2 2 3 1.267767 + 3 2 4 -0.562860 + + The aggregation is for each column. + + >>> df.groupby('A').agg('min') + B C + A + 1 1 0.227877 + 2 3 -0.562860 + + Multiple aggregations + + >>> df.groupby('A').agg(['min', 'max']) + B C + min max min max + A + 1 1 2 0.227877 0.362838 + 2 3 4 -0.562860 1.267767 + + Select a column for aggregation + + >>> df.groupby('A').B.agg(['min', 'max']) + min max + A + 1 1 2 + 2 3 4 + + User-defined function for aggregation + + >>> df.groupby('A').agg(lambda x: sum(x) + 2) + B C + A + 1 5 2.590715 + 2 9 2.704907 + + Different aggregations per column + + >>> df.groupby('A').agg({'B': ['min', 'max'], 'C': 'sum'}) + B C + min max sum + A + 1 1 2 0.590715 + 2 3 4 0.704907 + + To control the output names with different aggregations per column, + pandas supports "named aggregation" + + >>> df.groupby("A").agg( + ... b_min=pd.NamedAgg(column="B", aggfunc="min"), + ... c_sum=pd.NamedAgg(column="C", aggfunc="sum")) + b_min c_sum + A + 1 1 0.590715 + 2 3 0.704907 + + - The keywords are the *output* column names + - The values are tuples whose first element is the column to select + and the second element is the aggregation to apply to that column. + Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields + ``['column', 'aggfunc']`` to make it clearer what the arguments are. + As usual, the aggregation can be a callable or a string alias. + + See :ref:`groupby.aggregate.named` for more. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the aggregating function. 
+ + >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min()) + B + A + 1 1.0 + 2 3.0 + """ + ) + + @doc(_agg_template, examples=_agg_examples_doc, klass="DataFrame") + def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): + + if maybe_use_numba(engine): + with self._group_selection_context(): + data = self._selected_obj + result = self._aggregate_with_numba( + data, func, *args, engine_kwargs=engine_kwargs, **kwargs + ) + index = self.grouper.result_index + return self.obj._constructor(result, index=index, columns=data.columns) + + relabeling, func, columns, order = reconstruct_func(func, **kwargs) + func = maybe_mangle_lambdas(func) + + op = GroupByApply(self, func, args, kwargs) + result = op.agg() + if not is_dict_like(func) and result is not None: + return result + elif relabeling and result is not None: + # this should be the only (non-raising) case with relabeling + # used reordered index of columns + result = result.iloc[:, order] + result.columns = columns + + if result is None: + + # grouper specific aggregations + if self.grouper.nkeys > 1: + # test_groupby_as_index_series_scalar gets here with 'not self.as_index' + return self._python_agg_general(func, *args, **kwargs) + elif args or kwargs: + # test_pass_args_kwargs gets here (with and without as_index) + # can't return early + result = self._aggregate_frame(func, *args, **kwargs) + + elif self.axis == 1: + # _aggregate_multiple_funcs does not allow self.axis == 1 + # Note: axis == 1 precludes 'not self.as_index', see __init__ + result = self._aggregate_frame(func) + return result + + else: + + # try to treat as if we are passing a list + gba = GroupByApply(self, [func], args=(), kwargs={}) + try: + result = gba.agg() + + except ValueError as err: + if "no results" not in str(err): + # raised directly by _aggregate_multiple_funcs + raise + result = self._aggregate_frame(func) + + else: + sobj = self._selected_obj + + if isinstance(sobj, Series): + # GH#35246 test_groupby_as_index_select_column_sum_empty_df + result.columns = self._obj_with_exclusions.columns.copy() + else: + # Retain our column names + result.columns._set_names( + sobj.columns.names, level=list(range(sobj.columns.nlevels)) + ) + # select everything except for the last level, which is the one + # containing the name of the function(s), see GH#32040 + result.columns = result.columns.droplevel(-1) + + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + result.index = Index(range(len(result))) + + return result + + agg = aggregate + + def _iterate_slices(self) -> Iterable[Series]: + obj = self._selected_obj + if self.axis == 1: + obj = obj.T + + if isinstance(obj, Series) and obj.name not in self.exclusions: + # Occurs when doing DataFrameGroupBy(...)["X"] + yield obj + else: + for label, values in obj.items(): + if label in self.exclusions: + continue + + yield values + + def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: + if self.grouper.nkeys != 1: + raise AssertionError("Number of keys must be 1") + + obj = self._obj_with_exclusions + + result: dict[Hashable, NDFrame | np.ndarray] = {} + if self.axis == 0: + # test_pass_args_kwargs_duplicate_columns gets here with non-unique columns + for name, data in self.grouper.get_iterator(obj, self.axis): + fres = func(data, *args, **kwargs) + result[name] = fres + else: + # we get here in a number of test_multilevel tests + for name in self.indices: + grp_df = self.get_group(name, obj=obj) + fres = func(grp_df, *args, **kwargs) + result[name] = fres + + 
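With ``as_index=False``, the grouping keys come back as ordinary columns (re-inserted via `_insert_inaxis_grouper_inplace`) and the result gets a default integer index instead of a key-based one. A short sketch of ``DataFrameGroupBy`` aggregation in that mode (assuming ``import pandas as pd``):

>>> df = pd.DataFrame({"A": [1, 1, 2], "B": [10, 20, 30]})
>>> df.groupby("A", as_index=False).agg("sum")
   A   B
0  1  30
1  2  30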
result_index = self.grouper.result_index + other_ax = obj.axes[1 - self.axis] + out = self.obj._constructor(result, index=other_ax, columns=result_index) + if self.axis == 0: + out = out.T + + return out + + def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: + # only for axis==0 + # tests that get here with non-unique cols: + # test_resample_with_timedelta_yields_no_empty_groups, + # test_resample_apply_product + + obj = self._obj_with_exclusions + result: dict[int, NDFrame] = {} + + for i, (item, sgb) in enumerate(self._iterate_column_groupbys(obj)): + result[i] = sgb.aggregate(func, *args, **kwargs) + + res_df = self.obj._constructor(result) + res_df.columns = obj.columns + return res_df + + def _wrap_applied_output( + self, + data: DataFrame, + values: list, + not_indexed_same: bool = False, + override_group_keys: bool = False, + ): + + if len(values) == 0: + result = self.obj._constructor( + index=self.grouper.result_index, columns=data.columns + ) + result = result.astype(data.dtypes, copy=False) + return result + + # GH12824 + first_not_none = next(com.not_none(*values), None) + + if first_not_none is None: + # GH9684 - All values are None, return an empty frame. + return self.obj._constructor() + elif isinstance(first_not_none, DataFrame): + return self._concat_objects( + values, + not_indexed_same=not_indexed_same, + override_group_keys=override_group_keys, + ) + + key_index = self.grouper.result_index if self.as_index else None + + if isinstance(first_not_none, (np.ndarray, Index)): + # GH#1738: values is list of arrays of unequal lengths + # fall through to the outer else clause + # TODO: sure this is right? we used to do this + # after raising AttributeError above + return self.obj._constructor_sliced( + values, index=key_index, name=self._selection + ) + elif not isinstance(first_not_none, Series): + # values are not series or array-like but scalars + # self._selection not passed through to Series as the + # result should not take the name of original selection + # of columns + if self.as_index: + return self.obj._constructor_sliced(values, index=key_index) + else: + result = self.obj._constructor(values, columns=[self._selection]) + self._insert_inaxis_grouper_inplace(result) + return result + else: + # values are Series + return self._wrap_applied_output_series( + values, + not_indexed_same, + first_not_none, + key_index, + override_group_keys, + ) + + def _wrap_applied_output_series( + self, + values: list[Series], + not_indexed_same: bool, + first_not_none, + key_index, + override_group_keys: bool, + ) -> DataFrame | Series: + # this is to silence a DeprecationWarning + # TODO(2.0): Remove when default dtype of empty Series is object + kwargs = first_not_none._construct_axes_dict() + backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs) + values = [x if (x is not None) else backup for x in values] + + all_indexed_same = all_indexes_same(x.index for x in values) + + # GH3596 + # provide a reduction (Frame -> Series) if groups are + # unique + if self.squeeze: + applied_index = self._selected_obj._get_axis(self.axis) + singular_series = len(values) == 1 and applied_index.nlevels == 1 + + if singular_series: + # GH2893 + # we have series in the values array, we want to + # produce a series: + # if any of the sub-series are not indexed the same + # OR we don't have a multi-index and we have only a + # single values + return self._concat_objects( + values, + not_indexed_same=not_indexed_same, + override_group_keys=override_group_keys, + 
) + + # still a series + # path added as of GH 5545 + elif all_indexed_same: + from pandas.core.reshape.concat import concat + + return concat(values) + + if not all_indexed_same: + # GH 8467 + return self._concat_objects( + values, + not_indexed_same=True, + override_group_keys=override_group_keys, + ) + + # Combine values + # vstack+constructor is faster than concat and handles MI-columns + stacked_values = np.vstack([np.asarray(v) for v in values]) + + if self.axis == 0: + index = key_index + columns = first_not_none.index.copy() + if columns.name is None: + # GH6124 - propagate name of Series when it's consistent + names = {v.name for v in values} + if len(names) == 1: + columns.name = list(names)[0] + else: + index = first_not_none.index + columns = key_index + stacked_values = stacked_values.T + + if stacked_values.dtype == object: + # We'll have the DataFrame constructor do inference + stacked_values = stacked_values.tolist() + result = self.obj._constructor(stacked_values, index=index, columns=columns) + + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + + return self._reindex_output(result) + + def _cython_transform( + self, + how: str, + numeric_only: bool | lib.NoDefault = lib.no_default, + axis: int = 0, + **kwargs, + ) -> DataFrame: + assert axis == 0 # handled by caller + # TODO: no tests with self.ndim == 1 for DataFrameGroupBy + numeric_only_bool = self._resolve_numeric_only(how, numeric_only, axis) + + # With self.axis == 0, we have multi-block tests + # e.g. test_rank_min_int, test_cython_transform_frame + # test_transform_numeric_ret + # With self.axis == 1, _get_data_to_aggregate does a transpose + # so we always have a single block. + mgr: Manager2D = self._get_data_to_aggregate() + orig_mgr_len = len(mgr) + if numeric_only_bool: + mgr = mgr.get_numeric_data(copy=False) + + def arr_func(bvalues: ArrayLike) -> ArrayLike: + return self.grouper._cython_operation( + "transform", bvalues, how, 1, **kwargs + ) + + # We could use `mgr.apply` here and not have to set_axis, but + # we would have to do shape gymnastics for ArrayManager compat + res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True) + res_mgr.set_axis(1, mgr.axes[1]) + + if len(res_mgr) < orig_mgr_len: + warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) + + res_df = self.obj._constructor(res_mgr) + if self.axis == 1: + res_df = res_df.T + return res_df + + def _transform_general(self, func, *args, **kwargs): + from pandas.core.reshape.concat import concat + + applied = [] + obj = self._obj_with_exclusions + gen = self.grouper.get_iterator(obj, axis=self.axis) + fast_path, slow_path = self._define_paths(func, *args, **kwargs) + + # Determine whether to use slow or fast path by evaluating on the first group. + # Need to handle the case of an empty generator and process the result so that + # it does not need to be computed again. 
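`_transform_general` above drives ``DataFrameGroupBy.transform`` for callables: the fast or slow path is picked by evaluating the first group, each remaining group is evaluated in turn, and the pieces are concatenated back in the original row order. A short sketch (assuming ``import pandas as pd``):

>>> df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
>>> df.groupby("key")[["val"]].transform(lambda g: g - g.min())
   val
0    0
1    1
2    0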
+ try: + name, group = next(gen) + except StopIteration: + pass + else: + object.__setattr__(group, "name", name) + try: + path, res = self._choose_path(fast_path, slow_path, group) + except TypeError: + return self._transform_item_by_item(obj, fast_path) + except ValueError as err: + msg = "transform must return a scalar value for each group" + raise ValueError(msg) from err + if group.size > 0: + res = _wrap_transform_general_frame(self.obj, group, res) + applied.append(res) + + # Compute and process with the remaining groups + emit_alignment_warning = False + for name, group in gen: + if group.size == 0: + continue + object.__setattr__(group, "name", name) + res = path(group) + if ( + not emit_alignment_warning + and res.ndim == 2 + and not res.index.equals(group.index) + ): + emit_alignment_warning = True + + res = _wrap_transform_general_frame(self.obj, group, res) + applied.append(res) + + if emit_alignment_warning: + # GH#45648 + warnings.warn( + "In a future version of pandas, returning a DataFrame in " + "groupby.transform will align with the input's index. Apply " + "`.to_numpy()` to the result in the transform function to keep " + "the current behavior and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + concat_index = obj.columns if self.axis == 0 else obj.index + other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 + concatenated = concat(applied, axis=self.axis, verify_integrity=False) + concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) + return self._set_result_index_ordered(concatenated) + + @Substitution(klass="DataFrame") + @Appender(_transform_template) + def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + return self._transform( + func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + def _define_paths(self, func, *args, **kwargs): + if isinstance(func, str): + fast_path = lambda group: getattr(group, func)(*args, **kwargs) + slow_path = lambda group: group.apply( + lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis + ) + else: + fast_path = lambda group: func(group, *args, **kwargs) + slow_path = lambda group: group.apply( + lambda x: func(x, *args, **kwargs), axis=self.axis + ) + return fast_path, slow_path + + def _choose_path(self, fast_path: Callable, slow_path: Callable, group: DataFrame): + path = slow_path + res = slow_path(group) + + if self.ngroups == 1: + # no need to evaluate multiple paths when only + # a single group exists + return path, res + + # if we make it here, test if we can use the fast path + try: + res_fast = fast_path(group) + except AssertionError: + raise # pragma: no cover + except Exception: + # GH#29631 For user-defined function, we can't predict what may be + # raised; see test_transform.test_transform_fastpath_raises + return path, res + + # verify fast path returns either: + # a DataFrame with columns equal to group.columns + # OR a Series with index equal to group.columns + if isinstance(res_fast, DataFrame): + if not res_fast.columns.equals(group.columns): + return path, res + elif isinstance(res_fast, Series): + if not res_fast.index.equals(group.columns): + return path, res + else: + return path, res + + if res_fast.equals(res): + path = fast_path + + return path, res + + def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: + # iterate through columns, see test_transform_exclude_nuisance + # gets here with non-unique columns + output = {} + inds = [] + for i, (colname, sgb) in 
enumerate(self._iterate_column_groupbys(obj)): + try: + output[i] = sgb.transform(wrapper) + except TypeError: + # e.g. trying to call nanmean with string values + warn_dropping_nuisance_columns_deprecated( + type(self), "transform", numeric_only=False + ) + else: + inds.append(i) + + if not output: + raise TypeError("Transform function invalid for data types") + + columns = obj.columns.take(inds) + + result = self.obj._constructor(output, index=obj.index) + result.columns = columns + return result + + def filter(self, func, dropna=True, *args, **kwargs): + """ + Return a copy of a DataFrame excluding filtered elements. + + Elements from groups are filtered if they do not satisfy the + boolean criterion specified by func. + + Parameters + ---------- + func : function + Function to apply to each subframe. Should return True or False. + dropna : Drop groups that do not pass the filter. True by default; + If False, groups that evaluate False are filled with NaNs. + + Returns + ------- + filtered : DataFrame + + Notes + ----- + Each subframe is endowed the attribute 'name' in case you need to know + which group you are working on. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + ... 'foo', 'bar'], + ... 'B' : [1, 2, 3, 4, 5, 6], + ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) + >>> grouped = df.groupby('A') + >>> grouped.filter(lambda x: x['B'].mean() > 3.) + A B C + 1 bar 2 5.0 + 3 bar 4 1.0 + 5 bar 6 9.0 + """ + indices = [] + + obj = self._selected_obj + gen = self.grouper.get_iterator(obj, axis=self.axis) + + for name, group in gen: + object.__setattr__(group, "name", name) + + res = func(group, *args, **kwargs) + + try: + res = res.squeeze() + except AttributeError: # allow e.g., scalars and frames to pass + pass + + # interpret the result of the filter + if is_bool(res) or (is_scalar(res) and isna(res)): + if res and notna(res): + indices.append(self._get_index(name)) + else: + # non scalars aren't allowed + raise TypeError( + f"filter function returned a {type(res).__name__}, " + "but expected a scalar bool" + ) + + return self._apply_filter(indices, dropna) + + def __getitem__(self, key) -> DataFrameGroupBy | SeriesGroupBy: + if self.axis == 1: + # GH 37725 + raise ValueError("Cannot subset columns when using axis=1") + # per GH 23566 + if isinstance(key, tuple) and len(key) > 1: + # if len == 1, then it becomes a SeriesGroupBy and this is actually + # valid syntax, so don't raise warning + warnings.warn( + "Indexing with multiple keys (implicitly converted to a tuple " + "of keys) will be deprecated, use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return super().__getitem__(key) + + def _gotitem(self, key, ndim: int, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + if ndim == 2: + if subset is None: + subset = self.obj + return DataFrameGroupBy( + subset, + self.grouper, + axis=self.axis, + level=self.level, + grouper=self.grouper, + exclusions=self.exclusions, + selection=key, + as_index=self.as_index, + sort=self.sort, + group_keys=self.group_keys, + squeeze=self.squeeze, + observed=self.observed, + mutated=self.mutated, + dropna=self.dropna, + ) + elif ndim == 1: + if subset is 
None: + subset = self.obj[key] + return SeriesGroupBy( + subset, + level=self.level, + grouper=self.grouper, + selection=key, + sort=self.sort, + group_keys=self.group_keys, + squeeze=self.squeeze, + observed=self.observed, + dropna=self.dropna, + ) + + raise AssertionError("invalid ndim for _gotitem") + + def _get_data_to_aggregate(self) -> Manager2D: + obj = self._obj_with_exclusions + if self.axis == 1: + return obj.T._mgr + else: + return obj._mgr + + def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None: + # zip in reverse so we can always insert at loc 0 + columns = result.columns + for name, lev, in_axis in zip( + reversed(self.grouper.names), + reversed(self.grouper.get_group_levels()), + reversed([grp.in_axis for grp in self.grouper.groupings]), + ): + # GH #28549 + # When using .apply(-), name will be in columns already + if in_axis and name not in columns: + result.insert(0, name, lev) + + def _indexed_output_to_ndframe( + self, output: Mapping[base.OutputKey, ArrayLike] + ) -> DataFrame: + """ + Wrap the dict result of a GroupBy aggregation into a DataFrame. + """ + indexed_output = {key.position: val for key, val in output.items()} + columns = Index([key.label for key in output]) + columns._set_names(self._obj_with_exclusions._get_axis(1 - self.axis).names) + + result = self.obj._constructor(indexed_output) + result.columns = columns + return result + + def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: + if not self.as_index: + # GH 41998 - empty mgr always gets index of length 0 + rows = mgr.shape[1] if mgr.shape[0] > 0 else 0 + index = Index(range(rows)) + mgr.set_axis(1, index) + result = self.obj._constructor(mgr) + + self._insert_inaxis_grouper_inplace(result) + result = result._consolidate() + else: + index = self.grouper.result_index + mgr.set_axis(1, index) + result = self.obj._constructor(mgr) + + if self.axis == 1: + result = result.T + + # Note: we only need to pass datetime=True in order to get numeric + # values converted + return self._reindex_output(result)._convert(datetime=True) + + def _iterate_column_groupbys(self, obj: DataFrame | Series): + for i, colname in enumerate(obj.columns): + yield colname, SeriesGroupBy( + obj.iloc[:, i], + selection=colname, + grouper=self.grouper, + exclusions=self.exclusions, + observed=self.observed, + ) + + def _apply_to_column_groupbys(self, func, obj: DataFrame | Series) -> DataFrame: + from pandas.core.reshape.concat import concat + + columns = obj.columns + results = [ + func(col_groupby) for _, col_groupby in self._iterate_column_groupbys(obj) + ] + + if not len(results): + # concat would raise + return DataFrame([], columns=columns, index=self.grouper.result_index) + else: + return concat(results, keys=columns, axis=1) + + def nunique(self, dropna: bool = True) -> DataFrame: + """ + Return DataFrame with counts of unique elements in each position. + + Parameters + ---------- + dropna : bool, default True + Don't include NaN in the counts. + + Returns + ------- + nunique: DataFrame + + Examples + -------- + >>> df = pd.DataFrame({'id': ['spam', 'egg', 'egg', 'spam', + ... 'ham', 'ham'], + ... 'value1': [1, 5, 5, 2, 5, 5], + ... 
'value2': list('abbaxy')}) + >>> df + id value1 value2 + 0 spam 1 a + 1 egg 5 b + 2 egg 5 b + 3 spam 2 a + 4 ham 5 x + 5 ham 5 y + + >>> df.groupby('id').nunique() + value1 value2 + id + egg 1 1 + ham 1 2 + spam 2 1 + + Check for rows with the same id but conflicting values: + + >>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any()) + id value1 value2 + 0 spam 1 a + 3 spam 2 a + 4 ham 5 x + 5 ham 5 y + """ + + if self.axis != 0: + # see test_groupby_crash_on_nunique + return self._python_agg_general(lambda sgb: sgb.nunique(dropna)) + + obj = self._obj_with_exclusions + results = self._apply_to_column_groupbys( + lambda sgb: sgb.nunique(dropna), obj=obj + ) + + if not self.as_index: + results.index = Index(range(len(results))) + self._insert_inaxis_grouper_inplace(results) + + return results + + @doc( + _shared_docs["idxmax"], + numeric_only_default="True for axis=0, False for axis=1", + ) + def idxmax( + self, + axis=0, + skipna: bool = True, + numeric_only: bool | lib.NoDefault = lib.no_default, + ) -> DataFrame: + axis = DataFrame._get_axis_number(axis) + if numeric_only is lib.no_default: + # Cannot use self._resolve_numeric_only; we must pass None to + # DataFrame.idxmax for backwards compatibility + numeric_only_arg = None if axis == 0 else False + else: + numeric_only_arg = numeric_only + + def func(df): + with warnings.catch_warnings(): + # Suppress numeric_only warnings here, will warn below + warnings.filterwarnings("ignore", ".*numeric_only in DataFrame.argmax") + res = df._reduce( + nanops.nanargmax, + "argmax", + axis=axis, + skipna=skipna, + numeric_only=numeric_only_arg, + ) + indices = res._values + index = df._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return df._constructor_sliced(result, index=res.index) + + func.__name__ = "idxmax" + result = self._python_apply_general( + func, self._obj_with_exclusions, not_indexed_same=True + ) + self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only) + return result + + @doc( + _shared_docs["idxmin"], + numeric_only_default="True for axis=0, False for axis=1", + ) + def idxmin( + self, + axis=0, + skipna: bool = True, + numeric_only: bool | lib.NoDefault = lib.no_default, + ) -> DataFrame: + axis = DataFrame._get_axis_number(axis) + if numeric_only is lib.no_default: + # Cannot use self._resolve_numeric_only; we must pass None to + # DataFrame.idxmin for backwards compatibility + numeric_only_arg = None if axis == 0 else False + else: + numeric_only_arg = numeric_only + + def func(df): + with warnings.catch_warnings(): + # Suppress numeric_only warnings here, will warn below + warnings.filterwarnings("ignore", ".*numeric_only in DataFrame.argmin") + res = df._reduce( + nanops.nanargmin, + "argmin", + axis=axis, + skipna=skipna, + numeric_only=numeric_only_arg, + ) + indices = res._values + index = df._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return df._constructor_sliced(result, index=res.index) + + func.__name__ = "idxmin" + result = self._python_apply_general( + func, self._obj_with_exclusions, not_indexed_same=True + ) + self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only) + return result + + boxplot = boxplot_frame_groupby + + def value_counts( + self, + subset: Sequence[Hashable] | None = None, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + dropna: bool = True, + ) -> DataFrame | Series: + """ + Return a Series or DataFrame containing counts of unique rows. + + .. 
versionadded:: 1.4.0 + + Parameters + ---------- + subset : list-like, optional + Columns to use when counting unique combinations. + normalize : bool, default False + Return proportions rather than frequencies. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + dropna : bool, default True + Don’t include counts of rows that contain NA values. + + Returns + ------- + Series or DataFrame + Series if the groupby as_index is True, otherwise DataFrame. + + See Also + -------- + Series.value_counts: Equivalent method on Series. + DataFrame.value_counts: Equivalent method on DataFrame. + SeriesGroupBy.value_counts: Equivalent method on SeriesGroupBy. + + Notes + ----- + - If the groupby as_index is True then the returned Series will have a + MultiIndex with one level per input column. + - If the groupby as_index is False then the returned DataFrame will have an + additional column with the value_counts. The column is labelled 'count' or + 'proportion', depending on the ``normalize`` parameter. + + By default, rows that contain any NA values are omitted from + the result. + + By default, the result will be in descending order so that the + first element of each group is the most frequently-occurring row. + + Examples + -------- + >>> df = pd.DataFrame({ + ... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'], + ... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'], + ... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR'] + ... }) + + >>> df + gender education country + 0 male low US + 1 male medium FR + 2 female high US + 3 male low FR + 4 female high FR + 5 male low FR + + >>> df.groupby('gender').value_counts() + gender education country + female high FR 1 + US 1 + male low FR 2 + US 1 + medium FR 1 + dtype: int64 + + >>> df.groupby('gender').value_counts(ascending=True) + gender education country + female high FR 1 + US 1 + male low US 1 + medium FR 1 + low FR 2 + dtype: int64 + + >>> df.groupby('gender').value_counts(normalize=True) + gender education country + female high FR 0.50 + US 0.50 + male low FR 0.50 + US 0.25 + medium FR 0.25 + dtype: float64 + + >>> df.groupby('gender', as_index=False).value_counts() + gender education country count + 0 female high FR 1 + 1 female high US 1 + 2 male low FR 2 + 3 male low US 1 + 4 male medium FR 1 + + >>> df.groupby('gender', as_index=False).value_counts(normalize=True) + gender education country proportion + 0 female high FR 0.50 + 1 female high US 0.50 + 2 male low FR 0.50 + 3 male low US 0.25 + 4 male medium FR 0.25 + """ + if self.axis == 1: + raise NotImplementedError( + "DataFrameGroupBy.value_counts only handles axis=0" + ) + + with self._group_selection_context(): + df = self.obj + + in_axis_names = { + grouping.name for grouping in self.grouper.groupings if grouping.in_axis + } + if isinstance(self._selected_obj, Series): + name = self._selected_obj.name + keys = [] if name in in_axis_names else [self._selected_obj] + else: + unique_cols = set(self._selected_obj.columns) + if subset is not None: + subsetted = set(subset) + clashing = subsetted & set(in_axis_names) + if clashing: + raise ValueError( + f"Keys {clashing} in subset cannot be in " + "the groupby column keys." + ) + doesnt_exist = subsetted - unique_cols + if doesnt_exist: + raise ValueError( + f"Keys {doesnt_exist} in subset do not " + f"exist in the DataFrame." 
+ ) + else: + subsetted = unique_cols + + keys = [ + # Can't use .values because the column label needs to be preserved + self._selected_obj.iloc[:, idx] + for idx, name in enumerate(self._selected_obj.columns) + if name not in in_axis_names and name in subsetted + ] + + groupings = list(self.grouper.groupings) + for key in keys: + grouper, _, _ = get_grouper( + df, + key=key, + axis=self.axis, + sort=self.sort, + observed=False, + dropna=dropna, + ) + groupings += list(grouper.groupings) + + # Take the size of the overall columns + gb = df.groupby( + groupings, + sort=self.sort, + observed=self.observed, + dropna=self.dropna, + ) + result_series = cast(Series, gb.size()) + + # GH-46357 Include non-observed categories + # of non-grouping columns regardless of `observed` + if any( + isinstance(grouping.grouping_vector, (Categorical, CategoricalIndex)) + and not grouping._observed + for grouping in groupings + ): + levels_list = [ping.result_index for ping in groupings] + multi_index, _ = MultiIndex.from_product( + levels_list, names=[ping.name for ping in groupings] + ).sortlevel() + result_series = result_series.reindex(multi_index, fill_value=0) + + if normalize: + # Normalize the results by dividing by the original group sizes. + # We are guaranteed to have the first N levels be the + # user-requested grouping. + levels = list( + range(len(self.grouper.groupings), result_series.index.nlevels) + ) + indexed_group_size = result_series.groupby( + result_series.index.droplevel(levels), + sort=self.sort, + dropna=self.dropna, + ).transform("sum") + result_series /= indexed_group_size + + # Handle groups of non-observed categories + result_series = result_series.fillna(0.0) + + if sort: + # Sort the values and then resort by the main grouping + index_level = range(len(self.grouper.groupings)) + result_series = result_series.sort_values( + ascending=ascending + ).sort_index(level=index_level, sort_remaining=False) + + result: Series | DataFrame + if self.as_index: + result = result_series + else: + # Convert to frame + name = "proportion" if normalize else "count" + index = result_series.index + columns = com.fill_missing_names(index.names) + if name in columns: + raise ValueError( + f"Column label '{name}' is duplicate of result column" + ) + result_series.name = name + result_series.index = index.set_names(range(len(columns))) + result_frame = result_series.reset_index() + result_frame.columns = columns + [name] + result = result_frame + return result.__finalize__(self.obj, method="value_counts") + + +def _wrap_transform_general_frame( + obj: DataFrame, group: DataFrame, res: DataFrame | Series +) -> DataFrame: + from pandas import concat + + if isinstance(res, Series): + # we need to broadcast across the + # other dimension; this will preserve dtypes + # GH14457 + if res.index.is_(obj.index): + res_frame = concat([res] * len(group.columns), axis=1) + res_frame.columns = group.columns + res_frame.index = group.index + else: + res_frame = obj._constructor( + np.tile(res.values, (len(group.index), 1)), + columns=group.columns, + index=group.index, + ) + assert isinstance(res_frame, DataFrame) + return res_frame + else: + return res diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py new file mode 100644 index 00000000..1c3a95b3 --- /dev/null +++ b/pandas/core/groupby/groupby.py @@ -0,0 +1,4437 @@ +""" +Provide the groupby split-apply-combine paradigm. Define the GroupBy +class providing the base-class of operations. 
+ +The SeriesGroupBy and DataFrameGroupBy sub-class +(defined in pandas.core.groupby.generic) +expose these user-facing objects to provide specific functionality. +""" +from __future__ import annotations + +from contextlib import ( + contextmanager, + nullcontext, +) +import datetime +from functools import ( + partial, + wraps, +) +import inspect +from textwrap import dedent +import types +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + Iterable, + Iterator, + List, + Literal, + Mapping, + Sequence, + TypeVar, + Union, + cast, + final, +) +import warnings + +import numpy as np + +from pandas._config.config import option_context + +from pandas._libs import ( + Timestamp, + lib, +) +import pandas._libs.groupby as libgroupby +from pandas._typing import ( + ArrayLike, + IndexLabel, + NDFrameT, + PositionalIndexer, + RandomState, + Scalar, + T, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import ( + AbstractMethodError, + DataError, +) +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, + doc, +) +from pandas.util._exceptions import ( + find_stack_level, + rewrite_warning, +) + +from pandas.core.dtypes.cast import ensure_dtype_can_hold_na +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_datetime64_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core import nanops +from pandas.core._numba import executor +import pandas.core.algorithms as algorithms +from pandas.core.arrays import ( + BaseMaskedArray, + BooleanArray, + Categorical, + ExtensionArray, +) +from pandas.core.base import ( + PandasObject, + SelectionMixin, +) +import pandas.core.common as com +from pandas.core.frame import DataFrame +from pandas.core.generic import NDFrame +from pandas.core.groupby import ( + base, + numba_, + ops, +) +from pandas.core.groupby.indexing import ( + GroupByIndexingMixin, + GroupByNthSelector, +) +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, + MultiIndex, + RangeIndex, +) +from pandas.core.internals.blocks import ensure_block_shape +import pandas.core.sample as sample +from pandas.core.series import Series +from pandas.core.sorting import get_group_index_sorter +from pandas.core.util.numba_ import ( + get_jit_arguments, + maybe_use_numba, +) + +if TYPE_CHECKING: + from pandas.core.window import ( + ExpandingGroupby, + ExponentialMovingWindowGroupby, + RollingGroupby, + ) + +_common_see_also = """ + See Also + -------- + Series.%(name)s : Apply a function %(name)s to a Series. + DataFrame.%(name)s : Apply a function %(name)s + to each row or column of a DataFrame. +""" + +_apply_docs = { + "template": """ + Apply function ``func`` group-wise and combine the results together. + + The function passed to ``apply`` must take a {input} as its first + argument and return a DataFrame, Series or scalar. ``apply`` will + then take care of combining the results back together into a single + dataframe or series. ``apply`` is therefore a highly flexible + grouping method. + + While ``apply`` is a very flexible method, its downside is that + using it can be quite a bit slower than using more specific methods + like ``agg`` or ``transform``. Pandas offers a wide range of method that will + be much faster than using ``apply`` for their specific purposes, so try to + use them before reaching for ``apply``. 
+ + Parameters + ---------- + func : callable + A callable that takes a {input} as its first argument, and + returns a dataframe, a series or a scalar. In addition the + callable may take positional and keyword arguments. + args, kwargs : tuple and dict + Optional positional and keyword arguments to pass to ``func``. + + Returns + ------- + applied : Series or DataFrame + + See Also + -------- + pipe : Apply function to the full GroupBy object instead of to each + group. + aggregate : Apply aggregate function to the GroupBy object. + transform : Apply function column-by-column to the GroupBy object. + Series.apply : Apply a function to a Series. + DataFrame.apply : Apply a function to each row or column of a DataFrame. + + Notes + ----- + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + {examples} + """, + "dataframe_examples": """ + >>> df = pd.DataFrame({'A': 'a a b'.split(), + ... 'B': [1,2,3], + ... 'C': [4,6,5]}) + >>> g1 = df.groupby('A', group_keys=False) + >>> g2 = df.groupby('A', group_keys=True) + + Notice that ``g1`` have ``g2`` have two groups, ``a`` and ``b``, and only + differ in their ``group_keys`` argument. Calling `apply` in various ways, + we can get different grouping results: + + Example 1: below the function passed to `apply` takes a DataFrame as + its argument and returns a DataFrame. `apply` combines the result for + each group together into a new DataFrame: + + >>> g1[['B', 'C']].apply(lambda x: x / x.sum()) + B C + 0 0.333333 0.4 + 1 0.666667 0.6 + 2 1.000000 1.0 + + In the above, the groups are not part of the index. We can have them included + by using ``g2`` where ``group_keys=True``: + + >>> g2[['B', 'C']].apply(lambda x: x / x.sum()) + B C + A + a 0 0.333333 0.4 + 1 0.666667 0.6 + b 2 1.000000 1.0 + + Example 2: The function passed to `apply` takes a DataFrame as + its argument and returns a Series. `apply` combines the result for + each group together into a new DataFrame. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``. + + >>> g1[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) + B C + A + a 1.0 2.0 + b 0.0 0.0 + + >>> g2[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) + B C + A + a 1.0 2.0 + b 0.0 0.0 + + The ``group_keys`` argument has no effect here because the result is not + like-indexed (i.e. :ref:`a transform `) when compared + to the input. + + Example 3: The function passed to `apply` takes a DataFrame as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as + appropriate: + + >>> g1.apply(lambda x: x.C.max() - x.B.min()) + A + a 5 + b 2 + dtype: int64""", + "series_examples": """ + >>> s = pd.Series([0, 1, 2], index='a a b'.split()) + >>> g1 = s.groupby(s.index, group_keys=False) + >>> g2 = s.groupby(s.index, group_keys=True) + + From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``. + Notice that ``g1`` have ``g2`` have two groups, ``a`` and ``b``, and only + differ in their ``group_keys`` argument. Calling `apply` in various ways, + we can get different grouping results: + + Example 1: The function passed to `apply` takes a Series as + its argument and returns a Series. 
`apply` combines the result for + each group together into a new Series. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``. + + >>> g1.apply(lambda x: x*2 if x.name == 'a' else x/2) + a 0.0 + a 2.0 + b 1.0 + dtype: float64 + + In the above, the groups are not part of the index. We can have them included + by using ``g2`` where ``group_keys=True``: + + >>> g2.apply(lambda x: x*2 if x.name == 'a' else x/2) + a a 0.0 + a 2.0 + b b 1.0 + dtype: float64 + + Example 2: The function passed to `apply` takes a Series as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as + appropriate: + + >>> g1.apply(lambda x: x.max() - x.min()) + a 1 + b 0 + dtype: int64 + + The ``group_keys`` argument has no effect here because the result is not + like-indexed (i.e. :ref:`a transform `) when compared + to the input. + + >>> g2.apply(lambda x: x.max() - x.min()) + a 1 + b 0 + dtype: int64""", +} + +_groupby_agg_method_template = """ +Compute {fname} of group values. + +Parameters +---------- +numeric_only : bool, default {no} + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. +min_count : int, default {mc} + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + +Returns +------- +Series or DataFrame + Computed {fname} of values within each group. +""" + +_pipe_template = """ +Apply a ``func`` with arguments to this %(klass)s object and return its result. + +Use `.pipe` when you want to improve readability by chaining together +functions that expect Series, DataFrames, GroupBy or Resampler objects. +Instead of writing + +>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) # doctest: +SKIP + +You can write + +>>> (df.groupby('group') +... .pipe(f) +... .pipe(g, arg1=a) +... .pipe(h, arg2=b, arg3=c)) # doctest: +SKIP + +which is much more readable. + +Parameters +---------- +func : callable or tuple of (callable, str) + Function to apply to this %(klass)s object or, alternatively, + a `(callable, data_keyword)` tuple where `data_keyword` is a + string indicating the keyword of `callable` that expects the + %(klass)s object. +args : iterable, optional + Positional arguments passed into `func`. +kwargs : dict, optional + A dictionary of keyword arguments passed into `func`. + +Returns +------- +object : the return type of `func`. + +See Also +-------- +Series.pipe : Apply a function with arguments to a series. +DataFrame.pipe: Apply a function with arguments to a dataframe. +apply : Apply function to each group instead of to the + full %(klass)s object. + +Notes +----- +See more `here +`_ + +Examples +-------- +%(examples)s +""" + +_transform_template = """ +Call function producing a same-indexed %(klass)s on each group. + +Returns a %(klass)s having the same indexes as the original object +filled with the transformed values. + +Parameters +---------- +f : function + Function to apply to each group. See the Notes section below for requirements. + + Can also accept a Numba JIT function with + ``engine='numba'`` specified. + + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + .. 
versionchanged:: 1.1.0 +*args + Positional arguments to pass to func. +engine : str, default None + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or the global setting ``compute.use_numba`` + + .. versionadded:: 1.1.0 +engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to the function + + .. versionadded:: 1.1.0 +**kwargs + Keyword arguments to be passed into func. + +Returns +------- +%(klass)s + +See Also +-------- +%(klass)s.groupby.apply : Apply function ``func`` group-wise and combine + the results together. +%(klass)s.groupby.aggregate : Aggregate using one or more + operations over the specified axis. +%(klass)s.transform : Call ``func`` on self producing a %(klass)s with the + same axis shape as self. + +Notes +----- +Each group is endowed the attribute 'name' in case you need to know +which group you are working on. + +The current implementation imposes three requirements on f: + +* f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, if `f` returns a scalar it will be broadcast to have the + same shape as the input subframe. +* if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. +* f must not mutate groups. Mutation is not supported and may + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. + +When using ``engine='numba'``, there will be no "fall back" behavior internally. +The group data and group index will be passed as numpy arrays to the JITed +user defined function, and no alternative execution attempts will be tried. + +.. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + +.. deprecated:: 1.5.0 + + When using ``.transform`` on a grouped DataFrame and the transformation function + returns a DataFrame, currently pandas does not align the result's index + with the input's index. This behavior is deprecated and alignment will + be performed in a future version of pandas. You can apply ``.to_numpy()`` to the + result of the transformation function to avoid alignment. + +Examples +-------- + +>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', +... 'foo', 'bar'], +... 'B' : ['one', 'one', 'two', 'three', +... 'two', 'two'], +... 'C' : [1, 5, 5, 2, 5, 5], +... 'D' : [2.0, 5., 8., 1., 2., 9.]}) +>>> grouped = df.groupby('A')[['C', 'D']] +>>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + C D +0 -1.154701 -0.577350 +1 0.577350 0.000000 +2 0.577350 1.154701 +3 -1.154701 -1.000000 +4 0.577350 -0.577350 +5 0.577350 1.000000 + +Broadcast result of the transformation + +>>> grouped.transform(lambda x: x.max() - x.min()) + C D +0 4.0 6.0 +1 3.0 8.0 +2 4.0 6.0 +3 3.0 8.0 +4 4.0 6.0 +5 3.0 8.0 + +.. 
versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + for example: + +>>> grouped.transform(lambda x: x.astype(int).max()) + C D +0 5 8 +1 5 9 +2 5 8 +3 5 9 +4 5 8 +5 5 9 +""" + +_agg_template = """ +Aggregate using one or more operations over the specified axis. + +Parameters +---------- +func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + + Can also accept a Numba JIT function with + ``engine='numba'`` specified. Only passing a single function is supported + with this engine. + + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + .. versionchanged:: 1.1.0 +*args + Positional arguments to pass to func. +engine : str, default None + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.1.0 +engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be + applied to the function + + .. versionadded:: 1.1.0 +**kwargs + Keyword arguments to be passed into func. + +Returns +------- +{klass} + +See Also +-------- +{klass}.groupby.apply : Apply function func group-wise + and combine the results together. +{klass}.groupby.transform : Aggregate using one or more + operations over the specified axis. +{klass}.aggregate : Transforms the Series on each group + based on the given function. + +Notes +----- +When using ``engine='numba'``, there will be no "fall back" behavior internally. +The group data and group index will be passed as numpy arrays to the JITed +user defined function, and no alternative execution attempts will be tried. + +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` +for more details. + +.. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. +{examples}""" + + +@final +class GroupByPlot(PandasObject): + """ + Class implementing the .plot attribute for groupby objects. 
+ """ + + def __init__(self, groupby: GroupBy) -> None: + self._groupby = groupby + + def __call__(self, *args, **kwargs): + def f(self): + return self.plot(*args, **kwargs) + + f.__name__ = "plot" + return self._groupby.apply(f) + + def __getattr__(self, name: str): + def attr(*args, **kwargs): + def f(self): + return getattr(self.plot, name)(*args, **kwargs) + + return self._groupby.apply(f) + + return attr + + +_KeysArgType = Union[ + Hashable, + List[Hashable], + Callable[[Hashable], Hashable], + List[Callable[[Hashable], Hashable]], + Mapping[Hashable, Hashable], +] + + +class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): + _group_selection: IndexLabel | None = None + _apply_allowlist: frozenset[str] = frozenset() + _hidden_attrs = PandasObject._hidden_attrs | { + "as_index", + "axis", + "dropna", + "exclusions", + "grouper", + "group_keys", + "keys", + "level", + "mutated", + "obj", + "observed", + "sort", + "squeeze", + } + + axis: int + grouper: ops.BaseGrouper + keys: _KeysArgType | None = None + group_keys: bool | lib.NoDefault + + @final + def __len__(self) -> int: + return len(self.groups) + + @final + def __repr__(self) -> str: + # TODO: Better repr for GroupBy object + return object.__repr__(self) + + @final + @property + def groups(self) -> dict[Hashable, np.ndarray]: + """ + Dict {group name -> group labels}. + """ + return self.grouper.groups + + @final + @property + def ngroups(self) -> int: + return self.grouper.ngroups + + @final + @property + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: + """ + Dict {group name -> group indices}. + """ + return self.grouper.indices + + @final + def _get_indices(self, names): + """ + Safe get multiple indices, translate keys for + datelike to underlying repr. + """ + + def get_converter(s): + # possibly convert to the actual key types + # in the indices, could be a Timestamp or a np.datetime64 + if isinstance(s, datetime.datetime): + return lambda key: Timestamp(key) + elif isinstance(s, np.datetime64): + return lambda key: Timestamp(key).asm8 + else: + return lambda key: key + + if len(names) == 0: + return [] + + if len(self.indices) > 0: + index_sample = next(iter(self.indices)) + else: + index_sample = None # Dummy sample + + name_sample = names[0] + if isinstance(index_sample, tuple): + if not isinstance(name_sample, tuple): + msg = "must supply a tuple to get_group with multiple grouping keys" + raise ValueError(msg) + if not len(name_sample) == len(index_sample): + try: + # If the original grouper was a tuple + return [self.indices[name] for name in names] + except KeyError as err: + # turns out it wasn't a tuple + msg = ( + "must supply a same-length tuple to get_group " + "with multiple grouping keys" + ) + raise ValueError(msg) from err + + converters = [get_converter(s) for s in index_sample] + names = (tuple(f(n) for f, n in zip(converters, name)) for name in names) + + else: + converter = get_converter(index_sample) + names = (converter(name) for name in names) + + return [self.indices.get(name, []) for name in names] + + @final + def _get_index(self, name): + """ + Safe get index, translate keys for datelike to underlying repr. 
+ """ + return self._get_indices([name])[0] + + @final + @cache_readonly + def _selected_obj(self): + # Note: _selected_obj is always just `self.obj` for SeriesGroupBy + + if self._selection is None or isinstance(self.obj, Series): + if self._group_selection is not None: + return self.obj[self._group_selection] + return self.obj + else: + return self.obj[self._selection] + + @final + def _dir_additions(self) -> set[str]: + return self.obj._dir_additions() | self._apply_allowlist + + @Substitution( + klass="GroupBy", + examples=dedent( + """\ + >>> df = pd.DataFrame({'A': 'a b a b'.split(), 'B': [1, 2, 3, 4]}) + >>> df + A B + 0 a 1 + 1 b 2 + 2 a 3 + 3 b 4 + + To get the difference between each groups maximum and minimum value in one + pass, you can do + + >>> df.groupby('A').pipe(lambda x: x.max() - x.min()) + B + A + a 2 + b 2""" + ), + ) + @Appender(_pipe_template) + def pipe( + self, + func: Callable[..., T] | tuple[Callable[..., T], str], + *args, + **kwargs, + ) -> T: + return com.pipe(self, func, *args, **kwargs) + + plot = property(GroupByPlot) + + @final + def get_group(self, name, obj=None) -> DataFrame | Series: + """ + Construct DataFrame from group with provided name. + + Parameters + ---------- + name : object + The name of the group to get as a DataFrame. + obj : DataFrame, default None + The DataFrame to take the DataFrame out of. If + it is None, the object groupby was called on will + be used. + + Returns + ------- + group : same type as obj + """ + if obj is None: + obj = self._selected_obj + + inds = self._get_index(name) + if not len(inds): + raise KeyError(name) + + return obj._take_with_is_copy(inds, axis=self.axis) + + @final + def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: + """ + Groupby iterator. + + Returns + ------- + Generator yielding sequence of (name, subsetted object) + for each group + """ + keys = self.keys + if isinstance(keys, list) and len(keys) == 1: + warnings.warn( + ( + "In a future version of pandas, a length 1 " + "tuple will be returned when iterating over a " + "groupby with a grouper equal to a list of " + "length 1. Don't supply a list with a single grouper " + "to avoid this warning." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.grouper.get_iterator(self._selected_obj, axis=self.axis) + + +# To track operations that expand dimensions, like ohlc +OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame) + + +class GroupBy(BaseGroupBy[NDFrameT]): + """ + Class for grouping and aggregating relational data. + + See aggregate, transform, and apply functions on this object. + + It's easiest to use obj.groupby(...) to use GroupBy, but you can also do: + + :: + + grouped = groupby(obj, ...) + + Parameters + ---------- + obj : pandas object + axis : int, default 0 + level : int, default None + Level of MultiIndex + groupings : list of Grouping objects + Most users should ignore this + exclusions : array-like, optional + List of columns to exclude + name : str + Most users should ignore this + + Returns + ------- + **Attributes** + groups : dict + {group name -> group labels} + len(grouped) : int + Number of groups + + Notes + ----- + After grouping, see aggregate, apply, and transform functions. Here are + some other brief notes about usage. When grouping by multiple groups, the + result index will be a MultiIndex (hierarchical) by default. + + Iteration produces (key, group) tuples, i.e. chunking the data by group. 
So + you can write code like: + + :: + + grouped = obj.groupby(keys, axis=axis) + for key, group in grouped: + # do something with the data + + Function calls on GroupBy, if not specially implemented, "dispatch" to the + grouped data. So if you group a DataFrame and wish to invoke the std() + method on each group, you can simply do: + + :: + + df.groupby(mapper).std() + + rather than + + :: + + df.groupby(mapper).aggregate(np.std) + + You can pass arguments to these "wrapped" functions, too. + + See the online documentation for full exposition on these topics and much + more + """ + + grouper: ops.BaseGrouper + as_index: bool + + @final + def __init__( + self, + obj: NDFrameT, + keys: _KeysArgType | None = None, + axis: int = 0, + level: IndexLabel | None = None, + grouper: ops.BaseGrouper | None = None, + exclusions: frozenset[Hashable] | None = None, + selection: IndexLabel | None = None, + as_index: bool = True, + sort: bool = True, + group_keys: bool | lib.NoDefault = True, + squeeze: bool = False, + observed: bool = False, + mutated: bool = False, + dropna: bool = True, + ) -> None: + + self._selection = selection + + assert isinstance(obj, NDFrame), type(obj) + + self.level = level + + if not as_index: + if not isinstance(obj, DataFrame): + raise TypeError("as_index=False only valid with DataFrame") + if axis != 0: + raise ValueError("as_index=False only valid for axis=0") + + self.as_index = as_index + self.keys = keys + self.sort = sort + self.group_keys = group_keys + self.squeeze = squeeze + self.observed = observed + self.mutated = mutated + self.dropna = dropna + + if grouper is None: + from pandas.core.groupby.grouper import get_grouper + + grouper, exclusions, obj = get_grouper( + obj, + keys, + axis=axis, + level=level, + sort=sort, + observed=observed, + mutated=self.mutated, + dropna=self.dropna, + ) + + self.obj = obj + self.axis = obj._get_axis_number(axis) + self.grouper = grouper + self.exclusions = frozenset(exclusions) if exclusions else frozenset() + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{attr}'" + ) + + @final + def _make_wrapper(self, name: str) -> Callable: + assert name in self._apply_allowlist + + with self._group_selection_context(): + # need to setup the selection + # as are not passed directly but in the grouper + f = getattr(self._obj_with_exclusions, name) + if not isinstance(f, types.MethodType): + # error: Incompatible return value type + # (got "NDFrameT", expected "Callable[..., Any]") [return-value] + return cast(Callable, self.apply(lambda self: getattr(self, name))) + + f = getattr(type(self._obj_with_exclusions), name) + sig = inspect.signature(f) + + def wrapper(*args, **kwargs): + # a little trickery for aggregation functions that need an axis + # argument + if "axis" in sig.parameters: + if kwargs.get("axis", None) is None: + kwargs["axis"] = self.axis + + numeric_only = kwargs.get("numeric_only", lib.no_default) + + def curried(x): + with warnings.catch_warnings(): + # Catch any warnings from dispatch to DataFrame; we'll emit + # a warning for groupby below + match = "The default value of numeric_only " + warnings.filterwarnings("ignore", match, FutureWarning) + return f(x, *args, **kwargs) + + # preserve the name so we can detect it when calling plot methods, + # to avoid duplicates + curried.__name__ = name + + # special case otherwise extra 
plots are created when catching the + # exception below + if name in base.plotting_methods: + return self.apply(curried) + + is_transform = name in base.transformation_kernels + + # Transform needs to keep the same schema, including when empty + if is_transform and self._obj_with_exclusions.empty: + return self._obj_with_exclusions + + result = self._python_apply_general( + curried, + self._obj_with_exclusions, + is_transform=is_transform, + not_indexed_same=not is_transform, + ) + + if self._selected_obj.ndim != 1 and self.axis != 1 and result.ndim != 1: + missing = self._obj_with_exclusions.columns.difference(result.columns) + if len(missing) > 0: + warn_dropping_nuisance_columns_deprecated( + type(self), name, numeric_only + ) + + if self.grouper.has_dropped_na and is_transform: + # result will have dropped rows due to nans, fill with null + # and ensure index is ordered same as the input + result = self._set_result_index_ordered(result) + return result + + wrapper.__name__ = name + return wrapper + + # ----------------------------------------------------------------- + # Selection + + @final + def _set_group_selection(self) -> None: + """ + Create group based selection. + + Used when selection is not passed directly but instead via a grouper. + + NOTE: this should be paired with a call to _reset_group_selection + """ + # This is a no-op for SeriesGroupBy + grp = self.grouper + if not ( + self.as_index + and grp.groupings is not None + and self.obj.ndim > 1 + and self._group_selection is None + ): + return + + groupers = [g.name for g in grp.groupings if g.level is None and g.in_axis] + + if len(groupers): + # GH12839 clear selected obj cache when group selection changes + ax = self.obj._info_axis + self._group_selection = ax.difference(Index(groupers), sort=False).tolist() + self._reset_cache("_selected_obj") + + @final + def _reset_group_selection(self) -> None: + """ + Clear group based selection. + + Used for methods needing to return info on each group regardless of + whether a group selection was previously set. + """ + if self._group_selection is not None: + # GH12839 clear cached selection too when changing group selection + self._group_selection = None + self._reset_cache("_selected_obj") + + @contextmanager + def _group_selection_context(self) -> Iterator[GroupBy]: + """ + Set / reset the _group_selection_context. 
+ """ + self._set_group_selection() + try: + yield self + finally: + self._reset_group_selection() + + def _iterate_slices(self) -> Iterable[Series]: + raise AbstractMethodError(self) + + # ----------------------------------------------------------------- + # Dispatch/Wrapping + + @final + def _concat_objects( + self, + values, + not_indexed_same: bool = False, + override_group_keys: bool = False, + ): + from pandas.core.reshape.concat import concat + + def reset_identity(values): + # reset the identities of the components + # of the values to prevent aliasing + for v in com.not_none(*values): + ax = v._get_axis(self.axis) + ax._reset_identity() + return values + + if self.group_keys and not override_group_keys: + + values = reset_identity(values) + if self.as_index: + + # possible MI return case + group_keys = self.grouper.result_index + group_levels = self.grouper.levels + group_names = self.grouper.names + + result = concat( + values, + axis=self.axis, + keys=group_keys, + levels=group_levels, + names=group_names, + sort=False, + ) + else: + + # GH5610, returns a MI, with the first level being a + # range index + keys = list(range(len(values))) + result = concat(values, axis=self.axis, keys=keys) + + elif not not_indexed_same: + result = concat(values, axis=self.axis) + + ax = self._selected_obj._get_axis(self.axis) + if self.dropna: + labels = self.grouper.group_info[0] + mask = labels != -1 + ax = ax[mask] + + # this is a very unfortunate situation + # we can't use reindex to restore the original order + # when the ax has duplicates + # so we resort to this + # GH 14776, 30667 + if ax.has_duplicates and not result.axes[self.axis].equals(ax): + target = algorithms.unique1d(ax._values) + indexer, _ = result.index.get_indexer_non_unique(target) + result = result.take(indexer, axis=self.axis) + else: + result = result.reindex(ax, axis=self.axis, copy=False) + + else: + values = reset_identity(values) + result = concat(values, axis=self.axis) + + name = self.obj.name if self.obj.ndim == 1 else self._selection + if isinstance(result, Series) and name is not None: + + result.name = name + + return result + + @final + def _set_result_index_ordered( + self, result: OutputFrameOrSeries + ) -> OutputFrameOrSeries: + # set the result index on the passed values object and + # return the new object, xref 8046 + + obj_axis = self.obj._get_axis(self.axis) + + if self.grouper.is_monotonic and not self.grouper.has_dropped_na: + # shortcut if we have an already ordered grouper + result = result.set_axis(obj_axis, axis=self.axis, copy=False) + return result + + # row order is scrambled => sort the rows by position in original index + original_positions = Index(self.grouper.result_ilocs()) + result = result.set_axis(original_positions, axis=self.axis, copy=False) + result = result.sort_index(axis=self.axis) + if self.grouper.has_dropped_na: + # Add back in any missing rows due to dropna - index here is integral + # with values referring to the row of the input so can use RangeIndex + result = result.reindex(RangeIndex(len(obj_axis)), axis=self.axis) + result = result.set_axis(obj_axis, axis=self.axis, copy=False) + + return result + + def _indexed_output_to_ndframe( + self, result: Mapping[base.OutputKey, ArrayLike] + ) -> Series | DataFrame: + raise AbstractMethodError(self) + + @final + def _wrap_aggregated_output( + self, + output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike], + qs: npt.NDArray[np.float64] | None = None, + ): + """ + Wraps the output of GroupBy aggregations into the expected 
result. + + Parameters + ---------- + output : Series, DataFrame, or Mapping[base.OutputKey, ArrayLike] + Data to wrap. + + Returns + ------- + Series or DataFrame + """ + + if isinstance(output, (Series, DataFrame)): + # We get here (for DataFrameGroupBy) if we used Manager.grouped_reduce, + # in which case our columns are already set correctly. + # ATM we do not get here for SeriesGroupBy; when we do, we will + # need to require that result.name already match self.obj.name + result = output + else: + result = self._indexed_output_to_ndframe(output) + + if not self.as_index: + # `not self.as_index` is only relevant for DataFrameGroupBy, + # enforced in __init__ + self._insert_inaxis_grouper_inplace(result) + result = result._consolidate() + index = Index(range(self.grouper.ngroups)) + + else: + index = self.grouper.result_index + + if qs is not None: + # We get here with len(qs) != 1 and not self.as_index + # in test_pass_args_kwargs + index = _insert_quantile_level(index, qs) + + result.index = index + + if self.axis == 1: + # Only relevant for DataFrameGroupBy, no-op for SeriesGroupBy + result = result.T + if result.index.equals(self.obj.index): + # Retain e.g. DatetimeIndex/TimedeltaIndex freq + result.index = self.obj.index.copy() + # TODO: Do this more systematically + + return self._reindex_output(result, qs=qs) + + @final + def _wrap_transformed_output( + self, output: Mapping[base.OutputKey, ArrayLike] + ) -> Series | DataFrame: + """ + Wraps the output of GroupBy transformations into the expected result. + + Parameters + ---------- + output : Mapping[base.OutputKey, ArrayLike] + Data to wrap. + + Returns + ------- + Series or DataFrame + Series for SeriesGroupBy, DataFrame for DataFrameGroupBy + """ + if isinstance(output, (Series, DataFrame)): + result = output + else: + result = self._indexed_output_to_ndframe(output) + + if self.axis == 1: + # Only relevant for DataFrameGroupBy + result = result.T + result.columns = self.obj.columns + + result.index = self.obj.index + return result + + def _wrap_applied_output( + self, + data, + values: list, + not_indexed_same: bool = False, + override_group_keys: bool = False, + ): + raise AbstractMethodError(self) + + def _resolve_numeric_only( + self, how: str, numeric_only: bool | lib.NoDefault, axis: int + ) -> bool: + """ + Determine subclass-specific default value for 'numeric_only'. + + For SeriesGroupBy we want the default to be False (to match Series behavior). + For DataFrameGroupBy we want it to be True (for backwards-compat). + + Parameters + ---------- + numeric_only : bool or lib.no_default + axis : int + Axis passed to the groupby op (not self.axis). + + Returns + ------- + bool + """ + # GH#41291 + if numeric_only is lib.no_default: + # i.e. not explicitly passed by user + if self.obj.ndim == 2: + # i.e. DataFrameGroupBy + numeric_only = axis != 1 + # GH#42395 GH#43108 GH#43154 + # Regression from 1.2.5 to 1.3 caused object columns to be dropped + if self.axis: + obj = self._obj_with_exclusions.T + else: + obj = self._obj_with_exclusions + check = obj._get_numeric_data() + if len(obj.columns) and not len(check.columns) and not obj.empty: + numeric_only = False + + else: + numeric_only = False + + if numeric_only and self.obj.ndim == 1 and not is_numeric_dtype(self.obj.dtype): + # GH#47500 + warnings.warn( + f"{type(self).__name__}.{how} called with " + f"numeric_only={numeric_only} and dtype {self.obj.dtype}. 
This will " + "raise a TypeError in a future version of pandas", + category=FutureWarning, + stacklevel=find_stack_level(), + ) + raise NotImplementedError( + f"{type(self).__name__}.{how} does not implement numeric_only" + ) + + return numeric_only + + def _maybe_warn_numeric_only_depr( + self, how: str, result: DataFrame | Series, numeric_only: bool | lib.NoDefault + ) -> None: + """Emit warning on numeric_only behavior deprecation when appropriate. + + Parameters + ---------- + how : str + Groupby kernel name. + result : + Result of the groupby operation. + numeric_only : bool or lib.no_default + Argument as passed by user. + """ + if ( + self._obj_with_exclusions.ndim != 1 + and result.ndim > 1 + and len(result.columns) < len(self._obj_with_exclusions.columns) + ): + warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) + + # ----------------------------------------------------------------- + # numba + + @final + def _numba_prep(self, data): + ids, _, ngroups = self.grouper.group_info + sorted_index = get_group_index_sorter(ids, ngroups) + sorted_ids = algorithms.take_nd(ids, sorted_index, allow_fill=False) + + sorted_data = data.take(sorted_index, axis=self.axis).to_numpy() + if len(self.grouper.groupings) > 1: + raise NotImplementedError( + "More than 1 grouping labels are not supported with engine='numba'" + ) + # GH 46867 + index_data = data.index + if isinstance(index_data, MultiIndex): + group_key = self.grouper.groupings[0].name + index_data = index_data.get_level_values(group_key) + sorted_index_data = index_data.take(sorted_index).to_numpy() + + starts, ends = lib.generate_slices(sorted_ids, ngroups) + return ( + starts, + ends, + sorted_index_data, + sorted_data, + ) + + def _numba_agg_general( + self, + func: Callable, + engine_kwargs: dict[str, bool] | None, + *aggregator_args, + ): + """ + Perform groupby with a standard numerical aggregation function (e.g. mean) + with Numba. + """ + if not self.as_index: + raise NotImplementedError( + "as_index=False is not supported. Use .reset_index() instead." + ) + if self.axis == 1: + raise NotImplementedError("axis=1 is not supported.") + + with self._group_selection_context(): + data = self._selected_obj + df = data if data.ndim == 2 else data.to_frame() + starts, ends, sorted_index, sorted_data = self._numba_prep(df) + aggregator = executor.generate_shared_aggregator( + func, **get_jit_arguments(engine_kwargs) + ) + result = aggregator(sorted_data, starts, ends, 0, *aggregator_args) + + index = self.grouper.result_index + if data.ndim == 1: + result_kwargs = {"name": data.name} + result = result.ravel() + else: + result_kwargs = {"columns": data.columns} + return data._constructor(result, index=index, **result_kwargs) + + @final + def _transform_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs): + """ + Perform groupby transform routine with the numba engine. + + This routine mimics the data splitting routine of the DataSplitter class + to generate the indices of each group in the sorted data and then passes the + data and indices into a Numba jitted function. 
+ """ + starts, ends, sorted_index, sorted_data = self._numba_prep(data) + numba_.validate_udf(func) + numba_transform_func = numba_.generate_numba_transform_func( + func, **get_jit_arguments(engine_kwargs, kwargs) + ) + result = numba_transform_func( + sorted_data, + sorted_index, + starts, + ends, + len(data.columns), + *args, + ) + # result values needs to be resorted to their original positions since we + # evaluated the data sorted by group + return result.take(np.argsort(sorted_index), axis=0) + + @final + def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs): + """ + Perform groupby aggregation routine with the numba engine. + + This routine mimics the data splitting routine of the DataSplitter class + to generate the indices of each group in the sorted data and then passes the + data and indices into a Numba jitted function. + """ + starts, ends, sorted_index, sorted_data = self._numba_prep(data) + numba_.validate_udf(func) + numba_agg_func = numba_.generate_numba_agg_func( + func, **get_jit_arguments(engine_kwargs, kwargs) + ) + result = numba_agg_func( + sorted_data, + sorted_index, + starts, + ends, + len(data.columns), + *args, + ) + return result + + # ----------------------------------------------------------------- + # apply/agg/transform + + @Appender( + _apply_docs["template"].format( + input="dataframe", examples=_apply_docs["dataframe_examples"] + ) + ) + def apply(self, func, *args, **kwargs) -> NDFrameT: + # GH#50538 + is_np_func = func in com._cython_table and func not in com._builtin_table + orig_func = func + func = com.is_builtin_func(func) + + if isinstance(func, str): + if hasattr(self, func): + res = getattr(self, func) + if callable(res): + return res(*args, **kwargs) + elif args or kwargs: + raise ValueError(f"Cannot pass arguments to property {func}") + return res + + else: + raise TypeError(f"apply func should be callable, not '{func}'") + + elif args or kwargs: + if callable(func): + + @wraps(func) + def f(g): + with np.errstate(all="ignore"): + return func(g, *args, **kwargs) + + elif hasattr(nanops, "nan" + func): + # TODO: should we wrap this in to e.g. _is_builtin_func? + f = getattr(nanops, "nan" + func) + + else: + raise ValueError( + "func must be a callable if args or kwargs are supplied" + ) + else: + + f = func + + # ignore SettingWithCopy here in case the user mutates + with option_context("mode.chained_assignment", None): + try: + # GH#50538 + old_msg = "The default value of numeric_only" + new_msg = ( + f"The operation {orig_func} failed on a column. If any error is " + f"raised, this will raise an exception in a future version " + f"of pandas. Drop these columns to avoid this warning." + ) + with rewrite_warning( + old_msg, FutureWarning, new_msg + ) if is_np_func else nullcontext(): + result = self._python_apply_general(f, self._selected_obj) + except TypeError: + # gh-20949 + # try again, with .apply acting as a filtering + # operation, by excluding the grouping column + # This would normally not be triggered + # except if the udf is trying an operation that + # fails on *some* columns, e.g. a numeric operation + # on a string grouper column + + with self._group_selection_context(): + # GH#50538 + old_msg = "The default value of numeric_only" + new_msg = ( + f"The operation {orig_func} failed on a column. If any error " + f"is raised, this will raise an exception in a future version " + f"of pandas. Drop these columns to avoid this warning." 
+ ) + with rewrite_warning( + old_msg, FutureWarning, new_msg + ) if is_np_func else nullcontext(): + return self._python_apply_general(f, self._selected_obj) + + return result + + @final + def _python_apply_general( + self, + f: Callable, + data: DataFrame | Series, + not_indexed_same: bool | None = None, + is_transform: bool = False, + is_agg: bool = False, + ) -> NDFrameT: + """ + Apply function f in python space + + Parameters + ---------- + f : callable + Function to apply + data : Series or DataFrame + Data to apply f to + not_indexed_same: bool, optional + When specified, overrides the value of not_indexed_same. Apply behaves + differently when the result index is equal to the input index, but + this can be coincidental leading to value-dependent behavior. + is_transform : bool, default False + Indicator for whether the function is actually a transform + and should not have group keys prepended. This is used + in _make_wrapper which generates both transforms (e.g. diff) + and non-transforms (e.g. corr) + is_agg : bool, default False + Indicator for whether the function is an aggregation. When the + result is empty, we don't want to warn for this case. + See _GroupBy._python_agg_general. + + Returns + ------- + Series or DataFrame + data after applying f + """ + values, mutated = self.grouper.apply(f, data, self.axis) + if not_indexed_same is None: + not_indexed_same = mutated or self.mutated + override_group_keys = False + + is_empty_agg = is_agg and len(values) == 0 + if (not not_indexed_same and self.group_keys is lib.no_default) and not ( + is_transform or is_empty_agg + ): + # We've detected value-dependent behavior: the result's index depends on + # whether the user's function `f` returned the same index or not. + msg = ( + "Not prepending group keys to the result index of " + "transform-like apply. In the future, the group keys " + "will be included in the index, regardless of whether " + "the applied function returns a like-indexed object.\n" + "To preserve the previous behavior, use\n\n\t" + ">>> .groupby(..., group_keys=False)\n\n" + "To adopt the future behavior and silence this warning, use " + "\n\n\t>>> .groupby(..., group_keys=True)" + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + # We want to behave as if `self.group_keys=False` when reconstructing + # the object. However, we don't want to mutate the stateful GroupBy + # object, so we just override it. + # When this deprecation is enforced then override_group_keys + # may be removed. + override_group_keys = True + + return self._wrap_applied_output( + data, + values, + not_indexed_same, + override_group_keys=is_transform or override_group_keys, + ) + + @final + def _python_agg_general(self, func, *args, raise_on_typeerror=False, **kwargs): + func = com.is_builtin_func(func) + f = lambda x: func(x, *args, **kwargs) + + # iterate through "columns" ex exclusions to populate output dict + output: dict[base.OutputKey, ArrayLike] = {} + + if self.ngroups == 0: + # agg_series below assumes ngroups > 0 + return self._python_apply_general(f, self._selected_obj, is_agg=True) + + for idx, obj in enumerate(self._iterate_slices()): + name = obj.name + + try: + # if this function is invalid for this dtype, we will ignore it. 
+ result = self.grouper.agg_series(obj, f) + except TypeError: + if raise_on_typeerror: + raise + warn_dropping_nuisance_columns_deprecated( + type(self), "agg", numeric_only=False + ) + continue + + key = base.OutputKey(label=name, position=idx) + output[key] = result + + if not output: + return self._python_apply_general(f, self._selected_obj) + + return self._wrap_aggregated_output(output) + + @final + def _agg_general( + self, + numeric_only: bool | lib.NoDefault = True, + min_count: int = -1, + *, + alias: str, + npfunc: Callable, + ): + + with self._group_selection_context(): + # try a cython aggregation if we can + result = self._cython_agg_general( + how=alias, + alt=npfunc, + numeric_only=numeric_only, + min_count=min_count, + ) + return result.__finalize__(self.obj, method="groupby") + + def _agg_py_fallback( + self, values: ArrayLike, ndim: int, alt: Callable + ) -> ArrayLike: + """ + Fallback to pure-python aggregation if _cython_operation raises + NotImplementedError. + """ + # We get here with a) EADtypes and b) object dtype + + if values.ndim == 1: + # For DataFrameGroupBy we only get here with ExtensionArray + ser = Series(values) + else: + # We only get here with values.dtype == object + # TODO: special case not needed with ArrayManager + df = DataFrame(values.T) + # bc we split object blocks in grouped_reduce, we have only 1 col + # otherwise we'd have to worry about block-splitting GH#39329 + assert df.shape[1] == 1 + # Avoid call to self.values that can occur in DataFrame + # reductions; see GH#28949 + ser = df.iloc[:, 0] + + # We do not get here with UDFs, so we know that our dtype + # should always be preserved by the implemented aggregations + # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype? + res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True) + + if isinstance(values, Categorical): + # Because we only get here with known dtype-preserving + # reductions, we cast back to Categorical. + # TODO: if we ever get "rank" working, exclude it here. + res_values = type(values)._from_sequence(res_values, dtype=values.dtype) + + # If we are DataFrameGroupBy and went through a SeriesGroupByPath + # then we need to reshape + # GH#32223 includes case with IntegerArray values, ndarray res_values + # test_groupby_duplicate_columns with object dtype values + return ensure_block_shape(res_values, ndim=ndim) + + @final + def _cython_agg_general( + self, + how: str, + alt: Callable, + numeric_only: bool | lib.NoDefault, + min_count: int = -1, + ignore_failures: bool = True, + **kwargs, + ): + # Note: we never get here with how="ohlc" for DataFrameGroupBy; + # that goes through SeriesGroupBy + numeric_only_bool = self._resolve_numeric_only(how, numeric_only, axis=0) + + data = self._get_data_to_aggregate() + is_ser = data.ndim == 1 + + orig_len = len(data) + if numeric_only_bool: + if is_ser and not is_numeric_dtype(self._selected_obj.dtype): + # GH#41291 match Series behavior + kwd_name = "numeric_only" + if how in ["any", "all"]: + kwd_name = "bool_only" + raise NotImplementedError( + f"{type(self).__name__}.{how} does not implement {kwd_name}." 
+ ) + elif not is_ser: + data = data.get_numeric_data(copy=False) + + def array_func(values: ArrayLike) -> ArrayLike: + try: + result = self.grouper._cython_operation( + "aggregate", + values, + how, + axis=data.ndim - 1, + min_count=min_count, + **kwargs, + ) + except NotImplementedError: + # generally if we have numeric_only=False + # and non-applicable functions + # try to python agg + # TODO: shouldn't min_count matter? + result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt) + + return result + + # TypeError -> we may have an exception in trying to aggregate + # continue and exclude the block + new_mgr = data.grouped_reduce(array_func, ignore_failures=ignore_failures) + + if not is_ser and len(new_mgr) < orig_len: + warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) + + res = self._wrap_agged_manager(new_mgr) + if is_ser: + res.index = self.grouper.result_index + return self._reindex_output(res) + else: + return res + + def _cython_transform( + self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + ): + raise AbstractMethodError(self) + + @final + def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + + if maybe_use_numba(engine): + # TODO: tests with self._selected_obj.ndim == 1 on DataFrameGroupBy + with self._group_selection_context(): + data = self._selected_obj + df = data if data.ndim == 2 else data.to_frame() + result = self._transform_with_numba( + df, func, *args, engine_kwargs=engine_kwargs, **kwargs + ) + if self.obj.ndim == 2: + return cast(DataFrame, self.obj)._constructor( + result, index=data.index, columns=data.columns + ) + else: + return cast(Series, self.obj)._constructor( + result.ravel(), index=data.index, name=data.name + ) + + # optimized transforms + func = com.get_cython_func(func) or func + + if not isinstance(func, str): + return self._transform_general(func, *args, **kwargs) + + elif func not in base.transform_kernel_allowlist: + msg = f"'{func}' is not a valid function name for transform(name)" + raise ValueError(msg) + elif func in base.cythonized_kernels or func in base.transformation_kernels: + # cythonized transform or canned "agg+broadcast" + return getattr(self, func)(*args, **kwargs) + + else: + # i.e. func in base.reduction_kernels + + # GH#30918 Use _transform_fast only when we know func is an aggregation + # If func is a reduction, we need to broadcast the + # result to the whole group. Compute func result + # and deal with possible broadcasting below. + # Temporarily set observed for dealing with categoricals. + with com.temp_setattr(self, "observed", True): + with com.temp_setattr(self, "as_index", True): + # GH#49834 - result needs groups in the index for + # _wrap_transform_fast_result + result = getattr(self, func)(*args, **kwargs) + + return self._wrap_transform_fast_result(result) + + @final + def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT: + """ + Fast transform path for aggregations. + """ + obj = self._obj_with_exclusions + + # for each col, reshape to size of original frame by take operation + ids, _, _ = self.grouper.group_info + result = result.reindex(self.grouper.result_index, axis=self.axis, copy=False) + + if self.obj.ndim == 1: + # i.e. 
SeriesGroupBy + out = algorithms.take_nd(result._values, ids) + output = obj._constructor(out, index=obj.index, name=obj.name) + else: + # `.size()` gives Series output on DataFrame input, need axis 0 + axis = 0 if result.ndim == 1 else self.axis + # GH#46209 + # Don't convert indices: negative indices need to give rise + # to null values in the result + output = result._take(ids, axis=axis, convert_indices=False) + output = output.set_axis(obj._get_axis(self.axis), axis=axis) + return output + + # ----------------------------------------------------------------- + # Utilities + + @final + def _apply_filter(self, indices, dropna): + if len(indices) == 0: + indices = np.array([], dtype="int64") + else: + indices = np.sort(np.concatenate(indices)) + if dropna: + filtered = self._selected_obj.take(indices, axis=self.axis) + else: + mask = np.empty(len(self._selected_obj.index), dtype=bool) + mask.fill(False) + mask[indices.astype(int)] = True + # mask fails to broadcast when passed to where; broadcast manually. + mask = np.tile(mask, list(self._selected_obj.shape[1:]) + [1]).T + filtered = self._selected_obj.where(mask) # Fill with NaNs. + return filtered + + @final + def _cumcount_array(self, ascending: bool = True) -> np.ndarray: + """ + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. + + Notes + ----- + this is currently implementing sort=False + (though the default is sort=True) for groupby in general + """ + ids, _, ngroups = self.grouper.group_info + sorter = get_group_index_sorter(ids, ngroups) + ids, count = ids[sorter], len(ids) + + if count == 0: + return np.empty(0, dtype=np.int64) + + run = np.r_[True, ids[:-1] != ids[1:]] + rep = np.diff(np.r_[np.nonzero(run)[0], count]) + out = (~run).cumsum() + + if ascending: + out -= np.repeat(out[run], rep) + else: + out = np.repeat(out[np.r_[run[1:], True]], rep) - out + + if self.grouper.has_dropped_na: + out = np.where(ids == -1, np.nan, out.astype(np.float64, copy=False)) + else: + out = out.astype(np.int64, copy=False) + + rev = np.empty(count, dtype=np.intp) + rev[sorter] = np.arange(count, dtype=np.intp) + return out[rev] + + # ----------------------------------------------------------------- + + @final + @property + def _obj_1d_constructor(self) -> Callable: + # GH28330 preserve subclassed Series/DataFrames + if isinstance(self.obj, DataFrame): + return self.obj._constructor_sliced + assert isinstance(self.obj, Series) + return self.obj._constructor + + @final + def _bool_agg(self, val_test: Literal["any", "all"], skipna: bool): + """ + Shared func to call any / all Cython GroupBy implementations. 
+ """ + + def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]: + if is_object_dtype(vals.dtype): + # GH#37501: don't raise on pd.NA when skipna=True + if skipna: + func = np.vectorize( + lambda x: bool(x) if not isna(x) else True, otypes=[bool] + ) + vals = func(vals) + else: + vals = vals.astype(bool, copy=False) + + vals = cast(np.ndarray, vals) + elif isinstance(vals, BaseMaskedArray): + vals = vals._data.astype(bool, copy=False) + else: + vals = vals.astype(bool, copy=False) + + return vals.view(np.int8), bool + + def result_to_bool( + result: np.ndarray, + inference: type, + nullable: bool = False, + ) -> ArrayLike: + if nullable: + return BooleanArray(result.astype(bool, copy=False), result == -1) + else: + return result.astype(inference, copy=False) + + return self._get_cythonized_result( + libgroupby.group_any_all, + numeric_only=False, + cython_dtype=np.dtype(np.int8), + needs_mask=True, + needs_nullable=True, + pre_processing=objs_to_bool, + post_processing=result_to_bool, + val_test=val_test, + skipna=skipna, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def any(self, skipna: bool = True): + """ + Return True if any value in the group is truthful, else False. + + Parameters + ---------- + skipna : bool, default True + Flag to ignore nan values during truth testing. + + Returns + ------- + Series or DataFrame + DataFrame or Series of boolean values, where a value is True if any element + is True within its respective group, False otherwise. + """ + return self._bool_agg("any", skipna) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def all(self, skipna: bool = True): + """ + Return True if all values in the group are truthful, else False. + + Parameters + ---------- + skipna : bool, default True + Flag to ignore nan values during truth testing. + + Returns + ------- + Series or DataFrame + DataFrame or Series of boolean values, where a value is True if all elements + are True within its respective group, False otherwise. + """ + return self._bool_agg("all", skipna) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def count(self) -> NDFrameT: + """ + Compute count of group, excluding missing values. + + Returns + ------- + Series or DataFrame + Count of values within each group. + """ + data = self._get_data_to_aggregate() + ids, _, ngroups = self.grouper.group_info + mask = ids != -1 + + is_series = data.ndim == 1 + + def hfunc(bvalues: ArrayLike) -> ArrayLike: + # TODO(EA2D): reshape would not be necessary with 2D EAs + if bvalues.ndim == 1: + # EA + masked = mask & ~isna(bvalues).reshape(1, -1) + else: + masked = mask & ~isna(bvalues) + + counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1) + if is_series: + assert counted.ndim == 2 + assert counted.shape[0] == 1 + return counted[0] + return counted + + new_mgr = data.grouped_reduce(hfunc) + + # If we are grouping on categoricals we want unobserved categories to + # return zero, rather than the default of NaN which the reindexing in + # _wrap_agged_manager() returns. 
GH 35028 + with com.temp_setattr(self, "observed", True): + result = self._wrap_agged_manager(new_mgr) + + if result.ndim == 1: + result.index = self.grouper.result_index + + return self._reindex_output(result, fill_value=0) + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def mean( + self, + numeric_only: bool | lib.NoDefault = lib.no_default, + engine: str = "cython", + engine_kwargs: dict[str, bool] | None = None, + ): + """ + Compute mean of groups, excluding missing values. + + Parameters + ---------- + numeric_only : bool, default True + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. + + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting + ``compute.use_numba`` + + .. versionadded:: 1.4.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: 1.4.0 + + Returns + ------- + pandas.Series or pandas.DataFrame + %(see_also)s + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + ... 'B': [np.nan, 2, 3, 4, 5], + ... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) + + Groupby one column and return the mean of the remaining columns in + each group. + + >>> df.groupby('A').mean() + B C + A + 1 3.0 1.333333 + 2 4.0 1.500000 + + Groupby two columns and return the mean of the remaining column. + + >>> df.groupby(['A', 'B']).mean() + C + A B + 1 2.0 2.0 + 4.0 1.0 + 2 3.0 1.0 + 5.0 2.0 + + Groupby one column and return the mean of only particular column in + the group. + + >>> df.groupby('A')['B'].mean() + A + 1 3.0 + 2 4.0 + Name: B, dtype: float64 + """ + numeric_only_bool = self._resolve_numeric_only("mean", numeric_only, axis=0) + + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_mean + + return self._numba_agg_general(sliding_mean, engine_kwargs) + else: + result = self._cython_agg_general( + "mean", + alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool), + numeric_only=numeric_only, + ) + return result.__finalize__(self.obj, method="groupby") + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def median(self, numeric_only: bool | lib.NoDefault = lib.no_default): + """ + Compute median of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex + + Parameters + ---------- + numeric_only : bool, default True + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. + + Returns + ------- + Series or DataFrame + Median of values within each group. 
+ """ + numeric_only_bool = self._resolve_numeric_only("median", numeric_only, axis=0) + + result = self._cython_agg_general( + "median", + alt=lambda x: Series(x).median(numeric_only=numeric_only_bool), + numeric_only=numeric_only, + ) + return result.__finalize__(self.obj, method="groupby") + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def std( + self, + ddof: int = 1, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + numeric_only: bool | lib.NoDefault = lib.no_default, + ): + """ + Compute standard deviation of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting + ``compute.use_numba`` + + .. versionadded:: 1.4.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: 1.4.0 + + numeric_only : bool, default True + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series or DataFrame + Standard deviation of values within each group. + """ + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_var + + return np.sqrt(self._numba_agg_general(sliding_var, engine_kwargs, ddof)) + else: + # Resolve numeric_only so that var doesn't warn + numeric_only_bool = self._resolve_numeric_only("std", numeric_only, axis=0) + if ( + numeric_only_bool + and self.obj.ndim == 1 + and not is_numeric_dtype(self.obj.dtype) + ): + raise TypeError( + f"{type(self).__name__}.std called with " + f"numeric_only={numeric_only} and dtype {self.obj.dtype}" + ) + result = self._get_cythonized_result( + libgroupby.group_var, + cython_dtype=np.dtype(np.float64), + numeric_only=numeric_only_bool, + needs_counts=True, + post_processing=lambda vals, inference: np.sqrt(vals), + ddof=ddof, + ) + self._maybe_warn_numeric_only_depr("std", result, numeric_only) + return result + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def var( + self, + ddof: int = 1, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + numeric_only: bool | lib.NoDefault = lib.no_default, + ): + """ + Compute variance of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting + ``compute.use_numba`` + + .. versionadded:: 1.4.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. 
The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: 1.4.0 + + numeric_only : bool, default True + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series or DataFrame + Variance of values within each group. + """ + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_var + + return self._numba_agg_general(sliding_var, engine_kwargs, ddof) + else: + return self._cython_agg_general( + "var", + alt=lambda x: Series(x).var(ddof=ddof), + numeric_only=numeric_only, + ignore_failures=numeric_only is lib.no_default, + ddof=ddof, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default): + """ + Compute standard error of the mean of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + numeric_only : bool, default True + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series or DataFrame + Standard error of the mean of values within each group. + """ + # Reolve numeric_only so that std doesn't warn + numeric_only_bool = self._resolve_numeric_only("sem", numeric_only, axis=0) + if ( + numeric_only_bool + and self.obj.ndim == 1 + and not is_numeric_dtype(self.obj.dtype) + ): + raise TypeError( + f"{type(self).__name__}.sem called with " + f"numeric_only={numeric_only} and dtype {self.obj.dtype}" + ) + result = self.std(ddof=ddof, numeric_only=numeric_only_bool) + self._maybe_warn_numeric_only_depr("sem", result, numeric_only) + + if result.ndim == 1: + result /= np.sqrt(self.count()) + else: + cols = result.columns.difference(self.exclusions).unique() + counts = self.count() + result_ilocs = result.columns.get_indexer_for(cols) + count_ilocs = counts.columns.get_indexer_for(cols) + with warnings.catch_warnings(): + # TODO(2.0): once iloc[:, foo] = bar depecation is enforced, + # this catching will be unnecessary + warnings.filterwarnings( + "ignore", ".*will attempt to set the values inplace.*" + ) + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) + return result + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def size(self) -> DataFrame | Series: + """ + Compute group sizes. + + Returns + ------- + DataFrame or Series + Number of rows in each group as a Series if as_index is True + or a DataFrame if as_index is False. 
+ """ + result = self.grouper.size() + + # GH28330 preserve subclassed Series/DataFrames through calls + if isinstance(self.obj, Series): + result = self._obj_1d_constructor(result, name=self.obj.name) + else: + result = self._obj_1d_constructor(result) + + if not self.as_index: + # error: Incompatible types in assignment (expression has + # type "DataFrame", variable has type "Series") + result = result.rename("size").reset_index() # type: ignore[assignment] + + return self._reindex_output(result, fill_value=0) + + @final + @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0) + def sum( + self, + numeric_only: bool | lib.NoDefault = lib.no_default, + min_count: int = 0, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + ): + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_sum + + return self._numba_agg_general( + sliding_sum, + engine_kwargs, + ) + else: + # If we are grouping on categoricals we want unobserved categories to + # return zero, rather than the default of NaN which the reindexing in + # _agg_general() returns. GH #31422 + with com.temp_setattr(self, "observed", True): + result = self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="sum", + npfunc=np.sum, + ) + + return self._reindex_output(result, fill_value=0) + + @final + @doc(_groupby_agg_method_template, fname="prod", no=True, mc=0) + def prod( + self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0 + ): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod + ) + + @final + @doc(_groupby_agg_method_template, fname="min", no=False, mc=-1) + def min( + self, + numeric_only: bool = False, + min_count: int = -1, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + ): + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_min_max + + return self._numba_agg_general(sliding_min_max, engine_kwargs, False) + else: + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="min", + npfunc=np.min, + ) + + @final + @doc(_groupby_agg_method_template, fname="max", no=False, mc=-1) + def max( + self, + numeric_only: bool = False, + min_count: int = -1, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + ): + if maybe_use_numba(engine): + from pandas.core._numba.kernels import sliding_min_max + + return self._numba_agg_general(sliding_min_max, engine_kwargs, True) + else: + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="max", + npfunc=np.max, + ) + + @final + @Substitution(name="groupby") + def first(self, numeric_only: bool = False, min_count: int = -1): + """ + Compute the first non-null entry of each column. + + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. + min_count : int, default -1 + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + + Returns + ------- + Series or DataFrame + First non-null of values within each group. + + See Also + -------- + DataFrame.groupby : Apply a function groupby to each row or column of a + DataFrame. + DataFrame.core.groupby.GroupBy.last : Compute the last non-null entry of each + column. + DataFrame.core.groupby.GroupBy.nth : Take the nth row from each group. 
+ + Examples + -------- + >>> df = pd.DataFrame(dict(A=[1, 1, 3], B=[None, 5, 6], C=[1, 2, 3], + ... D=['3/11/2000', '3/12/2000', '3/13/2000'])) + >>> df['D'] = pd.to_datetime(df['D']) + >>> df.groupby("A").first() + B C D + A + 1 5.0 1 2000-03-11 + 3 6.0 3 2000-03-13 + >>> df.groupby("A").first(min_count=2) + B C D + A + 1 NaN 1.0 2000-03-11 + 3 NaN NaN NaT + >>> df.groupby("A").first(numeric_only=True) + B C + A + 1 5.0 1 + 3 6.0 3 + """ + + def first_compat(obj: NDFrameT, axis: int = 0): + def first(x: Series): + """Helper function for first item that isn't NA.""" + arr = x.array[notna(x.array)] + if not len(arr): + return np.nan + return arr[0] + + if isinstance(obj, DataFrame): + return obj.apply(first, axis=axis) + elif isinstance(obj, Series): + return first(obj) + else: # pragma: no cover + raise TypeError(type(obj)) + + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="first", + npfunc=first_compat, + ) + + @final + @Substitution(name="groupby") + def last(self, numeric_only: bool = False, min_count: int = -1): + """ + Compute the last non-null entry of each column. + + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. + min_count : int, default -1 + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + + Returns + ------- + Series or DataFrame + Last non-null of values within each group. + + See Also + -------- + DataFrame.groupby : Apply a function groupby to each row or column of a + DataFrame. + DataFrame.core.groupby.GroupBy.first : Compute the first non-null entry of each + column. + DataFrame.core.groupby.GroupBy.nth : Take the nth row from each group. + + Examples + -------- + >>> df = pd.DataFrame(dict(A=[1, 1, 3], B=[5, None, 6], C=[1, 2, 3])) + >>> df.groupby("A").last() + B C + A + 1 5.0 2 + 3 6.0 3 + """ + + def last_compat(obj: NDFrameT, axis: int = 0): + def last(x: Series): + """Helper function for last item that isn't NA.""" + arr = x.array[notna(x.array)] + if not len(arr): + return np.nan + return arr[-1] + + if isinstance(obj, DataFrame): + return obj.apply(last, axis=axis) + elif isinstance(obj, Series): + return last(obj) + else: # pragma: no cover + raise TypeError(type(obj)) + + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="last", + npfunc=last_compat, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def ohlc(self) -> DataFrame: + """ + Compute open, high, low and close values of a group, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex + + Returns + ------- + DataFrame + Open, high, low and close values within each group. 
+ """ + if self.obj.ndim == 1: + # self._iterate_slices() yields only self._selected_obj + obj = self._selected_obj + + is_numeric = is_numeric_dtype(obj.dtype) + if not is_numeric: + raise DataError("No numeric types to aggregate") + + res_values = self.grouper._cython_operation( + "aggregate", obj._values, "ohlc", axis=0, min_count=-1 + ) + + agg_names = ["open", "high", "low", "close"] + result = self.obj._constructor_expanddim( + res_values, index=self.grouper.result_index, columns=agg_names + ) + return self._reindex_output(result) + + return self._apply_to_column_groupbys( + lambda x: x.ohlc(), self._obj_with_exclusions + ) + + @doc(DataFrame.describe) + def describe(self, **kwargs): + with self._group_selection_context(): + if len(self._selected_obj) == 0: + described = self._selected_obj.describe(**kwargs) + if self._selected_obj.ndim == 1: + result = described + else: + result = described.unstack() + return result.to_frame().T.iloc[:0] + + result = self._python_apply_general( + lambda x: x.describe(**kwargs), + self._selected_obj, + not_indexed_same=True, + ) + if self.axis == 1: + return result.T + return result.unstack() + + @final + def resample(self, rule, *args, **kwargs): + """ + Provide resampling when using a TimeGrouper. + + Given a grouper, the function resamples it according to a string + "string" -> "frequency". + + See the :ref:`frequency aliases ` + documentation for more details. + + Parameters + ---------- + rule : str or DateOffset + The offset string or object representing target grouper conversion. + *args, **kwargs + Possible arguments are `how`, `fill_method`, `limit`, `kind` and + `on`, and other arguments of `TimeGrouper`. + + Returns + ------- + Grouper + Return a new grouper with our resampler appended. + + See Also + -------- + Grouper : Specify a frequency to resample with when + grouping by a key. + DatetimeIndex.resample : Frequency conversion and resampling of + time series. + + Examples + -------- + >>> idx = pd.date_range('1/1/2000', periods=4, freq='T') + >>> df = pd.DataFrame(data=4 * [range(2)], + ... index=idx, + ... columns=['a', 'b']) + >>> df.iloc[2, 0] = 5 + >>> df + a b + 2000-01-01 00:00:00 0 1 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:02:00 5 1 + 2000-01-01 00:03:00 0 1 + + Downsample the DataFrame into 3 minute bins and sum the values of + the timestamps falling into a bin. + + >>> df.groupby('a').resample('3T').sum() + a b + a + 0 2000-01-01 00:00:00 0 2 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:00:00 5 1 + + Upsample the series into 30 second bins. + + >>> df.groupby('a').resample('30S').sum() + a b + a + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:00:30 0 0 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:01:30 0 0 + 2000-01-01 00:02:00 0 0 + 2000-01-01 00:02:30 0 0 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:02:00 5 1 + + Resample by month. Values are assigned to the month of the period. + + >>> df.groupby('a').resample('M').sum() + a b + a + 0 2000-01-31 0 3 + 5 2000-01-31 5 1 + + Downsample the series into 3 minute bins as above, but close the right + side of the bin interval. + + >>> df.groupby('a').resample('3T', closed='right').sum() + a b + a + 0 1999-12-31 23:57:00 0 1 + 2000-01-01 00:00:00 0 2 + 5 2000-01-01 00:00:00 5 1 + + Downsample the series into 3 minute bins and close the right side of + the bin interval, but label each bin using the right edge instead of + the left. 
+ + >>> df.groupby('a').resample('3T', closed='right', label='right').sum() + a b + a + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:03:00 0 2 + 5 2000-01-01 00:03:00 5 1 + """ + from pandas.core.resample import get_resampler_for_grouping + + return get_resampler_for_grouping(self, rule, *args, **kwargs) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def rolling(self, *args, **kwargs) -> RollingGroupby: + """ + Return a rolling grouper, providing rolling functionality per group. + """ + from pandas.core.window import RollingGroupby + + return RollingGroupby( + self._selected_obj, + *args, + _grouper=self.grouper, + _as_index=self.as_index, + **kwargs, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def expanding(self, *args, **kwargs) -> ExpandingGroupby: + """ + Return an expanding grouper, providing expanding + functionality per group. + """ + from pandas.core.window import ExpandingGroupby + + return ExpandingGroupby( + self._selected_obj, + *args, + _grouper=self.grouper, + **kwargs, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def ewm(self, *args, **kwargs) -> ExponentialMovingWindowGroupby: + """ + Return an ewm grouper, providing ewm functionality per group. + """ + from pandas.core.window import ExponentialMovingWindowGroupby + + return ExponentialMovingWindowGroupby( + self._selected_obj, + *args, + _grouper=self.grouper, + **kwargs, + ) + + @final + def _fill(self, direction: Literal["ffill", "bfill"], limit=None): + """ + Shared function for `pad` and `backfill` to call Cython method. + + Parameters + ---------- + direction : {'ffill', 'bfill'} + Direction passed to underlying Cython function. `bfill` will cause + values to be filled backwards. `ffill` and any other values will + default to a forward fill + limit : int, default None + Maximum number of consecutive values to fill. If `None`, this + method will convert to -1 prior to passing to Cython + + Returns + ------- + `Series` or `DataFrame` with filled values + + See Also + -------- + pad : Returns Series with minimum number of char in object. + backfill : Backward fill the missing values in the dataset. 
+ """ + # Need int value for Cython + if limit is None: + limit = -1 + + ids, _, _ = self.grouper.group_info + sorted_labels = np.argsort(ids, kind="mergesort").astype(np.intp, copy=False) + if direction == "bfill": + sorted_labels = sorted_labels[::-1] + + col_func = partial( + libgroupby.group_fillna_indexer, + labels=ids, + sorted_labels=sorted_labels, + direction=direction, + limit=limit, + dropna=self.dropna, + ) + + def blk_func(values: ArrayLike) -> ArrayLike: + mask = isna(values) + if values.ndim == 1: + indexer = np.empty(values.shape, dtype=np.intp) + col_func(out=indexer, mask=mask) + return algorithms.take_nd(values, indexer) + + else: + # We broadcast algorithms.take_nd analogous to + # np.take_along_axis + + # Note: we only get here with backfill/pad, + # so if we have a dtype that cannot hold NAs, + # then there will be no -1s in indexer, so we can use + # the original dtype (no need to ensure_dtype_can_hold_na) + if isinstance(values, np.ndarray): + dtype = values.dtype + if self.grouper.has_dropped_na: + # dropped null groups give rise to nan in the result + dtype = ensure_dtype_can_hold_na(values.dtype) + out = np.empty(values.shape, dtype=dtype) + else: + out = type(values)._empty(values.shape, dtype=values.dtype) + + for i in range(len(values)): + # call group_fillna_indexer column-wise + indexer = np.empty(values.shape[1], dtype=np.intp) + col_func(out=indexer, mask=mask[i]) + out[i, :] = algorithms.take_nd(values[i], indexer) + return out + + obj = self._obj_with_exclusions + if self.axis == 1: + obj = obj.T + mgr = obj._mgr + res_mgr = mgr.apply(blk_func) + + new_obj = obj._constructor(res_mgr) + if isinstance(new_obj, Series): + new_obj.name = obj.name + + return self._wrap_transformed_output(new_obj) + + @final + @Substitution(name="groupby") + def ffill(self, limit=None): + """ + Forward fill the values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series or DataFrame + Object with missing values filled. + + See Also + -------- + Series.ffill: Returns Series with minimum number of char in object. + DataFrame.ffill: Object with missing values filled or None if inplace=True. + Series.fillna: Fill NaN values of a Series. + DataFrame.fillna: Fill NaN values of a DataFrame. + """ + return self._fill("ffill", limit=limit) + + def pad(self, limit=None): + """ + Forward fill the values. + + .. deprecated:: 1.4 + Use ffill instead. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series or DataFrame + Object with missing values filled. + """ + warnings.warn( + "pad is deprecated and will be removed in a future version. " + "Use ffill instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.ffill(limit=limit) + + @final + @Substitution(name="groupby") + def bfill(self, limit=None): + """ + Backward fill the values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series or DataFrame + Object with missing values filled. + + See Also + -------- + Series.bfill : Backward fill the missing values in the dataset. + DataFrame.bfill: Backward fill the missing values in the dataset. + Series.fillna: Fill NaN values of a Series. + DataFrame.fillna: Fill NaN values of a DataFrame. + """ + return self._fill("bfill", limit=limit) + + def backfill(self, limit=None): + """ + Backward fill the values. + + .. deprecated:: 1.4 + Use bfill instead. 
+ + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series or DataFrame + Object with missing values filled. + """ + warnings.warn( + "backfill is deprecated and will be removed in a future version. " + "Use bfill instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.bfill(limit=limit) + + # https://github.com/python/mypy/issues/1362 + # Mypy does not support decorated properties + @final # type: ignore[misc] + @property + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def nth(self) -> GroupByNthSelector: + """ + Take the nth row from each group if n is an int, otherwise a subset of rows. + + Can be either a call or an index. dropna is not available with index notation. + Index notation accepts a comma separated list of integers and slices. + + If dropna, will take the nth non-null row, dropna is either + 'all' or 'any'; this is equivalent to calling dropna(how=dropna) + before the groupby. + + Parameters + ---------- + n : int, slice or list of ints and slices + A single nth value for the row or a list of nth values or slices. + + .. versionchanged:: 1.4.0 + Added slice and lists containing slices. + Added index notation. + + dropna : {'any', 'all', None}, default None + Apply the specified dropna operation before counting which row is + the nth row. Only supported if n is an int. + + Returns + ------- + Series or DataFrame + N-th value within each group. + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + ... 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) + >>> g = df.groupby('A') + >>> g.nth(0) + B + A + 1 NaN + 2 3.0 + >>> g.nth(1) + B + A + 1 2.0 + 2 5.0 + >>> g.nth(-1) + B + A + 1 4.0 + 2 5.0 + >>> g.nth([0, 1]) + B + A + 1 NaN + 1 2.0 + 2 3.0 + 2 5.0 + >>> g.nth(slice(None, -1)) + B + A + 1 NaN + 1 2.0 + 2 3.0 + + Index notation may also be used + + >>> g.nth[0, 1] + B + A + 1 NaN + 1 2.0 + 2 3.0 + 2 5.0 + >>> g.nth[:-1] + B + A + 1 NaN + 1 2.0 + 2 3.0 + + Specifying `dropna` allows count ignoring ``NaN`` + + >>> g.nth(0, dropna='any') + B + A + 1 2.0 + 2 3.0 + + NaNs denote group exhausted when using dropna + + >>> g.nth(3, dropna='any') + B + A + 1 NaN + 2 NaN + + Specifying `as_index=False` in `groupby` keeps the original index. 
+ + >>> df.groupby('A', as_index=False).nth(1) + A B + 1 1 2.0 + 4 2 5.0 + """ + return GroupByNthSelector(self) + + def _nth( + self, + n: PositionalIndexer | tuple, + dropna: Literal["any", "all", None] = None, + ) -> NDFrameT: + if not dropna: + with self._group_selection_context(): + mask = self._make_mask_from_positional_indexer(n) + + ids, _, _ = self.grouper.group_info + + # Drop NA values in grouping + mask = mask & (ids != -1) + + out = self._mask_selected_obj(mask) + if not self.as_index: + return out + + result_index = self.grouper.result_index + if self.axis == 0: + out.index = result_index[ids[mask]] + if not self.observed and isinstance(result_index, CategoricalIndex): + out = out.reindex(result_index) + + out = self._reindex_output(out) + else: + out.columns = result_index[ids[mask]] + + return out.sort_index(axis=self.axis) if self.sort else out + + # dropna is truthy + if not is_integer(n): + raise ValueError("dropna option only supported for an integer argument") + + if dropna not in ["any", "all"]: + # Note: when agg-ing picker doesn't raise this, just returns NaN + raise ValueError( + "For a DataFrame or Series groupby.nth, dropna must be " + "either None, 'any' or 'all', " + f"(was passed {dropna})." + ) + + # old behaviour, but with all and any support for DataFrames. + # modified in GH 7559 to have better perf + n = cast(int, n) + max_len = n if n >= 0 else -1 - n + dropped = self.obj.dropna(how=dropna, axis=self.axis) + + # get a new grouper for our dropped obj + if self.keys is None and self.level is None: + + # we don't have the grouper info available + # (e.g. we have selected out + # a column that is not in the current object) + axis = self.grouper.axis + grouper = axis[axis.isin(dropped.index)] + + else: + + # create a grouper with the original parameters, but on dropped + # object + from pandas.core.groupby.grouper import get_grouper + + grouper, _, _ = get_grouper( + dropped, + key=self.keys, + axis=self.axis, + level=self.level, + sort=self.sort, + mutated=self.mutated, + ) + + grb = dropped.groupby( + grouper, as_index=self.as_index, sort=self.sort, axis=self.axis + ) + sizes, result = grb.size(), grb.nth(n) + mask = (sizes < max_len)._values + + # set the results which don't meet the criteria + if len(result) and mask.any(): + result.loc[mask] = np.nan + + # reset/reindex to the original groups + if len(self.obj) == len(dropped) or len(result) == len( + self.grouper.result_index + ): + result.index = self.grouper.result_index + else: + result = result.reindex(self.grouper.result_index) + + return result + + @final + def quantile( + self, + q=0.5, + interpolation: str = "linear", + numeric_only: bool | lib.NoDefault = lib.no_default, + ): + """ + Return group values at the given quantile, a la numpy.percentile. + + Parameters + ---------- + q : float or array-like, default 0.5 (50% quantile) + Value(s) between 0 and 1 providing the quantile(s) to compute. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + Method to use when the desired quantile falls between two points. + numeric_only : bool, default True + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series or DataFrame + Return type determined by caller of GroupBy object. + + See Also + -------- + Series.quantile : Similar method for Series. + DataFrame.quantile : Similar method for DataFrame. + numpy.percentile : NumPy method to compute qth percentile. + + Examples + -------- + >>> df = pd.DataFrame([ + ... 
['a', 1], ['a', 2], ['a', 3], + ... ['b', 1], ['b', 3], ['b', 5] + ... ], columns=['key', 'val']) + >>> df.groupby('key').quantile() + val + key + a 2.0 + b 3.0 + """ + numeric_only_bool = self._resolve_numeric_only("quantile", numeric_only, axis=0) + if ( + numeric_only_bool + and self.obj.ndim == 1 + and not is_numeric_dtype(self.obj.dtype) + ): + raise TypeError( + f"{type(self).__name__}.quantile called with " + f"numeric_only={numeric_only} and dtype {self.obj.dtype}" + ) + + def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]: + if is_object_dtype(vals): + raise TypeError( + "'quantile' cannot be performed against 'object' dtypes!" + ) + + inference: np.dtype | None = None + if is_integer_dtype(vals.dtype): + if isinstance(vals, ExtensionArray): + out = vals.to_numpy(dtype=float, na_value=np.nan) + else: + out = vals + inference = np.dtype(np.int64) + elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray): + out = vals.to_numpy(dtype=float, na_value=np.nan) + elif is_datetime64_dtype(vals.dtype): + inference = np.dtype("datetime64[ns]") + out = np.asarray(vals).astype(float) + elif is_timedelta64_dtype(vals.dtype): + inference = np.dtype("timedelta64[ns]") + out = np.asarray(vals).astype(float) + elif isinstance(vals, ExtensionArray) and is_float_dtype(vals): + inference = np.dtype(np.float64) + out = vals.to_numpy(dtype=float, na_value=np.nan) + else: + out = np.asarray(vals) + + return out, inference + + def post_processor(vals: np.ndarray, inference: np.dtype | None) -> np.ndarray: + if inference: + # Check for edge case + if not ( + is_integer_dtype(inference) + and interpolation in {"linear", "midpoint"} + ): + vals = vals.astype(inference) + + return vals + + orig_scalar = is_scalar(q) + if orig_scalar: + q = [q] + + qs = np.array(q, dtype=np.float64) + ids, _, ngroups = self.grouper.group_info + nqs = len(qs) + + func = partial( + libgroupby.group_quantile, labels=ids, qs=qs, interpolation=interpolation + ) + + # Put '-1' (NaN) labels as the last group so it does not interfere + # with the calculations. Note: length check avoids failure on empty + # labels. 
In that case, the value doesn't matter + na_label_for_sorting = ids.max() + 1 if len(ids) > 0 else 0 + labels_for_lexsort = np.where(ids == -1, na_label_for_sorting, ids) + + def blk_func(values: ArrayLike) -> ArrayLike: + mask = isna(values) + vals, inference = pre_processor(values) + + ncols = 1 + if vals.ndim == 2: + ncols = vals.shape[0] + shaped_labels = np.broadcast_to( + labels_for_lexsort, (ncols, len(labels_for_lexsort)) + ) + else: + shaped_labels = labels_for_lexsort + + out = np.empty((ncols, ngroups, nqs), dtype=np.float64) + + # Get an index of values sorted by values and then labels + order = (vals, shaped_labels) + sort_arr = np.lexsort(order).astype(np.intp, copy=False) + + if vals.ndim == 1: + func(out[0], values=vals, mask=mask, sort_indexer=sort_arr) + else: + for i in range(ncols): + func(out[i], values=vals[i], mask=mask[i], sort_indexer=sort_arr[i]) + + if vals.ndim == 1: + out = out.ravel("K") + else: + out = out.reshape(ncols, ngroups * nqs) + return post_processor(out, inference) + + obj = self._obj_with_exclusions + is_ser = obj.ndim == 1 + mgr = self._get_data_to_aggregate() + data = mgr.get_numeric_data() if numeric_only_bool else mgr + ignore_failures = numeric_only_bool + res_mgr = data.grouped_reduce(blk_func, ignore_failures=ignore_failures) + + if ( + numeric_only is lib.no_default + and not is_ser + and len(res_mgr.items) != len(mgr.items) + ): + warn_dropping_nuisance_columns_deprecated( + type(self), "quantile", numeric_only + ) + + if len(res_mgr.items) == 0: + # re-call grouped_reduce to get the desired exception message + mgr.grouped_reduce(blk_func, ignore_failures=False) + # grouped_reduce _should_ raise, so this should not be reached + raise TypeError( # pragma: no cover + "All columns were dropped in grouped_reduce" + ) + + if is_ser: + res = self._wrap_agged_manager(res_mgr) + else: + res = obj._constructor(res_mgr) + + if orig_scalar: + # Avoid expensive MultiIndex construction + return self._wrap_aggregated_output(res) + return self._wrap_aggregated_output(res, qs=qs) + + @final + @Substitution(name="groupby") + def ngroup(self, ascending: bool = True): + """ + Number each group from 0 to the number of groups - 1. + + This is the enumerative complement of cumcount. Note that the + numbers given to the groups match the order in which the groups + would be seen when iterating over the groupby object, not the + order they are first observed. + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from number of group - 1 to 0. + + Returns + ------- + Series + Unique numbers for each group. + + See Also + -------- + .cumcount : Number the rows in each group. 
+ + Examples + -------- + >>> df = pd.DataFrame({"A": list("aaabba")}) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').ngroup() + 0 0 + 1 0 + 2 0 + 3 1 + 4 1 + 5 0 + dtype: int64 + >>> df.groupby('A').ngroup(ascending=False) + 0 1 + 1 1 + 2 1 + 3 0 + 4 0 + 5 1 + dtype: int64 + >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup() + 0 0 + 1 0 + 2 1 + 3 3 + 4 2 + 5 0 + dtype: int64 + """ + with self._group_selection_context(): + index = self._selected_obj.index + comp_ids = self.grouper.group_info[0] + + dtype: type + if self.grouper.has_dropped_na: + comp_ids = np.where(comp_ids == -1, np.nan, comp_ids) + dtype = np.float64 + else: + dtype = np.int64 + + result = self._obj_1d_constructor(comp_ids, index, dtype=dtype) + if not ascending: + result = self.ngroups - 1 - result + return result + + @final + @Substitution(name="groupby") + def cumcount(self, ascending: bool = True): + """ + Number each item in each group from 0 to the length of that group - 1. + + Essentially this is equivalent to + + .. code-block:: python + + self.apply(lambda x: pd.Series(np.arange(len(x)), x.index)) + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. + + Returns + ------- + Series + Sequence number of each element within each group. + + See Also + -------- + .ngroup : Number the groups themselves. + + Examples + -------- + >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], + ... columns=['A']) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').cumcount() + 0 0 + 1 1 + 2 2 + 3 0 + 4 1 + 5 3 + dtype: int64 + >>> df.groupby('A').cumcount(ascending=False) + 0 3 + 1 2 + 2 1 + 3 1 + 4 0 + 5 0 + dtype: int64 + """ + with self._group_selection_context(): + index = self._selected_obj._get_axis(self.axis) + cumcounts = self._cumcount_array(ascending=ascending) + return self._obj_1d_constructor(cumcounts, index) + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def rank( + self, + method: str = "average", + ascending: bool = True, + na_option: str = "keep", + pct: bool = False, + axis: int = 0, + ) -> NDFrameT: + """ + Provide the rank of values within each group. + + Parameters + ---------- + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + * average: average rank of group. + * min: lowest rank in group. + * max: highest rank in group. + * first: ranks assigned in order they appear in the array. + * dense: like 'min', but rank always increases by 1 between groups. + ascending : bool, default True + False for ranks by high (1) to low (N). + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are. + * top: smallest rank if ascending. + * bottom: smallest rank if descending. + pct : bool, default False + Compute percentage rank of data within each group. + axis : int, default 0 + The axis of the object over which to compute the rank. + + Returns + ------- + DataFrame with ranking of values within each group + %(see_also)s + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "group": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"], + ... "value": [2, 4, 2, 3, 5, 1, 2, 4, 1, 5], + ... } + ... ) + >>> df + group value + 0 a 2 + 1 a 4 + 2 a 2 + 3 a 3 + 4 a 5 + 5 b 1 + 6 b 2 + 7 b 4 + 8 b 1 + 9 b 5 + >>> for method in ['average', 'min', 'max', 'dense', 'first']: + ... 
df[f'{method}_rank'] = df.groupby('group')['value'].rank(method) + >>> df + group value average_rank min_rank max_rank dense_rank first_rank + 0 a 2 1.5 1.0 2.0 1.0 1.0 + 1 a 4 4.0 4.0 4.0 3.0 4.0 + 2 a 2 1.5 1.0 2.0 1.0 2.0 + 3 a 3 3.0 3.0 3.0 2.0 3.0 + 4 a 5 5.0 5.0 5.0 4.0 5.0 + 5 b 1 1.5 1.0 2.0 1.0 1.0 + 6 b 2 3.0 3.0 3.0 2.0 3.0 + 7 b 4 4.0 4.0 4.0 3.0 4.0 + 8 b 1 1.5 1.0 2.0 1.0 2.0 + 9 b 5 5.0 5.0 5.0 4.0 5.0 + """ + if na_option not in {"keep", "top", "bottom"}: + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + raise ValueError(msg) + + kwargs = { + "ties_method": method, + "ascending": ascending, + "na_option": na_option, + "pct": pct, + } + if axis != 0: + # DataFrame uses different keyword name + kwargs["method"] = kwargs.pop("ties_method") + f = lambda x: x.rank(axis=axis, numeric_only=False, **kwargs) + result = self._python_apply_general( + f, self._selected_obj, is_transform=True + ) + return result + + return self._cython_transform( + "rank", + numeric_only=False, + axis=axis, + **kwargs, + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def cumprod(self, axis=0, *args, **kwargs) -> NDFrameT: + """ + Cumulative product for each group. + + Returns + ------- + Series or DataFrame + """ + nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"]) + if axis != 0: + f = lambda x: x.cumprod(axis=axis, **kwargs) + return self._python_apply_general(f, self._selected_obj, is_transform=True) + + return self._cython_transform("cumprod", **kwargs) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def cumsum(self, axis=0, *args, **kwargs) -> NDFrameT: + """ + Cumulative sum for each group. + + Returns + ------- + Series or DataFrame + """ + nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"]) + if axis != 0: + f = lambda x: x.cumsum(axis=axis, **kwargs) + return self._python_apply_general(f, self._selected_obj, is_transform=True) + + return self._cython_transform("cumsum", **kwargs) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def cummin(self, axis=0, numeric_only=False, **kwargs) -> NDFrameT: + """ + Cumulative min for each group. + + Returns + ------- + Series or DataFrame + """ + skipna = kwargs.get("skipna", True) + if axis != 0: + f = lambda x: np.minimum.accumulate(x, axis) + numeric_only_bool = self._resolve_numeric_only("cummax", numeric_only, axis) + obj = self._selected_obj + if numeric_only_bool: + obj = obj._get_numeric_data() + return self._python_apply_general(f, obj, is_transform=True) + + return self._cython_transform( + "cummin", numeric_only=numeric_only, skipna=skipna + ) + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def cummax(self, axis=0, numeric_only=False, **kwargs) -> NDFrameT: + """ + Cumulative max for each group. 
+ + Returns + ------- + Series or DataFrame + """ + skipna = kwargs.get("skipna", True) + if axis != 0: + f = lambda x: np.maximum.accumulate(x, axis) + numeric_only_bool = self._resolve_numeric_only("cummax", numeric_only, axis) + obj = self._selected_obj + if numeric_only_bool: + obj = obj._get_numeric_data() + return self._python_apply_general(f, obj, is_transform=True) + + return self._cython_transform( + "cummax", numeric_only=numeric_only, skipna=skipna + ) + + @final + def _get_cythonized_result( + self, + base_func: Callable, + cython_dtype: np.dtype, + numeric_only: bool | lib.NoDefault = lib.no_default, + needs_counts: bool = False, + needs_nullable: bool = False, + needs_mask: bool = False, + pre_processing=None, + post_processing=None, + **kwargs, + ): + """ + Get result for Cythonized functions. + + Parameters + ---------- + base_func : callable, Cythonized function to be called + cython_dtype : np.dtype + Type of the array that will be modified by the Cython call. + numeric_only : bool, default True + Whether only numeric datatypes should be computed + needs_counts : bool, default False + Whether the counts should be a part of the Cython call + needs_mask : bool, default False + Whether boolean mask needs to be part of the Cython call + signature + needs_nullable : bool, default False + Whether a bool specifying if the input is nullable is part + of the Cython call signature + pre_processing : function, default None + Function to be applied to `values` prior to passing to Cython. + Function should return a tuple where the first element is the + values to be passed to Cython and the second element is an optional + type which the values should be converted to after being returned + by the Cython operation. This function is also responsible for + raising a TypeError if the values have an invalid type. Raises + if `needs_values` is False. + post_processing : function, default None + Function to be applied to result of Cython function. Should accept + an array of values as the first argument and type inferences as its + second argument, i.e. the signature should be + (ndarray, Type). If `needs_nullable=True`, a third argument should be + `nullable`, to allow for processing specific to nullable values. 
+ **kwargs : dict + Extra arguments to be passed back to Cython funcs + + Returns + ------- + `Series` or `DataFrame` with filled values + """ + how = base_func.__name__ + numeric_only_bool = self._resolve_numeric_only(how, numeric_only, axis=0) + + if post_processing and not callable(post_processing): + raise ValueError("'post_processing' must be a callable!") + if pre_processing and not callable(pre_processing): + raise ValueError("'pre_processing' must be a callable!") + + grouper = self.grouper + + ids, _, ngroups = grouper.group_info + + base_func = partial(base_func, labels=ids) + + def blk_func(values: ArrayLike) -> ArrayLike: + values = values.T + ncols = 1 if values.ndim == 1 else values.shape[1] + + result: ArrayLike + result = np.zeros(ngroups * ncols, dtype=cython_dtype) + result = result.reshape((ngroups, ncols)) + + func = partial(base_func, out=result) + + inferences = None + + if needs_counts: + counts = np.zeros(self.ngroups, dtype=np.int64) + func = partial(func, counts=counts) + + vals = values + if pre_processing: + vals, inferences = pre_processing(vals) + + vals = vals.astype(cython_dtype, copy=False) + if vals.ndim == 1: + vals = vals.reshape((-1, 1)) + func = partial(func, values=vals) + + if needs_mask: + mask = isna(values).view(np.uint8) + if mask.ndim == 1: + mask = mask.reshape(-1, 1) + func = partial(func, mask=mask) + + if needs_nullable: + is_nullable = isinstance(values, BaseMaskedArray) + func = partial(func, nullable=is_nullable) + + func(**kwargs) # Call func to modify indexer values in place + + if values.ndim == 1: + assert result.shape[1] == 1, result.shape + result = result[:, 0] + + if post_processing: + pp_kwargs = {} + if needs_nullable: + pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray) + + result = post_processing(result, inferences, **pp_kwargs) + + return result.T + + obj = self._obj_with_exclusions + + # Operate block-wise instead of column-by-column + is_ser = obj.ndim == 1 + mgr = self._get_data_to_aggregate() + orig_mgr_len = len(mgr) + + if numeric_only_bool: + mgr = mgr.get_numeric_data() + + res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) + + if not is_ser and len(res_mgr.items) != orig_mgr_len: + howstr = how.replace("group_", "") + warn_dropping_nuisance_columns_deprecated(type(self), howstr, numeric_only) + + if len(res_mgr.items) == 0: + # We re-call grouped_reduce to get the right exception message + mgr.grouped_reduce(blk_func, ignore_failures=False) + # grouped_reduce _should_ raise, so this should not be reached + raise TypeError( # pragma: no cover + "All columns were dropped in grouped_reduce" + ) + + if is_ser: + out = self._wrap_agged_manager(res_mgr) + else: + out = obj._constructor(res_mgr) + + return self._wrap_aggregated_output(out) + + @final + @Substitution(name="groupby") + def shift(self, periods=1, freq=None, axis=0, fill_value=None): + """ + Shift each group by periods observations. + + If freq is passed, the index will be increased using the periods and the freq. + + Parameters + ---------- + periods : int, default 1 + Number of periods to shift. + freq : str, optional + Frequency string. + axis : axis to shift, default 0 + Shift direction. + fill_value : optional + The scalar value to use for newly introduced missing values. + + Returns + ------- + Series or DataFrame + Object shifted within each group. + + See Also + -------- + Index.shift : Shift values of Index. + tshift : Shift the time index, using the index’s frequency + if available. 
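+
+        Examples
+        --------
+        A minimal sketch with a hypothetical frame; each group is shifted
+        independently and the grouping column is excluded from the result:
+
+        >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})
+        >>> df.groupby("key").shift(1)
+           val
+        0  NaN
+        1  1.0
+        2  NaN
+        3  3.0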
+ """ + if freq is not None or axis != 0: + f = lambda x: x.shift(periods, freq, axis, fill_value) + return self._python_apply_general(f, self._selected_obj, is_transform=True) + + ids, _, ngroups = self.grouper.group_info + res_indexer = np.zeros(len(ids), dtype=np.int64) + + libgroupby.group_shift_indexer(res_indexer, ids, ngroups, periods) + + obj = self._obj_with_exclusions + + res = obj._reindex_with_indexers( + {self.axis: (obj.axes[self.axis], res_indexer)}, + fill_value=fill_value, + allow_dups=True, + ) + return res + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def diff(self, periods: int = 1, axis: int = 0) -> NDFrameT: + """ + First discrete difference of element. + + Calculates the difference of each element compared with another + element in the group (default is element in previous row). + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative values. + axis : axis to shift, default 0 + Take difference over rows (0) or columns (1). + + Returns + ------- + Series or DataFrame + First differences. + """ + if axis != 0: + return self.apply(lambda x: x.diff(periods=periods, axis=axis)) + + obj = self._obj_with_exclusions + shifted = self.shift(periods=periods, axis=axis) + + # GH45562 - to retain existing behavior and match behavior of Series.diff(), + # int8 and int16 are coerced to float32 rather than float64. + dtypes_to_f32 = ["int8", "int16"] + if obj.ndim == 1: + if obj.dtype in dtypes_to_f32: + shifted = shifted.astype("float32") + else: + to_coerce = [c for c, dtype in obj.dtypes.items() if dtype in dtypes_to_f32] + if len(to_coerce): + shifted = shifted.astype({c: "float32" for c in to_coerce}) + + return obj - shifted + + @final + @Substitution(name="groupby") + @Appender(_common_see_also) + def pct_change(self, periods=1, fill_method="ffill", limit=None, freq=None, axis=0): + """ + Calculate pct_change of each value to previous entry in group. + + Returns + ------- + Series or DataFrame + Percentage changes within each group. + """ + # TODO(GH#23918): Remove this conditional for SeriesGroupBy when + # GH#23918 is fixed + if freq is not None or axis != 0: + f = lambda x: x.pct_change( + periods=periods, + fill_method=fill_method, + limit=limit, + freq=freq, + axis=axis, + ) + return self._python_apply_general(f, self._selected_obj, is_transform=True) + + if fill_method is None: # GH30463 + fill_method = "ffill" + limit = 0 + filled = getattr(self, fill_method)(limit=limit) + fill_grp = filled.groupby( + self.grouper.codes, axis=self.axis, group_keys=self.group_keys + ) + shifted = fill_grp.shift(periods=periods, freq=freq, axis=self.axis) + return (filled / shifted) - 1 + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def head(self, n: int = 5) -> NDFrameT: + """ + Return first n rows of each group. + + Similar to ``.apply(lambda x: x.head(n))``, but it returns a subset of rows + from the original DataFrame with original index and order preserved + (``as_index`` flag is ignored). + + Parameters + ---------- + n : int + If positive: number of entries to include from start of each group. + If negative: number of entries to exclude from end of each group. + + Returns + ------- + Series or DataFrame + Subset of original Series or DataFrame as determined by n. + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], + ... 
columns=['A', 'B']) + >>> df.groupby('A').head(1) + A B + 0 1 2 + 2 5 6 + >>> df.groupby('A').head(-1) + A B + 0 1 2 + """ + self._reset_group_selection() + mask = self._make_mask_from_positional_indexer(slice(None, n)) + return self._mask_selected_obj(mask) + + @final + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def tail(self, n: int = 5) -> NDFrameT: + """ + Return last n rows of each group. + + Similar to ``.apply(lambda x: x.tail(n))``, but it returns a subset of rows + from the original DataFrame with original index and order preserved + (``as_index`` flag is ignored). + + Parameters + ---------- + n : int + If positive: number of entries to include from end of each group. + If negative: number of entries to exclude from start of each group. + + Returns + ------- + Series or DataFrame + Subset of original Series or DataFrame as determined by n. + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], + ... columns=['A', 'B']) + >>> df.groupby('A').tail(1) + A B + 1 a 2 + 3 b 2 + >>> df.groupby('A').tail(-1) + A B + 1 a 2 + 3 b 2 + """ + self._reset_group_selection() + if n: + mask = self._make_mask_from_positional_indexer(slice(-n, None)) + else: + mask = self._make_mask_from_positional_indexer([]) + + return self._mask_selected_obj(mask) + + @final + def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT: + """ + Return _selected_obj with mask applied to the correct axis. + + Parameters + ---------- + mask : np.ndarray[bool] + Boolean mask to apply. + + Returns + ------- + Series or DataFrame + Filtered _selected_obj. + """ + ids = self.grouper.group_info[0] + mask = mask & (ids != -1) + + if self.axis == 0: + return self._selected_obj[mask] + else: + return self._selected_obj.iloc[:, mask] + + @final + def _reindex_output( + self, + output: OutputFrameOrSeries, + fill_value: Scalar = np.NaN, + qs: npt.NDArray[np.float64] | None = None, + ) -> OutputFrameOrSeries: + """ + If we have categorical groupers, then we might want to make sure that + we have a fully re-indexed output to the levels. This means expanding + the output space to accommodate all values in the cartesian product of + our groups, regardless of whether they were observed in the data or + not. This will expand the output space if there are missing groups. + + The method returns early without modifying the input if the number of + groupings is less than 2, self.observed == True or none of the groupers + are categorical. + + Parameters + ---------- + output : Series or DataFrame + Object resulting from grouping and applying an operation. + fill_value : scalar, default np.NaN + Value to use for unobserved categories if self.observed is False. + qs : np.ndarray[float64] or None, default None + quantile values, only relevant for quantile. + + Returns + ------- + Series or DataFrame + Object (potentially) re-indexed to include all possible groups. 
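+
+        Examples
+        --------
+        Grouping by a Categorical with an unobserved category together with a
+        second key expands the result to the full cartesian product (a minimal
+        sketch with hypothetical data):
+
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "cat": pd.Categorical(["a", "a"], categories=["a", "b"]),
+        ...         "num": [1, 2],
+        ...         "val": [10, 20],
+        ...     }
+        ... )
+        >>> res = df.groupby(["cat", "num"]).sum()
+        >>> list(res.index)
+        [('a', 1), ('a', 2), ('b', 1), ('b', 2)]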
+ """ + groupings = self.grouper.groupings + if len(groupings) == 1: + return output + + # if we only care about the observed values + # we are done + elif self.observed: + return output + + # reindexing only applies to a Categorical grouper + elif not any( + isinstance(ping.grouping_vector, (Categorical, CategoricalIndex)) + for ping in groupings + ): + return output + + levels_list = [ping.group_index for ping in groupings] + names = self.grouper.names + if qs is not None: + # error: Argument 1 to "append" of "list" has incompatible type + # "ndarray[Any, dtype[floating[_64Bit]]]"; expected "Index" + levels_list.append(qs) # type: ignore[arg-type] + names = names + [None] + index, _ = MultiIndex.from_product(levels_list, names=names).sortlevel() + + if self.as_index: + # Always holds for SeriesGroupBy unless GH#36507 is implemented + d = { + self.obj._get_axis_name(self.axis): index, + "copy": False, + "fill_value": fill_value, + } + return output.reindex(**d) + + # GH 13204 + # Here, the categorical in-axis groupers, which need to be fully + # expanded, are columns in `output`. An idea is to do: + # output = output.set_index(self.grouper.names) + # .reindex(index).reset_index() + # but special care has to be taken because of possible not-in-axis + # groupers. + # So, we manually select and drop the in-axis grouper columns, + # reindex `output`, and then reset the in-axis grouper columns. + + # Select in-axis groupers + in_axis_grps = ( + (i, ping.name) for (i, ping) in enumerate(groupings) if ping.in_axis + ) + g_nums, g_names = zip(*in_axis_grps) + + output = output.drop(labels=list(g_names), axis=1) + + # Set a temp index and reindex (possibly expanding) + output = output.set_index(self.grouper.result_index).reindex( + index, copy=False, fill_value=fill_value + ) + + # Reset in-axis grouper columns + # (using level numbers `g_nums` because level names may not be unique) + output = output.reset_index(level=g_nums) + + return output.reset_index(drop=True) + + @final + def sample( + self, + n: int | None = None, + frac: float | None = None, + replace: bool = False, + weights: Sequence | Series | None = None, + random_state: RandomState | None = None, + ): + """ + Return a random sample of items from each group. + + You can use `random_state` for reproducibility. + + .. versionadded:: 1.1.0 + + Parameters + ---------- + n : int, optional + Number of items to return for each group. Cannot be used with + `frac` and must be no larger than the smallest group unless + `replace` is True. Default is one if `frac` is None. + frac : float, optional + Fraction of items to return. Cannot be used with `n`. + replace : bool, default False + Allow or disallow sampling of the same row more than once. + weights : list-like, optional + Default None results in equal probability weighting. + If passed a list-like then values must have the same length as + the underlying DataFrame or Series object and will be used as + sampling probabilities after normalization within each group. + Values must be non-negative with at least one positive element + within each group. + random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional + If int, array-like, or BitGenerator, seed for random number generator. + If np.random.RandomState or np.random.Generator, use as given. + + .. 
versionchanged:: 1.4.0 + + np.random.Generator objects now accepted + + Returns + ------- + Series or DataFrame + A new object of same type as caller containing items randomly + sampled within each group from the caller object. + + See Also + -------- + DataFrame.sample: Generate random samples from a DataFrame object. + numpy.random.choice: Generate a random sample from a given 1-D numpy + array. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"a": ["red"] * 2 + ["blue"] * 2 + ["black"] * 2, "b": range(6)} + ... ) + >>> df + a b + 0 red 0 + 1 red 1 + 2 blue 2 + 3 blue 3 + 4 black 4 + 5 black 5 + + Select one row at random for each distinct value in column a. The + `random_state` argument can be used to guarantee reproducibility: + + >>> df.groupby("a").sample(n=1, random_state=1) + a b + 4 black 4 + 2 blue 2 + 1 red 1 + + Set `frac` to sample fixed proportions rather than counts: + + >>> df.groupby("a")["b"].sample(frac=0.5, random_state=2) + 5 5 + 2 2 + 0 0 + Name: b, dtype: int64 + + Control sample probabilities within groups by setting weights: + + >>> df.groupby("a").sample( + ... n=1, + ... weights=[1, 1, 1, 0, 0, 1], + ... random_state=1, + ... ) + a b + 5 black 5 + 2 blue 2 + 0 red 0 + """ # noqa:E501 + size = sample.process_sampling_size(n, frac, replace) + if weights is not None: + weights_arr = sample.preprocess_weights( + self._selected_obj, weights, axis=self.axis + ) + + random_state = com.random_state(random_state) + + group_iterator = self.grouper.get_iterator(self._selected_obj, self.axis) + + sampled_indices = [] + for labels, obj in group_iterator: + grp_indices = self.indices[labels] + group_size = len(grp_indices) + if size is not None: + sample_size = size + else: + assert frac is not None + sample_size = round(frac * group_size) + + grp_sample = sample.sample( + group_size, + size=sample_size, + replace=replace, + weights=None if weights is None else weights_arr[grp_indices], + random_state=random_state, + ) + sampled_indices.append(grp_indices[grp_sample]) + + sampled_indices = np.concatenate(sampled_indices) + return self._selected_obj.take(sampled_indices, axis=self.axis) + + +@doc(GroupBy) +def get_groupby( + obj: NDFrame, + by: _KeysArgType | None = None, + axis: int = 0, + level=None, + grouper: ops.BaseGrouper | None = None, + exclusions=None, + selection=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool | lib.NoDefault = True, + squeeze: bool = False, + observed: bool = False, + mutated: bool = False, + dropna: bool = True, +) -> GroupBy: + + klass: type[GroupBy] + if isinstance(obj, Series): + from pandas.core.groupby.generic import SeriesGroupBy + + klass = SeriesGroupBy + elif isinstance(obj, DataFrame): + from pandas.core.groupby.generic import DataFrameGroupBy + + klass = DataFrameGroupBy + else: # pragma: no cover + raise TypeError(f"invalid type: {obj}") + + return klass( + obj=obj, + keys=by, + axis=axis, + level=level, + grouper=grouper, + exclusions=exclusions, + selection=selection, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, + observed=observed, + mutated=mutated, + dropna=dropna, + ) + + +def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiIndex: + """ + Insert the sequence 'qs' of quantiles as the inner-most level of a MultiIndex. + + The quantile level in the MultiIndex is a repeated copy of 'qs'. 
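+
+    For example (a sketch, not an exact repr)::
+
+        _insert_quantile_level(Index(["x", "y"]), np.array([0.25, 0.75]))
+        # -> MultiIndex of [("x", 0.25), ("x", 0.75), ("y", 0.25), ("y", 0.75)]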
+ + Parameters + ---------- + idx : Index + qs : np.ndarray[float64] + + Returns + ------- + MultiIndex + """ + nqs = len(qs) + + if idx._is_multi: + idx = cast(MultiIndex, idx) + lev_codes, lev = Index(qs).factorize() + levels = list(idx.levels) + [lev] + codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))] + mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None]) + else: + mi = MultiIndex.from_product([idx, qs]) + return mi + + +def warn_dropping_nuisance_columns_deprecated(cls, how: str, numeric_only) -> None: + if numeric_only is not lib.no_default and not numeric_only: + # numeric_only was specified and falsey but still dropped nuisance columns + warnings.warn( + "Dropping invalid columns in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, a TypeError will be raised. " + f"Before calling .{how}, select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif numeric_only is lib.no_default: + warnings.warn( + "The default value of numeric_only in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, numeric_only will default to False. " + f"Either specify numeric_only or select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py new file mode 100644 index 00000000..52377f4b --- /dev/null +++ b/pandas/core/groupby/grouper.py @@ -0,0 +1,997 @@ +""" +Provide user facing operators for doing the split part of the +split-apply-combine paradigm. +""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + final, +) +import warnings + +import numpy as np + +from pandas._typing import ( + ArrayLike, + NDFrameT, + npt, +) +from pandas.errors import InvalidIndexError +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import sanitize_to_nanoseconds +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_list_like, + is_scalar, +) + +import pandas.core.algorithms as algorithms +from pandas.core.arrays import ( + Categorical, + ExtensionArray, +) +import pandas.core.common as com +from pandas.core.frame import DataFrame +from pandas.core.groupby import ops +from pandas.core.groupby.categorical import ( + recode_for_groupby, + recode_from_groupby, +) +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, + MultiIndex, +) +from pandas.core.series import Series + +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas.core.generic import NDFrame + + +class Grouper: + """ + A Grouper allows the user to specify a groupby instruction for an object. + + This specification will select a column via the key parameter, or if the + level and/or axis parameters are given, a level of the index of the target + object. + + If `axis` and/or `level` are passed as keywords to both `Grouper` and + `groupby`, the values passed to `Grouper` take precedence. + + Parameters + ---------- + key : str, defaults to None + Groupby key, which selects the grouping column of the target. + level : name/number, defaults to None + The level for the target index. + freq : str / frequency object, defaults to None + This will groupby the specified frequency if the target selection + (via key or level) is a datetime-like object. 
For full specification + of available frequencies, please see `here + `_. + axis : str, int, defaults to 0 + Number/name of the axis. + sort : bool, default to False + Whether to sort the resulting labels. + closed : {'left' or 'right'} + Closed end of interval. Only when `freq` parameter is passed. + label : {'left' or 'right'} + Interval boundary to use for labeling. + Only when `freq` parameter is passed. + convention : {'start', 'end', 'e', 's'} + If grouper is PeriodIndex and `freq` parameter is passed. + base : int, default 0 + Only when `freq` parameter is passed. + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0. + + .. deprecated:: 1.1.0 + The new arguments that you should use are 'offset' or 'origin'. + + loffset : str, DateOffset, timedelta object + Only when `freq` parameter is passed. + + .. deprecated:: 1.1.0 + loffset is only working for ``.resample(...)`` and not for + Grouper (:issue:`28302`). + However, loffset is also deprecated for ``.resample(...)`` + See: :class:`DataFrame.resample` + + origin : Timestamp or str, default 'start_day' + The timestamp on which to adjust the grouping. The timezone of origin must + match the timezone of the index. + If string, must be one of the following: + + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + + .. versionadded:: 1.1.0 + + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + + .. versionadded:: 1.3.0 + + offset : Timedelta or str, default is None + An offset timedelta added to the origin. + + .. versionadded:: 1.1.0 + + dropna : bool, default True + If True, and if group keys contain NA values, NA values together with + row/column will be dropped. If False, NA values will also be treated as + the key in groups. + + .. versionadded:: 1.2.0 + + Returns + ------- + A specification for a groupby instruction + + Examples + -------- + Syntactic sugar for ``df.groupby('A')`` + + >>> df = pd.DataFrame( + ... { + ... "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"], + ... "Speed": [100, 5, 200, 300, 15], + ... } + ... ) + >>> df + Animal Speed + 0 Falcon 100 + 1 Parrot 5 + 2 Falcon 200 + 3 Falcon 300 + 4 Parrot 15 + >>> df.groupby(pd.Grouper(key="Animal")).mean() + Speed + Animal + Falcon 200.0 + Parrot 10.0 + + Specify a resample operation on the column 'Publish date' + + >>> df = pd.DataFrame( + ... { + ... "Publish date": [ + ... pd.Timestamp("2000-01-02"), + ... pd.Timestamp("2000-01-02"), + ... pd.Timestamp("2000-01-09"), + ... pd.Timestamp("2000-01-16") + ... ], + ... "ID": [0, 1, 2, 3], + ... "Price": [10, 20, 30, 40] + ... } + ... 
) + >>> df + Publish date ID Price + 0 2000-01-02 0 10 + 1 2000-01-02 1 20 + 2 2000-01-09 2 30 + 3 2000-01-16 3 40 + >>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean() + ID Price + Publish date + 2000-01-02 0.5 15.0 + 2000-01-09 2.0 30.0 + 2000-01-16 3.0 40.0 + + If you want to adjust the start of the bins based on a fixed timestamp: + + >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' + >>> rng = pd.date_range(start, end, freq='7min') + >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) + >>> ts + 2000-10-01 23:30:00 0 + 2000-10-01 23:37:00 3 + 2000-10-01 23:44:00 6 + 2000-10-01 23:51:00 9 + 2000-10-01 23:58:00 12 + 2000-10-02 00:05:00 15 + 2000-10-02 00:12:00 18 + 2000-10-02 00:19:00 21 + 2000-10-02 00:26:00 24 + Freq: 7T, dtype: int64 + + >>> ts.groupby(pd.Grouper(freq='17min')).sum() + 2000-10-01 23:14:00 0 + 2000-10-01 23:31:00 9 + 2000-10-01 23:48:00 21 + 2000-10-02 00:05:00 54 + 2000-10-02 00:22:00 24 + Freq: 17T, dtype: int64 + + >>> ts.groupby(pd.Grouper(freq='17min', origin='epoch')).sum() + 2000-10-01 23:18:00 0 + 2000-10-01 23:35:00 18 + 2000-10-01 23:52:00 27 + 2000-10-02 00:09:00 39 + 2000-10-02 00:26:00 24 + Freq: 17T, dtype: int64 + + >>> ts.groupby(pd.Grouper(freq='17min', origin='2000-01-01')).sum() + 2000-10-01 23:24:00 3 + 2000-10-01 23:41:00 15 + 2000-10-01 23:58:00 45 + 2000-10-02 00:15:00 45 + Freq: 17T, dtype: int64 + + If you want to adjust the start of the bins with an `offset` Timedelta, the two + following lines are equivalent: + + >>> ts.groupby(pd.Grouper(freq='17min', origin='start')).sum() + 2000-10-01 23:30:00 9 + 2000-10-01 23:47:00 21 + 2000-10-02 00:04:00 54 + 2000-10-02 00:21:00 24 + Freq: 17T, dtype: int64 + + >>> ts.groupby(pd.Grouper(freq='17min', offset='23h30min')).sum() + 2000-10-01 23:30:00 9 + 2000-10-01 23:47:00 21 + 2000-10-02 00:04:00 54 + 2000-10-02 00:21:00 24 + Freq: 17T, dtype: int64 + + To replace the use of the deprecated `base` argument, you can now use `offset`, + in this example it is equivalent to have `base=2`: + + >>> ts.groupby(pd.Grouper(freq='17min', offset='2min')).sum() + 2000-10-01 23:16:00 0 + 2000-10-01 23:33:00 9 + 2000-10-01 23:50:00 36 + 2000-10-02 00:07:00 39 + 2000-10-02 00:24:00 24 + Freq: 17T, dtype: int64 + """ + + axis: int + sort: bool + dropna: bool + _gpr_index: Index | None + _grouper: Index | None + + _attributes: tuple[str, ...] 
= ("key", "level", "freq", "axis", "sort", "dropna") + + def __new__(cls, *args, **kwargs): + if kwargs.get("freq") is not None: + from pandas.core.resample import TimeGrouper + + _check_deprecated_resample_kwargs(kwargs, origin=cls) + cls = TimeGrouper + return super().__new__(cls) + + def __init__( + self, + key=None, + level=None, + freq=None, + axis: int = 0, + sort: bool = False, + dropna: bool = True, + ) -> None: + self.key = key + self.level = level + self.freq = freq + self.axis = axis + self.sort = sort + self.dropna = dropna + + self.grouper = None + self._gpr_index = None + self.obj = None + self.indexer = None + self.binner = None + self._grouper = None + self._indexer = None + + @final + @property + def ax(self) -> Index: + index = self._gpr_index + if index is None: + raise ValueError("_set_grouper must be called before ax is accessed") + return index + + def _get_grouper( + self, obj: NDFrameT, validate: bool = True + ) -> tuple[Any, ops.BaseGrouper, NDFrameT]: + """ + Parameters + ---------- + obj : Series or DataFrame + validate : bool, default True + if True, validate the grouper + + Returns + ------- + a tuple of binner, grouper, obj (possibly sorted) + """ + self._set_grouper(obj) + # error: Value of type variable "NDFrameT" of "get_grouper" cannot be + # "Optional[Any]" + # error: Incompatible types in assignment (expression has type "BaseGrouper", + # variable has type "None") + self.grouper, _, self.obj = get_grouper( # type: ignore[type-var,assignment] + self.obj, + [self.key], + axis=self.axis, + level=self.level, + sort=self.sort, + validate=validate, + dropna=self.dropna, + ) + + # error: Incompatible return value type (got "Tuple[None, None, None]", + # expected "Tuple[Any, BaseGrouper, NDFrameT]") + return self.binner, self.grouper, self.obj # type: ignore[return-value] + + @final + def _set_grouper(self, obj: NDFrame, sort: bool = False) -> None: + """ + given an object and the specifications, setup the internal grouper + for this particular specification + + Parameters + ---------- + obj : Series or DataFrame + sort : bool, default False + whether the resulting grouper should be sorted + """ + assert obj is not None + + if self.key is not None and self.level is not None: + raise ValueError("The Grouper cannot specify both a key and a level!") + + # Keep self.grouper value before overriding + if self._grouper is None: + # TODO: What are we assuming about subsequent calls? + self._grouper = self._gpr_index + self._indexer = self.indexer + + # the key must be a valid info item + if self.key is not None: + key = self.key + # The 'on' is already defined + if getattr(self._gpr_index, "name", None) == key and isinstance( + obj, Series + ): + # Sometimes self._grouper will have been resorted while + # obj has not. In this case there is a mismatch when we + # call self._grouper.take(obj.index) so we need to undo the sorting + # before we call _grouper.take. 
+ assert self._grouper is not None + if self._indexer is not None: + reverse_indexer = self._indexer.argsort() + unsorted_ax = self._grouper.take(reverse_indexer) + ax = unsorted_ax.take(obj.index) + else: + ax = self._grouper.take(obj.index) + else: + if key not in obj._info_axis: + raise KeyError(f"The grouper name {key} is not found") + ax = Index(obj[key], name=key) + + else: + ax = obj._get_axis(self.axis) + if self.level is not None: + level = self.level + + # if a level is given it must be a mi level or + # equivalent to the axis name + if isinstance(ax, MultiIndex): + level = ax._get_level_number(level) + ax = Index(ax._get_level_values(level), name=ax.names[level]) + + else: + if level not in (0, ax.name): + raise ValueError(f"The level {level} is not valid") + + # possibly sort + if (self.sort or sort) and not ax.is_monotonic_increasing: + # use stable sort to support first, last, nth + # TODO: why does putting na_position="first" fix datetimelike cases? + indexer = self.indexer = ax.array.argsort( + kind="mergesort", na_position="first" + ) + ax = ax.take(indexer) + obj = obj.take(indexer, axis=self.axis) + + # error: Incompatible types in assignment (expression has type + # "NDFrameT", variable has type "None") + self.obj = obj # type: ignore[assignment] + self._gpr_index = ax + + @final + @property + def groups(self): + # error: "None" has no attribute "groups" + return self.grouper.groups # type: ignore[attr-defined] + + @final + def __repr__(self) -> str: + attrs_list = ( + f"{attr_name}={repr(getattr(self, attr_name))}" + for attr_name in self._attributes + if getattr(self, attr_name) is not None + ) + attrs = ", ".join(attrs_list) + cls_name = type(self).__name__ + return f"{cls_name}({attrs})" + + +@final +class Grouping: + """ + Holds the grouping information for a single key + + Parameters + ---------- + index : Index + grouper : + obj : DataFrame or Series + name : Label + level : + observed : bool, default False + If we are a Categorical, use the observed values + in_axis : if the Grouping is a column in self.obj and hence among + Groupby.exclusions list + + Returns + ------- + **Attributes**: + * indices : dict of {group -> index_list} + * codes : ndarray, group codes + * group_index : unique groups + * groups : dict of {group -> label_list} + """ + + _codes: npt.NDArray[np.signedinteger] | None = None + _group_index: Index | None = None + _passed_categorical: bool + _all_grouper: Categorical | None + _index: Index + + def __init__( + self, + index: Index, + grouper=None, + obj: NDFrame | None = None, + level=None, + sort: bool = True, + observed: bool = False, + in_axis: bool = False, + dropna: bool = True, + ) -> None: + self.level = level + self._orig_grouper = grouper + self.grouping_vector = _convert_grouper(index, grouper) + self._all_grouper = None + self._index = index + self._sort = sort + self.obj = obj + self._observed = observed + self.in_axis = in_axis + self._dropna = dropna + + self._passed_categorical = False + + # we have a single grouper which may be a myriad of things, + # some of which are dependent on the passing in level + + ilevel = self._ilevel + if ilevel is not None: + mapper = self.grouping_vector + # In extant tests, the new self.grouping_vector matches + # `index.get_level_values(ilevel)` whenever + # mapper is None and isinstance(index, MultiIndex) + ( + self.grouping_vector, # Index + self._codes, + self._group_index, + ) = index._get_grouper_for_level(mapper, level=ilevel, dropna=dropna) + + # a passed Grouper like, directly get the 
grouper in the same way + # as single grouper groupby, use the group_info to get codes + elif isinstance(self.grouping_vector, Grouper): + # get the new grouper; we already have disambiguated + # what key/level refer to exactly, don't need to + # check again as we have by this point converted these + # to an actual value (rather than a pd.Grouper) + assert self.obj is not None # for mypy + _, newgrouper, newobj = self.grouping_vector._get_grouper( + self.obj, validate=False + ) + self.obj = newobj + + ng = newgrouper._get_grouper() + if isinstance(newgrouper, ops.BinGrouper): + # in this case we have `ng is newgrouper` + self.grouping_vector = ng + else: + # ops.BaseGrouper + # use Index instead of ndarray so we can recover the name + self.grouping_vector = Index(ng, name=newgrouper.result_index.name) + + elif is_categorical_dtype(self.grouping_vector): + # a passed Categorical + self._passed_categorical = True + + self.grouping_vector, self._all_grouper = recode_for_groupby( + self.grouping_vector, sort, observed + ) + + elif not isinstance( + self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray) + ): + # no level passed + if getattr(self.grouping_vector, "ndim", 1) != 1: + t = self.name or str(type(self.grouping_vector)) + raise ValueError(f"Grouper for '{t}' not 1-dimensional") + + self.grouping_vector = index.map(self.grouping_vector) + + if not ( + hasattr(self.grouping_vector, "__len__") + and len(self.grouping_vector) == len(index) + ): + grper = pprint_thing(self.grouping_vector) + errmsg = ( + "Grouper result violates len(labels) == " + f"len(data)\nresult: {grper}" + ) + self.grouping_vector = None # Try for sanity + raise AssertionError(errmsg) + + if isinstance(self.grouping_vector, np.ndarray): + # if we have a date/time-like grouper, make sure that we have + # Timestamps like + self.grouping_vector = sanitize_to_nanoseconds(self.grouping_vector) + + def __repr__(self) -> str: + return f"Grouping({self.name})" + + def __iter__(self): + return iter(self.indices) + + @cache_readonly + def name(self) -> Hashable: + ilevel = self._ilevel + if ilevel is not None: + return self._index.names[ilevel] + + if isinstance(self._orig_grouper, (Index, Series)): + return self._orig_grouper.name + + elif isinstance(self.grouping_vector, ops.BaseGrouper): + return self.grouping_vector.result_index.name + + elif isinstance(self.grouping_vector, Index): + return self.grouping_vector.name + + # otherwise we have ndarray or ExtensionArray -> no name + return None + + @cache_readonly + def _ilevel(self) -> int | None: + """ + If necessary, converted index level name to index level position. 
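+
+        For example, ``level="b"`` on an index with ``names=["a", "b"]`` maps
+        to position ``1``; an integer level is returned unchanged.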
+ """ + level = self.level + if level is None: + return None + if not isinstance(level, int): + index = self._index + if level not in index.names: + raise AssertionError(f"Level {level} not in index") + return index.names.index(level) + return level + + @property + def ngroups(self) -> int: + return len(self.group_index) + + @cache_readonly + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: + # we have a list of groupers + if isinstance(self.grouping_vector, ops.BaseGrouper): + return self.grouping_vector.indices + + values = Categorical(self.grouping_vector) + return values._reverse_indexer() + + @property + def codes(self) -> npt.NDArray[np.signedinteger]: + if self._codes is not None: + # _codes is set in __init__ for MultiIndex cases + return self._codes + + return self._codes_and_uniques[0] + + @cache_readonly + def group_arraylike(self) -> ArrayLike: + """ + Analogous to result_index, but holding an ArrayLike to ensure + we can retain ExtensionDtypes. + """ + if self._group_index is not None: + # _group_index is set in __init__ for MultiIndex cases + return self._group_index._values + + elif self._all_grouper is not None: + # retain dtype for categories, including unobserved ones + return self.result_index._values + + return self._codes_and_uniques[1] + + @cache_readonly + def result_index(self) -> Index: + # result_index retains dtype for categories, including unobserved ones, + # which group_index does not + if self._all_grouper is not None: + group_idx = self.group_index + assert isinstance(group_idx, CategoricalIndex) + return recode_from_groupby(self._all_grouper, self._sort, group_idx) + return self.group_index + + @cache_readonly + def group_index(self) -> Index: + if self._group_index is not None: + # _group_index is set in __init__ for MultiIndex cases + return self._group_index + + uniques = self._codes_and_uniques[1] + return Index._with_infer(uniques, name=self.name) + + @cache_readonly + def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]: + if self._passed_categorical: + # we make a CategoricalIndex out of the cat grouper + # preserving the categories / ordered attributes; + # doesn't (yet - GH#46909) handle dropna=False + cat = self.grouping_vector + categories = cat.categories + + if self._observed: + ucodes = algorithms.unique1d(cat.codes) + ucodes = ucodes[ucodes != -1] + if self._sort or cat.ordered: + ucodes = np.sort(ucodes) + else: + ucodes = np.arange(len(categories)) + + uniques = Categorical.from_codes( + codes=ucodes, categories=categories, ordered=cat.ordered + ) + return cat.codes, uniques + + elif isinstance(self.grouping_vector, ops.BaseGrouper): + # we have a list of groupers + codes = self.grouping_vector.codes_info + # error: Incompatible types in assignment (expression has type "Union + # [ExtensionArray, ndarray[Any, Any]]", variable has type "Categorical") + uniques = ( + self.grouping_vector.result_index._values # type: ignore[assignment] + ) + else: + # GH35667, replace dropna=False with use_na_sentinel=False + # error: Incompatible types in assignment (expression has type "Union[ + # ndarray[Any, Any], Index]", variable has type "Categorical") + codes, uniques = algorithms.factorize( # type: ignore[assignment] + self.grouping_vector, sort=self._sort, use_na_sentinel=self._dropna + ) + return codes, uniques + + @cache_readonly + def groups(self) -> dict[Hashable, np.ndarray]: + return self._index.groupby(Categorical.from_codes(self.codes, self.group_index)) + + +def get_grouper( + obj: NDFrameT, + key=None, + 
axis: int = 0, + level=None, + sort: bool = True, + observed: bool = False, + mutated: bool = False, + validate: bool = True, + dropna: bool = True, +) -> tuple[ops.BaseGrouper, frozenset[Hashable], NDFrameT]: + """ + Create and return a BaseGrouper, which is an internal + mapping of how to create the grouper indexers. + This may be composed of multiple Grouping objects, indicating + multiple groupers + + Groupers are ultimately index mappings. They can originate as: + index mappings, keys to columns, functions, or Groupers + + Groupers enable local references to axis,level,sort, while + the passed in axis, level, and sort are 'global'. + + This routine tries to figure out what the passing in references + are and then creates a Grouping for each one, combined into + a BaseGrouper. + + If observed & we have a categorical grouper, only show the observed + values. + + If validate, then check for key/level overlaps. + + """ + group_axis = obj._get_axis(axis) + + # validate that the passed single level is compatible with the passed + # axis of the object + if level is not None: + # TODO: These if-block and else-block are almost same. + # MultiIndex instance check is removable, but it seems that there are + # some processes only for non-MultiIndex in else-block, + # eg. `obj.index.name != level`. We have to consider carefully whether + # these are applicable for MultiIndex. Even if these are applicable, + # we need to check if it makes no side effect to subsequent processes + # on the outside of this condition. + # (GH 17621) + if isinstance(group_axis, MultiIndex): + if is_list_like(level) and len(level) == 1: + level = level[0] + + if key is None and is_scalar(level): + # Get the level values from group_axis + key = group_axis.get_level_values(level) + level = None + + else: + # allow level to be a length-one list-like object + # (e.g., level=[0]) + # GH 13901 + if is_list_like(level): + nlevels = len(level) + if nlevels == 1: + level = level[0] + elif nlevels == 0: + raise ValueError("No group keys passed!") + else: + raise ValueError("multiple levels only valid with MultiIndex") + + if isinstance(level, str): + if obj._get_axis(axis).name != level: + raise ValueError( + f"level name {level} is not the name " + f"of the {obj._get_axis_name(axis)}" + ) + elif level > 0 or level < -1: + raise ValueError("level > 0 or level < -1 only valid with MultiIndex") + + # NOTE: `group_axis` and `group_axis.get_level_values(level)` + # are same in this section. + level = None + key = group_axis + + # a passed-in Grouper, directly convert + if isinstance(key, Grouper): + binner, grouper, obj = key._get_grouper(obj, validate=False) + if key.key is None: + return grouper, frozenset(), obj + else: + return grouper, frozenset({key.key}), obj + + # already have a BaseGrouper, just return it + elif isinstance(key, ops.BaseGrouper): + return key, frozenset(), obj + + if not isinstance(key, list): + keys = [key] + match_axis_length = False + else: + keys = key + match_axis_length = len(keys) == len(group_axis) + + # what are we after, exactly? + any_callable = any(callable(g) or isinstance(g, dict) for g in keys) + any_groupers = any(isinstance(g, (Grouper, Grouping)) for g in keys) + any_arraylike = any( + isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys + ) + + # is this an index replacement? 
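+    # i.e. the keys are plain labels whose count happens to match the length
+    # of the axis; if they are not all column / index-level names, the whole
+    # list is treated as a single array-like grouper rather than separate keys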
+ if ( + not any_callable + and not any_arraylike + and not any_groupers + and match_axis_length + and level is None + ): + if isinstance(obj, DataFrame): + all_in_columns_index = all( + g in obj.columns or g in obj.index.names for g in keys + ) + else: + assert isinstance(obj, Series) + all_in_columns_index = all(g in obj.index.names for g in keys) + + if not all_in_columns_index: + keys = [com.asarray_tuplesafe(keys)] + + if isinstance(level, (tuple, list)): + if key is None: + keys = [None] * len(level) + levels = level + else: + levels = [level] * len(keys) + + groupings: list[Grouping] = [] + exclusions: set[Hashable] = set() + + # if the actual grouper should be obj[key] + def is_in_axis(key) -> bool: + + if not _is_label_like(key): + if obj.ndim == 1: + return False + + # items -> .columns for DataFrame, .index for Series + items = obj.axes[-1] + try: + items.get_loc(key) + except (KeyError, TypeError, InvalidIndexError): + # TypeError shows up here if we pass e.g. Int64Index + return False + + return True + + # if the grouper is obj[name] + def is_in_obj(gpr) -> bool: + if not hasattr(gpr, "name"): + return False + try: + return gpr is obj[gpr.name] + except (KeyError, IndexError, InvalidIndexError): + # IndexError reached in e.g. test_skip_group_keys when we pass + # lambda here + # InvalidIndexError raised on key-types inappropriate for index, + # e.g. DatetimeIndex.get_loc(tuple()) + return False + + for gpr, level in zip(keys, levels): + + if is_in_obj(gpr): # df.groupby(df['name']) + in_axis = True + exclusions.add(gpr.name) + + elif is_in_axis(gpr): # df.groupby('name') + if gpr in obj: + if validate: + obj._check_label_or_level_ambiguity(gpr, axis=axis) + in_axis, name, gpr = True, gpr, obj[gpr] + if gpr.ndim != 1: + # non-unique columns; raise here to get the name in the + # exception message + raise ValueError(f"Grouper for '{name}' not 1-dimensional") + exclusions.add(name) + elif obj._is_level_reference(gpr, axis=axis): + in_axis, level, gpr = False, gpr, None + else: + raise KeyError(gpr) + elif isinstance(gpr, Grouper) and gpr.key is not None: + # Add key to exclusions + exclusions.add(gpr.key) + in_axis = False + else: + in_axis = False + + # create the Grouping + # allow us to passing the actual Grouping as the gpr + ping = ( + Grouping( + group_axis, + gpr, + obj=obj, + level=level, + sort=sort, + observed=observed, + in_axis=in_axis, + dropna=dropna, + ) + if not isinstance(gpr, Grouping) + else gpr + ) + + groupings.append(ping) + + if len(groupings) == 0 and len(obj): + raise ValueError("No group keys passed!") + elif len(groupings) == 0: + groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp))) + + # create the internals grouper + grouper = ops.BaseGrouper( + group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna + ) + return grouper, frozenset(exclusions), obj + + +def _is_label_like(val) -> bool: + return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val)) + + +def _convert_grouper(axis: Index, grouper): + if isinstance(grouper, dict): + return grouper.get + elif isinstance(grouper, Series): + if grouper.index.equals(axis): + return grouper._values + else: + return grouper.reindex(axis)._values + elif isinstance(grouper, MultiIndex): + return grouper._values + elif isinstance(grouper, (list, tuple, Index, Categorical, np.ndarray)): + if len(grouper) != len(axis): + raise ValueError("Grouper and axis must be same length") + + if isinstance(grouper, (list, tuple)): + grouper = com.asarray_tuplesafe(grouper) + 
return grouper + else: + return grouper + + +def _check_deprecated_resample_kwargs(kwargs, origin): + """ + Check for use of deprecated parameters in ``resample`` and related functions. + + Raises the appropriate warnings if these parameters are detected. + Only sets an approximate ``stacklevel`` for the warnings (see #37603, #36629). + + Parameters + ---------- + kwargs : dict + Dictionary of keyword arguments to check for deprecated parameters. + origin : object + From where this function is being called; either Grouper or TimeGrouper. Used + to determine an approximate stacklevel. + """ + # Deprecation warning of `base` and `loffset` since v1.1.0: + # we are raising the warning here to be able to set the `stacklevel` + # properly since we need to raise the `base` and `loffset` deprecation + # warning from three different cases: + # core/generic.py::NDFrame.resample + # core/groupby/groupby.py::GroupBy.resample + # core/groupby/grouper.py::Grouper + # raising these warnings from TimeGrouper directly would fail the test: + # tests/resample/test_deprecated.py::test_deprecating_on_loffset_and_base + + if kwargs.get("base", None) is not None: + warnings.warn( + "'base' in .resample() and in Grouper() is deprecated.\n" + "The new arguments that you should use are 'offset' or 'origin'.\n" + '\n>>> df.resample(freq="3s", base=2)\n' + "\nbecomes:\n" + '\n>>> df.resample(freq="3s", offset="2s")\n', + FutureWarning, + stacklevel=find_stack_level(), + ) + if kwargs.get("loffset", None) is not None: + warnings.warn( + "'loffset' in .resample() and in Grouper() is deprecated.\n" + '\n>>> df.resample(freq="3s", loffset="8H")\n' + "\nbecomes:\n" + "\n>>> from pandas.tseries.frequencies import to_offset" + '\n>>> df = df.resample(freq="3s").mean()' + '\n>>> df.index = df.index.to_timestamp() + to_offset("8H")\n', + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/core/groupby/indexing.py b/pandas/core/groupby/indexing.py new file mode 100644 index 00000000..750097b4 --- /dev/null +++ b/pandas/core/groupby/indexing.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Iterable, + Literal, + cast, +) + +import numpy as np + +from pandas._typing import PositionalIndexer +from pandas.util._decorators import ( + cache_readonly, + doc, +) + +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + from pandas.core.groupby import groupby + + +class GroupByIndexingMixin: + """ + Mixin for adding ._positional_selector to GroupBy. + """ + + @cache_readonly + def _positional_selector(self) -> GroupByPositionalSelector: + """ + Return positional selection for each group. + + ``groupby._positional_selector[i:j]`` is similar to + ``groupby.apply(lambda x: x.iloc[i:j])`` + but much faster and preserves the original index and order. + + ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head` + and :meth:`~GroupBy.tail`. For example: + + - ``head(5)`` + - ``_positional_selector[5:-5]`` + - ``tail(5)`` + + together return all the rows. + + Allowed inputs for the index are: + + - An integer valued iterable, e.g. ``range(2, 4)``. + - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``. + + The output format is the same as :meth:`~GroupBy.head` and + :meth:`~GroupBy.tail`, namely + a subset of the ``DataFrame`` or ``Series`` with the index and order preserved. 
+ + Returns + ------- + Series + The filtered subset of the original Series. + DataFrame + The filtered subset of the original DataFrame. + + See Also + -------- + DataFrame.iloc : Purely integer-location based indexing for selection by + position. + GroupBy.head : Return first n rows of each group. + GroupBy.tail : Return last n rows of each group. + GroupBy.nth : Take the nth row from each group if n is an int, or a + subset of rows, if n is a list of ints. + + Notes + ----- + - The slice step cannot be negative. + - If the index specification results in overlaps, the item is not duplicated. + - If the index specification changes the order of items, then + they are returned in their original order. + By contrast, ``DataFrame.iloc`` can change the row order. + - ``groupby()`` parameters such as as_index and dropna are ignored. + + The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth` + with ``as_index=False`` are: + + - Input to ``_positional_selector`` can include + one or more slices whereas ``nth`` + just handles an integer or a list of integers. + - ``_positional_selector`` can accept a slice relative to the + last row of each group. + - ``_positional_selector`` does not have an equivalent to the + ``nth()`` ``dropna`` parameter. + + Examples + -------- + >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]], + ... columns=["A", "B"]) + >>> df.groupby("A")._positional_selector[1:2] + A B + 1 a 2 + 4 b 5 + + >>> df.groupby("A")._positional_selector[1, -1] + A B + 1 a 2 + 2 a 3 + 4 b 5 + """ + if TYPE_CHECKING: + groupby_self = cast(groupby.GroupBy, self) + else: + groupby_self = self + + return GroupByPositionalSelector(groupby_self) + + def _make_mask_from_positional_indexer( + self, + arg: PositionalIndexer | tuple, + ) -> np.ndarray: + if is_list_like(arg): + if all(is_integer(i) for i in cast(Iterable, arg)): + mask = self._make_mask_from_list(cast(Iterable[int], arg)) + else: + mask = self._make_mask_from_tuple(cast(tuple, arg)) + + elif isinstance(arg, slice): + mask = self._make_mask_from_slice(arg) + elif is_integer(arg): + mask = self._make_mask_from_int(cast(int, arg)) + else: + raise TypeError( + f"Invalid index {type(arg)}. " + "Must be integer, list-like, slice or a tuple of " + "integers and slices" + ) + + if isinstance(mask, bool): + if mask: + mask = self._ascending_count >= 0 + else: + mask = self._ascending_count < 0 + + return cast(np.ndarray, mask) + + def _make_mask_from_int(self, arg: int) -> np.ndarray: + if arg >= 0: + return self._ascending_count == arg + else: + return self._descending_count == (-arg - 1) + + def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray: + positive = [arg for arg in args if arg >= 0] + negative = [-arg - 1 for arg in args if arg < 0] + + mask: bool | np.ndarray = False + + if positive: + mask |= np.isin(self._ascending_count, positive) + + if negative: + mask |= np.isin(self._descending_count, negative) + + return mask + + def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray: + mask: bool | np.ndarray = False + + for arg in args: + if is_integer(arg): + mask |= self._make_mask_from_int(cast(int, arg)) + elif isinstance(arg, slice): + mask |= self._make_mask_from_slice(arg) + else: + raise ValueError( + f"Invalid argument {type(arg)}. Should be int or slice." 
+ ) + + return mask + + def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray: + start = arg.start + stop = arg.stop + step = arg.step + + if step is not None and step < 0: + raise ValueError(f"Invalid step {step}. Must be non-negative") + + mask: bool | np.ndarray = True + + if step is None: + step = 1 + + if start is None: + if step > 1: + mask &= self._ascending_count % step == 0 + + elif start >= 0: + mask &= self._ascending_count >= start + + if step > 1: + mask &= (self._ascending_count - start) % step == 0 + + else: + mask &= self._descending_count < -start + + offset_array = self._descending_count + start + 1 + limit_array = ( + self._ascending_count + self._descending_count + (start + 1) + ) < 0 + offset_array = np.where(limit_array, self._ascending_count, offset_array) + + mask &= offset_array % step == 0 + + if stop is not None: + if stop >= 0: + mask &= self._ascending_count < stop + else: + mask &= self._descending_count >= -stop + + return mask + + @cache_readonly + def _ascending_count(self) -> np.ndarray: + if TYPE_CHECKING: + groupby_self = cast(groupby.GroupBy, self) + else: + groupby_self = self + + return groupby_self._cumcount_array() + + @cache_readonly + def _descending_count(self) -> np.ndarray: + if TYPE_CHECKING: + groupby_self = cast(groupby.GroupBy, self) + else: + groupby_self = self + + return groupby_self._cumcount_array(ascending=False) + + +@doc(GroupByIndexingMixin._positional_selector) +class GroupByPositionalSelector: + def __init__(self, groupby_object: groupby.GroupBy) -> None: + self.groupby_object = groupby_object + + def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series: + """ + Select by positional index per group. + + Implements GroupBy._positional_selector + + Parameters + ---------- + arg : PositionalIndexer | tuple + Allowed values are: + - int + - int valued iterable such as list or range + - slice with step either None or positive + - tuple of integers and slices + + Returns + ------- + Series + The filtered subset of the original groupby Series. + DataFrame + The filtered subset of the original groupby DataFrame. + + See Also + -------- + DataFrame.iloc : Integer-location based indexing for selection by position. + GroupBy.head : Return first n rows of each group. + GroupBy.tail : Return last n rows of each group. + GroupBy._positional_selector : Return positional selection for each group. + GroupBy.nth : Take the nth row from each group if n is an int, or a + subset of rows, if n is a list of ints. 
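A short hypothetical example of the tuple form accepted here, mixing an integer and a slice (editorial sketch, invented data):

import pandas as pd

df = pd.DataFrame({"A": ["a", "a", "a", "b", "b"], "B": range(5)})
# The tuple (0, slice(-1, None)) selects the first and the last row of each
# group; overlapping selections are not duplicated.
print(df.groupby("A")._positional_selector[0, -1:])
#    A  B
# 0  a  0
# 2  a  2
# 3  b  3
# 4  b  4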
+ """ + self.groupby_object._reset_group_selection() + mask = self.groupby_object._make_mask_from_positional_indexer(arg) + return self.groupby_object._mask_selected_obj(mask) + + +class GroupByNthSelector: + """ + Dynamically substituted for GroupBy.nth to enable both call and index + """ + + def __init__(self, groupby_object: groupby.GroupBy) -> None: + self.groupby_object = groupby_object + + def __call__( + self, + n: PositionalIndexer | tuple, + dropna: Literal["any", "all", None] = None, + ) -> DataFrame | Series: + return self.groupby_object._nth(n, dropna) + + def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series: + return self.groupby_object._nth(n) diff --git a/pandas/core/groupby/numba_.py b/pandas/core/groupby/numba_.py new file mode 100644 index 00000000..acfc690a --- /dev/null +++ b/pandas/core/groupby/numba_.py @@ -0,0 +1,181 @@ +"""Common utilities for Numba operations with groupby ops""" +from __future__ import annotations + +import functools +import inspect +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) + +import numpy as np + +from pandas._typing import Scalar +from pandas.compat._optional import import_optional_dependency + +from pandas.core.util.numba_ import ( + NumbaUtilError, + jit_user_function, +) + + +def validate_udf(func: Callable) -> None: + """ + Validate user defined function for ops when using Numba with groupby ops. + + The first signature arguments should include: + + def f(values, index, ...): + ... + + Parameters + ---------- + func : function, default False + user defined function + + Returns + ------- + None + + Raises + ------ + NumbaUtilError + """ + if not callable(func): + raise NotImplementedError( + "Numba engine can only be used with a single function." + ) + udf_signature = list(inspect.signature(func).parameters.keys()) + expected_args = ["values", "index"] + min_number_args = len(expected_args) + if ( + len(udf_signature) < min_number_args + or udf_signature[:min_number_args] != expected_args + ): + raise NumbaUtilError( + f"The first {min_number_args} arguments to {func.__name__} must be " + f"{expected_args}" + ) + + +@functools.lru_cache(maxsize=None) +def generate_numba_agg_func( + func: Callable[..., Scalar], + nopython: bool, + nogil: bool, + parallel: bool, +) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]: + """ + Generate a numba jitted agg function specified by values from engine_kwargs. + + 1. jit the user's function + 2. Return a groupby agg function with the jitted function inline + + Configurations specified in engine_kwargs apply to both the user's + function _AND_ the groupby evaluation loop. 
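For orientation, a hedged sketch of the public call that ends up routed through these generated kernels; it assumes numba is installed and the column and key names are invented:

import numpy as np
import pandas as pd

def var_plus_one(values, index):
    # The first two arguments must be named (values, index); see validate_udf.
    return np.var(values) + 1.0

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1.0, 2.0, 3.0]})
out = df.groupby("key")["val"].agg(
    var_plus_one, engine="numba", engine_kwargs={"nopython": True, "parallel": False}
)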
+ + Parameters + ---------- + func : function + function to be applied to each group and will be JITed + nopython : bool + nopython to be passed into numba.jit + nogil : bool + nogil to be passed into numba.jit + parallel : bool + parallel to be passed into numba.jit + + Returns + ------- + Numba function + """ + numba_func = jit_user_function(func, nopython, nogil, parallel) + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def group_agg( + values: np.ndarray, + index: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + num_columns: int, + *args: Any, + ) -> np.ndarray: + + assert len(begin) == len(end) + num_groups = len(begin) + + result = np.empty((num_groups, num_columns)) + for i in numba.prange(num_groups): + group_index = index[begin[i] : end[i]] + for j in numba.prange(num_columns): + group = values[begin[i] : end[i], j] + result[i, j] = numba_func(group, group_index, *args) + return result + + return group_agg + + +@functools.lru_cache(maxsize=None) +def generate_numba_transform_func( + func: Callable[..., np.ndarray], + nopython: bool, + nogil: bool, + parallel: bool, +) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]: + """ + Generate a numba jitted transform function specified by values from engine_kwargs. + + 1. jit the user's function + 2. Return a groupby transform function with the jitted function inline + + Configurations specified in engine_kwargs apply to both the user's + function _AND_ the groupby evaluation loop. + + Parameters + ---------- + func : function + function to be applied to each window and will be JITed + nopython : bool + nopython to be passed into numba.jit + nogil : bool + nogil to be passed into numba.jit + parallel : bool + parallel to be passed into numba.jit + + Returns + ------- + Numba function + """ + numba_func = jit_user_function(func, nopython, nogil, parallel) + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def group_transform( + values: np.ndarray, + index: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + num_columns: int, + *args: Any, + ) -> np.ndarray: + + assert len(begin) == len(end) + num_groups = len(begin) + + result = np.empty((len(values), num_columns)) + for i in numba.prange(num_groups): + group_index = index[begin[i] : end[i]] + for j in numba.prange(num_columns): + group = values[begin[i] : end[i], j] + result[begin[i] : end[i], j] = numba_func(group, group_index, *args) + return result + + return group_transform diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py new file mode 100644 index 00000000..00de92d1 --- /dev/null +++ b/pandas/core/groupby/ops.py @@ -0,0 +1,1371 @@ +""" +Provide classes to perform the groupby aggregate operations. + +These are not exposed to the user and provide implementations of the grouping +operations, primarily in cython. These classes (BaseGrouper and BinGrouper) +are contained *in* the SeriesGroupBy and DataFrameGroupBy objects. 
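To make the relationship to user-facing objects concrete (an illustrative sketch only; the attribute is internal and the data made up), a GroupBy object holds one of the grouper classes defined in this module:

import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})
gb = df.groupby("A")
grouper = gb.grouper           # a BaseGrouper instance from this module
print(type(grouper).__name__)  # BaseGrouper
print(grouper.ngroups)         # 2
print(grouper.result_index)    # Index(['x', 'y'], dtype='object', name='A')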
+""" +from __future__ import annotations + +import collections +import functools +from typing import ( + TYPE_CHECKING, + Callable, + Generic, + Hashable, + Iterator, + NoReturn, + Sequence, + final, +) + +import numpy as np + +from pandas._libs import ( + NaT, + lib, +) +import pandas._libs.groupby as libgroupby +import pandas._libs.reduction as libreduction +from pandas._typing import ( + ArrayLike, + DtypeObj, + NDFrameT, + Shape, + npt, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.cast import ( + maybe_cast_pointwise_result, + maybe_downcast_to_dtype, +) +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_int64, + ensure_platform_int, + ensure_uint64, + is_1d_only_ea_dtype, + is_bool_dtype, + is_complex_dtype, + is_datetime64_any_dtype, + is_float_dtype, + is_integer_dtype, + is_numeric_dtype, + is_sparse, + is_timedelta64_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.missing import ( + isna, + maybe_fill, +) + +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.floating import FloatingDtype +from pandas.core.arrays.integer import IntegerDtype +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) +from pandas.core.arrays.string_ import StringDtype +from pandas.core.frame import DataFrame +from pandas.core.groupby import grouper +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, + MultiIndex, + ensure_index, +) +from pandas.core.series import Series +from pandas.core.sorting import ( + compress_group_index, + decons_obs_group_ids, + get_flattened_list, + get_group_index, + get_group_index_sorter, + get_indexer_dict, +) + +if TYPE_CHECKING: + from pandas.core.generic import NDFrame + + +class WrappedCythonOp: + """ + Dispatch logic for functions defined in _libs.groupby + + Parameters + ---------- + kind: str + Whether the operation is an aggregate or transform. + how: str + Operation name, e.g. "mean". + has_dropped_na: bool + True precisely when dropna=True and the grouper contains a null value. + """ + + # Functions for which we do _not_ attempt to cast the cython result + # back to the original dtype. 
+ cast_blocklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"]) + + def __init__(self, kind: str, how: str, has_dropped_na: bool) -> None: + self.kind = kind + self.how = how + self.has_dropped_na = has_dropped_na + + _CYTHON_FUNCTIONS = { + "aggregate": { + "sum": "group_sum", + "prod": "group_prod", + "min": "group_min", + "max": "group_max", + "mean": "group_mean", + "median": "group_median_float64", + "var": "group_var", + "first": "group_nth", + "last": "group_last", + "ohlc": "group_ohlc", + }, + "transform": { + "cumprod": "group_cumprod_float64", + "cumsum": "group_cumsum", + "cummin": "group_cummin", + "cummax": "group_cummax", + "rank": "group_rank", + }, + } + + # "group_any" and "group_all" are also support masks, but don't go + # through WrappedCythonOp + _MASKED_CYTHON_FUNCTIONS = { + "cummin", + "cummax", + "min", + "max", + "last", + "first", + "rank", + "sum", + "ohlc", + "cumsum", + "prod", + } + + _cython_arity = {"ohlc": 4} # OHLC + + # Note: we make this a classmethod and pass kind+how so that caching + # works at the class level and not the instance level + @classmethod + @functools.lru_cache(maxsize=None) + def _get_cython_function( + cls, kind: str, how: str, dtype: np.dtype, is_numeric: bool + ): + + dtype_str = dtype.name + ftype = cls._CYTHON_FUNCTIONS[kind][how] + + # see if there is a fused-type version of function + # only valid for numeric + f = getattr(libgroupby, ftype) + if is_numeric: + return f + elif dtype == np.dtype(object): + if how in ["median", "cumprod"]: + # no fused types -> no __signatures__ + raise NotImplementedError( + f"function is not implemented for this dtype: " + f"[how->{how},dtype->{dtype_str}]" + ) + elif "object" not in f.__signatures__: + # raise NotImplementedError here rather than TypeError later + raise NotImplementedError( + f"function is not implemented for this dtype: " + f"[how->{how},dtype->{dtype_str}]" + ) + return f + else: + raise NotImplementedError( + "This should not be reached. Please report a bug at " + "github.com/pandas-dev/pandas/", + dtype, + ) + + def _get_cython_vals(self, values: np.ndarray) -> np.ndarray: + """ + Cast numeric dtypes to float64 for functions that only support that. + + Parameters + ---------- + values : np.ndarray + + Returns + ------- + values : np.ndarray + """ + how = self.how + + if how in ["median", "cumprod"]: + # these two only have float64 implementations + # We should only get here with is_numeric, as non-numeric cases + # should raise in _get_cython_function + values = ensure_float64(values) + + elif values.dtype.kind in ["i", "u"]: + if how in ["var", "mean"] or ( + self.kind == "transform" and self.has_dropped_na + ): + # result may still include NaN, so we have to cast + values = ensure_float64(values) + + elif how in ["sum", "ohlc", "prod", "cumsum"]: + # Avoid overflow during group op + if values.dtype.kind == "i": + values = ensure_int64(values) + else: + values = ensure_uint64(values) + + return values + + # TODO: general case implementation overridable by EAs. + def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): + """ + Check if we can do this operation with our cython functions. + + Raises + ------ + NotImplementedError + This is either not a valid function for this dtype, or + valid but not implemented in cython. 
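The casting rules above are visible from the public API; a small sketch with toy data:

import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})
# Integer input is cast to float64 for "mean" (see _get_cython_vals above),
# so the aggregated result comes back as float64.
print(df.groupby("A")["B"].mean().dtype)   # float64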
+ """ + how = self.how + + if is_numeric: + # never an invalid op for those dtypes, so return early as fastpath + return + + if isinstance(dtype, CategoricalDtype): + # NotImplementedError for methods that can fall back to a + # non-cython implementation. + if how in ["sum", "prod", "cumsum", "cumprod"]: + raise TypeError(f"{dtype} type does not support {how} operations") + elif how not in ["rank"]: + # only "rank" is implemented in cython + raise NotImplementedError(f"{dtype} dtype not supported") + elif not dtype.ordered: + # TODO: TypeError? + raise NotImplementedError(f"{dtype} dtype not supported") + + elif is_sparse(dtype): + # categoricals are only 1d, so we + # are not setup for dim transforming + raise NotImplementedError(f"{dtype} dtype not supported") + elif is_datetime64_any_dtype(dtype): + # TODO: same for period_dtype? no for these methods with Period + # we raise NotImplemented if this is an invalid operation + # entirely, e.g. adding datetimes + if how in ["sum", "prod", "cumsum", "cumprod"]: + raise TypeError(f"datetime64 type does not support {how} operations") + elif is_timedelta64_dtype(dtype): + if how in ["prod", "cumprod"]: + raise TypeError(f"timedelta64 type does not support {how} operations") + + def _get_output_shape(self, ngroups: int, values: np.ndarray) -> Shape: + how = self.how + kind = self.kind + + arity = self._cython_arity.get(how, 1) + + out_shape: Shape + if how == "ohlc": + out_shape = (ngroups, 4) + elif arity > 1: + raise NotImplementedError( + "arity of more than 1 is not supported for the 'how' argument" + ) + elif kind == "transform": + out_shape = values.shape + else: + out_shape = (ngroups,) + values.shape[1:] + return out_shape + + def _get_out_dtype(self, dtype: np.dtype) -> np.dtype: + how = self.how + + if how == "rank": + out_dtype = "float64" + else: + if is_numeric_dtype(dtype): + out_dtype = f"{dtype.kind}{dtype.itemsize}" + else: + out_dtype = "object" + return np.dtype(out_dtype) + + def _get_result_dtype(self, dtype: np.dtype) -> np.dtype: + """ + Get the desired dtype of a result based on the + input dtype and how it was computed. + + Parameters + ---------- + dtype : np.dtype + + Returns + ------- + np.dtype + The desired dtype of the result. + """ + how = self.how + + if how in ["sum", "cumsum", "sum", "prod"]: + if dtype == np.dtype(bool): + return np.dtype(np.int64) + elif how in ["mean", "median", "var"]: + if is_float_dtype(dtype) or is_complex_dtype(dtype): + return dtype + elif is_numeric_dtype(dtype): + return np.dtype(np.float64) + return dtype + + def uses_mask(self) -> bool: + return self.how in self._MASKED_CYTHON_FUNCTIONS + + @final + def _ea_wrap_cython_operation( + self, + values: ExtensionArray, + min_count: int, + ngroups: int, + comp_ids: np.ndarray, + **kwargs, + ) -> ArrayLike: + """ + If we have an ExtensionArray, unwrap, call _cython_operation, and + re-wrap if appropriate. 
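As a rough illustration of the unwrap/re-wrap round trip (editorial, with made-up data), tz-aware datetimes are ExtensionArrays, yet the timezone survives a cython aggregation:

import pandas as pd

ts = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-03"]).tz_localize("UTC")
df = pd.DataFrame({"A": ["x", "x", "y"], "ts": ts})
# The DatetimeArray is operated on as tz-naive ordinals and re-wrapped
# afterwards, so the result keeps its datetime64[ns, UTC] dtype.
print(df.groupby("A")["ts"].max().dtype)   # datetime64[ns, UTC]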
+ """ + if isinstance(values, BaseMaskedArray) and self.uses_mask(): + return self._masked_ea_wrap_cython_operation( + values, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + **kwargs, + ) + + elif isinstance(values, Categorical) and self.uses_mask(): + assert self.how == "rank" # the only one implemented ATM + assert values.ordered # checked earlier + mask = values.isna() + npvalues = values._ndarray + + res_values = self._cython_op_ndim_compat( + npvalues, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=mask, + **kwargs, + ) + + # If we ever have more than just "rank" here, we'll need to do + # `if self.how in self.cast_blocklist` like we do for other dtypes. + return res_values + + npvalues = self._ea_to_cython_values(values) + + res_values = self._cython_op_ndim_compat( + npvalues, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=None, + **kwargs, + ) + + if self.how in self.cast_blocklist: + # i.e. how in ["rank"], since other cast_blocklist methods dont go + # through cython_operation + return res_values + + return self._reconstruct_ea_result(values, res_values) + + # TODO: general case implementation overridable by EAs. + def _ea_to_cython_values(self, values: ExtensionArray) -> np.ndarray: + # GH#43682 + if isinstance(values, (DatetimeArray, PeriodArray, TimedeltaArray)): + # All of the functions implemented here are ordinal, so we can + # operate on the tz-naive equivalents + npvalues = values._ndarray.view("M8[ns]") + elif isinstance(values.dtype, (BooleanDtype, IntegerDtype)): + # IntegerArray or BooleanArray + npvalues = values.to_numpy("float64", na_value=np.nan) + elif isinstance(values.dtype, FloatingDtype): + # FloatingArray + npvalues = values.to_numpy(values.dtype.numpy_dtype, na_value=np.nan) + elif isinstance(values.dtype, StringDtype): + # StringArray + npvalues = values.to_numpy(object, na_value=np.nan) + else: + raise NotImplementedError( + f"function is not implemented for this dtype: {values.dtype}" + ) + return npvalues + + # TODO: general case implementation overridable by EAs. + def _reconstruct_ea_result( + self, values: ExtensionArray, res_values: np.ndarray + ) -> ExtensionArray: + """ + Construct an ExtensionArray result from an ndarray result. + """ + dtype: BaseMaskedDtype | StringDtype + + if isinstance(values.dtype, StringDtype): + dtype = values.dtype + string_array_cls = dtype.construct_array_type() + return string_array_cls._from_sequence(res_values, dtype=dtype) + + elif isinstance(values.dtype, BaseMaskedDtype): + new_dtype = self._get_result_dtype(values.dtype.numpy_dtype) + dtype = BaseMaskedDtype.from_numpy_dtype(new_dtype) + masked_array_cls = dtype.construct_array_type() + return masked_array_cls._from_sequence(res_values, dtype=dtype) + + elif isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)): + # In to_cython_values we took a view as M8[ns] + assert res_values.dtype == "M8[ns]" + res_values = res_values.view(values._ndarray.dtype) + return values._from_backing_data(res_values) + + raise NotImplementedError + + @final + def _masked_ea_wrap_cython_operation( + self, + values: BaseMaskedArray, + min_count: int, + ngroups: int, + comp_ids: np.ndarray, + **kwargs, + ) -> BaseMaskedArray: + """ + Equivalent of `_ea_wrap_cython_operation`, but optimized for masked EA's + and cython algorithms which accept a mask. 
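Similarly, a quick hypothetical check of the masked path: nullable integers keep their extension dtype through an aggregation:

import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"],
                   "B": pd.array([1, None, 3], dtype="Int64")})
# The data/mask pair is handed to a mask-aware kernel, so the result stays
# "Int64" instead of being cast to float64 with NaN.
print(df.groupby("A")["B"].sum().dtype)   # Int64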
+ """ + orig_values = values + + # libgroupby functions are responsible for NOT altering mask + mask = values._mask + if self.kind != "aggregate": + result_mask = mask.copy() + else: + result_mask = np.zeros(ngroups, dtype=bool) + + arr = values._data + + res_values = self._cython_op_ndim_compat( + arr, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=mask, + result_mask=result_mask, + **kwargs, + ) + + if self.how == "ohlc": + result_mask = np.tile(result_mask, (4, 1)).T + + # res_values should already have the correct dtype, we just need to + # wrap in a MaskedArray + return orig_values._maybe_mask_result(res_values, result_mask) + + @final + def _cython_op_ndim_compat( + self, + values: np.ndarray, + *, + min_count: int, + ngroups: int, + comp_ids: np.ndarray, + mask: npt.NDArray[np.bool_] | None = None, + result_mask: npt.NDArray[np.bool_] | None = None, + **kwargs, + ) -> np.ndarray: + if values.ndim == 1: + # expand to 2d, dispatch, then squeeze if appropriate + values2d = values[None, :] + if mask is not None: + mask = mask[None, :] + if result_mask is not None: + result_mask = result_mask[None, :] + res = self._call_cython_op( + values2d, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=mask, + result_mask=result_mask, + **kwargs, + ) + if res.shape[0] == 1: + return res[0] + + # otherwise we have OHLC + return res.T + + return self._call_cython_op( + values, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=mask, + result_mask=result_mask, + **kwargs, + ) + + @final + def _call_cython_op( + self, + values: np.ndarray, # np.ndarray[ndim=2] + *, + min_count: int, + ngroups: int, + comp_ids: np.ndarray, + mask: npt.NDArray[np.bool_] | None, + result_mask: npt.NDArray[np.bool_] | None, + **kwargs, + ) -> np.ndarray: # np.ndarray[ndim=2] + orig_values = values + + dtype = values.dtype + is_numeric = is_numeric_dtype(dtype) + + is_datetimelike = needs_i8_conversion(dtype) + + if is_datetimelike: + values = values.view("int64") + is_numeric = True + elif is_bool_dtype(dtype): + values = values.view("uint8") + if values.dtype == "float16": + values = values.astype(np.float32) + + values = values.T + if mask is not None: + mask = mask.T + if result_mask is not None: + result_mask = result_mask.T + + out_shape = self._get_output_shape(ngroups, values) + func = self._get_cython_function(self.kind, self.how, values.dtype, is_numeric) + values = self._get_cython_vals(values) + out_dtype = self._get_out_dtype(values.dtype) + + result = maybe_fill(np.empty(out_shape, dtype=out_dtype)) + if self.kind == "aggregate": + counts = np.zeros(ngroups, dtype=np.int64) + if self.how in ["min", "max", "mean", "last", "first"]: + func( + out=result, + counts=counts, + values=values, + labels=comp_ids, + min_count=min_count, + mask=mask, + result_mask=result_mask, + is_datetimelike=is_datetimelike, + ) + elif self.how in ["sum"]: + # We support datetimelike + func( + out=result, + counts=counts, + values=values, + labels=comp_ids, + mask=mask, + result_mask=result_mask, + min_count=min_count, + is_datetimelike=is_datetimelike, + ) + elif self.how in ["ohlc", "prod"]: + func( + result, + counts, + values, + comp_ids, + min_count=min_count, + mask=mask, + result_mask=result_mask, + ) + else: + func(result, counts, values, comp_ids, min_count, **kwargs) + else: + # TODO: min_count + if self.uses_mask(): + if self.how != "rank": + # TODO: should rank take result_mask? 
+ kwargs["result_mask"] = result_mask + func( + out=result, + values=values, + labels=comp_ids, + ngroups=ngroups, + is_datetimelike=is_datetimelike, + mask=mask, + **kwargs, + ) + else: + func( + out=result, + values=values, + labels=comp_ids, + ngroups=ngroups, + is_datetimelike=is_datetimelike, + **kwargs, + ) + + if self.kind == "aggregate": + # i.e. counts is defined. Locations where count ArrayLike: + """ + Call our cython function, with appropriate pre- and post- processing. + """ + if values.ndim > 2: + raise NotImplementedError("number of dimensions is currently limited to 2") + elif values.ndim == 2: + assert axis == 1, axis + elif not is_1d_only_ea_dtype(values.dtype): + # Note: it is *not* the case that axis is always 0 for 1-dim values, + # as we can have 1D ExtensionArrays that we need to treat as 2D + assert axis == 0 + + dtype = values.dtype + is_numeric = is_numeric_dtype(dtype) + + # can we do this operation with our cython functions + # if not raise NotImplementedError + self._disallow_invalid_ops(dtype, is_numeric) + + if not isinstance(values, np.ndarray): + # i.e. ExtensionArray + return self._ea_wrap_cython_operation( + values, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + **kwargs, + ) + + return self._cython_op_ndim_compat( + values, + min_count=min_count, + ngroups=ngroups, + comp_ids=comp_ids, + mask=None, + **kwargs, + ) + + +class BaseGrouper: + """ + This is an internal Grouper class, which actually holds + the generated groups + + Parameters + ---------- + axis : Index + groupings : Sequence[Grouping] + all the grouping instances to handle in this grouper + for example for grouper list to groupby, need to pass the list + sort : bool, default True + whether this grouper will give sorted result or not + group_keys : bool, default True + mutated : bool, default False + indexer : np.ndarray[np.intp], optional + the indexer created by Grouper + some groupers (TimeGrouper) will sort its axis and its + group_info is also sorted, so need the indexer to reorder + + """ + + axis: Index + + def __init__( + self, + axis: Index, + groupings: Sequence[grouper.Grouping], + sort: bool = True, + group_keys: bool = True, + mutated: bool = False, + indexer: npt.NDArray[np.intp] | None = None, + dropna: bool = True, + ) -> None: + assert isinstance(axis, Index), axis + + self.axis = axis + self._groupings: list[grouper.Grouping] = list(groupings) + self._sort = sort + self.group_keys = group_keys + self.mutated = mutated + self.indexer = indexer + self.dropna = dropna + + @property + def groupings(self) -> list[grouper.Grouping]: + return self._groupings + + @property + def shape(self) -> Shape: + return tuple(ping.ngroups for ping in self.groupings) + + def __iter__(self) -> Iterator[Hashable]: + return iter(self.indices) + + @property + def nkeys(self) -> int: + return len(self.groupings) + + def get_iterator( + self, data: NDFrameT, axis: int = 0 + ) -> Iterator[tuple[Hashable, NDFrameT]]: + """ + Groupby iterator + + Returns + ------- + Generator yielding sequence of (name, subsetted object) + for each group + """ + splitter = self._get_splitter(data, axis=axis) + keys = self.group_keys_seq + yield from zip(keys, splitter) + + @final + def _get_splitter(self, data: NDFrame, axis: int = 0) -> DataSplitter: + """ + Returns + ------- + Generator yielding subsetted objects + """ + ids, _, ngroups = self.group_info + return get_splitter(data, ids, ngroups, axis=axis) + + def _get_grouper(self): + """ + We are a grouper as part of another's groupings. 
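Stepping back to the user level for a moment (an editorial sketch with invented data), get_iterator above is what drives plain iteration over a GroupBy object:

import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})
# Each iteration yields (group name, subsetted object), as get_iterator documents.
for name, group in df.groupby("A"):
    print(name, len(group))
# x 2
# y 1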
+ + We have a specific method of grouping, so cannot + convert to a Index for our grouper. + """ + return self.groupings[0].grouping_vector + + @final + @cache_readonly + def group_keys_seq(self): + if len(self.groupings) == 1: + return self.levels[0] + else: + ids, _, ngroups = self.group_info + + # provide "flattened" iterator for multi-group setting + return get_flattened_list(ids, ngroups, self.levels, self.codes) + + @final + def apply( + self, f: Callable, data: DataFrame | Series, axis: int = 0 + ) -> tuple[list, bool]: + mutated = self.mutated + splitter = self._get_splitter(data, axis=axis) + group_keys = self.group_keys_seq + result_values = [] + + # This calls DataSplitter.__iter__ + zipped = zip(group_keys, splitter) + + for key, group in zipped: + object.__setattr__(group, "name", key) + + # group might be modified + group_axes = group.axes + res = f(group) + if not mutated and not _is_indexed_like(res, group_axes, axis): + mutated = True + result_values.append(res) + # getattr pattern for __name__ is needed for functools.partial objects + if len(group_keys) == 0 and getattr(f, "__name__", None) in [ + "mad", + "skew", + "sum", + "prod", + ]: + # If group_keys is empty, then no function calls have been made, + # so we will not have raised even if this is an invalid dtype. + # So do one dummy call here to raise appropriate TypeError. + f(data.iloc[:0]) + + return result_values, mutated + + @cache_readonly + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: + """dict {group name -> group indices}""" + if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex): + # This shows unused categories in indices GH#38642 + return self.groupings[0].indices + codes_list = [ping.codes for ping in self.groupings] + keys = [ping.group_index for ping in self.groupings] + return get_indexer_dict(codes_list, keys) + + @final + def result_ilocs(self) -> npt.NDArray[np.intp]: + """ + Get the original integer locations of result_index in the input. + """ + # Original indices are where group_index would go via sorting. + # But when dropna is true, we need to remove null values while accounting for + # any gaps that then occur because of them. + group_index = get_group_index( + self.codes, self.shape, sort=self._sort, xnull=True + ) + group_index, _ = compress_group_index(group_index, sort=self._sort) + + if self.has_dropped_na: + mask = np.where(group_index >= 0) + # Count how many gaps are caused by previous null values for each position + null_gaps = np.cumsum(group_index == -1)[mask] + group_index = group_index[mask] + + result = get_group_index_sorter(group_index, self.ngroups) + + if self.has_dropped_na: + # Shift by the number of prior null gaps + result += np.take(null_gaps, result) + + return result + + @final + @property + def codes(self) -> list[npt.NDArray[np.signedinteger]]: + return [ping.codes for ping in self.groupings] + + @property + def levels(self) -> list[Index]: + return [ping.group_index for ping in self.groupings] + + @property + def names(self) -> list[Hashable]: + return [ping.name for ping in self.groupings] + + @final + def size(self) -> Series: + """ + Compute group sizes. 
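Group sizes surface directly through the public size method; a tiny sketch on toy data:

import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})
print(df.groupby("A").size())
# A
# x    2
# y    1
# dtype: int64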
+ """ + ids, _, ngroups = self.group_info + out: np.ndarray | list + if ngroups: + out = np.bincount(ids[ids != -1], minlength=ngroups) + else: + out = [] + return Series(out, index=self.result_index, dtype="int64") + + @cache_readonly + def groups(self) -> dict[Hashable, np.ndarray]: + """dict {group name -> group labels}""" + if len(self.groupings) == 1: + return self.groupings[0].groups + else: + to_groupby = zip(*(ping.grouping_vector for ping in self.groupings)) + index = Index(to_groupby) + return self.axis.groupby(index) + + @final + @cache_readonly + def is_monotonic(self) -> bool: + # return if my group orderings are monotonic + return Index(self.group_info[0]).is_monotonic_increasing + + @final + @cache_readonly + def has_dropped_na(self) -> bool: + """ + Whether grouper has null value(s) that are dropped. + """ + return bool((self.group_info[0] < 0).any()) + + @cache_readonly + def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]: + comp_ids, obs_group_ids = self._get_compressed_codes() + + ngroups = len(obs_group_ids) + comp_ids = ensure_platform_int(comp_ids) + + return comp_ids, obs_group_ids, ngroups + + @final + @cache_readonly + def codes_info(self) -> npt.NDArray[np.intp]: + # return the codes of items in original grouped axis + ids, _, _ = self.group_info + if self.indexer is not None: + sorter = np.lexsort((ids, self.indexer)) + ids = ids[sorter] + ids = ensure_platform_int(ids) + # TODO: if numpy annotates np.lexsort, this ensure_platform_int + # may become unnecessary + return ids + + @final + def _get_compressed_codes( + self, + ) -> tuple[npt.NDArray[np.signedinteger], npt.NDArray[np.intp]]: + # The first returned ndarray may have any signed integer dtype + if len(self.groupings) > 1: + group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True) + return compress_group_index(group_index, sort=self._sort) + # FIXME: compress_group_index's second return value is int64, not intp + + ping = self.groupings[0] + return ping.codes, np.arange(len(ping.group_index), dtype=np.intp) + + @final + @cache_readonly + def ngroups(self) -> int: + return len(self.result_index) + + @property + def reconstructed_codes(self) -> list[npt.NDArray[np.intp]]: + codes = self.codes + ids, obs_ids, _ = self.group_info + return decons_obs_group_ids(ids, obs_ids, self.shape, codes, xnull=True) + + @cache_readonly + def result_index(self) -> Index: + if len(self.groupings) == 1: + return self.groupings[0].result_index.rename(self.names[0]) + + codes = self.reconstructed_codes + levels = [ping.result_index for ping in self.groupings] + return MultiIndex( + levels=levels, codes=codes, verify_integrity=False, names=self.names + ) + + @final + def get_group_levels(self) -> list[ArrayLike]: + # Note: only called from _insert_inaxis_grouper_inplace, which + # is only called for BaseGrouper, never for BinGrouper + if len(self.groupings) == 1: + return [self.groupings[0].group_arraylike] + + name_list = [] + for ping, codes in zip(self.groupings, self.reconstructed_codes): + codes = ensure_platform_int(codes) + levels = ping.group_arraylike.take(codes) + + name_list.append(levels) + + return name_list + + # ------------------------------------------------------------ + # Aggregation functions + + @final + def _cython_operation( + self, + kind: str, + values, + how: str, + axis: int, + min_count: int = -1, + **kwargs, + ) -> ArrayLike: + """ + Returns the values of a cython operation. 
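A named aggregation such as "mean" is dispatched through this cython path, whereas an equivalent lambda falls back to the pure-python agg_series below; a hedged sanity check with toy data:

import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1.0, 2.0, 3.0]})
gb = df.groupby("A")["B"]
# Both routes should agree numerically; only the execution path differs.
assert gb.agg("mean").equals(gb.agg(lambda s: s.mean()))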
+ """ + assert kind in ["transform", "aggregate"] + + cy_op = WrappedCythonOp(kind=kind, how=how, has_dropped_na=self.has_dropped_na) + + ids, _, _ = self.group_info + ngroups = self.ngroups + return cy_op.cython_operation( + values=values, + axis=axis, + min_count=min_count, + comp_ids=ids, + ngroups=ngroups, + **kwargs, + ) + + @final + def agg_series( + self, obj: Series, func: Callable, preserve_dtype: bool = False + ) -> ArrayLike: + """ + Parameters + ---------- + obj : Series + func : function taking a Series and returning a scalar-like + preserve_dtype : bool + Whether the aggregation is known to be dtype-preserving. + + Returns + ------- + np.ndarray or ExtensionArray + """ + # test_groupby_empty_with_category gets here with self.ngroups == 0 + # and len(obj) > 0 + + if len(obj) == 0: + # SeriesGrouper would raise if we were to call _aggregate_series_fast + result = self._aggregate_series_pure_python(obj, func) + + elif not isinstance(obj._values, np.ndarray): + result = self._aggregate_series_pure_python(obj, func) + + # we can preserve a little bit more aggressively with EA dtype + # because maybe_cast_pointwise_result will do a try/except + # with _from_sequence. NB we are assuming here that _from_sequence + # is sufficiently strict that it casts appropriately. + preserve_dtype = True + + else: + result = self._aggregate_series_pure_python(obj, func) + + npvalues = lib.maybe_convert_objects(result, try_float=False) + if preserve_dtype: + out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True) + else: + out = npvalues + return out + + @final + def _aggregate_series_pure_python( + self, obj: Series, func: Callable + ) -> npt.NDArray[np.object_]: + ids, _, ngroups = self.group_info + + counts = np.zeros(ngroups, dtype=int) + result = np.empty(ngroups, dtype="O") + initialized = False + + # equiv: splitter = self._get_splitter(obj, axis=0) + splitter = get_splitter(obj, ids, ngroups, axis=0) + + for i, group in enumerate(splitter): + res = func(group) + res = libreduction.extract_result(res) + + if not initialized: + # We only do this validation on the first iteration + libreduction.check_result_array(res, group.dtype) + initialized = True + + counts[i] = group.shape[0] + result[i] = res + + return result + + +class BinGrouper(BaseGrouper): + """ + This is an internal Grouper class + + Parameters + ---------- + bins : the split index of binlabels to group the item of axis + binlabels : the label list + mutated : bool, default False + indexer : np.ndarray[np.intp] + + Examples + -------- + bins: [2, 4, 6, 8, 10] + binlabels: DatetimeIndex(['2005-01-01', '2005-01-03', + '2005-01-05', '2005-01-07', '2005-01-09'], + dtype='datetime64[ns]', freq='2D') + + the group_info, which contains the label of each item in grouped + axis, the index of label in label list, group number, is + + (array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]), array([0, 1, 2, 3, 4]), 5) + + means that, the grouped axis has 10 items, can be grouped into 5 + labels, the first and second items belong to the first label, the + third and forth items belong to the second label, and so on + + """ + + bins: npt.NDArray[np.int64] + binlabels: Index + mutated: bool + + def __init__( + self, + bins, + binlabels, + mutated: bool = False, + indexer=None, + ) -> None: + self.bins = ensure_int64(bins) + self.binlabels = ensure_index(binlabels) + self.mutated = mutated + self.indexer = indexer + + # These lengths must match, otherwise we could call agg_series + # with empty self.bins, which would raise in libreduction. 
+ assert len(self.binlabels) == len(self.bins) + + @cache_readonly + def groups(self): + """dict {group name -> group labels}""" + # this is mainly for compat + # GH 3881 + result = { + key: value + for key, value in zip(self.binlabels, self.bins) + if key is not NaT + } + return result + + @property + def nkeys(self) -> int: + # still matches len(self.groupings), but we can hard-code + return 1 + + def _get_grouper(self): + """ + We are a grouper as part of another's groupings. + + We have a specific method of grouping, so cannot + convert to a Index for our grouper. + """ + return self + + def get_iterator(self, data: NDFrame, axis: int = 0): + """ + Groupby iterator + + Returns + ------- + Generator yielding sequence of (name, subsetted object) + for each group + """ + if axis == 0: + slicer = lambda start, edge: data.iloc[start:edge] + else: + slicer = lambda start, edge: data.iloc[:, start:edge] + + length = len(data.axes[axis]) + + start = 0 + for edge, label in zip(self.bins, self.binlabels): + if label is not NaT: + yield label, slicer(start, edge) + start = edge + + if start < length: + yield self.binlabels[-1], slicer(start, None) + + @cache_readonly + def indices(self): + indices = collections.defaultdict(list) + + i = 0 + for label, bin in zip(self.binlabels, self.bins): + if i < bin: + if label is not NaT: + indices[label] = list(range(i, bin)) + i = bin + return indices + + @cache_readonly + def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]: + ngroups = self.ngroups + obs_group_ids = np.arange(ngroups, dtype=np.intp) + rep = np.diff(np.r_[0, self.bins]) + + rep = ensure_platform_int(rep) + if ngroups == len(self.bins): + comp_ids = np.repeat(np.arange(ngroups), rep) + else: + comp_ids = np.repeat(np.r_[-1, np.arange(ngroups)], rep) + + return ( + ensure_platform_int(comp_ids), + obs_group_ids, + ngroups, + ) + + @cache_readonly + def reconstructed_codes(self) -> list[np.ndarray]: + # get unique result indices, and prepend 0 as groupby starts from the first + return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]] + + @cache_readonly + def result_index(self) -> Index: + if len(self.binlabels) != 0 and isna(self.binlabels[0]): + return self.binlabels[1:] + + return self.binlabels + + @property + def levels(self) -> list[Index]: + return [self.binlabels] + + @property + def names(self) -> list[Hashable]: + return [self.binlabels.name] + + @property + def groupings(self) -> list[grouper.Grouping]: + lev = self.binlabels + ping = grouper.Grouping(lev, lev, in_axis=False, level=None) + return [ping] + + def _aggregate_series_fast(self, obj: Series, func: Callable) -> NoReturn: + # -> np.ndarray[object] + raise NotImplementedError( + "This should not be reached; use _aggregate_series_pure_python" + ) + + +def _is_indexed_like(obj, axes, axis: int) -> bool: + if isinstance(obj, Series): + if len(axes) > 1: + return False + return obj.axes[axis].equals(axes[axis]) + elif isinstance(obj, DataFrame): + return obj.axes[axis].equals(axes[axis]) + + return False + + +# ---------------------------------------------------------------------- +# Splitting / application + + +class DataSplitter(Generic[NDFrameT]): + def __init__( + self, + data: NDFrameT, + labels: npt.NDArray[np.intp], + ngroups: int, + axis: int = 0, + ) -> None: + self.data = data + self.labels = ensure_platform_int(labels) # _should_ already be np.intp + self.ngroups = ngroups + + self.axis = axis + assert isinstance(axis, int), axis + + @cache_readonly + def slabels(self) -> 
npt.NDArray[np.intp]: + # Sorted labels + return self.labels.take(self._sort_idx) + + @cache_readonly + def _sort_idx(self) -> npt.NDArray[np.intp]: + # Counting sort indexer + return get_group_index_sorter(self.labels, self.ngroups) + + def __iter__(self): + sdata = self.sorted_data + + if self.ngroups == 0: + # we are inside a generator, rather than raise StopIteration + # we merely return signal the end + return + + starts, ends = lib.generate_slices(self.slabels, self.ngroups) + + for start, end in zip(starts, ends): + yield self._chop(sdata, slice(start, end)) + + @cache_readonly + def sorted_data(self) -> NDFrameT: + return self.data.take(self._sort_idx, axis=self.axis) + + def _chop(self, sdata, slice_obj: slice) -> NDFrame: + raise AbstractMethodError(self) + + +class SeriesSplitter(DataSplitter): + def _chop(self, sdata: Series, slice_obj: slice) -> Series: + # fastpath equivalent to `sdata.iloc[slice_obj]` + mgr = sdata._mgr.get_slice(slice_obj) + ser = sdata._constructor(mgr, name=sdata.name, fastpath=True) + return ser.__finalize__(sdata, method="groupby") + + +class FrameSplitter(DataSplitter): + def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: + # Fastpath equivalent to: + # if self.axis == 0: + # return sdata.iloc[slice_obj] + # else: + # return sdata.iloc[:, slice_obj] + mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) + df = sdata._constructor(mgr) + return df.__finalize__(sdata, method="groupby") + + +def get_splitter( + data: NDFrame, labels: np.ndarray, ngroups: int, axis: int = 0 +) -> DataSplitter: + if isinstance(data, Series): + klass: type[DataSplitter] = SeriesSplitter + else: + # i.e. DataFrame + klass = FrameSplitter + + return klass(data, labels, ngroups, axis) diff --git a/pandas/core/index.py b/pandas/core/index.py new file mode 100644 index 00000000..19e9c6b2 --- /dev/null +++ b/pandas/core/index.py @@ -0,0 +1,37 @@ +# pyright: reportUnusedImport = false +from __future__ import annotations + +import warnings + +from pandas.util._exceptions import find_stack_level + +from pandas.core.indexes.api import ( # noqa:F401 + CategoricalIndex, + DatetimeIndex, + Float64Index, + Index, + Int64Index, + IntervalIndex, + MultiIndex, + NaT, + NumericIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, + UInt64Index, + _new_Index, + ensure_index, + ensure_index_from_sequences, + get_objs_combined_axis, +) +from pandas.core.indexes.multi import sparsify_labels # noqa:F401 + +# GH#30193 +warnings.warn( + "pandas.core.index is deprecated and will be removed in a future version. 
" + "The public classes are available in the top-level namespace.", + FutureWarning, + stacklevel=find_stack_level(), +) + +__all__: list[str] = [] diff --git a/pandas/core/indexers/__init__.py b/pandas/core/indexers/__init__.py new file mode 100644 index 00000000..6431f12a --- /dev/null +++ b/pandas/core/indexers/__init__.py @@ -0,0 +1,31 @@ +from pandas.core.indexers.utils import ( + check_array_indexer, + check_key_length, + check_setitem_lengths, + deprecate_ndim_indexing, + is_empty_indexer, + is_list_like_indexer, + is_scalar_indexer, + is_valid_positional_slice, + length_of_indexer, + maybe_convert_indices, + unpack_1tuple, + unpack_tuple_and_ellipses, + validate_indices, +) + +__all__ = [ + "is_valid_positional_slice", + "is_list_like_indexer", + "is_scalar_indexer", + "is_empty_indexer", + "check_setitem_lengths", + "validate_indices", + "maybe_convert_indices", + "length_of_indexer", + "deprecate_ndim_indexing", + "unpack_1tuple", + "check_key_length", + "check_array_indexer", + "unpack_tuple_and_ellipses", +] diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py new file mode 100644 index 00000000..c15cbf36 --- /dev/null +++ b/pandas/core/indexers/objects.py @@ -0,0 +1,398 @@ +"""Indexer objects for computing start/end window bounds for rolling operations""" +from __future__ import annotations + +from datetime import timedelta + +import numpy as np + +from pandas._libs.window.indexers import calculate_variable_window_bounds +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import ensure_platform_int + +from pandas.tseries.offsets import Nano + +get_window_bounds_doc = """ +Computes the bounds of a window. + +Parameters +---------- +num_values : int, default 0 + number of values that will be aggregated over +window_size : int, default 0 + the number of rows in a window +min_periods : int, default None + min_periods passed from the top level rolling API +center : bool, default None + center passed from the top level rolling API +closed : str, default None + closed passed from the top level rolling API +step : int, default None + step passed from the top level rolling API + .. 
versionadded:: 1.5 +win_type : str, default None + win_type passed from the top level rolling API + +Returns +------- +A tuple of ndarray[int64]s, indicating the boundaries of each +window +""" + + +class BaseIndexer: + """Base class for window bounds calculations.""" + + def __init__( + self, index_array: np.ndarray | None = None, window_size: int = 0, **kwargs + ) -> None: + """ + Parameters + ---------- + **kwargs : + keyword arguments that will be available when get_window_bounds is called + """ + self.index_array = index_array + self.window_size = window_size + # Set user defined kwargs as attributes that can be used in get_window_bounds + for key, value in kwargs.items(): + setattr(self, key, value) + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + step: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + raise NotImplementedError + + +class FixedWindowIndexer(BaseIndexer): + """Creates window boundaries that are of fixed length.""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + step: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + if center: + offset = (self.window_size - 1) // 2 + else: + offset = 0 + + end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64") + start = end - self.window_size + if closed in ["left", "both"]: + start -= 1 + if closed in ["left", "neither"]: + end -= 1 + + end = np.clip(end, 0, num_values) + start = np.clip(start, 0, num_values) + + return start, end + + +class VariableWindowIndexer(BaseIndexer): + """Creates window boundaries that are of variable length, namely for time series.""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + step: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + # error: Argument 4 to "calculate_variable_window_bounds" has incompatible + # type "Optional[bool]"; expected "bool" + # error: Argument 6 to "calculate_variable_window_bounds" has incompatible + # type "Optional[ndarray]"; expected "ndarray" + return calculate_variable_window_bounds( + num_values, + self.window_size, + min_periods, + center, # type: ignore[arg-type] + closed, + self.index_array, # type: ignore[arg-type] + ) + + +class VariableOffsetWindowIndexer(BaseIndexer): + """Calculate window boundaries based on a non-fixed offset such as a BusinessDay.""" + + def __init__( + self, + index_array: np.ndarray | None = None, + window_size: int = 0, + index=None, + offset=None, + **kwargs, + ) -> None: + super().__init__(index_array, window_size, **kwargs) + self.index = index + self.offset = offset + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + step: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + if step is not None: + raise NotImplementedError("step not implemented for variable offset window") + if num_values <= 0: + return np.empty(0, dtype="int64"), np.empty(0, dtype="int64") + + # if windows is variable, default is 'right', otherwise default is 'both' + if closed is None: + closed = "right" if self.index is not None else "both" + + right_closed = 
closed in ["right", "both"] + left_closed = closed in ["left", "both"] + + if self.index[num_values - 1] < self.index[0]: + index_growth_sign = -1 + else: + index_growth_sign = 1 + + start = np.empty(num_values, dtype="int64") + start.fill(-1) + end = np.empty(num_values, dtype="int64") + end.fill(-1) + + start[0] = 0 + + # right endpoint is closed + if right_closed: + end[0] = 1 + # right endpoint is open + else: + end[0] = 0 + + # start is start of slice interval (including) + # end is end of slice interval (not including) + for i in range(1, num_values): + end_bound = self.index[i] + start_bound = self.index[i] - index_growth_sign * self.offset + + # left endpoint is closed + if left_closed: + start_bound -= Nano(1) + + # advance the start bound until we are + # within the constraint + start[i] = i + for j in range(start[i - 1], i): + if (self.index[j] - start_bound) * index_growth_sign > timedelta(0): + start[i] = j + break + + # end bound is previous end + # or current index + if (self.index[end[i - 1]] - end_bound) * index_growth_sign <= timedelta(0): + end[i] = i + 1 + else: + end[i] = end[i - 1] + + # right endpoint is open + if not right_closed: + end[i] -= 1 + + return start, end + + +class ExpandingIndexer(BaseIndexer): + """Calculate expanding window bounds, mimicking df.expanding()""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + step: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + return ( + np.zeros(num_values, dtype=np.int64), + np.arange(1, num_values + 1, dtype=np.int64), + ) + + +class FixedForwardWindowIndexer(BaseIndexer): + """ + Creates window boundaries for fixed-length windows that include the current row. + + Examples + -------- + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2) + >>> df.rolling(window=indexer, min_periods=1).sum() + B + 0 1.0 + 1 3.0 + 2 2.0 + 3 4.0 + 4 4.0 + """ + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + step: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + if center: + raise ValueError("Forward-looking windows can't have center=True") + if closed is not None: + raise ValueError( + "Forward-looking windows don't support setting the closed argument" + ) + if step is None: + step = 1 + + start = np.arange(0, num_values, step, dtype="int64") + end = start + self.window_size + if self.window_size: + end = np.clip(end, 0, num_values) + + return start, end + + +class GroupbyIndexer(BaseIndexer): + """Calculate bounds to compute groupby rolling, mimicking df.groupby().rolling()""" + + def __init__( + self, + index_array: np.ndarray | None = None, + window_size: int | BaseIndexer = 0, + groupby_indices: dict | None = None, + window_indexer: type[BaseIndexer] = BaseIndexer, + indexer_kwargs: dict | None = None, + **kwargs, + ) -> None: + """ + Parameters + ---------- + index_array : np.ndarray or None + np.ndarray of the index of the original object that we are performing + a chained groupby operation over. 
This index has been pre-sorted relative to + the groups + window_size : int or BaseIndexer + window size during the windowing operation + groupby_indices : dict or None + dict of {group label: [positional index of rows belonging to the group]} + window_indexer : BaseIndexer + BaseIndexer class determining the start and end bounds of each group + indexer_kwargs : dict or None + Custom kwargs to be passed to window_indexer + **kwargs : + keyword arguments that will be available when get_window_bounds is called + """ + self.groupby_indices = groupby_indices or {} + self.window_indexer = window_indexer + self.indexer_kwargs = indexer_kwargs.copy() if indexer_kwargs else {} + super().__init__( + index_array=index_array, + window_size=self.indexer_kwargs.pop("window_size", window_size), + **kwargs, + ) + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + step: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + # 1) For each group, get the indices that belong to the group + # 2) Use the indices to calculate the start & end bounds of the window + # 3) Append the window bounds in group order + start_arrays = [] + end_arrays = [] + window_indices_start = 0 + for key, indices in self.groupby_indices.items(): + index_array: np.ndarray | None + + if self.index_array is not None: + index_array = self.index_array.take(ensure_platform_int(indices)) + else: + index_array = self.index_array + indexer = self.window_indexer( + index_array=index_array, + window_size=self.window_size, + **self.indexer_kwargs, + ) + start, end = indexer.get_window_bounds( + len(indices), min_periods, center, closed, step + ) + start = start.astype(np.int64) + end = end.astype(np.int64) + assert len(start) == len( + end + ), "these should be equal in length from get_window_bounds" + # Cannot use groupby_indices as they might not be monotonic with the object + # we're rolling over + window_indices = np.arange( + window_indices_start, window_indices_start + len(indices) + ) + window_indices_start += len(indices) + # Extend as we'll be slicing window like [start, end) + window_indices = np.append(window_indices, [window_indices[-1] + 1]).astype( + np.int64, copy=False + ) + start_arrays.append(window_indices.take(ensure_platform_int(start))) + end_arrays.append(window_indices.take(ensure_platform_int(end))) + if len(start_arrays) == 0: + return np.array([], dtype=np.int64), np.array([], dtype=np.int64) + start = np.concatenate(start_arrays) + end = np.concatenate(end_arrays) + return start, end + + +class ExponentialMovingWindowIndexer(BaseIndexer): + """Calculate ewm window bounds (the entire window)""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: int | None = None, + center: bool | None = None, + closed: str | None = None, + step: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + + return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64) diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py new file mode 100644 index 00000000..0f3cdc41 --- /dev/null +++ b/pandas/core/indexers/utils.py @@ -0,0 +1,566 @@ +""" +Low-dependency indexing utilities. 
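Before moving on, a brief hedged example of the indexer classes defined in objects.py above, via their public re-export (toy series; expected output shown for orientation):

import pandas as pd
from pandas.api.indexers import FixedForwardWindowIndexer

ser = pd.Series([1.0, 2.0, 3.0, 4.0])
# Any BaseIndexer subclass can be passed to rolling() as the window argument.
indexer = FixedForwardWindowIndexer(window_size=2)
print(ser.rolling(indexer, min_periods=1).sum())
# 0    3.0
# 1    5.0
# 2    7.0
# 3    4.0
# dtype: float64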
+""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, +) +import warnings + +import numpy as np + +from pandas._typing import AnyArrayLike +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_extension_array_dtype, + is_integer, + is_integer_dtype, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas.core.frame import DataFrame + from pandas.core.indexes.base import Index + +# ----------------------------------------------------------- +# Indexer Identification + + +def is_valid_positional_slice(slc: slice) -> bool: + """ + Check if a slice object can be interpreted as a positional indexer. + + Parameters + ---------- + slc : slice + + Returns + ------- + bool + + Notes + ----- + A valid positional slice may also be interpreted as a label-based slice + depending on the index being sliced. + """ + + def is_int_or_none(val): + return val is None or is_integer(val) + + return ( + is_int_or_none(slc.start) + and is_int_or_none(slc.stop) + and is_int_or_none(slc.step) + ) + + +def is_list_like_indexer(key) -> bool: + """ + Check if we have a list-like indexer that is *not* a NamedTuple. + + Parameters + ---------- + key : object + + Returns + ------- + bool + """ + # allow a list_like, but exclude NamedTuples which can be indexers + return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) + + +def is_scalar_indexer(indexer, ndim: int) -> bool: + """ + Return True if we are all scalar indexers. + + Parameters + ---------- + indexer : object + ndim : int + Number of dimensions in the object being indexed. + + Returns + ------- + bool + """ + if ndim == 1 and is_integer(indexer): + # GH37748: allow indexer to be an integer for Series + return True + if isinstance(indexer, tuple) and len(indexer) == ndim: + return all(is_integer(x) for x in indexer) + return False + + +def is_empty_indexer(indexer) -> bool: + """ + Check if we have an empty indexer. + + Parameters + ---------- + indexer : object + + Returns + ------- + bool + """ + if is_list_like(indexer) and not len(indexer): + return True + if not isinstance(indexer, tuple): + indexer = (indexer,) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) + + +# ----------------------------------------------------------- +# Indexer Validation + + +def check_setitem_lengths(indexer, value, values) -> bool: + """ + Validate that value and indexer are the same length. + + An special-case is allowed for when the indexer is a boolean array + and the number of true values equals the length of ``value``. In + this case, no exception is raised. + + Parameters + ---------- + indexer : sequence + Key for the setitem. + value : array-like + Value for the setitem. + values : array-like + Values being set into. + + Returns + ------- + bool + Whether this is an empty listlike setting which is a no-op. + + Raises + ------ + ValueError + When the indexer is an ndarray or list and the lengths don't match. + """ + no_op = False + + if isinstance(indexer, (np.ndarray, list)): + # We can ignore other listlikes because they are either + # a) not necessarily 1-D indexers, e.g. tuple + # b) boolean indexers e.g. 
BoolArray + if is_list_like(value): + if len(indexer) != len(value) and values.ndim == 1: + # boolean with truth values == len of the value is ok too + if isinstance(indexer, list): + indexer = np.array(indexer) + if not ( + isinstance(indexer, np.ndarray) + and indexer.dtype == np.bool_ + and indexer.sum() == len(value) + ): + raise ValueError( + "cannot set using a list-like indexer " + "with a different length than the value" + ) + if not len(indexer): + no_op = True + + elif isinstance(indexer, slice): + if is_list_like(value): + if len(value) != length_of_indexer(indexer, values) and values.ndim == 1: + # In case of two dimensional value is used row-wise and broadcasted + raise ValueError( + "cannot set using a slice indexer with a " + "different length than the value" + ) + if not len(value): + no_op = True + + return no_op + + +def validate_indices(indices: np.ndarray, n: int) -> None: + """ + Perform bounds-checking for an indexer. + + -1 is allowed for indicating missing values. + + Parameters + ---------- + indices : ndarray + n : int + Length of the array being indexed. + + Raises + ------ + ValueError + + Examples + -------- + >>> validate_indices(np.array([1, 2]), 3) # OK + + >>> validate_indices(np.array([1, -2]), 3) + Traceback (most recent call last): + ... + ValueError: negative dimensions are not allowed + + >>> validate_indices(np.array([1, 2, 3]), 3) + Traceback (most recent call last): + ... + IndexError: indices are out-of-bounds + + >>> validate_indices(np.array([-1, -1]), 0) # OK + + >>> validate_indices(np.array([0, 1]), 0) + Traceback (most recent call last): + ... + IndexError: indices are out-of-bounds + """ + if len(indices): + min_idx = indices.min() + if min_idx < -1: + msg = f"'indices' contains values less than allowed ({min_idx} < -1)" + raise ValueError(msg) + + max_idx = indices.max() + if max_idx >= n: + raise IndexError("indices are out-of-bounds") + + +# ----------------------------------------------------------- +# Indexer Conversion + + +def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray: + """ + Attempt to convert indices into valid, positive indices. + + If we have negative indices, translate to positive here. + If we have indices that are out-of-bounds, raise an IndexError. + + Parameters + ---------- + indices : array-like + Array of indices that we are to convert. + n : int + Number of elements in the array that we are indexing. + verify : bool, default True + Check that all entries are between 0 and n - 1, inclusive. + + Returns + ------- + array-like + An array-like of positive indices that correspond to the ones + that were passed in initially to this function. + + Raises + ------ + IndexError + One of the converted indices either exceeded the number of, + elements (specified by `n`), or was still negative. + """ + if isinstance(indices, list): + indices = np.array(indices) + if len(indices) == 0: + # If `indices` is empty, np.array will return a float, + # and will cause indexing errors. 
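+        # Editor's illustration (hedged, not part of the original pandas source;
+        # assumes plain NumPy behavior): an empty list literal produces a float
+        # array, which is not a valid indexer dtype, hence the explicit intp
+        # empty array returned below.
+        # >>> np.array([]).dtype
+        # dtype('float64')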
+ return np.empty(0, dtype=np.intp) + + mask = indices < 0 + if mask.any(): + indices = indices.copy() + indices[mask] += n + + if verify: + mask = (indices >= n) | (indices < 0) + if mask.any(): + raise IndexError("indices are out-of-bounds") + return indices + + +# ----------------------------------------------------------- +# Unsorted + + +def length_of_indexer(indexer, target=None) -> int: + """ + Return the expected length of target[indexer] + + Returns + ------- + int + """ + if target is not None and isinstance(indexer, slice): + target_len = len(target) + start = indexer.start + stop = indexer.stop + step = indexer.step + if start is None: + start = 0 + elif start < 0: + start += target_len + if stop is None or stop > target_len: + stop = target_len + elif stop < 0: + stop += target_len + if step is None: + step = 1 + elif step < 0: + start, stop = stop + 1, start + 1 + step = -step + return (stop - start + step - 1) // step + elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)): + if isinstance(indexer, list): + indexer = np.array(indexer) + + if indexer.dtype == bool: + # GH#25774 + return indexer.sum() + return len(indexer) + elif isinstance(indexer, range): + return (indexer.stop - indexer.start) // indexer.step + elif not is_list_like_indexer(indexer): + return 1 + raise AssertionError("cannot find the length of the indexer") + + +def deprecate_ndim_indexing(result, stacklevel: int = 3) -> None: + """ + Helper function to raise the deprecation warning for multi-dimensional + indexing on 1D Series/Index. + + GH#27125 indexer like idx[:, None] expands dim, but we cannot do that + and keep an index, so we currently return ndarray, which is deprecated + (Deprecation GH#30588). + """ + if np.ndim(result) > 1: + warnings.warn( + "Support for multi-dimensional indexing (e.g. `obj[:, None]`) " + "is deprecated and will be removed in a future " + "version. Convert to a numpy array before indexing instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + +def unpack_1tuple(tup): + """ + If we have a length-1 tuple/list that contains a slice, unpack to just + the slice. + + Notes + ----- + The list case is deprecated. + """ + if len(tup) == 1 and isinstance(tup[0], slice): + # if we don't have a MultiIndex, we may still be able to handle + # a 1-tuple. see test_1tuple_without_multiindex + + if isinstance(tup, list): + # GH#31299 + warnings.warn( + "Indexing with a single-item list containing a " + "slice is deprecated and will raise in a future " + "version. Pass a tuple instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return tup[0] + return tup + + +def check_key_length(columns: Index, key, value: DataFrame) -> None: + """ + Checks if a key used as indexer has the same length as the columns it is + associated with. + + Parameters + ---------- + columns : Index The columns of the DataFrame to index. + key : A list-like of keys to index with. + value : DataFrame The value to set for the keys. + + Raises + ------ + ValueError: If the length of key is not equal to the number of columns in value + or if the number of columns referenced by key is not equal to number + of columns. 
+ """ + if columns.is_unique: + if len(value.columns) != len(key): + raise ValueError("Columns must be same length as key") + else: + # Missing keys in columns are represented as -1 + if len(columns.get_indexer_non_unique(key)[0]) != len(value.columns): + raise ValueError("Columns must be same length as key") + + +def unpack_tuple_and_ellipses(item: tuple): + """ + Possibly unpack arr[..., n] to arr[n] + """ + if len(item) > 1: + # Note: we are assuming this indexing is being done on a 1D arraylike + if item[0] is Ellipsis: + item = item[1:] + elif item[-1] is Ellipsis: + item = item[:-1] + + if len(item) > 1: + raise IndexError("too many indices for array.") + + item = item[0] + return item + + +# ----------------------------------------------------------- +# Public indexer validation + + +def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: + """ + Check if `indexer` is a valid array indexer for `array`. + + For a boolean mask, `array` and `indexer` are checked to have the same + length. The dtype is validated, and if it is an integer or boolean + ExtensionArray, it is checked if there are missing values present, and + it is converted to the appropriate numpy array. Other dtypes will raise + an error. + + Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed + through as is. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + array : array-like + The array that is being indexed (only used for the length). + indexer : array-like or list-like + The array-like that's used to index. List-like input that is not yet + a numpy array or an ExtensionArray is converted to one. Other input + types are passed through as is. + + Returns + ------- + numpy.ndarray + The validated indexer as a numpy array that can be used to index. + + Raises + ------ + IndexError + When the lengths don't match. + ValueError + When `indexer` cannot be converted to a numpy ndarray to index + (e.g. presence of missing values). + + See Also + -------- + api.types.is_bool_dtype : Check if `key` is of boolean dtype. + + Examples + -------- + When checking a boolean mask, a boolean ndarray is returned when the + arguments are all valid. + + >>> mask = pd.array([True, False]) + >>> arr = pd.array([1, 2]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + An IndexError is raised when the lengths don't match. + + >>> mask = pd.array([True, False, True]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + Traceback (most recent call last): + ... + IndexError: Boolean index has wrong length: 3 instead of 2. + + NA values in a boolean array are treated as False. + + >>> mask = pd.array([True, pd.NA]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + A numpy boolean mask will get passed through (if the length is correct): + + >>> mask = np.array([True, False]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + Similarly for integer indexers, an integer ndarray is returned when it is + a valid indexer, otherwise an error is (for integer indexers, a matching + length is not required): + + >>> indexer = pd.array([0, 2], dtype="Int64") + >>> arr = pd.array([1, 2, 3]) + >>> pd.api.indexers.check_array_indexer(arr, indexer) + array([0, 2]) + + >>> indexer = pd.array([0, pd.NA], dtype="Int64") + >>> pd.api.indexers.check_array_indexer(arr, indexer) + Traceback (most recent call last): + ... 
+ ValueError: Cannot index with an integer indexer containing NA values + + For non-integer/boolean dtypes, an appropriate error is raised: + + >>> indexer = np.array([0., 2.], dtype="float64") + >>> pd.api.indexers.check_array_indexer(arr, indexer) + Traceback (most recent call last): + ... + IndexError: arrays used as indices must be of integer or boolean type + """ + from pandas.core.construction import array as pd_array + + # whatever is not an array-like is returned as-is (possible valid array + # indexers that are not array-like: integer, slice, Ellipsis, None) + # In this context, tuples are not considered as array-like, as they have + # a specific meaning in indexing (multi-dimensional indexing) + if is_list_like(indexer): + if isinstance(indexer, tuple): + return indexer + else: + return indexer + + # convert list-likes to array + if not is_array_like(indexer): + indexer = pd_array(indexer) + if len(indexer) == 0: + # empty list is converted to float array by pd.array + indexer = np.array([], dtype=np.intp) + + dtype = indexer.dtype + if is_bool_dtype(dtype): + if is_extension_array_dtype(dtype): + indexer = indexer.to_numpy(dtype=bool, na_value=False) + else: + indexer = np.asarray(indexer, dtype=bool) + + # GH26658 + if len(indexer) != len(array): + raise IndexError( + f"Boolean index has wrong length: " + f"{len(indexer)} instead of {len(array)}" + ) + elif is_integer_dtype(dtype): + try: + indexer = np.asarray(indexer, dtype=np.intp) + except ValueError as err: + raise ValueError( + "Cannot index with an integer indexer containing NA values" + ) from err + else: + raise IndexError("arrays used as indices must be of integer or boolean type") + + return indexer diff --git a/pandas/core/indexes/__init__.py b/pandas/core/indexes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py new file mode 100644 index 00000000..46959aa5 --- /dev/null +++ b/pandas/core/indexes/accessors.py @@ -0,0 +1,512 @@ +""" +datetimelike delegation +""" +from __future__ import annotations + +from typing import TYPE_CHECKING +import warnings + +import numpy as np + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_list_like, + is_period_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ABCSeries + +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.base import ( + NoNewAttributesMixin, + PandasObject, +) +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + + +class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): + _hidden_attrs = PandasObject._hidden_attrs | { + "orig", + "name", + } + + def __init__(self, data: Series, orig) -> None: + if not isinstance(data, ABCSeries): + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + self._parent = data + self.orig = orig + self.name = getattr(data, "name", None) + self._freeze() + + def _get_values(self): + data = self._parent + if is_datetime64_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) + + elif is_datetime64tz_dtype(data.dtype): + return DatetimeIndex(data, 
copy=False, name=self.name) + + elif is_timedelta64_dtype(data.dtype): + return TimedeltaIndex(data, copy=False, name=self.name) + + elif is_period_dtype(data.dtype): + return PeriodArray(data, copy=False) + + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + def _delegate_property_get(self, name): + from pandas import Series + + values = self._get_values() + + result = getattr(values, name) + + # maybe need to upcast (ints) + if isinstance(result, np.ndarray): + if is_integer_dtype(result): + result = result.astype("int64") + elif not is_list_like(result): + return result + + result = np.asarray(result) + + if self.orig is not None: + index = self.orig.index + else: + index = self._parent.index + # return the result as a Series, which is by definition a copy + result = Series(result, index=index, name=self.name).__finalize__(self._parent) + + # setting this object will show a SettingWithCopyWarning/Error + result._is_copy = ( + "modifications to a property of a datetimelike " + "object are not supported and are discarded. " + "Change values on the original." + ) + + return result + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise ValueError( + "modifications to a property of a datetimelike object are not supported. " + "Change values on the original." + ) + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + + values = self._get_values() + + method = getattr(values, name) + result = method(*args, **kwargs) + + if not is_list_like(result): + return result + + result = Series(result, index=self._parent.index, name=self.name).__finalize__( + self._parent + ) + + # setting this object will show a SettingWithCopyWarning/Error + result._is_copy = ( + "modifications to a method of a datetimelike " + "object are not supported and are discarded. " + "Change values on the original." + ) + + return result + + +@delegate_names( + delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_methods, typ="method" +) +class DatetimeProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Examples + -------- + >>> seconds_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="s")) + >>> seconds_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: datetime64[ns] + >>> seconds_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: int64 + + >>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h")) + >>> hours_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: datetime64[ns] + >>> hours_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: int64 + + >>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="q")) + >>> quarters_series + 0 2000-03-31 + 1 2000-06-30 + 2 2000-09-30 + dtype: datetime64[ns] + >>> quarters_series.dt.quarter + 0 1 + 1 2 + 2 3 + dtype: int64 + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + """ + + def to_pydatetime(self) -> np.ndarray: + """ + Return the data as an array of :class:`datetime.datetime` objects. + + Timezone information is retained if present. + + .. warning:: + + Python's datetime uses microsecond resolution, which is lower than + pandas (nanosecond). The values are truncated. 
+ + Returns + ------- + numpy.ndarray + Object dtype array containing native Python datetime objects. + + See Also + -------- + datetime.datetime : Standard library value for a datetime. + + Examples + -------- + >>> s = pd.Series(pd.date_range('20180310', periods=2)) + >>> s + 0 2018-03-10 + 1 2018-03-11 + dtype: datetime64[ns] + + >>> s.dt.to_pydatetime() + array([datetime.datetime(2018, 3, 10, 0, 0), + datetime.datetime(2018, 3, 11, 0, 0)], dtype=object) + + pandas' nanosecond precision is truncated to microseconds. + + >>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns')) + >>> s + 0 2018-03-10 00:00:00.000000000 + 1 2018-03-10 00:00:00.000000001 + dtype: datetime64[ns] + + >>> s.dt.to_pydatetime() + array([datetime.datetime(2018, 3, 10, 0, 0), + datetime.datetime(2018, 3, 10, 0, 0)], dtype=object) + """ + return self._get_values().to_pydatetime() + + @property + def freq(self): + return self._get_values().inferred_freq + + def isocalendar(self) -> DataFrame: + """ + Calculate year, week, and day according to the ISO 8601 standard. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + With columns year, week and day. + + See Also + -------- + Timestamp.isocalendar : Function return a 3-tuple containing ISO year, + week number, and weekday for the given Timestamp object. + datetime.date.isocalendar : Return a named tuple object with + three components: year, week and weekday. + + Examples + -------- + >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT])) + >>> ser.dt.isocalendar() + year week day + 0 2009 53 5 + 1 + >>> ser.dt.isocalendar().week + 0 53 + 1 + Name: week, dtype: UInt32 + """ + return self._get_values().isocalendar().set_index(self._parent.index) + + @property + def weekofyear(self): + """ + The week ordinal of the year according to the ISO 8601 standard. + + .. deprecated:: 1.1.0 + + Series.dt.weekofyear and Series.dt.week have been deprecated. Please + call :func:`Series.dt.isocalendar` and access the ``week`` column + instead. + """ + warnings.warn( + "Series.dt.weekofyear and Series.dt.week have been deprecated. " + "Please use Series.dt.isocalendar().week instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + week_series = self.isocalendar().week + week_series.name = self.name + if week_series.hasnans: + return week_series.astype("float64") + return week_series.astype("int64") + + week = weekofyear + + +@delegate_names( + delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=TimedeltaArray, + accessors=TimedeltaArray._datetimelike_methods, + typ="method", +) +class TimedeltaProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + + Examples + -------- + >>> seconds_series = pd.Series( + ... pd.timedelta_range(start="1 second", periods=3, freq="S") + ... ) + >>> seconds_series + 0 0 days 00:00:01 + 1 0 days 00:00:02 + 2 0 days 00:00:03 + dtype: timedelta64[ns] + >>> seconds_series.dt.seconds + 0 1 + 1 2 + 2 3 + dtype: int64 + """ + + def to_pytimedelta(self) -> np.ndarray: + """ + Return an array of native :class:`datetime.timedelta` objects. + + Python's standard `datetime` library uses a different representation + timedelta's. This method converts a Series of pandas Timedeltas + to `datetime.timedelta` format with the same length as the original + Series. 
+ + Returns + ------- + numpy.ndarray + Array of 1D containing data with `datetime.timedelta` type. + + See Also + -------- + datetime.timedelta : A duration expressing the difference + between two date, time, or datetime. + + Examples + -------- + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d")) + >>> s + 0 0 days + 1 1 days + 2 2 days + 3 3 days + 4 4 days + dtype: timedelta64[ns] + + >>> s.dt.to_pytimedelta() + array([datetime.timedelta(0), datetime.timedelta(days=1), + datetime.timedelta(days=2), datetime.timedelta(days=3), + datetime.timedelta(days=4)], dtype=object) + """ + return self._get_values().to_pytimedelta() + + @property + def components(self): + """ + Return a Dataframe of the components of the Timedeltas. + + Returns + ------- + DataFrame + + Examples + -------- + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s')) + >>> s + 0 0 days 00:00:00 + 1 0 days 00:00:01 + 2 0 days 00:00:02 + 3 0 days 00:00:03 + 4 0 days 00:00:04 + dtype: timedelta64[ns] + >>> s.dt.components + days hours minutes seconds milliseconds microseconds nanoseconds + 0 0 0 0 0 0 0 0 + 1 0 0 0 1 0 0 0 + 2 0 0 0 2 0 0 0 + 3 0 0 0 3 0 0 0 + 4 0 0 0 4 0 0 0 + """ + return ( + self._get_values() + .components.set_index(self._parent.index) + .__finalize__(self._parent) + ) + + @property + def freq(self): + return self._get_values().inferred_freq + + +@delegate_names( + delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=PeriodArray, accessors=PeriodArray._datetimelike_methods, typ="method" +) +class PeriodProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + + Examples + -------- + >>> seconds_series = pd.Series( + ... pd.period_range( + ... start="2000-01-01 00:00:00", end="2000-01-01 00:00:03", freq="s" + ... ) + ... ) + >>> seconds_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + 3 2000-01-01 00:00:03 + dtype: period[S] + >>> seconds_series.dt.second + 0 0 + 1 1 + 2 2 + 3 3 + dtype: int64 + + >>> hours_series = pd.Series( + ... pd.period_range(start="2000-01-01 00:00", end="2000-01-01 03:00", freq="h") + ... ) + >>> hours_series + 0 2000-01-01 00:00 + 1 2000-01-01 01:00 + 2 2000-01-01 02:00 + 3 2000-01-01 03:00 + dtype: period[H] + >>> hours_series.dt.hour + 0 0 + 1 1 + 2 2 + 3 3 + dtype: int64 + + >>> quarters_series = pd.Series( + ... pd.period_range(start="2000-01-01", end="2000-12-31", freq="Q-DEC") + ... ) + >>> quarters_series + 0 2000Q1 + 1 2000Q2 + 2 2000Q3 + 3 2000Q4 + dtype: period[Q-DEC] + >>> quarters_series.dt.quarter + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + """ + + +class CombinedDatetimelikeProperties( + DatetimeProperties, TimedeltaProperties, PeriodProperties +): + def __new__(cls, data: Series): + # CombinedDatetimelikeProperties isn't really instantiated. Instead + # we need to choose which parent (datetime or timedelta) is + # appropriate. Since we're checking the dtypes anyway, we'll just + # do all the validation here. 
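+        # Editor's illustration (doctest-style sketch, not part of the original
+        # pandas source): the accessor class is chosen from the Series dtype, e.g.
+        # >>> pd.Series(pd.date_range("2000-01-01", periods=2)).dt          # -> DatetimeProperties
+        # >>> pd.Series(pd.to_timedelta(["1 day", "2 days"])).dt            # -> TimedeltaProperties
+        # >>> pd.Series(pd.period_range("2000", periods=2, freq="D")).dt    # -> PeriodProperties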
+ + if not isinstance(data, ABCSeries): + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + orig = data if is_categorical_dtype(data.dtype) else None + if orig is not None: + data = data._constructor( + orig.array, + name=orig.name, + copy=False, + dtype=orig._values.categories.dtype, + index=orig.index, + ) + + if is_datetime64_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_datetime64tz_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_timedelta64_dtype(data.dtype): + return TimedeltaProperties(data, orig) + elif is_period_dtype(data.dtype): + return PeriodProperties(data, orig) + + raise AttributeError("Can only use .dt accessor with datetimelike values") diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py new file mode 100644 index 00000000..b041e6a6 --- /dev/null +++ b/pandas/core/indexes/api.py @@ -0,0 +1,386 @@ +from __future__ import annotations + +import textwrap +from typing import cast + +import numpy as np + +from pandas._libs import ( + NaT, + lib, +) +from pandas.errors import InvalidIndexError + +from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.common import is_dtype_equal + +from pandas.core.algorithms import safe_sort +from pandas.core.indexes.base import ( + Index, + _new_Index, + ensure_index, + ensure_index_from_sequences, + get_unanimous_names, +) +from pandas.core.indexes.category import CategoricalIndex +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.interval import IntervalIndex +from pandas.core.indexes.multi import MultiIndex +from pandas.core.indexes.numeric import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.range import RangeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex + +_sort_msg = textwrap.dedent( + """\ +Sorting because non-concatenation axis is not aligned. A future version +of pandas will change to not sort by default. + +To accept the future behavior, pass 'sort=False'. + +To retain the current behavior and silence the warning, pass 'sort=True'. +""" +) + + +__all__ = [ + "Index", + "MultiIndex", + "NumericIndex", + "Float64Index", + "Int64Index", + "CategoricalIndex", + "IntervalIndex", + "RangeIndex", + "UInt64Index", + "InvalidIndexError", + "TimedeltaIndex", + "PeriodIndex", + "DatetimeIndex", + "_new_Index", + "NaT", + "ensure_index", + "ensure_index_from_sequences", + "get_objs_combined_axis", + "union_indexes", + "get_unanimous_names", + "all_indexes_same", + "default_index", + "safe_sort_index", +] + + +def get_objs_combined_axis( + objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False +) -> Index: + """ + Extract combined index: return intersection or union (depending on the + value of "intersect") of indexes on given axis, or None if all objects + lack indexes (e.g. they are numpy arrays). + + Parameters + ---------- + objs : list + Series or DataFrame objects, may be mix of the two. + intersect : bool, default False + If True, calculate the intersection between indexes. Otherwise, + calculate the union. + axis : {0 or 'index', 1 or 'outer'}, default 0 + The axis to extract indexes from. + sort : bool, default True + Whether the result index should come out sorted or not. + copy : bool, default False + If True, return a copy of the combined index. 
+ + Returns + ------- + Index + """ + obs_idxes = [obj._get_axis(axis) for obj in objs] + return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy) + + +def _get_distinct_objs(objs: list[Index]) -> list[Index]: + """ + Return a list with distinct elements of "objs" (different ids). + Preserves order. + """ + ids: set[int] = set() + res = [] + for obj in objs: + if id(obj) not in ids: + ids.add(id(obj)) + res.append(obj) + return res + + +def _get_combined_index( + indexes: list[Index], + intersect: bool = False, + sort: bool = False, + copy: bool = False, +) -> Index: + """ + Return the union or intersection of indexes. + + Parameters + ---------- + indexes : list of Index or list objects + When intersect=True, do not accept list of lists. + intersect : bool, default False + If True, calculate the intersection between indexes. Otherwise, + calculate the union. + sort : bool, default False + Whether the result index should come out sorted or not. + copy : bool, default False + If True, return a copy of the combined index. + + Returns + ------- + Index + """ + # TODO: handle index names! + indexes = _get_distinct_objs(indexes) + if len(indexes) == 0: + index = Index([]) + elif len(indexes) == 1: + index = indexes[0] + elif intersect: + index = indexes[0] + for other in indexes[1:]: + index = index.intersection(other) + else: + index = union_indexes(indexes, sort=False) + index = ensure_index(index) + + if sort: + index = safe_sort_index(index) + # GH 29879 + if copy: + index = index.copy() + + return index + + +def safe_sort_index(index: Index) -> Index: + """ + Returns the sorted index + + We keep the dtypes and the name attributes. + + Parameters + ---------- + index : an Index + + Returns + ------- + Index + """ + if index.is_monotonic_increasing: + return index + + try: + array_sorted = safe_sort(index) + except TypeError: + pass + else: + if isinstance(array_sorted, MultiIndex): + return array_sorted + + array_sorted = cast(np.ndarray, array_sorted) + if isinstance(index, MultiIndex): + index = MultiIndex.from_tuples(array_sorted, names=index.names) + else: + index = Index(array_sorted, name=index.name, dtype=index.dtype) + + return index + + +def union_indexes(indexes, sort: bool | None = True) -> Index: + """ + Return the union of indexes. + + The behavior of sort and names is not consistent. + + Parameters + ---------- + indexes : list of Index or list objects + sort : bool, default True + Whether the result index should come out sorted or not. + + Returns + ------- + Index + """ + if len(indexes) == 0: + raise AssertionError("Must have at least 1 Index to union") + if len(indexes) == 1: + result = indexes[0] + if isinstance(result, list): + result = Index(sorted(result)) + return result + + indexes, kind = _sanitize_and_check(indexes) + + def _unique_indices(inds, dtype) -> Index: + """ + Convert indexes to lists and concatenate them, removing duplicates. + + The final dtype is inferred. + + Parameters + ---------- + inds : list of Index or list objects + dtype : dtype to set for the resulting Index + + Returns + ------- + Index + """ + + def conv(i): + if isinstance(i, Index): + i = i.tolist() + return i + + return Index( + lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort), + dtype=dtype, + ) + + def _find_common_index_dtype(inds): + """ + Finds a common type for the indexes to pass through to resulting index. 
+ + Parameters + ---------- + inds: list of Index or list objects + + Returns + ------- + The common type or None if no indexes were given + """ + dtypes = [idx.dtype for idx in indexes if isinstance(idx, Index)] + if dtypes: + dtype = find_common_type(dtypes) + else: + dtype = None + + return dtype + + if kind == "special": + result = indexes[0] + first = result + + dtis = [x for x in indexes if isinstance(x, DatetimeIndex)] + dti_tzs = [x for x in dtis if x.tz is not None] + if len(dti_tzs) not in [0, len(dtis)]: + # TODO: this behavior is not tested (so may not be desired), + # but is kept in order to keep behavior the same when + # deprecating union_many + # test_frame_from_dict_with_mixed_indexes + raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + + if len(dtis) == len(indexes): + sort = True + if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes): + # i.e. timezones mismatch + # TODO(2.0): once deprecation is enforced, this union will + # cast to UTC automatically. + indexes = [x.tz_convert("UTC") for x in indexes] + + result = indexes[0] + + elif len(dtis) > 1: + # If we have mixed timezones, our casting behavior may depend on + # the order of indexes, which we don't want. + sort = False + + # TODO: what about Categorical[dt64]? + # test_frame_from_dict_with_mixed_indexes + indexes = [x.astype(object, copy=False) for x in indexes] + result = indexes[0] + + for other in indexes[1:]: + result = result.union(other, sort=None if sort else False) + return result + + elif kind == "array": + dtype = _find_common_index_dtype(indexes) + index = indexes[0] + if not all(index.equals(other) for other in indexes[1:]): + index = _unique_indices(indexes, dtype) + + name = get_unanimous_names(*indexes)[0] + if name != index.name: + index = index.rename(name) + return index + else: # kind='list' + dtype = _find_common_index_dtype(indexes) + return _unique_indices(indexes, dtype) + + +def _sanitize_and_check(indexes): + """ + Verify the type of indexes and convert lists to Index. + + Cases: + + - [list, list, ...]: Return ([list, list, ...], 'list') + - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...]) + Lists are sorted and converted to Index. + - [Index, Index, ...]: Return ([Index, Index, ...], TYPE) + TYPE = 'special' if at least one special type, 'array' otherwise. + + Parameters + ---------- + indexes : list of Index or list objects + + Returns + ------- + sanitized_indexes : list of Index or list objects + type : {'list', 'array', 'special'} + """ + kinds = list({type(index) for index in indexes}) + + if list in kinds: + if len(kinds) > 1: + indexes = [ + Index(list(x)) if not isinstance(x, Index) else x for x in indexes + ] + kinds.remove(list) + else: + return indexes, "list" + + if len(kinds) > 1 or Index not in kinds: + return indexes, "special" + else: + return indexes, "array" + + +def all_indexes_same(indexes) -> bool: + """ + Determine if all indexes contain the same elements. + + Parameters + ---------- + indexes : iterable of Index objects + + Returns + ------- + bool + True if all indexes contain the same elements, False otherwise. 
+ """ + itr = iter(indexes) + first = next(itr) + return all(first.equals(index) for index in itr) + + +def default_index(n: int) -> RangeIndex: + rng = range(0, n) + return RangeIndex._simple_new(rng, name=None) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py new file mode 100644 index 00000000..447ba8d7 --- /dev/null +++ b/pandas/core/indexes/base.py @@ -0,0 +1,7489 @@ +from __future__ import annotations + +from datetime import datetime +import functools +from itertools import zip_longest +import operator +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Hashable, + Iterable, + Literal, + NoReturn, + Sequence, + TypeVar, + cast, + final, + overload, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + NaT, + algos as libalgos, + index as libindex, + lib, +) +import pandas._libs.join as libjoin +from pandas._libs.lib import ( + is_datetime_array, + no_default, +) +from pandas._libs.missing import is_float_nan +from pandas._libs.tslibs import ( + IncompatibleFrequency, + OutOfBoundsDatetime, + Timestamp, + is_unitless, + tz_compare, +) +from pandas._typing import ( + ArrayLike, + Axes, + Dtype, + DtypeObj, + F, + IgnoreRaise, + Level, + Shape, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import ( + DuplicateLabelError, + IntCastingNaNError, + InvalidIndexError, +) +from pandas.util._decorators import ( + Appender, + cache_readonly, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import ( + find_stack_level, + rewrite_exception, +) + +from pandas.core.dtypes.astype import astype_nansafe +from pandas.core.dtypes.cast import ( + LossySetitemError, + can_hold_element, + common_dtype_categorical_compat, + ensure_dtype_can_hold_na, + find_common_type, + infer_dtype_from, + maybe_cast_pointwise_result, + np_can_hold_element, +) +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_object, + ensure_platform_int, + is_bool_dtype, + is_categorical_dtype, + is_dtype_equal, + is_ea_or_datetimelike_dtype, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_hashable, + is_integer, + is_interval_dtype, + is_iterator, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_signed_integer_dtype, + is_string_dtype, + is_unsigned_integer_dtype, + needs_i8_conversion, + pandas_dtype, + validate_all_hashable, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCMultiIndex, + ABCPeriodIndex, + ABCSeries, + ABCTimedeltaIndex, +) +from pandas.core.dtypes.inference import is_dict_like +from pandas.core.dtypes.missing import ( + array_equivalent, + is_valid_na_for_dtype, + isna, +) + +from pandas.core import ( + arraylike, + missing, + ops, +) +from pandas.core.accessor import CachedAccessor +import pandas.core.algorithms as algos +from pandas.core.array_algos.putmask import ( + setitem_datetimelike_compat, + validate_putmask, +) +from pandas.core.arrays import ( + Categorical, + ExtensionArray, +) +from pandas.core.arrays.datetimes import ( + tz_to_dtype, + validate_tz_from_dtype, +) +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.arrays.string_ import StringArray +from pandas.core.base import ( + IndexOpsMixin, + PandasObject, +) +import pandas.core.common as 
com +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, + sanitize_array, +) +from pandas.core.indexers import deprecate_ndim_indexing +from pandas.core.indexes.frozen import FrozenList +from pandas.core.ops import get_op_result_name +from pandas.core.ops.invalid import make_invalid_op +from pandas.core.sorting import ( + ensure_key_mapped, + get_group_index_sorter, + nargsort, +) +from pandas.core.strings import StringMethods + +from pandas.io.formats.printing import ( + PrettyDict, + default_pprint, + format_object_summary, + pprint_thing, +) + +if TYPE_CHECKING: + from pandas import ( + CategoricalIndex, + DataFrame, + MultiIndex, + Series, + ) + from pandas.core.arrays import PeriodArray + + +__all__ = ["Index"] + +_unsortable_types = frozenset(("mixed", "mixed-integer")) + +_index_doc_kwargs: dict[str, str] = { + "klass": "Index", + "inplace": "", + "target_klass": "Index", + "raises_section": "", + "unique": "Index", + "duplicated": "np.ndarray", +} +_index_shared_docs: dict[str, str] = {} +str_t = str + + +_dtype_obj = np.dtype("object") + + +def _maybe_return_indexers(meth: F) -> F: + """ + Decorator to simplify 'return_indexers' checks in Index.join. + """ + + @functools.wraps(meth) + def join( + self, + other, + how: str_t = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, + ): + join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort) + if not return_indexers: + return join_index + + if lidx is not None: + lidx = ensure_platform_int(lidx) + if ridx is not None: + ridx = ensure_platform_int(ridx) + return join_index, lidx, ridx + + return cast(F, join) + + +def disallow_kwargs(kwargs: dict[str, Any]) -> None: + if kwargs: + raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") + + +def _new_Index(cls, d): + """ + This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__. + """ + # required for backward compat, because PI can't be instantiated with + # ordinals through __new__ GH #13277 + if issubclass(cls, ABCPeriodIndex): + from pandas.core.indexes.period import _new_PeriodIndex + + return _new_PeriodIndex(cls, **d) + + if issubclass(cls, ABCMultiIndex): + if "labels" in d and "codes" not in d: + # GH#23752 "labels" kwarg has been replaced with "codes" + d["codes"] = d.pop("labels") + + # Since this was a valid MultiIndex at pickle-time, we don't need to + # check validty at un-pickle time. + d["verify_integrity"] = False + + elif "dtype" not in d and "data" in d: + # Prevent Index.__new__ from conducting inference; + # "data" key not in RangeIndex + d["dtype"] = d["data"].dtype + return cls.__new__(cls, **d) + + +_IndexT = TypeVar("_IndexT", bound="Index") + + +class Index(IndexOpsMixin, PandasObject): + """ + Immutable sequence used for indexing and alignment. + + The basic object storing axis labels for all pandas objects. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: object) + If dtype is None, we find the dtype that best fits the data. + If an actual dtype is provided, we coerce to that dtype if it's safe. + Otherwise, an error will be raised. + copy : bool + Make a copy of input ndarray. + name : object + Name to be stored in the index. + tupleize_cols : bool (default: True) + When True, attempt to create a MultiIndex if possible. + + See Also + -------- + RangeIndex : Index implementing a monotonic integer range. + CategoricalIndex : Index of :class:`Categorical` s. 
+ MultiIndex : A multi-level, or hierarchical Index. + IntervalIndex : An Index of :class:`Interval` s. + DatetimeIndex : Index of datetime64 data. + TimedeltaIndex : Index of timedelta64 data. + PeriodIndex : Index of Period data. + NumericIndex : Index of numpy int/uint/float data. + Int64Index : Index of purely int64 labels (deprecated). + UInt64Index : Index of purely uint64 labels (deprecated). + Float64Index : Index of purely float64 labels (deprecated). + + Notes + ----- + An Index instance can **only** contain hashable objects + + Examples + -------- + >>> pd.Index([1, 2, 3]) + Int64Index([1, 2, 3], dtype='int64') + + >>> pd.Index(list('abc')) + Index(['a', 'b', 'c'], dtype='object') + """ + + # tolist is not actually deprecated, just suppressed in the __dir__ + _hidden_attrs: frozenset[str] = ( + PandasObject._hidden_attrs + | IndexOpsMixin._hidden_attrs + | frozenset(["contains", "set_value"]) + ) + + # To hand over control to subclasses + _join_precedence = 1 + + # Cython methods; see github.com/cython/cython/issues/2647 + # for why we need to wrap these instead of making them class attributes + # Moreover, cython will choose the appropriate-dtyped sub-function + # given the dtypes of the passed arguments + + @final + def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]: + # Caller is responsible for ensuring other.dtype == self.dtype + sv = self._get_engine_target() + ov = other._get_engine_target() + # can_use_libjoin assures sv and ov are ndarrays + sv = cast(np.ndarray, sv) + ov = cast(np.ndarray, ov) + return libjoin.left_join_indexer_unique(sv, ov) + + @final + def _left_indexer( + self: _IndexT, other: _IndexT + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: + # Caller is responsible for ensuring other.dtype == self.dtype + sv = self._get_engine_target() + ov = other._get_engine_target() + # can_use_libjoin assures sv and ov are ndarrays + sv = cast(np.ndarray, sv) + ov = cast(np.ndarray, ov) + joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov) + joined = self._from_join_target(joined_ndarray) + return joined, lidx, ridx + + @final + def _inner_indexer( + self: _IndexT, other: _IndexT + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: + # Caller is responsible for ensuring other.dtype == self.dtype + sv = self._get_engine_target() + ov = other._get_engine_target() + # can_use_libjoin assures sv and ov are ndarrays + sv = cast(np.ndarray, sv) + ov = cast(np.ndarray, ov) + joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov) + joined = self._from_join_target(joined_ndarray) + return joined, lidx, ridx + + @final + def _outer_indexer( + self: _IndexT, other: _IndexT + ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: + # Caller is responsible for ensuring other.dtype == self.dtype + sv = self._get_engine_target() + ov = other._get_engine_target() + # can_use_libjoin assures sv and ov are ndarrays + sv = cast(np.ndarray, sv) + ov = cast(np.ndarray, ov) + joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov) + joined = self._from_join_target(joined_ndarray) + return joined, lidx, ridx + + _typ: str = "index" + _data: ExtensionArray | np.ndarray + _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = ( + np.ndarray, + ExtensionArray, + ) + _id: object | None = None + _name: Hashable = None + # MultiIndex.levels previously allowed setting the index name. 
We + # don't allow this anymore, and raise if it happens rather than + # failing silently. + _no_setting_name: bool = False + _comparables: list[str] = ["name"] + _attributes: list[str] = ["name"] + _is_numeric_dtype: bool = False + _can_hold_strings: bool = True + + # Whether this index is a NumericIndex, but not a Int64Index, Float64Index, + # UInt64Index or RangeIndex. Needed for backwards compat. Remove this attribute and + # associated code in pandas 2.0. + _is_backward_compat_public_numeric_index: bool = False + + @property + def _engine_type( + self, + ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]: + return libindex.ObjectEngine + + # whether we support partial string indexing. Overridden + # in DatetimeIndex and PeriodIndex + _supports_partial_string_indexing = False + + _accessors = {"str"} + + str = CachedAccessor("str", StringMethods) + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs + ) -> Index: + + if kwargs: + warnings.warn( + "Passing keywords other than 'data', 'dtype', 'copy', 'name', " + "'tupleize_cols' is deprecated and will raise TypeError in a " + "future version. Use the specific Index subclass directly instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + from pandas.core.arrays import PandasArray + from pandas.core.indexes.range import RangeIndex + + name = maybe_extract_name(name, data, cls) + + if dtype is not None: + dtype = pandas_dtype(dtype) + if "tz" in kwargs: + tz = kwargs.pop("tz") + validate_tz_from_dtype(dtype, tz) + dtype = tz_to_dtype(tz) + + if type(data) is PandasArray: + # ensure users don't accidentally put a PandasArray in an index, + # but don't unpack StringArray + data = data.to_numpy() + if isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + + data_dtype = getattr(data, "dtype", None) + + # range + if isinstance(data, (range, RangeIndex)): + result = RangeIndex(start=data, copy=copy, name=name) + if dtype is not None: + return result.astype(dtype, copy=False) + return result + + elif is_ea_or_datetimelike_dtype(dtype): + # non-EA dtype indexes have special casting logic, so we punt here + klass = cls._dtype_to_subclass(dtype) + if klass is not Index: + return klass(data, dtype=dtype, copy=copy, name=name, **kwargs) + + ea_cls = dtype.construct_array_type() + data = ea_cls._from_sequence(data, dtype=dtype, copy=copy) + disallow_kwargs(kwargs) + return Index._simple_new(data, name=name) + + elif is_ea_or_datetimelike_dtype(data_dtype): + data_dtype = cast(DtypeObj, data_dtype) + klass = cls._dtype_to_subclass(data_dtype) + if klass is not Index: + result = klass(data, copy=copy, name=name, **kwargs) + if dtype is not None: + return result.astype(dtype, copy=False) + return result + elif dtype is not None: + # GH#45206 + data = data.astype(dtype, copy=False) + + disallow_kwargs(kwargs) + data = extract_array(data, extract_numpy=True) + return Index._simple_new(data, name=name) + + # index-like + elif ( + isinstance(data, Index) + and data._is_backward_compat_public_numeric_index + and dtype is None + ): + return data._constructor(data, name=name, copy=copy) + elif isinstance(data, (np.ndarray, Index, ABCSeries)): + + if isinstance(data, ABCMultiIndex): + data = data._values + + if dtype is not None: + # we need to avoid having numpy coerce + # things that look like ints/floats to ints unless + # they are actually ints, e.g. 
'0' and 0.0 + # should not be coerced + # GH 11836 + data = sanitize_array(data, None, dtype=dtype, copy=copy) + + dtype = data.dtype + + if data.dtype.kind in ["i", "u", "f"]: + # maybe coerce to a sub-class + arr = data + elif data.dtype.kind in ["b", "c"]: + # No special subclass, and Index._ensure_array won't do this + # for us. + arr = np.asarray(data) + else: + arr = com.asarray_tuplesafe(data, dtype=_dtype_obj) + + if dtype is None: + arr = _maybe_cast_data_without_dtype( + arr, cast_numeric_deprecated=True + ) + dtype = arr.dtype + + if kwargs: + return cls(arr, dtype, copy=copy, name=name, **kwargs) + + klass = cls._dtype_to_subclass(arr.dtype) + arr = klass._ensure_array(arr, dtype, copy) + disallow_kwargs(kwargs) + return klass._simple_new(arr, name) + + elif is_scalar(data): + raise cls._scalar_data_error(data) + elif hasattr(data, "__array__"): + return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) + else: + + if tupleize_cols and is_list_like(data): + # GH21470: convert iterable to list before determining if empty + if is_iterator(data): + data = list(data) + + if data and all(isinstance(e, tuple) for e in data): + # we must be all tuples, otherwise don't construct + # 10697 + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex.from_tuples( + data, names=name or kwargs.get("names") + ) + # other iterable of some kind + + subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj) + if dtype is None: + # with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated + subarr = _maybe_cast_data_without_dtype( + subarr, cast_numeric_deprecated=False + ) + dtype = subarr.dtype + return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) + + @classmethod + def _ensure_array(cls, data, dtype, copy: bool): + """ + Ensure we have a valid array to pass to _simple_new. + """ + if data.ndim > 1: + # GH#13601, GH#20285, GH#27125 + raise ValueError("Index data must be 1-dimensional") + if copy: + # asarray_tuplesafe does not always copy underlying data, + # so need to make sure that this happens + data = data.copy() + return data + + @final + @classmethod + def _dtype_to_subclass(cls, dtype: DtypeObj): + # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 + + if isinstance(dtype, ExtensionDtype): + if isinstance(dtype, DatetimeTZDtype): + from pandas import DatetimeIndex + + return DatetimeIndex + elif isinstance(dtype, CategoricalDtype): + from pandas import CategoricalIndex + + return CategoricalIndex + elif isinstance(dtype, IntervalDtype): + from pandas import IntervalIndex + + return IntervalIndex + elif isinstance(dtype, PeriodDtype): + from pandas import PeriodIndex + + return PeriodIndex + + elif isinstance(dtype, SparseDtype): + warnings.warn( + "In a future version, passing a SparseArray to pd.Index " + "will store that array directly instead of converting to a " + "dense numpy ndarray. 
To retain the old behavior, use " + "pd.Index(arr.to_numpy()) instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + return cls._dtype_to_subclass(dtype.subtype) + + return Index + + if dtype.kind == "M": + from pandas import DatetimeIndex + + return DatetimeIndex + + elif dtype.kind == "m": + from pandas import TimedeltaIndex + + return TimedeltaIndex + + elif is_float_dtype(dtype): + from pandas.core.api import Float64Index + + return Float64Index + elif is_unsigned_integer_dtype(dtype): + from pandas.core.api import UInt64Index + + return UInt64Index + elif is_signed_integer_dtype(dtype): + from pandas.core.api import Int64Index + + return Int64Index + + elif dtype == _dtype_obj: + # NB: assuming away MultiIndex + return Index + + elif issubclass( + dtype.type, (str, bool, np.bool_, complex, np.complex64, np.complex128) + ): + return Index + + raise NotImplementedError(dtype) + + """ + NOTE for new Index creation: + + - _simple_new: It returns new Index with the same type as the caller. + All metadata (such as name) must be provided by caller's responsibility. + Using _shallow_copy is recommended because it fills these metadata + otherwise specified. + + - _shallow_copy: It returns new Index with the same type (using + _simple_new), but fills caller's metadata otherwise specified. Passed + kwargs will overwrite corresponding metadata. + + See each method's docstring. + """ + + @property + def asi8(self): + """ + Integer representation of the values. + + Returns + ------- + ndarray + An ndarray with int64 dtype. + """ + warnings.warn( + "Index.asi8 is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return None + + @classmethod + def _simple_new(cls: type[_IndexT], values, name: Hashable = None) -> _IndexT: + """ + We require that we have a dtype compat for the values. If we are passed + a non-dtype compat, then coerce using the constructor. + + Must be careful not to recurse. + """ + assert isinstance(values, cls._data_cls), type(values) + + result = object.__new__(cls) + result._data = values + result._name = name + result._cache = {} + result._reset_identity() + + return result + + @classmethod + def _with_infer(cls, *args, **kwargs): + """ + Constructor that uses the 1.0.x behavior inferring numeric dtypes + for ndarray[object] inputs. + """ + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", ".*the Index constructor", FutureWarning) + result = cls(*args, **kwargs) + + if result.dtype == _dtype_obj and not result._is_multi: + # error: Argument 1 to "maybe_convert_objects" has incompatible type + # "Union[ExtensionArray, ndarray[Any, Any]]"; expected + # "ndarray[Any, Any]" + values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type] + if values.dtype.kind in ["i", "u", "f", "b"]: + return Index(values, name=result.name) + + return result + + @cache_readonly + def _constructor(self: _IndexT) -> type[_IndexT]: + return type(self) + + @final + def _maybe_check_unique(self) -> None: + """ + Check that an Index has no duplicates. + + This is typically only called via + `NDFrame.flags.allows_duplicate_labels.setter` when it's set to + True (duplicates aren't allowed). + + Raises + ------ + DuplicateLabelError + When the index is not unique. 
+ """ + if not self.is_unique: + msg = """Index has duplicates.""" + duplicates = self._format_duplicate_message() + msg += f"\n{duplicates}" + + raise DuplicateLabelError(msg) + + @final + def _format_duplicate_message(self) -> DataFrame: + """ + Construct the DataFrame for a DuplicateLabelError. + + This returns a DataFrame indicating the labels and positions + of duplicates in an index. This should only be called when it's + already known that duplicates are present. + + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'a']) + >>> idx._format_duplicate_message() + positions + label + a [0, 2] + """ + from pandas import Series + + duplicates = self[self.duplicated(keep="first")].unique() + assert len(duplicates) + + out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates] + if self._is_multi: + # test_format_duplicate_labels_message_multi + # error: "Type[Index]" has no attribute "from_tuples" [attr-defined] + out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined] + + if self.nlevels == 1: + out = out.rename_axis("label") + return out.to_frame(name="positions") + + # -------------------------------------------------------------------- + # Index Internals Methods + + @final + def _get_attributes_dict(self) -> dict[str_t, Any]: + """ + Return an attributes dict for my class. + + Temporarily added back for compatibility issue in dask, see + https://github.com/pandas-dev/pandas/pull/43895 + """ + warnings.warn( + "The Index._get_attributes_dict method is deprecated, and will be " + "removed in a future version", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + return {k: getattr(self, k, None) for k in self._attributes} + + def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT: + """ + Create a new Index with the same class as the caller, don't copy the + data, use the same object attributes with passed in attributes taking + precedence. + + *this is an internal non-public method* + + Parameters + ---------- + values : the values to create the new Index, optional + name : Label, defaults to self.name + """ + name = self._name if name is no_default else name + + return self._simple_new(values, name=name) + + def _view(self: _IndexT) -> _IndexT: + """ + fastpath to make a shallow copy, i.e. new object with same data. + """ + result = self._simple_new(self._values, name=self._name) + + result._cache = self._cache + return result + + @final + def _rename(self: _IndexT, name: Hashable) -> _IndexT: + """ + fastpath for rename if new name is already validated. + """ + result = self._view() + result._name = name + return result + + @final + def is_(self, other) -> bool: + """ + More flexible, faster check like ``is`` but that works through views. + + Note: this is *not* the same as ``Index.identical()``, which checks + that metadata is also the same. + + Parameters + ---------- + other : object + Other object to compare against. + + Returns + ------- + bool + True if both have same underlying data, False otherwise. + + See Also + -------- + Index.identical : Works like ``Index.is_`` but also checks metadata. + """ + if self is other: + return True + elif not hasattr(other, "_id"): + return False + elif self._id is None or other._id is None: + return False + else: + return self._id is other._id + + @final + def _reset_identity(self) -> None: + """ + Initializes or resets ``_id`` attribute with new object. 
+ """ + self._id = object() + + @final + def _cleanup(self) -> None: + self._engine.clear_mapping() + + @cache_readonly + def _engine( + self, + ) -> libindex.IndexEngine | libindex.ExtensionEngine: + # For base class (object dtype) we get ObjectEngine + target_values = self._get_engine_target() + if ( + isinstance(target_values, ExtensionArray) + and self._engine_type is libindex.ObjectEngine + ): + return libindex.ExtensionEngine(target_values) + + target_values = cast(np.ndarray, target_values) + # to avoid a reference cycle, bind `target_values` to a local variable, so + # `self` is not passed into the lambda. + if target_values.dtype == bool: + return libindex.BoolEngine(target_values) + elif target_values.dtype == np.complex64: + return libindex.Complex64Engine(target_values) + elif target_values.dtype == np.complex128: + return libindex.Complex128Engine(target_values) + + # error: Argument 1 to "ExtensionEngine" has incompatible type + # "ndarray[Any, Any]"; expected "ExtensionArray" + return self._engine_type(target_values) # type: ignore[arg-type] + + @final + @cache_readonly + def _dir_additions_for_owner(self) -> set[str_t]: + """ + Add the string-like labels to the owner dataframe/series dir output. + + If this is a MultiIndex, it's first level values are used. + """ + return { + c + for c in self.unique(level=0)[: get_option("display.max_dir_items")] + if isinstance(c, str) and c.isidentifier() + } + + # -------------------------------------------------------------------- + # Array-Like Methods + + # ndarray compat + def __len__(self) -> int: + """ + Return the length of the Index. + """ + return len(self._data) + + def __array__(self, dtype=None) -> np.ndarray: + """ + The array interface, return my values. + """ + return np.asarray(self._data, dtype=dtype) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): + if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs): + return NotImplemented + + # TODO(2.0) the 'and', 'or' and 'xor' dunder methods are currently set + # operations and not logical operations, so don't dispatch + # This is deprecated, so this full 'if' clause can be removed once + # deprecation is enforced in 2.0 + if not ( + method == "__call__" + and ufunc in (np.bitwise_and, np.bitwise_or, np.bitwise_xor) + ): + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if "out" in kwargs: + # e.g. test_dti_isub_tdi + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + new_inputs = [x if x is not self else x._values for x in inputs] + result = getattr(ufunc, method)(*new_inputs, **kwargs) + if ufunc.nout == 2: + # i.e. np.divmod, np.modf, np.frexp + return tuple(self.__array_wrap__(x) for x in result) + + return self.__array_wrap__(result) + + def __array_wrap__(self, result, context=None): + """ + Gets called after a ufunc and other functions e.g. np.split. + """ + result = lib.item_from_zerodim(result) + if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1: + return result + + return Index(result, name=self.name) + + @cache_readonly + def dtype(self) -> DtypeObj: + """ + Return the dtype object of the underlying data. 
+ """ + return self._data.dtype + + @final + def ravel(self, order="C"): + """ + Return an ndarray of the flattened values of the underlying data. + + Returns + ------- + numpy.ndarray + Flattened array. + + See Also + -------- + numpy.ndarray.ravel : Return a flattened array. + """ + warnings.warn( + "Index.ravel returning ndarray is deprecated; in a future version " + "this will return a view on self.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if needs_i8_conversion(self.dtype): + # Item "ndarray[Any, Any]" of "Union[ExtensionArray, ndarray[Any, Any]]" + # has no attribute "_ndarray" + values = self._data._ndarray # type: ignore[union-attr] + elif is_interval_dtype(self.dtype): + values = np.asarray(self._data) + else: + values = self._get_engine_target() + return values.ravel(order=order) + + def view(self, cls=None): + + # we need to see if we are subclassing an + # index type here + if cls is not None and not hasattr(cls, "_typ"): + dtype = cls + if isinstance(cls, str): + dtype = pandas_dtype(cls) + + if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion( + dtype + ): + if dtype.kind == "m" and dtype != "m8[ns]": + # e.g. m8[s] + return self._data.view(cls) + + idx_cls = self._dtype_to_subclass(dtype) + # NB: we only get here for subclasses that override + # _data_cls such that it is a type and not a tuple + # of types. + arr_cls = idx_cls._data_cls + arr = arr_cls(self._data.view("i8"), dtype=dtype) + return idx_cls._simple_new(arr, name=self.name) + + result = self._data.view(cls) + else: + result = self._view() + if isinstance(result, Index): + result._id = self._id + return result + + def astype(self, dtype, copy: bool = True): + """ + Create an Index with values cast to dtypes. + + The class of a new Index is determined by dtype. When conversion is + impossible, a TypeError exception is raised. + + Parameters + ---------- + dtype : numpy dtype or pandas type + Note that any signed integer `dtype` is treated as ``'int64'``, + and any unsigned integer `dtype` is treated as ``'uint64'``, + regardless of the size. + copy : bool, default True + By default, astype always returns a newly allocated object. + If copy is set to False and internal requirements on dtype are + satisfied, the original data is used to create a new Index + or the original Index is returned. + + Returns + ------- + Index + Index with values cast to specified dtype. + """ + if dtype is not None: + dtype = pandas_dtype(dtype) + + if is_dtype_equal(self.dtype, dtype): + # Ensure that self.astype(self.dtype) is self + return self.copy() if copy else self + + values = self._data + if isinstance(values, ExtensionArray): + if isinstance(dtype, np.dtype) and dtype.kind == "M" and is_unitless(dtype): + # TODO(2.0): remove this special-casing once this is enforced + # in DTA.astype + raise TypeError(f"Cannot cast {type(self).__name__} to dtype") + + with rewrite_exception(type(values).__name__, type(self).__name__): + new_values = values.astype(dtype, copy=copy) + + elif is_float_dtype(self.dtype) and needs_i8_conversion(dtype): + # NB: this must come before the ExtensionDtype check below + # TODO: this differs from Series behavior; can/should we align them? + raise TypeError( + f"Cannot convert Float64Index to dtype {dtype}; integer " + "values are required for conversion" + ) + + elif isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() + # Note: for RangeIndex and CategoricalDtype self vs self._values + # behaves differently here. 
+ new_values = cls._from_sequence(self, dtype=dtype, copy=copy) + + else: + try: + if dtype == str: + # GH#38607 + new_values = values.astype(dtype, copy=copy) + else: + # GH#13149 specifically use astype_nansafe instead of astype + new_values = astype_nansafe(values, dtype=dtype, copy=copy) + except IntCastingNaNError: + raise + except (TypeError, ValueError) as err: + if dtype.kind == "u" and "losslessly" in str(err): + # keep the message from _astype_float_to_int_nansafe + raise + raise TypeError( + f"Cannot cast {type(self).__name__} to dtype {dtype}" + ) from err + + # pass copy=False because any copying will be done in the astype above + if self._is_backward_compat_public_numeric_index: + # this block is needed so e.g. NumericIndex[int8].astype("int32") returns + # NumericIndex[int32] and not Int64Index with dtype int64. + # When Int64Index etc. are removed from the code base, removed this also. + if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype): + return self._constructor( + new_values, name=self.name, dtype=dtype, copy=False + ) + return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False) + + _index_shared_docs[ + "take" + ] = """ + Return a new %(klass)s of the values selected by the indices. + + For internal compatibility with numpy arrays. + + Parameters + ---------- + indices : array-like + Indices to be taken. + axis : int, optional + The axis over which to select values, always 0. + allow_fill : bool, default True + fill_value : scalar, default None + If allow_fill=True and fill_value is not None, indices specified by + -1 are regarded as NA. If Index doesn't hold NA, raise ValueError. + + Returns + ------- + Index + An index formed of elements at the given indices. Will be the same + type as self, except for RangeIndex. + + See Also + -------- + numpy.ndarray.take: Return an array formed from the + elements of a at the given indices. + """ + + @Appender(_index_shared_docs["take"] % _index_doc_kwargs) + def take( + self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs + ): + if kwargs: + nv.validate_take((), kwargs) + if is_scalar(indices): + raise TypeError("Expected indices to be array-like") + indices = ensure_platform_int(indices) + allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices) + + # Note: we discard fill_value and use self._na_value, only relevant + # in the case where allow_fill is True and fill_value is not None + values = self._values + if isinstance(values, np.ndarray): + taken = algos.take( + values, indices, allow_fill=allow_fill, fill_value=self._na_value + ) + else: + # algos.take passes 'axis' keyword which not all EAs accept + taken = values.take( + indices, allow_fill=allow_fill, fill_value=self._na_value + ) + # _constructor so RangeIndex->Int64Index + return self._constructor._simple_new(taken, name=self.name) + + @final + def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool: + """ + We only use pandas-style take when allow_fill is True _and_ + fill_value is not None. 
+ """ + if allow_fill and fill_value is not None: + # only fill if we are passing a non-None fill_value + if self._can_hold_na: + if (indices < -1).any(): + raise ValueError( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + else: + cls_name = type(self).__name__ + raise ValueError( + f"Unable to fill values because {cls_name} cannot contain NA" + ) + else: + allow_fill = False + return allow_fill + + _index_shared_docs[ + "repeat" + ] = """ + Repeat elements of a %(klass)s. + + Returns a new %(klass)s where each element of the current %(klass)s + is repeated consecutively a given number of times. + + Parameters + ---------- + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + %(klass)s. + axis : None + Must be ``None``. Has no effect but is accepted for compatibility + with numpy. + + Returns + ------- + repeated_index : %(klass)s + Newly created %(klass)s with repeated elements. + + See Also + -------- + Series.repeat : Equivalent function for Series. + numpy.repeat : Similar method for :class:`numpy.ndarray`. + + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx + Index(['a', 'b', 'c'], dtype='object') + >>> idx.repeat(2) + Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object') + >>> idx.repeat([1, 2, 3]) + Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object') + """ + + @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) + def repeat(self, repeats, axis=None): + repeats = ensure_platform_int(repeats) + nv.validate_repeat((), {"axis": axis}) + res_values = self._values.repeat(repeats) + + # _constructor so RangeIndex->Int64Index + return self._constructor._simple_new(res_values, name=self.name) + + # -------------------------------------------------------------------- + # Copying Methods + + def copy( + self: _IndexT, + name: Hashable | None = None, + deep: bool = False, + dtype: Dtype | None = None, + names: Sequence[Hashable] | None = None, + ) -> _IndexT: + """ + Make a copy of this object. + + Name and dtype sets those attributes on the new object. + + Parameters + ---------- + name : Label, optional + Set name for new object. + deep : bool, default False + dtype : numpy dtype or pandas type, optional + Set dtype for new object. + + .. deprecated:: 1.2.0 + use ``astype`` method instead. + names : list-like, optional + Kept for compatibility with MultiIndex. Should not be used. + + .. deprecated:: 1.4.0 + use ``name`` instead. + + Returns + ------- + Index + Index refer to new object which is a copy of this object. + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + """ + if names is not None: + warnings.warn( + "parameter names is deprecated and will be removed in a future " + "version. Use the name parameter instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + name = self._validate_names(name=name, names=names, deep=deep)[0] + if deep: + new_data = self._data.copy() + new_index = type(self)._simple_new(new_data, name=name) + else: + new_index = self._rename(name=name) + + if dtype: + warnings.warn( + "parameter dtype is deprecated and will be removed in a future " + "version. 
Use the astype method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + new_index = new_index.astype(dtype) + return new_index + + @final + def __copy__(self: _IndexT, **kwargs) -> _IndexT: + return self.copy(**kwargs) + + @final + def __deepcopy__(self: _IndexT, memo=None) -> _IndexT: + """ + Parameters + ---------- + memo, default None + Standard signature. Unused + """ + return self.copy(deep=True) + + # -------------------------------------------------------------------- + # Rendering Methods + + @final + def __repr__(self) -> str_t: + """ + Return a string representation for this object. + """ + klass_name = type(self).__name__ + data = self._format_data() + attrs = self._format_attrs() + space = self._format_space() + attrs_str = [f"{k}={v}" for k, v in attrs] + prepr = f",{space}".join(attrs_str) + + # no data provided, just attributes + if data is None: + data = "" + + return f"{klass_name}({data}{prepr})" + + def _format_space(self) -> str_t: + + # using space here controls if the attributes + # are line separated or not (the default) + + # max_seq_items = get_option('display.max_seq_items') + # if len(self) > max_seq_items: + # space = "\n%s" % (' ' * (len(klass) + 1)) + return " " + + @property + def _formatter_func(self): + """ + Return the formatter function. + """ + return default_pprint + + def _format_data(self, name=None) -> str_t: + """ + Return the formatted data as a unicode string. + """ + # do we want to justify (only do so for non-objects) + is_justify = True + + if self.inferred_type == "string": + is_justify = False + elif self.inferred_type == "categorical": + self = cast("CategoricalIndex", self) + if is_object_dtype(self.categories): + is_justify = False + + return format_object_summary( + self, + self._formatter_func, + is_justify=is_justify, + name=name, + line_break_each_value=self._is_multi, + ) + + def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]: + """ + Return a list of tuples of the (attr,formatted_value). + """ + attrs: list[tuple[str_t, str_t | int | bool | None]] = [] + + if not self._is_multi: + attrs.append(("dtype", f"'{self.dtype}'")) + + if self.name is not None: + attrs.append(("name", default_pprint(self.name))) + elif self._is_multi and any(x is not None for x in self.names): + attrs.append(("names", default_pprint(self.names))) + + max_seq_items = get_option("display.max_seq_items") or len(self) + if len(self) > max_seq_items: + attrs.append(("length", len(self))) + return attrs + + @final + def _get_level_names(self) -> Hashable | Sequence[Hashable]: + """ + Return a name or list of names with None replaced by the level number. + """ + if self._is_multi: + return [ + level if name is None else name for level, name in enumerate(self.names) + ] + else: + return 0 if self.name is None else self.name + + @final + def _mpl_repr(self) -> np.ndarray: + # how to represent ourselves to matplotlib + if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M": + return cast(np.ndarray, self.values) + return self.astype(object, copy=False)._values + + def format( + self, + name: bool = False, + formatter: Callable | None = None, + na_rep: str_t = "NaN", + ) -> list[str_t]: + """ + Render a string representation of the Index. 
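+ + Examples + -------- + A small illustration with a plain object-dtype Index; the output shown is assumed for this pandas version: + + >>> pd.Index(['a', 'b', 'c']).format() + ['a', 'b', 'c'] + >>> pd.Index(['a', 'b', 'c'], name='x').format(name=True) + ['x', 'a', 'b', 'c']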
+ """ + header = [] + if name: + header.append( + pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) + if self.name is not None + else "" + ) + + if formatter is not None: + return header + list(self.map(formatter)) + + return self._format_with_header(header, na_rep=na_rep) + + def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]: + from pandas.io.formats.format import format_array + + values = self._values + + if is_object_dtype(values.dtype): + values = cast(np.ndarray, values) + values = lib.maybe_convert_objects(values, safe=True) + + result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] + + # could have nans + mask = is_float_nan(values) + if mask.any(): + result_arr = np.array(result) + result_arr[mask] = na_rep + result = result_arr.tolist() + else: + result = trim_front(format_array(values, None, justify="left")) + return header + result + + @final + def to_native_types(self, slicer=None, **kwargs) -> np.ndarray: + """ + Format specified values of `self` and return them. + + .. deprecated:: 1.2.0 + + Parameters + ---------- + slicer : int, array-like + An indexer into `self` that specifies which values + are used in the formatting process. + kwargs : dict + Options for specifying how the values should be formatted. + These options include the following: + + 1) na_rep : str + The value that serves as a placeholder for NULL values + 2) quoting : bool or None + Whether or not there are quoted values in `self` + 3) date_format : str + The format used to represent date-like values. + + Returns + ------- + numpy.ndarray + Formatted values. + """ + warnings.warn( + "The 'to_native_types' method is deprecated and will be removed in " + "a future version. Use 'astype(str)' instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + values = self + if slicer is not None: + values = values[slicer] + return values._format_native_types(**kwargs) + + def _format_native_types( + self, *, na_rep="", quoting=None, **kwargs + ) -> npt.NDArray[np.object_]: + """ + Actually format specific types of the index. + """ + mask = isna(self) + if not self.is_object() and not quoting: + values = np.asarray(self).astype(str) + else: + values = np.array(self, dtype=object, copy=True) + + values[mask] = na_rep + return values + + def _summary(self, name=None) -> str_t: + """ + Return a summarized representation. + + Parameters + ---------- + name : str + name to use in the summary representation + + Returns + ------- + String with a summarized representation of the index + """ + if len(self) > 0: + head = self[0] + if hasattr(head, "format") and not isinstance(head, str): + head = head.format() + elif needs_i8_conversion(self.dtype): + # e.g. Timedelta, display as values, not quoted + head = self._formatter_func(head).replace("'", "") + tail = self[-1] + if hasattr(tail, "format") and not isinstance(tail, str): + tail = tail.format() + elif needs_i8_conversion(self.dtype): + # e.g. Timedelta, display as values, not quoted + tail = self._formatter_func(tail).replace("'", "") + + index_summary = f", {head} to {tail}" + else: + index_summary = "" + + if name is None: + name = type(self).__name__ + return f"{name}: {len(self)} entries{index_summary}" + + # -------------------------------------------------------------------- + # Conversion Methods + + def to_flat_index(self: _IndexT) -> _IndexT: + """ + Identity method. + + This is implemented for compatibility with subclass implementations + when chaining. + + Returns + ------- + pd.Index + Caller. 
+ + See Also + -------- + MultiIndex.to_flat_index : Subclass implementation. + """ + return self + + def to_series(self, index=None, name: Hashable = None) -> Series: + """ + Create a Series with both index and values equal to the index keys. + + Useful with map for returning an indexer based on an index. + + Parameters + ---------- + index : Index, optional + Index of resulting Series. If None, defaults to original index. + name : str, optional + Name of resulting Series. If None, defaults to name of original + index. + + Returns + ------- + Series + The dtype will be based on the type of the Index values. + + See Also + -------- + Index.to_frame : Convert an Index to a DataFrame. + Series.to_frame : Convert Series to DataFrame. + + Examples + -------- + >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') + + By default, the original Index and original name is reused. + + >>> idx.to_series() + animal + Ant Ant + Bear Bear + Cow Cow + Name: animal, dtype: object + + To enforce a new Index, specify new labels to ``index``: + + >>> idx.to_series(index=[0, 1, 2]) + 0 Ant + 1 Bear + 2 Cow + Name: animal, dtype: object + + To override the name of the resulting column, specify `name`: + + >>> idx.to_series(name='zoo') + animal + Ant Ant + Bear Bear + Cow Cow + Name: zoo, dtype: object + """ + from pandas import Series + + if index is None: + index = self._view() + if name is None: + name = self.name + + return Series(self._values.copy(), index=index, name=name) + + def to_frame( + self, index: bool = True, name: Hashable = lib.no_default + ) -> DataFrame: + """ + Create a DataFrame with a column containing the Index. + + Parameters + ---------- + index : bool, default True + Set the index of the returned DataFrame as the original Index. + + name : object, default None + The passed name should substitute for the index name (if it has + one). + + Returns + ------- + DataFrame + DataFrame containing the original Index data. + + See Also + -------- + Index.to_series : Convert an Index to a Series. + Series.to_frame : Convert Series to DataFrame. + + Examples + -------- + >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') + >>> idx.to_frame() + animal + animal + Ant Ant + Bear Bear + Cow Cow + + By default, the original Index is reused. To enforce a new Index: + + >>> idx.to_frame(index=False) + animal + 0 Ant + 1 Bear + 2 Cow + + To override the name of the resulting column, specify `name`: + + >>> idx.to_frame(index=False, name='zoo') + zoo + 0 Ant + 1 Bear + 2 Cow + """ + from pandas import DataFrame + + if name is None: + warnings.warn( + "Explicitly passing `name=None` currently preserves the Index's name " + "or uses a default name of 0. This behaviour is deprecated, and in " + "the future `None` will be used as the name of the resulting " + "DataFrame column.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = lib.no_default + + if name is lib.no_default: + name = self._get_level_names() + result = DataFrame({name: self._values.copy()}) + + if index: + result.index = self + return result + + # -------------------------------------------------------------------- + # Name-Centric Methods + + @property + def name(self) -> Hashable: + """ + Return Index or MultiIndex name. + """ + return self._name + + @name.setter + def name(self, value: Hashable) -> None: + if self._no_setting_name: + # Used in MultiIndex.levels to avoid silently ignoring name updates. + raise RuntimeError( + "Cannot set name on a level of a MultiIndex. Use " + "'MultiIndex.set_names' instead." 
+ ) + maybe_extract_name(value, None, type(self)) + self._name = value + + @final + def _validate_names( + self, name=None, names=None, deep: bool = False + ) -> list[Hashable]: + """ + Handles the quirks of having a singular 'name' parameter for general + Index and plural 'names' parameter for MultiIndex. + """ + from copy import deepcopy + + if names is not None and name is not None: + raise TypeError("Can only provide one of `names` and `name`") + elif names is None and name is None: + new_names = deepcopy(self.names) if deep else self.names + elif names is not None: + if not is_list_like(names): + raise TypeError("Must pass list-like as `names`.") + new_names = names + elif not is_list_like(name): + new_names = [name] + else: + new_names = name + + if len(new_names) != len(self.names): + raise ValueError( + f"Length of new names must be {len(self.names)}, got {len(new_names)}" + ) + + # All items in 'new_names' need to be hashable + validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name") + + return new_names + + def _get_default_index_names( + self, names: Hashable | Sequence[Hashable] | None = None, default=None + ) -> list[Hashable]: + """ + Get names of index. + + Parameters + ---------- + names : int, str or 1-dimensional list, default None + Index names to set. + default : str + Default name of index. + + Raises + ------ + TypeError + if names not str or list-like + """ + from pandas.core.indexes.multi import MultiIndex + + if names is not None: + if isinstance(names, str) or isinstance(names, int): + names = [names] + + if not isinstance(names, list) and names is not None: + raise ValueError("Index names must be str or 1-dimensional list") + + if not names: + if isinstance(self, MultiIndex): + names = com.fill_missing_names(self.names) + else: + names = [default] if self.name is None else [self.name] + + return names + + def _get_names(self) -> FrozenList: + return FrozenList((self.name,)) + + def _set_names(self, values, *, level=None) -> None: + """ + Set new names on index. Each name has to be a hashable type. + + Parameters + ---------- + values : str or sequence + name(s) to set + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). Otherwise level must be None + + Raises + ------ + TypeError if each name is not hashable. + """ + if not is_list_like(values): + raise ValueError("Names must be a list-like") + if len(values) != 1: + raise ValueError(f"Length of new names must be 1, got {len(values)}") + + # GH 20527 + # All items in 'name' need to be hashable: + validate_all_hashable(*values, error_name=f"{type(self).__name__}.name") + + self._name = values[0] + + names = property(fset=_set_names, fget=_get_names) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"]) + def set_names(self, names, level=None, inplace: bool = False): + """ + Set Index or MultiIndex name. + + Able to set new names partially and by level. + + Parameters + ---------- + + names : label or list of label or dict-like for MultiIndex + Name(s) to set. + + .. versionchanged:: 1.3.0 + + level : int, label or list of int or label, optional + If the index is a MultiIndex and names is not dict-like, level(s) to set + (None for all levels). Otherwise level must be None. + + .. versionchanged:: 1.3.0 + + inplace : bool, default False + Modifies the object directly, instead of creating a new Index or + MultiIndex. 
+ + Returns + ------- + Index or None + The same type as the caller or None if ``inplace=True``. + + See Also + -------- + Index.rename : Able to set new names without level. + + Examples + -------- + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx + Int64Index([1, 2, 3, 4], dtype='int64') + >>> idx.set_names('quarter') + Int64Index([1, 2, 3, 4], dtype='int64', name='quarter') + + >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], + ... [2018, 2019]]) + >>> idx + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + ) + >>> idx.set_names(['kind', 'year'], inplace=True) + >>> idx + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['kind', 'year']) + >>> idx.set_names('species', level=0) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['species', 'year']) + + When renaming levels with a dict, levels can not be passed. + + >>> idx.set_names({'kind': 'snake'}) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['snake', 'year']) + """ + if level is not None and not isinstance(self, ABCMultiIndex): + raise ValueError("Level must be None for non-MultiIndex") + + elif level is not None and not is_list_like(level) and is_list_like(names): + raise TypeError("Names must be a string when a single level is provided.") + + elif not is_list_like(names) and level is None and self.nlevels > 1: + raise TypeError("Must pass list-like as `names`.") + + elif is_dict_like(names) and not isinstance(self, ABCMultiIndex): + raise TypeError("Can only pass dict-like as `names` for MultiIndex.") + + elif is_dict_like(names) and level is not None: + raise TypeError("Can not pass level for dictlike `names`.") + + if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None: + # Transform dict to list of new names and corresponding levels + level, names_adjusted = [], [] + for i, name in enumerate(self.names): + if name in names.keys(): + level.append(i) + names_adjusted.append(names[name]) + names = names_adjusted + + if not is_list_like(names): + names = [names] + if level is not None and not is_list_like(level): + level = [level] + + if inplace: + idx = self + else: + idx = self._view() + + idx._set_names(names, level=level) + if not inplace: + return idx + + def rename(self, name, inplace=False): + """ + Alter Index or MultiIndex name. + + Able to set new names without level. Defaults to returning new index. + Length of names must match number of levels in MultiIndex. + + Parameters + ---------- + name : label or list of labels + Name(s) to set. + inplace : bool, default False + Modifies the object directly, instead of creating a new Index or + MultiIndex. + + Returns + ------- + Index or None + The same type as the caller or None if ``inplace=True``. + + See Also + -------- + Index.set_names : Able to set new names partially and by level. + + Examples + -------- + >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score') + >>> idx.rename('grade') + Index(['A', 'C', 'A', 'B'], dtype='object', name='grade') + + >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], + ... [2018, 2019]], + ... 
names=['kind', 'year']) + >>> idx + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['kind', 'year']) + >>> idx.rename(['species', 'year']) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['species', 'year']) + >>> idx.rename('species') + Traceback (most recent call last): + TypeError: Must pass list-like as `names`. + """ + return self.set_names([name], inplace=inplace) + + # -------------------------------------------------------------------- + # Level-Centric Methods + + @property + def nlevels(self) -> int: + """ + Number of levels. + """ + return 1 + + def _sort_levels_monotonic(self: _IndexT) -> _IndexT: + """ + Compat with MultiIndex. + """ + return self + + @final + def _validate_index_level(self, level) -> None: + """ + Validate index level. + + For single-level Index getting level number is a no-op, but some + verification must be done like in MultiIndex. + + """ + if isinstance(level, int): + if level < 0 and level != -1: + raise IndexError( + "Too many levels: Index has only 1 level, " + f"{level} is not a valid level number" + ) + elif level > 0: + raise IndexError( + f"Too many levels: Index has only 1 level, not {level + 1}" + ) + elif level != self.name: + raise KeyError( + f"Requested level ({level}) does not match index name ({self.name})" + ) + + def _get_level_number(self, level) -> int: + self._validate_index_level(level) + return 0 + + def sortlevel(self, level=None, ascending=True, sort_remaining=None): + """ + For internal compatibility with the Index API. + + Sort the Index. This is for compat with MultiIndex. + + Parameters + ---------- + ascending : bool, default True + False to sort in descending order + + level, sort_remaining are compat parameters + + Returns + ------- + Index + """ + if not isinstance(ascending, (list, bool)): + raise TypeError( + "ascending must be a single bool value or " + "a list of bool values of length 1" + ) + + if isinstance(ascending, list): + if len(ascending) != 1: + raise TypeError("ascending must be a list of bool values of length 1") + ascending = ascending[0] + + if not isinstance(ascending, bool): + raise TypeError("ascending must be a bool value") + + return self.sort_values(return_indexer=True, ascending=ascending) + + def _get_level_values(self, level) -> Index: + """ + Return an Index of values for requested level. + + This is primarily useful to get an individual level of values from a + MultiIndex, but is provided on Index as well for compatibility. + + Parameters + ---------- + level : int or str + It is either the integer position or the name of the level. + + Returns + ------- + Index + Calling object, as there is only one level in the Index. + + See Also + -------- + MultiIndex.get_level_values : Get values for a level of a MultiIndex. + + Notes + ----- + For Index, level should be 0, since there are no multiple levels. + + Examples + -------- + >>> idx = pd.Index(list('abc')) + >>> idx + Index(['a', 'b', 'c'], dtype='object') + + Get level values by supplying `level` as integer: + + >>> idx.get_level_values(0) + Index(['a', 'b', 'c'], dtype='object') + """ + self._validate_index_level(level) + return self + + get_level_values = _get_level_values + + @final + def droplevel(self, level=0): + """ + Return index with requested level(s) removed. + + If resulting index has only 1 level left, the result will be + of Index type, not MultiIndex.
+ + Parameters + ---------- + level : int, str, or list-like, default 0 + If a string is given, must be the name of a level + If list-like, elements must be names or indexes of levels. + + Returns + ------- + Index or MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays( + ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z']) + >>> mi + MultiIndex([(1, 3, 5), + (2, 4, 6)], + names=['x', 'y', 'z']) + + >>> mi.droplevel() + MultiIndex([(3, 5), + (4, 6)], + names=['y', 'z']) + + >>> mi.droplevel(2) + MultiIndex([(1, 3), + (2, 4)], + names=['x', 'y']) + + >>> mi.droplevel('z') + MultiIndex([(1, 3), + (2, 4)], + names=['x', 'y']) + + >>> mi.droplevel(['x', 'y']) + Int64Index([5, 6], dtype='int64', name='z') + """ + if not isinstance(level, (tuple, list)): + level = [level] + + levnums = sorted(self._get_level_number(lev) for lev in level)[::-1] + + return self._drop_level_numbers(levnums) + + @final + def _drop_level_numbers(self, levnums: list[int]): + """ + Drop MultiIndex levels by level _number_, not name. + """ + + if not levnums and not isinstance(self, ABCMultiIndex): + return self + if len(levnums) >= self.nlevels: + raise ValueError( + f"Cannot remove {len(levnums)} levels from an index with " + f"{self.nlevels} levels: at least one level must be left." + ) + # The two checks above guarantee that here self is a MultiIndex + self = cast("MultiIndex", self) + + new_levels = list(self.levels) + new_codes = list(self.codes) + new_names = list(self.names) + + for i in levnums: + new_levels.pop(i) + new_codes.pop(i) + new_names.pop(i) + + if len(new_levels) == 1: + lev = new_levels[0] + + if len(lev) == 0: + # If lev is empty, lev.take will fail GH#42055 + if len(new_codes[0]) == 0: + # GH#45230 preserve RangeIndex here + # see test_reset_index_empty_rangeindex + result = lev[:0] + else: + res_values = algos.take(lev._values, new_codes[0], allow_fill=True) + # _constructor instead of type(lev) for RangeIndex compat GH#35230 + result = lev._constructor._simple_new(res_values, name=new_names[0]) + else: + # set nan if needed + mask = new_codes[0] == -1 + result = new_levels[0].take(new_codes[0]) + if mask.any(): + result = result.putmask(mask, np.nan) + + result._name = new_names[0] + + return result + else: + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex( + levels=new_levels, + codes=new_codes, + names=new_names, + verify_integrity=False, + ) + + def _get_grouper_for_level( + self, + mapper, + *, + level=None, + dropna: bool = True, + ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]: + """ + Get index grouper corresponding to an index level + + Parameters + ---------- + mapper: Group mapping function or None + Function mapping index values to groups + level : int or None + Index level, positional + dropna : bool + dropna from groupby + + Returns + ------- + grouper : Index + Index of values to group on. + labels : ndarray of int or None + Array of locations in level_index. + uniques : Index or None + Index of unique values for level. 
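+ + Examples + -------- + A minimal sketch of the single-level case (labels and uniques are always None here): + + >>> idx = pd.Index(['a', 'b']) + >>> grouper, labels, uniques = idx._get_grouper_for_level(str.upper) + >>> grouper + Index(['A', 'B'], dtype='object')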
+ """ + assert level is None or level == 0 + if mapper is None: + grouper = self + else: + grouper = self.map(mapper) + + return grouper, None, None + + # -------------------------------------------------------------------- + # Introspection Methods + + @cache_readonly + @final + def _can_hold_na(self) -> bool: + if isinstance(self.dtype, ExtensionDtype): + if isinstance(self.dtype, IntervalDtype): + # FIXME(GH#45720): this is inaccurate for integer-backed + # IntervalArray, but without it other.categories.take raises + # in IntervalArray._cmp_method + return True + return self.dtype._can_hold_na + if self.dtype.kind in ["i", "u", "b"]: + return False + return True + + @final + @property + def is_monotonic(self) -> bool: + """ + Alias for is_monotonic_increasing. + + .. deprecated:: 1.5.0 + is_monotonic is deprecated and will be removed in a future version. + Use is_monotonic_increasing instead. + """ + warnings.warn( + "is_monotonic is deprecated and will be removed in a future version. " + "Use is_monotonic_increasing instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.is_monotonic_increasing + + @property + def is_monotonic_increasing(self) -> bool: + """ + Return a boolean if the values are equal or increasing. + + Examples + -------- + >>> Index([1, 2, 3]).is_monotonic_increasing + True + >>> Index([1, 2, 2]).is_monotonic_increasing + True + >>> Index([1, 3, 2]).is_monotonic_increasing + False + """ + return self._engine.is_monotonic_increasing + + @property + def is_monotonic_decreasing(self) -> bool: + """ + Return a boolean if the values are equal or decreasing. + + Examples + -------- + >>> Index([3, 2, 1]).is_monotonic_decreasing + True + >>> Index([3, 2, 2]).is_monotonic_decreasing + True + >>> Index([3, 1, 2]).is_monotonic_decreasing + False + """ + return self._engine.is_monotonic_decreasing + + @final + @property + def _is_strictly_monotonic_increasing(self) -> bool: + """ + Return if the index is strictly monotonic increasing + (only increasing) values. + + Examples + -------- + >>> Index([1, 2, 3])._is_strictly_monotonic_increasing + True + >>> Index([1, 2, 2])._is_strictly_monotonic_increasing + False + >>> Index([1, 3, 2])._is_strictly_monotonic_increasing + False + """ + return self.is_unique and self.is_monotonic_increasing + + @final + @property + def _is_strictly_monotonic_decreasing(self) -> bool: + """ + Return if the index is strictly monotonic decreasing + (only decreasing) values. + + Examples + -------- + >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing + True + >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing + False + >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing + False + """ + return self.is_unique and self.is_monotonic_decreasing + + @cache_readonly + def is_unique(self) -> bool: + """ + Return if the index has unique values. + """ + return self._engine.is_unique + + @final + @property + def has_duplicates(self) -> bool: + """ + Check if the Index has duplicate values. + + Returns + ------- + bool + Whether or not the Index has duplicate values. + + Examples + -------- + >>> idx = pd.Index([1, 5, 7, 7]) + >>> idx.has_duplicates + True + + >>> idx = pd.Index([1, 5, 7]) + >>> idx.has_duplicates + False + + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", + ... "Watermelon"]).astype("category") + >>> idx.has_duplicates + True + + >>> idx = pd.Index(["Orange", "Apple", + ... 
"Watermelon"]).astype("category") + >>> idx.has_duplicates + False + """ + return not self.is_unique + + @final + def is_boolean(self) -> bool: + """ + Check if the Index only consists of booleans. + + Returns + ------- + bool + Whether or not the Index only consists of booleans. + + See Also + -------- + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index([True, False, True]) + >>> idx.is_boolean() + True + + >>> idx = pd.Index(["True", "False", "True"]) + >>> idx.is_boolean() + False + + >>> idx = pd.Index([True, False, "True"]) + >>> idx.is_boolean() + False + """ + return self.inferred_type in ["boolean"] + + @final + def is_integer(self) -> bool: + """ + Check if the Index only consists of integers. + + Returns + ------- + bool + Whether or not the Index only consists of integers. + + See Also + -------- + is_boolean : Check if the Index only consists of booleans. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx.is_integer() + True + + >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) + >>> idx.is_integer() + False + + >>> idx = pd.Index(["Apple", "Mango", "Watermelon"]) + >>> idx.is_integer() + False + """ + return self.inferred_type in ["integer"] + + @final + def is_floating(self) -> bool: + """ + Check if the Index is a floating type. + + The Index may consist of only floats, NaNs, or a mix of floats, + integers, or NaNs. + + Returns + ------- + bool + Whether or not the Index only consists of only consists of floats, NaNs, or + a mix of floats, integers, or NaNs. + + See Also + -------- + is_boolean : Check if the Index only consists of booleans. + is_integer : Check if the Index only consists of integers. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) + >>> idx.is_floating() + True + + >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0]) + >>> idx.is_floating() + True + + >>> idx = pd.Index([1, 2, 3, 4, np.nan]) + >>> idx.is_floating() + True + + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx.is_floating() + False + """ + return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"] + + @final + def is_numeric(self) -> bool: + """ + Check if the Index only consists of numeric data. + + Returns + ------- + bool + Whether or not the Index only consists of numeric data. + + See Also + -------- + is_boolean : Check if the Index only consists of booleans. + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. 
+ is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) + >>> idx.is_numeric() + True + + >>> idx = pd.Index([1, 2, 3, 4.0]) + >>> idx.is_numeric() + True + + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx.is_numeric() + True + + >>> idx = pd.Index([1, 2, 3, 4.0, np.nan]) + >>> idx.is_numeric() + True + + >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"]) + >>> idx.is_numeric() + False + """ + return self.inferred_type in ["integer", "floating"] + + @final + def is_object(self) -> bool: + """ + Check if the Index is of the object dtype. + + Returns + ------- + bool + Whether or not the Index is of the object dtype. + + See Also + -------- + is_boolean : Check if the Index only consists of booleans. + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index(["Apple", "Mango", "Watermelon"]) + >>> idx.is_object() + True + + >>> idx = pd.Index(["Apple", "Mango", 2.0]) + >>> idx.is_object() + True + + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", + ... "Watermelon"]).astype("category") + >>> idx.is_object() + False + + >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) + >>> idx.is_object() + False + """ + return is_object_dtype(self.dtype) + + @final + def is_categorical(self) -> bool: + """ + Check if the Index holds categorical data. + + Returns + ------- + bool + True if the Index is categorical. + + See Also + -------- + CategoricalIndex : Index for categorical data. + is_boolean : Check if the Index only consists of booleans. + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_interval : Check if the Index holds Interval objects. + is_mixed : Check if the Index holds data with mixed data types. + + Examples + -------- + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", + ... "Watermelon"]).astype("category") + >>> idx.is_categorical() + True + + >>> idx = pd.Index([1, 3, 5, 7]) + >>> idx.is_categorical() + False + + >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"]) + >>> s + 0 Peter + 1 Victor + 2 Elisabeth + 3 Mar + dtype: object + >>> s.index.is_categorical() + False + """ + return self.inferred_type in ["categorical"] + + @final + def is_interval(self) -> bool: + """ + Check if the Index holds Interval objects. + + Returns + ------- + bool + Whether or not the Index holds Interval objects. + + See Also + -------- + IntervalIndex : Index for Interval objects. + is_boolean : Check if the Index only consists of booleans. + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_mixed : Check if the Index holds data with mixed data types. 
+ + Examples + -------- + >>> idx = pd.Index([pd.Interval(left=0, right=5), + ... pd.Interval(left=5, right=10)]) + >>> idx.is_interval() + True + + >>> idx = pd.Index([1, 3, 5, 7]) + >>> idx.is_interval() + False + """ + return self.inferred_type in ["interval"] + + @final + def is_mixed(self) -> bool: + """ + Check if the Index holds data with mixed data types. + + Returns + ------- + bool + Whether or not the Index holds data with mixed data types. + + See Also + -------- + is_boolean : Check if the Index only consists of booleans. + is_integer : Check if the Index only consists of integers. + is_floating : Check if the Index is a floating type. + is_numeric : Check if the Index only consists of numeric data. + is_object : Check if the Index is of the object dtype. + is_categorical : Check if the Index holds categorical data. + is_interval : Check if the Index holds Interval objects. + + Examples + -------- + >>> idx = pd.Index(['a', np.nan, 'b']) + >>> idx.is_mixed() + True + + >>> idx = pd.Index([1.0, 2.0, 3.0, 5.0]) + >>> idx.is_mixed() + False + """ + warnings.warn( + "Index.is_mixed is deprecated and will be removed in a future version. " + "Check index.inferred_type directly instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.inferred_type in ["mixed"] + + @final + def holds_integer(self) -> bool: + """ + Whether the type is an integer type. + """ + return self.inferred_type in ["integer", "mixed-integer"] + + @cache_readonly + def inferred_type(self) -> str_t: + """ + Return a string of the type inferred from the values. + """ + return lib.infer_dtype(self._values, skipna=False) + + @cache_readonly + @final + def _is_all_dates(self) -> bool: + """ + Whether or not the index values only consist of dates. + """ + if needs_i8_conversion(self.dtype): + return True + elif self.dtype != _dtype_obj: + # TODO(ExtensionIndex): 3rd party EA might override? + # Note: this includes IntervalIndex, even when the left/right + # contain datetime-like objects. + return False + elif self._is_multi: + return False + return is_datetime_array(ensure_object(self._values)) + + @cache_readonly + @final + def is_all_dates(self) -> bool: + """ + Whether or not the index values only consist of dates. + """ + warnings.warn( + "Index.is_all_dates is deprecated, will be removed in a future version. " + "check index.inferred_type instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._is_all_dates + + @final + @cache_readonly + def _is_multi(self) -> bool: + """ + Cached check equivalent to isinstance(self, MultiIndex) + """ + return isinstance(self, ABCMultiIndex) + + # -------------------------------------------------------------------- + # Pickle Methods + + def __reduce__(self): + d = {"data": self._data, "name": self.name} + return _new_Index, (type(self), d), None + + # -------------------------------------------------------------------- + # Null Handling Methods + + @cache_readonly + def _na_value(self): + """The expected NA value to use with this index.""" + dtype = self.dtype + if isinstance(dtype, np.dtype): + if dtype.kind in ["m", "M"]: + return NaT + return np.nan + return dtype.na_value + + @cache_readonly + def _isnan(self) -> npt.NDArray[np.bool_]: + """ + Return if each value is NaN. 
+ """ + if self._can_hold_na: + return isna(self) + else: + # shouldn't reach to this condition by checking hasnans beforehand + values = np.empty(len(self), dtype=np.bool_) + values.fill(False) + return values + + @cache_readonly + def hasnans(self) -> bool: + """ + Return True if there are any NaNs. + + Enables various performance speedups. + """ + if self._can_hold_na: + return bool(self._isnan.any()) + else: + return False + + @final + def isna(self) -> npt.NDArray[np.bool_]: + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get + mapped to ``True`` values. + Everything else get mapped to ``False`` values. Characters such as + empty strings `''` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + + Returns + ------- + numpy.ndarray[bool] + A boolean array of whether my values are NA. + + See Also + -------- + Index.notna : Boolean inverse of isna. + Index.dropna : Omit entries with missing values. + isna : Top-level isna. + Series.isna : Detect missing values in Series object. + + Examples + -------- + Show which entries in a pandas.Index are NA. The result is an + array. + + >>> idx = pd.Index([5.2, 6.0, np.NaN]) + >>> idx + Float64Index([5.2, 6.0, nan], dtype='float64') + >>> idx.isna() + array([False, False, True]) + + Empty strings are not considered NA values. None is considered an NA + value. + + >>> idx = pd.Index(['black', '', 'red', None]) + >>> idx + Index(['black', '', 'red', None], dtype='object') + >>> idx.isna() + array([False, False, False, True]) + + For datetimes, `NaT` (Not a Time) is considered as an NA value. + + >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'), + ... pd.Timestamp(''), None, pd.NaT]) + >>> idx + DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], + dtype='datetime64[ns]', freq=None) + >>> idx.isna() + array([False, True, True, True]) + """ + return self._isnan + + isnull = isna + + @final + def notna(self) -> npt.NDArray[np.bool_]: + """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to ``True``. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False`` + values. + + Returns + ------- + numpy.ndarray[bool] + Boolean array to indicate which entries are not NA. + + See Also + -------- + Index.notnull : Alias of notna. + Index.isna: Inverse of notna. + notna : Top-level notna. + + Examples + -------- + Show which entries in an Index are not NA. The result is an + array. + + >>> idx = pd.Index([5.2, 6.0, np.NaN]) + >>> idx + Float64Index([5.2, 6.0, nan], dtype='float64') + >>> idx.notna() + array([ True, True, False]) + + Empty strings are not considered NA values. None is considered a NA + value. + + >>> idx = pd.Index(['black', '', 'red', None]) + >>> idx + Index(['black', '', 'red', None], dtype='object') + >>> idx.notna() + array([ True, True, True, False]) + """ + return ~self.isna() + + notnull = notna + + def fillna(self, value=None, downcast=None): + """ + Fill NA/NaN values with the specified value. + + Parameters + ---------- + value : scalar + Scalar value to use to fill holes (e.g. 0). + This value cannot be a list-likes. 
+ downcast : dict, default is None + A dict of item->dtype of what to downcast if possible, + or the string 'infer' which will try to downcast to an appropriate + equal type (e.g. float64 to int64 if possible). + + Returns + ------- + Index + + See Also + -------- + DataFrame.fillna : Fill NaN values of a DataFrame. + Series.fillna : Fill NaN Values of a Series. + """ + + value = self._require_scalar(value) + if self.hasnans: + result = self.putmask(self._isnan, value) + if downcast is None: + # no need to care metadata other than name + # because it can't have freq if it has NaTs + return Index._with_infer(result, name=self.name) + raise NotImplementedError( + f"{type(self).__name__}.fillna does not support 'downcast' " + "argument values other than 'None'." + ) + return self._view() + + def dropna(self: _IndexT, how: str_t = "any") -> _IndexT: + """ + Return Index without NA/NaN values. + + Parameters + ---------- + how : {'any', 'all'}, default 'any' + If the Index is a MultiIndex, drop the value when any or all levels + are NaN. + + Returns + ------- + Index + """ + if how not in ("any", "all"): + raise ValueError(f"invalid how option: {how}") + + if self.hasnans: + res_values = self._values[~self._isnan] + return type(self)._simple_new(res_values, name=self.name) + return self._view() + + # -------------------------------------------------------------------- + # Uniqueness Methods + + def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: + """ + Return unique values in the index. + + Unique values are returned in order of appearance, this does NOT sort. + + Parameters + ---------- + level : int or hashable, optional + Only return values from specified level (for MultiIndex). + If int, gets the level by integer position, else by level name. + + Returns + ------- + Index + + See Also + -------- + unique : Numpy array of unique values in that column. + Series.unique : Return unique values of Series object. + """ + if level is not None: + self._validate_index_level(level) + + if self.is_unique: + return self._view() + + result = super().unique() + return self._shallow_copy(result) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: + """ + Return Index with duplicate values removed. + + Parameters + ---------- + keep : {'first', 'last', ``False``}, default 'first' + - 'first' : Drop duplicates except for the first occurrence. + - 'last' : Drop duplicates except for the last occurrence. + - ``False`` : Drop all duplicates. + + Returns + ------- + deduplicated : Index + + See Also + -------- + Series.drop_duplicates : Equivalent method on Series. + DataFrame.drop_duplicates : Equivalent method on DataFrame. + Index.duplicated : Related method on Index, indicating duplicate + Index values. + + Examples + -------- + Generate an pandas.Index with duplicate values. + + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) + + The `keep` parameter controls which duplicate values are removed. + The value 'first' keeps the first occurrence for each + set of duplicated entries. The default value of keep is 'first'. + + >>> idx.drop_duplicates(keep='first') + Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object') + + The value 'last' keeps the last occurrence for each set of duplicated + entries. + + >>> idx.drop_duplicates(keep='last') + Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object') + + The value ``False`` discards all sets of duplicated entries. 
+ + >>> idx.drop_duplicates(keep=False) + Index(['cow', 'beetle', 'hippo'], dtype='object') + """ + if self.is_unique: + return self._view() + + return super().drop_duplicates(keep=keep) + + def duplicated( + self, keep: Literal["first", "last", False] = "first" + ) -> npt.NDArray[np.bool_]: + """ + Indicate duplicate index values. + + Duplicated values are indicated as ``True`` values in the resulting + array. Either all duplicates, all except the first, or all except the + last occurrence of duplicates can be indicated. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + The value or values in a set of duplicates to mark as missing. + + - 'first' : Mark duplicates as ``True`` except for the first + occurrence. + - 'last' : Mark duplicates as ``True`` except for the last + occurrence. + - ``False`` : Mark all duplicates as ``True``. + + Returns + ------- + np.ndarray[bool] + + See Also + -------- + Series.duplicated : Equivalent method on pandas.Series. + DataFrame.duplicated : Equivalent method on pandas.DataFrame. + Index.drop_duplicates : Remove duplicate values from Index. + + Examples + -------- + By default, for each set of duplicated values, the first occurrence is + set to False and all others to True: + + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama']) + >>> idx.duplicated() + array([False, False, True, False, True]) + + which is equivalent to + + >>> idx.duplicated(keep='first') + array([False, False, True, False, True]) + + By using 'last', the last occurrence of each set of duplicated values + is set on False and all others on True: + + >>> idx.duplicated(keep='last') + array([ True, False, True, False, False]) + + By setting keep on ``False``, all duplicates are True: + + >>> idx.duplicated(keep=False) + array([ True, False, True, False, True]) + """ + if self.is_unique: + # fastpath available bc we are immutable + return np.zeros(len(self), dtype=bool) + return self._duplicated(keep=keep) + + # -------------------------------------------------------------------- + # Arithmetic & Logical Methods + + def __iadd__(self, other): + # alias for __add__ + return self + other + + @final + def __and__(self, other): + warnings.warn( + "Index.__and__ operating as a set operation is deprecated, " + "in the future this will be a logical operation matching " + "Series.__and__. Use index.intersection(other) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.intersection(other) + + @final + def __or__(self, other): + warnings.warn( + "Index.__or__ operating as a set operation is deprecated, " + "in the future this will be a logical operation matching " + "Series.__or__. Use index.union(other) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.union(other) + + @final + def __xor__(self, other): + warnings.warn( + "Index.__xor__ operating as a set operation is deprecated, " + "in the future this will be a logical operation matching " + "Series.__xor__. Use index.symmetric_difference(other) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.symmetric_difference(other) + + @final + def __nonzero__(self) -> NoReturn: + raise ValueError( + f"The truth value of a {type(self).__name__} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 
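+             # For example, ``if pd.Index([0, 1]):`` has no single sensible
+             # truth value, so callers are forced to be explicit via
+             # ``.any()``, ``.all()``, ``.empty`` or ``.item()``.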
+ ) + + __bool__ = __nonzero__ + + # -------------------------------------------------------------------- + # Set Operation Methods + + def _get_reconciled_name_object(self, other): + """ + If the result of a set operation will be self, + return self, unless the name changes, in which + case make a shallow copy of self. + """ + name = get_op_result_name(self, other) + if self.name is not name: + return self.rename(name) + return self + + @final + def _validate_sort_keyword(self, sort): + if sort not in [None, False]: + raise ValueError( + "The 'sort' keyword only takes the values of " + f"None or False; {sort} was passed." + ) + + @final + def _deprecate_dti_setop(self, other: Index, setop: str_t): + """ + Deprecate setop behavior between timezone-aware DatetimeIndexes with + mismatched timezones. + """ + # Caller is responsibelf or checking + # `not is_dtype_equal(self.dtype, other.dtype)` + if ( + isinstance(self, ABCDatetimeIndex) + and isinstance(other, ABCDatetimeIndex) + and self.tz is not None + and other.tz is not None + ): + # GH#39328, GH#45357 + warnings.warn( + f"In a future version, the {setop} of DatetimeIndex objects " + "with mismatched timezones will cast both to UTC instead of " + "object dtype. To retain the old behavior, " + f"use `index.astype(object).{setop}(other)`", + FutureWarning, + stacklevel=find_stack_level(), + ) + + @final + def union(self, other, sort=None): + """ + Form the union of two Index objects. + + If the Index objects are incompatible, both Index objects will be + cast to dtype('object') first. + + .. versionchanged:: 0.25.0 + + Parameters + ---------- + other : Index or array-like + sort : bool or None, default None + Whether to sort the resulting Index. + + * None : Sort the result, except when + + 1. `self` and `other` are equal. + 2. `self` or `other` has length 0. + 3. Some values in `self` or `other` cannot be compared. + A RuntimeWarning is issued in this case. + + * False : do not sort the result. + + Returns + ------- + union : Index + + Examples + -------- + Union matching dtypes + + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.union(idx2) + Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + + Union mismatched dtypes + + >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) + >>> idx2 = pd.Index([1, 2, 3, 4]) + >>> idx1.union(idx2) + Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') + + MultiIndex case + + >>> idx1 = pd.MultiIndex.from_arrays( + ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] + ... ) + >>> idx1 + MultiIndex([(1, 'Red'), + (1, 'Blue'), + (2, 'Red'), + (2, 'Blue')], + ) + >>> idx2 = pd.MultiIndex.from_arrays( + ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]] + ... ) + >>> idx2 + MultiIndex([(3, 'Red'), + (3, 'Green'), + (2, 'Red'), + (2, 'Green')], + ) + >>> idx1.union(idx2) + MultiIndex([(1, 'Blue'), + (1, 'Red'), + (2, 'Blue'), + (2, 'Green'), + (2, 'Red'), + (3, 'Green'), + (3, 'Red')], + ) + >>> idx1.union(idx2, sort=False) + MultiIndex([(1, 'Red'), + (1, 'Blue'), + (2, 'Red'), + (2, 'Blue'), + (3, 'Red'), + (3, 'Green'), + (2, 'Green')], + ) + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name = self._convert_can_do_setop(other) + + if not is_dtype_equal(self.dtype, other.dtype): + if ( + isinstance(self, ABCMultiIndex) + and not is_object_dtype(unpack_nested_dtype(other)) + and len(other) > 0 + ): + raise NotImplementedError( + "Can only union MultiIndex with MultiIndex or Index of tuples, " + "try mi.to_flat_index().union(other) instead." 
+ ) + self._deprecate_dti_setop(other, "union") + + dtype = self._find_common_type_compat(other) + left = self.astype(dtype, copy=False) + right = other.astype(dtype, copy=False) + return left.union(right, sort=sort) + + elif not len(other) or self.equals(other): + # NB: whether this (and the `if not len(self)` check below) come before + # or after the is_dtype_equal check above affects the returned dtype + return self._get_reconciled_name_object(other) + + elif not len(self): + return other._get_reconciled_name_object(self) + + result = self._union(other, sort=sort) + + return self._wrap_setop_result(other, result) + + def _union(self, other: Index, sort): + """ + Specific union logic should go here. In subclasses, union behavior + should be overwritten here rather than in `self.union`. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. + + Returns + ------- + Index + """ + lvals = self._values + rvals = other._values + + if ( + sort is None + and self.is_monotonic_increasing + and other.is_monotonic_increasing + and not (self.has_duplicates and other.has_duplicates) + and self._can_use_libjoin + ): + # Both are monotonic and at least one is unique, so can use outer join + # (actually don't need either unique, but without this restriction + # test_union_same_value_duplicated_in_both fails) + try: + return self._outer_indexer(other)[0] + except (TypeError, IncompatibleFrequency): + # incomparable objects; should only be for object dtype + value_list = list(lvals) + + # worth making this faster? a very unusual case + value_set = set(lvals) + value_list.extend([x for x in rvals if x not in value_set]) + # If objects are unorderable, we must have object dtype. + return np.array(value_list, dtype=object) + + elif not other.is_unique: + # other has duplicates + result = algos.union_with_duplicates(lvals, rvals) + return _maybe_try_sort(result, sort) + + # Self may have duplicates; other already checked as unique + # find indexes of things in "other" that are not in "self" + if self._index_as_unique: + indexer = self.get_indexer(other) + missing = (indexer == -1).nonzero()[0] + else: + missing = algos.unique1d(self.get_indexer_non_unique(other)[1]) + + if len(missing) > 0: + other_diff = rvals.take(missing) + result = concat_compat((lvals, other_diff)) + else: + result = lvals + + if not self.is_monotonic_increasing or not other.is_monotonic_increasing: + # if both are monotonic then result should already be sorted + result = _maybe_try_sort(result, sort) + + return result + + @final + def _wrap_setop_result(self, other: Index, result) -> Index: + name = get_op_result_name(self, other) + if isinstance(result, Index): + if result.name != name: + result = result.rename(name) + else: + result = self._shallow_copy(result, name=name) + return result + + @final + def intersection(self, other, sort=False): + """ + Form the intersection of two Index objects. + + This returns a new Index with elements common to the index and `other`. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. 
+ + Returns + ------- + intersection : Index + + Examples + -------- + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.intersection(idx2) + Int64Index([3, 4], dtype='int64') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name = self._convert_can_do_setop(other) + + if not is_dtype_equal(self.dtype, other.dtype): + self._deprecate_dti_setop(other, "intersection") + + if self.equals(other): + if self.has_duplicates: + return self.unique()._get_reconciled_name_object(other) + return self._get_reconciled_name_object(other) + + if len(self) == 0 or len(other) == 0: + # fastpath; we need to be careful about having commutativity + + if self._is_multi or other._is_multi: + # _convert_can_do_setop ensures that we have both or neither + # We retain self.levels + return self[:0].rename(result_name) + + dtype = self._find_common_type_compat(other) + if is_dtype_equal(self.dtype, dtype): + # Slicing allows us to retain DTI/TDI.freq, RangeIndex + + # Note: self[:0] vs other[:0] affects + # 1) which index's `freq` we get in DTI/TDI cases + # This may be a historical artifact, i.e. no documented + # reason for this choice. + # 2) The `step` we get in RangeIndex cases + if len(self) == 0: + return self[:0].rename(result_name) + else: + return other[:0].rename(result_name) + + return Index([], dtype=dtype, name=result_name) + + elif not self._should_compare(other): + # We can infer that the intersection is empty. + if isinstance(self, ABCMultiIndex): + return self[:0].rename(result_name) + return Index([], name=result_name) + + elif not is_dtype_equal(self.dtype, other.dtype): + dtype = self._find_common_type_compat(other) + this = self.astype(dtype, copy=False) + other = other.astype(dtype, copy=False) + return this.intersection(other, sort=sort) + + result = self._intersection(other, sort=sort) + return self._wrap_intersection_result(other, result) + + def _intersection(self, other: Index, sort=False): + """ + intersection specialized to the case with matching dtypes. + """ + if ( + self.is_monotonic_increasing + and other.is_monotonic_increasing + and self._can_use_libjoin + ): + try: + result = self._inner_indexer(other)[0] + except TypeError: + # non-comparable; should only be for object dtype + pass + else: + # TODO: algos.unique1d should preserve DTA/TDA + res = algos.unique1d(result) + return ensure_wrapped_if_datetimelike(res) + + res_values = self._intersection_via_get_indexer(other, sort=sort) + res_values = _maybe_try_sort(res_values, sort) + return res_values + + def _wrap_intersection_result(self, other, result): + # We will override for MultiIndex to handle empty results + return self._wrap_setop_result(other, result) + + @final + def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike: + """ + Find the intersection of two Indexes using get_indexer. + + Returns + ------- + np.ndarray or ExtensionArray + The returned array will be unique. 
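+ 
+         Notes
+         -----
+         With ``sort=False`` the unique common elements are returned in the
+         order in which they first appear in ``self``.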
+ """ + left_unique = self.unique() + right_unique = other.unique() + + # even though we are unique, we need get_indexer_for for IntervalIndex + indexer = left_unique.get_indexer_for(right_unique) + + mask = indexer != -1 + + taker = indexer.take(mask.nonzero()[0]) + if sort is False: + # sort bc we want the elements in the same order they are in self + # unnecessary in the case with sort=None bc we will sort later + taker = np.sort(taker) + + result = left_unique.take(taker)._values + return result + + @final + def difference(self, other, sort=None): + """ + Return a new Index with elements of index not in `other`. + + This is the set difference of two Index objects. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default None + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. + + Returns + ------- + difference : Index + + Examples + -------- + >>> idx1 = pd.Index([2, 1, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.difference(idx2) + Int64Index([1, 2], dtype='int64') + >>> idx1.difference(idx2, sort=False) + Int64Index([2, 1], dtype='int64') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name = self._convert_can_do_setop(other) + + # Note: we do NOT call _deprecate_dti_setop here, as there + # is no requirement that .difference be commutative, so it does + # not cast to object. + + if self.equals(other): + # Note: we do not (yet) sort even if sort=None GH#24959 + return self[:0].rename(result_name) + + if len(other) == 0: + # Note: we do not (yet) sort even if sort=None GH#24959 + return self.rename(result_name) + + if not self._should_compare(other): + # Nothing matches -> difference is everything + return self.rename(result_name) + + result = self._difference(other, sort=sort) + return self._wrap_difference_result(other, result) + + def _difference(self, other, sort): + # overridden by RangeIndex + + this = self.unique() + + indexer = this.get_indexer_for(other) + indexer = indexer.take((indexer != -1).nonzero()[0]) + + label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) + the_diff = this._values.take(label_diff) + the_diff = _maybe_try_sort(the_diff, sort) + + return the_diff + + def _wrap_difference_result(self, other, result): + # We will override for MultiIndex to handle empty results + return self._wrap_setop_result(other, result) + + def symmetric_difference(self, other, result_name=None, sort=None): + """ + Compute the symmetric difference of two Index objects. + + Parameters + ---------- + other : Index or array-like + result_name : str + sort : False or None, default None + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. + + Returns + ------- + symmetric_difference : Index + + Notes + ----- + ``symmetric_difference`` contains elements that appear in either + ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by + ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates + dropped. 
+ + Examples + -------- + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([2, 3, 4, 5]) + >>> idx1.symmetric_difference(idx2) + Int64Index([1, 5], dtype='int64') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name_update = self._convert_can_do_setop(other) + if result_name is None: + result_name = result_name_update + + if not is_dtype_equal(self.dtype, other.dtype): + self._deprecate_dti_setop(other, "symmetric_difference") + + if not self._should_compare(other): + return self.union(other, sort=sort).rename(result_name) + + elif not is_dtype_equal(self.dtype, other.dtype): + dtype = self._find_common_type_compat(other) + this = self.astype(dtype, copy=False) + that = other.astype(dtype, copy=False) + return this.symmetric_difference(that, sort=sort).rename(result_name) + + this = self.unique() + other = other.unique() + indexer = this.get_indexer_for(other) + + # {this} minus {other} + common_indexer = indexer.take((indexer != -1).nonzero()[0]) + left_indexer = np.setdiff1d( + np.arange(this.size), common_indexer, assume_unique=True + ) + left_diff = this._values.take(left_indexer) + + # {other} minus {this} + right_indexer = (indexer == -1).nonzero()[0] + right_diff = other._values.take(right_indexer) + + res_values = concat_compat([left_diff, right_diff]) + res_values = _maybe_try_sort(res_values, sort) + + # pass dtype so we retain object dtype + result = Index(res_values, name=result_name, dtype=res_values.dtype) + + if self._is_multi: + self = cast("MultiIndex", self) + if len(result) == 0: + # On equal symmetric_difference MultiIndexes the difference is empty. + # Therefore, an empty MultiIndex is returned GH#13490 + return type(self)( + levels=[[] for _ in range(self.nlevels)], + codes=[[] for _ in range(self.nlevels)], + names=result.name, + ) + return type(self).from_tuples(result, names=result.name) + + return result + + @final + def _assert_can_do_setop(self, other) -> bool: + if not is_list_like(other): + raise TypeError("Input must be Index or array-like") + return True + + def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]: + if not isinstance(other, Index): + # TODO(2.0): no need to special-case here once _with_infer + # deprecation is enforced + if hasattr(other, "dtype"): + other = Index(other, name=self.name, dtype=other.dtype) + else: + # e.g. list + other = Index(other, name=self.name) + result_name = self.name + else: + result_name = get_op_result_name(self, other) + return other, result_name + + # -------------------------------------------------------------------- + # Indexing Methods + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location, slice or boolean mask for requested label. + + Parameters + ---------- + key : label + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. Tied + distances are broken by preferring the larger index value. + + .. deprecated:: 1.4 + Use index.get_indexer([item], method=...) instead. + + tolerance : int or float, optional + Maximum distance from index value for inexact matches. The value of + the index at the matching location must satisfy the equation + ``abs(index[loc] - key) <= tolerance``. 
+ + Returns + ------- + loc : int if unique index, slice if monotonic index, else mask + + Examples + -------- + >>> unique_index = pd.Index(list('abc')) + >>> unique_index.get_loc('b') + 1 + + >>> monotonic_index = pd.Index(list('abbc')) + >>> monotonic_index.get_loc('b') + slice(1, 3, None) + + >>> non_monotonic_index = pd.Index(list('abcb')) + >>> non_monotonic_index.get_loc('b') + array([False, True, False, True]) + """ + if method is None: + if tolerance is not None: + raise ValueError( + "tolerance argument only valid if using pad, " + "backfill or nearest lookups" + ) + casted_key = self._maybe_cast_indexer(key) + try: + return self._engine.get_loc(casted_key) + except KeyError as err: + raise KeyError(key) from err + except TypeError: + # If we have a listlike key, _check_indexing_error will raise + # InvalidIndexError. Otherwise we fall through and re-raise + # the TypeError. + self._check_indexing_error(key) + raise + + # GH#42269 + warnings.warn( + f"Passing method to {type(self).__name__}.get_loc is deprecated " + "and will raise in a future version. Use " + "index.get_indexer([item], method=...) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if is_scalar(key) and isna(key) and not self.hasnans: + raise KeyError(key) + + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance, np.asarray(key)) + + indexer = self.get_indexer([key], method=method, tolerance=tolerance) + if indexer.ndim > 1 or indexer.size > 1: + raise TypeError("get_loc requires scalar valued input") + loc = indexer.item() + if loc == -1: + raise KeyError(key) + return loc + + _index_shared_docs[ + "get_indexer" + ] = """ + Compute indexer and mask for new index given the current index. + + The indexer should be then used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : %(target_klass)s + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. Tied + distances are broken by preferring the larger index value. + limit : int, optional + Maximum number of consecutive labels in ``target`` to match for + inexact matches. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + indexer : np.ndarray[np.intp] + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + %(raises_section)s + Notes + ----- + Returns -1 for unmatched values, for further explanation see the + example below. + + Examples + -------- + >>> index = pd.Index(['c', 'a', 'b']) + >>> index.get_indexer(['a', 'b', 'x']) + array([ 1, 2, -1]) + + Notice that the return value is an array of locations in ``index`` + and ``x`` is marked by -1, as it is not in ``index``. 
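+ 
+     When a fill ``method`` is given, unmatched target labels can instead be
+     matched to nearby positions. For example, ``method='pad'`` matches each
+     target label to the position of the closest preceding index value:
+ 
+     >>> index = pd.Index([10, 20, 30])
+     >>> index.get_indexer([15, 25], method='pad')
+     array([0, 1])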
+ """ + + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) + @final + def get_indexer( + self, + target, + method: str_t | None = None, + limit: int | None = None, + tolerance=None, + ) -> npt.NDArray[np.intp]: + method = missing.clean_reindex_fill_method(method) + orig_target = target + target = self._maybe_cast_listlike_indexer(target) + + self._check_indexing_method(method, limit, tolerance) + + if not self._index_as_unique: + raise InvalidIndexError(self._requires_unique_msg) + + if len(target) == 0: + return np.array([], dtype=np.intp) + + if not self._should_compare(target) and not self._should_partial_index(target): + # IntervalIndex get special treatment bc numeric scalars can be + # matched to Interval scalars + return self._get_indexer_non_comparable(target, method=method, unique=True) + + if is_categorical_dtype(self.dtype): + # _maybe_cast_listlike_indexer ensures target has our dtype + # (could improve perf by doing _should_compare check earlier?) + assert is_dtype_equal(self.dtype, target.dtype) + + indexer = self._engine.get_indexer(target.codes) + if self.hasnans and target.hasnans: + # After _maybe_cast_listlike_indexer, target elements which do not + # belong to some category are changed to NaNs + # Mask to track actual NaN values compared to inserted NaN values + # GH#45361 + target_nans = isna(orig_target) + loc = self.get_loc(np.nan) + mask = target.isna() + indexer[target_nans] = loc + indexer[mask & ~target_nans] = -1 + return indexer + + if is_categorical_dtype(target.dtype): + # potential fastpath + # get an indexer for unique categories then propagate to codes via take_nd + # get_indexer instead of _get_indexer needed for MultiIndex cases + # e.g. test_append_different_columns_types + categories_indexer = self.get_indexer(target.categories) + + indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1) + + if (not self._is_multi and self.hasnans) and target.hasnans: + # Exclude MultiIndex because hasnans raises NotImplementedError + # we should only get here if we are unique, so loc is an integer + # GH#41934 + loc = self.get_loc(np.nan) + mask = target.isna() + indexer[mask] = loc + + return ensure_platform_int(indexer) + + pself, ptarget = self._maybe_promote(target) + if pself is not self or ptarget is not target: + return pself.get_indexer( + ptarget, method=method, limit=limit, tolerance=tolerance + ) + + if is_dtype_equal(self.dtype, target.dtype) and self.equals(target): + # Only call equals if we have same dtype to avoid inference/casting + return np.arange(len(target), dtype=np.intp) + + if not is_dtype_equal(self.dtype, target.dtype) and not is_interval_dtype( + self.dtype + ): + # IntervalIndex gets special treatment for partial-indexing + dtype = self._find_common_type_compat(target) + + this = self.astype(dtype, copy=False) + target = target.astype(dtype, copy=False) + return this._get_indexer( + target, method=method, limit=limit, tolerance=tolerance + ) + + return self._get_indexer(target, method, limit, tolerance) + + def _get_indexer( + self, + target: Index, + method: str_t | None = None, + limit: int | None = None, + tolerance=None, + ) -> npt.NDArray[np.intp]: + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance, target) + + if method in ["pad", "backfill"]: + indexer = self._get_fill_indexer(target, method, limit, tolerance) + elif method == "nearest": + indexer = self._get_nearest_indexer(target, limit, tolerance) + else: + if target._is_multi and self._is_multi: + engine = self._engine + # 
error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" + # has no attribute "_extract_level_codes" + tgt_values = engine._extract_level_codes( # type: ignore[union-attr] + target + ) + else: + tgt_values = target._get_engine_target() + + indexer = self._engine.get_indexer(tgt_values) + + return ensure_platform_int(indexer) + + @final + def _should_partial_index(self, target: Index) -> bool: + """ + Should we attempt partial-matching indexing? + """ + if is_interval_dtype(self.dtype): + if is_interval_dtype(target.dtype): + return False + # See https://github.com/pandas-dev/pandas/issues/47772 the commented + # out code can be restored (instead of hardcoding `return True`) + # once that issue if fixed + # "Index" has no attribute "left" + # return self.left._should_compare(target) # type: ignore[attr-defined] + return True + return False + + @final + def _check_indexing_method( + self, + method: str_t | None, + limit: int | None = None, + tolerance=None, + ) -> None: + """ + Raise if we have a get_indexer `method` that is not supported or valid. + """ + if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]: + # in practice the clean_reindex_fill_method call would raise + # before we get here + raise ValueError("Invalid fill method") # pragma: no cover + + if self._is_multi: + if method == "nearest": + raise NotImplementedError( + "method='nearest' not implemented yet " + "for MultiIndex; see GitHub issue 9365" + ) + elif method == "pad" or method == "backfill": + if tolerance is not None: + raise NotImplementedError( + "tolerance not implemented yet for MultiIndex" + ) + + if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype): + # GH#37871 for now this is only for IntervalIndex and CategoricalIndex + if method is not None: + raise NotImplementedError( + f"method {method} not yet implemented for {type(self).__name__}" + ) + + if method is None: + if tolerance is not None: + raise ValueError( + "tolerance argument only valid if doing pad, " + "backfill or nearest reindexing" + ) + if limit is not None: + raise ValueError( + "limit argument only valid if doing pad, " + "backfill or nearest reindexing" + ) + + def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray: + # override this method on subclasses + tolerance = np.asarray(tolerance) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError("list-like tolerance size must match target index size") + return tolerance + + @final + def _get_fill_indexer( + self, target: Index, method: str_t, limit: int | None = None, tolerance=None + ) -> npt.NDArray[np.intp]: + + if self._is_multi: + # TODO: get_indexer_with_fill docstring says values must be _sorted_ + # but that doesn't appear to be enforced + # error: "IndexEngine" has no attribute "get_indexer_with_fill" + engine = self._engine + return engine.get_indexer_with_fill( # type: ignore[union-attr] + target=target._values, values=self._values, method=method, limit=limit + ) + + if self.is_monotonic_increasing and target.is_monotonic_increasing: + target_values = target._get_engine_target() + own_values = self._get_engine_target() + if not isinstance(target_values, np.ndarray) or not isinstance( + own_values, np.ndarray + ): + raise NotImplementedError + + if method == "pad": + indexer = libalgos.pad(own_values, target_values, limit=limit) + else: + # i.e. 
"backfill" + indexer = libalgos.backfill(own_values, target_values, limit=limit) + else: + indexer = self._get_fill_indexer_searchsorted(target, method, limit) + if tolerance is not None and len(self): + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + @final + def _get_fill_indexer_searchsorted( + self, target: Index, method: str_t, limit: int | None = None + ) -> npt.NDArray[np.intp]: + """ + Fallback pad/backfill get_indexer that works for monotonic decreasing + indexes and non-monotonic targets. + """ + if limit is not None: + raise ValueError( + f"limit argument for {repr(method)} method only well-defined " + "if index and target are monotonic" + ) + + side: Literal["left", "right"] = "left" if method == "pad" else "right" + + # find exact matches first (this simplifies the algorithm) + indexer = self.get_indexer(target) + nonexact = indexer == -1 + indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side) + if side == "left": + # searchsorted returns "indices into a sorted array such that, + # if the corresponding elements in v were inserted before the + # indices, the order of a would be preserved". + # Thus, we need to subtract 1 to find values to the left. + indexer[nonexact] -= 1 + # This also mapped not found values (values of 0 from + # np.searchsorted) to -1, which conveniently is also our + # sentinel for missing values + else: + # Mark indices to the right of the largest value as not found + indexer[indexer == len(self)] = -1 + return indexer + + @final + def _get_nearest_indexer( + self, target: Index, limit: int | None, tolerance + ) -> npt.NDArray[np.intp]: + """ + Get the indexer for the nearest index labels; requires an index with + values that can be subtracted from each other (e.g., not strings or + tuples). 
+ """ + if not len(self): + return self._get_fill_indexer(target, "pad") + + left_indexer = self.get_indexer(target, "pad", limit=limit) + right_indexer = self.get_indexer(target, "backfill", limit=limit) + + left_distances = self._difference_compat(target, left_indexer) + right_distances = self._difference_compat(target, right_indexer) + + op = operator.lt if self.is_monotonic_increasing else operator.le + indexer = np.where( + # error: Argument 1&2 has incompatible type "Union[ExtensionArray, + # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE, + # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]" + op(left_distances, right_distances) # type: ignore[arg-type] + | (right_indexer == -1), + left_indexer, + right_indexer, + ) + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + @final + def _filter_indexer_tolerance( + self, + target: Index, + indexer: npt.NDArray[np.intp], + tolerance, + ) -> npt.NDArray[np.intp]: + + distance = self._difference_compat(target, indexer) + + return np.where(distance <= tolerance, indexer, -1) + + @final + def _difference_compat( + self, target: Index, indexer: npt.NDArray[np.intp] + ) -> ArrayLike: + # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object] + # of DateOffset objects, which do not support __abs__ (and would be slow + # if they did) + + if isinstance(self.dtype, PeriodDtype): + # Note: we only get here with matching dtypes + own_values = cast("PeriodArray", self._data)._ndarray + target_values = cast("PeriodArray", target._data)._ndarray + diff = own_values[indexer] - target_values + else: + # error: Unsupported left operand type for - ("ExtensionArray") + diff = self._values[indexer] - target._values # type: ignore[operator] + return abs(diff) + + # -------------------------------------------------------------------- + # Indexer Conversion Methods + + @final + def _validate_positional_slice(self, key: slice) -> None: + """ + For positional indexing, a slice must have either int or None + for each of start, stop, and step. + """ + self._validate_indexer("positional", key.start, "iloc") + self._validate_indexer("positional", key.stop, "iloc") + self._validate_indexer("positional", key.step, "iloc") + + def _convert_slice_indexer(self, key: slice, kind: str_t): + """ + Convert a slice indexer. + + By definition, these are labels unless 'iloc' is passed in. + Floats are not allowed as the start, step, or stop of the slice. + + Parameters + ---------- + key : label of the slice bound + kind : {'loc', 'getitem'} + """ + assert kind in ["loc", "getitem"], kind + + # potentially cast the bounds to integers + start, stop, step = key.start, key.stop, key.step + + # figure out if this is a positional indexer + def is_int(v): + return v is None or is_integer(v) + + is_index_slice = is_int(start) and is_int(stop) and is_int(step) + + # special case for interval_dtype bc we do not do partial-indexing + # on integer Intervals when slicing + # TODO: write this in terms of e.g. should_partial_index? 
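+         # Rough illustration: on an object-dtype Index such as
+         # ``pd.Index(['a', 'b', 'c'])`` an all-integer slice like ``1:3`` is
+         # treated positionally, whereas on an integer-valued Index the same
+         # bounds are looked up as labels when the slice comes from ``.loc``.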
+ ints_are_positional = self._should_fallback_to_positional or is_interval_dtype( + self.dtype + ) + is_positional = is_index_slice and ints_are_positional + + if kind == "getitem": + # called from the getitem slicers, validate that we are in fact integers + if self.is_integer() or is_index_slice: + # Note: these checks are redundant if we know is_index_slice + self._validate_indexer("slice", key.start, "getitem") + self._validate_indexer("slice", key.stop, "getitem") + self._validate_indexer("slice", key.step, "getitem") + return key + + # convert the slice to an indexer here + + # if we are mixed and have integers + if is_positional: + try: + # Validate start & stop + if start is not None: + self.get_loc(start) + if stop is not None: + self.get_loc(stop) + is_positional = False + except KeyError: + pass + + if com.is_null_slice(key): + # It doesn't matter if we are positional or label based + indexer = key + elif is_positional: + if kind == "loc": + # GH#16121, GH#24612, GH#31810 + warnings.warn( + "Slicing a positional slice with .loc is not supported, " + "and will raise TypeError in a future version. " + "Use .loc with labels or .iloc with positions instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + indexer = key + else: + indexer = self.slice_indexer(start, stop, step) + + return indexer + + @final + def _invalid_indexer(self, form: str_t, key) -> TypeError: + """ + Consistent invalid indexer message. + """ + return TypeError( + f"cannot do {form} indexing on {type(self).__name__} with these " + f"indexers [{key}] of type {type(key).__name__}" + ) + + # -------------------------------------------------------------------- + # Reindex Methods + + @final + def _validate_can_reindex(self, indexer: np.ndarray) -> None: + """ + Check if we are allowing reindexing with this particular indexer. + + Parameters + ---------- + indexer : an integer ndarray + + Raises + ------ + ValueError if its a duplicate axis + """ + # trying to reindex on an axis with duplicates + if not self._index_as_unique and len(indexer): + raise ValueError("cannot reindex on an axis with duplicate labels") + + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> tuple[Index, npt.NDArray[np.intp] | None]: + """ + Create index with target's values. + + Parameters + ---------- + target : an iterable + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. Tied + distances are broken by preferring the larger index value. + level : int, optional + Level of multiindex. + limit : int, optional + Maximum number of consecutive labels in ``target`` to match for + inexact matches. + tolerance : int or float, optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + new_index : pd.Index + Resulting index. + indexer : np.ndarray[np.intp] or None + Indices of output values in original index. 
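+             A value of -1 indicates that a target label could not be matched
+             to the original index.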
+ + Raises + ------ + TypeError + If ``method`` passed along with ``level``. + ValueError + If non-unique multi-index + ValueError + If non-unique index and ``method`` or ``limit`` passed. + + See Also + -------- + Series.reindex : Conform Series to new index with optional filling logic. + DataFrame.reindex : Conform DataFrame to new index with optional filling logic. + + Examples + -------- + >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) + >>> idx + Index(['car', 'bike', 'train', 'tractor'], dtype='object') + >>> idx.reindex(['car', 'bike']) + (Index(['car', 'bike'], dtype='object'), array([0, 1])) + """ + # GH6552: preserve names when reindexing to non-named target + # (i.e. neither Index nor Series). + preserve_names = not hasattr(target, "name") + + # GH7774: preserve dtype/tz if target is empty and not an Index. + target = ensure_has_len(target) # target may be an iterator + + if not isinstance(target, Index) and len(target) == 0: + if level is not None and self._is_multi: + # "Index" has no attribute "levels"; maybe "nlevels"? + idx = self.levels[level] # type: ignore[attr-defined] + else: + idx = self + target = idx[:0] + else: + target = ensure_index(target) + + if level is not None and ( + isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex) + ): + if method is not None: + raise TypeError("Fill method not supported if level passed") + + # TODO: tests where passing `keep_order=not self._is_multi` + # makes a difference for non-MultiIndex case + target, indexer, _ = self._join_level( + target, level, how="right", keep_order=not self._is_multi + ) + + else: + if self.equals(target): + indexer = None + else: + if self._index_as_unique: + indexer = self.get_indexer( + target, method=method, limit=limit, tolerance=tolerance + ) + elif self._is_multi: + raise ValueError("cannot handle a non-unique multi-index!") + else: + if method is not None or limit is not None: + raise ValueError( + "cannot reindex a non-unique index " + "with a method or limit" + ) + indexer, _ = self.get_indexer_non_unique(target) + + if not self.is_unique: + # GH#42568 + warnings.warn( + "reindexing with a non-unique Index is deprecated and " + "will raise in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + target = self._wrap_reindex_result(target, indexer, preserve_names) + return target, indexer + + def _wrap_reindex_result(self, target, indexer, preserve_names: bool): + target = self._maybe_preserve_names(target, preserve_names) + return target + + def _maybe_preserve_names(self, target: Index, preserve_names: bool): + if preserve_names and target.nlevels == 1 and target.name != self.name: + target = target.copy(deep=False) + target.name = self.name + return target + + @final + def _reindex_non_unique( + self, target: Index + ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]: + """ + Create a new index with target's values (move/add/delete values as + necessary) use with non-unique Index and a possibly non-unique target. + + Parameters + ---------- + target : an iterable + + Returns + ------- + new_index : pd.Index + Resulting index. + indexer : np.ndarray[np.intp] + Indices of output values in original index. 
+ new_indexer : np.ndarray[np.intp] or None + + """ + target = ensure_index(target) + if len(target) == 0: + # GH#13691 + return self[:0], np.array([], dtype=np.intp), None + + indexer, missing = self.get_indexer_non_unique(target) + check = indexer != -1 + new_labels = self.take(indexer[check]) + new_indexer = None + + if len(missing): + length = np.arange(len(indexer), dtype=np.intp) + + missing = ensure_platform_int(missing) + missing_labels = target.take(missing) + missing_indexer = length[~check] + cur_labels = self.take(indexer[check]).values + cur_indexer = length[check] + + # Index constructor below will do inference + new_labels = np.empty((len(indexer),), dtype=object) + new_labels[cur_indexer] = cur_labels + new_labels[missing_indexer] = missing_labels + + # GH#38906 + if not len(self): + + new_indexer = np.arange(0, dtype=np.intp) + + # a unique indexer + elif target.is_unique: + + # see GH5553, make sure we use the right indexer + new_indexer = np.arange(len(indexer), dtype=np.intp) + new_indexer[cur_indexer] = np.arange(len(cur_labels)) + new_indexer[missing_indexer] = -1 + + # we have a non_unique selector, need to use the original + # indexer here + else: + + # need to retake to have the same size as the indexer + indexer[~check] = -1 + + # reset the new indexer to account for the new size + new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp) + new_indexer[~check] = -1 + + if isinstance(self, ABCMultiIndex): + new_index = type(self).from_tuples(new_labels, names=self.names) + else: + new_index = Index._with_infer(new_labels, name=self.name) + return new_index, indexer, new_indexer + + # -------------------------------------------------------------------- + # Join Methods + + @overload + def join( + self, + other: Index, + *, + how: str_t = ..., + level: Level = ..., + return_indexers: Literal[True], + sort: bool = ..., + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + ... + + @overload + def join( + self, + other: Index, + *, + how: str_t = ..., + level: Level = ..., + return_indexers: Literal[False] = ..., + sort: bool = ..., + ) -> Index: + ... + + @overload + def join( + self, + other: Index, + *, + how: str_t = ..., + level: Level = ..., + return_indexers: bool = ..., + sort: bool = ..., + ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + ... + + @final + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "other"]) + @_maybe_return_indexers + def join( + self, + other: Index, + how: str_t = "left", + level: Level = None, + return_indexers: bool = False, + sort: bool = False, + ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + """ + Compute join_index and indexers to conform data structures to the new index. + + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : bool, default False + sort : bool, default False + Sort the join keys lexicographically in the result Index. If False, + the order of the join keys depends on the join type (how keyword). + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + other = ensure_index(other) + + if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex): + if (self.tz is None) ^ (other.tz is None): + # Raise instead of casting to object below. 
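+                 # i.e. exactly one of the two indexes is tz-aware, so there is
+                 # no common timezone to align on; raising is clearer than
+                 # silently falling back to an object-dtype join.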
+ raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + + if not self._is_multi and not other._is_multi: + # We have specific handling for MultiIndex below + pself, pother = self._maybe_promote(other) + if pself is not self or pother is not other: + return pself.join( + pother, how=how, level=level, return_indexers=True, sort=sort + ) + + lindexer: np.ndarray | None + rindexer: np.ndarray | None + + # try to figure out the join level + # GH3662 + if level is None and (self._is_multi or other._is_multi): + + # have the same levels/names so a simple join + if self.names == other.names: + pass + else: + return self._join_multi(other, how=how) + + # join on the level + if level is not None and (self._is_multi or other._is_multi): + return self._join_level(other, level, how=how) + + if len(other) == 0: + if how in ("left", "outer"): + join_index = self._view() + rindexer = np.broadcast_to(np.intp(-1), len(join_index)) + return join_index, None, rindexer + elif how in ("right", "inner", "cross"): + join_index = other._view() + lindexer = np.array([]) + return join_index, lindexer, None + + if len(self) == 0: + if how in ("right", "outer"): + join_index = other._view() + lindexer = np.broadcast_to(np.intp(-1), len(join_index)) + return join_index, lindexer, None + elif how in ("left", "inner", "cross"): + join_index = self._view() + rindexer = np.array([]) + return join_index, None, rindexer + + if self._join_precedence < other._join_precedence: + how = {"right": "left", "left": "right"}.get(how, how) + join_index, lidx, ridx = other.join( + self, how=how, level=level, return_indexers=True + ) + lidx, ridx = ridx, lidx + return join_index, lidx, ridx + + if not is_dtype_equal(self.dtype, other.dtype): + dtype = self._find_common_type_compat(other) + this = self.astype(dtype, copy=False) + other = other.astype(dtype, copy=False) + return this.join(other, how=how, return_indexers=True) + + _validate_join_method(how) + + if not self.is_unique and not other.is_unique: + return self._join_non_unique(other, how=how) + elif not self.is_unique or not other.is_unique: + if self.is_monotonic_increasing and other.is_monotonic_increasing: + if not is_interval_dtype(self.dtype): + # otherwise we will fall through to _join_via_get_indexer + # GH#39133 + # go through object dtype for ea till engine is supported properly + return self._join_monotonic(other, how=how) + else: + return self._join_non_unique(other, how=how) + elif ( + self.is_monotonic_increasing + and other.is_monotonic_increasing + and self._can_use_libjoin + and ( + not isinstance(self, ABCMultiIndex) + or not any(is_categorical_dtype(dtype) for dtype in self.dtypes) + ) + and not is_categorical_dtype(self.dtype) + ): + # Categorical is monotonic if data are ordered as categories, but join can + # not handle this in case of not lexicographically monotonic GH#38502 + try: + return self._join_monotonic(other, how=how) + except TypeError: + # object dtype; non-comparable objects + pass + + return self._join_via_get_indexer(other, how, sort) + + @final + def _join_via_get_indexer( + self, other: Index, how: str_t, sort: bool + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + # Fallback if we do not have any fastpaths available based on + # uniqueness/monotonicity + + # Note: at this point we have checked matching dtypes + + if how == "left": + join_index = self + elif how == "right": + join_index = other + elif how == "inner": + # TODO: sort=False here for backwards compat. 
It may + # be better to use the sort parameter passed into join + join_index = self.intersection(other, sort=False) + elif how == "outer": + # TODO: sort=True here for backwards compat. It may + # be better to use the sort parameter passed into join + join_index = self.union(other) + + if sort: + join_index = join_index.sort_values() + + if join_index is self: + lindexer = None + else: + lindexer = self.get_indexer_for(join_index) + if join_index is other: + rindexer = None + else: + rindexer = other.get_indexer_for(join_index) + return join_index, lindexer, rindexer + + @final + def _join_multi(self, other: Index, how: str_t): + from pandas.core.indexes.multi import MultiIndex + from pandas.core.reshape.merge import restore_dropped_levels_multijoin + + # figure out join names + self_names_list = list(com.not_none(*self.names)) + other_names_list = list(com.not_none(*other.names)) + self_names_order = self_names_list.index + other_names_order = other_names_list.index + self_names = set(self_names_list) + other_names = set(other_names_list) + overlap = self_names & other_names + + # need at least 1 in common + if not overlap: + raise ValueError("cannot join with no overlapping index names") + + if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): + + # Drop the non-matching levels from left and right respectively + ldrop_names = sorted(self_names - overlap, key=self_names_order) + rdrop_names = sorted(other_names - overlap, key=other_names_order) + + # if only the order differs + if not len(ldrop_names + rdrop_names): + self_jnlevels = self + other_jnlevels = other.reorder_levels(self.names) + else: + self_jnlevels = self.droplevel(ldrop_names) + other_jnlevels = other.droplevel(rdrop_names) + + # Join left and right + # Join on same leveled multi-index frames is supported + join_idx, lidx, ridx = self_jnlevels.join( + other_jnlevels, how=how, return_indexers=True + ) + + # Restore the dropped levels + # Returned index level order is + # common levels, ldrop_names, rdrop_names + dropped_names = ldrop_names + rdrop_names + + # error: Argument 5/6 to "restore_dropped_levels_multijoin" has + # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any + # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]" + levels, codes, names = restore_dropped_levels_multijoin( + self, + other, + dropped_names, + join_idx, + lidx, # type: ignore[arg-type] + ridx, # type: ignore[arg-type] + ) + + # Re-create the multi-index + multi_join_idx = MultiIndex( + levels=levels, codes=codes, names=names, verify_integrity=False + ) + + multi_join_idx = multi_join_idx.remove_unused_levels() + + return multi_join_idx, lidx, ridx + + jl = list(overlap)[0] + + # Case where only one index is multi + # make the indices into mi's that match + flip_order = False + if isinstance(self, MultiIndex): + self, other = other, self + flip_order = True + # flip if join method is right or left + how = {"right": "left", "left": "right"}.get(how, how) + + level = other.names.index(jl) + result = self._join_level(other, level, how=how) + + if flip_order: + return result[0], result[2], result[1] + return result + + @final + def _join_non_unique( + self, other: Index, how: str_t = "left" + ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]: + from pandas.core.reshape.merge import get_join_indexers + + # We only get here if dtypes match + assert self.dtype == other.dtype + + left_idx, right_idx = get_join_indexers( + [self._values], [other._values], how=how, sort=True + ) + mask = left_idx == -1 + + 
join_array = self._values.take(left_idx) + right = other._values.take(right_idx) + + if isinstance(join_array, np.ndarray): + # error: Argument 3 to "putmask" has incompatible type + # "Union[ExtensionArray, ndarray[Any, Any]]"; expected + # "Union[_SupportsArray[dtype[Any]], _NestedSequence[ + # _SupportsArray[dtype[Any]]], bool, int, float, complex, + # str, bytes, _NestedSequence[Union[bool, int, float, + # complex, str, bytes]]]" + np.putmask(join_array, mask, right) # type: ignore[arg-type] + else: + join_array._putmask(mask, right) + + join_index = self._wrap_joined_index(join_array, other) + + return join_index, left_idx, right_idx + + @final + def _join_level( + self, other: Index, level, how: str_t = "left", keep_order: bool = True + ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + """ + The join method *only* affects the level of the resulting + MultiIndex. Otherwise it just exactly aligns the Index data to the + labels of the level in the MultiIndex. + + If ```keep_order == True```, the order of the data indexed by the + MultiIndex will not be changed; otherwise, it will tie out + with `other`. + """ + from pandas.core.indexes.multi import MultiIndex + + def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: + """ + Returns sorter for the inner most level while preserving the + order of higher levels. + + Parameters + ---------- + labels : list[np.ndarray] + Each ndarray has signed integer dtype, not necessarily identical. + + Returns + ------- + np.ndarray[np.intp] + """ + if labels[0].size == 0: + return np.empty(0, dtype=np.intp) + + if len(labels) == 1: + return get_group_index_sorter(ensure_platform_int(labels[0])) + + # find indexers of beginning of each set of + # same-key labels w.r.t all but last level + tic = labels[0][:-1] != labels[0][1:] + for lab in labels[1:-1]: + tic |= lab[:-1] != lab[1:] + + starts = np.hstack(([True], tic, [True])).nonzero()[0] + lab = ensure_int64(labels[-1]) + return lib.get_level_sorter(lab, ensure_platform_int(starts)) + + if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): + raise TypeError("Join on level between two MultiIndex objects is ambiguous") + + left, right = self, other + + flip_order = not isinstance(self, MultiIndex) + if flip_order: + left, right = right, left + how = {"right": "left", "left": "right"}.get(how, how) + + assert isinstance(left, MultiIndex) + + level = left._get_level_number(level) + old_level = left.levels[level] + + if not right.is_unique: + raise NotImplementedError( + "Index._join_level on non-unique index is not implemented" + ) + + new_level, left_lev_indexer, right_lev_indexer = old_level.join( + right, how=how, return_indexers=True + ) + + if left_lev_indexer is None: + if keep_order or len(left) == 0: + left_indexer = None + join_index = left + else: # sort the leaves + left_indexer = _get_leaf_sorter(left.codes[: level + 1]) + join_index = left[left_indexer] + + else: + left_lev_indexer = ensure_platform_int(left_lev_indexer) + rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) + old_codes = left.codes[level] + + taker = old_codes[old_codes != -1] + new_lev_codes = rev_indexer.take(taker) + + new_codes = list(left.codes) + new_codes[level] = new_lev_codes + + new_levels = list(left.levels) + new_levels[level] = new_level + + if keep_order: # just drop missing values. o.w. 
keep order + left_indexer = np.arange(len(left), dtype=np.intp) + left_indexer = cast(np.ndarray, left_indexer) + mask = new_lev_codes != -1 + if not mask.all(): + new_codes = [lab[mask] for lab in new_codes] + left_indexer = left_indexer[mask] + + else: # tie out the order with other + if level == 0: # outer most level, take the fast route + max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max() + ngroups = 1 + max_new_lev + left_indexer, counts = libalgos.groupsort_indexer( + new_lev_codes, ngroups + ) + + # missing values are placed first; drop them! + left_indexer = left_indexer[counts[0] :] + new_codes = [lab[left_indexer] for lab in new_codes] + + else: # sort the leaves + mask = new_lev_codes != -1 + mask_all = mask.all() + if not mask_all: + new_codes = [lab[mask] for lab in new_codes] + + left_indexer = _get_leaf_sorter(new_codes[: level + 1]) + new_codes = [lab[left_indexer] for lab in new_codes] + + # left_indexers are w.r.t masked frame. + # reverse to original frame! + if not mask_all: + left_indexer = mask.nonzero()[0][left_indexer] + + join_index = MultiIndex( + levels=new_levels, + codes=new_codes, + names=left.names, + verify_integrity=False, + ) + + if right_lev_indexer is not None: + right_indexer = right_lev_indexer.take(join_index.codes[level]) + else: + right_indexer = join_index.codes[level] + + if flip_order: + left_indexer, right_indexer = right_indexer, left_indexer + + left_indexer = ( + None if left_indexer is None else ensure_platform_int(left_indexer) + ) + right_indexer = ( + None if right_indexer is None else ensure_platform_int(right_indexer) + ) + return join_index, left_indexer, right_indexer + + @final + def _join_monotonic( + self, other: Index, how: str_t = "left" + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + # We only get here with matching dtypes and both monotonic increasing + assert other.dtype == self.dtype + + if self.equals(other): + ret_index = other if how == "right" else self + return ret_index, None, None + + ridx: np.ndarray | None + lidx: np.ndarray | None + + if self.is_unique and other.is_unique: + # We can perform much better than the general case + if how == "left": + join_index = self + lidx = None + ridx = self._left_indexer_unique(other) + elif how == "right": + join_index = other + lidx = other._left_indexer_unique(self) + ridx = None + elif how == "inner": + join_array, lidx, ridx = self._inner_indexer(other) + join_index = self._wrap_joined_index(join_array, other) + elif how == "outer": + join_array, lidx, ridx = self._outer_indexer(other) + join_index = self._wrap_joined_index(join_array, other) + else: + if how == "left": + join_array, lidx, ridx = self._left_indexer(other) + elif how == "right": + join_array, ridx, lidx = other._left_indexer(self) + elif how == "inner": + join_array, lidx, ridx = self._inner_indexer(other) + elif how == "outer": + join_array, lidx, ridx = self._outer_indexer(other) + + join_index = self._wrap_joined_index(join_array, other) + + lidx = None if lidx is None else ensure_platform_int(lidx) + ridx = None if ridx is None else ensure_platform_int(ridx) + return join_index, lidx, ridx + + def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _IndexT: + assert other.dtype == self.dtype + + if isinstance(self, ABCMultiIndex): + name = self.names if self.names == other.names else None + # error: Incompatible return value type (got "MultiIndex", + # expected "_IndexT") + return self._constructor(joined, name=name) # type: 
ignore[return-value] + else: + name = get_op_result_name(self, other) + return self._constructor._with_infer(joined, name=name, dtype=self.dtype) + + @cache_readonly + def _can_use_libjoin(self) -> bool: + """ + Whether we can use the fastpaths implement in _libs.join + """ + if type(self) is Index: + # excludes EAs + return isinstance(self.dtype, np.dtype) + return not is_interval_dtype(self.dtype) + + # -------------------------------------------------------------------- + # Uncategorized Methods + + @property + def values(self) -> ArrayLike: + """ + Return an array representing the data in the Index. + + .. warning:: + + We recommend using :attr:`Index.array` or + :meth:`Index.to_numpy`, depending on whether you need + a reference to the underlying data or a NumPy array. + + Returns + ------- + array: numpy.ndarray or ExtensionArray + + See Also + -------- + Index.array : Reference to the underlying data. + Index.to_numpy : A NumPy array representing the underlying data. + """ + return self._data + + # error: Decorated property not supported + # https://github.com/python/mypy/issues/1362 + @cache_readonly # type: ignore[misc] + @doc(IndexOpsMixin.array) + def array(self) -> ExtensionArray: + array = self._data + if isinstance(array, np.ndarray): + from pandas.core.arrays.numpy_ import PandasArray + + array = PandasArray(array) + return array + + @property + def _values(self) -> ExtensionArray | np.ndarray: + """ + The best array representation. + + This is an ndarray or ExtensionArray. + + ``_values`` are consistent between ``Series`` and ``Index``. + + It may differ from the public '.values' method. + + index | values | _values | + ----------------- | --------------- | ------------- | + Index | ndarray | ndarray | + CategoricalIndex | Categorical | Categorical | + DatetimeIndex | ndarray[M8ns] | DatetimeArray | + DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray | + PeriodIndex | ndarray[object] | PeriodArray | + IntervalIndex | IntervalArray | IntervalArray | + + See Also + -------- + values : Values + """ + return self._data + + def _get_engine_target(self) -> ArrayLike: + """ + Get the ndarray or ExtensionArray that we can pass to the IndexEngine + constructor. + """ + vals = self._values + if isinstance(vals, StringArray): + # GH#45652 much more performant than ExtensionEngine + return vals._ndarray + if type(self) is Index and isinstance(self._values, ExtensionArray): + # TODO(ExtensionIndex): remove special-case, just use self._values + return self._values.astype(object) + return vals + + def _from_join_target(self, result: np.ndarray) -> ArrayLike: + """ + Cast the ndarray returned from one of the libjoin.foo_indexer functions + back to type(self)._data. + """ + return result + + @doc(IndexOpsMixin._memory_usage) + def memory_usage(self, deep: bool = False) -> int: + result = self._memory_usage(deep=deep) + + # include our engine hashtable + result += self._engine.sizeof(deep=deep) + return result + + @final + def where(self, cond, other=None) -> Index: + """ + Replace values where the condition is False. + + The replacement is taken from other. + + Parameters + ---------- + cond : bool array-like with the same length as self + Condition to select the values on. + other : scalar, or array-like, default None + Replacement if the condition is False. + + Returns + ------- + pandas.Index + A copy of self with values replaced from other + where the condition is False. + + See Also + -------- + Series.where : Same method for Series. + DataFrame.where : Same method for DataFrame. 
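An illustrative sketch of the distinction drawn above between ``Index.values`` and ``Index.array`` (a usage sketch only; the output shown is what pandas 1.5.x would be expected to print, not verified here): ``.values`` exposes a NumPy ndarray, while ``.array`` wraps the same data in an ExtensionArray (``PandasArray`` for plain NumPy-backed indexes).

>>> idx = pd.Index([1, 2, 3])
>>> idx.values            # plain ndarray
array([1, 2, 3])
>>> idx.array             # ExtensionArray wrapper (PandasArray here)
<PandasArray>
[1, 2, 3]
Length: 3, dtype: int64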
+ + Examples + -------- + >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) + >>> idx + Index(['car', 'bike', 'train', 'tractor'], dtype='object') + >>> idx.where(idx.isin(['car', 'train']), 'other') + Index(['car', 'other', 'train', 'other'], dtype='object') + """ + if isinstance(self, ABCMultiIndex): + raise NotImplementedError( + ".where is not supported for MultiIndex operations" + ) + cond = np.asarray(cond, dtype=bool) + return self.putmask(~cond, other) + + # construction helpers + @final + @classmethod + def _scalar_data_error(cls, data): + # We return the TypeError so that we can raise it from the constructor + # in order to keep mypy happy + return TypeError( + f"{cls.__name__}(...) must be called with a collection of some " + f"kind, {repr(data)} was passed" + ) + + @final + @classmethod + def _string_data_error(cls, data): + raise TypeError( + "String dtype not supported, you may need " + "to explicitly cast to a numeric type" + ) + + def _validate_fill_value(self, value): + """ + Check if the value can be inserted into our array without casting, + and convert it to an appropriate native type if necessary. + + Raises + ------ + TypeError + If the value cannot be inserted into an array of this dtype. + """ + dtype = self.dtype + if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]: + # return np_can_hold_element(dtype, value) + try: + return np_can_hold_element(dtype, value) + except LossySetitemError as err: + # re-raise as TypeError for consistency + raise TypeError from err + elif not can_hold_element(self._values, value): + raise TypeError + return value + + @final + def _require_scalar(self, value): + """ + Check that this is a scalar value that we can use for setitem-like + operations without changing dtype. + """ + if not is_scalar(value): + raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") + return value + + def _is_memory_usage_qualified(self) -> bool: + """ + Return a boolean if we need a qualified .info display. + """ + return self.is_object() + + def is_type_compatible(self, kind: str_t) -> bool: + """ + Whether the index type is compatible with the provided type. + """ + warnings.warn( + "Index.is_type_compatible is deprecated and will be removed in a " + "future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return kind == self.inferred_type + + def __contains__(self, key: Any) -> bool: + """ + Return a boolean indicating whether the provided key is in the index. + + Parameters + ---------- + key : label + The key to check if it is present in the index. + + Returns + ------- + bool + Whether the key search is in the index. + + Raises + ------ + TypeError + If the key is not hashable. + + See Also + -------- + Index.isin : Returns an ndarray of boolean dtype indicating whether the + list-like key is in the index. 
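A brief sketch complementing the ``__contains__`` examples that follow (illustrative, expected pandas 1.5.x behaviour): lookups that fail inside the engine with a TypeError or ValueError are reported as ``False``, while unhashable keys raise before the lookup is attempted.

>>> idx = pd.Index([1, 2, 3])
>>> 'a' in idx        # incompatible but hashable key -> False
False
>>> [1, 2] in idx     # unhashable key -> TypeError
Traceback (most recent call last):
TypeError: unhashable type: 'list'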
+ + Examples + -------- + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx + Int64Index([1, 2, 3, 4], dtype='int64') + + >>> 2 in idx + True + >>> 6 in idx + False + """ + hash(key) + try: + return key in self._engine + except (OverflowError, TypeError, ValueError): + return False + + # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 + # Incompatible types in assignment (expression has type "None", base class + # "object" defined the type as "Callable[[object], int]") + __hash__: ClassVar[None] # type: ignore[assignment] + + @final + def __setitem__(self, key, value): + raise TypeError("Index does not support mutable operations") + + def __getitem__(self, key): + """ + Override numpy.ndarray's __getitem__ method to work as desired. + + This function adds lists and Series as valid boolean indexers + (ndarrays only supports ndarray with dtype=bool). + + If resulting ndim != 1, plain ndarray is returned instead of + corresponding `Index` subclass. + + """ + getitem = self._data.__getitem__ + + if is_integer(key) or is_float(key): + # GH#44051 exclude bool, which would return a 2d ndarray + key = com.cast_scalar_indexer(key, warn_float=True) + return getitem(key) + + if isinstance(key, slice): + # This case is separated from the conditional above to avoid + # pessimization com.is_bool_indexer and ndim checks. + result = getitem(key) + # Going through simple_new for performance. + return type(self)._simple_new(result, name=self._name) + + if com.is_bool_indexer(key): + # if we have list[bools, length=1e5] then doing this check+convert + # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__ + # time below from 3.8 ms to 496 µs + # if we already have ndarray[bool], the overhead is 1.4 µs or .25% + if is_extension_array_dtype(getattr(key, "dtype", None)): + key = key.to_numpy(dtype=bool, na_value=False) + else: + key = np.asarray(key, dtype=bool) + + result = getitem(key) + # Because we ruled out integer above, we always get an arraylike here + if result.ndim > 1: + deprecate_ndim_indexing(result) + if hasattr(result, "_ndarray"): + # i.e. NDArrayBackedExtensionArray + # Unpack to ndarray for MPL compat + # error: Item "ndarray[Any, Any]" of + # "Union[ExtensionArray, ndarray[Any, Any]]" + # has no attribute "_ndarray" + return result._ndarray # type: ignore[union-attr] + return result + + # NB: Using _constructor._simple_new would break if MultiIndex + # didn't override __getitem__ + return self._constructor._simple_new(result, name=self._name) + + def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT: + """ + Fastpath for __getitem__ when we know we have a slice. + """ + res = self._data[slobj] + return type(self)._simple_new(res, name=self._name) + + @final + def _can_hold_identifiers_and_holds_name(self, name) -> bool: + """ + Faster check for ``name in self`` when we know `name` is a Python + identifier (e.g. in NDFrame.__getattr__, which hits this to support + . key lookup). For indexes that can't hold identifiers (everything + but object & categorical) we just return False. + + https://github.com/pandas-dev/pandas/issues/19764 + """ + if self.is_object() or is_string_dtype(self.dtype) or self.is_categorical(): + return name in self + return False + + def append(self, other: Index | Sequence[Index]) -> Index: + """ + Append a collection of Index options together. 
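``Index.append`` has no doctest of its own; a minimal illustrative sketch, with output as expected under pandas 1.5.x (not verified here), appending a single index and a list of indexes:

>>> pd.Index([1, 2]).append(pd.Index([3, 4]))
Int64Index([1, 2, 3, 4], dtype='int64')
>>> pd.Index(['a']).append([pd.Index(['b']), pd.Index(['c'])])
Index(['a', 'b', 'c'], dtype='object')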
+ + Parameters + ---------- + other : Index or list/tuple of indices + + Returns + ------- + Index + """ + to_concat = [self] + + if isinstance(other, (list, tuple)): + to_concat += list(other) + else: + # error: Argument 1 to "append" of "list" has incompatible type + # "Union[Index, Sequence[Index]]"; expected "Index" + to_concat.append(other) # type: ignore[arg-type] + + for obj in to_concat: + if not isinstance(obj, Index): + raise TypeError("all inputs must be Index") + + names = {obj.name for obj in to_concat} + name = None if len(names) > 1 else self.name + + return self._concat(to_concat, name) + + def _concat(self, to_concat: list[Index], name: Hashable) -> Index: + """ + Concatenate multiple Index objects. + """ + to_concat_vals = [x._values for x in to_concat] + + result = concat_compat(to_concat_vals) + + is_numeric = result.dtype.kind in ["i", "u", "f"] + if self._is_backward_compat_public_numeric_index and is_numeric: + return type(self)._simple_new(result, name=name) + + return Index._with_infer(result, name=name) + + @final + def putmask(self, mask, value) -> Index: + """ + Return a new Index of the values set with the mask. + + Returns + ------- + Index + + See Also + -------- + numpy.ndarray.putmask : Changes elements of an array + based on conditional and input values. + """ + mask, noop = validate_putmask(self._values, mask) + if noop: + return self.copy() + + if self.dtype != object and is_valid_na_for_dtype(value, self.dtype): + # e.g. None -> np.nan, see also Block._standardize_fill_value + value = self._na_value + try: + converted = self._validate_fill_value(value) + except (LossySetitemError, ValueError, TypeError) as err: + if is_object_dtype(self): # pragma: no cover + raise err + + dtype = self._find_common_type_compat(value) + return self.astype(dtype).putmask(mask, value) + + values = self._values.copy() + + if isinstance(values, np.ndarray): + converted = setitem_datetimelike_compat(values, mask.sum(), converted) + np.putmask(values, mask, converted) + + else: + # Note: we use the original value here, not converted, as + # _validate_fill_value is not idempotent + values._putmask(mask, value) + + return self._shallow_copy(values) + + def equals(self, other: Any) -> bool: + """ + Determine if two Index object are equal. + + The things that are being compared are: + + * The elements inside the Index object. + * The order of the elements inside the Index object. + + Parameters + ---------- + other : Any + The other object to compare against. + + Returns + ------- + bool + True if "other" is an Index and it has the same elements and order + as the calling index; False otherwise. 
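``Index.putmask`` above carries no example; an unverified sketch contrasting it with ``Index.where`` (``putmask`` replaces values where the mask is True, ``where`` replaces values where the condition is False); output as expected under pandas 1.5.x:

>>> idx = pd.Index([1, 2, 3])
>>> idx.putmask([True, False, True], 10)
Int64Index([10, 2, 10], dtype='int64')
>>> idx.where([True, False, True], 10)
Int64Index([1, 10, 3], dtype='int64')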
+ + Examples + -------- + >>> idx1 = pd.Index([1, 2, 3]) + >>> idx1 + Int64Index([1, 2, 3], dtype='int64') + >>> idx1.equals(pd.Index([1, 2, 3])) + True + + The elements inside are compared + + >>> idx2 = pd.Index(["1", "2", "3"]) + >>> idx2 + Index(['1', '2', '3'], dtype='object') + + >>> idx1.equals(idx2) + False + + The order is compared + + >>> ascending_idx = pd.Index([1, 2, 3]) + >>> ascending_idx + Int64Index([1, 2, 3], dtype='int64') + >>> descending_idx = pd.Index([3, 2, 1]) + >>> descending_idx + Int64Index([3, 2, 1], dtype='int64') + >>> ascending_idx.equals(descending_idx) + False + + The dtype is *not* compared + + >>> int64_idx = pd.Index([1, 2, 3], dtype='int64') + >>> int64_idx + Int64Index([1, 2, 3], dtype='int64') + >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64') + >>> uint64_idx + UInt64Index([1, 2, 3], dtype='uint64') + >>> int64_idx.equals(uint64_idx) + True + """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + + if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype): + # if other is not object, use other's logic for coercion + return other.equals(self) + + if isinstance(other, ABCMultiIndex): + # d-level MultiIndex can equal d-tuple Index + return other.equals(self) + + if isinstance(self._values, ExtensionArray): + # Dispatch to the ExtensionArray's .equals method. + if not isinstance(other, type(self)): + return False + + earr = cast(ExtensionArray, self._data) + return earr.equals(other._data) + + if is_extension_array_dtype(other.dtype): + # All EA-backed Index subclasses override equals + return other.equals(self) + + return array_equivalent(self._values, other._values) + + @final + def identical(self, other) -> bool: + """ + Similar to equals, but checks that object attributes and types are also equal. + + Returns + ------- + bool + If two Index objects have equal elements and same type True, + otherwise False. + """ + return ( + self.equals(other) + and all( + getattr(self, c, None) == getattr(other, c, None) + for c in self._comparables + ) + and type(self) == type(other) + ) + + @final + def asof(self, label): + """ + Return the label from the index, or, if not present, the previous one. + + Assuming that the index is sorted, return the passed index label if it + is in the index, or return the previous index label if the passed one + is not in the index. + + Parameters + ---------- + label : object + The label up to which the method returns the latest index label. + + Returns + ------- + object + The passed label if it is in the index. The previous label if the + passed label is not in the sorted index or `NaN` if there is no + such label. + + See Also + -------- + Series.asof : Return the latest value in a Series up to the + passed index. + merge_asof : Perform an asof merge (similar to left join but it + matches on nearest key rather than equal key). + Index.get_loc : An `asof` is a thin wrapper around `get_loc` + with method='pad'. + + Examples + -------- + `Index.asof` returns the latest index label up to the passed label. + + >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03']) + >>> idx.asof('2014-01-01') + '2013-12-31' + + If the label is in the index, the method returns the passed label. + + >>> idx.asof('2014-01-02') + '2014-01-02' + + If all of the labels in the index are later than the passed label, + NaN is returned. + + >>> idx.asof('1999-01-02') + nan + + If the index is not sorted, an error is raised. + + >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02', + ... 
'2014-01-03']) + >>> idx_not_sorted.asof('2013-12-31') + Traceback (most recent call last): + ValueError: index must be monotonic increasing or decreasing + """ + self._searchsorted_monotonic(label) # validate sortedness + try: + loc = self.get_loc(label) + except (KeyError, TypeError): + # KeyError -> No exact match, try for padded + # TypeError -> passed e.g. non-hashable, fall through to get + # the tested exception message + indexer = self.get_indexer([label], method="pad") + if indexer.ndim > 1 or indexer.size > 1: + raise TypeError("asof requires scalar valued input") + loc = indexer.item() + if loc == -1: + return self._na_value + else: + if isinstance(loc, slice): + loc = loc.indices(len(self))[-1] + + return self[loc] + + def asof_locs( + self, where: Index, mask: npt.NDArray[np.bool_] + ) -> npt.NDArray[np.intp]: + """ + Return the locations (indices) of labels in the index. + + As in the `asof` function, if the label (a particular entry in + `where`) is not in the index, the latest index label up to the + passed label is chosen and its index returned. + + If all of the labels in the index are later than a label in `where`, + -1 is returned. + + `mask` is used to ignore NA values in the index during calculation. + + Parameters + ---------- + where : Index + An Index consisting of an array of timestamps. + mask : np.ndarray[bool] + Array of booleans denoting where values in the original + data are not NA. + + Returns + ------- + np.ndarray[np.intp] + An array of locations (indices) of the labels from the Index + which correspond to the return values of the `asof` function + for every element in `where`. + """ + # error: No overload variant of "searchsorted" of "ndarray" matches argument + # types "Union[ExtensionArray, ndarray[Any, Any]]", "str" + # TODO: will be fixed when ExtensionArray.searchsorted() is fixed + locs = self._values[mask].searchsorted( + where._values, side="right" # type: ignore[call-overload] + ) + locs = np.where(locs > 0, locs - 1, 0) + + result = np.arange(len(self), dtype=np.intp)[mask].take(locs) + + first_value = self._values[mask.argmax()] + result[(locs == 0) & (where._values < first_value)] = -1 + + return result + + def sort_values( + self, + return_indexer: bool = False, + ascending: bool = True, + na_position: str_t = "last", + key: Callable | None = None, + ): + """ + Return a sorted copy of the index. + + Return a sorted copy of the index, and optionally return the indices + that sorted the index itself. + + Parameters + ---------- + return_indexer : bool, default False + Should the indices that would sort the index be returned. + ascending : bool, default True + Should the index values be sorted in an ascending order. + na_position : {'first' or 'last'}, default 'last' + Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at + the end. + + .. versionadded:: 1.2.0 + + key : callable, optional + If not None, apply the key function to the index values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. It should expect an + ``Index`` and return an ``Index`` of the same shape. + + .. versionadded:: 1.1.0 + + Returns + ------- + sorted_index : pandas.Index + Sorted copy of the index. + indexer : numpy.ndarray, optional + The indices that the index itself was sorted by. + + See Also + -------- + Series.sort_values : Sort values of a Series. + DataFrame.sort_values : Sort values in a DataFrame. 
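The ``key`` and ``na_position`` arguments of ``sort_values`` described above are not covered by the doctests that follow; an illustrative, unverified sketch (output as expected under pandas 1.5.x):

>>> pd.Index(['b', 'A', 'c']).sort_values(key=lambda x: x.str.lower())
Index(['A', 'b', 'c'], dtype='object')
>>> pd.Index([3.0, None, 1.0]).sort_values(na_position='first')
Float64Index([nan, 1.0, 3.0], dtype='float64')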
+ + Examples + -------- + >>> idx = pd.Index([10, 100, 1, 1000]) + >>> idx + Int64Index([10, 100, 1, 1000], dtype='int64') + + Sort values in ascending order (default behavior). + + >>> idx.sort_values() + Int64Index([1, 10, 100, 1000], dtype='int64') + + Sort values in descending order, and also get the indices `idx` was + sorted by. + + >>> idx.sort_values(ascending=False, return_indexer=True) + (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) + """ + idx = ensure_key_mapped(self, key) + + # GH 35584. Sort missing values according to na_position kwarg + # ignore na_position for MultiIndex + if not isinstance(self, ABCMultiIndex): + _as = nargsort( + items=idx, ascending=ascending, na_position=na_position, key=key + ) + else: + _as = idx.argsort() + if not ascending: + _as = _as[::-1] + + sorted_index = self.take(_as) + + if return_indexer: + return sorted_index, _as + else: + return sorted_index + + @final + def sort(self, *args, **kwargs): + """ + Use sort_values instead. + """ + raise TypeError("cannot sort an Index object in-place, use sort_values instead") + + def shift(self, periods=1, freq=None): + """ + Shift index by desired number of time frequency increments. + + This method is for shifting the values of datetime-like indexes + by a specified time increment a given number of times. + + Parameters + ---------- + periods : int, default 1 + Number of periods (or increments) to shift by, + can be positive or negative. + freq : pandas.DateOffset, pandas.Timedelta or str, optional + Frequency increment to shift by. + If None, the index is shifted by its own `freq` attribute. + Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. + + Returns + ------- + pandas.Index + Shifted index. + + See Also + -------- + Series.shift : Shift values of Series. + + Notes + ----- + This method is only implemented for datetime-like index classes, + i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex. + + Examples + -------- + Put the first 5 month starts of 2011 into an index. + + >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS') + >>> month_starts + DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01', + '2011-05-01'], + dtype='datetime64[ns]', freq='MS') + + Shift the index by 10 days. + + >>> month_starts.shift(10, freq='D') + DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11', + '2011-05-11'], + dtype='datetime64[ns]', freq=None) + + The default value of `freq` is the `freq` attribute of the index, + which is 'MS' (month start) in this example. + + >>> month_starts.shift(10) + DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01', + '2012-03-01'], + dtype='datetime64[ns]', freq='MS') + """ + raise NotImplementedError( + f"This method is only implemented for DatetimeIndex, PeriodIndex and " + f"TimedeltaIndex; Got type {type(self).__name__}" + ) + + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: + """ + Return the integer indices that would sort the index. + + Parameters + ---------- + *args + Passed to `numpy.ndarray.argsort`. + **kwargs + Passed to `numpy.ndarray.argsort`. + + Returns + ------- + np.ndarray[np.intp] + Integer indices that would sort the index if used as + an indexer. + + See Also + -------- + numpy.argsort : Similar method for NumPy arrays. + Index.sort_values : Return sorted copy of Index. 
+ + Examples + -------- + >>> idx = pd.Index(['b', 'a', 'd', 'c']) + >>> idx + Index(['b', 'a', 'd', 'c'], dtype='object') + + >>> order = idx.argsort() + >>> order + array([1, 0, 3, 2]) + + >>> idx[order] + Index(['a', 'b', 'c', 'd'], dtype='object') + """ + # This works for either ndarray or EA, is overridden + # by RangeIndex, MultIIndex + return self._data.argsort(*args, **kwargs) + + @final + def get_value(self, series: Series, key): + """ + Fast lookup of value from 1-dimensional ndarray. + + Only use this if you know what you're doing. + + Returns + ------- + scalar or Series + """ + warnings.warn( + "get_value is deprecated and will be removed in a future version. " + "Use Series[key] instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + self._check_indexing_error(key) + + try: + # GH 20882, 21257 + # First try to convert the key to a location + # If that fails, raise a KeyError if an integer + # index, otherwise, see if key is an integer, and + # try that + loc = self.get_loc(key) + except KeyError: + if not self._should_fallback_to_positional: + raise + elif is_integer(key): + # If the Index cannot hold integer, then this is unambiguously + # a locational lookup. + loc = key + else: + raise + + return self._get_values_for_loc(series, loc, key) + + def _check_indexing_error(self, key): + if not is_scalar(key): + # if key is not a scalar, directly raise an error (the code below + # would convert to numpy arrays and raise later any way) - GH29926 + raise InvalidIndexError(key) + + @cache_readonly + def _should_fallback_to_positional(self) -> bool: + """ + Should an integer key be treated as positional? + """ + return not self.holds_integer() + + def _get_values_for_loc(self, series: Series, loc, key): + """ + Do a positional lookup on the given Series, returning either a scalar + or a Series. + + Assumes that `series.index is self` + + key is included for MultiIndex compat. + """ + if is_integer(loc): + return series._values[loc] + + return series.iloc[loc] + + @final + def set_value(self, arr, key, value) -> None: + """ + Fast lookup of value from 1-dimensional ndarray. + + .. deprecated:: 1.0 + + Notes + ----- + Only use this if you know what you're doing. + """ + warnings.warn( + ( + "The 'set_value' method is deprecated, and " + "will be removed in a future version." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + loc = self._engine.get_loc(key) + if not can_hold_element(arr, value): + raise ValueError + arr[loc] = value + + _index_shared_docs[ + "get_indexer_non_unique" + ] = """ + Compute indexer and mask for new index given the current index. + + The indexer should be then used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : %(target_klass)s + + Returns + ------- + indexer : np.ndarray[np.intp] + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + missing : np.ndarray[np.intp] + An indexer into the target of the values not found. + These correspond to the -1 in the indexer array. 
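An unverified usage sketch of ``get_indexer_non_unique`` as documented above (output as expected under pandas 1.5.x): every matching position is returned, -1 marks targets that are absent, and ``missing`` indexes into the target.

>>> idx = pd.Index(['a', 'b', 'b'])
>>> indexer, missing = idx.get_indexer_non_unique(['b', 'c'])
>>> indexer
array([ 1,  2, -1])
>>> missing
array([1])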
+ """ + + @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) + def get_indexer_non_unique( + self, target + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + target = ensure_index(target) + target = self._maybe_cast_listlike_indexer(target) + + if not self._should_compare(target) and not is_interval_dtype(self.dtype): + # IntervalIndex get special treatment bc numeric scalars can be + # matched to Interval scalars + return self._get_indexer_non_comparable(target, method=None, unique=False) + + pself, ptarget = self._maybe_promote(target) + if pself is not self or ptarget is not target: + return pself.get_indexer_non_unique(ptarget) + + if not is_dtype_equal(self.dtype, target.dtype): + # TODO: if object, could use infer_dtype to preempt costly + # conversion if still non-comparable? + dtype = self._find_common_type_compat(target) + + this = self.astype(dtype, copy=False) + that = target.astype(dtype, copy=False) + return this.get_indexer_non_unique(that) + + # Note: _maybe_promote ensures we never get here with MultiIndex + # self and non-Multi target + tgt_values = target._get_engine_target() + if self._is_multi and target._is_multi: + engine = self._engine + # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has + # no attribute "_extract_level_codes" + tgt_values = engine._extract_level_codes(target) # type: ignore[union-attr] + + indexer, missing = self._engine.get_indexer_non_unique(tgt_values) + return ensure_platform_int(indexer), ensure_platform_int(missing) + + @final + def get_indexer_for(self, target) -> npt.NDArray[np.intp]: + """ + Guaranteed return of an indexer even when non-unique. + + This dispatches to get_indexer or get_indexer_non_unique + as appropriate. + + Returns + ------- + np.ndarray[np.intp] + List of indices. + + Examples + -------- + >>> idx = pd.Index([np.nan, 'var1', np.nan]) + >>> idx.get_indexer_for([np.nan]) + array([0, 2]) + """ + if self._index_as_unique: + return self.get_indexer(target) + indexer, _ = self.get_indexer_non_unique(target) + return indexer + + def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]: + """ + Analogue to get_indexer that raises if any elements are missing. + """ + keyarr = key + if not isinstance(keyarr, Index): + keyarr = com.asarray_tuplesafe(keyarr) + + if self._index_as_unique: + indexer = self.get_indexer_for(keyarr) + keyarr = self.reindex(keyarr)[0] + else: + keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr) + + self._raise_if_missing(keyarr, indexer, axis_name) + + keyarr = self.take(indexer) + if isinstance(key, Index): + # GH 42790 - Preserve name from an Index + keyarr.name = key.name + if keyarr.dtype.kind in ["m", "M"]: + # DTI/TDI.take can infer a freq in some cases when we dont want one + if isinstance(key, list) or ( + isinstance(key, type(self)) + # "Index" has no attribute "freq" + and key.freq is None # type: ignore[attr-defined] + ): + keyarr = keyarr._with_freq(None) + + return keyarr, indexer + + def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None: + """ + Check that indexer can be used to return a result. + + e.g. at least one element was found, + unless the list of keys was actually empty. + + Parameters + ---------- + key : list-like + Targeted labels (only used to show correct error message). + indexer: array-like of booleans + Indices corresponding to the key, + (with -1 indicating not found). + axis_name : str + + Raises + ------ + KeyError + If at least one key was requested but none was found. 
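A sketch of the user-visible effect of ``_raise_if_missing`` above when label-based selection hits labels that are not present (illustrative and unverified; the exact message is formatted from the missing labels):

>>> s = pd.Series([1, 2], index=['a', 'b'])
>>> s.loc[['a', 'x']]
Traceback (most recent call last):
KeyError: "['x'] not in index"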
+ """ + if len(key) == 0: + return + + # Count missing values + missing_mask = indexer < 0 + nmissing = missing_mask.sum() + + if nmissing: + + # TODO: remove special-case; this is just to keep exception + # message tests from raising while debugging + use_interval_msg = is_interval_dtype(self.dtype) or ( + is_categorical_dtype(self.dtype) + # "Index" has no attribute "categories" [attr-defined] + and is_interval_dtype( + self.categories.dtype # type: ignore[attr-defined] + ) + ) + + if nmissing == len(indexer): + if use_interval_msg: + key = list(key) + raise KeyError(f"None of [{key}] are in the [{axis_name}]") + + not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique()) + raise KeyError(f"{not_found} not in index") + + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: Literal[True] = ... + ) -> npt.NDArray[np.intp]: + ... + + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: Literal[False] + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + ... + + @overload + def _get_indexer_non_comparable( + self, target: Index, method, unique: bool = True + ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + ... + + @final + def _get_indexer_non_comparable( + self, target: Index, method, unique: bool = True + ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """ + Called from get_indexer or get_indexer_non_unique when the target + is of a non-comparable dtype. + + For get_indexer lookups with method=None, get_indexer is an _equality_ + check, so non-comparable dtypes mean we will always have no matches. + + For get_indexer lookups with a method, get_indexer is an _inequality_ + check, so non-comparable dtypes mean we will always raise TypeError. + + Parameters + ---------- + target : Index + method : str or None + unique : bool, default True + * True if called from get_indexer. + * False if called from get_indexer_non_unique. + + Raises + ------ + TypeError + If doing an inequality check, i.e. method is not None. + """ + if method is not None: + other = unpack_nested_dtype(target) + raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}") + + no_matches = -1 * np.ones(target.shape, dtype=np.intp) + if unique: + # This is for get_indexer + return no_matches + else: + # This is for get_indexer_non_unique + missing = np.arange(len(target), dtype=np.intp) + return no_matches, missing + + @property + def _index_as_unique(self) -> bool: + """ + Whether we should treat this as unique for the sake of + get_indexer vs get_indexer_non_unique. + + For IntervalIndex compat. + """ + return self.is_unique + + _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" + + @final + def _maybe_promote(self, other: Index) -> tuple[Index, Index]: + """ + When dealing with an object-dtype Index and a non-object Index, see + if we can upcast the object-dtype one to improve performance. 
+ """ + + if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex): + if ( + self.tz is not None + and other.tz is not None + and not tz_compare(self.tz, other.tz) + ): + # standardize on UTC + return self.tz_convert("UTC"), other.tz_convert("UTC") + + elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex): + try: + return type(other)(self), other + except OutOfBoundsDatetime: + return self, other + elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex): + # TODO: we dont have tests that get here + return type(other)(self), other + + elif self.dtype.kind == "u" and other.dtype.kind == "i": + # GH#41873 + if other.min() >= 0: + # lookup min as it may be cached + # TODO: may need itemsize check if we have non-64-bit Indexes + return self, other.astype(self.dtype) + + elif self._is_multi and not other._is_multi: + try: + # "Type[Index]" has no attribute "from_tuples" + other = type(self).from_tuples(other) # type: ignore[attr-defined] + except (TypeError, ValueError): + # let's instead try with a straight Index + self = Index(self._values) + + if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype): + # Reverse op so we dont need to re-implement on the subclasses + other, self = other._maybe_promote(self) + + return self, other + + @final + def _find_common_type_compat(self, target) -> DtypeObj: + """ + Implementation of find_common_type that adjusts for Index-specific + special cases. + """ + if is_valid_na_for_dtype(target, self.dtype): + # e.g. setting NA value into IntervalArray[int64] + dtype = ensure_dtype_can_hold_na(self.dtype) + if is_dtype_equal(self.dtype, dtype): + raise NotImplementedError( + "This should not be reached. Please report a bug at " + "github.com/pandas-dev/pandas" + ) + return dtype + + target_dtype, _ = infer_dtype_from(target, pandas_dtype=True) + + # special case: if one dtype is uint64 and the other a signed int, return object + # See https://github.com/pandas-dev/pandas/issues/26778 for discussion + # Now it's: + # * float | [u]int -> float + # * uint64 | signed int -> object + # We may change union(float | [u]int) to go to object. + if self.dtype == "uint64" or target_dtype == "uint64": + if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype( + target_dtype + ): + return _dtype_obj + + dtype = find_common_type([self.dtype, target_dtype]) + dtype = common_dtype_categorical_compat([self, target], dtype) + return dtype + + @final + def _should_compare(self, other: Index) -> bool: + """ + Check if `self == other` can ever have non-False entries. + """ + + if (other.is_boolean() and self.is_numeric()) or ( + self.is_boolean() and other.is_numeric() + ): + # GH#16877 Treat boolean labels passed to a numeric index as not + # found. Without this fix False and True would be treated as 0 and 1 + # respectively. + return False + + other = unpack_nested_dtype(other) + dtype = other.dtype + return self._is_comparable_dtype(dtype) or is_object_dtype(dtype) + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? + """ + if self.dtype.kind == "b": + return dtype.kind == "b" + elif is_numeric_dtype(self.dtype): + return is_numeric_dtype(dtype) + return True + + @final + def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: + """ + Group the index labels by a given array of values. + + Parameters + ---------- + values : array + Values used to determine the groups. 
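``Index.groupby`` above returns a dict-like mapping from group values to the index labels falling in each group; an illustrative, unverified sketch (output as expected under pandas 1.5.x):

>>> idx = pd.Index(['a', 'b', 'c', 'd'])
>>> idx.groupby([1, 1, 2, 2])
{1: Index(['a', 'b'], dtype='object'), 2: Index(['c', 'd'], dtype='object')}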
+ + Returns + ------- + dict + {group name -> group labels} + """ + # TODO: if we are a MultiIndex, we can do better + # that converting to tuples + if isinstance(values, ABCMultiIndex): + values = values._values + values = Categorical(values) + result = values._reverse_indexer() + + # map to the label + result = {k: self.take(v) for k, v in result.items()} + + return PrettyDict(result) + + def map(self, mapper, na_action=None): + """ + Map values using an input mapping or function. + + Parameters + ---------- + mapper : function, dict, or Series + Mapping correspondence. + na_action : {None, 'ignore'} + If 'ignore', propagate NA values, without passing them to the + mapping correspondence. + + Returns + ------- + applied : Union[Index, MultiIndex], inferred + The output of the mapping function applied to the index. + If the function returns a tuple with more than one element + a MultiIndex will be returned. + """ + from pandas.core.indexes.multi import MultiIndex + + new_values = self._map_values(mapper, na_action=na_action) + + # we can return a MultiIndex + if new_values.size and isinstance(new_values[0], tuple): + if isinstance(self, MultiIndex): + names = self.names + elif self.name: + names = [self.name] * len(new_values[0]) + else: + names = None + return MultiIndex.from_tuples(new_values, names=names) + + dtype = None + if not new_values.size: + # empty + dtype = self.dtype + + # e.g. if we are floating and new_values is all ints, then we + # don't want to cast back to floating. But if we are UInt64 + # and new_values is all ints, we want to try. + same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type + if same_dtype: + new_values = maybe_cast_pointwise_result( + new_values, self.dtype, same_dtype=same_dtype + ) + + if self._is_backward_compat_public_numeric_index and is_numeric_dtype( + new_values.dtype + ): + return self._constructor( + new_values, dtype=dtype, copy=False, name=self.name + ) + + return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name) + + # TODO: De-duplicate with map, xref GH#32349 + @final + def _transform_index(self, func, *, level=None) -> Index: + """ + Apply function to all values found in index. + + This includes transforming multiindex entries separately. + Only apply function to one level of the MultiIndex if level is specified. + """ + if isinstance(self, ABCMultiIndex): + if level is not None: + # Caller is responsible for ensuring level is positional. + items = [ + tuple(func(y) if i == level else y for i, y in enumerate(x)) + for x in self + ] + else: + items = [tuple(func(y) for y in x) for x in self] + return type(self).from_tuples(items, names=self.names) + else: + items = [func(x) for x in self] + return Index(items, name=self.name, tupleize_cols=False) + + def isin(self, values, level=None) -> npt.NDArray[np.bool_]: + """ + Return a boolean array where the index values are in `values`. + + Compute boolean array of whether each index value is found in the + passed set of values. The length of the returned boolean array matches + the length of the index. + + Parameters + ---------- + values : set or list-like + Sought values. + level : str or int, optional + Name or position of the index level to use (if the index is a + `MultiIndex`). + + Returns + ------- + np.ndarray[bool] + NumPy array of boolean values. + + See Also + -------- + Series.isin : Same for Series. + DataFrame.isin : Same method for DataFrames. 
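``Index.map`` above has no doctest; an unverified sketch (output as expected under pandas 1.5.x) showing both a callable and a dict mapper, with unmapped labels becoming NaN:

>>> pd.Index([1, 2, 3]).map(lambda x: x * 10)
Int64Index([10, 20, 30], dtype='int64')
>>> pd.Index(['cat', 'dog']).map({'cat': 'kitten'})
Index(['kitten', nan], dtype='object')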
+ + Notes + ----- + In the case of `MultiIndex` you must either specify `values` as a + list-like object containing tuples that are the same length as the + number of levels, or specify `level`. Otherwise it will raise a + ``ValueError``. + + If `level` is specified: + + - if it is the name of one *and only one* index level, use that level; + - otherwise it should be a number indicating level position. + + Examples + -------- + >>> idx = pd.Index([1,2,3]) + >>> idx + Int64Index([1, 2, 3], dtype='int64') + + Check whether each index value in a list of values. + + >>> idx.isin([1, 4]) + array([ True, False, False]) + + >>> midx = pd.MultiIndex.from_arrays([[1,2,3], + ... ['red', 'blue', 'green']], + ... names=('number', 'color')) + >>> midx + MultiIndex([(1, 'red'), + (2, 'blue'), + (3, 'green')], + names=['number', 'color']) + + Check whether the strings in the 'color' level of the MultiIndex + are in a list of colors. + + >>> midx.isin(['red', 'orange', 'yellow'], level='color') + array([ True, False, False]) + + To check across the levels of a MultiIndex, pass a list of tuples: + + >>> midx.isin([(1, 'red'), (3, 'red')]) + array([ True, False, False]) + + For a DatetimeIndex, string values in `values` are converted to + Timestamps. + + >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13'] + >>> dti = pd.to_datetime(dates) + >>> dti + DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'], + dtype='datetime64[ns]', freq=None) + + >>> dti.isin(['2000-03-11']) + array([ True, False, False]) + """ + if level is not None: + self._validate_index_level(level) + return algos.isin(self._values, values) + + def _get_string_slice(self, key: str_t): + # this is for partial string indexing, + # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex + raise NotImplementedError + + def slice_indexer( + self, + start: Hashable | None = None, + end: Hashable | None = None, + step: int | None = None, + kind=no_default, + ) -> slice: + """ + Compute the slice indexer for input labels and step. + + Index needs to be ordered and unique. + + Parameters + ---------- + start : label, default None + If None, defaults to the beginning. + end : label, default None + If None, defaults to the end. + step : int, default None + kind : str, default None + + .. deprecated:: 1.4.0 + + Returns + ------- + indexer : slice + + Raises + ------ + KeyError : If key does not exist, or key is not unique and index is + not ordered. + + Notes + ----- + This function assumes that the data is sorted, so use at your own peril + + Examples + -------- + This is a method on all index types. For example you can do: + + >>> idx = pd.Index(list('abcd')) + >>> idx.slice_indexer(start='b', end='c') + slice(1, 3, None) + + >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')]) + >>> idx.slice_indexer(start='b', end=('c', 'g')) + slice(1, 3, None) + """ + self._deprecated_arg(kind, "kind", "slice_indexer") + + start_slice, end_slice = self.slice_locs(start, end, step=step) + + # return a slice + if not is_scalar(start_slice): + raise AssertionError("Start slice bound is non-scalar") + if not is_scalar(end_slice): + raise AssertionError("End slice bound is non-scalar") + + return slice(start_slice, end_slice, step) + + def _maybe_cast_indexer(self, key): + """ + If we have a float key and are not a floating index, then try to cast + to an int if equivalent. + """ + return key + + def _maybe_cast_listlike_indexer(self, target) -> Index: + """ + Analogue to maybe_cast_indexer for get_indexer instead of get_loc. 
+ """ + return ensure_index(target) + + @final + def _validate_indexer(self, form: str_t, key, kind: str_t): + """ + If we are positional indexer, validate that we have appropriate + typed bounds must be an integer. + """ + assert kind in ["getitem", "iloc"] + + if key is not None and not is_integer(key): + raise self._invalid_indexer(form, key) + + def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default): + """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'loc', 'getitem'} or None + + .. deprecated:: 1.3.0 + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + """ + assert kind in ["loc", "getitem", None, no_default] + self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") + + # We are a plain index here (sub-class override this method if they + # wish to have special treatment for floats/ints, e.g. Float64Index and + # datetimelike Indexes + # reject them, if index does not contain label + if (is_float(label) or is_integer(label)) and label not in self: + raise self._invalid_indexer("slice", label) + + return label + + def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): + if self.is_monotonic_increasing: + return self.searchsorted(label, side=side) + elif self.is_monotonic_decreasing: + # np.searchsorted expects ascending sort order, have to reverse + # everything for it to work (element ordering, search side and + # resulting value). + pos = self[::-1].searchsorted( + label, side="right" if side == "left" else "left" + ) + return len(self) - pos + + raise ValueError("index must be monotonic increasing or decreasing") + + def get_slice_bound( + self, label, side: Literal["left", "right"], kind=no_default + ) -> int: + """ + Calculate slice bound that corresponds to given label. + + Returns leftmost (one-past-the-rightmost if ``side=='right'``) position + of given label. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'loc', 'getitem'} or None + + .. deprecated:: 1.4.0 + + Returns + ------- + int + Index of label. + """ + assert kind in ["loc", "getitem", None, no_default] + self._deprecated_arg(kind, "kind", "get_slice_bound") + + if side not in ("left", "right"): + raise ValueError( + "Invalid value for side kwarg, must be either " + f"'left' or 'right': {side}" + ) + + original_label = label + + # For datetime indices label may be a string that has to be converted + # to datetime boundary according to its resolution. + label = self._maybe_cast_slice_bound(label, side) + + # we need to look up the label + try: + slc = self.get_loc(label) + except KeyError as err: + try: + return self._searchsorted_monotonic(label, side) + except ValueError: + # raise the original KeyError + raise err + + if isinstance(slc, np.ndarray): + # get_loc may return a boolean array, which + # is OK as long as they are representable by a slice. 
+ assert is_bool_dtype(slc.dtype) + slc = lib.maybe_booleans_to_slice(slc.view("u1")) + if isinstance(slc, np.ndarray): + raise KeyError( + f"Cannot get {side} slice bound for non-unique " + f"label: {repr(original_label)}" + ) + + if isinstance(slc, slice): + if side == "left": + return slc.start + else: + return slc.stop + else: + if side == "right": + return slc + 1 + else: + return slc + + def slice_locs( + self, start=None, end=None, step=None, kind=no_default + ) -> tuple[int, int]: + """ + Compute slice locations for input labels. + + Parameters + ---------- + start : label, default None + If None, defaults to the beginning. + end : label, default None + If None, defaults to the end. + step : int, defaults None + If None, defaults to 1. + kind : {'loc', 'getitem'} or None + + .. deprecated:: 1.4.0 + + Returns + ------- + start, end : int + + See Also + -------- + Index.get_loc : Get location for a single label. + + Notes + ----- + This method only works if the index is monotonic or unique. + + Examples + -------- + >>> idx = pd.Index(list('abcd')) + >>> idx.slice_locs(start='b', end='c') + (1, 3) + """ + self._deprecated_arg(kind, "kind", "slice_locs") + inc = step is None or step >= 0 + + if not inc: + # If it's a reverse slice, temporarily swap bounds. + start, end = end, start + + # GH 16785: If start and end happen to be date strings with UTC offsets + # attempt to parse and check that the offsets are the same + if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)): + try: + ts_start = Timestamp(start) + ts_end = Timestamp(end) + except (ValueError, TypeError): + pass + else: + if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): + raise ValueError("Both dates must have the same UTC offset") + + start_slice = None + if start is not None: + start_slice = self.get_slice_bound(start, "left") + if start_slice is None: + start_slice = 0 + + end_slice = None + if end is not None: + end_slice = self.get_slice_bound(end, "right") + if end_slice is None: + end_slice = len(self) + + if not inc: + # Bounds at this moment are swapped, swap them back and shift by 1. + # + # slice_locs('B', 'A', step=-1): s='B', e='A' + # + # s='A' e='B' + # AFTER SWAP: | | + # v ------------------> V + # ----------------------------------- + # | | |A|A|A|A| | | | | |B|B| | | | | + # ----------------------------------- + # ^ <------------------ ^ + # SHOULD BE: | | + # end=s-1 start=e-1 + # + end_slice, start_slice = start_slice - 1, end_slice - 1 + + # i == -1 triggers ``len(self) + i`` selection that points to the + # last element, not before-the-first one, subtracting len(self) + # compensates that. + if end_slice == -1: + end_slice -= len(self) + if start_slice == -1: + start_slice -= len(self) + + return start_slice, end_slice + + def delete(self: _IndexT, loc) -> _IndexT: + """ + Make new Index with passed location(-s) deleted. + + Parameters + ---------- + loc : int or list of int + Location of item(-s) which will be deleted. + Use a list of locations to delete more than one value at the same time. + + Returns + ------- + Index + Will be same type as self, except for RangeIndex. + + See Also + -------- + numpy.delete : Delete any rows and column from NumPy array (ndarray). 
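An illustrative, unverified sketch of ``get_slice_bound`` and ``slice_locs`` above on a monotonic index with duplicate labels (output as expected under pandas 1.5.x):

>>> idx = pd.Index(list('abbd'))
>>> idx.get_slice_bound('b', side='left')
1
>>> idx.get_slice_bound('b', side='right')
3
>>> idx.slice_locs(start='b', end='d')
(1, 4)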
+ + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx.delete(1) + Index(['a', 'c'], dtype='object') + + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx.delete([0, 2]) + Index(['b'], dtype='object') + """ + values = self._values + res_values: ArrayLike + if isinstance(values, np.ndarray): + # TODO(__array_function__): special casing will be unnecessary + res_values = np.delete(values, loc) + else: + res_values = values.delete(loc) + + # _constructor so RangeIndex->Int64Index + return self._constructor._simple_new(res_values, name=self.name) + + def insert(self, loc: int, item) -> Index: + """ + Make new Index inserting new item at location. + + Follows Python numpy.insert semantics for negative values. + + Parameters + ---------- + loc : int + item : object + + Returns + ------- + new_index : Index + """ + item = lib.item_from_zerodim(item) + if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object: + item = self._na_value + + arr = self._values + + try: + if isinstance(arr, ExtensionArray): + res_values = arr.insert(loc, item) + return type(self)._simple_new(res_values, name=self.name) + else: + item = self._validate_fill_value(item) + except (TypeError, ValueError, LossySetitemError): + # e.g. trying to insert an integer into a DatetimeIndex + # We cannot keep the same dtype, so cast to the (often object) + # minimal shared dtype before doing the insert. + dtype = self._find_common_type_compat(item) + return self.astype(dtype).insert(loc, item) + + if arr.dtype != object or not isinstance( + item, (tuple, np.datetime64, np.timedelta64) + ): + # with object-dtype we need to worry about numpy incorrectly casting + # dt64/td64 to integer, also about treating tuples as sequences + # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550 + casted = arr.dtype.type(item) + new_values = np.insert(arr, loc, casted) + + else: + # error: No overload variant of "insert" matches argument types + # "ndarray[Any, Any]", "int", "None" + new_values = np.insert(arr, loc, None) # type: ignore[call-overload] + loc = loc if loc >= 0 else loc - 1 + new_values[loc] = item + + if self._typ == "numericindex": + # Use self._constructor instead of Index to retain NumericIndex GH#43921 + # TODO(2.0) can use Index instead of self._constructor + return self._constructor._with_infer(new_values, name=self.name) + else: + return Index._with_infer(new_values, name=self.name) + + def drop( + self, + labels: Index | np.ndarray | Iterable[Hashable], + errors: IgnoreRaise = "raise", + ) -> Index: + """ + Make new Index with passed list of labels deleted. + + Parameters + ---------- + labels : array-like or scalar + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. + + Returns + ------- + dropped : Index + Will be same type as self, except for RangeIndex. + + Raises + ------ + KeyError + If not all of the labels are found in the selected axis + """ + if not isinstance(labels, Index): + # avoid materializing e.g. 
RangeIndex + arr_dtype = "object" if self.dtype == "object" else None + labels = com.index_labels_to_array(labels, dtype=arr_dtype) + + indexer = self.get_indexer_for(labels) + mask = indexer == -1 + if mask.any(): + if errors != "ignore": + raise KeyError(f"{list(labels[mask])} not found in axis") + indexer = indexer[~mask] + return self.delete(indexer) + + # -------------------------------------------------------------------- + # Generated Arithmetic, Comparison, and Unary Methods + + def _cmp_method(self, other, op): + """ + Wrapper used to dispatch comparison operations. + """ + if self.is_(other): + # fastpath + if op in {operator.eq, operator.le, operator.ge}: + arr = np.ones(len(self), dtype=bool) + if self._can_hold_na and not isinstance(self, ABCMultiIndex): + # TODO: should set MultiIndex._can_hold_na = False? + arr[self.isna()] = False + return arr + elif op is operator.ne: + arr = np.zeros(len(self), dtype=bool) + if self._can_hold_na and not isinstance(self, ABCMultiIndex): + arr[self.isna()] = True + return arr + + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len( + self + ) != len(other): + raise ValueError("Lengths must match to compare") + + if not isinstance(other, ABCMultiIndex): + other = extract_array(other, extract_numpy=True) + else: + other = np.asarray(other) + + if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray): + # e.g. PeriodArray, Categorical + with np.errstate(all="ignore"): + result = op(self._values, other) + + elif isinstance(self._values, ExtensionArray): + result = op(self._values, other) + + elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex): + # don't pass MultiIndex + with np.errstate(all="ignore"): + result = ops.comp_method_OBJECT_ARRAY(op, self._values, other) + + else: + with np.errstate(all="ignore"): + result = ops.comparison_op(self._values, other, op) + + return result + + def _construct_result(self, result, name): + if isinstance(result, tuple): + return ( + Index._with_infer(result[0], name=name), + Index._with_infer(result[1], name=name), + ) + return Index._with_infer(result, name=name) + + def _arith_method(self, other, op): + if ( + isinstance(other, Index) + and is_object_dtype(other.dtype) + and type(other) is not Index + ): + # We return NotImplemented for object-dtype index *subclasses* so they have + # a chance to implement ops before we unwrap them. + # See https://github.com/pandas-dev/pandas/issues/31109 + return NotImplemented + + return super()._arith_method(other, op) + + @final + def _unary_method(self, op): + result = op(self._values) + return Index(result, name=self.name) + + def __abs__(self) -> Index: + return self._unary_method(operator.abs) + + def __neg__(self) -> Index: + return self._unary_method(operator.neg) + + def __pos__(self) -> Index: + return self._unary_method(operator.pos) + + def __invert__(self) -> Index: + # GH#8875 + return self._unary_method(operator.inv) + + # -------------------------------------------------------------------- + # Reductions + + def any(self, *args, **kwargs): + """ + Return whether any element is Truthy. + + Parameters + ---------- + *args + Required for compatibility with numpy. + **kwargs + Required for compatibility with numpy. + + Returns + ------- + any : bool or array-like (if axis is specified) + A single element array-like may be converted to bool. + + See Also + -------- + Index.all : Return whether all elements are True. + Series.all : Return whether all elements are True. 
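``Index.drop`` above has no doctest; a minimal unverified sketch (output as expected under pandas 1.5.x), including ``errors='ignore'`` to skip labels that are not present:

>>> idx = pd.Index(['a', 'b', 'c'])
>>> idx.drop(['b'])
Index(['a', 'c'], dtype='object')
>>> idx.drop(['b', 'x'], errors='ignore')
Index(['a', 'c'], dtype='object')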
+ + Notes + ----- + Not a Number (NaN), positive infinity and negative infinity + evaluate to True because these are not equal to zero. + + Examples + -------- + >>> index = pd.Index([0, 1, 2]) + >>> index.any() + True + + >>> index = pd.Index([0, 0, 0]) + >>> index.any() + False + """ + nv.validate_any(args, kwargs) + self._maybe_disable_logical_methods("any") + # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected + # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int, + # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], + # _SupportsArray]" + return np.any(self.values) # type: ignore[arg-type] + + def all(self, *args, **kwargs): + """ + Return whether all elements are Truthy. + + Parameters + ---------- + *args + Required for compatibility with numpy. + **kwargs + Required for compatibility with numpy. + + Returns + ------- + all : bool or array-like (if axis is specified) + A single element array-like may be converted to bool. + + See Also + -------- + Index.any : Return whether any element in an Index is True. + Series.any : Return whether any element in a Series is True. + Series.all : Return whether all elements in a Series are True. + + Notes + ----- + Not a Number (NaN), positive infinity and negative infinity + evaluate to True because these are not equal to zero. + + Examples + -------- + True, because nonzero integers are considered True. + + >>> pd.Index([1, 2, 3]).all() + True + + False, because ``0`` is considered False. + + >>> pd.Index([0, 1, 2]).all() + False + """ + nv.validate_all(args, kwargs) + self._maybe_disable_logical_methods("all") + # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected + # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int, + # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], + # _SupportsArray]" + return np.all(self.values) # type: ignore[arg-type] + + @final + def _maybe_disable_logical_methods(self, opname: str_t) -> None: + """ + raise if this Index subclass does not support any or all. 
+ """ + if ( + isinstance(self, ABCMultiIndex) + or needs_i8_conversion(self.dtype) + or is_interval_dtype(self.dtype) + or is_categorical_dtype(self.dtype) + or is_float_dtype(self.dtype) + ): + # This call will raise + make_invalid_op(opname)(self) + + @Appender(IndexOpsMixin.argmin.__doc__) + def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int: + nv.validate_argmin(args, kwargs) + nv.validate_minmax_axis(axis) + + if not self._is_multi and self.hasnans: + # Take advantage of cache + mask = self._isnan + if not skipna or mask.all(): + return -1 + return super().argmin(skipna=skipna) + + @Appender(IndexOpsMixin.argmax.__doc__) + def argmax(self, axis=None, skipna=True, *args, **kwargs) -> int: + nv.validate_argmax(args, kwargs) + nv.validate_minmax_axis(axis) + + if not self._is_multi and self.hasnans: + # Take advantage of cache + mask = self._isnan + if not skipna or mask.all(): + return -1 + return super().argmax(skipna=skipna) + + @doc(IndexOpsMixin.min) + def min(self, axis=None, skipna=True, *args, **kwargs): + nv.validate_min(args, kwargs) + nv.validate_minmax_axis(axis) + + if not len(self): + return self._na_value + + if len(self) and self.is_monotonic_increasing: + # quick check + first = self[0] + if not isna(first): + return first + + if not self._is_multi and self.hasnans: + # Take advantage of cache + mask = self._isnan + if not skipna or mask.all(): + return self._na_value + + if not self._is_multi and not isinstance(self._values, np.ndarray): + # "ExtensionArray" has no attribute "min" + return self._values.min(skipna=skipna) # type: ignore[attr-defined] + + return super().min(skipna=skipna) + + @doc(IndexOpsMixin.max) + def max(self, axis=None, skipna=True, *args, **kwargs): + nv.validate_max(args, kwargs) + nv.validate_minmax_axis(axis) + + if not len(self): + return self._na_value + + if len(self) and self.is_monotonic_increasing: + # quick check + last = self[-1] + if not isna(last): + return last + + if not self._is_multi and self.hasnans: + # Take advantage of cache + mask = self._isnan + if not skipna or mask.all(): + return self._na_value + + if not self._is_multi and not isinstance(self._values, np.ndarray): + # "ExtensionArray" has no attribute "max" + return self._values.max(skipna=skipna) # type: ignore[attr-defined] + + return super().max(skipna=skipna) + + # -------------------------------------------------------------------- + + @final + @property + def shape(self) -> Shape: + """ + Return a tuple of the shape of the underlying data. + """ + # See GH#27775, GH#27384 for history/reasoning in how this is defined. + return (len(self),) + + @final + def _deprecated_arg(self, value, name: str_t, methodname: str_t) -> None: + """ + Issue a FutureWarning if the arg/kwarg is not no_default. + """ + if value is not no_default: + warnings.warn( + f"'{name}' argument in {methodname} is deprecated " + "and will be removed in a future version. Do not pass it.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + +def ensure_index_from_sequences(sequences, names=None) -> Index: + """ + Construct an index from sequences of data. + + A single sequence returns an Index. Many sequences returns a + MultiIndex. 
+ + Parameters + ---------- + sequences : sequence of sequences + names : sequence of str + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"]) + Int64Index([1, 2, 3], dtype='int64', name='name') + + >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) + MultiIndex([('a', 'a'), + ('a', 'b')], + names=['L1', 'L2']) + + See Also + -------- + ensure_index + """ + from pandas.core.indexes.multi import MultiIndex + + if len(sequences) == 1: + if names is not None: + names = names[0] + return Index._with_infer(sequences[0], name=names) + else: + return MultiIndex.from_arrays(sequences, names=names) + + +def ensure_index(index_like: Axes, copy: bool = False) -> Index: + """ + Ensure that we have an index from some index-like object. + + Parameters + ---------- + index_like : sequence + An Index or other sequence + copy : bool, default False + + Returns + ------- + index : Index or MultiIndex + + See Also + -------- + ensure_index_from_sequences + + Examples + -------- + >>> ensure_index(['a', 'b']) + Index(['a', 'b'], dtype='object') + + >>> ensure_index([('a', 'a'), ('b', 'c')]) + Index([('a', 'a'), ('b', 'c')], dtype='object') + + >>> ensure_index([['a', 'a'], ['b', 'c']]) + MultiIndex([('a', 'b'), + ('a', 'c')], + ) + """ + if isinstance(index_like, Index): + if copy: + index_like = index_like.copy() + return index_like + + if isinstance(index_like, ABCSeries): + name = index_like.name + return Index._with_infer(index_like, name=name, copy=copy) + + if is_iterator(index_like): + index_like = list(index_like) + + if isinstance(index_like, list): + if type(index_like) is not list: + # must check for exactly list here because of strict type + # check in clean_index_list + index_like = list(index_like) + + if len(index_like) and lib.is_all_arraylike(index_like): + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex.from_arrays(index_like) + else: + return Index._with_infer(index_like, copy=copy, tupleize_cols=False) + else: + return Index._with_infer(index_like, copy=copy) + + +def ensure_has_len(seq): + """ + If seq is an iterator, put its values into a list. + """ + try: + len(seq) + except TypeError: + return list(seq) + else: + return seq + + +def trim_front(strings: list[str]) -> list[str]: + """ + Trims zeros and decimal points. + + Examples + -------- + >>> trim_front([" a", " b"]) + ['a', 'b'] + + >>> trim_front([" a", " "]) + ['a', ''] + """ + if not strings: + return strings + while all(strings) and all(x[0] == " " for x in strings): + strings = [x[1:] for x in strings] + return strings + + +def _validate_join_method(method: str) -> None: + if method not in ["left", "right", "inner", "outer"]: + raise ValueError(f"do not recognize join method {method}") + + +def maybe_extract_name(name, obj, cls) -> Hashable: + """ + If no name is passed, then extract it from data, validating hashability. + """ + if name is None and isinstance(obj, (Index, ABCSeries)): + # Note we don't just check for "name" attribute since that would + # pick up e.g. dtype.name + name = obj.name + + # GH#29069 + if not is_hashable(name): + raise TypeError(f"{cls.__name__}.name must be a hashable type") + + return name + + +_cast_depr_msg = ( + "In a future version, passing an object-dtype arraylike to pd.Index will " + "not infer numeric values to numeric dtype (matching the Series behavior). 
" + "To retain the old behavior, explicitly pass the desired dtype or use the " + "desired Index subclass" +) + + +def _maybe_cast_data_without_dtype( + subarr: np.ndarray, cast_numeric_deprecated: bool = True +) -> ArrayLike: + """ + If we have an arraylike input but no passed dtype, try to infer + a supported dtype. + + Parameters + ---------- + subarr : np.ndarray[object] + cast_numeric_deprecated : bool, default True + Whether to issue a FutureWarning when inferring numeric dtypes. + + Returns + ------- + np.ndarray or ExtensionArray + """ + + result = lib.maybe_convert_objects( + subarr, + convert_datetime=True, + convert_timedelta=True, + convert_period=True, + convert_interval=True, + dtype_if_all_nat=np.dtype("datetime64[ns]"), + ) + if result.dtype.kind in ["i", "u", "f"]: + if not cast_numeric_deprecated: + # i.e. we started with a list, not an ndarray[object] + return result + + warnings.warn( + "In a future version, the Index constructor will not infer numeric " + "dtypes when passed object-dtype sequences (matching Series behavior)", + FutureWarning, + stacklevel=find_stack_level(), + ) + result = ensure_wrapped_if_datetimelike(result) + return result + + +def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]: + """ + Return common name if all indices agree, otherwise None (level-by-level). + + Parameters + ---------- + indexes : list of Index objects + + Returns + ------- + list + A list representing the unanimous 'names' found. + """ + name_tups = [tuple(i.names) for i in indexes] + name_sets = [{*ns} for ns in zip_longest(*name_tups)] + names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets) + return names + + +def unpack_nested_dtype(other: _IndexT) -> _IndexT: + """ + When checking if our dtype is comparable with another, we need + to unpack CategoricalDtype to look at its categories.dtype. + + Parameters + ---------- + other : Index + + Returns + ------- + Index + """ + dtype = other.dtype + if is_categorical_dtype(dtype): + # If there is ever a SparseIndex, this could get dispatched + # here too. 
+ # error: Item "dtype[Any]"/"ExtensionDtype" of "Union[dtype[Any], + # ExtensionDtype]" has no attribute "categories" + return dtype.categories # type: ignore[union-attr] + return other + + +def _maybe_try_sort(result, sort): + if sort is None: + try: + result = algos.safe_sort(result) + except TypeError as err: + warnings.warn( + f"{err}, sort order is undefined for incomparable objects.", + RuntimeWarning, + stacklevel=find_stack_level(), + ) + return result diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py new file mode 100644 index 00000000..a39a8105 --- /dev/null +++ b/pandas/core/indexes/category.py @@ -0,0 +1,585 @@ +from __future__ import annotations + +from typing import ( + Any, + Hashable, +) +import warnings + +import numpy as np + +from pandas._libs import index as libindex +from pandas._typing import ( + Dtype, + DtypeObj, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_scalar, + pandas_dtype, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) + +from pandas.core.arrays.categorical import ( + Categorical, + contains, +) +from pandas.core.construction import extract_array +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, + maybe_extract_name, +) +from pandas.core.indexes.extension import ( + NDArrayBackedExtensionIndex, + inherit_names, +) + +from pandas.io.formats.printing import pprint_thing + +_index_doc_kwargs: dict[str, str] = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update({"target_klass": "CategoricalIndex"}) + + +@inherit_names( + [ + "argsort", + "tolist", + "codes", + "categories", + "ordered", + "_reverse_indexer", + "searchsorted", + "is_dtype_equal", + "min", + "max", + ], + Categorical, +) +@inherit_names( + [ + "rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", + "as_unordered", + ], + Categorical, + wrap=True, +) +class CategoricalIndex(NDArrayBackedExtensionIndex): + """ + Index based on an underlying :class:`Categorical`. + + CategoricalIndex, like Categorical, can only take on a limited, + and usually fixed, number of possible values (`categories`). Also, + like Categorical, it might have an order, but numerical operations + (additions, divisions, ...) are not possible. + + Parameters + ---------- + data : array-like (1-dimensional) + The values of the categorical. If `categories` are given, values not in + `categories` will be replaced with NaN. + categories : index-like, optional + The categories for the categorical. Items need to be unique. + If the categories are not given here (and also not in `dtype`), they + will be inferred from the `data`. + ordered : bool, optional + Whether or not this categorical is treated as an ordered + categorical. If not given here or in `dtype`, the resulting + categorical will be unordered. + dtype : CategoricalDtype or "category", optional + If :class:`CategoricalDtype`, cannot be used together with + `categories` or `ordered`. + copy : bool, default False + Make a copy of input ndarray. + name : object, optional + Name to be stored in the index. 
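Illustrative only (not part of the patch): on pandas 1.5.x, the `_maybe_try_sort` helper above is what turns an unsortable union result into a RuntimeWarning rather than an error.

import warnings
import pandas as pd

left = pd.Index([1, "a"])
right = pd.Index(["b", 2])

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    result = left.union(right)   # sort=None by default

print(result)  # expected: object-dtype Index, left unsorted
# a RuntimeWarning about "sort order is undefined for incomparable objects"
# is expected among the captured warnings
print([w.category.__name__ for w in caught])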
+ + Attributes + ---------- + codes + categories + ordered + + Methods + ------- + rename_categories + reorder_categories + add_categories + remove_categories + remove_unused_categories + set_categories + as_ordered + as_unordered + map + + Raises + ------ + ValueError + If the categories do not validate. + TypeError + If an explicit ``ordered=True`` is given but no `categories` and the + `values` are not sortable. + + See Also + -------- + Index : The base pandas Index type. + Categorical : A categorical array. + CategoricalDtype : Type for categorical data. + + Notes + ----- + See the `user guide + `__ + for more. + + Examples + -------- + >>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['a', 'b', 'c'], ordered=False, dtype='category') + + ``CategoricalIndex`` can also be instantiated from a ``Categorical``: + + >>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"]) + >>> pd.CategoricalIndex(c) + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['a', 'b', 'c'], ordered=False, dtype='category') + + Ordered ``CategoricalIndex`` can have a min and max value. + + >>> ci = pd.CategoricalIndex( + ... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"] + ... ) + >>> ci + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['c', 'b', 'a'], ordered=True, dtype='category') + >>> ci.min() + 'c' + """ + + _typ = "categoricalindex" + _data_cls = Categorical + + @property + def _can_hold_strings(self): + return self.categories._can_hold_strings + + @cache_readonly + def _should_fallback_to_positional(self) -> bool: + return self.categories._should_fallback_to_positional + + codes: np.ndarray + categories: Index + ordered: bool | None + _data: Categorical + _values: Categorical + + @property + def _engine_type(self) -> type[libindex.IndexEngine]: + # self.codes can have dtype int8, int16, int32 or int64, so we need + # to return the corresponding engine type (libindex.Int8Engine, etc.). + return { + np.int8: libindex.Int8Engine, + np.int16: libindex.Int16Engine, + np.int32: libindex.Int32Engine, + np.int64: libindex.Int64Engine, + }[self.codes.dtype.type] + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + data=None, + categories=None, + ordered=None, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + ) -> CategoricalIndex: + + name = maybe_extract_name(name, data, cls) + + if data is None: + # GH#38944 + warnings.warn( + "Constructing a CategoricalIndex without passing data is " + "deprecated and will raise in a future version. " + "Use CategoricalIndex([], ...) 
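Not part of the patch: a short sketch of why `_engine_type` above maps codes dtypes to engines. Under pandas 1.5.x, `Categorical` stores its codes in the smallest signed integer type that can hold the number of categories, so the engine must follow that dtype.

import pandas as pd

small = pd.CategoricalIndex(["a", "b", "c"])
print(small.codes.dtype)        # expected: int8  -> libindex.Int8Engine

big = pd.CategoricalIndex(range(1000))
print(big.codes.dtype)          # expected: int16 -> libindex.Int16Engine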
instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + data = [] + + if is_scalar(data): + raise cls._scalar_data_error(data) + + data = Categorical( + data, categories=categories, ordered=ordered, dtype=dtype, copy=copy + ) + + return cls._simple_new(data, name=name) + + # -------------------------------------------------------------------- + + def _is_dtype_compat(self, other) -> Categorical: + """ + *this is an internal non-public method* + + provide a comparison between the dtype of self and other (coercing if + needed) + + Parameters + ---------- + other : Index + + Returns + ------- + Categorical + + Raises + ------ + TypeError if the dtypes are not compatible + """ + if is_categorical_dtype(other): + other = extract_array(other) + if not other._categories_match_up_to_permutation(self): + raise TypeError( + "categories must match existing categories when appending" + ) + + elif other._is_multi: + # preempt raising NotImplementedError in isna call + raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex") + else: + values = other + + cat = Categorical(other, dtype=self.dtype) + other = CategoricalIndex(cat) + if not other.isin(values).all(): + raise TypeError( + "cannot append a non-category item to a CategoricalIndex" + ) + other = other._values + + if not ((other == values) | (isna(other) & isna(values))).all(): + # GH#37667 see test_equals_non_category + raise TypeError( + "categories must match existing categories when appending" + ) + + return other + + @doc(Index.astype) + def astype(self, dtype: Dtype, copy: bool = True) -> Index: + from pandas.core.api import NumericIndex + + dtype = pandas_dtype(dtype) + + categories = self.categories + # the super method always returns Int64Index, UInt64Index and Float64Index + # but if the categories are a NumericIndex with dtype float32, we want to + # return an index with the same dtype as self.categories. + if categories._is_backward_compat_public_numeric_index: + assert isinstance(categories, NumericIndex) # mypy complaint fix + try: + categories._validate_dtype(dtype) + except ValueError: + pass + else: + new_values = self._data.astype(dtype, copy=copy) + # pass copy=False because any copying has been done in the + # _data.astype call above + return categories._constructor(new_values, name=self.name, copy=False) + + return super().astype(dtype, copy=copy) + + def equals(self, other: object) -> bool: + """ + Determine if two CategoricalIndex objects contain the same elements. + + Returns + ------- + bool + If two CategoricalIndex objects have equal elements True, + otherwise False. 
+ """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + + try: + other = self._is_dtype_compat(other) + except (TypeError, ValueError): + return False + + return self._data.equals(other) + + # -------------------------------------------------------------------- + # Rendering Methods + + @property + def _formatter_func(self): + return self.categories._formatter_func + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value) + """ + attrs: list[tuple[str, str | int | bool | None]] + + attrs = [ + ( + "categories", + "[" + ", ".join(self._data._repr_categories()) + "]", + ), + ("ordered", self.ordered), + ] + extra = super()._format_attrs() + return attrs + extra + + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + result = [ + pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep + for x in self._values + ] + return header + result + + # -------------------------------------------------------------------- + + @property + def inferred_type(self) -> str: + return "categorical" + + @doc(Index.__contains__) + def __contains__(self, key: Any) -> bool: + # if key is a NaN, check if any NaN is in self. + if is_valid_na_for_dtype(key, self.categories.dtype): + return self.hasnans + + return contains(self, key, container=self._engine) + + # TODO(2.0): remove reindex once non-unique deprecation is enforced + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> tuple[Index, npt.NDArray[np.intp] | None]: + """ + Create index with target's values (move/add/delete values as necessary) + + Returns + ------- + new_index : pd.Index + Resulting index + indexer : np.ndarray[np.intp] or None + Indices of output values in original index + + """ + if method is not None: + raise NotImplementedError( + "argument method is not implemented for CategoricalIndex.reindex" + ) + if level is not None: + raise NotImplementedError( + "argument level is not implemented for CategoricalIndex.reindex" + ) + if limit is not None: + raise NotImplementedError( + "argument limit is not implemented for CategoricalIndex.reindex" + ) + + target = ibase.ensure_index(target) + + if self.equals(target): + indexer = None + missing = np.array([], dtype=np.intp) + else: + indexer, missing = self.get_indexer_non_unique(target) + if not self.is_unique: + # GH#42568 + warnings.warn( + "reindexing with a non-unique Index is deprecated and will " + "raise in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + new_target: Index + if len(self) and indexer is not None: + new_target = self.take(indexer) + else: + new_target = target + + # filling in missing if needed + if len(missing): + cats = self.categories.get_indexer(target) + + if not isinstance(target, CategoricalIndex) or (cats == -1).any(): + new_target, indexer, _ = super()._reindex_non_unique(target) + else: + # error: "Index" has no attribute "codes" + codes = new_target.codes.copy() # type: ignore[attr-defined] + codes[indexer == -1] = cats[missing] + cat = self._data._from_backing_data(codes) + new_target = type(self)._simple_new(cat, name=self.name) + + # we always want to return an Index type here + # to be consistent with .reindex for other index types (e.g. 
they don't + # coerce based on the actual values, only on the dtype) + # unless we had an initial Categorical to begin with + # in which case we are going to conform to the passed Categorical + if is_categorical_dtype(target): + cat = Categorical(new_target, dtype=target.dtype) + new_target = type(self)._simple_new(cat, name=self.name) + else: + # e.g. test_reindex_with_categoricalindex, test_reindex_duplicate_target + new_target_array = np.asarray(new_target) + new_target = Index._with_infer(new_target_array, name=self.name) + + return new_target, indexer + + # -------------------------------------------------------------------- + # Indexing Methods + + def _maybe_cast_indexer(self, key) -> int: + # GH#41933: we have to do this instead of self._data._validate_scalar + # because this will correctly get partial-indexing on Interval categories + try: + return self._data._unbox_scalar(key) + except KeyError: + if is_valid_na_for_dtype(key, self.categories.dtype): + return -1 + raise + + def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex: + if isinstance(values, CategoricalIndex): + values = values._data + if isinstance(values, Categorical): + # Indexing on codes is more efficient if categories are the same, + # so we can apply some optimizations based on the degree of + # dtype-matching. + cat = self._data._encode_with_my_categories(values) + codes = cat._codes + else: + codes = self.categories.get_indexer(values) + codes = codes.astype(self.codes.dtype, copy=False) + cat = self._data._from_backing_data(codes) + return type(self)._simple_new(cat) + + # -------------------------------------------------------------------- + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + return self.categories._is_comparable_dtype(dtype) + + def take_nd(self, *args, **kwargs) -> CategoricalIndex: + """Alias for `take`""" + warnings.warn( + "CategoricalIndex.take_nd is deprecated, use CategoricalIndex.take " + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.take(*args, **kwargs) + + def map(self, mapper): + """ + Map values using input an input mapping or function. + + Maps the values (their categories, not the codes) of the index to new + categories. If the mapping correspondence is one-to-one the result is a + :class:`~pandas.CategoricalIndex` which has the same order property as + the original, otherwise an :class:`~pandas.Index` is returned. + + If a `dict` or :class:`~pandas.Series` is used any unmapped category is + mapped to `NaN`. Note that if this happens an :class:`~pandas.Index` + will be returned. + + Parameters + ---------- + mapper : function, dict, or Series + Mapping correspondence. + + Returns + ------- + pandas.CategoricalIndex or pandas.Index + Mapped index. + + See Also + -------- + Index.map : Apply a mapping correspondence on an + :class:`~pandas.Index`. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. + Series.apply : Apply more complex functions on a + :class:`~pandas.Series`. 
+ + Examples + -------- + >>> idx = pd.CategoricalIndex(['a', 'b', 'c']) + >>> idx + CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], + ordered=False, dtype='category') + >>> idx.map(lambda x: x.upper()) + CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'], + ordered=False, dtype='category') + >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'}) + CategoricalIndex(['first', 'second', 'third'], categories=['first', + 'second', 'third'], ordered=False, dtype='category') + + If the mapping is one-to-one the ordering of the categories is + preserved: + + >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True) + >>> idx + CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], + ordered=True, dtype='category') + >>> idx.map({'a': 3, 'b': 2, 'c': 1}) + CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True, + dtype='category') + + If the mapping is not one-to-one an :class:`~pandas.Index` is returned: + + >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'}) + Index(['first', 'second', 'first'], dtype='object') + + If a `dict` is used, all unmapped categories are mapped to `NaN` and + the result is an :class:`~pandas.Index`: + + >>> idx.map({'a': 'first', 'b': 'second'}) + Index(['first', 'second', nan], dtype='object') + """ + mapped = self._values.map(mapper) + return Index(mapped, name=self.name) + + def _concat(self, to_concat: list[Index], name: Hashable) -> Index: + # if calling index is category, don't check dtype of others + try: + cat = Categorical._concat_same_type( + [self._is_dtype_compat(c) for c in to_concat] + ) + except TypeError: + # not all to_concat elements are among our categories (or NA) + from pandas.core.dtypes.concat import concat_compat + + res = concat_compat([x._values for x in to_concat]) + return Index(res, name=name) + else: + return type(self)._simple_new(cat, name=name) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py new file mode 100644 index 00000000..1a13cddd --- /dev/null +++ b/pandas/core/indexes/datetimelike.py @@ -0,0 +1,709 @@ +""" +Base and utility classes for tseries type pandas objects. 
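Illustrative only (not part of the patch): the `_concat` fallback above, as observed on pandas 1.5.x. Appending values that are among the categories keeps the categorical dtype; appending anything else falls back to a plain object-dtype Index instead of raising.

import pandas as pd

ci = pd.CategoricalIndex(["a", "b"], categories=["a", "b", "c"])

print(ci.append(pd.Index(["c"])).dtype)  # expected: category ('c' is a known category)
print(ci.append(pd.Index(["z"])).dtype)  # expected: object (falls back via concat_compat)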
+""" +from __future__ import annotations + +from datetime import datetime +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Sequence, + TypeVar, + cast, + final, +) +import warnings + +import numpy as np + +from pandas._libs import ( + NaT, + Timedelta, + lib, +) +from pandas._libs.tslibs import ( + BaseOffset, + Resolution, + Tick, + parsing, + to_offset, +) +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + Appender, + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dtype_equal, + is_integer, + is_list_like, +) +from pandas.core.dtypes.concat import concat_compat + +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin +import pandas.core.common as com +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, + _index_shared_docs, +) +from pandas.core.indexes.extension import ( + NDArrayBackedExtensionIndex, + inherit_names, +) +from pandas.core.indexes.range import RangeIndex +from pandas.core.tools.timedeltas import to_timedelta + +if TYPE_CHECKING: + from pandas import CategoricalIndex + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) + +_T = TypeVar("_T", bound="DatetimeIndexOpsMixin") +_TDT = TypeVar("_TDT", bound="DatetimeTimedeltaMixin") + + +@inherit_names( + ["inferred_freq", "_resolution_obj", "resolution"], + DatetimeLikeArrayMixin, + cache=True, +) +@inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin) +class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): + """ + Common ops mixin to support a unified interface datetimelike Index. + """ + + _is_numeric_dtype = False + _can_hold_strings = False + _data: DatetimeArray | TimedeltaArray | PeriodArray + freq: BaseOffset | None + freqstr: str | None + _resolution_obj: Resolution + + # ------------------------------------------------------------------------ + + @cache_readonly + def hasnans(self) -> bool: + return self._data._hasna + + def equals(self, other: Any) -> bool: + """ + Determines if two Index objects contain the same elements. + """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + elif other.dtype.kind in ["f", "i", "u", "c"]: + return False + elif not isinstance(other, type(self)): + should_try = False + inferable = self._data._infer_matches + if other.dtype == object: + should_try = other.inferred_type in inferable + elif is_categorical_dtype(other.dtype): + other = cast("CategoricalIndex", other) + should_try = other.categories.inferred_type in inferable + + if should_try: + try: + other = type(self)(other) + except (ValueError, TypeError, OverflowError): + # e.g. 
+ # ValueError -> cannot parse str entry, or OutOfBoundsDatetime + # TypeError -> trying to convert IntervalIndex to DatetimeIndex + # OverflowError -> Index([very_large_timedeltas]) + return False + + if not is_dtype_equal(self.dtype, other.dtype): + # have different timezone + return False + + return np.array_equal(self.asi8, other.asi8) + + @Appender(Index.__contains__.__doc__) + def __contains__(self, key: Any) -> bool: + hash(key) + try: + self.get_loc(key) + except (KeyError, TypeError, ValueError): + return False + return True + + def _convert_tolerance(self, tolerance, target): + tolerance = np.asarray(to_timedelta(tolerance).to_numpy()) + return super()._convert_tolerance(tolerance, target) + + # -------------------------------------------------------------------- + # Rendering Methods + + def format( + self, + name: bool = False, + formatter: Callable | None = None, + na_rep: str = "NaT", + date_format: str | None = None, + ) -> list[str]: + """ + Render a string representation of the Index. + """ + header = [] + if name: + header.append( + ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) + if self.name is not None + else "" + ) + + if formatter is not None: + return header + list(self.map(formatter)) + + return self._format_with_header(header, na_rep=na_rep, date_format=date_format) + + def _format_with_header( + self, header: list[str], na_rep: str = "NaT", date_format: str | None = None + ) -> list[str]: + # matches base class except for whitespace padding and date_format + return header + list( + self._format_native_types(na_rep=na_rep, date_format=date_format) + ) + + @property + def _formatter_func(self): + return self._data._formatter() + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value). + """ + attrs = super()._format_attrs() + for attrib in self._attributes: + # iterating over _attributes prevents us from doing this for PeriodIndex + if attrib == "freq": + freq = self.freqstr + if freq is not None: + freq = repr(freq) # e.g. D -> 'D' + attrs.append(("freq", freq)) + return attrs + + @Appender(Index._summary.__doc__) + def _summary(self, name=None) -> str: + result = super()._summary(name=name) + if self.freq: + result += f"\nFreq: {self.freqstr}" + + return result + + # -------------------------------------------------------------------- + # Indexing Methods + + @final + def _can_partial_date_slice(self, reso: Resolution) -> bool: + # e.g. 
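Not part of the patch: how `__contains__` above behaves in practice on pandas 1.5.x; membership tests go through `get_loc`, so date strings that parse to an element are reported as contained.

import pandas as pd

idx = pd.date_range("2020-01-01", periods=3, freq="D")

print("2020-01-02" in idx)  # expected: True (string parses, get_loc succeeds)
print("2021-01-01" in idx)  # expected: False (KeyError is swallowed by __contains__)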
test_getitem_setitem_periodindex + # History of conversation GH#3452, GH#3931, GH#2369, GH#14826 + return reso > self._resolution_obj + # NB: for DTI/PI, not TDI + + def _parsed_string_to_bounds(self, reso: Resolution, parsed): + raise NotImplementedError + + def _parse_with_reso(self, label: str): + # overridden by TimedeltaIndex + try: + if self.freq is None or hasattr(self.freq, "rule_code"): + freq = self.freq + except NotImplementedError: + freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None)) + parsed, reso_str = parsing.parse_time_string(label, freq) + reso = Resolution.from_attrname(reso_str) + return parsed, reso + + def _get_string_slice(self, key: str): + # overridden by TimedeltaIndex + parsed, reso = self._parse_with_reso(key) + try: + return self._partial_date_slice(reso, parsed) + except KeyError as err: + raise KeyError(key) from err + + @final + def _partial_date_slice( + self, + reso: Resolution, + parsed: datetime, + ): + """ + Parameters + ---------- + reso : Resolution + parsed : datetime + + Returns + ------- + slice or ndarray[intp] + """ + if not self._can_partial_date_slice(reso): + raise ValueError + + t1, t2 = self._parsed_string_to_bounds(reso, parsed) + vals = self._data._ndarray + unbox = self._data._unbox + + if self.is_monotonic_increasing: + + if len(self) and ( + (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1]) + ): + # we are out of range + raise KeyError + + # TODO: does this depend on being monotonic _increasing_? + + # a monotonic (sorted) series can be sliced + left = vals.searchsorted(unbox(t1), side="left") + right = vals.searchsorted(unbox(t2), side="right") + return slice(left, right) + + else: + lhs_mask = vals >= unbox(t1) + rhs_mask = vals <= unbox(t2) + + # try to find the dates + return (lhs_mask & rhs_mask).nonzero()[0] + + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + """ + If label is a string, cast it to scalar type according to resolution. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'loc', 'getitem'} or None + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + """ + assert kind in ["loc", "getitem", None, lib.no_default] + self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") + + if isinstance(label, str): + try: + parsed, reso = self._parse_with_reso(label) + except ValueError as err: + # DTI -> parsing.DateParseError + # TDI -> 'unit abbreviation w/o a number' + # PI -> string cannot be parsed as datetime-like + raise self._invalid_indexer("slice", label) from err + + lower, upper = self._parsed_string_to_bounds(reso, parsed) + return lower if side == "left" else upper + elif not isinstance(label, self._data._recognized_scalars): + raise self._invalid_indexer("slice", label) + + return label + + # -------------------------------------------------------------------- + # Arithmetic Methods + + def shift(self: _T, periods: int = 1, freq=None) -> _T: + """ + Shift index by desired number of time frequency increments. + + This method is for shifting the values of datetime-like indexes + by a specified time increment a given number of times. + + Parameters + ---------- + periods : int, default 1 + Number of periods (or increments) to shift by, + can be positive or negative. + freq : pandas.DateOffset, pandas.Timedelta or string, optional + Frequency increment to shift by. + If None, the index is shifted by its own `freq` attribute. 
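Illustrative only (not part of the patch): the partial-string machinery above (`_parse_with_reso`, `_partial_date_slice`, `_maybe_cast_slice_bound`) is what powers label selections like the following on pandas 1.5.x.

import pandas as pd

s = pd.Series(range(60), index=pd.date_range("2020-01-01", periods=60, freq="D"))

print(s.loc["2020-02"].shape)            # expected: (29,), all of February 2020
print(s.loc["2020-01-10":"2020-01-12"])  # string bounds cast via _maybe_cast_slice_bound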
+ Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. + + Returns + ------- + pandas.DatetimeIndex + Shifted index. + + See Also + -------- + Index.shift : Shift values of Index. + PeriodIndex.shift : Shift values of PeriodIndex. + """ + arr = self._data.view() + arr._freq = self.freq + result = arr._time_shift(periods, freq=freq) + return type(self)._simple_new(result, name=self.name) + + # -------------------------------------------------------------------- + + @doc(Index._maybe_cast_listlike_indexer) + def _maybe_cast_listlike_indexer(self, keyarr): + try: + res = self._data._validate_listlike(keyarr, allow_object=True) + except (ValueError, TypeError): + if not isinstance(keyarr, ExtensionArray): + # e.g. we don't want to cast DTA to ndarray[object] + res = com.asarray_tuplesafe(keyarr) + # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray + else: + res = keyarr + return Index(res, dtype=res.dtype) + + +class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin): + """ + Mixin class for methods shared by DatetimeIndex and TimedeltaIndex, + but not PeriodIndex + """ + + _data: DatetimeArray | TimedeltaArray + _comparables = ["name", "freq"] + _attributes = ["name", "freq"] + + # Compat for frequency inference, see GH#23789 + _is_monotonic_increasing = Index.is_monotonic_increasing + _is_monotonic_decreasing = Index.is_monotonic_decreasing + _is_unique = Index.is_unique + + _join_precedence = 10 + + def _with_freq(self, freq): + arr = self._data._with_freq(freq) + return type(self)._simple_new(arr, name=self._name) + + def is_type_compatible(self, kind: str) -> bool: + warnings.warn( + f"{type(self).__name__}.is_type_compatible is deprecated and will be " + "removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return kind in self._data._infer_matches + + @property + def values(self) -> np.ndarray: + # NB: For Datetime64TZ this is lossy + return self._data._ndarray + + # -------------------------------------------------------------------- + # Set Operation Methods + + @cache_readonly + def _as_range_index(self) -> RangeIndex: + # Convert our i8 representations to RangeIndex + # Caller is responsible for checking isinstance(self.freq, Tick) + freq = cast(Tick, self.freq) + tick = freq.delta.value + rng = range(self[0].value, self[-1].value + tick, tick) + return RangeIndex(rng) + + def _can_range_setop(self, other): + return isinstance(self.freq, Tick) and isinstance(other.freq, Tick) + + def _wrap_range_setop(self, other, res_i8): + new_freq = None + if not len(res_i8): + # RangeIndex defaults to step=1, which we don't want. + new_freq = self.freq + elif isinstance(res_i8, RangeIndex): + new_freq = to_offset(Timedelta(res_i8.step)) + res_i8 = res_i8 + + # TODO: we cannot just do + # type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq) + # because test_setops_preserve_freq fails with _validate_frequency raising. + # This raising is incorrect, as 'on_freq' is incorrect. This will + # be fixed by GH#41493 + res_values = res_i8.values.view(self._data._ndarray.dtype) + result = type(self._data)._simple_new( + res_values, dtype=self.dtype, freq=new_freq + ) + return self._wrap_setop_result(other, result) + + def _range_intersect(self, other, sort): + # Dispatch to RangeIndex intersection logic. + left = self._as_range_index + right = other._as_range_index + res_i8 = left.intersection(right, sort=sort) + return self._wrap_range_setop(other, res_i8) + + def _range_union(self, other, sort): + # Dispatch to RangeIndex union logic. 
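Not part of the patch: a usage sketch of `shift` above on pandas 1.5.x; values are moved by `periods` increments of the index's own frequency unless an explicit `freq` is given.

import pandas as pd

idx = pd.date_range("2020-01-01", periods=3, freq="D")

print(idx.shift(2))            # expected: 2020-01-03 .. 2020-01-05, freq='D'
print(idx.shift(1, freq="H"))  # each timestamp moved forward by one hour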
+ left = self._as_range_index + right = other._as_range_index + res_i8 = left.union(right, sort=sort) + return self._wrap_range_setop(other, res_i8) + + def _intersection(self, other: Index, sort=False) -> Index: + """ + intersection specialized to the case with matching dtypes and both non-empty. + """ + other = cast("DatetimeTimedeltaMixin", other) + + if self._can_range_setop(other): + return self._range_intersect(other, sort=sort) + + if not self._can_fast_intersect(other): + result = Index._intersection(self, other, sort=sort) + # We need to invalidate the freq because Index._intersection + # uses _shallow_copy on a view of self._data, which will preserve + # self.freq if we're not careful. + # At this point we should have result.dtype == self.dtype + # and type(result) is type(self._data) + result = self._wrap_setop_result(other, result) + return result._with_freq(None)._with_freq("infer") + + else: + return self._fast_intersect(other, sort) + + def _fast_intersect(self, other, sort): + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + else: + left, right = other, self + + # after sorting, the intersection always starts with the right index + # and ends with the index of which the last elements is smallest + end = min(left[-1], right[-1]) + start = right[0] + + if end < start: + result = self[:0] + else: + lslice = slice(*left.slice_locs(start, end)) + result = left._values[lslice] + + return result + + def _can_fast_intersect(self: _T, other: _T) -> bool: + # Note: we only get here with len(self) > 0 and len(other) > 0 + if self.freq is None: + return False + + elif other.freq != self.freq: + return False + + elif not self.is_monotonic_increasing: + # Because freq is not None, we must then be monotonic decreasing + return False + + # this along with matching freqs ensure that we "line up", + # so intersection will preserve freq + # Note we are assuming away Ticks, as those go through _range_intersect + # GH#42104 + return self.freq.n == 1 + + def _can_fast_union(self: _T, other: _T) -> bool: + # Assumes that type(self) == type(other), as per the annotation + # The ability to fast_union also implies that `freq` should be + # retained on union. + freq = self.freq + + if freq is None or freq != other.freq: + return False + + if not self.is_monotonic_increasing: + # Because freq is not None, we must then be monotonic decreasing + # TODO: do union on the reversed indexes? 
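Illustrative only (not part of the patch): the fast paths above (`_range_intersect`, `_fast_intersect`) mean intersections of regular date ranges keep their frequency on pandas 1.5.x.

import pandas as pd

a = pd.date_range("2020-01-01", periods=5, freq="D")
b = pd.date_range("2020-01-03", periods=5, freq="D")

inter = a.intersection(b)
print(inter)       # expected: 2020-01-03 .. 2020-01-05
print(inter.freq)  # expected: <Day>, preserved via the Tick/RangeIndex fast path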
+ return False + + if len(self) == 0 or len(other) == 0: + # only reached via union_many + return True + + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + else: + left, right = other, self + + right_start = right[0] + left_end = left[-1] + + # Only need to "adjoin", not overlap + return (right_start == left_end + freq) or right_start in left + + def _fast_union(self: _TDT, other: _TDT, sort=None) -> _TDT: + # Caller is responsible for ensuring self and other are non-empty + + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + elif sort is False: + # TDIs are not in the "correct" order and we don't want + # to sort but want to remove overlaps + left, right = self, other + left_start = left[0] + loc = right.searchsorted(left_start, side="left") + right_chunk = right._values[:loc] + dates = concat_compat((left._values, right_chunk)) + result = type(self)._simple_new(dates, name=self.name) + return result + else: + left, right = other, self + + left_end = left[-1] + right_end = right[-1] + + # concatenate + if left_end < right_end: + loc = right.searchsorted(left_end, side="right") + right_chunk = right._values[loc:] + dates = concat_compat([left._values, right_chunk]) + # The can_fast_union check ensures that the result.freq + # should match self.freq + dates = type(self._data)(dates, freq=self.freq) + result = type(self)._simple_new(dates) + return result + else: + return left + + def _union(self, other, sort): + # We are called by `union`, which is responsible for this validation + assert isinstance(other, type(self)) + assert self.dtype == other.dtype + + if self._can_range_setop(other): + return self._range_union(other, sort=sort) + + if self._can_fast_union(other): + result = self._fast_union(other, sort=sort) + # in the case with sort=None, the _can_fast_union check ensures + # that result.freq == self.freq + return result + else: + return super()._union(other, sort)._with_freq("infer") + + # -------------------------------------------------------------------- + # Join Methods + + def _get_join_freq(self, other): + """ + Get the freq to attach to the result of a join operation. + """ + freq = None + if self._can_fast_union(other): + freq = self.freq + return freq + + def _wrap_joined_index(self, joined, other): + assert other.dtype == self.dtype, (other.dtype, self.dtype) + result = super()._wrap_joined_index(joined, other) + result._data._freq = self._get_join_freq(other) + return result + + def _get_engine_target(self) -> np.ndarray: + # engine methods and libjoin methods need dt64/td64 values cast to i8 + return self._data._ndarray.view("i8") + + def _from_join_target(self, result: np.ndarray): + # view e.g. i8 back to M8[ns] + result = result.view(self._data._ndarray.dtype) + return self._data._from_backing_data(result) + + # -------------------------------------------------------------------- + # List-like Methods + + def _get_delete_freq(self, loc: int | slice | Sequence[int]): + """ + Find the `freq` for self.delete(loc). 
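Not part of the patch: the counterpart for `_can_fast_union` / `_fast_union` above, assuming pandas 1.5.x; overlapping or adjoining ranges with the same freq union without losing it.

import pandas as pd

a = pd.date_range("2020-01-01", periods=3, freq="D")
b = pd.date_range("2020-01-03", periods=3, freq="D")  # overlaps a by one day

u = a.union(b)
print(u)       # expected: 2020-01-01 .. 2020-01-05
print(u.freq)  # expected: <Day>, retained because the ranges adjoin/overlap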
+ """ + freq = None + if self.freq is not None: + if is_integer(loc): + if loc in (0, -len(self), -1, len(self) - 1): + freq = self.freq + else: + if is_list_like(loc): + # error: Incompatible types in assignment (expression has + # type "Union[slice, ndarray]", variable has type + # "Union[int, slice, Sequence[int]]") + loc = lib.maybe_indices_to_slice( # type: ignore[assignment] + np.asarray(loc, dtype=np.intp), len(self) + ) + if isinstance(loc, slice) and loc.step in (1, None): + if loc.start in (0, None) or loc.stop in (len(self), None): + freq = self.freq + return freq + + def _get_insert_freq(self, loc: int, item): + """ + Find the `freq` for self.insert(loc, item). + """ + value = self._data._validate_scalar(item) + item = self._data._box_func(value) + + freq = None + if self.freq is not None: + # freq can be preserved on edge cases + if self.size: + if item is NaT: + pass + elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: + freq = self.freq + elif (loc == len(self)) and item - self.freq == self[-1]: + freq = self.freq + else: + # Adding a single item to an empty index may preserve freq + if isinstance(self.freq, Tick): + # all TimedeltaIndex cases go through here; is_on_offset + # would raise TypeError + freq = self.freq + elif self.freq.is_on_offset(item): + freq = self.freq + return freq + + @doc(NDArrayBackedExtensionIndex.delete) + def delete(self, loc) -> DatetimeTimedeltaMixin: + result = super().delete(loc) + result._data._freq = self._get_delete_freq(loc) + return result + + @doc(NDArrayBackedExtensionIndex.insert) + def insert(self, loc: int, item): + result = super().insert(loc, item) + if isinstance(result, type(self)): + # i.e. parent class method did not cast + result._data._freq = self._get_insert_freq(loc, item) + return result + + # -------------------------------------------------------------------- + # NDArray-Like Methods + + @Appender(_index_shared_docs["take"] % _index_doc_kwargs) + def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): + nv.validate_take((), kwargs) + indices = np.asarray(indices, dtype=np.intp) + + result = NDArrayBackedExtensionIndex.take( + self, indices, axis, allow_fill, fill_value, **kwargs + ) + + maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) + if isinstance(maybe_slice, slice): + freq = self._data._get_getitem_freq(maybe_slice) + result._data._freq = freq + return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py new file mode 100644 index 00000000..33cc39cc --- /dev/null +++ b/pandas/core/indexes/datetimes.py @@ -0,0 +1,1251 @@ +from __future__ import annotations + +from datetime import ( + date, + datetime, + time, + timedelta, + tzinfo, +) +import operator +from typing import ( + TYPE_CHECKING, + Hashable, + Literal, +) +import warnings + +import numpy as np + +from pandas._libs import ( + NaT, + Period, + Timestamp, + index as libindex, + lib, +) +from pandas._libs.tslibs import ( + BaseOffset, + Resolution, + periods_per_day, + timezones, + to_offset, +) +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit +from pandas._libs.tslibs.offsets import prefix_mapping +from pandas._typing import ( + Dtype, + DtypeObj, + IntervalClosedType, + IntervalLeftRight, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_scalar, +) +from 
pandas.core.dtypes.missing import is_valid_na_for_dtype + +from pandas.core.arrays.datetimes import ( + DatetimeArray, + tz_to_dtype, +) +import pandas.core.common as com +from pandas.core.indexes.base import ( + Index, + get_unanimous_names, + maybe_extract_name, +) +from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin +from pandas.core.indexes.extension import inherit_names +from pandas.core.tools.times import to_time + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Float64Index, + PeriodIndex, + TimedeltaIndex, + ) + + +def _new_DatetimeIndex(cls, d): + """ + This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__ + """ + if "data" in d and not isinstance(d["data"], DatetimeIndex): + # Avoid need to verify integrity by calling simple_new directly + data = d.pop("data") + if not isinstance(data, DatetimeArray): + # For backward compat with older pickles, we may need to construct + # a DatetimeArray to adapt to the newer _simple_new signature + tz = d.pop("tz") + freq = d.pop("freq") + dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq) + else: + dta = data + for key in ["tz", "freq"]: + # These are already stored in our DatetimeArray; if they are + # also in the pickle and don't match, we have a problem. + if key in d: + assert d[key] == getattr(dta, key) + d.pop(key) + result = cls._simple_new(dta, **d) + else: + with warnings.catch_warnings(): + # TODO: If we knew what was going in to **d, we might be able to + # go through _simple_new instead + warnings.simplefilter("ignore") + result = cls.__new__(cls, **d) + + return result + + +@inherit_names( + DatetimeArray._field_ops + + [ + method + for method in DatetimeArray._datetimelike_methods + if method not in ("tz_localize", "tz_convert", "strftime") + ], + DatetimeArray, + wrap=True, +) +@inherit_names(["is_normalized", "_resolution_obj"], DatetimeArray, cache=True) +@inherit_names( + [ + "tz", + "tzinfo", + "dtype", + "to_pydatetime", + "_format_native_types", + "date", + "time", + "timetz", + "std", + ] + + DatetimeArray._bool_ops, + DatetimeArray, +) +class DatetimeIndex(DatetimeTimedeltaMixin): + """ + Immutable ndarray-like of datetime64 data. + + Represented internally as int64, and which can be boxed to Timestamp objects + that are subclasses of datetime and carry metadata. + + Parameters + ---------- + data : array-like (1-dimensional) + Datetime-like data to construct index with. + freq : str or pandas offset object, optional + One of pandas date offset strings or corresponding objects. The string + 'infer' can be passed in order to set the frequency of the index as the + inferred frequency upon creation. + tz : pytz.timezone or dateutil.tz.tzfile or datetime.tzinfo or str + Set the Timezone of the data. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + closed : {'left', 'right'}, optional + Set whether to include `start` and `end` that are on the + boundary. The default includes boundary points on either end. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. 
+ + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for ambiguous + times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous times. + dayfirst : bool, default False + If True, parse dates in `data` with the day first order. + yearfirst : bool, default False + If True parse dates in `data` with the year first order. + dtype : numpy.dtype or DatetimeTZDtype or str, default None + Note that the only NumPy dtype allowed is ‘datetime64[ns]’. + copy : bool, default False + Make a copy of input ndarray. + name : label, default None + Name to be stored in the index. + + Attributes + ---------- + year + month + day + hour + minute + second + microsecond + nanosecond + date + time + timetz + dayofyear + day_of_year + weekofyear + week + dayofweek + day_of_week + weekday + quarter + tz + freq + freqstr + is_month_start + is_month_end + is_quarter_start + is_quarter_end + is_year_start + is_year_end + is_leap_year + inferred_freq + + Methods + ------- + normalize + strftime + snap + tz_convert + tz_localize + round + floor + ceil + to_period + to_perioddelta + to_pydatetime + to_series + to_frame + month_name + day_name + mean + std + + See Also + -------- + Index : The base pandas Index type. + TimedeltaIndex : Index of timedelta64 data. + PeriodIndex : Index of Period data. + to_datetime : Convert argument to datetime. + date_range : Create a fixed-frequency DatetimeIndex. + + Notes + ----- + To learn more about the frequency strings, please see `this link + `__. + """ + + _typ = "datetimeindex" + + _data_cls = DatetimeArray + _supports_partial_string_indexing = True + + @property + def _engine_type(self) -> type[libindex.DatetimeEngine]: + return libindex.DatetimeEngine + + _data: DatetimeArray + inferred_freq: str | None + tz: tzinfo | None + + # -------------------------------------------------------------------- + # methods that dispatch to DatetimeArray and wrap result + + @doc(DatetimeArray.strftime) + def strftime(self, date_format) -> Index: + arr = self._data.strftime(date_format) + return Index(arr, name=self.name, dtype=object) + + @doc(DatetimeArray.tz_convert) + def tz_convert(self, tz) -> DatetimeIndex: + arr = self._data.tz_convert(tz) + return type(self)._simple_new(arr, name=self.name) + + @doc(DatetimeArray.tz_localize) + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeIndex: + arr = self._data.tz_localize(tz, ambiguous, nonexistent) + return type(self)._simple_new(arr, name=self.name) + + @doc(DatetimeArray.to_period) + def to_period(self, freq=None) -> PeriodIndex: + from pandas.core.indexes.api import PeriodIndex + + arr = self._data.to_period(freq) + return PeriodIndex._simple_new(arr, name=self.name) + + @doc(DatetimeArray.to_perioddelta) + def to_perioddelta(self, freq) -> TimedeltaIndex: + from pandas.core.indexes.api import TimedeltaIndex + + arr = self._data.to_perioddelta(freq) + return TimedeltaIndex._simple_new(arr, name=self.name) + + @doc(DatetimeArray.to_julian_date) + def to_julian_date(self) -> Float64Index: + from pandas.core.indexes.api import Float64Index + + arr = self._data.to_julian_date() + return Float64Index._simple_new(arr, name=self.name) + + @doc(DatetimeArray.isocalendar) + def isocalendar(self) -> DataFrame: + df = self._data.isocalendar() + return df.set_index(self) + + # 
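Not part of the patch: the thin wrappers above (`tz_localize`, `tz_convert`, `to_period`, ...) dispatch to DatetimeArray and re-wrap the result; a minimal pandas 1.5.x sketch:

import pandas as pd

idx = pd.date_range("2020-01-01", periods=2, freq="D")

aware = idx.tz_localize("UTC")         # attach a timezone
print(aware.tz_convert("US/Eastern"))  # same instants, different wall clock
print(idx.to_period("M"))              # expected: PeriodIndex(['2020-01', '2020-01'], dtype='period[M]')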
-------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + data=None, + freq: str | BaseOffset | lib.NoDefault = lib.no_default, + tz=None, + normalize: bool = False, + closed=None, + ambiguous="raise", + dayfirst: bool = False, + yearfirst: bool = False, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + ) -> DatetimeIndex: + + if is_scalar(data): + raise cls._scalar_data_error(data) + + # - Cases checked above all return/raise before reaching here - # + + name = maybe_extract_name(name, data, cls) + + if ( + isinstance(data, DatetimeArray) + and freq is lib.no_default + and tz is None + and dtype is None + ): + # fastpath, similar logic in TimedeltaIndex.__new__; + # Note in this particular case we retain non-nano. + if copy: + data = data.copy() + return cls._simple_new(data, name=name) + elif ( + isinstance(data, DatetimeArray) + and freq is lib.no_default + and tz is None + and is_dtype_equal(data.dtype, dtype) + ): + # Reached via Index.__new__ when we call .astype + # TODO(2.0): special casing can be removed once _from_sequence_not_strict + # no longer chokes on non-nano + if copy: + data = data.copy() + return cls._simple_new(data, name=name) + + dtarr = DatetimeArray._from_sequence_not_strict( + data, + dtype=dtype, + copy=copy, + tz=tz, + freq=freq, + dayfirst=dayfirst, + yearfirst=yearfirst, + ambiguous=ambiguous, + ) + + subarr = cls._simple_new(dtarr, name=name) + return subarr + + # -------------------------------------------------------------------- + + @cache_readonly + def _is_dates_only(self) -> bool: + """ + Return a boolean if we are only dates (and don't have a timezone) + + Returns + ------- + bool + """ + from pandas.io.formats.format import is_dates_only + + # error: Argument 1 to "is_dates_only" has incompatible type + # "Union[ExtensionArray, ndarray]"; expected "Union[ndarray, + # DatetimeArray, Index, DatetimeIndex]" + return self.tz is None and is_dates_only(self._values) # type: ignore[arg-type] + + def __reduce__(self): + d = {"data": self._data, "name": self.name} + return _new_DatetimeIndex, (type(self), d), None + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? + """ + if self.tz is not None: + # If we have tz, we can compare to tzaware + return is_datetime64tz_dtype(dtype) + # if we dont have tz, we can only compare to tznaive + return is_datetime64_dtype(dtype) + + # -------------------------------------------------------------------- + # Rendering Methods + + @property + def _formatter_func(self): + from pandas.io.formats.format import get_format_datetime64 + + formatter = get_format_datetime64(is_dates_only=self._is_dates_only) + return lambda x: f"'{formatter(x)}'" + + # -------------------------------------------------------------------- + # Set Operation Methods + + def _can_range_setop(self, other) -> bool: + # GH 46702: If self or other have non-UTC tzs, DST transitions prevent + # range representation due to no singular step + if ( + self.tz is not None + and not timezones.is_utc(self.tz) + and not timezones.is_fixed_offset(self.tz) + ): + return False + if ( + other.tz is not None + and not timezones.is_utc(other.tz) + and not timezones.is_fixed_offset(other.tz) + ): + return False + return super()._can_range_setop(other) + + def union_many(self, others): + """ + A bit of a hack to accelerate unioning a collection of indexes. 
+ """ + warnings.warn( + "DatetimeIndex.union_many is deprecated and will be removed in " + "a future version. Use obj.union instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + this = self + + for other in others: + if not isinstance(this, DatetimeIndex): + this = Index.union(this, other) + continue + + if not isinstance(other, DatetimeIndex): + try: + other = DatetimeIndex(other) + except TypeError: + pass + + this, other = this._maybe_utc_convert(other) + + if len(self) and len(other) and this._can_fast_union(other): + # union already has fastpath handling for empty cases + this = this._fast_union(other) + else: + this = Index.union(this, other) + + res_name = get_unanimous_names(self, *others)[0] + if this.name != res_name: + return this.rename(res_name) + return this + + def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]: + this = self + + if isinstance(other, DatetimeIndex): + if (self.tz is None) ^ (other.tz is None): + raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + + if not timezones.tz_compare(self.tz, other.tz): + this = self.tz_convert("UTC") + other = other.tz_convert("UTC") + return this, other + + # -------------------------------------------------------------------- + + def _get_time_micros(self) -> npt.NDArray[np.int64]: + """ + Return the number of microseconds since midnight. + + Returns + ------- + ndarray[int64_t] + """ + values = self._data._local_timestamps() + + reso = self._data._reso + ppd = periods_per_day(reso) + + frac = values % ppd + if reso == NpyDatetimeUnit.NPY_FR_ns.value: + micros = frac // 1000 + elif reso == NpyDatetimeUnit.NPY_FR_us.value: + micros = frac + elif reso == NpyDatetimeUnit.NPY_FR_ms.value: + micros = frac * 1000 + elif reso == NpyDatetimeUnit.NPY_FR_s.value: + micros = frac * 1_000_000 + else: # pragma: no cover + raise NotImplementedError(reso) + + micros[self._isnan] = -1 + return micros + + def to_series(self, keep_tz=lib.no_default, index=None, name=None): + """ + Create a Series with both index and values equal to the index keys. + + Useful with map for returning an indexer based on an index. + + Parameters + ---------- + keep_tz : optional, defaults True + Return the data keeping the timezone. + + If keep_tz is True: + + If the timezone is not set, the resulting + Series will have a datetime64[ns] dtype. + + Otherwise the Series will have an datetime64[ns, tz] dtype; the + tz will be preserved. + + If keep_tz is False: + + Series will have a datetime64[ns] dtype. TZ aware + objects will have the tz removed. + + .. versionchanged:: 1.0.0 + The default value is now True. In a future version, + this keyword will be removed entirely. Stop passing the + argument to obtain the future behavior and silence the warning. + + index : Index, optional + Index of resulting Series. If None, defaults to original index. + name : str, optional + Name of resulting Series. If None, defaults to name of original + index. + + Returns + ------- + Series + """ + from pandas import Series + + if index is None: + index = self._view() + if name is None: + name = self.name + + if keep_tz is not lib.no_default: + if keep_tz: + warnings.warn( + "The 'keep_tz' keyword in DatetimeIndex.to_series " + "is deprecated and will be removed in a future version. " + "You can stop passing 'keep_tz' to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + warnings.warn( + "Specifying 'keep_tz=False' is deprecated and this " + "option will be removed in a future release. 
If " + "you want to remove the timezone information, you " + "can do 'idx.tz_convert(None)' before calling " + "'to_series'.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + keep_tz = True + + if keep_tz and self.tz is not None: + # preserve the tz & copy + values = self.copy(deep=True) + else: + # error: Incompatible types in assignment (expression has type + # "Union[ExtensionArray, ndarray]", variable has type "DatetimeIndex") + values = self._values.view("M8[ns]").copy() # type: ignore[assignment] + + return Series(values, index=index, name=name) + + def snap(self, freq="S") -> DatetimeIndex: + """ + Snap time stamps to nearest occurring frequency. + + Returns + ------- + DatetimeIndex + """ + # Superdumb, punting on any optimizing + freq = to_offset(freq) + + dta = self._data.copy() + + for i, v in enumerate(self): + s = v + if not freq.is_on_offset(s): + t0 = freq.rollback(s) + t1 = freq.rollforward(s) + if abs(s - t0) < abs(t1 - s): + s = t0 + else: + s = t1 + dta[i] = s + + return DatetimeIndex._simple_new(dta, name=self.name) + + # -------------------------------------------------------------------- + # Indexing Methods + + def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): + """ + Calculate datetime bounds for parsed time string and its resolution. + + Parameters + ---------- + reso : str + Resolution provided by parsed string. + parsed : datetime + Datetime from parsed string. + + Returns + ------- + lower, upper: pd.Timestamp + """ + per = Period(parsed, freq=reso.attr_abbrev) + start, end = per.start_time, per.end_time + + # GH 24076 + # If an incoming date string contained a UTC offset, need to localize + # the parsed date to this offset first before aligning with the index's + # timezone + start = start.tz_localize(parsed.tzinfo) + end = end.tz_localize(parsed.tzinfo) + + if parsed.tzinfo is not None: + if self.tz is None: + raise ValueError( + "The index must be timezone aware when indexing " + "with a date string with a UTC offset" + ) + start = self._maybe_cast_for_get_loc(start) + end = self._maybe_cast_for_get_loc(end) + return start, end + + def _deprecate_mismatched_indexing(self, key, one_way: bool = False) -> None: + # GH#36148 + # we get here with isinstance(key, self._data._recognized_scalars) + try: + self._data._assert_tzawareness_compat(key) + except TypeError: + if self.tz is None: + msg = ( + "Indexing a timezone-naive DatetimeIndex with a " + "timezone-aware datetime is deprecated and will " + "raise KeyError in a future version. " + "Use a timezone-naive object instead." + ) + elif one_way: + # we special-case timezone-naive strings and timezone-aware + # DatetimeIndex + return + else: + msg = ( + "Indexing a timezone-aware DatetimeIndex with a " + "timezone-naive datetime is deprecated and will " + "raise KeyError in a future version. " + "Use a timezone-aware object instead." 
+ ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location for requested label + + Returns + ------- + loc : int + """ + self._check_indexing_error(key) + + orig_key = key + if is_valid_na_for_dtype(key, self.dtype): + key = NaT + + if isinstance(key, self._data._recognized_scalars): + # needed to localize naive datetimes + self._deprecate_mismatched_indexing(key) + key = self._maybe_cast_for_get_loc(key) + + elif isinstance(key, str): + + try: + parsed, reso = self._parse_with_reso(key) + except ValueError as err: + raise KeyError(key) from err + self._deprecate_mismatched_indexing(parsed, one_way=True) + + if self._can_partial_date_slice(reso): + try: + return self._partial_date_slice(reso, parsed) + except KeyError as err: + if method is None: + raise KeyError(key) from err + + key = self._maybe_cast_for_get_loc(key) + + elif isinstance(key, timedelta): + # GH#20464 + raise TypeError( + f"Cannot index {type(self).__name__} with {type(key).__name__}" + ) + + elif isinstance(key, time): + if method is not None: + raise NotImplementedError( + "cannot yet lookup inexact labels when key is a time object" + ) + return self.indexer_at_time(key) + + else: + # unrecognized type + raise KeyError(key) + + try: + return Index.get_loc(self, key, method, tolerance) + except KeyError as err: + raise KeyError(orig_key) from err + + def _maybe_cast_for_get_loc(self, key) -> Timestamp: + # needed to localize naive datetimes or dates (GH 35690) + try: + key = Timestamp(key) + except ValueError as err: + # FIXME(dateutil#1180): we get here because parse_with_reso + # doesn't raise on "t2m" + if not isinstance(key, str): + # Not expected to be reached, but check to be sure + raise # pragma: no cover + raise KeyError(key) from err + + if key.tzinfo is None: + key = key.tz_localize(self.tz) + else: + key = key.tz_convert(self.tz) + return key + + @doc(DatetimeTimedeltaMixin._maybe_cast_slice_bound) + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + + # GH#42855 handle date here instead of get_slice_bound + if isinstance(label, date) and not isinstance(label, datetime): + # Pandas supports slicing with dates, treated as datetimes at midnight. + # https://github.com/pandas-dev/pandas/issues/31501 + label = Timestamp(label).to_pydatetime() + + label = super()._maybe_cast_slice_bound(label, side, kind=kind) + self._deprecate_mismatched_indexing(label) + return self._maybe_cast_for_get_loc(label) + + def slice_indexer(self, start=None, end=None, step=None, kind=lib.no_default): + """ + Return indexer for specified label slice. + Index.slice_indexer, customized to handle time slicing. + + In addition to functionality provided by Index.slice_indexer, does the + following: + + - if both `start` and `end` are instances of `datetime.time`, it + invokes `indexer_between_time` + - if `start` and `end` are both either string or None perform + value-based selection in non-monotonic cases. + + """ + self._deprecated_arg(kind, "kind", "slice_indexer") + + # For historical reasons DatetimeIndex supports slices between two + # instances of datetime.time as if it were applying a slice mask to + # an array of (self.hour, self.minute, self.seconds, self.microsecond). 
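+        # Illustrative sketch (not part of the original source): with a two-day
+        # minutely index, idx = pd.date_range("2022-01-01", periods=2880, freq="T"),
+        # pd.Series(range(2880), index=idx)[time(9, 0):time(9, 30)] returns the
+        # 09:00-09:30 rows of *both* days, i.e. a mask over times of day rather
+        # than a single contiguous positional range.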
+ if isinstance(start, time) and isinstance(end, time): + if step is not None and step != 1: + raise ValueError("Must have step size of 1 with time slices") + return self.indexer_between_time(start, end) + + if isinstance(start, time) or isinstance(end, time): + raise KeyError("Cannot mix time and non-time slice keys") + + def check_str_or_none(point): + return point is not None and not isinstance(point, str) + + # GH#33146 if start and end are combinations of str and None and Index is not + # monotonic, we can not use Index.slice_indexer because it does not honor the + # actual elements, is only searching for start and end + if ( + check_str_or_none(start) + or check_str_or_none(end) + or self.is_monotonic_increasing + ): + return Index.slice_indexer(self, start, end, step, kind=kind) + + mask = np.array(True) + deprecation_mask = np.array(True) + if start is not None: + start_casted = self._maybe_cast_slice_bound(start, "left") + mask = start_casted <= self + deprecation_mask = start_casted == self + + if end is not None: + end_casted = self._maybe_cast_slice_bound(end, "right") + mask = (self <= end_casted) & mask + deprecation_mask = (end_casted == self) | deprecation_mask + + if not deprecation_mask.any(): + warnings.warn( + "Value based partial slicing on non-monotonic DatetimeIndexes " + "with non-existing keys is deprecated and will raise a " + "KeyError in a future Version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + indexer = mask.nonzero()[0][::step] + if len(indexer) == len(self): + return slice(None) + else: + return indexer + + # -------------------------------------------------------------------- + + @property + def inferred_type(self) -> str: + # b/c datetime is represented as microseconds since the epoch, make + # sure we can't have ambiguous indexing + return "datetime64" + + def indexer_at_time(self, time, asof: bool = False) -> npt.NDArray[np.intp]: + """ + Return index locations of values at particular time of day. + + Parameters + ---------- + time : datetime.time or str + Time passed in either as object (datetime.time) or as string in + appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p", + "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p"). + + Returns + ------- + np.ndarray[np.intp] + + See Also + -------- + indexer_between_time : Get index locations of values between particular + times of day. + DataFrame.at_time : Select values at particular time of day. + """ + if asof: + raise NotImplementedError("'asof' argument is not supported") + + if isinstance(time, str): + from dateutil.parser import parse + + time = parse(time).time() + + if time.tzinfo: + if self.tz is None: + raise ValueError("Index must be timezone aware.") + time_micros = self.tz_convert(time.tzinfo)._get_time_micros() + else: + time_micros = self._get_time_micros() + micros = _time_to_micros(time) + return (time_micros == micros).nonzero()[0] + + def indexer_between_time( + self, start_time, end_time, include_start: bool = True, include_end: bool = True + ) -> npt.NDArray[np.intp]: + """ + Return index locations of values between particular times of day. + + Parameters + ---------- + start_time, end_time : datetime.time, str + Time passed either as object (datetime.time) or as string in + appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p", + "%H:%M:%S", "%H%M%S", "%I:%M:%S%p","%I%M%S%p"). 
+ include_start : bool, default True + include_end : bool, default True + + Returns + ------- + np.ndarray[np.intp] + + See Also + -------- + indexer_at_time : Get index locations of values at particular time of day. + DataFrame.between_time : Select values between particular times of day. + """ + start_time = to_time(start_time) + end_time = to_time(end_time) + time_micros = self._get_time_micros() + start_micros = _time_to_micros(start_time) + end_micros = _time_to_micros(end_time) + + if include_start and include_end: + lop = rop = operator.le + elif include_start: + lop = operator.le + rop = operator.lt + elif include_end: + lop = operator.lt + rop = operator.le + else: + lop = rop = operator.lt + + if start_time <= end_time: + join_op = operator.and_ + else: + join_op = operator.or_ + + mask = join_op(lop(start_micros, time_micros), rop(time_micros, end_micros)) + + return mask.nonzero()[0] + + +def date_range( + start=None, + end=None, + periods=None, + freq=None, + tz=None, + normalize: bool = False, + name: Hashable = None, + closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default, + inclusive: IntervalClosedType | None = None, + **kwargs, +) -> DatetimeIndex: + """ + Return a fixed frequency DatetimeIndex. + + Returns the range of equally spaced time points (where the difference between any + two adjacent points is specified by the given frequency) such that they all + satisfy `start <[=] x <[=] end`, where the first one and the last one are, resp., + the first and last time points in that range that fall on the boundary of ``freq`` + (if given as a frequency string) or that are valid for ``freq`` (if given as a + :class:`pandas.tseries.offsets.DateOffset`). (If exactly one of ``start``, + ``end``, or ``freq`` is *not* specified, this missing parameter can be computed + given ``periods``, the number of timesteps in the range. See the note below.) + + Parameters + ---------- + start : str or datetime-like, optional + Left bound for generating dates. + end : str or datetime-like, optional + Right bound for generating dates. + periods : int, optional + Number of periods to generate. + freq : str or DateOffset, default 'D' + Frequency strings can have multiples, e.g. '5H'. See + :ref:`here ` for a list of + frequency aliases. + tz : str or tzinfo, optional + Time zone name for returning localized DatetimeIndex, for example + 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is + timezone-naive. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + name : str, default None + Name of the resulting DatetimeIndex. + closed : {None, 'left', 'right'}, optional + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None, the default). + + .. deprecated:: 1.4.0 + Argument `closed` has been deprecated to standardize boundary inputs. + Use `inclusive` instead, to set each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 1.4.0 + **kwargs + For compatibility. Has no effect on the result. + + Returns + ------- + rng : DatetimeIndex + + See Also + -------- + DatetimeIndex : An immutable container for datetimes. + timedelta_range : Return a fixed frequency TimedeltaIndex. + period_range : Return a fixed frequency PeriodIndex. + interval_range : Return a fixed frequency IntervalIndex. 
+ + Notes + ----- + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``DatetimeIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + **Specifying the values** + + The next four examples generate the same `DatetimeIndex`, but vary + the combination of `start`, `end` and `periods`. + + Specify `start` and `end`, with the default daily frequency. + + >>> pd.date_range(start='1/1/2018', end='1/08/2018') + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', + '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], + dtype='datetime64[ns]', freq='D') + + Specify `start` and `periods`, the number of periods (days). + + >>> pd.date_range(start='1/1/2018', periods=8) + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', + '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], + dtype='datetime64[ns]', freq='D') + + Specify `end` and `periods`, the number of periods (days). + + >>> pd.date_range(end='1/1/2018', periods=8) + DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28', + '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + Specify `start`, `end`, and `periods`; the frequency is generated + automatically (linearly spaced). + + >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) + DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', + '2018-04-27 00:00:00'], + dtype='datetime64[ns]', freq=None) + + **Other Parameters** + + Changed the `freq` (frequency) to ``'M'`` (month end frequency). + + >>> pd.date_range(start='1/1/2018', periods=5, freq='M') + DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30', + '2018-05-31'], + dtype='datetime64[ns]', freq='M') + + Multiples are allowed + + >>> pd.date_range(start='1/1/2018', periods=5, freq='3M') + DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', + '2019-01-31'], + dtype='datetime64[ns]', freq='3M') + + `freq` can also be specified as an Offset object. + + >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3)) + DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', + '2019-01-31'], + dtype='datetime64[ns]', freq='3M') + + Specify `tz` to set the timezone. + + >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo') + DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00', + '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00', + '2018-01-05 00:00:00+09:00'], + dtype='datetime64[ns, Asia/Tokyo]', freq='D') + + `inclusive` controls whether to include `start` and `end` that are on the + boundary. The default, "both", includes boundary points on either end. + + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive="both") + DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], + dtype='datetime64[ns]', freq='D') + + Use ``inclusive='left'`` to exclude `end` if it falls on the boundary. + + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left') + DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], + dtype='datetime64[ns]', freq='D') + + Use ``inclusive='right'`` to exclude `start` if it falls on the boundary, and + similarly ``inclusive='neither'`` will exclude both `start` and `end`. 
+ + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right') + DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], + dtype='datetime64[ns]', freq='D') + """ + if inclusive is not None and closed is not lib.no_default: + raise ValueError( + "Deprecated argument `closed` cannot be passed" + "if argument `inclusive` is not None" + ) + elif closed is not lib.no_default: + warnings.warn( + "Argument `closed` is deprecated in favor of `inclusive`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if closed is None: + inclusive = "both" + elif closed in ("left", "right"): + inclusive = closed + else: + raise ValueError( + "Argument `closed` has to be either 'left', 'right' or None" + ) + elif inclusive is None: + inclusive = "both" + + if freq is None and com.any_none(periods, start, end): + freq = "D" + + dtarr = DatetimeArray._generate_range( + start=start, + end=end, + periods=periods, + freq=freq, + tz=tz, + normalize=normalize, + inclusive=inclusive, + **kwargs, + ) + return DatetimeIndex._simple_new(dtarr, name=name) + + +def bdate_range( + start=None, + end=None, + periods: int | None = None, + freq="B", + tz=None, + normalize: bool = True, + name: Hashable = None, + weekmask=None, + holidays=None, + closed: IntervalLeftRight | lib.NoDefault | None = lib.no_default, + inclusive: IntervalClosedType | None = None, + **kwargs, +) -> DatetimeIndex: + """ + Return a fixed frequency DatetimeIndex with business day as the default. + + Parameters + ---------- + start : str or datetime-like, default None + Left bound for generating dates. + end : str or datetime-like, default None + Right bound for generating dates. + periods : int, default None + Number of periods to generate. + freq : str or DateOffset, default 'B' (business daily) + Frequency strings can have multiples, e.g. '5H'. + tz : str or None + Time zone name for returning localized DatetimeIndex, for example + Asia/Beijing. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + name : str, default None + Name of the resulting DatetimeIndex. + weekmask : str or None, default None + Weekmask of valid business days, passed to ``numpy.busdaycalendar``, + only used when custom frequency strings are passed. The default + value None is equivalent to 'Mon Tue Wed Thu Fri'. + holidays : list-like or None, default None + Dates to exclude from the set of valid business days, passed to + ``numpy.busdaycalendar``, only used when custom frequency strings + are passed. + closed : str, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None). + + .. deprecated:: 1.4.0 + Argument `closed` has been deprecated to standardize boundary inputs. + Use `inclusive` instead, to set each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 1.4.0 + **kwargs + For compatibility. Has no effect on the result. + + Returns + ------- + DatetimeIndex + + Notes + ----- + Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. Specifying ``freq`` is a requirement + for ``bdate_range``. Use ``date_range`` if specifying ``freq`` is not + desired. + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + Note how the two weekend days are skipped in the result. 
+ + >>> pd.bdate_range(start='1/1/2018', end='1/08/2018') + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', + '2018-01-05', '2018-01-08'], + dtype='datetime64[ns]', freq='B') + """ + if freq is None: + msg = "freq must be specified for bdate_range; use date_range instead" + raise TypeError(msg) + + if isinstance(freq, str) and freq.startswith("C"): + try: + weekmask = weekmask or "Mon Tue Wed Thu Fri" + freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask) + except (KeyError, TypeError) as err: + msg = f"invalid custom frequency string: {freq}" + raise ValueError(msg) from err + elif holidays or weekmask: + msg = ( + "a custom frequency string is required when holidays or " + f"weekmask are passed, got frequency {freq}" + ) + raise ValueError(msg) + + return date_range( + start=start, + end=end, + periods=periods, + freq=freq, + tz=tz, + normalize=normalize, + name=name, + closed=closed, + inclusive=inclusive, + **kwargs, + ) + + +def _time_to_micros(time_obj: time) -> int: + seconds = time_obj.hour * 60 * 60 + 60 * time_obj.minute + time_obj.second + return 1_000_000 * seconds + time_obj.microsecond diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py new file mode 100644 index 00000000..5bf30dde --- /dev/null +++ b/pandas/core/indexes/extension.py @@ -0,0 +1,192 @@ +""" +Shared methods for Index subclasses backed by ExtensionArray. +""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Callable, + TypeVar, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) + +from pandas.core.dtypes.generic import ABCDataFrame + +from pandas.core.indexes.base import Index + +if TYPE_CHECKING: + from pandas.core.arrays import IntervalArray + from pandas.core.arrays._mixins import NDArrayBackedExtensionArray + +_T = TypeVar("_T", bound="NDArrayBackedExtensionIndex") +_ExtensionIndexT = TypeVar("_ExtensionIndexT", bound="ExtensionIndex") + + +def _inherit_from_data( + name: str, delegate: type, cache: bool = False, wrap: bool = False +): + """ + Make an alias for a method of the underlying ExtensionArray. + + Parameters + ---------- + name : str + Name of an attribute the class should inherit from its EA parent. + delegate : class + cache : bool, default False + Whether to convert wrapped properties into cache_readonly + wrap : bool, default False + Whether to wrap the inherited result in an Index. + + Returns + ------- + attribute, method, property, or cache_readonly + """ + attr = getattr(delegate, name) + + if isinstance(attr, property) or type(attr).__name__ == "getset_descriptor": + # getset_descriptor i.e. 
property defined in cython class + if cache: + + def cached(self): + return getattr(self._data, name) + + cached.__name__ = name + cached.__doc__ = attr.__doc__ + method = cache_readonly(cached) + + else: + + def fget(self): + result = getattr(self._data, name) + if wrap: + if isinstance(result, type(self._data)): + return type(self)._simple_new(result, name=self.name) + elif isinstance(result, ABCDataFrame): + return result.set_index(self) + return Index(result, name=self.name) + return result + + def fset(self, value): + setattr(self._data, name, value) + + fget.__name__ = name + fget.__doc__ = attr.__doc__ + + method = property(fget, fset) + + elif not callable(attr): + # just a normal attribute, no wrapping + method = attr + + else: + # error: Incompatible redefinition (redefinition with type "Callable[[Any, + # VarArg(Any), KwArg(Any)], Any]", original type "property") + def method(self, *args, **kwargs): # type: ignore[misc] + if "inplace" in kwargs: + raise ValueError(f"cannot use inplace with {type(self).__name__}") + result = attr(self._data, *args, **kwargs) + if wrap: + if isinstance(result, type(self._data)): + return type(self)._simple_new(result, name=self.name) + elif isinstance(result, ABCDataFrame): + return result.set_index(self) + return Index(result, name=self.name) + return result + + # error: "property" has no attribute "__name__" + method.__name__ = name # type: ignore[attr-defined] + method.__doc__ = attr.__doc__ + return method + + +def inherit_names( + names: list[str], delegate: type, cache: bool = False, wrap: bool = False +) -> Callable[[type[_ExtensionIndexT]], type[_ExtensionIndexT]]: + """ + Class decorator to pin attributes from an ExtensionArray to a Index subclass. + + Parameters + ---------- + names : List[str] + delegate : class + cache : bool, default False + wrap : bool, default False + Whether to wrap the inherited result in an Index. + """ + + def wrapper(cls: type[_ExtensionIndexT]) -> type[_ExtensionIndexT]: + for name in names: + meth = _inherit_from_data(name, delegate, cache=cache, wrap=wrap) + setattr(cls, name, meth) + + return cls + + return wrapper + + +class ExtensionIndex(Index): + """ + Index subclass for indexes backed by ExtensionArray. + """ + + # The base class already passes through to _data: + # size, __len__, dtype + + _data: IntervalArray | NDArrayBackedExtensionArray + + # --------------------------------------------------------------------- + + def _validate_fill_value(self, value): + """ + Convert value to be insertable to underlying array. + """ + return self._data._validate_setitem_value(value) + + @doc(Index.map) + def map(self, mapper, na_action=None): + # Try to run function on index first, and then on elements of index + # Especially important for group-by functionality + try: + result = mapper(self) + + # Try to use this result if we can + if isinstance(result, np.ndarray): + result = Index(result) + + if not isinstance(result, Index): + raise TypeError("The map function must return an Index object") + return result + except Exception: + return self.astype(object).map(mapper) + + @cache_readonly + def _isnan(self) -> npt.NDArray[np.bool_]: + # error: Incompatible return value type (got "ExtensionArray", expected + # "ndarray") + return self._data.isna() # type: ignore[return-value] + + +class NDArrayBackedExtensionIndex(ExtensionIndex): + """ + Index subclass for indexes backed by NDArrayBackedExtensionArray. 
+ """ + + _data: NDArrayBackedExtensionArray + + def _get_engine_target(self) -> np.ndarray: + return self._data._ndarray + + def _from_join_target(self, result: np.ndarray) -> ArrayLike: + assert result.dtype == self._data._ndarray.dtype + return self._data._from_backing_data(result) diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py new file mode 100644 index 00000000..043fd07b --- /dev/null +++ b/pandas/core/indexes/frozen.py @@ -0,0 +1,114 @@ +""" +frozen (immutable) data structures to support MultiIndexing + +These are used for: + +- .names (FrozenList) + +""" +from __future__ import annotations + +from typing import ( + Any, + NoReturn, +) + +from pandas.core.base import PandasObject + +from pandas.io.formats.printing import pprint_thing + + +class FrozenList(PandasObject, list): + """ + Container that doesn't allow setting item *but* + because it's technically hashable, will be used + for lookups, appropriately, etc. + """ + + # Side note: This has to be of type list. Otherwise, + # it messes up PyTables type checks. + + def union(self, other) -> FrozenList: + """ + Returns a FrozenList with other concatenated to the end of self. + + Parameters + ---------- + other : array-like + The array-like whose elements we are concatenating. + + Returns + ------- + FrozenList + The collection difference between self and other. + """ + if isinstance(other, tuple): + other = list(other) + return type(self)(super().__add__(other)) + + def difference(self, other) -> FrozenList: + """ + Returns a FrozenList with elements from other removed from self. + + Parameters + ---------- + other : array-like + The array-like whose elements we are removing self. + + Returns + ------- + FrozenList + The collection difference between self and other. + """ + other = set(other) + temp = [x for x in self if x not in other] + return type(self)(temp) + + # TODO: Consider deprecating these in favor of `union` (xref gh-15506) + __add__ = __iadd__ = union + + def __getitem__(self, n): + if isinstance(n, slice): + return type(self)(super().__getitem__(n)) + return super().__getitem__(n) + + def __radd__(self, other): + if isinstance(other, tuple): + other = list(other) + return type(self)(other + list(self)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, (tuple, FrozenList)): + other = list(other) + return super().__eq__(other) + + __req__ = __eq__ + + def __mul__(self, other): + return type(self)(super().__mul__(other)) + + __imul__ = __mul__ + + def __reduce__(self): + return type(self), (list(self),) + + # error: Signature of "__hash__" incompatible with supertype "list" + def __hash__(self) -> int: # type: ignore[override] + return hash(tuple(self)) + + def _disabled(self, *args, **kwargs) -> NoReturn: + """ + This method will not function because object is immutable. 
+ """ + raise TypeError(f"'{type(self).__name__}' does not support mutable operations.") + + def __str__(self) -> str: + return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n")) + + def __repr__(self) -> str: + return f"{type(self).__name__}({str(self)})" + + __setitem__ = __setslice__ = _disabled # type: ignore[assignment] + __delitem__ = __delslice__ = _disabled + pop = append = extend = _disabled + remove = sort = insert = _disabled # type: ignore[assignment] diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py new file mode 100644 index 00000000..b993af56 --- /dev/null +++ b/pandas/core/indexes/interval.py @@ -0,0 +1,1142 @@ +""" define the IntervalIndex """ +from __future__ import annotations + +from operator import ( + le, + lt, +) +import textwrap +from typing import ( + Any, + Hashable, + Literal, +) + +import numpy as np + +from pandas._libs import lib +from pandas._libs.interval import ( + Interval, + IntervalMixin, + IntervalTree, +) +from pandas._libs.tslibs import ( + BaseOffset, + Timedelta, + Timestamp, + to_offset, +) +from pandas._typing import ( + Dtype, + DtypeObj, + IntervalClosedType, + npt, +) +from pandas.errors import InvalidIndexError +from pandas.util._decorators import ( + Appender, + cache_readonly, +) +from pandas.util._exceptions import rewrite_exception + +from pandas.core.dtypes.cast import ( + find_common_type, + infer_dtype_from_scalar, + maybe_box_datetimelike, + maybe_downcast_numeric, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_interval_dtype, + is_list_like, + is_number, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.dtypes import IntervalDtype +from pandas.core.dtypes.missing import is_valid_na_for_dtype + +from pandas.core.algorithms import unique +from pandas.core.arrays.interval import ( + IntervalArray, + _interval_shared_docs, +) +import pandas.core.common as com +from pandas.core.indexers import is_valid_positional_slice +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, + _index_shared_docs, + ensure_index, + maybe_extract_name, +) +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + date_range, +) +from pandas.core.indexes.extension import ( + ExtensionIndex, + inherit_names, +) +from pandas.core.indexes.multi import MultiIndex +from pandas.core.indexes.timedeltas import ( + TimedeltaIndex, + timedelta_range, +) + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) + +_index_doc_kwargs.update( + { + "klass": "IntervalIndex", + "qualname": "IntervalIndex", + "target_klass": "IntervalIndex or list of Intervals", + "name": textwrap.dedent( + """\ + name : object, optional + Name to be stored in the index. 
+ """ + ), + } +) + + +def _get_next_label(label): + dtype = getattr(label, "dtype", type(label)) + if isinstance(label, (Timestamp, Timedelta)): + dtype = "datetime64" + if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): + return label + np.timedelta64(1, "ns") + elif is_integer_dtype(dtype): + return label + 1 + elif is_float_dtype(dtype): + return np.nextafter(label, np.infty) + else: + raise TypeError(f"cannot determine next label for type {repr(type(label))}") + + +def _get_prev_label(label): + dtype = getattr(label, "dtype", type(label)) + if isinstance(label, (Timestamp, Timedelta)): + dtype = "datetime64" + if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): + return label - np.timedelta64(1, "ns") + elif is_integer_dtype(dtype): + return label - 1 + elif is_float_dtype(dtype): + return np.nextafter(label, -np.infty) + else: + raise TypeError(f"cannot determine next label for type {repr(type(label))}") + + +def _new_IntervalIndex(cls, d): + """ + This is called upon unpickling, rather than the default which doesn't have + arguments and breaks __new__. + """ + return cls.from_arrays(**d) + + +@Appender( + _interval_shared_docs["class"] + % { + "klass": "IntervalIndex", + "summary": "Immutable index of intervals that are closed on the same side.", + "name": _index_doc_kwargs["name"], + "versionadded": "0.20.0", + "extra_attributes": "is_overlapping\nvalues\n", + "extra_methods": "", + "examples": textwrap.dedent( + """\ + Examples + -------- + A new ``IntervalIndex`` is typically constructed using + :func:`interval_range`: + + >>> pd.interval_range(start=0, end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], + dtype='interval[int64, right]') + + It may also be constructed using one of the constructor + methods: :meth:`IntervalIndex.from_arrays`, + :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. + + See further examples in the doc strings of ``interval_range`` and the + mentioned constructor methods. 
+ """ + ), + } +) +@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True) +@inherit_names( + [ + "__array__", + "overlaps", + "contains", + "closed_left", + "closed_right", + "open_left", + "open_right", + "is_empty", + ], + IntervalArray, +) +@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True) +class IntervalIndex(ExtensionIndex): + _typ = "intervalindex" + + # annotate properties pinned via inherit_names + closed: IntervalClosedType + is_non_overlapping_monotonic: bool + closed_left: bool + closed_right: bool + open_left: bool + open_right: bool + + _data: IntervalArray + _values: IntervalArray + _can_hold_strings = False + _data_cls = IntervalArray + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + data, + closed=None, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + verify_integrity: bool = True, + ) -> IntervalIndex: + + name = maybe_extract_name(name, data, cls) + + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray( + data, + closed=closed, + copy=copy, + dtype=dtype, + verify_integrity=verify_integrity, + ) + + return cls._simple_new(array, name) + + @classmethod + @Appender( + _interval_shared_docs["from_breaks"] + % { + "klass": "IntervalIndex", + "name": textwrap.dedent( + """ + name : str, optional + Name of the resulting IntervalIndex.""" + ), + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + dtype='interval[int64, right]') + """ + ), + } + ) + def from_breaks( + cls, + breaks, + closed: IntervalClosedType | None = "right", + name: Hashable = None, + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalIndex: + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray.from_breaks( + breaks, closed=closed, copy=copy, dtype=dtype + ) + return cls._simple_new(array, name=name) + + @classmethod + @Appender( + _interval_shared_docs["from_arrays"] + % { + "klass": "IntervalIndex", + "name": textwrap.dedent( + """ + name : str, optional + Name of the resulting IntervalIndex.""" + ), + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + dtype='interval[int64, right]') + """ + ), + } + ) + def from_arrays( + cls, + left, + right, + closed: IntervalClosedType = "right", + name: Hashable = None, + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalIndex: + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray.from_arrays( + left, right, closed, copy=copy, dtype=dtype + ) + return cls._simple_new(array, name=name) + + @classmethod + @Appender( + _interval_shared_docs["from_tuples"] + % { + "klass": "IntervalIndex", + "name": textwrap.dedent( + """ + name : str, optional + Name of the resulting IntervalIndex.""" + ), + "examples": textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) + IntervalIndex([(0, 1], (1, 2]], + dtype='interval[int64, right]') + """ + ), + } + ) + def from_tuples( + cls, + data, + closed: str = "right", + name: Hashable = None, + copy: bool = False, + dtype: Dtype | None = None, + ) -> IntervalIndex: + with rewrite_exception("IntervalArray", cls.__name__): + arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) + return cls._simple_new(arr, name=name) 
+ + # -------------------------------------------------------------------- + # error: Return type "IntervalTree" of "_engine" incompatible with return type + # "Union[IndexEngine, ExtensionEngine]" in supertype "Index" + @cache_readonly + def _engine(self) -> IntervalTree: # type: ignore[override] + left = self._maybe_convert_i8(self.left) + right = self._maybe_convert_i8(self.right) + return IntervalTree(left, right, closed=self.closed) + + def __contains__(self, key: Any) -> bool: + """ + return a boolean if this key is IN the index + We *only* accept an Interval + + Parameters + ---------- + key : Interval + + Returns + ------- + bool + """ + hash(key) + if not isinstance(key, Interval): + if is_valid_na_for_dtype(key, self.dtype): + return self.hasnans + return False + + try: + self.get_loc(key) + return True + except KeyError: + return False + + @cache_readonly + def _multiindex(self) -> MultiIndex: + return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) + + def __reduce__(self): + d = { + "left": self.left, + "right": self.right, + "closed": self.closed, + "name": self.name, + } + return _new_IntervalIndex, (type(self), d), None + + @property + def inferred_type(self) -> str: + """Return a string of the type inferred from the values""" + return "interval" + + @Appender(Index.memory_usage.__doc__) + def memory_usage(self, deep: bool = False) -> int: + # we don't use an explicit engine + # so return the bytes here + return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) + + # IntervalTree doesn't have a is_monotonic_decreasing, so have to override + # the Index implementation + @cache_readonly + def is_monotonic_decreasing(self) -> bool: + """ + Return True if the IntervalIndex is monotonic decreasing (only equal or + decreasing values), else False + """ + return self[::-1].is_monotonic_increasing + + @cache_readonly + def is_unique(self) -> bool: + """ + Return True if the IntervalIndex contains unique elements, else False. + """ + left = self.left + right = self.right + + if self.isna().sum() > 1: + return False + + if left.is_unique or right.is_unique: + return True + + seen_pairs = set() + check_idx = np.where(left.duplicated(keep=False))[0] + for idx in check_idx: + pair = (left[idx], right[idx]) + if pair in seen_pairs: + return False + seen_pairs.add(pair) + + return True + + @property + def is_overlapping(self) -> bool: + """ + Return True if the IntervalIndex has overlapping intervals, else False. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + Returns + ------- + bool + Boolean indicating if the IntervalIndex has overlapping intervals. + + See Also + -------- + Interval.overlaps : Check whether two Interval objects overlap. + IntervalIndex.overlaps : Check an IntervalIndex elementwise for + overlaps. 
+ + Examples + -------- + >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)]) + >>> index + IntervalIndex([(0, 2], (1, 3], (4, 5]], + dtype='interval[int64, right]') + >>> index.is_overlapping + True + + Intervals that share closed endpoints overlap: + + >>> index = pd.interval_range(0, 3, closed='both') + >>> index + IntervalIndex([[0, 1], [1, 2], [2, 3]], + dtype='interval[int64, both]') + >>> index.is_overlapping + True + + Intervals that only have an open endpoint in common do not overlap: + + >>> index = pd.interval_range(0, 3, closed='left') + >>> index + IntervalIndex([[0, 1), [1, 2), [2, 3)], + dtype='interval[int64, left]') + >>> index.is_overlapping + False + """ + # GH 23309 + return self._engine.is_overlapping + + def _needs_i8_conversion(self, key) -> bool: + """ + Check if a given key needs i8 conversion. Conversion is necessary for + Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An + Interval-like requires conversion if its endpoints are one of the + aforementioned types. + + Assumes that any list-like data has already been cast to an Index. + + Parameters + ---------- + key : scalar or Index-like + The key that should be checked for i8 conversion + + Returns + ------- + bool + """ + if is_interval_dtype(key) or isinstance(key, Interval): + return self._needs_i8_conversion(key.left) + + i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex) + return isinstance(key, i8_types) + + def _maybe_convert_i8(self, key): + """ + Maybe convert a given key to its equivalent i8 value(s). Used as a + preprocessing step prior to IntervalTree queries (self._engine), which + expects numeric data. + + Parameters + ---------- + key : scalar or list-like + The key that should maybe be converted to i8. + + Returns + ------- + scalar or list-like + The original key if no conversion occurred, int if converted scalar, + Int64Index if converted list-like. + """ + original = key + if is_list_like(key): + key = ensure_index(key) + + if not self._needs_i8_conversion(key): + return original + + scalar = is_scalar(key) + if is_interval_dtype(key) or isinstance(key, Interval): + # convert left/right and reconstruct + left = self._maybe_convert_i8(key.left) + right = self._maybe_convert_i8(key.right) + constructor = Interval if scalar else IntervalIndex.from_arrays + # error: "object" not callable + return constructor( + left, right, closed=self.closed + ) # type: ignore[operator] + + if scalar: + # Timestamp/Timedelta + key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) + if lib.is_period(key): + key_i8 = key.ordinal + elif isinstance(key_i8, Timestamp): + key_i8 = key_i8.value + elif isinstance(key_i8, (np.datetime64, np.timedelta64)): + key_i8 = key_i8.view("i8") + else: + # DatetimeIndex/TimedeltaIndex + key_dtype, key_i8 = key.dtype, Index(key.asi8) + if key.hasnans: + # convert NaT from its i8 value to np.nan so it's not viewed + # as a valid value, maybe causing errors (e.g. 
is_overlapping) + key_i8 = key_i8.where(~key._isnan) + + # ensure consistency with IntervalIndex subtype + # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], + # ExtensionDtype]" has no attribute "subtype" + subtype = self.dtype.subtype # type: ignore[union-attr] + + if not is_dtype_equal(subtype, key_dtype): + raise ValueError( + f"Cannot index an IntervalIndex of subtype {subtype} with " + f"values of dtype {key_dtype}" + ) + + return key_i8 + + def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): + if not self.is_non_overlapping_monotonic: + raise KeyError( + "can only get slices from an IntervalIndex if bounds are " + "non-overlapping and all monotonic increasing or decreasing" + ) + + if isinstance(label, (IntervalMixin, IntervalIndex)): + raise NotImplementedError("Interval objects are not currently supported") + + # GH 20921: "not is_monotonic_increasing" for the second condition + # instead of "is_monotonic_decreasing" to account for single element + # indexes being both increasing and decreasing + if (side == "left" and self.left.is_monotonic_increasing) or ( + side == "right" and not self.left.is_monotonic_increasing + ): + sub_idx = self.right + if self.open_right: + label = _get_next_label(label) + else: + sub_idx = self.left + if self.open_left: + label = _get_prev_label(label) + + return sub_idx._searchsorted_monotonic(label, side) + + # -------------------------------------------------------------------- + # Indexing Methods + + def get_loc( + self, key, method: str | None = None, tolerance=None + ) -> int | slice | np.ndarray: + """ + Get integer location, slice or boolean mask for requested label. + + Parameters + ---------- + key : label + method : {None}, optional + * default: matches where the label is within an interval only. + + .. deprecated:: 1.4 + + Returns + ------- + int if unique index, slice if monotonic index, else mask + + Examples + -------- + >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) + >>> index = pd.IntervalIndex([i1, i2]) + >>> index.get_loc(1) + 0 + + You can also supply a point inside an interval. + + >>> index.get_loc(1.5) + 1 + + If a label is in several intervals, you get the locations of all the + relevant intervals. + + >>> i3 = pd.Interval(0, 2) + >>> overlapping_index = pd.IntervalIndex([i1, i2, i3]) + >>> overlapping_index.get_loc(0.5) + array([ True, False, True]) + + Only exact matches will be returned if an interval is provided. + + >>> index.get_loc(pd.Interval(0, 1)) + 0 + """ + self._check_indexing_method(method) + self._check_indexing_error(key) + + if isinstance(key, Interval): + if self.closed != key.closed: + raise KeyError(key) + mask = (self.left == key.left) & (self.right == key.right) + elif is_valid_na_for_dtype(key, self.dtype): + mask = self.isna() + else: + # assume scalar + op_left = le if self.closed_left else lt + op_right = le if self.closed_right else lt + try: + mask = op_left(self.left, key) & op_right(key, self.right) + except TypeError as err: + # scalar is not comparable to II subtype --> invalid label + raise KeyError(key) from err + + matches = mask.sum() + if matches == 0: + raise KeyError(key) + elif matches == 1: + return mask.argmax() + + res = lib.maybe_booleans_to_slice(mask.view("u1")) + if isinstance(res, slice) and res.stop is None: + # TODO: DO this in maybe_booleans_to_slice? 
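+            # make the implicit stop (None, i.e. "to the end of the index") explicit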
+ res = slice(res.start, len(self), res.step) + return res + + def _get_indexer( + self, + target: Index, + method: str | None = None, + limit: int | None = None, + tolerance: Any | None = None, + ) -> npt.NDArray[np.intp]: + + if isinstance(target, IntervalIndex): + # We only get here with not self.is_overlapping + # -> at most one match per interval in target + # want exact matches -> need both left/right to match, so defer to + # left/right get_indexer, compare elementwise, equality -> match + indexer = self._get_indexer_unique_sides(target) + + elif not is_object_dtype(target.dtype): + # homogeneous scalar index: use IntervalTree + # we should always have self._should_partial_index(target) here + target = self._maybe_convert_i8(target) + indexer = self._engine.get_indexer(target.values) + else: + # heterogeneous scalar index: defer elementwise to get_loc + # we should always have self._should_partial_index(target) here + return self._get_indexer_pointwise(target)[0] + + return ensure_platform_int(indexer) + + @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) + def get_indexer_non_unique( + self, target: Index + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + target = ensure_index(target) + + if not self._should_compare(target) and not self._should_partial_index(target): + # e.g. IntervalIndex with different closed or incompatible subtype + # -> no matches + return self._get_indexer_non_comparable(target, None, unique=False) + + elif isinstance(target, IntervalIndex): + if self.left.is_unique and self.right.is_unique: + # fastpath available even if we don't have self._index_as_unique + indexer = self._get_indexer_unique_sides(target) + missing = (indexer == -1).nonzero()[0] + else: + return self._get_indexer_pointwise(target) + + elif is_object_dtype(target.dtype) or not self._should_partial_index(target): + # target might contain intervals: defer elementwise to get_loc + return self._get_indexer_pointwise(target) + + else: + # Note: this case behaves differently from other Index subclasses + # because IntervalIndex does partial-int indexing + target = self._maybe_convert_i8(target) + indexer, missing = self._engine.get_indexer_non_unique(target.values) + + return ensure_platform_int(indexer), ensure_platform_int(missing) + + def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]: + """ + _get_indexer specialized to the case where both of our sides are unique. + """ + # Caller is responsible for checking + # `self.left.is_unique and self.right.is_unique` + + left_indexer = self.left.get_indexer(target.left) + right_indexer = self.right.get_indexer(target.right) + indexer = np.where(left_indexer == right_indexer, left_indexer, -1) + return indexer + + def _get_indexer_pointwise( + self, target: Index + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """ + pointwise implementation for get_indexer and get_indexer_non_unique. + """ + indexer, missing = [], [] + for i, key in enumerate(target): + try: + locs = self.get_loc(key) + if isinstance(locs, slice): + # Only needed for get_indexer_non_unique + locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") + elif lib.is_integer(locs): + locs = np.array(locs, ndmin=1) + else: + # otherwise we have ndarray[bool] + locs = np.where(locs)[0] + except KeyError: + missing.append(i) + locs = np.array([-1]) + except InvalidIndexError: + # i.e. non-scalar key e.g. a tuple. 
+ # see test_append_different_columns_types_raises + missing.append(i) + locs = np.array([-1]) + + indexer.append(locs) + + indexer = np.concatenate(indexer) + return ensure_platform_int(indexer), ensure_platform_int(missing) + + @cache_readonly + def _index_as_unique(self) -> bool: + return not self.is_overlapping and self._engine._na_count < 2 + + _requires_unique_msg = ( + "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique" + ) + + def _convert_slice_indexer(self, key: slice, kind: str): + if not (key.step is None or key.step == 1): + # GH#31658 if label-based, we require step == 1, + # if positional, we disallow float start/stop + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + if kind == "loc": + raise ValueError(msg) + elif kind == "getitem": + if not is_valid_positional_slice(key): + # i.e. this cannot be interpreted as a positional slice + raise ValueError(msg) + + return super()._convert_slice_indexer(key, kind) + + @cache_readonly + def _should_fallback_to_positional(self) -> bool: + # integer lookups in Series.__getitem__ are unambiguously + # positional in this case + # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], + # ExtensionDtype]" has no attribute "subtype" + return self.dtype.subtype.kind in ["m", "M"] # type: ignore[union-attr] + + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") + return getattr(self, side)._maybe_cast_slice_bound(label, side) + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + if not isinstance(dtype, IntervalDtype): + return False + common_subtype = find_common_type([self.dtype, dtype]) + return not is_object_dtype(common_subtype) + + # -------------------------------------------------------------------- + + @cache_readonly + def left(self) -> Index: + return Index(self._data.left, copy=False) + + @cache_readonly + def right(self) -> Index: + return Index(self._data.right, copy=False) + + @cache_readonly + def mid(self) -> Index: + return Index(self._data.mid, copy=False) + + @property + def length(self) -> Index: + return Index(self._data.length, copy=False) + + # -------------------------------------------------------------------- + # Rendering Methods + # __repr__ associated methods are based on MultiIndex + + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + # matches base class except for whitespace padding + return header + list(self._format_native_types(na_rep=na_rep)) + + def _format_native_types( + self, *, na_rep="NaN", quoting=None, **kwargs + ) -> npt.NDArray[np.object_]: + # GH 28210: use base method but with different default na_rep + return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) + + def _format_data(self, name=None) -> str: + # TODO: integrate with categorical and make generic + # name argument is unused here; just for compat with base / categorical + return self._data._format_data() + "," + self._format_space() + + # -------------------------------------------------------------------- + # Set Operations + + def _intersection(self, other, sort): + """ + intersection specialized to the case with matching dtypes. 
+ """ + # For IntervalIndex we also know other.closed == self.closed + if self.left.is_unique and self.right.is_unique: + taken = self._intersection_unique(other) + elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1: + # Swap other/self if other is unique and self does not have + # multiple NaNs + taken = other._intersection_unique(self) + else: + # duplicates + taken = self._intersection_non_unique(other) + + if sort is None: + taken = taken.sort_values() + + return taken + + def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex: + """ + Used when the IntervalIndex does not have any common endpoint, + no matter left or right. + Return the intersection with another IntervalIndex. + Parameters + ---------- + other : IntervalIndex + Returns + ------- + IntervalIndex + """ + # Note: this is much more performant than super()._intersection(other) + lindexer = self.left.get_indexer(other.left) + rindexer = self.right.get_indexer(other.right) + + match = (lindexer == rindexer) & (lindexer != -1) + indexer = lindexer.take(match.nonzero()[0]) + indexer = unique(indexer) + + return self.take(indexer) + + def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex: + """ + Used when the IntervalIndex does have some common endpoints, + on either sides. + Return the intersection with another IntervalIndex. + + Parameters + ---------- + other : IntervalIndex + + Returns + ------- + IntervalIndex + """ + # Note: this is about 3.25x faster than super()._intersection(other) + # in IntervalIndexMethod.time_intersection_both_duplicate(1000) + mask = np.zeros(len(self), dtype=bool) + + if self.hasnans and other.hasnans: + first_nan_loc = np.arange(len(self))[self.isna()][0] + mask[first_nan_loc] = True + + other_tups = set(zip(other.left, other.right)) + for i, tup in enumerate(zip(self.left, self.right)): + if tup in other_tups: + mask[i] = True + + return self[mask] + + # -------------------------------------------------------------------- + + def _get_engine_target(self) -> np.ndarray: + # Note: we _could_ use libjoin functions by either casting to object + # dtype or constructing tuples (faster than constructing Intervals) + # but the libjoin fastpaths are no longer fast in these cases. + raise NotImplementedError( + "IntervalIndex does not use libjoin fastpaths or pass values to " + "IndexEngine objects" + ) + + def _from_join_target(self, result): + raise NotImplementedError("IntervalIndex does not use libjoin fastpaths") + + # TODO: arithmetic operations + + +def _is_valid_endpoint(endpoint) -> bool: + """ + Helper for interval_range to check if start/end are valid types. + """ + return any( + [ + is_number(endpoint), + isinstance(endpoint, Timestamp), + isinstance(endpoint, Timedelta), + endpoint is None, + ] + ) + + +def _is_type_compatible(a, b) -> bool: + """ + Helper for interval_range to check type compat of start/end/freq. + """ + is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset)) + is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset)) + return ( + (is_number(a) and is_number(b)) + or (is_ts_compat(a) and is_ts_compat(b)) + or (is_td_compat(a) and is_td_compat(b)) + or com.any_none(a, b) + ) + + +def interval_range( + start=None, + end=None, + periods=None, + freq=None, + name: Hashable = None, + closed: IntervalClosedType = "right", +) -> IntervalIndex: + """ + Return a fixed frequency IntervalIndex. + + Parameters + ---------- + start : numeric or datetime-like, default None + Left bound for generating intervals. 
+ end : numeric or datetime-like, default None + Right bound for generating intervals. + periods : int, default None + Number of periods to generate. + freq : numeric, str, or DateOffset, default None + The length of each interval. Must be consistent with the type of start + and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 + for numeric and 'D' for datetime-like. + name : str, default None + Name of the resulting IntervalIndex. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + + Returns + ------- + IntervalIndex + + See Also + -------- + IntervalIndex : An Index of intervals that are all closed on the same side. + + Notes + ----- + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``IntervalIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end``, inclusively. + + To learn more about datetime-like frequency strings, please see `this link + `__. + + Examples + -------- + Numeric ``start`` and ``end`` is supported. + + >>> pd.interval_range(start=0, end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], + dtype='interval[int64, right]') + + Additionally, datetime-like input is also supported. + + >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), + ... end=pd.Timestamp('2017-01-04')) + IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], + (2017-01-03, 2017-01-04]], + dtype='interval[datetime64[ns], right]') + + The ``freq`` parameter specifies the frequency between the left and right. + endpoints of the individual intervals within the ``IntervalIndex``. For + numeric ``start`` and ``end``, the frequency must also be numeric. + + >>> pd.interval_range(start=0, periods=4, freq=1.5) + IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], + dtype='interval[float64, right]') + + Similarly, for datetime-like ``start`` and ``end``, the frequency must be + convertible to a DateOffset. + + >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), + ... periods=3, freq='MS') + IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], + (2017-03-01, 2017-04-01]], + dtype='interval[datetime64[ns], right]') + + Specify ``start``, ``end``, and ``periods``; the frequency is generated + automatically (linearly spaced). + + >>> pd.interval_range(start=0, end=6, periods=4) + IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], + dtype='interval[float64, right]') + + The ``closed`` parameter specifies which endpoints of the individual + intervals within the ``IntervalIndex`` are closed. 
+ + >>> pd.interval_range(end=5, periods=4, closed='both') + IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], + dtype='interval[int64, both]') + """ + start = maybe_box_datetimelike(start) + end = maybe_box_datetimelike(end) + endpoint = start if start is not None else end + + if freq is None and com.any_none(periods, start, end): + freq = 1 if is_number(endpoint) else "D" + + if com.count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + + if not _is_valid_endpoint(start): + raise ValueError(f"start must be numeric or datetime-like, got {start}") + elif not _is_valid_endpoint(end): + raise ValueError(f"end must be numeric or datetime-like, got {end}") + + if is_float(periods): + periods = int(periods) + elif not is_integer(periods) and periods is not None: + raise TypeError(f"periods must be a number, got {periods}") + + if freq is not None and not is_number(freq): + try: + freq = to_offset(freq) + except ValueError as err: + raise ValueError( + f"freq must be numeric or convertible to DateOffset, got {freq}" + ) from err + + # verify type compatibility + if not all( + [ + _is_type_compatible(start, end), + _is_type_compatible(start, freq), + _is_type_compatible(end, freq), + ] + ): + raise TypeError("start, end, freq need to be type compatible") + + # +1 to convert interval count to breaks count (n breaks = n-1 intervals) + if periods is not None: + periods += 1 + + breaks: np.ndarray | TimedeltaIndex | DatetimeIndex + + if is_number(endpoint): + # force consistency between start/end/freq (lower end if freq skips it) + if com.all_not_none(start, end, freq): + end -= (end - start) % freq + + # compute the period/start/end if unspecified (at most one) + if periods is None: + periods = int((end - start) // freq) + 1 + elif start is None: + start = end - (periods - 1) * freq + elif end is None: + end = start + (periods - 1) * freq + + breaks = np.linspace(start, end, periods) + if all(is_integer(x) for x in com.not_none(start, end, freq)): + # np.linspace always produces float output + + # error: Argument 1 to "maybe_downcast_numeric" has incompatible type + # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]"; + # expected "ndarray[Any, Any]" [ + breaks = maybe_downcast_numeric( + breaks, # type: ignore[arg-type] + np.dtype("int64"), + ) + else: + # delegate to the appropriate range function + if isinstance(endpoint, Timestamp): + breaks = date_range(start=start, end=end, periods=periods, freq=freq) + else: + breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) + + return IntervalIndex.from_breaks(breaks, name=name, closed=closed) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py new file mode 100644 index 00000000..e717f928 --- /dev/null +++ b/pandas/core/indexes/multi.py @@ -0,0 +1,3971 @@ +from __future__ import annotations + +from functools import wraps +from sys import getsizeof +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Collection, + Hashable, + Iterable, + List, + Literal, + Sequence, + Tuple, + cast, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + algos as libalgos, + index as libindex, + lib, +) +from pandas._libs.hashtable import duplicated +from pandas._typing import ( + AnyArrayLike, + DtypeObj, + F, + Scalar, + Shape, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import ( + InvalidIndexError, + 
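A small sketch of one interval_range behaviour implemented above (assuming a plain pandas session): when start, end and freq are all given but freq does not evenly divide end - start, the upper break is lowered so the breaks stay on the freq grid:

    import pandas as pd

    pd.interval_range(start=0, end=5, freq=2)
    # IntervalIndex([(0, 2], (2, 4]], dtype='interval[int64, right]')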
PerformanceWarning, + UnsortedIndexError, +) +from pandas.util._decorators import ( + Appender, + cache_readonly, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import coerce_indexer_dtype +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_platform_int, + is_categorical_dtype, + is_extension_array_dtype, + is_hashable, + is_integer, + is_iterator, + is_list_like, + is_object_dtype, + is_scalar, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCTimedeltaIndex, +) +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, +) + +import pandas.core.algorithms as algos +from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import factorize_from_iterables +import pandas.core.common as com +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import ( + Index, + _index_shared_docs, + ensure_index, + get_unanimous_names, +) +from pandas.core.indexes.frozen import FrozenList +from pandas.core.ops.invalid import make_invalid_op +from pandas.core.sorting import ( + get_group_index, + indexer_from_factorized, + lexsort_indexer, +) + +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas import ( + CategoricalIndex, + DataFrame, + Series, + ) + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update( + {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"} +) + + +class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine): + """ + This class manages a MultiIndex by mapping label combinations to positive + integers. + """ + + _base = libindex.UInt64Engine + + def _codes_to_ints(self, codes): + """ + Transform combination(s) of uint64 in one uint64 (each), in a strictly + monotonic way (i.e. respecting the lexicographic order of integer + combinations): see BaseMultiIndexCodesEngine documentation. + + Parameters + ---------- + codes : 1- or 2-dimensional array of dtype uint64 + Combinations of integers (one per row) + + Returns + ------- + scalar or 1-dimensional array, of dtype uint64 + Integer(s) representing one combination (each). + """ + # Shift the representation of each level by the pre-calculated number + # of bits: + codes <<= self.offsets + + # Now sum and OR are in fact interchangeable. This is a simple + # composition of the (disjunct) significant bits of each level (i.e. + # each column in "codes") in a single positive integer: + if codes.ndim == 1: + # Single key + return np.bitwise_or.reduce(codes) + + # Multiple keys + return np.bitwise_or.reduce(codes, axis=1) + + +class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine): + """ + This class manages those (extreme) cases in which the number of possible + label combinations overflows the 64 bits integers, and uses an ObjectEngine + containing Python integers. + """ + + _base = libindex.ObjectEngine + + def _codes_to_ints(self, codes): + """ + Transform combination(s) of uint64 in one Python integer (each), in a + strictly monotonic way (i.e. respecting the lexicographic order of + integer combinations): see BaseMultiIndexCodesEngine documentation. 
+ + Parameters + ---------- + codes : 1- or 2-dimensional array of dtype uint64 + Combinations of integers (one per row) + + Returns + ------- + int, or 1-dimensional array of dtype object + Integer(s) representing one combination (each). + """ + # Shift the representation of each level by the pre-calculated number + # of bits. Since this can overflow uint64, first make sure we are + # working with Python integers: + codes = codes.astype("object") << self.offsets + + # Now sum and OR are in fact interchangeable. This is a simple + # composition of the (disjunct) significant bits of each level (i.e. + # each column in "codes") in a single positive integer (per row): + if codes.ndim == 1: + # Single key + return np.bitwise_or.reduce(codes) + + # Multiple keys + return np.bitwise_or.reduce(codes, axis=1) + + +def names_compat(meth: F) -> F: + """ + A decorator to allow either `name` or `names` keyword but not both. + + This makes it easier to share code with base class. + """ + + @wraps(meth) + def new_meth(self_or_cls, *args, **kwargs): + if "name" in kwargs and "names" in kwargs: + raise TypeError("Can only provide one of `names` and `name`") + elif "name" in kwargs: + kwargs["names"] = kwargs.pop("name") + + return meth(self_or_cls, *args, **kwargs) + + return cast(F, new_meth) + + +class MultiIndex(Index): + """ + A multi-level, or hierarchical, index object for pandas objects. + + Parameters + ---------- + levels : sequence of arrays + The unique labels for each level. + codes : sequence of arrays + Integers for each level designating which label at each location. + sortorder : optional int + Level of sortedness (must be lexicographically sorted by that + level). + names : optional sequence of objects + Names for each of the index levels. (name is accepted for compat). + copy : bool, default False + Copy the meta-data. + verify_integrity : bool, default True + Check that the levels/codes are consistent and valid. + + Attributes + ---------- + names + levels + codes + nlevels + levshape + + Methods + ------- + from_arrays + from_tuples + from_product + from_frame + set_levels + set_codes + to_frame + to_flat_index + sortlevel + droplevel + swaplevel + reorder_levels + remove_unused_levels + get_locs + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_product : Create a MultiIndex from the cartesian product + of iterables. + MultiIndex.from_tuples : Convert list of tuples to a MultiIndex. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + Index : The base pandas Index type. + + Notes + ----- + See the `user guide + `__ + for more. + + Examples + -------- + A new ``MultiIndex`` is typically constructed using one of the helper + methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product` + and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``): + + >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] + >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + + See further examples for how to construct a MultiIndex in the doc strings + of the mentioned helper methods. 
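The engine classes above pack each row's level codes into a single integer by shifting and OR-ing; a standalone numpy sketch of the idea follows (the codes and offsets below are made-up values for illustration, not the engine's own attributes):

    import numpy as np

    codes = np.array([[0, 2], [1, 0]], dtype="uint64")   # two rows, two levels
    offsets = np.array([2, 0], dtype="uint64")           # level 0 shifted past level 1's bits
    packed = np.bitwise_or.reduce(codes << offsets, axis=1)
    # packed -> array([2, 4], dtype=uint64): (0 << 2) | 2 == 2, (1 << 2) | 0 == 4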
+ """ + + _hidden_attrs = Index._hidden_attrs | frozenset() + + # initialize to zero-length tuples to make everything work + _typ = "multiindex" + _names: list[Hashable | None] = [] + _levels = FrozenList() + _codes = FrozenList() + _comparables = ["names"] + + sortorder: int | None + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + levels=None, + codes=None, + sortorder=None, + names=None, + dtype=None, + copy=False, + name=None, + verify_integrity: bool = True, + ) -> MultiIndex: + + # compat with Index + if name is not None: + names = name + if levels is None or codes is None: + raise TypeError("Must pass both levels and codes") + if len(levels) != len(codes): + raise ValueError("Length of levels and codes must be the same.") + if len(levels) == 0: + raise ValueError("Must pass non-zero number of levels/codes") + + result = object.__new__(cls) + result._cache = {} + + # we've already validated levels and codes, so shortcut here + result._set_levels(levels, copy=copy, validate=False) + result._set_codes(codes, copy=copy, validate=False) + + result._names = [None] * len(levels) + if names is not None: + # handles name validation + result._set_names(names) + + if sortorder is not None: + result.sortorder = int(sortorder) + else: + result.sortorder = sortorder + + if verify_integrity: + new_codes = result._verify_integrity() + result._codes = new_codes + + result._reset_identity() + + return result + + def _validate_codes(self, level: list, code: list): + """ + Reassign code values as -1 if their corresponding levels are NaN. + + Parameters + ---------- + code : list + Code to reassign. + level : list + Level to check for missing values (NaN, NaT, None). + + Returns + ------- + new code where code value = -1 if it corresponds + to a level with missing values (NaN, NaT, None). + """ + null_mask = isna(level) + if np.any(null_mask): + # error: Incompatible types in assignment + # (expression has type "ndarray[Any, dtype[Any]]", + # variable has type "List[Any]") + code = np.where(null_mask[code], -1, code) # type: ignore[assignment] + return code + + def _verify_integrity(self, codes: list | None = None, levels: list | None = None): + """ + Parameters + ---------- + codes : optional list + Codes to check for validity. Defaults to current codes. + levels : optional list + Levels to check for validity. Defaults to current levels. + + Raises + ------ + ValueError + If length of levels and codes don't match, if the codes for any + level would exceed level bounds, or there are any duplicate levels. + + Returns + ------- + new codes where code value = -1 if it corresponds to a + NaN level. + """ + # NOTE: Currently does not check, among other things, that cached + # nlevels matches nor that sortorder matches actually sortorder. + codes = codes or self.codes + levels = levels or self.levels + + if len(levels) != len(codes): + raise ValueError( + "Length of levels and codes must match. NOTE: " + "this index is in an inconsistent state." + ) + codes_length = len(codes[0]) + for i, (level, level_codes) in enumerate(zip(levels, codes)): + if len(level_codes) != codes_length: + raise ValueError( + f"Unequal code lengths: {[len(code_) for code_ in codes]}" + ) + if len(level_codes) and level_codes.max() >= len(level): + raise ValueError( + f"On level {i}, code max ({level_codes.max()}) >= length of " + f"level ({len(level)}). 
NOTE: this index is in an " + "inconsistent state" + ) + if len(level_codes) and level_codes.min() < -1: + raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1") + if not level.is_unique: + raise ValueError( + f"Level values must be unique: {list(level)} on level {i}" + ) + if self.sortorder is not None: + if self.sortorder > _lexsort_depth(self.codes, self.nlevels): + raise ValueError( + "Value for sortorder must be inferior or equal to actual " + f"lexsort_depth: sortorder {self.sortorder} " + f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}" + ) + + codes = [ + self._validate_codes(level, code) for level, code in zip(levels, codes) + ] + new_codes = FrozenList(codes) + return new_codes + + @classmethod + def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex: + """ + Convert arrays to MultiIndex. + + Parameters + ---------- + arrays : list / sequence of array-likes + Each array-like gives one level's value for each data point. + len(arrays) is the number of levels. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] + >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + """ + error_msg = "Input must be a list / sequence of array-likes." + if not is_list_like(arrays): + raise TypeError(error_msg) + elif is_iterator(arrays): + arrays = list(arrays) + + # Check if elements of array are list-like + for array in arrays: + if not is_list_like(array): + raise TypeError(error_msg) + + # Check if lengths of all arrays are equal or not, + # raise ValueError, if not + for i in range(1, len(arrays)): + if len(arrays[i]) != len(arrays[i - 1]): + raise ValueError("all arrays must be same length") + + codes, levels = factorize_from_iterables(arrays) + if names is lib.no_default: + names = [getattr(arr, "name", None) for arr in arrays] + + return cls( + levels=levels, + codes=codes, + sortorder=sortorder, + names=names, + verify_integrity=False, + ) + + @classmethod + @names_compat + def from_tuples( + cls, + tuples: Iterable[tuple[Hashable, ...]], + sortorder: int | None = None, + names: Sequence[Hashable] | Hashable | None = None, + ) -> MultiIndex: + """ + Convert list of tuples to MultiIndex. + + Parameters + ---------- + tuples : list / sequence of tuple-likes + Each tuple is the index of one row/column. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> tuples = [(1, 'red'), (1, 'blue'), + ... 
(2, 'red'), (2, 'blue')] + >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + """ + if not is_list_like(tuples): + raise TypeError("Input must be a list / sequence of tuple-likes.") + elif is_iterator(tuples): + tuples = list(tuples) + tuples = cast(Collection[Tuple[Hashable, ...]], tuples) + + # handling the empty tuple cases + if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): + codes = [np.zeros(len(tuples))] + levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] + return cls( + levels=levels, + codes=codes, + sortorder=sortorder, + names=names, + verify_integrity=False, + ) + + arrays: list[Sequence[Hashable]] + if len(tuples) == 0: + if names is None: + raise TypeError("Cannot infer number of levels from empty list") + # error: Argument 1 to "len" has incompatible type "Hashable"; + # expected "Sized" + arrays = [[]] * len(names) # type: ignore[arg-type] + elif isinstance(tuples, (np.ndarray, Index)): + if isinstance(tuples, Index): + tuples = np.asarray(tuples._values) + + arrays = list(lib.tuples_to_object_array(tuples).T) + elif isinstance(tuples, list): + arrays = list(lib.to_object_array_tuples(tuples).T) + else: + arrs = zip(*tuples) + arrays = cast(List[Sequence[Hashable]], arrs) + + return cls.from_arrays(arrays, sortorder=sortorder, names=names) + + @classmethod + def from_product( + cls, + iterables: Sequence[Iterable[Hashable]], + sortorder: int | None = None, + names: Sequence[Hashable] | lib.NoDefault = lib.no_default, + ) -> MultiIndex: + """ + Make a MultiIndex from the cartesian product of multiple iterables. + + Parameters + ---------- + iterables : list / sequence of iterables + Each iterable has unique labels for each level of the index. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + .. versionchanged:: 1.0.0 + + If not explicitly provided, names will be inferred from the + elements of iterables if an element has a name attribute + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> numbers = [0, 1, 2] + >>> colors = ['green', 'purple'] + >>> pd.MultiIndex.from_product([numbers, colors], + ... names=['number', 'color']) + MultiIndex([(0, 'green'), + (0, 'purple'), + (1, 'green'), + (1, 'purple'), + (2, 'green'), + (2, 'purple')], + names=['number', 'color']) + """ + from pandas.core.reshape.util import cartesian_product + + if not is_list_like(iterables): + raise TypeError("Input must be a list / sequence of iterables.") + elif is_iterator(iterables): + iterables = list(iterables) + + codes, levels = factorize_from_iterables(iterables) + if names is lib.no_default: + names = [getattr(it, "name", None) for it in iterables] + + # codes are all ndarrays, so cartesian_product is lossless + codes = cartesian_product(codes) + return cls(levels, codes, sortorder=sortorder, names=names) + + @classmethod + def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex: + """ + Make a MultiIndex from a DataFrame. + + Parameters + ---------- + df : DataFrame + DataFrame to be converted to MultiIndex. 
+ sortorder : int, optional + Level of sortedness (must be lexicographically sorted by that + level). + names : list-like, optional + If no names are provided, use the column names, or tuple of column + names if the columns is a MultiIndex. If a sequence, overwrite + names with the given sequence. + + Returns + ------- + MultiIndex + The MultiIndex representation of the given DataFrame. + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + + Examples + -------- + >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], + ... ['NJ', 'Temp'], ['NJ', 'Precip']], + ... columns=['a', 'b']) + >>> df + a b + 0 HI Temp + 1 HI Precip + 2 NJ Temp + 3 NJ Precip + + >>> pd.MultiIndex.from_frame(df) + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], + names=['a', 'b']) + + Using explicit names, instead of the column names + + >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], + names=['state', 'observation']) + """ + if not isinstance(df, ABCDataFrame): + raise TypeError("Input must be a DataFrame") + + column_names, columns = zip(*df.items()) + names = column_names if names is None else names + return cls.from_arrays(columns, sortorder=sortorder, names=names) + + # -------------------------------------------------------------------- + + @cache_readonly + def _values(self) -> np.ndarray: + # We override here, since our parent uses _data, which we don't use. + values = [] + + for i in range(self.nlevels): + index = self.levels[i] + codes = self.codes[i] + + vals = index + if is_categorical_dtype(vals.dtype): + vals = cast("CategoricalIndex", vals) + vals = vals._data._internal_get_values() + + is_dti = isinstance(vals, ABCDatetimeIndex) + + if is_dti: + # TODO: this can be removed after Timestamp.freq is removed + # The astype(object) below does not remove the freq from + # the underlying Timestamps so we remove it here to match + # the behavior of self._get_level_values + vals = algos.take_nd(vals, codes, fill_value=index._na_value) + + if isinstance(vals.dtype, ExtensionDtype) or isinstance( + vals, (ABCDatetimeIndex, ABCTimedeltaIndex) + ): + vals = vals.astype(object) + + vals = np.array(vals, copy=False) + if not is_dti: + vals = algos.take_nd(vals, codes, fill_value=index._na_value) + values.append(vals) + + arr = lib.fast_zip(values) + return arr + + @property + def values(self) -> np.ndarray: + return self._values + + @property + def array(self): + """ + Raises a ValueError for `MultiIndex` because there's no single + array backing a MultiIndex. + + Raises + ------ + ValueError + """ + raise ValueError( + "MultiIndex has no single backing array. Use " + "'MultiIndex.to_numpy()' to get a NumPy array of tuples." + ) + + @cache_readonly + def dtypes(self) -> Series: + """ + Return the dtypes as a Series for the underlying MultiIndex. 
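A short sketch of how the materialised values behave (plain pandas assumed): .values builds an object array of tuples on demand, while .array deliberately raises because there is no single backing array:

    import pandas as pd

    mi = pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]], names=["num", "char"])
    mi.values        # array([(1, 'a'), (2, 'b')], dtype=object)
    try:
        mi.array
    except ValueError:
        pass         # no single backing array; use mi.to_numpy() for tuples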
+ """ + from pandas import Series + + names = com.fill_missing_names([level.name for level in self.levels]) + return Series([level.dtype for level in self.levels], index=Index(names)) + + def __len__(self) -> int: + return len(self.codes[0]) + + # -------------------------------------------------------------------- + # Levels Methods + + @cache_readonly + def levels(self) -> FrozenList: + # Use cache_readonly to ensure that self.get_locs doesn't repeatedly + # create new IndexEngine + # https://github.com/pandas-dev/pandas/issues/31648 + result = [x._rename(name=name) for x, name in zip(self._levels, self._names)] + for level in result: + # disallow midx.levels[0].name = "foo" + level._no_setting_name = True + return FrozenList(result) + + def _set_levels( + self, + levels, + *, + level=None, + copy: bool = False, + validate: bool = True, + verify_integrity: bool = False, + ) -> None: + # This is NOT part of the levels property because it should be + # externally not allowed to set levels. User beware if you change + # _levels directly + if validate: + if len(levels) == 0: + raise ValueError("Must set non-zero number of levels.") + if level is None and len(levels) != self.nlevels: + raise ValueError("Length of levels must match number of levels.") + if level is not None and len(levels) != len(level): + raise ValueError("Length of levels must match length of level.") + + if level is None: + new_levels = FrozenList( + ensure_index(lev, copy=copy)._view() for lev in levels + ) + else: + level_numbers = [self._get_level_number(lev) for lev in level] + new_levels_list = list(self._levels) + for lev_num, lev in zip(level_numbers, levels): + new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() + new_levels = FrozenList(new_levels_list) + + if verify_integrity: + new_codes = self._verify_integrity(levels=new_levels) + self._codes = new_codes + + names = self.names + self._levels = new_levels + if any(names): + self._set_names(names) + + self._reset_cache() + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "levels"]) + def set_levels( + self, levels, level=None, inplace=None, verify_integrity: bool = True + ): + """ + Set new levels on MultiIndex. Defaults to returning new index. + + Parameters + ---------- + levels : sequence or list of sequence + New level(s) to apply. + level : int, level name, or sequence of int/level names (default None) + Level(s) to set (None for all levels). + inplace : bool + If True, mutates in place. + + .. deprecated:: 1.2.0 + verify_integrity : bool, default True + If True, checks that levels and codes are compatible. + + Returns + ------- + new index (of same type and class...etc) or None + The same type as the caller or None if ``inplace=True``. + + Examples + -------- + >>> idx = pd.MultiIndex.from_tuples( + ... [ + ... (1, "one"), + ... (1, "two"), + ... (2, "one"), + ... (2, "two"), + ... (3, "one"), + ... (3, "two") + ... ], + ... names=["foo", "bar"] + ... 
) + >>> idx + MultiIndex([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two'), + (3, 'one'), + (3, 'two')], + names=['foo', 'bar']) + + >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2), + ('c', 1), + ('c', 2)], + names=['foo', 'bar']) + >>> idx.set_levels(['a', 'b', 'c'], level=0) + MultiIndex([('a', 'one'), + ('a', 'two'), + ('b', 'one'), + ('b', 'two'), + ('c', 'one'), + ('c', 'two')], + names=['foo', 'bar']) + >>> idx.set_levels(['a', 'b'], level='bar') + MultiIndex([(1, 'a'), + (1, 'b'), + (2, 'a'), + (2, 'b'), + (3, 'a'), + (3, 'b')], + names=['foo', 'bar']) + + If any of the levels passed to ``set_levels()`` exceeds the + existing length, all of the values from that argument will + be stored in the MultiIndex levels, though the values will + be truncated in the MultiIndex output. + + >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2), + ('c', 1), + ('c', 2)], + names=['foo', 'bar']) + >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels + FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) + """ + if inplace is not None: + warnings.warn( + "inplace is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + if is_list_like(levels) and not isinstance(levels, Index): + levels = list(levels) + + level, levels = _require_listlike(level, levels, "Levels") + + if inplace: + idx = self + else: + idx = self._view() + idx._reset_identity() + idx._set_levels( + levels, level=level, validate=True, verify_integrity=verify_integrity + ) + if not inplace: + return idx + + @property + def nlevels(self) -> int: + """ + Integer number of levels in this MultiIndex. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi + MultiIndex([('a', 'b', 'c')], + ) + >>> mi.nlevels + 3 + """ + return len(self._levels) + + @property + def levshape(self) -> Shape: + """ + A tuple with the length of each level. 
+ + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi + MultiIndex([('a', 'b', 'c')], + ) + >>> mi.levshape + (1, 1, 1) + """ + return tuple(len(x) for x in self.levels) + + # -------------------------------------------------------------------- + # Codes Methods + + @property + def codes(self): + return self._codes + + def _set_codes( + self, + codes, + *, + level=None, + copy: bool = False, + validate: bool = True, + verify_integrity: bool = False, + ) -> None: + if validate: + if level is None and len(codes) != self.nlevels: + raise ValueError("Length of codes must match number of levels") + if level is not None and len(codes) != len(level): + raise ValueError("Length of codes must match length of levels.") + + if level is None: + new_codes = FrozenList( + _coerce_indexer_frozen(level_codes, lev, copy=copy).view() + for lev, level_codes in zip(self._levels, codes) + ) + else: + level_numbers = [self._get_level_number(lev) for lev in level] + new_codes_list = list(self._codes) + for lev_num, level_codes in zip(level_numbers, codes): + lev = self.levels[lev_num] + new_codes_list[lev_num] = _coerce_indexer_frozen( + level_codes, lev, copy=copy + ) + new_codes = FrozenList(new_codes_list) + + if verify_integrity: + new_codes = self._verify_integrity(codes=new_codes) + + self._codes = new_codes + + self._reset_cache() + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "codes"]) + def set_codes(self, codes, level=None, inplace=None, verify_integrity: bool = True): + """ + Set new codes on MultiIndex. Defaults to returning new index. + + Parameters + ---------- + codes : sequence or list of sequence + New codes to apply. + level : int, level name, or sequence of int/level names (default None) + Level(s) to set (None for all levels). + inplace : bool + If True, mutates in place. + + .. deprecated:: 1.2.0 + verify_integrity : bool, default True + If True, checks that levels and codes are compatible. + + Returns + ------- + new index (of same type and class...etc) or None + The same type as the caller or None if ``inplace=True``. + + Examples + -------- + >>> idx = pd.MultiIndex.from_tuples( + ... [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"] + ... 
) + >>> idx + MultiIndex([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two')], + names=['foo', 'bar']) + + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([1, 0, 1, 0], level=0) + MultiIndex([(2, 'one'), + (1, 'two'), + (2, 'one'), + (1, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([0, 0, 1, 1], level='bar') + MultiIndex([(1, 'one'), + (1, 'one'), + (2, 'two'), + (2, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], + names=['foo', 'bar']) + """ + if inplace is not None: + warnings.warn( + "inplace is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + inplace = False + + level, codes = _require_listlike(level, codes, "Codes") + + if inplace: + idx = self + else: + idx = self._view() + idx._reset_identity() + idx._set_codes(codes, level=level, verify_integrity=verify_integrity) + if not inplace: + return idx + + # -------------------------------------------------------------------- + # Index Internals + + @cache_readonly + def _engine(self): + # Calculate the number of bits needed to represent labels in each + # level, as log2 of their sizes (including -1 for NaN): + sizes = np.ceil(np.log2([len(level) + 1 for level in self.levels])) + + # Sum bit counts, starting from the _right_.... + lev_bits = np.cumsum(sizes[::-1])[::-1] + + # ... in order to obtain offsets such that sorting the combination of + # shifted codes (one for each level, resulting in a unique integer) is + # equivalent to sorting lexicographically the codes themselves. Notice + # that each level needs to be shifted by the number of bits needed to + # represent the _previous_ ones: + offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64") + + # Check the total number of bits needed for our representation: + if lev_bits[0] > 64: + # The levels would overflow a 64 bit uint - use Python integers: + return MultiIndexPyIntEngine(self.levels, self.codes, offsets) + return MultiIndexUIntEngine(self.levels, self.codes, offsets) + + # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return + # type "Type[MultiIndex]" in supertype "Index" + @property + def _constructor(self) -> Callable[..., MultiIndex]: # type: ignore[override] + return type(self).from_tuples + + @doc(Index._shallow_copy) + def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex: + names = name if name is not lib.no_default else self.names + + return type(self).from_tuples(values, sortorder=None, names=names) + + def _view(self) -> MultiIndex: + result = type(self)( + levels=self.levels, + codes=self.codes, + sortorder=self.sortorder, + names=self.names, + verify_integrity=False, + ) + result._cache = self._cache.copy() + result._cache.pop("levels", None) # GH32669 + return result + + # -------------------------------------------------------------------- + + def copy( + self, + names=None, + dtype=None, + levels=None, + codes=None, + deep=False, + name=None, + ): + """ + Make a copy of this object. Names, dtype, levels and codes can be + passed and will be set on new copy. + + Parameters + ---------- + names : sequence, optional + dtype : numpy dtype or pandas type, optional + + .. deprecated:: 1.2.0 + levels : sequence, optional + + .. deprecated:: 1.2.0 + codes : sequence, optional + + .. 
deprecated:: 1.2.0 + deep : bool, default False + name : Label + Kept for compatibility with 1-dimensional Index. Should not be used. + + Returns + ------- + MultiIndex + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + This could be potentially expensive on large MultiIndex objects. + """ + names = self._validate_names(name=name, names=names, deep=deep) + keep_id = not deep + if levels is not None: + warnings.warn( + "parameter levels is deprecated and will be removed in a future " + "version. Use the set_levels method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + keep_id = False + if codes is not None: + warnings.warn( + "parameter codes is deprecated and will be removed in a future " + "version. Use the set_codes method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + keep_id = False + + if deep: + from copy import deepcopy + + if levels is None: + levels = deepcopy(self.levels) + if codes is None: + codes = deepcopy(self.codes) + + levels = levels if levels is not None else self.levels + codes = codes if codes is not None else self.codes + + new_index = type(self)( + levels=levels, + codes=codes, + sortorder=self.sortorder, + names=names, + verify_integrity=False, + ) + new_index._cache = self._cache.copy() + new_index._cache.pop("levels", None) # GH32669 + if keep_id: + new_index._id = self._id + + if dtype: + warnings.warn( + "parameter dtype is deprecated and will be removed in a future " + "version. Use the astype method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + new_index = new_index.astype(dtype) + return new_index + + def __array__(self, dtype=None) -> np.ndarray: + """the array interface, return my values""" + return self.values + + def view(self, cls=None): + """this is defined as a copy with the same identity""" + result = self.copy() + result._id = self._id + return result + + @doc(Index.__contains__) + def __contains__(self, key: Any) -> bool: + hash(key) + try: + self.get_loc(key) + return True + except (LookupError, TypeError, ValueError): + return False + + @cache_readonly + def dtype(self) -> np.dtype: + return np.dtype("O") + + def _is_memory_usage_qualified(self) -> bool: + """return a boolean if we need a qualified .info display""" + + def f(level): + return "mixed" in level or "string" in level or "unicode" in level + + return any(f(level) for level in self._inferred_type_levels) + + @doc(Index.memory_usage) + def memory_usage(self, deep: bool = False) -> int: + # we are overwriting our base class to avoid + # computing .values here which could materialize + # a tuple representation unnecessarily + return self._nbytes(deep) + + @cache_readonly + def nbytes(self) -> int: + """return the number of bytes in the underlying data""" + return self._nbytes(False) + + def _nbytes(self, deep: bool = False) -> int: + """ + return the number of bytes in the underlying data + deeply introspect the level data if deep=True + + include the engine hashtable + + *this is in internal routine* + + """ + # for implementations with no useful getsizeof (PyPy) + objsize = 24 + + level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels) + label_nbytes = sum(i.nbytes for i in self.codes) + names_nbytes = sum(getsizeof(i, objsize) for i in self.names) + result = level_nbytes + label_nbytes + names_nbytes + + # include our engine hashtable + result += self._engine.sizeof(deep=deep) + return result + + # 
-------------------------------------------------------------------- + # Rendering Methods + + def _formatter_func(self, tup): + """ + Formats each item in tup according to its level's formatter function. + """ + formatter_funcs = [level._formatter_func for level in self.levels] + return tuple(func(val) for func, val in zip(formatter_funcs, tup)) + + def _format_native_types( + self, *, na_rep="nan", **kwargs + ) -> npt.NDArray[np.object_]: + new_levels = [] + new_codes = [] + + # go through the levels and format them + for level, level_codes in zip(self.levels, self.codes): + level_strs = level._format_native_types(na_rep=na_rep, **kwargs) + # add nan values, if there are any + mask = level_codes == -1 + if mask.any(): + nan_index = len(level_strs) + # numpy 1.21 deprecated implicit string casting + level_strs = level_strs.astype(str) + level_strs = np.append(level_strs, na_rep) + assert not level_codes.flags.writeable # i.e. copy is needed + level_codes = level_codes.copy() # make writeable + level_codes[mask] = nan_index + new_levels.append(level_strs) + new_codes.append(level_codes) + + if len(new_levels) == 1: + # a single-level multi-index + return Index(new_levels[0].take(new_codes[0]))._format_native_types() + else: + # reconstruct the multi-index + mi = MultiIndex( + levels=new_levels, + codes=new_codes, + names=self.names, + sortorder=self.sortorder, + verify_integrity=False, + ) + return mi._values + + def format( + self, + name: bool | None = None, + formatter: Callable | None = None, + na_rep: str | None = None, + names: bool = False, + space: int = 2, + sparsify=None, + adjoin: bool = True, + ) -> list: + if name is not None: + names = name + + if len(self) == 0: + return [] + + stringified_levels = [] + for lev, level_codes in zip(self.levels, self.codes): + na = na_rep if na_rep is not None else _get_na_rep(lev.dtype) + + if len(lev) > 0: + + formatted = lev.take(level_codes).format(formatter=formatter) + + # we have some NA + mask = level_codes == -1 + if mask.any(): + formatted = np.array(formatted, dtype=object) + formatted[mask] = na + formatted = formatted.tolist() + + else: + # weird all NA case + formatted = [ + pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n")) + for x in algos.take_nd(lev._values, level_codes) + ] + stringified_levels.append(formatted) + + result_levels = [] + for lev, lev_name in zip(stringified_levels, self.names): + level = [] + + if names: + level.append( + pprint_thing(lev_name, escape_chars=("\t", "\r", "\n")) + if lev_name is not None + else "" + ) + + level.extend(np.array(lev, dtype=object)) + result_levels.append(level) + + if sparsify is None: + sparsify = get_option("display.multi_sparse") + + if sparsify: + sentinel: Literal[""] | bool | lib.NoDefault = "" + # GH3547 use value of sparsify as sentinel if it's "Falsey" + assert isinstance(sparsify, bool) or sparsify is lib.no_default + if sparsify in [False, lib.no_default]: + sentinel = sparsify + # little bit of a kludge job for #1217 + result_levels = sparsify_labels( + result_levels, start=int(names), sentinel=sentinel + ) + + if adjoin: + from pandas.io.formats.format import get_adjustment + + adj = get_adjustment() + return adj.adjoin(space, *result_levels).split("\n") + else: + return result_levels + + # -------------------------------------------------------------------- + # Names Methods + + def _get_names(self) -> FrozenList: + return FrozenList(self._names) + + def _set_names(self, names, *, level=None, validate: bool = True): + """ + Set new names on index. 
Each name has to be a hashable type. + + Parameters + ---------- + values : str or sequence + name(s) to set + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). Otherwise level must be None + validate : bool, default True + validate that the names match level lengths + + Raises + ------ + TypeError if each name is not hashable. + + Notes + ----- + sets names on levels. WARNING: mutates! + + Note that you generally want to set this *after* changing levels, so + that it only acts on copies + """ + # GH 15110 + # Don't allow a single string for names in a MultiIndex + if names is not None and not is_list_like(names): + raise ValueError("Names should be list-like for a MultiIndex") + names = list(names) + + if validate: + if level is not None and len(names) != len(level): + raise ValueError("Length of names must match length of level.") + if level is None and len(names) != self.nlevels: + raise ValueError( + "Length of names must match number of levels in MultiIndex." + ) + + if level is None: + level = range(self.nlevels) + else: + level = [self._get_level_number(lev) for lev in level] + + # set the name + for lev, name in zip(level, names): + if name is not None: + # GH 20527 + # All items in 'names' need to be hashable: + if not is_hashable(name): + raise TypeError( + f"{type(self).__name__}.name must be a hashable type" + ) + self._names[lev] = name + + # If .levels has been accessed, the names in our cache will be stale. + self._reset_cache() + + names = property( + fset=_set_names, + fget=_get_names, + doc=""" + Names of levels in MultiIndex. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays( + ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z']) + >>> mi + MultiIndex([(1, 3, 5), + (2, 4, 6)], + names=['x', 'y', 'z']) + >>> mi.names + FrozenList(['x', 'y', 'z']) + """, + ) + + # -------------------------------------------------------------------- + + @doc(Index._get_grouper_for_level) + def _get_grouper_for_level( + self, + mapper, + *, + level=None, + dropna: bool = True, + ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]: + if mapper is not None: + indexer = self.codes[level] + # Handle group mapping function and return + level_values = self.levels[level].take(indexer) + grouper = level_values.map(mapper) + return grouper, None, None + + values = self.get_level_values(level) + codes, uniques = algos.factorize(values, sort=True, use_na_sentinel=dropna) + assert isinstance(uniques, Index) + + if self.levels[level]._can_hold_na: + grouper = uniques.take(codes, fill_value=True) + else: + grouper = uniques.take(codes) + + return grouper, codes, uniques + + @cache_readonly + def inferred_type(self) -> str: + return "mixed" + + def _get_level_number(self, level) -> int: + count = self.names.count(level) + if (count > 1) and not is_integer(level): + raise ValueError( + f"The name {level} occurs multiple times, use a level number" + ) + try: + level = self.names.index(level) + except ValueError as err: + if not is_integer(level): + raise KeyError(f"Level {level} not found") from err + elif level < 0: + level += self.nlevels + if level < 0: + orig_level = level - self.nlevels + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels, " + f"{orig_level} is not a valid level number" + ) from err + # Note: levels are zero-based + elif level >= self.nlevels: + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels, " + 
f"not {level + 1}" + ) from err + return level + + @cache_readonly + def is_monotonic_increasing(self) -> bool: + """ + Return a boolean if the values are equal or increasing. + """ + if any(-1 in code for code in self.codes): + return False + + if all(level.is_monotonic_increasing for level in self.levels): + # If each level is sorted, we can operate on the codes directly. GH27495 + return libalgos.is_lexsorted( + [x.astype("int64", copy=False) for x in self.codes] + ) + + # reversed() because lexsort() wants the most significant key last. + values = [ + self._get_level_values(i)._values for i in reversed(range(len(self.levels))) + ] + try: + # error: Argument 1 to "lexsort" has incompatible type + # "List[Union[ExtensionArray, ndarray[Any, Any]]]"; + # expected "Union[_SupportsArray[dtype[Any]], + # _NestedSequence[_SupportsArray[dtype[Any]]], bool, + # int, float, complex, str, bytes, _NestedSequence[Union + # [bool, int, float, complex, str, bytes]]]" + sort_order = np.lexsort(values) # type: ignore[arg-type] + return Index(sort_order).is_monotonic_increasing + except TypeError: + + # we have mixed types and np.lexsort is not happy + return Index(self._values).is_monotonic_increasing + + @cache_readonly + def is_monotonic_decreasing(self) -> bool: + """ + Return a boolean if the values are equal or decreasing. + """ + # monotonic decreasing if and only if reverse is monotonic increasing + return self[::-1].is_monotonic_increasing + + @cache_readonly + def _inferred_type_levels(self) -> list[str]: + """return a list of the inferred types, one for each level""" + return [i.inferred_type for i in self.levels] + + @doc(Index.duplicated) + def duplicated(self, keep="first") -> npt.NDArray[np.bool_]: + shape = tuple(len(lev) for lev in self.levels) + ids = get_group_index(self.codes, shape, sort=False, xnull=False) + + return duplicated(ids, keep) + + # error: Cannot override final attribute "_duplicated" + # (previously declared in base class "IndexOpsMixin") + _duplicated = duplicated # type: ignore[misc] + + def fillna(self, value=None, downcast=None): + """ + fillna is not implemented for MultiIndex + """ + raise NotImplementedError("isna is not defined for MultiIndex") + + @doc(Index.dropna) + def dropna(self, how: str = "any") -> MultiIndex: + nans = [level_codes == -1 for level_codes in self.codes] + if how == "any": + indexer = np.any(nans, axis=0) + elif how == "all": + indexer = np.all(nans, axis=0) + else: + raise ValueError(f"invalid how option: {how}") + + new_codes = [level_codes[~indexer] for level_codes in self.codes] + return self.set_codes(codes=new_codes) + + def _get_level_values(self, level: int, unique: bool = False) -> Index: + """ + Return vector of label values for requested level, + equal to the length of the index + + **this is an internal method** + + Parameters + ---------- + level : int + unique : bool, default False + if True, drop duplicated values + + Returns + ------- + Index + """ + lev = self.levels[level] + level_codes = self.codes[level] + name = self._names[level] + if unique: + level_codes = algos.unique(level_codes) + filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value) + return lev._shallow_copy(filled, name=name) + + def get_level_values(self, level): + """ + Return vector of label values for requested level. + + Length of returned vector is equal to the length of the index. + + Parameters + ---------- + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. 
+ + Returns + ------- + values : Index + Values is a level of this MultiIndex converted to + a single :class:`Index` (or subclass thereof). + + Notes + ----- + If the level contains missing values, the result may be casted to + ``float`` with missing values specified as ``NaN``. This is because + the level is converted to a regular ``Index``. + + Examples + -------- + Create a MultiIndex: + + >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) + >>> mi.names = ['level_1', 'level_2'] + + Get level values by supplying level as either integer or name: + + >>> mi.get_level_values(0) + Index(['a', 'b', 'c'], dtype='object', name='level_1') + >>> mi.get_level_values('level_2') + Index(['d', 'e', 'f'], dtype='object', name='level_2') + + If a level contains missing values, the return type of the level + maybe casted to ``float``. + + >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes + level_0 int64 + level_1 int64 + dtype: object + >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0) + Float64Index([1.0, nan, 2.0], dtype='float64') + """ + level = self._get_level_number(level) + values = self._get_level_values(level) + return values + + @doc(Index.unique) + def unique(self, level=None): + + if level is None: + return super().unique() + else: + level = self._get_level_number(level) + return self._get_level_values(level=level, unique=True) + + def to_frame( + self, + index: bool = True, + name=lib.no_default, + allow_duplicates: bool = False, + ) -> DataFrame: + """ + Create a DataFrame with the levels of the MultiIndex as columns. + + Column ordering is determined by the DataFrame constructor with data as + a dict. + + Parameters + ---------- + index : bool, default True + Set the index of the returned DataFrame as the original MultiIndex. + + name : list / sequence of str, optional + The passed names should substitute index level names. + + allow_duplicates : bool, optional default False + Allow duplicate column labels to be created. + + .. versionadded:: 1.5.0 + + Returns + ------- + DataFrame : a DataFrame containing the original MultiIndex data. + + See Also + -------- + DataFrame : Two-dimensional, size-mutable, potentially heterogeneous + tabular data. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']]) + >>> mi + MultiIndex([('a', 'c'), + ('b', 'd')], + ) + + >>> df = mi.to_frame() + >>> df + 0 1 + a c a c + b d b d + + >>> df = mi.to_frame(index=False) + >>> df + 0 1 + 0 a c + 1 b d + + >>> df = mi.to_frame(name=['x', 'y']) + >>> df + x y + a c a c + b d b d + """ + from pandas import DataFrame + + if name is None: + warnings.warn( + "Explicitly passing `name=None` currently preserves the Index's name " + "or uses a default name of 0. This behaviour is deprecated, and in " + "the future `None` will be used as the name of the resulting " + "DataFrame column.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = lib.no_default + + if name is not lib.no_default: + if not is_list_like(name): + raise TypeError("'name' must be a list / sequence of column names.") + + if len(name) != len(self.levels): + raise ValueError( + "'name' should have same length as number of levels on index." 
+ )
+ idx_names = name
+ else:
+ idx_names = self._get_level_names()
+
+ if not allow_duplicates and len(set(idx_names)) != len(idx_names):
+ raise ValueError(
+ "Cannot create duplicate column labels if allow_duplicates is False"
+ )
+
+ # Guarantee resulting column order - PY36+ dict maintains insertion order
+ result = DataFrame(
+ {level: self._get_level_values(level) for level in range(len(self.levels))},
+ copy=False,
+ )
+ result.columns = idx_names
+
+ if index:
+ result.index = self
+ return result
+
+ # error: Return type "Index" of "to_flat_index" incompatible with return type
+ # "MultiIndex" in supertype "Index"
+ def to_flat_index(self) -> Index: # type: ignore[override]
+ """
+ Convert a MultiIndex to an Index of Tuples containing the level values.
+
+ Returns
+ -------
+ pd.Index
+ Index with the MultiIndex data represented in Tuples.
+
+ See Also
+ --------
+ MultiIndex.from_tuples : Convert flat index back to MultiIndex.
+
+ Notes
+ -----
+ This method will simply return the caller if called by anything other
+ than a MultiIndex.
+
+ Examples
+ --------
+ >>> index = pd.MultiIndex.from_product(
+ ... [['foo', 'bar'], ['baz', 'qux']],
+ ... names=['a', 'b'])
+ >>> index.to_flat_index()
+ Index([('foo', 'baz'), ('foo', 'qux'),
+ ('bar', 'baz'), ('bar', 'qux')],
+ dtype='object')
+ """
+ return Index(self._values, tupleize_cols=False)
+
+ def is_lexsorted(self) -> bool:
+ warnings.warn(
+ "MultiIndex.is_lexsorted is deprecated as a public function, "
+ "users should use MultiIndex.is_monotonic_increasing instead.",
+ FutureWarning,
+ stacklevel=find_stack_level(),
+ )
+ return self._is_lexsorted()
+
+ def _is_lexsorted(self) -> bool:
+ """
+ Return True if the codes are lexicographically sorted.
+
+ Returns
+ -------
+ bool
+
+ Examples
+ --------
+ In the below examples, the first level of the MultiIndex is sorted because
+ a<b<c, so there is no need to look at the next level.
+
+ >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'e', 'f']]).is_lexsorted()
+ True
+ >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'f', 'e']]).is_lexsorted()
+ True
+
+ In case there is a tie, the lexicographical sorting looks
+ at the next level of the MultiIndex.
+
+ >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']]).is_lexsorted()
+ True
+ >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']]).is_lexsorted()
+ False
+ >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
+ ... ['aa', 'bb', 'aa', 'bb']]).is_lexsorted()
+ True
+ >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
+ ... ['bb', 'aa', 'aa', 'bb']]).is_lexsorted()
+ False
+ """
+ return self._lexsort_depth == self.nlevels
+
+ @property
+ def lexsort_depth(self) -> int:
+ warnings.warn(
+ "MultiIndex.lexsort_depth is deprecated as a public function, "
+ "users should use MultiIndex.is_monotonic_increasing instead.",
+ FutureWarning,
+ stacklevel=find_stack_level(),
+ )
+ return self._lexsort_depth
+
+ @cache_readonly
+ def _lexsort_depth(self) -> int:
+ """
+ Compute and return the lexsort_depth, the number of levels of the
+ MultiIndex that are sorted lexically
+
+ Returns
+ -------
+ int
+ """
+ if self.sortorder is not None:
+ return self.sortorder
+ return _lexsort_depth(self.codes, self.nlevels)
+
+ def _sort_levels_monotonic(self) -> MultiIndex:
+ """
+ This is an *internal* function.
+
+ Create a new MultiIndex from the current to monotonically sorted
+ items IN the levels. This does not actually make the entire MultiIndex
+ monotonic, JUST the levels.
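# Illustrative sketch, not part of the pandas source: a rough pure-Python
# rendering of what the _lexsort_depth machinery above computes -- the number
# of leading levels whose integer codes are already in lexicographic order.
# The helper name approx_lexsort_depth is made up for illustration.
import numpy as np

def approx_lexsort_depth(codes) -> int:
    rows = list(zip(*codes))
    for k in range(len(codes), 0, -1):
        prefixes = [row[:k] for row in rows]
        if all(a <= b for a, b in zip(prefixes, prefixes[1:])):
            return k
    return 0

approx_lexsort_depth([np.array([0, 0, 1]), np.array([1, 0, 0])])  # -> 1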
+ + The resulting MultiIndex will have the same outward + appearance, meaning the same .values and ordering. It will also + be .equals() to the original. + + Returns + ------- + MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], + ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + ) + + >>> mi.sort_values() + MultiIndex([('a', 'aa'), + ('a', 'bb'), + ('b', 'aa'), + ('b', 'bb')], + ) + """ + if self._is_lexsorted() and self.is_monotonic_increasing: + return self + + new_levels = [] + new_codes = [] + + for lev, level_codes in zip(self.levels, self.codes): + + if not lev.is_monotonic_increasing: + try: + # indexer to reorder the levels + indexer = lev.argsort() + except TypeError: + pass + else: + lev = lev.take(indexer) + + # indexer to reorder the level codes + indexer = ensure_platform_int(indexer) + ri = lib.get_reverse_indexer(indexer, len(indexer)) + level_codes = algos.take_nd(ri, level_codes) + + new_levels.append(lev) + new_codes.append(level_codes) + + return MultiIndex( + new_levels, + new_codes, + names=self.names, + sortorder=self.sortorder, + verify_integrity=False, + ) + + def remove_unused_levels(self) -> MultiIndex: + """ + Create new MultiIndex from current that removes unused levels. + + Unused level(s) means levels that are not expressed in the + labels. The resulting MultiIndex will have the same outward + appearance, meaning the same .values and ordering. It will + also be .equals() to the original. + + Returns + ------- + MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex.from_product([range(2), list('ab')]) + >>> mi + MultiIndex([(0, 'a'), + (0, 'b'), + (1, 'a'), + (1, 'b')], + ) + + >>> mi[2:] + MultiIndex([(1, 'a'), + (1, 'b')], + ) + + The 0 from the first level is not represented + and can be removed + + >>> mi2 = mi[2:].remove_unused_levels() + >>> mi2.levels + FrozenList([[1], ['a', 'b']]) + """ + new_levels = [] + new_codes = [] + + changed = False + for lev, level_codes in zip(self.levels, self.codes): + + # Since few levels are typically unused, bincount() is more + # efficient than unique() - however it only accepts positive values + # (and drops order): + uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1 + has_na = int(len(uniques) and (uniques[0] == -1)) + + if len(uniques) != len(lev) + has_na: + + if lev.isna().any() and len(uniques) == len(lev): + break + # We have unused levels + changed = True + + # Recalculate uniques, now preserving order. + # Can easily be cythonized by exploiting the already existing + # "uniques" and stop parsing "level_codes" when all items + # are found: + uniques = algos.unique(level_codes) + if has_na: + na_idx = np.where(uniques == -1)[0] + # Just ensure that -1 is in first position: + uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]] + + # codes get mapped from uniques to 0:len(uniques) + # -1 (if present) is mapped to last position + code_mapping = np.zeros(len(lev) + has_na) + # ... 
and reassigned value -1: + code_mapping[uniques] = np.arange(len(uniques)) - has_na + + level_codes = code_mapping[level_codes] + + # new levels are simple + lev = lev.take(uniques[has_na:]) + + new_levels.append(lev) + new_codes.append(level_codes) + + result = self.view() + + if changed: + result._reset_identity() + result._set_levels(new_levels, validate=False) + result._set_codes(new_codes, validate=False) + + return result + + # -------------------------------------------------------------------- + # Pickling Methods + + def __reduce__(self): + """Necessary for making this object picklable""" + d = { + "levels": list(self.levels), + "codes": list(self.codes), + "sortorder": self.sortorder, + "names": list(self.names), + } + return ibase._new_Index, (type(self), d), None + + # -------------------------------------------------------------------- + + def __getitem__(self, key): + if is_scalar(key): + key = com.cast_scalar_indexer(key, warn_float=True) + + retval = [] + for lev, level_codes in zip(self.levels, self.codes): + if level_codes[key] == -1: + retval.append(np.nan) + else: + retval.append(lev[level_codes[key]]) + + return tuple(retval) + else: + # in general cannot be sure whether the result will be sorted + sortorder = None + if com.is_bool_indexer(key): + key = np.asarray(key, dtype=bool) + sortorder = self.sortorder + elif isinstance(key, slice): + if key.step is None or key.step > 0: + sortorder = self.sortorder + elif isinstance(key, Index): + key = np.asarray(key) + + new_codes = [level_codes[key] for level_codes in self.codes] + + return MultiIndex( + levels=self.levels, + codes=new_codes, + names=self.names, + sortorder=sortorder, + verify_integrity=False, + ) + + def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: + """ + Fastpath for __getitem__ when we know we have a slice. 
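# Illustrative sketch, not part of the pandas source: the __getitem__ paths
# implemented above. A scalar key returns a tuple of level values; boolean
# masks and slices return a new MultiIndex that reuses the existing levels
# with re-sliced codes. Assumes only that pandas is importable as pd.
import pandas as pd

mi = pd.MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]])
mi[0]                    # ('a', 1)
mi[[True, False, True]]  # MultiIndex([('a', 1), ('b', 3)])
mi[::2]                  # MultiIndex([('a', 1), ('b', 3)])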
+ """ + sortorder = None + if slobj.step is None or slobj.step > 0: + sortorder = self.sortorder + + new_codes = [level_codes[slobj] for level_codes in self.codes] + + return type(self)( + levels=self.levels, + codes=new_codes, + names=self._names, + sortorder=sortorder, + verify_integrity=False, + ) + + @Appender(_index_shared_docs["take"] % _index_doc_kwargs) + def take( + self: MultiIndex, + indices, + axis: int = 0, + allow_fill: bool = True, + fill_value=None, + **kwargs, + ) -> MultiIndex: + nv.validate_take((), kwargs) + indices = ensure_platform_int(indices) + + # only fill if we are passing a non-None fill_value + allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices) + + na_value = -1 + + taken = [lab.take(indices) for lab in self.codes] + if allow_fill: + mask = indices == -1 + if mask.any(): + masked = [] + for new_label in taken: + label_values = new_label + label_values[mask] = na_value + masked.append(np.asarray(label_values)) + taken = masked + + return MultiIndex( + levels=self.levels, codes=taken, names=self.names, verify_integrity=False + ) + + def append(self, other): + """ + Append a collection of Index options together + + Parameters + ---------- + other : Index or list/tuple of indices + + Returns + ------- + appended : Index + """ + if not isinstance(other, (list, tuple)): + other = [other] + + if all( + (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other + ): + arrays = [] + for i in range(self.nlevels): + label = self._get_level_values(i) + appended = [o._get_level_values(i) for o in other] + arrays.append(label.append(appended)) + return MultiIndex.from_arrays(arrays, names=self.names) + + to_concat = (self._values,) + tuple(k._values for k in other) + new_tuples = np.concatenate(to_concat) + + # if all(isinstance(x, MultiIndex) for x in other): + try: + return MultiIndex.from_tuples(new_tuples, names=self.names) + except (TypeError, IndexError): + return Index._with_infer(new_tuples) + + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: + return self._values.argsort(*args, **kwargs) + + @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) + def repeat(self, repeats: int, axis=None) -> MultiIndex: + nv.validate_repeat((), {"axis": axis}) + # error: Incompatible types in assignment (expression has type "ndarray", + # variable has type "int") + repeats = ensure_platform_int(repeats) # type: ignore[assignment] + return MultiIndex( + levels=self.levels, + codes=[ + level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats) + for level_codes in self.codes + ], + names=self.names, + sortorder=self.sortorder, + verify_integrity=False, + ) + + def drop(self, codes, level=None, errors="raise"): + """ + Make new MultiIndex with passed list of codes deleted + + Parameters + ---------- + codes : array-like + Must be a list of tuples when level is not specified + level : int or level name, default None + errors : str, default 'raise' + + Returns + ------- + dropped : MultiIndex + """ + if level is not None: + return self._drop_from_level(codes, level, errors) + + if not isinstance(codes, (np.ndarray, Index)): + try: + codes = com.index_labels_to_array(codes, dtype=np.dtype("object")) + except ValueError: + pass + + inds = [] + for level_codes in codes: + try: + loc = self.get_loc(level_codes) + # get_loc returns either an integer, a slice, or a boolean + # mask + if isinstance(loc, int): + inds.append(loc) + elif isinstance(loc, slice): + step = loc.step if loc.step is not None else 1 + 
inds.extend(range(loc.start, loc.stop, step)) + elif com.is_bool_indexer(loc): + if self._lexsort_depth == 0: + warnings.warn( + "dropping on a non-lexsorted multi-index " + "without a level parameter may impact performance.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + loc = loc.nonzero()[0] + inds.extend(loc) + else: + msg = f"unsupported indexer of type {type(loc)}" + raise AssertionError(msg) + except KeyError: + if errors != "ignore": + raise + + return self.delete(inds) + + def _drop_from_level(self, codes, level, errors="raise") -> MultiIndex: + codes = com.index_labels_to_array(codes) + i = self._get_level_number(level) + index = self.levels[i] + values = index.get_indexer(codes) + # If nan should be dropped it will equal -1 here. We have to check which values + # are not nan and equal -1, this means they are missing in the index + nan_codes = isna(codes) + values[(np.equal(nan_codes, False)) & (values == -1)] = -2 + if index.shape[0] == self.shape[0]: + values[np.equal(nan_codes, True)] = -2 + + not_found = codes[values == -2] + if len(not_found) != 0 and errors != "ignore": + raise KeyError(f"labels {not_found} not found in level") + mask = ~algos.isin(self.codes[i], values) + + return self[mask] + + def swaplevel(self, i=-2, j=-1) -> MultiIndex: + """ + Swap level i with level j. + + Calling this method does not change the ordering of the values. + + Parameters + ---------- + i : int, str, default -2 + First level of index to be swapped. Can pass level name as string. + Type of parameters can be mixed. + j : int, str, default -1 + Second level of index to be swapped. Can pass level name as string. + Type of parameters can be mixed. + + Returns + ------- + MultiIndex + A new MultiIndex. + + See Also + -------- + Series.swaplevel : Swap levels i and j in a MultiIndex. + DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a + particular axis. + + Examples + -------- + >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], + ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + ) + >>> mi.swaplevel(0, 1) + MultiIndex([('bb', 'a'), + ('aa', 'a'), + ('bb', 'b'), + ('aa', 'b')], + ) + """ + new_levels = list(self.levels) + new_codes = list(self.codes) + new_names = list(self.names) + + i = self._get_level_number(i) + j = self._get_level_number(j) + + new_levels[i], new_levels[j] = new_levels[j], new_levels[i] + new_codes[i], new_codes[j] = new_codes[j], new_codes[i] + new_names[i], new_names[j] = new_names[j], new_names[i] + + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + def reorder_levels(self, order) -> MultiIndex: + """ + Rearrange levels using input order. May not drop or duplicate levels. + + Parameters + ---------- + order : list of int or list of str + List representing new level order. Reference level by number + (position) or by key (label). 
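# Illustrative sketch, not part of the pandas source: the two paths through
# MultiIndex.drop above. Full tuples are resolved with get_loc, while passing
# level= routes through _drop_from_level instead. Assumes pandas only.
import pandas as pd

mi = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)])
mi.drop([("a", 2)])      # MultiIndex([('a', 1), ('b', 1)])
mi.drop(["a"], level=0)  # MultiIndex([('b', 1)])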
+ + Returns + ------- + MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y']) + >>> mi + MultiIndex([(1, 3), + (2, 4)], + names=['x', 'y']) + + >>> mi.reorder_levels(order=[1, 0]) + MultiIndex([(3, 1), + (4, 2)], + names=['y', 'x']) + + >>> mi.reorder_levels(order=['y', 'x']) + MultiIndex([(3, 1), + (4, 2)], + names=['y', 'x']) + """ + order = [self._get_level_number(i) for i in order] + if len(order) != self.nlevels: + raise AssertionError( + f"Length of order must be same as number of levels ({self.nlevels}), " + f"got {len(order)}" + ) + new_levels = [self.levels[i] for i in order] + new_codes = [self.codes[i] for i in order] + new_names = [self.names[i] for i in order] + + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + def _get_codes_for_sorting(self) -> list[Categorical]: + """ + we are categorizing our codes by using the + available categories (all, not just observed) + excluding any missing ones (-1); this is in preparation + for sorting, where we need to disambiguate that -1 is not + a valid valid + """ + + def cats(level_codes): + return np.arange( + np.array(level_codes).max() + 1 if len(level_codes) else 0, + dtype=level_codes.dtype, + ) + + return [ + Categorical.from_codes(level_codes, cats(level_codes), ordered=True) + for level_codes in self.codes + ] + + def sortlevel( + self, level=0, ascending: bool = True, sort_remaining: bool = True + ) -> tuple[MultiIndex, npt.NDArray[np.intp]]: + """ + Sort MultiIndex at the requested level. + + The result will respect the original ordering of the associated + factor at that level. + + Parameters + ---------- + level : list-like, int or str, default 0 + If a string is given, must be a name of the level. + If list-like must be names or ints of levels. + ascending : bool, default True + False to sort in descending order. + Can also be a list to specify a directed ordering. + sort_remaining : sort by the remaining levels after level + + Returns + ------- + sorted_index : pd.MultiIndex + Resulting index. + indexer : np.ndarray[np.intp] + Indices of output values in original index. 
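# Illustrative sketch, not part of the pandas source: the idea behind
# _get_codes_for_sorting above -- each level's codes are wrapped in an ordered
# Categorical over 0..max(code), so a missing entry (code -1) stays outside
# the categories while sorting. Assumes pandas and numpy only.
import numpy as np
from pandas import Categorical

level_codes = np.array([1, -1, 0, 1], dtype=np.int8)
cats = np.arange(level_codes.max() + 1, dtype=level_codes.dtype)
Categorical.from_codes(level_codes, cats, ordered=True)
# [1, NaN, 0, 1]
# Categories (2, int8): [0 < 1]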
+ + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]]) + >>> mi + MultiIndex([(0, 2), + (0, 1)], + ) + + >>> mi.sortlevel() + (MultiIndex([(0, 1), + (0, 2)], + ), array([1, 0])) + + >>> mi.sortlevel(sort_remaining=False) + (MultiIndex([(0, 2), + (0, 1)], + ), array([0, 1])) + + >>> mi.sortlevel(1) + (MultiIndex([(0, 1), + (0, 2)], + ), array([1, 0])) + + >>> mi.sortlevel(1, ascending=False) + (MultiIndex([(0, 2), + (0, 1)], + ), array([0, 1])) + """ + if isinstance(level, (str, int)): + level = [level] + level = [self._get_level_number(lev) for lev in level] + sortorder = None + + # we have a directed ordering via ascending + if isinstance(ascending, list): + if not len(level) == len(ascending): + raise ValueError("level must have same length as ascending") + + indexer = lexsort_indexer( + [self.codes[lev] for lev in level], orders=ascending + ) + + # level ordering + else: + + codes = list(self.codes) + shape = list(self.levshape) + + # partition codes and shape + primary = tuple(codes[lev] for lev in level) + primshp = tuple(shape[lev] for lev in level) + + # Reverse sorted to retain the order of + # smaller indices that needs to be removed + for lev in sorted(level, reverse=True): + codes.pop(lev) + shape.pop(lev) + + if sort_remaining: + primary += primary + tuple(codes) + primshp += primshp + tuple(shape) + else: + sortorder = level[0] + + indexer = indexer_from_factorized(primary, primshp, compress=False) + + if not ascending: + indexer = indexer[::-1] + + indexer = ensure_platform_int(indexer) + new_codes = [level_codes.take(indexer) for level_codes in self.codes] + + new_index = MultiIndex( + codes=new_codes, + levels=self.levels, + names=self.names, + sortorder=sortorder, + verify_integrity=False, + ) + + return new_index, indexer + + def _wrap_reindex_result(self, target, indexer, preserve_names: bool): + if not isinstance(target, MultiIndex): + if indexer is None: + target = self + elif (indexer >= 0).all(): + target = self.take(indexer) + else: + try: + target = MultiIndex.from_tuples(target) + except TypeError: + # not all tuples, see test_constructor_dict_multiindex_reindex_flat + return target + + target = self._maybe_preserve_names(target, preserve_names) + return target + + def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index: + if ( + preserve_names + and target.nlevels == self.nlevels + and target.names != self.names + ): + target = target.copy(deep=False) + target.names = self.names + return target + + # -------------------------------------------------------------------- + # Indexing Methods + + def _check_indexing_error(self, key) -> None: + if not is_hashable(key) or is_iterator(key): + # We allow tuples if they are hashable, whereas other Index + # subclasses require scalar. + # We have to explicitly exclude generators, as these are hashable. + raise InvalidIndexError(key) + + @cache_readonly + def _should_fallback_to_positional(self) -> bool: + """ + Should integer key(s) be treated as positional? + """ + # GH#33355 + return self.levels[0]._should_fallback_to_positional + + def _get_values_for_loc(self, series: Series, loc, key): + """ + Do a positional lookup on the given Series, returning either a scalar + or a Series. 
+ + Assumes that `series.index is self` + """ + new_values = series._values[loc] + if is_scalar(loc): + return new_values + + if len(new_values) == 1 and not self.nlevels > 1: + # If more than one level left, we can not return a scalar + return new_values[0] + + new_index = self[loc] + new_index = maybe_droplevels(new_index, key) + new_ser = series._constructor(new_values, index=new_index, name=series.name) + return new_ser.__finalize__(series) + + def _get_indexer_strict( + self, key, axis_name: str + ) -> tuple[Index, npt.NDArray[np.intp]]: + + keyarr = key + if not isinstance(keyarr, Index): + keyarr = com.asarray_tuplesafe(keyarr) + + if len(keyarr) and not isinstance(keyarr[0], tuple): + indexer = self._get_indexer_level_0(keyarr) + + self._raise_if_missing(key, indexer, axis_name) + return self[indexer], indexer + + return super()._get_indexer_strict(key, axis_name) + + def _raise_if_missing(self, key, indexer, axis_name: str) -> None: + keyarr = key + if not isinstance(key, Index): + keyarr = com.asarray_tuplesafe(key) + + if len(keyarr) and not isinstance(keyarr[0], tuple): + # i.e. same condition for special case in MultiIndex._get_indexer_strict + + mask = indexer == -1 + if mask.any(): + check = self.levels[0].get_indexer(keyarr) + cmask = check == -1 + if cmask.any(): + raise KeyError(f"{keyarr[cmask]} not in index") + # We get here when levels still contain values which are not + # actually in Index anymore + raise KeyError(f"{keyarr} not in index") + else: + return super()._raise_if_missing(key, indexer, axis_name) + + def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]: + """ + Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`. + """ + lev = self.levels[0] + codes = self._codes[0] + cat = Categorical.from_codes(codes=codes, categories=lev) + ci = Index(cat) + return ci.get_indexer_for(target) + + def get_slice_bound( + self, + label: Hashable | Sequence[Hashable], + side: Literal["left", "right"], + kind=lib.no_default, + ) -> int: + """ + For an ordered MultiIndex, compute slice bound + that corresponds to given label. + + Returns leftmost (one-past-the-rightmost if `side=='right') position + of given label. + + Parameters + ---------- + label : object or tuple of objects + side : {'left', 'right'} + kind : {'loc', 'getitem', None} + + .. deprecated:: 1.4.0 + + Returns + ------- + int + Index of label. + + Notes + ----- + This method only works if level 0 index of the MultiIndex is lexsorted. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')]) + + Get the locations from the leftmost 'b' in the first level + until the end of the multiindex: + + >>> mi.get_slice_bound('b', side="left") + 1 + + Like above, but if you get the locations from the rightmost + 'b' in the first level and 'f' in the second level: + + >>> mi.get_slice_bound(('b','f'), side="right") + 3 + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + """ + self._deprecated_arg(kind, "kind", "get_slice_bound") + + if not isinstance(label, tuple): + label = (label,) + return self._partial_tup_index(label, side=side) + + def slice_locs( + self, start=None, end=None, step=None, kind=lib.no_default + ) -> tuple[int, int]: + """ + For an ordered MultiIndex, compute the slice locations for input + labels. + + The input labels can be tuples representing partial levels, e.g. 
for a + MultiIndex with 3 levels, you can pass a single value (corresponding to + the first level), or a 1-, 2-, or 3-tuple. + + Parameters + ---------- + start : label or tuple, default None + If None, defaults to the beginning + end : label or tuple + If None, defaults to the end + step : int or None + Slice step + kind : string, optional, defaults None + + .. deprecated:: 1.4.0 + + Returns + ------- + (start, end) : (int, int) + + Notes + ----- + This method only works if the MultiIndex is properly lexsorted. So, + if only the first 2 levels of a 3-level MultiIndex are lexsorted, + you can only pass two levels to ``.slice_locs``. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')], + ... names=['A', 'B']) + + Get the slice locations from the beginning of 'b' in the first level + until the end of the multiindex: + + >>> mi.slice_locs(start='b') + (1, 4) + + Like above, but stop at the end of 'b' in the first level and 'f' in + the second level: + + >>> mi.slice_locs(start='b', end=('b', 'f')) + (1, 3) + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + """ + self._deprecated_arg(kind, "kind", "slice_locs") + # This function adds nothing to its parent implementation (the magic + # happens in get_slice_bound method), but it adds meaningful doc. + return super().slice_locs(start, end, step) + + def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"): + if len(tup) > self._lexsort_depth: + raise UnsortedIndexError( + f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " + f"({self._lexsort_depth})" + ) + + n = len(tup) + start, end = 0, len(self) + zipped = zip(tup, self.levels, self.codes) + for k, (lab, lev, level_codes) in enumerate(zipped): + section = level_codes[start:end] + + if lab not in lev and not isna(lab): + # short circuit + try: + loc = algos.searchsorted(lev, lab, side=side) + except TypeError as err: + # non-comparable e.g. test_slice_locs_with_type_mismatch + raise TypeError(f"Level type mismatch: {lab}") from err + if not is_integer(loc): + # non-comparable level, e.g. test_groupby_example + raise TypeError(f"Level type mismatch: {lab}") + if side == "right" and loc >= 0: + loc -= 1 + return start + algos.searchsorted(section, loc, side=side) + + idx = self._get_loc_single_level_index(lev, lab) + if isinstance(idx, slice) and k < n - 1: + # Get start and end value from slice, necessary when a non-integer + # interval is given as input GH#37707 + start = idx.start + end = idx.stop + elif k < n - 1: + # error: Incompatible types in assignment (expression has type + # "Union[ndarray[Any, dtype[signedinteger[Any]]] + end = start + algos.searchsorted( # type: ignore[assignment] + section, idx, side="right" + ) + # error: Incompatible types in assignment (expression has type + # "Union[ndarray[Any, dtype[signedinteger[Any]]] + start = start + algos.searchsorted( # type: ignore[assignment] + section, idx, side="left" + ) + elif isinstance(idx, slice): + idx = idx.start + return start + algos.searchsorted(section, idx, side=side) + else: + return start + algos.searchsorted(section, idx, side=side) + + def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: + """ + If key is NA value, location of index unify as -1. 
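# Illustrative sketch, not part of the pandas source: _partial_tup_index above
# rejects tuple keys longer than the lexsort depth, which is why label slicing
# an unsorted MultiIndex raises UnsortedIndexError until it is sorted.
import pandas as pd

mi = pd.MultiIndex.from_tuples([("b", 2), ("a", 1), ("a", 3)])
try:
    mi.slice_locs(("a", 1), ("b", 2))
except pd.errors.UnsortedIndexError:
    pass                                              # lexsort depth is 0 here
mi.sortlevel()[0].slice_locs(("a", 1), ("b", 2))      # (0, 3) once sorted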
+ + Parameters + ---------- + level_index: Index + key : label + + Returns + ------- + loc : int + If key is NA value, loc is -1 + Else, location of key in index. + + See Also + -------- + Index.get_loc : The get_loc method for (single-level) index. + """ + if is_scalar(key) and isna(key): + return -1 + else: + return level_index.get_loc(key) + + def get_loc(self, key, method=None): + """ + Get location for a label or a tuple of labels. + + The location is returned as an integer/slice or boolean + mask. + + Parameters + ---------- + key : label or tuple of labels (one for each level) + method : None + + Returns + ------- + loc : int, slice object or boolean mask + If the key is past the lexsort depth, the return may be a + boolean mask array, otherwise it is always a slice or int. + + See Also + -------- + Index.get_loc : The get_loc method for (single-level) index. + MultiIndex.slice_locs : Get slice location given start label(s) and + end label(s). + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + + Notes + ----- + The key cannot be a slice, list of same-level labels, a boolean mask, + or a sequence of such. If you want to use those, use + :meth:`MultiIndex.get_locs` instead. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')]) + + >>> mi.get_loc('b') + slice(1, 3, None) + + >>> mi.get_loc(('b', 'e')) + 1 + """ + if method is not None: + raise NotImplementedError( + "only the default get_loc method is " + "currently supported for MultiIndex" + ) + + self._check_indexing_error(key) + + def _maybe_to_slice(loc): + """convert integer indexer to boolean mask or slice if possible""" + if not isinstance(loc, np.ndarray) or loc.dtype != np.intp: + return loc + + loc = lib.maybe_indices_to_slice(loc, len(self)) + if isinstance(loc, slice): + return loc + + mask = np.empty(len(self), dtype="bool") + mask.fill(False) + mask[loc] = True + return mask + + if not isinstance(key, tuple): + loc = self._get_level_indexer(key, level=0) + return _maybe_to_slice(loc) + + keylen = len(key) + if self.nlevels < keylen: + raise KeyError( + f"Key length ({keylen}) exceeds index depth ({self.nlevels})" + ) + + if keylen == self.nlevels and self.is_unique: + try: + return self._engine.get_loc(key) + except TypeError: + # e.g. test_partial_slicing_with_multiindex partial string slicing + loc, _ = self.get_loc_level(key, list(range(self.nlevels))) + return loc + + # -- partial selection or non-unique index + # break the key into 2 parts based on the lexsort_depth of the index; + # the first part returns a continuous slice of the index; the 2nd part + # needs linear search within the slice + i = self._lexsort_depth + lead_key, follow_key = key[:i], key[i:] + + if not lead_key: + start = 0 + stop = len(self) + else: + try: + start, stop = self.slice_locs(lead_key, lead_key) + except TypeError as err: + # e.g. 
test_groupby_example key = ((0, 0, 1, 2), "new_col") + # when self has 5 integer levels + raise KeyError(key) from err + + if start == stop: + raise KeyError(key) + + if not follow_key: + return slice(start, stop) + + warnings.warn( + "indexing past lexsort depth may impact performance.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + + loc = np.arange(start, stop, dtype=np.intp) + + for i, k in enumerate(follow_key, len(lead_key)): + mask = self.codes[i][loc] == self._get_loc_single_level_index( + self.levels[i], k + ) + if not mask.all(): + loc = loc[mask] + if not len(loc): + raise KeyError(key) + + return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop) + + def get_loc_level(self, key, level=0, drop_level: bool = True): + """ + Get location and sliced index for requested label(s)/level(s). + + Parameters + ---------- + key : label or sequence of labels + level : int/level name or list thereof, optional + drop_level : bool, default True + If ``False``, the resulting index will not drop any level. + + Returns + ------- + loc : A 2-tuple where the elements are: + Element 0: int, slice object or boolean array + Element 1: The resulting sliced multiindex/index. If the key + contains all levels, this will be ``None``. + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')], + ... names=['A', 'B']) + + >>> mi.get_loc_level('b') + (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) + + >>> mi.get_loc_level('e', level='B') + (array([False, True, False]), Index(['b'], dtype='object', name='A')) + + >>> mi.get_loc_level(['b', 'e']) + (1, None) + """ + if not isinstance(level, (list, tuple)): + level = self._get_level_number(level) + else: + level = [self._get_level_number(lev) for lev in level] + + loc, mi = self._get_loc_level(key, level=level) + if not drop_level: + if lib.is_integer(loc): + mi = self[loc : loc + 1] + else: + mi = self[loc] + return loc, mi + + def _get_loc_level(self, key, level: int | list[int] = 0): + """ + get_loc_level but with `level` known to be positional, not name-based. + """ + + # different name to distinguish from maybe_droplevels + def maybe_mi_droplevels(indexer, levels): + """ + If level does not exist or all levels were dropped, the exception + has to be handled outside. + """ + new_index = self[indexer] + + for i in sorted(levels, reverse=True): + new_index = new_index._drop_level_numbers([i]) + + return new_index + + if isinstance(level, (tuple, list)): + if len(key) != len(level): + raise AssertionError( + "Key for location must have same length as number of levels" + ) + result = None + for lev, k in zip(level, key): + loc, new_index = self._get_loc_level(k, level=lev) + if isinstance(loc, slice): + mask = np.zeros(len(self), dtype=bool) + mask[loc] = True + loc = mask + result = loc if result is None else result & loc + + try: + # FIXME: we should be only dropping levels on which we are + # scalar-indexing + mi = maybe_mi_droplevels(result, level) + except ValueError: + # droplevel failed because we tried to drop all levels, + # i.e. 
len(level) == self.nlevels + mi = self[result] + + return result, mi + + # kludge for #1796 + if isinstance(key, list): + key = tuple(key) + + if isinstance(key, tuple) and level == 0: + + try: + # Check if this tuple is a single key in our first level + if key in self.levels[0]: + indexer = self._get_level_indexer(key, level=level) + new_index = maybe_mi_droplevels(indexer, [0]) + return indexer, new_index + except (TypeError, InvalidIndexError): + pass + + if not any(isinstance(k, slice) for k in key): + + if len(key) == self.nlevels and self.is_unique: + # Complete key in unique index -> standard get_loc + try: + return (self._engine.get_loc(key), None) + except KeyError as err: + raise KeyError(key) from err + except TypeError: + # e.g. partial string indexing + # test_partial_string_timestamp_multiindex + pass + + # partial selection + indexer = self.get_loc(key) + ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] + if len(ilevels) == self.nlevels: + if is_integer(indexer): + # we are dropping all levels + return indexer, None + + # TODO: in some cases we still need to drop some levels, + # e.g. test_multiindex_perf_warn + # test_partial_string_timestamp_multiindex + ilevels = [ + i + for i in range(len(key)) + if ( + not isinstance(key[i], str) + or not self.levels[i]._supports_partial_string_indexing + ) + and key[i] != slice(None, None) + ] + if len(ilevels) == self.nlevels: + # TODO: why? + ilevels = [] + return indexer, maybe_mi_droplevels(indexer, ilevels) + + else: + indexer = None + for i, k in enumerate(key): + if not isinstance(k, slice): + loc_level = self._get_level_indexer(k, level=i) + if isinstance(loc_level, slice): + if com.is_null_slice(loc_level) or com.is_full_slice( + loc_level, len(self) + ): + # everything + continue + else: + # e.g. test_xs_IndexSlice_argument_not_implemented + k_index = np.zeros(len(self), dtype=bool) + k_index[loc_level] = True + + else: + k_index = loc_level + + elif com.is_null_slice(k): + # taking everything, does not affect `indexer` below + continue + + else: + # FIXME: this message can be inaccurate, e.g. + # test_series_varied_multiindex_alignment + raise TypeError(f"Expected label or tuple of labels, got {key}") + + if indexer is None: + indexer = k_index + else: + indexer &= k_index + if indexer is None: + indexer = slice(None, None) + ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] + return indexer, maybe_mi_droplevels(indexer, ilevels) + else: + indexer = self._get_level_indexer(key, level=level) + if ( + isinstance(key, str) + and self.levels[level]._supports_partial_string_indexing + ): + # check to see if we did an exact lookup vs sliced + check = self.levels[level].get_loc(key) + if not is_integer(check): + # e.g. test_partial_string_timestamp_multiindex + return indexer, self[indexer] + + try: + result_index = maybe_mi_droplevels(indexer, [level]) + except ValueError: + result_index = self[indexer] + + return indexer, result_index + + def _get_level_indexer( + self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None + ): + # `level` kwarg is _always_ positional, never name + # return a boolean array or slice showing where the key is + # in the totality of values + # if the indexer is provided, then use this + + level_index = self.levels[level] + level_codes = self.codes[level] + + def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): + # Compute a bool indexer to identify the positions to take. 
+ # If we have an existing indexer, we only need to examine the + # subset of positions where the existing indexer is True. + if indexer is not None: + # we only need to look at the subset of codes where the + # existing indexer equals True + codes = codes[indexer] + + if step is None or step == 1: + new_indexer = (codes >= start) & (codes < stop) + else: + r = np.arange(start, stop, step, dtype=codes.dtype) + new_indexer = algos.isin(codes, r) + + if indexer is None: + return new_indexer + + indexer = indexer.copy() + indexer[indexer] = new_indexer + return indexer + + if isinstance(key, slice): + # handle a slice, returning a slice if we can + # otherwise a boolean indexer + step = key.step + is_negative_step = step is not None and step < 0 + + try: + if key.start is not None: + start = level_index.get_loc(key.start) + elif is_negative_step: + start = len(level_index) - 1 + else: + start = 0 + + if key.stop is not None: + stop = level_index.get_loc(key.stop) + elif is_negative_step: + stop = 0 + elif isinstance(start, slice): + stop = len(level_index) + else: + stop = len(level_index) - 1 + except KeyError: + + # we have a partial slice (like looking up a partial date + # string) + start = stop = level_index.slice_indexer(key.start, key.stop, key.step) + step = start.step + + if isinstance(start, slice) or isinstance(stop, slice): + # we have a slice for start and/or stop + # a partial date slicer on a DatetimeIndex generates a slice + # note that the stop ALREADY includes the stopped point (if + # it was a string sliced) + start = getattr(start, "start", start) + stop = getattr(stop, "stop", stop) + return convert_indexer(start, stop, step) + + elif level > 0 or self._lexsort_depth == 0 or step is not None: + # need to have like semantics here to right + # searching as when we are using a slice + # so adjust the stop by 1 (so we include stop) + stop = (stop - 1) if is_negative_step else (stop + 1) + return convert_indexer(start, stop, step) + else: + # sorted, so can return slice object -> view + i = algos.searchsorted(level_codes, start, side="left") + j = algos.searchsorted(level_codes, stop, side="right") + return slice(i, j, step) + + else: + + idx = self._get_loc_single_level_index(level_index, key) + + if level > 0 or self._lexsort_depth == 0: + # Desired level is not sorted + if isinstance(idx, slice): + # test_get_loc_partial_timestamp_multiindex + locs = (level_codes >= idx.start) & (level_codes < idx.stop) + return locs + + locs = np.array(level_codes == idx, dtype=bool, copy=False) + + if not locs.any(): + # The label is present in self.levels[level] but unused: + raise KeyError(key) + return locs + + if isinstance(idx, slice): + # e.g. test_partial_string_timestamp_multiindex + start = algos.searchsorted(level_codes, idx.start, side="left") + # NB: "left" here bc of slice semantics + end = algos.searchsorted(level_codes, idx.stop, side="left") + else: + start = algos.searchsorted(level_codes, idx, side="left") + end = algos.searchsorted(level_codes, idx, side="right") + + if start == end: + # The label is present in self.levels[level] but unused: + raise KeyError(key) + return slice(start, end) + + def get_locs(self, seq): + """ + Get location for a sequence of labels. + + Parameters + ---------- + seq : label, slice, list, mask or a sequence of such + You should use one of the above for each level. + If a level should not be used, set it to ``slice(None)``. + + Returns + ------- + numpy.ndarray + NumPy array of integers suitable for passing to iloc. 
+ + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.slice_locs : Get slice location given start label(s) and + end label(s). + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')]) + + >>> mi.get_locs('b') # doctest: +SKIP + array([1, 2], dtype=int64) + + >>> mi.get_locs([slice(None), ['e', 'f']]) # doctest: +SKIP + array([1, 2], dtype=int64) + + >>> mi.get_locs([[True, False, True], slice('e', 'f')]) # doctest: +SKIP + array([2], dtype=int64) + """ + + # must be lexsorted to at least as many levels + true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] + if true_slices and true_slices[-1] >= self._lexsort_depth: + raise UnsortedIndexError( + "MultiIndex slicing requires the index to be lexsorted: slicing " + f"on levels {true_slices}, lexsort depth {self._lexsort_depth}" + ) + + if any(x is Ellipsis for x in seq): + raise NotImplementedError( + "MultiIndex does not support indexing with Ellipsis" + ) + + n = len(self) + + def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]: + if isinstance(indexer, slice): + new_indexer = np.zeros(n, dtype=np.bool_) + new_indexer[indexer] = True + return new_indexer + return indexer + + # a bool indexer for the positions we want to take + indexer: npt.NDArray[np.bool_] | None = None + + for i, k in enumerate(seq): + + lvl_indexer: npt.NDArray[np.bool_] | slice | None = None + + if com.is_bool_indexer(k): + if len(k) != n: + raise ValueError( + "cannot index with a boolean indexer that " + "is not the same length as the index" + ) + lvl_indexer = np.asarray(k) + + elif is_list_like(k): + # a collection of labels to include from this level (these are or'd) + + # GH#27591 check if this is a single tuple key in the level + try: + lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer) + except (InvalidIndexError, TypeError, KeyError) as err: + # InvalidIndexError e.g. non-hashable, fall back to treating + # this as a sequence of labels + # KeyError it can be ambiguous if this is a label or sequence + # of labels + # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708 + for x in k: + if not is_hashable(x): + # e.g. slice + raise err + try: + item_indexer = self._get_level_indexer( + x, level=i, indexer=indexer + ) + except KeyError: + # ignore not founds; see discussion in GH#39424 + warnings.warn( + "The behavior of indexing on a MultiIndex with a " + "nested sequence of labels is deprecated and will " + "change in a future version. " + "`series.loc[label, sequence]` will raise if any " + "members of 'sequence' or not present in " + "the index's second level. To retain the old " + "behavior, use `series.index.isin(sequence, level=1)`", + # TODO: how to opt in to the future behavior? + # TODO: how to handle IntervalIndex level? 
+ # (no test cases) + FutureWarning, + stacklevel=find_stack_level(), + ) + continue + else: + if lvl_indexer is None: + lvl_indexer = _to_bool_indexer(item_indexer) + elif isinstance(item_indexer, slice): + lvl_indexer[item_indexer] = True # type: ignore[index] + else: + lvl_indexer |= item_indexer + + if lvl_indexer is None: + # no matches we are done + # test_loc_getitem_duplicates_multiindex_empty_indexer + return np.array([], dtype=np.intp) + + elif com.is_null_slice(k): + # empty slice + if indexer is None and i == len(seq) - 1: + return np.arange(n, dtype=np.intp) + continue + + else: + # a slice or a single label + lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer) + + # update indexer + lvl_indexer = _to_bool_indexer(lvl_indexer) + if indexer is None: + indexer = lvl_indexer + else: + indexer &= lvl_indexer + if not np.any(indexer) and np.any(lvl_indexer): + raise KeyError(seq) + + # empty indexer + if indexer is None: + return np.array([], dtype=np.intp) + + pos_indexer = indexer.nonzero()[0] + return self._reorder_indexer(seq, pos_indexer) + + # -------------------------------------------------------------------- + + def _reorder_indexer( + self, + seq: tuple[Scalar | Iterable | AnyArrayLike, ...], + indexer: npt.NDArray[np.intp], + ) -> npt.NDArray[np.intp]: + """ + Reorder an indexer of a MultiIndex (self) so that the labels are in the + same order as given in seq + + Parameters + ---------- + seq : label/slice/list/mask or a sequence of such + indexer: a position indexer of self + + Returns + ------- + indexer : a sorted position indexer of self ordered as seq + """ + # If the index is lexsorted and the list_like label in seq are sorted + # then we do not need to sort + if self._is_lexsorted(): + need_sort = False + for i, k in enumerate(seq): + if is_list_like(k): + if not need_sort: + k_codes = self.levels[i].get_indexer(k) + k_codes = k_codes[k_codes >= 0] # Filter absent keys + # True if the given codes are not ordered + need_sort = (k_codes[:-1] > k_codes[1:]).any() + elif isinstance(k, slice) and k.step is not None and k.step < 0: + need_sort = True + # Bail out if both index and seq are sorted + if not need_sort: + return indexer + + n = len(self) + keys: tuple[np.ndarray, ...] 
= () + # For each level of the sequence in seq, map the level codes with the + # order they appears in a list-like sequence + # This mapping is then use to reorder the indexer + for i, k in enumerate(seq): + if is_scalar(k): + # GH#34603 we want to treat a scalar the same as an all equal list + k = [k] + if com.is_bool_indexer(k): + new_order = np.arange(n)[indexer] + elif is_list_like(k): + # Generate a map with all level codes as sorted initially + k = algos.unique(k) + key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len( + self.levels[i] + ) + # Set order as given in the indexer list + level_indexer = self.levels[i].get_indexer(k) + level_indexer = level_indexer[level_indexer >= 0] # Filter absent keys + key_order_map[level_indexer] = np.arange(len(level_indexer)) + + new_order = key_order_map[self.codes[i][indexer]] + elif isinstance(k, slice) and k.step is not None and k.step < 0: + # flip order for negative step + new_order = np.arange(n)[::-1][indexer] + elif isinstance(k, slice) and k.start is None and k.stop is None: + # slice(None) should not determine order GH#31330 + new_order = np.ones((n,))[indexer] + else: + # For all other case, use the same order as the level + new_order = np.arange(n)[indexer] + keys = (new_order,) + keys + + # Find the reordering using lexsort on the keys mapping + ind = np.lexsort(keys) + return indexer[ind] + + def truncate(self, before=None, after=None) -> MultiIndex: + """ + Slice index between two labels / tuples, return new MultiIndex + + Parameters + ---------- + before : label or tuple, can be partial. Default None + None defaults to start + after : label or tuple, can be partial. Default None + None defaults to end + + Returns + ------- + truncated : MultiIndex + """ + if after and before and after < before: + raise ValueError("after < before") + + i, j = self.levels[0].slice_locs(before, after) + left, right = self.slice_locs(before, after) + + new_levels = list(self.levels) + new_levels[0] = new_levels[0][i:j] + + new_codes = [level_codes[left:right] for level_codes in self.codes] + new_codes[0] = new_codes[0] - i + + return MultiIndex( + levels=new_levels, + codes=new_codes, + names=self._names, + verify_integrity=False, + ) + + def equals(self, other: object) -> bool: + """ + Determines if two MultiIndex objects have the same labeling information + (the levels themselves do not necessarily have to be the same) + + See Also + -------- + equal_levels + """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + + if len(self) != len(other): + return False + + if not isinstance(other, MultiIndex): + # d-level MultiIndex can equal d-tuple Index + if not self._should_compare(other): + # object Index or Categorical[object] may contain tuples + return False + return array_equivalent(self._values, other._values) + + if self.nlevels != other.nlevels: + return False + + for i in range(self.nlevels): + self_codes = self.codes[i] + other_codes = other.codes[i] + self_mask = self_codes == -1 + other_mask = other_codes == -1 + if not np.array_equal(self_mask, other_mask): + return False + self_codes = self_codes[~self_mask] + self_values = self.levels[i]._values.take(self_codes) + + other_codes = other_codes[~other_mask] + other_values = other.levels[i]._values.take(other_codes) + + # since we use NaT both datetime64 and timedelta64 we can have a + # situation where a level is typed say timedelta64 in self (IOW it + # has other values than NaT) but types datetime64 in other (where + # its all NaT) but 
these are equivalent + if len(self_values) == 0 and len(other_values) == 0: + continue + + if not isinstance(self_values, np.ndarray): + # i.e. ExtensionArray + if not self_values.equals(other_values): + return False + elif not isinstance(other_values, np.ndarray): + # i.e. other is ExtensionArray + if not other_values.equals(self_values): + return False + else: + if not array_equivalent(self_values, other_values): + return False + + return True + + def equal_levels(self, other: MultiIndex) -> bool: + """ + Return True if the levels of both MultiIndex objects are the same + + """ + if self.nlevels != other.nlevels: + return False + + for i in range(self.nlevels): + if not self.levels[i].equals(other.levels[i]): + return False + return True + + # -------------------------------------------------------------------- + # Set Methods + + def _union(self, other, sort) -> MultiIndex: + other, result_names = self._convert_can_do_setop(other) + if ( + any(-1 in code for code in self.codes) + and any(-1 in code for code in other.codes) + or self.has_duplicates + or other.has_duplicates + ): + # This is only necessary if both sides have nans or one has dups, + # fast_unique_multiple is faster + result = super()._union(other, sort) + else: + rvals = other._values.astype(object, copy=False) + result = lib.fast_unique_multiple([self._values, rvals], sort=sort) + + return MultiIndex.from_arrays(zip(*result), sortorder=None, names=result_names) + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + return is_object_dtype(dtype) + + def _get_reconciled_name_object(self, other) -> MultiIndex: + """ + If the result of a set operation will be self, + return self, unless the names change, in which + case make a shallow copy of self. + """ + names = self._maybe_match_names(other) + if self.names != names: + # Incompatible return value type (got "Optional[MultiIndex]", expected + # "MultiIndex") + return self.rename(names) # type: ignore[return-value] + return self + + def _maybe_match_names(self, other): + """ + Try to find common names to attach to the result of an operation between + a and b. Return a consensus list of names if they match at least partly + or list of None if they have completely different names. + """ + if len(self.names) != len(other.names): + return [None] * len(self.names) + names = [] + for a_name, b_name in zip(self.names, other.names): + if a_name == b_name: + names.append(a_name) + else: + # TODO: what if they both have np.nan for their names? 
+ names.append(None) + return names + + def _wrap_intersection_result(self, other, result) -> MultiIndex: + _, result_names = self._convert_can_do_setop(other) + + if len(result) == 0: + return MultiIndex( + levels=self.levels, + codes=[[]] * self.nlevels, + names=result_names, + verify_integrity=False, + ) + else: + return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names) + + def _wrap_difference_result(self, other, result) -> MultiIndex: + _, result_names = self._convert_can_do_setop(other) + + if len(result) == 0: + return MultiIndex( + levels=[[]] * self.nlevels, + codes=[[]] * self.nlevels, + names=result_names, + verify_integrity=False, + ) + else: + return MultiIndex.from_tuples(result, sortorder=0, names=result_names) + + def _convert_can_do_setop(self, other): + result_names = self.names + + if not isinstance(other, Index): + + if len(other) == 0: + return self[:0], self.names + else: + msg = "other must be a MultiIndex or a list of tuples" + try: + other = MultiIndex.from_tuples(other, names=self.names) + except (ValueError, TypeError) as err: + # ValueError raised by tuples_to_object_array if we + # have non-object dtype + raise TypeError(msg) from err + else: + result_names = get_unanimous_names(self, other) + + return other, result_names + + # -------------------------------------------------------------------- + + @doc(Index.astype) + def astype(self, dtype, copy: bool = True): + dtype = pandas_dtype(dtype) + if is_categorical_dtype(dtype): + msg = "> 1 ndim Categorical are not supported at this time" + raise NotImplementedError(msg) + elif not is_object_dtype(dtype): + raise TypeError( + "Setting a MultiIndex dtype to anything other than object " + "is not supported" + ) + elif copy is True: + return self._view() + return self + + def _validate_fill_value(self, item): + if isinstance(item, MultiIndex): + # GH#43212 + if item.nlevels != self.nlevels: + raise ValueError("Item must have length equal to number of levels.") + return item._values + elif not isinstance(item, tuple): + # Pad the key with empty strings if lower levels of the key + # aren't specified: + item = (item,) + ("",) * (self.nlevels - 1) + elif len(item) != self.nlevels: + raise ValueError("Item must have length equal to number of levels.") + return item + + def insert(self, loc: int, item) -> MultiIndex: + """ + Make new MultiIndex inserting new item at location + + Parameters + ---------- + loc : int + item : tuple + Must be same length as number of levels in the MultiIndex + + Returns + ------- + new_index : Index + """ + item = self._validate_fill_value(item) + + new_levels = [] + new_codes = [] + for k, level, level_codes in zip(item, self.levels, self.codes): + if k not in level: + # have to insert into level + # must insert at end otherwise you have to recompute all the + # other codes + lev_loc = len(level) + level = level.insert(lev_loc, k) + else: + lev_loc = level.get_loc(k) + + new_levels.append(level) + new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc)) + + return MultiIndex( + levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False + ) + + def delete(self, loc) -> MultiIndex: + """ + Make new index with passed location deleted + + Returns + ------- + new_index : MultiIndex + """ + new_codes = [np.delete(level_codes, loc) for level_codes in self.codes] + return MultiIndex( + levels=self.levels, + codes=new_codes, + names=self.names, + verify_integrity=False, + ) + + @doc(Index.isin) + def isin(self, values, level=None) -> 
npt.NDArray[np.bool_]: + if level is None: + values = MultiIndex.from_tuples(values, names=self.names)._values + return algos.isin(self._values, values) + else: + num = self._get_level_number(level) + levs = self.get_level_values(num) + + if levs.size == 0: + return np.zeros(len(levs), dtype=np.bool_) + return levs.isin(values) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"]) + def set_names(self, names, level=None, inplace: bool = False) -> MultiIndex | None: + return super().set_names(names=names, level=level, inplace=inplace) + + rename = set_names + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex: + return super().drop_duplicates(keep=keep) + + # --------------------------------------------------------------- + # Arithmetic/Numeric Methods - Disabled + + __add__ = make_invalid_op("__add__") + __radd__ = make_invalid_op("__radd__") + __iadd__ = make_invalid_op("__iadd__") + __sub__ = make_invalid_op("__sub__") + __rsub__ = make_invalid_op("__rsub__") + __isub__ = make_invalid_op("__isub__") + __pow__ = make_invalid_op("__pow__") + __rpow__ = make_invalid_op("__rpow__") + __mul__ = make_invalid_op("__mul__") + __rmul__ = make_invalid_op("__rmul__") + __floordiv__ = make_invalid_op("__floordiv__") + __rfloordiv__ = make_invalid_op("__rfloordiv__") + __truediv__ = make_invalid_op("__truediv__") + __rtruediv__ = make_invalid_op("__rtruediv__") + __mod__ = make_invalid_op("__mod__") + __rmod__ = make_invalid_op("__rmod__") + __divmod__ = make_invalid_op("__divmod__") + __rdivmod__ = make_invalid_op("__rdivmod__") + # Unary methods disabled + __neg__ = make_invalid_op("__neg__") + __pos__ = make_invalid_op("__pos__") + __abs__ = make_invalid_op("__abs__") + __invert__ = make_invalid_op("__invert__") + + +def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: + """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" + int64_codes = [ensure_int64(level_codes) for level_codes in codes] + for k in range(nlevels, 0, -1): + if libalgos.is_lexsorted(int64_codes[:k]): + return k + return 0 + + +def sparsify_labels(label_list, start: int = 0, sentinel=""): + pivoted = list(zip(*label_list)) + k = len(label_list) + + result = pivoted[: start + 1] + prev = pivoted[start] + + for cur in pivoted[start + 1 :]: + sparse_cur = [] + + for i, (p, t) in enumerate(zip(prev, cur)): + if i == k - 1: + sparse_cur.append(t) + result.append(sparse_cur) + break + + if p == t: + sparse_cur.append(sentinel) + else: + sparse_cur.extend(cur[i:]) + result.append(sparse_cur) + break + + prev = cur + + return list(zip(*result)) + + +def _get_na_rep(dtype) -> str: + if is_extension_array_dtype(dtype): + return f"{dtype.na_value}" + else: + dtype = dtype.type + + return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") + + +def maybe_droplevels(index: Index, key) -> Index: + """ + Attempt to drop level or levels from the given index. 
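# Illustrative sketch, not part of the pandas source: what sparsify_labels
# above does with per-level label lists -- repeated prefixes of consecutive
# rows are blanked with the sentinel, mirroring how a MultiIndex is printed.
# Importing the private helper directly is for demonstration only.
from pandas.core.indexes.multi import sparsify_labels

sparsify_labels([["a", "a", "b"], ["x", "y", "x"]])
# [('a', '', 'b'), ('x', 'y', 'x')]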
+ + Parameters + ---------- + index: Index + key : scalar or tuple + + Returns + ------- + Index + """ + # drop levels + original_index = index + if isinstance(key, tuple): + for _ in key: + try: + index = index._drop_level_numbers([0]) + except ValueError: + # we have dropped too much, so back out + return original_index + else: + try: + index = index._drop_level_numbers([0]) + except ValueError: + pass + + return index + + +def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: + """ + Coerce the array-like indexer to the smallest integer dtype that can encode all + of the given categories. + + Parameters + ---------- + array_like : array-like + categories : array-like + copy : bool + + Returns + ------- + np.ndarray + Non-writeable. + """ + array_like = coerce_indexer_dtype(array_like, categories) + if copy: + array_like = array_like.copy() + array_like.flags.writeable = False + return array_like + + +def _require_listlike(level, arr, arrname: str): + """ + Ensure that level is either None or listlike, and arr is list-of-listlike. + """ + if level is not None and not is_list_like(level): + if not is_list_like(arr): + raise TypeError(f"{arrname} must be list-like") + if is_list_like(arr[0]): + raise TypeError(f"{arrname} must be list-like") + level = [level] + arr = [arr] + elif level is None or is_list_like(level): + if not is_list_like(arr) or not is_list_like(arr[0]): + raise TypeError(f"{arrname} must be list of lists-like") + return level, arr diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py new file mode 100644 index 00000000..fe11a02e --- /dev/null +++ b/pandas/core/indexes/numeric.py @@ -0,0 +1,420 @@ +from __future__ import annotations + +from typing import ( + Callable, + Hashable, +) +import warnings + +import numpy as np + +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._typing import ( + Dtype, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_numeric_dtype, + is_scalar, + is_signed_integer_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) +from pandas.core.dtypes.generic import ABCSeries + +from pandas.core.indexes.base import ( + Index, + maybe_extract_name, +) + + +class NumericIndex(Index): + """ + Immutable numeric sequence used for indexing and alignment. + + The basic object storing axis labels for all pandas objects. + NumericIndex is a special case of `Index` with purely numpy int/uint/float labels. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: None) + copy : bool + Make a copy of input ndarray. + name : object + Name to be stored in the index. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + Index : The base pandas Index type. + Int64Index : Index of purely int64 labels (deprecated). + UInt64Index : Index of purely uint64 labels (deprecated). + Float64Index : Index of purely float64 labels (deprecated). + + Notes + ----- + An NumericIndex instance can **only** contain numpy int64/32/16/8, uint64/32/16/8 or + float64/32/16 dtype. In particular, ``NumericIndex`` *can not* hold Pandas numeric + dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.). 
+ """ + + _typ = "numericindex" + _values: np.ndarray + _default_dtype: np.dtype | None = None + _dtype_validation_metadata: tuple[Callable[..., bool], str] = ( + is_numeric_dtype, + "numeric type", + ) + _is_numeric_dtype = True + _can_hold_strings = False + _is_backward_compat_public_numeric_index: bool = True + + _engine_types: dict[np.dtype, type[libindex.IndexEngine]] = { + np.dtype(np.int8): libindex.Int8Engine, + np.dtype(np.int16): libindex.Int16Engine, + np.dtype(np.int32): libindex.Int32Engine, + np.dtype(np.int64): libindex.Int64Engine, + np.dtype(np.uint8): libindex.UInt8Engine, + np.dtype(np.uint16): libindex.UInt16Engine, + np.dtype(np.uint32): libindex.UInt32Engine, + np.dtype(np.uint64): libindex.UInt64Engine, + np.dtype(np.float32): libindex.Float32Engine, + np.dtype(np.float64): libindex.Float64Engine, + np.dtype(np.complex64): libindex.Complex64Engine, + np.dtype(np.complex128): libindex.Complex128Engine, + } + + @property + def _engine_type(self) -> type[libindex.IndexEngine]: + # error: Invalid index type "Union[dtype[Any], ExtensionDtype]" for + # "Dict[dtype[Any], Type[IndexEngine]]"; expected type "dtype[Any]" + return self._engine_types[self.dtype] # type: ignore[index] + + @cache_readonly + def inferred_type(self) -> str: + return { + "i": "integer", + "u": "integer", + "f": "floating", + "c": "complex", + }[self.dtype.kind] + + def __new__( + cls, data=None, dtype: Dtype | None = None, copy=False, name=None + ) -> NumericIndex: + name = maybe_extract_name(name, data, cls) + + subarr = cls._ensure_array(data, dtype, copy) + return cls._simple_new(subarr, name=name) + + @classmethod + def _ensure_array(cls, data, dtype, copy: bool): + """ + Ensure we have a valid array to pass to _simple_new. + """ + cls._validate_dtype(dtype) + + if not isinstance(data, (np.ndarray, Index)): + # Coerce to ndarray if not already ndarray or Index + if is_scalar(data): + raise cls._scalar_data_error(data) + + # other iterable of some kind + if not isinstance(data, (ABCSeries, list, tuple)): + data = list(data) + + orig = data + data = np.asarray(data, dtype=dtype) + if dtype is None and data.dtype.kind == "f": + if cls is UInt64Index and (data >= 0).all(): + # https://github.com/numpy/numpy/issues/19146 + data = np.asarray(orig, dtype=np.uint64) + + if issubclass(data.dtype.type, str): + cls._string_data_error(data) + + dtype = cls._ensure_dtype(dtype) + + if copy or not is_dtype_equal(data.dtype, dtype): + # TODO: the try/except below is because it's difficult to predict the error + # and/or error message from different combinations of data and dtype. + # Efforts to avoid this try/except welcome. 
+ # See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222 + try: + subarr = np.array(data, dtype=dtype, copy=copy) + cls._validate_dtype(subarr.dtype) + except (TypeError, ValueError): + raise ValueError(f"data is not compatible with {cls.__name__}") + cls._assert_safe_casting(data, subarr) + else: + subarr = data + + if subarr.ndim > 1: + # GH#13601, GH#20285, GH#27125 + raise ValueError("Index data must be 1-dimensional") + + subarr = np.asarray(subarr) + return subarr + + @classmethod + def _validate_dtype(cls, dtype: Dtype | None) -> None: + if dtype is None: + return + + validation_func, expected = cls._dtype_validation_metadata + if not validation_func(dtype): + raise ValueError( + f"Incorrect `dtype` passed: expected {expected}, received {dtype}" + ) + + @classmethod + def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: + """ + Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex. + + Assumes dtype has already been validated. + """ + if dtype is None: + return cls._default_dtype + + dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) + + if cls._is_backward_compat_public_numeric_index: + # dtype for NumericIndex + return dtype + else: + # dtype for Int64Index, UInt64Index etc. Needed for backwards compat. + return cls._default_dtype + + # ---------------------------------------------------------------- + # Indexing Methods + + # error: Decorated property not supported + @cache_readonly # type: ignore[misc] + @doc(Index._should_fallback_to_positional) + def _should_fallback_to_positional(self) -> bool: + return False + + @doc(Index._convert_slice_indexer) + def _convert_slice_indexer(self, key: slice, kind: str): + # TODO(2.0): once #45324 deprecation is enforced we should be able + # to simplify this. + if is_float_dtype(self.dtype): + assert kind in ["loc", "getitem"] + + # TODO: can we write this as a condition based on + # e.g. _should_fallback_to_positional? 
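+ # Illustrative: for ser = Series(range(3), index=Float64Index([1.0, 2.5, 4.0])), ser[1.0:2.5] selects by label (both endpoints included), which is the behaviour implemented below.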
+ # We always treat __getitem__ slicing as label-based + # translate to locations + return self.slice_indexer(key.start, key.stop, key.step) + + return super()._convert_slice_indexer(key, kind=kind) + + @doc(Index._maybe_cast_slice_bound) + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + assert kind in ["loc", "getitem", None, lib.no_default] + self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") + + # we will try to coerce to integers + return self._maybe_cast_indexer(label) + + # ---------------------------------------------------------------- + + @doc(Index._shallow_copy) + def _shallow_copy(self, values, name: Hashable = lib.no_default): + if not self._can_hold_na and values.dtype.kind == "f": + name = self._name if name is lib.no_default else name + # Ensure we are not returning an Int64Index with float data: + return Float64Index._simple_new(values, name=name) + return super()._shallow_copy(values=values, name=name) + + def _convert_tolerance(self, tolerance, target): + tolerance = super()._convert_tolerance(tolerance, target) + + if not np.issubdtype(tolerance.dtype, np.number): + if tolerance.ndim > 0: + raise ValueError( + f"tolerance argument for {type(self).__name__} must contain " + "numeric elements if it is list type" + ) + else: + raise ValueError( + f"tolerance argument for {type(self).__name__} must be numeric " + f"if it is a scalar: {repr(tolerance)}" + ) + return tolerance + + @classmethod + def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: + """ + Ensure incoming data can be represented with matching signed-ness. + + Needed if the process of casting data from some accepted dtype to the internal + dtype(s) bears the risk of truncation (e.g. float to int). + """ + if is_integer_dtype(subarr.dtype): + if not np.array_equal(data, subarr): + raise TypeError("Unsafe NumPy casting, you must explicitly cast") + + def _format_native_types( + self, *, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs + ) -> npt.NDArray[np.object_]: + from pandas.io.formats.format import FloatArrayFormatter + + if is_float_dtype(self.dtype): + formatter = FloatArrayFormatter( + self._values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + fixed_width=False, + ) + return formatter.get_result_as_array() + + return super()._format_native_types( + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + **kwargs, + ) + + +_num_index_shared_docs = {} + + +_num_index_shared_docs[ + "class_descr" +] = """ + Immutable sequence used for indexing and alignment. + + .. deprecated:: 1.4.0 + In pandas v2.0 %(klass)s will be removed and :class:`NumericIndex` used instead. + %(klass)s will remain fully functional for the duration of pandas 1.x. + + The basic object storing axis labels for all pandas objects. + %(klass)s is a special case of `Index` with purely %(ltype)s labels. %(extra)s. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: %(dtype)s) + copy : bool + Make a copy of input ndarray. + name : object + Name to be stored in the index. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + Index : The base pandas Index type. + NumericIndex : Index of numpy int/uint/float data. + + Notes + ----- + An Index instance can **only** contain hashable objects. +""" + + +class IntegerIndex(NumericIndex): + """ + This is an abstract class for Int64Index, UInt64Index. 
+ """ + + _is_backward_compat_public_numeric_index: bool = False + + @property + def asi8(self) -> npt.NDArray[np.int64]: + # do not cache or you'll create a memory leak + warnings.warn( + "Index.asi8 is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._values.view(self._default_dtype) + + +class Int64Index(IntegerIndex): + _index_descr_args = { + "klass": "Int64Index", + "ltype": "integer", + "dtype": "int64", + "extra": "", + } + __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args + + _typ = "int64index" + _default_dtype = np.dtype(np.int64) + _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") + + @property + def _engine_type(self) -> type[libindex.Int64Engine]: + return libindex.Int64Engine + + +class UInt64Index(IntegerIndex): + _index_descr_args = { + "klass": "UInt64Index", + "ltype": "unsigned integer", + "dtype": "uint64", + "extra": "", + } + __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args + + _typ = "uint64index" + _default_dtype = np.dtype(np.uint64) + _dtype_validation_metadata = (is_unsigned_integer_dtype, "unsigned integer") + + @property + def _engine_type(self) -> type[libindex.UInt64Engine]: + return libindex.UInt64Engine + + +class Float64Index(NumericIndex): + _index_descr_args = { + "klass": "Float64Index", + "dtype": "float64", + "ltype": "float", + "extra": "", + } + __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args + + _typ = "float64index" + _default_dtype = np.dtype(np.float64) + _dtype_validation_metadata = (is_float_dtype, "float") + _is_backward_compat_public_numeric_index: bool = False + + @property + def _engine_type(self) -> type[libindex.Float64Engine]: + return libindex.Float64Engine diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py new file mode 100644 index 00000000..c034d941 --- /dev/null +++ b/pandas/core/indexes/period.py @@ -0,0 +1,588 @@ +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, +) +from typing import Hashable +import warnings + +import numpy as np + +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._libs.tslibs import ( + BaseOffset, + NaT, + Period, + Resolution, + Tick, +) +from pandas._typing import ( + Dtype, + DtypeObj, + npt, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_datetime64_any_dtype, + is_integer, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.missing import is_valid_na_for_dtype + +from pandas.core.arrays.period import ( + PeriodArray, + period_array, + raise_on_incompatible, + validate_dtype_freq, +) +import pandas.core.common as com +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import maybe_extract_name +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + Index, +) +from pandas.core.indexes.extension import inherit_names +from pandas.core.indexes.numeric import Int64Index + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"}) +_shared_doc_kwargs = { + "klass": "PeriodArray", +} + +# --- Period index sketch + + +def _new_PeriodIndex(cls, **d): + # GH13277 for unpickling + values = d.pop("data") + if values.dtype == "int64": + freq = d.pop("freq", 
None) + values = PeriodArray(values, freq=freq) + return cls._simple_new(values, **d) + else: + return cls(values, **d) + + +@inherit_names( + ["strftime", "start_time", "end_time"] + PeriodArray._field_ops, + PeriodArray, + wrap=True, +) +@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray) +class PeriodIndex(DatetimeIndexOpsMixin): + """ + Immutable ndarray holding ordinal values indicating regular periods in time. + + Index keys are boxed to Period objects which carries the metadata (eg, + frequency information). + + Parameters + ---------- + data : array-like (1d int np.ndarray or PeriodArray), optional + Optional period-like data to construct index with. + copy : bool + Make a copy of input ndarray. + freq : str or period object, optional + One of pandas period strings or corresponding objects. + year : int, array, or Series, default None + month : int, array, or Series, default None + quarter : int, array, or Series, default None + day : int, array, or Series, default None + hour : int, array, or Series, default None + minute : int, array, or Series, default None + second : int, array, or Series, default None + dtype : str or PeriodDtype, default None + + Attributes + ---------- + day + dayofweek + day_of_week + dayofyear + day_of_year + days_in_month + daysinmonth + end_time + freq + freqstr + hour + is_leap_year + minute + month + quarter + qyear + second + start_time + week + weekday + weekofyear + year + + Methods + ------- + asfreq + strftime + to_timestamp + + See Also + -------- + Index : The base pandas Index type. + Period : Represents a period of time. + DatetimeIndex : Index with datetime64 data. + TimedeltaIndex : Index of timedelta64 data. + period_range : Create a fixed-frequency PeriodIndex. + + Examples + -------- + >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3]) + >>> idx + PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]') + """ + + _typ = "periodindex" + + _data: PeriodArray + freq: BaseOffset + dtype: PeriodDtype + + _data_cls = PeriodArray + _supports_partial_string_indexing = True + + @property + def _engine_type(self) -> type[libindex.PeriodEngine]: + return libindex.PeriodEngine + + @cache_readonly + # Signature of "_resolution_obj" incompatible with supertype "DatetimeIndexOpsMixin" + def _resolution_obj(self) -> Resolution: # type: ignore[override] + # for compat with DatetimeIndex + return self.dtype._resolution_obj + + # -------------------------------------------------------------------- + # methods that dispatch to array and wrap result in Index + # These are defined here instead of via inherit_names for mypy + + @doc( + PeriodArray.asfreq, + other="pandas.arrays.PeriodArray", + other_name="PeriodArray", + **_shared_doc_kwargs, + ) + def asfreq(self, freq=None, how: str = "E") -> PeriodIndex: + arr = self._data.asfreq(freq, how) + return type(self)._simple_new(arr, name=self.name) + + @doc(PeriodArray.to_timestamp) + def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex: + arr = self._data.to_timestamp(freq, how) + return DatetimeIndex._simple_new(arr, name=self.name) + + # https://github.com/python/mypy/issues/1362 + # error: Decorated property not supported + @property # type: ignore[misc] + @doc(PeriodArray.hour.fget) + def hour(self) -> Int64Index: + return Int64Index(self._data.hour, name=self.name) + + # https://github.com/python/mypy/issues/1362 + # error: Decorated property not supported + @property # type: ignore[misc] + @doc(PeriodArray.minute.fget) + def minute(self) -> Int64Index: + return 
Int64Index(self._data.minute, name=self.name) + + # https://github.com/python/mypy/issues/1362 + # error: Decorated property not supported + @property # type: ignore[misc] + @doc(PeriodArray.second.fget) + def second(self) -> Int64Index: + return Int64Index(self._data.second, name=self.name) + + # ------------------------------------------------------------------------ + # Index Constructors + + def __new__( + cls, + data=None, + ordinal=None, + freq=None, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + **fields, + ) -> PeriodIndex: + + valid_field_set = { + "year", + "month", + "day", + "quarter", + "hour", + "minute", + "second", + } + + if not set(fields).issubset(valid_field_set): + argument = list(set(fields) - valid_field_set)[0] + raise TypeError(f"__new__() got an unexpected keyword argument {argument}") + + name = maybe_extract_name(name, data, cls) + + if data is None and ordinal is None: + # range-based. + if not fields: + # test_pickle_compat_construction + raise cls._scalar_data_error(None) + + data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields) + # PeriodArray._generate range does validation that fields is + # empty when really using the range-based constructor. + freq = freq2 + + data = PeriodArray(data, freq=freq) + else: + freq = validate_dtype_freq(dtype, freq) + + # PeriodIndex allow PeriodIndex(period_index, freq=different) + # Let's not encourage that kind of behavior in PeriodArray. + + if freq and isinstance(data, cls) and data.freq != freq: + # TODO: We can do some of these with no-copy / coercion? + # e.g. D -> 2D seems to be OK + data = data.asfreq(freq) + + if data is None and ordinal is not None: + # we strangely ignore `ordinal` if data is passed. + ordinal = np.asarray(ordinal, dtype=np.int64) + data = PeriodArray(ordinal, freq=freq) + else: + # don't pass copy here, since we copy later. + data = period_array(data=data, freq=freq) + + if copy: + data = data.copy() + + return cls._simple_new(data, name=name) + + # ------------------------------------------------------------------------ + # Data + + @property + def values(self) -> np.ndarray: + return np.asarray(self, dtype=object) + + def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]: + """ + Convert timedelta-like input to an integer multiple of self.freq + + Parameters + ---------- + other : timedelta, np.timedelta64, DateOffset, int, np.ndarray + + Returns + ------- + converted : int, np.ndarray[int64] + + Raises + ------ + IncompatibleFrequency : if the input cannot be written as a multiple + of self.freq. Note IncompatibleFrequency subclasses ValueError. + """ + if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): + if isinstance(self.freq, Tick): + # _check_timedeltalike_freq_compat will raise if incompatible + delta = self._data._check_timedeltalike_freq_compat(other) + return delta + elif isinstance(other, BaseOffset): + if other.base == self.freq.base: + return other.n + + raise raise_on_incompatible(self, other) + elif is_integer(other): + # integer is passed to .shift via + # _add_datetimelike_methods basically + # but ufunc may pass integer to _add_delta + return other + + # raise when input doesn't have freq + raise raise_on_incompatible(self, None) + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? 
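+ Only another period dtype with the same frequency (matching dtype code and multiple ``n``) is considered comparable.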
+ """ + if not isinstance(dtype, PeriodDtype): + return False + # For the subset of DateOffsets that can be a dtype.freq, it + # suffices (and is much faster) to compare the dtype_code rather than + # the freq itself. + # See also: PeriodDtype.__eq__ + freq = dtype.freq + own_freq = self.freq + return ( + freq._period_dtype_code + # error: "BaseOffset" has no attribute "_period_dtype_code" + == own_freq._period_dtype_code # type: ignore[attr-defined] + and freq.n == own_freq.n + ) + + # ------------------------------------------------------------------------ + # Index Methods + + def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray: + """ + where : array of timestamps + mask : np.ndarray[bool] + Array of booleans where data is not NA. + """ + if isinstance(where, DatetimeIndex): + where = PeriodIndex(where._values, freq=self.freq) + elif not isinstance(where, PeriodIndex): + raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex") + + return super().asof_locs(where, mask) + + @doc(Index.astype) + def astype(self, dtype, copy: bool = True, how=lib.no_default): + dtype = pandas_dtype(dtype) + + if how is not lib.no_default: + # GH#37982 + warnings.warn( + "The 'how' keyword in PeriodIndex.astype is deprecated and " + "will be removed in a future version. " + "Use index.to_timestamp(how=how) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + how = "start" + + if is_datetime64_any_dtype(dtype): + # 'how' is index-specific, isn't part of the EA interface. + # GH#45038 implement this for PeriodArray (but without "how") + # once the "how" deprecation is enforced we can just dispatch + # directly to PeriodArray. + tz = getattr(dtype, "tz", None) + return self.to_timestamp(how=how).tz_localize(tz) + + return super().astype(dtype, copy=copy) + + @property + def is_full(self) -> bool: + """ + Returns True if this PeriodIndex is range-like in that all Periods + between start and end are present, in order. + """ + if len(self) == 0: + return True + if not self.is_monotonic_increasing: + raise ValueError("Index is not monotonic") + values = self.asi8 + return ((values[1:] - values[:-1]) < 2).all() + + @property + def inferred_type(self) -> str: + # b/c data is represented as ints make sure we can't have ambiguous + # indexing + return "period" + + # ------------------------------------------------------------------------ + # Indexing Methods + + def _convert_tolerance(self, tolerance, target): + # Returned tolerance must be in dtype/units so that + # `|self._get_engine_target() - target._engine_target()| <= tolerance` + # is meaningful. Since PeriodIndex returns int64 for engine_target, + # we may need to convert timedelta64 tolerance to int64. + tolerance = super()._convert_tolerance(tolerance, target) + + if self.dtype == target.dtype: + # convert tolerance to i8 + tolerance = self._maybe_convert_timedelta(tolerance) + + return tolerance + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location for requested label. + + Parameters + ---------- + key : Period, NaT, str, or datetime + String or datetime key must be parsable as Period. + + Returns + ------- + loc : int or ndarray[int64] + + Raises + ------ + KeyError + Key is not present in the index. + TypeError + If key is listlike or otherwise not hashable. 
+ """ + orig_key = key + + self._check_indexing_error(key) + + if is_valid_na_for_dtype(key, self.dtype): + key = NaT + + elif isinstance(key, str): + + try: + parsed, reso = self._parse_with_reso(key) + except ValueError as err: + # A string with invalid format + raise KeyError(f"Cannot interpret '{key}' as period") from err + + if self._can_partial_date_slice(reso): + try: + return self._partial_date_slice(reso, parsed) + except KeyError as err: + # TODO: pass if method is not None, like DTI does? + raise KeyError(key) from err + + if reso == self._resolution_obj: + # the reso < self._resolution_obj case goes + # through _get_string_slice + key = self._cast_partial_indexing_scalar(key) + loc = self.get_loc(key, method=method, tolerance=tolerance) + # Recursing instead of falling through matters for the exception + # message in test_get_loc3 (though not clear if that really matters) + return loc + elif method is None: + raise KeyError(key) + else: + key = self._cast_partial_indexing_scalar(parsed) + + elif isinstance(key, Period): + key = self._maybe_cast_for_get_loc(key) + + elif isinstance(key, datetime): + key = self._cast_partial_indexing_scalar(key) + + else: + # in particular integer, which Period constructor would cast to string + raise KeyError(key) + + try: + return Index.get_loc(self, key, method, tolerance) + except KeyError as err: + raise KeyError(orig_key) from err + + def _maybe_cast_for_get_loc(self, key: Period) -> Period: + # name is a misnomer, chosen for compat with DatetimeIndex + sfreq = self.freq + kfreq = key.freq + if not ( + sfreq.n == kfreq.n + # error: "BaseOffset" has no attribute "_period_dtype_code" + and sfreq._period_dtype_code # type: ignore[attr-defined] + # error: "BaseOffset" has no attribute "_period_dtype_code" + == kfreq._period_dtype_code # type: ignore[attr-defined] + ): + # GH#42247 For the subset of DateOffsets that can be Period freqs, + # checking these two attributes is sufficient to check equality, + # and much more performant than `self.freq == key.freq` + raise KeyError(key) + return key + + def _cast_partial_indexing_scalar(self, label): + try: + key = Period(label, freq=self.freq) + except ValueError as err: + # we cannot construct the Period + raise KeyError(label) from err + return key + + @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound) + def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + if isinstance(label, datetime): + label = self._cast_partial_indexing_scalar(label) + + return super()._maybe_cast_slice_bound(label, side, kind=kind) + + def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): + iv = Period(parsed, freq=reso.attr_abbrev) + return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end")) + + +def period_range( + start=None, end=None, periods: int | None = None, freq=None, name=None +) -> PeriodIndex: + """ + Return a fixed frequency PeriodIndex. + + The day (calendar) is the default frequency. + + Parameters + ---------- + start : str or period-like, default None + Left bound for generating periods. + end : str or period-like, default None + Right bound for generating periods. + periods : int, default None + Number of periods to generate. + freq : str or DateOffset, optional + Frequency alias. By default the freq is taken from `start` or `end` + if those are Period objects. Otherwise, the default is ``"D"`` for + daily frequency. + name : str, default None + Name of the resulting PeriodIndex. 
+ + Returns + ------- + PeriodIndex + + Notes + ----- + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') + PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', + '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', + '2018-01'], + dtype='period[M]') + + If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor + endpoints for a ``PeriodIndex`` with frequency matching that of the + ``period_range`` constructor. + + >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), + ... end=pd.Period('2017Q2', freq='Q'), freq='M') + PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], + dtype='period[M]') + """ + if com.count_not_none(start, end, periods) != 2: + raise ValueError( + "Of the three parameters: start, end, and periods, " + "exactly two must be specified" + ) + if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)): + freq = "D" + + data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={}) + data = PeriodArray(data, freq=freq) + return PeriodIndex(data, name=name) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py new file mode 100644 index 00000000..376c98b6 --- /dev/null +++ b/pandas/core/indexes/range.py @@ -0,0 +1,1094 @@ +from __future__ import annotations + +from datetime import timedelta +import operator +from sys import getsizeof +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterator, + List, + cast, +) +import warnings + +import numpy as np + +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._libs.algos import unique_deltas +from pandas._libs.lib import no_default +from pandas._typing import ( + Dtype, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + cache_readonly, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_platform_int, + ensure_python_int, + is_float, + is_integer, + is_scalar, + is_signed_integer_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ABCTimedeltaIndex + +from pandas.core import ops +from pandas.core.algorithms import resolve_na_sentinel +import pandas.core.common as com +from pandas.core.construction import extract_array +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import maybe_extract_name +from pandas.core.indexes.numeric import ( + Float64Index, + Int64Index, + NumericIndex, +) +from pandas.core.ops.common import unpack_zerodim_and_defer + +if TYPE_CHECKING: + from pandas import Index + +_empty_range = range(0) + + +class RangeIndex(NumericIndex): + """ + Immutable Index implementing a monotonic integer range. + + RangeIndex is a memory-saving special case of Int64Index limited to + representing monotonic ranges. Using RangeIndex may in some instances + improve computing speed. + + This is the default index type used + by DataFrame and Series when no explicit index is provided by the user. + + Parameters + ---------- + start : int (default: 0), range, or other RangeIndex instance + If int and "stop" is not given, interpreted as "stop" instead. + stop : int (default: 0) + step : int (default: 1) + dtype : np.int64 + Unused, accepted for homogeneity with other index types. 
+ copy : bool, default False + Unused, accepted for homogeneity with other index types. + name : object, optional + Name to be stored in the index. + + Attributes + ---------- + start + stop + step + + Methods + ------- + from_range + + See Also + -------- + Index : The base pandas Index type. + Int64Index : Index of int64 data. + """ + + _typ = "rangeindex" + _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") + _range: range + _is_backward_compat_public_numeric_index: bool = False + + @property + def _engine_type(self) -> type[libindex.Int64Engine]: + return libindex.Int64Engine + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + start=None, + stop=None, + step=None, + dtype: Dtype | None = None, + copy: bool = False, + name: Hashable = None, + ) -> RangeIndex: + cls._validate_dtype(dtype) + name = maybe_extract_name(name, start, cls) + + # RangeIndex + if isinstance(start, RangeIndex): + return start.copy(name=name) + elif isinstance(start, range): + return cls._simple_new(start, name=name) + + # validate the arguments + if com.all_none(start, stop, step): + raise TypeError("RangeIndex(...) must be called with integers") + + start = ensure_python_int(start) if start is not None else 0 + + if stop is None: + start, stop = 0, start + else: + stop = ensure_python_int(stop) + + step = ensure_python_int(step) if step is not None else 1 + if step == 0: + raise ValueError("Step must not be zero") + + rng = range(start, stop, step) + return cls._simple_new(rng, name=name) + + @classmethod + def from_range( + cls, data: range, name=None, dtype: Dtype | None = None + ) -> RangeIndex: + """ + Create RangeIndex from a range object. + + Returns + ------- + RangeIndex + """ + if not isinstance(data, range): + raise TypeError( + f"{cls.__name__}(...) must be called with object coercible to a " + f"range, {repr(data)} was passed" + ) + cls._validate_dtype(dtype) + return cls._simple_new(data, name=name) + + @classmethod + def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: + result = object.__new__(cls) + + assert isinstance(values, range) + + result._range = values + result._name = name + result._cache = {} + result._reset_identity() + return result + + # -------------------------------------------------------------------- + + # error: Return type "Type[Int64Index]" of "_constructor" incompatible with return + # type "Type[RangeIndex]" in supertype "Index" + @cache_readonly + def _constructor(self) -> type[Int64Index]: # type: ignore[override] + """return the class to use for construction""" + return Int64Index + + # error: Signature of "_data" incompatible with supertype "Index" + @cache_readonly + def _data(self) -> np.ndarray: # type: ignore[override] + """ + An int array that for performance reasons is created only when needed. + + The constructed array is saved in ``_cache``. 
+ """ + return np.arange(self.start, self.stop, self.step, dtype=np.int64) + + def _get_data_as_items(self): + """return a list of tuples of start, stop, step""" + rng = self._range + return [("start", rng.start), ("stop", rng.stop), ("step", rng.step)] + + def __reduce__(self): + d = {"name": self.name} + d.update(dict(self._get_data_as_items())) + return ibase._new_Index, (type(self), d), None + + # -------------------------------------------------------------------- + # Rendering Methods + + def _format_attrs(self): + """ + Return a list of tuples of the (attr, formatted_value) + """ + attrs = self._get_data_as_items() + if self.name is not None: + attrs.append(("name", ibase.default_pprint(self.name))) + return attrs + + def _format_data(self, name=None): + # we are formatting thru the attributes + return None + + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + # Equivalent to Index implementation, but faster + if not len(self._range): + return header + first_val_str = str(self._range[0]) + last_val_str = str(self._range[-1]) + max_length = max(len(first_val_str), len(last_val_str)) + + return header + [f"{x:<{max_length}}" for x in self._range] + + # -------------------------------------------------------------------- + _deprecation_message = ( + "RangeIndex.{} is deprecated and will be " + "removed in a future version. Use RangeIndex.{} " + "instead" + ) + + @property + def start(self) -> int: + """ + The value of the `start` parameter (``0`` if this was not supplied). + """ + # GH 25710 + return self._range.start + + @property + def _start(self) -> int: + """ + The value of the `start` parameter (``0`` if this was not supplied). + + .. deprecated:: 0.25.0 + Use ``start`` instead. + """ + warnings.warn( + self._deprecation_message.format("_start", "start"), + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.start + + @property + def stop(self) -> int: + """ + The value of the `stop` parameter. + """ + return self._range.stop + + @property + def _stop(self) -> int: + """ + The value of the `stop` parameter. + + .. deprecated:: 0.25.0 + Use ``stop`` instead. + """ + # GH 25710 + warnings.warn( + self._deprecation_message.format("_stop", "stop"), + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.stop + + @property + def step(self) -> int: + """ + The value of the `step` parameter (``1`` if this was not supplied). + """ + # GH 25710 + return self._range.step + + @property + def _step(self) -> int: + """ + The value of the `step` parameter (``1`` if this was not supplied). + + .. deprecated:: 0.25.0 + Use ``step`` instead. + """ + # GH 25710 + warnings.warn( + self._deprecation_message.format("_step", "step"), + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.step + + @cache_readonly + def nbytes(self) -> int: + """ + Return the number of bytes in the underlying data. 
+ """ + rng = self._range + return getsizeof(rng) + sum( + getsizeof(getattr(rng, attr_name)) + for attr_name in ["start", "stop", "step"] + ) + + def memory_usage(self, deep: bool = False) -> int: + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self.nbytes + + @property + def dtype(self) -> np.dtype: + return np.dtype(np.int64) + + @property + def is_unique(self) -> bool: + """return if the index has unique values""" + return True + + @cache_readonly + def is_monotonic_increasing(self) -> bool: + return self._range.step > 0 or len(self) <= 1 + + @cache_readonly + def is_monotonic_decreasing(self) -> bool: + return self._range.step < 0 or len(self) <= 1 + + def __contains__(self, key: Any) -> bool: + hash(key) + try: + key = ensure_python_int(key) + except TypeError: + return False + return key in self._range + + @property + def inferred_type(self) -> str: + return "integer" + + # -------------------------------------------------------------------- + # Indexing Methods + + @doc(Int64Index.get_loc) + def get_loc(self, key, method=None, tolerance=None): + if method is None and tolerance is None: + if is_integer(key) or (is_float(key) and key.is_integer()): + new_key = int(key) + try: + return self._range.index(new_key) + except ValueError as err: + raise KeyError(key) from err + self._check_indexing_error(key) + raise KeyError(key) + return super().get_loc(key, method=method, tolerance=tolerance) + + def _get_indexer( + self, + target: Index, + method: str | None = None, + limit: int | None = None, + tolerance=None, + ) -> npt.NDArray[np.intp]: + if com.any_not_none(method, tolerance, limit): + return super()._get_indexer( + target, method=method, tolerance=tolerance, limit=limit + ) + + if self.step > 0: + start, stop, step = self.start, self.stop, self.step + else: + # GH 28678: work on reversed range for simplicity + reverse = self._range[::-1] + start, stop, step = reverse.start, reverse.stop, reverse.step + + target_array = np.asarray(target) + locs = target_array - start + valid = (locs % step == 0) & (locs >= 0) & (target_array < stop) + locs[~valid] = -1 + locs[valid] = locs[valid] / step + + if step != self.step: + # We reversed this range: transform to original locs + locs[valid] = len(self) - 1 - locs[valid] + return ensure_platform_int(locs) + + # -------------------------------------------------------------------- + + def tolist(self) -> list[int]: + return list(self._range) + + @doc(Int64Index.__iter__) + def __iter__(self) -> Iterator[int]: + yield from self._range + + @doc(Int64Index._shallow_copy) + def _shallow_copy(self, values, name: Hashable = no_default): + name = self.name if name is no_default else name + + if values.dtype.kind == "f": + return Float64Index(values, name=name) + # GH 46675 & 43885: If values is equally spaced, return a + # more memory-compact RangeIndex instead of Int64Index + unique_diffs = unique_deltas(values) + if len(unique_diffs) == 1 and unique_diffs[0] != 0: + diff = unique_diffs[0] + new_range = range(values[0], values[-1] + diff, diff) + return type(self)._simple_new(new_range, name=name) + else: + return Int64Index._simple_new(values, name=name) + + def _view(self: RangeIndex) -> RangeIndex: + 
result = type(self)._simple_new(self._range, name=self._name) + result._cache = self._cache + return result + + @doc(Int64Index.copy) + def copy( + self, + name: Hashable = None, + deep: bool = False, + dtype: Dtype | None = None, + names=None, + ): + name = self._validate_names(name=name, names=names, deep=deep)[0] + new_index = self._rename(name=name) + + if dtype: + warnings.warn( + "parameter dtype is deprecated and will be removed in a future " + "version. Use the astype method instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + new_index = new_index.astype(dtype) + return new_index + + def _minmax(self, meth: str): + no_steps = len(self) - 1 + if no_steps == -1: + return np.nan + elif (meth == "min" and self.step > 0) or (meth == "max" and self.step < 0): + return self.start + + return self.start + self.step * no_steps + + def min(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + """The minimum value of the RangeIndex""" + nv.validate_minmax_axis(axis) + nv.validate_min(args, kwargs) + return self._minmax("min") + + def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + """The maximum value of the RangeIndex""" + nv.validate_minmax_axis(axis) + nv.validate_max(args, kwargs) + return self._minmax("max") + + def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: + """ + Returns the indices that would sort the index and its + underlying data. + + Returns + ------- + np.ndarray[np.intp] + + See Also + -------- + numpy.ndarray.argsort + """ + ascending = kwargs.pop("ascending", True) # EA compat + kwargs.pop("kind", None) # e.g. "mergesort" is irrelevant + nv.validate_argsort(args, kwargs) + + if self._range.step > 0: + result = np.arange(len(self), dtype=np.intp) + else: + result = np.arange(len(self) - 1, -1, -1, dtype=np.intp) + + if not ascending: + result = result[::-1] + return result + + def factorize( + self, + sort: bool = False, + na_sentinel: int | lib.NoDefault = lib.no_default, + use_na_sentinel: bool | lib.NoDefault = lib.no_default, + ) -> tuple[npt.NDArray[np.intp], RangeIndex]: + # resolve to emit warning if appropriate + resolve_na_sentinel(na_sentinel, use_na_sentinel) + codes = np.arange(len(self), dtype=np.intp) + uniques = self + if sort and self.step < 0: + codes = codes[::-1] + uniques = uniques[::-1] + return codes, uniques + + def equals(self, other: object) -> bool: + """ + Determines if two Index objects contain the same elements. 
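+ Two RangeIndex objects are compared via their underlying ``range`` objects; anything else falls back to the generic ``Index.equals`` check.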
+ """ + if isinstance(other, RangeIndex): + return self._range == other._range + return super().equals(other) + + def sort_values( + self, + return_indexer: bool = False, + ascending: bool = True, + na_position: str = "last", + key: Callable | None = None, + ): + sorted_index = self + indexer = RangeIndex(range(len(self))) + if key is not None: + return super().sort_values( + return_indexer=return_indexer, + ascending=ascending, + na_position=na_position, + key=key, + ) + else: + sorted_index = self + if ascending: + if self.step < 0: + sorted_index = self[::-1] + indexer = indexer[::-1] + else: + if self.step > 0: + sorted_index = self[::-1] + indexer = indexer = indexer[::-1] + + if return_indexer: + return sorted_index, indexer + else: + return sorted_index + + # -------------------------------------------------------------------- + # Set Operations + + def _intersection(self, other: Index, sort=False): + # caller is responsible for checking self and other are both non-empty + + if not isinstance(other, RangeIndex): + # Int64Index + return super()._intersection(other, sort=sort) + + first = self._range[::-1] if self.step < 0 else self._range + second = other._range[::-1] if other.step < 0 else other._range + + # check whether intervals intersect + # deals with in- and decreasing ranges + int_low = max(first.start, second.start) + int_high = min(first.stop, second.stop) + if int_high <= int_low: + return self._simple_new(_empty_range) + + # Method hint: linear Diophantine equation + # solve intersection problem + # performance hint: for identical step sizes, could use + # cheaper alternative + gcd, s, _ = self._extended_gcd(first.step, second.step) + + # check whether element sets intersect + if (first.start - second.start) % gcd: + return self._simple_new(_empty_range) + + # calculate parameters for the RangeIndex describing the + # intersection disregarding the lower bounds + tmp_start = first.start + (second.start - first.start) * first.step // gcd * s + new_step = first.step * second.step // gcd + new_range = range(tmp_start, int_high, new_step) + new_index = self._simple_new(new_range) + + # adjust index to limiting interval + new_start = new_index._min_fitting_element(int_low) + new_range = range(new_start, new_index.stop, new_index.step) + new_index = self._simple_new(new_range) + + if (self.step < 0 and other.step < 0) is not (new_index.step < 0): + new_index = new_index[::-1] + + if sort is None: + new_index = new_index.sort_values() + + return new_index + + def _min_fitting_element(self, lower_limit: int) -> int: + """Returns the smallest element greater than or equal to the limit""" + no_steps = -(-(lower_limit - self.start) // abs(self.step)) + return self.start + abs(self.step) * no_steps + + def _extended_gcd(self, a: int, b: int) -> tuple[int, int, int]: + """ + Extended Euclidean algorithms to solve Bezout's identity: + a*x + b*y = gcd(x, y) + Finds one particular solution for x, y: s, t + Returns: gcd, s, t + """ + s, old_s = 0, 1 + t, old_t = 1, 0 + r, old_r = b, a + while r: + quotient = old_r // r + old_r, r = r, old_r - quotient * r + old_s, s = s, old_s - quotient * s + old_t, t = t, old_t - quotient * t + return old_r, old_s, old_t + + def _range_in_self(self, other: range) -> bool: + """Check if other range is contained in self""" + # https://stackoverflow.com/a/32481015 + if not other: + return True + if not self._range: + return False + if len(other) > 1 and other.step % self._range.step: + return False + return other.start in self._range and other[-1] in 
self._range + + def _union(self, other: Index, sort): + """ + Form the union of two Index objects and sorts if possible + + Parameters + ---------- + other : Index or array-like + + sort : False or None, default None + Whether to sort (monotonically increasing) the resulting index. + ``sort=None`` returns a ``RangeIndex`` if possible or a sorted + ``Int64Index`` if not. + ``sort=False`` can return a ``RangeIndex`` if self is monotonically + increasing and other is fully contained in self. Otherwise, returns + an unsorted ``Int64Index`` + + .. versionadded:: 0.25.0 + + Returns + ------- + union : Index + """ + if isinstance(other, RangeIndex): + if sort is None or ( + sort is False and self.step > 0 and self._range_in_self(other._range) + ): + # GH 47557: Can still return a RangeIndex + # if other range in self and sort=False + start_s, step_s = self.start, self.step + end_s = self.start + self.step * (len(self) - 1) + start_o, step_o = other.start, other.step + end_o = other.start + other.step * (len(other) - 1) + if self.step < 0: + start_s, step_s, end_s = end_s, -step_s, start_s + if other.step < 0: + start_o, step_o, end_o = end_o, -step_o, start_o + if len(self) == 1 and len(other) == 1: + step_s = step_o = abs(self.start - other.start) + elif len(self) == 1: + step_s = step_o + elif len(other) == 1: + step_o = step_s + start_r = min(start_s, start_o) + end_r = max(end_s, end_o) + if step_o == step_s: + if ( + (start_s - start_o) % step_s == 0 + and (start_s - end_o) <= step_s + and (start_o - end_s) <= step_s + ): + return type(self)(start_r, end_r + step_s, step_s) + if ( + (step_s % 2 == 0) + and (abs(start_s - start_o) == step_s / 2) + and (abs(end_s - end_o) == step_s / 2) + ): + # e.g. range(0, 10, 2) and range(1, 11, 2) + # but not range(0, 20, 4) and range(1, 21, 4) GH#44019 + return type(self)(start_r, end_r + step_s / 2, step_s / 2) + + elif step_o % step_s == 0: + if ( + (start_o - start_s) % step_s == 0 + and (start_o + step_s >= start_s) + and (end_o - step_s <= end_s) + ): + return type(self)(start_r, end_r + step_s, step_s) + elif step_s % step_o == 0: + if ( + (start_s - start_o) % step_o == 0 + and (start_s + step_o >= start_o) + and (end_s - step_o <= end_o) + ): + return type(self)(start_r, end_r + step_o, step_o) + + return super()._union(other, sort=sort) + + def _difference(self, other, sort=None): + # optimized set operation if we have another RangeIndex + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name = self._convert_can_do_setop(other) + + if not isinstance(other, RangeIndex): + return super()._difference(other, sort=sort) + + if sort is None and self.step < 0: + return self[::-1]._difference(other) + + res_name = ops.get_op_result_name(self, other) + + first = self._range[::-1] if self.step < 0 else self._range + overlap = self.intersection(other) + if overlap.step < 0: + overlap = overlap[::-1] + + if len(overlap) == 0: + return self.rename(name=res_name) + if len(overlap) == len(self): + return self[:0].rename(res_name) + + # overlap.step will always be a multiple of self.step (see _intersection) + + if len(overlap) == 1: + if overlap[0] == self[0]: + return self[1:] + + elif overlap[0] == self[-1]: + return self[:-1] + + elif len(self) == 3 and overlap[0] == self[1]: + return self[::2] + + else: + return super()._difference(other, sort=sort) + + elif len(overlap) == 2 and overlap[0] == first[0] and overlap[-1] == first[-1]: + # e.g. 
range(-8, 20, 7) and range(13, -9, -3) + return self[1:-1] + + if overlap.step == first.step: + if overlap[0] == first.start: + # The difference is everything after the intersection + new_rng = range(overlap[-1] + first.step, first.stop, first.step) + elif overlap[-1] == first[-1]: + # The difference is everything before the intersection + new_rng = range(first.start, overlap[0], first.step) + elif overlap._range == first[1:-1]: + # e.g. range(4) and range(1, 3) + step = len(first) - 1 + new_rng = first[::step] + else: + # The difference is not range-like + # e.g. range(1, 10, 1) and range(3, 7, 1) + return super()._difference(other, sort=sort) + + else: + # We must have len(self) > 1, bc we ruled out above + # len(overlap) == 0 and len(overlap) == len(self) + assert len(self) > 1 + + if overlap.step == first.step * 2: + if overlap[0] == first[0] and overlap[-1] in (first[-1], first[-2]): + # e.g. range(1, 10, 1) and range(1, 10, 2) + new_rng = first[1::2] + + elif overlap[0] == first[1] and overlap[-1] in (first[-1], first[-2]): + # e.g. range(1, 10, 1) and range(2, 10, 2) + new_rng = first[::2] + + else: + # We can get here with e.g. range(20) and range(0, 10, 2) + return super()._difference(other, sort=sort) + + else: + # e.g. range(10) and range(0, 10, 3) + return super()._difference(other, sort=sort) + + new_index = type(self)._simple_new(new_rng, name=res_name) + if first is not self._range: + new_index = new_index[::-1] + + return new_index + + def symmetric_difference(self, other, result_name: Hashable = None, sort=None): + if not isinstance(other, RangeIndex) or sort is not None: + return super().symmetric_difference(other, result_name, sort) + + left = self.difference(other) + right = other.difference(self) + result = left.union(right) + + if result_name is not None: + result = result.rename(result_name) + return result + + # -------------------------------------------------------------------- + + # error: Return type "Index" of "delete" incompatible with return type + # "RangeIndex" in supertype "Index" + def delete(self, loc) -> Index: # type: ignore[override] + # In some cases we can retain RangeIndex, see also + # DatetimeTimedeltaMixin._get_delete_Freq + if is_integer(loc): + if loc == 0 or loc == -len(self): + return self[1:] + if loc == -1 or loc == len(self) - 1: + return self[:-1] + if len(self) == 3 and (loc == 1 or loc == -2): + return self[::2] + + elif lib.is_list_like(loc): + slc = lib.maybe_indices_to_slice(np.asarray(loc, dtype=np.intp), len(self)) + + if isinstance(slc, slice): + # defer to RangeIndex._difference, which is optimized to return + # a RangeIndex whenever possible + other = self[slc] + return self.difference(other, sort=False) + + return super().delete(loc) + + def insert(self, loc: int, item) -> Index: + if len(self) and (is_integer(item) or is_float(item)): + # We can retain RangeIndex is inserting at the beginning or end, + # or right in the middle. + rng = self._range + if loc == 0 and item == self[0] - self.step: + new_rng = range(rng.start - rng.step, rng.stop, rng.step) + return type(self)._simple_new(new_rng, name=self.name) + + elif loc == len(self) and item == self[-1] + self.step: + new_rng = range(rng.start, rng.stop + rng.step, rng.step) + return type(self)._simple_new(new_rng, name=self.name) + + elif len(self) == 2 and item == self[0] + self.step / 2: + # e.g. 
inserting 1 into [0, 2] + step = int(self.step / 2) + new_rng = range(self.start, self.stop, step) + return type(self)._simple_new(new_rng, name=self.name) + + return super().insert(loc, item) + + def _concat(self, indexes: list[Index], name: Hashable) -> Index: + """ + Overriding parent method for the case of all RangeIndex instances. + + When all members of "indexes" are of type RangeIndex: result will be + RangeIndex if possible, Int64Index otherwise. E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + if not all(isinstance(x, RangeIndex) for x in indexes): + return super()._concat(indexes, name) + + elif len(indexes) == 1: + return indexes[0] + + rng_indexes = cast(List[RangeIndex], indexes) + + start = step = next_ = None + + # Filter the empty indexes + non_empty_indexes = [obj for obj in rng_indexes if len(obj)] + + for obj in non_empty_indexes: + rng = obj._range + + if start is None: + # This is set by the first non-empty index + start = rng.start + if step is None and len(rng) > 1: + step = rng.step + elif step is None: + # First non-empty index had only one element + if rng.start == start: + values = np.concatenate([x._values for x in rng_indexes]) + result = Int64Index(values) + return result.rename(name) + + step = rng.start - start + + non_consecutive = (step != rng.step and len(rng) > 1) or ( + next_ is not None and rng.start != next_ + ) + if non_consecutive: + result = Int64Index(np.concatenate([x._values for x in rng_indexes])) + return result.rename(name) + + if step is not None: + next_ = rng[-1] + step + + if non_empty_indexes: + # Get the stop value from "next" or alternatively + # from the last non-empty index + stop = non_empty_indexes[-1].stop if next_ is None else next_ + return RangeIndex(start, stop, step).rename(name) + + # Here all "indexes" had 0 length, i.e. were empty. + # In this case return an empty range index. + return RangeIndex(0, 0).rename(name) + + def __len__(self) -> int: + """ + return the length of the RangeIndex + """ + return len(self._range) + + @property + def size(self) -> int: + return len(self) + + def __getitem__(self, key): + """ + Conserve RangeIndex type for scalar and slice keys. + """ + if isinstance(key, slice): + new_range = self._range[key] + return self._simple_new(new_range, name=self._name) + elif is_integer(key): + new_key = int(key) + try: + return self._range[new_key] + except IndexError as err: + raise IndexError( + f"index {key} is out of bounds for axis 0 with size {len(self)}" + ) from err + elif is_scalar(key): + raise IndexError( + "only integers, slices (`:`), " + "ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean " + "arrays are valid indices" + ) + # fall back to Int64Index + return super().__getitem__(key) + + def _getitem_slice(self: RangeIndex, slobj: slice) -> RangeIndex: + """ + Fastpath for __getitem__ when we know we have a slice. 
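+ Slicing the underlying ``range`` yields another ``range``, so the result is always a RangeIndex.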
+ """ + res = self._range[slobj] + return type(self)._simple_new(res, name=self._name) + + @unpack_zerodim_and_defer("__floordiv__") + def __floordiv__(self, other): + + if is_integer(other) and other != 0: + if len(self) == 0 or self.start % other == 0 and self.step % other == 0: + start = self.start // other + step = self.step // other + stop = start + len(self) * step + new_range = range(start, stop, step or 1) + return self._simple_new(new_range, name=self.name) + if len(self) == 1: + start = self.start // other + new_range = range(start, start + 1, 1) + return self._simple_new(new_range, name=self.name) + + return super().__floordiv__(other) + + # -------------------------------------------------------------------- + # Reductions + + def all(self, *args, **kwargs) -> bool: + return 0 not in self._range + + def any(self, *args, **kwargs) -> bool: + return any(self._range) + + # -------------------------------------------------------------------- + + def _cmp_method(self, other, op): + if isinstance(other, RangeIndex) and self._range == other._range: + # Both are immutable so if ._range attr. are equal, shortcut is possible + return super()._cmp_method(self, op) + return super()._cmp_method(other, op) + + def _arith_method(self, other, op): + """ + Parameters + ---------- + other : Any + op : callable that accepts 2 params + perform the binary op + """ + + if isinstance(other, ABCTimedeltaIndex): + # Defer to TimedeltaIndex implementation + return NotImplemented + elif isinstance(other, (timedelta, np.timedelta64)): + # GH#19333 is_integer evaluated True on timedelta64, + # so we need to catch these explicitly + return super()._arith_method(other, op) + elif is_timedelta64_dtype(other): + # Must be an np.ndarray; GH#22390 + return super()._arith_method(other, op) + + if op in [ + operator.pow, + ops.rpow, + operator.mod, + ops.rmod, + operator.floordiv, + ops.rfloordiv, + divmod, + ops.rdivmod, + ]: + return super()._arith_method(other, op) + + step: Callable | None = None + if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]: + step = op + + # TODO: if other is a RangeIndex we may have more efficient options + right = extract_array(other, extract_numpy=True, extract_range=True) + left = self + + try: + # apply if we have an override + if step: + with np.errstate(all="ignore"): + rstep = step(left.step, right) + + # we don't have a representable op + # so return a base index + if not is_integer(rstep) or not rstep: + raise ValueError + + else: + rstep = left.step + + with np.errstate(all="ignore"): + rstart = op(left.start, right) + rstop = op(left.stop, right) + + res_name = ops.get_op_result_name(self, other) + result = type(self)(rstart, rstop, rstep, name=res_name) + + # for compat with numpy / Int64Index + # even if we can represent as a RangeIndex, return + # as a Float64Index if we have float-like descriptors + if not all(is_integer(x) for x in [rstart, rstop, rstep]): + result = result.astype("float64") + + return result + + except (ValueError, TypeError, ZeroDivisionError): + # Defer to Int64Index implementation + # test_arithmetic_explicit_conversions + return super()._arith_method(other, op) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py new file mode 100644 index 00000000..12a8f2c0 --- /dev/null +++ b/pandas/core/indexes/timedeltas.py @@ -0,0 +1,280 @@ +""" implement the TimedeltaIndex """ +from __future__ import annotations + +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._libs.tslibs import ( + 
Timedelta, + to_offset, +) +from pandas._typing import DtypeObj + +from pandas.core.dtypes.common import ( + TD64NS_DTYPE, + is_scalar, + is_timedelta64_dtype, +) + +from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays.timedeltas import TimedeltaArray +import pandas.core.common as com +from pandas.core.indexes.base import ( + Index, + maybe_extract_name, +) +from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin +from pandas.core.indexes.extension import inherit_names + + +@inherit_names( + ["__neg__", "__pos__", "__abs__", "total_seconds", "round", "floor", "ceil"] + + TimedeltaArray._field_ops, + TimedeltaArray, + wrap=True, +) +@inherit_names( + [ + "components", + "to_pytimedelta", + "sum", + "std", + "median", + "_format_native_types", + ], + TimedeltaArray, +) +class TimedeltaIndex(DatetimeTimedeltaMixin): + """ + Immutable Index of timedelta64 data. + + Represented internally as int64, and scalars returned Timedelta objects. + + Parameters + ---------- + data : array-like (1-dimensional), optional + Optional timedelta-like data to construct index with. + unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, optional + Which is an integer/float number. + freq : str or pandas offset object, optional + One of pandas date offset strings or corresponding objects. The string + 'infer' can be passed in order to set the frequency of the index as the + inferred frequency upon creation. + copy : bool + Make a copy of input ndarray. + name : object + Name to be stored in the index. + + Attributes + ---------- + days + seconds + microseconds + nanoseconds + components + inferred_freq + + Methods + ------- + to_pytimedelta + to_series + round + floor + ceil + to_frame + mean + + See Also + -------- + Index : The base pandas Index type. + Timedelta : Represents a duration between two dates or times. + DatetimeIndex : Index of datetime64 data. + PeriodIndex : Index of Period data. + timedelta_range : Create a fixed-frequency TimedeltaIndex. + + Notes + ----- + To learn more about the frequency strings, please see `this link + `__. + """ + + _typ = "timedeltaindex" + + _data_cls = TimedeltaArray + + @property + def _engine_type(self) -> type[libindex.TimedeltaEngine]: + return libindex.TimedeltaEngine + + _data: TimedeltaArray + + # Use base class method instead of DatetimeTimedeltaMixin._get_string_slice + _get_string_slice = Index._get_string_slice + + # ------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + data=None, + unit=None, + freq=lib.no_default, + closed=None, + dtype=TD64NS_DTYPE, + copy=False, + name=None, + ): + name = maybe_extract_name(name, data, cls) + + if is_scalar(data): + raise cls._scalar_data_error(data) + + if unit in {"Y", "y", "M"}: + raise ValueError( + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." 
+ ) + + # FIXME: need to check for dtype/data match + if isinstance(data, TimedeltaArray) and freq is lib.no_default: + if copy: + data = data.copy() + return cls._simple_new(data, name=name) + + if isinstance(data, TimedeltaIndex) and freq is lib.no_default and name is None: + if copy: + return data.copy() + else: + return data._view() + + # - Cases checked above all return/raise before reaching here - # + + tdarr = TimedeltaArray._from_sequence_not_strict( + data, freq=freq, unit=unit, dtype=dtype, copy=copy + ) + return cls._simple_new(tdarr, name=name) + + # ------------------------------------------------------------------- + + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? + """ + return is_timedelta64_dtype(dtype) # aka self._data._is_recognized_dtype + + # ------------------------------------------------------------------- + # Indexing Methods + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location for requested label + + Returns + ------- + loc : int, slice, or ndarray[int] + """ + self._check_indexing_error(key) + + try: + key = self._data._validate_scalar(key, unbox=False) + except TypeError as err: + raise KeyError(key) from err + + return Index.get_loc(self, key, method, tolerance) + + def _parse_with_reso(self, label: str): + # the "with_reso" is a no-op for TimedeltaIndex + parsed = Timedelta(label) + return parsed, None + + def _parsed_string_to_bounds(self, reso, parsed: Timedelta): + # reso is unused, included to match signature of DTI/PI + lbound = parsed.round(parsed.resolution_string) + rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns") + return lbound, rbound + + # ------------------------------------------------------------------- + + @property + def inferred_type(self) -> str: + return "timedelta64" + + +def timedelta_range( + start=None, + end=None, + periods: int | None = None, + freq=None, + name=None, + closed=None, +) -> TimedeltaIndex: + """ + Return a fixed frequency TimedeltaIndex with day as the default. + + Parameters + ---------- + start : str or timedelta-like, default None + Left bound for generating timedeltas. + end : str or timedelta-like, default None + Right bound for generating timedeltas. + periods : int, default None + Number of periods to generate. + freq : str or DateOffset, default 'D' + Frequency strings can have multiples, e.g. '5H'. + name : str, default None + Name of the resulting TimedeltaIndex. + closed : str, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None). + + Returns + ------- + TimedeltaIndex + + Notes + ----- + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``TimedeltaIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + >>> pd.timedelta_range(start='1 day', periods=4) + TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``closed`` parameter specifies which endpoint is included. The default + behavior is to include both endpoints. 
+ + >>> pd.timedelta_range(start='1 day', periods=4, closed='right') + TimedeltaIndex(['2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``freq`` parameter specifies the frequency of the TimedeltaIndex. + Only fixed frequencies can be passed, non-fixed frequencies such as + 'M' (month end) will raise. + + >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H') + TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', + '1 days 18:00:00', '2 days 00:00:00'], + dtype='timedelta64[ns]', freq='6H') + + Specify ``start``, ``end``, and ``periods``; the frequency is generated + automatically (linearly spaced). + + >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) + TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', + '5 days 00:00:00'], + dtype='timedelta64[ns]', freq=None) + """ + if freq is None and com.any_none(periods, start, end): + freq = "D" + + freq, _ = dtl.maybe_infer_freq(freq) + tdarr = TimedeltaArray._generate_range(start, end, periods, freq, closed=closed) + return TimedeltaIndex._simple_new(tdarr, name=name) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py new file mode 100644 index 00000000..dd06d9be --- /dev/null +++ b/pandas/core/indexing.py @@ -0,0 +1,2679 @@ +from __future__ import annotations + +from contextlib import suppress +from typing import ( + TYPE_CHECKING, + Hashable, + Sequence, + TypeVar, + cast, + final, +) +import warnings + +import numpy as np + +from pandas._libs.indexing import NDFrameIndexerBase +from pandas._libs.lib import item_from_zerodim +from pandas.errors import ( + AbstractMethodError, + IndexingError, + InvalidIndexError, +) +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import ( + can_hold_element, + maybe_promote, +) +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_extension_array_dtype, + is_hashable, + is_integer, + is_iterator, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_sequence, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + infer_fill_value, + is_valid_na_for_dtype, + isna, + na_value_for_dtype, +) + +from pandas.core import algorithms as algos +import pandas.core.common as com +from pandas.core.construction import ( + array as pd_array, + extract_array, +) +from pandas.core.indexers import ( + check_array_indexer, + is_empty_indexer, + is_list_like_indexer, + is_scalar_indexer, + length_of_indexer, +) +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + +_LocationIndexerT = TypeVar("_LocationIndexerT", bound="_LocationIndexer") + +# "null slice" +_NS = slice(None, None) +_one_ellipsis_message = "indexer may only contain one '...' entry" + + +# the public IndexSlicerMaker +class _IndexSlice: + """ + Create an object to more easily perform multi-index slicing. + + See Also + -------- + MultiIndex.remove_unused_levels : New MultiIndex with no unused levels. + + Notes + ----- + See :ref:`Defined Levels ` + for further info on slicing a MultiIndex. + + Examples + -------- + >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']]) + >>> columns = ['foo', 'bar'] + >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))), + ... 
index=midx, columns=columns) + + Using the default slice command: + + >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :] + foo bar + A0 B0 0 1 + B1 2 3 + A1 B0 8 9 + B1 10 11 + + Using the IndexSlice class for a more intuitive command: + + >>> idx = pd.IndexSlice + >>> dfmi.loc[idx[:, 'B0':'B1'], :] + foo bar + A0 B0 0 1 + B1 2 3 + A1 B0 8 9 + B1 10 11 + """ + + def __getitem__(self, arg): + return arg + + +IndexSlice = _IndexSlice() + + +class IndexingMixin: + """ + Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series. + """ + + @property + def iloc(self) -> _iLocIndexer: + """ + Purely integer-location based indexing for selection by position. + + ``.iloc[]`` is primarily integer position based (from ``0`` to + ``length-1`` of the axis), but may also be used with a boolean + array. + + Allowed inputs are: + + - An integer, e.g. ``5``. + - A list or array of integers, e.g. ``[4, 3, 0]``. + - A slice object with ints, e.g. ``1:7``. + - A boolean array. + - A ``callable`` function with one argument (the calling Series or + DataFrame) and that returns valid output for indexing (one of the above). + This is useful in method chains, when you don't have a reference to the + calling object, but would like to base your selection on some value. + - A tuple of row and column indexes. The tuple elements consist of one of the + above inputs, e.g. ``(0, 1)``. + + ``.iloc`` will raise ``IndexError`` if a requested indexer is + out-of-bounds, except *slice* indexers which allow out-of-bounds + indexing (this conforms with python/numpy *slice* semantics). + + See more at :ref:`Selection by Position `. + + See Also + -------- + DataFrame.iat : Fast integer location scalar accessor. + DataFrame.loc : Purely label-location based indexer for selection by label. + Series.iloc : Purely integer-location based indexing for + selection by position. + + Examples + -------- + >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, + ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, + ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] + >>> df = pd.DataFrame(mydict) + >>> df + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + + **Indexing just the rows** + + With a scalar integer. + + >>> type(df.iloc[0]) + + >>> df.iloc[0] + a 1 + b 2 + c 3 + d 4 + Name: 0, dtype: int64 + + With a list of integers. + + >>> df.iloc[[0]] + a b c d + 0 1 2 3 4 + >>> type(df.iloc[[0]]) + + + >>> df.iloc[[0, 1]] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + + With a `slice` object. + + >>> df.iloc[:3] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + + With a boolean mask the same length as the index. + + >>> df.iloc[[True, False, True]] + a b c d + 0 1 2 3 4 + 2 1000 2000 3000 4000 + + With a callable, useful in method chains. The `x` passed + to the ``lambda`` is the DataFrame being sliced. This selects + the rows whose index label even. + + >>> df.iloc[lambda x: x.index % 2 == 0] + a b c d + 0 1 2 3 4 + 2 1000 2000 3000 4000 + + **Indexing both axes** + + You can mix the indexer types for the index and columns. Use ``:`` to + select the entire axis. + + With scalar integers. + + >>> df.iloc[0, 1] + 2 + + With lists of integers. + + >>> df.iloc[[0, 2], [1, 3]] + b d + 0 2 4 + 2 2000 4000 + + With `slice` objects. + + >>> df.iloc[1:3, 0:3] + a b c + 1 100 200 300 + 2 1000 2000 3000 + + With a boolean array whose length matches the columns. + + >>> df.iloc[:, [True, False, True, False]] + a c + 0 1 3 + 1 100 300 + 2 1000 3000 + + With a callable function that expects the Series or DataFrame. 
+ + >>> df.iloc[:, lambda df: [0, 2]] + a c + 0 1 3 + 1 100 300 + 2 1000 3000 + """ + return _iLocIndexer("iloc", self) + + @property + def loc(self) -> _LocIndexer: + """ + Access a group of rows and columns by label(s) or a boolean array. + + ``.loc[]`` is primarily label based, but may also be used with a + boolean array. + + Allowed inputs are: + + - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is + interpreted as a *label* of the index, and **never** as an + integer position along the index). + - A list or array of labels, e.g. ``['a', 'b', 'c']``. + - A slice object with labels, e.g. ``'a':'f'``. + + .. warning:: Note that contrary to usual python slices, **both** the + start and the stop are included + + - A boolean array of the same length as the axis being sliced, + e.g. ``[True, False, True]``. + - An alignable boolean Series. The index of the key will be aligned before + masking. + - An alignable Index. The Index of the returned selection will be the input. + - A ``callable`` function with one argument (the calling Series or + DataFrame) and that returns valid output for indexing (one of the above) + + See more at :ref:`Selection by Label `. + + Raises + ------ + KeyError + If any items are not found. + IndexingError + If an indexed key is passed and its index is unalignable to the frame index. + + See Also + -------- + DataFrame.at : Access a single value for a row/column label pair. + DataFrame.iloc : Access group of rows and columns by integer position(s). + DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the + Series/DataFrame. + Series.loc : Access group of values using labels. + + Examples + -------- + **Getting values** + + >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=['cobra', 'viper', 'sidewinder'], + ... columns=['max_speed', 'shield']) + >>> df + max_speed shield + cobra 1 2 + viper 4 5 + sidewinder 7 8 + + Single label. Note this returns the row as a Series. + + >>> df.loc['viper'] + max_speed 4 + shield 5 + Name: viper, dtype: int64 + + List of labels. Note using ``[[]]`` returns a DataFrame. + + >>> df.loc[['viper', 'sidewinder']] + max_speed shield + viper 4 5 + sidewinder 7 8 + + Single label for row and column + + >>> df.loc['cobra', 'shield'] + 2 + + Slice with labels for row and single label for column. As mentioned + above, note that both the start and stop of the slice are included. + + >>> df.loc['cobra':'viper', 'max_speed'] + cobra 1 + viper 4 + Name: max_speed, dtype: int64 + + Boolean list with the same length as the row axis + + >>> df.loc[[False, False, True]] + max_speed shield + sidewinder 7 8 + + Alignable boolean Series: + + >>> df.loc[pd.Series([False, True, False], + ... 
index=['viper', 'sidewinder', 'cobra'])] + max_speed shield + sidewinder 7 8 + + Index (same behavior as ``df.reindex``) + + >>> df.loc[pd.Index(["cobra", "viper"], name="foo")] + max_speed shield + foo + cobra 1 2 + viper 4 5 + + Conditional that returns a boolean Series + + >>> df.loc[df['shield'] > 6] + max_speed shield + sidewinder 7 8 + + Conditional that returns a boolean Series with column labels specified + + >>> df.loc[df['shield'] > 6, ['max_speed']] + max_speed + sidewinder 7 + + Callable that returns a boolean Series + + >>> df.loc[lambda df: df['shield'] == 8] + max_speed shield + sidewinder 7 8 + + **Setting values** + + Set value for all items matching the list of labels + + >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50 + >>> df + max_speed shield + cobra 1 2 + viper 4 50 + sidewinder 7 50 + + Set value for an entire row + + >>> df.loc['cobra'] = 10 + >>> df + max_speed shield + cobra 10 10 + viper 4 50 + sidewinder 7 50 + + Set value for an entire column + + >>> df.loc[:, 'max_speed'] = 30 + >>> df + max_speed shield + cobra 30 10 + viper 30 50 + sidewinder 30 50 + + Set value for rows matching callable condition + + >>> df.loc[df['shield'] > 35] = 0 + >>> df + max_speed shield + cobra 30 10 + viper 0 0 + sidewinder 0 0 + + **Getting values on a DataFrame with an index that has integer labels** + + Another example using integers for the index + + >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=[7, 8, 9], columns=['max_speed', 'shield']) + >>> df + max_speed shield + 7 1 2 + 8 4 5 + 9 7 8 + + Slice with integer labels for rows. As mentioned above, note that both + the start and stop of the slice are included. + + >>> df.loc[7:9] + max_speed shield + 7 1 2 + 8 4 5 + 9 7 8 + + **Getting values with a MultiIndex** + + A number of examples using a DataFrame with a MultiIndex + + >>> tuples = [ + ... ('cobra', 'mark i'), ('cobra', 'mark ii'), + ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'), + ... ('viper', 'mark ii'), ('viper', 'mark iii') + ... ] + >>> index = pd.MultiIndex.from_tuples(tuples) + >>> values = [[12, 2], [0, 4], [10, 20], + ... [1, 4], [7, 1], [16, 36]] + >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index) + >>> df + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + mark iii 16 36 + + Single label. Note this returns a DataFrame with a single index. + + >>> df.loc['cobra'] + max_speed shield + mark i 12 2 + mark ii 0 4 + + Single index tuple. Note this returns a Series. + + >>> df.loc[('cobra', 'mark ii')] + max_speed 0 + shield 4 + Name: (cobra, mark ii), dtype: int64 + + Single label for row and column. Similar to passing in a tuple, this + returns a Series. + + >>> df.loc['cobra', 'mark i'] + max_speed 12 + shield 2 + Name: (cobra, mark i), dtype: int64 + + Single tuple. Note using ``[[]]`` returns a DataFrame. 
+ + >>> df.loc[[('cobra', 'mark ii')]] + max_speed shield + cobra mark ii 0 4 + + Single tuple for the index with a single label for the column + + >>> df.loc[('cobra', 'mark i'), 'shield'] + 2 + + Slice from index tuple to single label + + >>> df.loc[('cobra', 'mark i'):'viper'] + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + mark iii 16 36 + + Slice from index tuple to index tuple + + >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')] + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + + Please see the :ref:`user guide` + for more details and explanations of advanced indexing. + """ + return _LocIndexer("loc", self) + + @property + def at(self) -> _AtIndexer: + """ + Access a single value for a row/column label pair. + + Similar to ``loc``, in that both provide label-based lookups. Use + ``at`` if you only need to get or set a single value in a DataFrame + or Series. + + Raises + ------ + KeyError + * If getting a value and 'label' does not exist in a DataFrame or + Series. + ValueError + * If row/column label pair is not a tuple or if any label from + the pair is not a scalar for DataFrame. + * If label is list-like (*excluding* NamedTuple) for Series. + + See Also + -------- + DataFrame.at : Access a single value for a row/column pair by label. + DataFrame.iat : Access a single value for a row/column pair by integer + position. + DataFrame.loc : Access a group of rows and columns by label(s). + DataFrame.iloc : Access a group of rows and columns by integer + position(s). + Series.at : Access a single value by label. + Series.iat : Access a single value by integer position. + Series.loc : Access a group of rows by label(s). + Series.iloc : Access a group of rows by integer position(s). + + Notes + ----- + See :ref:`Fast scalar value getting and setting ` + for more details. + + Examples + -------- + >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... index=[4, 5, 6], columns=['A', 'B', 'C']) + >>> df + A B C + 4 0 2 3 + 5 0 4 1 + 6 10 20 30 + + Get value at specified row/column pair + + >>> df.at[4, 'B'] + 2 + + Set value at specified row/column pair + + >>> df.at[4, 'B'] = 10 + >>> df.at[4, 'B'] + 10 + + Get value within a Series + + >>> df.loc[5].at['B'] + 4 + """ + return _AtIndexer("at", self) + + @property + def iat(self) -> _iAtIndexer: + """ + Access a single value for a row/column pair by integer position. + + Similar to ``iloc``, in that both provide integer-based lookups. Use + ``iat`` if you only need to get or set a single value in a DataFrame + or Series. + + Raises + ------ + IndexError + When integer position is out of bounds. + + See Also + -------- + DataFrame.at : Access a single value for a row/column label pair. + DataFrame.loc : Access a group of rows and columns by label(s). + DataFrame.iloc : Access a group of rows and columns by integer position(s). + + Examples + -------- + >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... 
columns=['A', 'B', 'C']) + >>> df + A B C + 0 0 2 3 + 1 0 4 1 + 2 10 20 30 + + Get value at specified row/column pair + + >>> df.iat[1, 2] + 1 + + Set value at specified row/column pair + + >>> df.iat[1, 2] = 10 + >>> df.iat[1, 2] + 10 + + Get value within a series + + >>> df.loc[0].iat[1] + 2 + """ + return _iAtIndexer("iat", self) + + +class _LocationIndexer(NDFrameIndexerBase): + _valid_types: str + axis: int | None = None + + # sub-classes need to set _takeable + _takeable: bool + + @final + def __call__(self: _LocationIndexerT, axis=None) -> _LocationIndexerT: + # we need to return a copy of ourselves + new_self = type(self)(self.name, self.obj) + + if axis is not None: + axis = self.obj._get_axis_number(axis) + new_self.axis = axis + return new_self + + def _get_setitem_indexer(self, key): + """ + Convert a potentially-label-based key into a positional indexer. + """ + if self.name == "loc": + # always holds here bc iloc overrides _get_setitem_indexer + self._ensure_listlike_indexer(key) + + if isinstance(key, tuple): + for x in key: + check_deprecated_indexers(x) + + if self.axis is not None: + key = _tupleize_axis_indexer(self.ndim, self.axis, key) + + ax = self.obj._get_axis(0) + + if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key): + with suppress(KeyError, InvalidIndexError): + # TypeError e.g. passed a bool + return ax.get_loc(key) + + if isinstance(key, tuple): + with suppress(IndexingError): + # suppress "Too many indexers" + return self._convert_tuple(key) + + if isinstance(key, range): + # GH#45479 test_loc_setitem_range_key + key = list(key) + + return self._convert_to_indexer(key, axis=0) + + @final + def _maybe_mask_setitem_value(self, indexer, value): + """ + If we have obj.iloc[mask] = series_or_frame and series_or_frame has the + same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask], + similar to Series.__setitem__. + + Note this is only for loc, not iloc. + """ + + if ( + isinstance(indexer, tuple) + and len(indexer) == 2 + and isinstance(value, (ABCSeries, ABCDataFrame)) + ): + pi, icols = indexer + ndim = value.ndim + if com.is_bool_indexer(pi) and len(value) == len(pi): + newkey = pi.nonzero()[0] + + if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1: + # e.g. 
test_loc_setitem_boolean_mask_allfalse + if len(newkey) == 0: + # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse + # TODO(GH#45333): may be fixed when deprecation is enforced + + value = value.iloc[:0] + else: + # test_loc_setitem_ndframe_values_alignment + value = self.obj.iloc._align_series(indexer, value) + indexer = (newkey, icols) + + elif ( + isinstance(icols, np.ndarray) + and icols.dtype.kind == "i" + and len(icols) == 1 + ): + if ndim == 1: + # We implicitly broadcast, though numpy does not, see + # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825 + if len(newkey) == 0: + # FIXME: kludge for + # test_setitem_loc_only_false_indexer_dtype_changed + # TODO(GH#45333): may be fixed when deprecation is enforced + value = value.iloc[:0] + else: + # test_loc_setitem_ndframe_values_alignment + value = self.obj.iloc._align_series(indexer, value) + indexer = (newkey, icols) + + elif ndim == 2 and value.shape[1] == 1: + if len(newkey) == 0: + # FIXME: kludge for + # test_loc_setitem_all_false_boolean_two_blocks + # TODO(GH#45333): may be fixed when deprecation is enforced + value = value.iloc[:0] + else: + # test_loc_setitem_ndframe_values_alignment + value = self.obj.iloc._align_frame(indexer, value) + indexer = (newkey, icols) + elif com.is_bool_indexer(indexer): + indexer = indexer.nonzero()[0] + + return indexer, value + + @final + def _ensure_listlike_indexer(self, key, axis=None, value=None): + """ + Ensure that a list-like of column labels are all present by adding them if + they do not already exist. + + Parameters + ---------- + key : list-like of column labels + Target labels. + axis : key axis if known + """ + column_axis = 1 + + # column only exists in 2-dimensional DataFrame + if self.ndim != 2: + return + + if isinstance(key, tuple) and len(key) > 1: + # key may be a tuple if we are .loc + # if length of key is > 1 set key to column part + key = key[column_axis] + axis = column_axis + + if ( + axis == column_axis + and not isinstance(self.obj.columns, MultiIndex) + and is_list_like_indexer(key) + and not com.is_bool_indexer(key) + and all(is_hashable(k) for k in key) + ): + # GH#38148 + keys = self.obj.columns.union(key, sort=False) + + self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True) + + @final + def __setitem__(self, key, value) -> None: + check_deprecated_indexers(key) + if isinstance(key, tuple): + key = tuple(list(x) if is_iterator(x) else x for x in key) + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + else: + key = com.apply_if_callable(key, self.obj) + indexer = self._get_setitem_indexer(key) + self._has_valid_setitem_indexer(key) + + iloc = self if self.name == "iloc" else self.obj.iloc + iloc._setitem_with_indexer(indexer, value, self.name) + + def _validate_key(self, key, axis: int): + """ + Ensure that key is valid for current indexer. + + Parameters + ---------- + key : scalar, slice or list-like + Key requested. + axis : int + Dimension on which the indexing is being made. + + Raises + ------ + TypeError + If the key (or some element of it) has wrong type. + IndexError + If the key (or some element of it) is out of bounds. + KeyError + If the key was not found. + """ + raise AbstractMethodError(self) + + @final + def _expand_ellipsis(self, tup: tuple) -> tuple: + """ + If a tuple key includes an Ellipsis, replace it with an appropriate + number of null slices. 
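+
+ Editorial illustration (not part of the upstream docstring): a single
+ Ellipsis stands in for a null slice, so ``df.iloc[..., 0]`` behaves like
+ ``df.iloc[:, 0]`` and ``Series.iloc[..., 3]`` reduces to ``Series.iloc[3]``:
+
+ >>> df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+ >>> df.iloc[..., 0].tolist()
+ [1, 2]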
+ """ + if any(x is Ellipsis for x in tup): + if tup.count(Ellipsis) > 1: + raise IndexingError(_one_ellipsis_message) + + if len(tup) == self.ndim: + # It is unambiguous what axis this Ellipsis is indexing, + # treat as a single null slice. + i = tup.index(Ellipsis) + # FIXME: this assumes only one Ellipsis + new_key = tup[:i] + (_NS,) + tup[i + 1 :] + return new_key + + # TODO: other cases? only one test gets here, and that is covered + # by _validate_key_length + return tup + + @final + def _validate_tuple_indexer(self, key: tuple) -> tuple: + """ + Check the key for valid keys across my indexer. + """ + key = self._validate_key_length(key) + key = self._expand_ellipsis(key) + for i, k in enumerate(key): + try: + self._validate_key(k, i) + except ValueError as err: + raise ValueError( + "Location based indexing can only have " + f"[{self._valid_types}] types" + ) from err + return key + + @final + def _is_nested_tuple_indexer(self, tup: tuple) -> bool: + """ + Returns + ------- + bool + """ + if any(isinstance(ax, MultiIndex) for ax in self.obj.axes): + return any(is_nested_tuple(tup, ax) for ax in self.obj.axes) + return False + + @final + def _convert_tuple(self, key: tuple) -> tuple: + # Note: we assume _tupleize_axis_indexer has been called, if necessary. + self._validate_key_length(key) + keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)] + return tuple(keyidx) + + @final + def _validate_key_length(self, key: tuple) -> tuple: + if len(key) > self.ndim: + if key[0] is Ellipsis: + # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3] + key = key[1:] + if Ellipsis in key: + raise IndexingError(_one_ellipsis_message) + return self._validate_key_length(key) + raise IndexingError("Too many indexers") + return key + + @final + def _getitem_tuple_same_dim(self, tup: tuple): + """ + Index with indexers that should return an object of the same dimension + as self.obj. + + This is only called after a failed call to _getitem_lowerdim. + """ + retval = self.obj + for i, key in enumerate(tup): + if com.is_null_slice(key): + continue + + retval = getattr(retval, self.name)._getitem_axis(key, axis=i) + # We should never have retval.ndim < self.ndim, as that should + # be handled by the _getitem_lowerdim call above. + assert retval.ndim == self.ndim + + return retval + + @final + def _getitem_lowerdim(self, tup: tuple): + + # we can directly get the axis result since the axis is specified + if self.axis is not None: + axis = self.obj._get_axis_number(self.axis) + return self._getitem_axis(tup, axis=axis) + + # we may have a nested tuples indexer here + if self._is_nested_tuple_indexer(tup): + return self._getitem_nested_tuple(tup) + + # we maybe be using a tuple to represent multiple dimensions here + ax0 = self.obj._get_axis(0) + # ...but iloc should handle the tuple as simple integer-location + # instead of checking it as multiindex representation (GH 13797) + if ( + isinstance(ax0, MultiIndex) + and self.name != "iloc" + and not any(isinstance(x, slice) for x in tup) + ): + # Note: in all extant test cases, replacing the slice condition with + # `all(is_hashable(x) or com.is_null_slice(x) for x in tup)` + # is equivalent. 
+ # (see the other place where we call _handle_lowerdim_multi_index_axis0) + with suppress(IndexingError): + # error "_LocationIndexer" has no attribute + # "_handle_lowerdim_multi_index_axis0" + return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup) + + tup = self._validate_key_length(tup) + + for i, key in enumerate(tup): + if is_label_like(key): + # We don't need to check for tuples here because those are + # caught by the _is_nested_tuple_indexer check above. + section = self._getitem_axis(key, axis=i) + + # We should never have a scalar section here, because + # _getitem_lowerdim is only called after a check for + # is_scalar_access, which that would be. + if section.ndim == self.ndim: + # we're in the middle of slicing through a MultiIndex + # revise the key wrt to `section` by inserting an _NS + new_key = tup[:i] + (_NS,) + tup[i + 1 :] + + else: + # Note: the section.ndim == self.ndim check above + # rules out having DataFrame here, so we dont need to worry + # about transposing. + new_key = tup[:i] + tup[i + 1 :] + + if len(new_key) == 1: + new_key = new_key[0] + + # Slices should return views, but calling iloc/loc with a null + # slice returns a new object. + if com.is_null_slice(new_key): + return section + # This is an elided recursive call to iloc/loc + return getattr(section, self.name)[new_key] + + raise IndexingError("not applicable") + + @final + def _getitem_nested_tuple(self, tup: tuple): + # we have a nested tuple so have at least 1 multi-index level + # we should be able to match up the dimensionality here + + for key in tup: + check_deprecated_indexers(key) + + # we have too many indexers for our dim, but have at least 1 + # multi-index dimension, try to see if we have something like + # a tuple passed to a series with a multi-index + if len(tup) > self.ndim: + if self.name != "loc": + # This should never be reached, but let's be explicit about it + raise ValueError("Too many indices") # pragma: no cover + if all(is_hashable(x) or com.is_null_slice(x) for x in tup): + # GH#10521 Series should reduce MultiIndex dimensions instead of + # DataFrame, IndexingError is not raised when slice(None,None,None) + # with one row. 
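+ # Editor's note -- illustrative only, not part of the upstream source.
+ # A key with more entries than ndim on a MultiIndexed Series looks like:
+ #   >>> mi = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")])
+ #   >>> ser = pd.Series([1, 2], index=mi)
+ #   >>> ser.loc["a", "x"]
+ #   1
+ # i.e. two indexers on a one-dimensional object, absorbed by the
+ # MultiIndex levels rather than raising "Too many indexers".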
+ with suppress(IndexingError): + # error "_LocationIndexer" has no attribute + # "_handle_lowerdim_multi_index_axis0" + return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0( + tup + ) + elif isinstance(self.obj, ABCSeries) and any( + isinstance(k, tuple) for k in tup + ): + # GH#35349 Raise if tuple in tuple for series + # Do this after the all-hashable-or-null-slice check so that + # we are only getting non-hashable tuples, in particular ones + # that themselves contain a slice entry + # See test_loc_series_getitem_too_many_dimensions + raise IndexingError("Too many indexers") + + # this is a series with a multi-index specified a tuple of + # selectors + axis = self.axis or 0 + return self._getitem_axis(tup, axis=axis) + + # handle the multi-axis by taking sections and reducing + # this is iterative + obj = self.obj + # GH#41369 Loop in reverse order ensures indexing along columns before rows + # which selects only necessary blocks which avoids dtype conversion if possible + axis = len(tup) - 1 + for key in tup[::-1]: + + if com.is_null_slice(key): + axis -= 1 + continue + + obj = getattr(obj, self.name)._getitem_axis(key, axis=axis) + axis -= 1 + + # if we have a scalar, we are done + if is_scalar(obj) or not hasattr(obj, "ndim"): + break + + return obj + + def _convert_to_indexer(self, key, axis: int): + raise AbstractMethodError(self) + + @final + def __getitem__(self, key): + check_deprecated_indexers(key) + if type(key) is tuple: + key = tuple(list(x) if is_iterator(x) else x for x in key) + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + if self._is_scalar_access(key): + return self.obj._get_value(*key, takeable=self._takeable) + return self._getitem_tuple(key) + else: + # we by definition only have the 0th axis + axis = self.axis or 0 + + maybe_callable = com.apply_if_callable(key, self.obj) + return self._getitem_axis(maybe_callable, axis=axis) + + def _is_scalar_access(self, key: tuple): + raise NotImplementedError() + + def _getitem_tuple(self, tup: tuple): + raise AbstractMethodError(self) + + def _getitem_axis(self, key, axis: int): + raise NotImplementedError() + + def _has_valid_setitem_indexer(self, indexer) -> bool: + raise AbstractMethodError(self) + + @final + def _getbool_axis(self, key, axis: int): + # caller is responsible for ensuring non-None axis + labels = self.obj._get_axis(axis) + key = check_bool_indexer(labels, key) + inds = key.nonzero()[0] + return self.obj._take_with_is_copy(inds, axis=axis) + + +@doc(IndexingMixin.loc) +class _LocIndexer(_LocationIndexer): + _takeable: bool = False + _valid_types = ( + "labels (MUST BE IN THE INDEX), slices of labels (BOTH " + "endpoints included! 
Can be slices of integers if the " + "index is integers), listlike of labels, boolean" + ) + + # ------------------------------------------------------------------- + # Key Checks + + @doc(_LocationIndexer._validate_key) + def _validate_key(self, key, axis: int): + # valid for a collection of labels (we check their presence later) + # slice of labels (where start-end in labels) + # slice of integers (only if in the labels) + # boolean not in slice and with boolean index + if isinstance(key, bool) and not ( + is_bool_dtype(self.obj._get_axis(axis)) + or self.obj._get_axis(axis).dtype.name == "boolean" + ): + raise KeyError( + f"{key}: boolean label can not be used without a boolean index" + ) + + if isinstance(key, slice) and ( + isinstance(key.start, bool) or isinstance(key.stop, bool) + ): + raise TypeError(f"{key}: boolean values can not be used in a slice") + + def _has_valid_setitem_indexer(self, indexer) -> bool: + return True + + def _is_scalar_access(self, key: tuple) -> bool: + """ + Returns + ------- + bool + """ + # this is a shortcut accessor to both .loc and .iloc + # that provide the equivalent access of .at and .iat + # a) avoid getting things via sections and (to minimize dtype changes) + # b) provide a performant path + if len(key) != self.ndim: + return False + + for i, k in enumerate(key): + if not is_scalar(k): + return False + + ax = self.obj.axes[i] + if isinstance(ax, MultiIndex): + return False + + if isinstance(k, str) and ax._supports_partial_string_indexing: + # partial string indexing, df.loc['2000', 'A'] + # should not be considered scalar + return False + + if not ax._index_as_unique: + return False + + return True + + # ------------------------------------------------------------------- + # MultiIndex Handling + + def _multi_take_opportunity(self, tup: tuple) -> bool: + """ + Check whether there is the possibility to use ``_multi_take``. + + Currently the limit is that all axes being indexed, must be indexed with + list-likes. + + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis. + + Returns + ------- + bool + Whether the current indexing, + can be passed through `_multi_take`. + """ + if not all(is_list_like_indexer(x) for x in tup): + return False + + # just too complicated + return not any(com.is_bool_indexer(x) for x in tup) + + def _multi_take(self, tup: tuple): + """ + Create the indexers for the passed tuple of keys, and + executes the take operation. This allows the take operation to be + executed all at once, rather than once for each dimension. + Improving efficiency. + + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis. + + Returns + ------- + values: same type as the object being indexed + """ + # GH 836 + d = { + axis: self._get_listlike_indexer(key, axis) + for (key, axis) in zip(tup, self.obj._AXIS_ORDERS) + } + return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True) + + # ------------------------------------------------------------------- + + def _getitem_iterable(self, key, axis: int): + """ + Index current object with an iterable collection of keys. + + Parameters + ---------- + key : iterable + Targeted labels. + axis : int + Dimension on which the indexing is being made. + + Raises + ------ + KeyError + If no key was found. Will change in the future to raise if not all + keys were found. + + Returns + ------- + scalar, DataFrame, or Series: indexed value(s). + """ + # we assume that not com.is_bool_indexer(key), as that is + # handled before we get here. 
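+ # Editor's note -- illustrative only, not part of the upstream source.
+ # List-of-labels selection of the kind handled here is e.g.:
+ #   >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=["x", "y", "z"])
+ #   >>> df.loc[["x", "z"], "A"].tolist()
+ #   [1, 3]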
+ self._validate_key(key, axis) + + # A collection of keys + keyarr, indexer = self._get_listlike_indexer(key, axis) + return self.obj._reindex_with_indexers( + {axis: [keyarr, indexer]}, copy=True, allow_dups=True + ) + + def _getitem_tuple(self, tup: tuple): + with suppress(IndexingError): + tup = self._expand_ellipsis(tup) + return self._getitem_lowerdim(tup) + + # no multi-index, so validate all of the indexers + tup = self._validate_tuple_indexer(tup) + + # ugly hack for GH #836 + if self._multi_take_opportunity(tup): + return self._multi_take(tup) + + return self._getitem_tuple_same_dim(tup) + + def _get_label(self, label, axis: int): + # GH#5567 this will fail if the label is not present in the axis. + return self.obj.xs(label, axis=axis) + + def _handle_lowerdim_multi_index_axis0(self, tup: tuple): + # we have an axis0 multi-index, handle or raise + axis = self.axis or 0 + try: + # fast path for series or for tup devoid of slices + return self._get_label(tup, axis=axis) + + except KeyError as ek: + # raise KeyError if number of indexers match + # else IndexingError will be raised + if self.ndim < len(tup) <= self.obj.index.nlevels: + raise ek + raise IndexingError("No label returned") from ek + + def _getitem_axis(self, key, axis: int): + key = item_from_zerodim(key) + if is_iterator(key): + key = list(key) + if key is Ellipsis: + key = slice(None) + + labels = self.obj._get_axis(axis) + + if isinstance(key, tuple) and isinstance(labels, MultiIndex): + key = tuple(key) + + if isinstance(key, slice): + self._validate_key(key, axis) + return self._get_slice_axis(key, axis=axis) + elif com.is_bool_indexer(key): + return self._getbool_axis(key, axis=axis) + elif is_list_like_indexer(key): + + # an iterable multi-selection + if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)): + + if hasattr(key, "ndim") and key.ndim > 1: + raise ValueError("Cannot index with multidimensional key") + + return self._getitem_iterable(key, axis=axis) + + # nested tuple slicing + if is_nested_tuple(key, labels): + locs = labels.get_locs(key) + indexer = [slice(None)] * self.ndim + indexer[axis] = locs + return self.obj.iloc[tuple(indexer)] + + # fall thru to straight lookup + self._validate_key(key, axis) + return self._get_label(key, axis=axis) + + def _get_slice_axis(self, slice_obj: slice, axis: int): + """ + This is pretty simple as we just have to deal with labels. + """ + # caller is responsible for ensuring non-None axis + obj = self.obj + if not need_slice(slice_obj): + return obj.copy(deep=False) + + labels = obj._get_axis(axis) + indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step) + + if isinstance(indexer, slice): + return self.obj._slice(indexer, axis=axis) + else: + # DatetimeIndex overrides Index.slice_indexer and may + # return a DatetimeIndex instead of a slice object. + return self.obj.take(indexer, axis=axis) + + def _convert_to_indexer(self, key, axis: int): + """ + Convert indexing key into something we can use to do actual fancy + indexing on a ndarray. + + Examples + ix[:5] -> slice(0, 5) + ix[[1,2,3]] -> [1,2,3] + ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz) + + Going by Zen of Python? + 'In the face of ambiguity, refuse the temptation to guess.' + raise AmbiguousIndexError with integer labels? 
+ - No, prefer label-based indexing + """ + labels = self.obj._get_axis(axis) + + if isinstance(key, slice): + return labels._convert_slice_indexer(key, kind="loc") + + if ( + isinstance(key, tuple) + and not isinstance(labels, MultiIndex) + and self.ndim < 2 + and len(key) > 1 + ): + raise IndexingError("Too many indexers") + + if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)): + # Otherwise get_loc will raise InvalidIndexError + + # if we are a label return me + try: + return labels.get_loc(key) + except LookupError: + if isinstance(key, tuple) and isinstance(labels, MultiIndex): + if len(key) == labels.nlevels: + return {"key": key} + raise + except InvalidIndexError: + # GH35015, using datetime as column indices raises exception + if not isinstance(labels, MultiIndex): + raise + except ValueError: + if not is_integer(key): + raise + return {"key": key} + + if is_nested_tuple(key, labels): + if self.ndim == 1 and any(isinstance(k, tuple) for k in key): + # GH#35349 Raise if tuple in tuple for series + raise IndexingError("Too many indexers") + return labels.get_locs(key) + + elif is_list_like_indexer(key): + + if is_iterator(key): + key = list(key) + + if com.is_bool_indexer(key): + key = check_bool_indexer(labels, key) + return key + else: + return self._get_listlike_indexer(key, axis)[1] + else: + try: + return labels.get_loc(key) + except LookupError: + # allow a not found key only if we are a setter + if not is_list_like_indexer(key): + return {"key": key} + raise + + def _get_listlike_indexer(self, key, axis: int): + """ + Transform a list-like of keys into a new index and an indexer. + + Parameters + ---------- + key : list-like + Targeted labels. + axis: int + Dimension on which the indexing is being made. + + Raises + ------ + KeyError + If at least one key was requested but none was found. + + Returns + ------- + keyarr: Index + New index (coinciding with 'key' if the axis is unique). + values : array-like + Indexer for the return object, -1 denotes keys not found. 
+ """ + ax = self.obj._get_axis(axis) + axis_name = self.obj._get_axis_name(axis) + + keyarr, indexer = ax._get_indexer_strict(key, axis_name) + + return keyarr, indexer + + +@doc(IndexingMixin.iloc) +class _iLocIndexer(_LocationIndexer): + _valid_types = ( + "integer, integer slice (START point is INCLUDED, END " + "point is EXCLUDED), listlike of integers, boolean array" + ) + _takeable = True + + # ------------------------------------------------------------------- + # Key Checks + + def _validate_key(self, key, axis: int): + if com.is_bool_indexer(key): + if hasattr(key, "index") and isinstance(key.index, Index): + if key.index.inferred_type == "integer": + raise NotImplementedError( + "iLocation based boolean " + "indexing on an integer type " + "is not available" + ) + raise ValueError( + "iLocation based boolean indexing cannot use " + "an indexable as a mask" + ) + return + + if isinstance(key, slice): + return + elif is_integer(key): + self._validate_integer(key, axis) + elif isinstance(key, tuple): + # a tuple should already have been caught by this point + # so don't treat a tuple as a valid indexer + raise IndexingError("Too many indexers") + elif is_list_like_indexer(key): + arr = np.array(key) + len_axis = len(self.obj._get_axis(axis)) + + # check that the key has a numeric dtype + if not is_numeric_dtype(arr.dtype): + raise IndexError(f".iloc requires numeric indexers, got {arr}") + + # check that the key does not exceed the maximum size of the index + if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis): + raise IndexError("positional indexers are out-of-bounds") + else: + raise ValueError(f"Can only index by location with a [{self._valid_types}]") + + def _has_valid_setitem_indexer(self, indexer) -> bool: + """ + Validate that a positional indexer cannot enlarge its target + will raise if needed, does not modify the indexer externally. + + Returns + ------- + bool + """ + if isinstance(indexer, dict): + raise IndexError("iloc cannot enlarge its target object") + + if isinstance(indexer, ABCDataFrame): + warnings.warn( + "DataFrame indexer for .iloc is deprecated and will be removed in " + "a future version.\n" + "consider using .loc with a DataFrame indexer for automatic alignment.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if not isinstance(indexer, tuple): + indexer = _tuplify(self.ndim, indexer) + + for ax, i in zip(self.obj.axes, indexer): + if isinstance(i, slice): + # should check the stop slice? + pass + elif is_list_like_indexer(i): + # should check the elements? + pass + elif is_integer(i): + if i >= len(ax): + raise IndexError("iloc cannot enlarge its target object") + elif isinstance(i, dict): + raise IndexError("iloc cannot enlarge its target object") + + return True + + def _is_scalar_access(self, key: tuple) -> bool: + """ + Returns + ------- + bool + """ + # this is a shortcut accessor to both .loc and .iloc + # that provide the equivalent access of .at and .iat + # a) avoid getting things via sections and (to minimize dtype changes) + # b) provide a performant path + if len(key) != self.ndim: + return False + + return all(is_integer(k) for k in key) + + def _validate_integer(self, key: int, axis: int) -> None: + """ + Check that 'key' is a valid position in the desired axis. + + Parameters + ---------- + key : int + Requested position. + axis : int + Desired axis. + + Raises + ------ + IndexError + If 'key' is not a valid position in axis 'axis'. 
+ """ + len_axis = len(self.obj._get_axis(axis)) + if key >= len_axis or key < -len_axis: + raise IndexError("single positional indexer is out-of-bounds") + + # ------------------------------------------------------------------- + + def _getitem_tuple(self, tup: tuple): + + tup = self._validate_tuple_indexer(tup) + with suppress(IndexingError): + return self._getitem_lowerdim(tup) + + return self._getitem_tuple_same_dim(tup) + + def _get_list_axis(self, key, axis: int): + """ + Return Series values by list or array of integers. + + Parameters + ---------- + key : list-like positional indexer + axis : int + + Returns + ------- + Series object + + Notes + ----- + `axis` can only be zero. + """ + try: + return self.obj._take_with_is_copy(key, axis=axis) + except IndexError as err: + # re-raise with different error message + raise IndexError("positional indexers are out-of-bounds") from err + + def _getitem_axis(self, key, axis: int): + if key is Ellipsis: + key = slice(None) + elif isinstance(key, ABCDataFrame): + raise IndexError( + "DataFrame indexer is not allowed for .iloc\n" + "Consider using .loc for automatic alignment." + ) + + if isinstance(key, slice): + return self._get_slice_axis(key, axis=axis) + + if is_iterator(key): + key = list(key) + + if isinstance(key, list): + key = np.asarray(key) + + if com.is_bool_indexer(key): + self._validate_key(key, axis) + return self._getbool_axis(key, axis=axis) + + # a list of integers + elif is_list_like_indexer(key): + return self._get_list_axis(key, axis=axis) + + # a single integer + else: + key = item_from_zerodim(key) + if not is_integer(key): + raise TypeError("Cannot index by location index with a non-integer key") + + # validate the location + self._validate_integer(key, axis) + + return self.obj._ixs(key, axis=axis) + + def _get_slice_axis(self, slice_obj: slice, axis: int): + # caller is responsible for ensuring non-None axis + obj = self.obj + + if not need_slice(slice_obj): + return obj.copy(deep=False) + + labels = obj._get_axis(axis) + labels._validate_positional_slice(slice_obj) + return self.obj._slice(slice_obj, axis=axis) + + def _convert_to_indexer(self, key, axis: int): + """ + Much simpler as we only have to deal with our valid types. + """ + return key + + def _get_setitem_indexer(self, key): + # GH#32257 Fall through to let numpy do validation + if is_iterator(key): + key = list(key) + + if self.axis is not None: + key = _tupleize_axis_indexer(self.ndim, self.axis, key) + + return key + + # ------------------------------------------------------------------- + + def _setitem_with_indexer(self, indexer, value, name="iloc"): + """ + _setitem_with_indexer is for setting values on a Series/DataFrame + using positional indexers. + + If the relevant keys are not present, the Series/DataFrame may be + expanded. + + This method is currently broken when dealing with non-unique Indexes, + since it goes from positional indexers back to labels when calling + BlockManager methods, see GH#12991, GH#22046, GH#15686. 
+ """ + info_axis = self.obj._info_axis_number + + # maybe partial set + take_split_path = not self.obj._mgr.is_single_block + + # if there is only one block/type, still have to take split path + # unless the block is one-dimensional or it can hold the value + if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1: + # in case of dict, keys are indices + val = list(value.values()) if isinstance(value, dict) else value + arr = self.obj._mgr.arrays[0] + take_split_path = not can_hold_element( + arr, extract_array(val, extract_numpy=True) + ) + + # if we have any multi-indexes that have non-trivial slices + # (not null slices) then we must take the split path, xref + # GH 10360, GH 27841 + if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): + for i, ax in zip(indexer, self.obj.axes): + if isinstance(ax, MultiIndex) and not ( + is_integer(i) or com.is_null_slice(i) + ): + take_split_path = True + break + + if isinstance(indexer, tuple): + nindexer = [] + for i, idx in enumerate(indexer): + if isinstance(idx, dict): + + # reindex the axis to the new value + # and set inplace + key, _ = convert_missing_indexer(idx) + + # if this is the items axes, then take the main missing + # path first + # this correctly sets the dtype and avoids cache issues + # essentially this separates out the block that is needed + # to possibly be modified + if self.ndim > 1 and i == info_axis: + + # add the new item, and set the value + # must have all defined axes if we have a scalar + # or a list-like on the non-info axes if we have a + # list-like + if not len(self.obj): + if not is_list_like_indexer(value): + raise ValueError( + "cannot set a frame with no " + "defined index and a scalar" + ) + self.obj[key] = value + return + + # add a new item with the dtype setup + if com.is_null_slice(indexer[0]): + # We are setting an entire column + self.obj[key] = value + return + elif is_array_like(value): + # GH#42099 + arr = extract_array(value, extract_numpy=True) + taker = -1 * np.ones(len(self.obj), dtype=np.intp) + empty_value = algos.take_nd(arr, taker) + if not isinstance(value, ABCSeries): + # if not Series (in which case we need to align), + # we can short-circuit + empty_value[indexer[0]] = arr + self.obj[key] = empty_value + return + + self.obj[key] = empty_value + + else: + self.obj[key] = infer_fill_value(value) + + new_indexer = convert_from_missing_indexer_tuple( + indexer, self.obj.axes + ) + self._setitem_with_indexer(new_indexer, value, name) + + return + + # reindex the axis + # make sure to clear the cache because we are + # just replacing the block manager here + # so the object is the same + index = self.obj._get_axis(i) + labels = index.insert(len(index), key) + + # We are expanding the Series/DataFrame values to match + # the length of thenew index `labels`. GH#40096 ensure + # this is valid even if the index has duplicates. 
+ taker = np.arange(len(index) + 1, dtype=np.intp) + taker[-1] = -1 + reindexers = {i: (labels, taker)} + new_obj = self.obj._reindex_with_indexers( + reindexers, allow_dups=True + ) + self.obj._mgr = new_obj._mgr + self.obj._maybe_update_cacher(clear=True) + self.obj._is_copy = None + + nindexer.append(labels.get_loc(key)) + + else: + nindexer.append(idx) + + indexer = tuple(nindexer) + else: + + indexer, missing = convert_missing_indexer(indexer) + + if missing: + self._setitem_with_indexer_missing(indexer, value) + return + + if name == "loc": + # must come after setting of missing + indexer, value = self._maybe_mask_setitem_value(indexer, value) + + # align and set the values + if take_split_path: + # We have to operate column-wise + self._setitem_with_indexer_split_path(indexer, value, name) + else: + self._setitem_single_block(indexer, value, name) + + def _setitem_with_indexer_split_path(self, indexer, value, name: str): + """ + Setitem column-wise. + """ + # Above we only set take_split_path to True for 2D cases + assert self.ndim == 2 + + if not isinstance(indexer, tuple): + indexer = _tuplify(self.ndim, indexer) + if len(indexer) > self.ndim: + raise IndexError("too many indices for array") + if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2: + raise ValueError(r"Cannot set values with ndim > 2") + + if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): + from pandas import Series + + value = self._align_series(indexer, Series(value)) + + # Ensure we have something we can iterate over + info_axis = indexer[1] + ilocs = self._ensure_iterable_column_indexer(info_axis) + + pi = indexer[0] + lplane_indexer = length_of_indexer(pi, self.obj.index) + # lplane_indexer gives the expected length of obj[indexer[0]] + + # we need an iterable, with a ndim of at least 1 + # eg. don't pass through np.array(0) + if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0: + + if isinstance(value, ABCDataFrame): + self._setitem_with_indexer_frame_value(indexer, value, name) + + elif np.ndim(value) == 2: + self._setitem_with_indexer_2d_value(indexer, value) + + elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi): + # We are setting multiple rows in a single column. + self._setitem_single_column(ilocs[0], value, pi) + + elif len(ilocs) == 1 and 0 != lplane_indexer != len(value): + # We are trying to set N values into M entries of a single + # column, which is invalid for N != M + # Exclude zero-len for e.g. boolean masking that is all-false + + if len(value) == 1 and not is_integer(info_axis): + # This is a case like df.iloc[:3, [1]] = [0] + # where we treat as df.iloc[:3, 1] = 0 + return self._setitem_with_indexer((pi, info_axis[0]), value[0]) + + raise ValueError( + "Must have equal len keys and value " + "when setting with an iterable" + ) + + elif lplane_indexer == 0 and len(value) == len(self.obj.index): + # We get here in one case via .loc with a all-False mask + pass + + elif self._is_scalar_access(indexer) and is_object_dtype( + self.obj.dtypes[ilocs[0]] + ): + # We are setting nested data, only possible for object dtype data + self._setitem_single_column(indexer[1], value, pi) + + elif len(ilocs) == len(value): + # We are setting multiple columns in a single row. + for loc, v in zip(ilocs, value): + self._setitem_single_column(loc, v, pi) + + elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0: + # This is a setitem-with-expansion, see + # test_loc_setitem_empty_append_expands_rows_mixed_dtype + # e.g. 
df = DataFrame(columns=["x", "y"]) + # df["x"] = df["x"].astype(np.int64) + # df.loc[:, "x"] = [1, 2, 3] + self._setitem_single_column(ilocs[0], value, pi) + + else: + raise ValueError( + "Must have equal len keys and value " + "when setting with an iterable" + ) + + else: + + # scalar value + for loc in ilocs: + self._setitem_single_column(loc, value, pi) + + def _setitem_with_indexer_2d_value(self, indexer, value): + # We get here with np.ndim(value) == 2, excluding DataFrame, + # which goes through _setitem_with_indexer_frame_value + pi = indexer[0] + + ilocs = self._ensure_iterable_column_indexer(indexer[1]) + + # GH#7551 Note that this coerces the dtype if we are mixed + value = np.array(value, dtype=object) + if len(ilocs) != value.shape[1]: + raise ValueError( + "Must have equal len keys and value when setting with an ndarray" + ) + + for i, loc in enumerate(ilocs): + # setting with a list, re-coerces + self._setitem_single_column(loc, value[:, i].tolist(), pi) + + def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str): + ilocs = self._ensure_iterable_column_indexer(indexer[1]) + + sub_indexer = list(indexer) + pi = indexer[0] + + multiindex_indexer = isinstance(self.obj.columns, MultiIndex) + + unique_cols = value.columns.is_unique + + # We do not want to align the value in case of iloc GH#37728 + if name == "iloc": + for i, loc in enumerate(ilocs): + val = value.iloc[:, i] + self._setitem_single_column(loc, val, pi) + + elif not unique_cols and value.columns.equals(self.obj.columns): + # We assume we are already aligned, see + # test_iloc_setitem_frame_duplicate_columns_multiple_blocks + for loc in ilocs: + item = self.obj.columns[loc] + if item in value: + sub_indexer[1] = item + val = self._align_series( + tuple(sub_indexer), + value.iloc[:, loc], + multiindex_indexer, + ) + else: + val = np.nan + + self._setitem_single_column(loc, val, pi) + + elif not unique_cols: + raise ValueError("Setting with non-unique columns is not allowed.") + + else: + for loc in ilocs: + item = self.obj.columns[loc] + if item in value: + sub_indexer[1] = item + val = self._align_series( + tuple(sub_indexer), value[item], multiindex_indexer + ) + else: + val = np.nan + + self._setitem_single_column(loc, val, pi) + + def _setitem_single_column(self, loc: int, value, plane_indexer): + """ + + Parameters + ---------- + loc : int + Indexer for column position + plane_indexer : int, slice, listlike[int] + The indexer we use for setitem along axis=0. + """ + pi = plane_indexer + + orig_values = self.obj._get_column_array(loc) + + # perform the equivalent of a setitem on the info axis + # as we have a null slice or a slice with full bounds + # which means essentially reassign to the columns of a + # multi-dim object + # GH#6149 (null slice), GH#10408 (full bounds) + if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)): + pass + elif ( + is_array_like(value) + and len(value.shape) > 0 + and self.obj.shape[0] == value.shape[0] + and not is_empty_indexer(pi) + ): + if is_list_like(pi) and not is_bool_dtype(pi): + value = value[np.argsort(pi)] + else: + # in case of slice + value = value[pi] + else: + # set value into the column (first attempting to operate inplace, then + # falling back to casting if necessary) + self.obj._mgr.column_setitem(loc, plane_indexer, value) + self.obj._clear_item_cache() + return + + self.obj._iset_item(loc, value) + + # We will not operate in-place, but will attempt to in the future. 
+ # To determine whether we need to issue a FutureWarning, see if the
+ # setting in-place would work, i.e. behavior will change.
+
+ new_values = self.obj._get_column_array(loc)
+
+ if can_hold_element(orig_values, new_values) and not len(new_values) == 0:
+ # Don't issue the warning yet, as we can still trim a few cases where
+ # behavior will not change.
+
+ if (
+ isinstance(new_values, np.ndarray)
+ and isinstance(orig_values, np.ndarray)
+ and (
+ np.shares_memory(new_values, orig_values)
+ or new_values.shape != orig_values.shape
+ )
+ ):
+ # TODO: get something like tm.shares_memory working?
+ # The values were set inplace after all, no need to warn,
+ # e.g. test_rename_nocopy
+ # In case of enlarging we cannot set inplace, so no need to
+ # warn either
+ pass
+ else:
+ warnings.warn(
+ "In a future version, `df.iloc[:, i] = newvals` will attempt "
+ "to set the values inplace instead of always setting a new "
+ "array. To retain the old behavior, use either "
+ "`df[df.columns[i]] = newvals` or, if columns are non-unique, "
+ "`df.isetitem(i, newvals)`",
+ DeprecationWarning,
+ stacklevel=find_stack_level(),
+ )
+ # TODO: how to get future behavior?
+ # TODO: what if we got here indirectly via loc?
+ return
+
+ def _setitem_single_block(self, indexer, value, name: str):
+ """
+ _setitem_with_indexer for the case when we have a single Block.
+ """
+ from pandas import Series
+
+ info_axis = self.obj._info_axis_number
+ item_labels = self.obj._get_axis(info_axis)
+ if isinstance(indexer, tuple):
+
+ # if we are setting on the info axis ONLY
+ # set using those methods to avoid block-splitting
+ # logic here
+ if (
+ self.ndim == len(indexer) == 2
+ and is_integer(indexer[1])
+ and com.is_null_slice(indexer[0])
+ ):
+ col = item_labels[indexer[info_axis]]
+ if len(item_labels.get_indexer_for([col])) == 1:
+ # e.g. test_loc_setitem_empty_append_expands_rows
+ loc = item_labels.get_loc(col)
+ # Go through _setitem_single_column to get
+ # FutureWarning if relevant.
+ self._setitem_single_column(loc, value, indexer[0])
+ return
+
+ indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align
+
+ if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
+ # TODO(EA): ExtensionBlock.setitem this causes issues with
+ # setting for extensionarrays that store dicts. Need to decide
+ # if it's worth supporting that.
+ value = self._align_series(indexer, Series(value))
+
+ elif isinstance(value, ABCDataFrame) and name != "iloc":
+ value = self._align_frame(indexer, value)
+
+ # check for chained assignment
+ self.obj._check_is_chained_assignment_possible()
+
+ # actually do the set
+ self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
+ self.obj._maybe_update_cacher(clear=True, inplace=True)
+
+ def _setitem_with_indexer_missing(self, indexer, value):
+ """
+ Insert new row(s) or column(s) into the Series or DataFrame.
+ """
+ from pandas import Series
+
+ # reindex the axis to the new value
+ # and set inplace
+ if self.ndim == 1:
+ index = self.obj.index
+ new_index = index.insert(len(index), indexer)
+
+ # we have a coerced indexer, e.g. a float
+ # that matches in an Int64Index, so
+ # we will not create a duplicate index, rather
+ # index to that element
+ # e.g. 
0.0 -> 0
+ # GH#12246
+ if index.is_unique:
+ # pass new_index[-1:] instead of [new_index[-1]]
+ # so that we retain dtype
+ new_indexer = index.get_indexer(new_index[-1:])
+ if (new_indexer != -1).any():
+ # We only get here with loc, so can hard code
+ return self._setitem_with_indexer(new_indexer, value, "loc")
+
+ # this preserves dtype of the value and of the object
+ if not is_scalar(value):
+ new_dtype = None
+
+ elif is_valid_na_for_dtype(value, self.obj.dtype):
+ if not is_object_dtype(self.obj.dtype):
+ # Every NA value is suitable for object, no conversion needed
+ value = na_value_for_dtype(self.obj.dtype, compat=False)
+
+ new_dtype = maybe_promote(self.obj.dtype, value)[0]
+
+ elif isna(value):
+ new_dtype = None
+ elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
+ # We should not cast if we have object dtype, because we can
+ # set timedeltas into object series
+ curr_dtype = self.obj.dtype
+ curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
+ new_dtype = maybe_promote(curr_dtype, value)[0]
+ else:
+ new_dtype = None
+
+ new_values = Series([value], dtype=new_dtype)._values
+
+ if len(self.obj._values):
+ # GH#22717 handle casting compatibility that np.concatenate
+ # does incorrectly
+ new_values = concat_compat([self.obj._values, new_values])
+ self.obj._mgr = self.obj._constructor(
+ new_values, index=new_index, name=self.obj.name
+ )._mgr
+ self.obj._maybe_update_cacher(clear=True)
+
+ elif self.ndim == 2:
+
+ if not len(self.obj.columns):
+ # no columns and scalar
+ raise ValueError("cannot set a frame with no defined columns")
+
+ has_dtype = hasattr(value, "dtype")
+ if isinstance(value, ABCSeries):
+ # append a Series
+ value = value.reindex(index=self.obj.columns, copy=True)
+ value.name = indexer
+ elif isinstance(value, dict):
+ value = Series(
+ value, index=self.obj.columns, name=indexer, dtype=object
+ )
+ else:
+ # a list-like
+ if is_list_like_indexer(value):
+ # must have conforming columns
+ if len(value) != len(self.obj.columns):
+ raise ValueError("cannot set a row with mismatched columns")
+
+ value = Series(value, index=self.obj.columns, name=indexer)
+
+ if not len(self.obj):
+ # We will ignore the existing dtypes instead of using
+ # internals.concat logic
+ df = value.to_frame().T
+
+ idx = self.obj.index
+ if isinstance(idx, MultiIndex):
+ name = idx.names
+ else:
+ name = idx.name
+
+ df.index = Index([indexer], name=name)
+ if not has_dtype:
+ # i.e. if we already had a Series or ndarray, keep that
+ # dtype. But if we had a list or dict, then do inference
+ df = df.infer_objects()
+ self.obj._mgr = df._mgr
+ else:
+ self.obj._mgr = self.obj._append(value)._mgr
+ self.obj._maybe_update_cacher(clear=True)
+
+ def _ensure_iterable_column_indexer(self, column_indexer):
+ """
+ Ensure that our column indexer is something that can be iterated over.
+ """
+ ilocs: Sequence[int] | np.ndarray
+ if is_integer(column_indexer):
+ ilocs = [column_indexer]
+ elif isinstance(column_indexer, slice):
+ ilocs = np.arange(len(self.obj.columns))[column_indexer]
+ elif isinstance(column_indexer, np.ndarray) and is_bool_dtype(
+ column_indexer.dtype
+ ):
+ ilocs = np.arange(len(column_indexer))[column_indexer]
+ else:
+ ilocs = column_indexer
+ return ilocs
+
+ def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):
+ """
+ Parameters
+ ----------
+ indexer : tuple, slice, scalar
+ Indexer used to get the locations that will be set to `ser`.
+ ser : pd.Series
+ Values to assign to the locations specified by `indexer`. 
+ multiindex_indexer : bool, optional + Defaults to False. Should be set to True if `indexer` was from + a `pd.MultiIndex`, to avoid unnecessary broadcasting. + + Returns + ------- + `np.array` of `ser` broadcast to the appropriate shape for assignment + to the locations selected by `indexer` + """ + if isinstance(indexer, (slice, np.ndarray, list, Index)): + indexer = (indexer,) + + if isinstance(indexer, tuple): + + # flatten np.ndarray indexers + def ravel(i): + return i.ravel() if isinstance(i, np.ndarray) else i + + indexer = tuple(map(ravel, indexer)) + + aligners = [not com.is_null_slice(idx) for idx in indexer] + sum_aligners = sum(aligners) + single_aligner = sum_aligners == 1 + is_frame = self.ndim == 2 + obj = self.obj + + # are we a single alignable value on a non-primary + # dim (e.g. panel: 1,2, or frame: 0) ? + # hence need to align to a single axis dimension + # rather that find all valid dims + + # frame + if is_frame: + single_aligner = single_aligner and aligners[0] + + # we have a frame, with multiple indexers on both axes; and a + # series, so need to broadcast (see GH5206) + if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer): + # TODO: This is hacky, align Series and DataFrame behavior GH#45778 + if obj.ndim == 2 and is_empty_indexer(indexer[0]): + return ser._values.copy() + ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values + + # single indexer + if len(indexer) > 1 and not multiindex_indexer: + len_indexer = len(indexer[1]) + ser_values = ( + np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T + ) + + return ser_values + + for i, idx in enumerate(indexer): + ax = obj.axes[i] + + # multiple aligners (or null slices) + if is_sequence(idx) or isinstance(idx, slice): + if single_aligner and com.is_null_slice(idx): + continue + new_ix = ax[idx] + if not is_list_like_indexer(new_ix): + new_ix = Index([new_ix]) + else: + new_ix = Index(new_ix) + if ser.index.equals(new_ix) or not len(new_ix): + return ser._values.copy() + + return ser.reindex(new_ix)._values + + # 2 dims + elif single_aligner: + + # reindex along index + ax = self.obj.axes[1] + if ser.index.equals(ax) or not len(ax): + return ser._values.copy() + return ser.reindex(ax)._values + + elif is_integer(indexer) and self.ndim == 1: + if is_object_dtype(self.obj): + return ser + ax = self.obj._get_axis(0) + + if ser.index.equals(ax): + return ser._values.copy() + + return ser.reindex(ax)._values[indexer] + + elif is_integer(indexer): + ax = self.obj._get_axis(1) + + if ser.index.equals(ax): + return ser._values.copy() + + return ser.reindex(ax)._values + + raise ValueError("Incompatible indexer with Series") + + def _align_frame(self, indexer, df: DataFrame): + is_frame = self.ndim == 2 + + if isinstance(indexer, tuple): + + idx, cols = None, None + sindexers = [] + for i, ix in enumerate(indexer): + ax = self.obj.axes[i] + if is_sequence(ix) or isinstance(ix, slice): + if isinstance(ix, np.ndarray): + ix = ix.ravel() + if idx is None: + idx = ax[ix] + elif cols is None: + cols = ax[ix] + else: + break + else: + sindexers.append(i) + + if idx is not None and cols is not None: + + if df.index.equals(idx) and df.columns.equals(cols): + val = df.copy()._values + else: + val = df.reindex(idx, columns=cols)._values + return val + + elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame: + ax = self.obj.index[indexer] + if df.index.equals(ax): + val = df.copy()._values + else: + + # we have a multi-index and are trying to align + # with a particular, 
level GH3738
+ if (
+ isinstance(ax, MultiIndex)
+ and isinstance(df.index, MultiIndex)
+ and ax.nlevels != df.index.nlevels
+ ):
+ raise TypeError(
+ "cannot align on a multi-index without "
+ "specifying the join levels"
+ )
+
+ val = df.reindex(index=ax)._values
+ return val
+
+ raise ValueError("Incompatible indexer with DataFrame")
+
+
+class _ScalarAccessIndexer(NDFrameIndexerBase):
+ """
+ Access scalars quickly.
+ """
+
+ # sub-classes need to set _takeable
+ _takeable: bool
+
+ def _convert_key(self, key):
+ raise AbstractMethodError(self)
+
+ def __getitem__(self, key):
+ if not isinstance(key, tuple):
+
+ # we could have a convertible item here (e.g. Timestamp)
+ if not is_list_like_indexer(key):
+ key = (key,)
+ else:
+ raise ValueError("Invalid call for scalar access (getting)!")
+
+ key = self._convert_key(key)
+ return self.obj._get_value(*key, takeable=self._takeable)
+
+ def __setitem__(self, key, value) -> None:
+ if isinstance(key, tuple):
+ key = tuple(com.apply_if_callable(x, self.obj) for x in key)
+ else:
+ # scalar callable may return tuple
+ key = com.apply_if_callable(key, self.obj)
+
+ if not isinstance(key, tuple):
+ key = _tuplify(self.ndim, key)
+ key = list(self._convert_key(key))
+ if len(key) != self.ndim:
+ raise ValueError("Not enough indexers for scalar access (setting)!")
+
+ self.obj._set_value(*key, value=value, takeable=self._takeable)
+
+
+@doc(IndexingMixin.at)
+class _AtIndexer(_ScalarAccessIndexer):
+ _takeable = False
+
+ def _convert_key(self, key):
+ """
+ Require the keys to be the same type as the index (so we don't
+ fall back).
+ """
+ # GH 26989
+ # For series, unpacking key needs to result in the label.
+ # This is already the case for len(key) == 1; e.g. (1,)
+ if self.ndim == 1 and len(key) > 1:
+ key = (key,)
+
+ return key
+
+ @property
+ def _axes_are_unique(self) -> bool:
+ # Only relevant for self.ndim == 2
+ assert self.ndim == 2
+ return self.obj.index.is_unique and self.obj.columns.is_unique
+
+ def __getitem__(self, key):
+
+ if self.ndim == 2 and not self._axes_are_unique:
+ # GH#33041 fall back to .loc
+ if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
+ raise ValueError("Invalid call for scalar access (getting)!")
+ return self.obj.loc[key]
+
+ return super().__getitem__(key)
+
+ def __setitem__(self, key, value):
+ if self.ndim == 2 and not self._axes_are_unique:
+ # GH#33041 fall back to .loc
+ if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
+ raise ValueError("Invalid call for scalar access (setting)!")
+
+ self.obj.loc[key] = value
+ return
+
+ return super().__setitem__(key, value)
+
+
+@doc(IndexingMixin.iat)
+class _iAtIndexer(_ScalarAccessIndexer):
+ _takeable = True
+
+ def _convert_key(self, key):
+ """
+ Require integer args. (and convert to label arguments)
+ """
+ for i in key:
+ if not is_integer(i):
+ raise ValueError("iAt based indexing can only have integer indexers")
+ return key
+
+
+def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
+ """
+ Given an indexer for the first dimension, create an equivalent tuple
+ for indexing over all dimensions.
+
+ Parameters
+ ----------
+ ndim : int
+ loc : object
+
+ Returns
+ -------
+ tuple
+ """
+ _tup: list[Hashable | slice]
+ _tup = [slice(None, None) for _ in range(ndim)]
+ _tup[0] = loc
+ return tuple(_tup)
+
+
+def _tupleize_axis_indexer(ndim: int, axis: int, key) -> tuple:
+ """
+ If we have an axis, adapt the given key to be axis-independent. 
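+
+ For example (illustrative), with ``ndim=2`` and ``axis=1``::
+
+ _tupleize_axis_indexer(2, 1, [0, 2]) # -> (slice(None, None, None), [0, 2])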
+ """ + new_key = [slice(None)] * ndim + new_key[axis] = key + return tuple(new_key) + + +def convert_to_index_sliceable(obj: DataFrame, key): + """ + If we are index sliceable, then return my slicer, otherwise return None. + """ + idx = obj.index + if isinstance(key, slice): + return idx._convert_slice_indexer(key, kind="getitem") + + elif isinstance(key, str): + + # we are an actual column + if key in obj.columns: + return None + + # We might have a datetimelike string that we can translate to a + # slice here via partial string indexing + if idx._supports_partial_string_indexing: + try: + res = idx._get_string_slice(str(key)) + warnings.warn( + "Indexing a DataFrame with a datetimelike index using a single " + "string to slice the rows, like `frame[string]`, is deprecated " + "and will be removed in a future version. Use `frame.loc[string]` " + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return res + except (KeyError, ValueError, NotImplementedError): + return None + + return None + + +def check_bool_indexer(index: Index, key) -> np.ndarray: + """ + Check if key is a valid boolean indexer for an object with such index and + perform reindexing or conversion if needed. + + This function assumes that is_bool_indexer(key) == True. + + Parameters + ---------- + index : Index + Index of the object on which the indexing is done. + key : list-like + Boolean indexer to check. + + Returns + ------- + np.array + Resulting key. + + Raises + ------ + IndexError + If the key does not have the same length as index. + IndexingError + If the index of the key is unalignable to index. + """ + result = key + if isinstance(key, ABCSeries) and not key.index.equals(index): + indexer = result.index.get_indexer_for(index) + if -1 in indexer: + raise IndexingError( + "Unalignable boolean Series provided as " + "indexer (index of the boolean Series and of " + "the indexed object do not match)." + ) + + result = result.take(indexer) + + # fall through for boolean + if not is_extension_array_dtype(result.dtype): + return result.astype(bool)._values + + if is_object_dtype(key): + # key might be object-dtype bool, check_array_indexer needs bool array + result = np.asarray(result, dtype=bool) + elif not is_array_like(result): + # GH 33924 + # key may contain nan elements, check_array_indexer needs bool array + result = pd_array(result, dtype=bool) + return check_array_indexer(index, result) + + +def convert_missing_indexer(indexer): + """ + Reverse convert a missing indexer, which is a dict + return the scalar indexer and a boolean indicating if we converted + """ + if isinstance(indexer, dict): + + # a missing key (but not a tuple indexer) + indexer = indexer["key"] + + if isinstance(indexer, bool): + raise KeyError("cannot use a single bool to index into setitem") + return indexer, True + + return indexer, False + + +def convert_from_missing_indexer_tuple(indexer, axes): + """ + Create a filtered indexer that doesn't have any missing indexers. + """ + + def get_indexer(_i, _idx): + return axes[_i].get_loc(_idx["key"]) if isinstance(_idx, dict) else _idx + + return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer)) + + +def maybe_convert_ix(*args): + """ + We likely want to take the cross-product. 
+ """ + for arg in args: + if not isinstance(arg, (np.ndarray, list, ABCSeries, Index)): + return args + return np.ix_(*args) + + +def is_nested_tuple(tup, labels) -> bool: + """ + Returns + ------- + bool + """ + # check for a compatible nested tuple and multiindexes among the axes + if not isinstance(tup, tuple): + return False + + for k in tup: + if is_list_like(k) or isinstance(k, slice): + return isinstance(labels, MultiIndex) + + return False + + +def is_label_like(key) -> bool: + """ + Returns + ------- + bool + """ + # select a label or row + return ( + not isinstance(key, slice) + and not is_list_like_indexer(key) + and key is not Ellipsis + ) + + +def need_slice(obj: slice) -> bool: + """ + Returns + ------- + bool + """ + return ( + obj.start is not None + or obj.stop is not None + or (obj.step is not None and obj.step != 1) + ) + + +def check_deprecated_indexers(key) -> None: + """Checks if the key is a deprecated indexer.""" + if ( + isinstance(key, set) + or isinstance(key, tuple) + and any(isinstance(x, set) for x in key) + ): + warnings.warn( + "Passing a set as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if ( + isinstance(key, dict) + or isinstance(key, tuple) + and any(isinstance(x, dict) for x in key) + ): + warnings.warn( + "Passing a dict as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/core/interchange/__init__.py b/pandas/core/interchange/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/interchange/buffer.py b/pandas/core/interchange/buffer.py new file mode 100644 index 00000000..0f62dd00 --- /dev/null +++ b/pandas/core/interchange/buffer.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import numpy as np + +from pandas.core.interchange.dataframe_protocol import ( + Buffer, + DlpackDeviceType, +) +from pandas.util.version import Version + +_NUMPY_HAS_DLPACK = Version(np.__version__) >= Version("1.22.0") + + +class PandasBuffer(Buffer): + """ + Data in the buffer is guaranteed to be contiguous in memory. + """ + + def __init__(self, x: np.ndarray, allow_copy: bool = True) -> None: + """ + Handle only regular columns (= numpy arrays) for now. + """ + if not x.strides == (x.dtype.itemsize,): + # The protocol does not support strided buffers, so a copy is + # necessary. If that's not allowed, we need to raise an exception. + if allow_copy: + x = x.copy() + else: + raise RuntimeError( + "Exports cannot be zero-copy in the case " + "of a non-contiguous buffer" + ) + + # Store the numpy array in which the data resides as a private + # attribute, so we can use it to retrieve the public attributes + self._x = x + + @property + def bufsize(self) -> int: + """ + Buffer size in bytes. + """ + return self._x.size * self._x.dtype.itemsize + + @property + def ptr(self) -> int: + """ + Pointer to start of the buffer as an integer. + """ + return self._x.__array_interface__["data"][0] + + def __dlpack__(self): + """ + Represent this structure as DLPack interface. + """ + if _NUMPY_HAS_DLPACK: + return self._x.__dlpack__() + raise NotImplementedError("__dlpack__") + + def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: + """ + Device type and device ID for where the data in the buffer resides. 
+ """ + return (DlpackDeviceType.CPU, None) + + def __repr__(self) -> str: + return ( + "PandasBuffer(" + + str( + { + "bufsize": self.bufsize, + "ptr": self.ptr, + "device": self.__dlpack_device__()[0].name, + } + ) + + ")" + ) diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py new file mode 100644 index 00000000..359e2fa0 --- /dev/null +++ b/pandas/core/interchange/column.py @@ -0,0 +1,377 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np + +from pandas._libs.lib import infer_dtype +from pandas._libs.tslibs import iNaT +from pandas.util._decorators import cache_readonly + +import pandas as pd +from pandas.api.types import ( + is_categorical_dtype, + is_string_dtype, +) +from pandas.core.interchange.buffer import PandasBuffer +from pandas.core.interchange.dataframe_protocol import ( + Column, + ColumnBuffers, + ColumnNullType, + DtypeKind, +) +from pandas.core.interchange.utils import ( + ArrowCTypes, + Endianness, + NoBufferPresent, + dtype_to_arrow_c_fmt, +) + +_NP_KINDS = { + "i": DtypeKind.INT, + "u": DtypeKind.UINT, + "f": DtypeKind.FLOAT, + "b": DtypeKind.BOOL, + "U": DtypeKind.STRING, + "M": DtypeKind.DATETIME, + "m": DtypeKind.DATETIME, +} + +_NULL_DESCRIPTION = { + DtypeKind.FLOAT: (ColumnNullType.USE_NAN, None), + DtypeKind.DATETIME: (ColumnNullType.USE_SENTINEL, iNaT), + DtypeKind.INT: (ColumnNullType.NON_NULLABLE, None), + DtypeKind.UINT: (ColumnNullType.NON_NULLABLE, None), + DtypeKind.BOOL: (ColumnNullType.NON_NULLABLE, None), + # Null values for categoricals are stored as `-1` sentinel values + # in the category date (e.g., `col.values.codes` is int8 np.ndarray) + DtypeKind.CATEGORICAL: (ColumnNullType.USE_SENTINEL, -1), + # follow Arrow in using 1 as valid value and 0 for missing/null value + DtypeKind.STRING: (ColumnNullType.USE_BYTEMASK, 0), +} + +_NO_VALIDITY_BUFFER = { + ColumnNullType.NON_NULLABLE: "This column is non-nullable", + ColumnNullType.USE_NAN: "This column uses NaN as null", + ColumnNullType.USE_SENTINEL: "This column uses a sentinel value", +} + + +class PandasColumn(Column): + """ + A column object, with only the methods and properties required by the + interchange protocol defined. + A column can contain one or more chunks. Each chunk can contain up to three + buffers - a data buffer, a mask buffer (depending on null representation), + and an offsets buffer (if variable-size binary; e.g., variable-length + strings). + Note: this Column object can only be produced by ``__dataframe__``, so + doesn't need its own version or ``__column__`` protocol. + """ + + def __init__(self, column: pd.Series, allow_copy: bool = True) -> None: + """ + Note: doesn't deal with extension arrays yet, just assume a regular + Series/ndarray for now. + """ + if not isinstance(column, pd.Series): + raise NotImplementedError(f"Columns of type {type(column)} not handled yet") + + # Store the column as a private attribute + self._col = column + self._allow_copy = allow_copy + + def size(self) -> int: + """ + Size of the column, in elements. + """ + return self._col.size + + @property + def offset(self) -> int: + """ + Offset of first element. Always zero. 
+ """ + # TODO: chunks are implemented now, probably this should return something + return 0 + + @cache_readonly + def dtype(self) -> tuple[DtypeKind, int, str, str]: + dtype = self._col.dtype + + if is_categorical_dtype(dtype): + codes = self._col.values.codes + ( + _, + bitwidth, + c_arrow_dtype_f_str, + _, + ) = self._dtype_from_pandasdtype(codes.dtype) + return ( + DtypeKind.CATEGORICAL, + bitwidth, + c_arrow_dtype_f_str, + Endianness.NATIVE, + ) + elif is_string_dtype(dtype): + if infer_dtype(self._col) == "string": + return ( + DtypeKind.STRING, + 8, + dtype_to_arrow_c_fmt(dtype), + Endianness.NATIVE, + ) + raise NotImplementedError("Non-string object dtypes are not supported yet") + else: + return self._dtype_from_pandasdtype(dtype) + + def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]: + """ + See `self.dtype` for details. + """ + # Note: 'c' (complex) not handled yet (not in array spec v1). + # 'b', 'B' (bytes), 'S', 'a', (old-style string) 'V' (void) not handled + # datetime and timedelta both map to datetime (is timedelta handled?) + + kind = _NP_KINDS.get(dtype.kind, None) + if kind is None: + # Not a NumPy dtype. Check if it's a categorical maybe + raise ValueError(f"Data type {dtype} not supported by interchange protocol") + + return kind, dtype.itemsize * 8, dtype_to_arrow_c_fmt(dtype), dtype.byteorder + + @property + def describe_categorical(self): + """ + If the dtype is categorical, there are two options: + - There are only values in the data buffer. + - There is a separate non-categorical Column encoding for categorical values. + + Raises TypeError if the dtype is not categorical + + Content of returned dict: + - "is_ordered" : bool, whether the ordering of dictionary indices is + semantically meaningful. + - "is_dictionary" : bool, whether a dictionary-style mapping of + categorical values to other objects exists + - "categories" : Column representing the (implicit) mapping of indices to + category values (e.g. an array of cat1, cat2, ...). + None if not a dictionary-style categorical. + """ + if not self.dtype[0] == DtypeKind.CATEGORICAL: + raise TypeError( + "describe_categorical only works on a column with categorical dtype!" + ) + + return { + "is_ordered": self._col.cat.ordered, + "is_dictionary": True, + "categories": PandasColumn(pd.Series(self._col.cat.categories)), + } + + @property + def describe_null(self): + kind = self.dtype[0] + try: + null, value = _NULL_DESCRIPTION[kind] + except KeyError: + raise NotImplementedError(f"Data type {kind} not yet supported") + + return null, value + + @cache_readonly + def null_count(self) -> int: + """ + Number of null elements. Should always be known. + """ + return self._col.isna().sum().item() + + @property + def metadata(self) -> dict[str, pd.Index]: + """ + Store specific metadata of the column. + """ + return {"pandas.index": self._col.index} + + def num_chunks(self) -> int: + """ + Return the number of chunks the column consists of. + """ + return 1 + + def get_chunks(self, n_chunks: int | None = None): + """ + Return an iterator yielding the chunks. + See `DataFrame.get_chunks` for details on ``n_chunks``. + """ + if n_chunks and n_chunks > 1: + size = len(self._col) + step = size // n_chunks + if size % n_chunks != 0: + step += 1 + for start in range(0, step * n_chunks, step): + yield PandasColumn( + self._col.iloc[start : start + step], self._allow_copy + ) + else: + yield self + + def get_buffers(self) -> ColumnBuffers: + """ + Return a dictionary containing the underlying buffers. 
+ The returned dictionary has the following contents: + - "data": a two-element tuple whose first element is a buffer + containing the data and whose second element is the data + buffer's associated dtype. + - "validity": a two-element tuple whose first element is a buffer + containing mask values indicating missing data and + whose second element is the mask value buffer's + associated dtype. None if the null representation is + not a bit or byte mask. + - "offsets": a two-element tuple whose first element is a buffer + containing the offset values for variable-size binary + data (e.g., variable-length strings) and whose second + element is the offsets buffer's associated dtype. None + if the data buffer does not have an associated offsets + buffer. + """ + buffers: ColumnBuffers = { + "data": self._get_data_buffer(), + "validity": None, + "offsets": None, + } + + try: + buffers["validity"] = self._get_validity_buffer() + except NoBufferPresent: + pass + + try: + buffers["offsets"] = self._get_offsets_buffer() + except NoBufferPresent: + pass + + return buffers + + def _get_data_buffer( + self, + ) -> tuple[PandasBuffer, Any]: # Any is for self.dtype tuple + """ + Return the buffer containing the data and the buffer's associated dtype. + """ + if self.dtype[0] in ( + DtypeKind.INT, + DtypeKind.UINT, + DtypeKind.FLOAT, + DtypeKind.BOOL, + DtypeKind.DATETIME, + ): + buffer = PandasBuffer(self._col.to_numpy(), allow_copy=self._allow_copy) + dtype = self.dtype + elif self.dtype[0] == DtypeKind.CATEGORICAL: + codes = self._col.values._codes + buffer = PandasBuffer(codes, allow_copy=self._allow_copy) + dtype = self._dtype_from_pandasdtype(codes.dtype) + elif self.dtype[0] == DtypeKind.STRING: + # Marshal the strings from a NumPy object array into a byte array + buf = self._col.to_numpy() + b = bytearray() + + # TODO: this for-loop is slow; can be implemented in Cython/C/C++ later + for obj in buf: + if isinstance(obj, str): + b.extend(obj.encode(encoding="utf-8")) + + # Convert the byte array to a Pandas "buffer" using + # a NumPy array as the backing store + buffer = PandasBuffer(np.frombuffer(b, dtype="uint8")) + + # Define the dtype for the returned buffer + dtype = ( + DtypeKind.STRING, + 8, + ArrowCTypes.STRING, + Endianness.NATIVE, + ) # note: currently only support native endianness + else: + raise NotImplementedError(f"Data type {self._col.dtype} not handled yet") + + return buffer, dtype + + def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]: + """ + Return the buffer containing the mask values indicating missing data and + the buffer's associated dtype. + Raises NoBufferPresent if null representation is not a bit or byte mask. + """ + null, invalid = self.describe_null + + if self.dtype[0] == DtypeKind.STRING: + # For now, use byte array as the mask. + # TODO: maybe store as bit array to save space?.. + buf = self._col.to_numpy() + + # Determine the encoding for valid values + valid = invalid == 0 + invalid = not valid + + mask = np.zeros(shape=(len(buf),), dtype=np.bool_) + for i, obj in enumerate(buf): + mask[i] = valid if isinstance(obj, str) else invalid + + # Convert the mask array to a Pandas "buffer" using + # a NumPy array as the backing store + buffer = PandasBuffer(mask) + + # Define the dtype of the returned buffer + dtype = (DtypeKind.BOOL, 8, ArrowCTypes.BOOL, Endianness.NATIVE) + + return buffer, dtype + + try: + msg = _NO_VALIDITY_BUFFER[null] + " so does not have a separate mask" + except KeyError: + # TODO: implement for other bit/byte masks? 
+ raise NotImplementedError("See self.describe_null") + + raise NoBufferPresent(msg) + + def _get_offsets_buffer(self) -> tuple[PandasBuffer, Any]: + """ + Return the buffer containing the offset values for variable-size binary + data (e.g., variable-length strings) and the buffer's associated dtype. + Raises NoBufferPresent if the data buffer does not have an associated + offsets buffer. + """ + if self.dtype[0] == DtypeKind.STRING: + # For each string, we need to manually determine the next offset + values = self._col.to_numpy() + ptr = 0 + offsets = np.zeros(shape=(len(values) + 1,), dtype=np.int64) + for i, v in enumerate(values): + # For missing values (in this case, `np.nan` values) + # we don't increment the pointer + if isinstance(v, str): + b = v.encode(encoding="utf-8") + ptr += len(b) + + offsets[i + 1] = ptr + + # Convert the offsets to a Pandas "buffer" using + # the NumPy array as the backing store + buffer = PandasBuffer(offsets) + + # Assemble the buffer dtype info + dtype = ( + DtypeKind.INT, + 64, + ArrowCTypes.INT64, + Endianness.NATIVE, + ) # note: currently only support native endianness + else: + raise NoBufferPresent( + "This column has a fixed-length dtype so " + "it does not have an offsets buffer" + ) + + return buffer, dtype diff --git a/pandas/core/interchange/dataframe.py b/pandas/core/interchange/dataframe.py new file mode 100644 index 00000000..9139cb41 --- /dev/null +++ b/pandas/core/interchange/dataframe.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from collections import abc +from typing import TYPE_CHECKING + +from pandas.core.interchange.column import PandasColumn +from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg + +if TYPE_CHECKING: + import pandas as pd + from pandas import Index + + +class PandasDataFrameXchg(DataFrameXchg): + """ + A data frame class, with only the methods required by the interchange + protocol defined. + Instances of this (private) class are returned from + ``pd.DataFrame.__dataframe__`` as objects with the methods and + attributes defined on this class. + """ + + def __init__( + self, df: pd.DataFrame, nan_as_null: bool = False, allow_copy: bool = True + ) -> None: + """ + Constructor - an instance of this (private) class is returned from + `pd.DataFrame.__dataframe__`. + """ + self._df = df + # ``nan_as_null`` is a keyword intended for the consumer to tell the + # producer to overwrite null values in the data with ``NaN`` (or ``NaT``). + # This currently has no effect; once support for nullable extension + # dtypes is added, this value should be propagated to columns. + self._nan_as_null = nan_as_null + self._allow_copy = allow_copy + + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> PandasDataFrameXchg: + return PandasDataFrameXchg(self._df, nan_as_null, allow_copy) + + @property + def metadata(self) -> dict[str, Index]: + # `index` isn't a regular column, and the protocol doesn't support row + # labels - so we export it as Pandas-specific metadata here. 
+ return {"pandas.index": self._df.index} + + def num_columns(self) -> int: + return len(self._df.columns) + + def num_rows(self) -> int: + return len(self._df) + + def num_chunks(self) -> int: + return 1 + + def column_names(self) -> Index: + return self._df.columns + + def get_column(self, i: int) -> PandasColumn: + return PandasColumn(self._df.iloc[:, i], allow_copy=self._allow_copy) + + def get_column_by_name(self, name: str) -> PandasColumn: + return PandasColumn(self._df[name], allow_copy=self._allow_copy) + + def get_columns(self) -> list[PandasColumn]: + return [ + PandasColumn(self._df[name], allow_copy=self._allow_copy) + for name in self._df.columns + ] + + def select_columns(self, indices) -> PandasDataFrameXchg: + if not isinstance(indices, abc.Sequence): + raise ValueError("`indices` is not a sequence") + if not isinstance(indices, list): + indices = list(indices) + + return PandasDataFrameXchg( + self._df.iloc[:, indices], self._nan_as_null, self._allow_copy + ) + + def select_columns_by_name(self, names) -> PandasDataFrameXchg: + if not isinstance(names, abc.Sequence): + raise ValueError("`names` is not a sequence") + if not isinstance(names, list): + names = list(names) + + return PandasDataFrameXchg( + self._df.loc[:, names], self._nan_as_null, self._allow_copy + ) + + def get_chunks(self, n_chunks=None): + """ + Return an iterator yielding the chunks. + """ + if n_chunks and n_chunks > 1: + size = len(self._df) + step = size // n_chunks + if size % n_chunks != 0: + step += 1 + for start in range(0, step * n_chunks, step): + yield PandasDataFrameXchg( + self._df.iloc[start : start + step, :], + self._nan_as_null, + self._allow_copy, + ) + else: + yield self diff --git a/pandas/core/interchange/dataframe_protocol.py b/pandas/core/interchange/dataframe_protocol.py new file mode 100644 index 00000000..2cfdee55 --- /dev/null +++ b/pandas/core/interchange/dataframe_protocol.py @@ -0,0 +1,485 @@ +""" +A verbatim copy (vendored) of the spec from https://github.com/data-apis/dataframe-api +""" + +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +import enum +from typing import ( + Any, + Iterable, + Sequence, + TypedDict, +) + + +class DlpackDeviceType(enum.IntEnum): + """Integer enum for device type codes matching DLPack.""" + + CPU = 1 + CUDA = 2 + CPU_PINNED = 3 + OPENCL = 4 + VULKAN = 7 + METAL = 8 + VPI = 9 + ROCM = 10 + + +class DtypeKind(enum.IntEnum): + """ + Integer enum for data types. + + Attributes + ---------- + INT : int + Matches to signed integer data type. + UINT : int + Matches to unsigned integer data type. + FLOAT : int + Matches to floating point data type. + BOOL : int + Matches to boolean data type. + STRING : int + Matches to string data type (UTF-8 encoded). + DATETIME : int + Matches to datetime data type. + CATEGORICAL : int + Matches to categorical data type. + """ + + INT = 0 + UINT = 1 + FLOAT = 2 + BOOL = 20 + STRING = 21 # UTF-8 + DATETIME = 22 + CATEGORICAL = 23 + + +class ColumnNullType(enum.IntEnum): + """ + Integer enum for null type representation. + + Attributes + ---------- + NON_NULLABLE : int + Non-nullable column. + USE_NAN : int + Use explicit float NaN value. + USE_SENTINEL : int + Sentinel value besides NaN/NaT. + USE_BITMASK : int + The bit is set/unset representing a null on a certain position. + USE_BYTEMASK : int + The byte is set/unset representing a null on a certain position. 
+ """ + + NON_NULLABLE = 0 + USE_NAN = 1 + USE_SENTINEL = 2 + USE_BITMASK = 3 + USE_BYTEMASK = 4 + + +class ColumnBuffers(TypedDict): + # first element is a buffer containing the column data; + # second element is the data buffer's associated dtype + data: tuple[Buffer, Any] + + # first element is a buffer containing mask values indicating missing data; + # second element is the mask value buffer's associated dtype. + # None if the null representation is not a bit or byte mask + validity: tuple[Buffer, Any] | None + + # first element is a buffer containing the offset values for + # variable-size binary data (e.g., variable-length strings); + # second element is the offsets buffer's associated dtype. + # None if the data buffer does not have an associated offsets buffer + offsets: tuple[Buffer, Any] | None + + +class CategoricalDescription(TypedDict): + # whether the ordering of dictionary indices is semantically meaningful + is_ordered: bool + # whether a dictionary-style mapping of categorical values to other objects exists + is_dictionary: bool + # Python-level only (e.g. ``{int: str}``). + # None if not a dictionary-style categorical. + categories: Column | None + + +class Buffer(ABC): + """ + Data in the buffer is guaranteed to be contiguous in memory. + + Note that there is no dtype attribute present, a buffer can be thought of + as simply a block of memory. However, if the column that the buffer is + attached to has a dtype that's supported by DLPack and ``__dlpack__`` is + implemented, then that dtype information will be contained in the return + value from ``__dlpack__``. + + This distinction is useful to support both data exchange via DLPack on a + buffer and (b) dtypes like variable-length strings which do not have a + fixed number of bytes per element. + """ + + @property + @abstractmethod + def bufsize(self) -> int: + """ + Buffer size in bytes. + """ + pass + + @property + @abstractmethod + def ptr(self) -> int: + """ + Pointer to start of the buffer as an integer. + """ + pass + + @abstractmethod + def __dlpack__(self): + """ + Produce DLPack capsule (see array API standard). + + Raises: + + - TypeError : if the buffer contains unsupported dtypes. + - NotImplementedError : if DLPack support is not implemented + + Useful to have to connect to array libraries. Support optional because + it's not completely trivial to implement for a Python-only library. + """ + raise NotImplementedError("__dlpack__") + + @abstractmethod + def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: + """ + Device type and device ID for where the data in the buffer resides. + Uses device type codes matching DLPack. + Note: must be implemented even if ``__dlpack__`` is not. + """ + pass + + +class Column(ABC): + """ + A column object, with only the methods and properties required by the + interchange protocol defined. + + A column can contain one or more chunks. Each chunk can contain up to three + buffers - a data buffer, a mask buffer (depending on null representation), + and an offsets buffer (if variable-size binary; e.g., variable-length + strings). + + TBD: Arrow has a separate "null" dtype, and has no separate mask concept. + Instead, it seems to use "children" for both columns with a bit mask, + and for nested dtypes. Unclear whether this is elegant or confusing. + This design requires checking the null representation explicitly. + + The Arrow design requires checking: + 1. the ARROW_FLAG_NULLABLE (for sentinel values) + 2. 
if a column has two children, combined with one of those children + having a null dtype. + + Making the mask concept explicit seems useful. One null dtype would + not be enough to cover both bit and byte masks, so that would mean + even more checking if we did it the Arrow way. + + TBD: there's also the "chunk" concept here, which is implicit in Arrow as + multiple buffers per array (= column here). Semantically it may make + sense to have both: chunks were meant for example for lazy evaluation + of data which doesn't fit in memory, while multiple buffers per column + could also come from doing a selection operation on a single + contiguous buffer. + + Given these concepts, one would expect chunks to be all of the same + size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows), + while multiple buffers could have data-dependent lengths. Not an issue + in pandas if one column is backed by a single NumPy array, but in + Arrow it seems possible. + Are multiple chunks *and* multiple buffers per column necessary for + the purposes of this interchange protocol, or must producers either + reuse the chunk concept for this or copy the data? + + Note: this Column object can only be produced by ``__dataframe__``, so + doesn't need its own version or ``__column__`` protocol. + """ + + @abstractmethod + def size(self) -> int: + """ + Size of the column, in elements. + + Corresponds to DataFrame.num_rows() if column is a single chunk; + equal to size of this current chunk otherwise. + """ + pass + + @property + @abstractmethod + def offset(self) -> int: + """ + Offset of first element. + + May be > 0 if using chunks; for example for a column with N chunks of + equal size M (only the last chunk may be shorter), + ``offset = n * M``, ``n = 0 .. N-1``. + """ + pass + + @property + @abstractmethod + def dtype(self) -> tuple[DtypeKind, int, str, str]: + """ + Dtype description as a tuple ``(kind, bit-width, format string, endianness)``. + + Bit-width : the number of bits as an integer + Format string : data type description format string in Apache Arrow C + Data Interface format. + Endianness : current only native endianness (``=``) is supported + + Notes: + - Kind specifiers are aligned with DLPack where possible (hence the + jump to 20, leave enough room for future extension) + - Masks must be specified as boolean with either bit width 1 (for bit + masks) or 8 (for byte masks). + - Dtype width in bits was preferred over bytes + - Endianness isn't too useful, but included now in case in the future + we need to support non-native endianness + - Went with Apache Arrow format strings over NumPy format strings + because they're more complete from a dataframe perspective + - Format strings are mostly useful for datetime specification, and + for categoricals. + - For categoricals, the format string describes the type of the + categorical in the data buffer. In case of a separate encoding of + the categorical (e.g. an integer to string mapping), this can + be derived from ``self.describe_categorical``. + - Data types not included: complex, Arrow-style null, binary, decimal, + and nested (list, struct, map, union) dtypes. + """ + pass + + @property + @abstractmethod + def describe_categorical(self) -> CategoricalDescription: + """ + If the dtype is categorical, there are two options: + - There are only values in the data buffer. + - There is a separate non-categorical Column encoding for categorical values. 
+ + Raises TypeError if the dtype is not categorical + + Returns the dictionary with description on how to interpret the data buffer: + - "is_ordered" : bool, whether the ordering of dictionary indices is + semantically meaningful. + - "is_dictionary" : bool, whether a mapping of + categorical values to other objects exists + - "categories" : Column representing the (implicit) mapping of indices to + category values (e.g. an array of cat1, cat2, ...). + None if not a dictionary-style categorical. + + TBD: are there any other in-memory representations that are needed? + """ + pass + + @property + @abstractmethod + def describe_null(self) -> tuple[ColumnNullType, Any]: + """ + Return the missing value (or "null") representation the column dtype + uses, as a tuple ``(kind, value)``. + + Value : if kind is "sentinel value", the actual value. If kind is a bit + mask or a byte mask, the value (0 or 1) indicating a missing value. None + otherwise. + """ + pass + + @property + @abstractmethod + def null_count(self) -> int | None: + """ + Number of null elements, if known. + + Note: Arrow uses -1 to indicate "unknown", but None seems cleaner. + """ + pass + + @property + @abstractmethod + def metadata(self) -> dict[str, Any]: + """ + The metadata for the column. See `DataFrame.metadata` for more details. + """ + pass + + @abstractmethod + def num_chunks(self) -> int: + """ + Return the number of chunks the column consists of. + """ + pass + + @abstractmethod + def get_chunks(self, n_chunks: int | None = None) -> Iterable[Column]: + """ + Return an iterator yielding the chunks. + + See `DataFrame.get_chunks` for details on ``n_chunks``. + """ + pass + + @abstractmethod + def get_buffers(self) -> ColumnBuffers: + """ + Return a dictionary containing the underlying buffers. + + The returned dictionary has the following contents: + + - "data": a two-element tuple whose first element is a buffer + containing the data and whose second element is the data + buffer's associated dtype. + - "validity": a two-element tuple whose first element is a buffer + containing mask values indicating missing data and + whose second element is the mask value buffer's + associated dtype. None if the null representation is + not a bit or byte mask. + - "offsets": a two-element tuple whose first element is a buffer + containing the offset values for variable-size binary + data (e.g., variable-length strings) and whose second + element is the offsets buffer's associated dtype. None + if the data buffer does not have an associated offsets + buffer. + """ + pass + + +# def get_children(self) -> Iterable[Column]: +# """ +# Children columns underneath the column, each object in this iterator +# must adhere to the column specification. +# """ +# pass + + +class DataFrame(ABC): + """ + A data frame class, with only the methods required by the interchange + protocol defined. + + A "data frame" represents an ordered collection of named columns. + A column's "name" must be a unique string. + Columns may be accessed by name or by position. + + This could be a public data frame class, or an object with the methods and + attributes defined on this DataFrame class could be returned from the + ``__dataframe__`` method of a public data frame class in a library adhering + to the dataframe interchange protocol specification. 
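+
+ A consumer typically walks an interchange object roughly like this
+ (illustrative sketch only; ``some_df`` is any dataframe implementing the
+ protocol)::
+
+ xchg = some_df.__dataframe__()
+ for name in xchg.column_names():
+ col = xchg.get_column_by_name(name)
+ buffers = col.get_buffers() # dict with "data", "validity", "offsets"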
+ """ + + version = 0 # version of the protocol + + @abstractmethod + def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): + """Construct a new interchange object, potentially changing the parameters.""" + pass + + @property + @abstractmethod + def metadata(self) -> dict[str, Any]: + """ + The metadata for the data frame, as a dictionary with string keys. The + contents of `metadata` may be anything, they are meant for a library + to store information that it needs to, e.g., roundtrip losslessly or + for two implementations to share data that is not (yet) part of the + interchange protocol specification. For avoiding collisions with other + entries, please add name the keys with the name of the library + followed by a period and the desired name, e.g, ``pandas.indexcol``. + """ + pass + + @abstractmethod + def num_columns(self) -> int: + """ + Return the number of columns in the DataFrame. + """ + pass + + @abstractmethod + def num_rows(self) -> int | None: + # TODO: not happy with Optional, but need to flag it may be expensive + # why include it if it may be None - what do we expect consumers + # to do here? + """ + Return the number of rows in the DataFrame, if available. + """ + pass + + @abstractmethod + def num_chunks(self) -> int: + """ + Return the number of chunks the DataFrame consists of. + """ + pass + + @abstractmethod + def column_names(self) -> Iterable[str]: + """ + Return an iterator yielding the column names. + """ + pass + + @abstractmethod + def get_column(self, i: int) -> Column: + """ + Return the column at the indicated position. + """ + pass + + @abstractmethod + def get_column_by_name(self, name: str) -> Column: + """ + Return the column whose name is the indicated name. + """ + pass + + @abstractmethod + def get_columns(self) -> Iterable[Column]: + """ + Return an iterator yielding the columns. + """ + pass + + @abstractmethod + def select_columns(self, indices: Sequence[int]) -> DataFrame: + """ + Create a new DataFrame by selecting a subset of columns by index. + """ + pass + + @abstractmethod + def select_columns_by_name(self, names: Sequence[str]) -> DataFrame: + """ + Create a new DataFrame by selecting a subset of columns by name. + """ + pass + + @abstractmethod + def get_chunks(self, n_chunks: int | None = None) -> Iterable[DataFrame]: + """ + Return an iterator yielding the chunks. + + By default (None), yields the chunks that the data is stored as by the + producer. If given, ``n_chunks`` must be a multiple of + ``self.num_chunks()``, meaning the producer must subdivide each chunk + before yielding it. 
+ """ + pass diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py new file mode 100644 index 00000000..bec66e41 --- /dev/null +++ b/pandas/core/interchange/from_dataframe.py @@ -0,0 +1,524 @@ +from __future__ import annotations + +import ctypes +import re +from typing import Any + +import numpy as np + +import pandas as pd +from pandas.core.interchange.column import PandasColumn +from pandas.core.interchange.dataframe_protocol import ( + Buffer, + Column, + ColumnNullType, + DataFrame as DataFrameXchg, + DtypeKind, +) +from pandas.core.interchange.utils import ( + ArrowCTypes, + Endianness, +) + +_NP_DTYPES: dict[DtypeKind, dict[int, Any]] = { + DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}, + DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64}, + DtypeKind.FLOAT: {32: np.float32, 64: np.float64}, + DtypeKind.BOOL: {8: bool}, +} + + +def from_dataframe(df, allow_copy=True) -> pd.DataFrame: + """ + Build a ``pd.DataFrame`` from any DataFrame supporting the interchange protocol. + + Parameters + ---------- + df : DataFrameXchg + Object supporting the interchange protocol, i.e. `__dataframe__` method. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pd.DataFrame + """ + if isinstance(df, pd.DataFrame): + return df + + if not hasattr(df, "__dataframe__"): + raise ValueError("`df` does not support __dataframe__") + + return _from_dataframe(df.__dataframe__(allow_copy=allow_copy)) + + +def _from_dataframe(df: DataFrameXchg, allow_copy=True): + """ + Build a ``pd.DataFrame`` from the DataFrame interchange object. + + Parameters + ---------- + df : DataFrameXchg + Object supporting the interchange protocol, i.e. `__dataframe__` method. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pd.DataFrame + """ + pandas_dfs = [] + for chunk in df.get_chunks(): + pandas_df = protocol_df_chunk_to_pandas(chunk) + pandas_dfs.append(pandas_df) + + if not allow_copy and len(pandas_dfs) > 1: + raise RuntimeError( + "To join chunks a copy is required which is forbidden by allow_copy=False" + ) + if len(pandas_dfs) == 1: + pandas_df = pandas_dfs[0] + else: + pandas_df = pd.concat(pandas_dfs, axis=0, ignore_index=True, copy=False) + + index_obj = df.metadata.get("pandas.index", None) + if index_obj is not None: + pandas_df.index = index_obj + + return pandas_df + + +def protocol_df_chunk_to_pandas(df: DataFrameXchg) -> pd.DataFrame: + """ + Convert interchange protocol chunk to ``pd.DataFrame``. + + Parameters + ---------- + df : DataFrameXchg + + Returns + ------- + pd.DataFrame + """ + # We need a dict of columns here, with each column being a NumPy array (at + # least for now, deal with non-NumPy dtypes later). 
+ columns: dict[str, Any] = {} + buffers = [] # hold on to buffers, keeps memory alive + for name in df.column_names(): + if not isinstance(name, str): + raise ValueError(f"Column {name} is not a string") + if name in columns: + raise ValueError(f"Column {name} is not unique") + col = df.get_column_by_name(name) + dtype = col.dtype[0] + if dtype in ( + DtypeKind.INT, + DtypeKind.UINT, + DtypeKind.FLOAT, + DtypeKind.BOOL, + ): + columns[name], buf = primitive_column_to_ndarray(col) + elif dtype == DtypeKind.CATEGORICAL: + columns[name], buf = categorical_column_to_series(col) + elif dtype == DtypeKind.STRING: + columns[name], buf = string_column_to_ndarray(col) + elif dtype == DtypeKind.DATETIME: + columns[name], buf = datetime_column_to_ndarray(col) + else: + raise NotImplementedError(f"Data type {dtype} not handled yet") + + buffers.append(buf) + + pandas_df = pd.DataFrame(columns) + pandas_df.attrs["_INTERCHANGE_PROTOCOL_BUFFERS"] = buffers + return pandas_df + + +def primitive_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: + """ + Convert a column holding one of the primitive dtypes to a NumPy array. + + A primitive type is one of: int, uint, float, bool. + + Parameters + ---------- + col : Column + + Returns + ------- + tuple + Tuple of np.ndarray holding the data and the memory owner object + that keeps the memory alive. + """ + buffers = col.get_buffers() + + data_buff, data_dtype = buffers["data"] + data = buffer_to_ndarray(data_buff, data_dtype, col.offset, col.size()) + + data = set_nulls(data, col, buffers["validity"]) + return data, buffers + + +def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]: + """ + Convert a column holding categorical data to a pandas Series. + + Parameters + ---------- + col : Column + + Returns + ------- + tuple + Tuple of pd.Series holding the data and the memory owner object + that keeps the memory alive. + """ + categorical = col.describe_categorical + + if not categorical["is_dictionary"]: + raise NotImplementedError("Non-dictionary categoricals not supported yet") + + cat_column = categorical["categories"] + # for mypy/pyright + assert isinstance(cat_column, PandasColumn), "categories must be a PandasColumn" + categories = np.array(cat_column._col) + buffers = col.get_buffers() + + codes_buff, codes_dtype = buffers["data"] + codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size()) + + # Doing module in order to not get ``IndexError`` for + # out-of-bounds sentinel values in `codes` + values = categories[codes % len(categories)] + + cat = pd.Categorical( + values, categories=categories, ordered=categorical["is_ordered"] + ) + data = pd.Series(cat) + + data = set_nulls(data, col, buffers["validity"]) + return data, buffers + + +def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: + """ + Convert a column holding string data to a NumPy array. + + Parameters + ---------- + col : Column + + Returns + ------- + tuple + Tuple of np.ndarray holding the data and the memory owner object + that keeps the memory alive. + """ + null_kind, sentinel_val = col.describe_null + + if null_kind not in ( + ColumnNullType.NON_NULLABLE, + ColumnNullType.USE_BITMASK, + ColumnNullType.USE_BYTEMASK, + ): + raise NotImplementedError( + f"{null_kind} null kind is not yet supported for string columns." 
+ ) + + buffers = col.get_buffers() + + assert buffers["offsets"], "String buffers must contain offsets" + # Retrieve the data buffer containing the UTF-8 code units + data_buff, protocol_data_dtype = buffers["data"] + # We're going to reinterpret the buffer as uint8, so make sure we can do it safely + assert protocol_data_dtype[1] == 8 # bitwidth == 8 + assert protocol_data_dtype[2] == ArrowCTypes.STRING # format_str == utf-8 + # Convert the buffers to NumPy arrays. In order to go from STRING to + # an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array) + data_dtype = ( + DtypeKind.UINT, + 8, + ArrowCTypes.UINT8, + Endianness.NATIVE, + ) + # Specify zero offset as we don't want to chunk the string data + data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=col.size()) + + # Retrieve the offsets buffer containing the index offsets demarcating + # the beginning and the ending of each string + offset_buff, offset_dtype = buffers["offsets"] + # Offsets buffer contains start-stop positions of strings in the data buffer, + # meaning that it has more elements than in the data buffer, do `col.size() + 1` + # here to pass a proper offsets buffer size + offsets = buffer_to_ndarray( + offset_buff, offset_dtype, col.offset, length=col.size() + 1 + ) + + null_pos = None + if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK): + assert buffers["validity"], "Validity buffers cannot be empty for masks" + valid_buff, valid_dtype = buffers["validity"] + null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size()) + if sentinel_val == 0: + null_pos = ~null_pos + + # Assemble the strings from the code units + str_list: list[None | float | str] = [None] * col.size() + for i in range(col.size()): + # Check for missing values + if null_pos is not None and null_pos[i]: + str_list[i] = np.nan + continue + + # Extract a range of code units + units = data[offsets[i] : offsets[i + 1]] + + # Convert the list of code units to bytes + str_bytes = bytes(units) + + # Create the string + string = str_bytes.decode(encoding="utf-8") + + # Add to our list of strings + str_list[i] = string + + # Convert the string list to a NumPy array + return np.asarray(str_list, dtype="object"), buffers + + +def parse_datetime_format_str(format_str, data): + """Parse datetime `format_str` to interpret the `data`.""" + # timestamp 'ts{unit}:tz' + timestamp_meta = re.match(r"ts([smun]):(.*)", format_str) + if timestamp_meta: + unit, tz = timestamp_meta.group(1), timestamp_meta.group(2) + if tz != "": + raise NotImplementedError("Timezones are not supported yet") + if unit != "s": + # the format string describes only a first letter of the unit, so + # add one extra letter to convert the unit to numpy-style: + # 'm' -> 'ms', 'u' -> 'us', 'n' -> 'ns' + unit += "s" + data = data.astype(f"datetime64[{unit}]") + return data + + # date 'td{Days/Ms}' + date_meta = re.match(r"td([Dm])", format_str) + if date_meta: + unit = date_meta.group(1) + if unit == "D": + # NumPy doesn't support DAY unit, so converting days to seconds + # (converting to uint64 to avoid overflow) + data = (data.astype(np.uint64) * (24 * 60 * 60)).astype("datetime64[s]") + elif unit == "m": + data = data.astype("datetime64[ms]") + else: + raise NotImplementedError(f"Date unit is not supported: {unit}") + return data + + raise NotImplementedError(f"DateTime kind is not supported: {format_str}") + + +def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: + """ + Convert a column holding DateTime 
data to a NumPy array. + + Parameters + ---------- + col : Column + + Returns + ------- + tuple + Tuple of np.ndarray holding the data and the memory owner object + that keeps the memory alive. + """ + buffers = col.get_buffers() + + _, _, format_str, _ = col.dtype + dbuf, dtype = buffers["data"] + # Consider dtype being `uint` to get number of units passed since the 01.01.1970 + data = buffer_to_ndarray( + dbuf, + ( + DtypeKind.UINT, + dtype[1], + getattr(ArrowCTypes, f"UINT{dtype[1]}"), + Endianness.NATIVE, + ), + col.offset, + col.size(), + ) + + data = parse_datetime_format_str(format_str, data) + data = set_nulls(data, col, buffers["validity"]) + return data, buffers + + +def buffer_to_ndarray( + buffer: Buffer, + dtype: tuple[DtypeKind, int, str, str], + offset: int = 0, + length: int | None = None, +) -> np.ndarray: + """ + Build a NumPy array from the passed buffer. + + Parameters + ---------- + buffer : Buffer + Buffer to build a NumPy array from. + dtype : tuple + Data type of the buffer conforming protocol dtypes format. + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + length : int, optional + If the buffer is a bit-mask, specifies a number of bits to read + from the buffer. Has no effect otherwise. + + Returns + ------- + np.ndarray + + Notes + ----- + The returned array doesn't own the memory. The caller of this function is + responsible for keeping the memory owner object alive as long as + the returned NumPy array is being used. + """ + kind, bit_width, _, _ = dtype + + column_dtype = _NP_DTYPES.get(kind, {}).get(bit_width, None) + if column_dtype is None: + raise NotImplementedError(f"Conversion for {dtype} is not yet supported.") + + # TODO: No DLPack yet, so need to construct a new ndarray from the data pointer + # and size in the buffer plus the dtype on the column. Use DLPack as NumPy supports + # it since https://github.com/numpy/numpy/pull/19083 + ctypes_type = np.ctypeslib.as_ctypes_type(column_dtype) + data_pointer = ctypes.cast( + buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type) + ) + + if bit_width == 1: + assert length is not None, "`length` must be specified for a bit-mask buffer." + arr = np.ctypeslib.as_array(data_pointer, shape=(buffer.bufsize,)) + return bitmask_to_bool_ndarray(arr, length, first_byte_offset=offset % 8) + else: + return np.ctypeslib.as_array( + data_pointer, shape=(buffer.bufsize // (bit_width // 8),) + ) + + +def bitmask_to_bool_ndarray( + bitmask: np.ndarray, mask_length: int, first_byte_offset: int = 0 +) -> np.ndarray: + """ + Convert bit-mask to a boolean NumPy array. + + Parameters + ---------- + bitmask : np.ndarray[uint8] + NumPy array of uint8 dtype representing the bitmask. + mask_length : int + Number of elements in the mask to interpret. + first_byte_offset : int, default: 0 + Number of elements to offset from the start of the first byte. 
+ + Returns + ------- + np.ndarray[bool] + """ + bytes_to_skip = first_byte_offset // 8 + bitmask = bitmask[bytes_to_skip:] + first_byte_offset %= 8 + + bool_mask = np.zeros(mask_length, dtype=bool) + + # Processing the first byte separately as it has its own offset + val = bitmask[0] + mask_idx = 0 + bits_in_first_byte = min(8 - first_byte_offset, mask_length) + for j in range(bits_in_first_byte): + if val & (1 << (j + first_byte_offset)): + bool_mask[mask_idx] = True + mask_idx += 1 + + # `mask_length // 8` describes how many full bytes to process + for i in range((mask_length - bits_in_first_byte) // 8): + # doing `+ 1` as we already processed the first byte + val = bitmask[i + 1] + for j in range(8): + if val & (1 << j): + bool_mask[mask_idx] = True + mask_idx += 1 + + if len(bitmask) > 1: + # Processing reminder of last byte + val = bitmask[-1] + for j in range(len(bool_mask) - mask_idx): + if val & (1 << j): + bool_mask[mask_idx] = True + mask_idx += 1 + + return bool_mask + + +def set_nulls( + data: np.ndarray | pd.Series, + col: Column, + validity: tuple[Buffer, tuple[DtypeKind, int, str, str]] | None, + allow_modify_inplace: bool = True, +): + """ + Set null values for the data according to the column null kind. + + Parameters + ---------- + data : np.ndarray or pd.Series + Data to set nulls in. + col : Column + Column object that describes the `data`. + validity : tuple(Buffer, dtype) or None + The return value of ``col.buffers()``. We do not access the ``col.buffers()`` + here to not take the ownership of the memory of buffer objects. + allow_modify_inplace : bool, default: True + Whether to modify the `data` inplace when zero-copy is possible (True) or always + modify a copy of the `data` (False). + + Returns + ------- + np.ndarray or pd.Series + Data with the nulls being set. + """ + null_kind, sentinel_val = col.describe_null + null_pos = None + + if null_kind == ColumnNullType.USE_SENTINEL: + null_pos = pd.Series(data) == sentinel_val + elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK): + assert validity, "Expected to have a validity buffer for the mask" + valid_buff, valid_dtype = validity + null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size()) + if sentinel_val == 0: + null_pos = ~null_pos + elif null_kind in (ColumnNullType.NON_NULLABLE, ColumnNullType.USE_NAN): + pass + else: + raise NotImplementedError(f"Null kind {null_kind} is not yet supported.") + + if null_pos is not None and np.any(null_pos): + if not allow_modify_inplace: + data = data.copy() + try: + data[null_pos] = None + except TypeError: + # TypeError happens if the `data` dtype appears to be non-nullable + # in numpy notation (bool, int, uint). If this happens, + # cast the `data` to nullable float dtype. + data = data.astype(float) + data[null_pos] = None + + return data diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py new file mode 100644 index 00000000..1d56af94 --- /dev/null +++ b/pandas/core/interchange/utils.py @@ -0,0 +1,95 @@ +""" +Utility functions and objects for implementing the interchange API. +""" + +from __future__ import annotations + +import re +import typing + +import numpy as np + +from pandas._typing import DtypeObj + +import pandas as pd +from pandas.api.types import is_datetime64_dtype + + +class ArrowCTypes: + """ + Enum for Apache Arrow C type format strings. 
+ + The Arrow C data interface: + https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings + """ + + NULL = "n" + BOOL = "b" + INT8 = "c" + UINT8 = "C" + INT16 = "s" + UINT16 = "S" + INT32 = "i" + UINT32 = "I" + INT64 = "l" + UINT64 = "L" + FLOAT16 = "e" + FLOAT32 = "f" + FLOAT64 = "g" + STRING = "u" # utf-8 + DATE32 = "tdD" + DATE64 = "tdm" + # Resoulution: + # - seconds -> 's' + # - milliseconds -> 'm' + # - microseconds -> 'u' + # - nanoseconds -> 'n' + TIMESTAMP = "ts{resolution}:{tz}" + TIME = "tt{resolution}" + + +class Endianness: + """Enum indicating the byte-order of a data-type.""" + + LITTLE = "<" + BIG = ">" + NATIVE = "=" + NA = "|" + + +def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str: + """ + Represent pandas `dtype` as a format string in Apache Arrow C notation. + + Parameters + ---------- + dtype : np.dtype + Datatype of pandas DataFrame to represent. + + Returns + ------- + str + Format string in Apache Arrow C notation of the given `dtype`. + """ + if isinstance(dtype, pd.CategoricalDtype): + return ArrowCTypes.INT64 + elif dtype == np.dtype("O"): + return ArrowCTypes.STRING + + format_str = getattr(ArrowCTypes, dtype.name.upper(), None) + if format_str is not None: + return format_str + + if is_datetime64_dtype(dtype): + # Selecting the first char of resolution string: + # dtype.str -> ' Block: + """ + This is a pseudo-public analogue to blocks.new_block. + + We ask that downstream libraries use this rather than any fully-internal + APIs, including but not limited to: + + - core.internals.blocks.make_block + - Block.make_block + - Block.make_block_same_class + - Block.__init__ + """ + if dtype is not None: + dtype = pandas_dtype(dtype) + + values, dtype = extract_pandas_array(values, dtype, ndim) + + if klass is ExtensionBlock and is_period_dtype(values.dtype): + # GH-44681 changed PeriodArray to be stored in the 2D + # NDArrayBackedExtensionBlock instead of ExtensionBlock + # -> still allow ExtensionBlock to be passed in this case for back compat + klass = None + + if klass is None: + dtype = dtype or values.dtype + klass = get_block_type(dtype) + + elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype): + # pyarrow calls get here + values = DatetimeArray._simple_new(values, dtype=dtype) + + if not isinstance(placement, BlockPlacement): + placement = BlockPlacement(placement) + + ndim = maybe_infer_ndim(values, placement, ndim) + if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype): + # GH#41168 ensure we can pass 1D dt64tz values + # More generally, any EA dtype that isn't is_1d_only_ea_dtype + values = extract_array(values, extract_numpy=True) + values = ensure_block_shape(values, ndim) + + check_ndim(values, placement, ndim) + values = maybe_coerce_values(values) + return klass(values, ndim=ndim, placement=placement) + + +def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int: + """ + If `ndim` is not provided, infer it from placment and values. 
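+
+    For values backed by an extension array (non-NumPy dtype), ``ndim`` is
+    inferred from the placement: 2 when the placement refers to a single
+    column, 1 otherwise; for NumPy-dtype values, ``values.ndim`` is used.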
+ """ + if ndim is None: + # GH#38134 Block constructor now assumes ndim is not None + if not isinstance(values.dtype, np.dtype): + if len(placement) != 1: + ndim = 1 + else: + ndim = 2 + else: + ndim = values.ndim + return ndim diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py new file mode 100644 index 00000000..262ed06f --- /dev/null +++ b/pandas/core/internals/array_manager.py @@ -0,0 +1,1408 @@ +""" +Experimental manager based on storing a collection of 1D arrays +""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Literal, + TypeVar, +) + +import numpy as np + +from pandas._libs import ( + NaT, + algos as libalgos, + lib, +) +from pandas._typing import ( + ArrayLike, + DtypeObj, + npt, +) +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.astype import astype_array_safe +from pandas.core.dtypes.cast import ( + ensure_dtype_can_hold_na, + infer_dtype_from_scalar, + soft_convert_objects, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_datetime64_ns_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_integer, + is_numeric_dtype, + is_object_dtype, + is_timedelta64_ns_dtype, +) +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + PandasDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.inference import is_inferred_bool_dtype +from pandas.core.dtypes.missing import ( + array_equals, + isna, + na_value_for_dtype, +) + +import pandas.core.algorithms as algos +from pandas.core.array_algos.quantile import quantile_compat +from pandas.core.array_algos.take import take_1d +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + PandasArray, + TimedeltaArray, +) +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, + sanitize_array, +) +from pandas.core.indexers import ( + maybe_convert_indices, + validate_indices, +) +from pandas.core.indexes.api import ( + Index, + ensure_index, +) +from pandas.core.internals.base import ( + DataManager, + SingleDataManager, + interleaved_dtype, +) +from pandas.core.internals.blocks import ( + ensure_block_shape, + external_values, + extract_pandas_array, + maybe_coerce_values, + new_block, + to_native_types, +) + +if TYPE_CHECKING: + from pandas import Float64Index + + +T = TypeVar("T", bound="BaseArrayManager") + + +class BaseArrayManager(DataManager): + """ + Core internal data structure to implement DataFrame and Series. + + Alternative to the BlockManager, storing a list of 1D arrays instead of + Blocks. 
+ + This is *not* a public API class + + Parameters + ---------- + arrays : Sequence of arrays + axes : Sequence of Index + verify_integrity : bool, default True + + """ + + __slots__ = [ + "_axes", # private attribute, because 'axes' has different order, see below + "arrays", + ] + + arrays: list[np.ndarray | ExtensionArray] + _axes: list[Index] + + def __init__( + self, + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], + verify_integrity: bool = True, + ) -> None: + raise NotImplementedError + + def make_empty(self: T, axes=None) -> T: + """Return an empty ArrayManager with the items axis of len 0 (no columns)""" + if axes is None: + axes = [self.axes[1:], Index([])] + + arrays: list[np.ndarray | ExtensionArray] = [] + return type(self)(arrays, axes) + + @property + def items(self) -> Index: + return self._axes[-1] + + @property + # error: Signature of "axes" incompatible with supertype "DataManager" + def axes(self) -> list[Index]: # type: ignore[override] + # mypy doesn't work to override attribute with property + # see https://github.com/python/mypy/issues/4125 + """Axes is BlockManager-compatible order (columns, rows)""" + return [self._axes[1], self._axes[0]] + + @property + def shape_proper(self) -> tuple[int, ...]: + # this returns (n_rows, n_columns) + return tuple(len(ax) for ax in self._axes) + + @staticmethod + def _normalize_axis(axis: int) -> int: + # switch axis + axis = 1 if axis == 0 else 0 + return axis + + def set_axis(self, axis: int, new_labels: Index) -> None: + # Caller is responsible for ensuring we have an Index object. + self._validate_set_axis(axis, new_labels) + axis = self._normalize_axis(axis) + self._axes[axis] = new_labels + + def get_dtypes(self) -> np.ndarray: + return np.array([arr.dtype for arr in self.arrays], dtype="object") + + def __getstate__(self): + return self.arrays, self._axes + + def __setstate__(self, state) -> None: + self.arrays = state[0] + self._axes = state[1] + + def __repr__(self) -> str: + output = type(self).__name__ + output += f"\nIndex: {self._axes[0]}" + if self.ndim == 2: + output += f"\nColumns: {self._axes[1]}" + output += f"\n{len(self.arrays)} arrays:" + for arr in self.arrays: + output += f"\n{arr.dtype}" + return output + + def apply( + self: T, + f, + align_keys: list[str] | None = None, + ignore_failures: bool = False, + **kwargs, + ) -> T: + """ + Iterate over the arrays, collect and create a new ArrayManager. + + Parameters + ---------- + f : str or callable + Name of the Array method to apply. 
+ align_keys: List[str] or None, default None + ignore_failures: bool, default False + **kwargs + Keywords to pass to `f` + + Returns + ------- + ArrayManager + """ + assert "filter" not in kwargs + + align_keys = align_keys or [] + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] + # fillna: Series/DataFrame is responsible for making sure value is aligned + + aligned_args = {k: kwargs[k] for k in align_keys} + + if f == "apply": + f = kwargs.pop("func") + + for i, arr in enumerate(self.arrays): + + if aligned_args: + + for k, obj in aligned_args.items(): + if isinstance(obj, (ABCSeries, ABCDataFrame)): + # The caller is responsible for ensuring that + # obj.axes[-1].equals(self.items) + if obj.ndim == 1: + kwargs[k] = obj.iloc[i] + else: + kwargs[k] = obj.iloc[:, i]._values + else: + # otherwise we have an array-like + kwargs[k] = obj[i] + + try: + if callable(f): + applied = f(arr, **kwargs) + else: + applied = getattr(arr, f)(**kwargs) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + continue + # if not isinstance(applied, ExtensionArray): + # # TODO not all EA operations return new EAs (eg astype) + # applied = array(applied) + result_arrays.append(applied) + result_indices.append(i) + + new_axes: list[Index] + if ignore_failures: + # TODO copy? + new_axes = [self._axes[0], self._axes[1][result_indices]] + else: + new_axes = self._axes + + # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; + # expected "List[Union[ndarray, ExtensionArray]]" + return type(self)(result_arrays, new_axes) # type: ignore[arg-type] + + def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T: + # switch axis to follow BlockManager logic + if swap_axis and "axis" in kwargs and self.ndim == 2: + kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0 + + align_keys = align_keys or [] + aligned_args = {k: kwargs[k] for k in align_keys} + + result_arrays = [] + + for i, arr in enumerate(self.arrays): + + if aligned_args: + for k, obj in aligned_args.items(): + if isinstance(obj, (ABCSeries, ABCDataFrame)): + # The caller is responsible for ensuring that + # obj.axes[-1].equals(self.items) + if obj.ndim == 1: + if self.ndim == 2: + kwargs[k] = obj.iloc[slice(i, i + 1)]._values + else: + kwargs[k] = obj.iloc[:]._values + else: + kwargs[k] = obj.iloc[:, [i]]._values + else: + # otherwise we have an ndarray + if obj.ndim == 2: + kwargs[k] = obj[[i]] + + if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray): + # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to + # convert for the Block constructors. 
+ arr = np.asarray(arr) + + if self.ndim == 2: + arr = ensure_block_shape(arr, 2) + block = new_block(arr, placement=slice(0, 1, 1), ndim=2) + else: + block = new_block(arr, placement=slice(0, len(self), 1), ndim=1) + + applied = getattr(block, f)(**kwargs) + if isinstance(applied, list): + applied = applied[0] + arr = applied.values + if self.ndim == 2 and arr.ndim == 2: + # 2D for np.ndarray or DatetimeArray/TimedeltaArray + assert len(arr) == 1 + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[int, slice]" + arr = arr[0, :] # type: ignore[call-overload] + result_arrays.append(arr) + + return type(self)(result_arrays, self._axes) + + def where(self: T, other, cond, align: bool) -> T: + if align: + align_keys = ["other", "cond"] + else: + align_keys = ["cond"] + other = extract_array(other, extract_numpy=True) + + return self.apply_with_block( + "where", + align_keys=align_keys, + other=other, + cond=cond, + ) + + def setitem(self: T, indexer, value) -> T: + return self.apply_with_block("setitem", indexer=indexer, value=value) + + def putmask(self: T, mask, new, align: bool = True) -> T: + if align: + align_keys = ["new", "mask"] + else: + align_keys = ["mask"] + new = extract_array(new, extract_numpy=True) + + return self.apply_with_block( + "putmask", + align_keys=align_keys, + mask=mask, + new=new, + ) + + def diff(self: T, n: int, axis: int) -> T: + if axis == 1: + # DataFrame only calls this for n=0, in which case performing it + # with axis=0 is equivalent + assert n == 0 + axis = 0 + return self.apply(algos.diff, n=n, axis=axis) + + def interpolate(self: T, **kwargs) -> T: + return self.apply_with_block("interpolate", swap_axis=False, **kwargs) + + def shift(self: T, periods: int, axis: int, fill_value) -> T: + if fill_value is lib.no_default: + fill_value = None + + if axis == 1 and self.ndim == 2: + # TODO column-wise shift + raise NotImplementedError + + return self.apply_with_block( + "shift", periods=periods, axis=axis, fill_value=fill_value + ) + + def fillna(self: T, value, limit, inplace: bool, downcast) -> T: + + if limit is not None: + # Do this validation even if we go through one of the no-op paths + limit = libalgos.validate_limit(None, limit=limit) + + return self.apply_with_block( + "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast + ) + + def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: + return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors) + + def convert( + self: T, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + ) -> T: + def _convert(arr): + if is_object_dtype(arr.dtype): + # extract PandasArray for tests that patch PandasArray._typ + arr = np.asarray(arr) + return soft_convert_objects( + arr, + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + copy=copy, + ) + else: + return arr.copy() if copy else arr + + return self.apply(_convert) + + def replace_regex(self: T, **kwargs) -> T: + return self.apply_with_block("_replace_regex", **kwargs) + + def replace(self: T, to_replace, value, inplace: bool) -> T: + inplace = validate_bool_kwarg(inplace, "inplace") + assert np.ndim(value) == 0, value + # TODO "replace" is right now implemented on the blocks, we should move + # it to general array algos so it can be reused here + return self.apply_with_block( + "replace", value=value, to_replace=to_replace, inplace=inplace + ) + + def replace_list( + self: T, + src_list: list[Any], + 
dest_list: list[Any], + inplace: bool = False, + regex: bool = False, + ) -> T: + """do a list replace""" + inplace = validate_bool_kwarg(inplace, "inplace") + + return self.apply_with_block( + "replace_list", + src_list=src_list, + dest_list=dest_list, + inplace=inplace, + regex=regex, + ) + + def to_native_types(self: T, **kwargs) -> T: + return self.apply(to_native_types, **kwargs) + + @property + def is_mixed_type(self) -> bool: + return True + + @property + def is_numeric_mixed_type(self) -> bool: + return all(is_numeric_dtype(t) for t in self.get_dtypes()) + + @property + def any_extension_types(self) -> bool: + """Whether any of the blocks in this manager are extension blocks""" + return False # any(block.is_extension for block in self.blocks) + + @property + def is_view(self) -> bool: + """return a boolean if we are a single block and are a view""" + # TODO what is this used for? + return False + + @property + def is_single_block(self) -> bool: + return len(self.arrays) == 1 + + def _get_data_subset(self: T, predicate: Callable) -> T: + indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)] + arrays = [self.arrays[i] for i in indices] + # TODO copy? + # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq, + # see test_describe_datetime_columns + taker = np.array(indices, dtype="intp") + new_cols = self._axes[1].take(taker) + new_axes = [self._axes[0], new_cols] + return type(self)(arrays, new_axes, verify_integrity=False) + + def get_bool_data(self: T, copy: bool = False) -> T: + """ + Select columns that are bool-dtype and object-dtype columns that are all-bool. + + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + return self._get_data_subset(is_inferred_bool_dtype) + + def get_numeric_data(self: T, copy: bool = False) -> T: + """ + Select columns that have a numeric dtype. + + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + return self._get_data_subset( + lambda arr: is_numeric_dtype(arr.dtype) + or getattr(arr.dtype, "_is_numeric", False) + ) + + def copy(self: T, deep=True) -> T: + """ + Make deep or shallow copy of ArrayManager + + Parameters + ---------- + deep : bool or string, default True + If False, return shallow copy (do not copy data) + If 'all', copy data and a deep copy of the index + + Returns + ------- + BlockManager + """ + if deep is None: + # ArrayManager does not yet support CoW, so deep=None always means + # deep=True for now + deep = True + + # this preserves the notion of view copying of axes + if deep: + # hit in e.g. 
tests.io.json.test_pandas + + def copy_func(ax): + return ax.copy(deep=True) if deep == "all" else ax.view() + + new_axes = [copy_func(ax) for ax in self._axes] + else: + new_axes = list(self._axes) + + if deep: + new_arrays = [arr.copy() for arr in self.arrays] + else: + new_arrays = list(self.arrays) + return type(self)(new_arrays, new_axes, verify_integrity=False) + + def reindex_indexer( + self: T, + new_axis, + indexer, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + # ignored keywords + only_slice: bool = False, + # ArrayManager specific keywords + use_na_proxy: bool = False, + ) -> T: + axis = self._normalize_axis(axis) + return self._reindex_indexer( + new_axis, + indexer, + axis, + fill_value, + allow_dups, + copy, + use_na_proxy, + ) + + def _reindex_indexer( + self: T, + new_axis, + indexer: npt.NDArray[np.intp] | None, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + use_na_proxy: bool = False, + ) -> T: + """ + Parameters + ---------- + new_axis : Index + indexer : ndarray[intp] or None + axis : int + fill_value : object, default None + allow_dups : bool, default False + copy : bool, default True + + + pandas-indexer with -1's only. + """ + if copy is None: + # ArrayManager does not yet support CoW, so deep=None always means + # deep=True for now + copy = True + + if indexer is None: + if new_axis is self._axes[axis] and not copy: + return self + + result = self.copy(deep=copy) + result._axes = list(self._axes) + result._axes[axis] = new_axis + return result + + # some axes don't allow reindexing with dups + if not allow_dups: + self._axes[axis]._validate_can_reindex(indexer) + + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + if axis == 1: + new_arrays = [] + for i in indexer: + if i == -1: + arr = self._make_na_array( + fill_value=fill_value, use_na_proxy=use_na_proxy + ) + else: + arr = self.arrays[i] + if copy: + arr = arr.copy() + new_arrays.append(arr) + + else: + validate_indices(indexer, len(self._axes[0])) + indexer = ensure_platform_int(indexer) + mask = indexer == -1 + needs_masking = mask.any() + new_arrays = [ + take_1d( + arr, + indexer, + allow_fill=needs_masking, + fill_value=fill_value, + mask=mask, + # if fill_value is not None else blk.fill_value + ) + for arr in self.arrays + ] + + new_axes = list(self._axes) + new_axes[axis] = new_axis + + return type(self)(new_arrays, new_axes, verify_integrity=False) + + def take( + self: T, + indexer, + axis: int = 1, + verify: bool = True, + convert_indices: bool = True, + ) -> T: + """ + Take items along any axis. 
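+
+        Slice indexers are materialized into an integer indexer and the
+        operation is delegated to ``_reindex_indexer`` with
+        ``allow_dups=True``.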
+ """ + axis = self._normalize_axis(axis) + + indexer = ( + np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64") + if isinstance(indexer, slice) + else np.asanyarray(indexer, dtype="int64") + ) + + if not indexer.ndim == 1: + raise ValueError("indexer should be 1-dimensional") + + n = self.shape_proper[axis] + if convert_indices: + indexer = maybe_convert_indices(indexer, n, verify=verify) + + new_labels = self._axes[axis].take(indexer) + return self._reindex_indexer( + new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True + ) + + def _make_na_array(self, fill_value=None, use_na_proxy=False): + if use_na_proxy: + assert fill_value is None + return NullArrayProxy(self.shape_proper[0]) + + if fill_value is None: + fill_value = np.nan + + dtype, fill_value = infer_dtype_from_scalar(fill_value) + # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any], + # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + values = np.empty(self.shape_proper[0], dtype=dtype) # type: ignore[arg-type] + values.fill(fill_value) + return values + + def _equal_values(self, other) -> bool: + """ + Used in .equals defined in base class. Only check the column values + assuming shape and indexes have already been checked. + """ + for left, right in zip(self.arrays, other.arrays): + if not array_equals(left, right): + return False + else: + return True + + # TODO + # to_dict + + +class ArrayManager(BaseArrayManager): + @property + def ndim(self) -> Literal[2]: + return 2 + + def __init__( + self, + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], + verify_integrity: bool = True, + ) -> None: + # Note: we are storing the axes in "_axes" in the (row, columns) order + # which contrasts the order how it is stored in BlockManager + self._axes = axes + self.arrays = arrays + + if verify_integrity: + self._axes = [ensure_index(ax) for ax in axes] + arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays] + self.arrays = [maybe_coerce_values(arr) for arr in arrays] + self._verify_integrity() + + def _verify_integrity(self) -> None: + n_rows, n_columns = self.shape_proper + if not len(self.arrays) == n_columns: + raise ValueError( + "Number of passed arrays must equal the size of the column Index: " + f"{len(self.arrays)} arrays vs {n_columns} columns." + ) + for arr in self.arrays: + if not len(arr) == n_rows: + raise ValueError( + "Passed arrays should have the same length as the rows Index: " + f"{len(arr)} vs {n_rows} rows" + ) + if not isinstance(arr, (np.ndarray, ExtensionArray)): + raise ValueError( + "Passed arrays should be np.ndarray or ExtensionArray instances, " + f"got {type(arr)} instead" + ) + if not arr.ndim == 1: + raise ValueError( + "Passed arrays should be 1-dimensional, got array with " + f"{arr.ndim} dimensions instead." + ) + + # -------------------------------------------------------------------- + # Indexing + + def fast_xs(self, loc: int) -> SingleArrayManager: + """ + Return the array corresponding to `frame.iloc[loc]`. 
+ + Parameters + ---------- + loc : int + + Returns + ------- + np.ndarray or ExtensionArray + """ + dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) + + values = [arr[loc] for arr in self.arrays] + if isinstance(dtype, ExtensionDtype): + result = dtype.construct_array_type()._from_sequence(values, dtype=dtype) + # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT + elif is_datetime64_ns_dtype(dtype): + result = DatetimeArray._from_sequence(values, dtype=dtype)._data + elif is_timedelta64_ns_dtype(dtype): + result = TimedeltaArray._from_sequence(values, dtype=dtype)._data + else: + result = np.array(values, dtype=dtype) + return SingleArrayManager([result], [self._axes[1]]) + + def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager: + axis = self._normalize_axis(axis) + + if axis == 0: + arrays = [arr[slobj] for arr in self.arrays] + elif axis == 1: + arrays = self.arrays[slobj] + + new_axes = list(self._axes) + new_axes[axis] = new_axes[axis]._getitem_slice(slobj) + + return type(self)(arrays, new_axes, verify_integrity=False) + + def iget(self, i: int) -> SingleArrayManager: + """ + Return the data as a SingleArrayManager. + """ + values = self.arrays[i] + return SingleArrayManager([values], [self._axes[0]]) + + def iget_values(self, i: int) -> ArrayLike: + """ + Return the data for column i as the values (ndarray or ExtensionArray). + """ + return self.arrays[i] + + @property + def column_arrays(self) -> list[ArrayLike]: + """ + Used in the JSON C code to access column arrays. + """ + + return [np.asarray(arr) for arr in self.arrays] + + def iset( + self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False + ) -> None: + """ + Set new column(s). + + This changes the ArrayManager in-place, but replaces (an) existing + column(s), not changing column values in-place). + + Parameters + ---------- + loc : integer, slice or boolean mask + Positional location (already bounds checked) + value : np.ndarray or ExtensionArray + inplace : bool, default False + Whether overwrite existing array as opposed to replacing it. + """ + # single column -> single integer index + if lib.is_integer(loc): + + # TODO can we avoid needing to unpack this here? 
That means converting + # DataFrame into 1D array when loc is an integer + if isinstance(value, np.ndarray) and value.ndim == 2: + assert value.shape[1] == 1 + value = value[:, 0] + + # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item + # but we should avoid that and pass directly the proper array + value = maybe_coerce_values(value) + + assert isinstance(value, (np.ndarray, ExtensionArray)) + assert value.ndim == 1 + assert len(value) == len(self._axes[0]) + self.arrays[loc] = value + return + + # multiple columns -> convert slice or array to integer indices + elif isinstance(loc, slice): + indices = range( + loc.start if loc.start is not None else 0, + loc.stop if loc.stop is not None else self.shape_proper[1], + loc.step if loc.step is not None else 1, + ) + else: + assert isinstance(loc, np.ndarray) + assert loc.dtype == "bool" + # error: Incompatible types in assignment (expression has type "ndarray", + # variable has type "range") + indices = np.nonzero(loc)[0] # type: ignore[assignment] + + assert value.ndim == 2 + assert value.shape[0] == len(self._axes[0]) + + for value_idx, mgr_idx in enumerate(indices): + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[slice, int]" + value_arr = value[:, value_idx] # type: ignore[call-overload] + self.arrays[mgr_idx] = value_arr + return + + def column_setitem( + self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False + ) -> None: + """ + Set values ("setitem") into a single column (not setting the full column). + + This is a method on the ArrayManager level, to avoid creating an + intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) + """ + if not is_integer(loc): + raise TypeError("The column index should be an integer") + arr = self.arrays[loc] + mgr = SingleArrayManager([arr], [self._axes[0]]) + if inplace: + mgr.setitem_inplace(idx, value) + else: + new_mgr = mgr.setitem((idx,), value) + # update existing ArrayManager in-place + self.arrays[loc] = new_mgr.arrays[0] + + def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: + """ + Insert item at selected position. + + Parameters + ---------- + loc : int + item : hashable + value : np.ndarray or ExtensionArray + """ + # insert to the axis; this could possibly raise a TypeError + new_axis = self.items.insert(loc, item) + + value = extract_array(value, extract_numpy=True) + if value.ndim == 2: + if value.shape[0] == 1: + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[int, slice]" + value = value[0, :] # type: ignore[call-overload] + else: + raise ValueError( + f"Expected a 1D array, got an array with shape {value.shape}" + ) + value = maybe_coerce_values(value) + + # TODO self.arrays can be empty + # assert len(value) == len(self.arrays[0]) + + # TODO is this copy needed? 
+ arrays = self.arrays.copy() + arrays.insert(loc, value) + + self.arrays = arrays + self._axes[1] = new_axis + + def idelete(self, indexer) -> ArrayManager: + """ + Delete selected locations in-place (new block and array, same BlockManager) + """ + to_keep = np.ones(self.shape[0], dtype=np.bool_) + to_keep[indexer] = False + + self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]] + self._axes = [self._axes[0], self._axes[1][to_keep]] + return self + + # -------------------------------------------------------------------- + # Array-wise Operation + + def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: + """ + Apply grouped reduction function columnwise, returning a new ArrayManager. + + Parameters + ---------- + func : grouped reduction function + ignore_failures : bool, default False + Whether to drop columns where func raises TypeError. + + Returns + ------- + ArrayManager + """ + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] + + for i, arr in enumerate(self.arrays): + # grouped_reduce functions all expect 2D arrays + arr = ensure_block_shape(arr, ndim=2) + try: + res = func(arr) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + continue + + if res.ndim == 2: + # reverse of ensure_block_shape + assert res.shape[0] == 1 + res = res[0] + + result_arrays.append(res) + result_indices.append(i) + + if len(result_arrays) == 0: + index = Index([None]) # placeholder + else: + index = Index(range(result_arrays[0].shape[0])) + + if ignore_failures: + columns = self.items[np.array(result_indices, dtype="int64")] + else: + columns = self.items + + # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; + # expected "List[Union[ndarray, ExtensionArray]]" + return type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] + + def reduce( + self: T, func: Callable, ignore_failures: bool = False + ) -> tuple[T, np.ndarray]: + """ + Apply reduction function column-wise, returning a single-row ArrayManager. + + Parameters + ---------- + func : reduction function + ignore_failures : bool, default False + Whether to drop columns where func raises TypeError. + + Returns + ------- + ArrayManager + np.ndarray + Indexer of column indices that are retained. + """ + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] + for i, arr in enumerate(self.arrays): + try: + res = func(arr, axis=0) + except TypeError: + if not ignore_failures: + raise + else: + # TODO NaT doesn't preserve dtype, so we need to ensure to create + # a timedelta result array if original was timedelta + # what if datetime results in timedelta? 
(eg std) + if res is NaT and is_timedelta64_ns_dtype(arr.dtype): + result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]")) + else: + # error: Argument 1 to "append" of "list" has incompatible type + # "ExtensionArray"; expected "ndarray" + result_arrays.append( + sanitize_array([res], None) # type: ignore[arg-type] + ) + result_indices.append(i) + + index = Index._simple_new(np.array([None], dtype=object)) # placeholder + if ignore_failures: + indexer = np.array(result_indices) + columns = self.items[result_indices] + else: + indexer = np.arange(self.shape[0]) + columns = self.items + + # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; + # expected "List[Union[ndarray, ExtensionArray]]" + new_mgr = type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] + return new_mgr, indexer + + def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: + """ + Apply array_op blockwise with another (aligned) BlockManager. + """ + # TODO what if `other` is BlockManager ? + left_arrays = self.arrays + right_arrays = other.arrays + result_arrays = [ + array_op(left, right) for left, right in zip(left_arrays, right_arrays) + ] + return type(self)(result_arrays, self._axes) + + def quantile( + self, + *, + qs: Float64Index, + axis: int = 0, + transposed: bool = False, + interpolation="linear", + ) -> ArrayManager: + + arrs = [ensure_block_shape(x, 2) for x in self.arrays] + assert axis == 1 + new_arrs = [ + quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs + ] + for i, arr in enumerate(new_arrs): + if arr.ndim == 2: + assert arr.shape[0] == 1, arr.shape + new_arrs[i] = arr[0] + + axes = [qs, self._axes[1]] + return type(self)(new_arrs, axes) + + # ---------------------------------------------------------------- + + def unstack(self, unstacker, fill_value) -> ArrayManager: + """ + Return a BlockManager with all blocks unstacked. + + Parameters + ---------- + unstacker : reshape._Unstacker + fill_value : Any + fill_value for newly introduced missing values. 
+ + Returns + ------- + unstacked : BlockManager + """ + indexer, _ = unstacker._indexer_and_to_sort + if unstacker.mask.all(): + new_indexer = indexer + allow_fill = False + new_mask2D = None + needs_masking = None + else: + new_indexer = np.full(unstacker.mask.shape, -1) + new_indexer[unstacker.mask] = indexer + allow_fill = True + # calculating the full mask once and passing it to take_1d is faster + # than letting take_1d calculate it in each repeated call + new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape) + needs_masking = new_mask2D.any(axis=0) + new_indexer2D = new_indexer.reshape(*unstacker.full_shape) + new_indexer2D = ensure_platform_int(new_indexer2D) + + new_arrays = [] + for arr in self.arrays: + for i in range(unstacker.full_shape[1]): + if allow_fill: + # error: Value of type "Optional[Any]" is not indexable [index] + new_arr = take_1d( + arr, + new_indexer2D[:, i], + allow_fill=needs_masking[i], # type: ignore[index] + fill_value=fill_value, + mask=new_mask2D[:, i], # type: ignore[index] + ) + else: + new_arr = take_1d(arr, new_indexer2D[:, i], allow_fill=False) + new_arrays.append(new_arr) + + new_index = unstacker.new_index + new_columns = unstacker.get_new_columns(self._axes[1]) + new_axes = [new_index, new_columns] + + return type(self)(new_arrays, new_axes, verify_integrity=False) + + def as_array( + self, + dtype=None, + copy: bool = False, + na_value: object = lib.no_default, + ) -> np.ndarray: + """ + Convert the blockmanager data into an numpy array. + + Parameters + ---------- + dtype : object, default None + Data type of the return array. + copy : bool, default False + If True then guarantee that a copy is returned. A value of + False does not guarantee that the underlying data is not + copied. + na_value : object, default lib.no_default + Value to be used as the missing value sentinel. 
+ + Returns + ------- + arr : ndarray + """ + if len(self.arrays) == 0: + empty_arr = np.empty(self.shape, dtype=float) + return empty_arr.transpose() + + # We want to copy when na_value is provided to avoid + # mutating the original object + copy = copy or na_value is not lib.no_default + + if not dtype: + dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) + + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + elif isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + elif is_extension_array_dtype(dtype): + dtype = "object" + elif is_dtype_equal(dtype, str): + dtype = "object" + + result = np.empty(self.shape_proper, dtype=dtype) + + for i, arr in enumerate(self.arrays): + arr = arr.astype(dtype, copy=copy) + result[:, i] = arr + + if na_value is not lib.no_default: + result[isna(result)] = na_value + + return result + + +class SingleArrayManager(BaseArrayManager, SingleDataManager): + + __slots__ = [ + "_axes", # private attribute, because 'axes' has different order, see below + "arrays", + ] + + arrays: list[np.ndarray | ExtensionArray] + _axes: list[Index] + + @property + def ndim(self) -> Literal[1]: + return 1 + + def __init__( + self, + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], + verify_integrity: bool = True, + ) -> None: + self._axes = axes + self.arrays = arrays + + if verify_integrity: + assert len(axes) == 1 + assert len(arrays) == 1 + self._axes = [ensure_index(ax) for ax in self._axes] + arr = arrays[0] + arr = maybe_coerce_values(arr) + arr = extract_pandas_array(arr, None, 1)[0] + self.arrays = [arr] + self._verify_integrity() + + def _verify_integrity(self) -> None: + (n_rows,) = self.shape + assert len(self.arrays) == 1 + arr = self.arrays[0] + assert len(arr) == n_rows + if not arr.ndim == 1: + raise ValueError( + "Passed array should be 1-dimensional, got array with " + f"{arr.ndim} dimensions instead." 
+ ) + + @staticmethod + def _normalize_axis(axis): + return axis + + def make_empty(self, axes=None) -> SingleArrayManager: + """Return an empty ArrayManager with index/array of length 0""" + if axes is None: + axes = [Index([], dtype=object)] + array: np.ndarray = np.array([], dtype=self.dtype) + return type(self)([array], axes) + + @classmethod + def from_array(cls, array, index) -> SingleArrayManager: + return cls([array], [index]) + + @property + def axes(self): + return self._axes + + @property + def index(self) -> Index: + return self._axes[0] + + @property + def dtype(self): + return self.array.dtype + + def external_values(self): + """The array that Series.values returns""" + return external_values(self.array) + + def internal_values(self): + """The array that Series._values returns""" + return self.array + + def array_values(self): + """The array that Series.array returns""" + arr = self.array + if isinstance(arr, np.ndarray): + arr = PandasArray(arr) + return arr + + @property + def _can_hold_na(self) -> bool: + if isinstance(self.array, np.ndarray): + return self.array.dtype.kind not in ["b", "i", "u"] + else: + # ExtensionArray + return self.array._can_hold_na + + @property + def is_single_block(self) -> bool: + return True + + def fast_xs(self, loc: int) -> SingleArrayManager: + raise NotImplementedError("Use series._values[loc] instead") + + def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager: + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + new_array = self.array[slobj] + new_index = self.index._getitem_slice(slobj) + return type(self)([new_array], [new_index], verify_integrity=False) + + def getitem_mgr(self, indexer) -> SingleArrayManager: + new_array = self.array[indexer] + new_index = self.index[indexer] + return type(self)([new_array], [new_index]) + + def apply(self, func, **kwargs): + if callable(func): + new_array = func(self.array, **kwargs) + else: + new_array = getattr(self.array, func)(**kwargs) + return type(self)([new_array], self._axes) + + def setitem(self, indexer, value) -> SingleArrayManager: + """ + Set values with indexer. + + For SingleArrayManager, this backs s[indexer] = value + + See `setitem_inplace` for a version that works inplace and doesn't + return a new Manager. + """ + if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: + raise ValueError(f"Cannot set values with ndim > {self.ndim}") + return self.apply_with_block("setitem", indexer=indexer, value=value) + + def idelete(self, indexer) -> SingleArrayManager: + """ + Delete selected locations in-place (new array, same ArrayManager) + """ + to_keep = np.ones(self.shape[0], dtype=np.bool_) + to_keep[indexer] = False + + self.arrays = [self.arrays[0][to_keep]] + self._axes = [self._axes[0][to_keep]] + return self + + def _get_data_subset(self, predicate: Callable) -> SingleArrayManager: + # used in get_numeric_data / get_bool_data + if predicate(self.array): + return type(self)(self.arrays, self._axes, verify_integrity=False) + else: + return self.make_empty() + + def set_values(self, values: ArrayLike) -> None: + """ + Set (replace) the values of the SingleArrayManager in place. + + Use at your own risk! This does not check if the passed values are + valid for the current SingleArrayManager (length, dtype, etc). 
+ """ + self.arrays[0] = values + + def to_2d_mgr(self, columns: Index) -> ArrayManager: + """ + Manager analogue of Series.to_frame + """ + arrays = [self.arrays[0]] + axes = [self.axes[0], columns] + + return ArrayManager(arrays, axes, verify_integrity=False) + + +class NullArrayProxy: + """ + Proxy object for an all-NA array. + + Only stores the length of the array, and not the dtype. The dtype + will only be known when actually concatenating (after determining the + common dtype, for which this proxy is ignored). + Using this object avoids that the internals/concat.py needs to determine + the proper dtype and array type. + """ + + ndim = 1 + + def __init__(self, n: int) -> None: + self.n = n + + @property + def shape(self) -> tuple[int]: + return (self.n,) + + def to_array(self, dtype: DtypeObj) -> ArrayLike: + """ + Helper function to create the actual all-NA array from the NullArrayProxy + object. + + Parameters + ---------- + arr : NullArrayProxy + dtype : the dtype for the resulting array + + Returns + ------- + np.ndarray or ExtensionArray + """ + if isinstance(dtype, ExtensionDtype): + empty = dtype.construct_array_type()._from_sequence([], dtype=dtype) + indexer = -np.ones(self.n, dtype=np.intp) + return empty.take(indexer, allow_fill=True) + else: + # when introducing missing values, int becomes float, bool becomes object + dtype = ensure_dtype_can_hold_na(dtype) + fill_value = na_value_for_dtype(dtype) + arr = np.empty(self.n, dtype=dtype) + arr.fill(fill_value) + return ensure_wrapped_if_datetimelike(arr) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py new file mode 100644 index 00000000..ddc44953 --- /dev/null +++ b/pandas/core/internals/base.py @@ -0,0 +1,226 @@ +""" +Base class for the internal managers. Both BlockManager and ArrayManager +inherit from this class. +""" +from __future__ import annotations + +from typing import ( + Literal, + TypeVar, + final, +) + +import numpy as np + +from pandas._typing import ( + ArrayLike, + DtypeObj, + Shape, +) +from pandas.errors import AbstractMethodError + +from pandas.core.dtypes.cast import ( + find_common_type, + np_can_hold_element, +) + +from pandas.core.base import PandasObject +from pandas.core.indexes.api import ( + Index, + default_index, +) + +T = TypeVar("T", bound="DataManager") + + +class DataManager(PandasObject): + + # TODO share more methods/attributes + + axes: list[Index] + + @property + def items(self) -> Index: + raise AbstractMethodError(self) + + @final + def __len__(self) -> int: + return len(self.items) + + @property + def ndim(self) -> int: + return len(self.axes) + + @property + def shape(self) -> Shape: + return tuple(len(ax) for ax in self.axes) + + @final + def _validate_set_axis(self, axis: int, new_labels: Index) -> None: + # Caller is responsible for ensuring we have an Index object. + old_len = len(self.axes[axis]) + new_len = len(new_labels) + + if axis == 1 and len(self.items) == 0: + # If we are setting the index on a DataFrame with no columns, + # it is OK to change the length. 
+ pass + + elif new_len != old_len: + raise ValueError( + f"Length mismatch: Expected axis has {old_len} elements, new " + f"values have {new_len} elements" + ) + + def reindex_indexer( + self: T, + new_axis, + indexer, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + only_slice: bool = False, + ) -> T: + raise AbstractMethodError(self) + + @final + def reindex_axis( + self: T, + new_index: Index, + axis: int, + fill_value=None, + only_slice: bool = False, + ) -> T: + """ + Conform data manager to new index. + """ + new_index, indexer = self.axes[axis].reindex(new_index) + + return self.reindex_indexer( + new_index, + indexer, + axis=axis, + fill_value=fill_value, + copy=False, + only_slice=only_slice, + ) + + def _equal_values(self: T, other: T) -> bool: + """ + To be implemented by the subclasses. Only check the column values + assuming shape and indexes have already been checked. + """ + raise AbstractMethodError(self) + + @final + def equals(self, other: object) -> bool: + """ + Implementation for DataFrame.equals + """ + if not isinstance(other, DataManager): + return False + + self_axes, other_axes = self.axes, other.axes + if len(self_axes) != len(other_axes): + return False + if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): + return False + + return self._equal_values(other) + + def apply( + self: T, + f, + align_keys: list[str] | None = None, + ignore_failures: bool = False, + **kwargs, + ) -> T: + raise AbstractMethodError(self) + + @final + def isna(self: T, func) -> T: + return self.apply("apply", func=func) + + # -------------------------------------------------------------------- + # Consolidation: No-ops for all but BlockManager + + def is_consolidated(self) -> bool: + return True + + def consolidate(self: T) -> T: + return self + + def _consolidate_inplace(self) -> None: + return + + +class SingleDataManager(DataManager): + @property + def ndim(self) -> Literal[1]: + return 1 + + @final + @property + def array(self) -> ArrayLike: + """ + Quick access to the backing array of the Block or SingleArrayManager. + """ + # error: "SingleDataManager" has no attribute "arrays"; maybe "array" + return self.arrays[0] # type: ignore[attr-defined] + + def setitem_inplace(self, indexer, value) -> None: + """ + Set values with indexer. + + For Single[Block/Array]Manager, this backs s[indexer] = value + + This is an inplace version of `setitem()`, mutating the manager/values + in place, not returning a new Manager (and Block), and thus never changing + the dtype. + """ + arr = self.array + + # EAs will do this validation in their own __setitem__ methods. + if isinstance(arr, np.ndarray): + # Note: checking for ndarray instead of np.dtype means we exclude + # dt64/td64, which do their own validation. + value = np_can_hold_element(arr.dtype, value) + + arr[indexer] = value + + def grouped_reduce(self, func, ignore_failures: bool = False): + """ + ignore_failures : bool, default False + Not used; for compatibility with ArrayManager/BlockManager. + """ + + arr = self.array + res = func(arr) + index = default_index(len(res)) + + mgr = type(self).from_array(res, index) + return mgr + + @classmethod + def from_array(cls, arr: ArrayLike, index: Index): + raise AbstractMethodError(cls) + + +def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None: + """ + Find the common dtype for `blocks`. 
+ + Parameters + ---------- + blocks : List[DtypeObj] + + Returns + ------- + dtype : np.dtype, ExtensionDtype, or None + None is returned when `blocks` is empty. + """ + if not len(dtypes): + return None + + return find_common_type(dtypes) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py new file mode 100644 index 00000000..5e95f83d --- /dev/null +++ b/pandas/core/internals/blocks.py @@ -0,0 +1,2387 @@ +from __future__ import annotations + +from functools import wraps +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + Sequence, + cast, + final, +) +import warnings + +import numpy as np + +from pandas._libs import ( + Timestamp, + internals as libinternals, + lib, + writers, +) +from pandas._libs.internals import BlockPlacement +from pandas._libs.tslibs import IncompatibleFrequency +from pandas._typing import ( + ArrayLike, + DtypeObj, + F, + IgnoreRaise, + Shape, + npt, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.astype import astype_array_safe +from pandas.core.dtypes.cast import ( + LossySetitemError, + can_hold_element, + find_result_type, + maybe_downcast_to_dtype, + np_can_hold_element, + soft_convert_objects, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_1d_only_ea_dtype, + is_1d_only_ea_obj, + is_dtype_equal, + is_interval_dtype, + is_list_like, + is_sparse, + is_string_dtype, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + ExtensionDtype, + PandasDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCPandasArray, + ABCSeries, +) +from pandas.core.dtypes.inference import is_inferred_bool_dtype +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + na_value_for_dtype, +) + +import pandas.core.algorithms as algos +from pandas.core.array_algos.putmask import ( + extract_bool_array, + putmask_inplace, + putmask_without_repeat, + setitem_datetimelike_compat, + validate_putmask, +) +from pandas.core.array_algos.quantile import quantile_compat +from pandas.core.array_algos.replace import ( + compare_or_regex_search, + replace_regex, + should_use_regex, +) +from pandas.core.array_algos.transforms import shift +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + ExtensionArray, + IntervalArray, + PandasArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.base import PandasObject +import pandas.core.common as com +import pandas.core.computation.expressions as expressions +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import check_setitem_lengths +import pandas.core.missing as missing + +if TYPE_CHECKING: + from pandas import ( + Float64Index, + Index, + ) + from pandas.core.arrays._mixins import NDArrayBackedExtensionArray + +# comparison is faster than is_object_dtype +_dtype_obj = np.dtype("object") + + +def maybe_split(meth: F) -> F: + """ + If we have a multi-column block, split and operate block-wise. Otherwise + use the original method. 
+ """ + + @wraps(meth) + def newfunc(self, *args, **kwargs) -> list[Block]: + + if self.ndim == 1 or self.shape[0] == 1: + return meth(self, *args, **kwargs) + else: + # Split and operate column-by-column + return self.split_and_operate(meth, *args, **kwargs) + + return cast(F, newfunc) + + +class Block(PandasObject): + """ + Canonical n-dimensional unit of homogeneous dtype contained in a pandas + data structure + + Index-ignorant; let the container take care of that + """ + + values: np.ndarray | ExtensionArray + ndim: int + __init__: Callable + + __slots__ = () + is_numeric = False + is_object = False + is_extension = False + _can_consolidate = True + _validate_ndim = True + + @final + @cache_readonly + def _consolidate_key(self): + return self._can_consolidate, self.dtype.name + + @final + @cache_readonly + def _can_hold_na(self) -> bool: + """ + Can we store NA values in this Block? + """ + dtype = self.dtype + if isinstance(dtype, np.dtype): + return dtype.kind not in ["b", "i", "u"] + return dtype._can_hold_na + + @final + @cache_readonly + def is_categorical(self) -> bool: + warnings.warn( + "Block.is_categorical is deprecated and will be removed in a " + "future version. Use isinstance(block.values, Categorical) " + "instead. See https://github.com/pandas-dev/pandas/issues/40226", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + return isinstance(self.values, Categorical) + + @final + @property + def is_bool(self) -> bool: + """ + We can be bool if a) we are bool dtype or b) object dtype with bool objects. + """ + return is_inferred_bool_dtype(self.values) + + @final + def external_values(self): + return external_values(self.values) + + @final + @cache_readonly + def fill_value(self): + # Used in reindex_indexer + return na_value_for_dtype(self.dtype, compat=False) + + @final + def _standardize_fill_value(self, value): + # if we are passed a scalar None, convert it here + if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype): + value = self.fill_value + return value + + @property + def mgr_locs(self) -> BlockPlacement: + return self._mgr_locs + + @mgr_locs.setter + def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None: + self._mgr_locs = new_mgr_locs + + @final + def make_block(self, values, placement=None) -> Block: + """ + Create a new block, with type inference propagate any values that are + not specified + """ + if placement is None: + placement = self._mgr_locs + if self.is_extension: + values = ensure_block_shape(values, ndim=self.ndim) + + # TODO: perf by not going through new_block + # We assume maybe_coerce_values has already been called + return new_block(values, placement=placement, ndim=self.ndim) + + @final + def make_block_same_class( + self, values, placement: BlockPlacement | None = None + ) -> Block: + """Wrap given values in a block of same type as self.""" + if placement is None: + placement = self._mgr_locs + + if values.dtype.kind in ["m", "M"]: + + new_values = ensure_wrapped_if_datetimelike(values) + if new_values is not values: + # TODO(2.0): remove once fastparquet has stopped relying on it + warnings.warn( + "In a future version, Block.make_block_same_class will " + "assume that datetime64 and timedelta64 ndarrays have " + "already been cast to DatetimeArray and TimedeltaArray, " + "respectively.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + values = new_values + + # We assume maybe_coerce_values has already been called + return type(self)(values, placement=placement, ndim=self.ndim) + + @final + def 
__repr__(self) -> str: + # don't want to print out all of the items here + name = type(self).__name__ + if self.ndim == 1: + result = f"{name}: {len(self)} dtype: {self.dtype}" + else: + + shape = " x ".join([str(s) for s in self.shape]) + result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}" + + return result + + @final + def __len__(self) -> int: + return len(self.values) + + @final + def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block: + """ + Perform __getitem__-like, return result as block. + + Only supports slices that preserve dimensionality. + """ + # Note: the only place where we are called with ndarray[intp] + # is from internals.concat, and we can verify that never happens + # with 1-column blocks, i.e. never for ExtensionBlock. + + # Invalid index type "Union[slice, ndarray[Any, dtype[signedinteger[Any]]]]" + # for "BlockPlacement"; expected type "Union[slice, Sequence[int]]" + new_mgr_locs = self._mgr_locs[slicer] # type: ignore[index] + + new_values = self._slice(slicer) + + if new_values.ndim != self.values.ndim: + raise ValueError("Only same dim slicing is allowed") + + return type(self)(new_values, new_mgr_locs, self.ndim) + + @final + def getitem_block_columns( + self, slicer: slice, new_mgr_locs: BlockPlacement + ) -> Block: + """ + Perform __getitem__-like, return result as block. + + Only supports slices that preserve dimensionality. + """ + new_values = self._slice(slicer) + + if new_values.ndim != self.values.ndim: + raise ValueError("Only same dim slicing is allowed") + + return type(self)(new_values, new_mgr_locs, self.ndim) + + @final + def _can_hold_element(self, element: Any) -> bool: + """require the same dtype as ourselves""" + element = extract_array(element, extract_numpy=True) + return can_hold_element(self.values, element) + + @final + def should_store(self, value: ArrayLike) -> bool: + """ + Should we set self.values[indexer] = value inplace or do we need to cast? 
+ + Parameters + ---------- + value : np.ndarray or ExtensionArray + + Returns + ------- + bool + """ + # faster equivalent to is_dtype_equal(value.dtype, self.dtype) + try: + return value.dtype == self.dtype + except TypeError: + return False + + # --------------------------------------------------------------------- + # Apply/Reduce and Helpers + + @final + def apply(self, func, **kwargs) -> list[Block]: + """ + apply the function to my values; return a block if we are not + one + """ + result = func(self.values, **kwargs) + + return self._split_op_result(result) + + def reduce(self, func, ignore_failures: bool = False) -> list[Block]: + # We will apply the function and reshape the result into a single-row + # Block with the same mgr_locs; squeezing will be done at a higher level + assert self.ndim == 2 + + try: + result = func(self.values) + except (TypeError, NotImplementedError): + if ignore_failures: + return [] + raise + + if self.values.ndim == 1: + # TODO(EA2D): special case not needed with 2D EAs + res_values = np.array([[result]]) + else: + res_values = result.reshape(-1, 1) + + nb = self.make_block(res_values) + return [nb] + + @final + def _split_op_result(self, result: ArrayLike) -> list[Block]: + # See also: split_and_operate + if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype): + # TODO(EA2D): unnecessary with 2D EAs + # if we get a 2D ExtensionArray, we need to split it into 1D pieces + nbs = [] + for i, loc in enumerate(self._mgr_locs): + if not is_1d_only_ea_obj(result): + vals = result[i : i + 1] + else: + vals = result[i] + + block = self.make_block(values=vals, placement=loc) + nbs.append(block) + return nbs + + nb = self.make_block(result) + + return [nb] + + @final + def _split(self) -> list[Block]: + """ + Split a block into a list of single-column blocks. + """ + assert self.ndim == 2 + + new_blocks = [] + for i, ref_loc in enumerate(self._mgr_locs): + vals = self.values[slice(i, i + 1)] + + bp = BlockPlacement(ref_loc) + nb = type(self)(vals, placement=bp, ndim=2) + new_blocks.append(nb) + return new_blocks + + @final + def split_and_operate(self, func, *args, **kwargs) -> list[Block]: + """ + Split the block and apply func column-by-column. + + Parameters + ---------- + func : Block method + *args + **kwargs + + Returns + ------- + List[Block] + """ + assert self.ndim == 2 and self.shape[0] != 1 + + res_blocks = [] + for nb in self._split(): + rbs = func(nb, *args, **kwargs) + res_blocks.extend(rbs) + return res_blocks + + # --------------------------------------------------------------------- + # Up/Down-casting + + @final + def coerce_to_target_dtype(self, other) -> Block: + """ + coerce the current block to a dtype compat for other + we will return a block, possibly object, and not raise + + we can also safely try to coerce to the same dtype + and will receive the same block + """ + new_dtype = find_result_type(self.values, other) + + return self.astype(new_dtype, copy=False) + + @final + def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]: + if downcast is False: + return blocks + + if self.dtype == _dtype_obj: + # GH#44241 We downcast regardless of the argument; + # respecting 'downcast=None' may be worthwhile at some point, + # but ATM it breaks too much existing code. 
+ # split and convert the blocks + + return extend_blocks( + [blk.convert(datetime=True, numeric=False) for blk in blocks] + ) + + if downcast is None: + return blocks + + return extend_blocks([b._downcast_2d(downcast) for b in blocks]) + + @final + @maybe_split + def _downcast_2d(self, dtype) -> list[Block]: + """ + downcast specialized to 2D case post-validation. + + Refactored to allow use of maybe_split. + """ + new_values = maybe_downcast_to_dtype(self.values, dtype=dtype) + return [self.make_block(new_values)] + + def convert( + self, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + ) -> list[Block]: + """ + attempt to coerce any object types to better types return a copy + of the block (if copy = True) by definition we are not an ObjectBlock + here! + """ + return [self.copy()] if copy else [self] + + # --------------------------------------------------------------------- + # Array-Like Methods + + @cache_readonly + def dtype(self) -> DtypeObj: + return self.values.dtype + + @final + def astype( + self, dtype: DtypeObj, copy: bool = False, errors: IgnoreRaise = "raise" + ) -> Block: + """ + Coerce to the new dtype. + + Parameters + ---------- + dtype : np.dtype or ExtensionDtype + copy : bool, default False + copy if indicated + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + Returns + ------- + Block + """ + values = self.values + + new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) + + new_values = maybe_coerce_values(new_values) + newb = self.make_block(new_values) + if newb.shape != self.shape: + raise TypeError( + f"cannot set astype for copy = [{copy}] for dtype " + f"({self.dtype.name} [{self.shape}]) to different shape " + f"({newb.dtype.name} [{newb.shape}])" + ) + return newb + + @final + def to_native_types(self, na_rep="nan", quoting=None, **kwargs) -> Block: + """convert to our native types format""" + result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs) + return self.make_block(result) + + @final + def copy(self, deep: bool = True) -> Block: + """copy constructor""" + values = self.values + if deep: + values = values.copy() + return type(self)(values, placement=self._mgr_locs, ndim=self.ndim) + + # --------------------------------------------------------------------- + # Replace + + @final + def replace( + self, + to_replace, + value, + inplace: bool = False, + # mask may be pre-computed if we're called from replace_list + mask: npt.NDArray[np.bool_] | None = None, + ) -> list[Block]: + """ + replace the to_replace value with value, possible to create new + blocks here this is just a call to putmask. + """ + + # Note: the checks we do in NDFrame.replace ensure we never get + # here with listlike to_replace or value, as those cases + # go through replace_list + values = self.values + + if isinstance(values, Categorical): + # TODO: avoid special-casing + blk = self if inplace else self.copy() + # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], + # ExtensionArray]" has no attribute "_replace" + blk.values._replace( # type: ignore[union-attr] + to_replace=to_replace, value=value, inplace=True + ) + return [blk] + + if not self._can_hold_element(to_replace): + # We cannot hold `to_replace`, so we know immediately that + # replacing it is a no-op. + # Note: If to_replace were a list, NDFrame.replace would call + # replace_list instead of replace. 
+ return [self] if inplace else [self.copy()] + + if mask is None: + mask = missing.mask_missing(values, to_replace) + if not mask.any(): + # Note: we get here with test_replace_extension_other incorrectly + # bc _can_hold_element is incorrect. + return [self] if inplace else [self.copy()] + + elif self._can_hold_element(value): + blk = self if inplace else self.copy() + putmask_inplace(blk.values, mask, value) + if not (self.is_object and value is None): + # if the user *explicitly* gave None, we keep None, otherwise + # may downcast to NaN + blocks = blk.convert(numeric=False, copy=False) + else: + blocks = [blk] + return blocks + + elif self.ndim == 1 or self.shape[0] == 1: + if value is None: + blk = self.astype(np.dtype(object)) + else: + blk = self.coerce_to_target_dtype(value) + return blk.replace( + to_replace=to_replace, + value=value, + inplace=True, + mask=mask, + ) + + else: + # split so that we only upcast where necessary + blocks = [] + for i, nb in enumerate(self._split()): + blocks.extend( + type(self).replace( + nb, + to_replace=to_replace, + value=value, + inplace=True, + mask=mask[i : i + 1], + ) + ) + return blocks + + @final + def _replace_regex( + self, + to_replace, + value, + inplace: bool = False, + convert: bool = True, + mask=None, + ) -> list[Block]: + """ + Replace elements by the given value. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + convert : bool, default True + If true, try to coerce any object types to better types. + mask : array-like of bool, optional + True indicate corresponding element is ignored. + + Returns + ------- + List[Block] + """ + if not self._can_hold_element(to_replace): + # i.e. only ObjectBlock, but could in principle include a + # String ExtensionBlock + return [self] if inplace else [self.copy()] + + rx = re.compile(to_replace) + + new_values = self.values if inplace else self.values.copy() + replace_regex(new_values, rx, value, mask) + + block = self.make_block(new_values) + return block.convert(numeric=False, copy=False) + + @final + def replace_list( + self, + src_list: Iterable[Any], + dest_list: Sequence[Any], + inplace: bool = False, + regex: bool = False, + ) -> list[Block]: + """ + See BlockManager.replace_list docstring. 
+ """ + values = self.values + + # Exclude anything that we know we won't contain + pairs = [ + (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x) + ] + if not len(pairs): + # shortcut, nothing to replace + return [self] if inplace else [self.copy()] + + src_len = len(pairs) - 1 + + if is_string_dtype(values.dtype): + # Calculate the mask once, prior to the call of comp + # in order to avoid repeating the same computations + mask = ~isna(values) + masks = [ + compare_or_regex_search(values, s[0], regex=regex, mask=mask) + for s in pairs + ] + else: + # GH#38086 faster if we know we dont need to check for regex + masks = [missing.mask_missing(values, s[0]) for s in pairs] + + # error: Argument 1 to "extract_bool_array" has incompatible type + # "Union[ExtensionArray, ndarray, bool]"; expected "Union[ExtensionArray, + # ndarray]" + masks = [extract_bool_array(x) for x in masks] # type: ignore[arg-type] + + rb = [self if inplace else self.copy()] + for i, (src, dest) in enumerate(pairs): + convert = i == src_len # only convert once at the end + new_rb: list[Block] = [] + + # GH-39338: _replace_coerce can split a block into + # single-column blocks, so track the index so we know + # where to index into the mask + for blk_num, blk in enumerate(rb): + if len(rb) == 1: + m = masks[i] + else: + mib = masks[i] + assert not isinstance(mib, bool) + m = mib[blk_num : blk_num + 1] + + # error: Argument "mask" to "_replace_coerce" of "Block" has + # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]"; + # expected "ndarray[Any, dtype[bool_]]" + result = blk._replace_coerce( + to_replace=src, + value=dest, + mask=m, # type: ignore[arg-type] + inplace=inplace, + regex=regex, + ) + if convert and blk.is_object and not all(x is None for x in dest_list): + # GH#44498 avoid unwanted cast-back + result = extend_blocks( + [b.convert(numeric=False, copy=True) for b in result] + ) + new_rb.extend(result) + rb = new_rb + return rb + + @final + def _replace_coerce( + self, + to_replace, + value, + mask: npt.NDArray[np.bool_], + inplace: bool = True, + regex: bool = False, + ) -> list[Block]: + """ + Replace value corresponding to the given boolean array with another + value. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + mask : np.ndarray[bool] + True indicate corresponding element is ignored. + inplace : bool, default True + Perform inplace modification. + regex : bool, default False + If true, perform regular expression substitution. + + Returns + ------- + List[Block] + """ + if should_use_regex(regex, to_replace): + return self._replace_regex( + to_replace, + value, + inplace=inplace, + convert=False, + mask=mask, + ) + else: + if value is None: + # gh-45601, gh-45836, gh-46634 + if mask.any(): + nb = self.astype(np.dtype(object), copy=False) + if nb is self and not inplace: + nb = nb.copy() + putmask_inplace(nb.values, mask, value) + return [nb] + return [self] if inplace else [self.copy()] + return self.replace( + to_replace=to_replace, value=value, inplace=inplace, mask=mask + ) + + # --------------------------------------------------------------------- + # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock + # but not ExtensionBlock + + def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray: + """ + For compatibility with 1D-only ExtensionArrays. 
+ """ + return arg + + def _unwrap_setitem_indexer(self, indexer): + """ + For compatibility with 1D-only ExtensionArrays. + """ + return indexer + + # NB: this cannot be made cache_readonly because in mgr.set_values we pin + # new .values that can have different shape GH#42631 + @property + def shape(self) -> Shape: + return self.values.shape + + def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray: + # In the case where we have a tuple[slice, int], the slice will always + # be slice(None) + # Note: only reached with self.ndim == 2 + # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]" + # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type + # "Union[int, integer[Any]]" + return self.values[i] # type: ignore[index] + + def _slice( + self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp] + ) -> ArrayLike: + """return a slice of my values""" + + return self.values[slicer] + + def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None: + """ + Modify block values in-place with new item value. + + If copy=True, first copy the underlying values in place before modifying + (for Copy-on-Write). + + Notes + ----- + `set_inplace` never creates a new array or new Block, whereas `setitem` + _may_ create a new array and always creates a new Block. + + Caller is responsible for checking values.dtype == self.dtype. + """ + if copy: + self.values = self.values.copy() + self.values[locs] = values + + def take_nd( + self, + indexer: npt.NDArray[np.intp], + axis: int, + new_mgr_locs: BlockPlacement | None = None, + fill_value=lib.no_default, + ) -> Block: + """ + Take values according to indexer and return them as a block. + """ + values = self.values + + if fill_value is lib.no_default: + fill_value = self.fill_value + allow_fill = False + else: + allow_fill = True + + # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype + new_values = algos.take_nd( + values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value + ) + + # Called from three places in managers, all of which satisfy + # this assertion + assert not (axis == 0 and new_mgr_locs is None) + if new_mgr_locs is None: + new_mgr_locs = self._mgr_locs + + if not is_dtype_equal(new_values.dtype, self.dtype): + return self.make_block(new_values, new_mgr_locs) + else: + return self.make_block_same_class(new_values, new_mgr_locs) + + def _unstack( + self, + unstacker, + fill_value, + new_placement: npt.NDArray[np.intp], + needs_masking: npt.NDArray[np.bool_], + ): + """ + Return a list of unstacked blocks of self + + Parameters + ---------- + unstacker : reshape._Unstacker + fill_value : int + Only used in ExtensionBlock._unstack + new_placement : np.ndarray[np.intp] + allow_fill : bool + needs_masking : np.ndarray[bool] + + Returns + ------- + blocks : list of Block + New blocks of unstacked values. + mask : array-like of bool + The mask of columns of `blocks` we should keep. + """ + new_values, mask = unstacker.get_new_values( + self.values.T, fill_value=fill_value + ) + + mask = mask.any(0) + # TODO: in all tests we have mask.all(); can we rely on that? 
+ + # Note: these next two lines ensure that + # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks) + # which the calling function needs in order to pass verify_integrity=False + # to the BlockManager constructor + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + bp = BlockPlacement(new_placement) + blocks = [new_block_2d(new_values, placement=bp)] + return blocks, mask + + # --------------------------------------------------------------------- + + def setitem(self, indexer, value) -> Block: + """ + Attempt self.values[indexer] = value, possibly creating a new array. + + Parameters + ---------- + indexer : tuple, list-like, array-like, slice, int + The subset of self.values to set + value : object + The value being set + + Returns + ------- + Block + + Notes + ----- + `indexer` is a direct slice/positional indexer. `value` must + be a compatible shape. + """ + + value = self._standardize_fill_value(value) + + values = cast(np.ndarray, self.values) + if self.ndim == 2: + values = values.T + + # length checking + check_setitem_lengths(indexer, value, values) + + value = extract_array(value, extract_numpy=True) + try: + casted = np_can_hold_element(values.dtype, value) + except LossySetitemError: + # current dtype cannot store value, coerce to common dtype + nb = self.coerce_to_target_dtype(value) + return nb.setitem(indexer, value) + else: + if self.dtype == _dtype_obj: + # TODO: avoid having to construct values[indexer] + vi = values[indexer] + if lib.is_list_like(vi): + # checking lib.is_scalar here fails on + # test_iloc_setitem_custom_object + casted = setitem_datetimelike_compat(values, len(vi), casted) + values[indexer] = casted + return self + + def putmask(self, mask, new) -> list[Block]: + """ + putmask the data to the block; it is possible that we may create a + new dtype of block + + Return the resulting block(s). + + Parameters + ---------- + mask : np.ndarray[bool], SparseArray[bool], or BooleanArray + new : a ndarray/object + + Returns + ------- + List[Block] + """ + orig_mask = mask + values = cast(np.ndarray, self.values) + mask, noop = validate_putmask(values.T, mask) + assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame)) + + if new is lib.no_default: + new = self.fill_value + + new = self._standardize_fill_value(new) + new = extract_array(new, extract_numpy=True) + + if noop: + return [self] + + try: + casted = np_can_hold_element(values.dtype, new) + putmask_without_repeat(values.T, mask, casted) + return [self] + except LossySetitemError: + + if self.ndim == 1 or self.shape[0] == 1: + # no need to split columns + + if not is_list_like(new): + # using just new[indexer] can't save us the need to cast + return self.coerce_to_target_dtype(new).putmask(mask, new) + else: + indexer = mask.nonzero()[0] + nb = self.setitem(indexer, new[indexer]) + return [nb] + + else: + is_array = isinstance(new, np.ndarray) + + res_blocks = [] + nbs = self._split() + for i, nb in enumerate(nbs): + n = new + if is_array: + # we have a different value per-column + n = new[:, i : i + 1] + + submask = orig_mask[:, i : i + 1] + rbs = nb.putmask(submask, n) + res_blocks.extend(rbs) + return res_blocks + + def where(self, other, cond, _downcast="infer") -> list[Block]: + """ + evaluate the block; return result block(s) from the result + + Parameters + ---------- + other : a ndarray/object + cond : np.ndarray[bool], SparseArray[bool], or BooleanArray + _downcast : str or None, default "infer" + Private because we only specify it when calling from fillna. 
+ + Returns + ------- + List[Block] + """ + assert cond.ndim == self.ndim + assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame)) + + transpose = self.ndim == 2 + + cond = extract_bool_array(cond) + + # EABlocks override where + values = cast(np.ndarray, self.values) + orig_other = other + if transpose: + values = values.T + + icond, noop = validate_putmask(values, ~cond) + if noop: + # GH-39595: Always return a copy; short-circuit up/downcasting + return [self.copy()] + + if other is lib.no_default: + other = self.fill_value + + other = self._standardize_fill_value(other) + + try: + # try/except here is equivalent to a self._can_hold_element check, + # but this gets us back 'casted' which we will re-use below; + # without using 'casted', expressions.where may do unwanted upcasts. + casted = np_can_hold_element(values.dtype, other) + except (ValueError, TypeError, LossySetitemError): + # we cannot coerce, return a compat dtype + + if self.ndim == 1 or self.shape[0] == 1: + # no need to split columns + + block = self.coerce_to_target_dtype(other) + blocks = block.where(orig_other, cond) + return self._maybe_downcast(blocks, downcast=_downcast) + + else: + # since _maybe_downcast would split blocks anyway, we + # can avoid some potential upcast/downcast by splitting + # on the front end. + is_array = isinstance(other, (np.ndarray, ExtensionArray)) + + res_blocks = [] + nbs = self._split() + for i, nb in enumerate(nbs): + oth = other + if is_array: + # we have a different value per-column + oth = other[:, i : i + 1] + + submask = cond[:, i : i + 1] + rbs = nb.where(oth, submask, _downcast=_downcast) + res_blocks.extend(rbs) + return res_blocks + + else: + other = casted + alt = setitem_datetimelike_compat(values, icond.sum(), other) + if alt is not other: + if is_list_like(other) and len(other) < len(values): + # call np.where with other to get the appropriate ValueError + np.where(~icond, values, other) + raise NotImplementedError( + "This should not be reached; call to np.where above is " + "expected to raise ValueError. Please report a bug at " + "github.com/pandas-dev/pandas" + ) + result = values.copy() + np.putmask(result, icond, alt) + else: + # By the time we get here, we should have all Series/Index + # args extracted to ndarray + if ( + is_list_like(other) + and not isinstance(other, np.ndarray) + and len(other) == self.shape[-1] + ): + # If we don't do this broadcasting here, then expressions.where + # will broadcast a 1D other to be row-like instead of + # column-like. + other = np.array(other).reshape(values.shape) + # If lengths don't match (or len(other)==1), we will raise + # inside expressions.where, see test_series_where + + # Note: expressions.where may upcast. + result = expressions.where(~icond, values, other) + # The np_can_hold_element check _should_ ensure that we always + # have result.dtype == self.dtype here. + + if transpose: + result = result.T + + return [self.make_block(result)] + + def fillna( + self, value, limit: int | None = None, inplace: bool = False, downcast=None + ) -> list[Block]: + """ + fillna on the block with the value. 
If we fail, then convert to + ObjectBlock and try again + """ + # Caller is responsible for validating limit; if int it is strictly positive + inplace = validate_bool_kwarg(inplace, "inplace") + + if not self._can_hold_na: + # can short-circuit the isna call + noop = True + else: + mask = isna(self.values) + mask, noop = validate_putmask(self.values, mask) + + if noop: + # we can't process the value, but nothing to do + if inplace: + # Arbitrarily imposing the convention that we ignore downcast + # on no-op when inplace=True + return [self] + else: + # GH#45423 consistent downcasting on no-ops. + nb = self.copy() + nbs = nb._maybe_downcast([nb], downcast=downcast) + return nbs + + if limit is not None: + mask[mask.cumsum(self.ndim - 1) > limit] = False + + if inplace: + nbs = self.putmask(mask.T, value) + else: + # without _downcast, we would break + # test_fillna_dtype_conversion_equiv_replace + nbs = self.where(value, ~mask.T, _downcast=False) + + # Note: blk._maybe_downcast vs self._maybe_downcast(nbs) + # makes a difference bc blk may have object dtype, which has + # different behavior in _maybe_downcast. + return extend_blocks( + [blk._maybe_downcast([blk], downcast=downcast) for blk in nbs] + ) + + def interpolate( + self, + method: str = "pad", + axis: int = 0, + index: Index | None = None, + inplace: bool = False, + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + downcast: str | None = None, + **kwargs, + ) -> list[Block]: + + inplace = validate_bool_kwarg(inplace, "inplace") + + if not self._can_hold_na: + # If there are no NAs, then interpolate is a no-op + return [self] if inplace else [self.copy()] + + try: + m = missing.clean_fill_method(method) + except ValueError: + m = None + if m is None and self.dtype.kind != "f": + # only deal with floats + # bc we already checked that can_hold_na, we dont have int dtype here + # test_interp_basic checks that we make a copy here + return [self] if inplace else [self.copy()] + + if self.is_object and self.ndim == 2 and self.shape[0] != 1 and axis == 0: + # split improves performance in ndarray.copy() + return self.split_and_operate( + type(self).interpolate, + method, + axis, + index, + inplace, + limit, + limit_direction, + limit_area, + fill_value, + downcast, + **kwargs, + ) + + data = self.values if inplace else self.values.copy() + data = cast(np.ndarray, data) # bc overridden by ExtensionBlock + + missing.interpolate_array_2d( + data, + method=method, + axis=axis, + index=index, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + **kwargs, + ) + + nb = self.make_block_same_class(data) + return nb._maybe_downcast([nb], downcast) + + def diff(self, n: int, axis: int = 1) -> list[Block]: + """return block for the diff of the values""" + new_values = algos.diff(self.values, n, axis=axis) + return [self.make_block(values=new_values)] + + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + """shift the block by periods, possibly upcast""" + # convert integer to float if necessary. need to do a lot more than + # that, handle boolean etc also + + # Note: periods is never 0 here, as that is handled at the top of + # NDFrame.shift. If that ever changes, we can do a check for periods=0 + # and possibly avoid coercing. 
+ + if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj: + # with object dtype there is nothing to promote, and the user can + # pass pretty much any weird fill_value they like + # see test_shift_object_non_scalar_fill + raise ValueError("fill_value must be a scalar") + + fill_value = self._standardize_fill_value(fill_value) + + try: + # error: Argument 1 to "np_can_hold_element" has incompatible type + # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" + casted = np_can_hold_element( + self.dtype, fill_value # type: ignore[arg-type] + ) + except LossySetitemError: + nb = self.coerce_to_target_dtype(fill_value) + return nb.shift(periods, axis=axis, fill_value=fill_value) + + else: + values = cast(np.ndarray, self.values) + new_values = shift(values, periods, axis, casted) + return [self.make_block(new_values)] + + @final + def quantile( + self, qs: Float64Index, interpolation="linear", axis: int = 0 + ) -> Block: + """ + compute the quantiles of the + + Parameters + ---------- + qs : Float64Index + List of the quantiles to be computed. + interpolation : str, default 'linear' + Type of interpolation. + axis : int, default 0 + Axis to compute. + + Returns + ------- + Block + """ + # We should always have ndim == 2 because Series dispatches to DataFrame + assert self.ndim == 2 + assert axis == 1 # only ever called this way + assert is_list_like(qs) # caller is responsible for this + + result = quantile_compat(self.values, np.asarray(qs._values), interpolation) + # ensure_block_shape needed for cases where we start with EA and result + # is ndarray, e.g. IntegerArray, SparseArray + result = ensure_block_shape(result, ndim=2) + return new_block_2d(result, placement=self._mgr_locs) + + # --------------------------------------------------------------------- + # Abstract Methods Overridden By EABackedBlock and NumpyBlock + + def delete(self, loc) -> Block: + """ + Return a new Block with the given loc(s) deleted. + """ + raise AbstractMethodError(self) + + @property + def is_view(self) -> bool: + """return a boolean if I am possibly a view""" + raise AbstractMethodError(self) + + @property + def array_values(self) -> ExtensionArray: + """ + The array that Series.array returns. Always an ExtensionArray. + """ + raise AbstractMethodError(self) + + def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: + """ + return an internal format, currently just the ndarray + this is often overridden to handle to_dense like operations + """ + raise AbstractMethodError(self) + + def values_for_json(self) -> np.ndarray: + raise AbstractMethodError(self) + + +class EABackedBlock(Block): + """ + Mixin for Block subclasses backed by ExtensionArray. + """ + + values: ExtensionArray + + def setitem(self, indexer, value): + """ + Attempt self.values[indexer] = value, possibly creating a new array. + + This differs from Block.setitem by not allowing setitem to change + the dtype of the Block. + + Parameters + ---------- + indexer : tuple, list-like, array-like, slice, int + The subset of self.values to set + value : object + The value being set + + Returns + ------- + Block + + Notes + ----- + `indexer` is a direct slice/positional indexer. `value` must + be a compatible shape. 
+ """ + orig_indexer = indexer + orig_value = value + + indexer = self._unwrap_setitem_indexer(indexer) + value = self._maybe_squeeze_arg(value) + + values = self.values + if values.ndim == 2: + # TODO(GH#45419): string[pyarrow] tests break if we transpose + # unconditionally + values = values.T + check_setitem_lengths(indexer, value, values) + + try: + values[indexer] = value + except (ValueError, TypeError) as err: + _catch_deprecated_value_error(err) + + if is_interval_dtype(self.dtype): + # see TestSetitemFloatIntervalWithIntIntervalValues + nb = self.coerce_to_target_dtype(orig_value) + return nb.setitem(orig_indexer, orig_value) + + elif isinstance(self, NDArrayBackedExtensionBlock): + nb = self.coerce_to_target_dtype(orig_value) + return nb.setitem(orig_indexer, orig_value) + + else: + raise + + else: + return self + + def where(self, other, cond, _downcast="infer") -> list[Block]: + # _downcast private bc we only specify it when calling from fillna + arr = self.values.T + + cond = extract_bool_array(cond) + + orig_other = other + orig_cond = cond + other = self._maybe_squeeze_arg(other) + cond = self._maybe_squeeze_arg(cond) + + if other is lib.no_default: + other = self.fill_value + + icond, noop = validate_putmask(arr, ~cond) + if noop: + # GH#44181, GH#45135 + # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast + return [self.copy()] + + try: + res_values = arr._where(cond, other).T + except (ValueError, TypeError) as err: + _catch_deprecated_value_error(err) + + if self.ndim == 1 or self.shape[0] == 1: + + if is_interval_dtype(self.dtype): + # TestSetitemFloatIntervalWithIntIntervalValues + blk = self.coerce_to_target_dtype(orig_other) + nbs = blk.where(orig_other, orig_cond) + return self._maybe_downcast(nbs, downcast=_downcast) + + elif isinstance(self, NDArrayBackedExtensionBlock): + # NB: not (yet) the same as + # isinstance(values, NDArrayBackedExtensionArray) + blk = self.coerce_to_target_dtype(orig_other) + nbs = blk.where(orig_other, orig_cond) + return self._maybe_downcast(nbs, downcast=_downcast) + + else: + raise + + else: + # Same pattern we use in Block.putmask + is_array = isinstance(orig_other, (np.ndarray, ExtensionArray)) + + res_blocks = [] + nbs = self._split() + for i, nb in enumerate(nbs): + n = orig_other + if is_array: + # we have a different value per-column + n = orig_other[:, i : i + 1] + + submask = orig_cond[:, i : i + 1] + rbs = nb.where(n, submask) + res_blocks.extend(rbs) + return res_blocks + + nb = self.make_block_same_class(res_values) + return [nb] + + def putmask(self, mask, new) -> list[Block]: + """ + See Block.putmask.__doc__ + """ + mask = extract_bool_array(mask) + + values = self.values + if values.ndim == 2: + values = values.T + + orig_new = new + orig_mask = mask + new = self._maybe_squeeze_arg(new) + mask = self._maybe_squeeze_arg(mask) + + if not mask.any(): + return [self] + + try: + # Caller is responsible for ensuring matching lengths + values._putmask(mask, new) + except (TypeError, ValueError) as err: + _catch_deprecated_value_error(err) + + if self.ndim == 1 or self.shape[0] == 1: + + if is_interval_dtype(self.dtype): + # Discussion about what we want to support in the general + # case GH#39584 + blk = self.coerce_to_target_dtype(orig_new) + return blk.putmask(orig_mask, orig_new) + + elif isinstance(self, NDArrayBackedExtensionBlock): + # NB: not (yet) the same as + # isinstance(values, NDArrayBackedExtensionArray) + blk = self.coerce_to_target_dtype(orig_new) + return blk.putmask(orig_mask, 
orig_new) + + else: + raise + + else: + # Same pattern we use in Block.putmask + is_array = isinstance(orig_new, (np.ndarray, ExtensionArray)) + + res_blocks = [] + nbs = self._split() + for i, nb in enumerate(nbs): + n = orig_new + if is_array: + # we have a different value per-column + n = orig_new[:, i : i + 1] + + submask = orig_mask[:, i : i + 1] + rbs = nb.putmask(submask, n) + res_blocks.extend(rbs) + return res_blocks + + return [self] + + def fillna( + self, value, limit: int | None = None, inplace: bool = False, downcast=None + ) -> list[Block]: + # Caller is responsible for validating limit; if int it is strictly positive + + if self.dtype.kind == "m": + try: + res_values = self.values.fillna(value, limit=limit) + except (ValueError, TypeError): + # GH#45746 + warnings.warn( + "The behavior of fillna with timedelta64[ns] dtype and " + f"an incompatible value ({type(value)}) is deprecated. " + "In a future version, this will cast to a common dtype " + "(usually object) instead of raising, matching the " + "behavior of other dtypes.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise + else: + res_blk = self.make_block(res_values) + return [res_blk] + + # TODO: since this now dispatches to super, which in turn dispatches + # to putmask, it may *actually* respect 'inplace=True'. If so, add + # tests for this. + return super().fillna(value, limit=limit, inplace=inplace, downcast=downcast) + + def delete(self, loc) -> Block: + # This will be unnecessary if/when __array_function__ is implemented + values = self.values.delete(loc) + mgr_locs = self._mgr_locs.delete(loc) + return type(self)(values, placement=mgr_locs, ndim=self.ndim) + + @cache_readonly + def array_values(self) -> ExtensionArray: + return self.values + + def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: + """ + return object dtype as boxed values, such as Timestamps/Timedelta + """ + values: ArrayLike = self.values + if dtype == _dtype_obj: + values = values.astype(object) + # TODO(EA2D): reshape not needed with 2D EAs + return np.asarray(values).reshape(self.shape) + + def values_for_json(self) -> np.ndarray: + return np.asarray(self.values) + + def interpolate( + self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs + ): + values = self.values + if values.ndim == 2 and axis == 0: + # NDArrayBackedExtensionArray.fillna assumes axis=1 + new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T + else: + new_values = values.fillna(value=fill_value, method=method, limit=limit) + return self.make_block_same_class(new_values) + + +class ExtensionBlock(libinternals.Block, EABackedBlock): + """ + Block for holding extension types. + + Notes + ----- + This holds all 3rd-party extension array types. It's also the immediate + parent class for our internal extension types' blocks, CategoricalBlock. + + ExtensionArrays are limited to 1-D. 
+ """ + + _can_consolidate = False + _validate_ndim = False + is_extension = True + + values: ExtensionArray + + @cache_readonly + def shape(self) -> Shape: + # TODO(EA2D): override unnecessary with 2D EAs + if self.ndim == 1: + return (len(self.values),) + return len(self._mgr_locs), len(self.values) + + def iget(self, i: int | tuple[int, int] | tuple[slice, int]): + # In the case where we have a tuple[slice, int], the slice will always + # be slice(None) + # We _could_ make the annotation more specific, but mypy would + # complain about override mismatch: + # Literal[0] | tuple[Literal[0], int] | tuple[slice, int] + + # Note: only reached with self.ndim == 2 + + if isinstance(i, tuple): + # TODO(EA2D): unnecessary with 2D EAs + col, loc = i + if not com.is_null_slice(col) and col != 0: + raise IndexError(f"{self} only contains one item") + elif isinstance(col, slice): + # the is_null_slice check above assures that col is slice(None) + # so what we want is a view on all our columns and row loc + if loc < 0: + loc += len(self.values) + # Note: loc:loc+1 vs [[loc]] makes a difference when called + # from fast_xs because we want to get a view back. + return self.values[loc : loc + 1] + return self.values[loc] + else: + if i != 0: + raise IndexError(f"{self} only contains one item") + return self.values + + def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None: + # When an ndarray, we should have locs.tolist() == [0] + # When a BlockPlacement we should have list(locs) == [0] + if copy: + self.values = self.values.copy() + self.values[:] = values + + def _maybe_squeeze_arg(self, arg): + """ + If necessary, squeeze a (N, 1) ndarray to (N,) + """ + # e.g. if we are passed a 2D mask for putmask + if ( + isinstance(arg, (np.ndarray, ExtensionArray)) + and arg.ndim == self.values.ndim + 1 + ): + # TODO(EA2D): unnecessary with 2D EAs + assert arg.shape[1] == 1 + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[slice, int]" + arg = arg[:, 0] # type: ignore[call-overload] + elif isinstance(arg, ABCDataFrame): + # 2022-01-06 only reached for setitem + # TODO: should we avoid getting here with DataFrame? + assert arg.shape[1] == 1 + arg = arg._ixs(0, axis=1)._values + + return arg + + def _unwrap_setitem_indexer(self, indexer): + """ + Adapt a 2D-indexer to our 1D values. + + This is intended for 'setitem', not 'iget' or '_slice'. + """ + # TODO: ATM this doesn't work for iget/_slice, can we change that? + + if isinstance(indexer, tuple): + # TODO(EA2D): not needed with 2D EAs + # Should never have length > 2. Caller is responsible for checking. + # Length 1 is reached vis setitem_single_block and setitem_single_column + # each of which pass indexer=(pi,) + if len(indexer) == 2: + + if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer): + # GH#44703 went through indexing.maybe_convert_ix + first, second = indexer + if not ( + second.size == 1 and (second == 0).all() and first.shape[1] == 1 + ): + raise NotImplementedError( + "This should not be reached. Please report a bug at " + "github.com/pandas-dev/pandas/" + ) + indexer = first[:, 0] + + elif lib.is_integer(indexer[1]) and indexer[1] == 0: + # reached via setitem_single_block passing the whole indexer + indexer = indexer[0] + + elif com.is_null_slice(indexer[1]): + indexer = indexer[0] + + elif is_list_like(indexer[1]) and indexer[1][0] == 0: + indexer = indexer[0] + + else: + raise NotImplementedError( + "This should not be reached. 
Please report a bug at " + "github.com/pandas-dev/pandas/" + ) + return indexer + + @property + def is_view(self) -> bool: + """Extension arrays are never treated as views.""" + return False + + @cache_readonly + def is_numeric(self): + return self.values.dtype._is_numeric + + def take_nd( + self, + indexer: npt.NDArray[np.intp], + axis: int = 0, + new_mgr_locs: BlockPlacement | None = None, + fill_value=lib.no_default, + ) -> Block: + """ + Take values according to indexer and return them as a block. + """ + if fill_value is lib.no_default: + fill_value = None + + # TODO(EA2D): special case not needed with 2D EAs + # axis doesn't matter; we are really a single-dim object + # but are passed the axis depending on the calling routing + # if its REALLY axis 0, then this will be a reindex and not a take + new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True) + + # Called from three places in managers, all of which satisfy + # this assertion + assert not (self.ndim == 1 and new_mgr_locs is None) + if new_mgr_locs is None: + new_mgr_locs = self._mgr_locs + + return self.make_block_same_class(new_values, new_mgr_locs) + + def _slice( + self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp] + ) -> ExtensionArray: + """ + Return a slice of my values. + + Parameters + ---------- + slicer : slice, ndarray[int], or ndarray[bool] + Valid (non-reducing) indexer for self.values. + + Returns + ------- + ExtensionArray + """ + # Notes: ndarray[bool] is only reachable when via getitem_mgr, which + # is only for Series, i.e. self.ndim == 1. + + # return same dims as we currently have + if self.ndim == 2: + # reached via getitem_block via _slice_take_blocks_ax0 + # TODO(EA2D): won't be necessary with 2D EAs + + if not isinstance(slicer, slice): + raise AssertionError( + "invalid slicing for a 1-ndim ExtensionArray", slicer + ) + # GH#32959 only full-slicers along fake-dim0 are valid + # TODO(EA2D): won't be necessary with 2D EAs + # range(1) instead of self._mgr_locs to avoid exception on [::-1] + # see test_iloc_getitem_slice_negative_step_ea_block + new_locs = range(1)[slicer] + if not len(new_locs): + raise AssertionError( + "invalid slicing for a 1-ndim ExtensionArray", slicer + ) + slicer = slice(None) + + return self.values[slicer] + + @final + def getitem_block_index(self, slicer: slice) -> ExtensionBlock: + """ + Perform __getitem__-like specialized to slicing along index. + """ + # GH#42787 in principle this is equivalent to values[..., slicer], but we don't + # require subclasses of ExtensionArray to support that form (for now). + new_values = self.values[slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + + def diff(self, n: int, axis: int = 1) -> list[Block]: + if axis == 0 and n != 0: + # n==0 case will be a no-op so let is fall through + # Since we only have one column, the result will be all-NA. + # Create this result by shifting along axis=0 past the length of + # our values. + return super().diff(len(self.values), axis=0) + if axis == 1: + # TODO(EA2D): unnecessary with 2D EAs + # we are by definition 1D. + axis = 0 + return super().diff(n, axis) + + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + """ + Shift the block by `periods`. + + Dispatches to underlying ExtensionArray and re-boxes in an + ExtensionBlock. 
+ """ + new_values = self.values.shift(periods=periods, fill_value=fill_value) + return [self.make_block_same_class(new_values)] + + def _unstack( + self, + unstacker, + fill_value, + new_placement: npt.NDArray[np.intp], + needs_masking: npt.NDArray[np.bool_], + ): + # ExtensionArray-safe unstack. + # We override ObjectBlock._unstack, which unstacks directly on the + # values of the array. For EA-backed blocks, this would require + # converting to a 2-D ndarray of objects. + # Instead, we unstack an ndarray of integer positions, followed by + # a `take` on the actual values. + + # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index) + new_values, mask = unstacker.arange_result + + # Note: these next two lines ensure that + # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks) + # which the calling function needs in order to pass verify_integrity=False + # to the BlockManager constructor + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + # needs_masking[i] calculated once in BlockManager.unstack tells + # us if there are any -1s in the relevant indices. When False, + # that allows us to go through a faster path in 'take', among + # other things avoiding e.g. Categorical._validate_scalar. + blocks = [ + # TODO: could cast to object depending on fill_value? + type(self)( + self.values.take( + indices, allow_fill=needs_masking[i], fill_value=fill_value + ), + BlockPlacement(place), + ndim=2, + ) + for i, (indices, place) in enumerate(zip(new_values, new_placement)) + ] + return blocks, mask + + +class NumpyBlock(libinternals.NumpyBlock, Block): + values: np.ndarray + + @property + def is_view(self) -> bool: + """return a boolean if I am possibly a view""" + return self.values.base is not None + + @property + def array_values(self) -> ExtensionArray: + return PandasArray(self.values) + + def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: + if dtype == _dtype_obj: + return self.values.astype(_dtype_obj) + return self.values + + def values_for_json(self) -> np.ndarray: + return self.values + + def delete(self, loc) -> Block: + values = np.delete(self.values, loc, 0) + mgr_locs = self._mgr_locs.delete(loc) + return type(self)(values, placement=mgr_locs, ndim=self.ndim) + + +class NumericBlock(NumpyBlock): + __slots__ = () + is_numeric = True + + +class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock): + """ + Block backed by an NDArrayBackedExtensionArray + """ + + values: NDArrayBackedExtensionArray + + # error: Signature of "is_extension" incompatible with supertype "Block" + @cache_readonly + def is_extension(self) -> bool: # type: ignore[override] + # i.e. datetime64tz, PeriodDtype + return not isinstance(self.dtype, np.dtype) + + @property + def is_view(self) -> bool: + """return a boolean if I am possibly a view""" + # check the ndarray values of the DatetimeIndex values + return self.values._ndarray.base is not None + + def diff(self, n: int, axis: int = 0) -> list[Block]: + """ + 1st discrete difference. + + Parameters + ---------- + n : int + Number of periods to diff. + axis : int, default 0 + Axis to diff upon. + + Returns + ------- + A list with a new Block. + + Notes + ----- + The arguments here are mimicking shift so they are called correctly + by apply. 
+ """ + values = self.values + + new_values = values - values.shift(n, axis=axis) + return [self.make_block(new_values)] + + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + values = self.values + new_values = values.shift(periods, fill_value=fill_value, axis=axis) + return [self.make_block_same_class(new_values)] + + +def _catch_deprecated_value_error(err: Exception) -> None: + """ + We catch ValueError for now, but only a specific one raised by DatetimeArray + which will no longer be raised in version.2.0. + """ + if isinstance(err, ValueError): + # TODO(2.0): once DTA._validate_setitem_value deprecation + # is enforced, stop catching ValueError here altogether + if isinstance(err, IncompatibleFrequency): + pass + elif "'value.closed' is" in str(err): + # IntervalDtype mismatched 'closed' + pass + elif "Timezones don't match" not in str(err): + raise + + +class DatetimeLikeBlock(NDArrayBackedExtensionBlock): + """Block for datetime64[ns], timedelta64[ns].""" + + __slots__ = () + is_numeric = False + values: DatetimeArray | TimedeltaArray + + def values_for_json(self) -> np.ndarray: + return self.values._ndarray + + +class DatetimeTZBlock(DatetimeLikeBlock): + """implement a datetime64 block with a tz attribute""" + + values: DatetimeArray + + __slots__ = () + is_extension = True + _validate_ndim = True + _can_consolidate = False + + # Don't use values_for_json from DatetimeLikeBlock since it is + # an invalid optimization here(drop the tz) + values_for_json = NDArrayBackedExtensionBlock.values_for_json + + +class ObjectBlock(NumpyBlock): + __slots__ = () + is_object = True + + @maybe_split + def reduce(self, func, ignore_failures: bool = False) -> list[Block]: + """ + For object-dtype, we operate column-wise. + """ + assert self.ndim == 2 + + try: + res = func(self.values) + except TypeError: + if not ignore_failures: + raise + return [] + + assert isinstance(res, np.ndarray) + assert res.ndim == 1 + res = res.reshape(1, -1) + return [self.make_block_same_class(res)] + + @maybe_split + def convert( + self, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + ) -> list[Block]: + """ + attempt to cast any object types to better types return a copy of + the block (if copy = True) by definition we ARE an ObjectBlock!!!!! + """ + values = self.values + if values.ndim == 2: + # maybe_split ensures we only get here with values.shape[0] == 1, + # avoid doing .ravel as that might make a copy + values = values[0] + + res_values = soft_convert_objects( + values, + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + copy=copy, + ) + res_values = ensure_block_shape(res_values, self.ndim) + return [self.make_block(res_values)] + + +class CategoricalBlock(ExtensionBlock): + # this Block type is kept for backwards-compatibility + __slots__ = () + + # GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0, + # so this cannot be cached + @property + def dtype(self) -> DtypeObj: + return self.values.dtype + + +# ----------------------------------------------------------------- +# Constructor Helpers + + +def maybe_coerce_values(values: ArrayLike) -> ArrayLike: + """ + Input validation for values passed to __init__. Ensure that + any datetime64/timedelta64 dtypes are in nanoseconds. Ensure + that we do not have string dtypes. 
+ + Parameters + ---------- + values : np.ndarray or ExtensionArray + + Returns + ------- + values : np.ndarray or ExtensionArray + """ + # Caller is responsible for ensuring PandasArray is already extracted. + + if isinstance(values, np.ndarray): + values = ensure_wrapped_if_datetimelike(values) + + if issubclass(values.dtype.type, str): + values = np.array(values, dtype=object) + + if isinstance(values, (DatetimeArray, TimedeltaArray)) and values.freq is not None: + # freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame + values = values._with_freq(None) + + return values + + +def get_block_type(dtype: DtypeObj): + """ + Find the appropriate Block subclass to use for the given values and dtype. + + Parameters + ---------- + dtype : numpy or pandas dtype + + Returns + ------- + cls : class, subclass of Block + """ + # We use vtype and kind checks because they are much more performant + # than is_foo_dtype + vtype = dtype.type + kind = dtype.kind + + cls: type[Block] + + if isinstance(dtype, SparseDtype): + # Need this first(ish) so that Sparse[datetime] is sparse + cls = ExtensionBlock + elif isinstance(dtype, CategoricalDtype): + cls = CategoricalBlock + elif vtype is Timestamp: + cls = DatetimeTZBlock + elif isinstance(dtype, PeriodDtype): + cls = NDArrayBackedExtensionBlock + elif isinstance(dtype, ExtensionDtype): + # Note: need to be sure PandasArray is unwrapped before we get here + cls = ExtensionBlock + + elif kind in ["M", "m"]: + cls = DatetimeLikeBlock + elif kind in ["f", "c", "i", "u", "b"]: + cls = NumericBlock + else: + cls = ObjectBlock + return cls + + +def new_block_2d(values: ArrayLike, placement: BlockPlacement): + # new_block specialized to case with + # ndim=2 + # isinstance(placement, BlockPlacement) + # check_ndim/ensure_block_shape already checked + klass = get_block_type(values.dtype) + + values = maybe_coerce_values(values) + return klass(values, ndim=2, placement=placement) + + +def new_block(values, placement, *, ndim: int) -> Block: + # caller is responsible for ensuring values is NOT a PandasArray + + if not isinstance(placement, BlockPlacement): + placement = BlockPlacement(placement) + + check_ndim(values, placement, ndim) + + klass = get_block_type(values.dtype) + + values = maybe_coerce_values(values) + return klass(values, ndim=ndim, placement=placement) + + +def check_ndim(values, placement: BlockPlacement, ndim: int) -> None: + """ + ndim inference and validation. + + Validates that values.ndim and ndim are consistent. + Validates that len(values) and len(placement) are consistent. + + Parameters + ---------- + values : array-like + placement : BlockPlacement + ndim : int + + Raises + ------ + ValueError : the number of dimensions do not match + """ + + if values.ndim > ndim: + # Check for both np.ndarray and ExtensionArray + raise ValueError( + "Wrong number of dimensions. " + f"values.ndim > ndim [{values.ndim} > {ndim}]" + ) + + elif not is_1d_only_ea_dtype(values.dtype): + # TODO(EA2D): special case not needed with 2D EAs + if values.ndim != ndim: + raise ValueError( + "Wrong number of dimensions. 
" + f"values.ndim != ndim [{values.ndim} != {ndim}]" + ) + if len(placement) != len(values): + raise ValueError( + f"Wrong number of items passed {len(values)}, " + f"placement implies {len(placement)}" + ) + elif ndim == 2 and len(placement) != 1: + # TODO(EA2D): special case unnecessary with 2D EAs + raise ValueError("need to split") + + +def extract_pandas_array( + values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int +) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]: + """ + Ensure that we don't allow PandasArray / PandasDtype in internals. + """ + # For now, blocks should be backed by ndarrays when possible. + if isinstance(values, ABCPandasArray): + values = values.to_numpy() + if ndim and ndim > 1: + # TODO(EA2D): special case not needed with 2D EAs + values = np.atleast_2d(values) + + if isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + + return values, dtype + + +# ----------------------------------------------------------------- + + +def extend_blocks(result, blocks=None) -> list[Block]: + """return a new extended blocks, given the result""" + if blocks is None: + blocks = [] + if isinstance(result, list): + for r in result: + if isinstance(r, list): + blocks.extend(r) + else: + blocks.append(r) + else: + assert isinstance(result, Block), type(result) + blocks.append(result) + return blocks + + +def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: + """ + Reshape if possible to have values.ndim == ndim. + """ + + if values.ndim < ndim: + if not is_1d_only_ea_dtype(values.dtype): + # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 + # block.shape is incorrect for "2D" ExtensionArrays + # We can't, and don't need to, reshape. + values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values) + values = values.reshape(1, -1) + + return values + + +def to_native_types( + values: ArrayLike, + *, + na_rep="nan", + quoting=None, + float_format=None, + decimal=".", + **kwargs, +) -> np.ndarray: + """convert to our native types format""" + if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm": + # GH#40754 Convert categorical datetimes to datetime array + values = algos.take_nd( + values.categories._values, + ensure_platform_int(values._codes), + fill_value=na_rep, + ) + + values = ensure_wrapped_if_datetimelike(values) + + if isinstance(values, (DatetimeArray, TimedeltaArray)): + if values.ndim == 1: + result = values._format_native_types(na_rep=na_rep, **kwargs) + result = result.astype(object, copy=False) + return result + + # GH#21734 Process every column separately, they might have different formats + results_converted = [] + for i in range(len(values)): + result = values[i, :]._format_native_types(na_rep=na_rep, **kwargs) + results_converted.append(result.astype(object, copy=False)) + return np.vstack(results_converted) + + elif values.dtype.kind == "f" and not is_sparse(values): + # see GH#13418: no special formatting is desired at the + # output (important for appropriate 'quoting' behaviour), + # so do not pass it through the FloatArrayFormatter + if float_format is None and decimal == ".": + mask = isna(values) + + if not quoting: + values = values.astype(str) + else: + values = np.array(values, dtype="object") + + values[mask] = na_rep + values = values.astype(object, copy=False) + return values + + from pandas.io.formats.format import FloatArrayFormatter + + formatter = FloatArrayFormatter( + values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, 
+ fixed_width=False, + ) + res = formatter.get_result_as_array() + res = res.astype(object, copy=False) + return res + + elif isinstance(values, ExtensionArray): + mask = isna(values) + + new_values = np.asarray(values.astype(object)) + new_values[mask] = na_rep + return new_values + + else: + + mask = isna(values) + itemsize = writers.word_len(na_rep) + + if values.dtype != _dtype_obj and not quoting and itemsize: + values = values.astype(str) + if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: + # enlarge for the na_rep + values = values.astype(f"<U{itemsize}") + + values[mask] = na_rep + values = values.astype(object, copy=False) + return values + + +def external_values(values: ArrayLike) -> ArrayLike: + """ + The array that Series.values returns (public attribute). + + This has some historical constraints, and is overridden in block + subclasses to return the correct array (e.g. period returns + object ndarray and datetimetz a datetime64[ns] ndarray instead of + proper extension array). + """ + if isinstance(values, (PeriodArray, IntervalArray)): + return values.astype(object) + elif isinstance(values, (DatetimeArray, TimedeltaArray)): + # NB: for datetime64tz this is different from np.asarray(values), since + # that returns an object-dtype ndarray of Timestamps. + # Avoid FutureWarning in .astype in casting from dt64tz to dt64 + return values._data + else: + return values diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py new file mode 100644 index 00000000..dafc437d --- /dev/null +++ b/pandas/core/internals/concat.py @@ -0,0 +1,765 @@ +from __future__ import annotations + +import copy +import itertools +from typing import ( + TYPE_CHECKING, + Sequence, + cast, +) + +import numpy as np + +from pandas._libs import ( + NaT, + internals as libinternals, +) +from pandas._libs.missing import NA +from pandas._typing import ( + ArrayLike, + DtypeObj, + Manager, + Shape, +) +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.cast import ( + ensure_dtype_can_hold_na, + find_common_type, +) +from pandas.core.dtypes.common import ( + is_1d_only_ea_dtype, + is_dtype_equal, + is_scalar, + needs_i8_conversion, +) +from pandas.core.dtypes.concat import ( + cast_to_common_type, + concat_compat, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + isna_all, +) + +import pandas.core.algorithms as algos +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, +) +from pandas.core.arrays.sparse import SparseDtype +from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.internals.array_manager import ( + ArrayManager, + NullArrayProxy, +) +from pandas.core.internals.blocks import ( + ensure_block_shape, + new_block_2d, +) +from pandas.core.internals.managers import BlockManager + +if TYPE_CHECKING: + from pandas import Index + from pandas.core.internals.blocks import Block + + +def _concatenate_array_managers( + mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool +) -> Manager: + """ + Concatenate array managers into one. 
+ + Parameters + ---------- + mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples + axes : list of Index + concat_axis : int + copy : bool + + Returns + ------- + ArrayManager + """ + # reindex all arrays + mgrs = [] + for mgr, indexers in mgrs_indexers: + axis1_made_copy = False + for ax, indexer in indexers.items(): + mgr = mgr.reindex_indexer( + axes[ax], indexer, axis=ax, allow_dups=True, use_na_proxy=True + ) + if ax == 1 and indexer is not None: + axis1_made_copy = True + if copy and concat_axis == 0 and not axis1_made_copy: + # for concat_axis 1 we will always get a copy through concat_arrays + mgr = mgr.copy() + mgrs.append(mgr) + + if concat_axis == 1: + # concatting along the rows -> concat the reindexed arrays + # TODO(ArrayManager) doesn't yet preserve the correct dtype + arrays = [ + concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))]) + for j in range(len(mgrs[0].arrays)) + ] + else: + # concatting along the columns -> combine reindexed arrays in a single manager + assert concat_axis == 0 + arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs])) + + new_mgr = ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False) + return new_mgr + + +def concat_arrays(to_concat: list) -> ArrayLike: + """ + Alternative for concat_compat but specialized for use in the ArrayManager. + + Differences: only deals with 1D arrays (no axis keyword), assumes + ensure_wrapped_if_datetimelike and does not skip empty arrays to determine + the dtype. + In addition ensures that all NullArrayProxies get replaced with actual + arrays. + + Parameters + ---------- + to_concat : list of arrays + + Returns + ------- + np.ndarray or ExtensionArray + """ + # ignore the all-NA proxies to determine the resulting dtype + to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] + + dtypes = {x.dtype for x in to_concat_no_proxy} + single_dtype = len(dtypes) == 1 + + if single_dtype: + target_dtype = to_concat_no_proxy[0].dtype + elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes): + # GH#42092 + target_dtype = np.find_common_type(list(dtypes), []) + else: + target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy]) + + to_concat = [ + arr.to_array(target_dtype) + if isinstance(arr, NullArrayProxy) + else cast_to_common_type(arr, target_dtype) + for arr in to_concat + ] + + if isinstance(to_concat[0], ExtensionArray): + cls = type(to_concat[0]) + return cls._concat_same_type(to_concat) + + result = np.concatenate(to_concat) + + # TODO decide on exact behaviour (we shouldn't do this only for empty result) + # see https://github.com/pandas-dev/pandas/issues/39817 + if len(result) == 0: + # all empties -> check for bool to not coerce to float + kinds = {obj.dtype.kind for obj in to_concat_no_proxy} + if len(kinds) != 1: + if "b" in kinds: + result = result.astype(object) + return result + + +def concatenate_managers( + mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool +) -> Manager: + """ + Concatenate block managers into one. 
+ + Parameters + ---------- + mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples + axes : list of Index + concat_axis : int + copy : bool + + Returns + ------- + BlockManager + """ + # TODO(ArrayManager) this assumes that all managers are of the same type + if isinstance(mgrs_indexers[0][0], ArrayManager): + return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) + + mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers) + + concat_plans = [ + _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers + ] + concat_plan = _combine_concat_plans(concat_plans, concat_axis) + blocks = [] + + for placement, join_units in concat_plan: + unit = join_units[0] + blk = unit.block + + if len(join_units) == 1 and not join_units[0].indexers: + values = blk.values + if copy: + values = values.copy() + else: + values = values.view() + fastpath = True + elif _is_uniform_join_units(join_units): + vals = [ju.block.values for ju in join_units] + + if not blk.is_extension: + # _is_uniform_join_units ensures a single dtype, so + # we can use np.concatenate, which is more performant + # than concat_compat + values = np.concatenate(vals, axis=1) + else: + # TODO(EA2D): special-casing not needed with 2D EAs + values = concat_compat(vals, axis=1) + values = ensure_block_shape(values, ndim=2) + + values = ensure_wrapped_if_datetimelike(values) + + fastpath = blk.values.dtype == values.dtype + else: + values = _concatenate_join_units(join_units, concat_axis, copy=copy) + fastpath = False + + if fastpath: + b = blk.make_block_same_class(values, placement=placement) + else: + b = new_block_2d(values, placement=placement) + + blocks.append(b) + + return BlockManager(tuple(blocks), axes) + + +def _maybe_reindex_columns_na_proxy( + axes: list[Index], mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]] +) -> list[tuple[BlockManager, dict[int, np.ndarray]]]: + """ + Reindex along columns so that all of the BlockManagers being concatenated + have matching columns. + + Columns added in this reindexing have dtype=np.void, indicating they + should be ignored when choosing a column's final dtype. + """ + new_mgrs_indexers = [] + for mgr, indexers in mgrs_indexers: + # We only reindex for axis=0 (i.e. columns), as this can be done cheaply + if 0 in indexers: + new_mgr = mgr.reindex_indexer( + axes[0], + indexers[0], + axis=0, + copy=False, + only_slice=True, + allow_dups=True, + use_na_proxy=True, + ) + new_indexers = indexers.copy() + del new_indexers[0] + new_mgrs_indexers.append((new_mgr, new_indexers)) + else: + new_mgrs_indexers.append((mgr, indexers)) + + return new_mgrs_indexers + + +def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]): + """ + Construct concatenation plan for given block manager and indexers. + + Parameters + ---------- + mgr : BlockManager + indexers : dict of {axis: indexer} + + Returns + ------- + plan : list of (BlockPlacement, JoinUnit) tuples + + """ + # Calculate post-reindex shape , save for item axis which will be separate + # for each block anyway. 
+ mgr_shape_list = list(mgr.shape) + for ax, indexer in indexers.items(): + mgr_shape_list[ax] = len(indexer) + mgr_shape = tuple(mgr_shape_list) + + assert 0 not in indexers + + if mgr.is_single_block: + blk = mgr.blocks[0] + return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))] + + blknos = mgr.blknos + blklocs = mgr.blklocs + + plan = [] + for blkno, placements in libinternals.get_blkno_placements(blknos, group=False): + + assert placements.is_slice_like + assert blkno != -1 + + join_unit_indexers = indexers.copy() + + shape_list = list(mgr_shape) + shape_list[0] = len(placements) + shape = tuple(shape_list) + + blk = mgr.blocks[blkno] + ax0_blk_indexer = blklocs[placements.indexer] + + unit_no_ax0_reindexing = ( + len(placements) == len(blk.mgr_locs) + and + # Fastpath detection of join unit not + # needing to reindex its block: no ax0 + # reindexing took place and block + # placement was sequential before. + ( + (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1) + or + # Slow-ish detection: all indexer locs + # are sequential (and length match is + # checked above). + (np.diff(ax0_blk_indexer) == 1).all() + ) + ) + + # Omit indexer if no item reindexing is required. + if unit_no_ax0_reindexing: + join_unit_indexers.pop(0, None) + else: + join_unit_indexers[0] = ax0_blk_indexer + + unit = JoinUnit(blk, shape, join_unit_indexers) + + plan.append((placements, unit)) + + return plan + + +class JoinUnit: + def __init__(self, block: Block, shape: Shape, indexers=None): + # Passing shape explicitly is required for cases when block is None. + # Note: block is None implies indexers is None, but not vice-versa + if indexers is None: + indexers = {} + self.block = block + self.indexers = indexers + self.shape = shape + + def __repr__(self) -> str: + return f"{type(self).__name__}({repr(self.block)}, {self.indexers})" + + @cache_readonly + def needs_filling(self) -> bool: + for indexer in self.indexers.values(): + # FIXME: cache results of indexer == -1 checks. + if (indexer == -1).any(): + return True + + return False + + @cache_readonly + def dtype(self) -> DtypeObj: + blk = self.block + if blk.values.dtype.kind == "V": + raise AssertionError("Block is None, no dtype") + + if not self.needs_filling: + return blk.dtype + return ensure_dtype_can_hold_na(blk.dtype) + + def _is_valid_na_for(self, dtype: DtypeObj) -> bool: + """ + Check that we are all-NA of a type/dtype that is compatible with this dtype. + Augments `self.is_na` with an additional check of the type of NA values. + """ + if not self.is_na: + return False + if self.block.dtype.kind == "V": + return True + + if self.dtype == object: + values = self.block.values + return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K")) + + na_value = self.block.fill_value + if na_value is NaT and not is_dtype_equal(self.dtype, dtype): + # e.g. we are dt64 and other is td64 + # fill_values match but we should not cast self.block.values to dtype + # TODO: this will need updating if we ever have non-nano dt64/td64 + return False + + if na_value is NA and needs_i8_conversion(dtype): + # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat + # e.g. self.dtype == "Int64" and dtype is td64, we dont want + # to consider these as matching + return False + + # TODO: better to use can_hold_element? 
+ return is_valid_na_for_dtype(na_value, dtype) + + @cache_readonly + def is_na(self) -> bool: + blk = self.block + if blk.dtype.kind == "V": + return True + + if not blk._can_hold_na: + return False + + values = blk.values + if values.size == 0: + return True + if isinstance(values.dtype, SparseDtype): + return False + + if values.ndim == 1: + # TODO(EA2D): no need for special case with 2D EAs + val = values[0] + if not is_scalar(val) or not isna(val): + # ideally isna_all would do this short-circuiting + return False + return isna_all(values) + else: + val = values[0][0] + if not is_scalar(val) or not isna(val): + # ideally isna_all would do this short-circuiting + return False + return all(isna_all(row) for row in values) + + def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: + values: ArrayLike + + if upcasted_na is None and self.block.dtype.kind != "V": + # No upcasting is necessary + fill_value = self.block.fill_value + values = self.block.get_values() + else: + fill_value = upcasted_na + + if self._is_valid_na_for(empty_dtype): + # note: always holds when self.block.dtype.kind == "V" + blk_dtype = self.block.dtype + + if blk_dtype == np.dtype("object"): + # we want to avoid filling with np.nan if we are + # using None; we already know that we are all + # nulls + values = self.block.values.ravel(order="K") + if len(values) and values[0] is None: + fill_value = None + + if isinstance(empty_dtype, DatetimeTZDtype): + # NB: exclude e.g. pyarrow[dt64tz] dtypes + i8values = np.full(self.shape, fill_value.value) + return DatetimeArray(i8values, dtype=empty_dtype) + + elif is_1d_only_ea_dtype(empty_dtype): + if is_dtype_equal(blk_dtype, empty_dtype) and self.indexers: + # avoid creating new empty array if we already have an array + # with correct dtype that can be reindexed + pass + else: + empty_dtype = cast(ExtensionDtype, empty_dtype) + cls = empty_dtype.construct_array_type() + + missing_arr = cls._from_sequence([], dtype=empty_dtype) + ncols, nrows = self.shape + assert ncols == 1, ncols + empty_arr = -1 * np.ones((nrows,), dtype=np.intp) + return missing_arr.take( + empty_arr, allow_fill=True, fill_value=fill_value + ) + elif isinstance(empty_dtype, ExtensionDtype): + # TODO: no tests get here, a handful would if we disabled + # the dt64tz special-case above (which is faster) + cls = empty_dtype.construct_array_type() + missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype) + missing_arr[:] = fill_value + return missing_arr + else: + # NB: we should never get here with empty_dtype integer or bool; + # if we did, the missing_arr.fill would cast to gibberish + missing_arr = np.empty(self.shape, dtype=empty_dtype) + missing_arr.fill(fill_value) + return missing_arr + + if (not self.indexers) and (not self.block._can_consolidate): + # preserve these for validation in concat_compat + return self.block.values + + if self.block.is_bool: + # External code requested filling/upcasting, bool values must + # be upcasted to object to avoid being upcasted to numeric. + values = self.block.astype(np.dtype("object")).values + else: + # No dtype upcasting is done here, it will be performed during + # concatenation itself. + values = self.block.values + + if not self.indexers: + # If there's no indexing to be done, we want to signal outside + # code that this array must be copied explicitly. This is done + # by returning a view and checking `retval.base`. 
+ values = values.view() + + else: + for ax, indexer in self.indexers.items(): + values = algos.take_nd(values, indexer, axis=ax) + + return values + + +def _concatenate_join_units( + join_units: list[JoinUnit], concat_axis: int, copy: bool +) -> ArrayLike: + """ + Concatenate values from several join units along selected axis. + """ + if concat_axis == 0 and len(join_units) > 1: + # Concatenating join units along ax0 is handled in _merge_blocks. + raise AssertionError("Concatenating join units along axis0") + + empty_dtype = _get_empty_dtype(join_units) + + has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units) + upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks) + + to_concat = [ + ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na) + for ju in join_units + ] + + if len(to_concat) == 1: + # Only one block, nothing to concatenate. + concat_values = to_concat[0] + if copy: + if isinstance(concat_values, np.ndarray): + # non-reindexed (=not yet copied) arrays are made into a view + # in JoinUnit.get_reindexed_values + if concat_values.base is not None: + concat_values = concat_values.copy() + else: + concat_values = concat_values.copy() + + elif any(is_1d_only_ea_dtype(t.dtype) for t in to_concat): + # TODO(EA2D): special case not needed if all EAs used HybridBlocks + # NB: we are still assuming here that Hybrid blocks have shape (1, N) + # concatting with at least one EA means we are concatting a single column + # the non-EA values are 2D arrays with shape (1, n) + + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[int, slice]" + to_concat = [ + t + if is_1d_only_ea_dtype(t.dtype) + else t[0, :] # type: ignore[call-overload] + for t in to_concat + ] + concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True) + concat_values = ensure_block_shape(concat_values, 2) + + else: + concat_values = concat_compat(to_concat, axis=concat_axis) + + return concat_values + + +def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool): + """ + Find the NA value to go with this dtype. + """ + if isinstance(dtype, ExtensionDtype): + return dtype.na_value + elif dtype.kind in ["m", "M"]: + return dtype.type("NaT") + elif dtype.kind in ["f", "c"]: + return dtype.type("NaN") + elif dtype.kind == "b": + # different from missing.na_value_for_dtype + return None + elif dtype.kind in ["i", "u"]: + if not has_none_blocks: + # different from missing.na_value_for_dtype + return None + return np.nan + elif dtype.kind == "O": + return np.nan + raise NotImplementedError + + +def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj: + """ + Return dtype and N/A values to use when concatenating specified units. + + Returned N/A value may be None which means there was no casting involved. 
+ + Returns + ------- + dtype + """ + if len(join_units) == 1: + blk = join_units[0].block + return blk.dtype + + if _is_uniform_reindex(join_units): + empty_dtype = join_units[0].block.dtype + return empty_dtype + + has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units) + + dtypes = [unit.dtype for unit in join_units if not unit.is_na] + if not len(dtypes): + dtypes = [unit.dtype for unit in join_units if unit.block.dtype.kind != "V"] + + dtype = find_common_type(dtypes) + if has_none_blocks: + dtype = ensure_dtype_can_hold_na(dtype) + return dtype + + +def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool: + """ + Check if the join units consist of blocks of uniform type that can + be concatenated using Block.concat_same_type instead of the generic + _concatenate_join_units (which uses `concat_compat`). + + """ + first = join_units[0].block + if first.dtype.kind == "V": + return False + return ( + # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64 + all(type(ju.block) is type(first) for ju in join_units) + and + # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform + all( + is_dtype_equal(ju.block.dtype, first.dtype) + # GH#42092 we only want the dtype_equal check for non-numeric blocks + # (for now, may change but that would need a deprecation) + or ju.block.dtype.kind in ["b", "i", "u"] + for ju in join_units + ) + and + # no blocks that would get missing values (can lead to type upcasts) + # unless we're an extension dtype. + all(not ju.is_na or ju.block.is_extension for ju in join_units) + and + # no blocks with indexers (as then the dimensions do not fit) + all(not ju.indexers for ju in join_units) + and + # only use this path when there is something to concatenate + len(join_units) > 1 + ) + + +def _is_uniform_reindex(join_units) -> bool: + return ( + # TODO: should this be ju.block._can_hold_na? + all(ju.block.is_extension for ju in join_units) + and len({ju.block.dtype.name for ju in join_units}) == 1 + ) + + +def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit: + """ + Reduce join_unit's shape along item axis to length. + + Extra items that didn't fit are returned as a separate block. + """ + if 0 not in join_unit.indexers: + extra_indexers = join_unit.indexers + + if join_unit.block is None: + extra_block = None + else: + extra_block = join_unit.block.getitem_block(slice(length, None)) + join_unit.block = join_unit.block.getitem_block(slice(length)) + else: + extra_block = join_unit.block + + extra_indexers = copy.copy(join_unit.indexers) + extra_indexers[0] = extra_indexers[0][length:] + join_unit.indexers[0] = join_unit.indexers[0][:length] + + extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:] + join_unit.shape = (length,) + join_unit.shape[1:] + + return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape) + + +def _combine_concat_plans(plans, concat_axis: int): + """ + Combine multiple concatenation plans into one. + + existing_plan is updated in-place. 
+ """ + if len(plans) == 1: + for p in plans[0]: + yield p[0], [p[1]] + + elif concat_axis == 0: + offset = 0 + for plan in plans: + last_plc = None + + for plc, unit in plan: + yield plc.add(offset), [unit] + last_plc = plc + + if last_plc is not None: + offset += last_plc.as_slice.stop + + else: + # singleton list so we can modify it as a side-effect within _next_or_none + num_ended = [0] + + def _next_or_none(seq): + retval = next(seq, None) + if retval is None: + num_ended[0] += 1 + return retval + + plans = list(map(iter, plans)) + next_items = list(map(_next_or_none, plans)) + + while num_ended[0] != len(next_items): + if num_ended[0] > 0: + raise ValueError("Plan shapes are not aligned") + + placements, units = zip(*next_items) + + lengths = list(map(len, placements)) + min_len, max_len = min(lengths), max(lengths) + + if min_len == max_len: + yield placements[0], units + next_items[:] = map(_next_or_none, plans) + else: + yielded_placement = None + yielded_units = [None] * len(next_items) + for i, (plc, unit) in enumerate(next_items): + yielded_units[i] = unit + if len(plc) > min_len: + # _trim_join_unit updates unit in place, so only + # placement needs to be sliced to skip min_len. + next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len)) + else: + yielded_placement = plc + next_items[i] = _next_or_none(plans[i]) + + yield yielded_placement, yielded_units diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py new file mode 100644 index 00000000..c1d0ab73 --- /dev/null +++ b/pandas/core/internals/construction.py @@ -0,0 +1,1062 @@ +""" +Functions for preparing various inputs passed to the DataFrame or Series +constructors before passing them to a BlockManager. +""" +from __future__ import annotations + +from collections import abc +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + Sequence, + cast, +) +import warnings + +import numpy as np +import numpy.ma as ma + +from pandas._libs import lib +from pandas._typing import ( + ArrayLike, + DtypeObj, + Manager, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import ( + construct_1d_arraylike_from_scalar, + dict_compat, + maybe_cast_to_datetime, + maybe_convert_platform, + maybe_infer_to_datetimelike, + maybe_upcast, +) +from pandas.core.dtypes.common import ( + is_1d_only_ea_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_integer_dtype, + is_list_like, + is_named_tuple, + is_object_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core import ( + algorithms, + common as com, +) +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + ExtensionArray, + TimedeltaArray, +) +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, + range_to_ndarray, + sanitize_array, +) +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + TimedeltaIndex, + default_index, + ensure_index, + get_objs_combined_axis, + union_indexes, +) +from pandas.core.internals.array_manager import ( + ArrayManager, + SingleArrayManager, +) +from pandas.core.internals.blocks import ( + BlockPlacement, + ensure_block_shape, + new_block_2d, +) +from pandas.core.internals.managers import ( + BlockManager, + SingleBlockManager, + create_block_manager_from_blocks, + create_block_manager_from_column_arrays, +) + +if TYPE_CHECKING: + from numpy.ma.mrecords import MaskedRecords + + +# 
--------------------------------------------------------------------- +# BlockManager Interface + + +def arrays_to_mgr( + arrays, + columns: Index, + index, + *, + dtype: DtypeObj | None = None, + verify_integrity: bool = True, + typ: str | None = None, + consolidate: bool = True, +) -> Manager: + """ + Segregate Series based on type and coerce into matrices. + + Needs to handle a lot of exceptional cases. + """ + if verify_integrity: + # figure out the index, if necessary + if index is None: + index = _extract_index(arrays) + else: + index = ensure_index(index) + + # don't force copy because getting jammed in an ndarray anyway + arrays = _homogenize(arrays, index, dtype) + # _homogenize ensures + # - all(len(x) == len(index) for x in arrays) + # - all(x.ndim == 1 for x in arrays) + # - all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays) + # - all(type(x) is not PandasArray for x in arrays) + + else: + index = ensure_index(index) + arrays = [extract_array(x, extract_numpy=True) for x in arrays] + + # Reached via DataFrame._from_arrays; we do validation here + for arr in arrays: + if ( + not isinstance(arr, (np.ndarray, ExtensionArray)) + or arr.ndim != 1 + or len(arr) != len(index) + ): + raise ValueError( + "Arrays must be 1-dimensional np.ndarray or ExtensionArray " + "with length matching len(index)" + ) + + columns = ensure_index(columns) + if len(columns) != len(arrays): + raise ValueError("len(arrays) must match len(columns)") + + # from BlockManager perspective + axes = [columns, index] + + if typ == "block": + return create_block_manager_from_column_arrays( + arrays, axes, consolidate=consolidate + ) + elif typ == "array": + return ArrayManager(arrays, [index, columns]) + else: + raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") + + +def rec_array_to_mgr( + data: MaskedRecords | np.recarray | np.ndarray, + index, + columns, + dtype: DtypeObj | None, + copy: bool, + typ: str, +) -> Manager: + """ + Extract from a masked rec array and create the manager. + """ + # essentially process a record array then fill it + fdata = ma.getdata(data) + if index is None: + index = default_index(len(fdata)) + else: + index = ensure_index(index) + + if columns is not None: + columns = ensure_index(columns) + arrays, arr_columns = to_arrays(fdata, columns) + + # fill if needed + if isinstance(data, np.ma.MaskedArray): + # GH#42200 we only get here with MaskedRecords, but check for the + # parent class MaskedArray to avoid the need to import MaskedRecords + data = cast("MaskedRecords", data) + new_arrays = fill_masked_arrays(data, arr_columns) + else: + # error: Incompatible types in assignment (expression has type + # "List[ExtensionArray]", variable has type "List[ndarray]") + new_arrays = arrays # type: ignore[assignment] + + # create the manager + + # error: Argument 1 to "reorder_arrays" has incompatible type "List[ndarray]"; + # expected "List[Union[ExtensionArray, ndarray]]" + arrays, arr_columns = reorder_arrays( + new_arrays, arr_columns, columns, len(index) # type: ignore[arg-type] + ) + if columns is None: + columns = arr_columns + + mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ) + + if copy: + mgr = mgr.copy() + return mgr + + +def fill_masked_arrays(data: MaskedRecords, arr_columns: Index) -> list[np.ndarray]: + """ + Convert numpy MaskedRecords to ensure mask is softened. 
+ """ + new_arrays = [] + + for col in arr_columns: + arr = data[col] + fv = arr.fill_value + + mask = ma.getmaskarray(arr) + if mask.any(): + arr, fv = maybe_upcast(arr, fill_value=fv, copy=True) + arr[mask] = fv + new_arrays.append(arr) + return new_arrays + + +def mgr_to_mgr(mgr, typ: str, copy: bool = True): + """ + Convert to specific type of Manager. Does not copy if the type is already + correct. Does not guarantee a copy otherwise. `copy` keyword only controls + whether conversion from Block->ArrayManager copies the 1D arrays. + """ + new_mgr: Manager + + if typ == "block": + if isinstance(mgr, BlockManager): + new_mgr = mgr + else: + if mgr.ndim == 2: + new_mgr = arrays_to_mgr( + mgr.arrays, mgr.axes[0], mgr.axes[1], typ="block" + ) + else: + new_mgr = SingleBlockManager.from_array(mgr.arrays[0], mgr.index) + elif typ == "array": + if isinstance(mgr, ArrayManager): + new_mgr = mgr + else: + if mgr.ndim == 2: + arrays = [mgr.iget_values(i) for i in range(len(mgr.axes[0]))] + if copy: + arrays = [arr.copy() for arr in arrays] + new_mgr = ArrayManager(arrays, [mgr.axes[1], mgr.axes[0]]) + else: + array = mgr.internal_values() + if copy: + array = array.copy() + new_mgr = SingleArrayManager([array], [mgr.index]) + else: + raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") + return new_mgr + + +# --------------------------------------------------------------------- +# DataFrame Constructor Interface + + +def ndarray_to_mgr( + values, index, columns, dtype: DtypeObj | None, copy: bool, typ: str +) -> Manager: + # used in DataFrame.__init__ + # input must be a ndarray, list, Series, Index, ExtensionArray + + if isinstance(values, ABCSeries): + if columns is None: + if values.name is not None: + columns = Index([values.name]) + if index is None: + index = values.index + else: + values = values.reindex(index) + + # zero len case (GH #2234) + if not len(values) and columns is not None and len(columns): + values = np.empty((0, 1), dtype=object) + + # if the array preparation does a copy -> avoid this for ArrayManager, + # since the copy is done on conversion to 1D arrays + copy_on_sanitize = False if typ == "array" else copy + + vdtype = getattr(values, "dtype", None) + if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype): + # GH#19157 + + if isinstance(values, (np.ndarray, ExtensionArray)) and values.ndim > 1: + # GH#12513 a EA dtype passed with a 2D array, split into + # multiple EAs that view the values + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[slice, int]" + values = [ + values[:, n] # type: ignore[call-overload] + for n in range(values.shape[1]) + ] + else: + values = [values] + + if columns is None: + columns = Index(range(len(values))) + else: + columns = ensure_index(columns) + + return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ) + + elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype): + # i.e. 
Datetime64TZ, PeriodDtype + values = extract_array(values, extract_numpy=True) + if copy: + values = values.copy() + if values.ndim == 1: + values = values.reshape(-1, 1) + + else: + # by definition an array here + # the dtypes will be coerced to a single dtype + values = _prep_ndarraylike(values, copy=copy_on_sanitize) + + if dtype is not None and not is_dtype_equal(values.dtype, dtype): + # GH#40110 see similar check inside sanitize_array + rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f") + + values = sanitize_array( + values, + None, + dtype=dtype, + copy=copy_on_sanitize, + raise_cast_failure=rcf, + allow_2d=True, + ) + + # _prep_ndarraylike ensures that values.ndim == 2 at this point + index, columns = _get_axes( + values.shape[0], values.shape[1], index=index, columns=columns + ) + + _check_values_indices_shape_match(values, index, columns) + + if typ == "array": + + if issubclass(values.dtype.type, str): + values = np.array(values, dtype=object) + + if dtype is None and is_object_dtype(values.dtype): + arrays = [ + ensure_wrapped_if_datetimelike( + maybe_infer_to_datetimelike(values[:, i]) + ) + for i in range(values.shape[1]) + ] + else: + if is_datetime_or_timedelta_dtype(values.dtype): + values = ensure_wrapped_if_datetimelike(values) + arrays = [values[:, i] for i in range(values.shape[1])] + + if copy: + arrays = [arr.copy() for arr in arrays] + + return ArrayManager(arrays, [index, columns], verify_integrity=False) + + values = values.T + + # if we don't have a dtype specified, then try to convert objects + # on the entire block; this is to convert if we have datetimelike's + # embedded in an object type + if dtype is None and is_object_dtype(values.dtype): + obj_columns = list(values) + maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns] + # don't convert (and copy) the objects if no type inference occurs + if any(x is not y for x, y in zip(obj_columns, maybe_datetime)): + dvals_list = [ensure_block_shape(dval, 2) for dval in maybe_datetime] + block_values = [ + new_block_2d(dvals_list[n], placement=BlockPlacement(n)) + for n in range(len(dvals_list)) + ] + else: + bp = BlockPlacement(slice(len(columns))) + nb = new_block_2d(values, placement=bp) + block_values = [nb] + else: + bp = BlockPlacement(slice(len(columns))) + nb = new_block_2d(values, placement=bp) + block_values = [nb] + + if len(columns) == 0: + block_values = [] + + return create_block_manager_from_blocks( + block_values, [columns, index], verify_integrity=False + ) + + +def _check_values_indices_shape_match( + values: np.ndarray, index: Index, columns: Index +) -> None: + """ + Check that the shape implied by our axes matches the actual shape of the + data. + """ + if values.shape[1] != len(columns) or values.shape[0] != len(index): + # Could let this raise in Block constructor, but we get a more + # helpful exception message this way. + if values.shape[0] == 0: + raise ValueError("Empty data passed with indices specified.") + + passed = values.shape + implied = (len(index), len(columns)) + raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") + + +def dict_to_mgr( + data: dict, + index, + columns, + *, + dtype: DtypeObj | None = None, + typ: str = "block", + copy: bool = True, +) -> Manager: + """ + Segregate Series based on type and coerce into matrices. + Needs to handle a lot of exceptional cases. 
+ + Used in DataFrame.__init__ + """ + arrays: Sequence[Any] | Series + + if columns is not None: + from pandas.core.series import Series + + arrays = Series(data, index=columns, dtype=object) + missing = arrays.isna() + if index is None: + # GH10856 + # raise ValueError if only scalars in dict + index = _extract_index(arrays[~missing]) + else: + index = ensure_index(index) + + # no obvious "empty" int column + if missing.any() and not is_integer_dtype(dtype): + nan_dtype: DtypeObj + + if dtype is not None: + # calling sanitize_array ensures we don't mix-and-match + # NA dtypes + midxs = missing.values.nonzero()[0] + for i in midxs: + arr = sanitize_array(arrays.iat[i], index, dtype=dtype) + arrays.iat[i] = arr + else: + # GH#1783 + nan_dtype = np.dtype("object") + val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) + nmissing = missing.sum() + if copy: + rhs = [val] * nmissing + else: + # GH#45369 + rhs = [val.copy() for _ in range(nmissing)] + arrays.loc[missing] = rhs + + arrays = list(arrays) + columns = ensure_index(columns) + + else: + keys = list(data.keys()) + columns = Index(keys) + arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] + arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays] + + if copy: + if typ == "block": + # We only need to copy arrays that will not get consolidated, i.e. + # only EA arrays + arrays = [x.copy() if isinstance(x, ExtensionArray) else x for x in arrays] + else: + # dtype check to exclude e.g. range objects, scalars + arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays] + + return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy) + + +def nested_data_to_arrays( + data: Sequence, + columns: Index | None, + index: Index | None, + dtype: DtypeObj | None, +) -> tuple[list[ArrayLike], Index, Index]: + """ + Convert a single sequence of arrays to multiple arrays. + """ + # By the time we get here we have already checked treat_as_nested(data) + + if is_named_tuple(data[0]) and columns is None: + columns = ensure_index(data[0]._fields) + + arrays, columns = to_arrays(data, columns, dtype=dtype) + columns = ensure_index(columns) + + if index is None: + if isinstance(data[0], ABCSeries): + index = _get_names_from_index(data) + elif isinstance(data[0], Categorical): + # GH#38845 hit in test_constructor_categorical + index = default_index(len(data[0])) + else: + index = default_index(len(data)) + + return arrays, columns, index + + +def treat_as_nested(data) -> bool: + """ + Check if we should use nested_data_to_arrays. 
+ """ + return ( + len(data) > 0 + and is_list_like(data[0]) + and getattr(data[0], "ndim", 1) == 1 + and not (isinstance(data, ExtensionArray) and data.ndim == 2) + ) + + +# --------------------------------------------------------------------- + + +def _prep_ndarraylike( + values, copy: bool = True +) -> np.ndarray | DatetimeArray | TimedeltaArray: + if isinstance(values, TimedeltaArray) or ( + isinstance(values, DatetimeArray) and values.tz is None + ): + # By retaining DTA/TDA instead of unpacking, we end up retaining non-nano + pass + + elif not isinstance(values, (np.ndarray, ABCSeries, Index)): + if len(values) == 0: + return np.empty((0, 0), dtype=object) + elif isinstance(values, range): + arr = range_to_ndarray(values) + return arr[..., np.newaxis] + + def convert(v): + if not is_list_like(v) or isinstance(v, ABCDataFrame): + return v + + v = extract_array(v, extract_numpy=True) + res = maybe_convert_platform(v) + return res + + # we could have a 1-dim or 2-dim list here + # this is equiv of np.asarray, but does object conversion + # and platform dtype preservation + if is_list_like(values[0]): + values = np.array([convert(v) for v in values]) + elif isinstance(values[0], np.ndarray) and values[0].ndim == 0: + # GH#21861 see test_constructor_list_of_lists + values = np.array([convert(v) for v in values]) + else: + values = convert(values) + + else: + + # drop subclass info + values = np.array(values, copy=copy) + + if values.ndim == 1: + values = values.reshape((values.shape[0], 1)) + elif values.ndim != 2: + raise ValueError(f"Must pass 2-d input. shape={values.shape}") + + return values + + +def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]: + oindex = None + homogenized = [] + + for val in data: + if isinstance(val, ABCSeries): + if dtype is not None: + val = val.astype(dtype, copy=False) + if val.index is not index: + # Forces alignment. No need to copy data since we + # are putting it into an ndarray later + val = val.reindex(index, copy=False) + + val = val._values + else: + if isinstance(val, dict): + # GH#41785 this _should_ be equivalent to (but faster than) + # val = create_series_with_explicit_dtype(val, index=index)._values + if oindex is None: + oindex = index.astype("O") + + if isinstance(index, (DatetimeIndex, TimedeltaIndex)): + # see test_constructor_dict_datetime64_index + val = dict_compat(val) + else: + # see test_constructor_subclass_dict + val = dict(val) + val = lib.fast_multiget(val, oindex._values, default=np.nan) + + val = sanitize_array( + val, index, dtype=dtype, copy=False, raise_cast_failure=False + ) + com.require_length_match(val, index) + + homogenized.append(val) + + return homogenized + + +def _extract_index(data) -> Index: + """ + Try to infer an Index from the passed data, raise ValueError on failure. 
+ """ + index = None + if len(data) == 0: + index = Index([]) + else: + raw_lengths = [] + indexes: list[list[Hashable] | Index] = [] + + have_raw_arrays = False + have_series = False + have_dicts = False + + for val in data: + if isinstance(val, ABCSeries): + have_series = True + indexes.append(val.index) + elif isinstance(val, dict): + have_dicts = True + indexes.append(list(val.keys())) + elif is_list_like(val) and getattr(val, "ndim", 1) == 1: + have_raw_arrays = True + raw_lengths.append(len(val)) + elif isinstance(val, np.ndarray) and val.ndim > 1: + raise ValueError("Per-column arrays must each be 1-dimensional") + + if not indexes and not raw_lengths: + raise ValueError("If using all scalar values, you must pass an index") + + elif have_series: + index = union_indexes(indexes) + elif have_dicts: + index = union_indexes(indexes, sort=False) + + if have_raw_arrays: + lengths = list(set(raw_lengths)) + if len(lengths) > 1: + raise ValueError("All arrays must be of the same length") + + if have_dicts: + raise ValueError( + "Mixing dicts with non-Series may lead to ambiguous ordering." + ) + + if have_series: + assert index is not None # for mypy + if lengths[0] != len(index): + msg = ( + f"array length {lengths[0]} does not match index " + f"length {len(index)}" + ) + raise ValueError(msg) + else: + index = default_index(lengths[0]) + + # error: Argument 1 to "ensure_index" has incompatible type "Optional[Index]"; + # expected "Union[Union[Union[ExtensionArray, ndarray], Index, Series], + # Sequence[Any]]" + return ensure_index(index) # type: ignore[arg-type] + + +def reorder_arrays( + arrays: list[ArrayLike], arr_columns: Index, columns: Index | None, length: int +) -> tuple[list[ArrayLike], Index]: + """ + Pre-emptively (cheaply) reindex arrays with new columns. + """ + # reorder according to the columns + if columns is not None: + if not columns.equals(arr_columns): + # if they are equal, there is nothing to do + new_arrays: list[ArrayLike | None] + new_arrays = [None] * len(columns) + indexer = arr_columns.get_indexer(columns) + for i, k in enumerate(indexer): + if k == -1: + # by convention default is all-NaN object dtype + arr = np.empty(length, dtype=object) + arr.fill(np.nan) + else: + arr = arrays[k] + new_arrays[i] = arr + + # Incompatible types in assignment (expression has type + # "List[Union[ExtensionArray, ndarray[Any, Any], None]]", variable + # has type "List[Union[ExtensionArray, ndarray[Any, Any]]]") + arrays = new_arrays # type: ignore[assignment] + arr_columns = columns + + return arrays, arr_columns + + +def _get_names_from_index(data) -> Index: + has_some_name = any(getattr(s, "name", None) is not None for s in data) + if not has_some_name: + return default_index(len(data)) + + index: list[Hashable] = list(range(len(data))) + count = 0 + for i, s in enumerate(data): + n = getattr(s, "name", None) + if n is not None: + index[i] = n + else: + index[i] = f"Unnamed {count}" + count += 1 + + return Index(index) + + +def _get_axes( + N: int, K: int, index: Index | None, columns: Index | None +) -> tuple[Index, Index]: + # helper to create the axes as indexes + # return axes or defaults + + if index is None: + index = default_index(N) + else: + index = ensure_index(index) + + if columns is None: + columns = default_index(K) + else: + columns = ensure_index(columns) + return index, columns + + +def dataclasses_to_dicts(data): + """ + Converts a list of dataclass instances to a list of dictionaries. 
+ + Parameters + ---------- + data : List[Type[dataclass]] + + Returns + -------- + list_dict : List[dict] + + Examples + -------- + >>> from dataclasses import dataclass + >>> @dataclass + ... class Point: + ... x: int + ... y: int + + >>> dataclasses_to_dicts([Point(1, 2), Point(2, 3)]) + [{'x': 1, 'y': 2}, {'x': 2, 'y': 3}] + + """ + from dataclasses import asdict + + return list(map(asdict, data)) + + +# --------------------------------------------------------------------- +# Conversion of Inputs to Arrays + + +def to_arrays( + data, columns: Index | None, dtype: DtypeObj | None = None +) -> tuple[list[ArrayLike], Index]: + """ + Return list of arrays, columns. + + Returns + ------- + list[ArrayLike] + These will become columns in a DataFrame. + Index + This will become frame.columns. + + Notes + ----- + Ensures that len(result_arrays) == len(result_index). + """ + if isinstance(data, ABCDataFrame): + # see test_from_records_with_index_data, test_from_records_bad_index_column + if columns is not None: + arrays = [ + data._ixs(i, axis=1).values + for i, col in enumerate(data.columns) + if col in columns + ] + else: + columns = data.columns + arrays = [data._ixs(i, axis=1).values for i in range(len(columns))] + + return arrays, columns + + if not len(data): + if isinstance(data, np.ndarray): + if data.dtype.names is not None: + # i.e. numpy structured array + columns = ensure_index(data.dtype.names) + arrays = [data[name] for name in columns] + + if len(data) == 0: + # GH#42456 the indexing above results in list of 2D ndarrays + # TODO: is that an issue with numpy? + for i, arr in enumerate(arrays): + if arr.ndim == 2: + arrays[i] = arr[:, 0] + + return arrays, columns + return [], ensure_index([]) + + elif isinstance(data[0], Categorical): + # GH#38845 deprecate special case + warnings.warn( + "The behavior of DataFrame([categorical, ...]) is deprecated and " + "in a future version will be changed to match the behavior of " + "DataFrame([any_listlike, ...]). " + "To retain the old behavior, pass as a dictionary " + "DataFrame({col: categorical, ..})", + FutureWarning, + stacklevel=find_stack_level(), + ) + if columns is None: + columns = default_index(len(data)) + elif len(columns) > len(data): + raise ValueError("len(columns) > len(data)") + elif len(columns) < len(data): + # doing this here is akin to a pre-emptive reindex + data = data[: len(columns)] + return data, columns + + elif isinstance(data, np.ndarray) and data.dtype.names is not None: + # e.g. 
recarray + columns = Index(list(data.dtype.names)) + arrays = [data[k] for k in columns] + return arrays, columns + + if isinstance(data[0], (list, tuple)): + arr = _list_to_arrays(data) + elif isinstance(data[0], abc.Mapping): + arr, columns = _list_of_dict_to_arrays(data, columns) + elif isinstance(data[0], ABCSeries): + arr, columns = _list_of_series_to_arrays(data, columns) + else: + # last ditch effort + data = [tuple(x) for x in data] + arr = _list_to_arrays(data) + + content, columns = _finalize_columns_and_data(arr, columns, dtype) + return content, columns + + +def _list_to_arrays(data: list[tuple | list]) -> np.ndarray: + # Returned np.ndarray has ndim = 2 + # Note: we already check len(data) > 0 before getting hre + if isinstance(data[0], tuple): + content = lib.to_object_array_tuples(data) + else: + # list of lists + content = lib.to_object_array(data) + return content + + +def _list_of_series_to_arrays( + data: list, + columns: Index | None, +) -> tuple[np.ndarray, Index]: + # returned np.ndarray has ndim == 2 + + if columns is None: + # We know pass_data is non-empty because data[0] is a Series + pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))] + columns = get_objs_combined_axis(pass_data, sort=False) + + indexer_cache: dict[int, np.ndarray] = {} + + aligned_values = [] + for s in data: + index = getattr(s, "index", None) + if index is None: + index = default_index(len(s)) + + if id(index) in indexer_cache: + indexer = indexer_cache[id(index)] + else: + indexer = indexer_cache[id(index)] = index.get_indexer(columns) + + values = extract_array(s, extract_numpy=True) + aligned_values.append(algorithms.take_nd(values, indexer)) + + content = np.vstack(aligned_values) + return content, columns + + +def _list_of_dict_to_arrays( + data: list[dict], + columns: Index | None, +) -> tuple[np.ndarray, Index]: + """ + Convert list of dicts to numpy arrays + + if `columns` is not passed, column names are inferred from the records + - for OrderedDict and dicts, the column names match + the key insertion-order from the first record to the last. + - For other kinds of dict-likes, the keys are lexically sorted. + + Parameters + ---------- + data : iterable + collection of records (OrderedDict, dict) + columns: iterables or None + + Returns + ------- + content : np.ndarray[object, ndim=2] + columns : Index + """ + if columns is None: + gen = (list(x.keys()) for x in data) + sort = not any(isinstance(d, dict) for d in data) + pre_cols = lib.fast_unique_multiple_list_gen(gen, sort=sort) + columns = ensure_index(pre_cols) + + # assure that they are of the base dict class and not of derived + # classes + data = [d if type(d) is dict else dict(d) for d in data] + + content = lib.dicts_to_array(data, list(columns)) + return content, columns + + +def _finalize_columns_and_data( + content: np.ndarray, # ndim == 2 + columns: Index | None, + dtype: DtypeObj | None, +) -> tuple[list[ArrayLike], Index]: + """ + Ensure we have valid columns, cast object dtypes if possible. 
+ """ + contents = list(content.T) + + try: + columns = _validate_or_indexify_columns(contents, columns) + except AssertionError as err: + # GH#26429 do not raise user-facing AssertionError + raise ValueError(err) from err + + if len(contents) and contents[0].dtype == np.object_: + contents = _convert_object_array(contents, dtype=dtype) + + return contents, columns + + +def _validate_or_indexify_columns( + content: list[np.ndarray], columns: Index | None +) -> Index: + """ + If columns is None, make numbers as column names; Otherwise, validate that + columns have valid length. + + Parameters + ---------- + content : list of np.ndarrays + columns : Index or None + + Returns + ------- + Index + If columns is None, assign positional column index value as columns. + + Raises + ------ + 1. AssertionError when content is not composed of list of lists, and if + length of columns is not equal to length of content. + 2. ValueError when content is list of lists, but length of each sub-list + is not equal + 3. ValueError when content is list of lists, but length of sub-list is + not equal to length of content + """ + if columns is None: + columns = default_index(len(content)) + else: + + # Add mask for data which is composed of list of lists + is_mi_list = isinstance(columns, list) and all( + isinstance(col, list) for col in columns + ) + + if not is_mi_list and len(columns) != len(content): # pragma: no cover + # caller's responsibility to check for this... + raise AssertionError( + f"{len(columns)} columns passed, passed data had " + f"{len(content)} columns" + ) + elif is_mi_list: + + # check if nested list column, length of each sub-list should be equal + if len({len(col) for col in columns}) > 1: + raise ValueError( + "Length of columns passed for MultiIndex columns is different" + ) + + # if columns is not empty and length of sublist is not equal to content + elif columns and len(columns[0]) != len(content): + raise ValueError( + f"{len(columns[0])} columns passed, passed data had " + f"{len(content)} columns" + ) + return columns + + +def _convert_object_array( + content: list[np.ndarray], dtype: DtypeObj | None +) -> list[ArrayLike]: + """ + Internal function to convert object array. 
+ + Parameters + ---------- + content: List[np.ndarray] + dtype: np.dtype or ExtensionDtype + + Returns + ------- + List[ArrayLike] + """ + # provide soft conversion of object dtypes + def convert(arr): + if dtype != np.dtype("O"): + arr = lib.maybe_convert_objects(arr) + arr = maybe_cast_to_datetime(arr, dtype) + return arr + + arrays = [convert(arr) for arr in content] + + return arrays diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py new file mode 100644 index 00000000..5ab4aef7 --- /dev/null +++ b/pandas/core/internals/managers.py @@ -0,0 +1,2434 @@ +from __future__ import annotations + +import itertools +from typing import ( + Any, + Callable, + Hashable, + Literal, + Sequence, + TypeVar, + cast, +) +import warnings +import weakref + +import numpy as np + +from pandas._config.config import _global_config + +from pandas._libs import ( + algos as libalgos, + internals as libinternals, + lib, +) +from pandas._libs.internals import BlockPlacement +from pandas._typing import ( + ArrayLike, + DtypeObj, + Shape, + npt, + type_t, +) +from pandas.errors import PerformanceWarning +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import infer_dtype_from_scalar +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_1d_only_ea_dtype, + is_dtype_equal, + is_list_like, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + array_equals, + isna, +) + +import pandas.core.algorithms as algos +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.arrays.sparse import SparseDtype +import pandas.core.common as com +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, +) +from pandas.core.indexers import maybe_convert_indices +from pandas.core.indexes.api import ( + Float64Index, + Index, + ensure_index, +) +from pandas.core.internals.base import ( + DataManager, + SingleDataManager, + interleaved_dtype, +) +from pandas.core.internals.blocks import ( + Block, + DatetimeTZBlock, + NumpyBlock, + ensure_block_shape, + extend_blocks, + get_block_type, + new_block, + new_block_2d, +) +from pandas.core.internals.ops import ( + blockwise_all, + operate_blockwise, +) + +T = TypeVar("T", bound="BaseBlockManager") + + +class BaseBlockManager(DataManager): + """ + Core internal data structure to implement DataFrame, Series, etc. + + Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a + lightweight blocked set of labeled data to be manipulated by the DataFrame + public API class + + Attributes + ---------- + shape + ndim + axes + values + items + + Methods + ------- + set_axis(axis, new_labels) + copy(deep=True) + + get_dtypes + + apply(func, axes, block_filter_fn) + + get_bool_data + get_numeric_data + + get_slice(slice_like, axis) + get(label) + iget(loc) + + take(indexer, axis) + reindex_axis(new_labels, axis) + reindex_indexer(new_labels, indexer, axis) + + delete(label) + insert(loc, label, value) + set(label, value) + + Parameters + ---------- + blocks: Sequence of Block + axes: Sequence of Index + verify_integrity: bool, default True + + Notes + ----- + This is *not* a public API class + """ + + __slots__ = () + + _blknos: npt.NDArray[np.intp] + _blklocs: npt.NDArray[np.intp] + blocks: tuple[Block, ...] 
+ axes: list[Index] + refs: list[weakref.ref | None] | None + parent: object + + @property + def ndim(self) -> int: + raise NotImplementedError + + _known_consolidated: bool + _is_consolidated: bool + + def __init__(self, blocks, axes, refs=None, verify_integrity: bool = True) -> None: + raise NotImplementedError + + @classmethod + def from_blocks( + cls: type_t[T], + blocks: list[Block], + axes: list[Index], + refs: list[weakref.ref | None] | None = None, + parent: object = None, + ) -> T: + raise NotImplementedError + + @property + def blknos(self) -> npt.NDArray[np.intp]: + """ + Suppose we want to find the array corresponding to our i'th column. + + blknos[i] identifies the block from self.blocks that contains this column. + + blklocs[i] identifies the column of interest within + self.blocks[self.blknos[i]] + """ + if self._blknos is None: + # Note: these can be altered by other BlockManager methods. + self._rebuild_blknos_and_blklocs() + + return self._blknos + + @property + def blklocs(self) -> npt.NDArray[np.intp]: + """ + See blknos.__doc__ + """ + if self._blklocs is None: + # Note: these can be altered by other BlockManager methods. + self._rebuild_blknos_and_blklocs() + + return self._blklocs + + def make_empty(self: T, axes=None) -> T: + """return an empty BlockManager with the items axis of len 0""" + if axes is None: + axes = [Index([])] + self.axes[1:] + + # preserve dtype if possible + if self.ndim == 1: + assert isinstance(self, SingleBlockManager) # for mypy + blk = self.blocks[0] + arr = blk.values[:0] + bp = BlockPlacement(slice(0, 0)) + nb = blk.make_block_same_class(arr, placement=bp) + blocks = [nb] + else: + blocks = [] + return type(self).from_blocks(blocks, axes) + + def __nonzero__(self) -> bool: + return True + + # Python3 compat + __bool__ = __nonzero__ + + def _normalize_axis(self, axis: int) -> int: + # switch axis to follow BlockManager logic + if self.ndim == 2: + axis = 1 if axis == 0 else 0 + return axis + + def set_axis(self, axis: int, new_labels: Index) -> None: + # Caller is responsible for ensuring we have an Index object. + self._validate_set_axis(axis, new_labels) + self.axes[axis] = new_labels + + @property + def is_single_block(self) -> bool: + # Assumes we are 2D; overridden by SingleBlockManager + return len(self.blocks) == 1 + + @property + def items(self) -> Index: + return self.axes[0] + + def _has_no_reference(self, i: int) -> bool: + """ + Check for column `i` if it has references. + (whether it references another array or is itself being referenced) + Returns True if the column has no references. + """ + blkno = self.blknos[i] + return self._has_no_reference_block(blkno) + + def _has_no_reference_block(self, blkno: int) -> bool: + """ + Check for block `i` if it has references. + (whether it references another array or is itself being referenced) + Returns True if the block has no references. + """ + # TODO(CoW) include `or self.refs[blkno]() is None` ? + return ( + self.refs is None or self.refs[blkno] is None + ) and weakref.getweakrefcount(self.blocks[blkno]) == 0 + + def _clear_reference_block(self, blkno: int) -> None: + """ + Clear any reference for column `i`. + """ + if self.refs is not None: + self.refs[blkno] = None + if com.all_none(*self.refs): + self.parent = None + + def get_dtypes(self): + dtypes = np.array([blk.dtype for blk in self.blocks]) + return dtypes.take(self.blknos) + + @property + def arrays(self) -> list[ArrayLike]: + """ + Quick access to the backing arrays of the Blocks. 
+ + Only for compatibility with ArrayManager for testing convenience. + Not to be used in actual code, and return value is not the same as the + ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs). + + Warning! The returned arrays don't handle Copy-on-Write, so this should + be used with caution (only in read-mode). + """ + return [blk.values for blk in self.blocks] + + def __repr__(self) -> str: + output = type(self).__name__ + for i, ax in enumerate(self.axes): + if i == 0: + output += f"\nItems: {ax}" + else: + output += f"\nAxis {i}: {ax}" + + for block in self.blocks: + output += f"\n{block}" + return output + + def apply( + self: T, + f, + align_keys: list[str] | None = None, + ignore_failures: bool = False, + **kwargs, + ) -> T: + """ + Iterate over the blocks, collect and create a new BlockManager. + + Parameters + ---------- + f : str or callable + Name of the Block method to apply. + align_keys: List[str] or None, default None + ignore_failures: bool, default False + **kwargs + Keywords to pass to `f` + + Returns + ------- + BlockManager + """ + assert "filter" not in kwargs + + align_keys = align_keys or [] + result_blocks: list[Block] = [] + # fillna: Series/DataFrame is responsible for making sure value is aligned + + aligned_args = {k: kwargs[k] for k in align_keys} + + for b in self.blocks: + + if aligned_args: + + for k, obj in aligned_args.items(): + if isinstance(obj, (ABCSeries, ABCDataFrame)): + # The caller is responsible for ensuring that + # obj.axes[-1].equals(self.items) + if obj.ndim == 1: + kwargs[k] = obj.iloc[b.mgr_locs.indexer]._values + else: + kwargs[k] = obj.iloc[:, b.mgr_locs.indexer]._values + else: + # otherwise we have an ndarray + kwargs[k] = obj[b.mgr_locs.indexer] + + try: + if callable(f): + applied = b.apply(f, **kwargs) + else: + applied = getattr(b, f)(**kwargs) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + continue + result_blocks = extend_blocks(applied, result_blocks) + + if ignore_failures: + return self._combine(result_blocks) + + out = type(self).from_blocks(result_blocks, self.axes) + return out + + def where(self: T, other, cond, align: bool) -> T: + if align: + align_keys = ["other", "cond"] + else: + align_keys = ["cond"] + other = extract_array(other, extract_numpy=True) + + return self.apply( + "where", + align_keys=align_keys, + other=other, + cond=cond, + ) + + def setitem(self: T, indexer, value) -> T: + """ + Set values with indexer. 
+ + For SingleBlockManager, this backs s[indexer] = value + """ + if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: + raise ValueError(f"Cannot set values with ndim > {self.ndim}") + + if _using_copy_on_write() and not self._has_no_reference(0): + # if being referenced -> perform Copy-on-Write and clear the reference + # this method is only called if there is a single block -> hardcoded 0 + self = self.copy() + + return self.apply("setitem", indexer=indexer, value=value) + + def putmask(self, mask, new, align: bool = True): + if _using_copy_on_write() and any( + not self._has_no_reference_block(i) for i in range(len(self.blocks)) + ): + # some reference -> copy full dataframe + # TODO(CoW) this could be optimized to only copy the blocks that would + # get modified + self = self.copy() + + if align: + align_keys = ["new", "mask"] + else: + align_keys = ["mask"] + new = extract_array(new, extract_numpy=True) + + return self.apply( + "putmask", + align_keys=align_keys, + mask=mask, + new=new, + ) + + def diff(self: T, n: int, axis: int) -> T: + axis = self._normalize_axis(axis) + return self.apply("diff", n=n, axis=axis) + + def interpolate(self: T, **kwargs) -> T: + return self.apply("interpolate", **kwargs) + + def shift(self: T, periods: int, axis: int, fill_value) -> T: + axis = self._normalize_axis(axis) + if fill_value is lib.no_default: + fill_value = None + + return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) + + def fillna(self: T, value, limit, inplace: bool, downcast) -> T: + + if limit is not None: + # Do this validation even if we go through one of the no-op paths + limit = libalgos.validate_limit(None, limit=limit) + if inplace: + # TODO(CoW) can be optimized to only copy those blocks that have refs + if _using_copy_on_write() and any( + not self._has_no_reference_block(i) for i in range(len(self.blocks)) + ): + self = self.copy() + + return self.apply( + "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast + ) + + def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: + return self.apply("astype", dtype=dtype, copy=copy, errors=errors) + + def convert( + self: T, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + ) -> T: + return self.apply( + "convert", + copy=copy, + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + ) + + def replace(self: T, to_replace, value, inplace: bool) -> T: + inplace = validate_bool_kwarg(inplace, "inplace") + # NDFrame.replace ensures the not-is_list_likes here + assert not is_list_like(to_replace) + assert not is_list_like(value) + return self.apply( + "replace", to_replace=to_replace, value=value, inplace=inplace + ) + + def replace_regex(self, **kwargs): + return self.apply("_replace_regex", **kwargs) + + def replace_list( + self: T, + src_list: list[Any], + dest_list: list[Any], + inplace: bool = False, + regex: bool = False, + ) -> T: + """do a list replace""" + inplace = validate_bool_kwarg(inplace, "inplace") + + bm = self.apply( + "replace_list", + src_list=src_list, + dest_list=dest_list, + inplace=inplace, + regex=regex, + ) + bm._consolidate_inplace() + return bm + + def to_native_types(self: T, **kwargs) -> T: + """ + Convert values to native types (strings / python objects) that are used + in formatting (repr / csv). 
+ """ + return self.apply("to_native_types", **kwargs) + + @property + def is_numeric_mixed_type(self) -> bool: + return all(block.is_numeric for block in self.blocks) + + @property + def any_extension_types(self) -> bool: + """Whether any of the blocks in this manager are extension blocks""" + return any(block.is_extension for block in self.blocks) + + @property + def is_view(self) -> bool: + """return a boolean if we are a single block and are a view""" + if len(self.blocks) == 1: + return self.blocks[0].is_view + + # It is technically possible to figure out which blocks are views + # e.g. [ b.values.base is not None for b in self.blocks ] + # but then we have the case of possibly some blocks being a view + # and some blocks not. setting in theory is possible on the non-view + # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit + # complicated + + return False + + def _get_data_subset(self: T, predicate: Callable) -> T: + blocks = [blk for blk in self.blocks if predicate(blk.values)] + return self._combine(blocks, copy=False) + + def get_bool_data(self: T, copy: bool = False) -> T: + """ + Select blocks that are bool-dtype and columns from object-dtype blocks + that are all-bool. + + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + + new_blocks = [] + + for blk in self.blocks: + if blk.dtype == bool: + new_blocks.append(blk) + + elif blk.is_object: + nbs = blk._split() + for nb in nbs: + if nb.is_bool: + new_blocks.append(nb) + + return self._combine(new_blocks, copy) + + def get_numeric_data(self: T, copy: bool = False) -> T: + """ + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] + if len(numeric_blocks) == len(self.blocks): + # Avoid somewhat expensive _combine + if copy: + return self.copy(deep=True) + return self + return self._combine(numeric_blocks, copy) + + def _combine( + self: T, blocks: list[Block], copy: bool = True, index: Index | None = None + ) -> T: + """return a new manager with the blocks""" + if len(blocks) == 0: + if self.ndim == 2: + # retain our own Index dtype + if index is not None: + axes = [self.items[:0], index] + else: + axes = [self.items[:0]] + self.axes[1:] + return self.make_empty(axes) + return self.make_empty() + + # FIXME: optimization potential + indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) + inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) + + new_blocks: list[Block] = [] + # TODO(CoW) we could optimize here if we know that the passed blocks + # are fully "owned" (eg created from an operation, not coming from + # an existing manager) + new_refs: list[weakref.ref | None] | None = None if copy else [] + for b in blocks: + nb = b.copy(deep=copy) + nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer]) + new_blocks.append(nb) + if not copy: + # None has no attribute "append" + new_refs.append(weakref.ref(b)) # type: ignore[union-attr] + + axes = list(self.axes) + if index is not None: + axes[-1] = index + axes[0] = self.items.take(indexer) + + return type(self).from_blocks( + new_blocks, axes, new_refs, parent=None if copy else self + ) + + @property + def nblocks(self) -> int: + return len(self.blocks) + + def copy(self: T, deep=True) -> T: + """ + Make deep or shallow copy of BlockManager + + Parameters + ---------- + deep : bool, string or None, default True + If False or None, return a shallow copy (do not copy data) + If 'all', copy data 
and a deep copy of the index + + Returns + ------- + BlockManager + """ + if deep is None: + if _using_copy_on_write(): + # use shallow copy + deep = False + else: + # preserve deep copy for BlockManager with copy=None + deep = True + + # this preserves the notion of view copying of axes + if deep: + # hit in e.g. tests.io.json.test_pandas + + def copy_func(ax): + return ax.copy(deep=True) if deep == "all" else ax.view() + + new_axes = [copy_func(ax) for ax in self.axes] + else: + new_axes = list(self.axes) + + res = self.apply("copy", deep=deep) + new_refs: list[weakref.ref | None] | None + if deep: + new_refs = None + parent = None + else: + new_refs = [weakref.ref(blk) for blk in self.blocks] + parent = self + + res.axes = new_axes + res.refs = new_refs + res.parent = parent + + if self.ndim > 1: + # Avoid needing to re-compute these + blknos = self._blknos + if blknos is not None: + res._blknos = blknos.copy() + res._blklocs = self._blklocs.copy() + + if deep: + res._consolidate_inplace() + return res + + def consolidate(self: T) -> T: + """ + Join together blocks having same dtype + + Returns + ------- + y : BlockManager + """ + if self.is_consolidated(): + return self + + bm = type(self)(self.blocks, self.axes, self.refs, verify_integrity=False) + bm._is_consolidated = False + bm._consolidate_inplace() + return bm + + def reindex_indexer( + self: T, + new_axis: Index, + indexer: npt.NDArray[np.intp] | None, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool | None = True, + only_slice: bool = False, + *, + use_na_proxy: bool = False, + ) -> T: + """ + Parameters + ---------- + new_axis : Index + indexer : ndarray[intp] or None + axis : int + fill_value : object, default None + allow_dups : bool, default False + copy : bool or None, default True + If None, regard as False to get shallow copy. + only_slice : bool, default False + Whether to take views, not copies, along columns. + use_na_proxy : bool, default False + Whether to use a np.void ndarray for newly introduced columns. + + pandas-indexer with -1's only. 
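+
+        Examples
+        --------
+        A minimal sketch (assumes the default block-backed manager; a -1 in
+        the indexer introduces a new all-NA column):
+
+        >>> import numpy as np
+        >>> import pandas as pd
+        >>> mgr = pd.DataFrame({"a": [1, 2, 3]})._mgr
+        >>> new_mgr = mgr.reindex_indexer(
+        ...     pd.Index(["a", "b"]), np.array([0, -1], dtype=np.intp), axis=0
+        ... )
+        >>> new_mgr.shape
+        (2, 3)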
+ """ + if copy is None: + if _using_copy_on_write(): + # use shallow copy + copy = False + else: + # preserve deep copy for BlockManager with copy=None + copy = True + + if indexer is None: + if new_axis is self.axes[axis] and not copy: + return self + + result = self.copy(deep=copy) + result.axes = list(self.axes) + result.axes[axis] = new_axis + return result + + # some axes don't allow reindexing with dups + if not allow_dups: + self.axes[axis]._validate_can_reindex(indexer) + + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + if axis == 0: + new_blocks, new_refs = self._slice_take_blocks_ax0( + indexer, + fill_value=fill_value, + only_slice=only_slice, + use_na_proxy=use_na_proxy, + ) + parent = None if com.all_none(*new_refs) else self + else: + new_blocks = [ + blk.take_nd( + indexer, + axis=1, + fill_value=( + fill_value if fill_value is not None else blk.fill_value + ), + ) + for blk in self.blocks + ] + new_refs = None + parent = None + + new_axes = list(self.axes) + new_axes[axis] = new_axis + + new_mgr = type(self).from_blocks(new_blocks, new_axes, new_refs, parent=parent) + if axis == 1: + # We can avoid the need to rebuild these + new_mgr._blknos = self.blknos.copy() + new_mgr._blklocs = self.blklocs.copy() + return new_mgr + + def _slice_take_blocks_ax0( + self, + slice_or_indexer: slice | np.ndarray, + fill_value=lib.no_default, + only_slice: bool = False, + *, + use_na_proxy: bool = False, + ) -> tuple[list[Block], list[weakref.ref | None]]: + """ + Slice/take blocks along axis=0. + + Overloaded for SingleBlock + + Parameters + ---------- + slice_or_indexer : slice or np.ndarray[int64] + fill_value : scalar, default lib.no_default + only_slice : bool, default False + If True, we always return views on existing arrays, never copies. + This is used when called from ops.blockwise.operate_blockwise. + use_na_proxy : bool, default False + Whether to use a np.void ndarray for newly introduced columns. 
+ + Returns + ------- + new_blocks : list of Block + """ + allow_fill = fill_value is not lib.no_default + + sl_type, slobj, sllen = _preprocess_slice_or_indexer( + slice_or_indexer, self.shape[0], allow_fill=allow_fill + ) + + if self.is_single_block: + blk = self.blocks[0] + + if sl_type == "slice": + # GH#32959 EABlock would fail since we can't make 0-width + # TODO(EA2D): special casing unnecessary with 2D EAs + if sllen == 0: + return [], [] + bp = BlockPlacement(slice(0, sllen)) + return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)], [ + weakref.ref(blk) + ] + elif not allow_fill or self.ndim == 1: + if allow_fill and fill_value is None: + fill_value = blk.fill_value + + if not allow_fill and only_slice: + # GH#33597 slice instead of take, so we get + # views instead of copies + blocks = [ + blk.getitem_block_columns( + slice(ml, ml + 1), new_mgr_locs=BlockPlacement(i) + ) + for i, ml in enumerate(slobj) + ] + # We have + # all(np.shares_memory(nb.values, blk.values) for nb in blocks) + return blocks, [weakref.ref(blk)] * len(blocks) + else: + bp = BlockPlacement(slice(0, sllen)) + return [ + blk.take_nd( + slobj, + axis=0, + new_mgr_locs=bp, + fill_value=fill_value, + ) + ], [None] + + if sl_type == "slice": + blknos = self.blknos[slobj] + blklocs = self.blklocs[slobj] + else: + blknos = algos.take_nd( + self.blknos, slobj, fill_value=-1, allow_fill=allow_fill + ) + blklocs = algos.take_nd( + self.blklocs, slobj, fill_value=-1, allow_fill=allow_fill + ) + + # When filling blknos, make sure blknos is updated before appending to + # blocks list, that way new blkno is exactly len(blocks). + blocks = [] + refs: list[weakref.ref | None] = [] + group = not only_slice + for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=group): + if blkno == -1: + # If we've got here, fill_value was not lib.no_default + + blocks.append( + self._make_na_block( + placement=mgr_locs, + fill_value=fill_value, + use_na_proxy=use_na_proxy, + ) + ) + refs.append(None) + else: + blk = self.blocks[blkno] + + # Otherwise, slicing along items axis is necessary. + if not blk._can_consolidate and not blk._validate_ndim: + # i.e. we dont go through here for DatetimeTZBlock + # A non-consolidatable block, it's easy, because there's + # only one item and each mgr loc is a copy of that single + # item. 
+ for mgr_loc in mgr_locs: + newblk = blk.copy(deep=False) + newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1)) + blocks.append(newblk) + refs.append(weakref.ref(blk)) + + else: + # GH#32779 to avoid the performance penalty of copying, + # we may try to only slice + taker = blklocs[mgr_locs.indexer] + max_len = max(len(mgr_locs), taker.max() + 1) + if only_slice or _using_copy_on_write(): + taker = lib.maybe_indices_to_slice(taker, max_len) + + if isinstance(taker, slice): + nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs) + blocks.append(nb) + refs.append(weakref.ref(blk)) + elif only_slice: + # GH#33597 slice instead of take, so we get + # views instead of copies + for i, ml in zip(taker, mgr_locs): + slc = slice(i, i + 1) + bp = BlockPlacement(ml) + nb = blk.getitem_block_columns(slc, new_mgr_locs=bp) + # We have np.shares_memory(nb.values, blk.values) + blocks.append(nb) + refs.append(weakref.ref(blk)) + else: + nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs) + blocks.append(nb) + refs.append(None) + + return blocks, refs + + def _make_na_block( + self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False + ) -> Block: + # Note: we only get here with self.ndim == 2 + + if use_na_proxy: + assert fill_value is None + shape = (len(placement), self.shape[1]) + vals = np.empty(shape, dtype=np.void) + nb = NumpyBlock(vals, placement, ndim=2) + return nb + + if fill_value is None: + fill_value = np.nan + block_shape = list(self.shape) + block_shape[0] = len(placement) + + dtype, fill_value = infer_dtype_from_scalar(fill_value) + # error: Argument "dtype" to "empty" has incompatible type "Union[dtype, + # ExtensionDtype]"; expected "Union[dtype, None, type, _SupportsDtype, str, + # Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict, + # Tuple[Any, Any]]" + block_values = np.empty(block_shape, dtype=dtype) # type: ignore[arg-type] + block_values.fill(fill_value) + return new_block_2d(block_values, placement=placement) + + def take( + self: T, + indexer, + axis: int = 1, + verify: bool = True, + convert_indices: bool = True, + ) -> T: + """ + Take items along any axis. + + indexer : np.ndarray or slice + axis : int, default 1 + verify : bool, default True + Check that all entries are between 0 and len(self) - 1, inclusive. + Pass verify=False if this check has been done by the caller. + convert_indices : bool, default True + Whether to attempt to convert indices to positive values. + + Returns + ------- + BlockManager + """ + # We have 6 tests that get here with a slice + indexer = ( + np.arange(indexer.start, indexer.stop, indexer.step, dtype=np.intp) + if isinstance(indexer, slice) + else np.asanyarray(indexer, dtype=np.intp) + ) + + n = self.shape[axis] + if convert_indices: + indexer = maybe_convert_indices(indexer, n, verify=verify) + + new_labels = self.axes[axis].take(indexer) + return self.reindex_indexer( + new_axis=new_labels, + indexer=indexer, + axis=axis, + allow_dups=True, + copy=None, + ) + + +class BlockManager(libinternals.BlockManager, BaseBlockManager): + """ + BaseBlockManager that holds 2D blocks. 
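+
+    Examples
+    --------
+    A minimal sketch (assumes the default block-backed manager): a mixed-dtype
+    DataFrame is backed by one Block per dtype after consolidation.
+
+    >>> import pandas as pd
+    >>> df = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5]})
+    >>> df._mgr.nblocks  # one int64 block and one float64 block
+    2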
+ """ + + ndim = 2 + + # ---------------------------------------------------------------- + # Constructors + + def __init__( + self, + blocks: Sequence[Block], + axes: Sequence[Index], + refs: list[weakref.ref | None] | None = None, + parent: object = None, + verify_integrity: bool = True, + ) -> None: + + if verify_integrity: + # Assertion disabled for performance + # assert all(isinstance(x, Index) for x in axes) + + for block in blocks: + if self.ndim != block.ndim: + raise AssertionError( + f"Number of Block dimensions ({block.ndim}) must equal " + f"number of axes ({self.ndim})" + ) + if isinstance(block, DatetimeTZBlock) and block.values.ndim == 1: + # TODO(2.0): remove once fastparquet no longer needs this + warnings.warn( + "In a future version, the BlockManager constructor " + "will assume that a DatetimeTZBlock with block.ndim==2 " + "has block.values.ndim == 2.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + + # error: Incompatible types in assignment (expression has type + # "Union[ExtensionArray, ndarray]", variable has type + # "DatetimeArray") + block.values = ensure_block_shape( # type: ignore[assignment] + block.values, self.ndim + ) + try: + block._cache.clear() + except AttributeError: + # _cache not initialized + pass + + self._verify_integrity() + + def _verify_integrity(self) -> None: + mgr_shape = self.shape + tot_items = sum(len(x.mgr_locs) for x in self.blocks) + for block in self.blocks: + if block.shape[1:] != mgr_shape[1:]: + raise construction_error(tot_items, block.shape[1:], self.axes) + if len(self.items) != tot_items: + raise AssertionError( + "Number of manager items must equal union of " + f"block items\n# manager items: {len(self.items)}, # " + f"tot_items: {tot_items}" + ) + if self.refs is not None: + if len(self.refs) != len(self.blocks): + raise AssertionError( + "Number of passed refs must equal the number of blocks: " + f"{len(self.refs)} refs vs {len(self.blocks)} blocks." + "\nIf you see this error, please report a bug at " + "https://github.com/pandas-dev/pandas/issues" + ) + + @classmethod + def from_blocks( + cls, + blocks: list[Block], + axes: list[Index], + refs: list[weakref.ref | None] | None = None, + parent: object = None, + ) -> BlockManager: + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + parent = parent if _using_copy_on_write() else None + return cls(blocks, axes, refs, parent, verify_integrity=False) + + # ---------------------------------------------------------------- + # Indexing + + def fast_xs(self, loc: int) -> SingleBlockManager: + """ + Return the array corresponding to `frame.iloc[loc]`. 
+ + Parameters + ---------- + loc : int + + Returns + ------- + np.ndarray or ExtensionArray + """ + if len(self.blocks) == 1: + result = self.blocks[0].iget((slice(None), loc)) + block = new_block(result, placement=slice(0, len(result)), ndim=1) + # in the case of a single block, the new block is a view + ref = weakref.ref(self.blocks[0]) + return SingleBlockManager(block, self.axes[0], [ref], parent=self) + + dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) + + n = len(self) + + # GH#46406 + immutable_ea = isinstance(dtype, SparseDtype) + + if isinstance(dtype, ExtensionDtype) and not immutable_ea: + cls = dtype.construct_array_type() + result = cls._empty((n,), dtype=dtype) + else: + # error: Argument "dtype" to "empty" has incompatible type + # "Union[Type[object], dtype[Any], ExtensionDtype, None]"; expected + # "None" + result = np.empty( + n, dtype=object if immutable_ea else dtype # type: ignore[arg-type] + ) + result = ensure_wrapped_if_datetimelike(result) + + for blk in self.blocks: + # Such assignment may incorrectly coerce NaT to None + # result[blk.mgr_locs] = blk._slice((slice(None), loc)) + for i, rl in enumerate(blk.mgr_locs): + result[rl] = blk.iget((i, loc)) + + if immutable_ea: + dtype = cast(ExtensionDtype, dtype) + result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) + + block = new_block(result, placement=slice(0, len(result)), ndim=1) + return SingleBlockManager(block, self.axes[0]) + + def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager: + """ + Return the data as a SingleBlockManager. + """ + block = self.blocks[self.blknos[i]] + values = block.iget(self.blklocs[i]) + + # shortcut for select a single-dim from a 2-dim BM + bp = BlockPlacement(slice(0, len(values))) + nb = type(block)(values, placement=bp, ndim=1) + ref = weakref.ref(block) if track_ref else None + parent = self if track_ref else None + return SingleBlockManager(nb, self.axes[1], [ref], parent) + + def iget_values(self, i: int) -> ArrayLike: + """ + Return the data for column i as the values (ndarray or ExtensionArray). + + Warning! The returned array is a view but doesn't handle Copy-on-Write, + so this should be used with caution. + """ + # TODO(CoW) making the arrays read-only might make this safer to use? + block = self.blocks[self.blknos[i]] + values = block.iget(self.blklocs[i]) + return values + + @property + def column_arrays(self) -> list[np.ndarray]: + """ + Used in the JSON C code to access column arrays. + This optimizes compared to using `iget_values` by converting each + + Warning! This doesn't handle Copy-on-Write, so should be used with + caution (current use case of consuming this in the JSON code is fine). + """ + # This is an optimized equivalent to + # result = [self.iget_values(i) for i in range(len(self.items))] + result: list[np.ndarray | None] = [None] * len(self.items) + + for blk in self.blocks: + mgr_locs = blk._mgr_locs + values = blk.values_for_json() + if values.ndim == 1: + # TODO(EA2D): special casing not needed with 2D EAs + result[mgr_locs[0]] = values + + else: + for i, loc in enumerate(mgr_locs): + result[loc] = values[i] + + # error: Incompatible return value type (got "List[None]", + # expected "List[ndarray[Any, Any]]") + return result # type: ignore[return-value] + + def iset( + self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False + ): + """ + Set new item in-place. Does not consolidate. 
Adds new Block if not + contained in the current set of items + """ + + # FIXME: refactor, clearly separate broadcasting & zip-like assignment + # can prob also fix the various if tests for sparse/categorical + if self._blklocs is None and self.ndim > 1: + self._rebuild_blknos_and_blklocs() + + # Note: we exclude DTA/TDA here + value_is_extension_type = is_1d_only_ea_dtype(value.dtype) + if not value_is_extension_type: + if value.ndim == 2: + value = value.T + else: + value = ensure_block_shape(value, ndim=2) + + if value.shape[1:] != self.shape[1:]: + raise AssertionError( + "Shape of new values must be compatible with manager shape" + ) + + if lib.is_integer(loc): + # We have 6 tests where loc is _not_ an int. + # In this case, get_blkno_placements will yield only one tuple, + # containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1))) + + # Check if we can use _iset_single fastpath + loc = cast(int, loc) + blkno = self.blknos[loc] + blk = self.blocks[blkno] + if len(blk._mgr_locs) == 1: # TODO: fastest way to check this? + return self._iset_single( + loc, + value, + inplace=inplace, + blkno=blkno, + blk=blk, + ) + + # error: Incompatible types in assignment (expression has type + # "List[Union[int, slice, ndarray]]", variable has type "Union[int, + # slice, ndarray]") + loc = [loc] # type: ignore[assignment] + + # categorical/sparse/datetimetz + if value_is_extension_type: + + def value_getitem(placement): + return value + + else: + + def value_getitem(placement): + return value[placement.indexer] + + # Accessing public blknos ensures the public versions are initialized + blknos = self.blknos[loc] + blklocs = self.blklocs[loc].copy() + + unfit_mgr_locs = [] + unfit_val_locs = [] + removed_blknos = [] + for blkno_l, val_locs in libinternals.get_blkno_placements(blknos, group=True): + blk = self.blocks[blkno_l] + blk_locs = blklocs[val_locs.indexer] + if inplace and blk.should_store(value): + # Updating inplace -> check if we need to do Copy-on-Write + if _using_copy_on_write() and not self._has_no_reference_block(blkno_l): + blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True) + self._clear_reference_block(blkno_l) + else: + blk.set_inplace(blk_locs, value_getitem(val_locs)) + else: + unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs]) + unfit_val_locs.append(val_locs) + + # If all block items are unfit, schedule the block for removal. 
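+                # ("Unfit" items are those whose new values could not be set
+                #  in-place into the existing block, e.g. because inplace is
+                #  False or the dtype does not match; they are re-created as
+                #  new blocks further below.)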
+ if len(val_locs) == len(blk.mgr_locs): + removed_blknos.append(blkno_l) + else: + nb = blk.delete(blk_locs) + blocks_tup = ( + self.blocks[:blkno_l] + (nb,) + self.blocks[blkno_l + 1 :] + ) + self.blocks = blocks_tup + self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb)) + # blk.delete gives a copy, so we can remove a possible reference + self._clear_reference_block(blkno_l) + + if len(removed_blknos): + # Remove blocks & update blknos and refs accordingly + is_deleted = np.zeros(self.nblocks, dtype=np.bool_) + is_deleted[removed_blknos] = True + + new_blknos = np.empty(self.nblocks, dtype=np.intp) + new_blknos.fill(-1) + new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos)) + self._blknos = new_blknos[self._blknos] + self.blocks = tuple( + blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos) + ) + if self.refs is not None: + self.refs = [ + ref + for i, ref in enumerate(self.refs) + if i not in set(removed_blknos) + ] + + if unfit_val_locs: + unfit_idxr = np.concatenate(unfit_mgr_locs) + unfit_count = len(unfit_idxr) + + new_blocks: list[Block] = [] + if value_is_extension_type: + # This code (ab-)uses the fact that EA blocks contain only + # one item. + # TODO(EA2D): special casing unnecessary with 2D EAs + new_blocks.extend( + new_block_2d( + values=value, + placement=BlockPlacement(slice(mgr_loc, mgr_loc + 1)), + ) + for mgr_loc in unfit_idxr + ) + + self._blknos[unfit_idxr] = np.arange(unfit_count) + len(self.blocks) + self._blklocs[unfit_idxr] = 0 + + else: + # unfit_val_locs contains BlockPlacement objects + unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:]) + + new_blocks.append( + new_block_2d( + values=value_getitem(unfit_val_items), + placement=BlockPlacement(unfit_idxr), + ) + ) + + self._blknos[unfit_idxr] = len(self.blocks) + self._blklocs[unfit_idxr] = np.arange(unfit_count) + + self.blocks += tuple(new_blocks) + # TODO(CoW) is this always correct to assume that the new_blocks + # are not referencing anything else? + if self.refs is not None: + self.refs = list(self.refs) + [None] * len(new_blocks) + + # Newly created block's dtype may already be present. + self._known_consolidated = False + + def _iset_single( + self, loc: int, value: ArrayLike, inplace: bool, blkno: int, blk: Block + ) -> None: + """ + Fastpath for iset when we are only setting a single position and + the Block currently in that position is itself single-column. + + In this case we can swap out the entire Block and blklocs and blknos + are unaffected. + """ + # Caller is responsible for verifying value.shape + + if inplace and blk.should_store(value): + copy = False + if _using_copy_on_write() and not self._has_no_reference_block(blkno): + # perform Copy-on-Write and clear the reference + copy = True + self._clear_reference_block(blkno) + iloc = self.blklocs[loc] + blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy) + return + + nb = new_block_2d(value, placement=blk._mgr_locs) + old_blocks = self.blocks + new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :] + self.blocks = new_blocks + self._clear_reference_block(blkno) + return + + def column_setitem( + self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False + ) -> None: + """ + Set values ("setitem") into a single column (not setting the full column). 
+ + This is a method on the BlockManager level, to avoid creating an + intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) + """ + if _using_copy_on_write() and not self._has_no_reference(loc): + # otherwise perform Copy-on-Write and clear the reference + blkno = self.blknos[loc] + blocks = list(self.blocks) + blocks[blkno] = blocks[blkno].copy() + self.blocks = tuple(blocks) + self._clear_reference_block(blkno) + + # this manager is only created temporarily to mutate the values in place + # so don't track references, otherwise the `setitem` would perform CoW again + col_mgr = self.iget(loc, track_ref=False) + if inplace: + col_mgr.setitem_inplace(idx, value) + else: + new_mgr = col_mgr.setitem((idx,), value) + self.iset(loc, new_mgr._block.values, inplace=True) + + def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: + """ + Insert item at selected position. + + Parameters + ---------- + loc : int + item : hashable + value : np.ndarray or ExtensionArray + """ + # insert to the axis; this could possibly raise a TypeError + new_axis = self.items.insert(loc, item) + + if value.ndim == 2: + value = value.T + if len(value) > 1: + raise ValueError( + f"Expected a 1D array, got an array with shape {value.T.shape}" + ) + else: + value = ensure_block_shape(value, ndim=self.ndim) + + bp = BlockPlacement(slice(loc, loc + 1)) + block = new_block_2d(values=value, placement=bp) + + if not len(self.blocks): + # Fastpath + self._blklocs = np.array([0], dtype=np.intp) + self._blknos = np.array([0], dtype=np.intp) + else: + self._insert_update_mgr_locs(loc) + self._insert_update_blklocs_and_blknos(loc) + + self.axes[0] = new_axis + self.blocks += (block,) + # TODO(CoW) do we always "own" the passed `value`? + if self.refs is not None: + self.refs += [None] + + self._known_consolidated = False + + if sum(not block.is_extension for block in self.blocks) > 100: + warnings.warn( + "DataFrame is highly fragmented. This is usually the result " + "of calling `frame.insert` many times, which has poor performance. " + "Consider joining all columns at once using pd.concat(axis=1) " + "instead. To get a de-fragmented frame, use `newframe = frame.copy()`", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + + def _insert_update_mgr_locs(self, loc) -> None: + """ + When inserting a new Block at location 'loc', we increment + all of the mgr_locs of blocks above that by one. + """ + for blkno, count in _fast_count_smallints(self.blknos[loc:]): + # .620 this way, .326 of which is in increment_above + blk = self.blocks[blkno] + blk._mgr_locs = blk._mgr_locs.increment_above(loc) + + def _insert_update_blklocs_and_blknos(self, loc) -> None: + """ + When inserting a new Block at location 'loc', we update our + _blklocs and _blknos. + """ + + # Accessing public blklocs ensures the public versions are initialized + if loc == self.blklocs.shape[0]: + # np.append is a lot faster, let's use it if we can. + self._blklocs = np.append(self._blklocs, 0) + self._blknos = np.append(self._blknos, len(self.blocks)) + elif loc == 0: + # np.append is a lot faster, let's use it if we can. 
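+            # (prepend at position 0 by reversing, appending, then reversing back)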
+ self._blklocs = np.append(self._blklocs[::-1], 0)[::-1] + self._blknos = np.append(self._blknos[::-1], len(self.blocks))[::-1] + else: + new_blklocs, new_blknos = libinternals.update_blklocs_and_blknos( + self.blklocs, self.blknos, loc, len(self.blocks) + ) + self._blklocs = new_blklocs + self._blknos = new_blknos + + def idelete(self, indexer) -> BlockManager: + """ + Delete selected locations, returning a new BlockManager. + """ + is_deleted = np.zeros(self.shape[0], dtype=np.bool_) + is_deleted[indexer] = True + taker = (~is_deleted).nonzero()[0] + + nbs, new_refs = self._slice_take_blocks_ax0(taker, only_slice=True) + new_columns = self.items[~is_deleted] + axes = [new_columns, self.axes[1]] + # TODO this might not be needed (can a delete ever be done in chained manner?) + parent = None if com.all_none(*new_refs) else self + return type(self)(tuple(nbs), axes, new_refs, parent, verify_integrity=False) + + # ---------------------------------------------------------------- + # Block-wise Operation + + def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: + """ + Apply grouped reduction function blockwise, returning a new BlockManager. + + Parameters + ---------- + func : grouped reduction function + ignore_failures : bool, default False + Whether to drop blocks where func raises TypeError. + + Returns + ------- + BlockManager + """ + result_blocks: list[Block] = [] + dropped_any = False + + for blk in self.blocks: + if blk.is_object: + # split on object-dtype blocks bc some columns may raise + # while others do not. + for sb in blk._split(): + try: + applied = sb.apply(func) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + dropped_any = True + continue + result_blocks = extend_blocks(applied, result_blocks) + else: + try: + applied = blk.apply(func) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + dropped_any = True + continue + result_blocks = extend_blocks(applied, result_blocks) + + if len(result_blocks) == 0: + index = Index([None]) # placeholder + else: + index = Index(range(result_blocks[0].values.shape[-1])) + + if dropped_any: + # faster to skip _combine if we haven't dropped any blocks + return self._combine(result_blocks, copy=False, index=index) + + return type(self).from_blocks(result_blocks, [self.axes[0], index]) + + def reduce( + self: T, func: Callable, ignore_failures: bool = False + ) -> tuple[T, np.ndarray]: + """ + Apply reduction function blockwise, returning a single-row BlockManager. + + Parameters + ---------- + func : reduction function + ignore_failures : bool, default False + Whether to drop blocks where func raises TypeError. + + Returns + ------- + BlockManager + np.ndarray + Indexer of mgr_locs that are retained. 
+ """ + # If 2D, we assume that we're operating column-wise + assert self.ndim == 2 + + res_blocks: list[Block] = [] + for blk in self.blocks: + nbs = blk.reduce(func, ignore_failures) + res_blocks.extend(nbs) + + index = Index([None]) # placeholder + if ignore_failures: + if res_blocks: + indexer = np.concatenate([blk.mgr_locs.as_array for blk in res_blocks]) + new_mgr = self._combine(res_blocks, copy=False, index=index) + else: + indexer = [] + new_mgr = type(self).from_blocks([], [self.items[:0], index]) + else: + indexer = np.arange(self.shape[0]) + new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) + return new_mgr, indexer + + def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: + """ + Apply array_op blockwise with another (aligned) BlockManager. + """ + return operate_blockwise(self, other, array_op) + + def _equal_values(self: BlockManager, other: BlockManager) -> bool: + """ + Used in .equals defined in base class. Only check the column values + assuming shape and indexes have already been checked. + """ + return blockwise_all(self, other, array_equals) + + def quantile( + self: T, + *, + qs: Float64Index, + axis: int = 0, + interpolation="linear", + ) -> T: + """ + Iterate over blocks applying quantile reduction. + This routine is intended for reduction type operations and + will do inference on the generated blocks. + + Parameters + ---------- + axis: reduction axis, default 0 + consolidate: bool, default True. Join together blocks having same + dtype + interpolation : type of interpolation, default 'linear' + qs : list of the quantiles to be computed + + Returns + ------- + BlockManager + """ + # Series dispatches to DataFrame for quantile, which allows us to + # simplify some of the code here and in the blocks + assert self.ndim >= 2 + assert is_list_like(qs) # caller is responsible for this + assert axis == 1 # only ever called this way + + new_axes = list(self.axes) + new_axes[1] = Float64Index(qs) + + blocks = [ + blk.quantile(axis=axis, qs=qs, interpolation=interpolation) + for blk in self.blocks + ] + + return type(self)(blocks, new_axes) + + # ---------------------------------------------------------------- + + def unstack(self, unstacker, fill_value) -> BlockManager: + """ + Return a BlockManager with all blocks unstacked. + + Parameters + ---------- + unstacker : reshape._Unstacker + fill_value : Any + fill_value for newly introduced missing values. 
+ + Returns + ------- + unstacked : BlockManager + """ + new_columns = unstacker.get_new_columns(self.items) + new_index = unstacker.new_index + + allow_fill = not unstacker.mask_all + if allow_fill: + # calculating the full mask once and passing it to Block._unstack is + # faster than letting calculating it in each repeated call + new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape) + needs_masking = new_mask2D.any(axis=0) + else: + needs_masking = np.zeros(unstacker.full_shape[1], dtype=bool) + + new_blocks: list[Block] = [] + columns_mask: list[np.ndarray] = [] + + if len(self.items) == 0: + factor = 1 + else: + fac = len(new_columns) / len(self.items) + assert fac == int(fac) + factor = int(fac) + + for blk in self.blocks: + mgr_locs = blk.mgr_locs + new_placement = mgr_locs.tile_for_unstack(factor) + + blocks, mask = blk._unstack( + unstacker, + fill_value, + new_placement=new_placement, + needs_masking=needs_masking, + ) + + new_blocks.extend(blocks) + columns_mask.extend(mask) + + # Block._unstack should ensure this holds, + assert mask.sum() == sum(len(nb._mgr_locs) for nb in blocks) + # In turn this ensures that in the BlockManager call below + # we have len(new_columns) == sum(x.shape[0] for x in new_blocks) + # which suffices to allow us to pass verify_inegrity=False + + new_columns = new_columns[columns_mask] + + bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False) + return bm + + def to_dict(self, copy: bool = True): + """ + Return a dict of str(dtype) -> BlockManager + + Parameters + ---------- + copy : bool, default True + + Returns + ------- + values : a dict of dtype -> BlockManager + """ + + bd: dict[str, list[Block]] = {} + for b in self.blocks: + bd.setdefault(str(b.dtype), []).append(b) + + # TODO(EA2D): the combine will be unnecessary with 2D EAs + return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()} + + def as_array( + self, + dtype: np.dtype | None = None, + copy: bool = False, + na_value: object = lib.no_default, + ) -> np.ndarray: + """ + Convert the blockmanager data into an numpy array. + + Parameters + ---------- + dtype : np.dtype or None, default None + Data type of the return array. + copy : bool, default False + If True then guarantee that a copy is returned. A value of + False does not guarantee that the underlying data is not + copied. + na_value : object, default lib.no_default + Value to be used as the missing value sentinel. 
+ + Returns + ------- + arr : ndarray + """ + # TODO(CoW) handle case where resulting array is a view + if len(self.blocks) == 0: + arr = np.empty(self.shape, dtype=float) + return arr.transpose() + + # We want to copy when na_value is provided to avoid + # mutating the original object + copy = copy or na_value is not lib.no_default + + if self.is_single_block: + blk = self.blocks[0] + if blk.is_extension: + # Avoid implicit conversion of extension blocks to object + + # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no + # attribute "to_numpy" + arr = blk.values.to_numpy( # type: ignore[union-attr] + dtype=dtype, + na_value=na_value, + ).reshape(blk.shape) + else: + arr = np.asarray(blk.get_values()) + if dtype: + arr = arr.astype(dtype, copy=False) + else: + arr = self._interleave(dtype=dtype, na_value=na_value) + # The underlying data was copied within _interleave + copy = False + + if copy: + arr = arr.copy() + + if na_value is not lib.no_default: + arr[isna(arr)] = na_value + + return arr.transpose() + + def _interleave( + self, + dtype: np.dtype | None = None, + na_value: object = lib.no_default, + ) -> np.ndarray: + """ + Return ndarray from blocks with specified item order + Items must be contained in the blocks + """ + if not dtype: + # Incompatible types in assignment (expression has type + # "Optional[Union[dtype[Any], ExtensionDtype]]", variable has + # type "Optional[dtype[Any]]") + dtype = interleaved_dtype( # type: ignore[assignment] + [blk.dtype for blk in self.blocks] + ) + + # TODO: https://github.com/pandas-dev/pandas/issues/22791 + # Give EAs some input on what happens here. Sparse needs this. + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + dtype = cast(np.dtype, dtype) + elif isinstance(dtype, ExtensionDtype): + dtype = np.dtype("object") + elif is_dtype_equal(dtype, str): + dtype = np.dtype("object") + + result = np.empty(self.shape, dtype=dtype) + + itemmask = np.zeros(self.shape[0]) + + if dtype == np.dtype("object") and na_value is lib.no_default: + # much more performant than using to_numpy below + for blk in self.blocks: + rl = blk.mgr_locs + arr = blk.get_values(dtype) + result[rl.indexer] = arr + itemmask[rl.indexer] = 1 + return result + + for blk in self.blocks: + rl = blk.mgr_locs + if blk.is_extension: + # Avoid implicit conversion of extension blocks to object + + # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no + # attribute "to_numpy" + arr = blk.values.to_numpy( # type: ignore[union-attr] + dtype=dtype, + na_value=na_value, + ) + else: + arr = blk.get_values(dtype) + result[rl.indexer] = arr + itemmask[rl.indexer] = 1 + + if not itemmask.all(): + raise AssertionError("Some items were not contained in blocks") + + return result + + # ---------------------------------------------------------------- + # Consolidation + + def is_consolidated(self) -> bool: + """ + Return True if more than one block with the same dtype + """ + if not self._known_consolidated: + self._consolidate_check() + return self._is_consolidated + + def _consolidate_check(self) -> None: + if len(self.blocks) == 1: + # fastpath + self._is_consolidated = True + self._known_consolidated = True + return + dtypes = [blk.dtype for blk in self.blocks if blk._can_consolidate] + self._is_consolidated = len(dtypes) == len(set(dtypes)) + self._known_consolidated = True + + def _consolidate_inplace(self) -> None: + # In general, _consolidate_inplace should only be called via + # DataFrame._consolidate_inplace, otherwise we will fail to invalidate + # 
the DataFrame's _item_cache. The exception is for newly-created + # BlockManager objects not yet attached to a DataFrame. + if not self.is_consolidated(): + if self.refs is None: + self.blocks = _consolidate(self.blocks) + else: + self.blocks, self.refs = _consolidate_with_refs(self.blocks, self.refs) + self._is_consolidated = True + self._known_consolidated = True + self._rebuild_blknos_and_blklocs() + + +class SingleBlockManager(BaseBlockManager, SingleDataManager): + """manage a single block with""" + + @property + def ndim(self) -> Literal[1]: + return 1 + + _is_consolidated = True + _known_consolidated = True + __slots__ = () + is_single_block = True + + def __init__( + self, + block: Block, + axis: Index, + refs: list[weakref.ref | None] | None = None, + parent: object = None, + verify_integrity: bool = False, + fastpath=lib.no_default, + ) -> None: + # Assertions disabled for performance + # assert isinstance(block, Block), type(block) + # assert isinstance(axis, Index), type(axis) + + if fastpath is not lib.no_default: + warnings.warn( + "The `fastpath` keyword is deprecated and will be removed " + "in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + self.axes = [axis] + self.blocks = (block,) + self.refs = refs + self.parent = parent if _using_copy_on_write() else None + + @classmethod + def from_blocks( + cls, + blocks: list[Block], + axes: list[Index], + refs: list[weakref.ref | None] | None = None, + parent: object = None, + ) -> SingleBlockManager: + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + assert len(blocks) == 1 + assert len(axes) == 1 + if refs is not None: + assert len(refs) == 1 + return cls(blocks[0], axes[0], refs, parent, verify_integrity=False) + + @classmethod + def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager: + """ + Constructor for if we have an array that is not yet a Block. + """ + block = new_block(array, placement=slice(0, len(index)), ndim=1) + return cls(block, index) + + def to_2d_mgr(self, columns: Index) -> BlockManager: + """ + Manager analogue of Series.to_frame + """ + blk = self.blocks[0] + arr = ensure_block_shape(blk.values, ndim=2) + bp = BlockPlacement(0) + new_blk = type(blk)(arr, placement=bp, ndim=2) + axes = [columns, self.axes[0]] + refs: list[weakref.ref | None] = [weakref.ref(blk)] + parent = self if _using_copy_on_write() else None + return BlockManager( + [new_blk], axes=axes, refs=refs, parent=parent, verify_integrity=False + ) + + def _has_no_reference(self, i: int = 0) -> bool: + """ + Check for column `i` if it has references. + (whether it references another array or is itself being referenced) + Returns True if the column has no references. + """ + return (self.refs is None or self.refs[0] is None) and weakref.getweakrefcount( + self.blocks[0] + ) == 0 + + def __getstate__(self): + block_values = [b.values for b in self.blocks] + block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks] + axes_array = list(self.axes) + + extra_state = { + "0.14.1": { + "axes": axes_array, + "blocks": [ + {"values": b.values, "mgr_locs": b.mgr_locs.indexer} + for b in self.blocks + ], + } + } + + # First three elements of the state are to maintain forward + # compatibility with 0.13.1. + return axes_array, block_values, block_items, extra_state + + def __setstate__(self, state): + def unpickle_block(values, mgr_locs, ndim: int) -> Block: + # TODO(EA2D): ndim would be unnecessary with 2D EAs + # older pickles may store e.g. 
DatetimeIndex instead of DatetimeArray + values = extract_array(values, extract_numpy=True) + return new_block(values, placement=mgr_locs, ndim=ndim) + + if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]: + state = state[3]["0.14.1"] + self.axes = [ensure_index(ax) for ax in state["axes"]] + ndim = len(self.axes) + self.blocks = tuple( + unpickle_block(b["values"], b["mgr_locs"], ndim=ndim) + for b in state["blocks"] + ) + else: + raise NotImplementedError("pre-0.14.1 pickles are no longer supported") + + self._post_setstate() + + def _post_setstate(self): + pass + + @cache_readonly + def _block(self) -> Block: + return self.blocks[0] + + @property + def _blknos(self): + """compat with BlockManager""" + return None + + @property + def _blklocs(self): + """compat with BlockManager""" + return None + + def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockManager: + # similar to get_slice, but not restricted to slice indexer + blk = self._block + array = blk._slice(indexer) + if array.ndim > 1: + # This will be caught by Series._get_values + raise ValueError("dimension-expanding indexing not allowed") + + bp = BlockPlacement(slice(0, len(array))) + block = type(blk)(array, placement=bp, ndim=1) + + new_idx = self.index[indexer] + # TODO(CoW) in theory only need to track reference if new_array is a view + ref = weakref.ref(blk) + return type(self)(block, new_idx, [ref], parent=self) + + def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: + # Assertion disabled for performance + # assert isinstance(slobj, slice), type(slobj) + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + blk = self._block + array = blk._slice(slobj) + bp = BlockPlacement(slice(0, len(array))) + block = type(blk)(array, placement=bp, ndim=1) + new_index = self.index._getitem_slice(slobj) + # TODO this method is only used in groupby SeriesSplitter at the moment, + # so passing refs / parent is not yet covered by the tests + return type(self)(block, new_index, [weakref.ref(blk)], parent=self) + + @property + def index(self) -> Index: + return self.axes[0] + + @property + def dtype(self) -> DtypeObj: + return self._block.dtype + + def get_dtypes(self) -> np.ndarray: + return np.array([self._block.dtype]) + + def external_values(self): + """The array that Series.values returns""" + return self._block.external_values() + + def internal_values(self): + """The array that Series._values returns""" + return self._block.values + + def array_values(self): + """The array that Series.array returns""" + return self._block.array_values + + def get_numeric_data(self, copy: bool = False): + if self._block.is_numeric: + return self.copy(deep=copy) + return self.make_empty() + + @property + def _can_hold_na(self) -> bool: + return self._block._can_hold_na + + def setitem_inplace(self, indexer, value) -> None: + """ + Set values with indexer. + + For Single[Block/Array]Manager, this backs s[indexer] = value + + This is an inplace version of `setitem()`, mutating the manager/values + in place, not returning a new Manager (and Block), and thus never changing + the dtype. + """ + if _using_copy_on_write() and not self._has_no_reference(0): + self.blocks = (self._block.copy(),) + self.refs = None + self.parent = None + self._cache.clear() + + super().setitem_inplace(indexer, value) + + def idelete(self, indexer) -> SingleBlockManager: + """ + Delete single location from SingleBlockManager. + + Ensures that self.blocks doesn't become empty. 
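The reference bookkeeping above (`refs`, `parent`, `_has_no_reference`, and the defensive copy in `setitem_inplace`) is what the experimental copy-on-write mode builds on. A minimal sketch of the user-visible effect, assuming pandas 1.5.x with the `mode.copy_on_write` option enabled:

```python
import pandas as pd

# Copy-on-Write is experimental in the 1.5 series and must be switched on.
pd.set_option("mode.copy_on_write", True)

df = pd.DataFrame({"a": [1, 2, 3]})
s = df["a"]            # the Series' manager keeps a weakref to df's block

s.iloc[0] = 99         # setitem_inplace sees the reference and copies first
print(df["a"].tolist())   # [1, 2, 3] -- the parent frame stays untouched
print(s.tolist())         # [99, 2, 3]
```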
+ """ + nb = self._block.delete(indexer) + self.blocks = (nb,) + self.axes[0] = self.axes[0].delete(indexer) + self._cache.clear() + # clear reference since delete always results in a new array + self.refs = None + self.parent = None + return self + + def fast_xs(self, loc): + """ + fast path for getting a cross-section + return a view of the data + """ + raise NotImplementedError("Use series._values[loc] instead") + + def set_values(self, values: ArrayLike): + """ + Set the values of the single block in place. + + Use at your own risk! This does not check if the passed values are + valid for the current Block/SingleBlockManager (length, dtype, etc). + """ + # TODO(CoW) do we need to handle copy on write here? Currently this is + # only used for FrameColumnApply.series_generator (what if apply is + # mutating inplace?) + self.blocks[0].values = values + self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values))) + + def _equal_values(self: T, other: T) -> bool: + """ + Used in .equals defined in base class. Only check the column values + assuming shape and indexes have already been checked. + """ + # For SingleBlockManager (i.e.Series) + if other.ndim != 1: + return False + left = self.blocks[0].values + right = other.blocks[0].values + return array_equals(left, right) + + +# -------------------------------------------------------------------- +# Constructor Helpers + + +def create_block_manager_from_blocks( + blocks: list[Block], + axes: list[Index], + consolidate: bool = True, + verify_integrity: bool = True, +) -> BlockManager: + # If verify_integrity=False, then caller is responsible for checking + # all(x.shape[-1] == len(axes[1]) for x in blocks) + # sum(x.shape[0] for x in blocks) == len(axes[0]) + # set(x for blk in blocks for x in blk.mgr_locs) == set(range(len(axes[0]))) + # all(blk.ndim == 2 for blk in blocks) + # This allows us to safely pass verify_integrity=False + + try: + mgr = BlockManager(blocks, axes, verify_integrity=verify_integrity) + + except ValueError as err: + arrays = [blk.values for blk in blocks] + tot_items = sum(arr.shape[0] for arr in arrays) + raise construction_error(tot_items, arrays[0].shape[1:], axes, err) + + if consolidate: + mgr._consolidate_inplace() + return mgr + + +def create_block_manager_from_column_arrays( + arrays: list[ArrayLike], + axes: list[Index], + consolidate: bool = True, +) -> BlockManager: + # Assertions disabled for performance (caller is responsible for verifying) + # assert isinstance(axes, list) + # assert all(isinstance(x, Index) for x in axes) + # assert all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays) + # assert all(type(x) is not PandasArray for x in arrays) + # assert all(x.ndim == 1 for x in arrays) + # assert all(len(x) == len(axes[1]) for x in arrays) + # assert len(arrays) == len(axes[0]) + # These last three are sufficient to allow us to safely pass + # verify_integrity=False below. 
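Consolidation is what lets several same-dtype columns share one 2-D block. A small sketch of the observable effect when a frame is built from column arrays; it peeks at the private `_mgr` attribute purely for illustration, and the block counts assume default construction on pandas 1.5.x:

```python
import numpy as np
import pandas as pd

# Three float64 columns arrive as separate 1-D arrays ...
df = pd.DataFrame({"a": np.arange(3.0), "b": np.ones(3), "c": np.zeros(3)})

# ... and consolidation merges them into a single 2-D float64 block.
print(df._mgr.nblocks)            # 1
print(df._mgr.blocks[0].shape)    # (3, 3)

# Mixed dtypes give one block per consolidatable dtype group instead.
mixed = pd.DataFrame({"f": np.arange(3.0), "i": np.arange(3), "o": list("xyz")})
print(mixed._mgr.nblocks)         # 3
```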
+ + try: + blocks = _form_blocks(arrays, consolidate) + mgr = BlockManager(blocks, axes, verify_integrity=False) + except ValueError as e: + raise construction_error(len(arrays), arrays[0].shape, axes, e) + if consolidate: + mgr._consolidate_inplace() + return mgr + + +def construction_error( + tot_items: int, + block_shape: Shape, + axes: list[Index], + e: ValueError | None = None, +): + """raise a helpful message about our construction""" + passed = tuple(map(int, [tot_items] + list(block_shape))) + # Correcting the user facing error message during dataframe construction + if len(passed) <= 2: + passed = passed[::-1] + + implied = tuple(len(ax) for ax in axes) + # Correcting the user facing error message during dataframe construction + if len(implied) <= 2: + implied = implied[::-1] + + # We return the exception object instead of raising it so that we + # can raise it in the caller; mypy plays better with that + if passed == implied and e is not None: + return e + if block_shape[0] == 0: + return ValueError("Empty data passed with indices specified.") + return ValueError(f"Shape of passed values is {passed}, indices imply {implied}") + + +# ----------------------------------------------------------------------- + + +def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]: + # compat for numpy<1.21, in which comparing a np.dtype with an ExtensionDtype + # raises instead of returning False. Once earlier numpy versions are dropped, + # this can be simplified to `return tup[1].dtype` + dtype = tup[1].dtype + + if is_1d_only_ea_dtype(dtype): + # We know these won't be consolidated, so don't need to group these. + # This avoids expensive comparisons of CategoricalDtype objects + sep = id(dtype) + else: + sep = 0 + + return sep, isinstance(dtype, np.dtype), dtype + + +def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]: + tuples = list(enumerate(arrays)) + + if not consolidate: + nbs = _tuples_to_blocks_no_consolidate(tuples) + return nbs + + # group by dtype + grouper = itertools.groupby(tuples, _grouping_func) + + nbs = [] + for (_, _, dtype), tup_block in grouper: + block_type = get_block_type(dtype) + + if isinstance(dtype, np.dtype): + is_dtlike = dtype.kind in ["m", "M"] + + if issubclass(dtype.type, (str, bytes)): + dtype = np.dtype(object) + + values, placement = _stack_arrays(list(tup_block), dtype) + if is_dtlike: + values = ensure_wrapped_if_datetimelike(values) + blk = block_type(values, placement=BlockPlacement(placement), ndim=2) + nbs.append(blk) + + elif is_1d_only_ea_dtype(dtype): + dtype_blocks = [ + block_type(x[1], placement=BlockPlacement(x[0]), ndim=2) + for x in tup_block + ] + nbs.extend(dtype_blocks) + + else: + dtype_blocks = [ + block_type( + ensure_block_shape(x[1], 2), placement=BlockPlacement(x[0]), ndim=2 + ) + for x in tup_block + ] + nbs.extend(dtype_blocks) + return nbs + + +def _tuples_to_blocks_no_consolidate(tuples) -> list[Block]: + # tuples produced within _form_blocks are of the form (placement, array) + return [ + new_block_2d(ensure_block_shape(x[1], ndim=2), placement=BlockPlacement(x[0])) + for x in tuples + ] + + +def _stack_arrays(tuples, dtype: np.dtype): + + placement, arrays = zip(*tuples) + + first = arrays[0] + shape = (len(arrays),) + first.shape + + stacked = np.empty(shape, dtype=dtype) + for i, arr in enumerate(arrays): + stacked[i] = arr + + return stacked, placement + + +def _consolidate(blocks: tuple[Block, ...]) -> tuple[Block, ...]: + """ + Merge blocks having same dtype, exclude 
non-consolidating blocks + """ + # sort by _can_consolidate, dtype + gkey = lambda x: x._consolidate_key + grouper = itertools.groupby(sorted(blocks, key=gkey), gkey) + + new_blocks: list[Block] = [] + for (_can_consolidate, dtype), group_blocks in grouper: + merged_blocks, _ = _merge_blocks( + list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate + ) + new_blocks = extend_blocks(merged_blocks, new_blocks) + return tuple(new_blocks) + + +def _consolidate_with_refs( + blocks: tuple[Block, ...], refs +) -> tuple[tuple[Block, ...], list[weakref.ref | None]]: + """ + Merge blocks having same dtype, exclude non-consolidating blocks, handling + refs + """ + gkey = lambda x: x[0]._consolidate_key + grouper = itertools.groupby(sorted(zip(blocks, refs), key=gkey), gkey) + + new_blocks: list[Block] = [] + new_refs: list[weakref.ref | None] = [] + for (_can_consolidate, dtype), group_blocks_refs in grouper: + group_blocks, group_refs = list(zip(*list(group_blocks_refs))) + merged_blocks, consolidated = _merge_blocks( + list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate + ) + new_blocks = extend_blocks(merged_blocks, new_blocks) + if consolidated: + new_refs.extend([None]) + else: + new_refs.extend(group_refs) + return tuple(new_blocks), new_refs + + +def _merge_blocks( + blocks: list[Block], dtype: DtypeObj, can_consolidate: bool +) -> tuple[list[Block], bool]: + + if len(blocks) == 1: + return blocks, False + + if can_consolidate: + + # TODO: optimization potential in case all mgrs contain slices and + # combination of those slices is a slice, too. + new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) + + new_values: ArrayLike + + if isinstance(blocks[0].dtype, np.dtype): + # error: List comprehension has incompatible type List[Union[ndarray, + # ExtensionArray]]; expected List[Union[complex, generic, + # Sequence[Union[int, float, complex, str, bytes, generic]], + # Sequence[Sequence[Any]], SupportsArray]] + new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] + else: + bvals = [blk.values for blk in blocks] + bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals) + new_values = bvals2[0]._concat_same_type(bvals2, axis=0) + + argsort = np.argsort(new_mgr_locs) + new_values = new_values[argsort] + new_mgr_locs = new_mgr_locs[argsort] + + bp = BlockPlacement(new_mgr_locs) + return [new_block_2d(new_values, placement=bp)], True + + # can't consolidate --> no merge + return blocks, False + + +def _fast_count_smallints(arr: npt.NDArray[np.intp]): + """Faster version of set(arr) for sequences of small numbers.""" + counts = np.bincount(arr) + nz = counts.nonzero()[0] + # Note: list(zip(...) 
outperforms list(np.c_[nz, counts[nz]]) here, + # in one benchmark by a factor of 11 + return zip(nz, counts[nz]) + + +def _preprocess_slice_or_indexer( + slice_or_indexer: slice | np.ndarray, length: int, allow_fill: bool +): + if isinstance(slice_or_indexer, slice): + return ( + "slice", + slice_or_indexer, + libinternals.slice_len(slice_or_indexer, length), + ) + else: + if ( + not isinstance(slice_or_indexer, np.ndarray) + or slice_or_indexer.dtype.kind != "i" + ): + dtype = getattr(slice_or_indexer, "dtype", None) + raise TypeError(type(slice_or_indexer), dtype) + + indexer = ensure_platform_int(slice_or_indexer) + if not allow_fill: + indexer = maybe_convert_indices(indexer, length) + return "fancy", indexer, len(indexer) + + +_mode_options = _global_config["mode"] + + +def _using_copy_on_write(): + return _mode_options["copy_on_write"] diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py new file mode 100644 index 00000000..5febb302 --- /dev/null +++ b/pandas/core/internals/ops.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Iterator, + NamedTuple, +) + +from pandas._typing import ArrayLike + +if TYPE_CHECKING: + from pandas._libs.internals import BlockPlacement + + from pandas.core.internals.blocks import Block + from pandas.core.internals.managers import BlockManager + + +class BlockPairInfo(NamedTuple): + lvals: ArrayLike + rvals: ArrayLike + locs: BlockPlacement + left_ea: bool + right_ea: bool + rblk: Block + + +def _iter_block_pairs( + left: BlockManager, right: BlockManager +) -> Iterator[BlockPairInfo]: + # At this point we have already checked the parent DataFrames for + # assert rframe._indexed_same(lframe) + + for blk in left.blocks: + locs = blk.mgr_locs + blk_vals = blk.values + + left_ea = blk_vals.ndim == 1 + + rblks, _ = right._slice_take_blocks_ax0(locs.indexer, only_slice=True) + + # Assertions are disabled for performance, but should hold: + # if left_ea: + # assert len(locs) == 1, locs + # assert len(rblks) == 1, rblks + # assert rblks[0].shape[0] == 1, rblks[0].shape + + for rblk in rblks: + right_ea = rblk.values.ndim == 1 + + lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea) + info = BlockPairInfo(lvals, rvals, locs, left_ea, right_ea, rblk) + yield info + + +def operate_blockwise( + left: BlockManager, right: BlockManager, array_op +) -> BlockManager: + # At this point we have already checked the parent DataFrames for + # assert rframe._indexed_same(lframe) + + res_blks: list[Block] = [] + for lvals, rvals, locs, left_ea, right_ea, rblk in _iter_block_pairs(left, right): + res_values = array_op(lvals, rvals) + if left_ea and not right_ea and hasattr(res_values, "reshape"): + res_values = res_values.reshape(1, -1) + nbs = rblk._split_op_result(res_values) + + # Assertions are disabled for performance, but should hold: + # if right_ea or left_ea: + # assert len(nbs) == 1 + # else: + # assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) + + _reset_block_mgr_locs(nbs, locs) + + res_blks.extend(nbs) + + # Assertions are disabled for performance, but should hold: + # slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} + # nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) + # assert nlocs == len(left.items), (nlocs, len(left.items)) + # assert len(slocs) == nlocs, (len(slocs), nlocs) + # assert slocs == set(range(nlocs)), slocs + + new_mgr = type(right)(tuple(res_blks), axes=right.axes, verify_integrity=False) + return new_mgr + + +def 
_reset_block_mgr_locs(nbs: list[Block], locs): + """ + Reset mgr_locs to correspond to our original DataFrame. + """ + for nb in nbs: + nblocs = locs[nb.mgr_locs.indexer] + nb.mgr_locs = nblocs + # Assertions are disabled for performance, but should hold: + # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) + # assert all(x in locs.as_array for x in nb.mgr_locs.as_array) + + +def _get_same_shape_values( + lblk: Block, rblk: Block, left_ea: bool, right_ea: bool +) -> tuple[ArrayLike, ArrayLike]: + """ + Slice lblk.values to align with rblk. Squeeze if we have EAs. + """ + lvals = lblk.values + rvals = rblk.values + + # Require that the indexing into lvals be slice-like + assert rblk.mgr_locs.is_slice_like, rblk.mgr_locs + + # TODO(EA2D): with 2D EAs only this first clause would be needed + if not (left_ea or right_ea): + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[Union[ndarray, slice], slice]" + lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload] + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif left_ea and right_ea: + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif right_ea: + # lvals are 2D, rvals are 1D + + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[Union[ndarray, slice], slice]" + lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload] + assert lvals.shape[0] == 1, lvals.shape + lvals = lvals[0, :] + else: + # lvals are 1D, rvals are 2D + assert rvals.shape[0] == 1, rvals.shape + # error: No overload variant of "__getitem__" of "ExtensionArray" matches + # argument type "Tuple[int, slice]" + rvals = rvals[0, :] # type: ignore[call-overload] + + return lvals, rvals + + +def blockwise_all(left: BlockManager, right: BlockManager, op) -> bool: + """ + Blockwise `all` reduction. + """ + for info in _iter_block_pairs(left, right): + res = op(info.lvals, info.rvals) + if not res: + return False + return True diff --git a/pandas/core/missing.py b/pandas/core/missing.py new file mode 100644 index 00000000..6005e11e --- /dev/null +++ b/pandas/core/missing.py @@ -0,0 +1,993 @@ +""" +Routines for filling missing data. +""" +from __future__ import annotations + +from functools import ( + partial, + wraps, +) +from typing import ( + TYPE_CHECKING, + Any, + cast, +) + +import numpy as np + +from pandas._libs import ( + algos, + lib, +) +from pandas._typing import ( + ArrayLike, + Axis, + F, + npt, +) +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.cast import infer_dtype_from +from pandas.core.dtypes.common import ( + is_array_like, + is_numeric_v_string_like, + needs_i8_conversion, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + na_value_for_dtype, +) + +if TYPE_CHECKING: + from pandas import Index + + +def check_value_size(value, mask: npt.NDArray[np.bool_], length: int): + """ + Validate the size of the values passed to ExtensionArray.fillna. + """ + if is_array_like(value): + if len(value) != length: + raise ValueError( + f"Length of 'value' does not match. 
Got ({len(value)}) " + f" expected {length}" + ) + value = value[mask] + + return value + + +def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: + """ + Return a masking array of same size/shape as arr + with entries equaling any member of values_to_mask set to True + + Parameters + ---------- + arr : ArrayLike + values_to_mask: list, tuple, or scalar + + Returns + ------- + np.ndarray[bool] + """ + # When called from Block.replace/replace_list, values_to_mask is a scalar + # known to be holdable by arr. + # When called from Series._single_replace, values_to_mask is tuple or list + dtype, values_to_mask = infer_dtype_from(values_to_mask) + # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any], + # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + values_to_mask = np.array(values_to_mask, dtype=dtype) # type: ignore[arg-type] + + na_mask = isna(values_to_mask) + nonna = values_to_mask[~na_mask] + + # GH 21977 + mask = np.zeros(arr.shape, dtype=bool) + for x in nonna: + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + pass + else: + new_mask = arr == x + if not isinstance(new_mask, np.ndarray): + # usually BooleanArray + new_mask = new_mask.to_numpy(dtype=bool, na_value=False) + mask |= new_mask + + if na_mask.any(): + mask |= isna(arr) + + return mask + + +def clean_fill_method(method: str | None, allow_nearest: bool = False): + # asfreq is compat for resampling + if method in [None, "asfreq"]: + return None + + if isinstance(method, str): + method = method.lower() + if method == "ffill": + method = "pad" + elif method == "bfill": + method = "backfill" + + valid_methods = ["pad", "backfill"] + expecting = "pad (ffill) or backfill (bfill)" + if allow_nearest: + valid_methods.append("nearest") + expecting = "pad (ffill), backfill (bfill) or nearest" + if method not in valid_methods: + raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}") + return method + + +# interpolation methods that dispatch to np.interp + +NP_METHODS = ["linear", "time", "index", "values"] + +# interpolation methods that dispatch to _interpolate_scipy_wrapper + +SP_METHODS = [ + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "barycentric", + "krogh", + "spline", + "polynomial", + "from_derivatives", + "piecewise_polynomial", + "pchip", + "akima", + "cubicspline", +] + + +def clean_interp_method(method: str, index: Index, **kwargs) -> str: + order = kwargs.get("order") + + if method in ("spline", "polynomial") and order is None: + raise ValueError("You must specify the order of the spline or polynomial.") + + valid = NP_METHODS + SP_METHODS + if method not in valid: + raise ValueError(f"method must be one of {valid}. Got '{method}' instead.") + + if method in ("krogh", "piecewise_polynomial", "pchip"): + if not index.is_monotonic_increasing: + raise ValueError( + f"{method} interpolation requires that the index be monotonic." + ) + + return method + + +def find_valid_index(values, *, how: str) -> int | None: + """ + Retrieves the index of the first valid value. + + Parameters + ---------- + values : ndarray or ExtensionArray + how : {'first', 'last'} + Use this parameter to change between the first or last valid index. 
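`Series.first_valid_index` and `Series.last_valid_index` are the public counterparts of the `how='first'` / `how='last'` modes; a minimal sketch of the expected behaviour:

```python
import numpy as np
import pandas as pd

s = pd.Series([np.nan, np.nan, 3.0, np.nan, 5.0, np.nan])

print(s.first_valid_index())   # 2 -- label of the first non-NA entry
print(s.last_valid_index())    # 4 -- label of the last non-NA entry

# An all-NA series has no valid position at all, mirroring the None return here.
print(pd.Series([np.nan, np.nan]).first_valid_index())   # None
```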
+ + Returns + ------- + int or None + """ + assert how in ["first", "last"] + + if len(values) == 0: # early stop + return None + + is_valid = ~isna(values) + + if values.ndim == 2: + is_valid = is_valid.any(axis=1) # reduce axis 1 + + if how == "first": + idxpos = is_valid[::].argmax() + + elif how == "last": + idxpos = len(values) - 1 - is_valid[::-1].argmax() + + chk_notna = is_valid[idxpos] + + if not chk_notna: + return None + return idxpos + + +def interpolate_array_2d( + data: np.ndarray, + method: str = "pad", + axis: int = 0, + index: Index | None = None, + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + coerce: bool = False, + downcast: str | None = None, + **kwargs, +) -> None: + """ + Wrapper to dispatch to either interpolate_2d or _interpolate_2d_with_fill. + + Notes + ----- + Alters 'data' in-place. + """ + try: + m = clean_fill_method(method) + except ValueError: + m = None + + if m is not None: + if fill_value is not None: + # similar to validate_fillna_kwargs + raise ValueError("Cannot pass both fill_value and method") + + interpolate_2d( + data, + method=m, + axis=axis, + limit=limit, + limit_area=limit_area, + ) + else: + assert index is not None # for mypy + + _interpolate_2d_with_fill( + data=data, + index=index, + axis=axis, + method=method, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + **kwargs, + ) + return + + +def _interpolate_2d_with_fill( + data: np.ndarray, # floating dtype + index: Index, + axis: int, + method: str = "linear", + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + **kwargs, +) -> None: + """ + Column-wise application of _interpolate_1d. + + Notes + ----- + Alters 'data' in-place. + + The signature does differ from _interpolate_1d because it only + includes what is needed for Block.interpolate. + """ + # validate the interp method + clean_interp_method(method, index, **kwargs) + + if is_valid_na_for_dtype(fill_value, data.dtype): + fill_value = na_value_for_dtype(data.dtype, compat=False) + + if method == "time": + if not needs_i8_conversion(index.dtype): + raise ValueError( + "time-weighted interpolation only works " + "on Series or DataFrames with a " + "DatetimeIndex" + ) + method = "values" + + valid_limit_directions = ["forward", "backward", "both"] + limit_direction = limit_direction.lower() + if limit_direction not in valid_limit_directions: + raise ValueError( + "Invalid limit_direction: expecting one of " + f"{valid_limit_directions}, got '{limit_direction}'." + ) + + if limit_area is not None: + valid_limit_areas = ["inside", "outside"] + limit_area = limit_area.lower() + if limit_area not in valid_limit_areas: + raise ValueError( + f"Invalid limit_area: expecting one of {valid_limit_areas}, got " + f"{limit_area}." 
+ ) + + # default limit is unlimited GH #16282 + limit = algos.validate_limit(nobs=None, limit=limit) + + indices = _index_to_interp_indices(index, method) + + def func(yvalues: np.ndarray) -> None: + # process 1-d slices in the axis direction + + _interpolate_1d( + indices=indices, + yvalues=yvalues, + method=method, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + bounds_error=False, + **kwargs, + ) + + # error: Argument 1 to "apply_along_axis" has incompatible type + # "Callable[[ndarray[Any, Any]], None]"; expected "Callable[..., + # Union[_SupportsArray[dtype[]], Sequence[_SupportsArray + # [dtype[]]], Sequence[Sequence[_SupportsArray[dtype[]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]], + # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]]]]]" + np.apply_along_axis(func, axis, data) # type: ignore[arg-type] + return + + +def _index_to_interp_indices(index: Index, method: str) -> np.ndarray: + """ + Convert Index to ndarray of indices to pass to NumPy/SciPy. + """ + xarr = index._values + if needs_i8_conversion(xarr.dtype): + # GH#1646 for dt64tz + xarr = xarr.view("i8") + + if method == "linear": + inds = xarr + inds = cast(np.ndarray, inds) + else: + inds = np.asarray(xarr) + + if method in ("values", "index"): + if inds.dtype == np.object_: + inds = lib.maybe_convert_objects(inds) + + return inds + + +def _interpolate_1d( + indices: np.ndarray, + yvalues: np.ndarray, + method: str | None = "linear", + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + bounds_error: bool = False, + order: int | None = None, + **kwargs, +): + """ + Logic for the 1-d interpolation. The input + indices and yvalues will each be 1-d arrays of the same length. + + Bounds_error is currently hardcoded to False since non-scipy ones don't + take it as an argument. + + Notes + ----- + Fills 'yvalues' in-place. + """ + + invalid = isna(yvalues) + valid = ~invalid + + if not valid.any(): + return + + if valid.all(): + return + + # These are sets of index pointers to invalid values... i.e. {0, 1, etc... + all_nans = set(np.flatnonzero(invalid)) + + first_valid_index = find_valid_index(yvalues, how="first") + if first_valid_index is None: # no nan found in start + first_valid_index = 0 + start_nans = set(range(first_valid_index)) + + last_valid_index = find_valid_index(yvalues, how="last") + if last_valid_index is None: # no nan found in end + last_valid_index = len(yvalues) + end_nans = set(range(1 + last_valid_index, len(valid))) + + # Like the sets above, preserve_nans contains indices of invalid values, + # but in this case, it is the final set of indices that need to be + # preserved as NaN after the interpolation. + + # For example if limit_direction='forward' then preserve_nans will + # contain indices of NaNs at the beginning of the series, and NaNs that + # are more than'limit' away from the prior non-NaN. + + # set preserve_nans based on direction using _interp_limit + preserve_nans: list | set + if limit_direction == "forward": + preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) + elif limit_direction == "backward": + preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) + else: + # both directions... 
just use _interp_limit + preserve_nans = set(_interp_limit(invalid, limit, limit)) + + # if limit_area is set, add either mid or outside indices + # to preserve_nans GH #16284 + if limit_area == "inside": + # preserve NaNs on the outside + preserve_nans |= start_nans | end_nans + elif limit_area == "outside": + # preserve NaNs on the inside + mid_nans = all_nans - start_nans - end_nans + preserve_nans |= mid_nans + + # sort preserve_nans and convert to list + preserve_nans = sorted(preserve_nans) + + if method in NP_METHODS: + # np.interp requires sorted X values, #21037 + + indexer = np.argsort(indices[valid]) + yvalues[invalid] = np.interp( + indices[invalid], indices[valid][indexer], yvalues[valid][indexer] + ) + else: + yvalues[invalid] = _interpolate_scipy_wrapper( + indices[valid], + yvalues[valid], + indices[invalid], + method=method, + fill_value=fill_value, + bounds_error=bounds_error, + order=order, + **kwargs, + ) + + yvalues[preserve_nans] = np.nan + return + + +def _interpolate_scipy_wrapper( + x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs +): + """ + Passed off to scipy.interpolate.interp1d. method is scipy's kind. + Returns an array interpolated at new_x. Add any new methods to + the list in _clean_interp_method. + """ + extra = f"{method} interpolation requires SciPy." + import_optional_dependency("scipy", extra=extra) + from scipy import interpolate + + new_x = np.asarray(new_x) + + # ignores some kwargs that could be passed along. + alt_methods = { + "barycentric": interpolate.barycentric_interpolate, + "krogh": interpolate.krogh_interpolate, + "from_derivatives": _from_derivatives, + "piecewise_polynomial": _from_derivatives, + } + + if getattr(x, "_is_all_dates", False): + # GH 5975, scipy.interp1d can't handle datetime64s + x, new_x = x._values.astype("i8"), new_x.astype("i8") + + if method == "pchip": + alt_methods["pchip"] = interpolate.pchip_interpolate + elif method == "akima": + alt_methods["akima"] = _akima_interpolate + elif method == "cubicspline": + alt_methods["cubicspline"] = _cubicspline_interpolate + + interp1d_methods = [ + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "polynomial", + ] + if method in interp1d_methods: + if method == "polynomial": + method = order + terp = interpolate.interp1d( + x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error + ) + new_y = terp(new_x) + elif method == "spline": + # GH #10633, #24014 + if isna(order) or (order <= 0): + raise ValueError( + f"order needs to be specified and greater than 0; got order: {order}" + ) + terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) + new_y = terp(new_x) + else: + # GH 7295: need to be able to write for some reason + # in some circumstances: check all three + if not x.flags.writeable: + x = x.copy() + if not y.flags.writeable: + y = y.copy() + if not new_x.flags.writeable: + new_x = new_x.copy() + method = alt_methods[method] + new_y = method(x, y, new_x, **kwargs) + return new_y + + +def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): + """ + Convenience function for interpolate.BPoly.from_derivatives. + + Construct a piecewise polynomial in the Bernstein basis, compatible + with the specified values and derivatives at breakpoints. + + Parameters + ---------- + xi : array-like + sorted 1D array of x-coordinates + yi : array-like or list of array-likes + yi[i][j] is the j-th derivative known at xi[i] + order: None or int or array-like of ints. Default: None. 
+ Specifies the degree of local polynomials. If not None, some + derivatives are ignored. + der : int or list + How many derivatives to extract; None for all potentially nonzero + derivatives (that is a number equal to the number of points), or a + list of derivatives to extract. This number includes the function + value as 0th derivative. + extrapolate : bool, optional + Whether to extrapolate to ouf-of-bounds points based on first and last + intervals, or to return NaNs. Default: True. + + See Also + -------- + scipy.interpolate.BPoly.from_derivatives + + Returns + ------- + y : scalar or array-like + The result, of length R or length M or M by R. + """ + from scipy import interpolate + + # return the method for compat with scipy version & backwards compat + method = interpolate.BPoly.from_derivatives + m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate) + + return m(x) + + +def _akima_interpolate(xi, yi, x, der=0, axis=0): + """ + Convenience function for akima interpolation. + xi and yi are arrays of values used to approximate some function f, + with ``yi = f(xi)``. + + See `Akima1DInterpolator` for details. + + Parameters + ---------- + xi : array-like + A sorted list of x-coordinates, of length N. + yi : array-like + A 1-D array of real values. `yi`'s length along the interpolation + axis must be equal to the length of `xi`. If N-D array, use axis + parameter to select correct axis. + x : scalar or array-like + Of length M. + der : int, optional + How many derivatives to extract; None for all potentially + nonzero derivatives (that is a number equal to the number + of points), or a list of derivatives to extract. This number + includes the function value as 0th derivative. + axis : int, optional + Axis in the yi array corresponding to the x-coordinate values. + + See Also + -------- + scipy.interpolate.Akima1DInterpolator + + Returns + ------- + y : scalar or array-like + The result, of length R or length M or M by R, + + """ + from scipy import interpolate + + P = interpolate.Akima1DInterpolator(xi, yi, axis=axis) + + return P(x, nu=der) + + +def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None): + """ + Convenience function for cubic spline data interpolator. + + See `scipy.interpolate.CubicSpline` for details. + + Parameters + ---------- + xi : array-like, shape (n,) + 1-d array containing values of the independent variable. + Values must be real, finite and in strictly increasing order. + yi : array-like + Array containing values of the dependent variable. It can have + arbitrary number of dimensions, but the length along ``axis`` + (see below) must match the length of ``x``. Values must be finite. + x : scalar or array-like, shape (m,) + axis : int, optional + Axis along which `y` is assumed to be varying. Meaning that for + ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``. + Default is 0. + bc_type : string or 2-tuple, optional + Boundary condition type. Two additional equations, given by the + boundary conditions, are required to determine all coefficients of + polynomials on each segment [2]_. + If `bc_type` is a string, then the specified condition will be applied + at both ends of a spline. Available conditions are: + * 'not-a-knot' (default): The first and second segment at a curve end + are the same polynomial. It is a good default when there is no + information on boundary conditions. + * 'periodic': The interpolated functions is assumed to be periodic + of period ``x[-1] - x[0]``. 
The first and last value of `y` must be + identical: ``y[0] == y[-1]``. This boundary condition will result in + ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``. + * 'clamped': The first derivative at curves ends are zero. Assuming + a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition. + * 'natural': The second derivative at curve ends are zero. Assuming + a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition. + If `bc_type` is a 2-tuple, the first and the second value will be + applied at the curve start and end respectively. The tuple values can + be one of the previously mentioned strings (except 'periodic') or a + tuple `(order, deriv_values)` allowing to specify arbitrary + derivatives at curve ends: + * `order`: the derivative order, 1 or 2. + * `deriv_value`: array-like containing derivative values, shape must + be the same as `y`, excluding ``axis`` dimension. For example, if + `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with + the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D + and have the shape (n0, n1). + extrapolate : {bool, 'periodic', None}, optional + If bool, determines whether to extrapolate to out-of-bounds points + based on first and last intervals, or to return NaNs. If 'periodic', + periodic extrapolation is used. If None (default), ``extrapolate`` is + set to 'periodic' for ``bc_type='periodic'`` and to True otherwise. + + See Also + -------- + scipy.interpolate.CubicHermiteSpline + + Returns + ------- + y : scalar or array-like + The result, of shape (m,) + + References + ---------- + .. [1] `Cubic Spline Interpolation + `_ + on Wikiversity. + .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978. + """ + from scipy import interpolate + + P = interpolate.CubicSpline( + xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate + ) + + return P(x) + + +def _interpolate_with_limit_area( + values: np.ndarray, method: str, limit: int | None, limit_area: str | None +) -> None: + """ + Apply interpolation and limit_area logic to values along a to-be-specified axis. + + Parameters + ---------- + values: np.ndarray + Input array. + method: str + Interpolation method. Could be "bfill" or "pad" + limit: int, optional + Index limit on interpolation. + limit_area: str + Limit area for interpolation. Can be "inside" or "outside" + + Notes + ----- + Modifies values in-place. + """ + + invalid = isna(values) + + if not invalid.all(): + first = find_valid_index(values, how="first") + if first is None: + first = 0 + last = find_valid_index(values, how="last") + if last is None: + last = len(values) + + interpolate_2d( + values, + method=method, + limit=limit, + ) + + if limit_area == "inside": + invalid[first : last + 1] = False + elif limit_area == "outside": + invalid[:first] = invalid[last + 1 :] = False + + values[invalid] = np.nan + + return + + +def interpolate_2d( + values: np.ndarray, + method: str = "pad", + axis: Axis = 0, + limit: int | None = None, + limit_area: str | None = None, +) -> None: + """ + Perform an actual interpolation of values, values will be make 2-d if + needed fills inplace, returns the result. + + Parameters + ---------- + values: np.ndarray + Input array. + method: str, default "pad" + Interpolation method. Could be "bfill" or "pad" + axis: 0 or 1 + Interpolation axis + limit: int, optional + Index limit on interpolation. + limit_area: str, optional + Limit area for interpolation. Can be "inside" or "outside" + + Notes + ----- + Modifies values in-place. 
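This pad/backfill machinery is what surfaces as `Series.ffill`/`bfill` and `Series.interpolate(method='pad', ...)`, which is where `limit` and `limit_area` come from. A small sketch of the expected behaviour:

```python
import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, np.nan, np.nan, 4.0, np.nan])

# Forward fill, but at most one consecutive NaN is filled per gap.
print(s.ffill(limit=1).tolist())
# [nan, 1.0, 1.0, nan, 4.0, 4.0]

# limit_area="inside" only fills NaNs that sit between valid values.
print(s.interpolate(method="pad", limit_area="inside").tolist())
# [nan, 1.0, 1.0, 1.0, 4.0, nan]
```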
+ """ + if limit_area is not None: + np.apply_along_axis( + # error: Argument 1 to "apply_along_axis" has incompatible type + # "partial[None]"; expected + # "Callable[..., Union[_SupportsArray[dtype[]], + # Sequence[_SupportsArray[dtype[]]], + # Sequence[Sequence[_SupportsArray[dtype[]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]], + # Sequence[Sequence[Sequence[Sequence[_ + # SupportsArray[dtype[]]]]]]]]" + partial( # type: ignore[arg-type] + _interpolate_with_limit_area, + method=method, + limit=limit, + limit_area=limit_area, + ), + # error: Argument 2 to "apply_along_axis" has incompatible type + # "Union[str, int]"; expected "SupportsIndex" + axis, # type: ignore[arg-type] + values, + ) + return + + transf = (lambda x: x) if axis == 0 else (lambda x: x.T) + + # reshape a 1 dim if needed + if values.ndim == 1: + if axis != 0: # pragma: no cover + raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") + values = values.reshape(tuple((1,) + values.shape)) + + method = clean_fill_method(method) + tvalues = transf(values) + + # _pad_2d and _backfill_2d both modify tvalues inplace + if method == "pad": + _pad_2d(tvalues, limit=limit) + else: + _backfill_2d(tvalues, limit=limit) + + return + + +def _fillna_prep( + values, mask: npt.NDArray[np.bool_] | None = None +) -> npt.NDArray[np.bool_]: + # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d + + if mask is None: + mask = isna(values) + + mask = mask.view(np.uint8) + return mask + + +def _datetimelike_compat(func: F) -> F: + """ + Wrapper to handle datetime64 and timedelta64 dtypes. + """ + + @wraps(func) + def new_func(values, limit=None, mask=None): + if needs_i8_conversion(values.dtype): + if mask is None: + # This needs to occur before casting to int64 + mask = isna(values) + + result, mask = func(values.view("i8"), limit=limit, mask=mask) + return result.view(values.dtype), mask + + return func(values, limit=limit, mask=mask) + + return cast(F, new_func) + + +@_datetimelike_compat +def _pad_1d( + values: np.ndarray, + limit: int | None = None, + mask: npt.NDArray[np.bool_] | None = None, +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: + mask = _fillna_prep(values, mask) + algos.pad_inplace(values, mask, limit=limit) + return values, mask + + +@_datetimelike_compat +def _backfill_1d( + values: np.ndarray, + limit: int | None = None, + mask: npt.NDArray[np.bool_] | None = None, +) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: + mask = _fillna_prep(values, mask) + algos.backfill_inplace(values, mask, limit=limit) + return values, mask + + +@_datetimelike_compat +def _pad_2d(values: np.ndarray, limit=None, mask: npt.NDArray[np.bool_] | None = None): + mask = _fillna_prep(values, mask) + + if np.all(values.shape): + algos.pad_2d_inplace(values, mask, limit=limit) + else: + # for test coverage + pass + return values, mask + + +@_datetimelike_compat +def _backfill_2d(values, limit=None, mask: npt.NDArray[np.bool_] | None = None): + mask = _fillna_prep(values, mask) + + if np.all(values.shape): + algos.backfill_2d_inplace(values, mask, limit=limit) + else: + # for test coverage + pass + return values, mask + + +_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} + + +def get_fill_func(method, ndim: int = 1): + method = clean_fill_method(method) + if ndim == 1: + return _fill_methods[method] + return {"pad": _pad_2d, "backfill": _backfill_2d}[method] + + +def clean_reindex_fill_method(method) -> str | None: + return clean_fill_method(method, allow_nearest=True) + + +def _interp_limit(invalid: 
npt.NDArray[np.bool_], fw_limit, bw_limit): + """ + Get indexers of values that won't be filled + because they exceed the limits. + + Parameters + ---------- + invalid : np.ndarray[bool] + fw_limit : int or None + forward limit to index + bw_limit : int or None + backward limit to index + + Returns + ------- + set of indexers + + Notes + ----- + This is equivalent to the more readable, but slower + + .. code-block:: python + + def _interp_limit(invalid, fw_limit, bw_limit): + for x in np.where(invalid)[0]: + if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): + yield x + """ + # handle forward first; the backward direction is the same except + # 1. operate on the reversed array + # 2. subtract the returned indices from N - 1 + N = len(invalid) + f_idx = set() + b_idx = set() + + def inner(invalid, limit): + limit = min(limit, N) + windowed = _rolling_window(invalid, limit + 1).all(1) + idx = set(np.where(windowed)[0] + limit) | set( + np.where((~invalid[: limit + 1]).cumsum() == 0)[0] + ) + return idx + + if fw_limit is not None: + + if fw_limit == 0: + f_idx = set(np.where(invalid)[0]) + else: + f_idx = inner(invalid, fw_limit) + + if bw_limit is not None: + + if bw_limit == 0: + # then we don't even need to care about backwards + # just use forwards + return f_idx + else: + b_idx_inv = list(inner(invalid[::-1], bw_limit)) + b_idx = set(N - 1 - np.asarray(b_idx_inv)) + if fw_limit == 0: + return b_idx + + return f_idx & b_idx + + +def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]: + """ + [True, True, False, True, False], 2 -> + + [ + [True, True], + [True, False], + [False, True], + [True, False], + ] + """ + # https://stackoverflow.com/a/6811241 + shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) + strides = a.strides + (a.strides[-1],) + return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py new file mode 100644 index 00000000..6658b25d --- /dev/null +++ b/pandas/core/nanops.py @@ -0,0 +1,1747 @@ +from __future__ import annotations + +import functools +import itertools +import operator +from typing import ( + Any, + Callable, + cast, +) +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + NaT, + NaTType, + iNaT, + lib, +) +from pandas._typing import ( + ArrayLike, + Dtype, + DtypeObj, + F, + Scalar, + Shape, + npt, +) +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.common import ( + is_any_int_dtype, + is_bool_dtype, + is_complex, + is_datetime64_any_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, +) + +from pandas.core.construction import extract_array + +bn = import_optional_dependency("bottleneck", errors="warn") +_BOTTLENECK_INSTALLED = bn is not None +_USE_BOTTLENECK = False + + +def set_use_bottleneck(v: bool = True) -> None: + # set/unset to use bottleneck + global _USE_BOTTLENECK + if _BOTTLENECK_INSTALLED: + _USE_BOTTLENECK = v + + +set_use_bottleneck(get_option("compute.use_bottleneck")) + + +class disallow: + def __init__(self, *dtypes: Dtype) -> None: + super().__init__() + self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) + + def check(self, obj) -> bool: + return 
hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) + + def __call__(self, f: F) -> F: + @functools.wraps(f) + def _f(*args, **kwargs): + obj_iter = itertools.chain(args, kwargs.values()) + if any(self.check(obj) for obj in obj_iter): + f_name = f.__name__.replace("nan", "") + raise TypeError( + f"reduction operation '{f_name}' not allowed for this dtype" + ) + try: + with np.errstate(invalid="ignore"): + return f(*args, **kwargs) + except ValueError as e: + # we want to transform an object array + # ValueError message to the more typical TypeError + # e.g. this is normally a disallowed function on + # object arrays that contain strings + if is_object_dtype(args[0]): + raise TypeError(e) from e + raise + + return cast(F, _f) + + +class bottleneck_switch: + def __init__(self, name=None, **kwargs) -> None: + self.name = name + self.kwargs = kwargs + + def __call__(self, alt: F) -> F: + bn_name = self.name or alt.__name__ + + try: + bn_func = getattr(bn, bn_name) + except (AttributeError, NameError): # pragma: no cover + bn_func = None + + @functools.wraps(alt) + def f( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + **kwds, + ): + if len(self.kwargs) > 0: + for k, v in self.kwargs.items(): + if k not in kwds: + kwds[k] = v + + if values.size == 0 and kwds.get("min_count") is None: + # We are empty, returning NA for our type + # Only applies for the default `min_count` of None + # since that affects how empty arrays are handled. + # TODO(GH-18976) update all the nanops methods to + # correctly handle empty inputs and remove this check. + # It *may* just be `var` + return _na_for_min_count(values, axis) + + if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name): + if kwds.get("mask", None) is None: + # `mask` is not recognised by bottleneck, would raise + # TypeError if called + kwds.pop("mask", None) + result = bn_func(values, axis=axis, **kwds) + + # prefer to treat inf/-inf as NA, but must compute the func + # twice :( + if _has_infs(result): + result = alt(values, axis=axis, skipna=skipna, **kwds) + else: + result = alt(values, axis=axis, skipna=skipna, **kwds) + else: + result = alt(values, axis=axis, skipna=skipna, **kwds) + + return result + + return cast(F, f) + + +def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool: + # Bottleneck chokes on datetime64, PeriodDtype (or and EA) + if not is_object_dtype(dtype) and not needs_i8_conversion(dtype): + # GH 42878 + # Bottleneck uses naive summation leading to O(n) loss of precision + # unlike numpy which implements pairwise summation, which has O(log(n)) loss + # crossref: https://github.com/pydata/bottleneck/issues/379 + + # GH 15507 + # bottleneck does not properly upcast during the sum + # so can overflow + + # GH 9422 + # further we also want to preserve NaN when all elements + # are NaN, unlike bottleneck/numpy which consider this + # to be 0 + return name not in ["nansum", "nanprod", "nanmean"] + return False + + +def _has_infs(result) -> bool: + if isinstance(result, np.ndarray): + if result.dtype == "f8" or result.dtype == "f4": + # Note: outside of an nanops-specific test, we always have + # result.ndim == 1, so there is no risk of this ravel making a copy. 
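Whether bottleneck is consulted at all is controlled by the `compute.use_bottleneck` option read above. A hedged sketch of toggling it for a reduction bottleneck may accelerate; the two paths are expected to agree to floating-point precision, only the execution path differs:

```python
import numpy as np
import pandas as pd

s = pd.Series(np.random.default_rng(0).standard_normal(1000))

pd.set_option("compute.use_bottleneck", False)   # force the pure-NumPy fallback
no_bn = s.std()

pd.set_option("compute.use_bottleneck", True)    # use bottleneck if installed
with_bn = s.std()

print(np.isclose(no_bn, with_bn))   # True
```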
+ return lib.has_infs(result.ravel("K")) + try: + return np.isinf(result).any() + except (TypeError, NotImplementedError): + # if it doesn't support infs, then it can't have infs + return False + + +def _get_fill_value( + dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None +): + """return the correct fill value for the dtype of the values""" + if fill_value is not None: + return fill_value + if _na_ok_dtype(dtype): + if fill_value_typ is None: + return np.nan + else: + if fill_value_typ == "+inf": + return np.inf + else: + return -np.inf + else: + if fill_value_typ == "+inf": + # need the max int here + return lib.i8max + else: + return iNaT + + +def _maybe_get_mask( + values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None +) -> npt.NDArray[np.bool_] | None: + """ + Compute a mask if and only if necessary. + + This function will compute a mask iff it is necessary. Otherwise, + return the provided mask (potentially None) when a mask does not need to be + computed. + + A mask is never necessary if the values array is of boolean or integer + dtypes, as these are incapable of storing NaNs. If passing a NaN-capable + dtype that is interpretable as either boolean or integer data (eg, + timedelta64), a mask must be provided. + + If the skipna parameter is False, a new mask will not be computed. + + The mask is computed using isna() by default. Setting invert=True selects + notna() as the masking function. + + Parameters + ---------- + values : ndarray + input array to potentially compute mask for + skipna : bool + boolean for whether NaNs should be skipped + mask : Optional[ndarray] + nan-mask if known + + Returns + ------- + Optional[np.ndarray[bool]] + """ + if mask is None: + if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype): + # Boolean data cannot contain nulls, so signal via mask being None + return None + + if skipna or needs_i8_conversion(values.dtype): + mask = isna(values) + + return mask + + +def _get_values( + values: np.ndarray, + skipna: bool, + fill_value: Any = None, + fill_value_typ: str | None = None, + mask: npt.NDArray[np.bool_] | None = None, +) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]: + """ + Utility to get the values view, mask, dtype, dtype_max, and fill_value. + + If both mask and fill_value/fill_value_typ are not None and skipna is True, + the values array will be copied. + + For input arrays of boolean or integer dtypes, copies will only occur if a + precomputed mask, a fill_value/fill_value_typ, and skipna=True are + provided. + + Parameters + ---------- + values : ndarray + input array to potentially compute mask for + skipna : bool + boolean for whether NaNs should be skipped + fill_value : Any + value to fill NaNs with + fill_value_typ : str + Set to '+inf' or '-inf' to handle dtype-specific infinities + mask : Optional[np.ndarray[bool]] + nan-mask if known + + Returns + ------- + values : ndarray + Potential copy of input value array + mask : Optional[ndarray[bool]] + Mask for values, if deemed necessary to compute + dtype : np.dtype + dtype for values + dtype_max : np.dtype + platform independent dtype + fill_value : Any + fill value used + """ + # In _get_values is only called from within nanops, and in all cases + # with scalar fill_value. 
This guarantee is important for the + # np.where call below + assert is_scalar(fill_value) + # error: Incompatible types in assignment (expression has type "Union[Any, + # Union[ExtensionArray, ndarray]]", variable has type "ndarray") + values = extract_array(values, extract_numpy=True) # type: ignore[assignment] + + mask = _maybe_get_mask(values, skipna, mask) + + dtype = values.dtype + + datetimelike = False + if needs_i8_conversion(values.dtype): + # changing timedelta64/datetime64 to int64 needs to happen after + # finding `mask` above + values = np.asarray(values.view("i8")) + datetimelike = True + + dtype_ok = _na_ok_dtype(dtype) + + # get our fill value (in case we need to provide an alternative + # dtype for it) + fill_value = _get_fill_value( + dtype, fill_value=fill_value, fill_value_typ=fill_value_typ + ) + + if skipna and (mask is not None) and (fill_value is not None): + if mask.any(): + if dtype_ok or datetimelike: + values = values.copy() + np.putmask(values, mask, fill_value) + else: + # np.where will promote if needed + values = np.where(~mask, values, fill_value) + + # return a platform independent precision dtype + dtype_max = dtype + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + dtype_max = np.dtype(np.int64) + elif is_float_dtype(dtype): + dtype_max = np.dtype(np.float64) + + return values, mask, dtype, dtype_max, fill_value + + +def _na_ok_dtype(dtype: DtypeObj) -> bool: + if needs_i8_conversion(dtype): + return False + return not issubclass(dtype.type, np.integer) + + +def _wrap_results(result, dtype: np.dtype, fill_value=None): + """wrap our results if needed""" + if result is NaT: + pass + + elif is_datetime64_any_dtype(dtype): + if fill_value is None: + # GH#24293 + fill_value = iNaT + if not isinstance(result, np.ndarray): + assert not isna(fill_value), "Expected non-null fill_value" + if result == fill_value: + result = np.nan + + if isna(result): + result = np.datetime64("NaT", "ns") + else: + result = np.int64(result).view("datetime64[ns]") + # retain original unit + result = result.astype(dtype, copy=False) + else: + # If we have float dtype, taking a view will give the wrong result + result = result.astype(dtype) + elif is_timedelta64_dtype(dtype): + if not isinstance(result, np.ndarray): + if result == fill_value or np.isnan(result): + result = np.timedelta64("NaT").astype(dtype) + + elif np.fabs(result) > lib.i8max: + # raise if we have a timedelta64[ns] which is too large + raise ValueError("overflow in timedelta operation") + else: + # return a timedelta64 with the original unit + result = np.int64(result).astype(dtype, copy=False) + + else: + result = result.astype("m8[ns]").view(dtype) + + return result + + +def _datetimelike_compat(func: F) -> F: + """ + If we have datetime64 or timedelta64 values, ensure we have a correct + mask before calling the wrapped function, then cast back afterwards. 
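For datetime64/timedelta64 input the values are viewed as int64, reduced, and wrapped back by `_wrap_results`; with `skipna=False` the mask is re-applied afterwards so the result becomes NaT. The user-visible effect, sketched with the public API:

```python
import pandas as pd

s = pd.Series(pd.to_datetime(["2023-01-01", None, "2023-01-03"]))

print(s.min())               # 2023-01-01 00:00:00 -- the NaT is masked out
print(s.min(skipna=False))   # NaT -- the mask is re-applied to the result
```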
+ """ + + @functools.wraps(func) + def new_func( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, + **kwargs, + ): + orig_values = values + + datetimelike = values.dtype.kind in ["m", "M"] + if datetimelike and mask is None: + mask = isna(values) + + result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs) + + if datetimelike: + result = _wrap_results(result, orig_values.dtype, fill_value=iNaT) + if not skipna: + assert mask is not None # checked above + result = _mask_datetimelike_result(result, axis, mask, orig_values) + + return result + + return cast(F, new_func) + + +def _na_for_min_count(values: np.ndarray, axis: int | None) -> Scalar | np.ndarray: + """ + Return the missing value for `values`. + + Parameters + ---------- + values : ndarray + axis : int or None + axis for the reduction, required if values.ndim > 1. + + Returns + ------- + result : scalar or ndarray + For 1-D values, returns a scalar of the correct missing type. + For 2-D values, returns a 1-D array where each element is missing. + """ + # we either return np.nan or pd.NaT + if is_numeric_dtype(values): + values = values.astype("float64") + fill_value = na_value_for_dtype(values.dtype) + + if values.ndim == 1: + return fill_value + elif axis is None: + return fill_value + else: + result_shape = values.shape[:axis] + values.shape[axis + 1 :] + + return np.full(result_shape, fill_value, dtype=values.dtype) + + +def maybe_operate_rowwise(func: F) -> F: + """ + NumPy operations on C-contiguous ndarrays with axis=1 can be + very slow if axis 1 >> axis 0. + Operate row-by-row and concatenate the results. + """ + + @functools.wraps(func) + def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): + if ( + axis == 1 + and values.ndim == 2 + and values.flags["C_CONTIGUOUS"] + # only takes this path for wide arrays (long dataframes), for threshold see + # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737 + and (values.shape[1] / 1000) > values.shape[0] + and values.dtype != object + and values.dtype != bool + ): + arrs = list(values) + if kwargs.get("mask") is not None: + mask = kwargs.pop("mask") + results = [ + func(arrs[i], mask=mask[i], **kwargs) for i in range(len(arrs)) + ] + else: + results = [func(x, **kwargs) for x in arrs] + return np.array(results) + + return func(values, axis=axis, **kwargs) + + return cast(F, newfunc) + + +def nanany( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> bool: + """ + Check if any elements along an axis evaluate to True. 
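`_na_for_min_count` above is what turns an empty (or fully masked) reduction into a missing value once `min_count` requires at least one valid entry; a minimal sketch through the public `sum`:

```python
import pandas as pd

empty = pd.Series([], dtype="float64")

print(empty.sum())              # 0.0 -- the default min_count=0 keeps the identity
print(empty.sum(min_count=1))   # nan -- too few valid values, so NA is returned
```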
+ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2]) + >>> nanops.nanany(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([np.nan]) + >>> nanops.nanany(s) + False + """ + values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask) + + # For object type, any won't necessarily return + # boolean values (numpy/numpy#4352) + if is_object_dtype(values): + values = values.astype(bool) + + # error: Incompatible return value type (got "Union[bool_, ndarray]", expected + # "bool") + return values.any(axis) # type: ignore[return-value] + + +def nanall( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> bool: + """ + Check if all elements along an axis evaluate to True. + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanall(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 0]) + >>> nanops.nanall(s) + False + """ + values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask) + + # For object type, all won't necessarily return + # boolean values (numpy/numpy#4352) + if is_object_dtype(values): + values = values.astype(bool) + + # error: Incompatible return value type (got "Union[bool_, ndarray]", expected + # "bool") + return values.all(axis) # type: ignore[return-value] + + +@disallow("M8") +@_datetimelike_compat +@maybe_operate_rowwise +def nansum( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + min_count: int = 0, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Sum the elements along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray[dtype] + axis : int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : dtype + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nansum(s) + 3.0 + """ + values, mask, dtype, dtype_max, _ = _get_values( + values, skipna, fill_value=0, mask=mask + ) + dtype_sum = dtype_max + if is_float_dtype(dtype): + dtype_sum = dtype + elif is_timedelta64_dtype(dtype): + dtype_sum = np.dtype(np.float64) + + the_sum = values.sum(axis, dtype=dtype_sum) + the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count) + + return the_sum + + +def _mask_datetimelike_result( + result: np.ndarray | np.datetime64 | np.timedelta64, + axis: int | None, + mask: npt.NDArray[np.bool_], + orig_values: np.ndarray, +) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType: + if isinstance(result, np.ndarray): + # we need to apply the mask + result = result.astype("i8").view(orig_values.dtype) + axis_mask = mask.any(axis=axis) + # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any], + # datetime64, timedelta64]") + result[axis_mask] = iNaT # type: ignore[index] + else: + if mask.any(): + return np.int64(iNaT).view(orig_values.dtype) + return result + + +@disallow(PeriodDtype) 
+@bottleneck_switch() +@_datetimelike_compat +def nanmean( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Compute the mean of the element along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanmean(s) + 1.5 + """ + values, mask, dtype, dtype_max, _ = _get_values( + values, skipna, fill_value=0, mask=mask + ) + dtype_sum = dtype_max + dtype_count = np.dtype(np.float64) + + # not using needs_i8_conversion because that includes period + if dtype.kind in ["m", "M"]: + dtype_sum = np.dtype(np.float64) + elif is_integer_dtype(dtype): + dtype_sum = np.dtype(np.float64) + elif is_float_dtype(dtype): + dtype_sum = dtype + dtype_count = dtype + + count = _get_counts(values.shape, mask, axis, dtype=dtype_count) + the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum)) + + if axis is not None and getattr(the_sum, "ndim", False): + count = cast(np.ndarray, count) + with np.errstate(all="ignore"): + # suppress division by zero warnings + the_mean = the_sum / count + ct_mask = count == 0 + if ct_mask.any(): + the_mean[ct_mask] = np.nan + else: + the_mean = the_sum / count if count > 0 else np.nan + + return the_mean + + +@bottleneck_switch() +def nanmedian(values, *, axis=None, skipna=True, mask=None): + """ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 2]) + >>> nanops.nanmedian(s) + 2.0 + """ + + def get_median(x): + mask = notna(x) + if not skipna and not mask.all(): + return np.nan + with warnings.catch_warnings(): + # Suppress RuntimeWarning about All-NaN slice + warnings.filterwarnings("ignore", "All-NaN slice encountered") + res = np.nanmedian(x[mask]) + return res + + values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask) + if not is_float_dtype(values.dtype): + try: + values = values.astype("f8") + except ValueError as err: + # e.g. 
"could not convert string to float: 'a'" + raise TypeError(str(err)) from err + if mask is not None: + values[mask] = np.nan + + notempty = values.size + + # an array from a frame + if values.ndim > 1 and axis is not None: + + # there's a non-empty array to apply over otherwise numpy raises + if notempty: + if not skipna: + res = np.apply_along_axis(get_median, axis, values) + + else: + # fastpath for the skipna case + with warnings.catch_warnings(): + # Suppress RuntimeWarning about All-NaN slice + warnings.filterwarnings("ignore", "All-NaN slice encountered") + res = np.nanmedian(values, axis) + + else: + # must return the correct shape, but median is not defined for the + # empty set so return nans of shape "everything but the passed axis" + # since "axis" is where the reduction would occur if we had a nonempty + # array + res = get_empty_reduction_result(values.shape, axis, np.float_, np.nan) + + else: + # otherwise return a scalar value + res = get_median(values) if notempty else np.nan + return _wrap_results(res, dtype) + + +def get_empty_reduction_result( + shape: tuple[int, ...], + axis: int, + dtype: np.dtype | type[np.floating], + fill_value: Any, +) -> np.ndarray: + """ + The result from a reduction on an empty ndarray. + + Parameters + ---------- + shape : Tuple[int] + axis : int + dtype : np.dtype + fill_value : Any + + Returns + ------- + np.ndarray + """ + shp = np.array(shape) + dims = np.arange(len(shape)) + ret = np.empty(shp[dims != axis], dtype=dtype) + ret.fill(fill_value) + return ret + + +def _get_counts_nanvar( + values_shape: Shape, + mask: npt.NDArray[np.bool_] | None, + axis: int | None, + ddof: int, + dtype: np.dtype = np.dtype(np.float64), +) -> tuple[float | np.ndarray, float | np.ndarray]: + """ + Get the count of non-null values along an axis, accounting + for degrees of freedom. + + Parameters + ---------- + values_shape : Tuple[int, ...] + shape tuple from values ndarray, used if mask is None + mask : Optional[ndarray[bool]] + locations in values that should be considered missing + axis : Optional[int] + axis to count along + ddof : int + degrees of freedom + dtype : type, optional + type to use for count + + Returns + ------- + count : int, np.nan or np.ndarray + d : int, np.nan or np.ndarray + """ + count = _get_counts(values_shape, mask, axis, dtype=dtype) + d = count - dtype.type(ddof) + + # always return NaN, never inf + if is_scalar(count): + if count <= ddof: + count = np.nan + d = np.nan + else: + # count is not narrowed by is_scalar check + count = cast(np.ndarray, count) + mask = count <= ddof + if mask.any(): + np.putmask(d, mask, np.nan) + np.putmask(count, mask, np.nan) + return count, d + + +@bottleneck_switch(ddof=1) +def nanstd(values, *, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard deviation along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanstd(s) + 1.0 + """ + if values.dtype == "M8[ns]": + values = values.view("m8[ns]") + + orig_dtype = values.dtype + values, mask, _, _, _ = _get_values(values, skipna, mask=mask) + + result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)) + return _wrap_results(result, orig_dtype) + + +@disallow("M8", "m8") +@bottleneck_switch(ddof=1) +def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the variance along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanvar(s) + 1.0 + """ + values = extract_array(values, extract_numpy=True) + dtype = values.dtype + mask = _maybe_get_mask(values, skipna, mask) + if is_any_int_dtype(dtype): + values = values.astype("f8") + if mask is not None: + values[mask] = np.nan + + if is_float_dtype(values.dtype): + count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) + else: + count, d = _get_counts_nanvar(values.shape, mask, axis, ddof) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + # xref GH10242 + # Compute variance via two-pass algorithm, which is stable against + # cancellation errors and relatively accurate for small numbers of + # observations. + # + # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count + if axis is not None: + avg = np.expand_dims(avg, axis) + sqr = _ensure_numeric((avg - values) ** 2) + if mask is not None: + np.putmask(sqr, mask, 0) + result = sqr.sum(axis=axis, dtype=np.float64) / d + + # Return variance as np.float64 (the datatype used in the accumulator), + # unless we were dealing with a float array, in which case use the same + # precision as the original values array. + if is_float_dtype(dtype): + result = result.astype(dtype, copy=False) + return result + + +@disallow("M8", "m8") +def nansem( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + ddof: int = 1, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Compute the standard error in the mean along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nansem(s) + 0.5773502691896258 + """ + # This checks if non-numeric-like data is passed with numeric_only=False + # and raises a TypeError otherwise + nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask) + + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + + count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) + var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof) + + return np.sqrt(var) / np.sqrt(count) + + +def _nanminmax(meth, fill_value_typ): + @bottleneck_switch(name="nan" + meth) + @_datetimelike_compat + def reduction( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, + ) -> Dtype: + + values, mask, dtype, dtype_max, fill_value = _get_values( + values, skipna, fill_value_typ=fill_value_typ, mask=mask + ) + + if (axis is not None and values.shape[axis] == 0) or values.size == 0: + try: + result = getattr(values, meth)(axis, dtype=dtype_max) + result.fill(np.nan) + except (AttributeError, TypeError, ValueError): + result = np.nan + else: + result = getattr(values, meth)(axis) + + result = _maybe_null_out(result, axis, mask, values.shape) + return result + + return reduction + + +nanmin = _nanminmax("min", fill_value_typ="+inf") +nanmax = _nanminmax("max", fill_value_typ="-inf") + + +@disallow("O") +def nanargmax( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> int | np.ndarray: + """ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : int or ndarray[int] + The index/indices of max value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> arr = np.array([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmax(arr) + 4 + + >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) + >>> arr[2:, 2] = np.nan + >>> arr + array([[ 0., 1., 2.], + [ 3., 4., 5.], + [ 6., 7., nan], + [ 9., 10., nan]]) + >>> nanops.nanargmax(arr, axis=1) + array([2, 2, 1, 1]) + """ + values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask) + # error: Need type annotation for 'result' + result = values.argmax(axis) # type: ignore[var-annotated] + result = _maybe_arg_null_out(result, axis, mask, skipna) + return result + + +@disallow("O") +def nanargmin( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> int | np.ndarray: + """ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : int or ndarray[int] + The index/indices of min value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> arr = np.array([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmin(arr) + 0 + + >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) + >>> arr[2:, 0] = np.nan + >>> arr + array([[ 0., 1., 2.], + [ 3., 4., 5.], + [nan, 7., 8.], + [nan, 10., 11.]]) + >>> nanops.nanargmin(arr, axis=1) + array([0, 0, 1, 1]) + """ + values, mask, _, _, _ = _get_values(values, True, fill_value_typ="+inf", mask=mask) + # 
error: Need type annotation for 'result' + result = values.argmin(axis) # type: ignore[var-annotated] + result = _maybe_arg_null_out(result, axis, mask, skipna) + return result + + +@disallow("M8", "m8") +@maybe_operate_rowwise +def nanskew( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Compute the sample skewness. + + The statistic computed here is the adjusted Fisher-Pearson standardized + moment coefficient G1. The algorithm computes this coefficient directly + from the second and third central moment. + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 1, 2]) + >>> nanops.nanskew(s) + 1.7320508075688787 + """ + # error: Incompatible types in assignment (expression has type "Union[Any, + # Union[ExtensionArray, ndarray]]", variable has type "ndarray") + values = extract_array(values, extract_numpy=True) # type: ignore[assignment] + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + count = _get_counts(values.shape, mask, axis) + else: + count = _get_counts(values.shape, mask, axis, dtype=values.dtype) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + mean = values.sum(axis, dtype=np.float64) / count + if axis is not None: + mean = np.expand_dims(mean, axis) + + adjusted = values - mean + if skipna and mask is not None: + np.putmask(adjusted, mask, 0) + adjusted2 = adjusted**2 + adjusted3 = adjusted2 * adjusted + m2 = adjusted2.sum(axis, dtype=np.float64) + m3 = adjusted3.sum(axis, dtype=np.float64) + + # floating point error + # + # #18044 in _libs/windows.pyx calc_skew follow this behavior + # to fix the fperr to treat m2 <1e-14 as zero + m2 = _zero_out_fperr(m2) + m3 = _zero_out_fperr(m3) + + with np.errstate(invalid="ignore", divide="ignore"): + result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5) + + dtype = values.dtype + if is_float_dtype(dtype): + result = result.astype(dtype, copy=False) + + if isinstance(result, np.ndarray): + result = np.where(m2 == 0, 0, result) + result[count < 3] = np.nan + else: + result = 0 if m2 == 0 else result + if count < 3: + return np.nan + + return result + + +@disallow("M8", "m8") +@maybe_operate_rowwise +def nankurt( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Compute the sample excess kurtosis + + The statistic computed here is the adjusted Fisher-Pearson standardized + moment coefficient G2, computed directly from the second and fourth + central moment. + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 1, 3, 2]) + >>> nanops.nankurt(s) + -1.2892561983471076 + """ + # error: Incompatible types in assignment (expression has type "Union[Any, + # Union[ExtensionArray, ndarray]]", variable has type "ndarray") + values = extract_array(values, extract_numpy=True) # type: ignore[assignment] + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + count = _get_counts(values.shape, mask, axis) + else: + count = _get_counts(values.shape, mask, axis, dtype=values.dtype) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + mean = values.sum(axis, dtype=np.float64) / count + if axis is not None: + mean = np.expand_dims(mean, axis) + + adjusted = values - mean + if skipna and mask is not None: + np.putmask(adjusted, mask, 0) + adjusted2 = adjusted**2 + adjusted4 = adjusted2**2 + m2 = adjusted2.sum(axis, dtype=np.float64) + m4 = adjusted4.sum(axis, dtype=np.float64) + + with np.errstate(invalid="ignore", divide="ignore"): + adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) + numerator = count * (count + 1) * (count - 1) * m4 + denominator = (count - 2) * (count - 3) * m2**2 + + # floating point error + # + # #18044 in _libs/windows.pyx calc_kurt follow this behavior + # to fix the fperr to treat denom <1e-14 as zero + numerator = _zero_out_fperr(numerator) + denominator = _zero_out_fperr(denominator) + + if not isinstance(denominator, np.ndarray): + # if ``denom`` is a scalar, check these corner cases first before + # doing division + if count < 4: + return np.nan + if denominator == 0: + return 0 + + with np.errstate(invalid="ignore", divide="ignore"): + result = numerator / denominator - adj + + dtype = values.dtype + if is_float_dtype(dtype): + result = result.astype(dtype, copy=False) + + if isinstance(result, np.ndarray): + result = np.where(denominator == 0, 0, result) + result[count < 4] = np.nan + + return result + + +@disallow("M8", "m8") +@maybe_operate_rowwise +def nanprod( + values: np.ndarray, + *, + axis: int | None = None, + skipna: bool = True, + min_count: int = 0, + mask: npt.NDArray[np.bool_] | None = None, +) -> float: + """ + Parameters + ---------- + values : ndarray[dtype] + axis : int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + Dtype + The product of all elements on a given axis. 
( NaNs are treated as 1) + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan]) + >>> nanops.nanprod(s) + 6.0 + """ + mask = _maybe_get_mask(values, skipna, mask) + + if skipna and mask is not None: + values = values.copy() + values[mask] = 1 + result = values.prod(axis) + # error: Incompatible return value type (got "Union[ndarray, float]", expected + # "float") + return _maybe_null_out( # type: ignore[return-value] + result, axis, mask, values.shape, min_count=min_count + ) + + +def _maybe_arg_null_out( + result: np.ndarray, + axis: int | None, + mask: npt.NDArray[np.bool_] | None, + skipna: bool, +) -> np.ndarray | int: + # helper function for nanargmin/nanargmax + if mask is None: + return result + + if axis is None or not getattr(result, "ndim", False): + if skipna: + if mask.all(): + return -1 + else: + if mask.any(): + return -1 + else: + if skipna: + na_mask = mask.all(axis) + else: + na_mask = mask.any(axis) + if na_mask.any(): + result[na_mask] = -1 + return result + + +def _get_counts( + values_shape: Shape, + mask: npt.NDArray[np.bool_] | None, + axis: int | None, + dtype: np.dtype = np.dtype(np.float64), +) -> float | np.ndarray: + """ + Get the count of non-null values along an axis + + Parameters + ---------- + values_shape : tuple of int + shape tuple from values ndarray, used if mask is None + mask : Optional[ndarray[bool]] + locations in values that should be considered missing + axis : Optional[int] + axis to count along + dtype : type, optional + type to use for count + + Returns + ------- + count : scalar or array + """ + if axis is None: + if mask is not None: + n = mask.size - mask.sum() + else: + n = np.prod(values_shape) + return dtype.type(n) + + if mask is not None: + count = mask.shape[axis] - mask.sum(axis) + else: + count = values_shape[axis] + + if is_scalar(count): + return dtype.type(count) + return count.astype(dtype, copy=False) + + +def _maybe_null_out( + result: np.ndarray | float | NaTType, + axis: int | None, + mask: npt.NDArray[np.bool_] | None, + shape: tuple[int, ...], + min_count: int = 1, +) -> np.ndarray | float | NaTType: + """ + Returns + ------- + Dtype + The product of all elements on a given axis. ( NaNs are treated as 1) + """ + if axis is not None and isinstance(result, np.ndarray): + if mask is not None: + null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 + else: + # we have no nulls, kept mask=None in _maybe_get_mask + below_count = shape[axis] - min_count < 0 + new_shape = shape[:axis] + shape[axis + 1 :] + null_mask = np.broadcast_to(below_count, new_shape) + + if np.any(null_mask): + if is_numeric_dtype(result): + if np.iscomplexobj(result): + result = result.astype("c16") + elif not is_float_dtype(result): + result = result.astype("f8", copy=False) + result[null_mask] = np.nan + else: + # GH12941, use None to auto cast null + result[null_mask] = None + elif result is not NaT: + if check_below_min_count(shape, mask, min_count): + result = np.nan + + return result + + +def check_below_min_count( + shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int +) -> bool: + """ + Check for the `min_count` keyword. Returns True if below `min_count` (when + missing value should be returned from the reduction). + + Parameters + ---------- + shape : tuple + The shape of the values (`values.shape`). + mask : ndarray[bool] or None + Boolean numpy array (typically of same shape as `shape`) or None. + min_count : int + Keyword passed through from sum/prod call. 
+ + Returns + ------- + bool + """ + if min_count > 0: + if mask is None: + # no missing values, only check size + non_nulls = np.prod(shape) + else: + non_nulls = mask.size - mask.sum() + if non_nulls < min_count: + return True + return False + + +def _zero_out_fperr(arg): + # #18044 reference this behavior to fix rolling skew/kurt issue + if isinstance(arg, np.ndarray): + with np.errstate(invalid="ignore"): + return np.where(np.abs(arg) < 1e-14, 0, arg) + else: + return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg + + +@disallow("M8", "m8") +def nancorr( + a: np.ndarray, b: np.ndarray, *, method="pearson", min_periods: int | None = None +) -> float: + """ + a, b: ndarrays + """ + if len(a) != len(b): + raise AssertionError("Operands to nancorr must have same size") + + if min_periods is None: + min_periods = 1 + + valid = notna(a) & notna(b) + if not valid.all(): + a = a[valid] + b = b[valid] + + if len(a) < min_periods: + return np.nan + + f = get_corr_func(method) + return f(a, b) + + +def get_corr_func(method) -> Callable[[np.ndarray, np.ndarray], float]: + if method == "kendall": + from scipy.stats import kendalltau + + def func(a, b): + return kendalltau(a, b)[0] + + return func + elif method == "spearman": + from scipy.stats import spearmanr + + def func(a, b): + return spearmanr(a, b)[0] + + return func + elif method == "pearson": + + def func(a, b): + return np.corrcoef(a, b)[0, 1] + + return func + elif callable(method): + return method + + raise ValueError( + f"Unknown method '{method}', expected one of " + "'kendall', 'spearman', 'pearson', or callable" + ) + + +@disallow("M8", "m8") +def nancov( + a: np.ndarray, + b: np.ndarray, + *, + min_periods: int | None = None, + ddof: int | None = 1, +) -> float: + if len(a) != len(b): + raise AssertionError("Operands to nancov must have same size") + + if min_periods is None: + min_periods = 1 + + valid = notna(a) & notna(b) + if not valid.all(): + a = a[valid] + b = b[valid] + + if len(a) < min_periods: + return np.nan + + return np.cov(a, b, ddof=ddof)[0, 1] + + +def _ensure_numeric(x): + if isinstance(x, np.ndarray): + if is_integer_dtype(x) or is_bool_dtype(x): + x = x.astype(np.float64) + elif is_object_dtype(x): + try: + x = x.astype(np.complex128) + except (TypeError, ValueError): + try: + x = x.astype(np.float64) + except ValueError as err: + # GH#29941 we get here with object arrays containing strs + raise TypeError(f"Could not convert {x} to numeric") from err + else: + if not np.any(np.imag(x)): + x = x.real + elif not (is_float(x) or is_integer(x) or is_complex(x)): + try: + x = float(x) + except (TypeError, ValueError): + # e.g. "1+1j" or "foo" + try: + x = complex(x) + except ValueError as err: + # e.g. "foo" + raise TypeError(f"Could not convert {x} to numeric") from err + return x + + +# NA-friendly array comparisons + + +def make_nancomp(op): + def f(x, y): + xmask = isna(x) + ymask = isna(y) + mask = xmask | ymask + + with np.errstate(all="ignore"): + result = op(x, y) + + if mask.any(): + if is_bool_dtype(result): + result = result.astype("O") + np.putmask(result, mask, np.nan) + + return result + + return f + + +nangt = make_nancomp(operator.gt) +nange = make_nancomp(operator.ge) +nanlt = make_nancomp(operator.lt) +nanle = make_nancomp(operator.le) +naneq = make_nancomp(operator.eq) +nanne = make_nancomp(operator.ne) + + +def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: + """ + Cumulative function with skipna support. 
+ + Parameters + ---------- + values : np.ndarray or ExtensionArray + accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate} + skipna : bool + + Returns + ------- + np.ndarray or ExtensionArray + """ + mask_a, mask_b = { + np.cumprod: (1.0, np.nan), + np.maximum.accumulate: (-np.inf, np.nan), + np.cumsum: (0.0, np.nan), + np.minimum.accumulate: (np.inf, np.nan), + }[accum_func] + + # We will be applying this function to block values + if values.dtype.kind in ["m", "M"]: + # GH#30460, GH#29058 + # numpy 1.18 started sorting NaTs at the end instead of beginning, + # so we need to work around to maintain backwards-consistency. + orig_dtype = values.dtype + + # We need to define mask before masking NaTs + mask = isna(values) + + y = values.view("i8") + # Note: the accum_func comparison fails as an "is" comparison + changed = accum_func == np.minimum.accumulate + + try: + if changed: + y[mask] = lib.i8max + + result = accum_func(y, axis=0) + finally: + if changed: + # restore NaT elements + y[mask] = iNaT + + if skipna: + result[mask] = iNaT + elif accum_func == np.minimum.accumulate: + # Restore NaTs that we masked previously + nz = (~np.asarray(mask)).nonzero()[0] + if len(nz): + # everything up to the first non-na entry stays NaT + result[: nz[0]] = iNaT + + if isinstance(values.dtype, np.dtype): + result = result.view(orig_dtype) + else: + # DatetimeArray/TimedeltaArray + # TODO: have this case go through a DTA method? + # For DatetimeTZDtype, view result as M8[ns] + npdtype = orig_dtype if isinstance(orig_dtype, np.dtype) else "M8[ns]" + # Item "type" of "Union[Type[ExtensionArray], Type[ndarray[Any, Any]]]" + # has no attribute "_simple_new" + result = type(values)._simple_new( # type: ignore[union-attr] + result.view(npdtype), dtype=orig_dtype + ) + + elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): + vals = values.copy() + mask = isna(vals) + vals[mask] = mask_a + result = accum_func(vals, axis=0) + result[mask] = mask_b + else: + result = accum_func(values, axis=0) + + return result diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py new file mode 100644 index 00000000..cd470b8f --- /dev/null +++ b/pandas/core/ops/__init__.py @@ -0,0 +1,514 @@ +""" +Arithmetic operations for PandasObjects + +This is not a public API. 
+""" +from __future__ import annotations + +import operator +from typing import TYPE_CHECKING +import warnings + +import numpy as np + +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op +from pandas._typing import Level +from pandas.util._decorators import Appender +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_array_like, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import ( + algorithms, + roperator, +) +from pandas.core.ops.array_ops import ( + arithmetic_op, + comp_method_OBJECT_ARRAY, + comparison_op, + get_array_op, + logical_op, + maybe_prepare_scalar_for_op, +) +from pandas.core.ops.common import ( + get_op_result_name, + unpack_zerodim_and_defer, +) +from pandas.core.ops.docstrings import ( + _flex_comp_doc_FRAME, + _op_descriptions, + make_flex_doc, +) +from pandas.core.ops.invalid import invalid_comparison +from pandas.core.ops.mask_ops import ( + kleene_and, + kleene_or, + kleene_xor, +) +from pandas.core.ops.methods import add_flex_arithmetic_methods +from pandas.core.roperator import ( + radd, + rand_, + rdiv, + rdivmod, + rfloordiv, + rmod, + rmul, + ror_, + rpow, + rsub, + rtruediv, + rxor, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + +# ----------------------------------------------------------------------------- +# constants +ARITHMETIC_BINOPS: set[str] = { + "add", + "sub", + "mul", + "pow", + "mod", + "floordiv", + "truediv", + "divmod", + "radd", + "rsub", + "rmul", + "rpow", + "rmod", + "rfloordiv", + "rtruediv", + "rdivmod", +} + + +COMPARISON_BINOPS: set[str] = {"eq", "ne", "lt", "gt", "le", "ge"} + + +# ----------------------------------------------------------------------------- +# Masking NA values and fallbacks for operations numpy does not support + + +def fill_binop(left, right, fill_value): + """ + If a non-None fill_value is given, replace null entries in left and right + with this value, but only in positions where _one_ of left/right is null, + not both. + + Parameters + ---------- + left : array-like + right : array-like + fill_value : object + + Returns + ------- + left : array-like + right : array-like + + Notes + ----- + Makes copies if fill_value is not None and NAs are present. 
+ """ + if fill_value is not None: + left_mask = isna(left) + right_mask = isna(right) + + # one but not both + mask = left_mask ^ right_mask + + if left_mask.any(): + # Avoid making a copy if we can + left = left.copy() + left[left_mask & mask] = fill_value + + if right_mask.any(): + # Avoid making a copy if we can + right = right.copy() + right[right_mask & mask] = fill_value + + return left, right + + +# ----------------------------------------------------------------------------- +# Series + + +def align_method_SERIES(left: Series, right, align_asobject: bool = False): + """align lhs and rhs Series""" + # ToDo: Different from align_method_FRAME, list, tuple and ndarray + # are not coerced here + # because Series has inconsistencies described in #13637 + + if isinstance(right, ABCSeries): + # avoid repeated alignment + if not left.index.equals(right.index): + + if align_asobject: + # to keep original value's dtype for bool ops + left = left.astype(object) + right = right.astype(object) + + left, right = left.align(right, copy=False) + + return left, right + + +def flex_method_SERIES(op): + name = op.__name__.strip("_") + doc = make_flex_doc(name, "series") + + @Appender(doc) + def flex_wrapper(self, other, level=None, fill_value=None, axis=0): + # validate axis + if axis is not None: + self._get_axis_number(axis) + + res_name = get_op_result_name(self, other) + + if isinstance(other, ABCSeries): + return self._binop(other, op, level=level, fill_value=fill_value) + elif isinstance(other, (np.ndarray, list, tuple)): + if len(other) != len(self): + raise ValueError("Lengths must be equal") + other = self._constructor(other, self.index) + result = self._binop(other, op, level=level, fill_value=fill_value) + result.name = res_name + return result + else: + if fill_value is not None: + self = self.fillna(fill_value) + + return op(self, other) + + flex_wrapper.__name__ = name + return flex_wrapper + + +# ----------------------------------------------------------------------------- +# DataFrame + + +def align_method_FRAME( + left, right, axis, flex: bool | None = False, level: Level = None +): + """ + Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. + + Parameters + ---------- + left : DataFrame + right : Any + axis : int, str, or None + flex : bool or None, default False + Whether this is a flex op, in which case we reindex. + None indicates not to check for alignment. 
+ level : int or level name, default None + + Returns + ------- + left : DataFrame + right : Any + """ + + def to_series(right): + msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}" + if axis is not None and left._get_axis_name(axis) == "index": + if len(left.index) != len(right): + raise ValueError( + msg.format(req_len=len(left.index), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.index) + else: + if len(left.columns) != len(right): + raise ValueError( + msg.format(req_len=len(left.columns), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.columns) + return right + + if isinstance(right, np.ndarray): + + if right.ndim == 1: + right = to_series(right) + + elif right.ndim == 2: + if right.shape == left.shape: + right = left._constructor(right, index=left.index, columns=left.columns) + + elif right.shape[0] == left.shape[0] and right.shape[1] == 1: + # Broadcast across columns + right = np.broadcast_to(right, left.shape) + right = left._constructor(right, index=left.index, columns=left.columns) + + elif right.shape[1] == left.shape[1] and right.shape[0] == 1: + # Broadcast along rows + right = to_series(right[0, :]) + + else: + raise ValueError( + "Unable to coerce to DataFrame, shape " + f"must be {left.shape}: given {right.shape}" + ) + + elif right.ndim > 2: + raise ValueError( + "Unable to coerce to Series/DataFrame, " + f"dimension must be <= 2: {right.shape}" + ) + + elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)): + # GH 36702. Raise when attempting arithmetic with list of array-like. + if any(is_array_like(el) for el in right): + raise ValueError( + f"Unable to coerce list of {type(right[0])} to Series/DataFrame" + ) + # GH17901 + right = to_series(right) + + if flex is not None and isinstance(right, ABCDataFrame): + if not left._indexed_same(right): + if flex: + left, right = left.align(right, join="outer", level=level, copy=False) + else: + raise ValueError( + "Can only compare identically-labeled DataFrame objects" + ) + elif isinstance(right, ABCSeries): + # axis=1 is default for DataFrame-with-Series op + axis = left._get_axis_number(axis) if axis is not None else 1 + + if not flex: + if not left.axes[axis].equals(right.index): + warnings.warn( + "Automatic reindexing on DataFrame vs Series comparisons " + "is deprecated and will raise ValueError in a future version. " + "Do `left, right = left.align(right, axis=1, copy=False)` " + "before e.g. `left == right`", + FutureWarning, + stacklevel=find_stack_level(), + ) + + left, right = left.align( + right, join="outer", axis=axis, level=level, copy=False + ) + right = _maybe_align_series_as_frame(left, right, axis) + + return left, right + + +def should_reindex_frame_op( + left: DataFrame, right, op, axis, default_axis, fill_value, level +) -> bool: + """ + Check if this is an operation between DataFrames that will need to reindex. + """ + assert isinstance(left, ABCDataFrame) + + if op is operator.pow or op is roperator.rpow: + # GH#32685 pow has special semantics for operating with null values + return False + + if not isinstance(right, ABCDataFrame): + return False + + if fill_value is None and level is None and axis is default_axis: + # TODO: any other cases we should handle here? 
+ + # Intersection is always unique so we have to check the unique columns + left_uniques = left.columns.unique() + right_uniques = right.columns.unique() + cols = left_uniques.intersection(right_uniques) + if len(cols) and not ( + len(cols) == len(left_uniques) and len(cols) == len(right_uniques) + ): + # TODO: is there a shortcut available when len(cols) == 0? + return True + + return False + + +def frame_arith_method_with_reindex(left: DataFrame, right: DataFrame, op) -> DataFrame: + """ + For DataFrame-with-DataFrame operations that require reindexing, + operate only on shared columns, then reindex. + + Parameters + ---------- + left : DataFrame + right : DataFrame + op : binary operator + + Returns + ------- + DataFrame + """ + # GH#31623, only operate on shared columns + cols, lcols, rcols = left.columns.join( + right.columns, how="inner", level=None, return_indexers=True + ) + + new_left = left.iloc[:, lcols] + new_right = right.iloc[:, rcols] + result = op(new_left, new_right) + + # Do the join on the columns instead of using align_method_FRAME + # to avoid constructing two potentially large/sparse DataFrames + join_columns, _, _ = left.columns.join( + right.columns, how="outer", level=None, return_indexers=True + ) + + if result.columns.has_duplicates: + # Avoid reindexing with a duplicate axis. + # https://github.com/pandas-dev/pandas/issues/35194 + indexer, _ = result.columns.get_indexer_non_unique(join_columns) + indexer = algorithms.unique1d(indexer) + result = result._reindex_with_indexers( + {1: [join_columns, indexer]}, allow_dups=True + ) + else: + result = result.reindex(join_columns, axis=1) + + return result + + +def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: int): + """ + If the Series operand is not EA-dtype, we can broadcast to 2D and operate + blockwise. 
+ """ + rvalues = series._values + if not isinstance(rvalues, np.ndarray): + # TODO(EA2D): no need to special-case with 2D EAs + if rvalues.dtype == "datetime64[ns]" or rvalues.dtype == "timedelta64[ns]": + # We can losslessly+cheaply cast to ndarray + rvalues = np.asarray(rvalues) + else: + return series + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) + else: + rvalues = rvalues.reshape(1, -1) + + rvalues = np.broadcast_to(rvalues, frame.shape) + return type(frame)(rvalues, index=frame.index, columns=frame.columns) + + +def flex_arith_method_FRAME(op): + op_name = op.__name__.strip("_") + default_axis = "columns" + + na_op = get_array_op(op) + doc = make_flex_doc(op_name, "dataframe") + + @Appender(doc) + def f(self, other, axis=default_axis, level=None, fill_value=None): + + if should_reindex_frame_op( + self, other, op, axis, default_axis, fill_value, level + ): + return frame_arith_method_with_reindex(self, other, op) + + if isinstance(other, ABCSeries) and fill_value is not None: + # TODO: We could allow this in cases where we end up going + # through the DataFrame path + raise NotImplementedError(f"fill_value {fill_value} not supported.") + + axis = self._get_axis_number(axis) if axis is not None else 1 + + other = maybe_prepare_scalar_for_op(other, self.shape) + self, other = align_method_FRAME(self, other, axis, flex=True, level=level) + + if isinstance(other, ABCDataFrame): + # Another DataFrame + new_data = self._combine_frame(other, na_op, fill_value) + + elif isinstance(other, ABCSeries): + new_data = self._dispatch_frame_op(other, op, axis=axis) + else: + # in this case we always have `np.ndim(other) == 0` + if fill_value is not None: + self = self.fillna(fill_value) + + new_data = self._dispatch_frame_op(other, op) + + return self._construct_result(new_data) + + f.__name__ = op_name + + return f + + +def flex_comp_method_FRAME(op): + op_name = op.__name__.strip("_") + default_axis = "columns" # because we are "flex" + + doc = _flex_comp_doc_FRAME.format( + op_name=op_name, desc=_op_descriptions[op_name]["desc"] + ) + + @Appender(doc) + def f(self, other, axis=default_axis, level=None): + axis = self._get_axis_number(axis) if axis is not None else 1 + + self, other = align_method_FRAME(self, other, axis, flex=True, level=level) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + f.__name__ = op_name + + return f + + +__all__ = [ + "add_flex_arithmetic_methods", + "align_method_FRAME", + "align_method_SERIES", + "ARITHMETIC_BINOPS", + "arithmetic_op", + "COMPARISON_BINOPS", + "comparison_op", + "comp_method_OBJECT_ARRAY", + "fill_binop", + "flex_arith_method_FRAME", + "flex_comp_method_FRAME", + "flex_method_SERIES", + "frame_arith_method_with_reindex", + "invalid_comparison", + "kleene_and", + "kleene_or", + "kleene_xor", + "logical_op", + "maybe_dispatch_ufunc_to_dunder_op", + "radd", + "rand_", + "rdiv", + "rdivmod", + "rfloordiv", + "rmod", + "rmul", + "ror_", + "rpow", + "rsub", + "rtruediv", + "rxor", + "should_reindex_frame_op", + "unpack_zerodim_and_defer", +] diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py new file mode 100644 index 00000000..6a1c586d --- /dev/null +++ b/pandas/core/ops/array_ops.py @@ -0,0 +1,526 @@ +""" +Functions for arithmetic and comparison operations on NumPy arrays and +ExtensionArrays. 
+""" +from __future__ import annotations + +import datetime +from functools import partial +import operator +from typing import Any + +import numpy as np + +from pandas._libs import ( + NaT, + Timedelta, + Timestamp, + lib, + ops as libops, +) +from pandas._libs.tslibs import BaseOffset +from pandas._typing import ( + ArrayLike, + Shape, +) + +from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike, + find_common_type, +) +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_integer_dtype, + is_list_like, + is_numeric_v_string_like, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +import pandas.core.computation.expressions as expressions +from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.ops import ( + missing, + roperator, +) +from pandas.core.ops.dispatch import should_extension_dispatch +from pandas.core.ops.invalid import invalid_comparison + + +def comp_method_OBJECT_ARRAY(op, x, y): + if isinstance(y, list): + y = construct_1d_object_array_from_listlike(y) + + if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): + if not is_object_dtype(y.dtype): + y = y.astype(np.object_) + + if isinstance(y, (ABCSeries, ABCIndex)): + y = y._values + + if x.shape != y.shape: + raise ValueError("Shapes must match", x.shape, y.shape) + result = libops.vec_compare(x.ravel(), y.ravel(), op) + else: + result = libops.scalar_compare(x.ravel(), y, op) + return result.reshape(x.shape) + + +def _masked_arith_op(x: np.ndarray, y, op): + """ + If the given arithmetic operation fails, attempt it again on + only the non-null elements of the input array(s). + + Parameters + ---------- + x : np.ndarray + y : np.ndarray, Series, Index + op : binary operator + """ + # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes + # the logic valid for both Series and DataFrame ops. + xrav = x.ravel() + assert isinstance(x, np.ndarray), type(x) + if isinstance(y, np.ndarray): + dtype = find_common_type([x.dtype, y.dtype]) + result = np.empty(x.size, dtype=dtype) + + if len(x) != len(y): + raise ValueError(x.shape, y.shape) + else: + ymask = notna(y) + + # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex + # we would get int64 dtype, see GH#19956 + yrav = y.ravel() + mask = notna(xrav) & ymask.ravel() + + # See GH#5284, GH#5035, GH#19448 for historical reference + if mask.any(): + result[mask] = op(xrav[mask], yrav[mask]) + + else: + if not is_scalar(y): + raise TypeError( + f"Cannot broadcast np.ndarray with operand of type { type(y) }" + ) + + # mask is only meaningful for x + result = np.empty(x.size, dtype=x.dtype) + mask = notna(xrav) + + # 1 ** np.nan is 1. So we have to unmask those. + if op is pow: + mask = np.where(x == 1, False, mask) + elif op is roperator.rpow: + mask = np.where(y == 1, False, mask) + + if mask.any(): + result[mask] = op(xrav[mask], y) + + np.putmask(result, ~mask, np.nan) + result = result.reshape(x.shape) # 2D compat + return result + + +def _na_arithmetic_op(left: np.ndarray, right, op, is_cmp: bool = False): + """ + Return the result of evaluating op on the passed in values. + + If native types are not compatible, try coercion to object dtype. + + Parameters + ---------- + left : np.ndarray + right : np.ndarray or scalar + Excludes DataFrame, Series, Index, ExtensionArray. + is_cmp : bool, default False + If this a comparison operation. 
+ + Returns + ------- + array-like + + Raises + ------ + TypeError : invalid operation + """ + if isinstance(right, str): + # can never use numexpr + func = op + else: + func = partial(expressions.evaluate, op) + + try: + result = func(left, right) + except TypeError: + if not is_cmp and (is_object_dtype(left.dtype) or is_object_dtype(right)): + # For object dtype, fallback to a masked operation (only operating + # on the non-missing values) + # Don't do this for comparisons, as that will handle complex numbers + # incorrectly, see GH#32047 + result = _masked_arith_op(left, right, op) + else: + raise + + if is_cmp and (is_scalar(result) or result is NotImplemented): + # numpy returned a scalar instead of operating element-wise + # e.g. numeric array vs str + # TODO: can remove this after dropping some future numpy version? + return invalid_comparison(left, right, op) + + return missing.dispatch_fill_zeros(op, left, right, result) + + +def arithmetic_op(left: ArrayLike, right: Any, op): + """ + Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ... + + Note: the caller is responsible for ensuring that numpy warnings are + suppressed (with np.errstate(all="ignore")) if needed. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame or Index. Series is *not* excluded. + op : {operator.add, operator.sub, ...} + Or one of the reversed variants from roperator. + + Returns + ------- + ndarray or ExtensionArray + Or a 2-tuple of these in the case of divmod or rdivmod. + """ + # NB: We assume that extract_array and ensure_wrapped_if_datetimelike + # have already been called on `left` and `right`, + # and `maybe_prepare_scalar_for_op` has already been called on `right` + # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy + # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) + + if ( + should_extension_dispatch(left, right) + or isinstance(right, (Timedelta, BaseOffset, Timestamp)) + or right is NaT + ): + # Timedelta/Timestamp and other custom scalars are included in the check + # because numexpr will fail on it, see GH#31457 + res_values = op(left, right) + else: + # TODO we should handle EAs consistently and move this check before the if/else + # (https://github.com/pandas-dev/pandas/issues/41165) + _bool_arith_check(op, left, right) + + # error: Argument 1 to "_na_arithmetic_op" has incompatible type + # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]" + res_values = _na_arithmetic_op(left, right, op) # type: ignore[arg-type] + + return res_values + + +def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: + """ + Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`. + + Note: the caller is responsible for ensuring that numpy warnings are + suppressed (with np.errstate(all="ignore")) if needed. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame, Series, or Index. + op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le} + + Returns + ------- + ndarray or ExtensionArray + """ + # NB: We assume extract_array has already been called on left and right + lvalues = ensure_wrapped_if_datetimelike(left) + rvalues = ensure_wrapped_if_datetimelike(right) + + rvalues = lib.item_from_zerodim(rvalues) + if isinstance(rvalues, list): + # We don't catch tuple here bc we may be comparing e.g. 
MultiIndex + # to a tuple that represents a single entry, see test_compare_tuple_strs + rvalues = np.asarray(rvalues) + + if isinstance(rvalues, (np.ndarray, ABCExtensionArray)): + # TODO: make this treatment consistent across ops and classes. + # We are not catching all listlikes here (e.g. frozenset, tuple) + # The ambiguous case is object-dtype. See GH#27803 + if len(lvalues) != len(rvalues): + raise ValueError( + "Lengths must match to compare", lvalues.shape, rvalues.shape + ) + + if should_extension_dispatch(lvalues, rvalues) or ( + (isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT) + and not is_object_dtype(lvalues.dtype) + ): + # Call the method on lvalues + res_values = op(lvalues, rvalues) + + elif is_scalar(rvalues) and isna(rvalues): # TODO: but not pd.NA? + # numpy does not like comparisons vs None + if op is operator.ne: + res_values = np.ones(lvalues.shape, dtype=bool) + else: + res_values = np.zeros(lvalues.shape, dtype=bool) + + elif is_numeric_v_string_like(lvalues, rvalues): + # GH#36377 going through the numexpr path would incorrectly raise + return invalid_comparison(lvalues, rvalues, op) + + elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str): + res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) + + else: + res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True) + + return res_values + + +def na_logical_op(x: np.ndarray, y, op): + try: + # For exposition, write: + # yarr = isinstance(y, np.ndarray) + # yint = is_integer(y) or (yarr and y.dtype.kind == "i") + # ybool = is_bool(y) or (yarr and y.dtype.kind == "b") + # xint = x.dtype.kind == "i" + # xbool = x.dtype.kind == "b" + # Then Cases where this goes through without raising include: + # (xint or xbool) and (yint or bool) + result = op(x, y) + except TypeError: + if isinstance(y, np.ndarray): + # bool-bool dtype operations should be OK, should not get here + assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)) + x = ensure_object(x) + y = ensure_object(y) + result = libops.vec_binop(x.ravel(), y.ravel(), op) + else: + # let null fall thru + assert lib.is_scalar(y) + if not isna(y): + y = bool(y) + try: + result = libops.scalar_binop(x, y, op) + except ( + TypeError, + ValueError, + AttributeError, + OverflowError, + NotImplementedError, + ) as err: + typ = type(y).__name__ + raise TypeError( + f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array " + f"and scalar of type [{typ}]" + ) from err + + return result.reshape(x.shape) + + +def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike: + """ + Evaluate a logical operation `|`, `&`, or `^`. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame, Series, or Index. + op : {operator.and_, operator.or_, operator.xor} + Or one of the reversed variants from roperator. + + Returns + ------- + ndarray or ExtensionArray + """ + fill_int = lambda x: x + + def fill_bool(x, left=None): + # if `left` is specifically not-boolean, we do not cast to bool + if x.dtype.kind in ["c", "f", "O"]: + # dtypes that can hold NA + mask = isna(x) + if mask.any(): + x = x.astype(object) + x[mask] = False + + if left is None or is_bool_dtype(left.dtype): + x = x.astype(bool) + return x + + is_self_int_dtype = is_integer_dtype(left.dtype) + + right = lib.item_from_zerodim(right) + if is_list_like(right) and not hasattr(right, "dtype"): + # e.g. 
list, tuple + right = construct_1d_object_array_from_listlike(right) + + # NB: We assume extract_array has already been called on left and right + lvalues = ensure_wrapped_if_datetimelike(left) + rvalues = right + + if should_extension_dispatch(lvalues, rvalues): + # Call the method on lvalues + res_values = op(lvalues, rvalues) + + else: + if isinstance(rvalues, np.ndarray): + is_other_int_dtype = is_integer_dtype(rvalues.dtype) + rvalues = rvalues if is_other_int_dtype else fill_bool(rvalues, lvalues) + + else: + # i.e. scalar + is_other_int_dtype = lib.is_integer(rvalues) + + # For int vs int `^`, `|`, `&` are bitwise operators and return + # integer dtypes. Otherwise these are boolean ops + filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool + + res_values = na_logical_op(lvalues, rvalues, op) + # error: Cannot call function of unknown type + res_values = filler(res_values) # type: ignore[operator] + + return res_values + + +def get_array_op(op): + """ + Return a binary array operation corresponding to the given operator op. + + Parameters + ---------- + op : function + Binary operator from operator or roperator module. + + Returns + ------- + functools.partial + """ + if isinstance(op, partial): + # We get here via dispatch_to_series in DataFrame case + # e.g. test_rolling_consistency_var_debiasing_factors + return op + + op_name = op.__name__.strip("_").lstrip("r") + if op_name == "arith_op": + # Reached via DataFrame._combine_frame i.e. flex methods + # e.g. test_df_add_flex_filled_mixed_dtypes + return op + + if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: + return partial(comparison_op, op=op) + elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: + return partial(logical_op, op=op) + elif op_name in { + "add", + "sub", + "mul", + "truediv", + "floordiv", + "mod", + "divmod", + "pow", + }: + return partial(arithmetic_op, op=op) + else: + raise NotImplementedError(op_name) + + +def maybe_prepare_scalar_for_op(obj, shape: Shape): + """ + Cast non-pandas objects to pandas types to unify behavior of arithmetic + and comparison operations. + + Parameters + ---------- + obj: object + shape : tuple[int] + + Returns + ------- + out : object + + Notes + ----- + Be careful to call this *after* determining the `name` attribute to be + attached to the result of the arithmetic operation. 
+ """ + if type(obj) is datetime.timedelta: + # GH#22390 cast up to Timedelta to rely on Timedelta + # implementation; otherwise operation against numeric-dtype + # raises TypeError + return Timedelta(obj) + elif type(obj) is datetime.datetime: + # cast up to Timestamp to rely on Timestamp implementation, see Timedelta above + return Timestamp(obj) + elif isinstance(obj, np.datetime64): + # GH#28080 numpy casts integer-dtype to datetime64 when doing + # array[int] + datetime64, which we do not allow + if isna(obj): + from pandas.core.arrays import DatetimeArray + + # Avoid possible ambiguities with pd.NaT + obj = obj.astype("datetime64[ns]") + right = np.broadcast_to(obj, shape) + return DatetimeArray(right) + + return Timestamp(obj) + + elif isinstance(obj, np.timedelta64): + if isna(obj): + from pandas.core.arrays import TimedeltaArray + + # wrapping timedelta64("NaT") in Timedelta returns NaT, + # which would incorrectly be treated as a datetime-NaT, so + # we broadcast and wrap in a TimedeltaArray + obj = obj.astype("timedelta64[ns]") + right = np.broadcast_to(obj, shape) + return TimedeltaArray(right) + + # In particular non-nanosecond timedelta64 needs to be cast to + # nanoseconds, or else we get undesired behavior like + # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D') + return Timedelta(obj) + + return obj + + +_BOOL_OP_NOT_ALLOWED = { + operator.truediv, + roperator.rtruediv, + operator.floordiv, + roperator.rfloordiv, + operator.pow, + roperator.rpow, +} + + +def _bool_arith_check(op, a, b): + """ + In contrast to numpy, pandas raises an error for certain operations + with booleans. + """ + if op in _BOOL_OP_NOT_ALLOWED: + if is_bool_dtype(a.dtype) and ( + is_bool_dtype(b) or isinstance(b, (bool, np.bool_)) + ): + op_name = op.__name__.strip("_").lstrip("r") + raise NotImplementedError( + f"operator '{op_name}' not implemented for bool dtypes" + ) diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py new file mode 100644 index 00000000..f0e6aa37 --- /dev/null +++ b/pandas/core/ops/common.py @@ -0,0 +1,142 @@ +""" +Boilerplate functions used in defining binary operations. +""" +from __future__ import annotations + +from functools import wraps +from typing import Callable + +from pandas._libs.lib import item_from_zerodim +from pandas._libs.missing import is_matching_na +from pandas._typing import F + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) + + +def unpack_zerodim_and_defer(name: str) -> Callable[[F], F]: + """ + Boilerplate for pandas conventions in arithmetic and comparison methods. + + Parameters + ---------- + name : str + + Returns + ------- + decorator + """ + + def wrapper(method: F) -> F: + return _unpack_zerodim_and_defer(method, name) + + return wrapper + + +def _unpack_zerodim_and_defer(method, name: str): + """ + Boilerplate for pandas conventions in arithmetic and comparison methods. + + Ensure method returns NotImplemented when operating against "senior" + classes. Ensure zero-dimensional ndarrays are always unpacked. 
+ + Parameters + ---------- + method : binary method + name : str + + Returns + ------- + method + """ + is_cmp = name.strip("__") in {"eq", "ne", "lt", "le", "gt", "ge"} + + @wraps(method) + def new_method(self, other): + + if is_cmp and isinstance(self, ABCIndex) and isinstance(other, ABCSeries): + # For comparison ops, Index does *not* defer to Series + pass + else: + for cls in [ABCDataFrame, ABCSeries, ABCIndex]: + if isinstance(self, cls): + break + if isinstance(other, cls): + return NotImplemented + + other = item_from_zerodim(other) + + return method(self, other) + + return new_method + + +def get_op_result_name(left, right): + """ + Find the appropriate name to pin to an operation result. This result + should always be either an Index or a Series. + + Parameters + ---------- + left : {Series, Index} + right : object + + Returns + ------- + name : object + Usually a string + """ + if isinstance(right, (ABCSeries, ABCIndex)): + name = _maybe_match_name(left, right) + else: + name = left.name + return name + + +def _maybe_match_name(a, b): + """ + Try to find a name to attach to the result of an operation between + a and b. If only one of these has a `name` attribute, return that + name. Otherwise return a consensus name if they match or None if + they have different names. + + Parameters + ---------- + a : object + b : object + + Returns + ------- + name : str or None + + See Also + -------- + pandas.core.common.consensus_name_attr + """ + a_has = hasattr(a, "name") + b_has = hasattr(b, "name") + if a_has and b_has: + try: + if a.name == b.name: + return a.name + elif is_matching_na(a.name, b.name): + # e.g. both are np.nan + return a.name + else: + return None + except TypeError: + # pd.NA + if is_matching_na(a.name, b.name): + return a.name + return None + except ValueError: + # e.g. np.int64(1) vs (np.int64(1), np.int64(2)) + return None + elif a_has: + return a.name + elif b_has: + return b.name + return None diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py new file mode 100644 index 00000000..2f500703 --- /dev/null +++ b/pandas/core/ops/dispatch.py @@ -0,0 +1,26 @@ +""" +Functions for defining unary operations. +""" +from __future__ import annotations + +from typing import Any + +from pandas._typing import ArrayLike + +from pandas.core.dtypes.generic import ABCExtensionArray + + +def should_extension_dispatch(left: ArrayLike, right: Any) -> bool: + """ + Identify cases where Series operation should dispatch to ExtensionArray method. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + + Returns + ------- + bool + """ + return isinstance(left, ABCExtensionArray) or isinstance(right, ABCExtensionArray) diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py new file mode 100644 index 00000000..9c3158b3 --- /dev/null +++ b/pandas/core/ops/docstrings.py @@ -0,0 +1,765 @@ +""" +Templating for ops docstrings +""" +from __future__ import annotations + + +def make_flex_doc(op_name: str, typ: str) -> str: + """ + Make the appropriate substitutions for the given operation and class-typ + into either _flex_doc_SERIES or _flex_doc_FRAME to return the docstring + to attach to a generated method. + + Parameters + ---------- + op_name : str {'__add__', '__sub__', ... 
'__eq__', '__ne__', ...} + typ : str {series, 'dataframe']} + + Returns + ------- + doc : str + """ + op_name = op_name.replace("__", "") + op_desc = _op_descriptions[op_name] + + op_desc_op = op_desc["op"] + assert op_desc_op is not None # for mypy + if op_name.startswith("r"): + equiv = "other " + op_desc_op + " " + typ + elif op_name == "divmod": + equiv = f"{op_name}({typ}, other)" + else: + equiv = typ + " " + op_desc_op + " other" + + if typ == "series": + base_doc = _flex_doc_SERIES + if op_desc["reverse"]: + base_doc += _see_also_reverse_SERIES.format( + reverse=op_desc["reverse"], see_also_desc=op_desc["see_also_desc"] + ) + doc_no_examples = base_doc.format( + desc=op_desc["desc"], + op_name=op_name, + equiv=equiv, + series_returns=op_desc["series_returns"], + ) + ser_example = op_desc["series_examples"] + if ser_example: + doc = doc_no_examples + ser_example + else: + doc = doc_no_examples + elif typ == "dataframe": + base_doc = _flex_doc_FRAME + doc = base_doc.format( + desc=op_desc["desc"], + op_name=op_name, + equiv=equiv, + reverse=op_desc["reverse"], + ) + else: + raise AssertionError("Invalid typ argument.") + return doc + + +_common_examples_algebra_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64""" + +_common_examples_comparison_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan, 1], index=['a', 'b', 'c', 'd', 'e']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +e 1.0 +dtype: float64 +>>> b = pd.Series([0, 1, 2, np.nan, 1], index=['a', 'b', 'c', 'd', 'f']) +>>> b +a 0.0 +b 1.0 +c 2.0 +d NaN +f 1.0 +dtype: float64""" + +_add_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.add(b, fill_value=0) +a 2.0 +b 1.0 +c 1.0 +d 1.0 +e NaN +dtype: float64 +""" +) + +_sub_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.subtract(b, fill_value=0) +a 0.0 +b 1.0 +c 1.0 +d -1.0 +e NaN +dtype: float64 +""" +) + +_mul_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.multiply(b, fill_value=0) +a 1.0 +b 0.0 +c 0.0 +d 0.0 +e NaN +dtype: float64 +""" +) + +_div_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.divide(b, fill_value=0) +a 1.0 +b inf +c inf +d 0.0 +e NaN +dtype: float64 +""" +) + +_floordiv_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.floordiv(b, fill_value=0) +a 1.0 +b NaN +c NaN +d 0.0 +e NaN +dtype: float64 +""" +) + +_divmod_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.divmod(b, fill_value=0) +(a 1.0 + b NaN + c NaN + d 0.0 + e NaN + dtype: float64, + a 0.0 + b NaN + c NaN + d 0.0 + e NaN + dtype: float64) +""" +) + +_mod_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.mod(b, fill_value=0) +a 0.0 +b NaN +c NaN +d 0.0 +e NaN +dtype: float64 +""" +) +_pow_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.pow(b, fill_value=0) +a 1.0 +b 1.0 +c 1.0 +d 0.0 +e NaN +dtype: float64 +""" +) + +_ne_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.ne(b, fill_value=0) +a False +b True +c True +d True +e True +dtype: bool +""" +) + +_eq_example_SERIES = ( + _common_examples_algebra_SERIES + + """ +>>> a.eq(b, fill_value=0) +a True +b False +c False +d False +e False +dtype: bool +""" +) + +_lt_example_SERIES = ( + _common_examples_comparison_SERIES + + """ +>>> a.lt(b, fill_value=0) +a 
False +b False +c True +d False +e False +f True +dtype: bool +""" +) + +_le_example_SERIES = ( + _common_examples_comparison_SERIES + + """ +>>> a.le(b, fill_value=0) +a False +b True +c True +d False +e False +f True +dtype: bool +""" +) + +_gt_example_SERIES = ( + _common_examples_comparison_SERIES + + """ +>>> a.gt(b, fill_value=0) +a True +b False +c False +d False +e True +f False +dtype: bool +""" +) + +_ge_example_SERIES = ( + _common_examples_comparison_SERIES + + """ +>>> a.ge(b, fill_value=0) +a True +b True +c False +d False +e True +f False +dtype: bool +""" +) + +_returns_series = """Series\n The result of the operation.""" + +_returns_tuple = """2-Tuple of Series\n The result of the operation.""" + +_op_descriptions: dict[str, dict[str, str | None]] = { + # Arithmetic Operators + "add": { + "op": "+", + "desc": "Addition", + "reverse": "radd", + "series_examples": _add_example_SERIES, + "series_returns": _returns_series, + }, + "sub": { + "op": "-", + "desc": "Subtraction", + "reverse": "rsub", + "series_examples": _sub_example_SERIES, + "series_returns": _returns_series, + }, + "mul": { + "op": "*", + "desc": "Multiplication", + "reverse": "rmul", + "series_examples": _mul_example_SERIES, + "series_returns": _returns_series, + "df_examples": None, + }, + "mod": { + "op": "%", + "desc": "Modulo", + "reverse": "rmod", + "series_examples": _mod_example_SERIES, + "series_returns": _returns_series, + }, + "pow": { + "op": "**", + "desc": "Exponential power", + "reverse": "rpow", + "series_examples": _pow_example_SERIES, + "series_returns": _returns_series, + "df_examples": None, + }, + "truediv": { + "op": "/", + "desc": "Floating division", + "reverse": "rtruediv", + "series_examples": _div_example_SERIES, + "series_returns": _returns_series, + "df_examples": None, + }, + "floordiv": { + "op": "//", + "desc": "Integer division", + "reverse": "rfloordiv", + "series_examples": _floordiv_example_SERIES, + "series_returns": _returns_series, + "df_examples": None, + }, + "divmod": { + "op": "divmod", + "desc": "Integer division and modulo", + "reverse": "rdivmod", + "series_examples": _divmod_example_SERIES, + "series_returns": _returns_tuple, + "df_examples": None, + }, + # Comparison Operators + "eq": { + "op": "==", + "desc": "Equal to", + "reverse": None, + "series_examples": _eq_example_SERIES, + "series_returns": _returns_series, + }, + "ne": { + "op": "!=", + "desc": "Not equal to", + "reverse": None, + "series_examples": _ne_example_SERIES, + "series_returns": _returns_series, + }, + "lt": { + "op": "<", + "desc": "Less than", + "reverse": None, + "series_examples": _lt_example_SERIES, + "series_returns": _returns_series, + }, + "le": { + "op": "<=", + "desc": "Less than or equal to", + "reverse": None, + "series_examples": _le_example_SERIES, + "series_returns": _returns_series, + }, + "gt": { + "op": ">", + "desc": "Greater than", + "reverse": None, + "series_examples": _gt_example_SERIES, + "series_returns": _returns_series, + }, + "ge": { + "op": ">=", + "desc": "Greater than or equal to", + "reverse": None, + "series_examples": _ge_example_SERIES, + "series_returns": _returns_series, + }, +} + +_py_num_ref = """see + `Python documentation + `_ + for more details""" +_op_names = list(_op_descriptions.keys()) +for key in _op_names: + reverse_op = _op_descriptions[key]["reverse"] + if reverse_op is not None: + _op_descriptions[reverse_op] = _op_descriptions[key].copy() + _op_descriptions[reverse_op]["reverse"] = key + _op_descriptions[key][ + "see_also_desc" + ] = 
f"Reverse of the {_op_descriptions[key]['desc']} operator, {_py_num_ref}" + _op_descriptions[reverse_op][ + "see_also_desc" + ] = f"Element-wise {_op_descriptions[key]['desc']}, {_py_num_ref}" + +_flex_doc_SERIES = """ +Return {desc} of series and other, element-wise (binary operator `{op_name}`). + +Equivalent to ``{equiv}``, but with support to substitute a fill_value for +missing data in either one of the inputs. + +Parameters +---------- +other : Series or scalar value +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level. +fill_value : None or float value, default None (NaN) + Fill existing missing (NaN) values, and any new element needed for + successful Series alignment, with this value before computation. + If data in both corresponding Series locations is missing + the result of filling (at that location) will be missing. +axis : {{0 or 'index'}} + Unused. Parameter needed for compatibility with DataFrame. + +Returns +------- +{series_returns} +""" + +_see_also_reverse_SERIES = """ +See Also +-------- +Series.{reverse} : {see_also_desc}. +""" + +_flex_doc_FRAME = """ +Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`). + +Equivalent to ``{equiv}``, but with support to substitute a fill_value +for missing data in one of the inputs. With reverse version, `{reverse}`. + +Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to +arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + +Parameters +---------- +other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. +axis : {{0 or 'index', 1 or 'columns'}} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. +level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. +fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + +Returns +------- +DataFrame + Result of the arithmetic operation. + +See Also +-------- +DataFrame.add : Add DataFrames. +DataFrame.sub : Subtract DataFrames. +DataFrame.mul : Multiply DataFrames. +DataFrame.div : Divide DataFrames (float division). +DataFrame.truediv : Divide DataFrames (float division). +DataFrame.floordiv : Divide DataFrames (integer division). +DataFrame.mod : Calculate modulo (remainder after division). +DataFrame.pow : Calculate exponential power. + +Notes +----- +Mismatched indices will be unioned together. + +Examples +-------- +>>> df = pd.DataFrame({{'angles': [0, 3, 4], +... 'degrees': [360, 180, 360]}}, +... index=['circle', 'triangle', 'rectangle']) +>>> df + angles degrees +circle 0 360 +triangle 3 180 +rectangle 4 360 + +Add a scalar with operator version which return the same +results. + +>>> df + 1 + angles degrees +circle 1 361 +triangle 4 181 +rectangle 5 361 + +>>> df.add(1) + angles degrees +circle 1 361 +triangle 4 181 +rectangle 5 361 + +Divide by constant with reverse version. + +>>> df.div(10) + angles degrees +circle 0.0 36.0 +triangle 0.3 18.0 +rectangle 0.4 36.0 + +>>> df.rdiv(10) + angles degrees +circle inf 0.027778 +triangle 3.333333 0.055556 +rectangle 2.500000 0.027778 + +Subtract a list and Series by axis with operator version. 
+ +>>> df - [1, 2] + angles degrees +circle -1 358 +triangle 2 178 +rectangle 3 358 + +>>> df.sub([1, 2], axis='columns') + angles degrees +circle -1 358 +triangle 2 178 +rectangle 3 358 + +>>> df.sub(pd.Series([1, 1, 1], index=['circle', 'triangle', 'rectangle']), +... axis='index') + angles degrees +circle -1 359 +triangle 2 179 +rectangle 3 359 + +Multiply a dictionary by axis. + +>>> df.mul({{'angles': 0, 'degrees': 2}}) + angles degrees +circle 0 720 +triangle 0 360 +rectangle 0 720 + +>>> df.mul({{'circle': 0, 'triangle': 2, 'rectangle': 3}}, axis='index') + angles degrees +circle 0 0 +triangle 6 360 +rectangle 12 1080 + +Multiply a DataFrame of different shape with operator version. + +>>> other = pd.DataFrame({{'angles': [0, 3, 4]}}, +... index=['circle', 'triangle', 'rectangle']) +>>> other + angles +circle 0 +triangle 3 +rectangle 4 + +>>> df * other + angles degrees +circle 0 NaN +triangle 9 NaN +rectangle 16 NaN + +>>> df.mul(other, fill_value=0) + angles degrees +circle 0 0.0 +triangle 9 0.0 +rectangle 16 0.0 + +Divide by a MultiIndex by level. + +>>> df_multindex = pd.DataFrame({{'angles': [0, 3, 4, 4, 5, 6], +... 'degrees': [360, 180, 360, 360, 540, 720]}}, +... index=[['A', 'A', 'A', 'B', 'B', 'B'], +... ['circle', 'triangle', 'rectangle', +... 'square', 'pentagon', 'hexagon']]) +>>> df_multindex + angles degrees +A circle 0 360 + triangle 3 180 + rectangle 4 360 +B square 4 360 + pentagon 5 540 + hexagon 6 720 + +>>> df.div(df_multindex, level=1, fill_value=0) + angles degrees +A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 +B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 +""" + +_flex_comp_doc_FRAME = """ +Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`). + +Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison +operators. + +Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis +(rows or columns) and level for comparison. + +Parameters +---------- +other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. +axis : {{0 or 'index', 1 or 'columns'}}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). +level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + +Returns +------- +DataFrame of bool + Result of the comparison. + +See Also +-------- +DataFrame.eq : Compare DataFrames for equality elementwise. +DataFrame.ne : Compare DataFrames for inequality elementwise. +DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. +DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. +DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. +DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + +Notes +----- +Mismatched indices will be unioned together. +`NaN` values are considered different (i.e. `NaN` != `NaN`). + +Examples +-------- +>>> df = pd.DataFrame({{'cost': [250, 150, 100], +... 'revenue': [100, 250, 300]}}, +... 
index=['A', 'B', 'C']) +>>> df + cost revenue +A 250 100 +B 150 250 +C 100 300 + +Comparison with a scalar, using either the operator or method: + +>>> df == 100 + cost revenue +A False True +B False False +C True False + +>>> df.eq(100) + cost revenue +A False True +B False False +C True False + +When `other` is a :class:`Series`, the columns of a DataFrame are aligned +with the index of `other` and broadcast: + +>>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue +A True True +B True False +C False True + +Use the method to control the broadcast axis: + +>>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index') + cost revenue +A True False +B True True +C True True +D True True + +When comparing to an arbitrary sequence, the number of columns must +match the number elements in `other`: + +>>> df == [250, 100] + cost revenue +A True True +B False False +C False False + +Use the method to control the axis: + +>>> df.eq([250, 250, 100], axis='index') + cost revenue +A True False +B False True +C True False + +Compare to a DataFrame of different shape. + +>>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}}, +... index=['A', 'B', 'C', 'D']) +>>> other + revenue +A 300 +B 250 +C 100 +D 150 + +>>> df.gt(other) + cost revenue +A False False +B False False +C False True +D False False + +Compare to a MultiIndex by level. + +>>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220], +... 'revenue': [100, 250, 300, 200, 175, 225]}}, +... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'], +... ['A', 'B', 'C', 'A', 'B', 'C']]) +>>> df_multindex + cost revenue +Q1 A 250 100 + B 150 250 + C 100 300 +Q2 A 150 200 + B 300 175 + C 220 225 + +>>> df.le(df_multindex, level=1) + cost revenue +Q1 A True True + B True True + C True True +Q2 A False True + B True False + C True False +""" diff --git a/pandas/core/ops/invalid.py b/pandas/core/ops/invalid.py new file mode 100644 index 00000000..eb27cf74 --- /dev/null +++ b/pandas/core/ops/invalid.py @@ -0,0 +1,58 @@ +""" +Templates for invalid operations. +""" +from __future__ import annotations + +import operator + +import numpy as np + + +def invalid_comparison(left, right, op) -> np.ndarray: + """ + If a comparison has mismatched types and is not necessarily meaningful, + follow python3 conventions by: + + - returning all-False for equality + - returning all-True for inequality + - raising TypeError otherwise + + Parameters + ---------- + left : array-like + right : scalar, array-like + op : operator.{eq, ne, lt, le, gt} + + Raises + ------ + TypeError : on inequality comparisons + """ + if op is operator.eq: + res_values = np.zeros(left.shape, dtype=bool) + elif op is operator.ne: + res_values = np.ones(left.shape, dtype=bool) + else: + typ = type(right).__name__ + raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}") + return res_values + + +def make_invalid_op(name: str): + """ + Return a binary method that always raises a TypeError. + + Parameters + ---------- + name : str + + Returns + ------- + invalid_op : function + """ + + def invalid_op(self, other=None): + typ = type(self).__name__ + raise TypeError(f"cannot perform {name} with this index type: {typ}") + + invalid_op.__name__ = name + return invalid_op diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py new file mode 100644 index 00000000..adc1f63c --- /dev/null +++ b/pandas/core/ops/mask_ops.py @@ -0,0 +1,189 @@ +""" +Ops for masked arrays. 
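[Editor's note: for the `invalid_comparison` helper added just above, a short sketch of the Python 3 conventions it enforces, using a mismatched datetime-vs-string comparison; the data is illustrative.]

    import pandas as pd

    dti = pd.Series(pd.to_datetime(["2021-01-01", "2021-01-02"]))

    assert not (dti == "foo").any()   # equality with an unparseable type: all False
    assert (dti != "foo").all()       # inequality: all True
    try:
        dti < "foo"                   # ordering comparison raises TypeError
    except TypeError:
        pass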
+""" +from __future__ import annotations + +import numpy as np + +from pandas._libs import ( + lib, + missing as libmissing, +) + + +def kleene_or( + left: bool | np.ndarray | libmissing.NAType, + right: bool | np.ndarray | libmissing.NAType, + left_mask: np.ndarray | None, + right_mask: np.ndarray | None, +): + """ + Boolean ``or`` using Kleene logic. + + Values are NA where we have ``NA | NA`` or ``NA | False``. + ``NA | True`` is considered True. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical or, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since + # A | B == B | A + if left_mask is None: + return kleene_or(right, left, right_mask, left_mask) + + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") + + raise_for_nan(right, method="or") + + if right is libmissing.NA: + result = left.copy() + else: + result = left | right + + if right_mask is not None: + # output is unknown where (False & NA), (NA & False), (NA & NA) + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = ( + (left_false & right_mask) + | (right_false & left_mask) + | (left_mask & right_mask) + ) + else: + if right is True: + mask = np.zeros_like(left_mask) + elif right is libmissing.NA: + mask = (~left & ~left_mask) | left_mask + else: + # False + mask = left_mask.copy() + + return result, mask + + +def kleene_xor( + left: bool | np.ndarray | libmissing.NAType, + right: bool | np.ndarray | libmissing.NAType, + left_mask: np.ndarray | None, + right_mask: np.ndarray | None, +): + """ + Boolean ``xor`` using Kleene logic. + + This is the same as ``or``, with the following adjustments + + * True, True -> False + * True, NA -> NA + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since + # A ^ B == B ^ A + if left_mask is None: + return kleene_xor(right, left, right_mask, left_mask) + + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") + + raise_for_nan(right, method="xor") + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left ^ right + + if right_mask is None: + if right is libmissing.NA: + mask = np.ones_like(left_mask) + else: + mask = left_mask.copy() + else: + mask = left_mask | right_mask + + return result, mask + + +def kleene_and( + left: bool | libmissing.NAType | np.ndarray, + right: bool | libmissing.NAType | np.ndarray, + left_mask: np.ndarray | None, + right_mask: np.ndarray | None, +): + """ + Boolean ``and`` using Kleene logic. + + Values are ``NA`` for ``NA & NA`` or ``True & NA``. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. 
+ left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since + # A & B == B & A + if left_mask is None: + return kleene_and(right, left, right_mask, left_mask) + + if not isinstance(left, np.ndarray): + raise TypeError("Either `left` or `right` need to be a np.ndarray.") + raise_for_nan(right, method="and") + + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left & right + + if right_mask is None: + # Scalar `right` + if right is libmissing.NA: + mask = (left & ~left_mask) | left_mask + + else: + mask = left_mask.copy() + if right is False: + # unmask everything + mask[:] = False + else: + # unmask where either left or right is False + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = (left_mask & ~right_false) | (right_mask & ~left_false) + + return result, mask + + +def raise_for_nan(value, method: str) -> None: + if lib.is_float(value) and np.isnan(value): + raise ValueError(f"Cannot perform logical '{method}' with floating NaN") diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py new file mode 100644 index 00000000..e8a93008 --- /dev/null +++ b/pandas/core/ops/methods.py @@ -0,0 +1,124 @@ +""" +Functions to generate methods and pin them to the appropriate classes. +""" +from __future__ import annotations + +import operator + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.ops import roperator + + +def _get_method_wrappers(cls): + """ + Find the appropriate operation-wrappers to use when defining flex/special + arithmetic, boolean, and comparison operations with the given class. + + Parameters + ---------- + cls : class + + Returns + ------- + arith_flex : function or None + comp_flex : function or None + """ + # TODO: make these non-runtime imports once the relevant functions + # are no longer in __init__ + from pandas.core.ops import ( + flex_arith_method_FRAME, + flex_comp_method_FRAME, + flex_method_SERIES, + ) + + if issubclass(cls, ABCSeries): + # Just Series + arith_flex = flex_method_SERIES + comp_flex = flex_method_SERIES + elif issubclass(cls, ABCDataFrame): + arith_flex = flex_arith_method_FRAME + comp_flex = flex_comp_method_FRAME + return arith_flex, comp_flex + + +def add_flex_arithmetic_methods(cls) -> None: + """ + Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) + to the class. + + Parameters + ---------- + cls : class + flex methods will be defined and pinned to this class + """ + flex_arith_method, flex_comp_method = _get_method_wrappers(cls) + new_methods = _create_methods(cls, flex_arith_method, flex_comp_method) + new_methods.update( + { + "multiply": new_methods["mul"], + "subtract": new_methods["sub"], + "divide": new_methods["div"], + } + ) + # opt out of bool flex methods for now + assert not any(kname in new_methods for kname in ("ror_", "rxor", "rand_")) + + _add_methods(cls, new_methods=new_methods) + + +def _create_methods(cls, arith_method, comp_method): + # creates actual flex methods based upon arithmetic, and comp method + # constructors. 
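[Editor's note: a small check of the Kleene truth tables implemented by `kleene_or` and `kleene_and` above, exercised through the nullable ``boolean`` dtype; values are illustrative.]

    import pandas as pd

    a = pd.array([True, False, None], dtype="boolean")

    # kleene_or: NA | True is True, NA | False stays NA.
    assert not (a | True).isna().any()
    assert bool((a | False).isna()[2])

    # kleene_and: NA & True stays NA, NA & False is False.
    assert bool((a & True).isna()[2])
    assert not (a & False).isna().any()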
+ + have_divmod = issubclass(cls, ABCSeries) + # divmod is available for Series + + new_methods = {} + + new_methods.update( + { + "add": arith_method(operator.add), + "radd": arith_method(roperator.radd), + "sub": arith_method(operator.sub), + "mul": arith_method(operator.mul), + "truediv": arith_method(operator.truediv), + "floordiv": arith_method(operator.floordiv), + "mod": arith_method(operator.mod), + "pow": arith_method(operator.pow), + "rmul": arith_method(roperator.rmul), + "rsub": arith_method(roperator.rsub), + "rtruediv": arith_method(roperator.rtruediv), + "rfloordiv": arith_method(roperator.rfloordiv), + "rpow": arith_method(roperator.rpow), + "rmod": arith_method(roperator.rmod), + } + ) + new_methods["div"] = new_methods["truediv"] + new_methods["rdiv"] = new_methods["rtruediv"] + if have_divmod: + # divmod doesn't have an op that is supported by numexpr + new_methods["divmod"] = arith_method(divmod) + new_methods["rdivmod"] = arith_method(roperator.rdivmod) + + new_methods.update( + { + "eq": comp_method(operator.eq), + "ne": comp_method(operator.ne), + "lt": comp_method(operator.lt), + "gt": comp_method(operator.gt), + "le": comp_method(operator.le), + "ge": comp_method(operator.ge), + } + ) + + new_methods = {k.strip("_"): v for k, v in new_methods.items()} + return new_methods + + +def _add_methods(cls, new_methods): + for name, method in new_methods.items(): + setattr(cls, name, method) diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py new file mode 100644 index 00000000..850ca44e --- /dev/null +++ b/pandas/core/ops/missing.py @@ -0,0 +1,183 @@ +""" +Missing data handling for arithmetic operations. + +In particular, pandas conventions regarding division by zero differ +from numpy in the following ways: + 1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2) + gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for + the remaining pairs + (the remaining being dtype1==dtype2==intN and dtype==dtype2==uintN). + + pandas convention is to return [-inf, nan, inf] for all dtype + combinations. + + Note: the numpy behavior described here is py3-specific. + + 2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2) + gives precisely the same results as the // operation. + + pandas convention is to return [nan, nan, nan] for all dtype + combinations. + + 3) divmod behavior consistent with 1) and 2). +""" +from __future__ import annotations + +import operator + +import numpy as np + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, + is_scalar, +) + +from pandas.core.ops import roperator + + +def _fill_zeros(result, x, y): + """ + If this is a reversed op, then flip x,y + + If we have an integer value (or array in y) + and we have 0's, fill them with np.nan, + return the result. + + Mask the nan's from x. + """ + if is_float_dtype(result.dtype): + return result + + is_variable_type = hasattr(y, "dtype") + is_scalar_type = is_scalar(y) + + if not is_variable_type and not is_scalar_type: + return result + + if is_scalar_type: + y = np.array(y) + + if is_integer_dtype(y.dtype): + + ymask = y == 0 + if ymask.any(): + + # GH#7325, mask and nans must be broadcastable + mask = ymask & ~np.isnan(result) + + # GH#9308 doing ravel on result and mask can improve putmask perf, + # but can also make unwanted copies. 
+ result = result.astype("float64", copy=False) + + np.putmask(result, mask, np.nan) + + return result + + +def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray: + """ + Set results of 0 // 0 to np.nan, regardless of the dtypes + of the numerator or the denominator. + + Parameters + ---------- + x : ndarray + y : ndarray + result : ndarray + + Returns + ------- + ndarray + The filled result. + + Examples + -------- + >>> x = np.array([1, 0, -1], dtype=np.int64) + >>> x + array([ 1, 0, -1]) + >>> y = 0 # int 0; numpy behavior is different with float + >>> result = x // y + >>> result # raw numpy result does not fill division by zero + array([0, 0, 0]) + >>> mask_zero_div_zero(x, y, result) + array([ inf, nan, -inf]) + """ + + if not hasattr(y, "dtype"): + # e.g. scalar, tuple + y = np.array(y) + if not hasattr(x, "dtype"): + # e.g scalar, tuple + x = np.array(x) + + zmask = y == 0 + + if zmask.any(): + + # Flip sign if necessary for -0.0 + zneg_mask = zmask & np.signbit(y) + zpos_mask = zmask & ~zneg_mask + + x_lt0 = x < 0 + x_gt0 = x > 0 + nan_mask = zmask & (x == 0) + with np.errstate(invalid="ignore"): + neginf_mask = (zpos_mask & x_lt0) | (zneg_mask & x_gt0) + posinf_mask = (zpos_mask & x_gt0) | (zneg_mask & x_lt0) + + if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): + # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN + result = result.astype("float64", copy=False) + + result[nan_mask] = np.nan + result[posinf_mask] = np.inf + result[neginf_mask] = -np.inf + + return result + + +def dispatch_fill_zeros(op, left, right, result): + """ + Call _fill_zeros with the appropriate fill value depending on the operation, + with special logic for divmod and rdivmod. + + Parameters + ---------- + op : function (operator.add, operator.div, ...) + left : object (np.ndarray for non-reversed ops) + right : object (np.ndarray for reversed ops) + result : ndarray + + Returns + ------- + result : np.ndarray + + Notes + ----- + For divmod and rdivmod, the `result` parameter and returned `result` + is a 2-tuple of ndarray objects. + """ + if op is divmod: + result = ( + mask_zero_div_zero(left, right, result[0]), + _fill_zeros(result[1], left, right), + ) + elif op is roperator.rdivmod: + result = ( + mask_zero_div_zero(right, left, result[0]), + _fill_zeros(result[1], right, left), + ) + elif op is operator.floordiv: + # Note: no need to do this for truediv; in py3 numpy behaves the way + # we want. + result = mask_zero_div_zero(left, right, result) + elif op is roperator.rfloordiv: + # Note: no need to do this for rtruediv; in py3 numpy behaves the way + # we want. 
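[Editor's note: the zero-division conventions spelled out in the module docstring of `pandas/core/ops/missing.py` above, as a quick sketch through the public Series API; int64 inputs assumed, values illustrative.]

    import numpy as np
    import pandas as pd

    num = pd.Series([-1, 0, 1], dtype="int64")
    den = pd.Series([0, 0, 0], dtype="int64")

    floored = num // den             # numpy alone would give [0, 0, 0] here
    assert np.isneginf(floored.iloc[0])
    assert np.isnan(floored.iloc[1])
    assert np.isposinf(floored.iloc[2])

    assert (num % den).isna().all()  # modulo by zero becomes NaN throughout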
+ result = mask_zero_div_zero(right, left, result) + elif op is operator.mod: + result = _fill_zeros(result, left, right) + elif op is roperator.rmod: + result = _fill_zeros(result, right, left) + return result diff --git a/pandas/core/resample.py b/pandas/core/resample.py new file mode 100644 index 00000000..0ac43b77 --- /dev/null +++ b/pandas/core/resample.py @@ -0,0 +1,2265 @@ +from __future__ import annotations + +import copy +from datetime import timedelta +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + Literal, + final, + no_type_check, +) +import warnings + +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import ( + BaseOffset, + IncompatibleFrequency, + NaT, + Period, + Timedelta, + Timestamp, + to_offset, +) +from pandas._typing import ( + IndexLabel, + NDFrameT, + T, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import ( + AbstractMethodError, + DataError, +) +from pandas.util._decorators import ( + Appender, + Substitution, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +import pandas.core.algorithms as algos +from pandas.core.apply import ResamplerWindowApply +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.generic import ( + NDFrame, + _shared_docs, +) +from pandas.core.groupby.generic import SeriesGroupBy +from pandas.core.groupby.groupby import ( + BaseGroupBy, + GroupBy, + _pipe_template, + get_groupby, +) +from pandas.core.groupby.grouper import Grouper +from pandas.core.groupby.ops import BinGrouper +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + date_range, +) +from pandas.core.indexes.period import ( + PeriodIndex, + period_range, +) +from pandas.core.indexes.timedeltas import ( + TimedeltaIndex, + timedelta_range, +) + +from pandas.tseries.frequencies import ( + is_subperiod, + is_superperiod, +) +from pandas.tseries.offsets import ( + DateOffset, + Day, + Nano, + Tick, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Index, + Series, + ) + +_shared_docs_kwargs: dict[str, str] = {} + + +class Resampler(BaseGroupBy, PandasObject): + """ + Class for resampling datetimelike data, a groupby-like operation. + See aggregate, transform, and apply functions on this object. + + It's easiest to use obj.resample(...) to use Resampler. + + Parameters + ---------- + obj : Series or DataFrame + groupby : TimeGrouper + axis : int, default 0 + kind : str or None + 'period', 'timestamp' to override default index treatment + + Returns + ------- + a Resampler of the appropriate type + + Notes + ----- + After resampling, see aggregate, apply, and transform functions. 
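[Editor's note: before the Resampler class body continues, a compact usage sketch of the downsample/upsample workflow described above; dates and values are illustrative.]

    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(6), index=pd.date_range("2023-01-01", periods=6, freq="H"))

    # Downsampling: aggregate each 2-hour bin.
    assert s.resample("2H").sum().tolist() == [1, 5, 9]

    # Upsampling: new 30-minute slots appear as NaN until filled.
    assert s.resample("30min").asfreq().isna().sum() == 5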
+ """ + + grouper: BinGrouper + exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat + + # to the groupby descriptor + _attributes = [ + "freq", + "axis", + "closed", + "label", + "convention", + "loffset", + "kind", + "origin", + "offset", + ] + + def __init__( + self, + obj: DataFrame | Series, + groupby: TimeGrouper, + axis: int = 0, + kind=None, + *, + group_keys: bool | lib.NoDefault = lib.no_default, + selection=None, + **kwargs, + ) -> None: + self.groupby = groupby + self.keys = None + self.sort = True + self.axis = axis + self.kind = kind + self.squeeze = False + self.group_keys = group_keys + self.as_index = True + + self.groupby._set_grouper(self._convert_obj(obj), sort=True) + self.binner, self.grouper = self._get_binner() + self._selection = selection + if self.groupby.key is not None: + self.exclusions = frozenset([self.groupby.key]) + else: + self.exclusions = frozenset() + + @final + def _shallow_copy(self, obj, **kwargs): + """ + return a new object with the replacement attributes + """ + if isinstance(obj, self._constructor): + obj = obj.obj + for attr in self._attributes: + if attr not in kwargs: + kwargs[attr] = getattr(self, attr) + return self._constructor(obj, **kwargs) + + def __str__(self) -> str: + """ + Provide a nice str repr of our rolling object. + """ + attrs = ( + f"{k}={getattr(self.groupby, k)}" + for k in self._attributes + if getattr(self.groupby, k, None) is not None + ) + return f"{type(self).__name__} [{', '.join(attrs)}]" + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self._attributes: + return getattr(self.groupby, attr) + if attr in self.obj: + return self[attr] + + return object.__getattribute__(self, attr) + + # error: Signature of "obj" incompatible with supertype "BaseGroupBy" + @property + def obj(self) -> NDFrame: # type: ignore[override] + # error: Incompatible return value type (got "Optional[Any]", + # expected "NDFrameT") + return self.groupby.obj # type: ignore[return-value] + + @property + def ax(self): + # we can infer that this is a PeriodIndex/DatetimeIndex/TimedeltaIndex, + # but skipping annotating bc the overrides overwhelming + return self.groupby.ax + + @property + def _from_selection(self) -> bool: + """ + Is the resampling from a DataFrame column or MultiIndex level. + """ + # upsampling and PeriodIndex resampling do not work + # with selection, this state used to catch and raise an error + return self.groupby is not None and ( + self.groupby.key is not None or self.groupby.level is not None + ) + + def _convert_obj(self, obj: NDFrameT) -> NDFrameT: + """ + Provide any conversions for the object in order to correctly handle. + + Parameters + ---------- + obj : Series or DataFrame + + Returns + ------- + Series or DataFrame + """ + return obj._consolidate() + + def _get_binner_for_time(self): + raise AbstractMethodError(self) + + @final + def _get_binner(self): + """ + Create the BinGrouper, assume that self.set_grouper(obj) + has already been called. + """ + binner, bins, binlabels = self._get_binner_for_time() + assert len(bins) == len(binlabels) + bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer) + return binner, bin_grouper + + @Substitution( + klass="Resampler", + examples=""" + >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, + ... 
index=pd.date_range('2012-08-02', periods=4)) + >>> df + A + 2012-08-02 1 + 2012-08-03 2 + 2012-08-04 3 + 2012-08-05 4 + + To get the difference between each 2-day period's maximum and minimum + value in one pass, you can do + + >>> df.resample('2D').pipe(lambda x: x.max() - x.min()) + A + 2012-08-02 1 + 2012-08-04 1""", + ) + @Appender(_pipe_template) + def pipe( + self, + func: Callable[..., T] | tuple[Callable[..., T], str], + *args, + **kwargs, + ) -> T: + return super().pipe(func, *args, **kwargs) + + _agg_see_also_doc = dedent( + """ + See Also + -------- + DataFrame.groupby.aggregate : Aggregate using callable, string, dict, + or list of string/callables. + DataFrame.resample.transform : Transforms the Series on each group + based on the given function. + DataFrame.aggregate: Aggregate using one or more + operations over the specified axis. + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4, 5], + ... index=pd.date_range('20130101', periods=5, freq='s')) + >>> s + 2013-01-01 00:00:00 1 + 2013-01-01 00:00:01 2 + 2013-01-01 00:00:02 3 + 2013-01-01 00:00:03 4 + 2013-01-01 00:00:04 5 + Freq: S, dtype: int64 + + >>> r = s.resample('2s') + + >>> r.agg(np.sum) + 2013-01-01 00:00:00 3 + 2013-01-01 00:00:02 7 + 2013-01-01 00:00:04 5 + Freq: 2S, dtype: int64 + + >>> r.agg(['sum', 'mean', 'max']) + sum mean max + 2013-01-01 00:00:00 3 1.5 2 + 2013-01-01 00:00:02 7 3.5 4 + 2013-01-01 00:00:04 5 5.0 5 + + >>> r.agg({'result': lambda x: x.mean() / x.std(), + ... 'total': np.sum}) + result total + 2013-01-01 00:00:00 2.121320 3 + 2013-01-01 00:00:02 4.949747 7 + 2013-01-01 00:00:04 NaN 5 + + >>> r.agg(average="mean", total="sum") + average total + 2013-01-01 00:00:00 1.5 3 + 2013-01-01 00:00:02 3.5 7 + 2013-01-01 00:00:04 5.0 5 + """ + ) + + @doc( + _shared_docs["aggregate"], + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + klass="DataFrame", + axis="", + ) + def aggregate(self, func=None, *args, **kwargs): + + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + if result is None: + how = func + result = self._groupby_and_aggregate(how, *args, **kwargs) + + result = self._apply_loffset(result) + return result + + agg = aggregate + apply = aggregate + + def transform(self, arg, *args, **kwargs): + """ + Call function producing a like-indexed Series on each group. + + Return a Series with the transformed values. + + Parameters + ---------- + arg : function + To apply to each group. Should return a Series with the same index. + + Returns + ------- + transformed : Series + + Examples + -------- + >>> s = pd.Series([1, 2], + ... index=pd.date_range('20180101', + ... periods=2, + ... freq='1h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + Freq: H, dtype: int64 + + >>> resampled = s.resample('15min') + >>> resampled.transform(lambda x: (x - x.mean()) / x.std()) + 2018-01-01 00:00:00 NaN + 2018-01-01 01:00:00 NaN + Freq: H, dtype: float64 + """ + return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs) + + def _downsample(self, f, **kwargs): + raise AbstractMethodError(self) + + def _upsample(self, f, limit=None, fill_value=None): + raise AbstractMethodError(self) + + def _gotitem(self, key, ndim: int, subset=None): + """ + Sub-classes to define. Return a sliced object. 
+ + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + grouper = self.grouper + if subset is None: + subset = self.obj + grouped = get_groupby( + subset, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys + ) + + # try the key selection + try: + return grouped[key] + except KeyError: + return grouped + + def _groupby_and_aggregate(self, how, *args, **kwargs): + """ + Re-evaluate the obj with a groupby aggregation. + """ + grouper = self.grouper + + if self._selected_obj.ndim == 1: + obj = self._selected_obj + else: + # Excludes `on` column when provided + obj = self._obj_with_exclusions + grouped = get_groupby( + obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys + ) + + try: + if isinstance(obj, ABCDataFrame) and callable(how): + # Check if the function is reducing or not. + result = grouped._aggregate_item_by_item(how, *args, **kwargs) + else: + result = grouped.aggregate(how, *args, **kwargs) + except DataError: + # got TypeErrors on aggregation + result = grouped.apply(how, *args, **kwargs) + except (AttributeError, KeyError): + # we have a non-reducing function; try to evaluate + # alternatively we want to evaluate only a column of the input + + # test_apply_to_one_column_of_df the function being applied references + # a DataFrame column, but aggregate_item_by_item operates column-wise + # on Series, raising AttributeError or KeyError + # (depending on whether the column lookup uses getattr/__getitem__) + result = grouped.apply(how, *args, **kwargs) + + except ValueError as err: + if "Must produce aggregated value" in str(err): + # raised in _aggregate_named + # see test_apply_without_aggregation, test_apply_with_mutated_index + pass + else: + raise + + # we have a non-reducing function + # try to evaluate + result = grouped.apply(how, *args, **kwargs) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _apply_loffset(self, result): + """ + If loffset is set, offset the result index. + + This is NOT an idempotent routine, it will be applied + exactly once to the result. + + Parameters + ---------- + result : Series or DataFrame + the result of resample + """ + # error: Cannot determine type of 'loffset' + needs_offset = ( + isinstance( + self.loffset, # type: ignore[has-type] + (DateOffset, timedelta, np.timedelta64), + ) + and isinstance(result.index, DatetimeIndex) + and len(result.index) > 0 + ) + + if needs_offset: + # error: Cannot determine type of 'loffset' + result.index = result.index + self.loffset # type: ignore[has-type] + + self.loffset = None + return result + + def _get_resampler_for_grouping(self, groupby, key=None): + """ + Return the correct class for resampling with groupby. + """ + return self._resampler_for_grouping(self, groupby=groupby, key=key) + + def _wrap_result(self, result): + """ + Potentially wrap any results. + """ + if isinstance(result, ABCSeries) and self._selection is not None: + result.name = self._selection + + if isinstance(result, ABCSeries) and result.empty: + obj = self.obj + # When index is all NaT, result is empty but index is not + result.index = _asfreq_compat(obj.index[:0], freq=self.freq) + result.name = getattr(obj, "name", None) + + return result + + def ffill(self, limit=None): + """ + Forward fill the values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + An upsampled Series. 
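[Editor's note: a short sketch of `ffill` with and without `limit` on upsampled data, complementing the docstring above; values are illustrative.]

    import pandas as pd

    s = pd.Series([1, 2], index=pd.date_range("2023-01-01", periods=2, freq="H"))

    # The new 00:30 slot is forward filled from the 00:00 value.
    assert s.resample("30min").ffill().tolist() == [1, 1, 2]

    # With limit=1, only the first new slot after each original value is filled.
    assert s.resample("15min").ffill(limit=1).isna().sum() == 2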
+ + See Also + -------- + Series.fillna: Fill NA/NaN values using the specified method. + DataFrame.fillna: Fill NA/NaN values using the specified method. + """ + return self._upsample("ffill", limit=limit) + + def pad(self, limit=None): + """ + Forward fill the values. + + .. deprecated:: 1.4 + Use ffill instead. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + An upsampled Series. + """ + warnings.warn( + "pad is deprecated and will be removed in a future version. " + "Use ffill instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.ffill(limit=limit) + + def nearest(self, limit=None): + """ + Resample by using the nearest value. + + When resampling data, missing values may appear (e.g., when the + resampling frequency is higher than the original frequency). + The `nearest` method will replace ``NaN`` values that appeared in + the resampled data with the value from the nearest member of the + sequence, based on the index value. + Missing values that existed in the original data will not be modified. + If `limit` is given, fill only this many values in each direction for + each of the original values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series or DataFrame + An upsampled Series or DataFrame with ``NaN`` values filled with + their nearest value. + + See Also + -------- + backfill : Backward fill the new missing values in the resampled data. + pad : Forward fill ``NaN`` values. + + Examples + -------- + >>> s = pd.Series([1, 2], + ... index=pd.date_range('20180101', + ... periods=2, + ... freq='1h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + Freq: H, dtype: int64 + + >>> s.resample('15min').nearest() + 2018-01-01 00:00:00 1 + 2018-01-01 00:15:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 00:45:00 2 + 2018-01-01 01:00:00 2 + Freq: 15T, dtype: int64 + + Limit the number of upsampled values imputed by the nearest: + + >>> s.resample('15min').nearest(limit=1) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + Freq: 15T, dtype: float64 + """ + return self._upsample("nearest", limit=limit) + + def bfill(self, limit=None): + """ + Backward fill the new missing values in the resampled data. + + In statistics, imputation is the process of replacing missing data with + substituted values [1]_. When resampling data, missing values may + appear (e.g., when the resampling frequency is higher than the original + frequency). The backward fill will replace NaN values that appeared in + the resampled data with the next value in the original sequence. + Missing values that existed in the original data will not be modified. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series, DataFrame + An upsampled Series or DataFrame with backward filled NaN values. + + See Also + -------- + bfill : Alias of backfill. + fillna : Fill NaN values using the specified method, which can be + 'backfill'. + nearest : Fill NaN values with nearest neighbor starting from center. + ffill : Forward fill NaN values. + Series.fillna : Fill NaN values in the Series using the + specified method, which can be 'backfill'. + DataFrame.fillna : Fill NaN values in the DataFrame using the + specified method, which can be 'backfill'. + + References + ---------- + .. 
[1] https://en.wikipedia.org/wiki/Imputation_(statistics) + + Examples + -------- + Resampling a Series: + + >>> s = pd.Series([1, 2, 3], + ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 02:00:00 3 + Freq: H, dtype: int64 + + >>> s.resample('30min').bfill() + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('15min').bfill(limit=2) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 NaN + 2018-01-01 00:30:00 2.0 + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:15:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 01:45:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 15T, dtype: float64 + + Resampling a DataFrame that has missing values: + + >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, + ... index=pd.date_range('20180101', periods=3, + ... freq='h')) + >>> df + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('30min').bfill() + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 00:30:00 NaN 3 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 01:30:00 6.0 5 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('15min').bfill(limit=2) + a b + 2018-01-01 00:00:00 2.0 1.0 + 2018-01-01 00:15:00 NaN NaN + 2018-01-01 00:30:00 NaN 3.0 + 2018-01-01 00:45:00 NaN 3.0 + 2018-01-01 01:00:00 NaN 3.0 + 2018-01-01 01:15:00 NaN NaN + 2018-01-01 01:30:00 6.0 5.0 + 2018-01-01 01:45:00 6.0 5.0 + 2018-01-01 02:00:00 6.0 5.0 + """ + return self._upsample("bfill", limit=limit) + + def backfill(self, limit=None): + """ + Backward fill the values. + + .. deprecated:: 1.4 + Use bfill instead. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series, DataFrame + An upsampled Series or DataFrame with backward filled NaN values. + """ + warnings.warn( + "backfill is deprecated and will be removed in a future version. " + "Use bfill instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.bfill(limit=limit) + + def fillna(self, method, limit=None): + """ + Fill missing values introduced by upsampling. + + In statistics, imputation is the process of replacing missing data with + substituted values [1]_. When resampling data, missing values may + appear (e.g., when the resampling frequency is higher than the original + frequency). + + Missing values that existed in the original data will + not be modified. + + Parameters + ---------- + method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'} + Method to use for filling holes in resampled data + + * 'pad' or 'ffill': use previous valid observation to fill gap + (forward fill). + * 'backfill' or 'bfill': use next valid observation to fill gap. + * 'nearest': use nearest valid observation to fill gap. + + limit : int, optional + Limit of how many consecutive missing values to fill. + + Returns + ------- + Series or DataFrame + An upsampled Series or DataFrame with missing values filled. + + See Also + -------- + bfill : Backward fill NaN values in the resampled data. + ffill : Forward fill NaN values in the resampled data. + nearest : Fill NaN values in the resampled data + with nearest neighbor starting from center. + interpolate : Fill NaN values using interpolation. + Series.fillna : Fill NaN values in the Series using the + specified method, which can be 'bfill' and 'ffill'. 
+ DataFrame.fillna : Fill NaN values in the DataFrame using the + specified method, which can be 'bfill' and 'ffill'. + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics) + + Examples + -------- + Resampling a Series: + + >>> s = pd.Series([1, 2, 3], + ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 02:00:00 3 + Freq: H, dtype: int64 + + Without filling the missing values you get: + + >>> s.resample("30min").asfreq() + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:30:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> s.resample('30min').fillna("backfill") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('15min').fillna("backfill", limit=2) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 NaN + 2018-01-01 00:30:00 2.0 + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:15:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 01:45:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 15T, dtype: float64 + + >>> s.resample('30min').fillna("pad") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 2 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('30min').fillna("nearest") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + Missing values present before the upsampling are not affected. + + >>> sm = pd.Series([1, None, 3], + ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> sm + 2018-01-01 00:00:00 1.0 + 2018-01-01 01:00:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: H, dtype: float64 + + >>> sm.resample('30min').fillna('backfill') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> sm.resample('30min').fillna('pad') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 1.0 + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> sm.resample('30min').fillna('nearest') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + DataFrame resampling is done column-wise. All the same options are + available. + + >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, + ... index=pd.date_range('20180101', periods=3, + ... freq='h')) + >>> df + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('30min').fillna("bfill") + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 00:30:00 NaN 3 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 01:30:00 6.0 5 + 2018-01-01 02:00:00 6.0 5 + """ + return self._upsample(method, limit=limit) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) + @doc(NDFrame.interpolate, **_shared_docs_kwargs) + def interpolate( + self, + method="linear", + axis=0, + limit=None, + inplace=False, + limit_direction="forward", + limit_area=None, + downcast=None, + **kwargs, + ): + """ + Interpolate values according to different methods. 
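[Editor's note: a minimal sketch of `Resampler.interpolate`: upsample to a finer frequency, then fill the new slots by interpolation; values are illustrative.]

    import pandas as pd

    s = pd.Series([1.0, 3.0], index=pd.date_range("2023-01-01", periods=2, freq="H"))

    # The new 00:30 slot is filled linearly between the two original points.
    assert s.resample("30min").interpolate().tolist() == [1.0, 2.0, 3.0]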
+        """
+        result = self._upsample("asfreq")
+        return result.interpolate(
+            method=method,
+            axis=axis,
+            limit=limit,
+            inplace=inplace,
+            limit_direction=limit_direction,
+            limit_area=limit_area,
+            downcast=downcast,
+            **kwargs,
+        )
+
+    def asfreq(self, fill_value=None):
+        """
+        Return the values at the new freq, essentially a reindex.
+
+        Parameters
+        ----------
+        fill_value : scalar, optional
+            Value to use for missing values, applied during upsampling (note
+            this does not fill NaNs that already were present).
+
+        Returns
+        -------
+        DataFrame or Series
+            Values at the specified freq.
+
+        See Also
+        --------
+        Series.asfreq: Convert TimeSeries to specified frequency.
+        DataFrame.asfreq: Convert TimeSeries to specified frequency.
+        """
+        return self._upsample("asfreq", fill_value=fill_value)
+
+    def std(
+        self,
+        ddof=1,
+        numeric_only: bool | lib.NoDefault = lib.no_default,
+        *args,
+        **kwargs,
+    ):
+        """
+        Compute standard deviation of groups, excluding missing values.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Degrees of freedom.
+        numeric_only : bool, default False
+            Include only `float`, `int` or `boolean` data.
+
+            .. versionadded:: 1.5.0
+
+        Returns
+        -------
+        DataFrame or Series
+            Standard deviation of values within each group.
+        """
+        nv.validate_resampler_func("std", args, kwargs)
+        return self._downsample("std", ddof=ddof, numeric_only=numeric_only)
+
+    def var(
+        self,
+        ddof=1,
+        numeric_only: bool | lib.NoDefault = lib.no_default,
+        *args,
+        **kwargs,
+    ):
+        """
+        Compute variance of groups, excluding missing values.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Degrees of freedom.
+
+        numeric_only : bool, default False
+            Include only `float`, `int` or `boolean` data.
+
+            .. versionadded:: 1.5.0
+
+        Returns
+        -------
+        DataFrame or Series
+            Variance of values within each group.
+        """
+        nv.validate_resampler_func("var", args, kwargs)
+        return self._downsample("var", ddof=ddof, numeric_only=numeric_only)
+
+    @doc(GroupBy.size)
+    def size(self):
+        result = self._downsample("size")
+        if not len(self.ax):
+            from pandas import Series
+
+            if self._selected_obj.ndim == 1:
+                name = self._selected_obj.name
+            else:
+                name = None
+            result = Series([], index=result.index, dtype="int64", name=name)
+        return result
+
+    @doc(GroupBy.count)
+    def count(self):
+        result = self._downsample("count")
+        if not len(self.ax):
+            if self._selected_obj.ndim == 1:
+                result = type(self._selected_obj)(
+                    [], index=result.index, dtype="int64", name=self._selected_obj.name
+                )
+            else:
+                from pandas import DataFrame
+
+                result = DataFrame(
+                    [], index=result.index, columns=result.columns, dtype="int64"
+                )
+
+        return result
+
+    def quantile(self, q=0.5, **kwargs):
+        """
+        Return value at the given quantile.
+
+        Parameters
+        ----------
+        q : float or array-like, default 0.5 (50% quantile)
+
+        Returns
+        -------
+        DataFrame or Series
+            Quantile of values within each group.
+
+        See Also
+        --------
+        Series.quantile
+            Return a series, where the index is q and the values are the quantiles.
+        DataFrame.quantile
+            Return a DataFrame, where the columns are the columns of self,
+            and the values are the quantiles.
+        DataFrameGroupBy.quantile
+            Return a DataFrame, where the columns are groupby columns,
+            and the values are its quantiles.
+        """
+        return self._downsample("quantile", q=q, **kwargs)
+
+
+def _add_downsample_kernel(
+    name: str, args: tuple[str, ...], docs_class: type = GroupBy
+) -> None:
+    """
+    Add a kernel to Resampler.
+
+    Arguments
+    ---------
+    name : str
+        Name of the kernel.
+ args : tuple + Arguments of the method. + docs_class : type + Class to get kernel docstring from. + """ + assert args in ( + ("numeric_only", "min_count"), + ("numeric_only",), + ("ddof", "numeric_only"), + (), + ) + + # Explicitly provide args rather than args/kwargs for API docs + if args == ("numeric_only", "min_count"): + + def f( + self, + numeric_only: bool | lib.NoDefault = lib.no_default, + min_count: int = 0, + *args, + **kwargs, + ): + nv.validate_resampler_func(name, args, kwargs) + if numeric_only is lib.no_default and name != "sum": + # For DataFrameGroupBy, set it to be False for methods other than `sum`. + numeric_only = False + + return self._downsample( + name, numeric_only=numeric_only, min_count=min_count + ) + + elif args == ("numeric_only",): + # error: All conditional function variants must have identical signatures + def f( # type: ignore[misc] + self, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs + ): + nv.validate_resampler_func(name, args, kwargs) + return self._downsample(name, numeric_only=numeric_only) + + elif args == ("ddof", "numeric_only"): + # error: All conditional function variants must have identical signatures + def f( # type: ignore[misc] + self, + ddof: int = 1, + numeric_only: bool | lib.NoDefault = lib.no_default, + *args, + **kwargs, + ): + nv.validate_resampler_func(name, args, kwargs) + return self._downsample(name, ddof=ddof, numeric_only=numeric_only) + + else: + # error: All conditional function variants must have identical signatures + def f( # type: ignore[misc] + self, + *args, + **kwargs, + ): + nv.validate_resampler_func(name, args, kwargs) + return self._downsample(name) + + f.__doc__ = getattr(docs_class, name).__doc__ + setattr(Resampler, name, f) + + +for method in ["sum", "prod", "min", "max", "first", "last"]: + _add_downsample_kernel(method, ("numeric_only", "min_count")) +for method in ["mean", "median"]: + _add_downsample_kernel(method, ("numeric_only",)) +for method in ["sem"]: + _add_downsample_kernel(method, ("ddof", "numeric_only")) +for method in ["ohlc"]: + _add_downsample_kernel(method, ()) +for method in ["nunique"]: + _add_downsample_kernel(method, (), SeriesGroupBy) + + +class _GroupByMixin(PandasObject): + """ + Provide the groupby facilities. + """ + + _attributes: list[str] # in practice the same as Resampler._attributes + _selection: IndexLabel | None = None + + def __init__(self, obj, parent=None, groupby=None, key=None, **kwargs) -> None: + # reached via ._gotitem and _get_resampler_for_grouping + + if parent is None: + parent = obj + + # initialize our GroupByMixin object with + # the resampler attributes + for attr in self._attributes: + setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) + self._selection = kwargs.get("selection") + + self.binner = parent.binner + self.key = key + + self._groupby = groupby + self._groupby.mutated = True + self._groupby.grouper.mutated = True + self.groupby = copy.copy(parent.groupby) + + @no_type_check + def _apply(self, f, *args, **kwargs): + """ + Dispatch to _upsample; we are stripping all of the _upsample kwargs and + performing the original function call on the grouped object. 
+ """ + + def func(x): + x = self._shallow_copy(x, groupby=self.groupby) + + if isinstance(f, str): + return getattr(x, f)(**kwargs) + + return x.apply(f, *args, **kwargs) + + result = self._groupby.apply(func) + return self._wrap_result(result) + + _upsample = _apply + _downsample = _apply + _groupby_and_aggregate = _apply + + @final + def _gotitem(self, key, ndim, subset=None): + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + # create a new object to prevent aliasing + if subset is None: + # error: "GotItemMixin" has no attribute "obj" + subset = self.obj # type: ignore[attr-defined] + + # we need to make a shallow copy of ourselves + # with the same groupby + kwargs = {attr: getattr(self, attr) for attr in self._attributes} + + # Try to select from a DataFrame, falling back to a Series + try: + if isinstance(key, list) and self.key not in key: + key.append(self.key) + groupby = self._groupby[key] + except IndexError: + groupby = self._groupby + + selection = None + if subset.ndim == 2 and ( + (lib.is_scalar(key) and key in subset) or lib.is_list_like(key) + ): + selection = key + + new_rs = type(self)( + subset, groupby=groupby, parent=self, selection=selection, **kwargs + ) + return new_rs + + +class DatetimeIndexResampler(Resampler): + @property + def _resampler_for_grouping(self): + return DatetimeIndexResamplerGroupby + + def _get_binner_for_time(self): + + # this is how we are actually creating the bins + if self.kind == "period": + return self.groupby._get_time_period_bins(self.ax) + return self.groupby._get_time_bins(self.ax) + + def _downsample(self, how, **kwargs): + """ + Downsample the cython defined function. + + Parameters + ---------- + how : string / cython mapped function + **kwargs : kw args passed to how function + """ + how = com.get_cython_func(how) or how + ax = self.ax + if self._selected_obj.ndim == 1: + obj = self._selected_obj + else: + # Excludes `on` column when provided + obj = self._obj_with_exclusions + + if not len(ax): + # reset to the new freq + obj = obj.copy() + obj.index = obj.index._with_freq(self.freq) + assert obj.index.freq == self.freq, (obj.index.freq, self.freq) + return obj + + # do we have a regular frequency + + # error: Item "None" of "Optional[Any]" has no attribute "binlabels" + if ( + (ax.freq is not None or ax.inferred_freq is not None) + and len(self.grouper.binlabels) > len(ax) + and how is None + ): + + # let's do an asfreq + return self.asfreq() + + # we are downsampling + # we want to call the actual grouper method here + result = obj.groupby(self.grouper, axis=self.axis).aggregate(how, **kwargs) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _adjust_binner_for_upsample(self, binner): + """ + Adjust our binner when upsampling. + + The range of a new index should not be outside specified range + """ + if self.closed == "right": + binner = binner[1:] + else: + binner = binner[:-1] + return binner + + def _upsample(self, method, limit=None, fill_value=None): + """ + Parameters + ---------- + method : string {'backfill', 'bfill', 'pad', + 'ffill', 'asfreq'} method for upsampling + limit : int, default None + Maximum size gap to fill when reindexing + fill_value : scalar, default None + Value to use for missing values + + See Also + -------- + .fillna: Fill NA/NaN values using the specified method. 
+ + """ + if self.axis: + raise AssertionError("axis must be 0") + if self._from_selection: + raise ValueError( + "Upsampling from level= or on= selection " + "is not supported, use .set_index(...) " + "to explicitly set index to datetime-like" + ) + + ax = self.ax + obj = self._selected_obj + binner = self.binner + res_index = self._adjust_binner_for_upsample(binner) + + # if we have the same frequency as our axis, then we are equal sampling + if ( + limit is None + and to_offset(ax.inferred_freq) == self.freq + and len(obj) == len(res_index) + ): + result = obj.copy() + result.index = res_index + else: + result = obj.reindex( + res_index, method=method, limit=limit, fill_value=fill_value + ) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _wrap_result(self, result): + result = super()._wrap_result(result) + + # we may have a different kind that we were asked originally + # convert if needed + if self.kind == "period" and not isinstance(result.index, PeriodIndex): + result.index = result.index.to_period(self.freq) + return result + + +class DatetimeIndexResamplerGroupby(_GroupByMixin, DatetimeIndexResampler): + """ + Provides a resample of a groupby implementation + """ + + @property + def _constructor(self): + return DatetimeIndexResampler + + +class PeriodIndexResampler(DatetimeIndexResampler): + @property + def _resampler_for_grouping(self): + return PeriodIndexResamplerGroupby + + def _get_binner_for_time(self): + if self.kind == "timestamp": + return super()._get_binner_for_time() + return self.groupby._get_period_bins(self.ax) + + def _convert_obj(self, obj: NDFrameT) -> NDFrameT: + obj = super()._convert_obj(obj) + + if self._from_selection: + # see GH 14008, GH 12871 + msg = ( + "Resampling from level= or on= selection " + "with a PeriodIndex is not currently supported, " + "use .set_index(...) to explicitly set index" + ) + raise NotImplementedError(msg) + + if self.loffset is not None: + # Cannot apply loffset/timedelta to PeriodIndex -> convert to + # timestamps + self.kind = "timestamp" + + # convert to timestamp + if self.kind == "timestamp": + obj = obj.to_timestamp(how=self.convention) + + return obj + + def _downsample(self, how, **kwargs): + """ + Downsample the cython defined function. + + Parameters + ---------- + how : string / cython mapped function + **kwargs : kw args passed to how function + """ + # we may need to actually resample as if we are timestamps + if self.kind == "timestamp": + return super()._downsample(how, **kwargs) + + how = com.get_cython_func(how) or how + ax = self.ax + + if is_subperiod(ax.freq, self.freq): + # Downsampling + return self._groupby_and_aggregate(how, **kwargs) + elif is_superperiod(ax.freq, self.freq): + if how == "ohlc": + # GH #13083 + # upsampling to subperiods is handled as an asfreq, which works + # for pure aggregating/reducing methods + # OHLC reduces along the time dimension, but creates multiple + # values for each period -> handle by _groupby_and_aggregate() + return self._groupby_and_aggregate(how) + return self.asfreq() + elif ax.freq == self.freq: + return self.asfreq() + + raise IncompatibleFrequency( + f"Frequency {ax.freq} cannot be resampled to {self.freq}, " + "as they are not sub or super periods" + ) + + def _upsample(self, method, limit=None, fill_value=None): + """ + Parameters + ---------- + method : {'backfill', 'bfill', 'pad', 'ffill'} + Method for upsampling. + limit : int, default None + Maximum size gap to fill when reindexing. 
+ fill_value : scalar, default None + Value to use for missing values. + + See Also + -------- + .fillna: Fill NA/NaN values using the specified method. + + """ + # we may need to actually resample as if we are timestamps + if self.kind == "timestamp": + return super()._upsample(method, limit=limit, fill_value=fill_value) + + ax = self.ax + obj = self.obj + new_index = self.binner + + # Start vs. end of period + memb = ax.asfreq(self.freq, how=self.convention) + + # Get the fill indexer + indexer = memb.get_indexer(new_index, method=method, limit=limit) + new_obj = _take_new_index( + obj, + indexer, + new_index, + axis=self.axis, + ) + return self._wrap_result(new_obj) + + +class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler): + """ + Provides a resample of a groupby implementation. + """ + + @property + def _constructor(self): + return PeriodIndexResampler + + +class TimedeltaIndexResampler(DatetimeIndexResampler): + @property + def _resampler_for_grouping(self): + return TimedeltaIndexResamplerGroupby + + def _get_binner_for_time(self): + return self.groupby._get_time_delta_bins(self.ax) + + def _adjust_binner_for_upsample(self, binner): + """ + Adjust our binner when upsampling. + + The range of a new index is allowed to be greater than original range + so we don't need to change the length of a binner, GH 13022 + """ + return binner + + +class TimedeltaIndexResamplerGroupby(_GroupByMixin, TimedeltaIndexResampler): + """ + Provides a resample of a groupby implementation. + """ + + @property + def _constructor(self): + return TimedeltaIndexResampler + + +def get_resampler( + obj, kind=None, **kwds +) -> DatetimeIndexResampler | PeriodIndexResampler | TimedeltaIndexResampler: + """ + Create a TimeGrouper and return our resampler. + """ + tg = TimeGrouper(**kwds) + return tg._get_resampler(obj, kind=kind) + + +get_resampler.__doc__ = Resampler.__doc__ + + +def get_resampler_for_grouping( + groupby, rule, how=None, fill_method=None, limit=None, kind=None, on=None, **kwargs +): + """ + Return our appropriate resampler when grouping as well. + """ + # .resample uses 'on' similar to how .groupby uses 'key' + tg = TimeGrouper(freq=rule, key=on, **kwargs) + resampler = tg._get_resampler(groupby.obj, kind=kind) + return resampler._get_resampler_for_grouping(groupby=groupby, key=tg.key) + + +class TimeGrouper(Grouper): + """ + Custom groupby class for time-interval grouping. 
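+
+    Provides the bin edges (``binner``), the bin assignments (``bins``) and the
+    bin ``labels`` that the :class:`Resampler` subclasses use to group the data.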
+ + Parameters + ---------- + freq : pandas date offset or offset alias for identifying bin edges + closed : closed end of interval; 'left' or 'right' + label : interval boundary to use for labeling; 'left' or 'right' + convention : {'start', 'end', 'e', 's'} + If axis is PeriodIndex + """ + + _attributes = Grouper._attributes + ( + "closed", + "label", + "how", + "loffset", + "kind", + "convention", + "origin", + "offset", + ) + + def __init__( + self, + freq="Min", + closed: Literal["left", "right"] | None = None, + label: Literal["left", "right"] | None = None, + how="mean", + axis=0, + fill_method=None, + limit=None, + loffset=None, + kind: str | None = None, + convention: Literal["start", "end", "e", "s"] | None = None, + base: int | None = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + group_keys: bool | lib.NoDefault = True, + **kwargs, + ) -> None: + # Check for correctness of the keyword arguments which would + # otherwise silently use the default if misspelled + if label not in {None, "left", "right"}: + raise ValueError(f"Unsupported value {label} for `label`") + if closed not in {None, "left", "right"}: + raise ValueError(f"Unsupported value {closed} for `closed`") + if convention not in {None, "start", "end", "e", "s"}: + raise ValueError(f"Unsupported value {convention} for `convention`") + + freq = to_offset(freq) + + end_types = {"M", "A", "Q", "BM", "BA", "BQ", "W"} + rule = freq.rule_code + if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types): + if closed is None: + closed = "right" + if label is None: + label = "right" + else: + # The backward resample sets ``closed`` to ``'right'`` by default + # since the last value should be considered as the edge point for + # the last bin. When origin in "end" or "end_day", the value for a + # specific ``Timestamp`` index stands for the resample result from + # the current ``Timestamp`` minus ``freq`` to the current + # ``Timestamp`` with a right close. + if origin in ["end", "end_day"]: + if closed is None: + closed = "right" + if label is None: + label = "right" + else: + if closed is None: + closed = "left" + if label is None: + label = "left" + + self.closed = closed + self.label = label + self.kind = kind + self.convention = convention if convention is not None else "e" + self.how = how + self.fill_method = fill_method + self.limit = limit + self.group_keys = group_keys + + if origin in ("epoch", "start", "start_day", "end", "end_day"): + self.origin = origin + else: + try: + self.origin = Timestamp(origin) + except (ValueError, TypeError) as err: + raise ValueError( + "'origin' should be equal to 'epoch', 'start', 'start_day', " + "'end', 'end_day' or " + f"should be a Timestamp convertible type. Got '{origin}' instead." + ) from err + + try: + self.offset = Timedelta(offset) if offset is not None else None + except (ValueError, TypeError) as err: + raise ValueError( + "'offset' should be a Timedelta convertible type. " + f"Got '{offset}' instead." + ) from err + + # always sort time groupers + kwargs["sort"] = True + + # Handle deprecated arguments since v1.1.0 of `base` and `loffset` (GH #31809) + if base is not None and offset is not None: + raise ValueError("'offset' and 'base' cannot be present at the same time") + + if base and isinstance(freq, Tick): + # this conversion handle the default behavior of base and the + # special case of GH #10530. 
Indeed in case when dealing with + # a TimedeltaIndex base was treated as a 'pure' offset even though + # the default behavior of base was equivalent of a modulo on + # freq_nanos. + self.offset = Timedelta(base * freq.nanos // freq.n) + + if isinstance(loffset, str): + loffset = to_offset(loffset) + self.loffset = loffset + + super().__init__(freq=freq, axis=axis, **kwargs) + + def _get_resampler(self, obj, kind=None): + """ + Return my resampler or raise if we have an invalid axis. + + Parameters + ---------- + obj : input object + kind : string, optional + 'period','timestamp','timedelta' are valid + + Returns + ------- + a Resampler + + Raises + ------ + TypeError if incompatible axis + + """ + self._set_grouper(obj) + + ax = self.ax + if isinstance(ax, DatetimeIndex): + return DatetimeIndexResampler( + obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys + ) + elif isinstance(ax, PeriodIndex) or kind == "period": + return PeriodIndexResampler( + obj, groupby=self, kind=kind, axis=self.axis, group_keys=self.group_keys + ) + elif isinstance(ax, TimedeltaIndex): + return TimedeltaIndexResampler( + obj, groupby=self, axis=self.axis, group_keys=self.group_keys + ) + + raise TypeError( + "Only valid with DatetimeIndex, " + "TimedeltaIndex or PeriodIndex, " + f"but got an instance of '{type(ax).__name__}'" + ) + + def _get_grouper(self, obj, validate: bool = True): + # create the resampler and return our binner + r = self._get_resampler(obj) + return r.binner, r.grouper, r.obj + + def _get_time_bins(self, ax: DatetimeIndex): + if not isinstance(ax, DatetimeIndex): + raise TypeError( + "axis must be a DatetimeIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + if len(ax) == 0: + binner = labels = DatetimeIndex(data=[], freq=self.freq, name=ax.name) + return binner, [], labels + + first, last = _get_timestamp_range_edges( + ax.min(), + ax.max(), + self.freq, + closed=self.closed, + origin=self.origin, + offset=self.offset, + ) + # GH #12037 + # use first/last directly instead of call replace() on them + # because replace() will swallow the nanosecond part + # thus last bin maybe slightly before the end if the end contains + # nanosecond part and lead to `Values falls after last bin` error + # GH 25758: If DST lands at midnight (e.g. 
'America/Havana'), user feedback + # has noted that ambiguous=True provides the most sensible result + binner = labels = date_range( + freq=self.freq, + start=first, + end=last, + tz=ax.tz, + name=ax.name, + ambiguous=True, + nonexistent="shift_forward", + ) + + ax_values = ax.asi8 + binner, bin_edges = self._adjust_bin_edges(binner, ax_values) + + # general version, knowing nothing about relative frequencies + bins = lib.generate_bins_dt64( + ax_values, bin_edges, self.closed, hasnans=ax.hasnans + ) + + if self.closed == "right": + labels = binner + if self.label == "right": + labels = labels[1:] + elif self.label == "right": + labels = labels[1:] + + if ax.hasnans: + binner = binner.insert(0, NaT) + labels = labels.insert(0, NaT) + + # if we end up with more labels than bins + # adjust the labels + # GH4076 + if len(bins) < len(labels): + labels = labels[: len(bins)] + + return binner, bins, labels + + def _adjust_bin_edges(self, binner, ax_values): + # Some hacks for > daily data, see #1471, #1458, #1483 + + if self.freq != "D" and is_superperiod(self.freq, "D"): + if self.closed == "right": + # GH 21459, GH 9119: Adjust the bins relative to the wall time + bin_edges = binner.tz_localize(None) + bin_edges = bin_edges + timedelta(1) - Nano(1) + bin_edges = bin_edges.tz_localize(binner.tz).asi8 + else: + bin_edges = binner.asi8 + + # intraday values on last day + if bin_edges[-2] > ax_values.max(): + bin_edges = bin_edges[:-1] + binner = binner[:-1] + else: + bin_edges = binner.asi8 + return binner, bin_edges + + def _get_time_delta_bins(self, ax: TimedeltaIndex): + if not isinstance(ax, TimedeltaIndex): + raise TypeError( + "axis must be a TimedeltaIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + if not len(ax): + binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name) + return binner, [], labels + + start, end = ax.min(), ax.max() + + if self.closed == "right": + end += self.freq + + labels = binner = timedelta_range( + start=start, end=end, freq=self.freq, name=ax.name + ) + + end_stamps = labels + if self.closed == "left": + end_stamps += self.freq + + bins = ax.searchsorted(end_stamps, side=self.closed) + + if self.offset: + # GH 10530 & 31809 + labels += self.offset + if self.loffset: + # GH 33498 + labels += self.loffset + + return binner, bins, labels + + def _get_time_period_bins(self, ax: DatetimeIndex): + if not isinstance(ax, DatetimeIndex): + raise TypeError( + "axis must be a DatetimeIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + freq = self.freq + + if not len(ax): + binner = labels = PeriodIndex(data=[], freq=freq, name=ax.name) + return binner, [], labels + + labels = binner = period_range(start=ax[0], end=ax[-1], freq=freq, name=ax.name) + + end_stamps = (labels + freq).asfreq(freq, "s").to_timestamp() + if ax.tz: + end_stamps = end_stamps.tz_localize(ax.tz) + bins = ax.searchsorted(end_stamps, side="left") + + return binner, bins, labels + + def _get_period_bins(self, ax: PeriodIndex): + if not isinstance(ax, PeriodIndex): + raise TypeError( + "axis must be a PeriodIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + memb = ax.asfreq(self.freq, how=self.convention) + + # NaT handling as in pandas._lib.lib.generate_bins_dt64() + nat_count = 0 + if memb.hasnans: + # error: Incompatible types in assignment (expression has type + # "bool_", variable has type "int") [assignment] + nat_count = np.sum(memb._isnan) # type: ignore[assignment] + memb = memb[~memb._isnan] + + if not len(memb): + # index contains no 
valid (non-NaT) values + bins = np.array([], dtype=np.int64) + binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name) + if len(ax) > 0: + # index is all NaT + binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax)) + return binner, bins, labels + + freq_mult = self.freq.n + + start = ax.min().asfreq(self.freq, how=self.convention) + end = ax.max().asfreq(self.freq, how="end") + bin_shift = 0 + + if isinstance(self.freq, Tick): + # GH 23882 & 31809: get adjusted bin edge labels with 'origin' + # and 'origin' support. This call only makes sense if the freq is a + # Tick since offset and origin are only used in those cases. + # Not doing this check could create an extra empty bin. + p_start, end = _get_period_range_edges( + start, + end, + self.freq, + closed=self.closed, + origin=self.origin, + offset=self.offset, + ) + + # Get offset for bin edge (not label edge) adjustment + start_offset = Period(start, self.freq) - Period(p_start, self.freq) + # error: Item "Period" of "Union[Period, Any]" has no attribute "n" + bin_shift = start_offset.n % freq_mult # type: ignore[union-attr] + start = p_start + + labels = binner = period_range( + start=start, end=end, freq=self.freq, name=ax.name + ) + + i8 = memb.asi8 + + # when upsampling to subperiods, we need to generate enough bins + expected_bins_count = len(binner) * freq_mult + i8_extend = expected_bins_count - (i8[-1] - i8[0]) + rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult) + rng += freq_mult + # adjust bin edge indexes to account for base + rng -= bin_shift + + # Wrap in PeriodArray for PeriodArray.searchsorted + prng = type(memb._data)(rng, dtype=memb.dtype) + bins = memb.searchsorted(prng, side="left") + + if nat_count > 0: + binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count) + + return binner, bins, labels + + +def _take_new_index( + obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: int = 0 +) -> NDFrameT: + + if isinstance(obj, ABCSeries): + new_values = algos.take_nd(obj._values, indexer) + # error: Incompatible return value type (got "Series", expected "NDFrameT") + return obj._constructor( # type: ignore[return-value] + new_values, index=new_index, name=obj.name + ) + elif isinstance(obj, ABCDataFrame): + if axis == 1: + raise NotImplementedError("axis 1 is not supported") + new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) + # error: Incompatible return value type + # (got "DataFrame", expected "NDFrameT") + return obj._constructor(new_mgr) # type: ignore[return-value] + else: + raise ValueError("'obj' should be either a Series or a DataFrame") + + +def _get_timestamp_range_edges( + first: Timestamp, + last: Timestamp, + freq: BaseOffset, + closed: Literal["right", "left"] = "left", + origin="start_day", + offset: Timedelta | None = None, +) -> tuple[Timestamp, Timestamp]: + """ + Adjust the `first` Timestamp to the preceding Timestamp that resides on + the provided offset. Adjust the `last` Timestamp to the following + Timestamp that resides on the provided offset. Input Timestamps that + already reside on the offset will be adjusted depending on the type of + offset and the `closed` parameter. + + Parameters + ---------- + first : pd.Timestamp + The beginning Timestamp of the range to be adjusted. + last : pd.Timestamp + The ending Timestamp of the range to be adjusted. + freq : pd.DateOffset + The dateoffset to which the Timestamps will be adjusted. 
+ closed : {'right', 'left'}, default "left" + Which side of bin interval is closed. + origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day' + The timestamp on which to adjust the grouping. The timezone of origin must + match the timezone of the index. + If a timestamp is not used, these values are also supported: + + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + offset : pd.Timedelta, default is None + An offset timedelta added to the origin. + + Returns + ------- + A tuple of length 2, containing the adjusted pd.Timestamp objects. + """ + if isinstance(freq, Tick): + index_tz = first.tz + if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): + raise ValueError("The origin must have the same timezone as the index.") + elif origin == "epoch": + # set the epoch based on the timezone to have similar bins results when + # resampling on the same kind of indexes on different timezones + origin = Timestamp("1970-01-01", tz=index_tz) + + if isinstance(freq, Day): + # _adjust_dates_anchored assumes 'D' means 24H, but first/last + # might contain a DST transition (23H, 24H, or 25H). + # So "pretend" the dates are naive when adjusting the endpoints + first = first.tz_localize(None) + last = last.tz_localize(None) + if isinstance(origin, Timestamp): + origin = origin.tz_localize(None) + + first, last = _adjust_dates_anchored( + first, last, freq, closed=closed, origin=origin, offset=offset + ) + if isinstance(freq, Day): + first = first.tz_localize(index_tz) + last = last.tz_localize(index_tz) + else: + first = first.normalize() + last = last.normalize() + + if closed == "left": + first = Timestamp(freq.rollback(first)) + else: + first = Timestamp(first - freq) + + last = Timestamp(last + freq) + + return first, last + + +def _get_period_range_edges( + first: Period, + last: Period, + freq: BaseOffset, + closed: Literal["right", "left"] = "left", + origin="start_day", + offset: Timedelta | None = None, +) -> tuple[Period, Period]: + """ + Adjust the provided `first` and `last` Periods to the respective Period of + the given offset that encompasses them. + + Parameters + ---------- + first : pd.Period + The beginning Period of the range to be adjusted. + last : pd.Period + The ending Period of the range to be adjusted. + freq : pd.DateOffset + The freq to which the Periods will be adjusted. + closed : {'right', 'left'}, default "left" + Which side of bin interval is closed. + origin : {'epoch', 'start', 'start_day'}, Timestamp, default 'start_day' + The timestamp on which to adjust the grouping. The timezone of origin must + match the timezone of the index. + + If a timestamp is not used, these values are also supported: + + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + offset : pd.Timedelta, default is None + An offset timedelta added to the origin. + + Returns + ------- + A tuple of length 2, containing the adjusted pd.Period objects. 
+    """
+    if not all(isinstance(obj, Period) for obj in [first, last]):
+        raise TypeError("'first' and 'last' must be instances of type Period")
+
+    # GH 23882
+    first_ts = first.to_timestamp()
+    last_ts = last.to_timestamp()
+    adjust_first = not freq.is_on_offset(first_ts)
+    adjust_last = freq.is_on_offset(last_ts)
+
+    first_ts, last_ts = _get_timestamp_range_edges(
+        first_ts, last_ts, freq, closed=closed, origin=origin, offset=offset
+    )
+
+    first = (first_ts + int(adjust_first) * freq).to_period(freq)
+    last = (last_ts - int(adjust_last) * freq).to_period(freq)
+    return first, last
+
+
+def _insert_nat_bin(
+    binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int
+) -> tuple[PeriodIndex, np.ndarray, PeriodIndex]:
+    # NaT handling as in pandas._lib.lib.generate_bins_dt64()
+    # shift bins by the number of NaT
+    assert nat_count > 0
+    bins += nat_count
+    bins = np.insert(bins, 0, nat_count)
+
+    # Incompatible types in assignment (expression has type "Index", variable
+    # has type "PeriodIndex")
+    binner = binner.insert(0, NaT)  # type: ignore[assignment]
+    # Incompatible types in assignment (expression has type "Index", variable
+    # has type "PeriodIndex")
+    labels = labels.insert(0, NaT)  # type: ignore[assignment]
+    return binner, bins, labels
+
+
+def _adjust_dates_anchored(
+    first: Timestamp,
+    last: Timestamp,
+    freq: Tick,
+    closed: Literal["right", "left"] = "right",
+    origin="start_day",
+    offset: Timedelta | None = None,
+) -> tuple[Timestamp, Timestamp]:
+    # First and last offsets should be calculated from the start day to fix an
+    # error caused by resampling across multiple days when a one day period is
+    # not a multiple of the frequency. See GH 8683
+    # To handle frequencies that are not a multiple of, or divisible by, a day,
+    # we allow a fixed origin timestamp to be defined. See GH 31809
+    origin_nanos = 0  # origin == "epoch"
+    if origin == "start_day":
+        origin_nanos = first.normalize().value
+    elif origin == "start":
+        origin_nanos = first.value
+    elif isinstance(origin, Timestamp):
+        origin_nanos = origin.value
+    elif origin in ["end", "end_day"]:
+        origin = last if origin == "end" else last.ceil("D")
+        sub_freq_times = (origin.value - first.value) // freq.nanos
+        if closed == "left":
+            sub_freq_times += 1
+        first = origin - sub_freq_times * freq
+        origin_nanos = first.value
+    origin_nanos += offset.value if offset else 0
+
+    # GH 10117 & GH 19375. If first and last contain timezone information,
+    # perform the calculation in UTC in order to avoid localizing on an
+    # Ambiguous or Nonexistent time.
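+    # The arithmetic below operates on integer nanoseconds (``.value``); the
+    # original timezones are re-attached to the results at the end.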
+ first_tzinfo = first.tzinfo + last_tzinfo = last.tzinfo + if first_tzinfo is not None: + first = first.tz_convert("UTC") + if last_tzinfo is not None: + last = last.tz_convert("UTC") + + foffset = (first.value - origin_nanos) % freq.nanos + loffset = (last.value - origin_nanos) % freq.nanos + + if closed == "right": + if foffset > 0: + # roll back + fresult_int = first.value - foffset + else: + fresult_int = first.value - freq.nanos + + if loffset > 0: + # roll forward + lresult_int = last.value + (freq.nanos - loffset) + else: + # already the end of the road + lresult_int = last.value + else: # closed == 'left' + if foffset > 0: + fresult_int = first.value - foffset + else: + # start of the road + fresult_int = first.value + + if loffset > 0: + # roll forward + lresult_int = last.value + (freq.nanos - loffset) + else: + lresult_int = last.value + freq.nanos + fresult = Timestamp(fresult_int) + lresult = Timestamp(lresult_int) + if first_tzinfo is not None: + fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo) + if last_tzinfo is not None: + lresult = lresult.tz_localize("UTC").tz_convert(last_tzinfo) + return fresult, lresult + + +def asfreq( + obj: NDFrameT, + freq, + method=None, + how=None, + normalize: bool = False, + fill_value=None, +) -> NDFrameT: + """ + Utility frequency conversion method for Series/DataFrame. + + See :meth:`pandas.NDFrame.asfreq` for full documentation. + """ + if isinstance(obj.index, PeriodIndex): + if method is not None: + raise NotImplementedError("'method' argument is not supported") + + if how is None: + how = "E" + + new_obj = obj.copy() + new_obj.index = obj.index.asfreq(freq, how=how) + + elif len(obj.index) == 0: + new_obj = obj.copy() + + new_obj.index = _asfreq_compat(obj.index, freq) + else: + dti = date_range(obj.index.min(), obj.index.max(), freq=freq) + dti.name = obj.index.name + new_obj = obj.reindex(dti, method=method, fill_value=fill_value) + if normalize: + new_obj.index = new_obj.index.normalize() + + return new_obj + + +def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq): + """ + Helper to mimic asfreq on (empty) DatetimeIndex and TimedeltaIndex. 
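+
+    For an empty index there is nothing to reindex, so an index of the
+    requested frequency is constructed directly and attached to the result.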
+ + Parameters + ---------- + index : PeriodIndex, DatetimeIndex, or TimedeltaIndex + freq : DateOffset + + Returns + ------- + same type as index + """ + if len(index) != 0: + # This should never be reached, always checked by the caller + raise ValueError( + "Can only set arbitrary freq for empty DatetimeIndex or TimedeltaIndex" + ) + new_index: Index + if isinstance(index, PeriodIndex): + new_index = index.asfreq(freq=freq) + elif isinstance(index, DatetimeIndex): + new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name) + elif isinstance(index, TimedeltaIndex): + new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name) + else: # pragma: no cover + raise TypeError(type(index)) + return new_index diff --git a/pandas/core/reshape/__init__.py b/pandas/core/reshape/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py new file mode 100644 index 00000000..b1884c49 --- /dev/null +++ b/pandas/core/reshape/api.py @@ -0,0 +1,41 @@ +from pandas.core.reshape.concat import concat +from pandas.core.reshape.encoding import ( + from_dummies, + get_dummies, +) +from pandas.core.reshape.melt import ( + lreshape, + melt, + wide_to_long, +) +from pandas.core.reshape.merge import ( + merge, + merge_asof, + merge_ordered, +) +from pandas.core.reshape.pivot import ( + crosstab, + pivot, + pivot_table, +) +from pandas.core.reshape.tile import ( + cut, + qcut, +) + +__all__ = [ + "concat", + "crosstab", + "cut", + "from_dummies", + "get_dummies", + "lreshape", + "melt", + "merge", + "merge_asof", + "merge_ordered", + "pivot", + "pivot_table", + "qcut", + "wide_to_long", +] diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py new file mode 100644 index 00000000..5328c799 --- /dev/null +++ b/pandas/core/reshape/concat.py @@ -0,0 +1,820 @@ +""" +Concat routines. +""" +from __future__ import annotations + +from collections import abc +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + Iterable, + Literal, + Mapping, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._typing import ( + Axis, + HashableT, +) +from pandas.util._decorators import ( + cache_readonly, + deprecate_nonkeyword_arguments, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.inference import is_bool +from pandas.core.dtypes.missing import isna + +from pandas.core.arrays.categorical import ( + factorize_from_iterable, + factorize_from_iterables, +) +import pandas.core.common as com +from pandas.core.indexes.api import ( + Index, + MultiIndex, + all_indexes_same, + default_index, + ensure_index, + get_objs_combined_axis, + get_unanimous_names, +) +from pandas.core.internals import concatenate_managers + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + from pandas.core.generic import NDFrame + +# --------------------------------------------------------------------- +# Concatenate DataFrame objects + + +@overload +def concat( + objs: Iterable[DataFrame] | Mapping[HashableT, DataFrame], + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame: + ... 
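+
+# The remaining overloads cover the other input/axis combinations: all-Series
+# input along axis=0 yields a Series, mixed Series/DataFrame input may yield
+# either, and concatenating along axis=1 always yields a DataFrame.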
+ + +@overload +def concat( + objs: Iterable[Series] | Mapping[HashableT, Series], + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> Series: + ... + + +@overload +def concat( + objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame | Series: + ... + + +@overload +def concat( + objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + axis: Literal[1, "columns"], + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame: + ... + + +@overload +def concat( + objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + axis: Axis = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame | Series: + ... + + +@deprecate_nonkeyword_arguments(version=None, allowed_args=["objs"]) +def concat( + objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + axis: Axis = 0, + join: str = "outer", + ignore_index: bool = False, + keys=None, + levels=None, + names=None, + verify_integrity: bool = False, + sort: bool = False, + copy: bool = True, +) -> DataFrame | Series: + """ + Concatenate pandas objects along a particular axis. + + Allows optional set logic along the other axes. + + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number. + + Parameters + ---------- + objs : a sequence or mapping of Series or DataFrame objects + If a mapping is passed, the sorted keys will be used as the `keys` + argument, unless it is passed, in which case the values will be + selected (see below). Any None objects will be dropped silently unless + they are all None in which case a ValueError will be raised. + axis : {0/'index', 1/'columns'}, default 0 + The axis to concatenate along. + join : {'inner', 'outer'}, default 'outer' + How to handle indexes on other axis (or axes). + ignore_index : bool, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, ..., n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level. + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys. + names : list, default None + Names for the levels in the resulting hierarchical index. + verify_integrity : bool, default False + Check whether the new concatenated axis contains duplicates. This can + be very expensive relative to the actual data concatenation. + sort : bool, default False + Sort non-concatenation axis if it is not already aligned when `join` + is 'outer'. + This has no effect when ``join='inner'``, which already preserves + the order of the non-concatenation axis. 
+ + .. versionchanged:: 1.0.0 + + Changed to not sort by default. + + copy : bool, default True + If False, do not copy data unnecessarily. + + Returns + ------- + object, type of objs + When concatenating all ``Series`` along the index (axis=0), a + ``Series`` is returned. When ``objs`` contains at least one + ``DataFrame``, a ``DataFrame`` is returned. When concatenating along + the columns (axis=1), a ``DataFrame`` is returned. + + See Also + -------- + DataFrame.join : Join DataFrames using indexes. + DataFrame.merge : Merge DataFrames by indexes or columns. + + Notes + ----- + The keys, levels, and names arguments are all optional. + + A walkthrough of how this method fits in with other tools for combining + pandas objects can be found `here + `__. + + It is not recommended to build DataFrames by adding single rows in a + for loop. Build a list of rows and make a DataFrame in a single concat. + + Examples + -------- + Combine two ``Series``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + dtype: object + + Clear the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object + + Add a hierarchical index at the outermost level of + the data with the ``keys`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2']) + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Label the index keys you create with the ``names`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2'], + ... names=['Series name', 'Row ID']) + Series name Row ID + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Combine two ``DataFrame`` objects with identical columns. + + >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], + ... columns=['letter', 'number']) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], + ... columns=['letter', 'number']) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2]) + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return everything. Columns outside the intersection will + be filled with ``NaN`` values. + + >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal']) + >>> df3 + letter number animal + 0 c 3 cat + 1 d 4 dog + >>> pd.concat([df1, df3], sort=False) + letter number animal + 0 a 1 NaN + 1 b 2 NaN + 0 c 3 cat + 1 d 4 dog + + Combine ``DataFrame`` objects with overlapping columns + and return only those that are shared by passing ``inner`` to + the ``join`` keyword argument. + + >>> pd.concat([df1, df3], join="inner") + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects horizontally along the x axis by + passing in ``axis=1``. + + >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], + ... columns=['animal', 'name']) + >>> pd.concat([df1, df4], axis=1) + letter number animal name + 0 a 1 bird polly + 1 b 2 monkey george + + Prevent the result from including duplicate index values with the + ``verify_integrity`` option. + + >>> df5 = pd.DataFrame([1], index=['a']) + >>> df5 + 0 + a 1 + >>> df6 = pd.DataFrame([2], index=['a']) + >>> df6 + 0 + a 2 + >>> pd.concat([df5, df6], verify_integrity=True) + Traceback (most recent call last): + ... + ValueError: Indexes have overlapping values: ['a'] + + Append a single row to the end of a ``DataFrame`` object. 
+ + >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0]) + >>> df7 + a b + 0 1 2 + >>> new_row = pd.Series({'a': 3, 'b': 4}) + >>> new_row + a 3 + b 4 + dtype: int64 + >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) + a b + 0 1 2 + 1 3 4 + """ + op = _Concatenator( + objs, + axis=axis, + ignore_index=ignore_index, + join=join, + keys=keys, + levels=levels, + names=names, + verify_integrity=verify_integrity, + copy=copy, + sort=sort, + ) + + return op.get_result() + + +class _Concatenator: + """ + Orchestrates a concatenation operation for BlockManagers + """ + + def __init__( + self, + objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + axis=0, + join: str = "outer", + keys=None, + levels=None, + names=None, + ignore_index: bool = False, + verify_integrity: bool = False, + copy: bool = True, + sort=False, + ) -> None: + if isinstance(objs, (ABCSeries, ABCDataFrame, str)): + raise TypeError( + "first argument must be an iterable of pandas " + f'objects, you passed an object of type "{type(objs).__name__}"' + ) + + if join == "outer": + self.intersect = False + elif join == "inner": + self.intersect = True + else: # pragma: no cover + raise ValueError( + "Only can inner (intersect) or outer (union) join the other axis" + ) + + if isinstance(objs, abc.Mapping): + if keys is None: + keys = list(objs.keys()) + objs = [objs[k] for k in keys] + else: + objs = list(objs) + + if len(objs) == 0: + raise ValueError("No objects to concatenate") + + if keys is None: + objs = list(com.not_none(*objs)) + else: + # #1649 + clean_keys = [] + clean_objs = [] + for k, v in zip(keys, objs): + if v is None: + continue + clean_keys.append(k) + clean_objs.append(v) + objs = clean_objs + + if isinstance(keys, MultiIndex): + # TODO: retain levels? + keys = type(keys).from_tuples(clean_keys, names=keys.names) + else: + name = getattr(keys, "name", None) + keys = Index(clean_keys, name=name) + + if len(objs) == 0: + raise ValueError("All objects passed were None") + + # figure out what our result ndim is going to be + ndims = set() + for obj in objs: + if not isinstance(obj, (ABCSeries, ABCDataFrame)): + msg = ( + f"cannot concatenate object of type '{type(obj)}'; " + "only Series and DataFrame objs are valid" + ) + raise TypeError(msg) + + ndims.add(obj.ndim) + + # get the sample + # want the highest ndim that we have, and must be non-empty + # unless all objs are empty + sample: NDFrame | None = None + if len(ndims) > 1: + max_ndim = max(ndims) + for obj in objs: + if obj.ndim == max_ndim and np.sum(obj.shape): + sample = obj + break + + else: + # filter out the empties if we have not multi-index possibilities + # note to keep empty Series as it affect to result columns / name + non_empties = [ + obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries) + ] + + if len(non_empties) and ( + keys is None and names is None and levels is None and not self.intersect + ): + objs = non_empties + sample = objs[0] + + if sample is None: + sample = objs[0] + self.objs = objs + + # Standardize axis parameter to int + if isinstance(sample, ABCSeries): + from pandas import DataFrame + + axis = DataFrame._get_axis_number(axis) + else: + axis = sample._get_axis_number(axis) + + # Need to flip BlockManager axis in the DataFrame special case + self._is_frame = isinstance(sample, ABCDataFrame) + if self._is_frame: + axis = sample._get_block_manager_axis(axis) + + self._is_series = isinstance(sample, ABCSeries) + if not 0 <= axis <= sample.ndim: + raise AssertionError( + f"axis must be between 0 and 
{sample.ndim}, input was {axis}" + ) + + # if we have mixed ndims, then convert to highest ndim + # creating column numbers as needed + if len(ndims) > 1: + current_column = 0 + max_ndim = sample.ndim + self.objs, objs = [], self.objs + for obj in objs: + + ndim = obj.ndim + if ndim == max_ndim: + pass + + elif ndim != max_ndim - 1: + raise ValueError( + "cannot concatenate unaligned mixed " + "dimensional NDFrame objects" + ) + + else: + name = getattr(obj, "name", None) + if ignore_index or name is None: + name = current_column + current_column += 1 + + # doing a row-wise concatenation so need everything + # to line up + if self._is_frame and axis == 1: + name = 0 + # mypy needs to know sample is not an NDFrame + sample = cast("DataFrame | Series", sample) + obj = sample._constructor({name: obj}) + + self.objs.append(obj) + + # note: this is the BlockManager axis (since DataFrame is transposed) + self.bm_axis = axis + self.axis = 1 - self.bm_axis if self._is_frame else 0 + self.keys = keys + self.names = names or getattr(keys, "names", None) + self.levels = levels + + if not is_bool(sort): + warnings.warn( + "Passing non boolean values for sort is deprecated and " + "will error in a future version!", + FutureWarning, + stacklevel=find_stack_level(), + ) + self.sort = sort + + self.ignore_index = ignore_index + self.verify_integrity = verify_integrity + self.copy = copy + + self.new_axes = self._get_new_axes() + + def get_result(self): + cons: Callable[..., DataFrame | Series] + sample: DataFrame | Series + + # series only + if self._is_series: + sample = cast("Series", self.objs[0]) + + # stack blocks + if self.bm_axis == 0: + name = com.consensus_name_attr(self.objs) + cons = sample._constructor + + arrs = [ser._values for ser in self.objs] + + res = concat_compat(arrs, axis=0) + result = cons(res, index=self.new_axes[0], name=name, dtype=res.dtype) + return result.__finalize__(self, method="concat") + + # combine as columns in a frame + else: + data = dict(zip(range(len(self.objs)), self.objs)) + + # GH28330 Preserves subclassed objects through concat + cons = sample._constructor_expanddim + + index, columns = self.new_axes + df = cons(data, index=index, copy=self.copy) + df.columns = columns + return df.__finalize__(self, method="concat") + + # combine block managers + else: + sample = cast("DataFrame", self.objs[0]) + + mgrs_indexers = [] + for obj in self.objs: + indexers = {} + for ax, new_labels in enumerate(self.new_axes): + # ::-1 to convert BlockManager ax to DataFrame ax + if ax == self.bm_axis: + # Suppress reindexing on concat axis + continue + + # 1-ax to convert BlockManager axis to DataFrame axis + obj_labels = obj.axes[1 - ax] + if not new_labels.equals(obj_labels): + indexers[ax] = obj_labels.get_indexer(new_labels) + + mgrs_indexers.append((obj._mgr, indexers)) + + new_data = concatenate_managers( + mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy + ) + if not self.copy: + new_data._consolidate_inplace() + + cons = sample._constructor + return cons(new_data).__finalize__(self, method="concat") + + def _get_result_dim(self) -> int: + if self._is_series and self.bm_axis == 1: + return 2 + else: + return self.objs[0].ndim + + def _get_new_axes(self) -> list[Index]: + ndim = self._get_result_dim() + return [ + self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i) + for i in range(ndim) + ] + + def _get_comb_axis(self, i: int) -> Index: + data_axis = self.objs[0]._get_block_manager_axis(i) + return get_objs_combined_axis( + self.objs, 
+ axis=data_axis, + intersect=self.intersect, + sort=self.sort, + copy=self.copy, + ) + + @cache_readonly + def _get_concat_axis(self) -> Index: + """ + Return index to be used along concatenation axis. + """ + if self._is_series: + if self.bm_axis == 0: + indexes = [x.index for x in self.objs] + elif self.ignore_index: + idx = default_index(len(self.objs)) + return idx + elif self.keys is None: + names: list[Hashable] = [None] * len(self.objs) + num = 0 + has_names = False + for i, x in enumerate(self.objs): + if not isinstance(x, ABCSeries): + raise TypeError( + f"Cannot concatenate type 'Series' with " + f"object of type '{type(x).__name__}'" + ) + if x.name is not None: + names[i] = x.name + has_names = True + else: + names[i] = num + num += 1 + if has_names: + return Index(names) + else: + return default_index(len(self.objs)) + else: + return ensure_index(self.keys).set_names(self.names) + else: + indexes = [x.axes[self.axis] for x in self.objs] + + if self.ignore_index: + idx = default_index(sum(len(i) for i in indexes)) + return idx + + if self.keys is None: + if self.levels is not None: + raise ValueError("levels supported only when keys is not None") + concat_axis = _concat_indexes(indexes) + else: + concat_axis = _make_concat_multiindex( + indexes, self.keys, self.levels, self.names + ) + + self._maybe_check_integrity(concat_axis) + + return concat_axis + + def _maybe_check_integrity(self, concat_index: Index): + if self.verify_integrity: + if not concat_index.is_unique: + overlap = concat_index[concat_index.duplicated()].unique() + raise ValueError(f"Indexes have overlapping values: {overlap}") + + +def _concat_indexes(indexes) -> Index: + return indexes[0].append(indexes[1:]) + + +def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex: + + if (levels is None and isinstance(keys[0], tuple)) or ( + levels is not None and len(levels) > 1 + ): + zipped = list(zip(*keys)) + if names is None: + names = [None] * len(zipped) + + if levels is None: + _, levels = factorize_from_iterables(zipped) + else: + levels = [ensure_index(x) for x in levels] + else: + zipped = [keys] + if names is None: + names = [None] + + if levels is None: + levels = [ensure_index(keys).unique()] + else: + levels = [ensure_index(x) for x in levels] + + for level in levels: + if not level.is_unique: + raise ValueError(f"Level values not unique: {level.tolist()}") + + if not all_indexes_same(indexes) or not all(level.is_unique for level in levels): + codes_list = [] + + # things are potentially different sizes, so compute the exact codes + # for each level and pass those to MultiIndex.from_arrays + + for hlevel, level in zip(zipped, levels): + to_concat = [] + for key, index in zip(hlevel, indexes): + # Find matching codes, include matching nan values as equal. 
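+                # ``i`` below is the positional code of ``key`` within ``level``;
+                # each row of the corresponding index is assigned that code.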
+ mask = (isna(level) & isna(key)) | (level == key) + if not mask.any(): + raise ValueError(f"Key {key} not in level {level}") + i = np.nonzero(mask)[0][0] + + to_concat.append(np.repeat(i, len(index))) + codes_list.append(np.concatenate(to_concat)) + + concat_index = _concat_indexes(indexes) + + # these go at the end + if isinstance(concat_index, MultiIndex): + levels.extend(concat_index.levels) + codes_list.extend(concat_index.codes) + else: + codes, categories = factorize_from_iterable(concat_index) + levels.append(categories) + codes_list.append(codes) + + if len(names) == len(levels): + names = list(names) + else: + # make sure that all of the passed indices have the same nlevels + if not len({idx.nlevels for idx in indexes}) == 1: + raise AssertionError( + "Cannot concat indices that do not have the same number of levels" + ) + + # also copies + names = list(names) + list(get_unanimous_names(*indexes)) + + return MultiIndex( + levels=levels, codes=codes_list, names=names, verify_integrity=False + ) + + new_index = indexes[0] + n = len(new_index) + kpieces = len(indexes) + + # also copies + new_names = list(names) + new_levels = list(levels) + + # construct codes + new_codes = [] + + # do something a bit more speedy + + for hlevel, level in zip(zipped, levels): + hlevel = ensure_index(hlevel) + mapped = level.get_indexer(hlevel) + + mask = mapped == -1 + if mask.any(): + raise ValueError(f"Values not found in passed level: {hlevel[mask]!s}") + + new_codes.append(np.repeat(mapped, n)) + + if isinstance(new_index, MultiIndex): + new_levels.extend(new_index.levels) + new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes]) + else: + new_levels.append(new_index.unique()) + single_codes = new_index.unique().get_indexer(new_index) + new_codes.append(np.tile(single_codes, kpieces)) + + if len(new_names) < len(new_levels): + new_names.extend(new_index.names) + + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py new file mode 100644 index 00000000..da4de8cc --- /dev/null +++ b/pandas/core/reshape/encoding.py @@ -0,0 +1,520 @@ +from __future__ import annotations + +from collections import defaultdict +import itertools +from typing import Hashable + +import numpy as np + +from pandas._libs.sparse import IntIndex +from pandas._typing import Dtype + +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_list_like, + is_object_dtype, +) + +from pandas.core.arrays import SparseArray +from pandas.core.arrays.categorical import factorize_from_iterable +from pandas.core.frame import DataFrame +from pandas.core.indexes.api import Index +from pandas.core.series import Series + + +def get_dummies( + data, + prefix=None, + prefix_sep="_", + dummy_na: bool = False, + columns=None, + sparse: bool = False, + drop_first: bool = False, + dtype: Dtype | None = None, +) -> DataFrame: + """ + Convert categorical variable into dummy/indicator variables. + + Parameters + ---------- + data : array-like, Series, or DataFrame + Data of which to get dummy indicators. + prefix : str, list of str, or dict of str, default None + String to append DataFrame column names. + Pass a list with length equal to the number of columns + when calling get_dummies on a DataFrame. Alternatively, `prefix` + can be a dictionary mapping column names to prefixes. + prefix_sep : str, default '_' + If appending prefix, separator/delimiter to use. 
Or pass a + list or dictionary as with `prefix`. + dummy_na : bool, default False + Add a column to indicate NaNs, if False NaNs are ignored. + columns : list-like, default None + Column names in the DataFrame to be encoded. + If `columns` is None then all the columns with + `object`, `string`, or `category` dtype will be converted. + sparse : bool, default False + Whether the dummy-encoded columns should be backed by + a :class:`SparseArray` (True) or a regular NumPy array (False). + drop_first : bool, default False + Whether to get k-1 dummies out of k categorical levels by removing the + first level. + dtype : dtype, default np.uint8 + Data type for new columns. Only a single dtype is allowed. + + Returns + ------- + DataFrame + Dummy-coded data. + + See Also + -------- + Series.str.get_dummies : Convert Series to dummy codes. + :func:`~pandas.from_dummies` : Convert dummy codes to categorical ``DataFrame``. + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> s = pd.Series(list('abca')) + + >>> pd.get_dummies(s) + a b c + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + 3 1 0 0 + + >>> s1 = ['a', 'b', np.nan] + + >>> pd.get_dummies(s1) + a b + 0 1 0 + 1 0 1 + 2 0 0 + + >>> pd.get_dummies(s1, dummy_na=True) + a b NaN + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + + >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], + ... 'C': [1, 2, 3]}) + + >>> pd.get_dummies(df, prefix=['col1', 'col2']) + C col1_a col1_b col2_a col2_b col2_c + 0 1 1 0 0 1 0 + 1 2 0 1 1 0 0 + 2 3 1 0 0 0 1 + + >>> pd.get_dummies(pd.Series(list('abcaa'))) + a b c + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + 3 1 0 0 + 4 1 0 0 + + >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True) + b c + 0 0 0 + 1 1 0 + 2 0 1 + 3 0 0 + 4 0 0 + + >>> pd.get_dummies(pd.Series(list('abc')), dtype=float) + a b c + 0 1.0 0.0 0.0 + 1 0.0 1.0 0.0 + 2 0.0 0.0 1.0 + """ + from pandas.core.reshape.concat import concat + + dtypes_to_encode = ["object", "string", "category"] + + if isinstance(data, DataFrame): + # determine columns being encoded + if columns is None: + data_to_encode = data.select_dtypes(include=dtypes_to_encode) + elif not is_list_like(columns): + raise TypeError("Input must be a list-like for parameter `columns`") + else: + data_to_encode = data[columns] + + # validate prefixes and separator to avoid silently dropping cols + def check_len(item, name): + + if is_list_like(item): + if not len(item) == data_to_encode.shape[1]: + len_msg = ( + f"Length of '{name}' ({len(item)}) did not match the " + "length of the columns being encoded " + f"({data_to_encode.shape[1]})." + ) + raise ValueError(len_msg) + + check_len(prefix, "prefix") + check_len(prefix_sep, "prefix_sep") + + if isinstance(prefix, str): + prefix = itertools.cycle([prefix]) + if isinstance(prefix, dict): + prefix = [prefix[col] for col in data_to_encode.columns] + + if prefix is None: + prefix = data_to_encode.columns + + # validate separators + if isinstance(prefix_sep, str): + prefix_sep = itertools.cycle([prefix_sep]) + elif isinstance(prefix_sep, dict): + prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] + + with_dummies: list[DataFrame] + if data_to_encode.shape == data.shape: + # Encoding the entire df, do not prepend any dropped columns + with_dummies = [] + elif columns is not None: + # Encoding only cols specified in columns. Get all cols not in + # columns to prepend to result. + with_dummies = [data.drop(columns, axis=1)] + else: + # Encoding only object and category dtype columns. 
Get remaining + # columns to prepend to result. + with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)] + + for (col, pre, sep) in zip(data_to_encode.items(), prefix, prefix_sep): + # col is (column_name, column), use just column data here + dummy = _get_dummies_1d( + col[1], + prefix=pre, + prefix_sep=sep, + dummy_na=dummy_na, + sparse=sparse, + drop_first=drop_first, + dtype=dtype, + ) + with_dummies.append(dummy) + result = concat(with_dummies, axis=1) + else: + result = _get_dummies_1d( + data, + prefix, + prefix_sep, + dummy_na, + sparse=sparse, + drop_first=drop_first, + dtype=dtype, + ) + return result + + +def _get_dummies_1d( + data, + prefix, + prefix_sep="_", + dummy_na: bool = False, + sparse: bool = False, + drop_first: bool = False, + dtype: Dtype | None = None, +) -> DataFrame: + from pandas.core.reshape.concat import concat + + # Series avoids inconsistent NaN handling + codes, levels = factorize_from_iterable(Series(data)) + + if dtype is None: + dtype = np.dtype(np.uint8) + # error: Argument 1 to "dtype" has incompatible type "Union[ExtensionDtype, str, + # dtype[Any], Type[object]]"; expected "Type[Any]" + dtype = np.dtype(dtype) # type: ignore[arg-type] + + if is_object_dtype(dtype): + raise ValueError("dtype=object is not a valid dtype for get_dummies") + + def get_empty_frame(data) -> DataFrame: + index: Index | np.ndarray + if isinstance(data, Series): + index = data.index + else: + index = Index(range(len(data))) + return DataFrame(index=index) + + # if all NaN + if not dummy_na and len(levels) == 0: + return get_empty_frame(data) + + codes = codes.copy() + if dummy_na: + codes[codes == -1] = len(levels) + levels = levels.insert(len(levels), np.nan) + + # if dummy_na, we just fake a nan level. drop_first will drop it again + if drop_first and len(levels) == 1: + return get_empty_frame(data) + + number_of_cols = len(levels) + + if prefix is None: + dummy_cols = levels + else: + dummy_cols = Index([f"{prefix}{prefix_sep}{level}" for level in levels]) + + index: Index | None + if isinstance(data, Series): + index = data.index + else: + index = None + + if sparse: + + fill_value: bool | float + if is_integer_dtype(dtype): + fill_value = 0 + elif dtype == np.dtype(bool): + fill_value = False + else: + fill_value = 0.0 + + sparse_series = [] + N = len(data) + sp_indices: list[list] = [[] for _ in range(len(dummy_cols))] + mask = codes != -1 + codes = codes[mask] + n_idx = np.arange(N)[mask] + + for ndx, code in zip(n_idx, codes): + sp_indices[code].append(ndx) + + if drop_first: + # remove first categorical level to avoid perfect collinearity + # GH12042 + sp_indices = sp_indices[1:] + dummy_cols = dummy_cols[1:] + for col, ixs in zip(dummy_cols, sp_indices): + sarr = SparseArray( + np.ones(len(ixs), dtype=dtype), + sparse_index=IntIndex(N, ixs), + fill_value=fill_value, + dtype=dtype, + ) + sparse_series.append(Series(data=sarr, index=index, name=col)) + + return concat(sparse_series, axis=1, copy=False) + + else: + # take on axis=1 + transpose to ensure ndarray layout is column-major + dummy_mat = np.eye(number_of_cols, dtype=dtype).take(codes, axis=1).T + + if not dummy_na: + # reset NaN GH4446 + dummy_mat[codes == -1] = 0 + + if drop_first: + # remove first GH12042 + dummy_mat = dummy_mat[:, 1:] + dummy_cols = dummy_cols[1:] + return DataFrame(dummy_mat, index=index, columns=dummy_cols) + + +def from_dummies( + data: DataFrame, + sep: None | str = None, + default_category: None | Hashable | dict[str, Hashable] = None, +) -> DataFrame: + """ + Create a 
categorical ``DataFrame`` from a ``DataFrame`` of dummy variables. + + Inverts the operation performed by :func:`~pandas.get_dummies`. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + data : DataFrame + Data which contains dummy-coded variables in form of integer columns of + 1's and 0's. + sep : str, default None + Separator used in the column names of the dummy categories they are + character indicating the separation of the categorical names from the prefixes. + For example, if your column names are 'prefix_A' and 'prefix_B', + you can strip the underscore by specifying sep='_'. + default_category : None, Hashable or dict of Hashables, default None + The default category is the implied category when a value has none of the + listed categories specified with a one, i.e. if all dummies in a row are + zero. Can be a single value for all variables or a dict directly mapping + the default categories to a prefix of a variable. + + Returns + ------- + DataFrame + Categorical data decoded from the dummy input-data. + + Raises + ------ + ValueError + * When the input ``DataFrame`` ``data`` contains NA values. + * When the input ``DataFrame`` ``data`` contains column names with separators + that do not match the separator specified with ``sep``. + * When a ``dict`` passed to ``default_category`` does not include an implied + category for each prefix. + * When a value in ``data`` has more than one category assigned to it. + * When ``default_category=None`` and a value in ``data`` has no category + assigned to it. + TypeError + * When the input ``data`` is not of type ``DataFrame``. + * When the input ``DataFrame`` ``data`` contains non-dummy data. + * When the passed ``sep`` is of a wrong data type. + * When the passed ``default_category`` is of a wrong data type. + + See Also + -------- + :func:`~pandas.get_dummies` : Convert ``Series`` or ``DataFrame`` to dummy codes. + :class:`~pandas.Categorical` : Represent a categorical variable in classic. + + Notes + ----- + The columns of the passed dummy data should only include 1's and 0's, + or boolean values. + + Examples + -------- + >>> df = pd.DataFrame({"a": [1, 0, 0, 1], "b": [0, 1, 0, 0], + ... "c": [0, 0, 1, 0]}) + + >>> df + a b c + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + 3 1 0 0 + + >>> pd.from_dummies(df) + 0 a + 1 b + 2 c + 3 a + + >>> df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], + ... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], + ... "col2_c": [0, 0, 1]}) + + >>> df + col1_a col1_b col2_a col2_b col2_c + 0 1 0 0 1 0 + 1 0 1 1 0 0 + 2 1 0 0 0 1 + + >>> pd.from_dummies(df, sep="_") + col1 col2 + 0 a b + 1 b a + 2 a c + + >>> df = pd.DataFrame({"col1_a": [1, 0, 0], "col1_b": [0, 1, 0], + ... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], + ... 
"col2_c": [0, 0, 0]}) + + >>> df + col1_a col1_b col2_a col2_b col2_c + 0 1 0 0 1 0 + 1 0 1 1 0 0 + 2 0 0 0 0 0 + + >>> pd.from_dummies(df, sep="_", default_category={"col1": "d", "col2": "e"}) + col1 col2 + 0 a b + 1 b a + 2 d e + """ + from pandas.core.reshape.concat import concat + + if not isinstance(data, DataFrame): + raise TypeError( + "Expected 'data' to be a 'DataFrame'; " + f"Received 'data' of type: {type(data).__name__}" + ) + + if data.isna().any().any(): + raise ValueError( + "Dummy DataFrame contains NA value in column: " + f"'{data.isna().any().idxmax()}'" + ) + + # index data with a list of all columns that are dummies + try: + data_to_decode = data.astype("boolean", copy=False) + except TypeError: + raise TypeError("Passed DataFrame contains non-dummy data") + + # collect prefixes and get lists to slice data for each prefix + variables_slice = defaultdict(list) + if sep is None: + variables_slice[""] = list(data.columns) + elif isinstance(sep, str): + for col in data_to_decode.columns: + prefix = col.split(sep)[0] + if len(prefix) == len(col): + raise ValueError(f"Separator not specified for column: {col}") + variables_slice[prefix].append(col) + else: + raise TypeError( + "Expected 'sep' to be of type 'str' or 'None'; " + f"Received 'sep' of type: {type(sep).__name__}" + ) + + if default_category is not None: + if isinstance(default_category, dict): + if not len(default_category) == len(variables_slice): + len_msg = ( + f"Length of 'default_category' ({len(default_category)}) " + f"did not match the length of the columns being encoded " + f"({len(variables_slice)})" + ) + raise ValueError(len_msg) + elif isinstance(default_category, Hashable): + default_category = dict( + zip(variables_slice, [default_category] * len(variables_slice)) + ) + else: + raise TypeError( + "Expected 'default_category' to be of type " + "'None', 'Hashable', or 'dict'; " + "Received 'default_category' of type: " + f"{type(default_category).__name__}" + ) + + cat_data = {} + for prefix, prefix_slice in variables_slice.items(): + if sep is None: + cats = prefix_slice.copy() + else: + cats = [col[len(prefix + sep) :] for col in prefix_slice] + assigned = data_to_decode.loc[:, prefix_slice].sum(axis=1) + if any(assigned > 1): + raise ValueError( + "Dummy DataFrame contains multi-assignment(s); " + f"First instance in row: {assigned.idxmax()}" + ) + elif any(assigned == 0): + if isinstance(default_category, dict): + cats.append(default_category[prefix]) + else: + raise ValueError( + "Dummy DataFrame contains unassigned value(s); " + f"First instance in row: {assigned.idxmin()}" + ) + data_slice = concat( + (data_to_decode.loc[:, prefix_slice], assigned == 0), axis=1 + ) + else: + data_slice = data_to_decode.loc[:, prefix_slice] + cats_array = np.array(cats, dtype="object") + # get indices of True entries along axis=1 + cat_data[prefix] = cats_array[data_slice.to_numpy().nonzero()[1]] + + return DataFrame(cat_data) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py new file mode 100644 index 00000000..5de9c8e2 --- /dev/null +++ b/pandas/core/reshape/melt.py @@ -0,0 +1,551 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING +import warnings + +import numpy as np + +from pandas.util._decorators import ( + Appender, + deprecate_kwarg, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_extension_array_dtype, + is_list_like, +) +from pandas.core.dtypes.concat import concat_compat +from 
pandas.core.dtypes.missing import notna + +import pandas.core.algorithms as algos +from pandas.core.arrays import Categorical +import pandas.core.common as com +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) +from pandas.core.reshape.concat import concat +from pandas.core.reshape.util import tile_compat +from pandas.core.shared_docs import _shared_docs +from pandas.core.tools.numeric import to_numeric + +if TYPE_CHECKING: + from pandas import DataFrame + + +@Appender(_shared_docs["melt"] % {"caller": "pd.melt(df, ", "other": "DataFrame.melt"}) +def melt( + frame: DataFrame, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level=None, + ignore_index: bool = True, +) -> DataFrame: + # If multiindex, gather names of columns on all level for checking presence + # of `id_vars` and `value_vars` + if isinstance(frame.columns, MultiIndex): + cols = [x for c in frame.columns for x in c] + else: + cols = list(frame.columns) + + if value_name in frame.columns: + warnings.warn( + "This dataframe has a column name that matches the 'value_name' column " + "name of the resulting Dataframe. " + "In the future this will raise an error, please set the 'value_name' " + "parameter of DataFrame.melt to a unique name.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if id_vars is not None: + if not is_list_like(id_vars): + id_vars = [id_vars] + elif isinstance(frame.columns, MultiIndex) and not isinstance(id_vars, list): + raise ValueError( + "id_vars must be a list of tuples when columns are a MultiIndex" + ) + else: + # Check that `id_vars` are in frame + id_vars = list(id_vars) + missing = Index(com.flatten(id_vars)).difference(cols) + if not missing.empty: + raise KeyError( + "The following 'id_vars' are not present " + f"in the DataFrame: {list(missing)}" + ) + else: + id_vars = [] + + if value_vars is not None: + if not is_list_like(value_vars): + value_vars = [value_vars] + elif isinstance(frame.columns, MultiIndex) and not isinstance(value_vars, list): + raise ValueError( + "value_vars must be a list of tuples when columns are a MultiIndex" + ) + else: + value_vars = list(value_vars) + # Check that `value_vars` are in frame + missing = Index(com.flatten(value_vars)).difference(cols) + if not missing.empty: + raise KeyError( + "The following 'value_vars' are not present in " + f"the DataFrame: {list(missing)}" + ) + if col_level is not None: + idx = frame.columns.get_level_values(col_level).get_indexer( + id_vars + value_vars + ) + else: + idx = algos.unique(frame.columns.get_indexer_for(id_vars + value_vars)) + frame = frame.iloc[:, idx] + else: + frame = frame.copy() + + if col_level is not None: # allow list or other? + # frame is a copy + frame.columns = frame.columns.get_level_values(col_level) + + if var_name is None: + if isinstance(frame.columns, MultiIndex): + if len(frame.columns.names) == len(set(frame.columns.names)): + var_name = frame.columns.names + else: + var_name = [f"variable_{i}" for i in range(len(frame.columns.names))] + else: + var_name = [ + frame.columns.name if frame.columns.name is not None else "variable" + ] + if isinstance(var_name, str): + var_name = [var_name] + + N, K = frame.shape + K -= len(id_vars) + + mdata = {} + for col in id_vars: + id_data = frame.pop(col) + if is_extension_array_dtype(id_data): + if K > 0: + id_data = concat([id_data] * K, ignore_index=True) + else: + # We can't concat empty list. 
(GH 46044) + id_data = type(id_data)([], name=id_data.name, dtype=id_data.dtype) + else: + # error: Incompatible types in assignment (expression has type + # "ndarray[Any, dtype[Any]]", variable has type "Series") + id_data = np.tile(id_data._values, K) # type: ignore[assignment] + mdata[col] = id_data + + mcolumns = id_vars + var_name + [value_name] + + # error: Incompatible types in assignment (expression has type "ndarray", + # target has type "Series") + mdata[value_name] = frame._values.ravel("F") # type: ignore[assignment] + for i, col in enumerate(var_name): + # asanyarray will keep the columns as an Index + + # error: Incompatible types in assignment (expression has type "ndarray", target + # has type "Series") + mdata[col] = np.asanyarray( # type: ignore[assignment] + frame.columns._get_level_values(i) + ).repeat(N) + + result = frame._constructor(mdata, columns=mcolumns) + + if not ignore_index: + result.index = tile_compat(frame.index, K) + + return result + + +@deprecate_kwarg(old_arg_name="label", new_arg_name=None) +def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame: + """ + Reshape wide-format data to long. Generalized inverse of DataFrame.pivot. + + Accepts a dictionary, ``groups``, in which each key is a new column name + and each value is a list of old column names that will be "melted" under + the new column name as part of the reshape. + + Parameters + ---------- + data : DataFrame + The wide-format DataFrame. + groups : dict + {new_name : list_of_columns}. + dropna : bool, default True + Do not include columns whose entries are all NaN. + label : None + Not used. + + .. deprecated:: 1.0.0 + + Returns + ------- + DataFrame + Reshaped DataFrame. + + See Also + -------- + melt : Unpivot a DataFrame from wide to long format, optionally leaving + identifiers set. + pivot : Create a spreadsheet-style pivot table as a DataFrame. + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.pivot_table : Generalization of pivot that can handle + duplicate values for one index/column pair. + DataFrame.unstack : Pivot based on the index values instead of a + column. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Examples + -------- + >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], + ... 'team': ['Red Sox', 'Yankees'], + ... 
'year1': [2007, 2007], 'year2': [2008, 2008]}) + >>> data + hr1 hr2 team year1 year2 + 0 514 545 Red Sox 2007 2008 + 1 573 526 Yankees 2007 2008 + + >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) + team year hr + 0 Red Sox 2007 514 + 1 Yankees 2007 573 + 2 Red Sox 2008 545 + 3 Yankees 2008 526 + """ + if isinstance(groups, dict): + keys = list(groups.keys()) + values = list(groups.values()) + else: + keys, values = zip(*groups) + + all_cols = list(set.union(*(set(x) for x in values))) + id_cols = list(data.columns.difference(all_cols)) + + K = len(values[0]) + + for seq in values: + if len(seq) != K: + raise ValueError("All column lists must be same length") + + mdata = {} + pivot_cols = [] + + for target, names in zip(keys, values): + to_concat = [data[col]._values for col in names] + + mdata[target] = concat_compat(to_concat) + pivot_cols.append(target) + + for col in id_cols: + mdata[col] = np.tile(data[col]._values, K) + + if dropna: + mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) + for c in pivot_cols: + mask &= notna(mdata[c]) + if not mask.all(): + mdata = {k: v[mask] for k, v in mdata.items()} + + return data._constructor(mdata, columns=id_cols + pivot_cols) + + +def wide_to_long( + df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+" +) -> DataFrame: + r""" + Unpivot a DataFrame from wide to long format. + + Less flexible but more user-friendly than melt. + + With stubnames ['A', 'B'], this function expects to find one or more + group of columns with format + A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,... + You specify what you want to call this suffix in the resulting long format + with `j` (for example `j='year'`) + + Each row of these wide variables are assumed to be uniquely identified by + `i` (can be a single column name or a list of column names) + + All remaining variables in the data frame are left intact. + + Parameters + ---------- + df : DataFrame + The wide-format DataFrame. + stubnames : str or list-like + The stub name(s). The wide format variables are assumed to + start with the stub names. + i : str or list-like + Column(s) to use as id variable(s). + j : str + The name of the sub-observation variable. What you wish to name your + suffix in the long format. + sep : str, default "" + A character indicating the separation of the variable names + in the wide format, to be stripped from the names in the long format. + For example, if your column names are A-suffix1, A-suffix2, you + can strip the hyphen by specifying `sep='-'`. + suffix : str, default '\\d+' + A regular expression capturing the wanted suffixes. '\\d+' captures + numeric suffixes. Suffixes with no numbers could be specified with the + negated character class '\\D+'. You can also further disambiguate + suffixes, for example, if your wide variables are of the form A-one, + B-two,.., and you have an unrelated column A-rating, you can ignore the + last one by specifying `suffix='(!?one|two)'`. When all suffixes are + numeric, they are cast to int64/float64. + + Returns + ------- + DataFrame + A DataFrame that contains each stub name as a variable, with new index + (i, j). + + See Also + -------- + melt : Unpivot a DataFrame from wide to long format, optionally leaving + identifiers set. + pivot : Create a spreadsheet-style pivot table as a DataFrame. + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.pivot_table : Generalization of pivot that can handle + duplicate values for one index/column pair. 
+ DataFrame.unstack : Pivot based on the index values instead of a + column. + + Notes + ----- + All extra variables are left untouched. This simply uses + `pandas.melt` under the hood, but is hard-coded to "do the right thing" + in a typical case. + + Examples + -------- + >>> np.random.seed(123) + >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, + ... "A1980" : {0 : "d", 1 : "e", 2 : "f"}, + ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, + ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, + ... "X" : dict(zip(range(3), np.random.randn(3))) + ... }) + >>> df["id"] = df.index + >>> df + A1970 A1980 B1970 B1980 X id + 0 a d 2.5 3.2 -1.085631 0 + 1 b e 1.2 1.3 0.997345 1 + 2 c f 0.7 0.1 0.282978 2 + >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") + ... # doctest: +NORMALIZE_WHITESPACE + X A B + id year + 0 1970 -1.085631 a 2.5 + 1 1970 0.997345 b 1.2 + 2 1970 0.282978 c 0.7 + 0 1980 -1.085631 d 3.2 + 1 1980 0.997345 e 1.3 + 2 1980 0.282978 f 0.1 + + With multiple id columns + + >>> df = pd.DataFrame({ + ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] + ... }) + >>> df + famid birth ht1 ht2 + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') + >>> l + ... # doctest: +NORMALIZE_WHITESPACE + ht + famid birth age + 1 1 1 2.8 + 2 3.4 + 2 1 2.9 + 2 3.8 + 3 1 2.2 + 2 2.9 + 2 1 1 2.0 + 2 3.2 + 2 1 1.8 + 2 2.8 + 3 1 1.9 + 2 2.4 + 3 1 1 2.2 + 2 3.3 + 2 1 2.3 + 2 3.4 + 3 1 2.1 + 2 2.9 + + Going from long back to wide just takes some creative use of `unstack` + + >>> w = l.unstack() + >>> w.columns = w.columns.map('{0[0]}{0[1]}'.format) + >>> w.reset_index() + famid birth ht1 ht2 + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + + Less wieldy column names are also handled + + >>> np.random.seed(0) + >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3), + ... 'A(weekly)-2011': np.random.rand(3), + ... 'B(weekly)-2010': np.random.rand(3), + ... 'B(weekly)-2011': np.random.rand(3), + ... 'X' : np.random.randint(3, size=3)}) + >>> df['id'] = df.index + >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + A(weekly)-2010 A(weekly)-2011 B(weekly)-2010 B(weekly)-2011 X id + 0 0.548814 0.544883 0.437587 0.383442 0 0 + 1 0.715189 0.423655 0.891773 0.791725 1 1 + 2 0.602763 0.645894 0.963663 0.528895 1 2 + + >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id', + ... j='year', sep='-') + ... # doctest: +NORMALIZE_WHITESPACE + X A(weekly) B(weekly) + id year + 0 2010 0 0.548814 0.437587 + 1 2010 1 0.715189 0.891773 + 2 2010 1 0.602763 0.963663 + 0 2011 0 0.544883 0.383442 + 1 2011 1 0.423655 0.791725 + 2 2011 1 0.645894 0.528895 + + If we have many columns, we could also use a regex to find our + stubnames and pass that list on to wide_to_long + + >>> stubnames = sorted( + ... set([match[0] for match in df.columns.str.findall( + ... r'[A-B]\(.*\)').values if match != []]) + ... ) + >>> list(stubnames) + ['A(weekly)', 'B(weekly)'] + + All of the above examples have integers as suffixes. It is possible to + have non-integers as suffixes. + + >>> df = pd.DataFrame({ + ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... 
'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... 'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] + ... }) + >>> df + famid birth ht_one ht_two + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + + >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age', + ... sep='_', suffix=r'\w+') + >>> l + ... # doctest: +NORMALIZE_WHITESPACE + ht + famid birth age + 1 1 one 2.8 + two 3.4 + 2 one 2.9 + two 3.8 + 3 one 2.2 + two 2.9 + 2 1 one 2.0 + two 3.2 + 2 one 1.8 + two 2.8 + 3 one 1.9 + two 2.4 + 3 1 one 2.2 + two 3.3 + 2 one 2.3 + two 3.4 + 3 one 2.1 + two 2.9 + """ + + def get_var_names(df, stub: str, sep: str, suffix: str) -> list[str]: + regex = rf"^{re.escape(stub)}{re.escape(sep)}{suffix}$" + pattern = re.compile(regex) + return [col for col in df.columns if pattern.match(col)] + + def melt_stub(df, stub: str, i, j, value_vars, sep: str): + newdf = melt( + df, + id_vars=i, + value_vars=value_vars, + value_name=stub.rstrip(sep), + var_name=j, + ) + newdf[j] = Categorical(newdf[j]) + newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "", regex=True) + + # GH17627 Cast numerics suffixes to int/float + newdf[j] = to_numeric(newdf[j], errors="ignore") + + return newdf.set_index(i + [j]) + + if not is_list_like(stubnames): + stubnames = [stubnames] + else: + stubnames = list(stubnames) + + if any(col in stubnames for col in df.columns): + raise ValueError("stubname can't be identical to a column name") + + if not is_list_like(i): + i = [i] + else: + i = list(i) + + if df[i].duplicated().any(): + raise ValueError("the id variables need to uniquely identify each row") + + value_vars = [get_var_names(df, stub, sep, suffix) for stub in stubnames] + + value_vars_flattened = [e for sublist in value_vars for e in sublist] + id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened)) + + _melted = [melt_stub(df, s, i, j, v, sep) for s, v in zip(stubnames, value_vars)] + melted = _melted[0].join(_melted[1:], how="outer") + + if len(i) == 1: + new = df[id_vars].set_index(i).join(melted) + return new + + new = df[id_vars].merge(melted.reset_index(), on=i).set_index(i + [j]) + + return new diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py new file mode 100644 index 00000000..ec3dfa0b --- /dev/null +++ b/pandas/core/reshape/merge.py @@ -0,0 +1,2501 @@ +""" +SQL-style merge routines +""" +from __future__ import annotations + +import copy +import datetime +from functools import partial +import string +from typing import ( + TYPE_CHECKING, + Hashable, + Sequence, + cast, +) +import uuid +import warnings + +import numpy as np + +from pandas._libs import ( + Timedelta, + hashtable as libhashtable, + join as libjoin, + lib, +) +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + DtypeObj, + IndexLabel, + Suffixes, + npt, +) +from pandas.errors import MergeError +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_int64, + ensure_object, + is_array_like, + is_bool, + is_bool_dtype, + is_categorical_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_number, + is_numeric_dtype, + is_object_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import 
DatetimeTZDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) + +from pandas import ( + Categorical, + Index, + MultiIndex, + Series, +) +import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +import pandas.core.common as com +from pandas.core.construction import extract_array +from pandas.core.frame import _merge_doc +from pandas.core.sorting import is_int64_overflow_possible + +if TYPE_CHECKING: + from pandas import DataFrame + from pandas.core import groupby + from pandas.core.arrays import DatetimeArray + + +@Substitution("\nleft : DataFrame or named Series") +@Appender(_merge_doc, indents=0) +def merge( + left: DataFrame | Series, + right: DataFrame | Series, + how: str = "inner", + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + indicator: bool = False, + validate: str | None = None, +) -> DataFrame: + op = _MergeOperation( + left, + right, + how=how, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + indicator=indicator, + validate=validate, + ) + return op.get_result(copy=copy) + + +if __debug__: + merge.__doc__ = _merge_doc % "\nleft : DataFrame" + + +def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): + """ + groupby & merge; we are always performing a left-by type operation + + Parameters + ---------- + by: field to group + left: DataFrame + right: DataFrame + merge_pieces: function for merging + """ + pieces = [] + if not isinstance(by, (list, tuple)): + by = [by] + + lby = left.groupby(by, sort=False) + rby: groupby.DataFrameGroupBy | None = None + + # if we can groupby the rhs + # then we can get vastly better perf + if all(item in right.columns for item in by): + rby = right.groupby(by, sort=False) + + for key, lhs in lby.grouper.get_iterator(lby._selected_obj, axis=lby.axis): + + if rby is None: + rhs = right + else: + try: + rhs = right.take(rby.indices[key]) + except KeyError: + # key doesn't exist in left + lcols = lhs.columns.tolist() + cols = lcols + [r for r in right.columns if r not in set(lcols)] + merged = lhs.reindex(columns=cols) + merged.index = range(len(merged)) + pieces.append(merged) + continue + + merged = merge_pieces(lhs, rhs) + + # make sure join keys are in the merged + # TODO, should merge_pieces do this? + merged[by] = key + + pieces.append(merged) + + # preserve the original order + # if we have a missing piece this can be reset + from pandas.core.reshape.concat import concat + + result = concat(pieces, ignore_index=True) + result = result.reindex(columns=pieces[0].columns, copy=False) + return result, lby + + +def merge_ordered( + left: DataFrame, + right: DataFrame, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_by=None, + right_by=None, + fill_method: str | None = None, + suffixes: Suffixes = ("_x", "_y"), + how: str = "outer", +) -> DataFrame: + """ + Perform a merge for ordered data with optional filling/interpolation. + + Designed for ordered data like time series data. Optionally + perform group-wise merge (see examples). 
+ + Parameters + ---------- + left : DataFrame + right : DataFrame + on : label or list + Field names to join on. Must be found in both DataFrames. + left_on : label or list, or array-like + Field names to join on in left DataFrame. Can be a vector or list of + vectors of the length of the DataFrame to use a particular vector as + the join key instead of columns. + right_on : label or list, or array-like + Field names to join on in right DataFrame or vector/list of vectors per + left_on docs. + left_by : column name or list of column names + Group left DataFrame by group columns and merge piece by piece with + right DataFrame. + right_by : column name or list of column names + Group right DataFrame by group columns and merge piece by piece with + left DataFrame. + fill_method : {'ffill', None}, default None + Interpolation method for data. + suffixes : list-like, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. + + .. versionchanged:: 0.25.0 + how : {'left', 'right', 'outer', 'inner'}, default 'outer' + * left: use only keys from left frame (SQL: left outer join) + * right: use only keys from right frame (SQL: right outer join) + * outer: use union of keys from both frames (SQL: full outer join) + * inner: use intersection of keys from both frames (SQL: inner join). + + Returns + ------- + DataFrame + The merged DataFrame output type will the be same as + 'left', if it is a subclass of DataFrame. + + See Also + -------- + merge : Merge with a database-style join. + merge_asof : Merge on nearest keys. + + Examples + -------- + >>> df1 = pd.DataFrame( + ... { + ... "key": ["a", "c", "e", "a", "c", "e"], + ... "lvalue": [1, 2, 3, 1, 2, 3], + ... "group": ["a", "a", "a", "b", "b", "b"] + ... } + ... 
) + >>> df1 + key lvalue group + 0 a 1 a + 1 c 2 a + 2 e 3 a + 3 a 1 b + 4 c 2 b + 5 e 3 b + + >>> df2 = pd.DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]}) + >>> df2 + key rvalue + 0 b 1 + 1 c 2 + 2 d 3 + + >>> merge_ordered(df1, df2, fill_method="ffill", left_by="group") + key lvalue group rvalue + 0 a 1 a NaN + 1 b 1 a 1.0 + 2 c 2 a 2.0 + 3 d 2 a 3.0 + 4 e 3 a 3.0 + 5 a 1 b NaN + 6 b 1 b 1.0 + 7 c 2 b 2.0 + 8 d 2 b 3.0 + 9 e 3 b 3.0 + """ + + def _merger(x, y) -> DataFrame: + # perform the ordered merge operation + op = _OrderedMerge( + x, + y, + on=on, + left_on=left_on, + right_on=right_on, + suffixes=suffixes, + fill_method=fill_method, + how=how, + ) + return op.get_result() + + if left_by is not None and right_by is not None: + raise ValueError("Can only group either left or right frames") + elif left_by is not None: + if isinstance(left_by, str): + left_by = [left_by] + check = set(left_by).difference(left.columns) + if len(check) != 0: + raise KeyError(f"{check} not found in left columns") + result, _ = _groupby_and_merge(left_by, left, right, lambda x, y: _merger(x, y)) + elif right_by is not None: + if isinstance(right_by, str): + right_by = [right_by] + check = set(right_by).difference(right.columns) + if len(check) != 0: + raise KeyError(f"{check} not found in right columns") + result, _ = _groupby_and_merge( + right_by, right, left, lambda x, y: _merger(y, x) + ) + else: + result = _merger(left, right) + return result + + +def merge_asof( + left: DataFrame | Series, + right: DataFrame | Series, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + by=None, + left_by=None, + right_by=None, + suffixes: Suffixes = ("_x", "_y"), + tolerance=None, + allow_exact_matches: bool = True, + direction: str = "backward", +) -> DataFrame: + """ + Perform a merge by key distance. + + This is similar to a left-join except that we match on nearest + key rather than equal keys. Both DataFrames must be sorted by the key. + + For each row in the left DataFrame: + + - A "backward" search selects the last row in the right DataFrame whose + 'on' key is less than or equal to the left's key. + + - A "forward" search selects the first row in the right DataFrame whose + 'on' key is greater than or equal to the left's key. + + - A "nearest" search selects the row in the right DataFrame whose 'on' + key is closest in absolute distance to the left's key. + + The default is "backward" and is compatible in versions below 0.20.0. + The direction parameter was added in version 0.20.0 and introduces + "forward" and "nearest". + + Optionally match on equivalent keys with 'by' before searching with 'on'. + + Parameters + ---------- + left : DataFrame or named Series + right : DataFrame or named Series + on : label + Field name to join on. Must be found in both DataFrames. + The data MUST be ordered. Furthermore this must be a numeric column, + such as datetimelike, integer, or float. On or left_on/right_on + must be given. + left_on : label + Field name to join on in left DataFrame. + right_on : label + Field name to join on in right DataFrame. + left_index : bool + Use the index of the left DataFrame as the join key. + right_index : bool + Use the index of the right DataFrame as the join key. + by : column name or list of column names + Match on these columns before performing merge operation. + left_by : column name + Field names to match on in the left DataFrame. 
+ right_by : column name + Field names to match on in the right DataFrame. + suffixes : 2-length sequence (tuple, list, ...) + Suffix to apply to overlapping column names in the left and right + side, respectively. + tolerance : int or Timedelta, optional, default None + Select asof tolerance within this range; must be compatible + with the merge index. + allow_exact_matches : bool, default True + + - If True, allow matching with the same 'on' value + (i.e. less-than-or-equal-to / greater-than-or-equal-to) + - If False, don't match the same 'on' value + (i.e., strictly less-than / strictly greater-than). + + direction : 'backward' (default), 'forward', or 'nearest' + Whether to search for prior, subsequent, or closest matches. + + Returns + ------- + merged : DataFrame + + See Also + -------- + merge : Merge with a database-style join. + merge_ordered : Merge with optional filling/interpolation. + + Examples + -------- + >>> left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + >>> left + a left_val + 0 1 a + 1 5 b + 2 10 c + + >>> right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + >>> right + a right_val + 0 1 1 + 1 2 2 + 2 3 3 + 3 6 6 + 4 7 7 + + >>> pd.merge_asof(left, right, on="a") + a left_val right_val + 0 1 a 1 + 1 5 b 3 + 2 10 c 7 + + >>> pd.merge_asof(left, right, on="a", allow_exact_matches=False) + a left_val right_val + 0 1 a NaN + 1 5 b 3.0 + 2 10 c 7.0 + + >>> pd.merge_asof(left, right, on="a", direction="forward") + a left_val right_val + 0 1 a 1.0 + 1 5 b 6.0 + 2 10 c NaN + + >>> pd.merge_asof(left, right, on="a", direction="nearest") + a left_val right_val + 0 1 a 1 + 1 5 b 6 + 2 10 c 7 + + We can use indexed DataFrames as well. + + >>> left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10]) + >>> left + left_val + 1 a + 5 b + 10 c + + >>> right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7]) + >>> right + right_val + 1 1 + 2 2 + 3 3 + 6 6 + 7 7 + + >>> pd.merge_asof(left, right, left_index=True, right_index=True) + left_val right_val + 1 a 1 + 5 b 3 + 10 c 7 + + Here is a real-world times-series example + + >>> quotes = pd.DataFrame( + ... { + ... "time": [ + ... pd.Timestamp("2016-05-25 13:30:00.023"), + ... pd.Timestamp("2016-05-25 13:30:00.023"), + ... pd.Timestamp("2016-05-25 13:30:00.030"), + ... pd.Timestamp("2016-05-25 13:30:00.041"), + ... pd.Timestamp("2016-05-25 13:30:00.048"), + ... pd.Timestamp("2016-05-25 13:30:00.049"), + ... pd.Timestamp("2016-05-25 13:30:00.072"), + ... pd.Timestamp("2016-05-25 13:30:00.075") + ... ], + ... "ticker": [ + ... "GOOG", + ... "MSFT", + ... "MSFT", + ... "MSFT", + ... "GOOG", + ... "AAPL", + ... "GOOG", + ... "MSFT" + ... ], + ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03] + ... } + ... ) + >>> quotes + time ticker bid ask + 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93 + 1 2016-05-25 13:30:00.023 MSFT 51.95 51.96 + 2 2016-05-25 13:30:00.030 MSFT 51.97 51.98 + 3 2016-05-25 13:30:00.041 MSFT 51.99 52.00 + 4 2016-05-25 13:30:00.048 GOOG 720.50 720.93 + 5 2016-05-25 13:30:00.049 AAPL 97.99 98.01 + 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88 + 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03 + + >>> trades = pd.DataFrame( + ... { + ... "time": [ + ... pd.Timestamp("2016-05-25 13:30:00.023"), + ... pd.Timestamp("2016-05-25 13:30:00.038"), + ... pd.Timestamp("2016-05-25 13:30:00.048"), + ... pd.Timestamp("2016-05-25 13:30:00.048"), + ... 
pd.Timestamp("2016-05-25 13:30:00.048") + ... ], + ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], + ... "quantity": [75, 155, 100, 100, 100] + ... } + ... ) + >>> trades + time ticker price quantity + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 + + By default we are taking the asof of the quotes + + >>> pd.merge_asof(trades, quotes, on="time", by="ticker") + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + We only asof within 2ms between the quote time and the trade time + + >>> pd.merge_asof( + ... trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms") + ... ) + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + We only asof within 10ms between the quote time and the trade time + and we exclude exact matches on time. However *prior* data will + propagate forward + + >>> pd.merge_asof( + ... trades, + ... quotes, + ... on="time", + ... by="ticker", + ... tolerance=pd.Timedelta("10ms"), + ... allow_exact_matches=False + ... ) + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 NaN NaN + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 NaN NaN + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + """ + op = _AsOfMerge( + left, + right, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + by=by, + left_by=left_by, + right_by=right_by, + suffixes=suffixes, + how="asof", + tolerance=tolerance, + allow_exact_matches=allow_exact_matches, + direction=direction, + ) + return op.get_result() + + +# TODO: transformations?? +# TODO: only copy DataFrames when modification necessary +class _MergeOperation: + """ + Perform a database (SQL) merge operation between two DataFrame or Series + objects using either columns as keys or their row indexes + """ + + _merge_type = "merge" + how: str + on: IndexLabel | None + # left_on/right_on may be None when passed, but in validate_specification + # get replaced with non-None. 
+ left_on: Sequence[Hashable | AnyArrayLike] + right_on: Sequence[Hashable | AnyArrayLike] + left_index: bool + right_index: bool + axis: int + bm_axis: int + sort: bool + suffixes: Suffixes + copy: bool + indicator: bool + validate: str | None + + def __init__( + self, + left: DataFrame | Series, + right: DataFrame | Series, + how: str = "inner", + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + axis: int = 1, + left_index: bool = False, + right_index: bool = False, + sort: bool = True, + suffixes: Suffixes = ("_x", "_y"), + indicator: bool = False, + validate: str | None = None, + ) -> None: + _left = _validate_operand(left) + _right = _validate_operand(right) + self.left = self.orig_left = _left + self.right = self.orig_right = _right + self.how = how + + # bm_axis -> the axis on the BlockManager + self.bm_axis = axis + # axis --> the axis on the Series/DataFrame + self.axis = 1 - axis if self.left.ndim == 2 else 0 + + self.on = com.maybe_make_list(on) + + self.suffixes = suffixes + self.sort = sort + + self.left_index = left_index + self.right_index = right_index + + self.indicator = indicator + + if not is_bool(left_index): + raise ValueError( + f"left_index parameter must be of type bool, not {type(left_index)}" + ) + if not is_bool(right_index): + raise ValueError( + f"right_index parameter must be of type bool, not {type(right_index)}" + ) + + # warn user when merging between different levels + if _left.columns.nlevels != _right.columns.nlevels: + msg = ( + "merging between different levels is deprecated and will be removed " + f"in a future version. ({_left.columns.nlevels} levels on the left, " + f"{_right.columns.nlevels} on the right)" + ) + # stacklevel chosen to be correct when this is reached via pd.merge + # (and not DataFrame.join) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on) + + cross_col = None + if self.how == "cross": + ( + self.left, + self.right, + self.how, + cross_col, + ) = self._create_cross_configuration(self.left, self.right) + self.left_on = self.right_on = [cross_col] + self._cross = cross_col + + # note this function has side effects + ( + self.left_join_keys, + self.right_join_keys, + self.join_names, + ) = self._get_merge_keys() + + # validate the merge keys dtypes. We may need to coerce + # to avoid incompatible dtypes + self._maybe_coerce_merge_keys() + + # If argument passed to validate, + # check if columns specified as unique + # are in fact unique. + if validate is not None: + self._validate(validate) + + def _reindex_and_concat( + self, + join_index: Index, + left_indexer: npt.NDArray[np.intp] | None, + right_indexer: npt.NDArray[np.intp] | None, + copy: bool, + ) -> DataFrame: + """ + reindex along index and concat along columns. + """ + # Take views so we do not alter the originals + left = self.left[:] + right = self.right[:] + + llabels, rlabels = _items_overlap_with_suffix( + self.left._info_axis, self.right._info_axis, self.suffixes + ) + + if left_indexer is not None: + # Pinning the index here (and in the right code just below) is not + # necessary, but makes the `.take` more performant if we have e.g. + # a MultiIndex for left.index. 
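+ # reindex_indexer realigns the left blocks to join_index; -1 entries in left_indexer become all-NA rows, and allow_dups=True permits the row duplication a join can introduce.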
+ lmgr = left._mgr.reindex_indexer( + join_index, + left_indexer, + axis=1, + copy=False, + only_slice=True, + allow_dups=True, + use_na_proxy=True, + ) + left = left._constructor(lmgr) + left.index = join_index + + if right_indexer is not None: + rmgr = right._mgr.reindex_indexer( + join_index, + right_indexer, + axis=1, + copy=False, + only_slice=True, + allow_dups=True, + use_na_proxy=True, + ) + right = right._constructor(rmgr) + right.index = join_index + + from pandas import concat + + left.columns = llabels + right.columns = rlabels + result = concat([left, right], axis=1, copy=copy) + return result + + def get_result(self, copy: bool = True) -> DataFrame: + if self.indicator: + self.left, self.right = self._indicator_pre_merge(self.left, self.right) + + join_index, left_indexer, right_indexer = self._get_join_info() + + result = self._reindex_and_concat( + join_index, left_indexer, right_indexer, copy=copy + ) + result = result.__finalize__(self, method=self._merge_type) + + if self.indicator: + result = self._indicator_post_merge(result) + + self._maybe_add_join_keys(result, left_indexer, right_indexer) + + self._maybe_restore_index_levels(result) + + self._maybe_drop_cross_column(result, self._cross) + + return result.__finalize__(self, method="merge") + + def _maybe_drop_cross_column( + self, result: DataFrame, cross_col: str | None + ) -> None: + if cross_col is not None: + del result[cross_col] + + @cache_readonly + def _indicator_name(self) -> str | None: + if isinstance(self.indicator, str): + return self.indicator + elif isinstance(self.indicator, bool): + return "_merge" if self.indicator else None + else: + raise ValueError( + "indicator option can only accept boolean or string arguments" + ) + + def _indicator_pre_merge( + self, left: DataFrame, right: DataFrame + ) -> tuple[DataFrame, DataFrame]: + + columns = left.columns.union(right.columns) + + for i in ["_left_indicator", "_right_indicator"]: + if i in columns: + raise ValueError( + "Cannot use `indicator=True` option when " + f"data contains a column named {i}" + ) + if self._indicator_name in columns: + raise ValueError( + "Cannot use name of an existing column for indicator column" + ) + + left = left.copy() + right = right.copy() + + left["_left_indicator"] = 1 + left["_left_indicator"] = left["_left_indicator"].astype("int8") + + right["_right_indicator"] = 2 + right["_right_indicator"] = right["_right_indicator"].astype("int8") + + return left, right + + def _indicator_post_merge(self, result: DataFrame) -> DataFrame: + + result["_left_indicator"] = result["_left_indicator"].fillna(0) + result["_right_indicator"] = result["_right_indicator"].fillna(0) + + result[self._indicator_name] = Categorical( + (result["_left_indicator"] + result["_right_indicator"]), + categories=[1, 2, 3], + ) + result[self._indicator_name] = result[ + self._indicator_name + ].cat.rename_categories(["left_only", "right_only", "both"]) + + result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1) + return result + + def _maybe_restore_index_levels(self, result: DataFrame) -> None: + """ + Restore index levels specified as `on` parameters + + Here we check for cases where `self.left_on` and `self.right_on` pairs + each reference an index level in their respective DataFrames. The + joined columns corresponding to these pairs are then restored to the + index of `result`. + + **Note:** This method has side effects. 
It modifies `result` in-place + + Parameters + ---------- + result: DataFrame + merge result + + Returns + ------- + None + """ + names_to_restore = [] + for name, left_key, right_key in zip( + self.join_names, self.left_on, self.right_on + ): + if ( + # Argument 1 to "_is_level_reference" of "NDFrame" has incompatible + # type "Union[Hashable, ExtensionArray, Index, Series]"; expected + # "Hashable" + self.orig_left._is_level_reference(left_key) # type: ignore[arg-type] + # Argument 1 to "_is_level_reference" of "NDFrame" has incompatible + # type "Union[Hashable, ExtensionArray, Index, Series]"; expected + # "Hashable" + and self.orig_right._is_level_reference( + right_key # type: ignore[arg-type] + ) + and left_key == right_key + and name not in result.index.names + ): + + names_to_restore.append(name) + + if names_to_restore: + result.set_index(names_to_restore, inplace=True) + + def _maybe_add_join_keys( + self, + result: DataFrame, + left_indexer: np.ndarray | None, + right_indexer: np.ndarray | None, + ) -> None: + + left_has_missing = None + right_has_missing = None + + keys = zip(self.join_names, self.left_on, self.right_on) + for i, (name, lname, rname) in enumerate(keys): + if not _should_fill(lname, rname): + continue + + take_left, take_right = None, None + + if name in result: + + if left_indexer is not None and right_indexer is not None: + if name in self.left: + + if left_has_missing is None: + left_has_missing = (left_indexer == -1).any() + + if left_has_missing: + take_right = self.right_join_keys[i] + + if not is_dtype_equal( + result[name].dtype, self.left[name].dtype + ): + take_left = self.left[name]._values + + elif name in self.right: + + if right_has_missing is None: + right_has_missing = (right_indexer == -1).any() + + if right_has_missing: + take_left = self.left_join_keys[i] + + if not is_dtype_equal( + result[name].dtype, self.right[name].dtype + ): + take_right = self.right[name]._values + + elif left_indexer is not None and is_array_like(self.left_join_keys[i]): + take_left = self.left_join_keys[i] + take_right = self.right_join_keys[i] + + if take_left is not None or take_right is not None: + + if take_left is None: + lvals = result[name]._values + else: + # TODO: can we pin down take_left's type earlier? + take_left = extract_array(take_left, extract_numpy=True) + lfill = na_value_for_dtype(take_left.dtype) + lvals = algos.take_nd(take_left, left_indexer, fill_value=lfill) + + if take_right is None: + rvals = result[name]._values + else: + # TODO: can we pin down take_right's type earlier? 
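+ # extract_array unwraps the Index/Series to its underlying array so take_nd below can fill the -1 positions in right_indexer with an appropriate NA value.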
+ take_right = extract_array(take_right, extract_numpy=True) + rfill = na_value_for_dtype(take_right.dtype) + rvals = algos.take_nd(take_right, right_indexer, fill_value=rfill) + + # if we have an all missing left_indexer + # make sure to just use the right values or vice-versa + mask_left = left_indexer == -1 + # error: Item "bool" of "Union[Any, bool]" has no attribute "all" + if mask_left.all(): # type: ignore[union-attr] + key_col = Index(rvals) + result_dtype = rvals.dtype + elif right_indexer is not None and (right_indexer == -1).all(): + key_col = Index(lvals) + result_dtype = lvals.dtype + else: + key_col = Index(lvals).where(~mask_left, rvals) + result_dtype = find_common_type([lvals.dtype, rvals.dtype]) + + if result._is_label_reference(name): + result[name] = Series( + key_col, dtype=result_dtype, index=result.index + ) + elif result._is_level_reference(name): + if isinstance(result.index, MultiIndex): + key_col.name = name + idx_list = [ + result.index.get_level_values(level_name) + if level_name != name + else key_col + for level_name in result.index.names + ] + + result.set_index(idx_list, inplace=True) + else: + result.index = Index(key_col, name=name) + else: + result.insert(i, name or f"key_{i}", key_col) + + def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """return the join indexers""" + return get_join_indexers( + self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how + ) + + def _get_join_info( + self, + ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + + left_ax = self.left.axes[self.axis] + right_ax = self.right.axes[self.axis] + + if self.left_index and self.right_index and self.how != "asof": + join_index, left_indexer, right_indexer = left_ax.join( + right_ax, how=self.how, return_indexers=True, sort=self.sort + ) + + elif self.right_index and self.how == "left": + join_index, left_indexer, right_indexer = _left_join_on_index( + left_ax, right_ax, self.left_join_keys, sort=self.sort + ) + + elif self.left_index and self.how == "right": + join_index, right_indexer, left_indexer = _left_join_on_index( + right_ax, left_ax, self.right_join_keys, sort=self.sort + ) + else: + (left_indexer, right_indexer) = self._get_join_indexers() + + if self.right_index: + if len(self.left) > 0: + join_index = self._create_join_index( + self.left.index, + self.right.index, + left_indexer, + how="right", + ) + else: + join_index = self.right.index.take(right_indexer) + elif self.left_index: + if self.how == "asof": + # GH#33463 asof should always behave like a left merge + join_index = self._create_join_index( + self.left.index, + self.right.index, + left_indexer, + how="left", + ) + + elif len(self.right) > 0: + join_index = self._create_join_index( + self.right.index, + self.left.index, + right_indexer, + how="left", + ) + else: + join_index = self.left.index.take(left_indexer) + else: + join_index = Index(np.arange(len(left_indexer))) + + if len(join_index) == 0: + join_index = join_index.astype(object) + return join_index, left_indexer, right_indexer + + def _create_join_index( + self, + index: Index, + other_index: Index, + indexer: npt.NDArray[np.intp], + how: str = "left", + ) -> Index: + """ + Create a join index by rearranging one index to match another + + Parameters + ---------- + index : Index being rearranged + other_index : Index used to supply values not found in index + indexer : np.ndarray[np.intp] how to rearrange index + how : str + Replacement is only necessary if indexer based on 
other_index. + + Returns + ------- + Index + """ + if self.how in (how, "outer") and not isinstance(other_index, MultiIndex): + # if final index requires values in other_index but not target + # index, indexer may hold missing (-1) values, causing Index.take + # to take the final value in target index. So, we set the last + # element to be the desired fill value. We do not use allow_fill + # and fill_value because it throws a ValueError on integer indices + mask = indexer == -1 + if np.any(mask): + fill_value = na_value_for_dtype(index.dtype, compat=False) + index = index.append(Index([fill_value])) + return index.take(indexer) + + def _get_merge_keys(self): + """ + Note: has side effects (copy/delete key columns) + + Parameters + ---------- + left + right + on + + Returns + ------- + left_keys, right_keys, join_names + """ + # left_keys, right_keys entries can actually be anything listlike + # with a 'dtype' attr + left_keys: list[AnyArrayLike] = [] + right_keys: list[AnyArrayLike] = [] + join_names: list[Hashable] = [] + right_drop = [] + left_drop = [] + + left, right = self.left, self.right + + is_lkey = lambda x: is_array_like(x) and len(x) == len(left) + is_rkey = lambda x: is_array_like(x) and len(x) == len(right) + + # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A + # user could, for example, request 'left_index' and 'left_by'. In a + # regular pd.merge(), users cannot specify both 'left_index' and + # 'left_on'. (Instead, users have a MultiIndex). That means the + # self.left_on in this function is always empty in a pd.merge(), but + # a pd.merge_asof(left_index=True, left_by=...) will result in a + # self.left_on array with a None in the middle of it. This requires + # a work-around as designated in the code below. + # See _validate_left_right_on() for where this happens. + + # ugh, spaghetti re #733 + if _any(self.left_on) and _any(self.right_on): + for lk, rk in zip(self.left_on, self.right_on): + if is_lkey(lk): + lk = cast(AnyArrayLike, lk) + left_keys.append(lk) + if is_rkey(rk): + rk = cast(AnyArrayLike, rk) + right_keys.append(rk) + join_names.append(None) # what to do? + else: + # Then we're either Hashable or a wrong-length arraylike, + # the latter of which will raise + rk = cast(Hashable, rk) + if rk is not None: + right_keys.append(right._get_label_or_level_values(rk)) + join_names.append(rk) + else: + # work-around for merge_asof(right_index=True) + right_keys.append(right.index) + join_names.append(right.index.name) + else: + if not is_rkey(rk): + # Then we're either Hashable or a wrong-length arraylike, + # the latter of which will raise + rk = cast(Hashable, rk) + if rk is not None: + right_keys.append(right._get_label_or_level_values(rk)) + else: + # work-around for merge_asof(right_index=True) + right_keys.append(right.index) + if lk is not None and lk == rk: # FIXME: what about other NAs? 
+ # avoid key upcast in corner case (length-0) + if len(left) > 0: + right_drop.append(rk) + else: + left_drop.append(lk) + else: + rk = cast(AnyArrayLike, rk) + right_keys.append(rk) + if lk is not None: + # Then we're either Hashable or a wrong-length arraylike, + # the latter of which will raise + lk = cast(Hashable, lk) + left_keys.append(left._get_label_or_level_values(lk)) + join_names.append(lk) + else: + # work-around for merge_asof(left_index=True) + left_keys.append(left.index) + join_names.append(left.index.name) + elif _any(self.left_on): + for k in self.left_on: + if is_lkey(k): + k = cast(AnyArrayLike, k) + left_keys.append(k) + join_names.append(None) + else: + # Then we're either Hashable or a wrong-length arraylike, + # the latter of which will raise + k = cast(Hashable, k) + left_keys.append(left._get_label_or_level_values(k)) + join_names.append(k) + if isinstance(self.right.index, MultiIndex): + right_keys = [ + lev._values.take(lev_codes) + for lev, lev_codes in zip( + self.right.index.levels, self.right.index.codes + ) + ] + else: + right_keys = [self.right.index._values] + elif _any(self.right_on): + for k in self.right_on: + if is_rkey(k): + k = cast(AnyArrayLike, k) + right_keys.append(k) + join_names.append(None) + else: + # Then we're either Hashable or a wrong-length arraylike, + # the latter of which will raise + k = cast(Hashable, k) + right_keys.append(right._get_label_or_level_values(k)) + join_names.append(k) + if isinstance(self.left.index, MultiIndex): + left_keys = [ + lev._values.take(lev_codes) + for lev, lev_codes in zip( + self.left.index.levels, self.left.index.codes + ) + ] + else: + left_keys = [self.left.index._values] + + if left_drop: + self.left = self.left._drop_labels_or_levels(left_drop) + + if right_drop: + self.right = self.right._drop_labels_or_levels(right_drop) + + return left_keys, right_keys, join_names + + def _maybe_coerce_merge_keys(self) -> None: + # we have valid merges but we may have to further + # coerce these if they are originally incompatible types + # + # for example if these are categorical, but are not dtype_equal + # or if we have object and integer dtypes + + for lk, rk, name in zip( + self.left_join_keys, self.right_join_keys, self.join_names + ): + if (len(lk) and not len(rk)) or (not len(lk) and len(rk)): + continue + + lk_is_cat = is_categorical_dtype(lk.dtype) + rk_is_cat = is_categorical_dtype(rk.dtype) + lk_is_object = is_object_dtype(lk.dtype) + rk_is_object = is_object_dtype(rk.dtype) + + # if either left or right is a categorical + # then the must match exactly in categories & ordered + if lk_is_cat and rk_is_cat: + if lk._categories_match_up_to_permutation(rk): + continue + + elif lk_is_cat or rk_is_cat: + pass + + elif is_dtype_equal(lk.dtype, rk.dtype): + continue + + msg = ( + f"You are trying to merge on {lk.dtype} and " + f"{rk.dtype} columns. If you wish to proceed you should use pd.concat" + ) + + # if we are numeric, then allow differing + # kinds to proceed, eg. 
int64 and int8, int and float + # further if we are object, but we infer to + # the same, then proceed + if is_numeric_dtype(lk.dtype) and is_numeric_dtype(rk.dtype): + if lk.dtype.kind == rk.dtype.kind: + continue + + # check whether ints and floats + elif is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): + # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int + with np.errstate(invalid="ignore"): + if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all(): + warnings.warn( + "You are merging on int and float " + "columns where the float values " + "are not equal to their int representation.", + UserWarning, + stacklevel=find_stack_level(), + ) + continue + + elif is_float_dtype(rk.dtype) and is_integer_dtype(lk.dtype): + # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int + with np.errstate(invalid="ignore"): + if not (rk == rk.astype(lk.dtype))[~np.isnan(rk)].all(): + warnings.warn( + "You are merging on int and float " + "columns where the float values " + "are not equal to their int representation.", + UserWarning, + stacklevel=find_stack_level(), + ) + continue + + # let's infer and see if we are ok + elif lib.infer_dtype(lk, skipna=False) == lib.infer_dtype( + rk, skipna=False + ): + continue + + # Check if we are trying to merge on obviously + # incompatible dtypes GH 9780, GH 15800 + + # bool values are coerced to object + elif (lk_is_object and is_bool_dtype(rk.dtype)) or ( + is_bool_dtype(lk.dtype) and rk_is_object + ): + pass + + # object values are allowed to be merged + elif (lk_is_object and is_numeric_dtype(rk.dtype)) or ( + is_numeric_dtype(lk.dtype) and rk_is_object + ): + inferred_left = lib.infer_dtype(lk, skipna=False) + inferred_right = lib.infer_dtype(rk, skipna=False) + bool_types = ["integer", "mixed-integer", "boolean", "empty"] + string_types = ["string", "unicode", "mixed", "bytes", "empty"] + + # inferred bool + if inferred_left in bool_types and inferred_right in bool_types: + pass + + # unless we are merging non-string-like with string-like + elif ( + inferred_left in string_types and inferred_right not in string_types + ) or ( + inferred_right in string_types and inferred_left not in string_types + ): + raise ValueError(msg) + + # datetimelikes must match exactly + elif needs_i8_conversion(lk.dtype) and not needs_i8_conversion(rk.dtype): + raise ValueError(msg) + elif not needs_i8_conversion(lk.dtype) and needs_i8_conversion(rk.dtype): + raise ValueError(msg) + elif isinstance(lk.dtype, DatetimeTZDtype) and not isinstance( + rk.dtype, DatetimeTZDtype + ): + raise ValueError(msg) + elif not isinstance(lk.dtype, DatetimeTZDtype) and isinstance( + rk.dtype, DatetimeTZDtype + ): + raise ValueError(msg) + + elif lk_is_object and rk_is_object: + continue + + # Houston, we have a problem! + # let's coerce to object if the dtypes aren't + # categorical, otherwise coerce to the category + # dtype. If we coerced categories to object, + # then we would lose type information on some + # columns, and end up trying to merge + # incompatible dtypes. See GH 16900. 
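# --- Editor's note: illustrative sketch, not part of the imported pandas source ---
# The branch below coerces incompatible key dtypes before joining: a categorical
# key merged against a plain key is cast back to its categories' dtype (object
# otherwise), so values rather than category codes are compared. Hypothetical data:
import pandas as pd

left = pd.DataFrame({"key": pd.Categorical(["a", "b", "c"]), "x": [1, 2, 3]})
right = pd.DataFrame({"key": ["b", "c", "d"], "y": [20, 30, 40]})
out = pd.merge(left, right, on="key", how="inner")
# rows "b" and "c" match; the categorical key column is coerced (here to object)
# instead of being compared by its integer codes.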
+ if name in self.left.columns: + typ = lk.categories.dtype if lk_is_cat else object + self.left = self.left.copy() + self.left[name] = self.left[name].astype(typ) + if name in self.right.columns: + typ = rk.categories.dtype if rk_is_cat else object + self.right = self.right.copy() + self.right[name] = self.right[name].astype(typ) + + def _create_cross_configuration( + self, left: DataFrame, right: DataFrame + ) -> tuple[DataFrame, DataFrame, str, str]: + """ + Creates the configuration to dispatch the cross operation to inner join, + e.g. adding a join column and resetting parameters. Join column is added + to a new object, no inplace modification + + Parameters + ---------- + left : DataFrame + right : DataFrame + + Returns + ------- + a tuple (left, right, how, cross_col) representing the adjusted + DataFrames with cross_col, the merge operation set to inner and the column + to join over. + """ + cross_col = f"_cross_{uuid.uuid4()}" + how = "inner" + return ( + left.assign(**{cross_col: 1}), + right.assign(**{cross_col: 1}), + how, + cross_col, + ) + + def _validate_left_right_on(self, left_on, right_on): + left_on = com.maybe_make_list(left_on) + right_on = com.maybe_make_list(right_on) + + if self.how == "cross": + if ( + self.left_index + or self.right_index + or right_on is not None + or left_on is not None + or self.on is not None + ): + raise MergeError( + "Can not pass on, right_on, left_on or set right_index=True or " + "left_index=True" + ) + # Hm, any way to make this logic less complicated?? + elif self.on is None and left_on is None and right_on is None: + + if self.left_index and self.right_index: + left_on, right_on = (), () + elif self.left_index: + raise MergeError("Must pass right_on or right_index=True") + elif self.right_index: + raise MergeError("Must pass left_on or left_index=True") + else: + # use the common columns + left_cols = self.left.columns + right_cols = self.right.columns + common_cols = left_cols.intersection(right_cols) + if len(common_cols) == 0: + raise MergeError( + "No common columns to perform merge on. " + f"Merge options: left_on={left_on}, " + f"right_on={right_on}, " + f"left_index={self.left_index}, " + f"right_index={self.right_index}" + ) + if ( + not left_cols.join(common_cols, how="inner").is_unique + or not right_cols.join(common_cols, how="inner").is_unique + ): + raise MergeError(f"Data columns not unique: {repr(common_cols)}") + left_on = right_on = common_cols + elif self.on is not None: + if left_on is not None or right_on is not None: + raise MergeError( + 'Can only pass argument "on" OR "left_on" ' + 'and "right_on", not a combination of both.' + ) + if self.left_index or self.right_index: + raise MergeError( + 'Can only pass argument "on" OR "left_index" ' + 'and "right_index", not a combination of both.' + ) + left_on = right_on = self.on + elif left_on is not None: + if self.left_index: + raise MergeError( + 'Can only pass argument "left_on" OR "left_index" not both.' + ) + if not self.right_index and right_on is None: + raise MergeError('Must pass "right_on" OR "right_index".') + n = len(left_on) + if self.right_index: + if len(left_on) != self.right.index.nlevels: + raise ValueError( + "len(left_on) must equal the number " + 'of levels in the index of "right"' + ) + right_on = [None] * n + elif right_on is not None: + if self.right_index: + raise MergeError( + 'Can only pass argument "right_on" OR "right_index" not both.' 
+ ) + if not self.left_index and left_on is None: + raise MergeError('Must pass "left_on" OR "left_index".') + n = len(right_on) + if self.left_index: + if len(right_on) != self.left.index.nlevels: + raise ValueError( + "len(right_on) must equal the number " + 'of levels in the index of "left"' + ) + left_on = [None] * n + if self.how != "cross" and len(right_on) != len(left_on): + raise ValueError("len(right_on) must equal len(left_on)") + + return left_on, right_on + + def _validate(self, validate: str) -> None: + + # Check uniqueness of each + if self.left_index: + left_unique = self.orig_left.index.is_unique + else: + left_unique = MultiIndex.from_arrays(self.left_join_keys).is_unique + + if self.right_index: + right_unique = self.orig_right.index.is_unique + else: + right_unique = MultiIndex.from_arrays(self.right_join_keys).is_unique + + # Check data integrity + if validate in ["one_to_one", "1:1"]: + if not left_unique and not right_unique: + raise MergeError( + "Merge keys are not unique in either left " + "or right dataset; not a one-to-one merge" + ) + elif not left_unique: + raise MergeError( + "Merge keys are not unique in left dataset; not a one-to-one merge" + ) + elif not right_unique: + raise MergeError( + "Merge keys are not unique in right dataset; not a one-to-one merge" + ) + + elif validate in ["one_to_many", "1:m"]: + if not left_unique: + raise MergeError( + "Merge keys are not unique in left dataset; not a one-to-many merge" + ) + + elif validate in ["many_to_one", "m:1"]: + if not right_unique: + raise MergeError( + "Merge keys are not unique in right dataset; " + "not a many-to-one merge" + ) + + elif validate in ["many_to_many", "m:m"]: + pass + + else: + raise ValueError("Not a valid argument for validate") + + +def get_join_indexers( + left_keys, right_keys, sort: bool = False, how: str = "inner", **kwargs +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """ + + Parameters + ---------- + left_keys : ndarray, Index, Series + right_keys : ndarray, Index, Series + sort : bool, default False + how : {'inner', 'outer', 'left', 'right'}, default 'inner' + + Returns + ------- + np.ndarray[np.intp] + Indexer into the left_keys. + np.ndarray[np.intp] + Indexer into the right_keys. + """ + assert len(left_keys) == len( + right_keys + ), "left_key and right_keys must be the same length" + + # fast-path for empty left/right + left_n = len(left_keys[0]) + right_n = len(right_keys[0]) + if left_n == 0: + if how in ["left", "inner", "cross"]: + return _get_empty_indexer() + elif not sort and how in ["right", "outer"]: + return _get_no_sort_one_missing_indexer(right_n, True) + elif right_n == 0: + if how in ["right", "inner", "cross"]: + return _get_empty_indexer() + elif not sort and how in ["left", "outer"]: + return _get_no_sort_one_missing_indexer(left_n, False) + + # get left & right join labels and num. of levels at each location + mapped = ( + _factorize_keys(left_keys[n], right_keys[n], sort=sort, how=how) + for n in range(len(left_keys)) + ) + zipped = zip(*mapped) + llab, rlab, shape = (list(x) for x in zipped) + + # get flat i8 keys from label lists + lkey, rkey = _get_join_keys(llab, rlab, shape, sort) + + # factorize keys to a dense i8 space + # `count` is the num. 
of unique keys + # set(lkey) | set(rkey) == range(count) + + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort, how=how) + # preserve left frame order if how == 'left' and sort == False + kwargs = copy.copy(kwargs) + if how in ("left", "right"): + kwargs["sort"] = sort + join_func = { + "inner": libjoin.inner_join, + "left": libjoin.left_outer_join, + "right": lambda x, y, count, **kwargs: libjoin.left_outer_join( + y, x, count, **kwargs + )[::-1], + "outer": libjoin.full_outer_join, + }[how] + + # error: Cannot call function of unknown type + return join_func(lkey, rkey, count, **kwargs) # type: ignore[operator] + + +def restore_dropped_levels_multijoin( + left: MultiIndex, + right: MultiIndex, + dropped_level_names, + join_index: Index, + lindexer: npt.NDArray[np.intp], + rindexer: npt.NDArray[np.intp], +) -> tuple[list[Index], npt.NDArray[np.intp], list[Hashable]]: + """ + *this is an internal non-public method* + + Returns the levels, labels and names of a multi-index to multi-index join. + Depending on the type of join, this method restores the appropriate + dropped levels of the joined multi-index. + The method relies on lidx, rindexer which hold the index positions of + left and right, where a join was feasible + + Parameters + ---------- + left : MultiIndex + left index + right : MultiIndex + right index + dropped_level_names : str array + list of non-common level names + join_index : Index + the index of the join between the + common levels of left and right + lindexer : np.ndarray[np.intp] + left indexer + rindexer : np.ndarray[np.intp] + right indexer + + Returns + ------- + levels : list of Index + levels of combined multiindexes + labels : np.ndarray[np.intp] + labels of combined multiindexes + names : List[Hashable] + names of combined multiindex levels + + """ + + def _convert_to_multiindex(index: Index) -> MultiIndex: + if isinstance(index, MultiIndex): + return index + else: + return MultiIndex.from_arrays([index._values], names=[index.name]) + + # For multi-multi joins with one overlapping level, + # the returned index if of type Index + # Assure that join_index is of type MultiIndex + # so that dropped levels can be appended + join_index = _convert_to_multiindex(join_index) + + join_levels = join_index.levels + join_codes = join_index.codes + join_names = join_index.names + + # lindexer and rindexer hold the indexes where the join occurred + # for left and right respectively. 
If left/right is None then + # the join occurred on all indices of left/right + if lindexer is None: + lindexer = range(left.size) + + if rindexer is None: + rindexer = range(right.size) + + # Iterate through the levels that must be restored + for dropped_level_name in dropped_level_names: + if dropped_level_name in left.names: + idx = left + indexer = lindexer + else: + idx = right + indexer = rindexer + + # The index of the level name to be restored + name_idx = idx.names.index(dropped_level_name) + + restore_levels = idx.levels[name_idx] + # Inject -1 in the codes list where a join was not possible + # IOW indexer[i]=-1 + codes = idx.codes[name_idx] + restore_codes = algos.take_nd(codes, indexer, fill_value=-1) + + join_levels = join_levels + [restore_levels] + join_codes = join_codes + [restore_codes] + join_names = join_names + [dropped_level_name] + + return join_levels, join_codes, join_names + + +class _OrderedMerge(_MergeOperation): + _merge_type = "ordered_merge" + + def __init__( + self, + left: DataFrame | Series, + right: DataFrame | Series, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + axis: int = 1, + suffixes: Suffixes = ("_x", "_y"), + fill_method: str | None = None, + how: str = "outer", + ) -> None: + + self.fill_method = fill_method + _MergeOperation.__init__( + self, + left, + right, + on=on, + left_on=left_on, + left_index=left_index, + right_index=right_index, + right_on=right_on, + axis=axis, + how=how, + suffixes=suffixes, + sort=True, # factorize sorts + ) + + def get_result(self, copy: bool = True) -> DataFrame: + join_index, left_indexer, right_indexer = self._get_join_info() + + llabels, rlabels = _items_overlap_with_suffix( + self.left._info_axis, self.right._info_axis, self.suffixes + ) + + left_join_indexer: np.ndarray | None + right_join_indexer: np.ndarray | None + + if self.fill_method == "ffill": + if left_indexer is None: + raise TypeError("left_indexer cannot be None") + left_indexer, right_indexer = cast(np.ndarray, left_indexer), cast( + np.ndarray, right_indexer + ) + left_join_indexer = libjoin.ffill_indexer(left_indexer) + right_join_indexer = libjoin.ffill_indexer(right_indexer) + else: + left_join_indexer = left_indexer + right_join_indexer = right_indexer + + result = self._reindex_and_concat( + join_index, left_join_indexer, right_join_indexer, copy=copy + ) + self._maybe_add_join_keys(result, left_indexer, right_indexer) + + return result + + +def _asof_by_function(direction: str): + name = f"asof_join_{direction}_on_X_by_Y" + return getattr(libjoin, name, None) + + +_type_casters = { + "int64_t": ensure_int64, + "double": ensure_float64, + "object": ensure_object, +} + + +def _get_cython_type_upcast(dtype: DtypeObj) -> str: + """Upcast a dtype to 'int64_t', 'double', or 'object'""" + if is_integer_dtype(dtype): + return "int64_t" + elif is_float_dtype(dtype): + return "double" + else: + return "object" + + +class _AsOfMerge(_OrderedMerge): + _merge_type = "asof_merge" + + def __init__( + self, + left: DataFrame | Series, + right: DataFrame | Series, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, + left_index: bool = False, + right_index: bool = False, + by=None, + left_by=None, + right_by=None, + axis: int = 1, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + fill_method: str | None = None, + how: str = "asof", + tolerance=None, + allow_exact_matches: 
bool = True, + direction: str = "backward", + ) -> None: + + self.by = by + self.left_by = left_by + self.right_by = right_by + self.tolerance = tolerance + self.allow_exact_matches = allow_exact_matches + self.direction = direction + + _OrderedMerge.__init__( + self, + left, + right, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + axis=axis, + how=how, + suffixes=suffixes, + fill_method=fill_method, + ) + + def _validate_left_right_on(self, left_on, right_on): + left_on, right_on = super()._validate_left_right_on(left_on, right_on) + + # we only allow on to be a single item for on + if len(left_on) != 1 and not self.left_index: + raise MergeError("can only asof on a key for left") + + if len(right_on) != 1 and not self.right_index: + raise MergeError("can only asof on a key for right") + + if self.left_index and isinstance(self.left.index, MultiIndex): + raise MergeError("left can only have one index") + + if self.right_index and isinstance(self.right.index, MultiIndex): + raise MergeError("right can only have one index") + + # set 'by' columns + if self.by is not None: + if self.left_by is not None or self.right_by is not None: + raise MergeError("Can only pass by OR left_by and right_by") + self.left_by = self.right_by = self.by + if self.left_by is None and self.right_by is not None: + raise MergeError("missing left_by") + if self.left_by is not None and self.right_by is None: + raise MergeError("missing right_by") + + # GH#29130 Check that merge keys do not have dtype object + if not self.left_index: + left_on_0 = left_on[0] + if is_array_like(left_on_0): + lo_dtype = left_on_0.dtype + else: + lo_dtype = ( + self.left[left_on_0].dtype + if left_on_0 in self.left.columns + else self.left.index.get_level_values(left_on_0) + ) + else: + lo_dtype = self.left.index.dtype + + if not self.right_index: + right_on_0 = right_on[0] + if is_array_like(right_on_0): + ro_dtype = right_on_0.dtype + else: + ro_dtype = ( + self.right[right_on_0].dtype + if right_on_0 in self.right.columns + else self.right.index.get_level_values(right_on_0) + ) + else: + ro_dtype = self.right.index.dtype + + if is_object_dtype(lo_dtype) or is_object_dtype(ro_dtype): + raise MergeError( + f"Incompatible merge dtype, {repr(ro_dtype)} and " + f"{repr(lo_dtype)}, both sides must have numeric dtype" + ) + + # add 'by' to our key-list so we can have it in the + # output as a key + if self.left_by is not None: + if not is_list_like(self.left_by): + self.left_by = [self.left_by] + if not is_list_like(self.right_by): + self.right_by = [self.right_by] + + if len(self.left_by) != len(self.right_by): + raise MergeError("left_by and right_by must be same length") + + left_on = self.left_by + list(left_on) + right_on = self.right_by + list(right_on) + + # check 'direction' is valid + if self.direction not in ["backward", "forward", "nearest"]: + raise MergeError(f"direction invalid: {self.direction}") + + return left_on, right_on + + def _get_merge_keys(self): + + # note this function has side effects + (left_join_keys, right_join_keys, join_names) = super()._get_merge_keys() + + # validate index types are the same + for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)): + if not is_dtype_equal(lk.dtype, rk.dtype): + if is_categorical_dtype(lk.dtype) and is_categorical_dtype(rk.dtype): + # The generic error message is confusing for categoricals. 
+ # + # In this function, the join keys include both the original + # ones of the merge_asof() call, and also the keys passed + # to its by= argument. Unordered but equal categories + # are not supported for the former, but will fail + # later with a ValueError, so we don't *need* to check + # for them here. + msg = ( + f"incompatible merge keys [{i}] {repr(lk.dtype)} and " + f"{repr(rk.dtype)}, both sides category, but not equal ones" + ) + else: + msg = ( + f"incompatible merge keys [{i}] {repr(lk.dtype)} and " + f"{repr(rk.dtype)}, must be the same type" + ) + raise MergeError(msg) + + # validate tolerance; datetime.timedelta or Timedelta if we have a DTI + if self.tolerance is not None: + + if self.left_index: + lt = self.left.index + else: + lt = left_join_keys[-1] + + msg = ( + f"incompatible tolerance {self.tolerance}, must be compat " + f"with type {repr(lt.dtype)}" + ) + + if needs_i8_conversion(lt): + if not isinstance(self.tolerance, datetime.timedelta): + raise MergeError(msg) + if self.tolerance < Timedelta(0): + raise MergeError("tolerance must be positive") + + elif is_integer_dtype(lt): + if not is_integer(self.tolerance): + raise MergeError(msg) + if self.tolerance < 0: + raise MergeError("tolerance must be positive") + + elif is_float_dtype(lt): + if not is_number(self.tolerance): + raise MergeError(msg) + if self.tolerance < 0: + raise MergeError("tolerance must be positive") + + else: + raise MergeError("key must be integer, timestamp or float") + + # validate allow_exact_matches + if not is_bool(self.allow_exact_matches): + msg = ( + "allow_exact_matches must be boolean, " + f"passed {self.allow_exact_matches}" + ) + raise MergeError(msg) + + return left_join_keys, right_join_keys, join_names + + def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """return the join indexers""" + + def flip(xs) -> np.ndarray: + """unlike np.transpose, this returns an array of tuples""" + + def injection(obj): + if not is_extension_array_dtype(obj): + # ndarray + return obj + obj = extract_array(obj) + if isinstance(obj, NDArrayBackedExtensionArray): + # fastpath for e.g. dt64tz, categorical + return obj._ndarray + # FIXME: returning obj._values_for_argsort() here doesn't + # break in any existing test cases, but i (@jbrockmendel) + # am pretty sure it should! + # e.g. + # arr = pd.array([0, pd.NA, 255], dtype="UInt8") + # will have values_for_argsort (before GH#45434) + # np.array([0, 255, 255], dtype=np.uint8) + # and the non-injectivity should make a difference somehow + # shouldn't it? 
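# --- Editor's note: illustrative sketch, not part of the imported pandas source ---
# The _AsOfMerge machinery requires asof keys to be sorted and, for
# datetime-like keys, validates in _get_merge_keys above that ``tolerance`` is a
# positive timedelta. A usage sketch with hypothetical data:
import pandas as pd

quotes = pd.DataFrame(
    {"time": pd.to_datetime(["2023-01-01 09:30:00.023", "2023-01-01 09:30:00.041"]),
     "bid": [100.0, 100.5]}
)
trades = pd.DataFrame(
    {"time": pd.to_datetime(["2023-01-01 09:30:00.038"]), "qty": [75]}
)
out = pd.merge_asof(
    trades, quotes, on="time",
    tolerance=pd.Timedelta("20ms"),  # must be positive and timedelta-like here
    direction="backward",            # one of "backward", "forward", "nearest"
)
# the trade matches the 09:30:00.023 quote (15 ms earlier, within tolerance).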
+ return np.asarray(obj) + + xs = [injection(x) for x in xs] + labels = list(string.ascii_lowercase[: len(xs)]) + dtypes = [x.dtype for x in xs] + labeled_dtypes = list(zip(labels, dtypes)) + return np.array(list(zip(*xs)), labeled_dtypes) + + # values to compare + left_values = ( + self.left.index._values if self.left_index else self.left_join_keys[-1] + ) + right_values = ( + self.right.index._values if self.right_index else self.right_join_keys[-1] + ) + tolerance = self.tolerance + + # we require sortedness and non-null values in the join keys + if not Index(left_values).is_monotonic_increasing: + side = "left" + if isna(left_values).any(): + raise ValueError(f"Merge keys contain null values on {side} side") + else: + raise ValueError(f"{side} keys must be sorted") + + if not Index(right_values).is_monotonic_increasing: + side = "right" + if isna(right_values).any(): + raise ValueError(f"Merge keys contain null values on {side} side") + else: + raise ValueError(f"{side} keys must be sorted") + + # initial type conversion as needed + if needs_i8_conversion(left_values): + left_values = left_values.view("i8") + right_values = right_values.view("i8") + if tolerance is not None: + tolerance = Timedelta(tolerance) + tolerance = tolerance.value + + # a "by" parameter requires special handling + if self.left_by is not None: + # remove 'on' parameter from values if one existed + if self.left_index and self.right_index: + left_by_values = self.left_join_keys + right_by_values = self.right_join_keys + else: + left_by_values = self.left_join_keys[0:-1] + right_by_values = self.right_join_keys[0:-1] + + # get tuple representation of values if more than one + if len(left_by_values) == 1: + left_by_values = left_by_values[0] + right_by_values = right_by_values[0] + else: + # We get here with non-ndarrays in test_merge_by_col_tz_aware + # and test_merge_groupby_multiple_column_with_categorical_column + left_by_values = flip(left_by_values) + right_by_values = flip(right_by_values) + + # upcast 'by' parameter because HashTable is limited + by_type = _get_cython_type_upcast(left_by_values.dtype) + by_type_caster = _type_casters[by_type] + # error: Cannot call function of unknown type + left_by_values = by_type_caster(left_by_values) # type: ignore[operator] + # error: Cannot call function of unknown type + right_by_values = by_type_caster(right_by_values) # type: ignore[operator] + + # choose appropriate function by type + func = _asof_by_function(self.direction) + return func( + left_values, + right_values, + left_by_values, + right_by_values, + self.allow_exact_matches, + tolerance, + ) + else: + # choose appropriate function by type + func = _asof_by_function(self.direction) + return func( + left_values, + right_values, + None, + None, + self.allow_exact_matches, + tolerance, + False, + ) + + +def _get_multiindex_indexer( + join_keys, index: MultiIndex, sort: bool +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + + # left & right join labels and num. 
of levels at each location + mapped = ( + _factorize_keys(index.levels[n], join_keys[n], sort=sort) + for n in range(index.nlevels) + ) + zipped = zip(*mapped) + rcodes, lcodes, shape = (list(x) for x in zipped) + if sort: + rcodes = list(map(np.take, rcodes, index.codes)) + else: + i8copy = lambda a: a.astype("i8", subok=False, copy=True) + rcodes = list(map(i8copy, index.codes)) + + # fix right labels if there were any nulls + for i in range(len(join_keys)): + mask = index.codes[i] == -1 + if mask.any(): + # check if there already was any nulls at this location + # if there was, it is factorized to `shape[i] - 1` + a = join_keys[i][lcodes[i] == shape[i] - 1] + if a.size == 0 or not a[0] != a[0]: + shape[i] += 1 + + rcodes[i][mask] = shape[i] - 1 + + # get flat i8 join keys + lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort) + + # factorize keys to a dense i8 space + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) + + return libjoin.left_outer_join(lkey, rkey, count, sort=sort) + + +def _get_single_indexer( + join_key, index: Index, sort: bool = False +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + left_key, right_key, count = _factorize_keys(join_key, index._values, sort=sort) + + return libjoin.left_outer_join(left_key, right_key, count, sort=sort) + + +def _get_empty_indexer() -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """Return empty join indexers.""" + return ( + np.array([], dtype=np.intp), + np.array([], dtype=np.intp), + ) + + +def _get_no_sort_one_missing_indexer( + n: int, left_missing: bool +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + """ + Return join indexers where all of one side is selected without sorting + and none of the other side is selected. + + Parameters + ---------- + n : int + Length of indexers to create. + left_missing : bool + If True, the left indexer will contain only -1's. + If False, the right indexer will contain only -1's. + + Returns + ------- + np.ndarray[np.intp] + Left indexer + np.ndarray[np.intp] + Right indexer + """ + idx = np.arange(n, dtype=np.intp) + idx_missing = np.full(shape=n, fill_value=-1, dtype=np.intp) + if left_missing: + return idx_missing, idx + return idx, idx_missing + + +def _left_join_on_index( + left_ax: Index, right_ax: Index, join_keys, sort: bool = False +) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp]]: + if len(join_keys) > 1: + if not ( + isinstance(right_ax, MultiIndex) and len(join_keys) == right_ax.nlevels + ): + raise AssertionError( + "If more than one join key is given then " + "'right_ax' must be a MultiIndex and the " + "number of join keys must be the number of levels in right_ax" + ) + + left_indexer, right_indexer = _get_multiindex_indexer( + join_keys, right_ax, sort=sort + ) + else: + jkey = join_keys[0] + + left_indexer, right_indexer = _get_single_indexer(jkey, right_ax, sort=sort) + + if sort or len(left_ax) != len(left_indexer): + # if asked to sort or there are 1-to-many matches + join_index = left_ax.take(left_indexer) + return join_index, left_indexer, right_indexer + + # left frame preserves order & length of its index + return left_ax, None, right_indexer + + +def _factorize_keys( + lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner" +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]: + """ + Encode left and right keys as enumerated types. + + This is used to get the join indexers to be used when merging DataFrames. + + Parameters + ---------- + lk : array-like + Left key. 
+ rk : array-like + Right key. + sort : bool, defaults to True + If True, the encoding is done such that the unique elements in the + keys are sorted. + how : {‘left’, ‘right’, ‘outer’, ‘inner’}, default ‘inner’ + Type of merge. + + Returns + ------- + np.ndarray[np.intp] + Left (resp. right if called with `key='right'`) labels, as enumerated type. + np.ndarray[np.intp] + Right (resp. left if called with `key='right'`) labels, as enumerated type. + int + Number of unique elements in union of left and right labels. + + See Also + -------- + merge : Merge DataFrame or named Series objects + with a database-style join. + algorithms.factorize : Encode the object as an enumerated type + or categorical variable. + + Examples + -------- + >>> lk = np.array(["a", "c", "b"]) + >>> rk = np.array(["a", "c"]) + + Here, the unique values are `'a', 'b', 'c'`. With the default + `sort=True`, the encoding will be `{0: 'a', 1: 'b', 2: 'c'}`: + + >>> pd.core.reshape.merge._factorize_keys(lk, rk) + (array([0, 2, 1]), array([0, 2]), 3) + + With the `sort=False`, the encoding will correspond to the order + in which the unique elements first appear: `{0: 'a', 1: 'c', 2: 'b'}`: + + >>> pd.core.reshape.merge._factorize_keys(lk, rk, sort=False) + (array([0, 1, 2]), array([0, 1]), 3) + """ + # Some pre-processing for non-ndarray lk / rk + lk = extract_array(lk, extract_numpy=True, extract_range=True) + rk = extract_array(rk, extract_numpy=True, extract_range=True) + # TODO: if either is a RangeIndex, we can likely factorize more efficiently? + + if isinstance(lk.dtype, DatetimeTZDtype) and isinstance(rk.dtype, DatetimeTZDtype): + # Extract the ndarray (UTC-localized) values + # Note: we dont need the dtypes to match, as these can still be compared + # TODO(non-nano): need to make sure resolutions match + lk = cast("DatetimeArray", lk)._ndarray + rk = cast("DatetimeArray", rk)._ndarray + + elif ( + is_categorical_dtype(lk.dtype) + and is_categorical_dtype(rk.dtype) + and is_dtype_equal(lk.dtype, rk.dtype) + ): + assert isinstance(lk, Categorical) + assert isinstance(rk, Categorical) + # Cast rk to encoding so we can compare codes with lk + + rk = lk._encode_with_my_categories(rk) + + lk = ensure_int64(lk.codes) + rk = ensure_int64(rk.codes) + + elif isinstance(lk, ExtensionArray) and is_dtype_equal(lk.dtype, rk.dtype): + lk, _ = lk._values_for_factorize() + + # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute + # "_values_for_factorize" + rk, _ = rk._values_for_factorize() # type: ignore[union-attr] + + klass: type[libhashtable.Factorizer] | type[libhashtable.Int64Factorizer] + if is_integer_dtype(lk.dtype) and is_integer_dtype(rk.dtype): + # GH#23917 TODO: needs tests for case where lk is integer-dtype + # and rk is datetime-dtype + klass = libhashtable.Int64Factorizer + lk = ensure_int64(np.asarray(lk)) + rk = ensure_int64(np.asarray(rk)) + + elif needs_i8_conversion(lk.dtype) and is_dtype_equal(lk.dtype, rk.dtype): + # GH#23917 TODO: Needs tests for non-matching dtypes + klass = libhashtable.Int64Factorizer + lk = ensure_int64(np.asarray(lk, dtype=np.int64)) + rk = ensure_int64(np.asarray(rk, dtype=np.int64)) + + else: + klass = libhashtable.ObjectFactorizer + lk = ensure_object(lk) + rk = ensure_object(rk) + + rizer = klass(max(len(lk), len(rk))) + + # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type + # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]], + # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]" + llab = rizer.factorize(lk) # type: 
ignore[arg-type] + # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type + # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]], + # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]" + rlab = rizer.factorize(rk) # type: ignore[arg-type] + assert llab.dtype == np.dtype(np.intp), llab.dtype + assert rlab.dtype == np.dtype(np.intp), rlab.dtype + + count = rizer.get_count() + + if sort: + uniques = rizer.uniques.to_array() + llab, rlab = _sort_labels(uniques, llab, rlab) + + # NA group + lmask = llab == -1 + lany = lmask.any() + rmask = rlab == -1 + rany = rmask.any() + + if lany or rany: + if lany: + np.putmask(llab, lmask, count) + if rany: + np.putmask(rlab, rmask, count) + count += 1 + + if how == "right": + return rlab, llab, count + return llab, rlab, count + + +def _sort_labels( + uniques: np.ndarray, left: npt.NDArray[np.intp], right: npt.NDArray[np.intp] +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: + + llength = len(left) + labels = np.concatenate([left, right]) + + _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1) + new_left, new_right = new_labels[:llength], new_labels[llength:] + + return new_left, new_right + + +def _get_join_keys(llab, rlab, shape, sort: bool): + + # how many levels can be done without overflow + nlev = next( + lev + for lev in range(len(shape), 0, -1) + if not is_int64_overflow_possible(shape[:lev]) + ) + + # get keys for the first `nlev` levels + stride = np.prod(shape[1:nlev], dtype="i8") + lkey = stride * llab[0].astype("i8", subok=False, copy=False) + rkey = stride * rlab[0].astype("i8", subok=False, copy=False) + + for i in range(1, nlev): + with np.errstate(divide="ignore"): + stride //= shape[i] + lkey += llab[i] * stride + rkey += rlab[i] * stride + + if nlev == len(shape): # all done! + return lkey, rkey + + # densify current keys to avoid overflow + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) + + llab = [lkey] + llab[nlev:] + rlab = [rkey] + rlab[nlev:] + shape = [count] + shape[nlev:] + + return _get_join_keys(llab, rlab, shape, sort) + + +def _should_fill(lname, rname) -> bool: + if not isinstance(lname, str) or not isinstance(rname, str): + return True + return lname == rname + + +def _any(x) -> bool: + return x is not None and com.any_not_none(*x) + + +def _validate_operand(obj: DataFrame | Series) -> DataFrame: + if isinstance(obj, ABCDataFrame): + return obj + elif isinstance(obj, ABCSeries): + if obj.name is None: + raise ValueError("Cannot merge a Series without a name") + else: + return obj.to_frame() + else: + raise TypeError( + f"Can only merge Series or DataFrame objects, a {type(obj)} was passed" + ) + + +def _items_overlap_with_suffix( + left: Index, right: Index, suffixes: Suffixes +) -> tuple[Index, Index]: + """ + Suffixes type validation. + + If two indices overlap, add suffixes to overlapping entries. + + If corresponding suffix is empty, the entry is simply converted to string. + + """ + if not is_list_like(suffixes, allow_sets=False): + warnings.warn( + f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give " + "unexpected results. Provide 'suffixes' as a tuple instead. 
In the " + "future a 'TypeError' will be raised.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + to_rename = left.intersection(right) + if len(to_rename) == 0: + return left, right + + lsuffix, rsuffix = suffixes + + if not lsuffix and not rsuffix: + raise ValueError(f"columns overlap but no suffix specified: {to_rename}") + + def renamer(x, suffix): + """ + Rename the left and right indices. + + If there is overlap, and suffix is not None, add + suffix, otherwise, leave it as-is. + + Parameters + ---------- + x : original column name + suffix : str or None + + Returns + ------- + x : renamed column name + """ + if x in to_rename and suffix is not None: + return f"{x}{suffix}" + return x + + lrenamer = partial(renamer, suffix=lsuffix) + rrenamer = partial(renamer, suffix=rsuffix) + + llabels = left._transform_index(lrenamer) + rlabels = right._transform_index(rrenamer) + + dups = [] + if not llabels.is_unique: + # Only warn when duplicates are caused because of suffixes, already duplicated + # columns in origin should not warn + dups = llabels[(llabels.duplicated()) & (~left.duplicated())].tolist() + if not rlabels.is_unique: + dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())].tolist()) + if dups: + warnings.warn( + f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the " + f"result is deprecated and will raise a MergeError in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return llabels, rlabels diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py new file mode 100644 index 00000000..7ef58c78 --- /dev/null +++ b/pandas/core/reshape/pivot.py @@ -0,0 +1,863 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + Sequence, + cast, +) + +import numpy as np + +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + AggFuncTypeDict, + IndexLabel, +) +from pandas.util._decorators import ( + Appender, + Substitution, + deprecate_nonkeyword_arguments, +) +from pandas.util._exceptions import rewrite_warning + +from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_list_like, + is_nested_list_like, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +import pandas.core.common as com +from pandas.core.frame import _shared_docs +from pandas.core.groupby import Grouper +from pandas.core.indexes.api import ( + Index, + MultiIndex, + get_objs_combined_axis, +) +from pandas.core.reshape.concat import concat +from pandas.core.reshape.util import cartesian_product +from pandas.core.series import Series + +if TYPE_CHECKING: + from pandas import DataFrame + + +# Note: We need to make sure `frame` is imported before `pivot`, otherwise +# _shared_docs['pivot_table'] will not yet exist. 
TODO: Fix this dependency +@Substitution("\ndata : DataFrame") +@Appender(_shared_docs["pivot_table"], indents=1) +def pivot_table( + data: DataFrame, + values=None, + index=None, + columns=None, + aggfunc: AggFuncType = "mean", + fill_value=None, + margins: bool = False, + dropna: bool = True, + margins_name: str = "All", + observed: bool = False, + sort: bool = True, +) -> DataFrame: + index = _convert_by(index) + columns = _convert_by(columns) + + if isinstance(aggfunc, list): + pieces: list[DataFrame] = [] + keys = [] + for func in aggfunc: + _table = __internal_pivot_table( + data, + values=values, + index=index, + columns=columns, + fill_value=fill_value, + aggfunc=func, + margins=margins, + dropna=dropna, + margins_name=margins_name, + observed=observed, + sort=sort, + ) + pieces.append(_table) + keys.append(getattr(func, "__name__", func)) + + table = concat(pieces, keys=keys, axis=1) + return table.__finalize__(data, method="pivot_table") + + table = __internal_pivot_table( + data, + values, + index, + columns, + aggfunc, + fill_value, + margins, + dropna, + margins_name, + observed, + sort, + ) + return table.__finalize__(data, method="pivot_table") + + +def __internal_pivot_table( + data: DataFrame, + values, + index, + columns, + aggfunc: AggFuncTypeBase | AggFuncTypeDict, + fill_value, + margins: bool, + dropna: bool, + margins_name: str, + observed: bool, + sort: bool, +) -> DataFrame: + """ + Helper of :func:`pandas.pivot_table` for any non-list ``aggfunc``. + """ + keys = index + columns + + values_passed = values is not None + if values_passed: + if is_list_like(values): + values_multi = True + values = list(values) + else: + values_multi = False + values = [values] + + # GH14938 Make sure value labels are in data + for i in values: + if i not in data: + raise KeyError(i) + + to_filter = [] + for x in keys + values: + if isinstance(x, Grouper): + x = x.key + try: + if x in data: + to_filter.append(x) + except TypeError: + pass + if len(to_filter) < len(data.columns): + data = data[to_filter] + + else: + values = data.columns + for key in keys: + try: + values = values.drop(key) + except (TypeError, ValueError, KeyError): + pass + values = list(values) + + grouped = data.groupby(keys, observed=observed, sort=sort) + msg = ( + "pivot_table dropped a column because it failed to aggregate. This behavior " + "is deprecated and will raise in a future version of pandas. Select only the " + "columns that can be aggregated." + ) + with rewrite_warning( + target_message="The default value of numeric_only", + target_category=FutureWarning, + new_message=msg, + ): + agged = grouped.agg(aggfunc) + + if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): + agged = agged.dropna(how="all") + + # gh-21133 + # we want to down cast if + # the original values are ints + # as we grouped with a NaN value + # and then dropped, coercing to floats + for v in values: + if ( + v in data + and is_integer_dtype(data[v]) + and v in agged + and not is_integer_dtype(agged[v]) + ): + if not isinstance(agged[v], ABCDataFrame) and isinstance( + data[v].dtype, np.dtype + ): + # exclude DataFrame case bc maybe_downcast_to_dtype expects + # ArrayLike + # e.g. test_pivot_table_multiindex_columns_doctest_case + # agged.columns is a MultiIndex and 'v' is indexing only + # on its first level. 
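# --- Editor's note: illustrative sketch, not part of the imported pandas source ---
# __internal_pivot_table is the single-aggfunc workhorse behind pd.pivot_table:
# group by the index+columns keys, aggregate, unstack the column keys, then
# apply dropna/fill_value/margins. A usage sketch with hypothetical data:
import pandas as pd

df = pd.DataFrame(
    {"city": ["NY", "NY", "LA", "LA"],
     "year": [2021, 2022, 2021, 2022],
     "sales": [10, 20, 30, 40]}
)
table = pd.pivot_table(df, values="sales", index="city", columns="year",
                       aggfunc="sum", fill_value=0)
# one row per city, one column per year, summed sales; fill_value would
# replace any missing city/year combination with 0.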
+ agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype) + + table = agged + + # GH17038, this check should only happen if index is defined (not None) + if table.index.nlevels > 1 and index: + # Related GH #17123 + # If index_names are integers, determine whether the integers refer + # to the level position or name. + index_names = agged.index.names[: len(index)] + to_unstack = [] + for i in range(len(index), len(keys)): + name = agged.index.names[i] + if name is None or name in index_names: + to_unstack.append(i) + else: + to_unstack.append(name) + table = agged.unstack(to_unstack) + + if not dropna: + if isinstance(table.index, MultiIndex): + m = MultiIndex.from_arrays( + cartesian_product(table.index.levels), names=table.index.names + ) + table = table.reindex(m, axis=0) + + if isinstance(table.columns, MultiIndex): + m = MultiIndex.from_arrays( + cartesian_product(table.columns.levels), names=table.columns.names + ) + table = table.reindex(m, axis=1) + + if sort is True and isinstance(table, ABCDataFrame): + table = table.sort_index(axis=1) + + if fill_value is not None: + table = table.fillna(fill_value, downcast="infer") + + if margins: + if dropna: + data = data[data.notna().all(axis=1)] + table = _add_margins( + table, + data, + values, + rows=index, + cols=columns, + aggfunc=aggfunc, + observed=dropna, + margins_name=margins_name, + fill_value=fill_value, + ) + + # discard the top level + if values_passed and not values_multi and table.columns.nlevels > 1: + table = table.droplevel(0, axis=1) + if len(index) == 0 and len(columns) > 0: + table = table.T + + # GH 15193 Make sure empty columns are removed if dropna=True + if isinstance(table, ABCDataFrame) and dropna: + table = table.dropna(how="all", axis=1) + + return table + + +def _add_margins( + table: DataFrame | Series, + data: DataFrame, + values, + rows, + cols, + aggfunc, + observed=None, + margins_name: str = "All", + fill_value=None, +): + if not isinstance(margins_name, str): + raise ValueError("margins_name argument must be a string") + + msg = f'Conflicting name "{margins_name}" in margins' + for level in table.index.names: + if margins_name in table.index.get_level_values(level): + raise ValueError(msg) + + grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name) + + if table.ndim == 2: + # i.e. DataFrame + for level in table.columns.names[1:]: + if margins_name in table.columns.get_level_values(level): + raise ValueError(msg) + + key: str | tuple[str, ...] + if len(rows) > 1: + key = (margins_name,) + ("",) * (len(rows) - 1) + else: + key = margins_name + + if not values and isinstance(table, ABCSeries): + # If there are no values and the table is a series, then there is only + # one column in the data. Compute grand margin and return it. 
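# --- Editor's note: illustrative sketch, not part of the imported pandas source ---
# _add_margins appends the grand-total row/column named ``margins_name``
# ("All" by default); a value equal to margins_name already present in the
# grouping keys raises ValueError above. Hypothetical data:
import pandas as pd

df = pd.DataFrame(
    {"city": ["NY", "NY", "LA"], "year": [2021, 2022, 2021], "sales": [10, 20, 30]}
)
table = pd.pivot_table(df, values="sales", index="city", columns="year",
                       aggfunc="sum", margins=True)
# the result gains an "All" row and an "All" column holding the totals.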
+ return table._append(Series({key: grand_margin[margins_name]})) + + elif values: + marginal_result_set = _generate_marginal_results( + table, data, values, rows, cols, aggfunc, observed, margins_name + ) + if not isinstance(marginal_result_set, tuple): + return marginal_result_set + result, margin_keys, row_margin = marginal_result_set + else: + # no values, and table is a DataFrame + assert isinstance(table, ABCDataFrame) + marginal_result_set = _generate_marginal_results_without_values( + table, data, rows, cols, aggfunc, observed, margins_name + ) + if not isinstance(marginal_result_set, tuple): + return marginal_result_set + result, margin_keys, row_margin = marginal_result_set + + row_margin = row_margin.reindex(result.columns, fill_value=fill_value) + # populate grand margin + for k in margin_keys: + if isinstance(k, str): + row_margin[k] = grand_margin[k] + else: + row_margin[k] = grand_margin[k[0]] + + from pandas import DataFrame + + margin_dummy = DataFrame(row_margin, columns=Index([key])).T + + row_names = result.index.names + # check the result column and leave floats + for dtype in set(result.dtypes): + cols = result.select_dtypes([dtype]).columns + margin_dummy[cols] = margin_dummy[cols].apply( + maybe_downcast_to_dtype, args=(dtype,) + ) + result = result._append(margin_dummy) + result.index.names = row_names + + return result + + +def _compute_grand_margin(data: DataFrame, values, aggfunc, margins_name: str = "All"): + + if values: + grand_margin = {} + for k, v in data[values].items(): + try: + if isinstance(aggfunc, str): + grand_margin[k] = getattr(v, aggfunc)() + elif isinstance(aggfunc, dict): + if isinstance(aggfunc[k], str): + grand_margin[k] = getattr(v, aggfunc[k])() + else: + grand_margin[k] = aggfunc[k](v) + else: + grand_margin[k] = aggfunc(v) + except TypeError: + pass + return grand_margin + else: + return {margins_name: aggfunc(data.index)} + + +def _generate_marginal_results( + table, data, values, rows, cols, aggfunc, observed, margins_name: str = "All" +): + if len(cols) > 0: + # need to "interleave" the margins + table_pieces = [] + margin_keys = [] + + def _all_key(key): + return (key, margins_name) + ("",) * (len(cols) - 1) + + if len(rows) > 0: + margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc) + cat_axis = 1 + + for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): + all_key = _all_key(key) + + # we are going to mutate this, so need to copy! 
+ piece = piece.copy() + piece[all_key] = margin[key] + + table_pieces.append(piece) + margin_keys.append(all_key) + else: + from pandas import DataFrame + + cat_axis = 0 + for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): + if len(cols) > 1: + all_key = _all_key(key) + else: + all_key = margins_name + table_pieces.append(piece) + # GH31016 this is to calculate margin for each group, and assign + # corresponded key as index + transformed_piece = DataFrame(piece.apply(aggfunc)).T + transformed_piece.index = Index([all_key], name=piece.index.name) + + # append piece for margin into table_piece + table_pieces.append(transformed_piece) + margin_keys.append(all_key) + + result = concat(table_pieces, axis=cat_axis) + + if len(rows) == 0: + return result + else: + result = table + margin_keys = table.columns + + if len(cols) > 0: + row_margin = data[cols + values].groupby(cols, observed=observed).agg(aggfunc) + row_margin = row_margin.stack() + + # slight hack + new_order = [len(cols)] + list(range(len(cols))) + row_margin.index = row_margin.index.reorder_levels(new_order) + else: + row_margin = Series(np.nan, index=result.columns) + + return result, margin_keys, row_margin + + +def _generate_marginal_results_without_values( + table: DataFrame, data, rows, cols, aggfunc, observed, margins_name: str = "All" +): + if len(cols) > 0: + # need to "interleave" the margins + margin_keys: list | Index = [] + + def _all_key(): + if len(cols) == 1: + return margins_name + return (margins_name,) + ("",) * (len(cols) - 1) + + if len(rows) > 0: + margin = data[rows].groupby(rows, observed=observed).apply(aggfunc) + all_key = _all_key() + table[all_key] = margin + result = table + margin_keys.append(all_key) + + else: + margin = data.groupby(level=0, axis=0, observed=observed).apply(aggfunc) + all_key = _all_key() + table[all_key] = margin + result = table + margin_keys.append(all_key) + return result + else: + result = table + margin_keys = table.columns + + if len(cols): + row_margin = data[cols].groupby(cols, observed=observed).apply(aggfunc) + else: + row_margin = Series(np.nan, index=result.columns) + + return result, margin_keys, row_margin + + +def _convert_by(by): + if by is None: + by = [] + elif ( + is_scalar(by) + or isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) + or callable(by) + ): + by = [by] + else: + by = list(by) + return by + + +@Substitution("\ndata : DataFrame") +@Appender(_shared_docs["pivot"], indents=1) +@deprecate_nonkeyword_arguments(version=None, allowed_args=["data"]) +def pivot( + data: DataFrame, + index: IndexLabel | None = None, + columns: IndexLabel | None = None, + values: IndexLabel | None = None, +) -> DataFrame: + if columns is None: + raise TypeError("pivot() missing 1 required argument: 'columns'") + + columns_listlike = com.convert_to_list_like(columns) + + indexed: DataFrame | Series + if values is None: + if index is not None: + cols = com.convert_to_list_like(index) + else: + cols = [] + + append = index is None + # error: Unsupported operand types for + ("List[Any]" and "ExtensionArray") + # error: Unsupported left operand type for + ("ExtensionArray") + indexed = data.set_index( + cols + columns_listlike, append=append # type: ignore[operator] + ) + else: + if index is None: + if isinstance(data.index, MultiIndex): + # GH 23955 + index_list = [ + data.index.get_level_values(i) for i in range(data.index.nlevels) + ] + else: + index_list = [Series(data.index, name=data.index.name)] + else: + index_list = [data[idx] for idx in 
com.convert_to_list_like(index)] + + data_columns = [data[col] for col in columns_listlike] + index_list.extend(data_columns) + multiindex = MultiIndex.from_arrays(index_list) + + if is_list_like(values) and not isinstance(values, tuple): + # Exclude tuple because it is seen as a single column name + values = cast(Sequence[Hashable], values) + indexed = data._constructor( + data[values]._values, index=multiindex, columns=values + ) + else: + indexed = data._constructor_sliced(data[values]._values, index=multiindex) + # error: Argument 1 to "unstack" of "DataFrame" has incompatible type "Union + # [List[Any], ExtensionArray, ndarray[Any, Any], Index, Series]"; expected + # "Hashable" + return indexed.unstack(columns_listlike) # type: ignore[arg-type] + + +def crosstab( + index, + columns, + values=None, + rownames=None, + colnames=None, + aggfunc=None, + margins: bool = False, + margins_name: str = "All", + dropna: bool = True, + normalize=False, +) -> DataFrame: + """ + Compute a simple cross tabulation of two (or more) factors. + + By default, computes a frequency table of the factors unless an + array of values and an aggregation function are passed. + + Parameters + ---------- + index : array-like, Series, or list of arrays/Series + Values to group by in the rows. + columns : array-like, Series, or list of arrays/Series + Values to group by in the columns. + values : array-like, optional + Array of values to aggregate according to the factors. + Requires `aggfunc` be specified. + rownames : sequence, default None + If passed, must match number of row arrays passed. + colnames : sequence, default None + If passed, must match number of column arrays passed. + aggfunc : function, optional + If specified, requires `values` be specified as well. + margins : bool, default False + Add row/column margins (subtotals). + margins_name : str, default 'All' + Name of the row/column that will contain the totals + when margins is True. + dropna : bool, default True + Do not include columns whose entries are all NaN. + normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False + Normalize by dividing all values by the sum of values. + + - If passed 'all' or `True`, will normalize over all values. + - If passed 'index' will normalize over each row. + - If passed 'columns' will normalize over each column. + - If margins is `True`, will also normalize margin values. + + Returns + ------- + DataFrame + Cross tabulation of the data. + + See Also + -------- + DataFrame.pivot : Reshape data based on column values. + pivot_table : Create a pivot table as a DataFrame. + + Notes + ----- + Any Series passed will have their name attributes used unless row or column + names for the cross-tabulation are specified. + + Any input passed containing Categorical data will have **all** of its + categories included in the cross-tabulation, even if the actual data does + not contain any instances of a particular category. + + In the event that there aren't overlapping indexes an empty DataFrame will + be returned. + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", + ... "bar", "bar", "foo", "foo", "foo"], dtype=object) + >>> b = np.array(["one", "one", "one", "two", "one", "one", + ... "one", "two", "two", "two", "one"], dtype=object) + >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", + ... "shiny", "dull", "shiny", "shiny", "shiny"], + ... 
dtype=object) + >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) + b one two + c dull shiny dull shiny + a + bar 1 2 1 0 + foo 2 2 1 2 + + Here 'c' and 'f' are not represented in the data and will not be + shown in the output because dropna is True by default. Set + dropna=False to preserve categories with no data. + + >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) + >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) + >>> pd.crosstab(foo, bar) + col_0 d e + row_0 + a 1 0 + b 0 1 + >>> pd.crosstab(foo, bar, dropna=False) + col_0 d e f + row_0 + a 1 0 0 + b 0 1 0 + c 0 0 0 + """ + if values is None and aggfunc is not None: + raise ValueError("aggfunc cannot be used without values.") + + if values is not None and aggfunc is None: + raise ValueError("values cannot be used without an aggfunc.") + + if not is_nested_list_like(index): + index = [index] + if not is_nested_list_like(columns): + columns = [columns] + + common_idx = None + pass_objs = [x for x in index + columns if isinstance(x, (ABCSeries, ABCDataFrame))] + if pass_objs: + common_idx = get_objs_combined_axis(pass_objs, intersect=True, sort=False) + + rownames = _get_names(index, rownames, prefix="row") + colnames = _get_names(columns, colnames, prefix="col") + + # duplicate names mapped to unique names for pivot op + ( + rownames_mapper, + unique_rownames, + colnames_mapper, + unique_colnames, + ) = _build_names_mapper(rownames, colnames) + + from pandas import DataFrame + + data = { + **dict(zip(unique_rownames, index)), + **dict(zip(unique_colnames, columns)), + } + df = DataFrame(data, index=common_idx) + + if values is None: + df["__dummy__"] = 0 + kwargs = {"aggfunc": len, "fill_value": 0} + else: + df["__dummy__"] = values + kwargs = {"aggfunc": aggfunc} + + table = df.pivot_table( + "__dummy__", + index=unique_rownames, + columns=unique_colnames, + margins=margins, + margins_name=margins_name, + dropna=dropna, + **kwargs, + ) + + # Post-process + if normalize is not False: + table = _normalize( + table, normalize=normalize, margins=margins, margins_name=margins_name + ) + + table = table.rename_axis(index=rownames_mapper, axis=0) + table = table.rename_axis(columns=colnames_mapper, axis=1) + + return table + + +def _normalize( + table: DataFrame, normalize, margins: bool, margins_name="All" +) -> DataFrame: + + if not isinstance(normalize, (bool, str)): + axis_subs = {0: "index", 1: "columns"} + try: + normalize = axis_subs[normalize] + except KeyError as err: + raise ValueError("Not a valid normalize argument") from err + + if margins is False: + + # Actual Normalizations + normalizers: dict[bool | str, Callable] = { + "all": lambda x: x / x.sum(axis=1).sum(axis=0), + "columns": lambda x: x / x.sum(), + "index": lambda x: x.div(x.sum(axis=1), axis=0), + } + + normalizers[True] = normalizers["all"] + + try: + f = normalizers[normalize] + except KeyError as err: + raise ValueError("Not a valid normalize argument") from err + + table = f(table) + table = table.fillna(0) + + elif margins is True: + # keep index and column of pivoted table + table_index = table.index + table_columns = table.columns + last_ind_or_col = table.iloc[-1, :].name + + # check if margin name is not in (for MI cases) and not equal to last + # index/column and save the column and index margin + if (margins_name not in last_ind_or_col) & (margins_name != last_ind_or_col): + raise ValueError(f"{margins_name} not in pivoted DataFrame") + column_margin = table.iloc[:-1, -1] + index_margin = table.iloc[-1, :-1] + 
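+        # The pivoted table still carries the margins at this point: the last
+        # column holds the per-row totals and the last row the per-column
+        # totals. column_margin / index_margin above are those totals without
+        # the grand-total corner cell; the slicing below drops the margin row
+        # and column so that only the core table is normalized.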
+ # keep the core table + table = table.iloc[:-1, :-1] + + # Normalize core + table = _normalize(table, normalize=normalize, margins=False) + + # Fix Margins + if normalize == "columns": + column_margin = column_margin / column_margin.sum() + table = concat([table, column_margin], axis=1) + table = table.fillna(0) + table.columns = table_columns + + elif normalize == "index": + index_margin = index_margin / index_margin.sum() + table = table._append(index_margin) + table = table.fillna(0) + table.index = table_index + + elif normalize == "all" or normalize is True: + column_margin = column_margin / column_margin.sum() + index_margin = index_margin / index_margin.sum() + index_margin.loc[margins_name] = 1 + table = concat([table, column_margin], axis=1) + table = table._append(index_margin) + + table = table.fillna(0) + table.index = table_index + table.columns = table_columns + + else: + raise ValueError("Not a valid normalize argument") + + else: + raise ValueError("Not a valid margins argument") + + return table + + +def _get_names(arrs, names, prefix: str = "row"): + if names is None: + names = [] + for i, arr in enumerate(arrs): + if isinstance(arr, ABCSeries) and arr.name is not None: + names.append(arr.name) + else: + names.append(f"{prefix}_{i}") + else: + if len(names) != len(arrs): + raise AssertionError("arrays and names must have the same length") + if not isinstance(names, list): + names = list(names) + + return names + + +def _build_names_mapper( + rownames: list[str], colnames: list[str] +) -> tuple[dict[str, str], list[str], dict[str, str], list[str]]: + """ + Given the names of a DataFrame's rows and columns, returns a set of unique row + and column names and mappers that convert to original names. + + A row or column name is replaced if it is duplicate among the rows of the inputs, + among the columns of the inputs or between the rows and the columns. 
+ + Parameters + ---------- + rownames: list[str] + colnames: list[str] + + Returns + ------- + Tuple(Dict[str, str], List[str], Dict[str, str], List[str]) + + rownames_mapper: dict[str, str] + a dictionary with new row names as keys and original rownames as values + unique_rownames: list[str] + a list of rownames with duplicate names replaced by dummy names + colnames_mapper: dict[str, str] + a dictionary with new column names as keys and original column names as values + unique_colnames: list[str] + a list of column names with duplicate names replaced by dummy names + + """ + + def get_duplicates(names): + seen: set = set() + return {name for name in names if name not in seen} + + shared_names = set(rownames).intersection(set(colnames)) + dup_names = get_duplicates(rownames) | get_duplicates(colnames) | shared_names + + rownames_mapper = { + f"row_{i}": name for i, name in enumerate(rownames) if name in dup_names + } + unique_rownames = [ + f"row_{i}" if name in dup_names else name for i, name in enumerate(rownames) + ] + + colnames_mapper = { + f"col_{i}": name for i, name in enumerate(colnames) if name in dup_names + } + unique_colnames = [ + f"col_{i}" if name in dup_names else name for i, name in enumerate(colnames) + ] + + return rownames_mapper, unique_rownames, colnames_mapper, unique_colnames diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py new file mode 100644 index 00000000..1a99bf0d --- /dev/null +++ b/pandas/core/reshape/reshape.py @@ -0,0 +1,850 @@ +from __future__ import annotations + +import itertools +from typing import ( + TYPE_CHECKING, + cast, +) +import warnings + +import numpy as np + +import pandas._libs.reshape as libreshape +from pandas._typing import npt +from pandas.errors import PerformanceWarning +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_1d_only_ea_dtype, + is_extension_array_dtype, + is_integer, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.missing import notna + +import pandas.core.algorithms as algos +from pandas.core.arrays.categorical import factorize_from_iterable +from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.frame import DataFrame +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) +from pandas.core.series import Series +from pandas.core.sorting import ( + compress_group_index, + decons_obs_group_ids, + get_compressed_ids, + get_group_index, + get_group_index_sorter, +) + +if TYPE_CHECKING: + from pandas.core.arrays import ExtensionArray + from pandas.core.indexes.frozen import FrozenList + + +class _Unstacker: + """ + Helper class to unstack data / pivot with multi-level index + + Parameters + ---------- + index : MultiIndex + level : int or str, default last level + Level to "unstack". Accepts a name for the level. + fill_value : scalar, optional + Default value to fill in missing values if subgroups do not have the + same set of labels. By default, missing values will be replaced with + the default fill value for that data type, NaN for float, NaT for + datetimelike, etc. For integer types, by default data will converted to + float and missing values will be set to NaN. + constructor : object + Pandas ``DataFrame`` or subclass used to create unstacked + response. If None, DataFrame will be used. 
+ + Examples + -------- + >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), + ... ('two', 'a'), ('two', 'b')]) + >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index) + >>> s + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 + + >>> s.unstack(level=-1) + a b + one 1 2 + two 3 4 + + >>> s.unstack(level=0) + one two + a 1 3 + b 2 4 + + Returns + ------- + unstacked : DataFrame + """ + + def __init__(self, index: MultiIndex, level=-1, constructor=None) -> None: + + if constructor is None: + constructor = DataFrame + self.constructor = constructor + + self.index = index.remove_unused_levels() + + self.level = self.index._get_level_number(level) + + # when index includes `nan`, need to lift levels/strides by 1 + self.lift = 1 if -1 in self.index.codes[self.level] else 0 + + # Note: the "pop" below alters these in-place. + self.new_index_levels = list(self.index.levels) + self.new_index_names = list(self.index.names) + + self.removed_name = self.new_index_names.pop(self.level) + self.removed_level = self.new_index_levels.pop(self.level) + self.removed_level_full = index.levels[self.level] + + # Bug fix GH 20601 + # If the data frame is too big, the number of unique index combination + # will cause int32 overflow on windows environments. + # We want to check and raise an error before this happens + num_rows = np.max([index_level.size for index_level in self.new_index_levels]) + num_columns = self.removed_level.size + + # GH20601: This forces an overflow if the number of cells is too high. + num_cells = num_rows * num_columns + + # GH 26314: Previous ValueError raised was too restrictive for many users. + if num_cells > np.iinfo(np.int32).max: + warnings.warn( + f"The following operation may generate {num_cells} cells " + f"in the resulting pandas object.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) + + self._make_selectors() + + @cache_readonly + def _indexer_and_to_sort( + self, + ) -> tuple[ + npt.NDArray[np.intp], + list[np.ndarray], # each has _some_ signed integer dtype + ]: + v = self.level + + codes = list(self.index.codes) + levs = list(self.index.levels) + to_sort = codes[:v] + codes[v + 1 :] + [codes[v]] + sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]]) + + comp_index, obs_ids = get_compressed_ids(to_sort, sizes) + ngroups = len(obs_ids) + + indexer = get_group_index_sorter(comp_index, ngroups) + return indexer, to_sort + + @cache_readonly + def sorted_labels(self) -> list[np.ndarray]: + indexer, to_sort = self._indexer_and_to_sort + return [line.take(indexer) for line in to_sort] + + def _make_sorted_values(self, values: np.ndarray) -> np.ndarray: + indexer, _ = self._indexer_and_to_sort + + sorted_values = algos.take_nd(values, indexer, axis=0) + return sorted_values + + def _make_selectors(self): + new_levels = self.new_index_levels + + # make the mask + remaining_labels = self.sorted_labels[:-1] + level_sizes = tuple(len(x) for x in new_levels) + + comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes) + ngroups = len(obs_ids) + + comp_index = ensure_platform_int(comp_index) + stride = self.index.levshape[self.level] + self.lift + self.full_shape = ngroups, stride + + selector = self.sorted_labels[-1] + stride * comp_index + self.lift + mask = np.zeros(np.prod(self.full_shape), dtype=bool) + mask.put(selector, True) + + if mask.sum() < len(self.index): + raise ValueError("Index contains duplicate entries, cannot reshape") + + self.group_index = comp_index + self.mask = mask + self.compressor = 
comp_index.searchsorted(np.arange(ngroups)) + + @cache_readonly + def mask_all(self) -> bool: + return bool(self.mask.all()) + + @cache_readonly + def arange_result(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.bool_]]: + # We cache this for re-use in ExtensionBlock._unstack + dummy_arr = np.arange(len(self.index), dtype=np.intp) + new_values, mask = self.get_new_values(dummy_arr, fill_value=-1) + return new_values, mask.any(0) + # TODO: in all tests we have mask.any(0).all(); can we rely on that? + + def get_result(self, values, value_columns, fill_value) -> DataFrame: + + if values.ndim == 1: + values = values[:, np.newaxis] + + if value_columns is None and values.shape[1] != 1: # pragma: no cover + raise ValueError("must pass column labels for multi-column data") + + values, _ = self.get_new_values(values, fill_value) + columns = self.get_new_columns(value_columns) + index = self.new_index + + return self.constructor( + values, index=index, columns=columns, dtype=values.dtype + ) + + def get_new_values(self, values, fill_value=None): + + if values.ndim == 1: + values = values[:, np.newaxis] + + sorted_values = self._make_sorted_values(values) + + # place the values + length, width = self.full_shape + stride = values.shape[1] + result_width = width * stride + result_shape = (length, result_width) + mask = self.mask + mask_all = self.mask_all + + # we can simply reshape if we don't have a mask + if mask_all and len(values): + # TODO: Under what circumstances can we rely on sorted_values + # matching values? When that holds, we can slice instead + # of take (in particular for EAs) + new_values = ( + sorted_values.reshape(length, width, stride) + .swapaxes(1, 2) + .reshape(result_shape) + ) + new_mask = np.ones(result_shape, dtype=bool) + return new_values, new_mask + + dtype = values.dtype + + # if our mask is all True, then we can use our existing dtype + if mask_all: + dtype = values.dtype + new_values = np.empty(result_shape, dtype=dtype) + else: + if isinstance(dtype, ExtensionDtype): + # GH#41875 + # We are assuming that fill_value can be held by this dtype, + # unlike the non-EA case that promotes. + cls = dtype.construct_array_type() + new_values = cls._empty(result_shape, dtype=dtype) + new_values[:] = fill_value + else: + dtype, fill_value = maybe_promote(dtype, fill_value) + new_values = np.empty(result_shape, dtype=dtype) + new_values.fill(fill_value) + + name = dtype.name + new_mask = np.zeros(result_shape, dtype=bool) + + # we need to convert to a basic dtype + # and possibly coerce an input to our output dtype + # e.g. 
ints -> floats + if needs_i8_conversion(values.dtype): + sorted_values = sorted_values.view("i8") + new_values = new_values.view("i8") + else: + sorted_values = sorted_values.astype(name, copy=False) + + # fill in our values & mask + libreshape.unstack( + sorted_values, + mask.view("u1"), + stride, + length, + width, + new_values, + new_mask.view("u1"), + ) + + # reconstruct dtype if needed + if needs_i8_conversion(values.dtype): + # view as datetime64 so we can wrap in DatetimeArray and use + # DTA's view method + new_values = new_values.view("M8[ns]") + new_values = ensure_wrapped_if_datetimelike(new_values) + new_values = new_values.view(values.dtype) + + return new_values, new_mask + + def get_new_columns(self, value_columns: Index | None): + if value_columns is None: + if self.lift == 0: + return self.removed_level._rename(name=self.removed_name) + + lev = self.removed_level.insert(0, item=self.removed_level._na_value) + return lev.rename(self.removed_name) + + stride = len(self.removed_level) + self.lift + width = len(value_columns) + propagator = np.repeat(np.arange(width), stride) + + new_levels: FrozenList | list[Index] + + if isinstance(value_columns, MultiIndex): + new_levels = value_columns.levels + (self.removed_level_full,) + new_names = value_columns.names + (self.removed_name,) + + new_codes = [lab.take(propagator) for lab in value_columns.codes] + else: + new_levels = [ + value_columns, + self.removed_level_full, + ] + new_names = [value_columns.name, self.removed_name] + new_codes = [propagator] + + repeater = self._repeater + + # The entire level is then just a repetition of the single chunk: + new_codes.append(np.tile(repeater, width)) + return MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + @cache_readonly + def _repeater(self) -> np.ndarray: + # The two indices differ only if the unstacked level had unused items: + if len(self.removed_level_full) != len(self.removed_level): + # In this case, we remap the new codes to the original level: + repeater = self.removed_level_full.get_indexer(self.removed_level) + if self.lift: + repeater = np.insert(repeater, 0, -1) + else: + # Otherwise, we just use each level item exactly once: + stride = len(self.removed_level) + self.lift + repeater = np.arange(stride) - self.lift + + return repeater + + @cache_readonly + def new_index(self) -> MultiIndex: + # Does not depend on values or value_columns + result_codes = [lab.take(self.compressor) for lab in self.sorted_labels[:-1]] + + # construct the new index + if len(self.new_index_levels) == 1: + level, level_codes = self.new_index_levels[0], result_codes[0] + if (level_codes == -1).any(): + level = level.insert(len(level), level._na_value) + return level.take(level_codes).rename(self.new_index_names[0]) + + return MultiIndex( + levels=self.new_index_levels, + codes=result_codes, + names=self.new_index_names, + verify_integrity=False, + ) + + +def _unstack_multiple(data, clocs, fill_value=None): + if len(clocs) == 0: + return data + + # NOTE: This doesn't deal with hierarchical columns yet + + index = data.index + + # GH 19966 Make sure if MultiIndexed index has tuple name, they will be + # recognised as a whole + if clocs in index.names: + clocs = [clocs] + clocs = [index._get_level_number(i) for i in clocs] + + rlocs = [i for i in range(index.nlevels) if i not in clocs] + + clevels = [index.levels[i] for i in clocs] + ccodes = [index.codes[i] for i in clocs] + cnames = [index.names[i] for i in clocs] + rlevels = [index.levels[i] for 
i in rlocs] + rcodes = [index.codes[i] for i in rlocs] + rnames = [index.names[i] for i in rlocs] + + shape = tuple(len(x) for x in clevels) + group_index = get_group_index(ccodes, shape, sort=False, xnull=False) + + comp_ids, obs_ids = compress_group_index(group_index, sort=False) + recons_codes = decons_obs_group_ids(comp_ids, obs_ids, shape, ccodes, xnull=False) + + if not rlocs: + # Everything is in clocs, so the dummy df has a regular index + dummy_index = Index(obs_ids, name="__placeholder__") + else: + dummy_index = MultiIndex( + levels=rlevels + [obs_ids], + codes=rcodes + [comp_ids], + names=rnames + ["__placeholder__"], + verify_integrity=False, + ) + + if isinstance(data, Series): + dummy = data.copy() + dummy.index = dummy_index + + unstacked = dummy.unstack("__placeholder__", fill_value=fill_value) + new_levels = clevels + new_names = cnames + new_codes = recons_codes + else: + if isinstance(data.columns, MultiIndex): + result = data + for i in range(len(clocs)): + val = clocs[i] + result = result.unstack(val, fill_value=fill_value) + clocs = [v if v < val else v - 1 for v in clocs] + + return result + + # GH#42579 deep=False to avoid consolidating + dummy = data.copy(deep=False) + dummy.index = dummy_index + + unstacked = dummy.unstack("__placeholder__", fill_value=fill_value) + if isinstance(unstacked, Series): + unstcols = unstacked.index + else: + unstcols = unstacked.columns + assert isinstance(unstcols, MultiIndex) # for mypy + new_levels = [unstcols.levels[0]] + clevels + new_names = [data.columns.name] + cnames + + new_codes = [unstcols.codes[0]] + for rec in recons_codes: + new_codes.append(rec.take(unstcols.codes[-1])) + + new_columns = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + if isinstance(unstacked, Series): + unstacked.index = new_columns + else: + unstacked.columns = new_columns + + return unstacked + + +def unstack(obj: Series | DataFrame, level, fill_value=None): + + if isinstance(level, (tuple, list)): + if len(level) != 1: + # _unstack_multiple only handles MultiIndexes, + # and isn't needed for a single level + return _unstack_multiple(obj, level, fill_value=fill_value) + else: + level = level[0] + + # Prioritize integer interpretation (GH #21677): + if not is_integer(level) and not level == "__placeholder__": + level = obj.index._get_level_number(level) + + if isinstance(obj, DataFrame): + if isinstance(obj.index, MultiIndex): + return _unstack_frame(obj, level, fill_value=fill_value) + else: + return obj.T.stack(dropna=False) + elif not isinstance(obj.index, MultiIndex): + # GH 36113 + # Give nicer error messages when unstack a Series whose + # Index is not a MultiIndex. 
+ raise ValueError( + f"index must be a MultiIndex to unstack, {type(obj.index)} was passed" + ) + else: + if is_1d_only_ea_dtype(obj.dtype): + return _unstack_extension_series(obj, level, fill_value) + unstacker = _Unstacker( + obj.index, level=level, constructor=obj._constructor_expanddim + ) + return unstacker.get_result( + obj._values, value_columns=None, fill_value=fill_value + ) + + +def _unstack_frame(obj: DataFrame, level, fill_value=None): + assert isinstance(obj.index, MultiIndex) # checked by caller + unstacker = _Unstacker(obj.index, level=level, constructor=obj._constructor) + + if not obj._can_fast_transpose: + mgr = obj._mgr.unstack(unstacker, fill_value=fill_value) + return obj._constructor(mgr) + else: + return unstacker.get_result( + obj._values, value_columns=obj.columns, fill_value=fill_value + ) + + +def _unstack_extension_series(series: Series, level, fill_value) -> DataFrame: + """ + Unstack an ExtensionArray-backed Series. + + The ExtensionDtype is preserved. + + Parameters + ---------- + series : Series + A Series with an ExtensionArray for values + level : Any + The level name or number. + fill_value : Any + The user-level (not physical storage) fill value to use for + missing values introduced by the reshape. Passed to + ``series.values.take``. + + Returns + ------- + DataFrame + Each column of the DataFrame will have the same dtype as + the input Series. + """ + # Defer to the logic in ExtensionBlock._unstack + df = series.to_frame() + result = df.unstack(level=level, fill_value=fill_value) + + # equiv: result.droplevel(level=0, axis=1) + # but this avoids an extra copy + result.columns = result.columns.droplevel(0) + return result + + +def stack(frame: DataFrame, level=-1, dropna: bool = True): + """ + Convert DataFrame to Series with multi-level Index. Columns become the + second level of the resulting hierarchical index + + Returns + ------- + stacked : Series or DataFrame + """ + + def factorize(index): + if index.is_unique: + return index, np.arange(len(index)) + codes, categories = factorize_from_iterable(index) + return categories, codes + + N, K = frame.shape + + # Will also convert negative level numbers and check if out of bounds. + level_num = frame.columns._get_level_number(level) + + if isinstance(frame.columns, MultiIndex): + return _stack_multi_columns(frame, level_num=level_num, dropna=dropna) + elif isinstance(frame.index, MultiIndex): + new_levels = list(frame.index.levels) + new_codes = [lab.repeat(K) for lab in frame.index.codes] + + clev, clab = factorize(frame.columns) + new_levels.append(clev) + new_codes.append(np.tile(clab, N).ravel()) + + new_names = list(frame.index.names) + new_names.append(frame.columns.name) + new_index = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + else: + levels, (ilab, clab) = zip(*map(factorize, (frame.index, frame.columns))) + codes = ilab.repeat(K), np.tile(clab, N).ravel() + new_index = MultiIndex( + levels=levels, + codes=codes, + names=[frame.index.name, frame.columns.name], + verify_integrity=False, + ) + + if not frame.empty and frame._is_homogeneous_type: + # For homogeneous EAs, frame._values will coerce to object. So + # we concatenate instead. 
+ dtypes = list(frame.dtypes._values) + dtype = dtypes[0] + + if is_extension_array_dtype(dtype): + arr = dtype.construct_array_type() + new_values = arr._concat_same_type( + [col._values for _, col in frame.items()] + ) + new_values = _reorder_for_extension_array_stack(new_values, N, K) + else: + # homogeneous, non-EA + new_values = frame._values.ravel() + + else: + # non-homogeneous + new_values = frame._values.ravel() + + if dropna: + mask = notna(new_values) + new_values = new_values[mask] + new_index = new_index[mask] + + return frame._constructor_sliced(new_values, index=new_index) + + +def stack_multiple(frame, level, dropna=True): + # If all passed levels match up to column names, no + # ambiguity about what to do + if all(lev in frame.columns.names for lev in level): + result = frame + for lev in level: + result = stack(result, lev, dropna=dropna) + + # Otherwise, level numbers may change as each successive level is stacked + elif all(isinstance(lev, int) for lev in level): + # As each stack is done, the level numbers decrease, so we need + # to account for that when level is a sequence of ints + result = frame + # _get_level_number() checks level numbers are in range and converts + # negative numbers to positive + level = [frame.columns._get_level_number(lev) for lev in level] + + # Can't iterate directly through level as we might need to change + # values as we go + for index in range(len(level)): + lev = level[index] + result = stack(result, lev, dropna=dropna) + # Decrement all level numbers greater than current, as these + # have now shifted down by one + updated_level = [] + for other in level: + if other > lev: + updated_level.append(other - 1) + else: + updated_level.append(other) + level = updated_level + + else: + raise ValueError( + "level should contain all level names or all level " + "numbers, not a mixture of the two." + ) + + return result + + +def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex: + """Creates a MultiIndex from the first N-1 levels of this MultiIndex.""" + if len(columns.levels) <= 2: + return columns.levels[0]._rename(name=columns.names[0]) + + levs = [ + [lev[c] if c >= 0 else None for c in codes] + for lev, codes in zip(columns.levels[:-1], columns.codes[:-1]) + ] + + # Remove duplicate tuples in the MultiIndex. + tuples = zip(*levs) + unique_tuples = (key for key, _ in itertools.groupby(tuples)) + new_levs = zip(*unique_tuples) + + # The dtype of each level must be explicitly set to avoid inferring the wrong type. + # See GH-36991. + return MultiIndex.from_arrays( + [ + # Not all indices can accept None values. + Index(new_lev, dtype=lev.dtype) if None not in new_lev else new_lev + for new_lev, lev in zip(new_levs, columns.levels) + ], + names=columns.names[:-1], + ) + + +def _stack_multi_columns( + frame: DataFrame, level_num: int = -1, dropna: bool = True +) -> DataFrame: + def _convert_level_number(level_num: int, columns: Index): + """ + Logic for converting the level number to something we can safely pass + to swaplevel. + + If `level_num` matches a column name return the name from + position `level_num`, otherwise return `level_num`. 
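+
+        For example, if ``columns.names == ["year", 0]``, then
+        ``_convert_level_number(0, columns)`` returns ``"year"`` (the name at
+        position 0) rather than the integer ``0``, which would otherwise be
+        ambiguous because ``0`` is also used as a level name.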
+ """ + if level_num in columns.names: + return columns.names[level_num] + + return level_num + + this = frame.copy(deep=False) + mi_cols = this.columns # cast(MultiIndex, this.columns) + assert isinstance(mi_cols, MultiIndex) # caller is responsible + + # this makes life much simpler + if level_num != mi_cols.nlevels - 1: + # roll levels to put selected level at end + roll_columns = mi_cols + for i in range(level_num, mi_cols.nlevels - 1): + # Need to check if the ints conflict with level names + lev1 = _convert_level_number(i, roll_columns) + lev2 = _convert_level_number(i + 1, roll_columns) + roll_columns = roll_columns.swaplevel(lev1, lev2) + this.columns = mi_cols = roll_columns + + if not mi_cols._is_lexsorted(): + # Workaround the edge case where 0 is one of the column names, + # which interferes with trying to sort based on the first + # level + level_to_sort = _convert_level_number(0, mi_cols) + this = this.sort_index(level=level_to_sort, axis=1) + mi_cols = this.columns + + mi_cols = cast(MultiIndex, mi_cols) + new_columns = _stack_multi_column_index(mi_cols) + + # time to ravel the values + new_data = {} + level_vals = mi_cols.levels[-1] + level_codes = sorted(set(mi_cols.codes[-1])) + level_vals_nan = level_vals.insert(len(level_vals), None) + + level_vals_used = np.take(level_vals_nan, level_codes) + levsize = len(level_codes) + drop_cols = [] + for key in new_columns: + try: + loc = this.columns.get_loc(key) + except KeyError: + drop_cols.append(key) + continue + + # can make more efficient? + # we almost always return a slice + # but if unsorted can get a boolean + # indexer + if not isinstance(loc, slice): + slice_len = len(loc) + else: + slice_len = loc.stop - loc.start + + if slice_len != levsize: + chunk = this.loc[:, this.columns[loc]] + chunk.columns = level_vals_nan.take(chunk.columns.codes[-1]) + value_slice = chunk.reindex(columns=level_vals_used).values + else: + if frame._is_homogeneous_type and is_extension_array_dtype( + frame.dtypes.iloc[0] + ): + # TODO(EA2D): won't need special case, can go through .values + # paths below (might change to ._values) + dtype = this[this.columns[loc]].dtypes.iloc[0] + subset = this[this.columns[loc]] + + value_slice = dtype.construct_array_type()._concat_same_type( + [x._values for _, x in subset.items()] + ) + N, K = subset.shape + idx = np.arange(N * K).reshape(K, N).T.ravel() + value_slice = value_slice.take(idx) + + elif frame._is_mixed_type: + value_slice = this[this.columns[loc]].values + else: + value_slice = this.values[:, loc] + + if value_slice.ndim > 1: + # i.e. not extension + value_slice = value_slice.ravel() + + new_data[key] = value_slice + + if len(drop_cols) > 0: + new_columns = new_columns.difference(drop_cols) + + N = len(this) + + if isinstance(this.index, MultiIndex): + new_levels = list(this.index.levels) + new_names = list(this.index.names) + new_codes = [lab.repeat(levsize) for lab in this.index.codes] + else: + old_codes, old_levels = factorize_from_iterable(this.index) + new_levels = [old_levels] + new_codes = [old_codes.repeat(levsize)] + new_names = [this.index.name] # something better? + + new_levels.append(level_vals) + new_codes.append(np.tile(level_codes, N)) + new_names.append(frame.columns.names[level_num]) + + new_index = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + result = frame._constructor(new_data, index=new_index, columns=new_columns) + + # more efficient way to go about this? 
can do the whole masking biz but + # will only save a small amount of time... + if dropna: + result = result.dropna(axis=0, how="all") + + return result + + +def _reorder_for_extension_array_stack( + arr: ExtensionArray, n_rows: int, n_columns: int +) -> ExtensionArray: + """ + Re-orders the values when stacking multiple extension-arrays. + + The indirect stacking method used for EAs requires a followup + take to get the order correct. + + Parameters + ---------- + arr : ExtensionArray + n_rows, n_columns : int + The number of rows and columns in the original DataFrame. + + Returns + ------- + taken : ExtensionArray + The original `arr` with elements re-ordered appropriately + + Examples + -------- + >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f']) + >>> _reorder_for_extension_array_stack(arr, 2, 3) + array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='>> _reorder_for_extension_array_stack(arr, 3, 2) + array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='` for more examples. + + Examples + -------- + Discretize into three equal-sized bins. + + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3) + ... # doctest: +ELLIPSIS + [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... + Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... + + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) + ... # doctest: +ELLIPSIS + ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... + Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... + array([0.994, 3. , 5. , 7. ])) + + Discovers the same bins, but assign them specific labels. Notice that + the returned Categorical's categories are `labels` and is ordered. + + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), + ... 3, labels=["bad", "medium", "good"]) + ['bad', 'good', 'medium', 'medium', 'good', 'bad'] + Categories (3, object): ['bad' < 'medium' < 'good'] + + ``ordered=False`` will result in unordered categories when labels are passed. + This parameter can be used to allow non-unique labels: + + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, + ... labels=["B", "A", "B"], ordered=False) + ['B', 'B', 'A', 'A', 'B', 'B'] + Categories (2, object): ['A', 'B'] + + ``labels=False`` implies you just want the bins back. + + >>> pd.cut([0, 1, 1, 2], bins=4, labels=False) + array([0, 1, 1, 3]) + + Passing a Series as an input returns a Series with categorical dtype: + + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), + ... index=['a', 'b', 'c', 'd', 'e']) + >>> pd.cut(s, 3) + ... # doctest: +ELLIPSIS + a (1.992, 4.667] + b (1.992, 4.667] + c (4.667, 7.333] + d (7.333, 10.0] + e (7.333, 10.0] + dtype: category + Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ... + + Passing a Series as an input returns a Series with mapping value. + It is used to map numerically to intervals based on bins. + + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), + ... index=['a', 'b', 'c', 'd', 'e']) + >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False) + ... # doctest: +ELLIPSIS + (a 1.0 + b 2.0 + c 3.0 + d 4.0 + e NaN + dtype: float64, + array([ 0, 2, 4, 6, 8, 10])) + + Use `drop` optional when bins is not unique + + >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, + ... right=False, duplicates='drop') + ... # doctest: +ELLIPSIS + (a 1.0 + b 2.0 + c 3.0 + d 3.0 + e NaN + dtype: float64, + array([ 0, 2, 4, 6, 10])) + + Passing an IntervalIndex for `bins` results in those categories exactly. + Notice that values not covered by the IntervalIndex are set to NaN. 
0 + is to the left of the first bin (which is closed on the right), and 1.5 + falls between two bins. + + >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) + >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) + [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]] + Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]] + """ + # NOTE: this binning code is changed a bit from histogram for var(x) == 0 + + original = x + x = _preprocess_for_cut(x) + x, dtype = _coerce_to_type(x) + + if not np.iterable(bins): + if is_scalar(bins) and bins < 1: + raise ValueError("`bins` should be a positive integer.") + + try: # for array-like + sz = x.size + except AttributeError: + x = np.asarray(x) + sz = x.size + + if sz == 0: + raise ValueError("Cannot cut empty array") + + rng = (nanops.nanmin(x), nanops.nanmax(x)) + mn, mx = (mi + 0.0 for mi in rng) + + if np.isinf(mn) or np.isinf(mx): + # GH 24314 + raise ValueError( + "cannot specify integer `bins` when input data contains infinity" + ) + elif mn == mx: # adjust end points before binning + mn -= 0.001 * abs(mn) if mn != 0 else 0.001 + mx += 0.001 * abs(mx) if mx != 0 else 0.001 + bins = np.linspace(mn, mx, bins + 1, endpoint=True) + else: # adjust end points after binning + bins = np.linspace(mn, mx, bins + 1, endpoint=True) + adj = (mx - mn) * 0.001 # 0.1% of the range + if right: + bins[0] -= adj + else: + bins[-1] += adj + + elif isinstance(bins, IntervalIndex): + if bins.is_overlapping: + raise ValueError("Overlapping IntervalIndex is not accepted.") + + else: + if is_datetime64tz_dtype(bins): + bins = np.asarray(bins, dtype=DT64NS_DTYPE) + else: + bins = np.asarray(bins) + bins = _convert_bin_to_numeric_type(bins, dtype) + + # GH 26045: cast to float64 to avoid an overflow + if (np.diff(bins.astype("float64")) < 0).any(): + raise ValueError("bins must increase monotonically.") + + fac, bins = _bins_to_cuts( + x, + bins, + right=right, + labels=labels, + precision=precision, + include_lowest=include_lowest, + dtype=dtype, + duplicates=duplicates, + ordered=ordered, + ) + + return _postprocess_for_cut(fac, bins, retbins, dtype, original) + + +def qcut( + x, + q, + labels=None, + retbins: bool = False, + precision: int = 3, + duplicates: str = "raise", +): + """ + Quantile-based discretization function. + + Discretize variable into equal-sized buckets based on rank or based + on sample quantiles. For example 1000 values for 10 quantiles would + produce a Categorical object indicating quantile membership for each data point. + + Parameters + ---------- + x : 1d ndarray or Series + q : int or list-like of float + Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately + array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. + labels : array or False, default None + Used as labels for the resulting bins. Must be of the same length as + the resulting bins. If False, return only integer indicators of the + bins. If True, raises an error. + retbins : bool, optional + Whether to return the (bins, labels) or not. Can be useful if bins + is given as a scalar. + precision : int, optional + The precision at which to store and display the bins labels. + duplicates : {default 'raise', 'drop'}, optional + If bin edges are not unique, raise ValueError or drop non-uniques. + + Returns + ------- + out : Categorical or Series or array of integers if labels is False + The return type (Categorical or Series) depends on the input: a Series + of type category if input is a Series else Categorical. 
Bins are + represented as categories when categorical data is returned. + bins : ndarray of floats + Returned only if `retbins` is True. + + Notes + ----- + Out of bounds values will be NA in the resulting Categorical object + + Examples + -------- + >>> pd.qcut(range(5), 4) + ... # doctest: +ELLIPSIS + [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] + Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ... + + >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) + ... # doctest: +SKIP + [good, good, medium, bad, bad] + Categories (3, object): [good < medium < bad] + + >>> pd.qcut(range(5), 4, labels=False) + array([0, 0, 1, 2, 3]) + """ + original = x + x = _preprocess_for_cut(x) + x, dtype = _coerce_to_type(x) + + quantiles = np.linspace(0, 1, q + 1) if is_integer(q) else q + + x_np = np.asarray(x) + x_np = x_np[~np.isnan(x_np)] + bins = np.quantile(x_np, quantiles) + + fac, bins = _bins_to_cuts( + x, + bins, + labels=labels, + precision=precision, + include_lowest=True, + dtype=dtype, + duplicates=duplicates, + ) + + return _postprocess_for_cut(fac, bins, retbins, dtype, original) + + +def _bins_to_cuts( + x, + bins: np.ndarray, + right: bool = True, + labels=None, + precision: int = 3, + include_lowest: bool = False, + dtype=None, + duplicates: str = "raise", + ordered: bool = True, +): + if not ordered and labels is None: + raise ValueError("'labels' must be provided if 'ordered = False'") + + if duplicates not in ["raise", "drop"]: + raise ValueError( + "invalid value for 'duplicates' parameter, valid options are: raise, drop" + ) + + if isinstance(bins, IntervalIndex): + # we have a fast-path here + ids = bins.get_indexer(x) + result = Categorical.from_codes(ids, categories=bins, ordered=True) + return result, bins + + unique_bins = algos.unique(bins) + if len(unique_bins) < len(bins) and len(bins) != 2: + if duplicates == "raise": + raise ValueError( + f"Bin edges must be unique: {repr(bins)}.\n" + f"You can drop duplicate edges by setting the 'duplicates' kwarg" + ) + else: + bins = unique_bins + + side: Literal["left", "right"] = "left" if right else "right" + ids = ensure_platform_int(bins.searchsorted(x, side=side)) + + if include_lowest: + ids[np.asarray(x) == bins[0]] = 1 + + na_mask = isna(x) | (ids == len(bins)) | (ids == 0) + has_nas = na_mask.any() + + if labels is not False: + if not (labels is None or is_list_like(labels)): + raise ValueError( + "Bin labels must either be False, None or passed in as a " + "list-like argument" + ) + + elif labels is None: + labels = _format_labels( + bins, precision, right=right, include_lowest=include_lowest, dtype=dtype + ) + elif ordered and len(set(labels)) != len(labels): + raise ValueError( + "labels must be unique if ordered=True; pass ordered=False " + "for duplicate labels" + ) + else: + if len(labels) != len(bins) - 1: + raise ValueError( + "Bin labels must be one fewer than the number of bin edges" + ) + if not is_categorical_dtype(labels): + labels = Categorical( + labels, + categories=labels if len(set(labels)) == len(labels) else None, + ordered=ordered, + ) + # TODO: handle mismatch between categorical label order and pandas.cut order. 
+ np.putmask(ids, na_mask, 0) + result = algos.take_nd(labels, ids - 1) + + else: + result = ids - 1 + if has_nas: + result = result.astype(np.float64) + np.putmask(result, na_mask, np.nan) + + return result, bins + + +def _coerce_to_type(x): + """ + if the passed data is of datetime/timedelta, bool or nullable int type, + this method converts it to numeric so that cut or qcut method can + handle it + """ + dtype = None + + if is_datetime64tz_dtype(x.dtype): + dtype = x.dtype + elif is_datetime64_dtype(x.dtype): + x = to_datetime(x) + dtype = np.dtype("datetime64[ns]") + elif is_timedelta64_dtype(x.dtype): + x = to_timedelta(x) + dtype = np.dtype("timedelta64[ns]") + elif is_bool_dtype(x.dtype): + # GH 20303 + x = x.astype(np.int64) + # To support cut and qcut for IntegerArray we convert to float dtype. + # Will properly support in the future. + # https://github.com/pandas-dev/pandas/pull/31290 + # https://github.com/pandas-dev/pandas/issues/31389 + elif is_extension_array_dtype(x.dtype) and is_numeric_dtype(x.dtype): + x = x.to_numpy(dtype=np.float64, na_value=np.nan) + + if dtype is not None: + # GH 19768: force NaT to NaN during integer conversion + x = np.where(x.notna(), x.view(np.int64), np.nan) + + return x, dtype + + +def _convert_bin_to_numeric_type(bins, dtype): + """ + if the passed bin is of datetime/timedelta type, + this method converts it to integer + + Parameters + ---------- + bins : list-like of bins + dtype : dtype of data + + Raises + ------ + ValueError if bins are not of a compat dtype to dtype + """ + bins_dtype = infer_dtype(bins, skipna=False) + if is_timedelta64_dtype(dtype): + if bins_dtype in ["timedelta", "timedelta64"]: + bins = to_timedelta(bins).view(np.int64) + else: + raise ValueError("bins must be of timedelta64 dtype") + elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + if bins_dtype in ["datetime", "datetime64"]: + bins = to_datetime(bins).view(np.int64) + else: + raise ValueError("bins must be of datetime64 dtype") + + return bins + + +def _convert_bin_to_datelike_type(bins, dtype): + """ + Convert bins to a DatetimeIndex or TimedeltaIndex if the original dtype is + datelike + + Parameters + ---------- + bins : list-like of bins + dtype : dtype of data + + Returns + ------- + bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is + datelike + """ + if is_datetime64tz_dtype(dtype): + bins = to_datetime(bins.astype(np.int64), utc=True).tz_convert(dtype.tz) + elif is_datetime_or_timedelta_dtype(dtype): + bins = Index(bins.astype(np.int64), dtype=dtype) + return bins + + +def _format_labels( + bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None +): + """based on the dtype, return our labels""" + closed: IntervalLeftRight = "right" if right else "left" + + formatter: Callable[[Any], Timestamp] | Callable[[Any], Timedelta] + + if is_datetime64tz_dtype(dtype): + formatter = lambda x: Timestamp(x, tz=dtype.tz) + adjust = lambda x: x - Timedelta("1ns") + elif is_datetime64_dtype(dtype): + formatter = Timestamp + adjust = lambda x: x - Timedelta("1ns") + elif is_timedelta64_dtype(dtype): + formatter = Timedelta + adjust = lambda x: x - Timedelta("1ns") + else: + precision = _infer_precision(precision, bins) + formatter = lambda x: _round_frac(x, precision) + adjust = lambda x: x - 10 ** (-precision) + + breaks = [formatter(b) for b in bins] + if right and include_lowest: + # adjust lhs of first interval by precision to account for being right closed + breaks[0] = adjust(breaks[0]) + + return 
IntervalIndex.from_breaks(breaks, closed=closed) + + +def _preprocess_for_cut(x): + """ + handles preprocessing for cut where we convert passed + input to array, strip the index information and store it + separately + """ + # Check that the passed array is a Pandas or Numpy object + # We don't want to strip away a Pandas data-type here (e.g. datetimetz) + ndim = getattr(x, "ndim", None) + if ndim is None: + x = np.asarray(x) + if x.ndim != 1: + raise ValueError("Input array must be 1 dimensional") + + return x + + +def _postprocess_for_cut(fac, bins, retbins: bool, dtype, original): + """ + handles post processing for the cut method where + we combine the index information if the originally passed + datatype was a series + """ + if isinstance(original, ABCSeries): + fac = original._constructor(fac, index=original.index, name=original.name) + + if not retbins: + return fac + + bins = _convert_bin_to_datelike_type(bins, dtype) + + return fac, bins + + +def _round_frac(x, precision: int): + """ + Round the fractional part of the given number + """ + if not np.isfinite(x) or x == 0: + return x + else: + frac, whole = np.modf(x) + if whole == 0: + digits = -int(np.floor(np.log10(abs(frac)))) - 1 + precision + else: + digits = precision + return np.around(x, digits) + + +def _infer_precision(base_precision: int, bins) -> int: + """ + Infer an appropriate precision for _round_frac + """ + for precision in range(base_precision, 20): + levels = [_round_frac(b, precision) for b in bins] + if algos.unique(levels).size == bins.size: + return precision + return base_precision # default diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py new file mode 100644 index 00000000..1154940f --- /dev/null +++ b/pandas/core/reshape/util.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import numpy as np + +from pandas._typing import NumpyIndexT + +from pandas.core.dtypes.common import is_list_like + + +def cartesian_product(X) -> list[np.ndarray]: + """ + Numpy version of itertools.product. + Sometimes faster (for large inputs)... + + Parameters + ---------- + X : list-like of list-likes + + Returns + ------- + product : list of ndarrays + + Examples + -------- + >>> cartesian_product([list('ABC'), [1, 2]]) + [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype=' NumpyIndexT: + """ + Index compat for np.tile. + + Notes + ----- + Does not support multi-dimensional `num`. + """ + if isinstance(arr, np.ndarray): + return np.tile(arr, num) + + # Otherwise we have an Index + taker = np.tile(np.arange(len(arr)), num) + return arr.take(taker) diff --git a/pandas/core/roperator.py b/pandas/core/roperator.py new file mode 100644 index 00000000..2f320f4e --- /dev/null +++ b/pandas/core/roperator.py @@ -0,0 +1,62 @@ +""" +Reversed Operations not available in the stdlib operator module. +Defining these instead of using lambdas allows us to reference them by name. 
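+
+For example, ``radd(left, right)`` evaluates ``right + left``, which is the
+operand order a reflected ``__radd__`` implementation needs to produce.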
+""" +from __future__ import annotations + +import operator + + +def radd(left, right): + return right + left + + +def rsub(left, right): + return right - left + + +def rmul(left, right): + return right * left + + +def rdiv(left, right): + return right / left + + +def rtruediv(left, right): + return right / left + + +def rfloordiv(left, right): + return right // left + + +def rmod(left, right): + # check if right is a string as % is the string + # formatting operation; this is a TypeError + # otherwise perform the op + if isinstance(right, str): + typ = type(left).__name__ + raise TypeError(f"{typ} cannot perform the operation mod") + + return right % left + + +def rdivmod(left, right): + return divmod(right, left) + + +def rpow(left, right): + return right**left + + +def rand_(left, right): + return operator.and_(right, left) + + +def ror_(left, right): + return operator.or_(right, left) + + +def rxor(left, right): + return operator.xor(right, left) diff --git a/pandas/core/sample.py b/pandas/core/sample.py new file mode 100644 index 00000000..16fca2d0 --- /dev/null +++ b/pandas/core/sample.py @@ -0,0 +1,152 @@ +""" +Module containing utilities for NDFrame.sample() and .GroupBy.sample() +""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from pandas._libs import lib + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas.core.generic import NDFrame + + +def preprocess_weights(obj: NDFrame, weights, axis: int) -> np.ndarray: + """ + Process and validate the `weights` argument to `NDFrame.sample` and + `.GroupBy.sample`. + + Returns `weights` as an ndarray[np.float64], validated except for normalizing + weights (because that must be done groupwise in groupby sampling). + """ + # If a series, align with frame + if isinstance(weights, ABCSeries): + weights = weights.reindex(obj.axes[axis]) + + # Strings acceptable if a dataframe and axis = 0 + if isinstance(weights, str): + if isinstance(obj, ABCDataFrame): + if axis == 0: + try: + weights = obj[weights] + except KeyError as err: + raise KeyError( + "String passed to weights not a valid column" + ) from err + else: + raise ValueError( + "Strings can only be passed to " + "weights when sampling from rows on " + "a DataFrame" + ) + else: + raise ValueError( + "Strings cannot be passed as weights when sampling from a Series." + ) + + if isinstance(obj, ABCSeries): + func = obj._constructor + else: + func = obj._constructor_sliced + + weights = func(weights, dtype="float64")._values + + if len(weights) != obj.shape[axis]: + raise ValueError("Weights and axis to be sampled must be of same length") + + if lib.has_infs(weights): + raise ValueError("weight vector may not include `inf` values") + + if (weights < 0).any(): + raise ValueError("weight vector many not include negative values") + + missing = np.isnan(weights) + if missing.any(): + # Don't modify weights in place + weights = weights.copy() + weights[missing] = 0 + return weights + + +def process_sampling_size( + n: int | None, frac: float | None, replace: bool +) -> int | None: + """ + Process and validate the `n` and `frac` arguments to `NDFrame.sample` and + `.GroupBy.sample`. + + Returns None if `frac` should be used (variable sampling sizes), otherwise returns + the constant sampling size. + """ + # If no frac or n, default to n=1. 
+ if n is None and frac is None: + n = 1 + elif n is not None and frac is not None: + raise ValueError("Please enter a value for `frac` OR `n`, not both") + elif n is not None: + if n < 0: + raise ValueError( + "A negative number of rows requested. Please provide `n` >= 0." + ) + if n % 1 != 0: + raise ValueError("Only integers accepted as `n` values") + else: + assert frac is not None # for mypy + if frac > 1 and not replace: + raise ValueError( + "Replace has to be set to `True` when " + "upsampling the population `frac` > 1." + ) + if frac < 0: + raise ValueError( + "A negative number of rows requested. Please provide `frac` >= 0." + ) + + return n + + +def sample( + obj_len: int, + size: int, + replace: bool, + weights: np.ndarray | None, + random_state: np.random.RandomState | np.random.Generator, +) -> np.ndarray: + """ + Randomly sample `size` indices in `np.arange(obj_len)` + + Parameters + ---------- + obj_len : int + The length of the indices being considered + size : int + The number of values to choose + replace : bool + Allow or disallow sampling of the same row more than once. + weights : np.ndarray[np.float64] or None + If None, equal probability weighting, otherwise weights according + to the vector normalized + random_state: np.random.RandomState or np.random.Generator + State used for the random sampling + + Returns + ------- + np.ndarray[np.intp] + """ + if weights is not None: + weight_sum = weights.sum() + if weight_sum != 0: + weights = weights / weight_sum + else: + raise ValueError("Invalid weights: weights sum to zero") + + return random_state.choice(obj_len, size=size, replace=replace, p=weights).astype( + np.intp, copy=False + ) diff --git a/pandas/core/series.py b/pandas/core/series.py new file mode 100644 index 00000000..8f0faae3 --- /dev/null +++ b/pandas/core/series.py @@ -0,0 +1,6265 @@ +""" +Data structure for 1-dimensional cross-sectional and time series data +""" +from __future__ import annotations + +from textwrap import dedent +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterable, + Literal, + Mapping, + Sequence, + Union, + cast, + overload, +) +import warnings +import weakref + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import ( + lib, + properties, + reshape, + tslibs, +) +from pandas._libs.lib import no_default +from pandas._typing import ( + AggFuncType, + AnyArrayLike, + ArrayLike, + Axis, + Dtype, + DtypeObj, + FilePath, + FillnaOptions, + Frequency, + IgnoreRaise, + IndexKeyFunc, + IndexLabel, + Level, + NaPosition, + QuantileInterpolation, + Renamer, + SingleManager, + SortKind, + StorageOptions, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + ValueKeyFunc, + WriteBuffer, + npt, +) +from pandas.compat.numpy import function as nv +from pandas.errors import InvalidIndexError +from pandas.util._decorators import ( + Appender, + Substitution, + deprecate_kwarg, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import ( + validate_ascending, + validate_bool_kwarg, + validate_percentile, +) + +from pandas.core.dtypes.cast import ( + LossySetitemError, + convert_dtypes, + maybe_box_native, + maybe_cast_pointwise_result, +) +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_dict_like, + is_integer, + is_iterator, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + pandas_dtype, + validate_all_hashable, +) +from pandas.core.dtypes.generic import ABCDataFrame 
+from pandas.core.dtypes.inference import is_hashable +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, + remove_na_arraylike, +) + +from pandas.core import ( + algorithms, + base, + common as com, + missing, + nanops, + ops, +) +from pandas.core.accessor import CachedAccessor +from pandas.core.apply import SeriesApply +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.categorical import CategoricalAccessor +from pandas.core.arrays.sparse import SparseAccessor +from pandas.core.construction import ( + create_series_with_explicit_dtype, + extract_array, + is_empty_data, + sanitize_array, +) +from pandas.core.generic import NDFrame +from pandas.core.indexers import ( + deprecate_ndim_indexing, + unpack_1tuple, +) +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties +from pandas.core.indexes.api import ( + CategoricalIndex, + DatetimeIndex, + Float64Index, + Index, + MultiIndex, + PeriodIndex, + TimedeltaIndex, + default_index, + ensure_index, +) +import pandas.core.indexes.base as ibase +from pandas.core.indexing import ( + check_bool_indexer, + check_deprecated_indexers, +) +from pandas.core.internals import ( + SingleArrayManager, + SingleBlockManager, +) +from pandas.core.shared_docs import _shared_docs +from pandas.core.sorting import ( + ensure_key_mapped, + nargsort, +) +from pandas.core.strings import StringMethods +from pandas.core.tools.datetimes import to_datetime + +import pandas.io.formats.format as fmt +from pandas.io.formats.info import ( + INFO_DOCSTRING, + SeriesInfo, + series_sub_kwargs, +) +import pandas.plotting + +if TYPE_CHECKING: + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + Suffixes, + ) + + from pandas.core.frame import DataFrame + from pandas.core.groupby.generic import SeriesGroupBy + from pandas.core.resample import Resampler + +__all__ = ["Series"] + +_shared_doc_kwargs = { + "axes": "index", + "klass": "Series", + "axes_single_arg": "{0 or 'index'}", + "axis": """axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame.""", + "inplace": """inplace : bool, default False + If True, performs operation inplace and returns None.""", + "unique": "np.ndarray", + "duplicated": "Series", + "optional_by": "", + "optional_mapper": "", + "optional_labels": "", + "optional_axis": "", + "replace_iloc": """ + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", +} + + +def _coerce_method(converter): + """ + Install the scalar coercion methods. + """ + + def wrapper(self): + if len(self) == 1: + return converter(self.iloc[0]) + raise TypeError(f"cannot convert the series to {converter}") + + wrapper.__name__ = f"__{converter.__name__}__" + return wrapper + + +# ---------------------------------------------------------------------- +# Series class + + +class Series(base.IndexOpsMixin, NDFrame): + """ + One-dimensional ndarray with axis labels (including time series). + + Labels need not be unique but must be a hashable type. The object + supports both integer- and label-based indexing and provides a host of + methods for performing operations involving the index. Statistical + methods from ndarray have been overridden to automatically exclude + missing data (currently represented as NaN). + + Operations between Series (+, -, /, \\*, \\*\\*) align values based on their + associated index values-- they need not be the same length. 
The result + index will be the sorted union of the two indexes. + + Parameters + ---------- + data : array-like, Iterable, dict, or scalar value + Contains data stored in Series. If data is a dict, argument order is + maintained. + index : array-like or Index (1d) + Values must be hashable and have the same length as `data`. + Non-unique index values are allowed. Will default to + RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like + and index is None, then the keys in the data are used as the index. If the + index is not None, the resulting Series is reindexed with the index values. + dtype : str, numpy.dtype, or ExtensionDtype, optional + Data type for the output Series. If not specified, this will be + inferred from `data`. + See the :ref:`user guide <basics.dtypes>` for more usages. + name : str, optional + The name to give to the Series. + copy : bool, default False + Copy input data. Only affects Series or 1d ndarray input. See examples. + + Notes + ----- + Please reference the :ref:`User Guide <basics.series>` for more information. + + Examples + -------- + Constructing Series from a dictionary with an Index specified + + >>> d = {'a': 1, 'b': 2, 'c': 3} + >>> ser = pd.Series(data=d, index=['a', 'b', 'c']) + >>> ser + a 1 + b 2 + c 3 + dtype: int64 + + The keys of the dictionary match the Index values, hence the Index + values have no effect. + + >>> d = {'a': 1, 'b': 2, 'c': 3} + >>> ser = pd.Series(data=d, index=['x', 'y', 'z']) + >>> ser + x NaN + y NaN + z NaN + dtype: float64 + + Note that the Index is first built with the keys from the dictionary. + After this the Series is reindexed with the given Index values, hence we + get all NaN as a result. + + Constructing Series from a list with `copy=False`. + + >>> r = [1, 2] + >>> ser = pd.Series(r, copy=False) + >>> ser.iloc[0] = 999 + >>> r + [1, 2] + >>> ser + 0 999 + 1 2 + dtype: int64 + + Due to the input data type, the Series has a `copy` of + the original data even though `copy=False`, so + the data is unchanged. + + Constructing Series from a 1d ndarray with `copy=False`. + + >>> r = np.array([1, 2]) + >>> ser = pd.Series(r, copy=False) + >>> ser.iloc[0] = 999 + >>> r + array([999, 2]) + >>> ser + 0 999 + 1 2 + dtype: int64 + + Due to the input data type, the Series has a `view` on + the original data, so + the data is changed as well.
+ """ + + _typ = "series" + _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray) + + _name: Hashable + _metadata: list[str] = ["name"] + _internal_names_set = {"index"} | NDFrame._internal_names_set + _accessors = {"dt", "cat", "str", "sparse"} + _hidden_attrs = ( + base.IndexOpsMixin._hidden_attrs + | NDFrame._hidden_attrs + | frozenset(["compress", "ptp"]) + ) + + # Override cache_readonly bc Series is mutable + # error: Incompatible types in assignment (expression has type "property", + # base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]") + hasnans = property( # type: ignore[assignment] + # error: "Callable[[IndexOpsMixin], bool]" has no attribute "fget" + base.IndexOpsMixin.hasnans.fget, # type: ignore[attr-defined] + doc=base.IndexOpsMixin.hasnans.__doc__, + ) + _mgr: SingleManager + div: Callable[[Series, Any], Series] + rdiv: Callable[[Series, Any], Series] + + # ---------------------------------------------------------------------- + # Constructors + + def __init__( + self, + data=None, + index=None, + dtype: Dtype | None = None, + name=None, + copy: bool = False, + fastpath: bool = False, + ) -> None: + + if ( + isinstance(data, (SingleBlockManager, SingleArrayManager)) + and index is None + and dtype is None + and copy is False + ): + # GH#33357 called with just the SingleBlockManager + NDFrame.__init__(self, data) + if fastpath: + # e.g. from _box_col_values, skip validation of name + object.__setattr__(self, "_name", name) + else: + self.name = name + return + + # we are called internally, so short-circuit + if fastpath: + + # data is an ndarray, index is defined + if not isinstance(data, (SingleBlockManager, SingleArrayManager)): + manager = get_option("mode.data_manager") + if manager == "block": + data = SingleBlockManager.from_array(data, index) + elif manager == "array": + data = SingleArrayManager.from_array(data, index) + if copy: + data = data.copy() + if index is None: + index = data.index + + else: + + name = ibase.maybe_extract_name(name, data, type(self)) + + if is_empty_data(data) and dtype is None: + # gh-17261 + warnings.warn( + "The default dtype for empty Series will be 'object' instead " + "of 'float64' in a future version. Specify a dtype explicitly " + "to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # uncomment the line below when removing the FutureWarning + # dtype = np.dtype(object) + + if index is not None: + index = ensure_index(index) + + if data is None: + data = {} + if dtype is not None: + dtype = self._validate_dtype(dtype) + + if isinstance(data, MultiIndex): + raise NotImplementedError( + "initializing a Series from a MultiIndex is not supported" + ) + elif isinstance(data, Index): + + if dtype is not None: + # astype copies + data = data.astype(dtype) + else: + # GH#24096 we need to ensure the index remains immutable + data = data._values.copy() + copy = False + + elif isinstance(data, np.ndarray): + if len(data.dtype): + # GH#13296 we are dealing with a compound dtype, which + # should be treated as 2D + raise ValueError( + "Cannot construct a Series from an ndarray with " + "compound dtype. Use DataFrame instead." 
+ ) + elif isinstance(data, Series): + if index is None: + index = data.index + else: + data = data.reindex(index, copy=copy) + copy = False + data = data._mgr + elif is_dict_like(data): + data, index = self._init_dict(data, index, dtype) + dtype = None + copy = False + elif isinstance(data, (SingleBlockManager, SingleArrayManager)): + if index is None: + index = data.index + elif not data.index.equals(index) or copy: + # GH#19275 SingleBlockManager input should only be called + # internally + raise AssertionError( + "Cannot pass both SingleBlockManager " + "`data` argument and a different " + "`index` argument. `copy` must be False." + ) + + elif isinstance(data, ExtensionArray): + pass + else: + data = com.maybe_iterable_to_list(data) + + if index is None: + if not is_list_like(data): + data = [data] + index = default_index(len(data)) + elif is_list_like(data): + com.require_length_match(data, index) + + # create/copy the manager + if isinstance(data, (SingleBlockManager, SingleArrayManager)): + if dtype is not None: + data = data.astype(dtype=dtype, errors="ignore", copy=copy) + elif copy: + data = data.copy() + else: + data = sanitize_array(data, index, dtype, copy) + + manager = get_option("mode.data_manager") + if manager == "block": + data = SingleBlockManager.from_array(data, index) + elif manager == "array": + data = SingleArrayManager.from_array(data, index) + + NDFrame.__init__(self, data) + if fastpath: + # skips validation of the name + object.__setattr__(self, "_name", name) + else: + self.name = name + self._set_axis(0, index) + + def _init_dict( + self, data, index: Index | None = None, dtype: DtypeObj | None = None + ): + """ + Derive the "_mgr" and "index" attributes of a new Series from a + dictionary input. + + Parameters + ---------- + data : dict or dict-like + Data used to populate the new Series. + index : Index or None, default None + Index for the new Series: if None, use dict keys. + dtype : np.dtype, ExtensionDtype, or None, default None + The dtype for the new Series: if None, infer from data. + + Returns + ------- + _data : BlockManager for the new Series + index : index for the new Series + """ + keys: Index | tuple + + # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] + # raises KeyError), so we iterate the entire dict, and align + if data: + # GH:34717, issue was using zip to extract key and values from data. + # using generators in effects the performance. + # Below is the new way of extracting the keys and values + + keys = tuple(data.keys()) + values = list(data.values()) # Generating list of values- faster way + elif index is not None: + # fastpath for Series(data=None). Just use broadcasting a scalar + # instead of reindexing. + values = na_value_for_dtype(pandas_dtype(dtype), compat=False) + keys = index + else: + keys, values = (), [] + + # Input is now list-like, so rely on "standard" construction: + + # TODO: passing np.float64 to not break anything yet. 
See GH-17261 + s = create_series_with_explicit_dtype( + # error: Argument "index" to "create_series_with_explicit_dtype" has + # incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray, + # ndarray, Index, None]" + values, + index=keys, # type: ignore[arg-type] + dtype=dtype, + dtype_if_empty=np.float64, + ) + + # Now we just make sure the order is respected, if any + if data and index is not None: + s = s.reindex(index, copy=False) + return s._mgr, s.index + + # ---------------------------------------------------------------------- + + @property + def _constructor(self) -> Callable[..., Series]: + return Series + + @property + def _constructor_expanddim(self) -> Callable[..., DataFrame]: + """ + Used when a manipulation result has one higher dimension as the + original, such as Series.to_frame() + """ + from pandas.core.frame import DataFrame + + return DataFrame + + # types + @property + def _can_hold_na(self) -> bool: + return self._mgr._can_hold_na + + def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: + """ + Override generic, we want to set the _typ here. + + This is called from the cython code when we set the `index` attribute + directly, e.g. `series.index = [1, 2, 3]`. + """ + labels = ensure_index(labels) + + if labels._is_all_dates and not ( + type(labels) is Index and not isinstance(labels.dtype, np.dtype) + ): + # exclude e.g. timestamp[ns][pyarrow] dtype from this casting + deep_labels = labels + if isinstance(labels, CategoricalIndex): + deep_labels = labels.categories + + if not isinstance( + deep_labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex) + ): + try: + labels = DatetimeIndex(labels) + except (tslibs.OutOfBoundsDatetime, ValueError): + # labels may exceeds datetime bounds, + # or not be a DatetimeIndex + pass + + # The ensure_index call above ensures we have an Index object + self._mgr.set_axis(axis, labels) + + # ndarray compatibility + @property + def dtype(self) -> DtypeObj: + """ + Return the dtype object of the underlying data. + """ + return self._mgr.dtype + + @property + def dtypes(self) -> DtypeObj: + """ + Return the dtype object of the underlying data. + """ + # DataFrame compatibility + return self.dtype + + @property + def name(self) -> Hashable: + """ + Return the name of the Series. + + The name of a Series becomes its index or column name if it is used + to form a DataFrame. It is also used whenever displaying the Series + using the interpreter. + + Returns + ------- + label (hashable object) + The name of the Series, also the column name if part of a DataFrame. + + See Also + -------- + Series.rename : Sets the Series name when given a scalar input. + Index.name : Corresponding Index property. + + Examples + -------- + The Series name can be set initially when calling the constructor. + + >>> s = pd.Series([1, 2, 3], dtype=np.int64, name='Numbers') + >>> s + 0 1 + 1 2 + 2 3 + Name: Numbers, dtype: int64 + >>> s.name = "Integers" + >>> s + 0 1 + 1 2 + 2 3 + Name: Integers, dtype: int64 + + The name of a Series within a DataFrame is its column name. + + >>> df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], + ... 
columns=["Odd Numbers", "Even Numbers"]) + >>> df + Odd Numbers Even Numbers + 0 1 2 + 1 3 4 + 2 5 6 + >>> df["Even Numbers"].name + 'Even Numbers' + """ + return self._name + + @name.setter + def name(self, value: Hashable) -> None: + validate_all_hashable(value, error_name=f"{type(self).__name__}.name") + object.__setattr__(self, "_name", value) + + @property + def values(self): + """ + Return Series as ndarray or ndarray-like depending on the dtype. + + .. warning:: + + We recommend using :attr:`Series.array` or + :meth:`Series.to_numpy`, depending on whether you need + a reference to the underlying data or a NumPy array. + + Returns + ------- + numpy.ndarray or ndarray-like + + See Also + -------- + Series.array : Reference to the underlying data. + Series.to_numpy : A NumPy array representing the underlying data. + + Examples + -------- + >>> pd.Series([1, 2, 3]).values + array([1, 2, 3]) + + >>> pd.Series(list('aabc')).values + array(['a', 'a', 'b', 'c'], dtype=object) + + >>> pd.Series(list('aabc')).astype('category').values + ['a', 'a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + Timezone aware datetime data is converted to UTC: + + >>> pd.Series(pd.date_range('20130101', periods=3, + ... tz='US/Eastern')).values + array(['2013-01-01T05:00:00.000000000', + '2013-01-02T05:00:00.000000000', + '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]') + """ + return self._mgr.external_values() + + @property + def _values(self): + """ + Return the internal repr of this data (defined by Block.interval_values). + This are the values as stored in the Block (ndarray or ExtensionArray + depending on the Block class), with datetime64[ns] and timedelta64[ns] + wrapped in ExtensionArrays to match Index._values behavior. + + Differs from the public ``.values`` for certain data types, because of + historical backwards compatibility of the public attribute (e.g. period + returns object ndarray and datetimetz a datetime64[ns] ndarray for + ``.values`` while it returns an ExtensionArray for ``._values`` in those + cases). + + Differs from ``.array`` in that this still returns the numpy array if + the Block is backed by a numpy array (except for datetime64 and + timedelta64 dtypes), while ``.array`` ensures to always return an + ExtensionArray. + + Overview: + + dtype | values | _values | array | + ----------- | ------------- | ------------- | ------------- | + Numeric | ndarray | ndarray | PandasArray | + Category | Categorical | Categorical | Categorical | + dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray | + dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | + td64[ns] | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] | + Period | ndarray[obj] | PeriodArray | PeriodArray | + Nullable | EA | EA | EA | + + """ + return self._mgr.internal_values() + + # error: Decorated property not supported + @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[misc] + @property + def array(self) -> ExtensionArray: + return self._mgr.array_values() + + # ops + def ravel(self, order: str = "C") -> np.ndarray: + """ + Return the flattened underlying data as an ndarray. + + Returns + ------- + numpy.ndarray or ndarray-like + Flattened data of the Series. + + See Also + -------- + numpy.ndarray.ravel : Return a flattened array. + """ + return self._values.ravel(order=order) + + def __len__(self) -> int: + """ + Return the length of the Series. + """ + return len(self._mgr) + + def view(self, dtype: Dtype | None = None) -> Series: + """ + Create a new view of the Series. 
+ + This function will return a new Series with a view of the same + underlying values in memory, optionally reinterpreted with a new data + type. The new data type must preserve the same size in bytes as to not + cause index misalignment. + + Parameters + ---------- + dtype : data type + Data type object or one of their string representations. + + Returns + ------- + Series + A new Series object as a view of the same data in memory. + + See Also + -------- + numpy.ndarray.view : Equivalent numpy function to create a new view of + the same data in memory. + + Notes + ----- + Series are instantiated with ``dtype=float64`` by default. While + ``numpy.ndarray.view()`` will return a view with the same data type as + the original array, ``Series.view()`` (without specified dtype) + will try using ``float64`` and may fail if the original data type size + in bytes is not the same. + + Examples + -------- + >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8') + >>> s + 0 -2 + 1 -1 + 2 0 + 3 1 + 4 2 + dtype: int8 + + The 8 bit signed integer representation of `-1` is `0b11111111`, but + the same bytes represent 255 if read as an 8 bit unsigned integer: + + >>> us = s.view('uint8') + >>> us + 0 254 + 1 255 + 2 0 + 3 1 + 4 2 + dtype: uint8 + + The views share the same underlying values: + + >>> us[0] = 128 + >>> s + 0 -128 + 1 -1 + 2 0 + 3 1 + 4 2 + dtype: int8 + """ + # self.array instead of self._values so we piggyback on PandasArray + # implementation + res_values = self.array.view(dtype) + res_ser = self._constructor(res_values, index=self.index) + return res_ser.__finalize__(self, method="view") + + # ---------------------------------------------------------------------- + # NDArray Compat + _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray) + + def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: + """ + Return the values as a NumPy array. + + Users should not call this directly. Rather, it is invoked by + :func:`numpy.array` and :func:`numpy.asarray`. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to use for the resulting NumPy array. By default, + the dtype is inferred from the data. + + Returns + ------- + numpy.ndarray + The values in the series converted to a :class:`numpy.ndarray` + with the specified `dtype`. + + See Also + -------- + array : Create a new array from data. + Series.array : Zero-copy view to the array backing the Series. + Series.to_numpy : Series method for similar behavior. 
+ + Examples + -------- + >>> ser = pd.Series([1, 2, 3]) + >>> np.asarray(ser) + array([1, 2, 3]) + + For timezone-aware data, the timezones may be retained with + ``dtype='object'`` + + >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> np.asarray(tzser, dtype="object") + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET')], + dtype=object) + + Or the values may be localized to UTC and the tzinfo discarded with + ``dtype='datetime64[ns]'`` + + >>> np.asarray(tzser, dtype="datetime64[ns]") # doctest: +ELLIPSIS + array(['1999-12-31T23:00:00.000000000', ...], + dtype='datetime64[ns]') + """ + return np.asarray(self._values, dtype) + + # ---------------------------------------------------------------------- + # Unary Methods + + # coercion + __float__ = _coerce_method(float) + __long__ = _coerce_method(int) + __int__ = _coerce_method(int) + + # ---------------------------------------------------------------------- + + # indexers + @property + def axes(self) -> list[Index]: + """ + Return a list of the row axis labels. + """ + return [self.index] + + # ---------------------------------------------------------------------- + # Indexing Methods + + @Appender(NDFrame.take.__doc__) + def take( + self, indices, axis: Axis = 0, is_copy: bool | None = None, **kwargs + ) -> Series: + if is_copy is not None: + warnings.warn( + "is_copy is deprecated and will be removed in a future version. " + "'take' always returns a copy, so there is no need to specify this.", + FutureWarning, + stacklevel=find_stack_level(), + ) + nv.validate_take((), kwargs) + + indices = ensure_platform_int(indices) + new_index = self.index.take(indices) + new_values = self._values.take(indices) + + result = self._constructor(new_values, index=new_index, fastpath=True) + return result.__finalize__(self, method="take") + + def _take_with_is_copy(self, indices, axis=0) -> Series: + """ + Internal version of the `take` method that sets the `_is_copy` + attribute to keep track of the parent dataframe (using in indexing + for the SettingWithCopyWarning). For Series this does the same + as the public take (it never sets `_is_copy`). + + See the docstring of `take` for full explanation of the parameters. + """ + return self.take(indices=indices, axis=axis) + + def _ixs(self, i: int, axis: int = 0) -> Any: + """ + Return the i-th value or values in the Series by location. + + Parameters + ---------- + i : int + + Returns + ------- + scalar (int) or Series (slice, sequence) + """ + return self._values[i] + + def _slice(self, slobj: slice, axis: int = 0) -> Series: + # axis kwarg is retained for compat with NDFrame method + # _slice is *always* positional + return self._get_values(slobj) + + def __getitem__(self, key): + check_deprecated_indexers(key) + key = com.apply_if_callable(key, self) + + if key is Ellipsis: + return self + + key_is_scalar = is_scalar(key) + if isinstance(key, (list, tuple)): + key = unpack_1tuple(key) + + if is_integer(key) and self.index._should_fallback_to_positional: + return self._values[key] + + elif key_is_scalar: + return self._get_value(key) + + if is_hashable(key): + # Otherwise index.get_value will raise InvalidIndexError + try: + # For labels that don't resolve as scalars like tuples and frozensets + result = self._get_value(key) + + return result + + except (KeyError, TypeError, InvalidIndexError): + # InvalidIndexError for e.g. 
generator + # see test_series_getitem_corner_generator + if isinstance(key, tuple) and isinstance(self.index, MultiIndex): + # We still have the corner case where a tuple is a key + # in the first level of our MultiIndex + return self._get_values_tuple(key) + + if is_iterator(key): + key = list(key) + + if com.is_bool_indexer(key): + key = check_bool_indexer(self.index, key) + key = np.asarray(key, dtype=bool) + return self._get_values(key) + + return self._get_with(key) + + def _get_with(self, key): + # other: fancy integer or otherwise + if isinstance(key, slice): + # _convert_slice_indexer to determine if this slice is positional + # or label based, and if the latter, convert to positional + slobj = self.index._convert_slice_indexer(key, kind="getitem") + return self._slice(slobj) + elif isinstance(key, ABCDataFrame): + raise TypeError( + "Indexing a Series with DataFrame is not " + "supported, use the appropriate DataFrame column" + ) + elif isinstance(key, tuple): + return self._get_values_tuple(key) + + elif not is_list_like(key): + # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684 + return self.loc[key] + + if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)): + key = list(key) + + if isinstance(key, Index): + key_type = key.inferred_type + else: + key_type = lib.infer_dtype(key, skipna=False) + + # Note: The key_type == "boolean" case should be caught by the + # com.is_bool_indexer check in __getitem__ + if key_type == "integer": + # We need to decide whether to treat this as a positional indexer + # (i.e. self.iloc) or label-based (i.e. self.loc) + if not self.index._should_fallback_to_positional: + return self.loc[key] + else: + return self.iloc[key] + + # handle the dup indexing case GH#4246 + return self.loc[key] + + def _get_values_tuple(self, key: tuple): + # mpl hackaround + if com.any_none(*key): + # mpl compat if we look up e.g. ser[:, np.newaxis]; + # see tests.series.timeseries.test_mpl_compat_hack + # the asarray is needed to avoid returning a 2D DatetimeArray + result = np.asarray(self._values[key]) + deprecate_ndim_indexing(result, stacklevel=find_stack_level()) + return result + + if not isinstance(self.index, MultiIndex): + raise KeyError("key of type tuple not found and not a MultiIndex") + + # If key is contained, would have returned by now + indexer, new_index = self.index.get_loc_level(key) + return self._constructor(self._values[indexer], index=new_index).__finalize__( + self + ) + + def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series: + new_mgr = self._mgr.getitem_mgr(indexer) + return self._constructor(new_mgr).__finalize__(self) + + def _get_value(self, label, takeable: bool = False): + """ + Quickly retrieve single value at passed index label. 
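A small sketch (toy data assumed, not from the patch) of the label-versus-positional routing that ``__getitem__`` and ``_get_with`` implement above: integer keys fall back to positions only when the index itself is not integer-like, and boolean masks are routed through ``check_bool_indexer``:

    import pandas as pd

    s = pd.Series([10, 20, 30], index=["a", "b", "c"])
    print(s["b"])         # label lookup
    print(s[1])           # non-integer index, so the integer key is positional
    print(s[["a", "c"]])  # list of labels -> sub-Series
    print(s[s > 15])      # boolean mask

    t = pd.Series([10, 20, 30], index=[2, 1, 0])
    print(t[1])           # integer index: the key is a label here, giving 20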
+ + Parameters + ---------- + label : object + takeable : interpret the index as indexers, default False + + Returns + ------- + scalar value + """ + if takeable: + return self._values[label] + + # Similar to Index.get_value, but we do not fall back to positional + loc = self.index.get_loc(label) + return self.index._get_values_for_loc(self, loc, label) + + def __setitem__(self, key, value) -> None: + check_deprecated_indexers(key) + key = com.apply_if_callable(key, self) + cacher_needs_updating = self._check_is_chained_assignment_possible() + + if key is Ellipsis: + key = slice(None) + + if isinstance(key, slice): + indexer = self.index._convert_slice_indexer(key, kind="getitem") + return self._set_values(indexer, value) + + try: + self._set_with_engine(key, value) + except KeyError: + # We have a scalar (or for MultiIndex or object-dtype, scalar-like) + # key that is not present in self.index. + if is_integer(key) and self.index.inferred_type != "integer": + # positional setter + if not self.index._should_fallback_to_positional: + # GH#33469 + warnings.warn( + "Treating integers as positional in Series.__setitem__ " + "with a Float64Index is deprecated. In a future version, " + "`series[an_int] = val` will insert a new key into the " + "Series. Use `series.iloc[an_int] = val` to treat the " + "key as positional.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # can't use _mgr.setitem_inplace yet bc could have *both* + # KeyError and then ValueError, xref GH#45070 + self._set_values(key, value) + else: + # GH#12862 adding a new key to the Series + self.loc[key] = value + + except (TypeError, ValueError, LossySetitemError): + # The key was OK, but we cannot set the value losslessly + indexer = self.index.get_loc(key) + self._set_values(indexer, value) + + except InvalidIndexError as err: + if isinstance(key, tuple) and not isinstance(self.index, MultiIndex): + # cases with MultiIndex don't get here bc they raise KeyError + # e.g. test_basic_getitem_setitem_corner + raise KeyError( + "key of type tuple not found and not a MultiIndex" + ) from err + + if com.is_bool_indexer(key): + key = check_bool_indexer(self.index, key) + key = np.asarray(key, dtype=bool) + + if ( + is_list_like(value) + and len(value) != len(self) + and not isinstance(value, Series) + and not is_object_dtype(self.dtype) + ): + # Series will be reindexed to have matching length inside + # _where call below + # GH#44265 + indexer = key.nonzero()[0] + self._set_values(indexer, value) + return + + # otherwise with listlike other we interpret series[mask] = other + # as series[mask] = other[mask] + try: + self._where(~key, value, inplace=True) + except InvalidIndexError: + # test_where_dups + self.iloc[key] = value + return + + else: + self._set_with(key, value) + + if cacher_needs_updating: + self._maybe_update_cacher(inplace=True) + + def _set_with_engine(self, key, value) -> None: + loc = self.index.get_loc(key) + + # this is equivalent to self._values[key] = value + self._mgr.setitem_inplace(loc, value) + + def _set_with(self, key, value): + # We got here via exception-handling off of InvalidIndexError, so + # key should always be listlike at this point. 
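Correspondingly, a minimal sketch (illustrative values) of the ``__setitem__`` paths above: setting through the index engine for an existing label, enlargement for a missing one, and mask-style assignment for boolean keys:

    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"])
    s["b"] = 20.0      # existing label: set in place through the index engine
    s["d"] = 4.0       # missing label: enlarges the Series like .loc would
    s[s > 3.0] = 0.0   # boolean mask: treated as s[mask] = value
    print(s)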
+ assert not isinstance(key, tuple) + + if is_iterator(key): + # Without this, the call to infer_dtype will consume the generator + key = list(key) + + if not self.index._should_fallback_to_positional: + # Regardless of the key type, we're treating it as labels + self._set_labels(key, value) + + else: + # Note: key_type == "boolean" should not occur because that + # should be caught by the is_bool_indexer check in __setitem__ + key_type = lib.infer_dtype(key, skipna=False) + + if key_type == "integer": + self._set_values(key, value) + else: + self._set_labels(key, value) + + def _set_labels(self, key, value) -> None: + key = com.asarray_tuplesafe(key) + indexer: np.ndarray = self.index.get_indexer(key) + mask = indexer == -1 + if mask.any(): + raise KeyError(f"{key[mask]} not in index") + self._set_values(indexer, value) + + def _set_values(self, key, value) -> None: + if isinstance(key, (Index, Series)): + key = key._values + + self._mgr = self._mgr.setitem(indexer=key, value=value) + self._maybe_update_cacher() + + def _set_value(self, label, value, takeable: bool = False): + """ + Quickly set single value at passed label. + + If label is not contained, a new object is created with the label + placed at the end of the result index. + + Parameters + ---------- + label : object + Partial indexing with MultiIndex not allowed. + value : object + Scalar value. + takeable : interpret the index as indexers, default False + """ + if not takeable: + try: + loc = self.index.get_loc(label) + except KeyError: + # set using a non-recursive method + self.loc[label] = value + return + else: + loc = label + + self._set_values(loc, value) + + # ---------------------------------------------------------------------- + # Lookup Caching + + @property + def _is_cached(self) -> bool: + """Return boolean indicating if self is cached or not.""" + return getattr(self, "_cacher", None) is not None + + def _get_cacher(self): + """return my cacher or None""" + cacher = getattr(self, "_cacher", None) + if cacher is not None: + cacher = cacher[1]() + return cacher + + def _reset_cacher(self) -> None: + """ + Reset the cacher. + """ + if hasattr(self, "_cacher"): + del self._cacher + + def _set_as_cached(self, item, cacher) -> None: + """ + Set the _cacher attribute on the calling object with a weakref to + cacher. 
+ """ + self._cacher = (item, weakref.ref(cacher)) + + def _clear_item_cache(self) -> None: + # no-op for Series + pass + + def _check_is_chained_assignment_possible(self) -> bool: + """ + See NDFrame._check_is_chained_assignment_possible.__doc__ + """ + if self._is_view and self._is_cached: + ref = self._get_cacher() + if ref is not None and ref._is_mixed_type: + self._check_setitem_copy(t="referent", force=True) + return True + return super()._check_is_chained_assignment_possible() + + def _maybe_update_cacher( + self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False + ) -> None: + """ + See NDFrame._maybe_update_cacher.__doc__ + """ + cacher = getattr(self, "_cacher", None) + if cacher is not None: + assert self.ndim == 1 + ref: DataFrame = cacher[1]() + + # we are trying to reference a dead referent, hence + # a copy + if ref is None: + del self._cacher + # for CoW, we never want to update the parent DataFrame cache + # if the Series changed, and always pop the cached item + elif ( + not ( + get_option("mode.copy_on_write") + and get_option("mode.data_manager") == "block" + ) + and len(self) == len(ref) + and self.name in ref.columns + ): + # GH#42530 self.name must be in ref.columns + # to ensure column still in dataframe + # otherwise, either self or ref has swapped in new arrays + ref._maybe_cache_changed(cacher[0], self, inplace=inplace) + else: + # GH#33675 we have swapped in a new array, so parent + # reference to self is now invalid + ref._item_cache.pop(cacher[0], None) + + super()._maybe_update_cacher( + clear=clear, verify_is_copy=verify_is_copy, inplace=inplace + ) + + # ---------------------------------------------------------------------- + # Unsorted + + @property + def _is_mixed_type(self): + return False + + def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: + """ + Repeat elements of a Series. + + Returns a new Series where each element of the current Series + is repeated consecutively a given number of times. + + Parameters + ---------- + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + Series. + axis : None + Unused. Parameter needed for compatibility with DataFrame. + + Returns + ------- + Series + Newly created Series with repeated elements. + + See Also + -------- + Index.repeat : Equivalent function for Index. + numpy.repeat : Similar method for :class:`numpy.ndarray`. + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c']) + >>> s + 0 a + 1 b + 2 c + dtype: object + >>> s.repeat(2) + 0 a + 0 a + 1 b + 1 b + 2 c + 2 c + dtype: object + >>> s.repeat([1, 2, 3]) + 0 a + 1 b + 1 b + 2 c + 2 c + 2 c + dtype: object + """ + nv.validate_repeat((), {"axis": axis}) + new_index = self.index.repeat(repeats) + new_values = self._values.repeat(repeats) + return self._constructor(new_values, index=new_index).__finalize__( + self, method="repeat" + ) + + @overload + def reset_index( + self, + level: IndexLabel = ..., + *, + drop: Literal[False] = ..., + name: Level = ..., + inplace: Literal[False] = ..., + allow_duplicates: bool = ..., + ) -> DataFrame: + ... + + @overload + def reset_index( + self, + level: IndexLabel = ..., + *, + drop: Literal[True], + name: Level = ..., + inplace: Literal[False] = ..., + allow_duplicates: bool = ..., + ) -> Series: + ... 
+ + @overload + def reset_index( + self, + level: IndexLabel = ..., + *, + drop: bool = ..., + name: Level = ..., + inplace: Literal[True], + allow_duplicates: bool = ..., + ) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) + def reset_index( + self, + level: IndexLabel = None, + drop: bool = False, + name: Level = lib.no_default, + inplace: bool = False, + allow_duplicates: bool = False, + ) -> DataFrame | Series | None: + """ + Generate a new DataFrame or Series with the index reset. + + This is useful when the index needs to be treated as a column, or + when the index is meaningless and needs to be reset to the default + before another operation. + + Parameters + ---------- + level : int, str, tuple, or list, default optional + For a Series with a MultiIndex, only remove the specified levels + from the index. Removes all levels by default. + drop : bool, default False + Just reset the index, without inserting it as a column in + the new DataFrame. + name : object, optional + The name to use for the column containing the original Series + values. Uses ``self.name`` by default. This argument is ignored + when `drop` is True. + inplace : bool, default False + Modify the Series in place (do not create a new object). + allow_duplicates : bool, default False + Allow duplicate column labels to be created. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series or DataFrame or None + When `drop` is False (the default), a DataFrame is returned. + The newly created columns will come first in the DataFrame, + followed by the original Series values. + When `drop` is True, a `Series` is returned. + In either case, if ``inplace=True``, no value is returned. + + See Also + -------- + DataFrame.reset_index: Analogous function for DataFrame. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], name='foo', + ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx')) + + Generate a DataFrame with default index. + + >>> s.reset_index() + idx foo + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + To specify the name of the new column use `name`. + + >>> s.reset_index(name='values') + idx values + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + To generate a new Series with the default set `drop` to True. + + >>> s.reset_index(drop=True) + 0 1 + 1 2 + 2 3 + 3 4 + Name: foo, dtype: int64 + + To update the Series in place, without generating a new one + set `inplace` to True. Note that it also requires ``drop=True``. + + >>> s.reset_index(inplace=True, drop=True) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + Name: foo, dtype: int64 + + The `level` parameter is interesting for Series with a multi-level + index. + + >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']), + ... np.array(['one', 'two', 'one', 'two'])] + >>> s2 = pd.Series( + ... range(4), name='foo', + ... index=pd.MultiIndex.from_arrays(arrays, + ... names=['a', 'b'])) + + To remove a specific level from the Index, use `level`. + + >>> s2.reset_index(level='a') + a foo + b + one bar 0 + two bar 1 + one baz 2 + two baz 3 + + If `level` is not set, all levels are removed from the Index. 
+ + >>> s2.reset_index() + a b foo + 0 bar one 0 + 1 bar two 1 + 2 baz one 2 + 3 baz two 3 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if drop: + new_index = default_index(len(self)) + if level is not None: + level_list: Sequence[Hashable] + if not isinstance(level, (tuple, list)): + level_list = [level] + else: + level_list = level + level_list = [self.index._get_level_number(lev) for lev in level_list] + if len(level_list) < self.index.nlevels: + new_index = self.index.droplevel(level_list) + + if inplace: + self.index = new_index + else: + return self._constructor( + self._values.copy(), index=new_index + ).__finalize__(self, method="reset_index") + elif inplace: + raise TypeError( + "Cannot reset_index inplace on a Series to create a DataFrame" + ) + else: + if name is lib.no_default: + # For backwards compatibility, keep columns as [0] instead of + # [None] when self.name is None + if self.name is None: + name = 0 + else: + name = self.name + + df = self.to_frame(name) + return df.reset_index( + level=level, drop=drop, allow_duplicates=allow_duplicates + ) + return None + + # ---------------------------------------------------------------------- + # Rendering Methods + + def __repr__(self) -> str: + """ + Return a string representation for a particular Series. + """ + repr_params = fmt.get_series_repr_params() + return self.to_string(**repr_params) + + @overload + def to_string( + self, + buf: None = ..., + na_rep: str = ..., + float_format: str | None = ..., + header: bool = ..., + index: bool = ..., + length=..., + dtype=..., + name=..., + max_rows: int | None = ..., + min_rows: int | None = ..., + ) -> str: + ... + + @overload + def to_string( + self, + buf: FilePath | WriteBuffer[str], + na_rep: str = ..., + float_format: str | None = ..., + header: bool = ..., + index: bool = ..., + length=..., + dtype=..., + name=..., + max_rows: int | None = ..., + min_rows: int | None = ..., + ) -> None: + ... + + def to_string( + self, + buf: FilePath | WriteBuffer[str] | None = None, + na_rep: str = "NaN", + float_format: str | None = None, + header: bool = True, + index: bool = True, + length=False, + dtype=False, + name=False, + max_rows: int | None = None, + min_rows: int | None = None, + ) -> str | None: + """ + Render a string representation of the Series. + + Parameters + ---------- + buf : StringIO-like, optional + Buffer to write to. + na_rep : str, optional + String representation of NaN to use, default 'NaN'. + float_format : one-parameter function, optional + Formatter function to apply to columns' elements if they are + floats, default None. + header : bool, default True + Add the Series header (index name). + index : bool, optional + Add index (row) labels, default True. + length : bool, default False + Add the Series length. + dtype : bool, default False + Add the Series dtype. + name : bool, default False + Add the Series name if not None. + max_rows : int, optional + Maximum number of rows to show before truncating. If None, show + all. + min_rows : int, optional + The number of rows to display in a truncated repr (when number + of rows is above `max_rows`). + + Returns + ------- + str or None + String representation of Series if ``buf=None``, otherwise None. 
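A brief usage sketch of ``to_string`` with the switches described above (values are illustrative):

    import pandas as pd

    s = pd.Series([1.5, 2.25, 3.0], name="x")
    print(s.to_string(name=True, dtype=True, length=True))
    print(s.to_string(index=False))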
+ """ + formatter = fmt.SeriesFormatter( + self, + name=name, + length=length, + header=header, + index=index, + dtype=dtype, + na_rep=na_rep, + float_format=float_format, + min_rows=min_rows, + max_rows=max_rows, + ) + result = formatter.to_string() + + # catch contract violations + if not isinstance(result, str): + raise AssertionError( + "result must be of type str, type " + f"of result is {repr(type(result).__name__)}" + ) + + if buf is None: + return result + else: + if hasattr(buf, "write"): + # error: Item "str" of "Union[str, PathLike[str], WriteBuffer + # [str]]" has no attribute "write" + buf.write(result) # type: ignore[union-attr] + else: + # error: Argument 1 to "open" has incompatible type "Union[str, + # PathLike[str], WriteBuffer[str]]"; expected "Union[Union[str, + # bytes, PathLike[str], PathLike[bytes]], int]" + with open(buf, "w") as f: # type: ignore[arg-type] + f.write(result) + return None + + @doc( + klass=_shared_doc_kwargs["klass"], + storage_options=_shared_docs["storage_options"], + examples=dedent( + """Examples + -------- + >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown()) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + + Output markdown with a tabulate option. + + >>> print(s.to_markdown(tablefmt="grid")) + +----+----------+ + | | animal | + +====+==========+ + | 0 | elk | + +----+----------+ + | 1 | pig | + +----+----------+ + | 2 | dog | + +----+----------+ + | 3 | quetzal | + +----+----------+""" + ), + ) + def to_markdown( + self, + buf: IO[str] | None = None, + mode: str = "wt", + index: bool = True, + storage_options: StorageOptions = None, + **kwargs, + ) -> str | None: + """ + Print {klass} in Markdown-friendly format. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + mode : str, optional + Mode in which file is opened, "wt" by default. + index : bool, optional, default True + Add index (row) labels. + + .. versionadded:: 1.1.0 + {storage_options} + + .. versionadded:: 1.2.0 + + **kwargs + These parameters will be passed to `tabulate \ + `_. + + Returns + ------- + str + {klass} in Markdown-friendly format. + + Notes + ----- + Requires the `tabulate `_ package. + + {examples} + """ + return self.to_frame().to_markdown( + buf, mode, index, storage_options=storage_options, **kwargs + ) + + # ---------------------------------------------------------------------- + + def items(self) -> Iterable[tuple[Hashable, Any]]: + """ + Lazily iterate over (index, value) tuples. + + This method returns an iterable tuple (index, value). This is + convenient if you want to create a lazy iterator. + + Returns + ------- + iterable + Iterable of tuples containing the (index, value) pairs from a + Series. + + See Also + -------- + DataFrame.items : Iterate over (column name, Series) pairs. + DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs. + + Examples + -------- + >>> s = pd.Series(['A', 'B', 'C']) + >>> for index, value in s.items(): + ... print(f"Index : {index}, Value : {value}") + Index : 0, Value : A + Index : 1, Value : B + Index : 2, Value : C + """ + return zip(iter(self.index), iter(self)) + + def iteritems(self) -> Iterable[tuple[Hashable, Any]]: + """ + Lazily iterate over (index, value) tuples. + + .. deprecated:: 1.5.0 + iteritems is deprecated and will be removed in a future version. + Use .items instead. 
+ + This method returns an iterable tuple (index, value). This is + convenient if you want to create a lazy iterator. + + Returns + ------- + iterable + Iterable of tuples containing the (index, value) pairs from a + Series. + + See Also + -------- + Series.items : Recommended alternative. + DataFrame.items : Iterate over (column name, Series) pairs. + DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs. + """ + warnings.warn( + "iteritems is deprecated and will be removed in a future version. " + "Use .items instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.items() + + # ---------------------------------------------------------------------- + # Misc public methods + + def keys(self) -> Index: + """ + Return alias for index. + + Returns + ------- + Index + Index of the Series. + """ + return self.index + + def to_dict(self, into: type[dict] = dict) -> dict: + """ + Convert Series to {label -> value} dict or dict-like object. + + Parameters + ---------- + into : class, default dict + The collections.abc.Mapping subclass to use as the return + object. Can be the actual class or an empty + instance of the mapping type you want. If you want a + collections.defaultdict, you must pass it initialized. + + Returns + ------- + collections.abc.Mapping + Key-value representation of Series. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.to_dict() + {0: 1, 1: 2, 2: 3, 3: 4} + >>> from collections import OrderedDict, defaultdict + >>> s.to_dict(OrderedDict) + OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) + >>> dd = defaultdict(list) + >>> s.to_dict(dd) + defaultdict(, {0: 1, 1: 2, 2: 3, 3: 4}) + """ + # GH16122 + into_c = com.standardize_mapping(into) + return into_c((k, maybe_box_native(v)) for k, v in self.items()) + + def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: + """ + Convert Series to DataFrame. + + Parameters + ---------- + name : object, optional + The passed name should substitute for the series name (if it has + one). + + Returns + ------- + DataFrame + DataFrame representation of Series. + + Examples + -------- + >>> s = pd.Series(["a", "b", "c"], + ... name="vals") + >>> s.to_frame() + vals + 0 a + 1 b + 2 c + """ + if name is None: + warnings.warn( + "Explicitly passing `name=None` currently preserves the Series' name " + "or uses a default name of 0. This behaviour is deprecated, and in " + "the future `None` will be used as the name of the resulting " + "DataFrame column.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = lib.no_default + + columns: Index + if name is lib.no_default: + name = self.name + if name is None: + # default to [0], same as we would get with DataFrame(self) + columns = default_index(1) + else: + columns = Index([name]) + else: + columns = Index([name]) + + mgr = self._mgr.to_2d_mgr(columns) + df = self._constructor_expanddim(mgr) + return df.__finalize__(self, method="to_frame") + + def _set_name(self, name, inplace=False) -> Series: + """ + Set the Series name. + + Parameters + ---------- + name : str + inplace : bool + Whether to modify `self` directly or return a copy. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + ser = self if inplace else self.copy() + ser.name = name + return ser + + @Appender( + """ +Examples +-------- +>>> ser = pd.Series([390., 350., 30., 20.], +... 
index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed") +>>> ser +Falcon 390.0 +Falcon 350.0 +Parrot 30.0 +Parrot 20.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(["a", "b", "a", "b"]).mean() +a 210.0 +b 185.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level=0).mean() +Falcon 370.0 +Parrot 25.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(ser > 100).mean() +Max Speed +False 25.0 +True 370.0 +Name: Max Speed, dtype: float64 + +**Grouping by Indexes** + +We can groupby different levels of a hierarchical index +using the `level` parameter: + +>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], +... ['Captive', 'Wild', 'Captive', 'Wild']] +>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) +>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed") +>>> ser +Animal Type +Falcon Captive 390.0 + Wild 350.0 +Parrot Captive 30.0 + Wild 20.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level=0).mean() +Animal +Falcon 370.0 +Parrot 25.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level="Type").mean() +Type +Captive 210.0 +Wild 185.0 +Name: Max Speed, dtype: float64 + +We can also choose to include `NA` in group keys or not by defining +`dropna` parameter, the default setting is `True`. + +>>> ser = pd.Series([1, 2, 3, 3], index=["a", 'a', 'b', np.nan]) +>>> ser.groupby(level=0).sum() +a 3 +b 3 +dtype: int64 + +>>> ser.groupby(level=0, dropna=False).sum() +a 3 +b 3 +NaN 3 +dtype: int64 + +>>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot'] +>>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed") +>>> ser.groupby(["a", "b", "a", np.nan]).mean() +a 210.0 +b 350.0 +Name: Max Speed, dtype: float64 + +>>> ser.groupby(["a", "b", "a", np.nan], dropna=False).mean() +a 210.0 +b 350.0 +NaN 20.0 +Name: Max Speed, dtype: float64 +""" + ) + @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) + def groupby( + self, + by=None, + axis: Axis = 0, + level: Level = None, + as_index: bool = True, + sort: bool = True, + group_keys: bool | lib.NoDefault = no_default, + squeeze: bool | lib.NoDefault = no_default, + observed: bool = False, + dropna: bool = True, + ) -> SeriesGroupBy: + from pandas.core.groupby.generic import SeriesGroupBy + + if squeeze is not no_default: + warnings.warn( + ( + "The `squeeze` parameter is deprecated and " + "will be removed in a future version." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + squeeze = False + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + axis = self._get_axis_number(axis) + + return SeriesGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, + observed=observed, + dropna=dropna, + ) + + # ---------------------------------------------------------------------- + # Statistics, overridden ndarray methods + + # TODO: integrate bottleneck + def count(self, level: Level = None): + """ + Return number of non-NA/null observations in the Series. + + Parameters + ---------- + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a smaller Series. + + Returns + ------- + int or Series (if level specified) + Number of non-null values in the Series. + + See Also + -------- + DataFrame.count : Count non-NA cells for each column or row. 
+ + Examples + -------- + >>> s = pd.Series([0.0, 1.0, np.nan]) + >>> s.count() + 2 + """ + if level is None: + return notna(self._values).sum().astype("int64") + else: + warnings.warn( + "Using the level keyword in DataFrame and Series aggregations is " + "deprecated and will be removed in a future version. Use groupby " + "instead. ser.count(level=1) should use ser.groupby(level=1).count().", + FutureWarning, + stacklevel=find_stack_level(), + ) + if not isinstance(self.index, MultiIndex): + raise ValueError("Series.count level is only valid with a MultiIndex") + + index = self.index + assert isinstance(index, MultiIndex) # for mypy + + if isinstance(level, str): + level = index._get_level_number(level) + + lev = index.levels[level] + level_codes = np.array(index.codes[level], subok=False, copy=True) + + mask = level_codes == -1 + if mask.any(): + level_codes[mask] = cnt = len(lev) + lev = lev.insert(cnt, lev._na_value) + + obs = level_codes[notna(self._values)] + # error: Argument "minlength" to "bincount" has incompatible type + # "Optional[int]"; expected "SupportsIndex" + out = np.bincount(obs, minlength=len(lev) or None) # type: ignore[arg-type] + return self._constructor(out, index=lev, dtype="int64").__finalize__( + self, method="count" + ) + + def mode(self, dropna: bool = True) -> Series: + """ + Return the mode(s) of the Series. + + The mode is the value that appears most often. There can be multiple modes. + + Always returns Series even if only one value is returned. + + Parameters + ---------- + dropna : bool, default True + Don't consider counts of NaN/NaT. + + Returns + ------- + Series + Modes of the Series in sorted order. + """ + # TODO: Add option for bins like value_counts() + values = self._values + if isinstance(values, np.ndarray): + res_values = algorithms.mode(values, dropna=dropna) + else: + res_values = values._mode(dropna=dropna) + + # Ensure index is type stable (should always use int index) + return self._constructor( + res_values, index=range(len(res_values)), name=self.name + ) + + def unique(self) -> ArrayLike: + """ + Return unique values of Series object. + + Uniques are returned in order of appearance. Hash table-based unique, + therefore does NOT sort. + + Returns + ------- + ndarray or ExtensionArray + The unique values returned as a NumPy array. See Notes. + + See Also + -------- + Series.drop_duplicates : Return Series with duplicate values removed. + unique : Top-level unique method for any 1-d array-like object. + Index.unique : Return Index with unique values from an Index object. + + Notes + ----- + Returns the unique values as a NumPy array. In case of an + extension-array backed Series, a new + :class:`~api.extensions.ExtensionArray` of that type with just + the unique values is returned. This includes + + * Categorical + * Period + * Datetime with Timezone + * Interval + * Sparse + * IntegerNA + + See Examples section. + + Examples + -------- + >>> pd.Series([2, 1, 3, 3], name='A').unique() + array([2, 1, 3]) + + >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique() + array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') + + >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern') + ... for _ in range(3)]).unique() + <DatetimeArray> + ['2016-01-01 00:00:00-05:00'] + Length: 1, dtype: datetime64[ns, US/Eastern] + + A Categorical will return categories in the order of + appearance and with the same dtype.
+ + >>> pd.Series(pd.Categorical(list('baabc'))).unique() + ['b', 'a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'), + ... ordered=True)).unique() + ['b', 'a', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] + """ + return super().unique() + + @overload + def drop_duplicates( + self, + keep: Literal["first", "last", False] = ..., + *, + inplace: Literal[False] = ..., + ) -> Series: + ... + + @overload + def drop_duplicates( + self, keep: Literal["first", "last", False] = ..., *, inplace: Literal[True] + ) -> None: + ... + + @overload + def drop_duplicates( + self, keep: Literal["first", "last", False] = ..., *, inplace: bool = ... + ) -> Series | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def drop_duplicates( + self, keep: Literal["first", "last", False] = "first", inplace=False + ) -> Series | None: + """ + Return Series with duplicate values removed. + + Parameters + ---------- + keep : {'first', 'last', ``False``}, default 'first' + Method to handle dropping duplicates: + + - 'first' : Drop duplicates except for the first occurrence. + - 'last' : Drop duplicates except for the last occurrence. + - ``False`` : Drop all duplicates. + + inplace : bool, default ``False`` + If ``True``, performs operation inplace and returns None. + + Returns + ------- + Series or None + Series with duplicates dropped or None if ``inplace=True``. + + See Also + -------- + Index.drop_duplicates : Equivalent method on Index. + DataFrame.drop_duplicates : Equivalent method on DataFrame. + Series.duplicated : Related method on Series, indicating duplicate + Series values. + Series.unique : Return unique values as an array. + + Examples + -------- + Generate a Series with duplicated entries. + + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], + ... name='animal') + >>> s + 0 lama + 1 cow + 2 lama + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + With the 'keep' parameter, the selection behaviour of duplicated values + can be changed. The value 'first' keeps the first occurrence for each + set of duplicated entries. The default value of keep is 'first'. + + >>> s.drop_duplicates() + 0 lama + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + + The value 'last' for parameter 'keep' keeps the last occurrence for + each set of duplicated entries. + + >>> s.drop_duplicates(keep='last') + 1 cow + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + The value ``False`` for parameter 'keep' discards all sets of + duplicated entries. Setting the value of 'inplace' to ``True`` performs + the operation inplace and returns ``None``. + + >>> s.drop_duplicates(keep=False, inplace=True) + >>> s + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + """ + inplace = validate_bool_kwarg(inplace, "inplace") + result = super().drop_duplicates(keep=keep) + if inplace: + self._update_inplace(result) + return None + else: + return result + + def duplicated(self, keep: Literal["first", "last", False] = "first") -> Series: + """ + Indicate duplicate Series values. + + Duplicated values are indicated as ``True`` values in the resulting + Series. Either all duplicates, all except the first or all except the + last occurrence of duplicates can be indicated. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + Method to handle dropping duplicates: + + - 'first' : Mark duplicates as ``True`` except for the first + occurrence. 
+ - 'last' : Mark duplicates as ``True`` except for the last + occurrence. + - ``False`` : Mark all duplicates as ``True``. + + Returns + ------- + Series[bool] + Series indicating whether each value has occurred in the + preceding values. + + See Also + -------- + Index.duplicated : Equivalent method on pandas.Index. + DataFrame.duplicated : Equivalent method on pandas.DataFrame. + Series.drop_duplicates : Remove duplicate values from Series. + + Examples + -------- + By default, for each set of duplicated values, the first occurrence is + set on False and all others on True: + + >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama']) + >>> animals.duplicated() + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: bool + + which is equivalent to + + >>> animals.duplicated(keep='first') + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: bool + + By using 'last', the last occurrence of each set of duplicated values + is set on False and all others on True: + + >>> animals.duplicated(keep='last') + 0 True + 1 False + 2 True + 3 False + 4 False + dtype: bool + + By setting keep on ``False``, all duplicates are True: + + >>> animals.duplicated(keep=False) + 0 True + 1 False + 2 True + 3 False + 4 True + dtype: bool + """ + res = self._duplicated(keep=keep) + result = self._constructor(res, index=self.index) + return result.__finalize__(self, method="duplicated") + + def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: + """ + Return the row label of the minimum value. + + If multiple values equal the minimum, the first row label with that + value is returned. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + Index + Label of the minimum value. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmin : Return indices of the minimum values + along the given axis. + DataFrame.idxmin : Return index of first occurrence of minimum + over requested axis. + Series.idxmax : Return index *label* of the first occurrence + of maximum of values. + + Notes + ----- + This method is the Series version of ``ndarray.argmin``. This method + returns the label of the minimum, while ``ndarray.argmin`` returns + the position. To get the position, use ``series.values.argmin()``. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 1], + ... index=['A', 'B', 'C', 'D']) + >>> s + A 1.0 + B NaN + C 4.0 + D 1.0 + dtype: float64 + + >>> s.idxmin() + 'A' + + If `skipna` is False and there is an NA value in the data, + the function returns ``nan``. + + >>> s.idxmin(skipna=False) + nan + """ + i = self.argmin(axis, skipna, *args, **kwargs) + if i == -1: + return np.nan + return self.index[i] + + def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: + """ + Return the row label of the maximum value. + + If multiple values equal the maximum, the first row label with that + value is returned. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. 
+ *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + Index + Label of the maximum value. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmax : Return indices of the maximum values + along the given axis. + DataFrame.idxmax : Return index of first occurrence of maximum + over requested axis. + Series.idxmin : Return index *label* of the first occurrence + of minimum of values. + + Notes + ----- + This method is the Series version of ``ndarray.argmax``. This method + returns the label of the maximum, while ``ndarray.argmax`` returns + the position. To get the position, use ``series.values.argmax()``. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 3, 4], + ... index=['A', 'B', 'C', 'D', 'E']) + >>> s + A 1.0 + B NaN + C 4.0 + D 3.0 + E 4.0 + dtype: float64 + + >>> s.idxmax() + 'C' + + If `skipna` is False and there is an NA value in the data, + the function returns ``nan``. + + >>> s.idxmax(skipna=False) + nan + """ + i = self.argmax(axis, skipna, *args, **kwargs) + if i == -1: + return np.nan + return self.index[i] + + def round(self, decimals: int = 0, *args, **kwargs) -> Series: + """ + Round each value in a Series to the given number of decimals. + + Parameters + ---------- + decimals : int, default 0 + Number of decimal places to round to. If decimals is negative, + it specifies the number of positions to the left of the decimal point. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + Series + Rounded values of the Series. + + See Also + -------- + numpy.around : Round values of an np.array. + DataFrame.round : Round values of a DataFrame. + + Examples + -------- + >>> s = pd.Series([0.1, 1.3, 2.7]) + >>> s.round() + 0 0.0 + 1 1.0 + 2 3.0 + dtype: float64 + """ + nv.validate_round(args, kwargs) + result = self._values.round(decimals) + result = self._constructor(result, index=self.index).__finalize__( + self, method="round" + ) + + return result + + @overload + def quantile( + self, q: float = ..., interpolation: QuantileInterpolation = ... + ) -> float: + ... + + @overload + def quantile( + self, + q: Sequence[float] | AnyArrayLike, + interpolation: QuantileInterpolation = ..., + ) -> Series: + ... + + @overload + def quantile( + self, + q: float | Sequence[float] | AnyArrayLike = ..., + interpolation: QuantileInterpolation = ..., + ) -> float | Series: + ... + + def quantile( + self, + q: float | Sequence[float] | AnyArrayLike = 0.5, + interpolation: QuantileInterpolation = "linear", + ) -> float | Series: + """ + Return value at the given quantile. + + Parameters + ---------- + q : float or array-like, default 0.5 (50% quantile) + The quantile(s) to compute, which can lie in range: 0 <= q <= 1. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + + Returns + ------- + float or Series + If ``q`` is an array, a Series will be returned where the + index is ``q`` and the values are the quantiles, otherwise + a float will be returned. 
+ + See Also + -------- + core.window.Rolling.quantile : Calculate the rolling quantile. + numpy.percentile : Returns the q-th percentile(s) of the array elements. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.quantile(.5) + 2.5 + >>> s.quantile([.25, .5, .75]) + 0.25 1.75 + 0.50 2.50 + 0.75 3.25 + dtype: float64 + """ + validate_percentile(q) + + # We dispatch to DataFrame so that core.internals only has to worry + # about 2D cases. + df = self.to_frame() + + result = df.quantile(q=q, interpolation=interpolation, numeric_only=False) + if result.ndim == 2: + result = result.iloc[:, 0] + + if is_list_like(q): + result.name = self.name + return self._constructor(result, index=Float64Index(q), name=self.name) + else: + # scalar + return result.iloc[0] + + def corr( + self, + other: Series, + method: Literal["pearson", "kendall", "spearman"] + | Callable[[np.ndarray, np.ndarray], float] = "pearson", + min_periods: int | None = None, + ) -> float: + """ + Compute correlation with `other` Series, excluding missing values. + + The two `Series` objects are not required to be the same length and will be + aligned internally before the correlation function is applied. + + Parameters + ---------- + other : Series + Series with which to compute the correlation. + method : {'pearson', 'kendall', 'spearman'} or callable + Method used to compute correlation: + + - pearson : Standard correlation coefficient + - kendall : Kendall Tau correlation coefficient + - spearman : Spearman rank correlation + - callable: Callable with input two 1d ndarrays and returning a float. + + .. warning:: + Note that the returned matrix from corr will have 1 along the + diagonals and will be symmetric regardless of the callable's + behavior. + min_periods : int, optional + Minimum number of observations needed to have a valid result. + + Returns + ------- + float + Correlation with other. + + See Also + -------- + DataFrame.corr : Compute pairwise correlation between columns. + DataFrame.corrwith : Compute pairwise correlation with another + DataFrame or Series. + + Notes + ----- + Pearson, Kendall and Spearman correlation are currently computed using pairwise complete observations. + + * `Pearson correlation coefficient `_ + * `Kendall rank correlation coefficient `_ + * `Spearman's rank correlation coefficient `_ + + Examples + -------- + >>> def histogram_intersection(a, b): + ... v = np.minimum(a, b).sum().round(decimals=1) + ... return v + >>> s1 = pd.Series([.2, .0, .6, .2]) + >>> s2 = pd.Series([.3, .6, .0, .1]) + >>> s1.corr(s2, method=histogram_intersection) + 0.3 + """ # noqa:E501 + this, other = self.align(other, join="inner", copy=False) + if len(this) == 0: + return np.nan + + if method in ["pearson", "spearman", "kendall"] or callable(method): + return nanops.nancorr( + this.values, other.values, method=method, min_periods=min_periods + ) + + raise ValueError( + "method must be either 'pearson', " + "'spearman', 'kendall', or a callable, " + f"'{method}' was supplied" + ) + + def cov( + self, + other: Series, + min_periods: int | None = None, + ddof: int | None = 1, + ) -> float: + """ + Compute covariance with Series, excluding missing values. + + The two `Series` objects are not required to be the same length and + will be aligned internally before the covariance is calculated. + + Parameters + ---------- + other : Series + Series with which to compute the covariance. + min_periods : int, optional + Minimum number of observations needed to have a valid result. 
+ ddof : int, default 1 + Delta degrees of freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + + .. versionadded:: 1.1.0 + + Returns + ------- + float + Covariance between Series and other normalized by N-1 + (unbiased estimator). + + See Also + -------- + DataFrame.cov : Compute pairwise covariance of columns. + + Examples + -------- + >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035]) + >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198]) + >>> s1.cov(s2) + -0.01685762652715874 + """ + this, other = self.align(other, join="inner", copy=False) + if len(this) == 0: + return np.nan + return nanops.nancov( + this.values, other.values, min_periods=min_periods, ddof=ddof + ) + + @doc( + klass="Series", + extra_params="", + other_klass="DataFrame", + examples=dedent( + """ + Difference with previous row + + >>> s = pd.Series([1, 1, 2, 3, 5, 8]) + >>> s.diff() + 0 NaN + 1 0.0 + 2 1.0 + 3 1.0 + 4 2.0 + 5 3.0 + dtype: float64 + + Difference with 3rd previous row + + >>> s.diff(periods=3) + 0 NaN + 1 NaN + 2 NaN + 3 2.0 + 4 4.0 + 5 6.0 + dtype: float64 + + Difference with following row + + >>> s.diff(periods=-1) + 0 0.0 + 1 -1.0 + 2 -1.0 + 3 -2.0 + 4 -3.0 + 5 NaN + dtype: float64 + + Overflow in input dtype + + >>> s = pd.Series([1, 0], dtype=np.uint8) + >>> s.diff() + 0 NaN + 1 255.0 + dtype: float64""" + ), + ) + def diff(self, periods: int = 1) -> Series: + """ + First discrete difference of element. + + Calculates the difference of a {klass} element compared with another + element in the {klass} (default is element in previous row). + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + {extra_params} + Returns + ------- + {klass} + First differences of the Series. + + See Also + -------- + {klass}.pct_change: Percent change over given number of periods. + {klass}.shift: Shift index by desired number of periods with an + optional time freq. + {other_klass}.diff: First discrete difference of object. + + Notes + ----- + For boolean dtypes, this uses :meth:`operator.xor` rather than + :meth:`operator.sub`. + The result is calculated according to current dtype in {klass}, + however dtype of the result is always float64. + + Examples + -------- + {examples} + """ + result = algorithms.diff(self._values, periods) + return self._constructor(result, index=self.index).__finalize__( + self, method="diff" + ) + + def autocorr(self, lag: int = 1) -> float: + """ + Compute the lag-N autocorrelation. + + This method computes the Pearson correlation between + the Series and its shifted self. + + Parameters + ---------- + lag : int, default 1 + Number of lags to apply before performing autocorrelation. + + Returns + ------- + float + The Pearson correlation between self and self.shift(lag). + + See Also + -------- + Series.corr : Compute the correlation between two Series. + Series.shift : Shift index by desired number of periods. + DataFrame.corr : Compute pairwise correlation of columns. + DataFrame.corrwith : Compute pairwise correlation between rows or + columns of two DataFrame objects. + + Notes + ----- + If the Pearson correlation is not well defined return 'NaN'. + + Examples + -------- + >>> s = pd.Series([0.25, 0.5, 0.2, -0.05]) + >>> s.autocorr() # doctest: +ELLIPSIS + 0.10355... + >>> s.autocorr(lag=2) # doctest: +ELLIPSIS + -0.99999... + + If the Pearson correlation is not well defined, then 'NaN' is returned. 
+ + >>> s = pd.Series([1, 0, 0, 0]) + >>> s.autocorr() + nan + """ + return self.corr(self.shift(lag)) + + def dot(self, other: AnyArrayLike) -> Series | np.ndarray: + """ + Compute the dot product between the Series and the columns of other. + + This method computes the dot product between the Series and another + one, or the Series and each columns of a DataFrame, or the Series and + each columns of an array. + + It can also be called using `self @ other` in Python >= 3.5. + + Parameters + ---------- + other : Series, DataFrame or array-like + The other object to compute the dot product with its columns. + + Returns + ------- + scalar, Series or numpy.ndarray + Return the dot product of the Series and other if other is a + Series, the Series of the dot product of Series and each rows of + other if other is a DataFrame or a numpy.ndarray between the Series + and each columns of the numpy array. + + See Also + -------- + DataFrame.dot: Compute the matrix product with the DataFrame. + Series.mul: Multiplication of series and other, element-wise. + + Notes + ----- + The Series and other has to share the same index if other is a Series + or a DataFrame. + + Examples + -------- + >>> s = pd.Series([0, 1, 2, 3]) + >>> other = pd.Series([-1, 2, -3, 4]) + >>> s.dot(other) + 8 + >>> s @ other + 8 + >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]]) + >>> s.dot(df) + 0 24 + 1 14 + dtype: int64 + >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]]) + >>> s.dot(arr) + array([24, 14]) + """ + if isinstance(other, (Series, ABCDataFrame)): + common = self.index.union(other.index) + if len(common) > len(self.index) or len(common) > len(other.index): + raise ValueError("matrices are not aligned") + + left = self.reindex(index=common, copy=False) + right = other.reindex(index=common, copy=False) + lvals = left.values + rvals = right.values + else: + lvals = self.values + rvals = np.asarray(other) + if lvals.shape[0] != rvals.shape[0]: + raise Exception( + f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" + ) + + if isinstance(other, ABCDataFrame): + return self._constructor( + np.dot(lvals, rvals), index=other.columns + ).__finalize__(self, method="dot") + elif isinstance(other, Series): + return np.dot(lvals, rvals) + elif isinstance(rvals, np.ndarray): + return np.dot(lvals, rvals) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + def __matmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(other) + + def __rmatmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(np.transpose(other)) + + @doc(base.IndexOpsMixin.searchsorted, klass="Series") + # Signature of "searchsorted" incompatible with supertype "IndexOpsMixin" + def searchsorted( # type: ignore[override] + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + return base.IndexOpsMixin.searchsorted(self, value, side=side, sorter=sorter) + + # ------------------------------------------------------------------- + # Combination + + def append( + self, to_append, ignore_index: bool = False, verify_integrity: bool = False + ) -> Series: + """ + Concatenate two or more Series. + + .. deprecated:: 1.4.0 + Use :func:`concat` instead. 
For further details see + :ref:`whatsnew_140.deprecations.frame_series_append` + + Parameters + ---------- + to_append : Series or list/tuple of Series + Series to append with self. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + verify_integrity : bool, default False + If True, raise Exception on creating index with duplicates. + + Returns + ------- + Series + Concatenated Series. + + See Also + -------- + concat : General function to concatenate DataFrame or Series objects. + + Notes + ----- + Iteratively appending to a Series can be more computationally intensive + than a single concatenate. A better solution is to append values to a + list and then concatenate the list with the original Series all at + once. + + Examples + -------- + >>> s1 = pd.Series([1, 2, 3]) + >>> s2 = pd.Series([4, 5, 6]) + >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5]) + >>> s1.append(s2) + 0 1 + 1 2 + 2 3 + 0 4 + 1 5 + 2 6 + dtype: int64 + + >>> s1.append(s3) + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + 5 6 + dtype: int64 + + With `ignore_index` set to True: + + >>> s1.append(s2, ignore_index=True) + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + 5 6 + dtype: int64 + + With `verify_integrity` set to True: + + >>> s1.append(s2, verify_integrity=True) + Traceback (most recent call last): + ... + ValueError: Indexes have overlapping values: [0, 1, 2] + """ + warnings.warn( + "The series.append method is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.concat instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return self._append(to_append, ignore_index, verify_integrity) + + def _append( + self, to_append, ignore_index: bool = False, verify_integrity: bool = False + ): + from pandas.core.reshape.concat import concat + + if isinstance(to_append, (list, tuple)): + to_concat = [self] + to_concat.extend(to_append) + else: + to_concat = [self, to_append] + if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]): + msg = "to_append should be a Series or list/tuple of Series, got DataFrame" + raise TypeError(msg) + return concat( + to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity + ) + + def _binop(self, other: Series, func, level=None, fill_value=None): + """ + Perform generic binary operation with optional fill value. + + Parameters + ---------- + other : Series + func : binary operator + fill_value : float or object + Value to substitute for NA/null values. If both Series are NA in a + location, the result will be NA regardless of the passed fill value. + level : int or level name, default None + Broadcast across a level, matching Index values on the + passed MultiIndex level. + + Returns + ------- + Series + """ + if not isinstance(other, Series): + raise AssertionError("Other operand must be Series") + + this = self + + if not self.index.equals(other.index): + this, other = self.align(other, level=level, join="outer", copy=False) + + this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value) + + with np.errstate(all="ignore"): + result = func(this_vals, other_vals) + + name = ops.get_op_result_name(self, other) + return this._construct_result(result, name) + + def _construct_result( + self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable + ) -> Series | tuple[Series, Series]: + """ + Construct an appropriately-labelled Series from the result of an op. 
+ + Parameters + ---------- + result : ndarray or ExtensionArray + name : Label + + Returns + ------- + Series + In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. + """ + if isinstance(result, tuple): + # produced by divmod or rdivmod + + res1 = self._construct_result(result[0], name=name) + res2 = self._construct_result(result[1], name=name) + + # GH#33427 assertions to keep mypy happy + assert isinstance(res1, Series) + assert isinstance(res2, Series) + return (res1, res2) + + # We do not pass dtype to ensure that the Series constructor + # does inference in the case where `result` has object-dtype. + out = self._constructor(result, index=self.index) + out = out.__finalize__(self) + + # Set the result's name after __finalize__ is called because __finalize__ + # would set it back to self.name + out.name = name + return out + + @doc( + _shared_docs["compare"], + """ +Returns +------- +Series or DataFrame + If axis is 0 or 'index' the result will be a Series. + The resulting index will be a MultiIndex with 'self' and 'other' + stacked alternately at the inner level. + + If axis is 1 or 'columns' the result will be a DataFrame. + It will have two columns namely 'self' and 'other'. + +See Also +-------- +DataFrame.compare : Compare with another DataFrame and show differences. + +Notes +----- +Matching NaNs will not appear as a difference. + +Examples +-------- +>>> s1 = pd.Series(["a", "b", "c", "d", "e"]) +>>> s2 = pd.Series(["a", "a", "c", "b", "e"]) + +Align the differences on columns + +>>> s1.compare(s2) + self other +1 b a +3 d b + +Stack the differences on indices + +>>> s1.compare(s2, align_axis=0) +1 self b + other a +3 self d + other b +dtype: object + +Keep all original rows + +>>> s1.compare(s2, keep_shape=True) + self other +0 NaN NaN +1 b a +2 NaN NaN +3 d b +4 NaN NaN + +Keep all original rows and also all original values + +>>> s1.compare(s2, keep_shape=True, keep_equal=True) + self other +0 a a +1 b a +2 c c +3 d b +4 e e +""", + klass=_shared_doc_kwargs["klass"], + ) + def compare( + self, + other: Series, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names: Suffixes = ("self", "other"), + ) -> DataFrame | Series: + return super().compare( + other=other, + align_axis=align_axis, + keep_shape=keep_shape, + keep_equal=keep_equal, + result_names=result_names, + ) + + def combine( + self, + other: Series | Hashable, + func: Callable[[Hashable, Hashable], Hashable], + fill_value: Hashable = None, + ) -> Series: + """ + Combine the Series with a Series or scalar according to `func`. + + Combine the Series and `other` using `func` to perform elementwise + selection for combined Series. + `fill_value` is assumed when value is missing at some index + from one of the two objects being combined. + + Parameters + ---------- + other : Series or scalar + The value(s) to be combined with the `Series`. + func : function + Function that takes two scalars as inputs and returns an element. + fill_value : scalar, optional + The value to assume when an index is missing from + one Series or the other. The default specifies to use the + appropriate NaN value for the underlying dtype of the Series. + + Returns + ------- + Series + The result of combining the Series with the other object. + + See Also + -------- + Series.combine_first : Combine Series values, choosing the calling + Series' values first. + + Examples + -------- + Consider 2 Datasets ``s1`` and ``s2`` containing + highest clocked speeds of different birds. 
+ + >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0}) + >>> s1 + falcon 330.0 + eagle 160.0 + dtype: float64 + >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0}) + >>> s2 + falcon 345.0 + eagle 200.0 + duck 30.0 + dtype: float64 + + Now, to combine the two datasets and view the highest speeds + of the birds across the two datasets + + >>> s1.combine(s2, max) + duck NaN + eagle 200.0 + falcon 345.0 + dtype: float64 + + In the previous example, the resulting value for duck is missing, + because the maximum of a NaN and a float is a NaN. + So, in the example, we set ``fill_value=0``, + so the maximum value returned will be the value from some dataset. + + >>> s1.combine(s2, max, fill_value=0) + duck 30.0 + eagle 200.0 + falcon 345.0 + dtype: float64 + """ + if fill_value is None: + fill_value = na_value_for_dtype(self.dtype, compat=False) + + if isinstance(other, Series): + # If other is a Series, result is based on union of Series, + # so do this element by element + new_index = self.index.union(other.index) + new_name = ops.get_op_result_name(self, other) + new_values = np.empty(len(new_index), dtype=object) + for i, idx in enumerate(new_index): + lv = self.get(idx, fill_value) + rv = other.get(idx, fill_value) + with np.errstate(all="ignore"): + new_values[i] = func(lv, rv) + else: + # Assume that other is a scalar, so apply the function for + # each element in the Series + new_index = self.index + new_values = np.empty(len(new_index), dtype=object) + with np.errstate(all="ignore"): + new_values[:] = [func(lv, other) for lv in self._values] + new_name = self.name + + # try_float=False is to match agg_series + npvalues = lib.maybe_convert_objects(new_values, try_float=False) + res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False) + return self._constructor(res_values, index=new_index, name=new_name) + + def combine_first(self, other) -> Series: + """ + Update null elements with value in the same location in 'other'. + + Combine two Series objects by filling null values in one Series with + non-null values from the other Series. Result index will be the union + of the two indexes. + + Parameters + ---------- + other : Series + The value(s) to be used for filling null values. + + Returns + ------- + Series + The result of combining the provided Series with the other object. + + See Also + -------- + Series.combine : Perform element-wise operation on two Series + using a given function. + + Examples + -------- + >>> s1 = pd.Series([1, np.nan]) + >>> s2 = pd.Series([3, 4, 5]) + >>> s1.combine_first(s2) + 0 1.0 + 1 4.0 + 2 5.0 + dtype: float64 + + Null values still persist if the location of that null value + does not exist in `other` + + >>> s1 = pd.Series({'falcon': np.nan, 'eagle': 160.0}) + >>> s2 = pd.Series({'eagle': 200.0, 'duck': 30.0}) + >>> s1.combine_first(s2) + duck 30.0 + eagle 160.0 + falcon NaN + dtype: float64 + """ + new_index = self.index.union(other.index) + this = self.reindex(new_index, copy=False) + other = other.reindex(new_index, copy=False) + if this.dtype.kind == "M" and other.dtype.kind != "M": + other = to_datetime(other) + + return this.where(notna(this), other) + + def update(self, other: Series | Sequence | Mapping) -> None: + """ + Modify Series in place using values from passed Series. + + Uses non-NA values from passed Series to make updates. Aligns + on index. 
+ + Parameters + ---------- + other : Series, or object coercible into Series + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, 5, 6])) + >>> s + 0 4 + 1 5 + 2 6 + dtype: int64 + + >>> s = pd.Series(['a', 'b', 'c']) + >>> s.update(pd.Series(['d', 'e'], index=[0, 2])) + >>> s + 0 d + 1 b + 2 e + dtype: object + + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, 5, 6, 7, 8])) + >>> s + 0 4 + 1 5 + 2 6 + dtype: int64 + + If ``other`` contains NaNs the corresponding values are not updated + in the original Series. + + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, np.nan, 6])) + >>> s + 0 4 + 1 2 + 2 6 + dtype: int64 + + ``other`` can also be a non-Series object type + that is coercible into a Series + + >>> s = pd.Series([1, 2, 3]) + >>> s.update([4, np.nan, 6]) + >>> s + 0 4 + 1 2 + 2 6 + dtype: int64 + + >>> s = pd.Series([1, 2, 3]) + >>> s.update({1: 9}) + >>> s + 0 1 + 1 9 + 2 3 + dtype: int64 + """ + + if not isinstance(other, Series): + other = Series(other) + + other = other.reindex_like(self) + mask = notna(other) + + self._mgr = self._mgr.putmask(mask=mask, new=other) + self._maybe_update_cacher() + + # ---------------------------------------------------------------------- + # Reindexing, sorting + + # error: Signature of "sort_values" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def sort_values( + self, + *, + axis: Axis = ..., + ascending: bool | int | Sequence[bool] | Sequence[int] = ..., + inplace: Literal[False] = ..., + kind: str = ..., + na_position: str = ..., + ignore_index: bool = ..., + key: ValueKeyFunc = ..., + ) -> Series: + ... + + @overload + def sort_values( + self, + *, + axis: Axis = ..., + ascending: bool | int | Sequence[bool] | Sequence[int] = ..., + inplace: Literal[True], + kind: str = ..., + na_position: str = ..., + ignore_index: bool = ..., + key: ValueKeyFunc = ..., + ) -> None: + ... + + # error: Signature of "sort_values" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def sort_values( # type: ignore[override] + self, + axis: Axis = 0, + ascending: bool | int | Sequence[bool] | Sequence[int] = True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool = False, + key: ValueKeyFunc = None, + ) -> Series | None: + """ + Sort by the values. + + Sort a Series in ascending or descending order by some + criterion. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + ascending : bool or list of bools, default True + If True, sort values in ascending order, otherwise descending. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. 'mergesort' and 'stable' are the only stable algorithms. + na_position : {'first' or 'last'}, default 'last' + Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at + the end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + key : callable, optional + If not None, apply the key function to the series values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. 
It should expect a + ``Series`` and return an array-like. + + .. versionadded:: 1.1.0 + + Returns + ------- + Series or None + Series ordered by values or None if ``inplace=True``. + + See Also + -------- + Series.sort_index : Sort by the Series indices. + DataFrame.sort_values : Sort DataFrame by the values along either axis. + DataFrame.sort_index : Sort DataFrame by indices. + + Examples + -------- + >>> s = pd.Series([np.nan, 1, 3, 10, 5]) + >>> s + 0 NaN + 1 1.0 + 2 3.0 + 3 10.0 + 4 5.0 + dtype: float64 + + Sort values ascending order (default behaviour) + + >>> s.sort_values(ascending=True) + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + 0 NaN + dtype: float64 + + Sort values descending order + + >>> s.sort_values(ascending=False) + 3 10.0 + 4 5.0 + 2 3.0 + 1 1.0 + 0 NaN + dtype: float64 + + Sort values inplace + + >>> s.sort_values(ascending=False, inplace=True) + >>> s + 3 10.0 + 4 5.0 + 2 3.0 + 1 1.0 + 0 NaN + dtype: float64 + + Sort values putting NAs first + + >>> s.sort_values(na_position='first') + 0 NaN + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + dtype: float64 + + Sort a series of strings + + >>> s = pd.Series(['z', 'b', 'd', 'a', 'c']) + >>> s + 0 z + 1 b + 2 d + 3 a + 4 c + dtype: object + + >>> s.sort_values() + 3 a + 1 b + 4 c + 2 d + 0 z + dtype: object + + Sort using a key function. Your `key` function will be + given the ``Series`` of values and should return an array-like. + + >>> s = pd.Series(['a', 'B', 'c', 'D', 'e']) + >>> s.sort_values() + 1 B + 3 D + 0 a + 2 c + 4 e + dtype: object + >>> s.sort_values(key=lambda x: x.str.lower()) + 0 a + 1 B + 2 c + 3 D + 4 e + dtype: object + + NumPy ufuncs work well here. For example, we can + sort by the ``sin`` of the value + + >>> s = pd.Series([-4, -2, 0, 2, 4]) + >>> s.sort_values(key=np.sin) + 1 -2 + 4 4 + 2 0 + 0 -4 + 3 2 + dtype: int64 + + More complicated user-defined functions can be used, + as long as they expect a Series and return an array-like + + >>> s.sort_values(key=lambda x: (np.tan(x.cumsum()))) + 0 -4 + 3 2 + 4 4 + 1 -2 + 2 0 + dtype: int64 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + # Validate the axis parameter + self._get_axis_number(axis) + + # GH 5856/5853 + if inplace and self._is_cached: + raise ValueError( + "This Series is a view of some other array, to " + "sort in-place you must create a copy" + ) + + if is_list_like(ascending): + ascending = cast(Sequence[Union[bool, int]], ascending) + if len(ascending) != 1: + raise ValueError( + f"Length of ascending ({len(ascending)}) must be 1 for Series" + ) + ascending = ascending[0] + + ascending = validate_ascending(ascending) + + if na_position not in ["first", "last"]: + raise ValueError(f"invalid na_position: {na_position}") + + # GH 35922. Make sorting stable by leveraging nargsort + values_to_sort = ensure_key_mapped(self, key)._values if key else self._values + sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position) + + result = self._constructor( + self._values[sorted_index], index=self.index[sorted_index] + ) + + if ignore_index: + result.index = default_index(len(sorted_index)) + + if not inplace: + return result.__finalize__(self, method="sort_values") + self._update_inplace(result) + return None + + @overload + def sort_index( + self, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool | Sequence[bool] = ..., + inplace: Literal[True], + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool = ..., + ignore_index: bool = ..., + key: IndexKeyFunc = ..., + ) -> None: + ... 
+ + @overload + def sort_index( + self, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool | Sequence[bool] = ..., + inplace: Literal[False] = ..., + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool = ..., + ignore_index: bool = ..., + key: IndexKeyFunc = ..., + ) -> Series: + ... + + @overload + def sort_index( + self, + *, + axis: Axis = ..., + level: IndexLabel = ..., + ascending: bool | Sequence[bool] = ..., + inplace: bool = ..., + kind: SortKind = ..., + na_position: NaPosition = ..., + sort_remaining: bool = ..., + ignore_index: bool = ..., + key: IndexKeyFunc = ..., + ) -> Series | None: + ... + + # error: Signature of "sort_index" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def sort_index( # type: ignore[override] + self, + axis: Axis = 0, + level: IndexLabel = None, + ascending: bool | Sequence[bool] = True, + inplace: bool = False, + kind: SortKind = "quicksort", + na_position: NaPosition = "last", + sort_remaining: bool = True, + ignore_index: bool = False, + key: IndexKeyFunc = None, + ) -> Series | None: + """ + Sort Series by index labels. + + Returns a new Series sorted by label if `inplace` argument is + ``False``, otherwise updates the original series and returns None. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + level : int, optional + If not None, sort on values in specified index level(s). + ascending : bool or list-like of bools, default True + Sort ascending vs. descending. When the index is a MultiIndex the + sort direction can be controlled for each level individually. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. 'mergesort' and 'stable' are the only stable algorithms. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. + Not implemented for MultiIndex. + sort_remaining : bool, default True + If True and sorting by level and index is multilevel, sort by other + levels too (in order) after sorting by specified level. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + key : callable, optional + If not None, apply the key function to the index values + before sorting. This is similar to the `key` argument in the + builtin :meth:`sorted` function, with the notable difference that + this `key` function should be *vectorized*. It should expect an + ``Index`` and return an ``Index`` of the same shape. + + .. versionadded:: 1.1.0 + + Returns + ------- + Series or None + The original Series sorted by the labels or None if ``inplace=True``. + + See Also + -------- + DataFrame.sort_index: Sort DataFrame by the index. + DataFrame.sort_values: Sort DataFrame by the value. + Series.sort_values : Sort Series by the value. 
+ + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4]) + >>> s.sort_index() + 1 c + 2 b + 3 a + 4 d + dtype: object + + Sort Descending + + >>> s.sort_index(ascending=False) + 4 d + 3 a + 2 b + 1 c + dtype: object + + Sort Inplace + + >>> s.sort_index(inplace=True) + >>> s + 1 c + 2 b + 3 a + 4 d + dtype: object + + By default NaNs are put at the end, but use `na_position` to place + them at the beginning + + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan]) + >>> s.sort_index(na_position='first') + NaN d + 1.0 c + 2.0 b + 3.0 a + dtype: object + + Specify index level to sort + + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', + ... 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one', + ... 'two', 'one', 'two', 'one'])] + >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> s.sort_index(level=1) + bar one 8 + baz one 6 + foo one 4 + qux one 2 + bar two 7 + baz two 5 + foo two 3 + qux two 1 + dtype: int64 + + Does not sort by remaining levels when sorting by levels + + >>> s.sort_index(level=1, sort_remaining=False) + qux one 2 + foo one 4 + baz one 6 + bar one 8 + qux two 1 + foo two 3 + baz two 5 + bar two 7 + dtype: int64 + + Apply a key function before sorting + + >>> s = pd.Series([1, 2, 3, 4], index=['A', 'b', 'C', 'd']) + >>> s.sort_index(key=lambda x : x.str.lower()) + A 1 + b 2 + C 3 + d 4 + dtype: int64 + """ + + return super().sort_index( + axis=axis, + level=level, + ascending=ascending, + inplace=inplace, + kind=kind, + na_position=na_position, + sort_remaining=sort_remaining, + ignore_index=ignore_index, + key=key, + ) + + def argsort( + self, + axis: Axis = 0, + kind: SortKind = "quicksort", + order: None = None, + ) -> Series: + """ + Return the integer indices that would sort the Series values. + + Override ndarray.argsort. Argsorts the value, omitting NA/null values, + and places the result in the same locations as the non-NA values. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort' + Choice of sorting algorithm. See :func:`numpy.sort` for more + information. 'mergesort' and 'stable' are the only stable algorithms. + order : None + Has no effect but is accepted for compatibility with numpy. + + Returns + ------- + Series[np.intp] + Positions of values within the sort order with -1 indicating + nan values. + + See Also + -------- + numpy.ndarray.argsort : Returns the indices that would sort this array. + """ + values = self._values + mask = isna(values) + + if mask.any(): + result = np.full(len(self), -1, dtype=np.intp) + notmask = ~mask + result[notmask] = np.argsort(values[notmask], kind=kind) + else: + result = np.argsort(values, kind=kind) + + res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp) + return res.__finalize__(self, method="argsort") + + def nlargest( + self, n: int = 5, keep: Literal["first", "last", "all"] = "first" + ) -> Series: + """ + Return the largest `n` elements. + + Parameters + ---------- + n : int, default 5 + Return this many descending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + + - ``first`` : return the first `n` occurrences in order + of appearance. + - ``last`` : return the last `n` occurrences in reverse + order of appearance. + - ``all`` : keep all occurrences. 
This can result in a Series of + size larger than `n`. + + Returns + ------- + Series + The `n` largest values in the Series, sorted in decreasing order. + + See Also + -------- + Series.nsmallest: Get the `n` smallest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. + + Notes + ----- + Faster than ``.sort_values(ascending=False).head(n)`` for small `n` + relative to the size of the ``Series`` object. + + Examples + -------- + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: int64 + + The `n` largest elements where ``n=5`` by default. + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3``. Default `keep` value is 'first' + so Malta will be kept. + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` and keeping the last duplicates. + Brunei will be kept since it is the last with value 434000 based on + the index order. + + >>> s.nlargest(3, keep='last') + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has five elements due to the three duplicates. + + >>> s.nlargest(3, keep='all') + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + """ + return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() + + def nsmallest(self, n: int = 5, keep: str = "first") -> Series: + """ + Return the smallest `n` elements. + + Parameters + ---------- + n : int, default 5 + Return this many ascending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + + - ``first`` : return the first `n` occurrences in order + of appearance. + - ``last`` : return the last `n` occurrences in reverse + order of appearance. + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. + + Returns + ------- + Series + The `n` smallest values in the Series, sorted in increasing order. + + See Also + -------- + Series.nlargest: Get the `n` largest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. + + Notes + ----- + Faster than ``.sort_values().head(n)`` for small `n` relative to + the size of the ``Series`` object. + + Examples + -------- + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Brunei": 434000, "Malta": 434000, + ... "Maldives": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Brunei 434000 + Malta 434000 + Maldives 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: int64 + + The `n` smallest elements where ``n=5`` by default. 
+ + >>> s.nsmallest() + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Iceland 337000 + dtype: int64 + + The `n` smallest elements where ``n=3``. Default `keep` value is + 'first' so Nauru and Tuvalu will be kept. + + >>> s.nsmallest(3) + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` and keeping the last + duplicates. Anguilla and Tuvalu will be kept since they are the last + with value 11300 based on the index order. + + >>> s.nsmallest(3, keep='last') + Montserrat 5200 + Anguilla 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has four elements due to the three duplicates. + + >>> s.nsmallest(3, keep='all') + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: int64 + """ + return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() + + @doc( + klass=_shared_doc_kwargs["klass"], + extra_params=dedent( + """copy : bool, default True + Whether to copy underlying data.""" + ), + examples=dedent( + """\ + Examples + -------- + >>> s = pd.Series( + ... ["A", "B", "A", "C"], + ... index=[ + ... ["Final exam", "Final exam", "Coursework", "Coursework"], + ... ["History", "Geography", "History", "Geography"], + ... ["January", "February", "March", "April"], + ... ], + ... ) + >>> s + Final exam History January A + Geography February B + Coursework History March A + Geography April C + dtype: object + + In the following example, we will swap the levels of the indices. + Here, we will swap the levels column-wise, but levels can be swapped row-wise + in a similar manner. Note that column-wise is the default behaviour. + By not supplying any arguments for i and j, we swap the last and second to + last indices. + + >>> s.swaplevel() + Final exam January History A + February Geography B + Coursework March History A + April Geography C + dtype: object + + By supplying one argument, we can choose which index to swap the last + index with. We can for example swap the first index with the last one as + follows. + + >>> s.swaplevel(0) + January History Final exam A + February Geography Final exam B + March History Coursework A + April Geography Coursework C + dtype: object + + We can also define explicitly which indices we want to swap by supplying values + for both i and j. Here, we for example swap the first and second indices. + + >>> s.swaplevel(0, 1) + History Final exam January A + Geography Final exam February B + History Coursework March A + Geography Coursework April C + dtype: object""" + ), + ) + def swaplevel(self, i: Level = -2, j: Level = -1, copy: bool = True) -> Series: + """ + Swap levels i and j in a :class:`MultiIndex`. + + Default is to swap the two innermost levels of the index. + + Parameters + ---------- + i, j : int or str + Levels of the indices to be swapped. Can pass level name as string. + {extra_params} + + Returns + ------- + {klass} + {klass} with levels swapped in MultiIndex. + + {examples} + """ + assert isinstance(self.index, MultiIndex) + new_index = self.index.swaplevel(i, j) + return self._constructor(self._values, index=new_index, copy=copy).__finalize__( + self, method="swaplevel" + ) + + def reorder_levels(self, order: Sequence[Level]) -> Series: + """ + Rearrange index levels using input order. + + May not drop or duplicate levels. + + Parameters + ---------- + order : list of int representing new level order + Reference level by number or key. 
+ + Returns + ------- + type of caller (new object) + """ + if not isinstance(self.index, MultiIndex): # pragma: no cover + raise Exception("Can only reorder levels on a hierarchical axis.") + + result = self.copy() + assert isinstance(result.index, MultiIndex) + result.index = result.index.reorder_levels(order) + return result + + def explode(self, ignore_index: bool = False) -> Series: + """ + Transform each element of a list-like to a row. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + ignore_index : bool, default False + If True, the resulting index will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.1.0 + + Returns + ------- + Series + Exploded lists to rows; index will be duplicated for these rows. + + See Also + -------- + Series.str.split : Split string values on specified separator. + Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex + to produce DataFrame. + DataFrame.melt : Unpivot a DataFrame from wide format to long format. + DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + + Notes + ----- + This routine will explode list-likes including lists, tuples, sets, + Series, and np.ndarray. The result dtype of the subset rows will + be object. Scalars will be returned unchanged, and empty list-likes will + result in a np.nan for that row. In addition, the ordering of elements in + the output will be non-deterministic when exploding sets. + + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]]) + >>> s + 0 [1, 2, 3] + 1 foo + 2 [] + 3 [3, 4] + dtype: object + + >>> s.explode() + 0 1 + 0 2 + 0 3 + 1 foo + 2 NaN + 3 3 + 3 4 + dtype: object + """ + if not len(self) or not is_object_dtype(self): + result = self.copy() + return result.reset_index(drop=True) if ignore_index else result + + values, counts = reshape.explode(np.asarray(self._values)) + + if ignore_index: + index = default_index(len(values)) + else: + index = self.index.repeat(counts) + + return self._constructor(values, index=index, name=self.name) + + def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame: + """ + Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. + + Parameters + ---------- + level : int, str, or list of these, default last level + Level(s) to unstack, can pass level name. + fill_value : scalar value, default None + Value to use when replacing NaN values. + + Returns + ------- + DataFrame + Unstacked Series. + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.MultiIndex.from_product([['one', 'two'], + ... ['a', 'b']])) + >>> s + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 + + >>> s.unstack(level=-1) + a b + one 1 2 + two 3 4 + + >>> s.unstack(level=0) + one two + a 1 3 + b 2 4 + """ + from pandas.core.reshape.reshape import unstack + + return unstack(self, level, fill_value) + + # ---------------------------------------------------------------------- + # function application + + def map( + self, + arg: Callable | Mapping | Series, + na_action: Literal["ignore"] | None = None, + ) -> Series: + """ + Map values of Series according to an input mapping or function. + + Used for substituting each value in a Series with another value, + that may be derived from a function, a ``dict`` or + a :class:`Series`. + + Parameters + ---------- + arg : function, collections.abc.Mapping subclass or Series + Mapping correspondence. 
+ na_action : {None, 'ignore'}, default None + If 'ignore', propagate NaN values, without passing them to the + mapping correspondence. + + Returns + ------- + Series + Same index as caller. + + See Also + -------- + Series.apply : For applying more complex functions on a Series. + DataFrame.apply : Apply a function row-/column-wise. + DataFrame.applymap : Apply a function elementwise on a whole DataFrame. + + Notes + ----- + When ``arg`` is a dictionary, values in Series that are not in the + dictionary (as keys) are converted to ``NaN``. However, if the + dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e. + provides a method for default values), then this default is used + rather than ``NaN``. + + Examples + -------- + >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit']) + >>> s + 0 cat + 1 dog + 2 NaN + 3 rabbit + dtype: object + + ``map`` accepts a ``dict`` or a ``Series``. Values that are not found + in the ``dict`` are converted to ``NaN``, unless the dict has a default + value (e.g. ``defaultdict``): + + >>> s.map({'cat': 'kitten', 'dog': 'puppy'}) + 0 kitten + 1 puppy + 2 NaN + 3 NaN + dtype: object + + It also accepts a function: + + >>> s.map('I am a {}'.format) + 0 I am a cat + 1 I am a dog + 2 I am a nan + 3 I am a rabbit + dtype: object + + To avoid applying the function to missing values (and keep them as + ``NaN``) ``na_action='ignore'`` can be used: + + >>> s.map('I am a {}'.format, na_action='ignore') + 0 I am a cat + 1 I am a dog + 2 NaN + 3 I am a rabbit + dtype: object + """ + new_values = self._map_values(arg, na_action=na_action) + return self._constructor(new_values, index=self.index).__finalize__( + self, method="map" + ) + + def _gotitem(self, key, ndim, subset=None) -> Series: + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : string / list of selections + ndim : {1, 2} + Requested ndim of result. + subset : object, default None + Subset to act on. + """ + return self + + _agg_see_also_doc = dedent( + """ + See Also + -------- + Series.apply : Invoke function on a Series. + Series.transform : Transform function producing a Series with like indexes. + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.agg('min') + 1 + + >>> s.agg(['min', 'max']) + min 1 + max 4 + dtype: int64 + """ + ) + + @doc( + _shared_docs["aggregate"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + ) + def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + # Validate the axis parameter + self._get_axis_number(axis) + + # if func is None, will switch to user-provided "named aggregation" kwargs + if func is None: + func = dict(kwargs.items()) + + op = SeriesApply(self, func, convert_dtype=False, args=args, kwargs=kwargs) + result = op.agg() + return result + + agg = aggregate + + # error: Signature of "any" incompatible with supertype "NDFrame" [override] + @overload # type: ignore[override] + def any( + self, + *, + axis: Axis = ..., + bool_only: bool | None = ..., + skipna: bool = ..., + level: None = ..., + **kwargs, + ) -> bool: + ... + + @overload + def any( + self, + *, + axis: Axis = ..., + bool_only: bool | None = ..., + skipna: bool = ..., + level: Level, + **kwargs, + ) -> Series | bool: + ... 
+ + @doc(NDFrame.any, **_shared_doc_kwargs) + def any( + self, + axis: Axis = 0, + bool_only: bool | None = None, + skipna: bool = True, + level: Level | None = None, + **kwargs, + ) -> Series | bool: + ... + + @doc( + _shared_docs["transform"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + ) + def transform( + self, func: AggFuncType, axis: Axis = 0, *args, **kwargs + ) -> DataFrame | Series: + # Validate axis argument + self._get_axis_number(axis) + result = SeriesApply( + self, func=func, convert_dtype=True, args=args, kwargs=kwargs + ).transform() + return result + + def apply( + self, + func: AggFuncType, + convert_dtype: bool = True, + args: tuple[Any, ...] = (), + **kwargs, + ) -> DataFrame | Series: + """ + Invoke function on values of Series. + + Can be ufunc (a NumPy function that applies to the entire Series) + or a Python function that only works on single values. + + Parameters + ---------- + func : function + Python function or NumPy ufunc to apply. + convert_dtype : bool, default True + Try to find better dtype for elementwise function results. If + False, leave as dtype=object. Note that the dtype is always + preserved for some extension array dtypes, such as Categorical. + args : tuple + Positional arguments passed to func after the series value. + **kwargs + Additional keyword arguments passed to func. + + Returns + ------- + Series or DataFrame + If func returns a Series object the result will be a DataFrame. + + See Also + -------- + Series.map: For element-wise operations. + Series.agg: Only perform aggregating type operations. + Series.transform: Only perform transforming type operations. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + Create a series with typical summer temperatures for each city. + + >>> s = pd.Series([20, 21, 12], + ... index=['London', 'New York', 'Helsinki']) + >>> s + London 20 + New York 21 + Helsinki 12 + dtype: int64 + + Square the values by defining a function and passing it as an + argument to ``apply()``. + + >>> def square(x): + ... return x ** 2 + >>> s.apply(square) + London 400 + New York 441 + Helsinki 144 + dtype: int64 + + Square the values by passing an anonymous function as an + argument to ``apply()``. + + >>> s.apply(lambda x: x ** 2) + London 400 + New York 441 + Helsinki 144 + dtype: int64 + + Define a custom function that needs additional positional + arguments and pass these additional arguments using the + ``args`` keyword. + + >>> def subtract_custom_value(x, custom_value): + ... return x - custom_value + + >>> s.apply(subtract_custom_value, args=(5,)) + London 15 + New York 16 + Helsinki 7 + dtype: int64 + + Define a custom function that takes keyword arguments + and pass these arguments to ``apply``. + + >>> def add_custom_values(x, **kwargs): + ... for month in kwargs: + ... x += kwargs[month] + ... return x + + >>> s.apply(add_custom_values, june=30, july=20, august=25) + London 95 + New York 96 + Helsinki 87 + dtype: int64 + + Use a function from the Numpy library. + + >>> s.apply(np.log) + London 2.995732 + New York 3.044522 + Helsinki 2.484907 + dtype: float64 + """ + return SeriesApply(self, func, convert_dtype, args, kwargs).apply() + + def _reduce( + self, + op, + name: str, + *, + axis=0, + skipna=True, + numeric_only=None, + filter_type=None, + **kwds, + ): + """ + Perform a reduction operation. 
+ + If we have an ndarray as a value, then simply perform the operation, + otherwise delegate to the object. + """ + delegate = self._values + + if axis is not None: + self._get_axis_number(axis) + + if isinstance(delegate, ExtensionArray): + # dispatch to ExtensionArray interface + return delegate._reduce(name, skipna=skipna, **kwds) + + else: + # dispatch to numpy arrays + if numeric_only and not is_numeric_dtype(self.dtype): + kwd_name = "numeric_only" + if name in ["any", "all"]: + kwd_name = "bool_only" + # GH#47500 - change to TypeError to match other methods + warnings.warn( + f"Calling Series.{name} with {kwd_name}={numeric_only} and " + f"dtype {self.dtype} will raise a TypeError in the future", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise NotImplementedError( + f"Series.{name} does not implement {kwd_name}." + ) + with np.errstate(all="ignore"): + return op(delegate, skipna=skipna, **kwds) + + def _reindex_indexer( + self, new_index: Index | None, indexer: npt.NDArray[np.intp] | None, copy: bool + ) -> Series: + # Note: new_index is None iff indexer is None + # if not None, indexer is np.intp + if indexer is None and ( + new_index is None or new_index.names == self.index.names + ): + if copy: + return self.copy() + return self + + new_values = algorithms.take_nd( + self._values, indexer, allow_fill=True, fill_value=None + ) + return self._constructor(new_values, index=new_index) + + def _needs_reindex_multi(self, axes, method, level) -> bool: + """ + Check if we do need a multi reindex; this is for compat with + higher dims. + """ + return False + + # error: Cannot determine type of 'align' + @doc( + NDFrame.align, # type: ignore[has-type] + klass=_shared_doc_kwargs["klass"], + axes_single_arg=_shared_doc_kwargs["axes_single_arg"], + ) + def align( + self, + other: Series, + join: Literal["outer", "inner", "left", "right"] = "outer", + axis: Axis | None = None, + level: Level = None, + copy: bool = True, + fill_value: Hashable = None, + method: FillnaOptions | None = None, + limit: int | None = None, + fill_axis: Axis = 0, + broadcast_axis: Axis | None = None, + ) -> Series: + return super().align( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + broadcast_axis=broadcast_axis, + ) + + @overload + def rename( + self, + index: Renamer | Hashable | None = ..., + *, + axis: Axis | None = ..., + copy: bool = ..., + inplace: Literal[True], + level: Level | None = ..., + errors: IgnoreRaise = ..., + ) -> None: + ... + + @overload + def rename( + self, + index: Renamer | Hashable | None = ..., + *, + axis: Axis | None = ..., + copy: bool = ..., + inplace: Literal[False] = ..., + level: Level | None = ..., + errors: IgnoreRaise = ..., + ) -> Series: + ... + + @overload + def rename( + self, + index: Renamer | Hashable | None = ..., + *, + axis: Axis | None = ..., + copy: bool = ..., + inplace: bool = ..., + level: Level | None = ..., + errors: IgnoreRaise = ..., + ) -> Series | None: + ... + + def rename( + self, + index: Renamer | Hashable | None = None, + *, + axis: Axis | None = None, + copy: bool = True, + inplace: bool = False, + level: Level | None = None, + errors: IgnoreRaise = "ignore", + ) -> Series | None: + """ + Alter Series index labels or name. + + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. Extra labels listed don't throw an + error. + + Alternatively, change ``Series.name`` with a scalar value. 
+ + See the :ref:`user guide ` for more. + + Parameters + ---------- + index : scalar, hashable sequence, dict-like or function optional + Functions or dict-like are transformations to apply to + the index. + Scalar or hashable sequence-like will alter the ``Series.name`` + attribute. + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Whether to return a new Series. If True the value of copy is ignored. + level : int or level name, default None + In case of MultiIndex, only rename labels in the specified level. + errors : {'ignore', 'raise'}, default 'ignore' + If 'raise', raise `KeyError` when a `dict-like mapper` or + `index` contains labels that are not present in the index being transformed. + If 'ignore', existing keys will be renamed and extra keys will be ignored. + + Returns + ------- + Series or None + Series with index labels or name altered or None if ``inplace=True``. + + See Also + -------- + DataFrame.rename : Corresponding DataFrame method. + Series.rename_axis : Set the name of the axis. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + >>> s.rename("my_name") # scalar, changes Series.name + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: int64 + >>> s.rename(lambda x: x ** 2) # function, changes labels + 0 1 + 1 2 + 4 3 + dtype: int64 + >>> s.rename({1: 3, 2: 5}) # mapping, changes labels + 0 1 + 3 2 + 5 3 + dtype: int64 + """ + if axis is not None: + # Make sure we raise if an invalid 'axis' is passed. + axis = self._get_axis_number(axis) + + if callable(index) or is_dict_like(index): + # error: Argument 1 to "_rename" of "NDFrame" has incompatible + # type "Union[Union[Mapping[Any, Hashable], Callable[[Any], + # Hashable]], Hashable, None]"; expected "Union[Mapping[Any, + # Hashable], Callable[[Any], Hashable], None]" + return super()._rename( + index, # type: ignore[arg-type] + copy=copy, + inplace=inplace, + level=level, + errors=errors, + ) + else: + return self._set_name(index, inplace=inplace) + + @overload + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[False] | lib.NoDefault = ..., + copy: bool | lib.NoDefault = ..., + ) -> Series: + ... + + @overload + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[True], + copy: bool | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: bool | lib.NoDefault = ..., + copy: bool | lib.NoDefault = ..., + ) -> Series | None: + ... 
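+ # A minimal sketch of the ``errors`` behaviour documented for ``rename``
+ # above; the example values are hypothetical and assume default pandas
+ # behaviour:
+ #
+ #   >>> s = pd.Series([1, 2, 3])
+ #   >>> s.rename({0: 'x', 9: 'y'})            # missing key 9 is ignored
+ #   x    1
+ #   1    2
+ #   2    3
+ #   dtype: int64
+ #   >>> s.rename({9: 'y'}, errors='raise')    # raises KeyError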
+ + # error: Signature of "set_axis" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + @Appender( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + + >>> s.set_axis(['a', 'b', 'c'], axis=0) + a 1 + b 2 + c 3 + dtype: int64 + """ + ) + @Substitution( + **_shared_doc_kwargs, + extended_summary_sub="", + axis_description_sub="", + see_also_sub="", + ) + @Appender(NDFrame.set_axis.__doc__) + def set_axis( # type: ignore[override] + self, + labels, + axis: Axis = 0, + inplace: bool | lib.NoDefault = lib.no_default, + copy: bool | lib.NoDefault = lib.no_default, + ) -> Series | None: + return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy) + + # error: Cannot determine type of 'reindex' + @doc( + NDFrame.reindex, # type: ignore[has-type] + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], + optional_labels=_shared_doc_kwargs["optional_labels"], + optional_axis=_shared_doc_kwargs["optional_axis"], + ) + def reindex(self, *args, **kwargs) -> Series: + if len(args) > 1: + raise TypeError("Only one positional argument ('index') is allowed") + if args: + (index,) = args + if "index" in kwargs: + raise TypeError( + "'index' passed as both positional and keyword argument" + ) + kwargs.update({"index": index}) + return super().reindex(**kwargs) + + @overload + def drop( + self, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level | None = ..., + inplace: Literal[True], + errors: IgnoreRaise = ..., + ) -> None: + ... + + @overload + def drop( + self, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level | None = ..., + inplace: Literal[False] = ..., + errors: IgnoreRaise = ..., + ) -> Series: + ... + + @overload + def drop( + self, + labels: IndexLabel = ..., + *, + axis: Axis = ..., + index: IndexLabel = ..., + columns: IndexLabel = ..., + level: Level | None = ..., + inplace: bool = ..., + errors: IgnoreRaise = ..., + ) -> Series | None: + ... + + # error: Signature of "drop" incompatible with supertype "NDFrame" + # github.com/python/mypy/issues/12387 + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + def drop( # type: ignore[override] + self, + labels: IndexLabel = None, + axis: Axis = 0, + index: IndexLabel = None, + columns: IndexLabel = None, + level: Level | None = None, + inplace: bool = False, + errors: IgnoreRaise = "raise", + ) -> Series | None: + """ + Return Series with specified index labels removed. + + Remove elements of a Series based on specifying the index labels. + When using a multi-index, labels on different levels can be removed + by specifying the level. + + Parameters + ---------- + labels : single label or list-like + Index labels to drop. + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + index : single label or list-like + Redundant for application on Series, but 'index' can be used instead + of 'labels'. + columns : single label or list-like + No change is made to the Series; use 'index' or 'labels' instead. + level : int or level name, optional + For MultiIndex, level for which the labels will be removed. + inplace : bool, default False + If True, do operation inplace and return None. + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and only existing labels are dropped. 
+ + Returns + ------- + Series or None + Series with specified index labels removed or None if ``inplace=True``. + + Raises + ------ + KeyError + If none of the labels are found in the index. + + See Also + -------- + Series.reindex : Return only specified index labels of Series. + Series.dropna : Return series without null values. + Series.drop_duplicates : Return Series with duplicate values removed. + DataFrame.drop : Drop specified labels from rows or columns. + + Examples + -------- + >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C']) + >>> s + A 0 + B 1 + C 2 + dtype: int64 + + Drop labels B en C + + >>> s.drop(labels=['B', 'C']) + A 0 + dtype: int64 + + Drop 2nd level label in MultiIndex Series + + >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + ... ['speed', 'weight', 'length']], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], + ... index=midx) + >>> s + lama speed 45.0 + weight 200.0 + length 1.2 + cow speed 30.0 + weight 250.0 + length 1.5 + falcon speed 320.0 + weight 1.0 + length 0.3 + dtype: float64 + + >>> s.drop(labels='weight', level=1) + lama speed 45.0 + length 1.2 + cow speed 30.0 + length 1.5 + falcon speed 320.0 + length 0.3 + dtype: float64 + """ + return super().drop( + labels=labels, + axis=axis, + index=index, + columns=columns, + level=level, + inplace=inplace, + errors=errors, + ) + + @overload + def fillna( + self, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[False] = ..., + limit: int | None = ..., + downcast: dict | None = ..., + ) -> Series: + ... + + @overload + def fillna( + self, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[True], + limit: int | None = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def fillna( + self, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: bool = ..., + limit: int | None = ..., + downcast: dict | None = ..., + ) -> Series | None: + ... + + # error: Signature of "fillna" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) + @doc(NDFrame.fillna, **_shared_doc_kwargs) + def fillna( # type: ignore[override] + self, + value: Hashable | Mapping | Series | DataFrame = None, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, + ) -> Series | None: + return super().fillna( + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + def pop(self, item: Hashable) -> Any: + """ + Return item and drops from series. Raise KeyError if not found. + + Parameters + ---------- + item : label + Index of the element that needs to be removed. + + Returns + ------- + Value that is popped from series. 
+ + Examples + -------- + >>> ser = pd.Series([1,2,3]) + + >>> ser.pop(0) + 1 + + >>> ser + 1 2 + 2 3 + dtype: int64 + """ + return super().pop(item=item) + + # error: Signature of "replace" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def replace( + self, + to_replace=..., + value=..., + *, + inplace: Literal[False] = ..., + limit: int | None = ..., + regex: bool = ..., + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., + ) -> Series: + ... + + @overload + def replace( + self, + to_replace=..., + value=..., + *, + inplace: Literal[True], + limit: int | None = ..., + regex: bool = ..., + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., + ) -> None: + ... + + # error: Signature of "replace" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "to_replace", "value"] + ) + @doc( + NDFrame.replace, + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], + ) + def replace( # type: ignore[override] + self, + to_replace=None, + value=lib.no_default, + inplace: bool = False, + limit: int | None = None, + regex: bool = False, + method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default, + ) -> Series | None: + return super().replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, + method=method, + ) + + @doc(INFO_DOCSTRING, **series_sub_kwargs) + def info( + self, + verbose: bool | None = None, + buf: IO[str] | None = None, + max_cols: int | None = None, + memory_usage: bool | str | None = None, + show_counts: bool = True, + ) -> None: + return SeriesInfo(self, memory_usage).render( + buf=buf, + max_cols=max_cols, + verbose=verbose, + show_counts=show_counts, + ) + + def _replace_single(self, to_replace, method: str, inplace: bool, limit): + """ + Replaces values in a Series using the fill method specified when no + replacement value is given in the replace method + """ + + result = self if inplace else self.copy() + + values = result._values + mask = missing.mask_missing(values, to_replace) + + if isinstance(values, ExtensionArray): + # dispatch to the EA's _pad_mask_inplace method + values._fill_mask_inplace(method, limit, mask) + else: + fill_f = missing.get_fill_func(method) + fill_f(values, limit=limit, mask=mask) + + if inplace: + return + return result + + # error: Cannot determine type of 'shift' + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] + def shift( + self, periods: int = 1, freq=None, axis: Axis = 0, fill_value: Hashable = None + ) -> Series: + return super().shift( + periods=periods, freq=freq, axis=axis, fill_value=fill_value + ) + + def memory_usage(self, index: bool = True, deep: bool = False) -> int: + """ + Return the memory usage of the Series. + + The memory usage can optionally include the contribution of + the index and of elements of `object` dtype. + + Parameters + ---------- + index : bool, default True + Specifies whether to include the memory usage of the Series index. + deep : bool, default False + If True, introspect the data deeply by interrogating + `object` dtypes for system-level memory consumption, and include + it in the returned value. + + Returns + ------- + int + Bytes of memory consumed. + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of the + array. + DataFrame.memory_usage : Bytes consumed by a DataFrame. 
+ + Examples + -------- + >>> s = pd.Series(range(3)) + >>> s.memory_usage() + 152 + + Not including the index gives the size of the rest of the data, which + is necessarily smaller: + + >>> s.memory_usage(index=False) + 24 + + The memory footprint of `object` values is ignored by default: + + >>> s = pd.Series(["a", "b"]) + >>> s.values + array(['a', 'b'], dtype=object) + >>> s.memory_usage() + 144 + >>> s.memory_usage(deep=True) + 244 + """ + v = self._memory_usage(deep=deep) + if index: + v += self.index.memory_usage(deep=deep) + return v + + def isin(self, values) -> Series: + """ + Whether elements in Series are contained in `values`. + + Return a boolean Series showing whether each element in the Series + matches an element in the passed sequence of `values` exactly. + + Parameters + ---------- + values : set or list-like + The sequence of values to test. Passing in a single string will + raise a ``TypeError``. Instead, turn a single string into a + list of one element. + + Returns + ------- + Series + Series of booleans indicating if each element is in values. + + Raises + ------ + TypeError + * If `values` is a string + + See Also + -------- + DataFrame.isin : Equivalent method on DataFrame. + + Examples + -------- + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', + ... 'hippo'], name='animal') + >>> s.isin(['cow', 'lama']) + 0 True + 1 True + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + To invert the boolean values, use the ``~`` operator: + + >>> ~s.isin(['cow', 'lama']) + 0 False + 1 False + 2 False + 3 True + 4 False + 5 True + Name: animal, dtype: bool + + Passing a single string as ``s.isin('lama')`` will raise an error. Use + a list of one element instead: + + >>> s.isin(['lama']) + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + Strings and integers are distinct and are therefore not comparable: + + >>> pd.Series([1]).isin(['1']) + 0 False + dtype: bool + >>> pd.Series([1.1]).isin(['1.1']) + 0 False + dtype: bool + """ + result = algorithms.isin(self._values, values) + return self._constructor(result, index=self.index).__finalize__( + self, method="isin" + ) + + def between( + self, + left, + right, + inclusive: Literal["both", "neither", "left", "right"] = "both", + ) -> Series: + """ + Return boolean Series equivalent to left <= series <= right. + + This function returns a boolean vector containing `True` wherever the + corresponding Series element is between the boundary values `left` and + `right`. NA values are treated as `False`. + + Parameters + ---------- + left : scalar or list-like + Left boundary. + right : scalar or list-like + Right boundary. + inclusive : {"both", "neither", "left", "right"} + Include boundaries. Whether to set each bound as closed or open. + + .. versionchanged:: 1.3.0 + + Returns + ------- + Series + Series representing whether each element is between left and + right (inclusive). + + See Also + -------- + Series.gt : Greater than of series and other. + Series.lt : Less than of series and other. 
+ + Notes + ----- + This function is equivalent to ``(left <= ser) & (ser <= right)`` + + Examples + -------- + >>> s = pd.Series([2, 0, 4, 8, np.nan]) + + Boundary values are included by default: + + >>> s.between(1, 4) + 0 True + 1 False + 2 True + 3 False + 4 False + dtype: bool + + With `inclusive` set to ``"neither"`` boundary values are excluded: + + >>> s.between(1, 4, inclusive="neither") + 0 True + 1 False + 2 False + 3 False + 4 False + dtype: bool + + `left` and `right` can be any scalar value: + + >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve']) + >>> s.between('Anna', 'Daniel') + 0 False + 1 True + 2 True + 3 False + dtype: bool + """ + # error: Non-overlapping identity check (left operand type: "Literal['both', + # 'neither', 'left', 'right']", right operand type: "Literal[False]") + if inclusive is True or inclusive is False: # type: ignore[comparison-overlap] + warnings.warn( + "Boolean inputs to the `inclusive` argument are deprecated in " + "favour of `both` or `neither`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if inclusive: + inclusive = "both" + else: + inclusive = "neither" + if inclusive == "both": + lmask = self >= left + rmask = self <= right + elif inclusive == "left": + lmask = self >= left + rmask = self < right + elif inclusive == "right": + lmask = self > left + rmask = self <= right + elif inclusive == "neither": + lmask = self > left + rmask = self < right + else: + raise ValueError( + "Inclusive has to be either string of 'both'," + "'left', 'right', or 'neither'." + ) + + return lmask & rmask + + # ---------------------------------------------------------------------- + # Convert to types that support pd.NA + + def _convert_dtypes( + self, + infer_objects: bool = True, + convert_string: bool = True, + convert_integer: bool = True, + convert_boolean: bool = True, + convert_floating: bool = True, + ) -> Series: + input_series = self + if infer_objects: + input_series = input_series.infer_objects() + if is_object_dtype(input_series): + input_series = input_series.copy() + + if convert_string or convert_integer or convert_boolean or convert_floating: + inferred_dtype = convert_dtypes( + input_series._values, + convert_string, + convert_integer, + convert_boolean, + convert_floating, + ) + result = input_series.astype(inferred_dtype) + else: + result = input_series.copy() + return result + + # error: Cannot determine type of 'isna' + # error: Return type "Series" of "isna" incompatible with return type "ndarray + # [Any, dtype[bool_]]" in supertype "IndexOpsMixin" + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] + def isna(self) -> Series: # type: ignore[override] + return NDFrame.isna(self) + + # error: Cannot determine type of 'isna' + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] + def isnull(self) -> Series: + """ + Series.isnull is an alias for Series.isna. + """ + return super().isnull() + + # error: Cannot determine type of 'notna' + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] + def notna(self) -> Series: + return super().notna() + + # error: Cannot determine type of 'notna' + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] + def notnull(self) -> Series: + """ + Series.notnull is an alias for Series.notna. + """ + return super().notnull() + + @overload + def dropna( + self, *, axis: Axis = ..., inplace: Literal[False] = ..., how: str | None = ... + ) -> Series: + ... 
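+ # Sketch of the conversion that ``_convert_dtypes`` above performs when
+ # reached via the public ``convert_dtypes`` method (illustrative values,
+ # assuming the default convert_* flags):
+ #
+ #   >>> pd.Series([1, 2, None]).convert_dtypes()
+ #   0       1
+ #   1       2
+ #   2    <NA>
+ #   dtype: Int64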
+ + @overload + def dropna( + self, *, axis: Axis = ..., inplace: Literal[True], how: str | None = ... + ) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def dropna( + self, axis: Axis = 0, inplace: bool = False, how: str | None = None + ) -> Series | None: + """ + Return a new Series with missing values removed. + + See the :ref:`User Guide ` for more on which values are + considered missing, and how to work with missing data. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + inplace : bool, default False + If True, do operation inplace and return None. + how : str, optional + Not in use. Kept for compatibility. + + Returns + ------- + Series or None + Series with NA entries dropped from it or None if ``inplace=True``. + + See Also + -------- + Series.isna: Indicate missing values. + Series.notna : Indicate existing (non-missing) values. + Series.fillna : Replace missing values. + DataFrame.dropna : Drop rows or columns which contain NA values. + Index.dropna : Drop missing indices. + + Examples + -------- + >>> ser = pd.Series([1., 2., np.nan]) + >>> ser + 0 1.0 + 1 2.0 + 2 NaN + dtype: float64 + + Drop NA values from a Series. + + >>> ser.dropna() + 0 1.0 + 1 2.0 + dtype: float64 + + Keep the Series with valid entries in the same variable. + + >>> ser.dropna(inplace=True) + >>> ser + 0 1.0 + 1 2.0 + dtype: float64 + + Empty strings are not considered NA values. ``None`` is considered an + NA value. + + >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay']) + >>> ser + 0 NaN + 1 2 + 2 NaT + 3 + 4 None + 5 I stay + dtype: object + >>> ser.dropna() + 1 2 + 3 + 5 I stay + dtype: object + """ + inplace = validate_bool_kwarg(inplace, "inplace") + # Validate the axis parameter + self._get_axis_number(axis or 0) + + if self._can_hold_na: + result = remove_na_arraylike(self) + if inplace: + self._update_inplace(result) + else: + return result + else: + if not inplace: + return self.copy() + return None + + # ---------------------------------------------------------------------- + # Time series-oriented methods + + # error: Cannot determine type of 'asfreq' + @doc(NDFrame.asfreq, **_shared_doc_kwargs) # type: ignore[has-type] + def asfreq( + self, + freq: Frequency, + method: FillnaOptions | None = None, + how: str | None = None, + normalize: bool = False, + fill_value: Hashable = None, + ) -> Series: + return super().asfreq( + freq=freq, + method=method, + how=how, + normalize=normalize, + fill_value=fill_value, + ) + + # error: Cannot determine type of 'resample' + @doc(NDFrame.resample, **_shared_doc_kwargs) # type: ignore[has-type] + def resample( + self, + rule, + axis: Axis = 0, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + loffset=None, + base: int | None = None, + on: Level = None, + level: Level = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + group_keys: bool | lib.NoDefault = no_default, + ) -> Resampler: + return super().resample( + rule=rule, + axis=axis, + closed=closed, + label=label, + convention=convention, + kind=kind, + loffset=loffset, + base=base, + on=on, + level=level, + origin=origin, + offset=offset, + group_keys=group_keys, + ) + + def to_timestamp( + self, + freq=None, + how: Literal["s", "e", "start", "end"] = "start", + copy: bool = True, + ) -> Series: + """ + Cast to DatetimeIndex of Timestamps, at *beginning* of 
period. + + Parameters + ---------- + freq : str, default frequency of PeriodIndex + Desired frequency. + how : {'s', 'e', 'start', 'end'} + Convention for converting period to timestamp; start of period + vs. end. + copy : bool, default True + Whether or not to return a copy. + + Returns + ------- + Series with DatetimeIndex + """ + new_values = self._values + if copy: + new_values = new_values.copy() + + if not isinstance(self.index, PeriodIndex): + raise TypeError(f"unsupported Type {type(self.index).__name__}") + new_index = self.index.to_timestamp(freq=freq, how=how) + return self._constructor(new_values, index=new_index).__finalize__( + self, method="to_timestamp" + ) + + def to_period(self, freq: str | None = None, copy: bool = True) -> Series: + """ + Convert Series from DatetimeIndex to PeriodIndex. + + Parameters + ---------- + freq : str, default None + Frequency associated with the PeriodIndex. + copy : bool, default True + Whether or not to return a copy. + + Returns + ------- + Series + Series with index converted to PeriodIndex. + """ + new_values = self._values + if copy: + new_values = new_values.copy() + + if not isinstance(self.index, DatetimeIndex): + raise TypeError(f"unsupported Type {type(self.index).__name__}") + new_index = self.index.to_period(freq=freq) + return self._constructor(new_values, index=new_index).__finalize__( + self, method="to_period" + ) + + @overload + def ffill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[False] = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> Series: + ... + + @overload + def ffill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[True], + limit: None | int = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def ffill( + self, + *, + axis: None | Axis = ..., + inplace: bool = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> Series | None: + ... + + # error: Signature of "ffill" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def ffill( # type: ignore[override] + self, + axis: None | Axis = None, + inplace: bool = False, + limit: None | int = None, + downcast: dict | None = None, + ) -> Series | None: + return super().ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) + + @overload + def bfill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[False] = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> Series: + ... + + @overload + def bfill( + self, + *, + axis: None | Axis = ..., + inplace: Literal[True], + limit: None | int = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def bfill( + self, + *, + axis: None | Axis = ..., + inplace: bool = ..., + limit: None | int = ..., + downcast: dict | None = ..., + ) -> Series | None: + ... 
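+ # Illustrative sketch of the forward/backward fill pair defined here
+ # (hypothetical values, default arguments):
+ #
+ #   >>> s = pd.Series([1.0, None, 3.0])
+ #   >>> s.ffill()
+ #   0    1.0
+ #   1    1.0
+ #   2    3.0
+ #   dtype: float64
+ #   >>> s.bfill()
+ #   0    1.0
+ #   1    3.0
+ #   2    3.0
+ #   dtype: float64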
+ + # error: Signature of "bfill" incompatible with supertype "NDFrame" + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def bfill( # type: ignore[override] + self, + axis: None | Axis = None, + inplace: bool = False, + limit: None | int = None, + downcast: dict | None = None, + ) -> Series | None: + return super().bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "lower", "upper"] + ) + def clip( + self: Series, + lower=None, + upper=None, + axis: Axis | None = None, + inplace: bool = False, + *args, + **kwargs, + ) -> Series | None: + return super().clip(lower, upper, axis, inplace, *args, **kwargs) + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) + def interpolate( + self: Series, + method: str = "linear", + axis: Axis = 0, + limit: int | None = None, + inplace: bool = False, + limit_direction: str | None = None, + limit_area: str | None = None, + downcast: str | None = None, + **kwargs, + ) -> Series | None: + return super().interpolate( + method, + axis, + limit, + inplace, + limit_direction, + limit_area, + downcast, + **kwargs, + ) + + @overload + def where( + self, + cond, + other=..., + *, + inplace: Literal[False] = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> Series: + ... + + @overload + def where( + self, + cond, + other=..., + *, + inplace: Literal[True], + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def where( + self, + cond, + other=..., + *, + inplace: bool = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> Series | None: + ... + + # error: Signature of "where" incompatible with supertype "NDFrame" + @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + def where( # type: ignore[override] + self, + cond, + other=lib.no_default, + inplace: bool = False, + axis: Axis | None = None, + level: Level = None, + errors: IgnoreRaise | lib.NoDefault = lib.no_default, + try_cast: bool | lib.NoDefault = lib.no_default, + ) -> Series | None: + return super().where( + cond, + other, + inplace=inplace, + axis=axis, + level=level, + try_cast=try_cast, + ) + + @overload + def mask( + self, + cond, + other=..., + *, + inplace: Literal[False] = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> Series: + ... + + @overload + def mask( + self, + cond, + other=..., + *, + inplace: Literal[True], + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> None: + ... + + @overload + def mask( + self, + cond, + other=..., + *, + inplace: bool = ..., + axis: Axis | None = ..., + level: Level = ..., + errors: IgnoreRaise | lib.NoDefault = ..., + try_cast: bool | lib.NoDefault = ..., + ) -> Series | None: + ... 
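+ # Minimal sketch of how ``where`` and ``mask`` complement each other
+ # (hypothetical values; ``other`` left at its default, so replaced
+ # entries become NaN):
+ #
+ #   >>> s = pd.Series([1, 2, 3])
+ #   >>> s.where(s > 1)
+ #   0    NaN
+ #   1    2.0
+ #   2    3.0
+ #   dtype: float64
+ #   >>> s.mask(s > 1)
+ #   0    1.0
+ #   1    NaN
+ #   2    NaN
+ #   dtype: float64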
+ + # error: Signature of "mask" incompatible with supertype "NDFrame" + @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "cond", "other"] + ) + def mask( # type: ignore[override] + self, + cond, + other=np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level = None, + errors: IgnoreRaise | lib.NoDefault = lib.no_default, + try_cast: bool | lib.NoDefault = lib.no_default, + ) -> Series | None: + return super().mask( + cond, + other, + inplace=inplace, + axis=axis, + level=level, + try_cast=try_cast, + ) + + # ---------------------------------------------------------------------- + # Add index + _AXIS_ORDERS = ["index"] + _AXIS_LEN = len(_AXIS_ORDERS) + _info_axis_number = 0 + _info_axis_name = "index" + + index = properties.AxisProperty( + axis=0, doc="The index (axis labels) of the Series." + ) + + # ---------------------------------------------------------------------- + # Accessor Methods + # ---------------------------------------------------------------------- + str = CachedAccessor("str", StringMethods) + dt = CachedAccessor("dt", CombinedDatetimelikeProperties) + cat = CachedAccessor("cat", CategoricalAccessor) + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) + sparse = CachedAccessor("sparse", SparseAccessor) + + # ---------------------------------------------------------------------- + # Add plotting methods to Series + hist = pandas.plotting.hist_series + + # ---------------------------------------------------------------------- + # Template-Based Arithmetic/Comparison Methods + + def _cmp_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + + if isinstance(other, Series) and not self._indexed_same(other): + raise ValueError("Can only compare identically-labeled Series objects") + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + + with np.errstate(all="ignore"): + res_values = ops.comparison_op(lvalues, rvalues, op) + + return self._construct_result(res_values, name=res_name) + + def _logical_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + self, other = ops.align_method_SERIES(self, other, align_asobject=True) + + lvalues = self._values + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + + res_values = ops.logical_op(lvalues, rvalues, op) + return self._construct_result(res_values, name=res_name) + + def _arith_method(self, other, op): + self, other = ops.align_method_SERIES(self, other) + return base.IndexOpsMixin._arith_method(self, other, op) + + +Series._add_numeric_operations() + +# Add arithmetic! +ops.add_flex_arithmetic_methods(Series) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py new file mode 100644 index 00000000..09c8cf39 --- /dev/null +++ b/pandas/core/shared_docs.py @@ -0,0 +1,893 @@ +from __future__ import annotations + +_shared_docs: dict[str, str] = {} + +_shared_docs[ + "aggregate" +] = """ +Aggregate using one or more operations over the specified axis. + +Parameters +---------- +func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. 
+{axis} +*args + Positional arguments to pass to `func`. +**kwargs + Keyword arguments to pass to `func`. + +Returns +------- +scalar, Series or DataFrame + + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + Return scalar, Series or DataFrame. +{see_also} +Notes +----- +`agg` is an alias for `aggregate`. Use the alias. + +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` +for more details. + +A passed user-defined-function will be passed a Series for evaluation. +{examples}""" + +_shared_docs[ + "compare" +] = """ +Compare to another {klass} and show the differences. + +.. versionadded:: 1.1.0 + +Parameters +---------- +other : {klass} + Object to compare with. + +align_axis : {{0 or 'index', 1 or 'columns'}}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + +keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + +keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. + +result_names : tuple, default ('self', 'other') + Set the dataframes names in the comparison. + + .. versionadded:: 1.5.0 +""" + +_shared_docs[ + "groupby" +] = """ +Group %(klass)s using a mapper or by a Series of columns. + +A groupby operation involves some combination of splitting the +object, applying a function, and combining the results. This can be +used to group large amounts of data and compute operations on these +groups. + +Parameters +---------- +by : mapping, function, label, or list of labels + Used to determine the groups for the groupby. + If ``by`` is a function, it's called on each value of the object's + index. If a dict or Series is passed, the Series or dict VALUES + will be used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If a list or ndarray of length + equal to the selected axis is passed (see the `groupby user guide + `_), + the values are used as-is to determine the groups. A label or list + of labels may be passed to group by the columns in ``self``. + Notice that a tuple is interpreted as a (single) key. +axis : {0 or 'index', 1 or 'columns'}, default 0 + Split along rows (0) or columns (1). For `Series` this parameter + is unused and defaults to 0. +level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels. Do not specify both ``by`` and ``level``. +as_index : bool, default True + For aggregated output, return object with group labels as the + index. Only relevant for DataFrame input. as_index=False is + effectively "SQL-style" grouped output. +sort : bool, default True + Sort group keys. Get better performance by turning this off. + Note this does not influence the order of observations within each + group. Groupby preserves the order of rows within each group. +group_keys : bool, optional + When calling apply and the ``by`` argument produces a like-indexed + (i.e. 
:ref:`a transform `) result, add group keys to + index to identify pieces. By default group keys are not included + when the result's index (and column) labels match the inputs, and + are included otherwise. This argument has no effect if the result produced + is not like-indexed with respect to the input. + + .. versionchanged:: 1.5.0 + + Warns that `group_keys` will no longer be ignored when the + result from ``apply`` is a like-indexed Series or DataFrame. + Specify ``group_keys`` explicitly to include the group keys or + not. +squeeze : bool, default False + Reduce the dimensionality of the return type if possible, + otherwise return a consistent type. + + .. deprecated:: 1.1.0 + +observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. +dropna : bool, default True + If True, and if group keys contain NA values, NA values together + with row/column will be dropped. + If False, NA values will also be treated as the key in groups. + + .. versionadded:: 1.1.0 + +Returns +------- +%(klass)sGroupBy + Returns a groupby object that contains information about the groups. + +See Also +-------- +resample : Convenience method for frequency conversion and resampling + of time series. + +Notes +----- +See the `user guide +`__ for more +detailed usage and examples, including splitting an object into groups, +iterating through groups, selecting a group, aggregation, and more. +""" + +_shared_docs[ + "melt" +] = """ +Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. + +This function is useful to massage a DataFrame into a format where one +or more columns are identifier variables (`id_vars`), while all other +columns, considered measured variables (`value_vars`), are "unpivoted" to +the row axis, leaving just two non-identifier columns, 'variable' and +'value'. + +Parameters +---------- +id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. +value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. +var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. +value_name : scalar, default 'value' + Name to use for the 'value' column. +col_level : int or str, optional + If columns are a MultiIndex then use this level to melt. +ignore_index : bool, default True + If True, original index is ignored. If False, the original index is retained. + Index labels will be repeated as necessary. + + .. versionadded:: 1.1.0 + +Returns +------- +DataFrame + Unpivoted DataFrame. + +See Also +-------- +%(other)s : Identical method. +pivot_table : Create a spreadsheet-style pivot table as a DataFrame. +DataFrame.pivot : Return reshaped DataFrame organized + by given index / column values. +DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + +Notes +----- +Reference :ref:`the user guide ` for more examples. + +Examples +-------- +>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, +... 'B': {0: 1, 1: 3, 2: 5}, +... 
'C': {0: 2, 1: 4, 2: 6}}) +>>> df + A B C +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +3 a C 2 +4 b C 4 +5 c C 6 + +The names of 'variable' and 'value' columns can be customized: + +>>> %(caller)sid_vars=['A'], value_vars=['B'], +... var_name='myVarname', value_name='myValname') + A myVarname myValname +0 a B 1 +1 b B 3 +2 c B 5 + +Original index values can be kept around: + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +0 a C 2 +1 b C 4 +2 c C 6 + +If you have multi-index columns: + +>>> df.columns = [list('ABC'), list('DEF')] +>>> df + A B C + D E F +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value +0 a B E 1 +1 b B E 3 +2 c B E 5 +""" + +_shared_docs[ + "transform" +] = """ +Call ``func`` on self producing a {klass} with the same axis shape as self. + +Parameters +---------- +func : function, str, list-like or dict-like + Function to use for transforming the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. + + Accepted combinations are: + + - function + - string function name + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, function names or list-like of such. +{axis} +*args + Positional arguments to pass to `func`. +**kwargs + Keyword arguments to pass to `func`. + +Returns +------- +{klass} + A {klass} that must have the same length as self. + +Raises +------ +ValueError : If the returned {klass} has a different length than self. + +See Also +-------- +{klass}.agg : Only perform aggregating type operations. +{klass}.apply : Invoke function on a {klass}. + +Notes +----- +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` +for more details. + +Examples +-------- +>>> df = pd.DataFrame({{'A': range(3), 'B': range(1, 4)}}) +>>> df + A B +0 0 1 +1 1 2 +2 2 3 +>>> df.transform(lambda x: x + 1) + A B +0 1 2 +1 2 3 +2 3 4 + +Even though the resulting {klass} must have the same length as the +input {klass}, it is possible to provide several input functions: + +>>> s = pd.Series(range(3)) +>>> s +0 0 +1 1 +2 2 +dtype: int64 +>>> s.transform([np.sqrt, np.exp]) + sqrt exp +0 0.000000 1.000000 +1 1.000000 2.718282 +2 1.414214 7.389056 + +You can call transform on a GroupBy object: + +>>> df = pd.DataFrame({{ +... "Date": [ +... "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05", +... "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05"], +... "Data": [5, 8, 6, 1, 50, 100, 60, 120], +... }}) +>>> df + Date Data +0 2015-05-08 5 +1 2015-05-07 8 +2 2015-05-06 6 +3 2015-05-05 1 +4 2015-05-08 50 +5 2015-05-07 100 +6 2015-05-06 60 +7 2015-05-05 120 +>>> df.groupby('Date')['Data'].transform('sum') +0 55 +1 108 +2 66 +3 121 +4 55 +5 108 +6 66 +7 121 +Name: Data, dtype: int64 + +>>> df = pd.DataFrame({{ +... "c": [1, 1, 1, 2, 2, 2, 2], +... "type": ["m", "n", "o", "m", "m", "n", "n"] +... 
}}) +>>> df + c type +0 1 m +1 1 n +2 1 o +3 2 m +4 2 m +5 2 n +6 2 n +>>> df['size'] = df.groupby('c')['type'].transform(len) +>>> df + c type size +0 1 m 3 +1 1 n 3 +2 1 o 3 +3 2 m 4 +4 2 m 4 +5 2 n 4 +6 2 n 4 +""" + +_shared_docs[ + "storage_options" +] = """storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_.""" + +_shared_docs[ + "compression_options" +] = """compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' and '%s' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. + Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'tar'``} and other + key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster compression and to create + a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. + + .. versionadded:: 1.5.0 + Added support for `.tar` files.""" + +_shared_docs[ + "decompression_options" +] = """compression : str or dict, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer' and '%s' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. + Set to ``None`` for no decompression. + Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'tar'``} and other + key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdDecompressor`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for Zstandard decompression using a + custom compression dictionary: + ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. + + .. versionadded:: 1.5.0 + Added support for `.tar` files.""" + +_shared_docs[ + "replace" +] = """ + Replace values given in `to_replace` with `value`. + + Values of the {klass} are replaced with other values dynamically. + {replace_iloc} + + Parameters + ---------- + to_replace : str, regex, list, dict, Series, int, float, or None + How to find the values that will be replaced. + + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexs matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. 
This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + ``{{'a': 'b', 'y': 'z'}}`` replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way, the optional `value` + parameter should not be given. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + ``{{'a': 1, 'b': 'z'}}`` looks for the value 1 in column 'a' + and the value 'z' in column 'b' and replaces these values + with whatever is specified in `value`. The `value` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - For a DataFrame nested dictionaries, e.g., + ``{{'a': {{'b': np.nan}}}}``, are read as follows: look in column + 'a' for the value 'b' and replace it with NaN. The optional `value` + parameter should not be specified to use a nested dict in this + way. You can nest regular expressions as well. Note that + column names (the top-level dictionary keys in a nested + dictionary) **cannot** be regular expressions. + + * None: + + - This means that the `regex` argument must be a string, + compiled regular expression, or list, dict, ndarray or + Series of such elements. If `value` is also ``None`` then + this **must** be a nested dictionary or Series. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + {inplace} + limit : int, default None + Maximum size gap to forward or backward fill. + regex : bool or same types as `to_replace`, default False + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + `to_replace` must be ``None``. + method : {{'pad', 'ffill', 'bfill'}} + The method to use when for replacement, when `to_replace` is a + scalar, list or tuple and `value` is ``None``. + + .. versionchanged:: 0.23.0 + Added to DataFrame. + + Returns + ------- + {klass} + Object after replacement. + + Raises + ------ + AssertionError + * If `regex` is not a ``bool`` and `to_replace` is not + ``None``. + + TypeError + * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` + * If `to_replace` is a ``dict`` and `value` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If `to_replace` is ``None`` and `regex` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. + * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to `to_replace` does not match the type of the + value being replaced + + ValueError + * If a ``list`` or an ``ndarray`` is passed to `to_replace` and + `value` but they are not the same length. + + See Also + -------- + {klass}.fillna : Fill NA values. + {klass}.where : Replace values based on boolean condition. + Series.str.replace : Simple string replacement. 
+ + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + * When dict is used as the `to_replace` value, it is like + key(s) in the dict are the to_replace part and + value(s) in the dict are the value parameter. + + Examples + -------- + + **Scalar `to_replace` and `value`** + + >>> s = pd.Series([1, 2, 3, 4, 5]) + >>> s.replace(1, 5) + 0 5 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: int64 + + >>> df = pd.DataFrame({{'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 'C': ['a', 'b', 'c', 'd', 'e']}}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + **List-like `to_replace`** + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + >>> s.replace([1, 2], method='bfill') + 0 3 + 1 3 + 2 3 + 3 4 + 4 5 + dtype: int64 + + **dict-like `to_replace`** + + >>> df.replace({{0: 10, 1: 100}}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': 0, 'B': 5}}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': {{0: 100, 4: 400}}}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + **Regular expression `to_replace`** + + >>> df = pd.DataFrame({{'A': ['bat', 'foo', 'bait'], + ... 'B': ['abc', 'bar', 'xyz']}}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace({{'A': r'^ba.$'}}, {{'A': 'new'}}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace(regex={{r'^ba.$': 'new', 'foo': 'xyz'}}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Compare the behavior of ``s.replace({{'a': None}})`` and + ``s.replace('a', None)`` to understand the peculiarities + of the `to_replace` parameter: + + >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) + + When one uses a dict as the `to_replace` value, it is like the + value(s) in the dict are equal to the `value` parameter. + ``s.replace({{'a': None}})`` is equivalent to + ``s.replace(to_replace={{'a': None}}, value=None, method=None)``: + + >>> s.replace({{'a': None}}) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + When ``value`` is not explicitly passed and `to_replace` is a scalar, list + or tuple, `replace` uses the method parameter (default 'pad') to do the + replacement. So this is why the 'a' values are being replaced by 10 + in rows 1 and 2 and 'b' in row 4 in this case. + + >>> s.replace('a') + 0 10 + 1 10 + 2 10 + 3 b + 4 b + dtype: object + + On the other hand, if ``None`` is explicitly passed for ``value``, it will + be respected: + + >>> s.replace('a', None) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + .. 
versionchanged:: 1.4.0 + Previously the explicit ``None`` was silently ignored. +""" + +_shared_docs[ + "idxmin" +] = """ + Return index of first occurrence of minimum over requested axis. + + NA/null values are excluded. + + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns'}}, default 0 + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default {numeric_only_default} + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series + Indexes of minima along the specified axis. + + Raises + ------ + ValueError + * If the row/column is empty + + See Also + -------- + Series.idxmin : Return index of the minimum element. + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmin``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({{'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the minimum value in each column. + + >>> df.idxmin() + consumption Pork + co2_emissions Wheat Products + dtype: object + + To return the index for the minimum value in each row, use ``axis="columns"``. + + >>> df.idxmin(axis="columns") + Pork consumption + Wheat Products co2_emissions + Beef consumption + dtype: object +""" + +_shared_docs[ + "idxmax" +] = """ + Return index of first occurrence of maximum over requested axis. + + NA/null values are excluded. + + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns'}}, default 0 + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default {numeric_only_default} + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series + Indexes of maxima along the specified axis. + + Raises + ------ + ValueError + * If the row/column is empty + + See Also + -------- + Series.idxmax : Return index of the maximum element. + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmax``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({{'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the maximum value in each column. + + >>> df.idxmax() + consumption Wheat Products + co2_emissions Beef + dtype: object + + To return the index for the maximum value in each row, use ``axis="columns"``. 
+ + >>> df.idxmax(axis="columns") + Pork co2_emissions + Wheat Products consumption + Beef co2_emissions + dtype: object +""" diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py new file mode 100644 index 00000000..1d1a93b8 --- /dev/null +++ b/pandas/core/sorting.py @@ -0,0 +1,734 @@ +""" miscellaneous sorting / groupby utilities """ +from __future__ import annotations + +from collections import defaultdict +from typing import ( + TYPE_CHECKING, + Callable, + DefaultDict, + Hashable, + Iterable, + Sequence, + cast, +) +import warnings + +import numpy as np + +from pandas._libs import ( + algos, + hashtable, + lib, +) +from pandas._libs.hashtable import unique_label_indices +from pandas._typing import ( + IndexKeyFunc, + Level, + NaPosition, + Shape, + SortKind, + npt, +) + +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_platform_int, + is_extension_array_dtype, +) +from pandas.core.dtypes.generic import ( + ABCMultiIndex, + ABCRangeIndex, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.construction import extract_array + +if TYPE_CHECKING: + from pandas import MultiIndex + from pandas.core.arrays import ExtensionArray + from pandas.core.indexes.base import Index + + +def get_indexer_indexer( + target: Index, + level: Level | list[Level] | None, + ascending: Sequence[bool] | bool, + kind: SortKind, + na_position: NaPosition, + sort_remaining: bool, + key: IndexKeyFunc, +) -> npt.NDArray[np.intp] | None: + """ + Helper method that return the indexer according to input parameters for + the sort_index method of DataFrame and Series. + + Parameters + ---------- + target : Index + level : int or level name or list of ints or list of level names + ascending : bool or list of bools, default True + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + na_position : {'first', 'last'}, default 'last' + sort_remaining : bool, default True + key : callable, optional + + Returns + ------- + Optional[ndarray[intp]] + The indexer for the new index. + """ + + target = ensure_key_mapped(target, key, levels=level) + target = target._sort_levels_monotonic() + + if level is not None: + _, indexer = target.sortlevel( + level, ascending=ascending, sort_remaining=sort_remaining + ) + elif isinstance(target, ABCMultiIndex): + indexer = lexsort_indexer( + target._get_codes_for_sorting(), orders=ascending, na_position=na_position + ) + else: + # Check monotonic-ness before sort an index (GH 11080) + if (ascending and target.is_monotonic_increasing) or ( + not ascending and target.is_monotonic_decreasing + ): + return None + + # ascending can only be a Sequence for MultiIndex + indexer = nargsort( + target, + kind=kind, + ascending=cast(bool, ascending), + na_position=na_position, + ) + return indexer + + +def get_group_index( + labels, shape: Shape, sort: bool, xnull: bool +) -> npt.NDArray[np.int64]: + """ + For the particular label_list, gets the offsets into the hypothetical list + representing the totally ordered cartesian product of all possible label + combinations, *as long as* this space fits within int64 bounds; + otherwise, though group indices identify unique combinations of + labels, they cannot be deconstructed. + - If `sort`, rank of returned ids preserve lexical ranks of labels. + i.e. returned id's can be used to do lexical sort on labels; + - If `xnull` nulls (-1 labels) are passed through. + + Parameters + ---------- + labels : sequence of arrays + Integers identifying levels at each location + shape : tuple[int, ...] 
+ Number of unique levels at each location + sort : bool + If the ranks of returned ids should match lexical ranks of labels + xnull : bool + If true nulls are excluded. i.e. -1 values in the labels are + passed through. + + Returns + ------- + An array of type int64 where two elements are equal if their corresponding + labels are equal at all location. + + Notes + ----- + The length of `labels` and `shape` must be identical. + """ + + def _int64_cut_off(shape) -> int: + acc = 1 + for i, mul in enumerate(shape): + acc *= int(mul) + if not acc < lib.i8max: + return i + return len(shape) + + def maybe_lift(lab, size) -> tuple[np.ndarray, int]: + # promote nan values (assigned -1 label in lab array) + # so that all output values are non-negative + return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) + + labels = [ensure_int64(x) for x in labels] + lshape = list(shape) + if not xnull: + for i, (lab, size) in enumerate(zip(labels, shape)): + lab, size = maybe_lift(lab, size) + labels[i] = lab + lshape[i] = size + + labels = list(labels) + + # Iteratively process all the labels in chunks sized so less + # than lib.i8max unique int ids will be required for each chunk + while True: + # how many levels can be done without overflow: + nlev = _int64_cut_off(lshape) + + # compute flat ids for the first `nlev` levels + stride = np.prod(lshape[1:nlev], dtype="i8") + out = stride * labels[0].astype("i8", subok=False, copy=False) + + for i in range(1, nlev): + if lshape[i] == 0: + stride = np.int64(0) + else: + stride //= lshape[i] + out += labels[i] * stride + + if xnull: # exclude nulls + mask = labels[0] == -1 + for lab in labels[1:nlev]: + mask |= lab == -1 + out[mask] = -1 + + if nlev == len(lshape): # all levels done! + break + + # compress what has been done so far in order to avoid overflow + # to retain lexical ranks, obs_ids should be sorted + comp_ids, obs_ids = compress_group_index(out, sort=sort) + + labels = [comp_ids] + labels[nlev:] + lshape = [len(obs_ids)] + lshape[nlev:] + + return out + + +def get_compressed_ids( + labels, sizes: Shape +) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]: + """ + Group_index is offsets into cartesian product of all possible labels. This + space can be huge, so this function compresses it, by computing offsets + (comp_ids) into the list of unique labels (obs_group_ids). + + Parameters + ---------- + labels : list of label arrays + sizes : tuple[int] of size of the levels + + Returns + ------- + np.ndarray[np.intp] + comp_ids + np.ndarray[np.int64] + obs_group_ids + """ + ids = get_group_index(labels, sizes, sort=True, xnull=False) + return compress_group_index(ids, sort=True) + + +def is_int64_overflow_possible(shape: Shape) -> bool: + the_prod = 1 + for x in shape: + the_prod *= int(x) + + return the_prod >= lib.i8max + + +def _decons_group_index( + comp_labels: npt.NDArray[np.intp], shape: Shape +) -> list[npt.NDArray[np.intp]]: + # reconstruct labels + if is_int64_overflow_possible(shape): + # at some point group indices are factorized, + # and may not be deconstructed here! wrong path! 
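+        # (once the flat ids have been compressed/factorized they no longer
+        #  encode a fixed stride per level, so the modular arithmetic below
+        #  could not recover the original level codes)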
+ raise ValueError("cannot deconstruct factorized group indices!") + + label_list = [] + factor = 1 + y = np.array(0) + x = comp_labels + for i in reversed(range(len(shape))): + labels = (x - y) % (factor * shape[i]) // factor + np.putmask(labels, comp_labels < 0, -1) + label_list.append(labels) + y = labels * factor + factor *= shape[i] + return label_list[::-1] + + +def decons_obs_group_ids( + comp_ids: npt.NDArray[np.intp], + obs_ids: npt.NDArray[np.intp], + shape: Shape, + labels: Sequence[npt.NDArray[np.signedinteger]], + xnull: bool, +) -> list[npt.NDArray[np.intp]]: + """ + Reconstruct labels from observed group ids. + + Parameters + ---------- + comp_ids : np.ndarray[np.intp] + obs_ids: np.ndarray[np.intp] + shape : tuple[int] + labels : Sequence[np.ndarray[np.signedinteger]] + xnull : bool + If nulls are excluded; i.e. -1 labels are passed through. + """ + if not xnull: + lift = np.fromiter(((a == -1).any() for a in labels), dtype=np.intp) + arr_shape = np.asarray(shape, dtype=np.intp) + lift + shape = tuple(arr_shape) + + if not is_int64_overflow_possible(shape): + # obs ids are deconstructable! take the fast route! + out = _decons_group_index(obs_ids, shape) + return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)] + + indexer = unique_label_indices(comp_ids) + return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels] + + +def indexer_from_factorized( + labels, shape: Shape, compress: bool = True +) -> npt.NDArray[np.intp]: + ids = get_group_index(labels, shape, sort=True, xnull=False) + + if not compress: + ngroups = (ids.size and ids.max()) + 1 + else: + ids, obs = compress_group_index(ids, sort=True) + ngroups = len(obs) + + return get_group_index_sorter(ids, ngroups) + + +def lexsort_indexer( + keys, orders=None, na_position: str = "last", key: Callable | None = None +) -> npt.NDArray[np.intp]: + """ + Performs lexical sorting on a set of keys + + Parameters + ---------- + keys : sequence of arrays + Sequence of ndarrays to be sorted by the indexer + orders : bool or list of booleans, optional + Determines the sorting order for each element in keys. If a list, + it must be the same length as keys. This determines whether the + corresponding element in keys should be sorted in ascending + (True) or descending (False) order. if bool, applied to all + elements as above. if None, defaults to True. + na_position : {'first', 'last'}, default 'last' + Determines placement of NA elements in the sorted list ("last" or "first") + key : Callable, optional + Callable key function applied to every element in keys before sorting + + .. 
versionadded:: 1.0.0 + + Returns + ------- + np.ndarray[np.intp] + """ + from pandas.core.arrays import Categorical + + labels = [] + shape = [] + if isinstance(orders, bool): + orders = [orders] * len(keys) + elif orders is None: + orders = [True] * len(keys) + + keys = [ensure_key_mapped(k, key) for k in keys] + + for k, order in zip(keys, orders): + with warnings.catch_warnings(): + # TODO(2.0): unnecessary once deprecation is enforced + # GH#45618 don't issue warning user can't do anything about + warnings.filterwarnings( + "ignore", ".*(SparseArray|SparseDtype).*", category=FutureWarning + ) + + cat = Categorical(k, ordered=True) + + if na_position not in ["last", "first"]: + raise ValueError(f"invalid na_position: {na_position}") + + n = len(cat.categories) + codes = cat.codes.copy() + + mask = cat.codes == -1 + if order: # ascending + if na_position == "last": + codes = np.where(mask, n, codes) + elif na_position == "first": + codes += 1 + else: # not order means descending + if na_position == "last": + codes = np.where(mask, n, n - codes - 1) + elif na_position == "first": + codes = np.where(mask, 0, n - codes) + if mask.any(): + n += 1 + + shape.append(n) + labels.append(codes) + + return indexer_from_factorized(labels, tuple(shape)) + + +def nargsort( + items, + kind: str = "quicksort", + ascending: bool = True, + na_position: str = "last", + key: Callable | None = None, + mask: npt.NDArray[np.bool_] | None = None, +) -> npt.NDArray[np.intp]: + """ + Intended to be a drop-in replacement for np.argsort which handles NaNs. + + Adds ascending, na_position, and key parameters. + + (GH #6399, #5231, #27237) + + Parameters + ---------- + kind : str, default 'quicksort' + ascending : bool, default True + na_position : {'first', 'last'}, default 'last' + key : Optional[Callable], default None + mask : Optional[np.ndarray[bool]], default None + Passed when called by ExtensionArray.argsort. + + Returns + ------- + np.ndarray[np.intp] + """ + + if key is not None: + items = ensure_key_mapped(items, key) + return nargsort( + items, + kind=kind, + ascending=ascending, + na_position=na_position, + key=None, + mask=mask, + ) + + if isinstance(items, ABCRangeIndex): + return items.argsort(ascending=ascending) # TODO: test coverage with key? + elif not isinstance(items, ABCMultiIndex): + items = extract_array(items) + if mask is None: + mask = np.asarray(isna(items)) # TODO: does this exclude MultiIndex too? + + if is_extension_array_dtype(items): + return items.argsort(ascending=ascending, kind=kind, na_position=na_position) + else: + items = np.asanyarray(items) + + idx = np.arange(len(items)) + non_nans = items[~mask] + non_nan_idx = idx[~mask] + + nan_idx = np.nonzero(mask)[0] + if not ascending: + non_nans = non_nans[::-1] + non_nan_idx = non_nan_idx[::-1] + indexer = non_nan_idx[non_nans.argsort(kind=kind)] + if not ascending: + indexer = indexer[::-1] + # Finally, place the NaNs at the end or the beginning according to + # na_position + if na_position == "last": + indexer = np.concatenate([indexer, nan_idx]) + elif na_position == "first": + indexer = np.concatenate([nan_idx, indexer]) + else: + raise ValueError(f"invalid na_position: {na_position}") + return ensure_platform_int(indexer) + + +def nargminmax(values: ExtensionArray, method: str, axis: int = 0): + """ + Implementation of np.argmin/argmax but for ExtensionArray and which + handles missing values. 
+ + Parameters + ---------- + values : ExtensionArray + method : {"argmax", "argmin"} + axis : int, default 0 + + Returns + ------- + int + """ + assert method in {"argmax", "argmin"} + func = np.argmax if method == "argmax" else np.argmin + + mask = np.asarray(isna(values)) + arr_values = values._values_for_argsort() + + if arr_values.ndim > 1: + if mask.any(): + if axis == 1: + zipped = zip(arr_values, mask) + else: + zipped = zip(arr_values.T, mask.T) + return np.array([_nanargminmax(v, m, func) for v, m in zipped]) + return func(arr_values, axis=axis) + + return _nanargminmax(arr_values, mask, func) + + +def _nanargminmax(values: np.ndarray, mask: npt.NDArray[np.bool_], func) -> int: + """ + See nanargminmax.__doc__. + """ + idx = np.arange(values.shape[0]) + non_nans = values[~mask] + non_nan_idx = idx[~mask] + + return non_nan_idx[func(non_nans)] + + +def _ensure_key_mapped_multiindex( + index: MultiIndex, key: Callable, level=None +) -> MultiIndex: + """ + Returns a new MultiIndex in which key has been applied + to all levels specified in level (or all levels if level + is None). Used for key sorting for MultiIndex. + + Parameters + ---------- + index : MultiIndex + Index to which to apply the key function on the + specified levels. + key : Callable + Function that takes an Index and returns an Index of + the same shape. This key is applied to each level + separately. The name of the level can be used to + distinguish different levels for application. + level : list-like, int or str, default None + Level or list of levels to apply the key function to. + If None, key function is applied to all levels. Other + levels are left unchanged. + + Returns + ------- + labels : MultiIndex + Resulting MultiIndex with modified levels. + """ + + if level is not None: + if isinstance(level, (str, int)): + sort_levels = [level] + else: + sort_levels = level + + sort_levels = [index._get_level_number(lev) for lev in sort_levels] + else: + sort_levels = list(range(index.nlevels)) # satisfies mypy + + mapped = [ + ensure_key_mapped(index._get_level_values(level), key) + if level in sort_levels + else index._get_level_values(level) + for level in range(index.nlevels) + ] + + return type(index).from_arrays(mapped) + + +def ensure_key_mapped(values, key: Callable | None, levels=None): + """ + Applies a callable key function to the values function and checks + that the resulting value has the same shape. Can be called on Index + subclasses, Series, DataFrames, or ndarrays. + + Parameters + ---------- + values : Series, DataFrame, Index subclass, or ndarray + key : Optional[Callable], key to be called on the values array + levels : Optional[List], if values is a MultiIndex, list of levels to + apply the key to. + """ + from pandas.core.indexes.api import Index + + if not key: + return values + + if isinstance(values, ABCMultiIndex): + return _ensure_key_mapped_multiindex(values, key, level=levels) + + result = key(values.copy()) + if len(result) != len(values): + raise ValueError( + "User-provided `key` function must not change the shape of the array." + ) + + try: + if isinstance( + values, Index + ): # convert to a new Index subclass, not necessarily the same + result = Index(result) + else: + type_of_values = type(values) + result = type_of_values(result) # try to revert to original type otherwise + except TypeError: + raise TypeError( + f"User-provided `key` function returned an invalid type {type(result)} \ + which could not be converted to {type(values)}." 
+ ) + + return result + + +def get_flattened_list( + comp_ids: npt.NDArray[np.intp], + ngroups: int, + levels: Iterable[Index], + labels: Iterable[np.ndarray], +) -> list[tuple]: + """Map compressed group id -> key tuple.""" + comp_ids = comp_ids.astype(np.int64, copy=False) + arrays: DefaultDict[int, list[int]] = defaultdict(list) + for labs, level in zip(labels, levels): + table = hashtable.Int64HashTable(ngroups) + table.map_keys_to_values(comp_ids, labs.astype(np.int64, copy=False)) + for i in range(ngroups): + arrays[i].append(level[table.get_item(i)]) + return [tuple(array) for array in arrays.values()] + + +def get_indexer_dict( + label_list: list[np.ndarray], keys: list[Index] +) -> dict[Hashable, npt.NDArray[np.intp]]: + """ + Returns + ------- + dict: + Labels mapped to indexers. + """ + shape = tuple(len(x) for x in keys) + + group_index = get_group_index(label_list, shape, sort=True, xnull=True) + if np.all(group_index == -1): + # Short-circuit, lib.indices_fast will return the same + return {} + ngroups = ( + ((group_index.size and group_index.max()) + 1) + if is_int64_overflow_possible(shape) + else np.prod(shape, dtype="i8") + ) + + sorter = get_group_index_sorter(group_index, ngroups) + + sorted_labels = [lab.take(sorter) for lab in label_list] + group_index = group_index.take(sorter) + + return lib.indices_fast(sorter, group_index, keys, sorted_labels) + + +# ---------------------------------------------------------------------- +# sorting levels...cleverly? + + +def get_group_index_sorter( + group_index: npt.NDArray[np.intp], ngroups: int | None = None +) -> npt.NDArray[np.intp]: + """ + algos.groupsort_indexer implements `counting sort` and it is at least + O(ngroups), where + ngroups = prod(shape) + shape = map(len, keys) + that is, linear in the number of combinations (cartesian product) of unique + values of groupby keys. This can be huge when doing multi-key groupby. + np.argsort(kind='mergesort') is O(count x log(count)) where count is the + length of the data-frame; + Both algorithms are `stable` sort and that is necessary for correctness of + groupby operations. e.g. consider: + df.groupby(key)[col].transform('first') + + Parameters + ---------- + group_index : np.ndarray[np.intp] + signed integer dtype + ngroups : int or None, default None + + Returns + ------- + np.ndarray[np.intp] + """ + if ngroups is None: + ngroups = 1 + group_index.max() + count = len(group_index) + alpha = 0.0 # taking complexities literally; there may be + beta = 1.0 # some room for fine-tuning these parameters + do_groupsort = count > 0 and ((alpha + beta * ngroups) < (count * np.log(count))) + if do_groupsort: + sorter, _ = algos.groupsort_indexer( + ensure_platform_int(group_index), + ngroups, + ) + # sorter _should_ already be intp, but mypy is not yet able to verify + else: + sorter = group_index.argsort(kind="mergesort") + return ensure_platform_int(sorter) + + +def compress_group_index( + group_index: npt.NDArray[np.int64], sort: bool = True +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: + """ + Group_index is offsets into cartesian product of all possible labels. This + space can be huge, so this function compresses it, by computing offsets + (comp_ids) into the list of unique labels (obs_group_ids). 
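+
+    Parameters
+    ----------
+    group_index : np.ndarray[np.int64]
+        Offsets into the cartesian product of all possible labels.
+    sort : bool, default True
+        If True, relabel the compressed ids so that ``obs_group_ids``
+        comes out ascending.
+
+    Returns
+    -------
+    np.ndarray[np.int64]
+        comp_ids
+    np.ndarray[np.int64]
+        obs_group_ids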
+ """ + size_hint = len(group_index) + table = hashtable.Int64HashTable(size_hint) + + group_index = ensure_int64(group_index) + + # note, group labels come out ascending (ie, 1,2,3 etc) + comp_ids, obs_group_ids = table.get_labels_groupby(group_index) + + if sort and len(obs_group_ids) > 0: + obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) + + return ensure_int64(comp_ids), ensure_int64(obs_group_ids) + + +def _reorder_by_uniques( + uniques: npt.NDArray[np.int64], labels: npt.NDArray[np.intp] +) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.intp]]: + """ + Parameters + ---------- + uniques : np.ndarray[np.int64] + labels : np.ndarray[np.intp] + + Returns + ------- + np.ndarray[np.int64] + np.ndarray[np.intp] + """ + # sorter is index where elements ought to go + sorter = uniques.argsort() + + # reverse_indexer is where elements came from + reverse_indexer = np.empty(len(sorter), dtype=np.intp) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + mask = labels < 0 + + # move labels to right locations (ie, unsort ascending labels) + labels = reverse_indexer.take(labels) + np.putmask(labels, mask, -1) + + # sort observed ids + uniques = uniques.take(sorter) + + return uniques, labels diff --git a/pandas/core/sparse/__init__.py b/pandas/core/sparse/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/sparse/api.py b/pandas/core/sparse/api.py new file mode 100644 index 00000000..2a324ebf --- /dev/null +++ b/pandas/core/sparse/api.py @@ -0,0 +1,6 @@ +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + +__all__ = ["SparseArray", "SparseDtype"] diff --git a/pandas/core/strings/__init__.py b/pandas/core/strings/__init__.py new file mode 100644 index 00000000..28aba7c9 --- /dev/null +++ b/pandas/core/strings/__init__.py @@ -0,0 +1,33 @@ +""" +Implementation of pandas.Series.str and its interface. + +* strings.accessor.StringMethods : Accessor for Series.str +* strings.base.BaseStringArrayMethods: Mixin ABC for EAs to implement str methods + +Most methods on the StringMethods accessor follow the pattern: + + 1. extract the array from the series (or index) + 2. Call that array's implementation of the string method + 3. Wrap the result (in a Series, index, or DataFrame) + +Pandas extension arrays implementing string methods should inherit from +pandas.core.strings.base.BaseStringArrayMethods. This is an ABC defining +the various string methods. To avoid namespace clashes and pollution, +these are prefixed with `_str_`. So ``Series.str.upper()`` calls +``Series.array._str_upper()``. The interface isn't currently public +to other string extension arrays. +""" +# Pandas current implementation is in ObjectStringArrayMixin. This is designed +# to work on object-dtype ndarrays. 
+# +# BaseStringArrayMethods +# - ObjectStringArrayMixin +# - StringArray +# - PandasArray +# - Categorical +# - ArrowStringArray + +from pandas.core.strings.accessor import StringMethods +from pandas.core.strings.base import BaseStringArrayMethods + +__all__ = ["StringMethods", "BaseStringArrayMethods"] diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py new file mode 100644 index 00000000..7f50381d --- /dev/null +++ b/pandas/core/strings/accessor.py @@ -0,0 +1,3367 @@ +from __future__ import annotations + +import codecs +from functools import wraps +import re +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + cast, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import ( + DtypeObj, + F, + Scalar, +) +from pandas.util._decorators import ( + Appender, + deprecate_nonkeyword_arguments, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_categorical_dtype, + is_integer, + is_list_like, + is_object_dtype, + is_re, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCMultiIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.base import NoNewAttributesMixin +from pandas.core.construction import extract_array + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Index, + Series, + ) + +_shared_docs: dict[str, str] = {} +_cpython_optimized_encoders = ( + "utf-8", + "utf8", + "latin-1", + "latin1", + "iso-8859-1", + "mbcs", + "ascii", +) +_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32") + + +def forbid_nonstring_types( + forbidden: list[str] | None, name: str | None = None +) -> Callable[[F], F]: + """ + Decorator to forbid specific types for a method of StringMethods. + + For calling `.str.{method}` on a Series or Index, it is necessary to first + initialize the :class:`StringMethods` object, and then call the method. + However, different methods allow different input types, and so this can not + be checked during :meth:`StringMethods.__init__`, but must be done on a + per-method basis. This decorator exists to facilitate this process, and + make it explicit which (inferred) types are disallowed by the method. + + :meth:`StringMethods.__init__` allows the *union* of types its different + methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), + namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. + + The default string types ['string', 'empty'] are allowed for all methods. + For the additional types ['bytes', 'mixed', 'mixed-integer'], each method + then needs to forbid the types it is not intended for. + + Parameters + ---------- + forbidden : list-of-str or None + List of forbidden non-string types, may be one or more of + `['bytes', 'mixed', 'mixed-integer']`. + name : str, default None + Name of the method to use in the error message. By default, this is + None, in which case the name from the method being wrapped will be + copied. However, for working with further wrappers (like _pat_wrapper + and _noarg_wrapper), it is necessary to specify the name. + + Returns + ------- + func : wrapper + The method to which the decorator is applied, with an added check that + enforces the inferred type to not be in the list of forbidden types. + + Raises + ------ + TypeError + If the inferred type of the underlying data is in `forbidden`. 
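+
+    Examples
+    --------
+    A decorated accessor method raises for the forbidden inferred dtypes,
+    for example (illustrative):
+
+    >>> pd.Series([b"a", b"b"]).str.upper()
+    Traceback (most recent call last):
+    ...
+    TypeError: Cannot use .str.upper with values of inferred dtype 'bytes'.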
+ """ + # deal with None + forbidden = [] if forbidden is None else forbidden + + allowed_types = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set( + forbidden + ) + + def _forbid_nonstring_types(func: F) -> F: + func_name = func.__name__ if name is None else name + + @wraps(func) + def wrapper(self, *args, **kwargs): + if self._inferred_dtype not in allowed_types: + msg = ( + f"Cannot use .str.{func_name} with values of " + f"inferred dtype '{self._inferred_dtype}'." + ) + raise TypeError(msg) + return func(self, *args, **kwargs) + + wrapper.__name__ = func_name + return cast(F, wrapper) + + return _forbid_nonstring_types + + +def _map_and_wrap(name, docstring): + @forbid_nonstring_types(["bytes"], name=name) + def wrapper(self): + result = getattr(self._data.array, f"_str_{name}")() + return self._wrap_result(result) + + wrapper.__doc__ = docstring + return wrapper + + +class StringMethods(NoNewAttributesMixin): + """ + Vectorized string functions for Series and Index. + + NAs stay NA unless handled otherwise by a particular method. + Patterned after Python's string methods, with some inspiration from + R's stringr package. + + Examples + -------- + >>> s = pd.Series(["A_Str_Series"]) + >>> s + 0 A_Str_Series + dtype: object + + >>> s.str.split("_") + 0 [A, Str, Series] + dtype: object + + >>> s.str.replace("_", "") + 0 AStrSeries + dtype: object + """ + + # Note: see the docstring in pandas.core.strings.__init__ + # for an explanation of the implementation. + # TODO: Dispatch all the methods + # Currently the following are not dispatched to the array + # * cat + # * extractall + + def __init__(self, data) -> None: + from pandas.core.arrays.string_ import StringDtype + + self._inferred_dtype = self._validate(data) + self._is_categorical = is_categorical_dtype(data.dtype) + self._is_string = isinstance(data.dtype, StringDtype) + self._data = data + + self._index = self._name = None + if isinstance(data, ABCSeries): + self._index = data.index + self._name = data.name + + # ._values.categories works for both Series/Index + self._parent = data._values.categories if self._is_categorical else data + # save orig to blow up categoricals to the right type + self._orig = data + self._freeze() + + @staticmethod + def _validate(data): + """ + Auxiliary function for StringMethods, infers and checks dtype of data. + + This is a "first line of defence" at the creation of the StringMethods- + object, and just checks that the dtype is in the + *union* of the allowed types over all string methods below; this + restriction is then refined on a per-method basis using the decorator + @forbid_nonstring_types (more info in the corresponding docstring). 
+ + This really should exclude all series/index with any non-string values, + but that isn't practical for performance reasons until we have a str + dtype (GH 9343 / 13877) + + Parameters + ---------- + data : The content of the Series + + Returns + ------- + dtype : inferred dtype of data + """ + if isinstance(data, ABCMultiIndex): + raise AttributeError( + "Can only use .str accessor with Index, not MultiIndex" + ) + + # see _libs/lib.pyx for list of inferred types + allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] + + data = extract_array(data) + + values = getattr(data, "categories", data) # categorical / normal + + inferred_dtype = lib.infer_dtype(values, skipna=True) + + if inferred_dtype not in allowed_types: + raise AttributeError("Can only use .str accessor with string values!") + return inferred_dtype + + def __getitem__(self, key): + result = self._data.array._str_getitem(key) + return self._wrap_result(result) + + def __iter__(self): + warnings.warn( + "Columnar iteration over characters will be deprecated in future releases.", + FutureWarning, + stacklevel=find_stack_level(), + ) + i = 0 + g = self.get(i) + while g.notna().any(): + yield g + i += 1 + g = self.get(i) + + def _wrap_result( + self, + result, + name=None, + expand: bool | None = None, + fill_value=np.nan, + returns_string=True, + returns_bool: bool = False, + ): + from pandas import ( + Index, + MultiIndex, + ) + + if not hasattr(result, "ndim") or not hasattr(result, "dtype"): + if isinstance(result, ABCDataFrame): + result = result.__finalize__(self._orig, name="str") + return result + assert result.ndim < 3 + + # We can be wrapping a string / object / categorical result, in which + # case we'll want to return the same dtype as the input. + # Or we can be wrapping a numeric output, in which case we don't want + # to return a StringArray. + # Ideally the array method returns the right array type. + if expand is None: + # infer from ndim if expand is not specified + expand = result.ndim != 1 + + elif ( + expand is True + and is_object_dtype(result) + and not isinstance(self._orig, ABCIndex) + ): + # required when expand=True is explicitly specified + # not needed when inferred + + def cons_row(x): + if is_list_like(x): + return x + else: + return [x] + + result = [cons_row(x) for x in result] + if result and not self._is_string: + # propagate nan values to match longest sequence (GH 18450) + max_len = max(len(x) for x in result) + result = [ + x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result + ] + + if not isinstance(expand, bool): + raise ValueError("expand must be True or False") + + if expand is False: + # if expand is False, result should have the same name + # as the original otherwise specified + if name is None: + name = getattr(result, "name", None) + if name is None: + # do not use logical or, _orig may be a DataFrame + # which has "name" column + name = self._orig.name + + # Wait until we are sure result is a Series or Index before + # checking attributes (GH 12180) + if isinstance(self._orig, ABCIndex): + # if result is a boolean np.array, return the np.array + # instead of wrapping it into a boolean Index (GH 8875) + if is_bool_dtype(result): + return result + + if expand: + result = list(result) + out = MultiIndex.from_tuples(result, names=name) + if out.nlevels == 1: + # We had all tuples of length-one, which are + # better represented as a regular Index. 
+ out = out.get_level_values(0) + return out + else: + return Index._with_infer(result, name=name) + else: + index = self._orig.index + # This is a mess. + dtype: DtypeObj | str | None + vdtype = getattr(result, "dtype", None) + if self._is_string: + if is_bool_dtype(vdtype): + dtype = result.dtype + elif returns_string: + dtype = self._orig.dtype + else: + dtype = vdtype + else: + dtype = vdtype + + if expand: + cons = self._orig._constructor_expanddim + result = cons(result, columns=name, index=index, dtype=dtype) + else: + # Must be a Series + cons = self._orig._constructor + result = cons(result, name=name, index=index, dtype=dtype) + result = result.__finalize__(self._orig, method="str") + if name is not None and result.ndim == 1: + # __finalize__ might copy over the original name, but we may + # want the new name (e.g. str.extract). + result.name = name + return result + + def _get_series_list(self, others): + """ + Auxiliary function for :meth:`str.cat`. Turn potentially mixed input + into a list of Series (elements without an index must match the length + of the calling Series/Index). + + Parameters + ---------- + others : Series, DataFrame, np.ndarray, list-like or list-like of + Objects that are either Series, Index or np.ndarray (1-dim). + + Returns + ------- + list of Series + Others transformed into list of Series. + """ + from pandas import ( + DataFrame, + Series, + ) + + # self._orig is either Series or Index + idx = self._orig if isinstance(self._orig, ABCIndex) else self._orig.index + + # Generally speaking, all objects without an index inherit the index + # `idx` of the calling Series/Index - i.e. must have matching length. + # Objects with an index (i.e. Series/Index/DataFrame) keep their own. + if isinstance(others, ABCSeries): + return [others] + elif isinstance(others, ABCIndex): + return [Series(others._values, index=idx, dtype=others.dtype)] + elif isinstance(others, ABCDataFrame): + return [others[x] for x in others] + elif isinstance(others, np.ndarray) and others.ndim == 2: + others = DataFrame(others, index=idx) + return [others[x] for x in others] + elif is_list_like(others, allow_sets=False): + others = list(others) # ensure iterators do not get read twice etc + + # in case of list-like `others`, all elements must be + # either Series/Index/np.ndarray (1-dim)... + if all( + isinstance(x, (ABCSeries, ABCIndex)) + or (isinstance(x, np.ndarray) and x.ndim == 1) + for x in others + ): + los: list[Series] = [] + while others: # iterate through list and append each element + los = los + self._get_series_list(others.pop(0)) + return los + # ... or just strings + elif all(not is_list_like(x) for x in others): + return [Series(others, index=idx)] + raise TypeError( + "others must be Series, Index, DataFrame, np.ndarray " + "or list-like (either containing only strings or " + "containing only objects of type Series/Index/" + "np.ndarray[1-dim])" + ) + + @forbid_nonstring_types(["bytes", "mixed", "mixed-integer"]) + def cat( + self, others=None, sep=None, na_rep=None, join="left" + ) -> str | Series | Index: + """ + Concatenate strings in the Series/Index with given separator. + + If `others` is specified, this function concatenates the Series/Index + and elements of `others` element-wise. + If `others` is not passed, then all values in the Series/Index are + concatenated into a single string with a given `sep`. 
+ + Parameters + ---------- + others : Series, Index, DataFrame, np.ndarray or list-like + Series, Index, DataFrame, np.ndarray (one- or two-dimensional) and + other list-likes of strings must have the same length as the + calling Series/Index, with the exception of indexed objects (i.e. + Series/Index/DataFrame) if `join` is not None. + + If others is a list-like that contains a combination of Series, + Index or np.ndarray (1-dim), then all elements will be unpacked and + must satisfy the above criteria individually. + + If others is None, the method returns the concatenation of all + strings in the calling Series/Index. + sep : str, default '' + The separator between the different elements/columns. By default + the empty string `''` is used. + na_rep : str or None, default None + Representation that is inserted for all missing values: + + - If `na_rep` is None, and `others` is None, missing values in the + Series/Index are omitted from the result. + - If `na_rep` is None, and `others` is not None, a row containing a + missing value in any of the columns (before concatenation) will + have a missing value in the result. + join : {'left', 'right', 'outer', 'inner'}, default 'left' + Determines the join-style between the calling Series/Index and any + Series/Index/DataFrame in `others` (objects without an index need + to match the length of the calling Series/Index). To disable + alignment, use `.values` on any Series/Index/DataFrame in `others`. + + .. versionadded:: 0.23.0 + .. versionchanged:: 1.0.0 + Changed default of `join` from None to `'left'`. + + Returns + ------- + str, Series or Index + If `others` is None, `str` is returned, otherwise a `Series/Index` + (same type as caller) of objects is returned. + + See Also + -------- + split : Split each string in the Series/Index. + join : Join lists contained as elements in the Series/Index. + + Examples + -------- + When not passing `others`, all values are concatenated into a single + string: + + >>> s = pd.Series(['a', 'b', np.nan, 'd']) + >>> s.str.cat(sep=' ') + 'a b d' + + By default, NA values in the Series are ignored. Using `na_rep`, they + can be given a representation: + + >>> s.str.cat(sep=' ', na_rep='?') + 'a b ? d' + + If `others` is specified, corresponding values are concatenated with + the separator. Result will be a Series of strings. + + >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',') + 0 a,A + 1 b,B + 2 NaN + 3 d,D + dtype: object + + Missing values will remain missing in the result, but can again be + represented using `na_rep` + + >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-') + 0 a,A + 1 b,B + 2 -,C + 3 d,D + dtype: object + + If `sep` is not specified, the values are concatenated without + separation. + + >>> s.str.cat(['A', 'B', 'C', 'D'], na_rep='-') + 0 aA + 1 bB + 2 -C + 3 dD + dtype: object + + Series with different indexes can be aligned before concatenation. The + `join`-keyword works as in other methods. + + >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2]) + >>> s.str.cat(t, join='left', na_rep='-') + 0 aa + 1 b- + 2 -c + 3 dd + dtype: object + >>> + >>> s.str.cat(t, join='outer', na_rep='-') + 0 aa + 1 b- + 2 -c + 3 dd + 4 -e + dtype: object + >>> + >>> s.str.cat(t, join='inner', na_rep='-') + 0 aa + 2 -c + 3 dd + dtype: object + >>> + >>> s.str.cat(t, join='right', na_rep='-') + 3 dd + 0 aa + 4 -e + 2 -c + dtype: object + + For more examples, see :ref:`here `. 
+ """ + # TODO: dispatch + from pandas import ( + Index, + Series, + concat, + ) + + if isinstance(others, str): + raise ValueError("Did you mean to supply a `sep` keyword?") + if sep is None: + sep = "" + + if isinstance(self._orig, ABCIndex): + data = Series(self._orig, index=self._orig, dtype=self._orig.dtype) + else: # Series + data = self._orig + + # concatenate Series/Index with itself if no "others" + if others is None: + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Series") + data = ensure_object(data) # type: ignore[assignment] + na_mask = isna(data) + if na_rep is None and na_mask.any(): + return sep.join(data[~na_mask]) + elif na_rep is not None and na_mask.any(): + return sep.join(np.where(na_mask, na_rep, data)) + else: + return sep.join(data) + + try: + # turn anything in "others" into lists of Series + others = self._get_series_list(others) + except ValueError as err: # do not catch TypeError raised by _get_series_list + raise ValueError( + "If `others` contains arrays or lists (or other " + "list-likes without an index), these must all be " + "of the same length as the calling Series/Index." + ) from err + + # align if required + if any(not data.index.equals(x.index) for x in others): + # Need to add keys for uniqueness in case of duplicate columns + others = concat( + others, + axis=1, + join=(join if join == "inner" else "outer"), + keys=range(len(others)), + sort=False, + copy=False, + ) + data, others = data.align(others, join=join) + others = [others[x] for x in others] # again list of Series + + all_cols = [ensure_object(x) for x in [data] + others] + na_masks = np.array([isna(x) for x in all_cols]) + union_mask = np.logical_or.reduce(na_masks, axis=0) + + if na_rep is None and union_mask.any(): + # no na_rep means NaNs for all rows where any column has a NaN + # only necessary if there are actually any NaNs + result = np.empty(len(data), dtype=object) + np.putmask(result, union_mask, np.nan) + + not_masked = ~union_mask + result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep) + elif na_rep is not None and union_mask.any(): + # fill NaNs with na_rep in case there are actually any NaNs + all_cols = [ + np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols) + ] + result = cat_safe(all_cols, sep) + else: + # no NaNs - can just concatenate + result = cat_safe(all_cols, sep) + + out: Index | Series + if isinstance(self._orig, ABCIndex): + # add dtype for case that result is all-NA + + out = Index(result, dtype=object, name=self._orig.name) + else: # Series + if is_categorical_dtype(self._orig.dtype): + # We need to infer the new categories. + dtype = None + else: + dtype = self._orig.dtype + res_ser = Series( + result, dtype=dtype, index=data.index, name=self._orig.name + ) + out = res_ser.__finalize__(self._orig, method="str_cat") + return out + + _shared_docs[ + "str_split" + ] = r""" + Split strings around given separator/delimiter. + + Splits the string in the Series/Index from the %(side)s, + at the specified delimiter string. + + Parameters + ---------- + pat : str%(pat_regex)s, optional + %(pat_description)s. + If not specified, split on whitespace. + n : int, default -1 (all) + Limit number of splits in output. + ``None``, 0 and -1 will be interpreted as return all splits. + expand : bool, default False + Expand the split strings into separate columns. + + - If ``True``, return DataFrame/MultiIndex expanding dimensionality. 
+ - If ``False``, return Series/Index, containing lists of strings. + %(regex_argument)s + Returns + ------- + Series, Index, DataFrame or MultiIndex + Type matches caller unless ``expand=True`` (see Notes). + %(raises_split)s + See Also + -------- + Series.str.split : Split strings around given separator/delimiter. + Series.str.rsplit : Splits string around given separator/delimiter, + starting from the right. + Series.str.join : Join lists contained as elements in the Series/Index + with passed delimiter. + str.split : Standard library version for split. + str.rsplit : Standard library version for rsplit. + + Notes + ----- + The handling of the `n` keyword depends on the number of found splits: + + - If found splits > `n`, make first `n` splits only + - If found splits <= `n`, make all splits + - If for a certain row the number of found splits < `n`, + append `None` for padding up to `n` if ``expand=True`` + + If using ``expand=True``, Series and Index callers return DataFrame and + MultiIndex objects, respectively. + %(regex_pat_note)s + Examples + -------- + >>> s = pd.Series( + ... [ + ... "this is a regular sentence", + ... "https://docs.python.org/3/tutorial/index.html", + ... np.nan + ... ] + ... ) + >>> s + 0 this is a regular sentence + 1 https://docs.python.org/3/tutorial/index.html + 2 NaN + dtype: object + + In the default setting, the string is split by whitespace. + + >>> s.str.split() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + Without the `n` parameter, the outputs of `rsplit` and `split` + are identical. + + >>> s.str.rsplit() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The `n` parameter can be used to limit the number of splits on the + delimiter. The outputs of `split` and `rsplit` are different. + + >>> s.str.split(n=2) + 0 [this, is, a regular sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + >>> s.str.rsplit(n=2) + 0 [this is a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The `pat` parameter can be used to split by other characters. + + >>> s.str.split(pat="/") + 0 [this is a regular sentence] + 1 [https:, , docs.python.org, 3, tutorial, index... + 2 NaN + dtype: object + + When using ``expand=True``, the split elements will expand out into + separate columns. If NaN is present, it is propagated throughout + the columns during the split. + + >>> s.str.split(expand=True) + 0 1 2 3 4 + 0 this is a regular sentence + 1 https://docs.python.org/3/tutorial/index.html None None None None + 2 NaN NaN NaN NaN NaN + + For slightly more complex use cases like splitting the html document name + from a url, a combination of parameter settings can be used. + + >>> s.str.rsplit("/", n=1, expand=True) + 0 1 + 0 this is a regular sentence None + 1 https://docs.python.org/3/tutorial index.html + 2 NaN NaN + %(regex_examples)s""" + + @Appender( + _shared_docs["str_split"] + % { + "side": "beginning", + "pat_regex": " or compiled regex", + "pat_description": "String or regular expression to split on", + "regex_argument": """ + regex : bool, default None + Determines if the passed-in pattern is a regular expression: + + - If ``True``, assumes the passed-in pattern is a regular expression + - If ``False``, treats the pattern as a literal string. + - If ``None`` and `pat` length is 1, treats `pat` as a literal string. 
+ - If ``None`` and `pat` length is not 1, treats `pat` as a regular expression. + - Cannot be set to False if `pat` is a compiled regex + + .. versionadded:: 1.4.0 + """, + "raises_split": """ + Raises + ------ + ValueError + * if `regex` is False and `pat` is a compiled regex + """, + "regex_pat_note": """ + Use of `regex =False` with a `pat` as a compiled regex will raise an error. + """, + "method": "split", + "regex_examples": r""" + Remember to escape special characters when explicitly using regular expressions. + + >>> s = pd.Series(["foo and bar plus baz"]) + >>> s.str.split(r"and|plus", expand=True) + 0 1 2 + 0 foo bar baz + + Regular expressions can be used to handle urls or file names. + When `pat` is a string and ``regex=None`` (the default), the given `pat` is compiled + as a regex only if ``len(pat) != 1``. + + >>> s = pd.Series(['foojpgbar.jpg']) + >>> s.str.split(r".", expand=True) + 0 1 + 0 foojpgbar jpg + + >>> s.str.split(r"\.jpg", expand=True) + 0 1 + 0 foojpgbar + + When ``regex=True``, `pat` is interpreted as a regex + + >>> s.str.split(r"\.jpg", regex=True, expand=True) + 0 1 + 0 foojpgbar + + A compiled regex can be passed as `pat` + + >>> import re + >>> s.str.split(re.compile(r"\.jpg"), expand=True) + 0 1 + 0 foojpgbar + + When ``regex=False``, `pat` is interpreted as the string itself + + >>> s.str.split(r"\.jpg", regex=False, expand=True) + 0 + 0 foojpgbar.jpg + """, + } + ) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "pat"]) + @forbid_nonstring_types(["bytes"]) + def split( + self, + pat: str | re.Pattern | None = None, + n=-1, + expand=False, + *, + regex: bool | None = None, + ): + if regex is False and is_re(pat): + raise ValueError( + "Cannot use a compiled regex as replacement pattern with regex=False" + ) + if is_re(pat): + regex = True + result = self._data.array._str_split(pat, n, expand, regex) + return self._wrap_result(result, returns_string=expand, expand=expand) + + @Appender( + _shared_docs["str_split"] + % { + "side": "end", + "pat_regex": "", + "pat_description": "String to split on", + "regex_argument": "", + "raises_split": "", + "regex_pat_note": "", + "method": "rsplit", + "regex_examples": "", + } + ) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "pat"]) + @forbid_nonstring_types(["bytes"]) + def rsplit(self, pat=None, n=-1, expand=False): + result = self._data.array._str_rsplit(pat, n=n) + return self._wrap_result(result, expand=expand, returns_string=expand) + + _shared_docs[ + "str_partition" + ] = """ + Split the string at the %(side)s occurrence of `sep`. + + This method splits the string at the %(side)s occurrence of `sep`, + and returns 3 elements containing the part before the separator, + the separator itself, and the part after the separator. + If the separator is not found, return %(return)s. + + Parameters + ---------- + sep : str, default whitespace + String to split on. + expand : bool, default True + If True, return DataFrame/MultiIndex expanding dimensionality. + If False, return Series/Index. + + Returns + ------- + DataFrame/MultiIndex or Series/Index of objects + + See Also + -------- + %(also)s + Series.str.split : Split strings around given separators. + str.partition : Standard library version. 
+ + Examples + -------- + + >>> s = pd.Series(['Linda van der Berg', 'George Pitt-Rivers']) + >>> s + 0 Linda van der Berg + 1 George Pitt-Rivers + dtype: object + + >>> s.str.partition() + 0 1 2 + 0 Linda van der Berg + 1 George Pitt-Rivers + + To partition by the last space instead of the first one: + + >>> s.str.rpartition() + 0 1 2 + 0 Linda van der Berg + 1 George Pitt-Rivers + + To partition by something different than a space: + + >>> s.str.partition('-') + 0 1 2 + 0 Linda van der Berg + 1 George Pitt - Rivers + + To return a Series containing tuples instead of a DataFrame: + + >>> s.str.partition('-', expand=False) + 0 (Linda van der Berg, , ) + 1 (George Pitt, -, Rivers) + dtype: object + + Also available on indices: + + >>> idx = pd.Index(['X 123', 'Y 999']) + >>> idx + Index(['X 123', 'Y 999'], dtype='object') + + Which will create a MultiIndex: + + >>> idx.str.partition() + MultiIndex([('X', ' ', '123'), + ('Y', ' ', '999')], + ) + + Or an index with tuples with ``expand=False``: + + >>> idx.str.partition(expand=False) + Index([('X', ' ', '123'), ('Y', ' ', '999')], dtype='object') + """ + + @Appender( + _shared_docs["str_partition"] + % { + "side": "first", + "return": "3 elements containing the string itself, followed by two " + "empty strings", + "also": "rpartition : Split the string at the last occurrence of `sep`.", + } + ) + @forbid_nonstring_types(["bytes"]) + def partition(self, sep=" ", expand=True): + result = self._data.array._str_partition(sep, expand) + return self._wrap_result(result, expand=expand, returns_string=expand) + + @Appender( + _shared_docs["str_partition"] + % { + "side": "last", + "return": "3 elements containing two empty strings, followed by the " + "string itself", + "also": "partition : Split the string at the first occurrence of `sep`.", + } + ) + @forbid_nonstring_types(["bytes"]) + def rpartition(self, sep=" ", expand=True): + result = self._data.array._str_rpartition(sep, expand) + return self._wrap_result(result, expand=expand, returns_string=expand) + + def get(self, i): + """ + Extract element from each component at specified position or with specified key. + + Extract element from lists, tuples, dict, or strings in each element in the + Series/Index. + + Parameters + ---------- + i : int or hashable dict label + Position or key of element to extract. + + Returns + ------- + Series or Index + + Examples + -------- + >>> s = pd.Series(["String", + ... (1, 2, 3), + ... ["a", "b", "c"], + ... 123, + ... -456, + ... {1: "Hello", "2": "World"}]) + >>> s + 0 String + 1 (1, 2, 3) + 2 [a, b, c] + 3 123 + 4 -456 + 5 {1: 'Hello', '2': 'World'} + dtype: object + + >>> s.str.get(1) + 0 t + 1 2 + 2 b + 3 NaN + 4 NaN + 5 Hello + dtype: object + + >>> s.str.get(-1) + 0 g + 1 3 + 2 c + 3 NaN + 4 NaN + 5 None + dtype: object + + Return element with given key + + >>> s = pd.Series([{"name": "Hello", "value": "World"}, + ... {"name": "Goodbye", "value": "Planet"}]) + >>> s.str.get('name') + 0 Hello + 1 Goodbye + dtype: object + """ + result = self._data.array._str_get(i) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def join(self, sep): + """ + Join lists contained as elements in the Series/Index with passed delimiter. + + If the elements of a Series are lists themselves, join the content of these + lists using the delimiter passed to the function. + This function is an equivalent to :meth:`str.join`. + + Parameters + ---------- + sep : str + Delimiter to use between list entries. 
+ + Returns + ------- + Series/Index: object + The list entries concatenated by intervening occurrences of the + delimiter. + + Raises + ------ + AttributeError + If the supplied Series contains neither strings nor lists. + + See Also + -------- + str.join : Standard library version of this method. + Series.str.split : Split strings around given separator/delimiter. + + Notes + ----- + If any of the list items is not a string object, the result of the join + will be `NaN`. + + Examples + -------- + Example with a list that contains non-string elements. + + >>> s = pd.Series([['lion', 'elephant', 'zebra'], + ... [1.1, 2.2, 3.3], + ... ['cat', np.nan, 'dog'], + ... ['cow', 4.5, 'goat'], + ... ['duck', ['swan', 'fish'], 'guppy']]) + >>> s + 0 [lion, elephant, zebra] + 1 [1.1, 2.2, 3.3] + 2 [cat, nan, dog] + 3 [cow, 4.5, goat] + 4 [duck, [swan, fish], guppy] + dtype: object + + Join all lists using a '-'. The lists containing object(s) of types other + than str will produce a NaN. + + >>> s.str.join('-') + 0 lion-elephant-zebra + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: object + """ + result = self._data.array._str_join(sep) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def contains(self, pat, case=True, flags=0, na=None, regex=True): + r""" + Test if pattern or regex is contained within a string of a Series or Index. + + Return boolean Series or Index based on whether a given pattern or regex is + contained within a string of a Series or Index. + + Parameters + ---------- + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. + flags : int, default 0 (no flags) + Flags to pass through to the re module, e.g. re.IGNORECASE. + na : scalar, optional + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. + regex : bool, default True + If True, assumes the pat is a regular expression. + + If False, treats the pat as a literal string. + + Returns + ------- + Series or Index of boolean values + A Series or Index of boolean values indicating whether the + given pattern is contained within the string of each element + of the Series or Index. + + See Also + -------- + match : Analogous, but stricter, relying on re.match instead of re.search. + Series.str.startswith : Test if the start of each string element matches a + pattern. + Series.str.endswith : Same as startswith, but tests the end of string. + + Examples + -------- + Returning a Series of booleans using only a literal pattern. + + >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) + >>> s1.str.contains('og', regex=False) + 0 False + 1 True + 2 False + 3 False + 4 NaN + dtype: object + + Returning an Index of booleans using only a literal pattern. + + >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) + >>> ind.str.contains('23', regex=False) + Index([False, False, False, True, nan], dtype='object') + + Specifying case sensitivity using `case`. + + >>> s1.str.contains('oG', case=True, regex=True) + 0 False + 1 False + 2 False + 3 False + 4 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN` replaces NaN values + with `False`. If Series or Index does not contain NaN values + the resultant dtype will be `bool`, otherwise, an `object` dtype. 
+ + >>> s1.str.contains('og', na=False, regex=True) + 0 False + 1 True + 2 False + 3 False + 4 False + dtype: bool + + Returning 'house' or 'dog' when either expression occurs in a string. + + >>> s1.str.contains('house|dog', regex=True) + 0 False + 1 True + 2 True + 3 False + 4 NaN + dtype: object + + Ignoring case sensitivity using `flags` with regex. + + >>> import re + >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True) + 0 False + 1 False + 2 True + 3 False + 4 NaN + dtype: object + + Returning any digit using regular expression. + + >>> s1.str.contains('\\d', regex=True) + 0 False + 1 False + 2 False + 3 True + 4 NaN + dtype: object + + Ensure `pat` is a not a literal pattern when `regex` is set to True. + Note in the following example one might expect only `s2[1]` and `s2[3]` to + return `True`. However, '.0' as a regex matches any character + followed by a 0. + + >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) + >>> s2.str.contains('.0', regex=True) + 0 True + 1 True + 2 False + 3 True + 4 False + dtype: bool + """ + if regex and re.compile(pat).groups: + warnings.warn( + "This pattern is interpreted as a regular expression, and has " + "match groups. To actually get the groups, use str.extract.", + UserWarning, + stacklevel=find_stack_level(), + ) + + result = self._data.array._str_contains(pat, case, flags, na, regex) + return self._wrap_result(result, fill_value=na, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def match(self, pat, case=True, flags=0, na=None): + """ + Determine if each string starts with a match of a regular expression. + + Parameters + ---------- + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. + flags : int, default 0 (no flags) + Regex module flags, e.g. re.IGNORECASE. + na : scalar, optional + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. + + Returns + ------- + Series/Index/array of boolean values + + See Also + -------- + fullmatch : Stricter matching that requires the entire string to match. + contains : Analogous, but less strict, relying on re.search instead of + re.match. + extract : Extract matched groups. + """ + result = self._data.array._str_match(pat, case=case, flags=flags, na=na) + return self._wrap_result(result, fill_value=na, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def fullmatch(self, pat, case=True, flags=0, na=None): + """ + Determine if each string entirely matches a regular expression. + + .. versionadded:: 1.1.0 + + Parameters + ---------- + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. + flags : int, default 0 (no flags) + Regex module flags, e.g. re.IGNORECASE. + na : scalar, optional + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. + + Returns + ------- + Series/Index/array of boolean values + + See Also + -------- + match : Similar, but also returns `True` when only a *prefix* of the string + matches the regular expression. + extract : Extract matched groups. 
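+
+ Examples
+ --------
+ A brief, constructed example showing how ``fullmatch`` differs from
+ ``match`` (which only requires the pattern to match at the start of
+ the string):
+
+ >>> ser = pd.Series(["cat", "catalog", "dog"])
+ >>> ser.str.match(r"cat")
+ 0     True
+ 1     True
+ 2    False
+ dtype: bool
+ >>> ser.str.fullmatch(r"cat")
+ 0     True
+ 1    False
+ 2    False
+ dtype: bool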
+ """ + result = self._data.array._str_fullmatch(pat, case=case, flags=flags, na=na) + return self._wrap_result(result, fill_value=na, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def replace( + self, + pat: str | re.Pattern, + repl: str | Callable, + n: int = -1, + case: bool | None = None, + flags: int = 0, + regex: bool | None = None, + ): + r""" + Replace each occurrence of pattern/regex in the Series/Index. + + Equivalent to :meth:`str.replace` or :func:`re.sub`, depending on + the regex value. + + Parameters + ---------- + pat : str or compiled regex + String can be a character sequence or regular expression. + repl : str or callable + Replacement string or a callable. The callable is passed the regex + match object and must return a replacement string to be used. + See :func:`re.sub`. + n : int, default -1 (all) + Number of replacements to make from start. + case : bool, default None + Determines if replace is case sensitive: + + - If True, case sensitive (the default if `pat` is a string) + - Set to False for case insensitive + - Cannot be set if `pat` is a compiled regex. + + flags : int, default 0 (no flags) + Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled + regex. + regex : bool, default True + Determines if the passed-in pattern is a regular expression: + + - If True, assumes the passed-in pattern is a regular expression. + - If False, treats the pattern as a literal string + - Cannot be set to False if `pat` is a compiled regex or `repl` is + a callable. + + .. versionadded:: 0.23.0 + + Returns + ------- + Series or Index of object + A copy of the object with all matching occurrences of `pat` replaced by + `repl`. + + Raises + ------ + ValueError + * if `regex` is False and `repl` is a callable or `pat` is a compiled + regex + * if `pat` is a compiled regex and `case` or `flags` is set + + Notes + ----- + When `pat` is a compiled regex, all flags should be included in the + compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled + regex will raise an error. + + Examples + -------- + When `pat` is a string and `regex` is True (the default), the given `pat` + is compiled as a regex. When `repl` is a string, it replaces matching + regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are + left as is: + + >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True) + 0 bao + 1 baz + 2 NaN + dtype: object + + When `pat` is a string and `regex` is False, every `pat` is replaced with + `repl` as with :meth:`str.replace`: + + >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False) + 0 bao + 1 fuz + 2 NaN + dtype: object + + When `repl` is a callable, it is called on every `pat` using + :func:`re.sub`. The callable should expect one positional argument + (a regex object) and return a string. 
+ + To get the idea: + + >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr, regex=True) + 0 oo + 1 uz + 2 NaN + dtype: object + + Reverse every lowercase alphabetic word: + + >>> repl = lambda m: m.group(0)[::-1] + >>> ser = pd.Series(['foo 123', 'bar baz', np.nan]) + >>> ser.str.replace(r'[a-z]+', repl, regex=True) + 0 oof 123 + 1 rab zab + 2 NaN + dtype: object + + Using regex groups (extract second group and swap case): + + >>> pat = r"(?P\w+) (?P\w+) (?P\w+)" + >>> repl = lambda m: m.group('two').swapcase() + >>> ser = pd.Series(['One Two Three', 'Foo Bar Baz']) + >>> ser.str.replace(pat, repl, regex=True) + 0 tWO + 1 bAR + dtype: object + + Using a compiled regex with flags + + >>> import re + >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE) + >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar', regex=True) + 0 foo + 1 bar + 2 NaN + dtype: object + """ + if regex is None: + if isinstance(pat, str) and any(c in pat for c in ".+*|^$?[](){}\\"): + # warn only in cases where regex behavior would differ from literal + msg = ( + "The default value of regex will change from True to False " + "in a future version." + ) + if len(pat) == 1: + msg += ( + " In addition, single character regular expressions will " + "*not* be treated as literal strings when regex=True." + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + + # Check whether repl is valid (GH 13438, GH 15055) + if not (isinstance(repl, str) or callable(repl)): + raise TypeError("repl must be a string or callable") + + is_compiled_re = is_re(pat) + if regex or regex is None: + if is_compiled_re and (case is not None or flags != 0): + raise ValueError( + "case and flags cannot be set when pat is a compiled regex" + ) + + elif is_compiled_re: + raise ValueError( + "Cannot use a compiled regex as replacement pattern with regex=False" + ) + elif callable(repl): + raise ValueError("Cannot use a callable replacement when regex=False") + + # The current behavior is to treat single character patterns as literal strings, + # even when ``regex`` is set to ``True``. + if isinstance(pat, str) and len(pat) == 1: + regex = False + + if regex is None: + regex = True + + if case is None: + case = True + + result = self._data.array._str_replace( + pat, repl, n=n, case=case, flags=flags, regex=regex + ) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def repeat(self, repeats): + """ + Duplicate each string in the Series or Index. + + Parameters + ---------- + repeats : int or sequence of int + Same value for all (int) or different value per (sequence). + + Returns + ------- + Series or Index of object + Series or Index of repeated string objects specified by + input parameter repeats. + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c']) + >>> s + 0 a + 1 b + 2 c + dtype: object + + Single int repeats string in Series + + >>> s.str.repeat(repeats=2) + 0 aa + 1 bb + 2 cc + dtype: object + + Sequence of int repeats corresponding string in Series + + >>> s.str.repeat(repeats=[1, 2, 3]) + 0 a + 1 bb + 2 ccc + dtype: object + """ + result = self._data.array._str_repeat(repeats) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def pad(self, width, side="left", fillchar=" "): + """ + Pad strings in the Series/Index up to width. + + Parameters + ---------- + width : int + Minimum width of resulting string; additional characters will be filled + with character defined in `fillchar`. 
+ side : {'left', 'right', 'both'}, default 'left' + Side from which to fill resulting string. + fillchar : str, default ' ' + Additional character for filling, default is whitespace. + + Returns + ------- + Series or Index of object + Returns Series or Index with minimum number of char in object. + + See Also + -------- + Series.str.rjust : Fills the left side of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='left')``. + Series.str.ljust : Fills the right side of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='right')``. + Series.str.center : Fills both sides of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='both')``. + Series.str.zfill : Pad strings in the Series/Index by prepending '0' + character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``. + + Examples + -------- + >>> s = pd.Series(["caribou", "tiger"]) + >>> s + 0 caribou + 1 tiger + dtype: object + + >>> s.str.pad(width=10) + 0 caribou + 1 tiger + dtype: object + + >>> s.str.pad(width=10, side='right', fillchar='-') + 0 caribou--- + 1 tiger----- + dtype: object + + >>> s.str.pad(width=10, side='both', fillchar='-') + 0 -caribou-- + 1 --tiger--- + dtype: object + """ + if not isinstance(fillchar, str): + msg = f"fillchar must be a character, not {type(fillchar).__name__}" + raise TypeError(msg) + + if len(fillchar) != 1: + raise TypeError("fillchar must be a character, not str") + + if not is_integer(width): + msg = f"width must be of integer type, not {type(width).__name__}" + raise TypeError(msg) + + result = self._data.array._str_pad(width, side=side, fillchar=fillchar) + return self._wrap_result(result) + + _shared_docs[ + "str_pad" + ] = """ + Pad %(side)s side of strings in the Series/Index. + + Equivalent to :meth:`str.%(method)s`. + + Parameters + ---------- + width : int + Minimum width of resulting string; additional characters will be filled + with ``fillchar``. + fillchar : str + Additional character for filling, default is whitespace. + + Returns + ------- + filled : Series/Index of objects. + """ + + @Appender(_shared_docs["str_pad"] % {"side": "left and right", "method": "center"}) + @forbid_nonstring_types(["bytes"]) + def center(self, width, fillchar=" "): + return self.pad(width, side="both", fillchar=fillchar) + + @Appender(_shared_docs["str_pad"] % {"side": "right", "method": "ljust"}) + @forbid_nonstring_types(["bytes"]) + def ljust(self, width, fillchar=" "): + return self.pad(width, side="right", fillchar=fillchar) + + @Appender(_shared_docs["str_pad"] % {"side": "left", "method": "rjust"}) + @forbid_nonstring_types(["bytes"]) + def rjust(self, width, fillchar=" "): + return self.pad(width, side="left", fillchar=fillchar) + + @forbid_nonstring_types(["bytes"]) + def zfill(self, width): + """ + Pad strings in the Series/Index by prepending '0' characters. + + Strings in the Series/Index are padded with '0' characters on the + left of the string to reach a total string length `width`. Strings + in the Series/Index with length greater or equal to `width` are + unchanged. + + Parameters + ---------- + width : int + Minimum length of resulting string; strings with length less + than `width` be prepended with '0' characters. + + Returns + ------- + Series/Index of objects. + + See Also + -------- + Series.str.rjust : Fills the left side of strings with an arbitrary + character. + Series.str.ljust : Fills the right side of strings with an arbitrary + character. 
+ Series.str.pad : Fills the specified sides of strings with an arbitrary + character. + Series.str.center : Fills both sides of strings with an arbitrary + character. + + Notes + ----- + Differs from :meth:`str.zfill` which has special handling + for '+'/'-' in the string. + + Examples + -------- + >>> s = pd.Series(['-1', '1', '1000', 10, np.nan]) + >>> s + 0 -1 + 1 1 + 2 1000 + 3 10 + 4 NaN + dtype: object + + Note that ``10`` and ``NaN`` are not strings, therefore they are + converted to ``NaN``. The minus sign in ``'-1'`` is treated as a + special character and the zero is added to the right of it + (:meth:`str.zfill` would have moved it to the left). ``1000`` + remains unchanged as it is longer than `width`. + + >>> s.str.zfill(3) + 0 -01 + 1 001 + 2 1000 + 3 NaN + 4 NaN + dtype: object + """ + if not is_integer(width): + msg = f"width must be of integer type, not {type(width).__name__}" + raise TypeError(msg) + f = lambda x: x.zfill(width) + result = self._data.array._str_map(f) + return self._wrap_result(result) + + def slice(self, start=None, stop=None, step=None): + """ + Slice substrings from each element in the Series or Index. + + Parameters + ---------- + start : int, optional + Start position for slice operation. + stop : int, optional + Stop position for slice operation. + step : int, optional + Step size for slice operation. + + Returns + ------- + Series or Index of object + Series or Index from sliced substring from original string object. + + See Also + -------- + Series.str.slice_replace : Replace a slice with a string. + Series.str.get : Return element at position. + Equivalent to `Series.str.slice(start=i, stop=i+1)` with `i` + being the position. + + Examples + -------- + >>> s = pd.Series(["koala", "dog", "chameleon"]) + >>> s + 0 koala + 1 dog + 2 chameleon + dtype: object + + >>> s.str.slice(start=1) + 0 oala + 1 og + 2 hameleon + dtype: object + + >>> s.str.slice(start=-1) + 0 a + 1 g + 2 n + dtype: object + + >>> s.str.slice(stop=2) + 0 ko + 1 do + 2 ch + dtype: object + + >>> s.str.slice(step=2) + 0 kaa + 1 dg + 2 caeen + dtype: object + + >>> s.str.slice(start=0, stop=5, step=3) + 0 kl + 1 d + 2 cm + dtype: object + + Equivalent behaviour to: + + >>> s.str[0:5:3] + 0 kl + 1 d + 2 cm + dtype: object + """ + result = self._data.array._str_slice(start, stop, step) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def slice_replace(self, start=None, stop=None, repl=None): + """ + Replace a positional slice of a string with another value. + + Parameters + ---------- + start : int, optional + Left index position to use for the slice. If not specified (None), + the slice is unbounded on the left, i.e. slice from the start + of the string. + stop : int, optional + Right index position to use for the slice. If not specified (None), + the slice is unbounded on the right, i.e. slice until the + end of the string. + repl : str, optional + String for replacement. If not specified (None), the sliced region + is replaced with an empty string. + + Returns + ------- + Series or Index + Same type as the original object. + + See Also + -------- + Series.str.slice : Just slicing without replacement. + + Examples + -------- + >>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde']) + >>> s + 0 a + 1 ab + 2 abc + 3 abdc + 4 abcde + dtype: object + + Specify just `start`, meaning replace `start` until the end of the + string with `repl`. 
+ + >>> s.str.slice_replace(1, repl='X') + 0 aX + 1 aX + 2 aX + 3 aX + 4 aX + dtype: object + + Specify just `stop`, meaning the start of the string to `stop` is replaced + with `repl`, and the rest of the string is included. + + >>> s.str.slice_replace(stop=2, repl='X') + 0 X + 1 X + 2 Xc + 3 Xdc + 4 Xcde + dtype: object + + Specify `start` and `stop`, meaning the slice from `start` to `stop` is + replaced with `repl`. Everything before or after `start` and `stop` is + included as is. + + >>> s.str.slice_replace(start=1, stop=3, repl='X') + 0 aX + 1 aX + 2 aX + 3 aXc + 4 aXde + dtype: object + """ + result = self._data.array._str_slice_replace(start, stop, repl) + return self._wrap_result(result) + + def decode(self, encoding, errors="strict"): + """ + Decode character string in the Series/Index using indicated encoding. + + Equivalent to :meth:`str.decode` in python2 and :meth:`bytes.decode` in + python3. + + Parameters + ---------- + encoding : str + errors : str, optional + + Returns + ------- + Series or Index + """ + # TODO: Add a similar _bytes interface. + if encoding in _cpython_optimized_decoders: + # CPython optimized implementation + f = lambda x: x.decode(encoding, errors) + else: + decoder = codecs.getdecoder(encoding) + f = lambda x: decoder(x, errors)[0] + arr = self._data.array + # assert isinstance(arr, (StringArray,)) + result = arr._str_map(f) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def encode(self, encoding, errors="strict"): + """ + Encode character string in the Series/Index using indicated encoding. + + Equivalent to :meth:`str.encode`. + + Parameters + ---------- + encoding : str + errors : str, optional + + Returns + ------- + encoded : Series/Index of objects + """ + result = self._data.array._str_encode(encoding, errors) + return self._wrap_result(result, returns_string=False) + + _shared_docs[ + "str_strip" + ] = r""" + Remove %(position)s characters. + + Strip whitespaces (including newlines) or a set of specified characters + from each string in the Series/Index from %(side)s. + Replaces any non-strings in Series with NaNs. + Equivalent to :meth:`str.%(method)s`. + + Parameters + ---------- + to_strip : str or None, default None + Specifying the set of characters to be removed. + All combinations of this set of characters will be stripped. + If None then whitespaces are removed. + + Returns + ------- + Series or Index of object + + See Also + -------- + Series.str.strip : Remove leading and trailing characters in Series/Index. + Series.str.lstrip : Remove leading characters in Series/Index. + Series.str.rstrip : Remove trailing characters in Series/Index. + + Examples + -------- + >>> s = pd.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', np.nan, 10, True]) + >>> s + 0 1. Ant. + 1 2. Bee!\n + 2 3. Cat?\t + 3 NaN + 4 10 + 5 True + dtype: object + + >>> s.str.strip() + 0 1. Ant. + 1 2. Bee! + 2 3. Cat? + 3 NaN + 4 NaN + 5 NaN + dtype: object + + >>> s.str.lstrip('123.') + 0 Ant. + 1 Bee!\n + 2 Cat?\t + 3 NaN + 4 NaN + 5 NaN + dtype: object + + >>> s.str.rstrip('.!? \n\t') + 0 1. Ant + 1 2. Bee + 2 3. Cat + 3 NaN + 4 NaN + 5 NaN + dtype: object + + >>> s.str.strip('123.!? 
\n\t') + 0 Ant + 1 Bee + 2 Cat + 3 NaN + 4 NaN + 5 NaN + dtype: object + """ + + @Appender( + _shared_docs["str_strip"] + % { + "side": "left and right sides", + "method": "strip", + "position": "leading and trailing", + } + ) + @forbid_nonstring_types(["bytes"]) + def strip(self, to_strip=None): + result = self._data.array._str_strip(to_strip) + return self._wrap_result(result) + + @Appender( + _shared_docs["str_strip"] + % {"side": "left side", "method": "lstrip", "position": "leading"} + ) + @forbid_nonstring_types(["bytes"]) + def lstrip(self, to_strip=None): + result = self._data.array._str_lstrip(to_strip) + return self._wrap_result(result) + + @Appender( + _shared_docs["str_strip"] + % {"side": "right side", "method": "rstrip", "position": "trailing"} + ) + @forbid_nonstring_types(["bytes"]) + def rstrip(self, to_strip=None): + result = self._data.array._str_rstrip(to_strip) + return self._wrap_result(result) + + _shared_docs[ + "str_removefix" + ] = r""" + Remove a %(side)s from an object series. + + If the %(side)s is not present, the original string will be returned. + + Parameters + ---------- + %(side)s : str + Remove the %(side)s of the string. + + Returns + ------- + Series/Index: object + The Series or Index with given %(side)s removed. + + See Also + -------- + Series.str.remove%(other_side)s : Remove a %(other_side)s from an object series. + + Examples + -------- + >>> s = pd.Series(["str_foo", "str_bar", "no_prefix"]) + >>> s + 0 str_foo + 1 str_bar + 2 no_prefix + dtype: object + >>> s.str.removeprefix("str_") + 0 foo + 1 bar + 2 no_prefix + dtype: object + + >>> s = pd.Series(["foo_str", "bar_str", "no_suffix"]) + >>> s + 0 foo_str + 1 bar_str + 2 no_suffix + dtype: object + >>> s.str.removesuffix("_str") + 0 foo + 1 bar + 2 no_suffix + dtype: object + """ + + @Appender( + _shared_docs["str_removefix"] % {"side": "prefix", "other_side": "suffix"} + ) + @forbid_nonstring_types(["bytes"]) + def removeprefix(self, prefix): + result = self._data.array._str_removeprefix(prefix) + return self._wrap_result(result) + + @Appender( + _shared_docs["str_removefix"] % {"side": "suffix", "other_side": "prefix"} + ) + @forbid_nonstring_types(["bytes"]) + def removesuffix(self, suffix): + result = self._data.array._str_removesuffix(suffix) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def wrap(self, width, **kwargs): + r""" + Wrap strings in Series/Index at specified line width. + + This method has the same keyword parameters and defaults as + :class:`textwrap.TextWrapper`. + + Parameters + ---------- + width : int + Maximum line width. + expand_tabs : bool, optional + If True, tab characters will be expanded to spaces (default: True). + replace_whitespace : bool, optional + If True, each whitespace character (as defined by string.whitespace) + remaining after tab expansion will be replaced by a single space + (default: True). + drop_whitespace : bool, optional + If True, whitespace that, after wrapping, happens to end up at the + beginning or end of a line is dropped (default: True). + break_long_words : bool, optional + If True, then words longer than width will be broken in order to ensure + that no lines are longer than width. If it is false, long words will + not be broken, and some lines may be longer than width (default: True). + break_on_hyphens : bool, optional + If True, wrapping will occur preferably on whitespace and right after + hyphens in compound words, as it is customary in English. 
If false, + only whitespaces will be considered as potentially good places for line + breaks, but you need to set break_long_words to false if you want truly + insecable words (default: True). + + Returns + ------- + Series or Index + + Notes + ----- + Internally, this method uses a :class:`textwrap.TextWrapper` instance with + default settings. To achieve behavior matching R's stringr library str_wrap + function, use the arguments: + + - expand_tabs = False + - replace_whitespace = True + - drop_whitespace = True + - break_long_words = False + - break_on_hyphens = False + + Examples + -------- + >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped']) + >>> s.str.wrap(12) + 0 line to be\nwrapped + 1 another line\nto be\nwrapped + dtype: object + """ + result = self._data.array._str_wrap(width, **kwargs) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def get_dummies(self, sep="|"): + """ + Return DataFrame of dummy/indicator variables for Series. + + Each string in Series is split by sep and returned as a DataFrame + of dummy/indicator variables. + + Parameters + ---------- + sep : str, default "|" + String to split on. + + Returns + ------- + DataFrame + Dummy variables corresponding to values of the Series. + + See Also + -------- + get_dummies : Convert categorical variable into dummy/indicator + variables. + + Examples + -------- + >>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies() + a b c + 0 1 1 0 + 1 1 0 0 + 2 1 0 1 + + >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies() + a b c + 0 1 1 0 + 1 0 0 0 + 2 1 0 1 + """ + # we need to cast to Series of strings as only that has all + # methods available for making the dummies... + result, name = self._data.array._str_get_dummies(sep) + return self._wrap_result( + result, + name=name, + expand=True, + returns_string=False, + ) + + @forbid_nonstring_types(["bytes"]) + def translate(self, table): + """ + Map all characters in the string through the given mapping table. + + Equivalent to standard :meth:`str.translate`. + + Parameters + ---------- + table : dict + Table is a mapping of Unicode ordinals to Unicode ordinals, strings, or + None. Unmapped characters are left untouched. + Characters mapped to None are deleted. :meth:`str.maketrans` is a + helper function for making translation tables. + + Returns + ------- + Series or Index + """ + result = self._data.array._str_translate(table) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def count(self, pat, flags=0): + r""" + Count occurrences of pattern in each string of the Series/Index. + + This function is used to count the number of times a particular regex + pattern is repeated in each of the string elements of the + :class:`~pandas.Series`. + + Parameters + ---------- + pat : str + Valid regular expression. + flags : int, default 0, meaning no flags + Flags for the `re` module. For a complete list, `see here + `_. + **kwargs + For compatibility with other string methods. Not used. + + Returns + ------- + Series or Index + Same type as the calling object containing the integer counts. + + See Also + -------- + re : Standard library module for regular expressions. + str.count : Standard library version, without regular expression support. + + Notes + ----- + Some characters need to be escaped when passing in `pat`. + eg. ``'$'`` has a special meaning in regex and must be escaped when + finding this literal character. 
+ + Examples + -------- + >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat']) + >>> s.str.count('a') + 0 0.0 + 1 0.0 + 2 2.0 + 3 2.0 + 4 NaN + 5 0.0 + 6 1.0 + dtype: float64 + + Escape ``'$'`` to find the literal dollar sign. + + >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat']) + >>> s.str.count('\\$') + 0 1 + 1 0 + 2 1 + 3 2 + 4 2 + 5 0 + dtype: int64 + + This is also available on Index + + >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') + Int64Index([0, 0, 2, 1], dtype='int64') + """ + result = self._data.array._str_count(pat, flags) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def startswith( + self, pat: str | tuple[str, ...], na: Scalar | None = None + ) -> Series | Index: + """ + Test if the start of each string element matches a pattern. + + Equivalent to :meth:`str.startswith`. + + Parameters + ---------- + pat : str or tuple[str, ...] + Character sequence or tuple of strings. Regular expressions are not + accepted. + na : object, default NaN + Object shown if element tested is not a string. The default depends + on dtype of the array. For object-dtype, ``numpy.nan`` is used. + For ``StringDtype``, ``pandas.NA`` is used. + + Returns + ------- + Series or Index of bool + A Series of booleans indicating whether the given pattern matches + the start of each string element. + + See Also + -------- + str.startswith : Python standard library string method. + Series.str.endswith : Same as startswith, but tests the end of string. + Series.str.contains : Tests if string element contains a pattern. + + Examples + -------- + >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan]) + >>> s + 0 bat + 1 Bear + 2 cat + 3 NaN + dtype: object + + >>> s.str.startswith('b') + 0 True + 1 False + 2 False + 3 NaN + dtype: object + + >>> s.str.startswith(('b', 'B')) + 0 True + 1 True + 2 False + 3 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN`. + + >>> s.str.startswith('b', na=False) + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + if not isinstance(pat, (str, tuple)): + msg = f"expected a string or tuple, not {type(pat).__name__}" + raise TypeError(msg) + result = self._data.array._str_startswith(pat, na=na) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def endswith( + self, pat: str | tuple[str, ...], na: Scalar | None = None + ) -> Series | Index: + """ + Test if the end of each string element matches a pattern. + + Equivalent to :meth:`str.endswith`. + + Parameters + ---------- + pat : str or tuple[str, ...] + Character sequence or tuple of strings. Regular expressions are not + accepted. + na : object, default NaN + Object shown if element tested is not a string. The default depends + on dtype of the array. For object-dtype, ``numpy.nan`` is used. + For ``StringDtype``, ``pandas.NA`` is used. + + Returns + ------- + Series or Index of bool + A Series of booleans indicating whether the given pattern matches + the end of each string element. + + See Also + -------- + str.endswith : Python standard library string method. + Series.str.startswith : Same as endswith, but tests the start of string. + Series.str.contains : Tests if string element contains a pattern. 
+ + Examples + -------- + >>> s = pd.Series(['bat', 'bear', 'caT', np.nan]) + >>> s + 0 bat + 1 bear + 2 caT + 3 NaN + dtype: object + + >>> s.str.endswith('t') + 0 True + 1 False + 2 False + 3 NaN + dtype: object + + >>> s.str.endswith(('t', 'T')) + 0 True + 1 False + 2 True + 3 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN`. + + >>> s.str.endswith('t', na=False) + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + if not isinstance(pat, (str, tuple)): + msg = f"expected a string or tuple, not {type(pat).__name__}" + raise TypeError(msg) + result = self._data.array._str_endswith(pat, na=na) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def findall(self, pat, flags=0): + """ + Find all occurrences of pattern or regular expression in the Series/Index. + + Equivalent to applying :func:`re.findall` to all the elements in the + Series/Index. + + Parameters + ---------- + pat : str + Pattern or regular expression. + flags : int, default 0 + Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which + means no flags). + + Returns + ------- + Series/Index of lists of strings + All non-overlapping matches of pattern or regular expression in each + string of this Series/Index. + + See Also + -------- + count : Count occurrences of pattern or regular expression in each string + of the Series/Index. + extractall : For each string in the Series, extract groups from all matches + of regular expression and return a DataFrame with one row for each + match and one column for each group. + re.findall : The equivalent ``re`` function to all non-overlapping matches + of pattern or regular expression in string, as a list of strings. + + Examples + -------- + >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) + + The search for the pattern 'Monkey' returns one match: + + >>> s.str.findall('Monkey') + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + On the other hand, the search for the pattern 'MONKEY' doesn't return any + match: + + >>> s.str.findall('MONKEY') + 0 [] + 1 [] + 2 [] + dtype: object + + Flags can be added to the pattern or regular expression. For instance, + to find the pattern 'MONKEY' ignoring the case: + + >>> import re + >>> s.str.findall('MONKEY', flags=re.IGNORECASE) + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + When the pattern matches more than one string in the Series, all matches + are returned: + + >>> s.str.findall('on') + 0 [on] + 1 [on] + 2 [] + dtype: object + + Regular expressions are supported too. For instance, the search for all the + strings ending with the word 'on' is shown next: + + >>> s.str.findall('on$') + 0 [on] + 1 [] + 2 [] + dtype: object + + If the pattern is found more than once in the same string, then a list of + multiple strings is returned: + + >>> s.str.findall('b') + 0 [] + 1 [] + 2 [b, b] + dtype: object + """ + result = self._data.array._str_findall(pat, flags) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def extract( + self, pat: str, flags: int = 0, expand: bool = True + ) -> DataFrame | Series | Index: + r""" + Extract capture groups in the regex `pat` as columns in a DataFrame. + + For each subject string in the Series, extract groups from the + first match of regular expression `pat`. + + Parameters + ---------- + pat : str + Regular expression pattern with capturing groups. + flags : int, default 0 (no flags) + Flags from the ``re`` module, e.g. 
``re.IGNORECASE``, that + modify regular expression matching for things like case, + spaces, etc. For more details, see :mod:`re`. + expand : bool, default True + If True, return DataFrame with one column per capture group. + If False, return a Series/Index if there is one capture group + or DataFrame if there are multiple capture groups. + + Returns + ------- + DataFrame or Series or Index + A DataFrame with one row for each subject string, and one + column for each group. Any capture group names in regular + expression pat will be used for column names; otherwise + capture group numbers will be used. The dtype of each result + column is always object, even when no match is found. If + ``expand=False`` and pat has only one capture group, then + return a Series (if subject is a Series) or Index (if subject + is an Index). + + See Also + -------- + extractall : Returns all matches (not just the first match). + + Examples + -------- + A pattern with two groups will return a DataFrame with two columns. + Non-matches will be NaN. + + >>> s = pd.Series(['a1', 'b2', 'c3']) + >>> s.str.extract(r'([ab])(\d)') + 0 1 + 0 a 1 + 1 b 2 + 2 NaN NaN + + A pattern may contain optional groups. + + >>> s.str.extract(r'([ab])?(\d)') + 0 1 + 0 a 1 + 1 b 2 + 2 NaN 3 + + Named groups will become column names in the result. + + >>> s.str.extract(r'(?P[ab])(?P\d)') + letter digit + 0 a 1 + 1 b 2 + 2 NaN NaN + + A pattern with one group will return a DataFrame with one column + if expand=True. + + >>> s.str.extract(r'[ab](\d)', expand=True) + 0 + 0 1 + 1 2 + 2 NaN + + A pattern with one group will return a Series if expand=False. + + >>> s.str.extract(r'[ab](\d)', expand=False) + 0 1 + 1 2 + 2 NaN + dtype: object + """ + from pandas import DataFrame + + if not isinstance(expand, bool): + raise ValueError("expand must be True or False") + + regex = re.compile(pat, flags=flags) + if regex.groups == 0: + raise ValueError("pattern contains no capture groups") + + if not expand and regex.groups > 1 and isinstance(self._data, ABCIndex): + raise ValueError("only one regex group is supported with Index") + + obj = self._data + result_dtype = _result_dtype(obj) + + returns_df = regex.groups > 1 or expand + + if returns_df: + name = None + columns = _get_group_names(regex) + + if obj.array.size == 0: + result = DataFrame(columns=columns, dtype=result_dtype) + + else: + result_list = self._data.array._str_extract( + pat, flags=flags, expand=returns_df + ) + + result_index: Index | None + if isinstance(obj, ABCSeries): + result_index = obj.index + else: + result_index = None + + result = DataFrame( + result_list, columns=columns, index=result_index, dtype=result_dtype + ) + + else: + name = _get_single_group_name(regex) + result = self._data.array._str_extract(pat, flags=flags, expand=returns_df) + return self._wrap_result(result, name=name) + + @forbid_nonstring_types(["bytes"]) + def extractall(self, pat, flags=0): + r""" + Extract capture groups in the regex `pat` as columns in DataFrame. + + For each subject string in the Series, extract groups from all + matches of regular expression pat. When each subject string in the + Series has exactly one match, extractall(pat).xs(0, level='match') + is the same as extract(pat). + + Parameters + ---------- + pat : str + Regular expression pattern with capturing groups. + flags : int, default 0 (no flags) + A ``re`` module flag, for example ``re.IGNORECASE``. These allow + to modify regular expression matching for things like case, spaces, + etc. 
Multiple flags can be combined with the bitwise OR operator, + for example ``re.IGNORECASE | re.MULTILINE``. + + Returns + ------- + DataFrame + A ``DataFrame`` with one row for each match, and one column for each + group. Its rows have a ``MultiIndex`` with first levels that come from + the subject ``Series``. The last level is named 'match' and indexes the + matches in each item of the ``Series``. Any capture group names in + regular expression pat will be used for column names; otherwise capture + group numbers will be used. + + See Also + -------- + extract : Returns first match only (not all matches). + + Examples + -------- + A pattern with one group will return a DataFrame with one column. + Indices with no matches will not appear in the result. + + >>> s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"]) + >>> s.str.extractall(r"[ab](\d)") + 0 + match + A 0 1 + 1 2 + B 0 1 + + Capture group names are used for column names of the result. + + >>> s.str.extractall(r"[ab](?P\d)") + digit + match + A 0 1 + 1 2 + B 0 1 + + A pattern with two groups will return a DataFrame with two columns. + + >>> s.str.extractall(r"(?P[ab])(?P\d)") + letter digit + match + A 0 a 1 + 1 a 2 + B 0 b 1 + + Optional groups that do not match are NaN in the result. + + >>> s.str.extractall(r"(?P[ab])?(?P\d)") + letter digit + match + A 0 a 1 + 1 a 2 + B 0 b 1 + C 0 NaN 1 + """ + # TODO: dispatch + return str_extractall(self._orig, pat, flags) + + _shared_docs[ + "find" + ] = """ + Return %(side)s indexes in each strings in the Series/Index. + + Each of returned indexes corresponds to the position where the + substring is fully contained between [start:end]. Return -1 on + failure. Equivalent to standard :meth:`str.%(method)s`. + + Parameters + ---------- + sub : str + Substring being searched. + start : int + Left edge index. + end : int + Right edge index. + + Returns + ------- + Series or Index of int. + + See Also + -------- + %(also)s + """ + + @Appender( + _shared_docs["find"] + % { + "side": "lowest", + "method": "find", + "also": "rfind : Return highest indexes in each strings.", + } + ) + @forbid_nonstring_types(["bytes"]) + def find(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._data.array._str_find(sub, start, end) + return self._wrap_result(result, returns_string=False) + + @Appender( + _shared_docs["find"] + % { + "side": "highest", + "method": "rfind", + "also": "find : Return lowest indexes in each strings.", + } + ) + @forbid_nonstring_types(["bytes"]) + def rfind(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._data.array._str_rfind(sub, start=start, end=end) + return self._wrap_result(result, returns_string=False) + + @forbid_nonstring_types(["bytes"]) + def normalize(self, form): + """ + Return the Unicode normal form for the strings in the Series/Index. + + For more information on the forms, see the + :func:`unicodedata.normalize`. + + Parameters + ---------- + form : {'NFC', 'NFKC', 'NFD', 'NFKD'} + Unicode form. + + Returns + ------- + normalized : Series/Index of objects + """ + result = self._data.array._str_normalize(form) + return self._wrap_result(result) + + _shared_docs[ + "index" + ] = """ + Return %(side)s indexes in each string in Series/Index. 
+ + Each of the returned indexes corresponds to the position where the + substring is fully contained between [start:end]. This is the same + as ``str.%(similar)s`` except instead of returning -1, it raises a + ValueError when the substring is not found. Equivalent to standard + ``str.%(method)s``. + + Parameters + ---------- + sub : str + Substring being searched. + start : int + Left edge index. + end : int + Right edge index. + + Returns + ------- + Series or Index of object + + See Also + -------- + %(also)s + """ + + @Appender( + _shared_docs["index"] + % { + "side": "lowest", + "similar": "find", + "method": "index", + "also": "rindex : Return highest indexes in each strings.", + } + ) + @forbid_nonstring_types(["bytes"]) + def index(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._data.array._str_index(sub, start=start, end=end) + return self._wrap_result(result, returns_string=False) + + @Appender( + _shared_docs["index"] + % { + "side": "highest", + "similar": "rfind", + "method": "rindex", + "also": "index : Return lowest indexes in each strings.", + } + ) + @forbid_nonstring_types(["bytes"]) + def rindex(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._data.array._str_rindex(sub, start=start, end=end) + return self._wrap_result(result, returns_string=False) + + def len(self): + """ + Compute the length of each element in the Series/Index. + + The element may be a sequence (such as a string, tuple or list) or a collection + (such as a dictionary). + + Returns + ------- + Series or Index of int + A Series or Index of integer values indicating the length of each + element in the Series or Index. + + See Also + -------- + str.len : Python built-in function returning the length of an object. + Series.size : Returns the length of the Series. + + Examples + -------- + Returns the length (number of characters) in a string. Returns the + number of entries for dictionaries, lists or tuples. + + >>> s = pd.Series(['dog', + ... '', + ... 5, + ... {'foo' : 'bar'}, + ... [2, 3, 5, 7], + ... ('one', 'two', 'three')]) + >>> s + 0 dog + 1 + 2 5 + 3 {'foo': 'bar'} + 4 [2, 3, 5, 7] + 5 (one, two, three) + dtype: object + >>> s.str.len() + 0 3.0 + 1 0.0 + 2 NaN + 3 1.0 + 4 4.0 + 5 3.0 + dtype: float64 + """ + result = self._data.array._str_len() + return self._wrap_result(result, returns_string=False) + + _shared_docs[ + "casemethods" + ] = """ + Convert strings in the Series/Index to %(type)s. + %(version)s + Equivalent to :meth:`str.%(method)s`. + + Returns + ------- + Series or Index of object + + See Also + -------- + Series.str.lower : Converts all characters to lowercase. + Series.str.upper : Converts all characters to uppercase. + Series.str.title : Converts first character of each word to uppercase and + remaining to lowercase. + Series.str.capitalize : Converts first character to uppercase and + remaining to lowercase. + Series.str.swapcase : Converts uppercase to lowercase and lowercase to + uppercase. + Series.str.casefold: Removes all case distinctions in the string. 
+ + Examples + -------- + >>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe']) + >>> s + 0 lower + 1 CAPITALS + 2 this is a sentence + 3 SwApCaSe + dtype: object + + >>> s.str.lower() + 0 lower + 1 capitals + 2 this is a sentence + 3 swapcase + dtype: object + + >>> s.str.upper() + 0 LOWER + 1 CAPITALS + 2 THIS IS A SENTENCE + 3 SWAPCASE + dtype: object + + >>> s.str.title() + 0 Lower + 1 Capitals + 2 This Is A Sentence + 3 Swapcase + dtype: object + + >>> s.str.capitalize() + 0 Lower + 1 Capitals + 2 This is a sentence + 3 Swapcase + dtype: object + + >>> s.str.swapcase() + 0 LOWER + 1 capitals + 2 THIS IS A SENTENCE + 3 sWaPcAsE + dtype: object + """ + # Types: + # cases: + # upper, lower, title, capitalize, swapcase, casefold + # boolean: + # isalpha, isnumeric isalnum isdigit isdecimal isspace islower isupper istitle + # _doc_args holds dict of strings to use in substituting casemethod docs + _doc_args: dict[str, dict[str, str]] = {} + _doc_args["lower"] = {"type": "lowercase", "method": "lower", "version": ""} + _doc_args["upper"] = {"type": "uppercase", "method": "upper", "version": ""} + _doc_args["title"] = {"type": "titlecase", "method": "title", "version": ""} + _doc_args["capitalize"] = { + "type": "be capitalized", + "method": "capitalize", + "version": "", + } + _doc_args["swapcase"] = { + "type": "be swapcased", + "method": "swapcase", + "version": "", + } + _doc_args["casefold"] = { + "type": "be casefolded", + "method": "casefold", + "version": "\n .. versionadded:: 0.25.0\n", + } + + @Appender(_shared_docs["casemethods"] % _doc_args["lower"]) + @forbid_nonstring_types(["bytes"]) + def lower(self): + result = self._data.array._str_lower() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["upper"]) + @forbid_nonstring_types(["bytes"]) + def upper(self): + result = self._data.array._str_upper() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["title"]) + @forbid_nonstring_types(["bytes"]) + def title(self): + result = self._data.array._str_title() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["capitalize"]) + @forbid_nonstring_types(["bytes"]) + def capitalize(self): + result = self._data.array._str_capitalize() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["swapcase"]) + @forbid_nonstring_types(["bytes"]) + def swapcase(self): + result = self._data.array._str_swapcase() + return self._wrap_result(result) + + @Appender(_shared_docs["casemethods"] % _doc_args["casefold"]) + @forbid_nonstring_types(["bytes"]) + def casefold(self): + result = self._data.array._str_casefold() + return self._wrap_result(result) + + _shared_docs[ + "ismethods" + ] = """ + Check whether all characters in each string are %(type)s. + + This is equivalent to running the Python string method + :meth:`str.%(method)s` for each element of the Series/Index. If a string + has zero characters, ``False`` is returned for that check. + + Returns + ------- + Series or Index of bool + Series or Index of boolean values with the same length as the original + Series/Index. + + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. 
+ Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. + + Examples + -------- + **Checks for Alphabetic and Numeric Characters** + + >>> s1 = pd.Series(['one', 'one1', '1', '']) + + >>> s1.str.isalpha() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + >>> s1.str.isnumeric() + 0 False + 1 False + 2 True + 3 False + dtype: bool + + >>> s1.str.isalnum() + 0 True + 1 True + 2 True + 3 False + dtype: bool + + Note that checks against characters mixed with any additional punctuation + or whitespace will evaluate to false for an alphanumeric check. + + >>> s2 = pd.Series(['A B', '1.5', '3,000']) + >>> s2.str.isalnum() + 0 False + 1 False + 2 False + dtype: bool + + **More Detailed Checks for Numeric Characters** + + There are several different but overlapping sets of numeric characters that + can be checked for. + + >>> s3 = pd.Series(['23', '³', '⅕', '']) + + The ``s3.str.isdecimal`` method checks for characters used to form numbers + in base 10. + + >>> s3.str.isdecimal() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + The ``s.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also + includes special digits, like superscripted and subscripted digits in + unicode. + + >>> s3.str.isdigit() + 0 True + 1 True + 2 False + 3 False + dtype: bool + + The ``s.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also + includes other characters that can represent quantities such as unicode + fractions. + + >>> s3.str.isnumeric() + 0 True + 1 True + 2 True + 3 False + dtype: bool + + **Checks for Whitespace** + + >>> s4 = pd.Series([' ', '\\t\\r\\n ', '']) + >>> s4.str.isspace() + 0 True + 1 True + 2 False + dtype: bool + + **Checks for Character Case** + + >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) + + >>> s5.str.islower() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + >>> s5.str.isupper() + 0 False + 1 False + 2 True + 3 False + dtype: bool + + The ``s5.str.istitle`` method checks for whether all words are in title + case (whether only the first letter of each word is capitalized). Words are + assumed to be as any sequence of non-numeric characters separated by + whitespace characters. 
+ + >>> s5.str.istitle() + 0 False + 1 True + 2 False + 3 False + dtype: bool + """ + _doc_args["isalnum"] = {"type": "alphanumeric", "method": "isalnum"} + _doc_args["isalpha"] = {"type": "alphabetic", "method": "isalpha"} + _doc_args["isdigit"] = {"type": "digits", "method": "isdigit"} + _doc_args["isspace"] = {"type": "whitespace", "method": "isspace"} + _doc_args["islower"] = {"type": "lowercase", "method": "islower"} + _doc_args["isupper"] = {"type": "uppercase", "method": "isupper"} + _doc_args["istitle"] = {"type": "titlecase", "method": "istitle"} + _doc_args["isnumeric"] = {"type": "numeric", "method": "isnumeric"} + _doc_args["isdecimal"] = {"type": "decimal", "method": "isdecimal"} + # force _noarg_wrapper return type with dtype=np.dtype(bool) (GH 29624) + + isalnum = _map_and_wrap( + "isalnum", docstring=_shared_docs["ismethods"] % _doc_args["isalnum"] + ) + isalpha = _map_and_wrap( + "isalpha", docstring=_shared_docs["ismethods"] % _doc_args["isalpha"] + ) + isdigit = _map_and_wrap( + "isdigit", docstring=_shared_docs["ismethods"] % _doc_args["isdigit"] + ) + isspace = _map_and_wrap( + "isspace", docstring=_shared_docs["ismethods"] % _doc_args["isspace"] + ) + islower = _map_and_wrap( + "islower", docstring=_shared_docs["ismethods"] % _doc_args["islower"] + ) + isupper = _map_and_wrap( + "isupper", docstring=_shared_docs["ismethods"] % _doc_args["isupper"] + ) + istitle = _map_and_wrap( + "istitle", docstring=_shared_docs["ismethods"] % _doc_args["istitle"] + ) + isnumeric = _map_and_wrap( + "isnumeric", docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"] + ) + isdecimal = _map_and_wrap( + "isdecimal", docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"] + ) + + +def cat_safe(list_of_columns: list, sep: str): + """ + Auxiliary function for :meth:`str.cat`. + + Same signature as cat_core, but handles TypeErrors in concatenation, which + happen if the arrays in list_of columns have the wrong dtypes or content. + + Parameters + ---------- + list_of_columns : list of numpy arrays + List of arrays to be concatenated with sep; + these arrays may not contain NaNs! + sep : string + The separator string for concatenating the columns. + + Returns + ------- + nd.array + The concatenation of list_of_columns with sep. + """ + try: + result = cat_core(list_of_columns, sep) + except TypeError: + # if there are any non-string values (wrong dtype or hidden behind + # object dtype), np.sum will fail; catch and return with better message + for column in list_of_columns: + dtype = lib.infer_dtype(column, skipna=True) + if dtype not in ["string", "empty"]: + raise TypeError( + "Concatenation requires list-likes containing only " + "strings (or missing values). Offending values found in " + f"column {dtype}" + ) from None + return result + + +def cat_core(list_of_columns: list, sep: str): + """ + Auxiliary function for :meth:`str.cat` + + Parameters + ---------- + list_of_columns : list of numpy arrays + List of arrays to be concatenated with sep; + these arrays may not contain NaNs! + sep : string + The separator string for concatenating the columns. + + Returns + ------- + nd.array + The concatenation of list_of_columns with sep. 
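+
+ Examples
+ --------
+ A small illustrative call (a sketch, assuming object-dtype input arrays
+ with no missing values, as required above):
+
+ >>> import numpy as np
+ >>> cat_core([np.array(['a', 'b'], dtype=object),
+ ...           np.array(['1', '2'], dtype=object)], '-')
+ array(['a-1', 'b-2'], dtype=object)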
+ """ + if sep == "": + # no need to interleave sep if it is empty + arr_of_cols = np.asarray(list_of_columns, dtype=object) + return np.sum(arr_of_cols, axis=0) + list_with_sep = [sep] * (2 * len(list_of_columns) - 1) + list_with_sep[::2] = list_of_columns + arr_with_sep = np.asarray(list_with_sep, dtype=object) + return np.sum(arr_with_sep, axis=0) + + +def _result_dtype(arr): + # workaround #27953 + # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails + # when the list of values is empty. + from pandas.core.arrays.string_ import StringDtype + + if isinstance(arr.dtype, StringDtype): + return arr.dtype + else: + return object + + +def _get_single_group_name(regex: re.Pattern) -> Hashable: + if regex.groupindex: + return next(iter(regex.groupindex)) + else: + return None + + +def _get_group_names(regex: re.Pattern) -> list[Hashable]: + """ + Get named groups from compiled regex. + + Unnamed groups are numbered. + + Parameters + ---------- + regex : compiled regex + + Returns + ------- + list of column labels + """ + names = {v: k for k, v in regex.groupindex.items()} + return [names.get(1 + i, i) for i in range(regex.groups)] + + +def str_extractall(arr, pat, flags=0): + regex = re.compile(pat, flags=flags) + # the regex must contain capture groups. + if regex.groups == 0: + raise ValueError("pattern contains no capture groups") + + if isinstance(arr, ABCIndex): + arr = arr.to_series().reset_index(drop=True) + + columns = _get_group_names(regex) + match_list = [] + index_list = [] + is_mi = arr.index.nlevels > 1 + + for subject_key, subject in arr.items(): + if isinstance(subject, str): + + if not is_mi: + subject_key = (subject_key,) + + for match_i, match_tuple in enumerate(regex.findall(subject)): + if isinstance(match_tuple, str): + match_tuple = (match_tuple,) + na_tuple = [np.NaN if group == "" else group for group in match_tuple] + match_list.append(na_tuple) + result_key = tuple(subject_key + (match_i,)) + index_list.append(result_key) + + from pandas import MultiIndex + + index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) + dtype = _result_dtype(arr) + + result = arr._constructor_expanddim( + match_list, index=index, columns=columns, dtype=dtype + ) + return result diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py new file mode 100644 index 00000000..ef0c3f8c --- /dev/null +++ b/pandas/core/strings/base.py @@ -0,0 +1,248 @@ +from __future__ import annotations + +import abc +from collections.abc import Callable # noqa: PDF001 +import re +from typing import TYPE_CHECKING + +import numpy as np + +from pandas._typing import Scalar + +if TYPE_CHECKING: + from pandas import Series + + +class BaseStringArrayMethods(abc.ABC): + """ + Base class for extension arrays implementing string methods. + + This is where our ExtensionArrays can override the implementation of + Series.str.. We don't expect this to work with + 3rd-party extension arrays. + + * User calls Series.str. + * pandas extracts the extension array from the Series + * pandas calls ``extension_array._str_(*args, **kwargs)`` + * pandas wraps the result, to return to the user. + + See :ref:`Series.str` for the docstring of each method. 
+ """ + + def _str_getitem(self, key): + if isinstance(key, slice): + return self._str_slice(start=key.start, stop=key.stop, step=key.step) + else: + return self._str_get(key) + + @abc.abstractmethod + def _str_count(self, pat, flags=0): + pass + + @abc.abstractmethod + def _str_pad(self, width, side="left", fillchar=" "): + pass + + @abc.abstractmethod + def _str_contains(self, pat, case=True, flags=0, na=None, regex=True): + pass + + @abc.abstractmethod + def _str_startswith(self, pat, na=None): + pass + + @abc.abstractmethod + def _str_endswith(self, pat, na=None): + pass + + @abc.abstractmethod + def _str_replace( + self, + pat: str | re.Pattern, + repl: str | Callable, + n: int = -1, + case: bool = True, + flags: int = 0, + regex: bool = True, + ): + pass + + @abc.abstractmethod + def _str_repeat(self, repeats): + pass + + @abc.abstractmethod + def _str_match( + self, pat: str, case: bool = True, flags: int = 0, na: Scalar = np.nan + ): + pass + + @abc.abstractmethod + def _str_fullmatch( + self, + pat: str | re.Pattern, + case: bool = True, + flags: int = 0, + na: Scalar = np.nan, + ): + pass + + @abc.abstractmethod + def _str_encode(self, encoding, errors="strict"): + pass + + @abc.abstractmethod + def _str_find(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def _str_rfind(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def _str_findall(self, pat, flags=0): + pass + + @abc.abstractmethod + def _str_get(self, i): + pass + + @abc.abstractmethod + def _str_index(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def _str_rindex(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def _str_join(self, sep): + pass + + @abc.abstractmethod + def _str_partition(self, sep, expand): + pass + + @abc.abstractmethod + def _str_rpartition(self, sep, expand): + pass + + @abc.abstractmethod + def _str_len(self): + pass + + @abc.abstractmethod + def _str_slice(self, start=None, stop=None, step=None): + pass + + @abc.abstractmethod + def _str_slice_replace(self, start=None, stop=None, repl=None): + pass + + @abc.abstractmethod + def _str_translate(self, table): + pass + + @abc.abstractmethod + def _str_wrap(self, width, **kwargs): + pass + + @abc.abstractmethod + def _str_get_dummies(self, sep="|"): + pass + + @abc.abstractmethod + def _str_isalnum(self): + pass + + @abc.abstractmethod + def _str_isalpha(self): + pass + + @abc.abstractmethod + def _str_isdecimal(self): + pass + + @abc.abstractmethod + def _str_isdigit(self): + pass + + @abc.abstractmethod + def _str_islower(self): + pass + + @abc.abstractmethod + def _str_isnumeric(self): + pass + + @abc.abstractmethod + def _str_isspace(self): + pass + + @abc.abstractmethod + def _str_istitle(self): + pass + + @abc.abstractmethod + def _str_isupper(self): + pass + + @abc.abstractmethod + def _str_capitalize(self): + pass + + @abc.abstractmethod + def _str_casefold(self): + pass + + @abc.abstractmethod + def _str_title(self): + pass + + @abc.abstractmethod + def _str_swapcase(self): + pass + + @abc.abstractmethod + def _str_lower(self): + pass + + @abc.abstractmethod + def _str_upper(self): + pass + + @abc.abstractmethod + def _str_normalize(self, form): + pass + + @abc.abstractmethod + def _str_strip(self, to_strip=None): + pass + + @abc.abstractmethod + def _str_lstrip(self, to_strip=None): + pass + + @abc.abstractmethod + def _str_rstrip(self, to_strip=None): + pass + + @abc.abstractmethod + def _str_removeprefix(self, prefix: str) -> Series: + pass + + @abc.abstractmethod + def 
_str_removesuffix(self, suffix: str) -> Series: + pass + + @abc.abstractmethod + def _str_split(self, pat=None, n=-1, expand=False): + pass + + @abc.abstractmethod + def _str_rsplit(self, pat=None, n=-1): + pass + + @abc.abstractmethod + def _str_extract(self, pat: str, flags: int = 0, expand: bool = True): + pass diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py new file mode 100644 index 00000000..f884264e --- /dev/null +++ b/pandas/core/strings/object_array.py @@ -0,0 +1,483 @@ +from __future__ import annotations + +from collections.abc import Callable # noqa: PDF001 +import re +import textwrap +from typing import TYPE_CHECKING +import unicodedata + +import numpy as np + +import pandas._libs.lib as lib +import pandas._libs.missing as libmissing +import pandas._libs.ops as libops +from pandas._typing import ( + NpDtype, + Scalar, +) + +from pandas.core.dtypes.common import is_scalar +from pandas.core.dtypes.missing import isna + +from pandas.core.strings.base import BaseStringArrayMethods + +if TYPE_CHECKING: + from pandas import Series + + +class ObjectStringArrayMixin(BaseStringArrayMethods): + """ + String Methods operating on object-dtype ndarrays. + """ + + _str_na_value = np.nan + + def __len__(self): + # For typing, _str_map relies on the object being sized. + raise NotImplementedError + + def _str_map( + self, f, na_value=None, dtype: NpDtype | None = None, convert: bool = True + ): + """ + Map a callable over valid elements of the array. + + Parameters + ---------- + f : Callable + A function to call on each non-NA element. + na_value : Scalar, optional + The value to set for NA values. Might also be used for the + fill value if the callable `f` raises an exception. + This defaults to ``self._str_na_value`` which is ``np.nan`` + for object-dtype and Categorical and ``pd.NA`` for StringArray. + dtype : Dtype, optional + The dtype of the result array. + convert : bool, default True + Whether to call `maybe_convert_objects` on the resulting ndarray + """ + if dtype is None: + dtype = np.dtype("object") + if na_value is None: + na_value = self._str_na_value + + if not len(self): + return np.array([], dtype=dtype) + + arr = np.asarray(self, dtype=object) + mask = isna(arr) + map_convert = convert and not np.all(mask) + try: + result = lib.map_infer_mask(arr, f, mask.view(np.uint8), map_convert) + except (TypeError, AttributeError) as err: + # Reraise the exception if callable `f` got wrong number of args. + # The user may want to be warned by this, instead of getting NaN + p_err = ( + r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " + r"(?(3)required )positional arguments?" + ) + + if len(err.args) >= 1 and re.search(p_err, err.args[0]): + # FIXME: this should be totally avoidable + raise err + + def g(x): + # This type of fallback behavior can be removed once + # we remove object-dtype .str accessor. 
+ try: + return f(x) + except (TypeError, AttributeError): + return na_value + + return self._str_map(g, na_value=na_value, dtype=dtype) + if not isinstance(result, np.ndarray): + return result + if na_value is not np.nan: + np.putmask(result, mask, na_value) + if convert and result.dtype == object: + result = lib.maybe_convert_objects(result) + return result + + def _str_count(self, pat, flags=0): + regex = re.compile(pat, flags=flags) + f = lambda x: len(regex.findall(x)) + return self._str_map(f, dtype="int64") + + def _str_pad(self, width, side="left", fillchar=" "): + if side == "left": + f = lambda x: x.rjust(width, fillchar) + elif side == "right": + f = lambda x: x.ljust(width, fillchar) + elif side == "both": + f = lambda x: x.center(width, fillchar) + else: # pragma: no cover + raise ValueError("Invalid side") + return self._str_map(f) + + def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True): + if regex: + if not case: + flags |= re.IGNORECASE + + pat = re.compile(pat, flags=flags) + + f = lambda x: pat.search(x) is not None + else: + if case: + f = lambda x: pat in x + else: + upper_pat = pat.upper() + f = lambda x: upper_pat in x.upper() + return self._str_map(f, na, dtype=np.dtype("bool")) + + def _str_startswith(self, pat, na=None): + f = lambda x: x.startswith(pat) + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) + + def _str_endswith(self, pat, na=None): + f = lambda x: x.endswith(pat) + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) + + def _str_replace( + self, + pat: str | re.Pattern, + repl: str | Callable, + n: int = -1, + case: bool = True, + flags: int = 0, + regex: bool = True, + ): + if case is False: + # add case flag, if provided + flags |= re.IGNORECASE + + if regex or flags or callable(repl): + if not isinstance(pat, re.Pattern): + if regex is False: + pat = re.escape(pat) + pat = re.compile(pat, flags=flags) + + n = n if n >= 0 else 0 + f = lambda x: pat.sub(repl=repl, string=x, count=n) + else: + f = lambda x: x.replace(pat, repl, n) + + return self._str_map(f, dtype=str) + + def _str_repeat(self, repeats): + if is_scalar(repeats): + + def scalar_rep(x): + try: + return bytes.__mul__(x, repeats) + except TypeError: + return str.__mul__(x, repeats) + + return self._str_map(scalar_rep, dtype=str) + else: + from pandas.core.arrays.string_ import BaseStringArray + + def rep(x, r): + if x is libmissing.NA: + return x + try: + return bytes.__mul__(x, r) + except TypeError: + return str.__mul__(x, r) + + repeats = np.asarray(repeats, dtype=object) + result = libops.vec_binop(np.asarray(self), repeats, rep) + if isinstance(self, BaseStringArray): + # Not going through map, so we have to do this here. 
+ result = type(self)._from_sequence(result) + return result + + def _str_match( + self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None + ): + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + f = lambda x: regex.match(x) is not None + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) + + def _str_fullmatch( + self, + pat: str | re.Pattern, + case: bool = True, + flags: int = 0, + na: Scalar | None = None, + ): + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + f = lambda x: regex.fullmatch(x) is not None + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) + + def _str_encode(self, encoding, errors="strict"): + f = lambda x: x.encode(encoding, errors=errors) + return self._str_map(f, dtype=object) + + def _str_find(self, sub, start=0, end=None): + return self._str_find_(sub, start, end, side="left") + + def _str_rfind(self, sub, start=0, end=None): + return self._str_find_(sub, start, end, side="right") + + def _str_find_(self, sub, start, end, side): + if side == "left": + method = "find" + elif side == "right": + method = "rfind" + else: # pragma: no cover + raise ValueError("Invalid side") + + if end is None: + f = lambda x: getattr(x, method)(sub, start) + else: + f = lambda x: getattr(x, method)(sub, start, end) + return self._str_map(f, dtype="int64") + + def _str_findall(self, pat, flags=0): + regex = re.compile(pat, flags=flags) + return self._str_map(regex.findall, dtype="object") + + def _str_get(self, i): + def f(x): + if isinstance(x, dict): + return x.get(i) + elif len(x) > i >= -len(x): + return x[i] + return self._str_na_value + + return self._str_map(f) + + def _str_index(self, sub, start=0, end=None): + if end: + f = lambda x: x.index(sub, start, end) + else: + f = lambda x: x.index(sub, start, end) + return self._str_map(f, dtype="int64") + + def _str_rindex(self, sub, start=0, end=None): + if end: + f = lambda x: x.rindex(sub, start, end) + else: + f = lambda x: x.rindex(sub, start, end) + return self._str_map(f, dtype="int64") + + def _str_join(self, sep): + return self._str_map(sep.join) + + def _str_partition(self, sep, expand): + result = self._str_map(lambda x: x.partition(sep), dtype="object") + return result + + def _str_rpartition(self, sep, expand): + return self._str_map(lambda x: x.rpartition(sep), dtype="object") + + def _str_len(self): + return self._str_map(len, dtype="int64") + + def _str_slice(self, start=None, stop=None, step=None): + obj = slice(start, stop, step) + return self._str_map(lambda x: x[obj]) + + def _str_slice_replace(self, start=None, stop=None, repl=None): + if repl is None: + repl = "" + + def f(x): + if x[start:stop] == "": + local_stop = start + else: + local_stop = stop + y = "" + if start is not None: + y += x[:start] + y += repl + if stop is not None: + y += x[local_stop:] + return y + + return self._str_map(f) + + def _str_split( + self, + pat: str | re.Pattern | None = None, + n=-1, + expand=False, + regex: bool | None = None, + ): + if pat is None: + if n is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + else: + new_pat: str | re.Pattern + if regex is True or isinstance(pat, re.Pattern): + new_pat = re.compile(pat) + elif regex is False: + new_pat = pat + # regex is None so link to old behavior #43563 + else: + if len(pat) == 1: + new_pat = pat + else: + new_pat = re.compile(pat) + + if isinstance(new_pat, re.Pattern): + if n is None or n == -1: + n = 0 + f = lambda x: new_pat.split(x, maxsplit=n) + else: + if n 
is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + return self._str_map(f, dtype=object) + + def _str_rsplit(self, pat=None, n=-1): + if n is None or n == 0: + n = -1 + f = lambda x: x.rsplit(pat, n) + return self._str_map(f, dtype="object") + + def _str_translate(self, table): + return self._str_map(lambda x: x.translate(table)) + + def _str_wrap(self, width, **kwargs): + kwargs["width"] = width + tw = textwrap.TextWrapper(**kwargs) + return self._str_map(lambda s: "\n".join(tw.wrap(s))) + + def _str_get_dummies(self, sep="|"): + from pandas import Series + + arr = Series(self).fillna("") + try: + arr = sep + arr + sep + except (TypeError, NotImplementedError): + arr = sep + arr.astype(str) + sep + + tags: set[str] = set() + for ts in Series(arr).str.split(sep): + tags.update(ts) + tags2 = sorted(tags - {""}) + + dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) + + for i, t in enumerate(tags2): + pat = sep + t + sep + dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) + return dummies, tags2 + + def _str_upper(self): + return self._str_map(lambda x: x.upper()) + + def _str_isalnum(self): + return self._str_map(str.isalnum, dtype="bool") + + def _str_isalpha(self): + return self._str_map(str.isalpha, dtype="bool") + + def _str_isdecimal(self): + return self._str_map(str.isdecimal, dtype="bool") + + def _str_isdigit(self): + return self._str_map(str.isdigit, dtype="bool") + + def _str_islower(self): + return self._str_map(str.islower, dtype="bool") + + def _str_isnumeric(self): + return self._str_map(str.isnumeric, dtype="bool") + + def _str_isspace(self): + return self._str_map(str.isspace, dtype="bool") + + def _str_istitle(self): + return self._str_map(str.istitle, dtype="bool") + + def _str_isupper(self): + return self._str_map(str.isupper, dtype="bool") + + def _str_capitalize(self): + return self._str_map(str.capitalize) + + def _str_casefold(self): + return self._str_map(str.casefold) + + def _str_title(self): + return self._str_map(str.title) + + def _str_swapcase(self): + return self._str_map(str.swapcase) + + def _str_lower(self): + return self._str_map(str.lower) + + def _str_normalize(self, form): + f = lambda x: unicodedata.normalize(form, x) + return self._str_map(f) + + def _str_strip(self, to_strip=None): + return self._str_map(lambda x: x.strip(to_strip)) + + def _str_lstrip(self, to_strip=None): + return self._str_map(lambda x: x.lstrip(to_strip)) + + def _str_rstrip(self, to_strip=None): + return self._str_map(lambda x: x.rstrip(to_strip)) + + def _str_removeprefix(self, prefix: str) -> Series: + # outstanding question on whether to use native methods for users on Python 3.9+ + # https://github.com/pandas-dev/pandas/pull/39226#issuecomment-836719770, + # in which case we could do return self._str_map(str.removeprefix) + + def removeprefix(text: str) -> str: + if text.startswith(prefix): + return text[len(prefix) :] + return text + + return self._str_map(removeprefix) + + def _str_removesuffix(self, suffix: str) -> Series: + # this could be used on Python 3.9+ + # f = lambda x: x.removesuffix(suffix) + # return self._str_map(str.removesuffix) + + def removesuffix(text: str) -> str: + if text.endswith(suffix): + return text[: -len(suffix)] + return text + + return self._str_map(removesuffix) + + def _str_extract(self, pat: str, flags: int = 0, expand: bool = True): + regex = re.compile(pat, flags=flags) + na_value = self._str_na_value + + if not expand: + + def g(x): + m = regex.search(x) + return m.groups()[0] if m else na_value + + 
return self._str_map(g, convert=False) + + empty_row = [na_value] * regex.groups + + def f(x): + if not isinstance(x, str): + return empty_row + m = regex.search(x) + if m: + return [na_value if item is None else item for item in m.groups()] + else: + return empty_row + + return [f(val) for val in np.asarray(self)] diff --git a/pandas/core/tools/__init__.py b/pandas/core/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py new file mode 100644 index 00000000..4739fb49 --- /dev/null +++ b/pandas/core/tools/datetimes.py @@ -0,0 +1,1303 @@ +from __future__ import annotations + +from collections import abc +from datetime import datetime +from functools import partial +from itertools import islice +from typing import ( + TYPE_CHECKING, + Callable, + Hashable, + List, + Tuple, + TypedDict, + Union, + cast, + overload, +) +import warnings + +import numpy as np + +from pandas._libs import tslib +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + Timedelta, + Timestamp, + iNaT, + nat_strings, + parsing, + timezones, +) +from pandas._libs.tslibs.parsing import ( + DateParseError, + format_is_iso, + guess_datetime_format, +) +from pandas._libs.tslibs.strptime import array_strptime +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + DateTimeErrorChoices, + Timezone, + npt, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_object, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_float, + is_integer, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import notna + +from pandas.arrays import ( + DatetimeArray, + IntegerArray, +) +from pandas.core import algorithms +from pandas.core.algorithms import unique +from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.datetimes import ( + maybe_convert_dtype, + objects_to_datetime64ns, + tz_to_dtype, +) +from pandas.core.construction import extract_array +from pandas.core.indexes.base import Index +from pandas.core.indexes.datetimes import DatetimeIndex + +if TYPE_CHECKING: + from pandas._libs.tslibs.nattype import NaTType + from pandas._libs.tslibs.timedeltas import UnitChoices + + from pandas import ( + DataFrame, + Series, + ) + +# --------------------------------------------------------------------- +# types used in annotations + +ArrayConvertible = Union[List, Tuple, AnyArrayLike] +Scalar = Union[float, str] +DatetimeScalar = Union[Scalar, datetime] + +DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible] + +DatetimeDictArg = Union[List[Scalar], Tuple[Scalar, ...], AnyArrayLike] + + +class YearMonthDayDict(TypedDict, total=True): + year: DatetimeDictArg + month: DatetimeDictArg + day: DatetimeDictArg + + +class FulldatetimeDict(YearMonthDayDict, total=False): + hour: DatetimeDictArg + hours: DatetimeDictArg + minute: DatetimeDictArg + minutes: DatetimeDictArg + second: DatetimeDictArg + seconds: DatetimeDictArg + ms: DatetimeDictArg + us: DatetimeDictArg + ns: DatetimeDictArg + + +DictConvertible = Union[FulldatetimeDict, "DataFrame"] +start_caching_at = 50 + + +# --------------------------------------------------------------------- + + +def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False): + # Try to guess the format based on the first non-NaN element + non_nan_elements = 
notna(arr).nonzero()[0] + if len(non_nan_elements): + return guess_datetime_format(arr[non_nan_elements[0]], dayfirst=dayfirst) + + +def should_cache( + arg: ArrayConvertible, unique_share: float = 0.7, check_count: int | None = None +) -> bool: + """ + Decides whether to do caching. + + If the percent of unique elements among `check_count` elements less + than `unique_share * 100` then we can do caching. + + Parameters + ---------- + arg: listlike, tuple, 1-d array, Series + unique_share: float, default=0.7, optional + 0 < unique_share < 1 + check_count: int, optional + 0 <= check_count <= len(arg) + + Returns + ------- + do_caching: bool + + Notes + ----- + By default for a sequence of less than 50 items in size, we don't do + caching; for the number of elements less than 5000, we take ten percent of + all elements to check for a uniqueness share; if the sequence size is more + than 5000, then we check only the first 500 elements. + All constants were chosen empirically by. + """ + do_caching = True + + # default realization + if check_count is None: + # in this case, the gain from caching is negligible + if len(arg) <= start_caching_at: + return False + + if len(arg) <= 5000: + check_count = len(arg) // 10 + else: + check_count = 500 + else: + assert ( + 0 <= check_count <= len(arg) + ), "check_count must be in next bounds: [0; len(arg)]" + if check_count == 0: + return False + + assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)" + + try: + # We can't cache if the items are not hashable. + unique_elements = set(islice(arg, check_count)) + except TypeError: + return False + if len(unique_elements) > check_count * unique_share: + do_caching = False + return do_caching + + +def _maybe_cache( + arg: ArrayConvertible, + format: str | None, + cache: bool, + convert_listlike: Callable, +) -> Series: + """ + Create a cache of unique dates from an array of dates + + Parameters + ---------- + arg : listlike, tuple, 1-d array, Series + format : string + Strftime format to parse time + cache : bool + True attempts to create a cache of converted values + convert_listlike : function + Conversion function to apply on dates + + Returns + ------- + cache_array : Series + Cache of converted, unique dates. Can be empty + """ + from pandas import Series + + cache_array = Series(dtype=object) + + if cache: + # Perform a quicker unique check + if not should_cache(arg): + return cache_array + + unique_dates = unique(arg) + if len(unique_dates) < len(arg): + cache_dates = convert_listlike(unique_dates, format) + # GH#45319 + try: + cache_array = Series(cache_dates, index=unique_dates) + except OutOfBoundsDatetime: + return cache_array + # GH#39882 and GH#35888 in case of None and NaT we get duplicates + if not cache_array.index.is_unique: + cache_array = cache_array[~cache_array.index.duplicated()] + return cache_array + + +def _box_as_indexlike( + dt_array: ArrayLike, utc: bool | None = None, name: Hashable = None +) -> Index: + """ + Properly boxes the ndarray of datetimes to DatetimeIndex + if it is possible or to generic Index instead + + Parameters + ---------- + dt_array: 1-d array + Array of datetimes to be wrapped in an Index. 
+ tz : object + None or 'utc' + name : string, default None + Name for a resulting index + + Returns + ------- + result : datetime of converted dates + - DatetimeIndex if convertible to sole datetime64 type + - general Index otherwise + """ + + if is_datetime64_dtype(dt_array): + tz = "utc" if utc else None + return DatetimeIndex(dt_array, tz=tz, name=name) + return Index(dt_array, name=name, dtype=dt_array.dtype) + + +def _convert_and_box_cache( + arg: DatetimeScalarOrArrayConvertible, + cache_array: Series, + name: Hashable | None = None, +) -> Index: + """ + Convert array of dates with a cache and wrap the result in an Index. + + Parameters + ---------- + arg : integer, float, string, datetime, list, tuple, 1-d array, Series + cache_array : Series + Cache of converted, unique dates + name : string, default None + Name for a DatetimeIndex + + Returns + ------- + result : Index-like of converted dates + """ + from pandas import Series + + result = Series(arg).map(cache_array) + return _box_as_indexlike(result._values, utc=None, name=name) + + +def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> Index: + """ + Return results from array_strptime if a %z or %Z directive was passed. + + Parameters + ---------- + result : ndarray[int64] + int64 date representations of the dates + timezones : ndarray + pytz timezone objects + tz : object + None or pytz timezone object + name : string, default None + Name for a DatetimeIndex + + Returns + ------- + tz_result : Index-like of parsed dates with timezone + """ + tz_results = np.array( + [Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)] + ) + if tz is not None: + # Convert to the same tz + tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results]) + + return Index(tz_results, name=name) + + +def _convert_listlike_datetimes( + arg, + format: str | None, + name: Hashable = None, + tz: Timezone | None = None, + unit: str | None = None, + errors: str = "raise", + infer_datetime_format: bool = False, + dayfirst: bool | None = None, + yearfirst: bool | None = None, + exact: bool = True, +): + """ + Helper function for to_datetime. 
Performs the conversions of 1D listlike + of dates + + Parameters + ---------- + arg : list, tuple, ndarray, Series, Index + date to be parsed + name : object + None or string for the Index name + tz : object + None or 'utc' + unit : str + None or string of the frequency of the passed data + errors : str + error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' + infer_datetime_format : bool, default False + inferring format behavior from to_datetime + dayfirst : bool + dayfirst parsing behavior from to_datetime + yearfirst : bool + yearfirst parsing behavior from to_datetime + exact : bool, default True + exact format matching behavior from to_datetime + + Returns + ------- + Index-like of parsed dates + """ + if isinstance(arg, (list, tuple)): + arg = np.array(arg, dtype="O") + + arg_dtype = getattr(arg, "dtype", None) + # these are shortcutable + if is_datetime64tz_dtype(arg_dtype): + if not isinstance(arg, (DatetimeArray, DatetimeIndex)): + return DatetimeIndex(arg, tz=tz, name=name) + if tz == "utc": + arg = arg.tz_convert(None).tz_localize(tz) + return arg + + elif is_datetime64_ns_dtype(arg_dtype): + if not isinstance(arg, (DatetimeArray, DatetimeIndex)): + try: + return DatetimeIndex(arg, tz=tz, name=name) + except ValueError: + pass + elif tz: + # DatetimeArray, DatetimeIndex + return arg.tz_localize(tz) + + return arg + + elif unit is not None: + if format is not None: + raise ValueError("cannot specify both format and unit") + return _to_datetime_with_unit(arg, unit, name, tz, errors) + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, datetime, list, tuple, 1-d array, or Series" + ) + + # warn if passing timedelta64, raise for PeriodDtype + # NB: this must come after unit transformation + orig_arg = arg + try: + arg, _ = maybe_convert_dtype(arg, copy=False, tz=timezones.maybe_get_tz(tz)) + except TypeError: + if errors == "coerce": + npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) + return DatetimeIndex(npvalues, name=name) + elif errors == "ignore": + idx = Index(arg, name=name) + return idx + raise + + arg = ensure_object(arg) + require_iso8601 = False + + if infer_datetime_format and format is None: + format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) + + if format is not None: + # There is a special fast-path for iso8601 formatted + # datetime strings, so in those cases don't use the inferred + # format because this path makes process slower in this + # special case + format_is_iso8601 = format_is_iso(format) + if format_is_iso8601: + require_iso8601 = not infer_datetime_format + format = None + + if format is not None: + res = _to_datetime_with_format( + arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format + ) + if res is not None: + return res + + assert format is None or infer_datetime_format + utc = tz == "utc" + result, tz_parsed = objects_to_datetime64ns( + arg, + dayfirst=dayfirst, + yearfirst=yearfirst, + utc=utc, + errors=errors, + require_iso8601=require_iso8601, + allow_object=True, + ) + + if tz_parsed is not None: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed)) + return DatetimeIndex._simple_new(dta, name=name) + + utc = tz == "utc" + return _box_as_indexlike(result, utc=utc, name=name) + + +def _array_strptime_with_fallback( + arg, + name, + tz, + fmt: str, + exact: bool, + errors: str, + infer_datetime_format: bool, +) -> Index | None: + """ + Call array_strptime, with fallback 
behavior depending on 'errors'. + """ + utc = tz == "utc" + + try: + result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors) + except OutOfBoundsDatetime: + if errors == "raise": + raise + elif errors == "coerce": + result = np.empty(arg.shape, dtype="M8[ns]") + iresult = result.view("i8") + iresult.fill(iNaT) + else: + result = arg + except ValueError: + # if fmt was inferred, try falling back + # to array_to_datetime - terminate here + # for specified formats + if not infer_datetime_format: + if errors == "raise": + raise + elif errors == "coerce": + result = np.empty(arg.shape, dtype="M8[ns]") + iresult = result.view("i8") + iresult.fill(iNaT) + else: + result = arg + else: + # Indicates to the caller to fallback to objects_to_datetime64ns + return None + else: + if "%Z" in fmt or "%z" in fmt: + return _return_parsed_timezone_results(result, timezones, tz, name) + + return _box_as_indexlike(result, utc=utc, name=name) + + +def _to_datetime_with_format( + arg, + orig_arg, + name, + tz, + fmt: str, + exact: bool, + errors: str, + infer_datetime_format: bool, +) -> Index | None: + """ + Try parsing with the given format, returning None on failure. + """ + result = None + + # shortcut formatting here + if fmt == "%Y%m%d": + # pass orig_arg as float-dtype may have been converted to + # datetime64[ns] + orig_arg = ensure_object(orig_arg) + try: + # may return None without raising + result = _attempt_YYYYMMDD(orig_arg, errors=errors) + except (ValueError, TypeError, OutOfBoundsDatetime) as err: + raise ValueError( + "cannot convert the input to '%Y%m%d' date format" + ) from err + if result is not None: + utc = tz == "utc" + return _box_as_indexlike(result, utc=utc, name=name) + + # fallback + res = _array_strptime_with_fallback( + arg, name, tz, fmt, exact, errors, infer_datetime_format + ) + return res + + +def _to_datetime_with_unit(arg, unit, name, tz, errors: str) -> Index: + """ + to_datetime specalized to the case where a 'unit' is passed. + """ + arg = extract_array(arg, extract_numpy=True) + + # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime + # because it expects an ndarray argument + if isinstance(arg, IntegerArray): + arr = arg.astype(f"datetime64[{unit}]") + tz_parsed = None + else: + arg = np.asarray(arg) + arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) + + if errors == "ignore": + # Index constructor _may_ infer to DatetimeIndex + result = Index._with_infer(arr, name=name) + else: + result = DatetimeIndex(arr, name=name) + + if not isinstance(result, DatetimeIndex): + return result + + # GH#23758: We may still need to localize the result with tz + # GH#25546: Apply tz_parsed first (from arg), then tz (from caller) + # result will be naive but in UTC + result = result.tz_localize("UTC").tz_convert(tz_parsed) + + if tz is not None: + if result.tz is None: + result = result.tz_localize(tz) + else: + result = result.tz_convert(tz) + return result + + +def _adjust_to_origin(arg, origin, unit): + """ + Helper function for to_datetime. 
+ Adjust input argument to the specified origin + + Parameters + ---------- + arg : list, tuple, ndarray, Series, Index + date to be adjusted + origin : 'julian' or Timestamp + origin offset for the arg + unit : str + passed unit from to_datetime, must be 'D' + + Returns + ------- + ndarray or scalar of adjusted date(s) + """ + if origin == "julian": + original = arg + j0 = Timestamp(0).to_julian_date() + if unit != "D": + raise ValueError("unit must be 'D' for origin='julian'") + try: + arg = arg - j0 + except TypeError as err: + raise ValueError( + "incompatible 'arg' type for given 'origin'='julian'" + ) from err + + # preemptively check this for a nice range + j_max = Timestamp.max.to_julian_date() - j0 + j_min = Timestamp.min.to_julian_date() - j0 + if np.any(arg > j_max) or np.any(arg < j_min): + raise OutOfBoundsDatetime( + f"{original} is Out of Bounds for origin='julian'" + ) + else: + # arg must be numeric + if not ( + (is_scalar(arg) and (is_integer(arg) or is_float(arg))) + or is_numeric_dtype(np.asarray(arg)) + ): + raise ValueError( + f"'{arg}' is not compatible with origin='{origin}'; " + "it must be numeric with a unit specified" + ) + + # we are going to offset back to unix / epoch time + try: + offset = Timestamp(origin) + except OutOfBoundsDatetime as err: + raise OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") from err + except ValueError as err: + raise ValueError( + f"origin {origin} cannot be converted to a Timestamp" + ) from err + + if offset.tz is not None: + raise ValueError(f"origin offset {offset} must be tz-naive") + td_offset = offset - Timestamp(0) + + # convert the offset to the unit of the arg + # this should be lossless in terms of precision + ioffset = td_offset // Timedelta(1, unit=unit) + + # scalars & ndarray-like can handle the addition + if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)): + arg = np.asarray(arg) + arg = arg + ioffset + return arg + + +@overload +def to_datetime( + arg: DatetimeScalar, + errors: DateTimeErrorChoices = ..., + dayfirst: bool = ..., + yearfirst: bool = ..., + utc: bool | None = ..., + format: str | None = ..., + exact: bool = ..., + unit: str | None = ..., + infer_datetime_format: bool = ..., + origin=..., + cache: bool = ..., +) -> Timestamp: + ... + + +@overload +def to_datetime( + arg: Series | DictConvertible, + errors: DateTimeErrorChoices = ..., + dayfirst: bool = ..., + yearfirst: bool = ..., + utc: bool | None = ..., + format: str | None = ..., + exact: bool = ..., + unit: str | None = ..., + infer_datetime_format: bool = ..., + origin=..., + cache: bool = ..., +) -> Series: + ... + + +@overload +def to_datetime( + arg: list | tuple | Index | ArrayLike, + errors: DateTimeErrorChoices = ..., + dayfirst: bool = ..., + yearfirst: bool = ..., + utc: bool | None = ..., + format: str | None = ..., + exact: bool = ..., + unit: str | None = ..., + infer_datetime_format: bool = ..., + origin=..., + cache: bool = ..., +) -> DatetimeIndex: + ... + + +def to_datetime( + arg: DatetimeScalarOrArrayConvertible | DictConvertible, + errors: DateTimeErrorChoices = "raise", + dayfirst: bool = False, + yearfirst: bool = False, + utc: bool | None = None, + format: str | None = None, + exact: bool = True, + unit: str | None = None, + infer_datetime_format: bool = False, + origin="unix", + cache: bool = True, +) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None: + """ + Convert argument to datetime. 
+ + This function converts a scalar, array-like, :class:`Series` or + :class:`DataFrame`/dict-like to a pandas datetime object. + + Parameters + ---------- + arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like + The object to convert to a datetime. If a :class:`DataFrame` is provided, the + method expects minimally the following columns: :const:`"year"`, + :const:`"month"`, :const:`"day"`. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If :const:`'raise'`, then invalid parsing will raise an exception. + - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`. + - If :const:`'ignore'`, then invalid parsing will return the input. + dayfirst : bool, default False + Specify a date parse order if `arg` is str or is list-like. + If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"` + is parsed as :const:`2012-11-10`. + + .. warning:: + + ``dayfirst=True`` is not strict, but will prefer to parse + with day first. If a delimited date string cannot be parsed in + accordance with the given `dayfirst` option, e.g. + ``to_datetime(['31-12-2021'])``, then a warning will be shown. + + yearfirst : bool, default False + Specify a date parse order if `arg` is str or is list-like. + + - If :const:`True` parses dates with the year first, e.g. + :const:`"10/11/12"` is parsed as :const:`2010-11-12`. + - If both `dayfirst` and `yearfirst` are :const:`True`, `yearfirst` is + preceded (same as :mod:`dateutil`). + + .. warning:: + + ``yearfirst=True`` is not strict, but will prefer to parse + with year first. + + utc : bool, default None + Control timezone-related parsing, localization and conversion. + + - If :const:`True`, the function *always* returns a timezone-aware + UTC-localized :class:`Timestamp`, :class:`Series` or + :class:`DatetimeIndex`. To do this, timezone-naive inputs are + *localized* as UTC, while timezone-aware inputs are *converted* to UTC. + + - If :const:`False` (default), inputs will not be coerced to UTC. + Timezone-naive inputs will remain naive, while timezone-aware ones + will keep their time offsets. Limitations exist for mixed + offsets (typically, daylight savings), see :ref:`Examples + ` section for details. + + See also: pandas general documentation about `timezone conversion and + localization + `_. + + format : str, default None + The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. Note that + :const:`"%f"` will parse all the way up to nanoseconds. See + `strftime documentation + `_ for more information on choices. + exact : bool, default True + Control how `format` is used: + + - If :const:`True`, require an exact `format` match. + - If :const:`False`, allow the `format` to match anywhere in the target + string. + + unit : str, default 'ns' + The unit of the arg (D,s,ms,us,ns) denote the unit, which is an + integer or float number. This will be based off the origin. + Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate + the number of milliseconds to the unix epoch start. + infer_datetime_format : bool, default False + If :const:`True` and no `format` is given, attempt to infer the format + of the datetime strings based on the first non-NaN element, + and if it can be inferred, switch to a faster method of parsing them. + In some cases this can increase the parsing speed by ~5-10x. + origin : scalar, default 'unix' + Define the reference date. The numeric values would be parsed as number + of units (defined by `unit`) since this reference date. 
+ + - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01. + - If :const:`'julian'`, unit must be :const:`'D'`, and origin is set to + beginning of Julian Calendar. Julian day number :const:`0` is assigned + to the day starting at noon on January 1, 4713 BC. + - If Timestamp convertible, origin is set to Timestamp identified by + origin. + cache : bool, default True + If :const:`True`, use a cache of unique, converted dates to apply the + datetime conversion. May produce significant speed-up when parsing + duplicate date strings, especially ones with timezone offsets. The cache + is only used when there are at least 50 values. The presence of + out-of-bounds values will render the cache unusable and may slow down + parsing. + + .. versionchanged:: 0.25.0 + changed default value from :const:`False` to :const:`True`. + + Returns + ------- + datetime + If parsing succeeded. + Return type depends on input (types in parenthesis correspond to + fallback in case of unsuccessful timezone or out-of-range timestamp + parsing): + + - scalar: :class:`Timestamp` (or :class:`datetime.datetime`) + - array-like: :class:`DatetimeIndex` (or :class:`Series` with + :class:`object` dtype containing :class:`datetime.datetime`) + - Series: :class:`Series` of :class:`datetime64` dtype (or + :class:`Series` of :class:`object` dtype containing + :class:`datetime.datetime`) + - DataFrame: :class:`Series` of :class:`datetime64` dtype (or + :class:`Series` of :class:`object` dtype containing + :class:`datetime.datetime`) + + Raises + ------ + ParserError + When parsing a date from string fails. + ValueError + When another datetime conversion error happens. For example when one + of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or + when a Timezone-aware :class:`datetime.datetime` is found in an array-like + of mixed time offsets, and ``utc=False``. + + See Also + -------- + DataFrame.astype : Cast argument to a specified dtype. + to_timedelta : Convert argument to timedelta. + convert_dtypes : Convert dtypes. + + Notes + ----- + + Many input types are supported, and lead to different output types: + + - **scalars** can be int, float, str, datetime object (from stdlib :mod:`datetime` + module or :mod:`numpy`). They are converted to :class:`Timestamp` when + possible, otherwise they are converted to :class:`datetime.datetime`. + None/NaN/null scalars are converted to :const:`NaT`. + + - **array-like** can contain int, float, str, datetime objects. They are + converted to :class:`DatetimeIndex` when possible, otherwise they are + converted to :class:`Index` with :class:`object` dtype, containing + :class:`datetime.datetime`. None/NaN/null entries are converted to + :const:`NaT` in both cases. + + - **Series** are converted to :class:`Series` with :class:`datetime64` + dtype when possible, otherwise they are converted to :class:`Series` with + :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null + entries are converted to :const:`NaT` in both cases. + + - **DataFrame/dict-like** are converted to :class:`Series` with + :class:`datetime64` dtype. For each row a datetime is created from assembling + the various dataframe columns. Column keys can be common abbreviations + like [‘year’, ‘month’, ‘day’, ‘minute’, ‘second’, ‘ms’, ‘us’, ‘ns’]) or + plurals of the same. 
+ + The following causes are responsible for :class:`datetime.datetime` objects + being returned (possibly inside an :class:`Index` or a :class:`Series` with + :class:`object` dtype) instead of a proper pandas designated type + (:class:`Timestamp`, :class:`DatetimeIndex` or :class:`Series` + with :class:`datetime64` dtype): + + - when any input element is before :const:`Timestamp.min` or after + :const:`Timestamp.max`, see `timestamp limitations + `_. + + - when ``utc=False`` (default) and the input is an array-like or + :class:`Series` containing mixed naive/aware datetime, or aware with mixed + time offsets. Note that this happens in the (quite frequent) situation when + the timezone has a daylight savings policy. In that case you may wish to + use ``utc=True``. + + Examples + -------- + + **Handling various input formats** + + Assembling a datetime from multiple columns of a :class:`DataFrame`. The keys + can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', + 'ms', 'us', 'ns']) or plurals of the same + + >>> df = pd.DataFrame({'year': [2015, 2016], + ... 'month': [2, 3], + ... 'day': [4, 5]}) + >>> pd.to_datetime(df) + 0 2015-02-04 + 1 2016-03-05 + dtype: datetime64[ns] + + Passing ``infer_datetime_format=True`` can often-times speedup a parsing + if its not an ISO8601 format exactly, but in a regular format. + + >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000) + >>> s.head() + 0 3/11/2000 + 1 3/12/2000 + 2 3/13/2000 + 3 3/11/2000 + 4 3/12/2000 + dtype: object + + >>> %timeit pd.to_datetime(s, infer_datetime_format=True) # doctest: +SKIP + 100 loops, best of 3: 10.4 ms per loop + + >>> %timeit pd.to_datetime(s, infer_datetime_format=False) # doctest: +SKIP + 1 loop, best of 3: 471 ms per loop + + Using a unix epoch time + + >>> pd.to_datetime(1490195805, unit='s') + Timestamp('2017-03-22 15:16:45') + >>> pd.to_datetime(1490195805433502912, unit='ns') + Timestamp('2017-03-22 15:16:45.433502912') + + .. warning:: For float arg, precision rounding might happen. To prevent + unexpected behavior use a fixed-width exact type. + + Using a non-unix epoch origin + + >>> pd.to_datetime([1, 2, 3], unit='D', + ... origin=pd.Timestamp('1960-01-01')) + DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], + dtype='datetime64[ns]', freq=None) + + **Non-convertible date/times** + + If a date does not meet the `timestamp limitations + `_, passing ``errors='ignore'`` + will return the original input instead of raising any exception. + + Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`, + in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. + + >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore') + datetime.datetime(1300, 1, 1, 0, 0) + >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') + NaT + + .. 
_to_datetime_tz_examples: + + **Timezones and time offsets** + + The default behaviour (``utc=False``) is as follows: + + - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`: + + >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00:15']) + DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], + dtype='datetime64[ns]', freq=None) + + - Timezone-aware inputs *with constant time offset* are converted to + timezone-aware :class:`DatetimeIndex`: + + >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500']) + DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], + dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None) + + - However, timezone-aware inputs *with mixed time offsets* (for example + issued from a timezone with daylight savings, such as Europe/Paris) + are **not successfully converted** to a :class:`DatetimeIndex`. Instead a + simple :class:`Index` containing :class:`datetime.datetime` objects is + returned: + + >>> pd.to_datetime(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100']) + Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], + dtype='object') + + - A mix of timezone-aware and timezone-naive inputs is converted to + a timezone-aware :class:`DatetimeIndex` if the offsets of the timezone-aware + are constant: + + >>> from datetime import datetime + >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)]) + DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'], + dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None) + + | + + Setting ``utc=True`` solves most of the above issues: + + - Timezone-naive inputs are *localized* as UTC + + >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True) + DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + + - Timezone-aware inputs are *converted* to UTC (the output represents the + exact same datetime, but viewed from the UTC time offset `+00:00`). + + >>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'], + ... utc=True) + DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + + - Inputs can contain both naive and aware, string or datetime, the above + rules still apply + + >>> from datetime import timezone, timedelta + >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 12:00 -0530', + ... datetime(2020, 1, 1, 18), + ... datetime(2020, 1, 1, 18, + ... tzinfo=timezone(-timedelta(hours=1)))], + ... 
utc=True) + DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 17:30:00+00:00', + '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + """ + if arg is None: + return None + + if origin != "unix": + arg = _adjust_to_origin(arg, origin, unit) + + tz = "utc" if utc else None + convert_listlike = partial( + _convert_listlike_datetimes, + tz=tz, + unit=unit, + dayfirst=dayfirst, + yearfirst=yearfirst, + errors=errors, + exact=exact, + infer_datetime_format=infer_datetime_format, + ) + + result: Timestamp | NaTType | Series | Index + + if isinstance(arg, Timestamp): + result = arg + if tz is not None: + if arg.tz is not None: + result = arg.tz_convert(tz) + else: + result = arg.tz_localize(tz) + elif isinstance(arg, ABCSeries): + cache_array = _maybe_cache(arg, format, cache, convert_listlike) + if not cache_array.empty: + result = arg.map(cache_array) + else: + values = convert_listlike(arg._values, format) + result = arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)): + result = _assemble_from_unit_mappings(arg, errors, tz) + elif isinstance(arg, Index): + cache_array = _maybe_cache(arg, format, cache, convert_listlike) + if not cache_array.empty: + result = _convert_and_box_cache(arg, cache_array, name=arg.name) + else: + result = convert_listlike(arg, format, name=arg.name) + elif is_list_like(arg): + try: + # error: Argument 1 to "_maybe_cache" has incompatible type + # "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray, + # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...], + # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]" + argc = cast( + Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg + ) + cache_array = _maybe_cache(argc, format, cache, convert_listlike) + except OutOfBoundsDatetime: + # caching attempts to create a DatetimeIndex, which may raise + # an OOB. If that's the desired behavior, then just reraise... + if errors == "raise": + raise + # ... otherwise, continue without the cache. + from pandas import Series + + cache_array = Series([], dtype=object) # just an empty array + if not cache_array.empty: + result = _convert_and_box_cache(argc, cache_array) + else: + result = convert_listlike(argc, format) + else: + result = convert_listlike(np.array([arg]), format)[0] + if isinstance(arg, bool) and isinstance(result, np.bool_): + result = bool(result) # TODO: avoid this kludge. 
+ + # error: Incompatible return value type (got "Union[Timestamp, NaTType, + # Series, Index]", expected "Union[DatetimeIndex, Series, float, str, + # NaTType, None]") + return result # type: ignore[return-value] + + +# mappings for assembling units +_unit_map = { + "year": "year", + "years": "year", + "month": "month", + "months": "month", + "day": "day", + "days": "day", + "hour": "h", + "hours": "h", + "minute": "m", + "minutes": "m", + "second": "s", + "seconds": "s", + "ms": "ms", + "millisecond": "ms", + "milliseconds": "ms", + "us": "us", + "microsecond": "us", + "microseconds": "us", + "ns": "ns", + "nanosecond": "ns", + "nanoseconds": "ns", +} + + +def _assemble_from_unit_mappings(arg, errors: DateTimeErrorChoices, tz): + """ + assemble the unit specified fields from the arg (DataFrame) + Return a Series for actual parsing + + Parameters + ---------- + arg : DataFrame + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + + - If :const:`'raise'`, then invalid parsing will raise an exception + - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT` + - If :const:`'ignore'`, then invalid parsing will return the input + tz : None or 'utc' + + Returns + ------- + Series + """ + from pandas import ( + DataFrame, + to_numeric, + to_timedelta, + ) + + arg = DataFrame(arg) + if not arg.columns.is_unique: + raise ValueError("cannot assemble with duplicate keys") + + # replace passed unit with _unit_map + def f(value): + if value in _unit_map: + return _unit_map[value] + + # m is case significant + if value.lower() in _unit_map: + return _unit_map[value.lower()] + + return value + + unit = {k: f(k) for k in arg.keys()} + unit_rev = {v: k for k, v in unit.items()} + + # we require at least Ymd + required = ["year", "month", "day"] + req = sorted(set(required) - set(unit_rev.keys())) + if len(req): + _required = ",".join(req) + raise ValueError( + "to assemble mappings requires at least that " + f"[year, month, day] be specified: [{_required}] is missing" + ) + + # keys we don't recognize + excess = sorted(set(unit_rev.keys()) - set(_unit_map.values())) + if len(excess): + _excess = ",".join(excess) + raise ValueError( + f"extra keys have been passed to the datetime assemblage: [{_excess}]" + ) + + def coerce(values): + # we allow coercion to if errors allows + values = to_numeric(values, errors=errors) + + # prevent overflow in case of int8 or int16 + if is_integer_dtype(values): + values = values.astype("int64", copy=False) + return values + + values = ( + coerce(arg[unit_rev["year"]]) * 10000 + + coerce(arg[unit_rev["month"]]) * 100 + + coerce(arg[unit_rev["day"]]) + ) + try: + values = to_datetime(values, format="%Y%m%d", errors=errors, utc=tz) + except (TypeError, ValueError) as err: + raise ValueError(f"cannot assemble the datetimes: {err}") from err + + units: list[UnitChoices] = ["h", "m", "s", "ms", "us", "ns"] + for u in units: + value = unit_rev.get(u) + if value is not None and value in arg: + try: + values += to_timedelta(coerce(arg[value]), unit=u, errors=errors) + except (TypeError, ValueError) as err: + raise ValueError( + f"cannot assemble the datetimes [{value}]: {err}" + ) from err + return values + + +def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray | None: + """ + try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, + arg is a passed in as an object dtype, but could really be ints/strings + with nan-like/or floats (e.g. 
with nan) + + Parameters + ---------- + arg : np.ndarray[object] + errors : {'raise','ignore','coerce'} + """ + + def calc(carg): + # calculate the actual result + carg = carg.astype(object, copy=False) + parsed = parsing.try_parse_year_month_day( + carg / 10000, carg / 100 % 100, carg % 100 + ) + return tslib.array_to_datetime(parsed, errors=errors)[0] + + def calc_with_mask(carg, mask): + result = np.empty(carg.shape, dtype="M8[ns]") + iresult = result.view("i8") + iresult[~mask] = iNaT + + masked_result = calc(carg[mask].astype(np.float64).astype(np.int64)) + result[mask] = masked_result.astype("M8[ns]") + return result + + # try intlike / strings that are ints + try: + return calc(arg.astype(np.int64)) + except (ValueError, OverflowError, TypeError): + pass + + # a float with actual np.nan + try: + carg = arg.astype(np.float64) + return calc_with_mask(carg, notna(carg)) + except (ValueError, OverflowError, TypeError): + pass + + # string with NaN-like + try: + # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected + # "Union[Union[ExtensionArray, ndarray], Index, Series]" + mask = ~algorithms.isin(arg, list(nat_strings)) # type: ignore[arg-type] + return calc_with_mask(arg, mask) + except (ValueError, OverflowError, TypeError): + pass + + return None + + +def to_time(arg, format=None, infer_time_format=False, errors="raise"): + # GH#34145 + warnings.warn( + "`to_time` has been moved, should be imported from pandas.core.tools.times. " + "This alias will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + from pandas.core.tools.times import to_time + + return to_time(arg, format, infer_time_format, errors) + + +__all__ = [ + "DateParseError", + "should_cache", + "to_datetime", + "to_time", +] diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py new file mode 100644 index 00000000..ef7f4bc9 --- /dev/null +++ b/pandas/core/tools/numeric.py @@ -0,0 +1,244 @@ +from __future__ import annotations + +import numpy as np + +from pandas._libs import lib +from pandas._typing import npt + +from pandas.core.dtypes.cast import maybe_downcast_numeric +from pandas.core.dtypes.common import ( + ensure_object, + is_datetime_or_timedelta_dtype, + is_decimal, + is_integer_dtype, + is_number, + is_numeric_dtype, + is_scalar, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) + +import pandas as pd +from pandas.core.arrays.numeric import NumericArray + + +def to_numeric(arg, errors="raise", downcast=None): + """ + Convert argument to a numeric type. + + The default return dtype is `float64` or `int64` + depending on the data supplied. Use the `downcast` parameter + to obtain other dtypes. + + Please note that precision loss may occur if really large numbers + are passed in. Due to the internal limitations of `ndarray`, if + numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min) + or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are + passed in, it is very likely they will be converted to float so that + they can stored in an `ndarray`. These warnings apply similarly to + `Series` since it internally leverages `ndarray`. + + Parameters + ---------- + arg : scalar, list, tuple, 1-d array, or Series + Argument to be converted. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception. + - If 'coerce', then invalid parsing will be set as NaN. + - If 'ignore', then invalid parsing will return the input. 
+ downcast : str, default None + Can be 'integer', 'signed', 'unsigned', or 'float'. + If not None, and if the data has been successfully cast to a + numerical dtype (or if the data was numeric to begin with), + downcast that resulting data to the smallest numerical dtype + possible according to the following rules: + + - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) + - 'unsigned': smallest unsigned int dtype (min.: np.uint8) + - 'float': smallest float dtype (min.: np.float32) + + As this behaviour is separate from the core conversion to + numeric values, any errors raised during the downcasting + will be surfaced regardless of the value of the 'errors' input. + + In addition, downcasting will only occur if the size + of the resulting data's dtype is strictly larger than + the dtype it is to be cast to, so if none of the dtypes + checked satisfy that specification, no downcasting will be + performed on the data. + + Returns + ------- + ret + Numeric if parsing succeeded. + Return type depends on input. Series if Series, otherwise ndarray. + + See Also + -------- + DataFrame.astype : Cast argument to a specified dtype. + to_datetime : Convert argument to datetime. + to_timedelta : Convert argument to timedelta. + numpy.ndarray.astype : Cast a numpy array to a specified type. + DataFrame.convert_dtypes : Convert dtypes. + + Examples + -------- + Take separate series and convert to numeric, coercing when told to + + >>> s = pd.Series(['1.0', '2', -3]) + >>> pd.to_numeric(s) + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float64 + >>> pd.to_numeric(s, downcast='float') + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float32 + >>> pd.to_numeric(s, downcast='signed') + 0 1 + 1 2 + 2 -3 + dtype: int8 + >>> s = pd.Series(['apple', '1.0', '2', -3]) + >>> pd.to_numeric(s, errors='ignore') + 0 apple + 1 1.0 + 2 2 + 3 -3 + dtype: object + >>> pd.to_numeric(s, errors='coerce') + 0 NaN + 1 1.0 + 2 2.0 + 3 -3.0 + dtype: float64 + + Downcasting of nullable integer and floating dtypes is supported: + + >>> s = pd.Series([1, 2, 3], dtype="Int64") + >>> pd.to_numeric(s, downcast="integer") + 0 1 + 1 2 + 2 3 + dtype: Int8 + >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64") + >>> pd.to_numeric(s, downcast="float") + 0 1.0 + 1 2.1 + 2 3.0 + dtype: Float32 + """ + if downcast not in (None, "integer", "signed", "unsigned", "float"): + raise ValueError("invalid downcasting method provided") + + if errors not in ("ignore", "raise", "coerce"): + raise ValueError("invalid error value specified") + + is_series = False + is_index = False + is_scalars = False + + if isinstance(arg, ABCSeries): + is_series = True + values = arg.values + elif isinstance(arg, ABCIndex): + is_index = True + if needs_i8_conversion(arg.dtype): + values = arg.asi8 + else: + values = arg.values + elif isinstance(arg, (list, tuple)): + values = np.array(arg, dtype="O") + elif is_scalar(arg): + if is_decimal(arg): + return float(arg) + if is_number(arg): + return arg + is_scalars = True + values = np.array([arg], dtype="O") + elif getattr(arg, "ndim", 1) > 1: + raise TypeError("arg must be a list, tuple, 1-d array, or Series") + else: + values = arg + + # GH33013: for IntegerArray & FloatingArray extract non-null values for casting + # save mask to reconstruct the full array after casting + mask: npt.NDArray[np.bool_] | None = None + if isinstance(values, NumericArray): + mask = values._mask + values = values._data[~mask] + + values_dtype = getattr(values, "dtype", None) + if is_numeric_dtype(values_dtype): + pass + elif 
is_datetime_or_timedelta_dtype(values_dtype): + values = values.view(np.int64) + else: + values = ensure_object(values) + coerce_numeric = errors not in ("ignore", "raise") + try: + values, _ = lib.maybe_convert_numeric( + values, set(), coerce_numeric=coerce_numeric + ) + except (ValueError, TypeError): + if errors == "raise": + raise + + # attempt downcast only if the data has been successfully converted + # to a numerical dtype and if a downcast method has been specified + if downcast is not None and is_numeric_dtype(values.dtype): + typecodes: str | None = None + + if downcast in ("integer", "signed"): + typecodes = np.typecodes["Integer"] + elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0): + typecodes = np.typecodes["UnsignedInteger"] + elif downcast == "float": + typecodes = np.typecodes["Float"] + + # pandas support goes only to np.float32, + # as float dtypes smaller than that are + # extremely rare and not well supported + float_32_char = np.dtype(np.float32).char + float_32_ind = typecodes.index(float_32_char) + typecodes = typecodes[float_32_ind:] + + if typecodes is not None: + # from smallest to largest + for typecode in typecodes: + dtype = np.dtype(typecode) + if dtype.itemsize <= values.dtype.itemsize: + values = maybe_downcast_numeric(values, dtype) + + # successful conversion + if values.dtype == dtype: + break + + # GH33013: for IntegerArray & FloatingArray need to reconstruct masked array + if mask is not None: + data = np.zeros(mask.shape, dtype=values.dtype) + data[~mask] = values + + from pandas.core.arrays import ( + FloatingArray, + IntegerArray, + ) + + klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray + values = klass(data, mask.copy()) + + if is_series: + return arg._constructor(values, index=arg.index, name=arg.name) + elif is_index: + # because we want to coerce to numeric if possible, + # do not use _shallow_copy + return pd.Index(values, name=arg.name) + elif is_scalars: + return values[0] + else: + return values diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py new file mode 100644 index 00000000..5026c97c --- /dev/null +++ b/pandas/core/tools/timedeltas.py @@ -0,0 +1,259 @@ +""" +timedelta support tools +""" +from __future__ import annotations + +from datetime import timedelta +from typing import ( + TYPE_CHECKING, + overload, +) + +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import ( + NaT, + NaTType, +) +from pandas._libs.tslibs.timedeltas import ( + Timedelta, + parse_timedelta_unit, +) + +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) + +from pandas.core.arrays.timedeltas import sequence_to_td64ns + +if TYPE_CHECKING: + from pandas._libs.tslibs.timedeltas import UnitChoices + from pandas._typing import ( + ArrayLike, + DateTimeErrorChoices, + ) + + from pandas import ( + Index, + Series, + TimedeltaIndex, + ) + + +@overload +def to_timedelta( + arg: str | float | timedelta, + unit: UnitChoices | None = ..., + errors: DateTimeErrorChoices = ..., +) -> Timedelta: + ... + + +@overload +def to_timedelta( + arg: Series, + unit: UnitChoices | None = ..., + errors: DateTimeErrorChoices = ..., +) -> Series: + ... + + +@overload +def to_timedelta( + arg: list | tuple | range | ArrayLike | Index, + unit: UnitChoices | None = ..., + errors: DateTimeErrorChoices = ..., +) -> TimedeltaIndex: + ... 
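+
+# A rough sketch of the dispatch declared by the overloads above (illustrative
+# comment only; see the docstring below for authoritative examples):
+#
+#   pd.to_timedelta("1 days")               # scalar    -> Timedelta
+#   pd.to_timedelta(pd.Series(["1 days"]))  # Series    -> Series (timedelta64[ns])
+#   pd.to_timedelta(["1 days", "2 days"])   # list-like -> TimedeltaIndex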
+
+
+def to_timedelta(
+    arg: str
+    | int
+    | float
+    | timedelta
+    | list
+    | tuple
+    | range
+    | ArrayLike
+    | Index
+    | Series,
+    unit: UnitChoices | None = None,
+    errors: DateTimeErrorChoices = "raise",
+) -> Timedelta | TimedeltaIndex | Series:
+    """
+    Convert argument to timedelta.
+
+    Timedeltas are absolute differences in times, expressed in different
+    units (e.g. days, hours, minutes, seconds). This method converts
+    an argument from a recognized timedelta format / value into
+    a Timedelta type.
+
+    Parameters
+    ----------
+    arg : str, timedelta, list-like or Series
+        The data to be converted to timedelta.
+
+        .. deprecated:: 1.2
+            Strings with units 'M', 'Y' and 'y' do not represent
+            unambiguous timedelta values and will be removed in a future version.
+
+    unit : str, optional
+        Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``.
+
+        Possible values:
+
+        * 'W'
+        * 'D' / 'days' / 'day'
+        * 'hours' / 'hour' / 'hr' / 'h'
+        * 'm' / 'minute' / 'min' / 'minutes' / 'T'
+        * 'S' / 'seconds' / 'sec' / 'second'
+        * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L'
+        * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U'
+        * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N'
+
+        .. versionchanged:: 1.1.0
+
+           Must not be specified when `arg` contains strings and
+           ``errors="raise"``.
+
+    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
+        - If 'raise', then invalid parsing will raise an exception.
+        - If 'coerce', then invalid parsing will be set as NaT.
+        - If 'ignore', then invalid parsing will return the input.
+
+    Returns
+    -------
+    timedelta
+        If parsing succeeded.
+        Return type depends on input:
+
+        - list-like: TimedeltaIndex of timedelta64 dtype
+        - Series: Series of timedelta64 dtype
+        - scalar: Timedelta
+
+    See Also
+    --------
+    DataFrame.astype : Cast argument to a specified dtype.
+    to_datetime : Convert argument to datetime.
+    convert_dtypes : Convert dtypes.
+
+    Notes
+    -----
+    If the precision is higher than nanoseconds, the precision of the duration is
+    truncated to nanoseconds for string inputs.
+
+    Examples
+    --------
+    Parsing a single string to a Timedelta:
+
+    >>> pd.to_timedelta('1 days 06:05:01.00003')
+    Timedelta('1 days 06:05:01.000030')
+    >>> pd.to_timedelta('15.5us')
+    Timedelta('0 days 00:00:00.000015500')
+
+    Parsing a list or array of strings:
+
+    >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
+    TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT],
+                   dtype='timedelta64[ns]', freq=None)
+
+    Converting numbers by specifying the `unit` keyword argument:
+
+    >>> pd.to_timedelta(np.arange(5), unit='s')
+    TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02',
+                    '0 days 00:00:03', '0 days 00:00:04'],
+                   dtype='timedelta64[ns]', freq=None)
+    >>> pd.to_timedelta(np.arange(5), unit='d')
+    TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
+                   dtype='timedelta64[ns]', freq=None)
+    """
+    if unit is not None:
+        unit = parse_timedelta_unit(unit)
+
+    if errors not in ("ignore", "raise", "coerce"):
+        raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.")
+
+    if unit in {"Y", "y", "M"}:
+        raise ValueError(
+            "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
+            "represent unambiguous timedelta values durations."
+ ) + + if arg is None: + return arg + elif isinstance(arg, ABCSeries): + values = _convert_listlike(arg._values, unit=unit, errors=errors) + return arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, ABCIndex): + return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name) + elif isinstance(arg, np.ndarray) and arg.ndim == 0: + # extract array scalar and process below + # error: Incompatible types in assignment (expression has type "object", + # variable has type "Union[str, int, float, timedelta, List[Any], + # Tuple[Any, ...], Union[Union[ExtensionArray, ndarray[Any, Any]], Index, + # Series]]") [assignment] + arg = lib.item_from_zerodim(arg) # type: ignore[assignment] + elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1: + return _convert_listlike(arg, unit=unit, errors=errors) + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, timedelta, list, tuple, 1-d array, or Series" + ) + + if isinstance(arg, str) and unit is not None: + raise ValueError("unit must not be specified if the input is/contains a str") + + # ...so it must be a scalar value. Return scalar. + return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors) + + +def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): + """Convert string 'r' to a timedelta object.""" + result: Timedelta | NaTType + + try: + result = Timedelta(r, unit) + except ValueError: + if errors == "raise": + raise + elif errors == "ignore": + return r + + # coerce + result = NaT + + return result + + +def _convert_listlike(arg, unit=None, errors="raise", name=None): + """Convert a list of objects to a timedelta index object.""" + if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): + # This is needed only to ensure that in the case where we end up + # returning arg (errors == "ignore"), and where the input is a + # generator, we return a useful list-like instead of a + # used-up generator + arg = np.array(list(arg), dtype=object) + + try: + td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] + except ValueError: + if errors == "ignore": + return arg + else: + # This else-block accounts for the cases when errors='raise' + # and errors='coerce'. If errors == 'raise', these errors + # should be raised. If errors == 'coerce', we shouldn't + # expect any errors to be raised, since all parsing errors + # cause coercion to pd.NaT. However, if an error / bug is + # introduced that causes an Exception to be raised, we would + # like to surface it. + raise + + from pandas import TimedeltaIndex + + value = TimedeltaIndex(td64arr, unit="ns", name=name) + return value diff --git a/pandas/core/tools/times.py b/pandas/core/tools/times.py new file mode 100644 index 00000000..87667921 --- /dev/null +++ b/pandas/core/tools/times.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +from datetime import ( + datetime, + time, +) + +import numpy as np + +from pandas._libs.lib import is_list_like + +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import notna + + +def to_time(arg, format=None, infer_time_format=False, errors="raise"): + """ + Parse time strings to time objects using fixed strptime formats ("%H:%M", + "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", + "%I%M%S%p") + + Use infer_time_format if all the strings are in the same format to speed + up conversion. 
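+
+    For example, ``to_time("14:30")`` returns ``datetime.time(14, 30)``, and a
+    list-like of such strings returns a list of ``datetime.time`` objects.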
+ + Parameters + ---------- + arg : string in time format, datetime.time, list, tuple, 1-d array, Series + format : str, default None + Format used to convert arg into a time object. If None, fixed formats + are used. + infer_time_format: bool, default False + Infer the time format based on the first non-NaN element. If all + strings are in the same format, this will speed up conversion. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as None + - If 'ignore', then invalid parsing will return the input + + Returns + ------- + datetime.time + """ + + def _convert_listlike(arg, format): + + if isinstance(arg, (list, tuple)): + arg = np.array(arg, dtype="O") + + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, datetime, list, tuple, 1-d array, or Series" + ) + + arg = np.asarray(arg, dtype="O") + + if infer_time_format and format is None: + format = _guess_time_format_for_array(arg) + + times: list[time | None] = [] + if format is not None: + for element in arg: + try: + times.append(datetime.strptime(element, format).time()) + except (ValueError, TypeError) as err: + if errors == "raise": + msg = ( + f"Cannot convert {element} to a time with given " + f"format {format}" + ) + raise ValueError(msg) from err + elif errors == "ignore": + return arg + else: + times.append(None) + else: + formats = _time_formats[:] + format_found = False + for element in arg: + time_object = None + try: + time_object = time.fromisoformat(element) + except (ValueError, TypeError): + for time_format in formats: + try: + time_object = datetime.strptime(element, time_format).time() + if not format_found: + # Put the found format in front + fmt = formats.pop(formats.index(time_format)) + formats.insert(0, fmt) + format_found = True + break + except (ValueError, TypeError): + continue + + if time_object is not None: + times.append(time_object) + elif errors == "raise": + raise ValueError(f"Cannot convert arg {arg} to a time") + elif errors == "ignore": + return arg + else: + times.append(None) + + return times + + if arg is None: + return arg + elif isinstance(arg, time): + return arg + elif isinstance(arg, ABCSeries): + values = _convert_listlike(arg._values, format) + return arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, ABCIndex): + return _convert_listlike(arg, format) + elif is_list_like(arg): + return _convert_listlike(arg, format) + + return _convert_listlike(np.array([arg]), format)[0] + + +# Fixed time formats for time parsing +_time_formats = [ + "%H:%M", + "%H%M", + "%I:%M%p", + "%I%M%p", + "%H:%M:%S", + "%H%M%S", + "%I:%M:%S%p", + "%I%M%S%p", +] + + +def _guess_time_format_for_array(arr): + # Try to guess the format based on the first non-NaN element + non_nan_elements = notna(arr).nonzero()[0] + if len(non_nan_elements): + element = arr[non_nan_elements[0]] + for time_format in _time_formats: + try: + datetime.strptime(element, time_format) + return time_format + except ValueError: + pass + + return None diff --git a/pandas/core/util/__init__.py b/pandas/core/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py new file mode 100644 index 00000000..5a5e46e0 --- /dev/null +++ b/pandas/core/util/hashing.py @@ -0,0 +1,366 @@ +""" +data hash pandas / numpy objects +""" +from __future__ import annotations + +import itertools +from typing import ( + 
TYPE_CHECKING, + Hashable, + Iterable, + Iterator, + cast, +) + +import numpy as np + +from pandas._libs import lib +from pandas._libs.hashing import hash_object_array +from pandas._typing import ( + ArrayLike, + npt, +) + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCExtensionArray, + ABCIndex, + ABCMultiIndex, + ABCSeries, +) + +if TYPE_CHECKING: + from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + ) + + +# 16 byte long hashing key +_default_hash_key = "0123456789123456" + + +def combine_hash_arrays( + arrays: Iterator[np.ndarray], num_items: int +) -> npt.NDArray[np.uint64]: + """ + Parameters + ---------- + arrays : Iterator[np.ndarray] + num_items : int + + Returns + ------- + np.ndarray[uint64] + + Should be the same as CPython's tupleobject.c + """ + try: + first = next(arrays) + except StopIteration: + return np.array([], dtype=np.uint64) + + arrays = itertools.chain([first], arrays) + + mult = np.uint64(1000003) + out = np.zeros_like(first) + np.uint64(0x345678) + for i, a in enumerate(arrays): + inverse_i = num_items - i + out ^= a + out *= mult + mult += np.uint64(82520 + inverse_i + inverse_i) + assert i + 1 == num_items, "Fed in wrong num_items" + out += np.uint64(97531) + return out + + +def hash_pandas_object( + obj: Index | DataFrame | Series, + index: bool = True, + encoding: str = "utf8", + hash_key: str | None = _default_hash_key, + categorize: bool = True, +) -> Series: + """ + Return a data hash of the Index/Series/DataFrame. + + Parameters + ---------- + obj : Index, Series, or DataFrame + index : bool, default True + Include the index in the hash (if Series/DataFrame). + encoding : str, default 'utf8' + Encoding for data & key when strings. + hash_key : str, default _default_hash_key + Hash_key for string key to encode. + categorize : bool, default True + Whether to first categorize object arrays before hashing. This is more + efficient when the array contains duplicate values. 
+ + Returns + ------- + Series of uint64, same length as the object + """ + from pandas import Series + + if hash_key is None: + hash_key = _default_hash_key + + if isinstance(obj, ABCMultiIndex): + return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False) + + elif isinstance(obj, ABCIndex): + h = hash_array(obj._values, encoding, hash_key, categorize).astype( + "uint64", copy=False + ) + ser = Series(h, index=obj, dtype="uint64", copy=False) + + elif isinstance(obj, ABCSeries): + h = hash_array(obj._values, encoding, hash_key, categorize).astype( + "uint64", copy=False + ) + if index: + index_iter = ( + hash_pandas_object( + obj.index, + index=False, + encoding=encoding, + hash_key=hash_key, + categorize=categorize, + )._values + for _ in [None] + ) + arrays = itertools.chain([h], index_iter) + h = combine_hash_arrays(arrays, 2) + + ser = Series(h, index=obj.index, dtype="uint64", copy=False) + + elif isinstance(obj, ABCDataFrame): + hashes = ( + hash_array(series._values, encoding, hash_key, categorize) + for _, series in obj.items() + ) + num_items = len(obj.columns) + if index: + index_hash_generator = ( + hash_pandas_object( + obj.index, + index=False, + encoding=encoding, + hash_key=hash_key, + categorize=categorize, + )._values + for _ in [None] + ) + num_items += 1 + + # keep `hashes` specifically a generator to keep mypy happy + _hashes = itertools.chain(hashes, index_hash_generator) + hashes = (x for x in _hashes) + h = combine_hash_arrays(hashes, num_items) + + ser = Series(h, index=obj.index, dtype="uint64", copy=False) + else: + raise TypeError(f"Unexpected type for hashing {type(obj)}") + + return ser + + +def hash_tuples( + vals: MultiIndex | Iterable[tuple[Hashable, ...]], + encoding: str = "utf8", + hash_key: str = _default_hash_key, +) -> npt.NDArray[np.uint64]: + """ + Hash an MultiIndex / listlike-of-tuples efficiently. 
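+
+    Each level is hashed as a ``Categorical`` built from the index's codes and
+    levels, and the per-level hashes are combined into a single uint64 per
+    tuple via ``combine_hash_arrays``.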
+ + Parameters + ---------- + vals : MultiIndex or listlike-of-tuples + encoding : str, default 'utf8' + hash_key : str, default _default_hash_key + + Returns + ------- + ndarray[np.uint64] of hashed values + """ + if not is_list_like(vals): + raise TypeError("must be convertible to a list-of-tuples") + + from pandas import ( + Categorical, + MultiIndex, + ) + + if not isinstance(vals, ABCMultiIndex): + mi = MultiIndex.from_tuples(vals) + else: + mi = vals + + # create a list-of-Categoricals + cat_vals = [ + Categorical(mi.codes[level], mi.levels[level], ordered=False, fastpath=True) + for level in range(mi.nlevels) + ] + + # hash the list-of-ndarrays + hashes = ( + _hash_categorical(cat, encoding=encoding, hash_key=hash_key) for cat in cat_vals + ) + h = combine_hash_arrays(hashes, len(cat_vals)) + + return h + + +def _hash_categorical( + cat: Categorical, encoding: str, hash_key: str +) -> npt.NDArray[np.uint64]: + """ + Hash a Categorical by hashing its categories, and then mapping the codes + to the hashes + + Parameters + ---------- + cat : Categorical + encoding : str + hash_key : str + + Returns + ------- + ndarray[np.uint64] of hashed values, same size as len(c) + """ + # Convert ExtensionArrays to ndarrays + values = np.asarray(cat.categories._values) + hashed = hash_array(values, encoding, hash_key, categorize=False) + + # we have uint64, as we don't directly support missing values + # we don't want to use take_nd which will coerce to float + # instead, directly construct the result with a + # max(np.uint64) as the missing value indicator + # + # TODO: GH 15362 + + mask = cat.isna() + if len(hashed): + result = hashed.take(cat.codes) + else: + result = np.zeros(len(mask), dtype="uint64") + + if mask.any(): + result[mask] = lib.u8max + + return result + + +def hash_array( + vals: ArrayLike, + encoding: str = "utf8", + hash_key: str = _default_hash_key, + categorize: bool = True, +) -> npt.NDArray[np.uint64]: + """ + Given a 1d array, return an array of deterministic integers. + + Parameters + ---------- + vals : ndarray or ExtensionArray + encoding : str, default 'utf8' + Encoding for data & key when strings. + hash_key : str, default _default_hash_key + Hash_key for string key to encode. + categorize : bool, default True + Whether to first categorize object arrays before hashing. This is more + efficient when the array contains duplicate values. + + Returns + ------- + ndarray[np.uint64, ndim=1] + Hashed values, same length as the vals. + """ + if not hasattr(vals, "dtype"): + raise TypeError("must pass a ndarray-like") + dtype = vals.dtype + + # For categoricals, we hash the categories, then remap the codes to the + # hash values. (This check is above the complex check so that we don't ask + # numpy if categorical is a subdtype of complex, as it will choke). + if is_categorical_dtype(dtype): + vals = cast("Categorical", vals) + return _hash_categorical(vals, encoding, hash_key) + + elif isinstance(vals, ABCExtensionArray): + vals, _ = vals._values_for_factorize() + + elif not isinstance(vals, np.ndarray): + # GH#42003 + raise TypeError( + "hash_array requires np.ndarray or ExtensionArray, not " + f"{type(vals).__name__}. Use hash_pandas_object instead." + ) + + return _hash_ndarray(vals, encoding, hash_key, categorize) + + +def _hash_ndarray( + vals: np.ndarray, + encoding: str = "utf8", + hash_key: str = _default_hash_key, + categorize: bool = True, +) -> npt.NDArray[np.uint64]: + """ + See hash_array.__doc__. 
+ """ + dtype = vals.dtype + + # we'll be working with everything as 64-bit values, so handle this + # 128-bit value early + if np.issubdtype(dtype, np.complex128): + return hash_array(np.real(vals)) + 23 * hash_array(np.imag(vals)) + + # First, turn whatever array this is into unsigned 64-bit ints, if we can + # manage it. + elif dtype == bool: + vals = vals.astype("u8") + elif issubclass(dtype.type, (np.datetime64, np.timedelta64)): + vals = vals.view("i8").astype("u8", copy=False) + elif issubclass(dtype.type, np.number) and dtype.itemsize <= 8: + vals = vals.view(f"u{vals.dtype.itemsize}").astype("u8") + else: + # With repeated values, its MUCH faster to categorize object dtypes, + # then hash and rename categories. We allow skipping the categorization + # when the values are known/likely to be unique. + if categorize: + from pandas import ( + Categorical, + Index, + factorize, + ) + + codes, categories = factorize(vals, sort=False) + cat = Categorical( + codes, Index._with_infer(categories), ordered=False, fastpath=True + ) + return _hash_categorical(cat, encoding, hash_key) + + try: + vals = hash_object_array(vals, hash_key, encoding) + except TypeError: + # we have mixed types + vals = hash_object_array( + vals.astype(str).astype(object), hash_key, encoding + ) + + # Then, redistribute these 64-bit ints within the space of 64-bit ints + vals ^= vals >> 30 + vals *= np.uint64(0xBF58476D1CE4E5B9) + vals ^= vals >> 27 + vals *= np.uint64(0x94D049BB133111EB) + vals ^= vals >> 31 + return vals diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py new file mode 100644 index 00000000..be798e02 --- /dev/null +++ b/pandas/core/util/numba_.py @@ -0,0 +1,112 @@ +"""Common utilities for Numba operations""" +from __future__ import annotations + +import types +from typing import ( + TYPE_CHECKING, + Callable, +) + +import numpy as np + +from pandas.compat._optional import import_optional_dependency +from pandas.errors import NumbaUtilError + +GLOBAL_USE_NUMBA: bool = False + + +def maybe_use_numba(engine: str | None) -> bool: + """Signal whether to use numba routines.""" + return engine == "numba" or (engine is None and GLOBAL_USE_NUMBA) + + +def set_use_numba(enable: bool = False) -> None: + global GLOBAL_USE_NUMBA + if enable: + import_optional_dependency("numba") + GLOBAL_USE_NUMBA = enable + + +def get_jit_arguments( + engine_kwargs: dict[str, bool] | None = None, kwargs: dict | None = None +) -> dict[str, bool]: + """ + Return arguments to pass to numba.JIT, falling back on pandas default JIT settings. + + Parameters + ---------- + engine_kwargs : dict, default None + user passed keyword arguments for numba.JIT + kwargs : dict, default None + user passed keyword arguments to pass into the JITed function + + Returns + ------- + dict[str, bool] + nopython, nogil, parallel + + Raises + ------ + NumbaUtilError + """ + if engine_kwargs is None: + engine_kwargs = {} + + nopython = engine_kwargs.get("nopython", True) + if kwargs and nopython: + raise NumbaUtilError( + "numba does not support kwargs with nopython=True: " + "https://github.com/numba/numba/issues/2916" + ) + nogil = engine_kwargs.get("nogil", False) + parallel = engine_kwargs.get("parallel", False) + return {"nopython": nopython, "nogil": nogil, "parallel": parallel} + + +def jit_user_function( + func: Callable, nopython: bool, nogil: bool, parallel: bool +) -> Callable: + """ + JIT the user's function given the configurable arguments. 
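+
+    If ``func`` is already a numba-compiled function it is returned unchanged;
+    otherwise it is wrapped with ``numba.generated_jit`` so that known NumPy
+    functions and Python builtins are called directly, while other callables
+    are compiled with ``numba.jit`` using the given ``nopython`` and ``nogil``
+    settings.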
+ + Parameters + ---------- + func : function + user defined function + nopython : bool + nopython parameter for numba.JIT + nogil : bool + nogil parameter for numba.JIT + parallel : bool + parallel parameter for numba.JIT + + Returns + ------- + function + Numba JITed function + """ + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + if numba.extending.is_jitted(func): + # Don't jit a user passed jitted function + numba_func = func + else: + + @numba.generated_jit(nopython=nopython, nogil=nogil, parallel=parallel) + def numba_func(data, *_args): + if getattr(np, func.__name__, False) is func or isinstance( + func, types.BuiltinFunctionType + ): + jf = func + else: + jf = numba.jit(func, nopython=nopython, nogil=nogil) + + def impl(data, *_args): + return jf(data, *_args) + + return impl + + return numba_func diff --git a/pandas/core/window/__init__.py b/pandas/core/window/__init__.py new file mode 100644 index 00000000..857e12e5 --- /dev/null +++ b/pandas/core/window/__init__.py @@ -0,0 +1,23 @@ +from pandas.core.window.ewm import ( + ExponentialMovingWindow, + ExponentialMovingWindowGroupby, +) +from pandas.core.window.expanding import ( + Expanding, + ExpandingGroupby, +) +from pandas.core.window.rolling import ( + Rolling, + RollingGroupby, + Window, +) + +__all__ = [ + "Expanding", + "ExpandingGroupby", + "ExponentialMovingWindow", + "ExponentialMovingWindowGroupby", + "Rolling", + "RollingGroupby", + "Window", +] diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py new file mode 100644 index 00000000..e31b5c60 --- /dev/null +++ b/pandas/core/window/common.py @@ -0,0 +1,207 @@ +"""Common utility functions for rolling operations""" +from __future__ import annotations + +from collections import defaultdict +from typing import cast +import warnings + +import numpy as np + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.indexes.api import MultiIndex + + +def flex_binary_moment(arg1, arg2, f, pairwise=False): + + if isinstance(arg1, ABCSeries) and isinstance(arg2, ABCSeries): + X, Y = prep_binary(arg1, arg2) + return f(X, Y) + + elif isinstance(arg1, ABCDataFrame): + from pandas import DataFrame + + def dataframe_from_int_dict(data, frame_template): + result = DataFrame(data, index=frame_template.index) + if len(result.columns) > 0: + result.columns = frame_template.columns[result.columns] + return result + + results = {} + if isinstance(arg2, ABCDataFrame): + if pairwise is False: + if arg1 is arg2: + # special case in order to handle duplicate column names + for i in range(len(arg1.columns)): + results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) + return dataframe_from_int_dict(results, arg1) + else: + if not arg1.columns.is_unique: + raise ValueError("'arg1' columns are not unique") + if not arg2.columns.is_unique: + raise ValueError("'arg2' columns are not unique") + X, Y = arg1.align(arg2, join="outer") + X, Y = prep_binary(X, Y) + res_columns = arg1.columns.union(arg2.columns) + for col in res_columns: + if col in X and col in Y: + results[col] = f(X[col], Y[col]) + return DataFrame(results, index=X.index, columns=res_columns) + elif pairwise is True: + results = defaultdict(dict) + for i in range(len(arg1.columns)): + for j in range(len(arg2.columns)): + if j < i and arg2 is arg1: + # Symmetric case + results[i][j] = results[j][i] + else: + results[i][j] = f( + *prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]) + ) + + from 
pandas import concat + + result_index = arg1.index.union(arg2.index) + if len(result_index): + + # construct result frame + result = concat( + [ + concat( + [results[i][j] for j in range(len(arg2.columns))], + ignore_index=True, + ) + for i in range(len(arg1.columns)) + ], + ignore_index=True, + axis=1, + ) + result.columns = arg1.columns + + # set the index and reorder + if arg2.columns.nlevels > 1: + # mypy needs to know columns is a MultiIndex, Index doesn't + # have levels attribute + arg2.columns = cast(MultiIndex, arg2.columns) + # GH 21157: Equivalent to MultiIndex.from_product( + # [result_index], , + # ) + # A normal MultiIndex.from_product will produce too many + # combinations. + result_level = np.tile( + result_index, len(result) // len(result_index) + ) + arg2_levels = ( + np.repeat( + arg2.columns.get_level_values(i), + len(result) // len(arg2.columns), + ) + for i in range(arg2.columns.nlevels) + ) + result_names = list(arg2.columns.names) + [result_index.name] + result.index = MultiIndex.from_arrays( + [*arg2_levels, result_level], names=result_names + ) + # GH 34440 + num_levels = len(result.index.levels) + new_order = [num_levels - 1] + list(range(num_levels - 1)) + result = result.reorder_levels(new_order).sort_index() + else: + result.index = MultiIndex.from_product( + [range(len(arg2.columns)), range(len(result_index))] + ) + result = result.swaplevel(1, 0).sort_index() + result.index = MultiIndex.from_product( + [result_index] + [arg2.columns] + ) + else: + + # empty result + result = DataFrame( + index=MultiIndex( + levels=[arg1.index, arg2.columns], codes=[[], []] + ), + columns=arg2.columns, + dtype="float64", + ) + + # reset our index names to arg1 names + # reset our column names to arg2 names + # careful not to mutate the original names + result.columns = result.columns.set_names(arg1.columns.names) + result.index = result.index.set_names( + result_index.names + arg2.columns.names + ) + + return result + else: + results = { + i: f(*prep_binary(arg1.iloc[:, i], arg2)) + for i in range(len(arg1.columns)) + } + return dataframe_from_int_dict(results, arg1) + + else: + return flex_binary_moment(arg2, arg1, f) + + +def zsqrt(x): + with np.errstate(all="ignore"): + result = np.sqrt(x) + mask = x < 0 + + if isinstance(x, ABCDataFrame): + if mask._values.any(): + result[mask] = 0 + else: + if mask.any(): + result[mask] = 0 + + return result + + +def prep_binary(arg1, arg2): + # mask out values, this also makes a common index... + X = arg1 + 0 * arg2 + Y = arg2 + 0 * arg1 + return X, Y + + +def maybe_warn_args_and_kwargs(cls, kernel: str, args, kwargs) -> None: + """ + Warn for deprecation of args and kwargs in rolling/expanding functions. + + Parameters + ---------- + cls : type + Class to warn about. + kernel : str + Operation name. + args : tuple or None + args passed by user. Will be None if and only if kernel does not have args. + kwargs : dict or None + kwargs passed by user. Will be None if and only if kernel does not have kwargs. + """ + warn_args = args is not None and len(args) > 0 + warn_kwargs = kwargs is not None and len(kwargs) > 0 + if warn_args and warn_kwargs: + msg = "args and kwargs" + elif warn_args: + msg = "args" + elif warn_kwargs: + msg = "kwargs" + else: + msg = "" + if msg != "": + warnings.warn( + f"Passing additional {msg} to {cls.__name__}.{kernel} has " + "no impact on the result and is deprecated. 
This will " + "raise a TypeError in a future version of pandas.", + category=FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/core/window/doc.py b/pandas/core/window/doc.py new file mode 100644 index 00000000..835085d4 --- /dev/null +++ b/pandas/core/window/doc.py @@ -0,0 +1,140 @@ +"""Any shareable docstring components for rolling/expanding/ewm""" +from __future__ import annotations + +from textwrap import dedent + +from pandas.core.shared_docs import _shared_docs + +_shared_docs = dict(**_shared_docs) + + +def create_section_header(header: str) -> str: + """Create numpydoc section header""" + return "\n".join((header, "-" * len(header))) + "\n" + + +template_header = "\nCalculate the {window_method} {aggregation_description}.\n\n" + +template_returns = dedent( + """ + Series or DataFrame + Return type is the same as the original object with ``np.float64`` dtype.\n + """ +).replace("\n", "", 1) + +template_see_also = dedent( + """ + pandas.Series.{window_method} : Calling {window_method} with Series data. + pandas.DataFrame.{window_method} : Calling {window_method} with DataFrames. + pandas.Series.{agg_method} : Aggregating {agg_method} for Series. + pandas.DataFrame.{agg_method} : Aggregating {agg_method} for DataFrame.\n + """ +).replace("\n", "", 1) + +kwargs_numeric_only = dedent( + """ + numeric_only : bool, default False + Include only float, int, boolean columns. + + .. versionadded:: 1.5.0\n + """ +).replace("\n", "", 1) + +args_compat = dedent( + """ + *args + For NumPy compatibility and will not have an effect on the result. + + .. deprecated:: 1.5.0\n + """ +).replace("\n", "", 1) + +kwargs_compat = dedent( + """ + **kwargs + For NumPy compatibility and will not have an effect on the result. + + .. deprecated:: 1.5.0\n + """ +).replace("\n", "", 1) + +kwargs_scipy = dedent( + """ + **kwargs + Keyword arguments to configure the ``SciPy`` weighted window type.\n + """ +).replace("\n", "", 1) + +window_apply_parameters = dedent( + """ + func : function + Must produce a single value from an ndarray input if ``raw=True`` + or a single value from a Series if ``raw=False``. Can also accept a + Numba JIT function with ``engine='numba'`` specified. + + .. versionchanged:: 1.0.0 + + raw : bool, default False + * ``False`` : passes each row or column as a Series to the + function. + * ``True`` : the passed function will receive ndarray + objects instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + + engine : str, default None + * ``'cython'`` : Runs rolling apply through C-extensions from cython. + * ``'numba'`` : Runs rolling apply through JIT compiled code from numba. + Only available when ``raw`` is set to ``True``. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.0.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be + applied to both the ``func`` and the ``apply`` rolling aggregation. + + .. versionadded:: 1.0.0 + + args : tuple, default None + Positional arguments to be passed into func. 
+ + kwargs : dict, default None + Keyword arguments to be passed into func.\n + """ +).replace("\n", "", 1) + +numba_notes = ( + "See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for " + "extended documentation and performance considerations for the Numba engine.\n\n" +) + + +def window_agg_numba_parameters(version: str = "1.3") -> str: + return ( + dedent( + """ + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: {version}.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: {version}.0\n + """ + ) + .replace("\n", "", 1) + .replace("{version}", version) + ) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py new file mode 100644 index 00000000..020ca710 --- /dev/null +++ b/pandas/core/window/ewm.py @@ -0,0 +1,1083 @@ +from __future__ import annotations + +import datetime +from functools import partial +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + cast, +) +import warnings + +import numpy as np + +from pandas._libs.tslibs import Timedelta +import pandas._libs.window.aggregations as window_aggregations +from pandas._typing import ( + Axis, + TimedeltaConvertibleTypes, +) + +if TYPE_CHECKING: + from pandas import DataFrame, Series + from pandas.core.generic import NDFrame + +from pandas.compat.numpy import function as nv +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_datetime64_ns_dtype, + is_numeric_dtype, +) +from pandas.core.dtypes.missing import isna + +import pandas.core.common as common # noqa: PDF018 +from pandas.core.indexers.objects import ( + BaseIndexer, + ExponentialMovingWindowIndexer, + GroupbyIndexer, +) +from pandas.core.util.numba_ import ( + get_jit_arguments, + maybe_use_numba, +) +from pandas.core.window.common import ( + maybe_warn_args_and_kwargs, + zsqrt, +) +from pandas.core.window.doc import ( + _shared_docs, + args_compat, + create_section_header, + kwargs_compat, + kwargs_numeric_only, + numba_notes, + template_header, + template_returns, + template_see_also, + window_agg_numba_parameters, +) +from pandas.core.window.numba_ import ( + generate_numba_ewm_func, + generate_numba_ewm_table_func, +) +from pandas.core.window.online import ( + EWMMeanState, + generate_online_numba_ewma_func, +) +from pandas.core.window.rolling import ( + BaseWindow, + BaseWindowGroupby, +) + + +def get_center_of_mass( + comass: float | None, + span: float | None, + halflife: float | None, + alpha: float | None, +) -> float: + valid_count = common.count_not_none(comass, span, halflife, alpha) + if valid_count > 1: + raise ValueError("comass, span, halflife, and alpha are mutually exclusive") + + # Convert to center of mass; domain checks ensure 0 < alpha <= 1 + if comass is not None: + if comass < 0: + raise ValueError("comass must satisfy: comass >= 0") + elif span is not None: + if span < 1: + raise ValueError("span must satisfy: span >= 1") + comass = (span - 1) 
/ 2 + elif halflife is not None: + if halflife <= 0: + raise ValueError("halflife must satisfy: halflife > 0") + decay = 1 - np.exp(np.log(0.5) / halflife) + comass = 1 / decay - 1 + elif alpha is not None: + if alpha <= 0 or alpha > 1: + raise ValueError("alpha must satisfy: 0 < alpha <= 1") + comass = (1 - alpha) / alpha + else: + raise ValueError("Must pass one of comass, span, halflife, or alpha") + + return float(comass) + + +def _calculate_deltas( + times: str | np.ndarray | NDFrame | None, + halflife: float | TimedeltaConvertibleTypes | None, +) -> np.ndarray: + """ + Return the diff of the times divided by the half-life. These values are used in + the calculation of the ewm mean. + + Parameters + ---------- + times : str, np.ndarray, Series, default None + Times corresponding to the observations. Must be monotonically increasing + and ``datetime64[ns]`` dtype. + halflife : float, str, timedelta, optional + Half-life specifying the decay + + Returns + ------- + np.ndarray + Diff of the times divided by the half-life + """ + # error: Item "str" of "Union[str, ndarray, NDFrameT, None]" has no + # attribute "view" + # error: Item "None" of "Union[str, ndarray, NDFrameT, None]" has no + # attribute "view" + _times = np.asarray( + times.view(np.int64), dtype=np.float64 # type: ignore[union-attr] + ) + _halflife = float(Timedelta(halflife).value) + return np.diff(_times) / _halflife + + +class ExponentialMovingWindow(BaseWindow): + r""" + Provide exponentially weighted (EW) calculations. + + Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be + provided if ``times`` is not provided. If ``times`` is provided, + ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. + + Parameters + ---------- + com : float, optional + Specify decay in terms of center of mass + + :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`. + + span : float, optional + Specify decay in terms of span + + :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`. + + halflife : float, str, timedelta, optional + Specify decay in terms of half-life + + :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for + :math:`halflife > 0`. + + If ``times`` is specified, a timedelta convertible unit over which an + observation decays to half its value. Only applicable to ``mean()``, + and halflife value will not apply to the other functions. + + .. versionadded:: 1.1.0 + + alpha : float, optional + Specify smoothing factor :math:`\alpha` directly + + :math:`0 < \alpha \leq 1`. + + min_periods : int, default 0 + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + adjust : bool, default True + Divide by decaying adjustment factor in beginning periods to account + for imbalance in relative weightings (viewing EWMA as a moving average). + + - When ``adjust=True`` (default), the EW function is calculated using weights + :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series + [:math:`x_0, x_1, ..., x_t`] would be: + + .. math:: + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + (1 - + \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t} + + - When ``adjust=False``, the exponentially weighted function is calculated + recursively: + + .. math:: + \begin{split} + y_0 &= x_0\\ + y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, + \end{split} + ignore_na : bool, default False + Ignore missing values when calculating weights. 
+ + - When ``ignore_na=False`` (default), weights are based on absolute positions. + For example, the weights of :math:`x_0` and :math:`x_2` used in calculating + the final weighted average of [:math:`x_0`, None, :math:`x_2`] are + :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and + :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``. + + - When ``ignore_na=True``, weights are based + on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` + used in calculating the final weighted average of + [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if + ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``. + + axis : {0, 1}, default 0 + If ``0`` or ``'index'``, calculate across the rows. + + If ``1`` or ``'columns'``, calculate across the columns. + + For `Series` this parameter is unused and defaults to 0. + + times : str, np.ndarray, Series, default None + + .. versionadded:: 1.1.0 + + Only applicable to ``mean()``. + + Times corresponding to the observations. Must be monotonically increasing and + ``datetime64[ns]`` dtype. + + If 1-D array like, a sequence with the same shape as the observations. + + .. deprecated:: 1.4.0 + If str, the name of the column in the DataFrame representing the times. + + method : str {'single', 'table'}, default 'single' + .. versionadded:: 1.4.0 + + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + Only applicable to ``mean()`` + + Returns + ------- + ``ExponentialMovingWindow`` subclass + + See Also + -------- + rolling : Provides rolling window calculations. + expanding : Provides expanding transformations. + + Notes + ----- + See :ref:`Windowing Operations ` + for further usage details and examples. + + Examples + -------- + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> df.ewm(com=0.5).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + >>> df.ewm(alpha=2 / 3).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + + **adjust** + + >>> df.ewm(com=0.5, adjust=True).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + >>> df.ewm(com=0.5, adjust=False).mean() + B + 0 0.000000 + 1 0.666667 + 2 1.555556 + 3 1.555556 + 4 3.650794 + + **ignore_na** + + >>> df.ewm(com=0.5, ignore_na=True).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.225000 + >>> df.ewm(com=0.5, ignore_na=False).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + + **times** + + Exponentially weighted mean with weights calculated with a timedelta ``halflife`` + relative to ``times``. 
+ + >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17'] + >>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean() + B + 0 0.000000 + 1 0.585786 + 2 1.523889 + 3 1.523889 + 4 3.233686 + """ + + _attributes = [ + "com", + "span", + "halflife", + "alpha", + "min_periods", + "adjust", + "ignore_na", + "axis", + "times", + "method", + ] + + def __init__( + self, + obj: NDFrame, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | NDFrame | None = None, + method: str = "single", + *, + selection=None, + ) -> None: + super().__init__( + obj=obj, + min_periods=1 if min_periods is None else max(int(min_periods), 1), + on=None, + center=False, + closed=None, + method=method, + axis=axis, + selection=selection, + ) + self.com = com + self.span = span + self.halflife = halflife + self.alpha = alpha + self.adjust = adjust + self.ignore_na = ignore_na + self.times = times + if self.times is not None: + if not self.adjust: + raise NotImplementedError("times is not supported with adjust=False.") + if isinstance(self.times, str): + warnings.warn( + ( + "Specifying times as a string column label is deprecated " + "and will be removed in a future version. Pass the column " + "into times instead." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + # self.times cannot be str anymore + self.times = cast("Series", self._selected_obj[self.times]) + if not is_datetime64_ns_dtype(self.times): + raise ValueError("times must be datetime64[ns] dtype.") + if len(self.times) != len(obj): + raise ValueError("times must be the same length as the object.") + if not isinstance(self.halflife, (str, datetime.timedelta, np.timedelta64)): + raise ValueError("halflife must be a timedelta convertible object") + if isna(self.times).any(): + raise ValueError("Cannot convert NaT values to integer") + self._deltas = _calculate_deltas(self.times, self.halflife) + # Halflife is no longer applicable when calculating COM + # But allow COM to still be calculated if the user passes other decay args + if common.count_not_none(self.com, self.span, self.alpha) > 0: + self._com = get_center_of_mass(self.com, self.span, None, self.alpha) + else: + self._com = 1.0 + else: + if self.halflife is not None and isinstance( + self.halflife, (str, datetime.timedelta, np.timedelta64) + ): + raise ValueError( + "halflife can only be a timedelta convertible argument if " + "times is not None." 
+ ) + # Without times, points are equally spaced + self._deltas = np.ones( + max(self.obj.shape[self.axis] - 1, 0), dtype=np.float64 + ) + self._com = get_center_of_mass( + # error: Argument 3 to "get_center_of_mass" has incompatible type + # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]"; + # expected "Optional[float]" + self.com, + self.span, + self.halflife, # type: ignore[arg-type] + self.alpha, + ) + + def _check_window_bounds( + self, start: np.ndarray, end: np.ndarray, num_vals: int + ) -> None: + # emw algorithms are iterative with each point + # ExponentialMovingWindowIndexer "bounds" are the entire window + pass + + def _get_window_indexer(self) -> BaseIndexer: + """ + Return an indexer class that will compute the window start and end bounds + """ + return ExponentialMovingWindowIndexer() + + def online( + self, engine="numba", engine_kwargs=None + ) -> OnlineExponentialMovingWindow: + """ + Return an ``OnlineExponentialMovingWindow`` object to calculate + exponentially moving window aggregations in an online method. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + engine: str, default ``'numba'`` + Execution engine to calculate online aggregations. + Applies to all supported aggregation methods. + + engine_kwargs : dict, default None + Applies to all supported aggregation methods. + + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be + applied to the function + + Returns + ------- + OnlineExponentialMovingWindow + """ + return OnlineExponentialMovingWindow( + obj=self.obj, + com=self.com, + span=self.span, + halflife=self.halflife, + alpha=self.alpha, + min_periods=self.min_periods, + adjust=self.adjust, + ignore_na=self.ignore_na, + axis=self.axis, + times=self.times, + engine=engine, + engine_kwargs=engine_kwargs, + selection=self._selection, + ) + + @doc( + _shared_docs["aggregate"], + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.rolling.aggregate + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 1.000000 4.000000 7.000000 + 1 1.666667 4.666667 7.666667 + 2 2.428571 5.428571 8.428571 + """ + ), + klass="Series/Dataframe", + axis="", + ) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes.replace("\n", "", 1), + window_method="ewm", + aggregation_description="(exponential weighted moment) mean", + agg_method="mean", + ) + def mean( + self, + numeric_only: bool = False, + *args, + engine=None, + engine_kwargs=None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs) + if maybe_use_numba(engine): + if self.method == "single": + func = generate_numba_ewm_func + else: + func = generate_numba_ewm_table_func + ewm_func = func( + **get_jit_arguments(engine_kwargs), + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, 
+ deltas=tuple(self._deltas), + normalize=True, + ) + return self._apply(ewm_func, name="mean") + elif engine in ("cython", None): + if engine_kwargs is not None: + raise ValueError("cython engine does not accept engine_kwargs") + nv.validate_window_func("mean", args, kwargs) + + deltas = None if self.times is None else self._deltas + window_func = partial( + window_aggregations.ewm, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + deltas=deltas, + normalize=True, + ) + return self._apply(window_func, name="mean", numeric_only=numeric_only) + else: + raise ValueError("engine must be either 'numba' or 'cython'") + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes.replace("\n", "", 1), + window_method="ewm", + aggregation_description="(exponential weighted moment) sum", + agg_method="sum", + ) + def sum( + self, + numeric_only: bool = False, + *args, + engine=None, + engine_kwargs=None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs) + if not self.adjust: + raise NotImplementedError("sum is not implemented with adjust=False") + if maybe_use_numba(engine): + if self.method == "single": + func = generate_numba_ewm_func + else: + func = generate_numba_ewm_table_func + ewm_func = func( + **get_jit_arguments(engine_kwargs), + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + deltas=tuple(self._deltas), + normalize=False, + ) + return self._apply(ewm_func, name="sum") + elif engine in ("cython", None): + if engine_kwargs is not None: + raise ValueError("cython engine does not accept engine_kwargs") + nv.validate_window_func("sum", args, kwargs) + + deltas = None if self.times is None else self._deltas + window_func = partial( + window_aggregations.ewm, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + deltas=deltas, + normalize=False, + ) + return self._apply(window_func, name="sum", numeric_only=numeric_only) + else: + raise ValueError("engine must be either 'numba' or 'cython'") + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + bias : bool, default False + Use a standard estimation bias correction. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) standard deviation", + agg_method="std", + ) + def std(self, bias: bool = False, numeric_only: bool = False, *args, **kwargs): + maybe_warn_args_and_kwargs(type(self), "std", args, kwargs) + nv.validate_window_func("std", args, kwargs) + if ( + numeric_only + and self._selected_obj.ndim == 1 + and not is_numeric_dtype(self._selected_obj.dtype) + ): + # Raise directly so error message says std instead of var + raise NotImplementedError( + f"{type(self).__name__}.std does not implement numeric_only" + ) + return zsqrt(self.var(bias=bias, numeric_only=numeric_only, **kwargs)) + + def vol(self, bias: bool = False, *args, **kwargs): + warnings.warn( + ( + "vol is deprecated will be removed in a future version. " + "Use std instead." 
+ ), + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.std(bias, *args, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + bias : bool, default False + Use a standard estimation bias correction. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) variance", + agg_method="var", + ) + def var(self, bias: bool = False, numeric_only: bool = False, *args, **kwargs): + maybe_warn_args_and_kwargs(type(self), "var", args, kwargs) + nv.validate_window_func("var", args, kwargs) + window_func = window_aggregations.ewmcov + wfunc = partial( + window_func, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + bias=bias, + ) + + def var_func(values, begin, end, min_periods): + return wfunc(values, begin, end, min_periods, values) + + return self._apply(var_func, name="var", numeric_only=numeric_only) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame , optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndex DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + bias : bool, default False + Use a standard estimation bias correction. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) sample covariance", + agg_method="cov", + ) + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + bias: bool = False, + numeric_only: bool = False, + **kwargs, + ): + from pandas import Series + + maybe_warn_args_and_kwargs(type(self), "cov", None, kwargs) + self._validate_numeric_only("cov", numeric_only) + + def cov_func(x, y): + x_array = self._prep_values(x) + y_array = self._prep_values(y) + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x_array), + min_periods=min_periods, + center=self.center, + closed=self.closed, + step=self.step, + ) + result = window_aggregations.ewmcov( + x_array, + start, + end, + # error: Argument 4 to "ewmcov" has incompatible type + # "Optional[int]"; expected "int" + self.min_periods, # type: ignore[arg-type] + y_array, + self._com, + self.adjust, + self.ignore_na, + bias, + ) + return Series(result, index=x.index, name=x.name) + + return self._apply_pairwise( + self._selected_obj, other, pairwise, cov_func, numeric_only + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. 
+ pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndex DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) sample correlation", + agg_method="corr", + ) + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + numeric_only: bool = False, + **kwargs, + ): + from pandas import Series + + maybe_warn_args_and_kwargs(type(self), "corr", None, kwargs) + self._validate_numeric_only("corr", numeric_only) + + def cov_func(x, y): + x_array = self._prep_values(x) + y_array = self._prep_values(y) + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x_array), + min_periods=min_periods, + center=self.center, + closed=self.closed, + step=self.step, + ) + + def _cov(X, Y): + return window_aggregations.ewmcov( + X, + start, + end, + min_periods, + Y, + self._com, + self.adjust, + self.ignore_na, + True, + ) + + with np.errstate(all="ignore"): + cov = _cov(x_array, y_array) + x_var = _cov(x_array, x_array) + y_var = _cov(y_array, y_array) + result = cov / zsqrt(x_var * y_var) + return Series(result, index=x.index, name=x.name) + + return self._apply_pairwise( + self._selected_obj, other, pairwise, cov_func, numeric_only + ) + + +class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow): + """ + Provide an exponential moving window groupby implementation. + """ + + _attributes = ExponentialMovingWindow._attributes + BaseWindowGroupby._attributes + + def __init__(self, obj, *args, _grouper=None, **kwargs) -> None: + super().__init__(obj, *args, _grouper=_grouper, **kwargs) + + if not obj.empty and self.times is not None: + # sort the times and recalculate the deltas according to the groups + groupby_order = np.concatenate(list(self._grouper.indices.values())) + self._deltas = _calculate_deltas( + self.times.take(groupby_order), # type: ignore[union-attr] + self.halflife, + ) + + def _get_window_indexer(self) -> GroupbyIndexer: + """ + Return an indexer class that will compute the window start and end bounds + + Returns + ------- + GroupbyIndexer + """ + window_indexer = GroupbyIndexer( + groupby_indices=self._grouper.indices, + window_indexer=ExponentialMovingWindowIndexer, + ) + return window_indexer + + +class OnlineExponentialMovingWindow(ExponentialMovingWindow): + def __init__( + self, + obj: NDFrame, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | NDFrame | None = None, + engine: str = "numba", + engine_kwargs: dict[str, bool] | None = None, + *, + selection=None, + ) -> None: + if times is not None: + raise NotImplementedError( + "times is not implemented with online operations." 
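corr above is assembled from three biased ewmcov calls as cov / sqrt(x_var * y_var), and the groupby subclass that follows keeps one EWM state per group, recomputing the time deltas group-wise when times is used. A brief sketch of the group-wise behaviour (assumes pandas 1.5):

    import pandas as pd

    df = pd.DataFrame({"key": list("aabb"), "val": [1.0, 2.0, 10.0, 20.0]})

    # Each group gets its own exponential weighting; the result is indexed by
    # (group key, original row label).
    per_group = df.groupby("key")["val"].ewm(com=0.5).mean()

    # Correlation of a series with an affine transform of itself tends to 1
    # once enough observations are available.
    corr = df["val"].ewm(com=0.5).corr(df["val"] * 2 + 1)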
+ ) + super().__init__( + obj=obj, + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na, + axis=axis, + times=times, + selection=selection, + ) + self._mean = EWMMeanState( + self._com, self.adjust, self.ignore_na, self.axis, obj.shape + ) + if maybe_use_numba(engine): + self.engine = engine + self.engine_kwargs = engine_kwargs + else: + raise ValueError("'numba' is the only supported engine") + + def reset(self) -> None: + """ + Reset the state captured by `update` calls. + """ + self._mean.reset() + + def aggregate(self, func, *args, **kwargs): + return NotImplementedError + + def std(self, bias: bool = False, *args, **kwargs): + return NotImplementedError + + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + numeric_only: bool = False, + **kwargs, + ): + return NotImplementedError + + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + bias: bool = False, + numeric_only: bool = False, + **kwargs, + ): + return NotImplementedError + + def var(self, bias: bool = False, *args, **kwargs): + return NotImplementedError + + def mean(self, *args, update=None, update_times=None, **kwargs): + """ + Calculate an online exponentially weighted mean. + + Parameters + ---------- + update: DataFrame or Series, default None + New values to continue calculating the + exponentially weighted mean from the last values and weights. + Values should be float64 dtype. + + ``update`` needs to be ``None`` the first time the + exponentially weighted mean is calculated. + + update_times: Series or 1-D np.ndarray, default None + New times to continue calculating the + exponentially weighted mean from the last values and weights. + If ``None``, values are assumed to be evenly spaced + in time. + This feature is currently unsupported. 
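Continuing with update= is equivalent to computing the exponentially weighted mean over the full data in a single pass (with the default adjust=True and no times). A sketch of that equivalence (assumes pandas 1.5 with the optional numba dependency, which .online() requires):

    import pandas as pd

    df = pd.DataFrame({"a": range(5), "b": range(5, 10)})

    online = df.head(2).ewm(0.5).online()   # numba-backed online state
    online.mean()                           # seed the state with the first rows
    continued = online.mean(update=df.tail(3))

    # The continued result should match the tail of a one-shot computation.
    full = df.ewm(0.5).mean().tail(3)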
+ + Returns + ------- + DataFrame or Series + + Examples + -------- + >>> df = pd.DataFrame({"a": range(5), "b": range(5, 10)}) + >>> online_ewm = df.head(2).ewm(0.5).online() + >>> online_ewm.mean() + a b + 0 0.00 5.00 + 1 0.75 5.75 + >>> online_ewm.mean(update=df.tail(3)) + a b + 2 1.615385 6.615385 + 3 2.550000 7.550000 + 4 3.520661 8.520661 + >>> online_ewm.reset() + >>> online_ewm.mean() + a b + 0 0.00 5.00 + 1 0.75 5.75 + """ + result_kwargs = {} + is_frame = True if self._selected_obj.ndim == 2 else False + if update_times is not None: + raise NotImplementedError("update_times is not implemented.") + else: + update_deltas = np.ones( + max(self._selected_obj.shape[self.axis - 1] - 1, 0), dtype=np.float64 + ) + if update is not None: + if self._mean.last_ewm is None: + raise ValueError( + "Must call mean with update=None first before passing update" + ) + result_from = 1 + result_kwargs["index"] = update.index + if is_frame: + last_value = self._mean.last_ewm[np.newaxis, :] + result_kwargs["columns"] = update.columns + else: + last_value = self._mean.last_ewm + result_kwargs["name"] = update.name + np_array = np.concatenate((last_value, update.to_numpy())) + else: + result_from = 0 + result_kwargs["index"] = self._selected_obj.index + if is_frame: + result_kwargs["columns"] = self._selected_obj.columns + else: + result_kwargs["name"] = self._selected_obj.name + np_array = self._selected_obj.astype(np.float64).to_numpy() + ewma_func = generate_online_numba_ewma_func( + **get_jit_arguments(self.engine_kwargs) + ) + result = self._mean.run_ewm( + np_array if is_frame else np_array[:, np.newaxis], + update_deltas, + self.min_periods, + ewma_func, + ) + if not is_frame: + result = result.squeeze() + result = result[result_from:] + result = self._selected_obj._constructor(result, **result_kwargs) + return result diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py new file mode 100644 index 00000000..e997ffe1 --- /dev/null +++ b/pandas/core/window/expanding.py @@ -0,0 +1,898 @@ +from __future__ import annotations + +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) + +from pandas._typing import ( + Axis, + QuantileInterpolation, + WindowingRankType, +) + +if TYPE_CHECKING: + from pandas import DataFrame, Series + from pandas.core.generic import NDFrame + +from pandas.compat.numpy import function as nv +from pandas.util._decorators import doc + +from pandas.core.indexers.objects import ( + BaseIndexer, + ExpandingIndexer, + GroupbyIndexer, +) +from pandas.core.window.common import maybe_warn_args_and_kwargs +from pandas.core.window.doc import ( + _shared_docs, + args_compat, + create_section_header, + kwargs_compat, + kwargs_numeric_only, + numba_notes, + template_header, + template_returns, + template_see_also, + window_agg_numba_parameters, + window_apply_parameters, +) +from pandas.core.window.rolling import ( + BaseWindowGroupby, + RollingAndExpandingMixin, +) + + +class Expanding(RollingAndExpandingMixin): + """ + Provide expanding window calculations. + + Parameters + ---------- + min_periods : int, default 1 + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + center : bool, default False + If False, set the window labels as the right edge of the window index. + + If True, set the window labels as the center of the window index. + + .. deprecated:: 1.1.0 + + axis : int or str, default 0 + If ``0`` or ``'index'``, roll across the rows. 
+ + If ``1`` or ``'columns'``, roll across the columns. + + For `Series` this parameter is unused and defaults to 0. + + method : str {'single', 'table'}, default 'single' + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + .. versionadded:: 1.3.0 + + Returns + ------- + ``Expanding`` subclass + + See Also + -------- + rolling : Provides rolling window calculations. + ewm : Provides exponential weighted functions. + + Notes + ----- + See :ref:`Windowing Operations ` for further usage details + and examples. + + Examples + -------- + >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + **min_periods** + + Expanding sum with 1 vs 3 observations needed to calculate a value. + + >>> df.expanding(1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 7.0 + >>> df.expanding(3).sum() + B + 0 NaN + 1 NaN + 2 3.0 + 3 3.0 + 4 7.0 + """ + + _attributes: list[str] = ["min_periods", "center", "axis", "method"] + + def __init__( + self, + obj: NDFrame, + min_periods: int = 1, + center: bool | None = None, + axis: Axis = 0, + method: str = "single", + selection=None, + ) -> None: + super().__init__( + obj=obj, + min_periods=min_periods, + center=center, + axis=axis, + method=method, + selection=selection, + ) + + def _get_window_indexer(self) -> BaseIndexer: + """ + Return an indexer class that will compute the window start and end bounds + """ + return ExpandingIndexer() + + @doc( + _shared_docs["aggregate"], + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.aggregate : Similar DataFrame method. + pandas.Series.aggregate : Similar Series method. + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 1.000000 4.000000 7.000000 + 1 1.666667 4.666667 7.666667 + 2 2.428571 5.428571 8.428571 + """ + ), + klass="Series/Dataframe", + axis="", + ) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + @doc( + template_header, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="count of non NaN observations", + agg_method="count", + ) + def count(self, numeric_only: bool = False): + return super().count(numeric_only=numeric_only) + + @doc( + template_header, + create_section_header("Parameters"), + window_apply_parameters, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="custom aggregation function", + agg_method="apply", + ) + def apply( + self, + func: Callable[..., Any], + raw: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, ...] 
| None = None, + kwargs: dict[str, Any] | None = None, + ): + return super().apply( + func, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + kwargs=kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="sum", + agg_method="sum", + ) + def sum( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs) + nv.validate_expanding_func("sum", args, kwargs) + return super().sum( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="maximum", + agg_method="max", + ) + def max( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "max", args, kwargs) + nv.validate_expanding_func("max", args, kwargs) + return super().max( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="minimum", + agg_method="min", + ) + def min( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "min", args, kwargs) + nv.validate_expanding_func("min", args, kwargs) + return super().min( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="mean", + agg_method="mean", + ) + def mean( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs) + nv.validate_expanding_func("mean", args, kwargs) + return super().mean( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + 
create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="expanding", + aggregation_description="median", + agg_method="median", + ) + def median( + self, + numeric_only: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "median", None, kwargs) + return super().median( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters("1.4"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.std : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.std` is different + than the default ``ddof`` of 0 in :func:`numpy.std`. + + A minimum of one period is required for the rolling calculation.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + + >>> s.expanding(3).std() + 0 NaN + 1 NaN + 2 0.577350 + 3 0.957427 + 4 0.894427 + 5 0.836660 + 6 0.786796 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="standard deviation", + agg_method="std", + ) + def std( + self, + ddof: int = 1, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "std", args, kwargs) + nv.validate_expanding_func("std", args, kwargs) + return super().std( + ddof=ddof, + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters("1.4"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.var : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.var` is different + than the default ``ddof`` of 0 in :func:`numpy.var`. 
+ + A minimum of one period is required for the rolling calculation.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + + >>> s.expanding(3).var() + 0 NaN + 1 NaN + 2 0.333333 + 3 0.916667 + 4 0.800000 + 5 0.700000 + 6 0.619048 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="variance", + agg_method="var", + ) + def var( + self, + ddof: int = 1, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "var", args, kwargs) + nv.validate_expanding_func("var", args, kwargs) + return super().var( + ddof=ddof, + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + "A minimum of one period is required for the calculation.\n\n", + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([0, 1, 2, 3]) + + >>> s.expanding().sem() + 0 NaN + 1 0.707107 + 2 0.707107 + 3 0.745356 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="standard error of mean", + agg_method="sem", + ) + def sem(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs): + maybe_warn_args_and_kwargs(type(self), "sem", args, kwargs) + return super().sem(ddof=ddof, numeric_only=numeric_only, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.skew : Third moment of a probability density.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of three periods is required for the rolling calculation.\n", + window_method="expanding", + aggregation_description="unbiased skewness", + agg_method="skew", + ) + def skew(self, numeric_only: bool = False, **kwargs): + maybe_warn_args_and_kwargs(type(self), "skew", None, kwargs) + return super().skew(numeric_only=numeric_only, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.kurtosis : Reference SciPy method.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of four periods is required for the calculation.\n\n", + create_section_header("Examples"), + dedent( + """ + The example below will show a rolling calculation with a window size of + four matching the equivalent function call using `scipy.stats`. 
+ + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}") + -1.200000 + >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}") + 4.999874 + >>> s = pd.Series(arr) + >>> s.expanding(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 4.999874 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="Fisher's definition of kurtosis without bias", + agg_method="kurt", + ) + def kurt(self, numeric_only: bool = False, **kwargs): + maybe_warn_args_and_kwargs(type(self), "kurt", None, kwargs) + return super().kurt(numeric_only=numeric_only, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + quantile : float + Quantile to compute. 0 <= quantile <= 1. + interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="quantile", + agg_method="quantile", + ) + def quantile( + self, + quantile: float, + interpolation: QuantileInterpolation = "linear", + numeric_only: bool = False, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "quantile", None, kwargs) + return super().quantile( + quantile=quantile, + interpolation=interpolation, + numeric_only=numeric_only, + **kwargs, + ) + + @doc( + template_header, + ".. versionadded:: 1.4.0 \n\n", + create_section_header("Parameters"), + dedent( + """ + method : {{'average', 'min', 'max'}}, default 'average' + How to rank the group of records that have the same value (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group + + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. 
+ """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 4, 2, 3, 5, 3]) + >>> s.expanding().rank() + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + 4 5.0 + 5 3.5 + dtype: float64 + + >>> s.expanding().rank(method="max") + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + 4 5.0 + 5 4.0 + dtype: float64 + + >>> s.expanding().rank(method="min") + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + 4 5.0 + 5 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="rank", + agg_method="rank", + ) + def rank( + self, + method: WindowingRankType = "average", + ascending: bool = True, + pct: bool = False, + numeric_only: bool = False, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "rank", None, kwargs) + return super().rank( + method=method, + ascending=ascending, + pct=pct, + numeric_only=numeric_only, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="sample covariance", + agg_method="cov", + ) + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "cov", None, kwargs) + return super().cov( + other=other, + pairwise=pairwise, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + dedent( + """ + cov : Similar method to calculate covariance. + numpy.corrcoef : NumPy Pearson's correlation calculation. + """ + ).replace("\n", "", 1), + template_see_also, + create_section_header("Notes"), + dedent( + """ + This function uses Pearson's definition of correlation + (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). 
+ + When `other` is not specified, the output will be self correlation (e.g. + all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` + set to `True`. + + Function will return ``NaN`` for correlations of equal valued sequences; + this is the result of a 0/0 division error. + + When `pairwise` is set to `False`, only matching columns between `self` and + `other` will be used. + + When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame + with the original index on the first level, and the `other` DataFrame + columns on the second level. + + In the case of missing elements, only complete pairwise observations + will be used. + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="correlation", + agg_method="corr", + ) + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "corr", None, kwargs) + return super().corr( + other=other, + pairwise=pairwise, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + + +class ExpandingGroupby(BaseWindowGroupby, Expanding): + """ + Provide a expanding groupby implementation. + """ + + _attributes = Expanding._attributes + BaseWindowGroupby._attributes + + def _get_window_indexer(self) -> GroupbyIndexer: + """ + Return an indexer class that will compute the window start and end bounds + + Returns + ------- + GroupbyIndexer + """ + window_indexer = GroupbyIndexer( + groupby_indices=self._grouper.indices, + window_indexer=ExpandingIndexer, + ) + return window_indexer diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py new file mode 100644 index 00000000..0f9f01e9 --- /dev/null +++ b/pandas/core/window/numba_.py @@ -0,0 +1,350 @@ +from __future__ import annotations + +import functools +from typing import ( + TYPE_CHECKING, + Any, + Callable, +) + +import numpy as np + +from pandas._typing import Scalar +from pandas.compat._optional import import_optional_dependency + +from pandas.core.util.numba_ import jit_user_function + + +@functools.lru_cache(maxsize=None) +def generate_numba_apply_func( + func: Callable[..., Scalar], + nopython: bool, + nogil: bool, + parallel: bool, +): + """ + Generate a numba jitted apply function specified by values from engine_kwargs. + + 1. jit the user's function + 2. Return a rolling apply function with the jitted function inline + + Configurations specified in engine_kwargs apply to both the user's + function _AND_ the rolling apply function. 
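From the public API, the jit options named here come in through engine_kwargs on rolling/expanding apply, and the same options are applied to the user's function and to the generated loop. A minimal sketch (assumes pandas 1.5; falls back to the Cython engine when numba is not installed):

    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(10, dtype=np.float64))

    def window_mean(window: np.ndarray) -> float:
        # Plain-numpy function; with engine="numba" it is JIT-compiled with the
        # same nopython/nogil/parallel options as the outer apply loop.
        return window.mean()

    try:
        result = s.rolling(3).apply(
            window_mean,
            raw=True,  # the numba engine requires raw ndarray windows
            engine="numba",
            engine_kwargs={"nopython": True, "nogil": False, "parallel": False},
        )
    except ImportError:
        result = s.rolling(3).apply(window_mean, raw=True)  # Cython fallback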
+ + Parameters + ---------- + func : function + function to be applied to each window and will be JITed + nopython : bool + nopython to be passed into numba.jit + nogil : bool + nogil to be passed into numba.jit + parallel : bool + parallel to be passed into numba.jit + + Returns + ------- + Numba function + """ + numba_func = jit_user_function(func, nopython, nogil, parallel) + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def roll_apply( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + *args: Any, + ) -> np.ndarray: + result = np.empty(len(begin)) + for i in numba.prange(len(result)): + start = begin[i] + stop = end[i] + window = values[start:stop] + count_nan = np.sum(np.isnan(window)) + if len(window) - count_nan >= minimum_periods: + result[i] = numba_func(window, *args) + else: + result[i] = np.nan + return result + + return roll_apply + + +@functools.lru_cache(maxsize=None) +def generate_numba_ewm_func( + nopython: bool, + nogil: bool, + parallel: bool, + com: float, + adjust: bool, + ignore_na: bool, + deltas: tuple, + normalize: bool, +): + """ + Generate a numba jitted ewm mean or sum function specified by values + from engine_kwargs. + + Parameters + ---------- + nopython : bool + nopython to be passed into numba.jit + nogil : bool + nogil to be passed into numba.jit + parallel : bool + parallel to be passed into numba.jit + com : float + adjust : bool + ignore_na : bool + deltas : tuple + normalize : bool + + Returns + ------- + Numba function + """ + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def ewm( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + ) -> np.ndarray: + result = np.empty(len(values)) + alpha = 1.0 / (1.0 + com) + old_wt_factor = 1.0 - alpha + new_wt = 1.0 if adjust else alpha + + for i in numba.prange(len(begin)): + start = begin[i] + stop = end[i] + window = values[start:stop] + sub_result = np.empty(len(window)) + + weighted = window[0] + nobs = int(not np.isnan(weighted)) + sub_result[0] = weighted if nobs >= minimum_periods else np.nan + old_wt = 1.0 + + for j in range(1, len(window)): + cur = window[j] + is_observation = not np.isnan(cur) + nobs += is_observation + if not np.isnan(weighted): + + if is_observation or not ignore_na: + if normalize: + # note that len(deltas) = len(vals) - 1 and deltas[i] + # is to be used in conjunction with vals[i+1] + old_wt *= old_wt_factor ** deltas[start + j - 1] + else: + weighted = old_wt_factor * weighted + if is_observation: + if normalize: + # avoid numerical errors on constant series + if weighted != cur: + weighted = old_wt * weighted + new_wt * cur + if normalize: + weighted = weighted / (old_wt + new_wt) + if adjust: + old_wt += new_wt + else: + old_wt = 1.0 + else: + weighted += cur + elif is_observation: + weighted = cur + + sub_result[j] = weighted if nobs >= minimum_periods else np.nan + + result[start:stop] = sub_result + + return result + + return ewm + + +@functools.lru_cache(maxsize=None) +def generate_numba_table_func( + func: Callable[..., np.ndarray], + nopython: bool, + nogil: bool, + parallel: bool, +): + """ + Generate a numba jitted function to apply window calculations table-wise. + + Func will be passed a M window size x N number of columns array, and + must return a 1 x N number of columns array. 
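The table-wise variant hands the whole window (rows x columns) to the user's function at once; it is only reachable with method="table" together with engine="numba". A small sketch (assumes pandas 1.5 and that the function is numba-compilable):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        np.arange(12, dtype=np.float64).reshape(4, 3), columns=list("abc")
    )

    def column_sums(table: np.ndarray) -> np.ndarray:
        # Receives the full window and must return one value per column.
        return np.sum(table, axis=0)

    try:
        result = df.rolling(2, method="table").apply(
            column_sums, raw=True, engine="numba"
        )
    except ImportError:
        result = None  # numba is optional; method="table" is numba-only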
Func is intended to operate + row-wise, but the result will be transposed for axis=1. + + 1. jit the user's function + 2. Return a rolling apply function with the jitted function inline + + Parameters + ---------- + func : function + function to be applied to each window and will be JITed + nopython : bool + nopython to be passed into numba.jit + nogil : bool + nogil to be passed into numba.jit + parallel : bool + parallel to be passed into numba.jit + + Returns + ------- + Numba function + """ + numba_func = jit_user_function(func, nopython, nogil, parallel) + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def roll_table( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + *args: Any, + ): + result = np.empty((len(begin), values.shape[1])) + min_periods_mask = np.empty(result.shape) + for i in numba.prange(len(result)): + start = begin[i] + stop = end[i] + window = values[start:stop] + count_nan = np.sum(np.isnan(window), axis=0) + sub_result = numba_func(window, *args) + nan_mask = len(window) - count_nan >= minimum_periods + min_periods_mask[i, :] = nan_mask + result[i, :] = sub_result + result = np.where(min_periods_mask, result, np.nan) + return result + + return roll_table + + +# This function will no longer be needed once numba supports +# axis for all np.nan* agg functions +# https://github.com/numba/numba/issues/1269 +@functools.lru_cache(maxsize=None) +def generate_manual_numpy_nan_agg_with_axis(nan_func): + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=True, nogil=True, parallel=True) + def nan_agg_with_axis(table): + result = np.empty(table.shape[1]) + for i in numba.prange(table.shape[1]): + partition = table[:, i] + result[i] = nan_func(partition) + return result + + return nan_agg_with_axis + + +@functools.lru_cache(maxsize=None) +def generate_numba_ewm_table_func( + nopython: bool, + nogil: bool, + parallel: bool, + com: float, + adjust: bool, + ignore_na: bool, + deltas: tuple, + normalize: bool, +): + """ + Generate a numba jitted ewm mean or sum function applied table wise specified + by values from engine_kwargs. 
+ + Parameters + ---------- + nopython : bool + nopython to be passed into numba.jit + nogil : bool + nogil to be passed into numba.jit + parallel : bool + parallel to be passed into numba.jit + com : float + adjust : bool + ignore_na : bool + deltas : tuple + normalize: bool + + Returns + ------- + Numba function + """ + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def ewm_table( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + ) -> np.ndarray: + alpha = 1.0 / (1.0 + com) + old_wt_factor = 1.0 - alpha + new_wt = 1.0 if adjust else alpha + old_wt = np.ones(values.shape[1]) + + result = np.empty(values.shape) + weighted = values[0].copy() + nobs = (~np.isnan(weighted)).astype(np.int64) + result[0] = np.where(nobs >= minimum_periods, weighted, np.nan) + for i in range(1, len(values)): + cur = values[i] + is_observations = ~np.isnan(cur) + nobs += is_observations.astype(np.int64) + for j in numba.prange(len(cur)): + if not np.isnan(weighted[j]): + if is_observations[j] or not ignore_na: + if normalize: + # note that len(deltas) = len(vals) - 1 and deltas[i] + # is to be used in conjunction with vals[i+1] + old_wt[j] *= old_wt_factor ** deltas[i - 1] + else: + weighted[j] = old_wt_factor * weighted[j] + if is_observations[j]: + if normalize: + # avoid numerical errors on constant series + if weighted[j] != cur[j]: + weighted[j] = ( + old_wt[j] * weighted[j] + new_wt * cur[j] + ) + if normalize: + weighted[j] = weighted[j] / (old_wt[j] + new_wt) + if adjust: + old_wt[j] += new_wt + else: + old_wt[j] = 1.0 + else: + weighted[j] += cur[j] + elif is_observations[j]: + weighted[j] = cur[j] + + result[i] = np.where(nobs >= minimum_periods, weighted, np.nan) + + return result + + return ewm_table diff --git a/pandas/core/window/online.py b/pandas/core/window/online.py new file mode 100644 index 00000000..2e25bdd1 --- /dev/null +++ b/pandas/core/window/online.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from pandas.compat._optional import import_optional_dependency + + +def generate_online_numba_ewma_func( + nopython: bool, + nogil: bool, + parallel: bool, +): + """ + Generate a numba jitted groupby ewma function specified by values + from engine_kwargs. + + Parameters + ---------- + nopython : bool + nopython to be passed into numba.jit + nogil : bool + nogil to be passed into numba.jit + parallel : bool + parallel to be passed into numba.jit + + Returns + ------- + Numba function + """ + if TYPE_CHECKING: + import numba + else: + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def online_ewma( + values: np.ndarray, + deltas: np.ndarray, + minimum_periods: int, + old_wt_factor: float, + new_wt: float, + old_wt: np.ndarray, + adjust: bool, + ignore_na: bool, + ): + """ + Compute online exponentially weighted mean per column over 2D values. + + Takes the first observation as is, then computes the subsequent + exponentially weighted mean accounting minimum periods. 
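The recurrence implemented here (and in the ewm kernels above) keeps a decaying old weight and folds each new observation into a weighted average. A plain-Python restatement for one NaN-free column with evenly spaced observations, given as an illustration rather than a drop-in replacement:

    import numpy as np

    def ewm_mean_recurrence(values, com, adjust=True, min_periods=1):
        # old_wt decays by (1 - alpha) each step; the new observation enters
        # with weight new_wt and the running mean is their weighted average.
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha

        out = np.full(len(values), np.nan)
        weighted = values[0]
        old_wt = 1.0
        nobs = int(not np.isnan(weighted))
        if nobs >= min_periods:
            out[0] = weighted
        for i in range(1, len(values)):
            cur = values[i]
            if not np.isnan(cur):
                nobs += 1
                if np.isnan(weighted):
                    weighted = cur
                else:
                    old_wt *= old_wt_factor
                    weighted = (old_wt * weighted + new_wt * cur) / (old_wt + new_wt)
                    old_wt = old_wt + new_wt if adjust else 1.0
            if nobs >= min_periods:
                out[i] = weighted
        return out

    vals = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
    approx = ewm_mean_recurrence(vals, com=0.5)
    # For NaN-free input this matches Series(vals).ewm(com=0.5).mean().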
+ """ + result = np.empty(values.shape) + weighted_avg = values[0] + nobs = (~np.isnan(weighted_avg)).astype(np.int64) + result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan) + + for i in range(1, len(values)): + cur = values[i] + is_observations = ~np.isnan(cur) + nobs += is_observations.astype(np.int64) + for j in numba.prange(len(cur)): + if not np.isnan(weighted_avg[j]): + if is_observations[j] or not ignore_na: + + # note that len(deltas) = len(vals) - 1 and deltas[i] is to be + # used in conjunction with vals[i+1] + old_wt[j] *= old_wt_factor ** deltas[j - 1] + if is_observations[j]: + # avoid numerical errors on constant series + if weighted_avg[j] != cur[j]: + weighted_avg[j] = ( + (old_wt[j] * weighted_avg[j]) + (new_wt * cur[j]) + ) / (old_wt[j] + new_wt) + if adjust: + old_wt[j] += new_wt + else: + old_wt[j] = 1.0 + elif is_observations[j]: + weighted_avg[j] = cur[j] + + result[i] = np.where(nobs >= minimum_periods, weighted_avg, np.nan) + + return result, old_wt + + return online_ewma + + +class EWMMeanState: + def __init__(self, com, adjust, ignore_na, axis, shape) -> None: + alpha = 1.0 / (1.0 + com) + self.axis = axis + self.shape = shape + self.adjust = adjust + self.ignore_na = ignore_na + self.new_wt = 1.0 if adjust else alpha + self.old_wt_factor = 1.0 - alpha + self.old_wt = np.ones(self.shape[self.axis - 1]) + self.last_ewm = None + + def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func): + result, old_wt = ewm_func( + weighted_avg, + deltas, + min_periods, + self.old_wt_factor, + self.new_wt, + self.old_wt, + self.adjust, + self.ignore_na, + ) + self.old_wt = old_wt + self.last_ewm = result[-1] + return result + + def reset(self) -> None: + self.old_wt = np.ones(self.shape[self.axis - 1]) + self.last_ewm = None diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py new file mode 100644 index 00000000..1a71b41b --- /dev/null +++ b/pandas/core/window/rolling.py @@ -0,0 +1,2899 @@ +""" +Provide a generic structure to support window functions, +similar to how we have a Groupby object. 
+""" +from __future__ import annotations + +import copy +from datetime import timedelta +from functools import partial +import inspect +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Sized, +) +import warnings + +import numpy as np + +from pandas._libs.tslibs import ( + BaseOffset, + to_offset, +) +import pandas._libs.window.aggregations as window_aggregations +from pandas._typing import ( + ArrayLike, + Axis, + NDFrameT, + QuantileInterpolation, + WindowingRankType, +) +from pandas.compat._optional import import_optional_dependency +from pandas.compat.numpy import function as nv +from pandas.errors import DataError +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_float64, + is_bool, + is_integer, + is_list_like, + is_numeric_dtype, + is_scalar, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import notna + +from pandas.core._numba import executor +from pandas.core.algorithms import factorize +from pandas.core.apply import ResamplerWindowApply +from pandas.core.arrays import ExtensionArray +from pandas.core.base import SelectionMixin +import pandas.core.common as com +from pandas.core.indexers.objects import ( + BaseIndexer, + FixedWindowIndexer, + GroupbyIndexer, + VariableWindowIndexer, +) +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + MultiIndex, + PeriodIndex, + TimedeltaIndex, +) +from pandas.core.reshape.concat import concat +from pandas.core.util.numba_ import ( + get_jit_arguments, + maybe_use_numba, +) +from pandas.core.window.common import ( + flex_binary_moment, + maybe_warn_args_and_kwargs, + zsqrt, +) +from pandas.core.window.doc import ( + _shared_docs, + args_compat, + create_section_header, + kwargs_compat, + kwargs_numeric_only, + kwargs_scipy, + numba_notes, + template_header, + template_returns, + template_see_also, + window_agg_numba_parameters, + window_apply_parameters, +) +from pandas.core.window.numba_ import ( + generate_manual_numpy_nan_agg_with_axis, + generate_numba_apply_func, + generate_numba_table_func, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + from pandas.core.generic import NDFrame + from pandas.core.groupby.ops import BaseGrouper + + +class BaseWindow(SelectionMixin): + """Provides utilities for performing windowing operations.""" + + _attributes: list[str] = [] + exclusions: frozenset[Hashable] = frozenset() + _on: Index + + def __init__( + self, + obj: NDFrame, + window=None, + min_periods: int | None = None, + center: bool | None = False, + win_type: str | None = None, + axis: Axis = 0, + on: str | Index | None = None, + closed: str | None = None, + step: int | None = None, + method: str = "single", + *, + selection=None, + ) -> None: + self.obj = obj + self.on = on + self.closed = closed + self.step = step + self.window = window + self.min_periods = min_periods + self.center = center + # TODO(2.0): Change this back to self.win_type once deprecation is enforced + self._win_type = win_type + self.axis = obj._get_axis_number(axis) if axis is not None else None + self.method = method + self._win_freq_i8: int | None = None + if self.on is None: + if self.axis == 0: + self._on = self.obj.index + else: + # i.e. 
self.axis == 1 + self._on = self.obj.columns + elif isinstance(self.on, Index): + self._on = self.on + elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns: + self._on = Index(self.obj[self.on]) + else: + raise ValueError( + f"invalid on specified as {self.on}, " + "must be a column (of DataFrame), an Index or None" + ) + + self._selection = selection + self._validate() + + @property + def win_type(self): + if self._win_freq_i8 is not None: + warnings.warn( + "win_type will no longer return 'freq' in a future version. " + "Check the type of self.window instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return "freq" + return self._win_type + + @property + def is_datetimelike(self) -> bool: + warnings.warn( + "is_datetimelike is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._win_freq_i8 is not None + + def validate(self) -> None: + warnings.warn( + "validate is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self._validate() + + def _validate(self) -> None: + if self.center is not None and not is_bool(self.center): + raise ValueError("center must be a boolean") + if self.min_periods is not None: + if not is_integer(self.min_periods): + raise ValueError("min_periods must be an integer") + elif self.min_periods < 0: + raise ValueError("min_periods must be >= 0") + elif is_integer(self.window) and self.min_periods > self.window: + raise ValueError( + f"min_periods {self.min_periods} must be <= window {self.window}" + ) + if self.closed is not None and self.closed not in [ + "right", + "both", + "left", + "neither", + ]: + raise ValueError("closed must be 'right', 'left', 'both' or 'neither'") + if not isinstance(self.obj, (ABCSeries, ABCDataFrame)): + raise TypeError(f"invalid type: {type(self)}") + if isinstance(self.window, BaseIndexer): + # Validate that the passed BaseIndexer subclass has + # a get_window_bounds with the correct signature. + get_window_bounds_signature = inspect.signature( + self.window.get_window_bounds + ).parameters.keys() + expected_signature = inspect.signature( + BaseIndexer().get_window_bounds + ).parameters.keys() + if get_window_bounds_signature != expected_signature: + raise ValueError( + f"{type(self.window).__name__} does not implement " + f"the correct signature for get_window_bounds" + ) + if self.method not in ["table", "single"]: + raise ValueError("method must be 'table' or 'single") + if self.step is not None: + if not is_integer(self.step): + raise ValueError("step must be an integer") + elif self.step < 0: + raise ValueError("step must be >= 0") + + def _check_window_bounds( + self, start: np.ndarray, end: np.ndarray, num_vals: int + ) -> None: + if len(start) != len(end): + raise ValueError( + f"start ({len(start)}) and end ({len(end)}) bounds must be the " + f"same length" + ) + elif len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1): + raise ValueError( + f"start and end bounds ({len(start)}) must be the same length " + f"as the object ({num_vals}) divided by the step ({self.step}) " + f"if given and rounded up" + ) + + def _slice_axis_for_step(self, index: Index, result: Sized | None = None) -> Index: + """ + Slices the index for a given result and the preset step. 
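When a step is given, the window is only evaluated at every step-th label, and the bounds check above enforces that len(start) equals the object's length divided by step, rounded up. A usage sketch (assumes pandas 1.5, where rolling step was introduced):

    import pandas as pd

    s = pd.Series(range(6))

    # Only every second label is evaluated, so the result has
    # ceil(len(s) / 2) == 3 rows, matching the bounds check above.
    thinned = s.rolling(2, step=2).sum()
    # thinned.index -> [0, 2, 4]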
+ """ + return ( + index + if result is None or len(result) == len(index) + else index[:: self.step] + ) + + def _validate_numeric_only(self, name: str, numeric_only: bool) -> None: + """ + Validate numeric_only argument, raising if invalid for the input. + + Parameters + ---------- + name : str + Name of the operator (kernel). + numeric_only : bool + Value passed by user. + """ + if ( + self._selected_obj.ndim == 1 + and numeric_only + and not is_numeric_dtype(self._selected_obj.dtype) + ): + raise NotImplementedError( + f"{type(self).__name__}.{name} does not implement numeric_only" + ) + + def _make_numeric_only(self, obj: NDFrameT) -> NDFrameT: + """Subset DataFrame to numeric columns. + + Parameters + ---------- + obj : DataFrame + + Returns + ------- + obj subset to numeric-only columns. + """ + result = obj.select_dtypes(include=["number"], exclude=["timedelta"]) + return result + + def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT: + """ + Split data into blocks & return conformed data. + """ + # filter out the on from the object + if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2: + obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) + if obj.ndim > 1 and (numeric_only or self.axis == 1): + # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything + # to float to calculate the complete row at once. We exclude all non-numeric + # dtypes. + obj = self._make_numeric_only(obj) + if self.axis == 1: + obj = obj.astype("float64", copy=False) + obj._mgr = obj._mgr.consolidate() + return obj + + def _gotitem(self, key, ndim, subset=None): + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : str / list of selections + ndim : {1, 2} + requested ndim of result + subset : object, default None + subset to act on + """ + # create a new object to prevent aliasing + if subset is None: + subset = self.obj + + # we need to make a shallow copy of ourselves + # with the same groupby + with warnings.catch_warnings(): + # TODO(2.0): Remove once win_type deprecation is enforced + warnings.filterwarnings("ignore", "win_type", FutureWarning) + kwargs = {attr: getattr(self, attr) for attr in self._attributes} + + selection = None + if subset.ndim == 2 and ( + (is_scalar(key) and key in subset) or is_list_like(key) + ): + selection = key + + new_win = type(self)(subset, selection=selection, **kwargs) + return new_win + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{attr}'" + ) + + def _dir_additions(self): + return self.obj._dir_additions() + + def __repr__(self) -> str: + """ + Provide a nice str repr of our rolling object. 
+ """ + attrs_list = ( + f"{attr_name}={getattr(self, attr_name)}" + for attr_name in self._attributes + if getattr(self, attr_name, None) is not None and attr_name[0] != "_" + ) + attrs = ",".join(attrs_list) + return f"{type(self).__name__} [{attrs}]" + + def __iter__(self): + obj = self._selected_obj.set_axis(self._on) + obj = self._create_data(obj) + indexer = self._get_window_indexer() + + start, end = indexer.get_window_bounds( + num_values=len(obj), + min_periods=self.min_periods, + center=self.center, + closed=self.closed, + step=self.step, + ) + self._check_window_bounds(start, end, len(obj)) + + for s, e in zip(start, end): + result = obj.iloc[slice(s, e)] + yield result + + def _prep_values(self, values: ArrayLike) -> np.ndarray: + """Convert input to numpy arrays for Cython routines""" + if needs_i8_conversion(values.dtype): + raise NotImplementedError( + f"ops for {type(self).__name__} for this " + f"dtype {values.dtype} are not implemented" + ) + else: + # GH #12373 : rolling functions error on float32 data + # make sure the data is coerced to float64 + try: + if isinstance(values, ExtensionArray): + values = values.to_numpy(np.float64, na_value=np.nan) + else: + values = ensure_float64(values) + except (ValueError, TypeError) as err: + raise TypeError(f"cannot handle this type -> {values.dtype}") from err + + # Convert inf to nan for C funcs + inf = np.isinf(values) + if inf.any(): + values = np.where(inf, np.nan, values) + + return values + + def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None: + # if we have an 'on' column we want to put it back into + # the results in the same location + from pandas import Series + + if self.on is not None and not self._on.equals(obj.index): + name = self._on.name + extra_col = Series(self._on, index=self.obj.index, name=name) + if name in result.columns: + # TODO: sure we want to overwrite results? + result[name] = extra_col + elif name in result.index.names: + pass + elif name in self._selected_obj.columns: + # insert in the same location as we had in _selected_obj + old_cols = self._selected_obj.columns + new_cols = result.columns + old_loc = old_cols.get_loc(name) + overlap = new_cols.intersection(old_cols[:old_loc]) + new_loc = len(overlap) + result.insert(new_loc, name, extra_col) + else: + # insert at the end + result[name] = extra_col + + @property + def _index_array(self): + # TODO: why do we get here with e.g. MultiIndex? 
+ if needs_i8_conversion(self._on.dtype): + return self._on.asi8 + return None + + def _resolve_output(self, out: DataFrame, obj: DataFrame) -> DataFrame: + """Validate and finalize result.""" + if out.shape[1] == 0 and obj.shape[1] > 0: + raise DataError("No numeric types to aggregate") + elif out.shape[1] == 0: + return obj.astype("float64") + + self._insert_on_column(out, obj) + return out + + def _get_window_indexer(self) -> BaseIndexer: + """ + Return an indexer class that will compute the window start and end bounds + """ + if isinstance(self.window, BaseIndexer): + return self.window + if self._win_freq_i8 is not None: + return VariableWindowIndexer( + index_array=self._index_array, + window_size=self._win_freq_i8, + center=self.center, + ) + return FixedWindowIndexer(window_size=self.window) + + def _apply_series( + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None + ) -> Series: + """ + Series version of _apply_blockwise + """ + obj = self._create_data(self._selected_obj) + + if name == "count": + # GH 12541: Special case for count where we support date-like types + obj = notna(obj).astype(int) + try: + values = self._prep_values(obj._values) + except (TypeError, NotImplementedError) as err: + raise DataError("No numeric types to aggregate") from err + + result = homogeneous_func(values) + index = self._slice_axis_for_step(obj.index, result) + return obj._constructor(result, index=index, name=obj.name) + + def _apply_blockwise( + self, + homogeneous_func: Callable[..., ArrayLike], + name: str, + numeric_only: bool = False, + ) -> DataFrame | Series: + """ + Apply the given function to the DataFrame broken down into homogeneous + sub-frames. + """ + self._validate_numeric_only(name, numeric_only) + if self._selected_obj.ndim == 1: + return self._apply_series(homogeneous_func, name) + + obj = self._create_data(self._selected_obj, numeric_only) + if name == "count": + # GH 12541: Special case for count where we support date-like types + obj = notna(obj).astype(int) + obj._mgr = obj._mgr.consolidate() + + def hfunc(values: ArrayLike) -> ArrayLike: + values = self._prep_values(values) + return homogeneous_func(values) + + if self.axis == 1: + obj = obj.T + + taker = [] + res_values = [] + for i, arr in enumerate(obj._iter_column_arrays()): + # GH#42736 operate column-wise instead of block-wise + try: + res = hfunc(arr) + except (TypeError, NotImplementedError): + pass + else: + res_values.append(res) + taker.append(i) + + index = self._slice_axis_for_step( + obj.index, res_values[0] if len(res_values) > 0 else None + ) + df = type(obj)._from_arrays( + res_values, + index=index, + columns=obj.columns.take(taker), + verify_integrity=False, + ) + + if self.axis == 1: + df = df.T + + if 0 != len(res_values) != len(obj.columns): + # GH#42738 ignore_failures dropped nuisance columns + dropped = obj.columns.difference(obj.columns.take(taker)) + warnings.warn( + "Dropping of nuisance columns in rolling operations " + "is deprecated; in a future version this will raise TypeError. " + "Select only valid columns before calling the operation. 
" + f"Dropped columns were {dropped}", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return self._resolve_output(df, obj) + + def _apply_tablewise( + self, + homogeneous_func: Callable[..., ArrayLike], + name: str | None = None, + numeric_only: bool = False, + ) -> DataFrame | Series: + """ + Apply the given function to the DataFrame across the entire object + """ + if self._selected_obj.ndim == 1: + raise ValueError("method='table' not applicable for Series objects.") + obj = self._create_data(self._selected_obj, numeric_only) + values = self._prep_values(obj.to_numpy()) + values = values.T if self.axis == 1 else values + result = homogeneous_func(values) + result = result.T if self.axis == 1 else result + index = self._slice_axis_for_step(obj.index, result) + columns = ( + obj.columns + if result.shape[1] == len(obj.columns) + else obj.columns[:: self.step] + ) + out = obj._constructor(result, index=index, columns=columns) + + return self._resolve_output(out, obj) + + def _apply_pairwise( + self, + target: DataFrame | Series, + other: DataFrame | Series | None, + pairwise: bool | None, + func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series], + numeric_only: bool, + ) -> DataFrame | Series: + """ + Apply the given pairwise function given 2 pandas objects (DataFrame/Series) + """ + target = self._create_data(target, numeric_only) + if other is None: + other = target + # only default unset + pairwise = True if pairwise is None else pairwise + elif not isinstance(other, (ABCDataFrame, ABCSeries)): + raise ValueError("other must be a DataFrame or Series") + elif other.ndim == 2 and numeric_only: + other = self._make_numeric_only(other) + + return flex_binary_moment(target, other, func, pairwise=bool(pairwise)) + + def _apply( + self, + func: Callable[..., Any], + name: str, + numeric_only: bool = False, + numba_args: tuple[Any, ...] = (), + **kwargs, + ): + """ + Rolling statistical measure using supplied function. + + Designed to be used with passed-in Cython array-based functions. 
+ + Parameters + ---------- + func : callable function to apply + name : str, + numba_args : tuple + args to be passed when func is a numba func + **kwargs + additional arguments for rolling function and window function + + Returns + ------- + y : type of input + """ + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + + def homogeneous_func(values: np.ndarray): + # calculation function + + if values.size == 0: + return values.copy() + + def calc(x): + start, end = window_indexer.get_window_bounds( + num_values=len(x), + min_periods=min_periods, + center=self.center, + closed=self.closed, + step=self.step, + ) + self._check_window_bounds(start, end, len(x)) + + return func(x, start, end, min_periods, *numba_args) + + with np.errstate(all="ignore"): + result = calc(values) + + return result + + if self.method == "single": + return self._apply_blockwise(homogeneous_func, name, numeric_only) + else: + return self._apply_tablewise(homogeneous_func, name, numeric_only) + + def _numba_apply( + self, + func: Callable[..., Any], + engine_kwargs: dict[str, bool] | None = None, + *func_args, + ): + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + obj = self._create_data(self._selected_obj) + if self.axis == 1: + obj = obj.T + values = self._prep_values(obj.to_numpy()) + if values.ndim == 1: + values = values.reshape(-1, 1) + start, end = window_indexer.get_window_bounds( + num_values=len(values), + min_periods=min_periods, + center=self.center, + closed=self.closed, + step=self.step, + ) + self._check_window_bounds(start, end, len(values)) + aggregator = executor.generate_shared_aggregator( + func, **get_jit_arguments(engine_kwargs) + ) + result = aggregator(values, start, end, min_periods, *func_args) + result = result.T if self.axis == 1 else result + index = self._slice_axis_for_step(obj.index, result) + if obj.ndim == 1: + result = result.squeeze() + out = obj._constructor(result, index=index, name=obj.name) + return out + else: + columns = self._slice_axis_for_step(obj.columns, result.T) + out = obj._constructor(result, index=index, columns=columns) + return self._resolve_output(out, obj) + + def aggregate(self, func, *args, **kwargs): + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + if result is None: + return self.apply(func, raw=False, args=args, kwargs=kwargs) + return result + + agg = aggregate + + +class BaseWindowGroupby(BaseWindow): + """ + Provide the groupby windowing facilities. + """ + + _grouper: BaseGrouper + _as_index: bool + _attributes: list[str] = ["_grouper"] + + def __init__( + self, + obj: DataFrame | Series, + *args, + _grouper: BaseGrouper, + _as_index: bool = True, + **kwargs, + ) -> None: + from pandas.core.groupby.ops import BaseGrouper + + if not isinstance(_grouper, BaseGrouper): + raise ValueError("Must pass a BaseGrouper object.") + self._grouper = _grouper + self._as_index = _as_index + # GH 32262: It's convention to keep the grouping column in + # groupby., but unexpected to users in + # groupby.rolling. 
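Editorial sketch of the behaviour the GH 32262 comment above describes (the frame and column names below are illustrative, not taken from the source): in groupby.rolling the grouping key is dropped from the value columns and reappears as the outer level of the MultiIndex that ``_apply`` rebuilds further down.

    import pandas as pd

    df = pd.DataFrame({"g": ["a", "a", "b", "b"], "v": [1.0, 2.0, 3.0, 4.0]})

    # Only column 'v' is aggregated; 'g' and the original row index together
    # form the MultiIndex of the result.
    out = df.groupby("g").rolling(2).sum()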
+ obj = obj.drop(columns=self._grouper.names, errors="ignore") + # GH 15354 + if kwargs.get("step") is not None: + raise NotImplementedError("step not implemented for groupby") + super().__init__(obj, *args, **kwargs) + + def _apply( + self, + func: Callable[..., Any], + name: str, + numeric_only: bool = False, + numba_args: tuple[Any, ...] = (), + **kwargs, + ) -> DataFrame | Series: + result = super()._apply( + func, + name, + numeric_only, + numba_args, + **kwargs, + ) + # Reconstruct the resulting MultiIndex + # 1st set of levels = group by labels + # 2nd set of levels = original DataFrame/Series index + grouped_object_index = self.obj.index + grouped_index_name = [*grouped_object_index.names] + groupby_keys = copy.copy(self._grouper.names) + result_index_names = groupby_keys + grouped_index_name + + drop_columns = [ + key + for key in self._grouper.names + if key not in self.obj.index.names or key is None + ] + + if len(drop_columns) != len(groupby_keys): + # Our result will have still kept the column in the result + result = result.drop(columns=drop_columns, errors="ignore") + + codes = self._grouper.codes + levels = copy.copy(self._grouper.levels) + + group_indices = self._grouper.indices.values() + if group_indices: + indexer = np.concatenate(list(group_indices)) + else: + indexer = np.array([], dtype=np.intp) + codes = [c.take(indexer) for c in codes] + + # if the index of the original dataframe needs to be preserved, append + # this index (but reordered) to the codes/levels from the groupby + if grouped_object_index is not None: + idx = grouped_object_index.take(indexer) + if not isinstance(idx, MultiIndex): + idx = MultiIndex.from_arrays([idx]) + codes.extend(list(idx.codes)) + levels.extend(list(idx.levels)) + + result_index = MultiIndex( + levels, codes, names=result_index_names, verify_integrity=False + ) + + result.index = result_index + if not self._as_index: + result = result.reset_index(level=list(range(len(groupby_keys)))) + return result + + def _apply_pairwise( + self, + target: DataFrame | Series, + other: DataFrame | Series | None, + pairwise: bool | None, + func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series], + numeric_only: bool, + ) -> DataFrame | Series: + """ + Apply the given pairwise function given 2 pandas objects (DataFrame/Series) + """ + # Manually drop the grouping column first + target = target.drop(columns=self._grouper.names, errors="ignore") + result = super()._apply_pairwise(target, other, pairwise, func, numeric_only) + # 1) Determine the levels + codes of the groupby levels + if other is not None and not all( + len(group) == len(other) for group in self._grouper.indices.values() + ): + # GH 42915 + # len(other) != len(any group), so must reindex (expand) the result + # from flex_binary_moment to a "transform"-like result + # per groupby combination + old_result_len = len(result) + result = concat( + [ + result.take(gb_indices).reindex(result.index) + for gb_indices in self._grouper.indices.values() + ] + ) + + gb_pairs = ( + com.maybe_make_list(pair) for pair in self._grouper.indices.keys() + ) + groupby_codes = [] + groupby_levels = [] + # e.g. 
[[1, 2], [4, 5]] as [[1, 4], [2, 5]] + for gb_level_pair in map(list, zip(*gb_pairs)): + labels = np.repeat(np.array(gb_level_pair), old_result_len) + codes, levels = factorize(labels) + groupby_codes.append(codes) + groupby_levels.append(levels) + else: + # pairwise=True or len(other) == len(each group), so repeat + # the groupby labels by the number of columns in the original object + groupby_codes = self._grouper.codes + # error: Incompatible types in assignment (expression has type + # "List[Index]", variable has type "List[Union[ndarray, Index]]") + groupby_levels = self._grouper.levels # type: ignore[assignment] + + group_indices = self._grouper.indices.values() + if group_indices: + indexer = np.concatenate(list(group_indices)) + else: + indexer = np.array([], dtype=np.intp) + + if target.ndim == 1: + repeat_by = 1 + else: + repeat_by = len(target.columns) + groupby_codes = [ + np.repeat(c.take(indexer), repeat_by) for c in groupby_codes + ] + # 2) Determine the levels + codes of the result from super()._apply_pairwise + if isinstance(result.index, MultiIndex): + result_codes = list(result.index.codes) + result_levels = list(result.index.levels) + result_names = list(result.index.names) + else: + idx_codes, idx_levels = factorize(result.index) + result_codes = [idx_codes] + result_levels = [idx_levels] + result_names = [result.index.name] + + # 3) Create the resulting index by combining 1) + 2) + result_codes = groupby_codes + result_codes + result_levels = groupby_levels + result_levels + result_names = self._grouper.names + result_names + + result_index = MultiIndex( + result_levels, result_codes, names=result_names, verify_integrity=False + ) + result.index = result_index + return result + + def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT: + """ + Split data into blocks & return conformed data. + """ + # Ensure the object we're rolling over is monotonically sorted relative + # to the groups + # GH 36197 + if not obj.empty: + groupby_order = np.concatenate(list(self._grouper.indices.values())).astype( + np.int64 + ) + obj = obj.take(groupby_order) + return super()._create_data(obj, numeric_only) + + def _gotitem(self, key, ndim, subset=None): + # we are setting the index on the actual object + # here so our index is carried through to the selected obj + # when we do the splitting for the groupby + if self.on is not None: + # GH 43355 + subset = self.obj.set_index(self._on) + return super()._gotitem(key, ndim, subset=subset) + + +class Window(BaseWindow): + """ + Provide rolling window calculations. + + Parameters + ---------- + window : int, offset, or BaseIndexer subclass + Size of the moving window. + + If an integer, the fixed number of observations used for + each window. + + If an offset, the time period of each window. Each + window will be a variable sized based on the observations included in + the time-period. This is only valid for datetimelike indexes. + To learn more about the offsets & frequency strings, please see `this link + `__. + + If a BaseIndexer subclass, the window boundaries + based on the defined ``get_window_bounds`` method. Additional rolling + keyword arguments, namely ``min_periods``, ``center``, ``closed`` and + ``step`` will be passed to ``get_window_bounds``. + + min_periods : int, default None + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + For a window that is specified by an offset, ``min_periods`` will default to 1. 
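Editorial sketch of the two window forms just described (the data and timestamps are illustrative assumptions): an integer window always covers a fixed number of rows, while an offset window needs a datetime-like index and sizes itself by elapsed time; as noted around this example, ``min_periods`` defaults differently for the two forms.

    import pandas as pd

    s = pd.Series(
        [0.0, 1.0, 2.0, 3.0],
        index=pd.date_range("2023-01-01", periods=4, freq="s"),
    )

    s.rolling(2).sum()     # fixed window: always the last 2 observations
    s.rolling("2s").sum()  # variable window: whatever falls within the last 2 seconds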
+ + For a window that is specified by an integer, ``min_periods`` will default + to the size of the window. + + center : bool, default False + If False, set the window labels as the right edge of the window index. + + If True, set the window labels as the center of the window index. + + win_type : str, default None + If ``None``, all points are evenly weighted. + + If a string, it must be a valid `scipy.signal window function + `__. + + Certain Scipy window types require additional parameters to be passed + in the aggregation function. The additional parameters must match + the keywords specified in the Scipy window type method signature. + + on : str, optional + For a DataFrame, a column label or Index level on which + to calculate the rolling window, rather than the DataFrame's index. + + Provided integer column is ignored and excluded from result since + an integer index is not used to calculate the rolling window. + + axis : int or str, default 0 + If ``0`` or ``'index'``, roll across the rows. + + If ``1`` or ``'columns'``, roll across the columns. + + For `Series` this parameter is unused and defaults to 0. + + closed : str, default None + If ``'right'``, the first point in the window is excluded from calculations. + + If ``'left'``, the last point in the window is excluded from calculations. + + If ``'both'``, the no points in the window are excluded from calculations. + + If ``'neither'``, the first and last points in the window are excluded + from calculations. + + Default ``None`` (``'right'``). + + .. versionchanged:: 1.2.0 + + The closed parameter with fixed windows is now supported. + + step : int, default None + + .. versionadded:: 1.5.0 + + Evaluate the window at every ``step`` result, equivalent to slicing as + ``[::step]``. ``window`` must be an integer. Using a step argument other + than None or 1 will produce a result with a different shape than the input. + + method : str {'single', 'table'}, default 'single' + + .. versionadded:: 1.3.0 + + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + Returns + ------- + ``Window`` subclass if a ``win_type`` is passed + + ``Rolling`` subclass if ``win_type`` is not passed + + See Also + -------- + expanding : Provides expanding transformations. + ewm : Provides exponential weighted functions. + + Notes + ----- + See :ref:`Windowing Operations ` for further usage details + and examples. + + Examples + -------- + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + **window** + + Rolling sum with a window length of 2 observations. + + >>> df.rolling(2).sum() + B + 0 NaN + 1 1.0 + 2 3.0 + 3 NaN + 4 NaN + + Rolling sum with a window span of 2 seconds. + + >>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + ... index = [pd.Timestamp('20130101 09:00:00'), + ... pd.Timestamp('20130101 09:00:02'), + ... pd.Timestamp('20130101 09:00:03'), + ... pd.Timestamp('20130101 09:00:05'), + ... pd.Timestamp('20130101 09:00:06')]) + + >>> df_time + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 2.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + >>> df_time.rolling('2s').sum() + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 3.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + Rolling sum with forward looking windows with 2 observations. 
+ + >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2) + >>> df.rolling(window=indexer, min_periods=1).sum() + B + 0 1.0 + 1 3.0 + 2 2.0 + 3 4.0 + 4 4.0 + + **min_periods** + + Rolling sum with a window length of 2 observations, but only needs a minimum of 1 + observation to calculate a value. + + >>> df.rolling(2, min_periods=1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 2.0 + 4 4.0 + + **center** + + Rolling sum with the result assigned to the center of the window index. + + >>> df.rolling(3, min_periods=1, center=True).sum() + B + 0 1.0 + 1 3.0 + 2 3.0 + 3 6.0 + 4 4.0 + + >>> df.rolling(3, min_periods=1, center=False).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 6.0 + + **step** + + Rolling sum with a window length of 2 observations, minimum of 1 observation to + calculate a value, and a step of 2. + + >>> df.rolling(2, min_periods=1, step=2).sum() + B + 0 0.0 + 2 3.0 + 4 4.0 + + **win_type** + + Rolling sum with a window length of 2, using the Scipy ``'gaussian'`` + window type. ``std`` is required in the aggregation function. + + >>> df.rolling(2, win_type='gaussian').sum(std=3) + B + 0 NaN + 1 0.986207 + 2 2.958621 + 3 NaN + 4 NaN + """ + + _attributes = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "step", + "method", + ] + + def _validate(self): + super()._validate() + + if not isinstance(self.win_type, str): + raise ValueError(f"Invalid win_type {self.win_type}") + signal = import_optional_dependency( + "scipy.signal", extra="Scipy is required to generate window weight." + ) + self._scipy_weight_generator = getattr(signal, self.win_type, None) + if self._scipy_weight_generator is None: + raise ValueError(f"Invalid win_type {self.win_type}") + + if isinstance(self.window, BaseIndexer): + raise NotImplementedError( + "BaseIndexer subclasses not implemented with win_types." + ) + elif not is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") + + if self.method != "single": + raise NotImplementedError("'single' is the only supported method type.") + + def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: + """ + Center the result in the window for weighted rolling aggregations. + """ + if offset > 0: + lead_indexer = [slice(offset, None)] + result = np.copy(result[tuple(lead_indexer)]) + return result + + def _apply( + self, + func: Callable[[np.ndarray, int, int], np.ndarray], + name: str, + numeric_only: bool = False, + numba_args: tuple[Any, ...] = (), + **kwargs, + ): + """ + Rolling with weights statistical measure using supplied function. + + Designed to be used with passed-in Cython array-based functions. 
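A hedged editorial example of the weighted path that ``_validate`` above guards (it assumes SciPy is installed): ``win_type`` must name a ``scipy.signal`` window, the window size must be an integer, and any extra window parameters are forwarded through the aggregation call.

    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])

    # 'gaussian' requires its std parameter, passed via the aggregation kwargs.
    s.rolling(3, win_type="gaussian").mean(std=2)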
+ + Parameters + ---------- + func : callable function to apply + name : str, + numeric_only : bool, default False + Whether to only operate on bool, int, and float columns + numba_args : tuple + unused + **kwargs + additional arguments for scipy windows if necessary + + Returns + ------- + y : type of input + """ + # "None" not callable [misc] + window = self._scipy_weight_generator( # type: ignore[misc] + self.window, **kwargs + ) + offset = (len(window) - 1) // 2 if self.center else 0 + + def homogeneous_func(values: np.ndarray): + # calculation function + + if values.size == 0: + return values.copy() + + def calc(x): + additional_nans = np.array([np.nan] * offset) + x = np.concatenate((x, additional_nans)) + return func(x, window, self.min_periods or len(window)) + + with np.errstate(all="ignore"): + # Our weighted aggregations return memoryviews + result = np.asarray(calc(values)) + + if self.center: + result = self._center_window(result, offset) + + return result + + return self._apply_blockwise(homogeneous_func, name, numeric_only)[:: self.step] + + @doc( + _shared_docs["aggregate"], + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.aggregate : Similar DataFrame method. + pandas.Series.aggregate : Similar Series method. + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.rolling(2, win_type="boxcar").agg("mean") + A B C + 0 NaN NaN NaN + 1 1.5 4.5 7.5 + 2 2.5 5.5 8.5 + """ + ), + klass="Series/DataFrame", + axis="", + ) + def aggregate(self, func, *args, **kwargs): + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + if result is None: + + # these must apply directly + result = func(self) + + return result + + agg = aggregate + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window sum", + agg_method="sum", + ) + def sum(self, numeric_only: bool = False, *args, **kwargs): + nv.validate_window_func("sum", args, kwargs) + window_func = window_aggregations.roll_weighted_sum + # error: Argument 1 to "_apply" of "Window" has incompatible type + # "Callable[[ndarray, ndarray, int], ndarray]"; expected + # "Callable[[ndarray, int, int], ndarray]" + return self._apply( + window_func, # type: ignore[arg-type] + name="sum", + numeric_only=numeric_only, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window mean", + agg_method="mean", + ) + def mean(self, numeric_only: bool = False, *args, **kwargs): + nv.validate_window_func("mean", args, kwargs) + window_func = window_aggregations.roll_weighted_mean + # error: Argument 1 to "_apply" of "Window" has incompatible type + # "Callable[[ndarray, ndarray, int], ndarray]"; expected + # "Callable[[ndarray, int, int], ndarray]" + return self._apply( + window_func, # type: ignore[arg-type] + name="mean", + numeric_only=numeric_only, + **kwargs, + ) + + @doc( + template_header, + ".. 
versionadded:: 1.0.0 \n\n", + create_section_header("Parameters"), + kwargs_numeric_only, + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window variance", + agg_method="var", + ) + def var(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs): + nv.validate_window_func("var", args, kwargs) + window_func = partial(window_aggregations.roll_weighted_var, ddof=ddof) + kwargs.pop("name", None) + return self._apply(window_func, name="var", numeric_only=numeric_only, **kwargs) + + @doc( + template_header, + ".. versionadded:: 1.0.0 \n\n", + create_section_header("Parameters"), + kwargs_numeric_only, + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window standard deviation", + agg_method="std", + ) + def std(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs): + nv.validate_window_func("std", args, kwargs) + return zsqrt( + self.var(ddof=ddof, name="std", numeric_only=numeric_only, **kwargs) + ) + + +class RollingAndExpandingMixin(BaseWindow): + def count(self, numeric_only: bool = False): + window_func = window_aggregations.roll_sum + return self._apply(window_func, name="count", numeric_only=numeric_only) + + def apply( + self, + func: Callable[..., Any], + raw: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, ...] | None = None, + kwargs: dict[str, Any] | None = None, + ): + if args is None: + args = () + if kwargs is None: + kwargs = {} + + if not is_bool(raw): + raise ValueError("raw parameter must be `True` or `False`") + + numba_args: tuple[Any, ...] 
= () + if maybe_use_numba(engine): + if raw is False: + raise ValueError("raw must be `True` when using the numba engine") + numba_args = args + if self.method == "single": + apply_func = generate_numba_apply_func( + func, **get_jit_arguments(engine_kwargs, kwargs) + ) + else: + apply_func = generate_numba_table_func( + func, **get_jit_arguments(engine_kwargs, kwargs) + ) + elif engine in ("cython", None): + if engine_kwargs is not None: + raise ValueError("cython engine does not accept engine_kwargs") + apply_func = self._generate_cython_apply_func(args, kwargs, raw, func) + else: + raise ValueError("engine must be either 'numba' or 'cython'") + + return self._apply( + apply_func, + name="apply", + numba_args=numba_args, + ) + + def _generate_cython_apply_func( + self, + args: tuple[Any, ...], + kwargs: dict[str, Any], + raw: bool, + function: Callable[..., Any], + ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]: + from pandas import Series + + window_func = partial( + window_aggregations.roll_apply, + args=args, + kwargs=kwargs, + raw=raw, + function=function, + ) + + def apply_func(values, begin, end, min_periods, raw=raw): + if not raw: + # GH 45912 + values = Series(values, index=self._on) + return window_func(values, begin, end, min_periods) + + return apply_func + + def sum( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("sum", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nansum) + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + else: + from pandas.core._numba.kernels import sliding_sum + + return self._numba_apply(sliding_sum, engine_kwargs) + window_func = window_aggregations.roll_sum + return self._apply(window_func, name="sum", numeric_only=numeric_only, **kwargs) + + def max( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("max", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nanmax) + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + else: + from pandas.core._numba.kernels import sliding_min_max + + return self._numba_apply(sliding_min_max, engine_kwargs, True) + window_func = window_aggregations.roll_max + return self._apply(window_func, name="max", numeric_only=numeric_only, **kwargs) + + def min( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("min", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nanmin) + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + else: + from pandas.core._numba.kernels import sliding_min_max + + return self._numba_apply(sliding_min_max, engine_kwargs, False) + window_func = window_aggregations.roll_min + return self._apply(window_func, name="min", numeric_only=numeric_only, **kwargs) + + def mean( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("mean", args, kwargs) + if maybe_use_numba(engine): + if 
self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nanmean) + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + else: + from pandas.core._numba.kernels import sliding_mean + + return self._numba_apply(sliding_mean, engine_kwargs) + window_func = window_aggregations.roll_mean + return self._apply( + window_func, name="mean", numeric_only=numeric_only, **kwargs + ) + + def median( + self, + numeric_only: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + if maybe_use_numba(engine): + if self.method == "table": + func = generate_manual_numpy_nan_agg_with_axis(np.nanmedian) + else: + func = np.nanmedian + + return self.apply( + func, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) + window_func = window_aggregations.roll_median_c + return self._apply( + window_func, name="median", numeric_only=numeric_only, **kwargs + ) + + def std( + self, + ddof: int = 1, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("std", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("std not supported with method='table'") + else: + from pandas.core._numba.kernels import sliding_var + + return zsqrt(self._numba_apply(sliding_var, engine_kwargs, ddof)) + window_func = window_aggregations.roll_var + + def zsqrt_func(values, begin, end, min_periods): + return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) + + return self._apply( + zsqrt_func, + name="std", + numeric_only=numeric_only, + **kwargs, + ) + + def var( + self, + ddof: int = 1, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + nv.validate_window_func("var", args, kwargs) + if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("var not supported with method='table'") + else: + from pandas.core._numba.kernels import sliding_var + + return self._numba_apply(sliding_var, engine_kwargs, ddof) + window_func = partial(window_aggregations.roll_var, ddof=ddof) + return self._apply( + window_func, + name="var", + numeric_only=numeric_only, + **kwargs, + ) + + def skew(self, numeric_only: bool = False, **kwargs): + window_func = window_aggregations.roll_skew + return self._apply( + window_func, + name="skew", + numeric_only=numeric_only, + **kwargs, + ) + + def sem(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs): + nv.validate_rolling_func("sem", args, kwargs) + # Raise here so error message says sem instead of std + self._validate_numeric_only("sem", numeric_only) + return self.std(numeric_only=numeric_only, **kwargs) / ( + self.count(numeric_only=numeric_only) - ddof + ).pow(0.5) + + def kurt(self, numeric_only: bool = False, **kwargs): + window_func = window_aggregations.roll_kurt + return self._apply( + window_func, + name="kurt", + numeric_only=numeric_only, + **kwargs, + ) + + def quantile( + self, + quantile: float, + interpolation: QuantileInterpolation = "linear", + numeric_only: bool = False, + **kwargs, + ): + if quantile == 1.0: + window_func = window_aggregations.roll_max + elif quantile == 0.0: + window_func = window_aggregations.roll_min + else: + window_func = partial( + window_aggregations.roll_quantile, + quantile=quantile, + interpolation=interpolation, + ) + + return self._apply( + window_func, 
name="quantile", numeric_only=numeric_only, **kwargs + ) + + def rank( + self, + method: WindowingRankType = "average", + ascending: bool = True, + pct: bool = False, + numeric_only: bool = False, + **kwargs, + ): + window_func = partial( + window_aggregations.roll_rank, + method=method, + ascending=ascending, + percentile=pct, + ) + + return self._apply( + window_func, name="rank", numeric_only=numeric_only, **kwargs + ) + + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + if self.step is not None: + raise NotImplementedError("step not implemented for cov") + self._validate_numeric_only("cov", numeric_only) + + from pandas import Series + + def cov_func(x, y): + x_array = self._prep_values(x) + y_array = self._prep_values(y) + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x_array), + min_periods=min_periods, + center=self.center, + closed=self.closed, + step=self.step, + ) + self._check_window_bounds(start, end, len(x_array)) + + with np.errstate(all="ignore"): + mean_x_y = window_aggregations.roll_mean( + x_array * y_array, start, end, min_periods + ) + mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods) + mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods) + count_x_y = window_aggregations.roll_sum( + notna(x_array + y_array).astype(np.float64), start, end, 0 + ) + result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof)) + return Series(result, index=x.index, name=x.name) + + return self._apply_pairwise( + self._selected_obj, other, pairwise, cov_func, numeric_only + ) + + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + if self.step is not None: + raise NotImplementedError("step not implemented for corr") + self._validate_numeric_only("corr", numeric_only) + + from pandas import Series + + def corr_func(x, y): + x_array = self._prep_values(x) + y_array = self._prep_values(y) + window_indexer = self._get_window_indexer() + min_periods = ( + self.min_periods + if self.min_periods is not None + else window_indexer.window_size + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x_array), + min_periods=min_periods, + center=self.center, + closed=self.closed, + step=self.step, + ) + self._check_window_bounds(start, end, len(x_array)) + + with np.errstate(all="ignore"): + mean_x_y = window_aggregations.roll_mean( + x_array * y_array, start, end, min_periods + ) + mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods) + mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods) + count_x_y = window_aggregations.roll_sum( + notna(x_array + y_array).astype(np.float64), start, end, 0 + ) + x_var = window_aggregations.roll_var( + x_array, start, end, min_periods, ddof + ) + y_var = window_aggregations.roll_var( + y_array, start, end, min_periods, ddof + ) + numerator = (mean_x_y - mean_x * mean_y) * ( + count_x_y / (count_x_y - ddof) + ) + denominator = (x_var * y_var) ** 0.5 + result = numerator / denominator + return Series(result, index=x.index, name=x.name) + + return self._apply_pairwise( + self._selected_obj, other, pairwise, corr_func, numeric_only + ) + + +class Rolling(RollingAndExpandingMixin): + 
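A brief editorial illustration of the pairwise reductions implemented above (the series values are made up): ``cov`` and ``corr`` accept an optional other object and compute a windowed sample covariance and Pearson correlation, respectively.

    import pandas as pd

    s1 = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
    s2 = pd.Series([1.0, 2.0, 4.0, 3.0, 5.0])

    s1.rolling(3).cov(s2)    # windowed sample covariance, ddof=1 by default
    s1.rolling(3).corr(s2)   # windowed Pearson correlation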
+ _attributes: list[str] = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "step", + "method", + ] + + def _validate(self): + super()._validate() + + # we allow rolling on a datetimelike index + if ( + self.obj.empty + or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex)) + ) and isinstance(self.window, (str, BaseOffset, timedelta)): + + self._validate_datetimelike_monotonic() + + # this will raise ValueError on non-fixed freqs + try: + freq = to_offset(self.window) + except (TypeError, ValueError) as err: + raise ValueError( + f"passed window {self.window} is not " + "compatible with a datetimelike index" + ) from err + if isinstance(self._on, PeriodIndex): + # error: Incompatible types in assignment (expression has type + # "float", variable has type "Optional[int]") + self._win_freq_i8 = freq.nanos / ( # type: ignore[assignment] + self._on.freq.nanos / self._on.freq.n + ) + else: + self._win_freq_i8 = freq.nanos + + # min_periods must be an integer + if self.min_periods is None: + self.min_periods = 1 + + if self.step is not None: + raise NotImplementedError( + "step is not supported with frequency windows" + ) + + elif isinstance(self.window, BaseIndexer): + # Passed BaseIndexer subclass should handle all other rolling kwargs + pass + elif not is_integer(self.window) or self.window < 0: + raise ValueError("window must be an integer 0 or greater") + + def _validate_datetimelike_monotonic(self): + """ + Validate self._on is monotonic (increasing or decreasing) and has + no NaT values for frequency windows. + """ + if self._on.hasnans: + self._raise_monotonic_error("values must not have NaT") + if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing): + self._raise_monotonic_error("values must be monotonic") + + def _raise_monotonic_error(self, msg: str): + on = self.on + if on is None: + if self.axis == 0: + on = "index" + else: + on = "column" + raise ValueError(f"{on} {msg}") + + @doc( + _shared_docs["aggregate"], + see_also=dedent( + """ + See Also + -------- + pandas.Series.rolling : Calling object with Series data. + pandas.DataFrame.rolling : Calling object with DataFrame data. 
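Editorial note with a hedged sketch of the validation above (dates are illustrative): a frequency window such as ``'2D'`` requires a monotonic, NaT-free datetime-like index, so unsorted input is normally sorted before rolling.

    import pandas as pd

    idx = pd.to_datetime(["2023-01-03", "2023-01-01", "2023-01-02"])
    df = pd.DataFrame({"v": [3.0, 1.0, 2.0]}, index=idx)

    # Rolling over the unsorted index raises ValueError ("index values must be
    # monotonic"); sorting first satisfies the datetimelike-monotonic check.
    df.sort_index().rolling("2D").sum()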
+ """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.rolling(2).sum() + A B C + 0 NaN NaN NaN + 1 3.0 9.0 15.0 + 2 5.0 11.0 17.0 + + >>> df.rolling(2).agg({"A": "sum", "B": "min"}) + A B + 0 NaN NaN + 1 3.0 4.0 + 2 5.0 5.0 + """ + ), + klass="Series/Dataframe", + axis="", + ) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([2, 3, np.nan, 10]) + >>> s.rolling(2).count() + 0 1.0 + 1 2.0 + 2 1.0 + 3 1.0 + dtype: float64 + >>> s.rolling(3).count() + 0 1.0 + 1 2.0 + 2 2.0 + 3 2.0 + dtype: float64 + >>> s.rolling(4).count() + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="count of non NaN observations", + agg_method="count", + ) + def count(self, numeric_only: bool = False): + if self.min_periods is None: + warnings.warn( + ( + "min_periods=None will default to the size of window " + "consistent with other methods in a future version. " + "Specify min_periods=0 instead." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + self.min_periods = 0 + result = super().count() + self.min_periods = None + else: + result = super().count(numeric_only) + return result + + @doc( + template_header, + create_section_header("Parameters"), + window_apply_parameters, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="custom aggregation function", + agg_method="apply", + ) + def apply( + self, + func: Callable[..., Any], + raw: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, ...] | None = None, + kwargs: dict[str, Any] | None = None, + ): + return super().apply( + func, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + kwargs=kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 2, 3, 4, 5]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: int64 + + >>> s.rolling(3).sum() + 0 NaN + 1 NaN + 2 6.0 + 3 9.0 + 4 12.0 + dtype: float64 + + >>> s.rolling(3, center=True).sum() + 0 NaN + 1 6.0 + 2 9.0 + 3 12.0 + 4 NaN + dtype: float64 + + For DataFrame, each sum is computed column-wise. 
+ + >>> df = pd.DataFrame({{"A": s, "B": s ** 2}}) + >>> df + A B + 0 1 1 + 1 2 4 + 2 3 9 + 3 4 16 + 4 5 25 + + >>> df.rolling(3).sum() + A B + 0 NaN NaN + 1 NaN NaN + 2 6.0 14.0 + 3 9.0 29.0 + 4 12.0 50.0 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="sum", + agg_method="sum", + ) + def sum( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs) + nv.validate_rolling_func("sum", args, kwargs) + return super().sum( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes[:-1], + window_method="rolling", + aggregation_description="maximum", + agg_method="max", + ) + def max( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "max", args, kwargs) + nv.validate_rolling_func("max", args, kwargs) + return super().max( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + Performing a rolling minimum with a window size of 3. + + >>> s = pd.Series([4, 3, 5, 2, 6]) + >>> s.rolling(3).min() + 0 NaN + 1 NaN + 2 3.0 + 3 2.0 + 4 2.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="minimum", + agg_method="min", + ) + def min( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "min", args, kwargs) + nv.validate_rolling_func("min", args, kwargs) + return super().min( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + The below examples will show rolling mean calculations with window sizes of + two and three, respectively. 
+ + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.rolling(2).mean() + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + + >>> s.rolling(3).mean() + 0 NaN + 1 NaN + 2 2.0 + 3 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="mean", + agg_method="mean", + ) + def mean( + self, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs) + nv.validate_rolling_func("mean", args, kwargs) + return super().mean( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + window_agg_numba_parameters(), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + Compute the rolling median of a series with a window size of 3. + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.rolling(3).median() + 0 NaN + 1 NaN + 2 1.0 + 3 2.0 + 4 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="median", + agg_method="median", + ) + def median( + self, + numeric_only: bool = False, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "median", None, kwargs) + return super().median( + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters("1.4"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.std : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.std` is different + than the default ``ddof`` of 0 in :func:`numpy.std`. + + A minimum of one period is required for the rolling calculation.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + >>> s.rolling(3).std() + 0 NaN + 1 NaN + 2 0.577350 + 3 1.000000 + 4 1.000000 + 5 1.154701 + 6 0.000000 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="standard deviation", + agg_method="std", + ) + def std( + self, + ddof: int = 1, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "std", args, kwargs) + nv.validate_rolling_func("std", args, kwargs) + return super().std( + ddof=ddof, + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. 
+ """ + ).replace("\n", "", 1), + kwargs_numeric_only, + args_compat, + window_agg_numba_parameters("1.4"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.var : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.var` is different + than the default ``ddof`` of 0 in :func:`numpy.var`. + + A minimum of one period is required for the rolling calculation.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + >>> s.rolling(3).var() + 0 NaN + 1 NaN + 2 0.333333 + 3 1.000000 + 4 1.000000 + 5 1.333333 + 6 0.000000 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="variance", + agg_method="var", + ) + def var( + self, + ddof: int = 1, + numeric_only: bool = False, + *args, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "var", args, kwargs) + nv.validate_rolling_func("var", args, kwargs) + return super().var( + ddof=ddof, + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.skew : Third moment of a probability density.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of three periods is required for the rolling calculation.\n", + window_method="rolling", + aggregation_description="unbiased skewness", + agg_method="skew", + ) + def skew(self, numeric_only: bool = False, **kwargs): + maybe_warn_args_and_kwargs(type(self), "skew", None, kwargs) + return super().skew(numeric_only=numeric_only, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. 
+ """ + ).replace("\n", "", 1), + kwargs_numeric_only, + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + "A minimum of one period is required for the calculation.\n\n", + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([0, 1, 2, 3]) + >>> s.rolling(2, min_periods=1).sem() + 0 NaN + 1 0.707107 + 2 0.707107 + 3 0.707107 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="standard error of mean", + agg_method="sem", + ) + def sem(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs): + maybe_warn_args_and_kwargs(type(self), "sem", args, kwargs) + nv.validate_rolling_func("sem", args, kwargs) + # Raise here so error message says sem instead of std + self._validate_numeric_only("sem", numeric_only) + return self.std(numeric_only=numeric_only, **kwargs) / ( + self.count(numeric_only) - ddof + ).pow(0.5) + + @doc( + template_header, + create_section_header("Parameters"), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.kurtosis : Reference SciPy method.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of four periods is required for the calculation.\n\n", + create_section_header("Examples"), + dedent( + """ + The example below will show a rolling calculation with a window size of + four matching the equivalent function call using `scipy.stats`. + + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}") + -1.200000 + >>> print(f"{{scipy.stats.kurtosis(arr[1:], bias=False):.6f}}") + 3.999946 + >>> s = pd.Series(arr) + >>> s.rolling(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 3.999946 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="Fisher's definition of kurtosis without bias", + agg_method="kurt", + ) + def kurt(self, numeric_only: bool = False, **kwargs): + maybe_warn_args_and_kwargs(type(self), "kurt", None, kwargs) + return super().kurt(numeric_only=numeric_only, **kwargs) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + quantile : float + Quantile to compute. 0 <= quantile <= 1. + interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. 
+ """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.rolling(2).quantile(.4, interpolation='lower') + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + dtype: float64 + + >>> s.rolling(2).quantile(.4, interpolation='midpoint') + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="quantile", + agg_method="quantile", + ) + def quantile( + self, + quantile: float, + interpolation: QuantileInterpolation = "linear", + numeric_only: bool = False, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "quantile", None, kwargs) + return super().quantile( + quantile=quantile, + interpolation=interpolation, + numeric_only=numeric_only, + **kwargs, + ) + + @doc( + template_header, + ".. versionadded:: 1.4.0 \n\n", + create_section_header("Parameters"), + dedent( + """ + method : {{'average', 'min', 'max'}}, default 'average' + How to rank the group of records that have the same value (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group + + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 4, 2, 3, 5, 3]) + >>> s.rolling(3).rank() + 0 NaN + 1 NaN + 2 2.0 + 3 2.0 + 4 3.0 + 5 1.5 + dtype: float64 + + >>> s.rolling(3).rank(method="max") + 0 NaN + 1 NaN + 2 2.0 + 3 2.0 + 4 3.0 + 5 2.0 + dtype: float64 + + >>> s.rolling(3).rank(method="min") + 0 NaN + 1 NaN + 2 2.0 + 3 2.0 + 4 3.0 + 5 1.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="rank", + agg_method="rank", + ) + def rank( + self, + method: WindowingRankType = "average", + ascending: bool = True, + pct: bool = False, + numeric_only: bool = False, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "rank", None, kwargs) + return super().rank( + method=method, + ascending=ascending, + pct=pct, + numeric_only=numeric_only, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. 
+ """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="sample covariance", + agg_method="cov", + ) + def cov( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "cov", None, kwargs) + return super().cov( + other=other, + pairwise=pairwise, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series or DataFrame, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + kwargs_numeric_only, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + dedent( + """ + cov : Similar method to calculate covariance. + numpy.corrcoef : NumPy Pearson's correlation calculation. + """ + ).replace("\n", "", 1), + template_see_also, + create_section_header("Notes"), + dedent( + """ + This function uses Pearson's definition of correlation + (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). + + When `other` is not specified, the output will be self correlation (e.g. + all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` + set to `True`. + + Function will return ``NaN`` for correlations of equal valued sequences; + this is the result of a 0/0 division error. + + When `pairwise` is set to `False`, only matching columns between `self` and + `other` will be used. + + When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame + with the original index on the first level, and the `other` DataFrame + columns on the second level. + + In the case of missing elements, only complete pairwise observations + will be used.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + The below example shows a rolling calculation with a window size of + four matching the equivalent function call using :meth:`numpy.corrcoef`. + + >>> v1 = [3, 3, 3, 5, 8] + >>> v2 = [3, 4, 4, 4, 8] + >>> # numpy returns a 2X2 array, the correlation coefficient + >>> # is the number at entry [0][1] + >>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}") + 0.333333 + >>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}") + 0.916949 + >>> s1 = pd.Series(v1) + >>> s2 = pd.Series(v2) + >>> s1.rolling(4).corr(s2) + 0 NaN + 1 NaN + 2 NaN + 3 0.333333 + 4 0.916949 + dtype: float64 + + The below example shows a similar rolling calculation on a + DataFrame using the pairwise option. + + >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\ + [46., 31.], [50., 36.]]) + >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7)) + [[1. 0.6263001] + [0.6263001 1. 
]] + >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7)) + [[1. 0.5553681] + [0.5553681 1. ]] + >>> df = pd.DataFrame(matrix, columns=['X','Y']) + >>> df + X Y + 0 51.0 35.0 + 1 49.0 30.0 + 2 47.0 32.0 + 3 46.0 31.0 + 4 50.0 36.0 + >>> df.rolling(4).corr(pairwise=True) + X Y + 0 X NaN NaN + Y NaN NaN + 1 X NaN NaN + Y NaN NaN + 2 X NaN NaN + Y NaN NaN + 3 X 1.000000 0.626300 + Y 0.626300 1.000000 + 4 X 1.000000 0.555368 + Y 0.555368 1.000000 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="correlation", + agg_method="corr", + ) + def corr( + self, + other: DataFrame | Series | None = None, + pairwise: bool | None = None, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + maybe_warn_args_and_kwargs(type(self), "corr", None, kwargs) + return super().corr( + other=other, + pairwise=pairwise, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + + +Rolling.__doc__ = Window.__doc__ + + +class RollingGroupby(BaseWindowGroupby, Rolling): + """ + Provide a rolling groupby implementation. + """ + + _attributes = Rolling._attributes + BaseWindowGroupby._attributes + + def _get_window_indexer(self) -> GroupbyIndexer: + """ + Return an indexer class that will compute the window start and end bounds + + Returns + ------- + GroupbyIndexer + """ + rolling_indexer: type[BaseIndexer] + indexer_kwargs: dict[str, Any] | None = None + index_array = self._index_array + if isinstance(self.window, BaseIndexer): + rolling_indexer = type(self.window) + indexer_kwargs = self.window.__dict__.copy() + assert isinstance(indexer_kwargs, dict) # for mypy + # We'll be using the index of each group later + indexer_kwargs.pop("index_array", None) + window = self.window + elif self._win_freq_i8 is not None: + rolling_indexer = VariableWindowIndexer + # error: Incompatible types in assignment (expression has type + # "int", variable has type "BaseIndexer") + window = self._win_freq_i8 # type: ignore[assignment] + else: + rolling_indexer = FixedWindowIndexer + window = self.window + window_indexer = GroupbyIndexer( + index_array=index_array, + window_size=window, + groupby_indices=self._grouper.indices, + window_indexer=rolling_indexer, + indexer_kwargs=indexer_kwargs, + ) + return window_indexer + + def _validate_datetimelike_monotonic(self): + """ + Validate that each group in self._on is monotonic + """ + # GH 46061 + if self._on.hasnans: + self._raise_monotonic_error("values must not have NaT") + for group_indices in self._grouper.indices.values(): + group_on = self._on.take(group_indices) + if not ( + group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing + ): + on = "index" if self.on is None else self.on + raise ValueError( + f"Each group within {on} must be monotonic. " + f"Sort the values in {on} first." + ) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py new file mode 100644 index 00000000..d0c9ef94 --- /dev/null +++ b/pandas/errors/__init__.py @@ -0,0 +1,576 @@ +""" +Expose public exceptions & warnings +""" +from __future__ import annotations + +import ctypes + +from pandas._config.config import OptionError + +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) + + +class IntCastingNaNError(ValueError): + """ + Exception raised when converting (``astype``) an array with NaN to an integer type. + """ + + +class NullFrequencyError(ValueError): + """ + Exception raised when a ``freq`` cannot be null. 
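A hedged editorial example for ``IntCastingNaNError`` defined above (the values are illustrative): casting an array that contains NaN to an integer dtype raises this ``ValueError`` subclass, which can be caught explicitly.

    import numpy as np
    import pandas as pd
    from pandas.errors import IntCastingNaNError

    try:
        pd.Series([1.0, np.nan]).astype("int64")
    except IntCastingNaNError:
        print("cannot cast NaN to an integer dtype")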
+
+    Raised in particular by ``DatetimeIndex.shift``, ``TimedeltaIndex.shift``
+    and ``PeriodIndex.shift``.
+    """
+
+
+class PerformanceWarning(Warning):
+    """
+    Warning raised when there is a possible performance impact.
+    """
+
+
+class UnsupportedFunctionCall(ValueError):
+    """
+    Exception raised when attempting to call an unsupported numpy function.
+
+    For example, ``np.cumsum(groupby_object)``.
+    """
+
+
+class UnsortedIndexError(KeyError):
+    """
+    Error raised when slicing a MultiIndex which has not been lexsorted.
+
+    Subclass of `KeyError`.
+    """
+
+
+class ParserError(ValueError):
+    """
+    Exception raised when an error is encountered while parsing file contents.
+
+    This is a generic error raised for errors encountered when functions like
+    `read_csv` or `read_html` are parsing contents of a file.
+
+    See Also
+    --------
+    read_csv : Read CSV (comma-separated) file into a DataFrame.
+    read_html : Read HTML table into a DataFrame.
+    """
+
+
+class DtypeWarning(Warning):
+    """
+    Warning raised when reading different dtypes in a column from a file.
+
+    Raised for a dtype incompatibility. This can happen whenever `read_csv`
+    or `read_table` encounter non-uniform dtypes in a column or columns of a
+    given CSV file.
+
+    See Also
+    --------
+    read_csv : Read CSV (comma-separated) file into a DataFrame.
+    read_table : Read general delimited file into a DataFrame.
+
+    Notes
+    -----
+    This warning is issued when dealing with larger files because the dtype
+    checking happens per chunk read.
+
+    Despite the warning, the CSV file is read with mixed types in a single
+    column which will be an object type. See the examples below to better
+    understand this issue.
+
+    Examples
+    --------
+    This example creates and reads a large CSV file with a column that contains
+    `int` and `str`.
+
+    >>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 +
+    ...                          ['1'] * 100000),
+    ...                    'b': ['b'] * 300000})  # doctest: +SKIP
+    >>> df.to_csv('test.csv', index=False)  # doctest: +SKIP
+    >>> df2 = pd.read_csv('test.csv')  # doctest: +SKIP
+    ... # DtypeWarning: Columns (0) have mixed types
+
+    It is important to notice that ``df2`` will contain both `str` and `int`
+    for the same input, '1'.
+
+    >>> df2.iloc[262140, 0]  # doctest: +SKIP
+    '1'
+    >>> type(df2.iloc[262140, 0])  # doctest: +SKIP
+    <class 'str'>
+    >>> df2.iloc[262150, 0]  # doctest: +SKIP
+    1
+    >>> type(df2.iloc[262150, 0])  # doctest: +SKIP
+    <class 'int'>
+
+    One way to solve this issue is using the `dtype` parameter in the
+    `read_csv` and `read_table` functions to make the conversion explicit:
+
+    >>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str})  # doctest: +SKIP
+
+    No warning was issued.
+    """
+
+
+class EmptyDataError(ValueError):
+    """
+    Exception raised in ``pd.read_csv`` when empty data or header is encountered.
+    """
+
+
+class ParserWarning(Warning):
+    """
+    Warning raised when reading a file that doesn't use the default 'c' parser.
+
+    Raised by `pd.read_csv` and `pd.read_table` when it is necessary to change
+    parsers, generally from the default 'c' parser to 'python'.
+
+    It happens due to a lack of support or functionality for parsing a
+    particular attribute of a CSV file with the requested engine.
+
+    Currently, options unsupported by the 'c' engine include:
+
+    1. `sep` other than a single character (e.g. regex separators)
+    2. `skipfooter` higher than 0
+    3. `sep=None` with `delim_whitespace=False`
+
+    The warning can be avoided by adding `engine='python'` as a parameter in
+    `pd.read_csv` and `pd.read_table` methods.
+
+    See Also
+    --------
+    pd.read_csv : Read CSV (comma-separated) file into DataFrame.
+    pd.read_table : Read general delimited file into DataFrame.
+
+    Examples
+    --------
+    Using a `sep` in `pd.read_csv` other than a single character:
+
+    >>> import io
+    >>> csv = '''a;b;c
+    ...           1;1,8
+    ...           1;2,1'''
+    >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]')  # doctest: +SKIP
+    ... # ParserWarning: Falling back to the 'python' engine...
+
+    Adding `engine='python'` to `pd.read_csv` removes the Warning:
+
+    >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python')
+    """
+
+
+class MergeError(ValueError):
+    """
+    Exception raised when merging data.
+
+    Subclass of ``ValueError``.
+    """
+
+
+class AccessorRegistrationWarning(Warning):
+    """
+    Warning for attribute conflicts in accessor registration.
+    """
+
+
+class AbstractMethodError(NotImplementedError):
+    """
+    Raise this error instead of NotImplementedError for abstract methods.
+    """
+
+    def __init__(self, class_instance, methodtype: str = "method") -> None:
+        types = {"method", "classmethod", "staticmethod", "property"}
+        if methodtype not in types:
+            raise ValueError(
+                f"methodtype must be one of {types}, got {methodtype} instead."
+            )
+        self.methodtype = methodtype
+        self.class_instance = class_instance
+
+    def __str__(self) -> str:
+        if self.methodtype == "classmethod":
+            name = self.class_instance.__name__
+        else:
+            name = type(self.class_instance).__name__
+        return f"This {self.methodtype} must be defined in the concrete class {name}"
+
+
+class NumbaUtilError(Exception):
+    """
+    Error raised for unsupported Numba engine routines.
+    """
+
+
+class DuplicateLabelError(ValueError):
+    """
+    Error raised when an operation would introduce duplicate labels.
+
+    .. versionadded:: 1.2.0
+
+    Examples
+    --------
+    >>> s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags(
+    ...     allows_duplicate_labels=False
+    ... )
+    >>> s.reindex(['a', 'a', 'b'])
+    Traceback (most recent call last):
+       ...
+    DuplicateLabelError: Index has duplicates.
+          positions
+    label
+    a        [0, 1]
+    """
+
+
+class InvalidIndexError(Exception):
+    """
+    Exception raised when attempting to use an invalid index key.
+
+    .. versionadded:: 1.1.0
+    """
+
+
+class DataError(Exception):
+    """
+    Exception raised when performing an operation on non-numerical data.
+
+    For example, calling ``ohlc`` on a non-numerical column or a function
+    on a rolling window.
+    """
+
+
+class SpecificationError(Exception):
+    """
+    Exception raised by ``agg`` when the functions are ill-specified.
+
+    The exception is raised in two scenarios.
+
+    The first way is calling ``agg`` on a
+    DataFrame or Series using a nested renamer (dict-of-dict).
+
+    The second way is calling ``agg`` on a DataFrame with duplicated function
+    names without assigning column names.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
+    ...                    'B': range(5),
+    ...                    'C': range(5)})
+    >>> df.groupby('A').B.agg({'foo': 'count'})  # doctest: +SKIP
+    ... # SpecificationError: nested renamer is not supported
+
+    >>> df.groupby('A').agg({'B': {'foo': ['sum', 'max']}})  # doctest: +SKIP
+    ... # SpecificationError: nested renamer is not supported
+
+    >>> df.groupby('A').agg(['min', 'min'])  # doctest: +SKIP
+    ... # SpecificationError: nested renamer is not supported
+    """
+
+
+class SettingWithCopyError(ValueError):
+    """
+    Exception raised when trying to set on a copied slice from a ``DataFrame``.
+
+    The ``mode.chained_assignment`` option needs to be set to 'raise'. This can
+    happen unintentionally when using chained indexing.
+
+    For more information on evaluation order,
+    see :ref:`the user guide<indexing.evaluation_order>`.
+
+    For more information on view vs. copy,
+    see :ref:`the user guide<indexing.view_versus_copy>`.
+
+    Examples
+    --------
+    >>> pd.options.mode.chained_assignment = 'raise'
+    >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
+    >>> df.loc[0:3]['A'] = 'a'  # doctest: +SKIP
+    ... # SettingWithCopyError: A value is trying to be set on a copy of a...
+    """
+
+
+class SettingWithCopyWarning(Warning):
+    """
+    Warning raised when trying to set on a copied slice from a ``DataFrame``.
+
+    The ``mode.chained_assignment`` option needs to be set to 'warn', which is
+    the default. This can happen unintentionally when using chained indexing.
+
+    For more information on evaluation order,
+    see :ref:`the user guide<indexing.evaluation_order>`.
+
+    For more information on view vs. copy,
+    see :ref:`the user guide<indexing.view_versus_copy>`.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
+    >>> df.loc[0:3]['A'] = 'a'  # doctest: +SKIP
+    ... # SettingWithCopyWarning: A value is trying to be set on a copy of a...
+    """
+
+
+class NumExprClobberingError(NameError):
+    """
+    Exception raised when trying to use a built-in numexpr name as a variable name.
+
+    ``eval`` or ``query`` will raise the error if the engine is set
+    to 'numexpr'. 'numexpr' is the default engine value for these methods if the
+    numexpr package is installed.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({'abs': [1, 1, 1]})
+    >>> df.query("abs > 2")  # doctest: +SKIP
+    ... # NumExprClobberingError: Variables in expression "(abs) > (2)" overlap...
+    >>> sin, a = 1, 2
+    >>> pd.eval("sin + a", engine='numexpr')  # doctest: +SKIP
+    ... # NumExprClobberingError: Variables in expression "(sin) + (a)" overlap...
+    """
+
+
+class UndefinedVariableError(NameError):
+    """
+    Exception raised by ``query`` or ``eval`` when using an undefined variable name.
+
+    It will also specify whether the undefined variable is local or not.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({'A': [1, 1, 1]})
+    >>> df.query("A > x")  # doctest: +SKIP
+    ... # UndefinedVariableError: name 'x' is not defined
+    >>> df.query("A > @y")  # doctest: +SKIP
+    ... # UndefinedVariableError: local variable 'y' is not defined
+    >>> pd.eval('x + 1')  # doctest: +SKIP
+    ... # UndefinedVariableError: name 'x' is not defined
+    """
+
+    def __init__(self, name: str, is_local: bool | None = None) -> None:
+        base_msg = f"{repr(name)} is not defined"
+        if is_local:
+            msg = f"local variable {base_msg}"
+        else:
+            msg = f"name {base_msg}"
+        super().__init__(msg)
+
+
+class IndexingError(Exception):
+    """
+    Exception raised when trying to index and there is a mismatch in dimensions.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({'A': [1, 1, 1]})
+    >>> df.loc[..., ..., 'A']  # doctest: +SKIP
+    ... # IndexingError: indexer may only contain one '...' entry
+    >>> df = pd.DataFrame({'A': [1, 1, 1]})
+    >>> df.loc[1, ..., ...]  # doctest: +SKIP
+    ... # IndexingError: Too many indexers
+    >>> df[pd.Series([True], dtype=bool)]  # doctest: +SKIP
+    ... # IndexingError: Unalignable boolean Series provided as indexer...
+    >>> s = pd.Series(range(2),
+    ...               index = pd.MultiIndex.from_product([["a", "b"], ["c"]]))
+    >>> s.loc["a", "c", "d"]  # doctest: +SKIP
+    ... # IndexingError: Too many indexers
+    """
+
+
+class PyperclipException(RuntimeError):
+    """
+    Exception raised when clipboard functionality is unsupported.
+
+    Raised by ``to_clipboard()`` and ``read_clipboard()``.
+    """
+
+
+class PyperclipWindowsException(PyperclipException):
+    """
+    Exception raised when clipboard functionality is unsupported by Windows.
+
+    Access to the clipboard handle can be denied because another window
+    process is accessing it.
+    """
+
+    def __init__(self, message: str) -> None:
+        # attr only exists on Windows, so typing fails on other platforms
+        message += f" ({ctypes.WinError()})"  # type: ignore[attr-defined]
+        super().__init__(message)
+
+
+class CSSWarning(UserWarning):
+    """
+    Warning raised when converting CSS styling fails.
+
+    This can be due to the styling not having an equivalent value or because the
+    styling isn't properly formatted.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({'A': [1, 1, 1]})
+    >>> df.style.applymap(lambda x: 'background-color: blueGreenRed;')
+    ...     .to_excel('styled.xlsx')  # doctest: +SKIP
+    ... # CSSWarning: Unhandled color format: 'blueGreenRed'
+    >>> df.style.applymap(lambda x: 'border: 1px solid red red;')
+    ...     .to_excel('styled.xlsx')  # doctest: +SKIP
+    ... # CSSWarning: Too many tokens provided to "border" (expected 1-3)
+    """
+
+
+class PossibleDataLossError(Exception):
+    """
+    Exception raised when trying to open an HDFStore file that is already open.
+
+    Examples
+    --------
+    >>> store = pd.HDFStore('my-store', 'a')  # doctest: +SKIP
+    >>> store.open("w")  # doctest: +SKIP
+    ... # PossibleDataLossError: Re-opening the file [my-store] with mode [a]...
+    """
+
+
+class ClosedFileError(Exception):
+    """
+    Exception raised when trying to perform an operation on a closed HDFStore file.
+
+    Examples
+    --------
+    >>> store = pd.HDFStore('my-store', 'a')  # doctest: +SKIP
+    >>> store.close()  # doctest: +SKIP
+    >>> store.keys()  # doctest: +SKIP
+    ... # ClosedFileError: my-store file is not open!
+    """
+
+
+class IncompatibilityWarning(Warning):
+    """
+    Warning raised when trying to use where criteria on an incompatible HDF5 file.
+    """
+
+
+class AttributeConflictWarning(Warning):
+    """
+    Warning raised when index attributes conflict when using HDFStore.
+
+    Occurs when attempting to append an index with a different name or a
+    different frequency than the existing index on an HDFStore.
+    """
+
+
+class DatabaseError(OSError):
+    """
+    Error raised when executing SQL with bad syntax or SQL that throws an error.
+
+    Examples
+    --------
+    >>> from sqlite3 import connect
+    >>> conn = connect(':memory:')
+    >>> pd.read_sql('select * test', conn)  # doctest: +SKIP
+    ... # DatabaseError: Execution failed on sql 'test': near "test": syntax error
+    """
+
+
+class PossiblePrecisionLoss(Warning):
+    """
+    Warning raised by to_stata on a column with a value outside or equal to the
+    int64 bound.
+
+    When the column value is outside or equal to the int64 bound, the column is
+    converted to a float64 dtype.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({"s": pd.Series([1, 2**53], dtype=np.int64)})
+    >>> df.to_stata('test')  # doctest: +SKIP
+    ... # PossiblePrecisionLoss: Column converted from int64 to float64...
+    """
+
+
+class ValueLabelTypeMismatch(Warning):
+    """
+    Warning raised by to_stata on a category column that contains non-string values.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({"categories": pd.Series(["a", 2], dtype="category")})
+    >>> df.to_stata('test')  # doctest: +SKIP
+    ... # ValueLabelTypeMismatch: Stata value labels (pandas categories) must be str...
+    """
+
+
+class InvalidColumnName(Warning):
+    """
+    Warning raised by to_stata when the column contains a non-valid Stata name.
+
+    Because the column name is an invalid Stata variable name, it needs to be
+    converted.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({"0categories": pd.Series([2, 2])})
+    >>> df.to_stata('test')  # doctest: +SKIP
+    ... # InvalidColumnName: Not all pandas column names were valid Stata variable...
+    """
+
+
+class CategoricalConversionWarning(Warning):
+    """
+    Warning raised when reading a partially labeled Stata file using an iterator.
+
+    Examples
+    --------
+    >>> from pandas.io.stata import StataReader
+    >>> with StataReader('dta_file', chunksize=2) as reader:  # doctest: +SKIP
+    ...     for i, block in enumerate(reader):
+    ...         print(i, block)
+    ... # CategoricalConversionWarning: One or more series with value labels...
+    """
+
+
+__all__ = [
+    "AbstractMethodError",
+    "AccessorRegistrationWarning",
+    "AttributeConflictWarning",
+    "CategoricalConversionWarning",
+    "ClosedFileError",
+    "CSSWarning",
+    "DatabaseError",
+    "DataError",
+    "DtypeWarning",
+    "DuplicateLabelError",
+    "EmptyDataError",
+    "IncompatibilityWarning",
+    "IntCastingNaNError",
+    "InvalidColumnName",
+    "InvalidIndexError",
+    "IndexingError",
+    "MergeError",
+    "NullFrequencyError",
+    "NumbaUtilError",
+    "NumExprClobberingError",
+    "OptionError",
+    "OutOfBoundsDatetime",
+    "OutOfBoundsTimedelta",
+    "ParserError",
+    "ParserWarning",
+    "PerformanceWarning",
+    "PossibleDataLossError",
+    "PossiblePrecisionLoss",
+    "PyperclipException",
+    "PyperclipWindowsException",
+    "SettingWithCopyError",
+    "SettingWithCopyWarning",
+    "SpecificationError",
+    "UndefinedVariableError",
+    "UnsortedIndexError",
+    "UnsupportedFunctionCall",
+    "ValueLabelTypeMismatch",
+]
diff --git a/pandas/io/__init__.py b/pandas/io/__init__.py
new file mode 100644
index 00000000..bd3ddc09
--- /dev/null
+++ b/pandas/io/__init__.py
@@ -0,0 +1,12 @@
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    # import modules that have public classes/functions
+    from pandas.io import (
+        formats,
+        json,
+        stata,
+    )
+
+    # and mark only those modules as public
+    __all__ = ["formats", "json", "stata"]
diff --git a/pandas/io/api.py b/pandas/io/api.py
new file mode 100644
index 00000000..4e8b34a6
--- /dev/null
+++ b/pandas/io/api.py
@@ -0,0 +1,65 @@
+"""
+Data IO api
+"""
+
+from pandas.io.clipboards import read_clipboard
+from pandas.io.excel import (
+    ExcelFile,
+    ExcelWriter,
+    read_excel,
+)
+from pandas.io.feather_format import read_feather
+from pandas.io.gbq import read_gbq
+from pandas.io.html import read_html
+from pandas.io.json import read_json
+from pandas.io.orc import read_orc
+from pandas.io.parquet import read_parquet
+from pandas.io.parsers import (
+    read_csv,
+    read_fwf,
+    read_table,
+)
+from pandas.io.pickle import (
+    read_pickle,
+    to_pickle,
+)
+from pandas.io.pytables import (
+    HDFStore,
+    read_hdf,
+)
+from pandas.io.sas import read_sas
+from pandas.io.spss import read_spss
+from pandas.io.sql import (
+    read_sql,
+    read_sql_query,
+    read_sql_table,
+)
+from pandas.io.stata import read_stata
+from pandas.io.xml import read_xml
+
+__all__ = [
+    "ExcelFile",
+    "ExcelWriter",
+    "HDFStore",
+    "read_clipboard",
+    "read_csv",
+    "read_excel",
+    "read_feather",
+    "read_fwf",
+    "read_gbq",
+    "read_hdf",
+    "read_html",
+    "read_json",
+    "read_orc",
+    "read_parquet",
+    "read_pickle",
+    "read_sas",
+    "read_spss",
+    "read_sql",
+    "read_sql_query",
+    "read_sql_table",
+    "read_stata",
+    "read_table",
+    "read_xml",
+    "to_pickle",
+]
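+
+# A minimal usage sketch (illustrative comments only, assuming a standard
+# pandas install): the readers re-exported above pair with the exceptions
+# defined in pandas.errors, e.g. ``read_csv`` raises ``EmptyDataError`` when
+# handed empty input.
+#
+#     >>> import io
+#     >>> import pandas as pd
+#     >>> from pandas.errors import EmptyDataError
+#     >>> try:
+#     ...     pd.read_csv(io.StringIO(""))
+#     ... except EmptyDataError as err:
+#     ...     print(err)
+#     No columns to parse from file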
diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py new file mode 100644 index 00000000..635fce37 --- /dev/null +++ b/pandas/io/clipboard/__init__.py @@ -0,0 +1,678 @@ +""" +Pyperclip + +A cross-platform clipboard module for Python, +with copy & paste functions for plain text. +By Al Sweigart al@inventwithpython.com +BSD License + +Usage: + import pyperclip + pyperclip.copy('The text to be copied to the clipboard.') + spam = pyperclip.paste() + + if not pyperclip.is_available(): + print("Copy functionality unavailable!") + +On Windows, no additional modules are needed. +On Mac, the pyobjc module is used, falling back to the pbcopy and pbpaste cli + commands. (These commands should come with OS X.). +On Linux, install xclip or xsel via package manager. For example, in Debian: + sudo apt-get install xclip + sudo apt-get install xsel + +Otherwise on Linux, you will need the PyQt5 modules installed. + +This module does not work with PyGObject yet. + +Cygwin is currently not supported. + +Security Note: This module runs programs with these names: + - which + - where + - pbcopy + - pbpaste + - xclip + - xsel + - klipper + - qdbus +A malicious user could rename or add programs with these names, tricking +Pyperclip into running them with whatever permissions the Python process has. + +""" + +__version__ = "1.7.0" + + +import contextlib +import ctypes +from ctypes import ( + c_size_t, + c_wchar, + c_wchar_p, + get_errno, + sizeof, +) +import os +import platform +from shutil import which +import subprocess +import time +import warnings + +from pandas.errors import ( + PyperclipException, + PyperclipWindowsException, +) +from pandas.util._exceptions import find_stack_level + +# `import PyQt4` sys.exit()s if DISPLAY is not in the environment. +# Thus, we need to detect the presence of $DISPLAY manually +# and not load PyQt4 if it is absent. +HAS_DISPLAY = os.getenv("DISPLAY", False) + +EXCEPT_MSG = """ + Pyperclip could not find a copy/paste mechanism for your system. + For more information, please visit + https://pyperclip.readthedocs.io/en/latest/#not-implemented-error + """ + +ENCODING = "utf-8" + +# The "which" unix command finds where a command is. +if platform.system() == "Windows": + WHICH_CMD = "where" +else: + WHICH_CMD = "which" + + +def _executable_exists(name): + return ( + subprocess.call( + [WHICH_CMD, name], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + == 0 + ) + + +def _stringifyText(text) -> str: + acceptedTypes = (str, int, float, bool) + if not isinstance(text, acceptedTypes): + raise PyperclipException( + f"only str, int, float, and bool values " + f"can be copied to the clipboard, not {type(text).__name__}" + ) + return str(text) + + +def init_osx_pbcopy_clipboard(): + def copy_osx_pbcopy(text): + text = _stringifyText(text) # Converts non-str values to str. + with subprocess.Popen( + ["pbcopy", "w"], stdin=subprocess.PIPE, close_fds=True + ) as p: + p.communicate(input=text.encode(ENCODING)) + + def paste_osx_pbcopy(): + with subprocess.Popen( + ["pbpaste", "r"], stdout=subprocess.PIPE, close_fds=True + ) as p: + stdout = p.communicate()[0] + return stdout.decode(ENCODING) + + return copy_osx_pbcopy, paste_osx_pbcopy + + +def init_osx_pyobjc_clipboard(): + def copy_osx_pyobjc(text): + """Copy string argument to clipboard""" + text = _stringifyText(text) # Converts non-str values to str. 
+ newStr = Foundation.NSString.stringWithString_(text).nsstring() + newData = newStr.dataUsingEncoding_(Foundation.NSUTF8StringEncoding) + board = AppKit.NSPasteboard.generalPasteboard() + board.declareTypes_owner_([AppKit.NSStringPboardType], None) + board.setData_forType_(newData, AppKit.NSStringPboardType) + + def paste_osx_pyobjc(): + """Returns contents of clipboard""" + board = AppKit.NSPasteboard.generalPasteboard() + content = board.stringForType_(AppKit.NSStringPboardType) + return content + + return copy_osx_pyobjc, paste_osx_pyobjc + + +def init_qt_clipboard(): + global QApplication + # $DISPLAY should exist + + # Try to import from qtpy, but if that fails try PyQt5 then PyQt4 + try: + from qtpy.QtWidgets import QApplication + except ImportError: + try: + from PyQt5.QtWidgets import QApplication + except ImportError: + from PyQt4.QtGui import QApplication + + app = QApplication.instance() + if app is None: + app = QApplication([]) + + def copy_qt(text): + text = _stringifyText(text) # Converts non-str values to str. + cb = app.clipboard() + cb.setText(text) + + def paste_qt() -> str: + cb = app.clipboard() + return str(cb.text()) + + return copy_qt, paste_qt + + +def init_xclip_clipboard(): + DEFAULT_SELECTION = "c" + PRIMARY_SELECTION = "p" + + def copy_xclip(text, primary=False): + text = _stringifyText(text) # Converts non-str values to str. + selection = DEFAULT_SELECTION + if primary: + selection = PRIMARY_SELECTION + with subprocess.Popen( + ["xclip", "-selection", selection], stdin=subprocess.PIPE, close_fds=True + ) as p: + p.communicate(input=text.encode(ENCODING)) + + def paste_xclip(primary=False): + selection = DEFAULT_SELECTION + if primary: + selection = PRIMARY_SELECTION + with subprocess.Popen( + ["xclip", "-selection", selection, "-o"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) as p: + stdout = p.communicate()[0] + # Intentionally ignore extraneous output on stderr when clipboard is empty + return stdout.decode(ENCODING) + + return copy_xclip, paste_xclip + + +def init_xsel_clipboard(): + DEFAULT_SELECTION = "-b" + PRIMARY_SELECTION = "-p" + + def copy_xsel(text, primary=False): + text = _stringifyText(text) # Converts non-str values to str. + selection_flag = DEFAULT_SELECTION + if primary: + selection_flag = PRIMARY_SELECTION + with subprocess.Popen( + ["xsel", selection_flag, "-i"], stdin=subprocess.PIPE, close_fds=True + ) as p: + p.communicate(input=text.encode(ENCODING)) + + def paste_xsel(primary=False): + selection_flag = DEFAULT_SELECTION + if primary: + selection_flag = PRIMARY_SELECTION + with subprocess.Popen( + ["xsel", selection_flag, "-o"], stdout=subprocess.PIPE, close_fds=True + ) as p: + stdout = p.communicate()[0] + return stdout.decode(ENCODING) + + return copy_xsel, paste_xsel + + +def init_klipper_clipboard(): + def copy_klipper(text): + text = _stringifyText(text) # Converts non-str values to str. 
+ with subprocess.Popen( + [ + "qdbus", + "org.kde.klipper", + "/klipper", + "setClipboardContents", + text.encode(ENCODING), + ], + stdin=subprocess.PIPE, + close_fds=True, + ) as p: + p.communicate(input=None) + + def paste_klipper(): + with subprocess.Popen( + ["qdbus", "org.kde.klipper", "/klipper", "getClipboardContents"], + stdout=subprocess.PIPE, + close_fds=True, + ) as p: + stdout = p.communicate()[0] + + # Workaround for https://bugs.kde.org/show_bug.cgi?id=342874 + # TODO: https://github.com/asweigart/pyperclip/issues/43 + clipboardContents = stdout.decode(ENCODING) + # even if blank, Klipper will append a newline at the end + assert len(clipboardContents) > 0 + # make sure that newline is there + assert clipboardContents.endswith("\n") + if clipboardContents.endswith("\n"): + clipboardContents = clipboardContents[:-1] + return clipboardContents + + return copy_klipper, paste_klipper + + +def init_dev_clipboard_clipboard(): + def copy_dev_clipboard(text): + text = _stringifyText(text) # Converts non-str values to str. + if text == "": + warnings.warn( + "Pyperclip cannot copy a blank string to the clipboard on Cygwin. " + "This is effectively a no-op.", + stacklevel=find_stack_level(), + ) + if "\r" in text: + warnings.warn( + "Pyperclip cannot handle \\r characters on Cygwin.", + stacklevel=find_stack_level(), + ) + + with open("/dev/clipboard", "wt") as fd: + fd.write(text) + + def paste_dev_clipboard() -> str: + with open("/dev/clipboard") as fd: + content = fd.read() + return content + + return copy_dev_clipboard, paste_dev_clipboard + + +def init_no_clipboard(): + class ClipboardUnavailable: + def __call__(self, *args, **kwargs): + raise PyperclipException(EXCEPT_MSG) + + def __bool__(self) -> bool: + return False + + return ClipboardUnavailable(), ClipboardUnavailable() + + +# Windows-related clipboard functions: +class CheckedCall: + def __init__(self, f) -> None: + super().__setattr__("f", f) + + def __call__(self, *args): + ret = self.f(*args) + if not ret and get_errno(): + raise PyperclipWindowsException("Error calling " + self.f.__name__) + return ret + + def __setattr__(self, key, value): + setattr(self.f, key, value) + + +def init_windows_clipboard(): + global HGLOBAL, LPVOID, DWORD, LPCSTR, INT + global HWND, HINSTANCE, HMENU, BOOL, UINT, HANDLE + from ctypes.wintypes import ( + BOOL, + DWORD, + HANDLE, + HGLOBAL, + HINSTANCE, + HMENU, + HWND, + INT, + LPCSTR, + LPVOID, + UINT, + ) + + windll = ctypes.windll + msvcrt = ctypes.CDLL("msvcrt") + + safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA) + safeCreateWindowExA.argtypes = [ + DWORD, + LPCSTR, + LPCSTR, + DWORD, + INT, + INT, + INT, + INT, + HWND, + HMENU, + HINSTANCE, + LPVOID, + ] + safeCreateWindowExA.restype = HWND + + safeDestroyWindow = CheckedCall(windll.user32.DestroyWindow) + safeDestroyWindow.argtypes = [HWND] + safeDestroyWindow.restype = BOOL + + OpenClipboard = windll.user32.OpenClipboard + OpenClipboard.argtypes = [HWND] + OpenClipboard.restype = BOOL + + safeCloseClipboard = CheckedCall(windll.user32.CloseClipboard) + safeCloseClipboard.argtypes = [] + safeCloseClipboard.restype = BOOL + + safeEmptyClipboard = CheckedCall(windll.user32.EmptyClipboard) + safeEmptyClipboard.argtypes = [] + safeEmptyClipboard.restype = BOOL + + safeGetClipboardData = CheckedCall(windll.user32.GetClipboardData) + safeGetClipboardData.argtypes = [UINT] + safeGetClipboardData.restype = HANDLE + + safeSetClipboardData = CheckedCall(windll.user32.SetClipboardData) + safeSetClipboardData.argtypes = [UINT, 
HANDLE] + safeSetClipboardData.restype = HANDLE + + safeGlobalAlloc = CheckedCall(windll.kernel32.GlobalAlloc) + safeGlobalAlloc.argtypes = [UINT, c_size_t] + safeGlobalAlloc.restype = HGLOBAL + + safeGlobalLock = CheckedCall(windll.kernel32.GlobalLock) + safeGlobalLock.argtypes = [HGLOBAL] + safeGlobalLock.restype = LPVOID + + safeGlobalUnlock = CheckedCall(windll.kernel32.GlobalUnlock) + safeGlobalUnlock.argtypes = [HGLOBAL] + safeGlobalUnlock.restype = BOOL + + wcslen = CheckedCall(msvcrt.wcslen) + wcslen.argtypes = [c_wchar_p] + wcslen.restype = UINT + + GMEM_MOVEABLE = 0x0002 + CF_UNICODETEXT = 13 + + @contextlib.contextmanager + def window(): + """ + Context that provides a valid Windows hwnd. + """ + # we really just need the hwnd, so setting "STATIC" + # as predefined lpClass is just fine. + hwnd = safeCreateWindowExA( + 0, b"STATIC", None, 0, 0, 0, 0, 0, None, None, None, None + ) + try: + yield hwnd + finally: + safeDestroyWindow(hwnd) + + @contextlib.contextmanager + def clipboard(hwnd): + """ + Context manager that opens the clipboard and prevents + other applications from modifying the clipboard content. + """ + # We may not get the clipboard handle immediately because + # some other application is accessing it (?) + # We try for at least 500ms to get the clipboard. + t = time.time() + 0.5 + success = False + while time.time() < t: + success = OpenClipboard(hwnd) + if success: + break + time.sleep(0.01) + if not success: + raise PyperclipWindowsException("Error calling OpenClipboard") + + try: + yield + finally: + safeCloseClipboard() + + def copy_windows(text): + # This function is heavily based on + # http://msdn.com/ms649016#_win32_Copying_Information_to_the_Clipboard + + text = _stringifyText(text) # Converts non-str values to str. + + with window() as hwnd: + # http://msdn.com/ms649048 + # If an application calls OpenClipboard with hwnd set to NULL, + # EmptyClipboard sets the clipboard owner to NULL; + # this causes SetClipboardData to fail. + # => We need a valid hwnd to copy something. + with clipboard(hwnd): + safeEmptyClipboard() + + if text: + # http://msdn.com/ms649051 + # If the hMem parameter identifies a memory object, + # the object must have been allocated using the + # function with the GMEM_MOVEABLE flag. + count = wcslen(text) + 1 + handle = safeGlobalAlloc(GMEM_MOVEABLE, count * sizeof(c_wchar)) + locked_handle = safeGlobalLock(handle) + + ctypes.memmove( + c_wchar_p(locked_handle), + c_wchar_p(text), + count * sizeof(c_wchar), + ) + + safeGlobalUnlock(handle) + safeSetClipboardData(CF_UNICODETEXT, handle) + + def paste_windows(): + with clipboard(None): + handle = safeGetClipboardData(CF_UNICODETEXT) + if not handle: + # GetClipboardData may return NULL with errno == NO_ERROR + # if the clipboard is empty. + # (Also, it may return a handle to an empty buffer, + # but technically that's not empty) + return "" + return c_wchar_p(handle).value + + return copy_windows, paste_windows + + +def init_wsl_clipboard(): + def copy_wsl(text): + text = _stringifyText(text) # Converts non-str values to str. + with subprocess.Popen(["clip.exe"], stdin=subprocess.PIPE, close_fds=True) as p: + p.communicate(input=text.encode(ENCODING)) + + def paste_wsl(): + with subprocess.Popen( + ["powershell.exe", "-command", "Get-Clipboard"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) as p: + stdout = p.communicate()[0] + # WSL appends "\r\n" to the contents. 
+ return stdout[:-2].decode(ENCODING) + + return copy_wsl, paste_wsl + + +# Automatic detection of clipboard mechanisms +# and importing is done in determine_clipboard(): +def determine_clipboard(): + """ + Determine the OS/platform and set the copy() and paste() functions + accordingly. + """ + global Foundation, AppKit, qtpy, PyQt4, PyQt5 + + # Setup for the CYGWIN platform: + if ( + "cygwin" in platform.system().lower() + ): # Cygwin has a variety of values returned by platform.system(), + # such as 'CYGWIN_NT-6.1' + # FIXME(pyperclip#55): pyperclip currently does not support Cygwin, + # see https://github.com/asweigart/pyperclip/issues/55 + if os.path.exists("/dev/clipboard"): + warnings.warn( + "Pyperclip's support for Cygwin is not perfect, " + "see https://github.com/asweigart/pyperclip/issues/55", + stacklevel=find_stack_level(), + ) + return init_dev_clipboard_clipboard() + + # Setup for the WINDOWS platform: + elif os.name == "nt" or platform.system() == "Windows": + return init_windows_clipboard() + + if platform.system() == "Linux": + if which("wslconfig.exe"): + return init_wsl_clipboard() + + # Setup for the MAC OS X platform: + if os.name == "mac" or platform.system() == "Darwin": + try: + import AppKit + import Foundation # check if pyobjc is installed + except ImportError: + return init_osx_pbcopy_clipboard() + else: + return init_osx_pyobjc_clipboard() + + # Setup for the LINUX platform: + if HAS_DISPLAY: + if _executable_exists("xsel"): + return init_xsel_clipboard() + if _executable_exists("xclip"): + return init_xclip_clipboard() + if _executable_exists("klipper") and _executable_exists("qdbus"): + return init_klipper_clipboard() + + try: + # qtpy is a small abstraction layer that lets you write applications + # using a single api call to either PyQt or PySide. + # https://pypi.python.org/project/QtPy + import qtpy # check if qtpy is installed + except ImportError: + # If qtpy isn't installed, fall back on importing PyQt4. + try: + import PyQt5 # check if PyQt5 is installed + except ImportError: + try: + import PyQt4 # check if PyQt4 is installed + except ImportError: + pass # We want to fail fast for all non-ImportError exceptions. + else: + return init_qt_clipboard() + else: + return init_qt_clipboard() + else: + return init_qt_clipboard() + + return init_no_clipboard() + + +def set_clipboard(clipboard): + """ + Explicitly sets the clipboard mechanism. The "clipboard mechanism" is how + the copy() and paste() functions interact with the operating system to + implement the copy/paste feature. 
The clipboard parameter must be one of: + - pbcopy + - pbobjc (default on Mac OS X) + - qt + - xclip + - xsel + - klipper + - windows (default on Windows) + - no (this is what is set when no clipboard mechanism can be found) + """ + global copy, paste + + clipboard_types = { + "pbcopy": init_osx_pbcopy_clipboard, + "pyobjc": init_osx_pyobjc_clipboard, + "qt": init_qt_clipboard, # TODO - split this into 'qtpy', 'pyqt4', and 'pyqt5' + "xclip": init_xclip_clipboard, + "xsel": init_xsel_clipboard, + "klipper": init_klipper_clipboard, + "windows": init_windows_clipboard, + "no": init_no_clipboard, + } + + if clipboard not in clipboard_types: + allowed_clipboard_types = [repr(_) for _ in clipboard_types.keys()] + raise ValueError( + f"Argument must be one of {', '.join(allowed_clipboard_types)}" + ) + + # Sets pyperclip's copy() and paste() functions: + copy, paste = clipboard_types[clipboard]() + + +def lazy_load_stub_copy(text): + """ + A stub function for copy(), which will load the real copy() function when + called so that the real copy() function is used for later calls. + + This allows users to import pyperclip without having determine_clipboard() + automatically run, which will automatically select a clipboard mechanism. + This could be a problem if it selects, say, the memory-heavy PyQt4 module + but the user was just going to immediately call set_clipboard() to use a + different clipboard mechanism. + + The lazy loading this stub function implements gives the user a chance to + call set_clipboard() to pick another clipboard mechanism. Or, if the user + simply calls copy() or paste() without calling set_clipboard() first, + will fall back on whatever clipboard mechanism that determine_clipboard() + automatically chooses. + """ + global copy, paste + copy, paste = determine_clipboard() + return copy(text) + + +def lazy_load_stub_paste(): + """ + A stub function for paste(), which will load the real paste() function when + called so that the real paste() function is used for later calls. + + This allows users to import pyperclip without having determine_clipboard() + automatically run, which will automatically select a clipboard mechanism. + This could be a problem if it selects, say, the memory-heavy PyQt4 module + but the user was just going to immediately call set_clipboard() to use a + different clipboard mechanism. + + The lazy loading this stub function implements gives the user a chance to + call set_clipboard() to pick another clipboard mechanism. Or, if the user + simply calls copy() or paste() without calling set_clipboard() first, + will fall back on whatever clipboard mechanism that determine_clipboard() + automatically chooses. + """ + global copy, paste + copy, paste = determine_clipboard() + return paste() + + +def is_available() -> bool: + return copy != lazy_load_stub_copy and paste != lazy_load_stub_paste + + +# Initially, copy() and paste() are set to lazy loading wrappers which will +# set `copy` and `paste` to real functions the first time they're used, unless +# set_clipboard() or determine_clipboard() is called first. 
+copy, paste = lazy_load_stub_copy, lazy_load_stub_paste + + +__all__ = ["copy", "paste", "set_clipboard", "determine_clipboard"] + +# pandas aliases +clipboard_get = paste +clipboard_set = copy diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py new file mode 100644 index 00000000..a3e778e5 --- /dev/null +++ b/pandas/io/clipboards.py @@ -0,0 +1,156 @@ +""" io on the clipboard """ +from __future__ import annotations + +from io import StringIO +import warnings + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.generic import ABCDataFrame + +from pandas import ( + get_option, + option_context, +) + + +def read_clipboard(sep: str = r"\s+", **kwargs): # pragma: no cover + r""" + Read text from clipboard and pass to read_csv. + + Parameters + ---------- + sep : str, default '\s+' + A string or regex delimiter. The default of '\s+' denotes + one or more whitespace characters. + + **kwargs + See read_csv for the full argument list. + + Returns + ------- + DataFrame + A parsed DataFrame object. + """ + encoding = kwargs.pop("encoding", "utf-8") + + # only utf-8 is valid for passed value because that's what clipboard + # supports + if encoding is not None and encoding.lower().replace("-", "") != "utf8": + raise NotImplementedError("reading from clipboard only supports utf-8 encoding") + + from pandas.io.clipboard import clipboard_get + from pandas.io.parsers import read_csv + + text = clipboard_get() + + # Try to decode (if needed, as "text" might already be a string here). + try: + text = text.decode(kwargs.get("encoding") or get_option("display.encoding")) + except AttributeError: + pass + + # Excel copies into clipboard with \t separation + # inspect no more then the 10 first lines, if they + # all contain an equal number (>0) of tabs, infer + # that this came from excel and set 'sep' accordingly + lines = text[:10000].split("\n")[:-1][:10] + + # Need to remove leading white space, since read_csv + # accepts: + # a b + # 0 1 2 + # 1 3 4 + + counts = {x.lstrip(" ").count("\t") for x in lines} + if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0: + sep = "\t" + # check the number of leading tabs in the first line + # to account for index columns + index_length = len(lines[0]) - len(lines[0].lstrip(" \t")) + if index_length != 0: + kwargs.setdefault("index_col", list(range(index_length))) + + # Edge case where sep is specified to be None, return to default + if sep is None and kwargs.get("delim_whitespace") is None: + sep = r"\s+" + + # Regex separator currently only works with python engine. + # Default to python if separator is multi-character (regex) + if len(sep) > 1 and kwargs.get("engine") is None: + kwargs["engine"] = "python" + elif len(sep) > 1 and kwargs.get("engine") == "c": + warnings.warn( + "read_clipboard with regex separator does not work properly with c engine.", + stacklevel=find_stack_level(), + ) + + return read_csv(StringIO(text), sep=sep, **kwargs) + + +def to_clipboard( + obj, excel: bool | None = True, sep: str | None = None, **kwargs +) -> None: # pragma: no cover + """ + Attempt to write text representation of object to the system clipboard + The clipboard can be then pasted into Excel for example. + + Parameters + ---------- + obj : the object to write to the clipboard + excel : bool, defaults to True + if True, use the provided separator, writing in a csv + format for allowing easy pasting into excel. 
+ if False, write a string representation of the object + to the clipboard + sep : optional, defaults to tab + other keywords are passed to to_csv + + Notes + ----- + Requirements for your platform + - Linux: xclip, or xsel (with PyQt4 modules) + - Windows: + - OS X: + """ + encoding = kwargs.pop("encoding", "utf-8") + + # testing if an invalid encoding is passed to clipboard + if encoding is not None and encoding.lower().replace("-", "") != "utf8": + raise ValueError("clipboard only supports utf-8 encoding") + + from pandas.io.clipboard import clipboard_set + + if excel is None: + excel = True + + if excel: + try: + if sep is None: + sep = "\t" + buf = StringIO() + + # clipboard_set (pyperclip) expects unicode + obj.to_csv(buf, sep=sep, encoding="utf-8", **kwargs) + text = buf.getvalue() + + clipboard_set(text) + return + except TypeError: + warnings.warn( + "to_clipboard in excel mode requires a single character separator.", + stacklevel=find_stack_level(), + ) + elif sep is not None: + warnings.warn( + "to_clipboard with excel=False ignores the sep argument.", + stacklevel=find_stack_level(), + ) + + if isinstance(obj, ABCDataFrame): + # str(df) has various unhelpful defaults, like truncation + with option_context("display.max_colwidth", None): + objstr = obj.to_string(**kwargs) + else: + objstr = str(obj) + clipboard_set(objstr) diff --git a/pandas/io/common.py b/pandas/io/common.py new file mode 100644 index 00000000..f31de63a --- /dev/null +++ b/pandas/io/common.py @@ -0,0 +1,1183 @@ +"""Common IO api utilities""" +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +import bz2 +import codecs +import dataclasses +import functools +import gzip +from io import ( + BufferedIOBase, + BytesIO, + RawIOBase, + StringIO, + TextIOBase, + TextIOWrapper, +) +import mmap +import os +from pathlib import Path +import re +import tarfile +from typing import ( + IO, + Any, + AnyStr, + Generic, + Literal, + Mapping, + Sequence, + TypeVar, + cast, + overload, +) +from urllib.parse import ( + urljoin, + urlparse as parse_url, + uses_netloc, + uses_params, + uses_relative, +) +import warnings +import zipfile + +from pandas._typing import ( + BaseBuffer, + CompressionDict, + CompressionOptions, + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.compat import get_lzma_file +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_bool, + is_file_like, + is_integer, + is_list_like, +) + +from pandas.core.shared_docs import _shared_docs + +_VALID_URLS = set(uses_relative + uses_netloc + uses_params) +_VALID_URLS.discard("") +_RFC_3986_PATTERN = re.compile(r"^[A-Za-z][A-Za-z0-9+\-+.]*://") + +BaseBufferT = TypeVar("BaseBufferT", bound=BaseBuffer) + + +@dataclasses.dataclass +class IOArgs: + """ + Return value of io/common.py:_get_filepath_or_buffer. + """ + + filepath_or_buffer: str | BaseBuffer + encoding: str + mode: str + compression: CompressionDict + should_close: bool = False + + +@dataclasses.dataclass +class IOHandles(Generic[AnyStr]): + """ + Return value of io/common.py:get_handle + + Can be used as a context manager. + + This is used to easily close created buffers and to handle corner cases when + TextIOWrapper is inserted. + + handle: The file handle to be used. + created_handles: All file handles that are created by get_handle + is_wrapped: Whether a TextIOWrapper needs to be detached. 
+ """ + + # handle might not implement the IO-interface + handle: IO[AnyStr] + compression: CompressionDict + created_handles: list[IO[bytes] | IO[str]] = dataclasses.field(default_factory=list) + is_wrapped: bool = False + + def close(self) -> None: + """ + Close all created buffers. + + Note: If a TextIOWrapper was inserted, it is flushed and detached to + avoid closing the potentially user-created buffer. + """ + if self.is_wrapped: + assert isinstance(self.handle, TextIOWrapper) + self.handle.flush() + self.handle.detach() + self.created_handles.remove(self.handle) + for handle in self.created_handles: + handle.close() + self.created_handles = [] + self.is_wrapped = False + + def __enter__(self) -> IOHandles[AnyStr]: + return self + + def __exit__(self, *args: Any) -> None: + self.close() + + +def is_url(url: object) -> bool: + """ + Check to see if a URL has a valid protocol. + + Parameters + ---------- + url : str or unicode + + Returns + ------- + isurl : bool + If `url` has a valid protocol return True otherwise False. + """ + if not isinstance(url, str): + return False + return parse_url(url).scheme in _VALID_URLS + + +@overload +def _expand_user(filepath_or_buffer: str) -> str: + ... + + +@overload +def _expand_user(filepath_or_buffer: BaseBufferT) -> BaseBufferT: + ... + + +def _expand_user(filepath_or_buffer: str | BaseBufferT) -> str | BaseBufferT: + """ + Return the argument with an initial component of ~ or ~user + replaced by that user's home directory. + + Parameters + ---------- + filepath_or_buffer : object to be converted if possible + + Returns + ------- + expanded_filepath_or_buffer : an expanded filepath or the + input if not expandable + """ + if isinstance(filepath_or_buffer, str): + return os.path.expanduser(filepath_or_buffer) + return filepath_or_buffer + + +def validate_header_arg(header: object) -> None: + if header is None: + return + if is_integer(header): + header = cast(int, header) + if header < 0: + # GH 27779 + raise ValueError( + "Passing negative integer to header is invalid. " + "For no header, use header=None instead" + ) + return + if is_list_like(header, allow_sets=False): + header = cast(Sequence, header) + if not all(map(is_integer, header)): + raise ValueError("header must be integer or list of integers") + if any(i < 0 for i in header): + raise ValueError("cannot specify multi-index header with negative integers") + return + if is_bool(header): + raise TypeError( + "Passing a bool to header is invalid. Use header=None for no header or " + "header=int or list-like of ints to specify " + "the row(s) making up the column names" + ) + # GH 16338 + raise ValueError("header must be integer or list of integers") + + +@overload +def stringify_path(filepath_or_buffer: FilePath, convert_file_like: bool = ...) -> str: + ... + + +@overload +def stringify_path( + filepath_or_buffer: BaseBufferT, convert_file_like: bool = ... +) -> BaseBufferT: + ... + + +def stringify_path( + filepath_or_buffer: FilePath | BaseBufferT, + convert_file_like: bool = False, +) -> str | BaseBufferT: + """ + Attempt to convert a path-like object to a string. + + Parameters + ---------- + filepath_or_buffer : object to be converted + + Returns + ------- + str_filepath_or_buffer : maybe a string version of the object + + Notes + ----- + Objects supporting the fspath protocol (python 3.6+) are coerced + according to its __fspath__ method. + + Any other object is passed through unchanged, which includes bytes, + strings, buffers, or anything else that's not even path-like. 
+ """ + if not convert_file_like and is_file_like(filepath_or_buffer): + # GH 38125: some fsspec objects implement os.PathLike but have already opened a + # file. This prevents opening the file a second time. infer_compression calls + # this function with convert_file_like=True to infer the compression. + return cast(BaseBufferT, filepath_or_buffer) + + if isinstance(filepath_or_buffer, os.PathLike): + filepath_or_buffer = filepath_or_buffer.__fspath__() + return _expand_user(filepath_or_buffer) + + +def urlopen(*args, **kwargs): + """ + Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of + the stdlib. + """ + import urllib.request + + return urllib.request.urlopen(*args, **kwargs) + + +def is_fsspec_url(url: FilePath | BaseBuffer) -> bool: + """ + Returns true if the given URL looks like + something fsspec can handle + """ + return ( + isinstance(url, str) + and bool(_RFC_3986_PATTERN.match(url)) + and not url.startswith(("http://", "https://")) + ) + + +@doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "filepath_or_buffer", +) +def _get_filepath_or_buffer( + filepath_or_buffer: FilePath | BaseBuffer, + encoding: str = "utf-8", + compression: CompressionOptions = None, + mode: str = "r", + storage_options: StorageOptions = None, +) -> IOArgs: + """ + If the filepath_or_buffer is a url, translate and return the buffer. + Otherwise passthrough. + + Parameters + ---------- + filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path), + or buffer + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + encoding : the encoding to use to decode bytes, default is 'utf-8' + mode : str, optional + + {storage_options} + + .. versionadded:: 1.2.0 + + ..versionchange:: 1.2.0 + + Returns the dataclass IOArgs. + """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + + # handle compression dict + compression_method, compression = get_compression_method(compression) + compression_method = infer_compression(filepath_or_buffer, compression_method) + + # GH21227 internal compression is not used for non-binary handles. + if compression_method and hasattr(filepath_or_buffer, "write") and "b" not in mode: + warnings.warn( + "compression has no effect when passing a non-binary object as input.", + RuntimeWarning, + stacklevel=find_stack_level(), + ) + compression_method = None + + compression = dict(compression, method=compression_method) + + # bz2 and xz do not write the byte order mark for utf-16 and utf-32 + # print a warning when writing such files + if ( + "w" in mode + and compression_method in ["bz2", "xz"] + and encoding in ["utf-16", "utf-32"] + ): + warnings.warn( + f"{compression} will not write the byte order mark for {encoding}", + UnicodeWarning, + stacklevel=find_stack_level(), + ) + + # Use binary mode when converting path-like objects to file-like objects (fsspec) + # except when text mode is explicitly requested. The original mode is returned if + # fsspec is not used. + fsspec_mode = mode + if "t" not in fsspec_mode and "b" not in fsspec_mode: + fsspec_mode += "b" + + if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer): + # TODO: fsspec can also handle HTTP via requests, but leaving this + # unchanged. 
using fsspec appears to break the ability to infer if the + # server responded with gzipped data + storage_options = storage_options or {} + + # waiting until now for importing to match intended lazy logic of + # urlopen function defined elsewhere in this module + import urllib.request + + # assuming storage_options is to be interpreted as headers + req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options) + with urlopen(req_info) as req: + content_encoding = req.headers.get("Content-Encoding", None) + if content_encoding == "gzip": + # Override compression based on Content-Encoding header + compression = {"method": "gzip"} + reader = BytesIO(req.read()) + return IOArgs( + filepath_or_buffer=reader, + encoding=encoding, + compression=compression, + should_close=True, + mode=fsspec_mode, + ) + + if is_fsspec_url(filepath_or_buffer): + assert isinstance( + filepath_or_buffer, str + ) # just to appease mypy for this branch + # two special-case s3-like protocols; these have special meaning in Hadoop, + # but are equivalent to just "s3" from fsspec's point of view + # cc #11071 + if filepath_or_buffer.startswith("s3a://"): + filepath_or_buffer = filepath_or_buffer.replace("s3a://", "s3://") + if filepath_or_buffer.startswith("s3n://"): + filepath_or_buffer = filepath_or_buffer.replace("s3n://", "s3://") + fsspec = import_optional_dependency("fsspec") + + # If botocore is installed we fallback to reading with anon=True + # to allow reads from public buckets + err_types_to_retry_with_anon: list[Any] = [] + try: + import_optional_dependency("botocore") + from botocore.exceptions import ( + ClientError, + NoCredentialsError, + ) + + err_types_to_retry_with_anon = [ + ClientError, + NoCredentialsError, + PermissionError, + ] + except ImportError: + pass + + try: + file_obj = fsspec.open( + filepath_or_buffer, mode=fsspec_mode, **(storage_options or {}) + ).open() + # GH 34626 Reads from Public Buckets without Credentials needs anon=True + except tuple(err_types_to_retry_with_anon): + if storage_options is None: + storage_options = {"anon": True} + else: + # don't mutate user input. + storage_options = dict(storage_options) + storage_options["anon"] = True + file_obj = fsspec.open( + filepath_or_buffer, mode=fsspec_mode, **(storage_options or {}) + ).open() + + return IOArgs( + filepath_or_buffer=file_obj, + encoding=encoding, + compression=compression, + should_close=True, + mode=fsspec_mode, + ) + elif storage_options: + raise ValueError( + "storage_options passed with file object or non-fsspec file path" + ) + + if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)): + return IOArgs( + filepath_or_buffer=_expand_user(filepath_or_buffer), + encoding=encoding, + compression=compression, + should_close=False, + mode=mode, + ) + + # is_file_like requires (read | write) & __iter__ but __iter__ is only + # needed for read_csv(engine=python) + if not ( + hasattr(filepath_or_buffer, "read") or hasattr(filepath_or_buffer, "write") + ): + msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}" + raise ValueError(msg) + + return IOArgs( + filepath_or_buffer=filepath_or_buffer, + encoding=encoding, + compression=compression, + should_close=False, + mode=mode, + ) + + +def file_path_to_url(path: str) -> str: + """ + converts an absolute native path to a FILE URL. 
+ + Parameters + ---------- + path : a path in native format + + Returns + ------- + a valid FILE URL + """ + # lazify expensive import (~30ms) + from urllib.request import pathname2url + + return urljoin("file:", pathname2url(path)) + + +_extension_to_compression = { + ".tar": "tar", + ".tar.gz": "tar", + ".tar.bz2": "tar", + ".tar.xz": "tar", + ".gz": "gzip", + ".bz2": "bz2", + ".zip": "zip", + ".xz": "xz", + ".zst": "zstd", +} +_supported_compressions = set(_extension_to_compression.values()) + + +def get_compression_method( + compression: CompressionOptions, +) -> tuple[str | None, CompressionDict]: + """ + Simplifies a compression argument to a compression method string and + a mapping containing additional arguments. + + Parameters + ---------- + compression : str or mapping + If string, specifies the compression method. If mapping, value at key + 'method' specifies compression method. + + Returns + ------- + tuple of ({compression method}, Optional[str] + {compression arguments}, Dict[str, Any]) + + Raises + ------ + ValueError on mapping missing 'method' key + """ + compression_method: str | None + if isinstance(compression, Mapping): + compression_args = dict(compression) + try: + compression_method = compression_args.pop("method") + except KeyError as err: + raise ValueError("If mapping, compression must have key 'method'") from err + else: + compression_args = {} + compression_method = compression + return compression_method, compression_args + + +@doc(compression_options=_shared_docs["compression_options"] % "filepath_or_buffer") +def infer_compression( + filepath_or_buffer: FilePath | BaseBuffer, compression: str | None +) -> str | None: + """ + Get the compression method for filepath_or_buffer. If compression='infer', + the inferred compression method is returned. Otherwise, the input + compression method is returned unchanged, unless it's invalid, in which + case an error is raised. + + Parameters + ---------- + filepath_or_buffer : str or file handle + File path or object. + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + Returns + ------- + string or None + + Raises + ------ + ValueError on invalid compression specified. + """ + if compression is None: + return None + + # Infer compression + if compression == "infer": + # Convert all path types (e.g. pathlib.Path) to strings + filepath_or_buffer = stringify_path(filepath_or_buffer, convert_file_like=True) + if not isinstance(filepath_or_buffer, str): + # Cannot infer compression of a buffer, assume no compression + return None + + # Infer compression from the filename/URL extension + for extension, compression in _extension_to_compression.items(): + if filepath_or_buffer.lower().endswith(extension): + return compression + return None + + # Compression has been specified. 
Check that it's valid + if compression in _supported_compressions: + return compression + + # https://github.com/python/mypy/issues/5492 + # Unsupported operand types for + ("List[Optional[str]]" and "List[str]") + valid = ["infer", None] + sorted(_supported_compressions) # type: ignore[operator] + msg = ( + f"Unrecognized compression type: {compression}\n" + f"Valid compression types are {valid}" + ) + raise ValueError(msg) + + +def check_parent_directory(path: Path | str) -> None: + """ + Check if parent directory of a file exists, raise OSError if it does not + + Parameters + ---------- + path: Path or str + Path to check parent directory of + """ + parent = Path(path).parent + if not parent.is_dir(): + raise OSError(rf"Cannot save file into a non-existent directory: '{parent}'") + + +@overload +def get_handle( + path_or_buf: FilePath | BaseBuffer, + mode: str, + *, + encoding: str | None = ..., + compression: CompressionOptions = ..., + memory_map: bool = ..., + is_text: Literal[False], + errors: str | None = ..., + storage_options: StorageOptions = ..., +) -> IOHandles[bytes]: + ... + + +@overload +def get_handle( + path_or_buf: FilePath | BaseBuffer, + mode: str, + *, + encoding: str | None = ..., + compression: CompressionOptions = ..., + memory_map: bool = ..., + is_text: Literal[True] = ..., + errors: str | None = ..., + storage_options: StorageOptions = ..., +) -> IOHandles[str]: + ... + + +@overload +def get_handle( + path_or_buf: FilePath | BaseBuffer, + mode: str, + *, + encoding: str | None = ..., + compression: CompressionOptions = ..., + memory_map: bool = ..., + is_text: bool = ..., + errors: str | None = ..., + storage_options: StorageOptions = ..., +) -> IOHandles[str] | IOHandles[bytes]: + ... + + +@doc(compression_options=_shared_docs["compression_options"] % "path_or_buf") +def get_handle( + path_or_buf: FilePath | BaseBuffer, + mode: str, + *, + encoding: str | None = None, + compression: CompressionOptions = None, + memory_map: bool = False, + is_text: bool = True, + errors: str | None = None, + storage_options: StorageOptions = None, +) -> IOHandles[str] | IOHandles[bytes]: + """ + Get file handle for given path/buffer and mode. + + Parameters + ---------- + path_or_buf : str or file handle + File path or object. + mode : str + Mode to open path_or_buf with. + encoding : str or None + Encoding to use. + {compression_options} + + .. versionchanged:: 1.0.0 + May now be a dict with key 'method' as compression mode + and other keys as compression options if compression + mode is 'zip'. + + .. versionchanged:: 1.1.0 + Passing compression options as keys in dict is now + supported for compression modes 'gzip', 'bz2', 'zstd' and 'zip'. + + .. versionchanged:: 1.4.0 Zstandard support. + + memory_map : bool, default False + See parsers._parser_params for more information. Only used by read_csv. + is_text : bool, default True + Whether the type of the content passed to the file/buffer is string or + bytes. This is not the same as `"b" not in mode`. If a string content is + passed to a binary file/buffer, a wrapper is inserted. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + storage_options: StorageOptions = None + Passed to _get_filepath_or_buffer + + .. versionchanged:: 1.2.0 + + Returns the dataclass IOHandles + """ + # Windows does not default to utf-8. 
Set to utf-8 for a consistent behavior + encoding = encoding or "utf-8" + + errors = errors or "strict" + + # read_csv does not know whether the buffer is opened in binary/text mode + if _is_binary_mode(path_or_buf, mode) and "b" not in mode: + mode += "b" + + # validate encoding and errors + codecs.lookup(encoding) + if isinstance(errors, str): + codecs.lookup_error(errors) + + # open URLs + ioargs = _get_filepath_or_buffer( + path_or_buf, + encoding=encoding, + compression=compression, + mode=mode, + storage_options=storage_options, + ) + + handle = ioargs.filepath_or_buffer + handles: list[BaseBuffer] + + # memory mapping needs to be the first step + # only used for read_csv + handle, memory_map, handles = _maybe_memory_map(handle, memory_map) + + is_path = isinstance(handle, str) + compression_args = dict(ioargs.compression) + compression = compression_args.pop("method") + + # Only for write methods + if "r" not in mode and is_path: + check_parent_directory(str(handle)) + + if compression: + if compression != "zstd": + # compression libraries do not like an explicit text-mode + ioargs.mode = ioargs.mode.replace("t", "") + elif compression == "zstd" and "b" not in ioargs.mode: + # python-zstandard defaults to text mode, but we always expect + # compression libraries to use binary mode. + ioargs.mode += "b" + + # GZ Compression + if compression == "gzip": + if isinstance(handle, str): + # error: Incompatible types in assignment (expression has type + # "GzipFile", variable has type "Union[str, BaseBuffer]") + handle = gzip.GzipFile( # type: ignore[assignment] + filename=handle, + mode=ioargs.mode, + **compression_args, + ) + else: + handle = gzip.GzipFile( + # No overload variant of "GzipFile" matches argument types + # "Union[str, BaseBuffer]", "str", "Dict[str, Any]" + fileobj=handle, # type: ignore[call-overload] + mode=ioargs.mode, + **compression_args, + ) + + # BZ Compression + elif compression == "bz2": + # No overload variant of "BZ2File" matches argument types + # "Union[str, BaseBuffer]", "str", "Dict[str, Any]" + handle = bz2.BZ2File( # type: ignore[call-overload] + handle, + mode=ioargs.mode, + **compression_args, + ) + + # ZIP Compression + elif compression == "zip": + # error: Argument 1 to "_BytesZipFile" has incompatible type + # "Union[str, BaseBuffer]"; expected "Union[Union[str, PathLike[str]], + # ReadBuffer[bytes], WriteBuffer[bytes]]" + handle = _BytesZipFile( + handle, ioargs.mode, **compression_args # type: ignore[arg-type] + ) + if handle.buffer.mode == "r": + handles.append(handle) + zip_names = handle.buffer.namelist() + if len(zip_names) == 1: + handle = handle.buffer.open(zip_names.pop()) + elif not zip_names: + raise ValueError(f"Zero files found in ZIP file {path_or_buf}") + else: + raise ValueError( + "Multiple files found in ZIP file. 
" + f"Only one file per ZIP: {zip_names}" + ) + + # TAR Encoding + elif compression == "tar": + compression_args.setdefault("mode", ioargs.mode) + if isinstance(handle, str): + handle = _BytesTarFile(name=handle, **compression_args) + else: + # error: Argument "fileobj" to "_BytesTarFile" has incompatible + # type "BaseBuffer"; expected "Union[ReadBuffer[bytes], + # WriteBuffer[bytes], None]" + handle = _BytesTarFile( + fileobj=handle, **compression_args # type: ignore[arg-type] + ) + assert isinstance(handle, _BytesTarFile) + if "r" in handle.buffer.mode: + handles.append(handle) + files = handle.buffer.getnames() + if len(files) == 1: + file = handle.buffer.extractfile(files[0]) + assert file is not None + handle = file + elif not files: + raise ValueError(f"Zero files found in TAR archive {path_or_buf}") + else: + raise ValueError( + "Multiple files found in TAR archive. " + f"Only one file per TAR archive: {files}" + ) + + # XZ Compression + elif compression == "xz": + # error: Argument 1 to "LZMAFile" has incompatible type "Union[str, + # BaseBuffer]"; expected "Optional[Union[Union[str, bytes, PathLike[str], + # PathLike[bytes]], IO[bytes]]]" + handle = get_lzma_file()(handle, ioargs.mode) # type: ignore[arg-type] + + # Zstd Compression + elif compression == "zstd": + zstd = import_optional_dependency("zstandard") + if "r" in ioargs.mode: + open_args = {"dctx": zstd.ZstdDecompressor(**compression_args)} + else: + open_args = {"cctx": zstd.ZstdCompressor(**compression_args)} + handle = zstd.open( + handle, + mode=ioargs.mode, + **open_args, + ) + + # Unrecognized Compression + else: + msg = f"Unrecognized compression type: {compression}" + raise ValueError(msg) + + assert not isinstance(handle, str) + handles.append(handle) + + elif isinstance(handle, str): + # Check whether the filename is to be opened in binary mode. + # Binary mode does not support 'encoding' and 'newline'. 
+ if ioargs.encoding and "b" not in ioargs.mode: + # Encoding + handle = open( + handle, + ioargs.mode, + encoding=ioargs.encoding, + errors=errors, + newline="", + ) + else: + # Binary mode + handle = open(handle, ioargs.mode) + handles.append(handle) + + # Convert BytesIO or file objects passed with an encoding + is_wrapped = False + if not is_text and ioargs.mode == "rb" and isinstance(handle, TextIOBase): + # not added to handles as it does not open/buffer resources + handle = _BytesIOWrapper( + handle, + encoding=ioargs.encoding, + ) + elif is_text and ( + compression or memory_map or _is_binary_mode(handle, ioargs.mode) + ): + if ( + not hasattr(handle, "readable") + or not hasattr(handle, "writable") + or not hasattr(handle, "seekable") + ): + handle = _IOWrapper(handle) + # error: Argument 1 to "TextIOWrapper" has incompatible type + # "_IOWrapper"; expected "IO[bytes]" + handle = TextIOWrapper( + handle, # type: ignore[arg-type] + encoding=ioargs.encoding, + errors=errors, + newline="", + ) + handles.append(handle) + # only marked as wrapped when the caller provided a handle + is_wrapped = not ( + isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close + ) + + if "r" in ioargs.mode and not hasattr(handle, "read"): + raise TypeError( + "Expected file path name or file-like object, " + f"got {type(ioargs.filepath_or_buffer)} type" + ) + + handles.reverse() # close the most recently added buffer first + if ioargs.should_close: + assert not isinstance(ioargs.filepath_or_buffer, str) + handles.append(ioargs.filepath_or_buffer) + + return IOHandles( + # error: Argument "handle" to "IOHandles" has incompatible type + # "Union[TextIOWrapper, GzipFile, BaseBuffer, typing.IO[bytes], + # typing.IO[Any]]"; expected "pandas._typing.IO[Any]" + handle=handle, # type: ignore[arg-type] + # error: Argument "created_handles" to "IOHandles" has incompatible type + # "List[BaseBuffer]"; expected "List[Union[IO[bytes], IO[str]]]" + created_handles=handles, # type: ignore[arg-type] + is_wrapped=is_wrapped, + compression=ioargs.compression, + ) + + +# error: Definition of "__enter__" in base class "IOBase" is incompatible +# with definition in base class "BinaryIO" +class _BufferedWriter(BytesIO, ABC): # type: ignore[misc] + """ + Some objects do not support multiple .write() calls (TarFile and ZipFile). + This wrapper writes to the underlying buffer on close. + """ + + @abstractmethod + def write_to_buffer(self) -> None: + ... 
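As a rough usage sketch of the compression helpers above (get_handle and IOHandles are internal pandas APIs, and the file name below is only a placeholder): infer_compression maps a recognised extension through _extension_to_compression, and get_handle returns an IOHandles whose created buffers the caller is expected to close when done.

    # minimal sketch, assuming a hypothetical gzip-compressed CSV on disk
    from pandas.io.common import get_handle, infer_compression

    infer_compression("data.csv.gz", "infer")   # -> "gzip", via _extension_to_compression

    handles = get_handle("data.csv.gz", "r", compression="infer", encoding="utf-8")
    try:
        text = handles.handle.read()             # decompressed, decoded text
    finally:
        handles.close()                          # closes every buffer get_handle opened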
+ + def close(self) -> None: + if self.closed: + # already closed + return + if self.getvalue(): + # write to buffer + self.seek(0) + # error: "_BufferedWriter" has no attribute "buffer" + with self.buffer: # type: ignore[attr-defined] + self.write_to_buffer() + else: + # error: "_BufferedWriter" has no attribute "buffer" + self.buffer.close() # type: ignore[attr-defined] + super().close() + + +class _BytesTarFile(_BufferedWriter): + def __init__( + self, + name: str | None = None, + mode: Literal["r", "a", "w", "x"] = "r", + fileobj: ReadBuffer[bytes] | WriteBuffer[bytes] | None = None, + archive_name: str | None = None, + **kwargs, + ) -> None: + super().__init__() + self.archive_name = archive_name + self.name = name + # error: Argument "fileobj" to "open" of "TarFile" has incompatible + # type "Union[ReadBuffer[bytes], WriteBuffer[bytes], None]"; expected + # "Optional[IO[bytes]]" + self.buffer = tarfile.TarFile.open( + name=name, + mode=self.extend_mode(mode), + fileobj=fileobj, # type: ignore[arg-type] + **kwargs, + ) + + def extend_mode(self, mode: str) -> str: + mode = mode.replace("b", "") + if mode != "w": + return mode + if self.name is not None: + suffix = Path(self.name).suffix + if suffix in (".gz", ".xz", ".bz2"): + mode = f"{mode}:{suffix[1:]}" + return mode + + def infer_filename(self) -> str | None: + """ + If an explicit archive_name is not given, we still want the file inside the zip + file not to be named something.tar, because that causes confusion (GH39465). + """ + if self.name is None: + return None + + filename = Path(self.name) + if filename.suffix == ".tar": + return filename.with_suffix("").name + elif filename.suffix in (".tar.gz", ".tar.bz2", ".tar.xz"): + return filename.with_suffix("").with_suffix("").name + return filename.name + + def write_to_buffer(self) -> None: + # TarFile needs a non-empty string + archive_name = self.archive_name or self.infer_filename() or "tar" + tarinfo = tarfile.TarInfo(name=archive_name) + tarinfo.size = len(self.getvalue()) + self.buffer.addfile(tarinfo, self) + + +class _BytesZipFile(_BufferedWriter): + def __init__( + self, + file: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], + mode: str, + archive_name: str | None = None, + **kwargs, + ) -> None: + super().__init__() + mode = mode.replace("b", "") + self.archive_name = archive_name + + kwargs.setdefault("compression", zipfile.ZIP_DEFLATED) + # error: Argument 1 to "ZipFile" has incompatible type "Union[ + # Union[str, PathLike[str]], ReadBuffer[bytes], WriteBuffer[bytes]]"; + # expected "Union[Union[str, PathLike[str]], IO[bytes]]" + self.buffer = zipfile.ZipFile(file, mode, **kwargs) # type: ignore[arg-type] + + def infer_filename(self) -> str | None: + """ + If an explicit archive_name is not given, we still want the file inside the zip + file not to be named something.zip, because that causes confusion (GH39465). + """ + if isinstance(self.buffer.filename, (os.PathLike, str)): + filename = Path(self.buffer.filename) + if filename.suffix == ".zip": + return filename.with_suffix("").name + return filename.name + return None + + def write_to_buffer(self) -> None: + # ZipFile needs a non-empty string + archive_name = self.archive_name or self.infer_filename() or "zip" + self.buffer.writestr(archive_name, self.getvalue()) + + +class _IOWrapper: + # TextIOWrapper is overly strict: it request that the buffer has seekable, readable, + # and writable. If we have a read-only buffer, we shouldn't need writable and vice + # versa. 
Some buffers, are seek/read/writ-able but they do not have the "-able" + # methods, e.g., tempfile.SpooledTemporaryFile. + # If a buffer does not have the above "-able" methods, we simple assume they are + # seek/read/writ-able. + def __init__(self, buffer: BaseBuffer) -> None: + self.buffer = buffer + + def __getattr__(self, name: str): + return getattr(self.buffer, name) + + def readable(self) -> bool: + if hasattr(self.buffer, "readable"): + # error: "BaseBuffer" has no attribute "readable" + return self.buffer.readable() # type: ignore[attr-defined] + return True + + def seekable(self) -> bool: + if hasattr(self.buffer, "seekable"): + return self.buffer.seekable() + return True + + def writable(self) -> bool: + if hasattr(self.buffer, "writable"): + # error: "BaseBuffer" has no attribute "writable" + return self.buffer.writable() # type: ignore[attr-defined] + return True + + +class _BytesIOWrapper: + # Wrapper that wraps a StringIO buffer and reads bytes from it + # Created for compat with pyarrow read_csv + def __init__(self, buffer: StringIO | TextIOBase, encoding: str = "utf-8") -> None: + self.buffer = buffer + self.encoding = encoding + # Because a character can be represented by more than 1 byte, + # it is possible that reading will produce more bytes than n + # We store the extra bytes in this overflow variable, and append the + # overflow to the front of the bytestring the next time reading is performed + self.overflow = b"" + + def __getattr__(self, attr: str): + return getattr(self.buffer, attr) + + def read(self, n: int | None = -1) -> bytes: + assert self.buffer is not None + bytestring = self.buffer.read(n).encode(self.encoding) + # When n=-1/n greater than remaining bytes: Read entire file/rest of file + combined_bytestring = self.overflow + bytestring + if n is None or n < 0 or n >= len(combined_bytestring): + self.overflow = b"" + return combined_bytestring + else: + to_return = combined_bytestring[:n] + self.overflow = combined_bytestring[n:] + return to_return + + +def _maybe_memory_map( + handle: str | BaseBuffer, memory_map: bool +) -> tuple[str | BaseBuffer, bool, list[BaseBuffer]]: + """Try to memory map file/buffer.""" + handles: list[BaseBuffer] = [] + memory_map &= hasattr(handle, "fileno") or isinstance(handle, str) + if not memory_map: + return handle, memory_map, handles + + # need to open the file first + if isinstance(handle, str): + handle = open(handle, "rb") + handles.append(handle) + + try: + # open mmap and adds *-able + # error: Argument 1 to "_IOWrapper" has incompatible type "mmap"; + # expected "BaseBuffer" + wrapped = _IOWrapper( + mmap.mmap( + handle.fileno(), 0, access=mmap.ACCESS_READ # type: ignore[arg-type] + ) + ) + finally: + for handle in reversed(handles): + # error: "BaseBuffer" has no attribute "close" + handle.close() # type: ignore[attr-defined] + + return wrapped, memory_map, [wrapped] + + +def file_exists(filepath_or_buffer: FilePath | BaseBuffer) -> bool: + """Test whether file exists.""" + exists = False + filepath_or_buffer = stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, str): + return exists + try: + exists = os.path.exists(filepath_or_buffer) + # gh-5874: if the filepath is too long will raise here + except (TypeError, ValueError): + pass + return exists + + +def _is_binary_mode(handle: FilePath | BaseBuffer, mode: str) -> bool: + """Whether the handle is opened in binary mode""" + # specified by user + if "t" in mode or "b" in mode: + return "b" in mode + + # exceptions + text_classes = ( + # 
classes that expect string but have 'b' in mode + codecs.StreamWriter, + codecs.StreamReader, + codecs.StreamReaderWriter, + ) + if issubclass(type(handle), text_classes): + return False + + return isinstance(handle, _get_binary_io_classes()) or "b" in getattr( + handle, "mode", mode + ) + + +@functools.lru_cache +def _get_binary_io_classes() -> tuple[type, ...]: + """IO classes that that expect bytes""" + binary_classes: tuple[type, ...] = (BufferedIOBase, RawIOBase) + + # python-zstandard doesn't use any of the builtin base classes; instead we + # have to use the `zstd.ZstdDecompressionReader` class for isinstance checks. + # Unfortunately `zstd.ZstdDecompressionReader` isn't exposed by python-zstandard + # so we have to get it from a `zstd.ZstdDecompressor` instance. + # See also https://github.com/indygreg/python-zstandard/pull/165. + zstd = import_optional_dependency("zstandard", errors="ignore") + if zstd is not None: + with zstd.ZstdDecompressor().stream_reader(b"") as reader: + binary_classes += (type(reader),) + + return binary_classes diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py new file mode 100644 index 00000000..85e92da8 --- /dev/null +++ b/pandas/io/date_converters.py @@ -0,0 +1,131 @@ +"""This module is designed for community supported date conversion functions""" +from __future__ import annotations + +import warnings + +import numpy as np + +from pandas._libs.tslibs import parsing +from pandas._typing import npt +from pandas.util._exceptions import find_stack_level + + +def parse_date_time(date_col, time_col) -> npt.NDArray[np.object_]: + """ + Parse columns with dates and times into a single datetime column. + + .. deprecated:: 1.2 + """ + warnings.warn( + """ + Use pd.to_datetime(date_col + " " + time_col) instead to get a Pandas Series. + Use pd.to_datetime(date_col + " " + time_col).to_pydatetime() instead to get a Numpy array. +""", # noqa: E501 + FutureWarning, + stacklevel=find_stack_level(), + ) + date_col = _maybe_cast(date_col) + time_col = _maybe_cast(time_col) + return parsing.try_parse_date_and_time(date_col, time_col) + + +def parse_date_fields(year_col, month_col, day_col) -> npt.NDArray[np.object_]: + """ + Parse columns with years, months and days into a single date column. + + .. deprecated:: 1.2 + """ + warnings.warn( + """ + Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) instead to get a Pandas Series. + Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) and + np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array. +""", # noqa: E501 + FutureWarning, + stacklevel=find_stack_level(), + ) + + year_col = _maybe_cast(year_col) + month_col = _maybe_cast(month_col) + day_col = _maybe_cast(day_col) + return parsing.try_parse_year_month_day(year_col, month_col, day_col) + + +def parse_all_fields( + year_col, month_col, day_col, hour_col, minute_col, second_col +) -> npt.NDArray[np.object_]: + """ + Parse columns with datetime information into a single datetime column. + + .. deprecated:: 1.2 + """ + + warnings.warn( + """ + Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col, + "hour": hour_col, "minute": minute_col, second": second_col}) instead to get a Pandas Series. + Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col, + "hour": hour_col, "minute": minute_col, second": second_col}) and + np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array. 
+""", # noqa: E501 + FutureWarning, + stacklevel=find_stack_level(), + ) + + year_col = _maybe_cast(year_col) + month_col = _maybe_cast(month_col) + day_col = _maybe_cast(day_col) + hour_col = _maybe_cast(hour_col) + minute_col = _maybe_cast(minute_col) + second_col = _maybe_cast(second_col) + return parsing.try_parse_datetime_components( + year_col, month_col, day_col, hour_col, minute_col, second_col + ) + + +def generic_parser(parse_func, *cols) -> np.ndarray: + """ + Use dateparser to parse columns with data information into a single datetime column. + + .. deprecated:: 1.2 + """ + + warnings.warn( + """ + Use pd.to_datetime instead. +""", + FutureWarning, + stacklevel=find_stack_level(), + ) + + N = _check_columns(cols) + results = np.empty(N, dtype=object) + + for i in range(N): + args = [c[i] for c in cols] + results[i] = parse_func(*args) + + return results + + +def _maybe_cast(arr: np.ndarray) -> np.ndarray: + if not arr.dtype.type == np.object_: + arr = np.array(arr, dtype=object) + return arr + + +def _check_columns(cols) -> int: + if not len(cols): + raise AssertionError("There must be at least 1 column") + + head, tail = cols[0], cols[1:] + + N = len(head) + + for i, n in enumerate(map(len, tail)): + if n != N: + raise AssertionError( + f"All columns must have the same length: {N}; column {i} has length {n}" + ) + + return N diff --git a/pandas/io/excel/__init__.py b/pandas/io/excel/__init__.py new file mode 100644 index 00000000..854e2a1e --- /dev/null +++ b/pandas/io/excel/__init__.py @@ -0,0 +1,24 @@ +from pandas.io.excel._base import ( + ExcelFile, + ExcelWriter, + read_excel, +) +from pandas.io.excel._odswriter import ODSWriter as _ODSWriter +from pandas.io.excel._openpyxl import OpenpyxlWriter as _OpenpyxlWriter +from pandas.io.excel._util import register_writer +from pandas.io.excel._xlsxwriter import XlsxWriter as _XlsxWriter +from pandas.io.excel._xlwt import XlwtWriter as _XlwtWriter + +__all__ = ["read_excel", "ExcelWriter", "ExcelFile"] + + +register_writer(_OpenpyxlWriter) + + +register_writer(_XlwtWriter) + + +register_writer(_XlsxWriter) + + +register_writer(_ODSWriter) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py new file mode 100644 index 00000000..4b7cd1a9 --- /dev/null +++ b/pandas/io/excel/_base.py @@ -0,0 +1,1781 @@ +from __future__ import annotations + +import abc +import datetime +from functools import partial +from io import BytesIO +import os +from textwrap import fill +from typing import ( + IO, + Any, + Callable, + Hashable, + Iterable, + List, + Literal, + Mapping, + Sequence, + Union, + cast, + overload, +) +import warnings +import zipfile + +from pandas._config import config + +from pandas._libs.parsers import STR_NA_VALUES +from pandas._typing import ( + DtypeArg, + FilePath, + IntStrT, + ReadBuffer, + StorageOptions, + WriteExcelBuffer, +) +from pandas.compat._optional import ( + get_version, + import_optional_dependency, +) +from pandas.errors import EmptyDataError +from pandas.util._decorators import ( + Appender, + deprecate_kwarg, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_bool, + is_float, + is_integer, + is_list_like, +) + +from pandas.core.frame import DataFrame +from pandas.core.shared_docs import _shared_docs +from pandas.util.version import Version + +from pandas.io.common import ( + IOHandles, + get_handle, + stringify_path, + validate_header_arg, +) +from pandas.io.excel._util import ( + fill_mi_header, + 
get_default_engine, + get_writer, + maybe_convert_usecols, + pop_header_name, +) +from pandas.io.parsers import TextParser +from pandas.io.parsers.readers import validate_integer + +_read_excel_doc = ( + """ +Read an Excel file into a pandas DataFrame. + +Supports `xls`, `xlsx`, `xlsm`, `xlsb`, `odf`, `ods` and `odt` file extensions +read from a local filesystem or URL. Supports an option to read +a single sheet or a list of sheets. + +Parameters +---------- +io : str, bytes, ExcelFile, xlrd.Book, path object, or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.xlsx``. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handle (e.g. via builtin ``open`` function) + or ``StringIO``. +sheet_name : str, int, list, or None, default 0 + Strings are used for sheet names. Integers are used in zero-indexed + sheet positions (chart sheets do not count as a sheet position). + Lists of strings/integers are used to request multiple sheets. + Specify None to get all worksheets. + + Available cases: + + * Defaults to ``0``: 1st sheet as a `DataFrame` + * ``1``: 2nd sheet as a `DataFrame` + * ``"Sheet1"``: Load sheet with name "Sheet1" + * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5" + as a dict of `DataFrame` + * None: All worksheets. + +header : int, list of int, default 0 + Row (0-indexed) to use for the column labels of the parsed + DataFrame. If a list of integers is passed those row positions will + be combined into a ``MultiIndex``. Use None if there is no header. +names : array-like, default None + List of column names to use. If file contains no header row, + then you should explicitly pass header=None. +index_col : int, list of int, default None + Column (0-indexed) to use as the row labels of the DataFrame. + Pass None if there is no such column. If a list is passed, + those columns will be combined into a ``MultiIndex``. If a + subset of data is selected with ``usecols``, index_col + is based on the subset. + + Missing values will be forward filled to allow roundtripping with + ``to_excel`` for ``merged_cells=True``. To avoid forward filling the + missing values use ``set_index`` after reading the data instead of + ``index_col``. +usecols : str, list-like, or callable, default None + * If None, then parse all columns. + * If str, then indicates comma separated list of Excel column letters + and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of + both sides. + * If list of int, then indicates list of column numbers to be parsed + (0-indexed). + * If list of string, then indicates list of column names to be parsed. + * If callable, then evaluate each column name against it and parse the + column if the callable returns ``True``. + + Returns a subset of the columns according to behavior above. +squeeze : bool, default False + If the parsed data only contains one column then return a Series. + + .. deprecated:: 1.4.0 + Append ``.squeeze("columns")`` to the call to ``read_excel`` to squeeze + the data. +dtype : Type name or dict of column -> type, default None + Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32}} + Use `object` to preserve data as stored in Excel and not interpret dtype. 
+ If converters are specified, they will be applied INSTEAD + of dtype conversion. +engine : str, default None + If io is not a buffer or path, this must be set to identify io. + Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb". + Engine compatibility : + + - "xlrd" supports old-style Excel files (.xls). + - "openpyxl" supports newer Excel file formats. + - "odf" supports OpenDocument file formats (.odf, .ods, .odt). + - "pyxlsb" supports Binary Excel files. + + .. versionchanged:: 1.2.0 + The engine `xlrd `_ + now only supports old-style ``.xls`` files. + When ``engine=None``, the following logic will be + used to determine the engine: + + - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), + then `odf `_ will be used. + - Otherwise if ``path_or_buffer`` is an xls format, + ``xlrd`` will be used. + - Otherwise if ``path_or_buffer`` is in xlsb format, + ``pyxlsb`` will be used. + + .. versionadded:: 1.3.0 + - Otherwise ``openpyxl`` will be used. + + .. versionchanged:: 1.3.0 + +converters : dict, default None + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the Excel cell content, and return the transformed + content. +true_values : list, default None + Values to consider as True. +false_values : list, default None + Values to consider as False. +skiprows : list-like, int, or callable, optional + Line numbers to skip (0-indexed) or number of lines to skip (int) at the + start of the file. If callable, the callable function will be evaluated + against the row indices, returning True if the row should be skipped and + False otherwise. An example of a valid callable argument would be ``lambda + x: x in [0, 2]``. +nrows : int, default None + Number of rows to parse. +na_values : scalar, str, list-like, or dict, default None + Additional strings to recognize as NA/NaN. If dict passed, specific + per-column NA values. By default the following values are interpreted + as NaN: '""" + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. +keep_default_na : bool, default True + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. +na_filter : bool, default True + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing na_filter=False can improve the performance + of reading a large file. +verbose : bool, default False + Indicate number of NA values placed in non-numeric columns. +parse_dates : bool, list-like, or dict, default False + The behavior is as follows: + + * bool. If True -> try parsing the index. + * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + each as a separate date column. + * list of lists. e.g. 
If [[1, 3]] -> combine columns 1 and 3 and parse as + a single date column. + * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call + result 'foo' + + If a column or index contains an unparsable date, the entire column or + index will be returned unaltered as an object data type. If you don`t want to + parse some cells as date just change their type in Excel to "Text". + For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``. + + Note: A fast-path exists for iso8601-formatted dates. +date_parser : function, optional + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. Pandas will try to call `date_parser` in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. +thousands : str, default None + Thousands separator for parsing string columns to numeric. Note that + this parameter is only necessary for columns stored as TEXT in Excel, + any numeric columns will automatically be parsed, regardless of display + format. +decimal : str, default '.' + Character to recognize as decimal point for parsing string columns to numeric. + Note that this parameter is only necessary for columns stored as TEXT in Excel, + any numeric columns will automatically be parsed, regardless of display + format.(e.g. use ',' for European data). + + .. versionadded:: 1.4.0 + +comment : str, default None + Comments out remainder of line. Pass a character or characters to this + argument to indicate comments in the input file. Any data between the + comment string and the end of the current line is ignored. +skipfooter : int, default 0 + Rows at the end to skip (0-indexed). +convert_float : bool, default True + Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric + data will be read in as floats: Excel stores all numbers as floats + internally. + + .. deprecated:: 1.3.0 + convert_float will be removed in a future version + +mangle_dupe_cols : bool, default True + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than + 'X'...'X'. Passing in False will cause data to be overwritten if there + are duplicate names in the columns. + + .. deprecated:: 1.5.0 + Not implemented, and a new argument to specify the pattern for the + names of duplicated columns will be added instead + +{storage_options} + + .. versionadded:: 1.2.0 + +Returns +------- +DataFrame or dict of DataFrames + DataFrame from the passed in Excel file. See notes in sheet_name + argument for more information on when a dict of DataFrames is returned. + +See Also +-------- +DataFrame.to_excel : Write DataFrame to an Excel file. +DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. +read_csv : Read a comma-separated values (csv) file into DataFrame. +read_fwf : Read a table of fixed-width formatted lines into DataFrame. + +Examples +-------- +The file can be read using the file name as string or an open file object: + +>>> pd.read_excel('tmp.xlsx', index_col=0) # doctest: +SKIP + Name Value +0 string1 1 +1 string2 2 +2 #Comment 3 + +>>> pd.read_excel(open('tmp.xlsx', 'rb'), +... 
sheet_name='Sheet3') # doctest: +SKIP + Unnamed: 0 Name Value +0 0 string1 1 +1 1 string2 2 +2 2 #Comment 3 + +Index and header can be specified via the `index_col` and `header` arguments + +>>> pd.read_excel('tmp.xlsx', index_col=None, header=None) # doctest: +SKIP + 0 1 2 +0 NaN Name Value +1 0.0 string1 1 +2 1.0 string2 2 +3 2.0 #Comment 3 + +Column types are inferred but can be explicitly specified + +>>> pd.read_excel('tmp.xlsx', index_col=0, +... dtype={{'Name': str, 'Value': float}}) # doctest: +SKIP + Name Value +0 string1 1.0 +1 string2 2.0 +2 #Comment 3.0 + +True, False, and NA values, and thousands separators have defaults, +but can be explicitly specified, too. Supply the values you would like +as strings or lists of strings! + +>>> pd.read_excel('tmp.xlsx', index_col=0, +... na_values=['string1', 'string2']) # doctest: +SKIP + Name Value +0 NaN 1 +1 NaN 2 +2 #Comment 3 + +Comment lines in the excel input file can be skipped using the `comment` kwarg + +>>> pd.read_excel('tmp.xlsx', index_col=0, comment='#') # doctest: +SKIP + Name Value +0 string1 1.0 +1 string2 2.0 +2 None NaN +""" +) + + +@overload +def read_excel( + io, + # sheet name is str or int -> DataFrame + sheet_name: str | int = ..., + header: int | Sequence[int] | None = ..., + names: list[str] | None = ..., + index_col: int | Sequence[int] | None = ..., + usecols: int + | str + | Sequence[int] + | Sequence[str] + | Callable[[str], bool] + | None = ..., + squeeze: bool | None = ..., + dtype: DtypeArg | None = ..., + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., + converters: dict[str, Callable] | dict[int, Callable] | None = ..., + true_values: Iterable[Hashable] | None = ..., + false_values: Iterable[Hashable] | None = ..., + skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + parse_dates: list | dict | bool = ..., + date_parser: Callable | None = ..., + thousands: str | None = ..., + decimal: str = ..., + comment: str | None = ..., + skipfooter: int = ..., + convert_float: bool | None = ..., + mangle_dupe_cols: bool = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: + ... + + +@overload +def read_excel( + io, + # sheet name is list or None -> dict[IntStrT, DataFrame] + sheet_name: list[IntStrT] | None, + header: int | Sequence[int] | None = ..., + names: list[str] | None = ..., + index_col: int | Sequence[int] | None = ..., + usecols: int + | str + | Sequence[int] + | Sequence[str] + | Callable[[str], bool] + | None = ..., + squeeze: bool | None = ..., + dtype: DtypeArg | None = ..., + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., + converters: dict[str, Callable] | dict[int, Callable] | None = ..., + true_values: Iterable[Hashable] | None = ..., + false_values: Iterable[Hashable] | None = ..., + skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + parse_dates: list | dict | bool = ..., + date_parser: Callable | None = ..., + thousands: str | None = ..., + decimal: str = ..., + comment: str | None = ..., + skipfooter: int = ..., + convert_float: bool | None = ..., + mangle_dupe_cols: bool = ..., + storage_options: StorageOptions = ..., +) -> dict[IntStrT, DataFrame]: + ... 
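The two overloads above encode the rule spelled out in the docstring: a single sheet label returns one DataFrame, while a list of labels or None returns a dict keyed by sheet. A doctest-style sketch, using the same hypothetical 'tmp.xlsx' as the examples above (hence the skip markers):

>>> one = pd.read_excel('tmp.xlsx', sheet_name=0)        # doctest: +SKIP
>>> isinstance(one, pd.DataFrame)                        # doctest: +SKIP
True
>>> many = pd.read_excel('tmp.xlsx', sheet_name=None)    # doctest: +SKIP
>>> isinstance(many, dict)                               # doctest: +SKIP
True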
+ + +@doc(storage_options=_shared_docs["storage_options"]) +@deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None) +@deprecate_nonkeyword_arguments(allowed_args=["io", "sheet_name"], version="2.0") +@Appender(_read_excel_doc) +def read_excel( + io, + sheet_name: str | int | list[IntStrT] | None = 0, + header: int | Sequence[int] | None = 0, + names: list[str] | None = None, + index_col: int | Sequence[int] | None = None, + usecols: int + | str + | Sequence[int] + | Sequence[str] + | Callable[[str], bool] + | None = None, + squeeze: bool | None = None, + dtype: DtypeArg | None = None, + engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = None, + converters: dict[str, Callable] | dict[int, Callable] | None = None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, + na_values=None, + keep_default_na: bool = True, + na_filter: bool = True, + verbose: bool = False, + parse_dates: list | dict | bool = False, + date_parser: Callable | None = None, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, + storage_options: StorageOptions = None, +) -> DataFrame | dict[IntStrT, DataFrame]: + + should_close = False + if not isinstance(io, ExcelFile): + should_close = True + io = ExcelFile(io, storage_options=storage_options, engine=engine) + elif engine and engine != io.engine: + raise ValueError( + "Engine should not be specified when passing " + "an ExcelFile - ExcelFile already has the engine set" + ) + + try: + data = io.parse( + sheet_name=sheet_name, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + squeeze=squeeze, + dtype=dtype, + converters=converters, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + keep_default_na=keep_default_na, + na_filter=na_filter, + verbose=verbose, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + decimal=decimal, + comment=comment, + skipfooter=skipfooter, + convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, + ) + finally: + # make sure to close opened file handles + if should_close: + io.close() + return data + + +class BaseExcelReader(metaclass=abc.ABCMeta): + def __init__( + self, filepath_or_buffer, storage_options: StorageOptions = None + ) -> None: + # First argument can also be bytes, so create a buffer + if isinstance(filepath_or_buffer, bytes): + filepath_or_buffer = BytesIO(filepath_or_buffer) + + self.handles = IOHandles( + handle=filepath_or_buffer, compression={"method": None} + ) + if not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): + self.handles = get_handle( + filepath_or_buffer, "rb", storage_options=storage_options, is_text=False + ) + + if isinstance(self.handles.handle, self._workbook_class): + self.book = self.handles.handle + elif hasattr(self.handles.handle, "read"): + # N.B. xlrd.Book has a read attribute too + self.handles.handle.seek(0) + try: + self.book = self.load_workbook(self.handles.handle) + except Exception: + self.close() + raise + else: + raise ValueError( + "Must explicitly set engine if not passing in buffer or path for io." 
+ ) + + @property + @abc.abstractmethod + def _workbook_class(self): + pass + + @abc.abstractmethod + def load_workbook(self, filepath_or_buffer): + pass + + def close(self) -> None: + if hasattr(self, "book"): + if hasattr(self.book, "close"): + # pyxlsb: opens a TemporaryFile + # openpyxl: https://stackoverflow.com/questions/31416842/ + # openpyxl-does-not-close-excel-workbook-in-read-only-mode + self.book.close() + elif hasattr(self.book, "release_resources"): + # xlrd + # https://github.com/python-excel/xlrd/blob/2.0.1/xlrd/book.py#L548 + self.book.release_resources() + self.handles.close() + + @property + @abc.abstractmethod + def sheet_names(self) -> list[str]: + pass + + @abc.abstractmethod + def get_sheet_by_name(self, name: str): + pass + + @abc.abstractmethod + def get_sheet_by_index(self, index: int): + pass + + @abc.abstractmethod + def get_sheet_data(self, sheet, convert_float: bool, rows: int | None = None): + pass + + def raise_if_bad_sheet_by_index(self, index: int) -> None: + n_sheets = len(self.sheet_names) + if index >= n_sheets: + raise ValueError( + f"Worksheet index {index} is invalid, {n_sheets} worksheets found" + ) + + def raise_if_bad_sheet_by_name(self, name: str) -> None: + if name not in self.sheet_names: + raise ValueError(f"Worksheet named '{name}' not found") + + def _check_skiprows_func( + self, + skiprows: Callable, + rows_to_use: int, + ) -> int: + """ + Determine how many file rows are required to obtain `nrows` data + rows when `skiprows` is a function. + + Parameters + ---------- + skiprows : function + The function passed to read_excel by the user. + rows_to_use : int + The number of rows that will be needed for the header and + the data. + + Returns + ------- + int + """ + i = 0 + rows_used_so_far = 0 + while rows_used_so_far < rows_to_use: + if not skiprows(i): + rows_used_so_far += 1 + i += 1 + return i + + def _calc_rows( + self, + header: int | Sequence[int] | None, + index_col: int | Sequence[int] | None, + skiprows: Sequence[int] | int | Callable[[int], object] | None, + nrows: int | None, + ) -> int | None: + """ + If nrows specified, find the number of rows needed from the + file, otherwise return None. + + + Parameters + ---------- + header : int, list of int, or None + See read_excel docstring. + index_col : int, list of int, or None + See read_excel docstring. + skiprows : list-like, int, callable, or None + See read_excel docstring. + nrows : int or None + See read_excel docstring. 
+ + Returns + ------- + int or None + """ + if nrows is None: + return None + if header is None: + header_rows = 1 + elif is_integer(header): + header = cast(int, header) + header_rows = 1 + header + else: + header = cast(Sequence, header) + header_rows = 1 + header[-1] + # If there is a MultiIndex header and an index then there is also + # a row containing just the index name(s) + if is_list_like(header) and index_col is not None: + header = cast(Sequence, header) + if len(header) > 1: + header_rows += 1 + if skiprows is None: + return header_rows + nrows + if is_integer(skiprows): + skiprows = cast(int, skiprows) + return header_rows + nrows + skiprows + if is_list_like(skiprows): + + def f(skiprows: Sequence, x: int) -> bool: + return x in skiprows + + skiprows = cast(Sequence, skiprows) + return self._check_skiprows_func(partial(f, skiprows), header_rows + nrows) + if callable(skiprows): + return self._check_skiprows_func( + skiprows, + header_rows + nrows, + ) + # else unexpected skiprows type: read_excel will not optimize + # the number of rows read from file + return None + + def parse( + self, + sheet_name: str | int | list[int] | list[str] | None = 0, + header: int | Sequence[int] | None = 0, + names=None, + index_col: int | Sequence[int] | None = None, + usecols=None, + squeeze: bool | None = None, + dtype: DtypeArg | None = None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, + na_values=None, + verbose: bool = False, + parse_dates: list | dict | bool = False, + date_parser: Callable | None = None, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, + **kwds, + ): + + if convert_float is None: + convert_float = True + else: + warnings.warn( + "convert_float is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + validate_header_arg(header) + validate_integer("nrows", nrows) + + ret_dict = False + + # Keep sheetname to maintain backwards compatibility. + sheets: list[int] | list[str] + if isinstance(sheet_name, list): + sheets = sheet_name + ret_dict = True + elif sheet_name is None: + sheets = self.sheet_names + ret_dict = True + elif isinstance(sheet_name, str): + sheets = [sheet_name] + else: + sheets = [sheet_name] + + # handle same-type duplicates. 
+ sheets = cast(Union[List[int], List[str]], list(dict.fromkeys(sheets).keys())) + + output = {} + + for asheetname in sheets: + if verbose: + print(f"Reading sheet {asheetname}") + + if isinstance(asheetname, str): + sheet = self.get_sheet_by_name(asheetname) + else: # assume an integer if not a string + sheet = self.get_sheet_by_index(asheetname) + + file_rows_needed = self._calc_rows(header, index_col, skiprows, nrows) + data = self.get_sheet_data(sheet, convert_float, file_rows_needed) + if hasattr(sheet, "close"): + # pyxlsb opens two TemporaryFiles + sheet.close() + usecols = maybe_convert_usecols(usecols) + + if not data: + output[asheetname] = DataFrame() + continue + + is_list_header = False + is_len_one_list_header = False + if is_list_like(header): + assert isinstance(header, Sequence) + is_list_header = True + if len(header) == 1: + is_len_one_list_header = True + + if is_len_one_list_header: + header = cast(Sequence[int], header)[0] + + # forward fill and pull out names for MultiIndex column + header_names = None + if header is not None and is_list_like(header): + assert isinstance(header, Sequence) + + header_names = [] + control_row = [True] * len(data[0]) + + for row in header: + if is_integer(skiprows): + assert isinstance(skiprows, int) + row += skiprows + + if row > len(data) - 1: + raise ValueError( + f"header index {row} exceeds maximum index " + f"{len(data) - 1} of data.", + ) + + data[row], control_row = fill_mi_header(data[row], control_row) + + if index_col is not None: + header_name, _ = pop_header_name(data[row], index_col) + header_names.append(header_name) + + # If there is a MultiIndex header and an index then there is also + # a row containing just the index name(s) + has_index_names = False + if is_list_header and not is_len_one_list_header and index_col is not None: + + index_col_list: Sequence[int] + if isinstance(index_col, int): + index_col_list = [index_col] + else: + assert isinstance(index_col, Sequence) + index_col_list = index_col + + # We have to handle mi without names. If any of the entries in the data + # columns are not empty, this is a regular row + assert isinstance(header, Sequence) + if len(header) < len(data): + potential_index_names = data[len(header)] + potential_data = [ + x + for i, x in enumerate(potential_index_names) + if not control_row[i] and i not in index_col_list + ] + has_index_names = all(x == "" or x is None for x in potential_data) + + if is_list_like(index_col): + # Forward fill values for MultiIndex index. + if header is None: + offset = 0 + elif isinstance(header, int): + offset = 1 + header + else: + offset = 1 + max(header) + + # GH34673: if MultiIndex names present and not defined in the header, + # offset needs to be incremented so that forward filling starts + # from the first MI value instead of the name + if has_index_names: + offset += 1 + + # Check if we have an empty dataset + # before trying to collect data. 
+ if offset < len(data): + assert isinstance(index_col, Sequence) + + for col in index_col: + last = data[offset][col] + + for row in range(offset + 1, len(data)): + if data[row][col] == "" or data[row][col] is None: + data[row][col] = last + else: + last = data[row][col] + + # GH 12292 : error when read one empty column from excel file + try: + parser = TextParser( + data, + names=names, + header=header, + index_col=index_col, + has_index_names=has_index_names, + squeeze=squeeze, + dtype=dtype, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + skip_blank_lines=False, # GH 39808 + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + decimal=decimal, + comment=comment, + skipfooter=skipfooter, + usecols=usecols, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) + + output[asheetname] = parser.read(nrows=nrows) + + if not squeeze or isinstance(output[asheetname], DataFrame): + if header_names: + output[asheetname].columns = output[ + asheetname + ].columns.set_names(header_names) + + except EmptyDataError: + # No Data, return an empty DataFrame + output[asheetname] = DataFrame() + + if ret_dict: + return output + else: + return output[asheetname] + + +@doc(storage_options=_shared_docs["storage_options"]) +class ExcelWriter(metaclass=abc.ABCMeta): + """ + Class for writing DataFrame objects into excel sheets. + + Default is to use: + + * `xlwt `__ for xls files + * `xlsxwriter `__ for xlsx files if xlsxwriter + is installed otherwise `openpyxl `__ + * `odswriter `__ for ods files + + See ``DataFrame.to_excel`` for typical usage. + + The writer should be used as a context manager. Otherwise, call `close()` to save + and close any opened file handles. + + Parameters + ---------- + path : str or typing.BinaryIO + Path to xls or xlsx or ods file. + engine : str (optional) + Engine to use for writing. If None, defaults to + ``io.excel..writer``. NOTE: can only be passed as a keyword + argument. + + .. deprecated:: 1.2.0 + + As the `xlwt `__ package is no longer + maintained, the ``xlwt`` engine will be removed in a future + version of pandas. + + date_format : str, default None + Format string for dates written into Excel files (e.g. 'YYYY-MM-DD'). + datetime_format : str, default None + Format string for datetime objects written into Excel files. + (e.g. 'YYYY-MM-DD HH:MM:SS'). + mode : {{'w', 'a'}}, default 'w' + File mode to use (write or append). Append does not work with fsspec URLs. + {storage_options} + + .. versionadded:: 1.2.0 + + if_sheet_exists : {{'error', 'new', 'replace', 'overlay'}}, default 'error' + How to behave when trying to write to a sheet that already + exists (append mode only). + + * error: raise a ValueError. + * new: Create a new sheet, with a name determined by the engine. + * replace: Delete the contents of the sheet before writing to it. + * overlay: Write contents to the existing sheet without removing the old + contents. + + .. versionadded:: 1.3.0 + + .. versionchanged:: 1.4.0 + + Added ``overlay`` option + + engine_kwargs : dict, optional + Keyword arguments to be passed into the engine. These will be passed to + the following functions of the respective engines: + + * xlsxwriter: ``xlsxwriter.Workbook(file, **engine_kwargs)`` + * openpyxl (write mode): ``openpyxl.Workbook(**engine_kwargs)`` + * openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)`` + * odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)`` + + .. 
versionadded:: 1.3.0 + **kwargs : dict, optional + Keyword arguments to be passed into the engine. + + .. deprecated:: 1.3.0 + + Use engine_kwargs instead. + + Notes + ----- + For compatibility with CSV writers, ExcelWriter serializes lists + and dicts to strings before writing. + + Examples + -------- + Default usage: + + >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP + >>> with pd.ExcelWriter("path_to_file.xlsx") as writer: + ... df.to_excel(writer) # doctest: +SKIP + + To write to separate sheets in a single file: + + >>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"]) # doctest: +SKIP + >>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP + >>> with pd.ExcelWriter("path_to_file.xlsx") as writer: + ... df1.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP + ... df2.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP + + You can set the date format or datetime format: + + >>> from datetime import date, datetime # doctest: +SKIP + >>> df = pd.DataFrame( + ... [ + ... [date(2014, 1, 31), date(1999, 9, 24)], + ... [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + ... ], + ... index=["Date", "Datetime"], + ... columns=["X", "Y"], + ... ) # doctest: +SKIP + >>> with pd.ExcelWriter( + ... "path_to_file.xlsx", + ... date_format="YYYY-MM-DD", + ... datetime_format="YYYY-MM-DD HH:MM:SS" + ... ) as writer: + ... df.to_excel(writer) # doctest: +SKIP + + You can also append to an existing Excel file: + + >>> with pd.ExcelWriter("path_to_file.xlsx", mode="a", engine="openpyxl") as writer: + ... df.to_excel(writer, sheet_name="Sheet3") # doctest: +SKIP + + Here, the `if_sheet_exists` parameter can be set to replace a sheet if it + already exists: + + >>> with ExcelWriter( + ... "path_to_file.xlsx", + ... mode="a", + ... engine="openpyxl", + ... if_sheet_exists="replace", + ... ) as writer: + ... df.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP + + You can also write multiple DataFrames to a single sheet. Note that the + ``if_sheet_exists`` parameter needs to be set to ``overlay``: + + >>> with ExcelWriter("path_to_file.xlsx", + ... mode="a", + ... engine="openpyxl", + ... if_sheet_exists="overlay", + ... ) as writer: + ... df1.to_excel(writer, sheet_name="Sheet1") + ... df2.to_excel(writer, sheet_name="Sheet1", startcol=3) # doctest: +SKIP + + You can store Excel file in RAM: + + >>> import io + >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) + >>> buffer = io.BytesIO() + >>> with pd.ExcelWriter(buffer) as writer: + ... df.to_excel(writer) + + You can pack Excel file into zip archive: + + >>> import zipfile # doctest: +SKIP + >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP + >>> with zipfile.ZipFile("path_to_file.zip", "w") as zf: + ... with zf.open("filename.xlsx", "w") as buffer: + ... with pd.ExcelWriter(buffer) as writer: + ... df.to_excel(writer) # doctest: +SKIP + + You can specify additional arguments to the underlying engine: + + >>> with pd.ExcelWriter( + ... "path_to_file.xlsx", + ... engine="xlsxwriter", + ... engine_kwargs={{"options": {{"nan_inf_to_errors": True}}}} + ... ) as writer: + ... df.to_excel(writer) # doctest: +SKIP + + In append mode, ``engine_kwargs`` are passed through to + openpyxl's ``load_workbook``: + + >>> with pd.ExcelWriter( + ... "path_to_file.xlsx", + ... engine="openpyxl", + ... mode="a", + ... engine_kwargs={{"keep_vba": True}} + ... ) as writer: + ... 
df.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP + """ + + # Defining an ExcelWriter implementation (see abstract methods for more...) + + # - Mandatory + # - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)`` + # --> called to write additional DataFrames to disk + # - ``_supported_extensions`` (tuple of supported extensions), used to + # check that engine supports the given extension. + # - ``_engine`` - string that gives the engine name. Necessary to + # instantiate class directly and bypass ``ExcelWriterMeta`` engine + # lookup. + # - ``save(self)`` --> called to save file to disk + # - Mostly mandatory (i.e. should at least exist) + # - book, cur_sheet, path + + # - Optional: + # - ``__init__(self, path, engine=None, **kwargs)`` --> always called + # with path as first argument. + + # You also need to register the class with ``register_writer()``. + # Technically, ExcelWriter implementations don't need to subclass + # ExcelWriter. + + _engine: str + _supported_extensions: tuple[str, ...] + + def __new__( + cls: type[ExcelWriter], + path: FilePath | WriteExcelBuffer | ExcelWriter, + engine: str | None = None, + date_format: str | None = None, + datetime_format: str | None = None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: Literal["error", "new", "replace", "overlay"] | None = None, + engine_kwargs: dict | None = None, + **kwargs, + ) -> ExcelWriter: + if kwargs: + if engine_kwargs is not None: + raise ValueError("Cannot use both engine_kwargs and **kwargs") + warnings.warn( + "Use of **kwargs is deprecated, use engine_kwargs instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # only switch class if generic(ExcelWriter) + if cls is ExcelWriter: + if engine is None or (isinstance(engine, str) and engine == "auto"): + if isinstance(path, str): + ext = os.path.splitext(path)[-1][1:] + else: + ext = "xlsx" + + try: + engine = config.get_option(f"io.excel.{ext}.writer", silent=True) + if engine == "auto": + engine = get_default_engine(ext, mode="writer") + except KeyError as err: + raise ValueError(f"No engine for filetype: '{ext}'") from err + + if engine == "xlwt": + xls_config_engine = config.get_option( + "io.excel.xls.writer", silent=True + ) + # Don't warn a 2nd time if user has changed the default engine for xls + if xls_config_engine != "xlwt": + warnings.warn( + "As the xlwt package is no longer maintained, the xlwt " + "engine will be removed in a future version of pandas. " + "This is the only engine in pandas that supports writing " + "in the xls format. Install openpyxl and write to an xlsx " + "file instead. You can set the option io.excel.xls.writer " + "to 'xlwt' to silence this warning. 
While this option is " + "deprecated and will also raise a warning, it can " + "be globally set and the warning suppressed.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + # for mypy + assert engine is not None + cls = get_writer(engine) + + return object.__new__(cls) + + # declare external properties you can count on + _path = None + + @property + def supported_extensions(self) -> tuple[str, ...]: + """Extensions that writer engine supports.""" + return self._supported_extensions + + @property + def engine(self) -> str: + """Name of engine.""" + return self._engine + + @property + @abc.abstractmethod + def sheets(self) -> dict[str, Any]: + """Mapping of sheet names to sheet objects.""" + pass + + # mypy doesn't handle abstract setters prior to 0.981 + # https://github.com/python/mypy/issues/4165 + @property # type: ignore[misc] + @abc.abstractmethod + def book(self): + """ + Book instance. Class type will depend on the engine used. + + This attribute can be used to access engine-specific features. + """ + pass + + # mypy doesn't handle abstract setters prior to 0.981 + # https://github.com/python/mypy/issues/4165 + @book.setter # type: ignore[misc] + @abc.abstractmethod + def book(self, other) -> None: + """ + Set book instance. Class type will depend on the engine used. + """ + pass + + def write_cells( + self, + cells, + sheet_name: str | None = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + ) -> None: + """ + Write given formatted cells into Excel an excel sheet + + .. deprecated:: 1.5.0 + + Parameters + ---------- + cells : generator + cell of formatted data to save to Excel sheet + sheet_name : str, default None + Name of Excel sheet, if None, then use self.cur_sheet + startrow : upper left cell row to dump data frame + startcol : upper left cell column to dump data frame + freeze_panes: int tuple of length 2 + contains the bottom-most row and right-most column to freeze + """ + self._deprecate("write_cells") + return self._write_cells(cells, sheet_name, startrow, startcol, freeze_panes) + + @abc.abstractmethod + def _write_cells( + self, + cells, + sheet_name: str | None = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + ) -> None: + """ + Write given formatted cells into Excel an excel sheet + + Parameters + ---------- + cells : generator + cell of formatted data to save to Excel sheet + sheet_name : str, default None + Name of Excel sheet, if None, then use self.cur_sheet + startrow : upper left cell row to dump data frame + startcol : upper left cell column to dump data frame + freeze_panes: int tuple of length 2 + contains the bottom-most row and right-most column to freeze + """ + pass + + def save(self) -> None: + """ + Save workbook to disk. + + .. deprecated:: 1.5.0 + """ + self._deprecate("save") + return self._save() + + @abc.abstractmethod + def _save(self) -> None: + """ + Save workbook to disk. 
+ """ + pass + + def __init__( + self, + path: FilePath | WriteExcelBuffer | ExcelWriter, + engine: str | None = None, + date_format: str | None = None, + datetime_format: str | None = None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ) -> None: + # validate that this engine can handle the extension + if isinstance(path, str): + ext = os.path.splitext(path)[-1] + self.check_extension(ext) + + # use mode to open the file + if "b" not in mode: + mode += "b" + # use "a" for the user to append data to excel but internally use "r+" to let + # the excel backend first read the existing file and then write any data to it + mode = mode.replace("a", "r+") + + # cast ExcelWriter to avoid adding 'if self.handles is not None' + self._handles = IOHandles( + cast(IO[bytes], path), compression={"compression": None} + ) + if not isinstance(path, ExcelWriter): + self._handles = get_handle( + path, mode, storage_options=storage_options, is_text=False + ) + self._cur_sheet = None + + if date_format is None: + self._date_format = "YYYY-MM-DD" + else: + self._date_format = date_format + if datetime_format is None: + self._datetime_format = "YYYY-MM-DD HH:MM:SS" + else: + self._datetime_format = datetime_format + + self._mode = mode + + if if_sheet_exists not in (None, "error", "new", "replace", "overlay"): + raise ValueError( + f"'{if_sheet_exists}' is not valid for if_sheet_exists. " + "Valid options are 'error', 'new', 'replace' and 'overlay'." + ) + if if_sheet_exists and "r+" not in mode: + raise ValueError("if_sheet_exists is only valid in append mode (mode='a')") + if if_sheet_exists is None: + if_sheet_exists = "error" + self._if_sheet_exists = if_sheet_exists + + def _deprecate(self, attr: str): + """ + Deprecate attribute or method for ExcelWriter. + """ + warnings.warn( + f"{attr} is not part of the public API, usage can give unexpected " + "results and will be removed in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) + + def _deprecate_set_book(self) -> None: + """ + Deprecate setting the book attribute - GH#48780. + """ + warnings.warn( + "Setting the `book` attribute is not part of the public API, " + "usage can give unexpected or corrupted results and will be " + "removed in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) + + @property + def date_format(self) -> str: + """ + Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’). + """ + return self._date_format + + @property + def datetime_format(self) -> str: + """ + Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’). + """ + return self._datetime_format + + @property + def if_sheet_exists(self) -> str: + """ + How to behave when writing to a sheet that already exists in append mode. + """ + return self._if_sheet_exists + + @property + def cur_sheet(self): + """ + Current sheet for writing. + + .. deprecated:: 1.5.0 + """ + self._deprecate("cur_sheet") + return self._cur_sheet + + @property + def handles(self) -> IOHandles[bytes]: + """ + Handles to Excel sheets. + + .. deprecated:: 1.5.0 + """ + self._deprecate("handles") + return self._handles + + @property + def path(self): + """ + Path to Excel file. + + .. 
deprecated:: 1.5.0 + """ + self._deprecate("path") + return self._path + + def __fspath__(self) -> str: + return getattr(self._handles.handle, "name", "") + + def _get_sheet_name(self, sheet_name: str | None) -> str: + if sheet_name is None: + sheet_name = self._cur_sheet + if sheet_name is None: # pragma: no cover + raise ValueError("Must pass explicit sheet_name or set _cur_sheet property") + return sheet_name + + def _value_with_fmt(self, val) -> tuple[object, str | None]: + """ + Convert numpy types to Python types for the Excel writers. + + Parameters + ---------- + val : object + Value to be written into cells + + Returns + ------- + Tuple with the first element being the converted value and the second + being an optional format + """ + fmt = None + + if is_integer(val): + val = int(val) + elif is_float(val): + val = float(val) + elif is_bool(val): + val = bool(val) + elif isinstance(val, datetime.datetime): + fmt = self._datetime_format + elif isinstance(val, datetime.date): + fmt = self._date_format + elif isinstance(val, datetime.timedelta): + val = val.total_seconds() / 86400 + fmt = "0" + else: + val = str(val) + + return val, fmt + + @classmethod + def check_extension(cls, ext: str) -> Literal[True]: + """ + checks that path's extension against the Writer's supported + extensions. If it isn't supported, raises UnsupportedFiletypeError. + """ + if ext.startswith("."): + ext = ext[1:] + if not any(ext in extension for extension in cls._supported_extensions): + raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'") + else: + return True + + # Allow use as a contextmanager + def __enter__(self) -> ExcelWriter: + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + self.close() + + def close(self) -> None: + """synonym for save, to make it more file-like""" + self._save() + self._handles.close() + + +XLS_SIGNATURES = ( + b"\x09\x00\x04\x00\x07\x00\x10\x00", # BIFF2 + b"\x09\x02\x06\x00\x00\x00\x10\x00", # BIFF3 + b"\x09\x04\x06\x00\x00\x00\x10\x00", # BIFF4 + b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", # Compound File Binary +) +ZIP_SIGNATURE = b"PK\x03\x04" +PEEK_SIZE = max(map(len, XLS_SIGNATURES + (ZIP_SIGNATURE,))) + + +@doc(storage_options=_shared_docs["storage_options"]) +def inspect_excel_format( + content_or_path: FilePath | ReadBuffer[bytes], + storage_options: StorageOptions = None, +) -> str | None: + """ + Inspect the path or content of an excel file and get its format. + + Adopted from xlrd: https://github.com/python-excel/xlrd. + + Parameters + ---------- + content_or_path : str or file-like object + Path to file or content of file to inspect. May be a URL. + {storage_options} + + Returns + ------- + str or None + Format of file if it can be determined. + + Raises + ------ + ValueError + If resulting stream is empty. + BadZipFile + If resulting stream does not have an XLS signature and is not a valid zipfile. 
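+
+    Examples
+    --------
+    A minimal sketch of the signature-based detection; the byte literal below is
+    a hypothetical stand-in for real file content rather than a full workbook, so
+    the example is not meant to be run as-is.
+
+    >>> inspect_excel_format(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" + b"\x00" * 512)  # doctest: +SKIP
+    'xls'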
+ """ + if isinstance(content_or_path, bytes): + content_or_path = BytesIO(content_or_path) + + with get_handle( + content_or_path, "rb", storage_options=storage_options, is_text=False + ) as handle: + stream = handle.handle + stream.seek(0) + buf = stream.read(PEEK_SIZE) + if buf is None: + raise ValueError("stream is empty") + else: + assert isinstance(buf, bytes) + peek = buf + stream.seek(0) + + if any(peek.startswith(sig) for sig in XLS_SIGNATURES): + return "xls" + elif not peek.startswith(ZIP_SIGNATURE): + return None + + with zipfile.ZipFile(stream) as zf: + # Workaround for some third party files that use forward slashes and + # lower case names. + component_names = [ + name.replace("\\", "/").lower() for name in zf.namelist() + ] + + if "xl/workbook.xml" in component_names: + return "xlsx" + if "xl/workbook.bin" in component_names: + return "xlsb" + if "content.xml" in component_names: + return "ods" + return "zip" + + +class ExcelFile: + """ + Class for parsing tabular excel sheets into DataFrame objects. + + See read_excel for more documentation. + + Parameters + ---------- + path_or_buffer : str, bytes, path object (pathlib.Path or py._path.local.LocalPath), + a file-like object, xlrd workbook or openpyxl workbook. + If a string or path object, expected to be a path to a + .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. + engine : str, default None + If io is not a buffer or path, this must be set to identify io. + Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb`` + Engine compatibility : + + - ``xlrd`` supports old-style Excel files (.xls). + - ``openpyxl`` supports newer Excel file formats. + - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). + - ``pyxlsb`` supports Binary Excel files. + + .. versionchanged:: 1.2.0 + + The engine `xlrd `_ + now only supports old-style ``.xls`` files. + When ``engine=None``, the following logic will be + used to determine the engine: + + - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), + then `odf `_ will be used. + - Otherwise if ``path_or_buffer`` is an xls format, + ``xlrd`` will be used. + - Otherwise if ``path_or_buffer`` is in xlsb format, + `pyxlsb `_ will be used. + + .. versionadded:: 1.3.0 + - Otherwise if `openpyxl `_ is installed, + then ``openpyxl`` will be used. + - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised. + - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. + This case will raise a ``ValueError`` in a future version of pandas. + + .. warning:: + + Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + This is not supported, switch to using ``openpyxl`` instead. + """ + + from pandas.io.excel._odfreader import ODFReader + from pandas.io.excel._openpyxl import OpenpyxlReader + from pandas.io.excel._pyxlsb import PyxlsbReader + from pandas.io.excel._xlrd import XlrdReader + + _engines: Mapping[str, Any] = { + "xlrd": XlrdReader, + "openpyxl": OpenpyxlReader, + "odf": ODFReader, + "pyxlsb": PyxlsbReader, + } + + def __init__( + self, + path_or_buffer, + engine: str | None = None, + storage_options: StorageOptions = None, + ) -> None: + if engine is not None and engine not in self._engines: + raise ValueError(f"Unknown engine: {engine}") + + # First argument can also be bytes, so create a buffer + if isinstance(path_or_buffer, bytes): + path_or_buffer = BytesIO(path_or_buffer) + + # Could be a str, ExcelFile, Book, etc. 
+ self.io = path_or_buffer + # Always a string + self._io = stringify_path(path_or_buffer) + + # Determine xlrd version if installed + if import_optional_dependency("xlrd", errors="ignore") is None: + xlrd_version = None + else: + import xlrd + + xlrd_version = Version(get_version(xlrd)) + + ext = None + if engine is None: + # Only determine ext if it is needed + if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): + ext = "xls" + else: + ext = inspect_excel_format( + content_or_path=path_or_buffer, storage_options=storage_options + ) + if ext is None: + raise ValueError( + "Excel file format cannot be determined, you must specify " + "an engine manually." + ) + + engine = config.get_option(f"io.excel.{ext}.reader", silent=True) + if engine == "auto": + engine = get_default_engine(ext, mode="reader") + + if engine == "xlrd" and xlrd_version is not None: + if ext is None: + # Need ext to determine ext in order to raise/warn + if isinstance(path_or_buffer, xlrd.Book): + ext = "xls" + else: + ext = inspect_excel_format( + path_or_buffer, storage_options=storage_options + ) + + # Pass through if ext is None, otherwise check if ext valid for xlrd + if ext and ext != "xls" and xlrd_version >= Version("2"): + raise ValueError( + f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " + f"only the xls format is supported. Install openpyxl instead." + ) + elif ext and ext != "xls": + stacklevel = find_stack_level() + warnings.warn( + f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " + f"only the xls format is supported. Install " + f"openpyxl instead.", + FutureWarning, + stacklevel=stacklevel, + ) + + assert engine is not None + self.engine = engine + self.storage_options = storage_options + + self._reader = self._engines[engine](self._io, storage_options=storage_options) + + def __fspath__(self): + return self._io + + def parse( + self, + sheet_name: str | int | list[int] | list[str] | None = 0, + header: int | Sequence[int] | None = 0, + names=None, + index_col: int | Sequence[int] | None = None, + usecols=None, + squeeze: bool | None = None, + converters=None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, + na_values=None, + parse_dates: list | dict | bool = False, + date_parser: Callable | None = None, + thousands: str | None = None, + comment: str | None = None, + skipfooter: int = 0, + convert_float: bool | None = None, + mangle_dupe_cols: bool = True, + **kwds, + ) -> DataFrame | dict[str, DataFrame] | dict[int, DataFrame]: + """ + Parse specified sheet(s) into a DataFrame. + + Equivalent to read_excel(ExcelFile, ...) See the read_excel + docstring for more info on accepted parameters. + + Returns + ------- + DataFrame or dict of DataFrames + DataFrame from the passed in Excel file. 
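+
+        Examples
+        --------
+        Illustrative only; the file name is hypothetical and the output is
+        omitted.
+
+        >>> xls = pd.ExcelFile("report.xlsx")  # doctest: +SKIP
+        >>> xls.parse(sheet_name=0, index_col=0)  # doctest: +SKIP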
+ """ + return self._reader.parse( + sheet_name=sheet_name, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + squeeze=squeeze, + converters=converters, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) + + @property + def book(self): + return self._reader.book + + @property + def sheet_names(self): + return self._reader.sheet_names + + def close(self) -> None: + """close io if necessary""" + self._reader.close() + + def __enter__(self) -> ExcelFile: + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + self.close() + + def __del__(self) -> None: + # Ensure we don't leak file descriptors, but put in try/except in case + # attributes are already deleted + try: + self.close() + except AttributeError: + pass diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py new file mode 100644 index 00000000..075590f3 --- /dev/null +++ b/pandas/io/excel/_odfreader.py @@ -0,0 +1,251 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + cast, +) + +import numpy as np + +from pandas._typing import ( + FilePath, + ReadBuffer, + Scalar, + StorageOptions, +) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc + +import pandas as pd +from pandas.core.shared_docs import _shared_docs + +from pandas.io.excel._base import BaseExcelReader + +if TYPE_CHECKING: + from pandas._libs.tslibs.nattype import NaTType + + +@doc(storage_options=_shared_docs["storage_options"]) +class ODFReader(BaseExcelReader): + """ + Read tables out of OpenDocument formatted files. + + Parameters + ---------- + filepath_or_buffer : str, path to be parsed or + an open readable stream. 
+ {storage_options} + """ + + def __init__( + self, + filepath_or_buffer: FilePath | ReadBuffer[bytes], + storage_options: StorageOptions = None, + ) -> None: + import_optional_dependency("odf") + super().__init__(filepath_or_buffer, storage_options=storage_options) + + @property + def _workbook_class(self): + from odf.opendocument import OpenDocument + + return OpenDocument + + def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): + from odf.opendocument import load + + return load(filepath_or_buffer) + + @property + def empty_value(self) -> str: + """Property for compat with other readers.""" + return "" + + @property + def sheet_names(self) -> list[str]: + """Return a list of sheet names present in the document""" + from odf.table import Table + + tables = self.book.getElementsByType(Table) + return [t.getAttribute("name") for t in tables] + + def get_sheet_by_index(self, index: int): + from odf.table import Table + + self.raise_if_bad_sheet_by_index(index) + tables = self.book.getElementsByType(Table) + return tables[index] + + def get_sheet_by_name(self, name: str): + from odf.table import Table + + self.raise_if_bad_sheet_by_name(name) + tables = self.book.getElementsByType(Table) + + for table in tables: + if table.getAttribute("name") == name: + return table + + self.close() + raise ValueError(f"sheet {name} not found") + + def get_sheet_data( + self, sheet, convert_float: bool, file_rows_needed: int | None = None + ) -> list[list[Scalar | NaTType]]: + """ + Parse an ODF Table into a list of lists + """ + from odf.table import ( + CoveredTableCell, + TableCell, + TableRow, + ) + + covered_cell_name = CoveredTableCell().qname + table_cell_name = TableCell().qname + cell_names = {covered_cell_name, table_cell_name} + + sheet_rows = sheet.getElementsByType(TableRow) + empty_rows = 0 + max_row_len = 0 + + table: list[list[Scalar | NaTType]] = [] + + for sheet_row in sheet_rows: + sheet_cells = [ + x + for x in sheet_row.childNodes + if hasattr(x, "qname") and x.qname in cell_names + ] + empty_cells = 0 + table_row: list[Scalar | NaTType] = [] + + for sheet_cell in sheet_cells: + if sheet_cell.qname == table_cell_name: + value = self._get_cell_value(sheet_cell, convert_float) + else: + value = self.empty_value + + column_repeat = self._get_column_repeat(sheet_cell) + + # Queue up empty values, writing only if content succeeds them + if value == self.empty_value: + empty_cells += column_repeat + else: + table_row.extend([self.empty_value] * empty_cells) + empty_cells = 0 + table_row.extend([value] * column_repeat) + + if max_row_len < len(table_row): + max_row_len = len(table_row) + + row_repeat = self._get_row_repeat(sheet_row) + if self._is_empty_row(sheet_row): + empty_rows += row_repeat + else: + # add blank rows to our table + table.extend([[self.empty_value]] * empty_rows) + empty_rows = 0 + for _ in range(row_repeat): + table.append(table_row) + if file_rows_needed is not None and len(table) >= file_rows_needed: + break + + # Make our table square + for row in table: + if len(row) < max_row_len: + row.extend([self.empty_value] * (max_row_len - len(row))) + + return table + + def _get_row_repeat(self, row) -> int: + """ + Return number of times this row was repeated + Repeating an empty row appeared to be a common way + of representing sparse rows in the table. 
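+        For example, an empty row carrying ``table:number-rows-repeated="3"``
+        stands in for three consecutive empty rows.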
+ """ + from odf.namespaces import TABLENS + + return int(row.attributes.get((TABLENS, "number-rows-repeated"), 1)) + + def _get_column_repeat(self, cell) -> int: + from odf.namespaces import TABLENS + + return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1)) + + def _is_empty_row(self, row) -> bool: + """ + Helper function to find empty rows + """ + for column in row.childNodes: + if len(column.childNodes) > 0: + return False + + return True + + def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: + from odf.namespaces import OFFICENS + + if str(cell) == "#N/A": + return np.nan + + cell_type = cell.attributes.get((OFFICENS, "value-type")) + if cell_type == "boolean": + if str(cell) == "TRUE": + return True + return False + if cell_type is None: + return self.empty_value + elif cell_type == "float": + # GH5394 + cell_value = float(cell.attributes.get((OFFICENS, "value"))) + if convert_float: + val = int(cell_value) + if val == cell_value: + return val + return cell_value + elif cell_type == "percentage": + cell_value = cell.attributes.get((OFFICENS, "value")) + return float(cell_value) + elif cell_type == "string": + return self._get_cell_string_value(cell) + elif cell_type == "currency": + cell_value = cell.attributes.get((OFFICENS, "value")) + return float(cell_value) + elif cell_type == "date": + cell_value = cell.attributes.get((OFFICENS, "date-value")) + return pd.to_datetime(cell_value) + elif cell_type == "time": + stamp = pd.to_datetime(str(cell)) + # cast needed here because Scalar doesn't include datetime.time + return cast(Scalar, stamp.time()) + else: + self.close() + raise ValueError(f"Unrecognized type {cell_type}") + + def _get_cell_string_value(self, cell) -> str: + """ + Find and decode OpenDocument text:s tags that represent + a run length encoded sequence of space characters. 
+ """ + from odf.element import Element + from odf.namespaces import TEXTNS + from odf.text import S + + text_s = S().qname + + value = [] + + for fragment in cell.childNodes: + if isinstance(fragment, Element): + if fragment.qname == text_s: + spaces = int(fragment.attributes.get((TEXTNS, "c"), 1)) + value.append(" " * spaces) + else: + # recursive impl needed in case of nested fragments + # with multiple spaces + # https://github.com/pandas-dev/pandas/pull/36175#discussion_r484639704 + value.append(self._get_cell_string_value(fragment)) + else: + value.append(str(fragment).strip("\n")) + return "".join(value) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py new file mode 100644 index 00000000..5603c601 --- /dev/null +++ b/pandas/io/excel/_odswriter.py @@ -0,0 +1,347 @@ +from __future__ import annotations + +from collections import defaultdict +import datetime +from typing import ( + TYPE_CHECKING, + Any, + DefaultDict, + Tuple, + cast, +) + +import pandas._libs.json as json +from pandas._typing import ( + FilePath, + StorageOptions, + WriteExcelBuffer, +) + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) + +if TYPE_CHECKING: + from odf.opendocument import OpenDocumentSpreadsheet + + from pandas.io.formats.excel import ExcelCell + + +class ODSWriter(ExcelWriter): + _engine = "odf" + _supported_extensions = (".ods",) + + def __init__( + self, + path: FilePath | WriteExcelBuffer | ExcelWriter, + engine: str | None = None, + date_format: str | None = None, + datetime_format=None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ) -> None: + from odf.opendocument import OpenDocumentSpreadsheet + + if mode == "a": + raise ValueError("Append mode is not supported with odf!") + + super().__init__( + path, + mode=mode, + storage_options=storage_options, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + ) + + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + self._book = OpenDocumentSpreadsheet(**engine_kwargs) + self._style_dict: dict[str, str] = {} + + @property + def book(self): + """ + Book instance of class odf.opendocument.OpenDocumentSpreadsheet. + + This attribute can be used to access engine-specific features. + """ + return self._book + + @book.setter + def book(self, other: OpenDocumentSpreadsheet) -> None: + """ + Set book instance. Class type will depend on the engine used. + """ + self._deprecate_set_book() + self._book = other + + @property + def sheets(self) -> dict[str, Any]: + """Mapping of sheet names to sheet objects.""" + from odf.table import Table + + result = { + sheet.getAttribute("name"): sheet + for sheet in self.book.getElementsByType(Table) + } + return result + + def _save(self) -> None: + """ + Save workbook to disk. 
+ """ + for sheet in self.sheets.values(): + self.book.spreadsheet.addElement(sheet) + self.book.save(self._handles.handle) + + def _write_cells( + self, + cells: list[ExcelCell], + sheet_name: str | None = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + ) -> None: + """ + Write the frame cells using odf + """ + from odf.table import ( + Table, + TableCell, + TableRow, + ) + from odf.text import P + + sheet_name = self._get_sheet_name(sheet_name) + assert sheet_name is not None + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = Table(name=sheet_name) + self.book.spreadsheet.addElement(wks) + + if validate_freeze_panes(freeze_panes): + freeze_panes = cast(Tuple[int, int], freeze_panes) + self._create_freeze_panes(sheet_name, freeze_panes) + + for _ in range(startrow): + wks.addElement(TableRow()) + + rows: DefaultDict = defaultdict(TableRow) + col_count: DefaultDict = defaultdict(int) + + for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): + # only add empty cells if the row is still empty + if not col_count[cell.row]: + for _ in range(startcol): + rows[cell.row].addElement(TableCell()) + + # fill with empty cells if needed + for _ in range(cell.col - col_count[cell.row]): + rows[cell.row].addElement(TableCell()) + col_count[cell.row] += 1 + + pvalue, tc = self._make_table_cell(cell) + rows[cell.row].addElement(tc) + col_count[cell.row] += 1 + p = P(text=pvalue) + tc.addElement(p) + + # add all rows to the sheet + if len(rows) > 0: + for row_nr in range(max(rows.keys()) + 1): + wks.addElement(rows[row_nr]) + + def _make_table_cell_attributes(self, cell) -> dict[str, int | str]: + """Convert cell attributes to OpenDocument attributes + + Parameters + ---------- + cell : ExcelCell + Spreadsheet cell data + + Returns + ------- + attributes : Dict[str, Union[int, str]] + Dictionary with attributes and attribute values + """ + attributes: dict[str, int | str] = {} + style_name = self._process_style(cell.style) + if style_name is not None: + attributes["stylename"] = style_name + if cell.mergestart is not None and cell.mergeend is not None: + attributes["numberrowsspanned"] = max(1, cell.mergestart) + attributes["numbercolumnsspanned"] = cell.mergeend + return attributes + + def _make_table_cell(self, cell) -> tuple[object, Any]: + """Convert cell data to an OpenDocument spreadsheet cell + + Parameters + ---------- + cell : ExcelCell + Spreadsheet cell data + + Returns + ------- + pvalue, cell : Tuple[str, TableCell] + Display value, Cell value + """ + from odf.table import TableCell + + attributes = self._make_table_cell_attributes(cell) + val, fmt = self._value_with_fmt(cell.val) + pvalue = value = val + if isinstance(val, bool): + value = str(val).lower() + pvalue = str(val).upper() + if isinstance(val, datetime.datetime): + # Fast formatting + value = val.isoformat() + # Slow but locale-dependent + pvalue = val.strftime("%c") + return ( + pvalue, + TableCell(valuetype="date", datevalue=value, attributes=attributes), + ) + elif isinstance(val, datetime.date): + # Fast formatting + value = f"{val.year}-{val.month:02d}-{val.day:02d}" + # Slow but locale-dependent + pvalue = val.strftime("%x") + return ( + pvalue, + TableCell(valuetype="date", datevalue=value, attributes=attributes), + ) + else: + class_to_cell_type = { + str: "string", + int: "float", + float: "float", + bool: "boolean", + } + return ( + pvalue, + TableCell( + valuetype=class_to_cell_type[type(val)], + value=value, + 
attributes=attributes, + ), + ) + + def _process_style(self, style: dict[str, Any]) -> str: + """Convert a style dictionary to a OpenDocument style sheet + + Parameters + ---------- + style : Dict + Style dictionary + + Returns + ------- + style_key : str + Unique style key for later reference in sheet + """ + from odf.style import ( + ParagraphProperties, + Style, + TableCellProperties, + TextProperties, + ) + + if style is None: + return None + style_key = json.dumps(style) + if style_key in self._style_dict: + return self._style_dict[style_key] + name = f"pd{len(self._style_dict)+1}" + self._style_dict[style_key] = name + odf_style = Style(name=name, family="table-cell") + if "font" in style: + font = style["font"] + if font.get("bold", False): + odf_style.addElement(TextProperties(fontweight="bold")) + if "borders" in style: + borders = style["borders"] + for side, thickness in borders.items(): + thickness_translation = {"thin": "0.75pt solid #000000"} + odf_style.addElement( + TableCellProperties( + attributes={f"border{side}": thickness_translation[thickness]} + ) + ) + if "alignment" in style: + alignment = style["alignment"] + horizontal = alignment.get("horizontal") + if horizontal: + odf_style.addElement(ParagraphProperties(textalign=horizontal)) + vertical = alignment.get("vertical") + if vertical: + odf_style.addElement(TableCellProperties(verticalalign=vertical)) + self.book.styles.addElement(odf_style) + return name + + def _create_freeze_panes( + self, sheet_name: str, freeze_panes: tuple[int, int] + ) -> None: + """ + Create freeze panes in the sheet. + + Parameters + ---------- + sheet_name : str + Name of the spreadsheet + freeze_panes : tuple of (int, int) + Freeze pane location x and y + """ + from odf.config import ( + ConfigItem, + ConfigItemMapEntry, + ConfigItemMapIndexed, + ConfigItemMapNamed, + ConfigItemSet, + ) + + config_item_set = ConfigItemSet(name="ooo:view-settings") + self.book.settings.addElement(config_item_set) + + config_item_map_indexed = ConfigItemMapIndexed(name="Views") + config_item_set.addElement(config_item_map_indexed) + + config_item_map_entry = ConfigItemMapEntry() + config_item_map_indexed.addElement(config_item_map_entry) + + config_item_map_named = ConfigItemMapNamed(name="Tables") + config_item_map_entry.addElement(config_item_map_named) + + config_item_map_entry = ConfigItemMapEntry(name=sheet_name) + config_item_map_named.addElement(config_item_map_entry) + + config_item_map_entry.addElement( + ConfigItem(name="HorizontalSplitMode", type="short", text="2") + ) + config_item_map_entry.addElement( + ConfigItem(name="VerticalSplitMode", type="short", text="2") + ) + config_item_map_entry.addElement( + ConfigItem( + name="HorizontalSplitPosition", type="int", text=str(freeze_panes[0]) + ) + ) + config_item_map_entry.addElement( + ConfigItem( + name="VerticalSplitPosition", type="int", text=str(freeze_panes[1]) + ) + ) + config_item_map_entry.addElement( + ConfigItem(name="PositionRight", type="int", text=str(freeze_panes[0])) + ) + config_item_map_entry.addElement( + ConfigItem(name="PositionBottom", type="int", text=str(freeze_panes[1])) + ) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py new file mode 100644 index 00000000..6fde319b --- /dev/null +++ b/pandas/io/excel/_openpyxl.py @@ -0,0 +1,639 @@ +from __future__ import annotations + +import mmap +from typing import ( + TYPE_CHECKING, + Any, + Tuple, + cast, +) + +import numpy as np + +from pandas._typing import ( + FilePath, + ReadBuffer, + Scalar, + 
StorageOptions, + WriteExcelBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc + +from pandas.core.shared_docs import _shared_docs + +from pandas.io.excel._base import ( + BaseExcelReader, + ExcelWriter, +) +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) + +if TYPE_CHECKING: + from openpyxl.descriptors.serialisable import Serialisable + from openpyxl.workbook import Workbook + + +class OpenpyxlWriter(ExcelWriter): + _engine = "openpyxl" + _supported_extensions = (".xlsx", ".xlsm") + + def __init__( + self, + path: FilePath | WriteExcelBuffer | ExcelWriter, + engine: str | None = None, + date_format: str | None = None, + datetime_format: str | None = None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ) -> None: + # Use the openpyxl module as the Excel writer. + from openpyxl.workbook import Workbook + + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + super().__init__( + path, + mode=mode, + storage_options=storage_options, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + ) + + # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from + # the file and later write to it + if "r+" in self._mode: # Load from existing workbook + from openpyxl import load_workbook + + self._book = load_workbook(self._handles.handle, **engine_kwargs) + self._handles.handle.seek(0) + else: + # Create workbook object with default optimized_write=True. + self._book = Workbook(**engine_kwargs) + + if self.book.worksheets: + self.book.remove(self.book.worksheets[0]) + + @property + def book(self) -> Workbook: + """ + Book instance of class openpyxl.workbook.Workbook. + + This attribute can be used to access engine-specific features. + """ + return self._book + + @book.setter + def book(self, other: Workbook) -> None: + """ + Set book instance. Class type will depend on the engine used. + """ + self._deprecate_set_book() + self._book = other + + @property + def sheets(self) -> dict[str, Any]: + """Mapping of sheet names to sheet objects.""" + result = {name: self.book[name] for name in self.book.sheetnames} + return result + + def _save(self) -> None: + """ + Save workbook to disk. + """ + self.book.save(self._handles.handle) + if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap): + # truncate file to the written content + self._handles.handle.truncate() + + @classmethod + def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: + """ + Convert a style_dict to a set of kwargs suitable for initializing + or updating-on-copy an openpyxl v2 style object. + + Parameters + ---------- + style_dict : dict + A dict with zero or more of the following keys (or their synonyms). + 'font' + 'fill' + 'border' ('borders') + 'alignment' + 'number_format' + 'protection' + + Returns + ------- + style_kwargs : dict + A dict with the same, normalized keys as ``style_dict`` but each + value has been replaced with a native openpyxl style object of the + appropriate class. 
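+
+        Examples
+        --------
+        A sketch of the expected input shape; the resulting values are openpyxl
+        style objects, so the output is omitted here.
+
+        >>> OpenpyxlWriter._convert_to_style_kwargs(
+        ...     {"font": {"bold": True}, "borders": {"top": {"style": "thin"}}}
+        ... )  # doctest: +SKIP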
+ """ + _style_key_map = {"borders": "border"} + + style_kwargs: dict[str, Serialisable] = {} + for k, v in style_dict.items(): + if k in _style_key_map: + k = _style_key_map[k] + _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None) + new_v = _conv_to_x(v) + if new_v: + style_kwargs[k] = new_v + + return style_kwargs + + @classmethod + def _convert_to_color(cls, color_spec): + """ + Convert ``color_spec`` to an openpyxl v2 Color object. + + Parameters + ---------- + color_spec : str, dict + A 32-bit ARGB hex string, or a dict with zero or more of the + following keys. + 'rgb' + 'indexed' + 'auto' + 'theme' + 'tint' + 'index' + 'type' + + Returns + ------- + color : openpyxl.styles.Color + """ + from openpyxl.styles import Color + + if isinstance(color_spec, str): + return Color(color_spec) + else: + return Color(**color_spec) + + @classmethod + def _convert_to_font(cls, font_dict): + """ + Convert ``font_dict`` to an openpyxl v2 Font object. + + Parameters + ---------- + font_dict : dict + A dict with zero or more of the following keys (or their synonyms). + 'name' + 'size' ('sz') + 'bold' ('b') + 'italic' ('i') + 'underline' ('u') + 'strikethrough' ('strike') + 'color' + 'vertAlign' ('vertalign') + 'charset' + 'scheme' + 'family' + 'outline' + 'shadow' + 'condense' + + Returns + ------- + font : openpyxl.styles.Font + """ + from openpyxl.styles import Font + + _font_key_map = { + "sz": "size", + "b": "bold", + "i": "italic", + "u": "underline", + "strike": "strikethrough", + "vertalign": "vertAlign", + } + + font_kwargs = {} + for k, v in font_dict.items(): + if k in _font_key_map: + k = _font_key_map[k] + if k == "color": + v = cls._convert_to_color(v) + font_kwargs[k] = v + + return Font(**font_kwargs) + + @classmethod + def _convert_to_stop(cls, stop_seq): + """ + Convert ``stop_seq`` to a list of openpyxl v2 Color objects, + suitable for initializing the ``GradientFill`` ``stop`` parameter. + + Parameters + ---------- + stop_seq : iterable + An iterable that yields objects suitable for consumption by + ``_convert_to_color``. + + Returns + ------- + stop : list of openpyxl.styles.Color + """ + return map(cls._convert_to_color, stop_seq) + + @classmethod + def _convert_to_fill(cls, fill_dict: dict[str, Any]): + """ + Convert ``fill_dict`` to an openpyxl v2 Fill object. + + Parameters + ---------- + fill_dict : dict + A dict with one or more of the following keys (or their synonyms), + 'fill_type' ('patternType', 'patterntype') + 'start_color' ('fgColor', 'fgcolor') + 'end_color' ('bgColor', 'bgcolor') + or one or more of the following keys (or their synonyms). 
+ 'type' ('fill_type') + 'degree' + 'left' + 'right' + 'top' + 'bottom' + 'stop' + + Returns + ------- + fill : openpyxl.styles.Fill + """ + from openpyxl.styles import ( + GradientFill, + PatternFill, + ) + + _pattern_fill_key_map = { + "patternType": "fill_type", + "patterntype": "fill_type", + "fgColor": "start_color", + "fgcolor": "start_color", + "bgColor": "end_color", + "bgcolor": "end_color", + } + + _gradient_fill_key_map = {"fill_type": "type"} + + pfill_kwargs = {} + gfill_kwargs = {} + for k, v in fill_dict.items(): + pk = gk = None + if k in _pattern_fill_key_map: + pk = _pattern_fill_key_map[k] + if k in _gradient_fill_key_map: + gk = _gradient_fill_key_map[k] + if pk in ["start_color", "end_color"]: + v = cls._convert_to_color(v) + if gk == "stop": + v = cls._convert_to_stop(v) + if pk: + pfill_kwargs[pk] = v + elif gk: + gfill_kwargs[gk] = v + else: + pfill_kwargs[k] = v + gfill_kwargs[k] = v + + try: + return PatternFill(**pfill_kwargs) + except TypeError: + return GradientFill(**gfill_kwargs) + + @classmethod + def _convert_to_side(cls, side_spec): + """ + Convert ``side_spec`` to an openpyxl v2 Side object. + + Parameters + ---------- + side_spec : str, dict + A string specifying the border style, or a dict with zero or more + of the following keys (or their synonyms). + 'style' ('border_style') + 'color' + + Returns + ------- + side : openpyxl.styles.Side + """ + from openpyxl.styles import Side + + _side_key_map = {"border_style": "style"} + + if isinstance(side_spec, str): + return Side(style=side_spec) + + side_kwargs = {} + for k, v in side_spec.items(): + if k in _side_key_map: + k = _side_key_map[k] + if k == "color": + v = cls._convert_to_color(v) + side_kwargs[k] = v + + return Side(**side_kwargs) + + @classmethod + def _convert_to_border(cls, border_dict): + """ + Convert ``border_dict`` to an openpyxl v2 Border object. + + Parameters + ---------- + border_dict : dict + A dict with zero or more of the following keys (or their synonyms). + 'left' + 'right' + 'top' + 'bottom' + 'diagonal' + 'diagonal_direction' + 'vertical' + 'horizontal' + 'diagonalUp' ('diagonalup') + 'diagonalDown' ('diagonaldown') + 'outline' + + Returns + ------- + border : openpyxl.styles.Border + """ + from openpyxl.styles import Border + + _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"} + + border_kwargs = {} + for k, v in border_dict.items(): + if k in _border_key_map: + k = _border_key_map[k] + if k == "color": + v = cls._convert_to_color(v) + if k in ["left", "right", "top", "bottom", "diagonal"]: + v = cls._convert_to_side(v) + border_kwargs[k] = v + + return Border(**border_kwargs) + + @classmethod + def _convert_to_alignment(cls, alignment_dict): + """ + Convert ``alignment_dict`` to an openpyxl v2 Alignment object. + + Parameters + ---------- + alignment_dict : dict + A dict with zero or more of the following keys (or their synonyms). + 'horizontal' + 'vertical' + 'text_rotation' + 'wrap_text' + 'shrink_to_fit' + 'indent' + Returns + ------- + alignment : openpyxl.styles.Alignment + """ + from openpyxl.styles import Alignment + + return Alignment(**alignment_dict) + + @classmethod + def _convert_to_number_format(cls, number_format_dict): + """ + Convert ``number_format_dict`` to an openpyxl v2.1.0 number format + initializer. + + Parameters + ---------- + number_format_dict : dict + A dict with zero or more of the following keys. 
+ 'format_code' : str + + Returns + ------- + number_format : str + """ + return number_format_dict["format_code"] + + @classmethod + def _convert_to_protection(cls, protection_dict): + """ + Convert ``protection_dict`` to an openpyxl v2 Protection object. + + Parameters + ---------- + protection_dict : dict + A dict with zero or more of the following keys. + 'locked' + 'hidden' + + Returns + ------- + """ + from openpyxl.styles import Protection + + return Protection(**protection_dict) + + def _write_cells( + self, + cells, + sheet_name: str | None = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + ) -> None: + # Write the frame cells using openpyxl. + sheet_name = self._get_sheet_name(sheet_name) + + _style_cache: dict[str, dict[str, Serialisable]] = {} + + if sheet_name in self.sheets and self._if_sheet_exists != "new": + if "r+" in self._mode: + if self._if_sheet_exists == "replace": + old_wks = self.sheets[sheet_name] + target_index = self.book.index(old_wks) + del self.book[sheet_name] + wks = self.book.create_sheet(sheet_name, target_index) + elif self._if_sheet_exists == "error": + raise ValueError( + f"Sheet '{sheet_name}' already exists and " + f"if_sheet_exists is set to 'error'." + ) + elif self._if_sheet_exists == "overlay": + wks = self.sheets[sheet_name] + else: + raise ValueError( + f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. " + "Valid options are 'error', 'new', 'replace' and 'overlay'." + ) + else: + wks = self.sheets[sheet_name] + else: + wks = self.book.create_sheet() + wks.title = sheet_name + + if validate_freeze_panes(freeze_panes): + freeze_panes = cast(Tuple[int, int], freeze_panes) + wks.freeze_panes = wks.cell( + row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 + ) + + for cell in cells: + xcell = wks.cell( + row=startrow + cell.row + 1, column=startcol + cell.col + 1 + ) + xcell.value, fmt = self._value_with_fmt(cell.val) + if fmt: + xcell.number_format = fmt + + style_kwargs: dict[str, Serialisable] | None = {} + if cell.style: + key = str(cell.style) + style_kwargs = _style_cache.get(key) + if style_kwargs is None: + style_kwargs = self._convert_to_style_kwargs(cell.style) + _style_cache[key] = style_kwargs + + if style_kwargs: + for k, v in style_kwargs.items(): + setattr(xcell, k, v) + + if cell.mergestart is not None and cell.mergeend is not None: + + wks.merge_cells( + start_row=startrow + cell.row + 1, + start_column=startcol + cell.col + 1, + end_column=startcol + cell.mergeend + 1, + end_row=startrow + cell.mergestart + 1, + ) + + # When cells are merged only the top-left cell is preserved + # The behaviour of the other cells in a merged range is + # undefined + if style_kwargs: + first_row = startrow + cell.row + 1 + last_row = startrow + cell.mergestart + 1 + first_col = startcol + cell.col + 1 + last_col = startcol + cell.mergeend + 1 + + for row in range(first_row, last_row + 1): + for col in range(first_col, last_col + 1): + if row == first_row and col == first_col: + # Ignore first cell. It is already handled. + continue + xcell = wks.cell(column=col, row=row) + for k, v in style_kwargs.items(): + setattr(xcell, k, v) + + +class OpenpyxlReader(BaseExcelReader): + @doc(storage_options=_shared_docs["storage_options"]) + def __init__( + self, + filepath_or_buffer: FilePath | ReadBuffer[bytes], + storage_options: StorageOptions = None, + ) -> None: + """ + Reader using openpyxl engine. 
+ + Parameters + ---------- + filepath_or_buffer : str, path object or Workbook + Object to be parsed. + {storage_options} + """ + import_optional_dependency("openpyxl") + super().__init__(filepath_or_buffer, storage_options=storage_options) + + @property + def _workbook_class(self): + from openpyxl import Workbook + + return Workbook + + def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): + from openpyxl import load_workbook + + return load_workbook( + filepath_or_buffer, read_only=True, data_only=True, keep_links=False + ) + + @property + def sheet_names(self) -> list[str]: + return [sheet.title for sheet in self.book.worksheets] + + def get_sheet_by_name(self, name: str): + self.raise_if_bad_sheet_by_name(name) + return self.book[name] + + def get_sheet_by_index(self, index: int): + self.raise_if_bad_sheet_by_index(index) + return self.book.worksheets[index] + + def _convert_cell(self, cell, convert_float: bool) -> Scalar: + + from openpyxl.cell.cell import ( + TYPE_ERROR, + TYPE_NUMERIC, + ) + + if cell.value is None: + return "" # compat with xlrd + elif cell.data_type == TYPE_ERROR: + return np.nan + elif cell.data_type == TYPE_NUMERIC: + # GH5394, GH46988 + if convert_float: + val = int(cell.value) + if val == cell.value: + return val + else: + return float(cell.value) + + return cell.value + + def get_sheet_data( + self, sheet, convert_float: bool, file_rows_needed: int | None = None + ) -> list[list[Scalar]]: + + if self.book.read_only: + sheet.reset_dimensions() + + data: list[list[Scalar]] = [] + last_row_with_data = -1 + for row_number, row in enumerate(sheet.rows): + converted_row = [self._convert_cell(cell, convert_float) for cell in row] + while converted_row and converted_row[-1] == "": + # trim trailing empty elements + converted_row.pop() + if converted_row: + last_row_with_data = row_number + data.append(converted_row) + if file_rows_needed is not None and len(data) >= file_rows_needed: + break + + # Trim trailing empty rows + data = data[: last_row_with_data + 1] + + if len(data) > 0: + # extend rows to max width + max_width = max(len(data_row) for data_row in data) + if min(len(data_row) for data_row in data) < max_width: + empty_cell: list[Scalar] = [""] + data = [ + data_row + (max_width - len(data_row)) * empty_cell + for data_row in data + ] + + return data diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py new file mode 100644 index 00000000..5d40ccdf --- /dev/null +++ b/pandas/io/excel/_pyxlsb.py @@ -0,0 +1,113 @@ +# pyright: reportMissingImports=false +from __future__ import annotations + +from pandas._typing import ( + FilePath, + ReadBuffer, + Scalar, + StorageOptions, +) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc + +from pandas.core.shared_docs import _shared_docs + +from pandas.io.excel._base import BaseExcelReader + + +class PyxlsbReader(BaseExcelReader): + @doc(storage_options=_shared_docs["storage_options"]) + def __init__( + self, + filepath_or_buffer: FilePath | ReadBuffer[bytes], + storage_options: StorageOptions = None, + ) -> None: + """ + Reader using pyxlsb engine. + + Parameters + ---------- + filepath_or_buffer : str, path object, or Workbook + Object to be parsed. 
+ {storage_options} + """ + import_optional_dependency("pyxlsb") + # This will call load_workbook on the filepath or buffer + # And set the result to the book-attribute + super().__init__(filepath_or_buffer, storage_options=storage_options) + + @property + def _workbook_class(self): + from pyxlsb import Workbook + + return Workbook + + def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): + from pyxlsb import open_workbook + + # TODO: hack in buffer capability + # This might need some modifications to the Pyxlsb library + # Actual work for opening it is in xlsbpackage.py, line 20-ish + + return open_workbook(filepath_or_buffer) + + @property + def sheet_names(self) -> list[str]: + return self.book.sheets + + def get_sheet_by_name(self, name: str): + self.raise_if_bad_sheet_by_name(name) + return self.book.get_sheet(name) + + def get_sheet_by_index(self, index: int): + self.raise_if_bad_sheet_by_index(index) + # pyxlsb sheets are indexed from 1 onwards + # There's a fix for this in the source, but the pypi package doesn't have it + return self.book.get_sheet(index + 1) + + def _convert_cell(self, cell, convert_float: bool) -> Scalar: + # TODO: there is no way to distinguish between floats and datetimes in pyxlsb + # This means that there is no way to read datetime types from an xlsb file yet + if cell.v is None: + return "" # Prevents non-named columns from not showing up as Unnamed: i + if isinstance(cell.v, float) and convert_float: + val = int(cell.v) + if val == cell.v: + return val + else: + return float(cell.v) + + return cell.v + + def get_sheet_data( + self, + sheet, + convert_float: bool, + file_rows_needed: int | None = None, + ) -> list[list[Scalar]]: + data: list[list[Scalar]] = [] + prevous_row_number = -1 + # When sparse=True the rows can have different lengths and empty rows are + # not returned. The cells are namedtuples of row, col, value (r, c, v). + for row in sheet.rows(sparse=True): + row_number = row[0].r + converted_row = [self._convert_cell(cell, convert_float) for cell in row] + while converted_row and converted_row[-1] == "": + # trim trailing empty elements + converted_row.pop() + if converted_row: + data.extend([[]] * (row_number - prevous_row_number - 1)) + data.append(converted_row) + prevous_row_number = row_number + if file_rows_needed is not None and len(data) >= file_rows_needed: + break + if data: + # extend rows to max_width + max_width = max(len(data_row) for data_row in data) + if min(len(data_row) for data_row in data) < max_width: + empty_cell: list[Scalar] = [""] + data = [ + data_row + (max_width - len(data_row)) * empty_cell + for data_row in data + ] + return data diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py new file mode 100644 index 00000000..c3156571 --- /dev/null +++ b/pandas/io/excel/_util.py @@ -0,0 +1,333 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Hashable, + Iterable, + Literal, + MutableMapping, + Sequence, + TypeVar, + overload, +) + +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) + +if TYPE_CHECKING: + from pandas.io.excel._base import ExcelWriter + + ExcelWriter_t = type[ExcelWriter] + usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object]) + +_writers: MutableMapping[str, ExcelWriter_t] = {} + + +def register_writer(klass: ExcelWriter_t) -> None: + """ + Add engine to the excel writer registry.io.excel. 
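+
+    A minimal sketch of a conforming writer; every name and body below is
+    hypothetical, and a real engine must also handle dates, styles and the
+    supported modes::
+
+        class MyWriter(ExcelWriter):
+            _engine = "mywriter"
+            _supported_extensions = (".xlsx",)
+
+            @property
+            def book(self):
+                return self._book  # engine-specific workbook, set in __init__
+
+            @property
+            def sheets(self):
+                return {}  # mapping of sheet names to sheet objects
+
+            def _write_cells(self, cells, sheet_name=None, startrow=0,
+                             startcol=0, freeze_panes=None):
+                for cell in cells:
+                    ...  # place cell.val at (cell.row, cell.col)
+
+            def _save(self):
+                ...  # write the workbook to self._handles.handle
+
+        register_writer(MyWriter)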
+ + You must use this method to integrate with ``to_excel``. + + Parameters + ---------- + klass : ExcelWriter + """ + if not callable(klass): + raise ValueError("Can only register callables as engines") + engine_name = klass._engine + _writers[engine_name] = klass + + +def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str: + """ + Return the default reader/writer for the given extension. + + Parameters + ---------- + ext : str + The excel file extension for which to get the default engine. + mode : str {'reader', 'writer'} + Whether to get the default engine for reading or writing. + Either 'reader' or 'writer' + + Returns + ------- + str + The default engine for the extension. + """ + _default_readers = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlrd", + "ods": "odf", + } + _default_writers = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlwt", + "ods": "odf", + } + assert mode in ["reader", "writer"] + if mode == "writer": + # Prefer xlsxwriter over openpyxl if installed + xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn") + if xlsxwriter: + _default_writers["xlsx"] = "xlsxwriter" + return _default_writers[ext] + else: + return _default_readers[ext] + + +def get_writer(engine_name: str) -> ExcelWriter_t: + try: + return _writers[engine_name] + except KeyError as err: + raise ValueError(f"No Excel writer '{engine_name}'") from err + + +def _excel2num(x: str) -> int: + """ + Convert Excel column name like 'AB' to 0-based column index. + + Parameters + ---------- + x : str + The Excel column name to convert to a 0-based column index. + + Returns + ------- + num : int + The column index corresponding to the name. + + Raises + ------ + ValueError + Part of the Excel column name was invalid. + """ + index = 0 + + for c in x.upper().strip(): + cp = ord(c) + + if cp < ord("A") or cp > ord("Z"): + raise ValueError(f"Invalid column name: {x}") + + index = index * 26 + cp - ord("A") + 1 + + return index - 1 + + +def _range2cols(areas: str) -> list[int]: + """ + Convert comma separated list of column names and ranges to indices. + + Parameters + ---------- + areas : str + A string containing a sequence of column ranges (or areas). + + Returns + ------- + cols : list + A list of 0-based column indices. + + Examples + -------- + >>> _range2cols('A:E') + [0, 1, 2, 3, 4] + >>> _range2cols('A,C,Z:AB') + [0, 2, 25, 26, 27] + """ + cols: list[int] = [] + + for rng in areas.split(","): + if ":" in rng: + rngs = rng.split(":") + cols.extend(range(_excel2num(rngs[0]), _excel2num(rngs[1]) + 1)) + else: + cols.append(_excel2num(rng)) + + return cols + + +@overload +def maybe_convert_usecols(usecols: str | list[int]) -> list[int]: + ... + + +@overload +def maybe_convert_usecols(usecols: list[str]) -> list[str]: + ... + + +@overload +def maybe_convert_usecols(usecols: usecols_func) -> usecols_func: + ... + + +@overload +def maybe_convert_usecols(usecols: None) -> None: + ... + + +def maybe_convert_usecols( + usecols: str | list[int] | list[str] | usecols_func | None, +) -> None | list[int] | list[str] | usecols_func: + """ + Convert `usecols` into a compatible format for parsing in `parsers.py`. + + Parameters + ---------- + usecols : object + The use-columns object to potentially convert. + + Returns + ------- + converted : object + The compatible format of `usecols`. 
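+
+    Examples
+    --------
+    String ranges are expanded to 0-based indices, while lists pass through
+    unchanged.
+
+    >>> maybe_convert_usecols("A:C")
+    [0, 1, 2]
+    >>> maybe_convert_usecols([0, 2, 3])
+    [0, 2, 3]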
+ """ + if usecols is None: + return usecols + + if is_integer(usecols): + raise ValueError( + "Passing an integer for `usecols` is no longer supported. " + "Please pass in a list of int from 0 to `usecols` inclusive instead." + ) + + if isinstance(usecols, str): + return _range2cols(usecols) + + return usecols + + +@overload +def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]: + ... + + +@overload +def validate_freeze_panes(freeze_panes: None) -> Literal[False]: + ... + + +def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool: + if freeze_panes is not None: + if len(freeze_panes) == 2 and all( + isinstance(item, int) for item in freeze_panes + ): + return True + + raise ValueError( + "freeze_panes must be of form (row, column) " + "where row and column are integers" + ) + + # freeze_panes wasn't specified, return False so it won't be applied + # to output sheet + return False + + +def fill_mi_header( + row: list[Hashable], control_row: list[bool] +) -> tuple[list[Hashable], list[bool]]: + """ + Forward fill blank entries in row but only inside the same parent index. + + Used for creating headers in Multiindex. + + Parameters + ---------- + row : list + List of items in a single row. + control_row : list of bool + Helps to determine if particular column is in same parent index as the + previous value. Used to stop propagation of empty cells between + different indexes. + + Returns + ------- + Returns changed row and control_row + """ + last = row[0] + for i in range(1, len(row)): + if not control_row[i]: + last = row[i] + + if row[i] == "" or row[i] is None: + row[i] = last + else: + control_row[i] = False + last = row[i] + + return row, control_row + + +def pop_header_name( + row: list[Hashable], index_col: int | Sequence[int] +) -> tuple[Hashable | None, list[Hashable]]: + """ + Pop the header name for MultiIndex parsing. + + Parameters + ---------- + row : list + The data row to parse for the header name. + index_col : int, list + The index columns for our data. Assumed to be non-null. + + Returns + ------- + header_name : str + The extracted header name. + trimmed_row : list + The original data row with the header name removed. + """ + # Pop out header name and fill w/blank. + if is_list_like(index_col): + assert isinstance(index_col, Iterable) + i = max(index_col) + else: + assert not isinstance(index_col, Iterable) + i = index_col + + header_name = row[i] + header_name = None if header_name == "" else header_name + + return header_name, row[:i] + [""] + row[i + 1 :] + + +def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict: + """ + Used to combine two sources of kwargs for the backend engine. + + Use of kwargs is deprecated, this function is solely for use in 1.3 and should + be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs + or kwargs must be None or empty respectively. + + Parameters + ---------- + engine_kwargs: dict + kwargs to be passed through to the engine. 
+ kwargs: dict + kwargs to be psased through to the engine (deprecated) + + Returns + ------- + engine_kwargs combined with kwargs + """ + if engine_kwargs is None: + result = {} + else: + result = engine_kwargs.copy() + result.update(kwargs) + return result diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py new file mode 100644 index 00000000..0bf3ac61 --- /dev/null +++ b/pandas/io/excel/_xlrd.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +from datetime import time + +import numpy as np + +from pandas._typing import ( + Scalar, + StorageOptions, +) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc + +from pandas.core.shared_docs import _shared_docs + +from pandas.io.excel._base import BaseExcelReader + + +class XlrdReader(BaseExcelReader): + @doc(storage_options=_shared_docs["storage_options"]) + def __init__( + self, filepath_or_buffer, storage_options: StorageOptions = None + ) -> None: + """ + Reader using xlrd engine. + + Parameters + ---------- + filepath_or_buffer : str, path object or Workbook + Object to be parsed. + {storage_options} + """ + err_msg = "Install xlrd >= 1.0.0 for Excel support" + import_optional_dependency("xlrd", extra=err_msg) + super().__init__(filepath_or_buffer, storage_options=storage_options) + + @property + def _workbook_class(self): + from xlrd import Book + + return Book + + def load_workbook(self, filepath_or_buffer): + from xlrd import open_workbook + + if hasattr(filepath_or_buffer, "read"): + data = filepath_or_buffer.read() + return open_workbook(file_contents=data) + else: + return open_workbook(filepath_or_buffer) + + @property + def sheet_names(self): + return self.book.sheet_names() + + def get_sheet_by_name(self, name): + self.raise_if_bad_sheet_by_name(name) + return self.book.sheet_by_name(name) + + def get_sheet_by_index(self, index): + self.raise_if_bad_sheet_by_index(index) + return self.book.sheet_by_index(index) + + def get_sheet_data( + self, sheet, convert_float: bool, file_rows_needed: int | None = None + ) -> list[list[Scalar]]: + from xlrd import ( + XL_CELL_BOOLEAN, + XL_CELL_DATE, + XL_CELL_ERROR, + XL_CELL_NUMBER, + xldate, + ) + + epoch1904 = self.book.datemode + + def _parse_cell(cell_contents, cell_typ): + """ + converts the contents of the cell into a pandas appropriate object + """ + if cell_typ == XL_CELL_DATE: + + # Use the newer xlrd datetime handling. + try: + cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904) + except OverflowError: + return cell_contents + + # Excel doesn't distinguish between dates and time, + # so we treat dates on the epoch as times only. + # Also, Excel supports 1900 and 1904 epochs. 
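+                # For instance (illustrative; the exact epoch handling belongs to
+                # xlrd): a time-only cell stored as 0.5 decodes to noon on the
+                # epoch date 1899-12-31 under the 1900 date mode, so it is
+                # returned below as datetime.time(12, 0).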
+ year = (cell_contents.timetuple())[0:3] + if (not epoch1904 and year == (1899, 12, 31)) or ( + epoch1904 and year == (1904, 1, 1) + ): + cell_contents = time( + cell_contents.hour, + cell_contents.minute, + cell_contents.second, + cell_contents.microsecond, + ) + + elif cell_typ == XL_CELL_ERROR: + cell_contents = np.nan + elif cell_typ == XL_CELL_BOOLEAN: + cell_contents = bool(cell_contents) + elif convert_float and cell_typ == XL_CELL_NUMBER: + # GH5394 - Excel 'numbers' are always floats + # it's a minimal perf hit and less surprising + val = int(cell_contents) + if val == cell_contents: + cell_contents = val + return cell_contents + + data = [] + + nrows = sheet.nrows + if file_rows_needed is not None: + nrows = min(nrows, file_rows_needed) + for i in range(nrows): + row = [ + _parse_cell(value, typ) + for value, typ in zip(sheet.row_values(i), sheet.row_types(i)) + ] + data.append(row) + + return data diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py new file mode 100644 index 00000000..8d11896c --- /dev/null +++ b/pandas/io/excel/_xlsxwriter.py @@ -0,0 +1,289 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, +) + +import pandas._libs.json as json +from pandas._typing import ( + FilePath, + StorageOptions, + WriteExcelBuffer, +) + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) + +if TYPE_CHECKING: + from xlsxwriter import Workbook + + +class _XlsxStyler: + # Map from openpyxl-oriented styles to flatter xlsxwriter representation + # Ordering necessary for both determinism and because some are keyed by + # prefixes of others. + STYLE_MAPPING: dict[str, list[tuple[tuple[str, ...], str]]] = { + "font": [ + (("name",), "font_name"), + (("sz",), "font_size"), + (("size",), "font_size"), + (("color", "rgb"), "font_color"), + (("color",), "font_color"), + (("b",), "bold"), + (("bold",), "bold"), + (("i",), "italic"), + (("italic",), "italic"), + (("u",), "underline"), + (("underline",), "underline"), + (("strike",), "font_strikeout"), + (("vertAlign",), "font_script"), + (("vertalign",), "font_script"), + ], + "number_format": [(("format_code",), "num_format"), ((), "num_format")], + "protection": [(("locked",), "locked"), (("hidden",), "hidden")], + "alignment": [ + (("horizontal",), "align"), + (("vertical",), "valign"), + (("text_rotation",), "rotation"), + (("wrap_text",), "text_wrap"), + (("indent",), "indent"), + (("shrink_to_fit",), "shrink"), + ], + "fill": [ + (("patternType",), "pattern"), + (("patterntype",), "pattern"), + (("fill_type",), "pattern"), + (("start_color", "rgb"), "fg_color"), + (("fgColor", "rgb"), "fg_color"), + (("fgcolor", "rgb"), "fg_color"), + (("start_color",), "fg_color"), + (("fgColor",), "fg_color"), + (("fgcolor",), "fg_color"), + (("end_color", "rgb"), "bg_color"), + (("bgColor", "rgb"), "bg_color"), + (("bgcolor", "rgb"), "bg_color"), + (("end_color",), "bg_color"), + (("bgColor",), "bg_color"), + (("bgcolor",), "bg_color"), + ], + "border": [ + (("color", "rgb"), "border_color"), + (("color",), "border_color"), + (("style",), "border"), + (("top", "color", "rgb"), "top_color"), + (("top", "color"), "top_color"), + (("top", "style"), "top"), + (("top",), "top"), + (("right", "color", "rgb"), "right_color"), + (("right", "color"), "right_color"), + (("right", "style"), "right"), + (("right",), "right"), + (("bottom", "color", "rgb"), "bottom_color"), + (("bottom", "color"), "bottom_color"), + (("bottom", 
"style"), "bottom"), + (("bottom",), "bottom"), + (("left", "color", "rgb"), "left_color"), + (("left", "color"), "left_color"), + (("left", "style"), "left"), + (("left",), "left"), + ], + } + + @classmethod + def convert(cls, style_dict, num_format_str=None): + """ + converts a style_dict to an xlsxwriter format dict + + Parameters + ---------- + style_dict : style dictionary to convert + num_format_str : optional number format string + """ + # Create a XlsxWriter format object. + props = {} + + if num_format_str is not None: + props["num_format"] = num_format_str + + if style_dict is None: + return props + + if "borders" in style_dict: + style_dict = style_dict.copy() + style_dict["border"] = style_dict.pop("borders") + + for style_group_key, style_group in style_dict.items(): + for src, dst in cls.STYLE_MAPPING.get(style_group_key, []): + # src is a sequence of keys into a nested dict + # dst is a flat key + if dst in props: + continue + v = style_group + for k in src: + try: + v = v[k] + except (KeyError, TypeError): + break + else: + props[dst] = v + + if isinstance(props.get("pattern"), str): + # TODO: support other fill patterns + props["pattern"] = 0 if props["pattern"] == "none" else 1 + + for k in ["border", "top", "right", "bottom", "left"]: + if isinstance(props.get(k), str): + try: + props[k] = [ + "none", + "thin", + "medium", + "dashed", + "dotted", + "thick", + "double", + "hair", + "mediumDashed", + "dashDot", + "mediumDashDot", + "dashDotDot", + "mediumDashDotDot", + "slantDashDot", + ].index(props[k]) + except ValueError: + props[k] = 2 + + if isinstance(props.get("font_script"), str): + props["font_script"] = ["baseline", "superscript", "subscript"].index( + props["font_script"] + ) + + if isinstance(props.get("underline"), str): + props["underline"] = { + "none": 0, + "single": 1, + "double": 2, + "singleAccounting": 33, + "doubleAccounting": 34, + }[props["underline"]] + + # GH 30107 - xlsxwriter uses different name + if props.get("valign") == "center": + props["valign"] = "vcenter" + + return props + + +class XlsxWriter(ExcelWriter): + _engine = "xlsxwriter" + _supported_extensions = (".xlsx",) + + def __init__( + self, + path: FilePath | WriteExcelBuffer | ExcelWriter, + engine: str | None = None, + date_format: str | None = None, + datetime_format: str | None = None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ) -> None: + # Use the xlsxwriter module as the Excel writer. + from xlsxwriter import Workbook + + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + if mode == "a": + raise ValueError("Append mode is not supported with xlsxwriter!") + + super().__init__( + path, + engine=engine, + date_format=date_format, + datetime_format=datetime_format, + mode=mode, + storage_options=storage_options, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + ) + + self._book = Workbook(self._handles.handle, **engine_kwargs) + + @property + def book(self): + """ + Book instance of class xlsxwriter.Workbook. + + This attribute can be used to access engine-specific features. + """ + return self._book + + @book.setter + def book(self, other: Workbook) -> None: + """ + Set book instance. Class type will depend on the engine used. + """ + self._deprecate_set_book() + self._book = other + + @property + def sheets(self) -> dict[str, Any]: + result = self.book.sheetnames + return result + + def _save(self) -> None: + """ + Save workbook to disk. 
+ """ + self.book.close() + + def _write_cells( + self, + cells, + sheet_name: str | None = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + ) -> None: + # Write the frame cells using xlsxwriter. + sheet_name = self._get_sheet_name(sheet_name) + + wks = self.book.get_worksheet_by_name(sheet_name) + if wks is None: + wks = self.book.add_worksheet(sheet_name) + + style_dict = {"null": None} + + if validate_freeze_panes(freeze_panes): + wks.freeze_panes(*(freeze_panes)) + + for cell in cells: + val, fmt = self._value_with_fmt(cell.val) + + stylekey = json.dumps(cell.style) + if fmt: + stylekey += fmt + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = self.book.add_format(_XlsxStyler.convert(cell.style, fmt)) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.merge_range( + startrow + cell.row, + startcol + cell.col, + startrow + cell.mergestart, + startcol + cell.mergeend, + val, + style, + ) + else: + wks.write(startrow + cell.row, startcol + cell.col, val, style) diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py new file mode 100644 index 00000000..f1455e47 --- /dev/null +++ b/pandas/io/excel/_xlwt.py @@ -0,0 +1,228 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Tuple, + cast, +) + +import pandas._libs.json as json +from pandas._typing import ( + FilePath, + StorageOptions, + WriteExcelBuffer, +) + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) + +if TYPE_CHECKING: + from xlwt import ( + Workbook, + XFStyle, + ) + + +class XlwtWriter(ExcelWriter): + _engine = "xlwt" + _supported_extensions = (".xls",) + + def __init__( + self, + path: FilePath | WriteExcelBuffer | ExcelWriter, + engine: str | None = None, + date_format: str | None = None, + datetime_format: str | None = None, + encoding: str | None = None, + mode: str = "w", + storage_options: StorageOptions = None, + if_sheet_exists: str | None = None, + engine_kwargs: dict[str, Any] | None = None, + **kwargs, + ) -> None: + # Use the xlwt module as the Excel writer. + import xlwt + + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + if mode == "a": + raise ValueError("Append mode is not supported with xlwt!") + + super().__init__( + path, + mode=mode, + storage_options=storage_options, + if_sheet_exists=if_sheet_exists, + engine_kwargs=engine_kwargs, + ) + + if encoding is None: + encoding = "ascii" + self._book = xlwt.Workbook(encoding=encoding, **engine_kwargs) + self._fm_datetime = xlwt.easyxf(num_format_str=self._datetime_format) + self._fm_date = xlwt.easyxf(num_format_str=self._date_format) + + @property + def book(self) -> Workbook: + """ + Book instance of class xlwt.Workbook. + + This attribute can be used to access engine-specific features. + """ + return self._book + + @book.setter + def book(self, other: Workbook) -> None: + """ + Set book instance. Class type will depend on the engine used. + """ + self._deprecate_set_book() + self._book = other + + @property + def sheets(self) -> dict[str, Any]: + """Mapping of sheet names to sheet objects.""" + result = {sheet.name: sheet for sheet in self.book._Workbook__worksheets} + return result + + @property + def fm_date(self): + """ + XFStyle formatter for dates. + """ + self._deprecate("fm_date") + return self._fm_date + + @property + def fm_datetime(self): + """ + XFStyle formatter for dates. 
+ """ + self._deprecate("fm_datetime") + return self._fm_datetime + + def _save(self) -> None: + """ + Save workbook to disk. + """ + if self.sheets: + # fails when the ExcelWriter is just opened and then closed + self.book.save(self._handles.handle) + + def _write_cells( + self, + cells, + sheet_name: str | None = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + ) -> None: + + sheet_name = self._get_sheet_name(sheet_name) + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.add_sheet(sheet_name) + self.sheets[sheet_name] = wks + + if validate_freeze_panes(freeze_panes): + freeze_panes = cast(Tuple[int, int], freeze_panes) + wks.set_panes_frozen(True) + wks.set_horz_split_pos(freeze_panes[0]) + wks.set_vert_split_pos(freeze_panes[1]) + + style_dict: dict[str, XFStyle] = {} + + for cell in cells: + val, fmt = self._value_with_fmt(cell.val) + + stylekey = json.dumps(cell.style) + if fmt: + stylekey += fmt + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = self._convert_to_style(cell.style, fmt) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.write_merge( + startrow + cell.row, + startrow + cell.mergestart, + startcol + cell.col, + startcol + cell.mergeend, + val, + style, + ) + else: + wks.write(startrow + cell.row, startcol + cell.col, val, style) + + @classmethod + def _style_to_xlwt( + cls, item, firstlevel: bool = True, field_sep: str = ",", line_sep: str = ";" + ) -> str: + """ + helper which recursively generate an xlwt easy style string + for example: + + hstyle = {"font": {"bold": True}, + "border": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "align": {"horiz": "center"}} + will be converted to + font: bold on; \ + border: top thin, right thin, bottom thin, left thin; \ + align: horiz center; + """ + if hasattr(item, "items"): + if firstlevel: + it = [ + f"{key}: {cls._style_to_xlwt(value, False)}" + for key, value in item.items() + ] + out = f"{line_sep.join(it)} " + return out + else: + it = [ + f"{key} {cls._style_to_xlwt(value, False)}" + for key, value in item.items() + ] + out = f"{field_sep.join(it)} " + return out + else: + item = f"{item}" + item = item.replace("True", "on") + item = item.replace("False", "off") + return item + + @classmethod + def _convert_to_style( + cls, style_dict, num_format_str: str | None = None + ) -> XFStyle: + """ + converts a style_dict to an xlwt style object + + Parameters + ---------- + style_dict : style dictionary to convert + num_format_str : optional number format string + """ + import xlwt + + if style_dict: + xlwt_stylestr = cls._style_to_xlwt(style_dict) + style = xlwt.easyxf(xlwt_stylestr, field_sep=",", line_sep=";") + else: + style = xlwt.XFStyle() + if num_format_str is not None: + style.num_format_str = num_format_str + + return style diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py new file mode 100644 index 00000000..4ecd5b76 --- /dev/null +++ b/pandas/io/feather_format.py @@ -0,0 +1,134 @@ +""" feather-format compat """ +from __future__ import annotations + +from typing import ( + Hashable, + Sequence, +) + +from pandas._typing import ( + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc + +from pandas.core.api import ( + DataFrame, + Int64Index, + RangeIndex, +) +from pandas.core.shared_docs 
import _shared_docs + +from pandas.io.common import get_handle + + +@doc(storage_options=_shared_docs["storage_options"]) +def to_feather( + df: DataFrame, + path: FilePath | WriteBuffer[bytes], + storage_options: StorageOptions = None, + **kwargs, +) -> None: + """ + Write a DataFrame to the binary Feather format. + + Parameters + ---------- + df : DataFrame + path : str, path object, or file-like object + {storage_options} + + .. versionadded:: 1.2.0 + + **kwargs : + Additional keywords passed to `pyarrow.feather.write_feather`. + + .. versionadded:: 1.1.0 + """ + import_optional_dependency("pyarrow") + from pyarrow import feather + + if not isinstance(df, DataFrame): + raise ValueError("feather only support IO with DataFrames") + + valid_types = {"string", "unicode"} + + # validate index + # -------------- + + # validate that we have only a default index + # raise on anything else as we don't serialize the index + + if not isinstance(df.index, (Int64Index, RangeIndex)): + typ = type(df.index) + raise ValueError( + f"feather does not support serializing {typ} " + "for the index; you can .reset_index() to make the index into column(s)" + ) + + if not df.index.equals(RangeIndex.from_range(range(len(df)))): + raise ValueError( + "feather does not support serializing a non-default index for the index; " + "you can .reset_index() to make the index into column(s)" + ) + + if df.index.name is not None: + raise ValueError( + "feather does not serialize index meta-data on a default index" + ) + + # validate columns + # ---------------- + + # must have value column names (strings only) + if df.columns.inferred_type not in valid_types: + raise ValueError("feather must have string column names") + + with get_handle( + path, "wb", storage_options=storage_options, is_text=False + ) as handles: + feather.write_feather(df, handles.handle, **kwargs) + + +@doc(storage_options=_shared_docs["storage_options"]) +def read_feather( + path: FilePath | ReadBuffer[bytes], + columns: Sequence[Hashable] | None = None, + use_threads: bool = True, + storage_options: StorageOptions = None, +): + """ + Load a feather-format object from the file path. + + Parameters + ---------- + path : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``read()`` function. The string could be a URL. + Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.feather``. + columns : sequence, default None + If not provided, all columns are read. + use_threads : bool, default True + Whether to parallelize reading using multiple threads. + {storage_options} + + .. 
versionadded:: 1.2.0 + + Returns + ------- + type of object stored in file + """ + import_optional_dependency("pyarrow") + from pyarrow import feather + + with get_handle( + path, "rb", storage_options=storage_options, is_text=False + ) as handles: + + return feather.read_feather( + handles.handle, columns=columns, use_threads=bool(use_threads) + ) diff --git a/pandas/io/formats/__init__.py b/pandas/io/formats/__init__.py new file mode 100644 index 00000000..8a3486a4 --- /dev/null +++ b/pandas/io/formats/__init__.py @@ -0,0 +1,8 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # import modules that have public classes/functions + from pandas.io.formats import style + + # and mark only those modules as public + __all__ = ["style"] diff --git a/pandas/io/formats/_color_data.py b/pandas/io/formats/_color_data.py new file mode 100644 index 00000000..2e7cb7f2 --- /dev/null +++ b/pandas/io/formats/_color_data.py @@ -0,0 +1,157 @@ +# GH37967: Enable the use of CSS named colors, as defined in +# matplotlib.colors.CSS4_COLORS, when exporting to Excel. +# This data has been copied here, instead of being imported from matplotlib, +# not to have ``to_excel`` methods require matplotlib. +# source: matplotlib._color_data (3.3.3) +from __future__ import annotations + +CSS4_COLORS = { + "aliceblue": "F0F8FF", + "antiquewhite": "FAEBD7", + "aqua": "00FFFF", + "aquamarine": "7FFFD4", + "azure": "F0FFFF", + "beige": "F5F5DC", + "bisque": "FFE4C4", + "black": "000000", + "blanchedalmond": "FFEBCD", + "blue": "0000FF", + "blueviolet": "8A2BE2", + "brown": "A52A2A", + "burlywood": "DEB887", + "cadetblue": "5F9EA0", + "chartreuse": "7FFF00", + "chocolate": "D2691E", + "coral": "FF7F50", + "cornflowerblue": "6495ED", + "cornsilk": "FFF8DC", + "crimson": "DC143C", + "cyan": "00FFFF", + "darkblue": "00008B", + "darkcyan": "008B8B", + "darkgoldenrod": "B8860B", + "darkgray": "A9A9A9", + "darkgreen": "006400", + "darkgrey": "A9A9A9", + "darkkhaki": "BDB76B", + "darkmagenta": "8B008B", + "darkolivegreen": "556B2F", + "darkorange": "FF8C00", + "darkorchid": "9932CC", + "darkred": "8B0000", + "darksalmon": "E9967A", + "darkseagreen": "8FBC8F", + "darkslateblue": "483D8B", + "darkslategray": "2F4F4F", + "darkslategrey": "2F4F4F", + "darkturquoise": "00CED1", + "darkviolet": "9400D3", + "deeppink": "FF1493", + "deepskyblue": "00BFFF", + "dimgray": "696969", + "dimgrey": "696969", + "dodgerblue": "1E90FF", + "firebrick": "B22222", + "floralwhite": "FFFAF0", + "forestgreen": "228B22", + "fuchsia": "FF00FF", + "gainsboro": "DCDCDC", + "ghostwhite": "F8F8FF", + "gold": "FFD700", + "goldenrod": "DAA520", + "gray": "808080", + "green": "008000", + "greenyellow": "ADFF2F", + "grey": "808080", + "honeydew": "F0FFF0", + "hotpink": "FF69B4", + "indianred": "CD5C5C", + "indigo": "4B0082", + "ivory": "FFFFF0", + "khaki": "F0E68C", + "lavender": "E6E6FA", + "lavenderblush": "FFF0F5", + "lawngreen": "7CFC00", + "lemonchiffon": "FFFACD", + "lightblue": "ADD8E6", + "lightcoral": "F08080", + "lightcyan": "E0FFFF", + "lightgoldenrodyellow": "FAFAD2", + "lightgray": "D3D3D3", + "lightgreen": "90EE90", + "lightgrey": "D3D3D3", + "lightpink": "FFB6C1", + "lightsalmon": "FFA07A", + "lightseagreen": "20B2AA", + "lightskyblue": "87CEFA", + "lightslategray": "778899", + "lightslategrey": "778899", + "lightsteelblue": "B0C4DE", + "lightyellow": "FFFFE0", + "lime": "00FF00", + "limegreen": "32CD32", + "linen": "FAF0E6", + "magenta": "FF00FF", + "maroon": "800000", + "mediumaquamarine": "66CDAA", + "mediumblue": "0000CD", + 
"mediumorchid": "BA55D3", + "mediumpurple": "9370DB", + "mediumseagreen": "3CB371", + "mediumslateblue": "7B68EE", + "mediumspringgreen": "00FA9A", + "mediumturquoise": "48D1CC", + "mediumvioletred": "C71585", + "midnightblue": "191970", + "mintcream": "F5FFFA", + "mistyrose": "FFE4E1", + "moccasin": "FFE4B5", + "navajowhite": "FFDEAD", + "navy": "000080", + "oldlace": "FDF5E6", + "olive": "808000", + "olivedrab": "6B8E23", + "orange": "FFA500", + "orangered": "FF4500", + "orchid": "DA70D6", + "palegoldenrod": "EEE8AA", + "palegreen": "98FB98", + "paleturquoise": "AFEEEE", + "palevioletred": "DB7093", + "papayawhip": "FFEFD5", + "peachpuff": "FFDAB9", + "peru": "CD853F", + "pink": "FFC0CB", + "plum": "DDA0DD", + "powderblue": "B0E0E6", + "purple": "800080", + "rebeccapurple": "663399", + "red": "FF0000", + "rosybrown": "BC8F8F", + "royalblue": "4169E1", + "saddlebrown": "8B4513", + "salmon": "FA8072", + "sandybrown": "F4A460", + "seagreen": "2E8B57", + "seashell": "FFF5EE", + "sienna": "A0522D", + "silver": "C0C0C0", + "skyblue": "87CEEB", + "slateblue": "6A5ACD", + "slategray": "708090", + "slategrey": "708090", + "snow": "FFFAFA", + "springgreen": "00FF7F", + "steelblue": "4682B4", + "tan": "D2B48C", + "teal": "008080", + "thistle": "D8BFD8", + "tomato": "FF6347", + "turquoise": "40E0D0", + "violet": "EE82EE", + "wheat": "F5DEB3", + "white": "FFFFFF", + "whitesmoke": "F5F5F5", + "yellow": "FFFF00", + "yellowgreen": "9ACD32", +} diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py new file mode 100644 index 00000000..2a6cbe07 --- /dev/null +++ b/pandas/io/formats/console.py @@ -0,0 +1,94 @@ +""" +Internal module for console introspection +""" +from __future__ import annotations + +from shutil import get_terminal_size + + +def get_console_size() -> tuple[int | None, int | None]: + """ + Return console size as tuple = (width, height). + + Returns (None,None) in non-interactive session. + """ + from pandas import get_option + + display_width = get_option("display.width") + display_height = get_option("display.max_rows") + + # Consider + # interactive shell terminal, can detect term size + # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term + # size non-interactive script, should disregard term size + + # in addition + # width,height have default values, but setting to 'None' signals + # should use Auto-Detection, But only in interactive shell-terminal. + # Simple. yeah. + + if in_interactive_session(): + if in_ipython_frontend(): + # sane defaults for interactive non-shell terminal + # match default for width,height in config_init + from pandas._config.config import get_default_val + + terminal_width = get_default_val("display.width") + terminal_height = get_default_val("display.max_rows") + else: + # pure terminal + terminal_width, terminal_height = get_terminal_size() + else: + terminal_width, terminal_height = None, None + + # Note if the User sets width/Height to None (auto-detection) + # and we're in a script (non-inter), this will return (None,None) + # caller needs to deal. + return display_width or terminal_width, display_height or terminal_height + + +# ---------------------------------------------------------------------- +# Detect our environment + + +def in_interactive_session() -> bool: + """ + Check if we're running in an interactive shell. + + Returns + ------- + bool + True if running under python/ipython interactive shell. 
+ """ + from pandas import get_option + + def check_main(): + try: + import __main__ as main + except ModuleNotFoundError: + return get_option("mode.sim_interactive") + return not hasattr(main, "__file__") or get_option("mode.sim_interactive") + + try: + # error: Name '__IPYTHON__' is not defined + return __IPYTHON__ or check_main() # type: ignore[name-defined] + except NameError: + return check_main() + + +def in_ipython_frontend() -> bool: + """ + Check if we're inside an IPython zmq frontend. + + Returns + ------- + bool + """ + try: + # error: Name 'get_ipython' is not defined + ip = get_ipython() # type: ignore[name-defined] + return "zmq" in str(type(ip)).lower() + except NameError: + pass + + return False diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py new file mode 100644 index 00000000..34626a0b --- /dev/null +++ b/pandas/io/formats/css.py @@ -0,0 +1,418 @@ +""" +Utilities for interpreting CSS from Stylers for formatting non-HTML outputs. +""" +from __future__ import annotations + +import re +from typing import ( + Callable, + Generator, + Iterable, + Iterator, +) +import warnings + +from pandas.errors import CSSWarning +from pandas.util._exceptions import find_stack_level + + +def _side_expander(prop_fmt: str) -> Callable: + """ + Wrapper to expand shorthand property into top, right, bottom, left properties + + Parameters + ---------- + side : str + The border side to expand into properties + + Returns + ------- + function: Return to call when a 'border(-{side}): {value}' string is encountered + """ + + def expand(self, prop, value: str) -> Generator[tuple[str, str], None, None]: + """ + Expand shorthand property into side-specific property (top, right, bottom, left) + + Parameters + ---------- + prop (str): CSS property name + value (str): String token for property + + Yields + ------ + Tuple (str, str): Expanded property, value + """ + tokens = value.split() + try: + mapping = self.SIDE_SHORTHANDS[len(tokens)] + except KeyError: + warnings.warn( + f'Could not expand "{prop}: {value}"', + CSSWarning, + stacklevel=find_stack_level(), + ) + return + for key, idx in zip(self.SIDES, mapping): + yield prop_fmt.format(key), tokens[idx] + + return expand + + +def _border_expander(side: str = "") -> Callable: + """ + Wrapper to expand 'border' property into border color, style, and width properties + + Parameters + ---------- + side : str + The border side to expand into properties + + Returns + ------- + function: Return to call when a 'border(-{side}): {value}' string is encountered + """ + if side != "": + side = f"-{side}" + + def expand(self, prop, value: str) -> Generator[tuple[str, str], None, None]: + """ + Expand border into color, style, and width tuples + + Parameters + ---------- + prop : str + CSS property name passed to styler + value : str + Value passed to styler for property + + Yields + ------ + Tuple (str, str): Expanded property, value + """ + tokens = value.split() + if len(tokens) == 0 or len(tokens) > 3: + warnings.warn( + f'Too many tokens provided to "{prop}" (expected 1-3)', + CSSWarning, + stacklevel=find_stack_level(), + ) + + # TODO: Can we use current color as initial value to comply with CSS standards? 
+ border_declarations = { + f"border{side}-color": "black", + f"border{side}-style": "none", + f"border{side}-width": "medium", + } + for token in tokens: + if token.lower() in self.BORDER_STYLES: + border_declarations[f"border{side}-style"] = token + elif any(ratio in token.lower() for ratio in self.BORDER_WIDTH_RATIOS): + border_declarations[f"border{side}-width"] = token + else: + border_declarations[f"border{side}-color"] = token + # TODO: Warn user if item entered more than once (e.g. "border: red green") + + # Per CSS, "border" will reset previous "border-*" definitions + yield from self.atomize(border_declarations.items()) + + return expand + + +class CSSResolver: + """ + A callable for parsing and resolving CSS to atomic properties. + """ + + UNIT_RATIOS = { + "pt": ("pt", 1), + "em": ("em", 1), + "rem": ("pt", 12), + "ex": ("em", 0.5), + # 'ch': + "px": ("pt", 0.75), + "pc": ("pt", 12), + "in": ("pt", 72), + "cm": ("in", 1 / 2.54), + "mm": ("in", 1 / 25.4), + "q": ("mm", 0.25), + "!!default": ("em", 0), + } + + FONT_SIZE_RATIOS = UNIT_RATIOS.copy() + FONT_SIZE_RATIOS.update( + { + "%": ("em", 0.01), + "xx-small": ("rem", 0.5), + "x-small": ("rem", 0.625), + "small": ("rem", 0.8), + "medium": ("rem", 1), + "large": ("rem", 1.125), + "x-large": ("rem", 1.5), + "xx-large": ("rem", 2), + "smaller": ("em", 1 / 1.2), + "larger": ("em", 1.2), + "!!default": ("em", 1), + } + ) + + MARGIN_RATIOS = UNIT_RATIOS.copy() + MARGIN_RATIOS.update({"none": ("pt", 0)}) + + BORDER_WIDTH_RATIOS = UNIT_RATIOS.copy() + BORDER_WIDTH_RATIOS.update( + { + "none": ("pt", 0), + "thick": ("px", 4), + "medium": ("px", 2), + "thin": ("px", 1), + # Default: medium only if solid + } + ) + + BORDER_STYLES = [ + "none", + "hidden", + "dotted", + "dashed", + "solid", + "double", + "groove", + "ridge", + "inset", + "outset", + "mediumdashdot", + "dashdotdot", + "hair", + "mediumdashdotdot", + "dashdot", + "slantdashdot", + "mediumdashed", + ] + + SIDE_SHORTHANDS = { + 1: [0, 0, 0, 0], + 2: [0, 1, 0, 1], + 3: [0, 1, 2, 1], + 4: [0, 1, 2, 3], + } + + SIDES = ("top", "right", "bottom", "left") + + CSS_EXPANSIONS = { + **{ + "-".join(["border", prop] if prop else ["border"]): _border_expander(prop) + for prop in ["", "top", "right", "bottom", "left"] + }, + **{ + "-".join(["border", prop]): _side_expander("border-{:s}-" + prop) + for prop in ["color", "style", "width"] + }, + **{ + "margin": _side_expander("margin-{:s}"), + "padding": _side_expander("padding-{:s}"), + }, + } + + def __call__( + self, + declarations: str | Iterable[tuple[str, str]], + inherited: dict[str, str] | None = None, + ) -> dict[str, str]: + """ + The given declarations to atomic properties. + + Parameters + ---------- + declarations_str : str | Iterable[tuple[str, str]] + A CSS string or set of CSS declaration tuples + e.g. "font-weight: bold; background: blue" or + {("font-weight", "bold"), ("background", "blue")} + inherited : dict, optional + Atomic properties indicating the inherited style context in which + declarations_str is to be resolved. ``inherited`` should already + be resolved, i.e. valid output of this method. + + Returns + ------- + dict + Atomic CSS 2.2 properties. + + Examples + -------- + >>> resolve = CSSResolver() + >>> inherited = {'font-family': 'serif', 'font-weight': 'bold'} + >>> out = resolve(''' + ... border-color: BLUE RED; + ... font-size: 1em; + ... font-size: 2em; + ... font-weight: normal; + ... font-weight: inherit; + ... 
''', inherited) + >>> sorted(out.items()) # doctest: +NORMALIZE_WHITESPACE + [('border-bottom-color', 'blue'), + ('border-left-color', 'red'), + ('border-right-color', 'red'), + ('border-top-color', 'blue'), + ('font-family', 'serif'), + ('font-size', '24pt'), + ('font-weight', 'bold')] + """ + if isinstance(declarations, str): + declarations = self.parse(declarations) + props = dict(self.atomize(declarations)) + if inherited is None: + inherited = {} + + props = self._update_initial(props, inherited) + props = self._update_font_size(props, inherited) + return self._update_other_units(props) + + def _update_initial( + self, + props: dict[str, str], + inherited: dict[str, str], + ) -> dict[str, str]: + # 1. resolve inherited, initial + for prop, val in inherited.items(): + if prop not in props: + props[prop] = val + + new_props = props.copy() + for prop, val in props.items(): + if val == "inherit": + val = inherited.get(prop, "initial") + + if val in ("initial", None): + # we do not define a complete initial stylesheet + del new_props[prop] + else: + new_props[prop] = val + return new_props + + def _update_font_size( + self, + props: dict[str, str], + inherited: dict[str, str], + ) -> dict[str, str]: + # 2. resolve relative font size + if props.get("font-size"): + props["font-size"] = self.size_to_pt( + props["font-size"], + self._get_font_size(inherited), + conversions=self.FONT_SIZE_RATIOS, + ) + return props + + def _get_font_size(self, props: dict[str, str]) -> float | None: + if props.get("font-size"): + font_size_string = props["font-size"] + return self._get_float_font_size_from_pt(font_size_string) + return None + + def _get_float_font_size_from_pt(self, font_size_string: str) -> float: + assert font_size_string.endswith("pt") + return float(font_size_string.rstrip("pt")) + + def _update_other_units(self, props: dict[str, str]) -> dict[str, str]: + font_size = self._get_font_size(props) + # 3. TODO: resolve other font-relative units + for side in self.SIDES: + prop = f"border-{side}-width" + if prop in props: + props[prop] = self.size_to_pt( + props[prop], + em_pt=font_size, + conversions=self.BORDER_WIDTH_RATIOS, + ) + + for prop in [f"margin-{side}", f"padding-{side}"]: + if prop in props: + # TODO: support % + props[prop] = self.size_to_pt( + props[prop], + em_pt=font_size, + conversions=self.MARGIN_RATIOS, + ) + return props + + def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS): + def _error(): + warnings.warn( + f"Unhandled size: {repr(in_val)}", + CSSWarning, + stacklevel=find_stack_level(), + ) + return self.size_to_pt("1!!default", conversions=conversions) + + match = re.match(r"^(\S*?)([a-zA-Z%!].*)", in_val) + if match is None: + return _error() + + val, unit = match.groups() + if val == "": + # hack for 'large' etc. 
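To make the conversion tables above concrete, a short sketch (again assuming the private CSSResolver API): px is converted at 0.75pt per px, and named sizes resolve against the 12pt root implied by FONT_SIZE_RATIOS.

    from pandas.io.formats.css import CSSResolver

    resolve = CSSResolver()
    print(resolve("font-size: 16px"))     # {'font-size': '12pt'}
    print(resolve("font-size: x-large"))  # {'font-size': '18pt'}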
+ val = 1 + else: + try: + val = float(val) + except ValueError: + return _error() + + while unit != "pt": + if unit == "em": + if em_pt is None: + unit = "rem" + else: + val *= em_pt + unit = "pt" + continue + + try: + unit, mul = conversions[unit] + except KeyError: + return _error() + val *= mul + + val = round(val, 5) + if int(val) == val: + size_fmt = f"{int(val):d}pt" + else: + size_fmt = f"{val:f}pt" + return size_fmt + + def atomize(self, declarations: Iterable) -> Generator[tuple[str, str], None, None]: + for prop, value in declarations: + prop = prop.lower() + value = value.lower() + if prop in self.CSS_EXPANSIONS: + expand = self.CSS_EXPANSIONS[prop] + yield from expand(self, prop, value) + else: + yield prop, value + + def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]: + """ + Generates (prop, value) pairs from declarations. + + In a future version may generate parsed tokens from tinycss/tinycss2 + + Parameters + ---------- + declarations_str : str + """ + for decl in declarations_str.split(";"): + if not decl.strip(): + continue + prop, sep, val = decl.partition(":") + prop = prop.strip().lower() + # TODO: don't lowercase case sensitive parts of values (strings) + val = val.strip().lower() + if sep: + yield prop, val + else: + warnings.warn( + f"Ill-formatted attribute: expected a colon in {repr(decl)}", + CSSWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py new file mode 100644 index 00000000..6ab57b0c --- /dev/null +++ b/pandas/io/formats/csvs.py @@ -0,0 +1,321 @@ +""" +Module for formatting output data into CSV files. +""" + +from __future__ import annotations + +import csv as csvlib +import os +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + Iterator, + Sequence, + cast, +) + +import numpy as np + +from pandas._libs import writers as libwriters +from pandas._typing import ( + CompressionOptions, + FilePath, + FloatFormatType, + IndexLabel, + StorageOptions, + WriteBuffer, +) +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, + ABCIndex, + ABCMultiIndex, + ABCPeriodIndex, +) +from pandas.core.dtypes.missing import notna + +from pandas.core.indexes.api import Index + +from pandas.io.common import get_handle + +if TYPE_CHECKING: + from pandas.io.formats.format import DataFrameFormatter + + +class CSVFormatter: + cols: np.ndarray + + def __init__( + self, + formatter: DataFrameFormatter, + path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] = "", + sep: str = ",", + cols: Sequence[Hashable] | None = None, + index_label: IndexLabel | None = None, + mode: str = "w", + encoding: str | None = None, + errors: str = "strict", + compression: CompressionOptions = "infer", + quoting: int | None = None, + lineterminator: str | None = "\n", + chunksize: int | None = None, + quotechar: str | None = '"', + date_format: str | None = None, + doublequote: bool = True, + escapechar: str | None = None, + storage_options: StorageOptions = None, + ) -> None: + self.fmt = formatter + + self.obj = self.fmt.frame + + self.filepath_or_buffer = path_or_buf + self.encoding = encoding + self.compression: CompressionOptions = compression + self.mode = mode + self.storage_options = storage_options + + self.sep = sep + self.index_label = self._initialize_index_label(index_label) + self.errors = errors + self.quoting = quoting or csvlib.QUOTE_MINIMAL + self.quotechar = self._initialize_quotechar(quotechar) + self.doublequote = doublequote + 
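For orientation, the public entry point that feeds this formatter is DataFrame.to_csv; a minimal sketch (the buffer and sample data are illustrative only) showing a few of the options stored above being passed through:

    import csv
    import io
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": ["x;y", "z"]})
    buf = io.StringIO()
    df.to_csv(buf, sep=";", quoting=csv.QUOTE_NONNUMERIC, lineterminator="\n")
    print(buf.getvalue())  # semicolon-separated output with text fields quoted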
self.escapechar = escapechar + self.lineterminator = lineterminator or os.linesep + self.date_format = date_format + self.cols = self._initialize_columns(cols) + self.chunksize = self._initialize_chunksize(chunksize) + + @property + def na_rep(self) -> str: + return self.fmt.na_rep + + @property + def float_format(self) -> FloatFormatType | None: + return self.fmt.float_format + + @property + def decimal(self) -> str: + return self.fmt.decimal + + @property + def header(self) -> bool | Sequence[str]: + return self.fmt.header + + @property + def index(self) -> bool: + return self.fmt.index + + def _initialize_index_label(self, index_label: IndexLabel | None) -> IndexLabel: + if index_label is not False: + if index_label is None: + return self._get_index_label_from_obj() + elif not isinstance(index_label, (list, tuple, np.ndarray, ABCIndex)): + # given a string for a DF with Index + return [index_label] + return index_label + + def _get_index_label_from_obj(self) -> Sequence[Hashable]: + if isinstance(self.obj.index, ABCMultiIndex): + return self._get_index_label_multiindex() + else: + return self._get_index_label_flat() + + def _get_index_label_multiindex(self) -> Sequence[Hashable]: + return [name or "" for name in self.obj.index.names] + + def _get_index_label_flat(self) -> Sequence[Hashable]: + index_label = self.obj.index.name + return [""] if index_label is None else [index_label] + + def _initialize_quotechar(self, quotechar: str | None) -> str | None: + if self.quoting != csvlib.QUOTE_NONE: + # prevents crash in _csv + return quotechar + return None + + @property + def has_mi_columns(self) -> bool: + return bool(isinstance(self.obj.columns, ABCMultiIndex)) + + def _initialize_columns(self, cols: Sequence[Hashable] | None) -> np.ndarray: + # validate mi options + if self.has_mi_columns: + if cols is not None: + msg = "cannot specify cols with a MultiIndex on the columns" + raise TypeError(msg) + + if cols is not None: + if isinstance(cols, ABCIndex): + cols = cols._format_native_types(**self._number_format) + else: + cols = list(cols) + self.obj = self.obj.loc[:, cols] + + # update columns to include possible multiplicity of dupes + # and make sure cols is just a list of labels + new_cols = self.obj.columns + return new_cols._format_native_types(**self._number_format) + + def _initialize_chunksize(self, chunksize: int | None) -> int: + if chunksize is None: + return (100000 // (len(self.cols) or 1)) or 1 + return int(chunksize) + + @property + def _number_format(self) -> dict[str, Any]: + """Dictionary used for storing number formatting settings.""" + return { + "na_rep": self.na_rep, + "float_format": self.float_format, + "date_format": self.date_format, + "quoting": self.quoting, + "decimal": self.decimal, + } + + @cache_readonly + def data_index(self) -> Index: + data_index = self.obj.index + if ( + isinstance(data_index, (ABCDatetimeIndex, ABCPeriodIndex)) + and self.date_format is not None + ): + data_index = Index( + [x.strftime(self.date_format) if notna(x) else "" for x in data_index] + ) + elif isinstance(data_index, ABCMultiIndex): + data_index = data_index.remove_unused_levels() + return data_index + + @property + def nlevels(self) -> int: + if self.index: + return getattr(self.data_index, "nlevels", 1) + else: + return 0 + + @property + def _has_aliases(self) -> bool: + return isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) + + @property + def _need_to_save_header(self) -> bool: + return bool(self._has_aliases or self.header) + + @property + def 
write_cols(self) -> Sequence[Hashable]: + if self._has_aliases: + assert not isinstance(self.header, bool) + if len(self.header) != len(self.cols): + raise ValueError( + f"Writing {len(self.cols)} cols but got {len(self.header)} aliases" + ) + else: + return self.header + else: + # self.cols is an ndarray derived from Index._format_native_types, + # so its entries are strings, i.e. hashable + return cast(Sequence[Hashable], self.cols) + + @property + def encoded_labels(self) -> list[Hashable]: + encoded_labels: list[Hashable] = [] + + if self.index and self.index_label: + assert isinstance(self.index_label, Sequence) + encoded_labels = list(self.index_label) + + if not self.has_mi_columns or self._has_aliases: + encoded_labels += list(self.write_cols) + + return encoded_labels + + def save(self) -> None: + """ + Create the writer & save. + """ + # apply compression and byte/text conversion + with get_handle( + self.filepath_or_buffer, + self.mode, + encoding=self.encoding, + errors=self.errors, + compression=self.compression, + storage_options=self.storage_options, + ) as handles: + + # Note: self.encoding is irrelevant here + self.writer = csvlib.writer( + handles.handle, + lineterminator=self.lineterminator, + delimiter=self.sep, + quoting=self.quoting, + doublequote=self.doublequote, + escapechar=self.escapechar, + quotechar=self.quotechar, + ) + + self._save() + + def _save(self) -> None: + if self._need_to_save_header: + self._save_header() + self._save_body() + + def _save_header(self) -> None: + if not self.has_mi_columns or self._has_aliases: + self.writer.writerow(self.encoded_labels) + else: + for row in self._generate_multiindex_header_rows(): + self.writer.writerow(row) + + def _generate_multiindex_header_rows(self) -> Iterator[list[Hashable]]: + columns = self.obj.columns + for i in range(columns.nlevels): + # we need at least 1 index column to write our col names + col_line = [] + if self.index: + # name is the first column + col_line.append(columns.names[i]) + + if isinstance(self.index_label, list) and len(self.index_label) > 1: + col_line.extend([""] * (len(self.index_label) - 1)) + + col_line.extend(columns._get_level_values(i)) + yield col_line + + # Write out the index line if it's not empty. + # Otherwise, we will print out an extraneous + # blank line between the mi and the data rows. + if self.encoded_labels and set(self.encoded_labels) != {""}: + yield self.encoded_labels + [""] * len(columns) + + def _save_body(self) -> None: + nrows = len(self.data_index) + chunks = (nrows // self.chunksize) + 1 + for i in range(chunks): + start_i = i * self.chunksize + end_i = min(start_i + self.chunksize, nrows) + if start_i >= end_i: + break + self._save_chunk(start_i, end_i) + + def _save_chunk(self, start_i: int, end_i: int) -> None: + # create the data for a chunk + slicer = slice(start_i, end_i) + df = self.obj.iloc[slicer] + + res = df._mgr.to_native_types(**self._number_format) + data = [res.iget_values(i) for i in range(len(res.items))] + + ix = self.data_index[slicer]._format_native_types(**self._number_format) + libwriters.write_csv_rows( + data, + ix, + self.nlevels, + self.cols, + self.writer, + ) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py new file mode 100644 index 00000000..b6e0f271 --- /dev/null +++ b/pandas/io/formats/excel.py @@ -0,0 +1,960 @@ +""" +Utilities for conversion to writer-agnostic Excel representation. 
+""" +from __future__ import annotations + +from functools import ( + lru_cache, + reduce, +) +import itertools +import re +from typing import ( + Any, + Callable, + Hashable, + Iterable, + Mapping, + Sequence, + cast, +) +import warnings + +import numpy as np + +from pandas._libs.lib import is_list_like +from pandas._typing import ( + IndexLabel, + StorageOptions, +) +from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes import missing +from pandas.core.dtypes.common import ( + is_float, + is_scalar, +) + +from pandas import ( + DataFrame, + Index, + MultiIndex, + PeriodIndex, +) +import pandas.core.common as com +from pandas.core.shared_docs import _shared_docs + +from pandas.io.formats._color_data import CSS4_COLORS +from pandas.io.formats.css import ( + CSSResolver, + CSSWarning, +) +from pandas.io.formats.format import get_level_lengths +from pandas.io.formats.printing import pprint_thing + + +class ExcelCell: + __fields__ = ("row", "col", "val", "style", "mergestart", "mergeend") + __slots__ = __fields__ + + def __init__( + self, + row: int, + col: int, + val, + style=None, + mergestart: int | None = None, + mergeend: int | None = None, + ) -> None: + self.row = row + self.col = col + self.val = val + self.style = style + self.mergestart = mergestart + self.mergeend = mergeend + + +class CssExcelCell(ExcelCell): + def __init__( + self, + row: int, + col: int, + val, + style: dict | None, + css_styles: dict[tuple[int, int], list[tuple[str, Any]]] | None, + css_row: int, + css_col: int, + css_converter: Callable | None, + **kwargs, + ) -> None: + if css_styles and css_converter: + # Use dict to get only one (case-insensitive) declaration per property + declaration_dict = { + prop.lower(): val for prop, val in css_styles[css_row, css_col] + } + # Convert to frozenset for order-invariant caching + unique_declarations = frozenset(declaration_dict.items()) + style = css_converter(unique_declarations) + + super().__init__(row=row, col=col, val=val, style=style, **kwargs) + + +class CSSToExcelConverter: + """ + A callable for converting CSS declarations to ExcelWriter styles + + Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow), + focusing on font styling, backgrounds, borders and alignment. + + Operates by first computing CSS styles in a fairly generic + way (see :meth:`compute_css`) then determining Excel style + properties from CSS properties (see :meth:`build_xlstyle`). + + Parameters + ---------- + inherited : str, optional + CSS declarations understood to be the containing scope for the + CSS processed by :meth:`__call__`. 
+ """ + + NAMED_COLORS = CSS4_COLORS + + VERTICAL_MAP = { + "top": "top", + "text-top": "top", + "middle": "center", + "baseline": "bottom", + "bottom": "bottom", + "text-bottom": "bottom", + # OpenXML also has 'justify', 'distributed' + } + + BOLD_MAP = { + "bold": True, + "bolder": True, + "600": True, + "700": True, + "800": True, + "900": True, + "normal": False, + "lighter": False, + "100": False, + "200": False, + "300": False, + "400": False, + "500": False, + } + + ITALIC_MAP = { + "normal": False, + "italic": True, + "oblique": True, + } + + FAMILY_MAP = { + "serif": 1, # roman + "sans-serif": 2, # swiss + "cursive": 4, # script + "fantasy": 5, # decorative + } + + BORDER_STYLE_MAP = { + style.lower(): style + for style in [ + "dashed", + "mediumDashDot", + "dashDotDot", + "hair", + "dotted", + "mediumDashDotDot", + "double", + "dashDot", + "slantDashDot", + "mediumDashed", + ] + } + + # NB: Most of the methods here could be classmethods, as only __init__ + # and __call__ make use of instance attributes. We leave them as + # instancemethods so that users can easily experiment with extensions + # without monkey-patching. + inherited: dict[str, str] | None + + def __init__(self, inherited: str | None = None) -> None: + if inherited is not None: + self.inherited = self.compute_css(inherited) + else: + self.inherited = None + # We should avoid lru_cache on the __call__ method. + # Otherwise once the method __call__ has been called + # garbage collection no longer deletes the instance. + self._call_cached = lru_cache(maxsize=None)(self._call_uncached) + + compute_css = CSSResolver() + + def __call__( + self, declarations: str | frozenset[tuple[str, str]] + ) -> dict[str, dict[str, str]]: + """ + Convert CSS declarations to ExcelWriter style. + + Parameters + ---------- + declarations : str | frozenset[tuple[str, str]] + CSS string or set of CSS declaration tuples. + e.g. "font-weight: bold; background: blue" or + {("font-weight", "bold"), ("background", "blue")} + + Returns + ------- + xlstyle : dict + A style as interpreted by ExcelWriter when found in + ExcelCell.style. 
+ """ + return self._call_cached(declarations) + + def _call_uncached( + self, declarations: str | frozenset[tuple[str, str]] + ) -> dict[str, dict[str, str]]: + properties = self.compute_css(declarations, self.inherited) + return self.build_xlstyle(properties) + + def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]: + out = { + "alignment": self.build_alignment(props), + "border": self.build_border(props), + "fill": self.build_fill(props), + "font": self.build_font(props), + "number_format": self.build_number_format(props), + } + + # TODO: handle cell width and height: needs support in pandas.io.excel + + def remove_none(d: dict[str, str | None]) -> None: + """Remove key where value is None, through nested dicts""" + for k, v in list(d.items()): + if v is None: + del d[k] + elif isinstance(v, dict): + remove_none(v) + if not v: + del d[k] + + remove_none(out) + return out + + def build_alignment(self, props: Mapping[str, str]) -> dict[str, bool | str | None]: + # TODO: text-indent, padding-left -> alignment.indent + return { + "horizontal": props.get("text-align"), + "vertical": self._get_vertical_alignment(props), + "wrap_text": self._get_is_wrap_text(props), + } + + def _get_vertical_alignment(self, props: Mapping[str, str]) -> str | None: + vertical_align = props.get("vertical-align") + if vertical_align: + return self.VERTICAL_MAP.get(vertical_align) + return None + + def _get_is_wrap_text(self, props: Mapping[str, str]) -> bool | None: + if props.get("white-space") is None: + return None + return bool(props["white-space"] not in ("nowrap", "pre", "pre-line")) + + def build_border( + self, props: Mapping[str, str] + ) -> dict[str, dict[str, str | None]]: + return { + side: { + "style": self._border_style( + props.get(f"border-{side}-style"), + props.get(f"border-{side}-width"), + self.color_to_excel(props.get(f"border-{side}-color")), + ), + "color": self.color_to_excel(props.get(f"border-{side}-color")), + } + for side in ["top", "right", "bottom", "left"] + } + + def _border_style(self, style: str | None, width: str | None, color: str | None): + # convert styles and widths to openxml, one of: + # 'dashDot' + # 'dashDotDot' + # 'dashed' + # 'dotted' + # 'double' + # 'hair' + # 'medium' + # 'mediumDashDot' + # 'mediumDashDotDot' + # 'mediumDashed' + # 'slantDashDot' + # 'thick' + # 'thin' + if width is None and style is None and color is None: + # Return None will remove "border" from style dictionary + return None + + if width is None and style is None: + # Return "none" will keep "border" in style dictionary + return "none" + + if style == "none" or style == "hidden": + return "none" + + width_name = self._get_width_name(width) + if width_name is None: + return "none" + + if style in (None, "groove", "ridge", "inset", "outset", "solid"): + # not handled + return width_name + + if style == "double": + return "double" + if style == "dotted": + if width_name in ("hair", "thin"): + return "dotted" + return "mediumDashDotDot" + if style == "dashed": + if width_name in ("hair", "thin"): + return "dashed" + return "mediumDashed" + elif style in self.BORDER_STYLE_MAP: + # Excel-specific styles + return self.BORDER_STYLE_MAP[style] + else: + warnings.warn( + f"Unhandled border style format: {repr(style)}", + CSSWarning, + stacklevel=find_stack_level(), + ) + return "none" + + def _get_width_name(self, width_input: str | None) -> str | None: + width = self._width_to_float(width_input) + if width < 1e-5: + return None + elif width < 1.3: + return "thin" + elif width 
< 2.8: + return "medium" + return "thick" + + def _width_to_float(self, width: str | None) -> float: + if width is None: + width = "2pt" + return self._pt_to_float(width) + + def _pt_to_float(self, pt_string: str) -> float: + assert pt_string.endswith("pt") + return float(pt_string.rstrip("pt")) + + def build_fill(self, props: Mapping[str, str]): + # TODO: perhaps allow for special properties + # -excel-pattern-bgcolor and -excel-pattern-type + fill_color = props.get("background-color") + if fill_color not in (None, "transparent", "none"): + return {"fgColor": self.color_to_excel(fill_color), "patternType": "solid"} + + def build_number_format(self, props: Mapping[str, str]) -> dict[str, str | None]: + fc = props.get("number-format") + fc = fc.replace("§", ";") if isinstance(fc, str) else fc + return {"format_code": fc} + + def build_font( + self, props: Mapping[str, str] + ) -> dict[str, bool | float | str | None]: + font_names = self._get_font_names(props) + decoration = self._get_decoration(props) + return { + "name": font_names[0] if font_names else None, + "family": self._select_font_family(font_names), + "size": self._get_font_size(props), + "bold": self._get_is_bold(props), + "italic": self._get_is_italic(props), + "underline": ("single" if "underline" in decoration else None), + "strike": ("line-through" in decoration) or None, + "color": self.color_to_excel(props.get("color")), + # shadow if nonzero digit before shadow color + "shadow": self._get_shadow(props), + } + + def _get_is_bold(self, props: Mapping[str, str]) -> bool | None: + weight = props.get("font-weight") + if weight: + return self.BOLD_MAP.get(weight) + return None + + def _get_is_italic(self, props: Mapping[str, str]) -> bool | None: + font_style = props.get("font-style") + if font_style: + return self.ITALIC_MAP.get(font_style) + return None + + def _get_decoration(self, props: Mapping[str, str]) -> Sequence[str]: + decoration = props.get("text-decoration") + if decoration is not None: + return decoration.split() + else: + return () + + def _get_underline(self, decoration: Sequence[str]) -> str | None: + if "underline" in decoration: + return "single" + return None + + def _get_shadow(self, props: Mapping[str, str]) -> bool | None: + if "text-shadow" in props: + return bool(re.search("^[^#(]*[1-9]", props["text-shadow"])) + return None + + def _get_font_names(self, props: Mapping[str, str]) -> Sequence[str]: + font_names_tmp = re.findall( + r"""(?x) + ( + "(?:[^"]|\\")+" + | + '(?:[^']|\\')+' + | + [^'",]+ + )(?=,|\s*$) + """, + props.get("font-family", ""), + ) + + font_names = [] + for name in font_names_tmp: + if name[:1] == '"': + name = name[1:-1].replace('\\"', '"') + elif name[:1] == "'": + name = name[1:-1].replace("\\'", "'") + else: + name = name.strip() + if name: + font_names.append(name) + return font_names + + def _get_font_size(self, props: Mapping[str, str]) -> float | None: + size = props.get("font-size") + if size is None: + return size + return self._pt_to_float(size) + + def _select_font_family(self, font_names) -> int | None: + family = None + for name in font_names: + family = self.FAMILY_MAP.get(name) + if family: + break + + return family + + def color_to_excel(self, val: str | None) -> str | None: + if val is None: + return None + + if self._is_hex_color(val): + return self._convert_hex_to_excel(val) + + try: + return self.NAMED_COLORS[val] + except KeyError: + warnings.warn( + f"Unhandled color format: {repr(val)}", + CSSWarning, + stacklevel=find_stack_level(), + ) + return None + + def 
_is_hex_color(self, color_string: str) -> bool: + return bool(color_string.startswith("#")) + + def _convert_hex_to_excel(self, color_string: str) -> str: + code = color_string.lstrip("#") + if self._is_shorthand_color(color_string): + return (code[0] * 2 + code[1] * 2 + code[2] * 2).upper() + else: + return code.upper() + + def _is_shorthand_color(self, color_string: str) -> bool: + """Check if color code is shorthand. + + #FFF is a shorthand as opposed to full #FFFFFF. + """ + code = color_string.lstrip("#") + if len(code) == 3: + return True + elif len(code) == 6: + return False + else: + raise ValueError(f"Unexpected color {color_string}") + + +class ExcelFormatter: + """ + Class for formatting a DataFrame to a list of ExcelCells, + + Parameters + ---------- + df : DataFrame or Styler + na_rep: na representation + float_format : str, default None + Format string for floating point numbers + cols : sequence, optional + Columns to write + header : bool or sequence of str, default True + Write out column names. If a list of string is given it is + assumed to be aliases for the column names + index : bool, default True + output row names (index) + index_label : str or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. + merge_cells : bool, default False + Format MultiIndex and Hierarchical Rows as merged cells. + inf_rep : str, default `'inf'` + representation for np.inf values (which aren't representable in Excel) + A `'-'` sign will be added in front of -inf. + style_converter : callable, optional + This translates Styler styles (CSS) into ExcelWriter styles. + Defaults to ``CSSToExcelConverter()``. + It should have signature css_declarations string -> excel style. + This is only called for body cells. 
+ """ + + max_rows = 2**20 + max_cols = 2**14 + + def __init__( + self, + df, + na_rep: str = "", + float_format: str | None = None, + cols: Sequence[Hashable] | None = None, + header: Sequence[Hashable] | bool = True, + index: bool = True, + index_label: IndexLabel | None = None, + merge_cells: bool = False, + inf_rep: str = "inf", + style_converter: Callable | None = None, + ) -> None: + self.rowcounter = 0 + self.na_rep = na_rep + if not isinstance(df, DataFrame): + self.styler = df + self.styler._compute() # calculate applied styles + df = df.data + if style_converter is None: + style_converter = CSSToExcelConverter() + self.style_converter: Callable | None = style_converter + else: + self.styler = None + self.style_converter = None + self.df = df + if cols is not None: + + # all missing, raise + if not len(Index(cols).intersection(df.columns)): + raise KeyError("passes columns are not ALL present dataframe") + + if len(Index(cols).intersection(df.columns)) != len(set(cols)): + # Deprecated in GH#17295, enforced in 1.0.0 + raise KeyError("Not all names specified in 'columns' are found") + + self.df = df.reindex(columns=cols) + + self.columns = self.df.columns + self.float_format = float_format + self.index = index + self.index_label = index_label + self.header = header + self.merge_cells = merge_cells + self.inf_rep = inf_rep + + @property + def header_style(self) -> dict[str, dict[str, str | bool]]: + return { + "font": {"bold": True}, + "borders": { + "top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin", + }, + "alignment": {"horizontal": "center", "vertical": "top"}, + } + + def _format_value(self, val): + if is_scalar(val) and missing.isna(val): + val = self.na_rep + elif is_float(val): + if missing.isposinf_scalar(val): + val = self.inf_rep + elif missing.isneginf_scalar(val): + val = f"-{self.inf_rep}" + elif self.float_format is not None: + val = float(self.float_format % val) + if getattr(val, "tzinfo", None) is not None: + raise ValueError( + "Excel does not support datetimes with " + "timezones. Please ensure that datetimes " + "are timezone unaware before writing to Excel." + ) + return val + + def _format_header_mi(self) -> Iterable[ExcelCell]: + if self.columns.nlevels > 1: + if not self.index: + raise NotImplementedError( + "Writing to Excel with MultiIndex columns and no " + "index ('index'=False) is not yet implemented." + ) + + if not (self._has_aliases or self.header): + return + + columns = self.columns + level_strs = columns.format( + sparsify=self.merge_cells, adjoin=False, names=False + ) + level_lengths = get_level_lengths(level_strs) + coloffset = 0 + lnum = 0 + + if self.index and isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index[0]) - 1 + + if self.merge_cells: + # Format multi-index as a merged cells. 
+ for lnum, name in enumerate(columns.names): + yield ExcelCell( + row=lnum, + col=coloffset, + val=name, + style=self.header_style, + ) + + for lnum, (spans, levels, level_codes) in enumerate( + zip(level_lengths, columns.levels, columns.codes) + ): + values = levels.take(level_codes) + for i, span_val in spans.items(): + mergestart, mergeend = None, None + if span_val > 1: + mergestart, mergeend = lnum, coloffset + i + span_val + yield CssExcelCell( + row=lnum, + col=coloffset + i + 1, + val=values[i], + style=self.header_style, + css_styles=getattr(self.styler, "ctx_columns", None), + css_row=lnum, + css_col=i, + css_converter=self.style_converter, + mergestart=mergestart, + mergeend=mergeend, + ) + else: + # Format in legacy format with dots to indicate levels. + for i, values in enumerate(zip(*level_strs)): + v = ".".join(map(pprint_thing, values)) + yield CssExcelCell( + row=lnum, + col=coloffset + i + 1, + val=v, + style=self.header_style, + css_styles=getattr(self.styler, "ctx_columns", None), + css_row=lnum, + css_col=i, + css_converter=self.style_converter, + ) + + self.rowcounter = lnum + + def _format_header_regular(self) -> Iterable[ExcelCell]: + if self._has_aliases or self.header: + coloffset = 0 + + if self.index: + coloffset = 1 + if isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index.names) + + colnames = self.columns + if self._has_aliases: + self.header = cast(Sequence, self.header) + if len(self.header) != len(self.columns): + raise ValueError( + f"Writing {len(self.columns)} cols " + f"but got {len(self.header)} aliases" + ) + else: + colnames = self.header + + for colindex, colname in enumerate(colnames): + yield CssExcelCell( + row=self.rowcounter, + col=colindex + coloffset, + val=colname, + style=self.header_style, + css_styles=getattr(self.styler, "ctx_columns", None), + css_row=0, + css_col=colindex, + css_converter=self.style_converter, + ) + + def _format_header(self) -> Iterable[ExcelCell]: + gen: Iterable[ExcelCell] + + if isinstance(self.columns, MultiIndex): + gen = self._format_header_mi() + else: + gen = self._format_header_regular() + + gen2: Iterable[ExcelCell] = () + + if self.df.index.names: + row = [x if x is not None else "" for x in self.df.index.names] + [ + "" + ] * len(self.columns) + if reduce(lambda x, y: x and y, map(lambda x: x != "", row)): + gen2 = ( + ExcelCell(self.rowcounter, colindex, val, self.header_style) + for colindex, val in enumerate(row) + ) + self.rowcounter += 1 + return itertools.chain(gen, gen2) + + def _format_body(self) -> Iterable[ExcelCell]: + if isinstance(self.df.index, MultiIndex): + return self._format_hierarchical_rows() + else: + return self._format_regular_rows() + + def _format_regular_rows(self) -> Iterable[ExcelCell]: + if self._has_aliases or self.header: + self.rowcounter += 1 + + # output index and index_label? 
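# The index column is written first, styled like a header; PeriodIndex
# values are converted to timestamps because Excel has no period type,
# and the body is then generated with a one-column offset.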
+ if self.index: + # check aliases + # if list only take first as this is not a MultiIndex + if self.index_label and isinstance( + self.index_label, (list, tuple, np.ndarray, Index) + ): + index_label = self.index_label[0] + # if string good to go + elif self.index_label and isinstance(self.index_label, str): + index_label = self.index_label + else: + index_label = self.df.index.names[0] + + if isinstance(self.columns, MultiIndex): + self.rowcounter += 1 + + if index_label and self.header is not False: + yield ExcelCell(self.rowcounter - 1, 0, index_label, self.header_style) + + # write index_values + index_values = self.df.index + if isinstance(self.df.index, PeriodIndex): + index_values = self.df.index.to_timestamp() + + for idx, idxval in enumerate(index_values): + yield CssExcelCell( + row=self.rowcounter + idx, + col=0, + val=idxval, + style=self.header_style, + css_styles=getattr(self.styler, "ctx_index", None), + css_row=idx, + css_col=0, + css_converter=self.style_converter, + ) + coloffset = 1 + else: + coloffset = 0 + + yield from self._generate_body(coloffset) + + def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: + if self._has_aliases or self.header: + self.rowcounter += 1 + + gcolidx = 0 + + if self.index: + index_labels = self.df.index.names + # check for aliases + if self.index_label and isinstance( + self.index_label, (list, tuple, np.ndarray, Index) + ): + index_labels = self.index_label + + # MultiIndex columns require an extra row + # with index names (blank if None) for + # unambiguous round-trip, unless not merging, + # in which case the names all go on one row Issue #11328 + if isinstance(self.columns, MultiIndex) and self.merge_cells: + self.rowcounter += 1 + + # if index labels are not empty go ahead and dump + if com.any_not_none(*index_labels) and self.header is not False: + + for cidx, name in enumerate(index_labels): + yield ExcelCell(self.rowcounter - 1, cidx, name, self.header_style) + + if self.merge_cells: + # Format hierarchical rows as merged cells. + level_strs = self.df.index.format( + sparsify=True, adjoin=False, names=False + ) + level_lengths = get_level_lengths(level_strs) + + for spans, levels, level_codes in zip( + level_lengths, self.df.index.levels, self.df.index.codes + ): + + values = levels.take( + level_codes, + allow_fill=levels._can_hold_na, + fill_value=levels._na_value, + ) + + for i, span_val in spans.items(): + mergestart, mergeend = None, None + if span_val > 1: + mergestart = self.rowcounter + i + span_val - 1 + mergeend = gcolidx + yield CssExcelCell( + row=self.rowcounter + i, + col=gcolidx, + val=values[i], + style=self.header_style, + css_styles=getattr(self.styler, "ctx_index", None), + css_row=i, + css_col=gcolidx, + css_converter=self.style_converter, + mergestart=mergestart, + mergeend=mergeend, + ) + gcolidx += 1 + + else: + # Format hierarchical rows with non-merged values. 
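# Each level of the MultiIndex becomes its own column and every row
# repeats its full key; nothing is merged in this branch.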
+ for indexcolvals in zip(*self.df.index): + for idx, indexcolval in enumerate(indexcolvals): + yield CssExcelCell( + row=self.rowcounter + idx, + col=gcolidx, + val=indexcolval, + style=self.header_style, + css_styles=getattr(self.styler, "ctx_index", None), + css_row=idx, + css_col=gcolidx, + css_converter=self.style_converter, + ) + gcolidx += 1 + + yield from self._generate_body(gcolidx) + + @property + def _has_aliases(self) -> bool: + """Whether the aliases for column names are present.""" + return is_list_like(self.header) + + def _generate_body(self, coloffset: int) -> Iterable[ExcelCell]: + # Write the body of the frame data series by series. + for colidx in range(len(self.columns)): + series = self.df.iloc[:, colidx] + for i, val in enumerate(series): + yield CssExcelCell( + row=self.rowcounter + i, + col=colidx + coloffset, + val=val, + style=None, + css_styles=getattr(self.styler, "ctx", None), + css_row=i, + css_col=colidx, + css_converter=self.style_converter, + ) + + def get_formatted_cells(self) -> Iterable[ExcelCell]: + for cell in itertools.chain(self._format_header(), self._format_body()): + cell.val = self._format_value(cell.val) + yield cell + + @doc(storage_options=_shared_docs["storage_options"]) + def write( + self, + writer, + sheet_name: str = "Sheet1", + startrow: int = 0, + startcol: int = 0, + freeze_panes: tuple[int, int] | None = None, + engine: str | None = None, + storage_options: StorageOptions = None, + ) -> None: + """ + writer : path-like, file-like, or ExcelWriter object + File path or existing ExcelWriter + sheet_name : str, default 'Sheet1' + Name of sheet which will contain DataFrame + startrow : + upper left cell row to dump data frame + startcol : + upper left cell column to dump data frame + freeze_panes : tuple of integer (length 2), default None + Specifies the one-based bottommost row and rightmost column that + is to be frozen + engine : string, default None + write engine to use if writer is a path - you can also set this + via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, + and ``io.excel.xlsm.writer``. + + .. deprecated:: 1.2.0 + + As the `xlwt `__ package is no longer + maintained, the ``xlwt`` engine will be removed in a future + version of pandas. + + {storage_options} + + .. versionadded:: 1.2.0 + """ + from pandas.io.excel import ExcelWriter + + num_rows, num_cols = self.df.shape + if num_rows > self.max_rows or num_cols > self.max_cols: + raise ValueError( + f"This sheet is too large! Your sheet size is: {num_rows}, {num_cols} " + f"Max sheet size is: {self.max_rows}, {self.max_cols}" + ) + + formatted_cells = self.get_formatted_cells() + if isinstance(writer, ExcelWriter): + need_save = False + else: + # error: Cannot instantiate abstract class 'ExcelWriter' with abstract + # attributes 'engine', 'save', 'supported_extensions' and 'write_cells' + writer = ExcelWriter( # type: ignore[abstract] + writer, engine=engine, storage_options=storage_options + ) + need_save = True + + try: + writer._write_cells( + formatted_cells, + sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + ) + finally: + # make sure to close opened file handles + if need_save: + writer.close() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py new file mode 100644 index 00000000..ff631a95 --- /dev/null +++ b/pandas/io/formats/format.py @@ -0,0 +1,2183 @@ +""" +Internal module for formatting output data in csv, html, xml, +and latex files. This module also applies to display formatting. 
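The public entry points that funnel through this module include ``DataFrame.to_string``, ``DataFrame.to_html``, ``DataFrame.to_latex``, ``DataFrame.to_csv`` and the plain repr of both DataFrame and Series. A small illustration:

>>> import pandas as pd
>>> print(pd.DataFrame({"a": [1, 2]}).to_string())
   a
0  1
1  2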
+""" +from __future__ import annotations + +from contextlib import contextmanager +from csv import ( + QUOTE_NONE, + QUOTE_NONNUMERIC, +) +import decimal +from functools import partial +from io import StringIO +import math +import re +from shutil import get_terminal_size +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Final, + Hashable, + Iterable, + Iterator, + List, + Mapping, + Sequence, + cast, +) +from unicodedata import east_asian_width + +import numpy as np + +from pandas._config.config import ( + get_option, + set_option, +) + +from pandas._libs import lib +from pandas._libs.missing import NA +from pandas._libs.tslibs import ( + NaT, + Timedelta, + Timestamp, + get_unit_from_dtype, + iNaT, + periods_per_day, +) +from pandas._libs.tslibs.nattype import NaTType +from pandas._typing import ( + ArrayLike, + Axes, + ColspaceArgType, + ColspaceType, + CompressionOptions, + FilePath, + FloatFormatType, + FormattersType, + IndexLabel, + StorageOptions, + WriteBuffer, +) +from pandas.util._decorators import deprecate_kwarg + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_complex_dtype, + is_datetime64_dtype, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + is_scalar, + is_timedelta64_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + TimedeltaArray, +) +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.construction import extract_array +from pandas.core.indexes.api import ( + Index, + MultiIndex, + PeriodIndex, + ensure_index, +) +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.reshape.concat import concat + +from pandas.io.common import ( + check_parent_directory, + stringify_path, +) +from pandas.io.formats.printing import ( + adjoin, + justify, + pprint_thing, +) + +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + + +common_docstring: Final = """ + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : sequence, optional, default None + The subset of columns to write. Writes all columns by default. + col_space : %(col_space_type)s, optional + %(col_space)s. + header : %(header_type)s, optional + %(header)s. + index : bool, optional, default True + Whether to print index (row) labels. + na_rep : str, optional, default 'NaN' + String representation of ``NaN`` to use. + formatters : list, tuple or dict of one-param. functions, optional + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format : one-parameter function, optional, default None + Formatter function to apply to columns' elements if they are + floats. This function must return a unicode string and will be + applied only to the non-``NaN`` elements, with ``NaN`` being + handled by ``na_rep``. + + .. versionchanged:: 1.2.0 + + sparsify : bool, optional, default True + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. + index_names : bool, optional, default True + Prints the names of the indexes. 
+ justify : str, default None + How to justify the column labels. If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are + + * left + * right + * center + * justify + * justify-all + * start + * end + * inherit + * match-parent + * initial + * unset. + max_rows : int, optional + Maximum number of rows to display in the console. + max_cols : int, optional + Maximum number of columns to display in the console. + show_dimensions : bool, default False + Display DataFrame dimensions (number of rows by number of columns). + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + """ + +_VALID_JUSTIFY_PARAMETERS = ( + "left", + "right", + "center", + "justify", + "justify-all", + "start", + "end", + "inherit", + "match-parent", + "initial", + "unset", +) + +return_docstring: Final = """ + Returns + ------- + str or None + If buf is None, returns the result as a string. Otherwise returns + None. + """ + + +class CategoricalFormatter: + def __init__( + self, + categorical: Categorical, + buf: IO[str] | None = None, + length: bool = True, + na_rep: str = "NaN", + footer: bool = True, + ) -> None: + self.categorical = categorical + self.buf = buf if buf is not None else StringIO("") + self.na_rep = na_rep + self.length = length + self.footer = footer + self.quoting = QUOTE_NONNUMERIC + + def _get_footer(self) -> str: + footer = "" + + if self.length: + if footer: + footer += ", " + footer += f"Length: {len(self.categorical)}" + + level_info = self.categorical._repr_categories_info() + + # Levels are added in a newline + if footer: + footer += "\n" + footer += level_info + + return str(footer) + + def _get_formatted_values(self) -> list[str]: + return format_array( + self.categorical._internal_get_values(), + None, + float_format=None, + na_rep=self.na_rep, + quoting=self.quoting, + ) + + def to_string(self) -> str: + categorical = self.categorical + + if len(categorical) == 0: + if self.footer: + return self._get_footer() + else: + return "" + + fmt_values = self._get_formatted_values() + + fmt_values = [i.strip() for i in fmt_values] + values = ", ".join(fmt_values) + result = ["[" + values + "]"] + if self.footer: + footer = self._get_footer() + if footer: + result.append(footer) + + return str("\n".join(result)) + + +class SeriesFormatter: + def __init__( + self, + series: Series, + buf: IO[str] | None = None, + length: bool | str = True, + header: bool = True, + index: bool = True, + na_rep: str = "NaN", + name: bool = False, + float_format: str | None = None, + dtype: bool = True, + max_rows: int | None = None, + min_rows: int | None = None, + ) -> None: + self.series = series + self.buf = buf if buf is not None else StringIO() + self.name = name + self.na_rep = na_rep + self.header = header + self.length = length + self.index = index + self.max_rows = max_rows + self.min_rows = min_rows + + if float_format is None: + float_format = get_option("display.float_format") + self.float_format = float_format + self.dtype = dtype + self.adj = get_adjustment() + + self._chk_truncate() + + def _chk_truncate(self) -> None: + self.tr_row_num: int | None + + min_rows = self.min_rows + max_rows = self.max_rows + # truncation determined by max_rows, actual truncated number of rows + # used below by min_rows + is_truncated_vertically = max_rows and (len(self.series) > max_rows) + series = self.series + if is_truncated_vertically: + max_rows = cast(int, max_rows) + if min_rows: + # if min_rows is set (not 
None or 0), set max_rows to minimum + # of both + max_rows = min(min_rows, max_rows) + if max_rows == 1: + row_num = max_rows + series = series.iloc[:max_rows] + else: + row_num = max_rows // 2 + series = concat((series.iloc[:row_num], series.iloc[-row_num:])) + self.tr_row_num = row_num + else: + self.tr_row_num = None + self.tr_series = series + self.is_truncated_vertically = is_truncated_vertically + + def _get_footer(self) -> str: + name = self.series.name + footer = "" + + if getattr(self.series.index, "freq", None) is not None: + assert isinstance( + self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex) + ) + footer += f"Freq: {self.series.index.freqstr}" + + if self.name is not False and name is not None: + if footer: + footer += ", " + + series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n")) + footer += f"Name: {series_name}" + + if self.length is True or ( + self.length == "truncate" and self.is_truncated_vertically + ): + if footer: + footer += ", " + footer += f"Length: {len(self.series)}" + + if self.dtype is not False and self.dtype is not None: + dtype_name = getattr(self.tr_series.dtype, "name", None) + if dtype_name: + if footer: + footer += ", " + footer += f"dtype: {pprint_thing(dtype_name)}" + + # level infos are added to the end and in a new line, like it is done + # for Categoricals + if is_categorical_dtype(self.tr_series.dtype): + level_info = self.tr_series._values._repr_categories_info() + if footer: + footer += "\n" + footer += level_info + + return str(footer) + + def _get_formatted_index(self) -> tuple[list[str], bool]: + index = self.tr_series.index + + if isinstance(index, MultiIndex): + have_header = any(name for name in index.names) + fmt_index = index.format(names=True) + else: + have_header = index.name is not None + fmt_index = index.format(name=True) + return fmt_index, have_header + + def _get_formatted_values(self) -> list[str]: + return format_array( + self.tr_series._values, + None, + float_format=self.float_format, + na_rep=self.na_rep, + leading_space=self.index, + ) + + def to_string(self) -> str: + series = self.tr_series + footer = self._get_footer() + + if len(series) == 0: + return f"{type(self.series).__name__}([], {footer})" + + fmt_index, have_header = self._get_formatted_index() + fmt_values = self._get_formatted_values() + + if self.is_truncated_vertically: + n_header_rows = 0 + row_num = self.tr_row_num + row_num = cast(int, row_num) + width = self.adj.len(fmt_values[row_num - 1]) + if width > 3: + dot_str = "..." + else: + dot_str = ".." 
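# The dot marker is inserted at tr_row_num, i.e. between the head and tail
# halves kept by _chk_truncate (the index gets a matching blank entry one
# position later to account for its header slot).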
+ # Series uses mode=center because it has single value columns + # DataFrame uses mode=left + dot_str = self.adj.justify([dot_str], width, mode="center")[0] + fmt_values.insert(row_num + n_header_rows, dot_str) + fmt_index.insert(row_num + 1, "") + + if self.index: + result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values]) + else: + result = self.adj.adjoin(3, fmt_values) + + if self.header and have_header: + result = fmt_index[0] + "\n" + result + + if footer: + result += "\n" + footer + + return str("".join(result)) + + +class TextAdjustment: + def __init__(self) -> None: + self.encoding = get_option("display.encoding") + + def len(self, text: str) -> int: + return len(text) + + def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]: + return justify(texts, max_len, mode=mode) + + def adjoin(self, space: int, *lists, **kwargs) -> str: + return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs) + + +class EastAsianTextAdjustment(TextAdjustment): + def __init__(self) -> None: + super().__init__() + if get_option("display.unicode.ambiguous_as_wide"): + self.ambiguous_width = 2 + else: + self.ambiguous_width = 1 + + # Definition of East Asian Width + # https://unicode.org/reports/tr11/ + # Ambiguous width can be changed by option + self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1} + + def len(self, text: str) -> int: + """ + Calculate display width considering unicode East Asian Width + """ + if not isinstance(text, str): + return len(text) + + return sum( + self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text + ) + + def justify( + self, texts: Iterable[str], max_len: int, mode: str = "right" + ) -> list[str]: + # re-calculate padding space per str considering East Asian Width + def _get_pad(t): + return max_len - self.len(t) + len(t) + + if mode == "left": + return [x.ljust(_get_pad(x)) for x in texts] + elif mode == "center": + return [x.center(_get_pad(x)) for x in texts] + else: + return [x.rjust(_get_pad(x)) for x in texts] + + +def get_adjustment() -> TextAdjustment: + use_east_asian_width = get_option("display.unicode.east_asian_width") + if use_east_asian_width: + return EastAsianTextAdjustment() + else: + return TextAdjustment() + + +def get_dataframe_repr_params() -> dict[str, Any]: + """Get the parameters used to repr(dataFrame) calls using DataFrame.to_string. + + Supplying these parameters to DataFrame.to_string is equivalent to calling + ``repr(DataFrame)``. This is useful if you want to adjust the repr output. + + .. versionadded:: 1.4.0 + + Example + ------- + >>> import pandas as pd + >>> + >>> df = pd.DataFrame([[1, 2], [3, 4]]) + >>> repr_params = pd.io.formats.format.get_dataframe_repr_params() + >>> repr(df) == df.to_string(**repr_params) + True + """ + from pandas.io.formats import console + + if get_option("display.expand_frame_repr"): + line_width, _ = console.get_console_size() + else: + line_width = None + return { + "max_rows": get_option("display.max_rows"), + "min_rows": get_option("display.min_rows"), + "max_cols": get_option("display.max_columns"), + "max_colwidth": get_option("display.max_colwidth"), + "show_dimensions": get_option("display.show_dimensions"), + "line_width": line_width, + } + + +def get_series_repr_params() -> dict[str, Any]: + """Get the parameters used to repr(Series) calls using Series.to_string. + + Supplying these parameters to Series.to_string is equivalent to calling + ``repr(series)``. This is useful if you want to adjust the series repr output. + + .. 
versionadded:: 1.4.0 + + Example + ------- + >>> import pandas as pd + >>> + >>> ser = pd.Series([1, 2, 3, 4]) + >>> repr_params = pd.io.formats.format.get_series_repr_params() + >>> repr(ser) == ser.to_string(**repr_params) + True + """ + width, height = get_terminal_size() + max_rows = ( + height + if get_option("display.max_rows") == 0 + else get_option("display.max_rows") + ) + min_rows = ( + height + if get_option("display.max_rows") == 0 + else get_option("display.min_rows") + ) + + return { + "name": True, + "dtype": True, + "min_rows": min_rows, + "max_rows": max_rows, + "length": get_option("display.show_dimensions"), + } + + +class DataFrameFormatter: + """Class for processing dataframe formatting options and data.""" + + __doc__ = __doc__ if __doc__ else "" + __doc__ += common_docstring + return_docstring + + def __init__( + self, + frame: DataFrame, + columns: Sequence[Hashable] | None = None, + col_space: ColspaceArgType | None = None, + header: bool | Sequence[str] = True, + index: bool = True, + na_rep: str = "NaN", + formatters: FormattersType | None = None, + justify: str | None = None, + float_format: FloatFormatType | None = None, + sparsify: bool | None = None, + index_names: bool = True, + max_rows: int | None = None, + min_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool | str = False, + decimal: str = ".", + bold_rows: bool = False, + escape: bool = True, + ) -> None: + self.frame = frame + self.columns = self._initialize_columns(columns) + self.col_space = self._initialize_colspace(col_space) + self.header = header + self.index = index + self.na_rep = na_rep + self.formatters = self._initialize_formatters(formatters) + self.justify = self._initialize_justify(justify) + self.float_format = float_format + self.sparsify = self._initialize_sparsify(sparsify) + self.show_index_names = index_names + self.decimal = decimal + self.bold_rows = bold_rows + self.escape = escape + self.max_rows = max_rows + self.min_rows = min_rows + self.max_cols = max_cols + self.show_dimensions = show_dimensions + + self.max_cols_fitted = self._calc_max_cols_fitted() + self.max_rows_fitted = self._calc_max_rows_fitted() + + self.tr_frame = self.frame + self.truncate() + self.adj = get_adjustment() + + def get_strcols(self) -> list[list[str]]: + """ + Render a DataFrame to a list of columns (as lists of strings). 
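For a small frame under default display options the result looks like this (internal API, shown for illustration only):

>>> import pandas as pd
>>> from pandas.io.formats.format import DataFrameFormatter
>>> DataFrameFormatter(pd.DataFrame({"a": [1, 2]})).get_strcols()
[['', '0', '1'], [' a', ' 1', ' 2']]

Each inner list is one rendered column, header first; the formatted index is prepended as the first column when ``index=True``.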
+ """ + strcols = self._get_strcols_without_index() + + if self.index: + str_index = self._get_formatted_index(self.tr_frame) + strcols.insert(0, str_index) + + return strcols + + @property + def should_show_dimensions(self) -> bool: + return self.show_dimensions is True or ( + self.show_dimensions == "truncate" and self.is_truncated + ) + + @property + def is_truncated(self) -> bool: + return bool(self.is_truncated_horizontally or self.is_truncated_vertically) + + @property + def is_truncated_horizontally(self) -> bool: + return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted)) + + @property + def is_truncated_vertically(self) -> bool: + return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted)) + + @property + def dimensions_info(self) -> str: + return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]" + + @property + def has_index_names(self) -> bool: + return _has_names(self.frame.index) + + @property + def has_column_names(self) -> bool: + return _has_names(self.frame.columns) + + @property + def show_row_idx_names(self) -> bool: + return all((self.has_index_names, self.index, self.show_index_names)) + + @property + def show_col_idx_names(self) -> bool: + return all((self.has_column_names, self.show_index_names, self.header)) + + @property + def max_rows_displayed(self) -> int: + return min(self.max_rows or len(self.frame), len(self.frame)) + + def _initialize_sparsify(self, sparsify: bool | None) -> bool: + if sparsify is None: + return get_option("display.multi_sparse") + return sparsify + + def _initialize_formatters( + self, formatters: FormattersType | None + ) -> FormattersType: + if formatters is None: + return {} + elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict): + return formatters + else: + raise ValueError( + f"Formatters length({len(formatters)}) should match " + f"DataFrame number of columns({len(self.frame.columns)})" + ) + + def _initialize_justify(self, justify: str | None) -> str: + if justify is None: + return get_option("display.colheader_justify") + else: + return justify + + def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index: + if columns is not None: + # GH 47231 - columns doesn't have to be `Sequence[str]` + # Will fix in later PR + cols = ensure_index(cast(Axes, columns)) + self.frame = self.frame[cols] + return cols + else: + return self.frame.columns + + def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType: + result: ColspaceType + + if col_space is None: + result = {} + elif isinstance(col_space, (int, str)): + result = {"": col_space} + result.update({column: col_space for column in self.frame.columns}) + elif isinstance(col_space, Mapping): + for column in col_space.keys(): + if column not in self.frame.columns and column != "": + raise ValueError( + f"Col_space is defined for an unknown column: {column}" + ) + result = col_space + else: + if len(self.frame.columns) != len(col_space): + raise ValueError( + f"Col_space length({len(col_space)}) should match " + f"DataFrame number of columns({len(self.frame.columns)})" + ) + result = dict(zip(self.frame.columns, col_space)) + return result + + def _calc_max_cols_fitted(self) -> int | None: + """Number of columns fitting the screen.""" + if not self._is_in_terminal(): + return self.max_cols + + width, _ = get_terminal_size() + if self._is_screen_narrow(width): + return width + else: + return self.max_cols + + def _calc_max_rows_fitted(self) -> int | None: + """Number 
of rows with data fitting the screen.""" + max_rows: int | None + + if self._is_in_terminal(): + _, height = get_terminal_size() + if self.max_rows == 0: + # rows available to fill with actual data + return height - self._get_number_of_auxillary_rows() + + if self._is_screen_short(height): + max_rows = height + else: + max_rows = self.max_rows + else: + max_rows = self.max_rows + + return self._adjust_max_rows(max_rows) + + def _adjust_max_rows(self, max_rows: int | None) -> int | None: + """Adjust max_rows using display logic. + + See description here: + https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options + + GH #37359 + """ + if max_rows: + if (len(self.frame) > max_rows) and self.min_rows: + # if truncated, set max_rows showed to min_rows + max_rows = min(self.min_rows, max_rows) + return max_rows + + def _is_in_terminal(self) -> bool: + """Check if the output is to be shown in terminal.""" + return bool(self.max_cols == 0 or self.max_rows == 0) + + def _is_screen_narrow(self, max_width) -> bool: + return bool(self.max_cols == 0 and len(self.frame.columns) > max_width) + + def _is_screen_short(self, max_height) -> bool: + return bool(self.max_rows == 0 and len(self.frame) > max_height) + + def _get_number_of_auxillary_rows(self) -> int: + """Get number of rows occupied by prompt, dots and dimension info.""" + dot_row = 1 + prompt_row = 1 + num_rows = dot_row + prompt_row + + if self.show_dimensions: + num_rows += len(self.dimensions_info.splitlines()) + + if self.header: + num_rows += 1 + + return num_rows + + def truncate(self) -> None: + """ + Check whether the frame should be truncated. If so, slice the frame up. + """ + if self.is_truncated_horizontally: + self._truncate_horizontally() + + if self.is_truncated_vertically: + self._truncate_vertically() + + def _truncate_horizontally(self) -> None: + """Remove columns, which are not to be displayed and adjust formatters. + + Attributes affected: + - tr_frame + - formatters + - tr_col_num + """ + assert self.max_cols_fitted is not None + col_num = self.max_cols_fitted // 2 + if col_num >= 1: + left = self.tr_frame.iloc[:, :col_num] + right = self.tr_frame.iloc[:, -col_num:] + self.tr_frame = concat((left, right), axis=1) + + # truncate formatter + if isinstance(self.formatters, (list, tuple)): + self.formatters = [ + *self.formatters[:col_num], + *self.formatters[-col_num:], + ] + else: + col_num = cast(int, self.max_cols) + self.tr_frame = self.tr_frame.iloc[:, :col_num] + self.tr_col_num = col_num + + def _truncate_vertically(self) -> None: + """Remove rows, which are not to be displayed. 
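For example, with ``display.max_rows=10`` and ``display.min_rows=4`` a 100-row frame keeps only its first two and last two rows here; the string formatter later inserts a row of dots between them:

>>> import pandas as pd
>>> with pd.option_context("display.max_rows", 10, "display.min_rows", 4):
...     print(pd.DataFrame({"a": range(100)}))  # doctest: +SKIP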
+ + Attributes affected: + - tr_frame + - tr_row_num + """ + assert self.max_rows_fitted is not None + row_num = self.max_rows_fitted // 2 + if row_num >= 1: + head = self.tr_frame.iloc[:row_num, :] + tail = self.tr_frame.iloc[-row_num:, :] + self.tr_frame = concat((head, tail)) + else: + row_num = cast(int, self.max_rows) + self.tr_frame = self.tr_frame.iloc[:row_num, :] + self.tr_row_num = row_num + + def _get_strcols_without_index(self) -> list[list[str]]: + strcols: list[list[str]] = [] + + if not is_list_like(self.header) and not self.header: + for i, c in enumerate(self.tr_frame): + fmt_values = self.format_col(i) + fmt_values = _make_fixed_width( + strings=fmt_values, + justify=self.justify, + minimum=int(self.col_space.get(c, 0)), + adj=self.adj, + ) + strcols.append(fmt_values) + return strcols + + if is_list_like(self.header): + # cast here since can't be bool if is_list_like + self.header = cast(List[str], self.header) + if len(self.header) != len(self.columns): + raise ValueError( + f"Writing {len(self.columns)} cols " + f"but got {len(self.header)} aliases" + ) + str_columns = [[label] for label in self.header] + else: + str_columns = self._get_formatted_column_labels(self.tr_frame) + + if self.show_row_idx_names: + for x in str_columns: + x.append("") + + for i, c in enumerate(self.tr_frame): + cheader = str_columns[i] + header_colwidth = max( + int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader) + ) + fmt_values = self.format_col(i) + fmt_values = _make_fixed_width( + fmt_values, self.justify, minimum=header_colwidth, adj=self.adj + ) + + max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth) + cheader = self.adj.justify(cheader, max_len, mode=self.justify) + strcols.append(cheader + fmt_values) + + return strcols + + def format_col(self, i: int) -> list[str]: + frame = self.tr_frame + formatter = self._get_formatter(i) + return format_array( + frame.iloc[:, i]._values, + formatter, + float_format=self.float_format, + na_rep=self.na_rep, + space=self.col_space.get(frame.columns[i]), + decimal=self.decimal, + leading_space=self.index, + ) + + def _get_formatter(self, i: str | int) -> Callable | None: + if isinstance(self.formatters, (list, tuple)): + if is_integer(i): + i = cast(int, i) + return self.formatters[i] + else: + return None + else: + if is_integer(i) and i not in self.columns: + i = self.columns[i] + return self.formatters.get(i, None) + + def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]: + from pandas.core.indexes.multi import sparsify_labels + + columns = frame.columns + + if isinstance(columns, MultiIndex): + fmt_columns = columns.format(sparsify=False, adjoin=False) + fmt_columns = list(zip(*fmt_columns)) + dtypes = self.frame.dtypes._values + + # if we have a Float level, they don't use leading space at all + restrict_formatting = any(level.is_floating for level in columns.levels) + need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) + + def space_format(x, y): + if ( + y not in self.formatters + and need_leadsp[x] + and not restrict_formatting + ): + return " " + y + return y + + str_columns = list( + zip(*([space_format(x, y) for y in x] for x in fmt_columns)) + ) + if self.sparsify and len(str_columns): + str_columns = sparsify_labels(str_columns) + + str_columns = [list(x) for x in zip(*str_columns)] + else: + fmt_columns = columns.format() + dtypes = self.frame.dtypes + need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) + str_columns = [ + [" " + x if not 
self._get_formatter(i) and need_leadsp[x] else x] + for i, x in enumerate(fmt_columns) + ] + # self.str_columns = str_columns + return str_columns + + def _get_formatted_index(self, frame: DataFrame) -> list[str]: + # Note: this is only used by to_string() and to_latex(), not by + # to_html(). so safe to cast col_space here. + col_space = {k: cast(int, v) for k, v in self.col_space.items()} + index = frame.index + columns = frame.columns + fmt = self._get_formatter("__index__") + + if isinstance(index, MultiIndex): + fmt_index = index.format( + sparsify=self.sparsify, + adjoin=False, + names=self.show_row_idx_names, + formatter=fmt, + ) + else: + fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)] + + fmt_index = [ + tuple( + _make_fixed_width( + list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj + ) + ) + for x in fmt_index + ] + + adjoined = self.adj.adjoin(1, *fmt_index).split("\n") + + # empty space for columns + if self.show_col_idx_names: + col_header = [str(x) for x in self._get_column_name_list()] + else: + col_header = [""] * columns.nlevels + + if self.header: + return col_header + adjoined + else: + return adjoined + + def _get_column_name_list(self) -> list[Hashable]: + names: list[Hashable] = [] + columns = self.frame.columns + if isinstance(columns, MultiIndex): + names.extend("" if name is None else name for name in columns.names) + else: + names.append("" if columns.name is None else columns.name) + return names + + +class DataFrameRenderer: + """Class for creating dataframe output in multiple formats. + + Called in pandas.core.generic.NDFrame: + - to_csv + - to_latex + + Called in pandas.core.frame.DataFrame: + - to_html + - to_string + + Parameters + ---------- + fmt : DataFrameFormatter + Formatter with the formatting options. + """ + + def __init__(self, fmt: DataFrameFormatter) -> None: + self.fmt = fmt + + def to_latex( + self, + buf: FilePath | WriteBuffer[str] | None = None, + column_format: str | None = None, + longtable: bool = False, + encoding: str | None = None, + multicolumn: bool = False, + multicolumn_format: str | None = None, + multirow: bool = False, + caption: str | tuple[str, str] | None = None, + label: str | None = None, + position: str | None = None, + ) -> str | None: + """ + Render a DataFrame to a LaTeX tabular/longtable environment output. + """ + from pandas.io.formats.latex import LatexFormatter + + latex_formatter = LatexFormatter( + self.fmt, + longtable=longtable, + column_format=column_format, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow, + caption=caption, + label=label, + position=position, + ) + string = latex_formatter.to_string() + return save_to_buffer(string, buf=buf, encoding=encoding) + + def to_html( + self, + buf: FilePath | WriteBuffer[str] | None = None, + encoding: str | None = None, + classes: str | list | tuple | None = None, + notebook: bool = False, + border: int | bool | None = None, + table_id: str | None = None, + render_links: bool = False, + ) -> str | None: + """ + Render a DataFrame to a html table. + + Parameters + ---------- + buf : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``write()`` function. If None, the result is + returned as a string. + encoding : str, default “utf-8” + Set character encoding. + classes : str or list-like + classes to include in the `class` attribute of the opening + ``
<table>`` tag, in addition to the default "dataframe". + notebook : {True, False}, optional, default False + Whether the generated HTML is for IPython Notebook. + border : int + A ``border=border`` attribute is included in the opening ``<table>`` tag. Default ``pd.options.display.html.border``. + table_id : str, optional + A css id is included in the opening ``<table>`
    ` tag if specified. + render_links : bool, default False + Convert URLs to HTML links. + """ + from pandas.io.formats.html import ( + HTMLFormatter, + NotebookFormatter, + ) + + Klass = NotebookFormatter if notebook else HTMLFormatter + + html_formatter = Klass( + self.fmt, + classes=classes, + border=border, + table_id=table_id, + render_links=render_links, + ) + string = html_formatter.to_string() + return save_to_buffer(string, buf=buf, encoding=encoding) + + def to_string( + self, + buf: FilePath | WriteBuffer[str] | None = None, + encoding: str | None = None, + line_width: int | None = None, + ) -> str | None: + """ + Render a DataFrame to a console-friendly tabular output. + + Parameters + ---------- + buf : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``write()`` function. If None, the result is + returned as a string. + encoding: str, default “utf-8” + Set character encoding. + line_width : int, optional + Width to wrap a line in characters. + """ + from pandas.io.formats.string import StringFormatter + + string_formatter = StringFormatter(self.fmt, line_width=line_width) + string = string_formatter.to_string() + return save_to_buffer(string, buf=buf, encoding=encoding) + + @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator") + def to_csv( + self, + path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + encoding: str | None = None, + sep: str = ",", + columns: Sequence[Hashable] | None = None, + index_label: IndexLabel | None = None, + mode: str = "w", + compression: CompressionOptions = "infer", + quoting: int | None = None, + quotechar: str = '"', + lineterminator: str | None = None, + chunksize: int | None = None, + date_format: str | None = None, + doublequote: bool = True, + escapechar: str | None = None, + errors: str = "strict", + storage_options: StorageOptions = None, + ) -> str | None: + """ + Render dataframe as comma-separated file. + """ + from pandas.io.formats.csvs import CSVFormatter + + if path_or_buf is None: + created_buffer = True + path_or_buf = StringIO() + else: + created_buffer = False + + csv_formatter = CSVFormatter( + path_or_buf=path_or_buf, + lineterminator=lineterminator, + sep=sep, + encoding=encoding, + errors=errors, + compression=compression, + quoting=quoting, + cols=columns, + index_label=index_label, + mode=mode, + chunksize=chunksize, + quotechar=quotechar, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, + storage_options=storage_options, + formatter=self.fmt, + ) + csv_formatter.save() + + if created_buffer: + assert isinstance(path_or_buf, StringIO) + content = path_or_buf.getvalue() + path_or_buf.close() + return content + + return None + + +def save_to_buffer( + string: str, + buf: FilePath | WriteBuffer[str] | None = None, + encoding: str | None = None, +) -> str | None: + """ + Perform serialization. Write to buf or return as string if buf is None. + """ + with get_buffer(buf, encoding=encoding) as f: + f.write(string) + if buf is None: + # error: "WriteBuffer[str]" has no attribute "getvalue" + return f.getvalue() # type: ignore[attr-defined] + return None + + +@contextmanager +def get_buffer( + buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None +) -> Iterator[WriteBuffer[str]] | Iterator[StringIO]: + """ + Context manager to open, yield and close buffer for filenames or Path-like + objects, otherwise yield buf unchanged. 
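The companion ``save_to_buffer`` above shows the two modes in action:

>>> from pandas.io.formats.format import save_to_buffer
>>> save_to_buffer("abc")            # no buf: the text is returned
'abc'
>>> from io import StringIO
>>> buf = StringIO()
>>> save_to_buffer("abc", buf=buf)   # buf given: written in place, returns None
>>> buf.getvalue()
'abc'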
+ """ + if buf is not None: + buf = stringify_path(buf) + else: + buf = StringIO() + + if encoding is None: + encoding = "utf-8" + elif not isinstance(buf, str): + raise ValueError("buf is not a file name and encoding is specified.") + + if hasattr(buf, "write"): + yield buf + elif isinstance(buf, str): + check_parent_directory(str(buf)) + with open(buf, "w", encoding=encoding, newline="") as f: + # GH#30034 open instead of codecs.open prevents a file leak + # if we have an invalid encoding argument. + # newline="" is needed to roundtrip correctly on + # windows test_to_latex_filename + yield f + else: + raise TypeError("buf is not a file name and it has no write method") + + +# ---------------------------------------------------------------------- +# Array formatters + + +def format_array( + values: Any, + formatter: Callable | None, + float_format: FloatFormatType | None = None, + na_rep: str = "NaN", + digits: int | None = None, + space: str | int | None = None, + justify: str = "right", + decimal: str = ".", + leading_space: bool | None = True, + quoting: int | None = None, +) -> list[str]: + """ + Format an array for printing. + + Parameters + ---------- + values + formatter + float_format + na_rep + digits + space + justify + decimal + leading_space : bool, optional, default True + Whether the array should be formatted with a leading space. + When an array as a column of a Series or DataFrame, we do want + the leading space to pad between columns. + + When formatting an Index subclass + (e.g. IntervalIndex._format_native_types), we don't want the + leading space since it should be left-aligned. + + Returns + ------- + List[str] + """ + fmt_klass: type[GenericArrayFormatter] + if is_datetime64_dtype(values.dtype): + fmt_klass = Datetime64Formatter + elif isinstance(values.dtype, DatetimeTZDtype): + fmt_klass = Datetime64TZFormatter + elif is_timedelta64_dtype(values.dtype): + fmt_klass = Timedelta64Formatter + elif is_extension_array_dtype(values.dtype): + fmt_klass = ExtensionArrayFormatter + elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype): + fmt_klass = FloatArrayFormatter + elif is_integer_dtype(values.dtype): + fmt_klass = IntArrayFormatter + else: + fmt_klass = GenericArrayFormatter + + if space is None: + space = 12 + + if float_format is None: + float_format = get_option("display.float_format") + + if digits is None: + digits = get_option("display.precision") + + fmt_obj = fmt_klass( + values, + digits=digits, + na_rep=na_rep, + float_format=float_format, + formatter=formatter, + space=space, + justify=justify, + decimal=decimal, + leading_space=leading_space, + quoting=quoting, + ) + + return fmt_obj.get_result() + + +class GenericArrayFormatter: + def __init__( + self, + values: Any, + digits: int = 7, + formatter: Callable | None = None, + na_rep: str = "NaN", + space: str | int = 12, + float_format: FloatFormatType | None = None, + justify: str = "right", + decimal: str = ".", + quoting: int | None = None, + fixed_width: bool = True, + leading_space: bool | None = True, + ) -> None: + self.values = values + self.digits = digits + self.na_rep = na_rep + self.space = space + self.formatter = formatter + self.float_format = float_format + self.justify = justify + self.decimal = decimal + self.quoting = quoting + self.fixed_width = fixed_width + self.leading_space = leading_space + + def get_result(self) -> list[str]: + fmt_values = self._format_strings() + return _make_fixed_width(fmt_values, self.justify) + + def _format_strings(self) -> list[str]: + if 
self.float_format is None: + float_format = get_option("display.float_format") + if float_format is None: + precision = get_option("display.precision") + float_format = lambda x: _trim_zeros_single_float( + f"{x: .{precision:d}f}" + ) + else: + float_format = self.float_format + + if self.formatter is not None: + formatter = self.formatter + else: + quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE + formatter = partial( + pprint_thing, + escape_chars=("\t", "\r", "\n"), + quote_strings=quote_strings, + ) + + def _format(x): + if self.na_rep is not None and is_scalar(x) and isna(x): + try: + # try block for np.isnat specifically + # determine na_rep if x is None or NaT-like + if x is None: + return "None" + elif x is NA: + return str(NA) + elif x is NaT or np.isnat(x): + return "NaT" + except (TypeError, ValueError): + # np.isnat only handles datetime or timedelta objects + pass + return self.na_rep + elif isinstance(x, PandasObject): + return str(x) + else: + # object dtype + return str(formatter(x)) + + vals = extract_array(self.values, extract_numpy=True) + if not isinstance(vals, np.ndarray): + raise TypeError( + "ExtensionArray formatting should use ExtensionArrayFormatter" + ) + inferred = lib.map_infer(vals, is_float) + is_float_type = ( + inferred + # vals may have 2 or more dimensions + & np.all(notna(vals), axis=tuple(range(1, len(vals.shape)))) + ) + leading_space = self.leading_space + if leading_space is None: + leading_space = is_float_type.any() + + fmt_values = [] + for i, v in enumerate(vals): + if not is_float_type[i] and leading_space: + fmt_values.append(f" {_format(v)}") + elif is_float_type[i]: + fmt_values.append(float_format(v)) + else: + if leading_space is False: + # False specifically, so that the default is + # to include a space if we get here. 
+ tpl = "{v}" + else: + tpl = " {v}" + fmt_values.append(tpl.format(v=_format(v))) + + return fmt_values + + +class FloatArrayFormatter(GenericArrayFormatter): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + + # float_format is expected to be a string + # formatter should be used to pass a function + if self.float_format is not None and self.formatter is None: + # GH21625, GH22270 + self.fixed_width = False + if callable(self.float_format): + self.formatter = self.float_format + self.float_format = None + + def _value_formatter( + self, + float_format: FloatFormatType | None = None, + threshold: float | None = None, + ) -> Callable: + """Returns a function to be applied on each value to format it""" + # the float_format parameter supersedes self.float_format + if float_format is None: + float_format = self.float_format + + # we are going to compose different functions, to first convert to + # a string, then replace the decimal symbol, and finally chop according + # to the threshold + + # when there is no float_format, we use str instead of '%g' + # because str(0.0) = '0.0' while '%g' % 0.0 = '0' + if float_format: + + def base_formatter(v): + assert float_format is not None # for mypy + # error: "str" not callable + # error: Unexpected keyword argument "value" for "__call__" of + # "EngFormatter" + return ( + float_format(value=v) # type: ignore[operator,call-arg] + if notna(v) + else self.na_rep + ) + + else: + + def base_formatter(v): + return str(v) if notna(v) else self.na_rep + + if self.decimal != ".": + + def decimal_formatter(v): + return base_formatter(v).replace(".", self.decimal, 1) + + else: + decimal_formatter = base_formatter + + if threshold is None: + return decimal_formatter + + def formatter(value): + if notna(value): + if abs(value) > threshold: + return decimal_formatter(value) + else: + return decimal_formatter(0.0) + else: + return self.na_rep + + return formatter + + def get_result_as_array(self) -> np.ndarray: + """ + Returns the float values converted into strings using + the parameters given at initialisation, as a numpy array + """ + + def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): + mask = isna(values) + formatted = np.array( + [ + formatter(val) if not m else na_rep + for val, m in zip(values.ravel(), mask.ravel()) + ] + ).reshape(values.shape) + return formatted + + if self.formatter is not None: + return format_with_na_rep(self.values, self.formatter, self.na_rep) + + if self.fixed_width: + threshold = get_option("display.chop_threshold") + else: + threshold = None + + # if we have a fixed_width, we'll need to try different float_format + def format_values_with(float_format): + formatter = self._value_formatter(float_format, threshold) + + # default formatter leaves a space to the left when formatting + # floats, must be consistent for left-justifying NaNs (GH #25061) + if self.justify == "left": + na_rep = " " + self.na_rep + else: + na_rep = self.na_rep + + # separate the wheat from the chaff + values = self.values + is_complex = is_complex_dtype(values) + values = format_with_na_rep(values, formatter, na_rep) + + if self.fixed_width: + if is_complex: + result = _trim_zeros_complex(values, self.decimal) + else: + result = _trim_zeros_float(values, self.decimal) + return np.asarray(result, dtype="object") + + return values + + # There is a special default string when we are fixed-width + # The default is otherwise to use str instead of a formatting string + float_format: FloatFormatType | 
None + if self.float_format is None: + if self.fixed_width: + if self.leading_space is True: + fmt_str = "{value: .{digits:d}f}" + else: + fmt_str = "{value:.{digits:d}f}" + float_format = partial(fmt_str.format, digits=self.digits) + else: + float_format = self.float_format + else: + float_format = lambda value: self.float_format % value + + formatted_values = format_values_with(float_format) + + if not self.fixed_width: + return formatted_values + + # we need do convert to engineering format if some values are too small + # and would appear as 0, or if some values are too big and take too + # much space + + if len(formatted_values) > 0: + maxlen = max(len(x) for x in formatted_values) + too_long = maxlen > self.digits + 6 + else: + too_long = False + + with np.errstate(invalid="ignore"): + abs_vals = np.abs(self.values) + # this is pretty arbitrary for now + # large values: more that 8 characters including decimal symbol + # and first digit, hence > 1e6 + has_large_values = (abs_vals > 1e6).any() + has_small_values = ( + (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0) + ).any() + + if has_small_values or (too_long and has_large_values): + if self.leading_space is True: + fmt_str = "{value: .{digits:d}e}" + else: + fmt_str = "{value:.{digits:d}e}" + float_format = partial(fmt_str.format, digits=self.digits) + formatted_values = format_values_with(float_format) + + return formatted_values + + def _format_strings(self) -> list[str]: + return list(self.get_result_as_array()) + + +class IntArrayFormatter(GenericArrayFormatter): + def _format_strings(self) -> list[str]: + if self.leading_space is False: + formatter_str = lambda x: f"{x:d}".format(x=x) + else: + formatter_str = lambda x: f"{x: d}".format(x=x) + formatter = self.formatter or formatter_str + fmt_values = [formatter(x) for x in self.values] + return fmt_values + + +class Datetime64Formatter(GenericArrayFormatter): + def __init__( + self, + values: np.ndarray | Series | DatetimeIndex | DatetimeArray, + nat_rep: str = "NaT", + date_format: None = None, + **kwargs, + ) -> None: + super().__init__(values, **kwargs) + self.nat_rep = nat_rep + self.date_format = date_format + + def _format_strings(self) -> list[str]: + """we by definition have DO NOT have a TZ""" + values = self.values + + if not isinstance(values, DatetimeIndex): + values = DatetimeIndex(values) + + if self.formatter is not None and callable(self.formatter): + return [self.formatter(x) for x in values] + + fmt_values = values._data._format_native_types( + na_rep=self.nat_rep, date_format=self.date_format + ) + return fmt_values.tolist() + + +class ExtensionArrayFormatter(GenericArrayFormatter): + def _format_strings(self) -> list[str]: + values = extract_array(self.values, extract_numpy=True) + + formatter = self.formatter + if formatter is None: + formatter = values._formatter(boxed=True) + + if isinstance(values, Categorical): + # Categorical is special for now, so that we can preserve tzinfo + array = values._internal_get_values() + else: + array = np.asarray(values) + + fmt_values = format_array( + array, + formatter, + float_format=self.float_format, + na_rep=self.na_rep, + digits=self.digits, + space=self.space, + justify=self.justify, + decimal=self.decimal, + leading_space=self.leading_space, + quoting=self.quoting, + ) + return fmt_values + + +def format_percentiles( + percentiles: (np.ndarray | Sequence[float]), +) -> list[str]: + """ + Outputs rounded and formatted percentiles. 
+ + Parameters + ---------- + percentiles : list-like, containing floats from interval [0,1] + + Returns + ------- + formatted : list of strings + + Notes + ----- + Rounding precision is chosen so that: (1) if any two elements of + ``percentiles`` differ, they remain different after rounding + (2) no entry is *rounded* to 0% or 100%. + Any non-integer is always rounded to at least 1 decimal place. + + Examples + -------- + Keeps all entries different after rounding: + + >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999]) + ['1.999%', '2.001%', '50%', '66.667%', '99.99%'] + + No element is rounded to 0% or 100% (unless already equal to it). + Duplicates are allowed: + + >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) + ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] + """ + percentiles = np.asarray(percentiles) + + # It checks for np.NaN as well + with np.errstate(invalid="ignore"): + if ( + not is_numeric_dtype(percentiles) + or not np.all(percentiles >= 0) + or not np.all(percentiles <= 1) + ): + raise ValueError("percentiles should all be in the interval [0,1]") + + percentiles = 100 * percentiles + + int_idx = np.isclose(percentiles.astype(int), percentiles) + + if np.all(int_idx): + out = percentiles.astype(int).astype(str) + return [i + "%" for i in out] + + unique_pcts = np.unique(percentiles) + to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None + to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None + + # Least precision that keeps percentiles unique after rounding + prec = -np.floor( + np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end))) + ).astype(int) + prec = max(1, prec) + out = np.empty_like(percentiles, dtype=object) + out[int_idx] = percentiles[int_idx].astype(int).astype(str) + + out[~int_idx] = percentiles[~int_idx].round(prec).astype(str) + return [i + "%" for i in out] + + +def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool: + # return a boolean if we are only dates (and don't have a timezone) + if not isinstance(values, Index): + values = values.ravel() + + if not isinstance(values, (DatetimeArray, DatetimeIndex)): + values = DatetimeIndex(values) + + if values.tz is not None: + return False + + values_int = values.asi8 + consider_values = values_int != iNaT + # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type + # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" + reso = get_unit_from_dtype(values.dtype) # type: ignore[arg-type] + ppd = periods_per_day(reso) + + # TODO: can we reuse is_date_array_normalized? would need a skipna kwd + even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0 + if even_days: + return True + return False + + +def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str: + if x is NaT: + return nat_rep + + # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ') + # so it already uses string formatting rather than strftime (faster). 
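# e.g. str(Timestamp("2021-01-01")) -> "2021-01-01 00:00:00"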
+ return str(x) + + +def _format_datetime64_dateonly( + x: NaTType | Timestamp, + nat_rep: str = "NaT", + date_format: str | None = None, +) -> str: + if isinstance(x, NaTType): + return nat_rep + + if date_format: + return x.strftime(date_format) + else: + # Timestamp._date_repr relies on string formatting (faster than strftime) + return x._date_repr + + +def get_format_datetime64( + is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None +) -> Callable: + """Return a formatter callable taking a datetime64 as input and providing + a string as output""" + + if is_dates_only: + return lambda x: _format_datetime64_dateonly( + x, nat_rep=nat_rep, date_format=date_format + ) + else: + return lambda x: _format_datetime64(x, nat_rep=nat_rep) + + +def get_format_datetime64_from_values( + values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None +) -> str | None: + """given values and a date_format, return a string format""" + if isinstance(values, np.ndarray) and values.ndim > 1: + # We don't actually care about the order of values, and DatetimeIndex + # only accepts 1D values + values = values.ravel() + + ido = is_dates_only(values) + if ido: + # Only dates and no timezone: provide a default format + return date_format or "%Y-%m-%d" + return date_format + + +class Datetime64TZFormatter(Datetime64Formatter): + def _format_strings(self) -> list[str]: + """we by definition have a TZ""" + values = self.values.astype(object) + ido = is_dates_only(values) + formatter = self.formatter or get_format_datetime64( + ido, date_format=self.date_format + ) + fmt_values = [formatter(x) for x in values] + + return fmt_values + + +class Timedelta64Formatter(GenericArrayFormatter): + def __init__( + self, + values: np.ndarray | TimedeltaIndex, + nat_rep: str = "NaT", + box: bool = False, + **kwargs, + ) -> None: + super().__init__(values, **kwargs) + self.nat_rep = nat_rep + self.box = box + + def _format_strings(self) -> list[str]: + formatter = self.formatter or get_format_timedelta64( + self.values, nat_rep=self.nat_rep, box=self.box + ) + return [formatter(x) for x in self.values] + + +def get_format_timedelta64( + values: np.ndarray | TimedeltaIndex | TimedeltaArray, + nat_rep: str = "NaT", + box: bool = False, +) -> Callable: + """ + Return a formatter function for a range of timedeltas. 
+ These will all have the same format argument + + If box, then show the return in quotes + """ + values_int = values.view(np.int64) + + consider_values = values_int != iNaT + + one_day_nanos = 86400 * 10**9 + # error: Unsupported operand types for % ("ExtensionArray" and "int") + not_midnight = values_int % one_day_nanos != 0 # type: ignore[operator] + # error: Argument 1 to "__call__" of "ufunc" has incompatible type + # "Union[Any, ExtensionArray, ndarray]"; expected + # "Union[Union[int, float, complex, str, bytes, generic], + # Sequence[Union[int, float, complex, str, bytes, generic]], + # Sequence[Sequence[Any]], _SupportsArray]" + both = np.logical_and(consider_values, not_midnight) # type: ignore[arg-type] + even_days = both.sum() == 0 + + if even_days: + format = None + else: + format = "long" + + def _formatter(x): + if x is None or (is_scalar(x) and isna(x)): + return nat_rep + + if not isinstance(x, Timedelta): + x = Timedelta(x) + + # Timedelta._repr_base uses string formatting (faster than strftime) + result = x._repr_base(format=format) + if box: + result = f"'{result}'" + return result + + return _formatter + + +def _make_fixed_width( + strings: list[str], + justify: str = "right", + minimum: int | None = None, + adj: TextAdjustment | None = None, +) -> list[str]: + + if len(strings) == 0 or justify == "all": + return strings + + if adj is None: + adjustment = get_adjustment() + else: + adjustment = adj + + max_len = max(adjustment.len(x) for x in strings) + + if minimum is not None: + max_len = max(minimum, max_len) + + conf_max = get_option("display.max_colwidth") + if conf_max is not None and max_len > conf_max: + max_len = conf_max + + def just(x: str) -> str: + if conf_max is not None: + if (conf_max > 3) & (adjustment.len(x) > max_len): + x = x[: max_len - 3] + "..." + return x + + strings = [just(x) for x in strings] + result = adjustment.justify(strings, max_len, mode=justify) + return result + + +def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]: + """ + Separates the real and imaginary parts from the complex number, and + executes the _trim_zeros_float method on each of those. + """ + trimmed = [ + "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal)) + for x in str_complexes + ] + + # pad strings to the length of the longest trimmed string for alignment + lengths = [len(s) for s in trimmed] + max_length = max(lengths) + padded = [ + s[: -((k - 1) // 2 + 1)] # real part + + (max_length - k) // 2 * "0" + + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / - + + s[-((k - 1) // 2) : -1] # imaginary part + + (max_length - k) // 2 * "0" + + s[-1] + for s, k in zip(trimmed, lengths) + ] + return padded + + +def _trim_zeros_single_float(str_float: str) -> str: + """ + Trims trailing zeros after a decimal point, + leaving just one if necessary. + """ + str_float = str_float.rstrip("0") + if str_float.endswith("."): + str_float += "0" + + return str_float + + +def _trim_zeros_float( + str_floats: np.ndarray | list[str], decimal: str = "." +) -> list[str]: + """ + Trims the maximum number of trailing zeros equally from + all numbers containing decimals, leaving just one if + necessary. + """ + trimmed = str_floats + number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$") + + def is_number_with_decimal(x): + return re.match(number_regex, x) is not None + + def should_trim(values: np.ndarray | list[str]) -> bool: + """ + Determine if an array of strings should be trimmed. 
+ + Returns True if all numbers containing decimals (defined by the + above regular expression) within the array end in a zero, otherwise + returns False. + """ + numbers = [x for x in values if is_number_with_decimal(x)] + return len(numbers) > 0 and all(x.endswith("0") for x in numbers) + + while should_trim(trimmed): + trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed] + + # leave one 0 after the decimal points if need be. + result = [ + x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x + for x in trimmed + ] + return result + + +def _has_names(index: Index) -> bool: + if isinstance(index, MultiIndex): + return com.any_not_none(*index.names) + else: + return index.name is not None + + +class EngFormatter: + """ + Formats float values according to engineering format. + + Based on matplotlib.ticker.EngFormatter + """ + + # The SI engineering prefixes + ENG_PREFIXES = { + -24: "y", + -21: "z", + -18: "a", + -15: "f", + -12: "p", + -9: "n", + -6: "u", + -3: "m", + 0: "", + 3: "k", + 6: "M", + 9: "G", + 12: "T", + 15: "P", + 18: "E", + 21: "Z", + 24: "Y", + } + + def __init__( + self, accuracy: int | None = None, use_eng_prefix: bool = False + ) -> None: + self.accuracy = accuracy + self.use_eng_prefix = use_eng_prefix + + def __call__(self, num: float) -> str: + """ + Formats a number in engineering notation, appending a letter + representing the power of 1000 of the original number. Some examples: + >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True) + >>> format_eng(0) + ' 0' + >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True) + >>> format_eng(1_000_000) + ' 1.0M' + >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False) + >>> format_eng("-1e-6") + '-1.00E-06' + + @param num: the value to represent + @type num: either a numeric value or a string that can be converted to + a numeric value (as per decimal.Decimal constructor) + + @return: engineering formatted string + """ + dnum = decimal.Decimal(str(num)) + + if decimal.Decimal.is_nan(dnum): + return "NaN" + + if decimal.Decimal.is_infinite(dnum): + return "inf" + + sign = 1 + + if dnum < 0: # pragma: no cover + sign = -1 + dnum = -dnum + + if dnum != 0: + pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3)) + else: + pow10 = decimal.Decimal(0) + + pow10 = pow10.min(max(self.ENG_PREFIXES.keys())) + pow10 = pow10.max(min(self.ENG_PREFIXES.keys())) + int_pow10 = int(pow10) + + if self.use_eng_prefix: + prefix = self.ENG_PREFIXES[int_pow10] + else: + if int_pow10 < 0: + prefix = f"E-{-int_pow10:02d}" + else: + prefix = f"E+{int_pow10:02d}" + + mant = sign * dnum / (10**pow10) + + if self.accuracy is None: # pragma: no cover + format_str = "{mant: g}{prefix}" + else: + format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}" + + formatted = format_str.format(mant=mant, prefix=prefix) + + return formatted + + +def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None: + """ + Alter default behavior on how float is formatted in DataFrame. + Format float in engineering format. By accuracy, we mean the number of + decimal digits after the floating point. + + See also EngFormatter. + """ + set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix)) + + +def get_level_lengths( + levels: Any, sentinel: bool | object | str = "" +) -> list[dict[int, int]]: + """ + For each index in each level the function returns lengths of indexes. + + Parameters + ---------- + levels : list of lists + List of values on for level. 
+ sentinel : string, optional + Value which states that no new index starts on there. + + Returns + ------- + Returns list of maps. For each level returns map of indexes (key is index + in row and value is length of index). + """ + if len(levels) == 0: + return [] + + control = [True] * len(levels[0]) + + result = [] + for level in levels: + last_index = 0 + + lengths = {} + for i, key in enumerate(level): + if control[i] and key == sentinel: + pass + else: + control[i] = False + lengths[last_index] = i - last_index + last_index = i + + lengths[last_index] = len(level) - last_index + + result.append(lengths) + + return result + + +def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None: + """ + Appends lines to a buffer. + + Parameters + ---------- + buf + The buffer to write to + lines + The lines to append. + """ + if any(isinstance(x, str) for x in lines): + lines = [str(x) for x in lines] + buf.write("\n".join(lines)) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py new file mode 100644 index 00000000..e161c8ad --- /dev/null +++ b/pandas/io/formats/html.py @@ -0,0 +1,634 @@ +""" +Module for formatting output data in HTML. +""" +from __future__ import annotations + +from textwrap import dedent +from typing import ( + Any, + Final, + Hashable, + Iterable, + Mapping, + cast, +) + +from pandas._config import get_option + +from pandas._libs import lib + +from pandas import ( + MultiIndex, + option_context, +) + +from pandas.io.common import is_url +from pandas.io.formats.format import ( + DataFrameFormatter, + get_level_lengths, +) +from pandas.io.formats.printing import pprint_thing + + +class HTMLFormatter: + """ + Internal class for formatting output data in html. + This class is intended for shared functionality between + DataFrame.to_html() and DataFrame._repr_html_(). + Any logic in common with other output formatting methods + should ideally be inherited from classes in format.py + and this class responsible for only producing html markup. + """ + + indent_delta: Final = 2 + + def __init__( + self, + formatter: DataFrameFormatter, + classes: str | list[str] | tuple[str, ...] | None = None, + border: int | bool | None = None, + table_id: str | None = None, + render_links: bool = False, + ) -> None: + self.fmt = formatter + self.classes = classes + + self.frame = self.fmt.frame + self.columns = self.fmt.tr_frame.columns + self.elements: list[str] = [] + self.bold_rows = self.fmt.bold_rows + self.escape = self.fmt.escape + self.show_dimensions = self.fmt.show_dimensions + if border is None or border is True: + border = cast(int, get_option("display.html.border")) + elif not border: + border = None + + self.border = border + self.table_id = table_id + self.render_links = render_links + + self.col_space = { + column: f"{value}px" if isinstance(value, int) else value + for column, value in self.fmt.col_space.items() + } + + def to_string(self) -> str: + lines = self.render() + if any(isinstance(x, str) for x in lines): + lines = [str(x) for x in lines] + return "\n".join(lines) + + def render(self) -> list[str]: + self._write_table() + + if self.should_show_dimensions: + by = chr(215) # × + self.write( + f"

<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>

    " + ) + + return self.elements + + @property + def should_show_dimensions(self) -> bool: + return self.fmt.should_show_dimensions + + @property + def show_row_idx_names(self) -> bool: + return self.fmt.show_row_idx_names + + @property + def show_col_idx_names(self) -> bool: + return self.fmt.show_col_idx_names + + @property + def row_levels(self) -> int: + if self.fmt.index: + # showing (row) index + return self.frame.index.nlevels + elif self.show_col_idx_names: + # see gh-22579 + # Column misalignment also occurs for + # a standard index when the columns index is named. + # If the row index is not displayed a column of + # blank cells need to be included before the DataFrame values. + return 1 + # not showing (row) index + return 0 + + def _get_columns_formatted_values(self) -> Iterable: + return self.columns + + @property + def is_truncated(self) -> bool: + return self.fmt.is_truncated + + @property + def ncols(self) -> int: + return len(self.fmt.tr_frame.columns) + + def write(self, s: Any, indent: int = 0) -> None: + rs = pprint_thing(s) + self.elements.append(" " * indent + rs) + + def write_th( + self, s: Any, header: bool = False, indent: int = 0, tags: str | None = None + ) -> None: + """ + Method for writing a formatted . This will + cause min-width to be set if there is one. + indent : int, default 0 + The indentation level of the cell. + tags : str, default None + Tags to include in the cell. + + Returns + ------- + A written ", indent) + else: + self.write(f'', indent) + indent += indent_delta + + for i, s in enumerate(line): + val_tag = tags.get(i, None) + if header or (self.bold_rows and i < nindex_levels): + self.write_th(s, indent=indent, header=header, tags=val_tag) + else: + self.write_td(s, indent, tags=val_tag) + + indent -= indent_delta + self.write("", indent) + + def _write_table(self, indent: int = 0) -> None: + _classes = ["dataframe"] # Default class. + use_mathjax = get_option("display.html.use_mathjax") + if not use_mathjax: + _classes.append("tex2jax_ignore") + if self.classes is not None: + if isinstance(self.classes, str): + self.classes = self.classes.split() + if not isinstance(self.classes, (list, tuple)): + raise TypeError( + "classes must be a string, list, " + f"or tuple, not {type(self.classes)}" + ) + _classes.extend(self.classes) + + if self.table_id is None: + id_section = "" + else: + id_section = f' id="{self.table_id}"' + + if self.border is None: + border_attr = "" + else: + border_attr = f' border="{self.border}"' + + self.write( + f'', + indent, + ) + + if self.fmt.header or self.show_row_idx_names: + self._write_header(indent + self.indent_delta) + + self._write_body(indent + self.indent_delta) + + self.write("
<th> cell. + + If col_space is set on the formatter then that is used for + the value of min-width. + + Parameters + ---------- + s : object + The data to be written inside the cell. + header : bool, default False + Set to True if the <th> is for use inside <thead>
    cell. + """ + col_space = self.col_space.get(s, None) + + if header and col_space is not None: + tags = tags or "" + tags += f'style="min-width: {col_space};"' + + self._write_cell(s, kind="th", indent=indent, tags=tags) + + def write_td(self, s: Any, indent: int = 0, tags: str | None = None) -> None: + self._write_cell(s, kind="td", indent=indent, tags=tags) + + def _write_cell( + self, s: Any, kind: str = "td", indent: int = 0, tags: str | None = None + ) -> None: + if tags is not None: + start_tag = f"<{kind} {tags}>" + else: + start_tag = f"<{kind}>" + + if self.escape: + # escape & first to prevent double escaping of & + esc = {"&": r"&", "<": r"<", ">": r">"} + else: + esc = {} + + rs = pprint_thing(s, escape_chars=esc).strip() + + if self.render_links and is_url(rs): + rs_unescaped = pprint_thing(s, escape_chars={}).strip() + start_tag += f'' + end_a = "" + else: + end_a = "" + + self.write(f"{start_tag}{rs}{end_a}", indent) + + def write_tr( + self, + line: Iterable, + indent: int = 0, + indent_delta: int = 0, + header: bool = False, + align: str | None = None, + tags: dict[int, str] | None = None, + nindex_levels: int = 0, + ) -> None: + if tags is None: + tags = {} + + if align is None: + self.write("
</table>
    ", indent) + + def _write_col_header(self, indent: int) -> None: + row: list[Hashable] + is_truncated_horizontally = self.fmt.is_truncated_horizontally + if isinstance(self.columns, MultiIndex): + template = 'colspan="{span:d}" halign="left"' + + sentinel: lib.NoDefault | bool + if self.fmt.sparsify: + # GH3547 + sentinel = lib.no_default + else: + sentinel = False + levels = self.columns.format(sparsify=sentinel, adjoin=False, names=False) + level_lengths = get_level_lengths(levels, sentinel) + inner_lvl = len(level_lengths) - 1 + for lnum, (records, values) in enumerate(zip(level_lengths, levels)): + if is_truncated_horizontally: + # modify the header lines + ins_col = self.fmt.tr_col_num + if self.fmt.sparsify: + recs_new = {} + # Increment tags after ... col. + for tag, span in list(records.items()): + if tag >= ins_col: + recs_new[tag + 1] = span + elif tag + span > ins_col: + recs_new[tag] = span + 1 + if lnum == inner_lvl: + values = ( + values[:ins_col] + ("...",) + values[ins_col:] + ) + else: + # sparse col headers do not receive a ... + values = ( + values[:ins_col] + + (values[ins_col - 1],) + + values[ins_col:] + ) + else: + recs_new[tag] = span + # if ins_col lies between tags, all col headers + # get ... + if tag + span == ins_col: + recs_new[ins_col] = 1 + values = values[:ins_col] + ("...",) + values[ins_col:] + records = recs_new + inner_lvl = len(level_lengths) - 1 + if lnum == inner_lvl: + records[ins_col] = 1 + else: + recs_new = {} + for tag, span in list(records.items()): + if tag >= ins_col: + recs_new[tag + 1] = span + else: + recs_new[tag] = span + recs_new[ins_col] = 1 + records = recs_new + values = values[:ins_col] + ["..."] + values[ins_col:] + + # see gh-22579 + # Column Offset Bug with to_html(index=False) with + # MultiIndex Columns and Index. + # Initially fill row with blank cells before column names. + # TODO: Refactor to remove code duplication with code + # block below for standard columns index. + row = [""] * (self.row_levels - 1) + if self.fmt.index or self.show_col_idx_names: + # see gh-22747 + # If to_html(index_names=False) do not show columns + # index names. + # TODO: Refactor to use _get_column_name_list from + # DataFrameFormatter class and create a + # _get_formatted_column_labels function for code + # parity with DataFrameFormatter class. + if self.fmt.show_index_names: + name = self.columns.names[lnum] + row.append(pprint_thing(name or "")) + else: + row.append("") + + tags = {} + j = len(row) + for i, v in enumerate(values): + if i in records: + if records[i] > 1: + tags[j] = template.format(span=records[i]) + else: + continue + j += 1 + row.append(v) + self.write_tr(row, indent, self.indent_delta, tags=tags, header=True) + else: + # see gh-22579 + # Column misalignment also occurs for + # a standard index when the columns index is named. + # Initially fill row with blank cells before column names. + # TODO: Refactor to remove code duplication with code block + # above for columns MultiIndex. + row = [""] * (self.row_levels - 1) + if self.fmt.index or self.show_col_idx_names: + # see gh-22747 + # If to_html(index_names=False) do not show columns + # index names. + # TODO: Refactor to use _get_column_name_list from + # DataFrameFormatter class. 
+ if self.fmt.show_index_names: + row.append(self.columns.name or "") + else: + row.append("") + row.extend(self._get_columns_formatted_values()) + align = self.fmt.justify + + if is_truncated_horizontally: + ins_col = self.row_levels + self.fmt.tr_col_num + row.insert(ins_col, "...") + + self.write_tr(row, indent, self.indent_delta, header=True, align=align) + + def _write_row_header(self, indent: int) -> None: + is_truncated_horizontally = self.fmt.is_truncated_horizontally + row = [x if x is not None else "" for x in self.frame.index.names] + [""] * ( + self.ncols + (1 if is_truncated_horizontally else 0) + ) + self.write_tr(row, indent, self.indent_delta, header=True) + + def _write_header(self, indent: int) -> None: + self.write("
    `` tag + in addition to automatic (by default) id. + cell_ids : bool, default True + If True, each cell will have an ``id`` attribute in their HTML tag. + The ``id`` takes the form ``T__row_col`` + where ```` is the unique identifier, ```` is the row + number and ```` is the column number. + na_rep : str, optional + Representation for missing values. + If ``na_rep`` is None, no special formatting is applied, and falls back to + ``pandas.options.styler.format.na_rep``. + + .. versionadded:: 1.0.0 + + uuid_len : int, default 5 + If ``uuid`` is not specified, the length of the ``uuid`` to randomly generate + expressed in hex characters, in range [0, 32]. + + .. versionadded:: 1.2.0 + + decimal : str, optional + Character used as decimal separator for floats, complex and integers. If not + given uses ``pandas.options.styler.format.decimal``. + + .. versionadded:: 1.3.0 + + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers. If not + given uses ``pandas.options.styler.format.thousands``. + + .. versionadded:: 1.3.0 + + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. If not given uses ``pandas.options.styler.format.escape``. + + .. versionadded:: 1.3.0 + formatter : str, callable, dict, optional + Object to define how values are displayed. See ``Styler.format``. If not given + uses ``pandas.options.styler.format.formatter``. + + .. versionadded:: 1.4.0 + + Attributes + ---------- + env : Jinja2 jinja2.Environment + template_html : Jinja2 Template + template_html_table : Jinja2 Template + template_html_style : Jinja2 Template + template_latex : Jinja2 Template + loader : Jinja2 Loader + + See Also + -------- + DataFrame.style : Return a Styler object containing methods for building + a styled HTML representation for the DataFrame. + + Notes + ----- + Most styling will be done by passing style functions into + ``Styler.apply`` or ``Styler.applymap``. Style functions should + return values with strings containing CSS ``'attr: value'`` that will + be applied to the indicated cells. + + If using in the Jupyter notebook, Styler has defined a ``_repr_html_`` + to automatically render itself. Otherwise call Styler.to_html to get + the generated HTML. + + CSS classes are attached to the generated HTML + + * Index and Column names include ``index_name`` and ``level`` + where `k` is its level in a MultiIndex + * Index label cells include + + * ``row_heading`` + * ``row`` where `n` is the numeric position of the row + * ``level`` where `k` is the level in a MultiIndex + + * Column label cells include + * ``col_heading`` + * ``col`` where `n` is the numeric position of the column + * ``level`` where `k` is the level in a MultiIndex + + * Blank cells include ``blank`` + * Data cells include ``data`` + * Trimmed cells include ``col_trim`` or ``row_trim``. + + Any, or all, or these classes can be renamed by using the ``css_class_names`` + argument in ``Styler.set_table_classes``, giving a value such as + *{"row": "MY_ROW_CLASS", "col_trim": "", "row_trim": ""}*. 
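A minimal usage sketch (the DataFrame and the chained options below are illustrative only, not taken from this patch):

>>> import pandas as pd
>>> df = pd.DataFrame({"A": [1.0, 2.0], "B": [3.0, 4.0]})
>>> styler = df.style.format(precision=2).highlight_max(color="yellow")
>>> html = styler.to_html()  # doctest: +SKIP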
+ """ + + def __init__( + self, + data: DataFrame | Series, + precision: int | None = None, + table_styles: CSSStyles | None = None, + uuid: str | None = None, + caption: str | tuple | None = None, + table_attributes: str | None = None, + cell_ids: bool = True, + na_rep: str | None = None, + uuid_len: int = 5, + decimal: str | None = None, + thousands: str | None = None, + escape: str | None = None, + formatter: ExtFormatter | None = None, + ) -> None: + super().__init__( + data=data, + uuid=uuid, + uuid_len=uuid_len, + table_styles=table_styles, + table_attributes=table_attributes, + caption=caption, + cell_ids=cell_ids, + precision=precision, + ) + + # validate ordered args + thousands = thousands or get_option("styler.format.thousands") + decimal = decimal or get_option("styler.format.decimal") + na_rep = na_rep or get_option("styler.format.na_rep") + escape = escape or get_option("styler.format.escape") + formatter = formatter or get_option("styler.format.formatter") + # precision is handled by superclass as default for performance + + self.precision = precision # can be removed on set_precision depr cycle + self.na_rep = na_rep # can be removed on set_na_rep depr cycle + self.format( + formatter=formatter, + precision=precision, + na_rep=na_rep, + escape=escape, + decimal=decimal, + thousands=thousands, + ) + + def concat(self, other: Styler) -> Styler: + """ + Append another Styler to combine the output into a single table. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + other : Styler + The other Styler object which has already been styled and formatted. The + data for this Styler must have the same columns as the original, and the + number of index levels must also be the same to render correctly. + + Returns + ------- + self : Styler + + Notes + ----- + The purpose of this method is to extend existing styled dataframes with other + metrics that may be useful but may not conform to the original's structure. + For example adding a sub total row, or displaying metrics such as means, + variance or counts. + + Styles that are applied using the ``apply``, ``applymap``, ``apply_index`` + and ``applymap_index``, and formatting applied with ``format`` and + ``format_index`` will be preserved. + + .. warning:: + Only the output methods ``to_html``, ``to_string`` and ``to_latex`` + currently work with concatenated Stylers. + + Other output methods, including ``to_excel``, **do not** work with + concatenated Stylers. + + The following should be noted: + + - ``table_styles``, ``table_attributes``, ``caption`` and ``uuid`` are all + inherited from the original Styler and not ``other``. + - hidden columns and hidden index levels will be inherited from the + original Styler + - ``css`` will be inherited from the original Styler, and the value of + keys ``data``, ``row_heading`` and ``row`` will be prepended with + ``foot0_``. If more concats are chained, their styles will be prepended + with ``foot1_``, ''foot_2'', etc., and if a concatenated style have + another concatanated style, the second style will be prepended with + ``foot{parent}_foot{child}_``. + + A common use case is to concatenate user defined functions with + ``DataFrame.agg`` or with described statistics via ``DataFrame.describe``. + See examples. + + Examples + -------- + A common use case is adding totals rows, or otherwise, via methods calculated + in ``DataFrame.agg``. + + >>> df = DataFrame([[4, 6], [1, 9], [3, 4], [5, 5], [9,6]], + ... columns=["Mike", "Jim"], + ... 
index=["Mon", "Tue", "Wed", "Thurs", "Fri"]) + >>> styler = df.style.concat(df.agg(["sum"]).style) # doctest: +SKIP + + .. figure:: ../../_static/style/footer_simple.png + + Since the concatenated object is a Styler the existing functionality can be + used to conditionally format it as well as the original. + + >>> descriptors = df.agg(["sum", "mean", lambda s: s.dtype]) + >>> descriptors.index = ["Total", "Average", "dtype"] + >>> other = (descriptors.style + ... .highlight_max(axis=1, subset=(["Total", "Average"], slice(None))) + ... .format(subset=("Average", slice(None)), precision=2, decimal=",") + ... .applymap(lambda v: "font-weight: bold;")) + >>> styler = (df.style + ... .highlight_max(color="salmon") + ... .set_table_styles([{"selector": ".foot_row0", + ... "props": "border-top: 1px solid black;"}])) + >>> styler.concat(other) # doctest: +SKIP + + .. figure:: ../../_static/style/footer_extended.png + + When ``other`` has fewer index levels than the original Styler it is possible + to extend the index in ``other``, with placeholder levels. + + >>> df = DataFrame([[1], [2]], index=pd.MultiIndex.from_product([[0], [1, 2]])) + >>> descriptors = df.agg(["sum"]) + >>> descriptors.index = pd.MultiIndex.from_product([[""], descriptors.index]) + >>> df.style.concat(descriptors.style) # doctest: +SKIP + """ + if not isinstance(other, Styler): + raise TypeError("`other` must be of type `Styler`") + if not self.data.columns.equals(other.data.columns): + raise ValueError("`other.data` must have same columns as `Styler.data`") + if not self.data.index.nlevels == other.data.index.nlevels: + raise ValueError( + "number of index levels must be same in `other` " + "as in `Styler`. See documentation for suggestions." + ) + self.concatenated.append(other) + return self + + def _repr_html_(self) -> str | None: + """ + Hooks into Jupyter notebook rich display system, which calls _repr_html_ by + default if an object is returned at the end of a cell. + """ + if get_option("styler.render.repr") == "html": + return self.to_html() + return None + + def _repr_latex_(self) -> str | None: + if get_option("styler.render.repr") == "latex": + return self.to_latex() + return None + + def render( + self, + sparse_index: bool | None = None, + sparse_columns: bool | None = None, + **kwargs, + ) -> str: + """ + Render the ``Styler`` including all applied styles to HTML. + + .. deprecated:: 1.4.0 + + Parameters + ---------- + sparse_index : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.index`` value. + sparse_columns : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.columns`` value. + **kwargs + Any additional keyword arguments are passed + through to ``self.template.render``. + This is useful when you need to provide + additional variables for a custom template. + + Returns + ------- + rendered : str + The rendered HTML. + + Notes + ----- + This method is deprecated in favour of ``Styler.to_html``. + + Styler objects have defined the ``_repr_html_`` method + which automatically calls ``self.to_html()`` when it's the + last item in a Notebook cell. + + When calling ``Styler.render()`` directly, wrap the result in + ``IPython.display.HTML`` to view the rendered HTML in the notebook. 
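A sketch of that pattern (``df`` here is any DataFrame; assumes an interactive IPython/Jupyter session):

>>> from IPython.display import HTML
>>> HTML(df.style.render())  # doctest: +SKIP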
+ + Pandas uses the following keys in render. Arguments passed + in ``**kwargs`` take precedence, so think carefully if you want + to override them: + + * head + * cellstyle + * body + * uuid + * table_styles + * caption + * table_attributes + """ + warnings.warn( + "this method is deprecated in favour of `Styler.to_html()`", + FutureWarning, + stacklevel=find_stack_level(), + ) + if sparse_index is None: + sparse_index = get_option("styler.sparse.index") + if sparse_columns is None: + sparse_columns = get_option("styler.sparse.columns") + return self._render_html(sparse_index, sparse_columns, **kwargs) + + def set_tooltips( + self, + ttips: DataFrame, + props: CSSProperties | None = None, + css_class: str | None = None, + ) -> Styler: + """ + Set the DataFrame of strings on ``Styler`` generating ``:hover`` tooltips. + + These string based tooltips are only applicable to `` + + + + + + + + +
<td>
    `` HTML elements, + and cannot be used for column or index headers. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + ttips : DataFrame + DataFrame containing strings that will be translated to tooltips, mapped + by identical column and index values that must exist on the underlying + Styler data. None, NaN values, and empty strings will be ignored and + not affect the rendered HTML. + props : list-like or str, optional + List of (attr, value) tuples or a valid CSS string. If ``None`` adopts + the internal default values described in notes. + css_class : str, optional + Name of the tooltip class used in CSS, should conform to HTML standards. + Only useful if integrating tooltips with external CSS. If ``None`` uses the + internal default value 'pd-t'. + + Returns + ------- + self : Styler + + Notes + ----- + Tooltips are created by adding `` to each data cell + and then manipulating the table level CSS to attach pseudo hover and pseudo + after selectors to produce the required the results. + + The default properties for the tooltip CSS class are: + + - visibility: hidden + - position: absolute + - z-index: 1 + - background-color: black + - color: white + - transform: translate(-20px, -20px) + + The property 'visibility: hidden;' is a key prerequisite to the hover + functionality, and should always be included in any manual properties + specification, using the ``props`` argument. + + Tooltips are not designed to be efficient, and can add large amounts of + additional HTML for larger tables, since they also require that ``cell_ids`` + is forced to `True`. + + Examples + -------- + Basic application + + >>> df = pd.DataFrame(data=[[0, 1], [2, 3]]) + >>> ttips = pd.DataFrame( + ... data=[["Min", ""], [np.nan, "Max"]], columns=df.columns, index=df.index + ... ) + >>> s = df.style.set_tooltips(ttips).to_html() + + Optionally controlling the tooltip visual display + + >>> df.style.set_tooltips(ttips, css_class='tt-add', props=[ + ... ('visibility', 'hidden'), + ... ('position', 'absolute'), + ... ('z-index', 1)]) # doctest: +SKIP + >>> df.style.set_tooltips(ttips, css_class='tt-add', + ... props='visibility:hidden; position:absolute; z-index:1;') + ... # doctest: +SKIP + """ + if not self.cell_ids: + # tooltips not optimised for individual cell check. requires reasonable + # redesign and more extensive code for a feature that might be rarely used. + raise NotImplementedError( + "Tooltips can only render with 'cell_ids' is True." + ) + if not ttips.index.is_unique or not ttips.columns.is_unique: + raise KeyError( + "Tooltips render only if `ttips` has unique index and columns." 
+ ) + if self.tooltips is None: # create a default instance if necessary + self.tooltips = Tooltips() + self.tooltips.tt_data = ttips + if props: + self.tooltips.class_properties = props + if css_class: + self.tooltips.class_name = css_class + + return self + + @doc( + NDFrame.to_excel, + klass="Styler", + storage_options=_shared_docs["storage_options"], + storage_options_versionadded="1.5.0", + ) + def to_excel( + self, + excel_writer, + sheet_name: str = "Sheet1", + na_rep: str = "", + float_format: str | None = None, + columns: Sequence[Hashable] | None = None, + header: Sequence[Hashable] | bool = True, + index: bool = True, + index_label: IndexLabel | None = None, + startrow: int = 0, + startcol: int = 0, + engine: str | None = None, + merge_cells: bool = True, + encoding: str | None = None, + inf_rep: str = "inf", + verbose: bool = True, + freeze_panes: tuple[int, int] | None = None, + storage_options: StorageOptions = None, + ) -> None: + + from pandas.io.formats.excel import ExcelFormatter + + formatter = ExcelFormatter( + self, + na_rep=na_rep, + cols=columns, + header=header, + float_format=float_format, + index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep, + ) + formatter.write( + excel_writer, + sheet_name=sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + engine=engine, + storage_options=storage_options, + ) + + @overload + def to_latex( + self, + buf: FilePath | WriteBuffer[str], + *, + column_format: str | None = ..., + position: str | None = ..., + position_float: str | None = ..., + hrules: bool | None = ..., + clines: str | None = ..., + label: str | None = ..., + caption: str | tuple | None = ..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + multirow_align: str | None = ..., + multicol_align: str | None = ..., + siunitx: bool = ..., + environment: str | None = ..., + encoding: str | None = ..., + convert_css: bool = ..., + ) -> None: + ... + + @overload + def to_latex( + self, + buf: None = ..., + *, + column_format: str | None = ..., + position: str | None = ..., + position_float: str | None = ..., + hrules: bool | None = ..., + clines: str | None = ..., + label: str | None = ..., + caption: str | tuple | None = ..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + multirow_align: str | None = ..., + multicol_align: str | None = ..., + siunitx: bool = ..., + environment: str | None = ..., + encoding: str | None = ..., + convert_css: bool = ..., + ) -> str: + ... + + def to_latex( + self, + buf: FilePath | WriteBuffer[str] | None = None, + *, + column_format: str | None = None, + position: str | None = None, + position_float: str | None = None, + hrules: bool | None = None, + clines: str | None = None, + label: str | None = None, + caption: str | tuple | None = None, + sparse_index: bool | None = None, + sparse_columns: bool | None = None, + multirow_align: str | None = None, + multicol_align: str | None = None, + siunitx: bool = False, + environment: str | None = None, + encoding: str | None = None, + convert_css: bool = False, + ) -> str | None: + r""" + Write Styler to a file, buffer or string in LaTeX format. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + buf : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``write()`` function. If None, the result is + returned as a string. 
+ column_format : str, optional + The LaTeX column specification placed in location: + + \\begin{tabular}{} + + Defaults to 'l' for index and + non-numeric data columns, and, for numeric data columns, + to 'r' by default, or 'S' if ``siunitx`` is ``True``. + position : str, optional + The LaTeX positional argument (e.g. 'h!') for tables, placed in location: + + ``\\begin{table}[]``. + position_float : {"centering", "raggedleft", "raggedright"}, optional + The LaTeX float command placed in location: + + \\begin{table}[] + + \\ + + Cannot be used if ``environment`` is "longtable". + hrules : bool + Set to `True` to add \\toprule, \\midrule and \\bottomrule from the + {booktabs} LaTeX package. + Defaults to ``pandas.options.styler.latex.hrules``, which is `False`. + + .. versionchanged:: 1.4.0 + clines : str, optional + Use to control adding \\cline commands for the index labels separation. + Possible values are: + + - `None`: no cline commands are added (default). + - `"all;data"`: a cline is added for every index value extending the + width of the table, including data entries. + - `"all;index"`: as above with lines extending only the width of the + index entries. + - `"skip-last;data"`: a cline is added for each index value except the + last level (which is never sparsified), extending the widtn of the + table. + - `"skip-last;index"`: as above with lines extending only the width of the + index entries. + + .. versionadded:: 1.4.0 + label : str, optional + The LaTeX label included as: \\label{
    }. + If tuple, i.e ("full caption", "short caption"), the caption included + as: \\caption[]{}. + sparse_index : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.index``, which is `True`. + sparse_columns : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each + column. Defaults to ``pandas.options.styler.sparse.columns``, which + is `True`. + multirow_align : {"c", "t", "b", "naive"}, optional + If sparsifying hierarchical MultiIndexes whether to align text centrally, + at the top or bottom using the multirow package. If not given defaults to + ``pandas.options.styler.latex.multirow_align``, which is `"c"`. + If "naive" is given renders without multirow. + + .. versionchanged:: 1.4.0 + multicol_align : {"r", "c", "l", "naive-l", "naive-r"}, optional + If sparsifying hierarchical MultiIndex columns whether to align text at + the left, centrally, or at the right. If not given defaults to + ``pandas.options.styler.latex.multicol_align``, which is "r". + If a naive option is given renders without multicol. + Pipe decorators can also be added to non-naive values to draw vertical + rules, e.g. "\|r" will draw a rule on the left side of right aligned merged + cells. + + .. versionchanged:: 1.4.0 + siunitx : bool, default False + Set to ``True`` to structure LaTeX compatible with the {siunitx} package. + environment : str, optional + If given, the environment that will replace 'table' in ``\\begin{table}``. + If 'longtable' is specified then a more suitable template is + rendered. If not given defaults to + ``pandas.options.styler.latex.environment``, which is `None`. + + .. versionadded:: 1.4.0 + encoding : str, optional + Character encoding setting. Defaults + to ``pandas.options.styler.render.encoding``, which is "utf-8". + convert_css : bool, default False + Convert simple cell-styles from CSS to LaTeX format. Any CSS not found in + conversion table is dropped. A style can be forced by adding option + `--latex`. See notes. + + Returns + ------- + str or None + If `buf` is None, returns the result as a string. Otherwise returns `None`. + + See Also + -------- + Styler.format: Format the text display value of cells. 
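A minimal round trip, as a sketch (the frame and the chosen options are illustrative, not part of this patch):

>>> df = pd.DataFrame({"x": [1.0, 2.5]})
>>> print(df.style.format(precision=1).to_latex(hrules=True))  # doctest: +SKIP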
+ + Notes + ----- + **Latex Packages** + + For the following features we recommend the following LaTeX inclusions: + + ===================== ========================================================== + Feature Inclusion + ===================== ========================================================== + sparse columns none: included within default {tabular} environment + sparse rows \\usepackage{multirow} + hrules \\usepackage{booktabs} + colors \\usepackage[table]{xcolor} + siunitx \\usepackage{siunitx} + bold (with siunitx) | \\usepackage{etoolbox} + | \\robustify\\bfseries + | \\sisetup{detect-all = true} *(within {document})* + italic (with siunitx) | \\usepackage{etoolbox} + | \\robustify\\itshape + | \\sisetup{detect-all = true} *(within {document})* + environment \\usepackage{longtable} if arg is "longtable" + | or any other relevant environment package + hyperlinks \\usepackage{hyperref} + ===================== ========================================================== + + **Cell Styles** + + LaTeX styling can only be rendered if the accompanying styling functions have + been constructed with appropriate LaTeX commands. All styling + functionality is built around the concept of a CSS ``(, )`` + pair (see `Table Visualization <../../user_guide/style.ipynb>`_), and this + should be replaced by a LaTeX + ``(, )`` approach. Each cell will be styled individually + using nested LaTeX commands with their accompanied options. + + For example the following code will highlight and bold a cell in HTML-CSS: + + >>> df = pd.DataFrame([[1,2], [3,4]]) + >>> s = df.style.highlight_max(axis=None, + ... props='background-color:red; font-weight:bold;') + >>> s.to_html() # doctest: +SKIP + + The equivalent using LaTeX only commands is the following: + + >>> s = df.style.highlight_max(axis=None, + ... props='cellcolor:{red}; bfseries: ;') + >>> s.to_latex() # doctest: +SKIP + + Internally these structured LaTeX ``(, )`` pairs + are translated to the + ``display_value`` with the default structure: + ``\ ``. + Where there are multiple commands the latter is nested recursively, so that + the above example highlighed cell is rendered as + ``\cellcolor{red} \bfseries 4``. + + Occasionally this format does not suit the applied command, or + combination of LaTeX packages that is in use, so additional flags can be + added to the ````, within the tuple, to result in different + positions of required braces (the **default** being the same as ``--nowrap``): + + =================================== ============================================ + Tuple Format Output Structure + =================================== ============================================ + (,) \\ + (, ``--nowrap``) \\ + (, ``--rwrap``) \\{} + (, ``--wrap``) {\\ } + (, ``--lwrap``) {\\} + (, ``--dwrap``) {\\}{} + =================================== ============================================ + + For example the `textbf` command for font-weight + should always be used with `--rwrap` so ``('textbf', '--rwrap')`` will render a + working cell, wrapped with braces, as ``\textbf{}``. + + A more comprehensive example is as follows: + + >>> df = pd.DataFrame([[1, 2.2, "dogs"], [3, 4.4, "cats"], [2, 6.6, "cows"]], + ... index=["ix1", "ix2", "ix3"], + ... columns=["Integers", "Floats", "Strings"]) + >>> s = df.style.highlight_max( + ... props='cellcolor:[HTML]{FFFF00}; color:{red};' + ... 'textit:--rwrap; textbf:--rwrap;' + ... ) + >>> s.to_latex() # doctest: +SKIP + + .. 
figure:: ../../_static/style/latex_1.png + + **Table Styles** + + Internally Styler uses its ``table_styles`` object to parse the + ``column_format``, ``position``, ``position_float``, and ``label`` + input arguments. These arguments are added to table styles in the format: + + .. code-block:: python + + set_table_styles([ + {"selector": "column_format", "props": f":{column_format};"}, + {"selector": "position", "props": f":{position};"}, + {"selector": "position_float", "props": f":{position_float};"}, + {"selector": "label", "props": f":{{{label.replace(':','§')}}};"} + ], overwrite=False) + + Exception is made for the ``hrules`` argument which, in fact, controls all three + commands: ``toprule``, ``bottomrule`` and ``midrule`` simultaneously. Instead of + setting ``hrules`` to ``True``, it is also possible to set each + individual rule definition, by manually setting the ``table_styles``, + for example below we set a regular ``toprule``, set an ``hline`` for + ``bottomrule`` and exclude the ``midrule``: + + .. code-block:: python + + set_table_styles([ + {'selector': 'toprule', 'props': ':toprule;'}, + {'selector': 'bottomrule', 'props': ':hline;'}, + ], overwrite=False) + + If other ``commands`` are added to table styles they will be detected, and + positioned immediately above the '\\begin{tabular}' command. For example to + add odd and even row coloring, from the {colortbl} package, in format + ``\rowcolors{1}{pink}{red}``, use: + + .. code-block:: python + + set_table_styles([ + {'selector': 'rowcolors', 'props': ':{1}{pink}{red};'} + ], overwrite=False) + + A more comprehensive example using these arguments is as follows: + + >>> df.columns = pd.MultiIndex.from_tuples([ + ... ("Numeric", "Integers"), + ... ("Numeric", "Floats"), + ... ("Non-Numeric", "Strings") + ... ]) + >>> df.index = pd.MultiIndex.from_tuples([ + ... ("L0", "ix1"), ("L0", "ix2"), ("L1", "ix3") + ... ]) + >>> s = df.style.highlight_max( + ... props='cellcolor:[HTML]{FFFF00}; color:{red}; itshape:; bfseries:;' + ... ) + >>> s.to_latex( + ... column_format="rrrrr", position="h", position_float="centering", + ... hrules=True, label="table:5", caption="Styled LaTeX Table", + ... multirow_align="t", multicol_align="r" + ... ) # doctest: +SKIP + + .. figure:: ../../_static/style/latex_2.png + + **Formatting** + + To format values :meth:`Styler.format` should be used prior to calling + `Styler.to_latex`, as well as other methods such as :meth:`Styler.hide` + for example: + + >>> s.clear() + >>> s.table_styles = [] + >>> s.caption = None + >>> s.format({ + ... ("Numeric", "Integers"): '\${}', + ... ("Numeric", "Floats"): '{:.3f}', + ... ("Non-Numeric", "Strings"): str.upper + ... }) # doctest: +SKIP + Numeric Non-Numeric + Integers Floats Strings + L0 ix1 $1 2.200 DOGS + ix2 $3 4.400 CATS + L1 ix3 $2 6.600 COWS + + >>> s.to_latex() # doctest: +SKIP + \begin{tabular}{llrrl} + {} & {} & \multicolumn{2}{r}{Numeric} & {Non-Numeric} \\ + {} & {} & {Integers} & {Floats} & {Strings} \\ + \multirow[c]{2}{*}{L0} & ix1 & \\$1 & 2.200 & DOGS \\ + & ix2 & \$3 & 4.400 & CATS \\ + L1 & ix3 & \$2 & 6.600 & COWS \\ + \end{tabular} + + **CSS Conversion** + + This method can convert a Styler constructured with HTML-CSS to LaTeX using + the following limited conversions. 
+ + ================== ==================== ============= ========================== + CSS Attribute CSS value LaTeX Command LaTeX Options + ================== ==================== ============= ========================== + font-weight | bold | bfseries + | bolder | bfseries + font-style | italic | itshape + | oblique | slshape + background-color | red cellcolor | {red}--lwrap + | #fe01ea | [HTML]{FE01EA}--lwrap + | #f0e | [HTML]{FF00EE}--lwrap + | rgb(128,255,0) | [rgb]{0.5,1,0}--lwrap + | rgba(128,0,0,0.5) | [rgb]{0.5,0,0}--lwrap + | rgb(25%,255,50%) | [rgb]{0.25,1,0.5}--lwrap + color | red color | {red} + | #fe01ea | [HTML]{FE01EA} + | #f0e | [HTML]{FF00EE} + | rgb(128,255,0) | [rgb]{0.5,1,0} + | rgba(128,0,0,0.5) | [rgb]{0.5,0,0} + | rgb(25%,255,50%) | [rgb]{0.25,1,0.5} + ================== ==================== ============= ========================== + + It is also possible to add user-defined LaTeX only styles to a HTML-CSS Styler + using the ``--latex`` flag, and to add LaTeX parsing options that the + converter will detect within a CSS-comment. + + >>> df = pd.DataFrame([[1]]) + >>> df.style.set_properties( + ... **{"font-weight": "bold /* --dwrap */", "Huge": "--latex--rwrap"} + ... ).to_latex(convert_css=True) # doctest: +SKIP + \begin{tabular}{lr} + {} & {0} \\ + 0 & {\bfseries}{\Huge{1}} \\ + \end{tabular} + + Examples + -------- + Below we give a complete step by step example adding some advanced features + and noting some common gotchas. + + First we create the DataFrame and Styler as usual, including MultiIndex rows + and columns, which allow for more advanced formatting options: + + >>> cidx = pd.MultiIndex.from_arrays([ + ... ["Equity", "Equity", "Equity", "Equity", + ... "Stats", "Stats", "Stats", "Stats", "Rating"], + ... ["Energy", "Energy", "Consumer", "Consumer", "", "", "", "", ""], + ... ["BP", "Shell", "H&M", "Unilever", + ... "Std Dev", "Variance", "52w High", "52w Low", ""] + ... ]) + >>> iidx = pd.MultiIndex.from_arrays([ + ... ["Equity", "Equity", "Equity", "Equity"], + ... ["Energy", "Energy", "Consumer", "Consumer"], + ... ["BP", "Shell", "H&M", "Unilever"] + ... ]) + >>> styler = pd.DataFrame([ + ... [1, 0.8, 0.66, 0.72, 32.1678, 32.1678**2, 335.12, 240.89, "Buy"], + ... [0.8, 1.0, 0.69, 0.79, 1.876, 1.876**2, 14.12, 19.78, "Hold"], + ... [0.66, 0.69, 1.0, 0.86, 7, 7**2, 210.9, 140.6, "Buy"], + ... [0.72, 0.79, 0.86, 1.0, 213.76, 213.76**2, 2807, 3678, "Sell"], + ... ], columns=cidx, index=iidx).style + + Second we will format the display and, since our table is quite wide, will + hide the repeated level-0 of the index: + + >>> styler.format(subset="Equity", precision=2) + ... .format(subset="Stats", precision=1, thousands=",") + ... .format(subset="Rating", formatter=str.upper) + ... .format_index(escape="latex", axis=1) + ... .format_index(escape="latex", axis=0) + ... .hide(level=0, axis=0) # doctest: +SKIP + + Note that one of the string entries of the index and column headers is "H&M". + Without applying the `escape="latex"` option to the `format_index` method the + resultant LaTeX will fail to render, and the error returned is quite + difficult to debug. Using the appropriate escape the "&" is converted to "\\&". + + Thirdly we will apply some (CSS-HTML) styles to our object. We will use a + builtin method and also define our own method to highlight the stock + recommendation: + + >>> def rating_color(v): + ... if v == "Buy": color = "#33ff85" + ... elif v == "Sell": color = "#ff5933" + ... else: color = "#ffdd33" + ... 
return f"color: {color}; font-weight: bold;" + >>> styler.background_gradient(cmap="inferno", subset="Equity", vmin=0, vmax=1) + ... .applymap(rating_color, subset="Rating") # doctest: +SKIP + + All the above styles will work with HTML (see below) and LaTeX upon conversion: + + .. figure:: ../../_static/style/latex_stocks_html.png + + However, we finally want to add one LaTeX only style + (from the {graphicx} package), that is not easy to convert from CSS and + pandas does not support it. Notice the `--latex` flag used here, + as well as `--rwrap` to ensure this is formatted correctly and + not ignored upon conversion. + + >>> styler.applymap_index( + ... lambda v: "rotatebox:{45}--rwrap--latex;", level=2, axis=1 + ... ) # doctest: +SKIP + + Finally we render our LaTeX adding in other options as required: + + >>> styler.to_latex( + ... caption="Selected stock correlation and simple statistics.", + ... clines="skip-last;data", + ... convert_css=True, + ... position_float="centering", + ... multicol_align="|c|", + ... hrules=True, + ... ) # doctest: +SKIP + \begin{table} + \centering + \caption{Selected stock correlation and simple statistics.} + \begin{tabular}{llrrrrrrrrl} + \toprule + & & \multicolumn{4}{|c|}{Equity} & \multicolumn{4}{|c|}{Stats} & Rating \\ + & & \multicolumn{2}{|c|}{Energy} & \multicolumn{2}{|c|}{Consumer} & + \multicolumn{4}{|c|}{} & \\ + & & \rotatebox{45}{BP} & \rotatebox{45}{Shell} & \rotatebox{45}{H\&M} & + \rotatebox{45}{Unilever} & \rotatebox{45}{Std Dev} & \rotatebox{45}{Variance} & + \rotatebox{45}{52w High} & \rotatebox{45}{52w Low} & \rotatebox{45}{} \\ + \midrule + \multirow[c]{2}{*}{Energy} & BP & {\cellcolor[HTML]{FCFFA4}} + \color[HTML]{000000} 1.00 & {\cellcolor[HTML]{FCA50A}} \color[HTML]{000000} + 0.80 & {\cellcolor[HTML]{EB6628}} \color[HTML]{F1F1F1} 0.66 & + {\cellcolor[HTML]{F68013}} \color[HTML]{F1F1F1} 0.72 & 32.2 & 1,034.8 & 335.1 + & 240.9 & \color[HTML]{33FF85} \bfseries BUY \\ + & Shell & {\cellcolor[HTML]{FCA50A}} \color[HTML]{000000} 0.80 & + {\cellcolor[HTML]{FCFFA4}} \color[HTML]{000000} 1.00 & + {\cellcolor[HTML]{F1731D}} \color[HTML]{F1F1F1} 0.69 & + {\cellcolor[HTML]{FCA108}} \color[HTML]{000000} 0.79 & 1.9 & 3.5 & 14.1 & + 19.8 & \color[HTML]{FFDD33} \bfseries HOLD \\ + \cline{1-11} + \multirow[c]{2}{*}{Consumer} & H\&M & {\cellcolor[HTML]{EB6628}} + \color[HTML]{F1F1F1} 0.66 & {\cellcolor[HTML]{F1731D}} \color[HTML]{F1F1F1} + 0.69 & {\cellcolor[HTML]{FCFFA4}} \color[HTML]{000000} 1.00 & + {\cellcolor[HTML]{FAC42A}} \color[HTML]{000000} 0.86 & 7.0 & 49.0 & 210.9 & + 140.6 & \color[HTML]{33FF85} \bfseries BUY \\ + & Unilever & {\cellcolor[HTML]{F68013}} \color[HTML]{F1F1F1} 0.72 & + {\cellcolor[HTML]{FCA108}} \color[HTML]{000000} 0.79 & + {\cellcolor[HTML]{FAC42A}} \color[HTML]{000000} 0.86 & + {\cellcolor[HTML]{FCFFA4}} \color[HTML]{000000} 1.00 & 213.8 & 45,693.3 & + 2,807.0 & 3,678.0 & \color[HTML]{FF5933} \bfseries SELL \\ + \cline{1-11} + \bottomrule + \end{tabular} + \end{table} + + .. 
figure:: ../../_static/style/latex_stocks.png + """ + obj = self._copy(deepcopy=True) # manipulate table_styles on obj, not self + + table_selectors = ( + [style["selector"] for style in self.table_styles] + if self.table_styles is not None + else [] + ) + + if column_format is not None: + # add more recent setting to table_styles + obj.set_table_styles( + [{"selector": "column_format", "props": f":{column_format}"}], + overwrite=False, + ) + elif "column_format" in table_selectors: + pass # adopt what has been previously set in table_styles + else: + # create a default: set float, complex, int cols to 'r' ('S'), index to 'l' + _original_columns = self.data.columns + self.data.columns = RangeIndex(stop=len(self.data.columns)) + numeric_cols = self.data._get_numeric_data().columns.to_list() + self.data.columns = _original_columns + column_format = "" + for level in range(self.index.nlevels): + column_format += "" if self.hide_index_[level] else "l" + for ci, _ in enumerate(self.data.columns): + if ci not in self.hidden_columns: + column_format += ( + ("r" if not siunitx else "S") if ci in numeric_cols else "l" + ) + obj.set_table_styles( + [{"selector": "column_format", "props": f":{column_format}"}], + overwrite=False, + ) + + if position: + obj.set_table_styles( + [{"selector": "position", "props": f":{position}"}], + overwrite=False, + ) + + if position_float: + if environment == "longtable": + raise ValueError( + "`position_float` cannot be used in 'longtable' `environment`" + ) + if position_float not in ["raggedright", "raggedleft", "centering"]: + raise ValueError( + f"`position_float` should be one of " + f"'raggedright', 'raggedleft', 'centering', " + f"got: '{position_float}'" + ) + obj.set_table_styles( + [{"selector": "position_float", "props": f":{position_float}"}], + overwrite=False, + ) + + hrules = get_option("styler.latex.hrules") if hrules is None else hrules + if hrules: + obj.set_table_styles( + [ + {"selector": "toprule", "props": ":toprule"}, + {"selector": "midrule", "props": ":midrule"}, + {"selector": "bottomrule", "props": ":bottomrule"}, + ], + overwrite=False, + ) + + if label: + obj.set_table_styles( + [{"selector": "label", "props": f":{{{label.replace(':', '§')}}}"}], + overwrite=False, + ) + + if caption: + obj.set_caption(caption) + + if sparse_index is None: + sparse_index = get_option("styler.sparse.index") + if sparse_columns is None: + sparse_columns = get_option("styler.sparse.columns") + environment = environment or get_option("styler.latex.environment") + multicol_align = multicol_align or get_option("styler.latex.multicol_align") + multirow_align = multirow_align or get_option("styler.latex.multirow_align") + latex = obj._render_latex( + sparse_index=sparse_index, + sparse_columns=sparse_columns, + multirow_align=multirow_align, + multicol_align=multicol_align, + environment=environment, + convert_css=convert_css, + siunitx=siunitx, + clines=clines, + ) + + encoding = ( + (encoding or get_option("styler.render.encoding")) + if isinstance(buf, str) # i.e. 
a filepath + else encoding + ) + return save_to_buffer(latex, buf=buf, encoding=encoding) + + @overload + def to_html( + self, + buf: FilePath | WriteBuffer[str], + *, + table_uuid: str | None = ..., + table_attributes: str | None = ..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + bold_headers: bool = ..., + caption: str | None = ..., + max_rows: int | None = ..., + max_columns: int | None = ..., + encoding: str | None = ..., + doctype_html: bool = ..., + exclude_styles: bool = ..., + **kwargs, + ) -> None: + ... + + @overload + def to_html( + self, + buf: None = ..., + *, + table_uuid: str | None = ..., + table_attributes: str | None = ..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + bold_headers: bool = ..., + caption: str | None = ..., + max_rows: int | None = ..., + max_columns: int | None = ..., + encoding: str | None = ..., + doctype_html: bool = ..., + exclude_styles: bool = ..., + **kwargs, + ) -> str: + ... + + @Substitution(buf=buf, encoding=encoding) + def to_html( + self, + buf: FilePath | WriteBuffer[str] | None = None, + *, + table_uuid: str | None = None, + table_attributes: str | None = None, + sparse_index: bool | None = None, + sparse_columns: bool | None = None, + bold_headers: bool = False, + caption: str | None = None, + max_rows: int | None = None, + max_columns: int | None = None, + encoding: str | None = None, + doctype_html: bool = False, + exclude_styles: bool = False, + **kwargs, + ) -> str | None: + """ + Write Styler to a file, buffer or string in HTML-CSS format. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + %(buf)s + table_uuid : str, optional + Id attribute assigned to the HTML element in the format: + + ``
<table id="T_<table_uuid>" ..>`` + + If not given uses Styler's initially assigned value. + table_attributes : str, optional + Attributes to assign within the `<table>` HTML element in the format: + + ``<table .. <table_attributes> 
    >`` + + If not given defaults to Styler's preexisting value. + sparse_index : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.index`` value. + + .. versionadded:: 1.4.0 + sparse_columns : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each + column. Defaults to ``pandas.options.styler.sparse.columns`` value. + + .. versionadded:: 1.4.0 + bold_headers : bool, optional + Adds "font-weight: bold;" as a CSS property to table style header cells. + + .. versionadded:: 1.4.0 + caption : str, optional + Set, or overwrite, the caption on Styler before rendering. + + .. versionadded:: 1.4.0 + max_rows : int, optional + The maximum number of rows that will be rendered. Defaults to + ``pandas.options.styler.render.max_rows/max_columns``. + + .. versionadded:: 1.4.0 + max_columns : int, optional + The maximum number of columns that will be rendered. Defaults to + ``pandas.options.styler.render.max_columns``, which is None. + + Rows and columns may be reduced if the number of total elements is + large. This value is set to ``pandas.options.styler.render.max_elements``, + which is 262144 (18 bit browser rendering). + + .. versionadded:: 1.4.0 + %(encoding)s + doctype_html : bool, default False + Whether to output a fully structured HTML file including all + HTML elements, or just the core ``' + '
    ' + ' ' + ' ' + ' ' + ' ' + ' ' + ' ' + '
    0
    1
    ' + """ + if not classes.index.is_unique or not classes.columns.is_unique: + raise KeyError( + "Classes render only if `classes` has unique index and columns." + ) + classes = classes.reindex_like(self.data) + + for r, row_tup in enumerate(classes.itertuples()): + for c, value in enumerate(row_tup[1:]): + if not (pd.isna(value) or value == ""): + self.cell_context[(r, c)] = str(value) + + return self + + def _update_ctx(self, attrs: DataFrame) -> None: + """ + Update the state of the ``Styler`` for data cells. + + Collects a mapping of {index_label: [('', ''), ..]}. + + Parameters + ---------- + attrs : DataFrame + should contain strings of ': ;: ' + Whitespace shouldn't matter and the final trailing ';' shouldn't + matter. + """ + if not self.index.is_unique or not self.columns.is_unique: + raise KeyError( + "`Styler.apply` and `.applymap` are not compatible " + "with non-unique index or columns." + ) + + for cn in attrs.columns: + j = self.columns.get_loc(cn) + ser = attrs[cn] + for rn, c in ser.items(): + if not c or pd.isna(c): + continue + css_list = maybe_convert_css_to_tuples(c) + i = self.index.get_loc(rn) + self.ctx[(i, j)].extend(css_list) + + def _update_ctx_header(self, attrs: DataFrame, axis: int) -> None: + """ + Update the state of the ``Styler`` for header cells. + + Collects a mapping of {index_label: [('', ''), ..]}. + + Parameters + ---------- + attrs : Series + Should contain strings of ': ;: ', and an + integer index. + Whitespace shouldn't matter and the final trailing ';' shouldn't + matter. + axis : int + Identifies whether the ctx object being updated is the index or columns + """ + for j in attrs.columns: + ser = attrs[j] + for i, c in ser.items(): + if not c: + continue + css_list = maybe_convert_css_to_tuples(c) + if axis == 0: + self.ctx_index[(i, j)].extend(css_list) + else: + self.ctx_columns[(j, i)].extend(css_list) + + def _copy(self, deepcopy: bool = False) -> Styler: + """ + Copies a Styler, allowing for deepcopy or shallow copy + + Copying a Styler aims to recreate a new Styler object which contains the same + data and styles as the original. 
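+
+ (Usage sketch, for orientation only: ``copy.copy(styler)`` dispatches to
+ ``__copy__`` below for a shallow copy, and ``copy.deepcopy(styler)`` to
+ ``__deepcopy__`` for a deep copy; both route through this method.)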
+ + Data dependent attributes [copied and NOT exported]: + - formatting (._display_funcs) + - hidden index values or column values (.hidden_rows, .hidden_columns) + - tooltips + - cell_context (cell css classes) + - ctx (cell css styles) + - caption + - concatenated stylers + + Non-data dependent attributes [copied and exported]: + - css + - hidden index state and hidden columns state (.hide_index_, .hide_columns_) + - table_attributes + - table_styles + - applied styles (_todo) + + """ + # GH 40675 + styler = Styler( + self.data, # populates attributes 'data', 'columns', 'index' as shallow + ) + shallow = [ # simple string or boolean immutables + "hide_index_", + "hide_columns_", + "hide_column_names", + "hide_index_names", + "table_attributes", + "cell_ids", + "caption", + "uuid", + "uuid_len", + "template_latex", # also copy templates if these have been customised + "template_html_style", + "template_html_table", + "template_html", + ] + deep = [ # nested lists or dicts + "css", + "concatenated", + "_display_funcs", + "_display_funcs_index", + "_display_funcs_columns", + "hidden_rows", + "hidden_columns", + "ctx", + "ctx_index", + "ctx_columns", + "cell_context", + "_todo", + "table_styles", + "tooltips", + ] + + for attr in shallow: + setattr(styler, attr, getattr(self, attr)) + + for attr in deep: + val = getattr(self, attr) + setattr(styler, attr, copy.deepcopy(val) if deepcopy else val) + + return styler + + def __copy__(self) -> Styler: + return self._copy(deepcopy=False) + + def __deepcopy__(self, memo) -> Styler: + return self._copy(deepcopy=True) + + def clear(self) -> None: + """ + Reset the ``Styler``, removing any previously applied styles. + + Returns None. + """ + # create default GH 40675 + clean_copy = Styler(self.data, uuid=self.uuid) + clean_attrs = [a for a in clean_copy.__dict__ if not callable(a)] + self_attrs = [a for a in self.__dict__ if not callable(a)] # maybe more attrs + for attr in clean_attrs: + setattr(self, attr, getattr(clean_copy, attr)) + for attr in set(self_attrs).difference(clean_attrs): + delattr(self, attr) + + def _apply( + self, + func: Callable, + axis: Axis | None = 0, + subset: Subset | None = None, + **kwargs, + ) -> Styler: + subset = slice(None) if subset is None else subset + subset = non_reducing_slice(subset) + data = self.data.loc[subset] + if data.empty: + result = DataFrame() + elif axis is None: + result = func(data, **kwargs) + if not isinstance(result, DataFrame): + if not isinstance(result, np.ndarray): + raise TypeError( + f"Function {repr(func)} must return a DataFrame or ndarray " + f"when passed to `Styler.apply` with axis=None" + ) + if not (data.shape == result.shape): + raise ValueError( + f"Function {repr(func)} returned ndarray with wrong shape.\n" + f"Result has shape: {result.shape}\n" + f"Expected shape: {data.shape}" + ) + result = DataFrame(result, index=data.index, columns=data.columns) + else: + axis = self.data._get_axis_number(axis) + if axis == 0: + result = data.apply(func, axis=0, **kwargs) + else: + result = data.T.apply(func, axis=0, **kwargs).T # see GH 42005 + + if isinstance(result, Series): + raise ValueError( + f"Function {repr(func)} resulted in the apply method collapsing to a " + f"Series.\nUsually, this is the result of the function returning a " + f"single value, instead of list-like." 
+ ) + msg = ( + f"Function {repr(func)} created invalid {{0}} labels.\nUsually, this is " + f"the result of the function returning a " + f"{'Series' if axis is not None else 'DataFrame'} which contains invalid " + f"labels, or returning an incorrectly shaped, list-like object which " + f"cannot be mapped to labels, possibly due to applying the function along " + f"the wrong axis.\n" + f"Result {{0}} has shape: {{1}}\n" + f"Expected {{0}} shape: {{2}}" + ) + if not all(result.index.isin(data.index)): + raise ValueError(msg.format("index", result.index.shape, data.index.shape)) + if not all(result.columns.isin(data.columns)): + raise ValueError( + msg.format("columns", result.columns.shape, data.columns.shape) + ) + self._update_ctx(result) + return self + + @Substitution(subset=subset) + def apply( + self, + func: Callable, + axis: Axis | None = 0, + subset: Subset | None = None, + **kwargs, + ) -> Styler: + """ + Apply a CSS-styling function column-wise, row-wise, or table-wise. + + Updates the HTML representation with the result. + + Parameters + ---------- + func : function + ``func`` should take a Series if ``axis`` in [0,1] and return a list-like + object of same length, or a Series, not necessarily of same length, with + valid index labels considering ``subset``. + ``func`` should take a DataFrame if ``axis`` is ``None`` and return either + an ndarray with the same shape or a DataFrame, not necessarily of the same + shape, with valid index and columns labels considering ``subset``. + + .. versionchanged:: 1.3.0 + + .. versionchanged:: 1.4.0 + + axis : {0 or 'index', 1 or 'columns', None}, default 0 + Apply to each column (``axis=0`` or ``'index'``), to each row + (``axis=1`` or ``'columns'``), or to the entire DataFrame at once + with ``axis=None``. + %(subset)s + **kwargs : dict + Pass along to ``func``. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap_index: Apply a CSS-styling function to headers elementwise. + Styler.apply_index: Apply a CSS-styling function to headers level-wise. + Styler.applymap: Apply a CSS-styling function elementwise. + + Notes + ----- + The elements of the output of ``func`` should be CSS styles as strings, in the + format 'attribute: value; attribute2: value2; ...' or, + if nothing is to be applied to that element, an empty string or ``None``. + + This is similar to ``DataFrame.apply``, except that ``axis=None`` + applies the function to the entire DataFrame at once, + rather than column-wise or row-wise. + + Examples + -------- + >>> def highlight_max(x, color): + ... return np.where(x == np.nanmax(x.to_numpy()), f"color: {color};", None) + >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) + >>> df.style.apply(highlight_max, color='red') # doctest: +SKIP + >>> df.style.apply(highlight_max, color='blue', axis=1) # doctest: +SKIP + >>> df.style.apply(highlight_max, color='green', axis=None) # doctest: +SKIP + + Using ``subset`` to restrict application to a single column or multiple columns + + >>> df.style.apply(highlight_max, color='red', subset="A") + ... # doctest: +SKIP + >>> df.style.apply(highlight_max, color='red', subset=["A", "B"]) + ... # doctest: +SKIP + + Using a 2d input to ``subset`` to select rows in addition to columns + + >>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None))) + ... # doctest: +SKIP + >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A")) + ... 
# doctest: +SKIP + + Using a function which returns a Series / DataFrame of unequal length but + containing valid index labels + + >>> df = pd.DataFrame([[1, 2], [3, 4], [4, 6]], index=["A1", "A2", "Total"]) + >>> total_style = pd.Series("font-weight: bold;", index=["Total"]) + >>> df.style.apply(lambda s: total_style) # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. + """ + self._todo.append( + (lambda instance: getattr(instance, "_apply"), (func, axis, subset), kwargs) + ) + return self + + def _apply_index( + self, + func: Callable, + axis: int | str = 0, + level: Level | list[Level] | None = None, + method: str = "apply", + **kwargs, + ) -> Styler: + axis = self.data._get_axis_number(axis) + obj = self.index if axis == 0 else self.columns + + levels_ = refactor_levels(level, obj) + data = DataFrame(obj.to_list()).loc[:, levels_] + + if method == "apply": + result = data.apply(func, axis=0, **kwargs) + elif method == "applymap": + result = data.applymap(func, **kwargs) + + self._update_ctx_header(result, axis) + return self + + @doc( + this="apply", + wise="level-wise", + alt="applymap", + altwise="elementwise", + func="take a Series and return a string array of the same length", + input_note="the index as a Series, if an Index, or a level of a MultiIndex", + output_note="an identically sized array of CSS styles as strings", + var="s", + ret='np.where(s == "B", "background-color: yellow;", "")', + ret2='["background-color: yellow;" if "x" in v else "" for v in s]', + ) + def apply_index( + self, + func: Callable, + axis: int | str = 0, + level: Level | list[Level] | None = None, + **kwargs, + ) -> Styler: + """ + Apply a CSS-styling function to the index or column headers, {wise}. + + Updates the HTML representation with the result. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + func : function + ``func`` should {func}. + axis : {{0, 1, "index", "columns"}} + The headers over which to apply the function. + level : int, str, list, optional + If index is MultiIndex the level(s) over which to apply the function. + **kwargs : dict + Pass along to ``func``. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.{alt}_index: Apply a CSS-styling function to headers {altwise}. + Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise. + Styler.applymap: Apply a CSS-styling function elementwise. + + Notes + ----- + Each input to ``func`` will be {input_note}. The output of ``func`` should be + {output_note}, in the format 'attribute: value; attribute2: value2; ...' + or, if nothing is to be applied to that element, an empty string or ``None``. + + Examples + -------- + Basic usage to conditionally highlight values in the index. + + >>> df = pd.DataFrame([[1,2], [3,4]], index=["A", "B"]) + >>> def color_b(s): + ... return {ret} + >>> df.style.{this}_index(color_b) # doctest: +SKIP + + .. figure:: ../../_static/style/appmaphead1.png + + Selectively applying to specific levels of MultiIndex columns. + + >>> midx = pd.MultiIndex.from_product([['ix', 'jy'], [0, 1], ['x3', 'z4']]) + >>> df = pd.DataFrame([np.arange(8)], columns=midx) + >>> def highlight_x({var}): + ... return {ret2} + >>> df.style.{this}_index(highlight_x, axis="columns", level=[0, 2]) + ... # doctest: +SKIP + + .. 
figure:: ../../_static/style/appmaphead2.png + """ + self._todo.append( + ( + lambda instance: getattr(instance, "_apply_index"), + (func, axis, level, "apply"), + kwargs, + ) + ) + return self + + @doc( + apply_index, + this="applymap", + wise="elementwise", + alt="apply", + altwise="level-wise", + func="take a scalar and return a string", + input_note="an index value, if an Index, or a level value of a MultiIndex", + output_note="CSS styles as a string", + var="v", + ret='"background-color: yellow;" if v == "B" else None', + ret2='"background-color: yellow;" if "x" in v else None', + ) + def applymap_index( + self, + func: Callable, + axis: int | str = 0, + level: Level | list[Level] | None = None, + **kwargs, + ) -> Styler: + self._todo.append( + ( + lambda instance: getattr(instance, "_apply_index"), + (func, axis, level, "applymap"), + kwargs, + ) + ) + return self + + def _applymap( + self, func: Callable, subset: Subset | None = None, **kwargs + ) -> Styler: + func = partial(func, **kwargs) # applymap doesn't take kwargs? + if subset is None: + subset = IndexSlice[:] + subset = non_reducing_slice(subset) + result = self.data.loc[subset].applymap(func) + self._update_ctx(result) + return self + + @Substitution(subset=subset) + def applymap( + self, func: Callable, subset: Subset | None = None, **kwargs + ) -> Styler: + """ + Apply a CSS-styling function elementwise. + + Updates the HTML representation with the result. + + Parameters + ---------- + func : function + ``func`` should take a scalar and return a string. + %(subset)s + **kwargs : dict + Pass along to ``func``. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap_index: Apply a CSS-styling function to headers elementwise. + Styler.apply_index: Apply a CSS-styling function to headers level-wise. + Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise. + + Notes + ----- + The elements of the output of ``func`` should be CSS styles as strings, in the + format 'attribute: value; attribute2: value2; ...' or, + if nothing is to be applied to that element, an empty string or ``None``. + + Examples + -------- + >>> def color_negative(v, color): + ... return f"color: {color};" if v < 0 else None + >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) + >>> df.style.applymap(color_negative, color='red') # doctest: +SKIP + + Using ``subset`` to restrict application to a single column or multiple columns + + >>> df.style.applymap(color_negative, color='red', subset="A") + ... # doctest: +SKIP + >>> df.style.applymap(color_negative, color='red', subset=["A", "B"]) + ... # doctest: +SKIP + + Using a 2d input to ``subset`` to select rows in addition to columns + + >>> df.style.applymap(color_negative, color='red', + ... subset=([0,1,2], slice(None))) # doctest: +SKIP + >>> df.style.applymap(color_negative, color='red', subset=(slice(0,5,2), "A")) + ... # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. + """ + self._todo.append( + (lambda instance: getattr(instance, "_applymap"), (func, subset), kwargs) + ) + return self + + @Substitution(subset=subset) + def where( + self, + cond: Callable, + value: str, + other: str | None = None, + subset: Subset | None = None, + **kwargs, + ) -> Styler: + """ + Apply CSS-styles based on a conditional function elementwise. + + .. deprecated:: 1.3.0 + + Updates the HTML representation with a style which is + selected in accordance with the return value of a function. 
+ + Parameters + ---------- + cond : callable + ``cond`` should take a scalar, and optional keyword arguments, and return + a boolean. + value : str + Applied when ``cond`` returns true. + other : str + Applied when ``cond`` returns false. + %(subset)s + **kwargs : dict + Pass along to ``cond``. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap: Apply a CSS-styling function elementwise. + Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise. + + Notes + ----- + This method is deprecated. + + This method is a convenience wrapper for :meth:`Styler.applymap`, which we + recommend using instead. + + The example: + + >>> df = pd.DataFrame([[1, 2], [3, 4]]) + >>> def cond(v, limit=4): + ... return v > 1 and v != limit + >>> df.style.where(cond, value='color:green;', other='color:red;') + ... # doctest: +SKIP + + should be refactored to: + + >>> def style_func(v, value, other, limit=4): + ... cond = v > 1 and v != limit + ... return value if cond else other + >>> df.style.applymap(style_func, value='color:green;', other='color:red;') + ... # doctest: +SKIP + """ + warnings.warn( + "this method is deprecated in favour of `Styler.applymap()`", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if other is None: + other = "" + + return self.applymap( + lambda val: value if cond(val, **kwargs) else other, + subset=subset, + ) + + def set_precision(self, precision: int) -> StylerRenderer: + """ + Set the precision used to display values. + + .. deprecated:: 1.3.0 + + Parameters + ---------- + precision : int + + Returns + ------- + self : Styler + + Notes + ----- + This method is deprecated see `Styler.format`. + """ + warnings.warn( + "this method is deprecated in favour of `Styler.format(precision=..)`", + FutureWarning, + stacklevel=find_stack_level(), + ) + self.precision = precision + return self.format(precision=precision, na_rep=self.na_rep) + + def set_table_attributes(self, attributes: str) -> Styler: + """ + Set the table attributes added to the ```` HTML element. + + These are items in addition to automatic (by default) ``id`` attribute. + + Parameters + ---------- + attributes : str + + Returns + ------- + self : Styler + + See Also + -------- + Styler.set_table_styles: Set the table styles included within the `` block + + Parameters + ---------- + sparsify_index : bool + Whether index_headers section will add rowspan attributes (>1) to elements. + + Returns + ------- + body : list + The associated HTML elements needed for template rendering. + """ + rlabels = self.data.index.tolist() + if not isinstance(self.data.index, MultiIndex): + rlabels = [[x] for x in rlabels] + + body: list = [] + visible_row_count: int = 0 + for r, row_tup in [ + z for z in enumerate(self.data.itertuples()) if z[0] not in self.hidden_rows + ]: + visible_row_count += 1 + if self._check_trim( + visible_row_count, + max_rows, + body, + "row", + ): + break + + body_row = self._generate_body_row( + (r, row_tup, rlabels), max_cols, idx_lengths + ) + body.append(body_row) + return body + + def _check_trim( + self, + count, + max, + obj, + element, + css=None, + value="...", + ): + """ + Indicates whether to break render loops and append a trimming indicator + + Parameters + ---------- + count : int + The loop count of previous visible items. + max : int + The allowable rendered items in the loop. + obj : list + The current render collection of the rendered items. + element : str + The type of element to append in the case a trimming indicator is needed. 
+ css : str, optional + The css to add to the trimming indicator element. + value : str, optional + The value of the elements display if necessary. + + Returns + ------- + result : bool + Whether a trimming element was required and appended. + """ + if count > max: + if element == "row": + obj.append(self._generate_trimmed_row(max)) + else: + obj.append(_element(element, css, value, True, attributes="")) + return True + return False + + def _generate_trimmed_row(self, max_cols: int) -> list: + """ + When a render has too many rows we generate a trimming row containing "..." + + Parameters + ---------- + max_cols : int + Number of permissible columns + + Returns + ------- + list of elements + """ + index_headers = [ + _element( + "th", + ( + f"{self.css['row_heading']} {self.css['level']}{c} " + f"{self.css['row_trim']}" + ), + "...", + not self.hide_index_[c], + attributes="", + ) + for c in range(self.data.index.nlevels) + ] + + data: list = [] + visible_col_count: int = 0 + for c, _ in enumerate(self.columns): + data_element_visible = c not in self.hidden_columns + if data_element_visible: + visible_col_count += 1 + if self._check_trim( + visible_col_count, + max_cols, + data, + "td", + f"{self.css['data']} {self.css['row_trim']} {self.css['col_trim']}", + ): + break + + data.append( + _element( + "td", + f"{self.css['data']} {self.css['col']}{c} {self.css['row_trim']}", + "...", + data_element_visible, + attributes="", + ) + ) + + return index_headers + data + + def _generate_body_row( + self, + iter: tuple, + max_cols: int, + idx_lengths: dict, + ): + """ + Generate a regular row for the body section of appropriate format. + + +--------------------------------------------+---------------------------+ + | index_header_0 ... index_header_n | data_by_column ... | + +--------------------------------------------+---------------------------+ + + Parameters + ---------- + iter : tuple + Iterable from outer scope: row number, row data tuple, row index labels. + max_cols : int + Number of permissible columns. 
+ idx_lengths : dict + A map of the sparsification structure of the index + + Returns + ------- + list of elements + """ + r, row_tup, rlabels = iter + + index_headers = [] + for c, value in enumerate(rlabels[r]): + header_element_visible = ( + _is_visible(r, c, idx_lengths) and not self.hide_index_[c] + ) + header_element = _element( + "th", + ( + f"{self.css['row_heading']} {self.css['level']}{c} " + f"{self.css['row']}{r}" + ), + value, + header_element_visible, + display_value=self._display_funcs_index[(r, c)](value), + attributes=( + f'rowspan="{idx_lengths.get((c, r), 0)}"' + if idx_lengths.get((c, r), 0) > 1 + else "" + ), + ) + + if self.cell_ids: + header_element[ + "id" + ] = f"{self.css['level']}{c}_{self.css['row']}{r}" # id is given + if ( + header_element_visible + and (r, c) in self.ctx_index + and self.ctx_index[r, c] + ): + # always add id if a style is specified + header_element["id"] = f"{self.css['level']}{c}_{self.css['row']}{r}" + self.cellstyle_map_index[tuple(self.ctx_index[r, c])].append( + f"{self.css['level']}{c}_{self.css['row']}{r}" + ) + + index_headers.append(header_element) + + data: list = [] + visible_col_count: int = 0 + for c, value in enumerate(row_tup[1:]): + data_element_visible = ( + c not in self.hidden_columns and r not in self.hidden_rows + ) + if data_element_visible: + visible_col_count += 1 + if self._check_trim( + visible_col_count, + max_cols, + data, + "td", + f"{self.css['data']} {self.css['row']}{r} {self.css['col_trim']}", + ): + break + + # add custom classes from cell context + cls = "" + if (r, c) in self.cell_context: + cls = " " + self.cell_context[r, c] + + data_element = _element( + "td", + ( + f"{self.css['data']} {self.css['row']}{r} " + f"{self.css['col']}{c}{cls}" + ), + value, + data_element_visible, + attributes="", + display_value=self._display_funcs[(r, c)](value), + ) + + if self.cell_ids: + data_element["id"] = f"{self.css['row']}{r}_{self.css['col']}{c}" + if data_element_visible and (r, c) in self.ctx and self.ctx[r, c]: + # always add id if needed due to specified style + data_element["id"] = f"{self.css['row']}{r}_{self.css['col']}{c}" + self.cellstyle_map[tuple(self.ctx[r, c])].append( + f"{self.css['row']}{r}_{self.css['col']}{c}" + ) + + data.append(data_element) + + return index_headers + data + + def _translate_latex(self, d: dict, clines: str | None) -> None: + r""" + Post-process the default render dict for the LaTeX template format. + + Processing items included are: + - Remove hidden columns from the non-headers part of the body. + - Place cellstyles directly in td cells rather than use cellstyle_map. + - Remove hidden indexes or reinsert missing th elements if part of multiindex + or multirow sparsification (so that \multirow and \multicol work correctly). + """ + index_levels = self.index.nlevels + visible_index_level_n = index_levels - sum(self.hide_index_) + d["head"] = [ + [ + {**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]} + for c, col in enumerate(row) + if col["is_visible"] + ] + for r, row in enumerate(d["head"]) + ] + + def _concatenated_visible_rows(obj, n, row_indices): + """ + Extract all visible row indices recursively from concatenated stylers. 
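+
+ (Indices are offset by the running row count ``n`` so that rows of each
+ concatenated Styler are numbered after those of its parent.)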
+ """ + row_indices.extend( + [r + n for r in range(len(obj.index)) if r not in obj.hidden_rows] + ) + n += len(obj.index) + for concatenated in obj.concatenated: + n = _concatenated_visible_rows(concatenated, n, row_indices) + return n + + def concatenated_visible_rows(obj): + row_indices: list[int] = [] + _concatenated_visible_rows(obj, 0, row_indices) + # TODO try to consolidate the concat visible rows + # methods to a single function / recursion for simplicity + return row_indices + + body = [] + for r, row in zip(concatenated_visible_rows(self), d["body"]): + # note: cannot enumerate d["body"] because rows were dropped if hidden + # during _translate_body so must zip to acquire the true r-index associated + # with the ctx obj which contains the cell styles. + if all(self.hide_index_): + row_body_headers = [] + else: + row_body_headers = [ + { + **col, + "display_value": col["display_value"] + if col["is_visible"] + else "", + "cellstyle": self.ctx_index[r, c], + } + for c, col in enumerate(row[:index_levels]) + if (col["type"] == "th" and not self.hide_index_[c]) + ] + + row_body_cells = [ + {**col, "cellstyle": self.ctx[r, c]} + for c, col in enumerate(row[index_levels:]) + if (col["is_visible"] and col["type"] == "td") + ] + + body.append(row_body_headers + row_body_cells) + d["body"] = body + + # clines are determined from info on index_lengths and hidden_rows and input + # to a dict defining which row clines should be added in the template. + if clines not in [ + None, + "all;data", + "all;index", + "skip-last;data", + "skip-last;index", + ]: + raise ValueError( + f"`clines` value of {clines} is invalid. Should either be None or one " + f"of 'all;data', 'all;index', 'skip-last;data', 'skip-last;index'." + ) + elif clines is not None: + data_len = len(row_body_cells) if "data" in clines and d["body"] else 0 + + d["clines"] = defaultdict(list) + visible_row_indexes: list[int] = [ + r for r in range(len(self.data.index)) if r not in self.hidden_rows + ] + visible_index_levels: list[int] = [ + i for i in range(index_levels) if not self.hide_index_[i] + ] + for rn, r in enumerate(visible_row_indexes): + for lvln, lvl in enumerate(visible_index_levels): + if lvl == index_levels - 1 and "skip-last" in clines: + continue + idx_len = d["index_lengths"].get((lvl, r), None) + if idx_len is not None: # i.e. not a sparsified entry + d["clines"][rn + idx_len].append( + f"\\cline{{{lvln+1}-{len(visible_index_levels)+data_len}}}" + ) + + def format( + self, + formatter: ExtFormatter | None = None, + subset: Subset | None = None, + na_rep: str | None = None, + precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, + escape: str | None = None, + hyperlinks: str | None = None, + ) -> StylerRenderer: + r""" + Format the text display value of cells. + + Parameters + ---------- + formatter : str, callable, dict or None + Object to define how values are displayed. See notes. + subset : label, array-like, IndexSlice, optional + A valid 2d input to `DataFrame.loc[]`, or, in the case of a 1d input + or single key, to `DataFrame.loc[:, ]` where the columns are + prioritised, to limit ``data`` to *before* applying the function. + na_rep : str, optional + Representation for missing values. + If ``na_rep`` is None, no special formatting is applied. + + .. versionadded:: 1.0.0 + + precision : int, optional + Floating point precision to use for display purposes, if not determined by + the specified ``formatter``. + + .. versionadded:: 1.3.0 + + decimal : str, default "." 
+ Character used as decimal separator for floats, complex and integers. + + .. versionadded:: 1.3.0 + + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers. + + .. versionadded:: 1.3.0 + + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. + Escaping is done before ``formatter``. + + .. versionadded:: 1.3.0 + + hyperlinks : {"html", "latex"}, optional + Convert string patterns containing https://, http://, ftp:// or www. to + HTML tags as clickable URL hyperlinks if "html", or LaTeX \href + commands if "latex". + + .. versionadded:: 1.4.0 + + Returns + ------- + self : Styler + + See Also + -------- + Styler.format_index: Format the text display value of index labels. + + Notes + ----- + This method assigns a formatting function, ``formatter``, to each cell in the + DataFrame. If ``formatter`` is ``None``, then the default formatter is used. + If a callable then that function should take a data value as input and return + a displayable representation, such as a string. If ``formatter`` is + given as a string this is assumed to be a valid Python format specification + and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given, + keys should correspond to column names, and values should be string or + callable, as above. + + The default formatter currently expresses floats and complex numbers with the + pandas display precision unless using the ``precision`` argument here. The + default formatter does not adjust the representation of missing values unless + the ``na_rep`` argument is used. + + The ``subset`` argument defines which region to apply the formatting function + to. If the ``formatter`` argument is given in dict form but does not include + all columns within the subset then these columns will have the default formatter + applied. Any columns in the formatter dict excluded from the subset will + be ignored. + + When using a ``formatter`` string the dtypes must be compatible, otherwise a + `ValueError` will be raised. + + When instantiating a Styler, default formatting can be applied be setting the + ``pandas.options``: + + - ``styler.format.formatter``: default None. + - ``styler.format.na_rep``: default None. + - ``styler.format.precision``: default 6. + - ``styler.format.decimal``: default ".". + - ``styler.format.thousands``: default None. + - ``styler.format.escape``: default None. + + .. warning:: + `Styler.format` is ignored when using the output format `Styler.to_excel`, + since Excel and Python have inherrently different formatting structures. + However, it is possible to use the `number-format` pseudo CSS attribute + to force Excel permissible formatting. See examples. 
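+
+ As an illustrative sketch only, several of the display arguments above can be
+ combined in a single call; the underlying data are not modified because only
+ display functions are assigned:
+
+ >>> df = pd.DataFrame({"A": [12345.6789, np.nan]})
+ >>> df.style.format(precision=2, thousands=",", na_rep="MISS")  # doctest: +SKIP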
+ + Examples + -------- + Using ``na_rep`` and ``precision`` with the default ``formatter`` + + >>> df = pd.DataFrame([[np.nan, 1.0, 'A'], [2.0, np.nan, 3.0]]) + >>> df.style.format(na_rep='MISS', precision=3) # doctest: +SKIP + 0 1 2 + 0 MISS 1.000 A + 1 2.000 MISS 3.000 + + Using a ``formatter`` specification on consistent column dtypes + + >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) # doctest: +SKIP + 0 1 2 + 0 MISS 1.00 A + 1 2.00 MISS 3.000000 + + Using the default ``formatter`` for unspecified columns + + >>> df.style.format({0: '{:.2f}', 1: '£ {:.1f}'}, na_rep='MISS', precision=1) + ... # doctest: +SKIP + 0 1 2 + 0 MISS £ 1.0 A + 1 2.00 MISS 3.0 + + Multiple ``na_rep`` or ``precision`` specifications under the default + ``formatter``. + + >>> df.style.format(na_rep='MISS', precision=1, subset=[0]) + ... .format(na_rep='PASS', precision=2, subset=[1, 2]) # doctest: +SKIP + 0 1 2 + 0 MISS 1.00 A + 1 2.0 PASS 3.00 + + Using a callable ``formatter`` function. + + >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' + >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS') + ... # doctest: +SKIP + 0 1 2 + 0 MISS 1.0000 STRING + 1 2.0 MISS FLOAT + + Using a ``formatter`` with HTML ``escape`` and ``na_rep``. + + >>> df = pd.DataFrame([['
<div></div>', '"A&B"', None]]) + >>> s = df.style.format( + ... '<a href="a.com/{0}">{0}</a>', escape="html", na_rep="NA" + ... ) + >>> s.to_html() # doctest: +SKIP + ... +
    + + + ... + + Using a ``formatter`` with LaTeX ``escape``. + + >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]]) + >>> df.style.format("\\textbf{{{}}}", escape="latex").to_latex() + ... # doctest: +SKIP + \begin{tabular}{ll} + {} & {0} \\ + 0 & \textbf{123} \\ + 1 & \textbf{\textasciitilde \space \textasciicircum } \\ + 2 & \textbf{\$\%\#} \\ + \end{tabular} + + Pandas defines a `number-format` pseudo CSS attribute instead of the `.format` + method to create `to_excel` permissible formatting. Note that semi-colons are + CSS protected characters but used as separators in Excel's format string. + Replace semi-colons with the section separator character (ASCII-245) when + defining the formatting here. + + >>> df = pd.DataFrame({"A": [1, 0, -1]}) + >>> pseudo_css = "number-format: 0§[Red](0)§-§@;" + >>> df.style.applymap(lambda v: css).to_excel("formatted_file.xlsx") + ... # doctest: +SKIP + + .. figure:: ../../_static/style/format_excel_css.png + """ + if all( + ( + formatter is None, + subset is None, + precision is None, + decimal == ".", + thousands is None, + na_rep is None, + escape is None, + hyperlinks is None, + ) + ): + self._display_funcs.clear() + return self # clear the formatter / revert to default and avoid looping + + subset = slice(None) if subset is None else subset + subset = non_reducing_slice(subset) + data = self.data.loc[subset] + + if not isinstance(formatter, dict): + formatter = {col: formatter for col in data.columns} + + cis = self.columns.get_indexer_for(data.columns) + ris = self.index.get_indexer_for(data.index) + for ci in cis: + format_func = _maybe_wrap_formatter( + formatter.get(self.columns[ci]), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + hyperlinks=hyperlinks, + ) + for ri in ris: + self._display_funcs[(ri, ci)] = format_func + + return self + + def format_index( + self, + formatter: ExtFormatter | None = None, + axis: int | str = 0, + level: Level | list[Level] | None = None, + na_rep: str | None = None, + precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, + escape: str | None = None, + hyperlinks: str | None = None, + ) -> StylerRenderer: + r""" + Format the text display value of index labels or column headers. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + formatter : str, callable, dict or None + Object to define how values are displayed. See notes. + axis : {0, "index", 1, "columns"} + Whether to apply the formatter to the index or column headers. + level : int, str, list + The level(s) over which to apply the generic formatter. + na_rep : str, optional + Representation for missing values. + If ``na_rep`` is None, no special formatting is applied. + precision : int, optional + Floating point precision to use for display purposes, if not determined by + the specified ``formatter``. + decimal : str, default "." + Character used as decimal separator for floats, complex and integers. + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers. + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. + Escaping is done before ``formatter``. 
+ hyperlinks : {"html", "latex"}, optional + Convert string patterns containing https://, http://, ftp:// or www. to + HTML tags as clickable URL hyperlinks if "html", or LaTeX \href + commands if "latex". + + Returns + ------- + self : Styler + + See Also + -------- + Styler.format: Format the text display value of data cells. + + Notes + ----- + This method assigns a formatting function, ``formatter``, to each level label + in the DataFrame's index or column headers. If ``formatter`` is ``None``, + then the default formatter is used. + If a callable then that function should take a label value as input and return + a displayable representation, such as a string. If ``formatter`` is + given as a string this is assumed to be a valid Python format specification + and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given, + keys should correspond to MultiIndex level numbers or names, and values should + be string or callable, as above. + + The default formatter currently expresses floats and complex numbers with the + pandas display precision unless using the ``precision`` argument here. The + default formatter does not adjust the representation of missing values unless + the ``na_rep`` argument is used. + + The ``level`` argument defines which levels of a MultiIndex to apply the + method to. If the ``formatter`` argument is given in dict form but does + not include all levels within the level argument then these unspecified levels + will have the default formatter applied. Any levels in the formatter dict + specifically excluded from the level argument will be ignored. + + When using a ``formatter`` string the dtypes must be compatible, otherwise a + `ValueError` will be raised. + + .. warning:: + `Styler.format_index` is ignored when using the output format + `Styler.to_excel`, since Excel and Python have inherrently different + formatting structures. + However, it is possible to use the `number-format` pseudo CSS attribute + to force Excel permissible formatting. See documentation for `Styler.format`. + + Examples + -------- + Using ``na_rep`` and ``precision`` with the default ``formatter`` + + >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0]) + >>> df.style.format_index(axis=1, na_rep='MISS', precision=3) # doctest: +SKIP + 2.000 MISS 4.000 + 0 1 2 3 + + Using a ``formatter`` specification on consistent dtypes in a level + + >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS') # doctest: +SKIP + 2.00 MISS 4.00 + 0 1 2 3 + + Using the default ``formatter`` for unspecified levels + + >>> df = pd.DataFrame([[1, 2, 3]], + ... columns=pd.MultiIndex.from_arrays([["a", "a", "b"],[2, np.nan, 4]])) + >>> df.style.format_index({0: lambda v: upper(v)}, axis=1, precision=1) + ... # doctest: +SKIP + A B + 2.0 nan 4.0 + 0 1 2 3 + + Using a callable ``formatter`` function. + + >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' + >>> df.style.format_index(func, axis=1, na_rep='MISS') + ... # doctest: +SKIP + STRING STRING + FLOAT MISS FLOAT + 0 1 2 3 + + Using a ``formatter`` with HTML ``escape`` and ``na_rep``. + + >>> df = pd.DataFrame([[1, 2, 3]], columns=['"A"', 'A&B', None]) + >>> s = df.style.format_index('$ {0}', axis=1, escape="html", na_rep="NA") + ... # doctest: +SKIP + + + or element. 
+ """ + if "display_value" not in kwargs: + kwargs["display_value"] = value + return { + "type": html_element, + "value": value, + "class": html_class, + "is_visible": is_visible, + **kwargs, + } + + +def _get_trimming_maximums( + rn, + cn, + max_elements, + max_rows=None, + max_cols=None, + scaling_factor=0.8, +) -> tuple[int, int]: + """ + Recursively reduce the number of rows and columns to satisfy max elements. + + Parameters + ---------- + rn, cn : int + The number of input rows / columns + max_elements : int + The number of allowable elements + max_rows, max_cols : int, optional + Directly specify an initial maximum rows or columns before compression. + scaling_factor : float + Factor at which to reduce the number of rows / columns to fit. + + Returns + ------- + rn, cn : tuple + New rn and cn values that satisfy the max_elements constraint + """ + + def scale_down(rn, cn): + if cn >= rn: + return rn, int(cn * scaling_factor) + else: + return int(rn * scaling_factor), cn + + if max_rows: + rn = max_rows if rn > max_rows else rn + if max_cols: + cn = max_cols if cn > max_cols else cn + + while rn * cn > max_elements: + rn, cn = scale_down(rn, cn) + + return rn, cn + + +def _get_level_lengths( + index: Index, + sparsify: bool, + max_index: int, + hidden_elements: Sequence[int] | None = None, +): + """ + Given an index, find the level length for each element. + + Parameters + ---------- + index : Index + Index or columns to determine lengths of each element + sparsify : bool + Whether to hide or show each distinct element in a MultiIndex + max_index : int + The maximum number of elements to analyse along the index due to trimming + hidden_elements : sequence of int + Index positions of elements hidden from display in the index affecting + length + + Returns + ------- + Dict : + Result is a dictionary of (level, initial_position): span + """ + if isinstance(index, MultiIndex): + levels = index.format(sparsify=lib.no_default, adjoin=False) + else: + levels = index.format() + + if hidden_elements is None: + hidden_elements = [] + + lengths = {} + if not isinstance(index, MultiIndex): + for i, value in enumerate(levels): + if i not in hidden_elements: + lengths[(0, i)] = 1 + return lengths + + for i, lvl in enumerate(levels): + visible_row_count = 0 # used to break loop due to display trimming + for j, row in enumerate(lvl): + if visible_row_count > max_index: + break + if not sparsify: + # then lengths will always equal 1 since no aggregation. 
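+ # (each visible, non-hidden label therefore gets an individual span of 1)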
+ if j not in hidden_elements: + lengths[(i, j)] = 1 + visible_row_count += 1 + elif (row is not lib.no_default) and (j not in hidden_elements): + # this element has not been sparsified so must be the start of section + last_label = j + lengths[(i, last_label)] = 1 + visible_row_count += 1 + elif row is not lib.no_default: + # even if the above is hidden, keep track of it in case length > 1 and + # later elements are visible + last_label = j + lengths[(i, last_label)] = 0 + elif j not in hidden_elements: + # then element must be part of sparsified section and is visible + visible_row_count += 1 + if visible_row_count > max_index: + break # do not add a length since the render trim limit reached + if lengths[(i, last_label)] == 0: + # if previous iteration was first-of-section but hidden then offset + last_label = j + lengths[(i, last_label)] = 1 + else: + # else add to previous iteration + lengths[(i, last_label)] += 1 + + non_zero_lengths = { + element: length for element, length in lengths.items() if length >= 1 + } + + return non_zero_lengths + + +def _is_visible(idx_row, idx_col, lengths) -> bool: + """ + Index -> {(idx_row, idx_col): bool}). + """ + return (idx_col, idx_row) in lengths + + +def format_table_styles(styles: CSSStyles) -> CSSStyles: + """ + looks for multiple CSS selectors and separates them: + [{'selector': 'td, th', 'props': 'a:v;'}] + ---> [{'selector': 'td', 'props': 'a:v;'}, + {'selector': 'th', 'props': 'a:v;'}] + """ + return [ + {"selector": selector, "props": css_dict["props"]} + for css_dict in styles + for selector in css_dict["selector"].split(",") + ] + + +def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any: + """ + Format the display of a value + + Parameters + ---------- + x : Any + Input variable to be formatted + precision : Int + Floating point precision used if ``x`` is float or complex. + thousands : bool, default False + Whether to group digits with thousands separated with ",". + + Returns + ------- + value : Any + Matches input type, or string if input is float or complex or int with sep. + """ + if is_float(x) or is_complex(x): + return f"{x:,.{precision}f}" if thousands else f"{x:.{precision}f}" + elif is_integer(x): + return f"{x:,.0f}" if thousands else f"{x:.0f}" + return x + + +def _wrap_decimal_thousands( + formatter: Callable, decimal: str, thousands: str | None +) -> Callable: + """ + Takes a string formatting function and wraps logic to deal with thousands and + decimal parameters, in the case that they are non-standard and that the input + is a (float, complex, int). + """ + + def wrapper(x): + if is_float(x) or is_integer(x) or is_complex(x): + if decimal != "." and thousands is not None and thousands != ",": + return ( + formatter(x) + .replace(",", "§_§-") # rare string to avoid "," <-> "." clash. + .replace(".", decimal) + .replace("§_§-", thousands) + ) + elif decimal != "." and (thousands is None or thousands == ","): + return formatter(x).replace(".", decimal) + elif decimal == "." 
and thousands is not None and thousands != ",": + return formatter(x).replace(",", thousands) + return formatter(x) + + return wrapper + + +def _str_escape(x, escape): + """if escaping: only use on str, else return input""" + if isinstance(x, str): + if escape == "html": + return escape_html(x) + elif escape == "latex": + return _escape_latex(x) + else: + raise ValueError( + f"`escape` only permitted in {{'html', 'latex'}}, got {escape}" + ) + return x + + +def _render_href(x, format): + """uses regex to detect a common URL pattern and converts to href tag in format.""" + if isinstance(x, str): + if format == "html": + href = '{0}' + elif format == "latex": + href = r"\href{{{0}}}{{{0}}}" + else: + raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'") + pat = r"((http|ftp)s?:\/\/|www.)[\w/\-?=%.:@]+\.[\w/\-&?=%.,':;~!@#$*()\[\]]+" + return re.sub(pat, lambda m: href.format(m.group(0)), x) + return x + + +def _maybe_wrap_formatter( + formatter: BaseFormatter | None = None, + na_rep: str | None = None, + precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, + escape: str | None = None, + hyperlinks: str | None = None, +) -> Callable: + """ + Allows formatters to be expressed as str, callable or None, where None returns + a default formatting function. wraps with na_rep, and precision where they are + available. + """ + # Get initial func from input string, input callable, or from default factory + if isinstance(formatter, str): + func_0 = lambda x: formatter.format(x) + elif callable(formatter): + func_0 = formatter + elif formatter is None: + precision = ( + get_option("styler.format.precision") if precision is None else precision + ) + func_0 = partial( + _default_formatter, precision=precision, thousands=(thousands is not None) + ) + else: + raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") + + # Replace chars if escaping + if escape is not None: + func_1 = lambda x: func_0(_str_escape(x, escape=escape)) + else: + func_1 = func_0 + + # Replace decimals and thousands if non-standard inputs detected + if decimal != "." or (thousands is not None and thousands != ","): + func_2 = _wrap_decimal_thousands(func_1, decimal=decimal, thousands=thousands) + else: + func_2 = func_1 + + # Render links + if hyperlinks is not None: + func_3 = lambda x: func_2(_render_href(x, format=hyperlinks)) + else: + func_3 = func_2 + + # Replace missing values if na_rep + if na_rep is None: + return func_3 + else: + return lambda x: na_rep if isna(x) else func_3(x) + + +def non_reducing_slice(slice_: Subset): + """ + Ensure that a slice doesn't reduce to a Series or Scalar. + + Any user-passed `subset` should have this called on it + to make sure we're always working with DataFrames. + """ + # default to column slice, like DataFrame + # ['A', 'B'] -> IndexSlices[:, ['A', 'B']] + kinds = (ABCSeries, np.ndarray, Index, list, str) + if isinstance(slice_, kinds): + slice_ = IndexSlice[:, slice_] + + def pred(part) -> bool: + """ + Returns + ------- + bool + True if slice does *not* reduce, + False if `part` is a tuple. + """ + # true when slice does *not* reduce, False when part is a tuple, + # i.e. 
MultiIndex slice + if isinstance(part, tuple): + # GH#39421 check for sub-slice: + return any((isinstance(s, slice) or is_list_like(s)) for s in part) + else: + return isinstance(part, slice) or is_list_like(part) + + if not is_list_like(slice_): + if not isinstance(slice_, slice): + # a 1-d slice, like df.loc[1] + slice_ = [[slice_]] + else: + # slice(a, b, c) + slice_ = [slice_] # to tuplize later + else: + # error: Item "slice" of "Union[slice, Sequence[Any]]" has no attribute + # "__iter__" (not iterable) -> is specifically list_like in conditional + slice_ = [p if pred(p) else [p] for p in slice_] # type: ignore[union-attr] + return tuple(slice_) + + +def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: + """ + Convert css-string to sequence of tuples format if needed. + 'color:red; border:1px solid black;' -> [('color', 'red'), + ('border','1px solid red')] + """ + if isinstance(style, str): + s = style.split(";") + try: + return [ + (x.split(":")[0].strip(), x.split(":")[1].strip()) + for x in s + if x.strip() != "" + ] + except IndexError: + raise ValueError( + "Styles supplied as string must follow CSS rule formats, " + f"for example 'attr: val;'. '{style}' was given." + ) + return style + + +def refactor_levels( + level: Level | list[Level] | None, + obj: Index, +) -> list[int]: + """ + Returns a consistent levels arg for use in ``hide_index`` or ``hide_columns``. + + Parameters + ---------- + level : int, str, list + Original ``level`` arg supplied to above methods. + obj: + Either ``self.index`` or ``self.columns`` + + Returns + ------- + list : refactored arg with a list of levels to hide + """ + if level is None: + levels_: list[int] = list(range(obj.nlevels)) + elif isinstance(level, int): + levels_ = [level] + elif isinstance(level, str): + levels_ = [obj._get_level_number(level)] + elif isinstance(level, list): + levels_ = [ + obj._get_level_number(lev) if not isinstance(lev, int) else lev + for lev in level + ] + else: + raise ValueError("`level` must be of type `int`, `str` or list of such") + return levels_ + + +class Tooltips: + """ + An extension to ``Styler`` that allows for and manipulates tooltips on hover + of ``" in result + result = styler.to_html() + assert "" not in result + + +def test_block_names(tpl_style, tpl_table): + # catch accidental removal of a block + expected_style = { + "before_style", + "style", + "table_styles", + "before_cellstyle", + "cellstyle", + } + expected_table = { + "before_table", + "table", + "caption", + "thead", + "tbody", + "after_table", + "before_head_rows", + "head_tr", + "after_head_rows", + "before_rows", + "tr", + "after_rows", + } + result1 = set(tpl_style.blocks) + assert result1 == expected_style + + result2 = set(tpl_table.blocks) + assert result2 == expected_table + + +def test_from_custom_template_table(tmpdir): + p = tmpdir.mkdir("tpl").join("myhtml_table.tpl") + p.write( + dedent( + """\ + {% extends "html_table.tpl" %} + {% block table %} +

+            <h1>{{custom_title}}</h1>

    + {{ super() }} + {% endblock table %}""" + ) + ) + result = Styler.from_custom_template(str(tmpdir.join("tpl")), "myhtml_table.tpl") + assert issubclass(result, Styler) + assert result.env is not Styler.env + assert result.template_html_table is not Styler.template_html_table + styler = result(DataFrame({"A": [1, 2]})) + assert "

<h1>My Title</h1>" in styler.to_html(custom_title="My Title")

    \n\n\n + {{ super() }} + {% endblock style %}""" + ) + ) + result = Styler.from_custom_template( + str(tmpdir.join("tpl")), html_style="myhtml_style.tpl" + ) + assert issubclass(result, Styler) + assert result.env is not Styler.env + assert result.template_html_style is not Styler.template_html_style + styler = result(DataFrame({"A": [1, 2]})) + assert '\n\nfull cap" in styler.to_html() + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +@pytest.mark.parametrize("index_name", [True, False]) +def test_sticky_basic(styler, index, columns, index_name): + if index_name: + styler.index.name = "some text" + if index: + styler.set_sticky(axis=0) + if columns: + styler.set_sticky(axis=1) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " left: 0px;\n z-index: {1};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " top: {1}px;\n z-index: {2};\n{3}}}" + ) + + res = styler.set_uuid("").to_html() + + # test index stickys over thead and tbody + assert (left_css.format("thead tr th:nth-child(1)", "3 !important") in res) is index + assert (left_css.format("tbody tr th:nth-child(1)", "1") in res) is index + + # test column stickys including if name row + assert ( + top_css.format("thead tr:nth-child(1) th", "0", "2", " height: 25px;\n") in res + ) is (columns and index_name) + assert ( + top_css.format("thead tr:nth-child(2) th", "25", "2", " height: 25px;\n") + in res + ) is (columns and index_name) + assert (top_css.format("thead tr:nth-child(1) th", "0", "2", "") in res) is ( + columns and not index_name + ) + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +def test_sticky_mi(styler_mi, index, columns): + if index: + styler_mi.set_sticky(axis=0) + if columns: + styler_mi.set_sticky(axis=1) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " top: {1}px;\n height: 25px;\n z-index: {2};\n}}" + ) + + res = styler_mi.set_uuid("").to_html() + + # test the index stickys for thead and tbody over both levels + assert ( + left_css.format("thead tr th:nth-child(1)", "0", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level0", "0", "1") in res) is index + assert ( + left_css.format("thead tr th:nth-child(2)", "75", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level1", "75", "1") in res) is index + + # test the column stickys for each level row + assert (top_css.format("thead tr:nth-child(1) th", "0", "2") in res) is columns + assert (top_css.format("thead tr:nth-child(2) th", "25", "2") in res) is columns + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +@pytest.mark.parametrize("levels", [[1], ["one"], "one"]) +def test_sticky_levels(styler_mi, index, columns, levels): + styler_mi.index.names, styler_mi.columns.names = ["zero", "one"], ["zero", "one"] + if index: + styler_mi.set_sticky(axis=0, levels=levels) + if columns: + styler_mi.set_sticky(axis=1, levels=levels) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " top: 
{1}px;\n height: 25px;\n z-index: {2};\n}}" + ) + + res = styler_mi.set_uuid("").to_html() + + # test no sticking of level0 + assert "#T_ thead tr th:nth-child(1)" not in res + assert "#T_ tbody tr th.level0" not in res + assert "#T_ thead tr:nth-child(1) th" not in res + + # test sticking level1 + assert ( + left_css.format("thead tr th:nth-child(2)", "0", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level1", "0", "1") in res) is index + assert (top_css.format("thead tr:nth-child(2) th", "0", "2") in res) is columns + + +def test_sticky_raises(styler): + with pytest.raises(ValueError, match="No axis named bad for object type DataFrame"): + styler.set_sticky(axis="bad") + + +@pytest.mark.parametrize( + "sparse_index, sparse_columns", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_sparse_options(sparse_index, sparse_columns): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=ridx, columns=cidx) + styler = df.style + + default_html = styler.to_html() # defaults under pd.options to (True , True) + + with option_context( + "styler.sparse.index", sparse_index, "styler.sparse.columns", sparse_columns + ): + html1 = styler.to_html() + assert (html1 == default_html) is (sparse_index and sparse_columns) + html2 = styler.to_html(sparse_index=sparse_index, sparse_columns=sparse_columns) + assert html1 == html2 + + +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("columns", [True, False]) +def test_applymap_header_cell_ids(styler, index, columns): + # GH 41893 + func = lambda v: "attr: val;" + styler.uuid, styler.cell_ids = "", False + if index: + styler.applymap_index(func, axis="index") + if columns: + styler.applymap_index(func, axis="columns") + + result = styler.to_html() + + # test no data cell ids + assert '' in result + assert '' in result + + # test index header ids where needed and css styles + assert ( + '' in result + ) is index + assert ( + '' in result + ) is index + assert ("#T__level0_row0, #T__level0_row1 {\n attr: val;\n}" in result) is index + + # test column header ids where needed and css styles + assert ( + '' in result + ) is columns + assert ("#T__level0_col0 {\n attr: val;\n}" in result) is columns + + +@pytest.mark.parametrize("rows", [True, False]) +@pytest.mark.parametrize("cols", [True, False]) +def test_maximums(styler_mi, rows, cols): + result = styler_mi.to_html( + max_rows=2 if rows else None, + max_columns=2 if cols else None, + ) + + assert ">5" in result # [[0,1], [4,5]] always visible + assert (">8" in result) is not rows # first trimmed vertical element + assert (">2" in result) is not cols # first trimmed horizontal element + + +def test_replaced_css_class_names(): + css = { + "row_heading": "ROWHEAD", + # "col_heading": "COLHEAD", + "index_name": "IDXNAME", + # "col": "COL", + "row": "ROW", + # "col_trim": "COLTRIM", + "row_trim": "ROWTRIM", + "level": "LEVEL", + "data": "DATA", + "blank": "BLANK", + } + midx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + styler_mi = Styler( + DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx), + uuid_len=0, + ).set_table_styles(css_class_names=css) + styler_mi.index.names = ["n1", "n2"] + styler_mi.hide(styler_mi.index[1:], axis=0) + styler_mi.hide(styler_mi.columns[1:], axis=1) + styler_mi.applymap_index(lambda v: "color: red;", axis=0) + styler_mi.applymap_index(lambda v: 
"color: green;", axis=1) + styler_mi.applymap(lambda v: "color: blue;") + expected = dedent( + """\ + +
    <div></div>"A&B"NA$ "A"$ A&BNA + ... + + Using a ``formatter`` with LaTeX ``escape``. + + >>> df = pd.DataFrame([[1, 2, 3]], columns=["123", "~", "$%#"]) + >>> df.style.format_index("\\textbf{{{}}}", escape="latex", axis=1).to_latex() + ... # doctest: +SKIP + \begin{tabular}{lrrr} + {} & {\textbf{123}} & {\textbf{\textasciitilde }} & {\textbf{\$\%\#}} \\ + 0 & 1 & 2 & 3 \\ + \end{tabular} + """ + axis = self.data._get_axis_number(axis) + if axis == 0: + display_funcs_, obj = self._display_funcs_index, self.index + else: + display_funcs_, obj = self._display_funcs_columns, self.columns + levels_ = refactor_levels(level, obj) + + if all( + ( + formatter is None, + level is None, + precision is None, + decimal == ".", + thousands is None, + na_rep is None, + escape is None, + hyperlinks is None, + ) + ): + display_funcs_.clear() + return self # clear the formatter / revert to default and avoid looping + + if not isinstance(formatter, dict): + formatter = {level: formatter for level in levels_} + else: + formatter = { + obj._get_level_number(level): formatter_ + for level, formatter_ in formatter.items() + } + + for lvl in levels_: + format_func = _maybe_wrap_formatter( + formatter.get(lvl), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + hyperlinks=hyperlinks, + ) + + for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]: + display_funcs_[idx] = format_func + + return self + + def relabel_index( + self, + labels: Sequence | Index, + axis: Axis = 0, + level: Level | list[Level] | None = None, + ) -> StylerRenderer: + r""" + Relabel the index, or column header, keys to display a set of specified values. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + labels : list-like or Index + New labels to display. Must have same length as the underlying values not + hidden. + axis : {"index", 0, "columns", 1} + Apply to the index or columns. + level : int, str, list, optional + The level(s) over which to apply the new labels. If `None` will apply + to all levels of an Index or MultiIndex which are not hidden. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.format_index: Format the text display value of index or column headers. + Styler.hide: Hide the index, column headers, or specified data from display. + + Notes + ----- + As part of Styler, this method allows the display of an index to be + completely user-specified without affecting the underlying DataFrame data, + index, or column headers. This means that the flexibility of indexing is + maintained whilst the final display is customisable. + + Since Styler is designed to be progressively constructed with method chaining, + this method is adapted to react to the **currently specified hidden elements**. + This is useful because it means one does not have to specify all the new + labels if the majority of an index, or column headers, have already been hidden. + The following produce equivalent display (note the length of ``labels`` in + each case). + + .. code-block:: python + + # relabel first, then hide + df = pd.DataFrame({"col": ["a", "b", "c"]}) + df.style.relabel_index(["A", "B", "C"]).hide([0,1]) + # hide first, then relabel + df = pd.DataFrame({"col": ["a", "b", "c"]}) + df.style.hide([0,1]).relabel_index(["C"]) + + This method should be used, rather than :meth:`Styler.format_index`, in one of + the following cases (see examples): + + - A specified set of labels are required which are not a function of the + underlying index keys. 
+ - The function of the underlying index keys requires a counter variable, + such as those available upon enumeration. + + Examples + -------- + Basic use + + >>> df = pd.DataFrame({"col": ["a", "b", "c"]}) + >>> df.style.relabel_index(["A", "B", "C"]) # doctest: +SKIP + col + A a + B b + C c + + Chaining with pre-hidden elements + + >>> df.style.hide([0,1]).relabel_index(["C"]) # doctest: +SKIP + col + C c + + Using a MultiIndex + + >>> midx = pd.MultiIndex.from_product([[0, 1], [0, 1], [0, 1]]) + >>> df = pd.DataFrame({"col": list(range(8))}, index=midx) + >>> styler = df.style # doctest: +SKIP + col + 0 0 0 0 + 1 1 + 1 0 2 + 1 3 + 1 0 0 4 + 1 5 + 1 0 6 + 1 7 + >>> styler.hide((midx.get_level_values(0)==0)|(midx.get_level_values(1)==0)) + ... # doctest: +SKIP + >>> styler.hide(level=[0,1]) # doctest: +SKIP + >>> styler.relabel_index(["binary6", "binary7"]) # doctest: +SKIP + col + binary6 6 + binary7 7 + + We can also achieve the above by indexing first and then re-labeling + + >>> styler = df.loc[[(1,1,0), (1,1,1)]].style + >>> styler.hide(level=[0,1]).relabel_index(["binary6", "binary7"]) + ... # doctest: +SKIP + col + binary6 6 + binary7 7 + + Defining a formatting function which uses an enumeration counter. Also note + that the value of the index key is passed in the case of string labels so it + can also be inserted into the label, using curly brackets (or double curly + brackets if the string if pre-formatted), + + >>> df = pd.DataFrame({"samples": np.random.rand(10)}) + >>> styler = df.loc[np.random.randint(0,10,3)].style + >>> styler.relabel_index([f"sample{i+1} ({{}})" for i in range(3)]) + ... # doctest: +SKIP + samples + sample1 (5) 0.315811 + sample2 (0) 0.495941 + sample3 (2) 0.067946 + """ + axis = self.data._get_axis_number(axis) + if axis == 0: + display_funcs_, obj = self._display_funcs_index, self.index + hidden_labels, hidden_lvls = self.hidden_rows, self.hide_index_ + else: + display_funcs_, obj = self._display_funcs_columns, self.columns + hidden_labels, hidden_lvls = self.hidden_columns, self.hide_columns_ + visible_len = len(obj) - len(set(hidden_labels)) + if len(labels) != visible_len: + raise ValueError( + "``labels`` must be of length equal to the number of " + f"visible labels along ``axis`` ({visible_len})." + ) + + if level is None: + level = [i for i in range(obj.nlevels) if not hidden_lvls[i]] + levels_ = refactor_levels(level, obj) + + def alias_(x, value): + if isinstance(value, str): + return value.format(x) + return value + + for ai, i in enumerate([i for i in range(len(obj)) if i not in hidden_labels]): + if len(levels_) == 1: + idx = (i, levels_[0]) if axis == 0 else (levels_[0], i) + display_funcs_[idx] = partial(alias_, value=labels[ai]) + else: + for aj, lvl in enumerate(levels_): + idx = (i, lvl) if axis == 0 else (lvl, i) + display_funcs_[idx] = partial(alias_, value=labels[ai][aj]) + + return self + + +def _element( + html_element: str, + html_class: str, + value: Any, + is_visible: bool, + **kwargs, +) -> dict: + """ + Template to return container with information for a `` cells in the HTML result. + + Parameters + ---------- + css_name: str, default "pd-t" + Name of the CSS class that controls visualisation of tooltips. + css_props: list-like, default; see Notes + List of (attr, value) tuples defining properties of the CSS class. + tooltips: DataFrame, default empty + DataFrame of strings aligned with underlying Styler data for tooltip + display. 
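A minimal usage sketch, assuming the public ``Styler.set_tooltips`` entry point that drives this class (the DataFrame contents below are purely illustrative):

    >>> import pandas as pd
    >>> df = pd.DataFrame([[1, 2]], columns=["a", "b"])
    >>> ttips = pd.DataFrame([["tooltip for a", ""]], columns=["a", "b"])  # "" means no tooltip
    >>> df.style.set_tooltips(ttips)  # doctest: +SKIP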
+ + Notes + ----- + The default properties for the tooltip CSS class are: + + - visibility: hidden + - position: absolute + - z-index: 1 + - background-color: black + - color: white + - transform: translate(-20px, -20px) + + Hidden visibility is a key prerequisite to the hover functionality, and should + always be included in any manual properties specification. + """ + + def __init__( + self, + css_props: CSSProperties = [ + ("visibility", "hidden"), + ("position", "absolute"), + ("z-index", 1), + ("background-color", "black"), + ("color", "white"), + ("transform", "translate(-20px, -20px)"), + ], + css_name: str = "pd-t", + tooltips: DataFrame = DataFrame(), + ) -> None: + self.class_name = css_name + self.class_properties = css_props + self.tt_data = tooltips + self.table_styles: CSSStyles = [] + + @property + def _class_styles(self): + """ + Combine the ``_Tooltips`` CSS class name and CSS properties to the format + required to extend the underlying ``Styler`` `table_styles` to allow + tooltips to render in HTML. + + Returns + ------- + styles : List + """ + return [ + { + "selector": f".{self.class_name}", + "props": maybe_convert_css_to_tuples(self.class_properties), + } + ] + + def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str): + """ + For every table data-cell that has a valid tooltip (not None, NaN or + empty string) must create two pseudo CSS entries for the specific + element id which are added to overall table styles: + an on hover visibility change and a content change + dependent upon the user's chosen display string. + + For example: + [{"selector": "T__row1_col1:hover .pd-t", + "props": [("visibility", "visible")]}, + {"selector": "T__row1_col1 .pd-t::after", + "props": [("content", "Some Valid Text String")]}] + + Parameters + ---------- + uuid: str + The uuid of the Styler instance + name: str + The css-name of the class used for styling tooltips + row : int + The row index of the specified tooltip string data + col : int + The col index of the specified tooltip string data + text : str + The textual content of the tooltip to be displayed in HTML. + + Returns + ------- + pseudo_css : List + """ + selector_id = "#T_" + uuid + "_row" + str(row) + "_col" + str(col) + return [ + { + "selector": selector_id + f":hover .{name}", + "props": [("visibility", "visible")], + }, + { + "selector": selector_id + f" .{name}::after", + "props": [("content", f'"{text}"')], + }, + ] + + def _translate(self, styler: StylerRenderer, d: dict): + """ + Mutate the render dictionary to allow for tooltips: + + - Add ```` HTML element to each data cells ``display_value``. Ignores + headers. + - Add table level CSS styles to control pseudo classes. + + Parameters + ---------- + styler_data : DataFrame + Underlying ``Styler`` DataFrame used for reindexing. + uuid : str + The underlying ``Styler`` uuid for CSS id. 
+ d : dict + The dictionary prior to final render + + Returns + ------- + render_dict : Dict + """ + self.tt_data = self.tt_data.reindex_like(styler.data) + if self.tt_data.empty: + return d + + name = self.class_name + mask = (self.tt_data.isna()) | (self.tt_data.eq("")) # empty string = no ttip + self.table_styles = [ + style + for sublist in [ + self._pseudo_css(styler.uuid, name, i, j, str(self.tt_data.iloc[i, j])) + for i in range(len(self.tt_data.index)) + for j in range(len(self.tt_data.columns)) + if not ( + mask.iloc[i, j] + or i in styler.hidden_rows + or j in styler.hidden_columns + ) + ] + for style in sublist + ] + + if self.table_styles: + # add span class to every cell only if at least 1 non-empty tooltip + for row in d["body"]: + for item in row: + if item["type"] == "td": + item["display_value"] = ( + str(item["display_value"]) + + f'' + ) + d["table_styles"].extend(self._class_styles) + d["table_styles"].extend(self.table_styles) + + return d + + +def _parse_latex_table_wrapping(table_styles: CSSStyles, caption: str | None) -> bool: + """ + Indicate whether LaTeX {tabular} should be wrapped with a {table} environment. + + Parses the `table_styles` and detects any selectors which must be included outside + of {tabular}, i.e. indicating that wrapping must occur, and therefore return True, + or if a caption exists and requires similar. + """ + IGNORED_WRAPPERS = ["toprule", "midrule", "bottomrule", "column_format"] + # ignored selectors are included with {tabular} so do not need wrapping + return ( + table_styles is not None + and any(d["selector"] not in IGNORED_WRAPPERS for d in table_styles) + ) or caption is not None + + +def _parse_latex_table_styles(table_styles: CSSStyles, selector: str) -> str | None: + """ + Return the first 'props' 'value' from ``tables_styles`` identified by ``selector``. + + Examples + -------- + >>> table_styles = [{'selector': 'foo', 'props': [('attr','value')]}, + ... {'selector': 'bar', 'props': [('attr', 'overwritten')]}, + ... {'selector': 'bar', 'props': [('a1', 'baz'), ('a2', 'ignore')]}] + >>> _parse_latex_table_styles(table_styles, selector='bar') + 'baz' + + Notes + ----- + The replacement of "§" with ":" is to avoid the CSS problem where ":" has structural + significance and cannot be used in LaTeX labels, but is often required by them. + """ + for style in table_styles[::-1]: # in reverse for most recently applied style + if style["selector"] == selector: + return str(style["props"][0][1]).replace("§", ":") + return None + + +def _parse_latex_cell_styles( + latex_styles: CSSList, display_value: str, convert_css: bool = False +) -> str: + r""" + Mutate the ``display_value`` string including LaTeX commands from ``latex_styles``. + + This method builds a recursive latex chain of commands based on the + CSSList input, nested around ``display_value``. + + If a CSS style is given as ('', '') this is translated to + '\{display_value}', and this value is treated as the + display value for the next iteration. 
+ + The most recent style forms the inner component, for example for styles: + `[('c1', 'o1'), ('c2', 'o2')]` this returns: `\c1o1{\c2o2{display_value}}` + + Sometimes latex commands have to be wrapped with curly braces in different ways: + We create some parsing flags to identify the different behaviours: + + - `--rwrap` : `\{}` + - `--wrap` : `{\ }` + - `--nowrap` : `\ ` + - `--lwrap` : `{\} ` + - `--dwrap` : `{\}{}` + + For example for styles: + `[('c1', 'o1--wrap'), ('c2', 'o2')]` this returns: `{\c1o1 \c2o2{display_value}} + """ + if convert_css: + latex_styles = _parse_latex_css_conversion(latex_styles) + for (command, options) in latex_styles[::-1]: # in reverse for most recent style + formatter = { + "--wrap": f"{{\\{command}--to_parse {display_value}}}", + "--nowrap": f"\\{command}--to_parse {display_value}", + "--lwrap": f"{{\\{command}--to_parse}} {display_value}", + "--rwrap": f"\\{command}--to_parse{{{display_value}}}", + "--dwrap": f"{{\\{command}--to_parse}}{{{display_value}}}", + } + display_value = f"\\{command}{options} {display_value}" + for arg in ["--nowrap", "--wrap", "--lwrap", "--rwrap", "--dwrap"]: + if arg in str(options): + display_value = formatter[arg].replace( + "--to_parse", _parse_latex_options_strip(value=options, arg=arg) + ) + break # only ever one purposeful entry + return display_value + + +def _parse_latex_header_span( + cell: dict[str, Any], + multirow_align: str, + multicol_align: str, + wrap: bool = False, + convert_css: bool = False, +) -> str: + r""" + Refactor the cell `display_value` if a 'colspan' or 'rowspan' attribute is present. + + 'rowspan' and 'colspan' do not occur simultaneouly. If they are detected then + the `display_value` is altered to a LaTeX `multirow` or `multicol` command + respectively, with the appropriate cell-span. + + ``wrap`` is used to enclose the `display_value` in braces which is needed for + column headers using an siunitx package. + + Requires the package {multirow}, whereas multicol support is usually built in + to the {tabular} environment. + + Examples + -------- + >>> cell = {'cellstyle': '', 'display_value':'text', 'attributes': 'colspan="3"'} + >>> _parse_latex_header_span(cell, 't', 'c') + '\\multicolumn{3}{c}{text}' + """ + display_val = _parse_latex_cell_styles( + cell["cellstyle"], cell["display_value"], convert_css + ) + if "attributes" in cell: + attrs = cell["attributes"] + if 'colspan="' in attrs: + colspan = attrs[attrs.find('colspan="') + 9 :] # len('colspan="') = 9 + colspan = int(colspan[: colspan.find('"')]) + if "naive-l" == multicol_align: + out = f"{{{display_val}}}" if wrap else f"{display_val}" + blanks = " & {}" if wrap else " &" + return out + blanks * (colspan - 1) + elif "naive-r" == multicol_align: + out = f"{{{display_val}}}" if wrap else f"{display_val}" + blanks = "{} & " if wrap else "& " + return blanks * (colspan - 1) + out + return f"\\multicolumn{{{colspan}}}{{{multicol_align}}}{{{display_val}}}" + elif 'rowspan="' in attrs: + if multirow_align == "naive": + return display_val + rowspan = attrs[attrs.find('rowspan="') + 9 :] + rowspan = int(rowspan[: rowspan.find('"')]) + return f"\\multirow[{multirow_align}]{{{rowspan}}}{{*}}{{{display_val}}}" + if wrap: + return f"{{{display_val}}}" + else: + return display_val + + +def _parse_latex_options_strip(value: str | float, arg: str) -> str: + """ + Strip a css_value which may have latex wrapping arguments, css comment identifiers, + and whitespaces, to a valid string for latex options parsing. 
+ + For example: 'red /* --wrap */ ' --> 'red' + """ + return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip() + + +def _parse_latex_css_conversion(styles: CSSList) -> CSSList: + """ + Convert CSS (attribute,value) pairs to equivalent LaTeX (command,options) pairs. + + Ignore conversion if tagged with `--latex` option, skipped if no conversion found. + """ + + def font_weight(value, arg): + if value == "bold" or value == "bolder": + return "bfseries", f"{arg}" + return None + + def font_style(value, arg): + if value == "italic": + return "itshape", f"{arg}" + elif value == "oblique": + return "slshape", f"{arg}" + return None + + def color(value, user_arg, command, comm_arg): + """ + CSS colors have 5 formats to process: + + - 6 digit hex code: "#ff23ee" --> [HTML]{FF23EE} + - 3 digit hex code: "#f0e" --> [HTML]{FF00EE} + - rgba: rgba(128, 255, 0, 0.5) --> [rgb]{0.502, 1.000, 0.000} + - rgb: rgb(128, 255, 0,) --> [rbg]{0.502, 1.000, 0.000} + - string: red --> {red} + + Additionally rgb or rgba can be expressed in % which is also parsed. + """ + arg = user_arg if user_arg != "" else comm_arg + + if value[0] == "#" and len(value) == 7: # color is hex code + return command, f"[HTML]{{{value[1:].upper()}}}{arg}" + if value[0] == "#" and len(value) == 4: # color is short hex code + val = f"{value[1].upper()*2}{value[2].upper()*2}{value[3].upper()*2}" + return command, f"[HTML]{{{val}}}{arg}" + elif value[:3] == "rgb": # color is rgb or rgba + r = re.findall("(?<=\\()[0-9\\s%]+(?=,)", value)[0].strip() + r = float(r[:-1]) / 100 if "%" in r else int(r) / 255 + g = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[0].strip() + g = float(g[:-1]) / 100 if "%" in g else int(g) / 255 + if value[3] == "a": # color is rgba + b = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[1].strip() + else: # color is rgb + b = re.findall("(?<=,)[0-9\\s%]+(?=\\))", value)[0].strip() + b = float(b[:-1]) / 100 if "%" in b else int(b) / 255 + return command, f"[rgb]{{{r:.3f}, {g:.3f}, {b:.3f}}}{arg}" + else: + return command, f"{{{value}}}{arg}" # color is likely string-named + + CONVERTED_ATTRIBUTES: dict[str, Callable] = { + "font-weight": font_weight, + "background-color": partial(color, command="cellcolor", comm_arg="--lwrap"), + "color": partial(color, command="color", comm_arg=""), + "font-style": font_style, + } + + latex_styles: CSSList = [] + for (attribute, value) in styles: + if isinstance(value, str) and "--latex" in value: + # return the style without conversion but drop '--latex' + latex_styles.append((attribute, value.replace("--latex", ""))) + if attribute in CONVERTED_ATTRIBUTES.keys(): + arg = "" + for x in ["--wrap", "--nowrap", "--lwrap", "--dwrap", "--rwrap"]: + if x in str(value): + arg, value = x, _parse_latex_options_strip(value, x) + break + latex_style = CONVERTED_ATTRIBUTES[attribute](value, arg) + if latex_style is not None: + latex_styles.extend([latex_style]) + return latex_styles + + +def _escape_latex(s): + r""" + Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``, + ``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences. + + Use this if you need to display text that might contain such characters in LaTeX. 
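A minimal sketch of the expected behaviour, derived only from the replacement table in the function body (the input string is illustrative):

    >>> _escape_latex("profit & loss: 50% of $100 in {Q1}_2023")
    'profit \\& loss: 50\\% of \\$100 in \\{Q1\\}\\_2023'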
+ + Parameters + ---------- + s : str + Input to be escaped + + Return + ------ + str : + Escaped string + """ + return ( + s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash + .replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces + .replace("&", "\\&") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("_", "\\_") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~ ", "~\\space ") # since \textasciitilde gobbles spaces + .replace("~", "\\textasciitilde ") + .replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces + .replace("^", "\\textasciicircum ") + .replace("ab2§=§8yz", "\\textbackslash ") + ) diff --git a/pandas/io/formats/templates/html.tpl b/pandas/io/formats/templates/html.tpl new file mode 100644 index 00000000..8c63be3a --- /dev/null +++ b/pandas/io/formats/templates/html.tpl @@ -0,0 +1,16 @@ +{# Update the html_style/table_structure.html documentation too #} +{% if doctype_html %} + + + + +{% if not exclude_styles %}{% include html_style_tpl %}{% endif %} + + +{% include html_table_tpl %} + + +{% elif not doctype_html %} +{% if not exclude_styles %}{% include html_style_tpl %}{% endif %} +{% include html_table_tpl %} +{% endif %} diff --git a/pandas/io/formats/templates/html_style.tpl b/pandas/io/formats/templates/html_style.tpl new file mode 100644 index 00000000..5c3fcd97 --- /dev/null +++ b/pandas/io/formats/templates/html_style.tpl @@ -0,0 +1,26 @@ +{%- block before_style -%}{%- endblock before_style -%} +{% block style %} + +{% endblock style %} diff --git a/pandas/io/formats/templates/html_table.tpl b/pandas/io/formats/templates/html_table.tpl new file mode 100644 index 00000000..17118d2b --- /dev/null +++ b/pandas/io/formats/templates/html_table.tpl @@ -0,0 +1,63 @@ +{% block before_table %}{% endblock before_table %} +{% block table %} +{% if exclude_styles %} + +{% else %} +
    +{% endif %} +{% block caption %} +{% if caption and caption is string %} + +{% elif caption and caption is sequence %} + +{% endif %} +{% endblock caption %} +{% block thead %} + +{% block before_head_rows %}{% endblock %} +{% for r in head %} +{% block head_tr scoped %} + +{% if exclude_styles %} +{% for c in r %} +{% if c.is_visible != False %} + <{{c.type}} {{c.attributes}}>{{c.display_value}} +{% endif %} +{% endfor %} +{% else %} +{% for c in r %} +{% if c.is_visible != False %} + <{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}_{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}} +{% endif %} +{% endfor %} +{% endif %} + +{% endblock head_tr %} +{% endfor %} +{% block after_head_rows %}{% endblock %} + +{% endblock thead %} +{% block tbody %} + +{% block before_rows %}{% endblock before_rows %} +{% for r in body %} +{% block tr scoped %} + +{% if exclude_styles %} +{% for c in r %}{% if c.is_visible != False %} + <{{c.type}} {{c.attributes}}>{{c.display_value}} +{% endif %}{% endfor %} +{% else %} +{% for c in r %}{% if c.is_visible != False %} + <{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}_{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}} +{% endif %}{% endfor %} +{% endif %} + +{% endblock tr %} +{% endfor %} +{% block after_rows %}{% endblock after_rows %} + +{% endblock tbody %} +
    {{caption}}{{caption[0]}}
    +{% endblock table %} +{% block after_table %}{% endblock after_table %} diff --git a/pandas/io/formats/templates/latex.tpl b/pandas/io/formats/templates/latex.tpl new file mode 100644 index 00000000..ae341bbc --- /dev/null +++ b/pandas/io/formats/templates/latex.tpl @@ -0,0 +1,5 @@ +{% if environment == "longtable" %} +{% include "latex_longtable.tpl" %} +{% else %} +{% include "latex_table.tpl" %} +{% endif %} diff --git a/pandas/io/formats/templates/latex_longtable.tpl b/pandas/io/formats/templates/latex_longtable.tpl new file mode 100644 index 00000000..b97843ee --- /dev/null +++ b/pandas/io/formats/templates/latex_longtable.tpl @@ -0,0 +1,82 @@ +\begin{longtable} +{%- set position = parse_table(table_styles, 'position') %} +{%- if position is not none %} +[{{position}}] +{%- endif %} +{%- set column_format = parse_table(table_styles, 'column_format') %} +{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %} + +{% for style in table_styles %} +{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format', 'label'] %} +\{{style['selector']}}{{parse_table(table_styles, style['selector'])}} +{% endif %} +{% endfor %} +{% if caption and caption is string %} +\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} +{%- set label = parse_table(table_styles, 'label') %} +{%- if label is not none %} + \label{{label}} +{%- endif %} \\ +{% elif caption and caption is sequence %} +\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} +{%- set label = parse_table(table_styles, 'label') %} +{%- if label is not none %} + \label{{label}} +{%- endif %} \\ +{% else %} +{%- set label = parse_table(table_styles, 'label') %} +{%- if label is not none %} +\label{{label}} \\ +{% endif %} +{% endif %} +{% set toprule = parse_table(table_styles, 'toprule') %} +{% if toprule is not none %} +\{{toprule}} +{% endif %} +{% for row in head %} +{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx)}}{% endfor %} \\ +{% endfor %} +{% set midrule = parse_table(table_styles, 'midrule') %} +{% if midrule is not none %} +\{{midrule}} +{% endif %} +\endfirsthead +{% if caption and caption is string %} +\caption[]{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} \\ +{% elif caption and caption is sequence %} +\caption[]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} \\ +{% endif %} +{% if toprule is not none %} +\{{toprule}} +{% endif %} +{% for row in head %} +{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx)}}{% endfor %} \\ +{% endfor %} +{% if midrule is not none %} +\{{midrule}} +{% endif %} +\endhead +{% if midrule is not none %} +\{{midrule}} +{% endif %} +\multicolumn{% raw %}{{% endraw %}{{body[0]|length}}{% raw %}}{% endraw %}{r}{Continued on next page} \\ +{% if midrule is not none %} +\{{midrule}} +{% endif %} +\endfoot +{% set bottomrule = parse_table(table_styles, 'bottomrule') %} +{% if bottomrule is not none %} +\{{bottomrule}} +{% endif %} +\endlastfoot +{% for row in body %} +{% for c in row %}{% if not loop.first %} & {% endif %} + {%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %} +{%- endfor %} \\ +{% if clines and clines[loop.index] | length > 0 %} + {%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %} + 
+{% endif %} +{% endfor %} +\end{longtable} +{% raw %}{% endraw %} diff --git a/pandas/io/formats/templates/latex_table.tpl b/pandas/io/formats/templates/latex_table.tpl new file mode 100644 index 00000000..7858cb4c --- /dev/null +++ b/pandas/io/formats/templates/latex_table.tpl @@ -0,0 +1,57 @@ +{% if environment or parse_wrap(table_styles, caption) %} +\begin{% raw %}{{% endraw %}{{environment if environment else "table"}}{% raw %}}{% endraw %} +{%- set position = parse_table(table_styles, 'position') %} +{%- if position is not none %} +[{{position}}] +{%- endif %} + +{% set position_float = parse_table(table_styles, 'position_float') %} +{% if position_float is not none%} +\{{position_float}} +{% endif %} +{% if caption and caption is string %} +\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} + +{% elif caption and caption is sequence %} +\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} + +{% endif %} +{% for style in table_styles %} +{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format'] %} +\{{style['selector']}}{{parse_table(table_styles, style['selector'])}} +{% endif %} +{% endfor %} +{% endif %} +\begin{tabular} +{%- set column_format = parse_table(table_styles, 'column_format') %} +{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %} + +{% set toprule = parse_table(table_styles, 'toprule') %} +{% if toprule is not none %} +\{{toprule}} +{% endif %} +{% for row in head %} +{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx, convert_css)}}{% endfor %} \\ +{% endfor %} +{% set midrule = parse_table(table_styles, 'midrule') %} +{% if midrule is not none %} +\{{midrule}} +{% endif %} +{% for row in body %} +{% for c in row %}{% if not loop.first %} & {% endif %} + {%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align, False, convert_css)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %} +{%- endfor %} \\ +{% if clines and clines[loop.index] | length > 0 %} + {%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %} + +{% endif %} +{% endfor %} +{% set bottomrule = parse_table(table_styles, 'bottomrule') %} +{% if bottomrule is not none %} +\{{bottomrule}} +{% endif %} +\end{tabular} +{% if environment or parse_wrap(table_styles, caption) %} +\end{% raw %}{{% endraw %}{{environment if environment else "table"}}{% raw %}}{% endraw %} + +{% endif %} diff --git a/pandas/io/formats/templates/string.tpl b/pandas/io/formats/templates/string.tpl new file mode 100644 index 00000000..06aeb2b4 --- /dev/null +++ b/pandas/io/formats/templates/string.tpl @@ -0,0 +1,12 @@ +{% for r in head %} +{% for c in r %}{% if c["is_visible"] %} +{{ c["display_value"] }}{% if not loop.last %}{{ delimiter }}{% endif %} +{% endif %}{% endfor %} + +{% endfor %} +{% for r in body %} +{% for c in r %}{% if c["is_visible"] %} +{{ c["display_value"] }}{% if not loop.last %}{{ delimiter }}{% endif %} +{% endif %}{% endfor %} + +{% endfor %} diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py new file mode 100644 index 00000000..eb1835f0 --- /dev/null +++ b/pandas/io/formats/xml.py @@ -0,0 +1,558 @@ +""" +:mod:`pandas.io.formats.xml` is a module for formatting data in XML. 
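These formatter classes sit behind the public ``DataFrame.to_xml`` method; a minimal usage sketch (the data are illustrative, and ``parser="etree"`` avoids the optional lxml dependency):

    >>> import pandas as pd
    >>> df = pd.DataFrame({"shape": ["square", "circle"], "sides": [4.0, None]})
    >>> print(df.to_xml(index=False, root_name="shapes", row_name="shape", parser="etree"))
    ... # doctest: +SKIP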
+""" +from __future__ import annotations + +import codecs +import io +from typing import ( + TYPE_CHECKING, + Any, +) + +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import doc + +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.missing import isna + +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import get_handle +from pandas.io.xml import ( + get_data_from_filepath, + preprocess_data, +) + +if TYPE_CHECKING: + from pandas import DataFrame + + +@doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path_or_buffer", +) +class BaseXMLFormatter: + """ + Subclass for formatting data in XML. + + Parameters + ---------- + path_or_buffer : str or file-like + This can be either a string of raw XML, a valid URL, + file or file-like object. + + index : bool + Whether to include index in xml document. + + row_name : str + Name for root of xml document. Default is 'data'. + + root_name : str + Name for row elements of xml document. Default is 'row'. + + na_rep : str + Missing data representation. + + attrs_cols : list + List of columns to write as attributes in row element. + + elem_cols : list + List of columns to write as children in row element. + + namespaces : dict + The namespaces to define in XML document as dicts with key + being namespace and value the URI. + + prefix : str + The prefix for each element in XML document including root. + + encoding : str + Encoding of xml object or document. + + xml_declaration : bool + Whether to include xml declaration at top line item in xml. + + pretty_print : bool + Whether to write xml document with line breaks and indentation. + + stylesheet : str or file-like + A URL, file, file-like object, or a raw string containing XSLT. + + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. 
+ + {storage_options} + + See also + -------- + pandas.io.formats.xml.EtreeXMLFormatter + pandas.io.formats.xml.LxmlXMLFormatter + + """ + + def __init__( + self, + frame: DataFrame, + path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + index: bool = True, + root_name: str | None = "data", + row_name: str | None = "row", + na_rep: str | None = None, + attr_cols: list[str] | None = None, + elem_cols: list[str] | None = None, + namespaces: dict[str | None, str] | None = None, + prefix: str | None = None, + encoding: str = "utf-8", + xml_declaration: bool | None = True, + pretty_print: bool | None = True, + stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + ) -> None: + self.frame = frame + self.path_or_buffer = path_or_buffer + self.index = index + self.root_name = root_name + self.row_name = row_name + self.na_rep = na_rep + self.attr_cols = attr_cols + self.elem_cols = elem_cols + self.namespaces = namespaces + self.prefix = prefix + self.encoding = encoding + self.xml_declaration = xml_declaration + self.pretty_print = pretty_print + self.stylesheet = stylesheet + self.compression = compression + self.storage_options = storage_options + + self.orig_cols = self.frame.columns.tolist() + self.frame_dicts = self.process_dataframe() + + self.validate_columns() + self.validate_encoding() + self.prefix_uri = self.get_prefix_uri() + self.handle_indexes() + + def build_tree(self) -> bytes: + """ + Build tree from data. + + This method initializes the root and builds attributes and elements + with optional namespaces. + """ + raise AbstractMethodError(self) + + def validate_columns(self) -> None: + """ + Validate elems_cols and attrs_cols. + + This method will check if columns is list-like. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + if self.attr_cols and not is_list_like(self.attr_cols): + raise TypeError( + f"{type(self.attr_cols).__name__} is not a valid type for attr_cols" + ) + + if self.elem_cols and not is_list_like(self.elem_cols): + raise TypeError( + f"{type(self.elem_cols).__name__} is not a valid type for elem_cols" + ) + + def validate_encoding(self) -> None: + """ + Validate encoding. + + This method will check if encoding is among listed under codecs. + + Raises + ------ + LookupError + * If encoding is not available in codecs. + """ + + codecs.lookup(self.encoding) + + def process_dataframe(self) -> dict[int | str, dict[str, Any]]: + """ + Adjust Data Frame to fit xml output. + + This method will adjust underlying data frame for xml output, + including optionally replacing missing values and including indexes. + """ + + df = self.frame + + if self.index: + df = df.reset_index() + + if self.na_rep is not None: + df = df.fillna(self.na_rep) + + return df.to_dict(orient="index") + + def handle_indexes(self) -> None: + """ + Handle indexes. + + This method will add indexes into attr_cols or elem_cols. + """ + + if not self.index: + return + + first_key = next(iter(self.frame_dicts)) + indexes: list[str] = [ + x for x in self.frame_dicts[first_key].keys() if x not in self.orig_cols + ] + + if self.attr_cols: + self.attr_cols = indexes + self.attr_cols + + if self.elem_cols: + self.elem_cols = indexes + self.elem_cols + + def get_prefix_uri(self) -> str: + """ + Get uri of namespace prefix. + + This method retrieves corresponding URI to prefix in namespaces. 
+ + Raises + ------ + KeyError + *If prefix is not included in namespace dict. + """ + + raise AbstractMethodError(self) + + def other_namespaces(self) -> dict: + """ + Define other namespaces. + + This method will build dictionary of namespaces attributes + for root element, conditionally with optional namespaces and + prefix. + """ + + nmsp_dict: dict[str, str] = {} + if self.namespaces and self.prefix is None: + nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p != ""} + + if self.namespaces and self.prefix: + nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p == ""} + + return nmsp_dict + + def build_attribs(self, d: dict[str, Any], elem_row: Any) -> Any: + """ + Create attributes of row. + + This method adds attributes using attr_cols to row element and + works with tuples for multindex or hierarchical columns. + """ + + if not self.attr_cols: + return elem_row + + for col in self.attr_cols: + attr_name = self._get_flat_col_name(col) + try: + if not isna(d[col]): + elem_row.attrib[attr_name] = str(d[col]) + except KeyError: + raise KeyError(f"no valid column, {col}") + return elem_row + + def _get_flat_col_name(self, col: str | tuple) -> str: + flat_col = col + if isinstance(col, tuple): + flat_col = ( + "".join([str(c) for c in col]).strip() + if "" in col + else "_".join([str(c) for c in col]).strip() + ) + return f"{self.prefix_uri}{flat_col}" + + def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + """ + Create child elements of row. + + This method adds child elements using elem_cols to row element and + works with tuples for multindex or hierarchical columns. + """ + + raise AbstractMethodError(self) + + def _build_elems(self, sub_element_cls, d: dict[str, Any], elem_row: Any) -> None: + + if not self.elem_cols: + return + + for col in self.elem_cols: + elem_name = self._get_flat_col_name(col) + try: + val = None if isna(d[col]) or d[col] == "" else str(d[col]) + sub_element_cls(elem_row, elem_name).text = val + except KeyError: + raise KeyError(f"no valid column, {col}") + + def write_output(self) -> str | None: + xml_doc = self.build_tree() + + if self.path_or_buffer is not None: + with get_handle( + self.path_or_buffer, + "wb", + compression=self.compression, + storage_options=self.storage_options, + is_text=False, + ) as handles: + handles.handle.write(xml_doc) + return None + + else: + return xml_doc.decode(self.encoding).rstrip() + + +class EtreeXMLFormatter(BaseXMLFormatter): + """ + Class for formatting data in xml using Python standard library + modules: `xml.etree.ElementTree` and `xml.dom.minidom`. + """ + + def build_tree(self) -> bytes: + from xml.etree.ElementTree import ( + Element, + SubElement, + tostring, + ) + + self.root = Element( + f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces() + ) + + for d in self.frame_dicts.values(): + elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") + + if not self.attr_cols and not self.elem_cols: + self.elem_cols = list(d.keys()) + self.build_elems(d, elem_row) + + else: + elem_row = self.build_attribs(d, elem_row) + self.build_elems(d, elem_row) + + self.out_xml = tostring(self.root, method="xml", encoding=self.encoding) + + if self.pretty_print: + self.out_xml = self.prettify_tree() + + if self.xml_declaration: + self.out_xml = self.add_declaration() + else: + self.out_xml = self.remove_declaration() + + if self.stylesheet is not None: + raise ValueError( + "To use stylesheet, you need lxml installed and selected as parser." 
+ ) + + return self.out_xml + + def get_prefix_uri(self) -> str: + from xml.etree.ElementTree import register_namespace + + uri = "" + if self.namespaces: + for p, n in self.namespaces.items(): + if isinstance(p, str) and isinstance(n, str): + register_namespace(p, n) + if self.prefix: + try: + uri = f"{{{self.namespaces[self.prefix]}}}" + except KeyError: + raise KeyError(f"{self.prefix} is not included in namespaces") + else: + uri = f'{{{self.namespaces[""]}}}' + + return uri + + def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + from xml.etree.ElementTree import SubElement + + self._build_elems(SubElement, d, elem_row) + + def prettify_tree(self) -> bytes: + """ + Output tree for pretty print format. + + This method will pretty print xml with line breaks and indentation. + """ + + from xml.dom.minidom import parseString + + dom = parseString(self.out_xml) + + return dom.toprettyxml(indent=" ", encoding=self.encoding) + + def add_declaration(self) -> bytes: + """ + Add xml declaration. + + This method will add xml declaration of working tree. Currently, + xml_declaration is supported in etree starting in Python 3.8. + """ + decl = f'\n' + + doc = ( + self.out_xml + if self.out_xml.startswith(b" bytes: + """ + Remove xml declaration. + + This method will remove xml declaration of working tree. Currently, + pretty_print is not supported in etree. + """ + + return self.out_xml.split(b"?>")[-1].strip() + + +class LxmlXMLFormatter(BaseXMLFormatter): + """ + Class for formatting data in xml using Python standard library + modules: `xml.etree.ElementTree` and `xml.dom.minidom`. + """ + + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + + self.convert_empty_str_key() + + def build_tree(self) -> bytes: + """ + Build tree from data. + + This method initializes the root and builds attributes and elements + with optional namespaces. + """ + from lxml.etree import ( + Element, + SubElement, + tostring, + ) + + self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces) + + for d in self.frame_dicts.values(): + elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") + + if not self.attr_cols and not self.elem_cols: + self.elem_cols = list(d.keys()) + self.build_elems(d, elem_row) + + else: + elem_row = self.build_attribs(d, elem_row) + self.build_elems(d, elem_row) + + self.out_xml = tostring( + self.root, + pretty_print=self.pretty_print, + method="xml", + encoding=self.encoding, + xml_declaration=self.xml_declaration, + ) + + if self.stylesheet is not None: + self.out_xml = self.transform_doc() + + return self.out_xml + + def convert_empty_str_key(self) -> None: + """ + Replace zero-length string in `namespaces`. + + This method will replace '' with None to align to `lxml` + requirement that empty string prefixes are not allowed. + """ + + if self.namespaces and "" in self.namespaces.keys(): + self.namespaces[None] = self.namespaces.pop("", "default") + + def get_prefix_uri(self) -> str: + uri = "" + if self.namespaces: + if self.prefix: + try: + uri = f"{{{self.namespaces[self.prefix]}}}" + except KeyError: + raise KeyError(f"{self.prefix} is not included in namespaces") + else: + uri = f'{{{self.namespaces[""]}}}' + + return uri + + def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + from lxml.etree import SubElement + + self._build_elems(SubElement, d, elem_row) + + def transform_doc(self) -> bytes: + """ + Parse stylesheet from file or buffer and run it. 
+ + This method will parse stylesheet object into tree for parsing + conditionally by its specific object type, then transforms + original tree with XSLT script. + """ + from lxml.etree import ( + XSLT, + XMLParser, + fromstring, + parse, + ) + + style_doc = self.stylesheet + assert style_doc is not None # is ensured by caller + + handle_data = get_data_from_filepath( + filepath_or_buffer=style_doc, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) + + with preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) + + if isinstance(xml_data, io.StringIO): + xsl_doc = fromstring( + xml_data.getvalue().encode(self.encoding), parser=curr_parser + ) + else: + xsl_doc = parse(xml_data, parser=curr_parser) + + transformer = XSLT(xsl_doc) + new_doc = transformer(self.root) + + return bytes(new_doc) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py new file mode 100644 index 00000000..ec2ffbcf --- /dev/null +++ b/pandas/io/gbq.py @@ -0,0 +1,230 @@ +""" Google BigQuery support """ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, +) + +from pandas.compat._optional import import_optional_dependency + +if TYPE_CHECKING: + from pandas import DataFrame + + +def _try_import(): + # since pandas is a dependency of pandas-gbq + # we need to import on first use + msg = ( + "pandas-gbq is required to load data from Google BigQuery. " + "See the docs: https://pandas-gbq.readthedocs.io." + ) + pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) + return pandas_gbq + + +def read_gbq( + query: str, + project_id: str | None = None, + index_col: str | None = None, + col_order: list[str] | None = None, + reauth: bool = False, + auth_local_webserver: bool = True, + dialect: str | None = None, + location: str | None = None, + configuration: dict[str, Any] | None = None, + credentials=None, + use_bqstorage_api: bool | None = None, + max_results: int | None = None, + progress_bar_type: str | None = None, +) -> DataFrame: + """ + Load data from Google BigQuery. + + This function requires the `pandas-gbq package + `__. + + See the `How to authenticate with Google BigQuery + `__ + guide for authentication instructions. + + Parameters + ---------- + query : str + SQL-Like Query to return data values. + project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. + index_col : str, optional + Name of result column to use for index in results DataFrame. + col_order : list(str), optional + List of BigQuery column names in the desired order for results + DataFrame. + reauth : bool, default False + Force Google BigQuery to re-authenticate the user. This is useful + if multiple accounts are used. + auth_local_webserver : bool, default True + Use the `local webserver flow`_ instead of the `console flow`_ + when getting user credentials. + + .. _local webserver flow: + https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. _console flow: + https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + + *New in version 0.2.0 of pandas-gbq*. + + .. versionchanged:: 1.5.0 + Default value is changed to ``True``. Google has deprecated the + ``auth_local_webserver = False`` `"out of band" (copy-paste) + flow + `_. 
+ dialect : str, default 'legacy' + Note: The default value is changing to 'standard' in a future version. + + SQL syntax dialect to use. Value can be one of: + + ``'legacy'`` + Use BigQuery's legacy SQL dialect. For more information see + `BigQuery Legacy SQL Reference + `__. + ``'standard'`` + Use BigQuery's standard SQL, which is + compliant with the SQL 2011 standard. For more information + see `BigQuery Standard SQL Reference + `__. + location : str, optional + Location where the query job should run. See the `BigQuery locations + documentation + `__ for a + list of available locations. The location must match that of any + datasets used in the query. + + *New in version 0.5.0 of pandas-gbq*. + configuration : dict, optional + Query config parameters for job processing. + For example: + + configuration = {'query': {'useQueryCache': False}} + + For more information see `BigQuery REST API Reference + `__. + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to override + default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service Account + :class:`google.oauth2.service_account.Credentials` directly. + + *New in version 0.8.0 of pandas-gbq*. + use_bqstorage_api : bool, default False + Use the `BigQuery Storage API + `__ to + download query results quickly, but at an increased cost. To use this + API, first `enable it in the Cloud Console + `__. + You must also have the `bigquery.readsessions.create + `__ + permission on the project you are billing queries to. + + This feature requires version 0.10.0 or later of the ``pandas-gbq`` + package. It also requires the ``google-cloud-bigquery-storage`` and + ``fastavro`` packages. + + .. versionadded:: 0.25.0 + max_results : int, optional + If set, limit the maximum number of rows to fetch from the query + results. + + *New in version 0.12.0 of pandas-gbq*. + + .. versionadded:: 1.1.0 + progress_bar_type : Optional, str + If set, use the `tqdm `__ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + Note that this feature requires version 0.12.0 or later of the + ``pandas-gbq`` package. And it requires the ``tqdm`` package. Slightly + different than ``pandas-gbq``, here the default is ``None``. + + .. versionadded:: 1.0.0 + + Returns + ------- + df: DataFrame + DataFrame representing results of query. + + See Also + -------- + pandas_gbq.read_gbq : This function in the pandas-gbq library. + DataFrame.to_gbq : Write a DataFrame to Google BigQuery. + """ + pandas_gbq = _try_import() + + kwargs: dict[str, str | bool | int | None] = {} + + # START: new kwargs. Don't populate unless explicitly set. 
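    # Illustrative sketch of a typical call to this public wrapper (the project id
    # and query are placeholders, not taken from the surrounding code):
    #   df = pd.read_gbq(
    #       "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` LIMIT 5",
    #       project_id="my-gcp-project",
    #       dialect="standard",
    #   )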
+ if use_bqstorage_api is not None: + kwargs["use_bqstorage_api"] = use_bqstorage_api + if max_results is not None: + kwargs["max_results"] = max_results + + kwargs["progress_bar_type"] = progress_bar_type + # END: new kwargs + + return pandas_gbq.read_gbq( + query, + project_id=project_id, + index_col=index_col, + col_order=col_order, + reauth=reauth, + auth_local_webserver=auth_local_webserver, + dialect=dialect, + location=location, + configuration=configuration, + credentials=credentials, + **kwargs, + ) + + +def to_gbq( + dataframe: DataFrame, + destination_table: str, + project_id: str | None = None, + chunksize: int | None = None, + reauth: bool = False, + if_exists: str = "fail", + auth_local_webserver: bool = True, + table_schema: list[dict[str, str]] | None = None, + location: str | None = None, + progress_bar: bool = True, + credentials=None, +) -> None: + pandas_gbq = _try_import() + pandas_gbq.to_gbq( + dataframe, + destination_table, + project_id=project_id, + chunksize=chunksize, + reauth=reauth, + if_exists=if_exists, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, + location=location, + progress_bar=progress_bar, + credentials=credentials, + ) diff --git a/pandas/io/html.py b/pandas/io/html.py new file mode 100644 index 00000000..acf98a2f --- /dev/null +++ b/pandas/io/html.py @@ -0,0 +1,1222 @@ +""" +:mod:`pandas.io.html` is a module containing functionality for dealing with +HTML IO. + +""" + +from __future__ import annotations + +from collections import abc +import numbers +import re +from typing import ( + TYPE_CHECKING, + Iterable, + Literal, + Pattern, + Sequence, + cast, +) + +from pandas._typing import ( + FilePath, + ReadBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.errors import ( + AbstractMethodError, + EmptyDataError, +) +from pandas.util._decorators import deprecate_nonkeyword_arguments + +from pandas.core.dtypes.common import is_list_like + +from pandas import isna +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.indexes.base import Index +from pandas.core.indexes.multi import MultiIndex + +from pandas.io.common import ( + file_exists, + get_handle, + is_url, + stringify_path, + urlopen, + validate_header_arg, +) +from pandas.io.formats.printing import pprint_thing +from pandas.io.parsers import TextParser + +if TYPE_CHECKING: + from pandas import DataFrame + +_IMPORTS = False +_HAS_BS4 = False +_HAS_LXML = False +_HAS_HTML5LIB = False + + +def _importers() -> None: + # import things we need + # but make this done on a first use basis + + global _IMPORTS + if _IMPORTS: + return + + global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB + bs4 = import_optional_dependency("bs4", errors="ignore") + _HAS_BS4 = bs4 is not None + + lxml = import_optional_dependency("lxml.etree", errors="ignore") + _HAS_LXML = lxml is not None + + html5lib = import_optional_dependency("html5lib", errors="ignore") + _HAS_HTML5LIB = html5lib is not None + + _IMPORTS = True + + +############# +# READ HTML # +############# +_RE_WHITESPACE = re.compile(r"[\r\n]+|\s{2,}") + + +def _remove_whitespace(s: str, regex: Pattern = _RE_WHITESPACE) -> str: + """ + Replace extra whitespace inside of a string with a single space. + + Parameters + ---------- + s : str or unicode + The string from which to remove extra whitespace. + regex : re.Pattern + The regular expression to use to remove extra whitespace. 
+ + Returns + ------- + subd : str or unicode + `s` with all extra whitespace replaced with a single space. + """ + return regex.sub(" ", s.strip()) + + +def _get_skiprows(skiprows: int | Sequence[int] | slice | None) -> int | Sequence[int]: + """ + Get an iterator given an integer, slice or container. + + Parameters + ---------- + skiprows : int, slice, container + The iterator to use to skip rows; can also be a slice. + + Raises + ------ + TypeError + * If `skiprows` is not a slice, integer, or Container + + Returns + ------- + it : iterable + A proper iterator to use to skip rows of a DataFrame. + """ + if isinstance(skiprows, slice): + start, step = skiprows.start or 0, skiprows.step or 1 + return list(range(start, skiprows.stop, step)) + elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows): + return cast("int | Sequence[int]", skiprows) + elif skiprows is None: + return 0 + raise TypeError(f"{type(skiprows).__name__} is not a valid type for skipping rows") + + +def _read( + obj: bytes | FilePath | ReadBuffer[str] | ReadBuffer[bytes], encoding: str | None +) -> str | bytes: + """ + Try to read from a url, file or string. + + Parameters + ---------- + obj : str, unicode, path object, or file-like object + + Returns + ------- + raw_text : str + """ + text: str | bytes + if ( + is_url(obj) + or hasattr(obj, "read") + or (isinstance(obj, str) and file_exists(obj)) + ): + # error: Argument 1 to "get_handle" has incompatible type "Union[str, bytes, + # Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]]"; + # expected "Union[PathLike[str], Union[str, Union[IO[Any], RawIOBase, + # BufferedIOBase, TextIOBase, TextIOWrapper, mmap]]]" + with get_handle( + obj, "r", encoding=encoding # type: ignore[arg-type] + ) as handles: + text = handles.handle.read() + elif isinstance(obj, (str, bytes)): + text = obj + else: + raise TypeError(f"Cannot read object of type '{type(obj).__name__}'") + return text + + +class _HtmlFrameParser: + """ + Base class for parsers that parse HTML into DataFrames. + + Parameters + ---------- + io : str or file-like + This can be either a string of raw HTML, a valid URL using the HTTP, + FTP, or FILE protocols or a file-like object. + + match : str or regex + The text to match in the document. + + attrs : dict + List of HTML element attributes to match. + + encoding : str + Encoding to be used by parser + + displayed_only : bool + Whether or not items with "display:none" should be ignored + + extract_links : {None, "all", "header", "body", "footer"} + Table elements in the specified section(s) with tags will have their + href extracted. + + .. versionadded:: 1.5.0 + + Attributes + ---------- + io : str or file-like + raw HTML, URL, or file-like object + + match : regex + The text to match in the raw HTML + + attrs : dict-like + A dictionary of valid table attributes to use to search for table + elements. + + encoding : str + Encoding to be used by parser + + displayed_only : bool + Whether or not items with "display:none" should be ignored + + extract_links : {None, "all", "header", "body", "footer"} + Table elements in the specified section(s) with tags will have their + href extracted. + + .. 
versionadded:: 1.5.0 + + Notes + ----- + To subclass this class effectively you must override the following methods: + * :func:`_build_doc` + * :func:`_attr_getter` + * :func:`_href_getter` + * :func:`_text_getter` + * :func:`_parse_td` + * :func:`_parse_thead_tr` + * :func:`_parse_tbody_tr` + * :func:`_parse_tfoot_tr` + * :func:`_parse_tables` + * :func:`_equals_tag` + See each method's respective documentation for details on their + functionality. + """ + + def __init__( + self, + io: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + match: str | Pattern, + attrs: dict[str, str] | None, + encoding: str, + displayed_only: bool, + extract_links: Literal[None, "header", "footer", "body", "all"], + ) -> None: + self.io = io + self.match = match + self.attrs = attrs + self.encoding = encoding + self.displayed_only = displayed_only + self.extract_links = extract_links + + def parse_tables(self): + """ + Parse and return all tables from the DOM. + + Returns + ------- + list of parsed (header, body, footer) tuples from tables. + """ + tables = self._parse_tables(self._build_doc(), self.match, self.attrs) + return (self._parse_thead_tbody_tfoot(table) for table in tables) + + def _attr_getter(self, obj, attr): + """ + Return the attribute value of an individual DOM node. + + Parameters + ---------- + obj : node-like + A DOM node. + + attr : str or unicode + The attribute, such as "colspan" + + Returns + ------- + str or unicode + The attribute value. + """ + # Both lxml and BeautifulSoup have the same implementation: + return obj.get(attr) + + def _href_getter(self, obj): + """ + Return a href if the DOM node contains a child or None. + + Parameters + ---------- + obj : node-like + A DOM node. + + Returns + ------- + href : str or unicode + The href from the child of the DOM node. + """ + raise AbstractMethodError(self) + + def _text_getter(self, obj): + """ + Return the text of an individual DOM node. + + Parameters + ---------- + obj : node-like + A DOM node. + + Returns + ------- + text : str or unicode + The text from an individual DOM node. + """ + raise AbstractMethodError(self) + + def _parse_td(self, obj): + """ + Return the td elements from a row element. + + Parameters + ---------- + obj : node-like + A DOM node. + + Returns + ------- + list of node-like + These are the elements of each row, i.e., the columns. + """ + raise AbstractMethodError(self) + + def _parse_thead_tr(self, table): + """ + Return the list of thead row elements from the parsed table element. + + Parameters + ---------- + table : a table element that contains zero or more thead elements. + + Returns + ------- + list of node-like + These are the row elements of a table. + """ + raise AbstractMethodError(self) + + def _parse_tbody_tr(self, table): + """ + Return the list of tbody row elements from the parsed table element. + + HTML5 table bodies consist of either 0 or more elements (which + only contain elements) or 0 or more elements. This method + checks for both structures. + + Parameters + ---------- + table : a table element that contains row elements. + + Returns + ------- + list of node-like + These are the row elements of a table. + """ + raise AbstractMethodError(self) + + def _parse_tfoot_tr(self, table): + """ + Return the list of tfoot row elements from the parsed table element. + + Parameters + ---------- + table : a table element that contains row elements. + + Returns + ------- + list of node-like + These are the row elements of a table. 
+ """ + raise AbstractMethodError(self) + + def _parse_tables(self, doc, match, attrs): + """ + Return all tables from the parsed DOM. + + Parameters + ---------- + doc : the DOM from which to parse the table element. + + match : str or regular expression + The text to search for in the DOM tree. + + attrs : dict + A dictionary of table attributes that can be used to disambiguate + multiple tables on a page. + + Raises + ------ + ValueError : `match` does not match any text in the document. + + Returns + ------- + list of node-like + HTML
    elements to be parsed into raw data. + """ + raise AbstractMethodError(self) + + def _equals_tag(self, obj, tag): + """ + Return whether an individual DOM node matches a tag + + Parameters + ---------- + obj : node-like + A DOM node. + + tag : str + Tag name to be checked for equality. + + Returns + ------- + boolean + Whether `obj`'s tag name is `tag` + """ + raise AbstractMethodError(self) + + def _build_doc(self): + """ + Return a tree-like object that can be used to iterate over the DOM. + + Returns + ------- + node-like + The DOM from which to parse the table element. + """ + raise AbstractMethodError(self) + + def _parse_thead_tbody_tfoot(self, table_html): + """ + Given a table, return parsed header, body, and foot. + + Parameters + ---------- + table_html : node-like + + Returns + ------- + tuple of (header, body, footer), each a list of list-of-text rows. + + Notes + ----- + Header and body are lists-of-lists. Top level list is a list of + rows. Each row is a list of str text. + + Logic: Use , , elements to identify + header, body, and footer, otherwise: + - Put all rows into body + - Move rows from top of body to header only if + all elements inside row are . Move the top all- or + while body_rows and row_is_all_th(body_rows[0]): + header_rows.append(body_rows.pop(0)) + + header = self._expand_colspan_rowspan(header_rows, section="header") + body = self._expand_colspan_rowspan(body_rows, section="body") + footer = self._expand_colspan_rowspan(footer_rows, section="footer") + + return header, body, footer + + def _expand_colspan_rowspan( + self, rows, section: Literal["header", "footer", "body"] + ): + """ + Given a list of s, return a list of text rows. + + Parameters + ---------- + rows : list of node-like + List of s + section : the section that the rows belong to (header, body or footer). + + Returns + ------- + list of list + Each returned row is a list of str text, or tuple (text, link) + if extract_links is not None. + + Notes + ----- + Any cell with ``rowspan`` or ``colspan`` will have its contents copied + to subsequent cells. + """ + all_texts = [] # list of rows, each a list of str + text: str | tuple + remainder: list[ + tuple[int, str | tuple, int] + ] = [] # list of (index, text, nrows) + + for tr in rows: + texts = [] # the output for this row + next_remainder = [] + + index = 0 + tds = self._parse_td(tr) + for td in tds: + # Append texts from previous rows with rowspan>1 that come + # before this or (see _parse_thead_tr). + return row.xpath("./td|./th") + + def _parse_tables(self, doc, match, kwargs): + pattern = match.pattern + + # 1. check all descendants for the given pattern and only search tables + # 2. go up the tree until we find a table + xpath_expr = f"//table//*[re:test(text(), {repr(pattern)})]/ancestor::table" + + # if any table attributes were given build an xpath expression to + # search for them + if kwargs: + xpath_expr += _build_xpath_expr(kwargs) + + tables = doc.xpath(xpath_expr, namespaces=_re_namespace) + + tables = self._handle_hidden_tables(tables, "attrib") + if self.displayed_only: + for table in tables: + # lxml utilizes XPATH 1.0 which does not have regex + # support. 
As a result, we find all elements with a style + # attribute and iterate them to check for display:none + for elem in table.xpath(".//*[@style]"): + if "display:none" in elem.attrib.get("style", "").replace(" ", ""): + elem.getparent().remove(elem) + + if not tables: + raise ValueError(f"No tables found matching regex {repr(pattern)}") + return tables + + def _equals_tag(self, obj, tag): + return obj.tag == tag + + def _build_doc(self): + """ + Raises + ------ + ValueError + * If a URL that lxml cannot parse is passed. + + Exception + * Any other ``Exception`` thrown. For example, trying to parse a + URL that is syntactically correct on a machine with no internet + connection will fail. + + See Also + -------- + pandas.io.html._HtmlFrameParser._build_doc + """ + from lxml.etree import XMLSyntaxError + from lxml.html import ( + HTMLParser, + fromstring, + parse, + ) + + parser = HTMLParser(recover=True, encoding=self.encoding) + + try: + if is_url(self.io): + with urlopen(self.io) as f: + r = parse(f, parser=parser) + else: + # try to parse the input in the simplest way + r = parse(self.io, parser=parser) + try: + r = r.getroot() + except AttributeError: + pass + except (UnicodeDecodeError, OSError) as e: + # if the input is a blob of html goop + if not is_url(self.io): + r = fromstring(self.io, parser=parser) + + try: + r = r.getroot() + except AttributeError: + pass + else: + raise e + else: + if not hasattr(r, "text_content"): + raise XMLSyntaxError("no text parsed from document", 0, 0, 0) + + for br in r.xpath("*//br"): + br.tail = "\n" + (br.tail or "") + + return r + + def _parse_thead_tr(self, table): + rows = [] + + for thead in table.xpath(".//thead"): + rows.extend(thead.xpath("./tr")) + + # HACK: lxml does not clean up the clearly-erroneous + # . (Missing ). Add + # the and _pretend_ it's a ; _parse_td() will find its + # children as though it's a . + # + # Better solution would be to use html5lib. + elements_at_root = thead.xpath("./td|./th") + if elements_at_root: + rows.append(thead) + + return rows + + def _parse_tbody_tr(self, table): + from_tbody = table.xpath(".//tbody//tr") + from_root = table.xpath("./tr") + # HTML spec: at most one of these lists has content + return from_tbody + from_root + + def _parse_tfoot_tr(self, table): + return table.xpath(".//tfoot//tr") + + +def _expand_elements(body): + data = [len(elem) for elem in body] + lens = create_series_with_explicit_dtype(data, dtype_if_empty=object) + lens_max = lens.max() + not_max = lens[lens != lens_max] + + empty = [""] + for ind, length in not_max.items(): + body[ind] += empty * (lens_max - length) + + +def _data_to_frame(**kwargs): + head, body, foot = kwargs.pop("data") + header = kwargs.pop("header") + kwargs["skiprows"] = _get_skiprows(kwargs["skiprows"]) + if head: + body = head + body + + # Infer header when there is a or top ' in styler.to_html() + + +def test_rowspan_w3(): + # GH 38533 + df = DataFrame(data=[[1, 2]], index=[["l0", "l0"], ["l1a", "l1b"]]) + styler = Styler(df, uuid="_", cell_ids=False) + assert '' in styler.to_html() + + +def test_styles(styler): + styler.set_uuid("abc") + styler.set_table_styles([{"selector": "td", "props": "color: red;"}]) + result = styler.to_html(doctype_html=True) + expected = dedent( + """\ + + + + + + + +
+          - Move rows from bottom of body to footer only if
+            all elements inside row are <th>
+        """
+        header_rows = self._parse_thead_tr(table_html)
+        body_rows = self._parse_tbody_tr(table_html)
+        footer_rows = self._parse_tfoot_tr(table_html)
+
+        def row_is_all_th(row):
+            return all(self._equals_tag(t, "th") for t in self._parse_td(row))
+
+        if not header_rows:
+            # The table has no <thead>. Move the top all-<th> rows from
+            # body_rows to header_rows. (This is a common case because many
+            # tables in the wild have no <thead> or <tfoot>)
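+                # ``remainder`` holds (col_index, text, rows_left) entries for cells
+                # declared with rowspan > 1; their text is re-emitted at the same
+                # column position in the following rows until the span is exhausted.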
    + while remainder and remainder[0][0] <= index: + prev_i, prev_text, prev_rowspan = remainder.pop(0) + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + index += 1 + + # Append the text from this , colspan times + text = _remove_whitespace(self._text_getter(td)) + if self.extract_links == "all" or self.extract_links == section: + href = self._href_getter(td) + text = (text, href) + rowspan = int(self._attr_getter(td, "rowspan") or 1) + colspan = int(self._attr_getter(td, "colspan") or 1) + + for _ in range(colspan): + texts.append(text) + if rowspan > 1: + next_remainder.append((index, text, rowspan - 1)) + index += 1 + + # Append texts from previous rows at the final position + for prev_i, prev_text, prev_rowspan in remainder: + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + + all_texts.append(texts) + remainder = next_remainder + + # Append rows that only appear because the previous row had non-1 + # rowspan + while remainder: + next_remainder = [] + texts = [] + for prev_i, prev_text, prev_rowspan in remainder: + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + all_texts.append(texts) + remainder = next_remainder + + return all_texts + + def _handle_hidden_tables(self, tbl_list, attr_name): + """ + Return list of tables, potentially removing hidden elements + + Parameters + ---------- + tbl_list : list of node-like + Type of list elements will vary depending upon parser used + attr_name : str + Name of the accessor for retrieving HTML attributes + + Returns + ------- + list of node-like + Return type matches `tbl_list` + """ + if not self.displayed_only: + return tbl_list + + return [ + x + for x in tbl_list + if "display:none" + not in getattr(x, attr_name).get("style", "").replace(" ", "") + ] + + +class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): + """ + HTML to DataFrame parser that uses BeautifulSoup under the hood. + + See Also + -------- + pandas.io.html._HtmlFrameParser + pandas.io.html._LxmlFrameParser + + Notes + ----- + Documentation strings for this class are in the base class + :class:`pandas.io.html._HtmlFrameParser`. 
+ """ + + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + from bs4 import SoupStrainer + + self._strainer = SoupStrainer("table") + + def _parse_tables(self, doc, match, attrs): + element_name = self._strainer.name + tables = doc.find_all(element_name, attrs=attrs) + + if not tables: + raise ValueError("No tables found") + + result = [] + unique_tables = set() + tables = self._handle_hidden_tables(tables, "attrs") + + for table in tables: + if self.displayed_only: + for elem in table.find_all(style=re.compile(r"display:\s*none")): + elem.decompose() + + if table not in unique_tables and table.find(string=match) is not None: + result.append(table) + unique_tables.add(table) + + if not result: + raise ValueError(f"No tables found matching pattern {repr(match.pattern)}") + return result + + def _href_getter(self, obj) -> str | None: + a = obj.find("a", href=True) + return None if not a else a["href"] + + def _text_getter(self, obj): + return obj.text + + def _equals_tag(self, obj, tag): + return obj.name == tag + + def _parse_td(self, row): + return row.find_all(("td", "th"), recursive=False) + + def _parse_thead_tr(self, table): + return table.select("thead tr") + + def _parse_tbody_tr(self, table): + from_tbody = table.select("tbody tr") + from_root = table.find_all("tr", recursive=False) + # HTML spec: at most one of these lists has content + return from_tbody + from_root + + def _parse_tfoot_tr(self, table): + return table.select("tfoot tr") + + def _setup_build_doc(self): + raw_text = _read(self.io, self.encoding) + if not raw_text: + raise ValueError(f"No text parsed from document: {self.io}") + return raw_text + + def _build_doc(self): + from bs4 import BeautifulSoup + + bdoc = self._setup_build_doc() + if isinstance(bdoc, bytes) and self.encoding is not None: + udoc = bdoc.decode(self.encoding) + from_encoding = None + else: + udoc = bdoc + from_encoding = self.encoding + + soup = BeautifulSoup(udoc, features="html5lib", from_encoding=from_encoding) + + for br in soup.find_all("br"): + br.replace_with("\n" + br.text) + + return soup + + +def _build_xpath_expr(attrs) -> str: + """ + Build an xpath expression to simulate bs4's ability to pass in kwargs to + search for attributes when using the lxml parser. + + Parameters + ---------- + attrs : dict + A dict of HTML attributes. These are NOT checked for validity. + + Returns + ------- + expr : unicode + An XPath expression that checks for the given HTML attributes. + """ + # give class attribute as class_ because class is a python keyword + if "class_" in attrs: + attrs["class"] = attrs.pop("class_") + + s = " and ".join([f"@{k}={repr(v)}" for k, v in attrs.items()]) + return f"[{s}]" + + +_re_namespace = {"re": "http://exslt.org/regular-expressions"} + + +class _LxmlFrameParser(_HtmlFrameParser): + """ + HTML to DataFrame parser that uses lxml under the hood. + + Warning + ------- + This parser can only handle HTTP, FTP, and FILE urls. + + See Also + -------- + _HtmlFrameParser + _BeautifulSoupLxmlFrameParser + + Notes + ----- + Documentation strings for this class are in the base class + :class:`_HtmlFrameParser`. + """ + + def _href_getter(self, obj) -> str | None: + href = obj.xpath(".//a/@href") + return None if not href else href[0] + + def _text_getter(self, obj): + return obj.text_content() + + def _parse_td(self, row): + # Look for direct children only: the "row" element here may be a + #
    foobar
    -only rows + if header is None: + if len(head) == 1: + header = 0 + else: + # ignore all-empty-text rows + header = [i for i, row in enumerate(head) if any(text for text in row)] + + if foot: + body += foot + + # fill out elements of body that are "ragged" + _expand_elements(body) + with TextParser(body, header=header, **kwargs) as tp: + return tp.read() + + +_valid_parsers = { + "lxml": _LxmlFrameParser, + None: _LxmlFrameParser, + "html5lib": _BeautifulSoupHtml5LibFrameParser, + "bs4": _BeautifulSoupHtml5LibFrameParser, +} + + +def _parser_dispatch(flavor: str | None) -> type[_HtmlFrameParser]: + """ + Choose the parser based on the input flavor. + + Parameters + ---------- + flavor : str + The type of parser to use. This must be a valid backend. + + Returns + ------- + cls : _HtmlFrameParser subclass + The parser class based on the requested input flavor. + + Raises + ------ + ValueError + * If `flavor` is not a valid backend. + ImportError + * If you do not have the requested `flavor` + """ + valid_parsers = list(_valid_parsers.keys()) + if flavor not in valid_parsers: + raise ValueError( + f"{repr(flavor)} is not a valid flavor, valid flavors are {valid_parsers}" + ) + + if flavor in ("bs4", "html5lib"): + if not _HAS_HTML5LIB: + raise ImportError("html5lib not found, please install it") + if not _HAS_BS4: + raise ImportError("BeautifulSoup4 (bs4) not found, please install it") + # Although we call this above, we want to raise here right before use. + bs4 = import_optional_dependency("bs4") # noqa:F841 + + else: + if not _HAS_LXML: + raise ImportError("lxml not found, please install it") + return _valid_parsers[flavor] + + +def _print_as_set(s) -> str: + arg = ", ".join([pprint_thing(el) for el in s]) + return f"{{{arg}}}" + + +def _validate_flavor(flavor): + if flavor is None: + flavor = "lxml", "bs4" + elif isinstance(flavor, str): + flavor = (flavor,) + elif isinstance(flavor, abc.Iterable): + if not all(isinstance(flav, str) for flav in flavor): + raise TypeError( + f"Object of type {repr(type(flavor).__name__)} " + f"is not an iterable of strings" + ) + else: + msg = repr(flavor) if isinstance(flavor, str) else str(flavor) + msg += " is not a valid flavor" + raise ValueError(msg) + + flavor = tuple(flavor) + valid_flavors = set(_valid_parsers) + flavor_set = set(flavor) + + if not flavor_set & valid_flavors: + raise ValueError( + f"{_print_as_set(flavor_set)} is not a valid set of flavors, valid " + f"flavors are {_print_as_set(valid_flavors)}" + ) + return flavor + + +def _parse(flavor, io, match, attrs, encoding, displayed_only, extract_links, **kwargs): + flavor = _validate_flavor(flavor) + compiled_match = re.compile(match) # you can pass a compiled regex here + + retained = None + for flav in flavor: + parser = _parser_dispatch(flav) + p = parser(io, compiled_match, attrs, encoding, displayed_only, extract_links) + + try: + tables = p.parse_tables() + except ValueError as caught: + # if `io` is an io-like object, check if it's seekable + # and try to rewind it before trying the next parser + if hasattr(io, "seekable") and io.seekable(): + io.seek(0) + elif hasattr(io, "seekable") and not io.seekable(): + # if we couldn't rewind it, let the user know + raise ValueError( + f"The flavor {flav} failed to parse your input. " + "Since you passed a non-rewindable file " + "object, we can't rewind it to try " + "another parser. Try read_html() with a different flavor." 
+ ) from caught + + retained = caught + else: + break + else: + assert retained is not None # for mypy + raise retained + + ret = [] + for table in tables: + try: + df = _data_to_frame(data=table, **kwargs) + # Cast MultiIndex header to an Index of tuples when extracting header + # links and replace nan with None (therefore can't use mi.to_flat_index()). + # This maintains consistency of selection (e.g. df.columns.str[1]) + if extract_links in ("all", "header") and isinstance( + df.columns, MultiIndex + ): + df.columns = Index( + ((col[0], None if isna(col[1]) else col[1]) for col in df.columns), + tupleize_cols=False, + ) + + ret.append(df) + except EmptyDataError: # empty table + continue + return ret + + +@deprecate_nonkeyword_arguments(version="2.0") +def read_html( + io: FilePath | ReadBuffer[str], + match: str | Pattern = ".+", + flavor: str | None = None, + header: int | Sequence[int] | None = None, + index_col: int | Sequence[int] | None = None, + skiprows: int | Sequence[int] | slice | None = None, + attrs: dict[str, str] | None = None, + parse_dates: bool = False, + thousands: str | None = ",", + encoding: str | None = None, + decimal: str = ".", + converters: dict | None = None, + na_values: Iterable[object] | None = None, + keep_default_na: bool = True, + displayed_only: bool = True, + extract_links: Literal[None, "header", "footer", "body", "all"] = None, +) -> list[DataFrame]: + r""" + Read HTML tables into a ``list`` of ``DataFrame`` objects. + + Parameters + ---------- + io : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a string ``read()`` function. + The string can represent a URL or the HTML itself. Note that + lxml only accepts the http, ftp and file url protocols. If you have a + URL that starts with ``'https'`` you might try removing the ``'s'``. + + match : str or compiled regular expression, optional + The set of tables containing text matching this regex or string will be + returned. Unless the HTML is extremely simple you will probably need to + pass a non-empty string here. Defaults to '.+' (match any non-empty + string). The default value will return all tables contained on a page. + This value is converted to a regular expression so that there is + consistent behavior between Beautiful Soup and lxml. + + flavor : str, optional + The parsing engine to use. 'bs4' and 'html5lib' are synonymous with + each other, they are both there for backwards compatibility. The + default of ``None`` tries to use ``lxml`` to parse and if that fails it + falls back on ``bs4`` + ``html5lib``. + + header : int or list-like, optional + The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to + make the columns headers. + + index_col : int or list-like, optional + The column (or list of columns) to use to create the index. + + skiprows : int, list-like or slice, optional + Number of rows to skip after parsing the column integer. 0-based. If a + sequence of integers or a slice is given, will skip the rows indexed by + that sequence. Note that a single element sequence means 'skip the nth + row' whereas an integer means 'skip n rows'. + + attrs : dict, optional + This is a dictionary of attributes that you can pass to use to identify + the table in the HTML. These are not checked for validity before being + passed to lxml or Beautiful Soup. However, these attributes must be + valid HTML table attributes to work correctly. 
For example, :: + + attrs = {'id': 'table'} + + is a valid attribute dictionary because the 'id' HTML tag attribute is + a valid HTML attribute for *any* HTML tag as per `this document + `__. :: + + attrs = {'asdf': 'table'} + + is *not* a valid attribute dictionary because 'asdf' is not a valid + HTML attribute even if it is a valid XML attribute. Valid HTML 4.01 + table attributes can be found `here + `__. A + working draft of the HTML 5 spec can be found `here + `__. It contains the + latest information on table attributes for the modern web. + + parse_dates : bool, optional + See :func:`~read_csv` for more details. + + thousands : str, optional + Separator to use to parse thousands. Defaults to ``','``. + + encoding : str, optional + The encoding used to decode the web page. Defaults to ``None``.``None`` + preserves the previous encoding behavior, which depends on the + underlying parser library (e.g., the parser library will try to use + the encoding provided by the document). + + decimal : str, default '.' + Character to recognize as decimal point (e.g. use ',' for European + data). + + converters : dict, default None + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the cell (not column) content, and return the + transformed content. + + na_values : iterable, default None + Custom NA values. + + keep_default_na : bool, default True + If na_values are specified and keep_default_na is False the default NaN + values are overridden, otherwise they're appended to. + + displayed_only : bool, default True + Whether elements with "display: none" should be parsed. + + extract_links : {None, "all", "header", "body", "footer"} + Table elements in the specified section(s) with tags will have their + href extracted. + + .. versionadded:: 1.5.0 + + Returns + ------- + dfs + A list of DataFrames. + + See Also + -------- + read_csv : Read a comma-separated values (csv) file into DataFrame. + + Notes + ----- + Before using this function you should read the :ref:`gotchas about the + HTML parsing libraries `. + + Expect to do some cleanup after you call this function. For example, you + might need to manually assign column names if the column names are + converted to NaN when you pass the `header=0` argument. We try to assume as + little as possible about the structure of the table and push the + idiosyncrasies of the HTML contained in the table to the user. 
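+
+    For instance, a minimal illustrative call (``page`` is a placeholder for any
+    URL, file path or raw HTML string, and the ``match`` pattern is arbitrary)
+    could look like::
+
+        dfs = pd.read_html(page, match="Population", header=0)
+        df = dfs[0]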
+ + This function searches for ```` elements and only for ```` + and ```` or ```` argument, it is used to construct + the header, otherwise the function attempts to find the header within + the body (by putting rows with only ``' + assert expected in s.to_html() + + # only the value should be escaped before passing to the formatter + s = Styler(df, uuid_len=0).format("&{0}&", escape=escape) + expected = f'' + assert expected in s.to_html() + + # also test format_index() + styler = Styler(DataFrame(columns=[chars]), uuid_len=0) + styler.format_index("&{0}&", escape=None, axis=1) + assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&" + styler.format_index("&{0}&", escape=escape, axis=1) + assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&" + + +def test_format_escape_na_rep(): + # tests the na_rep is not escaped + df = DataFrame([['<>&"', None]]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&") + ex = '' + expected2 = '' + assert ex in s.to_html() + assert expected2 in s.to_html() + + # also test for format_index() + df = DataFrame(columns=['<>&"', None]) + styler = Styler(df, uuid_len=0) + styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1) + ctx = styler._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "X&<>&">X" + assert ctx["head"][0][2]["display_value"] == "&" + + +def test_format_escape_floats(styler): + # test given formatter for number format is not impacted by escape + s = styler.format("{:.1f}", escape="html") + for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]: + assert expected in s.to_html() + # tests precision of floats is not impacted by escape + s = styler.format(precision=1, escape="html") + for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: + assert expected in s.to_html() + + +@pytest.mark.parametrize("formatter", [5, True, [2.0]]) +@pytest.mark.parametrize("func", ["format", "format_index"]) +def test_format_raises(styler, formatter, func): + with pytest.raises(TypeError, match="expected str or callable"): + getattr(styler, func)(formatter) + + +@pytest.mark.parametrize( + "precision, expected", + [ + (1, ["1.0", "2.0", "3.2", "4.6"]), + (2, ["1.00", "2.01", "3.21", "4.57"]), + (3, ["1.000", "2.009", "3.212", "4.566"]), + ], +) +def test_format_with_precision(precision, expected): + # Issue #13257 + df = DataFrame([[1.0, 2.0090, 3.2121, 4.566]], columns=[1.0, 2.0090, 3.2121, 4.566]) + styler = Styler(df) + styler.format(precision=precision) + styler.format_index(precision=precision, axis=1) + + ctx = styler._translate(True, True) + for col, exp in enumerate(expected): + assert ctx["body"][0][col + 1]["display_value"] == exp # format test + assert ctx["head"][0][col + 1]["display_value"] == exp # format_index test + + +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize( + "level, expected", + [ + (0, ["X", "X", "_", "_"]), # level int + ("zero", ["X", "X", "_", "_"]), # level name + (1, ["_", "_", "X", "X"]), # other level int + ("one", ["_", "_", "X", "X"]), # other level name + ([0, 1], ["X", "X", "X", "X"]), # both levels + ([0, "zero"], ["X", "X", "_", "_"]), # level int and name simultaneous + ([0, "one"], ["X", "X", "X", "X"]), # both levels as int and name + (["one", "zero"], ["X", "X", "X", "X"]), # both level names, reversed + ], +) +def test_format_index_level(axis, level, expected): + midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"]) + df = DataFrame([[1, 2], [3, 4]]) + if axis == 0: + 
df.index = midx + else: + df.columns = midx + + styler = df.style.format_index(lambda v: "X", level=level, axis=axis) + ctx = styler._translate(True, True) + + if axis == 0: # compare index + result = [ctx["body"][s][0]["display_value"] for s in range(2)] + result += [ctx["body"][s][1]["display_value"] for s in range(2)] + else: # compare columns + result = [ctx["head"][0][s + 1]["display_value"] for s in range(2)] + result += [ctx["head"][1][s + 1]["display_value"] for s in range(2)] + + assert expected == result + + +def test_format_subset(): + df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) + ctx = df.style.format( + {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :] + )._translate(True, True) + expected = "0.1" + raw_11 = "1.123400" + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + assert ctx["body"][0][2]["display_value"] == "12.34%" + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, :])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + + ctx = df.style.format("{:0.1f}", subset=IndexSlice["a"])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][0][2]["display_value"] == "0.123400" + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, "a"])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])._translate( + True, True + ) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == "1.1" + assert ctx["body"][0][2]["display_value"] == "0.123400" + assert ctx["body"][1][2]["display_value"] == raw_11 + + +@pytest.mark.parametrize("formatter", [None, "{:,.1f}"]) +@pytest.mark.parametrize("decimal", [".", "*"]) +@pytest.mark.parametrize("precision", [None, 2]) +@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)]) +def test_format_thousands(formatter, decimal, precision, func, col): + styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style + result = getattr(styler, func)( # testing float + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1000000]], index=[1000000]).style + result = getattr(styler, func)( # testing int + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style + result = getattr(styler, func)( # testing complex + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + +@pytest.mark.parametrize("formatter", [None, "{:,.4f}"]) +@pytest.mark.parametrize("thousands", [None, ",", "*"]) +@pytest.mark.parametrize("precision", [None, 4]) +@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)]) +def test_format_decimal(formatter, thousands, precision, func, col): + styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style + result = getattr(styler, func)( # testing float + decimal="_", formatter=formatter, thousands=thousands, 
precision=precision + )._translate(True, True) + assert "000_123" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style + result = getattr(styler, func)( # testing complex + decimal="_", formatter=formatter, thousands=thousands, precision=precision + )._translate(True, True) + assert "000_123" in result["body"][0][col]["display_value"] + + +def test_str_escape_error(): + msg = "`escape` only permitted in {'html', 'latex'}, got " + with pytest.raises(ValueError, match=msg): + _str_escape("text", "bad_escape") + + with pytest.raises(ValueError, match=msg): + _str_escape("text", []) + + _str_escape(2.00, "bad_escape") # OK since dtype is float + + +def test_format_options(): + df = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]}) + ctx = df.style._translate(True, True) + + # test option: na_rep + assert ctx["body"][1][2]["display_value"] == "nan" + with option_context("styler.format.na_rep", "MISSING"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][1][2]["display_value"] == "MISSING" + + # test option: decimal and precision + assert ctx["body"][0][2]["display_value"] == "1.009000" + with option_context("styler.format.decimal", "_"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][2]["display_value"] == "1_009000" + with option_context("styler.format.precision", 2): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][2]["display_value"] == "1.01" + + # test option: thousands + assert ctx["body"][0][1]["display_value"] == "2000" + with option_context("styler.format.thousands", "_"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][1]["display_value"] == "2_000" + + # test option: escape + assert ctx["body"][0][3]["display_value"] == "&<" + assert ctx["body"][1][3]["display_value"] == "&~" + with option_context("styler.format.escape", "html"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][3]["display_value"] == "&<" + with option_context("styler.format.escape", "latex"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde " + + # test option: formatter + with option_context("styler.format.formatter", {"int": "{:,.2f}"}): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][1]["display_value"] == "2,000.00" + + +def test_precision_zero(df): + styler = Styler(df, precision=0) + ctx = styler._translate(True, True) + assert ctx["body"][0][2]["display_value"] == "-1" + assert ctx["body"][1][2]["display_value"] == "-1" + + +@pytest.mark.parametrize( + "formatter, exp", + [ + (lambda x: f"{x:.3f}", "9.000"), + ("{:.2f}", "9.00"), + ({0: "{:.1f}"}, "9.0"), + (None, "9"), + ], +) +def test_formatter_options_validator(formatter, exp): + df = DataFrame([[9]]) + with option_context("styler.format.formatter", formatter): + assert f" {exp} " in df.style.to_latex() + + +def test_formatter_options_raises(): + msg = "Value must be an instance of" + with pytest.raises(ValueError, match=msg): + with option_context("styler.format.formatter", ["bad", "type"]): + DataFrame().style.to_latex() + + +def test_1level_multiindex(): + # GH 43383 + midx = MultiIndex.from_product([[1, 2]], names=[""]) + df = DataFrame(-1, index=midx, columns=[0, 1]) + ctx = df.style._translate(True, True) + assert ctx["body"][0][0]["display_value"] == "1" + assert 
ctx["body"][0][0]["is_visible"] is True + assert ctx["body"][1][0]["display_value"] == "2" + assert ctx["body"][1][0]["is_visible"] is True + + +def test_boolean_format(): + # gh 46384: booleans do not collapse to integer representation on display + df = DataFrame([[True, False]]) + ctx = df.style._translate(True, True) + assert ctx["body"][0][1]["display_value"] is True + assert ctx["body"][0][2]["display_value"] is False + + +@pytest.mark.parametrize( + "hide, labels", + [ + (False, [1, 2]), + (True, [1, 2, 3, 4]), + ], +) +def test_relabel_raise_length(styler_multi, hide, labels): + if hide: + styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")]) + with pytest.raises(ValueError, match="``labels`` must be of length equal"): + styler_multi.relabel_index(labels=labels) + + +def test_relabel_index(styler_multi): + labels = [(1, 2), (3, 4)] + styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")]) + styler_multi.relabel_index(labels=labels) + ctx = styler_multi._translate(True, True) + assert {"value": "X", "display_value": 1}.items() <= ctx["body"][0][0].items() + assert {"value": "y", "display_value": 2}.items() <= ctx["body"][0][1].items() + assert {"value": "Y", "display_value": 3}.items() <= ctx["body"][1][0].items() + assert {"value": "x", "display_value": 4}.items() <= ctx["body"][1][1].items() + + +def test_relabel_columns(styler_multi): + labels = [(1, 2), (3, 4)] + styler_multi.hide(axis=1, subset=[("A", "a"), ("B", "b")]) + styler_multi.relabel_index(axis=1, labels=labels) + ctx = styler_multi._translate(True, True) + assert {"value": "A", "display_value": 1}.items() <= ctx["head"][0][3].items() + assert {"value": "B", "display_value": 3}.items() <= ctx["head"][0][4].items() + assert {"value": "b", "display_value": 2}.items() <= ctx["head"][1][3].items() + assert {"value": "a", "display_value": 4}.items() <= ctx["head"][1][4].items() + + +def test_relabel_roundtrip(styler): + styler.relabel_index(["{}", "{}"]) + ctx = styler._translate(True, True) + assert {"value": "x", "display_value": "x"}.items() <= ctx["body"][0][0].items() + assert {"value": "y", "display_value": "y"}.items() <= ctx["body"][1][0].items() diff --git a/pandas/tests/io/formats/style/test_highlight.py b/pandas/tests/io/formats/style/test_highlight.py new file mode 100644 index 00000000..3d597190 --- /dev/null +++ b/pandas/tests/io/formats/style/test_highlight.py @@ -0,0 +1,218 @@ +import numpy as np +import pytest + +from pandas import ( + NA, + DataFrame, + IndexSlice, +) + +pytest.importorskip("jinja2") + +from pandas.io.formats.style import Styler + + +@pytest.fixture(params=[(None, "float64"), (NA, "Int64")]) +def df(request): + # GH 45804 + return DataFrame( + {"A": [0, np.nan, 10], "B": [1, request.param[0], 2]}, dtype=request.param[1] + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_highlight_null(styler): + result = styler.highlight_null()._compute().ctx + expected = { + (1, 0): [("background-color", "red")], + (1, 1): [("background-color", "red")], + } + assert result == expected + + +def test_highlight_null_subset(styler): + # GH 31345 + result = ( + styler.highlight_null(color="red", subset=["A"]) + .highlight_null(color="green", subset=["B"]) + ._compute() + .ctx + ) + expected = { + (1, 0): [("background-color", "red")], + (1, 1): [("background-color", "green")], + } + assert result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +def test_highlight_minmax_basic(df, f): + expected = { + (0, 1): [("background-color", 
"red")], + # ignores NaN row, + (2, 0): [("background-color", "red")], + } + if f == "highlight_min": + df = -df + result = getattr(df.style, f)(axis=1, color="red")._compute().ctx + assert result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +@pytest.mark.parametrize( + "kwargs", + [ + {"axis": None, "color": "red"}, # test axis + {"axis": 0, "subset": ["A"], "color": "red"}, # test subset and ignores NaN + {"axis": None, "props": "background-color: red"}, # test props + ], +) +def test_highlight_minmax_ext(df, f, kwargs): + expected = {(2, 0): [("background-color", "red")]} + if f == "highlight_min": + df = -df + result = getattr(df.style, f)(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +@pytest.mark.parametrize("axis", [None, 0, 1]) +def test_highlight_minmax_nulls(f, axis): + # GH 42750 + expected = { + (1, 0): [("background-color", "yellow")], + (1, 1): [("background-color", "yellow")], + } + if axis == 1: + expected.update({(2, 1): [("background-color", "yellow")]}) + + if f == "highlight_max": + df = DataFrame({"a": [NA, 1, None], "b": [np.nan, 1, -1]}) + else: + df = DataFrame({"a": [NA, -1, None], "b": [np.nan, -1, 1]}) + + result = getattr(df.style, f)(axis=axis)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "kwargs", + [ + {"left": 0, "right": 1}, # test basic range + {"left": 0, "right": 1, "props": "background-color: yellow"}, # test props + {"left": -100, "right": 100, "subset": IndexSlice[[0, 1], :]}, # test subset + {"left": 0, "subset": IndexSlice[[0, 1], :]}, # test no right + {"right": 1}, # test no left + {"left": [0, 0, 11], "axis": 0}, # test left as sequence + {"left": DataFrame({"A": [0, 0, 11], "B": [1, 1, 11]}), "axis": None}, # axis + {"left": 0, "right": [0, 1], "axis": 1}, # test sequence right + ], +) +def test_highlight_between(styler, kwargs): + expected = { + (0, 0): [("background-color", "yellow")], + (0, 1): [("background-color", "yellow")], + } + result = styler.highlight_between(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "arg, map, axis", + [ + ("left", [1, 2], 0), # 0 axis has 3 elements not 2 + ("left", [1, 2, 3], 1), # 1 axis has 2 elements not 3 + ("left", np.array([[1, 2], [1, 2]]), None), # df is (2,3) not (2,2) + ("right", [1, 2], 0), # same tests as above for 'right' not 'left' + ("right", [1, 2, 3], 1), # .. + ("right", np.array([[1, 2], [1, 2]]), None), # .. 
+ ], +) +def test_highlight_between_raises(arg, styler, map, axis): + msg = f"supplied '{arg}' is not correct shape" + with pytest.raises(ValueError, match=msg): + styler.highlight_between(**{arg: map, "axis": axis})._compute() + + +def test_highlight_between_raises2(styler): + msg = "values can be 'both', 'left', 'right', or 'neither'" + with pytest.raises(ValueError, match=msg): + styler.highlight_between(inclusive="badstring")._compute() + + with pytest.raises(ValueError, match=msg): + styler.highlight_between(inclusive=1)._compute() + + +@pytest.mark.parametrize( + "inclusive, expected", + [ + ( + "both", + { + (0, 0): [("background-color", "yellow")], + (0, 1): [("background-color", "yellow")], + }, + ), + ("neither", {}), + ("left", {(0, 0): [("background-color", "yellow")]}), + ("right", {(0, 1): [("background-color", "yellow")]}), + ], +) +def test_highlight_between_inclusive(styler, inclusive, expected): + kwargs = {"left": 0, "right": 1, "subset": IndexSlice[[0, 1], :]} + result = styler.highlight_between(**kwargs, inclusive=inclusive)._compute() + assert result.ctx == expected + + +@pytest.mark.parametrize( + "kwargs", + [ + {"q_left": 0.5, "q_right": 1, "axis": 0}, # base case + {"q_left": 0.5, "q_right": 1, "axis": None}, # test axis + {"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]}, # test subset + {"q_left": 0.5, "axis": 0}, # test no high + {"q_right": 1, "subset": IndexSlice[2, :], "axis": 1}, # test no low + {"q_left": 0.5, "axis": 0, "props": "background-color: yellow"}, # tst prop + ], +) +def test_highlight_quantile(styler, kwargs): + expected = { + (2, 0): [("background-color", "yellow")], + (2, 1): [("background-color", "yellow")], + } + result = styler.highlight_quantile(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "f,kwargs", + [ + ("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}), + ("highlight_max", {"axis": 0, "subset": [0]}), + ("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}), + ("highlight_between", {"subset": [0]}), + ], +) +@pytest.mark.parametrize( + "df", + [ + DataFrame([[0, 10], [20, 30]], dtype=int), + DataFrame([[0, 10], [20, 30]], dtype=float), + DataFrame([[0, 10], [20, 30]], dtype="datetime64[ns]"), + DataFrame([[0, 10], [20, 30]], dtype=str), + DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"), + ], +) +def test_all_highlight_dtypes(f, kwargs, df): + if f == "highlight_quantile" and isinstance(df.iloc[0, 0], (str)): + return None # quantile incompatible with str + if f == "highlight_between": + kwargs["left"] = df.iloc[1, 0] # set the range low for testing + + expected = {(1, 0): [("background-color", "yellow")]} + result = getattr(df.style, f)(**kwargs)._compute().ctx + assert result == expected diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py new file mode 100644 index 00000000..4ae95645 --- /dev/null +++ b/pandas/tests/io/formats/style/test_html.py @@ -0,0 +1,977 @@ +from textwrap import ( + dedent, + indent, +) + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + option_context, +) + +jinja2 = pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler + +loader = jinja2.PackageLoader("pandas", "io/formats/templates") +env = jinja2.Environment(loader=loader, trim_blocks=True) + + +@pytest.fixture +def styler(): + return Styler(DataFrame([[2.61], [2.69]], index=["a", "b"], columns=["A"])) + + +@pytest.fixture +def styler_mi(): + midx = 
MultiIndex.from_product([["a", "b"], ["c", "d"]]) + return Styler(DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx)) + + +@pytest.fixture +def tpl_style(): + return env.get_template("html_style.tpl") + + +@pytest.fixture +def tpl_table(): + return env.get_template("html_table.tpl") + + +def test_html_template_extends_options(): + # make sure if templates are edited tests are updated as are setup fixtures + # to understand the dependency + with open("pandas/io/formats/templates/html.tpl") as file: + result = file.read() + assert "{% include html_style_tpl %}" in result + assert "{% include html_table_tpl %}" in result + + +def test_exclude_styles(styler): + result = styler.to_html(exclude_styles=True, doctype_html=True) + expected = dedent( + """\ + + + + + + +
    `` rows and ```` elements within each ``
    `` + element in the table. ```` stands for "table data". This function + attempts to properly handle ``colspan`` and ``rowspan`` attributes. + If the function has a ``
    `` elements into the header). + + Similar to :func:`~read_csv` the `header` argument is applied + **after** `skiprows` is applied. + + This function will *always* return a list of :class:`DataFrame` *or* + it will fail, e.g., it will *not* return an empty list. + + Examples + -------- + See the :ref:`read_html documentation in the IO section of the docs + ` for some examples of reading in HTML tables. + """ + _importers() + + # Type check here. We don't want to parse only to fail because of an + # invalid value of an integer skiprows. + if isinstance(skiprows, numbers.Integral) and skiprows < 0: + raise ValueError( + "cannot skip rows starting from the end of the " + "data (you passed a negative value)" + ) + if extract_links not in [None, "header", "footer", "body", "all"]: + raise ValueError( + "`extract_links` must be one of " + '{None, "header", "footer", "body", "all"}, got ' + f'"{extract_links}"' + ) + validate_header_arg(header) + + io = stringify_path(io) + + return _parse( + flavor=flavor, + io=io, + match=match, + header=header, + index_col=index_col, + skiprows=skiprows, + parse_dates=parse_dates, + thousands=thousands, + attrs=attrs, + encoding=encoding, + decimal=decimal, + converters=converters, + na_values=na_values, + keep_default_na=keep_default_na, + displayed_only=displayed_only, + extract_links=extract_links, + ) diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py new file mode 100644 index 00000000..1de1abcd --- /dev/null +++ b/pandas/io/json/__init__.py @@ -0,0 +1,21 @@ +from pandas.io.json._json import ( + dumps, + loads, + read_json, + to_json, +) +from pandas.io.json._normalize import ( + _json_normalize, + json_normalize, +) +from pandas.io.json._table_schema import build_table_schema + +__all__ = [ + "dumps", + "loads", + "read_json", + "to_json", + "_json_normalize", + "json_normalize", + "build_table_schema", +] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py new file mode 100644 index 00000000..02a0b27f --- /dev/null +++ b/pandas/io/json/_json.py @@ -0,0 +1,1414 @@ +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +from collections import abc +import functools +from io import StringIO +from itertools import islice +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Generic, + Literal, + Mapping, + TypeVar, + overload, +) + +import numpy as np + +import pandas._libs.json as json +from pandas._libs.tslibs import iNaT +from pandas._typing import ( + CompressionOptions, + DtypeArg, + FilePath, + IndexLabel, + JSONSerializable, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import ( + deprecate_kwarg, + deprecate_nonkeyword_arguments, + doc, +) + +from pandas.core.dtypes.common import ( + ensure_str, + is_period_dtype, +) + +from pandas import ( + DataFrame, + MultiIndex, + Series, + isna, + notna, + to_datetime, +) +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.reshape.concat import concat +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import ( + IOHandles, + _extension_to_compression, + file_exists, + get_handle, + is_fsspec_url, + is_url, + stringify_path, +) +from pandas.io.json._normalize import convert_to_line_delimits +from pandas.io.json._table_schema import ( + build_table_schema, + parse_table_schema, +) +from pandas.io.parsers.readers import validate_integer + +if TYPE_CHECKING: + from pandas.core.generic import NDFrame + 
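+# The writers defined below (``SeriesWriter``, ``FrameWriter``, ``JSONTableWriter``)
+# are selected by ``to_json`` from the object type and ``orient``; ``read_json``
+# either parses eagerly or, when ``chunksize`` is given, returns a ``JsonReader``
+# (see the overloads further down).
+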
+FrameSeriesStrT = TypeVar("FrameSeriesStrT", bound=Literal["frame", "series"]) + +loads = json.loads +dumps = json.dumps + + +# interface to/from +@overload +def to_json( + path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes], + obj: NDFrame, + orient: str | None = ..., + date_format: str = ..., + double_precision: int = ..., + force_ascii: bool = ..., + date_unit: str = ..., + default_handler: Callable[[Any], JSONSerializable] | None = ..., + lines: bool = ..., + compression: CompressionOptions = ..., + index: bool = ..., + indent: int = ..., + storage_options: StorageOptions = ..., +) -> None: + ... + + +@overload +def to_json( + path_or_buf: None, + obj: NDFrame, + orient: str | None = ..., + date_format: str = ..., + double_precision: int = ..., + force_ascii: bool = ..., + date_unit: str = ..., + default_handler: Callable[[Any], JSONSerializable] | None = ..., + lines: bool = ..., + compression: CompressionOptions = ..., + index: bool = ..., + indent: int = ..., + storage_options: StorageOptions = ..., +) -> str: + ... + + +def to_json( + path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] | None, + obj: NDFrame, + orient: str | None = None, + date_format: str = "epoch", + double_precision: int = 10, + force_ascii: bool = True, + date_unit: str = "ms", + default_handler: Callable[[Any], JSONSerializable] | None = None, + lines: bool = False, + compression: CompressionOptions = "infer", + index: bool = True, + indent: int = 0, + storage_options: StorageOptions = None, +) -> str | None: + + if not index and orient not in ["split", "table"]: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split' or 'table'" + ) + + if lines and orient != "records": + raise ValueError("'lines' keyword only valid when 'orient' is records") + + if orient == "table" and isinstance(obj, Series): + obj = obj.to_frame(name=obj.name or "values") + + writer: type[Writer] + if orient == "table" and isinstance(obj, DataFrame): + writer = JSONTableWriter + elif isinstance(obj, Series): + writer = SeriesWriter + elif isinstance(obj, DataFrame): + writer = FrameWriter + else: + raise NotImplementedError("'obj' should be a Series or a DataFrame") + + s = writer( + obj, + orient=orient, + date_format=date_format, + double_precision=double_precision, + ensure_ascii=force_ascii, + date_unit=date_unit, + default_handler=default_handler, + index=index, + indent=indent, + ).write() + + if lines: + s = convert_to_line_delimits(s) + + if path_or_buf is not None: + # apply compression and byte/text conversion + with get_handle( + path_or_buf, "w", compression=compression, storage_options=storage_options + ) as handles: + handles.handle.write(s) + else: + return s + return None + + +class Writer(ABC): + _default_orient: str + + def __init__( + self, + obj, + orient: str | None, + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Callable[[Any], JSONSerializable] | None = None, + indent: int = 0, + ) -> None: + self.obj = obj + + if orient is None: + orient = self._default_orient + + self.orient = orient + self.date_format = date_format + self.double_precision = double_precision + self.ensure_ascii = ensure_ascii + self.date_unit = date_unit + self.default_handler = default_handler + self.index = index + self.indent = indent + + self.is_copy = None + self._format_axes() + + def _format_axes(self): + raise AbstractMethodError(self) + + def write(self) -> str: + iso_dates = self.date_format == "iso" + return dumps( + 
self.obj_to_write, + orient=self.orient, + double_precision=self.double_precision, + ensure_ascii=self.ensure_ascii, + date_unit=self.date_unit, + iso_dates=iso_dates, + default_handler=self.default_handler, + indent=self.indent, + ) + + @property + @abstractmethod + def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: + """Object to write in JSON format.""" + pass + + +class SeriesWriter(Writer): + _default_orient = "index" + + @property + def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: + if not self.index and self.orient == "split": + return {"name": self.obj.name, "data": self.obj.values} + else: + return self.obj + + def _format_axes(self): + if not self.obj.index.is_unique and self.orient == "index": + raise ValueError(f"Series index must be unique for orient='{self.orient}'") + + +class FrameWriter(Writer): + _default_orient = "columns" + + @property + def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: + if not self.index and self.orient == "split": + obj_to_write = self.obj.to_dict(orient="split") + del obj_to_write["index"] + else: + obj_to_write = self.obj + return obj_to_write + + def _format_axes(self): + """ + Try to format axes if they are datelike. + """ + if not self.obj.index.is_unique and self.orient in ("index", "columns"): + raise ValueError( + f"DataFrame index must be unique for orient='{self.orient}'." + ) + if not self.obj.columns.is_unique and self.orient in ( + "index", + "columns", + "records", + ): + raise ValueError( + f"DataFrame columns must be unique for orient='{self.orient}'." + ) + + +class JSONTableWriter(FrameWriter): + _default_orient = "records" + + def __init__( + self, + obj, + orient: str | None, + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Callable[[Any], JSONSerializable] | None = None, + indent: int = 0, + ) -> None: + """ + Adds a `schema` attribute with the Table Schema, resets + the index (can't do in caller, because the schema inference needs + to know what the index is, forces orient to records, and forces + date_format to 'iso'. + """ + super().__init__( + obj, + orient, + date_format, + double_precision, + ensure_ascii, + date_unit, + index, + default_handler=default_handler, + indent=indent, + ) + + if date_format != "iso": + msg = ( + "Trying to write with `orient='table'` and " + f"`date_format='{date_format}'`. 
Table Schema requires dates " + "to be formatted with `date_format='iso'`" + ) + raise ValueError(msg) + + self.schema = build_table_schema(obj, index=self.index) + + # NotImplemented on a column MultiIndex + if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): + raise NotImplementedError( + "orient='table' is not supported for MultiIndex columns" + ) + + # TODO: Do this timedelta properly in objToJSON.c See GH #15137 + if ( + (obj.ndim == 1) + and (obj.name in set(obj.index.names)) + or len(obj.columns.intersection(obj.index.names)) + ): + msg = "Overlapping names between the index and columns" + raise ValueError(msg) + + obj = obj.copy() + timedeltas = obj.select_dtypes(include=["timedelta"]).columns + if len(timedeltas): + obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) + # Convert PeriodIndex to datetimes before serializing + if is_period_dtype(obj.index.dtype): + obj.index = obj.index.to_timestamp() + + # exclude index from obj if index=False + if not self.index: + self.obj = obj.reset_index(drop=True) + else: + self.obj = obj.reset_index(drop=False) + self.date_format = "iso" + self.orient = "records" + self.index = index + + @property + def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: + return {"schema": self.schema, "data": self.obj} + + +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, + orient: str | None = ..., + typ: Literal["frame"] = ..., + dtype: DtypeArg | None = ..., + convert_axes=..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + numpy: bool = ..., + precise_float: bool = ..., + date_unit: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + lines: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> JsonReader[Literal["frame"]]: + ... + + +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, + orient: str | None = ..., + typ: Literal["series"], + dtype: DtypeArg | None = ..., + convert_axes=..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + numpy: bool = ..., + precise_float: bool = ..., + date_unit: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + lines: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> JsonReader[Literal["series"]]: + ... + + +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, + orient: str | None = ..., + typ: Literal["series"], + dtype: DtypeArg | None = ..., + convert_axes=..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + numpy: bool = ..., + precise_float: bool = ..., + date_unit: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + lines: bool = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> Series: + ... 
+ + +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + orient: str | None = ..., + typ: Literal["frame"] = ..., + dtype: DtypeArg | None = ..., + convert_axes=..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + numpy: bool = ..., + precise_float: bool = ..., + date_unit: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + lines: bool = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: + ... + + +@doc( + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] % "path_or_buf", +) +@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None) +@deprecate_nonkeyword_arguments(version="2.0", allowed_args=["path_or_buf"]) +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + orient: str | None = None, + typ: Literal["frame", "series"] = "frame", + dtype: DtypeArg | None = None, + convert_axes=None, + convert_dates: bool | list[str] = True, + keep_default_dates: bool = True, + numpy: bool = False, + precise_float: bool = False, + date_unit: str | None = None, + encoding: str | None = None, + encoding_errors: str | None = "strict", + lines: bool = False, + chunksize: int | None = None, + compression: CompressionOptions = "infer", + nrows: int | None = None, + storage_options: StorageOptions = None, +) -> DataFrame | Series | JsonReader: + """ + Convert a JSON string to pandas object. + + Parameters + ---------- + path_or_buf : a valid JSON str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.json``. + + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handle (e.g. via builtin ``open`` function) + or ``StringIO``. + orient : str + Indication of expected JSON string format. + Compatible JSON strings can be produced by ``to_json()`` with a + corresponding orient value. + The set of possible orients is: + + - ``'split'`` : dict like + ``{{index -> [index], columns -> [columns], data -> [values]}}`` + - ``'records'`` : list like + ``[{{column -> value}}, ... , {{column -> value}}]`` + - ``'index'`` : dict like ``{{index -> {{column -> value}}}}`` + - ``'columns'`` : dict like ``{{column -> {{index -> value}}}}`` + - ``'values'`` : just the values array + + The allowed and default values depend on the value + of the `typ` parameter. + + * when ``typ == 'series'``, + + - allowed orients are ``{{'split','records','index'}}`` + - default is ``'index'`` + - The Series index must be unique for orient ``'index'``. + + * when ``typ == 'frame'``, + + - allowed orients are ``{{'split','records','index', + 'columns','values', 'table'}}`` + - default is ``'columns'`` + - The DataFrame index must be unique for orients ``'index'`` and + ``'columns'``. + - The DataFrame columns must be unique for orients ``'index'``, + ``'columns'``, and ``'records'``. + + typ : {{'frame', 'series'}}, default 'frame' + The type of object to recover. + + dtype : bool or dict, default None + If True, infer dtypes; if a dict of column to dtype, then use those; + if False, then don't infer dtypes at all, applies only to the data. 
+ + For all ``orient`` values except ``'table'``, default is True. + + .. versionchanged:: 0.25.0 + + Not applicable for ``orient='table'``. + + convert_axes : bool, default None + Try to convert the axes to the proper dtypes. + + For all ``orient`` values except ``'table'``, default is True. + + .. versionchanged:: 0.25.0 + + Not applicable for ``orient='table'``. + + convert_dates : bool or list of str, default True + If True then default datelike columns may be converted (depending on + keep_default_dates). + If False, no dates will be converted. + If a list of column names, then those columns will be converted and + default datelike columns may also be converted (depending on + keep_default_dates). + + keep_default_dates : bool, default True + If parsing dates (convert_dates is not False), then try to parse the + default datelike columns. + A column label is datelike if + + * it ends with ``'_at'``, + + * it ends with ``'_time'``, + + * it begins with ``'timestamp'``, + + * it is ``'modified'``, or + + * it is ``'date'``. + + numpy : bool, default False + Direct decoding to numpy arrays. Supports numeric data only, but + non-numeric column and index labels are supported. Note also that the + JSON ordering MUST be the same for each term if numpy=True. + + .. deprecated:: 1.0.0 + + precise_float : bool, default False + Set to enable usage of higher precision (strtod) function when + decoding string to double values. Default (False) is to use fast but + less precise builtin functionality. + + date_unit : str, default None + The timestamp unit to detect if converting dates. The default behaviour + is to try and detect the correct precision, but if this is not desired + then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds, + milliseconds, microseconds or nanoseconds respectively. + + encoding : str, default is 'utf-8' + The encoding to use to decode py3 bytes. + + encoding_errors : str, optional, default "strict" + How encoding errors are treated. `List of possible values + `_ . + + .. versionadded:: 1.3.0 + + lines : bool, default False + Read the file as a json object per line. + + chunksize : int, optional + Return JsonReader object for iteration. + See the `line-delimited json docs + `_ + for more information on ``chunksize``. + This can only be passed if `lines=True`. + If this is None, the file will be read into memory all at once. + + .. versionchanged:: 1.2 + + ``JsonReader`` is a context manager. + + {decompression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + nrows : int, optional + The number of lines from the line-delimited jsonfile that has to be read. + This can only be passed if `lines=True`. + If this is None, all the rows will be returned. + + .. versionadded:: 1.1 + + {storage_options} + + .. versionadded:: 1.2.0 + + Returns + ------- + Series or DataFrame + The type returned depends on the value of `typ`. + + See Also + -------- + DataFrame.to_json : Convert a DataFrame to a JSON string. + Series.to_json : Convert a Series to a JSON string. + json_normalize : Normalize semi-structured JSON data into a flat table. + + Notes + ----- + Specific to ``orient='table'``, if a :class:`DataFrame` with a literal + :class:`Index` name of `index` gets written with :func:`to_json`, the + subsequent read operation will incorrectly set the :class:`Index` name to + ``None``. 
This is because `index` is also used by :func:`DataFrame.to_json` + to denote a missing :class:`Index` name, and the subsequent + :func:`read_json` operation cannot distinguish between the two. The same + limitation is encountered with a :class:`MultiIndex` and any names + beginning with ``'level_'``. + + Examples + -------- + >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + + Encoding/decoding a Dataframe using ``'split'`` formatted JSON: + + >>> df.to_json(orient='split') + '\ +{{\ +"columns":["col 1","col 2"],\ +"index":["row 1","row 2"],\ +"data":[["a","b"],["c","d"]]\ +}}\ +' + >>> pd.read_json(_, orient='split') + col 1 col 2 + row 1 a b + row 2 c d + + Encoding/decoding a Dataframe using ``'index'`` formatted JSON: + + >>> df.to_json(orient='index') + '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}' + + >>> pd.read_json(_, orient='index') + col 1 col 2 + row 1 a b + row 2 c d + + Encoding/decoding a Dataframe using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. + + >>> df.to_json(orient='records') + '[{{"col 1":"a","col 2":"b"}},{{"col 1":"c","col 2":"d"}}]' + >>> pd.read_json(_, orient='records') + col 1 col 2 + 0 a b + 1 c d + + Encoding with Table Schema + + >>> df.to_json(orient='table') + '\ +{{"schema":{{"fields":[\ +{{"name":"index","type":"string"}},\ +{{"name":"col 1","type":"string"}},\ +{{"name":"col 2","type":"string"}}],\ +"primaryKey":["index"],\ +"pandas_version":"1.4.0"}},\ +"data":[\ +{{"index":"row 1","col 1":"a","col 2":"b"}},\ +{{"index":"row 2","col 1":"c","col 2":"d"}}]\ +}}\ +' + """ + if orient == "table" and dtype: + raise ValueError("cannot pass both dtype and orient='table'") + if orient == "table" and convert_axes: + raise ValueError("cannot pass both convert_axes and orient='table'") + + if dtype is None and orient != "table": + # error: Incompatible types in assignment (expression has type "bool", variable + # has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float], + # Type[int], Type[complex], Type[bool], Type[object], Dict[Hashable, + # Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float], + # Type[int], Type[complex], Type[bool], Type[object]]], None]") + dtype = True # type: ignore[assignment] + if convert_axes is None and orient != "table": + convert_axes = True + + json_reader = JsonReader( + path_or_buf, + orient=orient, + typ=typ, + dtype=dtype, + convert_axes=convert_axes, + convert_dates=convert_dates, + keep_default_dates=keep_default_dates, + numpy=numpy, + precise_float=precise_float, + date_unit=date_unit, + encoding=encoding, + lines=lines, + chunksize=chunksize, + compression=compression, + nrows=nrows, + storage_options=storage_options, + encoding_errors=encoding_errors, + ) + + if chunksize: + return json_reader + + with json_reader: + return json_reader.read() + + +class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]): + """ + JsonReader provides an interface for reading in a JSON file. + + If initialized with ``lines=True`` and ``chunksize``, can be iterated over + ``chunksize`` lines at a time. Otherwise, calling ``read`` reads in the + whole document. 
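+
+    A minimal sketch of the chunked path, assuming a line-delimited file
+    ``data.jsonl`` exists (the file name is illustrative only)::
+
+        with read_json("data.jsonl", lines=True, chunksize=2) as reader:
+            for chunk in reader:
+                ...  # each chunk is a DataFrame of at most ``chunksize`` rows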
+ """ + + def __init__( + self, + filepath_or_buffer, + orient, + typ: FrameSeriesStrT, + dtype, + convert_axes, + convert_dates, + keep_default_dates: bool, + numpy: bool, + precise_float: bool, + date_unit, + encoding, + lines: bool, + chunksize: int | None, + compression: CompressionOptions, + nrows: int | None, + storage_options: StorageOptions = None, + encoding_errors: str | None = "strict", + ) -> None: + + self.orient = orient + self.typ = typ + self.dtype = dtype + self.convert_axes = convert_axes + self.convert_dates = convert_dates + self.keep_default_dates = keep_default_dates + self.numpy = numpy + self.precise_float = precise_float + self.date_unit = date_unit + self.encoding = encoding + self.compression = compression + self.storage_options = storage_options + self.lines = lines + self.chunksize = chunksize + self.nrows_seen = 0 + self.nrows = nrows + self.encoding_errors = encoding_errors + self.handles: IOHandles[str] | None = None + + if self.chunksize is not None: + self.chunksize = validate_integer("chunksize", self.chunksize, 1) + if not self.lines: + raise ValueError("chunksize can only be passed if lines=True") + if self.nrows is not None: + self.nrows = validate_integer("nrows", self.nrows, 0) + if not self.lines: + raise ValueError("nrows can only be passed if lines=True") + + data = self._get_data_from_filepath(filepath_or_buffer) + self.data = self._preprocess_data(data) + + def _preprocess_data(self, data): + """ + At this point, the data either has a `read` attribute (e.g. a file + object or a StringIO) or is a string that is a JSON document. + + If self.chunksize, we prepare the data for the `__next__` method. + Otherwise, we read it into memory for the `read` method. + """ + if hasattr(data, "read") and not (self.chunksize or self.nrows): + with self: + data = data.read() + if not hasattr(data, "read") and (self.chunksize or self.nrows): + data = StringIO(data) + + return data + + def _get_data_from_filepath(self, filepath_or_buffer): + """ + The function read_json accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. JSON string + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. + + It raises FileNotFoundError if the input is a string ending in + one of .json, .json.gz, .json.bz2, etc. but no such file exists. + """ + # if it is a string but the file does not exist, it might be a JSON string + filepath_or_buffer = stringify_path(filepath_or_buffer) + if ( + not isinstance(filepath_or_buffer, str) + or is_url(filepath_or_buffer) + or is_fsspec_url(filepath_or_buffer) + or file_exists(filepath_or_buffer) + ): + self.handles = get_handle( + filepath_or_buffer, + "r", + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + errors=self.encoding_errors, + ) + filepath_or_buffer = self.handles.handle + elif ( + isinstance(filepath_or_buffer, str) + and filepath_or_buffer.lower().endswith( + (".json",) + tuple(f".json{c}" for c in _extension_to_compression) + ) + and not file_exists(filepath_or_buffer) + ): + raise FileNotFoundError(f"File {filepath_or_buffer} does not exist") + + return filepath_or_buffer + + def _combine_lines(self, lines) -> str: + """ + Combines a list of JSON objects into one JSON object. + """ + return ( + f'[{",".join([line for line in (line.strip() for line in lines) if line])}]' + ) + + @overload + def read(self: JsonReader[Literal["frame"]]) -> DataFrame: + ... 
+ + @overload + def read(self: JsonReader[Literal["series"]]) -> Series: + ... + + @overload + def read(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series: + ... + + def read(self) -> DataFrame | Series: + """ + Read the whole JSON input into a pandas object. + """ + obj: DataFrame | Series + if self.lines: + if self.chunksize: + obj = concat(self) + elif self.nrows: + lines = list(islice(self.data, self.nrows)) + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + else: + data = ensure_str(self.data) + data_lines = data.split("\n") + obj = self._get_object_parser(self._combine_lines(data_lines)) + else: + obj = self._get_object_parser(self.data) + self.close() + return obj + + def _get_object_parser(self, json) -> DataFrame | Series: + """ + Parses a json document into a pandas object. + """ + typ = self.typ + dtype = self.dtype + kwargs = { + "orient": self.orient, + "dtype": self.dtype, + "convert_axes": self.convert_axes, + "convert_dates": self.convert_dates, + "keep_default_dates": self.keep_default_dates, + "numpy": self.numpy, + "precise_float": self.precise_float, + "date_unit": self.date_unit, + } + obj = None + if typ == "frame": + obj = FrameParser(json, **kwargs).parse() + + if typ == "series" or obj is None: + if not isinstance(dtype, bool): + kwargs["dtype"] = dtype + obj = SeriesParser(json, **kwargs).parse() + + return obj + + def close(self) -> None: + """ + If we opened a stream earlier, in _get_data_from_filepath, we should + close it. + + If an open stream or file was passed, we leave it open. + """ + if self.handles is not None: + self.handles.close() + + def __iter__(self: JsonReader[FrameSeriesStrT]) -> JsonReader[FrameSeriesStrT]: + return self + + @overload + def __next__(self: JsonReader[Literal["frame"]]) -> DataFrame: + ... + + @overload + def __next__(self: JsonReader[Literal["series"]]) -> Series: + ... + + @overload + def __next__(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series: + ... + + def __next__(self) -> DataFrame | Series: + if self.nrows: + if self.nrows_seen >= self.nrows: + self.close() + raise StopIteration + + lines = list(islice(self.data, self.chunksize)) + if lines: + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + + # Make sure that the returned objects have the right index. + obj.index = range(self.nrows_seen, self.nrows_seen + len(obj)) + self.nrows_seen += len(obj) + + return obj + + self.close() + raise StopIteration + + def __enter__(self) -> JsonReader[FrameSeriesStrT]: + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + self.close() + + +class Parser: + _split_keys: tuple[str, ...] 
+ _default_orient: str + + _STAMP_UNITS = ("s", "ms", "us", "ns") + _MIN_STAMPS = { + "s": 31536000, + "ms": 31536000000, + "us": 31536000000000, + "ns": 31536000000000000, + } + + def __init__( + self, + json, + orient, + dtype: DtypeArg | None = None, + convert_axes: bool = True, + convert_dates: bool | list[str] = True, + keep_default_dates: bool = False, + numpy: bool = False, + precise_float: bool = False, + date_unit=None, + ) -> None: + self.json = json + + if orient is None: + orient = self._default_orient + + self.orient = orient + + self.dtype = dtype + + if orient == "split": + numpy = False + + if date_unit is not None: + date_unit = date_unit.lower() + if date_unit not in self._STAMP_UNITS: + raise ValueError(f"date_unit must be one of {self._STAMP_UNITS}") + self.min_stamp = self._MIN_STAMPS[date_unit] + else: + self.min_stamp = self._MIN_STAMPS["s"] + + self.numpy = numpy + self.precise_float = precise_float + self.convert_axes = convert_axes + self.convert_dates = convert_dates + self.date_unit = date_unit + self.keep_default_dates = keep_default_dates + self.obj: DataFrame | Series | None = None + + def check_keys_split(self, decoded) -> None: + """ + Checks that dict has only the appropriate keys for orient='split'. + """ + bad_keys = set(decoded.keys()).difference(set(self._split_keys)) + if bad_keys: + bad_keys_joined = ", ".join(bad_keys) + raise ValueError(f"JSON data had unexpected key(s): {bad_keys_joined}") + + def parse(self): + + if self.numpy: + self._parse_numpy() + else: + self._parse_no_numpy() + + if self.obj is None: + return None + if self.convert_axes: + self._convert_axes() + self._try_convert_types() + return self.obj + + def _parse_numpy(self): + raise AbstractMethodError(self) + + def _parse_no_numpy(self): + raise AbstractMethodError(self) + + def _convert_axes(self): + """ + Try to convert axes. + """ + obj = self.obj + assert obj is not None # for mypy + for axis_name in obj._AXIS_ORDERS: + new_axis, result = self._try_convert_data( + name=axis_name, + data=obj._get_axis(axis_name), + use_dtypes=False, + convert_dates=True, + ) + if result: + setattr(self.obj, axis_name, new_axis) + + def _try_convert_types(self): + raise AbstractMethodError(self) + + def _try_convert_data( + self, + name, + data, + use_dtypes: bool = True, + convert_dates: bool | list[str] = True, + ): + """ + Try to parse a ndarray like into a column by inferring dtype. 
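+
+        Returns a 2-tuple of the (possibly converted) data and a bool
+        indicating whether a conversion was applied.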
+ """ + # don't try to coerce, unless a force conversion + if use_dtypes: + if not self.dtype: + if all(notna(data)): + return data, False + return data.fillna(np.nan), True + + # error: Non-overlapping identity check (left operand type: + # "Union[ExtensionDtype, str, dtype[Any], Type[object], + # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]], + # Type[str], Type[float], Type[int], Type[complex], Type[bool], + # Type[object]]]]", right operand type: "Literal[True]") + elif self.dtype is True: # type: ignore[comparison-overlap] + pass + else: + # dtype to force + dtype = ( + self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype + ) + if dtype is not None: + try: + return data.astype(dtype), True + except (TypeError, ValueError): + return data, False + + if convert_dates: + new_data, result = self._try_convert_to_date(data) + if result: + return new_data, True + + if data.dtype == "object": + + # try float + try: + data = data.astype("float64") + except (TypeError, ValueError): + pass + + if data.dtype.kind == "f": + + if data.dtype != "float64": + + # coerce floats to 64 + try: + data = data.astype("float64") + except (TypeError, ValueError): + pass + + # don't coerce 0-len data + if len(data) and (data.dtype == "float" or data.dtype == "object"): + + # coerce ints if we can + try: + new_data = data.astype("int64") + if (new_data == data).all(): + data = new_data + except (TypeError, ValueError, OverflowError): + pass + + # coerce ints to 64 + if data.dtype == "int": + + # coerce floats to 64 + try: + data = data.astype("int64") + except (TypeError, ValueError): + pass + + # if we have an index, we want to preserve dtypes + if name == "index" and len(data): + if self.orient == "split": + return data, False + + return data, True + + def _try_convert_to_date(self, data): + """ + Try to parse a ndarray like into a date column. + + Try to coerce object in epoch/iso formats and integer/float in epoch + formats. Return a boolean if parsing was successful. 
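+
+        As a rough illustration (hypothetical values): with the default
+        ``date_unit``, an integer column such as ``[1546300800, 1577836800]``
+        (epoch seconds) parses to ``2019-01-01`` and ``2020-01-01``, while a
+        column whose values fall below ``self.min_stamp`` is returned
+        unchanged.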
+ """ + # no conversion on empty + if not len(data): + return data, False + + new_data = data + if new_data.dtype == "object": + try: + new_data = data.astype("int64") + except (TypeError, ValueError, OverflowError): + pass + + # ignore numbers that are out of range + if issubclass(new_data.dtype.type, np.number): + in_range = ( + isna(new_data._values) + | (new_data > self.min_stamp) + | (new_data._values == iNaT) + ) + if not in_range.all(): + return data, False + + date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS + for date_unit in date_units: + try: + new_data = to_datetime(new_data, errors="raise", unit=date_unit) + except (ValueError, OverflowError, TypeError): + continue + return new_data, True + return data, False + + def _try_convert_dates(self): + raise AbstractMethodError(self) + + +class SeriesParser(Parser): + _default_orient = "index" + _split_keys = ("name", "index", "data") + + def _parse_no_numpy(self): + data = loads(self.json, precise_float=self.precise_float) + + if self.orient == "split": + decoded = {str(k): v for k, v in data.items()} + self.check_keys_split(decoded) + self.obj = create_series_with_explicit_dtype(**decoded) + else: + self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) + + def _parse_numpy(self): + load_kwargs = { + "dtype": None, + "numpy": True, + "precise_float": self.precise_float, + } + if self.orient in ["columns", "index"]: + load_kwargs["labelled"] = True + loads_ = functools.partial(loads, **load_kwargs) + data = loads_(self.json) + + if self.orient == "split": + decoded = {str(k): v for k, v in data.items()} + self.check_keys_split(decoded) + self.obj = create_series_with_explicit_dtype(**decoded) + elif self.orient in ["columns", "index"]: + # error: "create_series_with_explicit_dtype" + # gets multiple values for keyword argument "dtype_if_empty + self.obj = create_series_with_explicit_dtype( + *data, dtype_if_empty=object + ) # type: ignore[misc] + else: + self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) + + def _try_convert_types(self): + if self.obj is None: + return + obj, result = self._try_convert_data( + "data", self.obj, convert_dates=self.convert_dates + ) + if result: + self.obj = obj + + +class FrameParser(Parser): + _default_orient = "columns" + _split_keys = ("columns", "index", "data") + + def _parse_numpy(self): + + json = self.json + orient = self.orient + + if orient == "columns": + args = loads( + json, + dtype=None, + numpy=True, + labelled=True, + precise_float=self.precise_float, + ) + if len(args): + args = (args[0].T, args[2], args[1]) + self.obj = DataFrame(*args) + elif orient == "split": + decoded = loads( + json, dtype=None, numpy=True, precise_float=self.precise_float + ) + decoded = {str(k): v for k, v in decoded.items()} + self.check_keys_split(decoded) + self.obj = DataFrame(**decoded) + elif orient == "values": + self.obj = DataFrame( + loads(json, dtype=None, numpy=True, precise_float=self.precise_float) + ) + else: + self.obj = DataFrame( + *loads( + json, + dtype=None, + numpy=True, + labelled=True, + precise_float=self.precise_float, + ) + ) + + def _parse_no_numpy(self): + + json = self.json + orient = self.orient + + if orient == "columns": + self.obj = DataFrame( + loads(json, precise_float=self.precise_float), dtype=None + ) + elif orient == "split": + decoded = { + str(k): v + for k, v in loads(json, precise_float=self.precise_float).items() + } + self.check_keys_split(decoded) + self.obj = DataFrame(dtype=None, **decoded) + elif 
orient == "index": + self.obj = DataFrame.from_dict( + loads(json, precise_float=self.precise_float), + dtype=None, + orient="index", + ) + elif orient == "table": + self.obj = parse_table_schema(json, precise_float=self.precise_float) + else: + self.obj = DataFrame( + loads(json, precise_float=self.precise_float), dtype=None + ) + + def _process_converter(self, f, filt=None): + """ + Take a conversion function and possibly recreate the frame. + """ + if filt is None: + filt = lambda col, c: True + + obj = self.obj + assert obj is not None # for mypy + + needs_new_obj = False + new_obj = {} + for i, (col, c) in enumerate(obj.items()): + if filt(col, c): + new_data, result = f(col, c) + if result: + c = new_data + needs_new_obj = True + new_obj[i] = c + + if needs_new_obj: + + # possibly handle dup columns + new_frame = DataFrame(new_obj, index=obj.index) + new_frame.columns = obj.columns + self.obj = new_frame + + def _try_convert_types(self): + if self.obj is None: + return + if self.convert_dates: + self._try_convert_dates() + + self._process_converter( + lambda col, c: self._try_convert_data(col, c, convert_dates=False) + ) + + def _try_convert_dates(self): + if self.obj is None: + return + + # our columns to parse + convert_dates_list_bool = self.convert_dates + if isinstance(convert_dates_list_bool, bool): + convert_dates_list_bool = [] + convert_dates = set(convert_dates_list_bool) + + def is_ok(col) -> bool: + """ + Return if this col is ok to try for a date parse. + """ + if not isinstance(col, str): + return False + + col_lower = col.lower() + if ( + col_lower.endswith("_at") + or col_lower.endswith("_time") + or col_lower == "modified" + or col_lower == "date" + or col_lower == "datetime" + or col_lower.startswith("timestamp") + ): + return True + return False + + self._process_converter( + lambda col, c: self._try_convert_to_date(c), + lambda col, c: ( + (self.keep_default_dates and is_ok(col)) or col in convert_dates + ), + ) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py new file mode 100644 index 00000000..e77d60d2 --- /dev/null +++ b/pandas/io/json/_normalize.py @@ -0,0 +1,540 @@ +# --------------------------------------------------------------------- +# JSON normalization routines +from __future__ import annotations + +from collections import ( + abc, + defaultdict, +) +import copy +from typing import ( + Any, + DefaultDict, + Iterable, +) + +import numpy as np + +from pandas._libs.writers import convert_json_to_lines +from pandas._typing import ( + IgnoreRaise, + Scalar, +) +from pandas.util._decorators import deprecate + +import pandas as pd +from pandas import DataFrame + + +def convert_to_line_delimits(s: str) -> str: + """ + Helper function that converts JSON lists to line delimited JSON. + """ + # Determine we have a JSON list to turn to lines otherwise just return the + # json object, only lists can + if not s[0] == "[" and s[-1] == "]": + return s + s = s[1:-1] + + return convert_json_to_lines(s) + + +def nested_to_record( + ds, + prefix: str = "", + sep: str = ".", + level: int = 0, + max_level: int | None = None, +): + """ + A simplified json_normalize + + Converts a nested dict into a flat dict ("record"), unlike json_normalize, + it does not attempt to extract a subset of the data. + + Parameters + ---------- + ds : dict or list of dicts + prefix: the prefix, optional, default: "" + sep : str, default '.' 
+ Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + level: int, optional, default: 0 + The number of levels in the json string. + + max_level: int, optional, default: None + The max depth to normalize. + + .. versionadded:: 0.25.0 + + Returns + ------- + d - dict or list of dicts, matching `ds` + + Examples + -------- + >>> nested_to_record( + ... dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2)) + ... ) + {\ +'flat1': 1, \ +'dict1.c': 1, \ +'dict1.d': 2, \ +'nested.e.c': 1, \ +'nested.e.d': 2, \ +'nested.d': 2\ +} + """ + singleton = False + if isinstance(ds, dict): + ds = [ds] + singleton = True + new_ds = [] + for d in ds: + new_d = copy.deepcopy(d) + for k, v in d.items(): + # each key gets renamed with prefix + if not isinstance(k, str): + k = str(k) + if level == 0: + newkey = k + else: + newkey = prefix + sep + k + + # flatten if type is dict and + # current dict level < maximum level provided and + # only dicts gets recurse-flattened + # only at level>1 do we rename the rest of the keys + if not isinstance(v, dict) or ( + max_level is not None and level >= max_level + ): + if level != 0: # so we skip copying for top level, common case + v = new_d.pop(k) + new_d[newkey] = v + continue + else: + v = new_d.pop(k) + new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level)) + new_ds.append(new_d) + + if singleton: + return new_ds[0] + return new_ds + + +def _normalise_json( + data: Any, + key_string: str, + normalized_dict: dict[str, Any], + separator: str, +) -> dict[str, Any]: + """ + Main recursive function + Designed for the most basic use case of pd.json_normalize(data) + intended as a performance improvement, see #15621 + + Parameters + ---------- + data : Any + Type dependent on types contained within nested Json + key_string : str + New key (with separator(s) in) for data + normalized_dict : dict + The new normalized/flattened Json dict + separator : str, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + """ + if isinstance(data, dict): + for key, value in data.items(): + new_key = f"{key_string}{separator}{key}" + _normalise_json( + data=value, + # to avoid adding the separator to the start of every key + # GH#43831 avoid adding key if key_string blank + key_string=new_key + if new_key[: len(separator)] != separator + else new_key[len(separator) :], + normalized_dict=normalized_dict, + separator=separator, + ) + else: + normalized_dict[key_string] = data + return normalized_dict + + +def _normalise_json_ordered(data: dict[str, Any], separator: str) -> dict[str, Any]: + """ + Order the top level keys and then recursively go to depth + + Parameters + ---------- + data : dict or list of dicts + separator : str, default '.' 
+ Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + Returns + ------- + dict or list of dicts, matching `normalised_json_object` + """ + top_dict_ = {k: v for k, v in data.items() if not isinstance(v, dict)} + nested_dict_ = _normalise_json( + data={k: v for k, v in data.items() if isinstance(v, dict)}, + key_string="", + normalized_dict={}, + separator=separator, + ) + return {**top_dict_, **nested_dict_} + + +def _simple_json_normalize( + ds: dict | list[dict], + sep: str = ".", +) -> dict | list[dict] | Any: + """ + A optimized basic json_normalize + + Converts a nested dict into a flat dict ("record"), unlike + json_normalize and nested_to_record it doesn't do anything clever. + But for the most basic use cases it enhances performance. + E.g. pd.json_normalize(data) + + Parameters + ---------- + ds : dict or list of dicts + sep : str, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + Returns + ------- + frame : DataFrame + d - dict or list of dicts, matching `normalised_json_object` + + Examples + -------- + >>> _simple_json_normalize( + ... { + ... "flat1": 1, + ... "dict1": {"c": 1, "d": 2}, + ... "nested": {"e": {"c": 1, "d": 2}, "d": 2}, + ... } + ... ) + {\ +'flat1': 1, \ +'dict1.c': 1, \ +'dict1.d': 2, \ +'nested.e.c': 1, \ +'nested.e.d': 2, \ +'nested.d': 2\ +} + + """ + normalised_json_object = {} + # expect a dictionary, as most jsons are. However, lists are perfectly valid + if isinstance(ds, dict): + normalised_json_object = _normalise_json_ordered(data=ds, separator=sep) + elif isinstance(ds, list): + normalised_json_list = [_simple_json_normalize(row, sep=sep) for row in ds] + return normalised_json_list + return normalised_json_object + + +def _json_normalize( + data: dict | list[dict], + record_path: str | list | None = None, + meta: str | list[str | list[str]] | None = None, + meta_prefix: str | None = None, + record_prefix: str | None = None, + errors: IgnoreRaise = "raise", + sep: str = ".", + max_level: int | None = None, +) -> DataFrame: + """ + Normalize semi-structured JSON data into a flat table. + + Parameters + ---------- + data : dict or list of dicts + Unserialized JSON objects. + record_path : str or list of str, default None + Path in each object to list of records. If not passed, data will be + assumed to be an array of records. + meta : list of paths (str or list of str), default None + Fields to use as metadata for each record in resulting table. + meta_prefix : str, default None + If True, prefix records with dotted (?) path, e.g. foo.bar.field if + meta is ['foo', 'bar']. + record_prefix : str, default None + If True, prefix records with dotted (?) path, e.g. foo.bar.field if + path to records is ['foo', 'bar']. + errors : {'raise', 'ignore'}, default 'raise' + Configures error handling. + + * 'ignore' : will ignore KeyError if keys listed in meta are not + always present. + * 'raise' : will raise KeyError if keys listed in meta are not + always present. + sep : str, default '.' + Nested records will generate names separated by sep. + e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar. + max_level : int, default None + Max number of levels(depth of dict) to normalize. + if None, normalizes all levels. + + .. versionadded:: 0.25.0 + + Returns + ------- + frame : DataFrame + Normalize semi-structured JSON data into a flat table. + + Examples + -------- + >>> data = [ + ... 
{"id": 1, "name": {"first": "Coleen", "last": "Volk"}}, + ... {"name": {"given": "Mark", "family": "Regner"}}, + ... {"id": 2, "name": "Faye Raker"}, + ... ] + >>> pd.json_normalize(data) + id name.first name.last name.given name.family name + 0 1.0 Coleen Volk NaN NaN NaN + 1 NaN NaN NaN Mark Regner NaN + 2 2.0 NaN NaN NaN NaN Faye Raker + + >>> data = [ + ... { + ... "id": 1, + ... "name": "Cole Volk", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, + ... { + ... "id": 2, + ... "name": "Faye Raker", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... ] + >>> pd.json_normalize(data, max_level=0) + id name fitness + 0 1.0 Cole Volk {'height': 130, 'weight': 60} + 1 NaN Mark Reg {'height': 130, 'weight': 60} + 2 2.0 Faye Raker {'height': 130, 'weight': 60} + + Normalizes nested data up to level 1. + + >>> data = [ + ... { + ... "id": 1, + ... "name": "Cole Volk", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, + ... { + ... "id": 2, + ... "name": "Faye Raker", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... ] + >>> pd.json_normalize(data, max_level=1) + id name fitness.height fitness.weight + 0 1.0 Cole Volk 130 60 + 1 NaN Mark Reg 130 60 + 2 2.0 Faye Raker 130 60 + + >>> data = [ + ... { + ... "state": "Florida", + ... "shortname": "FL", + ... "info": {"governor": "Rick Scott"}, + ... "counties": [ + ... {"name": "Dade", "population": 12345}, + ... {"name": "Broward", "population": 40000}, + ... {"name": "Palm Beach", "population": 60000}, + ... ], + ... }, + ... { + ... "state": "Ohio", + ... "shortname": "OH", + ... "info": {"governor": "John Kasich"}, + ... "counties": [ + ... {"name": "Summit", "population": 1234}, + ... {"name": "Cuyahoga", "population": 1337}, + ... ], + ... }, + ... ] + >>> result = pd.json_normalize( + ... data, "counties", ["state", "shortname", ["info", "governor"]] + ... ) + >>> result + name population state shortname info.governor + 0 Dade 12345 Florida FL Rick Scott + 1 Broward 40000 Florida FL Rick Scott + 2 Palm Beach 60000 Florida FL Rick Scott + 3 Summit 1234 Ohio OH John Kasich + 4 Cuyahoga 1337 Ohio OH John Kasich + + >>> data = {"A": [1, 2]} + >>> pd.json_normalize(data, "A", record_prefix="Prefix.") + Prefix.0 + 0 1 + 1 2 + + Returns normalized data with columns prefixed with the given string. + """ + + def _pull_field( + js: dict[str, Any], spec: list | str, extract_record: bool = False + ) -> Scalar | Iterable: + """Internal function to pull field""" + result = js + try: + if isinstance(spec, list): + for field in spec: + if result is None: + raise KeyError(field) + result = result[field] + else: + result = result[spec] + except KeyError as e: + if extract_record: + raise KeyError( + f"Key {e} not found. If specifying a record_path, all elements of " + f"data should have the path." + ) from e + elif errors == "ignore": + return np.nan + else: + raise KeyError( + f"Key {e} not found. To replace missing values of {e} with " + f"np.nan, pass in errors='ignore'" + ) from e + + return result + + def _pull_records(js: dict[str, Any], spec: list | str) -> list: + """ + Internal function to pull field for records, and similar to + _pull_field, but require to return list. And will raise error + if has non iterable value. 
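+
+        For example (hypothetical inputs), ``_pull_records({"a": 1}, "a")``
+        raises ``TypeError`` because ``1`` is neither a list nor null, while
+        ``_pull_records({"a": None}, "a")`` returns ``[]``.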
+ """ + result = _pull_field(js, spec, extract_record=True) + + # GH 31507 GH 30145, GH 26284 if result is not list, raise TypeError if not + # null, otherwise return an empty list + if not isinstance(result, list): + if pd.isnull(result): + result = [] + else: + raise TypeError( + f"{js} has non list value {result} for path {spec}. " + "Must be list or null." + ) + return result + + if isinstance(data, list) and not data: + return DataFrame() + elif isinstance(data, dict): + # A bit of a hackjob + data = [data] + elif isinstance(data, abc.Iterable) and not isinstance(data, str): + # GH35923 Fix pd.json_normalize to not skip the first element of a + # generator input + data = list(data) + else: + raise NotImplementedError + + # check to see if a simple recursive function is possible to + # improve performance (see #15621) but only for cases such + # as pd.Dataframe(data) or pd.Dataframe(data, sep) + if ( + record_path is None + and meta is None + and meta_prefix is None + and record_prefix is None + and max_level is None + ): + return DataFrame(_simple_json_normalize(data, sep=sep)) + + if record_path is None: + if any([isinstance(x, dict) for x in y.values()] for y in data): + # naive normalization, this is idempotent for flat records + # and potentially will inflate the data considerably for + # deeply nested structures: + # {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:@} + # + # TODO: handle record value which are lists, at least error + # reasonably + data = nested_to_record(data, sep=sep, max_level=max_level) + return DataFrame(data) + elif not isinstance(record_path, list): + record_path = [record_path] + + if meta is None: + meta = [] + elif not isinstance(meta, list): + meta = [meta] + + _meta = [m if isinstance(m, list) else [m] for m in meta] + + # Disastrously inefficient for now + records: list = [] + lengths = [] + + meta_vals: DefaultDict = defaultdict(list) + meta_keys = [sep.join(val) for val in _meta] + + def _recursive_extract(data, path, seen_meta, level=0): + if isinstance(data, dict): + data = [data] + if len(path) > 1: + for obj in data: + for val, key in zip(_meta, meta_keys): + if level + 1 == len(val): + seen_meta[key] = _pull_field(obj, val[-1]) + + _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) + else: + for obj in data: + recs = _pull_records(obj, path[0]) + recs = [ + nested_to_record(r, sep=sep, max_level=max_level) + if isinstance(r, dict) + else r + for r in recs + ] + + # For repeating the metadata later + lengths.append(len(recs)) + for val, key in zip(_meta, meta_keys): + if level + 1 > len(val): + meta_val = seen_meta[key] + else: + meta_val = _pull_field(obj, val[level:]) + meta_vals[key].append(meta_val) + records.extend(recs) + + _recursive_extract(data, record_path, {}, level=0) + + result = DataFrame(records) + + if record_prefix is not None: + result = result.rename(columns=lambda x: f"{record_prefix}{x}") + + # Data types, a problem + for k, v in meta_vals.items(): + if meta_prefix is not None: + k = meta_prefix + k + + if k in result: + raise ValueError( + f"Conflicting metadata name {k}, need distinguishing prefix " + ) + result[k] = np.array(v, dtype=object).repeat(lengths) + return result + + +json_normalize = deprecate( + "pandas.io.json.json_normalize", _json_normalize, "1.0.0", "pandas.json_normalize" +) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py new file mode 100644 index 00000000..0d6cab20 --- /dev/null +++ b/pandas/io/json/_table_schema.py @@ -0,0 +1,377 @@ +""" +Table 
Schema builders + +https://specs.frictionlessdata.io/json-table-schema/ +""" +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + cast, +) +import warnings + +import pandas._libs.json as json +from pandas._typing import ( + DtypeObj, + JSONSerializable, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.base import _registry as registry +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_extension_array_dtype, + is_integer_dtype, + is_numeric_dtype, + is_period_dtype, + is_string_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype + +from pandas import DataFrame +import pandas.core.common as com + +if TYPE_CHECKING: + from pandas import Series + from pandas.core.indexes.multi import MultiIndex + +loads = json.loads + +TABLE_SCHEMA_VERSION = "1.4.0" + + +def as_json_table_type(x: DtypeObj) -> str: + """ + Convert a NumPy / pandas type to its corresponding json_table. + + Parameters + ---------- + x : np.dtype or ExtensionDtype + + Returns + ------- + str + the Table Schema data types + + Notes + ----- + This table shows the relationship between NumPy / pandas dtypes, + and Table Schema dtypes. + + ============== ================= + Pandas type Table Schema type + ============== ================= + int64 integer + float64 number + bool boolean + datetime64[ns] datetime + timedelta64[ns] duration + object str + categorical any + =============== ================= + """ + if is_integer_dtype(x): + return "integer" + elif is_bool_dtype(x): + return "boolean" + elif is_numeric_dtype(x): + return "number" + elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or is_period_dtype(x): + return "datetime" + elif is_timedelta64_dtype(x): + return "duration" + elif is_categorical_dtype(x): + return "any" + elif is_extension_array_dtype(x): + return "any" + elif is_string_dtype(x): + return "string" + else: + return "any" + + +def set_default_names(data): + """Sets index names to 'index' for regular, or 'level_x' for Multi""" + if com.all_not_none(*data.index.names): + nms = data.index.names + if len(nms) == 1 and data.index.name == "index": + warnings.warn( + "Index name of 'index' is not round-trippable.", + stacklevel=find_stack_level(), + ) + elif len(nms) > 1 and any(x.startswith("level_") for x in nms): + warnings.warn( + "Index names beginning with 'level_' are not round-trippable.", + stacklevel=find_stack_level(), + ) + return data + + data = data.copy() + if data.index.nlevels > 1: + data.index.names = com.fill_missing_names(data.index.names) + else: + data.index.name = data.index.name or "index" + return data + + +def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]: + dtype = arr.dtype + name: JSONSerializable + if arr.name is None: + name = "values" + else: + name = arr.name + field: dict[str, JSONSerializable] = { + "name": name, + "type": as_json_table_type(dtype), + } + + if is_categorical_dtype(dtype): + cats = dtype.categories + ordered = dtype.ordered + + field["constraints"] = {"enum": list(cats)} + field["ordered"] = ordered + elif is_period_dtype(dtype): + field["freq"] = dtype.freq.freqstr + elif is_datetime64tz_dtype(dtype): + field["tz"] = dtype.tz.zone + elif is_extension_array_dtype(dtype): + field["extDtype"] = dtype.name + return field + + +def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: + """ + Converts a JSON field descriptor into its 
corresponding NumPy / pandas type + + Parameters + ---------- + field + A JSON field descriptor + + Returns + ------- + dtype + + Raises + ------ + ValueError + If the type of the provided field is unknown or currently unsupported + + Examples + -------- + >>> convert_json_field_to_pandas_type({"name": "an_int", "type": "integer"}) + 'int64' + + >>> convert_json_field_to_pandas_type( + ... { + ... "name": "a_categorical", + ... "type": "any", + ... "constraints": {"enum": ["a", "b", "c"]}, + ... "ordered": True, + ... } + ... ) + CategoricalDtype(categories=['a', 'b', 'c'], ordered=True) + + >>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"}) + 'datetime64[ns]' + + >>> convert_json_field_to_pandas_type( + ... {"name": "a_datetime_with_tz", "type": "datetime", "tz": "US/Central"} + ... ) + 'datetime64[ns, US/Central]' + """ + typ = field["type"] + if typ == "string": + return "object" + elif typ == "integer": + return "int64" + elif typ == "number": + return "float64" + elif typ == "boolean": + return "bool" + elif typ == "duration": + return "timedelta64" + elif typ == "datetime": + if field.get("tz"): + return f"datetime64[ns, {field['tz']}]" + elif field.get("freq"): + # GH#47747 using datetime over period to minimize the change surface + return f"period[{field['freq']}]" + else: + return "datetime64[ns]" + elif typ == "any": + if "constraints" in field and "ordered" in field: + return CategoricalDtype( + categories=field["constraints"]["enum"], ordered=field["ordered"] + ) + elif "extDtype" in field: + return registry.find(field["extDtype"]) + else: + return "object" + + raise ValueError(f"Unsupported or invalid field type: {typ}") + + +def build_table_schema( + data: DataFrame | Series, + index: bool = True, + primary_key: bool | None = None, + version: bool = True, +) -> dict[str, JSONSerializable]: + """ + Create a Table schema from ``data``. + + Parameters + ---------- + data : Series, DataFrame + index : bool, default True + Whether to include ``data.index`` in the schema. + primary_key : bool or None, default True + Column names to designate as the primary key. + The default `None` will set `'primaryKey'` to the index + level or levels if the index is unique. + version : bool, default True + Whether to include a field `pandas_version` with the version + of pandas that last revised the table schema. This version + can be different from the installed pandas version. + + Returns + ------- + schema : dict + + Notes + ----- + See `Table Schema + `__ for + conversion types. + Timedeltas as converted to ISO8601 duration format with + 9 decimal places after the seconds field for nanosecond precision. + + Categoricals are converted to the `any` dtype, and use the `enum` field + constraint to list the allowed values. The `ordered` attribute is included + in an `ordered` field. + + Examples + -------- + >>> df = pd.DataFrame( + ... {'A': [1, 2, 3], + ... 'B': ['a', 'b', 'c'], + ... 'C': pd.date_range('2016-01-01', freq='d', periods=3), + ... 
}, index=pd.Index(range(3), name='idx')) + >>> build_table_schema(df) + {'fields': \ +[{'name': 'idx', 'type': 'integer'}, \ +{'name': 'A', 'type': 'integer'}, \ +{'name': 'B', 'type': 'string'}, \ +{'name': 'C', 'type': 'datetime'}], \ +'primaryKey': ['idx'], \ +'pandas_version': '1.4.0'} + """ + if index is True: + data = set_default_names(data) + + schema: dict[str, Any] = {} + fields = [] + + if index: + if data.index.nlevels > 1: + data.index = cast("MultiIndex", data.index) + for level, name in zip(data.index.levels, data.index.names): + new_field = convert_pandas_type_to_json_field(level) + new_field["name"] = name + fields.append(new_field) + else: + fields.append(convert_pandas_type_to_json_field(data.index)) + + if data.ndim > 1: + for column, s in data.items(): + fields.append(convert_pandas_type_to_json_field(s)) + else: + fields.append(convert_pandas_type_to_json_field(data)) + + schema["fields"] = fields + if index and data.index.is_unique and primary_key is None: + if data.index.nlevels == 1: + schema["primaryKey"] = [data.index.name] + else: + schema["primaryKey"] = data.index.names + elif primary_key is not None: + schema["primaryKey"] = primary_key + + if version: + schema["pandas_version"] = TABLE_SCHEMA_VERSION + return schema + + +def parse_table_schema(json, precise_float): + """ + Builds a DataFrame from a given schema + + Parameters + ---------- + json : + A JSON table schema + precise_float : bool + Flag controlling precision when decoding string to double values, as + dictated by ``read_json`` + + Returns + ------- + df : DataFrame + + Raises + ------ + NotImplementedError + If the JSON table schema contains either timezone or timedelta data + + Notes + ----- + Because :func:`DataFrame.to_json` uses the string 'index' to denote a + name-less :class:`Index`, this function sets the name of the returned + :class:`DataFrame` to ``None`` when said string is encountered with a + normal :class:`Index`. For a :class:`MultiIndex`, the same limitation + applies to any strings beginning with 'level_'. Therefore, an + :class:`Index` name of 'index' and :class:`MultiIndex` names starting + with 'level_' are not supported. + + See Also + -------- + build_table_schema : Inverse function. 
+ pandas.read_json + """ + table = loads(json, precise_float=precise_float) + col_order = [field["name"] for field in table["schema"]["fields"]] + df = DataFrame(table["data"], columns=col_order)[col_order] + + dtypes = { + field["name"]: convert_json_field_to_pandas_type(field) + for field in table["schema"]["fields"] + } + + # No ISO constructor for Timedelta as of yet, so need to raise + if "timedelta64" in dtypes.values(): + raise NotImplementedError( + 'table="orient" can not yet read ISO-formatted Timedelta data' + ) + + df = df.astype(dtypes) + + if "primaryKey" in table["schema"]: + df = df.set_index(table["schema"]["primaryKey"]) + if len(df.index.names) == 1: + if df.index.name == "index": + df.index.name = None + else: + df.index.names = [ + None if x.startswith("level_") else x for x in df.index.names + ] + + return df diff --git a/pandas/io/orc.py b/pandas/io/orc.py new file mode 100644 index 00000000..40754a56 --- /dev/null +++ b/pandas/io/orc.py @@ -0,0 +1,176 @@ +""" orc compat """ +from __future__ import annotations + +import io +from types import ModuleType +from typing import ( + TYPE_CHECKING, + Any, + Literal, +) + +from pandas._typing import ( + FilePath, + ReadBuffer, + WriteBuffer, +) +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_interval_dtype, + is_period_dtype, + is_unsigned_integer_dtype, +) + +from pandas.io.common import get_handle + +if TYPE_CHECKING: + from pandas import DataFrame + + +def read_orc( + path: FilePath | ReadBuffer[bytes], columns: list[str] | None = None, **kwargs +) -> DataFrame: + """ + Load an ORC object from the file path, returning a DataFrame. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + path : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``read()`` function. The string could be a URL. + Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.orc``. + columns : list, default None + If not None, only these columns will be read from the file. + **kwargs + Any additional kwargs are passed to pyarrow. + + Returns + ------- + DataFrame + + Notes + ----- + Before using this function you should read the :ref:`user guide about ORC ` + and :ref:`install optional dependencies `. + """ + # we require a newer version of pyarrow than we support for parquet + + orc = import_optional_dependency("pyarrow.orc") + + with get_handle(path, "rb", is_text=False) as handles: + orc_file = orc.ORCFile(handles.handle) + return orc_file.read(columns=columns, **kwargs).to_pandas() + + +def to_orc( + df: DataFrame, + path: FilePath | WriteBuffer[bytes] | None = None, + *, + engine: Literal["pyarrow"] = "pyarrow", + index: bool | None = None, + engine_kwargs: dict[str, Any] | None = None, +) -> bytes | None: + """ + Write a DataFrame to the ORC format. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + df : DataFrame + The dataframe to be written to ORC. Raises NotImplementedError + if dtype of one or more columns is category, unsigned integers, + intervals, periods or sparse. + path : str, file-like object or None, default None + If a string, it will be used as Root Directory path + when writing a partitioned dataset. By file-like object, + we refer to objects with a write() method, such as a file handle + (e.g. via builtin open function). 
If path is None, + a bytes object is returned. + engine : str, default 'pyarrow' + ORC library to use. Pyarrow must be >= 7.0.0. + index : bool, optional + If ``True``, include the dataframe's index(es) in the file output. If + ``False``, they will not be written to the file. + If ``None``, similar to ``infer`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + engine_kwargs : dict[str, Any] or None, default None + Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. + + Returns + ------- + bytes if no path argument is provided else None + + Raises + ------ + NotImplementedError + Dtype of one or more columns is category, unsigned integers, interval, + period or sparse. + ValueError + engine is not pyarrow. + + Notes + ----- + * Before using this function you should read the + :ref:`user guide about ORC ` and + :ref:`install optional dependencies `. + * This function requires `pyarrow `_ + library. + * For supported dtypes please refer to `supported ORC features in Arrow + `__. + * Currently timezones in datetime columns are not preserved when a + dataframe is converted into ORC files. + """ + if index is None: + index = df.index.names[0] is not None + if engine_kwargs is None: + engine_kwargs = {} + + # If unsupported dtypes are found raise NotImplementedError + # In Pyarrow 9.0.0 this check will no longer be needed + for dtype in df.dtypes: + if ( + is_categorical_dtype(dtype) + or is_interval_dtype(dtype) + or is_period_dtype(dtype) + or is_unsigned_integer_dtype(dtype) + ): + raise NotImplementedError( + "The dtype of one or more columns is not supported yet." + ) + + if engine != "pyarrow": + raise ValueError("engine must be 'pyarrow'") + engine = import_optional_dependency(engine, min_version="7.0.0") + orc = import_optional_dependency("pyarrow.orc") + + was_none = path is None + if was_none: + path = io.BytesIO() + assert path is not None # For mypy + with get_handle(path, "wb", is_text=False) as handles: + assert isinstance(engine, ModuleType) # For mypy + try: + orc.write_table( + engine.Table.from_pandas(df, preserve_index=index), + handles.handle, + **engine_kwargs, + ) + except TypeError as e: + raise NotImplementedError( + "The dtype of one or more columns is not supported yet." 
+ ) from e + + if was_none: + assert isinstance(path, io.BytesIO) # For mypy + return path.getvalue() + return None diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py new file mode 100644 index 00000000..6f3a7608 --- /dev/null +++ b/pandas/io/parquet.py @@ -0,0 +1,509 @@ +""" parquet compat """ +from __future__ import annotations + +import io +import os +from typing import Any +from warnings import catch_warnings + +from pandas._typing import ( + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.compat._optional import import_optional_dependency +from pandas.errors import AbstractMethodError +from pandas.util._decorators import doc + +from pandas import ( + DataFrame, + MultiIndex, + get_option, +) +from pandas.core.shared_docs import _shared_docs +from pandas.util.version import Version + +from pandas.io.common import ( + IOHandles, + get_handle, + is_fsspec_url, + is_url, + stringify_path, +) + + +def get_engine(engine: str) -> BaseImpl: + """return our implementation""" + if engine == "auto": + engine = get_option("io.parquet.engine") + + if engine == "auto": + # try engines in this order + engine_classes = [PyArrowImpl, FastParquetImpl] + + error_msgs = "" + for engine_class in engine_classes: + try: + return engine_class() + except ImportError as err: + error_msgs += "\n - " + str(err) + + raise ImportError( + "Unable to find a usable engine; " + "tried using: 'pyarrow', 'fastparquet'.\n" + "A suitable version of " + "pyarrow or fastparquet is required for parquet " + "support.\n" + "Trying to import the above resulted in these errors:" + f"{error_msgs}" + ) + + if engine == "pyarrow": + return PyArrowImpl() + elif engine == "fastparquet": + return FastParquetImpl() + + raise ValueError("engine must be one of 'pyarrow', 'fastparquet'") + + +def _get_path_or_handle( + path: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], + fs: Any, + storage_options: StorageOptions = None, + mode: str = "rb", + is_dir: bool = False, +) -> tuple[ + FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any +]: + """File handling for PyArrow.""" + path_or_handle = stringify_path(path) + if is_fsspec_url(path_or_handle) and fs is None: + fsspec = import_optional_dependency("fsspec") + + fs, path_or_handle = fsspec.core.url_to_fs( + path_or_handle, **(storage_options or {}) + ) + elif storage_options and (not is_url(path_or_handle) or mode != "rb"): + # can't write to a remote url + # without making use of fsspec at the moment + raise ValueError("storage_options passed with buffer, or non-supported URL") + + handles = None + if ( + not fs + and not is_dir + and isinstance(path_or_handle, str) + and not os.path.isdir(path_or_handle) + ): + # use get_handle only when we are very certain that it is not a directory + # fsspec resources can also point to directories + # this branch is used for example when reading from non-fsspec URLs + handles = get_handle( + path_or_handle, mode, is_text=False, storage_options=storage_options + ) + fs = None + path_or_handle = handles.handle + return path_or_handle, handles, fs + + +class BaseImpl: + @staticmethod + def validate_dataframe(df: DataFrame) -> None: + + if not isinstance(df, DataFrame): + raise ValueError("to_parquet only supports IO with DataFrames") + + # must have value column names for all index levels (strings only) + if isinstance(df.columns, MultiIndex): + if not all( + x.inferred_type in {"string", "empty"} for x in df.columns.levels + ): + raise ValueError( + """ + parquet must have string column names 
for all values in + each level of the MultiIndex + """ + ) + else: + if df.columns.inferred_type not in {"string", "empty"}: + raise ValueError("parquet must have string column names") + + # index level names must be strings + valid_names = all( + isinstance(name, str) for name in df.index.names if name is not None + ) + if not valid_names: + raise ValueError("Index level names must be strings") + + def write(self, df: DataFrame, path, compression, **kwargs): + raise AbstractMethodError(self) + + def read(self, path, columns=None, **kwargs) -> DataFrame: + raise AbstractMethodError(self) + + +class PyArrowImpl(BaseImpl): + def __init__(self) -> None: + import_optional_dependency( + "pyarrow", extra="pyarrow is required for parquet support." + ) + import pyarrow.parquet + + # import utils to register the pyarrow extension types + import pandas.core.arrays.arrow.extension_types # pyright: ignore # noqa:F401 + + self.api = pyarrow + + def write( + self, + df: DataFrame, + path: FilePath | WriteBuffer[bytes], + compression: str | None = "snappy", + index: bool | None = None, + storage_options: StorageOptions = None, + partition_cols: list[str] | None = None, + **kwargs, + ) -> None: + self.validate_dataframe(df) + + from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)} + if index is not None: + from_pandas_kwargs["preserve_index"] = index + + table = self.api.Table.from_pandas(df, **from_pandas_kwargs) + + path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle( + path, + kwargs.pop("filesystem", None), + storage_options=storage_options, + mode="wb", + is_dir=partition_cols is not None, + ) + if ( + isinstance(path_or_handle, io.BufferedWriter) + and hasattr(path_or_handle, "name") + and isinstance(path_or_handle.name, (str, bytes)) + ): + path_or_handle = path_or_handle.name + if isinstance(path_or_handle, bytes): + path_or_handle = path_or_handle.decode() + + try: + if partition_cols is not None: + # writes to multiple files under the given path + self.api.parquet.write_to_dataset( + table, + path_or_handle, + compression=compression, + partition_cols=partition_cols, + **kwargs, + ) + else: + # write to single output file + self.api.parquet.write_table( + table, path_or_handle, compression=compression, **kwargs + ) + finally: + if handles is not None: + handles.close() + + def read( + self, + path, + columns=None, + use_nullable_dtypes=False, + storage_options: StorageOptions = None, + **kwargs, + ) -> DataFrame: + kwargs["use_pandas_metadata"] = True + + to_pandas_kwargs = {} + if use_nullable_dtypes: + import pandas as pd + + mapping = { + self.api.int8(): pd.Int8Dtype(), + self.api.int16(): pd.Int16Dtype(), + self.api.int32(): pd.Int32Dtype(), + self.api.int64(): pd.Int64Dtype(), + self.api.uint8(): pd.UInt8Dtype(), + self.api.uint16(): pd.UInt16Dtype(), + self.api.uint32(): pd.UInt32Dtype(), + self.api.uint64(): pd.UInt64Dtype(), + self.api.bool_(): pd.BooleanDtype(), + self.api.string(): pd.StringDtype(), + self.api.float32(): pd.Float32Dtype(), + self.api.float64(): pd.Float64Dtype(), + } + to_pandas_kwargs["types_mapper"] = mapping.get + manager = get_option("mode.data_manager") + if manager == "array": + to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment] + + path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle( + path, + kwargs.pop("filesystem", None), + storage_options=storage_options, + mode="rb", + ) + try: + result = self.api.parquet.read_table( + path_or_handle, columns=columns, **kwargs + 
).to_pandas(**to_pandas_kwargs) + if manager == "array": + result = result._as_manager("array", copy=False) + return result + finally: + if handles is not None: + handles.close() + + +class FastParquetImpl(BaseImpl): + def __init__(self) -> None: + # since pandas is a dependency of fastparquet + # we need to import on first use + fastparquet = import_optional_dependency( + "fastparquet", extra="fastparquet is required for parquet support." + ) + self.api = fastparquet + + def write( + self, + df: DataFrame, + path, + compression="snappy", + index=None, + partition_cols=None, + storage_options: StorageOptions = None, + **kwargs, + ) -> None: + self.validate_dataframe(df) + # thriftpy/protocol/compact.py:339: + # DeprecationWarning: tostring() is deprecated. + # Use tobytes() instead. + + if "partition_on" in kwargs and partition_cols is not None: + raise ValueError( + "Cannot use both partition_on and " + "partition_cols. Use partition_cols for partitioning data" + ) + elif "partition_on" in kwargs: + partition_cols = kwargs.pop("partition_on") + + if partition_cols is not None: + kwargs["file_scheme"] = "hive" + + # cannot use get_handle as write() does not accept file buffers + path = stringify_path(path) + if is_fsspec_url(path): + fsspec = import_optional_dependency("fsspec") + + # if filesystem is provided by fsspec, file must be opened in 'wb' mode. + kwargs["open_with"] = lambda path, _: fsspec.open( + path, "wb", **(storage_options or {}) + ).open() + elif storage_options: + raise ValueError( + "storage_options passed with file object or non-fsspec file path" + ) + + with catch_warnings(record=True): + self.api.write( + path, + df, + compression=compression, + write_index=index, + partition_on=partition_cols, + **kwargs, + ) + + def read( + self, path, columns=None, storage_options: StorageOptions = None, **kwargs + ) -> DataFrame: + parquet_kwargs: dict[str, Any] = {} + use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False) + if Version(self.api.__version__) >= Version("0.7.1"): + # We are disabling nullable dtypes for fastparquet pending discussion + parquet_kwargs["pandas_nulls"] = False + if use_nullable_dtypes: + raise ValueError( + "The 'use_nullable_dtypes' argument is not supported for the " + "fastparquet engine" + ) + path = stringify_path(path) + handles = None + if is_fsspec_url(path): + fsspec = import_optional_dependency("fsspec") + + if Version(self.api.__version__) > Version("0.6.1"): + parquet_kwargs["fs"] = fsspec.open( + path, "rb", **(storage_options or {}) + ).fs + else: + parquet_kwargs["open_with"] = lambda path, _: fsspec.open( + path, "rb", **(storage_options or {}) + ).open() + elif isinstance(path, str) and not os.path.isdir(path): + # use get_handle only when we are very certain that it is not a directory + # fsspec resources can also point to directories + # this branch is used for example when reading from non-fsspec URLs + handles = get_handle( + path, "rb", is_text=False, storage_options=storage_options + ) + path = handles.handle + + try: + parquet_file = self.api.ParquetFile(path, **parquet_kwargs) + return parquet_file.to_pandas(columns=columns, **kwargs) + finally: + if handles is not None: + handles.close() + + +@doc(storage_options=_shared_docs["storage_options"]) +def to_parquet( + df: DataFrame, + path: FilePath | WriteBuffer[bytes] | None = None, + engine: str = "auto", + compression: str | None = "snappy", + index: bool | None = None, + storage_options: StorageOptions = None, + partition_cols: list[str] | None = None, + **kwargs, +) 
-> bytes | None: + """ + Write a DataFrame to the parquet format. + + Parameters + ---------- + df : DataFrame + path : str, path object, file-like object, or None, default None + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. If None, the result is + returned as bytes. If a string, it will be used as Root Directory path + when writing a partitioned dataset. The engine fastparquet does not + accept file-like objects. + + .. versionchanged:: 1.2.0 + + engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}}, + default 'snappy'. Name of the compression to use. Use ``None`` + for no compression. The supported compression methods actually + depend on which engine is used. For 'pyarrow', 'snappy', 'gzip', + 'brotli', 'lz4', 'zstd' are all supported. For 'fastparquet', + only 'gzip' and 'snappy' are supported. + index : bool, default None + If ``True``, include the dataframe's index(es) in the file output. If + ``False``, they will not be written to the file. + If ``None``, similar to ``True`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + partition_cols : str or list, optional, default None + Column names by which to partition the dataset. + Columns are partitioned in the order they are given. + Must be None if path is not a string. + {storage_options} + + .. versionadded:: 1.2.0 + + kwargs + Additional keyword arguments passed to the engine + + Returns + ------- + bytes if no path argument is provided else None + """ + if isinstance(partition_cols, str): + partition_cols = [partition_cols] + impl = get_engine(engine) + + path_or_buf: FilePath | WriteBuffer[bytes] = io.BytesIO() if path is None else path + + impl.write( + df, + path_or_buf, + compression=compression, + index=index, + partition_cols=partition_cols, + storage_options=storage_options, + **kwargs, + ) + + if path is None: + assert isinstance(path_or_buf, io.BytesIO) + return path_or_buf.getvalue() + else: + return None + + +@doc(storage_options=_shared_docs["storage_options"]) +def read_parquet( + path: FilePath | ReadBuffer[bytes], + engine: str = "auto", + columns: list[str] | None = None, + storage_options: StorageOptions = None, + use_nullable_dtypes: bool = False, + **kwargs, +) -> DataFrame: + """ + Load a parquet object from the file path, returning a DataFrame. + + Parameters + ---------- + path : str, path object or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``read()`` function. + The string could be a URL. Valid URL schemes include http, ftp, s3, + gs, and file. For file URLs, a host is expected. A local file could be: + ``file://localhost/path/to/table.parquet``. + A file URL can also be a path to a directory that contains multiple + partitioned parquet files. Both pyarrow and fastparquet support + paths to directories as well as file URLs. A directory path could be: + ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``. 
+ engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + columns : list, default=None + If not None, only these columns will be read from the file. + + {storage_options} + + .. versionadded:: 1.3.0 + + use_nullable_dtypes : bool, default False + If True, use dtypes that use ``pd.NA`` as missing value indicator + for the resulting DataFrame. (only applicable for the ``pyarrow`` + engine) + As new dtypes are added that support ``pd.NA`` in the future, the + output with this option will change to use those dtypes. + Note: this is an experimental option, and behaviour (e.g. additional + support dtypes) may change without notice. + + .. versionadded:: 1.2.0 + + **kwargs + Any additional kwargs are passed to the engine. + + Returns + ------- + DataFrame + """ + impl = get_engine(engine) + + return impl.read( + path, + columns=columns, + storage_options=storage_options, + use_nullable_dtypes=use_nullable_dtypes, + **kwargs, + ) diff --git a/pandas/io/parsers/__init__.py b/pandas/io/parsers/__init__.py new file mode 100644 index 00000000..ff11968d --- /dev/null +++ b/pandas/io/parsers/__init__.py @@ -0,0 +1,9 @@ +from pandas.io.parsers.readers import ( + TextFileReader, + TextParser, + read_csv, + read_fwf, + read_table, +) + +__all__ = ["TextFileReader", "TextParser", "read_csv", "read_fwf", "read_table"] diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py new file mode 100644 index 00000000..2305c209 --- /dev/null +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -0,0 +1,155 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pandas._typing import ReadBuffer +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.inference import is_integer + +from pandas.io.parsers.base_parser import ParserBase + +if TYPE_CHECKING: + from pandas import DataFrame + + +class ArrowParserWrapper(ParserBase): + """ + Wrapper for the pyarrow engine for read_csv() + """ + + def __init__(self, src: ReadBuffer[bytes], **kwds) -> None: + super().__init__(kwds) + self.kwds = kwds + self.src = src + + self._parse_kwds() + + def _parse_kwds(self): + """ + Validates keywords before passing to pyarrow. 
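+
+        For example, ``na_values=["NA", "n/a"]`` is accepted here, whereas a
+        per-column mapping such as ``na_values={"a": ["NA"]}`` raises
+        ``ValueError``, because pyarrow's ``null_values`` option is a single
+        global list rather than a per-column mapping.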
+ """ + encoding: str | None = self.kwds.get("encoding") + self.encoding = "utf-8" if encoding is None else encoding + + self.usecols, self.usecols_dtype = self._validate_usecols_arg( + self.kwds["usecols"] + ) + na_values = self.kwds["na_values"] + if isinstance(na_values, dict): + raise ValueError( + "The pyarrow engine doesn't support passing a dict for na_values" + ) + self.na_values = list(self.kwds["na_values"]) + + def _get_pyarrow_options(self): + """ + Rename some arguments to pass to pyarrow + """ + mapping = { + "usecols": "include_columns", + "na_values": "null_values", + "escapechar": "escape_char", + "skip_blank_lines": "ignore_empty_lines", + } + for pandas_name, pyarrow_name in mapping.items(): + if pandas_name in self.kwds and self.kwds.get(pandas_name) is not None: + self.kwds[pyarrow_name] = self.kwds.pop(pandas_name) + + self.parse_options = { + option_name: option_value + for option_name, option_value in self.kwds.items() + if option_value is not None + and option_name + in ("delimiter", "quote_char", "escape_char", "ignore_empty_lines") + } + self.convert_options = { + option_name: option_value + for option_name, option_value in self.kwds.items() + if option_value is not None + and option_name + in ("include_columns", "null_values", "true_values", "false_values") + } + self.read_options = { + "autogenerate_column_names": self.header is None, + "skip_rows": self.header + if self.header is not None + else self.kwds["skiprows"], + } + + def _finalize_output(self, frame: DataFrame) -> DataFrame: + """ + Processes data read in based on kwargs. + + Parameters + ---------- + frame: DataFrame + The DataFrame to process. + + Returns + ------- + DataFrame + The processed DataFrame. + """ + num_cols = len(frame.columns) + multi_index_named = True + if self.header is None: + if self.names is None: + if self.prefix is not None: + self.names = [f"{self.prefix}{i}" for i in range(num_cols)] + elif self.header is None: + self.names = range(num_cols) + if len(self.names) != num_cols: + # usecols is passed through to pyarrow, we only handle index col here + # The only way self.names is not the same length as number of cols is + # if we have int index_col. We should just pad the names(they will get + # removed anyways) to expected length then. + self.names = list(range(num_cols - len(self.names))) + self.names + multi_index_named = False + frame.columns = self.names + # we only need the frame not the names + frame.columns, frame = self._do_date_conversions(frame.columns, frame) + if self.index_col is not None: + for i, item in enumerate(self.index_col): + if is_integer(item): + self.index_col[i] = frame.columns[item] + else: + # String case + if item not in frame.columns: + raise ValueError(f"Index {item} invalid") + frame.set_index(self.index_col, drop=True, inplace=True) + # Clear names if headerless and no name given + if self.header is None and not multi_index_named: + frame.index.names = [None] * len(frame.index.names) + + if self.kwds.get("dtype") is not None: + try: + frame = frame.astype(self.kwds.get("dtype")) + except TypeError as e: + # GH#44901 reraise to keep api consistent + raise ValueError(e) + return frame + + def read(self) -> DataFrame: + """ + Reads the contents of a CSV file into a DataFrame and + processes it according to the kwargs passed in the + constructor. + + Returns + ------- + DataFrame + The DataFrame created from the CSV file. 
+ """ + pyarrow_csv = import_optional_dependency("pyarrow.csv") + self._get_pyarrow_options() + + table = pyarrow_csv.read_csv( + self.src, + read_options=pyarrow_csv.ReadOptions(**self.read_options), + parse_options=pyarrow_csv.ParseOptions(**self.parse_options), + convert_options=pyarrow_csv.ConvertOptions(**self.convert_options), + ) + + frame = table.to_pandas() + return self._finalize_output(frame) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py new file mode 100644 index 00000000..0e40e47b --- /dev/null +++ b/pandas/io/parsers/base_parser.py @@ -0,0 +1,1335 @@ +from __future__ import annotations + +from collections import defaultdict +from copy import copy +import csv +import datetime +from enum import Enum +import itertools +from typing import ( + TYPE_CHECKING, + Any, + Callable, + DefaultDict, + Hashable, + Iterable, + List, + Mapping, + Sequence, + Tuple, + cast, + final, + overload, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +import pandas._libs.ops as libops +import pandas._libs.parsers as parsers +from pandas._libs.parsers import STR_NA_VALUES +from pandas._libs.tslibs import parsing +from pandas._typing import ( + ArrayLike, + DtypeArg, + Scalar, +) +from pandas.errors import ( + ParserError, + ParserWarning, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.astype import astype_nansafe +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_categorical_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.missing import isna + +from pandas.core import algorithms +from pandas.core.arrays import Categorical +from pandas.core.indexes.api import ( + Index, + MultiIndex, + ensure_index_from_sequences, +) +from pandas.core.series import Series +from pandas.core.tools import datetimes as tools + +from pandas.io.date_converters import generic_parser + +if TYPE_CHECKING: + from pandas import DataFrame + + +class ParserBase: + class BadLineHandleMethod(Enum): + ERROR = 0 + WARN = 1 + SKIP = 2 + + _implicit_index: bool = False + _first_chunk: bool + + def __init__(self, kwds) -> None: + + self.names = kwds.get("names") + self.orig_names: list | None = None + self.prefix = kwds.pop("prefix", None) + + self.index_col = kwds.get("index_col", None) + self.unnamed_cols: set = set() + self.index_names: Sequence[Hashable] | None = None + self.col_names = None + + self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) + self._parse_date_cols: Iterable = [] + self.date_parser = kwds.pop("date_parser", None) + self.dayfirst = kwds.pop("dayfirst", False) + self.keep_date_col = kwds.pop("keep_date_col", False) + + self.na_values = kwds.get("na_values") + self.na_fvalues = kwds.get("na_fvalues") + self.na_filter = kwds.get("na_filter", False) + self.keep_default_na = kwds.get("keep_default_na", True) + + self.dtype = copy(kwds.get("dtype", None)) + self.converters = kwds.get("converters") + + self.true_values = kwds.get("true_values") + self.false_values = kwds.get("false_values") + self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True) + self.infer_datetime_format = kwds.pop("infer_datetime_format", False) + self.cache_dates = kwds.pop("cache_dates", True) + + self._date_conv = _make_date_converter( + date_parser=self.date_parser, + 
dayfirst=self.dayfirst, + infer_datetime_format=self.infer_datetime_format, + cache_dates=self.cache_dates, + ) + + # validate header options for mi + self.header = kwds.get("header") + if is_list_like(self.header, allow_sets=False): + if kwds.get("usecols"): + raise ValueError( + "cannot specify usecols when specifying a multi-index header" + ) + if kwds.get("names"): + raise ValueError( + "cannot specify names when specifying a multi-index header" + ) + + # validate index_col that only contains integers + if self.index_col is not None: + if not ( + is_list_like(self.index_col, allow_sets=False) + and all(map(is_integer, self.index_col)) + or is_integer(self.index_col) + ): + raise ValueError( + "index_col must only contain row numbers " + "when specifying a multi-index header" + ) + elif self.header is not None and self.prefix is not None: + # GH 27394 + raise ValueError( + "Argument prefix must be None if argument header is not None" + ) + + self._name_processed = False + + self._first_chunk = True + + self.usecols, self.usecols_dtype = self._validate_usecols_arg(kwds["usecols"]) + + # Fallback to error to pass a sketchy test(test_override_set_noconvert_columns) + # Normally, this arg would get pre-processed earlier on + self.on_bad_lines = kwds.get("on_bad_lines", self.BadLineHandleMethod.ERROR) + + def _validate_parse_dates_presence(self, columns: Sequence[Hashable]) -> Iterable: + """ + Check if parse_dates are in columns. + + If user has provided names for parse_dates, check if those columns + are available. + + Parameters + ---------- + columns : list + List of names of the dataframe. + + Returns + ------- + The names of the columns which will get parsed later if a dict or list + is given as specification. + + Raises + ------ + ValueError + If column to parse_date is not in dataframe. 
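+
+        For example, with ``parse_dates=["a", 2]`` and ``columns=["a", "b", "c"]``
+        this returns ``["a", "c"]``: string references are kept as-is and integer
+        positions are translated to the corresponding column names.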
+ + """ + cols_needed: Iterable + if is_dict_like(self.parse_dates): + cols_needed = itertools.chain(*self.parse_dates.values()) + elif is_list_like(self.parse_dates): + # a column in parse_dates could be represented + # ColReference = Union[int, str] + # DateGroups = List[ColReference] + # ParseDates = Union[DateGroups, List[DateGroups], + # Dict[ColReference, DateGroups]] + cols_needed = itertools.chain.from_iterable( + col if is_list_like(col) and not isinstance(col, tuple) else [col] + for col in self.parse_dates + ) + else: + cols_needed = [] + + cols_needed = list(cols_needed) + + # get only columns that are references using names (str), not by index + missing_cols = ", ".join( + sorted( + { + col + for col in cols_needed + if isinstance(col, str) and col not in columns + } + ) + ) + if missing_cols: + raise ValueError( + f"Missing column provided to 'parse_dates': '{missing_cols}'" + ) + # Convert positions to actual column names + return [ + col if (isinstance(col, str) or col in columns) else columns[col] + for col in cols_needed + ] + + def close(self) -> None: + pass + + @final + @property + def _has_complex_date_col(self) -> bool: + return isinstance(self.parse_dates, dict) or ( + isinstance(self.parse_dates, list) + and len(self.parse_dates) > 0 + and isinstance(self.parse_dates[0], list) + ) + + @final + def _should_parse_dates(self, i: int) -> bool: + if isinstance(self.parse_dates, bool): + return self.parse_dates + else: + if self.index_names is not None: + name = self.index_names[i] + else: + name = None + j = i if self.index_col is None else self.index_col[i] + + if is_scalar(self.parse_dates): + return (j == self.parse_dates) or ( + name is not None and name == self.parse_dates + ) + else: + return (j in self.parse_dates) or ( + name is not None and name in self.parse_dates + ) + + @final + def _extract_multi_indexer_columns( + self, + header, + index_names: list | None, + passed_names: bool = False, + ): + """ + Extract and return the names, index_names, col_names if the column + names are a MultiIndex. + + Parameters + ---------- + header: list of lists + The header rows + index_names: list, optional + The names of the future index + passed_names: bool, default False + A flag specifying if names where passed + + """ + if len(header) < 2: + return header[0], index_names, None, passed_names + + # the names are the tuples of the header that are not the index cols + # 0 is the name of the index, assuming index_col is a list of column + # numbers + ic = self.index_col + if ic is None: + ic = [] + + if not isinstance(ic, (list, tuple, np.ndarray)): + ic = [ic] + sic = set(ic) + + # clean the index_names + index_names = header.pop(-1) + index_names, _, _ = self._clean_index_names(index_names, self.index_col) + + # extract the columns + field_count = len(header[0]) + + # check if header lengths are equal + if not all(len(header_iter) == field_count for header_iter in header[1:]): + raise ParserError("Header rows must have an equal number of columns.") + + def extract(r): + return tuple(r[i] for i in range(field_count) if i not in sic) + + columns = list(zip(*(extract(r) for r in header))) + names = columns.copy() + for single_ic in sorted(ic): + names.insert(single_ic, single_ic) + + # Clean the column names (if we have an index_col). 
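+        # For each header row, col_names records the label found in the first
+        # index column (or None when that cell is unnamed); these later become
+        # the level names of the resulting column MultiIndex.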
+ if len(ic): + col_names = [ + r[ic[0]] + if ((r[ic[0]] is not None) and r[ic[0]] not in self.unnamed_cols) + else None + for r in header + ] + else: + col_names = [None] * len(header) + + passed_names = True + + return names, index_names, col_names, passed_names + + @final + def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]: + # see gh-7160 and gh-9424: this helps to provide + # immediate alleviation of the duplicate names + # issue and appears to be satisfactory to users, + # but ultimately, not needing to butcher the names + # would be nice! + if self.mangle_dupe_cols: + names = list(names) # so we can index + counts: DefaultDict[Hashable, int] = defaultdict(int) + is_potential_mi = _is_potential_multi_index(names, self.index_col) + + for i, col in enumerate(names): + cur_count = counts[col] + + while cur_count > 0: + counts[col] = cur_count + 1 + + if is_potential_mi: + # for mypy + assert isinstance(col, tuple) + col = col[:-1] + (f"{col[-1]}.{cur_count}",) + else: + col = f"{col}.{cur_count}" + cur_count = counts[col] + + names[i] = col + counts[col] = cur_count + 1 + + return names + + @final + def _maybe_make_multi_index_columns( + self, + columns: Sequence[Hashable], + col_names: Sequence[Hashable] | None = None, + ) -> Sequence[Hashable] | MultiIndex: + # possibly create a column mi here + if _is_potential_multi_index(columns): + list_columns = cast(List[Tuple], columns) + return MultiIndex.from_tuples(list_columns, names=col_names) + return columns + + @final + def _make_index( + self, data, alldata, columns, indexnamerow: list[Scalar] | None = None + ) -> tuple[Index | None, Sequence[Hashable] | MultiIndex]: + index: Index | None + if not is_index_col(self.index_col) or not self.index_col: + index = None + + elif not self._has_complex_date_col: + simple_index = self._get_simple_index(alldata, columns) + index = self._agg_index(simple_index) + elif self._has_complex_date_col: + if not self._name_processed: + (self.index_names, _, self.index_col) = self._clean_index_names( + list(columns), self.index_col + ) + self._name_processed = True + date_index = self._get_complex_date_index(data, columns) + index = self._agg_index(date_index, try_parse_dates=False) + + # add names for the index + if indexnamerow: + coffset = len(indexnamerow) - len(columns) + assert index is not None + index = index.set_names(indexnamerow[:coffset]) + + # maybe create a mi on the columns + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + + return index, columns + + @final + def _get_simple_index(self, data, columns): + def ix(col): + if not isinstance(col, str): + return col + raise ValueError(f"Index {col} invalid") + + to_remove = [] + index = [] + for idx in self.index_col: + i = ix(idx) + to_remove.append(i) + index.append(data[i]) + + # remove index items from content and columns, don't pop in + # loop + for i in sorted(to_remove, reverse=True): + data.pop(i) + if not self._implicit_index: + columns.pop(i) + + return index + + @final + def _get_complex_date_index(self, data, col_names): + def _get_name(icol): + if isinstance(icol, str): + return icol + + if col_names is None: + raise ValueError(f"Must supply column order to use {icol!s} as index") + + for i, c in enumerate(col_names): + if i == icol: + return c + + to_remove = [] + index = [] + for idx in self.index_col: + name = _get_name(idx) + to_remove.append(name) + index.append(data[name]) + + # remove index items from content and columns, don't pop in + # loop + for c in sorted(to_remove, 
reverse=True): + data.pop(c) + col_names.remove(c) + + return index + + def _clean_mapping(self, mapping): + """converts col numbers to names""" + if not isinstance(mapping, dict): + return mapping + clean = {} + # for mypy + assert self.orig_names is not None + + for col, v in mapping.items(): + if isinstance(col, int) and col not in self.orig_names: + col = self.orig_names[col] + clean[col] = v + if isinstance(mapping, defaultdict): + remaining_cols = set(self.orig_names) - set(clean.keys()) + clean.update({col: mapping[col] for col in remaining_cols}) + return clean + + @final + def _agg_index(self, index, try_parse_dates: bool = True) -> Index: + arrays = [] + converters = self._clean_mapping(self.converters) + + for i, arr in enumerate(index): + + if try_parse_dates and self._should_parse_dates(i): + arr = self._date_conv(arr) + + if self.na_filter: + col_na_values = self.na_values + col_na_fvalues = self.na_fvalues + else: + col_na_values = set() + col_na_fvalues = set() + + if isinstance(self.na_values, dict): + assert self.index_names is not None + col_name = self.index_names[i] + if col_name is not None: + col_na_values, col_na_fvalues = _get_na_values( + col_name, self.na_values, self.na_fvalues, self.keep_default_na + ) + + clean_dtypes = self._clean_mapping(self.dtype) + + cast_type = None + index_converter = False + if self.index_names is not None: + if isinstance(clean_dtypes, dict): + cast_type = clean_dtypes.get(self.index_names[i], None) + + if isinstance(converters, dict): + index_converter = converters.get(self.index_names[i]) is not None + + try_num_bool = not ( + cast_type and is_string_dtype(cast_type) or index_converter + ) + + arr, _ = self._infer_types( + arr, col_na_values | col_na_fvalues, try_num_bool + ) + arrays.append(arr) + + names = self.index_names + index = ensure_index_from_sequences(arrays, names) + + return index + + @final + def _convert_to_ndarrays( + self, + dct: Mapping, + na_values, + na_fvalues, + verbose: bool = False, + converters=None, + dtypes=None, + ): + result = {} + for c, values in dct.items(): + conv_f = None if converters is None else converters.get(c, None) + if isinstance(dtypes, dict): + cast_type = dtypes.get(c, None) + else: + # single dtype or None + cast_type = dtypes + + if self.na_filter: + col_na_values, col_na_fvalues = _get_na_values( + c, na_values, na_fvalues, self.keep_default_na + ) + else: + col_na_values, col_na_fvalues = set(), set() + + if c in self._parse_date_cols: + # GH#26203 Do not convert columns which get converted to dates + # but replace nans to ensure to_datetime works + mask = algorithms.isin(values, set(col_na_values) | col_na_fvalues) + np.putmask(values, mask, np.nan) + result[c] = values + continue + + if conv_f is not None: + # conv_f applied to data before inference + if cast_type is not None: + warnings.warn( + ( + "Both a converter and dtype were specified " + f"for column {c} - only the converter will be used." 
+ ), + ParserWarning, + stacklevel=find_stack_level(), + ) + + try: + values = lib.map_infer(values, conv_f) + except ValueError: + # error: Argument 2 to "isin" has incompatible type "List[Any]"; + # expected "Union[Union[ExtensionArray, ndarray], Index, Series]" + mask = algorithms.isin( + values, list(na_values) # type: ignore[arg-type] + ).view(np.uint8) + values = lib.map_infer_mask(values, conv_f, mask) + + cvals, na_count = self._infer_types( + values, set(col_na_values) | col_na_fvalues, try_num_bool=False + ) + else: + is_ea = is_extension_array_dtype(cast_type) + is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type) + # skip inference if specified dtype is object + # or casting to an EA + try_num_bool = not (cast_type and is_str_or_ea_dtype) + + # general type inference and conversion + cvals, na_count = self._infer_types( + values, set(col_na_values) | col_na_fvalues, try_num_bool + ) + + # type specified in dtype param or cast_type is an EA + if cast_type and ( + not is_dtype_equal(cvals, cast_type) + or is_extension_array_dtype(cast_type) + ): + if not is_ea and na_count > 0: + try: + if is_bool_dtype(cast_type): + raise ValueError( + f"Bool column has NA values in column {c}" + ) + except (AttributeError, TypeError): + # invalid input to is_bool_dtype + pass + cast_type = pandas_dtype(cast_type) + cvals = self._cast_types(cvals, cast_type, c) + + result[c] = cvals + if verbose and na_count: + print(f"Filled {na_count} NA values in column {c!s}") + return result + + @final + def _set_noconvert_dtype_columns( + self, col_indices: list[int], names: Sequence[Hashable] + ) -> set[int]: + """ + Set the columns that should not undergo dtype conversions. + + Currently, any column that is involved with date parsing will not + undergo such conversions. If usecols is specified, the positions of the columns + not to cast is relative to the usecols not to all columns. + + Parameters + ---------- + col_indices: The indices specifying order and positions of the columns + names: The column names which order is corresponding with the order + of col_indices + + Returns + ------- + A set of integers containing the positions of the columns not to convert. + """ + usecols: list[int] | list[str] | None + noconvert_columns = set() + if self.usecols_dtype == "integer": + # A set of integers will be converted to a list in + # the correct order every single time. + usecols = sorted(self.usecols) + elif callable(self.usecols) or self.usecols_dtype not in ("empty", None): + # The names attribute should have the correct columns + # in the proper order for indexing with parse_dates. + usecols = col_indices + else: + # Usecols is empty. 
+ usecols = None + + def _set(x) -> int: + if usecols is not None and is_integer(x): + x = usecols[x] + + if not is_integer(x): + x = col_indices[names.index(x)] + + return x + + if isinstance(self.parse_dates, list): + for val in self.parse_dates: + if isinstance(val, list): + for k in val: + noconvert_columns.add(_set(k)) + else: + noconvert_columns.add(_set(val)) + + elif isinstance(self.parse_dates, dict): + for val in self.parse_dates.values(): + if isinstance(val, list): + for k in val: + noconvert_columns.add(_set(k)) + else: + noconvert_columns.add(_set(val)) + + elif self.parse_dates: + if isinstance(self.index_col, list): + for k in self.index_col: + noconvert_columns.add(_set(k)) + elif self.index_col is not None: + noconvert_columns.add(_set(self.index_col)) + + return noconvert_columns + + def _infer_types(self, values, na_values, try_num_bool=True): + """ + Infer types of values, possibly casting + + Parameters + ---------- + values : ndarray + na_values : set + try_num_bool : bool, default try + try to cast values to numeric (first preference) or boolean + + Returns + ------- + converted : ndarray + na_count : int + """ + na_count = 0 + if issubclass(values.dtype.type, (np.number, np.bool_)): + # If our array has numeric dtype, we don't have to check for strings in isin + na_values = np.array([val for val in na_values if not isinstance(val, str)]) + mask = algorithms.isin(values, na_values) + na_count = mask.astype("uint8", copy=False).sum() + if na_count > 0: + if is_integer_dtype(values): + values = values.astype(np.float64) + np.putmask(values, mask, np.nan) + return values, na_count + + if try_num_bool and is_object_dtype(values.dtype): + # exclude e.g DatetimeIndex here + try: + result, _ = lib.maybe_convert_numeric(values, na_values, False) + except (ValueError, TypeError): + # e.g. 
encountering datetime string gets ValueError + # TypeError can be raised in floatify + result = values + na_count = parsers.sanitize_objects(result, na_values) + else: + na_count = isna(result).sum() + else: + result = values + if values.dtype == np.object_: + na_count = parsers.sanitize_objects(values, na_values) + + if result.dtype == np.object_ and try_num_bool: + result, _ = libops.maybe_convert_bool( + np.asarray(values), + true_values=self.true_values, + false_values=self.false_values, + ) + + return result, na_count + + def _cast_types(self, values, cast_type, column): + """ + Cast values to specified type + + Parameters + ---------- + values : ndarray + cast_type : string or np.dtype + dtype to cast values to + column : string + column name - used only for error reporting + + Returns + ------- + converted : ndarray + """ + if is_categorical_dtype(cast_type): + known_cats = ( + isinstance(cast_type, CategoricalDtype) + and cast_type.categories is not None + ) + + if not is_object_dtype(values) and not known_cats: + # TODO: this is for consistency with + # c-parser which parses all categories + # as strings + + values = astype_nansafe(values, np.dtype(str)) + + cats = Index(values).unique().dropna() + values = Categorical._from_inferred_categories( + cats, cats.get_indexer(values), cast_type, true_values=self.true_values + ) + + # use the EA's implementation of casting + elif is_extension_array_dtype(cast_type): + # ensure cast_type is an actual dtype and not a string + cast_type = pandas_dtype(cast_type) + array_type = cast_type.construct_array_type() + try: + if is_bool_dtype(cast_type): + return array_type._from_sequence_of_strings( + values, + dtype=cast_type, + true_values=self.true_values, + false_values=self.false_values, + ) + else: + return array_type._from_sequence_of_strings(values, dtype=cast_type) + except NotImplementedError as err: + raise NotImplementedError( + f"Extension Array: {array_type} must implement " + "_from_sequence_of_strings in order to be used in parser methods" + ) from err + + else: + try: + values = astype_nansafe(values, cast_type, copy=True, skipna=True) + except ValueError as err: + raise ValueError( + f"Unable to convert column {column} to type {cast_type}" + ) from err + return values + + @overload + def _do_date_conversions( + self, + names: Index, + data: DataFrame, + ) -> tuple[Sequence[Hashable] | Index, DataFrame]: + ... + + @overload + def _do_date_conversions( + self, + names: Sequence[Hashable], + data: Mapping[Hashable, ArrayLike], + ) -> tuple[Sequence[Hashable], Mapping[Hashable, ArrayLike]]: + ... + + def _do_date_conversions( + self, + names: Sequence[Hashable] | Index, + data: Mapping[Hashable, ArrayLike] | DataFrame, + ) -> tuple[Sequence[Hashable] | Index, Mapping[Hashable, ArrayLike] | DataFrame]: + # returns data, columns + + if self.parse_dates is not None: + data, names = _process_date_conversion( + data, + self._date_conv, + self.parse_dates, + self.index_col, + self.index_names, + names, + keep_date_col=self.keep_date_col, + ) + + return names, data + + def _check_data_length( + self, + columns: Sequence[Hashable], + data: Sequence[ArrayLike], + ) -> None: + """Checks if length of data is equal to length of column names. + + One set of trailing commas is allowed. self.index_col not False + results in a ParserError previously when lengths do not match. + + Parameters + ---------- + columns: list of column names + data: list of array-likes containing the data column-wise. 
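+
+        For example, with ``index_col=False``, two names and three data columns
+        where the extra trailing column is entirely empty strings/NaN (one
+        trailing comma per row) passes silently; any other length mismatch only
+        emits a ``ParserWarning`` rather than raising.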
+ """ + if not self.index_col and len(columns) != len(data) and columns: + empty_str = is_object_dtype(data[-1]) and data[-1] == "" + # error: No overload variant of "__ror__" of "ndarray" matches + # argument type "ExtensionArray" + empty_str_or_na = empty_str | isna(data[-1]) # type: ignore[operator] + if len(columns) == len(data) - 1 and np.all(empty_str_or_na): + return + warnings.warn( + "Length of header or names does not match length of data. This leads " + "to a loss of data with index_col=False.", + ParserWarning, + stacklevel=find_stack_level(), + ) + + @overload + def _evaluate_usecols( + self, + usecols: set[int] | Callable[[Hashable], object], + names: Sequence[Hashable], + ) -> set[int]: + ... + + @overload + def _evaluate_usecols( + self, usecols: set[str], names: Sequence[Hashable] + ) -> set[str]: + ... + + def _evaluate_usecols( + self, + usecols: Callable[[Hashable], object] | set[str] | set[int], + names: Sequence[Hashable], + ) -> set[str] | set[int]: + """ + Check whether or not the 'usecols' parameter + is a callable. If so, enumerates the 'names' + parameter and returns a set of indices for + each entry in 'names' that evaluates to True. + If not a callable, returns 'usecols'. + """ + if callable(usecols): + return {i for i, name in enumerate(names) if usecols(name)} + return usecols + + def _validate_usecols_names(self, usecols, names): + """ + Validates that all usecols are present in a given + list of names. If not, raise a ValueError that + shows what usecols are missing. + + Parameters + ---------- + usecols : iterable of usecols + The columns to validate are present in names. + names : iterable of names + The column names to check against. + + Returns + ------- + usecols : iterable of usecols + The `usecols` parameter if the validation succeeds. + + Raises + ------ + ValueError : Columns were missing. Error message will list them. + """ + missing = [c for c in usecols if c not in names] + if len(missing) > 0: + raise ValueError( + f"Usecols do not match columns, columns expected but not found: " + f"{missing}" + ) + + return usecols + + def _validate_usecols_arg(self, usecols): + """ + Validate the 'usecols' parameter. + + Checks whether or not the 'usecols' parameter contains all integers + (column selection by index), strings (column by name) or is a callable. + Raises a ValueError if that is not the case. + + Parameters + ---------- + usecols : list-like, callable, or None + List of columns to use when parsing or a callable that can be used + to filter a list of table columns. + + Returns + ------- + usecols_tuple : tuple + A tuple of (verified_usecols, usecols_dtype). + + 'verified_usecols' is either a set if an array-like is passed in or + 'usecols' if a callable or None is passed in. + + 'usecols_dtype` is the inferred dtype of 'usecols' if an array-like + is passed in or None if a callable or None is passed in. + """ + msg = ( + "'usecols' must either be list-like of all strings, all unicode, " + "all integers or a callable." + ) + if usecols is not None: + if callable(usecols): + return usecols, None + + if not is_list_like(usecols): + # see gh-20529 + # + # Ensure it is iterable container but not string. 
+ raise ValueError(msg) + + usecols_dtype = lib.infer_dtype(usecols, skipna=False) + + if usecols_dtype not in ("empty", "integer", "string"): + raise ValueError(msg) + + usecols = set(usecols) + + return usecols, usecols_dtype + return usecols, None + + def _clean_index_names(self, columns, index_col): + if not is_index_col(index_col): + return None, columns, index_col + + columns = list(columns) + + # In case of no rows and multiindex columns we have to set index_names to + # list of Nones GH#38292 + if not columns: + return [None] * len(index_col), columns, index_col + + cp_cols = list(columns) + index_names: list[str | int | None] = [] + + # don't mutate + index_col = list(index_col) + + for i, c in enumerate(index_col): + if isinstance(c, str): + index_names.append(c) + for j, name in enumerate(cp_cols): + if name == c: + index_col[i] = j + columns.remove(name) + break + else: + name = cp_cols[c] + columns.remove(name) + index_names.append(name) + + # Only clean index names that were placeholders. + for i, name in enumerate(index_names): + if isinstance(name, str) and name in self.unnamed_cols: + index_names[i] = None + + return index_names, columns, index_col + + def _get_empty_meta( + self, columns, index_col, index_names, dtype: DtypeArg | None = None + ): + columns = list(columns) + + # Convert `dtype` to a defaultdict of some kind. + # This will enable us to write `dtype[col_name]` + # without worrying about KeyError issues later on. + dtype_dict: defaultdict[Hashable, Any] + if not is_dict_like(dtype): + # if dtype == None, default will be object. + default_dtype = dtype or object + dtype_dict = defaultdict(lambda: default_dtype) + else: + dtype = cast(dict, dtype) + dtype_dict = defaultdict( + lambda: object, + {columns[k] if is_integer(k) else k: v for k, v in dtype.items()}, + ) + + # Even though we have no data, the "index" of the empty DataFrame + # could for example still be an empty MultiIndex. Thus, we need to + # check whether we have any index columns specified, via either: + # + # 1) index_col (column indices) + # 2) index_names (column names) + # + # Both must be non-null to ensure a successful construction. Otherwise, + # we have to create a generic empty Index. 
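+        # (illustration) e.g. columns=["a", "b"], index_col=[0], index_names=["a"]
+        # produces an empty Index named "a" and col_dict == {"b": Series([], dtype=object)}
+        # when no dtype is specified.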
+ if (index_col is None or index_col is False) or index_names is None: + index = Index([]) + else: + data = [Series([], dtype=dtype_dict[name]) for name in index_names] + index = ensure_index_from_sequences(data, names=index_names) + index_col.sort() + + for i, n in enumerate(index_col): + columns.pop(n - i) + + col_dict = { + col_name: Series([], dtype=dtype_dict[col_name]) for col_name in columns + } + + return index, columns, col_dict + + +def _make_date_converter( + date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True +): + def converter(*date_cols): + if date_parser is None: + strs = parsing.concat_date_cols(date_cols) + + try: + return tools.to_datetime( + ensure_object(strs), + utc=None, + dayfirst=dayfirst, + errors="ignore", + infer_datetime_format=infer_datetime_format, + cache=cache_dates, + ).to_numpy() + + except ValueError: + return tools.to_datetime( + parsing.try_parse_dates(strs, dayfirst=dayfirst), cache=cache_dates + ) + else: + try: + result = tools.to_datetime( + date_parser(*date_cols), errors="ignore", cache=cache_dates + ) + if isinstance(result, datetime.datetime): + raise Exception("scalar parser") + return result + except Exception: + try: + return tools.to_datetime( + parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), + parser=date_parser, + dayfirst=dayfirst, + ), + errors="ignore", + ) + except Exception: + return generic_parser(date_parser, *date_cols) + + return converter + + +parser_defaults = { + "delimiter": None, + "escapechar": None, + "quotechar": '"', + "quoting": csv.QUOTE_MINIMAL, + "doublequote": True, + "skipinitialspace": False, + "lineterminator": None, + "header": "infer", + "index_col": None, + "names": None, + "prefix": None, + "skiprows": None, + "skipfooter": 0, + "nrows": None, + "na_values": None, + "keep_default_na": True, + "true_values": None, + "false_values": None, + "converters": None, + "dtype": None, + "cache_dates": True, + "thousands": None, + "comment": None, + "decimal": ".", + # 'engine': 'c', + "parse_dates": False, + "keep_date_col": False, + "dayfirst": False, + "date_parser": None, + "usecols": None, + # 'iterator': False, + "chunksize": None, + "verbose": False, + "encoding": None, + "squeeze": None, + "compression": None, + "mangle_dupe_cols": True, + "infer_datetime_format": False, + "skip_blank_lines": True, + "encoding_errors": "strict", + "on_bad_lines": ParserBase.BadLineHandleMethod.ERROR, + "error_bad_lines": None, + "warn_bad_lines": None, +} + + +def _process_date_conversion( + data_dict, + converter: Callable, + parse_spec, + index_col, + index_names, + columns, + keep_date_col: bool = False, +): + def _isindex(colspec): + return (isinstance(index_col, list) and colspec in index_col) or ( + isinstance(index_names, list) and colspec in index_names + ) + + new_cols = [] + new_data = {} + + orig_names = columns + columns = list(columns) + + date_cols = set() + + if parse_spec is None or isinstance(parse_spec, bool): + return data_dict, columns + + if isinstance(parse_spec, list): + # list of column lists + for colspec in parse_spec: + if is_scalar(colspec) or isinstance(colspec, tuple): + if isinstance(colspec, int) and colspec not in data_dict: + colspec = orig_names[colspec] + if _isindex(colspec): + continue + # Pyarrow engine returns Series which we need to convert to + # numpy array before converter, its a no-op for other parsers + data_dict[colspec] = converter(np.asarray(data_dict[colspec])) + else: + new_name, col, old_names = _try_convert_dates( + converter, 
colspec, data_dict, orig_names + ) + if new_name in data_dict: + raise ValueError(f"New date column already in dict {new_name}") + new_data[new_name] = col + new_cols.append(new_name) + date_cols.update(old_names) + + elif isinstance(parse_spec, dict): + # dict of new name to column list + for new_name, colspec in parse_spec.items(): + if new_name in data_dict: + raise ValueError(f"Date column {new_name} already in dict") + + _, col, old_names = _try_convert_dates( + converter, colspec, data_dict, orig_names + ) + + new_data[new_name] = col + + # If original column can be converted to date we keep the converted values + # This can only happen if values are from single column + if len(colspec) == 1: + new_data[colspec[0]] = col + + new_cols.append(new_name) + date_cols.update(old_names) + + data_dict.update(new_data) + new_cols.extend(columns) + + if not keep_date_col: + for c in list(date_cols): + data_dict.pop(c) + new_cols.remove(c) + + return data_dict, new_cols + + +def _try_convert_dates(parser: Callable, colspec, data_dict, columns): + colset = set(columns) + colnames = [] + + for c in colspec: + if c in colset: + colnames.append(c) + elif isinstance(c, int) and c not in columns: + colnames.append(columns[c]) + else: + colnames.append(c) + + new_name: tuple | str + if all(isinstance(x, tuple) for x in colnames): + new_name = tuple(map("_".join, zip(*colnames))) + else: + new_name = "_".join([str(x) for x in colnames]) + to_parse = [np.asarray(data_dict[c]) for c in colnames if c in data_dict] + + new_col = parser(*to_parse) + return new_name, new_col, colnames + + +def _get_na_values(col, na_values, na_fvalues, keep_default_na): + """ + Get the NaN values for a given column. + + Parameters + ---------- + col : str + The name of the column. + na_values : array-like, dict + The object listing the NaN values as strings. + na_fvalues : array-like, dict + The object listing the NaN values as floats. + keep_default_na : bool + If `na_values` is a dict, and the column is not mapped in the + dictionary, whether to return the default NaN values or the empty set. + + Returns + ------- + nan_tuple : A length-two tuple composed of + + 1) na_values : the string NaN values for that column. + 2) na_fvalues : the float NaN values for that column. + """ + if isinstance(na_values, dict): + if col in na_values: + return na_values[col], na_fvalues[col] + else: + if keep_default_na: + return STR_NA_VALUES, set() + + return set(), set() + else: + return na_values, na_fvalues + + +def _is_potential_multi_index( + columns: Sequence[Hashable] | MultiIndex, + index_col: bool | Sequence[int] | None = None, +) -> bool: + """ + Check whether or not the `columns` parameter + could be converted into a MultiIndex. + + Parameters + ---------- + columns : array-like + Object which may or may not be convertible into a MultiIndex + index_col : None, bool or list, optional + Column or columns to use as the (possibly hierarchical) index + + Returns + ------- + bool : Whether or not columns could become a MultiIndex + """ + if index_col is None or isinstance(index_col, bool): + index_col = [] + + return bool( + len(columns) + and not isinstance(columns, MultiIndex) + and all(isinstance(c, tuple) for c in columns if c not in list(index_col)) + ) + + +def _validate_parse_dates_arg(parse_dates): + """ + Check whether or not the 'parse_dates' parameter + is a non-boolean scalar. Raises a ValueError if + that is the case. 
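+
+    For example, ``parse_dates="a"`` and ``parse_dates=1`` are rejected
+    (the check raises ``TypeError``), while ``parse_dates=True``,
+    ``parse_dates=["a", "b"]`` and ``parse_dates={"date": [1, 2]}`` are
+    passed through unchanged.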
+ """ + msg = ( + "Only booleans, lists, and dictionaries are accepted " + "for the 'parse_dates' parameter" + ) + + if parse_dates is not None: + if is_scalar(parse_dates): + if not lib.is_bool(parse_dates): + raise TypeError(msg) + + elif not isinstance(parse_dates, (list, dict)): + raise TypeError(msg) + + return parse_dates + + +def is_index_col(col) -> bool: + return col is not None and col is not False diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py new file mode 100644 index 00000000..874eaee1 --- /dev/null +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -0,0 +1,434 @@ +from __future__ import annotations + +from collections import defaultdict +from typing import ( + TYPE_CHECKING, + Hashable, + Mapping, + Sequence, +) +import warnings + +import numpy as np + +import pandas._libs.parsers as parsers +from pandas._typing import ( + ArrayLike, + DtypeArg, + DtypeObj, + ReadCsvBuffer, +) +from pandas.errors import DtypeWarning +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + pandas_dtype, +) +from pandas.core.dtypes.concat import union_categoricals +from pandas.core.dtypes.dtypes import ExtensionDtype + +from pandas.core.indexes.api import ensure_index_from_sequences + +from pandas.io.parsers.base_parser import ( + ParserBase, + is_index_col, +) + +if TYPE_CHECKING: + from pandas import ( + Index, + MultiIndex, + ) + + +class CParserWrapper(ParserBase): + low_memory: bool + _reader: parsers.TextReader + + def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: + super().__init__(kwds) + self.kwds = kwds + kwds = kwds.copy() + + self.low_memory = kwds.pop("low_memory", False) + + # #2442 + # error: Cannot determine type of 'index_col' + kwds["allow_leading_cols"] = ( + self.index_col is not False # type: ignore[has-type] + ) + + # GH20529, validate usecol arg before TextReader + kwds["usecols"] = self.usecols + + # Have to pass int, would break tests using TextReader directly otherwise :( + kwds["on_bad_lines"] = self.on_bad_lines.value + + for key in ( + "storage_options", + "encoding", + "memory_map", + "compression", + "error_bad_lines", + "warn_bad_lines", + ): + kwds.pop(key, None) + + kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None)) + self._reader = parsers.TextReader(src, **kwds) + + self.unnamed_cols = self._reader.unnamed_cols + + # error: Cannot determine type of 'names' + passed_names = self.names is None # type: ignore[has-type] + + if self._reader.header is None: + self.names = None + else: + # error: Cannot determine type of 'names' + # error: Cannot determine type of 'index_names' + ( + self.names, # type: ignore[has-type] + self.index_names, + self.col_names, + passed_names, + ) = self._extract_multi_indexer_columns( + self._reader.header, + self.index_names, # type: ignore[has-type] + passed_names, + ) + + # error: Cannot determine type of 'names' + if self.names is None: # type: ignore[has-type] + if self.prefix: + # error: Cannot determine type of 'names' + self.names = [ # type: ignore[has-type] + f"{self.prefix}{i}" for i in range(self._reader.table_width) + ] + else: + # error: Cannot determine type of 'names' + self.names = list( # type: ignore[has-type] + range(self._reader.table_width) + ) + + # gh-9755 + # + # need to set orig_names here first + # so that proper indexing can be done + # with _set_noconvert_columns + # + # once names has been filtered, we will + # then set orig_names again to names + # error: Cannot determine type of 
'names' + self.orig_names = self.names[:] # type: ignore[has-type] + + if self.usecols: + usecols = self._evaluate_usecols(self.usecols, self.orig_names) + + # GH 14671 + # assert for mypy, orig_names is List or None, None would error in issubset + assert self.orig_names is not None + if self.usecols_dtype == "string" and not set(usecols).issubset( + self.orig_names + ): + self._validate_usecols_names(usecols, self.orig_names) + + # error: Cannot determine type of 'names' + if len(self.names) > len(usecols): # type: ignore[has-type] + # error: Cannot determine type of 'names' + self.names = [ # type: ignore[has-type] + n + # error: Cannot determine type of 'names' + for i, n in enumerate(self.names) # type: ignore[has-type] + if (i in usecols or n in usecols) + ] + + # error: Cannot determine type of 'names' + if len(self.names) < len(usecols): # type: ignore[has-type] + # error: Cannot determine type of 'names' + self._validate_usecols_names( + usecols, + self.names, # type: ignore[has-type] + ) + + # error: Cannot determine type of 'names' + self._validate_parse_dates_presence(self.names) # type: ignore[has-type] + self._set_noconvert_columns() + + # error: Cannot determine type of 'names' + self.orig_names = self.names # type: ignore[has-type] + + if not self._has_complex_date_col: + # error: Cannot determine type of 'index_col' + if self._reader.leading_cols == 0 and is_index_col( + self.index_col # type: ignore[has-type] + ): + + self._name_processed = True + ( + index_names, + # error: Cannot determine type of 'names' + self.names, # type: ignore[has-type] + self.index_col, + ) = self._clean_index_names( + # error: Cannot determine type of 'names' + self.names, # type: ignore[has-type] + # error: Cannot determine type of 'index_col' + self.index_col, # type: ignore[has-type] + ) + + if self.index_names is None: + self.index_names = index_names + + if self._reader.header is None and not passed_names: + assert self.index_names is not None + self.index_names = [None] * len(self.index_names) + + self._implicit_index = self._reader.leading_cols > 0 + + def close(self) -> None: + # close handles opened by C parser + try: + self._reader.close() + except ValueError: + pass + + def _set_noconvert_columns(self) -> None: + """ + Set the columns that should not undergo dtype conversions. + + Currently, any column that is involved with date parsing will not + undergo such conversions. 
+ """ + assert self.orig_names is not None + # error: Cannot determine type of 'names' + + # much faster than using orig_names.index(x) xref GH#44106 + names_dict = {x: i for i, x in enumerate(self.orig_names)} + col_indices = [names_dict[x] for x in self.names] # type: ignore[has-type] + # error: Cannot determine type of 'names' + noconvert_columns = self._set_noconvert_dtype_columns( + col_indices, + self.names, # type: ignore[has-type] + ) + for col in noconvert_columns: + self._reader.set_noconvert(col) + + def read( + self, + nrows: int | None = None, + ) -> tuple[ + Index | MultiIndex | None, + Sequence[Hashable] | MultiIndex, + Mapping[Hashable, ArrayLike], + ]: + index: Index | MultiIndex | None + column_names: Sequence[Hashable] | MultiIndex + try: + if self.low_memory: + chunks = self._reader.read_low_memory(nrows) + # destructive to chunks + data = _concatenate_chunks(chunks) + + else: + data = self._reader.read(nrows) + except StopIteration: + if self._first_chunk: + self._first_chunk = False + names = self._maybe_dedup_names(self.orig_names) + index, columns, col_dict = self._get_empty_meta( + names, + self.index_col, + self.index_names, + dtype=self.kwds.get("dtype"), + ) + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + + if self.usecols is not None: + columns = self._filter_usecols(columns) + + col_dict = {k: v for k, v in col_dict.items() if k in columns} + + return index, columns, col_dict + + else: + self.close() + raise + + # Done with first read, next time raise StopIteration + self._first_chunk = False + + # error: Cannot determine type of 'names' + names = self.names # type: ignore[has-type] + + if self._reader.leading_cols: + if self._has_complex_date_col: + raise NotImplementedError("file structure not yet supported") + + # implicit index, no index names + arrays = [] + + for i in range(self._reader.leading_cols): + if self.index_col is None: + values = data.pop(i) + else: + values = data.pop(self.index_col[i]) + + values = self._maybe_parse_dates(values, i, try_parse_dates=True) + arrays.append(values) + + index = ensure_index_from_sequences(arrays) + + if self.usecols is not None: + names = self._filter_usecols(names) + + names = self._maybe_dedup_names(names) + + # rename dict keys + data_tups = sorted(data.items()) + data = {k: v for k, (i, v) in zip(names, data_tups)} + + column_names, date_data = self._do_date_conversions(names, data) + + # maybe create a mi on the columns + column_names = self._maybe_make_multi_index_columns( + column_names, self.col_names + ) + + else: + # rename dict keys + data_tups = sorted(data.items()) + + # ugh, mutation + + # assert for mypy, orig_names is List or None, None would error in list(...) 
+ assert self.orig_names is not None + names = list(self.orig_names) + names = self._maybe_dedup_names(names) + + if self.usecols is not None: + names = self._filter_usecols(names) + + # columns as list + alldata = [x[1] for x in data_tups] + if self.usecols is None: + self._check_data_length(names, alldata) + + data = {k: v for k, (i, v) in zip(names, data_tups)} + + names, date_data = self._do_date_conversions(names, data) + index, column_names = self._make_index(date_data, alldata, names) + + return index, column_names, date_data + + def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]: + # hackish + usecols = self._evaluate_usecols(self.usecols, names) + if usecols is not None and len(names) != len(usecols): + names = [ + name for i, name in enumerate(names) if i in usecols or name in usecols + ] + return names + + def _get_index_names(self): + names = list(self._reader.header[0]) + idx_names = None + + if self._reader.leading_cols == 0 and self.index_col is not None: + (idx_names, names, self.index_col) = self._clean_index_names( + names, self.index_col + ) + + return names, idx_names + + def _maybe_parse_dates(self, values, index: int, try_parse_dates: bool = True): + if try_parse_dates and self._should_parse_dates(index): + values = self._date_conv(values) + return values + + +def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict: + """ + Concatenate chunks of data read with low_memory=True. + + The tricky part is handling Categoricals, where different chunks + may have different inferred categories. + """ + names = list(chunks[0].keys()) + warning_columns = [] + + result: dict = {} + for name in names: + arrs = [chunk.pop(name) for chunk in chunks] + # Check each arr for consistent types. + dtypes = {a.dtype for a in arrs} + # TODO: shouldn't we exclude all EA dtypes here? + numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)} + if len(numpy_dtypes) > 1: + # error: Argument 1 to "find_common_type" has incompatible type + # "Set[Any]"; expected "Sequence[Union[dtype[Any], None, type, + # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, + # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]" + common_type = np.find_common_type( + numpy_dtypes, # type: ignore[arg-type] + [], + ) + if common_type == np.dtype(object): + warning_columns.append(str(name)) + + dtype = dtypes.pop() + if is_categorical_dtype(dtype): + result[name] = union_categoricals(arrs, sort_categories=False) + else: + if isinstance(dtype, ExtensionDtype): + # TODO: concat_compat? + array_type = dtype.construct_array_type() + # error: Argument 1 to "_concat_same_type" of "ExtensionArray" + # has incompatible type "List[Union[ExtensionArray, ndarray]]"; + # expected "Sequence[ExtensionArray]" + result[name] = array_type._concat_same_type( + arrs # type: ignore[arg-type] + ) + else: + # error: Argument 1 to "concatenate" has incompatible + # type "List[Union[ExtensionArray, ndarray[Any, Any]]]" + # ; expected "Union[_SupportsArray[dtype[Any]], + # Sequence[_SupportsArray[dtype[Any]]], + # Sequence[Sequence[_SupportsArray[dtype[Any]]]], + # Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]] + # , Sequence[Sequence[Sequence[Sequence[ + # _SupportsArray[dtype[Any]]]]]]]" + result[name] = np.concatenate(arrs) # type: ignore[arg-type] + + if warning_columns: + warning_names = ",".join(warning_columns) + warning_message = " ".join( + [ + f"Columns ({warning_names}) have mixed types. " + f"Specify dtype option on import or set low_memory=False." 
+ ] + ) + warnings.warn(warning_message, DtypeWarning, stacklevel=find_stack_level()) + return result + + +def ensure_dtype_objs( + dtype: DtypeArg | dict[Hashable, DtypeArg] | None +) -> DtypeObj | dict[Hashable, DtypeObj] | None: + """ + Ensure we have either None, a dtype object, or a dictionary mapping to + dtype objects. + """ + if isinstance(dtype, defaultdict): + # "None" not callable [misc] + default_dtype = pandas_dtype(dtype.default_factory()) # type: ignore[misc] + dtype_converted: defaultdict = defaultdict(lambda: default_dtype) + for key in dtype.keys(): + dtype_converted[key] = pandas_dtype(dtype[key]) + return dtype_converted + elif isinstance(dtype, dict): + return {k: pandas_dtype(dtype[k]) for k in dtype} + elif dtype is not None: + return pandas_dtype(dtype) + return dtype diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py new file mode 100644 index 00000000..7c03a81d --- /dev/null +++ b/pandas/io/parsers/python_parser.py @@ -0,0 +1,1344 @@ +from __future__ import annotations + +from collections import ( + abc, + defaultdict, +) +import csv +from io import StringIO +import re +import sys +from typing import ( + IO, + TYPE_CHECKING, + DefaultDict, + Hashable, + Iterator, + List, + Literal, + Mapping, + Sequence, + cast, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import ( + ArrayLike, + ReadCsvBuffer, + Scalar, +) +from pandas.errors import ( + EmptyDataError, + ParserError, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.inference import is_dict_like + +from pandas.io.parsers.base_parser import ( + ParserBase, + parser_defaults, +) + +if TYPE_CHECKING: + from pandas import ( + Index, + MultiIndex, + ) + +# BOM character (byte order mark) +# This exists at the beginning of a file to indicate endianness +# of a file (stream). Unfortunately, this marker screws up parsing, +# so we need to remove it if we see it. +_BOM = "\ufeff" + + +class PythonParser(ParserBase): + def __init__(self, f: ReadCsvBuffer[str] | list, **kwds) -> None: + """ + Workhorse function for processing nested list into DataFrame + """ + super().__init__(kwds) + + self.data: Iterator[str] | None = None + self.buf: list = [] + self.pos = 0 + self.line_pos = 0 + + self.skiprows = kwds["skiprows"] + + if callable(self.skiprows): + self.skipfunc = self.skiprows + else: + self.skipfunc = lambda x: x in self.skiprows + + self.skipfooter = _validate_skipfooter_arg(kwds["skipfooter"]) + self.delimiter = kwds["delimiter"] + + self.quotechar = kwds["quotechar"] + if isinstance(self.quotechar, str): + self.quotechar = str(self.quotechar) + + self.escapechar = kwds["escapechar"] + self.doublequote = kwds["doublequote"] + self.skipinitialspace = kwds["skipinitialspace"] + self.lineterminator = kwds["lineterminator"] + self.quoting = kwds["quoting"] + self.skip_blank_lines = kwds["skip_blank_lines"] + + self.names_passed = kwds["names"] or None + + self.has_index_names = False + if "has_index_names" in kwds: + self.has_index_names = kwds["has_index_names"] + + self.verbose = kwds["verbose"] + + self.thousands = kwds["thousands"] + self.decimal = kwds["decimal"] + + self.comment = kwds["comment"] + + # Set self.data to something that can read lines. 
+ if isinstance(f, list): + # read_excel: f is a list + self.data = cast(Iterator[str], f) + else: + assert hasattr(f, "readline") + self._make_reader(f) + + # Get columns in two steps: infer from data, then + # infer column indices from self.usecols if it is specified. + self._col_indices: list[int] | None = None + columns: list[list[Scalar | None]] + ( + columns, + self.num_original_columns, + self.unnamed_cols, + ) = self._infer_columns() + + # Now self.columns has the set of columns that we will process. + # The original set is stored in self.original_columns. + # error: Cannot determine type of 'index_names' + self.columns: list[Hashable] + ( + self.columns, + self.index_names, + self.col_names, + _, + ) = self._extract_multi_indexer_columns( + columns, + self.index_names, # type: ignore[has-type] + ) + + # get popped off for index + self.orig_names: list[Hashable] = list(self.columns) + + # needs to be cleaned/refactored + # multiple date column thing turning into a real spaghetti factory + + if not self._has_complex_date_col: + (index_names, self.orig_names, self.columns) = self._get_index_name( + self.columns + ) + self._name_processed = True + if self.index_names is None: + self.index_names = index_names + + if self._col_indices is None: + self._col_indices = list(range(len(self.columns))) + + self._parse_date_cols = self._validate_parse_dates_presence(self.columns) + no_thousands_columns: set[int] | None = None + if self.parse_dates: + no_thousands_columns = self._set_noconvert_dtype_columns( + self._col_indices, self.columns + ) + self._no_thousands_columns = no_thousands_columns + + if len(self.decimal) != 1: + raise ValueError("Only length-1 decimal markers supported") + + decimal = re.escape(self.decimal) + if self.thousands is None: + regex = rf"^[\-\+]?[0-9]*({decimal}[0-9]*)?([0-9]?(E|e)\-?[0-9]+)?$" + else: + thousands = re.escape(self.thousands) + regex = ( + rf"^[\-\+]?([0-9]+{thousands}|[0-9])*({decimal}[0-9]*)?" + rf"([0-9]?(E|e)\-?[0-9]+)?$" + ) + self.num = re.compile(regex) + + def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None: + sep = self.delimiter + + if sep is None or len(sep) == 1: + if self.lineterminator: + raise ValueError( + "Custom line terminators not supported in python parser (yet)" + ) + + class MyDialect(csv.Dialect): + delimiter = self.delimiter + quotechar = self.quotechar + escapechar = self.escapechar + doublequote = self.doublequote + skipinitialspace = self.skipinitialspace + quoting = self.quoting + lineterminator = "\n" + + dia = MyDialect + + if sep is not None: + dia.delimiter = sep + else: + # attempt to sniff the delimiter from the first valid line, + # i.e. 
no comment line and not in skiprows + line = f.readline() + lines = self._check_comments([[line]])[0] + while self.skipfunc(self.pos) or not lines: + self.pos += 1 + line = f.readline() + lines = self._check_comments([[line]])[0] + lines_str = cast(List[str], lines) + + # since `line` was a string, lines will be a list containing + # only a single string + line = lines_str[0] + + self.pos += 1 + self.line_pos += 1 + sniffed = csv.Sniffer().sniff(line) + dia.delimiter = sniffed.delimiter + + # Note: encoding is irrelevant here + line_rdr = csv.reader(StringIO(line), dialect=dia) + self.buf.extend(list(line_rdr)) + + # Note: encoding is irrelevant here + reader = csv.reader(f, dialect=dia, strict=True) + + else: + + def _read(): + line = f.readline() + pat = re.compile(sep) + + yield pat.split(line.strip()) + + for line in f: + yield pat.split(line.strip()) + + reader = _read() + + # error: Incompatible types in assignment (expression has type "_reader", + # variable has type "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap, None]") + self.data = reader # type: ignore[assignment] + + def read( + self, rows: int | None = None + ) -> tuple[ + Index | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike] + ]: + try: + content = self._get_lines(rows) + except StopIteration: + if self._first_chunk: + content = [] + else: + self.close() + raise + + # done with first read, next time raise StopIteration + self._first_chunk = False + + columns: Sequence[Hashable] = list(self.orig_names) + if not len(content): # pragma: no cover + # DataFrame with the right metadata, even though it's length 0 + names = self._maybe_dedup_names(self.orig_names) + # error: Cannot determine type of 'index_col' + index, columns, col_dict = self._get_empty_meta( + names, + self.index_col, # type: ignore[has-type] + self.index_names, + self.dtype, + ) + conv_columns = self._maybe_make_multi_index_columns(columns, self.col_names) + return index, conv_columns, col_dict + + # handle new style for names in index + count_empty_content_vals = count_empty_vals(content[0]) + indexnamerow = None + if self.has_index_names and count_empty_content_vals == len(columns): + indexnamerow = content[0] + content = content[1:] + + alldata = self._rows_to_cols(content) + data, columns = self._exclude_implicit_index(alldata) + + conv_data = self._convert_data(data) + columns, conv_data = self._do_date_conversions(columns, conv_data) + + index, result_columns = self._make_index( + conv_data, alldata, columns, indexnamerow + ) + + return index, result_columns, conv_data + + def _exclude_implicit_index( + self, + alldata: list[np.ndarray], + ) -> tuple[Mapping[Hashable, np.ndarray], Sequence[Hashable]]: + names = self._maybe_dedup_names(self.orig_names) + + offset = 0 + if self._implicit_index: + # error: Cannot determine type of 'index_col' + offset = len(self.index_col) # type: ignore[has-type] + + len_alldata = len(alldata) + self._check_data_length(names, alldata) + + return { + name: alldata[i + offset] for i, name in enumerate(names) if i < len_alldata + }, names + + # legacy + def get_chunk( + self, size: int | None = None + ) -> tuple[ + Index | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike] + ]: + if size is None: + # error: "PythonParser" has no attribute "chunksize" + size = self.chunksize # type: ignore[attr-defined] + return self.read(rows=size) + + def _convert_data( + self, + data: Mapping[Hashable, np.ndarray], + ) -> Mapping[Hashable, ArrayLike]: + # apply converters 
+ clean_conv = self._clean_mapping(self.converters) + clean_dtypes = self._clean_mapping(self.dtype) + + # Apply NA values. + clean_na_values = {} + clean_na_fvalues = {} + + if isinstance(self.na_values, dict): + for col in self.na_values: + na_value = self.na_values[col] + na_fvalue = self.na_fvalues[col] + + if isinstance(col, int) and col not in self.orig_names: + col = self.orig_names[col] + + clean_na_values[col] = na_value + clean_na_fvalues[col] = na_fvalue + else: + clean_na_values = self.na_values + clean_na_fvalues = self.na_fvalues + + return self._convert_to_ndarrays( + data, + clean_na_values, + clean_na_fvalues, + self.verbose, + clean_conv, + clean_dtypes, + ) + + def _infer_columns( + self, + ) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]: + names = self.names + num_original_columns = 0 + clear_buffer = True + unnamed_cols: set[Scalar | None] = set() + self._header_line = None + + if self.header is not None: + header = self.header + + if isinstance(header, (list, tuple, np.ndarray)): + have_mi_columns = len(header) > 1 + # we have a mi columns, so read an extra line + if have_mi_columns: + header = list(header) + [header[-1] + 1] + else: + have_mi_columns = False + header = [header] + + columns: list[list[Scalar | None]] = [] + for level, hr in enumerate(header): + try: + line = self._buffered_line() + + while self.line_pos <= hr: + line = self._next_line() + + except StopIteration as err: + if 0 < self.line_pos <= hr and ( + not have_mi_columns or hr != header[-1] + ): + # If no rows we want to raise a different message and if + # we have mi columns, the last line is not part of the header + joi = list(map(str, header[:-1] if have_mi_columns else header)) + msg = f"[{','.join(joi)}], len of {len(joi)}, " + raise ValueError( + f"Passed header={msg}" + f"but only {self.line_pos} lines in file" + ) from err + + # We have an empty file, so check + # if columns are provided. 
That will + # serve as the 'line' for parsing + if have_mi_columns and hr > 0: + if clear_buffer: + self._clear_buffer() + columns.append([None] * len(columns[-1])) + return columns, num_original_columns, unnamed_cols + + if not self.names: + raise EmptyDataError("No columns to parse from file") from err + + line = self.names[:] + + this_columns: list[Scalar | None] = [] + this_unnamed_cols = [] + + for i, c in enumerate(line): + if c == "": + if have_mi_columns: + col_name = f"Unnamed: {i}_level_{level}" + else: + col_name = f"Unnamed: {i}" + + this_unnamed_cols.append(i) + this_columns.append(col_name) + else: + this_columns.append(c) + + if not have_mi_columns and self.mangle_dupe_cols: + counts: DefaultDict = defaultdict(int) + # Ensure that regular columns are used before unnamed ones + # to keep given names and mangle unnamed columns + col_loop_order = [ + i + for i in range(len(this_columns)) + if i not in this_unnamed_cols + ] + this_unnamed_cols + + for i in col_loop_order: + col = this_columns[i] + old_col = col + cur_count = counts[col] + + if cur_count > 0: + while cur_count > 0: + counts[old_col] = cur_count + 1 + col = f"{old_col}.{cur_count}" + if col in this_columns: + cur_count += 1 + else: + cur_count = counts[col] + + if ( + self.dtype is not None + and is_dict_like(self.dtype) + and self.dtype.get(old_col) is not None + and self.dtype.get(col) is None + ): + self.dtype.update({col: self.dtype.get(old_col)}) + this_columns[i] = col + counts[col] = cur_count + 1 + elif have_mi_columns: + + # if we have grabbed an extra line, but its not in our + # format so save in the buffer, and create an blank extra + # line for the rest of the parsing code + if hr == header[-1]: + lc = len(this_columns) + # error: Cannot determine type of 'index_col' + sic = self.index_col # type: ignore[has-type] + ic = len(sic) if sic is not None else 0 + unnamed_count = len(this_unnamed_cols) + + # if wrong number of blanks or no index, not our format + if (lc != unnamed_count and lc - ic > unnamed_count) or ic == 0: + clear_buffer = False + this_columns = [None] * lc + self.buf = [self.buf[-1]] + + columns.append(this_columns) + unnamed_cols.update({this_columns[i] for i in this_unnamed_cols}) + + if len(columns) == 1: + num_original_columns = len(this_columns) + + if clear_buffer: + self._clear_buffer() + + first_line: list[Scalar] | None + if names is not None: + # Read first row after header to check if data are longer + try: + first_line = self._next_line() + except StopIteration: + first_line = None + + len_first_data_row = 0 if first_line is None else len(first_line) + + if len(names) > len(columns[0]) and len(names) > len_first_data_row: + raise ValueError( + "Number of passed names did not match " + "number of header fields in the file" + ) + if len(columns) > 1: + raise TypeError("Cannot pass names with multi-index columns") + + if self.usecols is not None: + # Set _use_cols. We don't store columns because they are + # overwritten. 
+ self._handle_usecols(columns, names, num_original_columns) + else: + num_original_columns = len(names) + if self._col_indices is not None and len(names) != len( + self._col_indices + ): + columns = [[names[i] for i in sorted(self._col_indices)]] + else: + columns = [names] + else: + columns = self._handle_usecols( + columns, columns[0], num_original_columns + ) + else: + try: + line = self._buffered_line() + + except StopIteration as err: + if not names: + raise EmptyDataError("No columns to parse from file") from err + + line = names[:] + + # Store line, otherwise it is lost for guessing the index + self._header_line = line + ncols = len(line) + num_original_columns = ncols + + if not names: + if self.prefix: + columns = [[f"{self.prefix}{i}" for i in range(ncols)]] + else: + columns = [list(range(ncols))] + columns = self._handle_usecols( + columns, columns[0], num_original_columns + ) + else: + if self.usecols is None or len(names) >= num_original_columns: + columns = self._handle_usecols([names], names, num_original_columns) + num_original_columns = len(names) + else: + if not callable(self.usecols) and len(names) != len(self.usecols): + raise ValueError( + "Number of passed names did not match number of " + "header fields in the file" + ) + # Ignore output but set used columns. + self._handle_usecols([names], names, ncols) + columns = [names] + num_original_columns = ncols + + return columns, num_original_columns, unnamed_cols + + def _handle_usecols( + self, + columns: list[list[Scalar | None]], + usecols_key: list[Scalar | None], + num_original_columns: int, + ) -> list[list[Scalar | None]]: + """ + Sets self._col_indices + + usecols_key is used if there are string usecols. + """ + col_indices: set[int] | list[int] + if self.usecols is not None: + if callable(self.usecols): + col_indices = self._evaluate_usecols(self.usecols, usecols_key) + elif any(isinstance(u, str) for u in self.usecols): + if len(columns) > 1: + raise ValueError( + "If using multiple headers, usecols must be integers." + ) + col_indices = [] + + for col in self.usecols: + if isinstance(col, str): + try: + col_indices.append(usecols_key.index(col)) + except ValueError: + self._validate_usecols_names(self.usecols, usecols_key) + else: + col_indices.append(col) + else: + missing_usecols = [ + col for col in self.usecols if col >= num_original_columns + ] + if missing_usecols: + warnings.warn( + "Defining usecols with out of bounds indices is deprecated " + "and will raise a ParserError in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + col_indices = self.usecols + + columns = [ + [n for i, n in enumerate(column) if i in col_indices] + for column in columns + ] + self._col_indices = sorted(col_indices) + return columns + + def _buffered_line(self) -> list[Scalar]: + """ + Return a line from buffer, filling buffer if required. + """ + if len(self.buf) > 0: + return self.buf[0] + else: + return self._next_line() + + def _check_for_bom(self, first_row: list[Scalar]) -> list[Scalar]: + """ + Checks whether the file begins with the BOM character. + If it does, remove it. In addition, if there is quoting + in the field subsequent to the BOM, remove it as well + because it technically takes place at the beginning of + the name, not the middle of it. + """ + # first_row will be a list, so we need to check + # that that list is not empty before proceeding. + if not first_row: + return first_row + + # The first element of this row is the one that could have the + # BOM that we want to remove. 
Check that the first element is a + # string before proceeding. + if not isinstance(first_row[0], str): + return first_row + + # Check that the string is not empty, as that would + # obviously not have a BOM at the start of it. + if not first_row[0]: + return first_row + + # Since the string is non-empty, check that it does + # in fact begin with a BOM. + first_elt = first_row[0][0] + if first_elt != _BOM: + return first_row + + first_row_bom = first_row[0] + new_row: str + + if len(first_row_bom) > 1 and first_row_bom[1] == self.quotechar: + start = 2 + quote = first_row_bom[1] + end = first_row_bom[2:].index(quote) + 2 + + # Extract the data between the quotation marks + new_row = first_row_bom[start:end] + + # Extract any remaining data after the second + # quotation mark. + if len(first_row_bom) > end + 1: + new_row += first_row_bom[end + 1 :] + + else: + + # No quotation so just remove BOM from first element + new_row = first_row_bom[1:] + + new_row_list: list[Scalar] = [new_row] + return new_row_list + first_row[1:] + + def _is_line_empty(self, line: list[Scalar]) -> bool: + """ + Check if a line is empty or not. + + Parameters + ---------- + line : str, array-like + The line of data to check. + + Returns + ------- + boolean : Whether or not the line is empty. + """ + return not line or all(not x for x in line) + + def _next_line(self) -> list[Scalar]: + if isinstance(self.data, list): + while self.skipfunc(self.pos): + if self.pos >= len(self.data): + break + self.pos += 1 + + while True: + try: + line = self._check_comments([self.data[self.pos]])[0] + self.pos += 1 + # either uncommented or blank to begin with + if not self.skip_blank_lines and ( + self._is_line_empty(self.data[self.pos - 1]) or line + ): + break + elif self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + if ret: + line = ret[0] + break + except IndexError: + raise StopIteration + else: + while self.skipfunc(self.pos): + self.pos += 1 + # assert for mypy, data is Iterator[str] or None, would error in next + assert self.data is not None + next(self.data) + + while True: + orig_line = self._next_iter_line(row_num=self.pos + 1) + self.pos += 1 + + if orig_line is not None: + line = self._check_comments([orig_line])[0] + + if self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + + if ret: + line = ret[0] + break + elif self._is_line_empty(orig_line) or line: + break + + # This was the first line of the file, + # which could contain the BOM at the + # beginning of it. + if self.pos == 1: + line = self._check_for_bom(line) + + self.line_pos += 1 + self.buf.append(line) + return line + + def _alert_malformed(self, msg: str, row_num: int) -> None: + """ + Alert a user about a malformed row, depending on value of + `self.on_bad_lines` enum. + + If `self.on_bad_lines` is ERROR, the alert will be `ParserError`. + If `self.on_bad_lines` is WARN, the alert will be printed out. + + Parameters + ---------- + msg: str + The error message to display. + row_num: int + The row number where the parsing error occurred. + Because this row number is displayed, we 1-index, + even though we 0-index internally. + """ + if self.on_bad_lines == self.BadLineHandleMethod.ERROR: + raise ParserError(msg) + elif self.on_bad_lines == self.BadLineHandleMethod.WARN: + base = f"Skipping line {row_num}: " + sys.stderr.write(base + msg + "\n") + + def _next_iter_line(self, row_num: int) -> list[Scalar] | None: + """ + Wrapper around iterating through `self.data` (CSV source). 
+ + When a CSV error is raised, we check for specific + error messages that allow us to customize the + error message displayed to the user. + + Parameters + ---------- + row_num: int + The row number of the line being parsed. + """ + try: + # assert for mypy, data is Iterator[str] or None, would error in next + assert self.data is not None + line = next(self.data) + # for mypy + assert isinstance(line, list) + return line + except csv.Error as e: + if ( + self.on_bad_lines == self.BadLineHandleMethod.ERROR + or self.on_bad_lines == self.BadLineHandleMethod.WARN + ): + msg = str(e) + + if "NULL byte" in msg or "line contains NUL" in msg: + msg = ( + "NULL byte detected. This byte " + "cannot be processed in Python's " + "native csv library at the moment, " + "so please pass in engine='c' instead" + ) + + if self.skipfooter > 0: + reason = ( + "Error could possibly be due to " + "parsing errors in the skipped footer rows " + "(the skipfooter keyword is only applied " + "after Python's csv library has parsed " + "all rows)." + ) + msg += ". " + reason + + self._alert_malformed(msg, row_num) + return None + + def _check_comments(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + if self.comment is None: + return lines + ret = [] + for line in lines: + rl = [] + for x in line: + if ( + not isinstance(x, str) + or self.comment not in x + or x in self.na_values + ): + rl.append(x) + else: + x = x[: x.find(self.comment)] + if len(x) > 0: + rl.append(x) + break + ret.append(rl) + return ret + + def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + """ + Iterate through the lines and remove any that are + either empty or contain only one whitespace value + + Parameters + ---------- + lines : list of list of Scalars + The array of lines that we are to filter. + + Returns + ------- + filtered_lines : list of list of Scalars + The same array of lines with the "empty" ones removed. + """ + ret = [] + for line in lines: + # Remove empty lines and lines with only one whitespace value + if ( + len(line) > 1 + or len(line) == 1 + and (not isinstance(line[0], str) or line[0].strip()) + ): + ret.append(line) + return ret + + def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + if self.thousands is None: + return lines + + return self._search_replace_num_columns( + lines=lines, search=self.thousands, replace="" + ) + + def _search_replace_num_columns( + self, lines: list[list[Scalar]], search: str, replace: str + ) -> list[list[Scalar]]: + ret = [] + for line in lines: + rl = [] + for i, x in enumerate(line): + if ( + not isinstance(x, str) + or search not in x + or (self._no_thousands_columns and i in self._no_thousands_columns) + or not self.num.search(x.strip()) + ): + rl.append(x) + else: + rl.append(x.replace(search, replace)) + ret.append(rl) + return ret + + def _check_decimal(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + if self.decimal == parser_defaults["decimal"]: + return lines + + return self._search_replace_num_columns( + lines=lines, search=self.decimal, replace="." + ) + + def _clear_buffer(self) -> None: + self.buf = [] + + _implicit_index = False + + def _get_index_name( + self, columns: list[Hashable] + ) -> tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]]: + """ + Try several cases to get lines: + + 0) There are headers on row 0 and row 1 and their + total summed lengths equals the length of the next line. 
+ Treat row 0 as columns and row 1 as indices + 1) Look for implicit index: there are more columns + on row 1 than row 0. If this is true, assume that row + 1 lists index columns and row 0 lists normal columns. + 2) Get index from the columns if it was listed. + """ + orig_names = list(columns) + columns = list(columns) + + line: list[Scalar] | None + if self._header_line is not None: + line = self._header_line + else: + try: + line = self._next_line() + except StopIteration: + line = None + + next_line: list[Scalar] | None + try: + next_line = self._next_line() + except StopIteration: + next_line = None + + # implicitly index_col=0 b/c 1 fewer column names + implicit_first_cols = 0 + if line is not None: + # leave it 0, #2442 + # Case 1 + # error: Cannot determine type of 'index_col' + index_col = self.index_col # type: ignore[has-type] + if index_col is not False: + implicit_first_cols = len(line) - self.num_original_columns + + # Case 0 + if ( + next_line is not None + and self.header is not None + and index_col is not False + ): + if len(next_line) == len(line) + self.num_original_columns: + # column and index names on diff rows + self.index_col = list(range(len(line))) + self.buf = self.buf[1:] + + for c in reversed(line): + columns.insert(0, c) + + # Update list of original names to include all indices. + orig_names = list(columns) + self.num_original_columns = len(columns) + return line, orig_names, columns + + if implicit_first_cols > 0: + # Case 1 + self._implicit_index = True + if self.index_col is None: + self.index_col = list(range(implicit_first_cols)) + + index_name = None + + else: + # Case 2 + (index_name, _, self.index_col) = self._clean_index_names( + columns, self.index_col + ) + + return index_name, orig_names, columns + + def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: + col_len = self.num_original_columns + + if self._implicit_index: + col_len += len(self.index_col) + + max_len = max(len(row) for row in content) + + # Check that there are no rows with too many + # elements in their row (rows with too few + # elements are padded with NaN). + # error: Non-overlapping identity check (left operand type: "List[int]", + # right operand type: "Literal[False]") + if ( + max_len > col_len + and self.index_col is not False # type: ignore[comparison-overlap] + and self.usecols is None + ): + + footers = self.skipfooter if self.skipfooter else 0 + bad_lines = [] + + iter_content = enumerate(content) + content_len = len(content) + content = [] + + for (i, l) in iter_content: + actual_len = len(l) + + if actual_len > col_len: + if callable(self.on_bad_lines): + new_l = self.on_bad_lines(l) + if new_l is not None: + content.append(new_l) + elif ( + self.on_bad_lines == self.BadLineHandleMethod.ERROR + or self.on_bad_lines == self.BadLineHandleMethod.WARN + ): + row_num = self.pos - (content_len - i + footers) + bad_lines.append((row_num, actual_len)) + + if self.on_bad_lines == self.BadLineHandleMethod.ERROR: + break + else: + content.append(l) + + for row_num, actual_len in bad_lines: + msg = ( + f"Expected {col_len} fields in line {row_num + 1}, saw " + f"{actual_len}" + ) + if ( + self.delimiter + and len(self.delimiter) > 1 + and self.quoting != csv.QUOTE_NONE + ): + # see gh-13374 + reason = ( + "Error could possibly be due to quotes being " + "ignored when a multi-char delimiter is used." + ) + msg += ". 
" + reason + + self._alert_malformed(msg, row_num + 1) + + # see gh-13320 + zipped_content = list(lib.to_object_array(content, min_width=col_len).T) + + if self.usecols: + assert self._col_indices is not None + col_indices = self._col_indices + + if self._implicit_index: + zipped_content = [ + a + for i, a in enumerate(zipped_content) + if ( + i < len(self.index_col) + or i - len(self.index_col) in col_indices + ) + ] + else: + zipped_content = [ + a for i, a in enumerate(zipped_content) if i in col_indices + ] + return zipped_content + + def _get_lines(self, rows: int | None = None) -> list[list[Scalar]]: + lines = self.buf + new_rows = None + + # already fetched some number + if rows is not None: + # we already have the lines in the buffer + if len(self.buf) >= rows: + new_rows, self.buf = self.buf[:rows], self.buf[rows:] + + # need some lines + else: + rows -= len(self.buf) + + if new_rows is None: + if isinstance(self.data, list): + if self.pos > len(self.data): + raise StopIteration + if rows is None: + new_rows = self.data[self.pos :] + new_pos = len(self.data) + else: + new_rows = self.data[self.pos : self.pos + rows] + new_pos = self.pos + rows + + new_rows = self._remove_skipped_rows(new_rows) + lines.extend(new_rows) + self.pos = new_pos + + else: + new_rows = [] + try: + if rows is not None: + + rows_to_skip = 0 + if self.skiprows is not None and self.pos is not None: + # Only read additional rows if pos is in skiprows + rows_to_skip = len( + set(self.skiprows) - set(range(self.pos)) + ) + + for _ in range(rows + rows_to_skip): + # assert for mypy, data is Iterator[str] or None, would + # error in next + assert self.data is not None + new_rows.append(next(self.data)) + + len_new_rows = len(new_rows) + new_rows = self._remove_skipped_rows(new_rows) + lines.extend(new_rows) + else: + rows = 0 + + while True: + new_row = self._next_iter_line(row_num=self.pos + rows + 1) + rows += 1 + + if new_row is not None: + new_rows.append(new_row) + len_new_rows = len(new_rows) + + except StopIteration: + len_new_rows = len(new_rows) + new_rows = self._remove_skipped_rows(new_rows) + lines.extend(new_rows) + if len(lines) == 0: + raise + self.pos += len_new_rows + + self.buf = [] + else: + lines = new_rows + + if self.skipfooter: + lines = lines[: -self.skipfooter] + + lines = self._check_comments(lines) + if self.skip_blank_lines: + lines = self._remove_empty_lines(lines) + lines = self._check_thousands(lines) + return self._check_decimal(lines) + + def _remove_skipped_rows(self, new_rows: list[list[Scalar]]) -> list[list[Scalar]]: + if self.skiprows: + return [ + row for i, row in enumerate(new_rows) if not self.skipfunc(i + self.pos) + ] + return new_rows + + +class FixedWidthReader(abc.Iterator): + """ + A reader of fixed-width lines. 
+ """ + + def __init__( + self, + f: IO[str] | ReadCsvBuffer[str], + colspecs: list[tuple[int, int]] | Literal["infer"], + delimiter: str | None, + comment: str | None, + skiprows: set[int] | None = None, + infer_nrows: int = 100, + ) -> None: + self.f = f + self.buffer: Iterator | None = None + self.delimiter = "\r\n" + delimiter if delimiter else "\n\r\t " + self.comment = comment + if colspecs == "infer": + self.colspecs = self.detect_colspecs( + infer_nrows=infer_nrows, skiprows=skiprows + ) + else: + self.colspecs = colspecs + + if not isinstance(self.colspecs, (tuple, list)): + raise TypeError( + "column specifications must be a list or tuple, " + f"input was a {type(colspecs).__name__}" + ) + + for colspec in self.colspecs: + if not ( + isinstance(colspec, (tuple, list)) + and len(colspec) == 2 + and isinstance(colspec[0], (int, np.integer, type(None))) + and isinstance(colspec[1], (int, np.integer, type(None))) + ): + raise TypeError( + "Each column specification must be " + "2 element tuple or list of integers" + ) + + def get_rows(self, infer_nrows: int, skiprows: set[int] | None = None) -> list[str]: + """ + Read rows from self.f, skipping as specified. + + We distinguish buffer_rows (the first <= infer_nrows + lines) from the rows returned to detect_colspecs + because it's simpler to leave the other locations + with skiprows logic alone than to modify them to + deal with the fact we skipped some rows here as + well. + + Parameters + ---------- + infer_nrows : int + Number of rows to read from self.f, not counting + rows that are skipped. + skiprows: set, optional + Indices of rows to skip. + + Returns + ------- + detect_rows : list of str + A list containing the rows to read. + + """ + if skiprows is None: + skiprows = set() + buffer_rows = [] + detect_rows = [] + for i, row in enumerate(self.f): + if i not in skiprows: + detect_rows.append(row) + buffer_rows.append(row) + if len(detect_rows) >= infer_nrows: + break + self.buffer = iter(buffer_rows) + return detect_rows + + def detect_colspecs( + self, infer_nrows: int = 100, skiprows: set[int] | None = None + ) -> list[tuple[int, int]]: + # Regex escape the delimiters + delimiters = "".join([rf"\{x}" for x in self.delimiter]) + pattern = re.compile(f"([^{delimiters}]+)") + rows = self.get_rows(infer_nrows, skiprows) + if not rows: + raise EmptyDataError("No rows from which to infer column width") + max_len = max(map(len, rows)) + mask = np.zeros(max_len + 1, dtype=int) + if self.comment is not None: + rows = [row.partition(self.comment)[0] for row in rows] + for row in rows: + for m in pattern.finditer(row): + mask[m.start() : m.end()] = 1 + shifted = np.roll(mask, 1) + shifted[0] = 0 + edges = np.where((mask ^ shifted) == 1)[0] + edge_pairs = list(zip(edges[::2], edges[1::2])) + return edge_pairs + + def __next__(self) -> list[str]: + # Argument 1 to "next" has incompatible type "Union[IO[str], + # ReadCsvBuffer[str]]"; expected "SupportsNext[str]" + if self.buffer is not None: + try: + line = next(self.buffer) + except StopIteration: + self.buffer = None + line = next(self.f) # type: ignore[arg-type] + else: + line = next(self.f) # type: ignore[arg-type] + # Note: 'colspecs' is a sequence of half-open intervals. + return [line[fromm:to].strip(self.delimiter) for (fromm, to) in self.colspecs] + + +class FixedWidthFieldParser(PythonParser): + """ + Specialization that Converts fixed-width fields into DataFrames. + See PythonParser for details. 
+ """ + + def __init__(self, f: ReadCsvBuffer[str], **kwds) -> None: + # Support iterators, convert to a list. + self.colspecs = kwds.pop("colspecs") + self.infer_nrows = kwds.pop("infer_nrows") + PythonParser.__init__(self, f, **kwds) + + def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None: + self.data = FixedWidthReader( + f, + self.colspecs, + self.delimiter, + self.comment, + self.skiprows, + self.infer_nrows, + ) + + def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + """ + Returns the list of lines without the empty ones. With fixed-width + fields, empty lines become arrays of empty strings. + + See PythonParser._remove_empty_lines. + """ + return [ + line + for line in lines + if any(not isinstance(e, str) or e.strip() for e in line) + ] + + +def count_empty_vals(vals) -> int: + return sum(1 for v in vals if v == "" or v is None) + + +def _validate_skipfooter_arg(skipfooter: int) -> int: + """ + Validate the 'skipfooter' parameter. + + Checks whether 'skipfooter' is a non-negative integer. + Raises a ValueError if that is not the case. + + Parameters + ---------- + skipfooter : non-negative integer + The number of rows to skip at the end of the file. + + Returns + ------- + validated_skipfooter : non-negative integer + The original input if the validation succeeds. + + Raises + ------ + ValueError : 'skipfooter' was not a non-negative integer. + """ + if not is_integer(skipfooter): + raise ValueError("skipfooter must be an integer") + + if skipfooter < 0: + raise ValueError("skipfooter cannot be negative") + + return skipfooter diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py new file mode 100644 index 00000000..356c357f --- /dev/null +++ b/pandas/io/parsers/readers.py @@ -0,0 +1,2248 @@ +""" +Module contains tools for processing files into DataFrames or other objects +""" +from __future__ import annotations + +from collections import abc +import csv +import sys +from textwrap import fill +from typing import ( + IO, + Any, + Callable, + Hashable, + Literal, + NamedTuple, + Sequence, + overload, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._libs.parsers import STR_NA_VALUES +from pandas._typing import ( + CompressionOptions, + CSVEngine, + DtypeArg, + FilePath, + IndexLabel, + ReadCsvBuffer, + StorageOptions, +) +from pandas.errors import ( + AbstractMethodError, + ParserWarning, +) +from pandas.util._decorators import ( + Appender, + deprecate_kwarg, + deprecate_nonkeyword_arguments, +) +from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.common import ( + is_file_like, + is_float, + is_integer, + is_list_like, +) + +from pandas.core.frame import DataFrame +from pandas.core.indexes.api import RangeIndex +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import ( + IOHandles, + get_handle, + stringify_path, + validate_header_arg, +) +from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper +from pandas.io.parsers.base_parser import ( + ParserBase, + is_index_col, + parser_defaults, +) +from pandas.io.parsers.c_parser_wrapper import CParserWrapper +from pandas.io.parsers.python_parser import ( + FixedWidthFieldParser, + PythonParser, +) + +_doc_read_csv_and_table = ( + r""" +{summary} + +Also supports optionally iterating or breaking of the file +into chunks. + +Additional help can be found in the online docs for +`IO Tools `_. 
+ +Parameters +---------- +filepath_or_buffer : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is + expected. A local file could be: file://localhost/path/to/table.csv. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, such as + a file handle (e.g. via builtin ``open`` function) or ``StringIO``. +sep : str, default {_default_sep} + Delimiter to use. If sep is None, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will + be used and automatically detect the separator by Python's builtin sniffer + tool, ``csv.Sniffer``. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``. +delimiter : str, default ``None`` + Alias for sep. +header : int, list of int, None, default 'infer' + Row number(s) to use as the column names, and the start of the + data. Default behavior is to infer the column names: if no names + are passed the behavior is identical to ``header=0`` and column + names are inferred from the first line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to + replace existing names. The header can be a list of integers that + specify row locations for a multi-index on the columns + e.g. [0,1,3]. Intervening rows that are not specified will be + skipped (e.g. 2 in this example is skipped). Note that this + parameter ignores commented lines and empty lines if + ``skip_blank_lines=True``, so ``header=0`` denotes the first line of + data rather than the first line of the file. +names : array-like, optional + List of column names to use. If the file contains a header row, + then you should explicitly pass ``header=0`` to override the column names. + Duplicates in this list are not allowed. +index_col : int, str, sequence of int / str, or False, optional, default ``None`` + Column(s) to use as the row labels of the ``DataFrame``, either given as + string name or column index. If a sequence of int / str is given, a + MultiIndex is used. + + Note: ``index_col=False`` can be used to force pandas to *not* use the first + column as the index, e.g. when you have a malformed file with delimiters at + the end of each line. +usecols : list-like or callable, optional + Return a subset of the columns. If list-like, all elements must either + be positional (i.e. integer indices into the document columns) or strings + that correspond to column names provided either by the user in `names` or + inferred from the document header row(s). If ``names`` are given, the document + header row(s) are not taken into account. For example, a valid list-like + `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + To instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` + for ``['bar', 'foo']`` order. 
+ + If callable, the callable function will be evaluated against the column + names, returning names where the callable function evaluates to True. An + example of a valid callable argument would be ``lambda x: x.upper() in + ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster + parsing time and lower memory usage. +squeeze : bool, default False + If the parsed data only contains one column then return a Series. + + .. deprecated:: 1.4.0 + Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze + the data. +prefix : str, optional + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... + + .. deprecated:: 1.4.0 + Use a list comprehension on the DataFrame's columns after calling ``read_csv``. +mangle_dupe_cols : bool, default True + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than + 'X'...'X'. Passing in False will cause data to be overwritten if there + are duplicate names in the columns. + + .. deprecated:: 1.5.0 + Not implemented, and a new argument to specify the pattern for the + names of duplicated columns will be added instead +dtype : Type name or dict of column -> type, optional + Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32, + 'c': 'Int64'}} + Use `str` or `object` together with suitable `na_values` settings + to preserve and not interpret dtype. + If converters are specified, they will be applied INSTEAD + of dtype conversion. + + .. versionadded:: 1.5.0 + + Support for defaultdict was added. Specify a defaultdict as input where + the default determines the dtype of the columns which are not explicitly + listed. +engine : {{'c', 'python', 'pyarrow'}}, optional + Parser engine to use. The C and pyarrow engines are faster, while the python engine + is currently more feature-complete. Multithreading is currently only supported by + the pyarrow engine. + + .. versionadded:: 1.4.0 + + The "pyarrow" engine was added as an *experimental* engine, and some features + are unsupported, or may not work correctly, with this engine. +converters : dict, optional + Dict of functions for converting values in certain columns. Keys can either + be integers or column labels. +true_values : list, optional + Values to consider as True. +false_values : list, optional + Values to consider as False. +skipinitialspace : bool, default False + Skip spaces after delimiter. +skiprows : list-like, int or callable, optional + Line numbers to skip (0-indexed) or number of lines to skip (int) + at the start of the file. + + If callable, the callable function will be evaluated against the row + indices, returning True if the row should be skipped and False otherwise. + An example of a valid callable argument would be ``lambda x: x in [0, 2]``. +skipfooter : int, default 0 + Number of lines at bottom of file to skip (Unsupported with engine='c'). +nrows : int, optional + Number of rows of file to read. Useful for reading pieces of large files. +na_values : scalar, str, list-like, or dict, optional + Additional strings to recognize as NA/NaN. If dict passed, specific + per-column NA values. By default the following values are interpreted as + NaN: '""" + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. +keep_default_na : bool, default True + Whether or not to include the default NaN values when parsing the data. 
+ Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. +na_filter : bool, default True + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing na_filter=False can improve the performance + of reading a large file. +verbose : bool, default False + Indicate number of NA values placed in non-numeric columns. +skip_blank_lines : bool, default True + If True, skip over blank lines rather than interpreting as NaN values. +parse_dates : bool or list of int or names or list of lists or dict, \ +default False + The behavior is as follows: + + * boolean. If True -> try parsing the index. + * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + each as a separate date column. + * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as + a single date column. + * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call + result 'foo' + + If a column or index cannot be represented as an array of datetimes, + say because of an unparsable value or a mixture of timezones, the column + or index will be returned unaltered as an object data type. For + non-standard datetime parsing, use ``pd.to_datetime`` after + ``pd.read_csv``. To parse an index or column with a mixture of timezones, + specify ``date_parser`` to be a partially-applied + :func:`pandas.to_datetime` with ``utc=True``. See + :ref:`io.csv.mixed_timezones` for more. + + Note: A fast-path exists for iso8601-formatted dates. +infer_datetime_format : bool, default False + If True and `parse_dates` is enabled, pandas will attempt to infer the + format of the datetime strings in the columns, and if it can be inferred, + switch to a faster method of parsing them. In some cases this can increase + the parsing speed by 5-10x. +keep_date_col : bool, default False + If True and `parse_dates` specifies combining multiple columns then + keep the original columns. +date_parser : function, optional + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. Pandas will try to call `date_parser` in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. +dayfirst : bool, default False + DD/MM format dates, international and European format. +cache_dates : bool, default True + If True, use a cache of unique, converted dates to apply the datetime + conversion. May produce significant speed-up when parsing duplicate + date strings, especially ones with timezone offsets. + + .. 
versionadded:: 0.25.0 +iterator : bool, default False + Return TextFileReader object for iteration or getting chunks with + ``get_chunk()``. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. +chunksize : int, optional + Return TextFileReader object for iteration. + See the `IO Tools docs + `_ + for more information on ``iterator`` and ``chunksize``. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. +{decompression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + +thousands : str, optional + Thousands separator. +decimal : str, default '.' + Character to recognize as decimal point (e.g. use ',' for European data). +lineterminator : str (length 1), optional + Character to break file into lines. Only valid with C parser. +quotechar : str (length 1), optional + The character used to denote the start and end of a quoted item. Quoted + items can include the delimiter and it will be ignored. +quoting : int or csv.QUOTE_* instance, default 0 + Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of + QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3). +doublequote : bool, default ``True`` + When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate + whether or not to interpret two consecutive quotechar elements INSIDE a + field as a single ``quotechar`` element. +escapechar : str (length 1), optional + One-character string used to escape other characters. +comment : str, optional + Indicates remainder of line should not be parsed. If found at the beginning + of a line, the line will be ignored altogether. This parameter must be a + single character. Like empty lines (as long as ``skip_blank_lines=True``), + fully commented lines are ignored by the parameter `header` but not by + `skiprows`. For example, if ``comment='#'``, parsing + ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being + treated as the header. +encoding : str, optional + Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python + standard encodings + `_ . + + .. versionchanged:: 1.2 + + When ``encoding`` is ``None``, ``errors="replace"`` is passed to + ``open()``. Otherwise, ``errors="strict"`` is passed to ``open()``. + This behavior was previously only the case for ``engine="python"``. + + .. versionchanged:: 1.3.0 + + ``encoding_errors`` is a new argument. ``encoding`` has no longer an + influence on how encoding errors are handled. + +encoding_errors : str, optional, default "strict" + How encoding errors are treated. `List of possible values + `_ . + + .. versionadded:: 1.3.0 + +dialect : str or csv.Dialect, optional + If provided, this parameter will override values (default or not) for the + following parameters: `delimiter`, `doublequote`, `escapechar`, + `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to + override values, a ParserWarning will be issued. See csv.Dialect + documentation for more details. +error_bad_lines : bool, optional, default ``None`` + Lines with too many fields (e.g. a csv line with too many commas) will by + default cause an exception to be raised, and no DataFrame will be returned. + If False, then these "bad lines" will be dropped from the DataFrame that is + returned. + + .. deprecated:: 1.3.0 + The ``on_bad_lines`` parameter should be used instead to specify behavior upon + encountering a bad line instead. 
+warn_bad_lines : bool, optional, default ``None``
+ If error_bad_lines is False, and warn_bad_lines is True, a warning for each
+ "bad line" will be output.
+
+ .. deprecated:: 1.3.0
+ The ``on_bad_lines`` parameter should be used instead to specify behavior upon
+ encountering a bad line.
+on_bad_lines : {{'error', 'warn', 'skip'}} or callable, default 'error'
+ Specifies what to do upon encountering a bad line (a line with too many fields).
+ Allowed values are:
+
+ - 'error', raise an Exception when a bad line is encountered.
+ - 'warn', raise a warning when a bad line is encountered and skip that line.
+ - 'skip', skip bad lines without raising or warning when they are encountered.
+
+ .. versionadded:: 1.3.0
+
+ .. versionadded:: 1.4.0
+
+ - callable, function with signature
+ ``(bad_line: list[str]) -> list[str] | None`` that will process a single
+ bad line. ``bad_line`` is a list of strings split by the ``sep``.
+ If the function returns ``None``, the bad line will be ignored.
+ If the function returns a new list of strings with more elements than
+ expected, a ``ParserWarning`` will be emitted while dropping extra elements.
+ Only supported when ``engine="python"``.
+
+delim_whitespace : bool, default False
+ Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be
+ used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
+ is set to True, nothing should be passed in for the ``delimiter``
+ parameter.
+low_memory : bool, default True
+ Internally process the file in chunks, resulting in lower memory use
+ while parsing, but possibly mixed type inference. To ensure no mixed
+ types, either set False or specify the type with the `dtype` parameter.
+ Note that the entire file is read into a single DataFrame regardless;
+ use the `chunksize` or `iterator` parameter to return the data in chunks.
+ (Only valid with C parser).
+memory_map : bool, default False
+ If a filepath is provided for `filepath_or_buffer`, map the file object
+ directly onto memory and access the data directly from there. Using this
+ option can improve performance because there is no longer any I/O overhead.
+float_precision : str, optional
+ Specifies which converter the C engine should use for floating-point
+ values. The options are ``None`` or 'high' for the ordinary converter,
+ 'legacy' for the original lower precision pandas converter, and
+ 'round_trip' for the round-trip converter.
+
+ .. versionchanged:: 1.2
+
+{storage_options}
+
+ .. versionadded:: 1.2
+
+Returns
+-------
+DataFrame or TextParser
+ A comma-separated values (csv) file is returned as a two-dimensional
+ data structure with labeled axes.
+
+See Also
+--------
+DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
+read_csv : Read a comma-separated values (csv) file into DataFrame.
+read_fwf : Read a table of fixed-width formatted lines into DataFrame.
+ +Examples +-------- +>>> pd.{func_name}('data.csv') # doctest: +SKIP +""" +) + + +_c_parser_defaults = { + "delim_whitespace": False, + "na_filter": True, + "low_memory": True, + "memory_map": False, + "float_precision": None, +} + +_fwf_defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None} + +_c_unsupported = {"skipfooter"} +_python_unsupported = {"low_memory", "float_precision"} +_pyarrow_unsupported = { + "skipfooter", + "float_precision", + "chunksize", + "comment", + "nrows", + "thousands", + "memory_map", + "dialect", + "warn_bad_lines", + "error_bad_lines", + "on_bad_lines", + "delim_whitespace", + "quoting", + "lineterminator", + "converters", + "decimal", + "iterator", + "dayfirst", + "infer_datetime_format", + "verbose", + "skipinitialspace", + "low_memory", +} + + +class _DeprecationConfig(NamedTuple): + default_value: Any + msg: str | None + + +_deprecated_defaults: dict[str, _DeprecationConfig] = { + "error_bad_lines": _DeprecationConfig(None, "Use on_bad_lines in the future."), + "warn_bad_lines": _DeprecationConfig(None, "Use on_bad_lines in the future."), + "squeeze": _DeprecationConfig( + None, 'Append .squeeze("columns") to the call to squeeze.' + ), + "prefix": _DeprecationConfig( + None, "Use a list comprehension on the column names in the future." + ), +} + + +@overload +def validate_integer(name, val: None, min_val=...) -> None: + ... + + +@overload +def validate_integer(name, val: float, min_val=...) -> int: + ... + + +@overload +def validate_integer(name, val: int | None, min_val=...) -> int | None: + ... + + +def validate_integer(name, val: int | float | None, min_val=0) -> int | None: + """ + Checks whether the 'name' parameter for parsing is either + an integer OR float that can SAFELY be cast to an integer + without losing accuracy. Raises a ValueError if that is + not the case. + + Parameters + ---------- + name : str + Parameter name (used for error reporting) + val : int or float + The value to check + min_val : int + Minimum allowed value (val < min_val will result in a ValueError) + """ + if val is None: + return val + + msg = f"'{name:s}' must be an integer >={min_val:d}" + if is_float(val): + if int(val) != val: + raise ValueError(msg) + val = int(val) + elif not (is_integer(val) and val >= min_val): + raise ValueError(msg) + + return int(val) + + +def _validate_names(names: Sequence[Hashable] | None) -> None: + """ + Raise ValueError if the `names` parameter contains duplicates or has an + invalid data type. + + Parameters + ---------- + names : array-like or None + An array containing a list of the names used for the output DataFrame. + + Raises + ------ + ValueError + If names are not unique or are not ordered (e.g. set). + """ + if names is not None: + if len(names) != len(set(names)): + raise ValueError("Duplicate names are not allowed.") + if not ( + is_list_like(names, allow_sets=False) or isinstance(names, abc.KeysView) + ): + raise ValueError("Names should be an ordered collection.") + + +def _read( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds +) -> DataFrame | TextFileReader: + """Generic reader of line files.""" + # if we pass a date_parser and parse_dates=False, we should not parse the + # dates GH#44366 + if kwds.get("parse_dates", None) is None: + if kwds.get("date_parser", None) is None: + kwds["parse_dates"] = False + else: + kwds["parse_dates"] = True + + # Extract some of the arguments (pass chunksize on). 
+ iterator = kwds.get("iterator", False) + chunksize = kwds.get("chunksize", None) + if kwds.get("engine") == "pyarrow": + if iterator: + raise ValueError( + "The 'iterator' option is not supported with the 'pyarrow' engine" + ) + + if chunksize is not None: + raise ValueError( + "The 'chunksize' option is not supported with the 'pyarrow' engine" + ) + else: + chunksize = validate_integer("chunksize", chunksize, 1) + + nrows = kwds.get("nrows", None) + + # Check for duplicates in names. + _validate_names(kwds.get("names", None)) + + # Create the parser. + parser = TextFileReader(filepath_or_buffer, **kwds) + + if chunksize or iterator: + return parser + + with parser: + return parser.read(nrows) + + +# iterator=True -> TextFileReader +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[True], + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace: bool = ..., + low_memory=..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy"] | None = ..., + storage_options: StorageOptions = ..., +) -> TextFileReader: + ... 
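A minimal sketch of how the ``iterator=True`` overload above is meant to be used; the file name and contents below are invented for illustration and are not part of the upstream patch:

import pandas as pd

with open("example.csv", "w") as fh:          # hypothetical input file
    fh.write("a,b\n1,2\n3,4\n5,6\n")

# iterator=True selects the TextFileReader overload; the reader is a
# context manager (since 1.2) and hands back DataFrame chunks on demand.
with pd.read_csv("example.csv", iterator=True) as reader:
    first_two = reader.get_chunk(2)   # DataFrame with the first two rows
    rest = reader.get_chunk()         # DataFrame with the remaining row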
+ + +# chunksize=int -> TextFileReader +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace: bool = ..., + low_memory=..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy"] | None = ..., + storage_options: StorageOptions = ..., +) -> TextFileReader: + ... + + +# default case -> DataFrame +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[False] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace: bool = ..., + low_memory=..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy"] | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: + ... 
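A similar sketch for the ``chunksize`` overload above: passing an integer ``chunksize`` also yields a ``TextFileReader``, which iterates over DataFrame chunks of at most that many rows. The file written here is again a made-up example:

import pandas as pd

with open("example.csv", "w") as fh:          # hypothetical input file
    fh.write("a,b\n" + "\n".join(f"{i},{i * 10}" for i in range(10)) + "\n")

with pd.read_csv("example.csv", chunksize=4) as reader:
    for chunk in reader:
        print(chunk.shape)   # (4, 2), (4, 2), then (2, 2)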
+ + +# Unions -> DataFrame | TextFileReader +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace: bool = ..., + low_memory=..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy"] | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame | TextFileReader: + ... + + +@deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None) +@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) +@Appender( + _doc_read_csv_and_table.format( + func_name="read_csv", + summary="Read a comma-separated values (csv) file into DataFrame.", + _default_sep="','", + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] + % "filepath_or_buffer", + ) +) +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + sep: str | None | lib.NoDefault = lib.no_default, + delimiter: str | None | lib.NoDefault = None, + # Column and Index Locations and Names + header: int | Sequence[int] | None | Literal["infer"] = "infer", + names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, + index_col: IndexLabel | Literal[False] | None = None, + usecols=None, + squeeze: bool | None = None, + prefix: str | lib.NoDefault = lib.no_default, + mangle_dupe_cols: bool = True, + # General Parsing Configuration + dtype: DtypeArg | None = None, + engine: CSVEngine | None = None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace: bool = False, + skiprows=None, + skipfooter: int = 0, + nrows: int | None = None, + # NA and Missing Data Handling + na_values=None, + keep_default_na: bool = True, + na_filter: bool = True, + verbose: bool = False, + skip_blank_lines: bool = True, + # Datetime Handling + parse_dates=None, + infer_datetime_format: bool = False, + keep_date_col: bool = False, + date_parser=None, + dayfirst: bool = False, + cache_dates: bool = True, + # Iteration + iterator: bool = False, + chunksize: int | None = None, + # Quoting, Compression, and File Format + compression: 
CompressionOptions = "infer", + thousands: str | None = None, + decimal: str = ".", + lineterminator: str | None = None, + quotechar: str = '"', + quoting: int = csv.QUOTE_MINIMAL, + doublequote: bool = True, + escapechar: str | None = None, + comment: str | None = None, + encoding: str | None = None, + encoding_errors: str | None = "strict", + dialect: str | csv.Dialect | None = None, + # Error Handling + error_bad_lines: bool | None = None, + warn_bad_lines: bool | None = None, + # TODO(2.0): set on_bad_lines to "error". + # See _refine_defaults_read comment for why we do this. + on_bad_lines=None, + # Internal + delim_whitespace: bool = False, + low_memory=_c_parser_defaults["low_memory"], + memory_map: bool = False, + float_precision: Literal["high", "legacy"] | None = None, + storage_options: StorageOptions = None, +) -> DataFrame | TextFileReader: + # locals() should never be modified + kwds = locals().copy() + del kwds["filepath_or_buffer"] + del kwds["sep"] + + kwds_defaults = _refine_defaults_read( + dialect, + delimiter, + delim_whitespace, + engine, + sep, + error_bad_lines, + warn_bad_lines, + on_bad_lines, + names, + prefix, + defaults={"delimiter": ","}, + ) + kwds.update(kwds_defaults) + + return _read(filepath_or_buffer, kwds) + + +# iterator=True -> TextFileReader +@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[True], + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace=..., + low_memory=..., + memory_map: bool = ..., + float_precision: str | None = ..., + storage_options: StorageOptions = ..., +) -> TextFileReader: + ... 
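``read_table`` shares the machinery above with ``read_csv``; the main difference is the default tab delimiter. A small illustrative sketch, assuming a tab-separated file written on the spot:

import pandas as pd

with open("example.tsv", "w") as fh:          # hypothetical input file
    fh.write("a\tb\n1\t2\n3\t4\n")

df_table = pd.read_table("example.tsv")        # default sep is '\t'
df_csv = pd.read_csv("example.tsv", sep="\t")  # equivalent call via read_csv
assert df_table.equals(df_csv)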
+ + +# chunksize=int -> TextFileReader +@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace=..., + low_memory=..., + memory_map: bool = ..., + float_precision: str | None = ..., + storage_options: StorageOptions = ..., +) -> TextFileReader: + ... + + +# default -> DataFrame +@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[False] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace=..., + low_memory=..., + memory_map: bool = ..., + float_precision: str | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: + ... 
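The ``on_bad_lines`` parameter listed in these signatures also accepts a callable, as described in the shared docstring earlier. A sketch of that form, using made-up inline data and the Python engine (the only engine that supports a callable here):

import io

import pandas as pd

data = "a,b\n1,2\n3,4,5\n6,7\n"   # the second data row has one field too many

df = pd.read_csv(
    io.StringIO(data),
    engine="python",
    on_bad_lines=lambda bad: bad[:2],   # keep the first two fields of a bad line
)
# df now holds three rows: (1, 2), (3, 4) and (6, 7); returning None from the
# callable would have skipped the offending line instead.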
+ + +# Unions -> DataFrame | TextFileReader +@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace=..., + low_memory=..., + memory_map: bool = ..., + float_precision: str | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame | TextFileReader: + ... + + +@deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None) +@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) +@Appender( + _doc_read_csv_and_table.format( + func_name="read_table", + summary="Read general delimited file into DataFrame.", + _default_sep=r"'\\t' (tab-stop)", + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] + % "filepath_or_buffer", + ) +) +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + sep: str | None | lib.NoDefault = lib.no_default, + delimiter: str | None | lib.NoDefault = None, + # Column and Index Locations and Names + header: int | Sequence[int] | None | Literal["infer"] = "infer", + names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, + index_col: IndexLabel | Literal[False] | None = None, + usecols=None, + squeeze: bool | None = None, + prefix: str | lib.NoDefault = lib.no_default, + mangle_dupe_cols: bool = True, + # General Parsing Configuration + dtype: DtypeArg | None = None, + engine: CSVEngine | None = None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace: bool = False, + skiprows=None, + skipfooter: int = 0, + nrows: int | None = None, + # NA and Missing Data Handling + na_values=None, + keep_default_na: bool = True, + na_filter: bool = True, + verbose: bool = False, + skip_blank_lines: bool = True, + # Datetime Handling + parse_dates=False, + infer_datetime_format: bool = False, + keep_date_col: bool = False, + date_parser=None, + dayfirst: bool = False, + cache_dates: bool = True, + # Iteration + iterator: bool = False, + chunksize: int | None = None, + # Quoting, Compression, and File Format + compression: CompressionOptions = "infer", + 
thousands: str | None = None,
+ decimal: str = ".",
+ lineterminator: str | None = None,
+ quotechar: str = '"',
+ quoting: int = csv.QUOTE_MINIMAL,
+ doublequote: bool = True,
+ escapechar: str | None = None,
+ comment: str | None = None,
+ encoding: str | None = None,
+ encoding_errors: str | None = "strict",
+ dialect: str | csv.Dialect | None = None,
+ # Error Handling
+ error_bad_lines: bool | None = None,
+ warn_bad_lines: bool | None = None,
+ # TODO(2.0): set on_bad_lines to "error".
+ # See _refine_defaults_read comment for why we do this.
+ on_bad_lines=None,
+ # Internal
+ delim_whitespace=False,
+ low_memory=_c_parser_defaults["low_memory"],
+ memory_map: bool = False,
+ float_precision: str | None = None,
+ storage_options: StorageOptions = None,
+) -> DataFrame | TextFileReader:
+ # locals() should never be modified
+ kwds = locals().copy()
+ del kwds["filepath_or_buffer"]
+ del kwds["sep"]
+
+ kwds_defaults = _refine_defaults_read(
+ dialect,
+ delimiter,
+ delim_whitespace,
+ engine,
+ sep,
+ error_bad_lines,
+ warn_bad_lines,
+ on_bad_lines,
+ names,
+ prefix,
+ defaults={"delimiter": "\t"},
+ )
+ kwds.update(kwds_defaults)
+
+ return _read(filepath_or_buffer, kwds)
+
+
+@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"])
+def read_fwf(
+ filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
+ colspecs: Sequence[tuple[int, int]] | str | None = "infer",
+ widths: Sequence[int] | None = None,
+ infer_nrows: int = 100,
+ **kwds,
+) -> DataFrame | TextFileReader:
+ r"""
+ Read a table of fixed-width formatted lines into DataFrame.
+
+ Also supports optionally iterating or breaking the file
+ into chunks.
+
+ Additional help can be found in the `online docs for IO Tools
+ `_.
+
+ Parameters
+ ----------
+ filepath_or_buffer : str, path object, or file-like object
+ String, path object (implementing ``os.PathLike[str]``), or file-like
+ object implementing a text ``read()`` function. The string could be a URL.
+ Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
+ expected. A local file could be:
+ ``file://localhost/path/to/table.csv``.
+ colspecs : list of tuple (int, int) or 'infer', optional
+ A list of tuples giving the extents of the fixed-width
+ fields of each line as half-open intervals (i.e., [from, to[ ).
+ String value 'infer' can be used to instruct the parser to try
+ detecting the column specifications from the first 100 rows of
+ the data that are not being skipped via skiprows (default='infer').
+ widths : list of int, optional
+ A list of field widths which can be used instead of 'colspecs' if
+ the intervals are contiguous.
+ infer_nrows : int, default 100
+ The number of rows to consider when letting the parser determine the
+ `colspecs`.
+ **kwds : optional
+ Optional keyword arguments can be passed to ``TextFileReader``.
+
+ Returns
+ -------
+ DataFrame or TextFileReader
+ A comma-separated values (csv) file is returned as a two-dimensional
+ data structure with labeled axes.
+
+ See Also
+ --------
+ DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
+ read_csv : Read a comma-separated values (csv) file into DataFrame.
+
+ Examples
+ --------
+ >>> pd.read_fwf('data.csv') # doctest: +SKIP
+ """
+ # Check input arguments.
+ if colspecs is None and widths is None: + raise ValueError("Must specify either colspecs or widths") + elif colspecs not in (None, "infer") and widths is not None: + raise ValueError("You must specify only one of 'widths' and 'colspecs'") + + # Compute 'colspecs' from 'widths', if specified. + if widths is not None: + colspecs, col = [], 0 + for w in widths: + colspecs.append((col, col + w)) + col += w + + # for mypy + assert colspecs is not None + + # GH#40830 + # Ensure length of `colspecs` matches length of `names` + names = kwds.get("names") + if names is not None: + if len(names) != len(colspecs) and colspecs != "infer": + # need to check len(index_col) as it might contain + # unnamed indices, in which case it's name is not required + len_index = 0 + if kwds.get("index_col") is not None: + index_col: Any = kwds.get("index_col") + if index_col is not False: + if not is_list_like(index_col): + len_index = 1 + else: + len_index = len(index_col) + if kwds.get("usecols") is None and len(names) + len_index != len(colspecs): + # If usecols is used colspec may be longer than names + raise ValueError("Length of colspecs must match length of names") + + kwds["colspecs"] = colspecs + kwds["infer_nrows"] = infer_nrows + kwds["engine"] = "python-fwf" + return _read(filepath_or_buffer, kwds) + + +class TextFileReader(abc.Iterator): + """ + + Passed dialect overrides any of the related parser options + + """ + + def __init__( + self, + f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str] | list, + engine: CSVEngine | None = None, + **kwds, + ) -> None: + if engine is not None: + engine_specified = True + else: + engine = "python" + engine_specified = False + self.engine = engine + self._engine_specified = kwds.get("engine_specified", engine_specified) + + _validate_skipfooter(kwds) + + dialect = _extract_dialect(kwds) + if dialect is not None: + if engine == "pyarrow": + raise ValueError( + "The 'dialect' option is not supported with the 'pyarrow' engine" + ) + kwds = _merge_with_dialect_properties(dialect, kwds) + + if kwds.get("header", "infer") == "infer": + kwds["header"] = 0 if kwds.get("names") is None else None + + self.orig_options = kwds + + # miscellanea + self._currow = 0 + + options = self._get_options_with_defaults(engine) + options["storage_options"] = kwds.get("storage_options", None) + + self.chunksize = options.pop("chunksize", None) + self.nrows = options.pop("nrows", None) + + self._check_file_or_buffer(f, engine) + self.options, self.engine = self._clean_options(options, engine) + + self.squeeze = self.options.pop("squeeze", False) + + if "has_index_names" in kwds: + self.options["has_index_names"] = kwds["has_index_names"] + + self.handles: IOHandles | None = None + self._engine = self._make_engine(f, self.engine) + + def close(self) -> None: + if self.handles is not None: + self.handles.close() + self._engine.close() + + def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]: + kwds = self.orig_options + + options = {} + default: object | None + + for argname, default in parser_defaults.items(): + value = kwds.get(argname, default) + + # see gh-12935 + if ( + engine == "pyarrow" + and argname in _pyarrow_unsupported + and value != default + and value != getattr(value, "value", default) + ): + if ( + argname == "on_bad_lines" + and kwds.get("error_bad_lines") is not None + ): + argname = "error_bad_lines" + elif ( + argname == "on_bad_lines" and kwds.get("warn_bad_lines") is not None + ): + argname = "warn_bad_lines" + + raise ValueError( + f"The 
{repr(argname)} option is not supported with the " + f"'pyarrow' engine" + ) + elif argname == "mangle_dupe_cols" and value is False: + # GH12935 + raise ValueError("Setting mangle_dupe_cols=False is not supported yet") + else: + options[argname] = value + + for argname, default in _c_parser_defaults.items(): + if argname in kwds: + value = kwds[argname] + + if engine != "c" and value != default: + if "python" in engine and argname not in _python_unsupported: + pass + elif ( + value + == _deprecated_defaults.get( + argname, _DeprecationConfig(default, None) + ).default_value + ): + pass + else: + raise ValueError( + f"The {repr(argname)} option is not supported with the " + f"{repr(engine)} engine" + ) + else: + value = _deprecated_defaults.get( + argname, _DeprecationConfig(default, None) + ).default_value + options[argname] = value + + if engine == "python-fwf": + for argname, default in _fwf_defaults.items(): + options[argname] = kwds.get(argname, default) + + return options + + def _check_file_or_buffer(self, f, engine: CSVEngine) -> None: + # see gh-16530 + if is_file_like(f) and engine != "c" and not hasattr(f, "__iter__"): + # The C engine doesn't need the file-like to have the "__iter__" + # attribute. However, the Python engine needs "__iter__(...)" + # when iterating through such an object, meaning it + # needs to have that attribute + raise ValueError( + "The 'python' engine cannot iterate through this file buffer." + ) + + def _clean_options( + self, options: dict[str, Any], engine: CSVEngine + ) -> tuple[dict[str, Any], CSVEngine]: + result = options.copy() + + fallback_reason = None + + # C engine not supported yet + if engine == "c": + if options["skipfooter"] > 0: + fallback_reason = "the 'c' engine does not support skipfooter" + engine = "python" + + sep = options["delimiter"] + delim_whitespace = options["delim_whitespace"] + + if sep is None and not delim_whitespace: + if engine in ("c", "pyarrow"): + fallback_reason = ( + f"the '{engine}' engine does not support " + "sep=None with delim_whitespace=False" + ) + engine = "python" + elif sep is not None and len(sep) > 1: + if engine == "c" and sep == r"\s+": + result["delim_whitespace"] = True + del result["delimiter"] + elif engine not in ("python", "python-fwf"): + # wait until regex engine integrated + fallback_reason = ( + f"the '{engine}' engine does not support " + "regex separators (separators > 1 char and " + r"different from '\s+' are interpreted as regex)" + ) + engine = "python" + elif delim_whitespace: + if "python" in engine: + result["delimiter"] = r"\s+" + elif sep is not None: + encodeable = True + encoding = sys.getfilesystemencoding() or "utf-8" + try: + if len(sep.encode(encoding)) > 1: + encodeable = False + except UnicodeDecodeError: + encodeable = False + if not encodeable and engine not in ("python", "python-fwf"): + fallback_reason = ( + f"the separator encoded in {encoding} " + f"is > 1 char long, and the '{engine}' engine " + "does not support such separators" + ) + engine = "python" + + quotechar = options["quotechar"] + if quotechar is not None and isinstance(quotechar, (str, bytes)): + if ( + len(quotechar) == 1 + and ord(quotechar) > 127 + and engine not in ("python", "python-fwf") + ): + fallback_reason = ( + "ord(quotechar) > 127, meaning the " + "quotechar is larger than one byte, " + f"and the '{engine}' engine does not support such quotechars" + ) + engine = "python" + + if fallback_reason and self._engine_specified: + raise ValueError(fallback_reason) + + if engine == "c": + for arg in 
_c_unsupported: + del result[arg] + + if "python" in engine: + for arg in _python_unsupported: + if fallback_reason and result[arg] != _c_parser_defaults[arg]: + raise ValueError( + "Falling back to the 'python' engine because " + f"{fallback_reason}, but this causes {repr(arg)} to be " + "ignored as it is not supported by the 'python' engine." + ) + del result[arg] + + if fallback_reason: + warnings.warn( + ( + "Falling back to the 'python' engine because " + f"{fallback_reason}; you can avoid this warning by specifying " + "engine='python'." + ), + ParserWarning, + stacklevel=find_stack_level(), + ) + + index_col = options["index_col"] + names = options["names"] + converters = options["converters"] + na_values = options["na_values"] + skiprows = options["skiprows"] + + validate_header_arg(options["header"]) + + for arg in _deprecated_defaults.keys(): + parser_default = _c_parser_defaults.get(arg, parser_defaults[arg]) + depr_default = _deprecated_defaults[arg] + if result.get(arg, depr_default) != depr_default.default_value: + msg = ( + f"The {arg} argument has been deprecated and will be " + f"removed in a future version. {depr_default.msg}\n\n" + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + else: + result[arg] = parser_default + + if index_col is True: + raise ValueError("The value of index_col couldn't be 'True'") + if is_index_col(index_col): + if not isinstance(index_col, (list, tuple, np.ndarray)): + index_col = [index_col] + result["index_col"] = index_col + + names = list(names) if names is not None else names + + # type conversion-related + if converters is not None: + if not isinstance(converters, dict): + raise TypeError( + "Type converters must be a dict or subclass, " + f"input was a {type(converters).__name__}" + ) + else: + converters = {} + + # Converting values to NA + keep_default_na = options["keep_default_na"] + na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) + + # handle skiprows; this is internally handled by the + # c-engine, so only need for python and pyarrow parsers + if engine == "pyarrow": + if not is_integer(skiprows) and skiprows is not None: + # pyarrow expects skiprows to be passed as an integer + raise ValueError( + "skiprows argument must be an integer when using " + "engine='pyarrow'" + ) + else: + if is_integer(skiprows): + skiprows = list(range(skiprows)) + if skiprows is None: + skiprows = set() + elif not callable(skiprows): + skiprows = set(skiprows) + + # put stuff back + result["names"] = names + result["converters"] = converters + result["na_values"] = na_values + result["na_fvalues"] = na_fvalues + result["skiprows"] = skiprows + # Default for squeeze is none since we need to check + # if user sets it. We then set to False to preserve + # previous behavior. 
+ result["squeeze"] = False if options["squeeze"] is None else options["squeeze"] + + return result, engine + + def __next__(self) -> DataFrame: + try: + return self.get_chunk() + except StopIteration: + self.close() + raise + + def _make_engine( + self, + f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str] | list | IO, + engine: CSVEngine = "c", + ) -> ParserBase: + mapping: dict[str, type[ParserBase]] = { + "c": CParserWrapper, + "python": PythonParser, + "pyarrow": ArrowParserWrapper, + "python-fwf": FixedWidthFieldParser, + } + if engine not in mapping: + raise ValueError( + f"Unknown engine: {engine} (valid options are {mapping.keys()})" + ) + if not isinstance(f, list): + # open file here + is_text = True + mode = "r" + if engine == "pyarrow": + is_text = False + mode = "rb" + elif ( + engine == "c" + and self.options.get("encoding", "utf-8") == "utf-8" + and isinstance(stringify_path(f), str) + ): + # c engine can decode utf-8 bytes, adding TextIOWrapper makes + # the c-engine especially for memory_map=True far slower + is_text = False + if "b" not in mode: + mode += "b" + self.handles = get_handle( + f, + mode, + encoding=self.options.get("encoding", None), + compression=self.options.get("compression", None), + memory_map=self.options.get("memory_map", False), + is_text=is_text, + errors=self.options.get("encoding_errors", "strict"), + storage_options=self.options.get("storage_options", None), + ) + assert self.handles is not None + f = self.handles.handle + + elif engine != "python": + msg = f"Invalid file path or buffer object type: {type(f)}" + raise ValueError(msg) + + try: + return mapping[engine](f, **self.options) + except Exception: + if self.handles is not None: + self.handles.close() + raise + + def _failover_to_python(self) -> None: + raise AbstractMethodError(self) + + def read(self, nrows: int | None = None) -> DataFrame: + if self.engine == "pyarrow": + try: + # error: "ParserBase" has no attribute "read" + df = self._engine.read() # type: ignore[attr-defined] + except Exception: + self.close() + raise + else: + nrows = validate_integer("nrows", nrows) + try: + # error: "ParserBase" has no attribute "read" + ( + index, + columns, + col_dict, + ) = self._engine.read( # type: ignore[attr-defined] + nrows + ) + except Exception: + self.close() + raise + + if index is None: + if col_dict: + # Any column is actually fine: + new_rows = len(next(iter(col_dict.values()))) + index = RangeIndex(self._currow, self._currow + new_rows) + else: + new_rows = 0 + else: + new_rows = len(index) + + df = DataFrame(col_dict, columns=columns, index=index) + + self._currow += new_rows + + if self.squeeze and len(df.columns) == 1: + return df.squeeze("columns").copy() + return df + + def get_chunk(self, size: int | None = None) -> DataFrame: + if size is None: + size = self.chunksize + if self.nrows is not None: + if self._currow >= self.nrows: + raise StopIteration + size = min(size, self.nrows - self._currow) + return self.read(nrows=size) + + def __enter__(self) -> TextFileReader: + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + self.close() + + +def TextParser(*args, **kwds) -> TextFileReader: + """ + Converts lists of lists/tuples into DataFrames with proper type inference + and optional (e.g. string to datetime) conversion. 
Also enables iterating + lazily over chunks of large files + + Parameters + ---------- + data : file-like object or list + delimiter : separator character to use + dialect : str or csv.Dialect instance, optional + Ignored if delimiter is longer than 1 character + names : sequence, default + header : int, default 0 + Row to use to parse column labels. Defaults to the first row. Prior + rows will be discarded + index_col : int or list, optional + Column or columns to use as the (possibly hierarchical) index + has_index_names: bool, default False + True if the cols defined in index_col have an index name and are + not in the header. + na_values : scalar, str, list-like, or dict, optional + Additional strings to recognize as NA/NaN. + keep_default_na : bool, default True + thousands : str, optional + Thousands separator + comment : str, optional + Comment out remainder of line + parse_dates : bool, default False + keep_date_col : bool, default False + date_parser : function, optional + skiprows : list of integers + Row numbers to skip + skipfooter : int + Number of line at bottom of file to skip + converters : dict, optional + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the cell (not column) content, and return the + transformed content. + encoding : str, optional + Encoding to use for UTF when reading/writing (ex. 'utf-8') + squeeze : bool, default False + returns Series if only one column. + infer_datetime_format: bool, default False + If True and `parse_dates` is True for a column, try to infer the + datetime format based on the first datetime string. If the format + can be inferred, there often will be a large parsing speed-up. + float_precision : str, optional + Specifies which converter the C engine should use for floating-point + values. The options are `None` or `high` for the ordinary converter, + `legacy` for the original lower precision pandas converter, and + `round_trip` for the round-trip converter. + + .. versionchanged:: 1.2 + """ + kwds["engine"] = "python" + return TextFileReader(*args, **kwds) + + +def _clean_na_values(na_values, keep_default_na=True): + na_fvalues: set | dict + if na_values is None: + if keep_default_na: + na_values = STR_NA_VALUES + else: + na_values = set() + na_fvalues = set() + elif isinstance(na_values, dict): + old_na_values = na_values.copy() + na_values = {} # Prevent aliasing. + + # Convert the values in the na_values dictionary + # into array-likes for further use. This is also + # where we append the default NaN values, provided + # that `keep_default_na=True`. 
+ for k, v in old_na_values.items(): + if not is_list_like(v): + v = [v] + + if keep_default_na: + v = set(v) | STR_NA_VALUES + + na_values[k] = v + na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()} + else: + if not is_list_like(na_values): + na_values = [na_values] + na_values = _stringify_na_values(na_values) + if keep_default_na: + na_values = na_values | STR_NA_VALUES + + na_fvalues = _floatify_na_values(na_values) + + return na_values, na_fvalues + + +def _floatify_na_values(na_values): + # create float versions of the na_values + result = set() + for v in na_values: + try: + v = float(v) + if not np.isnan(v): + result.add(v) + except (TypeError, ValueError, OverflowError): + pass + return result + + +def _stringify_na_values(na_values): + """return a stringified and numeric for these values""" + result: list[str | float] = [] + for x in na_values: + result.append(str(x)) + result.append(x) + try: + v = float(x) + + # we are like 999 here + if v == int(v): + v = int(v) + result.append(f"{v}.0") + result.append(str(v)) + + result.append(v) + except (TypeError, ValueError, OverflowError): + pass + try: + result.append(int(x)) + except (TypeError, ValueError, OverflowError): + pass + return set(result) + + +def _refine_defaults_read( + dialect: str | csv.Dialect | None, + delimiter: str | None | lib.NoDefault, + delim_whitespace: bool, + engine: CSVEngine | None, + sep: str | None | lib.NoDefault, + error_bad_lines: bool | None, + warn_bad_lines: bool | None, + on_bad_lines: str | Callable | None, + names: Sequence[Hashable] | None | lib.NoDefault, + prefix: str | None | lib.NoDefault, + defaults: dict[str, Any], +): + """Validate/refine default values of input parameters of read_csv, read_table. + + Parameters + ---------- + dialect : str or csv.Dialect + If provided, this parameter will override values (default or not) for the + following parameters: `delimiter`, `doublequote`, `escapechar`, + `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to + override values, a ParserWarning will be issued. See csv.Dialect + documentation for more details. + delimiter : str or object + Alias for sep. + delim_whitespace : bool + Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be + used as the sep. Equivalent to setting ``sep='\\s+'``. If this option + is set to True, nothing should be passed in for the ``delimiter`` + parameter. + engine : {{'c', 'python'}} + Parser engine to use. The C engine is faster while the python engine is + currently more feature-complete. + sep : str or object + A delimiter provided by the user (str) or a sentinel value, i.e. + pandas._libs.lib.no_default. + error_bad_lines : str or None + Whether to error on a bad line or not. + warn_bad_lines : str or None + Whether to warn on a bad line or not. + on_bad_lines : str, callable or None + An option for handling bad lines or a sentinel value(None). + names : array-like, optional + List of column names to use. If the file contains a header row, + then you should explicitly pass ``header=0`` to override the column names. + Duplicates in this list are not allowed. + prefix : str, optional + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... + defaults: dict + Default values of input parameters. + + Returns + ------- + kwds : dict + Input parameters with correct values. + + Raises + ------ + ValueError : + If a delimiter was specified with ``sep`` (or ``delimiter``) and + ``delim_whitespace=True``. 
+ If on_bad_lines is specified (not ``None``) and ``error_bad_lines``/
+ ``warn_bad_lines`` is True.
+ """
+ # fix types for sep, delimiter to Union(str, Any)
+ delim_default = defaults["delimiter"]
+ kwds: dict[str, Any] = {}
+ # gh-23761
+ #
+ # When a dialect is passed, it overrides any of the overlapping
+ # parameters passed in directly. We don't want to warn if the
+ # default parameters were passed in (since it probably means
+ # that the user didn't pass them in explicitly in the first place).
+ #
+ # "delimiter" is the annoying corner case because we alias it to
+ # "sep" before doing comparison to the dialect values later on.
+ # Thus, we need a flag to indicate that we need to "override"
+ # the comparison to dialect values by checking if default values
+ # for BOTH "delimiter" and "sep" were provided.
+ if dialect is not None:
+ kwds["sep_override"] = delimiter is None and (
+ sep is lib.no_default or sep == delim_default
+ )
+
+ if delimiter and (sep is not lib.no_default):
+ raise ValueError("Specified a sep and a delimiter; you can only specify one.")
+
+ if (
+ names is not None
+ and names is not lib.no_default
+ and prefix is not None
+ and prefix is not lib.no_default
+ ):
+ raise ValueError("Specified names and prefix; you can only specify one.")
+
+ kwds["names"] = None if names is lib.no_default else names
+ kwds["prefix"] = None if prefix is lib.no_default else prefix
+
+ # Alias sep -> delimiter.
+ if delimiter is None:
+ delimiter = sep
+
+ if delim_whitespace and (delimiter is not lib.no_default):
+ raise ValueError(
+ "Specified a delimiter with both sep and "
+ "delim_whitespace=True; you can only specify one."
+ )
+
+ if delimiter == "\n":
+ raise ValueError(
+ r"Specified \n as separator or delimiter. This forces the python engine "
+ "which does not accept a line terminator. Hence it is not allowed to use "
+ "the line terminator as separator.",
+ )
+
+ if delimiter is lib.no_default:
+ # assign default separator value
+ kwds["delimiter"] = delim_default
+ else:
+ kwds["delimiter"] = delimiter
+
+ if engine is not None:
+ kwds["engine_specified"] = True
+ else:
+ kwds["engine"] = "c"
+ kwds["engine_specified"] = False
+
+ # Ensure that on_bad_lines and error_bad_lines/warn_bad_lines
+ # aren't specified at the same time. If so, raise. Otherwise,
+ # alias on_bad_lines to "error" if error/warn_bad_lines not set
+ # and on_bad_lines is not set. on_bad_lines is defaulted to None
+ # so we can tell if it is set (this is why this hack exists).
+ if on_bad_lines is not None:
+ if error_bad_lines is not None or warn_bad_lines is not None:
+ raise ValueError(
+ "Both on_bad_lines and error_bad_lines/warn_bad_lines are set. "
+ "Please only set on_bad_lines."
+ )
+ if on_bad_lines == "error":
+ kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR
+ elif on_bad_lines == "warn":
+ kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN
+ elif on_bad_lines == "skip":
+ kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP
+ elif callable(on_bad_lines):
+ if engine != "python":
+ raise ValueError(
+ "on_bad_lines can only be a callable function if engine='python'"
+ )
+ kwds["on_bad_lines"] = on_bad_lines
+ else:
+ raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines")
+ else:
+ if error_bad_lines is not None:
+ # Must check is_bool, because other stuff (e.g.
non-empty lists) eval to true + validate_bool_kwarg(error_bad_lines, "error_bad_lines") + if error_bad_lines: + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR + else: + if warn_bad_lines is not None: + # This is the case where error_bad_lines is False + # We can only warn/skip if error_bad_lines is False + # None doesn't work because backwards-compatibility reasons + validate_bool_kwarg(warn_bad_lines, "warn_bad_lines") + if warn_bad_lines: + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN + else: + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP + else: + # Backwards compat, when only error_bad_lines = false, we warn + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN + else: + # Everything None -> Error + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR + + return kwds + + +def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None: + """ + Extract concrete csv dialect instance. + + Returns + ------- + csv.Dialect or None + """ + if kwds.get("dialect") is None: + return None + + dialect = kwds["dialect"] + if dialect in csv.list_dialects(): + dialect = csv.get_dialect(dialect) + + _validate_dialect(dialect) + + return dialect + + +MANDATORY_DIALECT_ATTRS = ( + "delimiter", + "doublequote", + "escapechar", + "skipinitialspace", + "quotechar", + "quoting", +) + + +def _validate_dialect(dialect: csv.Dialect) -> None: + """ + Validate csv dialect instance. + + Raises + ------ + ValueError + If incorrect dialect is provided. + """ + for param in MANDATORY_DIALECT_ATTRS: + if not hasattr(dialect, param): + raise ValueError(f"Invalid dialect {dialect} provided") + + +def _merge_with_dialect_properties( + dialect: csv.Dialect, + defaults: dict[str, Any], +) -> dict[str, Any]: + """ + Merge default kwargs in TextFileReader with dialect parameters. + + Parameters + ---------- + dialect : csv.Dialect + Concrete csv dialect. See csv.Dialect documentation for more details. + defaults : dict + Keyword arguments passed to TextFileReader. + + Returns + ------- + kwds : dict + Updated keyword arguments, merged with dialect parameters. + """ + kwds = defaults.copy() + + for param in MANDATORY_DIALECT_ATTRS: + dialect_val = getattr(dialect, param) + + parser_default = parser_defaults[param] + provided = kwds.get(param, parser_default) + + # Messages for conflicting values between the dialect + # instance and the actual parameters provided. + conflict_msgs = [] + + # Don't warn if the default parameter was passed in, + # even if it conflicts with the dialect (gh-23761). + if provided != parser_default and provided != dialect_val: + msg = ( + f"Conflicting values for '{param}': '{provided}' was " + f"provided, but the dialect specifies '{dialect_val}'. " + "Using the dialect-specified value." + ) + + # Annoying corner case for not warning about + # conflicts between dialect and delimiter parameter. + # Refer to the outer "_read_" function for more info. + if not (param == "delimiter" and kwds.pop("sep_override", False)): + conflict_msgs.append(msg) + + if conflict_msgs: + warnings.warn( + "\n\n".join(conflict_msgs), ParserWarning, stacklevel=find_stack_level() + ) + kwds[param] = dialect_val + return kwds + + +def _validate_skipfooter(kwds: dict[str, Any]) -> None: + """ + Check whether skipfooter is compatible with other kwargs in TextFileReader. + + Parameters + ---------- + kwds : dict + Keyword arguments passed to TextFileReader. + + Raises + ------ + ValueError + If skipfooter is not compatible with other parameters. 
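+
+    Examples
+    --------
+    A minimal illustration of the incompatibility this helper enforces (the
+    dict stands in for the kwargs that TextFileReader collects):
+
+    >>> _validate_skipfooter({"skipfooter": 1, "nrows": 5})
+    Traceback (most recent call last):
+        ...
+    ValueError: 'skipfooter' not supported with 'nrows'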
+ """ + if kwds.get("skipfooter"): + if kwds.get("iterator") or kwds.get("chunksize"): + raise ValueError("'skipfooter' not supported for iteration") + if kwds.get("nrows"): + raise ValueError("'skipfooter' not supported with 'nrows'") diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py new file mode 100644 index 00000000..373d6088 --- /dev/null +++ b/pandas/io/pickle.py @@ -0,0 +1,216 @@ +""" pickle compat """ +from __future__ import annotations + +import pickle +from typing import Any +import warnings + +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadPickleBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.compat import pickle_compat as pc +from pandas.util._decorators import doc + +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import get_handle + + +@doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "filepath_or_buffer", +) +def to_pickle( + obj: Any, + filepath_or_buffer: FilePath | WriteBuffer[bytes], + compression: CompressionOptions = "infer", + protocol: int = pickle.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, +) -> None: + """ + Pickle (serialize) object to file. + + Parameters + ---------- + obj : any object + Any python object. + filepath_or_buffer : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. + + .. versionchanged:: 1.0.0 + Accept URL. URL has to be of S3 or GCS. + {compression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + protocol : int + Int which indicates which protocol should be used by the pickler, + default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible + values for this parameter depend on the version of Python. For Python + 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value. + For Python >= 3.4, 4 is a valid value. A negative value for the + protocol parameter is equivalent to setting its value to + HIGHEST_PROTOCOL. + + {storage_options} + + .. versionadded:: 1.2.0 + + .. [1] https://docs.python.org/3/library/pickle.html + + See Also + -------- + read_pickle : Load pickled pandas object (or any object) from file. + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_sql : Write DataFrame to a SQL database. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Examples + -------- + >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP + >>> original_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP + >>> unpickled_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + """ # noqa: E501 + if protocol < 0: + protocol = pickle.HIGHEST_PROTOCOL + + with get_handle( + filepath_or_buffer, + "wb", + compression=compression, + is_text=False, + storage_options=storage_options, + ) as handles: + if handles.compression["method"] in ("bz2", "xz") and protocol >= 5: + # some weird TypeError GH#39002 with pickle 5: fallback to letting + # pickle create the entire object and then write it to the buffer. 
+ # "zip" would also be here if pandas.io.common._BytesZipFile + # wouldn't buffer write calls + handles.handle.write(pickle.dumps(obj, protocol=protocol)) + else: + # letting pickle write directly to the buffer is more memory-efficient + pickle.dump(obj, handles.handle, protocol=protocol) + + +@doc( + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer", +) +def read_pickle( + filepath_or_buffer: FilePath | ReadPickleBuffer, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +): + """ + Load pickled pandas object (or any object) from file. + + .. warning:: + + Loading pickled data received from untrusted sources can be + unsafe. See `here `__. + + Parameters + ---------- + filepath_or_buffer : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``readlines()`` function. + + .. versionchanged:: 1.0.0 + Accept URL. URL is not limited to S3 and GCS. + + {decompression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + .. versionadded:: 1.2.0 + + Returns + ------- + unpickled : same type as object stored in file + + See Also + -------- + DataFrame.to_pickle : Pickle (serialize) DataFrame object to file. + Series.to_pickle : Pickle (serialize) Series object to file. + read_hdf : Read HDF5 file into a DataFrame. + read_sql : Read SQL query or database table into a DataFrame. + read_parquet : Load a parquet object, returning a DataFrame. + + Notes + ----- + read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3 + provided the object was serialized with to_pickle. + + Examples + -------- + >>> original_df = pd.DataFrame( + ... {{"foo": range(5), "bar": range(5, 10)}} + ... ) # doctest: +SKIP + >>> original_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP + >>> unpickled_df # doctest: +SKIP + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + """ + excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError) + with get_handle( + filepath_or_buffer, + "rb", + compression=compression, + is_text=False, + storage_options=storage_options, + ) as handles: + + # 1) try standard library Pickle + # 2) try pickle_compat (older pandas version) to handle subclass changes + # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError + + try: + # TypeError for Cython complaints about object.__new__ vs Tick.__new__ + try: + with warnings.catch_warnings(record=True): + # We want to silence any warnings about, e.g. moved modules. + warnings.simplefilter("ignore", Warning) + return pickle.load(handles.handle) + except excs_to_catch: + # e.g. 
+ # "No module named 'pandas.core.sparse.series'" + # "Can't get attribute '__nat_unpickle' on %s,key->%s] [items->%s] +""" + +# formats +_FORMAT_MAP = {"f": "fixed", "fixed": "fixed", "t": "table", "table": "table"} + +# axes map +_AXES_MAP = {DataFrame: [0]} + +# register our configuration options +dropna_doc: Final = """ +: boolean + drop ALL nan rows when appending to a table +""" +format_doc: Final = """ +: format + default format writing format, if None, then + put will default to 'fixed' and append will default to 'table' +""" + +with config.config_prefix("io.hdf"): + config.register_option("dropna_table", False, dropna_doc, validator=config.is_bool) + config.register_option( + "default_format", + None, + format_doc, + validator=config.is_one_of_factory(["fixed", "table", None]), + ) + +# oh the troubles to reduce import time +_table_mod = None +_table_file_open_policy_is_strict = False + + +def _tables(): + global _table_mod + global _table_file_open_policy_is_strict + if _table_mod is None: + import tables + + _table_mod = tables + + # set the file open policy + # return the file open policy; this changes as of pytables 3.1 + # depending on the HDF5 version + with suppress(AttributeError): + _table_file_open_policy_is_strict = ( + tables.file._FILE_OPEN_POLICY == "strict" + ) + + return _table_mod + + +# interface to/from ### + + +def to_hdf( + path_or_buf: FilePath | HDFStore, + key: str, + value: DataFrame | Series, + mode: str = "a", + complevel: int | None = None, + complib: str | None = None, + append: bool = False, + format: str | None = None, + index: bool = True, + min_itemsize: int | dict[str, int] | None = None, + nan_rep=None, + dropna: bool | None = None, + data_columns: Literal[True] | list[str] | None = None, + errors: str = "strict", + encoding: str = "UTF-8", +) -> None: + """store this object, close it if we opened it""" + if append: + f = lambda store: store.append( + key, + value, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + dropna=dropna, + data_columns=data_columns, + errors=errors, + encoding=encoding, + ) + else: + # NB: dropna is not passed to `put` + f = lambda store: store.put( + key, + value, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + data_columns=data_columns, + errors=errors, + encoding=encoding, + dropna=dropna, + ) + + path_or_buf = stringify_path(path_or_buf) + if isinstance(path_or_buf, str): + with HDFStore( + path_or_buf, mode=mode, complevel=complevel, complib=complib + ) as store: + f(store) + else: + f(path_or_buf) + + +def read_hdf( + path_or_buf: FilePath | HDFStore, + key=None, + mode: str = "r", + errors: str = "strict", + where: str | list | None = None, + start: int | None = None, + stop: int | None = None, + columns: list[str] | None = None, + iterator: bool = False, + chunksize: int | None = None, + **kwargs, +): + """ + Read from the store, close it if we opened it. + + Retrieve pandas object stored in file, optionally based on where + criteria. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters + ---------- + path_or_buf : str, path object, pandas.HDFStore + Any valid string path is acceptable. Only supports the local file system, + remote URLs and file-like objects are not supported. 
+ + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. + + Alternatively, pandas accepts an open :class:`pandas.HDFStore` object. + + key : object, optional + The group identifier in the store. Can be omitted if the HDF file + contains a single pandas object. + mode : {'r', 'r+', 'a'}, default 'r' + Mode to use when opening the file. Ignored if path_or_buf is a + :class:`pandas.HDFStore`. Default is 'r'. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + where : list, optional + A list of Term (or convertible) objects. + start : int, optional + Row number to start selection. + stop : int, optional + Row number to stop selection. + columns : list, optional + A list of columns names to return. + iterator : bool, optional + Return an iterator object. + chunksize : int, optional + Number of rows to include in an iteration when using an iterator. + **kwargs + Additional keyword arguments passed to HDFStore. + + Returns + ------- + item : object + The selected object. Return type depends on the object stored. + + See Also + -------- + DataFrame.to_hdf : Write a HDF file from a DataFrame. + HDFStore : Low-level access to HDF files. + + Examples + -------- + >>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP + >>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP + >>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP + """ + if mode not in ["r", "r+", "a"]: + raise ValueError( + f"mode {mode} is not allowed while performing a read. " + f"Allowed modes are r, r+ and a." + ) + # grab the scope + if where is not None: + where = _ensure_term(where, scope_level=1) + + if isinstance(path_or_buf, HDFStore): + if not path_or_buf.is_open: + raise OSError("The HDFStore must be open for reading.") + + store = path_or_buf + auto_close = False + else: + path_or_buf = stringify_path(path_or_buf) + if not isinstance(path_or_buf, str): + raise NotImplementedError( + "Support for generic buffers has not been implemented." + ) + try: + exists = os.path.exists(path_or_buf) + + # if filepath is too long + except (TypeError, ValueError): + exists = False + + if not exists: + raise FileNotFoundError(f"File {path_or_buf} does not exist") + + store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs) + # can't auto open/close if we are using an iterator + # so delegate to the iterator + auto_close = True + + try: + if key is None: + groups = store.groups() + if len(groups) == 0: + raise ValueError( + "Dataset(s) incompatible with Pandas data types, " + "not table, or no datasets found in HDF5 file." + ) + candidate_only_group = groups[0] + + # For the HDF file to have only one dataset, all other groups + # should then be metadata groups for that candidate group. (This + # assumes that the groups() method enumerates parent groups + # before their children.) + for group_to_check in groups[1:]: + if not _is_metadata_of(group_to_check, candidate_only_group): + raise ValueError( + "key must be provided when HDF5 " + "file contains multiple datasets." + ) + key = candidate_only_group._v_pathname + return store.select( + key, + where=where, + start=start, + stop=stop, + columns=columns, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + except (ValueError, TypeError, KeyError): + if not isinstance(path_or_buf, HDFStore): + # if there is an error, close the store if we opened it. 
+ with suppress(AttributeError): + store.close() + + raise + + +def _is_metadata_of(group: Node, parent_group: Node) -> bool: + """Check if a given group is a metadata group for a given parent_group.""" + if group._v_depth <= parent_group._v_depth: + return False + + current = group + while current._v_depth > 1: + parent = current._v_parent + if parent == parent_group and current._v_name == "meta": + return True + current = current._v_parent + return False + + +class HDFStore: + """ + Dict-like IO interface for storing pandas objects in PyTables. + + Either Fixed or Table format. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters + ---------- + path : str + File path to HDF5 file. + mode : {'a', 'w', 'r', 'r+'}, default 'a' + + ``'r'`` + Read-only; no data can be modified. + ``'w'`` + Write; a new file is created (an existing file with the same + name would be deleted). + ``'a'`` + Append; an existing file is opened for reading and writing, + and if the file does not exist it is created. + ``'r+'`` + It is similar to ``'a'``, but the file must already exist. + complevel : int, 0-9, default None + Specifies a compression level for data. + A value of 0 or None disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. + fletcher32 : bool, default False + If applying compression use the fletcher32 checksum. + **kwargs + These parameters will be passed to the PyTables open_file method. + + Examples + -------- + >>> bar = pd.DataFrame(np.random.randn(10, 4)) + >>> store = pd.HDFStore('test.h5') + >>> store['foo'] = bar # write to HDF5 + >>> bar = store['foo'] # retrieve + >>> store.close() + + **Create or load HDF5 file in-memory** + + When passing the `driver` option to the PyTables open_file method through + **kwargs, the HDF5 file is loaded or created in-memory and will only be + written when closed: + + >>> bar = pd.DataFrame(np.random.randn(10, 4)) + >>> store = pd.HDFStore('test.h5', driver='H5FD_CORE') + >>> store['foo'] = bar + >>> store.close() # only now, data is written to disk + """ + + _handle: File | None + _mode: str + _complevel: int + _fletcher32: bool + + def __init__( + self, + path, + mode: str = "a", + complevel: int | None = None, + complib=None, + fletcher32: bool = False, + **kwargs, + ) -> None: + + if "format" in kwargs: + raise ValueError("format is not a defined argument for HDFStore") + + tables = import_optional_dependency("tables") + + if complib is not None and complib not in tables.filters.all_complibs: + raise ValueError( + f"complib only supports {tables.filters.all_complibs} compression." 
+ ) + + if complib is None and complevel is not None: + complib = tables.filters.default_complib + + self._path = stringify_path(path) + if mode is None: + mode = "a" + self._mode = mode + self._handle = None + self._complevel = complevel if complevel else 0 + self._complib = complib + self._fletcher32 = fletcher32 + self._filters = None + self.open(mode=mode, **kwargs) + + def __fspath__(self) -> str: + return self._path + + @property + def root(self): + """return the root node""" + self._check_if_open() + assert self._handle is not None # for mypy + return self._handle.root + + @property + def filename(self) -> str: + return self._path + + def __getitem__(self, key: str): + return self.get(key) + + def __setitem__(self, key: str, value) -> None: + self.put(key, value) + + def __delitem__(self, key: str) -> None: + return self.remove(key) + + def __getattr__(self, name: str): + """allow attribute access to get stores""" + try: + return self.get(name) + except (KeyError, ClosedFileError): + pass + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{name}'" + ) + + def __contains__(self, key: str) -> bool: + """ + check for existence of this key + can match the exact pathname or the pathnm w/o the leading '/' + """ + node = self.get_node(key) + if node is not None: + name = node._v_pathname + if name == key or name[1:] == key: + return True + return False + + def __len__(self) -> int: + return len(self.groups()) + + def __repr__(self) -> str: + pstr = pprint_thing(self._path) + return f"{type(self)}\nFile path: {pstr}\n" + + def __enter__(self) -> HDFStore: + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + self.close() + + def keys(self, include: str = "pandas") -> list[str]: + """ + Return a list of keys corresponding to objects stored in HDFStore. + + Parameters + ---------- + + include : str, default 'pandas' + When kind equals 'pandas' return pandas objects. + When kind equals 'native' return native HDF5 Table objects. + + .. versionadded:: 1.1.0 + + Returns + ------- + list + List of ABSOLUTE path-names (e.g. have the leading '/'). + + Raises + ------ + raises ValueError if kind has an illegal value + """ + if include == "pandas": + return [n._v_pathname for n in self.groups()] + + elif include == "native": + assert self._handle is not None # mypy + return [ + n._v_pathname for n in self._handle.walk_nodes("/", classname="Table") + ] + raise ValueError( + f"`include` should be either 'pandas' or 'native' but is '{include}'" + ) + + def __iter__(self) -> Iterator[str]: + return iter(self.keys()) + + def items(self) -> Iterator[tuple[str, list]]: + """ + iterate on key->group + """ + for g in self.groups(): + yield g._v_pathname, g + + def iteritems(self): + """ + iterate on key->group + """ + warnings.warn( + "iteritems is deprecated and will be removed in a future version. " + "Use .items instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + yield from self.items() + + def open(self, mode: str = "a", **kwargs) -> None: + """ + Open the file in the specified mode + + Parameters + ---------- + mode : {'a', 'w', 'r', 'r+'}, default 'a' + See HDFStore docstring or tables.open_file for info about modes + **kwargs + These parameters will be passed to the PyTables open_file method. 
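+
+        Examples
+        --------
+        A minimal sketch (the file name is illustrative): reopen an existing
+        handle in read-only mode after writing.
+
+        >>> store = pd.HDFStore("data.h5", mode="a")  # doctest: +SKIP
+        >>> store.open(mode="r")  # doctest: +SKIP
+        >>> store.close()  # doctest: +SKIP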
+ """ + tables = _tables() + + if self._mode != mode: + # if we are changing a write mode to read, ok + if self._mode in ["a", "w"] and mode in ["r", "r+"]: + pass + elif mode in ["w"]: + # this would truncate, raise here + if self.is_open: + raise PossibleDataLossError( + f"Re-opening the file [{self._path}] with mode [{self._mode}] " + "will delete the current file!" + ) + + self._mode = mode + + # close and reopen the handle + if self.is_open: + self.close() + + if self._complevel and self._complevel > 0: + self._filters = _tables().Filters( + self._complevel, self._complib, fletcher32=self._fletcher32 + ) + + if _table_file_open_policy_is_strict and self.is_open: + msg = ( + "Cannot open HDF5 file, which is already opened, " + "even in read-only mode." + ) + raise ValueError(msg) + + self._handle = tables.open_file(self._path, self._mode, **kwargs) + + def close(self) -> None: + """ + Close the PyTables file handle + """ + if self._handle is not None: + self._handle.close() + self._handle = None + + @property + def is_open(self) -> bool: + """ + return a boolean indicating whether the file is open + """ + if self._handle is None: + return False + return bool(self._handle.isopen) + + def flush(self, fsync: bool = False) -> None: + """ + Force all buffered modifications to be written to disk. + + Parameters + ---------- + fsync : bool (default False) + call ``os.fsync()`` on the file handle to force writing to disk. + + Notes + ----- + Without ``fsync=True``, flushing may not guarantee that the OS writes + to disk. With fsync, the operation will block until the OS claims the + file has been written; however, other caching layers may still + interfere. + """ + if self._handle is not None: + self._handle.flush() + if fsync: + with suppress(OSError): + os.fsync(self._handle.fileno()) + + def get(self, key: str): + """ + Retrieve pandas object stored in file. + + Parameters + ---------- + key : str + + Returns + ------- + object + Same type as object stored in file. + """ + with patch_pickle(): + # GH#31167 Without this patch, pickle doesn't know how to unpickle + # old DateOffset objects now that they are cdef classes. + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + return self._read_group(group) + + def select( + self, + key: str, + where=None, + start=None, + stop=None, + columns=None, + iterator=False, + chunksize=None, + auto_close: bool = False, + ): + """ + Retrieve pandas object stored in file, optionally based on where criteria. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters + ---------- + key : str + Object being retrieved from file. + where : list or None + List of Term (or convertible) objects, optional. + start : int or None + Row number to start selection. + stop : int, default None + Row number to stop selection. + columns : list or None + A list of columns that if not None, will limit the return columns. + iterator : bool or False + Returns an iterator. + chunksize : int or None + Number or rows to include in iteration, return an iterator. + auto_close : bool or False + Should automatically close the store when finished. + + Returns + ------- + object + Retrieved object from file. 
+ """ + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + + # create the storer and axes + where = _ensure_term(where, scope_level=1) + s = self._create_storer(group) + s.infer_axes() + + # function to call on iteration + def func(_start, _stop, _where): + return s.read(start=_start, stop=_stop, where=_where, columns=columns) + + # create the iterator + it = TableIterator( + self, + s, + func, + where=where, + nrows=s.nrows, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + return it.get_result() + + def select_as_coordinates( + self, + key: str, + where=None, + start: int | None = None, + stop: int | None = None, + ): + """ + return the selection as an Index + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + + Parameters + ---------- + key : str + where : list of Term (or convertible) objects, optional + start : integer (defaults to None), row number to start selection + stop : integer (defaults to None), row number to stop selection + """ + where = _ensure_term(where, scope_level=1) + tbl = self.get_storer(key) + if not isinstance(tbl, Table): + raise TypeError("can only read_coordinates with a table") + return tbl.read_coordinates(where=where, start=start, stop=stop) + + def select_column( + self, + key: str, + column: str, + start: int | None = None, + stop: int | None = None, + ): + """ + return a single column from the table. This is generally only useful to + select an indexable + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. + + Parameters + ---------- + key : str + column : str + The column of interest. + start : int or None, default None + stop : int or None, default None + + Raises + ------ + raises KeyError if the column is not found (or key is not a valid + store) + raises ValueError if the column can not be extracted individually (it + is part of a data block) + + """ + tbl = self.get_storer(key) + if not isinstance(tbl, Table): + raise TypeError("can only read_column with a table") + return tbl.read_column(column=column, start=start, stop=stop) + + def select_as_multiple( + self, + keys, + where=None, + selector=None, + columns=None, + start=None, + stop=None, + iterator=False, + chunksize=None, + auto_close: bool = False, + ): + """ + Retrieve pandas objects from multiple tables. + + .. warning:: + + Pandas uses PyTables for reading and writing HDF5 files, which allows + serializing object-dtype data with pickle when using the "fixed" format. + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html for more. 
+ + Parameters + ---------- + keys : a list of the tables + selector : the table to apply the where criteria (defaults to keys[0] + if not supplied) + columns : the columns I want back + start : integer (defaults to None), row number to start selection + stop : integer (defaults to None), row number to stop selection + iterator : bool, return an iterator, default False + chunksize : nrows to include in iteration, return an iterator + auto_close : bool, default False + Should automatically close the store when finished. + + Raises + ------ + raises KeyError if keys or selector is not found or keys is empty + raises TypeError if keys is not a list or tuple + raises ValueError if the tables are not ALL THE SAME DIMENSIONS + """ + # default to single select + where = _ensure_term(where, scope_level=1) + if isinstance(keys, (list, tuple)) and len(keys) == 1: + keys = keys[0] + if isinstance(keys, str): + return self.select( + key=keys, + where=where, + columns=columns, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + if not isinstance(keys, (list, tuple)): + raise TypeError("keys must be a list/tuple") + + if not len(keys): + raise ValueError("keys must have a non-zero length") + + if selector is None: + selector = keys[0] + + # collect the tables + tbls = [self.get_storer(k) for k in keys] + s = self.get_storer(selector) + + # validate rows + nrows = None + for t, k in itertools.chain([(s, selector)], zip(tbls, keys)): + if t is None: + raise KeyError(f"Invalid table [{k}]") + if not t.is_table: + raise TypeError( + f"object [{t.pathname}] is not a table, and cannot be used in all " + "select as multiple" + ) + + if nrows is None: + nrows = t.nrows + elif t.nrows != nrows: + raise ValueError("all tables must have exactly the same nrows!") + + # The isinstance checks here are redundant with the check above, + # but necessary for mypy; see GH#29757 + _tbls = [x for x in tbls if isinstance(x, Table)] + + # axis is the concentration axes + axis = list({t.non_index_axes[0][0] for t in _tbls})[0] + + def func(_start, _stop, _where): + + # retrieve the objs, _where is always passed as a set of + # coordinates here + objs = [ + t.read(where=_where, columns=columns, start=_start, stop=_stop) + for t in tbls + ] + + # concat and return + return concat(objs, axis=axis, verify_integrity=False)._consolidate() + + # create the iterator + it = TableIterator( + self, + s, + func, + where=where, + nrows=nrows, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + return it.get_result(coordinates=True) + + def put( + self, + key: str, + value: DataFrame | Series, + format=None, + index=True, + append=False, + complib=None, + complevel: int | None = None, + min_itemsize: int | dict[str, int] | None = None, + nan_rep=None, + data_columns: Literal[True] | list[str] | None = None, + encoding=None, + errors: str = "strict", + track_times: bool = True, + dropna: bool = False, + ) -> None: + """ + Store object in HDFStore. + + Parameters + ---------- + key : str + value : {Series, DataFrame} + format : 'fixed(f)|table(t)', default is 'fixed' + Format to use when storing object in HDFStore. Value can be one of: + + ``'fixed'`` + Fixed format. Fast writing/reading. Not-appendable, nor searchable. + ``'table'`` + Table format. Write as a PyTables Table structure which may perform + worse but allow more flexible operations like searching / selecting + subsets of the data. 
+ index : bool, default True + Write DataFrame index as a column. + append : bool, default False + This will force Table format, append the input data to the existing. + data_columns : list of columns or True, default None + List of columns to create as data columns, or True to use all columns. + See `here + `__. + encoding : str, default None + Provide an encoding for strings. + track_times : bool, default True + Parameter is propagated to 'create_table' method of 'PyTables'. + If set to False it enables to have the same h5 files (same hashes) + independent on creation time. + dropna : bool, default False, optional + Remove missing values. + + .. versionadded:: 1.1.0 + """ + if format is None: + format = get_option("io.hdf.default_format") or "fixed" + format = self._validate_format(format) + self._write_to_group( + key, + value, + format=format, + index=index, + append=append, + complib=complib, + complevel=complevel, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + data_columns=data_columns, + encoding=encoding, + errors=errors, + track_times=track_times, + dropna=dropna, + ) + + def remove(self, key: str, where=None, start=None, stop=None) -> None: + """ + Remove pandas object partially by specifying the where condition + + Parameters + ---------- + key : str + Node to remove or delete rows from + where : list of Term (or convertible) objects, optional + start : integer (defaults to None), row number to start selection + stop : integer (defaults to None), row number to stop selection + + Returns + ------- + number of rows removed (or None if not a Table) + + Raises + ------ + raises KeyError if key is not a valid store + + """ + where = _ensure_term(where, scope_level=1) + try: + s = self.get_storer(key) + except KeyError: + # the key is not a valid store, re-raising KeyError + raise + except AssertionError: + # surface any assertion errors for e.g. debugging + raise + except Exception as err: + # In tests we get here with ClosedFileError, TypeError, and + # _table_mod.NoSuchNodeError. TODO: Catch only these? + + if where is not None: + raise ValueError( + "trying to remove a node with a non-None where clause!" + ) from err + + # we are actually trying to remove a node (with children) + node = self.get_node(key) + if node is not None: + node._f_remove(recursive=True) + return None + + # remove the node + if com.all_none(where, start, stop): + s.group._f_remove(recursive=True) + + # delete from the table + else: + if not s.is_table: + raise ValueError( + "can only remove with where on objects written as tables" + ) + return s.delete(where=where, start=start, stop=stop) + + def append( + self, + key: str, + value: DataFrame | Series, + format=None, + axes=None, + index=True, + append=True, + complib=None, + complevel: int | None = None, + columns=None, + min_itemsize: int | dict[str, int] | None = None, + nan_rep=None, + chunksize=None, + expectedrows=None, + dropna: bool | None = None, + data_columns: Literal[True] | list[str] | None = None, + encoding=None, + errors: str = "strict", + ) -> None: + """ + Append to Table in file. + + Node must already exist and be Table format. + + Parameters + ---------- + key : str + value : {Series, DataFrame} + format : 'table' is the default + Format to use when storing object in HDFStore. Value can be one of: + + ``'table'`` + Table format. Write as a PyTables Table structure which may perform + worse but allow more flexible operations like searching / selecting + subsets of the data. 
+ index : bool, default True + Write DataFrame index as a column. + append : bool, default True + Append the input data to the existing. + data_columns : list of columns, or True, default None + List of columns to create as indexed data columns for on-disk + queries, or True to use all columns. By default only the axes + of the object are indexed. See `here + `__. + min_itemsize : dict of columns that specify minimum str sizes + nan_rep : str to use as str nan representation + chunksize : size to chunk the writing + expectedrows : expected TOTAL row size of this table + encoding : default None, provide an encoding for str + dropna : bool, default False, optional + Do not write an ALL nan row to the store settable + by the option 'io.hdf.dropna_table'. + + Notes + ----- + Does *not* check if data being appended overlaps with existing + data in the table, so be careful + """ + if columns is not None: + raise TypeError( + "columns is not a supported keyword in append, try data_columns" + ) + + if dropna is None: + dropna = get_option("io.hdf.dropna_table") + if format is None: + format = get_option("io.hdf.default_format") or "table" + format = self._validate_format(format) + self._write_to_group( + key, + value, + format=format, + axes=axes, + index=index, + append=append, + complib=complib, + complevel=complevel, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + chunksize=chunksize, + expectedrows=expectedrows, + dropna=dropna, + data_columns=data_columns, + encoding=encoding, + errors=errors, + ) + + def append_to_multiple( + self, + d: dict, + value, + selector, + data_columns=None, + axes=None, + dropna=False, + **kwargs, + ) -> None: + """ + Append to multiple tables + + Parameters + ---------- + d : a dict of table_name to table_columns, None is acceptable as the + values of one node (this will get all the remaining columns) + value : a pandas object + selector : a string that designates the indexable table; all of its + columns will be designed as data_columns, unless data_columns is + passed, in which case these are used + data_columns : list of columns to create as data columns, or True to + use all columns + dropna : if evaluates to True, drop rows from all tables if any single + row in each table has all NaN. Default False. 
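+        **kwargs : remaining keyword arguments are passed through to ``append``
+            for each table; ``min_itemsize`` is first filtered down to the
+            columns stored in that table.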
+ + Notes + ----- + axes parameter is currently not accepted + + """ + if axes is not None: + raise TypeError( + "axes is currently not accepted as a parameter to append_to_multiple; " + "you can create the tables independently instead" + ) + + if not isinstance(d, dict): + raise ValueError( + "append_to_multiple must have a dictionary specified as the " + "way to split the value" + ) + + if selector not in d: + raise ValueError( + "append_to_multiple requires a selector that is in passed dict" + ) + + # figure out the splitting axis (the non_index_axis) + axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0] + + # figure out how to split the value + remain_key = None + remain_values: list = [] + for k, v in d.items(): + if v is None: + if remain_key is not None: + raise ValueError( + "append_to_multiple can only have one value in d that is None" + ) + remain_key = k + else: + remain_values.extend(v) + if remain_key is not None: + ordered = value.axes[axis] + ordd = ordered.difference(Index(remain_values)) + ordd = sorted(ordered.get_indexer(ordd)) + d[remain_key] = ordered.take(ordd) + + # data_columns + if data_columns is None: + data_columns = d[selector] + + # ensure rows are synchronized across the tables + if dropna: + idxs = (value[cols].dropna(how="all").index for cols in d.values()) + valid_index = next(idxs) + for index in idxs: + valid_index = valid_index.intersection(index) + value = value.loc[valid_index] + + min_itemsize = kwargs.pop("min_itemsize", None) + + # append + for k, v in d.items(): + dc = data_columns if k == selector else None + + # compute the val + val = value.reindex(v, axis=axis) + + filtered = ( + {key: value for (key, value) in min_itemsize.items() if key in v} + if min_itemsize is not None + else None + ) + self.append(k, val, data_columns=dc, min_itemsize=filtered, **kwargs) + + def create_table_index( + self, + key: str, + columns=None, + optlevel: int | None = None, + kind: str | None = None, + ) -> None: + """ + Create a pytables index on the table. + + Parameters + ---------- + key : str + columns : None, bool, or listlike[str] + Indicate which columns to create an index on. + + * False : Do not create any indexes. + * True : Create indexes on all columns. + * None : Create indexes on all columns. + * listlike : Create indexes on the given columns. + + optlevel : int or None, default None + Optimization level, if None, pytables defaults to 6. + kind : str or None, default None + Kind of index, if None, pytables defaults to "medium". + + Raises + ------ + TypeError: raises if the node is not a table + """ + # version requirements + _tables() + s = self.get_storer(key) + if s is None: + return + + if not isinstance(s, Table): + raise TypeError("cannot create table index on a Fixed format store") + s.create_index(columns=columns, optlevel=optlevel, kind=kind) + + def groups(self) -> list: + """ + Return a list of all the top-level nodes. + + Each node returned is not a pandas storage object. + + Returns + ------- + list + List of objects. 
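+
+        Examples
+        --------
+        A small sketch (names are illustrative); the returned nodes are
+        PyTables groups rather than pandas objects:
+
+        >>> store = pd.HDFStore("data.h5")  # doctest: +SKIP
+        >>> store.put("df", pd.DataFrame({"A": [1]}))  # doctest: +SKIP
+        >>> [g._v_pathname for g in store.groups()]  # doctest: +SKIP
+        ['/df']
+        >>> store.close()  # doctest: +SKIP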
+ """ + _tables() + self._check_if_open() + assert self._handle is not None # for mypy + assert _table_mod is not None # for mypy + return [ + g + for g in self._handle.walk_groups() + if ( + not isinstance(g, _table_mod.link.Link) + and ( + getattr(g._v_attrs, "pandas_type", None) + or getattr(g, "table", None) + or (isinstance(g, _table_mod.table.Table) and g._v_name != "table") + ) + ) + ] + + def walk(self, where: str = "/") -> Iterator[tuple[str, list[str], list[str]]]: + """ + Walk the pytables group hierarchy for pandas objects. + + This generator will yield the group path, subgroups and pandas object + names for each group. + + Any non-pandas PyTables objects that are not a group will be ignored. + + The `where` group itself is listed first (preorder), then each of its + child groups (following an alphanumerical order) is also traversed, + following the same procedure. + + Parameters + ---------- + where : str, default "/" + Group where to start walking. + + Yields + ------ + path : str + Full path to a group (without trailing '/'). + groups : list + Names (strings) of the groups contained in `path`. + leaves : list + Names (strings) of the pandas objects contained in `path`. + """ + _tables() + self._check_if_open() + assert self._handle is not None # for mypy + assert _table_mod is not None # for mypy + + for g in self._handle.walk_groups(where): + if getattr(g._v_attrs, "pandas_type", None) is not None: + continue + + groups = [] + leaves = [] + for child in g._v_children.values(): + pandas_type = getattr(child._v_attrs, "pandas_type", None) + if pandas_type is None: + if isinstance(child, _table_mod.group.Group): + groups.append(child._v_name) + else: + leaves.append(child._v_name) + + yield (g._v_pathname.rstrip("/"), groups, leaves) + + def get_node(self, key: str) -> Node | None: + """return the node with the key or None if it does not exist""" + self._check_if_open() + if not key.startswith("/"): + key = "/" + key + + assert self._handle is not None + assert _table_mod is not None # for mypy + try: + node = self._handle.get_node(self.root, key) + except _table_mod.exceptions.NoSuchNodeError: + return None + + assert isinstance(node, _table_mod.Node), type(node) + return node + + def get_storer(self, key: str) -> GenericFixed | Table: + """return the storer object for a key, raise if not in the file""" + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + + s = self._create_storer(group) + s.infer_axes() + return s + + def copy( + self, + file, + mode="w", + propindexes: bool = True, + keys=None, + complib=None, + complevel: int | None = None, + fletcher32: bool = False, + overwrite=True, + ) -> HDFStore: + """ + Copy the existing store to a new file, updating in place. + + Parameters + ---------- + propindexes : bool, default True + Restore indexes in copied file. + keys : list, optional + List of keys to include in the copy (defaults to all). + overwrite : bool, default True + Whether to overwrite (remove and replace) existing nodes in the new store. 
+ mode, complib, complevel, fletcher32 same as in HDFStore.__init__ + + Returns + ------- + open file handle of the new store + """ + new_store = HDFStore( + file, mode=mode, complib=complib, complevel=complevel, fletcher32=fletcher32 + ) + if keys is None: + keys = list(self.keys()) + if not isinstance(keys, (tuple, list)): + keys = [keys] + for k in keys: + s = self.get_storer(k) + if s is not None: + + if k in new_store: + if overwrite: + new_store.remove(k) + + data = self.select(k) + if isinstance(s, Table): + + index: bool | list[str] = False + if propindexes: + index = [a.name for a in s.axes if a.is_indexed] + new_store.append( + k, + data, + index=index, + data_columns=getattr(s, "data_columns", None), + encoding=s.encoding, + ) + else: + new_store.put(k, data, encoding=s.encoding) + + return new_store + + def info(self) -> str: + """ + Print detailed information on the store. + + Returns + ------- + str + """ + path = pprint_thing(self._path) + output = f"{type(self)}\nFile path: {path}\n" + + if self.is_open: + lkeys = sorted(self.keys()) + if len(lkeys): + keys = [] + values = [] + + for k in lkeys: + try: + s = self.get_storer(k) + if s is not None: + keys.append(pprint_thing(s.pathname or k)) + values.append(pprint_thing(s or "invalid_HDFStore node")) + except AssertionError: + # surface any assertion errors for e.g. debugging + raise + except Exception as detail: + keys.append(k) + dstr = pprint_thing(detail) + values.append(f"[invalid_HDFStore node: {dstr}]") + + output += adjoin(12, keys, values) + else: + output += "Empty" + else: + output += "File is CLOSED" + + return output + + # ------------------------------------------------------------------------ + # private methods + + def _check_if_open(self): + if not self.is_open: + raise ClosedFileError(f"{self._path} file is not open!") + + def _validate_format(self, format: str) -> str: + """validate / deprecate formats""" + # validate + try: + format = _FORMAT_MAP[format.lower()] + except KeyError as err: + raise TypeError(f"invalid HDFStore format specified [{format}]") from err + + return format + + def _create_storer( + self, + group, + format=None, + value: DataFrame | Series | None = None, + encoding: str = "UTF-8", + errors: str = "strict", + ) -> GenericFixed | Table: + """return a suitable class to operate""" + cls: type[GenericFixed] | type[Table] + + if value is not None and not isinstance(value, (Series, DataFrame)): + raise TypeError("value must be None, Series, or DataFrame") + + def error(t): + # return instead of raising so mypy can tell where we are raising + return TypeError( + f"cannot properly create the storer for: [{t}] [group->" + f"{group},value->{type(value)},format->{format}" + ) + + pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None)) + tt = _ensure_decoded(getattr(group._v_attrs, "table_type", None)) + + # infer the pt from the passed value + if pt is None: + if value is None: + _tables() + assert _table_mod is not None # for mypy + if getattr(group, "table", None) or isinstance( + group, _table_mod.table.Table + ): + pt = "frame_table" + tt = "generic_table" + else: + raise TypeError( + "cannot create a storer if the object is not existing " + "nor a value are passed" + ) + else: + if isinstance(value, Series): + pt = "series" + else: + pt = "frame" + + # we are actually a table + if format == "table": + pt += "_table" + + # a storer node + if "table" not in pt: + _STORER_MAP = {"series": SeriesFixed, "frame": FrameFixed} + try: + cls = _STORER_MAP[pt] + except KeyError as err: + 
raise error("_STORER_MAP") from err + return cls(self, group, encoding=encoding, errors=errors) + + # existing node (and must be a table) + if tt is None: + # if we are a writer, determine the tt + if value is not None: + if pt == "series_table": + index = getattr(value, "index", None) + if index is not None: + if index.nlevels == 1: + tt = "appendable_series" + elif index.nlevels > 1: + tt = "appendable_multiseries" + elif pt == "frame_table": + index = getattr(value, "index", None) + if index is not None: + if index.nlevels == 1: + tt = "appendable_frame" + elif index.nlevels > 1: + tt = "appendable_multiframe" + + _TABLE_MAP = { + "generic_table": GenericTable, + "appendable_series": AppendableSeriesTable, + "appendable_multiseries": AppendableMultiSeriesTable, + "appendable_frame": AppendableFrameTable, + "appendable_multiframe": AppendableMultiFrameTable, + "worm": WORMTable, + } + try: + cls = _TABLE_MAP[tt] + except KeyError as err: + raise error("_TABLE_MAP") from err + + return cls(self, group, encoding=encoding, errors=errors) + + def _write_to_group( + self, + key: str, + value: DataFrame | Series, + format, + axes=None, + index=True, + append=False, + complib=None, + complevel: int | None = None, + fletcher32=None, + min_itemsize: int | dict[str, int] | None = None, + chunksize=None, + expectedrows=None, + dropna=False, + nan_rep=None, + data_columns=None, + encoding=None, + errors: str = "strict", + track_times: bool = True, + ) -> None: + # we don't want to store a table node at all if our object is 0-len + # as there are not dtypes + if getattr(value, "empty", None) and (format == "table" or append): + return + + group = self._identify_group(key, append) + + s = self._create_storer(group, format, value, encoding=encoding, errors=errors) + if append: + # raise if we are trying to append to a Fixed format, + # or a table that exists (and we are putting) + if not s.is_table or (s.is_table and format == "fixed" and s.is_exists): + raise ValueError("Can only append to Tables") + if not s.is_exists: + s.set_object_info() + else: + s.set_object_info() + + if not s.is_table and complib: + raise ValueError("Compression not supported on Fixed format stores") + + # write the object + s.write( + obj=value, + axes=axes, + append=append, + complib=complib, + complevel=complevel, + fletcher32=fletcher32, + min_itemsize=min_itemsize, + chunksize=chunksize, + expectedrows=expectedrows, + dropna=dropna, + nan_rep=nan_rep, + data_columns=data_columns, + track_times=track_times, + ) + + if isinstance(s, Table) and index: + s.create_index(columns=index) + + def _read_group(self, group: Node): + s = self._create_storer(group) + s.infer_axes() + return s.read() + + def _identify_group(self, key: str, append: bool) -> Node: + """Identify HDF5 group based on key, delete/create group if needed.""" + group = self.get_node(key) + + # we make this assertion for mypy; the get_node call will already + # have raised if this is incorrect + assert self._handle is not None + + # remove the node if we are not appending + if group is not None and not append: + self._handle.remove_node(group, recursive=True) + group = None + + if group is None: + group = self._create_nodes_and_group(key) + + return group + + def _create_nodes_and_group(self, key: str) -> Node: + """Create nodes from key and return group name.""" + # assertion for mypy + assert self._handle is not None + + paths = key.split("/") + # recursively create the groups + path = "/" + for p in paths: + if not len(p): + continue + new_path = path + if not 
path.endswith("/"): + new_path += "/" + new_path += p + group = self.get_node(new_path) + if group is None: + group = self._handle.create_group(path, p) + path = new_path + return group + + +class TableIterator: + """ + Define the iteration interface on a table + + Parameters + ---------- + store : HDFStore + s : the referred storer + func : the function to execute the query + where : the where of the query + nrows : the rows to iterate on + start : the passed start value (default is None) + stop : the passed stop value (default is None) + iterator : bool, default False + Whether to use the default iterator. + chunksize : the passed chunking value (default is 100000) + auto_close : bool, default False + Whether to automatically close the store at the end of iteration. + """ + + chunksize: int | None + store: HDFStore + s: GenericFixed | Table + + def __init__( + self, + store: HDFStore, + s: GenericFixed | Table, + func, + where, + nrows, + start=None, + stop=None, + iterator: bool = False, + chunksize: int | None = None, + auto_close: bool = False, + ) -> None: + self.store = store + self.s = s + self.func = func + self.where = where + + # set start/stop if they are not set if we are a table + if self.s.is_table: + if nrows is None: + nrows = 0 + if start is None: + start = 0 + if stop is None: + stop = nrows + stop = min(nrows, stop) + + self.nrows = nrows + self.start = start + self.stop = stop + + self.coordinates = None + if iterator or chunksize is not None: + if chunksize is None: + chunksize = 100000 + self.chunksize = int(chunksize) + else: + self.chunksize = None + + self.auto_close = auto_close + + def __iter__(self): + # iterate + current = self.start + if self.coordinates is None: + raise ValueError("Cannot iterate until get_result is called.") + while current < self.stop: + stop = min(current + self.chunksize, self.stop) + value = self.func(None, None, self.coordinates[current:stop]) + current = stop + if value is None or not len(value): + continue + + yield value + + self.close() + + def close(self) -> None: + if self.auto_close: + self.store.close() + + def get_result(self, coordinates: bool = False): + # return the actual iterator + if self.chunksize is not None: + if not isinstance(self.s, Table): + raise TypeError("can only use an iterator or chunksize on a table") + + self.coordinates = self.s.read_coordinates(where=self.where) + + return self + + # if specified read via coordinates (necessary for multiple selections + if coordinates: + if not isinstance(self.s, Table): + raise TypeError("can only read_coordinates on a table") + where = self.s.read_coordinates( + where=self.where, start=self.start, stop=self.stop + ) + else: + where = self.where + + # directly return the result + results = self.func(self.start, self.stop, where) + self.close() + return results + + +class IndexCol: + """ + an index column description class + + Parameters + ---------- + axis : axis which I reference + values : the ndarray like converted values + kind : a string description of this type + typ : the pytables type + pos : the position in the pytables + + """ + + is_an_indexable: bool = True + is_data_indexable: bool = True + _info_fields = ["freq", "tz", "index_name"] + + name: str + cname: str + + def __init__( + self, + name: str, + values=None, + kind=None, + typ=None, + cname: str | None = None, + axis=None, + pos=None, + freq=None, + tz=None, + index_name=None, + ordered=None, + table=None, + meta=None, + metadata=None, + ) -> None: + + if not isinstance(name, str): + raise 
ValueError("`name` must be a str.") + + self.values = values + self.kind = kind + self.typ = typ + self.name = name + self.cname = cname or name + self.axis = axis + self.pos = pos + self.freq = freq + self.tz = tz + self.index_name = index_name + self.ordered = ordered + self.table = table + self.meta = meta + self.metadata = metadata + + if pos is not None: + self.set_pos(pos) + + # These are ensured as long as the passed arguments match the + # constructor annotations. + assert isinstance(self.name, str) + assert isinstance(self.cname, str) + + @property + def itemsize(self) -> int: + # Assumes self.typ has already been initialized + return self.typ.itemsize + + @property + def kind_attr(self) -> str: + return f"{self.name}_kind" + + def set_pos(self, pos: int) -> None: + """set the position of this column in the Table""" + self.pos = pos + if pos is not None and self.typ is not None: + self.typ._v_pos = pos + + def __repr__(self) -> str: + temp = tuple( + map(pprint_thing, (self.name, self.cname, self.axis, self.pos, self.kind)) + ) + return ",".join( + [ + f"{key}->{value}" + for key, value in zip(["name", "cname", "axis", "pos", "kind"], temp) + ] + ) + + def __eq__(self, other: Any) -> bool: + """compare 2 col items""" + return all( + getattr(self, a, None) == getattr(other, a, None) + for a in ["name", "cname", "axis", "pos"] + ) + + def __ne__(self, other) -> bool: + return not self.__eq__(other) + + @property + def is_indexed(self) -> bool: + """return whether I am an indexed column""" + if not hasattr(self.table, "cols"): + # e.g. if infer hasn't been called yet, self.table will be None. + return False + return getattr(self.table.cols, self.cname).is_indexed + + def convert( + self, values: np.ndarray, nan_rep, encoding: str, errors: str + ) -> tuple[np.ndarray, np.ndarray] | tuple[DatetimeIndex, DatetimeIndex]: + """ + Convert the data from this selection to the appropriate pandas type. 
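+
+        Notes
+        -----
+        The converted index is returned twice, as ``(index, index)``; both
+        elements of the tuple refer to the same object (see the final
+        ``return`` statement below).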
+ """ + assert isinstance(values, np.ndarray), type(values) + + # values is a recarray + if values.dtype.fields is not None: + values = values[self.cname] + + val_kind = _ensure_decoded(self.kind) + values = _maybe_convert(values, val_kind, encoding, errors) + + kwargs = {} + kwargs["name"] = _ensure_decoded(self.index_name) + + if self.freq is not None: + kwargs["freq"] = _ensure_decoded(self.freq) + + factory: type[Index] | type[DatetimeIndex] = Index + if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype): + factory = DatetimeIndex + elif values.dtype == "i8" and "freq" in kwargs: + # PeriodIndex data is stored as i8 + # error: Incompatible types in assignment (expression has type + # "Callable[[Any, KwArg(Any)], PeriodIndex]", variable has type + # "Union[Type[Index], Type[DatetimeIndex]]") + factory = lambda x, **kwds: PeriodIndex( # type: ignore[assignment] + ordinal=x, **kwds + ) + + # making an Index instance could throw a number of different errors + try: + new_pd_index = factory(values, **kwargs) + except ValueError: + # if the output freq is different that what we recorded, + # it should be None (see also 'doc example part 2') + if "freq" in kwargs: + kwargs["freq"] = None + new_pd_index = factory(values, **kwargs) + final_pd_index = _set_tz(new_pd_index, self.tz) + return final_pd_index, final_pd_index + + def take_data(self): + """return the values""" + return self.values + + @property + def attrs(self): + return self.table._v_attrs + + @property + def description(self): + return self.table.description + + @property + def col(self): + """return my current col description""" + return getattr(self.description, self.cname, None) + + @property + def cvalues(self): + """return my cython values""" + return self.values + + def __iter__(self): + return iter(self.values) + + def maybe_set_size(self, min_itemsize=None) -> None: + """ + maybe set a string col itemsize: + min_itemsize can be an integer or a dict with this columns name + with an integer size + """ + if _ensure_decoded(self.kind) == "string": + if isinstance(min_itemsize, dict): + min_itemsize = min_itemsize.get(self.name) + + if min_itemsize is not None and self.typ.itemsize < min_itemsize: + self.typ = _tables().StringCol(itemsize=min_itemsize, pos=self.pos) + + def validate_names(self) -> None: + pass + + def validate_and_set(self, handler: AppendableTable, append: bool) -> None: + self.table = handler.table + self.validate_col() + self.validate_attr(append) + self.validate_metadata(handler) + self.write_metadata(handler) + self.set_attr() + + def validate_col(self, itemsize=None): + """validate this column: return the compared against itemsize""" + # validate this column for string truncation (or reset to the max size) + if _ensure_decoded(self.kind) == "string": + c = self.col + if c is not None: + if itemsize is None: + itemsize = self.itemsize + if c.itemsize < itemsize: + raise ValueError( + f"Trying to store a string with len [{itemsize}] in " + f"[{self.cname}] column but\nthis column has a limit of " + f"[{c.itemsize}]!\nConsider using min_itemsize to " + "preset the sizes on these columns" + ) + return c.itemsize + + return None + + def validate_attr(self, append: bool) -> None: + # check for backwards incompatibility + if append: + existing_kind = getattr(self.attrs, self.kind_attr, None) + if existing_kind is not None and existing_kind != self.kind: + raise TypeError( + f"incompatible kind in col [{existing_kind} - {self.kind}]" + ) + + def update_info(self, info) -> None: + """ + 
set/update the info for this indexable with the key/value + if there is a conflict raise/warn as needed + """ + for key in self._info_fields: + + value = getattr(self, key, None) + idx = info.setdefault(self.name, {}) + + existing_value = idx.get(key) + if key in idx and value is not None and existing_value != value: + # frequency/name just warn + if key in ["freq", "index_name"]: + ws = attribute_conflict_doc % (key, existing_value, value) + warnings.warn( + ws, AttributeConflictWarning, stacklevel=find_stack_level() + ) + + # reset + idx[key] = None + setattr(self, key, None) + + else: + raise ValueError( + f"invalid info for [{self.name}] for [{key}], " + f"existing_value [{existing_value}] conflicts with " + f"new value [{value}]" + ) + else: + if value is not None or existing_value is not None: + idx[key] = value + + def set_info(self, info) -> None: + """set my state from the passed info""" + idx = info.get(self.name) + if idx is not None: + self.__dict__.update(idx) + + def set_attr(self) -> None: + """set the kind for this column""" + setattr(self.attrs, self.kind_attr, self.kind) + + def validate_metadata(self, handler: AppendableTable) -> None: + """validate that kind=category does not change the categories""" + if self.meta == "category": + new_metadata = self.metadata + cur_metadata = handler.read_metadata(self.cname) + if ( + new_metadata is not None + and cur_metadata is not None + and not array_equivalent(new_metadata, cur_metadata) + ): + raise ValueError( + "cannot append a categorical with " + "different categories to the existing" + ) + + def write_metadata(self, handler: AppendableTable) -> None: + """set the meta data""" + if self.metadata is not None: + handler.write_metadata(self.cname, self.metadata) + + +class GenericIndexCol(IndexCol): + """an index which is not represented in the data of the table""" + + @property + def is_indexed(self) -> bool: + return False + + # error: Return type "Tuple[Int64Index, Int64Index]" of "convert" + # incompatible with return type "Union[Tuple[ndarray[Any, Any], + # ndarray[Any, Any]], Tuple[DatetimeIndex, DatetimeIndex]]" in + # supertype "IndexCol" + def convert( # type: ignore[override] + self, values: np.ndarray, nan_rep, encoding: str, errors: str + ) -> tuple[Int64Index, Int64Index]: + """ + Convert the data from this selection to the appropriate pandas type. 
+ + Parameters + ---------- + values : np.ndarray + nan_rep : str + encoding : str + errors : str + """ + assert isinstance(values, np.ndarray), type(values) + + index = Int64Index(np.arange(len(values))) + return index, index + + def set_attr(self) -> None: + pass + + +class DataCol(IndexCol): + """ + a data holding column, by definition this is not indexable + + Parameters + ---------- + data : the actual data + cname : the column name in the table to hold the data (typically + values) + meta : a string description of the metadata + metadata : the actual metadata + """ + + is_an_indexable = False + is_data_indexable = False + _info_fields = ["tz", "ordered"] + + def __init__( + self, + name: str, + values=None, + kind=None, + typ=None, + cname=None, + pos=None, + tz=None, + ordered=None, + table=None, + meta=None, + metadata=None, + dtype: DtypeArg | None = None, + data=None, + ) -> None: + super().__init__( + name=name, + values=values, + kind=kind, + typ=typ, + pos=pos, + cname=cname, + tz=tz, + ordered=ordered, + table=table, + meta=meta, + metadata=metadata, + ) + self.dtype = dtype + self.data = data + + @property + def dtype_attr(self) -> str: + return f"{self.name}_dtype" + + @property + def meta_attr(self) -> str: + return f"{self.name}_meta" + + def __repr__(self) -> str: + temp = tuple( + map( + pprint_thing, (self.name, self.cname, self.dtype, self.kind, self.shape) + ) + ) + return ",".join( + [ + f"{key}->{value}" + for key, value in zip(["name", "cname", "dtype", "kind", "shape"], temp) + ] + ) + + def __eq__(self, other: Any) -> bool: + """compare 2 col items""" + return all( + getattr(self, a, None) == getattr(other, a, None) + for a in ["name", "cname", "dtype", "pos"] + ) + + def set_data(self, data: ArrayLike) -> None: + assert data is not None + assert self.dtype is None + + data, dtype_name = _get_data_and_dtype_name(data) + + self.data = data + self.dtype = dtype_name + self.kind = _dtype_to_kind(dtype_name) + + def take_data(self): + """return the data""" + return self.data + + @classmethod + def _get_atom(cls, values: ArrayLike) -> Col: + """ + Get an appropriately typed and shaped pytables.Col object for values. 
+ """ + dtype = values.dtype + # error: Item "ExtensionDtype" of "Union[ExtensionDtype, dtype[Any]]" has no + # attribute "itemsize" + itemsize = dtype.itemsize # type: ignore[union-attr] + + shape = values.shape + if values.ndim == 1: + # EA, use block shape pretending it is 2D + # TODO(EA2D): not necessary with 2D EAs + shape = (1, values.size) + + if isinstance(values, Categorical): + codes = values.codes + atom = cls.get_atom_data(shape, kind=codes.dtype.name) + elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + atom = cls.get_atom_datetime64(shape) + elif is_timedelta64_dtype(dtype): + atom = cls.get_atom_timedelta64(shape) + elif is_complex_dtype(dtype): + atom = _tables().ComplexCol(itemsize=itemsize, shape=shape[0]) + elif is_string_dtype(dtype): + atom = cls.get_atom_string(shape, itemsize) + else: + atom = cls.get_atom_data(shape, kind=dtype.name) + + return atom + + @classmethod + def get_atom_string(cls, shape, itemsize): + return _tables().StringCol(itemsize=itemsize, shape=shape[0]) + + @classmethod + def get_atom_coltype(cls, kind: str) -> type[Col]: + """return the PyTables column class for this column""" + if kind.startswith("uint"): + k4 = kind[4:] + col_name = f"UInt{k4}Col" + elif kind.startswith("period"): + # we store as integer + col_name = "Int64Col" + else: + kcap = kind.capitalize() + col_name = f"{kcap}Col" + + return getattr(_tables(), col_name) + + @classmethod + def get_atom_data(cls, shape, kind: str) -> Col: + return cls.get_atom_coltype(kind=kind)(shape=shape[0]) + + @classmethod + def get_atom_datetime64(cls, shape): + return _tables().Int64Col(shape=shape[0]) + + @classmethod + def get_atom_timedelta64(cls, shape): + return _tables().Int64Col(shape=shape[0]) + + @property + def shape(self): + return getattr(self.data, "shape", None) + + @property + def cvalues(self): + """return my cython values""" + return self.data + + def validate_attr(self, append) -> None: + """validate that we have the same order as the existing & same dtype""" + if append: + existing_fields = getattr(self.attrs, self.kind_attr, None) + if existing_fields is not None and existing_fields != list(self.values): + raise ValueError("appended items do not match existing items in table!") + + existing_dtype = getattr(self.attrs, self.dtype_attr, None) + if existing_dtype is not None and existing_dtype != self.dtype: + raise ValueError( + "appended items dtype do not match existing items dtype in table!" + ) + + def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): + """ + Convert the data from this selection to the appropriate pandas type. 
+ + Parameters + ---------- + values : np.ndarray + nan_rep : + encoding : str + errors : str + + Returns + ------- + index : listlike to become an Index + data : ndarraylike to become a column + """ + assert isinstance(values, np.ndarray), type(values) + + # values is a recarray + if values.dtype.fields is not None: + values = values[self.cname] + + assert self.typ is not None + if self.dtype is None: + # Note: in tests we never have timedelta64 or datetime64, + # so the _get_data_and_dtype_name may be unnecessary + converted, dtype_name = _get_data_and_dtype_name(values) + kind = _dtype_to_kind(dtype_name) + else: + converted = values + dtype_name = self.dtype + kind = self.kind + + assert isinstance(converted, np.ndarray) # for mypy + + # use the meta if needed + meta = _ensure_decoded(self.meta) + metadata = self.metadata + ordered = self.ordered + tz = self.tz + + assert dtype_name is not None + # convert to the correct dtype + dtype = _ensure_decoded(dtype_name) + + # reverse converts + if dtype == "datetime64": + # recreate with tz if indicated + converted = _set_tz(converted, tz, coerce=True) + + elif dtype == "timedelta64": + converted = np.asarray(converted, dtype="m8[ns]") + elif dtype == "date": + try: + converted = np.asarray( + [date.fromordinal(v) for v in converted], dtype=object + ) + except ValueError: + converted = np.asarray( + [date.fromtimestamp(v) for v in converted], dtype=object + ) + + elif meta == "category": + # we have a categorical + categories = metadata + codes = converted.ravel() + + # if we have stored a NaN in the categories + # then strip it; in theory we could have BOTH + # -1s in the codes and nulls :< + if categories is None: + # Handle case of NaN-only categorical columns in which case + # the categories are an empty array; when this is stored, + # pytables cannot write a zero-len array, so on readback + # the categories would be None and `read_hdf()` would fail. + categories = Index([], dtype=np.float64) + else: + mask = isna(categories) + if mask.any(): + categories = categories[~mask] + codes[codes != -1] -= mask.astype(int).cumsum()._values + + converted = Categorical.from_codes( + codes, categories=categories, ordered=ordered + ) + + else: + + try: + converted = converted.astype(dtype, copy=False) + except TypeError: + converted = converted.astype("O", copy=False) + + # convert nans / decode + if _ensure_decoded(kind) == "string": + converted = _unconvert_string_array( + converted, nan_rep=nan_rep, encoding=encoding, errors=errors + ) + + return self.values, converted + + def set_attr(self) -> None: + """set the data for this column""" + setattr(self.attrs, self.kind_attr, self.values) + setattr(self.attrs, self.meta_attr, self.meta) + assert self.dtype is not None + setattr(self.attrs, self.dtype_attr, self.dtype) + + +class DataIndexableCol(DataCol): + """represent a data column that can be indexed""" + + is_data_indexable = True + + def validate_names(self) -> None: + if not Index(self.values).is_object(): + # TODO: should the message here be more specifically non-str? 
+ raise ValueError("cannot have non-object label DataIndexableCol") + + @classmethod + def get_atom_string(cls, shape, itemsize): + return _tables().StringCol(itemsize=itemsize) + + @classmethod + def get_atom_data(cls, shape, kind: str) -> Col: + return cls.get_atom_coltype(kind=kind)() + + @classmethod + def get_atom_datetime64(cls, shape): + return _tables().Int64Col() + + @classmethod + def get_atom_timedelta64(cls, shape): + return _tables().Int64Col() + + +class GenericDataIndexableCol(DataIndexableCol): + """represent a generic pytables data column""" + + pass + + +class Fixed: + """ + represent an object in my store + facilitate read/write of various types of objects + this is an abstract base class + + Parameters + ---------- + parent : HDFStore + group : Node + The group node where the table resides. + """ + + pandas_kind: str + format_type: str = "fixed" # GH#30962 needed by dask + obj_type: type[DataFrame | Series] + ndim: int + encoding: str + parent: HDFStore + group: Node + errors: str + is_table: bool = False + + def __init__( + self, + parent: HDFStore, + group: Node, + encoding: str = "UTF-8", + errors: str = "strict", + ) -> None: + assert isinstance(parent, HDFStore), type(parent) + assert _table_mod is not None # needed for mypy + assert isinstance(group, _table_mod.Node), type(group) + self.parent = parent + self.group = group + self.encoding = _ensure_encoding(encoding) + self.errors = errors + + @property + def is_old_version(self) -> bool: + return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1 + + @property + def version(self) -> tuple[int, int, int]: + """compute and set our version""" + version = _ensure_decoded(getattr(self.group._v_attrs, "pandas_version", None)) + try: + version = tuple(int(x) for x in version.split(".")) + if len(version) == 2: + version = version + (0,) + except AttributeError: + version = (0, 0, 0) + return version + + @property + def pandas_type(self): + return _ensure_decoded(getattr(self.group._v_attrs, "pandas_type", None)) + + def __repr__(self) -> str: + """return a pretty representation of myself""" + self.infer_axes() + s = self.shape + if s is not None: + if isinstance(s, (list, tuple)): + jshape = ",".join([pprint_thing(x) for x in s]) + s = f"[{jshape}]" + return f"{self.pandas_type:12.12} (shape->{s})" + return self.pandas_type + + def set_object_info(self) -> None: + """set my pandas type & version""" + self.attrs.pandas_type = str(self.pandas_kind) + self.attrs.pandas_version = str(_version) + + def copy(self) -> Fixed: + new_self = copy.copy(self) + return new_self + + @property + def shape(self): + return self.nrows + + @property + def pathname(self): + return self.group._v_pathname + + @property + def _handle(self): + return self.parent._handle + + @property + def _filters(self): + return self.parent._filters + + @property + def _complevel(self) -> int: + return self.parent._complevel + + @property + def _fletcher32(self) -> bool: + return self.parent._fletcher32 + + @property + def attrs(self): + return self.group._v_attrs + + def set_attrs(self) -> None: + """set our object attributes""" + pass + + def get_attrs(self) -> None: + """get our object attributes""" + pass + + @property + def storable(self): + """return my storable""" + return self.group + + @property + def is_exists(self) -> bool: + return False + + @property + def nrows(self): + return getattr(self.storable, "nrows", None) + + def validate(self, other) -> Literal[True] | None: + """validate against an existing storable""" + if other 
is None: + return None + return True + + def validate_version(self, where=None) -> None: + """are we trying to operate on an old version?""" + pass + + def infer_axes(self) -> bool: + """ + infer the axes of my storer + return a boolean indicating if we have a valid storer or not + """ + s = self.storable + if s is None: + return False + self.get_attrs() + return True + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + raise NotImplementedError( + "cannot read on an abstract storer: subclasses should implement" + ) + + def write(self, **kwargs): + raise NotImplementedError( + "cannot write on an abstract storer: subclasses should implement" + ) + + def delete( + self, where=None, start: int | None = None, stop: int | None = None + ) -> None: + """ + support fully deleting the node in its entirety (only) - where + specification must be None + """ + if com.all_none(where, start, stop): + self._handle.remove_node(self.group, recursive=True) + return None + + raise TypeError("cannot delete on an abstract storer") + + +class GenericFixed(Fixed): + """a generified fixed version""" + + _index_type_map = {DatetimeIndex: "datetime", PeriodIndex: "period"} + _reverse_index_map = {v: k for k, v in _index_type_map.items()} + attributes: list[str] = [] + + # indexer helpers + def _class_to_alias(self, cls) -> str: + return self._index_type_map.get(cls, "") + + def _alias_to_class(self, alias): + if isinstance(alias, type): # pragma: no cover + # compat: for a short period of time master stored types + return alias + return self._reverse_index_map.get(alias, Index) + + def _get_index_factory(self, attrs): + index_class = self._alias_to_class( + _ensure_decoded(getattr(attrs, "index_class", "")) + ) + + factory: Callable + + if index_class == DatetimeIndex: + + def f(values, freq=None, tz=None): + # data are already in UTC, localize and convert if tz present + dta = DatetimeArray._simple_new(values.values, freq=freq) + result = DatetimeIndex._simple_new(dta, name=None) + if tz is not None: + result = result.tz_localize("UTC").tz_convert(tz) + return result + + factory = f + elif index_class == PeriodIndex: + + def f(values, freq=None, tz=None): + parr = PeriodArray._simple_new(values, freq=freq) + return PeriodIndex._simple_new(parr, name=None) + + factory = f + else: + factory = index_class + + kwargs = {} + if "freq" in attrs: + kwargs["freq"] = attrs["freq"] + if index_class is Index: + # DTI/PI would be gotten by _alias_to_class + factory = TimedeltaIndex + + if "tz" in attrs: + if isinstance(attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = attrs["tz"] + assert index_class is DatetimeIndex # just checking + + return factory, kwargs + + def validate_read(self, columns, where) -> None: + """ + raise if any keywords are passed which are not-None + """ + if columns is not None: + raise TypeError( + "cannot pass a column specification when reading " + "a Fixed format store. this store must be selected in its entirety" + ) + if where is not None: + raise TypeError( + "cannot pass a where specification when reading " + "from a Fixed format store. 
this store must be selected in its entirety" + ) + + @property + def is_exists(self) -> bool: + return True + + def set_attrs(self) -> None: + """set our object attributes""" + self.attrs.encoding = self.encoding + self.attrs.errors = self.errors + + def get_attrs(self) -> None: + """retrieve our attributes""" + self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) + self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) + for n in self.attributes: + setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None))) + + # error: Signature of "write" incompatible with supertype "Fixed" + def write(self, obj, **kwargs) -> None: # type: ignore[override] + self.set_attrs() + + def read_array(self, key: str, start: int | None = None, stop: int | None = None): + """read an array for the specified node (off of group""" + import tables + + node = getattr(self.group, key) + attrs = node._v_attrs + + transposed = getattr(attrs, "transposed", False) + + if isinstance(node, tables.VLArray): + ret = node[0][start:stop] + else: + dtype = _ensure_decoded(getattr(attrs, "value_type", None)) + shape = getattr(attrs, "shape", None) + + if shape is not None: + # length 0 axis + ret = np.empty(shape, dtype=dtype) + else: + ret = node[start:stop] + + if dtype == "datetime64": + # reconstruct a timezone if indicated + tz = getattr(attrs, "tz", None) + ret = _set_tz(ret, tz, coerce=True) + + elif dtype == "timedelta64": + ret = np.asarray(ret, dtype="m8[ns]") + + if transposed: + return ret.T + else: + return ret + + def read_index( + self, key: str, start: int | None = None, stop: int | None = None + ) -> Index: + variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety")) + + if variety == "multi": + return self.read_multi_index(key, start=start, stop=stop) + elif variety == "regular": + node = getattr(self.group, key) + index = self.read_index_node(node, start=start, stop=stop) + return index + else: # pragma: no cover + raise TypeError(f"unrecognized index variety: {variety}") + + def write_index(self, key: str, index: Index) -> None: + if isinstance(index, MultiIndex): + setattr(self.attrs, f"{key}_variety", "multi") + self.write_multi_index(key, index) + else: + setattr(self.attrs, f"{key}_variety", "regular") + converted = _convert_index("index", index, self.encoding, self.errors) + + self.write_array(key, converted.values) + + node = getattr(self.group, key) + node._v_attrs.kind = converted.kind + node._v_attrs.name = index.name + + if isinstance(index, (DatetimeIndex, PeriodIndex)): + node._v_attrs.index_class = self._class_to_alias(type(index)) + + if isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + node._v_attrs.freq = index.freq + + if isinstance(index, DatetimeIndex) and index.tz is not None: + node._v_attrs.tz = _get_tz(index.tz) + + def write_multi_index(self, key: str, index: MultiIndex) -> None: + setattr(self.attrs, f"{key}_nlevels", index.nlevels) + + for i, (lev, level_codes, name) in enumerate( + zip(index.levels, index.codes, index.names) + ): + # write the level + if is_extension_array_dtype(lev): + raise NotImplementedError( + "Saving a MultiIndex with an extension dtype is not supported." 
+ ) + level_key = f"{key}_level{i}" + conv_level = _convert_index(level_key, lev, self.encoding, self.errors) + self.write_array(level_key, conv_level.values) + node = getattr(self.group, level_key) + node._v_attrs.kind = conv_level.kind + node._v_attrs.name = name + + # write the name + setattr(node._v_attrs, f"{key}_name{name}", name) + + # write the labels + label_key = f"{key}_label{i}" + self.write_array(label_key, level_codes) + + def read_multi_index( + self, key: str, start: int | None = None, stop: int | None = None + ) -> MultiIndex: + nlevels = getattr(self.attrs, f"{key}_nlevels") + + levels = [] + codes = [] + names: list[Hashable] = [] + for i in range(nlevels): + level_key = f"{key}_level{i}" + node = getattr(self.group, level_key) + lev = self.read_index_node(node, start=start, stop=stop) + levels.append(lev) + names.append(lev.name) + + label_key = f"{key}_label{i}" + level_codes = self.read_array(label_key, start=start, stop=stop) + codes.append(level_codes) + + return MultiIndex( + levels=levels, codes=codes, names=names, verify_integrity=True + ) + + def read_index_node( + self, node: Node, start: int | None = None, stop: int | None = None + ) -> Index: + data = node[start:stop] + # If the index was an empty array write_array_empty() will + # have written a sentinel. Here we replace it with the original. + if "shape" in node._v_attrs and np.prod(node._v_attrs.shape) == 0: + data = np.empty(node._v_attrs.shape, dtype=node._v_attrs.value_type) + kind = _ensure_decoded(node._v_attrs.kind) + name = None + + if "name" in node._v_attrs: + name = _ensure_str(node._v_attrs.name) + name = _ensure_decoded(name) + + attrs = node._v_attrs + factory, kwargs = self._get_index_factory(attrs) + + if kind == "date": + index = factory( + _unconvert_index( + data, kind, encoding=self.encoding, errors=self.errors + ), + dtype=object, + **kwargs, + ) + else: + index = factory( + _unconvert_index( + data, kind, encoding=self.encoding, errors=self.errors + ), + **kwargs, + ) + + index.name = name + + return index + + def write_array_empty(self, key: str, value: ArrayLike) -> None: + """write a 0-len array""" + # ugly hack for length 0 axes + arr = np.empty((1,) * value.ndim) + self._handle.create_array(self.group, key, arr) + node = getattr(self.group, key) + node._v_attrs.value_type = str(value.dtype) + node._v_attrs.shape = value.shape + + def write_array( + self, key: str, obj: AnyArrayLike, items: Index | None = None + ) -> None: + # TODO: we only have a few tests that get here, the only EA + # that gets passed is DatetimeArray, and we never have + # both self._filters and EA + + value = extract_array(obj, extract_numpy=True) + + if key in self.group: + self._handle.remove_node(self.group, key) + + # Transform needed to interface with pytables row/col notation + empty_array = value.size == 0 + transposed = False + + if is_categorical_dtype(value.dtype): + raise NotImplementedError( + "Cannot store a category dtype in a HDF5 dataset that uses format=" + '"fixed". Use format="table".' + ) + if not empty_array: + if hasattr(value, "T"): + # ExtensionArrays (1d) may not have transpose. 
+ value = value.T + transposed = True + + atom = None + if self._filters is not None: + with suppress(ValueError): + # get the atom for this datatype + atom = _tables().Atom.from_dtype(value.dtype) + + if atom is not None: + # We only get here if self._filters is non-None and + # the Atom.from_dtype call succeeded + + # create an empty chunked array and fill it from value + if not empty_array: + ca = self._handle.create_carray( + self.group, key, atom, value.shape, filters=self._filters + ) + ca[:] = value + + else: + self.write_array_empty(key, value) + + elif value.dtype.type == np.object_: + # infer the type, warn if we have a non-string type here (for + # performance) + inferred_type = lib.infer_dtype(value, skipna=False) + if empty_array: + pass + elif inferred_type == "string": + pass + else: + ws = performance_doc % (inferred_type, key, items) + warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level()) + + vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) + vlarr.append(value) + + elif is_datetime64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view("i8")) + getattr(self.group, key)._v_attrs.value_type = "datetime64" + elif is_datetime64tz_dtype(value.dtype): + # store as UTC + # with a zone + + # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no + # attribute "asi8" + self._handle.create_array( + self.group, key, value.asi8 # type: ignore[union-attr] + ) + + node = getattr(self.group, key) + # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no + # attribute "tz" + node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr] + node._v_attrs.value_type = "datetime64" + elif is_timedelta64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view("i8")) + getattr(self.group, key)._v_attrs.value_type = "timedelta64" + elif empty_array: + self.write_array_empty(key, value) + else: + self._handle.create_array(self.group, key, value) + + getattr(self.group, key)._v_attrs.transposed = transposed + + +class SeriesFixed(GenericFixed): + pandas_kind = "series" + attributes = ["name"] + + name: Hashable + + @property + def shape(self): + try: + return (len(self.group.values),) + except (TypeError, AttributeError): + return None + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ) -> Series: + self.validate_read(columns, where) + index = self.read_index("index", start=start, stop=stop) + values = self.read_array("values", start=start, stop=stop) + return Series(values, index=index, name=self.name) + + # error: Signature of "write" incompatible with supertype "Fixed" + def write(self, obj, **kwargs) -> None: # type: ignore[override] + super().write(obj, **kwargs) + self.write_index("index", obj.index) + self.write_array("values", obj) + self.attrs.name = obj.name + + +class BlockManagerFixed(GenericFixed): + attributes = ["ndim", "nblocks"] + + nblocks: int + + @property + def shape(self) -> Shape | None: + try: + ndim = self.ndim + + # items + items = 0 + for i in range(self.nblocks): + node = getattr(self.group, f"block{i}_items") + shape = getattr(node, "shape", None) + if shape is not None: + items += shape[0] + + # data shape + node = self.group.block0_values + shape = getattr(node, "shape", None) + if shape is not None: + shape = list(shape[0 : (ndim - 1)]) + else: + shape = [] + + shape.append(items) + + return shape + except AttributeError: + return None + + def read( + self, + where=None, + columns=None, + start: 
int | None = None, + stop: int | None = None, + ) -> DataFrame: + # start, stop applied to rows, so 0th axis only + self.validate_read(columns, where) + select_axis = self.obj_type()._get_block_manager_axis(0) + + axes = [] + for i in range(self.ndim): + + _start, _stop = (start, stop) if i == select_axis else (None, None) + ax = self.read_index(f"axis{i}", start=_start, stop=_stop) + axes.append(ax) + + items = axes[0] + dfs = [] + + for i in range(self.nblocks): + + blk_items = self.read_index(f"block{i}_items") + values = self.read_array(f"block{i}_values", start=_start, stop=_stop) + + columns = items[items.get_indexer(blk_items)] + df = DataFrame(values.T, columns=columns, index=axes[1]) + dfs.append(df) + + if len(dfs) > 0: + out = concat(dfs, axis=1) + out = out.reindex(columns=items, copy=False) + return out + + return DataFrame(columns=axes[0], index=axes[1]) + + # error: Signature of "write" incompatible with supertype "Fixed" + def write(self, obj, **kwargs) -> None: # type: ignore[override] + super().write(obj, **kwargs) + + # TODO(ArrayManager) HDFStore relies on accessing the blocks + if isinstance(obj._mgr, ArrayManager): + obj = obj._as_manager("block") + + data = obj._mgr + if not data.is_consolidated(): + data = data.consolidate() + + self.attrs.ndim = data.ndim + for i, ax in enumerate(data.axes): + if i == 0 and (not ax.is_unique): + raise ValueError("Columns index has to be unique for fixed format") + self.write_index(f"axis{i}", ax) + + # Supporting mixed-type DataFrame objects...nontrivial + self.attrs.nblocks = len(data.blocks) + for i, blk in enumerate(data.blocks): + # I have no idea why, but writing values before items fixed #2299 + blk_items = data.items.take(blk.mgr_locs) + self.write_array(f"block{i}_values", blk.values, items=blk_items) + self.write_index(f"block{i}_items", blk_items) + + +class FrameFixed(BlockManagerFixed): + pandas_kind = "frame" + obj_type = DataFrame + + +class Table(Fixed): + """ + represent a table: + facilitate read/write of various types of tables + + Attrs in Table Node + ------------------- + These are attributes that are store in the main table node, they are + necessary to recreate these tables when read back in. 
+ + index_axes : a list of tuples of the (original indexing axis and + index column) + non_index_axes: a list of tuples of the (original index axis and + columns on a non-indexing axis) + values_axes : a list of the columns which comprise the data of this + table + data_columns : a list of the columns that we are allowing indexing + (these become single columns in values_axes) + nan_rep : the string to use for nan representations for string + objects + levels : the names of levels + metadata : the names of the metadata columns + """ + + pandas_kind = "wide_table" + format_type: str = "table" # GH#30962 needed by dask + table_type: str + levels: int | list[Hashable] = 1 + is_table = True + + index_axes: list[IndexCol] + non_index_axes: list[tuple[int, Any]] + values_axes: list[DataCol] + data_columns: list + metadata: list + info: dict + + def __init__( + self, + parent: HDFStore, + group: Node, + encoding=None, + errors: str = "strict", + index_axes=None, + non_index_axes=None, + values_axes=None, + data_columns=None, + info=None, + nan_rep=None, + ) -> None: + super().__init__(parent, group, encoding=encoding, errors=errors) + self.index_axes = index_axes or [] + self.non_index_axes = non_index_axes or [] + self.values_axes = values_axes or [] + self.data_columns = data_columns or [] + self.info = info or {} + self.nan_rep = nan_rep + + @property + def table_type_short(self) -> str: + return self.table_type.split("_")[0] + + def __repr__(self) -> str: + """return a pretty representation of myself""" + self.infer_axes() + jdc = ",".join(self.data_columns) if len(self.data_columns) else "" + dc = f",dc->[{jdc}]" + + ver = "" + if self.is_old_version: + jver = ".".join([str(x) for x in self.version]) + ver = f"[{jver}]" + + jindex_axes = ",".join([a.name for a in self.index_axes]) + return ( + f"{self.pandas_type:12.12}{ver} " + f"(typ->{self.table_type_short},nrows->{self.nrows}," + f"ncols->{self.ncols},indexers->[{jindex_axes}]{dc})" + ) + + def __getitem__(self, c: str): + """return the axis for c""" + for a in self.axes: + if c == a.name: + return a + return None + + def validate(self, other) -> None: + """validate against an existing table""" + if other is None: + return + + if other.table_type != self.table_type: + raise TypeError( + "incompatible table_type with existing " + f"[{other.table_type} - {self.table_type}]" + ) + + for c in ["index_axes", "non_index_axes", "values_axes"]: + sv = getattr(self, c, None) + ov = getattr(other, c, None) + if sv != ov: + + # show the error for the specific axes + # Argument 1 to "enumerate" has incompatible type + # "Optional[Any]"; expected "Iterable[Any]" [arg-type] + for i, sax in enumerate(sv): # type: ignore[arg-type] + # Value of type "Optional[Any]" is not indexable [index] + oax = ov[i] # type: ignore[index] + if sax != oax: + raise ValueError( + f"invalid combination of [{c}] on appending data " + f"[{sax}] vs current table [{oax}]" + ) + + # should never get here + raise Exception( + f"invalid combination of [{c}] on appending data [{sv}] vs " + f"current table [{ov}]" + ) + + @property + def is_multi_index(self) -> bool: + """the levels attribute is 1 or a list in the case of a multi-index""" + return isinstance(self.levels, list) + + def validate_multiindex( + self, obj: DataFrame | Series + ) -> tuple[DataFrame, list[Hashable]]: + """ + validate that we can store the multi-index; reset and return the + new object + """ + levels = com.fill_missing_names(obj.index.names) + try: + reset_obj = obj.reset_index() + except ValueError as 
err: + raise ValueError( + "duplicate names/columns in the multi-index when storing as a table" + ) from err + assert isinstance(reset_obj, DataFrame) # for mypy + return reset_obj, levels + + @property + def nrows_expected(self) -> int: + """based on our axes, compute the expected nrows""" + return np.prod([i.cvalues.shape[0] for i in self.index_axes]) + + @property + def is_exists(self) -> bool: + """has this table been created""" + return "table" in self.group + + @property + def storable(self): + return getattr(self.group, "table", None) + + @property + def table(self): + """return the table group (this is my storable)""" + return self.storable + + @property + def dtype(self): + return self.table.dtype + + @property + def description(self): + return self.table.description + + @property + def axes(self): + return itertools.chain(self.index_axes, self.values_axes) + + @property + def ncols(self) -> int: + """the number of total columns in the values axes""" + return sum(len(a.values) for a in self.values_axes) + + @property + def is_transposed(self) -> bool: + return False + + @property + def data_orientation(self) -> tuple[int, ...]: + """return a tuple of my permutated axes, non_indexable at the front""" + return tuple( + itertools.chain( + [int(a[0]) for a in self.non_index_axes], + [int(a.axis) for a in self.index_axes], + ) + ) + + def queryables(self) -> dict[str, Any]: + """return a dict of the kinds allowable columns for this object""" + # mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here + axis_names = {0: "index", 1: "columns"} + + # compute the values_axes queryables + d1 = [(a.cname, a) for a in self.index_axes] + d2 = [(axis_names[axis], None) for axis, values in self.non_index_axes] + d3 = [ + (v.cname, v) for v in self.values_axes if v.name in set(self.data_columns) + ] + + # error: Unsupported operand types for + ("List[Tuple[str, IndexCol]]" and + # "List[Tuple[str, None]]") + return dict(d1 + d2 + d3) # type: ignore[operator] + + def index_cols(self): + """return a list of my index cols""" + # Note: each `i.cname` below is assured to be a str. + return [(i.axis, i.cname) for i in self.index_axes] + + def values_cols(self) -> list[str]: + """return a list of my values cols""" + return [i.cname for i in self.values_axes] + + def _get_metadata_path(self, key: str) -> str: + """return the metadata pathname for this key""" + group = self.group._v_pathname + return f"{group}/meta/{key}/meta" + + def write_metadata(self, key: str, values: np.ndarray) -> None: + """ + Write out a metadata array to the key as a fixed-format Series. 
+ + Parameters + ---------- + key : str + values : ndarray + """ + self.parent.put( + self._get_metadata_path(key), + Series(values), + format="table", + encoding=self.encoding, + errors=self.errors, + nan_rep=self.nan_rep, + ) + + def read_metadata(self, key: str): + """return the meta data array for this key""" + if getattr(getattr(self.group, "meta", None), key, None) is not None: + return self.parent.select(self._get_metadata_path(key)) + return None + + def set_attrs(self) -> None: + """set our table type & indexables""" + self.attrs.table_type = str(self.table_type) + self.attrs.index_cols = self.index_cols() + self.attrs.values_cols = self.values_cols() + self.attrs.non_index_axes = self.non_index_axes + self.attrs.data_columns = self.data_columns + self.attrs.nan_rep = self.nan_rep + self.attrs.encoding = self.encoding + self.attrs.errors = self.errors + self.attrs.levels = self.levels + self.attrs.info = self.info + + def get_attrs(self) -> None: + """retrieve our attributes""" + self.non_index_axes = getattr(self.attrs, "non_index_axes", None) or [] + self.data_columns = getattr(self.attrs, "data_columns", None) or [] + self.info = getattr(self.attrs, "info", None) or {} + self.nan_rep = getattr(self.attrs, "nan_rep", None) + self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) + self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) + self.levels: list[Hashable] = getattr(self.attrs, "levels", None) or [] + self.index_axes = [a for a in self.indexables if a.is_an_indexable] + self.values_axes = [a for a in self.indexables if not a.is_an_indexable] + + def validate_version(self, where=None) -> None: + """are we trying to operate on an old version?""" + if where is not None: + if self.is_old_version: + ws = incompatibility_doc % ".".join([str(x) for x in self.version]) + warnings.warn( + ws, + IncompatibilityWarning, + stacklevel=find_stack_level(), + ) + + def validate_min_itemsize(self, min_itemsize) -> None: + """ + validate the min_itemsize doesn't contain items that are not in the + axes this needs data_columns to be defined + """ + if min_itemsize is None: + return + if not isinstance(min_itemsize, dict): + return + + q = self.queryables() + for k in min_itemsize: + + # ok, apply generally + if k == "values": + continue + if k not in q: + raise ValueError( + f"min_itemsize has the key [{k}] which is not an axis or " + "data_column" + ) + + @cache_readonly + def indexables(self): + """create/cache the indexables if they don't exist""" + _indexables = [] + + desc = self.description + table_attrs = self.table.attrs + + # Note: each of the `name` kwargs below are str, ensured + # by the definition in index_cols. + # index columns + for i, (axis, name) in enumerate(self.attrs.index_cols): + atom = getattr(desc, name) + md = self.read_metadata(name) + meta = "category" if md is not None else None + + kind_attr = f"{name}_kind" + kind = getattr(table_attrs, kind_attr, None) + + index_col = IndexCol( + name=name, + axis=axis, + pos=i, + kind=kind, + typ=atom, + table=self.table, + meta=meta, + metadata=md, + ) + _indexables.append(index_col) + + # values columns + dc = set(self.data_columns) + base_pos = len(_indexables) + + def f(i, c): + assert isinstance(c, str) + klass = DataCol + if c in dc: + klass = DataIndexableCol + + atom = getattr(desc, c) + adj_name = _maybe_adjust_name(c, self.version) + + # TODO: why kind_attr here? 
+ values = getattr(table_attrs, f"{adj_name}_kind", None) + dtype = getattr(table_attrs, f"{adj_name}_dtype", None) + # Argument 1 to "_dtype_to_kind" has incompatible type + # "Optional[Any]"; expected "str" [arg-type] + kind = _dtype_to_kind(dtype) # type: ignore[arg-type] + + md = self.read_metadata(c) + # TODO: figure out why these two versions of `meta` dont always match. + # meta = "category" if md is not None else None + meta = getattr(table_attrs, f"{adj_name}_meta", None) + + obj = klass( + name=adj_name, + cname=c, + values=values, + kind=kind, + pos=base_pos + i, + typ=atom, + table=self.table, + meta=meta, + metadata=md, + dtype=dtype, + ) + return obj + + # Note: the definition of `values_cols` ensures that each + # `c` below is a str. + _indexables.extend([f(i, c) for i, c in enumerate(self.attrs.values_cols)]) + + return _indexables + + def create_index( + self, columns=None, optlevel=None, kind: str | None = None + ) -> None: + """ + Create a pytables index on the specified columns. + + Parameters + ---------- + columns : None, bool, or listlike[str] + Indicate which columns to create an index on. + + * False : Do not create any indexes. + * True : Create indexes on all columns. + * None : Create indexes on all columns. + * listlike : Create indexes on the given columns. + + optlevel : int or None, default None + Optimization level, if None, pytables defaults to 6. + kind : str or None, default None + Kind of index, if None, pytables defaults to "medium". + + Raises + ------ + TypeError if trying to create an index on a complex-type column. + + Notes + ----- + Cannot index Time64Col or ComplexCol. + Pytables must be >= 3.0. + """ + if not self.infer_axes(): + return + if columns is False: + return + + # index all indexables and data_columns + if columns is None or columns is True: + columns = [a.cname for a in self.axes if a.is_data_indexable] + if not isinstance(columns, (tuple, list)): + columns = [columns] + + kw = {} + if optlevel is not None: + kw["optlevel"] = optlevel + if kind is not None: + kw["kind"] = kind + + table = self.table + for c in columns: + v = getattr(table.cols, c, None) + if v is not None: + # remove the index if the kind/optlevel have changed + if v.is_indexed: + index = v.index + cur_optlevel = index.optlevel + cur_kind = index.kind + + if kind is not None and cur_kind != kind: + v.remove_index() + else: + kw["kind"] = cur_kind + + if optlevel is not None and cur_optlevel != optlevel: + v.remove_index() + else: + kw["optlevel"] = cur_optlevel + + # create the index + if not v.is_indexed: + if v.type.startswith("complex"): + raise TypeError( + "Columns containing complex values can be stored but " + "cannot be indexed when using table format. Either use " + "fixed format, set index=False, or do not include " + "the columns containing complex values to " + "data_columns when initializing the table." + ) + v.create_index(**kw) + elif c in self.non_index_axes[0][1]: + # GH 28156 + raise AttributeError( + f"column {c} is not a data_column.\n" + f"In order to read column {c} you must reload the dataframe \n" + f"into HDFStore and include {c} with the data_columns argument." + ) + + def _read_axes( + self, where, start: int | None = None, stop: int | None = None + ) -> list[tuple[ArrayLike, ArrayLike]]: + """ + Create the axes sniffed from the table. + + Parameters + ---------- + where : ??? 
+ start : int or None, default None + stop : int or None, default None + + Returns + ------- + List[Tuple[index_values, column_values]] + """ + # create the selection + selection = Selection(self, where=where, start=start, stop=stop) + values = selection.select() + + results = [] + # convert the data + for a in self.axes: + a.set_info(self.info) + res = a.convert( + values, + nan_rep=self.nan_rep, + encoding=self.encoding, + errors=self.errors, + ) + results.append(res) + + return results + + @classmethod + def get_object(cls, obj, transposed: bool): + """return the data for this obj""" + return obj + + def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): + """ + take the input data_columns and min_itemize and create a data + columns spec + """ + if not len(non_index_axes): + return [] + + axis, axis_labels = non_index_axes[0] + info = self.info.get(axis, {}) + if info.get("type") == "MultiIndex" and data_columns: + raise ValueError( + f"cannot use a multi-index on axis [{axis}] with " + f"data_columns {data_columns}" + ) + + # evaluate the passed data_columns, True == use all columns + # take only valid axis labels + if data_columns is True: + data_columns = list(axis_labels) + elif data_columns is None: + data_columns = [] + + # if min_itemsize is a dict, add the keys (exclude 'values') + if isinstance(min_itemsize, dict): + existing_data_columns = set(data_columns) + data_columns = list(data_columns) # ensure we do not modify + data_columns.extend( + [ + k + for k in min_itemsize.keys() + if k != "values" and k not in existing_data_columns + ] + ) + + # return valid columns in the order of our axis + return [c for c in data_columns if c in axis_labels] + + def _create_axes( + self, + axes, + obj: DataFrame, + validate: bool = True, + nan_rep=None, + data_columns=None, + min_itemsize=None, + ): + """ + Create and return the axes. + + Parameters + ---------- + axes: list or None + The names or numbers of the axes to create. + obj : DataFrame + The object to create axes on. + validate: bool, default True + Whether to validate the obj against an existing object already written. + nan_rep : + A value to use for string column nan_rep. + data_columns : List[str], True, or None, default None + Specify the columns that we want to create to allow indexing on. + + * True : Use all available columns. + * None : Use no columns. + * List[str] : Use the specified columns. + + min_itemsize: Dict[str, int] or None, default None + The min itemsize for a column in bytes. + """ + if not isinstance(obj, DataFrame): + group = self.group._v_name + raise TypeError( + f"cannot properly create the storer for: [group->{group}," + f"value->{type(obj)}]" + ) + + # set the default axes if needed + if axes is None: + axes = [0] + + # map axes to numbers + axes = [obj._get_axis_number(a) for a in axes] + + # do we have an existing table (if so, use its axes & data_columns) + if self.infer_axes(): + table_exists = True + axes = [a.axis for a in self.index_axes] + data_columns = list(self.data_columns) + nan_rep = self.nan_rep + # TODO: do we always have validate=True here? 
+ else: + table_exists = False + + new_info = self.info + + assert self.ndim == 2 # with next check, we must have len(axes) == 1 + # currently support on ndim-1 axes + if len(axes) != self.ndim - 1: + raise ValueError( + "currently only support ndim-1 indexers in an AppendableTable" + ) + + # create according to the new data + new_non_index_axes: list = [] + + # nan_representation + if nan_rep is None: + nan_rep = "nan" + + # We construct the non-index-axis first, since that alters new_info + idx = [x for x in [0, 1] if x not in axes][0] + + a = obj.axes[idx] + # we might be able to change the axes on the appending data if necessary + append_axis = list(a) + if table_exists: + indexer = len(new_non_index_axes) # i.e. 0 + exist_axis = self.non_index_axes[indexer][1] + if not array_equivalent(np.array(append_axis), np.array(exist_axis)): + + # ahah! -> reindex + if array_equivalent( + np.array(sorted(append_axis)), np.array(sorted(exist_axis)) + ): + append_axis = exist_axis + + # the non_index_axes info + info = new_info.setdefault(idx, {}) + info["names"] = list(a.names) + info["type"] = type(a).__name__ + + new_non_index_axes.append((idx, append_axis)) + + # Now we can construct our new index axis + idx = axes[0] + a = obj.axes[idx] + axis_name = obj._get_axis_name(idx) + new_index = _convert_index(axis_name, a, self.encoding, self.errors) + new_index.axis = idx + + # Because we are always 2D, there is only one new_index, so + # we know it will have pos=0 + new_index.set_pos(0) + new_index.update_info(new_info) + new_index.maybe_set_size(min_itemsize) # check for column conflicts + + new_index_axes = [new_index] + j = len(new_index_axes) # i.e. 1 + assert j == 1 + + # reindex by our non_index_axes & compute data_columns + assert len(new_non_index_axes) == 1 + for a in new_non_index_axes: + obj = _reindex_axis(obj, a[0], a[1]) + + transposed = new_index.axis == 1 + + # figure out data_columns and get out blocks + data_columns = self.validate_data_columns( + data_columns, min_itemsize, new_non_index_axes + ) + + frame = self.get_object(obj, transposed)._consolidate() + + blocks, blk_items = self._get_blocks_and_items( + frame, table_exists, new_non_index_axes, self.values_axes, data_columns + ) + + # add my values + vaxes = [] + for i, (blk, b_items) in enumerate(zip(blocks, blk_items)): + + # shape of the data column are the indexable axes + klass = DataCol + name = None + + # we have a data_column + if data_columns and len(b_items) == 1 and b_items[0] in data_columns: + klass = DataIndexableCol + name = b_items[0] + if not (name is None or isinstance(name, str)): + # TODO: should the message here be more specifically non-str? 
+ raise ValueError("cannot have non-object label DataIndexableCol") + + # make sure that we match up the existing columns + # if we have an existing table + existing_col: DataCol | None + + if table_exists and validate: + try: + existing_col = self.values_axes[i] + except (IndexError, KeyError) as err: + raise ValueError( + f"Incompatible appended table [{blocks}]" + f"with existing table [{self.values_axes}]" + ) from err + else: + existing_col = None + + new_name = name or f"values_block_{i}" + data_converted = _maybe_convert_for_string_atom( + new_name, + blk.values, + existing_col=existing_col, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + encoding=self.encoding, + errors=self.errors, + columns=b_items, + ) + adj_name = _maybe_adjust_name(new_name, self.version) + + typ = klass._get_atom(data_converted) + kind = _dtype_to_kind(data_converted.dtype.name) + tz = None + if getattr(data_converted, "tz", None) is not None: + tz = _get_tz(data_converted.tz) + + meta = metadata = ordered = None + if is_categorical_dtype(data_converted.dtype): + ordered = data_converted.ordered + meta = "category" + metadata = np.array(data_converted.categories, copy=False).ravel() + + data, dtype_name = _get_data_and_dtype_name(data_converted) + + col = klass( + name=adj_name, + cname=new_name, + values=list(b_items), + typ=typ, + pos=j, + kind=kind, + tz=tz, + ordered=ordered, + meta=meta, + metadata=metadata, + dtype=dtype_name, + data=data, + ) + col.update_info(new_info) + + vaxes.append(col) + + j += 1 + + dcs = [col.name for col in vaxes if col.is_data_indexable] + + new_table = type(self)( + parent=self.parent, + group=self.group, + encoding=self.encoding, + errors=self.errors, + index_axes=new_index_axes, + non_index_axes=new_non_index_axes, + values_axes=vaxes, + data_columns=dcs, + info=new_info, + nan_rep=nan_rep, + ) + if hasattr(self, "levels"): + # TODO: get this into constructor, only for appropriate subclass + new_table.levels = self.levels + + new_table.validate_min_itemsize(min_itemsize) + + if validate and table_exists: + new_table.validate(self) + + return new_table + + @staticmethod + def _get_blocks_and_items( + frame: DataFrame, + table_exists: bool, + new_non_index_axes, + values_axes, + data_columns, + ): + # Helper to clarify non-state-altering parts of _create_axes + + # TODO(ArrayManager) HDFStore relies on accessing the blocks + if isinstance(frame._mgr, ArrayManager): + frame = frame._as_manager("block") + + def get_blk_items(mgr): + return [mgr.items.take(blk.mgr_locs) for blk in mgr.blocks] + + mgr = frame._mgr + mgr = cast(BlockManager, mgr) + blocks: list[Block] = list(mgr.blocks) + blk_items: list[Index] = get_blk_items(mgr) + + if len(data_columns): + axis, axis_labels = new_non_index_axes[0] + new_labels = Index(axis_labels).difference(Index(data_columns)) + mgr = frame.reindex(new_labels, axis=axis)._mgr + + # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has no + # attribute "blocks" + blocks = list(mgr.blocks) # type: ignore[union-attr] + blk_items = get_blk_items(mgr) + for c in data_columns: + mgr = frame.reindex([c], axis=axis)._mgr + # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has + # no attribute "blocks" + blocks.extend(mgr.blocks) # type: ignore[union-attr] + blk_items.extend(get_blk_items(mgr)) + + # reorder the blocks in the same order as the existing table if we can + if table_exists: + by_items = { + tuple(b_items.tolist()): (b, b_items) + for b, b_items in zip(blocks, blk_items) + } + new_blocks: list[Block] = 
[] + new_blk_items = [] + for ea in values_axes: + items = tuple(ea.values) + try: + b, b_items = by_items.pop(items) + new_blocks.append(b) + new_blk_items.append(b_items) + except (IndexError, KeyError) as err: + jitems = ",".join([pprint_thing(item) for item in items]) + raise ValueError( + f"cannot match existing table structure for [{jitems}] " + "on appending data" + ) from err + blocks = new_blocks + blk_items = new_blk_items + + return blocks, blk_items + + def process_axes(self, obj, selection: Selection, columns=None) -> DataFrame: + """process axes filters""" + # make a copy to avoid side effects + if columns is not None: + columns = list(columns) + + # make sure to include levels if we have them + if columns is not None and self.is_multi_index: + assert isinstance(self.levels, list) # assured by is_multi_index + for n in self.levels: + if n not in columns: + columns.insert(0, n) + + # reorder by any non_index_axes & limit to the select columns + for axis, labels in self.non_index_axes: + obj = _reindex_axis(obj, axis, labels, columns) + + # apply the selection filters (but keep in the same order) + if selection.filter is not None: + for field, op, filt in selection.filter.format(): + + def process_filter(field, filt): + + for axis_name in obj._AXIS_ORDERS: + axis_number = obj._get_axis_number(axis_name) + axis_values = obj._get_axis(axis_name) + assert axis_number is not None + + # see if the field is the name of an axis + if field == axis_name: + + # if we have a multi-index, then need to include + # the levels + if self.is_multi_index: + filt = filt.union(Index(self.levels)) + + takers = op(axis_values, filt) + return obj.loc(axis=axis_number)[takers] + + # this might be the name of a file IN an axis + elif field in axis_values: + + # we need to filter on this dimension + values = ensure_index(getattr(obj, field).values) + filt = ensure_index(filt) + + # hack until we support reversed dim flags + if isinstance(obj, DataFrame): + axis_number = 1 - axis_number + takers = op(values, filt) + return obj.loc(axis=axis_number)[takers] + + raise ValueError(f"cannot find the field [{field}] for filtering!") + + obj = process_filter(field, filt) + + return obj + + def create_description( + self, + complib, + complevel: int | None, + fletcher32: bool, + expectedrows: int | None, + ) -> dict[str, Any]: + """create the description of the table from the axes & values""" + # provided expected rows if its passed + if expectedrows is None: + expectedrows = max(self.nrows_expected, 10000) + + d = {"name": "table", "expectedrows": expectedrows} + + # description from the axes & values + d["description"] = {a.cname: a.typ for a in self.axes} + + if complib: + if complevel is None: + complevel = self._complevel or 9 + filters = _tables().Filters( + complevel=complevel, + complib=complib, + fletcher32=fletcher32 or self._fletcher32, + ) + d["filters"] = filters + elif self._filters is not None: + d["filters"] = self._filters + + return d + + def read_coordinates( + self, where=None, start: int | None = None, stop: int | None = None + ): + """ + select coordinates (row numbers) from a table; return the + coordinates object + """ + # validate the version + self.validate_version(where) + + # infer the data kind + if not self.infer_axes(): + return False + + # create the selection + selection = Selection(self, where=where, start=start, stop=stop) + coords = selection.select_coords() + if selection.filter is not None: + for field, op, filt in selection.filter.format(): + data = self.read_column( + 
field, start=coords.min(), stop=coords.max() + 1 + ) + coords = coords[op(data.iloc[coords - coords.min()], filt).values] + + return Index(coords) + + def read_column( + self, + column: str, + where=None, + start: int | None = None, + stop: int | None = None, + ): + """ + return a single column from the table, generally only indexables + are interesting + """ + # validate the version + self.validate_version() + + # infer the data kind + if not self.infer_axes(): + return False + + if where is not None: + raise TypeError("read_column does not currently accept a where clause") + + # find the axes + for a in self.axes: + if column == a.name: + if not a.is_data_indexable: + raise ValueError( + f"column [{column}] can not be extracted individually; " + "it is not data indexable" + ) + + # column must be an indexable or a data column + c = getattr(self.table.cols, column) + a.set_info(self.info) + col_values = a.convert( + c[start:stop], + nan_rep=self.nan_rep, + encoding=self.encoding, + errors=self.errors, + ) + return Series(_set_tz(col_values[1], a.tz), name=column) + + raise KeyError(f"column [{column}] not found in the table") + + +class WORMTable(Table): + """ + a write-once read-many table: this format DOES NOT ALLOW appending to a + table. writing is a one-time operation the data are stored in a format + that allows for searching the data on disk + """ + + table_type = "worm" + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + """ + read the indices and the indexing array, calculate offset rows and return + """ + raise NotImplementedError("WORMTable needs to implement read") + + def write(self, **kwargs) -> None: + """ + write in a format that we can search later on (but cannot append + to): write out the indices and the values using _write_array + (e.g. 
a CArray) create an indexing table so that we can search + """ + raise NotImplementedError("WORMTable needs to implement write") + + +class AppendableTable(Table): + """support the new appendable table formats""" + + table_type = "appendable" + + # error: Signature of "write" incompatible with supertype "Fixed" + def write( # type: ignore[override] + self, + obj, + axes=None, + append: bool = False, + complib=None, + complevel=None, + fletcher32=None, + min_itemsize=None, + chunksize=None, + expectedrows=None, + dropna: bool = False, + nan_rep=None, + data_columns=None, + track_times=True, + ) -> None: + if not append and self.is_exists: + self._handle.remove_node(self.group, "table") + + # create the axes + table = self._create_axes( + axes=axes, + obj=obj, + validate=append, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + data_columns=data_columns, + ) + + for a in table.axes: + a.validate_names() + + if not table.is_exists: + + # create the table + options = table.create_description( + complib=complib, + complevel=complevel, + fletcher32=fletcher32, + expectedrows=expectedrows, + ) + + # set the table attributes + table.set_attrs() + + options["track_times"] = track_times + + # create the table + table._handle.create_table(table.group, **options) + + # update my info + table.attrs.info = table.info + + # validate the axes and set the kinds + for a in table.axes: + a.validate_and_set(table, append) + + # add the rows + table.write_data(chunksize, dropna=dropna) + + def write_data(self, chunksize: int | None, dropna: bool = False) -> None: + """ + we form the data into a 2-d including indexes,values,mask write chunk-by-chunk + """ + names = self.dtype.names + nrows = self.nrows_expected + + # if dropna==True, then drop ALL nan rows + masks = [] + if dropna: + for a in self.values_axes: + # figure the mask: only do if we can successfully process this + # column, otherwise ignore the mask + mask = isna(a.data).all(axis=0) + if isinstance(mask, np.ndarray): + masks.append(mask.astype("u1", copy=False)) + + # consolidate masks + if len(masks): + mask = masks[0] + for m in masks[1:]: + mask = mask & m + mask = mask.ravel() + else: + mask = None + + # broadcast the indexes if needed + indexes = [a.cvalues for a in self.index_axes] + nindexes = len(indexes) + assert nindexes == 1, nindexes # ensures we dont need to broadcast + + # transpose the values so first dimension is last + # reshape the values if needed + values = [a.take_data() for a in self.values_axes] + values = [v.transpose(np.roll(np.arange(v.ndim), v.ndim - 1)) for v in values] + bvalues = [] + for i, v in enumerate(values): + new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape + bvalues.append(values[i].reshape(new_shape)) + + # write the chunks + if chunksize is None: + chunksize = 100000 + + rows = np.empty(min(chunksize, nrows), dtype=self.dtype) + chunks = nrows // chunksize + 1 + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, nrows) + if start_i >= end_i: + break + + self.write_data_chunk( + rows, + indexes=[a[start_i:end_i] for a in indexes], + mask=mask[start_i:end_i] if mask is not None else None, + values=[v[start_i:end_i] for v in bvalues], + ) + + def write_data_chunk( + self, + rows: np.ndarray, + indexes: list[np.ndarray], + mask: npt.NDArray[np.bool_] | None, + values: list[np.ndarray], + ) -> None: + """ + Parameters + ---------- + rows : an empty memory space where we are putting the chunk + indexes : an array of the indexes + mask : an array of the masks + values : 
an array of the values + """ + # 0 len + for v in values: + if not np.prod(v.shape): + return + + nrows = indexes[0].shape[0] + if nrows != len(rows): + rows = np.empty(nrows, dtype=self.dtype) + names = self.dtype.names + nindexes = len(indexes) + + # indexes + for i, idx in enumerate(indexes): + rows[names[i]] = idx + + # values + for i, v in enumerate(values): + rows[names[i + nindexes]] = v + + # mask + if mask is not None: + m = ~mask.ravel().astype(bool, copy=False) + if not m.all(): + rows = rows[m] + + if len(rows): + self.table.append(rows) + self.table.flush() + + def delete(self, where=None, start: int | None = None, stop: int | None = None): + + # delete all rows (and return the nrows) + if where is None or not len(where): + if start is None and stop is None: + nrows = self.nrows + self._handle.remove_node(self.group, recursive=True) + else: + # pytables<3.0 would remove a single row with stop=None + if stop is None: + stop = self.nrows + nrows = self.table.remove_rows(start=start, stop=stop) + self.table.flush() + return nrows + + # infer the data kind + if not self.infer_axes(): + return None + + # create the selection + table = self.table + selection = Selection(self, where, start=start, stop=stop) + values = selection.select_coords() + + # delete the rows in reverse order + sorted_series = Series(values).sort_values() + ln = len(sorted_series) + + if ln: + + # construct groups of consecutive rows + diff = sorted_series.diff() + groups = list(diff[diff > 1].index) + + # 1 group + if not len(groups): + groups = [0] + + # final element + if groups[-1] != ln: + groups.append(ln) + + # initial element + if groups[0] != 0: + groups.insert(0, 0) + + # we must remove in reverse order! + pg = groups.pop() + for g in reversed(groups): + rows = sorted_series.take(range(g, pg)) + table.remove_rows( + start=rows[rows.index[0]], stop=rows[rows.index[-1]] + 1 + ) + pg = g + + self.table.flush() + + # return the number of rows removed + return ln + + +class AppendableFrameTable(AppendableTable): + """support the new appendable table formats""" + + pandas_kind = "frame_table" + table_type = "appendable_frame" + ndim = 2 + obj_type: type[DataFrame | Series] = DataFrame + + @property + def is_transposed(self) -> bool: + return self.index_axes[0].axis == 1 + + @classmethod + def get_object(cls, obj, transposed: bool): + """these are written transposed""" + if transposed: + obj = obj.T + return obj + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + + # validate the version + self.validate_version(where) + + # infer the data kind + if not self.infer_axes(): + return None + + result = self._read_axes(where=where, start=start, stop=stop) + + info = ( + self.info.get(self.non_index_axes[0][0], {}) + if len(self.non_index_axes) + else {} + ) + + inds = [i for i, ax in enumerate(self.axes) if ax is self.index_axes[0]] + assert len(inds) == 1 + ind = inds[0] + + index = result[ind][0] + + frames = [] + for i, a in enumerate(self.axes): + if a not in self.values_axes: + continue + index_vals, cvalues = result[i] + + # we could have a multi-index constructor here + # ensure_index doesn't recognized our list-of-tuples here + if info.get("type") != "MultiIndex": + cols = Index(index_vals) + else: + cols = MultiIndex.from_tuples(index_vals) + + names = info.get("names") + if names is not None: + cols.set_names(names, inplace=True) + + if self.is_transposed: + values = cvalues + index_ = cols + cols_ = Index(index, name=getattr(index, "name", 
None)) + else: + values = cvalues.T + index_ = Index(index, name=getattr(index, "name", None)) + cols_ = cols + + # if we have a DataIndexableCol, its shape will only be 1 dim + if values.ndim == 1 and isinstance(values, np.ndarray): + values = values.reshape((1, values.shape[0])) + + if isinstance(values, np.ndarray): + df = DataFrame(values.T, columns=cols_, index=index_) + elif isinstance(values, Index): + df = DataFrame(values, columns=cols_, index=index_) + else: + # Categorical + df = DataFrame._from_arrays([values], columns=cols_, index=index_) + assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype) + frames.append(df) + + if len(frames) == 1: + df = frames[0] + else: + df = concat(frames, axis=1) + + selection = Selection(self, where=where, start=start, stop=stop) + # apply the selection filters & axis orderings + df = self.process_axes(df, selection=selection, columns=columns) + + return df + + +class AppendableSeriesTable(AppendableFrameTable): + """support the new appendable table formats""" + + pandas_kind = "series_table" + table_type = "appendable_series" + ndim = 2 + obj_type = Series + + @property + def is_transposed(self) -> bool: + return False + + @classmethod + def get_object(cls, obj, transposed: bool): + return obj + + def write(self, obj, data_columns=None, **kwargs): + """we are going to write this as a frame table""" + if not isinstance(obj, DataFrame): + name = obj.name or "values" + obj = obj.to_frame(name) + return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs) + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ) -> Series: + + is_multi_index = self.is_multi_index + if columns is not None and is_multi_index: + assert isinstance(self.levels, list) # needed for mypy + for n in self.levels: + if n not in columns: + columns.insert(0, n) + s = super().read(where=where, columns=columns, start=start, stop=stop) + if is_multi_index: + s.set_index(self.levels, inplace=True) + + s = s.iloc[:, 0] + + # remove the default name + if s.name == "values": + s.name = None + return s + + +class AppendableMultiSeriesTable(AppendableSeriesTable): + """support the new appendable table formats""" + + pandas_kind = "series_table" + table_type = "appendable_multiseries" + + def write(self, obj, **kwargs): + """we are going to write this as a frame table""" + name = obj.name or "values" + newobj, self.levels = self.validate_multiindex(obj) + assert isinstance(self.levels, list) # for mypy + cols = list(self.levels) + cols.append(name) + newobj.columns = Index(cols) + return super().write(obj=newobj, **kwargs) + + +class GenericTable(AppendableFrameTable): + """a table that read/writes the generic pytables table format""" + + pandas_kind = "frame_table" + table_type = "generic_table" + ndim = 2 + obj_type = DataFrame + levels: list[Hashable] + + @property + def pandas_type(self) -> str: + return self.pandas_kind + + @property + def storable(self): + return getattr(self.group, "table", None) or self.group + + def get_attrs(self) -> None: + """retrieve our attributes""" + self.non_index_axes = [] + self.nan_rep = None + self.levels = [] + + self.index_axes = [a for a in self.indexables if a.is_an_indexable] + self.values_axes = [a for a in self.indexables if not a.is_an_indexable] + self.data_columns = [a.name for a in self.values_axes] + + @cache_readonly + def indexables(self): + """create the indexables from the table description""" + d = self.description + + # TODO: can we get a typ for this? 
AFAICT it is the only place + # where we aren't passing one + # the index columns is just a simple index + md = self.read_metadata("index") + meta = "category" if md is not None else None + index_col = GenericIndexCol( + name="index", axis=0, table=self.table, meta=meta, metadata=md + ) + + _indexables: list[GenericIndexCol | GenericDataIndexableCol] = [index_col] + + for i, n in enumerate(d._v_names): + assert isinstance(n, str) + + atom = getattr(d, n) + md = self.read_metadata(n) + meta = "category" if md is not None else None + dc = GenericDataIndexableCol( + name=n, + pos=i, + values=[n], + typ=atom, + table=self.table, + meta=meta, + metadata=md, + ) + _indexables.append(dc) + + return _indexables + + def write(self, **kwargs): + raise NotImplementedError("cannot write on an generic table") + + +class AppendableMultiFrameTable(AppendableFrameTable): + """a frame with a multi-index""" + + table_type = "appendable_multiframe" + obj_type = DataFrame + ndim = 2 + _re_levels = re.compile(r"^level_\d+$") + + @property + def table_type_short(self) -> str: + return "appendable_multi" + + def write(self, obj, data_columns=None, **kwargs): + if data_columns is None: + data_columns = [] + elif data_columns is True: + data_columns = obj.columns.tolist() + obj, self.levels = self.validate_multiindex(obj) + assert isinstance(self.levels, list) # for mypy + for n in self.levels: + if n not in data_columns: + data_columns.insert(0, n) + return super().write(obj=obj, data_columns=data_columns, **kwargs) + + def read( + self, + where=None, + columns=None, + start: int | None = None, + stop: int | None = None, + ): + + df = super().read(where=where, columns=columns, start=start, stop=stop) + df = df.set_index(self.levels) + + # remove names for 'level_%d' + df.index = df.index.set_names( + [None if self._re_levels.search(name) else name for name in df.index.names] + ) + + return df + + +def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataFrame: + ax = obj._get_axis(axis) + labels = ensure_index(labels) + + # try not to reindex even if other is provided + # if it equals our current index + if other is not None: + other = ensure_index(other) + if (other is None or labels.equals(other)) and labels.equals(ax): + return obj + + labels = ensure_index(labels.unique()) + if other is not None: + labels = ensure_index(other.unique()).intersection(labels, sort=False) + if not labels.equals(ax): + slicer: list[slice | Index] = [slice(None, None)] * obj.ndim + slicer[axis] = labels + obj = obj.loc[tuple(slicer)] + return obj + + +# tz to/from coercion + + +def _get_tz(tz: tzinfo) -> str | tzinfo: + """for a tz-aware type, return an encoded zone""" + zone = timezones.get_timezone(tz) + return zone + + +@overload +def _set_tz( + values: np.ndarray | Index, tz: str | tzinfo, coerce: bool = False +) -> DatetimeIndex: + ... + + +@overload +def _set_tz(values: np.ndarray | Index, tz: None, coerce: bool = False) -> np.ndarray: + ... + + +def _set_tz( + values: np.ndarray | Index, tz: str | tzinfo | None, coerce: bool = False +) -> np.ndarray | DatetimeIndex: + """ + coerce the values to a DatetimeIndex if tz is set + preserve the input shape if possible + + Parameters + ---------- + values : ndarray or Index + tz : str or tzinfo + coerce : if we do not have a passed timezone, coerce to M8[ns] ndarray + """ + if isinstance(values, DatetimeIndex): + # If values is tzaware, the tz gets dropped in the values.ravel() + # call below (which returns an ndarray). 
So we are only non-lossy + # if `tz` matches `values.tz`. + assert values.tz is None or values.tz == tz + + if tz is not None: + if isinstance(values, DatetimeIndex): + name = values.name + values = values.asi8 + else: + name = None + values = values.ravel() + + tz = _ensure_decoded(tz) + values = DatetimeIndex(values, name=name) + values = values.tz_localize("UTC").tz_convert(tz) + elif coerce: + values = np.asarray(values, dtype="M8[ns]") + + # error: Incompatible return value type (got "Union[ndarray, Index]", + # expected "Union[ndarray, DatetimeIndex]") + return values # type: ignore[return-value] + + +def _convert_index(name: str, index: Index, encoding: str, errors: str) -> IndexCol: + assert isinstance(name, str) + + index_name = index.name + # error: Argument 1 to "_get_data_and_dtype_name" has incompatible type "Index"; + # expected "Union[ExtensionArray, ndarray]" + converted, dtype_name = _get_data_and_dtype_name(index) # type: ignore[arg-type] + kind = _dtype_to_kind(dtype_name) + atom = DataIndexableCol._get_atom(converted) + + if ( + isinstance(index, Int64Index) + or needs_i8_conversion(index.dtype) + or is_bool_dtype(index.dtype) + ): + # Includes Int64Index, RangeIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, + # in which case "kind" is "integer", "integer", "datetime64", + # "timedelta64", and "integer", respectively. + return IndexCol( + name, + values=converted, + kind=kind, + typ=atom, + freq=getattr(index, "freq", None), + tz=getattr(index, "tz", None), + index_name=index_name, + ) + + if isinstance(index, MultiIndex): + raise TypeError("MultiIndex not supported here!") + + inferred_type = lib.infer_dtype(index, skipna=False) + # we won't get inferred_type of "datetime64" or "timedelta64" as these + # would go through the DatetimeIndex/TimedeltaIndex paths above + + values = np.asarray(index) + + if inferred_type == "date": + converted = np.asarray([v.toordinal() for v in values], dtype=np.int32) + return IndexCol( + name, converted, "date", _tables().Time32Col(), index_name=index_name + ) + elif inferred_type == "string": + + converted = _convert_string_array(values, encoding, errors) + itemsize = converted.dtype.itemsize + return IndexCol( + name, + converted, + "string", + _tables().StringCol(itemsize), + index_name=index_name, + ) + + elif inferred_type in ["integer", "floating"]: + return IndexCol( + name, values=converted, kind=kind, typ=atom, index_name=index_name + ) + else: + assert isinstance(converted, np.ndarray) and converted.dtype == object + assert kind == "object", kind + atom = _tables().ObjectAtom() + return IndexCol(name, converted, kind, atom, index_name=index_name) + + +def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index: + index: Index | np.ndarray + + if kind == "datetime64": + index = DatetimeIndex(data) + elif kind == "timedelta64": + index = TimedeltaIndex(data) + elif kind == "date": + try: + index = np.asarray([date.fromordinal(v) for v in data], dtype=object) + except (ValueError): + index = np.asarray([date.fromtimestamp(v) for v in data], dtype=object) + elif kind in ("integer", "float", "bool"): + index = np.asarray(data) + elif kind in ("string"): + index = _unconvert_string_array( + data, nan_rep=None, encoding=encoding, errors=errors + ) + elif kind == "object": + index = np.asarray(data[0]) + else: # pragma: no cover + raise ValueError(f"unrecognized index type {kind}") + return index + + +def _maybe_convert_for_string_atom( + name: str, + bvalues: ArrayLike, + existing_col, + min_itemsize, + 
nan_rep, + encoding, + errors, + columns: list[str], +): + + if bvalues.dtype != object: + return bvalues + + bvalues = cast(np.ndarray, bvalues) + + dtype_name = bvalues.dtype.name + inferred_type = lib.infer_dtype(bvalues, skipna=False) + + if inferred_type == "date": + raise TypeError("[date] is not implemented as a table column") + elif inferred_type == "datetime": + # after GH#8260 + # this only would be hit for a multi-timezone dtype which is an error + raise TypeError( + "too many timezones in this block, create separate data columns" + ) + + elif not (inferred_type == "string" or dtype_name == "object"): + return bvalues + + mask = isna(bvalues) + data = bvalues.copy() + data[mask] = nan_rep + + # see if we have a valid string type + inferred_type = lib.infer_dtype(data, skipna=False) + if inferred_type != "string": + + # we cannot serialize this data, so report an exception on a column + # by column basis + + # expected behaviour: + # search block for a non-string object column by column + for i in range(data.shape[0]): + col = data[i] + inferred_type = lib.infer_dtype(col, skipna=False) + if inferred_type != "string": + error_column_label = columns[i] if len(columns) > i else f"No.{i}" + raise TypeError( + f"Cannot serialize the column [{error_column_label}]\n" + f"because its data contents are not [string] but " + f"[{inferred_type}] object dtype" + ) + + # itemsize is the maximum length of a string (along any dimension) + + data_converted = _convert_string_array(data, encoding, errors).reshape(data.shape) + itemsize = data_converted.itemsize + + # specified min_itemsize? + if isinstance(min_itemsize, dict): + min_itemsize = int(min_itemsize.get(name) or min_itemsize.get("values") or 0) + itemsize = max(min_itemsize or 0, itemsize) + + # check for column in the values conflicts + if existing_col is not None: + eci = existing_col.validate_col(itemsize) + if eci is not None and eci > itemsize: + itemsize = eci + + data_converted = data_converted.astype(f"|S{itemsize}", copy=False) + return data_converted + + +def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.ndarray: + """ + Take a string-like that is object dtype and coerce to a fixed size string type. + + Parameters + ---------- + data : np.ndarray[object] + encoding : str + errors : str + Handler for encoding errors. + + Returns + ------- + np.ndarray[fixed-length-string] + """ + # encode if needed + if len(data): + data = ( + Series(data.ravel()) + .str.encode(encoding, errors) + ._values.reshape(data.shape) + ) + + # create the sized dtype + ensured = ensure_object(data.ravel()) + itemsize = max(1, libwriters.max_len_string_array(ensured)) + + data = np.asarray(data, dtype=f"S{itemsize}") + return data + + +def _unconvert_string_array( + data: np.ndarray, nan_rep, encoding: str, errors: str +) -> np.ndarray: + """ + Inverse of _convert_string_array. + + Parameters + ---------- + data : np.ndarray[fixed-length-string] + nan_rep : the storage repr of NaN + encoding : str + errors : str + Handler for encoding errors. + + Returns + ------- + np.ndarray[object] + Decoded data. 
+ """ + shape = data.shape + data = np.asarray(data.ravel(), dtype=object) + + if len(data): + + itemsize = libwriters.max_len_string_array(ensure_object(data)) + dtype = f"U{itemsize}" + + if isinstance(data[0], bytes): + data = Series(data).str.decode(encoding, errors=errors)._values + else: + data = data.astype(dtype, copy=False).astype(object, copy=False) + + if nan_rep is None: + nan_rep = "nan" + + libwriters.string_array_replace_from_nan_rep(data, nan_rep) + return data.reshape(shape) + + +def _maybe_convert(values: np.ndarray, val_kind: str, encoding: str, errors: str): + assert isinstance(val_kind, str), type(val_kind) + if _need_convert(val_kind): + conv = _get_converter(val_kind, encoding, errors) + values = conv(values) + return values + + +def _get_converter(kind: str, encoding: str, errors: str): + if kind == "datetime64": + return lambda x: np.asarray(x, dtype="M8[ns]") + elif kind == "string": + return lambda x: _unconvert_string_array( + x, nan_rep=None, encoding=encoding, errors=errors + ) + else: # pragma: no cover + raise ValueError(f"invalid kind {kind}") + + +def _need_convert(kind: str) -> bool: + if kind in ("datetime64", "string"): + return True + return False + + +def _maybe_adjust_name(name: str, version: Sequence[int]) -> str: + """ + Prior to 0.10.1, we named values blocks like: values_block_0 an the + name values_0, adjust the given name if necessary. + + Parameters + ---------- + name : str + version : Tuple[int, int, int] + + Returns + ------- + str + """ + if isinstance(version, str) or len(version) < 3: + raise ValueError("Version is incorrect, expected sequence of 3 integers.") + + if version[0] == 0 and version[1] <= 10 and version[2] == 0: + m = re.search(r"values_block_(\d+)", name) + if m: + grp = m.groups()[0] + name = f"values_{grp}" + return name + + +def _dtype_to_kind(dtype_str: str) -> str: + """ + Find the "kind" string describing the given dtype name. + """ + dtype_str = _ensure_decoded(dtype_str) + + if dtype_str.startswith("string") or dtype_str.startswith("bytes"): + kind = "string" + elif dtype_str.startswith("float"): + kind = "float" + elif dtype_str.startswith("complex"): + kind = "complex" + elif dtype_str.startswith("int") or dtype_str.startswith("uint"): + kind = "integer" + elif dtype_str.startswith("datetime64"): + kind = "datetime64" + elif dtype_str.startswith("timedelta"): + kind = "timedelta64" + elif dtype_str.startswith("bool"): + kind = "bool" + elif dtype_str.startswith("category"): + kind = "category" + elif dtype_str.startswith("period"): + # We store the `freq` attr so we can restore from integers + kind = "integer" + elif dtype_str == "object": + kind = "object" + else: + raise ValueError(f"cannot interpret dtype of [{dtype_str}]") + + return kind + + +def _get_data_and_dtype_name(data: ArrayLike): + """ + Convert the passed data into a storable form and a dtype string. + """ + if isinstance(data, Categorical): + data = data.codes + + # For datetime64tz we need to drop the TZ in tests TODO: why? + dtype_name = data.dtype.name.split("[")[0] + + if data.dtype.kind in ["m", "M"]: + data = np.asarray(data.view("i8")) + # TODO: we used to reshape for the dt64tz case, but no longer + # doing that doesn't seem to break anything. why? + + elif isinstance(data, PeriodIndex): + data = data.asi8 + + data = np.asarray(data) + return data, dtype_name + + +class Selection: + """ + Carries out a selection operation on a tables.Table object. 
+ + Parameters + ---------- + table : a Table object + where : list of Terms (or convertible to) + start, stop: indices to start and/or stop selection + + """ + + def __init__( + self, + table: Table, + where=None, + start: int | None = None, + stop: int | None = None, + ) -> None: + self.table = table + self.where = where + self.start = start + self.stop = stop + self.condition = None + self.filter = None + self.terms = None + self.coordinates = None + + if is_list_like(where): + + # see if we have a passed coordinate like + with suppress(ValueError): + inferred = lib.infer_dtype(where, skipna=False) + if inferred == "integer" or inferred == "boolean": + where = np.asarray(where) + if where.dtype == np.bool_: + start, stop = self.start, self.stop + if start is None: + start = 0 + if stop is None: + stop = self.table.nrows + self.coordinates = np.arange(start, stop)[where] + elif issubclass(where.dtype.type, np.integer): + if (self.start is not None and (where < self.start).any()) or ( + self.stop is not None and (where >= self.stop).any() + ): + raise ValueError( + "where must have index locations >= start and < stop" + ) + self.coordinates = where + + if self.coordinates is None: + + self.terms = self.generate(where) + + # create the numexpr & the filter + if self.terms is not None: + self.condition, self.filter = self.terms.evaluate() + + def generate(self, where): + """where can be a : dict,list,tuple,string""" + if where is None: + return None + + q = self.table.queryables() + try: + return PyTablesExpr(where, queryables=q, encoding=self.table.encoding) + except NameError as err: + # raise a nice message, suggesting that the user should use + # data_columns + qkeys = ",".join(q.keys()) + msg = dedent( + f"""\ + The passed where expression: {where} + contains an invalid variable reference + all of the variable references must be a reference to + an axis (e.g. 'index' or 'columns'), or a data_column + The currently defined references are: {qkeys} + """ + ) + raise ValueError(msg) from err + + def select(self): + """ + generate the selection + """ + if self.condition is not None: + return self.table.table.read_where( + self.condition.format(), start=self.start, stop=self.stop + ) + elif self.coordinates is not None: + return self.table.table.read_coordinates(self.coordinates) + return self.table.table.read(start=self.start, stop=self.stop) + + def select_coords(self): + """ + generate the selection + """ + start, stop = self.start, self.stop + nrows = self.table.nrows + if start is None: + start = 0 + elif start < 0: + start += nrows + if stop is None: + stop = nrows + elif stop < 0: + stop += nrows + + if self.condition is not None: + return self.table.table.get_where_list( + self.condition.format(), start=start, stop=stop, sort=True + ) + elif self.coordinates is not None: + return self.coordinates + + return np.arange(start, stop) diff --git a/pandas/io/sas/__init__.py b/pandas/io/sas/__init__.py new file mode 100644 index 00000000..31773074 --- /dev/null +++ b/pandas/io/sas/__init__.py @@ -0,0 +1,3 @@ +from pandas.io.sas.sasreader import read_sas + +__all__ = ["read_sas"] diff --git a/pandas/io/sas/_sas.pyi b/pandas/io/sas/_sas.pyi new file mode 100644 index 00000000..527193dd --- /dev/null +++ b/pandas/io/sas/_sas.pyi @@ -0,0 +1,5 @@ +from pandas.io.sas.sas7bdat import SAS7BDATReader + +class Parser: + def __init__(self, parser: SAS7BDATReader) -> None: ... + def read(self, nrows: int) -> None: ... 
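The `pandas/io/sas/__init__.py` module above exposes `read_sas` as the public entry point for the SAS readers implemented in the files that follow. As a rough usage sketch (the file path, chunk size, and the `print` placeholder below are illustrative assumptions, not part of this patch), the reader can either load a whole SAS7BDAT file at once or stream it in fixed-size chunks via the `chunksize` argument, closing the underlying handle when done:

    import pandas as pd

    # Hypothetical input file; any .sas7bdat dataset would work here.
    path = "example.sas7bdat"

    # One-shot read: returns a single DataFrame.
    df = pd.read_sas(path, format="sas7bdat", encoding="latin-1")

    # Chunked read: with chunksize, read_sas returns a SAS7BDATReader that
    # yields DataFrames of at most `chunksize` rows per iteration.
    reader = pd.read_sas(path, format="sas7bdat", chunksize=10_000)
    try:
        for chunk in reader:
            print(len(chunk))  # placeholder for per-chunk processing
    finally:
        reader.close()

Iteration ends when the reader raises StopIteration after the last page has been consumed, as implemented in SAS7BDATReader.__next__ in sas7bdat.py below.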
diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx new file mode 100644 index 00000000..d8591c0b --- /dev/null +++ b/pandas/io/sas/sas.pyx @@ -0,0 +1,436 @@ +# cython: profile=False +# cython: boundscheck=False, initializedcheck=False +from cython cimport Py_ssize_t +import numpy as np + +import pandas.io.sas.sas_constants as const + +ctypedef signed long long int64_t +ctypedef unsigned char uint8_t +ctypedef unsigned short uint16_t + +# rle_decompress decompresses data using a Run Length Encoding +# algorithm. It is partially documented here: +# +# https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf +cdef const uint8_t[:] rle_decompress(int result_length, const uint8_t[:] inbuff) except *: + + cdef: + uint8_t control_byte, x + uint8_t[:] result = np.zeros(result_length, np.uint8) + int rpos = 0 + int i, nbytes, end_of_first_byte + Py_ssize_t ipos = 0, length = len(inbuff) + + while ipos < length: + control_byte = inbuff[ipos] & 0xF0 + end_of_first_byte = (inbuff[ipos] & 0x0F) + ipos += 1 + + if control_byte == 0x00: + nbytes = (inbuff[ipos]) + 64 + end_of_first_byte * 256 + ipos += 1 + for _ in range(nbytes): + result[rpos] = inbuff[ipos] + rpos += 1 + ipos += 1 + elif control_byte == 0x40: + # not documented + nbytes = (inbuff[ipos] & 0xFF) + 18 + end_of_first_byte * 256 + ipos += 1 + for _ in range(nbytes): + result[rpos] = inbuff[ipos] + rpos += 1 + ipos += 1 + elif control_byte == 0x60: + nbytes = end_of_first_byte * 256 + (inbuff[ipos]) + 17 + ipos += 1 + for _ in range(nbytes): + result[rpos] = 0x20 + rpos += 1 + elif control_byte == 0x70: + nbytes = end_of_first_byte * 256 + (inbuff[ipos]) + 17 + ipos += 1 + for _ in range(nbytes): + result[rpos] = 0x00 + rpos += 1 + elif control_byte == 0x80: + nbytes = end_of_first_byte + 1 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0x90: + nbytes = end_of_first_byte + 17 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xA0: + nbytes = end_of_first_byte + 33 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xB0: + nbytes = end_of_first_byte + 49 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xC0: + nbytes = end_of_first_byte + 3 + x = inbuff[ipos] + ipos += 1 + for _ in range(nbytes): + result[rpos] = x + rpos += 1 + elif control_byte == 0xD0: + nbytes = end_of_first_byte + 2 + for _ in range(nbytes): + result[rpos] = 0x40 + rpos += 1 + elif control_byte == 0xE0: + nbytes = end_of_first_byte + 2 + for _ in range(nbytes): + result[rpos] = 0x20 + rpos += 1 + elif control_byte == 0xF0: + nbytes = end_of_first_byte + 2 + for _ in range(nbytes): + result[rpos] = 0x00 + rpos += 1 + else: + raise ValueError(f"unknown control byte: {control_byte}") + + # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t + if len(result) != result_length: + raise ValueError(f"RLE: {len(result)} != {result_length}") + + return np.asarray(result) + + +# rdc_decompress decompresses data using the Ross Data Compression algorithm: +# +# http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm +cdef const uint8_t[:] rdc_decompress(int result_length, const uint8_t[:] inbuff) except *: + + cdef: + uint8_t cmd + uint16_t ctrl_bits = 0, ctrl_mask = 0, ofs, cnt + int rpos = 0, k + uint8_t[:] outbuff = np.zeros(result_length, 
dtype=np.uint8) + Py_ssize_t ipos = 0, length = len(inbuff) + + ii = -1 + + while ipos < length: + ii += 1 + ctrl_mask = ctrl_mask >> 1 + if ctrl_mask == 0: + ctrl_bits = ((inbuff[ipos] << 8) + + inbuff[ipos + 1]) + ipos += 2 + ctrl_mask = 0x8000 + + if ctrl_bits & ctrl_mask == 0: + outbuff[rpos] = inbuff[ipos] + ipos += 1 + rpos += 1 + continue + + cmd = (inbuff[ipos] >> 4) & 0x0F + cnt = (inbuff[ipos] & 0x0F) + ipos += 1 + + # short RLE + if cmd == 0: + cnt += 3 + for k in range(cnt): + outbuff[rpos + k] = inbuff[ipos] + rpos += cnt + ipos += 1 + + # long RLE + elif cmd == 1: + cnt += inbuff[ipos] << 4 + cnt += 19 + ipos += 1 + for k in range(cnt): + outbuff[rpos + k] = inbuff[ipos] + rpos += cnt + ipos += 1 + + # long pattern + elif cmd == 2: + ofs = cnt + 3 + ofs += inbuff[ipos] << 4 + ipos += 1 + cnt = inbuff[ipos] + ipos += 1 + cnt += 16 + for k in range(cnt): + outbuff[rpos + k] = outbuff[rpos - ofs + k] + rpos += cnt + + # short pattern + else: + ofs = cnt + 3 + ofs += inbuff[ipos] << 4 + ipos += 1 + for k in range(cmd): + outbuff[rpos + k] = outbuff[rpos - ofs + k] + rpos += cmd + + # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t + if len(outbuff) != result_length: + raise ValueError(f"RDC: {len(outbuff)} != {result_length}\n") + + return np.asarray(outbuff) + + +cdef enum ColumnTypes: + column_type_decimal = 1 + column_type_string = 2 + + +# type the page_data types +assert len(const.page_meta_types) == 2 +cdef: + int page_meta_types_0 = const.page_meta_types[0] + int page_meta_types_1 = const.page_meta_types[1] + int page_mix_type = const.page_mix_type + int page_data_type = const.page_data_type + int subheader_pointers_offset = const.subheader_pointers_offset + + +cdef class Parser: + + cdef: + int column_count + int64_t[:] lengths + int64_t[:] offsets + int64_t[:] column_types + uint8_t[:, :] byte_chunk + object[:, :] string_chunk + char *cached_page + int current_row_on_page_index + int current_page_block_count + int current_page_data_subheader_pointers_len + int current_page_subheaders_count + int current_row_in_chunk_index + int current_row_in_file_index + int header_length + int row_length + int bit_offset + int subheader_pointer_length + int current_page_type + bint is_little_endian + const uint8_t[:] (*decompress)(int result_length, const uint8_t[:] inbuff) except * + object parser + + def __init__(self, object parser): + cdef: + int j + char[:] column_types + + self.parser = parser + self.header_length = self.parser.header_length + self.column_count = parser.column_count + self.lengths = parser.column_data_lengths() + self.offsets = parser.column_data_offsets() + self.byte_chunk = parser._byte_chunk + self.string_chunk = parser._string_chunk + self.row_length = parser.row_length + self.bit_offset = self.parser._page_bit_offset + self.subheader_pointer_length = self.parser._subheader_pointer_length + self.is_little_endian = parser.byte_order == "<" + self.column_types = np.empty(self.column_count, dtype='int64') + + # page indicators + self.update_next_page() + + column_types = parser.column_types() + + # map column types + for j in range(self.column_count): + if column_types[j] == b'd': + self.column_types[j] = column_type_decimal + elif column_types[j] == b's': + self.column_types[j] = column_type_string + else: + raise ValueError(f"unknown column type: {self.parser.columns[j].ctype}") + + # compression + if parser.compression == const.rle_compression: + self.decompress = rle_decompress + elif parser.compression == const.rdc_compression: + 
self.decompress = rdc_decompress + else: + self.decompress = NULL + + # update to current state of the parser + self.current_row_in_chunk_index = parser._current_row_in_chunk_index + self.current_row_in_file_index = parser._current_row_in_file_index + self.current_row_on_page_index = parser._current_row_on_page_index + + def read(self, int nrows): + cdef: + bint done + int i + + for _ in range(nrows): + done = self.readline() + if done: + break + + # update the parser + self.parser._current_row_on_page_index = self.current_row_on_page_index + self.parser._current_row_in_chunk_index = self.current_row_in_chunk_index + self.parser._current_row_in_file_index = self.current_row_in_file_index + + cdef bint read_next_page(self) except? True: + cdef bint done + + done = self.parser._read_next_page() + if done: + self.cached_page = NULL + else: + self.update_next_page() + return done + + cdef update_next_page(self): + # update data for the current page + + self.cached_page = self.parser._cached_page + self.current_row_on_page_index = 0 + self.current_page_type = self.parser._current_page_type + self.current_page_block_count = self.parser._current_page_block_count + self.current_page_data_subheader_pointers_len = len( + self.parser._current_page_data_subheader_pointers + ) + self.current_page_subheaders_count = self.parser._current_page_subheaders_count + + cdef bint readline(self) except? True: + + cdef: + int offset, bit_offset, align_correction + int subheader_pointer_length, mn + bint done, flag + + bit_offset = self.bit_offset + subheader_pointer_length = self.subheader_pointer_length + + # If there is no page, go to the end of the header and read a page. + if self.cached_page == NULL: + self.parser._path_or_buf.seek(self.header_length) + done = self.read_next_page() + if done: + return True + + # Loop until a data row is read + while True: + if self.current_page_type in (page_meta_types_0, page_meta_types_1): + flag = self.current_row_on_page_index >=\ + self.current_page_data_subheader_pointers_len + if flag: + done = self.read_next_page() + if done: + return True + continue + current_subheader_pointer = ( + self.parser._current_page_data_subheader_pointers[ + self.current_row_on_page_index]) + self.process_byte_array_with_data( + current_subheader_pointer.offset, + current_subheader_pointer.length) + return False + elif self.current_page_type == page_mix_type: + align_correction = ( + bit_offset + + subheader_pointers_offset + + self.current_page_subheaders_count * subheader_pointer_length + ) + align_correction = align_correction % 8 + offset = bit_offset + align_correction + offset += subheader_pointers_offset + offset += self.current_page_subheaders_count * subheader_pointer_length + offset += self.current_row_on_page_index * self.row_length + self.process_byte_array_with_data(offset, self.row_length) + mn = min(self.parser.row_count, self.parser._mix_page_row_count) + if self.current_row_on_page_index == mn: + done = self.read_next_page() + if done: + return True + return False + elif self.current_page_type == page_data_type: + self.process_byte_array_with_data( + bit_offset + + subheader_pointers_offset + + self.current_row_on_page_index * self.row_length, + self.row_length, + ) + flag = self.current_row_on_page_index == self.current_page_block_count + if flag: + done = self.read_next_page() + if done: + return True + return False + else: + raise ValueError(f"unknown page type: {self.current_page_type}") + + cdef void process_byte_array_with_data(self, int offset, int length) except *: 
+ + cdef: + Py_ssize_t j + int s, k, m, jb, js, current_row + int64_t lngt, start, ct + const uint8_t[:] source + int64_t[:] column_types + int64_t[:] lengths + int64_t[:] offsets + uint8_t[:, :] byte_chunk + object[:, :] string_chunk + + source = np.frombuffer( + self.cached_page[offset:offset + length], dtype=np.uint8) + + if self.decompress != NULL and (length < self.row_length): + source = self.decompress(self.row_length, source) + + current_row = self.current_row_in_chunk_index + column_types = self.column_types + lengths = self.lengths + offsets = self.offsets + byte_chunk = self.byte_chunk + string_chunk = self.string_chunk + s = 8 * self.current_row_in_chunk_index + js = 0 + jb = 0 + for j in range(self.column_count): + lngt = lengths[j] + if lngt == 0: + break + start = offsets[j] + ct = column_types[j] + if ct == column_type_decimal: + # decimal + if self.is_little_endian: + m = s + 8 - lngt + else: + m = s + for k in range(lngt): + byte_chunk[jb, m + k] = source[start + k] + jb += 1 + elif column_types[j] == column_type_string: + # string + # Skip trailing whitespace. This is equivalent to calling + # .rstrip(b"\x00 ") but without Python call overhead. + while lngt > 0 and source[start+lngt-1] in b"\x00 ": + lngt -= 1 + string_chunk[js, current_row] = (&source[start])[:lngt] + js += 1 + + self.current_row_on_page_index += 1 + self.current_row_in_chunk_index += 1 + self.current_row_in_file_index += 1 diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py new file mode 100644 index 00000000..7282affe --- /dev/null +++ b/pandas/io/sas/sas7bdat.py @@ -0,0 +1,816 @@ +""" +Read SAS7BDAT files + +Based on code written by Jared Hobbs: + https://bitbucket.org/jaredhobbs/sas7bdat + +See also: + https://github.com/BioStatMatt/sas7bdat + +Partial documentation of the file format: + https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf + +Reference for binary data compression: + http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm +""" +from __future__ import annotations + +from collections import abc +from datetime import ( + datetime, + timedelta, +) +import struct +from typing import cast + +import numpy as np + +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadBuffer, +) +from pandas.errors import ( + EmptyDataError, + OutOfBoundsDatetime, +) + +import pandas as pd +from pandas import ( + DataFrame, + isna, +) + +from pandas.io.common import get_handle +from pandas.io.sas._sas import Parser +import pandas.io.sas.sas_constants as const +from pandas.io.sas.sasreader import ReaderBase + + +def _parse_datetime(sas_datetime: float, unit: str): + if isna(sas_datetime): + return pd.NaT + + if unit == "s": + return datetime(1960, 1, 1) + timedelta(seconds=sas_datetime) + + elif unit == "d": + return datetime(1960, 1, 1) + timedelta(days=sas_datetime) + + else: + raise ValueError("unit must be 'd' or 's'") + + +def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series: + """ + Convert to Timestamp if possible, otherwise to datetime.datetime. + SAS float64 lacks precision for more than ms resolution so the fit + to datetime.datetime is ok. + + Parameters + ---------- + sas_datetimes : {Series, Sequence[float]} + Dates or datetimes in SAS + unit : {str} + "d" if the floats represent dates, "s" for datetimes + + Returns + ------- + Series + Series of datetime64 dtype or datetime.datetime. 
+ """ + try: + return pd.to_datetime(sas_datetimes, unit=unit, origin="1960-01-01") + except OutOfBoundsDatetime: + s_series = sas_datetimes.apply(_parse_datetime, unit=unit) + s_series = cast(pd.Series, s_series) + return s_series + + +class _SubheaderPointer: + offset: int + length: int + compression: int + ptype: int + + def __init__(self, offset: int, length: int, compression: int, ptype: int) -> None: + self.offset = offset + self.length = length + self.compression = compression + self.ptype = ptype + + +class _Column: + col_id: int + name: str | bytes + label: str | bytes + format: str | bytes + ctype: bytes + length: int + + def __init__( + self, + col_id: int, + # These can be bytes when convert_header_text is False + name: str | bytes, + label: str | bytes, + format: str | bytes, + ctype: bytes, + length: int, + ) -> None: + self.col_id = col_id + self.name = name + self.label = label + self.format = format + self.ctype = ctype + self.length = length + + +# SAS7BDAT represents a SAS data file in SAS7BDAT format. +class SAS7BDATReader(ReaderBase, abc.Iterator): + """ + Read SAS files in SAS7BDAT format. + + Parameters + ---------- + path_or_buf : path name or buffer + Name of SAS file or file-like object pointing to SAS file + contents. + index : column identifier, defaults to None + Column to use as index. + convert_dates : bool, defaults to True + Attempt to convert dates to Pandas datetime values. Note that + some rarely used SAS date formats may be unsupported. + blank_missing : bool, defaults to True + Convert empty strings to missing values (SAS uses blanks to + indicate missing character variables). + chunksize : int, defaults to None + Return SAS7BDATReader object for iterations, returns chunks + with given number of lines. + encoding : string, defaults to None + String encoding. + convert_text : bool, defaults to True + If False, text variables are left as raw bytes. + convert_header_text : bool, defaults to True + If False, header text, including column names, are left as raw + bytes. 
+ """ + + _int_length: int + _cached_page: bytes | None + + def __init__( + self, + path_or_buf: FilePath | ReadBuffer[bytes], + index=None, + convert_dates: bool = True, + blank_missing: bool = True, + chunksize: int | None = None, + encoding: str | None = None, + convert_text: bool = True, + convert_header_text: bool = True, + compression: CompressionOptions = "infer", + ) -> None: + + self.index = index + self.convert_dates = convert_dates + self.blank_missing = blank_missing + self.chunksize = chunksize + self.encoding = encoding + self.convert_text = convert_text + self.convert_header_text = convert_header_text + + self.default_encoding = "latin-1" + self.compression = b"" + self.column_names_raw: list[bytes] = [] + self.column_names: list[str | bytes] = [] + self.column_formats: list[str | bytes] = [] + self.columns: list[_Column] = [] + + self._current_page_data_subheader_pointers: list[_SubheaderPointer] = [] + self._cached_page = None + self._column_data_lengths: list[int] = [] + self._column_data_offsets: list[int] = [] + self._column_types: list[bytes] = [] + + self._current_row_in_file_index = 0 + self._current_row_on_page_index = 0 + self._current_row_in_file_index = 0 + + self.handles = get_handle( + path_or_buf, "rb", is_text=False, compression=compression + ) + + self._path_or_buf = self.handles.handle + + try: + self._get_properties() + self._parse_metadata() + except Exception: + self.close() + raise + + def column_data_lengths(self) -> np.ndarray: + """Return a numpy int64 array of the column data lengths""" + return np.asarray(self._column_data_lengths, dtype=np.int64) + + def column_data_offsets(self) -> np.ndarray: + """Return a numpy int64 array of the column offsets""" + return np.asarray(self._column_data_offsets, dtype=np.int64) + + def column_types(self) -> np.ndarray: + """ + Returns a numpy character array of the column types: + s (string) or d (double) + """ + return np.asarray(self._column_types, dtype=np.dtype("S1")) + + def close(self) -> None: + self.handles.close() + + def _get_properties(self) -> None: + + # Check magic number + self._path_or_buf.seek(0) + self._cached_page = self._path_or_buf.read(288) + if self._cached_page[0 : len(const.magic)] != const.magic: + raise ValueError("magic number mismatch (not a SAS file?)") + + # Get alignment information + align1, align2 = 0, 0 + buf = self._read_bytes(const.align_1_offset, const.align_1_length) + if buf == const.u64_byte_checker_value: + align2 = const.align_2_value + self.U64 = True + self._int_length = 8 + self._page_bit_offset = const.page_bit_offset_x64 + self._subheader_pointer_length = const.subheader_pointer_length_x64 + else: + self.U64 = False + self._page_bit_offset = const.page_bit_offset_x86 + self._subheader_pointer_length = const.subheader_pointer_length_x86 + self._int_length = 4 + buf = self._read_bytes(const.align_2_offset, const.align_2_length) + if buf == const.align_1_checker_value: + align1 = const.align_2_value + total_align = align1 + align2 + + # Get endianness information + buf = self._read_bytes(const.endianness_offset, const.endianness_length) + if buf == b"\x01": + self.byte_order = "<" + else: + self.byte_order = ">" + + # Get encoding information + buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0] + if buf in const.encoding_names: + self.file_encoding = const.encoding_names[buf] + else: + self.file_encoding = f"unknown (code={buf})" + + # Get platform information + buf = self._read_bytes(const.platform_offset, const.platform_length) + if buf == b"1": 
+ self.platform = "unix" + elif buf == b"2": + self.platform = "windows" + else: + self.platform = "unknown" + + self.name = self._read_and_convert_header_text( + const.dataset_offset, const.dataset_length + ) + + self.file_type = self._read_and_convert_header_text( + const.file_type_offset, const.file_type_length + ) + + # Timestamp is epoch 01/01/1960 + epoch = datetime(1960, 1, 1) + x = self._read_float( + const.date_created_offset + align1, const.date_created_length + ) + self.date_created = epoch + pd.to_timedelta(x, unit="s") + x = self._read_float( + const.date_modified_offset + align1, const.date_modified_length + ) + self.date_modified = epoch + pd.to_timedelta(x, unit="s") + + self.header_length = self._read_int( + const.header_size_offset + align1, const.header_size_length + ) + + # Read the rest of the header into cached_page. + buf = self._path_or_buf.read(self.header_length - 288) + self._cached_page += buf + # error: Argument 1 to "len" has incompatible type "Optional[bytes]"; + # expected "Sized" + if len(self._cached_page) != self.header_length: # type: ignore[arg-type] + raise ValueError("The SAS7BDAT file appears to be truncated.") + + self._page_length = self._read_int( + const.page_size_offset + align1, const.page_size_length + ) + self._page_count = self._read_int( + const.page_count_offset + align1, const.page_count_length + ) + + self.sas_release_offset = self._read_and_convert_header_text( + const.sas_release_offset + total_align, const.sas_release_length + ) + + self.server_type = self._read_and_convert_header_text( + const.sas_server_type_offset + total_align, const.sas_server_type_length + ) + + self.os_version = self._read_and_convert_header_text( + const.os_version_number_offset + total_align, const.os_version_number_length + ) + + self.os_name = self._read_and_convert_header_text( + const.os_name_offset + total_align, const.os_name_length + ) + if not self.os_name: + self.os_name = self._read_and_convert_header_text( + const.os_maker_offset + total_align, const.os_maker_length + ) + + def __next__(self) -> DataFrame: + da = self.read(nrows=self.chunksize or 1) + if da.empty: + self.close() + raise StopIteration + return da + + # Read a single float of the given width (4 or 8). + def _read_float(self, offset: int, width: int): + if width not in (4, 8): + self.close() + raise ValueError("invalid float width") + buf = self._read_bytes(offset, width) + fd = "f" if width == 4 else "d" + return struct.unpack(self.byte_order + fd, buf)[0] + + # Read a single signed integer of the given width (1, 2, 4 or 8). + def _read_int(self, offset: int, width: int) -> int: + if width not in (1, 2, 4, 8): + self.close() + raise ValueError("invalid int width") + buf = self._read_bytes(offset, width) + it = {1: "b", 2: "h", 4: "l", 8: "q"}[width] + iv = struct.unpack(self.byte_order + it, buf)[0] + return iv + + def _read_bytes(self, offset: int, length: int): + if self._cached_page is None: + self._path_or_buf.seek(offset) + buf = self._path_or_buf.read(length) + if len(buf) < length: + self.close() + msg = f"Unable to read {length:d} bytes from file position {offset:d}." 
+ raise ValueError(msg) + return buf + else: + if offset + length > len(self._cached_page): + self.close() + raise ValueError("The cached page is too small.") + return self._cached_page[offset : offset + length] + + def _read_and_convert_header_text(self, offset: int, length: int) -> str | bytes: + return self._convert_header_text( + self._read_bytes(offset, length).rstrip(b"\x00 ") + ) + + def _parse_metadata(self) -> None: + done = False + while not done: + self._cached_page = self._path_or_buf.read(self._page_length) + if len(self._cached_page) <= 0: + break + if len(self._cached_page) != self._page_length: + raise ValueError("Failed to read a meta data page from the SAS file.") + done = self._process_page_meta() + + def _process_page_meta(self) -> bool: + self._read_page_header() + pt = const.page_meta_types + [const.page_amd_type, const.page_mix_type] + if self._current_page_type in pt: + self._process_page_metadata() + is_data_page = self._current_page_type == const.page_data_type + is_mix_page = self._current_page_type == const.page_mix_type + return bool( + is_data_page + or is_mix_page + or self._current_page_data_subheader_pointers != [] + ) + + def _read_page_header(self): + bit_offset = self._page_bit_offset + tx = const.page_type_offset + bit_offset + self._current_page_type = ( + self._read_int(tx, const.page_type_length) & const.page_type_mask2 + ) + tx = const.block_count_offset + bit_offset + self._current_page_block_count = self._read_int(tx, const.block_count_length) + tx = const.subheader_count_offset + bit_offset + self._current_page_subheaders_count = self._read_int( + tx, const.subheader_count_length + ) + + def _process_page_metadata(self) -> None: + bit_offset = self._page_bit_offset + + for i in range(self._current_page_subheaders_count): + pointer = self._process_subheader_pointers( + const.subheader_pointers_offset + bit_offset, i + ) + if pointer.length == 0: + continue + if pointer.compression == const.truncated_subheader_id: + continue + subheader_signature = self._read_subheader_signature(pointer.offset) + subheader_index = self._get_subheader_index( + subheader_signature, pointer.compression, pointer.ptype + ) + self._process_subheader(subheader_index, pointer) + + def _get_subheader_index(self, signature: bytes, compression, ptype) -> int: + # TODO: return here could be made an enum + index = const.subheader_signature_to_index.get(signature) + if index is None: + f1 = (compression == const.compressed_subheader_id) or (compression == 0) + f2 = ptype == const.compressed_subheader_type + if (self.compression != b"") and f1 and f2: + index = const.SASIndex.data_subheader_index + else: + self.close() + raise ValueError("Unknown subheader signature") + return index + + def _process_subheader_pointers( + self, offset: int, subheader_pointer_index: int + ) -> _SubheaderPointer: + + subheader_pointer_length = self._subheader_pointer_length + total_offset = offset + subheader_pointer_length * subheader_pointer_index + + subheader_offset = self._read_int(total_offset, self._int_length) + total_offset += self._int_length + + subheader_length = self._read_int(total_offset, self._int_length) + total_offset += self._int_length + + subheader_compression = self._read_int(total_offset, 1) + total_offset += 1 + + subheader_type = self._read_int(total_offset, 1) + + x = _SubheaderPointer( + subheader_offset, subheader_length, subheader_compression, subheader_type + ) + + return x + + def _read_subheader_signature(self, offset: int) -> bytes: + subheader_signature = 
self._read_bytes(offset, self._int_length) + return subheader_signature + + def _process_subheader( + self, subheader_index: int, pointer: _SubheaderPointer + ) -> None: + offset = pointer.offset + length = pointer.length + + if subheader_index == const.SASIndex.row_size_index: + processor = self._process_rowsize_subheader + elif subheader_index == const.SASIndex.column_size_index: + processor = self._process_columnsize_subheader + elif subheader_index == const.SASIndex.column_text_index: + processor = self._process_columntext_subheader + elif subheader_index == const.SASIndex.column_name_index: + processor = self._process_columnname_subheader + elif subheader_index == const.SASIndex.column_attributes_index: + processor = self._process_columnattributes_subheader + elif subheader_index == const.SASIndex.format_and_label_index: + processor = self._process_format_subheader + elif subheader_index == const.SASIndex.column_list_index: + processor = self._process_columnlist_subheader + elif subheader_index == const.SASIndex.subheader_counts_index: + processor = self._process_subheader_counts + elif subheader_index == const.SASIndex.data_subheader_index: + self._current_page_data_subheader_pointers.append(pointer) + return + else: + raise ValueError("unknown subheader index") + + processor(offset, length) + + def _process_rowsize_subheader(self, offset: int, length: int) -> None: + + int_len = self._int_length + lcs_offset = offset + lcp_offset = offset + if self.U64: + lcs_offset += 682 + lcp_offset += 706 + else: + lcs_offset += 354 + lcp_offset += 378 + + self.row_length = self._read_int( + offset + const.row_length_offset_multiplier * int_len, int_len + ) + self.row_count = self._read_int( + offset + const.row_count_offset_multiplier * int_len, int_len + ) + self.col_count_p1 = self._read_int( + offset + const.col_count_p1_multiplier * int_len, int_len + ) + self.col_count_p2 = self._read_int( + offset + const.col_count_p2_multiplier * int_len, int_len + ) + mx = const.row_count_on_mix_page_offset_multiplier * int_len + self._mix_page_row_count = self._read_int(offset + mx, int_len) + self._lcs = self._read_int(lcs_offset, 2) + self._lcp = self._read_int(lcp_offset, 2) + + def _process_columnsize_subheader(self, offset: int, length: int) -> None: + int_len = self._int_length + offset += int_len + self.column_count = self._read_int(offset, int_len) + if self.col_count_p1 + self.col_count_p2 != self.column_count: + print( + f"Warning: column count mismatch ({self.col_count_p1} + " + f"{self.col_count_p2} != {self.column_count})\n" + ) + + # Unknown purpose + def _process_subheader_counts(self, offset: int, length: int) -> None: + pass + + def _process_columntext_subheader(self, offset: int, length: int) -> None: + + offset += self._int_length + text_block_size = self._read_int(offset, const.text_block_size_length) + + buf = self._read_bytes(offset, text_block_size) + cname_raw = buf[0:text_block_size].rstrip(b"\x00 ") + self.column_names_raw.append(cname_raw) + + if len(self.column_names_raw) == 1: + compression_literal = b"" + for cl in const.compression_literals: + if cl in cname_raw: + compression_literal = cl + self.compression = compression_literal + offset -= self._int_length + + offset1 = offset + 16 + if self.U64: + offset1 += 4 + + buf = self._read_bytes(offset1, self._lcp) + compression_literal = buf.rstrip(b"\x00") + if compression_literal == b"": + self._lcs = 0 + offset1 = offset + 32 + if self.U64: + offset1 += 4 + buf = self._read_bytes(offset1, self._lcp) + self.creator_proc = 
buf[0 : self._lcp] + elif compression_literal == const.rle_compression: + offset1 = offset + 40 + if self.U64: + offset1 += 4 + buf = self._read_bytes(offset1, self._lcp) + self.creator_proc = buf[0 : self._lcp] + elif self._lcs > 0: + self._lcp = 0 + offset1 = offset + 16 + if self.U64: + offset1 += 4 + buf = self._read_bytes(offset1, self._lcs) + self.creator_proc = buf[0 : self._lcp] + if hasattr(self, "creator_proc"): + self.creator_proc = self._convert_header_text(self.creator_proc) + + def _process_columnname_subheader(self, offset: int, length: int) -> None: + int_len = self._int_length + offset += int_len + column_name_pointers_count = (length - 2 * int_len - 12) // 8 + for i in range(column_name_pointers_count): + text_subheader = ( + offset + + const.column_name_pointer_length * (i + 1) + + const.column_name_text_subheader_offset + ) + col_name_offset = ( + offset + + const.column_name_pointer_length * (i + 1) + + const.column_name_offset_offset + ) + col_name_length = ( + offset + + const.column_name_pointer_length * (i + 1) + + const.column_name_length_offset + ) + + idx = self._read_int( + text_subheader, const.column_name_text_subheader_length + ) + col_offset = self._read_int( + col_name_offset, const.column_name_offset_length + ) + col_len = self._read_int(col_name_length, const.column_name_length_length) + + name_raw = self.column_names_raw[idx] + cname = name_raw[col_offset : col_offset + col_len] + self.column_names.append(self._convert_header_text(cname)) + + def _process_columnattributes_subheader(self, offset: int, length: int) -> None: + int_len = self._int_length + column_attributes_vectors_count = (length - 2 * int_len - 12) // (int_len + 8) + for i in range(column_attributes_vectors_count): + col_data_offset = ( + offset + int_len + const.column_data_offset_offset + i * (int_len + 8) + ) + col_data_len = ( + offset + + 2 * int_len + + const.column_data_length_offset + + i * (int_len + 8) + ) + col_types = ( + offset + 2 * int_len + const.column_type_offset + i * (int_len + 8) + ) + + x = self._read_int(col_data_offset, int_len) + self._column_data_offsets.append(x) + + x = self._read_int(col_data_len, const.column_data_length_length) + self._column_data_lengths.append(x) + + x = self._read_int(col_types, const.column_type_length) + self._column_types.append(b"d" if x == 1 else b"s") + + def _process_columnlist_subheader(self, offset: int, length: int) -> None: + # unknown purpose + pass + + def _process_format_subheader(self, offset: int, length: int) -> None: + int_len = self._int_length + text_subheader_format = ( + offset + const.column_format_text_subheader_index_offset + 3 * int_len + ) + col_format_offset = offset + const.column_format_offset_offset + 3 * int_len + col_format_len = offset + const.column_format_length_offset + 3 * int_len + text_subheader_label = ( + offset + const.column_label_text_subheader_index_offset + 3 * int_len + ) + col_label_offset = offset + const.column_label_offset_offset + 3 * int_len + col_label_len = offset + const.column_label_length_offset + 3 * int_len + + x = self._read_int( + text_subheader_format, const.column_format_text_subheader_index_length + ) + format_idx = min(x, len(self.column_names_raw) - 1) + + format_start = self._read_int( + col_format_offset, const.column_format_offset_length + ) + format_len = self._read_int(col_format_len, const.column_format_length_length) + + label_idx = self._read_int( + text_subheader_label, const.column_label_text_subheader_index_length + ) + label_idx = min(label_idx, 
len(self.column_names_raw) - 1) + + label_start = self._read_int(col_label_offset, const.column_label_offset_length) + label_len = self._read_int(col_label_len, const.column_label_length_length) + + label_names = self.column_names_raw[label_idx] + column_label = self._convert_header_text( + label_names[label_start : label_start + label_len] + ) + format_names = self.column_names_raw[format_idx] + column_format = self._convert_header_text( + format_names[format_start : format_start + format_len] + ) + current_column_number = len(self.columns) + + col = _Column( + current_column_number, + self.column_names[current_column_number], + column_label, + column_format, + self._column_types[current_column_number], + self._column_data_lengths[current_column_number], + ) + + self.column_formats.append(column_format) + self.columns.append(col) + + def read(self, nrows: int | None = None) -> DataFrame: + + if (nrows is None) and (self.chunksize is not None): + nrows = self.chunksize + elif nrows is None: + nrows = self.row_count + + if len(self._column_types) == 0: + self.close() + raise EmptyDataError("No columns to parse from file") + + if nrows > 0 and self._current_row_in_file_index >= self.row_count: + return DataFrame() + + m = self.row_count - self._current_row_in_file_index + if nrows > m: + nrows = m + + nd = self._column_types.count(b"d") + ns = self._column_types.count(b"s") + + self._string_chunk = np.empty((ns, nrows), dtype=object) + self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8) + + self._current_row_in_chunk_index = 0 + p = Parser(self) + p.read(nrows) + + rslt = self._chunk_to_dataframe() + if self.index is not None: + rslt = rslt.set_index(self.index) + + return rslt + + def _read_next_page(self): + self._current_page_data_subheader_pointers = [] + self._cached_page = self._path_or_buf.read(self._page_length) + if len(self._cached_page) <= 0: + return True + elif len(self._cached_page) != self._page_length: + self.close() + msg = ( + "failed to read complete page from file (read " + f"{len(self._cached_page):d} of {self._page_length:d} bytes)" + ) + raise ValueError(msg) + + self._read_page_header() + if self._current_page_type in const.page_meta_types: + self._process_page_metadata() + + if self._current_page_type not in const.page_meta_types + [ + const.page_data_type, + const.page_mix_type, + ]: + return self._read_next_page() + + return False + + def _chunk_to_dataframe(self) -> DataFrame: + + n = self._current_row_in_chunk_index + m = self._current_row_in_file_index + ix = range(m - n, m) + rslt = {} + + js, jb = 0, 0 + for j in range(self.column_count): + + name = self.column_names[j] + + if self._column_types[j] == b"d": + col_arr = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d") + rslt[name] = pd.Series(col_arr, dtype=np.float64, index=ix) + if self.convert_dates: + if self.column_formats[j] in const.sas_date_formats: + rslt[name] = _convert_datetimes(rslt[name], "d") + elif self.column_formats[j] in const.sas_datetime_formats: + rslt[name] = _convert_datetimes(rslt[name], "s") + jb += 1 + elif self._column_types[j] == b"s": + rslt[name] = pd.Series(self._string_chunk[js, :], index=ix) + if self.convert_text and (self.encoding is not None): + rslt[name] = self._decode_string(rslt[name].str) + if self.blank_missing: + ii = rslt[name].str.len() == 0 + rslt[name][ii] = np.nan + js += 1 + else: + self.close() + raise ValueError(f"unknown column type {repr(self._column_types[j])}") + + df = DataFrame(rslt, columns=self.column_names, index=ix, copy=False) + 
return df + + def _decode_string(self, b): + return b.decode(self.encoding or self.default_encoding) + + def _convert_header_text(self, b: bytes) -> str | bytes: + if self.convert_header_text: + return self._decode_string(b) + else: + return b diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py new file mode 100644 index 00000000..69bc16e6 --- /dev/null +++ b/pandas/io/sas/sas_constants.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +from typing import Final + +magic: Final = ( + b"\x00\x00\x00\x00\x00\x00\x00\x00" + + b"\x00\x00\x00\x00\xc2\xea\x81\x60" + + b"\xb3\x14\x11\xcf\xbd\x92\x08\x00" + + b"\x09\xc7\x31\x8c\x18\x1f\x10\x11" +) + +align_1_checker_value: Final = b"3" +align_1_offset: Final = 32 +align_1_length: Final = 1 +align_1_value: Final = 4 +u64_byte_checker_value: Final = b"3" +align_2_offset: Final = 35 +align_2_length: Final = 1 +align_2_value: Final = 4 +endianness_offset: Final = 37 +endianness_length: Final = 1 +platform_offset: Final = 39 +platform_length: Final = 1 +encoding_offset: Final = 70 +encoding_length: Final = 1 +dataset_offset: Final = 92 +dataset_length: Final = 64 +file_type_offset: Final = 156 +file_type_length: Final = 8 +date_created_offset: Final = 164 +date_created_length: Final = 8 +date_modified_offset: Final = 172 +date_modified_length: Final = 8 +header_size_offset: Final = 196 +header_size_length: Final = 4 +page_size_offset: Final = 200 +page_size_length: Final = 4 +page_count_offset: Final = 204 +page_count_length: Final = 4 +sas_release_offset: Final = 216 +sas_release_length: Final = 8 +sas_server_type_offset: Final = 224 +sas_server_type_length: Final = 16 +os_version_number_offset: Final = 240 +os_version_number_length: Final = 16 +os_maker_offset: Final = 256 +os_maker_length: Final = 16 +os_name_offset: Final = 272 +os_name_length: Final = 16 +page_bit_offset_x86: Final = 16 +page_bit_offset_x64: Final = 32 +subheader_pointer_length_x86: Final = 12 +subheader_pointer_length_x64: Final = 24 +page_type_offset: Final = 0 +page_type_length: Final = 2 +block_count_offset: Final = 2 +block_count_length: Final = 2 +subheader_count_offset: Final = 4 +subheader_count_length: Final = 2 +page_type_mask: Final = 0x0F00 +# Keep "page_comp_type" bits +page_type_mask2: Final = 0xF000 | page_type_mask +page_meta_type: Final = 0x0000 +page_data_type: Final = 0x0100 +page_mix_type: Final = 0x0200 +page_amd_type: Final = 0x0400 +page_meta2_type: Final = 0x4000 +page_comp_type: Final = 0x9000 +page_meta_types: Final = [page_meta_type, page_meta2_type] +subheader_pointers_offset: Final = 8 +truncated_subheader_id: Final = 1 +compressed_subheader_id: Final = 4 +compressed_subheader_type: Final = 1 +text_block_size_length: Final = 2 +row_length_offset_multiplier: Final = 5 +row_count_offset_multiplier: Final = 6 +col_count_p1_multiplier: Final = 9 +col_count_p2_multiplier: Final = 10 +row_count_on_mix_page_offset_multiplier: Final = 15 +column_name_pointer_length: Final = 8 +column_name_text_subheader_offset: Final = 0 +column_name_text_subheader_length: Final = 2 +column_name_offset_offset: Final = 2 +column_name_offset_length: Final = 2 +column_name_length_offset: Final = 4 +column_name_length_length: Final = 2 +column_data_offset_offset: Final = 8 +column_data_length_offset: Final = 8 +column_data_length_length: Final = 4 +column_type_offset: Final = 14 +column_type_length: Final = 1 +column_format_text_subheader_index_offset: Final = 22 +column_format_text_subheader_index_length: Final = 2 +column_format_offset_offset: 
Final = 24 +column_format_offset_length: Final = 2 +column_format_length_offset: Final = 26 +column_format_length_length: Final = 2 +column_label_text_subheader_index_offset: Final = 28 +column_label_text_subheader_index_length: Final = 2 +column_label_offset_offset: Final = 30 +column_label_offset_length: Final = 2 +column_label_length_offset: Final = 32 +column_label_length_length: Final = 2 +rle_compression: Final = b"SASYZCRL" +rdc_compression: Final = b"SASYZCR2" + +compression_literals: Final = [rle_compression, rdc_compression] + +# Incomplete list of encodings, using SAS nomenclature: +# http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm +encoding_names: Final = { + 29: "latin1", + 20: "utf-8", + 33: "cyrillic", + 60: "wlatin2", + 61: "wcyrillic", + 62: "wlatin1", + 90: "ebcdic870", +} + + +class SASIndex: + row_size_index: Final = 0 + column_size_index: Final = 1 + subheader_counts_index: Final = 2 + column_text_index: Final = 3 + column_name_index: Final = 4 + column_attributes_index: Final = 5 + format_and_label_index: Final = 6 + column_list_index: Final = 7 + data_subheader_index: Final = 8 + + +subheader_signature_to_index: Final = { + b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index, + b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index, + b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index, + b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index, + b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index, + b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index, + b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index, + b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index, + b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index, + b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, + b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, + b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index, + b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index, + b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index, + b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, + b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, + b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index, + b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, + b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, + b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index, + b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index, + b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index, +} + + +# List of frequently used SAS date and datetime formats +# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm +# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java +sas_date_formats: Final = ( + "DATE", + "DAY", + "DDMMYY", + "DOWNAME", + "JULDAY", + "JULIAN", + "MMDDYY", + "MMYY", + "MMYYC", + "MMYYD", + "MMYYP", + "MMYYS", + "MMYYN", + "MONNAME", + "MONTH", + "MONYY", + 
"QTR", + "QTRR", + "NENGO", + "WEEKDATE", + "WEEKDATX", + "WEEKDAY", + "WEEKV", + "WORDDATE", + "WORDDATX", + "YEAR", + "YYMM", + "YYMMC", + "YYMMD", + "YYMMP", + "YYMMS", + "YYMMN", + "YYMON", + "YYMMDD", + "YYQ", + "YYQC", + "YYQD", + "YYQP", + "YYQS", + "YYQN", + "YYQR", + "YYQRC", + "YYQRD", + "YYQRP", + "YYQRS", + "YYQRN", + "YYMMDDP", + "YYMMDDC", + "E8601DA", + "YYMMDDN", + "MMDDYYC", + "MMDDYYS", + "MMDDYYD", + "YYMMDDS", + "B8601DA", + "DDMMYYN", + "YYMMDDD", + "DDMMYYB", + "DDMMYYP", + "MMDDYYP", + "YYMMDDB", + "MMDDYYN", + "DDMMYYC", + "DDMMYYD", + "DDMMYYS", + "MINGUO", +) + +sas_datetime_formats: Final = ( + "DATETIME", + "DTWKDATX", + "B8601DN", + "B8601DT", + "B8601DX", + "B8601DZ", + "B8601LX", + "E8601DN", + "E8601DT", + "E8601DX", + "E8601DZ", + "E8601LX", + "DATEAMPM", + "DTDATE", + "DTMONYY", + "DTMONYY", + "DTWKDATX", + "DTYEAR", + "TOD", + "MDYAMPM", +) diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py new file mode 100644 index 00000000..c188e8d1 --- /dev/null +++ b/pandas/io/sas/sas_xport.py @@ -0,0 +1,507 @@ +""" +Read a SAS XPort format file into a Pandas DataFrame. + +Based on code from Jack Cushman (github.com/jcushman/xport). + +The file format is defined here: + +https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf +""" +from __future__ import annotations + +from collections import abc +from datetime import datetime +import struct +import warnings + +import numpy as np + +from pandas._typing import ( + CompressionOptions, + DatetimeNaTType, + FilePath, + ReadBuffer, +) +from pandas.util._decorators import Appender +from pandas.util._exceptions import find_stack_level + +import pandas as pd + +from pandas.io.common import get_handle +from pandas.io.sas.sasreader import ReaderBase + +_correct_line1 = ( + "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" + "000000000000000000000000000000 " +) +_correct_header1 = ( + "HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000" +) +_correct_header2 = ( + "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!" + "000000000000000000000000000000 " +) +_correct_obs_header = ( + "HEADER RECORD*******OBS HEADER RECORD!!!!!!!" + "000000000000000000000000000000 " +) +_fieldkeys = [ + "ntype", + "nhfun", + "field_length", + "nvar0", + "name", + "label", + "nform", + "nfl", + "num_decimals", + "nfj", + "nfill", + "niform", + "nifl", + "nifd", + "npos", + "_", +] + + +_base_params_doc = """\ +Parameters +---------- +filepath_or_buffer : str or file-like object + Path to SAS file or object implementing binary read method.""" + +_params2_doc = """\ +index : identifier of index column + Identifier of column that should be used as index of the DataFrame. +encoding : str + Encoding for text data. +chunksize : int + Read file `chunksize` lines at a time, returns iterator.""" + +_format_params_doc = """\ +format : str + File format, only `xport` is currently supported.""" + +_iterator_doc = """\ +iterator : bool, default False + Return XportReader object for reading file incrementally.""" + + +_read_sas_doc = f"""Read a SAS file into a DataFrame. 
+ +{_base_params_doc} +{_format_params_doc} +{_params2_doc} +{_iterator_doc} + +Returns +------- +DataFrame or XportReader + +Examples +-------- +Read a SAS Xport file: + +>>> df = pd.read_sas('filename.XPT') + +Read a Xport file in 10,000 line chunks: + +>>> itr = pd.read_sas('filename.XPT', chunksize=10000) +>>> for chunk in itr: +>>> do_something(chunk) + +""" + +_xport_reader_doc = f"""\ +Class for reading SAS Xport files. + +{_base_params_doc} +{_params2_doc} + +Attributes +---------- +member_info : list + Contains information about the file +fields : list + Contains information about the variables in the file +""" + +_read_method_doc = """\ +Read observations from SAS Xport file, returning as data frame. + +Parameters +---------- +nrows : int + Number of rows to read from data file; if None, read whole + file. + +Returns +------- +A DataFrame. +""" + + +def _parse_date(datestr: str) -> DatetimeNaTType: + """Given a date in xport format, return Python date.""" + try: + # e.g. "16FEB11:10:07:55" + return datetime.strptime(datestr, "%d%b%y:%H:%M:%S") + except ValueError: + return pd.NaT + + +def _split_line(s: str, parts): + """ + Parameters + ---------- + s: str + Fixed-length string to split + parts: list of (name, length) pairs + Used to break up string, name '_' will be filtered from output. + + Returns + ------- + Dict of name:contents of string at given location. + """ + out = {} + start = 0 + for name, length in parts: + out[name] = s[start : start + length].strip() + start += length + del out["_"] + return out + + +def _handle_truncated_float_vec(vec, nbytes): + # This feature is not well documented, but some SAS XPORT files + # have 2-7 byte "truncated" floats. To read these truncated + # floats, pad them with zeros on the right to make 8 byte floats. + # + # References: + # https://github.com/jcushman/xport/pull/3 + # The R "foreign" library + + if nbytes != 8: + vec1 = np.zeros(len(vec), np.dtype("S8")) + dtype = np.dtype(f"S{nbytes},S{8 - nbytes}") + vec2 = vec1.view(dtype=dtype) + vec2["f0"] = vec + return vec2 + + return vec + + +def _parse_float_vec(vec): + """ + Parse a vector of float values representing IBM 8 byte floats into + native 8 byte floats. + """ + dtype = np.dtype(">u4,>u4") + vec1 = vec.view(dtype=dtype) + xport1 = vec1["f0"] + xport2 = vec1["f1"] + + # Start by setting first half of ieee number to first half of IBM + # number sans exponent + ieee1 = xport1 & 0x00FFFFFF + + # The fraction bit to the left of the binary point in the ieee + # format was set and the number was shifted 0, 1, 2, or 3 + # places. This will tell us how to adjust the ibm exponent to be a + # power of 2 ieee exponent and how to shift the fraction bits to + # restore the correct magnitude. + shift = np.zeros(len(vec), dtype=np.uint8) + shift[np.where(xport1 & 0x00200000)] = 1 + shift[np.where(xport1 & 0x00400000)] = 2 + shift[np.where(xport1 & 0x00800000)] = 3 + + # shift the ieee number down the correct number of places then + # set the second half of the ieee number to be the second half + # of the ibm number shifted appropriately, ored with the bits + # from the first half that would have been shifted in if we + # could shift a double. All we are worried about are the low + # order 3 bits of the first half since we're only shifting by + # 1, 2, or 3. 
+ ieee1 >>= shift + ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift))) + + # clear the 1 bit to the left of the binary point + ieee1 &= 0xFFEFFFFF + + # set the exponent of the ieee number to be the actual exponent + # plus the shift count + 1023. Or this into the first half of the + # ieee number. The ibm exponent is excess 64 but is adjusted by 65 + # since during conversion to ibm format the exponent is + # incremented by 1 and the fraction bits left 4 positions to the + # right of the radix point. (had to add >> 24 because C treats & + # 0x7f as 0x7f000000 and Python doesn't) + ieee1 |= ((((((xport1 >> 24) & 0x7F) - 65) << 2) + shift + 1023) << 20) | ( + xport1 & 0x80000000 + ) + + ieee = np.empty((len(ieee1),), dtype=">u4,>u4") + ieee["f0"] = ieee1 + ieee["f1"] = ieee2 + ieee = ieee.view(dtype=">f8") + ieee = ieee.astype("f8") + + return ieee + + +class XportReader(ReaderBase, abc.Iterator): + __doc__ = _xport_reader_doc + + def __init__( + self, + filepath_or_buffer: FilePath | ReadBuffer[bytes], + index=None, + encoding: str | None = "ISO-8859-1", + chunksize=None, + compression: CompressionOptions = "infer", + ) -> None: + + self._encoding = encoding + self._lines_read = 0 + self._index = index + self._chunksize = chunksize + + self.handles = get_handle( + filepath_or_buffer, + "rb", + encoding=encoding, + is_text=False, + compression=compression, + ) + self.filepath_or_buffer = self.handles.handle + + try: + self._read_header() + except Exception: + self.close() + raise + + def close(self) -> None: + self.handles.close() + + def _get_row(self): + return self.filepath_or_buffer.read(80).decode() + + def _read_header(self): + self.filepath_or_buffer.seek(0) + + # read file header + line1 = self._get_row() + if line1 != _correct_line1: + if "**COMPRESSED**" in line1: + # this was created with the PROC CPORT method and can't be read + # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/movefile/p1bm6aqp3fw4uin1hucwh718f6kp.htm + raise ValueError( + "Header record indicates a CPORT file, which is not readable." 
+ ) + raise ValueError("Header record is not an XPORT file.") + + line2 = self._get_row() + fif = [["prefix", 24], ["version", 8], ["OS", 8], ["_", 24], ["created", 16]] + file_info = _split_line(line2, fif) + if file_info["prefix"] != "SAS SAS SASLIB": + raise ValueError("Header record has invalid prefix.") + file_info["created"] = _parse_date(file_info["created"]) + self.file_info = file_info + + line3 = self._get_row() + file_info["modified"] = _parse_date(line3[:16]) + + # read member header + header1 = self._get_row() + header2 = self._get_row() + headflag1 = header1.startswith(_correct_header1) + headflag2 = header2 == _correct_header2 + if not (headflag1 and headflag2): + raise ValueError("Member header not found") + # usually 140, could be 135 + fieldnamelength = int(header1[-5:-2]) + + # member info + mem = [ + ["prefix", 8], + ["set_name", 8], + ["sasdata", 8], + ["version", 8], + ["OS", 8], + ["_", 24], + ["created", 16], + ] + member_info = _split_line(self._get_row(), mem) + mem = [["modified", 16], ["_", 16], ["label", 40], ["type", 8]] + member_info.update(_split_line(self._get_row(), mem)) + member_info["modified"] = _parse_date(member_info["modified"]) + member_info["created"] = _parse_date(member_info["created"]) + self.member_info = member_info + + # read field names + types = {1: "numeric", 2: "char"} + fieldcount = int(self._get_row()[54:58]) + datalength = fieldnamelength * fieldcount + # round up to nearest 80 + if datalength % 80: + datalength += 80 - datalength % 80 + fielddata = self.filepath_or_buffer.read(datalength) + fields = [] + obs_length = 0 + while len(fielddata) >= fieldnamelength: + # pull data for one field + fieldbytes, fielddata = ( + fielddata[:fieldnamelength], + fielddata[fieldnamelength:], + ) + + # rest at end gets ignored, so if field is short, pad out + # to match struct pattern below + fieldbytes = fieldbytes.ljust(140) + + fieldstruct = struct.unpack(">hhhh8s40s8shhh2s8shhl52s", fieldbytes) + field = dict(zip(_fieldkeys, fieldstruct)) + del field["_"] + field["ntype"] = types[field["ntype"]] + fl = field["field_length"] + if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)): + msg = f"Floating field width {fl} is not between 2 and 8." + raise TypeError(msg) + + for k, v in field.items(): + try: + field[k] = v.strip() + except AttributeError: + pass + + obs_length += field["field_length"] + fields += [field] + + header = self._get_row() + if not header == _correct_obs_header: + raise ValueError("Observation header not found.") + + self.fields = fields + self.record_length = obs_length + self.record_start = self.filepath_or_buffer.tell() + + self.nobs = self._record_count() + self.columns = [x["name"].decode() for x in self.fields] + + # Setup the dtype. + dtypel = [ + ("s" + str(i), "S" + str(field["field_length"])) + for i, field in enumerate(self.fields) + ] + dtype = np.dtype(dtypel) + self._dtype = dtype + + def __next__(self) -> pd.DataFrame: + return self.read(nrows=self._chunksize or 1) + + def _record_count(self) -> int: + """ + Get number of records in file. + + This is maybe suboptimal because we have to seek to the end of + the file. + + Side effect: returns file position to record_start. 
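[Illustration only, not part of the imported source.] The header records parsed above are fixed-width 80-byte cards, and ``_split_line`` simply slices them by the ``(name, length)`` pairs and drops the ``_`` filler. On a made-up card fragment:

    >>> _split_line("SAS     DEMO    SASDATA ", [["prefix", 8], ["set_name", 8], ["_", 8]])
    {'prefix': 'SAS', 'set_name': 'DEMO'}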
+ """ + self.filepath_or_buffer.seek(0, 2) + total_records_length = self.filepath_or_buffer.tell() - self.record_start + + if total_records_length % 80 != 0: + warnings.warn( + "xport file may be corrupted.", + stacklevel=find_stack_level(), + ) + + if self.record_length > 80: + self.filepath_or_buffer.seek(self.record_start) + return total_records_length // self.record_length + + self.filepath_or_buffer.seek(-80, 2) + last_card_bytes = self.filepath_or_buffer.read(80) + last_card = np.frombuffer(last_card_bytes, dtype=np.uint64) + + # 8 byte blank + ix = np.flatnonzero(last_card == 2314885530818453536) + + if len(ix) == 0: + tail_pad = 0 + else: + tail_pad = 8 * len(ix) + + self.filepath_or_buffer.seek(self.record_start) + + return (total_records_length - tail_pad) // self.record_length + + def get_chunk(self, size=None) -> pd.DataFrame: + """ + Reads lines from Xport file and returns as dataframe + + Parameters + ---------- + size : int, defaults to None + Number of lines to read. If None, reads whole file. + + Returns + ------- + DataFrame + """ + if size is None: + size = self._chunksize + return self.read(nrows=size) + + def _missing_double(self, vec): + v = vec.view(dtype="u1,u1,u2,u4") + miss = (v["f1"] == 0) & (v["f2"] == 0) & (v["f3"] == 0) + miss1 = ( + ((v["f0"] >= 0x41) & (v["f0"] <= 0x5A)) + | (v["f0"] == 0x5F) + | (v["f0"] == 0x2E) + ) + miss &= miss1 + return miss + + @Appender(_read_method_doc) + def read(self, nrows: int | None = None) -> pd.DataFrame: + + if nrows is None: + nrows = self.nobs + + read_lines = min(nrows, self.nobs - self._lines_read) + read_len = read_lines * self.record_length + if read_len <= 0: + self.close() + raise StopIteration + raw = self.filepath_or_buffer.read(read_len) + data = np.frombuffer(raw, dtype=self._dtype, count=read_lines) + + df = pd.DataFrame(index=range(read_lines)) + for j, x in enumerate(self.columns): + vec = data["s" + str(j)] + ntype = self.fields[j]["ntype"] + if ntype == "numeric": + vec = _handle_truncated_float_vec(vec, self.fields[j]["field_length"]) + miss = self._missing_double(vec) + v = _parse_float_vec(vec) + v[miss] = np.nan + elif self.fields[j]["ntype"] == "char": + v = [y.rstrip() for y in vec] + + if self._encoding is not None: + v = [y.decode(self._encoding) for y in v] + + df[x] = v + + if self._index is None: + df.index = pd.Index(range(self._lines_read, self._lines_read + read_lines)) + else: + df = df.set_index(self._index) + + self._lines_read += read_lines + + return df diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py new file mode 100644 index 00000000..35917416 --- /dev/null +++ b/pandas/io/sas/sasreader.py @@ -0,0 +1,175 @@ +""" +Read SAS sas7bdat or xport files. +""" +from __future__ import annotations + +from abc import ( + ABCMeta, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Hashable, + overload, +) + +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadBuffer, +) +from pandas.util._decorators import ( + deprecate_nonkeyword_arguments, + doc, +) + +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import stringify_path + +if TYPE_CHECKING: + from pandas import DataFrame + + +# TODO(PY38): replace with Protocol in Python 3.8 +class ReaderBase(metaclass=ABCMeta): + """ + Protocol for XportReader and SAS7BDATReader classes. 
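[Illustration only, not part of the imported source.] The IBM-to-IEEE conversion in ``_parse_float_vec``, used by ``XportReader.read`` above, can be sanity-checked on the XPORT encoding of 1.0: exponent byte 0x41 (excess-64 exponent of 1) followed by fraction bytes 0x10 00 00 00 00 00 00.

    >>> import numpy as np
    >>> from pandas.io.sas.sas_xport import _parse_float_vec
    >>> _parse_float_vec(np.array([b"\x41\x10\x00\x00\x00\x00\x00\x00"], dtype="S8"))
    array([1.])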
+ """ + + @abstractmethod + def read(self, nrows: int | None = None) -> DataFrame: + pass + + @abstractmethod + def close(self) -> None: + pass + + def __enter__(self) -> ReaderBase: + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + self.close() + + +@overload +def read_sas( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + format: str | None = ..., + index: Hashable | None = ..., + encoding: str | None = ..., + chunksize: int = ..., + iterator: bool = ..., + compression: CompressionOptions = ..., +) -> ReaderBase: + ... + + +@overload +def read_sas( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + format: str | None = ..., + index: Hashable | None = ..., + encoding: str | None = ..., + chunksize: None = ..., + iterator: bool = ..., + compression: CompressionOptions = ..., +) -> DataFrame | ReaderBase: + ... + + +@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) +@doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer") +def read_sas( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + format: str | None = None, + index: Hashable | None = None, + encoding: str | None = None, + chunksize: int | None = None, + iterator: bool = False, + compression: CompressionOptions = "infer", +) -> DataFrame | ReaderBase: + """ + Read SAS files stored as either XPORT or SAS7BDAT format files. + + Parameters + ---------- + filepath_or_buffer : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``read()`` function. The string could be a URL. + Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.sas``. + format : str {{'xport', 'sas7bdat'}} or None + If None, file format is inferred from file extension. If 'xport' or + 'sas7bdat', uses the corresponding format. + index : identifier of index column, defaults to None + Identifier of column that should be used as index of the DataFrame. + encoding : str, default is None + Encoding for text data. If None, text data are stored as raw bytes. + chunksize : int + Read file `chunksize` lines at a time, returns iterator. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. + iterator : bool, defaults to False + If True, returns an iterator for reading the file incrementally. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. 
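[Illustrative sketch; the file name and the per-chunk handler are hypothetical.] Because the readers returned here implement both the iterator and context-manager protocols, chunked reading composes as:

    with pd.read_sas("data.sas7bdat", chunksize=10_000) as reader:
        for chunk in reader:
            process(chunk)  # hypothetical per-chunk handler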
+ {decompression_options} + + Returns + ------- + DataFrame if iterator=False and chunksize=None, else SAS7BDATReader + or XportReader + """ + if format is None: + buffer_error_msg = ( + "If this is a buffer object rather " + "than a string name, you must specify a format string" + ) + filepath_or_buffer = stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, str): + raise ValueError(buffer_error_msg) + fname = filepath_or_buffer.lower() + if ".xpt" in fname: + format = "xport" + elif ".sas7bdat" in fname: + format = "sas7bdat" + else: + raise ValueError( + f"unable to infer format of SAS file from filename: {repr(fname)}" + ) + + reader: ReaderBase + if format.lower() == "xport": + from pandas.io.sas.sas_xport import XportReader + + reader = XportReader( + filepath_or_buffer, + index=index, + encoding=encoding, + chunksize=chunksize, + compression=compression, + ) + elif format.lower() == "sas7bdat": + from pandas.io.sas.sas7bdat import SAS7BDATReader + + reader = SAS7BDATReader( + filepath_or_buffer, + index=index, + encoding=encoding, + chunksize=chunksize, + compression=compression, + ) + else: + raise ValueError("unknown SAS format") + + if iterator or chunksize: + return reader + + with reader: + return reader.read() diff --git a/pandas/io/spss.py b/pandas/io/spss.py new file mode 100644 index 00000000..1b83d339 --- /dev/null +++ b/pandas/io/spss.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Sequence, +) + +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.inference import is_list_like + +from pandas.io.common import stringify_path + +if TYPE_CHECKING: + from pandas import DataFrame + + +def read_spss( + path: str | Path, + usecols: Sequence[str] | None = None, + convert_categoricals: bool = True, +) -> DataFrame: + """ + Load an SPSS file from the file path, returning a DataFrame. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + path : str or Path + File path. + usecols : list-like, optional + Return a subset of the columns. If None, return all columns. + convert_categoricals : bool, default is True + Convert categorical columns into pd.Categorical. + + Returns + ------- + DataFrame + """ + pyreadstat = import_optional_dependency("pyreadstat") + + if usecols is not None: + if not is_list_like(usecols): + raise TypeError("usecols must be list-like.") + else: + usecols = list(usecols) # pyreadstat requires a list + + df, _ = pyreadstat.read_sav( + stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals + ) + return df diff --git a/pandas/io/sql.py b/pandas/io/sql.py new file mode 100644 index 00000000..b5036e11 --- /dev/null +++ b/pandas/io/sql.py @@ -0,0 +1,2257 @@ +""" +Collection of query wrappers / abstractions to both facilitate data +retrieval and to reduce dependency on DB-specific API. 
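[Minimal usage sketch for ``read_spss``; the file name is hypothetical and ``pyreadstat`` must be installed.]

    import pandas as pd

    df = pd.read_spss("survey.sav", usecols=["id", "age"], convert_categoricals=False)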
+""" + +from __future__ import annotations + +from contextlib import contextmanager +from datetime import ( + date, + datetime, + time, +) +from functools import partial +import re +from typing import ( + TYPE_CHECKING, + Any, + Iterator, + cast, + overload, +) +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import ( + DateTimeErrorChoices, + DtypeArg, + IndexLabel, +) +from pandas.compat._optional import import_optional_dependency +from pandas.errors import ( + AbstractMethodError, + DatabaseError, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_dict_like, + is_integer, + is_list_like, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna + +from pandas import get_option +from pandas.core.api import ( + DataFrame, + Series, +) +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.tools.datetimes import to_datetime + +if TYPE_CHECKING: + from sqlalchemy import Table + + +# ----------------------------------------------------------------------------- +# -- Helper functions + + +def _convert_params(sql, params): + """Convert SQL and params args to DBAPI2.0 compliant format.""" + args = [sql] + if params is not None: + if hasattr(params, "keys"): # test if params is a mapping + args += [params] + else: + args += [list(params)] + return args + + +def _process_parse_dates_argument(parse_dates): + """Process parse_dates argument for read_sql functions""" + # handle non-list entries for parse_dates gracefully + if parse_dates is True or parse_dates is None or parse_dates is False: + parse_dates = [] + + elif not hasattr(parse_dates, "__iter__"): + parse_dates = [parse_dates] + return parse_dates + + +def _handle_date_column( + col, utc: bool | None = None, format: str | dict[str, Any] | None = None +): + if isinstance(format, dict): + # GH35185 Allow custom error values in parse_dates argument of + # read_sql like functions. + # Format can take on custom to_datetime argument values such as + # {"errors": "coerce"} or {"dayfirst": True} + error: DateTimeErrorChoices = format.pop("errors", None) or "ignore" + return to_datetime(col, errors=error, **format) + else: + # Allow passing of formatting string for integers + # GH17855 + if format is None and ( + issubclass(col.dtype.type, np.floating) + or issubclass(col.dtype.type, np.integer) + ): + format = "s" + if format in ["D", "d", "h", "m", "s", "ms", "us", "ns"]: + return to_datetime(col, errors="coerce", unit=format, utc=utc) + elif is_datetime64tz_dtype(col.dtype): + # coerce to UTC timezone + # GH11216 + return to_datetime(col, utc=True) + else: + return to_datetime(col, errors="coerce", format=format, utc=utc) + + +def _parse_date_columns(data_frame, parse_dates): + """ + Force non-datetime columns to be read as such. + Supports both string formatted and integer timestamp columns. 
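[Illustration only; the table, column names and ``conn`` are hypothetical.] The date handling above accepts all of the documented ``parse_dates`` shapes:

    pd.read_sql_query("SELECT * FROM events", conn, parse_dates=["created"])
    pd.read_sql_query("SELECT * FROM events", conn, parse_dates={"created": "%Y-%m-%d"})
    pd.read_sql_query("SELECT * FROM events", conn, parse_dates={"ts": "s"})  # integer epoch seconds
    pd.read_sql_query("SELECT * FROM events", conn, parse_dates={"created": {"errors": "coerce"}})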
+ """ + parse_dates = _process_parse_dates_argument(parse_dates) + + # we want to coerce datetime64_tz dtypes for now to UTC + # we could in theory do a 'nice' conversion from a FixedOffset tz + # GH11216 + for col_name, df_col in data_frame.items(): + if is_datetime64tz_dtype(df_col.dtype) or col_name in parse_dates: + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + data_frame[col_name] = _handle_date_column(df_col, format=fmt) + + return data_frame + + +def _wrap_result( + data, + columns, + index_col=None, + coerce_float: bool = True, + parse_dates=None, + dtype: DtypeArg | None = None, +): + """Wrap result set of query in a DataFrame.""" + frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float) + + if dtype: + frame = frame.astype(dtype) + + frame = _parse_date_columns(frame, parse_dates) + + if index_col is not None: + frame.set_index(index_col, inplace=True) + + return frame + + +def execute(sql, con, params=None): + """ + Execute the given SQL query using the provided connection object. + + Parameters + ---------- + sql : string + SQL query to be executed. + con : SQLAlchemy connectable(engine/connection) or sqlite3 connection + Using SQLAlchemy makes it possible to use any DB supported by the + library. + If a DBAPI2 object, only sqlite3 is supported. + params : list or tuple, optional, default: None + List of parameters to pass to execute method. + + Returns + ------- + Results Iterable + """ + pandas_sql = pandasSQL_builder(con) + args = _convert_params(sql, params) + return pandas_sql.execute(*args) + + +# ----------------------------------------------------------------------------- +# -- Read and write to DataFrames + + +@overload +def read_sql_table( + table_name, + con, + schema=..., + index_col: str | list[str] | None = ..., + coerce_float=..., + parse_dates: list[str] | dict[str, str] | None = ..., + columns: list[str] | None = ..., + chunksize: None = ..., +) -> DataFrame: + ... + + +@overload +def read_sql_table( + table_name, + con, + schema=..., + index_col: str | list[str] | None = ..., + coerce_float=..., + parse_dates: list[str] | dict[str, str] | None = ..., + columns: list[str] | None = ..., + chunksize: int = ..., +) -> Iterator[DataFrame]: + ... + + +def read_sql_table( + table_name: str, + con, + schema: str | None = None, + index_col: str | list[str] | None = None, + coerce_float: bool = True, + parse_dates: list[str] | dict[str, str] | None = None, + columns: list[str] | None = None, + chunksize: int | None = None, +) -> DataFrame | Iterator[DataFrame]: + """ + Read SQL database table into a DataFrame. + + Given a table name and a SQLAlchemy connectable, returns a DataFrame. + This function does not support DBAPI connections. + + Parameters + ---------- + table_name : str + Name of SQL table in database. + con : SQLAlchemy connectable or str + A database URI could be provided as str. + SQLite DBAPI connection mode not supported. + schema : str, default None + Name of SQL schema in database to query (if database flavor + supports this). Uses default schema if None (default). + index_col : str or list of str, optional, default: None + Column(s) to set as index(MultiIndex). + coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point. Can result in loss of Precision. + parse_dates : list or dict, default None + - List of column names to parse as dates. 
+ - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg dict}``, where the arg dict corresponds + to the keyword arguments of :func:`pandas.to_datetime` + Especially useful with databases without native Datetime support, + such as SQLite. + columns : list, default None + List of column names to select from SQL table. + chunksize : int, default None + If specified, returns an iterator where `chunksize` is the number of + rows to include in each chunk. + + Returns + ------- + DataFrame or Iterator[DataFrame] + A SQL table is returned as two-dimensional data structure with labeled + axes. + + See Also + -------- + read_sql_query : Read SQL query into a DataFrame. + read_sql : Read SQL query or database table into a DataFrame. + + Notes + ----- + Any datetime values with time zone information will be converted to UTC. + + Examples + -------- + >>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP + """ + pandas_sql = pandasSQL_builder(con, schema=schema) + if not pandas_sql.has_table(table_name): + raise ValueError(f"Table {table_name} not found") + + # error: Item "SQLiteDatabase" of "Union[SQLDatabase, SQLiteDatabase]" + # has no attribute "read_table" + table = pandas_sql.read_table( # type: ignore[union-attr] + table_name, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + + if table is not None: + return table + else: + raise ValueError(f"Table {table_name} not found", con) + + +@overload +def read_sql_query( + sql, + con, + index_col: str | list[str] | None = ..., + coerce_float=..., + params: list[str] | dict[str, str] | None = ..., + parse_dates: list[str] | dict[str, str] | None = ..., + chunksize: None = ..., + dtype: DtypeArg | None = ..., +) -> DataFrame: + ... + + +@overload +def read_sql_query( + sql, + con, + index_col: str | list[str] | None = ..., + coerce_float=..., + params: list[str] | dict[str, str] | None = ..., + parse_dates: list[str] | dict[str, str] | None = ..., + chunksize: int = ..., + dtype: DtypeArg | None = ..., +) -> Iterator[DataFrame]: + ... + + +def read_sql_query( + sql, + con, + index_col: str | list[str] | None = None, + coerce_float: bool = True, + params: list[str] | dict[str, str] | None = None, + parse_dates: list[str] | dict[str, str] | None = None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, +) -> DataFrame | Iterator[DataFrame]: + """ + Read SQL query into a DataFrame. + + Returns a DataFrame corresponding to the result set of the query + string. Optionally provide an `index_col` parameter to use one of the + columns as the index, otherwise default integer index will be used. + + Parameters + ---------- + sql : str SQL query or SQLAlchemy Selectable (select or text object) + SQL query to be executed. + con : SQLAlchemy connectable, str, or sqlite3 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. If a DBAPI2 object, only sqlite3 is supported. + index_col : str or list of str, optional, default: None + Column(s) to set as index(MultiIndex). + coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point. Useful for SQL result sets. + params : list, tuple or dict, optional, default: None + List of parameters to pass to execute method. 
The syntax used + to pass parameters is database driver dependent. Check your + database driver documentation for which of the five syntax styles, + described in PEP 249's paramstyle, is supported. + Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg dict}``, where the arg dict corresponds + to the keyword arguments of :func:`pandas.to_datetime` + Especially useful with databases without native Datetime support, + such as SQLite. + chunksize : int, default None + If specified, return an iterator where `chunksize` is the number of + rows to include in each chunk. + dtype : Type name or dict of columns + Data type for data or columns. E.g. np.float64 or + {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}. + + .. versionadded:: 1.3.0 + + Returns + ------- + DataFrame or Iterator[DataFrame] + + See Also + -------- + read_sql_table : Read SQL database table into a DataFrame. + read_sql : Read SQL query or database table into a DataFrame. + + Notes + ----- + Any datetime values with time zone information parsed via the `parse_dates` + parameter will be converted to UTC. + """ + pandas_sql = pandasSQL_builder(con) + return pandas_sql.read_query( + sql, + index_col=index_col, + params=params, + coerce_float=coerce_float, + parse_dates=parse_dates, + chunksize=chunksize, + dtype=dtype, + ) + + +@overload +def read_sql( + sql, + con, + index_col: str | list[str] | None = ..., + coerce_float=..., + params=..., + parse_dates=..., + columns: list[str] = ..., + chunksize: None = ..., +) -> DataFrame: + ... + + +@overload +def read_sql( + sql, + con, + index_col: str | list[str] | None = ..., + coerce_float=..., + params=..., + parse_dates=..., + columns: list[str] = ..., + chunksize: int = ..., +) -> Iterator[DataFrame]: + ... + + +def read_sql( + sql, + con, + index_col: str | list[str] | None = None, + coerce_float: bool = True, + params=None, + parse_dates=None, + columns: list[str] | None = None, + chunksize: int | None = None, +) -> DataFrame | Iterator[DataFrame]: + """ + Read SQL query or database table into a DataFrame. + + This function is a convenience wrapper around ``read_sql_table`` and + ``read_sql_query`` (for backward compatibility). It will delegate + to the specific function depending on the provided input. A SQL query + will be routed to ``read_sql_query``, while a database table name will + be routed to ``read_sql_table``. Note that the delegated function might + have more specific notes about their functionality not listed here. + + Parameters + ---------- + sql : str or SQLAlchemy Selectable (select or text object) + SQL query to be executed or a table name. + con : SQLAlchemy connectable, str, or sqlite3 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible + for engine disposal and connection closure for the SQLAlchemy connectable; str + connections are closed automatically. See + `here `_. + index_col : str or list of str, optional, default: None + Column(s) to set as index(MultiIndex). 
+ coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + params : list, tuple or dict, optional, default: None + List of parameters to pass to execute method. The syntax used + to pass parameters is database driver dependent. Check your + database driver documentation for which of the five syntax styles, + described in PEP 249's paramstyle, is supported. + Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg dict}``, where the arg dict corresponds + to the keyword arguments of :func:`pandas.to_datetime` + Especially useful with databases without native Datetime support, + such as SQLite. + columns : list, default: None + List of column names to select from SQL table (only used when reading + a table). + chunksize : int, default None + If specified, return an iterator where `chunksize` is the + number of rows to include in each chunk. + + Returns + ------- + DataFrame or Iterator[DataFrame] + + See Also + -------- + read_sql_table : Read SQL database table into a DataFrame. + read_sql_query : Read SQL query into a DataFrame. + + Examples + -------- + Read data from SQL via either a SQL query or a SQL tablename. + When using a SQLite database only SQL queries are accepted, + providing only the SQL tablename will result in an error. + + >>> from sqlite3 import connect + >>> conn = connect(':memory:') + >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], + ... columns=['int_column', 'date_column']) + >>> df.to_sql('test_data', conn) + 2 + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn) + int_column date_column + 0 0 10/11/12 + 1 1 12/11/10 + + >>> pd.read_sql('test_data', 'postgres:///db_name') # doctest:+SKIP + + Apply date parsing to columns through the ``parse_dates`` argument + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... parse_dates=["date_column"]) + int_column date_column + 0 0 2012-10-11 + 1 1 2010-12-11 + + The ``parse_dates`` argument calls ``pd.to_datetime`` on the provided columns. + Custom argument values for applying ``pd.to_datetime`` on a column are specified + via a dictionary format: + 1. Ignore errors while parsing the values of "date_column" + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... parse_dates={"date_column": {"errors": "ignore"}}) + int_column date_column + 0 0 2012-10-11 + 1 1 2010-12-11 + + 2. Apply a dayfirst date parsing order on the values of "date_column" + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... parse_dates={"date_column": {"dayfirst": True}}) + int_column date_column + 0 0 2012-11-10 + 1 1 2010-11-12 + + 3. Apply custom formatting when date parsing the values of "date_column" + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... 
parse_dates={"date_column": {"format": "%d/%m/%y"}}) + int_column date_column + 0 0 2012-11-10 + 1 1 2010-11-12 + """ + pandas_sql = pandasSQL_builder(con) + + if isinstance(pandas_sql, SQLiteDatabase): + return pandas_sql.read_query( + sql, + index_col=index_col, + params=params, + coerce_float=coerce_float, + parse_dates=parse_dates, + chunksize=chunksize, + ) + + try: + _is_table_name = pandas_sql.has_table(sql) + except Exception: + # using generic exception to catch errors from sql drivers (GH24988) + _is_table_name = False + + if _is_table_name: + pandas_sql.meta.reflect(bind=pandas_sql.connectable, only=[sql]) + return pandas_sql.read_table( + sql, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + else: + return pandas_sql.read_query( + sql, + index_col=index_col, + params=params, + coerce_float=coerce_float, + parse_dates=parse_dates, + chunksize=chunksize, + ) + + +def to_sql( + frame, + name: str, + con, + schema: str | None = None, + if_exists: str = "fail", + index: bool = True, + index_label: IndexLabel = None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, + method: str | None = None, + engine: str = "auto", + **engine_kwargs, +) -> int | None: + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame : DataFrame, Series + name : str + Name of SQL table. + con : SQLAlchemy connectable(engine/connection) or database string URI + or sqlite3 DBAPI2 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object, only sqlite3 is supported. + schema : str, optional + Name of SQL schema in database to write to (if database flavor + supports this). If None, use default schema (default). + if_exists : {'fail', 'replace', 'append'}, default 'fail' + - fail: If table exists, do nothing. + - replace: If table exists, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. + index : bool, default True + Write DataFrame index as a column. + index_label : str or sequence, optional + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + chunksize : int, optional + Specify the number of rows in each batch to be written at a time. + By default, all rows will be written at once. + dtype : dict or scalar, optional + Specifying the datatype for columns. If a dictionary is used, the + keys should be the column names and the values should be the + SQLAlchemy types or strings for the sqlite3 fallback mode. If a + scalar is provided, it will be applied to all columns. + method : {None, 'multi', callable}, optional + Controls the SQL insertion clause used: + + - None : Uses standard SQL ``INSERT`` clause (one per row). + - ``'multi'``: Pass multiple values in a single ``INSERT`` clause. + - callable with signature ``(pd_table, conn, keys, data_iter) -> int | None``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + engine : {'auto', 'sqlalchemy'}, default 'auto' + SQL engine library to use. If 'auto', then the option + ``io.sql.engine`` is used. The default ``io.sql.engine`` + behavior is 'sqlalchemy' + + .. versionadded:: 1.3.0 + + **engine_kwargs + Any additional kwargs are passed to the engine. + + Returns + ------- + None or int + Number of rows affected by to_sql. 
None is returned if the callable + passed into ``method`` does not return an integer number of rows. + + .. versionadded:: 1.4.0 + + Notes + ----- + The returned rows affected is the sum of the ``rowcount`` attribute of ``sqlite3.Cursor`` + or SQLAlchemy connectable. The returned value may not reflect the exact number of written + rows as stipulated in the + `sqlite3 `__ or + `SQLAlchemy `__ + """ # noqa:E501 + if if_exists not in ("fail", "replace", "append"): + raise ValueError(f"'{if_exists}' is not valid for if_exists") + + pandas_sql = pandasSQL_builder(con, schema=schema) + + if isinstance(frame, Series): + frame = frame.to_frame() + elif not isinstance(frame, DataFrame): + raise NotImplementedError( + "'frame' argument should be either a Series or a DataFrame" + ) + + return pandas_sql.to_sql( + frame, + name, + if_exists=if_exists, + index=index, + index_label=index_label, + schema=schema, + chunksize=chunksize, + dtype=dtype, + method=method, + engine=engine, + **engine_kwargs, + ) + + +def has_table(table_name: str, con, schema: str | None = None) -> bool: + """ + Check if DataBase has named table. + + Parameters + ---------- + table_name: string + Name of SQL table. + con: SQLAlchemy connectable(engine/connection) or sqlite3 DBAPI2 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object, only sqlite3 is supported. + schema : string, default None + Name of SQL schema in database to write to (if database flavor supports + this). If None, use default schema (default). + + Returns + ------- + boolean + """ + pandas_sql = pandasSQL_builder(con, schema=schema) + return pandas_sql.has_table(table_name) + + +table_exists = has_table + + +def pandasSQL_builder(con, schema: str | None = None) -> SQLDatabase | SQLiteDatabase: + """ + Convenience function to return the correct PandasSQL subclass based on the + provided parameters. + """ + import sqlite3 + import warnings + + if isinstance(con, sqlite3.Connection) or con is None: + return SQLiteDatabase(con) + + sqlalchemy = import_optional_dependency("sqlalchemy", errors="ignore") + + if isinstance(con, str): + if sqlalchemy is None: + raise ImportError("Using URI string without sqlalchemy installed.") + else: + con = sqlalchemy.create_engine(con) + + if sqlalchemy is not None and isinstance(con, sqlalchemy.engine.Connectable): + return SQLDatabase(con, schema=schema) + + warnings.warn( + "pandas only supports SQLAlchemy connectable (engine/connection) or " + "database string URI or sqlite3 DBAPI2 connection. " + "Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.", + UserWarning, + stacklevel=find_stack_level(), + ) + return SQLiteDatabase(con) + + +class SQLTable(PandasObject): + """ + For mapping Pandas tables to SQL tables. + Uses fact that table is reflected by SQLAlchemy to + do better type conversions. + Also holds various flags needed to avoid having to + pass them between functions all the time. 
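[For orientation; illustrative, not part of the imported source.] The dispatch in ``pandasSQL_builder`` above resolves as:

    import sqlite3
    pandasSQL_builder(sqlite3.connect(":memory:"))  # -> SQLiteDatabase (DBAPI2 fallback)
    pandasSQL_builder("sqlite:///:memory:")         # -> SQLDatabase (URI -> SQLAlchemy engine)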
+ """ + + # TODO: support for multiIndex + + def __init__( + self, + name: str, + pandas_sql_engine, + frame=None, + index: bool | str | list[str] | None = True, + if_exists: str = "fail", + prefix: str = "pandas", + index_label=None, + schema=None, + keys=None, + dtype: DtypeArg | None = None, + ) -> None: + self.name = name + self.pd_sql = pandas_sql_engine + self.prefix = prefix + self.frame = frame + self.index = self._index_name(index, index_label) + self.schema = schema + self.if_exists = if_exists + self.keys = keys + self.dtype = dtype + + if frame is not None: + # We want to initialize based on a dataframe + self.table = self._create_table_setup() + else: + # no data provided, read-only mode + self.table = self.pd_sql.get_table(self.name, self.schema) + + if self.table is None: + raise ValueError(f"Could not init table '{name}'") + + def exists(self): + return self.pd_sql.has_table(self.name, self.schema) + + def sql_schema(self) -> str: + from sqlalchemy.schema import CreateTable + + return str(CreateTable(self.table).compile(self.pd_sql.connectable)) + + def _execute_create(self): + # Inserting table into database, add to MetaData object + self.table = self.table.to_metadata(self.pd_sql.meta) + self.table.create(bind=self.pd_sql.connectable) + + def create(self) -> None: + if self.exists(): + if self.if_exists == "fail": + raise ValueError(f"Table '{self.name}' already exists.") + elif self.if_exists == "replace": + self.pd_sql.drop_table(self.name, self.schema) + self._execute_create() + elif self.if_exists == "append": + pass + else: + raise ValueError(f"'{self.if_exists}' is not valid for if_exists") + else: + self._execute_create() + + def _execute_insert(self, conn, keys: list[str], data_iter) -> int: + """ + Execute SQL statement inserting data + + Parameters + ---------- + conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection + keys : list of str + Column names + data_iter : generator of list + Each item contains a list of values to be inserted + """ + data = [dict(zip(keys, row)) for row in data_iter] + result = conn.execute(self.table.insert(), data) + return result.rowcount + + def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int: + """ + Alternative to _execute_insert for DBs support multivalue INSERT. + + Note: multi-value insert is usually faster for analytics DBs + and tables containing a few columns + but performance degrades quickly with increase of columns. 
+ """ + + from sqlalchemy import insert + + data = [dict(zip(keys, row)) for row in data_iter] + stmt = insert(self.table).values(data) + result = conn.execute(stmt) + return result.rowcount + + def insert_data(self) -> tuple[list[str], list[np.ndarray]]: + if self.index is not None: + temp = self.frame.copy() + temp.index.names = self.index + try: + temp.reset_index(inplace=True) + except ValueError as err: + raise ValueError(f"duplicate name in index/columns: {err}") from err + else: + temp = self.frame + + column_names = list(map(str, temp.columns)) + ncols = len(column_names) + # this just pre-allocates the list: None's will be replaced with ndarrays + # error: List item 0 has incompatible type "None"; expected "ndarray" + data_list: list[np.ndarray] = [None] * ncols # type: ignore[list-item] + + for i, (_, ser) in enumerate(temp.items()): + vals = ser._values + if vals.dtype.kind == "M": + d = vals.to_pydatetime() + elif vals.dtype.kind == "m": + # store as integers, see GH#6921, GH#7076 + d = vals.view("i8").astype(object) + else: + d = vals.astype(object) + + assert isinstance(d, np.ndarray), type(d) + + if ser._can_hold_na: + # Note: this will miss timedeltas since they are converted to int + mask = isna(d) + d[mask] = None + + data_list[i] = d + + return column_names, data_list + + def insert( + self, chunksize: int | None = None, method: str | None = None + ) -> int | None: + + # set insert method + if method is None: + exec_insert = self._execute_insert + elif method == "multi": + exec_insert = self._execute_insert_multi + elif callable(method): + exec_insert = partial(method, self) + else: + raise ValueError(f"Invalid parameter `method`: {method}") + + keys, data_list = self.insert_data() + + nrows = len(self.frame) + + if nrows == 0: + return 0 + + if chunksize is None: + chunksize = nrows + elif chunksize == 0: + raise ValueError("chunksize argument should be non-zero") + + chunks = (nrows // chunksize) + 1 + total_inserted = None + with self.pd_sql.run_transaction() as conn: + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, nrows) + if start_i >= end_i: + break + + chunk_iter = zip(*(arr[start_i:end_i] for arr in data_list)) + num_inserted = exec_insert(conn, keys, chunk_iter) + # GH 46891 + if is_integer(num_inserted): + if total_inserted is None: + total_inserted = num_inserted + else: + total_inserted += num_inserted + return total_inserted + + def _query_iterator( + self, + result, + chunksize: str | None, + columns, + coerce_float: bool = True, + parse_dates=None, + ): + """Return generator through chunked result set.""" + has_read_data = False + while True: + data = result.fetchmany(chunksize) + if not data: + if not has_read_data: + yield DataFrame.from_records( + [], columns=columns, coerce_float=coerce_float + ) + break + else: + has_read_data = True + self.frame = DataFrame.from_records( + data, columns=columns, coerce_float=coerce_float + ) + + self._harmonize_columns(parse_dates=parse_dates) + + if self.index is not None: + self.frame.set_index(self.index, inplace=True) + + yield self.frame + + def read( + self, + coerce_float: bool = True, + parse_dates=None, + columns=None, + chunksize=None, + ) -> DataFrame | Iterator[DataFrame]: + from sqlalchemy import select + + if columns is not None and len(columns) > 0: + cols = [self.table.c[n] for n in columns] + if self.index is not None: + for idx in self.index[::-1]: + cols.insert(0, self.table.c[idx]) + sql_select = select(*cols) + else: + sql_select = select(self.table) + 
result = self.pd_sql.execute(sql_select) + column_names = result.keys() + + if chunksize is not None: + return self._query_iterator( + result, + chunksize, + column_names, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + else: + data = result.fetchall() + self.frame = DataFrame.from_records( + data, columns=column_names, coerce_float=coerce_float + ) + + self._harmonize_columns(parse_dates=parse_dates) + + if self.index is not None: + self.frame.set_index(self.index, inplace=True) + + return self.frame + + def _index_name(self, index, index_label): + # for writing: index=True to include index in sql table + if index is True: + nlevels = self.frame.index.nlevels + # if index_label is specified, set this as index name(s) + if index_label is not None: + if not isinstance(index_label, list): + index_label = [index_label] + if len(index_label) != nlevels: + raise ValueError( + "Length of 'index_label' should match number of " + f"levels, which is {nlevels}" + ) + else: + return index_label + # return the used column labels for the index columns + if ( + nlevels == 1 + and "index" not in self.frame.columns + and self.frame.index.name is None + ): + return ["index"] + else: + return com.fill_missing_names(self.frame.index.names) + + # for reading: index=(list of) string to specify column to set as index + elif isinstance(index, str): + return [index] + elif isinstance(index, list): + return index + else: + return None + + def _get_column_names_and_types(self, dtype_mapper): + column_names_and_types = [] + if self.index is not None: + for i, idx_label in enumerate(self.index): + idx_type = dtype_mapper(self.frame.index._get_level_values(i)) + column_names_and_types.append((str(idx_label), idx_type, True)) + + column_names_and_types += [ + (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False) + for i in range(len(self.frame.columns)) + ] + + return column_names_and_types + + def _create_table_setup(self): + from sqlalchemy import ( + Column, + PrimaryKeyConstraint, + Table, + ) + from sqlalchemy.schema import MetaData + + column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type) + + columns = [ + Column(name, typ, index=is_index) + for name, typ, is_index in column_names_and_types + ] + + if self.keys is not None: + if not is_list_like(self.keys): + keys = [self.keys] + else: + keys = self.keys + pkc = PrimaryKeyConstraint(*keys, name=self.name + "_pk") + columns.append(pkc) + + schema = self.schema or self.pd_sql.meta.schema + + # At this point, attach to new metadata, only attach to self.meta + # once table is created. + meta = MetaData() + return Table(self.name, meta, *columns, schema=schema) + + def _harmonize_columns(self, parse_dates=None): + """ + Make the DataFrame's column types align with the SQL table + column types. + Need to work around limited NA value support. Floats are always + fine, ints must always be floats if there are Null values. + Booleans are hard because converting bool column with None replaces + all Nones with false. Therefore only convert bool if there are no + NA values. + Datetimes should already be converted to np.datetime64 if supported, + but here we also force conversion if required. 
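``_harmonize_columns`` encodes the NA limitation noted above: an integer SQL column that contains NULLs cannot be restored to ``int64``. A small sketch of the observable effect, assuming an in-memory SQLite database:

    import sqlite3

    import pandas as pd

    con = sqlite3.connect(":memory:")
    con.execute("CREATE TABLE t (x INTEGER)")
    con.executemany("INSERT INTO t VALUES (?)", [(1,), (None,)])

    df = pd.read_sql("SELECT * FROM t", con)
    # The NULL forces a NaN-capable dtype, so "x" comes back as float64
    # rather than int64.
    print(df["x"].dtype)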
+ """ + parse_dates = _process_parse_dates_argument(parse_dates) + + for sql_col in self.table.columns: + col_name = sql_col.name + try: + df_col = self.frame[col_name] + + # Handle date parsing upfront; don't try to convert columns + # twice + if col_name in parse_dates: + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + self.frame[col_name] = _handle_date_column(df_col, format=fmt) + continue + + # the type the dataframe column should have + col_type = self._get_dtype(sql_col.type) + + if ( + col_type is datetime + or col_type is date + or col_type is DatetimeTZDtype + ): + # Convert tz-aware Datetime SQL columns to UTC + utc = col_type is DatetimeTZDtype + self.frame[col_name] = _handle_date_column(df_col, utc=utc) + elif col_type is float: + # floats support NA, can always convert! + self.frame[col_name] = df_col.astype(col_type, copy=False) + + elif len(df_col) == df_col.count(): + # No NA values, can convert ints and bools + if col_type is np.dtype("int64") or col_type is bool: + self.frame[col_name] = df_col.astype(col_type, copy=False) + except KeyError: + pass # this column not in results + + def _sqlalchemy_type(self, col): + + dtype: DtypeArg = self.dtype or {} + if is_dict_like(dtype): + dtype = cast(dict, dtype) + if col.name in dtype: + return dtype[col.name] + + # Infer type of column, while ignoring missing values. + # Needed for inserting typed data containing NULLs, GH 8778. + col_type = lib.infer_dtype(col, skipna=True) + + from sqlalchemy.types import ( + TIMESTAMP, + BigInteger, + Boolean, + Date, + DateTime, + Float, + Integer, + SmallInteger, + Text, + Time, + ) + + if col_type == "datetime64" or col_type == "datetime": + # GH 9086: TIMESTAMP is the suggested type if the column contains + # timezone information + try: + if col.dt.tz is not None: + return TIMESTAMP(timezone=True) + except AttributeError: + # The column is actually a DatetimeIndex + # GH 26761 or an Index with date-like data e.g. 9999-01-01 + if getattr(col, "tz", None) is not None: + return TIMESTAMP(timezone=True) + return DateTime + if col_type == "timedelta64": + warnings.warn( + "the 'timedelta' type is not supported, and will be " + "written as integer values (ns frequency) to the database.", + UserWarning, + stacklevel=find_stack_level(), + ) + return BigInteger + elif col_type == "floating": + if col.dtype == "float32": + return Float(precision=23) + else: + return Float(precision=53) + elif col_type == "integer": + # GH35076 Map pandas integer to optimal SQLAlchemy integer type + if col.dtype.name.lower() in ("int8", "uint8", "int16"): + return SmallInteger + elif col.dtype.name.lower() in ("uint16", "int32"): + return Integer + elif col.dtype.name.lower() == "uint64": + raise ValueError("Unsigned 64 bit integer datatype is not supported") + else: + return BigInteger + elif col_type == "boolean": + return Boolean + elif col_type == "date": + return Date + elif col_type == "time": + return Time + elif col_type == "complex": + raise ValueError("Complex datatypes not supported") + + return Text + + def _get_dtype(self, sqltype): + from sqlalchemy.types import ( + TIMESTAMP, + Boolean, + Date, + DateTime, + Float, + Integer, + ) + + if isinstance(sqltype, Float): + return float + elif isinstance(sqltype, Integer): + # TODO: Refine integer size. 
+ return np.dtype("int64") + elif isinstance(sqltype, TIMESTAMP): + # we have a timezone capable type + if not sqltype.timezone: + return datetime + return DatetimeTZDtype + elif isinstance(sqltype, DateTime): + # Caution: np.datetime64 is also a subclass of np.number. + return datetime + elif isinstance(sqltype, Date): + return date + elif isinstance(sqltype, Boolean): + return bool + return object + + +class PandasSQL(PandasObject): + """ + Subclasses Should define read_sql and to_sql. + """ + + def read_sql(self, *args, **kwargs): + raise ValueError( + "PandasSQL must be created with an SQLAlchemy " + "connectable or sqlite connection" + ) + + def to_sql( + self, + frame, + name, + if_exists: str = "fail", + index: bool = True, + index_label=None, + schema=None, + chunksize=None, + dtype: DtypeArg | None = None, + method=None, + ) -> int | None: + raise ValueError( + "PandasSQL must be created with an SQLAlchemy " + "connectable or sqlite connection" + ) + + +class BaseEngine: + def insert_records( + self, + table: SQLTable, + con, + frame, + name, + index=True, + schema=None, + chunksize=None, + method=None, + **engine_kwargs, + ) -> int | None: + """ + Inserts data into already-prepared table + """ + raise AbstractMethodError(self) + + +class SQLAlchemyEngine(BaseEngine): + def __init__(self) -> None: + import_optional_dependency( + "sqlalchemy", extra="sqlalchemy is required for SQL support." + ) + + def insert_records( + self, + table: SQLTable, + con, + frame, + name, + index=True, + schema=None, + chunksize=None, + method=None, + **engine_kwargs, + ) -> int | None: + from sqlalchemy import exc + + try: + return table.insert(chunksize=chunksize, method=method) + except exc.SQLAlchemyError as err: + # GH34431 + # https://stackoverflow.com/a/67358288/6067848 + msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?# + )|inf can not be used with MySQL""" + err_text = str(err.orig) + if re.search(msg, err_text): + raise ValueError("inf cannot be used with MySQL") from err + else: + raise err + + +def get_engine(engine: str) -> BaseEngine: + """return our implementation""" + if engine == "auto": + engine = get_option("io.sql.engine") + + if engine == "auto": + # try engines in this order + engine_classes = [SQLAlchemyEngine] + + error_msgs = "" + for engine_class in engine_classes: + try: + return engine_class() + except ImportError as err: + error_msgs += "\n - " + str(err) + + raise ImportError( + "Unable to find a usable engine; " + "tried using: 'sqlalchemy'.\n" + "A suitable version of " + "sqlalchemy is required for sql I/O " + "support.\n" + "Trying to import the above resulted in these errors:" + f"{error_msgs}" + ) + + elif engine == "sqlalchemy": + return SQLAlchemyEngine() + + raise ValueError("engine must be one of 'auto', 'sqlalchemy'") + + +class SQLDatabase(PandasSQL): + """ + This class enables conversion between DataFrame and SQL databases + using SQLAlchemy to handle DataBase abstraction. + + Parameters + ---------- + engine : SQLAlchemy connectable + Connectable to connect with the database. Using SQLAlchemy makes it + possible to use any DB supported by that library. + schema : string, default None + Name of SQL schema in database to write to (if database flavor + supports this). If None, use default schema (default). 
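``get_engine`` above resolves the ``engine='auto'`` default through the ``io.sql.engine`` option (currently only the SQLAlchemy engine exists). A brief sketch of both ways of selecting it:

    import pandas as pd

    # Either set the option globally ...
    pd.set_option("io.sql.engine", "sqlalchemy")

    # ... or request the engine per call (engine="auto" is the default):
    # df.to_sql("t", sqlalchemy_engine, engine="sqlalchemy")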
+ + """ + + def __init__(self, engine, schema: str | None = None) -> None: + from sqlalchemy.schema import MetaData + + self.connectable = engine + self.meta = MetaData(schema=schema) + + @contextmanager + def run_transaction(self): + from sqlalchemy.engine import Engine + + if isinstance(self.connectable, Engine): + with self.connectable.connect() as conn: + with conn.begin(): + yield conn + else: + yield self.connectable + + def execute(self, *args, **kwargs): + """Simple passthrough to SQLAlchemy connectable""" + return self.connectable.execution_options().execute(*args, **kwargs) + + def read_table( + self, + table_name: str, + index_col: str | list[str] | None = None, + coerce_float: bool = True, + parse_dates=None, + columns=None, + schema: str | None = None, + chunksize: int | None = None, + ) -> DataFrame | Iterator[DataFrame]: + """ + Read SQL database table into a DataFrame. + + Parameters + ---------- + table_name : str + Name of SQL table in database. + index_col : string, optional, default: None + Column to set as index. + coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects + (like decimal.Decimal) to floating point. This can result in + loss of precision. + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg}``, where the arg corresponds + to the keyword arguments of :func:`pandas.to_datetime`. + Especially useful with databases without native Datetime support, + such as SQLite. + columns : list, default: None + List of column names to select from SQL table. + schema : string, default None + Name of SQL schema in database to query (if database flavor + supports this). If specified, this overwrites the default + schema of the SQL database object. + chunksize : int, default None + If specified, return an iterator where `chunksize` is the number + of rows to include in each chunk. + + Returns + ------- + DataFrame + + See Also + -------- + pandas.read_sql_table + SQLDatabase.read_query + + """ + table = SQLTable(table_name, self, index=index_col, schema=schema) + return table.read( + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + + @staticmethod + def _query_iterator( + result, + chunksize: int, + columns, + index_col=None, + coerce_float=True, + parse_dates=None, + dtype: DtypeArg | None = None, + ): + """Return generator through chunked result set""" + has_read_data = False + while True: + data = result.fetchmany(chunksize) + if not data: + if not has_read_data: + yield _wrap_result( + [], + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + break + else: + has_read_data = True + yield _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + + def read_query( + self, + sql: str, + index_col: str | list[str] | None = None, + coerce_float: bool = True, + parse_dates=None, + params=None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, + ) -> DataFrame | Iterator[DataFrame]: + """ + Read SQL query into a DataFrame. + + Parameters + ---------- + sql : str + SQL query to be executed. + index_col : string, optional, default: None + Column name to use as index for the returned DataFrame object. 
+ coerce_float : bool, default True + Attempt to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + params : list, tuple or dict, optional, default: None + List of parameters to pass to execute method. The syntax used + to pass parameters is database driver dependent. Check your + database driver documentation for which of the five syntax styles, + described in PEP 249's paramstyle, is supported. + Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'} + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg dict}``, where the arg dict + corresponds to the keyword arguments of + :func:`pandas.to_datetime` Especially useful with databases + without native Datetime support, such as SQLite. + chunksize : int, default None + If specified, return an iterator where `chunksize` is the number + of rows to include in each chunk. + dtype : Type name or dict of columns + Data type for data or columns. E.g. np.float64 or + {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’} + + .. versionadded:: 1.3.0 + + Returns + ------- + DataFrame + + See Also + -------- + read_sql_table : Read SQL database table into a DataFrame. + read_sql + + """ + args = _convert_params(sql, params) + + result = self.execute(*args) + columns = result.keys() + + if chunksize is not None: + return self._query_iterator( + result, + chunksize, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + else: + data = result.fetchall() + frame = _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + return frame + + read_sql = read_query + + def prep_table( + self, + frame, + name, + if_exists="fail", + index=True, + index_label=None, + schema=None, + dtype: DtypeArg | None = None, + ) -> SQLTable: + """ + Prepares table in the database for data insertion. Creates it if needed, etc. + """ + if dtype: + if not is_dict_like(dtype): + # error: Value expression in dictionary comprehension has incompatible + # type "Union[ExtensionDtype, str, dtype[Any], Type[object], + # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]], + # Type[str], Type[float], Type[int], Type[complex], Type[bool], + # Type[object]]]]"; expected type "Union[ExtensionDtype, str, + # dtype[Any], Type[object]]" + dtype = {col_name: dtype for col_name in frame} # type: ignore[misc] + else: + dtype = cast(dict, dtype) + + from sqlalchemy.types import ( + TypeEngine, + to_instance, + ) + + for col, my_type in dtype.items(): + if not isinstance(to_instance(my_type), TypeEngine): + raise ValueError(f"The type of {col} is not a SQLAlchemy type") + + table = SQLTable( + name, + self, + frame=frame, + index=index, + if_exists=if_exists, + index_label=index_label, + schema=schema, + dtype=dtype, + ) + table.create() + return table + + def check_case_sensitive( + self, + name: str, + schema: str | None, + ) -> None: + """ + Checks table name for issues with case-sensitivity. + Method is called after data is inserted. 
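``read_query`` passes ``params`` straight through to the driver, so the placeholder style is driver dependent, as the docstring notes. A minimal sketch using sqlite3's qmark style:

    import sqlite3

    import pandas as pd

    con = sqlite3.connect(":memory:")
    con.execute("CREATE TABLE t (name TEXT, value INTEGER)")
    con.executemany("INSERT INTO t VALUES (?, ?)", [("a", 1), ("b", 2)])

    # sqlite3 uses "?" placeholders; psycopg2 would use %(name)s instead.
    df = pd.read_sql("SELECT * FROM t WHERE value > ?", con, params=(1,))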
+ """ + if not name.isdigit() and not name.islower(): + # check for potentially case sensitivity issues (GH7815) + # Only check when name is not a number and name is not lower case + from sqlalchemy import inspect as sqlalchemy_inspect + + with self.connectable.connect() as conn: + insp = sqlalchemy_inspect(conn) + table_names = insp.get_table_names(schema=schema or self.meta.schema) + if name not in table_names: + msg = ( + f"The provided table name '{name}' is not found exactly as " + "such in the database after writing the table, possibly " + "due to case sensitivity issues. Consider using lower " + "case table names." + ) + warnings.warn( + msg, + UserWarning, + stacklevel=find_stack_level(), + ) + + def to_sql( + self, + frame, + name: str, + if_exists: str = "fail", + index: bool = True, + index_label=None, + schema: str | None = None, + chunksize=None, + dtype: DtypeArg | None = None, + method=None, + engine="auto", + **engine_kwargs, + ) -> int | None: + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame : DataFrame + name : string + Name of SQL table. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + - fail: If table exists, do nothing. + - replace: If table exists, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. + index : boolean, default True + Write DataFrame index as a column. + index_label : string or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + schema : string, default None + Name of SQL schema in database to write to (if database flavor + supports this). If specified, this overwrites the default + schema of the SQLDatabase object. + chunksize : int, default None + If not None, then rows will be written in batches of this size at a + time. If None, all rows will be written at once. + dtype : single type or dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a SQLAlchemy type. If all columns are of the same type, one + single value can be used. + method : {None', 'multi', callable}, default None + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + engine : {'auto', 'sqlalchemy'}, default 'auto' + SQL engine library to use. If 'auto', then the option + ``io.sql.engine`` is used. The default ``io.sql.engine`` + behavior is 'sqlalchemy' + + .. versionadded:: 1.3.0 + + **engine_kwargs + Any additional kwargs are passed to the engine. 
+ """ + sql_engine = get_engine(engine) + + table = self.prep_table( + frame=frame, + name=name, + if_exists=if_exists, + index=index, + index_label=index_label, + schema=schema, + dtype=dtype, + ) + + total_inserted = sql_engine.insert_records( + table=table, + con=self.connectable, + frame=frame, + name=name, + index=index, + schema=schema, + chunksize=chunksize, + method=method, + **engine_kwargs, + ) + + self.check_case_sensitive(name=name, schema=schema) + return total_inserted + + @property + def tables(self): + return self.meta.tables + + def has_table(self, name: str, schema: str | None = None): + from sqlalchemy import inspect as sqlalchemy_inspect + + insp = sqlalchemy_inspect(self.connectable) + return insp.has_table(name, schema or self.meta.schema) + + def get_table(self, table_name: str, schema: str | None = None) -> Table: + from sqlalchemy import ( + Numeric, + Table, + ) + + schema = schema or self.meta.schema + tbl = Table( + table_name, self.meta, autoload_with=self.connectable, schema=schema + ) + for column in tbl.columns: + if isinstance(column.type, Numeric): + column.type.asdecimal = False + return tbl + + def drop_table(self, table_name: str, schema: str | None = None) -> None: + schema = schema or self.meta.schema + if self.has_table(table_name, schema): + self.meta.reflect(bind=self.connectable, only=[table_name], schema=schema) + self.get_table(table_name, schema).drop(bind=self.connectable) + self.meta.clear() + + def _create_sql_schema( + self, + frame: DataFrame, + table_name: str, + keys: list[str] | None = None, + dtype: DtypeArg | None = None, + schema: str | None = None, + ): + table = SQLTable( + table_name, + self, + frame=frame, + index=False, + keys=keys, + dtype=dtype, + schema=schema, + ) + return str(table.sql_schema()) + + +# ---- SQL without SQLAlchemy --- +# sqlite-specific sql strings and handler class +# dictionary used for readability purposes +_SQL_TYPES = { + "string": "TEXT", + "floating": "REAL", + "integer": "INTEGER", + "datetime": "TIMESTAMP", + "date": "DATE", + "time": "TIME", + "boolean": "INTEGER", +} + + +def _get_unicode_name(name): + try: + uname = str(name).encode("utf-8", "strict").decode("utf-8") + except UnicodeError as err: + raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") from err + return uname + + +def _get_valid_sqlite_name(name): + # See https://stackoverflow.com/questions/6514274/how-do-you-escape-strings\ + # -for-sqlite-table-column-names-in-python + # Ensure the string can be encoded as UTF-8. + # Ensure the string does not include any NUL characters. + # Replace all " with "". + # Wrap the entire thing in double quotes. + + uname = _get_unicode_name(name) + if not len(uname): + raise ValueError("Empty table or column name specified") + + nul_index = uname.find("\x00") + if nul_index >= 0: + raise ValueError("SQLite identifier cannot contain NULs") + return '"' + uname.replace('"', '""') + '"' + + +class SQLiteTable(SQLTable): + """ + Patch the SQLTable for fallback support. + Instead of a table variable just use the Create Table statement. 
+ """ + + def __init__(self, *args, **kwargs) -> None: + # GH 8341 + # register an adapter callable for datetime.time object + import sqlite3 + + # this will transform time(12,34,56,789) into '12:34:56.000789' + # (this is what sqlalchemy does) + def _adapt_time(t): + # This is faster than strftime + return f"{t.hour:02d}:{t.minute:02d}:{t.second:02d}.{t.microsecond:06d}" + + sqlite3.register_adapter(time, _adapt_time) + super().__init__(*args, **kwargs) + + def sql_schema(self) -> str: + return str(";\n".join(self.table)) + + def _execute_create(self): + with self.pd_sql.run_transaction() as conn: + for stmt in self.table: + conn.execute(stmt) + + def insert_statement(self, *, num_rows: int) -> str: + names = list(map(str, self.frame.columns)) + wld = "?" # wildcard char + escape = _get_valid_sqlite_name + + if self.index is not None: + for idx in self.index[::-1]: + names.insert(0, idx) + + bracketed_names = [escape(column) for column in names] + col_names = ",".join(bracketed_names) + + row_wildcards = ",".join([wld] * len(names)) + wildcards = ",".join([f"({row_wildcards})" for _ in range(num_rows)]) + insert_statement = ( + f"INSERT INTO {escape(self.name)} ({col_names}) VALUES {wildcards}" + ) + return insert_statement + + def _execute_insert(self, conn, keys, data_iter) -> int: + data_list = list(data_iter) + conn.executemany(self.insert_statement(num_rows=1), data_list) + return conn.rowcount + + def _execute_insert_multi(self, conn, keys, data_iter) -> int: + data_list = list(data_iter) + flattened_data = [x for row in data_list for x in row] + conn.execute(self.insert_statement(num_rows=len(data_list)), flattened_data) + return conn.rowcount + + def _create_table_setup(self): + """ + Return a list of SQL statements that creates a table reflecting the + structure of a DataFrame. The first entry will be a CREATE TABLE + statement while the rest will be CREATE INDEX statements. + """ + column_names_and_types = self._get_column_names_and_types(self._sql_type_name) + escape = _get_valid_sqlite_name + + create_tbl_stmts = [ + escape(cname) + " " + ctype for cname, ctype, _ in column_names_and_types + ] + + if self.keys is not None and len(self.keys): + if not is_list_like(self.keys): + keys = [self.keys] + else: + keys = self.keys + cnames_br = ", ".join([escape(c) for c in keys]) + create_tbl_stmts.append( + f"CONSTRAINT {self.name}_pk PRIMARY KEY ({cnames_br})" + ) + if self.schema: + schema_name = self.schema + "." + else: + schema_name = "" + create_stmts = [ + "CREATE TABLE " + + schema_name + + escape(self.name) + + " (\n" + + ",\n ".join(create_tbl_stmts) + + "\n)" + ] + + ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index] + if len(ix_cols): + cnames = "_".join(ix_cols) + cnames_br = ",".join([escape(c) for c in ix_cols]) + create_stmts.append( + "CREATE INDEX " + + escape("ix_" + self.name + "_" + cnames) + + "ON " + + escape(self.name) + + " (" + + cnames_br + + ")" + ) + + return create_stmts + + def _sql_type_name(self, col): + dtype: DtypeArg = self.dtype or {} + if is_dict_like(dtype): + dtype = cast(dict, dtype) + if col.name in dtype: + return dtype[col.name] + + # Infer type of column, while ignoring missing values. + # Needed for inserting typed data containing NULLs, GH 8778. 
+ col_type = lib.infer_dtype(col, skipna=True) + + if col_type == "timedelta64": + warnings.warn( + "the 'timedelta' type is not supported, and will be " + "written as integer values (ns frequency) to the database.", + UserWarning, + stacklevel=find_stack_level(), + ) + col_type = "integer" + + elif col_type == "datetime64": + col_type = "datetime" + + elif col_type == "empty": + col_type = "string" + + elif col_type == "complex": + raise ValueError("Complex datatypes not supported") + + if col_type not in _SQL_TYPES: + col_type = "string" + + return _SQL_TYPES[col_type] + + +class SQLiteDatabase(PandasSQL): + """ + Version of SQLDatabase to support SQLite connections (fallback without + SQLAlchemy). This should only be used internally. + + Parameters + ---------- + con : sqlite connection object + + """ + + def __init__(self, con) -> None: + self.con = con + + @contextmanager + def run_transaction(self): + cur = self.con.cursor() + try: + yield cur + self.con.commit() + except Exception: + self.con.rollback() + raise + finally: + cur.close() + + def execute(self, *args, **kwargs): + cur = self.con.cursor() + try: + cur.execute(*args, **kwargs) + return cur + except Exception as exc: + try: + self.con.rollback() + except Exception as inner_exc: # pragma: no cover + ex = DatabaseError( + f"Execution failed on sql: {args[0]}\n{exc}\nunable to rollback" + ) + raise ex from inner_exc + + ex = DatabaseError(f"Execution failed on sql '{args[0]}': {exc}") + raise ex from exc + + @staticmethod + def _query_iterator( + cursor, + chunksize: int, + columns, + index_col=None, + coerce_float: bool = True, + parse_dates=None, + dtype: DtypeArg | None = None, + ): + """Return generator through chunked result set""" + has_read_data = False + while True: + data = cursor.fetchmany(chunksize) + if type(data) == tuple: + data = list(data) + if not data: + cursor.close() + if not has_read_data: + yield DataFrame.from_records( + [], columns=columns, coerce_float=coerce_float + ) + break + else: + has_read_data = True + yield _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + + def read_query( + self, + sql, + index_col=None, + coerce_float: bool = True, + params=None, + parse_dates=None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, + ) -> DataFrame | Iterator[DataFrame]: + + args = _convert_params(sql, params) + cursor = self.execute(*args) + columns = [col_desc[0] for col_desc in cursor.description] + + if chunksize is not None: + return self._query_iterator( + cursor, + chunksize, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + else: + data = self._fetchall_as_list(cursor) + cursor.close() + + frame = _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + dtype=dtype, + ) + return frame + + def _fetchall_as_list(self, cur): + result = cur.fetchall() + if not isinstance(result, list): + result = list(result) + return result + + def to_sql( + self, + frame, + name, + if_exists: str = "fail", + index: bool = True, + index_label=None, + schema=None, + chunksize=None, + dtype: DtypeArg | None = None, + method=None, + **kwargs, + ) -> int | None: + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame: DataFrame + name: string + Name of SQL table. + if_exists: {'fail', 'replace', 'append'}, default 'fail' + fail: If table exists, do nothing. 
+ replace: If table exists, drop it, recreate it, and insert data. + append: If table exists, insert data. Create if it does not exist. + index : bool, default True + Write DataFrame index as a column + index_label : string or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + schema : string, default None + Ignored parameter included for compatibility with SQLAlchemy + version of ``to_sql``. + chunksize : int, default None + If not None, then rows will be written in batches of this + size at a time. If None, all rows will be written at once. + dtype : single type or dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a string. If all columns are of the same type, one single value + can be used. + method : {None, 'multi', callable}, default None + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + """ + if dtype: + if not is_dict_like(dtype): + # error: Value expression in dictionary comprehension has incompatible + # type "Union[ExtensionDtype, str, dtype[Any], Type[object], + # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]], + # Type[str], Type[float], Type[int], Type[complex], Type[bool], + # Type[object]]]]"; expected type "Union[ExtensionDtype, str, + # dtype[Any], Type[object]]" + dtype = {col_name: dtype for col_name in frame} # type: ignore[misc] + else: + dtype = cast(dict, dtype) + + for col, my_type in dtype.items(): + if not isinstance(my_type, str): + raise ValueError(f"{col} ({my_type}) not a string") + + table = SQLiteTable( + name, + self, + frame=frame, + index=index, + if_exists=if_exists, + index_label=index_label, + dtype=dtype, + ) + table.create() + return table.insert(chunksize, method) + + def has_table(self, name: str, schema: str | None = None) -> bool: + + wld = "?" + query = f"SELECT name FROM sqlite_master WHERE type='table' AND name={wld};" + + return len(self.execute(query, [name]).fetchall()) > 0 + + def get_table(self, table_name: str, schema: str | None = None) -> None: + return None # not supported in fallback mode + + def drop_table(self, name: str, schema: str | None = None) -> None: + drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}" + self.execute(drop_sql) + + def _create_sql_schema( + self, + frame, + table_name: str, + keys=None, + dtype: DtypeArg | None = None, + schema: str | None = None, + ): + table = SQLiteTable( + table_name, + self, + frame=frame, + index=False, + keys=keys, + dtype=dtype, + schema=schema, + ) + return str(table.sql_schema()) + + +def get_schema( + frame, + name: str, + keys=None, + con=None, + dtype: DtypeArg | None = None, + schema: str | None = None, +) -> str: + """ + Get the SQL db table schema for the given frame. + + Parameters + ---------- + frame : DataFrame + name : str + name of SQL table + keys : string or sequence, default: None + columns to use a primary key + con: an open SQL database connection object or a SQLAlchemy connectable + Using SQLAlchemy makes it possible to use any DB supported by that + library, default: None + If a DBAPI2 object, only sqlite3 is supported. 
+ dtype : dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a SQLAlchemy type, or a string for sqlite3 fallback connection. + schema: str, default: None + Optional specifying the schema to be used in creating the table. + + .. versionadded:: 1.2.0 + """ + pandas_sql = pandasSQL_builder(con=con) + return pandas_sql._create_sql_schema( + frame, name, keys=keys, dtype=dtype, schema=schema + ) diff --git a/pandas/io/stata.py b/pandas/io/stata.py new file mode 100644 index 00000000..fd4d2c23 --- /dev/null +++ b/pandas/io/stata.py @@ -0,0 +1,3678 @@ +""" +Module contains tools for processing Stata files into DataFrames + +The StataReader below was originally written by Joe Presbrey as part of PyDTA. +It has been extended and improved by Skipper Seabold from the Statsmodels +project who also developed the StataWriter and was finally added to pandas in +a once again improved version. + +You can find more information on http://presbrey.mit.edu/PyDTA and +https://www.statsmodels.org/devel/ +""" +from __future__ import annotations + +from collections import abc +import datetime +from io import BytesIO +import os +import struct +import sys +from typing import ( + IO, + TYPE_CHECKING, + Any, + AnyStr, + Final, + Hashable, + Sequence, + cast, +) +import warnings + +from dateutil.relativedelta import relativedelta +import numpy as np + +from pandas._libs.lib import infer_dtype +from pandas._libs.writers import max_len_string_array +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, +) +from pandas.errors import ( + CategoricalConversionWarning, + InvalidColumnName, + PossiblePrecisionLoss, + ValueLabelTypeMismatch, +) +from pandas.util._decorators import ( + Appender, + deprecate_nonkeyword_arguments, + doc, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + ensure_object, + is_categorical_dtype, + is_datetime64_dtype, + is_numeric_dtype, +) + +from pandas import ( + Categorical, + DatetimeIndex, + NaT, + Timestamp, + isna, + to_datetime, + to_timedelta, +) +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.integer import IntegerDtype +from pandas.core.frame import DataFrame +from pandas.core.indexes.base import Index +from pandas.core.series import Series +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import get_handle + +if TYPE_CHECKING: + from typing import Literal + +_version_error = ( + "Version of given Stata file is {version}. pandas supports importing " + "versions 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), " + "114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16)," + "and 119 (Stata 15/16, over 32,767 variables)." +) + +_statafile_processing_params1 = """\ +convert_dates : bool, default True + Convert date variables to DataFrame time values. +convert_categoricals : bool, default True + Read value labels and convert columns to Categorical/Factor variables.""" + +_statafile_processing_params2 = """\ +index_col : str, optional + Column to set as index. +convert_missing : bool, default False + Flag indicating whether to convert missing values to their Stata + representations. If False, missing values are replaced with nan. + If True, columns containing missing values are returned with + object data types and missing values are represented by + StataMissingValue objects. +preserve_dtypes : bool, default True + Preserve Stata datatypes. 
If False, numeric data are upcast to pandas + default types for foreign data (float64 or int64). +columns : list or None + Columns to retain. Columns will be returned in the given order. None + returns all columns. +order_categoricals : bool, default True + Flag indicating whether converted categorical data are ordered.""" + +_chunksize_params = """\ +chunksize : int, default None + Return StataReader object for iterations, returns chunks with + given number of lines.""" + +_iterator_params = """\ +iterator : bool, default False + Return StataReader object.""" + +_reader_notes = """\ +Notes +----- +Categorical variables read through an iterator may not have the same +categories and dtype. This occurs when a variable stored in a DTA +file is associated to an incomplete set of value labels that only +label a strict subset of the values.""" + +_read_stata_doc = f""" +Read Stata file into DataFrame. + +Parameters +---------- +filepath_or_buffer : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.dta``. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handle (e.g. via builtin ``open`` function) + or ``StringIO``. +{_statafile_processing_params1} +{_statafile_processing_params2} +{_chunksize_params} +{_iterator_params} +{_shared_docs["decompression_options"] % "filepath_or_buffer"} +{_shared_docs["storage_options"]} + +Returns +------- +DataFrame or StataReader + +See Also +-------- +io.stata.StataReader : Low-level reader for Stata data files. +DataFrame.to_stata: Export Stata data files. + +{_reader_notes} + +Examples +-------- + +Creating a dummy stata for this example +>>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', +... 'parrot'], +... 'speed': [350, 18, 361, 15]}}) # doctest: +SKIP +>>> df.to_stata('animals.dta') # doctest: +SKIP + +Read a Stata dta file: + +>>> df = pd.read_stata('animals.dta') # doctest: +SKIP + +Read a Stata dta file in 10,000 line chunks: +>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP +>>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP +>>> df.to_stata('filename.dta') # doctest: +SKIP + +>>> itr = pd.read_stata('filename.dta', chunksize=10000) # doctest: +SKIP +>>> for chunk in itr: +... # Operate on a single chunk, e.g., chunk.mean() +... pass # doctest: +SKIP +""" + +_read_method_doc = f"""\ +Reads observations from Stata file, converting them into a dataframe + +Parameters +---------- +nrows : int + Number of lines to read from data file, if None read whole file. +{_statafile_processing_params1} +{_statafile_processing_params2} + +Returns +------- +DataFrame +""" + +_stata_reader_doc = f"""\ +Class for reading Stata dta files. + +Parameters +---------- +path_or_buf : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or object + implementing a binary read() functions. +{_statafile_processing_params1} +{_statafile_processing_params2} +{_chunksize_params} +{_shared_docs["decompression_options"]} +{_shared_docs["storage_options"]} + +{_reader_notes} +""" + + +_date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"] + + +stata_epoch: Final = datetime.datetime(1960, 1, 1) + + +# TODO: Add typing. 
As of January 2020 it is not possible to type this function since +# mypy doesn't understand that a Series and an int can be combined using mathematical +# operations. (+, -). +def _stata_elapsed_date_to_datetime_vec(dates, fmt) -> Series: + """ + Convert from SIF to datetime. https://www.stata.com/help.cgi?datetime + + Parameters + ---------- + dates : Series + The Stata Internal Format date to convert to datetime according to fmt + fmt : str + The format to convert to. Can be, tc, td, tw, tm, tq, th, ty + Returns + + Returns + ------- + converted : Series + The converted dates + + Examples + -------- + >>> dates = pd.Series([52]) + >>> _stata_elapsed_date_to_datetime_vec(dates , "%tw") + 0 1961-01-01 + dtype: datetime64[ns] + + Notes + ----- + datetime/c - tc + milliseconds since 01jan1960 00:00:00.000, assuming 86,400 s/day + datetime/C - tC - NOT IMPLEMENTED + milliseconds since 01jan1960 00:00:00.000, adjusted for leap seconds + date - td + days since 01jan1960 (01jan1960 = 0) + weekly date - tw + weeks since 1960w1 + This assumes 52 weeks in a year, then adds 7 * remainder of the weeks. + The datetime value is the start of the week in terms of days in the + year, not ISO calendar weeks. + monthly date - tm + months since 1960m1 + quarterly date - tq + quarters since 1960q1 + half-yearly date - th + half-years since 1960h1 yearly + date - ty + years since 0000 + """ + MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year + MAX_DAY_DELTA = (Timestamp.max - datetime.datetime(1960, 1, 1)).days + MIN_DAY_DELTA = (Timestamp.min - datetime.datetime(1960, 1, 1)).days + MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000 + MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000 + + def convert_year_month_safe(year, month) -> Series: + """ + Convert year and month to datetimes, using pandas vectorized versions + when the date range falls within the range supported by pandas. + Otherwise it falls back to a slower but more robust method + using datetime. + """ + if year.max() < MAX_YEAR and year.min() > MIN_YEAR: + return to_datetime(100 * year + month, format="%Y%m") + else: + index = getattr(year, "index", None) + return Series( + [datetime.datetime(y, m, 1) for y, m in zip(year, month)], index=index + ) + + def convert_year_days_safe(year, days) -> Series: + """ + Converts year (e.g. 1999) and days since the start of the year to a + datetime or datetime64 Series + """ + if year.max() < (MAX_YEAR - 1) and year.min() > MIN_YEAR: + return to_datetime(year, format="%Y") + to_timedelta(days, unit="d") + else: + index = getattr(year, "index", None) + value = [ + datetime.datetime(y, 1, 1) + relativedelta(days=int(d)) + for y, d in zip(year, days) + ] + return Series(value, index=index) + + def convert_delta_safe(base, deltas, unit) -> Series: + """ + Convert base dates and deltas to datetimes, using pandas vectorized + versions if the deltas satisfy restrictions required to be expressed + as dates in pandas. 
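The ``%td`` branch below is simply an offset from ``stata_epoch``; a one-line sketch of the arithmetic (the value 21915 is an illustrative elapsed-day count):

    import pandas as pd

    stata_epoch = pd.Timestamp("1960-01-01")
    td_value = 21915  # days since 01jan1960
    print(stata_epoch + pd.to_timedelta(td_value, unit="D"))  # 2020-01-01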
+ """ + index = getattr(deltas, "index", None) + if unit == "d": + if deltas.max() > MAX_DAY_DELTA or deltas.min() < MIN_DAY_DELTA: + values = [base + relativedelta(days=int(d)) for d in deltas] + return Series(values, index=index) + elif unit == "ms": + if deltas.max() > MAX_MS_DELTA or deltas.min() < MIN_MS_DELTA: + values = [ + base + relativedelta(microseconds=(int(d) * 1000)) for d in deltas + ] + return Series(values, index=index) + else: + raise ValueError("format not understood") + base = to_datetime(base) + deltas = to_timedelta(deltas, unit=unit) + return base + deltas + + # TODO(non-nano): If/when pandas supports more than datetime64[ns], this + # should be improved to use correct range, e.g. datetime[Y] for yearly + bad_locs = np.isnan(dates) + has_bad_values = False + if bad_locs.any(): + has_bad_values = True + # reset cache to avoid SettingWithCopy checks (we own the DataFrame and the + # `dates` Series is used to overwrite itself in the DataFramae) + dates._reset_cacher() + dates[bad_locs] = 1.0 # Replace with NaT + dates = dates.astype(np.int64) + + if fmt.startswith(("%tc", "tc")): # Delta ms relative to base + base = stata_epoch + ms = dates + conv_dates = convert_delta_safe(base, ms, "ms") + elif fmt.startswith(("%tC", "tC")): + + warnings.warn( + "Encountered %tC format. Leaving in Stata Internal Format.", + stacklevel=find_stack_level(), + ) + conv_dates = Series(dates, dtype=object) + if has_bad_values: + conv_dates[bad_locs] = NaT + return conv_dates + # Delta days relative to base + elif fmt.startswith(("%td", "td", "%d", "d")): + base = stata_epoch + days = dates + conv_dates = convert_delta_safe(base, days, "d") + # does not count leap days - 7 days is a week. + # 52nd week may have more than 7 days + elif fmt.startswith(("%tw", "tw")): + year = stata_epoch.year + dates // 52 + days = (dates % 52) * 7 + conv_dates = convert_year_days_safe(year, days) + elif fmt.startswith(("%tm", "tm")): # Delta months relative to base + year = stata_epoch.year + dates // 12 + month = (dates % 12) + 1 + conv_dates = convert_year_month_safe(year, month) + elif fmt.startswith(("%tq", "tq")): # Delta quarters relative to base + year = stata_epoch.year + dates // 4 + quarter_month = (dates % 4) * 3 + 1 + conv_dates = convert_year_month_safe(year, quarter_month) + elif fmt.startswith(("%th", "th")): # Delta half-years relative to base + year = stata_epoch.year + dates // 2 + month = (dates % 2) * 6 + 1 + conv_dates = convert_year_month_safe(year, month) + elif fmt.startswith(("%ty", "ty")): # Years -- not delta + year = dates + first_month = np.ones_like(dates) + conv_dates = convert_year_month_safe(year, first_month) + else: + raise ValueError(f"Date fmt {fmt} not understood") + + if has_bad_values: # Restore NaT for bad values + conv_dates[bad_locs] = NaT + + return conv_dates + + +def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series: + """ + Convert from datetime to SIF. https://www.stata.com/help.cgi?datetime + + Parameters + ---------- + dates : Series + Series or array containing datetime.datetime or datetime64[ns] to + convert to the Stata Internal Format given by fmt + fmt : str + The format to convert to. 
Can be, tc, td, tw, tm, tq, th, ty + """ + index = dates.index + NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000 + US_PER_DAY = NS_PER_DAY / 1000 + + def parse_dates_safe(dates, delta=False, year=False, days=False): + d = {} + if is_datetime64_dtype(dates.dtype): + if delta: + time_delta = dates - stata_epoch + d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds + if days or year: + date_index = DatetimeIndex(dates) + d["year"] = date_index._data.year + d["month"] = date_index._data.month + if days: + days_in_ns = dates.view(np.int64) - to_datetime( + d["year"], format="%Y" + ).view(np.int64) + d["days"] = days_in_ns // NS_PER_DAY + + elif infer_dtype(dates, skipna=False) == "datetime": + if delta: + delta = dates._values - stata_epoch + + def f(x: datetime.timedelta) -> float: + return US_PER_DAY * x.days + 1000000 * x.seconds + x.microseconds + + v = np.vectorize(f) + d["delta"] = v(delta) + if year: + year_month = dates.apply(lambda x: 100 * x.year + x.month) + d["year"] = year_month._values // 100 + d["month"] = year_month._values - d["year"] * 100 + if days: + + def g(x: datetime.datetime) -> int: + return (x - datetime.datetime(x.year, 1, 1)).days + + v = np.vectorize(g) + d["days"] = v(dates) + else: + raise ValueError( + "Columns containing dates must contain either " + "datetime64, datetime.datetime or null values." + ) + + return DataFrame(d, index=index) + + bad_loc = isna(dates) + index = dates.index + if bad_loc.any(): + dates = Series(dates) + if is_datetime64_dtype(dates): + dates[bad_loc] = to_datetime(stata_epoch) + else: + dates[bad_loc] = stata_epoch + + if fmt in ["%tc", "tc"]: + d = parse_dates_safe(dates, delta=True) + conv_dates = d.delta / 1000 + elif fmt in ["%tC", "tC"]: + warnings.warn( + "Stata Internal Format tC not supported.", + stacklevel=find_stack_level(), + ) + conv_dates = dates + elif fmt in ["%td", "td"]: + d = parse_dates_safe(dates, delta=True) + conv_dates = d.delta // US_PER_DAY + elif fmt in ["%tw", "tw"]: + d = parse_dates_safe(dates, year=True, days=True) + conv_dates = 52 * (d.year - stata_epoch.year) + d.days // 7 + elif fmt in ["%tm", "tm"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 12 * (d.year - stata_epoch.year) + d.month - 1 + elif fmt in ["%tq", "tq"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 4 * (d.year - stata_epoch.year) + (d.month - 1) // 3 + elif fmt in ["%th", "th"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 2 * (d.year - stata_epoch.year) + (d.month > 6).astype(int) + elif fmt in ["%ty", "ty"]: + d = parse_dates_safe(dates, year=True) + conv_dates = d.year + else: + raise ValueError(f"Format {fmt} is not a known Stata date format") + + conv_dates = Series(conv_dates, dtype=np.float64) + missing_value = struct.unpack(" DataFrame: + """ + Checks the dtypes of the columns of a pandas DataFrame for + compatibility with the data types and ranges supported by Stata, and + converts if necessary. + + Parameters + ---------- + data : DataFrame + The DataFrame to check and convert + + Notes + ----- + Numeric columns in Stata must be one of int8, int16, int32, float32 or + float64, with some additional value restrictions. int8 and int16 columns + are checked for violations of the value restrictions and upcast if needed. + int64 data is not usable in Stata, and so it is downcast to int32 whenever + the value are in the int32 range, and sidecast to float64 when larger than + this range. 
If the int64 values are outside of the range of those + perfectly representable as float64 values, a warning is raised. + + bool columns are cast to int8. uint columns are converted to int of the + same size if there is no loss in precision, otherwise are upcast to a + larger type. uint64 is currently not supported since it is concerted to + object in a DataFrame. + """ + ws = "" + # original, if small, if large + conversion_data: tuple[ + tuple[type, type, type], + tuple[type, type, type], + tuple[type, type, type], + tuple[type, type, type], + tuple[type, type, type], + ] = ( + (np.bool_, np.int8, np.int8), + (np.uint8, np.int8, np.int16), + (np.uint16, np.int16, np.int32), + (np.uint32, np.int32, np.int64), + (np.uint64, np.int64, np.float64), + ) + + float32_max = struct.unpack("= 2**53: + ws = precision_loss_doc.format("uint64", "float64") + + data[col] = data[col].astype(dtype) + + # Check values and upcast if necessary + if dtype == np.int8: + if data[col].max() > 100 or data[col].min() < -127: + data[col] = data[col].astype(np.int16) + elif dtype == np.int16: + if data[col].max() > 32740 or data[col].min() < -32767: + data[col] = data[col].astype(np.int32) + elif dtype == np.int64: + if data[col].max() <= 2147483620 and data[col].min() >= -2147483647: + data[col] = data[col].astype(np.int32) + else: + data[col] = data[col].astype(np.float64) + if data[col].max() >= 2**53 or data[col].min() <= -(2**53): + ws = precision_loss_doc.format("int64", "float64") + elif dtype in (np.float32, np.float64): + if np.isinf(data[col]).any(): + raise ValueError( + f"Column {col} contains infinity or -infinity" + "which is outside the range supported by Stata." + ) + value = data[col].max() + if dtype == np.float32 and value > float32_max: + data[col] = data[col].astype(np.float64) + elif dtype == np.float64: + if value > float64_max: + raise ValueError( + f"Column {col} has a maximum value ({value}) outside the range " + f"supported by Stata ({float64_max})" + ) + if is_nullable_int: + if orig_missing.any(): + # Replace missing by Stata sentinel value + sentinel = StataMissingValue.BASE_MISSING_VALUES[data[col].dtype.name] + data.loc[orig_missing, col] = sentinel + if ws: + warnings.warn( + ws, + PossiblePrecisionLoss, + stacklevel=find_stack_level(), + ) + + return data + + +class StataValueLabel: + """ + Parse a categorical column and prepare formatted output + + Parameters + ---------- + catarray : Series + Categorical Series to encode + encoding : {"latin-1", "utf-8"} + Encoding to use for value labels. 
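``StataValueLabel`` is what makes categorical round trips work: the codes are written as integers and the categories as a value-label table, which ``read_stata`` turns back into a ``Categorical`` by default. A hedged sketch (the file name is illustrative):

    import pandas as pd

    df = pd.DataFrame({"animal": pd.Categorical(["falcon", "parrot", "falcon"])})
    df.to_stata("animals.dta")           # writes integer codes plus value labels
    back = pd.read_stata("animals.dta")  # "animal" is Categorical again
    print(back["animal"].dtype)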
+ """ + + def __init__( + self, catarray: Series, encoding: Literal["latin-1", "utf-8"] = "latin-1" + ) -> None: + + if encoding not in ("latin-1", "utf-8"): + raise ValueError("Only latin-1 and utf-8 are supported.") + self.labname = catarray.name + self._encoding = encoding + categories = catarray.cat.categories + self.value_labels: list[tuple[float, str]] = list( + zip(np.arange(len(categories)), categories) + ) + self.value_labels.sort(key=lambda x: x[0]) + + self._prepare_value_labels() + + def _prepare_value_labels(self): + """Encode value labels.""" + + self.text_len = 0 + self.txt: list[bytes] = [] + self.n = 0 + # Offsets (length of categories), converted to int32 + self.off = np.array([], dtype=np.int32) + # Values, converted to int32 + self.val = np.array([], dtype=np.int32) + self.len = 0 + + # Compute lengths and setup lists of offsets and labels + offsets: list[int] = [] + values: list[float] = [] + for vl in self.value_labels: + category: str | bytes = vl[1] + if not isinstance(category, str): + category = str(category) + warnings.warn( + value_label_mismatch_doc.format(self.labname), + ValueLabelTypeMismatch, + stacklevel=find_stack_level(), + ) + category = category.encode(self._encoding) + offsets.append(self.text_len) + self.text_len += len(category) + 1 # +1 for the padding + values.append(vl[0]) + self.txt.append(category) + self.n += 1 + + if self.text_len > 32000: + raise ValueError( + "Stata value labels for a single variable must " + "have a combined length less than 32,000 characters." + ) + + # Ensure int32 + self.off = np.array(offsets, dtype=np.int32) + self.val = np.array(values, dtype=np.int32) + + # Total length + self.len = 4 + 4 + 4 * self.n + 4 * self.n + self.text_len + + def generate_value_label(self, byteorder: str) -> bytes: + """ + Generate the binary representation of the value labels. + + Parameters + ---------- + byteorder : str + Byte order of the output + + Returns + ------- + value_label : bytes + Bytes containing the formatted value label + """ + encoding = self._encoding + bio = BytesIO() + null_byte = b"\x00" + + # len + bio.write(struct.pack(byteorder + "i", self.len)) + + # labname + labname = str(self.labname)[:32].encode(encoding) + lab_len = 32 if encoding not in ("utf-8", "utf8") else 128 + labname = _pad_bytes(labname, lab_len + 1) + bio.write(labname) + + # padding - 3 bytes + for i in range(3): + bio.write(struct.pack("c", null_byte)) + + # value_label_table + # n - int32 + bio.write(struct.pack(byteorder + "i", self.n)) + + # textlen - int32 + bio.write(struct.pack(byteorder + "i", self.text_len)) + + # off - int32 array (n elements) + for offset in self.off: + bio.write(struct.pack(byteorder + "i", offset)) + + # val - int32 array (n elements) + for value in self.val: + bio.write(struct.pack(byteorder + "i", value)) + + # txt - Text labels, null terminated + for text in self.txt: + bio.write(text + null_byte) + + return bio.getvalue() + + +class StataNonCatValueLabel(StataValueLabel): + """ + Prepare formatted version of value labels + + Parameters + ---------- + labname : str + Value label name + value_labels: Dictionary + Mapping of values to labels + encoding : {"latin-1", "utf-8"} + Encoding to use for value labels. 
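# Worked size example for the value-label record length computed above,
# len = 4 + 4 + 4*n + 4*n + text_len: three labels whose encoded text
# (each null-terminated) totals 15 bytes give a 47-byte record.  The numbers
# are illustrative only.
n, text_len = 3, 15
assert 4 + 4 + 4 * n + 4 * n + text_len == 47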
+ """ + + def __init__( + self, + labname: str, + value_labels: dict[float, str], + encoding: Literal["latin-1", "utf-8"] = "latin-1", + ) -> None: + + if encoding not in ("latin-1", "utf-8"): + raise ValueError("Only latin-1 and utf-8 are supported.") + + self.labname = labname + self._encoding = encoding + self.value_labels: list[tuple[float, str]] = sorted( + value_labels.items(), key=lambda x: x[0] + ) + self._prepare_value_labels() + + +class StataMissingValue: + """ + An observation's missing value. + + Parameters + ---------- + value : {int, float} + The Stata missing value code + + Notes + ----- + More information: + + Integer missing values make the code '.', '.a', ..., '.z' to the ranges + 101 ... 127 (for int8), 32741 ... 32767 (for int16) and 2147483621 ... + 2147483647 (for int32). Missing values for floating point data types are + more complex but the pattern is simple to discern from the following table. + + np.float32 missing values (float in Stata) + 0000007f . + 0008007f .a + 0010007f .b + ... + 00c0007f .x + 00c8007f .y + 00d0007f .z + + np.float64 missing values (double in Stata) + 000000000000e07f . + 000000000001e07f .a + 000000000002e07f .b + ... + 000000000018e07f .x + 000000000019e07f .y + 00000000001ae07f .z + """ + + # Construct a dictionary of missing values + MISSING_VALUES: dict[float, str] = {} + bases: Final = (101, 32741, 2147483621) + for b in bases: + # Conversion to long to avoid hash issues on 32 bit platforms #8968 + MISSING_VALUES[b] = "." + for i in range(1, 27): + MISSING_VALUES[i + b] = "." + chr(96 + i) + + float32_base: bytes = b"\x00\x00\x00\x7f" + increment: int = struct.unpack(" 0: + MISSING_VALUES[key] += chr(96 + i) + int_value = struct.unpack(" 0: + MISSING_VALUES[key] += chr(96 + i) + int_value = struct.unpack("q", struct.pack(" None: + self._value = value + # Conversion to int to avoid hash issues on 32 bit platforms #8968 + value = int(value) if value < 2147483648 else float(value) + self._str = self.MISSING_VALUES[value] + + @property + def string(self) -> str: + """ + The Stata representation of the missing value: '.', '.a'..'.z' + + Returns + ------- + str + The representation of the missing value. + """ + return self._str + + @property + def value(self) -> float: + """ + The binary representation of the missing value. + + Returns + ------- + {int, float} + The binary representation of the missing value. + """ + return self._value + + def __str__(self) -> str: + return self.string + + def __repr__(self) -> str: + return f"{type(self)}({self})" + + def __eq__(self, other: Any) -> bool: + return ( + isinstance(other, type(self)) + and self.string == other.string + and self.value == other.value + ) + + @classmethod + def get_base_missing_value(cls, dtype: np.dtype) -> float: + if dtype.type is np.int8: + value = cls.BASE_MISSING_VALUES["int8"] + elif dtype.type is np.int16: + value = cls.BASE_MISSING_VALUES["int16"] + elif dtype.type is np.int32: + value = cls.BASE_MISSING_VALUES["int32"] + elif dtype.type is np.float32: + value = cls.BASE_MISSING_VALUES["float32"] + elif dtype.type is np.float64: + value = cls.BASE_MISSING_VALUES["float64"] + else: + raise ValueError("Unsupported dtype") + return value + + +class StataParser: + def __init__(self) -> None: + + # type code. + # -------------------- + # str1 1 = 0x01 + # str2 2 = 0x02 + # ... 
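# Small sketch of the integer missing-value codes tabulated above: for int8
# data the sentinels '.', '.a', ..., '.z' occupy 101..127.
codes = {101 + i: "." + ("" if i == 0 else chr(96 + i)) for i in range(27)}
assert codes[101] == "." and codes[102] == ".a" and codes[127] == ".z"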
+ # str244 244 = 0xf4 + # byte 251 = 0xfb (sic) + # int 252 = 0xfc + # long 253 = 0xfd + # float 254 = 0xfe + # double 255 = 0xff + # -------------------- + # NOTE: the byte type seems to be reserved for categorical variables + # with a label, but the underlying variable is -127 to 100 + # we're going to drop the label and cast to int + self.DTYPE_MAP = dict( + list(zip(range(1, 245), [np.dtype("a" + str(i)) for i in range(1, 245)])) + + [ + (251, np.dtype(np.int8)), + (252, np.dtype(np.int16)), + (253, np.dtype(np.int32)), + (254, np.dtype(np.float32)), + (255, np.dtype(np.float64)), + ] + ) + self.DTYPE_MAP_XML: dict[int, np.dtype] = { + 32768: np.dtype(np.uint8), # Keys to GSO + 65526: np.dtype(np.float64), + 65527: np.dtype(np.float32), + 65528: np.dtype(np.int32), + 65529: np.dtype(np.int16), + 65530: np.dtype(np.int8), + } + self.TYPE_MAP = list(tuple(range(251)) + tuple("bhlfd")) + self.TYPE_MAP_XML = { + # Not really a Q, unclear how to handle byteswap + 32768: "Q", + 65526: "d", + 65527: "f", + 65528: "l", + 65529: "h", + 65530: "b", + } + # NOTE: technically, some of these are wrong. there are more numbers + # that can be represented. it's the 27 ABOVE and BELOW the max listed + # numeric data type in [U] 12.2.2 of the 11.2 manual + float32_min = b"\xff\xff\xff\xfe" + float32_max = b"\xff\xff\xff\x7e" + float64_min = b"\xff\xff\xff\xff\xff\xff\xef\xff" + float64_max = b"\xff\xff\xff\xff\xff\xff\xdf\x7f" + self.VALID_RANGE = { + "b": (-127, 100), + "h": (-32767, 32740), + "l": (-2147483647, 2147483620), + "f": ( + np.float32(struct.unpack(" None: + super().__init__() + self.col_sizes: list[int] = [] + + # Arguments to the reader (can be temporarily overridden in + # calls to read). + self._convert_dates = convert_dates + self._convert_categoricals = convert_categoricals + self._index_col = index_col + self._convert_missing = convert_missing + self._preserve_dtypes = preserve_dtypes + self._columns = columns + self._order_categoricals = order_categoricals + self._encoding = "" + self._chunksize = chunksize + self._using_iterator = False + if self._chunksize is None: + self._chunksize = 1 + elif not isinstance(chunksize, int) or chunksize <= 0: + raise ValueError("chunksize must be a positive integer when set.") + + # State variables for the file + self._has_string_data = False + self._missing_values = False + self._can_read_value_labels = False + self._column_selector_set = False + self._value_labels_read = False + self._data_read = False + self._dtype: np.dtype | None = None + self._lines_read = 0 + + self._native_byteorder = _set_endianness(sys.byteorder) + with get_handle( + path_or_buf, + "rb", + storage_options=storage_options, + is_text=False, + compression=compression, + ) as handles: + # Copy to BytesIO, and ensure no encoding + self.path_or_buf = BytesIO(handles.handle.read()) + + self._read_header() + self._setup_dtype() + + def __enter__(self) -> StataReader: + """enter context manager""" + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + """exit context manager""" + self.close() + + def close(self) -> None: + """close the handle if its open""" + self.path_or_buf.close() + + def _set_encoding(self) -> None: + """ + Set string encoding which depends on file version + """ + if self.format_version < 118: + self._encoding = "latin-1" + else: + self._encoding = "utf-8" + + def _read_header(self) -> None: + first_char = self.path_or_buf.read(1) + if struct.unpack("c", first_char)[0] == b"<": + self._read_new_header() + else: + 
self._read_old_header(first_char)
+
+        self.has_string_data = len([x for x in self.typlist if type(x) is int]) > 0
+
+        # calculate size of a data record
+        self.col_sizes = [self._calcsize(typ) for typ in self.typlist]
+
+    def _read_new_header(self) -> None:
+        # The first part of the header is common to 117 - 119.
+        self.path_or_buf.read(27)  # stata_dta><header><release>
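# Format-detection sketch matching the dispatch above: dta files written in
# formats 117-119 begin with an XML-like "<stata_dta>" header, older formats
# with a single version byte.  "example.dta" is a hypothetical path.
with open("example.dta", "rb") as fh:
    is_new_format = fh.read(1) == b"<"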
+        self.format_version = int(self.path_or_buf.read(3))
+        if self.format_version not in [117, 118, 119]:
+            raise ValueError(_version_error.format(version=self.format_version))
+        self._set_encoding()
+        self.path_or_buf.read(21)  # </release><byteorder>
+        self.byteorder = self.path_or_buf.read(3) == b"MSF" and ">" or "<"
+        self.path_or_buf.read(15)  # </byteorder><K>
+        nvar_type = "H" if self.format_version <= 118 else "I"
+        nvar_size = 2 if self.format_version <= 118 else 4
+        self.nvar = struct.unpack(
+            self.byteorder + nvar_type, self.path_or_buf.read(nvar_size)
+        )[0]
+        self.path_or_buf.read(7)  # </K><N>
+
+        self.nobs = self._get_nobs()
+        self.path_or_buf.read(11)  # </N><label>
+        self._data_label = self._get_data_label()
+        self.path_or_buf.read(19)  # </label><timestamp>
+        self.time_stamp = self._get_time_stamp()
+        self.path_or_buf.read(26)  # </timestamp></header><map>
    + self.path_or_buf.read(8) # 0x0000000000000000 + self.path_or_buf.read(8) # position of + + self._seek_vartypes = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 16 + ) + self._seek_varnames = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 10 + ) + self._seek_sortlist = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 10 + ) + self._seek_formats = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 9 + ) + self._seek_value_label_names = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 19 + ) + + # Requires version-specific treatment + self._seek_variable_labels = self._get_seek_variable_labels() + + self.path_or_buf.read(8) # + self.data_location = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 6 + ) + self.seek_strls = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 7 + ) + self.seek_value_labels = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 14 + ) + + self.typlist, self.dtyplist = self._get_dtypes(self._seek_vartypes) + + self.path_or_buf.seek(self._seek_varnames) + self.varlist = self._get_varlist() + + self.path_or_buf.seek(self._seek_sortlist) + self.srtlist = struct.unpack( + self.byteorder + ("h" * (self.nvar + 1)), + self.path_or_buf.read(2 * (self.nvar + 1)), + )[:-1] + + self.path_or_buf.seek(self._seek_formats) + self.fmtlist = self._get_fmtlist() + + self.path_or_buf.seek(self._seek_value_label_names) + self.lbllist = self._get_lbllist() + + self.path_or_buf.seek(self._seek_variable_labels) + self._variable_labels = self._get_variable_labels() + + # Get data type information, works for versions 117-119. + def _get_dtypes( + self, seek_vartypes: int + ) -> tuple[list[int | str], list[str | np.dtype]]: + + self.path_or_buf.seek(seek_vartypes) + raw_typlist = [ + struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + for _ in range(self.nvar) + ] + + def f(typ: int) -> int | str: + if typ <= 2045: + return typ + try: + return self.TYPE_MAP_XML[typ] + except KeyError as err: + raise ValueError(f"cannot convert stata types [{typ}]") from err + + typlist = [f(x) for x in raw_typlist] + + def g(typ: int) -> str | np.dtype: + if typ <= 2045: + return str(typ) + try: + return self.DTYPE_MAP_XML[typ] + except KeyError as err: + raise ValueError(f"cannot convert stata dtype [{typ}]") from err + + dtyplist = [g(x) for x in raw_typlist] + + return typlist, dtyplist + + def _get_varlist(self) -> list[str]: + # 33 in order formats, 129 in formats 118 and 119 + b = 33 if self.format_version < 118 else 129 + return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] + + # Returns the format list + def _get_fmtlist(self) -> list[str]: + if self.format_version >= 118: + b = 57 + elif self.format_version > 113: + b = 49 + elif self.format_version > 104: + b = 12 + else: + b = 7 + + return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] + + # Returns the label list + def _get_lbllist(self) -> list[str]: + if self.format_version >= 118: + b = 129 + elif self.format_version > 108: + b = 33 + else: + b = 9 + return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] + + def _get_variable_labels(self) -> list[str]: + if self.format_version >= 118: + vlblist = [ + self._decode(self.path_or_buf.read(321)) for _ in range(self.nvar) + ] + elif self.format_version > 105: + vlblist = [ + self._decode(self.path_or_buf.read(81)) for _ in range(self.nvar) + ] + 
else: + vlblist = [ + self._decode(self.path_or_buf.read(32)) for _ in range(self.nvar) + ] + return vlblist + + def _get_nobs(self) -> int: + if self.format_version >= 118: + return struct.unpack(self.byteorder + "Q", self.path_or_buf.read(8))[0] + else: + return struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + + def _get_data_label(self) -> str: + if self.format_version >= 118: + strlen = struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version == 117: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version > 105: + return self._decode(self.path_or_buf.read(81)) + else: + return self._decode(self.path_or_buf.read(32)) + + def _get_time_stamp(self) -> str: + if self.format_version >= 118: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self.path_or_buf.read(strlen).decode("utf-8") + elif self.format_version == 117: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version > 104: + return self._decode(self.path_or_buf.read(18)) + else: + raise ValueError() + + def _get_seek_variable_labels(self) -> int: + if self.format_version == 117: + self.path_or_buf.read(8) # , throw away + # Stata 117 data files do not follow the described format. This is + # a work around that uses the previous label, 33 bytes for each + # variable, 20 for the closing tag and 17 for the opening tag + return self._seek_value_label_names + (33 * self.nvar) + 20 + 17 + elif self.format_version >= 118: + return struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 17 + else: + raise ValueError() + + def _read_old_header(self, first_char: bytes) -> None: + self.format_version = struct.unpack("b", first_char)[0] + if self.format_version not in [104, 105, 108, 111, 113, 114, 115]: + raise ValueError(_version_error.format(version=self.format_version)) + self._set_encoding() + self.byteorder = ( + struct.unpack("b", self.path_or_buf.read(1))[0] == 0x1 and ">" or "<" + ) + self.filetype = struct.unpack("b", self.path_or_buf.read(1))[0] + self.path_or_buf.read(1) # unused + + self.nvar = struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + self.nobs = self._get_nobs() + + self._data_label = self._get_data_label() + + self.time_stamp = self._get_time_stamp() + + # descriptors + if self.format_version > 108: + typlist = [ord(self.path_or_buf.read(1)) for _ in range(self.nvar)] + else: + buf = self.path_or_buf.read(self.nvar) + typlistb = np.frombuffer(buf, dtype=np.uint8) + typlist = [] + for tp in typlistb: + if tp in self.OLD_TYPE_MAPPING: + typlist.append(self.OLD_TYPE_MAPPING[tp]) + else: + typlist.append(tp - 127) # bytes + + try: + self.typlist = [self.TYPE_MAP[typ] for typ in typlist] + except ValueError as err: + invalid_types = ",".join([str(x) for x in typlist]) + raise ValueError(f"cannot convert stata types [{invalid_types}]") from err + try: + self.dtyplist = [self.DTYPE_MAP[typ] for typ in typlist] + except ValueError as err: + invalid_dtypes = ",".join([str(x) for x in typlist]) + raise ValueError(f"cannot convert stata dtypes [{invalid_dtypes}]") from err + + if self.format_version > 108: + self.varlist = [ + self._decode(self.path_or_buf.read(33)) for _ in range(self.nvar) + ] + else: + self.varlist = [ + self._decode(self.path_or_buf.read(9)) for _ in range(self.nvar) + ] + self.srtlist = 
struct.unpack( + self.byteorder + ("h" * (self.nvar + 1)), + self.path_or_buf.read(2 * (self.nvar + 1)), + )[:-1] + + self.fmtlist = self._get_fmtlist() + + self.lbllist = self._get_lbllist() + + self._variable_labels = self._get_variable_labels() + + # ignore expansion fields (Format 105 and later) + # When reading, read five bytes; the last four bytes now tell you + # the size of the next read, which you discard. You then continue + # like this until you read 5 bytes of zeros. + + if self.format_version > 104: + while True: + data_type = struct.unpack( + self.byteorder + "b", self.path_or_buf.read(1) + )[0] + if self.format_version > 108: + data_len = struct.unpack( + self.byteorder + "i", self.path_or_buf.read(4) + )[0] + else: + data_len = struct.unpack( + self.byteorder + "h", self.path_or_buf.read(2) + )[0] + if data_type == 0: + break + self.path_or_buf.read(data_len) + + # necessary data to continue parsing + self.data_location = self.path_or_buf.tell() + + def _setup_dtype(self) -> np.dtype: + """Map between numpy and state dtypes""" + if self._dtype is not None: + return self._dtype + + dtypes = [] # Convert struct data types to numpy data type + for i, typ in enumerate(self.typlist): + if typ in self.NUMPY_TYPE_MAP: + typ = cast(str, typ) # only strs in NUMPY_TYPE_MAP + dtypes.append(("s" + str(i), self.byteorder + self.NUMPY_TYPE_MAP[typ])) + else: + dtypes.append(("s" + str(i), "S" + str(typ))) + self._dtype = np.dtype(dtypes) + + return self._dtype + + def _calcsize(self, fmt: int | str) -> int: + if isinstance(fmt, int): + return fmt + return struct.calcsize(self.byteorder + fmt) + + def _decode(self, s: bytes) -> str: + # have bytes not strings, so must decode + s = s.partition(b"\0")[0] + try: + return s.decode(self._encoding) + except UnicodeDecodeError: + # GH 25960, fallback to handle incorrect format produced when 117 + # files are converted to 118 files in Stata + encoding = self._encoding + msg = f""" +One or more strings in the dta file could not be decoded using {encoding}, and +so the fallback encoding of latin-1 is being used. This can happen when a file +has been incorrectly encoded by Stata or some other software. You should verify +the string values returned are correct.""" + warnings.warn( + msg, + UnicodeWarning, + stacklevel=find_stack_level(), + ) + return s.decode("latin-1") + + def _read_value_labels(self) -> None: + if self._value_labels_read: + # Don't read twice + return + if self.format_version <= 108: + # Value labels are not supported in version 108 and earlier. 
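# Decode-with-fallback sketch mirroring _decode above: fields are trimmed at
# the first NUL, decoded with the file encoding, and fall back to latin-1 when
# the bytes are not valid in that encoding (the reader warns in that case).
raw = b"caf\xe9\x00junk"  # hypothetical raw field bytes
trimmed = raw.partition(b"\0")[0]
try:
    text = trimmed.decode("utf-8")
except UnicodeDecodeError:
    text = trimmed.decode("latin-1")
assert text == "café"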
+ self._value_labels_read = True + self.value_label_dict: dict[str, dict[float, str]] = {} + return + + if self.format_version >= 117: + self.path_or_buf.seek(self.seek_value_labels) + else: + assert self._dtype is not None + offset = self.nobs * self._dtype.itemsize + self.path_or_buf.seek(self.data_location + offset) + + self._value_labels_read = True + self.value_label_dict = {} + + while True: + if self.format_version >= 117: + if self.path_or_buf.read(5) == b" + break # end of value label table + + slength = self.path_or_buf.read(4) + if not slength: + break # end of value label table (format < 117) + if self.format_version <= 117: + labname = self._decode(self.path_or_buf.read(33)) + else: + labname = self._decode(self.path_or_buf.read(129)) + self.path_or_buf.read(3) # padding + + n = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + txtlen = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + off = np.frombuffer( + self.path_or_buf.read(4 * n), dtype=self.byteorder + "i4", count=n + ) + val = np.frombuffer( + self.path_or_buf.read(4 * n), dtype=self.byteorder + "i4", count=n + ) + ii = np.argsort(off) + off = off[ii] + val = val[ii] + txt = self.path_or_buf.read(txtlen) + self.value_label_dict[labname] = {} + for i in range(n): + end = off[i + 1] if i < n - 1 else txtlen + self.value_label_dict[labname][val[i]] = self._decode(txt[off[i] : end]) + if self.format_version >= 117: + self.path_or_buf.read(6) # + self._value_labels_read = True + + def _read_strls(self) -> None: + self.path_or_buf.seek(self.seek_strls) + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO = {"0": ""} + while True: + if self.path_or_buf.read(3) != b"GSO": + break + + if self.format_version == 117: + v_o = struct.unpack(self.byteorder + "Q", self.path_or_buf.read(8))[0] + else: + buf = self.path_or_buf.read(12) + # Only tested on little endian file on little endian machine. + v_size = 2 if self.format_version == 118 else 3 + if self.byteorder == "<": + buf = buf[0:v_size] + buf[4 : (12 - v_size)] + else: + # This path may not be correct, impossible to test + buf = buf[0:v_size] + buf[(4 + v_size) :] + v_o = struct.unpack("Q", buf)[0] + typ = struct.unpack("B", self.path_or_buf.read(1))[0] + length = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + va = self.path_or_buf.read(length) + if typ == 130: + decoded_va = va[0:-1].decode(self._encoding) + else: + # Stata says typ 129 can be binary, so use str + decoded_va = str(va) + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO[str(v_o)] = decoded_va + + def __next__(self) -> DataFrame: + self._using_iterator = True + return self.read(nrows=self._chunksize) + + def get_chunk(self, size: int | None = None) -> DataFrame: + """ + Reads lines from Stata file and returns as dataframe + + Parameters + ---------- + size : int, defaults to None + Number of lines to read. If None, reads whole file. + + Returns + ------- + DataFrame + """ + if size is None: + size = self._chunksize + return self.read(nrows=size) + + @Appender(_read_method_doc) + def read( + self, + nrows: int | None = None, + convert_dates: bool | None = None, + convert_categoricals: bool | None = None, + index_col: str | None = None, + convert_missing: bool | None = None, + preserve_dtypes: bool | None = None, + columns: Sequence[str] | None = None, + order_categoricals: bool | None = None, + ) -> DataFrame: + # Handle empty file or chunk. If reading incrementally raise + # StopIteration. 
If reading the whole thing return an empty + # data frame. + if (self.nobs == 0) and (nrows is None): + self._can_read_value_labels = True + self._data_read = True + self.close() + return DataFrame(columns=self.varlist) + + # Handle options + if convert_dates is None: + convert_dates = self._convert_dates + if convert_categoricals is None: + convert_categoricals = self._convert_categoricals + if convert_missing is None: + convert_missing = self._convert_missing + if preserve_dtypes is None: + preserve_dtypes = self._preserve_dtypes + if columns is None: + columns = self._columns + if order_categoricals is None: + order_categoricals = self._order_categoricals + if index_col is None: + index_col = self._index_col + + if nrows is None: + nrows = self.nobs + + if (self.format_version >= 117) and (not self._value_labels_read): + self._can_read_value_labels = True + self._read_strls() + + # Read data + assert self._dtype is not None + dtype = self._dtype + max_read_len = (self.nobs - self._lines_read) * dtype.itemsize + read_len = nrows * dtype.itemsize + read_len = min(read_len, max_read_len) + if read_len <= 0: + # Iterator has finished, should never be here unless + # we are reading the file incrementally + if convert_categoricals: + self._read_value_labels() + self.close() + raise StopIteration + offset = self._lines_read * dtype.itemsize + self.path_or_buf.seek(self.data_location + offset) + read_lines = min(nrows, self.nobs - self._lines_read) + raw_data = np.frombuffer( + self.path_or_buf.read(read_len), dtype=dtype, count=read_lines + ) + + self._lines_read += read_lines + if self._lines_read == self.nobs: + self._can_read_value_labels = True + self._data_read = True + # if necessary, swap the byte order to native here + if self.byteorder != self._native_byteorder: + raw_data = raw_data.byteswap().newbyteorder() + + if convert_categoricals: + self._read_value_labels() + + if len(raw_data) == 0: + data = DataFrame(columns=self.varlist) + else: + data = DataFrame.from_records(raw_data) + data.columns = Index(self.varlist) + + # If index is not specified, use actual row number rather than + # restarting at 0 for each chunk. 
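# Incremental-read sketch for the chunked path handled in read() above, using
# only the public API; "big.dta" is a hypothetical path.
import pandas as pd

with pd.read_stata("big.dta", chunksize=10_000) as reader:
    first = reader.get_chunk()       # first 10,000 rows as a DataFrame
    more = reader.read(nrows=5_000)  # the following 5,000 rows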
+ if index_col is None: + rng = np.arange(self._lines_read - read_lines, self._lines_read) + data.index = Index(rng) # set attr instead of set_index to avoid copy + + if columns is not None: + try: + data = self._do_select_columns(data, columns) + except ValueError: + self.close() + raise + + # Decode strings + for col, typ in zip(data, self.typlist): + if type(typ) is int: + data[col] = data[col].apply(self._decode, convert_dtype=True) + + data = self._insert_strls(data) + + cols_ = np.where([dtyp is not None for dtyp in self.dtyplist])[0] + # Convert columns (if needed) to match input type + ix = data.index + requires_type_conversion = False + data_formatted = [] + for i in cols_: + if self.dtyplist[i] is not None: + col = data.columns[i] + dtype = data[col].dtype + if dtype != np.dtype(object) and dtype != self.dtyplist[i]: + requires_type_conversion = True + data_formatted.append( + (col, Series(data[col], ix, self.dtyplist[i])) + ) + else: + data_formatted.append((col, data[col])) + if requires_type_conversion: + data = DataFrame.from_dict(dict(data_formatted)) + del data_formatted + + data = self._do_convert_missing(data, convert_missing) + + if convert_dates: + + def any_startswith(x: str) -> bool: + return any(x.startswith(fmt) for fmt in _date_formats) + + cols = np.where([any_startswith(x) for x in self.fmtlist])[0] + for i in cols: + col = data.columns[i] + try: + data[col] = _stata_elapsed_date_to_datetime_vec( + data[col], self.fmtlist[i] + ) + except ValueError: + self.close() + raise + + if convert_categoricals and self.format_version > 108: + data = self._do_convert_categoricals( + data, self.value_label_dict, self.lbllist, order_categoricals + ) + + if not preserve_dtypes: + retyped_data = [] + convert = False + for col in data: + dtype = data[col].dtype + if dtype in (np.dtype(np.float16), np.dtype(np.float32)): + dtype = np.dtype(np.float64) + convert = True + elif dtype in ( + np.dtype(np.int8), + np.dtype(np.int16), + np.dtype(np.int32), + ): + dtype = np.dtype(np.int64) + convert = True + retyped_data.append((col, data[col].astype(dtype))) + if convert: + data = DataFrame.from_dict(dict(retyped_data)) + + if index_col is not None: + data = data.set_index(data.pop(index_col)) + + return data + + def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFrame: + # Check for missing values, and replace if found + replacements = {} + for i, colname in enumerate(data): + fmt = self.typlist[i] + if fmt not in self.VALID_RANGE: + continue + + fmt = cast(str, fmt) # only strs in VALID_RANGE + nmin, nmax = self.VALID_RANGE[fmt] + series = data[colname] + + # appreciably faster to do this with ndarray instead of Series + svals = series._values + missing = (svals < nmin) | (svals > nmax) + + if not missing.any(): + continue + + if convert_missing: # Replacement follows Stata notation + missing_loc = np.nonzero(np.asarray(missing))[0] + umissing, umissing_loc = np.unique(series[missing], return_inverse=True) + replacement = Series(series, dtype=object) + for j, um in enumerate(umissing): + missing_value = StataMissingValue(um) + + loc = missing_loc[umissing_loc == j] + replacement.iloc[loc] = missing_value + else: # All replacements are identical + dtype = series.dtype + if dtype not in (np.float32, np.float64): + dtype = np.float64 + replacement = Series(series, dtype=dtype) + if not replacement._values.flags["WRITEABLE"]: + # only relevant for ArrayManager; construction + # path for BlockManager ensures writeability + replacement = replacement.copy() + # 
Note: operating on ._values is much faster than directly + # TODO: can we fix that? + replacement._values[missing] = np.nan + replacements[colname] = replacement + + if replacements: + for col in replacements: + data[col] = replacements[col] + return data + + def _insert_strls(self, data: DataFrame) -> DataFrame: + if not hasattr(self, "GSO") or len(self.GSO) == 0: + return data + for i, typ in enumerate(self.typlist): + if typ != "Q": + continue + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]] + return data + + def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFrame: + + if not self._column_selector_set: + column_set = set(columns) + if len(column_set) != len(columns): + raise ValueError("columns contains duplicate entries") + unmatched = column_set.difference(data.columns) + if unmatched: + joined = ", ".join(list(unmatched)) + raise ValueError( + "The following columns were not " + f"found in the Stata data set: {joined}" + ) + # Copy information for retained columns for later processing + dtyplist = [] + typlist = [] + fmtlist = [] + lbllist = [] + for col in columns: + i = data.columns.get_loc(col) + dtyplist.append(self.dtyplist[i]) + typlist.append(self.typlist[i]) + fmtlist.append(self.fmtlist[i]) + lbllist.append(self.lbllist[i]) + + self.dtyplist = dtyplist + self.typlist = typlist + self.fmtlist = fmtlist + self.lbllist = lbllist + self._column_selector_set = True + + return data[columns] + + def _do_convert_categoricals( + self, + data: DataFrame, + value_label_dict: dict[str, dict[float, str]], + lbllist: Sequence[str], + order_categoricals: bool, + ) -> DataFrame: + """ + Converts categorical columns to Categorical type. + """ + value_labels = list(value_label_dict.keys()) + cat_converted_data = [] + for col, label in zip(data, lbllist): + if label in value_labels: + # Explicit call with ordered=True + vl = value_label_dict[label] + keys = np.array(list(vl.keys())) + column = data[col] + key_matches = column.isin(keys) + if self._using_iterator and key_matches.all(): + initial_categories: np.ndarray | None = keys + # If all categories are in the keys and we are iterating, + # use the same keys for all chunks. If some are missing + # value labels, then we will fall back to the categories + # varying across chunks. + else: + if self._using_iterator: + # warn is using an iterator + warnings.warn( + categorical_conversion_warning, + CategoricalConversionWarning, + stacklevel=find_stack_level(), + ) + initial_categories = None + cat_data = Categorical( + column, categories=initial_categories, ordered=order_categoricals + ) + if initial_categories is None: + # If None here, then we need to match the cats in the Categorical + categories = [] + for category in cat_data.categories: + if category in vl: + categories.append(vl[category]) + else: + categories.append(category) + else: + # If all cats are matched, we can use the values + categories = list(vl.values()) + try: + # Try to catch duplicate categories + # TODO: if we get a non-copying rename_categories, use that + cat_data = cat_data.rename_categories(categories) + except ValueError as err: + vc = Series(categories).value_counts() + repeated_cats = list(vc.index[vc > 1]) + repeats = "-" * 80 + "\n" + "\n".join(repeated_cats) + # GH 25772 + msg = f""" +Value labels for column {col} are not unique. These cannot be converted to +pandas categoricals. 
+ +Either read the file with `convert_categoricals` set to False or use the +low level interface in `StataReader` to separately read the values and the +value_labels. + +The repeated labels are: +{repeats} +""" + raise ValueError(msg) from err + # TODO: is the next line needed above in the data(...) method? + cat_series = Series(cat_data, index=data.index) + cat_converted_data.append((col, cat_series)) + else: + cat_converted_data.append((col, data[col])) + data = DataFrame(dict(cat_converted_data), copy=False) + return data + + @property + def data_label(self) -> str: + """ + Return data label of Stata file. + """ + return self._data_label + + def variable_labels(self) -> dict[str, str]: + """ + Return a dict associating each variable name with corresponding label. + + Returns + ------- + dict + """ + return dict(zip(self.varlist, self._variable_labels)) + + def value_labels(self) -> dict[str, dict[float, str]]: + """ + Return a nested dict associating each variable name to its value and label. + + Returns + ------- + dict + """ + if not self._value_labels_read: + self._read_value_labels() + + return self.value_label_dict + + +@Appender(_read_stata_doc) +@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) +def read_stata( + filepath_or_buffer: FilePath | ReadBuffer[bytes], + convert_dates: bool = True, + convert_categoricals: bool = True, + index_col: str | None = None, + convert_missing: bool = False, + preserve_dtypes: bool = True, + columns: Sequence[str] | None = None, + order_categoricals: bool = True, + chunksize: int | None = None, + iterator: bool = False, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +) -> DataFrame | StataReader: + + reader = StataReader( + filepath_or_buffer, + convert_dates=convert_dates, + convert_categoricals=convert_categoricals, + index_col=index_col, + convert_missing=convert_missing, + preserve_dtypes=preserve_dtypes, + columns=columns, + order_categoricals=order_categoricals, + chunksize=chunksize, + storage_options=storage_options, + compression=compression, + ) + + if iterator or chunksize: + return reader + + with reader: + return reader.read() + + +def _set_endianness(endianness: str) -> str: + if endianness.lower() in ["<", "little"]: + return "<" + elif endianness.lower() in [">", "big"]: + return ">" + else: # pragma : no cover + raise ValueError(f"Endianness {endianness} not understood") + + +def _pad_bytes(name: AnyStr, length: int) -> AnyStr: + """ + Take a char string and pads it with null bytes until it's length chars. + """ + if isinstance(name, bytes): + return name + b"\x00" * (length - len(name)) + return name + "\x00" * (length - len(name)) + + +def _convert_datetime_to_stata_type(fmt: str) -> np.dtype: + """ + Convert from one of the stata date formats to a type in TYPE_MAP. 
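# Basic usage sketch for the read_stata entry point and the label accessors
# defined above; "survey.dta" is a hypothetical path.
import pandas as pd

df = pd.read_stata("survey.dta", convert_categoricals=True)
# To inspect label metadata without converting the data:
with pd.io.stata.StataReader("survey.dta") as reader:
    value_labels = reader.value_labels()        # {variable: {code: label}}
    variable_labels = reader.variable_labels()  # {variable: label}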
+ """ + if fmt in [ + "tc", + "%tc", + "td", + "%td", + "tw", + "%tw", + "tm", + "%tm", + "tq", + "%tq", + "th", + "%th", + "ty", + "%ty", + ]: + return np.dtype(np.float64) # Stata expects doubles for SIFs + else: + raise NotImplementedError(f"Format {fmt} not implemented") + + +def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict: + new_dict = {} + for key in convert_dates: + if not convert_dates[key].startswith("%"): # make sure proper fmts + convert_dates[key] = "%" + convert_dates[key] + if key in varlist: + new_dict.update({varlist.index(key): convert_dates[key]}) + else: + if not isinstance(key, int): + raise ValueError("convert_dates key must be a column or an integer") + new_dict.update({key: convert_dates[key]}) + return new_dict + + +def _dtype_to_stata_type(dtype: np.dtype, column: Series) -> int: + """ + Convert dtype types to stata types. Returns the byte of the given ordinal. + See TYPE_MAP and comments for an explanation. This is also explained in + the dta spec. + 1 - 244 are strings of this length + Pandas Stata + 251 - for int8 byte + 252 - for int16 int + 253 - for int32 long + 254 - for float32 float + 255 - for double double + + If there are dates to convert, then dtype will already have the correct + type inserted. + """ + # TODO: expand to handle datetime to integer conversion + if dtype.type is np.object_: # try to coerce it to the biggest string + # not memory efficient, what else could we + # do? + itemsize = max_len_string_array(ensure_object(column._values)) + return max(itemsize, 1) + elif dtype.type is np.float64: + return 255 + elif dtype.type is np.float32: + return 254 + elif dtype.type is np.int32: + return 253 + elif dtype.type is np.int16: + return 252 + elif dtype.type is np.int8: + return 251 + else: # pragma : no cover + raise NotImplementedError(f"Data type {dtype} not supported.") + + +def _dtype_to_default_stata_fmt( + dtype, column: Series, dta_version: int = 114, force_strl: bool = False +) -> str: + """ + Map numpy dtype to stata's default format for this type. Not terribly + important since users can change this in Stata. Semantics are + + object -> "%DDs" where DD is the length of the string. If not a string, + raise ValueError + float64 -> "%10.0g" + float32 -> "%9.0g" + int64 -> "%9.0g" + int32 -> "%12.0g" + int16 -> "%8.0g" + int8 -> "%8.0g" + strl -> "%9s" + """ + # TODO: Refactor to combine type with format + # TODO: expand this to handle a default datetime format? 
+ if dta_version < 117: + max_str_len = 244 + else: + max_str_len = 2045 + if force_strl: + return "%9s" + if dtype.type is np.object_: + itemsize = max_len_string_array(ensure_object(column._values)) + if itemsize > max_str_len: + if dta_version >= 117: + return "%9s" + else: + raise ValueError(excessive_string_length_error.format(column.name)) + return "%" + str(max(itemsize, 1)) + "s" + elif dtype == np.float64: + return "%10.0g" + elif dtype == np.float32: + return "%9.0g" + elif dtype == np.int32: + return "%12.0g" + elif dtype == np.int8 or dtype == np.int16: + return "%8.0g" + else: # pragma : no cover + raise NotImplementedError(f"Data type {dtype} not supported.") + + +@doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "fname", +) +class StataWriter(StataParser): + """ + A class for writing Stata binary dta files + + Parameters + ---------- + fname : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() functions. If using a buffer + then the buffer will not be automatically closed after the file + is written. + data : DataFrame + Input to save + convert_dates : dict + Dictionary mapping columns containing datetime types to stata internal + format to use when writing the dates. Options are 'tc', 'td', 'tm', + 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. + Datetime columns that do not have a conversion type specified will be + converted to 'tc'. Raises NotImplementedError if a datetime column has + timezone information + write_index : bool + Write the index to Stata dataset. + byteorder : str + Can be ">", "<", "little", or "big". default is `sys.byteorder` + time_stamp : datetime + A datetime to use as file creation date. Default is the current time + data_label : str + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict + Dictionary containing columns as keys and variable labels as values. + Each label must be 80 characters or smaller. + {compression_options} + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + .. versionadded:: 1.2.0 + + value_labels : dict of dicts + Dictionary containing columns as keys and dictionaries of column value + to labels as values. The combined length of all labels for a single + variable must be 32,000 characters or smaller. + + .. versionadded:: 1.4.0 + + Returns + ------- + writer : StataWriter instance + The StataWriter instance has a write_file method, which will + write the file to the given `fname`. 
+ + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column dtype is not representable in Stata + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + Examples + -------- + >>> data = pd.DataFrame([[1.0, 1]], columns=['a', 'b']) + >>> writer = StataWriter('./data_file.dta', data) + >>> writer.write_file() + + Directly write a zip file + >>> compression = {{"method": "zip", "archive_name": "data_file.dta"}} + >>> writer = StataWriter('./data_file.zip', data, compression=compression) + >>> writer.write_file() + + Save a DataFrame with dates + >>> from datetime import datetime + >>> data = pd.DataFrame([[datetime(2000,1,1)]], columns=['date']) + >>> writer = StataWriter('./date_data_file.dta', data, {{'date' : 'tw'}}) + >>> writer.write_file() + """ + + _max_string_length = 244 + _encoding: Literal["latin-1", "utf-8"] = "latin-1" + + def __init__( + self, + fname: FilePath | WriteBuffer[bytes], + data: DataFrame, + convert_dates: dict[Hashable, str] | None = None, + write_index: bool = True, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + *, + value_labels: dict[Hashable, dict[float, str]] | None = None, + ) -> None: + super().__init__() + self.data = data + self._convert_dates = {} if convert_dates is None else convert_dates + self._write_index = write_index + self._time_stamp = time_stamp + self._data_label = data_label + self._variable_labels = variable_labels + self._non_cat_value_labels = value_labels + self._value_labels: list[StataValueLabel] = [] + self._has_value_labels = np.array([], dtype=bool) + self._compression = compression + self._output_file: IO[bytes] | None = None + self._converted_names: dict[Hashable, str] = {} + # attach nobs, nvars, data, varlist, typlist + self._prepare_pandas(data) + self.storage_options = storage_options + + if byteorder is None: + byteorder = sys.byteorder + self._byteorder = _set_endianness(byteorder) + self._fname = fname + self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} + + def _write(self, to_write: str) -> None: + """ + Helper to call encode before writing to file for Python 3 compat. + """ + self.handles.handle.write(to_write.encode(self._encoding)) + + def _write_bytes(self, value: bytes) -> None: + """ + Helper to assert file is open before writing. + """ + self.handles.handle.write(value) + + def _prepare_non_cat_value_labels( + self, data: DataFrame + ) -> list[StataNonCatValueLabel]: + """ + Check for value labels provided for non-categorical columns. Value + labels + """ + non_cat_value_labels: list[StataNonCatValueLabel] = [] + if self._non_cat_value_labels is None: + return non_cat_value_labels + + for labname, labels in self._non_cat_value_labels.items(): + if labname in self._converted_names: + colname = self._converted_names[labname] + elif labname in data.columns: + colname = str(labname) + else: + raise KeyError( + f"Can't create value labels for {labname}, it wasn't " + "found in the dataset." 
+ ) + + if not is_numeric_dtype(data[colname].dtype): + # Labels should not be passed explicitly for categorical + # columns that will be converted to int + raise ValueError( + f"Can't create value labels for {labname}, value labels " + "can only be applied to numeric columns." + ) + svl = StataNonCatValueLabel(colname, labels, self._encoding) + non_cat_value_labels.append(svl) + return non_cat_value_labels + + def _prepare_categoricals(self, data: DataFrame) -> DataFrame: + """ + Check for categorical columns, retain categorical information for + Stata file and convert categorical data to int + """ + is_cat = [is_categorical_dtype(data[col].dtype) for col in data] + if not any(is_cat): + return data + + self._has_value_labels |= np.array(is_cat) + + get_base_missing_value = StataMissingValue.get_base_missing_value + data_formatted = [] + for col, col_is_cat in zip(data, is_cat): + if col_is_cat: + svl = StataValueLabel(data[col], encoding=self._encoding) + self._value_labels.append(svl) + dtype = data[col].cat.codes.dtype + if dtype == np.int64: + raise ValueError( + "It is not possible to export " + "int64-based categorical data to Stata." + ) + values = data[col].cat.codes._values.copy() + + # Upcast if needed so that correct missing values can be set + if values.max() >= get_base_missing_value(dtype): + if dtype == np.int8: + dtype = np.dtype(np.int16) + elif dtype == np.int16: + dtype = np.dtype(np.int32) + else: + dtype = np.dtype(np.float64) + values = np.array(values, dtype=dtype) + + # Replace missing values with Stata missing value for type + values[values == -1] = get_base_missing_value(dtype) + data_formatted.append((col, values)) + else: + data_formatted.append((col, data[col])) + return DataFrame.from_dict(dict(data_formatted)) + + def _replace_nans(self, data: DataFrame) -> DataFrame: + # return data + """ + Checks floating point data columns for nans, and replaces these with + the generic Stata for missing value (.) + """ + for c in data: + dtype = data[c].dtype + if dtype in (np.float32, np.float64): + if dtype == np.float32: + replacement = self.MISSING_VALUES["f"] + else: + replacement = self.MISSING_VALUES["d"] + data[c] = data[c].fillna(replacement) + + return data + + def _update_strl_names(self) -> None: + """No-op, forward compatibility""" + pass + + def _validate_variable_name(self, name: str) -> str: + """ + Validate variable names for Stata export. + + Parameters + ---------- + name : str + Variable name + + Returns + ------- + str + The validated name with invalid characters replaced with + underscores. + + Notes + ----- + Stata 114 and 117 support ascii characters in a-z, A-Z, 0-9 + and _. + """ + for c in name: + if ( + (c < "A" or c > "Z") + and (c < "a" or c > "z") + and (c < "0" or c > "9") + and c != "_" + ): + name = name.replace(c, "_") + return name + + def _check_column_names(self, data: DataFrame) -> DataFrame: + """ + Checks column names to ensure that they are valid Stata column names. 
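# Name-sanitising sketch of the rules described above and below: only
# [A-Za-z0-9_] survive, names starting with a digit get a leading underscore,
# and names are truncated to 32 characters (the writer additionally prefixes
# reserved words and deduplicates).  The helper is illustrative only.
import re

def _sanitize(name: str) -> str:
    name = re.sub(r"[^A-Za-z0-9_]", "_", str(name))
    if name[:1].isdigit():
        name = "_" + name
    return name[:32]

assert _sanitize("2nd column!") == "_2nd_column_"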
+ This includes checks for: + * Non-string names + * Stata keywords + * Variables that start with numbers + * Variables with names that are too long + + When an illegal variable name is detected, it is converted, and if + dates are exported, the variable name is propagated to the date + conversion dictionary + """ + converted_names: dict[Hashable, str] = {} + columns = list(data.columns) + original_columns = columns[:] + + duplicate_var_id = 0 + for j, name in enumerate(columns): + orig_name = name + if not isinstance(name, str): + name = str(name) + + name = self._validate_variable_name(name) + + # Variable name must not be a reserved word + if name in self.RESERVED_WORDS: + name = "_" + name + + # Variable name may not start with a number + if "0" <= name[0] <= "9": + name = "_" + name + + name = name[: min(len(name), 32)] + + if not name == orig_name: + # check for duplicates + while columns.count(name) > 0: + # prepend ascending number to avoid duplicates + name = "_" + str(duplicate_var_id) + name + name = name[: min(len(name), 32)] + duplicate_var_id += 1 + converted_names[orig_name] = name + + columns[j] = name + + data.columns = Index(columns) + + # Check date conversion, and fix key if needed + if self._convert_dates: + for c, o in zip(columns, original_columns): + if c != o: + self._convert_dates[c] = self._convert_dates[o] + del self._convert_dates[o] + + if converted_names: + conversion_warning = [] + for orig_name, name in converted_names.items(): + msg = f"{orig_name} -> {name}" + conversion_warning.append(msg) + + ws = invalid_name_doc.format("\n ".join(conversion_warning)) + warnings.warn( + ws, + InvalidColumnName, + stacklevel=find_stack_level(), + ) + + self._converted_names = converted_names + self._update_strl_names() + + return data + + def _set_formats_and_types(self, dtypes: Series) -> None: + self.fmtlist: list[str] = [] + self.typlist: list[int] = [] + for col, dtype in dtypes.items(): + self.fmtlist.append(_dtype_to_default_stata_fmt(dtype, self.data[col])) + self.typlist.append(_dtype_to_stata_type(dtype, self.data[col])) + + def _prepare_pandas(self, data: DataFrame) -> None: + # NOTE: we might need a different API / class for pandas objects so + # we can set different semantics - handle this with a PR to pandas.io + + data = data.copy() + + if self._write_index: + temp = data.reset_index() + if isinstance(temp, DataFrame): + data = temp + + # Ensure column names are strings + data = self._check_column_names(data) + + # Check columns for compatibility with stata, upcast if necessary + # Raise if outside the supported range + data = _cast_to_stata_types(data) + + # Replace NaNs with Stata missing values + data = self._replace_nans(data) + + # Set all columns to initially unlabelled + self._has_value_labels = np.repeat(False, data.shape[1]) + + # Create value labels for non-categorical data + non_cat_value_labels = self._prepare_non_cat_value_labels(data) + + non_cat_columns = [svl.labname for svl in non_cat_value_labels] + has_non_cat_val_labels = data.columns.isin(non_cat_columns) + self._has_value_labels |= has_non_cat_val_labels + self._value_labels.extend(non_cat_value_labels) + + # Convert categoricals to int data, and strip labels + data = self._prepare_categoricals(data) + + self.nobs, self.nvar = data.shape + self.data = data + self.varlist = data.columns.tolist() + + dtypes = data.dtypes + + # Ensure all date columns are converted + for col in data: + if col in self._convert_dates: + continue + if is_datetime64_dtype(data[col]): + 
self._convert_dates[col] = "tc" + + self._convert_dates = _maybe_convert_to_int_keys( + self._convert_dates, self.varlist + ) + for key in self._convert_dates: + new_type = _convert_datetime_to_stata_type(self._convert_dates[key]) + dtypes[key] = np.dtype(new_type) + + # Verify object arrays are strings and encode to bytes + self._encode_strings() + + self._set_formats_and_types(dtypes) + + # set the given format for the datetime cols + if self._convert_dates is not None: + for key in self._convert_dates: + if isinstance(key, int): + self.fmtlist[key] = self._convert_dates[key] + + def _encode_strings(self) -> None: + """ + Encode strings in dta-specific encoding + + Do not encode columns marked for date conversion or for strL + conversion. The strL converter independently handles conversion and + also accepts empty string arrays. + """ + convert_dates = self._convert_dates + # _convert_strl is not available in dta 114 + convert_strl = getattr(self, "_convert_strl", []) + for i, col in enumerate(self.data): + # Skip columns marked for date conversion or strl conversion + if i in convert_dates or col in convert_strl: + continue + column = self.data[col] + dtype = column.dtype + if dtype.type is np.object_: + inferred_dtype = infer_dtype(column, skipna=True) + if not ((inferred_dtype == "string") or len(column) == 0): + col = column.name + raise ValueError( + f"""\ +Column `{col}` cannot be exported.\n\nOnly string-like object arrays +containing all strings or a mix of strings and None can be exported. +Object arrays containing only null values are prohibited. Other object +types cannot be exported and must first be converted to one of the +supported types.""" + ) + encoded = self.data[col].str.encode(self._encoding) + # If larger than _max_string_length do nothing + if ( + max_len_string_array(ensure_object(encoded._values)) + <= self._max_string_length + ): + self.data[col] = encoded + + def write_file(self) -> None: + """ + Export DataFrame object to Stata dta format. + """ + with get_handle( + self._fname, + "wb", + compression=self._compression, + is_text=False, + storage_options=self.storage_options, + ) as self.handles: + + if self.handles.compression["method"] is not None: + # ZipFile creates a file (with the same name) for each write call. + # Write it first into a buffer and then write the buffer to the ZipFile. + self._output_file, self.handles.handle = self.handles.handle, BytesIO() + self.handles.created_handles.append(self.handles.handle) + + try: + self._write_header( + data_label=self._data_label, time_stamp=self._time_stamp + ) + self._write_map() + self._write_variable_types() + self._write_varnames() + self._write_sortlist() + self._write_formats() + self._write_value_label_names() + self._write_variable_labels() + self._write_expansion_fields() + self._write_characteristics() + records = self._prepare_data() + self._write_data(records) + self._write_strls() + self._write_value_labels() + self._write_file_close_tag() + self._write_map() + self._close() + except Exception as exc: + self.handles.close() + if isinstance(self._fname, (str, os.PathLike)) and os.path.isfile( + self._fname + ): + try: + os.unlink(self._fname) + except OSError: + warnings.warn( + f"This save was not successful but {self._fname} could not " + "be deleted. This file is not valid.", + ResourceWarning, + stacklevel=find_stack_level(), + ) + raise exc + + def _close(self) -> None: + """ + Close the file if it was created by the writer. 
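# End-to-end write sketch for the preparation path above; "sales.dta" is a
# hypothetical path.  Datetime columns default to the "tc" format when no
# convert_dates entry names them.
import pandas as pd

df = pd.DataFrame(
    {"when": pd.to_datetime(["2021-01-15", "2021-02-20"]), "units": [10, 12]}
)
df.to_stata("sales.dta", write_index=False, convert_dates={"when": "td"})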
+ + If a buffer or file-like object was passed in, for example a GzipFile, + then leave this file open for the caller to close. + """ + # write compression + if self._output_file is not None: + assert isinstance(self.handles.handle, BytesIO) + bio, self.handles.handle = self.handles.handle, self._output_file + self.handles.handle.write(bio.getvalue()) + + def _write_map(self) -> None: + """No-op, future compatibility""" + pass + + def _write_file_close_tag(self) -> None: + """No-op, future compatibility""" + pass + + def _write_characteristics(self) -> None: + """No-op, future compatibility""" + pass + + def _write_strls(self) -> None: + """No-op, future compatibility""" + pass + + def _write_expansion_fields(self) -> None: + """Write 5 zeros for expansion fields""" + self._write(_pad_bytes("", 5)) + + def _write_value_labels(self) -> None: + for vl in self._value_labels: + self._write_bytes(vl.generate_value_label(self._byteorder)) + + def _write_header( + self, + data_label: str | None = None, + time_stamp: datetime.datetime | None = None, + ) -> None: + byteorder = self._byteorder + # ds_format - just use 114 + self._write_bytes(struct.pack("b", 114)) + # byteorder + self._write(byteorder == ">" and "\x01" or "\x02") + # filetype + self._write("\x01") + # unused + self._write("\x00") + # number of vars, 2 bytes + self._write_bytes(struct.pack(byteorder + "h", self.nvar)[:2]) + # number of obs, 4 bytes + self._write_bytes(struct.pack(byteorder + "i", self.nobs)[:4]) + # data label 81 bytes, char, null terminated + if data_label is None: + self._write_bytes(self._null_terminate_bytes(_pad_bytes("", 80))) + else: + self._write_bytes( + self._null_terminate_bytes(_pad_bytes(data_label[:80], 80)) + ) + # time stamp, 18 bytes, char, null terminated + # format dd Mon yyyy hh:mm + if time_stamp is None: + time_stamp = datetime.datetime.now() + elif not isinstance(time_stamp, datetime.datetime): + raise ValueError("time_stamp should be datetime type") + # GH #13856 + # Avoid locale-specific month conversion + months = [ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + ] + month_lookup = {i + 1: month for i, month in enumerate(months)} + ts = ( + time_stamp.strftime("%d ") + + month_lookup[time_stamp.month] + + time_stamp.strftime(" %Y %H:%M") + ) + self._write_bytes(self._null_terminate_bytes(ts)) + + def _write_variable_types(self) -> None: + for typ in self.typlist: + self._write_bytes(struct.pack("B", typ)) + + def _write_varnames(self) -> None: + # varlist names are checked by _check_column_names + # varlist, requires null terminated + for name in self.varlist: + name = self._null_terminate_str(name) + name = _pad_bytes(name[:32], 33) + self._write(name) + + def _write_sortlist(self) -> None: + # srtlist, 2*(nvar+1), int array, encoded by byteorder + srtlist = _pad_bytes("", 2 * (self.nvar + 1)) + self._write(srtlist) + + def _write_formats(self) -> None: + # fmtlist, 49*nvar, char array + for fmt in self.fmtlist: + self._write(_pad_bytes(fmt, 49)) + + def _write_value_label_names(self) -> None: + # lbllist, 33*nvar, char array + for i in range(self.nvar): + # Use variable name when categorical + if self._has_value_labels[i]: + name = self.varlist[i] + name = self._null_terminate_str(name) + name = _pad_bytes(name[:32], 33) + self._write(name) + else: # Default is empty label + self._write(_pad_bytes("", 33)) + + def _write_variable_labels(self) -> None: + # Missing labels are 80 blank characters plus null termination + blank = 
_pad_bytes("", 81) + + if self._variable_labels is None: + for i in range(self.nvar): + self._write(blank) + return + + for col in self.data: + if col in self._variable_labels: + label = self._variable_labels[col] + if len(label) > 80: + raise ValueError("Variable labels must be 80 characters or fewer") + is_latin1 = all(ord(c) < 256 for c in label) + if not is_latin1: + raise ValueError( + "Variable labels must contain only characters that " + "can be encoded in Latin-1" + ) + self._write(_pad_bytes(label, 81)) + else: + self._write(blank) + + def _convert_strls(self, data: DataFrame) -> DataFrame: + """No-op, future compatibility""" + return data + + def _prepare_data(self) -> np.recarray: + data = self.data + typlist = self.typlist + convert_dates = self._convert_dates + # 1. Convert dates + if self._convert_dates is not None: + for i, col in enumerate(data): + if i in convert_dates: + data[col] = _datetime_to_stata_elapsed_vec( + data[col], self.fmtlist[i] + ) + # 2. Convert strls + data = self._convert_strls(data) + + # 3. Convert bad string data to '' and pad to correct length + dtypes = {} + native_byteorder = self._byteorder == _set_endianness(sys.byteorder) + for i, col in enumerate(data): + typ = typlist[i] + if typ <= self._max_string_length: + data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,)) + stype = f"S{typ}" + dtypes[col] = stype + data[col] = data[col].astype(stype) + else: + dtype = data[col].dtype + if not native_byteorder: + dtype = dtype.newbyteorder(self._byteorder) + dtypes[col] = dtype + + return data.to_records(index=False, column_dtypes=dtypes) + + def _write_data(self, records: np.recarray) -> None: + self._write_bytes(records.tobytes()) + + @staticmethod + def _null_terminate_str(s: str) -> str: + s += "\x00" + return s + + def _null_terminate_bytes(self, s: str) -> bytes: + return self._null_terminate_str(s).encode(self._encoding) + + +def _dtype_to_stata_type_117(dtype: np.dtype, column: Series, force_strl: bool) -> int: + """ + Converts dtype types to stata types. Returns the byte of the given ordinal. + See TYPE_MAP and comments for an explanation. This is also explained in + the dta spec. + 1 - 2045 are strings of this length + Pandas Stata + 32768 - for object strL + 65526 - for int8 byte + 65527 - for int16 int + 65528 - for int32 long + 65529 - for float32 float + 65530 - for double double + + If there are dates to convert, then dtype will already have the correct + type inserted. + """ + # TODO: expand to handle datetime to integer conversion + if force_strl: + return 32768 + if dtype.type is np.object_: # try to coerce it to the biggest string + # not memory efficient, what else could we + # do? + itemsize = max_len_string_array(ensure_object(column._values)) + itemsize = max(itemsize, 1) + if itemsize <= 2045: + return itemsize + return 32768 + elif dtype.type is np.float64: + return 65526 + elif dtype.type is np.float32: + return 65527 + elif dtype.type is np.int32: + return 65528 + elif dtype.type is np.int16: + return 65529 + elif dtype.type is np.int8: + return 65530 + else: # pragma : no cover + raise NotImplementedError(f"Data type {dtype} not supported.") + + +def _pad_bytes_new(name: str | bytes, length: int) -> bytes: + """ + Takes a bytes instance and pads it with null bytes until it's length chars. 
+ """ + if isinstance(name, str): + name = bytes(name, "utf-8") + return name + b"\x00" * (length - len(name)) + + +class StataStrLWriter: + """ + Converter for Stata StrLs + + Stata StrLs map 8 byte values to strings which are stored using a + dictionary-like format where strings are keyed to two values. + + Parameters + ---------- + df : DataFrame + DataFrame to convert + columns : Sequence[str] + List of columns names to convert to StrL + version : int, optional + dta version. Currently supports 117, 118 and 119 + byteorder : str, optional + Can be ">", "<", "little", or "big". default is `sys.byteorder` + + Notes + ----- + Supports creation of the StrL block of a dta file for dta versions + 117, 118 and 119. These differ in how the GSO is stored. 118 and + 119 store the GSO lookup value as a uint32 and a uint64, while 117 + uses two uint32s. 118 and 119 also encode all strings as unicode + which is required by the format. 117 uses 'latin-1' a fixed width + encoding that extends the 7-bit ascii table with an additional 128 + characters. + """ + + def __init__( + self, + df: DataFrame, + columns: Sequence[str], + version: int = 117, + byteorder: str | None = None, + ) -> None: + if version not in (117, 118, 119): + raise ValueError("Only dta versions 117, 118 and 119 supported") + self._dta_ver = version + + self.df = df + self.columns = columns + self._gso_table = {"": (0, 0)} + if byteorder is None: + byteorder = sys.byteorder + self._byteorder = _set_endianness(byteorder) + + gso_v_type = "I" # uint32 + gso_o_type = "Q" # uint64 + self._encoding = "utf-8" + if version == 117: + o_size = 4 + gso_o_type = "I" # 117 used uint32 + self._encoding = "latin-1" + elif version == 118: + o_size = 6 + else: # version == 119 + o_size = 5 + self._o_offet = 2 ** (8 * (8 - o_size)) + self._gso_o_type = gso_o_type + self._gso_v_type = gso_v_type + + def _convert_key(self, key: tuple[int, int]) -> int: + v, o = key + return v + self._o_offet * o + + def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]: + """ + Generates the GSO lookup table for the DataFrame + + Returns + ------- + gso_table : dict + Ordered dictionary using the string found as keys + and their lookup position (v,o) as values + gso_df : DataFrame + DataFrame where strl columns have been converted to + (v,o) values + + Notes + ----- + Modifies the DataFrame in-place. + + The DataFrame returned encodes the (v,o) values as uint64s. The + encoding depends on the dta version, and can be expressed as + + enc = v + o * 2 ** (o_size * 8) + + so that v is stored in the lower bits and o is in the upper + bits. o_size is + + * 117: 4 + * 118: 6 + * 119: 5 + """ + gso_table = self._gso_table + gso_df = self.df + columns = list(gso_df.columns) + selected = gso_df[self.columns] + col_index = [(col, columns.index(col)) for col in self.columns] + keys = np.empty(selected.shape, dtype=np.uint64) + for o, (idx, row) in enumerate(selected.iterrows()): + for j, (col, v) in enumerate(col_index): + val = row[col] + # Allow columns with mixed str and None (GH 23633) + val = "" if val is None else val + key = gso_table.get(val, None) + if key is None: + # Stata prefers human numbers + key = (v + 1, o + 1) + gso_table[val] = key + keys[o, j] = self._convert_key(key) + for i, col in enumerate(self.columns): + gso_df[col] = keys[:, i] + + return gso_table, gso_df + + def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes: + """ + Generates the binary blob of GSOs that is written to the dta file. 
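The (v, o) packing done by _convert_key above can be reproduced in isolation; the o_size table below mirrors the values set in __init__ for each dta version:

    # Mirror of StataStrLWriter._convert_key: pack a (v, o) pair into the
    # single integer key stored in the strl column.
    def pack_vo(v: int, o: int, version: int = 117) -> int:
        o_size = {117: 4, 118: 6, 119: 5}[version]
        o_offset = 2 ** (8 * (8 - o_size))
        return v + o_offset * o

    pack_vo(2, 3, version=117)   # 2 + 3 * 2**32 == 12884901890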
+ + Parameters + ---------- + gso_table : dict + Ordered dictionary (str, vo) + + Returns + ------- + gso : bytes + Binary content of dta file to be placed between strl tags + + Notes + ----- + Output format depends on dta version. 117 uses two uint32s to + express v and o while 118+ uses a uint32 for v and a uint64 for o. + """ + # Format information + # Length includes null term + # 117 + # GSOvvvvooootllllxxxxxxxxxxxxxxx...x + # 3 u4 u4 u1 u4 string + null term + # + # 118, 119 + # GSOvvvvooooooootllllxxxxxxxxxxxxxxx...x + # 3 u4 u8 u1 u4 string + null term + + bio = BytesIO() + gso = bytes("GSO", "ascii") + gso_type = struct.pack(self._byteorder + "B", 130) + null = struct.pack(self._byteorder + "B", 0) + v_type = self._byteorder + self._gso_v_type + o_type = self._byteorder + self._gso_o_type + len_type = self._byteorder + "I" + for strl, vo in gso_table.items(): + if vo == (0, 0): + continue + v, o = vo + + # GSO + bio.write(gso) + + # vvvv + bio.write(struct.pack(v_type, v)) + + # oooo / oooooooo + bio.write(struct.pack(o_type, o)) + + # t + bio.write(gso_type) + + # llll + utf8_string = bytes(strl, "utf-8") + bio.write(struct.pack(len_type, len(utf8_string) + 1)) + + # xxx...xxx + bio.write(utf8_string) + bio.write(null) + + return bio.getvalue() + + +class StataWriter117(StataWriter): + """ + A class for writing Stata binary dta files in Stata 13 format (117) + + Parameters + ---------- + fname : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() functions. If using a buffer + then the buffer will not be automatically closed after the file + is written. + data : DataFrame + Input to save + convert_dates : dict + Dictionary mapping columns containing datetime types to stata internal + format to use when writing the dates. Options are 'tc', 'td', 'tm', + 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. + Datetime columns that do not have a conversion type specified will be + converted to 'tc'. Raises NotImplementedError if a datetime column has + timezone information + write_index : bool + Write the index to Stata dataset. + byteorder : str + Can be ">", "<", "little", or "big". default is `sys.byteorder` + time_stamp : datetime + A datetime to use as file creation date. Default is the current time + data_label : str + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict + Dictionary containing columns as keys and variable labels as values. + Each label must be 80 characters or smaller. + convert_strl : list + List of columns names to convert to Stata StrL format. Columns with + more than 2045 characters are automatically written as StrL. + Smaller columns can be converted by including the column name. Using + StrLs can reduce output file size when strings are longer than 8 + characters, and either frequently repeated or sparse. + {compression_options} + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.4.0 Zstandard support. + + value_labels : dict of dicts + Dictionary containing columns as keys and dictionaries of column value + to labels as values. The combined length of all labels for a single + variable must be 32,000 characters or smaller. + + .. versionadded:: 1.4.0 + + Returns + ------- + writer : StataWriter117 instance + The StataWriter117 instance has a write_file method, which will + write the file to the given `fname`. 
+ + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column dtype is not representable in Stata + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + Examples + -------- + >>> data = pd.DataFrame([[1.0, 1, 'a']], columns=['a', 'b', 'c']) + >>> writer = pd.io.stata.StataWriter117('./data_file.dta', data) + >>> writer.write_file() + + Directly write a zip file + >>> compression = {"method": "zip", "archive_name": "data_file.dta"} + >>> writer = pd.io.stata.StataWriter117( + ... './data_file.zip', data, compression=compression + ... ) + >>> writer.write_file() + + Or with long strings stored in strl format + >>> data = pd.DataFrame([['A relatively long string'], [''], ['']], + ... columns=['strls']) + >>> writer = pd.io.stata.StataWriter117( + ... './data_file_with_long_strings.dta', data, convert_strl=['strls']) + >>> writer.write_file() + """ + + _max_string_length = 2045 + _dta_version = 117 + + def __init__( + self, + fname: FilePath | WriteBuffer[bytes], + data: DataFrame, + convert_dates: dict[Hashable, str] | None = None, + write_index: bool = True, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + convert_strl: Sequence[Hashable] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + *, + value_labels: dict[Hashable, dict[float, str]] | None = None, + ) -> None: + # Copy to new list since convert_strl might be modified later + self._convert_strl: list[Hashable] = [] + if convert_strl is not None: + self._convert_strl.extend(convert_strl) + + super().__init__( + fname, + data, + convert_dates, + write_index, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + variable_labels=variable_labels, + value_labels=value_labels, + compression=compression, + storage_options=storage_options, + ) + self._map: dict[str, int] = {} + self._strl_blob = b"" + + @staticmethod + def _tag(val: str | bytes, tag: str) -> bytes: + """Surround val with """ + if isinstance(val, str): + val = bytes(val, "utf-8") + return bytes("<" + tag + ">", "utf-8") + val + bytes("", "utf-8") + + def _update_map(self, tag: str) -> None: + """Update map location for tag with file position""" + assert self.handles.handle is not None + self._map[tag] = self.handles.handle.tell() + + def _write_header( + self, + data_label: str | None = None, + time_stamp: datetime.datetime | None = None, + ) -> None: + """Write the file header""" + byteorder = self._byteorder + self._write_bytes(bytes("", "utf-8")) + bio = BytesIO() + # ds_format - 117 + bio.write(self._tag(bytes(str(self._dta_version), "utf-8"), "release")) + # byteorder + bio.write(self._tag(byteorder == ">" and "MSF" or "LSF", "byteorder")) + # number of vars, 2 bytes in 117 and 118, 4 byte in 119 + nvar_type = "H" if self._dta_version <= 118 else "I" + bio.write(self._tag(struct.pack(byteorder + nvar_type, self.nvar), "K")) + # 117 uses 4 bytes, 118 uses 8 + nobs_size = "I" if self._dta_version == 117 else "Q" + bio.write(self._tag(struct.pack(byteorder + nobs_size, self.nobs), "N")) + # data label 81 bytes, char, null terminated + label = data_label[:80] if data_label is not None else "" + encoded_label = label.encode(self._encoding) + label_size = "B" if self._dta_version == 117 else "H" 
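The _tag helper used by the 117+ writer frames each payload in dta-style open/close byte markers; a minimal sketch of that framing (the payload shown is illustrative):

    # Illustrative equivalent of StataWriter117._tag: wrap a payload in
    # <name>...</name> byte markers, the container layout of dta 117+.
    def tag(val: bytes, name: str) -> bytes:
        enc = name.encode("utf-8")
        return b"<" + enc + b">" + val + b"</" + enc + b">"

    tag(b"117", "release")   # b'<release>117</release>'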
+ label_len = struct.pack(byteorder + label_size, len(encoded_label)) + encoded_label = label_len + encoded_label + bio.write(self._tag(encoded_label, "label")) + # time stamp, 18 bytes, char, null terminated + # format dd Mon yyyy hh:mm + if time_stamp is None: + time_stamp = datetime.datetime.now() + elif not isinstance(time_stamp, datetime.datetime): + raise ValueError("time_stamp should be datetime type") + # Avoid locale-specific month conversion + months = [ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + ] + month_lookup = {i + 1: month for i, month in enumerate(months)} + ts = ( + time_stamp.strftime("%d ") + + month_lookup[time_stamp.month] + + time_stamp.strftime(" %Y %H:%M") + ) + # '\x11' added due to inspection of Stata file + stata_ts = b"\x11" + bytes(ts, "utf-8") + bio.write(self._tag(stata_ts, "timestamp")) + self._write_bytes(self._tag(bio.getvalue(), "header")) + + def _write_map(self) -> None: + """ + Called twice during file write. The first populates the values in + the map with 0s. The second call writes the final map locations when + all blocks have been written. + """ + if not self._map: + self._map = { + "stata_data": 0, + "map": self.handles.handle.tell(), + "variable_types": 0, + "varnames": 0, + "sortlist": 0, + "formats": 0, + "value_label_names": 0, + "variable_labels": 0, + "characteristics": 0, + "data": 0, + "strls": 0, + "value_labels": 0, + "stata_data_close": 0, + "end-of-file": 0, + } + # Move to start of map + self.handles.handle.seek(self._map["map"]) + bio = BytesIO() + for val in self._map.values(): + bio.write(struct.pack(self._byteorder + "Q", val)) + self._write_bytes(self._tag(bio.getvalue(), "map")) + + def _write_variable_types(self) -> None: + self._update_map("variable_types") + bio = BytesIO() + for typ in self.typlist: + bio.write(struct.pack(self._byteorder + "H", typ)) + self._write_bytes(self._tag(bio.getvalue(), "variable_types")) + + def _write_varnames(self) -> None: + self._update_map("varnames") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 data worst case encoding + vn_len = 32 if self._dta_version == 117 else 128 + for name in self.varlist: + name = self._null_terminate_str(name) + name = _pad_bytes_new(name[:32].encode(self._encoding), vn_len + 1) + bio.write(name) + self._write_bytes(self._tag(bio.getvalue(), "varnames")) + + def _write_sortlist(self) -> None: + self._update_map("sortlist") + sort_size = 2 if self._dta_version < 119 else 4 + self._write_bytes(self._tag(b"\x00" * sort_size * (self.nvar + 1), "sortlist")) + + def _write_formats(self) -> None: + self._update_map("formats") + bio = BytesIO() + fmt_len = 49 if self._dta_version == 117 else 57 + for fmt in self.fmtlist: + bio.write(_pad_bytes_new(fmt.encode(self._encoding), fmt_len)) + self._write_bytes(self._tag(bio.getvalue(), "formats")) + + def _write_value_label_names(self) -> None: + self._update_map("value_label_names") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 data worst case encoding + vl_len = 32 if self._dta_version == 117 else 128 + for i in range(self.nvar): + # Use variable name when categorical + name = "" # default name + if self._has_value_labels[i]: + name = self.varlist[i] + name = self._null_terminate_str(name) + encoded_name = _pad_bytes_new(name[:32].encode(self._encoding), vl_len + 1) + bio.write(encoded_name) + self._write_bytes(self._tag(bio.getvalue(), "value_label_names")) + + def _write_variable_labels(self) -> None: + # Missing labels are 80 blank 
characters plus null termination + self._update_map("variable_labels") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 data worst case encoding + vl_len = 80 if self._dta_version == 117 else 320 + blank = _pad_bytes_new("", vl_len + 1) + + if self._variable_labels is None: + for _ in range(self.nvar): + bio.write(blank) + self._write_bytes(self._tag(bio.getvalue(), "variable_labels")) + return + + for col in self.data: + if col in self._variable_labels: + label = self._variable_labels[col] + if len(label) > 80: + raise ValueError("Variable labels must be 80 characters or fewer") + try: + encoded = label.encode(self._encoding) + except UnicodeEncodeError as err: + raise ValueError( + "Variable labels must contain only characters that " + f"can be encoded in {self._encoding}" + ) from err + + bio.write(_pad_bytes_new(encoded, vl_len + 1)) + else: + bio.write(blank) + self._write_bytes(self._tag(bio.getvalue(), "variable_labels")) + + def _write_characteristics(self) -> None: + self._update_map("characteristics") + self._write_bytes(self._tag(b"", "characteristics")) + + def _write_data(self, records) -> None: + self._update_map("data") + self._write_bytes(b"") + self._write_bytes(records.tobytes()) + self._write_bytes(b"") + + def _write_strls(self) -> None: + self._update_map("strls") + self._write_bytes(self._tag(self._strl_blob, "strls")) + + def _write_expansion_fields(self) -> None: + """No-op in dta 117+""" + pass + + def _write_value_labels(self) -> None: + self._update_map("value_labels") + bio = BytesIO() + for vl in self._value_labels: + lab = vl.generate_value_label(self._byteorder) + lab = self._tag(lab, "lbl") + bio.write(lab) + self._write_bytes(self._tag(bio.getvalue(), "value_labels")) + + def _write_file_close_tag(self) -> None: + self._update_map("stata_data_close") + self._write_bytes(bytes("", "utf-8")) + self._update_map("end-of-file") + + def _update_strl_names(self) -> None: + """ + Update column names for conversion to strl if they might have been + changed to comply with Stata naming rules + """ + # Update convert_strl if names changed + for orig, new in self._converted_names.items(): + if orig in self._convert_strl: + idx = self._convert_strl.index(orig) + self._convert_strl[idx] = new + + def _convert_strls(self, data: DataFrame) -> DataFrame: + """ + Convert columns to StrLs if either very large or in the + convert_strl variable + """ + convert_cols = [ + col + for i, col in enumerate(data) + if self.typlist[i] == 32768 or col in self._convert_strl + ] + + if convert_cols: + ssw = StataStrLWriter(data, convert_cols, version=self._dta_version) + tab, new_data = ssw.generate_table() + data = new_data + self._strl_blob = ssw.generate_blob(tab) + return data + + def _set_formats_and_types(self, dtypes: Series) -> None: + self.typlist = [] + self.fmtlist = [] + for col, dtype in dtypes.items(): + force_strl = col in self._convert_strl + fmt = _dtype_to_default_stata_fmt( + dtype, + self.data[col], + dta_version=self._dta_version, + force_strl=force_strl, + ) + self.fmtlist.append(fmt) + self.typlist.append( + _dtype_to_stata_type_117(dtype, self.data[col], force_strl) + ) + + +class StataWriterUTF8(StataWriter117): + """ + Stata binary dta file writing in Stata 15 (118) and 16 (119) formats + + DTA 118 and 119 format files support unicode string data (both fixed + and strL) format. Unicode is also supported in value labels, variable + labels and the dataset label. Format 119 is automatically used if the + file contains more than 32,767 variables. + + .. 
versionadded:: 1.0.0 + + Parameters + ---------- + fname : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() functions. If using a buffer + then the buffer will not be automatically closed after the file + is written. + data : DataFrame + Input to save + convert_dates : dict, default None + Dictionary mapping columns containing datetime types to stata internal + format to use when writing the dates. Options are 'tc', 'td', 'tm', + 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. + Datetime columns that do not have a conversion type specified will be + converted to 'tc'. Raises NotImplementedError if a datetime column has + timezone information + write_index : bool, default True + Write the index to Stata dataset. + byteorder : str, default None + Can be ">", "<", "little", or "big". default is `sys.byteorder` + time_stamp : datetime, default None + A datetime to use as file creation date. Default is the current time + data_label : str, default None + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict, default None + Dictionary containing columns as keys and variable labels as values. + Each label must be 80 characters or smaller. + convert_strl : list, default None + List of columns names to convert to Stata StrL format. Columns with + more than 2045 characters are automatically written as StrL. + Smaller columns can be converted by including the column name. Using + StrLs can reduce output file size when strings are longer than 8 + characters, and either frequently repeated or sparse. + version : int, default None + The dta version to use. By default, uses the size of data to determine + the version. 118 is used if data.shape[1] <= 32767, and 119 is used + for storing larger DataFrames. + {compression_options} + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.4.0 Zstandard support. + + value_labels : dict of dicts + Dictionary containing columns as keys and dictionaries of column value + to labels as values. The combined length of all labels for a single + variable must be 32,000 characters or smaller. + + .. versionadded:: 1.4.0 + + Returns + ------- + StataWriterUTF8 + The instance has a write_file method, which will write the file to the + given `fname`. + + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column dtype is not representable in Stata + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + Examples + -------- + Using Unicode data and column names + + >>> from pandas.io.stata import StataWriterUTF8 + >>> data = pd.DataFrame([[1.0, 1, 'ᴬ']], columns=['a', 'β', 'ĉ']) + >>> writer = StataWriterUTF8('./data_file.dta', data) + >>> writer.write_file() + + Directly write a zip file + >>> compression = {"method": "zip", "archive_name": "data_file.dta"} + >>> writer = StataWriterUTF8('./data_file.zip', data, compression=compression) + >>> writer.write_file() + + Or with long strings stored in strl format + + >>> data = pd.DataFrame([['ᴀ relatively long ŝtring'], [''], ['']], + ... columns=['strls']) + >>> writer = StataWriterUTF8('./data_file_with_long_strings.dta', data, + ... 
convert_strl=['strls']) + >>> writer.write_file() + """ + + _encoding: Literal["utf-8"] = "utf-8" + + def __init__( + self, + fname: FilePath | WriteBuffer[bytes], + data: DataFrame, + convert_dates: dict[Hashable, str] | None = None, + write_index: bool = True, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + convert_strl: Sequence[Hashable] | None = None, + version: int | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + *, + value_labels: dict[Hashable, dict[float, str]] | None = None, + ) -> None: + if version is None: + version = 118 if data.shape[1] <= 32767 else 119 + elif version not in (118, 119): + raise ValueError("version must be either 118 or 119.") + elif version == 118 and data.shape[1] > 32767: + raise ValueError( + "You must use version 119 for data sets containing more than" + "32,767 variables" + ) + + super().__init__( + fname, + data, + convert_dates=convert_dates, + write_index=write_index, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + variable_labels=variable_labels, + value_labels=value_labels, + convert_strl=convert_strl, + compression=compression, + storage_options=storage_options, + ) + # Override version set in StataWriter117 init + self._dta_version = version + + def _validate_variable_name(self, name: str) -> str: + """ + Validate variable names for Stata export. + + Parameters + ---------- + name : str + Variable name + + Returns + ------- + str + The validated name with invalid characters replaced with + underscores. + + Notes + ----- + Stata 118+ support most unicode characters. The only limitation is in + the ascii range where the characters supported are a-z, A-Z, 0-9 and _. + """ + # High code points appear to be acceptable + for c in name: + if ( + ( + ord(c) < 128 + and (c < "A" or c > "Z") + and (c < "a" or c > "z") + and (c < "0" or c > "9") + and c != "_" + ) + or 128 <= ord(c) < 192 + or c in {"×", "÷"} + ): + name = name.replace(c, "_") + + return name diff --git a/pandas/io/xml.py b/pandas/io/xml.py new file mode 100644 index 00000000..fbe3e41b --- /dev/null +++ b/pandas/io/xml.py @@ -0,0 +1,1104 @@ +""" +:mod:`pandas.io.xml` is a module for reading XML. +""" + +from __future__ import annotations + +import io +from typing import ( + Any, + Callable, + Sequence, +) + +from pandas._typing import ( + TYPE_CHECKING, + CompressionOptions, + ConvertersArg, + DtypeArg, + FilePath, + ParseDatesArg, + ReadBuffer, + StorageOptions, + XMLParsers, +) +from pandas.compat._optional import import_optional_dependency +from pandas.errors import ( + AbstractMethodError, + ParserError, +) +from pandas.util._decorators import ( + deprecate_nonkeyword_arguments, + doc, +) + +from pandas.core.dtypes.common import is_list_like + +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import ( + file_exists, + get_handle, + infer_compression, + is_fsspec_url, + is_url, + stringify_path, +) +from pandas.io.parsers import TextParser + +if TYPE_CHECKING: + from xml.etree.ElementTree import Element + + from lxml.etree import ( + _Element, + _XSLTResultTree, + ) + + from pandas import DataFrame + + +@doc( + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] % "path_or_buffer", +) +class _XMLFrameParser: + """ + Internal subclass to parse XML into DataFrames. 
+ + Parameters + ---------- + path_or_buffer : a valid JSON str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. + + xpath : str or regex + The XPath expression to parse required set of nodes for + migration to `Data Frame`. `etree` supports limited XPath. + + namespaces : dict + The namespaces defined in XML document (`xmlns:namespace='URI') + as dicts with key being namespace and value the URI. + + elems_only : bool + Parse only the child elements at the specified `xpath`. + + attrs_only : bool + Parse only the attributes at the specified `xpath`. + + names : list + Column names for Data Frame of parsed XML data. + + dtype : dict + Data type for data or columns. E.g. {{'a': np.float64, + 'b': np.int32, 'c': 'Int64'}} + + .. versionadded:: 1.5.0 + + converters : dict, optional + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels. + + .. versionadded:: 1.5.0 + + parse_dates : bool or list of int or names or list of lists or dict + Converts either index or select columns to datetimes + + .. versionadded:: 1.5.0 + + encoding : str + Encoding of xml object or document. + + stylesheet : str or file-like + URL, file, file-like object, or a raw string containing XSLT, + `etree` does not support XSLT but retained for consistency. + + iterparse : dict, optional + Dict with row element as key and list of descendant elements + and/or attributes as value to be retrieved in iterparsing of + XML document. + + .. versionadded:: 1.5.0 + + {decompression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + See also + -------- + pandas.io.xml._EtreeFrameParser + pandas.io.xml._LxmlFrameParser + + Notes + ----- + To subclass this class effectively you must override the following methods:` + * :func:`parse_data` + * :func:`_parse_nodes` + * :func:`_iterparse_nodes` + * :func:`_parse_doc` + * :func:`_validate_names` + * :func:`_validate_path` + + + See each method's respective documentation for details on their + functionality. + """ + + def __init__( + self, + path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], + xpath: str, + namespaces: dict[str, str] | None, + elems_only: bool, + attrs_only: bool, + names: Sequence[str] | None, + dtype: DtypeArg | None, + converters: ConvertersArg | None, + parse_dates: ParseDatesArg | None, + encoding: str | None, + stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None, + iterparse: dict[str, list[str]] | None, + compression: CompressionOptions, + storage_options: StorageOptions, + ) -> None: + self.path_or_buffer = path_or_buffer + self.xpath = xpath + self.namespaces = namespaces + self.elems_only = elems_only + self.attrs_only = attrs_only + self.names = names + self.dtype = dtype + self.converters = converters + self.parse_dates = parse_dates + self.encoding = encoding + self.stylesheet = stylesheet + self.iterparse = iterparse + self.is_style = None + self.compression = compression + self.storage_options = storage_options + + def parse_data(self) -> list[dict[str, str | None]]: + """ + Parse xml data. + + This method will call the other internal methods to + validate xpath, names, parse and return specific nodes. + """ + + raise AbstractMethodError(self) + + def _parse_nodes(self, elems: list[Any]) -> list[dict[str, str | None]]: + """ + Parse xml nodes. 
+ + This method will parse the children and attributes of elements + in xpath, conditionally for only elements, only attributes + or both while optionally renaming node names. + + Raises + ------ + ValueError + * If only elements and only attributes are specified. + + Notes + ----- + Namespace URIs will be removed from return node values. Also, + elements with missing children or attributes compared to siblings + will have optional keys filled with None values. + """ + + dicts: list[dict[str, str | None]] + + if self.elems_only and self.attrs_only: + raise ValueError("Either element or attributes can be parsed not both.") + elif self.elems_only: + if self.names: + dicts = [ + { + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.findall("*")) + }, + } + for el in elems + ] + else: + dicts = [ + { + ch.tag: ch.text.strip() if ch.text else None + for ch in el.findall("*") + } + for el in elems + ] + + elif self.attrs_only: + dicts = [ + {k: v.strip() if v else None for k, v in el.attrib.items()} + for el in elems + ] + + else: + if self.names: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.findall("*")) + }, + } + for el in elems + ] + + else: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + ch.tag: ch.text.strip() if ch.text else None + for ch in el.findall("*") + }, + } + for el in elems + ] + + dicts = [ + {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} for d in dicts + ] + + keys = list(dict.fromkeys([k for d in dicts for k in d.keys()])) + dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts] + + if self.names: + dicts = [{nm: v for nm, v in zip(self.names, d.values())} for d in dicts] + + return dicts + + def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]: + """ + Iterparse xml nodes. + + This method will read in local disk, decompressed XML files for elements + and underlying descendants using iterparse, a method to iterate through + an XML tree without holding entire XML tree in memory. + + Raises + ------ + TypeError + * If `iterparse` is not a dict or its dict value is not list-like. + ParserError + * If `path_or_buffer` is not a physical, decompressed file on disk. + * If no data is returned from selected items in `iterparse`. + + Notes + ----- + Namespace URIs will be removed from return node values. Also, + elements with missing children or attributes in submitted list + will have optional keys filled with None values. + """ + + dicts: list[dict[str, str | None]] = [] + row: dict[str, str | None] | None = None + + if not isinstance(self.iterparse, dict): + raise TypeError( + f"{type(self.iterparse).__name__} is not a valid type for iterparse" + ) + + row_node = next(iter(self.iterparse.keys())) if self.iterparse else "" + if not is_list_like(self.iterparse[row_node]): + raise TypeError( + f"{type(self.iterparse[row_node])} is not a valid type " + "for value in iterparse" + ) + + if ( + not isinstance(self.path_or_buffer, str) + or is_url(self.path_or_buffer) + or is_fsspec_url(self.path_or_buffer) + or self.path_or_buffer.startswith((" None: + """ + Validate xpath. + + This method checks for syntax, evaluation, or empty nodes return. 
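The key-alignment step at the end of _parse_nodes (union of keys across rows, gaps filled with None) can be seen on a tiny illustrative input:

    # Two parsed rows with unequal keys; the tag names are made up.
    dicts = [{"shape": "square", "sides": "4.0"}, {"shape": "circle"}]

    # Union of keys in first-seen order, then fill missing entries with None,
    # mirroring the final comprehensions in _parse_nodes.
    keys = list(dict.fromkeys(k for d in dicts for k in d))
    aligned = [{k: d.get(k) for k in keys} for d in dicts]
    # [{'shape': 'square', 'sides': '4.0'}, {'shape': 'circle', 'sides': None}]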
+ + Raises + ------ + SyntaxError + * If xpah is not supported or issues with namespaces. + + ValueError + * If xpah does not return any nodes. + """ + + raise AbstractMethodError(self) + + def _validate_names(self) -> None: + """ + Validate names. + + This method will check if names is a list-like and aligns + with length of parse nodes. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + raise AbstractMethodError(self) + + def _parse_doc( + self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str] + ) -> Element | _Element: + """ + Build tree from path_or_buffer. + + This method will parse XML object into tree + either from string/bytes or file location. + """ + raise AbstractMethodError(self) + + +class _EtreeFrameParser(_XMLFrameParser): + """ + Internal class to parse XML into DataFrames with the Python + standard library XML module: `xml.etree.ElementTree`. + """ + + def parse_data(self) -> list[dict[str, str | None]]: + from xml.etree.ElementTree import iterparse + + if self.stylesheet is not None: + raise ValueError( + "To use stylesheet, you need lxml installed and selected as parser." + ) + + if self.iterparse is None: + self.xml_doc = self._parse_doc(self.path_or_buffer) + self._validate_path() + elems = self.xml_doc.findall(self.xpath, namespaces=self.namespaces) + + self._validate_names() + + xml_dicts: list[dict[str, str | None]] = ( + self._parse_nodes(elems) + if self.iterparse is None + else self._iterparse_nodes(iterparse) + ) + + return xml_dicts + + def _validate_path(self) -> None: + """ + Notes + ----- + `etree` supports limited XPath. If user attempts a more complex + expression syntax error will raise. + """ + + msg = ( + "xpath does not return any nodes. " + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." + ) + try: + elems = self.xml_doc.find(self.xpath, namespaces=self.namespaces) + if elems is None: + raise ValueError(msg) + + if elems is not None and elems.find("*") is None and elems.attrib is None: + raise ValueError(msg) + + except (KeyError, SyntaxError): + raise SyntaxError( + "You have used an incorrect or unsupported XPath " + "expression for etree library or you used an " + "undeclared namespace prefix." + ) + + def _validate_names(self) -> None: + children: list[Any] + + if self.names: + if self.iterparse: + children = self.iterparse[next(iter(self.iterparse))] + else: + parent = self.xml_doc.find(self.xpath, namespaces=self.namespaces) + children = parent.findall("*") if parent else [] + + if is_list_like(self.names): + if len(self.names) < len(children): + raise ValueError( + "names does not match length of child elements in xpath." + ) + else: + raise TypeError( + f"{type(self.names).__name__} is not a valid type for names" + ) + + def _parse_doc( + self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str] + ) -> Element: + from xml.etree.ElementTree import ( + XMLParser, + parse, + ) + + handle_data = get_data_from_filepath( + filepath_or_buffer=raw_doc, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) + + with preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) + doc = parse(xml_data, parser=curr_parser) + + return doc.getroot() + + +class _LxmlFrameParser(_XMLFrameParser): + """ + Internal class to parse XML into DataFrames with third-party + full-featured XML library, `lxml`, that supports + XPath 1.0 and XSLT 1.0. 
+ """ + + def parse_data(self) -> list[dict[str, str | None]]: + """ + Parse xml data. + + This method will call the other internal methods to + validate xpath, names, optionally parse and run XSLT, + and parse original or transformed XML and return specific nodes. + """ + from lxml.etree import iterparse + + if self.iterparse is None: + self.xml_doc = self._parse_doc(self.path_or_buffer) + + if self.stylesheet: + self.xsl_doc = self._parse_doc(self.stylesheet) + self.xml_doc = self._transform_doc() + + self._validate_path() + elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) + + self._validate_names() + + xml_dicts: list[dict[str, str | None]] = ( + self._parse_nodes(elems) + if self.iterparse is None + else self._iterparse_nodes(iterparse) + ) + + return xml_dicts + + def _validate_path(self) -> None: + + msg = ( + "xpath does not return any nodes. " + "Be sure row level nodes are in xpath. " + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." + ) + + elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) + children = self.xml_doc.xpath(self.xpath + "/*", namespaces=self.namespaces) + attrs = self.xml_doc.xpath(self.xpath + "/@*", namespaces=self.namespaces) + + if elems == []: + raise ValueError(msg) + + if elems != [] and attrs == [] and children == []: + raise ValueError(msg) + + def _validate_names(self) -> None: + children: list[Any] + + if self.names: + if self.iterparse: + children = self.iterparse[next(iter(self.iterparse))] + else: + children = self.xml_doc.xpath( + self.xpath + "[1]/*", namespaces=self.namespaces + ) + + if is_list_like(self.names): + if len(self.names) < len(children): + raise ValueError( + "names does not match length of child elements in xpath." + ) + else: + raise TypeError( + f"{type(self.names).__name__} is not a valid type for names" + ) + + def _parse_doc( + self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str] + ) -> _Element: + from lxml.etree import ( + XMLParser, + fromstring, + parse, + ) + + handle_data = get_data_from_filepath( + filepath_or_buffer=raw_doc, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) + + with preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) + + if isinstance(xml_data, io.StringIO): + if self.encoding is None: + raise TypeError( + "Can not pass encoding None when input is StringIO." + ) + + doc = fromstring( + xml_data.getvalue().encode(self.encoding), parser=curr_parser + ) + else: + doc = parse(xml_data, parser=curr_parser) + + return doc + + def _transform_doc(self) -> _XSLTResultTree: + """ + Transform original tree using stylesheet. + + This method will transform original xml using XSLT script into + am ideally flatter xml document for easier parsing and migration + to Data Frame. + """ + from lxml.etree import XSLT + + transformer = XSLT(self.xsl_doc) + new_doc = transformer(self.xml_doc) + + return new_doc + + +def get_data_from_filepath( + filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str], + encoding: str | None, + compression: CompressionOptions, + storage_options: StorageOptions, +) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]: + """ + Extract raw XML data. + + The method accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. XML string or bytes + + This method turns (1) into (2) to simplify the rest of the processing. 
+ It returns input types (2) and (3) unchanged. + """ + if not isinstance(filepath_or_buffer, bytes): + filepath_or_buffer = stringify_path(filepath_or_buffer) + + if ( + isinstance(filepath_or_buffer, str) + and not filepath_or_buffer.startswith((" io.StringIO | io.BytesIO: + """ + Convert extracted raw data. + + This method will return underlying data of extracted XML content. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is a string or bytes that is an XML document. + """ + + if isinstance(data, str): + data = io.StringIO(data) + + elif isinstance(data, bytes): + data = io.BytesIO(data) + + return data + + +def _data_to_frame(data, **kwargs) -> DataFrame: + """ + Convert parsed data to Data Frame. + + This method will bind xml dictionary data of keys and values + into named columns of Data Frame using the built-in TextParser + class that build Data Frame and infers specific dtypes. + """ + + tags = next(iter(data)) + nodes = [list(d.values()) for d in data] + + try: + with TextParser(nodes, names=tags, **kwargs) as tp: + return tp.read() + except ParserError: + raise ParserError( + "XML document may be too complex for import. " + "Try to flatten document and use distinct " + "element and attribute names." + ) + + +def _parse( + path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], + xpath: str, + namespaces: dict[str, str] | None, + elems_only: bool, + attrs_only: bool, + names: Sequence[str] | None, + dtype: DtypeArg | None, + converters: ConvertersArg | None, + parse_dates: ParseDatesArg | None, + encoding: str | None, + parser: XMLParsers, + stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None, + iterparse: dict[str, list[str]] | None, + compression: CompressionOptions, + storage_options: StorageOptions, + **kwargs, +) -> DataFrame: + """ + Call internal parsers. + + This method will conditionally call internal parsers: + LxmlFrameParser and/or EtreeParser. + + Raises + ------ + ImportError + * If lxml is not installed if selected as parser. + + ValueError + * If parser is not lxml or etree. 
+ """ + + p: _EtreeFrameParser | _LxmlFrameParser + + if parser == "lxml": + lxml = import_optional_dependency("lxml.etree", errors="ignore") + + if lxml is not None: + p = _LxmlFrameParser( + path_or_buffer, + xpath, + namespaces, + elems_only, + attrs_only, + names, + dtype, + converters, + parse_dates, + encoding, + stylesheet, + iterparse, + compression, + storage_options, + ) + else: + raise ImportError("lxml not found, please install or use the etree parser.") + + elif parser == "etree": + p = _EtreeFrameParser( + path_or_buffer, + xpath, + namespaces, + elems_only, + attrs_only, + names, + dtype, + converters, + parse_dates, + encoding, + stylesheet, + iterparse, + compression, + storage_options, + ) + else: + raise ValueError("Values for parser can only be lxml or etree.") + + data_dicts = p.parse_data() + + return _data_to_frame( + data=data_dicts, + dtype=dtype, + converters=converters, + parse_dates=parse_dates, + **kwargs, + ) + + +@deprecate_nonkeyword_arguments(version=None, allowed_args=["path_or_buffer"]) +@doc( + storage_options=_shared_docs["storage_options"], + decompression_options=_shared_docs["decompression_options"] % "path_or_buffer", +) +def read_xml( + path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], + xpath: str = "./*", + namespaces: dict[str, str] | None = None, + elems_only: bool = False, + attrs_only: bool = False, + names: Sequence[str] | None = None, + dtype: DtypeArg | None = None, + converters: ConvertersArg | None = None, + parse_dates: ParseDatesArg | None = None, + # encoding can not be None for lxml and StringIO input + encoding: str | None = "utf-8", + parser: XMLParsers = "lxml", + stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = None, + iterparse: dict[str, list[str]] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +) -> DataFrame: + r""" + Read XML document into a ``DataFrame`` object. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + path_or_buffer : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a ``read()`` function. The string can be any valid XML + string or a path. The string can further be a URL. Valid URL schemes + include http, ftp, s3, and file. + + xpath : str, optional, default './\*' + The XPath to parse required set of nodes for migration to DataFrame. + XPath should return a collection of elements and not a single + element. Note: The ``etree`` parser supports limited XPath + expressions. For more complex XPath, use ``lxml`` which requires + installation. + + namespaces : dict, optional + The namespaces defined in XML document as dicts with key being + namespace prefix and value the URI. There is no need to include all + namespaces in XML, only the ones used in ``xpath`` expression. + Note: if XML document uses default namespace denoted as + `xmlns=''` without a prefix, you must assign any temporary + namespace prefix such as 'doc' to the URI in order to parse + underlying nodes and/or attributes. For example, :: + + namespaces = {{"doc": "https://example.com"}} + + elems_only : bool, optional, default False + Parse only the child elements at the specified ``xpath``. By default, + all child elements and non-empty text nodes are returned. + + attrs_only : bool, optional, default False + Parse only the attributes at the specified ``xpath``. + By default, all attributes are returned. 
+ + names : list-like, optional + Column names for DataFrame of parsed XML data. Use this parameter to + rename original element names and distinguish same named elements and + attributes. + + dtype : Type name or dict of column -> type, optional + Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32, + 'c': 'Int64'}} + Use `str` or `object` together with suitable `na_values` settings + to preserve and not interpret dtype. + If converters are specified, they will be applied INSTEAD + of dtype conversion. + + .. versionadded:: 1.5.0 + + converters : dict, optional + Dict of functions for converting values in certain columns. Keys can either + be integers or column labels. + + .. versionadded:: 1.5.0 + + parse_dates : bool or list of int or names or list of lists or dict, default False + Identifiers to parse index or columns to datetime. The behavior is as follows: + + * boolean. If True -> try parsing the index. + * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + each as a separate date column. + * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as + a single date column. + * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call + result 'foo' + + .. versionadded:: 1.5.0 + + encoding : str, optional, default 'utf-8' + Encoding of XML document. + + parser : {{'lxml','etree'}}, default 'lxml' + Parser module to use for retrieval of data. Only 'lxml' and + 'etree' are supported. With 'lxml' more complex XPath searches + and ability to use XSLT stylesheet are supported. + + stylesheet : str, path object or file-like object + A URL, file-like object, or a raw string containing an XSLT script. + This stylesheet should flatten complex, deeply nested XML documents + for easier parsing. To use this feature you must have ``lxml`` module + installed and specify 'lxml' as ``parser``. The ``xpath`` must + reference nodes of transformed XML document generated after XSLT + transformation and not the original XML document. Only XSLT 1.0 + scripts and not later versions is currently supported. + + iterparse : dict, optional + The nodes or attributes to retrieve in iterparsing of XML document + as a dict with key being the name of repeating element and value being + list of elements or attribute names that are descendants of the repeated + element. Note: If this option is used, it will replace ``xpath`` parsing + and unlike xpath, descendants do not need to relate to each other but can + exist any where in document under the repeating element. This memory- + efficient method should be used for very large XML files (500MB, 1GB, or 5GB+). + For example, :: + + iterparse = {{"row_element": ["child_elem", "attr", "grandchild_elem"]}} + + .. versionadded:: 1.5.0 + + {decompression_options} + + .. versionchanged:: 1.4.0 Zstandard support. + + {storage_options} + + Returns + ------- + df + A DataFrame. + + See Also + -------- + read_json : Convert a JSON string to pandas object. + read_html : Read HTML tables into a list of DataFrame objects. + + Notes + ----- + This method is best designed to import shallow XML documents in + following format which is the ideal fit for the two-dimensions of a + ``DataFrame`` (row by column). :: + + + + data + data + data + ... + + + ... + + ... + + + As a file format, XML documents can be designed any way including + layout of elements and attributes as long as it conforms to W3C + specifications. 
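A short usage sketch of the call described above; the XML snippet and element names are illustrative, and the iterparse variant assumes a local, uncompressed file:

    import pandas as pd

    xml = """<data>
      <row><shape>square</shape><degrees>360</degrees><sides>4.0</sides></row>
      <row><shape>circle</shape><degrees>360</degrees></row>
    </data>"""

    df = pd.read_xml(xml, xpath=".//row")      # default parser="lxml"
    #     shape  degrees  sides
    # 0  square      360    4.0
    # 1  circle      360    NaN

    # For very large files on disk, iterparse replaces xpath-based parsing:
    # pd.read_xml("large.xml", iterparse={"row": ["shape", "degrees", "sides"]})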
Therefore, this method is a convenience handler for + a specific flatter design and not all possible XML structures. + + However, for more complex XML documents, ``stylesheet`` allows you to + temporarily redesign original document with XSLT (a special purpose + language) for a flatter version for migration to a DataFrame. + + This function will *always* return a single :class:`DataFrame` or raise + exceptions due to issues with XML document, ``xpath``, or other + parameters. + + See the :ref:`read_xml documentation in the IO section of the docs + ` for more information in using this method to parse XML + files to DataFrames. + + Examples + -------- + >>> xml = ''' + ... + ... + ... square + ... 360 + ... 4.0 + ... + ... + ... circle + ... 360 + ... + ... + ... + ... triangle + ... 180 + ... 3.0 + ... + ... ''' + + >>> df = pd.read_xml(xml) + >>> df + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + + >>> xml = ''' + ... + ... + ... + ... + ... ''' + + >>> df = pd.read_xml(xml, xpath=".//row") + >>> df + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + + >>> xml = ''' + ... + ... + ... square + ... 360 + ... 4.0 + ... + ... + ... circle + ... 360 + ... + ... + ... + ... triangle + ... 180 + ... 3.0 + ... + ... ''' + + >>> df = pd.read_xml(xml, + ... xpath="//doc:row", + ... namespaces={{"doc": "https://example.com"}}) + >>> df + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + """ + + return _parse( + path_or_buffer=path_or_buffer, + xpath=xpath, + namespaces=namespaces, + elems_only=elems_only, + attrs_only=attrs_only, + names=names, + dtype=dtype, + converters=converters, + parse_dates=parse_dates, + encoding=encoding, + parser=parser, + stylesheet=stylesheet, + iterparse=iterparse, + compression=compression, + storage_options=storage_options, + ) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py new file mode 100644 index 00000000..55c861e3 --- /dev/null +++ b/pandas/plotting/__init__.py @@ -0,0 +1,98 @@ +""" +Plotting public API. + +Authors of third-party plotting backends should implement a module with a +public ``plot(data, kind, **kwargs)``. The parameter `data` will contain +the data structure and can be a `Series` or a `DataFrame`. For example, +for ``df.plot()`` the parameter `data` will contain the DataFrame `df`. +In some cases, the data structure is transformed before being sent to +the backend (see PlotAccessor.__call__ in pandas/plotting/_core.py for +the exact transformations). + +The parameter `kind` will be one of: + +- line +- bar +- barh +- box +- hist +- kde +- area +- pie +- scatter +- hexbin + +See the pandas API reference for documentation on each kind of plot. + +Any other keyword argument is currently assumed to be backend specific, +but some parameters may be unified and added to the signature in the +future (e.g. `title` which should be useful for any backend). + +Currently, all the Matplotlib functions in pandas are accessed through +the selected backend. For example, `pandas.plotting.boxplot` (equivalent +to `DataFrame.boxplot`) is also accessed in the selected backend. This +is expected to change, and the exact API is under discussion. 
But with +the current version, backends are expected to implement the next functions: + +- plot (describe above, used for `Series.plot` and `DataFrame.plot`) +- hist_series and hist_frame (for `Series.hist` and `DataFrame.hist`) +- boxplot (`pandas.plotting.boxplot(df)` equivalent to `DataFrame.boxplot`) +- boxplot_frame and boxplot_frame_groupby +- register and deregister (register converters for the tick formats) +- Plots not called as `Series` and `DataFrame` methods: + - table + - andrews_curves + - autocorrelation_plot + - bootstrap_plot + - lag_plot + - parallel_coordinates + - radviz + - scatter_matrix + +Use the code in pandas/plotting/_matplotib.py and +https://github.com/pyviz/hvplot as a reference on how to write a backend. + +For the discussion about the API see +https://github.com/pandas-dev/pandas/issues/26747. +""" +from pandas.plotting._core import ( + PlotAccessor, + boxplot, + boxplot_frame, + boxplot_frame_groupby, + hist_frame, + hist_series, +) +from pandas.plotting._misc import ( + andrews_curves, + autocorrelation_plot, + bootstrap_plot, + deregister as deregister_matplotlib_converters, + lag_plot, + parallel_coordinates, + plot_params, + radviz, + register as register_matplotlib_converters, + scatter_matrix, + table, +) + +__all__ = [ + "PlotAccessor", + "boxplot", + "boxplot_frame", + "boxplot_frame_groupby", + "hist_frame", + "hist_series", + "scatter_matrix", + "radviz", + "andrews_curves", + "bootstrap_plot", + "parallel_coordinates", + "lag_plot", + "autocorrelation_plot", + "table", + "plot_params", + "register_matplotlib_converters", + "deregister_matplotlib_converters", +] diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py new file mode 100644 index 00000000..be77de2c --- /dev/null +++ b/pandas/plotting/_core.py @@ -0,0 +1,1888 @@ +from __future__ import annotations + +import importlib +import itertools +import types +from typing import ( + TYPE_CHECKING, + Sequence, +) +import warnings + +from pandas._config import get_option + +from pandas._typing import IndexLabel +from pandas.util._decorators import ( + Appender, + Substitution, +) +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.base import PandasObject + +if TYPE_CHECKING: + from matplotlib.axes import Axes + + from pandas import DataFrame + + +def hist_series( + self, + by=None, + ax=None, + grid: bool = True, + xlabelsize: int | None = None, + xrot: float | None = None, + ylabelsize: int | None = None, + yrot: float | None = None, + figsize: tuple[int, int] | None = None, + bins: int | Sequence[int] = 10, + backend: str | None = None, + legend: bool = False, + **kwargs, +): + """ + Draw histogram of the input series using matplotlib. + + Parameters + ---------- + by : object, optional + If passed, then used to form histograms for separate groups. + ax : matplotlib axis object + If not passed, uses gca(). + grid : bool, default True + Whether to show axis grid lines. + xlabelsize : int, default None + If specified changes the x-axis label size. + xrot : float, default None + Rotation of x axis labels. + ylabelsize : int, default None + If specified changes the y-axis label size. + yrot : float, default None + Rotation of y axis labels. + figsize : tuple, default None + Figure size in inches by default. + bins : int or sequence, default 10 + Number of histogram bins to be used. 
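The backend contract listed above can be sketched as a minimal module; the module name and lookup details are assumptions for illustration (pandas discovers backends through the "pandas_plotting_backends" entry point group, or an importable module exposing a plot callable):

    # my_backend.py -- skeleton of a third-party plotting backend.
    def plot(data, kind, **kwargs):
        # `data` is the Series/DataFrame handed over by the PlotAccessor;
        # `kind` is one of the plot kinds listed above.
        raise NotImplementedError(f"kind={kind!r} is not implemented here")

    def hist_series(data, **kwargs):
        raise NotImplementedError

    def hist_frame(data, **kwargs):
        raise NotImplementedError

    def boxplot(data, **kwargs):
        raise NotImplementedError

With such a module importable, pd.set_option("plotting.backend", "my_backend") or a per-call backend="my_backend" argument routes plotting calls to it.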
If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 + + legend : bool, default False + Whether to show the legend. + + .. versionadded:: 1.1.0 + + **kwargs + To be passed to the actual plotting function. + + Returns + ------- + matplotlib.AxesSubplot + A histogram plot. + + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. + """ + plot_backend = _get_plot_backend(backend) + return plot_backend.hist_series( + self, + by=by, + ax=ax, + grid=grid, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + figsize=figsize, + bins=bins, + legend=legend, + **kwargs, + ) + + +def hist_frame( + data: DataFrame, + column: IndexLabel = None, + by=None, + grid: bool = True, + xlabelsize: int | None = None, + xrot: float | None = None, + ylabelsize: int | None = None, + yrot: float | None = None, + ax=None, + sharex: bool = False, + sharey: bool = False, + figsize: tuple[int, int] | None = None, + layout: tuple[int, int] | None = None, + bins: int | Sequence[int] = 10, + backend: str | None = None, + legend: bool = False, + **kwargs, +): + """ + Make a histogram of the DataFrame's columns. + + A `histogram`_ is a representation of the distribution of data. + This function calls :meth:`matplotlib.pyplot.hist`, on each series in + the DataFrame, resulting in one histogram per column. + + .. _histogram: https://en.wikipedia.org/wiki/Histogram + + Parameters + ---------- + data : DataFrame + The pandas object holding the data. + column : str or sequence, optional + If passed, will be used to limit data to a subset of columns. + by : object, optional + If passed, then used to form histograms for separate groups. + grid : bool, default True + Whether to show axis grid lines. + xlabelsize : int, default None + If specified changes the x-axis label size. + xrot : float, default None + Rotation of x axis labels. For example, a value of 90 displays the + x labels rotated 90 degrees clockwise. + ylabelsize : int, default None + If specified changes the y-axis label size. + yrot : float, default None + Rotation of y axis labels. For example, a value of 90 displays the + y labels rotated 90 degrees clockwise. + ax : Matplotlib axes object, default None + The axes to plot the histogram on. + sharex : bool, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in. + Note that passing in both an ax and sharex=True will alter all x axis + labels for all subplots in a figure. + sharey : bool, default False + In case subplots=True, share y axis and set some y axis labels to + invisible. + figsize : tuple, optional + The size in inches of the figure to create. Uses the value in + `matplotlib.rcParams` by default. + layout : tuple, optional + Tuple of (rows, columns) for the layout of the histograms. + bins : int or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. 
If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 + + legend : bool, default False + Whether to show the legend. + + .. versionadded:: 1.1.0 + + **kwargs + All other plotting keyword arguments to be passed to + :meth:`matplotlib.pyplot.hist`. + + Returns + ------- + matplotlib.AxesSubplot or numpy.ndarray of them + + See Also + -------- + matplotlib.pyplot.hist : Plot a histogram using matplotlib. + + Examples + -------- + This example draws a histogram based on the length and width of + some animals, displayed in three bins + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'length': [1.5, 0.5, 1.2, 0.9, 3], + ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1] + ... }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse']) + >>> hist = df.hist(bins=3) + """ + plot_backend = _get_plot_backend(backend) + return plot_backend.hist_frame( + data, + column=column, + by=by, + grid=grid, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + ax=ax, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + legend=legend, + bins=bins, + **kwargs, + ) + + +_boxplot_doc = """ +Make a box plot from DataFrame columns. + +Make a box-and-whisker plot from DataFrame columns, optionally grouped +by some other columns. A box plot is a method for graphically depicting +groups of numerical data through their quartiles. +The box extends from the Q1 to Q3 quartile values of the data, +with a line at the median (Q2). The whiskers extend from the edges +of box to show the range of the data. By default, they extend no more than +`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest +data point within that interval. Outliers are plotted as separate dots. + +For further details see +Wikipedia's entry for `boxplot `_. + +Parameters +---------- +column : str or list of str, optional + Column name or list of names, or vector. + Can be any valid input to :meth:`pandas.DataFrame.groupby`. +by : str or array-like, optional + Column in the DataFrame to :meth:`pandas.DataFrame.groupby`. + One box-plot will be done per value of columns in `by`. +ax : object of class matplotlib.axes.Axes, optional + The matplotlib axes to be used by boxplot. +fontsize : float or str + Tick label font size in points or as a string (e.g., `large`). +rot : int or float, default 0 + The rotation angle of labels (in degrees) + with respect to the screen coordinate system. +grid : bool, default True + Setting this to True will show the grid. +figsize : A tuple (width, height) in inches + The size of the figure to create in matplotlib. +layout : tuple (rows, columns), optional + For example, (3, 5) will display the subplots + using 3 columns and 5 rows, starting from the top-left. +return_type : {'axes', 'dict', 'both'} or None, default 'axes' + The kind of object to return. The default is ``axes``. + + * 'axes' returns the matplotlib axes the boxplot is drawn on. + * 'dict' returns a dictionary whose values are the matplotlib + Lines of the boxplot. + * 'both' returns a namedtuple with the axes and dict. + * when grouping with ``by``, a Series mapping columns to + ``return_type`` is returned. 
+ + If ``return_type`` is `None`, a NumPy array + of axes with the same shape as ``layout`` is returned. +%(backend)s\ + +**kwargs + All other plotting keyword arguments to be passed to + :func:`matplotlib.pyplot.boxplot`. + +Returns +------- +result + See Notes. + +See Also +-------- +Series.plot.hist: Make a histogram. +matplotlib.pyplot.boxplot : Matplotlib equivalent plot. + +Notes +----- +The return type depends on the `return_type` parameter: + +* 'axes' : object of class matplotlib.axes.Axes +* 'dict' : dict of matplotlib.lines.Line2D objects +* 'both' : a namedtuple with structure (ax, lines) + +For data grouped with ``by``, return a Series of the above or a numpy +array: + +* :class:`~pandas.Series` +* :class:`~numpy.array` (for ``return_type = None``) + +Use ``return_type='dict'`` when you want to tweak the appearance +of the lines after plotting. In this case a dict containing the Lines +making up the boxes, caps, fliers, medians, and whiskers is returned. + +Examples +-------- + +Boxplots can be created for every column in the dataframe +by ``df.boxplot()`` or indicating the columns to be used: + +.. plot:: + :context: close-figs + + >>> np.random.seed(1234) + >>> df = pd.DataFrame(np.random.randn(10, 4), + ... columns=['Col1', 'Col2', 'Col3', 'Col4']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP + +Boxplots of variables distributions grouped by the values of a third +variable can be created using the option ``by``. For instance: + +.. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(10, 2), + ... columns=['Col1', 'Col2']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) + >>> boxplot = df.boxplot(by='X') + +A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot +in order to group the data by combination of the variables in the x-axis: + +.. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(10, 3), + ... columns=['Col1', 'Col2', 'Col3']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) + >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', + ... 'B', 'A', 'B', 'A', 'B']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) + +The layout of boxplot can be adjusted giving a tuple to ``layout``: + +.. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... layout=(2, 1)) + +Additional formatting can be done to the boxplot, like suppressing the grid +(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) +or changing the fontsize (i.e. ``fontsize=15``): + +.. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP + +The parameter ``return_type`` can be used to select the type of element +returned by `boxplot`. When ``return_type='axes'`` is selected, +the matplotlib axes on which the boxplot is drawn are returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes') + >>> type(boxplot) + + +When grouping with ``by``, a Series mapping columns to ``return_type`` +is returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... return_type='axes') + >>> type(boxplot) + + +If ``return_type`` is `None`, a NumPy array of axes with the same shape +as ``layout`` is returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... 
return_type=None) + >>> type(boxplot) + +""" + +_backend_doc = """\ +backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 +""" + + +_bar_or_line_doc = """ + Parameters + ---------- + x : label or position, optional + Allows plotting of one column versus another. If not specified, + the index of the DataFrame is used. + y : label or position, optional + Allows plotting of one column versus another. If not specified, + all numerical columns are used. + color : str, array-like, or dict, optional + The color for each of the DataFrame's columns. Possible values are: + + - A single color string referred to by name, RGB or RGBA code, + for instance 'red' or '#a98d19'. + + - A sequence of color strings referred to by name, RGB or RGBA + code, which will be used for each column recursively. For + instance ['green','yellow'] each column's %(kind)s will be filled in + green or yellow, alternatively. If there is only a single column to + be plotted, then only the first color from the color list will be + used. + + - A dict of the form {column name : color}, so that each column will be + colored accordingly. For example, if your columns are called `a` and + `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for + column `a` in green and %(kind)ss for column `b` in red. + + .. versionadded:: 1.1.0 + + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.axes.Axes or np.ndarray of them + An ndarray is returned with one :class:`matplotlib.axes.Axes` + per column when ``subplots=True``. +""" + + +@Substitution(backend="") +@Appender(_boxplot_doc) +def boxplot( + data: DataFrame, + column: str | list[str] | None = None, + by: str | list[str] | None = None, + ax: Axes | None = None, + fontsize: float | str | None = None, + rot: int = 0, + grid: bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + return_type: str | None = None, + **kwargs, +): + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.boxplot( + data, + column=column, + by=by, + ax=ax, + fontsize=fontsize, + rot=rot, + grid=grid, + figsize=figsize, + layout=layout, + return_type=return_type, + **kwargs, + ) + + +@Substitution(backend=_backend_doc) +@Appender(_boxplot_doc) +def boxplot_frame( + self, + column=None, + by=None, + ax=None, + fontsize=None, + rot: int = 0, + grid: bool = True, + figsize=None, + layout=None, + return_type=None, + backend=None, + **kwargs, +): + plot_backend = _get_plot_backend(backend) + return plot_backend.boxplot_frame( + self, + column=column, + by=by, + ax=ax, + fontsize=fontsize, + rot=rot, + grid=grid, + figsize=figsize, + layout=layout, + return_type=return_type, + **kwargs, + ) + + +def boxplot_frame_groupby( + grouped, + subplots: bool = True, + column=None, + fontsize=None, + rot: int = 0, + grid: bool = True, + ax=None, + figsize=None, + layout=None, + sharex: bool = False, + sharey: bool = True, + backend=None, + **kwargs, +): + """ + Make box plots from DataFrameGroupBy data. + + Parameters + ---------- + grouped : Grouped DataFrame + subplots : bool + * ``False`` - no subplots will be used + * ``True`` - create a subplot for each group. + + column : column name or list of names, or vector + Can be any valid input to groupby. 
+ fontsize : int or str + rot : label rotation angle + grid : Setting this to True will show the grid + ax : Matplotlib axis object, default None + figsize : A tuple (width, height) in inches + layout : tuple (optional) + The layout of the plot: (rows, columns). + sharex : bool, default False + Whether x-axes will be shared among subplots. + sharey : bool, default True + Whether y-axes will be shared among subplots. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 + + **kwargs + All other plotting keyword arguments to be passed to + matplotlib's boxplot function. + + Returns + ------- + dict of key/value = group key/DataFrame.boxplot return value + or DataFrame.boxplot return value in case subplots=figures=False + + Examples + -------- + You can create boxplots for grouped data and show them as separate subplots: + + .. plot:: + :context: close-figs + + >>> import itertools + >>> tuples = [t for t in itertools.product(range(1000), range(4))] + >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) + >>> data = np.random.randn(len(index),4) + >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index) + >>> grouped = df.groupby(level='lvl1') + >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10)) # doctest: +SKIP + + The ``subplots=False`` option shows the boxplots in a single figure. + + .. plot:: + :context: close-figs + + >>> grouped.boxplot(subplots=False, rot=45, fontsize=12) # doctest: +SKIP + """ + plot_backend = _get_plot_backend(backend) + return plot_backend.boxplot_frame_groupby( + grouped, + subplots=subplots, + column=column, + fontsize=fontsize, + rot=rot, + grid=grid, + ax=ax, + figsize=figsize, + layout=layout, + sharex=sharex, + sharey=sharey, + **kwargs, + ) + + +class PlotAccessor(PandasObject): + """ + Make plots of Series or DataFrame. + + Uses the backend specified by the + option ``plotting.backend``. By default, matplotlib is used. + + Parameters + ---------- + data : Series or DataFrame + The object for which the method is called. + x : label or position, default None + Only used if data is a DataFrame. + y : label, position or list of label, positions, default None + Allows plotting of one column versus another. Only used if data is a + DataFrame. + kind : str + The kind of plot to produce: + + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + - 'scatter' : scatter plot (DataFrame only) + - 'hexbin' : hexbin plot (DataFrame only) + ax : matplotlib axes object, default None + An axes of the current figure. + subplots : bool or sequence of iterables, default False + Whether to group columns into subplots: + + - ``False`` : No subplots will be used + - ``True`` : Make separate subplots for each column. + - sequence of iterables of column labels: Create a subplot for each + group of columns. For example `[('a', 'c'), ('b', 'd')]` will + create 2 subplots: one with columns 'a' and 'c', and one + with columns 'b' and 'd'. Remaining columns that aren't specified + will be plotted in additional subplots (one per column). + .. 
versionadded:: 1.5.0 + + sharex : bool, default True if ax is None else False + In case ``subplots=True``, share x axis and set some x axis labels + to invisible; defaults to True if ax is None otherwise False if + an ax is passed in; Be aware, that passing in both an ax and + ``sharex=True`` will alter all x axis labels for all axis in a figure. + sharey : bool, default False + In case ``subplots=True``, share y axis and set some y axis labels to invisible. + layout : tuple, optional + (rows, columns) for the layout of subplots. + figsize : a tuple (width, height) in inches + Size of a figure object. + use_index : bool, default True + Use index as ticks for x axis. + title : str or list + Title to use for the plot. If a string is passed, print the string + at the top of the figure. If a list is passed and `subplots` is + True, print each item in the list above the corresponding subplot. + grid : bool, default None (matlab style default) + Axis grid lines. + legend : bool or {'reverse'} + Place legend on axis subplots. + style : list or dict + The matplotlib line style per column. + logx : bool or 'sym', default False + Use log scaling or symlog scaling on x axis. + .. versionchanged:: 0.25.0 + + logy : bool or 'sym' default False + Use log scaling or symlog scaling on y axis. + .. versionchanged:: 0.25.0 + + loglog : bool or 'sym', default False + Use log scaling or symlog scaling on both x and y axes. + .. versionchanged:: 0.25.0 + + xticks : sequence + Values to use for the xticks. + yticks : sequence + Values to use for the yticks. + xlim : 2-tuple/list + Set the x limits of the current axes. + ylim : 2-tuple/list + Set the y limits of the current axes. + xlabel : label, optional + Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the + x-column name for planar plots. + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.2.0 + + Now applicable to planar plots (`scatter`, `hexbin`). + + ylabel : label, optional + Name to use for the ylabel on y-axis. Default will show no ylabel, or the + y-column name for planar plots. + + .. versionadded:: 1.1.0 + + .. versionchanged:: 1.2.0 + + Now applicable to planar plots (`scatter`, `hexbin`). + + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal + plots). + fontsize : int, default None + Font size for xticks and yticks. + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + colorbar : bool, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' + plots). + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center). + table : bool, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data + will be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a + table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : DataFrame, Series, array-like, dict and str + Equivalent to yerr. + stacked : bool, default False in line and bar plots, and True in area plot + If True, create stacked plot. + sort_columns : bool, default False + Sort column names to determine plot ordering. + + .. deprecated:: 1.5.0 + The `sort_columns` arguments is deprecated and will be removed in a + future version. 
+ + secondary_y : bool or sequence, default False + Whether to plot on the secondary y-axis if a list/tuple, which + columns to plot on secondary y-axis. + mark_right : bool, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend. + include_bool : bool, default is False + If True, boolean values can be plotted. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 + + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + If the backend is not the default matplotlib one, the return value + will be the object returned by the backend. + + Notes + ----- + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) + """ + + _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") + _series_kinds = ("pie",) + _dataframe_kinds = ("scatter", "hexbin") + _kind_aliases = {"density": "kde"} + _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds + + def __init__(self, data) -> None: + self._parent = data + + @staticmethod + def _get_call_args(backend_name, data, args, kwargs): + """ + This function makes calls to this accessor `__call__` method compatible + with the previous `SeriesPlotMethods.__call__` and + `DataFramePlotMethods.__call__`. Those had slightly different + signatures, since `DataFramePlotMethods` accepted `x` and `y` + parameters. 
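+
+        For instance (an illustrative example based on the handling below)::
+
+            s = pd.Series([1, 3, 2])
+            s.plot(kind="bar")   # accepted: keyword arguments only
+            s.plot("bar")        # raises TypeError asking for keyword arguments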
+ """ + if isinstance(data, ABCSeries): + arg_def = [ + ("kind", "line"), + ("ax", None), + ("figsize", None), + ("use_index", True), + ("title", None), + ("grid", None), + ("legend", False), + ("style", None), + ("logx", False), + ("logy", False), + ("loglog", False), + ("xticks", None), + ("yticks", None), + ("xlim", None), + ("ylim", None), + ("rot", None), + ("fontsize", None), + ("colormap", None), + ("table", False), + ("yerr", None), + ("xerr", None), + ("label", None), + ("secondary_y", False), + ("xlabel", None), + ("ylabel", None), + ] + elif isinstance(data, ABCDataFrame): + arg_def = [ + ("x", None), + ("y", None), + ("kind", "line"), + ("ax", None), + ("subplots", False), + ("sharex", None), + ("sharey", False), + ("layout", None), + ("figsize", None), + ("use_index", True), + ("title", None), + ("grid", None), + ("legend", True), + ("style", None), + ("logx", False), + ("logy", False), + ("loglog", False), + ("xticks", None), + ("yticks", None), + ("xlim", None), + ("ylim", None), + ("rot", None), + ("fontsize", None), + ("colormap", None), + ("table", False), + ("yerr", None), + ("xerr", None), + ("secondary_y", False), + ("sort_columns", False), + ("xlabel", None), + ("ylabel", None), + ] + else: + raise TypeError( + f"Called plot accessor for type {type(data).__name__}, " + "expected Series or DataFrame" + ) + + if "sort_columns" in itertools.chain(args, kwargs.keys()): + warnings.warn( + "`sort_columns` is deprecated and will be removed in a future " + "version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if args and isinstance(data, ABCSeries): + positional_args = str(args)[1:-1] + keyword_args = ", ".join( + [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)] + ) + msg = ( + "`Series.plot()` should not be called with positional " + "arguments, only keyword arguments. The order of " + "positional arguments will change in the future. " + f"Use `Series.plot({keyword_args})` instead of " + f"`Series.plot({positional_args})`." + ) + raise TypeError(msg) + + pos_args = {name: value for (name, _), value in zip(arg_def, args)} + if backend_name == "pandas.plotting._matplotlib": + kwargs = dict(arg_def, **pos_args, **kwargs) + else: + kwargs = dict(pos_args, **kwargs) + + x = kwargs.pop("x", None) + y = kwargs.pop("y", None) + kind = kwargs.pop("kind", "line") + return x, y, kind, kwargs + + def __call__(self, *args, **kwargs): + plot_backend = _get_plot_backend(kwargs.pop("backend", None)) + + x, y, kind, kwargs = self._get_call_args( + plot_backend.__name__, self._parent, args, kwargs + ) + + kind = self._kind_aliases.get(kind, kind) + + # when using another backend, get out of the way + if plot_backend.__name__ != "pandas.plotting._matplotlib": + return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs) + + if kind not in self._all_kinds: + raise ValueError(f"{kind} is not a valid plot kind") + + # The original data structured can be transformed before passed to the + # backend. For example, for DataFrame is common to set the index as the + # `x` parameter, and return a Series with the parameter `y` as values. 
+ data = self._parent.copy() + + if isinstance(data, ABCSeries): + kwargs["reuse_plot"] = True + + if kind in self._dataframe_kinds: + if isinstance(data, ABCDataFrame): + return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs) + else: + raise ValueError(f"plot kind {kind} can only be used for data frames") + elif kind in self._series_kinds: + if isinstance(data, ABCDataFrame): + if y is None and kwargs.get("subplots") is False: + raise ValueError( + f"{kind} requires either y column or 'subplots=True'" + ) + elif y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + elif isinstance(data, ABCDataFrame): + data_cols = data.columns + if x is not None: + if is_integer(x) and not data.columns.holds_integer(): + x = data_cols[x] + elif not isinstance(data[x], ABCSeries): + raise ValueError("x must be a label or position") + data = data.set_index(x) + if y is not None: + # check if we have y as int or list of ints + int_ylist = is_list_like(y) and all(is_integer(c) for c in y) + int_y_arg = is_integer(y) or int_ylist + if int_y_arg and not data.columns.holds_integer(): + y = data_cols[y] + + label_kw = kwargs["label"] if "label" in kwargs else False + for kw in ["xerr", "yerr"]: + if kw in kwargs and ( + isinstance(kwargs[kw], str) or is_integer(kwargs[kw]) + ): + try: + kwargs[kw] = data[kwargs[kw]] + except (IndexError, KeyError, TypeError): + pass + + # don't overwrite + data = data[y].copy() + + if isinstance(data, ABCSeries): + label_name = label_kw or y + data.name = label_name + else: + match = is_list_like(label_kw) and len(label_kw) == len(y) + if label_kw and not match: + raise ValueError( + "label should be list-like and same length as y" + ) + label_name = label_kw or data.columns + data.columns = label_name + + return plot_backend.plot(data, kind=kind, **kwargs) + + __call__.__doc__ = __doc__ + + @Appender( + """ + See Also + -------- + matplotlib.pyplot.plot : Plot y versus x as lines and/or markers. + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 3, 2]) + >>> s.plot.line() + + + .. plot:: + :context: close-figs + + The following example shows the populations for some animals + over the years. + + >>> df = pd.DataFrame({ + ... 'pig': [20, 18, 489, 675, 1776], + ... 'horse': [4, 25, 281, 600, 1900] + ... }, index=[1990, 1997, 2003, 2009, 2014]) + >>> lines = df.plot.line() + + .. plot:: + :context: close-figs + + An example with subplots, so an array of axes is returned. + + >>> axes = df.plot.line(subplots=True) + >>> type(axes) + + + .. plot:: + :context: close-figs + + Let's repeat the same example, but specifying colors for + each column (in this case, for each animal). + + >>> axes = df.plot.line( + ... subplots=True, color={"pig": "pink", "horse": "#742802"} + ... ) + + .. plot:: + :context: close-figs + + The following example shows the relationship between both + populations. + + >>> lines = df.plot.line(x='pig', y='horse') + """ + ) + @Substitution(kind="line") + @Appender(_bar_or_line_doc) + def line(self, x=None, y=None, **kwargs) -> PlotAccessor: + """ + Plot Series or DataFrame as lines. + + This function is useful to plot lines using DataFrame's values + as coordinates. + """ + return self(kind="line", x=x, y=y, **kwargs) + + @Appender( + """ + See Also + -------- + DataFrame.plot.barh : Horizontal bar plot. + DataFrame.plot : Make plots of a DataFrame. 
+ matplotlib.pyplot.bar : Make a bar plot with matplotlib. + + Examples + -------- + Basic plot. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]}) + >>> ax = df.plot.bar(x='lab', y='val', rot=0) + + Plot a whole dataframe to a bar plot. Each column is assigned a + distinct color, and each row is nested in a group along the + horizontal axis. + + .. plot:: + :context: close-figs + + >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] + >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] + >>> index = ['snail', 'pig', 'elephant', + ... 'rabbit', 'giraffe', 'coyote', 'horse'] + >>> df = pd.DataFrame({'speed': speed, + ... 'lifespan': lifespan}, index=index) + >>> ax = df.plot.bar(rot=0) + + Plot stacked bar charts for the DataFrame + + .. plot:: + :context: close-figs + + >>> ax = df.plot.bar(stacked=True) + + Instead of nesting, the figure can be split by column with + ``subplots=True``. In this case, a :class:`numpy.ndarray` of + :class:`matplotlib.axes.Axes` are returned. + + .. plot:: + :context: close-figs + + >>> axes = df.plot.bar(rot=0, subplots=True) + >>> axes[1].legend(loc=2) # doctest: +SKIP + + If you don't like the default colours, you can specify how you'd + like each column to be colored. + + .. plot:: + :context: close-figs + + >>> axes = df.plot.bar( + ... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"} + ... ) + >>> axes[1].legend(loc=2) # doctest: +SKIP + + Plot a single column. + + .. plot:: + :context: close-figs + + >>> ax = df.plot.bar(y='speed', rot=0) + + Plot only selected categories for the DataFrame. + + .. plot:: + :context: close-figs + + >>> ax = df.plot.bar(x='lifespan', rot=0) + """ + ) + @Substitution(kind="bar") + @Appender(_bar_or_line_doc) + def bar(self, x=None, y=None, **kwargs) -> PlotAccessor: + """ + Vertical bar plot. + + A bar plot is a plot that presents categorical data with + rectangular bars with lengths proportional to the values that they + represent. A bar plot shows comparisons among discrete categories. One + axis of the plot shows the specific categories being compared, and the + other axis represents a measured value. + """ + return self(kind="bar", x=x, y=y, **kwargs) + + @Appender( + """ + See Also + -------- + DataFrame.plot.bar: Vertical bar plot. + DataFrame.plot : Make plots of DataFrame using matplotlib. + matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib. + + Examples + -------- + Basic example + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]}) + >>> ax = df.plot.barh(x='lab', y='val') + + Plot a whole DataFrame to a horizontal bar plot + + .. plot:: + :context: close-figs + + >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] + >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] + >>> index = ['snail', 'pig', 'elephant', + ... 'rabbit', 'giraffe', 'coyote', 'horse'] + >>> df = pd.DataFrame({'speed': speed, + ... 'lifespan': lifespan}, index=index) + >>> ax = df.plot.barh() + + Plot stacked barh charts for the DataFrame + + .. plot:: + :context: close-figs + + >>> ax = df.plot.barh(stacked=True) + + We can specify colors for each column + + .. plot:: + :context: close-figs + + >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"}) + + Plot a column of the DataFrame to a horizontal bar plot + + .. plot:: + :context: close-figs + + >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] + >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] + >>> index = ['snail', 'pig', 'elephant', + ... 
'rabbit', 'giraffe', 'coyote', 'horse'] + >>> df = pd.DataFrame({'speed': speed, + ... 'lifespan': lifespan}, index=index) + >>> ax = df.plot.barh(y='speed') + + Plot DataFrame versus the desired column + + .. plot:: + :context: close-figs + + >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] + >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] + >>> index = ['snail', 'pig', 'elephant', + ... 'rabbit', 'giraffe', 'coyote', 'horse'] + >>> df = pd.DataFrame({'speed': speed, + ... 'lifespan': lifespan}, index=index) + >>> ax = df.plot.barh(x='lifespan') + """ + ) + @Substitution(kind="bar") + @Appender(_bar_or_line_doc) + def barh(self, x=None, y=None, **kwargs) -> PlotAccessor: + """ + Make a horizontal bar plot. + + A horizontal bar plot is a plot that presents quantitative data with + rectangular bars with lengths proportional to the values that they + represent. A bar plot shows comparisons among discrete categories. One + axis of the plot shows the specific categories being compared, and the + other axis represents a measured value. + """ + return self(kind="barh", x=x, y=y, **kwargs) + + def box(self, by=None, **kwargs) -> PlotAccessor: + r""" + Make a box plot of the DataFrame columns. + + A box plot is a method for graphically depicting groups of numerical + data through their quartiles. + The box extends from the Q1 to Q3 quartile values of the data, + with a line at the median (Q2). The whiskers extend from the edges + of box to show the range of the data. The position of the whiskers + is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the + box. Outlier points are those past the end of the whiskers. + + For further details see Wikipedia's + entry for `boxplot `__. + + A consideration when using this chart is that the box and the whiskers + can overlap, which is very common when plotting small sets of data. + + Parameters + ---------- + by : str or sequence + Column in the DataFrame to group by. + + .. versionchanged:: 1.4.0 + + Previously, `by` is silently ignore and makes no groupings + + **kwargs + Additional keywords are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + + See Also + -------- + DataFrame.boxplot: Another method to draw a box plot. + Series.plot.box: Draw a box plot from a Series object. + matplotlib.pyplot.boxplot: Draw a box plot in matplotlib. + + Examples + -------- + Draw a box plot from a DataFrame with four columns of randomly + generated data. + + .. plot:: + :context: close-figs + + >>> data = np.random.randn(25, 4) + >>> df = pd.DataFrame(data, columns=list('ABCD')) + >>> ax = df.plot.box() + + You can also generate groupings if you specify the `by` parameter (which + can take a column name, or a list or tuple of column names): + + .. versionchanged:: 1.4.0 + + .. plot:: + :context: close-figs + + >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85] + >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) + >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8)) + """ + return self(kind="box", by=by, **kwargs) + + def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor: + """ + Draw one histogram of the DataFrame's columns. + + A histogram is a representation of the distribution of data. + This function groups the values of all given Series in the DataFrame + into bins and draws all bins in one :class:`matplotlib.axes.Axes`. + This is useful when the DataFrame's Series are in a similar scale. 
+ + Parameters + ---------- + by : str or sequence, optional + Column in the DataFrame to group by. + + .. versionchanged:: 1.4.0 + + Previously, `by` is silently ignore and makes no groupings + + bins : int, default 10 + Number of histogram bins to be used. + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + class:`matplotlib.AxesSubplot` + Return a histogram plot. + + See Also + -------- + DataFrame.hist : Draw histograms per DataFrame's Series. + Series.hist : Draw a histogram with Series' data. + + Examples + -------- + When we roll a die 6000 times, we expect to get each value around 1000 + times. But when we roll two dice and sum the result, the distribution + is going to be quite different. A histogram illustrates those + distributions. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame( + ... np.random.randint(1, 7, 6000), + ... columns = ['one']) + >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000) + >>> ax = df.plot.hist(bins=12, alpha=0.5) + + A grouped histogram can be generated by providing the parameter `by` (which + can be a column name, or a list of column names): + + .. plot:: + :context: close-figs + + >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85] + >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) + >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8)) + """ + return self(kind="hist", by=by, bins=bins, **kwargs) + + def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor: + """ + Generate Kernel Density Estimate plot using Gaussian kernels. + + In statistics, `kernel density estimation`_ (KDE) is a non-parametric + way to estimate the probability density function (PDF) of a random + variable. This function uses Gaussian kernels and includes automatic + bandwidth determination. + + .. _kernel density estimation: + https://en.wikipedia.org/wiki/Kernel_density_estimation + + Parameters + ---------- + bw_method : str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or int, optional + Evaluation points for the estimated PDF. If None (default), + 1000 equally spaced points are used. If `ind` is a NumPy array, the + KDE is evaluated at the points passed. If `ind` is an integer, + `ind` number of equally spaced points are used. + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + See Also + -------- + scipy.stats.gaussian_kde : Representation of a kernel-density + estimate using Gaussian kernels. This is the function used + internally to estimate the PDF. + + Examples + -------- + Given a Series of points randomly sampled from an unknown + distribution, estimate its PDF using KDE with automatic + bandwidth determination and plot the results, evaluating them at + 1000 equally spaced points (default): + + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) + >>> ax = s.plot.kde() + + A scalar bandwidth can be specified. Using a small bandwidth value can + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=0.3) + + .. 
plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=3) + + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) + + For DataFrame, it works in the same way: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5], + ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6], + ... }) + >>> ax = df.plot.kde() + + A scalar bandwidth can be specified. Using a small bandwidth value can + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: + + .. plot:: + :context: close-figs + + >>> ax = df.plot.kde(bw_method=0.3) + + .. plot:: + :context: close-figs + + >>> ax = df.plot.kde(bw_method=3) + + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: + + .. plot:: + :context: close-figs + + >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) + """ + return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) + + density = kde + + def area(self, x=None, y=None, **kwargs) -> PlotAccessor: + """ + Draw a stacked area plot. + + An area plot displays quantitative data visually. + This function wraps the matplotlib area function. + + Parameters + ---------- + x : label or position, optional + Coordinates for the X axis. By default uses the index. + y : label or position, optional + Column to plot. By default uses all columns. + stacked : bool, default True + Area plots are stacked by default. Set to False to create a + unstacked plot. + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray + Area plot, or array of area plots if subplots is True. + + See Also + -------- + DataFrame.plot : Make plots of DataFrame using matplotlib / pylab. + + Examples + -------- + Draw an area plot based on basic business metrics: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'sales': [3, 2, 3, 9, 10, 6], + ... 'signups': [5, 5, 6, 12, 14, 13], + ... 'visits': [20, 42, 28, 62, 81, 50], + ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01', + ... freq='M')) + >>> ax = df.plot.area() + + Area plots are stacked by default. To produce an unstacked plot, + pass ``stacked=False``: + + .. plot:: + :context: close-figs + + >>> ax = df.plot.area(stacked=False) + + Draw an area plot for a single column: + + .. plot:: + :context: close-figs + + >>> ax = df.plot.area(y='sales') + + Draw with a different `x`: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'sales': [3, 2, 3], + ... 'visits': [20, 42, 28], + ... 'day': [1, 2, 3], + ... }) + >>> ax = df.plot.area(x='day') + """ + return self(kind="area", x=x, y=y, **kwargs) + + def pie(self, **kwargs) -> PlotAccessor: + """ + Generate a pie plot. + + A pie plot is a proportional representation of the numerical data in a + column. This function wraps :meth:`matplotlib.pyplot.pie` for the + specified column. If no column reference is passed and + ``subplots=True`` a pie plot is drawn for each numerical column + independently. + + Parameters + ---------- + y : int or label, optional + Label or position of the column to plot. + If not provided, ``subplots=True`` argument must be passed. + **kwargs + Keyword arguments to pass on to :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.axes.Axes or np.ndarray of them + A NumPy array is returned when `subplots` is True. 
+ + See Also + -------- + Series.plot.pie : Generate a pie plot for a Series. + DataFrame.plot : Make plots of a DataFrame. + + Examples + -------- + In the example below we have a DataFrame with the information about + planet's mass and radius. We pass the 'mass' column to the + pie function to get a pie plot. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97], + ... 'radius': [2439.7, 6051.8, 6378.1]}, + ... index=['Mercury', 'Venus', 'Earth']) + >>> plot = df.plot.pie(y='mass', figsize=(5, 5)) + + .. plot:: + :context: close-figs + + >>> plot = df.plot.pie(subplots=True, figsize=(11, 6)) + """ + if ( + isinstance(self._parent, ABCDataFrame) + and kwargs.get("y", None) is None + and not kwargs.get("subplots", False) + ): + raise ValueError("pie requires either y column or 'subplots=True'") + return self(kind="pie", **kwargs) + + def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor: + """ + Create a scatter plot with varying marker point size and color. + + The coordinates of each point are defined by two dataframe columns and + filled circles are used to represent each point. This kind of plot is + useful to see complex correlations between two variables. Points could + be for instance natural 2D coordinates like longitude and latitude in + a map or, in general, any pair of metrics that can be plotted against + each other. + + Parameters + ---------- + x : int or str + The column name or column position to be used as horizontal + coordinates for each point. + y : int or str + The column name or column position to be used as vertical + coordinates for each point. + s : str, scalar or array-like, optional + The size of each point. Possible values are: + + - A string with the name of the column to be used for marker's size. + + - A single scalar so all points have the same size. + + - A sequence of scalars, which will be used for each point's size + recursively. For instance, when passing [2,14] all points size + will be either 2 or 14, alternatively. + + .. versionchanged:: 1.1.0 + + c : str, int or array-like, optional + The color of each point. Possible values are: + + - A single color string referred to by name, RGB or RGBA code, + for instance 'red' or '#a98d19'. + + - A sequence of color strings referred to by name, RGB or RGBA + code, which will be used for each point's color recursively. For + instance ['green','yellow'] all points will be filled in green or + yellow, alternatively. + + - A column name or position whose values will be used to color the + marker points according to a colormap. + + **kwargs + Keyword arguments to pass on to :meth:`DataFrame.plot`. + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + + See Also + -------- + matplotlib.pyplot.scatter : Scatter plot using multiple input data + formats. + + Examples + -------- + Let's see how to draw a scatter plot using coordinates from the values + in a DataFrame's columns. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1], + ... [6.4, 3.2, 1], [5.9, 3.0, 2]], + ... columns=['length', 'width', 'species']) + >>> ax1 = df.plot.scatter(x='length', + ... y='width', + ... c='DarkBlue') + + And now with the color determined by a column as well. + + .. plot:: + :context: close-figs + + >>> ax2 = df.plot.scatter(x='length', + ... y='width', + ... c='species', + ... 
colormap='viridis') + """ + return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs) + + def hexbin( + self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs + ) -> PlotAccessor: + """ + Generate a hexagonal binning plot. + + Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` + (the default), this is a histogram of the number of occurrences + of the observations at ``(x[i], y[i])``. + + If `C` is specified, specifies values at given coordinates + ``(x[i], y[i])``. These values are accumulated for each hexagonal + bin and then reduced according to `reduce_C_function`, + having as default the NumPy's mean function (:meth:`numpy.mean`). + (If `C` is specified, it must also be a 1-D sequence + of the same length as `x` and `y`, or a column label.) + + Parameters + ---------- + x : int or str + The column label or position for x points. + y : int or str + The column label or position for y points. + C : int or str, optional + The column label or position for the value of `(x, y)` point. + reduce_C_function : callable, default `np.mean` + Function of one argument that reduces all the values in a bin to + a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`). + gridsize : int or tuple of (int, int), default 100 + The number of hexagons in the x-direction. + The corresponding number of hexagons in the y-direction is + chosen in a way that the hexagons are approximately regular. + Alternatively, gridsize can be a tuple with two elements + specifying the number of hexagons in the x-direction and the + y-direction. + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.AxesSubplot + The matplotlib ``Axes`` on which the hexbin is plotted. + + See Also + -------- + DataFrame.plot : Make plots of a DataFrame. + matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib, + the matplotlib function that is used under the hood. + + Examples + -------- + The following examples are generated with random data from + a normal distribution. + + .. plot:: + :context: close-figs + + >>> n = 10000 + >>> df = pd.DataFrame({'x': np.random.randn(n), + ... 'y': np.random.randn(n)}) + >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20) + + The next example uses `C` and `np.sum` as `reduce_C_function`. + Note that `'observations'` values ranges from 1 to 5 but the result + plot shows values up to more than 25. This is because of the + `reduce_C_function`. + + .. plot:: + :context: close-figs + + >>> n = 500 + >>> df = pd.DataFrame({ + ... 'coord_x': np.random.uniform(-3, 3, size=n), + ... 'coord_y': np.random.uniform(30, 50, size=n), + ... 'observations': np.random.randint(1,5, size=n) + ... }) + >>> ax = df.plot.hexbin(x='coord_x', + ... y='coord_y', + ... C='observations', + ... reduce_C_function=np.sum, + ... gridsize=10, + ... cmap="viridis") + """ + if reduce_C_function is not None: + kwargs["reduce_C_function"] = reduce_C_function + if gridsize is not None: + kwargs["gridsize"] = gridsize + + return self(kind="hexbin", x=x, y=y, C=C, **kwargs) + + +_backends: dict[str, types.ModuleType] = {} + + +def _load_backend(backend: str) -> types.ModuleType: + """ + Load a pandas plotting backend. + + Parameters + ---------- + backend : str + The identifier for the backend. Either an entrypoint item registered + with importlib.metadata, "matplotlib", or a module name. + + Returns + ------- + types.ModuleType + The imported backend. 
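+
+    Notes
+    -----
+    As an illustration (not required by the loading logic below), on Python
+    versions where ``entry_points`` supports ``select``, the registered
+    third-party backends can be listed with::
+
+        from importlib.metadata import entry_points
+
+        eps = entry_points().select(group="pandas_plotting_backends")
+        names = [ep.name for ep in eps]
+
+    A backend that is not registered as an entry point can still be used by
+    passing its importable module name, provided the module exposes a
+    top-level ``plot`` attribute.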
+ """ + from importlib.metadata import entry_points + + if backend == "matplotlib": + # Because matplotlib is an optional dependency and first-party backend, + # we need to attempt an import here to raise an ImportError if needed. + try: + module = importlib.import_module("pandas.plotting._matplotlib") + except ImportError: + raise ImportError( + "matplotlib is required for plotting when the " + 'default backend "matplotlib" is selected.' + ) from None + return module + + found_backend = False + + eps = entry_points() + key = "pandas_plotting_backends" + # entry_points lost dict API ~ PY 3.10 + # https://github.com/python/importlib_metadata/issues/298 + if hasattr(eps, "select"): + # error: "Dict[str, Tuple[EntryPoint, ...]]" has no attribute "select" + entry = eps.select(group=key) # type: ignore[attr-defined] + else: + entry = eps.get(key, ()) + for entry_point in entry: + found_backend = entry_point.name == backend + if found_backend: + module = entry_point.load() + break + + if not found_backend: + # Fall back to unregistered, module name approach. + try: + module = importlib.import_module(backend) + found_backend = True + except ImportError: + # We re-raise later on. + pass + + if found_backend: + if hasattr(module, "plot"): + # Validate that the interface is implemented when the option is set, + # rather than at plot time. + return module + + raise ValueError( + f"Could not find plotting backend '{backend}'. Ensure that you've " + f"installed the package providing the '{backend}' entrypoint, or that " + "the package has a top-level `.plot` method." + ) + + +def _get_plot_backend(backend: str | None = None): + """ + Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). + + The plotting system of pandas uses matplotlib by default, but the idea here + is that it can also work with other third-party backends. This function + returns the module which provides a top-level `.plot` method that will + actually do the plotting. The backend is specified from a string, which + either comes from the keyword argument `backend`, or, if not specified, from + the option `pandas.options.plotting.backend`. All the rest of the code in + this file uses the backend specified there for the plotting. + + The backend is imported lazily, as matplotlib is a soft dependency, and + pandas can be used without it being installed. + + Notes + ----- + Modifies `_backends` with imported backend as a side effect. 
+ """ + backend_str: str = backend or get_option("plotting.backend") + + if backend_str in _backends: + return _backends[backend_str] + + module = _load_backend(backend_str) + _backends[backend_str] = module + return module diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py new file mode 100644 index 00000000..75c61da0 --- /dev/null +++ b/pandas/plotting/_matplotlib/__init__.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pandas.plotting._matplotlib.boxplot import ( + BoxPlot, + boxplot, + boxplot_frame, + boxplot_frame_groupby, +) +from pandas.plotting._matplotlib.converter import ( + deregister, + register, +) +from pandas.plotting._matplotlib.core import ( + AreaPlot, + BarhPlot, + BarPlot, + HexBinPlot, + LinePlot, + PiePlot, + ScatterPlot, +) +from pandas.plotting._matplotlib.hist import ( + HistPlot, + KdePlot, + hist_frame, + hist_series, +) +from pandas.plotting._matplotlib.misc import ( + andrews_curves, + autocorrelation_plot, + bootstrap_plot, + lag_plot, + parallel_coordinates, + radviz, + scatter_matrix, +) +from pandas.plotting._matplotlib.tools import table + +if TYPE_CHECKING: + from pandas.plotting._matplotlib.core import MPLPlot + +PLOT_CLASSES: dict[str, type[MPLPlot]] = { + "line": LinePlot, + "bar": BarPlot, + "barh": BarhPlot, + "box": BoxPlot, + "hist": HistPlot, + "kde": KdePlot, + "area": AreaPlot, + "pie": PiePlot, + "scatter": ScatterPlot, + "hexbin": HexBinPlot, +} + + +def plot(data, kind, **kwargs): + # Importing pyplot at the top of the file (before the converters are + # registered) causes problems in matplotlib 2 (converters seem to not + # work) + import matplotlib.pyplot as plt + + if kwargs.pop("reuse_plot", False): + ax = kwargs.get("ax") + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + kwargs["ax"] = getattr(ax, "left_ax", ax) + plot_obj = PLOT_CLASSES[kind](data, **kwargs) + plot_obj.generate() + plot_obj.draw() + return plot_obj.result + + +__all__ = [ + "plot", + "hist_series", + "hist_frame", + "boxplot", + "boxplot_frame", + "boxplot_frame_groupby", + "table", + "andrews_curves", + "autocorrelation_plot", + "bootstrap_plot", + "lag_plot", + "parallel_coordinates", + "radviz", + "scatter_matrix", + "register", + "deregister", +] diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py new file mode 100644 index 00000000..6789485f --- /dev/null +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -0,0 +1,539 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Literal, + NamedTuple, +) +import warnings + +from matplotlib.artist import setp +import numpy as np + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import is_dict_like +from pandas.core.dtypes.missing import remove_na_arraylike + +import pandas as pd +import pandas.core.common as com + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.core import ( + LinePlot, + MPLPlot, +) +from pandas.plotting._matplotlib.groupby import create_iter_data_given_by +from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.tools import ( + create_subplots, + flatten_axes, + maybe_adjust_figure, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.lines import Line2D + + +class BoxPlot(LinePlot): + @property + def _kind(self) -> Literal["box"]: + return "box" + + 
_layout_type = "horizontal" + + _valid_return_types = (None, "axes", "dict", "both") + + class BP(NamedTuple): + # namedtuple to hold results + ax: Axes + lines: dict[str, list[Line2D]] + + def __init__(self, data, return_type: str = "axes", **kwargs) -> None: + # Do not call LinePlot.__init__ which may fill nan + if return_type not in self._valid_return_types: + raise ValueError("return_type must be {None, 'axes', 'dict', 'both'}") + + self.return_type = return_type + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if self.subplots: + # Disable label ax sharing. Otherwise, all subplots shows last + # column label + if self.orientation == "vertical": + self.sharex = False + else: + self.sharey = False + + @classmethod + def _plot(cls, ax, y, column_num=None, return_type="axes", **kwds): + if y.ndim == 2: + y = [remove_na_arraylike(v) for v in y] + # Boxplot fails with empty arrays, so need to add a NaN + # if any cols are empty + # GH 8181 + y = [v if v.size > 0 else np.array([np.nan]) for v in y] + else: + y = remove_na_arraylike(y) + bp = ax.boxplot(y, **kwds) + + if return_type == "dict": + return bp, bp + elif return_type == "both": + return cls.BP(ax=ax, lines=bp), bp + else: + return ax, bp + + def _validate_color_args(self): + if "color" in self.kwds: + if self.colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'", + stacklevel=find_stack_level(), + ) + self.color = self.kwds.pop("color") + + if isinstance(self.color, dict): + valid_keys = ["boxes", "whiskers", "medians", "caps"] + for key in self.color: + if key not in valid_keys: + raise ValueError( + f"color dict contains invalid key '{key}'. " + f"The key must be either {valid_keys}" + ) + else: + self.color = None + + # get standard colors for default + colors = get_standard_colors(num_colors=3, colormap=self.colormap, color=None) + # use 2 colors by default, for box/whisker and median + # flier colors isn't needed here + # because it can be specified by ``sym`` kw + self._boxes_c = colors[0] + self._whiskers_c = colors[0] + self._medians_c = colors[2] + self._caps_c = colors[0] + + def _get_colors(self, num_colors=None, color_kwds="color"): + pass + + def maybe_color_bp(self, bp) -> None: + if isinstance(self.color, dict): + boxes = self.color.get("boxes", self._boxes_c) + whiskers = self.color.get("whiskers", self._whiskers_c) + medians = self.color.get("medians", self._medians_c) + caps = self.color.get("caps", self._caps_c) + else: + # Other types are forwarded to matplotlib + # If None, use default colors + boxes = self.color or self._boxes_c + whiskers = self.color or self._whiskers_c + medians = self.color or self._medians_c + caps = self.color or self._caps_c + + # GH 30346, when users specifying those arguments explicitly, our defaults + # for these four kwargs should be overridden; if not, use Pandas settings + if not self.kwds.get("boxprops"): + setp(bp["boxes"], color=boxes, alpha=1) + if not self.kwds.get("whiskerprops"): + setp(bp["whiskers"], color=whiskers, alpha=1) + if not self.kwds.get("medianprops"): + setp(bp["medians"], color=medians, alpha=1) + if not self.kwds.get("capprops"): + setp(bp["caps"], color=caps, alpha=1) + + def _make_plot(self): + if self.subplots: + self._return_obj = pd.Series(dtype=object) + + # Re-create iterated data if `by` is assigned by users + data = ( + create_iter_data_given_by(self.data, self._kind) + if self.by is not None + else self.data + ) + + for i, (label, y) in 
enumerate(self._iter_data(data=data)): + ax = self._get_ax(i) + kwds = self.kwds.copy() + + # When by is applied, show title for subplots to know which group it is + # just like df.boxplot, and need to apply T on y to provide right input + if self.by is not None: + y = y.T + ax.set_title(pprint_thing(label)) + + # When `by` is assigned, the ticklabels will become unique grouped + # values, instead of label which is used as subtitle in this case. + ticklabels = [ + pprint_thing(col) for col in self.data.columns.levels[0] + ] + else: + ticklabels = [pprint_thing(label)] + + ret, bp = self._plot( + ax, y, column_num=i, return_type=self.return_type, **kwds + ) + self.maybe_color_bp(bp) + self._return_obj[label] = ret + self._set_ticklabels(ax, ticklabels) + else: + y = self.data.values.T + ax = self._get_ax(0) + kwds = self.kwds.copy() + + ret, bp = self._plot( + ax, y, column_num=0, return_type=self.return_type, **kwds + ) + self.maybe_color_bp(bp) + self._return_obj = ret + + labels = [left for left, _ in self._iter_data()] + labels = [pprint_thing(left) for left in labels] + if not self.use_index: + labels = [pprint_thing(key) for key in range(len(labels))] + self._set_ticklabels(ax, labels) + + def _set_ticklabels(self, ax: Axes, labels): + if self.orientation == "vertical": + ax.set_xticklabels(labels) + else: + ax.set_yticklabels(labels) + + def _make_legend(self): + pass + + def _post_plot_logic(self, ax, data): + # GH 45465: make sure that the boxplot doesn't ignore xlabel/ylabel + if self.xlabel: + ax.set_xlabel(pprint_thing(self.xlabel)) + if self.ylabel: + ax.set_ylabel(pprint_thing(self.ylabel)) + + @property + def orientation(self): + if self.kwds.get("vert", True): + return "vertical" + else: + return "horizontal" + + @property + def result(self): + if self.return_type is None: + return super().result + else: + return self._return_obj + + +def _grouped_plot_by_column( + plotf, + data, + columns=None, + by=None, + numeric_only=True, + grid=False, + figsize=None, + ax=None, + layout=None, + return_type=None, + **kwargs, +): + grouped = data.groupby(by) + if columns is None: + if not isinstance(by, (list, tuple)): + by = [by] + columns = data._get_numeric_data().columns.difference(by) + naxes = len(columns) + fig, axes = create_subplots( + naxes=naxes, + sharex=kwargs.pop("sharex", True), + sharey=kwargs.pop("sharey", True), + figsize=figsize, + ax=ax, + layout=layout, + ) + + _axes = flatten_axes(axes) + + # GH 45465: move the "by" label based on "vert" + xlabel, ylabel = kwargs.pop("xlabel", None), kwargs.pop("ylabel", None) + if kwargs.get("vert", True): + xlabel = xlabel or by + else: + ylabel = ylabel or by + + ax_values = [] + + for i, col in enumerate(columns): + ax = _axes[i] + gp_col = grouped[col] + keys, values = zip(*gp_col) + re_plotf = plotf(keys, values, ax, xlabel=xlabel, ylabel=ylabel, **kwargs) + ax.set_title(col) + ax_values.append(re_plotf) + ax.grid(grid) + + result = pd.Series(ax_values, index=columns) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type is None: + result = axes + + byline = by[0] if len(by) == 1 else by + fig.suptitle(f"Boxplot grouped by {byline}") + maybe_adjust_figure(fig, bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + + return result + + +def boxplot( + data, + column=None, + by=None, + ax=None, + fontsize=None, + rot: int = 0, + grid: bool = True, + figsize=None, + layout=None, + return_type=None, + **kwds, +): + + import matplotlib.pyplot as plt + + # validate return_type: + if return_type not in 
BoxPlot._valid_return_types: + raise ValueError("return_type must be {'axes', 'dict', 'both'}") + + if isinstance(data, pd.Series): + data = data.to_frame("x") + column = "x" + + def _get_colors(): + # num_colors=3 is required as method maybe_color_bp takes the colors + # in positions 0 and 2. + # if colors not provided, use same defaults as DataFrame.plot.box + result = get_standard_colors(num_colors=3) + result = np.take(result, [0, 0, 2]) + result = np.append(result, "k") + + colors = kwds.pop("color", None) + if colors: + if is_dict_like(colors): + # replace colors in result array with user-specified colors + # taken from the colors dict parameter + # "boxes" value placed in position 0, "whiskers" in 1, etc. + valid_keys = ["boxes", "whiskers", "medians", "caps"] + key_to_index = dict(zip(valid_keys, range(4))) + for key, value in colors.items(): + if key in valid_keys: + result[key_to_index[key]] = value + else: + raise ValueError( + f"color dict contains invalid key '{key}'. " + f"The key must be either {valid_keys}" + ) + else: + result.fill(colors) + + return result + + def maybe_color_bp(bp, **kwds): + # GH 30346, when users specifying those arguments explicitly, our defaults + # for these four kwargs should be overridden; if not, use Pandas settings + if not kwds.get("boxprops"): + setp(bp["boxes"], color=colors[0], alpha=1) + if not kwds.get("whiskerprops"): + setp(bp["whiskers"], color=colors[1], alpha=1) + if not kwds.get("medianprops"): + setp(bp["medians"], color=colors[2], alpha=1) + if not kwds.get("capprops"): + setp(bp["caps"], color=colors[3], alpha=1) + + def plot_group(keys, values, ax: Axes, **kwds): + # GH 45465: xlabel/ylabel need to be popped out before plotting happens + xlabel, ylabel = kwds.pop("xlabel", None), kwds.pop("ylabel", None) + if xlabel: + ax.set_xlabel(pprint_thing(xlabel)) + if ylabel: + ax.set_ylabel(pprint_thing(ylabel)) + + keys = [pprint_thing(x) for x in keys] + values = [np.asarray(remove_na_arraylike(v), dtype=object) for v in values] + bp = ax.boxplot(values, **kwds) + if fontsize is not None: + ax.tick_params(axis="both", labelsize=fontsize) + + # GH 45465: x/y are flipped when "vert" changes + is_vertical = kwds.get("vert", True) + ticks = ax.get_xticks() if is_vertical else ax.get_yticks() + if len(ticks) != len(keys): + i, remainder = divmod(len(ticks), len(keys)) + assert remainder == 0, remainder + keys *= i + if is_vertical: + ax.set_xticklabels(keys, rotation=rot) + else: + ax.set_yticklabels(keys, rotation=rot) + maybe_color_bp(bp, **kwds) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type == "dict": + return bp + elif return_type == "both": + return BoxPlot.BP(ax=ax, lines=bp) + else: + return ax + + colors = _get_colors() + if column is None: + columns = None + else: + if isinstance(column, (list, tuple)): + columns = column + else: + columns = [column] + + if by is not None: + # Prefer array return type for 2-D plots to match the subplot layout + # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 + result = _grouped_plot_by_column( + plot_group, + data, + columns=columns, + by=by, + grid=grid, + figsize=figsize, + ax=ax, + layout=layout, + return_type=return_type, + **kwds, + ) + else: + if return_type is None: + return_type = "axes" + if layout is not None: + raise ValueError("The 'layout' keyword is not supported when 'by' is None") + + if ax is None: + rc = {"figure.figsize": figsize} if figsize is not None else {} + with plt.rc_context(rc): + ax = plt.gca() + data = 
data._get_numeric_data() + naxes = len(data.columns) + if naxes == 0: + raise ValueError( + "boxplot method requires numerical columns, nothing to plot." + ) + if columns is None: + columns = data.columns + else: + data = data[columns] + + result = plot_group(columns, data.values.T, ax, **kwds) + ax.grid(grid) + + return result + + +def boxplot_frame( + self, + column=None, + by=None, + ax=None, + fontsize=None, + rot: int = 0, + grid: bool = True, + figsize=None, + layout=None, + return_type=None, + **kwds, +): + import matplotlib.pyplot as plt + + ax = boxplot( + self, + column=column, + by=by, + ax=ax, + fontsize=fontsize, + grid=grid, + rot=rot, + figsize=figsize, + layout=layout, + return_type=return_type, + **kwds, + ) + plt.draw_if_interactive() + return ax + + +def boxplot_frame_groupby( + grouped, + subplots: bool = True, + column=None, + fontsize=None, + rot: int = 0, + grid: bool = True, + ax=None, + figsize=None, + layout=None, + sharex: bool = False, + sharey: bool = True, + **kwds, +): + if subplots is True: + naxes = len(grouped) + fig, axes = create_subplots( + naxes=naxes, + squeeze=False, + ax=ax, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + ) + axes = flatten_axes(axes) + + ret = pd.Series(dtype=object) + + for (key, group), ax in zip(grouped, axes): + d = group.boxplot( + ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds + ) + ax.set_title(pprint_thing(key)) + ret.loc[key] = d + maybe_adjust_figure(fig, bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + else: + keys, frames = zip(*grouped) + if grouped.axis == 0: + df = pd.concat(frames, keys=keys, axis=1) + else: + if len(frames) > 1: + df = frames[0].join(frames[1::]) + else: + df = frames[0] + + # GH 16748, DataFrameGroupby fails when subplots=False and `column` argument + # is assigned, and in this case, since `df` here becomes MI after groupby, + # so we need to couple the keys (grouped values) and column (original df + # column) together to search for subset to plot + if column is not None: + column = com.convert_to_list_like(column) + multi_key = pd.MultiIndex.from_product([keys, column]) + column = list(multi_key.values) + ret = df.boxplot( + column=column, + fontsize=fontsize, + rot=rot, + grid=grid, + ax=ax, + figsize=figsize, + layout=layout, + **kwds, + ) + return ret diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py new file mode 100644 index 00000000..86b218db --- /dev/null +++ b/pandas/plotting/_matplotlib/compat.py @@ -0,0 +1,22 @@ +# being a bit too dynamic +from __future__ import annotations + +import operator + +from pandas.util.version import Version + + +def _mpl_version(version, op): + def inner(): + try: + import matplotlib as mpl + except ImportError: + return False + return op(Version(mpl.__version__), Version(version)) + + return inner + + +mpl_ge_3_4_0 = _mpl_version("3.4.0", operator.ge) +mpl_ge_3_5_0 = _mpl_version("3.5.0", operator.ge) +mpl_ge_3_6_0 = _mpl_version("3.6.0", operator.ge) diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py new file mode 100644 index 00000000..51a08164 --- /dev/null +++ b/pandas/plotting/_matplotlib/converter.py @@ -0,0 +1,1116 @@ +from __future__ import annotations + +import contextlib +import datetime as pydt +from datetime import ( + datetime, + timedelta, + tzinfo, +) +import functools +from typing import ( + TYPE_CHECKING, + Any, + Final, + Iterator, + cast, +) + +from dateutil.relativedelta import relativedelta +import 
matplotlib.dates as dates +from matplotlib.ticker import ( + AutoLocator, + Formatter, + Locator, +) +from matplotlib.transforms import nonsingular +import matplotlib.units as units +import numpy as np + +from pandas._libs import lib +from pandas._libs.tslibs import ( + Timestamp, + to_offset, +) +from pandas._libs.tslibs.dtypes import FreqGroup +from pandas._typing import F + +from pandas.core.dtypes.common import ( + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_nested_list_like, +) + +from pandas import ( + Index, + Series, + get_option, +) +import pandas.core.common as com +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import ( + Period, + PeriodIndex, + period_range, +) +import pandas.core.tools.datetimes as tools + +if TYPE_CHECKING: + from pandas._libs.tslibs.offsets import BaseOffset + +# constants +HOURS_PER_DAY: Final = 24.0 +MIN_PER_HOUR: Final = 60.0 +SEC_PER_MIN: Final = 60.0 + +SEC_PER_HOUR: Final = SEC_PER_MIN * MIN_PER_HOUR +SEC_PER_DAY: Final = SEC_PER_HOUR * HOURS_PER_DAY + +MUSEC_PER_DAY: Final = 10**6 * SEC_PER_DAY + +_mpl_units = {} # Cache for units overwritten by us + + +def get_pairs(): + pairs = [ + (Timestamp, DatetimeConverter), + (Period, PeriodConverter), + (pydt.datetime, DatetimeConverter), + (pydt.date, DatetimeConverter), + (pydt.time, TimeConverter), + (np.datetime64, DatetimeConverter), + ] + return pairs + + +def register_pandas_matplotlib_converters(func: F) -> F: + """ + Decorator applying pandas_converters. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with pandas_converters(): + return func(*args, **kwargs) + + return cast(F, wrapper) + + +@contextlib.contextmanager +def pandas_converters() -> Iterator[None]: + """ + Context manager registering pandas' converters for a plot. + + See Also + -------- + register_pandas_matplotlib_converters : Decorator that applies this. + """ + value = get_option("plotting.matplotlib.register_converters") + + if value: + # register for True or "auto" + register() + try: + yield + finally: + if value == "auto": + # only deregister for "auto" + deregister() + + +def register() -> None: + pairs = get_pairs() + for type_, cls in pairs: + # Cache previous converter if present + if type_ in units.registry and not isinstance(units.registry[type_], cls): + previous = units.registry[type_] + _mpl_units[type_] = previous + # Replace with pandas converter + units.registry[type_] = cls() + + +def deregister() -> None: + # Renamed in pandas.plotting.__init__ + for type_, cls in get_pairs(): + # We use type to catch our classes directly, no inheritance + if type(units.registry.get(type_)) is cls: + units.registry.pop(type_) + + # restore the old keys + for unit, formatter in _mpl_units.items(): + if type(formatter) not in {DatetimeConverter, PeriodConverter, TimeConverter}: + # make it idempotent by excluding ours. 
+ units.registry[unit] = formatter + + +def _to_ordinalf(tm: pydt.time) -> float: + tot_sec = tm.hour * 3600 + tm.minute * 60 + tm.second + tm.microsecond / 10**6 + return tot_sec + + +def time2num(d): + if isinstance(d, str): + parsed = tools.to_datetime(d) + if not isinstance(parsed, datetime): + raise ValueError(f"Could not parse time {d}") + return _to_ordinalf(parsed.time()) + if isinstance(d, pydt.time): + return _to_ordinalf(d) + return d + + +class TimeConverter(units.ConversionInterface): + @staticmethod + def convert(value, unit, axis): + valid_types = (str, pydt.time) + if isinstance(value, valid_types) or is_integer(value) or is_float(value): + return time2num(value) + if isinstance(value, Index): + return value.map(time2num) + if isinstance(value, (list, tuple, np.ndarray, Index)): + return [time2num(x) for x in value] + return value + + @staticmethod + def axisinfo(unit, axis) -> units.AxisInfo | None: + if unit != "time": + return None + + majloc = AutoLocator() + majfmt = TimeFormatter(majloc) + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label="time") + + @staticmethod + def default_units(x, axis) -> str: + return "time" + + +# time formatter +class TimeFormatter(Formatter): + def __init__(self, locs) -> None: + self.locs = locs + + def __call__(self, x, pos: int = 0) -> str: + """ + Return the time of day as a formatted string. + + Parameters + ---------- + x : float + The time of day specified as seconds since 00:00 (midnight), + with up to microsecond precision. + pos + Unused + + Returns + ------- + str + A string in HH:MM:SS.mmmuuu format. Microseconds, + milliseconds and seconds are only displayed if non-zero. + """ + fmt = "%H:%M:%S.%f" + s = int(x) + msus = round((x - s) * 10**6) + ms = msus // 1000 + us = msus % 1000 + m, s = divmod(s, 60) + h, m = divmod(m, 60) + _, h = divmod(h, 24) + if us != 0: + return pydt.time(h, m, s, msus).strftime(fmt) + elif ms != 0: + return pydt.time(h, m, s, msus).strftime(fmt)[:-3] + elif s != 0: + return pydt.time(h, m, s).strftime("%H:%M:%S") + + return pydt.time(h, m).strftime("%H:%M") + + +# Period Conversion + + +class PeriodConverter(dates.DateConverter): + @staticmethod + def convert(values, units, axis): + if is_nested_list_like(values): + values = [PeriodConverter._convert_1d(v, units, axis) for v in values] + else: + values = PeriodConverter._convert_1d(values, units, axis) + return values + + @staticmethod + def _convert_1d(values, units, axis): + if not hasattr(axis, "freq"): + raise TypeError("Axis must have `freq` set to convert to Periods") + valid_types = (str, datetime, Period, pydt.date, pydt.time, np.datetime64) + if isinstance(values, valid_types) or is_integer(values) or is_float(values): + return get_datevalue(values, axis.freq) + elif isinstance(values, PeriodIndex): + return values.asfreq(axis.freq).asi8 + elif isinstance(values, Index): + return values.map(lambda x: get_datevalue(x, axis.freq)) + elif lib.infer_dtype(values, skipna=False) == "period": + # https://github.com/pandas-dev/pandas/issues/24304 + # convert ndarray[period] -> PeriodIndex + return PeriodIndex(values, freq=axis.freq).asi8 + elif isinstance(values, (list, tuple, np.ndarray, Index)): + return [get_datevalue(x, axis.freq) for x in values] + return values + + +def get_datevalue(date, freq): + if isinstance(date, Period): + return date.asfreq(freq).ordinal + elif isinstance(date, (str, datetime, pydt.date, pydt.time, np.datetime64)): + return Period(date, freq).ordinal + elif ( + is_integer(date) + or is_float(date) + or 
(isinstance(date, (np.ndarray, Index)) and (date.size == 1)) + ): + return date + elif date is None: + return None + raise ValueError(f"Unrecognizable date '{date}'") + + +# Datetime Conversion +class DatetimeConverter(dates.DateConverter): + @staticmethod + def convert(values, unit, axis): + # values might be a 1-d array, or a list-like of arrays. + if is_nested_list_like(values): + values = [DatetimeConverter._convert_1d(v, unit, axis) for v in values] + else: + values = DatetimeConverter._convert_1d(values, unit, axis) + return values + + @staticmethod + def _convert_1d(values, unit, axis): + def try_parse(values): + try: + return dates.date2num(tools.to_datetime(values)) + except Exception: + return values + + if isinstance(values, (datetime, pydt.date, np.datetime64, pydt.time)): + return dates.date2num(values) + elif is_integer(values) or is_float(values): + return values + elif isinstance(values, str): + return try_parse(values) + elif isinstance(values, (list, tuple, np.ndarray, Index, Series)): + if isinstance(values, Series): + # https://github.com/matplotlib/matplotlib/issues/11391 + # Series was skipped. Convert to DatetimeIndex to get asi8 + values = Index(values) + if isinstance(values, Index): + values = values.values + if not isinstance(values, np.ndarray): + values = com.asarray_tuplesafe(values) + + if is_integer_dtype(values) or is_float_dtype(values): + return values + + try: + values = tools.to_datetime(values) + except Exception: + pass + + values = dates.date2num(values) + + return values + + @staticmethod + def axisinfo(unit: tzinfo | None, axis) -> units.AxisInfo: + """ + Return the :class:`~matplotlib.units.AxisInfo` for *unit*. + + *unit* is a tzinfo instance or None. + The *axis* argument is required but not used. + """ + tz = unit + + majloc = PandasAutoDateLocator(tz=tz) + majfmt = PandasAutoDateFormatter(majloc, tz=tz) + datemin = pydt.date(2000, 1, 1) + datemax = pydt.date(2010, 1, 1) + + return units.AxisInfo( + majloc=majloc, majfmt=majfmt, label="", default_limits=(datemin, datemax) + ) + + +class PandasAutoDateFormatter(dates.AutoDateFormatter): + def __init__(self, locator, tz=None, defaultfmt: str = "%Y-%m-%d") -> None: + dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) + + +class PandasAutoDateLocator(dates.AutoDateLocator): + def get_locator(self, dmin, dmax): + """Pick the best locator based on a distance.""" + delta = relativedelta(dmax, dmin) + + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days + num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds + tot_sec = num_days * 86400.0 + num_sec + + if abs(tot_sec) < self.minticks: + self._freq = -1 + locator = MilliSecondLocator(self.tz) + locator.set_axis(self.axis) + + locator.axis.set_view_interval(*self.axis.get_view_interval()) + locator.axis.set_data_interval(*self.axis.get_data_interval()) + return locator + + return dates.AutoDateLocator.get_locator(self, dmin, dmax) + + def _get_unit(self): + return MilliSecondLocator.get_unit_generic(self._freq) + + +class MilliSecondLocator(dates.DateLocator): + + UNIT = 1.0 / (24 * 3600 * 1000) + + def __init__(self, tz) -> None: + dates.DateLocator.__init__(self, tz) + self._interval = 1.0 + + def _get_unit(self): + return self.get_unit_generic(-1) + + @staticmethod + def get_unit_generic(freq): + unit = dates.RRuleLocator.get_unit_generic(freq) + if unit < 0: + return MilliSecondLocator.UNIT + return unit + + def __call__(self): + # if no data have been set, this will tank with a ValueError + try: + dmin, 
dmax = self.viewlim_to_dt() + except ValueError: + return [] + + # We need to cap at the endpoints of valid datetime + nmax, nmin = dates.date2num((dmax, dmin)) + + num = (nmax - nmin) * 86400 * 1000 + max_millis_ticks = 6 + for interval in [1, 10, 50, 100, 200, 500]: + if num <= interval * (max_millis_ticks - 1): + self._interval = interval + break + else: + # We went through the whole loop without breaking, default to 1 + self._interval = 1000.0 + + estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) + + if estimate > self.MAXTICKS * 2: + raise RuntimeError( + "MillisecondLocator estimated to generate " + f"{estimate:d} ticks from {dmin} to {dmax}: exceeds Locator.MAXTICKS" + f"* 2 ({self.MAXTICKS * 2:d}) " + ) + + interval = self._get_interval() + freq = f"{interval}L" + tz = self.tz.tzname(None) + st = dmin.replace(tzinfo=None) + ed = dmin.replace(tzinfo=None) + all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).astype(object) + + try: + if len(all_dates) > 0: + locs = self.raise_if_exceeds(dates.date2num(all_dates)) + return locs + except Exception: # pragma: no cover + pass + + lims = dates.date2num([dmin, dmax]) + return lims + + def _get_interval(self): + return self._interval + + def autoscale(self): + """ + Set the view limits to include the data range. + """ + # We need to cap at the endpoints of valid datetime + dmin, dmax = self.datalim_to_dt() + + vmin = dates.date2num(dmin) + vmax = dates.date2num(dmax) + + return self.nonsingular(vmin, vmax) + + +def _from_ordinal(x, tz: tzinfo | None = None) -> datetime: + ix = int(x) + dt = datetime.fromordinal(ix) + remainder = float(x) - ix + hour, remainder = divmod(24 * remainder, 1) + minute, remainder = divmod(60 * remainder, 1) + second, remainder = divmod(60 * remainder, 1) + microsecond = int(1_000_000 * remainder) + if microsecond < 10: + microsecond = 0 # compensate for rounding errors + dt = datetime( + dt.year, dt.month, dt.day, int(hour), int(minute), int(second), microsecond + ) + if tz is not None: + dt = dt.astimezone(tz) + + if microsecond > 999990: # compensate for rounding errors + dt += timedelta(microseconds=1_000_000 - microsecond) + + return dt + + +# Fixed frequency dynamic tick locators and formatters + +# ------------------------------------------------------------------------- +# --- Locators --- +# ------------------------------------------------------------------------- + + +def _get_default_annual_spacing(nyears) -> tuple[int, int]: + """ + Returns a default spacing between consecutive ticks for annual data. + """ + if nyears < 11: + (min_spacing, maj_spacing) = (1, 1) + elif nyears < 20: + (min_spacing, maj_spacing) = (1, 2) + elif nyears < 50: + (min_spacing, maj_spacing) = (1, 5) + elif nyears < 100: + (min_spacing, maj_spacing) = (5, 10) + elif nyears < 200: + (min_spacing, maj_spacing) = (5, 25) + elif nyears < 600: + (min_spacing, maj_spacing) = (10, 50) + else: + factor = nyears // 1000 + 1 + (min_spacing, maj_spacing) = (factor * 20, factor * 100) + return (min_spacing, maj_spacing) + + +def period_break(dates: PeriodIndex, period: str) -> np.ndarray: + """ + Returns the indices where the given period changes. + + Parameters + ---------- + dates : PeriodIndex + Array of intervals to monitor. + period : str + Name of the period to monitor. 
+ """ + current = getattr(dates, period) + previous = getattr(dates - 1 * dates.freq, period) + return np.nonzero(current - previous)[0] + + +def has_level_label(label_flags: np.ndarray, vmin: float) -> bool: + """ + Returns true if the ``label_flags`` indicate there is at least one label + for this level. + + if the minimum view limit is not an exact integer, then the first tick + label won't be shown, so we must adjust for that. + """ + if label_flags.size == 0 or ( + label_flags.size == 1 and label_flags[0] == 0 and vmin % 1 > 0.0 + ): + return False + else: + return True + + +def _daily_finder(vmin, vmax, freq: BaseOffset): + # error: "BaseOffset" has no attribute "_period_dtype_code" + dtype_code = freq._period_dtype_code # type: ignore[attr-defined] + freq_group = FreqGroup.from_period_dtype_code(dtype_code) + + periodsperday = -1 + + if dtype_code >= FreqGroup.FR_HR.value: + if freq_group == FreqGroup.FR_NS: + periodsperday = 24 * 60 * 60 * 1000000000 + elif freq_group == FreqGroup.FR_US: + periodsperday = 24 * 60 * 60 * 1000000 + elif freq_group == FreqGroup.FR_MS: + periodsperday = 24 * 60 * 60 * 1000 + elif freq_group == FreqGroup.FR_SEC: + periodsperday = 24 * 60 * 60 + elif freq_group == FreqGroup.FR_MIN: + periodsperday = 24 * 60 + elif freq_group == FreqGroup.FR_HR: + periodsperday = 24 + else: # pragma: no cover + raise ValueError(f"unexpected frequency: {dtype_code}") + periodsperyear = 365 * periodsperday + periodspermonth = 28 * periodsperday + + elif freq_group == FreqGroup.FR_BUS: + periodsperyear = 261 + periodspermonth = 19 + elif freq_group == FreqGroup.FR_DAY: + periodsperyear = 365 + periodspermonth = 28 + elif freq_group == FreqGroup.FR_WK: + periodsperyear = 52 + periodspermonth = 3 + else: # pragma: no cover + raise ValueError("unexpected frequency") + + # save this for later usage + vmin_orig = vmin + + (vmin, vmax) = ( + Period(ordinal=int(vmin), freq=freq), + Period(ordinal=int(vmax), freq=freq), + ) + assert isinstance(vmin, Period) + assert isinstance(vmax, Period) + span = vmax.ordinal - vmin.ordinal + 1 + dates_ = period_range(start=vmin, end=vmax, freq=freq) + # Initialize the output + info = np.zeros( + span, dtype=[("val", np.int64), ("maj", bool), ("min", bool), ("fmt", "|S20")] + ) + info["val"][:] = dates_.asi8 + info["fmt"][:] = "" + info["maj"][[0, -1]] = True + # .. and set some shortcuts + info_maj = info["maj"] + info_min = info["min"] + info_fmt = info["fmt"] + + def first_label(label_flags): + if (label_flags[0] == 0) and (label_flags.size > 1) and ((vmin_orig % 1) > 0.0): + return label_flags[1] + else: + return label_flags[0] + + # Case 1. 
Less than a month + if span <= periodspermonth: + day_start = period_break(dates_, "day") + month_start = period_break(dates_, "month") + + def _hour_finder(label_interval, force_year_start): + _hour = dates_.hour + _prev_hour = (dates_ - 1 * dates_.freq).hour + hour_start = (_hour - _prev_hour) != 0 + info_maj[day_start] = True + info_min[hour_start & (_hour % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt[hour_start & (_hour % label_interval == 0)] = "%H:%M" + info_fmt[day_start] = "%H:%M\n%d-%b" + info_fmt[year_start] = "%H:%M\n%d-%b\n%Y" + if force_year_start and not has_level_label(year_start, vmin_orig): + info_fmt[first_label(day_start)] = "%H:%M\n%d-%b\n%Y" + + def _minute_finder(label_interval): + hour_start = period_break(dates_, "hour") + _minute = dates_.minute + _prev_minute = (dates_ - 1 * dates_.freq).minute + minute_start = (_minute - _prev_minute) != 0 + info_maj[hour_start] = True + info_min[minute_start & (_minute % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[minute_start & (_minute % label_interval == 0)] = "%H:%M" + info_fmt[day_start] = "%H:%M\n%d-%b" + info_fmt[year_start] = "%H:%M\n%d-%b\n%Y" + + def _second_finder(label_interval): + minute_start = period_break(dates_, "minute") + _second = dates_.second + _prev_second = (dates_ - 1 * dates_.freq).second + second_start = (_second - _prev_second) != 0 + info["maj"][minute_start] = True + info["min"][second_start & (_second % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[second_start & (_second % label_interval == 0)] = "%H:%M:%S" + info_fmt[day_start] = "%H:%M:%S\n%d-%b" + info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y" + + if span < periodsperday / 12000: + _second_finder(1) + elif span < periodsperday / 6000: + _second_finder(2) + elif span < periodsperday / 2400: + _second_finder(5) + elif span < periodsperday / 1200: + _second_finder(10) + elif span < periodsperday / 800: + _second_finder(15) + elif span < periodsperday / 400: + _second_finder(30) + elif span < periodsperday / 150: + _minute_finder(1) + elif span < periodsperday / 70: + _minute_finder(2) + elif span < periodsperday / 24: + _minute_finder(5) + elif span < periodsperday / 12: + _minute_finder(15) + elif span < periodsperday / 6: + _minute_finder(30) + elif span < periodsperday / 2.5: + _hour_finder(1, False) + elif span < periodsperday / 1.5: + _hour_finder(2, False) + elif span < periodsperday * 1.25: + _hour_finder(3, False) + elif span < periodsperday * 2.5: + _hour_finder(6, True) + elif span < periodsperday * 4: + _hour_finder(12, True) + else: + info_maj[month_start] = True + info_min[day_start] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[day_start] = "%d" + info_fmt[month_start] = "%d\n%b" + info_fmt[year_start] = "%d\n%b\n%Y" + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(day_start)] = "%d\n%b\n%Y" + else: + info_fmt[first_label(month_start)] = "%d\n%b\n%Y" + + # Case 2. 
Less than three months + elif span <= periodsperyear // 4: + month_start = period_break(dates_, "month") + info_maj[month_start] = True + if dtype_code < FreqGroup.FR_HR.value: + info["min"] = True + else: + day_start = period_break(dates_, "day") + info["min"][day_start] = True + week_start = period_break(dates_, "week") + year_start = period_break(dates_, "year") + info_fmt[week_start] = "%d" + info_fmt[month_start] = "\n\n%b" + info_fmt[year_start] = "\n\n%b\n%Y" + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(week_start)] = "\n\n%b\n%Y" + else: + info_fmt[first_label(month_start)] = "\n\n%b\n%Y" + # Case 3. Less than 14 months ............... + elif span <= 1.15 * periodsperyear: + year_start = period_break(dates_, "year") + month_start = period_break(dates_, "month") + week_start = period_break(dates_, "week") + info_maj[month_start] = True + info_min[week_start] = True + info_min[year_start] = False + info_min[month_start] = False + info_fmt[month_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + if not has_level_label(year_start, vmin_orig): + info_fmt[first_label(month_start)] = "%b\n%Y" + # Case 4. Less than 2.5 years ............... + elif span <= 2.5 * periodsperyear: + year_start = period_break(dates_, "year") + quarter_start = period_break(dates_, "quarter") + month_start = period_break(dates_, "month") + info_maj[quarter_start] = True + info_min[month_start] = True + info_fmt[quarter_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + # Case 4. Less than 4 years ................. + elif span <= 4 * periodsperyear: + year_start = period_break(dates_, "year") + month_start = period_break(dates_, "month") + info_maj[year_start] = True + info_min[month_start] = True + info_min[year_start] = False + + month_break = dates_[month_start].month + jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] + info_fmt[jan_or_jul] = "%b" + info_fmt[year_start] = "%b\n%Y" + # Case 5. Less than 11 years ................ + elif span <= 11 * periodsperyear: + year_start = period_break(dates_, "year") + quarter_start = period_break(dates_, "quarter") + info_maj[year_start] = True + info_min[quarter_start] = True + info_min[year_start] = False + info_fmt[year_start] = "%Y" + # Case 6. More than 12 years ................ 
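+    # (major ticks land on years divisible by ``maj_anndef`` and minor ticks on
+    # years divisible by ``min_anndef``, both taken from
+    # ``_get_default_annual_spacing``)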
+ else: + year_start = period_break(dates_, "year") + year_break = dates_[year_start].year + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(year_break % maj_anndef == 0)] + info_maj[major_idx] = True + minor_idx = year_start[(year_break % min_anndef == 0)] + info_min[minor_idx] = True + info_fmt[major_idx] = "%Y" + + return info + + +def _monthly_finder(vmin, vmax, freq): + periodsperyear = 12 + + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + # Initialize the output + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S8")] + ) + info["val"] = np.arange(vmin, vmax + 1) + dates_ = info["val"] + info["fmt"] = "" + year_start = (dates_ % 12 == 0).nonzero()[0] + info_maj = info["maj"] + info_fmt = info["fmt"] + + if span <= 1.15 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + info_fmt[:] = "%b" + info_fmt[year_start] = "%b\n%Y" + + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = "%b\n%Y" + + elif span <= 2.5 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + # TODO: Check the following : is it really info['fmt'] ? + info["fmt"][quarter_start] = True + info["min"] = True + + info_fmt[quarter_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + + elif span <= 4 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) + info_fmt[jan_or_jul] = "%b" + info_fmt[year_start] = "%b\n%Y" + + elif span <= 11 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + info["min"][quarter_start] = True + + info_fmt[year_start] = "%Y" + + else: + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + years = dates_[year_start] // 12 + 1 + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info["min"][year_start[(years % min_anndef == 0)]] = True + + info_fmt[major_idx] = "%Y" + + return info + + +def _quarterly_finder(vmin, vmax, freq): + periodsperyear = 4 + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S8")] + ) + info["val"] = np.arange(vmin, vmax + 1) + info["fmt"] = "" + dates_ = info["val"] + info_maj = info["maj"] + info_fmt = info["fmt"] + year_start = (dates_ % 4 == 0).nonzero()[0] + + if span <= 3.5 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + info_fmt[:] = "Q%q" + info_fmt[year_start] = "Q%q\n%F" + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = "Q%q\n%F" + + elif span <= 11 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + info_fmt[year_start] = "%F" + + else: + # https://github.com/pandas-dev/pandas/pull/47602 + years = dates_[year_start] // 4 + 1970 + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info["min"][year_start[(years % min_anndef == 0)]] = True + info_fmt[major_idx] = "%F" + + return info + + +def _annual_finder(vmin, vmax, freq): + (vmin, vmax) = (int(vmin), int(vmax + 1)) + span = vmax - vmin + 1 + + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), 
("min", bool), ("fmt", "|S8")] + ) + info["val"] = np.arange(vmin, vmax + 1) + info["fmt"] = "" + dates_ = info["val"] + + (min_anndef, maj_anndef) = _get_default_annual_spacing(span) + major_idx = dates_ % maj_anndef == 0 + info["maj"][major_idx] = True + info["min"][(dates_ % min_anndef == 0)] = True + info["fmt"][major_idx] = "%Y" + + return info + + +def get_finder(freq: BaseOffset): + # error: "BaseOffset" has no attribute "_period_dtype_code" + dtype_code = freq._period_dtype_code # type: ignore[attr-defined] + fgroup = FreqGroup.from_period_dtype_code(dtype_code) + + if fgroup == FreqGroup.FR_ANN: + return _annual_finder + elif fgroup == FreqGroup.FR_QTR: + return _quarterly_finder + elif fgroup == FreqGroup.FR_MTH: + return _monthly_finder + elif (dtype_code >= FreqGroup.FR_BUS.value) or fgroup == FreqGroup.FR_WK: + return _daily_finder + else: # pragma: no cover + raise NotImplementedError(f"Unsupported frequency: {dtype_code}") + + +class TimeSeries_DateLocator(Locator): + """ + Locates the ticks along an axis controlled by a :class:`Series`. + + Parameters + ---------- + freq : BaseOffset + Valid frequency specifier. + minor_locator : {False, True}, optional + Whether the locator is for minor ticks (True) or not. + dynamic_mode : {True, False}, optional + Whether the locator should work in dynamic mode. + base : {int}, optional + quarter : {int}, optional + month : {int}, optional + day : {int}, optional + """ + + def __init__( + self, + freq: BaseOffset, + minor_locator: bool = False, + dynamic_mode: bool = True, + base: int = 1, + quarter: int = 1, + month: int = 1, + day: int = 1, + plot_obj=None, + ) -> None: + freq = to_offset(freq) + self.freq = freq + self.base = base + (self.quarter, self.month, self.day) = (quarter, month, day) + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _get_default_locs(self, vmin, vmax): + """Returns the default locations of ticks.""" + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + + locator = self.plot_obj.date_axis_info + + if self.isminor: + return np.compress(locator["min"], locator["val"]) + return np.compress(locator["maj"], locator["val"]) + + def __call__(self): + """Return the locations of the ticks.""" + # axis calls Locator.set_axis inside set_m_formatter + + vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + vmin, vmax = vi + if vmax < vmin: + vmin, vmax = vmax, vmin + if self.isdynamic: + locs = self._get_default_locs(vmin, vmax) + else: # pragma: no cover + base = self.base + (d, m) = divmod(vmin, base) + vmin = (d + 1) * base + locs = list(range(vmin, vmax + 1, base)) + return locs + + def autoscale(self): + """ + Sets the view limits to the nearest multiples of base that contain the + data. + """ + # requires matplotlib >= 0.98.0 + (vmin, vmax) = self.axis.get_data_interval() + + locs = self._get_default_locs(vmin, vmax) + (vmin, vmax) = locs[[0, -1]] + if vmin == vmax: + vmin -= 1 + vmax += 1 + return nonsingular(vmin, vmax) + + +# ------------------------------------------------------------------------- +# --- Formatter --- +# ------------------------------------------------------------------------- + + +class TimeSeries_DateFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`PeriodIndex`. 
+ + Parameters + ---------- + freq : BaseOffset + Valid frequency specifier. + minor_locator : bool, default False + Whether the current formatter should apply to minor ticks (True) or + major ticks (False). + dynamic_mode : bool, default True + Whether the formatter works in dynamic mode or not. + """ + + def __init__( + self, + freq: BaseOffset, + minor_locator: bool = False, + dynamic_mode: bool = True, + plot_obj=None, + ) -> None: + freq = to_offset(freq) + self.format = None + self.freq = freq + self.locs: list[Any] = [] # unused, for matplotlib compat + self.formatdict: dict[Any, Any] | None = None + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _set_default_format(self, vmin, vmax): + """Returns the default ticks spacing.""" + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + info = self.plot_obj.date_axis_info + + if self.isminor: + format = np.compress(info["min"] & np.logical_not(info["maj"]), info) + else: + format = np.compress(info["maj"], info) + self.formatdict = {x: f for (x, _, _, f) in format} + return self.formatdict + + def set_locs(self, locs) -> None: + """Sets the locations of the ticks""" + # don't actually use the locs. This is just needed to work with + # matplotlib. Force to use vmin, vmax + + self.locs = locs + + (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + if vmax < vmin: + (vmin, vmax) = (vmax, vmin) + self._set_default_format(vmin, vmax) + + def __call__(self, x, pos: int = 0) -> str: + + if self.formatdict is None: + return "" + else: + fmt = self.formatdict.pop(x, "") + if isinstance(fmt, np.bytes_): + fmt = fmt.decode("utf-8") + period = Period(ordinal=int(x), freq=self.freq) + assert isinstance(period, Period) + return period.strftime(fmt) + + +class TimeSeries_TimedeltaFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. 
+ """ + + @staticmethod + def format_timedelta_ticks(x, pos, n_decimals: int) -> str: + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 10**9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10 ** (n_decimals - 9)) + s = f"{int(h):02d}:{int(m):02d}:{int(s):02d}" + if n_decimals > 0: + s += f".{decimals:0{n_decimals}d}" + if d != 0: + s = f"{int(d):d} days {s}" + return s + + def __call__(self, x, pos: int = 0) -> str: + (vmin, vmax) = tuple(self.axis.get_view_interval()) + n_decimals = int(np.ceil(np.log10(100 * 10**9 / abs(vmax - vmin)))) + if n_decimals > 9: + n_decimals = 9 + return self.format_timedelta_ticks(x, pos, n_decimals) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py new file mode 100644 index 00000000..af91a8ab --- /dev/null +++ b/pandas/plotting/_matplotlib/core.py @@ -0,0 +1,1856 @@ +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Hashable, + Iterable, + Literal, + Sequence, +) +import warnings + +import matplotlib as mpl +from matplotlib.artist import Artist +import numpy as np + +from pandas._typing import ( + IndexLabel, + PlottingOrientation, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_hashable, + is_integer, + is_integer_dtype, + is_iterator, + is_list_like, + is_number, + is_numeric_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCMultiIndex, + ABCPeriodIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) + +import pandas.core.common as com +from pandas.core.frame import DataFrame + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 +from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters +from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by +from pandas.plotting._matplotlib.misc import unpack_single_str_list +from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.timeseries import ( + decorate_axes, + format_dateaxis, + maybe_convert_index, + maybe_resample, + use_dynamic_x, +) +from pandas.plotting._matplotlib.tools import ( + create_subplots, + flatten_axes, + format_date_labels, + get_all_lines, + get_xlim, + handle_shared_axes, + table, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.axis import Axis + + +def _color_in_style(style: str) -> bool: + """ + Check if there is a color letter in the style string. + """ + from matplotlib.colors import BASE_COLORS + + return not set(BASE_COLORS).isdisjoint(style) + + +class MPLPlot(ABC): + """ + Base class for assembling a pandas plot using matplotlib + + Parameters + ---------- + data : + + """ + + @property + @abstractmethod + def _kind(self) -> str: + """Specify kind str. 
Must be overridden in child class""" + raise NotImplementedError + + _layout_type = "vertical" + _default_rot = 0 + + @property + def orientation(self) -> str | None: + return None + + axes: np.ndarray # of Axes objects + + def __init__( + self, + data, + kind=None, + by: IndexLabel | None = None, + subplots: bool | Sequence[Sequence[str]] = False, + sharex=None, + sharey: bool = False, + use_index: bool = True, + figsize=None, + grid=None, + legend: bool | str = True, + rot=None, + ax=None, + fig=None, + title=None, + xlim=None, + ylim=None, + xticks=None, + yticks=None, + xlabel: Hashable | None = None, + ylabel: Hashable | None = None, + sort_columns: bool = False, + fontsize=None, + secondary_y: bool | tuple | list | np.ndarray = False, + colormap=None, + table: bool = False, + layout=None, + include_bool: bool = False, + column: IndexLabel | None = None, + **kwds, + ) -> None: + + import matplotlib.pyplot as plt + + self.data = data + + # if users assign an empty list or tuple, raise `ValueError` + # similar to current `df.box` and `df.hist` APIs. + if by in ([], ()): + raise ValueError("No group keys passed!") + self.by = com.maybe_make_list(by) + + # Assign the rest of columns into self.columns if by is explicitly defined + # while column is not, only need `columns` in hist/box plot when it's DF + # TODO: Might deprecate `column` argument in future PR (#28373) + if isinstance(data, DataFrame): + if column: + self.columns = com.maybe_make_list(column) + else: + if self.by is None: + self.columns = [ + col for col in data.columns if is_numeric_dtype(data[col]) + ] + else: + self.columns = [ + col + for col in data.columns + if col not in self.by and is_numeric_dtype(data[col]) + ] + + # For `hist` plot, need to get grouped original data before `self.data` is + # updated later + if self.by is not None and self._kind == "hist": + self._grouped = data.groupby(unpack_single_str_list(self.by)) + + self.kind = kind + + self.sort_columns = sort_columns + self.subplots = self._validate_subplots_kwarg(subplots) + + if sharex is None: + + # if by is defined, subplots are used and sharex should be False + if ax is None and by is None: + self.sharex = True + else: + # if we get an axis, the users should do the visibility + # setting... 
+ self.sharex = False + else: + self.sharex = sharex + + self.sharey = sharey + self.figsize = figsize + self.layout = layout + + self.xticks = xticks + self.yticks = yticks + self.xlim = xlim + self.ylim = ylim + self.title = title + self.use_index = use_index + self.xlabel = xlabel + self.ylabel = ylabel + + self.fontsize = fontsize + + if rot is not None: + self.rot = rot + # need to know for format_date_labels since it's rotated to 30 by + # default + self._rot_set = True + else: + self._rot_set = False + self.rot = self._default_rot + + if grid is None: + grid = False if secondary_y else plt.rcParams["axes.grid"] + + self.grid = grid + self.legend = legend + self.legend_handles: list[Artist] = [] + self.legend_labels: list[Hashable] = [] + + self.logx = kwds.pop("logx", False) + self.logy = kwds.pop("logy", False) + self.loglog = kwds.pop("loglog", False) + self.label = kwds.pop("label", None) + self.style = kwds.pop("style", None) + self.mark_right = kwds.pop("mark_right", True) + self.stacked = kwds.pop("stacked", False) + + self.ax = ax + self.fig = fig + self.axes = np.array([], dtype=object) # "real" version get set in `generate` + + # parse errorbar input if given + xerr = kwds.pop("xerr", None) + yerr = kwds.pop("yerr", None) + self.errors = { + kw: self._parse_errorbars(kw, err) + for kw, err in zip(["xerr", "yerr"], [xerr, yerr]) + } + + if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, ABCIndex)): + secondary_y = [secondary_y] + self.secondary_y = secondary_y + + # ugly TypeError if user passes matplotlib's `cmap` name. + # Probably better to accept either. + if "cmap" in kwds and colormap: + raise TypeError("Only specify one of `cmap` and `colormap`.") + elif "cmap" in kwds: + self.colormap = kwds.pop("cmap") + else: + self.colormap = colormap + + self.table = table + self.include_bool = include_bool + + self.kwds = kwds + + self._validate_color_args() + + def _validate_subplots_kwarg( + self, subplots: bool | Sequence[Sequence[str]] + ) -> bool | list[tuple[int, ...]]: + """ + Validate the subplots parameter + + - check type and content + - check for duplicate columns + - check for invalid column names + - convert column names into indices + - add missing columns in a group of their own + See comments in code below for more details. + + Parameters + ---------- + subplots : subplots parameters as passed to PlotAccessor + + Returns + ------- + validated subplots : a bool or a list of tuples of column indices. Columns + in the same tuple will be grouped together in the resulting plot. + """ + + if isinstance(subplots, bool): + return subplots + elif not isinstance(subplots, Iterable): + raise ValueError("subplots should be a bool or an iterable") + + supported_kinds = ( + "line", + "bar", + "barh", + "hist", + "kde", + "density", + "area", + "pie", + ) + if self._kind not in supported_kinds: + raise ValueError( + "When subplots is an iterable, kind must be " + f"one of {', '.join(supported_kinds)}. Got {self._kind}." + ) + + if isinstance(self.data, ABCSeries): + raise NotImplementedError( + "An iterable subplots for a Series is not supported." + ) + + columns = self.data.columns + if isinstance(columns, ABCMultiIndex): + raise NotImplementedError( + "An iterable subplots for a DataFrame with a MultiIndex column " + "is not supported." + ) + + if columns.nunique() != len(columns): + raise NotImplementedError( + "An iterable subplots for a DataFrame with non-unique column " + "labels is not supported." 
+ ) + + # subplots is a list of tuples where each tuple is a group of + # columns to be grouped together (one ax per group). + # we consolidate the subplots list such that: + # - the tuples contain indices instead of column names + # - the columns that aren't yet in the list are added in a group + # of their own. + # For example with columns from a to g, and + # subplots = [(a, c), (b, f, e)], + # we end up with [(ai, ci), (bi, fi, ei), (di,), (gi,)] + # This way, we can handle self.subplots in a homogeneous manner + # later. + # TODO: also accept indices instead of just names? + + out = [] + seen_columns: set[Hashable] = set() + for group in subplots: + if not is_list_like(group): + raise ValueError( + "When subplots is an iterable, each entry " + "should be a list/tuple of column names." + ) + idx_locs = columns.get_indexer_for(group) + if (idx_locs == -1).any(): + bad_labels = np.extract(idx_locs == -1, group) + raise ValueError( + f"Column label(s) {list(bad_labels)} not found in the DataFrame." + ) + else: + unique_columns = set(group) + duplicates = seen_columns.intersection(unique_columns) + if duplicates: + raise ValueError( + "Each column should be in only one subplot. " + f"Columns {duplicates} were found in multiple subplots." + ) + seen_columns = seen_columns.union(unique_columns) + out.append(tuple(idx_locs)) + + unseen_columns = columns.difference(seen_columns) + for column in unseen_columns: + idx_loc = columns.get_loc(column) + out.append((idx_loc,)) + return out + + def _validate_color_args(self): + if ( + "color" in self.kwds + and self.nseries == 1 + and not is_list_like(self.kwds["color"]) + ): + # support series.plot(color='green') + self.kwds["color"] = [self.kwds["color"]] + + if ( + "color" in self.kwds + and isinstance(self.kwds["color"], tuple) + and self.nseries == 1 + and len(self.kwds["color"]) in (3, 4) + ): + # support RGB and RGBA tuples in series plot + self.kwds["color"] = [self.kwds["color"]] + + if ( + "color" in self.kwds or "colors" in self.kwds + ) and self.colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used simultaneously. Using 'color'", + stacklevel=find_stack_level(), + ) + + if "color" in self.kwds and self.style is not None: + if is_list_like(self.style): + styles = self.style + else: + styles = [self.style] + # need only a single match + for s in styles: + if _color_in_style(s): + raise ValueError( + "Cannot pass 'style' string with a color symbol and " + "'color' keyword argument. 
Please use one or the " + "other or pass 'style' without a color symbol" + ) + + def _iter_data(self, data=None, keep_index=False, fillna=None): + if data is None: + data = self.data + if fillna is not None: + data = data.fillna(fillna) + + for col, values in data.items(): + if keep_index is True: + yield col, values + else: + yield col, values.values + + @property + def nseries(self) -> int: + + # When `by` is explicitly assigned, grouped data size will be defined, and + # this will determine number of subplots to have, aka `self.nseries` + if self.data.ndim == 1: + return 1 + elif self.by is not None and self._kind == "hist": + return len(self._grouped) + elif self.by is not None and self._kind == "box": + return len(self.columns) + else: + return self.data.shape[1] + + def draw(self) -> None: + self.plt.draw_if_interactive() + + def generate(self) -> None: + self._args_adjust() + self._compute_plot_data() + self._setup_subplots() + self._make_plot() + self._add_table() + self._make_legend() + self._adorn_subplots() + + for ax in self.axes: + self._post_plot_logic_common(ax, self.data) + self._post_plot_logic(ax, self.data) + + def _args_adjust(self): + pass + + def _has_plotted_object(self, ax: Axes) -> bool: + """check whether ax has data""" + return len(ax.lines) != 0 or len(ax.artists) != 0 or len(ax.containers) != 0 + + def _maybe_right_yaxis(self, ax: Axes, axes_num): + if not self.on_right(axes_num): + # secondary axes may be passed via ax kw + return self._get_ax_layer(ax) + + if hasattr(ax, "right_ax"): + # if it has right_ax property, ``ax`` must be left axes + return ax.right_ax + elif hasattr(ax, "left_ax"): + # if it has left_ax property, ``ax`` must be right axes + return ax + else: + # otherwise, create twin axes + orig_ax, new_ax = ax, ax.twinx() + # TODO: use Matplotlib public API when available + new_ax._get_lines = orig_ax._get_lines + new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill + orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax + + if not self._has_plotted_object(orig_ax): # no data on left y + orig_ax.get_yaxis().set_visible(False) + + if self.logy is True or self.loglog is True: + new_ax.set_yscale("log") + elif self.logy == "sym" or self.loglog == "sym": + new_ax.set_yscale("symlog") + return new_ax + + def _setup_subplots(self): + if self.subplots: + naxes = ( + self.nseries if isinstance(self.subplots, bool) else len(self.subplots) + ) + fig, axes = create_subplots( + naxes=naxes, + sharex=self.sharex, + sharey=self.sharey, + figsize=self.figsize, + ax=self.ax, + layout=self.layout, + layout_type=self._layout_type, + ) + else: + if self.ax is None: + fig = self.plt.figure(figsize=self.figsize) + axes = fig.add_subplot(111) + else: + fig = self.ax.get_figure() + if self.figsize is not None: + fig.set_size_inches(self.figsize) + axes = self.ax + + axes = flatten_axes(axes) + + valid_log = {False, True, "sym", None} + input_log = {self.logx, self.logy, self.loglog} + if input_log - valid_log: + invalid_log = next(iter(input_log - valid_log)) + raise ValueError( + f"Boolean, None and 'sym' are valid options, '{invalid_log}' is given." 
+ ) + + if self.logx is True or self.loglog is True: + [a.set_xscale("log") for a in axes] + elif self.logx == "sym" or self.loglog == "sym": + [a.set_xscale("symlog") for a in axes] + + if self.logy is True or self.loglog is True: + [a.set_yscale("log") for a in axes] + elif self.logy == "sym" or self.loglog == "sym": + [a.set_yscale("symlog") for a in axes] + + self.fig = fig + self.axes = axes + + @property + def result(self): + """ + Return result axes + """ + if self.subplots: + if self.layout is not None and not is_list_like(self.ax): + return self.axes.reshape(*self.layout) + else: + return self.axes + else: + sec_true = isinstance(self.secondary_y, bool) and self.secondary_y + # error: Argument 1 to "len" has incompatible type "Union[bool, + # Tuple[Any, ...], List[Any], ndarray[Any, Any]]"; expected "Sized" + all_sec = ( + is_list_like(self.secondary_y) + and len(self.secondary_y) == self.nseries # type: ignore[arg-type] + ) + if sec_true or all_sec: + # if all data is plotted on secondary, return right axes + return self._get_ax_layer(self.axes[0], primary=False) + else: + return self.axes[0] + + def _convert_to_ndarray(self, data): + # GH31357: categorical columns are processed separately + if is_categorical_dtype(data): + return data + + # GH32073: cast to float if values contain nulled integers + if ( + is_integer_dtype(data.dtype) or is_float_dtype(data.dtype) + ) and is_extension_array_dtype(data.dtype): + return data.to_numpy(dtype="float", na_value=np.nan) + + # GH25587: cast ExtensionArray of pandas (IntegerArray, etc.) to + # np.ndarray before plot. + if len(data) > 0: + return np.asarray(data) + + return data + + def _compute_plot_data(self): + data = self.data + + if isinstance(data, ABCSeries): + label = self.label + if label is None and data.name is None: + label = "" + if label is None: + # We'll end up with columns of [0] instead of [None] + data = data.to_frame() + else: + data = data.to_frame(name=label) + elif self._kind in ("hist", "box"): + cols = self.columns if self.by is None else self.columns + self.by + data = data.loc[:, cols] + + # GH15079 reconstruct data if by is defined + if self.by is not None: + self.subplots = True + data = reconstruct_data_with_by(self.data, by=self.by, cols=self.columns) + + # GH16953, _convert is needed as fallback, for ``Series`` + # with ``dtype == object`` + data = data._convert(datetime=True, timedelta=True) + include_type = [np.number, "datetime", "datetimetz", "timedelta"] + + # GH23719, allow plotting boolean + if self.include_bool is True: + include_type.append(np.bool_) + + # GH22799, exclude datetime-like type for boxplot + exclude_type = None + if self._kind == "box": + # TODO: change after solving issue 27881 + include_type = [np.number] + exclude_type = ["timedelta"] + + # GH 18755, include object and category type for scatter plot + if self._kind == "scatter": + include_type.extend(["object", "category"]) + + numeric_data = data.select_dtypes(include=include_type, exclude=exclude_type) + + try: + is_empty = numeric_data.columns.empty + except AttributeError: + is_empty = not len(numeric_data) + + # no non-numeric frames or series allowed + if is_empty: + raise TypeError("no numeric data to plot") + + self.data = numeric_data.apply(self._convert_to_ndarray) + + def _make_plot(self): + raise AbstractMethodError(self) + + def _add_table(self): + if self.table is False: + return + elif self.table is True: + data = self.data.transpose() + else: + data = self.table + ax = self._get_ax(0) + table(ax, data) + + def 
_post_plot_logic_common(self, ax, data): + """Common post process for each axes""" + if self.orientation == "vertical" or self.orientation is None: + self._apply_axis_properties(ax.xaxis, rot=self.rot, fontsize=self.fontsize) + self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + + if hasattr(ax, "right_ax"): + self._apply_axis_properties(ax.right_ax.yaxis, fontsize=self.fontsize) + + elif self.orientation == "horizontal": + self._apply_axis_properties(ax.yaxis, rot=self.rot, fontsize=self.fontsize) + self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + + if hasattr(ax, "right_ax"): + self._apply_axis_properties(ax.right_ax.yaxis, fontsize=self.fontsize) + else: # pragma no cover + raise ValueError + + def _post_plot_logic(self, ax, data): + """Post process for each axes. Overridden in child classes""" + pass + + def _adorn_subplots(self): + """Common post process unrelated to data""" + if len(self.axes) > 0: + all_axes = self._get_subplots() + nrows, ncols = self._get_axes_layout() + handle_shared_axes( + axarr=all_axes, + nplots=len(all_axes), + naxes=nrows * ncols, + nrows=nrows, + ncols=ncols, + sharex=self.sharex, + sharey=self.sharey, + ) + + for ax in self.axes: + ax = getattr(ax, "right_ax", ax) + if self.yticks is not None: + ax.set_yticks(self.yticks) + + if self.xticks is not None: + ax.set_xticks(self.xticks) + + if self.ylim is not None: + ax.set_ylim(self.ylim) + + if self.xlim is not None: + ax.set_xlim(self.xlim) + + # GH9093, currently Pandas does not show ylabel, so if users provide + # ylabel will set it as ylabel in the plot. + if self.ylabel is not None: + ax.set_ylabel(pprint_thing(self.ylabel)) + + ax.grid(self.grid) + + if self.title: + if self.subplots: + if is_list_like(self.title): + if len(self.title) != self.nseries: + raise ValueError( + "The length of `title` must equal the number " + "of columns if using `title` of type `list` " + "and `subplots=True`.\n" + f"length of title = {len(self.title)}\n" + f"number of columns = {self.nseries}" + ) + + for (ax, title) in zip(self.axes, self.title): + ax.set_title(title) + else: + self.fig.suptitle(self.title) + else: + if is_list_like(self.title): + msg = ( + "Using `title` of type `list` is not supported " + "unless `subplots=True` is passed" + ) + raise ValueError(msg) + self.axes[0].set_title(self.title) + + def _apply_axis_properties(self, axis: Axis, rot=None, fontsize=None): + """ + Tick creation within matplotlib is reasonably expensive and is + internally deferred until accessed as Ticks are created/destroyed + multiple times per draw. It's therefore beneficial for us to avoid + accessing unless we will act on the Tick. + """ + if rot is not None or fontsize is not None: + # rot=0 is a valid setting, hence the explicit None check + labels = axis.get_majorticklabels() + axis.get_minorticklabels() + for label in labels: + if rot is not None: + label.set_rotation(rot) + if fontsize is not None: + label.set_fontsize(fontsize) + + @property + def legend_title(self) -> str | None: + if not isinstance(self.data.columns, ABCMultiIndex): + name = self.data.columns.name + if name is not None: + name = pprint_thing(name) + return name + else: + stringified = map(pprint_thing, self.data.columns.names) + return ",".join(stringified) + + def _mark_right_label(self, label: str, index: int) -> str: + """ + Append ``(right)`` to the label of a line if it's plotted on the right axis. + + Note that ``(right)`` is only appended when ``subplots=False``. 
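+        For illustration (using only documented ``DataFrame.plot`` keywords):
+        with ``df.plot(secondary_y=["b"])`` the legend entry for column ``b``
+        is shown as ``"b (right)"``; passing ``mark_right=False`` suppresses
+        the suffix.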
+ """ + if not self.subplots and self.mark_right and self.on_right(index): + label += " (right)" + return label + + def _append_legend_handles_labels(self, handle: Artist, label: str) -> None: + """ + Append current handle and label to ``legend_handles`` and ``legend_labels``. + + These will be used to make the legend. + """ + self.legend_handles.append(handle) + self.legend_labels.append(label) + + def _make_legend(self) -> None: + ax, leg = self._get_ax_legend(self.axes[0]) + + handles = [] + labels = [] + title = "" + + if not self.subplots: + if leg is not None: + title = leg.get_title().get_text() + # Replace leg.LegendHandles because it misses marker info + handles = leg.legendHandles + labels = [x.get_text() for x in leg.get_texts()] + + if self.legend: + if self.legend == "reverse": + handles += reversed(self.legend_handles) + labels += reversed(self.legend_labels) + else: + handles += self.legend_handles + labels += self.legend_labels + + if self.legend_title is not None: + title = self.legend_title + + if len(handles) > 0: + ax.legend(handles, labels, loc="best", title=title) + + elif self.subplots and self.legend: + for ax in self.axes: + if ax.get_visible(): + ax.legend(loc="best") + + def _get_ax_legend(self, ax: Axes): + """ + Take in axes and return ax and legend under different scenarios + """ + leg = ax.get_legend() + + other_ax = getattr(ax, "left_ax", None) or getattr(ax, "right_ax", None) + other_leg = None + if other_ax is not None: + other_leg = other_ax.get_legend() + if leg is None and other_leg is not None: + leg = other_leg + ax = other_ax + return ax, leg + + @cache_readonly + def plt(self): + import matplotlib.pyplot as plt + + return plt + + _need_to_set_index = False + + def _get_xticks(self, convert_period: bool = False): + index = self.data.index + is_datetype = index.inferred_type in ("datetime", "date", "datetime64", "time") + + if self.use_index: + if convert_period and isinstance(index, ABCPeriodIndex): + self.data = self.data.reindex(index=index.sort_values()) + x = self.data.index.to_timestamp()._mpl_repr() + elif index.is_numeric(): + """ + Matplotlib supports numeric values or datetime objects as + xaxis values. Taking LBYL approach here, by the time + matplotlib raises exception when using non numeric/datetime + values for xaxis, several actions are already taken by plt. 
+ """ + x = index._mpl_repr() + elif is_datetype: + self.data = self.data[notna(self.data.index)] + self.data = self.data.sort_index() + x = self.data.index._mpl_repr() + else: + self._need_to_set_index = True + x = list(range(len(index))) + else: + x = list(range(len(index))) + + return x + + @classmethod + @register_pandas_matplotlib_converters + def _plot( + cls, ax: Axes, x, y: np.ndarray, style=None, is_errorbar: bool = False, **kwds + ): + mask = isna(y) + if mask.any(): + y = np.ma.array(y) + y = np.ma.masked_where(mask, y) + + if isinstance(x, ABCIndex): + x = x._mpl_repr() + + if is_errorbar: + if "xerr" in kwds: + kwds["xerr"] = np.array(kwds.get("xerr")) + if "yerr" in kwds: + kwds["yerr"] = np.array(kwds.get("yerr")) + return ax.errorbar(x, y, **kwds) + else: + # prevent style kwarg from going to errorbar, where it is unsupported + args = (x, y, style) if style is not None else (x, y) + return ax.plot(*args, **kwds) + + def _get_custom_index_name(self): + """Specify whether xlabel/ylabel should be used to override index name""" + return self.xlabel + + def _get_index_name(self) -> str | None: + if isinstance(self.data.index, ABCMultiIndex): + name = self.data.index.names + if com.any_not_none(*name): + name = ",".join([pprint_thing(x) for x in name]) + else: + name = None + else: + name = self.data.index.name + if name is not None: + name = pprint_thing(name) + + # GH 45145, override the default axis label if one is provided. + index_name = self._get_custom_index_name() + if index_name is not None: + name = pprint_thing(index_name) + + return name + + @classmethod + def _get_ax_layer(cls, ax, primary=True): + """get left (primary) or right (secondary) axes""" + if primary: + return getattr(ax, "left_ax", ax) + else: + return getattr(ax, "right_ax", ax) + + def _col_idx_to_axis_idx(self, col_idx: int) -> int: + """Return the index of the axis where the column at col_idx should be plotted""" + if isinstance(self.subplots, list): + # Subplots is a list: some columns will be grouped together in the same ax + return next( + group_idx + for (group_idx, group) in enumerate(self.subplots) + if col_idx in group + ) + else: + # subplots is True: one ax per column + return col_idx + + def _get_ax(self, i: int): + # get the twinx ax if appropriate + if self.subplots: + i = self._col_idx_to_axis_idx(i) + ax = self.axes[i] + ax = self._maybe_right_yaxis(ax, i) + self.axes[i] = ax + else: + ax = self.axes[0] + ax = self._maybe_right_yaxis(ax, i) + + ax.get_yaxis().set_visible(True) + return ax + + @classmethod + def get_default_ax(cls, ax) -> None: + import matplotlib.pyplot as plt + + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + ax = cls._get_ax_layer(ax) + + def on_right(self, i): + if isinstance(self.secondary_y, bool): + return self.secondary_y + + if isinstance(self.secondary_y, (tuple, list, np.ndarray, ABCIndex)): + return self.data.columns[i] in self.secondary_y + + def _apply_style_colors(self, colors, kwds, col_num, label): + """ + Manage style and color based on column number and its label. + Returns tuple of appropriate style and kwds which "color" may be added. 
+ """ + style = None + if self.style is not None: + if isinstance(self.style, list): + try: + style = self.style[col_num] + except IndexError: + pass + elif isinstance(self.style, dict): + style = self.style.get(label, style) + else: + style = self.style + + has_color = "color" in kwds or self.colormap is not None + nocolor_style = style is None or not _color_in_style(style) + if (has_color or self.subplots) and nocolor_style: + if isinstance(colors, dict): + kwds["color"] = colors[label] + else: + kwds["color"] = colors[col_num % len(colors)] + return style, kwds + + def _get_colors(self, num_colors=None, color_kwds="color"): + if num_colors is None: + num_colors = self.nseries + + return get_standard_colors( + num_colors=num_colors, + colormap=self.colormap, + color=self.kwds.get(color_kwds), + ) + + def _parse_errorbars(self, label, err): + """ + Look for error keyword arguments and return the actual errorbar data + or return the error DataFrame/dict + + Error bars can be specified in several ways: + Series: the user provides a pandas.Series object of the same + length as the data + ndarray: provides a np.ndarray of the same length as the data + DataFrame/dict: error values are paired with keys matching the + key in the plotted DataFrame + str: the name of the column within the plotted DataFrame + + Asymmetrical error bars are also supported, however raw error values + must be provided in this case. For a ``N`` length :class:`Series`, a + ``2xN`` array should be provided indicating lower and upper (or left + and right) errors. For a ``MxN`` :class:`DataFrame`, asymmetrical errors + should be in a ``Mx2xN`` array. + """ + if err is None: + return None + + def match_labels(data, e): + e = e.reindex(data.index) + return e + + # key-matched DataFrame + if isinstance(err, ABCDataFrame): + + err = match_labels(self.data, err) + # key-matched dict + elif isinstance(err, dict): + pass + + # Series of error values + elif isinstance(err, ABCSeries): + # broadcast error series across data + err = match_labels(self.data, err) + err = np.atleast_2d(err) + err = np.tile(err, (self.nseries, 1)) + + # errors are a column in the dataframe + elif isinstance(err, str): + evalues = self.data[err].values + self.data = self.data[self.data.columns.drop(err)] + err = np.atleast_2d(evalues) + err = np.tile(err, (self.nseries, 1)) + + elif is_list_like(err): + if is_iterator(err): + err = np.atleast_2d(list(err)) + else: + # raw error values + err = np.atleast_2d(err) + + err_shape = err.shape + + # asymmetrical error bars + if isinstance(self.data, ABCSeries) and err_shape[0] == 2: + err = np.expand_dims(err, 0) + err_shape = err.shape + if err_shape[2] != len(self.data): + raise ValueError( + "Asymmetrical error bars should be provided " + f"with the shape (2, {len(self.data)})" + ) + elif isinstance(self.data, ABCDataFrame) and err.ndim == 3: + if ( + (err_shape[0] != self.nseries) + or (err_shape[1] != 2) + or (err_shape[2] != len(self.data)) + ): + raise ValueError( + "Asymmetrical error bars should be provided " + f"with the shape ({self.nseries}, 2, {len(self.data)})" + ) + + # broadcast errors to each data series + if len(err) == 1: + err = np.tile(err, (self.nseries, 1)) + + elif is_number(err): + err = np.tile([err], (self.nseries, len(self.data))) + + else: + msg = f"No valid {label} detected" + raise ValueError(msg) + + return err + + def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): + errors = {} + + for kw, flag in zip(["xerr", "yerr"], [xerr, yerr]): + if flag: + err = 
self.errors[kw] + # user provided label-matched dataframe of errors + if isinstance(err, (ABCDataFrame, dict)): + if label is not None and label in err.keys(): + err = err[label] + else: + err = None + elif index is not None and err is not None: + err = err[index] + + if err is not None: + errors[kw] = err + return errors + + def _get_subplots(self): + from matplotlib.axes import Subplot + + return [ + ax for ax in self.axes[0].get_figure().get_axes() if isinstance(ax, Subplot) + ] + + def _get_axes_layout(self) -> tuple[int, int]: + axes = self._get_subplots() + x_set = set() + y_set = set() + for ax in axes: + # check axes coordinates to estimate layout + points = ax.get_position().get_points() + x_set.add(points[0][0]) + y_set.add(points[0][1]) + return (len(y_set), len(x_set)) + + +class PlanePlot(MPLPlot, ABC): + """ + Abstract class for plotting on plane, currently scatter and hexbin. + """ + + _layout_type = "single" + + def __init__(self, data, x, y, **kwargs) -> None: + MPLPlot.__init__(self, data, **kwargs) + if x is None or y is None: + raise ValueError(self._kind + " requires an x and y column") + if is_integer(x) and not self.data.columns.holds_integer(): + x = self.data.columns[x] + if is_integer(y) and not self.data.columns.holds_integer(): + y = self.data.columns[y] + + # Scatter plot allows to plot objects data + if self._kind == "hexbin": + if len(self.data[x]._get_numeric_data()) == 0: + raise ValueError(self._kind + " requires x column to be numeric") + if len(self.data[y]._get_numeric_data()) == 0: + raise ValueError(self._kind + " requires y column to be numeric") + + self.x = x + self.y = y + + @property + def nseries(self) -> int: + return 1 + + def _post_plot_logic(self, ax: Axes, data): + x, y = self.x, self.y + xlabel = self.xlabel if self.xlabel is not None else pprint_thing(x) + ylabel = self.ylabel if self.ylabel is not None else pprint_thing(y) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + def _plot_colorbar(self, ax: Axes, **kwds): + # Addresses issues #10611 and #10678: + # When plotting scatterplots and hexbinplots in IPython + # inline backend the colorbar axis height tends not to + # exactly match the parent axis height. + # The difference is due to small fractional differences + # in floating points with similar representation. + # To deal with this, this method forces the colorbar + # height to take the height of the parent axes. 
+ # For a more detailed description of the issue + # see the following link: + # https://github.com/ipython/ipython/issues/11215 + + # GH33389, if ax is used multiple times, we should always + # use the last one which contains the latest information + # about the ax + img = ax.collections[-1] + return self.fig.colorbar(img, ax=ax, **kwds) + + +class ScatterPlot(PlanePlot): + @property + def _kind(self) -> Literal["scatter"]: + return "scatter" + + def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None: + if s is None: + # hide the matplotlib default for size, in case we want to change + # the handling of this argument later + s = 20 + elif is_hashable(s) and s in data.columns: + s = data[s] + super().__init__(data, x, y, s=s, **kwargs) + if is_integer(c) and not self.data.columns.holds_integer(): + c = self.data.columns[c] + self.c = c + + def _make_plot(self): + x, y, c, data = self.x, self.y, self.c, self.data + ax = self.axes[0] + + c_is_column = is_hashable(c) and c in self.data.columns + + color_by_categorical = c_is_column and is_categorical_dtype(self.data[c]) + + color = self.kwds.pop("color", None) + if c is not None and color is not None: + raise TypeError("Specify exactly one of `c` and `color`") + elif c is None and color is None: + c_values = self.plt.rcParams["patch.facecolor"] + elif color is not None: + c_values = color + elif color_by_categorical: + c_values = self.data[c].cat.codes + elif c_is_column: + c_values = self.data[c].values + else: + c_values = c + + if self.colormap is not None: + if mpl_ge_3_6_0(): + cmap = mpl.colormaps.get_cmap(self.colormap) + else: + cmap = self.plt.cm.get_cmap(self.colormap) + else: + # cmap is only used if c_values are integers, otherwise UserWarning + if is_integer_dtype(c_values): + # pandas uses colormap, matplotlib uses cmap. + cmap = "Greys" + if mpl_ge_3_6_0(): + cmap = mpl.colormaps[cmap] + else: + cmap = self.plt.cm.get_cmap(cmap) + else: + cmap = None + + if color_by_categorical: + from matplotlib import colors + + n_cats = len(self.data[c].cat.categories) + cmap = colors.ListedColormap([cmap(i) for i in range(cmap.N)]) + bounds = np.linspace(0, n_cats, n_cats + 1) + norm = colors.BoundaryNorm(bounds, cmap.N) + else: + norm = self.kwds.pop("norm", None) + # plot colorbar if + # 1. 
colormap is assigned, and + # 2.`c` is a column containing only numeric values + plot_colorbar = self.colormap or c_is_column + cb = self.kwds.pop("colorbar", is_numeric_dtype(c_values) and plot_colorbar) + + if self.legend and hasattr(self, "label"): + label = self.label + else: + label = None + scatter = ax.scatter( + data[x].values, + data[y].values, + c=c_values, + label=label, + cmap=cmap, + norm=norm, + **self.kwds, + ) + if cb: + cbar_label = c if c_is_column else "" + cbar = self._plot_colorbar(ax, label=cbar_label) + if color_by_categorical: + cbar.set_ticks(np.linspace(0.5, n_cats - 0.5, n_cats)) + cbar.ax.set_yticklabels(self.data[c].cat.categories) + + if label is not None: + self._append_legend_handles_labels(scatter, label) + else: + self.legend = False + + errors_x = self._get_errorbars(label=x, index=0, yerr=False) + errors_y = self._get_errorbars(label=y, index=0, xerr=False) + if len(errors_x) > 0 or len(errors_y) > 0: + err_kwds = dict(errors_x, **errors_y) + err_kwds["ecolor"] = scatter.get_facecolor()[0] + ax.errorbar(data[x].values, data[y].values, linestyle="none", **err_kwds) + + +class HexBinPlot(PlanePlot): + @property + def _kind(self) -> Literal["hexbin"]: + return "hexbin" + + def __init__(self, data, x, y, C=None, **kwargs) -> None: + super().__init__(data, x, y, **kwargs) + if is_integer(C) and not self.data.columns.holds_integer(): + C = self.data.columns[C] + self.C = C + + def _make_plot(self): + x, y, data, C = self.x, self.y, self.data, self.C + ax = self.axes[0] + # pandas uses colormap, matplotlib uses cmap. + cmap = self.colormap or "BuGn" + if mpl_ge_3_6_0(): + cmap = mpl.colormaps.get_cmap(cmap) + else: + cmap = self.plt.cm.get_cmap(cmap) + cb = self.kwds.pop("colorbar", True) + + if C is None: + c_values = None + else: + c_values = data[C].values + + ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, **self.kwds) + if cb: + self._plot_colorbar(ax) + + def _make_legend(self): + pass + + +class LinePlot(MPLPlot): + _default_rot = 0 + + @property + def orientation(self) -> PlottingOrientation: + return "vertical" + + @property + def _kind(self) -> Literal["line", "area", "hist", "kde", "box"]: + return "line" + + def __init__(self, data, **kwargs) -> None: + from pandas.plotting import plot_params + + MPLPlot.__init__(self, data, **kwargs) + if self.stacked: + self.data = self.data.fillna(value=0) + self.x_compat = plot_params["x_compat"] + if "x_compat" in self.kwds: + self.x_compat = bool(self.kwds.pop("x_compat")) + + def _is_ts_plot(self) -> bool: + # this is slightly deceptive + return not self.x_compat and self.use_index and self._use_dynamic_x() + + def _use_dynamic_x(self): + return use_dynamic_x(self._get_ax(0), self.data) + + def _make_plot(self): + if self._is_ts_plot(): + data = maybe_convert_index(self._get_ax(0), self.data) + + x = data.index # dummy, not used + plotf = self._ts_plot + it = self._iter_data(data=data, keep_index=True) + else: + x = self._get_xticks(convert_period=True) + # error: Incompatible types in assignment (expression has type + # "Callable[[Any, Any, Any, Any, Any, Any, KwArg(Any)], Any]", variable has + # type "Callable[[Any, Any, Any, Any, KwArg(Any)], Any]") + plotf = self._plot # type: ignore[assignment] + it = self._iter_data() + + stacking_id = self._get_stacking_id() + is_errorbar = com.any_not_none(*self.errors.values()) + + colors = self._get_colors() + for i, (label, y) in enumerate(it): + ax = self._get_ax(i) + kwds = self.kwds.copy() + style, kwds = self._apply_style_colors(colors, kwds, 
i, label) + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) # .encode('utf-8') + label = self._mark_right_label(label, index=i) + kwds["label"] = label + + newlines = plotf( + ax, + x, + y, + style=style, + column_num=i, + stacking_id=stacking_id, + is_errorbar=is_errorbar, + **kwds, + ) + self._append_legend_handles_labels(newlines[0], label) + + if self._is_ts_plot(): + + # reset of xlim should be used for ts data + # TODO: GH28021, should find a way to change view limit on xaxis + lines = get_all_lines(ax) + left, right = get_xlim(lines) + ax.set_xlim(left, right) + + # error: Signature of "_plot" incompatible with supertype "MPLPlot" + @classmethod + def _plot( # type: ignore[override] + cls, ax: Axes, x, y, style=None, column_num=None, stacking_id=None, **kwds + ): + # column_num is used to get the target column from plotf in line and + # area plots + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds["label"]) + lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) + cls._update_stacker(ax, stacking_id, y) + return lines + + def _ts_plot(self, ax: Axes, x, data, style=None, **kwds): + # accept x to be consistent with normal plot func, + # x is not passed to tsplot as it uses data.index as x coordinate + # column_num must be in kwds for stacking purpose + freq, data = maybe_resample(data, ax, kwds) + + # Set ax with freq info + decorate_axes(ax, freq, kwds) + # digging deeper + if hasattr(ax, "left_ax"): + decorate_axes(ax.left_ax, freq, kwds) + if hasattr(ax, "right_ax"): + decorate_axes(ax.right_ax, freq, kwds) + ax._plot_data.append((data, self._kind, kwds)) + + lines = self._plot(ax, data.index, data.values, style=style, **kwds) + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, data.index) + return lines + + def _get_stacking_id(self): + if self.stacked: + return id(self.data) + else: + return None + + @classmethod + def _initialize_stacker(cls, ax: Axes, stacking_id, n: int): + if stacking_id is None: + return + if not hasattr(ax, "_stacker_pos_prior"): + ax._stacker_pos_prior = {} + if not hasattr(ax, "_stacker_neg_prior"): + ax._stacker_neg_prior = {} + ax._stacker_pos_prior[stacking_id] = np.zeros(n) + ax._stacker_neg_prior[stacking_id] = np.zeros(n) + + @classmethod + def _get_stacked_values(cls, ax: Axes, stacking_id, values, label): + if stacking_id is None: + return values + if not hasattr(ax, "_stacker_pos_prior"): + # stacker may not be initialized for subplots + cls._initialize_stacker(ax, stacking_id, len(values)) + + if (values >= 0).all(): + return ax._stacker_pos_prior[stacking_id] + values + elif (values <= 0).all(): + return ax._stacker_neg_prior[stacking_id] + values + + raise ValueError( + "When stacked is True, each column must be either " + "all positive or all negative. 
" + f"Column '{label}' contains both positive and negative values" + ) + + @classmethod + def _update_stacker(cls, ax: Axes, stacking_id, values): + if stacking_id is None: + return + if (values >= 0).all(): + ax._stacker_pos_prior[stacking_id] += values + elif (values <= 0).all(): + ax._stacker_neg_prior[stacking_id] += values + + def _post_plot_logic(self, ax: Axes, data): + from matplotlib.ticker import FixedLocator + + def get_label(i): + if is_float(i) and i.is_integer(): + i = int(i) + try: + return pprint_thing(data.index[i]) + except Exception: + return "" + + if self._need_to_set_index: + xticks = ax.get_xticks() + xticklabels = [get_label(x) for x in xticks] + ax.xaxis.set_major_locator(FixedLocator(xticks)) + ax.set_xticklabels(xticklabels) + + # If the index is an irregular time series, then by default + # we rotate the tick labels. The exception is if there are + # subplots which don't share their x-axes, in which we case + # we don't rotate the ticklabels as by default the subplots + # would be too close together. + condition = ( + not self._use_dynamic_x() + and (data.index._is_all_dates and self.use_index) + and (not self.subplots or (self.subplots and self.sharex)) + ) + + index_name = self._get_index_name() + + if condition: + # irregular TS rotated 30 deg. by default + # probably a better place to check / set this. + if not self._rot_set: + self.rot = 30 + format_date_labels(ax, rot=self.rot) + + if index_name is not None and self.use_index: + ax.set_xlabel(index_name) + + +class AreaPlot(LinePlot): + @property + def _kind(self) -> Literal["area"]: + return "area" + + def __init__(self, data, **kwargs) -> None: + kwargs.setdefault("stacked", True) + data = data.fillna(value=0) + LinePlot.__init__(self, data, **kwargs) + + if not self.stacked: + # use smaller alpha to distinguish overlap + self.kwds.setdefault("alpha", 0.5) + + if self.logy or self.loglog: + raise ValueError("Log-y scales are not supported in area plot") + + # error: Signature of "_plot" incompatible with supertype "MPLPlot" + @classmethod + def _plot( # type: ignore[override] + cls, + ax: Axes, + x, + y, + style=None, + column_num=None, + stacking_id=None, + is_errorbar: bool = False, + **kwds, + ): + + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds["label"]) + + # need to remove label, because subplots uses mpl legend as it is + line_kwds = kwds.copy() + line_kwds.pop("label") + lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) + + # get data from the line to get coordinates for fill_between + xdata, y_values = lines[0].get_data(orig=False) + + # unable to use ``_get_stacked_values`` here to get starting point + if stacking_id is None: + start = np.zeros(len(y)) + elif (y >= 0).all(): + start = ax._stacker_pos_prior[stacking_id] + elif (y <= 0).all(): + start = ax._stacker_neg_prior[stacking_id] + else: + start = np.zeros(len(y)) + + if "color" not in kwds: + kwds["color"] = lines[0].get_color() + + rect = ax.fill_between(xdata, start, y_values, **kwds) + cls._update_stacker(ax, stacking_id, y) + + # LinePlot expects list of artists + res = [rect] + return res + + def _post_plot_logic(self, ax: Axes, data): + LinePlot._post_plot_logic(self, ax, data) + + is_shared_y = len(list(ax.get_shared_y_axes())) > 0 + # do not override the default axis behaviour in case of shared y axes + if self.ylim is None and not is_shared_y: + if (data >= 0).all().all(): + ax.set_ylim(0, None) + elif (data <= 0).all().all(): + 
ax.set_ylim(None, 0) + + +class BarPlot(MPLPlot): + @property + def _kind(self) -> Literal["bar", "barh"]: + return "bar" + + _default_rot = 90 + + @property + def orientation(self) -> PlottingOrientation: + return "vertical" + + def __init__(self, data, **kwargs) -> None: + # we have to treat a series differently than a + # 1-column DataFrame w.r.t. color handling + self._is_series = isinstance(data, ABCSeries) + self.bar_width = kwargs.pop("width", 0.5) + pos = kwargs.pop("position", 0.5) + kwargs.setdefault("align", "center") + self.tick_pos = np.arange(len(data)) + + self.bottom = kwargs.pop("bottom", 0) + self.left = kwargs.pop("left", 0) + + self.log = kwargs.pop("log", False) + MPLPlot.__init__(self, data, **kwargs) + + if self.stacked or self.subplots: + self.tickoffset = self.bar_width * pos + if kwargs["align"] == "edge": + self.lim_offset = self.bar_width / 2 + else: + self.lim_offset = 0 + else: + if kwargs["align"] == "edge": + w = self.bar_width / self.nseries + self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 + self.lim_offset = w * 0.5 + else: + self.tickoffset = self.bar_width * pos + self.lim_offset = 0 + + self.ax_pos = self.tick_pos - self.tickoffset + + def _args_adjust(self): + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + if is_list_like(self.left): + self.left = np.array(self.left) + + # error: Signature of "_plot" incompatible with supertype "MPLPlot" + @classmethod + def _plot( # type: ignore[override] + cls, ax: Axes, x, y, w, start=0, log=False, **kwds + ): + return ax.bar(x, y, w, bottom=start, log=log, **kwds) + + @property + def _start_base(self): + return self.bottom + + def _make_plot(self): + import matplotlib as mpl + + colors = self._get_colors() + ncolors = len(colors) + + pos_prior = neg_prior = np.zeros(len(self.data)) + K = self.nseries + + for i, (label, y) in enumerate(self._iter_data(fillna=0)): + ax = self._get_ax(i) + kwds = self.kwds.copy() + if self._is_series: + kwds["color"] = colors + elif isinstance(colors, dict): + kwds["color"] = colors[label] + else: + kwds["color"] = colors[i % ncolors] + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) + label = self._mark_right_label(label, index=i) + + if (("yerr" in kwds) or ("xerr" in kwds)) and (kwds.get("ecolor") is None): + kwds["ecolor"] = mpl.rcParams["xtick.color"] + + start = 0 + if self.log and (y >= 1).all(): + start = 1 + start = start + self._start_base + + if self.subplots: + w = self.bar_width / 2 + rect = self._plot( + ax, + self.ax_pos + w, + y, + self.bar_width, + start=start, + label=label, + log=self.log, + **kwds, + ) + ax.set_title(label) + elif self.stacked: + mask = y > 0 + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot( + ax, + self.ax_pos + w, + y, + self.bar_width, + start=start, + label=label, + log=self.log, + **kwds, + ) + pos_prior = pos_prior + np.where(mask, y, 0) + neg_prior = neg_prior + np.where(mask, 0, y) + else: + w = self.bar_width / K + rect = self._plot( + ax, + self.ax_pos + (i + 0.5) * w, + y, + w, + start=start, + label=label, + log=self.log, + **kwds, + ) + self._append_legend_handles_labels(rect, label) + + def _post_plot_logic(self, ax: Axes, data): + if self.use_index: + str_index = [pprint_thing(key) for key in data.index] + else: + str_index = [pprint_thing(key) for key in range(data.shape[0])] + + s_edge = self.ax_pos[0] - 0.25 + self.lim_offset + e_edge = self.ax_pos[-1] + 0.25 + 
self.bar_width + self.lim_offset + + self._decorate_ticks(ax, self._get_index_name(), str_index, s_edge, e_edge) + + def _decorate_ticks(self, ax: Axes, name, ticklabels, start_edge, end_edge): + ax.set_xlim((start_edge, end_edge)) + + if self.xticks is not None: + ax.set_xticks(np.array(self.xticks)) + else: + ax.set_xticks(self.tick_pos) + ax.set_xticklabels(ticklabels) + + if name is not None and self.use_index: + ax.set_xlabel(name) + + +class BarhPlot(BarPlot): + @property + def _kind(self) -> Literal["barh"]: + return "barh" + + _default_rot = 0 + + @property + def orientation(self) -> Literal["horizontal"]: + return "horizontal" + + @property + def _start_base(self): + return self.left + + # error: Signature of "_plot" incompatible with supertype "MPLPlot" + @classmethod + def _plot( # type: ignore[override] + cls, ax: Axes, x, y, w, start=0, log=False, **kwds + ): + return ax.barh(x, y, w, left=start, log=log, **kwds) + + def _get_custom_index_name(self): + return self.ylabel + + def _decorate_ticks(self, ax: Axes, name, ticklabels, start_edge, end_edge): + # horizontal bars + ax.set_ylim((start_edge, end_edge)) + ax.set_yticks(self.tick_pos) + ax.set_yticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_ylabel(name) + ax.set_xlabel(self.xlabel) + + +class PiePlot(MPLPlot): + @property + def _kind(self) -> Literal["pie"]: + return "pie" + + _layout_type = "horizontal" + + def __init__(self, data, kind=None, **kwargs) -> None: + data = data.fillna(value=0) + if (data < 0).any().any(): + raise ValueError(f"{self._kind} plot doesn't allow negative values") + MPLPlot.__init__(self, data, kind=kind, **kwargs) + + def _args_adjust(self): + self.grid = False + self.logy = False + self.logx = False + self.loglog = False + + def _validate_color_args(self): + pass + + def _make_plot(self): + colors = self._get_colors(num_colors=len(self.data), color_kwds="colors") + self.kwds.setdefault("colors", colors) + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + if label is not None: + label = pprint_thing(label) + ax.set_ylabel(label) + + kwds = self.kwds.copy() + + def blank_labeler(label, value): + if value == 0: + return "" + else: + return label + + idx = [pprint_thing(v) for v in self.data.index] + labels = kwds.pop("labels", idx) + # labels is used for each wedge's labels + # Blank out labels for values of 0 so they don't overlap + # with nonzero wedges + if labels is not None: + blabels = [blank_labeler(left, value) for left, value in zip(labels, y)] + else: + blabels = None + results = ax.pie(y, labels=blabels, **kwds) + + if kwds.get("autopct", None) is not None: + patches, texts, autotexts = results + else: + patches, texts = results + autotexts = [] + + if self.fontsize is not None: + for t in texts + autotexts: + t.set_fontsize(self.fontsize) + + # leglabels is used for legend labels + leglabels = labels if labels is not None else idx + for p, l in zip(patches, leglabels): + self._append_legend_handles_labels(p, l) diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py new file mode 100644 index 00000000..17a21429 --- /dev/null +++ b/pandas/plotting/_matplotlib/groupby.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +import numpy as np + +from pandas._typing import ( + Dict, + IndexLabel, +) + +from pandas.core.dtypes.missing import remove_na_arraylike + +from pandas import ( + DataFrame, + MultiIndex, + Series, + concat, +) + +from pandas.plotting._matplotlib.misc import 
unpack_single_str_list + + +def create_iter_data_given_by( + data: DataFrame, kind: str = "hist" +) -> Dict[str, DataFrame | Series]: + """ + Create data for iteration given `by` is assigned or not, and it is only + used in both hist and boxplot. + + If `by` is assigned, return a dictionary of DataFrames in which the key of + dictionary is the values in groups. + If `by` is not assigned, return input as is, and this preserves current + status of iter_data. + + Parameters + ---------- + data : reformatted grouped data from `_compute_plot_data` method. + kind : str, plot kind. This function is only used for `hist` and `box` plots. + + Returns + ------- + iter_data : DataFrame or Dictionary of DataFrames + + Examples + -------- + If `by` is assigned: + + >>> import numpy as np + >>> tuples = [('h1', 'a'), ('h1', 'b'), ('h2', 'a'), ('h2', 'b')] + >>> mi = MultiIndex.from_tuples(tuples) + >>> value = [[1, 3, np.nan, np.nan], + ... [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]] + >>> data = DataFrame(value, columns=mi) + >>> create_iter_data_given_by(data) + {'h1': h1 + a b + 0 1.0 3.0 + 1 3.0 4.0 + 2 NaN NaN, 'h2': h2 + a b + 0 NaN NaN + 1 NaN NaN + 2 5.0 6.0} + """ + + # For `hist` plot, before transformation, the values in level 0 are values + # in groups and subplot titles, and later used for column subselection and + # iteration; For `box` plot, values in level 1 are column names to show, + # and are used for iteration and as subplots titles. + if kind == "hist": + level = 0 + else: + level = 1 + + # Select sub-columns based on the value of level of MI, and if `by` is + # assigned, data must be a MI DataFrame + assert isinstance(data.columns, MultiIndex) + return { + col: data.loc[:, data.columns.get_level_values(level) == col] + for col in data.columns.levels[level] + } + + +def reconstruct_data_with_by( + data: DataFrame, by: IndexLabel, cols: IndexLabel +) -> DataFrame: + """ + Internal function to group data, and reassign multiindex column names onto the + result in order to let grouped data be used in _compute_plot_data method. + + Parameters + ---------- + data : Original DataFrame to plot + by : grouped `by` parameter selected by users + cols : columns of data set (excluding columns used in `by`) + + Returns + ------- + Output is the reconstructed DataFrame with MultiIndex columns. The first level + of MI is unique values of groups, and second level of MI is the columns + selected by users. + + Examples + -------- + >>> d = {'h': ['h1', 'h1', 'h2'], 'a': [1, 3, 5], 'b': [3, 4, 6]} + >>> df = DataFrame(d) + >>> reconstruct_data_with_by(df, by='h', cols=['a', 'b']) + h1 h2 + a b a b + 0 1.0 3.0 NaN NaN + 1 3.0 4.0 NaN NaN + 2 NaN NaN 5.0 6.0 + """ + by_modified = unpack_single_str_list(by) + grouped = data.groupby(by_modified) + + data_list = [] + for key, group in grouped: + # error: List item 1 has incompatible type "Union[Hashable, + # Sequence[Hashable]]"; expected "Iterable[Hashable]" + columns = MultiIndex.from_product([[key], cols]) # type: ignore[list-item] + sub_group = group[cols] + sub_group.columns = columns + data_list.append(sub_group) + + data = concat(data_list, axis=1) + return data + + +def reformat_hist_y_given_by( + y: Series | np.ndarray, by: IndexLabel | None +) -> Series | np.ndarray: + """Internal function to reformat y given `by` is applied or not for hist plot. + + If by is None, input y is 1-d with NaN removed; and if by is not None, groupby + will take place and input y is multi-dimensional array. 
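+    For illustration: with ``by`` set, each column of the 2-d ``y`` (one
+    column per plotted group) has its NaNs removed independently; with
+    ``by=None`` the single array is returned with NaNs dropped.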
+ """ + if by is not None and len(y.shape) > 1: + return np.array([remove_na_arraylike(col) for col in y.T]).T + return remove_na_arraylike(y) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py new file mode 100644 index 00000000..d69f68d9 --- /dev/null +++ b/pandas/plotting/_matplotlib/hist.py @@ -0,0 +1,531 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Literal, +) + +import numpy as np + +from pandas._typing import PlottingOrientation + +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, +) +from pandas.core.dtypes.missing import ( + isna, + remove_na_arraylike, +) + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.core import ( + LinePlot, + MPLPlot, +) +from pandas.plotting._matplotlib.groupby import ( + create_iter_data_given_by, + reformat_hist_y_given_by, +) +from pandas.plotting._matplotlib.misc import unpack_single_str_list +from pandas.plotting._matplotlib.tools import ( + create_subplots, + flatten_axes, + maybe_adjust_figure, + set_ticks_props, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + + from pandas import DataFrame + + +class HistPlot(LinePlot): + @property + def _kind(self) -> Literal["hist", "kde"]: + return "hist" + + def __init__( + self, + data, + bins: int | np.ndarray | list[np.ndarray] = 10, + bottom: int | np.ndarray = 0, + **kwargs, + ) -> None: + self.bins = bins # use mpl default + self.bottom = bottom + # Do not call LinePlot.__init__ which may fill nan + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + # calculate bin number separately in different subplots + # where subplots are created based on by argument + if is_integer(self.bins): + if self.by is not None: + by_modified = unpack_single_str_list(self.by) + grouped = self.data.groupby(by_modified)[self.columns] + self.bins = [self._calculate_bins(group) for key, group in grouped] + else: + self.bins = self._calculate_bins(self.data) + + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + + def _calculate_bins(self, data: DataFrame) -> np.ndarray: + """Calculate bins given data""" + nd_values = data._convert(datetime=True)._get_numeric_data() + values = np.ravel(nd_values) + values = values[~isna(values)] + + hist, bins = np.histogram( + values, bins=self.bins, range=self.kwds.get("range", None) + ) + return bins + + @classmethod + def _plot( + cls, + ax, + y, + style=None, + bins=None, + bottom=0, + column_num=0, + stacking_id=None, + **kwds, + ): + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(bins) - 1) + + base = np.zeros(len(bins) - 1) + bottom = bottom + cls._get_stacked_values(ax, stacking_id, base, kwds["label"]) + # ignore style + n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) + cls._update_stacker(ax, stacking_id, n) + return patches + + def _make_plot(self): + colors = self._get_colors() + stacking_id = self._get_stacking_id() + + # Re-create iterated data if `by` is assigned by users + data = ( + create_iter_data_given_by(self.data, self._kind) + if self.by is not None + else self.data + ) + + for i, (label, y) in enumerate(self._iter_data(data=data)): + ax = self._get_ax(i) + + kwds = self.kwds.copy() + + label = pprint_thing(label) + label = self._mark_right_label(label, index=i) + kwds["label"] = label + + style, kwds = self._apply_style_colors(colors, kwds, i, label) + if style is not None: + 
kwds["style"] = style + + kwds = self._make_plot_keywords(kwds, y) + + # the bins is multi-dimension array now and each plot need only 1-d and + # when by is applied, label should be columns that are grouped + if self.by is not None: + kwds["bins"] = kwds["bins"][i] + kwds["label"] = self.columns + kwds.pop("color") + + y = reformat_hist_y_given_by(y, self.by) + + # We allow weights to be a multi-dimensional array, e.g. a (10, 2) array, + # and each sub-array (10,) will be called in each iteration. If users only + # provide 1D array, we assume the same weights is used for all iterations + weights = kwds.get("weights", None) + if weights is not None and np.ndim(weights) != 1: + kwds["weights"] = weights[:, i] + + artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds) + + # when by is applied, show title for subplots to know which group it is + if self.by is not None: + ax.set_title(pprint_thing(label)) + + self._append_legend_handles_labels(artists[0], label) + + def _make_plot_keywords(self, kwds, y): + """merge BoxPlot/KdePlot properties to passed kwds""" + # y is required for KdePlot + kwds["bottom"] = self.bottom + kwds["bins"] = self.bins + return kwds + + def _post_plot_logic(self, ax: Axes, data): + if self.orientation == "horizontal": + ax.set_xlabel("Frequency") + else: + ax.set_ylabel("Frequency") + + @property + def orientation(self) -> PlottingOrientation: + if self.kwds.get("orientation", None) == "horizontal": + return "horizontal" + else: + return "vertical" + + +class KdePlot(HistPlot): + @property + def _kind(self) -> Literal["kde"]: + return "kde" + + @property + def orientation(self) -> Literal["vertical"]: + return "vertical" + + def __init__(self, data, bw_method=None, ind=None, **kwargs) -> None: + MPLPlot.__init__(self, data, **kwargs) + self.bw_method = bw_method + self.ind = ind + + def _args_adjust(self): + pass + + def _get_ind(self, y): + if self.ind is None: + # np.nanmax() and np.nanmin() ignores the missing values + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace( + np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, + 1000, + ) + elif is_integer(self.ind): + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace( + np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, + self.ind, + ) + else: + ind = self.ind + return ind + + @classmethod + def _plot( + cls, + ax, + y, + style=None, + bw_method=None, + ind=None, + column_num=None, + stacking_id=None, + **kwds, + ): + from scipy.stats import gaussian_kde + + y = remove_na_arraylike(y) + gkde = gaussian_kde(y, bw_method=bw_method) + + y = gkde.evaluate(ind) + lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) + return lines + + def _make_plot_keywords(self, kwds, y): + kwds["bw_method"] = self.bw_method + kwds["ind"] = self._get_ind(y) + return kwds + + def _post_plot_logic(self, ax, data): + ax.set_ylabel("Density") + + +def _grouped_plot( + plotf, + data, + column=None, + by=None, + numeric_only=True, + figsize=None, + sharex=True, + sharey=True, + layout=None, + rot=0, + ax=None, + **kwargs, +): + + if figsize == "default": + # allowed to specify mpl default with 'default' + raise ValueError( + "figsize='default' is no longer supported. 
" + "Specify figure size by tuple instead" + ) + + grouped = data.groupby(by) + if column is not None: + grouped = grouped[column] + + naxes = len(grouped) + fig, axes = create_subplots( + naxes=naxes, figsize=figsize, sharex=sharex, sharey=sharey, ax=ax, layout=layout + ) + + _axes = flatten_axes(axes) + + for i, (key, group) in enumerate(grouped): + ax = _axes[i] + if numeric_only and isinstance(group, ABCDataFrame): + group = group._get_numeric_data() + plotf(group, ax, **kwargs) + ax.set_title(pprint_thing(key)) + + return fig, axes + + +def _grouped_hist( + data, + column=None, + by=None, + ax=None, + bins=50, + figsize=None, + layout=None, + sharex=False, + sharey=False, + rot=90, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + legend=False, + **kwargs, +): + """ + Grouped histogram + + Parameters + ---------- + data : Series/DataFrame + column : object, optional + by : object, optional + ax : axes, optional + bins : int, default 50 + figsize : tuple, optional + layout : optional + sharex : bool, default False + sharey : bool, default False + rot : int, default 90 + grid : bool, default True + legend: : bool, default False + kwargs : dict, keyword arguments passed to matplotlib.Axes.hist + + Returns + ------- + collection of Matplotlib Axes + """ + if legend: + assert "label" not in kwargs + if data.ndim == 1: + kwargs["label"] = data.name + elif column is None: + kwargs["label"] = data.columns + else: + kwargs["label"] = column + + def plot_group(group, ax): + ax.hist(group.dropna().values, bins=bins, **kwargs) + if legend: + ax.legend() + + if xrot is None: + xrot = rot + + fig, axes = _grouped_plot( + plot_group, + data, + column=column, + by=by, + sharex=sharex, + sharey=sharey, + ax=ax, + figsize=figsize, + layout=layout, + rot=rot, + ) + + set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + + maybe_adjust_figure( + fig, bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3 + ) + return axes + + +def hist_series( + self, + by=None, + ax=None, + grid: bool = True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + figsize=None, + bins: int = 10, + legend: bool = False, + **kwds, +): + import matplotlib.pyplot as plt + + if legend and "label" in kwds: + raise ValueError("Cannot use both legend and label") + + if by is None: + if kwds.get("layout", None) is not None: + raise ValueError("The 'layout' keyword is not supported when 'by' is None") + # hack until the plotting interface is a bit more unified + fig = kwds.pop( + "figure", plt.gcf() if plt.get_fignums() else plt.figure(figsize=figsize) + ) + if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()): + fig.set_size_inches(*figsize, forward=True) + if ax is None: + ax = fig.gca() + elif ax.get_figure() != fig: + raise AssertionError("passed axis not bound to passed figure") + values = self.dropna().values + if legend: + kwds["label"] = self.name + ax.hist(values, bins=bins, **kwds) + if legend: + ax.legend() + ax.grid(grid) + axes = np.array([ax]) + + set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + + else: + if "figure" in kwds: + raise ValueError( + "Cannot pass 'figure' when using the " + "'by' argument, since a new 'Figure' instance will be created" + ) + axes = _grouped_hist( + self, + by=by, + ax=ax, + grid=grid, + figsize=figsize, + bins=bins, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + legend=legend, + **kwds, + ) + + 
if hasattr(axes, "ndim"): + if axes.ndim == 1 and len(axes) == 1: + return axes[0] + return axes + + +def hist_frame( + data, + column=None, + by=None, + grid: bool = True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + ax=None, + sharex: bool = False, + sharey: bool = False, + figsize=None, + layout=None, + bins: int = 10, + legend: bool = False, + **kwds, +): + if legend and "label" in kwds: + raise ValueError("Cannot use both legend and label") + if by is not None: + axes = _grouped_hist( + data, + column=column, + by=by, + ax=ax, + grid=grid, + figsize=figsize, + sharex=sharex, + sharey=sharey, + layout=layout, + bins=bins, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + legend=legend, + **kwds, + ) + return axes + + if column is not None: + if not isinstance(column, (list, np.ndarray, ABCIndex)): + column = [column] + data = data[column] + # GH32590 + data = data.select_dtypes( + include=(np.number, "datetime64", "datetimetz"), exclude="timedelta" + ) + naxes = len(data.columns) + + if naxes == 0: + raise ValueError( + "hist method requires numerical or datetime columns, nothing to plot." + ) + + fig, axes = create_subplots( + naxes=naxes, + ax=ax, + squeeze=False, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + ) + _axes = flatten_axes(axes) + + can_set_label = "label" not in kwds + + for i, col in enumerate(data.columns): + ax = _axes[i] + if legend and can_set_label: + kwds["label"] = col + ax.hist(data[col].dropna().values, bins=bins, **kwds) + ax.set_title(col) + ax.grid(grid) + if legend: + ax.legend() + + set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + maybe_adjust_figure(fig, wspace=0.3, hspace=0.3) + + return axes diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py new file mode 100644 index 00000000..633cb636 --- /dev/null +++ b/pandas/plotting/_matplotlib/misc.py @@ -0,0 +1,484 @@ +from __future__ import annotations + +import random +from typing import ( + TYPE_CHECKING, + Hashable, +) + +import matplotlib.lines as mlines +import matplotlib.patches as patches +import numpy as np + +from pandas.core.dtypes.missing import notna + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.tools import ( + create_subplots, + do_adjust_figure, + maybe_adjust_figure, + set_ticks_props, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.figure import Figure + + from pandas import ( + DataFrame, + Index, + Series, + ) + + +def scatter_matrix( + frame: DataFrame, + alpha: float = 0.5, + figsize=None, + ax=None, + grid: bool = False, + diagonal: str = "hist", + marker: str = ".", + density_kwds=None, + hist_kwds=None, + range_padding: float = 0.05, + **kwds, +): + df = frame._get_numeric_data() + n = df.columns.size + naxes = n * n + fig, axes = create_subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False) + + # no gaps between subplots + maybe_adjust_figure(fig, wspace=0, hspace=0) + + mask = notna(df) + + marker = _get_marker_compat(marker) + + hist_kwds = hist_kwds or {} + density_kwds = density_kwds or {} + + # GH 14855 + kwds.setdefault("edgecolors", "none") + + boundaries_list = [] + for a in df.columns: + values = df[a].values[mask[a].values] + rmin_, rmax_ = np.min(values), np.max(values) + rdelta_ext = (rmax_ - rmin_) * range_padding / 2 + boundaries_list.append((rmin_ - rdelta_ext, rmax_ + 
rdelta_ext)) + + for i, a in enumerate(df.columns): + for j, b in enumerate(df.columns): + ax = axes[i, j] + + if i == j: + values = df[a].values[mask[a].values] + + # Deal with the diagonal by drawing a histogram there. + if diagonal == "hist": + ax.hist(values, **hist_kwds) + + elif diagonal in ("kde", "density"): + from scipy.stats import gaussian_kde + + y = values + gkde = gaussian_kde(y) + ind = np.linspace(y.min(), y.max(), 1000) + ax.plot(ind, gkde.evaluate(ind), **density_kwds) + + ax.set_xlim(boundaries_list[i]) + + else: + common = (mask[a] & mask[b]).values + + ax.scatter( + df[b][common], df[a][common], marker=marker, alpha=alpha, **kwds + ) + + ax.set_xlim(boundaries_list[j]) + ax.set_ylim(boundaries_list[i]) + + ax.set_xlabel(b) + ax.set_ylabel(a) + + if j != 0: + ax.yaxis.set_visible(False) + if i != n - 1: + ax.xaxis.set_visible(False) + + if len(df.columns) > 1: + lim1 = boundaries_list[0] + locs = axes[0][1].yaxis.get_majorticklocs() + locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] + adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) + + lim0 = axes[0][0].get_ylim() + adj = adj * (lim0[1] - lim0[0]) + lim0[0] + axes[0][0].yaxis.set_ticks(adj) + + if np.all(locs == locs.astype(int)): + # if all ticks are int + locs = locs.astype(int) + axes[0][0].yaxis.set_ticklabels(locs) + + set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + return axes + + +def _get_marker_compat(marker): + if marker not in mlines.lineMarkers: + return "o" + return marker + + +def radviz( + frame: DataFrame, + class_column, + ax: Axes | None = None, + color=None, + colormap=None, + **kwds, +) -> Axes: + import matplotlib.pyplot as plt + + def normalize(series): + a = min(series) + b = max(series) + return (series - a) / (b - a) + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + df = frame.drop(class_column, axis=1).apply(normalize) + + if ax is None: + ax = plt.gca() + ax.set_xlim(-1, 1) + ax.set_ylim(-1, 1) + + to_plot: dict[Hashable, list[list]] = {} + colors = get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + + for kls in classes: + to_plot[kls] = [[], []] + + m = len(frame.columns) - 1 + s = np.array( + [(np.cos(t), np.sin(t)) for t in [2 * np.pi * (i / m) for i in range(m)]] + ) + + for i in range(n): + row = df.iloc[i].values + row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) + y = (s * row_).sum(axis=0) / row.sum() + kls = class_col.iat[i] + to_plot[kls][0].append(y[0]) + to_plot[kls][1].append(y[1]) + + for i, kls in enumerate(classes): + ax.scatter( + to_plot[kls][0], + to_plot[kls][1], + color=colors[i], + label=pprint_thing(kls), + **kwds, + ) + ax.legend() + + ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor="none")) + + for xy, name in zip(s, df.columns): + + ax.add_patch(patches.Circle(xy, radius=0.025, facecolor="gray")) + + if xy[0] < 0.0 and xy[1] < 0.0: + ax.text( + xy[0] - 0.025, xy[1] - 0.025, name, ha="right", va="top", size="small" + ) + elif xy[0] < 0.0 and xy[1] >= 0.0: + ax.text( + xy[0] - 0.025, + xy[1] + 0.025, + name, + ha="right", + va="bottom", + size="small", + ) + elif xy[0] >= 0.0 and xy[1] < 0.0: + ax.text( + xy[0] + 0.025, xy[1] - 0.025, name, ha="left", va="top", size="small" + ) + elif xy[0] >= 0.0 and xy[1] >= 0.0: + ax.text( + xy[0] + 0.025, xy[1] + 0.025, name, ha="left", va="bottom", size="small" + ) + + ax.axis("equal") + return ax + + +def andrews_curves( + frame: DataFrame, + class_column, + ax: Axes | None = 
None, + samples: int = 200, + color=None, + colormap=None, + **kwds, +) -> Axes: + import matplotlib.pyplot as plt + + def function(amplitudes): + def f(t): + x1 = amplitudes[0] + result = x1 / np.sqrt(2.0) + + # Take the rest of the coefficients and resize them + # appropriately. Take a copy of amplitudes as otherwise numpy + # deletes the element from amplitudes itself. + coeffs = np.delete(np.copy(amplitudes), 0) + coeffs = np.resize(coeffs, (int((coeffs.size + 1) / 2), 2)) + + # Generate the harmonics and arguments for the sin and cos + # functions. + harmonics = np.arange(0, coeffs.shape[0]) + 1 + trig_args = np.outer(harmonics, t) + + result += np.sum( + coeffs[:, 0, np.newaxis] * np.sin(trig_args) + + coeffs[:, 1, np.newaxis] * np.cos(trig_args), + axis=0, + ) + return result + + return f + + n = len(frame) + class_col = frame[class_column] + classes = frame[class_column].drop_duplicates() + df = frame.drop(class_column, axis=1) + t = np.linspace(-np.pi, np.pi, samples) + used_legends: set[str] = set() + + color_values = get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + colors = dict(zip(classes, color_values)) + if ax is None: + ax = plt.gca() + ax.set_xlim(-np.pi, np.pi) + for i in range(n): + row = df.iloc[i].values + f = function(row) + y = f(t) + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(t, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(t, y, color=colors[kls], **kwds) + + ax.legend(loc="upper right") + ax.grid() + return ax + + +def bootstrap_plot( + series: Series, + fig: Figure | None = None, + size: int = 50, + samples: int = 500, + **kwds, +) -> Figure: + + import matplotlib.pyplot as plt + + # TODO: is the failure mentioned below still relevant? 
+ # random.sample(ndarray, int) fails on python 3.3, sigh + data = list(series.values) + samplings = [random.sample(data, size) for _ in range(samples)] + + means = np.array([np.mean(sampling) for sampling in samplings]) + medians = np.array([np.median(sampling) for sampling in samplings]) + midranges = np.array( + [(min(sampling) + max(sampling)) * 0.5 for sampling in samplings] + ) + if fig is None: + fig = plt.figure() + x = list(range(samples)) + axes = [] + ax1 = fig.add_subplot(2, 3, 1) + ax1.set_xlabel("Sample") + axes.append(ax1) + ax1.plot(x, means, **kwds) + ax2 = fig.add_subplot(2, 3, 2) + ax2.set_xlabel("Sample") + axes.append(ax2) + ax2.plot(x, medians, **kwds) + ax3 = fig.add_subplot(2, 3, 3) + ax3.set_xlabel("Sample") + axes.append(ax3) + ax3.plot(x, midranges, **kwds) + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_xlabel("Mean") + axes.append(ax4) + ax4.hist(means, **kwds) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_xlabel("Median") + axes.append(ax5) + ax5.hist(medians, **kwds) + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_xlabel("Midrange") + axes.append(ax6) + ax6.hist(midranges, **kwds) + for axis in axes: + plt.setp(axis.get_xticklabels(), fontsize=8) + plt.setp(axis.get_yticklabels(), fontsize=8) + if do_adjust_figure(fig): + plt.tight_layout() + return fig + + +def parallel_coordinates( + frame: DataFrame, + class_column, + cols=None, + ax: Axes | None = None, + color=None, + use_columns: bool = False, + xticks=None, + colormap=None, + axvlines: bool = True, + axvlines_kwds=None, + sort_labels: bool = False, + **kwds, +) -> Axes: + import matplotlib.pyplot as plt + + if axvlines_kwds is None: + axvlines_kwds = {"linewidth": 1, "color": "black"} + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + + if cols is None: + df = frame.drop(class_column, axis=1) + else: + df = frame[cols] + + used_legends: set[str] = set() + + ncols = len(df.columns) + + # determine values to use for xticks + x: list[int] | Index + if use_columns is True: + if not np.all(np.isreal(list(df.columns))): + raise ValueError("Columns must be numeric to be used as xticks") + x = df.columns + elif xticks is not None: + if not np.all(np.isreal(xticks)): + raise ValueError("xticks specified must be numeric") + elif len(xticks) != ncols: + raise ValueError("Length of xticks must match number of columns") + x = xticks + else: + x = list(range(ncols)) + + if ax is None: + ax = plt.gca() + + color_values = get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) + colors = dict(zip(classes, color_values)) + + for i in range(n): + y = df.iloc[i].values + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(x, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(x, y, color=colors[kls], **kwds) + + if axvlines: + for i in x: + ax.axvline(i, **axvlines_kwds) + + ax.set_xticks(x) + ax.set_xticklabels(df.columns) + ax.set_xlim(x[0], x[-1]) + ax.legend(loc="upper right") + ax.grid() + return ax + + +def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes: + # workaround because `c='b'` is hardcoded in matplotlib's scatter method + import matplotlib.pyplot as plt + + kwds.setdefault("c", plt.rcParams["patch.facecolor"]) + + data = series.values + y1 = data[:-lag] + y2 = data[lag:] + if ax is None: + ax = plt.gca() + ax.set_xlabel("y(t)") + 
ax.set_ylabel(f"y(t + {lag})") + ax.scatter(y1, y2, **kwds) + return ax + + +def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwds) -> Axes: + import matplotlib.pyplot as plt + + n = len(series) + data = np.asarray(series) + if ax is None: + ax = plt.gca() + ax.set_xlim(1, n) + ax.set_ylim(-1.0, 1.0) + mean = np.mean(data) + c0 = np.sum((data - mean) ** 2) / n + + def r(h): + return ((data[: n - h] - mean) * (data[h:] - mean)).sum() / n / c0 + + x = np.arange(n) + 1 + y = [r(loc) for loc in x] + z95 = 1.959963984540054 + z99 = 2.5758293035489004 + ax.axhline(y=z99 / np.sqrt(n), linestyle="--", color="grey") + ax.axhline(y=z95 / np.sqrt(n), color="grey") + ax.axhline(y=0.0, color="black") + ax.axhline(y=-z95 / np.sqrt(n), color="grey") + ax.axhline(y=-z99 / np.sqrt(n), linestyle="--", color="grey") + ax.set_xlabel("Lag") + ax.set_ylabel("Autocorrelation") + ax.plot(x, y, **kwds) + if "label" in kwds: + ax.legend() + ax.grid() + return ax + + +def unpack_single_str_list(keys): + # GH 42795 + if isinstance(keys, list) and len(keys) == 1: + keys = keys[0] + return keys diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py new file mode 100644 index 00000000..d462fdb5 --- /dev/null +++ b/pandas/plotting/_matplotlib/style.py @@ -0,0 +1,284 @@ +from __future__ import annotations + +import itertools +from typing import ( + TYPE_CHECKING, + Collection, + Iterator, + Sequence, + Union, + cast, +) +import warnings + +import matplotlib as mpl +import matplotlib.cm as cm +import matplotlib.colors +import numpy as np + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import is_list_like + +import pandas.core.common as com + +from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 + +if TYPE_CHECKING: + from matplotlib.colors import Colormap + + +Color = Union[str, Sequence[float]] + + +def get_standard_colors( + num_colors: int, + colormap: Colormap | None = None, + color_type: str = "default", + color: dict[str, Color] | Color | Collection[Color] | None = None, +): + """ + Get standard colors based on `colormap`, `color_type` or `color` inputs. + + Parameters + ---------- + num_colors : int + Minimum number of colors to be returned. + Ignored if `color` is a dictionary. + colormap : :py:class:`matplotlib.colors.Colormap`, optional + Matplotlib colormap. + When provided, the resulting colors will be derived from the colormap. + color_type : {"default", "random"}, optional + Type of colors to derive. Used if provided `color` and `colormap` are None. + Ignored if either `color` or `colormap` are not None. + color : dict or str or sequence, optional + Color(s) to be used for deriving sequence of colors. + Can be either be a dictionary, or a single color (single color string, + or sequence of floats representing a single color), + or a sequence of colors. + + Returns + ------- + dict or list + Standard colors. Can either be a mapping if `color` was a dictionary, + or a list of colors with a length of `num_colors` or more. + + Warns + ----- + UserWarning + If both `colormap` and `color` are provided. + Parameter `color` will override. 
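+
+    Examples
+    --------
+    Illustrative only; the exact colors depend on the active matplotlib
+    style (the values below assume the default ``axes.prop_cycle``).
+
+    >>> get_standard_colors(num_colors=3)  # doctest: +SKIP
+    ['#1f77b4', '#ff7f0e', '#2ca02c']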
+ """ + if isinstance(color, dict): + return color + + colors = _derive_colors( + color=color, + colormap=colormap, + color_type=color_type, + num_colors=num_colors, + ) + + return list(_cycle_colors(colors, num_colors=num_colors)) + + +def _derive_colors( + *, + color: Color | Collection[Color] | None, + colormap: str | Colormap | None, + color_type: str, + num_colors: int, +) -> list[Color]: + """ + Derive colors from either `colormap`, `color_type` or `color` inputs. + + Get a list of colors either from `colormap`, or from `color`, + or from `color_type` (if both `colormap` and `color` are None). + + Parameters + ---------- + color : str or sequence, optional + Color(s) to be used for deriving sequence of colors. + Can be either be a single color (single color string, or sequence of floats + representing a single color), or a sequence of colors. + colormap : :py:class:`matplotlib.colors.Colormap`, optional + Matplotlib colormap. + When provided, the resulting colors will be derived from the colormap. + color_type : {"default", "random"}, optional + Type of colors to derive. Used if provided `color` and `colormap` are None. + Ignored if either `color` or `colormap`` are not None. + num_colors : int + Number of colors to be extracted. + + Returns + ------- + list + List of colors extracted. + + Warns + ----- + UserWarning + If both `colormap` and `color` are provided. + Parameter `color` will override. + """ + if color is None and colormap is not None: + return _get_colors_from_colormap(colormap, num_colors=num_colors) + elif color is not None: + if colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used simultaneously. Using 'color'", + stacklevel=find_stack_level(), + ) + return _get_colors_from_color(color) + else: + return _get_colors_from_color_type(color_type, num_colors=num_colors) + + +def _cycle_colors(colors: list[Color], num_colors: int) -> Iterator[Color]: + """Cycle colors until achieving max of `num_colors` or length of `colors`. + + Extra colors will be ignored by matplotlib if there are more colors + than needed and nothing needs to be done here. + """ + max_colors = max(num_colors, len(colors)) + yield from itertools.islice(itertools.cycle(colors), max_colors) + + +def _get_colors_from_colormap( + colormap: str | Colormap, + num_colors: int, +) -> list[Color]: + """Get colors from colormap.""" + cmap = _get_cmap_instance(colormap) + return [cmap(num) for num in np.linspace(0, 1, num=num_colors)] + + +def _get_cmap_instance(colormap: str | Colormap) -> Colormap: + """Get instance of matplotlib colormap.""" + if isinstance(colormap, str): + cmap = colormap + if mpl_ge_3_6_0(): + colormap = mpl.colormaps[colormap] + else: + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError(f"Colormap {cmap} is not recognized") + return colormap + + +def _get_colors_from_color( + color: Color | Collection[Color], +) -> list[Color]: + """Get colors from user input color.""" + if len(color) == 0: + raise ValueError(f"Invalid color argument: {color}") + + if _is_single_color(color): + color = cast(Color, color) + return [color] + + color = cast(Collection[Color], color) + return list(_gen_list_of_colors_from_iterable(color)) + + +def _is_single_color(color: Color | Collection[Color]) -> bool: + """Check if `color` is a single color, not a sequence of colors. + + Single color is of these kinds: + - Named color "red", "C0", "firebrick" + - Alias "g" + - Sequence of floats, such as (0.1, 0.2, 0.3) or (0.1, 0.2, 0.3, 0.4). 
+ + See Also + -------- + _is_single_string_color + """ + if isinstance(color, str) and _is_single_string_color(color): + # GH #36972 + return True + + if _is_floats_color(color): + return True + + return False + + +def _gen_list_of_colors_from_iterable(color: Collection[Color]) -> Iterator[Color]: + """ + Yield colors from string of several letters or from collection of colors. + """ + for x in color: + if _is_single_color(x): + yield x + else: + raise ValueError(f"Invalid color {x}") + + +def _is_floats_color(color: Color | Collection[Color]) -> bool: + """Check if color comprises a sequence of floats representing color.""" + return bool( + is_list_like(color) + and (len(color) == 3 or len(color) == 4) + and all(isinstance(x, (int, float)) for x in color) + ) + + +def _get_colors_from_color_type(color_type: str, num_colors: int) -> list[Color]: + """Get colors from user input color type.""" + if color_type == "default": + return _get_default_colors(num_colors) + elif color_type == "random": + return _get_random_colors(num_colors) + else: + raise ValueError("color_type must be either 'default' or 'random'") + + +def _get_default_colors(num_colors: int) -> list[Color]: + """Get `num_colors` of default colors from matplotlib rc params.""" + import matplotlib.pyplot as plt + + colors = [c["color"] for c in plt.rcParams["axes.prop_cycle"]] + return colors[0:num_colors] + + +def _get_random_colors(num_colors: int) -> list[Color]: + """Get `num_colors` of random colors.""" + return [_random_color(num) for num in range(num_colors)] + + +def _random_color(column: int) -> list[float]: + """Get a random color represented as a list of length 3""" + # GH17525 use common._random_state to avoid resetting the seed + rs = com.random_state(column) + return rs.rand(3).tolist() + + +def _is_single_string_color(color: Color) -> bool: + """Check if `color` is a single string color. + + Examples of single string colors: + - 'r' + - 'g' + - 'red' + - 'green' + - 'C3' + - 'firebrick' + + Parameters + ---------- + color : Color + Color string or sequence of floats. + + Returns + ------- + bool + True if `color` looks like a valid color. + False otherwise. 
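+
+    Examples
+    --------
+    >>> _is_single_string_color("firebrick")
+    True
+    >>> _is_single_string_color("not a color")
+    False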
+ """ + conv = matplotlib.colors.ColorConverter() + try: + conv.to_rgba(color) + except ValueError: + return False + else: + return True diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py new file mode 100644 index 00000000..06aac478 --- /dev/null +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -0,0 +1,336 @@ +# TODO: Use the fact that axis can have units to simplify the process + +from __future__ import annotations + +from datetime import timedelta +import functools +from typing import ( + TYPE_CHECKING, + cast, +) + +import numpy as np + +from pandas._libs.tslibs import ( + BaseOffset, + Period, + to_offset, +) +from pandas._libs.tslibs.dtypes import FreqGroup + +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, + ABCPeriodIndex, + ABCTimedeltaIndex, +) + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.converter import ( + TimeSeries_DateFormatter, + TimeSeries_DateLocator, + TimeSeries_TimedeltaFormatter, +) +from pandas.tseries.frequencies import ( + get_period_alias, + is_subperiod, + is_superperiod, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + + from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + ) + +# --------------------------------------------------------------------- +# Plotting functions and monkey patches + + +def maybe_resample(series: Series, ax: Axes, kwargs): + # resample against axes freq if necessary + freq, ax_freq = _get_freq(ax, series) + + if freq is None: # pragma: no cover + raise ValueError("Cannot use dynamic axis without frequency info") + + # Convert DatetimeIndex to PeriodIndex + if isinstance(series.index, ABCDatetimeIndex): + series = series.to_period(freq=freq) + + if ax_freq is not None and freq != ax_freq: + if is_superperiod(freq, ax_freq): # upsample input + series = series.copy() + # error: "Index" has no attribute "asfreq" + series.index = series.index.asfreq( # type: ignore[attr-defined] + ax_freq, how="s" + ) + freq = ax_freq + elif _is_sup(freq, ax_freq): # one is weekly + how = kwargs.pop("how", "last") + series = getattr(series.resample("D"), how)().dropna() + series = getattr(series.resample(ax_freq), how)().dropna() + freq = ax_freq + elif is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): + _upsample_others(ax, freq, kwargs) + else: # pragma: no cover + raise ValueError("Incompatible frequency conversion") + return freq, series + + +def _is_sub(f1: str, f2: str) -> bool: + return (f1.startswith("W") and is_subperiod("D", f2)) or ( + f2.startswith("W") and is_subperiod(f1, "D") + ) + + +def _is_sup(f1: str, f2: str) -> bool: + return (f1.startswith("W") and is_superperiod("D", f2)) or ( + f2.startswith("W") and is_superperiod(f1, "D") + ) + + +def _upsample_others(ax: Axes, freq, kwargs): + legend = ax.get_legend() + lines, labels = _replot_ax(ax, freq, kwargs) + _replot_ax(ax, freq, kwargs) + + other_ax = None + if hasattr(ax, "left_ax"): + other_ax = ax.left_ax + if hasattr(ax, "right_ax"): + other_ax = ax.right_ax + + if other_ax is not None: + rlines, rlabels = _replot_ax(other_ax, freq, kwargs) + lines.extend(rlines) + labels.extend(rlabels) + + if legend is not None and kwargs.get("legend", True) and len(lines) > 0: + title = legend.get_title().get_text() + if title == "None": + title = None + ax.legend(lines, labels, loc="best", title=title) + + +def _replot_ax(ax: Axes, freq, kwargs): + data = getattr(ax, "_plot_data", None) + + # clear current axes and data + ax._plot_data = [] + ax.clear() + + 
decorate_axes(ax, freq, kwargs) + + lines = [] + labels = [] + if data is not None: + for series, plotf, kwds in data: + series = series.copy() + idx = series.index.asfreq(freq, how="S") + series.index = idx + ax._plot_data.append((series, plotf, kwds)) + + # for tsplot + if isinstance(plotf, str): + from pandas.plotting._matplotlib import PLOT_CLASSES + + plotf = PLOT_CLASSES[plotf]._plot + + lines.append(plotf(ax, series.index._mpl_repr(), series.values, **kwds)[0]) + labels.append(pprint_thing(series.name)) + + return lines, labels + + +def decorate_axes(ax: Axes, freq, kwargs): + """Initialize axes for time-series plotting""" + if not hasattr(ax, "_plot_data"): + ax._plot_data = [] + + ax.freq = freq + xaxis = ax.get_xaxis() + xaxis.freq = freq + if not hasattr(ax, "legendlabels"): + ax.legendlabels = [kwargs.get("label", None)] + else: + ax.legendlabels.append(kwargs.get("label", None)) + ax.view_interval = None + ax.date_axis_info = None + + +def _get_ax_freq(ax: Axes): + """ + Get the freq attribute of the ax object if set. + Also checks shared axes (eg when using secondary yaxis, sharex=True + or twinx) + """ + ax_freq = getattr(ax, "freq", None) + if ax_freq is None: + # check for left/right ax in case of secondary yaxis + if hasattr(ax, "left_ax"): + ax_freq = getattr(ax.left_ax, "freq", None) + elif hasattr(ax, "right_ax"): + ax_freq = getattr(ax.right_ax, "freq", None) + if ax_freq is None: + # check if a shared ax (sharex/twinx) has already freq set + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + if len(shared_axes) > 1: + for shared_ax in shared_axes: + ax_freq = getattr(shared_ax, "freq", None) + if ax_freq is not None: + break + return ax_freq + + +def _get_period_alias(freq: timedelta | BaseOffset | str) -> str | None: + freqstr = to_offset(freq).rule_code + + return get_period_alias(freqstr) + + +def _get_freq(ax: Axes, series: Series): + # get frequency from data + freq = getattr(series.index, "freq", None) + if freq is None: + freq = getattr(series.index, "inferred_freq", None) + freq = to_offset(freq) + + ax_freq = _get_ax_freq(ax) + + # use axes freq if no data freq + if freq is None: + freq = ax_freq + + # get the period frequency + freq = _get_period_alias(freq) + return freq, ax_freq + + +def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool: + freq = _get_index_freq(data.index) + ax_freq = _get_ax_freq(ax) + + if freq is None: # convert irregular if axes has freq info + freq = ax_freq + else: # do not use tsplot if irregular was plotted first + if (ax_freq is None) and (len(ax.get_lines()) > 0): + return False + + if freq is None: + return False + + freq_str = _get_period_alias(freq) + + if freq_str is None: + return False + + # FIXME: hack this for 0.10.1, creating more technical debt...sigh + if isinstance(data.index, ABCDatetimeIndex): + # error: "BaseOffset" has no attribute "_period_dtype_code" + base = to_offset(freq_str)._period_dtype_code # type: ignore[attr-defined] + x = data.index + if base <= FreqGroup.FR_DAY.value: + return x[:1].is_normalized + period = Period(x[0], freq_str) + assert isinstance(period, Period) + return period.to_timestamp().tz_localize(x.tz) == x[0] + return True + + +def _get_index_freq(index: Index) -> BaseOffset | None: + freq = getattr(index, "freq", None) + if freq is None: + freq = getattr(index, "inferred_freq", None) + if freq == "B": + # error: "Index" has no attribute "dayofweek" + weekdays = np.unique(index.dayofweek) # type: ignore[attr-defined] + if (5 in weekdays) or (6 in weekdays): + freq = None + + 
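+    # normalise whatever was found (offset object or string alias) to a
+    # DateOffset; to_offset(None) just passes None through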
freq = to_offset(freq) + return freq + + +def maybe_convert_index(ax: Axes, data): + # tsplot converts automatically, but don't want to convert index + # over and over for DataFrames + if isinstance(data.index, (ABCDatetimeIndex, ABCPeriodIndex)): + freq: str | BaseOffset | None = data.index.freq + + if freq is None: + # We only get here for DatetimeIndex + data.index = cast("DatetimeIndex", data.index) + freq = data.index.inferred_freq + freq = to_offset(freq) + + if freq is None: + freq = _get_ax_freq(ax) + + if freq is None: + raise ValueError("Could not get frequency alias for plotting") + + freq_str = _get_period_alias(freq) + + if isinstance(data.index, ABCDatetimeIndex): + data = data.tz_localize(None).to_period(freq=freq_str) + elif isinstance(data.index, ABCPeriodIndex): + data.index = data.index.asfreq(freq=freq_str) + return data + + +# Patch methods for subplot. Only format_dateaxis is currently used. +# Do we need the rest for convenience? + + +def _format_coord(freq, t, y) -> str: + time_period = Period(ordinal=int(t), freq=freq) + return f"t = {time_period} y = {y:8f}" + + +def format_dateaxis(subplot, freq, index) -> None: + """ + Pretty-formats the date axis (x-axis). + + Major and minor ticks are automatically set for the frequency of the + current underlying series. As the dynamic mode is activated by + default, changing the limits of the x axis will intelligently change + the positions of the ticks. + """ + from matplotlib import pylab + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. DatetimeIndex uses matplotlib.date directly + if isinstance(index, ABCPeriodIndex): + + majlocator = TimeSeries_DateLocator( + freq, dynamic_mode=True, minor_locator=False, plot_obj=subplot + ) + minlocator = TimeSeries_DateLocator( + freq, dynamic_mode=True, minor_locator=True, plot_obj=subplot + ) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter( + freq, dynamic_mode=True, minor_locator=False, plot_obj=subplot + ) + minformatter = TimeSeries_DateFormatter( + freq, dynamic_mode=True, minor_locator=True, plot_obj=subplot + ) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = functools.partial(_format_coord, freq) + + elif isinstance(index, ABCTimedeltaIndex): + subplot.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter()) + else: + raise TypeError("index type not supported") + + pylab.draw_if_interactive() diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py new file mode 100644 index 00000000..1925dd8c --- /dev/null +++ b/pandas/plotting/_matplotlib/tools.py @@ -0,0 +1,492 @@ +# being a bit too dynamic +from __future__ import annotations + +from math import ceil +from typing import ( + TYPE_CHECKING, + Iterable, + Sequence, +) +import warnings + +import matplotlib.table +import matplotlib.ticker as ticker +import numpy as np + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) + +from pandas.plotting._matplotlib import compat + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.axis import Axis + from matplotlib.figure import Figure + from matplotlib.lines import Line2D + from matplotlib.table import Table + + from pandas import ( + DataFrame, + Series, + ) + 
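+
+# Note that do_adjust_figure returns True when the figure is *not* using
+# constrained_layout, i.e. when subplots_adjust / tight_layout may still be
+# applied; figures that manage their own layout are left untouched.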
+ +def do_adjust_figure(fig: Figure): + """Whether fig has constrained_layout enabled.""" + if not hasattr(fig, "get_constrained_layout"): + return False + return not fig.get_constrained_layout() + + +def maybe_adjust_figure(fig: Figure, *args, **kwargs): + """Call fig.subplots_adjust unless fig has constrained_layout enabled.""" + if do_adjust_figure(fig): + fig.subplots_adjust(*args, **kwargs) + + +def format_date_labels(ax: Axes, rot) -> None: + # mini version of autofmt_xdate + for label in ax.get_xticklabels(): + label.set_ha("right") + label.set_rotation(rot) + fig = ax.get_figure() + maybe_adjust_figure(fig, bottom=0.2) + + +def table( + ax, data: DataFrame | Series, rowLabels=None, colLabels=None, **kwargs +) -> Table: + if isinstance(data, ABCSeries): + data = data.to_frame() + elif isinstance(data, ABCDataFrame): + pass + else: + raise ValueError("Input data must be DataFrame or Series") + + if rowLabels is None: + rowLabels = data.index + + if colLabels is None: + colLabels = data.columns + + cellText = data.values + + table = matplotlib.table.table( + ax, cellText=cellText, rowLabels=rowLabels, colLabels=colLabels, **kwargs + ) + return table + + +def _get_layout( + nplots: int, + layout: tuple[int, int] | None = None, + layout_type: str = "box", +) -> tuple[int, int]: + if layout is not None: + if not isinstance(layout, (tuple, list)) or len(layout) != 2: + raise ValueError("Layout must be a tuple of (rows, columns)") + + nrows, ncols = layout + + if nrows == -1 and ncols > 0: + layout = nrows, ncols = (ceil(nplots / ncols), ncols) + elif ncols == -1 and nrows > 0: + layout = nrows, ncols = (nrows, ceil(nplots / nrows)) + elif ncols <= 0 and nrows <= 0: + msg = "At least one dimension of layout must be positive" + raise ValueError(msg) + + if nrows * ncols < nplots: + raise ValueError( + f"Layout of {nrows}x{ncols} must be larger than required size {nplots}" + ) + + return layout + + if layout_type == "single": + return (1, 1) + elif layout_type == "horizontal": + return (1, nplots) + elif layout_type == "vertical": + return (nplots, 1) + + layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} + try: + return layouts[nplots] + except KeyError: + k = 1 + while k**2 < nplots: + k += 1 + + if (k - 1) * k >= nplots: + return k, (k - 1) + else: + return k, k + + +# copied from matplotlib/pyplot.py and modified for pandas.plotting + + +def create_subplots( + naxes: int, + sharex: bool = False, + sharey: bool = False, + squeeze: bool = True, + subplot_kw=None, + ax=None, + layout=None, + layout_type: str = "box", + **fig_kw, +): + """ + Create a figure with a set of subplots already made. + + This utility wrapper makes it convenient to create common layouts of + subplots, including the enclosing figure object, in a single call. + + Parameters + ---------- + naxes : int + Number of required axes. Exceeded axes are set invisible. Default is + nrows * ncols. + + sharex : bool + If True, the X axis will be shared amongst all subplots. + + sharey : bool + If True, the Y axis will be shared amongst all subplots. + + squeeze : bool + + If True, extra dimensions are squeezed out from the returned axis object: + - if only one subplot is constructed (nrows=ncols=1), the resulting + single Axis object is returned as a scalar. + - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object + array of Axis objects are returned as numpy 1-d arrays. + - for NxM subplots with N>1 and M>1 are returned as a 2d array. 
+ + If False, no squeezing is done: the returned axis object is always + a 2-d array containing Axis instances, even if it ends up being 1x1. + + subplot_kw : dict + Dict with keywords passed to the add_subplot() call used to create each + subplots. + + ax : Matplotlib axis object, optional + + layout : tuple + Number of rows and columns of the subplot grid. + If not specified, calculated from naxes and layout_type + + layout_type : {'box', 'horizontal', 'vertical'}, default 'box' + Specify how to layout the subplot grid. + + fig_kw : Other keyword arguments to be passed to the figure() call. + Note that all keywords not recognized above will be + automatically included here. + + Returns + ------- + fig, ax : tuple + - fig is the Matplotlib Figure object + - ax can be either a single axis object or an array of axis objects if + more than one subplot was created. The dimensions of the resulting array + can be controlled with the squeeze keyword, see above. + + Examples + -------- + x = np.linspace(0, 2*np.pi, 400) + y = np.sin(x**2) + + # Just a figure and one subplot + f, ax = plt.subplots() + ax.plot(x, y) + ax.set_title('Simple plot') + + # Two subplots, unpack the output array immediately + f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) + ax1.plot(x, y) + ax1.set_title('Sharing Y axis') + ax2.scatter(x, y) + + # Four polar axes + plt.subplots(2, 2, subplot_kw=dict(polar=True)) + """ + import matplotlib.pyplot as plt + + if subplot_kw is None: + subplot_kw = {} + + if ax is None: + fig = plt.figure(**fig_kw) + else: + if is_list_like(ax): + if squeeze: + ax = flatten_axes(ax) + if layout is not None: + warnings.warn( + "When passing multiple axes, layout keyword is ignored.", + UserWarning, + stacklevel=find_stack_level(), + ) + if sharex or sharey: + warnings.warn( + "When passing multiple axes, sharex and sharey " + "are ignored. These settings must be specified when creating axes.", + UserWarning, + stacklevel=find_stack_level(), + ) + if ax.size == naxes: + fig = ax.flat[0].get_figure() + return fig, ax + else: + raise ValueError( + f"The number of passed axes must be {naxes}, the " + "same as the output plot" + ) + + fig = ax.get_figure() + # if ax is passed and a number of subplots is 1, return ax as it is + if naxes == 1: + if squeeze: + return fig, ax + else: + return fig, flatten_axes(ax) + else: + warnings.warn( + "To output multiple subplots, the figure containing " + "the passed axes is being cleared.", + UserWarning, + stacklevel=find_stack_level(), + ) + fig.clear() + + nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) + nplots = nrows * ncols + + # Create empty object array to hold all axes. It's easiest to make it 1-d + # so we can just append subplots upon creation, and then + axarr = np.empty(nplots, dtype=object) + + # Create first subplot separately, so we can share it if requested + ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) + + if sharex: + subplot_kw["sharex"] = ax0 + if sharey: + subplot_kw["sharey"] = ax0 + axarr[0] = ax0 + + # Note off-by-one counting because add_subplot uses the MATLAB 1-based + # convention. + for i in range(1, nplots): + kwds = subplot_kw.copy() + # Set sharex and sharey to None for blank/dummy axes, these can + # interfere with proper axis limits on the visible axes if + # they share axes e.g. 
issue #7528 + if i >= naxes: + kwds["sharex"] = None + kwds["sharey"] = None + ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) + axarr[i] = ax + + if naxes != nplots: + for ax in axarr[naxes:]: + ax.set_visible(False) + + handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) + + if squeeze: + # Reshape the array to have the final desired dimension (nrow,ncol), + # though discarding unneeded dimensions that equal 1. If we only have + # one subplot, just return it instead of a 1-element array. + if nplots == 1: + axes = axarr[0] + else: + axes = axarr.reshape(nrows, ncols).squeeze() + else: + # returned axis array will be always 2-d, even if nrows=ncols=1 + axes = axarr.reshape(nrows, ncols) + + return fig, axes + + +def _remove_labels_from_axis(axis: Axis): + for t in axis.get_majorticklabels(): + t.set_visible(False) + + # set_visible will not be effective if + # minor axis has NullLocator and NullFormatter (default) + if isinstance(axis.get_minor_locator(), ticker.NullLocator): + axis.set_minor_locator(ticker.AutoLocator()) + if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): + axis.set_minor_formatter(ticker.FormatStrFormatter("")) + for t in axis.get_minorticklabels(): + t.set_visible(False) + + axis.get_label().set_visible(False) + + +def _has_externally_shared_axis(ax1: Axes, compare_axis: str) -> bool: + """ + Return whether an axis is externally shared. + + Parameters + ---------- + ax1 : matplotlib.axes.Axes + Axis to query. + compare_axis : str + `"x"` or `"y"` according to whether the X-axis or Y-axis is being + compared. + + Returns + ------- + bool + `True` if the axis is externally shared. Otherwise `False`. + + Notes + ----- + If two axes with different positions are sharing an axis, they can be + referred to as *externally* sharing the common axis. + + If two axes sharing an axis also have the same position, they can be + referred to as *internally* sharing the common axis (a.k.a twinning). + + _handle_shared_axes() is only interested in axes externally sharing an + axis, regardless of whether either of the axes is also internally sharing + with a third axis. + """ + if compare_axis == "x": + axes = ax1.get_shared_x_axes() + elif compare_axis == "y": + axes = ax1.get_shared_y_axes() + else: + raise ValueError( + "_has_externally_shared_axis() needs 'x' or 'y' as a second parameter" + ) + + axes = axes.get_siblings(ax1) + + # Retain ax1 and any of its siblings which aren't in the same position as it + ax1_points = ax1.get_position().get_points() + + for ax2 in axes: + if not np.array_equal(ax1_points, ax2.get_position().get_points()): + return True + + return False + + +def handle_shared_axes( + axarr: Iterable[Axes], + nplots: int, + naxes: int, + nrows: int, + ncols: int, + sharex: bool, + sharey: bool, +): + if nplots > 1: + row_num = lambda x: x.get_subplotspec().rowspan.start + col_num = lambda x: x.get_subplotspec().colspan.start + + if compat.mpl_ge_3_4_0(): + is_first_col = lambda x: x.get_subplotspec().is_first_col() + else: + is_first_col = lambda x: x.is_first_col() + + if nrows > 1: + try: + # first find out the ax layout, + # so that we can correctly handle 'gaps" + layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool_) + for ax in axarr: + layout[row_num(ax), col_num(ax)] = ax.get_visible() + + for ax in axarr: + # only the last row of subplots should get x labels -> all + # other off layout handles the case that the subplot is + # the last in the column, because below is no subplot/gap. 
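+                    # layout[row + 1, col] is True when a visible subplot
+                    # sits directly below this axes; only in that case are
+                    # its x tick labels removed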
+ if not layout[row_num(ax) + 1, col_num(ax)]: + continue + if sharex or _has_externally_shared_axis(ax, "x"): + _remove_labels_from_axis(ax.xaxis) + + except IndexError: + # if gridspec is used, ax.rowNum and ax.colNum may different + # from layout shape. in this case, use last_row logic + if compat.mpl_ge_3_4_0(): + is_last_row = lambda x: x.get_subplotspec().is_last_row() + else: + is_last_row = lambda x: x.is_last_row() + for ax in axarr: + if is_last_row(ax): + continue + if sharex or _has_externally_shared_axis(ax, "x"): + _remove_labels_from_axis(ax.xaxis) + + if ncols > 1: + for ax in axarr: + # only the first column should get y labels -> set all other to + # off as we only have labels in the first column and we always + # have a subplot there, we can skip the layout test + if is_first_col(ax): + continue + if sharey or _has_externally_shared_axis(ax, "y"): + _remove_labels_from_axis(ax.yaxis) + + +def flatten_axes(axes: Axes | Sequence[Axes]) -> np.ndarray: + if not is_list_like(axes): + return np.array([axes]) + elif isinstance(axes, (np.ndarray, ABCIndex)): + return np.asarray(axes).ravel() + return np.array(axes) + + +def set_ticks_props( + axes: Axes | Sequence[Axes], + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, +): + import matplotlib.pyplot as plt + + for ax in flatten_axes(axes): + if xlabelsize is not None: + plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) + if xrot is not None: + plt.setp(ax.get_xticklabels(), rotation=xrot) + if ylabelsize is not None: + plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) + if yrot is not None: + plt.setp(ax.get_yticklabels(), rotation=yrot) + return axes + + +def get_all_lines(ax: Axes) -> list[Line2D]: + lines = ax.get_lines() + + if hasattr(ax, "right_ax"): + lines += ax.right_ax.get_lines() + + if hasattr(ax, "left_ax"): + lines += ax.left_ax.get_lines() + + return lines + + +def get_xlim(lines: Iterable[Line2D]) -> tuple[float, float]: + left, right = np.inf, -np.inf + for line in lines: + x = line.get_xdata(orig=False) + left = min(np.nanmin(x), left) + right = max(np.nanmax(x), right) + return left, right diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py new file mode 100644 index 00000000..b7e6fca8 --- /dev/null +++ b/pandas/plotting/_misc.py @@ -0,0 +1,610 @@ +from __future__ import annotations + +from contextlib import contextmanager +from typing import ( + TYPE_CHECKING, + Iterator, +) + +from pandas.plotting._core import _get_plot_backend + +if TYPE_CHECKING: + from matplotlib.axes import Axes + from matplotlib.figure import Figure + import numpy as np + + from pandas import ( + DataFrame, + Series, + ) + + +def table(ax, data, rowLabels=None, colLabels=None, **kwargs): + """ + Helper function to convert DataFrame and Series to matplotlib.table. + + Parameters + ---------- + ax : Matplotlib axes object + data : DataFrame or Series + Data for table contents. + **kwargs + Keyword arguments to be passed to matplotlib.table.table. + If `rowLabels` or `colLabels` is not specified, data index or column + name will be used. + + Returns + ------- + matplotlib table object + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.table( + ax=ax, data=data, rowLabels=None, colLabels=None, **kwargs + ) + + +def register() -> None: + """ + Register pandas formatters and converters with matplotlib. + + This function modifies the global ``matplotlib.units.registry`` + dictionary. 
pandas adds custom converters for + + * pd.Timestamp + * pd.Period + * np.datetime64 + * datetime.datetime + * datetime.date + * datetime.time + + See Also + -------- + deregister_matplotlib_converters : Remove pandas formatters and converters. + """ + plot_backend = _get_plot_backend("matplotlib") + plot_backend.register() + + +def deregister() -> None: + """ + Remove pandas formatters and converters. + + Removes the custom converters added by :func:`register`. This + attempts to set the state of the registry back to the state before + pandas registered its own units. Converters for pandas' own types like + Timestamp and Period are removed completely. Converters for types + pandas overwrites, like ``datetime.datetime``, are restored to their + original value. + + See Also + -------- + register_matplotlib_converters : Register pandas formatters and converters + with matplotlib. + """ + plot_backend = _get_plot_backend("matplotlib") + plot_backend.deregister() + + +def scatter_matrix( + frame: DataFrame, + alpha: float = 0.5, + figsize: tuple[float, float] | None = None, + ax: Axes | None = None, + grid: bool = False, + diagonal: str = "hist", + marker: str = ".", + density_kwds=None, + hist_kwds=None, + range_padding: float = 0.05, + **kwargs, +) -> np.ndarray: + """ + Draw a matrix of scatter plots. + + Parameters + ---------- + frame : DataFrame + alpha : float, optional + Amount of transparency applied. + figsize : (float,float), optional + A tuple (width, height) in inches. + ax : Matplotlib axis object, optional + grid : bool, optional + Setting this to True will show the grid. + diagonal : {'hist', 'kde'} + Pick between 'kde' and 'hist' for either Kernel Density Estimation or + Histogram plot in the diagonal. + marker : str, optional + Matplotlib marker type, default '.'. + density_kwds : keywords + Keyword arguments to be passed to kernel density estimate plot. + hist_kwds : keywords + Keyword arguments to be passed to hist function. + range_padding : float, default 0.05 + Relative extension of axis range in x and y with respect to + (x_max - x_min) or (y_max - y_min). + **kwargs + Keyword arguments to be passed to scatter function. + + Returns + ------- + numpy.ndarray + A matrix of scatter plots. + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) + >>> pd.plotting.scatter_matrix(df, alpha=0.2) + array([[, + , + , + ], + [, + , + , + ], + [, + , + , + ], + [, + , + , + ]], dtype=object) + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.scatter_matrix( + frame=frame, + alpha=alpha, + figsize=figsize, + ax=ax, + grid=grid, + diagonal=diagonal, + marker=marker, + density_kwds=density_kwds, + hist_kwds=hist_kwds, + range_padding=range_padding, + **kwargs, + ) + + +def radviz( + frame: DataFrame, + class_column: str, + ax: Axes | None = None, + color: list[str] | tuple[str, ...] | None = None, + colormap=None, + **kwds, +) -> Axes: + """ + Plot a multidimensional dataset in 2D. + + Each Series in the DataFrame is represented as a evenly distributed + slice on a circle. Each data point is rendered in the circle according to + the value on each Series. Highly correlated `Series` in the `DataFrame` + are placed closer on the unit circle. + + RadViz allow to project a N-dimensional data set into a 2D space where the + influence of each dimension can be interpreted as a balance between the + influence of all dimensions. 
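+
+    Concretely, each row is drawn at the weighted average of the column
+    "anchor" points placed evenly on the unit circle, the weights being the
+    min-max normalised values of that row.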
+ + More info available at the `original article + `_ + describing RadViz. + + Parameters + ---------- + frame : `DataFrame` + Object holding the data. + class_column : str + Column name containing the name of the data point category. + ax : :class:`matplotlib.axes.Axes`, optional + A plot instance to which to add the information. + color : list[str] or tuple[str], optional + Assign a color to each category. Example: ['blue', 'green']. + colormap : str or :class:`matplotlib.colors.Colormap`, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + **kwds + Options to pass to matplotlib scatter plotting method. + + Returns + ------- + class:`matplotlib.axes.Axes` + + See Also + -------- + plotting.andrews_curves : Plot clustering visualization. + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame( + ... { + ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6], + ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6], + ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0], + ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2], + ... 'Category': [ + ... 'virginica', + ... 'virginica', + ... 'setosa', + ... 'virginica', + ... 'virginica', + ... 'versicolor', + ... 'versicolor', + ... 'setosa', + ... 'virginica', + ... 'setosa' + ... ] + ... } + ... ) + >>> pd.plotting.radviz(df, 'Category') + + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.radviz( + frame=frame, + class_column=class_column, + ax=ax, + color=color, + colormap=colormap, + **kwds, + ) + + +def andrews_curves( + frame: DataFrame, + class_column: str, + ax: Axes | None = None, + samples: int = 200, + color: list[str] | tuple[str, ...] | None = None, + colormap=None, + **kwargs, +) -> Axes: + """ + Generate a matplotlib plot for visualising clusters of multivariate data. + + Andrews curves have the functional form: + + f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + + x_4 sin(2t) + x_5 cos(2t) + ... + + Where x coefficients correspond to the values of each dimension and t is + linearly spaced between -pi and +pi. Each row of frame then corresponds to + a single curve. + + Parameters + ---------- + frame : DataFrame + Data to be plotted, preferably normalized to (0.0, 1.0). + class_column : Name of the column containing class names + ax : matplotlib axes object, default None + samples : Number of points to plot in each curve + color : list or tuple, optional + Colors to use for the different classes. + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + class:`matplotlip.axis.Axes` + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> df = pd.read_csv( + ... 'https://raw.githubusercontent.com/pandas-dev/' + ... 'pandas/main/pandas/tests/io/data/csv/iris.csv' + ... ) + >>> pd.plotting.andrews_curves(df, 'Name') + + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.andrews_curves( + frame=frame, + class_column=class_column, + ax=ax, + samples=samples, + color=color, + colormap=colormap, + **kwargs, + ) + + +def bootstrap_plot( + series: Series, + fig: Figure | None = None, + size: int = 50, + samples: int = 500, + **kwds, +) -> Figure: + """ + Bootstrap plot on mean, median and mid-range statistics. 
+ + The bootstrap plot is used to estimate the uncertainty of a statistic + by relaying on random sampling with replacement [1]_. This function will + generate bootstrapping plots for mean, median and mid-range statistics + for the given number of samples of the given size. + + .. [1] "Bootstrapping (statistics)" in \ + https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29 + + Parameters + ---------- + series : pandas.Series + Series from where to get the samplings for the bootstrapping. + fig : matplotlib.figure.Figure, default None + If given, it will use the `fig` reference for plotting instead of + creating a new one with default parameters. + size : int, default 50 + Number of data points to consider during each sampling. It must be + less than or equal to the length of the `series`. + samples : int, default 500 + Number of times the bootstrap procedure is performed. + **kwds + Options to pass to matplotlib plotting method. + + Returns + ------- + matplotlib.figure.Figure + Matplotlib figure. + + See Also + -------- + DataFrame.plot : Basic plotting for DataFrame objects. + Series.plot : Basic plotting for Series objects. + + Examples + -------- + This example draws a basic bootstrap plot for a Series. + + .. plot:: + :context: close-figs + + >>> s = pd.Series(np.random.uniform(size=100)) + >>> pd.plotting.bootstrap_plot(s) +
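+
+    The resulting figure has six panels: the bootstrapped mean, median and
+    midrange of each sample in the top row, and their histograms in the
+    bottom row.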
    + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.bootstrap_plot( + series=series, fig=fig, size=size, samples=samples, **kwds + ) + + +def parallel_coordinates( + frame: DataFrame, + class_column: str, + cols: list[str] | None = None, + ax: Axes | None = None, + color: list[str] | tuple[str, ...] | None = None, + use_columns: bool = False, + xticks: list | tuple | None = None, + colormap=None, + axvlines: bool = True, + axvlines_kwds=None, + sort_labels: bool = False, + **kwargs, +) -> Axes: + """ + Parallel coordinates plotting. + + Parameters + ---------- + frame : DataFrame + class_column : str + Column name containing class names. + cols : list, optional + A list of column names to use. + ax : matplotlib.axis, optional + Matplotlib axis object. + color : list or tuple, optional + Colors to use for the different classes. + use_columns : bool, optional + If true, columns will be used as xticks. + xticks : list or tuple, optional + A list of values to use for xticks. + colormap : str or matplotlib colormap, default None + Colormap to use for line colors. + axvlines : bool, optional + If true, vertical lines will be added at each xtick. + axvlines_kwds : keywords, optional + Options to be passed to axvline method for vertical lines. + sort_labels : bool, default False + Sort class_column labels, useful when assigning colors. + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + class:`matplotlib.axis.Axes` + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> df = pd.read_csv( + ... 'https://raw.githubusercontent.com/pandas-dev/' + ... 'pandas/main/pandas/tests/io/data/csv/iris.csv' + ... ) + >>> pd.plotting.parallel_coordinates( + ... df, 'Name', color=('#556270', '#4ECDC4', '#C7F464') + ... ) + + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.parallel_coordinates( + frame=frame, + class_column=class_column, + cols=cols, + ax=ax, + color=color, + use_columns=use_columns, + xticks=xticks, + colormap=colormap, + axvlines=axvlines, + axvlines_kwds=axvlines_kwds, + sort_labels=sort_labels, + **kwargs, + ) + + +def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes: + """ + Lag plot for time series. + + Parameters + ---------- + series : Time series + lag : lag of the scatter plot, default 1 + ax : Matplotlib axis object, optional + **kwds + Matplotlib scatter method keyword arguments. + + Returns + ------- + class:`matplotlib.axis.Axes` + + Examples + -------- + + Lag plots are most commonly used to look for patterns in time series data. + + Given the following time series + + .. plot:: + :context: close-figs + + >>> np.random.seed(5) + >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50)) + >>> s = pd.Series(x) + >>> s.plot() + + + A lag plot with ``lag=1`` returns + + .. plot:: + :context: close-figs + + >>> pd.plotting.lag_plot(s, lag=1) + + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) + + +def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Axes: + """ + Autocorrelation plot for time series. + + Parameters + ---------- + series : Time series + ax : Matplotlib axis object, optional + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + class:`matplotlib.axis.Axes` + + Examples + -------- + + The horizontal lines in the plot correspond to 95% and 99% confidence bands. + + The dashed line is 99% confidence band. + + .. 
plot:: + :context: close-figs + + >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000) + >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing)) + >>> pd.plotting.autocorrelation_plot(s) + + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs) + + +class _Options(dict): + """ + Stores pandas plotting options. + + Allows for parameter aliasing so you can just use parameter names that are + the same as the plot function parameters, but is stored in a canonical + format that makes it easy to breakdown into groups later. + """ + + # alias so the names are same as plotting method parameter names + _ALIASES = {"x_compat": "xaxis.compat"} + _DEFAULT_KEYS = ["xaxis.compat"] + + def __init__(self, deprecated: bool = False) -> None: + self._deprecated = deprecated + super().__setitem__("xaxis.compat", False) + + def __getitem__(self, key): + key = self._get_canonical_key(key) + if key not in self: + raise ValueError(f"{key} is not a valid pandas plotting option") + return super().__getitem__(key) + + def __setitem__(self, key, value) -> None: + key = self._get_canonical_key(key) + super().__setitem__(key, value) + + def __delitem__(self, key) -> None: + key = self._get_canonical_key(key) + if key in self._DEFAULT_KEYS: + raise ValueError(f"Cannot remove default parameter {key}") + super().__delitem__(key) + + def __contains__(self, key) -> bool: + key = self._get_canonical_key(key) + return super().__contains__(key) + + def reset(self) -> None: + """ + Reset the option store to its initial state + + Returns + ------- + None + """ + # error: Cannot access "__init__" directly + self.__init__() # type: ignore[misc] + + def _get_canonical_key(self, key): + return self._ALIASES.get(key, key) + + @contextmanager + def use(self, key, value) -> Iterator[_Options]: + """ + Temporarily set a parameter value using the with statement. + Aliasing allowed. + """ + old_value = self[key] + try: + self[key] = value + yield self + finally: + self[key] = old_value + + +plot_params = _Options() diff --git a/pandas/testing.py b/pandas/testing.py new file mode 100644 index 00000000..841b55df --- /dev/null +++ b/pandas/testing.py @@ -0,0 +1,18 @@ +""" +Public testing utility functions. 
+""" + + +from pandas._testing import ( + assert_extension_array_equal, + assert_frame_equal, + assert_index_equal, + assert_series_equal, +) + +__all__ = [ + "assert_extension_array_equal", + "assert_frame_equal", + "assert_series_equal", + "assert_index_equal", +] diff --git a/pandas/tests/__init__.py b/pandas/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/api/__init__.py b/pandas/tests/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py new file mode 100644 index 00000000..c2db9698 --- /dev/null +++ b/pandas/tests/api/test_api.py @@ -0,0 +1,332 @@ +from __future__ import annotations + +import subprocess +import sys + +import pytest + +import pandas as pd +from pandas import api +import pandas._testing as tm + + +class Base: + def check(self, namespace, expected, ignored=None): + # see which names are in the namespace, minus optional + # ignored ones + # compare vs the expected + + result = sorted( + f for f in dir(namespace) if not f.startswith("__") and f != "annotations" + ) + if ignored is not None: + result = sorted(set(result) - set(ignored)) + + expected = sorted(expected) + tm.assert_almost_equal(result, expected) + + +class TestPDApi(Base): + # these are optionally imported based on testing + # & need to be ignored + ignored = ["tests", "locale", "conftest"] + + # top-level sub-packages + public_lib = [ + "api", + "arrays", + "options", + "test", + "testing", + "errors", + "plotting", + "io", + "tseries", + ] + private_lib = ["compat", "core", "pandas", "util"] + + # these are already deprecated; awaiting removal + deprecated_modules: list[str] = ["np", "datetime"] + + # misc + misc = ["IndexSlice", "NaT", "NA"] + + # top-level classes + classes = [ + "ArrowDtype", + "Categorical", + "CategoricalIndex", + "DataFrame", + "DateOffset", + "DatetimeIndex", + "ExcelFile", + "ExcelWriter", + "Float64Index", + "Flags", + "Grouper", + "HDFStore", + "Index", + "Int64Index", + "MultiIndex", + "Period", + "PeriodIndex", + "RangeIndex", + "UInt64Index", + "Series", + "SparseDtype", + "StringDtype", + "Timedelta", + "TimedeltaIndex", + "Timestamp", + "Interval", + "IntervalIndex", + "CategoricalDtype", + "PeriodDtype", + "IntervalDtype", + "DatetimeTZDtype", + "BooleanDtype", + "Int8Dtype", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "UInt8Dtype", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "Float32Dtype", + "Float64Dtype", + "NamedAgg", + ] + + # these are already deprecated; awaiting removal + deprecated_classes: list[str] = ["Float64Index", "Int64Index", "UInt64Index"] + + # these should be deprecated in the future + deprecated_classes_in_future: list[str] = ["SparseArray"] + + # external modules exposed in pandas namespace + modules: list[str] = [] + + # top-level functions + funcs = [ + "array", + "bdate_range", + "concat", + "crosstab", + "cut", + "date_range", + "interval_range", + "eval", + "factorize", + "get_dummies", + "from_dummies", + "infer_freq", + "isna", + "isnull", + "lreshape", + "melt", + "notna", + "notnull", + "offsets", + "merge", + "merge_ordered", + "merge_asof", + "period_range", + "pivot", + "pivot_table", + "qcut", + "show_versions", + "timedelta_range", + "unique", + "value_counts", + "wide_to_long", + ] + + # top-level option funcs + funcs_option = [ + "reset_option", + "describe_option", + "get_option", + "option_context", + "set_option", + "set_eng_float_format", + ] + + # top-level read_* funcs + funcs_read = [ + 
"read_clipboard", + "read_csv", + "read_excel", + "read_fwf", + "read_gbq", + "read_hdf", + "read_html", + "read_xml", + "read_json", + "read_pickle", + "read_sas", + "read_sql", + "read_sql_query", + "read_sql_table", + "read_stata", + "read_table", + "read_feather", + "read_parquet", + "read_orc", + "read_spss", + ] + + # top-level json funcs + funcs_json = ["json_normalize"] + + # top-level to_* funcs + funcs_to = ["to_datetime", "to_numeric", "to_pickle", "to_timedelta"] + + # top-level to deprecate in the future + deprecated_funcs_in_future: list[str] = [] + + # these are already deprecated; awaiting removal + deprecated_funcs: list[str] = [] + + # private modules in pandas namespace + private_modules = [ + "_config", + "_libs", + "_is_numpy_dev", + "_testing", + "_typing", + "_version", + ] + + def test_api(self): + + checkthese = ( + self.public_lib + + self.private_lib + + self.misc + + self.modules + + self.classes + + self.funcs + + self.funcs_option + + self.funcs_read + + self.funcs_json + + self.funcs_to + + self.private_modules + ) + self.check(namespace=pd, expected=checkthese, ignored=self.ignored) + + def test_api_all(self): + expected = set( + self.public_lib + + self.misc + + self.modules + + self.classes + + self.funcs + + self.funcs_option + + self.funcs_read + + self.funcs_json + + self.funcs_to + ) - set(self.deprecated_classes) + actual = set(pd.__all__) + + extraneous = actual - expected + assert not extraneous + + missing = expected - actual + assert not missing + + def test_depr(self): + deprecated_list = ( + self.deprecated_modules + + self.deprecated_classes + + self.deprecated_classes_in_future + + self.deprecated_funcs + + self.deprecated_funcs_in_future + ) + for depr in deprecated_list: + with tm.assert_produces_warning(FutureWarning): + _ = getattr(pd, depr) + + +def test_datetime(): + from datetime import datetime + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert datetime(2015, 1, 2, 0, 0) == datetime(2015, 1, 2, 0, 0) + + assert isinstance(datetime(2015, 1, 2, 0, 0), datetime) + + +def test_sparsearray(): + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert isinstance(pd.array([1, 2, 3], dtype="Sparse"), pd.SparseArray) + + +def test_np(): + import warnings + + import numpy as np + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert (pd.np.arange(0, 10) == np.arange(0, 10)).all() + + +class TestApi(Base): + allowed = ["types", "extensions", "indexers", "interchange"] + + def test_api(self): + self.check(api, self.allowed) + + +class TestTesting(Base): + funcs = [ + "assert_frame_equal", + "assert_series_equal", + "assert_index_equal", + "assert_extension_array_equal", + ] + + def test_testing(self): + from pandas import testing # noqa: PDF015 + + self.check(testing, self.funcs) + + def test_util_testing_deprecated(self): + # avoid cache state affecting the test + sys.modules.pop("pandas.util.testing", None) + + with tm.assert_produces_warning(FutureWarning) as m: + import pandas.util.testing # noqa: F401 + + assert "pandas.util.testing is deprecated" in str(m[0].message) + assert "pandas.testing instead" in str(m[0].message) + + def test_util_testing_deprecated_direct(self): + # avoid cache state affecting the test + sys.modules.pop("pandas.util.testing", None) + with tm.assert_produces_warning(FutureWarning) as m: + from pandas.util.testing import assert_series_equal # noqa: F401 + + assert 
"pandas.util.testing is deprecated" in str(m[0].message) + assert "pandas.testing instead" in str(m[0].message) + + def test_util_in_top_level(self): + # in a subprocess to avoid import caching issues + out = subprocess.check_output( + [ + sys.executable, + "-c", + "import pandas; pandas.util.testing.assert_series_equal", + ], + stderr=subprocess.STDOUT, + ).decode() + assert "pandas.util.testing is deprecated" in out + + with pytest.raises(AttributeError, match="foo"): + pd.util.foo diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py new file mode 100644 index 00000000..7b6cc941 --- /dev/null +++ b/pandas/tests/api/test_types.py @@ -0,0 +1,63 @@ +import pandas._testing as tm +from pandas.api import types +from pandas.tests.api.test_api import Base + + +class TestTypes(Base): + + allowed = [ + "is_bool", + "is_bool_dtype", + "is_categorical", + "is_categorical_dtype", + "is_complex", + "is_complex_dtype", + "is_datetime64_any_dtype", + "is_datetime64_dtype", + "is_datetime64_ns_dtype", + "is_datetime64tz_dtype", + "is_dtype_equal", + "is_float", + "is_float_dtype", + "is_int64_dtype", + "is_integer", + "is_integer_dtype", + "is_number", + "is_numeric_dtype", + "is_object_dtype", + "is_scalar", + "is_sparse", + "is_string_dtype", + "is_signed_integer_dtype", + "is_timedelta64_dtype", + "is_timedelta64_ns_dtype", + "is_unsigned_integer_dtype", + "is_period_dtype", + "is_interval", + "is_interval_dtype", + "is_re", + "is_re_compilable", + "is_dict_like", + "is_iterator", + "is_file_like", + "is_list_like", + "is_hashable", + "is_array_like", + "is_named_tuple", + "pandas_dtype", + "union_categoricals", + "infer_dtype", + "is_extension_array_dtype", + ] + deprecated = ["is_extension_type"] + dtypes = ["CategoricalDtype", "DatetimeTZDtype", "PeriodDtype", "IntervalDtype"] + + def test_types(self): + + self.check(types, self.allowed + self.dtypes + self.deprecated) + + def test_deprecated_from_api_types(self): + + for t in self.deprecated: + with tm.assert_produces_warning(FutureWarning): + getattr(types, t)(1) diff --git a/pandas/tests/apply/__init__.py b/pandas/tests/apply/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/apply/common.py b/pandas/tests/apply/common.py new file mode 100644 index 00000000..91b831bc --- /dev/null +++ b/pandas/tests/apply/common.py @@ -0,0 +1,10 @@ +from pandas.core.groupby.base import transformation_kernels + +# tshift only works on time index and is deprecated +# There is no Series.cumcount or DataFrame.cumcount +series_transform_kernels = [ + x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] +] +frame_transform_kernels = [ + x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] +] diff --git a/pandas/tests/apply/conftest.py b/pandas/tests/apply/conftest.py new file mode 100644 index 00000000..b68c6235 --- /dev/null +++ b/pandas/tests/apply/conftest.py @@ -0,0 +1,18 @@ +import numpy as np +import pytest + +from pandas import DataFrame + + +@pytest.fixture +def int_frame_const_col(): + """ + Fixture for DataFrame of ints which are constant per column + + Columns are ['A', 'B', 'C'], with values (per column): [1, 2, 3] + """ + df = DataFrame( + np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1, + columns=["A", "B", "C"], + ) + return df diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py new file mode 100644 index 00000000..faa89e55 --- /dev/null +++ b/pandas/tests/apply/test_frame_apply.py @@ -0,0 +1,1662 
@@ +from datetime import datetime +import warnings + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.tests.frame.common import zip_frames + + +def test_apply(float_frame): + with np.errstate(all="ignore"): + # ufunc + result = np.sqrt(float_frame["A"]) + expected = float_frame.apply(np.sqrt)["A"] + tm.assert_series_equal(result, expected) + + # aggregator + result = float_frame.apply(np.mean)["A"] + expected = np.mean(float_frame["A"]) + assert result == expected + + d = float_frame.index[0] + result = float_frame.apply(np.mean, axis=1) + expected = np.mean(float_frame.xs(d)) + assert result[d] == expected + assert result.index is float_frame.index + + +def test_apply_categorical_func(): + # GH 9573 + df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) + result = df.apply(lambda ts: ts.astype("category")) + + assert result.shape == (4, 2) + assert isinstance(result["c0"].dtype, CategoricalDtype) + assert isinstance(result["c1"].dtype, CategoricalDtype) + + +def test_apply_axis1_with_ea(): + # GH#36785 + expected = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) + result = expected.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, dtype", + [(1, None), (1, CategoricalDtype([1])), (Timestamp("2013-01-01", tz="UTC"), None)], +) +def test_agg_axis1_duplicate_index(data, dtype): + # GH 42380 + expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype) + result = expected.agg(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) + + +def test_apply_mixed_datetimelike(): + # mixed datetimelike + # GH 7778 + expected = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": pd.to_timedelta(np.arange(3), unit="s"), + } + ) + result = expected.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", [np.sqrt, np.mean]) +def test_apply_empty(func): + # empty + empty_frame = DataFrame() + + result = empty_frame.apply(func) + assert result.empty + + +def test_apply_float_frame(float_frame): + no_rows = float_frame[:0] + result = no_rows.apply(lambda x: x.mean()) + expected = Series(np.nan, index=float_frame.columns) + tm.assert_series_equal(result, expected) + + no_cols = float_frame.loc[:, []] + result = no_cols.apply(lambda x: x.mean(), axis=1) + expected = Series(np.nan, index=float_frame.index) + tm.assert_series_equal(result, expected) + + +def test_apply_empty_except_index(): + # GH 2476 + expected = DataFrame(index=["a"]) + result = expected.apply(lambda x: x["a"], axis=1) + tm.assert_frame_equal(result, expected) + + +def test_apply_with_reduce_empty(): + # reduce with an empty DataFrame + empty_frame = DataFrame() + + x = [] + result = empty_frame.apply(x.append, axis=1, result_type="expand") + tm.assert_frame_equal(result, empty_frame) + result = empty_frame.apply(x.append, axis=1, result_type="reduce") + expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + tm.assert_series_equal(result, expected) + + empty_with_cols = DataFrame(columns=["a", "b", "c"]) + result = empty_with_cols.apply(x.append, axis=1, result_type="expand") + tm.assert_frame_equal(result, empty_with_cols) + result = empty_with_cols.apply(x.append, axis=1, result_type="reduce") + expected = Series([], index=pd.Index([], dtype=object), 
dtype=np.float64) + tm.assert_series_equal(result, expected) + + # Ensure that x.append hasn't been called + assert x == [] + + +@pytest.mark.parametrize("func", ["sum", "prod", "any", "all"]) +def test_apply_funcs_over_empty(func): + # GH 28213 + df = DataFrame(columns=["a", "b", "c"]) + + result = df.apply(getattr(np, func)) + expected = getattr(df, func)() + tm.assert_series_equal(result, expected) + + +def test_nunique_empty(): + # GH 28213 + df = DataFrame(columns=["a", "b", "c"]) + + result = df.nunique() + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + result = df.T.nunique() + expected = Series([], index=pd.Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + +def test_apply_standard_nonunique(): + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) + + result = df.apply(lambda s: s[0], axis=1) + expected = Series([1, 4, 7], ["a", "a", "c"]) + tm.assert_series_equal(result, expected) + + result = df.T.apply(lambda s: s[0], axis=0) + tm.assert_series_equal(result, expected) + + +def test_apply_broadcast_scalars(float_frame): + # scalars + result = float_frame.apply(np.mean, result_type="broadcast") + expected = DataFrame([float_frame.mean()], index=float_frame.index) + tm.assert_frame_equal(result, expected) + + +def test_apply_broadcast_scalars_axis1(float_frame): + result = float_frame.apply(np.mean, axis=1, result_type="broadcast") + m = float_frame.mean(axis=1) + expected = DataFrame({c: m for c in float_frame.columns}) + tm.assert_frame_equal(result, expected) + + +def test_apply_broadcast_lists_columns(float_frame): + # lists + result = float_frame.apply( + lambda x: list(range(len(float_frame.columns))), + axis=1, + result_type="broadcast", + ) + m = list(range(len(float_frame.columns))) + expected = DataFrame( + [m] * len(float_frame.index), + dtype="float64", + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_broadcast_lists_index(float_frame): + result = float_frame.apply( + lambda x: list(range(len(float_frame.index))), result_type="broadcast" + ) + m = list(range(len(float_frame.index))) + expected = DataFrame( + {c: m for c in float_frame.columns}, + dtype="float64", + index=float_frame.index, + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_broadcast_list_lambda_func(int_frame_const_col): + # preserve columns + df = int_frame_const_col + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="broadcast") + tm.assert_frame_equal(result, df) + + +def test_apply_broadcast_series_lambda_func(int_frame_const_col): + df = int_frame_const_col + result = df.apply( + lambda x: Series([1, 2, 3], index=list("abc")), + axis=1, + result_type="broadcast", + ) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_raw_float_frame(float_frame, axis): + def _assert_raw(x): + assert isinstance(x, np.ndarray) + assert x.ndim == 1 + + float_frame.apply(_assert_raw, axis=axis, raw=True) + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_raw_float_frame_lambda(float_frame, axis): + result = float_frame.apply(np.mean, axis=axis, raw=True) + expected = float_frame.apply(lambda x: x.values.mean(), axis=axis) + tm.assert_series_equal(result, expected) + + +def test_apply_raw_float_frame_no_reduction(float_frame): + # no reduction + result = float_frame.apply(lambda x: x * 2, raw=True) + expected = float_frame * 2 + 
tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_raw_mixed_type_frame(mixed_type_frame, axis): + def _assert_raw(x): + assert isinstance(x, np.ndarray) + assert x.ndim == 1 + + # Mixed dtype (GH-32423) + mixed_type_frame.apply(_assert_raw, axis=axis, raw=True) + + +def test_apply_axis1(float_frame): + d = float_frame.index[0] + result = float_frame.apply(np.mean, axis=1)[d] + expected = np.mean(float_frame.xs(d)) + assert result == expected + + +def test_apply_mixed_dtype_corner(): + df = DataFrame({"A": ["foo"], "B": [1.0]}) + result = df[:0].apply(np.mean, axis=1) + # the result here is actually kind of ambiguous, should it be a Series + # or a DataFrame? + expected = Series(np.nan, index=pd.Index([], dtype="int64")) + tm.assert_series_equal(result, expected) + + +def test_apply_mixed_dtype_corner_indexing(): + df = DataFrame({"A": ["foo"], "B": [1.0]}) + result = df.apply(lambda x: x["A"], axis=1) + expected = Series(["foo"], index=[0]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: x["B"], axis=1) + expected = Series([1.0], index=[0]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ax", ["index", "columns"]) +@pytest.mark.parametrize( + "func", [lambda x: x, lambda x: x.mean()], ids=["identity", "mean"] +) +@pytest.mark.parametrize("raw", [True, False]) +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_empty_infer_type(ax, func, raw, axis): + df = DataFrame(**{ax: ["a", "b", "c"]}) + + with np.errstate(all="ignore"): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + test_res = func(np.array([], dtype="f8")) + is_reduction = not isinstance(test_res, np.ndarray) + + result = df.apply(func, axis=axis, raw=raw) + if is_reduction: + agg_axis = df._get_agg_axis(axis) + assert isinstance(result, Series) + assert result.index is agg_axis + else: + assert isinstance(result, DataFrame) + + +def test_apply_empty_infer_type_broadcast(): + no_cols = DataFrame(index=["a", "b", "c"]) + result = no_cols.apply(lambda x: x.mean(), result_type="broadcast") + assert isinstance(result, DataFrame) + + +def test_apply_with_args_kwds_add_some(float_frame): + def add_some(x, howmuch=0): + return x + howmuch + + result = float_frame.apply(add_some, howmuch=2) + expected = float_frame.apply(lambda x: x + 2) + tm.assert_frame_equal(result, expected) + + +def test_apply_with_args_kwds_agg_and_add(float_frame): + def agg_and_add(x, howmuch=0): + return x.mean() + howmuch + + result = float_frame.apply(agg_and_add, howmuch=2) + expected = float_frame.apply(lambda x: x.mean() + 2) + tm.assert_series_equal(result, expected) + + +def test_apply_with_args_kwds_subtract_and_divide(float_frame): + def subtract_and_divide(x, sub, divide=1): + return (x - sub) / divide + + result = float_frame.apply(subtract_and_divide, args=(2,), divide=2) + expected = float_frame.apply(lambda x: (x - 2.0) / 2.0) + tm.assert_frame_equal(result, expected) + + +def test_apply_yield_list(float_frame): + result = float_frame.apply(list) + tm.assert_frame_equal(result, float_frame) + + +def test_apply_reduce_Series(float_frame): + float_frame["A"].iloc[::2] = np.nan + expected = float_frame.mean(1) + result = float_frame.apply(np.mean, axis=1) + tm.assert_series_equal(result, expected) + + +def test_apply_reduce_to_dict(): + # GH 25196 37544 + data = DataFrame([[1, 2], [3, 4]], columns=["c0", "c1"], index=["i0", "i1"]) + + result = data.apply(dict, axis=0) + expected = Series([{"i0": 
1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) + tm.assert_series_equal(result, expected) + + result = data.apply(dict, axis=1) + expected = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) + tm.assert_series_equal(result, expected) + + +def test_apply_differently_indexed(): + df = DataFrame(np.random.randn(20, 10)) + + result = df.apply(Series.describe, axis=0) + expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) + tm.assert_frame_equal(result, expected) + + result = df.apply(Series.describe, axis=1) + expected = DataFrame({i: v.describe() for i, v in df.T.items()}, columns=df.index).T + tm.assert_frame_equal(result, expected) + + +def test_apply_bug(): + + # GH 6125 + positions = DataFrame( + [ + [1, "ABC0", 50], + [1, "YUM0", 20], + [1, "DEF0", 20], + [2, "ABC1", 50], + [2, "YUM1", 20], + [2, "DEF1", 20], + ], + columns=["a", "market", "position"], + ) + + def f(r): + return r["market"] + + expected = positions.apply(f, axis=1) + + positions = DataFrame( + [ + [datetime(2013, 1, 1), "ABC0", 50], + [datetime(2013, 1, 2), "YUM0", 20], + [datetime(2013, 1, 3), "DEF0", 20], + [datetime(2013, 1, 4), "ABC1", 50], + [datetime(2013, 1, 5), "YUM1", 20], + [datetime(2013, 1, 6), "DEF1", 20], + ], + columns=["a", "market", "position"], + ) + result = positions.apply(f, axis=1) + tm.assert_series_equal(result, expected) + + +def test_apply_convert_objects(): + expected = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + result = expected.apply(lambda x: x, axis=1)._convert(datetime=True) + tm.assert_frame_equal(result, expected) + + +def test_apply_attach_name(float_frame): + result = float_frame.apply(lambda x: x.name) + expected = Series(float_frame.columns, index=float_frame.columns) + tm.assert_series_equal(result, expected) + + +def test_apply_attach_name_axis1(float_frame): + result = float_frame.apply(lambda x: x.name, axis=1) + expected = Series(float_frame.index, index=float_frame.index) + tm.assert_series_equal(result, expected) + + +def test_apply_attach_name_non_reduction(float_frame): + # non-reductions + result = float_frame.apply(lambda x: np.repeat(x.name, len(x))) + expected = DataFrame( + np.tile(float_frame.columns, (len(float_frame.index), 1)), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_attach_name_non_reduction_axis1(float_frame): + result = float_frame.apply(lambda x: np.repeat(x.name, len(x)), axis=1) + expected = Series( + np.repeat(t[0], len(float_frame.columns)) for t in float_frame.itertuples() + ) + expected.index = float_frame.index + tm.assert_series_equal(result, expected) + + +def test_apply_multi_index(): + index = MultiIndex.from_arrays([["a", "a", "b"], ["c", "d", "d"]]) + s = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, columns=["col1", "col2"]) + result = s.apply(lambda x: Series({"min": min(x), "max": max(x)}), 1) + expected = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, columns=["min", "max"]) + tm.assert_frame_equal(result, expected, check_like=True) + + +@pytest.mark.parametrize( + "df, dicts", + [ + [ + 
DataFrame([["foo", "bar"], ["spam", "eggs"]]), + Series([{0: "foo", 1: "spam"}, {0: "bar", 1: "eggs"}]), + ], + [DataFrame([[0, 1], [2, 3]]), Series([{0: 0, 1: 2}, {0: 1, 1: 3}])], + ], +) +def test_apply_dict(df, dicts): + # GH 8735 + fn = lambda x: x.to_dict() + reduce_true = df.apply(fn, result_type="reduce") + reduce_false = df.apply(fn, result_type="expand") + reduce_none = df.apply(fn) + + tm.assert_series_equal(reduce_true, dicts) + tm.assert_frame_equal(reduce_false, df) + tm.assert_series_equal(reduce_none, dicts) + + +def test_applymap(float_frame): + applied = float_frame.applymap(lambda x: x * 2) + tm.assert_frame_equal(applied, float_frame * 2) + float_frame.applymap(type) + + # GH 465: function returning tuples + result = float_frame.applymap(lambda x: (x, x))["A"][0] + assert isinstance(result, tuple) + + +@pytest.mark.parametrize("val", [1, 1.0]) +def test_applymap_float_object_conversion(val): + # GH 2909: object conversion to float in constructor? + df = DataFrame(data=[val, "a"]) + result = df.applymap(lambda x: x).dtypes[0] + assert result == object + + +def test_applymap_str(): + # GH 2786 + df = DataFrame(np.random.random((3, 4))) + df2 = df.copy() + cols = ["a", "a", "a", "a"] + df.columns = cols + + expected = df2.applymap(str) + expected.columns = cols + result = df.applymap(str) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "col, val", + [["datetime", Timestamp("20130101")], ["timedelta", pd.Timedelta("1 min")]], +) +def test_applymap_datetimelike(col, val): + # datetime/timedelta + df = DataFrame(np.random.random((3, 4))) + df[col] = val + result = df.applymap(str) + assert result.loc[0, col] == str(df.loc[0, col]) + + +@pytest.mark.parametrize( + "expected", + [ + DataFrame(), + DataFrame(columns=list("ABC")), + DataFrame(index=list("ABC")), + DataFrame({"A": [], "B": [], "C": []}), + ], +) +@pytest.mark.parametrize("func", [round, lambda x: x]) +def test_applymap_empty(expected, func): + # GH 8222 + result = expected.applymap(func) + tm.assert_frame_equal(result, expected) + + +def test_applymap_kwargs(): + # GH 40652 + result = DataFrame([[1, 2], [3, 4]]).applymap(lambda x, y: x + y, y=2) + expected = DataFrame([[3, 4], [5, 6]]) + tm.assert_frame_equal(result, expected) + + +def test_applymap_na_ignore(float_frame): + # GH 23803 + strlen_frame = float_frame.applymap(lambda x: len(str(x))) + float_frame_with_na = float_frame.copy() + mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool) + float_frame_with_na[mask] = pd.NA + strlen_frame_na_ignore = float_frame_with_na.applymap( + lambda x: len(str(x)), na_action="ignore" + ) + strlen_frame_with_na = strlen_frame.copy() + strlen_frame_with_na[mask] = pd.NA + tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na) + + +def test_applymap_box_timestamps(): + # GH 2689, GH 2627 + ser = Series(date_range("1/1/2000", periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + DataFrame(ser).applymap(func) + + +def test_applymap_box(): + # ufunc will not be boxed. 
Same test cases as the test_map_box + df = DataFrame( + { + "a": [Timestamp("2011-01-01"), Timestamp("2011-01-02")], + "b": [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + ], + "c": [pd.Timedelta("1 days"), pd.Timedelta("2 days")], + "d": [ + pd.Period("2011-01-01", freq="M"), + pd.Period("2011-01-02", freq="M"), + ], + } + ) + + result = df.applymap(lambda x: type(x).__name__) + expected = DataFrame( + { + "a": ["Timestamp", "Timestamp"], + "b": ["Timestamp", "Timestamp"], + "c": ["Timedelta", "Timedelta"], + "d": ["Period", "Period"], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_frame_apply_dont_convert_datetime64(): + from pandas.tseries.offsets import BDay + + df = DataFrame({"x1": [datetime(1996, 1, 1)]}) + + df = df.applymap(lambda x: x + BDay()) + df = df.applymap(lambda x: x + BDay()) + + result = df.x1.dtype + assert result == "M8[ns]" + + +def test_apply_non_numpy_dtype(): + # GH 12244 + df = DataFrame({"dt": date_range("2015-01-01", periods=3, tz="Europe/Brussels")}) + result = df.apply(lambda x: x) + tm.assert_frame_equal(result, df) + + result = df.apply(lambda x: x + pd.Timedelta("1day")) + expected = DataFrame( + {"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels")} + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_non_numpy_dtype_category(): + df = DataFrame({"dt": ["a", "b", "c", "a"]}, dtype="category") + result = df.apply(lambda x: x) + tm.assert_frame_equal(result, df) + + +def test_apply_dup_names_multi_agg(): + # GH 21063 + df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) + expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) + result = df.agg(["min"]) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", ["apply", "agg"]) +def test_apply_nested_result_axis_1(op): + # GH 13820 + def apply_list(row): + return [2 * row["A"], 2 * row["C"], 2 * row["B"]] + + df = DataFrame(np.zeros((4, 4)), columns=list("ABCD")) + result = getattr(df, op)(apply_list, axis=1) + expected = Series( + [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] + ) + tm.assert_series_equal(result, expected) + + +def test_apply_noreduction_tzaware_object(): + # https://github.com/pandas-dev/pandas/issues/31505 + expected = DataFrame( + {"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]" + ) + result = expected.apply(lambda x: x) + tm.assert_frame_equal(result, expected) + result = expected.apply(lambda x: x.copy()) + tm.assert_frame_equal(result, expected) + + +def test_apply_function_runs_once(): + # https://github.com/pandas-dev/pandas/issues/30815 + + df = DataFrame({"a": [1, 2, 3]}) + names = [] # Save row names function is applied to + + def reducing_function(row): + names.append(row.name) + + def non_reducing_function(row): + names.append(row.name) + return row + + for func in [reducing_function, non_reducing_function]: + del names[:] + + df.apply(func, axis=1) + assert names == list(df.index) + + +def test_apply_raw_function_runs_once(): + # https://github.com/pandas-dev/pandas/issues/34506 + + df = DataFrame({"a": [1, 2, 3]}) + values = [] # Save row values function is applied to + + def reducing_function(row): + values.extend(row) + + def non_reducing_function(row): + values.extend(row) + return row + + for func in [reducing_function, non_reducing_function]: + del values[:] + + df.apply(func, raw=True, axis=1) + assert values == list(df.a.to_list()) + + +def test_applymap_function_runs_once(): + + df = DataFrame({"a": [1, 2, 3]}) 
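+    # like the apply tests above (GH 30815 / GH 34506), the function should be evaluated exactly once per value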
+ values = [] # Save values function is applied to + + def reducing_function(val): + values.append(val) + + def non_reducing_function(val): + values.append(val) + return val + + for func in [reducing_function, non_reducing_function]: + del values[:] + + df.applymap(func) + assert values == df.a.to_list() + + +def test_apply_with_byte_string(): + # GH 34529 + df = DataFrame(np.array([b"abcd", b"efgh"]), columns=["col"]) + expected = DataFrame(np.array([b"abcd", b"efgh"]), columns=["col"], dtype=object) + # After we make the apply we expect a dataframe just + # like the original but with the object datatype + result = df.apply(lambda x: x.astype("object")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("val", ["asd", 12, None, np.NaN]) +def test_apply_category_equalness(val): + # Check if categorical comparisons on apply, GH 21239 + df_values = ["asd", None, 12, "asd", "cde", np.NaN] + df = DataFrame({"a": df_values}, dtype="category") + + result = df.a.apply(lambda x: x == val) + expected = Series( + [np.NaN if pd.isnull(x) else x == val for x in df_values], name="a" + ) + tm.assert_series_equal(result, expected) + + +# the user has supplied an opaque UDF where +# they are transforming the input that requires +# us to infer the output + + +def test_infer_row_shape(): + # GH 17437 + # if row shape is changing, infer it + df = DataFrame(np.random.rand(10, 2)) + result = df.apply(np.fft.fft, axis=0).shape + assert result == (10, 2) + + result = df.apply(np.fft.rfft, axis=0).shape + assert result == (6, 2) + + +def test_with_dictlike_columns(): + # GH 17602 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1) + expected = Series([{"s": 3} for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + df["tm"] = [ + Timestamp("2017-05-01 00:00:00"), + Timestamp("2017-05-02 00:00:00"), + ] + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1) + tm.assert_series_equal(result, expected) + + # compose a series + result = (df["a"] + df["b"]).apply(lambda x: {"s": x}) + expected = Series([{"s": 3}, {"s": 3}]) + tm.assert_series_equal(result, expected) + + +def test_with_dictlike_columns_with_datetime(): + # GH 18775 + df = DataFrame() + df["author"] = ["X", "Y", "Z"] + df["publisher"] = ["BBC", "NBC", "N24"] + df["date"] = pd.to_datetime( + ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"] + ) + result = df.apply(lambda x: {}, axis=1) + expected = Series([{}, {}, {}]) + tm.assert_series_equal(result, expected) + + +def test_with_dictlike_columns_with_infer(): + # GH 17602 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1, result_type="expand") + expected = DataFrame({"s": [3, 3]}) + tm.assert_frame_equal(result, expected) + + df["tm"] = [ + Timestamp("2017-05-01 00:00:00"), + Timestamp("2017-05-02 00:00:00"), + ] + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1, result_type="expand") + tm.assert_frame_equal(result, expected) + + +def test_with_listlike_columns(): + # GH 17348 + df = DataFrame( + { + "a": Series(np.random.randn(4)), + "b": ["a", "list", "of", "words"], + "ts": date_range("2016-10-01", periods=4, freq="H"), + } + ) + + result = df[["a", "b"]].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[["a", "b"]].itertuples()]) + tm.assert_series_equal(result, expected) + + result = df[["a", "ts"]].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[["a", "ts"]].itertuples()]) 
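+    # itertuples() yields (index, a, ts) per row; t[1:] drops the index so each expected element is just the row values as a tuple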
+ tm.assert_series_equal(result, expected) + + +def test_with_listlike_columns_returning_list(): + # GH 18919 + df = DataFrame({"x": Series([["a", "b"], ["q"]]), "y": Series([["z"], ["q", "t"]])}) + df.index = MultiIndex.from_tuples([("i0", "j0"), ("i1", "j1")]) + + result = df.apply(lambda row: [el for el in row["x"] if el in row["y"]], axis=1) + expected = Series([[], ["q"]], index=df.index) + tm.assert_series_equal(result, expected) + + +def test_infer_output_shape_columns(): + # GH 18573 + + df = DataFrame( + { + "number": [1.0, 2.0], + "string": ["foo", "bar"], + "datetime": [ + Timestamp("2017-11-29 03:30:00"), + Timestamp("2017-11-29 03:45:00"), + ], + } + ) + result = df.apply(lambda row: (row.number, row.string), axis=1) + expected = Series([(t.number, t.string) for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + +def test_infer_output_shape_listlike_columns(): + # GH 16353 + + df = DataFrame(np.random.randn(6, 3), columns=["A", "B", "C"]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + expected = Series([[1, 2, 3] for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1) + expected = Series([[1, 2] for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("val", [1, 2]) +def test_infer_output_shape_listlike_columns_np_func(val): + # GH 17970 + df = DataFrame({"a": [1, 2, 3]}, index=list("abc")) + + result = df.apply(lambda row: np.ones(val), axis=1) + expected = Series([np.ones(val) for t in df.itertuples()], index=df.index) + tm.assert_series_equal(result, expected) + + +def test_infer_output_shape_listlike_columns_with_timestamp(): + # GH 17892 + df = DataFrame( + { + "a": [ + Timestamp("2010-02-01"), + Timestamp("2010-02-04"), + Timestamp("2010-02-05"), + Timestamp("2010-02-06"), + ], + "b": [9, 5, 4, 3], + "c": [5, 3, 4, 2], + "d": [1, 2, 3, 4], + } + ) + + def fun(x): + return (1, 2) + + result = df.apply(fun, axis=1) + expected = Series([(1, 2) for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("lst", [[1, 2, 3], [1, 2]]) +def test_consistent_coerce_for_shapes(lst): + # we want column names to NOT be propagated + # just because the shape matches the input shape + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + + result = df.apply(lambda x: lst, axis=1) + expected = Series([lst for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + +def test_consistent_names(int_frame_const_col): + # if a Series is returned, we should use the resulting index names + df = int_frame_const_col + + result = df.apply( + lambda x: Series([1, 2, 3], index=["test", "other", "cols"]), axis=1 + ) + expected = int_frame_const_col.rename( + columns={"A": "test", "B": "other", "C": "cols"} + ) + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: Series([1, 2], index=["test", "other"]), axis=1) + expected = expected[["test", "other"]] + tm.assert_frame_equal(result, expected) + + +def test_result_type(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") + expected = df.copy() + expected.columns = [0, 1, 2] + tm.assert_frame_equal(result, expected) + + +def test_result_type_shorter_list(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + result = 
df.apply(lambda x: [1, 2], axis=1, result_type="expand") + expected = df[["A", "B"]].copy() + expected.columns = [0, 1] + tm.assert_frame_equal(result, expected) + + +def test_result_type_broadcast(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + # broadcast result + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="broadcast") + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +def test_result_type_broadcast_series_func(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + columns = ["other", "col", "names"] + result = df.apply( + lambda x: Series([1, 2, 3], index=columns), axis=1, result_type="broadcast" + ) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +def test_result_type_series_result(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + # series result + result = df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +def test_result_type_series_result_other_index(int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + # series result with other index + columns = ["other", "col", "names"] + result = df.apply(lambda x: Series([1, 2, 3], index=columns), axis=1) + expected = df.copy() + expected.columns = columns + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "box", + [lambda x: list(x), lambda x: tuple(x), lambda x: np.array(x, dtype="int64")], + ids=["list", "tuple", "array"], +) +def test_consistency_for_boxed(box, int_frame_const_col): + # passing an array or list should not affect the output shape + df = int_frame_const_col + + result = df.apply(lambda x: box([1, 2]), axis=1) + expected = Series([box([1, 2]) for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: box([1, 2]), axis=1, result_type="expand") + expected = int_frame_const_col[["A", "B"]].rename(columns={"A": 0, "B": 1}) + tm.assert_frame_equal(result, expected) + + +def test_agg_transform(axis, float_frame): + other_axis = 1 if axis in {0, "index"} else 0 + + with np.errstate(all="ignore"): + + f_abs = np.abs(float_frame) + f_sqrt = np.sqrt(float_frame) + + # ufunc + expected = f_sqrt.copy() + result = float_frame.apply(np.sqrt, axis=axis) + tm.assert_frame_equal(result, expected) + + # list-like + result = float_frame.apply([np.sqrt], axis=axis) + expected = f_sqrt.copy() + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]]) + else: + expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]]) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both + # functions per series and then concatting + result = float_frame.apply([np.abs, np.sqrt], axis=axis) + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [float_frame.columns, ["absolute", "sqrt"]] + ) + else: + expected.index = MultiIndex.from_product( + [float_frame.index, ["absolute", "sqrt"]] + ) + tm.assert_frame_equal(result, expected) + + +def test_demo(): + # demonstration tests + df = DataFrame({"A": range(5), "B": 5}) + + result = 
df.agg(["min", "max"]) + expected = DataFrame( + {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"] + ) + tm.assert_frame_equal(result, expected) + + +def test_demo_dict_agg(): + # demonstration tests + df = DataFrame({"A": range(5), "B": 5}) + result = df.agg({"A": ["min", "max"], "B": ["sum", "max"]}) + expected = DataFrame( + {"A": [4.0, 0.0, np.nan], "B": [5.0, np.nan, 25.0]}, + columns=["A", "B"], + index=["max", "min", "sum"], + ) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + +def test_agg_with_name_as_column_name(): + # GH 36212 - Column name is "name" + data = {"name": ["foo", "bar"]} + df = DataFrame(data) + + # result's name should be None + result = df.agg({"name": "count"}) + expected = Series({"name": 2}) + tm.assert_series_equal(result, expected) + + # Check if name is still preserved when aggregating series instead + result = df["name"].agg({"name": "count"}) + expected = Series({"name": 2}, name="name") + tm.assert_series_equal(result, expected) + + +def test_agg_multiple_mixed_no_warning(): + # GH 20909 + mdf = DataFrame( + { + "A": [1, 2, 3], + "B": [1.0, 2.0, 3.0], + "C": ["foo", "bar", "baz"], + "D": date_range("20130101", periods=3), + } + ) + expected = DataFrame( + { + "A": [1, 6], + "B": [1.0, 6.0], + "C": ["bar", "foobarbaz"], + "D": [Timestamp("2013-01-01"), pd.NaT], + }, + index=["min", "sum"], + ) + # sorted index + with tm.assert_produces_warning( + FutureWarning, match=r"\['D'\] did not aggregate successfully" + ): + result = mdf.agg(["min", "sum"]) + + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match=r"\['D'\] did not aggregate successfully" + ): + result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) + + # GH40420: the result of .agg should have an index that is sorted + # according to the arguments provided to agg. 
+ expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) + tm.assert_frame_equal(result, expected) + + +def test_agg_reduce(axis, float_frame): + other_axis = 1 if axis in {0, "index"} else 0 + name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() + + # all reducers + expected = pd.concat( + [ + float_frame.mean(axis=axis), + float_frame.max(axis=axis), + float_frame.sum(axis=axis), + ], + axis=1, + ) + expected.columns = ["mean", "max", "sum"] + expected = expected.T if axis in {0, "index"} else expected + + result = float_frame.agg(["mean", "max", "sum"], axis=axis) + tm.assert_frame_equal(result, expected) + + # dict input with scalars + func = {name1: "mean", name2: "sum"} + result = float_frame.agg(func, axis=axis) + expected = Series( + [ + float_frame.loc(other_axis)[name1].mean(), + float_frame.loc(other_axis)[name2].sum(), + ], + index=[name1, name2], + ) + tm.assert_series_equal(result, expected) + + # dict input with lists + func = {name1: ["mean"], name2: ["sum"]} + result = float_frame.agg(func, axis=axis) + expected = DataFrame( + { + name1: Series([float_frame.loc(other_axis)[name1].mean()], index=["mean"]), + name2: Series([float_frame.loc(other_axis)[name2].sum()], index=["sum"]), + } + ) + expected = expected.T if axis in {1, "columns"} else expected + tm.assert_frame_equal(result, expected) + + # dict input with lists with multiple + func = {name1: ["mean", "sum"], name2: ["sum", "max"]} + result = float_frame.agg(func, axis=axis) + expected = pd.concat( + { + name1: Series( + [ + float_frame.loc(other_axis)[name1].mean(), + float_frame.loc(other_axis)[name1].sum(), + ], + index=["mean", "sum"], + ), + name2: Series( + [ + float_frame.loc(other_axis)[name2].sum(), + float_frame.loc(other_axis)[name2].max(), + ], + index=["sum", "max"], + ), + }, + axis=1, + ) + expected = expected.T if axis in {1, "columns"} else expected + tm.assert_frame_equal(result, expected) + + +def test_nuiscance_columns(): + + # GH 15015 + df = DataFrame( + { + "A": [1, 2, 3], + "B": [1.0, 2.0, 3.0], + "C": ["foo", "bar", "baz"], + "D": date_range("20130101", periods=3), + } + ) + + result = df.agg("min") + expected = Series([1, 1.0, "bar", Timestamp("20130101")], index=df.columns) + tm.assert_series_equal(result, expected) + + result = df.agg(["min"]) + expected = DataFrame( + [[1, 1.0, "bar", Timestamp("20130101")]], + index=["min"], + columns=df.columns, + ) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = df.agg("sum") + expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match=r"\['D'\] did not aggregate successfully" + ): + result = df.agg(["sum"]) + expected = DataFrame( + [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["agg", "apply", "transform"]) +def test_numeric_only_warning_numpy(method): + # GH#50538 + df = DataFrame({"a": [1, 1, 2], "b": list("xyz")}) + if method == "agg": + msg = "The operation ", ""] + ) + + tm.assert_frame_equal(result, expected) + + +def test_apply_raw_returns_string(): + # https://github.com/pandas-dev/pandas/issues/35940 + df = DataFrame({"A": ["aa", "bbb"]}) + result = df.apply(lambda x: x[0], axis=1, raw=True) + expected = Series(["aa", "bbb"]) + tm.assert_series_equal(result, expected) + + +def test_aggregation_func_column_order(): + # 
GH40420: the result of .agg should have an index that is sorted + # according to the arguments provided to agg. + df = DataFrame( + [ + ("1", 1, 0, 0), + ("2", 2, 0, 0), + ("3", 3, 0, 0), + ("4", 4, 5, 4), + ("5", 5, 6, 6), + ("6", 6, 7, 7), + ], + columns=("item", "att1", "att2", "att3"), + ) + + def foo(s): + return s.sum() / 2 + + aggs = ["sum", foo, "count", "min"] + with tm.assert_produces_warning( + FutureWarning, match=r"\['item'\] did not aggregate successfully" + ): + result = df.agg(aggs) + expected = DataFrame( + { + "item": ["123456", np.nan, 6, "1"], + "att1": [21.0, 10.5, 6.0, 1.0], + "att2": [18.0, 9.0, 6.0, 0.0], + "att3": [17.0, 8.5, 6.0, 0.0], + }, + index=["sum", "foo", "count", "min"], + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_getitem_axis_1(): + # GH 13427 + df = DataFrame({"a": [0, 1, 2], "b": [1, 2, 3]}) + result = df[["a", "a"]].apply(lambda x: x[0] + x[1], axis=1) + expected = Series([0, 2, 4]) + tm.assert_series_equal(result, expected) + + +def test_nuisance_depr_passes_through_warnings(): + # GH 43740 + # DataFrame.agg with list-likes may emit warnings for both individual + # args and for entire columns, but we only want to emit once. We + # catch and suppress the warnings for individual args, but need to make + # sure if some other warnings were raised, they get passed through to + # the user. + + def foo(x): + warnings.warn("Hello, World!") + return x.sum() + + df = DataFrame({"a": [1, 2, 3]}) + with tm.assert_produces_warning(UserWarning, match="Hello, World!"): + df.agg([foo]) + + +def test_apply_type(): + # GH 46719 + df = DataFrame( + {"col1": [3, "string", float], "col2": [0.25, datetime(2020, 1, 1), np.nan]}, + index=["a", "b", "c"], + ) + + # applymap + result = df.applymap(type) + expected = DataFrame( + {"col1": [int, str, type], "col2": [float, datetime, float]}, + index=["a", "b", "c"], + ) + tm.assert_frame_equal(result, expected) + + # axis=0 + result = df.apply(type, axis=0) + expected = Series({"col1": Series, "col2": Series}) + tm.assert_series_equal(result, expected) + + # axis=1 + result = df.apply(type, axis=1) + expected = Series({"a": Series, "b": Series, "c": Series}) + tm.assert_series_equal(result, expected) + + +def test_apply_on_empty_dataframe(): + # GH 39111 + df = DataFrame({"a": [1, 2], "b": [3, 0]}) + result = df.head(0).apply(lambda x: max(x["a"], x["b"]), axis=1) + expected = Series([], dtype=np.float64) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "test, constant", + [ + ({"a": [1, 2, 3], "b": [1, 1, 1]}, {"a": [1, 2, 3], "b": [1]}), + ({"a": [2, 2, 2], "b": [1, 1, 1]}, {"a": [2], "b": [1]}), + ], +) +def test_unique_agg_type_is_series(test, constant): + # GH#22558 + df1 = DataFrame(test) + expected = Series(data=constant, index=["a", "b"], dtype="object") + aggregation = {"a": "unique", "b": "unique"} + + result = df1.agg(aggregation) + + tm.assert_series_equal(result, expected) + + +def test_any_non_keyword_deprecation(): + df = DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) + msg = ( + "In a future version of pandas all arguments of " + "DataFrame.any and Series.any will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.any("index", None) + expected = Series({"A": True, "B": True, "C": False}) + tm.assert_series_equal(result, expected) + + s = Series([False, False, False]) + msg = ( + "In a future version of pandas all arguments of " + "DataFrame.any and Series.any will be keyword-only." 
+ ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.any("index") + expected = False + tm.assert_equal(result, expected) + + +def test_any_apply_keyword_non_zero_axis_regression(): + # https://github.com/pandas-dev/pandas/issues/48656 + df = DataFrame({"A": [1, 2, 0], "B": [0, 2, 0], "C": [0, 0, 0]}) + expected = Series([True, True, False]) + tm.assert_series_equal(df.any(axis=1), expected) + + result = df.apply("any", axis=1) + tm.assert_series_equal(result, expected) + + result = df.apply("any", 1) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_frame_apply_relabeling.py b/pandas/tests/apply/test_frame_apply_relabeling.py new file mode 100644 index 00000000..2da4a789 --- /dev/null +++ b/pandas/tests/apply/test_frame_apply_relabeling.py @@ -0,0 +1,97 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def test_agg_relabel(): + # GH 26513 + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) + + # simplest case with one column, one func + result = df.agg(foo=("B", "sum")) + expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"])) + tm.assert_frame_equal(result, expected) + + # test on same column with different methods + result = df.agg(foo=("B", "sum"), bar=("B", "min")) + expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"])) + + tm.assert_frame_equal(result, expected) + + +def test_agg_relabel_multi_columns_multi_methods(): + # GH 26513, test on multiple columns with multiple methods + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) + result = df.agg( + foo=("A", "sum"), + bar=("B", "mean"), + cat=("A", "min"), + dat=("B", "max"), + f=("A", "max"), + g=("C", "min"), + ) + expected = pd.DataFrame( + { + "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan], + "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan], + "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0], + }, + index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]), + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_relabel_partial_functions(): + # GH 26513, test on partial, functools or more complex cases + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) + result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min)) + expected = pd.DataFrame( + {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"]) + ) + tm.assert_frame_equal(result, expected) + + result = df.agg( + foo=("A", min), + bar=("A", np.min), + cat=("B", max), + dat=("C", "min"), + f=("B", np.sum), + kk=("B", lambda x: min(x)), + ) + expected = pd.DataFrame( + { + "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0], + "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan], + }, + index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]), + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_namedtuple(): + # GH 26513 + df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) + result = df.agg( + foo=pd.NamedAgg("B", "sum"), + bar=pd.NamedAgg("B", min), + cat=pd.NamedAgg(column="B", aggfunc="count"), + fft=pd.NamedAgg("B", aggfunc="max"), + ) + + expected = pd.DataFrame( + {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"]) + ) + tm.assert_frame_equal(result, expected) + + result = df.agg( + foo=pd.NamedAgg("A", "min"), + bar=pd.NamedAgg(column="B", aggfunc="max"), + cat=pd.NamedAgg(column="A", aggfunc="max"), + ) + expected = pd.DataFrame( + {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]}, + 
index=pd.Index(["foo", "bar", "cat"]), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py new file mode 100644 index 00000000..2d4deff8 --- /dev/null +++ b/pandas/tests/apply/test_frame_transform.py @@ -0,0 +1,258 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.tests.apply.common import frame_transform_kernels +from pandas.tests.frame.common import zip_frames + + +def unpack_obj(obj, klass, axis): + """ + Helper to ensure we have the right type of object for a test parametrized + over frame_or_series. + """ + if klass is not DataFrame: + obj = obj["A"] + if axis != 0: + pytest.skip(f"Test is only for DataFrame with axis={axis}") + return obj + + +def test_transform_ufunc(axis, float_frame, frame_or_series): + # GH 35964 + obj = unpack_obj(float_frame, frame_or_series, axis) + + with np.errstate(all="ignore"): + f_sqrt = np.sqrt(obj) + + # ufunc + result = obj.transform(np.sqrt, axis=axis) + expected = f_sqrt + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], +) +def test_transform_listlike(axis, float_frame, ops, names): + # GH 35964 + other_axis = 1 if axis in {0, "index"} else 0 + with np.errstate(all="ignore"): + expected = zip_frames([op(float_frame) for op in ops], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product([float_frame.columns, names]) + else: + expected.index = MultiIndex.from_product([float_frame.index, names]) + result = float_frame.transform(ops, axis=axis) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("ops", [[], np.array([])]) +def test_transform_empty_listlike(float_frame, ops, frame_or_series): + obj = unpack_obj(float_frame, frame_or_series, 0) + + with pytest.raises(ValueError, match="No transform functions were provided"): + obj.transform(ops) + + +@pytest.mark.parametrize("box", [dict, Series]) +def test_transform_dictlike(axis, float_frame, box): + # GH 35964 + if axis == 0 or axis == "index": + e = float_frame.columns[0] + expected = float_frame[[e]].transform(np.abs) + else: + e = float_frame.index[0] + expected = float_frame.iloc[[0]].transform(np.abs) + result = float_frame.transform(box({e: np.abs}), axis=axis) + tm.assert_frame_equal(result, expected) + + +def test_transform_dictlike_mixed(): + # GH 40018 - mix of lists and non-lists in values of a dictionary + df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]}) + result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + expected = DataFrame( + [[1.0, 1, 1.0], [2.0, 4, 2.0]], + columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "ops", + [ + {}, + {"A": []}, + {"A": [], "B": "cumsum"}, + {"A": "cumsum", "B": []}, + {"A": [], "B": ["cumsum"]}, + {"A": ["cumsum"], "B": []}, + ], +) +def test_transform_empty_dictlike(float_frame, ops, frame_or_series): + obj = unpack_obj(float_frame, frame_or_series, 0) + + with pytest.raises(ValueError, match="No transform functions were provided"): + obj.transform(ops) + + +@pytest.mark.parametrize("use_apply", [True, False]) +def test_transform_udf(axis, float_frame, use_apply, frame_or_series): + # GH 35964 + obj = 
unpack_obj(float_frame, frame_or_series, axis) + + # transform uses UDF either via apply or passing the entire DataFrame + def func(x): + # transform is using apply iff x is not a DataFrame + if use_apply == isinstance(x, frame_or_series): + # Force transform to fallback + raise ValueError + return x + 1 + + result = obj.transform(func, axis=axis) + expected = obj + 1 + tm.assert_equal(result, expected) + + +wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"] +frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail] + + +@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1]) +def test_transform_bad_dtype(op, frame_or_series, request): + # GH 35964 + if op == "rank": + request.node.add_marker( + pytest.mark.xfail( + raises=ValueError, reason="GH 40418: rank does not raise a TypeError" + ) + ) + elif op == "ngroup": + request.node.add_marker( + pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") + ) + + obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms + obj = tm.get_obj(obj, frame_or_series) + + # tshift is deprecated + warn = None if op != "tshift" else FutureWarning + with tm.assert_produces_warning(warn): + with pytest.raises(TypeError, match="unsupported operand|not supported"): + obj.transform(op) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform([op]) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform({"A": op}) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform({"A": [op]}) + + +@pytest.mark.parametrize("op", frame_kernels_raise) +def test_transform_partial_failure_typeerror(request, op): + # GH 35964 + + if op == "ngroup": + request.node.add_marker( + pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") + ) + + # Using object makes most transform kernels fail + df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) + + expected = df[["B"]].transform([op]) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform([op]) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": op}) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": op, "B": op}) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": [op]}) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op], "B": [op]}) + tm.assert_equal(result, expected) + + expected = df.transform({"A": ["shift"], "B": [op]}) + match = rf"\['{op}'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + + +def test_transform_partial_failure_valueerror(): + # GH 40211 + match = ".*did not transform successfully" + + def op(x): + if np.sum(np.sum(x)) < 10: + raise ValueError + return x + + df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) + + expected = df[["B"]].transform([op]) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform([op]) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": op}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": op, "B": op}) + 
tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": [op]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op], "B": [op]}) + tm.assert_equal(result, expected) + + expected = df.transform({"A": ["shift"], "B": [op]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("use_apply", [True, False]) +def test_transform_passes_args(use_apply, frame_or_series): + # GH 35964 + # transform uses UDF either via apply or passing the entire DataFrame + expected_args = [1, 2] + expected_kwargs = {"c": 3} + + def f(x, a, b, c): + # transform is using apply iff x is not a DataFrame + if use_apply == isinstance(x, frame_or_series): + # Force transform to fallback + raise ValueError + assert [a, b] == expected_args + assert c == expected_kwargs["c"] + return x + + frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs) + + +def test_transform_empty_dataframe(): + # https://github.com/pandas-dev/pandas/issues/39636 + df = DataFrame([], columns=["col1", "col2"]) + result = df.transform(lambda x: x + 10) + tm.assert_frame_equal(result, df) + + result = df["col1"].transform(lambda x: x + 10) + tm.assert_series_equal(result, df["col1"]) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py new file mode 100644 index 00000000..5a498aa7 --- /dev/null +++ b/pandas/tests/apply/test_invalid_arg.py @@ -0,0 +1,369 @@ +# Tests specifically aimed at detecting bad arguments. +# This file is organized by reason for exception. +# 1. always invalid argument values +# 2. missing column(s) +# 3. incompatible ops/dtype/args/kwargs +# 4. invalid result shape/type +# If your test does not fit into one of these categories, add to this list. 
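+# For instance, category 1 covers calls like df.apply(func, result_type="foo"),
+# which should raise ValueError for any input (see test_result_type_error below).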
+ +from itertools import chain +import re + +import numpy as np +import pytest + +from pandas.errors import SpecificationError + +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, + notna, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("result_type", ["foo", 1]) +def test_result_type_error(result_type, int_frame_const_col): + # allowed result_type + df = int_frame_const_col + + msg = ( + "invalid value for result_type, must be one of " + "{None, 'reduce', 'broadcast', 'expand'}" + ) + with pytest.raises(ValueError, match=msg): + df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type) + + +def test_apply_invalid_axis_value(): + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.apply(lambda x: x, 2) + + +def test_applymap_invalid_na_action(float_frame): + # GH 23803 + with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"): + float_frame.applymap(lambda x: len(str(x)), na_action="abc") + + +def test_agg_raises(): + # GH 26513 + df = DataFrame({"A": [0, 1], "B": [1, 2]}) + msg = "Must provide" + + with pytest.raises(TypeError, match=msg): + df.agg() + + +def test_map_with_invalid_na_action_raises(): + # https://github.com/pandas-dev/pandas/issues/32815 + s = Series([1, 2, 3]) + msg = "na_action must either be 'ignore' or None" + with pytest.raises(ValueError, match=msg): + s.map(lambda x: x, na_action="____") + + +@pytest.mark.parametrize("input_na_action", ["____", True]) +def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action): + # https://github.com/pandas-dev/pandas/issues/46588 + s = Series([1, 2, 3]) + msg = f"na_action must either be 'ignore' or None, {input_na_action} was passed" + with pytest.raises(ValueError, match=msg): + s.map({1: 2}, na_action=input_na_action) + + +def test_map_categorical_na_action(): + values = Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + s = Series(values, name="XX", index=list("abcdefg")) + with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN): + s.map(lambda x: x, na_action="ignore") + + +def test_map_datetimetz_na_action(): + values = date_range("2011-01-01", "2011-01-02", freq="H").tz_localize("Asia/Tokyo") + s = Series(values, name="XX") + with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN): + s.map(lambda x: x, na_action="ignore") + + +@pytest.mark.parametrize("box", [DataFrame, Series]) +@pytest.mark.parametrize("method", ["apply", "agg", "transform"]) +@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}]) +def test_nested_renamer(box, method, func): + # GH 35964 + obj = box({"A": [1]}) + match = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=match): + getattr(obj, method)(func) + + +@pytest.mark.parametrize( + "renamer", + [{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}], +) +def test_series_nested_renamer(renamer): + s = Series(range(6), dtype="int64", name="series") + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + s.agg(renamer) + + +def test_apply_dict_depr(): + + tsdf = DataFrame( + np.random.randn(10, 3), + columns=["A", "B", "C"], + index=date_range("1/1/2000", periods=10), + ) + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + tsdf.A.agg({"foo": ["sum", "mean"]}) + + +@pytest.mark.parametrize("method", ["agg", 
"transform"]) +def test_dict_nested_renaming_depr(method): + + df = DataFrame({"A": range(5), "B": 5}) + + # nested renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + getattr(df, method)({"A": {"foo": "min"}, "B": {"bar": "max"}}) + + +@pytest.mark.parametrize("method", ["apply", "agg", "transform"]) +@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}]) +def test_missing_column(method, func): + # GH 40004 + obj = DataFrame({"A": [1]}) + match = re.escape("Column(s) ['B'] do not exist") + with pytest.raises(KeyError, match=match): + getattr(obj, method)(func) + + +def test_transform_mixed_column_name_dtypes(): + # GH39025 + df = DataFrame({"a": ["1"]}) + msg = r"Column\(s\) \[1, 'b'\] do not exist" + with pytest.raises(KeyError, match=msg): + df.transform({"a": int, 1: str, "b": int}) + + +@pytest.mark.parametrize( + "how, args", [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", 1)] +) +def test_apply_str_axis_1_raises(how, args): + # GH 39211 - some ops don't support axis=1 + df = DataFrame({"a": [1, 2], "b": [3, 4]}) + msg = f"Operation {how} does not support axis=1" + with pytest.raises(ValueError, match=msg): + df.apply(how, axis=1, args=args) + + +def test_transform_axis_1_raises(): + # GH 35964 + msg = "No axis named 1 for object type Series" + with pytest.raises(ValueError, match=msg): + Series([1]).transform("sum", axis=1) + + +def test_apply_modify_traceback(): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + data.loc[4, "C"] = np.nan + + def transform(row): + if row["C"].startswith("shin") and row["A"] == "foo": + row["D"] = 7 + return row + + def transform2(row): + if notna(row["C"]) and row["C"].startswith("shin") and row["A"] == "foo": + row["D"] = 7 + return row + + msg = "'float' object has no attribute 'startswith'" + with pytest.raises(AttributeError, match=msg): + data.apply(transform, axis=1) + + +@pytest.mark.parametrize( + "df, func, expected", + tm.get_cython_table_params( + DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]] + ), +) +def test_agg_cython_table_raises_frame(df, func, expected, axis): + # GH 21224 + msg = "can't multiply sequence by non-int of type 'str'" + with pytest.raises(expected, match=msg): + df.agg(func, axis=axis) + + +@pytest.mark.parametrize( + "series, func, expected", + chain( + tm.get_cython_table_params( + Series("a b c".split()), + [ + ("mean", TypeError), # mean raises TypeError + ("prod", TypeError), + ("std", TypeError), + ("var", TypeError), + ("median", TypeError), + ("cumprod", TypeError), + ], + ) + ), +) +def test_agg_cython_table_raises_series(series, func, expected): + # GH21224 + msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type" + with pytest.raises(expected, match=msg): + # e.g. 
Series('a b'.split()).cumprod() will raise + series.agg(func) + + +def test_agg_none_to_type(): + # GH 40543 + df = DataFrame({"a": [None]}) + msg = re.escape("int() argument must be a string") + with pytest.raises(TypeError, match=msg): + df.agg({"a": int}) + + +def test_transform_none_to_type(): + # GH#34377 + df = DataFrame({"a": [None]}) + msg = "Transform function failed" + with pytest.raises(TypeError, match=msg): + df.transform({"a": int}) + + +@pytest.mark.parametrize( + "func", + [ + lambda x: np.array([1, 2]).reshape(-1, 2), + lambda x: [1, 2], + lambda x: Series([1, 2]), + ], +) +def test_apply_broadcast_error(int_frame_const_col, func): + df = int_frame_const_col + + # > 1 ndim + msg = "too many dims to broadcast|cannot broadcast result" + with pytest.raises(ValueError, match=msg): + df.apply(func, axis=1, result_type="broadcast") + + +def test_transform_and_agg_err_agg(axis, float_frame): + # cannot both transform and agg + msg = "cannot combine transform and aggregation operations" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + float_frame.agg(["max", "sqrt"], axis=axis) + + +@pytest.mark.parametrize( + "func, msg", + [ + (["sqrt", "max"], "cannot combine transform and aggregation"), + ( + {"foo": np.sqrt, "bar": "sum"}, + "cannot perform both aggregation and transformation", + ), + ], +) +def test_transform_and_agg_err_series(string_series, func, msg): + # we are trying to transform with an aggregator + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + string_series.agg(func) + + +@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]]) +def test_transform_wont_agg_frame(axis, float_frame, func): + # GH 35964 + # cannot both transform and agg + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): + float_frame.transform(func, axis=axis) + + +@pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]]) +def test_transform_wont_agg_series(string_series, func): + # GH 35964 + # we are trying to transform with an aggregator + msg = "Function did not transform" + + warn = RuntimeWarning if func[0] == "sqrt" else None + warn_msg = "invalid value encountered in sqrt" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(warn, match=warn_msg): + string_series.transform(func) + + +@pytest.mark.parametrize( + "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}] +) +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") +def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper): + # GH 35964 + op = op_wrapper(all_reductions) + + obj = DataFrame({"A": [1, 2, 3]}) + obj = tm.get_obj(obj, frame_or_series) + + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): + obj.transform(op) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py new file mode 100644 index 00000000..afe1c236 --- /dev/null +++ b/pandas/tests/apply/test_series_apply.py @@ -0,0 +1,927 @@ +from collections import ( + Counter, + defaultdict, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, + isna, + timedelta_range, +) +import pandas._testing as tm +from pandas.tests.apply.common import series_transform_kernels + + +def test_series_map_box_timedelta(): + # GH#11349 + ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h")) + + def f(x): + return 
x.total_seconds() + + ser.map(f) + ser.apply(f) + DataFrame(ser).applymap(f) + + +def test_apply(datetime_series): + with np.errstate(all="ignore"): + tm.assert_series_equal(datetime_series.apply(np.sqrt), np.sqrt(datetime_series)) + + # element-wise apply + import math + + tm.assert_series_equal(datetime_series.apply(math.exp), np.exp(datetime_series)) + + # empty series + s = Series(dtype=object, name="foo", index=Index([], name="bar")) + rs = s.apply(lambda x: x) + tm.assert_series_equal(s, rs) + + # check all metadata (GH 9322) + assert s is not rs + assert s.index is rs.index + assert s.dtype == rs.dtype + assert s.name == rs.name + + # index but no data + s = Series(index=[1, 2, 3], dtype=np.float64) + rs = s.apply(lambda x: x) + tm.assert_series_equal(s, rs) + + +def test_apply_same_length_inference_bug(): + s = Series([1, 2]) + + def f(x): + return (x, x + 1) + + result = s.apply(f) + expected = s.map(f) + tm.assert_series_equal(result, expected) + + s = Series([1, 2, 3]) + result = s.apply(f) + expected = s.map(f) + tm.assert_series_equal(result, expected) + + +def test_apply_dont_convert_dtype(): + s = Series(np.random.randn(10)) + + def f(x): + return x if x > 0 else np.nan + + result = s.apply(f, convert_dtype=False) + assert result.dtype == object + + +def test_apply_args(): + s = Series(["foo,bar"]) + + result = s.apply(str.split, args=(",",)) + assert result[0] == ["foo", "bar"] + assert isinstance(result[0], list) + + +@pytest.mark.parametrize( + "args, kwargs, increment", + [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], +) +def test_agg_args(args, kwargs, increment): + # GH 43357 + def f(x, a=0, b=0, c=0): + return x + a + 10 * b + 100 * c + + s = Series([1, 2]) + result = s.agg(f, 0, *args, **kwargs) + expected = s + increment + tm.assert_series_equal(result, expected) + + +def test_series_map_box_timestamps(): + # GH#2689, GH#2627 + ser = Series(pd.date_range("1/1/2000", periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + ser.map(func) + ser.apply(func) + + +def test_series_map_stringdtype(any_string_dtype): + # map test on StringDType, GH#40823 + ser1 = Series( + data=["cat", "dog", "rabbit"], + index=["id1", "id2", "id3"], + dtype=any_string_dtype, + ) + ser2 = Series(data=["id3", "id2", "id1", "id7000"], dtype=any_string_dtype) + result = ser2.map(ser1) + expected = Series(data=["rabbit", "dog", "cat", pd.NA], dtype=any_string_dtype) + + tm.assert_series_equal(result, expected) + + +def test_apply_box(): + # ufunc will not be boxed. 
Same test cases as the test_map_box + vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] + s = Series(vals) + assert s.dtype == "datetime64[ns]" + # boxed value must be Timestamp instance + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) + tm.assert_series_equal(res, exp) + + vals = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = Series(vals) + assert s.dtype == "datetime64[ns, US/Eastern]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) + tm.assert_series_equal(res, exp) + + # timedelta + vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] + s = Series(vals) + assert s.dtype == "timedelta64[ns]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") + exp = Series(["Timedelta_1", "Timedelta_2"]) + tm.assert_series_equal(res, exp) + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = Series(vals) + assert s.dtype == "Period[M]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") + exp = Series(["Period_M", "Period_M"]) + tm.assert_series_equal(res, exp) + + +def test_apply_datetimetz(): + values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( + "Asia/Tokyo" + ) + s = Series(values, name="XX") + + result = s.apply(lambda x: x + pd.offsets.Day()) + exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize( + "Asia/Tokyo" + ) + exp = Series(exp_values, name="XX") + tm.assert_series_equal(result, exp) + + # change dtype + # GH 14506 : Returned dtype changed from int32 to int64 + result = s.apply(lambda x: x.hour) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + tm.assert_series_equal(result, exp) + + # not vectorized + def f(x): + if not isinstance(x, pd.Timestamp): + raise ValueError + return str(x.tz) + + result = s.map(f) + exp = Series(["Asia/Tokyo"] * 25, name="XX") + tm.assert_series_equal(result, exp) + + +def test_apply_categorical(): + values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + ser = Series(values, name="XX", index=list("abcdefg")) + result = ser.apply(lambda x: x.lower()) + + # should be categorical dtype when the number of categories are + # the same + values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) + exp = Series(values, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + tm.assert_categorical_equal(result.values, exp.values) + + result = ser.apply(lambda x: "A") + exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + assert result.dtype == object + + +@pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]]) +def test_apply_categorical_with_nan_values(series): + # GH 20714 bug fixed in: GH 24275 + s = Series(series, dtype="category") + result = s.apply(lambda x: x.split("-")[0]) + result = result.astype(object) + expected = Series(["1", "1", np.NaN], dtype="category") + expected = expected.astype(object) + tm.assert_series_equal(result, expected) + + +def test_apply_empty_integer_series_with_datetime_index(): + # GH 21245 + s = Series([], index=pd.date_range(start="2018-01-01", periods=0), dtype=int) + result = s.apply(lambda x: x) + tm.assert_series_equal(result, s) + + +def test_transform(string_series): + # transforming functions + + with np.errstate(all="ignore"): + + f_sqrt = 
np.sqrt(string_series) + f_abs = np.abs(string_series) + + # ufunc + result = string_series.apply(np.sqrt) + expected = f_sqrt.copy() + tm.assert_series_equal(result, expected) + + # list-like + result = string_series.apply([np.sqrt]) + expected = f_sqrt.to_frame().copy() + expected.columns = ["sqrt"] + tm.assert_frame_equal(result, expected) + + result = string_series.apply(["sqrt"]) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both functions per + # series and then concatting + expected = concat([f_sqrt, f_abs], axis=1) + expected.columns = ["sqrt", "absolute"] + result = string_series.apply([np.sqrt, np.abs]) + tm.assert_frame_equal(result, expected) + + # dict, provide renaming + expected = concat([f_sqrt, f_abs], axis=1) + expected.columns = ["foo", "bar"] + expected = expected.unstack().rename("series") + + result = string_series.apply({"foo": np.sqrt, "bar": np.abs}) + tm.assert_series_equal(result.reindex_like(expected), expected) + + +@pytest.mark.parametrize("op", series_transform_kernels) +def test_transform_partial_failure(op, request): + # GH 35964 + if op in ("ffill", "bfill", "pad", "backfill", "shift"): + request.node.add_marker( + pytest.mark.xfail( + raises=AssertionError, reason=f"{op} is successful on any dtype" + ) + ) + + # Using object makes most transform kernels fail + ser = Series(3 * [object]) + + expected = ser.transform(["shift"]) + match = rf"\['{op}'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform([op, "shift"]) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": "shift"}) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": op, "B": "shift"}) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": ["shift"]}) + match = r"\['A'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [op], "B": ["shift"]}) + tm.assert_equal(result, expected) + + match = r"\['B'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + expected = ser.transform({"A": ["shift"], "B": [op]}) + match = rf"\['{op}'\] did not transform successfully" + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + + +def test_transform_partial_failure_valueerror(): + # GH 40211 + match = ".*did not transform successfully" + + def noop(x): + return x + + def raising_op(_): + raise ValueError + + ser = Series(3 * [object]) + + expected = ser.transform([noop]) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform([noop, raising_op]) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": noop}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": raising_op, "B": noop}) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": [noop]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [raising_op], "B": [noop]}) + tm.assert_equal(result, expected) + + expected = ser.transform({"A": [noop], "B": [noop]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [noop, raising_op], "B": [noop]}) + tm.assert_equal(result, expected) + + 
+def test_demo(): + # demonstration tests + s = Series(range(6), dtype="int64", name="series") + + result = s.agg(["min", "max"]) + expected = Series([0, 5], index=["min", "max"], name="series") + tm.assert_series_equal(result, expected) + + result = s.agg({"foo": "min"}) + expected = Series([0], index=["foo"], name="series") + tm.assert_series_equal(result, expected) + + +def test_agg_apply_evaluate_lambdas_the_same(string_series): + # test that we are evaluating row-by-row first + # before vectorized evaluation + result = string_series.apply(lambda x: str(x)) + expected = string_series.agg(lambda x: str(x)) + tm.assert_series_equal(result, expected) + + result = string_series.apply(str) + expected = string_series.agg(str) + tm.assert_series_equal(result, expected) + + +def test_with_nested_series(datetime_series): + # GH 2316 + # .agg with a reducer and a transform, what to do + result = datetime_series.apply(lambda x: Series([x, x**2], index=["x", "x^2"])) + expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) + tm.assert_frame_equal(result, expected) + + result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) + tm.assert_frame_equal(result, expected) + + +def test_replicate_describe(string_series): + # this also tests a result set that is all scalars + expected = string_series.describe() + result = string_series.apply( + { + "count": "count", + "mean": "mean", + "std": "std", + "min": "min", + "25%": lambda x: x.quantile(0.25), + "50%": "median", + "75%": lambda x: x.quantile(0.75), + "max": "max", + } + ) + tm.assert_series_equal(result, expected) + + +def test_reduce(string_series): + # reductions with named functions + result = string_series.agg(["sum", "mean"]) + expected = Series( + [string_series.sum(), string_series.mean()], + ["sum", "mean"], + name=string_series.name, + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_non_callable_aggregates(how): + # test agg using non-callable series attributes + # GH 39116 - expand to apply + s = Series([1, 2, None]) + + # Calling agg w/ just a string arg same as calling s.arg + result = getattr(s, how)("size") + expected = s.size + assert result == expected + + # test when mixed w/ callable reducers + result = getattr(s, how)(["size", "count", "mean"]) + expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5}) + tm.assert_series_equal(result, expected) + + +def test_series_apply_no_suffix_index(): + # GH36189 + s = Series([4] * 3) + result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) + expected = Series([12, 12, 12], index=["sum", "", ""]) + + tm.assert_series_equal(result, expected) + + +def test_map(datetime_series): + index, data = tm.getMixedTypeDict() + + source = Series(data["B"], index=data["C"]) + target = Series(data["C"][:4], index=data["D"][:4]) + + merged = target.map(source) + + for k, v in merged.items(): + assert v == source[target[k]] + + # input could be a dict + merged = target.map(source.to_dict()) + + for k, v in merged.items(): + assert v == source[target[k]] + + # function + result = datetime_series.map(lambda x: x * 2) + tm.assert_series_equal(result, datetime_series * 2) + + # GH 10324 + a = Series([1, 2, 3, 4]) + b = Series(["even", "odd", "even", "odd"], dtype="category") + c = Series(["even", "odd", "even", "odd"]) + + exp = Series(["odd", "even", "odd", np.nan], dtype="category") + tm.assert_series_equal(a.map(b), exp) + exp = Series(["odd", "even", "odd", np.nan]) + 
tm.assert_series_equal(a.map(c), exp) + + a = Series(["a", "b", "c", "d"]) + b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"])) + c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"])) + + exp = Series([np.nan, 1, 2, 3]) + tm.assert_series_equal(a.map(b), exp) + exp = Series([np.nan, 1, 2, 3]) + tm.assert_series_equal(a.map(c), exp) + + a = Series(["a", "b", "c", "d"]) + b = Series( + ["B", "C", "D", "E"], + dtype="category", + index=pd.CategoricalIndex(["b", "c", "d", "e"]), + ) + c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"])) + + exp = Series( + pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"]) + ) + tm.assert_series_equal(a.map(b), exp) + exp = Series([np.nan, "B", "C", "D"]) + tm.assert_series_equal(a.map(c), exp) + + +def test_map_empty(request, index): + if isinstance(index, MultiIndex): + request.node.add_marker( + pytest.mark.xfail( + reason="Initializing a Series from a MultiIndex is not supported" + ) + ) + + s = Series(index) + result = s.map({}) + + expected = Series(np.nan, index=s.index) + tm.assert_series_equal(result, expected) + + +def test_map_compat(): + # related GH 8024 + s = Series([True, True, False], index=[1, 2, 3]) + result = s.map({True: "foo", False: "bar"}) + expected = Series(["foo", "foo", "bar"], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + +def test_map_int(): + left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4}) + right = Series({1: 11, 2: 22, 3: 33}) + + assert left.dtype == np.float_ + assert issubclass(right.dtype.type, np.integer) + + merged = left.map(right) + assert merged.dtype == np.float_ + assert isna(merged["d"]) + assert not isna(merged["c"]) + + +def test_map_type_inference(): + s = Series(range(3)) + s2 = s.map(lambda x: np.where(x == 0, 0, 1)) + assert issubclass(s2.dtype.type, np.integer) + + +def test_map_decimal(string_series): + from decimal import Decimal + + result = string_series.map(lambda x: Decimal(str(x))) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + +def test_map_na_exclusion(): + s = Series([1.5, np.nan, 3, np.nan, 5]) + + result = s.map(lambda x: x * 2, na_action="ignore") + exp = s * 2 + tm.assert_series_equal(result, exp) + + +def test_map_dict_with_tuple_keys(): + """ + Due to new MultiIndex-ing behaviour in v0.14.0, + dicts with tuple keys passed to map were being + converted to a multi-index, preventing tuple values + from being mapped properly. 
+ """ + # GH 18496 + df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]}) + label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"} + + df["labels"] = df["a"].map(label_mappings) + df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index) + # All labels should be filled now + tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False) + + +def test_map_counter(): + s = Series(["a", "b", "c"], index=[1, 2, 3]) + counter = Counter() + counter["b"] = 5 + counter["c"] += 1 + result = s.map(counter) + expected = Series([0, 5, 1], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + +def test_map_defaultdict(): + s = Series([1, 2, 3], index=["a", "b", "c"]) + default_dict = defaultdict(lambda: "blank") + default_dict[1] = "stuff" + result = s.map(default_dict) + expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + +def test_map_dict_na_key(): + # https://github.com/pandas-dev/pandas/issues/17648 + # Checks that np.nan key is appropriately mapped + s = Series([1, 2, np.nan]) + expected = Series(["a", "b", "c"]) + result = s.map({1: "a", 2: "b", np.nan: "c"}) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("arg_func", [dict, Series]) +def test_map_dict_ignore_na(arg_func): + # GH#47527 + mapping = arg_func({1: 10, np.nan: 42}) + ser = Series([1, np.nan, 2]) + result = ser.map(mapping, na_action="ignore") + expected = Series([10, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +def test_map_defaultdict_ignore_na(): + # GH#47527 + mapping = defaultdict(int, {1: 10, np.nan: 42}) + ser = Series([1, np.nan, 2]) + result = ser.map(mapping) + expected = Series([10, 0, 0]) + tm.assert_series_equal(result, expected) + + +def test_map_categorical_na_ignore(): + # GH#47527 + values = pd.Categorical([1, np.nan, 2], categories=[10, 1]) + ser = Series(values) + result = ser.map({1: 10, np.nan: 42}) + expected = Series([10, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + +def test_map_dict_subclass_with_missing(): + """ + Test Series.map with a dictionary subclass that defines __missing__, + i.e. sets a default value (GH #15999). 
+ """ + + class DictWithMissing(dict): + def __missing__(self, key): + return "missing" + + s = Series([1, 2, 3]) + dictionary = DictWithMissing({3: "three"}) + result = s.map(dictionary) + expected = Series(["missing", "missing", "three"]) + tm.assert_series_equal(result, expected) + + +def test_map_dict_subclass_without_missing(): + class DictWithoutMissing(dict): + pass + + s = Series([1, 2, 3]) + dictionary = DictWithoutMissing({3: "three"}) + result = s.map(dictionary) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + +def test_map_abc_mapping(non_dict_mapping_subclass): + # https://github.com/pandas-dev/pandas/issues/29733 + # Check collections.abc.Mapping support as mapper for Series.map + s = Series([1, 2, 3]) + not_a_dictionary = non_dict_mapping_subclass({3: "three"}) + result = s.map(not_a_dictionary) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + +def test_map_abc_mapping_with_missing(non_dict_mapping_subclass): + # https://github.com/pandas-dev/pandas/issues/29733 + # Check collections.abc.Mapping support as mapper for Series.map + class NonDictMappingWithMissing(non_dict_mapping_subclass): + def __missing__(self, key): + return "missing" + + s = Series([1, 2, 3]) + not_a_dictionary = NonDictMappingWithMissing({3: "three"}) + result = s.map(not_a_dictionary) + # __missing__ is a dict concept, not a Mapping concept, + # so it should not change the result! + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + +def test_map_box(): + vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] + s = Series(vals) + assert s.dtype == "datetime64[ns]" + # boxed value must be Timestamp instance + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) + tm.assert_series_equal(res, exp) + + vals = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = Series(vals) + assert s.dtype == "datetime64[ns, US/Eastern]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) + tm.assert_series_equal(res, exp) + + # timedelta + vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] + s = Series(vals) + assert s.dtype == "timedelta64[ns]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") + exp = Series(["Timedelta_1", "Timedelta_2"]) + tm.assert_series_equal(res, exp) + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = Series(vals) + assert s.dtype == "Period[M]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") + exp = Series(["Period_M", "Period_M"]) + tm.assert_series_equal(res, exp) + + +def test_map_categorical(): + values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + s = Series(values, name="XX", index=list("abcdefg")) + + result = s.map(lambda x: x.lower()) + exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) + exp = Series(exp_values, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + tm.assert_categorical_equal(result.values, exp_values) + + result = s.map(lambda x: "A") + exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + assert result.dtype == object + + +def test_map_datetimetz(): + values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( + "Asia/Tokyo" 
+ ) + s = Series(values, name="XX") + + # keep tz + result = s.map(lambda x: x + pd.offsets.Day()) + exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize( + "Asia/Tokyo" + ) + exp = Series(exp_values, name="XX") + tm.assert_series_equal(result, exp) + + # change dtype + # GH 14506 : Returned dtype changed from int32 to int64 + result = s.map(lambda x: x.hour) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + tm.assert_series_equal(result, exp) + + # not vectorized + def f(x): + if not isinstance(x, pd.Timestamp): + raise ValueError + return str(x.tz) + + result = s.map(f) + exp = Series(["Asia/Tokyo"] * 25, name="XX") + tm.assert_series_equal(result, exp) + + +@pytest.mark.parametrize( + "vals,mapping,exp", + [ + (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]), + (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3), + (list(range(3)), {0: 42}, [42] + [np.nan] * 3), + ], +) +def test_map_missing_mixed(vals, mapping, exp): + # GH20495 + s = Series(vals + [np.nan]) + result = s.map(mapping) + + tm.assert_series_equal(result, Series(exp)) + + +@pytest.mark.parametrize( + "dti,exp", + [ + ( + Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])), + DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"), + ), + ( + tm.makeTimeSeries(nper=30), + DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"), + ), + ], +) +@pytest.mark.parametrize("aware", [True, False]) +def test_apply_series_on_date_time_index_aware_series(dti, exp, aware): + # GH 25959 + # Calling apply on a localized time series should not cause an error + if aware: + index = dti.tz_localize("UTC").index + else: + index = dti.index + result = Series(index).apply(lambda x: Series([1, 2])) + tm.assert_frame_equal(result, exp) + + +def test_apply_scalar_on_date_time_index_aware_series(): + # GH 25959 + # Calling apply on a localized time series should not cause an error + series = tm.makeTimeSeries(nper=30).tz_localize("UTC") + result = Series(series.index).apply(lambda x: 1) + tm.assert_series_equal(result, Series(np.ones(30), dtype="int64")) + + +def test_map_float_to_string_precision(): + # GH 13228 + ser = Series(1 / 3) + result = ser.map(lambda val: str(val)).to_dict() + expected = {0: "0.3333333333333333"} + assert result == expected + + +def test_apply_to_timedelta(): + list_of_valid_strings = ["00:00:01", "00:00:02"] + a = pd.to_timedelta(list_of_valid_strings) + b = Series(list_of_valid_strings).apply(pd.to_timedelta) + tm.assert_series_equal(Series(a), b) + + list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT] + + a = pd.to_timedelta(list_of_strings) + with tm.assert_produces_warning(FutureWarning, match="Inferring timedelta64"): + ser = Series(list_of_strings) + b = ser.apply(pd.to_timedelta) + tm.assert_series_equal(Series(a), b) + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sum], ["sum"]), + ([np.sum, np.mean], ["sum", "mean"]), + (np.array([np.sum]), ["sum"]), + (np.array([np.sum, np.mean]), ["sum", "mean"]), + ], +) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_listlike_reducer(string_series, ops, names, how): + # GH 39140 + expected = Series({name: op(string_series) for name, op in zip(names, ops)}) + expected.name = "series" + result = getattr(string_series, how)(ops) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ops", + [ + {"A": np.sum}, + {"A": np.sum, "B": np.mean}, + Series({"A": np.sum}), + Series({"A": np.sum, "B": np.mean}), + ], +) +@pytest.mark.parametrize("how", ["agg", 
"apply"]) +def test_apply_dictlike_reducer(string_series, ops, how): + # GH 39140 + expected = Series({name: op(string_series) for name, op in ops.items()}) + expected.name = string_series.name + result = getattr(string_series, how)(ops) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], +) +def test_apply_listlike_transformer(string_series, ops, names): + # GH 39140 + with np.errstate(all="ignore"): + expected = concat([op(string_series) for op in ops], axis=1) + expected.columns = names + result = string_series.apply(ops) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "ops", + [ + {"A": np.sqrt}, + {"A": np.sqrt, "B": np.exp}, + Series({"A": np.sqrt}), + Series({"A": np.sqrt, "B": np.exp}), + ], +) +def test_apply_dictlike_transformer(string_series, ops): + # GH 39140 + with np.errstate(all="ignore"): + expected = concat({name: op(string_series) for name, op in ops.items()}) + expected.name = string_series.name + result = string_series.apply(ops) + tm.assert_series_equal(result, expected) + + +def test_apply_retains_column_name(): + # GH 16380 + df = DataFrame({"x": range(3)}, Index(range(3), name="x")) + result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y"))) + expected = DataFrame( + [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]], + columns=Index(range(3), name="y"), + index=Index(range(3), name="x"), + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_type(): + # GH 46719 + s = Series([3, "string", float], index=["a", "b", "c"]) + result = s.apply(type) + expected = Series([int, str, type], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_series_apply_relabeling.py b/pandas/tests/apply/test_series_apply_relabeling.py new file mode 100644 index 00000000..c0a285e6 --- /dev/null +++ b/pandas/tests/apply/test_series_apply_relabeling.py @@ -0,0 +1,33 @@ +import pandas as pd +import pandas._testing as tm + + +def test_relabel_no_duplicated_method(): + # this is to test there is no duplicated method used in agg + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]}) + + result = df["A"].agg(foo="sum") + expected = df["A"].agg({"foo": "sum"}) + tm.assert_series_equal(result, expected) + + result = df["B"].agg(foo="min", bar="max") + expected = df["B"].agg({"foo": "min", "bar": "max"}) + tm.assert_series_equal(result, expected) + + result = df["B"].agg(foo=sum, bar=min, cat="max") + expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"}) + tm.assert_series_equal(result, expected) + + +def test_relabel_duplicated_method(): + # this is to test with nested renaming, duplicated method can be used + # if they are assigned with different new names + df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]}) + + result = df["A"].agg(foo="sum", bar="sum") + expected = pd.Series([6, 6], index=["foo", "bar"], name="A") + tm.assert_series_equal(result, expected) + + result = df["B"].agg(foo=min, bar="min") + expected = pd.Series([1, 1], index=["foo", "bar"], name="B") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_series_transform.py b/pandas/tests/apply/test_series_transform.py new file mode 100644 index 00000000..b10af13e --- /dev/null +++ b/pandas/tests/apply/test_series_transform.py @@ -0,0 +1,49 @@ +import numpy as np 
+import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, + concat, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], +) +def test_transform_listlike(string_series, ops, names): + # GH 35964 + with np.errstate(all="ignore"): + expected = concat([op(string_series) for op in ops], axis=1) + expected.columns = names + result = string_series.transform(ops) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("box", [dict, Series]) +def test_transform_dictlike(string_series, box): + # GH 35964 + with np.errstate(all="ignore"): + expected = concat([np.sqrt(string_series), np.abs(string_series)], axis=1) + expected.columns = ["foo", "bar"] + result = string_series.transform(box({"foo": np.sqrt, "bar": np.abs})) + tm.assert_frame_equal(result, expected) + + +def test_transform_dictlike_mixed(): + # GH 40018 - mix of lists and non-lists in values of a dictionary + df = Series([1, 4]) + result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + expected = DataFrame( + [[1.0, 1, 1.0], [2.0, 4, 2.0]], + columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py new file mode 100644 index 00000000..38b2a545 --- /dev/null +++ b/pandas/tests/apply/test_str.py @@ -0,0 +1,304 @@ +from itertools import chain +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_number + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.groupby.base import maybe_normalize_deprecated_kernels +from pandas.tests.apply.common import ( + frame_transform_kernels, + series_transform_kernels, +) + + +@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"]) +@pytest.mark.parametrize( + "args,kwds", + [ + pytest.param([], {}, id="no_args_or_kwds"), + pytest.param([1], {}, id="axis_from_args"), + pytest.param([], {"axis": 1}, id="axis_from_kwds"), + pytest.param([], {"numeric_only": True}, id="optional_kwds"), + pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"), + ], +) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): + if len(args) > 1 and how == "agg": + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason="agg/apply signature mismatch - agg passes 2nd " + "argument to func", + ) + ) + result = getattr(float_frame, how)(func, *args, **kwds) + expected = getattr(float_frame, func)(*args, **kwds) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("arg", ["sum", "mean", "min", "max", "std"]) +def test_with_string_args(datetime_series, arg): + result = datetime_series.apply(arg) + expected = getattr(datetime_series, arg)() + assert result == expected + + +@pytest.mark.parametrize("op", ["mean", "median", "std", "var"]) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_np_reducer(op, how): + # GH 39116 + float_frame = DataFrame({"a": [1, 2], "b": [3, 4]}) + result = getattr(float_frame, how)(op) + # pandas ddof defaults to 1, numpy to 0 + kwargs = {"ddof": 1} if op in ("std", "var") else {} + expected = Series( + getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns + ) + 
tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"] +) +@pytest.mark.parametrize("how", ["transform", "apply"]) +def test_apply_np_transformer(float_frame, op, how): + # GH 39116 + + # float_frame will _usually_ have negative values, which will + # trigger the warning here, but let's put one in just to be sure + float_frame.iloc[0, 0] = -1.0 + warn = None + if op in ["log", "sqrt"]: + warn = RuntimeWarning + + with tm.assert_produces_warning(warn, check_stacklevel=False): + # float_frame fixture is defined in conftest.py, so we don't check the + # stacklevel as otherwise the test would fail. + result = getattr(float_frame, how)(op) + expected = getattr(np, op)(float_frame) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "series, func, expected", + chain( + tm.get_cython_table_params( + Series(dtype=np.float64), + [ + ("sum", 0), + ("max", np.nan), + ("min", np.nan), + ("all", True), + ("any", False), + ("mean", np.nan), + ("prod", 1), + ("std", np.nan), + ("var", np.nan), + ("median", np.nan), + ], + ), + tm.get_cython_table_params( + Series([np.nan, 1, 2, 3]), + [ + ("sum", 6), + ("max", 3), + ("min", 1), + ("all", True), + ("any", True), + ("mean", 2), + ("prod", 6), + ("std", 1), + ("var", 1), + ("median", 2), + ], + ), + tm.get_cython_table_params( + Series("a b c".split()), + [ + ("sum", "abc"), + ("max", "c"), + ("min", "a"), + ("all", True), + ("any", True), + ], + ), + ), +) +def test_agg_cython_table_series(series, func, expected): + # GH21224 + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = series.agg(func) + if is_number(expected): + assert np.isclose(result, expected, equal_nan=True) + else: + assert result == expected + + +@pytest.mark.parametrize( + "series, func, expected", + chain( + tm.get_cython_table_params( + Series(dtype=np.float64), + [ + ("cumprod", Series([], Index([]), dtype=np.float64)), + ("cumsum", Series([], Index([]), dtype=np.float64)), + ], + ), + tm.get_cython_table_params( + Series([np.nan, 1, 2, 3]), + [ + ("cumprod", Series([np.nan, 1, 2, 6])), + ("cumsum", Series([np.nan, 1, 3, 6])), + ], + ), + tm.get_cython_table_params( + Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))] + ), + ), +) +def test_agg_cython_table_transform_series(series, func, expected): + # GH21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + result = series.agg(func) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "df, func, expected", + chain( + tm.get_cython_table_params( + DataFrame(), + [ + ("sum", Series(dtype="float64")), + ("max", Series(dtype="float64")), + ("min", Series(dtype="float64")), + ("all", Series(dtype=bool)), + ("any", Series(dtype=bool)), + ("mean", Series(dtype="float64")), + ("prod", Series(dtype="float64")), + ("std", Series(dtype="float64")), + ("var", Series(dtype="float64")), + ("median", Series(dtype="float64")), + ], + ), + tm.get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), + [ + ("sum", Series([1.0, 3])), + ("max", Series([1.0, 2])), + ("min", Series([1.0, 1])), + ("all", Series([True, True])), + ("any", Series([True, True])), + ("mean", Series([1, 1.5])), + ("prod", Series([1.0, 2])), + ("std", Series([np.nan, 0.707107])), + ("var", Series([np.nan, 0.5])), + ("median", Series([1, 1.5])), + ], + ), + ), +) +def test_agg_cython_table_frame(df, func, expected, axis): + # GH 21224 + 
# test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = df.agg(func, axis=axis) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "df, func, expected", + chain( + tm.get_cython_table_params( + DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())] + ), + tm.get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), + [ + ("cumprod", DataFrame([[np.nan, 1], [1, 2]])), + ("cumsum", DataFrame([[np.nan, 1], [1, 3]])), + ], + ), + ), +) +def test_agg_cython_table_transform_frame(df, func, expected, axis): + # GH 21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + if axis == "columns" or axis == 1: + # operating blockwise doesn't let us preserve dtypes + expected = expected.astype("float64") + + result = df.agg(func, axis=axis) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", series_transform_kernels) +def test_transform_groupby_kernel_series(request, string_series, op): + # GH 35964 + if op == "ngroup": + request.node.add_marker( + pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") + ) + # TODO(2.0) Remove after pad/backfill deprecation enforced + op = maybe_normalize_deprecated_kernels(op) + args = [0.0] if op == "fillna" else [] + ones = np.ones(string_series.shape[0]) + expected = string_series.groupby(ones).transform(op, *args) + result = string_series.transform(op, 0, *args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("op", frame_transform_kernels) +def test_transform_groupby_kernel_frame(request, axis, float_frame, op): + # TODO(2.0) Remove after pad/backfill deprecation enforced + op = maybe_normalize_deprecated_kernels(op) + + if op == "ngroup": + request.node.add_marker( + pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") + ) + + # GH 35964 + + args = [0.0] if op == "fillna" else [] + if axis == 0 or axis == "index": + ones = np.ones(float_frame.shape[0]) + else: + ones = np.ones(float_frame.shape[1]) + expected = float_frame.groupby(ones, axis=axis).transform(op, *args) + result = float_frame.transform(op, axis, *args) + tm.assert_frame_equal(result, expected) + + # same thing, but ensuring we have multiple blocks + assert "E" not in float_frame.columns + float_frame["E"] = float_frame["A"].copy() + assert len(float_frame._mgr.arrays) > 1 + + if axis == 0 or axis == "index": + ones = np.ones(float_frame.shape[0]) + else: + ones = np.ones(float_frame.shape[1]) + expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args) + result2 = float_frame.transform(op, axis, *args) + tm.assert_frame_equal(result2, expected2) + + +@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) +def test_transform_method_name(method): + # GH 19760 + df = DataFrame({"A": [-1, 2]}) + result = df.transform(method) + expected = operator.methodcaller(method)(df) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arithmetic/__init__.py b/pandas/tests/arithmetic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py new file mode 100644 index 00000000..f3173e8f --- /dev/null +++ b/pandas/tests/arithmetic/common.py @@ -0,0 +1,155 @@ +""" +Assertion helpers for arithmetic tests. 
+""" +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, + array, +) +import pandas._testing as tm +from pandas.core.arrays import ( + BooleanArray, + PandasArray, +) + + +def assert_cannot_add(left, right, msg="cannot add"): + """ + Helper to assert that left and right cannot be added. + + Parameters + ---------- + left : object + right : object + msg : str, default "cannot add" + """ + with pytest.raises(TypeError, match=msg): + left + right + with pytest.raises(TypeError, match=msg): + right + left + + +def assert_invalid_addsub_type(left, right, msg=None): + """ + Helper to assert that left and right can be neither added nor subtracted. + + Parameters + ---------- + left : object + right : object + msg : str or None, default None + """ + with pytest.raises(TypeError, match=msg): + left + right + with pytest.raises(TypeError, match=msg): + right + left + with pytest.raises(TypeError, match=msg): + left - right + with pytest.raises(TypeError, match=msg): + right - left + + +def get_upcast_box(left, right, is_cmp: bool = False): + """ + Get the box to use for 'expected' in an arithmetic or comparison operation. + + Parameters + left : Any + right : Any + is_cmp : bool, default False + Whether the operation is a comparison method. + """ + + if isinstance(left, DataFrame) or isinstance(right, DataFrame): + return DataFrame + if isinstance(left, Series) or isinstance(right, Series): + if is_cmp and isinstance(left, Index): + # Index does not defer for comparisons + return np.array + return Series + if isinstance(left, Index) or isinstance(right, Index): + if is_cmp: + return np.array + return Index + return tm.to_array + + +def assert_invalid_comparison(left, right, box): + """ + Assert that comparison operations with mismatched types behave correctly. + + Parameters + ---------- + left : np.ndarray, ExtensionArray, Index, or Series + right : object + box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array} + """ + # Not for tznaive-tzaware comparison + + # Note: not quite the same as how we do this for tm.box_expected + xbox = box if box not in [Index, array] else np.array + + def xbox2(x): + # Eventually we'd like this to be tighter, but for now we'll + # just exclude PandasArray[bool] + if isinstance(x, PandasArray): + return x._ndarray + if isinstance(x, BooleanArray): + # NB: we are assuming no pd.NAs for now + return x.astype(bool) + return x + + # rev_box: box to use for reversed comparisons + rev_box = xbox + if isinstance(right, Index) and isinstance(left, Series): + rev_box = np.array + + result = xbox2(left == right) + expected = xbox(np.zeros(result.shape, dtype=np.bool_)) + + tm.assert_equal(result, expected) + + result = xbox2(right == left) + tm.assert_equal(result, rev_box(expected)) + + result = xbox2(left != right) + tm.assert_equal(result, ~expected) + + result = xbox2(right != left) + tm.assert_equal(result, rev_box(~expected)) + + msg = "|".join( + [ + "Invalid comparison between", + "Cannot compare type", + "not supported between", + "invalid type promotion", + ( + # GH#36706 npdev 1.20.0 2020-09-28 + r"The DTypes and " + r" do not have a common DType. " + "For example they cannot be stored in a single array unless the " + "dtype is `object`." 
+ ), + ] + ) + with pytest.raises(TypeError, match=msg): + left < right + with pytest.raises(TypeError, match=msg): + left <= right + with pytest.raises(TypeError, match=msg): + left > right + with pytest.raises(TypeError, match=msg): + left >= right + with pytest.raises(TypeError, match=msg): + right < left + with pytest.raises(TypeError, match=msg): + right <= left + with pytest.raises(TypeError, match=msg): + right > left + with pytest.raises(TypeError, match=msg): + right >= left diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py new file mode 100644 index 00000000..e847f31c --- /dev/null +++ b/pandas/tests/arithmetic/conftest.py @@ -0,0 +1,232 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import RangeIndex +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) +from pandas.core.computation import expressions as expr + + +@pytest.fixture( + autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] +) +def switch_numexpr_min_elements(request): + _MIN_ELEMENTS = expr._MIN_ELEMENTS + expr._MIN_ELEMENTS = request.param + yield request.param + expr._MIN_ELEMENTS = _MIN_ELEMENTS + + +# ------------------------------------------------------------------ + +# doctest with +SKIP for one fixture fails during setup with +# 'DoctestItem' object has no attribute 'callspec' +# due to switch_numexpr_min_elements fixture +@pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) +def one(request): + """ + Several variants of integer value 1. The zero-dim integer array + behaves like an integer. + + This fixture can be used to check that datetimelike indexes handle + addition and subtraction of integers and zero-dimensional arrays + of integers. + + Examples + -------- + dti = pd.date_range('2016-01-01', periods=2, freq='H') + dti + DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'], + dtype='datetime64[ns]', freq='H') + dti + one + DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'], + dtype='datetime64[ns]', freq='H') + """ + return request.param + + +zeros = [ + box_cls([0] * 5, dtype=dtype) + for box_cls in [pd.Index, np.array, pd.array] + for dtype in [np.int64, np.uint64, np.float64] +] +zeros.extend( + [box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [pd.Index, np.array]] +) +zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([np.array(-0.0, dtype=np.float64)]) +zeros.extend([0, 0.0, -0.0]) + + +# doctest with +SKIP for zero fixture fails during setup with +# 'DoctestItem' object has no attribute 'callspec' +# due to switch_numexpr_min_elements fixture +@pytest.fixture(params=zeros) +def zero(request): + """ + Several types of scalar zeros and length 5 vectors of zeros. + + This fixture can be used to check that numeric-dtype indexes handle + division by any zero numeric-dtype. + + Uses vector of length 5 for broadcasting with `numeric_idx` fixture, + which creates numeric-dtype vectors also of length 5. 
+ + Examples + -------- + arr = RangeIndex(5) + arr / zeros + Float64Index([nan, inf, inf, inf, inf], dtype='float64') + """ + return request.param + + +# ------------------------------------------------------------------ +# Vector Fixtures + + +@pytest.fixture( + params=[ + Float64Index(np.arange(5, dtype="float64")), + Int64Index(np.arange(5, dtype="int64")), + UInt64Index(np.arange(5, dtype="uint64")), + RangeIndex(5), + ], + ids=lambda x: type(x).__name__, +) +def numeric_idx(request): + """ + Several types of numeric-dtypes Index objects + """ + return request.param + + +# ------------------------------------------------------------------ +# Scalar Fixtures + + +@pytest.fixture( + params=[ + pd.Timedelta("10m7s").to_pytimedelta(), + pd.Timedelta("10m7s"), + pd.Timedelta("10m7s").to_timedelta64(), + ], + ids=lambda x: type(x).__name__, +) +def scalar_td(request): + """ + Several variants of Timedelta scalars representing 10 minutes and 7 seconds. + """ + return request.param + + +@pytest.fixture( + params=[ + pd.offsets.Day(3), + pd.offsets.Hour(72), + pd.Timedelta(days=3).to_pytimedelta(), + pd.Timedelta("72:00:00"), + np.timedelta64(3, "D"), + np.timedelta64(72, "h"), + ], + ids=lambda x: type(x).__name__, +) +def three_days(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 3-day timedelta + """ + return request.param + + +@pytest.fixture( + params=[ + pd.offsets.Hour(2), + pd.offsets.Minute(120), + pd.Timedelta(hours=2).to_pytimedelta(), + pd.Timedelta(seconds=2 * 3600), + np.timedelta64(2, "h"), + np.timedelta64(120, "m"), + ], + ids=lambda x: type(x).__name__, +) +def two_hours(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 2-hour timedelta + """ + return request.param + + +_common_mismatch = [ + pd.offsets.YearBegin(2), + pd.offsets.MonthBegin(1), + pd.offsets.Minute(), +] + + +@pytest.fixture( + params=[ + pd.Timedelta(minutes=30).to_pytimedelta(), + np.timedelta64(30, "s"), + pd.Timedelta(seconds=30), + ] + + _common_mismatch +) +def not_hourly(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Hourly frequencies. + """ + return request.param + + +@pytest.fixture( + params=[ + np.timedelta64(4, "h"), + pd.Timedelta(hours=23).to_pytimedelta(), + pd.Timedelta("23:00:00"), + ] + + _common_mismatch +) +def not_daily(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Daily frequencies. + """ + return request.param + + +@pytest.fixture( + params=[ + np.timedelta64(365, "D"), + pd.Timedelta(days=365).to_pytimedelta(), + pd.Timedelta(days=365), + ] + + _common_mismatch +) +def mismatched_freq(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Monthly or Annual frequencies. 
+ """ + return request.param + + +# ------------------------------------------------------------------ + + +@pytest.fixture( + params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__ +) +def box_1d_array(request): + """ + Fixture to test behavior for Index, Series, tm.to_array, numpy Array and list + classes + """ + return request.param diff --git a/pandas/tests/arithmetic/test_array_ops.py b/pandas/tests/arithmetic/test_array_ops.py new file mode 100644 index 00000000..2c347d96 --- /dev/null +++ b/pandas/tests/arithmetic/test_array_ops.py @@ -0,0 +1,39 @@ +import operator + +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.ops.array_ops import ( + comparison_op, + na_logical_op, +) + + +def test_na_logical_op_2d(): + left = np.arange(8).reshape(4, 2) + right = left.astype(object) + right[0, 0] = np.nan + + # Check that we fall back to the vec_binop branch + with pytest.raises(TypeError, match="unsupported operand type"): + operator.or_(left, right) + + result = na_logical_op(left, right, operator.or_) + expected = right + tm.assert_numpy_array_equal(result, expected) + + +def test_object_comparison_2d(): + left = np.arange(9).reshape(3, 3).astype(object) + right = left.T + + result = comparison_op(left, right, operator.eq) + expected = np.eye(3).astype(bool) + tm.assert_numpy_array_equal(result, expected) + + # Ensure that cython doesn't raise on non-writeable arg, which + # we can get from np.broadcast_to + right.flags.writeable = False + result = comparison_op(left, right, operator.ne) + tm.assert_numpy_array_equal(result, ~expected) diff --git a/pandas/tests/arithmetic/test_categorical.py b/pandas/tests/arithmetic/test_categorical.py new file mode 100644 index 00000000..d6f3a13c --- /dev/null +++ b/pandas/tests/arithmetic/test_categorical.py @@ -0,0 +1,25 @@ +import numpy as np + +from pandas import ( + Categorical, + Series, +) +import pandas._testing as tm + + +class TestCategoricalComparisons: + def test_categorical_nan_equality(self): + cat = Series(Categorical(["a", "b", "c", np.nan])) + expected = Series([True, True, True, False]) + result = cat == cat + tm.assert_series_equal(result, expected) + + def test_categorical_tuple_equality(self): + # GH 18050 + ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)]) + expected = Series([True, False, True, False, False]) + result = ser == (0, 0) + tm.assert_series_equal(result, expected) + + result = ser.astype("category") == (0, 0) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py new file mode 100644 index 00000000..0b1d56a9 --- /dev/null +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -0,0 +1,2433 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. 
+# Specifically for datetime64 and datetime64tz dtypes +from datetime import ( + datetime, + time, + timedelta, +) +from itertools import ( + product, + starmap, +) +import operator +import warnings + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs.conversion import localize_pydatetime +from pandas._libs.tslibs.offsets import shift_months +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DateOffset, + DatetimeIndex, + NaT, + Period, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, +) +from pandas.core.ops import roperator +from pandas.tests.arithmetic.common import ( + assert_cannot_add, + assert_invalid_addsub_type, + assert_invalid_comparison, + get_upcast_box, +) + +# ------------------------------------------------------------------ +# Comparisons + + +class TestDatetime64ArrayLikeComparisons: + # Comparison tests for datetime64 vectors fully parametrized over + # DataFrame/Series/DatetimeIndex/DatetimeArray. Ideally all comparison + # tests will eventually end up here. + + def test_compare_zerodim(self, tz_naive_fixture, box_with_array): + # Test comparison with zero-dimensional array is unboxed + tz = tz_naive_fixture + box = box_with_array + dti = date_range("20130101", periods=3, tz=tz) + + other = np.array(dti.to_numpy()[0]) + + dtarr = tm.box_expected(dti, box) + xbox = get_upcast_box(dtarr, other, True) + result = dtarr <= other + expected = np.array([True, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + "foo", + -1, + 99, + 4.0, + object(), + timedelta(days=2), + # GH#19800, GH#19301 datetime.date comparison raises to + # match DatetimeIndex/Timestamp. 
This also matches the behavior + # of stdlib datetime.datetime + datetime(2001, 1, 1).date(), + # GH#19301 None and NaN are *not* cast to NaT for comparisons + None, + np.nan, + ], + ) + def test_dt64arr_cmp_scalar_invalid(self, other, tz_naive_fixture, box_with_array): + # GH#22074, GH#15966 + tz = tz_naive_fixture + + rng = date_range("1/1/2000", periods=10, tz=tz) + dtarr = tm.box_expected(rng, box_with_array) + assert_invalid_comparison(dtarr, other, box_with_array) + + @pytest.mark.parametrize( + "other", + [ + # GH#4968 invalid date/int comparisons + list(range(10)), + np.arange(10), + np.arange(10).astype(np.float32), + np.arange(10).astype(object), + pd.timedelta_range("1ns", periods=10).array, + np.array(pd.timedelta_range("1ns", periods=10)), + list(pd.timedelta_range("1ns", periods=10)), + pd.timedelta_range("1 Day", periods=10).astype(object), + pd.period_range("1971-01-01", freq="D", periods=10).array, + pd.period_range("1971-01-01", freq="D", periods=10).astype(object), + ], + ) + def test_dt64arr_cmp_arraylike_invalid( + self, other, tz_naive_fixture, box_with_array + ): + tz = tz_naive_fixture + + dta = date_range("1970-01-01", freq="ns", periods=10, tz=tz)._data + obj = tm.box_expected(dta, box_with_array) + assert_invalid_comparison(obj, other, box_with_array) + + def test_dt64arr_cmp_mixed_invalid(self, tz_naive_fixture): + tz = tz_naive_fixture + + dta = date_range("1970-01-01", freq="h", periods=5, tz=tz)._data + + other = np.array([0, 1, 2, dta[3], Timedelta(days=1)]) + result = dta == other + expected = np.array([False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dta != other + tm.assert_numpy_array_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + dta < other + with pytest.raises(TypeError, match=msg): + dta > other + with pytest.raises(TypeError, match=msg): + dta <= other + with pytest.raises(TypeError, match=msg): + dta >= other + + def test_dt64arr_nat_comparison(self, tz_naive_fixture, box_with_array): + # GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly + tz = tz_naive_fixture + box = box_with_array + + ts = Timestamp("2021-01-01", tz=tz) + ser = Series([ts, NaT]) + + obj = tm.box_expected(ser, box) + xbox = get_upcast_box(obj, ts, True) + + expected = Series([True, False], dtype=np.bool_) + expected = tm.box_expected(expected, xbox) + + result = obj == ts + tm.assert_equal(result, expected) + + +class TestDatetime64SeriesComparison: + # TODO: moved from tests.series.test_operators; needs cleanup + + @pytest.mark.parametrize( + "pair", + [ + ( + [Timestamp("2011-01-01"), NaT, Timestamp("2011-01-03")], + [NaT, NaT, Timestamp("2011-01-03")], + ), + ( + [Timedelta("1 days"), NaT, Timedelta("3 days")], + [NaT, NaT, Timedelta("3 days")], + ), + ( + [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="M")], + [NaT, NaT, Period("2011-03", freq="M")], + ), + ], + ) + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize("dtype", [None, object]) + @pytest.mark.parametrize( + "op, expected", + [ + (operator.eq, Series([False, False, True])), + (operator.ne, Series([True, True, False])), + (operator.lt, Series([False, False, False])), + (operator.gt, Series([False, False, False])), + (operator.ge, Series([False, False, True])), + (operator.le, Series([False, False, True])), + ], + ) + def test_nat_comparisons( + self, + dtype, + index_or_series, + reverse, + pair, + op, + expected, + 
): + box = index_or_series + l, r = pair + if reverse: + # add lhs / rhs switched data + l, r = r, l + + left = Series(l, dtype=dtype) + right = box(r, dtype=dtype) + + result = op(left, right) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + [Timestamp("2011-01-01"), NaT, Timestamp("2011-01-03")], + [Timedelta("1 days"), NaT, Timedelta("3 days")], + [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="M")], + ], + ) + @pytest.mark.parametrize("dtype", [None, object]) + def test_nat_comparisons_scalar(self, dtype, data, box_with_array): + box = box_with_array + + left = Series(data, dtype=dtype) + left = tm.box_expected(left, box) + xbox = get_upcast_box(left, NaT, True) + + expected = [False, False, False] + expected = tm.box_expected(expected, xbox) + if box is pd.array and dtype is object: + expected = pd.array(expected, dtype="bool") + + tm.assert_equal(left == NaT, expected) + tm.assert_equal(NaT == left, expected) + + expected = [True, True, True] + expected = tm.box_expected(expected, xbox) + if box is pd.array and dtype is object: + expected = pd.array(expected, dtype="bool") + tm.assert_equal(left != NaT, expected) + tm.assert_equal(NaT != left, expected) + + expected = [False, False, False] + expected = tm.box_expected(expected, xbox) + if box is pd.array and dtype is object: + expected = pd.array(expected, dtype="bool") + tm.assert_equal(left < NaT, expected) + tm.assert_equal(NaT > left, expected) + tm.assert_equal(left <= NaT, expected) + tm.assert_equal(NaT >= left, expected) + + tm.assert_equal(left > NaT, expected) + tm.assert_equal(NaT < left, expected) + tm.assert_equal(left >= NaT, expected) + tm.assert_equal(NaT <= left, expected) + + @pytest.mark.parametrize("val", [datetime(2000, 1, 4), datetime(2000, 1, 5)]) + def test_series_comparison_scalars(self, val): + series = Series(date_range("1/1/2000", periods=10)) + + result = series > val + expected = Series([x > val for x in series]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "left,right", [("lt", "gt"), ("le", "ge"), ("eq", "eq"), ("ne", "ne")] + ) + def test_timestamp_compare_series(self, left, right): + # see gh-4982 + # Make sure we can compare Timestamps on the right AND left hand side. 
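+        # Illustrative sketch (hypothetical helper, defined purely as documentation
+        # and never called by this test): with the Timestamp on either side, the
+        # elementwise answer is the same.
+        def _reversed_operand_sketch():
+            import pandas as pd
+
+            sketch_ser = pd.Series(pd.date_range("20010101", periods=3))
+            sketch_ts = pd.Timestamp("20010102")
+            assert ((sketch_ts > sketch_ser) == (sketch_ser < sketch_ts)).all()
+            assert ((sketch_ts == sketch_ser) == (sketch_ser == sketch_ts)).all()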
+ ser = Series(date_range("20010101", periods=10), name="dates") + s_nat = ser.copy(deep=True) + + ser[0] = Timestamp("nat") + ser[3] = Timestamp("nat") + + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # No NaT + expected = left_f(ser, Timestamp("20010109")) + result = right_f(Timestamp("20010109"), ser) + tm.assert_series_equal(result, expected) + + # NaT + expected = left_f(ser, Timestamp("nat")) + result = right_f(Timestamp("nat"), ser) + tm.assert_series_equal(result, expected) + + # Compare to Timestamp with series containing NaT + expected = left_f(s_nat, Timestamp("20010109")) + result = right_f(Timestamp("20010109"), s_nat) + tm.assert_series_equal(result, expected) + + # Compare to NaT with series containing NaT + expected = left_f(s_nat, NaT) + result = right_f(NaT, s_nat) + tm.assert_series_equal(result, expected) + + def test_dt64arr_timestamp_equality(self, box_with_array): + # GH#11034 + + ser = Series([Timestamp("2000-01-29 01:59:00"), Timestamp("2000-01-30"), NaT]) + ser = tm.box_expected(ser, box_with_array) + xbox = get_upcast_box(ser, ser, True) + + result = ser != ser + expected = tm.box_expected([False, False, True], xbox) + tm.assert_equal(result, expected) + + warn = FutureWarning if box_with_array is pd.DataFrame else None + with tm.assert_produces_warning(warn): + # alignment for frame vs series comparisons deprecated + result = ser != ser[0] + expected = tm.box_expected([False, True, True], xbox) + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(warn): + # alignment for frame vs series comparisons deprecated + result = ser != ser[2] + expected = tm.box_expected([True, True, True], xbox) + tm.assert_equal(result, expected) + + result = ser == ser + expected = tm.box_expected([True, True, False], xbox) + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(warn): + # alignment for frame vs series comparisons deprecated + result = ser == ser[0] + expected = tm.box_expected([True, False, False], xbox) + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(warn): + # alignment for frame vs series comparisons deprecated + result = ser == ser[2] + expected = tm.box_expected([False, False, False], xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "datetimelike", + [ + Timestamp("20130101"), + datetime(2013, 1, 1), + np.datetime64("2013-01-01T00:00", "ns"), + ], + ) + @pytest.mark.parametrize( + "op,expected", + [ + (operator.lt, [True, False, False, False]), + (operator.le, [True, True, False, False]), + (operator.eq, [False, True, False, False]), + (operator.gt, [False, False, False, True]), + ], + ) + def test_dt64_compare_datetime_scalar(self, datetimelike, op, expected): + # GH#17965, test for ability to compare datetime64[ns] columns + # to datetimelike + ser = Series( + [ + Timestamp("20120101"), + Timestamp("20130101"), + np.nan, + Timestamp("20130103"), + ], + name="A", + ) + result = op(ser, datetimelike) + expected = Series(expected, name="A") + tm.assert_series_equal(result, expected) + + +class TestDatetimeIndexComparisons: + + # TODO: moved from tests.indexes.test_base; parametrize and de-duplicate + def test_comparators(self, comparison_op): + index = tm.makeDateIndex(100) + element = index[len(index) // 2] + element = Timestamp(element).to_datetime64() + + arr = np.array(index) + arr_result = comparison_op(arr, element) + index_result = comparison_op(index, element) + + assert isinstance(index_result, np.ndarray) + 
tm.assert_numpy_array_equal(arr_result, index_result) + + @pytest.mark.parametrize( + "other", + [datetime(2016, 1, 1), Timestamp("2016-01-01"), np.datetime64("2016-01-01")], + ) + def test_dti_cmp_datetimelike(self, other, tz_naive_fixture): + tz = tz_naive_fixture + dti = date_range("2016-01-01", periods=2, tz=tz) + if tz is not None: + if isinstance(other, np.datetime64): + # no tzaware version available + return + other = localize_pydatetime(other, dti.tzinfo) + + result = dti == other + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dti > other + expected = np.array([False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = dti >= other + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = dti < other + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dti <= other + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_dti_cmp_nat(self, dtype, box_with_array): + + left = DatetimeIndex([Timestamp("2011-01-01"), NaT, Timestamp("2011-01-03")]) + right = DatetimeIndex([NaT, NaT, Timestamp("2011-01-03")]) + + left = tm.box_expected(left, box_with_array) + right = tm.box_expected(right, box_with_array) + xbox = get_upcast_box(left, right, True) + + lhs, rhs = left, right + if dtype is object: + lhs, rhs = left.astype(object), right.astype(object) + + result = rhs == lhs + expected = np.array([False, False, True]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + result = lhs != rhs + expected = np.array([True, True, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + expected = np.array([False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(lhs == NaT, expected) + tm.assert_equal(NaT == rhs, expected) + + expected = np.array([True, True, True]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(lhs != NaT, expected) + tm.assert_equal(NaT != lhs, expected) + + expected = np.array([False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(lhs < NaT, expected) + tm.assert_equal(NaT > lhs, expected) + + def test_dti_cmp_nat_behaves_like_float_cmp_nan(self): + fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) + fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) + + didx1 = DatetimeIndex( + ["2014-01-01", NaT, "2014-03-01", NaT, "2014-05-01", "2014-07-01"] + ) + didx2 = DatetimeIndex( + ["2014-02-01", "2014-03-01", NaT, NaT, "2014-06-01", "2014-07-01"] + ) + darr = np.array( + [ + np.datetime64("2014-02-01 00:00"), + np.datetime64("2014-03-01 00:00"), + np.datetime64("nat"), + np.datetime64("nat"), + np.datetime64("2014-06-01 00:00"), + np.datetime64("2014-07-01 00:00"), + ] + ) + + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = 
np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, np.nan), (didx1, NaT)]: + result = idx1 < val + expected = np.array([False, False, False, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 > val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + tm.assert_numpy_array_equal(result, expected) + result = idx1 >= val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, True, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: + result = idx1 < val + expected = np.array([True, False, False, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 > val + expected = np.array([False, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + expected = np.array([True, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 >= val + expected = np.array([False, False, True, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == val + expected = np.array([False, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, False, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + def test_comparison_tzawareness_compat(self, comparison_op, box_with_array): + # GH#18162 + op = comparison_op + box = box_with_array + + dr = date_range("2016-01-01", periods=6) + dz = dr.tz_localize("US/Pacific") + + dr = tm.box_expected(dr, box) + dz = tm.box_expected(dz, box) + + if box is pd.DataFrame: + tolist = lambda x: x.astype(object).values.tolist()[0] + else: + tolist = list + + if op not in [operator.eq, operator.ne]: + msg = ( + r"Invalid comparison between dtype=datetime64\[ns.*\] " + "and (Timestamp|DatetimeArray|list|ndarray)" + ) + with pytest.raises(TypeError, match=msg): + op(dr, dz) + + with pytest.raises(TypeError, match=msg): + op(dr, tolist(dz)) + with pytest.raises(TypeError, match=msg): + op(dr, np.array(tolist(dz), dtype=object)) + with pytest.raises(TypeError, match=msg): + op(dz, dr) + + with pytest.raises(TypeError, match=msg): + op(dz, tolist(dr)) + with pytest.raises(TypeError, match=msg): + op(dz, np.array(tolist(dr), dtype=object)) + + # The aware==aware and naive==naive comparisons should *not* raise + assert np.all(dr == dr) + assert np.all(dr == tolist(dr)) + assert np.all(tolist(dr) == dr) + assert np.all(np.array(tolist(dr), dtype=object) == dr) + assert np.all(dr == np.array(tolist(dr), dtype=object)) + + assert np.all(dz == dz) + assert np.all(dz == tolist(dz)) + assert np.all(tolist(dz) == dz) + assert np.all(np.array(tolist(dz), dtype=object) == dz) + assert np.all(dz == np.array(tolist(dz), dtype=object)) + + def test_comparison_tzawareness_compat_scalars(self, comparison_op, 
box_with_array): + # GH#18162 + op = comparison_op + + dr = date_range("2016-01-01", periods=6) + dz = dr.tz_localize("US/Pacific") + + dr = tm.box_expected(dr, box_with_array) + dz = tm.box_expected(dz, box_with_array) + + # Check comparisons against scalar Timestamps + ts = Timestamp("2000-03-14 01:59") + ts_tz = Timestamp("2000-03-14 01:59", tz="Europe/Amsterdam") + + assert np.all(dr > ts) + msg = r"Invalid comparison between dtype=datetime64\[ns.*\] and Timestamp" + if op not in [operator.eq, operator.ne]: + with pytest.raises(TypeError, match=msg): + op(dr, ts_tz) + + assert np.all(dz > ts_tz) + if op not in [operator.eq, operator.ne]: + with pytest.raises(TypeError, match=msg): + op(dz, ts) + + if op not in [operator.eq, operator.ne]: + # GH#12601: Check comparison against Timestamps and DatetimeIndex + with pytest.raises(TypeError, match=msg): + op(ts, dz) + + @pytest.mark.parametrize( + "other", + [datetime(2016, 1, 1), Timestamp("2016-01-01"), np.datetime64("2016-01-01")], + ) + # Bug in NumPy? https://github.com/numpy/numpy/issues/13841 + # Raising in __eq__ will fallback to NumPy, which warns, fails, + # then re-raises the original exception. So we just need to ignore. + @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") + @pytest.mark.filterwarnings("ignore:Converting timezone-aware:FutureWarning") + def test_scalar_comparison_tzawareness( + self, comparison_op, other, tz_aware_fixture, box_with_array + ): + op = comparison_op + tz = tz_aware_fixture + dti = date_range("2016-01-01", periods=2, tz=tz) + + dtarr = tm.box_expected(dti, box_with_array) + xbox = get_upcast_box(dtarr, other, True) + if op in [operator.eq, operator.ne]: + exbool = op is operator.ne + expected = np.array([exbool, exbool], dtype=bool) + expected = tm.box_expected(expected, xbox) + + result = op(dtarr, other) + tm.assert_equal(result, expected) + + result = op(other, dtarr) + tm.assert_equal(result, expected) + else: + msg = ( + r"Invalid comparison between dtype=datetime64\[ns, .*\] " + f"and {type(other).__name__}" + ) + with pytest.raises(TypeError, match=msg): + op(dtarr, other) + with pytest.raises(TypeError, match=msg): + op(other, dtarr) + + def test_nat_comparison_tzawareness(self, comparison_op): + # GH#19276 + # tzaware DatetimeIndex should not raise when compared to NaT + op = comparison_op + + dti = DatetimeIndex( + ["2014-01-01", NaT, "2014-03-01", NaT, "2014-05-01", "2014-07-01"] + ) + expected = np.array([op == operator.ne] * len(dti)) + result = op(dti, NaT) + tm.assert_numpy_array_equal(result, expected) + + result = op(dti.tz_localize("US/Pacific"), NaT) + tm.assert_numpy_array_equal(result, expected) + + def test_dti_cmp_str(self, tz_naive_fixture): + # GH#22074 + # regardless of tz, we expect these comparisons are valid + tz = tz_naive_fixture + rng = date_range("1/1/2000", periods=10, tz=tz) + other = "1/1/2000" + + result = rng == other + expected = np.array([True] + [False] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + expected = np.array([False] + [True] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng < other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = rng <= other + expected = np.array([True] + [False] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng > other + expected = np.array([False] + [True] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng >= other + expected = np.array([True] * 10) + 
tm.assert_numpy_array_equal(result, expected) + + def test_dti_cmp_list(self): + rng = date_range("1/1/2000", periods=10) + + result = rng == list(rng) + expected = rng == rng + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + pd.timedelta_range("1D", periods=10), + pd.timedelta_range("1D", periods=10).to_series(), + pd.timedelta_range("1D", periods=10).asi8.view("m8[ns]"), + ], + ids=lambda x: type(x).__name__, + ) + def test_dti_cmp_tdi_tzawareness(self, other): + # GH#22074 + # reversion test that we _don't_ call _assert_tzawareness_compat + # when comparing against TimedeltaIndex + dti = date_range("2000-01-01", periods=10, tz="Asia/Tokyo") + + result = dti == other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = dti != other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + msg = "Invalid comparison between" + with pytest.raises(TypeError, match=msg): + dti < other + with pytest.raises(TypeError, match=msg): + dti <= other + with pytest.raises(TypeError, match=msg): + dti > other + with pytest.raises(TypeError, match=msg): + dti >= other + + def test_dti_cmp_object_dtype(self): + # GH#22074 + dti = date_range("2000-01-01", periods=10, tz="Asia/Tokyo") + + other = dti.astype("O") + + result = dti == other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + + other = dti.tz_localize(None) + result = dti != other + tm.assert_numpy_array_equal(result, expected) + + other = np.array(list(dti[:5]) + [Timedelta(days=1)] * 5) + result = dti == other + expected = np.array([True] * 5 + [False] * 5) + tm.assert_numpy_array_equal(result, expected) + msg = ">=' not supported between instances of 'Timestamp' and 'Timedelta'" + with pytest.raises(TypeError, match=msg): + dti >= other + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestDatetime64Arithmetic: + # This class is intended for "finished" tests that are fully parametrized + # over DataFrame/Series/Index/DatetimeArray + + # ------------------------------------------------------------- + # Addition/Subtraction of timedelta-like + + @pytest.mark.arm_slow + def test_dt64arr_add_timedeltalike_scalar( + self, tz_naive_fixture, two_hours, box_with_array + ): + # GH#22005, GH#22163 check DataFrame doesn't raise TypeError + tz = tz_naive_fixture + + rng = date_range("2000-01-01", "2000-02-01", tz=tz) + expected = date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = rng + two_hours + tm.assert_equal(result, expected) + + result = two_hours + rng + tm.assert_equal(result, expected) + + rng += two_hours + tm.assert_equal(rng, expected) + + def test_dt64arr_sub_timedeltalike_scalar( + self, tz_naive_fixture, two_hours, box_with_array + ): + tz = tz_naive_fixture + + rng = date_range("2000-01-01", "2000-02-01", tz=tz) + expected = date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = rng - two_hours + tm.assert_equal(result, expected) + + rng -= two_hours + tm.assert_equal(rng, expected) + + def test_dt64_array_sub_dt_with_different_timezone(self, box_with_array): + t1 = date_range("20130101", periods=3).tz_localize("US/Eastern") + t1 = tm.box_expected(t1, box_with_array) + t2 = 
Timestamp("20130101").tz_localize("CET") + tnaive = Timestamp(20130101) + + result = t1 - t2 + expected = TimedeltaIndex( + ["0 days 06:00:00", "1 days 06:00:00", "2 days 06:00:00"] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = t2 - t1 + expected = TimedeltaIndex( + ["-1 days +18:00:00", "-2 days +18:00:00", "-3 days +18:00:00"] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + t1 - tnaive + + with pytest.raises(TypeError, match=msg): + tnaive - t1 + + def test_dt64_array_sub_dt64_array_with_different_timezone(self, box_with_array): + t1 = date_range("20130101", periods=3).tz_localize("US/Eastern") + t1 = tm.box_expected(t1, box_with_array) + t2 = date_range("20130101", periods=3).tz_localize("CET") + t2 = tm.box_expected(t2, box_with_array) + tnaive = date_range("20130101", periods=3) + + result = t1 - t2 + expected = TimedeltaIndex( + ["0 days 06:00:00", "0 days 06:00:00", "0 days 06:00:00"] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = t2 - t1 + expected = TimedeltaIndex( + ["-1 days +18:00:00", "-1 days +18:00:00", "-1 days +18:00:00"] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + t1 - tnaive + + with pytest.raises(TypeError, match=msg): + tnaive - t1 + + def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture): + # GH#23320 special handling for timedelta64("NaT") + tz = tz_naive_fixture + + dti = date_range("1994-04-01", periods=9, tz=tz, freq="QS") + other = np.timedelta64("NaT") + expected = DatetimeIndex(["NaT"] * 9, tz=tz) + + obj = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + msg = "cannot subtract" + with pytest.raises(TypeError, match=msg): + other - obj + + def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array): + + tz = tz_naive_fixture + dti = date_range("2016-01-01", periods=3, tz=tz) + tdi = TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"]) + tdarr = tdi.values + + expected = date_range("2015-12-31", "2016-01-02", periods=3, tz=tz) + + dtarr = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr + tdarr + tm.assert_equal(result, expected) + result = tdarr + dtarr + tm.assert_equal(result, expected) + + expected = date_range("2016-01-02", "2016-01-04", periods=3, tz=tz) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr - tdarr + tm.assert_equal(result, expected) + msg = "cannot subtract|(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + tdarr - dtarr + + # ----------------------------------------------------------------- + # Subtraction of datetime-like scalars + + @pytest.mark.parametrize( + "ts", + [ + Timestamp("2013-01-01"), + Timestamp("2013-01-01").to_pydatetime(), + Timestamp("2013-01-01").to_datetime64(), + # GH#7996, GH#22163 ensure non-nano datetime64 is converted to nano + # for DataFrame operation + np.datetime64("2013-01-01", "D"), + ], + ) 
+ def test_dt64arr_sub_dtscalar(self, box_with_array, ts): + # GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype + idx = date_range("2013-01-01", periods=3)._with_freq(None) + idx = tm.box_expected(idx, box_with_array) + + expected = TimedeltaIndex(["0 Days", "1 Day", "2 Days"]) + expected = tm.box_expected(expected, box_with_array) + + result = idx - ts + tm.assert_equal(result, expected) + + result = ts - idx + tm.assert_equal(result, -expected) + tm.assert_equal(result, -expected) + + def test_dt64arr_sub_timestamp_tzaware(self, box_with_array): + ser = date_range("2014-03-17", periods=2, freq="D", tz="US/Eastern") + ser = ser._with_freq(None) + ts = ser[0] + + ser = tm.box_expected(ser, box_with_array) + + delta_series = Series([np.timedelta64(0, "D"), np.timedelta64(1, "D")]) + expected = tm.box_expected(delta_series, box_with_array) + + tm.assert_equal(ser - ts, expected) + tm.assert_equal(ts - ser, -expected) + + def test_dt64arr_sub_NaT(self, box_with_array): + # GH#18808 + dti = DatetimeIndex([NaT, Timestamp("19900315")]) + ser = tm.box_expected(dti, box_with_array) + + result = ser - NaT + expected = Series([NaT, NaT], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + dti_tz = dti.tz_localize("Asia/Tokyo") + ser_tz = tm.box_expected(dti_tz, box_with_array) + + result = ser_tz - NaT + expected = Series([NaT, NaT], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + # ------------------------------------------------------------- + # Subtraction of datetime-like array-like + + def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): + dti = date_range("2016-01-01", periods=3, tz=tz_naive_fixture) + expected = dti - dti + + obj = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + with tm.assert_produces_warning(PerformanceWarning): + result = obj - obj.astype(object) + tm.assert_equal(result, expected) + + def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array): + dti = date_range("2016-01-01", periods=3, tz=None) + dt64vals = dti.values + + dtarr = tm.box_expected(dti, box_with_array) + + expected = dtarr - dtarr + result = dtarr - dt64vals + tm.assert_equal(result, expected) + result = dt64vals - dtarr + tm.assert_equal(result, expected) + + def test_dt64arr_aware_sub_dt64ndarray_raises( + self, tz_aware_fixture, box_with_array + ): + + tz = tz_aware_fixture + dti = date_range("2016-01-01", periods=3, tz=tz) + dt64vals = dti.values + + dtarr = tm.box_expected(dti, box_with_array) + msg = "Cannot subtract tz-naive and tz-aware datetime" + with pytest.raises(TypeError, match=msg): + dtarr - dt64vals + with pytest.raises(TypeError, match=msg): + dt64vals - dtarr + + # ------------------------------------------------------------- + # Addition of datetime-like others (invalid) + + def test_dt64arr_add_dtlike_raises(self, tz_naive_fixture, box_with_array): + # GH#22163 ensure DataFrame doesn't cast Timestamp to i8 + # GH#9631 + tz = tz_naive_fixture + + dti = date_range("2016-01-01", periods=3, tz=tz) + if tz is None: + dti2 = dti.tz_localize("US/Eastern") + else: + dti2 = dti.tz_localize(None) + dtarr = tm.box_expected(dti, box_with_array) + + assert_cannot_add(dtarr, dti.values) + assert_cannot_add(dtarr, dti) + assert_cannot_add(dtarr, dtarr) + assert_cannot_add(dtarr, dti[0]) + assert_cannot_add(dtarr, dti[0].to_pydatetime()) + assert_cannot_add(dtarr, 
dti[0].to_datetime64()) + assert_cannot_add(dtarr, dti2[0]) + assert_cannot_add(dtarr, dti2[0].to_pydatetime()) + assert_cannot_add(dtarr, np.datetime64("2011-01-01", "D")) + + # ------------------------------------------------------------- + # Other Invalid Addition/Subtraction + + # Note: freq here includes both Tick and non-Tick offsets; this is + # relevant because historically integer-addition was allowed if we had + # a freq. + @pytest.mark.parametrize("freq", ["H", "D", "W", "M", "MS", "Q", "B", None]) + @pytest.mark.parametrize("dtype", [None, "uint8"]) + def test_dt64arr_addsub_intlike( + self, dtype, box_with_array, freq, tz_naive_fixture + ): + # GH#19959, GH#19123, GH#19012 + tz = tz_naive_fixture + if box_with_array is pd.DataFrame: + # alignment headaches + return + + if freq is None: + dti = DatetimeIndex(["NaT", "2017-04-05 06:07:08"], tz=tz) + else: + dti = date_range("2016-01-01", periods=2, freq=freq, tz=tz) + + obj = box_with_array(dti) + other = np.array([4, -1], dtype=dtype) + + msg = "|".join( + [ + "Addition/subtraction of integers", + "cannot subtract DatetimeArray from", + # IntegerArray + "can only perform ops with numeric values", + "unsupported operand type.*Categorical", + r"unsupported operand type\(s\) for -: 'int' and 'Timestamp'", + ] + ) + assert_invalid_addsub_type(obj, 1, msg) + assert_invalid_addsub_type(obj, np.int64(2), msg) + assert_invalid_addsub_type(obj, np.array(3, dtype=np.int64), msg) + assert_invalid_addsub_type(obj, other, msg) + assert_invalid_addsub_type(obj, np.array(other), msg) + assert_invalid_addsub_type(obj, pd.array(other), msg) + assert_invalid_addsub_type(obj, pd.Categorical(other), msg) + assert_invalid_addsub_type(obj, pd.Index(other), msg) + assert_invalid_addsub_type(obj, pd.core.indexes.api.NumericIndex(other), msg) + assert_invalid_addsub_type(obj, Series(other), msg) + + @pytest.mark.parametrize( + "other", + [ + 3.14, + np.array([2.0, 3.0]), + # GH#13078 datetime +/- Period is invalid + Period("2011-01-01", freq="D"), + # https://github.com/pandas-dev/pandas/issues/10329 + time(1, 2, 3), + ], + ) + @pytest.mark.parametrize("dti_freq", [None, "D"]) + def test_dt64arr_add_sub_invalid(self, dti_freq, other, box_with_array): + dti = DatetimeIndex(["2011-01-01", "2011-01-02"], freq=dti_freq) + dtarr = tm.box_expected(dti, box_with_array) + msg = "|".join( + [ + "unsupported operand type", + "cannot (add|subtract)", + "cannot use operands with types", + "ufunc '?(add|subtract)'? 
cannot use operands with types", + "Concatenation operation is not implemented for NumPy arrays", + ] + ) + assert_invalid_addsub_type(dtarr, other, msg) + + @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"]) + @pytest.mark.parametrize("dti_freq", [None, "D"]) + def test_dt64arr_add_sub_parr( + self, dti_freq, pi_freq, box_with_array, box_with_array2 + ): + # GH#20049 subtracting PeriodIndex should raise TypeError + dti = DatetimeIndex(["2011-01-01", "2011-01-02"], freq=dti_freq) + pi = dti.to_period(pi_freq) + + dtarr = tm.box_expected(dti, box_with_array) + parr = tm.box_expected(pi, box_with_array2) + msg = "|".join( + [ + "cannot (add|subtract)", + "unsupported operand", + "descriptor.*requires", + "ufunc.*cannot use operands", + ] + ) + assert_invalid_addsub_type(dtarr, parr, msg) + + def test_dt64arr_addsub_time_objects_raises(self, box_with_array, tz_naive_fixture): + # https://github.com/pandas-dev/pandas/issues/10329 + + tz = tz_naive_fixture + + obj1 = date_range("2012-01-01", periods=3, tz=tz) + obj2 = [time(i, i, i) for i in range(3)] + + obj1 = tm.box_expected(obj1, box_with_array) + obj2 = tm.box_expected(obj2, box_with_array) + + msg = "|".join( + [ + "unsupported operand", + "cannot subtract DatetimeArray from ndarray", + ] + ) + + with warnings.catch_warnings(record=True): + # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being + # applied to Series or DatetimeIndex + # we aren't testing that here, so ignore. + warnings.simplefilter("ignore", PerformanceWarning) + + assert_invalid_addsub_type(obj1, obj2, msg=msg) + + # ------------------------------------------------------------- + # Other invalid operations + + @pytest.mark.parametrize( + "dt64_series", + [ + Series([Timestamp("19900315"), Timestamp("19900315")]), + Series([NaT, Timestamp("19900315")]), + Series([NaT, NaT], dtype="datetime64[ns]"), + ], + ) + @pytest.mark.parametrize("one", [1, 1.0, np.array(1)]) + def test_dt64_mul_div_numeric_invalid(self, one, dt64_series, box_with_array): + obj = tm.box_expected(dt64_series, box_with_array) + + msg = "cannot perform .* with this index type" + + # multiplication + with pytest.raises(TypeError, match=msg): + obj * one + with pytest.raises(TypeError, match=msg): + one * obj + + # division + with pytest.raises(TypeError, match=msg): + obj / one + with pytest.raises(TypeError, match=msg): + one / obj + + +class TestDatetime64DateOffsetArithmetic: + + # ------------------------------------------------------------- + # Tick DateOffsets + + # TODO: parametrize over timezone? 
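+    # Illustrative note (comment only): Tick offsets such as pd.offsets.Second(5)
+    # shift every element by a fixed timedelta and commute with addition, e.g.
+    #   >>> pd.Series([pd.Timestamp("20130101 9:01")]) + pd.offsets.Second(5)
+    #   0   2013-01-01 09:01:05
+    #   dtype: datetime64[ns]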
+ def test_dt64arr_series_add_tick_DateOffset(self, box_with_array): + # GH#4532 + # operate with pd.offsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + expected = Series( + [Timestamp("20130101 9:01:05"), Timestamp("20130101 9:02:05")] + ) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = ser + pd.offsets.Second(5) + tm.assert_equal(result, expected) + + result2 = pd.offsets.Second(5) + ser + tm.assert_equal(result2, expected) + + def test_dt64arr_series_sub_tick_DateOffset(self, box_with_array): + # GH#4532 + # operate with pd.offsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + expected = Series( + [Timestamp("20130101 9:00:55"), Timestamp("20130101 9:01:55")] + ) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = ser - pd.offsets.Second(5) + tm.assert_equal(result, expected) + + result2 = -pd.offsets.Second(5) + ser + tm.assert_equal(result2, expected) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + pd.offsets.Second(5) - ser + + @pytest.mark.parametrize( + "cls_name", ["Day", "Hour", "Minute", "Second", "Milli", "Micro", "Nano"] + ) + def test_dt64arr_add_sub_tick_DateOffset_smoke(self, cls_name, box_with_array): + # GH#4532 + # smoke tests for valid DateOffsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + ser = tm.box_expected(ser, box_with_array) + + offset_cls = getattr(pd.offsets, cls_name) + ser + offset_cls(5) + offset_cls(5) + ser + ser - offset_cls(5) + + def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array): + # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype + tz = tz_aware_fixture + if tz == "US/Pacific": + dates = date_range("2012-11-01", periods=3, tz=tz) + offset = dates + pd.offsets.Hour(5) + assert dates[0] + pd.offsets.Hour(5) == offset[0] + + dates = date_range("2010-11-01 00:00", periods=3, tz=tz, freq="H") + expected = DatetimeIndex( + ["2010-11-01 05:00", "2010-11-01 06:00", "2010-11-01 07:00"], + freq="H", + tz=tz, + ) + + dates = tm.box_expected(dates, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + for scalar in [pd.offsets.Hour(5), np.timedelta64(5, "h"), timedelta(hours=5)]: + offset = dates + scalar + tm.assert_equal(offset, expected) + offset = scalar + dates + tm.assert_equal(offset, expected) + + roundtrip = offset - scalar + tm.assert_equal(roundtrip, dates) + + msg = "|".join( + ["bad operand type for unary -", "cannot subtract DatetimeArray"] + ) + with pytest.raises(TypeError, match=msg): + scalar - dates + + # ------------------------------------------------------------- + # RelativeDelta DateOffsets + + def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array): + # GH#10699 + vec = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-03-31"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + Timestamp("2000-05-15"), + Timestamp("2001-06-15"), + ] + ) + vec = tm.box_expected(vec, box_with_array) + vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec + + # DateOffset relativedelta fastpath + relative_kwargs = [ + ("years", 2), + ("months", 5), + ("days", 3), + ("hours", 5), + ("minutes", 10), + ("seconds", 2), + ("microseconds", 5), + ] + for i, (unit, value) in enumerate(relative_kwargs): + off = DateOffset(**{unit: 
value}) + + expected = DatetimeIndex([x + off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + off) + + expected = DatetimeIndex([x - off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - off) + + off = DateOffset(**dict(relative_kwargs[: i + 1])) + + expected = DatetimeIndex([x + off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + off) + + expected = DatetimeIndex([x - off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - off) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + off - vec + + # ------------------------------------------------------------- + # Non-Tick, Non-RelativeDelta DateOffsets + + # TODO: redundant with test_dt64arr_add_sub_DateOffset? that includes + # tz-aware cases which this does not + @pytest.mark.parametrize( + "cls_and_kwargs", + [ + "YearBegin", + ("YearBegin", {"month": 5}), + "YearEnd", + ("YearEnd", {"month": 5}), + "MonthBegin", + "MonthEnd", + "SemiMonthEnd", + "SemiMonthBegin", + "Week", + ("Week", {"weekday": 3}), + "Week", + ("Week", {"weekday": 6}), + "BusinessDay", + "BDay", + "QuarterEnd", + "QuarterBegin", + "CustomBusinessDay", + "CDay", + "CBMonthEnd", + "CBMonthBegin", + "BMonthBegin", + "BMonthEnd", + "BusinessHour", + "BYearBegin", + "BYearEnd", + "BQuarterBegin", + ("LastWeekOfMonth", {"weekday": 2}), + ( + "FY5253Quarter", + { + "qtr_with_extra_week": 1, + "startingMonth": 1, + "weekday": 2, + "variation": "nearest", + }, + ), + ("FY5253", {"weekday": 0, "startingMonth": 2, "variation": "nearest"}), + ("WeekOfMonth", {"weekday": 2, "week": 2}), + "Easter", + ("DateOffset", {"day": 4}), + ("DateOffset", {"month": 5}), + ], + ) + @pytest.mark.parametrize("normalize", [True, False]) + @pytest.mark.parametrize("n", [0, 5]) + def test_dt64arr_add_sub_DateOffsets( + self, box_with_array, n, normalize, cls_and_kwargs + ): + # GH#10699 + # assert vectorized operation matches pointwise operations + + if isinstance(cls_and_kwargs, tuple): + # If cls_name param is a tuple, then 2nd entry is kwargs for + # the offset constructor + cls_name, kwargs = cls_and_kwargs + else: + cls_name = cls_and_kwargs + kwargs = {} + + if n == 0 and cls_name in [ + "WeekOfMonth", + "LastWeekOfMonth", + "FY5253Quarter", + "FY5253", + ]: + # passing n = 0 is invalid for these offset classes + return + + vec = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-03-31"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + Timestamp("2000-05-15"), + Timestamp("2001-06-15"), + ] + ) + vec = tm.box_expected(vec, box_with_array) + vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec + + offset_cls = getattr(pd.offsets, cls_name) + + with warnings.catch_warnings(record=True): + # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being + # applied to Series or DatetimeIndex + # we aren't testing that here, so ignore. 
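+            # Illustrative note (comment only): "non-vectorized" means the offset has
+            # no array fast path and is applied element by element in Python (several
+            # of the offsets parametrized above fall in this category), which is why
+            # pandas emits PerformanceWarning here.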
+ warnings.simplefilter("ignore", PerformanceWarning) + + offset = offset_cls(n, normalize=normalize, **kwargs) + + expected = DatetimeIndex([x + offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + offset) + + expected = DatetimeIndex([x - offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - offset) + + expected = DatetimeIndex([offset + x for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, offset + vec) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + offset - vec + + def test_dt64arr_add_sub_DateOffset(self, box_with_array): + # GH#10699 + s = date_range("2000-01-01", "2000-01-31", name="a") + s = tm.box_expected(s, box_with_array) + result = s + DateOffset(years=1) + result2 = DateOffset(years=1) + s + exp = date_range("2001-01-01", "2001-01-31", name="a")._with_freq(None) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + result = s - DateOffset(years=1) + exp = date_range("1999-01-01", "1999-01-31", name="a")._with_freq(None) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + + s = DatetimeIndex( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + s = tm.box_expected(s, box_with_array) + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = DatetimeIndex( + [ + Timestamp("2000-01-16 00:15:00", tz="US/Central"), + Timestamp("2000-02-16", tz="US/Central"), + ], + name="a", + ) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + s = DatetimeIndex( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + s = tm.box_expected(s, box_with_array) + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = DatetimeIndex( + [ + Timestamp("2000-01-31 00:15:00", tz="US/Central"), + Timestamp("2000-02-29", tz="US/Central"), + ], + name="a", + ) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + @pytest.mark.parametrize( + "other", + [ + np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]), + np.array([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()]), + np.array( # matching offsets + [pd.offsets.DateOffset(years=1), pd.offsets.DateOffset(years=1)] + ), + ], + ) + @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub]) + @pytest.mark.parametrize("box_other", [True, False]) + def test_dt64arr_add_sub_offset_array( + self, tz_naive_fixture, box_with_array, box_other, op, other + ): + # GH#18849 + # GH#10699 array of offsets + + tz = tz_naive_fixture + dti = date_range("2017-01-01", periods=2, tz=tz) + dtarr = tm.box_expected(dti, box_with_array) + + other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + expected = DatetimeIndex([op(dti[n], other[n]) for n in range(len(dti))]) + expected = tm.box_expected(expected, box_with_array) + + if box_other: + other = tm.box_expected(other, box_with_array) + + with tm.assert_produces_warning(PerformanceWarning): + res = op(dtarr, other) + + tm.assert_equal(res, expected) + + @pytest.mark.parametrize( + "op, offset, exp, exp_freq", + [ + ( + "__add__", + DateOffset(months=3, days=10), + [ + Timestamp("2014-04-11"), + 
Timestamp("2015-04-11"), + Timestamp("2016-04-11"), + Timestamp("2017-04-11"), + ], + None, + ), + ( + "__add__", + DateOffset(months=3), + [ + Timestamp("2014-04-01"), + Timestamp("2015-04-01"), + Timestamp("2016-04-01"), + Timestamp("2017-04-01"), + ], + "AS-APR", + ), + ( + "__sub__", + DateOffset(months=3, days=10), + [ + Timestamp("2013-09-21"), + Timestamp("2014-09-21"), + Timestamp("2015-09-21"), + Timestamp("2016-09-21"), + ], + None, + ), + ( + "__sub__", + DateOffset(months=3), + [ + Timestamp("2013-10-01"), + Timestamp("2014-10-01"), + Timestamp("2015-10-01"), + Timestamp("2016-10-01"), + ], + "AS-OCT", + ), + ], + ) + def test_dti_add_sub_nonzero_mth_offset( + self, op, offset, exp, exp_freq, tz_aware_fixture, box_with_array + ): + # GH 26258 + tz = tz_aware_fixture + date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="AS", tz=tz) + date = tm.box_expected(date, box_with_array, False) + mth = getattr(date, op) + result = mth(offset) + + expected = DatetimeIndex(exp, tz=tz) + expected = tm.box_expected(expected, box_with_array, False) + tm.assert_equal(result, expected) + + +class TestDatetime64OverflowHandling: + # TODO: box + de-duplicate + + def test_dt64_overflow_masking(self, box_with_array): + # GH#25317 + left = Series([Timestamp("1969-12-31")]) + right = Series([NaT]) + + left = tm.box_expected(left, box_with_array) + right = tm.box_expected(right, box_with_array) + + expected = TimedeltaIndex([NaT]) + expected = tm.box_expected(expected, box_with_array) + + result = left - right + tm.assert_equal(result, expected) + + def test_dt64_series_arith_overflow(self): + # GH#12534, fixed by GH#19024 + dt = Timestamp("1700-01-31") + td = Timedelta("20000 Days") + dti = date_range("1949-09-30", freq="100Y", periods=4) + ser = Series(dti) + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + ser - dt + with pytest.raises(OverflowError, match=msg): + dt - ser + with pytest.raises(OverflowError, match=msg): + ser + td + with pytest.raises(OverflowError, match=msg): + td + ser + + ser.iloc[-1] = NaT + expected = Series( + ["2004-10-03", "2104-10-04", "2204-10-04", "NaT"], dtype="datetime64[ns]" + ) + res = ser + td + tm.assert_series_equal(res, expected) + res = td + ser + tm.assert_series_equal(res, expected) + + ser.iloc[1:] = NaT + expected = Series(["91279 Days", "NaT", "NaT", "NaT"], dtype="timedelta64[ns]") + res = ser - dt + tm.assert_series_equal(res, expected) + res = dt - ser + tm.assert_series_equal(res, -expected) + + def test_datetimeindex_sub_timestamp_overflow(self): + dtimax = pd.to_datetime(["2021-12-28 17:19", Timestamp.max]) + dtimin = pd.to_datetime(["2021-12-28 17:19", Timestamp.min]) + + tsneg = Timestamp("1950-01-01") + ts_neg_variants = [ + tsneg, + tsneg.to_pydatetime(), + tsneg.to_datetime64().astype("datetime64[ns]"), + tsneg.to_datetime64().astype("datetime64[D]"), + ] + + tspos = Timestamp("1980-01-01") + ts_pos_variants = [ + tspos, + tspos.to_pydatetime(), + tspos.to_datetime64().astype("datetime64[ns]"), + tspos.to_datetime64().astype("datetime64[D]"), + ] + msg = "Overflow in int64 addition" + for variant in ts_neg_variants: + with pytest.raises(OverflowError, match=msg): + dtimax - variant + + expected = Timestamp.max.value - tspos.value + for variant in ts_pos_variants: + res = dtimax - variant + assert res[1].value == expected + + expected = Timestamp.min.value - tsneg.value + for variant in ts_neg_variants: + res = dtimin - variant + assert res[1].value == expected + + for variant in ts_pos_variants: + 
+            with pytest.raises(OverflowError, match=msg):
+                dtimin - variant
+
+    def test_datetimeindex_sub_datetimeindex_overflow(self):
+        # GH#22492, GH#22508
+        dtimax = pd.to_datetime(["2021-12-28 17:19", Timestamp.max])
+        dtimin = pd.to_datetime(["2021-12-28 17:19", Timestamp.min])
+
+        ts_neg = pd.to_datetime(["1950-01-01", "1950-01-01"])
+        ts_pos = pd.to_datetime(["1980-01-01", "1980-01-01"])
+
+        # General tests
+        expected = Timestamp.max.value - ts_pos[1].value
+        result = dtimax - ts_pos
+        assert result[1].value == expected
+
+        expected = Timestamp.min.value - ts_neg[1].value
+        result = dtimin - ts_neg
+        assert result[1].value == expected
+        msg = "Overflow in int64 addition"
+        with pytest.raises(OverflowError, match=msg):
+            dtimax - ts_neg
+
+        with pytest.raises(OverflowError, match=msg):
+            dtimin - ts_pos
+
+        # Edge cases
+        tmin = pd.to_datetime([Timestamp.min])
+        t1 = tmin + Timedelta.max + Timedelta("1us")
+        with pytest.raises(OverflowError, match=msg):
+            t1 - tmin
+
+        tmax = pd.to_datetime([Timestamp.max])
+        t2 = tmax + Timedelta.min - Timedelta("1us")
+        with pytest.raises(OverflowError, match=msg):
+            tmax - t2
+
+
+class TestTimestampSeriesArithmetic:
+    def test_empty_series_add_sub(self, box_with_array):
+        # GH#13844
+        a = Series(dtype="M8[ns]")
+        b = Series(dtype="m8[ns]")
+        a = box_with_array(a)
+        b = box_with_array(b)
+        tm.assert_equal(a, a + b)
+        tm.assert_equal(a, a - b)
+        tm.assert_equal(a, b + a)
+        msg = "cannot subtract"
+        with pytest.raises(TypeError, match=msg):
+            b - a
+
+    def test_operators_datetimelike(self):
+
+        # ## timedelta64 ###
+        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
+        td1.iloc[2] = np.nan
+
+        # ## datetime64 ###
+        dt1 = Series(
+            [
+                Timestamp("20111230"),
+                Timestamp("20120101"),
+                Timestamp("20120103"),
+            ]
+        )
+        dt1.iloc[2] = np.nan
+        dt2 = Series(
+            [
+                Timestamp("20111231"),
+                Timestamp("20120102"),
+                Timestamp("20120104"),
+            ]
+        )
+        dt1 - dt2
+        dt2 - dt1
+
+        # datetime64 with timedelta
+        dt1 + td1
+        td1 + dt1
+        dt1 - td1
+
+        # timedelta with datetime64
+        td1 + dt1
+        dt1 + td1
+
+    def test_dt64ser_sub_datetime_dtype(self):
+        ts = Timestamp(datetime(1993, 1, 7, 13, 30, 00))
+        dt = datetime(1993, 6, 22, 13, 30)
+        ser = Series([ts])
+        result = pd.to_timedelta(np.abs(ser - dt))
+        assert result.dtype == "timedelta64[ns]"
+
+    # -------------------------------------------------------------
+    # TODO: This next block of tests came from tests.series.test_operators,
+    # needs to be de-duplicated and parametrized over `box` classes
+
+    @pytest.mark.parametrize(
+        "left, right, op_fail",
+        [
+            [
+                [Timestamp("20111230"), Timestamp("20120101"), NaT],
+                [Timestamp("20111231"), Timestamp("20120102"), Timestamp("20120104")],
+                ["__sub__", "__rsub__"],
+            ],
+            [
+                [Timestamp("20111230"), Timestamp("20120101"), NaT],
+                [timedelta(minutes=5, seconds=3), timedelta(minutes=5, seconds=3), NaT],
+                ["__add__", "__radd__", "__sub__"],
+            ],
+            [
+                [
+                    Timestamp("20111230", tz="US/Eastern"),
+                    Timestamp("20111230", tz="US/Eastern"),
+                    NaT,
+                ],
+                [timedelta(minutes=5, seconds=3), NaT, timedelta(minutes=5, seconds=3)],
+                ["__add__", "__radd__", "__sub__"],
+            ],
+        ],
+    )
+    def test_operators_datetimelike_invalid(
+        self, left, right, op_fail, all_arithmetic_operators
+    ):
+        # these are all TypeError ops
+        op_str = all_arithmetic_operators
+        arg1 = Series(left)
+        arg2 = Series(right)
+        # check that we are getting a TypeError
+        # with 'operate' (from core/ops.py) for the ops that are not
+        # defined
+        op = getattr(arg1, op_str, None)
+        # Previously, _validate_for_numeric_binop
in core/indexes/base.py + # did this for us. + if op_str not in op_fail: + with pytest.raises( + TypeError, match="operate|[cC]annot|unsupported operand" + ): + op(arg2) + else: + # Smoke test + op(arg2) + + def test_sub_single_tz(self): + # GH#12290 + s1 = Series([Timestamp("2016-02-10", tz="America/Sao_Paulo")]) + s2 = Series([Timestamp("2016-02-08", tz="America/Sao_Paulo")]) + result = s1 - s2 + expected = Series([Timedelta("2days")]) + tm.assert_series_equal(result, expected) + result = s2 - s1 + expected = Series([Timedelta("-2days")]) + tm.assert_series_equal(result, expected) + + def test_dt64tz_series_sub_dtitz(self): + # GH#19071 subtracting tzaware DatetimeIndex from tzaware Series + # (with same tz) raises, fixed by #19024 + dti = date_range("1999-09-30", periods=10, tz="US/Pacific") + ser = Series(dti) + expected = Series(TimedeltaIndex(["0days"] * 10)) + + res = dti - ser + tm.assert_series_equal(res, expected) + res = ser - dti + tm.assert_series_equal(res, expected) + + def test_sub_datetime_compat(self): + # see GH#14088 + s = Series([datetime(2016, 8, 23, 12, tzinfo=pytz.utc), NaT]) + dt = datetime(2016, 8, 22, 12, tzinfo=pytz.utc) + exp = Series([Timedelta("1 days"), NaT]) + tm.assert_series_equal(s - dt, exp) + tm.assert_series_equal(s - Timestamp(dt), exp) + + def test_dt64_series_add_mixed_tick_DateOffset(self): + # GH#4532 + # operate with pd.offsets + s = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + + result = s + pd.offsets.Milli(5) + result2 = pd.offsets.Milli(5) + s + expected = Series( + [Timestamp("20130101 9:01:00.005"), Timestamp("20130101 9:02:00.005")] + ) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + result = s + pd.offsets.Minute(5) + pd.offsets.Milli(5) + expected = Series( + [Timestamp("20130101 9:06:00.005"), Timestamp("20130101 9:07:00.005")] + ) + tm.assert_series_equal(result, expected) + + def test_datetime64_ops_nat(self): + # GH#11349 + datetime_series = Series([NaT, Timestamp("19900315")]) + nat_series_dtype_timestamp = Series([NaT, NaT], dtype="datetime64[ns]") + single_nat_dtype_datetime = Series([NaT], dtype="datetime64[ns]") + + # subtraction + tm.assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) + msg = "bad operand type for unary -: 'DatetimeArray'" + with pytest.raises(TypeError, match=msg): + -single_nat_dtype_datetime + datetime_series + + tm.assert_series_equal( + -NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + with pytest.raises(TypeError, match=msg): + -single_nat_dtype_datetime + nat_series_dtype_timestamp + + # addition + tm.assert_series_equal( + nat_series_dtype_timestamp + NaT, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + + tm.assert_series_equal( + nat_series_dtype_timestamp + NaT, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + + # ------------------------------------------------------------- + # Timezone-Centric Tests + + def test_operators_datetimelike_with_timezones(self): + tz = "US/Eastern" + dt1 = Series(date_range("2000-01-01 09:00:00", periods=5, tz=tz), name="foo") + dt2 = dt1.copy() + dt2.iloc[2] = np.nan + + td1 = Series(pd.timedelta_range("1 days 1 min", periods=5, freq="H")) + td2 = td1.copy() + td2.iloc[1] = np.nan + assert td2._values.freq is None + + result = dt1 + td1[0] + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + 
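+        # Illustrative note (comment only): the expected value is built by doing the
+        # same addition on the tz-naive (wall-clock) values and re-localizing; that
+        # matches the tz-aware result here because none of these January timestamps
+        # cross a DST transition, where wall-clock and absolute arithmetic diverge.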
tm.assert_series_equal(result, exp) + + result = dt2 + td2[0] + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + # odd numpy behavior with scalar timedeltas + result = td1[0] + dt1 + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = td2[0] + dt2 + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt1 - td1[0] + exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + td1[0] - dt1 + + result = dt2 - td2[0] + exp = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + with pytest.raises(TypeError, match=msg): + td2[0] - dt2 + + result = dt1 + td1 + exp = (dt1.dt.tz_localize(None) + td1).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt2 + td2 + exp = (dt2.dt.tz_localize(None) + td2).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt1 - td1 + exp = (dt1.dt.tz_localize(None) - td1).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt2 - td2 + exp = (dt2.dt.tz_localize(None) - td2).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + msg = "cannot (add|subtract)" + with pytest.raises(TypeError, match=msg): + td1 - dt1 + with pytest.raises(TypeError, match=msg): + td2 - dt2 + + +class TestDatetimeIndexArithmetic: + # ------------------------------------------------------------- + # Binary operations DatetimeIndex and TimedeltaIndex/array + + def test_dti_add_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = date_range("2017-01-01", periods=10, tz=tz) + expected = expected._with_freq(None) + + # add with TimedeltaIndex + result = dti + tdi + tm.assert_index_equal(result, expected) + + result = tdi + dti + tm.assert_index_equal(result, expected) + + # add with timedelta64 array + result = dti + tdi.values + tm.assert_index_equal(result, expected) + + result = tdi.values + dti + tm.assert_index_equal(result, expected) + + def test_dti_iadd_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = date_range("2017-01-01", periods=10, tz=tz) + expected = expected._with_freq(None) + + # iadd with TimedeltaIndex + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result += tdi + tm.assert_index_equal(result, expected) + + result = pd.timedelta_range("0 days", periods=10) + result += dti + tm.assert_index_equal(result, expected) + + # iadd with timedelta64 array + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result += tdi.values + tm.assert_index_equal(result, expected) + + result = pd.timedelta_range("0 days", periods=10) + result += dti + tm.assert_index_equal(result, expected) + + def test_dti_sub_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = date_range("2017-01-01", periods=10, tz=tz, freq="-1D") + expected = expected._with_freq(None) + + # sub with TimedeltaIndex + result = dti - tdi + tm.assert_index_equal(result, expected) + + msg = 
"cannot subtract .*TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi - dti + + # sub with timedelta64 array + result = dti - tdi.values + tm.assert_index_equal(result, expected) + + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi.values - dti + + def test_dti_isub_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = date_range("2017-01-01", periods=10, tz=tz, freq="-1D") + expected = expected._with_freq(None) + + # isub with TimedeltaIndex + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result -= tdi + tm.assert_index_equal(result, expected) + + # DTA.__isub__ GH#43904 + dta = dti._data.copy() + dta -= tdi + tm.assert_datetime_array_equal(dta, expected._data) + + out = dti._data.copy() + np.subtract(out, tdi, out=out) + tm.assert_datetime_array_equal(out, expected._data) + + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi -= dti + + # isub with timedelta64 array + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result -= tdi.values + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError, match=msg): + tdi.values -= dti + + with pytest.raises(TypeError, match=msg): + tdi._values -= dti + + # ------------------------------------------------------------- + # Binary Operations DatetimeIndex and datetime-like + # TODO: A couple other tests belong in this section. Move them in + # A PR where there isn't already a giant diff. + + # ------------------------------------------------------------- + + def test_dta_add_sub_index(self, tz_naive_fixture): + # Check that DatetimeArray defers to Index classes + dti = date_range("20130101", periods=3, tz=tz_naive_fixture) + dta = dti.array + result = dta - dti + expected = dti - dti + tm.assert_index_equal(result, expected) + + tdi = result + result = dta + tdi + expected = dti + tdi + tm.assert_index_equal(result, expected) + + result = dta - tdi + expected = dti - tdi + tm.assert_index_equal(result, expected) + + def test_sub_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now changed to + # return subtraction -> TimeDeltaIndex (GH ...) 
+ + dti = date_range("20130101", periods=3) + dti_tz = date_range("20130101", periods=3).tz_localize("US/Eastern") + expected = TimedeltaIndex([0, 0, 0]) + + result = dti - dti + tm.assert_index_equal(result, expected) + + result = dti_tz - dti_tz + tm.assert_index_equal(result, expected) + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + dti_tz - dti + + with pytest.raises(TypeError, match=msg): + dti - dti_tz + + # isub + dti -= dti + tm.assert_index_equal(dti, expected) + + # different length raises ValueError + dti1 = date_range("20130101", periods=3) + dti2 = date_range("20130101", periods=4) + msg = "cannot add indices of unequal length" + with pytest.raises(ValueError, match=msg): + dti1 - dti2 + + # NaN propagation + dti1 = DatetimeIndex(["2012-01-01", np.nan, "2012-01-03"]) + dti2 = DatetimeIndex(["2012-01-02", "2012-01-03", np.nan]) + expected = TimedeltaIndex(["1 days", np.nan, np.nan]) + result = dti2 - dti1 + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------------- + # TODO: Most of this block is moved from series or frame tests, needs + # cleanup, box-parametrization, and de-duplication + + @pytest.mark.parametrize("op", [operator.add, operator.sub]) + def test_timedelta64_equal_timedelta_supported_ops(self, op, box_with_array): + ser = Series( + [ + Timestamp("20130301"), + Timestamp("20130228 23:00:00"), + Timestamp("20130228 22:00:00"), + Timestamp("20130228 21:00:00"), + ] + ) + obj = box_with_array(ser) + + intervals = ["D", "h", "m", "s", "us"] + + def timedelta64(*args): + # see casting notes in NumPy gh-12927 + return np.sum(list(starmap(np.timedelta64, zip(args, intervals)))) + + for d, h, m, s, us in product(*([range(2)] * 5)): + nptd = timedelta64(d, h, m, s, us) + pytd = timedelta(days=d, hours=h, minutes=m, seconds=s, microseconds=us) + lhs = op(obj, nptd) + rhs = op(obj, pytd) + + tm.assert_equal(lhs, rhs) + + def test_ops_nat_mixed_datetime64_timedelta64(self): + # GH#11349 + timedelta_series = Series([NaT, Timedelta("1s")]) + datetime_series = Series([NaT, Timestamp("19900315")]) + nat_series_dtype_timedelta = Series([NaT, NaT], dtype="timedelta64[ns]") + nat_series_dtype_timestamp = Series([NaT, NaT], dtype="datetime64[ns]") + single_nat_dtype_datetime = Series([NaT], dtype="datetime64[ns]") + single_nat_dtype_timedelta = Series([NaT], dtype="timedelta64[ns]") + + # subtraction + tm.assert_series_equal( + datetime_series - single_nat_dtype_datetime, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + datetime_series - single_nat_dtype_timedelta, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + datetime_series, nat_series_dtype_timestamp + ) + + # without a Series wrapping the NaT, it is ambiguous + # whether it is a datetime64 or timedelta64 + # defaults to interpreting it as timedelta64 + tm.assert_series_equal( + nat_series_dtype_timestamp - single_nat_dtype_datetime, + nat_series_dtype_timedelta, + ) + + tm.assert_series_equal( + nat_series_dtype_timestamp - single_nat_dtype_timedelta, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + msg = "cannot subtract a datelike" + with pytest.raises(TypeError, match=msg): + timedelta_series - single_nat_dtype_datetime + + # addition + tm.assert_series_equal( + nat_series_dtype_timestamp + single_nat_dtype_timedelta, + 
nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + + tm.assert_series_equal( + nat_series_dtype_timestamp + single_nat_dtype_timedelta, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_datetime, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_datetime + nat_series_dtype_timedelta, + nat_series_dtype_timestamp, + ) + + def test_ufunc_coercions(self): + idx = date_range("2011-01-01", periods=3, freq="2D", name="x") + + delta = np.timedelta64(1, "D") + exp = date_range("2011-01-02", periods=3, freq="2D", name="x") + for result in [idx + delta, np.add(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + exp = date_range("2010-12-31", periods=3, freq="2D", name="x") + + for result in [idx - delta, np.subtract(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + # When adding/subtracting an ndarray (which has no .freq), the result + # does not infer freq + idx = idx._with_freq(None) + delta = np.array( + [np.timedelta64(1, "D"), np.timedelta64(2, "D"), np.timedelta64(3, "D")] + ) + exp = DatetimeIndex(["2011-01-02", "2011-01-05", "2011-01-08"], name="x") + + for result in [idx + delta, np.add(idx, delta)]: + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + exp = DatetimeIndex(["2010-12-31", "2011-01-01", "2011-01-02"], name="x") + for result in [idx - delta, np.subtract(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_dti_add_series(self, tz_naive_fixture, names): + # GH#13905 + tz = tz_naive_fixture + index = DatetimeIndex( + ["2016-06-28 05:30", "2016-06-28 05:31"], tz=tz, name=names[0] + ) + ser = Series([Timedelta(seconds=5)] * 2, index=index, name=names[1]) + expected = Series(index + Timedelta(seconds=5), index=index, name=names[2]) + + # passing name arg isn't enough when names[2] is None + expected.name = names[2] + assert expected.dtype == index.dtype + result = ser + index + tm.assert_series_equal(result, expected) + result2 = index + ser + tm.assert_series_equal(result2, expected) + + expected = index + Timedelta(seconds=5) + result3 = ser.values + index + tm.assert_index_equal(result3, expected) + result4 = index + ser.values + tm.assert_index_equal(result4, expected) + + @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub]) + def test_dti_addsub_offset_arraylike( + self, tz_naive_fixture, names, op, index_or_series + ): + # GH#18849, GH#19744 + other_box = index_or_series + + tz = tz_naive_fixture + dti = date_range("2017-01-01", periods=2, tz=tz, name=names[0]) + other = other_box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], name=names[1]) + + xbox = get_upcast_box(dti, other) + + with tm.assert_produces_warning(PerformanceWarning): + res = op(dti, other) + + expected = DatetimeIndex( + [op(dti[n], other[n]) for n in range(len(dti))], name=names[2], freq="infer" + ) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(res, expected) + + @pytest.mark.parametrize("other_box", [pd.Index, np.array]) + def test_dti_addsub_object_arraylike( + self, tz_naive_fixture, box_with_array, 
other_box + ): + tz = tz_naive_fixture + + dti = date_range("2017-01-01", periods=2, tz=tz) + dtarr = tm.box_expected(dti, box_with_array) + other = other_box([pd.offsets.MonthEnd(), Timedelta(days=4)]) + xbox = get_upcast_box(dtarr, other) + + expected = DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture) + expected = tm.box_expected(expected, xbox) + + with tm.assert_produces_warning(PerformanceWarning): + result = dtarr + other + tm.assert_equal(result, expected) + + expected = DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture) + expected = tm.box_expected(expected, xbox) + + with tm.assert_produces_warning(PerformanceWarning): + result = dtarr - other + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("years", [-1, 0, 1]) +@pytest.mark.parametrize("months", [-2, 0, 2]) +def test_shift_months(years, months): + dti = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + ] + ) + actual = DatetimeIndex(shift_months(dti.asi8, years * 12 + months)) + + raw = [x + pd.offsets.DateOffset(years=years, months=months) for x in dti] + expected = DatetimeIndex(raw) + tm.assert_index_equal(actual, expected) + + +def test_dt64arr_addsub_object_dtype_2d(): + # block-wise DataFrame operations will require operating on 2D + # DatetimeArray/TimedeltaArray, so check that specifically. + dti = date_range("1994-02-13", freq="2W", periods=4) + dta = dti._data.reshape((4, 1)) + + other = np.array([[pd.offsets.Day(n)] for n in range(4)]) + assert other.shape == dta.shape + + with tm.assert_produces_warning(PerformanceWarning): + result = dta + other + with tm.assert_produces_warning(PerformanceWarning): + expected = (dta[:, 0] + other[:, 0]).reshape(-1, 1) + + assert isinstance(result, DatetimeArray) + assert result.freq is None + tm.assert_numpy_array_equal(result._data, expected._data) + + with tm.assert_produces_warning(PerformanceWarning): + # Case where we expect to get a TimedeltaArray back + result2 = dta - dta.astype(object) + + assert isinstance(result2, TimedeltaArray) + assert result2.shape == (4, 1) + assert result2.freq is None + assert (result2.asi8 == 0).all() diff --git a/pandas/tests/arithmetic/test_interval.py b/pandas/tests/arithmetic/test_interval.py new file mode 100644 index 00000000..88e3dca6 --- /dev/null +++ b/pandas/tests/arithmetic/test_interval.py @@ -0,0 +1,316 @@ +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_list_like + +import pandas as pd +from pandas import ( + Categorical, + Index, + Interval, + IntervalIndex, + Period, + Series, + Timedelta, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import ( + BooleanArray, + IntervalArray, +) +from pandas.tests.arithmetic.common import get_upcast_box + + +@pytest.fixture( + params=[ + (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])), + (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])), + ( + timedelta_range("0 days", periods=3).insert(3, pd.NaT), + timedelta_range("1 day", periods=3).insert(3, pd.NaT), + ), + ( + date_range("20170101", periods=3).insert(3, pd.NaT), + date_range("20170102", periods=3).insert(3, pd.NaT), + ), + ( + date_range("20170101", periods=3, tz="US/Eastern").insert(3, pd.NaT), + date_range("20170102", periods=3, tz="US/Eastern").insert(3, pd.NaT), + ), + ], + ids=lambda x: str(x[0].dtype), +) +def left_right_dtypes(request): 
+ """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +@pytest.fixture +def interval_array(left_right_dtypes): + """ + Fixture to generate an IntervalArray of various dtypes containing NA if possible + """ + left, right = left_right_dtypes + return IntervalArray.from_arrays(left, right) + + +def create_categorical_intervals(left, right, closed="right"): + return Categorical(IntervalIndex.from_arrays(left, right, closed)) + + +def create_series_intervals(left, right, closed="right"): + return Series(IntervalArray.from_arrays(left, right, closed)) + + +def create_series_categorical_intervals(left, right, closed="right"): + return Series(Categorical(IntervalIndex.from_arrays(left, right, closed))) + + +class TestComparison: + @pytest.fixture(params=[operator.eq, operator.ne]) + def op(self, request): + return request.param + + @pytest.fixture( + params=[ + IntervalArray.from_arrays, + IntervalIndex.from_arrays, + create_categorical_intervals, + create_series_intervals, + create_series_categorical_intervals, + ], + ids=[ + "IntervalArray", + "IntervalIndex", + "Categorical[Interval]", + "Series[Interval]", + "Series[Categorical[Interval]]", + ], + ) + def interval_constructor(self, request): + """ + Fixture for all pandas native interval constructors. + To be used as the LHS of IntervalArray comparisons. + """ + return request.param + + def elementwise_comparison(self, op, interval_array, other): + """ + Helper that performs elementwise comparisons between `array` and `other` + """ + other = other if is_list_like(other) else [other] * len(interval_array) + expected = np.array([op(x, y) for x, y in zip(interval_array, other)]) + if isinstance(other, Series): + return Series(expected, index=other.index) + return expected + + def test_compare_scalar_interval(self, op, interval_array): + # matches first interval + other = interval_array[0] + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + # matches on a single endpoint but not both + other = Interval(interval_array.left[0], interval_array.right[1]) + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed): + interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed) + other = Interval(0, 1, closed=other_closed) + + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_scalar_na( + self, op, interval_array, nulls_fixture, box_with_array, request + ): + box = box_with_array + + if box is pd.DataFrame: + if interval_array.dtype.subtype.kind not in "iuf": + mark = pytest.mark.xfail( + reason="raises on DataFrame.transpose (would be fixed by EA2D)" + ) + request.node.add_marker(mark) + + obj = tm.box_expected(interval_array, box) + result = op(obj, nulls_fixture) + + if nulls_fixture is pd.NA: + # GH#31882 + exp = np.ones(interval_array.shape, dtype=bool) + expected = BooleanArray(exp, exp) + else: + expected = self.elementwise_comparison(op, interval_array, nulls_fixture) + + if not (box is Index and nulls_fixture is pd.NA): + # don't cast expected from BooleanArray to ndarray[object] + xbox = get_upcast_box(obj, nulls_fixture, True) + expected = tm.box_expected(expected, xbox) + + 
tm.assert_equal(result, expected) + + rev = op(nulls_fixture, obj) + tm.assert_equal(rev, expected) + + @pytest.mark.parametrize( + "other", + [ + 0, + 1.0, + True, + "foo", + Timestamp("2017-01-01"), + Timestamp("2017-01-01", tz="US/Eastern"), + Timedelta("0 days"), + Period("2017-01-01", "D"), + ], + ) + def test_compare_scalar_other(self, op, interval_array, other): + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_list_like_interval(self, op, interval_array, interval_constructor): + # same endpoints + other = interval_constructor(interval_array.left, interval_array.right) + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_equal(result, expected) + + # different endpoints + other = interval_constructor( + interval_array.left[::-1], interval_array.right[::-1] + ) + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_equal(result, expected) + + # all nan endpoints + other = interval_constructor([np.nan] * 4, [np.nan] * 4) + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_equal(result, expected) + + def test_compare_list_like_interval_mixed_closed( + self, op, interval_constructor, closed, other_closed + ): + interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed) + other = interval_constructor(range(2), range(1, 3), closed=other_closed) + + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + ( + Interval(0, 1), + Interval(Timedelta("1 day"), Timedelta("2 days")), + Interval(4, 5, "both"), + Interval(10, 20, "neither"), + ), + (0, 1.5, Timestamp("20170103"), np.nan), + ( + Timestamp("20170102", tz="US/Eastern"), + Timedelta("2 days"), + "baz", + pd.NaT, + ), + ], + ) + def test_compare_list_like_object(self, op, interval_array, other): + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_list_like_nan(self, op, interval_array, nulls_fixture): + other = [nulls_fixture] * 4 + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + np.arange(4, dtype="int64"), + np.arange(4, dtype="float64"), + date_range("2017-01-01", periods=4), + date_range("2017-01-01", periods=4, tz="US/Eastern"), + timedelta_range("0 days", periods=4), + period_range("2017-01-01", periods=4, freq="D"), + Categorical(list("abab")), + Categorical(date_range("2017-01-01", periods=4)), + pd.array(list("abcd")), + pd.array(["foo", 3.14, None, object()], dtype=object), + ], + ids=lambda x: str(x.dtype), + ) + def test_compare_list_like_other(self, op, interval_array, other): + result = op(interval_array, other) + expected = self.elementwise_comparison(op, interval_array, other) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("length", [1, 3, 5]) + @pytest.mark.parametrize("other_constructor", [IntervalArray, list]) + def test_compare_length_mismatch_errors(self, op, other_constructor, length): + interval_array = IntervalArray.from_arrays(range(4), range(1, 5)) + other = 
other_constructor([Interval(0, 1)] * length) + with pytest.raises(ValueError, match="Lengths must match to compare"): + op(interval_array, other) + + @pytest.mark.parametrize( + "constructor, expected_type, assert_func", + [ + (IntervalIndex, np.array, tm.assert_numpy_array_equal), + (Series, Series, tm.assert_series_equal), + ], + ) + def test_index_series_compat(self, op, constructor, expected_type, assert_func): + # IntervalIndex/Series that rely on IntervalArray for comparisons + breaks = range(4) + index = constructor(IntervalIndex.from_breaks(breaks)) + + # scalar comparisons + other = index[0] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + other = breaks[0] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + # list-like comparisons + other = IntervalArray.from_breaks(breaks) + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + other = [index[0], breaks[0], "foo"] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None]) + def test_comparison_operations(self, scalars): + # GH #28981 + expected = Series([False, False]) + s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval") + result = s == scalars + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py new file mode 100644 index 00000000..881a5f1d --- /dev/null +++ b/pandas/tests/arithmetic/test_numeric.py @@ -0,0 +1,1447 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +# Specifically for numeric dtypes +from __future__ import annotations + +from collections import abc +from decimal import Decimal +import operator +from typing import Any + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + RangeIndex, + Series, + Timedelta, + TimedeltaIndex, + array, +) +import pandas._testing as tm +from pandas.core import ops +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) +from pandas.core.computation import expressions as expr +from pandas.tests.arithmetic.common import ( + assert_invalid_addsub_type, + assert_invalid_comparison, +) + + +@pytest.fixture(params=[Index, Series, tm.to_array]) +def box_pandas_1d_array(request): + """ + Fixture to test behavior for Index, Series and tm.to_array classes + """ + return request.param + + +def adjust_negative_zero(zero, expected): + """ + Helper to adjust the expected result if we are dividing by -0.0 + as opposed to 0.0 + """ + if np.signbit(np.array(zero)).any(): + # All entries in the `zero` fixture should be either + # all-negative or no-negative. 
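+ # Illustrative sketch of why the sign flips (assumed example): under IEEE 754, + # 1.0 / 0.0 == inf while 1.0 / -0.0 == -inf, so a negative-zero divisor + # negates the expected +/- inf entries.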
+ assert np.signbit(np.array(zero)).all() + + expected *= -1 + + return expected + + +def compare_op(series, other, op): + left = np.abs(series) if op in (ops.rpow, operator.pow) else series + right = np.abs(other) if op in (ops.rpow, operator.pow) else other + + cython_or_numpy = op(left, right) + python = left.combine(right, op) + if isinstance(other, Series) and not other.index.equals(series.index): + python.index = python.index._with_freq(None) + tm.assert_series_equal(cython_or_numpy, python) + + +# TODO: remove this kludge once mypy stops giving false positives here +# List comprehension has incompatible type List[PandasObject]; expected List[RangeIndex] +# See GH#29725 +ser_or_index: list[Any] = [Series, Index] +lefts: list[Any] = [RangeIndex(10, 40, 10)] +lefts.extend( + [ + cls([10, 20, 30], dtype=dtype) + for dtype in ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"] + for cls in ser_or_index + ] +) + +# ------------------------------------------------------------------ +# Comparisons + + +class TestNumericComparisons: + def test_operator_series_comparison_zerorank(self): + # GH#13006 + result = np.float64(0) > Series([1, 2, 3]) + expected = 0.0 > Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + result = Series([1, 2, 3]) < np.float64(0) + expected = Series([1, 2, 3]) < 0.0 + tm.assert_series_equal(result, expected) + result = np.array([0, 1, 2])[0] > Series([0, 1, 2]) + expected = 0.0 > Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + + def test_df_numeric_cmp_dt64_raises(self, box_with_array, fixed_now_ts): + # GH#8932, GH#22163 + ts = fixed_now_ts + obj = np.array(range(5)) + obj = tm.box_expected(obj, box_with_array) + + assert_invalid_comparison(obj, ts, box_with_array) + + def test_compare_invalid(self): + # GH#8058 + # ops testing + a = Series(np.random.randn(5), name=0) + b = Series(np.random.randn(5)) + b.name = pd.Timestamp("2000-01-01") + tm.assert_series_equal(a / b, 1 / (b / a)) + + def test_numeric_cmp_string_numexpr_path(self, box_with_array): + # GH#36377, GH#35700 + box = box_with_array + xbox = box if box is not Index else np.ndarray + + obj = Series(np.random.randn(10**5)) + obj = tm.box_expected(obj, box, transpose=False) + + result = obj == "a" + + expected = Series(np.zeros(10**5, dtype=bool)) + expected = tm.box_expected(expected, xbox, transpose=False) + tm.assert_equal(result, expected) + + result = obj != "a" + tm.assert_equal(result, ~expected) + + msg = "Invalid comparison between dtype=float64 and str" + with pytest.raises(TypeError, match=msg): + obj < "a" + + +# ------------------------------------------------------------------ +# Numeric dtypes Arithmetic with Datetime/Timedelta Scalar + + +class TestNumericArraylikeArithmeticWithDatetimeLike: + @pytest.mark.parametrize("box_cls", [np.array, Index, Series]) + @pytest.mark.parametrize( + "left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype) + ) + def test_mul_td64arr(self, left, box_cls): + # GH#22390 + right = np.array([1, 2, 3], dtype="m8[s]") + right = box_cls(right) + + expected = TimedeltaIndex(["10s", "40s", "90s"]) + if isinstance(left, Series) or box_cls is Series: + expected = Series(expected) + + result = left * right + tm.assert_equal(result, expected) + + result = right * left + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("box_cls", [np.array, Index, Series]) + @pytest.mark.parametrize( + "left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype) + ) + def test_div_td64arr(self, left, box_cls): + # GH#22390 + right 
= np.array([10, 40, 90], dtype="m8[s]") + right = box_cls(right) + + expected = TimedeltaIndex(["1s", "2s", "3s"]) + if isinstance(left, Series) or box_cls is Series: + expected = Series(expected) + + result = right / left + tm.assert_equal(result, expected) + + result = right // left + tm.assert_equal(result, expected) + + msg = "Cannot divide" + with pytest.raises(TypeError, match=msg): + left / right + + with pytest.raises(TypeError, match=msg): + left // right + + # TODO: also test Tick objects; + # see test_numeric_arr_rdiv_tdscalar for note on these failing + @pytest.mark.parametrize( + "scalar_td", + [ + Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta(), + Timedelta(days=1).to_timedelta64().astype("timedelta64[s]"), + Timedelta(days=1).to_timedelta64().astype("timedelta64[ms]"), + ], + ids=lambda x: type(x).__name__, + ) + def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box_with_array): + # GH#19333 + box = box_with_array + index = numeric_idx + expected = TimedeltaIndex([Timedelta(days=n) for n in range(len(index))]) + + index = tm.box_expected(index, box) + expected = tm.box_expected(expected, box) + + result = index * scalar_td + tm.assert_equal(result, expected) + + commute = scalar_td * index + tm.assert_equal(commute, expected) + + @pytest.mark.parametrize( + "scalar_td", + [ + Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta(), + ], + ids=lambda x: type(x).__name__, + ) + @pytest.mark.parametrize("dtype", [np.int64, np.float64]) + def test_numeric_arr_mul_tdscalar_numexpr_path( + self, dtype, scalar_td, box_with_array + ): + # GH#44772 for the float64 case + box = box_with_array + + arr_i8 = np.arange(2 * 10**4).astype(np.int64, copy=False) + arr = arr_i8.astype(dtype, copy=False) + obj = tm.box_expected(arr, box, transpose=False) + + expected = arr_i8.view("timedelta64[D]").astype("timedelta64[ns]") + expected = tm.box_expected(expected, box, transpose=False) + + result = obj * scalar_td + tm.assert_equal(result, expected) + + result = scalar_td * obj + tm.assert_equal(result, expected) + + def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array): + box = box_with_array + + index = numeric_idx[1:3] + + expected = TimedeltaIndex(["3 Days", "36 Hours"]) + + index = tm.box_expected(index, box) + expected = tm.box_expected(expected, box) + + result = three_days / index + tm.assert_equal(result, expected) + + msg = "cannot use operands with types dtype" + with pytest.raises(TypeError, match=msg): + index / three_days + + @pytest.mark.parametrize( + "other", + [ + Timedelta(hours=31), + Timedelta(hours=31).to_pytimedelta(), + Timedelta(hours=31).to_timedelta64(), + Timedelta(hours=31).to_timedelta64().astype("m8[h]"), + np.timedelta64("NaT"), + np.timedelta64("NaT", "D"), + pd.offsets.Minute(3), + pd.offsets.Second(0), + # GH#28080 numeric+datetimelike should raise; Timestamp used + # to raise NullFrequencyError but that behavior was removed in 1.0 + pd.Timestamp("2021-01-01", tz="Asia/Tokyo"), + pd.Timestamp("2021-01-01"), + pd.Timestamp("2021-01-01").to_pydatetime(), + pd.Timestamp("2021-01-01", tz="UTC").to_pydatetime(), + pd.Timestamp("2021-01-01").to_datetime64(), + np.datetime64("NaT", "ns"), + pd.NaT, + ], + ) + def test_add_sub_datetimedeltalike_invalid( + self, numeric_idx, other, box_with_array + ): + box = box_with_array + + left = tm.box_expected(numeric_idx, box) + msg = "|".join( + [ + "unsupported operand type", + "Addition/subtraction of 
integers and integer-arrays", + "Instead of adding/subtracting", + "cannot use operands with types dtype", + "Concatenation operation is not implemented for NumPy arrays", + "Cannot (add|subtract) NaT (to|from) ndarray", + # pd.array vs np.datetime64 case + r"operand type\(s\) all returned NotImplemented from __array_ufunc__", + "can only perform ops with numeric values", + "cannot subtract DatetimeArray from ndarray", + ] + ) + assert_invalid_addsub_type(left, other, msg) + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestDivisionByZero: + def test_div_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + # We only adjust for Index, because Series does not yet apply + # the adjustment correctly. + expected2 = adjust_negative_zero(zero, expected) + + result = idx / zero + tm.assert_index_equal(result, expected2) + ser_compat = Series(idx).astype("i8") / np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(expected)) + + def test_floordiv_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + # We only adjust for Index, because Series does not yet apply + # the adjustment correctly. + expected2 = adjust_negative_zero(zero, expected) + + result = idx // zero + tm.assert_index_equal(result, expected2) + ser_compat = Series(idx).astype("i8") // np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(expected)) + + def test_mod_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan], dtype=np.float64) + result = idx % zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype("i8") % np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(result)) + + def test_divmod_zero(self, zero, numeric_idx): + idx = numeric_idx + + exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan], dtype=np.float64) + exleft = adjust_negative_zero(zero, exleft) + + result = divmod(idx, zero) + tm.assert_index_equal(result[0], exleft) + tm.assert_index_equal(result[1], exright) + + @pytest.mark.parametrize("op", [operator.truediv, operator.floordiv]) + def test_div_negative_zero(self, zero, numeric_idx, op): + # Check that -1 / -0.0 returns np.inf, not -np.inf + if isinstance(numeric_idx, UInt64Index): + return + idx = numeric_idx - 3 + + expected = Index([-np.inf, -np.inf, -np.inf, np.nan, np.inf], dtype=np.float64) + expected = adjust_negative_zero(zero, expected) + + result = op(idx, zero) + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------------ + + @pytest.mark.parametrize("dtype1", [np.int64, np.float64, np.uint64]) + def test_ser_div_ser( + self, + switch_numexpr_min_elements, + dtype1, + any_real_numpy_dtype, + ): + # no longer do integer div for any ops, but deal with the 0's + dtype2 = any_real_numpy_dtype + + first = Series([3, 4, 5, 8], name="first").astype(dtype1) + second = Series([0, 0, 0, 3], name="second").astype(dtype2) + + with np.errstate(all="ignore"): + expected = Series( + first.values.astype(np.float64) / second.values, + dtype="float64", + name=None, + ) + expected.iloc[0:3] = np.inf + if first.dtype == "int64" and second.dtype == "float32": + # when using numexpr, the casting rules are slightly different + # and 
int64/float32 combo results in float32 instead of float64 + if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0: + expected = expected.astype("float32") + + result = first / second + tm.assert_series_equal(result, expected) + assert not result.equals(second / first) + + @pytest.mark.parametrize("dtype1", [np.int64, np.float64, np.uint64]) + def test_ser_divmod_zero(self, dtype1, any_real_numpy_dtype): + # GH#26987 + dtype2 = any_real_numpy_dtype + left = Series([1, 1]).astype(dtype1) + right = Series([0, 2]).astype(dtype2) + + # GH#27321 pandas convention is to set 1 // 0 to np.inf, as opposed + # to numpy which sets to np.nan; patch `expected[0]` below + expected = left // right, left % right + expected = list(expected) + expected[0] = expected[0].astype(np.float64) + expected[0][0] = np.inf + result = divmod(left, right) + + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + # rdivmod case + result = divmod(left.values, right) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + def test_ser_divmod_inf(self): + left = Series([np.inf, 1.0]) + right = Series([np.inf, 2.0]) + + expected = left // right, left % right + result = divmod(left, right) + + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + # rdivmod case + result = divmod(left.values, right) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + def test_rdiv_zero_compat(self): + # GH#8674 + zero_array = np.array([0] * 5) + data = np.random.randn(5) + expected = Series([0.0] * 5) + + result = zero_array / Series(data) + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / data + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / Series(data) + tm.assert_series_equal(result, expected) + + def test_div_zero_inf_signs(self): + # GH#9144, inf signing + ser = Series([-1, 0, 1], name="first") + expected = Series([-np.inf, np.nan, np.inf], name="first") + + result = ser / 0 + tm.assert_series_equal(result, expected) + + def test_rdiv_zero(self): + # GH#9144 + ser = Series([-1, 0, 1], name="first") + expected = Series([0.0, np.nan, 0.0], name="first") + + result = 0 / ser + tm.assert_series_equal(result, expected) + + def test_floordiv_div(self): + # GH#9144 + ser = Series([-1, 0, 1], name="first") + + result = ser // 0 + expected = Series([-np.inf, np.nan, np.inf], name="first") + tm.assert_series_equal(result, expected) + + def test_df_div_zero_df(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + result = df / df + + first = Series([1.0, 1.0, 1.0, 1.0]) + second = Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({"first": first, "second": second}) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_array(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + + first = Series([1.0, 1.0, 1.0, 1.0]) + second = Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({"first": first, "second": second}) + + with np.errstate(all="ignore"): + arr = df.values.astype("float") / df.values + result = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_int(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": [3, 4, 5, 8], 
"second": [0, 0, 0, 3]}) + + result = df / 0 + expected = pd.DataFrame(np.inf, index=df.index, columns=df.columns) + expected.iloc[0:3, 1] = np.nan + tm.assert_frame_equal(result, expected) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all="ignore"): + arr = df.values.astype("float64") / 0 + result2 = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result2, expected) + + def test_df_div_zero_series_does_not_commute(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame(np.random.randn(10, 5)) + ser = df[0] + res = ser / df + res2 = df / ser + assert not res.fillna(0).equals(res2.fillna(0)) + + # ------------------------------------------------------------------ + # Mod By Zero + + def test_df_mod_zero_df(self, using_array_manager): + # GH#3590, modulo as ints + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + # this is technically wrong, as the integer portion is coerced to float + first = Series([0, 0, 0, 0]) + if not using_array_manager: + # INFO(ArrayManager) BlockManager doesn't preserve dtype per column + # while ArrayManager performs op column-wisedoes and thus preserves + # dtype if possible + first = first.astype("float64") + second = Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({"first": first, "second": second}) + result = df % df + tm.assert_frame_equal(result, expected) + + # GH#38939 If we dont pass copy=False, df is consolidated and + # result["first"] is float64 instead of int64 + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}, copy=False) + first = Series([0, 0, 0, 0], dtype="int64") + second = Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({"first": first, "second": second}) + result = df % df + tm.assert_frame_equal(result, expected) + + def test_df_mod_zero_array(self): + # GH#3590, modulo as ints + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + + # this is technically wrong, as the integer portion is coerced to float + # ### + first = Series([0, 0, 0, 0], dtype="float64") + second = Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({"first": first, "second": second}) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all="ignore"): + arr = df.values % df.values + result2 = pd.DataFrame(arr, index=df.index, columns=df.columns, dtype="float64") + result2.iloc[0:3, 1] = np.nan + tm.assert_frame_equal(result2, expected) + + def test_df_mod_zero_int(self): + # GH#3590, modulo as ints + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + + result = df % 0 + expected = pd.DataFrame(np.nan, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all="ignore"): + arr = df.values.astype("float64") % 0 + result2 = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result2, expected) + + def test_df_mod_zero_series_does_not_commute(self): + # GH#3590, modulo as ints + # not commutative with series + df = pd.DataFrame(np.random.randn(10, 5)) + ser = df[0] + res = ser % df + res2 = df % ser + assert not res.fillna(0).equals(res2.fillna(0)) + + +class TestMultiplicationDivision: + # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + # for non-timestamp/timedelta/period dtypes + + def test_divide_decimal(self, box_with_array): + # resolves issue GH#9787 + box = box_with_array + ser = Series([Decimal(10)]) + expected = 
Series([Decimal(5)]) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = ser / Decimal(2) + + tm.assert_equal(result, expected) + + result = ser // Decimal(2) + tm.assert_equal(result, expected) + + def test_div_equiv_binop(self): + # Test Series.div as well as Series.__div__ + # float/integer issue + # GH#7785 + first = Series([1, 0], name="first") + second = Series([-0.01, -0.02], name="second") + expected = Series([-0.01, -np.inf]) + + result = second.div(first) + tm.assert_series_equal(result, expected, check_names=False) + + result = second / first + tm.assert_series_equal(result, expected) + + def test_div_int(self, numeric_idx): + idx = numeric_idx + result = idx / 1 + expected = idx.astype("float64") + tm.assert_index_equal(result, expected) + + result = idx / 2 + expected = Index(idx.values / 2) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("op", [operator.mul, ops.rmul, operator.floordiv]) + def test_mul_int_identity(self, op, numeric_idx, box_with_array): + idx = numeric_idx + idx = tm.box_expected(idx, box_with_array) + + result = op(idx, 1) + tm.assert_equal(result, idx) + + def test_mul_int_array(self, numeric_idx): + idx = numeric_idx + didx = idx * idx + + result = idx * np.array(5, dtype="int64") + tm.assert_index_equal(result, idx * 5) + + arr_dtype = "uint64" if isinstance(idx, UInt64Index) else "int64" + result = idx * np.arange(5, dtype=arr_dtype) + tm.assert_index_equal(result, didx) + + def test_mul_int_series(self, numeric_idx): + idx = numeric_idx + didx = idx * idx + + arr_dtype = "uint64" if isinstance(idx, UInt64Index) else "int64" + result = idx * Series(np.arange(5, dtype=arr_dtype)) + tm.assert_series_equal(result, Series(didx)) + + def test_mul_float_series(self, numeric_idx): + idx = numeric_idx + rng5 = np.arange(5, dtype="float64") + + result = idx * Series(rng5 + 0.1) + expected = Series(rng5 * (rng5 + 0.1)) + tm.assert_series_equal(result, expected) + + def test_mul_index(self, numeric_idx): + idx = numeric_idx + + result = idx * idx + tm.assert_index_equal(result, idx**2) + + def test_mul_datelike_raises(self, numeric_idx): + idx = numeric_idx + msg = "cannot perform __rmul__ with this index type" + with pytest.raises(TypeError, match=msg): + idx * pd.date_range("20130101", periods=5) + + def test_mul_size_mismatch_raises(self, numeric_idx): + idx = numeric_idx + msg = "operands could not be broadcast together" + with pytest.raises(ValueError, match=msg): + idx * idx[0:3] + with pytest.raises(ValueError, match=msg): + idx * np.array([1, 2]) + + @pytest.mark.parametrize("op", [operator.pow, ops.rpow]) + def test_pow_float(self, op, numeric_idx, box_with_array): + # test power calculations both ways, GH#14973 + box = box_with_array + idx = numeric_idx + expected = Float64Index(op(idx.values, 2.0)) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + result = op(idx, 2.0) + tm.assert_equal(result, expected) + + def test_modulo(self, numeric_idx, box_with_array): + # GH#9244 + box = box_with_array + idx = numeric_idx + expected = Index(idx.values % 2) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + result = idx % 2 + tm.assert_equal(result, expected) + + def test_divmod_scalar(self, numeric_idx): + idx = numeric_idx + + result = divmod(idx, 2) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, 2) + + expected = Index(div), Index(mod) + for r, e in zip(result, expected): + tm.assert_index_equal(r, e) + + def 
test_divmod_ndarray(self, numeric_idx): + idx = numeric_idx + other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2 + + result = divmod(idx, other) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, other) + + expected = Index(div), Index(mod) + for r, e in zip(result, expected): + tm.assert_index_equal(r, e) + + def test_divmod_series(self, numeric_idx): + idx = numeric_idx + other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2 + + result = divmod(idx, Series(other)) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, other) + + expected = Series(div), Series(mod) + for r, e in zip(result, expected): + tm.assert_series_equal(r, e) + + @pytest.mark.parametrize("other", [np.nan, 7, -23, 2.718, -3.14, np.inf]) + def test_ops_np_scalar(self, other): + vals = np.random.randn(5, 3) + f = lambda x: pd.DataFrame( + x, index=list("ABCDE"), columns=["jim", "joe", "jolie"] + ) + + df = f(vals) + + tm.assert_frame_equal(df / np.array(other), f(vals / other)) + tm.assert_frame_equal(np.array(other) * df, f(vals * other)) + tm.assert_frame_equal(df + np.array(other), f(vals + other)) + tm.assert_frame_equal(np.array(other) - df, f(other - vals)) + + # TODO: This came from series.test.test_operators, needs cleanup + def test_operators_frame(self): + # rpow does not work with DataFrame + ts = tm.makeTimeSeries() + ts.name = "ts" + + df = pd.DataFrame({"A": ts}) + + tm.assert_series_equal(ts + ts, ts + df["A"], check_names=False) + tm.assert_series_equal(ts**ts, ts ** df["A"], check_names=False) + tm.assert_series_equal(ts < ts, ts < df["A"], check_names=False) + tm.assert_series_equal(ts / ts, ts / df["A"], check_names=False) + + # TODO: this came from tests.series.test_analytics, needs cleanup and + # de-duplication with test_modulo above + def test_modulo2(self): + with np.errstate(all="ignore"): + + # GH#3590, modulo as ints + p = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + result = p["first"] % p["second"] + expected = Series(p["first"].values % p["second"].values, dtype="float64") + expected.iloc[0:3] = np.nan + tm.assert_series_equal(result, expected) + + result = p["first"] % 0 + expected = Series(np.nan, index=p.index, name="first") + tm.assert_series_equal(result, expected) + + p = p.astype("float64") + result = p["first"] % p["second"] + expected = Series(p["first"].values % p["second"].values) + tm.assert_series_equal(result, expected) + + p = p.astype("float64") + result = p["first"] % p["second"] + result2 = p["second"] % p["first"] + assert not result.equals(result2) + + def test_modulo_zero_int(self): + # GH#9144 + with np.errstate(all="ignore"): + s = Series([0, 1]) + + result = s % 0 + expected = Series([np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + result = 0 % s + expected = Series([np.nan, 0.0]) + tm.assert_series_equal(result, expected) + + +class TestAdditionSubtraction: + # __add__, __sub__, __radd__, __rsub__, __iadd__, __isub__ + # for non-timestamp/timedelta/period dtypes + + @pytest.mark.parametrize( + "first, second, expected", + [ + ( + Series([1, 2, 3], index=list("ABC"), name="x"), + Series([2, 2, 2], index=list("ABD"), name="x"), + Series([3.0, 4.0, np.nan, np.nan], index=list("ABCD"), name="x"), + ), + ( + Series([1, 2, 3], index=list("ABC"), name="x"), + Series([2, 2, 2, 2], index=list("ABCD"), name="x"), + Series([3, 4, 5, np.nan], index=list("ABCD"), name="x"), + ), + ], + ) + def test_add_series(self, first, second, expected): + # GH#1134 + tm.assert_series_equal(first + second, 
expected) + tm.assert_series_equal(second + first, expected) + + @pytest.mark.parametrize( + "first, second, expected", + [ + ( + pd.DataFrame({"x": [1, 2, 3]}, index=list("ABC")), + pd.DataFrame({"x": [2, 2, 2]}, index=list("ABD")), + pd.DataFrame({"x": [3.0, 4.0, np.nan, np.nan]}, index=list("ABCD")), + ), + ( + pd.DataFrame({"x": [1, 2, 3]}, index=list("ABC")), + pd.DataFrame({"x": [2, 2, 2, 2]}, index=list("ABCD")), + pd.DataFrame({"x": [3, 4, 5, np.nan]}, index=list("ABCD")), + ), + ], + ) + def test_add_frames(self, first, second, expected): + # GH#1134 + tm.assert_frame_equal(first + second, expected) + tm.assert_frame_equal(second + first, expected) + + # TODO: This came from series.test.test_operators, needs cleanup + def test_series_frame_radd_bug(self, fixed_now_ts): + # GH#353 + vals = Series(tm.rands_array(5, 10)) + result = "foo_" + vals + expected = vals.map(lambda x: "foo_" + x) + tm.assert_series_equal(result, expected) + + frame = pd.DataFrame({"vals": vals}) + result = "foo_" + frame + expected = pd.DataFrame({"vals": vals.map(lambda x: "foo_" + x)}) + tm.assert_frame_equal(result, expected) + + ts = tm.makeTimeSeries() + ts.name = "ts" + + # really raise this time + fix_now = fixed_now_ts.to_pydatetime() + msg = "|".join( + [ + "unsupported operand type", + # wrong error message, see https://github.com/numpy/numpy/issues/18832 + "Concatenation operation", + ] + ) + with pytest.raises(TypeError, match=msg): + fix_now + ts + + with pytest.raises(TypeError, match=msg): + ts + fix_now + + # TODO: This came from series.test.test_operators, needs cleanup + def test_datetime64_with_index(self): + # arithmetic integer ops with an index + ser = Series(np.random.randn(5)) + expected = ser - ser.index.to_series() + result = ser - ser.index + tm.assert_series_equal(result, expected) + + # GH#4629 + # arithmetic datetime64 ops with an index + ser = Series( + pd.date_range("20130101", periods=5), + index=pd.date_range("20130101", periods=5), + ) + expected = ser - ser.index.to_series() + result = ser - ser.index + tm.assert_series_equal(result, expected) + + msg = "cannot subtract period" + with pytest.raises(TypeError, match=msg): + # GH#18850 + result = ser - ser.index.to_period() + + df = pd.DataFrame( + np.random.randn(5, 2), index=pd.date_range("20130101", periods=5) + ) + df["date"] = pd.Timestamp("20130102") + df["expected"] = df["date"] - df.index.to_series() + df["result"] = df["date"] - df.index + tm.assert_series_equal(df["result"], df["expected"], check_names=False) + + # TODO: taken from tests.frame.test_operators, needs cleanup + def test_frame_operators(self, float_frame): + frame = float_frame + + garbage = np.random.random(4) + colSeries = Series(garbage, index=np.array(frame.columns)) + + idSum = frame + frame + seriesSum = frame + colSeries + + for col, series in idSum.items(): + for idx, val in series.items(): + origVal = frame[col][idx] * 2 + if not np.isnan(val): + assert val == origVal + else: + assert np.isnan(origVal) + + for col, series in seriesSum.items(): + for idx, val in series.items(): + origVal = frame[col][idx] + colSeries[col] + if not np.isnan(val): + assert val == origVal + else: + assert np.isnan(origVal) + + def test_frame_operators_col_align(self, float_frame): + frame2 = pd.DataFrame(float_frame, columns=["D", "C", "B", "A"]) + added = frame2 + frame2 + expected = frame2 * 2 + tm.assert_frame_equal(added, expected) + + def test_frame_operators_none_to_nan(self): + df = pd.DataFrame({"a": ["a", None, "b"]}) + tm.assert_frame_equal(df + df, 
pd.DataFrame({"a": ["aa", np.nan, "bb"]})) + + @pytest.mark.parametrize("dtype", ("float", "int64")) + def test_frame_operators_empty_like(self, dtype): + # Test for issue #10181 + frames = [ + pd.DataFrame(dtype=dtype), + pd.DataFrame(columns=["A"], dtype=dtype), + pd.DataFrame(index=[0], dtype=dtype), + ] + for df in frames: + assert (df + df).equals(df) + tm.assert_frame_equal(df + df, df) + + @pytest.mark.parametrize( + "func", + [lambda x: x * 2, lambda x: x[::2], lambda x: 5], + ids=["multiply", "slice", "constant"], + ) + def test_series_operators_arithmetic(self, all_arithmetic_functions, func): + op = all_arithmetic_functions + series = tm.makeTimeSeries().rename("ts") + other = func(series) + compare_op(series, other, op) + + @pytest.mark.parametrize( + "func", [lambda x: x + 1, lambda x: 5], ids=["add", "constant"] + ) + def test_series_operators_compare(self, comparison_op, func): + op = comparison_op + series = tm.makeTimeSeries().rename("ts") + other = func(series) + compare_op(series, other, op) + + @pytest.mark.parametrize( + "func", + [lambda x: x * 2, lambda x: x[::2], lambda x: 5], + ids=["multiply", "slice", "constant"], + ) + def test_divmod(self, func): + series = tm.makeTimeSeries().rename("ts") + other = func(series) + results = divmod(series, other) + if isinstance(other, abc.Iterable) and len(series) != len(other): + # if the lengths don't match, this is the test where we use + # `tser[::2]`. Pad every other value in `other_np` with nan. + other_np = [] + for n in other: + other_np.append(n) + other_np.append(np.nan) + else: + other_np = other + other_np = np.asarray(other_np) + with np.errstate(all="ignore"): + expecteds = divmod(series.values, np.asarray(other_np)) + + for result, expected in zip(results, expecteds): + # check the values, name, and index separately + tm.assert_almost_equal(np.asarray(result), expected) + + assert result.name == series.name + tm.assert_index_equal(result.index, series.index._with_freq(None)) + + def test_series_divmod_zero(self): + # Check that divmod uses pandas convention for division by zero, + # which does not match numpy. 
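+ # For example (illustrative sketch, not asserted below): + # divmod(Series([1.0, -1.0]), Series([0.0, 0.0])) gives quotient [inf, -inf] + # and remainder [nan, nan] under the convention spelled out next.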
+ # pandas convention has + # 1/0 == np.inf + # -1/0 == -np.inf + # 1/-0.0 == -np.inf + # -1/-0.0 == np.inf + tser = tm.makeTimeSeries().rename("ts") + other = tser * 0 + + result = divmod(tser, other) + exp1 = Series([np.inf] * len(tser), index=tser.index, name="ts") + exp2 = Series([np.nan] * len(tser), index=tser.index, name="ts") + tm.assert_series_equal(result[0], exp1) + tm.assert_series_equal(result[1], exp2) + + +class TestUFuncCompat: + @pytest.mark.parametrize( + "holder", + [Int64Index, UInt64Index, Float64Index, RangeIndex, Series], + ) + def test_ufunc_compat(self, holder): + box = Series if holder is Series else Index + + if holder is RangeIndex: + idx = RangeIndex(0, 5, name="foo") + else: + idx = holder(np.arange(5, dtype="int64"), name="foo") + result = np.sin(idx) + expected = box(np.sin(np.arange(5, dtype="int64")), name="foo") + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("holder", [Int64Index, UInt64Index, Float64Index, Series]) + def test_ufunc_coercions(self, holder): + idx = holder([1, 2, 3, 4, 5], name="x") + box = Series if holder is Series else Index + + result = np.sqrt(idx) + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = np.divide(idx, 2.0) + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + # _evaluate_numeric_binop + result = idx + 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([3.0, 4.0, 5.0, 6.0, 7.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx - 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([-1.0, 0.0, 1.0, 2.0, 3.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx * 1.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([1.0, 2.0, 3.0, 4.0, 5.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx / 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + @pytest.mark.parametrize("holder", [Int64Index, UInt64Index, Float64Index, Series]) + def test_ufunc_multiple_return_values(self, holder): + obj = holder([1, 2, 3], name="x") + box = Series if holder is Series else Index + + result = np.modf(obj) + assert isinstance(result, tuple) + exp1 = Float64Index([0.0, 0.0, 0.0], name="x") + exp2 = Float64Index([1.0, 2.0, 3.0], name="x") + tm.assert_equal(result[0], tm.box_expected(exp1, box)) + tm.assert_equal(result[1], tm.box_expected(exp2, box)) + + def test_ufunc_at(self): + s = Series([0, 1, 2], index=[1, 2, 3], name="x") + np.add.at(s, [0, 2], 10) + expected = Series([10, 1, 12], index=[1, 2, 3], name="x") + tm.assert_series_equal(s, expected) + + +class TestObjectDtypeEquivalence: + # Tests that arithmetic operations match operations executed elementwise + + @pytest.mark.parametrize("dtype", [None, object]) + def test_numarr_with_dtype_add_nan(self, dtype, box_with_array): + box = box_with_array + ser = Series([1, 2, 3], dtype=dtype) + expected = Series([np.nan, np.nan, np.nan], dtype=dtype) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = np.nan + ser + 
tm.assert_equal(result, expected) + + result = ser + np.nan + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_numarr_with_dtype_add_int(self, dtype, box_with_array): + box = box_with_array + ser = Series([1, 2, 3], dtype=dtype) + expected = Series([2, 3, 4], dtype=dtype) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = 1 + ser + tm.assert_equal(result, expected) + + result = ser + 1 + tm.assert_equal(result, expected) + + # TODO: moved from tests.series.test_operators; needs cleanup + @pytest.mark.parametrize( + "op", + [operator.add, operator.sub, operator.mul, operator.truediv, operator.floordiv], + ) + def test_operators_reverse_object(self, op): + # GH#56 + arr = Series(np.random.randn(10), index=np.arange(10), dtype=object) + + result = op(1.0, arr) + expected = op(1.0, arr.astype(float)) + tm.assert_series_equal(result.astype(float), expected) + + +class TestNumericArithmeticUnsorted: + # Tests in this class have been moved from type-specific test modules + # but not yet sorted, parametrized, and de-duplicated + @pytest.mark.parametrize( + "op", + [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + operator.truediv, + ], + ) + @pytest.mark.parametrize( + "idx1", + [ + RangeIndex(0, 10, 1), + RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), + RangeIndex(5, -5, -1), + ], + ) + @pytest.mark.parametrize( + "idx2", + [ + RangeIndex(0, 10, 1), + RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), + RangeIndex(5, -5, -1), + ], + ) + def test_binops_index(self, op, idx1, idx2): + idx1 = idx1._rename("foo") + idx2 = idx2._rename("bar") + result = op(idx1, idx2) + expected = op(Int64Index(idx1), Int64Index(idx2)) + tm.assert_index_equal(result, expected, exact="equiv") + + @pytest.mark.parametrize( + "op", + [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + operator.truediv, + ], + ) + @pytest.mark.parametrize( + "idx", + [ + RangeIndex(0, 10, 1), + RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), + RangeIndex(5, -5, -1), + ], + ) + @pytest.mark.parametrize("scalar", [-1, 1, 2]) + def test_binops_index_scalar(self, op, idx, scalar): + result = op(idx, scalar) + expected = op(Int64Index(idx), scalar) + tm.assert_index_equal(result, expected, exact="equiv") + + @pytest.mark.parametrize("idx1", [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)]) + @pytest.mark.parametrize("idx2", [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)]) + def test_binops_index_pow(self, idx1, idx2): + # numpy does not allow powers of negative integers so test separately + # https://github.com/numpy/numpy/pull/8127 + idx1 = idx1._rename("foo") + idx2 = idx2._rename("bar") + result = pow(idx1, idx2) + expected = pow(Int64Index(idx1), Int64Index(idx2)) + tm.assert_index_equal(result, expected, exact="equiv") + + @pytest.mark.parametrize("idx", [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)]) + @pytest.mark.parametrize("scalar", [1, 2]) + def test_binops_index_scalar_pow(self, idx, scalar): + # numpy does not allow powers of negative integers so test separately + # https://github.com/numpy/numpy/pull/8127 + result = pow(idx, scalar) + expected = pow(Int64Index(idx), scalar) + tm.assert_index_equal(result, expected, exact="equiv") + + # TODO: divmod? 
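+    # NOTE (editorial sketch, not part of the upstream patch): the "TODO: divmod?"
+    # comment above asks whether a divmod counterpart is needed. A minimal test in
+    # the same style as test_binops_index_scalar could look like the following;
+    # the test name and parametrization are illustrative assumptions, not pandas code.
+    @pytest.mark.parametrize("idx", [RangeIndex(1, 10, 1), RangeIndex(2, 20, 2)])
+    @pytest.mark.parametrize("scalar", [1, 2])
+    def test_binops_index_scalar_divmod_sketch(self, idx, scalar):
+        # divmod on a RangeIndex should agree with the equivalent Int64Index result
+        result_div, result_mod = divmod(idx, scalar)
+        expected_div, expected_mod = divmod(Int64Index(idx), scalar)
+        tm.assert_index_equal(result_div, expected_div, exact="equiv")
+        tm.assert_index_equal(result_mod, expected_mod, exact="equiv")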
+ @pytest.mark.parametrize( + "op", + [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + operator.truediv, + operator.pow, + operator.mod, + ], + ) + def test_arithmetic_with_frame_or_series(self, op): + # check that we return NotImplemented when operating with Series + # or DataFrame + index = RangeIndex(5) + other = Series(np.random.randn(5)) + + expected = op(Series(index), other) + result = op(index, other) + tm.assert_series_equal(result, expected) + + other = pd.DataFrame(np.random.randn(2, 5)) + expected = op(pd.DataFrame([index, index]), other) + result = op(index, other) + tm.assert_frame_equal(result, expected) + + def test_numeric_compat2(self): + # validate that we are handling the RangeIndex overrides to numeric ops + # and returning RangeIndex where possible + + idx = RangeIndex(0, 10, 2) + + result = idx * 2 + expected = RangeIndex(0, 20, 4) + tm.assert_index_equal(result, expected, exact=True) + + result = idx + 2 + expected = RangeIndex(2, 12, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = idx - 2 + expected = RangeIndex(-2, 8, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = idx / 2 + expected = RangeIndex(0, 5, 1).astype("float64") + tm.assert_index_equal(result, expected, exact=True) + + result = idx / 4 + expected = RangeIndex(0, 10, 2) / 4 + tm.assert_index_equal(result, expected, exact=True) + + result = idx // 1 + expected = idx + tm.assert_index_equal(result, expected, exact=True) + + # __mul__ + result = idx * idx + expected = Index(idx.values * idx.values) + tm.assert_index_equal(result, expected, exact=True) + + # __pow__ + idx = RangeIndex(0, 1000, 2) + result = idx**2 + expected = Int64Index(idx._values) ** 2 + tm.assert_index_equal(Index(result.values), expected, exact=True) + + @pytest.mark.parametrize( + "idx, div, expected", + [ + (RangeIndex(0, 1000, 2), 2, RangeIndex(0, 500, 1)), + (RangeIndex(-99, -201, -3), -3, RangeIndex(33, 67, 1)), + ( + RangeIndex(0, 1000, 1), + 2, + Int64Index(RangeIndex(0, 1000, 1)._values) // 2, + ), + ( + RangeIndex(0, 100, 1), + 2.0, + Int64Index(RangeIndex(0, 100, 1)._values) // 2.0, + ), + (RangeIndex(0), 50, RangeIndex(0)), + (RangeIndex(2, 4, 2), 3, RangeIndex(0, 1, 1)), + (RangeIndex(-5, -10, -6), 4, RangeIndex(-2, -1, 1)), + (RangeIndex(-100, -200, 3), 2, RangeIndex(0)), + ], + ) + def test_numeric_compat2_floordiv(self, idx, div, expected): + # __floordiv__ + tm.assert_index_equal(idx // div, expected, exact=True) + + @pytest.mark.parametrize("dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("delta", [1, 0, -1]) + def test_addsub_arithmetic(self, dtype, delta): + # GH#8142 + delta = dtype(delta) + index = Index([10, 11, 12], dtype=dtype) + result = index + delta + expected = Index(index.values + delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + # this subtraction used to fail + result = index - delta + expected = Index(index.values - delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + tm.assert_index_equal(index + index, 2 * index) + tm.assert_index_equal(index - index, 0 * index) + assert not (index - index).empty + + +def test_fill_value_inf_masking(): + # GH #27464 make sure we mask 0/1 with Inf and not NaN + df = pd.DataFrame({"A": [0, 1, 2], "B": [1.1, None, 1.1]}) + + other = pd.DataFrame({"A": [1.1, 1.2, 1.3]}, index=[0, 2, 3]) + + result = df.rfloordiv(other, fill_value=1) + + expected = pd.DataFrame( + {"A": [np.inf, 1.0, 0.0, 1.0], "B": [0.0, np.nan, 0.0, np.nan]} + ) + tm.assert_frame_equal(result, 
expected) + + +def test_dataframe_div_silenced(): + # GH#26793 + pdf1 = pd.DataFrame( + { + "A": np.arange(10), + "B": [np.nan, 1, 2, 3, 4] * 2, + "C": [np.nan] * 10, + "D": np.arange(10), + }, + index=list("abcdefghij"), + columns=list("ABCD"), + ) + pdf2 = pd.DataFrame( + np.random.randn(10, 4), index=list("abcdefghjk"), columns=list("ABCX") + ) + with tm.assert_produces_warning(None): + pdf1.div(pdf2, fill_value=0) + + +@pytest.mark.parametrize( + "data, expected_data", + [([0, 1, 2], [0, 2, 4])], +) +def test_integer_array_add_list_like( + box_pandas_1d_array, box_1d_array, data, expected_data +): + # GH22606 Verify operators with IntegerArray and list-likes + arr = array(data, dtype="Int64") + container = box_pandas_1d_array(arr) + left = container + box_1d_array(data) + right = box_1d_array(data) + container + + if Series in [box_1d_array, box_pandas_1d_array]: + cls = Series + elif Index in [box_1d_array, box_pandas_1d_array]: + cls = Index + else: + cls = array + + expected = cls(expected_data, dtype="Int64") + + tm.assert_equal(left, expected) + tm.assert_equal(right, expected) + + +def test_sub_multiindex_swapped_levels(): + # GH 9952 + df = pd.DataFrame( + {"a": np.random.randn(6)}, + index=pd.MultiIndex.from_product( + [["a", "b"], [0, 1, 2]], names=["levA", "levB"] + ), + ) + df2 = df.copy() + df2.index = df2.index.swaplevel(0, 1) + result = df - df2 + expected = pd.DataFrame([0.0] * 6, columns=["a"], index=df.index) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("power", [1, 2, 5]) +@pytest.mark.parametrize("string_size", [0, 1, 2, 5]) +def test_empty_str_comparison(power, string_size): + # GH 37348 + a = np.array(range(10**power)) + right = pd.DataFrame(a, dtype=np.int64) + left = " " * string_size + + result = right == left + expected = pd.DataFrame(np.zeros(right.shape, dtype=bool)) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py new file mode 100644 index 00000000..e107ff6b --- /dev/null +++ b/pandas/tests/arithmetic/test_object.py @@ -0,0 +1,379 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. 
+# Specifically for object dtype +import datetime +from decimal import Decimal +import operator + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core import ops + +# ------------------------------------------------------------------ +# Comparisons + + +class TestObjectComparisons: + def test_comparison_object_numeric_nas(self, comparison_op): + ser = Series(np.random.randn(10), dtype=object) + shifted = ser.shift(2) + + func = comparison_op + + result = func(ser, shifted) + expected = func(ser.astype(float), shifted.astype(float)) + tm.assert_series_equal(result, expected) + + def test_object_comparisons(self): + ser = Series(["a", "b", np.nan, "c", "a"]) + + result = ser == "a" + expected = Series([True, False, False, False, True]) + tm.assert_series_equal(result, expected) + + result = ser < "a" + expected = Series([False, False, False, False, False]) + tm.assert_series_equal(result, expected) + + result = ser != "a" + expected = -(ser == "a") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_more_na_comparisons(self, dtype): + left = Series(["a", np.nan, "c"], dtype=dtype) + right = Series(["a", np.nan, "d"], dtype=dtype) + + result = left == right + expected = Series([True, False, False]) + tm.assert_series_equal(result, expected) + + result = left != right + expected = Series([False, True, True]) + tm.assert_series_equal(result, expected) + + result = left == np.nan + expected = Series([False, False, False]) + tm.assert_series_equal(result, expected) + + result = left != np.nan + expected = Series([True, True, True]) + tm.assert_series_equal(result, expected) + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestArithmetic: + + # TODO: parametrize + def test_pow_ops_object(self): + # GH#22922 + # pow is weird with masking & 1, so testing here + a = Series([1, np.nan, 1, np.nan], dtype=object) + b = Series([1, np.nan, np.nan, 1], dtype=object) + result = a**b + expected = Series(a.values**b.values, dtype=object) + tm.assert_series_equal(result, expected) + + result = b**a + expected = Series(b.values**a.values, dtype=object) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + @pytest.mark.parametrize("other", ["category", "Int64"]) + def test_add_extension_scalar(self, other, box_with_array, op): + # GH#22378 + # Check that scalars satisfying is_extension_array_dtype(obj) + # do not incorrectly try to dispatch to an ExtensionArray operation + + arr = Series(["a", "b", "c"]) + expected = Series([op(x, other) for x in arr]) + + arr = tm.box_expected(arr, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = op(arr, other) + tm.assert_equal(result, expected) + + def test_objarr_add_str(self, box_with_array): + ser = Series(["x", np.nan, "x"]) + expected = Series(["xa", np.nan, "xa"]) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = ser + "a" + tm.assert_equal(result, expected) + + def test_objarr_radd_str(self, box_with_array): + ser = Series(["x", np.nan, "x"]) + expected = Series(["ax", np.nan, "ax"]) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = "a" + ser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + [1, 2, 3], + [1.1, 2.2, 
3.3], + [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT], + ["x", "y", 1], + ], + ) + @pytest.mark.parametrize("dtype", [None, object]) + def test_objarr_radd_str_invalid(self, dtype, data, box_with_array): + ser = Series(data, dtype=dtype) + + ser = tm.box_expected(ser, box_with_array) + msg = "|".join( + [ + "can only concatenate str", + "did not contain a loop with signature matching types", + "unsupported operand type", + "must be str", + ] + ) + with pytest.raises(TypeError, match=msg): + "foo_" + ser + + @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub]) + def test_objarr_add_invalid(self, op, box_with_array): + # invalid ops + box = box_with_array + + obj_ser = tm.makeObjectSeries() + obj_ser.name = "objects" + + obj_ser = tm.box_expected(obj_ser, box) + msg = "|".join( + ["can only concatenate str", "unsupported operand type", "must be str"] + ) + with pytest.raises(Exception, match=msg): + op(obj_ser, 1) + with pytest.raises(Exception, match=msg): + op(obj_ser, np.array(1, dtype=np.int64)) + + # TODO: Moved from tests.series.test_operators; needs cleanup + def test_operators_na_handling(self): + ser = Series(["foo", "bar", "baz", np.nan]) + result = "prefix_" + ser + expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan]) + tm.assert_series_equal(result, expected) + + result = ser + "_suffix" + expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan]) + tm.assert_series_equal(result, expected) + + # TODO: parametrize over box + @pytest.mark.parametrize("dtype", [None, object]) + def test_series_with_dtype_radd_timedelta(self, dtype): + # note this test is _not_ aimed at timedelta64-dtyped Series + ser = Series( + [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")], + dtype=dtype, + ) + expected = Series( + [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")] + ) + + result = pd.Timedelta("3 days") + ser + tm.assert_series_equal(result, expected) + + result = ser + pd.Timedelta("3 days") + tm.assert_series_equal(result, expected) + + # TODO: cleanup & parametrize over box + def test_mixed_timezone_series_ops_object(self): + # GH#13043 + ser = Series( + [ + Timestamp("2015-01-01", tz="US/Eastern"), + Timestamp("2015-01-01", tz="Asia/Tokyo"), + ], + name="xxx", + ) + assert ser.dtype == object + + exp = Series( + [ + Timestamp("2015-01-02", tz="US/Eastern"), + Timestamp("2015-01-02", tz="Asia/Tokyo"), + ], + name="xxx", + ) + tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp) + tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp) + + # object series & object series + ser2 = Series( + [ + Timestamp("2015-01-03", tz="US/Eastern"), + Timestamp("2015-01-05", tz="Asia/Tokyo"), + ], + name="xxx", + ) + assert ser2.dtype == object + exp = Series([pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx") + tm.assert_series_equal(ser2 - ser, exp) + tm.assert_series_equal(ser - ser2, -exp) + + ser = Series( + [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")], + name="xxx", + dtype=object, + ) + assert ser.dtype == object + + exp = Series([pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx") + tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp) + tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp) + + # TODO: cleanup & parametrize over box + def test_iadd_preserves_name(self): + # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name + ser = Series([1, 2, 3]) + ser.index.name = "foo" + + ser.index += 1 + assert ser.index.name == 
"foo" + + ser.index -= 1 + assert ser.index.name == "foo" + + def test_add_string(self): + # from bug report + index = pd.Index(["a", "b", "c"]) + index2 = index + "foo" + + assert "a" not in index2 + assert "afoo" in index2 + + def test_iadd_string(self): + index = pd.Index(["a", "b", "c"]) + # doesn't fail test unless there is a check before `+=` + assert "a" in index + + index += "_x" + assert "a_x" in index + + def test_add(self): + index = tm.makeStringIndex(100) + expected = pd.Index(index.values * 2) + tm.assert_index_equal(index + index, expected) + tm.assert_index_equal(index + index.tolist(), expected) + tm.assert_index_equal(index.tolist() + index, expected) + + # test add and radd + index = pd.Index(list("abc")) + expected = pd.Index(["a1", "b1", "c1"]) + tm.assert_index_equal(index + "1", expected) + expected = pd.Index(["1a", "1b", "1c"]) + tm.assert_index_equal("1" + index, expected) + + def test_sub_fail(self): + index = tm.makeStringIndex(100) + + msg = "unsupported operand type|Cannot broadcast" + with pytest.raises(TypeError, match=msg): + index - "a" + with pytest.raises(TypeError, match=msg): + index - index + with pytest.raises(TypeError, match=msg): + index - index.tolist() + with pytest.raises(TypeError, match=msg): + index.tolist() - index + + def test_sub_object(self): + # GH#19369 + index = pd.Index([Decimal(1), Decimal(2)]) + expected = pd.Index([Decimal(0), Decimal(1)]) + + result = index - Decimal(1) + tm.assert_index_equal(result, expected) + + result = index - pd.Index([Decimal(1), Decimal(1)]) + tm.assert_index_equal(result, expected) + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + index - "foo" + + with pytest.raises(TypeError, match=msg): + index - np.array([2, "foo"], dtype=object) + + def test_rsub_object(self, fixed_now_ts): + # GH#19369 + index = pd.Index([Decimal(1), Decimal(2)]) + expected = pd.Index([Decimal(1), Decimal(0)]) + + result = Decimal(2) - index + tm.assert_index_equal(result, expected) + + result = np.array([Decimal(2), Decimal(2)]) - index + tm.assert_index_equal(result, expected) + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + "foo" - index + + with pytest.raises(TypeError, match=msg): + np.array([True, fixed_now_ts]) - index + + +class MyIndex(pd.Index): + # Simple index subclass that tracks ops calls. 
+ + _calls: int + + @classmethod + def _simple_new(cls, values, name=None, dtype=None): + result = object.__new__(cls) + result._data = values + result._name = name + result._calls = 0 + result._reset_identity() + + return result + + def __add__(self, other): + self._calls += 1 + return self._simple_new(self._data) + + def __radd__(self, other): + return self.__add__(other) + + +@pytest.mark.parametrize( + "other", + [ + [datetime.timedelta(1), datetime.timedelta(2)], + [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)], + [pd.Period("2000"), pd.Period("2001")], + ["a", "b"], + ], + ids=["timedelta", "datetime", "period", "object"], +) +def test_index_ops_defer_to_unknown_subclasses(other): + # https://github.com/pandas-dev/pandas/issues/31109 + values = np.array( + [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object + ) + a = MyIndex._simple_new(values) + other = pd.Index(other) + result = other + a + assert isinstance(result, MyIndex) + assert a._calls == 1 diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py new file mode 100644 index 00000000..b03ac26a --- /dev/null +++ b/pandas/tests/arithmetic/test_period.py @@ -0,0 +1,1601 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +# Specifically for Period dtype +import operator + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + IncompatibleFrequency, + Period, + Timestamp, + to_offset, +) +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + PeriodIndex, + Series, + Timedelta, + TimedeltaIndex, + period_range, +) +import pandas._testing as tm +from pandas.core import ops +from pandas.core.arrays import TimedeltaArray +from pandas.tests.arithmetic.common import ( + assert_invalid_addsub_type, + assert_invalid_comparison, + get_upcast_box, +) + +# ------------------------------------------------------------------ +# Comparisons + + +class TestPeriodArrayLikeComparisons: + # Comparison tests for PeriodDtype vectors fully parametrized over + # DataFrame/Series/PeriodIndex/PeriodArray. Ideally all comparison + # tests will eventually end up here. 
+ + @pytest.mark.parametrize("other", ["2017", Period("2017", freq="D")]) + def test_eq_scalar(self, other, box_with_array): + + idx = PeriodIndex(["2017", "2017", "2018"], freq="D") + idx = tm.box_expected(idx, box_with_array) + xbox = get_upcast_box(idx, other, True) + + expected = np.array([True, True, False]) + expected = tm.box_expected(expected, xbox) + + result = idx == other + + tm.assert_equal(result, expected) + + def test_compare_zerodim(self, box_with_array): + # GH#26689 make sure we unbox zero-dimensional arrays + + pi = period_range("2000", periods=4) + other = np.array(pi.to_numpy()[0]) + + pi = tm.box_expected(pi, box_with_array) + xbox = get_upcast_box(pi, other, True) + + result = pi <= other + expected = np.array([True, False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "scalar", + [ + "foo", + Timestamp("2021-01-01"), + Timedelta(days=4), + 9, + 9.5, + 2000, # specifically don't consider 2000 to match Period("2000", "D") + False, + None, + ], + ) + def test_compare_invalid_scalar(self, box_with_array, scalar): + # GH#28980 + # comparison with scalar that cannot be interpreted as a Period + pi = period_range("2000", periods=4) + parr = tm.box_expected(pi, box_with_array) + assert_invalid_comparison(parr, scalar, box_with_array) + + @pytest.mark.parametrize( + "other", + [ + pd.date_range("2000", periods=4).array, + pd.timedelta_range("1D", periods=4).array, + np.arange(4), + np.arange(4).astype(np.float64), + list(range(4)), + # match Period semantics by not treating integers as Periods + [2000, 2001, 2002, 2003], + np.arange(2000, 2004), + np.arange(2000, 2004).astype(object), + pd.Index([2000, 2001, 2002, 2003]), + ], + ) + def test_compare_invalid_listlike(self, box_with_array, other): + pi = period_range("2000", periods=4) + parr = tm.box_expected(pi, box_with_array) + assert_invalid_comparison(parr, other, box_with_array) + + @pytest.mark.parametrize("other_box", [list, np.array, lambda x: x.astype(object)]) + def test_compare_object_dtype(self, box_with_array, other_box): + pi = period_range("2000", periods=5) + parr = tm.box_expected(pi, box_with_array) + + other = other_box(pi) + xbox = get_upcast_box(parr, other, True) + + expected = np.array([True, True, True, True, True]) + expected = tm.box_expected(expected, xbox) + + result = parr == other + tm.assert_equal(result, expected) + result = parr <= other + tm.assert_equal(result, expected) + result = parr >= other + tm.assert_equal(result, expected) + + result = parr != other + tm.assert_equal(result, ~expected) + result = parr < other + tm.assert_equal(result, ~expected) + result = parr > other + tm.assert_equal(result, ~expected) + + other = other_box(pi[::-1]) + + expected = np.array([False, False, True, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr == other + tm.assert_equal(result, expected) + + expected = np.array([True, True, True, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr <= other + tm.assert_equal(result, expected) + + expected = np.array([False, False, True, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr >= other + tm.assert_equal(result, expected) + + expected = np.array([True, True, False, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr != other + tm.assert_equal(result, expected) + + expected = np.array([True, True, False, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr < 
other + tm.assert_equal(result, expected) + + expected = np.array([False, False, False, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr > other + tm.assert_equal(result, expected) + + +class TestPeriodIndexComparisons: + # TODO: parameterize over boxes + + def test_pi_cmp_period(self): + idx = period_range("2007-01", periods=20, freq="M") + per = idx[10] + + result = idx < per + exp = idx.values < idx.values[10] + tm.assert_numpy_array_equal(result, exp) + + # Tests Period.__richcmp__ against ndarray[object, ndim=2] + result = idx.values.reshape(10, 2) < per + tm.assert_numpy_array_equal(result, exp.reshape(10, 2)) + + # Tests Period.__richcmp__ against ndarray[object, ndim=0] + result = idx < np.array(per) + tm.assert_numpy_array_equal(result, exp) + + # TODO: moved from test_datetime64; de-duplicate with version below + def test_parr_cmp_period_scalar2(self, box_with_array): + pi = period_range("2000-01-01", periods=10, freq="D") + + val = pi[3] + expected = [x > val for x in pi] + + ser = tm.box_expected(pi, box_with_array) + xbox = get_upcast_box(ser, val, True) + + expected = tm.box_expected(expected, xbox) + result = ser > val + tm.assert_equal(result, expected) + + val = pi[5] + result = ser > val + expected = [x > val for x in pi] + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_period_scalar(self, freq, box_with_array): + # GH#13200 + base = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + per = Period("2011-02", freq=freq) + xbox = get_upcast_box(base, per, True) + + exp = np.array([False, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base == per, exp) + tm.assert_equal(per == base, exp) + + exp = np.array([True, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base != per, exp) + tm.assert_equal(per != base, exp) + + exp = np.array([False, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base > per, exp) + tm.assert_equal(per < base, exp) + + exp = np.array([True, False, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base < per, exp) + tm.assert_equal(per > base, exp) + + exp = np.array([False, True, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base >= per, exp) + tm.assert_equal(per <= base, exp) + + exp = np.array([True, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base <= per, exp) + tm.assert_equal(per >= base, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_pi(self, freq, box_with_array): + # GH#13200 + base = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + + # TODO: could also box idx? 
+ idx = PeriodIndex(["2011-02", "2011-01", "2011-03", "2011-05"], freq=freq) + + xbox = get_upcast_box(base, idx, True) + + exp = np.array([False, False, True, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base == idx, exp) + + exp = np.array([True, True, False, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base != idx, exp) + + exp = np.array([False, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base > idx, exp) + + exp = np.array([True, False, False, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base < idx, exp) + + exp = np.array([False, True, True, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base >= idx, exp) + + exp = np.array([True, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base <= idx, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_pi_mismatched_freq(self, freq, box_with_array): + # GH#13200 + # different base freq + base = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + + msg = rf"Invalid comparison between dtype=period\[{freq}\] and Period" + with pytest.raises(TypeError, match=msg): + base <= Period("2011", freq="A") + + with pytest.raises(TypeError, match=msg): + Period("2011", freq="A") >= base + + # TODO: Could parametrize over boxes for idx? + idx = PeriodIndex(["2011", "2012", "2013", "2014"], freq="A") + rev_msg = r"Invalid comparison between dtype=period\[A-DEC\] and PeriodArray" + idx_msg = rev_msg if box_with_array in [tm.to_array, pd.array] else msg + with pytest.raises(TypeError, match=idx_msg): + base <= idx + + # Different frequency + msg = rf"Invalid comparison between dtype=period\[{freq}\] and Period" + with pytest.raises(TypeError, match=msg): + base <= Period("2011", freq="4M") + + with pytest.raises(TypeError, match=msg): + Period("2011", freq="4M") >= base + + idx = PeriodIndex(["2011", "2012", "2013", "2014"], freq="4M") + rev_msg = r"Invalid comparison between dtype=period\[4M\] and PeriodArray" + idx_msg = rev_msg if box_with_array in [tm.to_array, pd.array] else msg + with pytest.raises(TypeError, match=idx_msg): + base <= idx + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_pi_cmp_nat(self, freq): + idx1 = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-05"], freq=freq) + per = idx1[1] + + result = idx1 > per + exp = np.array([False, False, False, True]) + tm.assert_numpy_array_equal(result, exp) + result = per < idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == pd.NaT + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + result = pd.NaT == idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != pd.NaT + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + result = pd.NaT != idx1 + tm.assert_numpy_array_equal(result, exp) + + idx2 = PeriodIndex(["2011-02", "2011-01", "2011-04", "NaT"], freq=freq) + result = idx1 < idx2 + exp = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx2 + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx2 + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx1 + exp = np.array([True, True, False, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx1 + exp = np.array([False, False, True, False]) + 
tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_pi_cmp_nat_mismatched_freq_raises(self, freq): + idx1 = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-05"], freq=freq) + + diff = PeriodIndex(["2011-02", "2011-01", "2011-04", "NaT"], freq="4M") + msg = rf"Invalid comparison between dtype=period\[{freq}\] and PeriodArray" + with pytest.raises(TypeError, match=msg): + idx1 > diff + + result = idx1 == diff + expected = np.array([False, False, False, False], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + # TODO: De-duplicate with test_pi_cmp_nat + @pytest.mark.parametrize("dtype", [object, None]) + def test_comp_nat(self, dtype): + left = PeriodIndex([Period("2011-01-01"), pd.NaT, Period("2011-01-03")]) + right = PeriodIndex([pd.NaT, pd.NaT, Period("2011-01-03")]) + + if dtype is not None: + left = left.astype(dtype) + right = right.astype(dtype) + + result = left == right + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = left != right + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == right, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(left != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != left, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > left, expected) + + +class TestPeriodSeriesComparisons: + def test_cmp_series_period_series_mixed_freq(self): + # GH#13200 + base = Series( + [ + Period("2011", freq="A"), + Period("2011-02", freq="M"), + Period("2013", freq="A"), + Period("2011-04", freq="M"), + ] + ) + + ser = Series( + [ + Period("2012", freq="A"), + Period("2011-01", freq="M"), + Period("2013", freq="A"), + Period("2011-05", freq="M"), + ] + ) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == ser, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != ser, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > ser, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < ser, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= ser, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= ser, exp) + + +class TestPeriodIndexSeriesComparisonConsistency: + """Test PeriodIndex and Period Series Ops consistency""" + + # TODO: needs parametrization+de-duplication + + def _check(self, values, func, expected): + # Test PeriodIndex and Period Series Ops consistency + + idx = PeriodIndex(values) + result = func(idx) + + # check that we don't pass an unwanted type to tm.assert_equal + assert isinstance(expected, (pd.Index, np.ndarray)) + tm.assert_equal(result, expected) + + s = Series(values) + result = func(s) + + exp = Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def test_pi_comp_period(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + per = idx[2] + + f = lambda x: x == per + exp = np.array([False, False, True, False], dtype=np.bool_) + self._check(idx, f, exp) + f = lambda x: per == x + self._check(idx, f, exp) + + f = lambda x: x != per + exp = np.array([True, True, False, True], 
dtype=np.bool_) + self._check(idx, f, exp) + f = lambda x: per != x + self._check(idx, f, exp) + + f = lambda x: per >= x + exp = np.array([True, True, True, False], dtype=np.bool_) + self._check(idx, f, exp) + + f = lambda x: x > per + exp = np.array([False, False, False, True], dtype=np.bool_) + self._check(idx, f, exp) + + f = lambda x: per >= x + exp = np.array([True, True, True, False], dtype=np.bool_) + self._check(idx, f, exp) + + def test_pi_comp_period_nat(self): + idx = PeriodIndex( + ["2011-01", "NaT", "2011-03", "2011-04"], freq="M", name="idx" + ) + per = idx[2] + + f = lambda x: x == per + exp = np.array([False, False, True, False], dtype=np.bool_) + self._check(idx, f, exp) + f = lambda x: per == x + self._check(idx, f, exp) + + f = lambda x: x == pd.NaT + exp = np.array([False, False, False, False], dtype=np.bool_) + self._check(idx, f, exp) + f = lambda x: pd.NaT == x + self._check(idx, f, exp) + + f = lambda x: x != per + exp = np.array([True, True, False, True], dtype=np.bool_) + self._check(idx, f, exp) + f = lambda x: per != x + self._check(idx, f, exp) + + f = lambda x: x != pd.NaT + exp = np.array([True, True, True, True], dtype=np.bool_) + self._check(idx, f, exp) + f = lambda x: pd.NaT != x + self._check(idx, f, exp) + + f = lambda x: per >= x + exp = np.array([True, False, True, False], dtype=np.bool_) + self._check(idx, f, exp) + + f = lambda x: x < per + exp = np.array([True, False, False, False], dtype=np.bool_) + self._check(idx, f, exp) + + f = lambda x: x > pd.NaT + exp = np.array([False, False, False, False], dtype=np.bool_) + self._check(idx, f, exp) + + f = lambda x: pd.NaT >= x + exp = np.array([False, False, False, False], dtype=np.bool_) + self._check(idx, f, exp) + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestPeriodFrameArithmetic: + def test_ops_frame_period(self): + # GH#13043 + df = pd.DataFrame( + { + "A": [Period("2015-01", freq="M"), Period("2015-02", freq="M")], + "B": [Period("2014-01", freq="M"), Period("2014-02", freq="M")], + } + ) + assert df["A"].dtype == "Period[M]" + assert df["B"].dtype == "Period[M]" + + p = Period("2015-03", freq="M") + off = p.freq + # dtype will be object because of original dtype + exp = pd.DataFrame( + { + "A": np.array([2 * off, 1 * off], dtype=object), + "B": np.array([14 * off, 13 * off], dtype=object), + } + ) + tm.assert_frame_equal(p - df, exp) + tm.assert_frame_equal(df - p, -1 * exp) + + df2 = pd.DataFrame( + { + "A": [Period("2015-05", freq="M"), Period("2015-06", freq="M")], + "B": [Period("2015-05", freq="M"), Period("2015-06", freq="M")], + } + ) + assert df2["A"].dtype == "Period[M]" + assert df2["B"].dtype == "Period[M]" + + exp = pd.DataFrame( + { + "A": np.array([4 * off, 4 * off], dtype=object), + "B": np.array([16 * off, 16 * off], dtype=object), + } + ) + tm.assert_frame_equal(df2 - df, exp) + tm.assert_frame_equal(df - df2, -1 * exp) + + +class TestPeriodIndexArithmetic: + # --------------------------------------------------------------- + # __add__/__sub__ with PeriodIndex + # PeriodIndex + other is defined for integers and timedelta-like others + # PeriodIndex - other is defined for integers, timedelta-like others, + # and PeriodIndex (with matching freq) + + def test_parr_add_iadd_parr_raises(self, box_with_array): + rng = period_range("1/1/2000", freq="D", periods=5) + other = period_range("1/6/2000", freq="D", periods=5) + # TODO: parametrize over boxes for other? 
+ + rng = tm.box_expected(rng, box_with_array) + # An earlier implementation of PeriodIndex addition performed + # a set operation (union). This has since been changed to + # raise a TypeError. See GH#14164 and GH#13077 for historical + # reference. + msg = r"unsupported operand type\(s\) for \+: .* and .*" + with pytest.raises(TypeError, match=msg): + rng + other + + with pytest.raises(TypeError, match=msg): + rng += other + + def test_pi_sub_isub_pi(self): + # GH#20049 + # For historical reference see GH#14164, GH#13077. + # PeriodIndex subtraction originally performed set difference, + # then changed to raise TypeError before being implemented in GH#20049 + rng = period_range("1/1/2000", freq="D", periods=5) + other = period_range("1/6/2000", freq="D", periods=5) + + off = rng.freq + expected = pd.Index([-5 * off] * 5) + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_pi_sub_pi_with_nat(self): + rng = period_range("1/1/2000", freq="D", periods=5) + other = rng[1:].insert(0, pd.NaT) + assert other[1:].equals(rng[1:]) + + result = rng - other + off = rng.freq + expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off]) + tm.assert_index_equal(result, expected) + + def test_parr_sub_pi_mismatched_freq(self, box_with_array, box_with_array2): + rng = period_range("1/1/2000", freq="D", periods=5) + other = period_range("1/6/2000", freq="H", periods=5) + + rng = tm.box_expected(rng, box_with_array) + other = tm.box_expected(other, box_with_array2) + msg = r"Input has different freq=[HD] from PeriodArray\(freq=[DH]\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + def test_sub_n_gt_1_ticks(self, tick_classes, n): + # GH 23878 + p1_d = "19910905" + p2_d = "19920406" + p1 = PeriodIndex([p1_d], freq=tick_classes(n)) + p2 = PeriodIndex([p2_d], freq=tick_classes(n)) + + expected = PeriodIndex([p2_d], freq=p2.freq.base) - PeriodIndex( + [p1_d], freq=p1.freq.base + ) + + tm.assert_index_equal((p2 - p1), expected) + + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + @pytest.mark.parametrize( + "offset, kwd_name", + [ + (pd.offsets.YearEnd, "month"), + (pd.offsets.QuarterEnd, "startingMonth"), + (pd.offsets.MonthEnd, None), + (pd.offsets.Week, "weekday"), + ], + ) + def test_sub_n_gt_1_offsets(self, offset, kwd_name, n): + # GH 23878 + kwds = {kwd_name: 3} if kwd_name is not None else {} + p1_d = "19910905" + p2_d = "19920406" + freq = offset(n, normalize=False, **kwds) + p1 = PeriodIndex([p1_d], freq=freq) + p2 = PeriodIndex([p2_d], freq=freq) + + result = p2 - p1 + expected = PeriodIndex([p2_d], freq=freq.base) - PeriodIndex( + [p1_d], freq=freq.base + ) + + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------- + # Invalid Operations + + @pytest.mark.parametrize( + "other", + [ + # datetime scalars + Timestamp("2016-01-01"), + Timestamp("2016-01-01").to_pydatetime(), + Timestamp("2016-01-01").to_datetime64(), + # datetime-like arrays + pd.date_range("2016-01-01", periods=3, freq="H"), + pd.date_range("2016-01-01", periods=3, tz="Europe/Brussels"), + pd.date_range("2016-01-01", periods=3, freq="S")._data, + pd.date_range("2016-01-01", periods=3, tz="Asia/Tokyo")._data, + # Miscellaneous invalid types + 3.14, + np.array([2.0, 3.0, 4.0]), + ], + ) + def test_parr_add_sub_invalid(self, other, box_with_array): + # GH#23215 + rng = period_range("1/1/2000", freq="D", periods=3) + rng = 
tm.box_expected(rng, box_with_array) + + msg = "|".join( + [ + r"(:?cannot add PeriodArray and .*)", + r"(:?cannot subtract .* from (:?a\s)?.*)", + r"(:?unsupported operand type\(s\) for \+: .* and .*)", + r"unsupported operand type\(s\) for [+-]: .* and .*", + ] + ) + assert_invalid_addsub_type(rng, other, msg) + with pytest.raises(TypeError, match=msg): + rng + other + with pytest.raises(TypeError, match=msg): + other + rng + with pytest.raises(TypeError, match=msg): + rng - other + with pytest.raises(TypeError, match=msg): + other - rng + + # ----------------------------------------------------------------- + # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] + + def test_pi_add_sub_td64_array_non_tick_raises(self): + rng = period_range("1/1/2000", freq="Q", periods=3) + tdi = TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"]) + tdarr = tdi.values + + msg = r"Cannot add or subtract timedelta64\[ns\] dtype from period\[Q-DEC\]" + with pytest.raises(TypeError, match=msg): + rng + tdarr + with pytest.raises(TypeError, match=msg): + tdarr + rng + + with pytest.raises(TypeError, match=msg): + rng - tdarr + msg = r"cannot subtract period\[Q-DEC\]-dtype from TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdarr - rng + + def test_pi_add_sub_td64_array_tick(self): + # PeriodIndex + Timedelta-like is allowed only with + # tick-like frequencies + rng = period_range("1/1/2000", freq="90D", periods=3) + tdi = TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"]) + tdarr = tdi.values + + expected = period_range("12/31/1999", freq="90D", periods=3) + result = rng + tdi + tm.assert_index_equal(result, expected) + result = rng + tdarr + tm.assert_index_equal(result, expected) + result = tdi + rng + tm.assert_index_equal(result, expected) + result = tdarr + rng + tm.assert_index_equal(result, expected) + + expected = period_range("1/2/2000", freq="90D", periods=3) + + result = rng - tdi + tm.assert_index_equal(result, expected) + result = rng - tdarr + tm.assert_index_equal(result, expected) + + msg = r"cannot subtract .* from .*" + with pytest.raises(TypeError, match=msg): + tdarr - rng + + with pytest.raises(TypeError, match=msg): + tdi - rng + + @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"]) + @pytest.mark.parametrize("tdi_freq", [None, "H"]) + def test_parr_sub_td64array(self, box_with_array, tdi_freq, pi_freq): + box = box_with_array + xbox = box if box not in [pd.array, tm.to_array] else pd.Index + + tdi = TimedeltaIndex(["1 hours", "2 hours"], freq=tdi_freq) + dti = Timestamp("2018-03-07 17:16:40") + tdi + pi = dti.to_period(pi_freq) + + # TODO: parametrize over box for pi? + td64obj = tm.box_expected(tdi, box) + + if pi_freq == "H": + result = pi - td64obj + expected = (pi.to_timestamp("S") - tdi).to_period(pi_freq) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + # Subtract from scalar + result = pi[0] - td64obj + expected = (pi[0].to_timestamp("S") - tdi).to_period(pi_freq) + expected = tm.box_expected(expected, box) + tm.assert_equal(result, expected) + + elif pi_freq == "D": + # Tick, but non-compatible + msg = ( + "Cannot add/subtract timedelta-like from PeriodArray that is " + "not an integer multiple of the PeriodArray's freq." 
+ ) + with pytest.raises(IncompatibleFrequency, match=msg): + pi - td64obj + + with pytest.raises(IncompatibleFrequency, match=msg): + pi[0] - td64obj + + else: + # With non-Tick freq, we could not add timedelta64 array regardless + # of what its resolution is + msg = "Cannot add or subtract timedelta64" + with pytest.raises(TypeError, match=msg): + pi - td64obj + with pytest.raises(TypeError, match=msg): + pi[0] - td64obj + + # ----------------------------------------------------------------- + # operations with array/Index of DateOffset objects + + @pytest.mark.parametrize("box", [np.array, pd.Index]) + def test_pi_add_offset_array(self, box): + # GH#18849 + pi = PeriodIndex([Period("2015Q1"), Period("2016Q2")]) + offs = box( + [ + pd.offsets.QuarterEnd(n=1, startingMonth=12), + pd.offsets.QuarterEnd(n=-2, startingMonth=12), + ] + ) + expected = PeriodIndex([Period("2015Q2"), Period("2015Q4")]) + + with tm.assert_produces_warning(PerformanceWarning): + res = pi + offs + tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = offs + pi + tm.assert_index_equal(res2, expected) + + unanchored = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + # addition/subtraction ops with incompatible offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. + msg = r"Input cannot be converted to Period\(freq=Q-DEC\)" + with pytest.raises(IncompatibleFrequency, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + pi + unanchored + with pytest.raises(IncompatibleFrequency, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + unanchored + pi + + @pytest.mark.parametrize("box", [np.array, pd.Index]) + def test_pi_sub_offset_array(self, box): + # GH#18824 + pi = PeriodIndex([Period("2015Q1"), Period("2016Q2")]) + other = box( + [ + pd.offsets.QuarterEnd(n=1, startingMonth=12), + pd.offsets.QuarterEnd(n=-2, startingMonth=12), + ] + ) + + expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))]) + + with tm.assert_produces_warning(PerformanceWarning): + res = pi - other + tm.assert_index_equal(res, expected) + + anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. + msg = r"Input has different freq=-1M from Period\(freq=Q-DEC\)" + with pytest.raises(IncompatibleFrequency, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + pi - anchored + with pytest.raises(IncompatibleFrequency, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + anchored - pi + + def test_pi_add_iadd_int(self, one): + # Variants of `one` for #19012 + rng = period_range("2000-01-01 09:00", freq="H", periods=10) + result = rng + one + expected = period_range("2000-01-01 10:00", freq="H", periods=10) + tm.assert_index_equal(result, expected) + rng += one + tm.assert_index_equal(rng, expected) + + def test_pi_sub_isub_int(self, one): + """ + PeriodIndex.__sub__ and __isub__ with several representations of + the integer 1, e.g. int, np.int64, np.uint8, ... 
+ """ + rng = period_range("2000-01-01 09:00", freq="H", periods=10) + result = rng - one + expected = period_range("2000-01-01 08:00", freq="H", periods=10) + tm.assert_index_equal(result, expected) + rng -= one + tm.assert_index_equal(rng, expected) + + @pytest.mark.parametrize("five", [5, np.array(5, dtype=np.int64)]) + def test_pi_sub_intlike(self, five): + rng = period_range("2007-01", periods=50) + + result = rng - five + exp = rng + (-five) + tm.assert_index_equal(result, exp) + + def test_pi_add_sub_int_array_freqn_gt1(self): + # GH#47209 test adding array of ints when freq.n > 1 matches + # scalar behavior + pi = period_range("2016-01-01", periods=10, freq="2D") + arr = np.arange(10) + result = pi + arr + expected = pd.Index([x + y for x, y in zip(pi, arr)]) + tm.assert_index_equal(result, expected) + + result = pi - arr + expected = pd.Index([x - y for x, y in zip(pi, arr)]) + tm.assert_index_equal(result, expected) + + def test_pi_sub_isub_offset(self): + # offset + # DateOffset + rng = period_range("2014", "2024", freq="A") + result = rng - pd.offsets.YearEnd(5) + expected = period_range("2009", "2019", freq="A") + tm.assert_index_equal(result, expected) + rng -= pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + rng = period_range("2014-01", "2016-12", freq="M") + result = rng - pd.offsets.MonthEnd(5) + expected = period_range("2013-08", "2016-07", freq="M") + tm.assert_index_equal(result, expected) + + rng -= pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + @pytest.mark.parametrize("transpose", [True, False]) + def test_pi_add_offset_n_gt1(self, box_with_array, transpose): + # GH#23215 + # add offset to PeriodIndex with freq.n > 1 + + per = Period("2016-01", freq="2M") + pi = PeriodIndex([per]) + + expected = PeriodIndex(["2016-03"], freq="2M") + + pi = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) + + result = pi + per.freq + tm.assert_equal(result, expected) + + result = per.freq + pi + tm.assert_equal(result, expected) + + def test_pi_add_offset_n_gt1_not_divisible(self, box_with_array): + # GH#23215 + # PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0 + pi = PeriodIndex(["2016-01"], freq="2M") + expected = PeriodIndex(["2016-04"], freq="2M") + + pi = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = pi + to_offset("3M") + tm.assert_equal(result, expected) + + result = to_offset("3M") + pi + tm.assert_equal(result, expected) + + # --------------------------------------------------------------- + # __add__/__sub__ with integer arrays + + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_pi_add_intarray(self, int_holder, op): + # GH#19959 + pi = PeriodIndex([Period("2015Q1"), Period("NaT")]) + other = int_holder([4, -1]) + + result = op(pi, other) + expected = PeriodIndex([Period("2016Q1"), Period("NaT")]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + def test_pi_sub_intarray(self, int_holder): + # GH#19959 + pi = PeriodIndex([Period("2015Q1"), Period("NaT")]) + other = int_holder([4, -1]) + + result = pi - other + expected = PeriodIndex([Period("2014Q1"), Period("NaT")]) + tm.assert_index_equal(result, expected) + + msg = r"bad operand type for unary -: 'PeriodArray'" + with pytest.raises(TypeError, match=msg): + other - pi + + # 
--------------------------------------------------------------- + # Timedelta-like (timedelta, timedelta64, Timedelta, Tick) + # TODO: Some of these are misnomers because of non-Tick DateOffsets + + def test_parr_add_timedeltalike_minute_gt1(self, three_days, box_with_array): + # GH#23031 adding a time-delta-like offset to a PeriodArray that has + # minute frequency with n != 1. A more general case is tested below + # in test_pi_add_timedeltalike_tick_gt1, but here we write out the + # expected result more explicitly. + other = three_days + rng = period_range("2014-05-01", periods=3, freq="2D") + rng = tm.box_expected(rng, box_with_array) + + expected = PeriodIndex(["2014-05-04", "2014-05-06", "2014-05-08"], freq="2D") + expected = tm.box_expected(expected, box_with_array) + + result = rng + other + tm.assert_equal(result, expected) + + result = other + rng + tm.assert_equal(result, expected) + + # subtraction + expected = PeriodIndex(["2014-04-28", "2014-04-30", "2014-05-02"], freq="2D") + expected = tm.box_expected(expected, box_with_array) + result = rng - other + tm.assert_equal(result, expected) + + msg = "|".join( + [ + r"bad operand type for unary -: 'PeriodArray'", + r"cannot subtract PeriodArray from timedelta64\[[hD]\]", + ] + ) + with pytest.raises(TypeError, match=msg): + other - rng + + @pytest.mark.parametrize("freqstr", ["5ns", "5us", "5ms", "5s", "5T", "5h", "5d"]) + def test_parr_add_timedeltalike_tick_gt1(self, three_days, freqstr, box_with_array): + # GH#23031 adding a time-delta-like offset to a PeriodArray that has + # tick-like frequency with n != 1 + other = three_days + rng = period_range("2014-05-01", periods=6, freq=freqstr) + first = rng[0] + rng = tm.box_expected(rng, box_with_array) + + expected = period_range(first + other, periods=6, freq=freqstr) + expected = tm.box_expected(expected, box_with_array) + + result = rng + other + tm.assert_equal(result, expected) + + result = other + rng + tm.assert_equal(result, expected) + + # subtraction + expected = period_range(first - other, periods=6, freq=freqstr) + expected = tm.box_expected(expected, box_with_array) + result = rng - other + tm.assert_equal(result, expected) + msg = "|".join( + [ + r"bad operand type for unary -: 'PeriodArray'", + r"cannot subtract PeriodArray from timedelta64\[[hD]\]", + ] + ) + with pytest.raises(TypeError, match=msg): + other - rng + + def test_pi_add_iadd_timedeltalike_daily(self, three_days): + # Tick + other = three_days + rng = period_range("2014-05-01", "2014-05-15", freq="D") + expected = period_range("2014-05-04", "2014-05-18", freq="D") + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + tm.assert_index_equal(rng, expected) + + def test_pi_sub_isub_timedeltalike_daily(self, three_days): + # Tick-like 3 Days + other = three_days + rng = period_range("2014-05-01", "2014-05-15", freq="D") + expected = period_range("2014-04-28", "2014-05-12", freq="D") + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_parr_add_sub_timedeltalike_freq_mismatch_daily( + self, not_daily, box_with_array + ): + other = not_daily + rng = period_range("2014-05-01", "2014-05-15", freq="D") + rng = tm.box_expected(rng, box_with_array) + + msg = "|".join( + [ + # non-timedelta-like DateOffset + "Input has different freq(=.+)? 
from Period.*?\\(freq=D\\)", + # timedelta/td64/Timedelta but not a multiple of 24H + "Cannot add/subtract timedelta-like from PeriodArray that is " + "not an integer multiple of the PeriodArray's freq.", + ] + ) + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + def test_pi_add_iadd_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + expected = period_range("2014-01-01 12:00", "2014-01-05 12:00", freq="H") + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + tm.assert_index_equal(rng, expected) + + def test_parr_add_timedeltalike_mismatched_freq_hourly( + self, not_hourly, box_with_array + ): + other = not_hourly + rng = period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + rng = tm.box_expected(rng, box_with_array) + msg = "|".join( + [ + # non-timedelta-like DateOffset + "Input has different freq(=.+)? from Period.*?\\(freq=H\\)", + # timedelta/td64/Timedelta but not a multiple of 24H + "Cannot add/subtract timedelta-like from PeriodArray that is " + "not an integer multiple of the PeriodArray's freq.", + ] + ) + + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + + def test_pi_sub_isub_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + expected = period_range("2014-01-01 08:00", "2014-01-05 08:00", freq="H") + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_add_iadd_timedeltalike_annual(self): + # offset + # DateOffset + rng = period_range("2014", "2024", freq="A") + result = rng + pd.offsets.YearEnd(5) + expected = period_range("2019", "2029", freq="A") + tm.assert_index_equal(result, expected) + rng += pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_sub_timedeltalike_freq_mismatch_annual(self, mismatched_freq): + other = mismatched_freq + rng = period_range("2014", "2024", freq="A") + msg = "Input has different freq(=.+)? from Period.*?\\(freq=A-DEC\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + def test_pi_add_iadd_timedeltalike_M(self): + rng = period_range("2014-01", "2016-12", freq="M") + expected = period_range("2014-06", "2017-05", freq="M") + + result = rng + pd.offsets.MonthEnd(5) + tm.assert_index_equal(result, expected) + + rng += pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): + other = mismatched_freq + rng = period_range("2014-01", "2016-12", freq="M") + msg = "Input has different freq(=.+)? 
from Period.*?\\(freq=M\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + @pytest.mark.parametrize("transpose", [True, False]) + def test_parr_add_sub_td64_nat(self, box_with_array, transpose): + # GH#23320 special handling for timedelta64("NaT") + pi = period_range("1994-04-01", periods=9, freq="19D") + other = np.timedelta64("NaT") + expected = PeriodIndex(["NaT"] * 9, freq="19D") + + obj = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + msg = r"cannot subtract .* from .*" + with pytest.raises(TypeError, match=msg): + other - obj + + @pytest.mark.parametrize( + "other", + [ + np.array(["NaT"] * 9, dtype="m8[ns]"), + TimedeltaArray._from_sequence(["NaT"] * 9), + ], + ) + def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): + pi = period_range("1994-04-01", periods=9, freq="19D") + expected = PeriodIndex(["NaT"] * 9, freq="19D") + + obj = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + msg = r"cannot subtract .* from .*" + with pytest.raises(TypeError, match=msg): + other - obj + + # some but not *all* NaT + other = other.copy() + other[0] = np.timedelta64(0, "ns") + expected = PeriodIndex([pi[0]] + ["NaT"] * 8, freq="19D") + expected = tm.box_expected(expected, box_with_array) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + with pytest.raises(TypeError, match=msg): + other - obj + + # --------------------------------------------------------------- + # Unsorted + + def test_parr_add_sub_index(self): + # Check that PeriodArray defers to Index on arithmetic ops + pi = period_range("2000-12-31", periods=3) + parr = pi.array + + result = parr - pi + expected = pi - pi + tm.assert_index_equal(result, expected) + + def test_parr_add_sub_object_array(self): + pi = period_range("2000-12-31", periods=3, freq="D") + parr = pi.array + + other = np.array([Timedelta(days=1), pd.offsets.Day(2), 3]) + + with tm.assert_produces_warning(PerformanceWarning): + result = parr + other + + expected = PeriodIndex( + ["2001-01-01", "2001-01-03", "2001-01-05"], freq="D" + ).array + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(PerformanceWarning): + result = parr - other + + expected = PeriodIndex(["2000-12-30"] * 3, freq="D").array + tm.assert_equal(result, expected) + + +class TestPeriodSeriesArithmetic: + def test_parr_add_timedeltalike_scalar(self, three_days, box_with_array): + # GH#13043 + ser = Series( + [Period("2015-01-01", freq="D"), Period("2015-01-02", freq="D")], + name="xxx", + ) + assert ser.dtype == "Period[D]" + + expected = Series( + [Period("2015-01-04", freq="D"), Period("2015-01-05", freq="D")], + name="xxx", + ) + + obj = tm.box_expected(ser, box_with_array) + if box_with_array is pd.DataFrame: + assert 
(obj.dtypes == "Period[D]").all() + + expected = tm.box_expected(expected, box_with_array) + + result = obj + three_days + tm.assert_equal(result, expected) + + result = three_days + obj + tm.assert_equal(result, expected) + + def test_ops_series_period(self): + # GH#13043 + ser = Series( + [Period("2015-01-01", freq="D"), Period("2015-01-02", freq="D")], + name="xxx", + ) + assert ser.dtype == "Period[D]" + + per = Period("2015-01-10", freq="D") + off = per.freq + # dtype will be object because of original dtype + expected = Series([9 * off, 8 * off], name="xxx", dtype=object) + tm.assert_series_equal(per - ser, expected) + tm.assert_series_equal(ser - per, -1 * expected) + + s2 = Series( + [Period("2015-01-05", freq="D"), Period("2015-01-04", freq="D")], + name="xxx", + ) + assert s2.dtype == "Period[D]" + + expected = Series([4 * off, 2 * off], name="xxx", dtype=object) + tm.assert_series_equal(s2 - ser, expected) + tm.assert_series_equal(ser - s2, -1 * expected) + + +class TestPeriodIndexSeriesMethods: + """Test PeriodIndex and Period Series Ops consistency""" + + def _check(self, values, func, expected): + idx = PeriodIndex(values) + result = func(idx) + tm.assert_equal(result, expected) + + ser = Series(values) + result = func(ser) + + exp = Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def test_pi_ops(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + + expected = PeriodIndex( + ["2011-03", "2011-04", "2011-05", "2011-06"], freq="M", name="idx" + ) + + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + + self._check(idx + 2, lambda x: x - 2, idx) + + result = idx - Period("2011-01", freq="M") + off = idx.freq + exp = pd.Index([0 * off, 1 * off, 2 * off, 3 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = Period("2011-01", freq="M") - idx + exp = pd.Index([0 * off, -1 * off, -2 * off, -3 * off], name="idx") + tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize("ng", ["str", 1.5]) + @pytest.mark.parametrize( + "func", + [ + lambda obj, ng: obj + ng, + lambda obj, ng: ng + obj, + lambda obj, ng: obj - ng, + lambda obj, ng: ng - obj, + lambda obj, ng: np.add(obj, ng), + lambda obj, ng: np.add(ng, obj), + lambda obj, ng: np.subtract(obj, ng), + lambda obj, ng: np.subtract(ng, obj), + ], + ) + def test_parr_ops_errors(self, ng, func, box_with_array): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + obj = tm.box_expected(idx, box_with_array) + msg = "|".join( + [ + r"unsupported operand type\(s\)", + "can only concatenate", + r"must be str", + "object to str implicitly", + ] + ) + + with pytest.raises(TypeError, match=msg): + func(obj, ng) + + def test_pi_ops_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + expected = PeriodIndex( + ["2011-03", "2011-04", "NaT", "2011-06"], freq="M", name="idx" + ) + + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + self._check(idx, lambda x: np.add(x, 2), expected) + + self._check(idx + 2, lambda x: x - 2, idx) + self._check(idx + 2, lambda x: np.subtract(x, 2), idx) + + # freq with mult + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="2M", name="idx" + ) + expected = PeriodIndex( + ["2011-07", "2011-08", "NaT", "2011-10"], freq="2M", name="idx" + ) + + self._check(idx, lambda x: x + 3, expected) + self._check(idx, lambda x: 3 + x, 
expected) + self._check(idx, lambda x: np.add(x, 3), expected) + + self._check(idx + 3, lambda x: x - 3, idx) + self._check(idx + 3, lambda x: np.subtract(x, 3), idx) + + def test_pi_ops_array_int(self): + + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + f = lambda x: x + np.array([1, 2, 3, 4]) + exp = PeriodIndex( + ["2011-02", "2011-04", "NaT", "2011-08"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + f = lambda x: np.add(x, np.array([4, -1, 1, 2])) + exp = PeriodIndex( + ["2011-05", "2011-01", "NaT", "2011-06"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + f = lambda x: x - np.array([1, 2, 3, 4]) + exp = PeriodIndex( + ["2010-12", "2010-12", "NaT", "2010-12"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) + exp = PeriodIndex( + ["2010-10", "2010-12", "NaT", "2011-06"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + def test_pi_ops_offset(self): + idx = PeriodIndex( + ["2011-01-01", "2011-02-01", "2011-03-01", "2011-04-01"], + freq="D", + name="idx", + ) + f = lambda x: x + pd.offsets.Day() + exp = PeriodIndex( + ["2011-01-02", "2011-02-02", "2011-03-02", "2011-04-02"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + f = lambda x: x + pd.offsets.Day(2) + exp = PeriodIndex( + ["2011-01-03", "2011-02-03", "2011-03-03", "2011-04-03"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + f = lambda x: x - pd.offsets.Day(2) + exp = PeriodIndex( + ["2010-12-30", "2011-01-30", "2011-02-27", "2011-03-30"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + def test_pi_offset_errors(self): + idx = PeriodIndex( + ["2011-01-01", "2011-02-01", "2011-03-01", "2011-04-01"], + freq="D", + name="idx", + ) + ser = Series(idx) + + msg = ( + "Cannot add/subtract timedelta-like from PeriodArray that is not " + "an integer multiple of the PeriodArray's freq" + ) + for obj in [idx, ser]: + with pytest.raises(IncompatibleFrequency, match=msg): + obj + pd.offsets.Hour(2) + + with pytest.raises(IncompatibleFrequency, match=msg): + pd.offsets.Hour(2) + obj + + with pytest.raises(IncompatibleFrequency, match=msg): + obj - pd.offsets.Hour(2) + + def test_pi_sub_period(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + + result = idx - Period("2012-01", freq="M") + off = idx.freq + exp = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = np.subtract(idx, Period("2012-01", freq="M")) + tm.assert_index_equal(result, exp) + + result = Period("2012-01", freq="M") - idx + exp = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = np.subtract(Period("2012-01", freq="M"), idx) + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx") + result = idx - Period("NaT", freq="M") + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + result = Period("NaT", freq="M") - idx + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_pi_sub_pdnat(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + exp = TimedeltaIndex([pd.NaT] * 4, name="idx") + tm.assert_index_equal(pd.NaT - idx, exp) + tm.assert_index_equal(idx - pd.NaT, exp) + + def test_pi_sub_period_nat(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "NaT", "2011-03", 
"2011-04"], freq="M", name="idx" + ) + + result = idx - Period("2012-01", freq="M") + off = idx.freq + exp = pd.Index([-12 * off, pd.NaT, -10 * off, -9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = Period("2012-01", freq="M") - idx + exp = pd.Index([12 * off, pd.NaT, 10 * off, 9 * off], name="idx") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx") + tm.assert_index_equal(idx - Period("NaT", freq="M"), exp) + tm.assert_index_equal(Period("NaT", freq="M") - idx, exp) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py new file mode 100644 index 00000000..bb7949c9 --- /dev/null +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -0,0 +1,2106 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas.errors import ( + OutOfBoundsDatetime, + PerformanceWarning, +) + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + NaT, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + offsets, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) +from pandas.tests.arithmetic.common import ( + assert_invalid_addsub_type, + assert_invalid_comparison, + get_upcast_box, +) + + +def assert_dtype(obj, expected_dtype): + """ + Helper to check the dtype for a Series, Index, or single-column DataFrame. + """ + dtype = tm.get_dtype(obj) + + assert dtype == expected_dtype + + +def get_expected_name(box, names): + if box is DataFrame: + # Since we are operating with a DataFrame and a non-DataFrame, + # the non-DataFrame is cast to Series and its name ignored. + exname = names[0] + elif box in [tm.to_array, pd.array]: + exname = names[1] + else: + exname = names[2] + return exname + + +# ------------------------------------------------------------------ +# Timedelta64[ns] dtype Comparisons + + +class TestTimedelta64ArrayLikeComparisons: + # Comparison tests for timedelta64[ns] vectors fully parametrized over + # DataFrame/Series/TimedeltaIndex/TimedeltaArray. Ideally all comparison + # tests will eventually end up here. 
+ + def test_compare_timedelta64_zerodim(self, box_with_array): + # GH#26689 should unbox when comparing with zerodim array + box = box_with_array + xbox = ( + box_with_array if box_with_array not in [pd.Index, pd.array] else np.ndarray + ) + + tdi = timedelta_range("2H", periods=4) + other = np.array(tdi.to_numpy()[0]) + + tdi = tm.box_expected(tdi, box) + res = tdi <= other + expected = np.array([True, False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(res, expected) + + @pytest.mark.parametrize( + "td_scalar", + [ + timedelta(days=1), + Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + offsets.Hour(24), + ], + ) + def test_compare_timedeltalike_scalar(self, box_with_array, td_scalar): + # regression test for GH#5963 + box = box_with_array + xbox = box if box not in [pd.Index, pd.array] else np.ndarray + + ser = Series([timedelta(days=1), timedelta(days=2)]) + ser = tm.box_expected(ser, box) + actual = ser > td_scalar + expected = Series([False, True]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(actual, expected) + + @pytest.mark.parametrize( + "invalid", + [ + 345600000000000, + "a", + Timestamp("2021-01-01"), + Timestamp("2021-01-01").now("UTC"), + Timestamp("2021-01-01").now().to_datetime64(), + Timestamp("2021-01-01").now().to_pydatetime(), + Timestamp("2021-01-01").date(), + np.array(4), # zero-dim mismatched dtype + ], + ) + def test_td64_comparisons_invalid(self, box_with_array, invalid): + # GH#13624 for str + box = box_with_array + + rng = timedelta_range("1 days", periods=10) + obj = tm.box_expected(rng, box) + + assert_invalid_comparison(obj, invalid, box) + + @pytest.mark.parametrize( + "other", + [ + list(range(10)), + np.arange(10), + np.arange(10).astype(np.float32), + np.arange(10).astype(object), + pd.date_range("1970-01-01", periods=10, tz="UTC").array, + np.array(pd.date_range("1970-01-01", periods=10)), + list(pd.date_range("1970-01-01", periods=10)), + pd.date_range("1970-01-01", periods=10).astype(object), + pd.period_range("1971-01-01", freq="D", periods=10).array, + pd.period_range("1971-01-01", freq="D", periods=10).astype(object), + ], + ) + def test_td64arr_cmp_arraylike_invalid(self, other, box_with_array): + # We don't parametrize this over box_with_array because listlike + # other plays poorly with assert_invalid_comparison reversed checks + + rng = timedelta_range("1 days", periods=10)._data + rng = tm.box_expected(rng, box_with_array) + assert_invalid_comparison(rng, other, box_with_array) + + def test_td64arr_cmp_mixed_invalid(self): + rng = timedelta_range("1 days", periods=5)._data + other = np.array([0, 1, 2, rng[3], Timestamp("2021-01-01")]) + + result = rng == other + expected = np.array([False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + tm.assert_numpy_array_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + rng < other + with pytest.raises(TypeError, match=msg): + rng > other + with pytest.raises(TypeError, match=msg): + rng <= other + with pytest.raises(TypeError, match=msg): + rng >= other + + +class TestTimedelta64ArrayComparisons: + # TODO: All of these need to be parametrized over box + + @pytest.mark.parametrize("dtype", [None, object]) + def test_comp_nat(self, dtype): + left = TimedeltaIndex([Timedelta("1 days"), NaT, Timedelta("3 days")]) + right = TimedeltaIndex([NaT, NaT, Timedelta("3 days")]) + + lhs, rhs = left, 
right + if dtype is object: + lhs, rhs = left.astype(object), right.astype(object) + + result = rhs == lhs + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = rhs != lhs + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs == NaT, expected) + tm.assert_numpy_array_equal(NaT == rhs, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(lhs != NaT, expected) + tm.assert_numpy_array_equal(NaT != lhs, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs < NaT, expected) + tm.assert_numpy_array_equal(NaT > lhs, expected) + + @pytest.mark.parametrize( + "idx2", + [ + TimedeltaIndex( + ["2 day", "2 day", NaT, NaT, "1 day 00:00:02", "5 days 00:00:03"] + ), + np.array( + [ + np.timedelta64(2, "D"), + np.timedelta64(2, "D"), + np.timedelta64("nat"), + np.timedelta64("nat"), + np.timedelta64(1, "D") + np.timedelta64(2, "s"), + np.timedelta64(5, "D") + np.timedelta64(3, "s"), + ] + ), + ], + ) + def test_comparisons_nat(self, idx2): + idx1 = TimedeltaIndex( + [ + "1 day", + NaT, + "1 day 00:00:01", + NaT, + "1 day 00:00:01", + "5 day 00:00:03", + ] + ) + # Check pd.NaT is handles as the same as np.nan + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + # TODO: better name + def test_comparisons_coverage(self): + rng = timedelta_range("1 days", periods=10) + + result = rng < rng[3] + expected = np.array([True, True, True] + [False] * 7) + tm.assert_numpy_array_equal(result, expected) + + result = rng == list(rng) + exp = rng == rng + tm.assert_numpy_array_equal(result, exp) + + +# ------------------------------------------------------------------ +# Timedelta64[ns] dtype Arithmetic Operations + + +class TestTimedelta64ArithmeticUnsorted: + # Tests moved from type-specific test files but not + # yet sorted/parametrized/de-duplicated + + def test_ufunc_coercions(self): + # normal ops are also tested in tseries/test_timedeltas.py + idx = TimedeltaIndex(["2H", "4H", "6H", "8H", "10H"], freq="2H", name="x") + + for result in [idx * 2, np.multiply(idx, 2)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(["4H", "8H", "12H", "16H", "20H"], freq="4H", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "4H" + + for result in [idx / 2, np.divide(idx, 2)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(["1H", "2H", "3H", "4H", "5H"], freq="H", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "H" + + for result in [-idx, np.negative(idx)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex( + ["-2H", "-4H", "-6H", "-8H", 
"-10H"], freq="-2H", name="x" + ) + tm.assert_index_equal(result, exp) + assert result.freq == "-2H" + + idx = TimedeltaIndex(["-2H", "-1H", "0H", "1H", "2H"], freq="H", name="x") + for result in [abs(idx), np.absolute(idx)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(["2H", "1H", "0H", "1H", "2H"], freq=None, name="x") + tm.assert_index_equal(result, exp) + assert result.freq is None + + def test_subtraction_ops(self): + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + td = Timedelta("1 days") + dt = Timestamp("20130101") + + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi - dt + with pytest.raises(TypeError, match=msg): + tdi - dti + + msg = r"unsupported operand type\(s\) for -" + with pytest.raises(TypeError, match=msg): + td - dt + + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + td - dti + + result = dt - dti + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"], name="bar") + tm.assert_index_equal(result, expected) + + result = dti - dt + expected = TimedeltaIndex(["0 days", "1 days", "2 days"], name="bar") + tm.assert_index_equal(result, expected) + + result = tdi - td + expected = TimedeltaIndex(["0 days", NaT, "1 days"], name="foo") + tm.assert_index_equal(result, expected, check_names=False) + + result = td - tdi + expected = TimedeltaIndex(["0 days", NaT, "-1 days"], name="foo") + tm.assert_index_equal(result, expected, check_names=False) + + result = dti - td + expected = DatetimeIndex( + ["20121231", "20130101", "20130102"], freq="D", name="bar" + ) + tm.assert_index_equal(result, expected, check_names=False) + + result = dt - tdi + expected = DatetimeIndex(["20121231", NaT, "20121230"], name="foo") + tm.assert_index_equal(result, expected) + + def test_subtraction_ops_with_tz(self, box_with_array): + + # check that dt/dti subtraction ops with tz are validated + dti = pd.date_range("20130101", periods=3) + dti = tm.box_expected(dti, box_with_array) + ts = Timestamp("20130101") + dt = ts.to_pydatetime() + dti_tz = pd.date_range("20130101", periods=3).tz_localize("US/Eastern") + dti_tz = tm.box_expected(dti_tz, box_with_array) + ts_tz = Timestamp("20130101").tz_localize("US/Eastern") + ts_tz2 = Timestamp("20130101").tz_localize("CET") + dt_tz = ts_tz.to_pydatetime() + td = Timedelta("1 days") + + def _check(result, expected): + assert result == expected + assert isinstance(result, Timedelta) + + # scalars + result = ts - ts + expected = Timedelta("0 days") + _check(result, expected) + + result = dt_tz - ts_tz + expected = Timedelta("0 days") + _check(result, expected) + + result = ts_tz - dt_tz + expected = Timedelta("0 days") + _check(result, expected) + + # tz mismatches + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects." + with pytest.raises(TypeError, match=msg): + dt_tz - ts + msg = "can't subtract offset-naive and offset-aware datetimes" + with pytest.raises(TypeError, match=msg): + dt_tz - dt + msg = "can't subtract offset-naive and offset-aware datetimes" + with pytest.raises(TypeError, match=msg): + dt - dt_tz + msg = "Cannot subtract tz-naive and tz-aware datetime-like objects." 
+ with pytest.raises(TypeError, match=msg): + ts - dt_tz + with pytest.raises(TypeError, match=msg): + ts_tz2 - ts + with pytest.raises(TypeError, match=msg): + ts_tz2 - dt + + msg = "Cannot subtract tz-naive and tz-aware" + # with dti + with pytest.raises(TypeError, match=msg): + dti - ts_tz + with pytest.raises(TypeError, match=msg): + dti_tz - ts + + result = dti_tz - dt_tz + expected = TimedeltaIndex(["0 days", "1 days", "2 days"]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = dt_tz - dti_tz + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = dti_tz - ts_tz + expected = TimedeltaIndex(["0 days", "1 days", "2 days"]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = ts_tz - dti_tz + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + result = td - td + expected = Timedelta("0 days") + _check(result, expected) + + result = dti_tz - td + expected = DatetimeIndex(["20121231", "20130101", "20130102"], tz="US/Eastern") + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + def test_dti_tdi_numeric_ops(self): + # These are normally union/diff set-like ops + tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + + result = tdi - tdi + expected = TimedeltaIndex(["0 days", NaT, "0 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = tdi + tdi + expected = TimedeltaIndex(["2 days", NaT, "4 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = dti - tdi # name will be reset + expected = DatetimeIndex(["20121231", NaT, "20130101"]) + tm.assert_index_equal(result, expected) + + def test_addition_ops(self): + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + td = Timedelta("1 days") + dt = Timestamp("20130101") + + result = tdi + dt + expected = DatetimeIndex(["20130102", NaT, "20130103"], name="foo") + tm.assert_index_equal(result, expected) + + result = dt + tdi + expected = DatetimeIndex(["20130102", NaT, "20130103"], name="foo") + tm.assert_index_equal(result, expected) + + result = td + tdi + expected = TimedeltaIndex(["2 days", NaT, "3 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = tdi + td + expected = TimedeltaIndex(["2 days", NaT, "3 days"], name="foo") + tm.assert_index_equal(result, expected) + + # unequal length + msg = "cannot add indices of unequal length" + with pytest.raises(ValueError, match=msg): + tdi + dti[0:1] + with pytest.raises(ValueError, match=msg): + tdi[0:1] + dti + + # random indexes + msg = "Addition/subtraction of integers and integer-arrays" + with pytest.raises(TypeError, match=msg): + tdi + Int64Index([1, 2, 3]) + + # this is a union! 
+ # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + + result = tdi + dti # name will be reset + expected = DatetimeIndex(["20130102", NaT, "20130105"]) + tm.assert_index_equal(result, expected) + + result = dti + tdi # name will be reset + expected = DatetimeIndex(["20130102", NaT, "20130105"]) + tm.assert_index_equal(result, expected) + + result = dt + td + expected = Timestamp("20130102") + assert result == expected + + result = td + dt + expected = Timestamp("20130102") + assert result == expected + + # TODO: Needs more informative name, probably split up into + # more targeted tests + @pytest.mark.parametrize("freq", ["D", "B"]) + def test_timedelta(self, freq): + index = pd.date_range("1/1/2000", periods=50, freq=freq) + + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + back = back._with_freq("infer") + tm.assert_index_equal(index, back) + + if freq == "D": + expected = pd.tseries.offsets.Day(1) + assert index.freq == expected + assert shifted.freq == expected + assert back.freq == expected + else: # freq == 'B' + assert index.freq == pd.tseries.offsets.BusinessDay(1) + assert shifted.freq is None + assert back.freq == pd.tseries.offsets.BusinessDay(1) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + def test_timedelta_tick_arithmetic(self): + # GH#4134, buggy with timedeltas + rng = pd.date_range("2013", "2014") + s = Series(rng) + result1 = rng - offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - offsets.Hour(1)) + + assert result1.freq == rng.freq + result1 = result1._with_freq(None) + tm.assert_index_equal(result1, result4) + + assert result3.freq == rng.freq + result3 = result3._with_freq(None) + tm.assert_index_equal(result2, result3) + + def test_tda_add_sub_index(self): + # Check that TimedeltaArray defers to Index on arithmetic ops + tdi = TimedeltaIndex(["1 days", NaT, "2 days"]) + tda = tdi.array + + dti = pd.date_range("1999-12-31", periods=3, freq="D") + + result = tda + dti + expected = tdi + dti + tm.assert_index_equal(result, expected) + + result = tda + tdi + expected = tdi + tdi + tm.assert_index_equal(result, expected) + + result = tda - tdi + expected = tdi - tdi + tm.assert_index_equal(result, expected) + + def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): + # Result should be cast back to DatetimeArray + box = box_with_array + + dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture) + dti = dti._with_freq(None) + tdi = dti - dti + + obj = tm.box_expected(tdi, box) + other = tm.box_expected(dti, box) + + with tm.assert_produces_warning(PerformanceWarning): + result = obj + other.astype(object) + tm.assert_equal(result, other) + + # ------------------------------------------------------------- + # Binary operations TimedeltaIndex and timedelta-like + + def test_tdi_iadd_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as + is now numeric + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + orig_rng = rng + rng += two_hours + tm.assert_equal(rng, expected) + if box_with_array is not pd.Index: + # Check that operation is actually inplace + tm.assert_equal(orig_rng, expected) + + def test_tdi_isub_timedeltalike(self, two_hours, 
box_with_array): + # only test adding/sub offsets as - is now numeric + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + orig_rng = rng + rng -= two_hours + tm.assert_equal(rng, expected) + if box_with_array is not pd.Index: + # Check that operation is actually inplace + tm.assert_equal(orig_rng, expected) + + # ------------------------------------------------------------- + + def test_tdi_ops_attributes(self): + rng = timedelta_range("2 days", periods=5, freq="2D", name="x") + + result = rng + 1 * rng.freq + exp = timedelta_range("4 days", periods=5, freq="2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + result = rng - 2 * rng.freq + exp = timedelta_range("-2 days", periods=5, freq="2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + result = rng * 2 + exp = timedelta_range("4 days", periods=5, freq="4D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "4D" + + result = rng / 2 + exp = timedelta_range("1 days", periods=5, freq="D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "D" + + result = -rng + exp = timedelta_range("-2 days", periods=5, freq="-2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "-2D" + + rng = timedelta_range("-2 days", periods=5, freq="D", name="x") + + result = abs(rng) + exp = TimedeltaIndex( + ["2 days", "1 days", "0 days", "1 days", "2 days"], name="x" + ) + tm.assert_index_equal(result, exp) + assert result.freq is None + + +class TestAddSubNaTMasking: + # TODO: parametrize over boxes + + @pytest.mark.parametrize("str_ts", ["1950-01-01", "1980-01-01"]) + def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): + # GH#17991 checking for overflow-masking with NaT + tdinat = pd.to_timedelta(["24658 days 11:15:00", "NaT"]) + tdobj = tm.box_expected(tdinat, box_with_array) + + ts = Timestamp(str_ts) + ts_variants = [ + ts, + ts.to_pydatetime(), + ts.to_datetime64().astype("datetime64[ns]"), + ts.to_datetime64().astype("datetime64[D]"), + ] + + for variant in ts_variants: + res = tdobj + variant + if box_with_array is DataFrame: + assert res.iloc[1, 1] is NaT + else: + assert res[1] is NaT + + def test_tdi_add_overflow(self): + # See GH#14068 + # preliminary test scalar analogue of vectorized tests below + # TODO: Make raised error message more informative and test + with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): + pd.to_timedelta(106580, "D") + Timestamp("2000") + with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): + Timestamp("2000") + pd.to_timedelta(106580, "D") + + _NaT = NaT.value + 1 + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta([106580], "D") + Timestamp("2000") + with pytest.raises(OverflowError, match=msg): + Timestamp("2000") + pd.to_timedelta([106580], "D") + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta([_NaT]) - Timedelta("1 days") + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days") + with pytest.raises(OverflowError, match=msg): + ( + pd.to_timedelta([_NaT, "5 days", "1 hours"]) + - pd.to_timedelta(["7 seconds", _NaT, "4 hours"]) + ) + + # These should not overflow! 
+ exp = TimedeltaIndex([NaT]) + result = pd.to_timedelta([NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(["4 days", NaT]) + result = pd.to_timedelta(["5 days", NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([NaT, NaT, "5 hours"]) + result = pd.to_timedelta([NaT, "5 days", "1 hours"]) + pd.to_timedelta( + ["7 seconds", NaT, "4 hours"] + ) + tm.assert_index_equal(result, exp) + + +class TestTimedeltaArraylikeAddSubOps: + # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ + + # TODO: moved from tests.indexes.timedeltas.test_arithmetic; needs + # parametrization+de-duplication + def test_timedelta_ops_with_missing_values(self): + # setup + s1 = pd.to_timedelta(Series(["00:00:01"])) + s2 = pd.to_timedelta(Series(["00:00:02"])) + + msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]" + with pytest.raises(TypeError, match=msg): + # Passing datetime64-dtype data to TimedeltaIndex is no longer + # supported GH#29794 + pd.to_timedelta(Series([NaT])) # TODO: belongs elsewhere? + + sn = pd.to_timedelta(Series([NaT], dtype="m8[ns]")) + + df1 = DataFrame(["00:00:01"]).apply(pd.to_timedelta) + df2 = DataFrame(["00:00:02"]).apply(pd.to_timedelta) + with pytest.raises(TypeError, match=msg): + # Passing datetime64-dtype data to TimedeltaIndex is no longer + # supported GH#29794 + DataFrame([NaT]).apply(pd.to_timedelta) # TODO: belongs elsewhere? + + dfn = DataFrame([NaT.value]).apply(pd.to_timedelta) + + scalar1 = pd.to_timedelta("00:00:01") + scalar2 = pd.to_timedelta("00:00:02") + timedelta_NaT = pd.to_timedelta("NaT") + + actual = scalar1 + scalar1 + assert actual == scalar2 + actual = scalar2 - scalar1 + assert actual == scalar1 + + actual = s1 + s1 + tm.assert_series_equal(actual, s2) + actual = s2 - s1 + tm.assert_series_equal(actual, s1) + + actual = s1 + scalar1 + tm.assert_series_equal(actual, s2) + actual = scalar1 + s1 + tm.assert_series_equal(actual, s2) + actual = s2 - scalar1 + tm.assert_series_equal(actual, s1) + actual = -scalar1 + s2 + tm.assert_series_equal(actual, s1) + + actual = s1 + timedelta_NaT + tm.assert_series_equal(actual, sn) + actual = timedelta_NaT + s1 + tm.assert_series_equal(actual, sn) + actual = s1 - timedelta_NaT + tm.assert_series_equal(actual, sn) + actual = -timedelta_NaT + s1 + tm.assert_series_equal(actual, sn) + + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + s1 + np.nan + with pytest.raises(TypeError, match=msg): + np.nan + s1 + with pytest.raises(TypeError, match=msg): + s1 - np.nan + with pytest.raises(TypeError, match=msg): + -np.nan + s1 + + actual = s1 + NaT + tm.assert_series_equal(actual, sn) + actual = s2 - NaT + tm.assert_series_equal(actual, sn) + + actual = s1 + df1 + tm.assert_frame_equal(actual, df2) + actual = s2 - df1 + tm.assert_frame_equal(actual, df1) + actual = df1 + s1 + tm.assert_frame_equal(actual, df2) + actual = df2 - s1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + df1 + tm.assert_frame_equal(actual, df2) + actual = df2 - df1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + scalar1 + tm.assert_frame_equal(actual, df2) + actual = df2 - scalar1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + timedelta_NaT + tm.assert_frame_equal(actual, dfn) + actual = df1 - timedelta_NaT + tm.assert_frame_equal(actual, dfn) + + msg = "cannot subtract a datelike from|unsupported operand type" + with pytest.raises(TypeError, match=msg): + df1 + np.nan + with pytest.raises(TypeError, 
match=msg): + df1 - np.nan + + actual = df1 + NaT # NaT is datetime, not timedelta + tm.assert_frame_equal(actual, dfn) + actual = df1 - NaT + tm.assert_frame_equal(actual, dfn) + + # TODO: moved from tests.series.test_operators, needs splitting, cleanup, + # de-duplication, box-parametrization... + def test_operators_timedelta64(self): + # series ops + v1 = pd.date_range("2012-1-1", periods=3, freq="D") + v2 = pd.date_range("2012-1-2", periods=3, freq="D") + rs = Series(v2) - Series(v1) + xp = Series(1e9 * 3600 * 24, rs.index).astype("int64").astype("timedelta64[ns]") + tm.assert_series_equal(rs, xp) + assert rs.dtype == "timedelta64[ns]" + + df = DataFrame({"A": v1}) + td = Series([timedelta(days=i) for i in range(3)]) + assert td.dtype == "timedelta64[ns]" + + # series on the rhs + result = df["A"] - df["A"].shift() + assert result.dtype == "timedelta64[ns]" + + result = df["A"] + td + assert result.dtype == "M8[ns]" + + # scalar Timestamp on rhs + maxa = df["A"].max() + assert isinstance(maxa, Timestamp) + + resultb = df["A"] - df["A"].max() + assert resultb.dtype == "timedelta64[ns]" + + # timestamp on lhs + result = resultb + df["A"] + values = [Timestamp("20111230"), Timestamp("20120101"), Timestamp("20120103")] + expected = Series(values, name="A") + tm.assert_series_equal(result, expected) + + # datetimes on rhs + result = df["A"] - datetime(2001, 1, 1) + expected = Series([timedelta(days=4017 + i) for i in range(3)], name="A") + tm.assert_series_equal(result, expected) + assert result.dtype == "m8[ns]" + + d = datetime(2001, 1, 1, 3, 4) + resulta = df["A"] - d + assert resulta.dtype == "m8[ns]" + + # roundtrip + resultb = resulta + d + tm.assert_series_equal(df["A"], resultb) + + # timedeltas on rhs + td = timedelta(days=1) + resulta = df["A"] + td + resultb = resulta - td + tm.assert_series_equal(resultb, df["A"]) + assert resultb.dtype == "M8[ns]" + + # roundtrip + td = timedelta(minutes=5, seconds=3) + resulta = df["A"] + td + resultb = resulta - td + tm.assert_series_equal(df["A"], resultb) + assert resultb.dtype == "M8[ns]" + + # inplace + value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) + rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) + assert rs[2] == value + + def test_timedelta64_ops_nat(self): + # GH 11349 + timedelta_series = Series([NaT, Timedelta("1s")]) + nat_series_dtype_timedelta = Series([NaT, NaT], dtype="timedelta64[ns]") + single_nat_dtype_timedelta = Series([NaT], dtype="timedelta64[ns]") + + # subtraction + tm.assert_series_equal(timedelta_series - NaT, nat_series_dtype_timedelta) + tm.assert_series_equal(-NaT + timedelta_series, nat_series_dtype_timedelta) + + tm.assert_series_equal( + timedelta_series - single_nat_dtype_timedelta, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + timedelta_series, nat_series_dtype_timedelta + ) + + # addition + tm.assert_series_equal( + nat_series_dtype_timedelta + NaT, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timedelta, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_timedelta, + nat_series_dtype_timedelta, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timedelta, + nat_series_dtype_timedelta, + ) + + tm.assert_series_equal(timedelta_series + NaT, nat_series_dtype_timedelta) + tm.assert_series_equal(NaT + timedelta_series, nat_series_dtype_timedelta) + + tm.assert_series_equal( + timedelta_series + single_nat_dtype_timedelta, 
nat_series_dtype_timedelta + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + timedelta_series, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + NaT, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timedelta, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_timedelta, + nat_series_dtype_timedelta, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timedelta, + nat_series_dtype_timedelta, + ) + + # multiplication + tm.assert_series_equal( + nat_series_dtype_timedelta * 1.0, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + 1.0 * nat_series_dtype_timedelta, nat_series_dtype_timedelta + ) + + tm.assert_series_equal(timedelta_series * 1, timedelta_series) + tm.assert_series_equal(1 * timedelta_series, timedelta_series) + + tm.assert_series_equal(timedelta_series * 1.5, Series([NaT, Timedelta("1.5s")])) + tm.assert_series_equal(1.5 * timedelta_series, Series([NaT, Timedelta("1.5s")])) + + tm.assert_series_equal(timedelta_series * np.nan, nat_series_dtype_timedelta) + tm.assert_series_equal(np.nan * timedelta_series, nat_series_dtype_timedelta) + + # division + tm.assert_series_equal(timedelta_series / 2, Series([NaT, Timedelta("0.5s")])) + tm.assert_series_equal(timedelta_series / 2.0, Series([NaT, Timedelta("0.5s")])) + tm.assert_series_equal(timedelta_series / np.nan, nat_series_dtype_timedelta) + + # ------------------------------------------------------------- + # Binary operations td64 arraylike and datetime-like + + @pytest.mark.parametrize("cls", [Timestamp, datetime, np.datetime64]) + def test_td64arr_add_sub_datetimelike_scalar( + self, cls, box_with_array, tz_naive_fixture + ): + # GH#11925, GH#29558, GH#23215 + tz = tz_naive_fixture + + dt_scalar = Timestamp("2012-01-01", tz=tz) + if cls is datetime: + ts = dt_scalar.to_pydatetime() + elif cls is np.datetime64: + if tz_naive_fixture is not None: + return + ts = dt_scalar.to_datetime64() + else: + ts = dt_scalar + + tdi = timedelta_range("1 day", periods=3) + expected = pd.date_range("2012-01-02", periods=3, tz=tz) + + tdarr = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + tm.assert_equal(ts + tdarr, expected) + tm.assert_equal(tdarr + ts, expected) + + expected2 = pd.date_range("2011-12-31", periods=3, freq="-1D", tz=tz) + expected2 = tm.box_expected(expected2, box_with_array) + + tm.assert_equal(ts - tdarr, expected2) + tm.assert_equal(ts + (-tdarr), expected2) + + msg = "cannot subtract a datelike" + with pytest.raises(TypeError, match=msg): + tdarr - ts + + def test_td64arr_add_datetime64_nat(self, box_with_array): + # GH#23215 + other = np.datetime64("NaT") + + tdi = timedelta_range("1 day", periods=3) + expected = DatetimeIndex(["NaT", "NaT", "NaT"]) + + tdser = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + tm.assert_equal(tdser + other, expected) + tm.assert_equal(other + tdser, expected) + + def test_td64arr_sub_dt64_array(self, box_with_array): + dti = pd.date_range("2016-01-01", periods=3) + tdi = TimedeltaIndex(["-1 Day"] * 3) + dtarr = dti.values + expected = DatetimeIndex(dtarr) - tdi + + tdi = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + msg = "cannot subtract a datelike from" + with pytest.raises(TypeError, match=msg): + tdi - dtarr + + # TimedeltaIndex.__rsub__ + result = dtarr - tdi + 
tm.assert_equal(result, expected) + + def test_td64arr_add_dt64_array(self, box_with_array): + dti = pd.date_range("2016-01-01", periods=3) + tdi = TimedeltaIndex(["-1 Day"] * 3) + dtarr = dti.values + expected = DatetimeIndex(dtarr) + tdi + + tdi = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdi + dtarr + tm.assert_equal(result, expected) + result = dtarr + tdi + tm.assert_equal(result, expected) + + # ------------------------------------------------------------------ + # Invalid __add__/__sub__ operations + + @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"]) + @pytest.mark.parametrize("tdi_freq", [None, "H"]) + def test_td64arr_sub_periodlike( + self, box_with_array, box_with_array2, tdi_freq, pi_freq + ): + # GH#20049 subtracting PeriodIndex should raise TypeError + tdi = TimedeltaIndex(["1 hours", "2 hours"], freq=tdi_freq) + dti = Timestamp("2018-03-07 17:16:40") + tdi + pi = dti.to_period(pi_freq) + per = pi[0] + + tdi = tm.box_expected(tdi, box_with_array) + pi = tm.box_expected(pi, box_with_array2) + msg = "cannot subtract|unsupported operand type" + with pytest.raises(TypeError, match=msg): + tdi - pi + + # GH#13078 subtraction of Period scalar not supported + with pytest.raises(TypeError, match=msg): + tdi - per + + @pytest.mark.parametrize( + "other", + [ + # GH#12624 for str case + "a", + # GH#19123 + 1, + 1.5, + np.array(2), + ], + ) + def test_td64arr_addsub_numeric_scalar_invalid(self, box_with_array, other): + # vector-like others are tested in test_td64arr_add_sub_numeric_arr_invalid + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + tdarr = tm.box_expected(tdser, box_with_array) + + assert_invalid_addsub_type(tdarr, other) + + @pytest.mark.parametrize( + "vec", + [ + np.array([1, 2, 3]), + pd.Index([1, 2, 3]), + Series([1, 2, 3]), + DataFrame([[1, 2, 3]]), + ], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_addsub_numeric_arr_invalid( + self, box_with_array, vec, any_real_numpy_dtype + ): + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + tdarr = tm.box_expected(tdser, box_with_array) + + vector = vec.astype(any_real_numpy_dtype) + assert_invalid_addsub_type(tdarr, vector) + + def test_td64arr_add_sub_int(self, box_with_array, one): + # Variants of `one` for #19012, deprecated GH#22535 + rng = timedelta_range("1 days 09:00:00", freq="H", periods=10) + tdarr = tm.box_expected(rng, box_with_array) + + msg = "Addition/subtraction of integers" + assert_invalid_addsub_type(tdarr, one, msg) + + # TODO: get inplace ops into assert_invalid_addsub_type + with pytest.raises(TypeError, match=msg): + tdarr += one + with pytest.raises(TypeError, match=msg): + tdarr -= one + + def test_td64arr_add_sub_integer_array(self, box_with_array): + # GH#19959, deprecated GH#22535 + # GH#22696 for DataFrame case, check that we don't dispatch to numpy + # implementation, which treats int64 as m8[ns] + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = timedelta_range("1 days 09:00:00", freq="H", periods=3) + tdarr = tm.box_expected(rng, box) + other = tm.box_expected([4, 3, 2], xbox) + + msg = "Addition/subtraction of integers and integer-arrays" + assert_invalid_addsub_type(tdarr, other, msg) + + def test_td64arr_addsub_integer_array_no_freq(self, box_with_array): + # GH#19959 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + tdi = TimedeltaIndex(["1 Day", "NaT", "3 Hours"]) + tdarr = tm.box_expected(tdi, box) + other = 
tm.box_expected([14, -1, 16], xbox) + + msg = "Addition/subtraction of integers" + assert_invalid_addsub_type(tdarr, other, msg) + + # ------------------------------------------------------------------ + # Operations with timedelta-like others + + def test_td64arr_add_sub_td64_array(self, box_with_array): + box = box_with_array + dti = pd.date_range("2016-01-01", periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = 2 * tdi + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + result = tdi + tdarr + tm.assert_equal(result, expected) + result = tdarr + tdi + tm.assert_equal(result, expected) + + expected_sub = 0 * tdi + result = tdi - tdarr + tm.assert_equal(result, expected_sub) + result = tdarr - tdi + tm.assert_equal(result, expected_sub) + + def test_td64arr_add_sub_tdi(self, box_with_array, names): + # GH#17250 make sure result dtype is correct + # GH#19043 make sure names are propagated correctly + box = box_with_array + exname = get_expected_name(box, names) + + tdi = TimedeltaIndex(["0 days", "1 day"], name=names[1]) + tdi = np.array(tdi) if box in [tm.to_array, pd.array] else tdi + ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[0]) + expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], name=exname) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = tdi + ser + tm.assert_equal(result, expected) + assert_dtype(result, "timedelta64[ns]") + + result = ser + tdi + tm.assert_equal(result, expected) + assert_dtype(result, "timedelta64[ns]") + + expected = Series( + [Timedelta(hours=-3), Timedelta(days=1, hours=-4)], name=exname + ) + expected = tm.box_expected(expected, box) + + result = tdi - ser + tm.assert_equal(result, expected) + assert_dtype(result, "timedelta64[ns]") + + result = ser - tdi + tm.assert_equal(result, -expected) + assert_dtype(result, "timedelta64[ns]") + + @pytest.mark.parametrize("tdnat", [np.timedelta64("NaT"), NaT]) + def test_td64arr_add_sub_td64_nat(self, box_with_array, tdnat): + # GH#18808, GH#23320 special handling for timedelta64("NaT") + box = box_with_array + tdi = TimedeltaIndex([NaT, Timedelta("1s")]) + expected = TimedeltaIndex(["NaT"] * 2) + + obj = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + result = obj + tdnat + tm.assert_equal(result, expected) + result = tdnat + obj + tm.assert_equal(result, expected) + result = obj - tdnat + tm.assert_equal(result, expected) + result = tdnat - obj + tm.assert_equal(result, expected) + + def test_td64arr_add_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as + is now numeric + # GH#10699 for Tick cases + box = box_with_array + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, box) + + result = rng + two_hours + tm.assert_equal(result, expected) + + result = two_hours + rng + tm.assert_equal(result, expected) + + def test_td64arr_sub_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as - is now numeric + # GH#10699 for Tick cases + box = box_with_array + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, box) + + result = rng - two_hours + tm.assert_equal(result, expected) + + result = two_hours - rng + tm.assert_equal(result, 
-expected) + + # ------------------------------------------------------------------ + # __add__/__sub__ with DateOffsets and arrays of DateOffsets + + def test_td64arr_add_sub_offset_index(self, names, box_with_array): + # GH#18849, GH#19744 + box = box_with_array + exname = get_expected_name(box, names) + + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) + other = pd.Index([offsets.Hour(n=1), offsets.Minute(n=-2)], name=names[1]) + other = np.array(other) if box in [tm.to_array, pd.array] else other + + expected = TimedeltaIndex( + [tdi[n] + other[n] for n in range(len(tdi))], freq="infer", name=exname + ) + expected_sub = TimedeltaIndex( + [tdi[n] - other[n] for n in range(len(tdi))], freq="infer", name=exname + ) + + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + expected_sub = tm.box_expected(expected_sub, box) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_equal(res2, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res_sub = tdi - other + tm.assert_equal(res_sub, expected_sub) + + def test_td64arr_add_sub_offset_array(self, box_with_array): + # GH#18849, GH#18824 + box = box_with_array + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) + other = np.array([offsets.Hour(n=1), offsets.Minute(n=-2)]) + + expected = TimedeltaIndex( + [tdi[n] + other[n] for n in range(len(tdi))], freq="infer" + ) + expected_sub = TimedeltaIndex( + [tdi[n] - other[n] for n in range(len(tdi))], freq="infer" + ) + + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_equal(res2, expected) + + expected_sub = tm.box_expected(expected_sub, box_with_array) + with tm.assert_produces_warning(PerformanceWarning): + res_sub = tdi - other + tm.assert_equal(res_sub, expected_sub) + + def test_td64arr_with_offset_series(self, names, box_with_array): + # GH#18849 + box = box_with_array + box2 = Series if box in [pd.Index, tm.to_array, pd.array] else box + exname = get_expected_name(box, names) + + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) + other = Series([offsets.Hour(n=1), offsets.Minute(n=-2)], name=names[1]) + + expected_add = Series([tdi[n] + other[n] for n in range(len(tdi))], name=exname) + obj = tm.box_expected(tdi, box) + expected_add = tm.box_expected(expected_add, box2) + + with tm.assert_produces_warning(PerformanceWarning): + res = obj + other + tm.assert_equal(res, expected_add) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + obj + tm.assert_equal(res2, expected_add) + + expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))], name=exname) + expected_sub = tm.box_expected(expected_sub, box2) + + with tm.assert_produces_warning(PerformanceWarning): + res3 = obj - other + tm.assert_equal(res3, expected_sub) + + @pytest.mark.parametrize("obox", [np.array, pd.Index, Series]) + def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_with_array): + # GH#18824 + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) + tdi = tm.box_expected(tdi, box_with_array) + + anchored = obox([offsets.MonthEnd(), offsets.Day(n=2)]) + + # addition/subtraction ops with anchored 
offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. + msg = "has incorrect type|cannot add the type MonthEnd" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + tdi + anchored + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + anchored + tdi + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + tdi - anchored + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + anchored - tdi + + # ------------------------------------------------------------------ + # Unsorted + + def test_td64arr_add_sub_object_array(self, box_with_array): + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + tdi = timedelta_range("1 day", periods=3, freq="D") + tdarr = tm.box_expected(tdi, box) + + other = np.array([Timedelta(days=1), offsets.Day(2), Timestamp("2000-01-04")]) + + with tm.assert_produces_warning(PerformanceWarning): + result = tdarr + other + + expected = pd.Index( + [Timedelta(days=2), Timedelta(days=4), Timestamp("2000-01-07")] + ) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + msg = "unsupported operand type|cannot subtract a datelike" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(PerformanceWarning): + tdarr - other + + with tm.assert_produces_warning(PerformanceWarning): + result = other - tdarr + + expected = pd.Index([Timedelta(0), Timedelta(0), Timestamp("2000-01-01")]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + +class TestTimedeltaArraylikeMulDivOps: + # Tests for timedelta64[ns] + # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + + # ------------------------------------------------------------------ + # Multiplication + # organized with scalar others first, then array-like + + def test_td64arr_mul_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + + result = idx * 1 + tm.assert_equal(result, idx) + + result = 1 * idx + tm.assert_equal(result, idx) + + def test_td64arr_mul_tdlike_scalar_raises(self, two_hours, box_with_array): + rng = timedelta_range("1 days", "10 days", name="foo") + rng = tm.box_expected(rng, box_with_array) + msg = "argument must be an integer|cannot use operands with types dtype" + with pytest.raises(TypeError, match=msg): + rng * two_hours + + def test_tdi_mul_int_array_zerodim(self, box_with_array): + rng5 = np.arange(5, dtype="int64") + idx = TimedeltaIndex(rng5) + expected = TimedeltaIndex(rng5 * 5) + + idx = tm.box_expected(idx, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = idx * np.array(5, dtype="int64") + tm.assert_equal(result, expected) + + def test_tdi_mul_int_array(self, box_with_array): + rng5 = np.arange(5, dtype="int64") + idx = TimedeltaIndex(rng5) + expected = TimedeltaIndex(rng5**2) + + idx = tm.box_expected(idx, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = idx * rng5 + tm.assert_equal(result, expected) + + def test_tdi_mul_int_series(self, box_with_array): + box = box_with_array + xbox = Series if box in [pd.Index, tm.to_array, pd.array] else box + + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + expected = TimedeltaIndex(np.arange(5, dtype="int64") ** 2) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, xbox) + + 
result = idx * Series(np.arange(5, dtype="int64")) + tm.assert_equal(result, expected) + + def test_tdi_mul_float_series(self, box_with_array): + box = box_with_array + xbox = Series if box in [pd.Index, tm.to_array, pd.array] else box + + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box) + + rng5f = np.arange(5, dtype="float64") + expected = TimedeltaIndex(rng5f * (rng5f + 1.0)) + expected = tm.box_expected(expected, xbox) + + result = idx * Series(rng5f + 1.0) + tm.assert_equal(result, expected) + + # TODO: Put Series/DataFrame in others? + @pytest.mark.parametrize( + "other", + [ + np.arange(1, 11), + Int64Index(range(1, 11)), + UInt64Index(range(1, 11)), + Float64Index(range(1, 11)), + pd.RangeIndex(1, 11), + ], + ids=lambda x: type(x).__name__, + ) + def test_tdi_rmul_arraylike(self, other, box_with_array): + box = box_with_array + + tdi = TimedeltaIndex(["1 Day"] * 10) + expected = timedelta_range("1 days", "10 days")._with_freq(None) + + tdi = tm.box_expected(tdi, box) + xbox = get_upcast_box(tdi, other) + + expected = tm.box_expected(expected, xbox) + + result = other * tdi + tm.assert_equal(result, expected) + commute = tdi * other + tm.assert_equal(commute, expected) + + # ------------------------------------------------------------------ + # __div__, __rdiv__ + + def test_td64arr_div_nat_invalid(self, box_with_array): + # don't allow division by NaT (maybe could in the future) + rng = timedelta_range("1 days", "10 days", name="foo") + rng = tm.box_expected(rng, box_with_array) + + with pytest.raises(TypeError, match="unsupported operand type"): + rng / NaT + with pytest.raises(TypeError, match="Cannot divide NaTType by"): + NaT / rng + + dt64nat = np.datetime64("NaT", "ns") + msg = "|".join( + [ + # 'divide' on npdev as of 2021-12-18 + "ufunc '(true_divide|divide)' cannot use operands", + "cannot perform __r?truediv__", + "Cannot divide datetime64 by TimedeltaArray", + ] + ) + with pytest.raises(TypeError, match=msg): + rng / dt64nat + with pytest.raises(TypeError, match=msg): + dt64nat / rng + + def test_td64arr_div_td64nat(self, box_with_array): + # GH#23829 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = timedelta_range("1 days", "10 days") + rng = tm.box_expected(rng, box) + + other = np.timedelta64("NaT") + + expected = np.array([np.nan] * 10) + expected = tm.box_expected(expected, xbox) + + result = rng / other + tm.assert_equal(result, expected) + + result = other / rng + tm.assert_equal(result, expected) + + def test_td64arr_div_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + + result = idx / 1 + tm.assert_equal(result, idx) + + with pytest.raises(TypeError, match="Cannot divide"): + # GH#23829 + 1 / idx + + def test_td64arr_div_tdlike_scalar(self, two_hours, box_with_array): + # GH#20088, GH#22163 ensure DataFrame returns correct dtype + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = timedelta_range("1 days", "10 days", name="foo") + expected = Float64Index((np.arange(10) + 1) * 12, name="foo") + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, xbox) + + result = rng / two_hours + tm.assert_equal(result, expected) + + result = two_hours / rng + expected = 1 / expected + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("m", [1, 3, 10]) + @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"]) + def test_td64arr_div_td64_scalar(self, m, unit, 
box_with_array): + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + ser = Series([Timedelta(days=59)] * 3) + ser[2] = np.nan + flat = ser + ser = tm.box_expected(ser, box) + + # op + expected = Series([x / np.timedelta64(m, unit) for x in flat]) + expected = tm.box_expected(expected, xbox) + result = ser / np.timedelta64(m, unit) + tm.assert_equal(result, expected) + + # reverse op + expected = Series([Timedelta(np.timedelta64(m, unit)) / x for x in flat]) + expected = tm.box_expected(expected, xbox) + result = np.timedelta64(m, unit) / ser + tm.assert_equal(result, expected) + + def test_td64arr_div_tdlike_scalar_with_nat(self, two_hours, box_with_array): + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") + expected = Float64Index([12, np.nan, 24], name="foo") + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, xbox) + + result = rng / two_hours + tm.assert_equal(result, expected) + + result = two_hours / rng + expected = 1 / expected + tm.assert_equal(result, expected) + + def test_td64arr_div_td64_ndarray(self, box_with_array): + # GH#22631 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + rng = TimedeltaIndex(["1 days", NaT, "2 days"]) + expected = Float64Index([12, np.nan, 24]) + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, xbox) + + other = np.array([2, 4, 2], dtype="m8[h]") + result = rng / other + tm.assert_equal(result, expected) + + result = rng / tm.box_expected(other, box) + tm.assert_equal(result, expected) + + result = rng / other.astype(object) + tm.assert_equal(result, expected) + + result = rng / list(other) + tm.assert_equal(result, expected) + + # reversed op + expected = 1 / expected + result = other / rng + tm.assert_equal(result, expected) + + result = tm.box_expected(other, box) / rng + tm.assert_equal(result, expected) + + result = other.astype(object) / rng + tm.assert_equal(result, expected) + + result = list(other) / rng + tm.assert_equal(result, expected) + + def test_tdarr_div_length_mismatch(self, box_with_array): + rng = TimedeltaIndex(["1 days", NaT, "2 days"]) + mismatched = [1, 2, 3, 4] + + rng = tm.box_expected(rng, box_with_array) + msg = "Cannot divide vectors|Unable to coerce to Series" + for obj in [mismatched, mismatched[:2]]: + # one shorter, one longer + for other in [obj, np.array(obj), pd.Index(obj)]: + with pytest.raises(ValueError, match=msg): + rng / other + with pytest.raises(ValueError, match=msg): + other / rng + + # ------------------------------------------------------------------ + # __floordiv__, __rfloordiv__ + + def test_td64arr_floordiv_td64arr_with_nat( + self, box_with_array, using_array_manager + ): + # GH#35529 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + + left = Series([1000, 222330, 30], dtype="timedelta64[ns]") + right = Series([1000, 222330, None], dtype="timedelta64[ns]") + + left = tm.box_expected(left, box) + right = tm.box_expected(right, box) + + expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) + expected = tm.box_expected(expected, xbox) + if box is DataFrame and using_array_manager: + # INFO(ArrayManager) floordiv returns integer, and ArrayManager + # performs ops column-wise and thus preserves int64 dtype for + # columns without missing values + expected[[0, 1]] = expected[[0, 1]].astype("int64") + + result = left // right + + tm.assert_equal(result, expected) + + # case that goes through
__rfloordiv__ with arraylike + result = np.asarray(left) // right + tm.assert_equal(result, expected) + + def test_td64arr_floordiv_tdscalar(self, box_with_array, scalar_td): + # GH#18831, GH#19125 + box = box_with_array + xbox = np.ndarray if box is pd.array else box + td = Timedelta("5m3s") # i.e. (scalar_td - 1sec) / 2 + + td1 = Series([td, td, NaT], dtype="m8[ns]") + td1 = tm.box_expected(td1, box, transpose=False) + + expected = Series([0, 0, np.nan]) + expected = tm.box_expected(expected, xbox, transpose=False) + + result = td1 // scalar_td + tm.assert_equal(result, expected) + + # Reversed op + expected = Series([2, 2, np.nan]) + expected = tm.box_expected(expected, xbox, transpose=False) + + result = scalar_td // td1 + tm.assert_equal(result, expected) + + # same thing but let's be explicit about calling __rfloordiv__ + result = td1.__rfloordiv__(scalar_td) + tm.assert_equal(result, expected) + + def test_td64arr_floordiv_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + result = idx // 1 + tm.assert_equal(result, idx) + + pattern = "floor_divide cannot use operands|Cannot divide int by Timedelta*" + with pytest.raises(TypeError, match=pattern): + 1 // idx + + # ------------------------------------------------------------------ + # mod, divmod + # TODO: operations with timedelta-like arrays, numeric arrays, + # reversed ops + + def test_td64arr_mod_tdscalar(self, box_with_array, three_days): + tdi = timedelta_range("1 Day", "9 days") + tdarr = tm.box_expected(tdi, box_with_array) + + expected = TimedeltaIndex(["1 Day", "2 Days", "0 Days"] * 3) + expected = tm.box_expected(expected, box_with_array) + + result = tdarr % three_days + tm.assert_equal(result, expected) + + warn = None + if box_with_array is DataFrame and isinstance(three_days, pd.DateOffset): + warn = PerformanceWarning + + with tm.assert_produces_warning(warn): + result = divmod(tdarr, three_days) + + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], tdarr // three_days) + + def test_td64arr_mod_int(self, box_with_array): + tdi = timedelta_range("1 ns", "10 ns", periods=10) + tdarr = tm.box_expected(tdi, box_with_array) + + expected = TimedeltaIndex(["1 ns", "0 ns"] * 5) + expected = tm.box_expected(expected, box_with_array) + + result = tdarr % 2 + tm.assert_equal(result, expected) + + msg = "Cannot divide int by" + with pytest.raises(TypeError, match=msg): + 2 % tdarr + + result = divmod(tdarr, 2) + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], tdarr // 2) + + def test_td64arr_rmod_tdscalar(self, box_with_array, three_days): + tdi = timedelta_range("1 Day", "9 days") + tdarr = tm.box_expected(tdi, box_with_array) + + expected = ["0 Days", "1 Day", "0 Days"] + ["3 Days"] * 6 + expected = TimedeltaIndex(expected) + expected = tm.box_expected(expected, box_with_array) + + result = three_days % tdarr + tm.assert_equal(result, expected) + + result = divmod(three_days, tdarr) + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], three_days // tdarr) + + # ------------------------------------------------------------------ + # Operations with invalid others + + def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + td1 = tm.box_expected(td1, box_with_array) + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + pattern = 
"operate|unsupported|cannot|not supported" + with pytest.raises(TypeError, match=pattern): + td1 * scalar_td + with pytest.raises(TypeError, match=pattern): + scalar_td * td1 + + def test_td64arr_mul_too_short_raises(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + msg = "|".join( + [ + "cannot use operands with types dtype", + "Cannot multiply with unequal lengths", + "Unable to coerce to Series", + ] + ) + with pytest.raises(TypeError, match=msg): + # length check before dtype check + idx * idx[:3] + with pytest.raises(ValueError, match=msg): + idx * np.array([1, 2]) + + def test_td64arr_mul_td64arr_raises(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + msg = "cannot use operands with types dtype" + with pytest.raises(TypeError, match=msg): + idx * idx + + # ------------------------------------------------------------------ + # Operations with numeric others + + def test_td64arr_mul_numeric_scalar(self, box_with_array, one): + # GH#4521 + # divide/multiply by integers + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + expected = Series(["-59 Days", "-59 Days", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdser * (-one) + tm.assert_equal(result, expected) + result = (-one) * tdser + tm.assert_equal(result, expected) + + expected = Series(["118 Days", "118 Days", "NaT"], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + + result = tdser * (2 * one) + tm.assert_equal(result, expected) + result = (2 * one) * tdser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("two", [2, 2.0, np.array(2), np.array(2.0)]) + def test_td64arr_div_numeric_scalar(self, box_with_array, two): + # GH#4521 + # divide/multiply by integers + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + expected = Series(["29.5D", "29.5D", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdser / two + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match="Cannot divide"): + two / tdser + + @pytest.mark.parametrize("two", [2, 2.0, np.array(2), np.array(2.0)]) + def test_td64arr_floordiv_numeric_scalar(self, box_with_array, two): + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + expected = Series(["29.5D", "29.5D", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdser // two + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match="Cannot divide"): + two // tdser + + @pytest.mark.parametrize( + "vector", + [np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_rmul_numeric_array( + self, + box_with_array, + vector, + any_real_numpy_dtype, + ): + # GH#4521 + # divide/multiply by integers + + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + vector = vector.astype(any_real_numpy_dtype) + + expected = Series(["1180 Days", "1770 Days", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + xbox = get_upcast_box(tdser, vector) + + expected = tm.box_expected(expected, xbox) + + result = tdser * vector + tm.assert_equal(result, expected) + 
+ result = vector * tdser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "vector", + [np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_div_numeric_array( + self, box_with_array, vector, any_real_numpy_dtype + ): + # GH#4521 + # divide/multiply by integers + + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + vector = vector.astype(any_real_numpy_dtype) + + expected = Series(["2.95D", "1D 23H 12m", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + xbox = get_upcast_box(tdser, vector) + expected = tm.box_expected(expected, xbox) + + result = tdser / vector + tm.assert_equal(result, expected) + + pattern = "|".join( + [ + "true_divide'? cannot use operands", + "cannot perform __div__", + "cannot perform __truediv__", + "unsupported operand", + "Cannot divide", + ] + ) + with pytest.raises(TypeError, match=pattern): + vector / tdser + + result = tdser / vector.astype(object) + if box_with_array is DataFrame: + expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))] + else: + expected = [tdser[n] / vector[n] for n in range(len(tdser))] + expected = pd.Index(expected) # do dtype inference + expected = tm.box_expected(expected, xbox) + assert tm.get_dtype(expected) == "m8[ns]" + + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match=pattern): + vector.astype(object) / tdser + + def test_td64arr_mul_int_series(self, box_with_array, names): + # GH#19042 test for correct name attachment + box = box_with_array + exname = get_expected_name(box, names) + + tdi = TimedeltaIndex( + ["0days", "1day", "2days", "3days", "4days"], name=names[0] + ) + # TODO: Should we be parametrizing over types for `ser` too? + ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) + + expected = Series( + ["0days", "1day", "4days", "9days", "16days"], + dtype="timedelta64[ns]", + name=exname, + ) + + tdi = tm.box_expected(tdi, box) + xbox = get_upcast_box(tdi, ser) + + expected = tm.box_expected(expected, xbox) + + result = ser * tdi + tm.assert_equal(result, expected) + + result = tdi * ser + tm.assert_equal(result, expected) + + # TODO: Should we be parametrizing over types for `ser` too? 
+ def test_float_series_rdiv_td64arr(self, box_with_array, names): + # GH#19042 test for correct name attachment + box = box_with_array + tdi = TimedeltaIndex( + ["0days", "1day", "2days", "3days", "4days"], name=names[0] + ) + ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1]) + + xname = names[2] if box not in [tm.to_array, pd.array] else names[1] + expected = Series( + [tdi[n] / ser[n] for n in range(len(ser))], + dtype="timedelta64[ns]", + name=xname, + ) + + tdi = tm.box_expected(tdi, box) + xbox = get_upcast_box(tdi, ser) + expected = tm.box_expected(expected, xbox) + + result = ser.__rtruediv__(tdi) + if box is DataFrame: + assert result is NotImplemented + else: + tm.assert_equal(result, expected) + + def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array): + # GH#39750 make sure we infer the result as td64 + tdi = TimedeltaIndex([NaT, NaT]) + + left = tm.box_expected(tdi, box_with_array) + right = np.array([2, 2.0], dtype=object) + + result = left / right + tm.assert_equal(result, left) + + result = left // right + tm.assert_equal(result, left) + + +class TestTimedelta64ArrayLikeArithmetic: + # Arithmetic tests for timedelta64[ns] vectors fully parametrized over + # DataFrame/Series/TimedeltaIndex/TimedeltaArray. Ideally all arithmetic + # tests will eventually end up here. + + def test_td64arr_pow_invalid(self, scalar_td, box_with_array): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + td1 = tm.box_expected(td1, box_with_array) + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + pattern = "operate|unsupported|cannot|not supported" + with pytest.raises(TypeError, match=pattern): + scalar_td**td1 + + with pytest.raises(TypeError, match=pattern): + td1**scalar_td + + +def test_add_timestamp_to_timedelta(): + # GH: 35897 + timestamp = Timestamp("2021-01-01") + result = timestamp + timedelta_range("0s", "1s", periods=31) + expected = DatetimeIndex( + [ + timestamp + + ( + pd.to_timedelta("0.033333333s") * i + + pd.to_timedelta("0.000000001s") * divmod(i, 3)[0] + ) + for i in range(31) + ] + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/arrays/__init__.py b/pandas/tests/arrays/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/boolean/__init__.py b/pandas/tests/arrays/boolean/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py new file mode 100644 index 00000000..197e8312 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -0,0 +1,129 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture +def data(): + """Fixture returning boolean array with valid and missing values.""" + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) + + +@pytest.fixture +def left_array(): + """Fixture returning boolean array with valid and missing values.""" + return pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + + +@pytest.fixture +def right_array(): + """Fixture returning boolean array with valid and missing values.""" + return pd.array([True, False, None] * 3, dtype="boolean") + + +# Basic test for the arithmetic array ops +# ----------------------------------------------------------------------------- + + 
+@pytest.mark.parametrize( + "opname, exp", + [ + ("add", [True, True, None, True, False, None, None, None, None]), + ("mul", [True, False, None, False, False, None, None, None, None]), + ], + ids=["add", "mul"], +) +def test_add_mul(left_array, right_array, opname, exp): + op = getattr(operator, opname) + result = op(left_array, right_array) + expected = pd.array(exp, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_sub(left_array, right_array): + msg = ( + r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), " + r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\." + ) + with pytest.raises(TypeError, match=msg): + left_array - right_array + + +def test_div(left_array, right_array): + msg = "operator '.*' not implemented for bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + # check that we are matching the non-masked Series behavior + pd.Series(left_array._data) / pd.Series(right_array._data) + + with pytest.raises(NotImplementedError, match=msg): + left_array / right_array + + +@pytest.mark.parametrize( + "opname", + [ + "floordiv", + "mod", + "pow", + ], +) +def test_op_int8(left_array, right_array, opname): + op = getattr(operator, opname) + if opname != "mod": + msg = "operator '.*' not implemented for bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + result = op(left_array, right_array) + return + result = op(left_array, right_array) + expected = op(left_array.astype("Int8"), right_array.astype("Int8")) + tm.assert_extension_array_equal(result, expected) + + +# Test generic characteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_values(data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + + # invalid scalars + msg = ( + "did not contain a loop with signature matching types|" + "BooleanArray cannot perform the operation|" + "not supported for the input types, and the inputs could not be safely coerced " + "to any supported types according to the casting rule ''safe''" + ) + with pytest.raises(TypeError, match=msg): + ops("foo") + msg = "|".join( + [ + r"unsupported operand type\(s\) for", + "Concatenation operation is not implemented for NumPy arrays", + ] + ) + with pytest.raises(TypeError, match=msg): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + if op not in ("__mul__", "__rmul__"): + # TODO(extension) numpy's mul with object array sees booleans as numbers + msg = "|".join( + [ + r"unsupported operand type\(s\) for", + "can only concatenate str", + "not all arguments converted during string formatting", + ] + ) + with pytest.raises(TypeError, match=msg): + ops(pd.Series("foo", index=s.index)) diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py new file mode 100644 index 00000000..932e903c --- /dev/null +++ b/pandas/tests/arrays/boolean/test_astype.py @@ -0,0 +1,53 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_astype(): + # with missing values + arr = pd.array([True, False, None], dtype="boolean") + + with pytest.raises(ValueError, match="cannot convert NA to integer"): + arr.astype("int64") + + with pytest.raises(ValueError, match="cannot convert float NaN to"): + arr.astype("bool") + + result = arr.astype("float64") + expected = np.array([1, 0, np.nan], dtype="float64") + 
tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("str") + expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5") + tm.assert_numpy_array_equal(result, expected) + + # no missing values + arr = pd.array([True, False, True], dtype="boolean") + result = arr.astype("int64") + expected = np.array([1, 0, 1], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_to_boolean_array(): + # astype to BooleanArray + arr = pd.array([True, False, None], dtype="boolean") + + result = arr.astype("boolean") + tm.assert_extension_array_equal(result, arr) + result = arr.astype(pd.BooleanDtype()) + tm.assert_extension_array_equal(result, arr) + + +def test_astype_to_integer_array(): + # astype to IntegerArray + arr = pd.array([True, False, None], dtype="boolean") + + result = arr.astype("Int64") + expected = pd.array([1, 0, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/boolean/test_comparison.py b/pandas/tests/arrays/boolean/test_comparison.py new file mode 100644 index 00000000..2eeb9da5 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_comparison.py @@ -0,0 +1,60 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import BooleanArray +from pandas.tests.arrays.masked_shared import ComparisonOps + + +@pytest.fixture +def data(): + """Fixture returning boolean array with valid and missing data""" + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) + + +@pytest.fixture +def dtype(): + """Fixture returning BooleanDtype""" + return pd.BooleanDtype() + + +class TestComparisonOps(ComparisonOps): + def test_compare_scalar(self, data, comparison_op): + self._compare_other(data, comparison_op, True) + + def test_compare_array(self, data, comparison_op): + other = pd.array([True] * len(data), dtype="boolean") + self._compare_other(data, comparison_op, other) + other = np.array([True] * len(data)) + self._compare_other(data, comparison_op, other) + other = pd.Series([True] * len(data)) + self._compare_other(data, comparison_op, other) + + @pytest.mark.parametrize("other", [True, False, pd.NA]) + def test_scalar(self, other, comparison_op, dtype): + ComparisonOps.test_scalar(self, other, comparison_op, dtype) + + def test_array(self, comparison_op): + op = comparison_op + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + + result = op(a, b) + + values = op(a._data, b._data) + mask = a._mask | b._mask + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = None + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py new file mode 100644 index 00000000..d26eea19 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -0,0 +1,326 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import BooleanArray +from pandas.core.arrays.boolean 
import coerce_to_array + + +def test_boolean_array_constructor(): + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + result = BooleanArray(values, mask) + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + with pytest.raises(TypeError, match="values should be boolean numpy array"): + BooleanArray(values.tolist(), mask) + + with pytest.raises(TypeError, match="mask should be boolean numpy array"): + BooleanArray(values, mask.tolist()) + + with pytest.raises(TypeError, match="values should be boolean numpy array"): + BooleanArray(values.astype(int), mask) + + with pytest.raises(TypeError, match="mask should be boolean numpy array"): + BooleanArray(values, None) + + with pytest.raises(ValueError, match="values.shape must match mask.shape"): + BooleanArray(values.reshape(1, -1), mask) + + with pytest.raises(ValueError, match="values.shape must match mask.shape"): + BooleanArray(values, mask.reshape(1, -1)) + + +def test_boolean_array_constructor_copy(): + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + result = BooleanArray(values, mask) + assert result._data is values + assert result._mask is mask + + result = BooleanArray(values, mask, copy=True) + assert result._data is not values + assert result._mask is not mask + + +def test_to_boolean_array(): + expected = BooleanArray( + np.array([True, False, True]), np.array([False, False, False]) + ) + + result = pd.array([True, False, True], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, True]), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + expected = BooleanArray( + np.array([True, False, True]), np.array([False, False, True]) + ) + + result = pd.array([True, False, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_all_none(): + expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True])) + + result = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "a, b", + [ + ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]), + ([True, np.nan], [True, None]), + ([True, pd.NA], [True, None]), + ([np.nan, np.nan], [None, None]), + (np.array([np.nan, np.nan], dtype=float), [None, None]), + ], +) +def test_to_boolean_array_missing_indicators(a, b): + result = pd.array(a, dtype="boolean") + expected = pd.array(b, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + ["foo", "bar"], + ["1", "2"], + # "foo", + [1, 2], + [1.0, 2.0], + pd.date_range("20130101", periods=2), + np.array(["foo"]), + np.array([1, 2]), + np.array([1.0, 2.0]), + [np.nan, {"a": 1}], + ], +) +def test_to_boolean_array_error(values): + # error in converting existing arrays to BooleanArray + msg = "Need to pass bool-like value" + 
with pytest.raises(TypeError, match=msg): + pd.array(values, dtype="boolean") + + +def test_to_boolean_array_from_integer_array(): + result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array(np.array([1, 0, 1, None]), dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_from_float_array(): + result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_integer_like(): + # integers of 0's and 1's + result = pd.array([1, 0, 1, 0], dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array([1, 0, 1, None], dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_coerce_to_array(): + # TODO this is currently not public API + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + result = BooleanArray(*coerce_to_array(values, mask=mask)) + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + assert result._data is values + assert result._mask is mask + result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True)) + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + assert result._data is not values + assert result._mask is not mask + + # mixed missing from values and mask + values = [True, False, None, False] + mask = np.array([False, False, False, True], dtype="bool") + result = BooleanArray(*coerce_to_array(values, mask=mask)) + expected = BooleanArray( + np.array([True, False, True, True]), np.array([False, False, True, True]) + ) + tm.assert_extension_array_equal(result, expected) + result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask)) + tm.assert_extension_array_equal(result, expected) + result = BooleanArray(*coerce_to_array(values, mask=mask.tolist())) + tm.assert_extension_array_equal(result, expected) + + # raise errors for wrong dimension + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + # passing 2D values is OK as long as no mask + coerce_to_array(values.reshape(1, -1)) + + with pytest.raises(ValueError, match="values.shape and mask.shape must match"): + coerce_to_array(values.reshape(1, -1), mask=mask) + + with pytest.raises(ValueError, match="values.shape and mask.shape must match"): + coerce_to_array(values, mask=mask.reshape(1, -1)) + + +def test_coerce_to_array_from_boolean_array(): + # passing BooleanArray to coerce_to_array + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + arr = BooleanArray(values, mask) + result = BooleanArray(*coerce_to_array(arr)) + tm.assert_extension_array_equal(result, arr) 
+ # no copy + assert result._data is arr._data + assert result._mask is arr._mask + + result = BooleanArray(*coerce_to_array(arr), copy=True) + tm.assert_extension_array_equal(result, arr) + assert result._data is not arr._data + assert result._mask is not arr._mask + + with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"): + coerce_to_array(arr, mask=mask) + + +def test_coerce_to_numpy_array(): + # with missing values -> object dtype + arr = pd.array([True, False, None], dtype="boolean") + result = np.array(arr) + expected = np.array([True, False, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # also with no missing values -> object dtype + arr = pd.array([True, False, True], dtype="boolean") + result = np.array(arr) + expected = np.array([True, False, True], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # force bool dtype + result = np.array(arr, dtype="bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + # with missing values will raise error + arr = pd.array([True, False, None], dtype="boolean") + msg = ( + "cannot convert to 'bool'-dtype NumPy array with missing values. " + "Specify an appropriate 'na_value' for this dtype." + ) + with pytest.raises(ValueError, match=msg): + np.array(arr, dtype="bool") + + +def test_to_boolean_array_from_strings(): + result = BooleanArray._from_sequence_of_strings( + np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object) + ) + expected = BooleanArray( + np.array([True, False, True, True, False, False, False]), + np.array([False, False, False, False, False, False, True]), + ) + + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_from_strings_invalid_string(): + with pytest.raises(ValueError, match="cannot be cast"): + BooleanArray._from_sequence_of_strings(["donkey"]) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy(box): + con = pd.Series if box else pd.array + # default (with or without missing values) -> object dtype + arr = con([True, False, True], dtype="boolean") + result = arr.to_numpy() + expected = np.array([True, False, True], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy() + expected = np.array([True, False, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy(dtype="str") + expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5") + tm.assert_numpy_array_equal(result, expected) + + # no missing values -> can convert to bool, otherwise raises + arr = con([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype="bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"): + result = arr.to_numpy(dtype="bool") + + # specify dtype and na_value + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy(dtype=object, na_value=None) + expected = np.array([True, False, None], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype=bool, na_value=False) + expected = np.array([True, False, False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + result = 
arr.to_numpy(dtype="int64", na_value=-99) + expected = np.array([1, 0, -99], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([1, 0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + # converting to int or float without specifying na_value raises + with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): + arr.to_numpy(dtype="int64") + with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"): + arr.to_numpy(dtype="float64") + + +def test_to_numpy_copy(): + # to_numpy can be zero-copy if no missing values + arr = pd.array([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype=bool) + result[0] = False + tm.assert_extension_array_equal( + arr, pd.array([False, False, True], dtype="boolean") + ) + + arr = pd.array([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype=bool, copy=True) + result[0] = False + tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py new file mode 100644 index 00000000..8e9112b5 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_function.py @@ -0,0 +1,126 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor] +) +def test_ufuncs_binary(ufunc): + # two BooleanArrays + a = pd.array([True, False, None], dtype="boolean") + result = ufunc(a, a) + expected = pd.array(ufunc(a._data, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s, a) + expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_series_equal(result, expected) + + # Boolean with numpy array + arr = np.array([True, True, False]) + result = ufunc(a, arr) + expected = pd.array(ufunc(a._data, arr), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = pd.array(ufunc(arr, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + # BooleanArray with scalar + result = ufunc(a, True) + expected = pd.array(ufunc(a._data, True), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + result = ufunc(True, a) + expected = pd.array(ufunc(True, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + # not handled types + msg = r"operand type\(s\) all returned NotImplemented from __array_ufunc__" + with pytest.raises(TypeError, match=msg): + ufunc(a, "test") + + +@pytest.mark.parametrize("ufunc", [np.logical_not]) +def test_ufuncs_unary(ufunc): + a = pd.array([True, False, None], dtype="boolean") + result = ufunc(a) + expected = pd.array(ufunc(a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + ser = pd.Series(a) + result = ufunc(ser) + expected = pd.Series(ufunc(a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_series_equal(result, expected) + + +def test_ufunc_numeric(): + # np.sqrt on np.bool returns float16, which we upcast to Float32 + # bc we do not have Float16 + arr = pd.array([True, False, None], dtype="boolean") + 
+ res = np.sqrt(arr) + + expected = pd.array([1, 0, None], dtype="Float32") + tm.assert_extension_array_equal(res, expected) + + +@pytest.mark.parametrize("values", [[True, False], [True, None]]) +def test_ufunc_reduce_raises(values): + arr = pd.array(values, dtype="boolean") + + res = np.add.reduce(arr) + if arr[-1] is pd.NA: + expected = pd.NA + else: + expected = arr._data.sum() + tm.assert_almost_equal(res, expected) + + +def test_value_counts_na(): + arr = pd.array([True, False, pd.NA], dtype="boolean") + result = arr.value_counts(dropna=False) + expected = pd.Series([1, 1, 1], index=arr, dtype="Int64") + assert expected.index.dtype == arr.dtype + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64") + assert expected.index.dtype == arr.dtype + tm.assert_series_equal(result, expected) + + +def test_value_counts_with_normalize(): + ser = pd.Series([True, False, pd.NA], dtype="boolean") + result = ser.value_counts(normalize=True) + expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64") / 2 + assert expected.index.dtype == "boolean" + tm.assert_series_equal(result, expected) + + +def test_diff(): + a = pd.array( + [True, True, False, False, True, None, True, None, False], dtype="boolean" + ) + result = pd.core.algorithms.diff(a, 1) + expected = pd.array( + [None, False, True, False, True, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + ser = pd.Series(a) + result = ser.diff() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/boolean/test_indexing.py b/pandas/tests/arrays/boolean/test_indexing.py new file mode 100644 index 00000000..6a7daea1 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_indexing.py @@ -0,0 +1,13 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("na", [None, np.nan, pd.NA]) +def test_setitem_missing_values(na): + arr = pd.array([True, False, None], dtype="boolean") + expected = pd.array([True, None, None], dtype="boolean") + arr[1] = na + tm.assert_extension_array_equal(arr, expected) diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py new file mode 100644 index 00000000..66c117ea --- /dev/null +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -0,0 +1,254 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import BooleanArray +from pandas.core.ops.mask_ops import ( + kleene_and, + kleene_or, + kleene_xor, +) +from pandas.tests.extension.base import BaseOpsUtil + + +class TestLogicalOps(BaseOpsUtil): + def test_numpy_scalars_ok(self, all_logical_operators): + a = pd.array([True, False, None], dtype="boolean") + op = getattr(a, all_logical_operators) + + tm.assert_extension_array_equal(op(True), op(np.bool_(True))) + tm.assert_extension_array_equal(op(False), op(np.bool_(False))) + + def get_op_from_name(self, op_name): + short_opname = op_name.strip("_") + short_opname = short_opname if "xor" in short_opname else short_opname + "_" + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op + + def test_empty_ok(self, all_logical_operators): + a = pd.array([], dtype="boolean") + op_name = all_logical_operators + result = 
getattr(a, op_name)(True) + tm.assert_extension_array_equal(a, result) + + result = getattr(a, op_name)(False) + tm.assert_extension_array_equal(a, result) + + result = getattr(a, op_name)(pd.NA) + tm.assert_extension_array_equal(a, result) + + @pytest.mark.parametrize( + "other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)] + ) + def test_eq_mismatched_type(self, other): + # GH-44499 + arr = pd.array([True, False]) + result = arr == other + expected = pd.array([False, False]) + tm.assert_extension_array_equal(result, expected) + + result = arr != other + expected = pd.array([True, True]) + tm.assert_extension_array_equal(result, expected) + + def test_logical_length_mismatch_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Lengths must match" + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)([True, False]) + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(np.array([True, False])) + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(pd.array([True, False], dtype="boolean")) + + def test_logical_nan_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Got float instead" + + with pytest.raises(TypeError, match=msg): + getattr(a, op_name)(np.nan) + + @pytest.mark.parametrize("other", ["a", 1]) + def test_non_bool_or_na_other_raises(self, other, all_logical_operators): + a = pd.array([True, False], dtype="boolean") + with pytest.raises(TypeError, match=str(type(other).__name__)): + getattr(a, all_logical_operators)(other) + + def test_kleene_or(self): + # A clear test of behavior. + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a | b + expected = pd.array( + [True, True, True, True, False, None, True, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [True, None, None]), + (True, [True, True, True]), + (np.bool_(True), [True, True, True]), + (False, [True, False, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_or_scalar(self, other, expected): + # TODO: test True & False + a = pd.array([True, False, None], dtype="boolean") + result = a | other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_kleene_and(self): + # A clear test of behavior. 
+ a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a & b + expected = pd.array( + [True, False, None, False, False, False, None, False, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b & a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [None, False, None]), + (True, [True, False, None]), + (False, [False, False, False]), + (np.bool_(True), [True, False, None]), + (np.bool_(False), [False, False, False]), + ], + ) + def test_kleene_and_scalar(self, other, expected): + a = pd.array([True, False, None], dtype="boolean") + result = a & other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other & a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_kleene_xor(self): + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a ^ b + expected = pd.array( + [False, True, None, True, False, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b ^ a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [None, None, None]), + (True, [False, True, None]), + (np.bool_(True), [False, True, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_xor_scalar(self, other, expected): + a = pd.array([True, False, None], dtype="boolean") + result = a ^ other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other ^ a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + @pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3]) + def test_no_masked_assumptions(self, other, all_logical_operators): + # The logical operations should not assume that masked values are False! 
+ a = pd.arrays.BooleanArray( + np.array([True, True, True, False, False, False, True, False, True]), + np.array([False] * 6 + [True, True, True]), + ) + b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + if isinstance(other, list): + other = pd.array(other, dtype="boolean") + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + + if isinstance(other, BooleanArray): + other._data[other._mask] = True + a._data[a._mask] = False + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and]) +def test_error_both_scalar(operation): + msg = r"Either `left` or `right` need to be a np\.ndarray." + with pytest.raises(TypeError, match=msg): + # masks need to be non-None, otherwise it ends up in an infinite recursion + operation(True, True, np.zeros(1), np.zeros(1)) diff --git a/pandas/tests/arrays/boolean/test_ops.py b/pandas/tests/arrays/boolean/test_ops.py new file mode 100644 index 00000000..95ebe852 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_ops.py @@ -0,0 +1,27 @@ +import pandas as pd +import pandas._testing as tm + + +class TestUnaryOps: + def test_invert(self): + a = pd.array([True, False, None], dtype="boolean") + expected = pd.array([False, True, None], dtype="boolean") + tm.assert_extension_array_equal(~a, expected) + + expected = pd.Series(expected, index=["a", "b", "c"], name="name") + result = ~pd.Series(a, index=["a", "b", "c"], name="name") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) + result = ~df + expected = pd.DataFrame( + {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] + ) + tm.assert_frame_equal(result, expected) + + def test_abs(self): + # matching numpy behavior, abs is the identity function + arr = pd.array([True, False, None], dtype="boolean") + result = abs(arr) + + tm.assert_extension_array_equal(result, arr) diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py new file mode 100644 index 00000000..f3807df9 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -0,0 +1,61 @@ +import numpy as np +import pytest + +import pandas as pd + + +@pytest.fixture +def data(): + """Fixture returning boolean array, with valid and missing values.""" + return pd.array( + [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], + dtype="boolean", + ) + + +@pytest.mark.parametrize( + "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", + [ + ([True, pd.NA], True, True, True, pd.NA), + ([False, pd.NA], False, False, pd.NA, False), + ([pd.NA], False, True, pd.NA, pd.NA), + ([], False, True, False, True), + # GH-33253: all True / all False values buggy with skipna=False + ([True, True], True, True, True, True), + ([False, False], False, False, False, False), + ], +) +def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): + # the methods return numpy scalars + exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) + exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) + exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) + exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) + + for con in [pd.array, pd.Series]: 
+ a = con(values, dtype="boolean") + assert a.any() is exp_any + assert a.all() is exp_all + assert a.any(skipna=False) is exp_any_noskip + assert a.all(skipna=False) is exp_all_noskip + + assert np.any(a.any()) is exp_any + assert np.all(a.all()) is exp_all + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_reductions_return_types(dropna, data, all_numeric_reductions): + op = all_numeric_reductions + s = pd.Series(data) + if dropna: + s = s.dropna() + + if op == "sum": + assert isinstance(getattr(s, op)(), np.int_) + elif op == "prod": + assert isinstance(getattr(s, op)(), np.int_) + elif op in ("min", "max"): + assert isinstance(getattr(s, op)(), np.bool_) + else: + # "mean", "std", "var", "median", "kurt", "skew" + assert isinstance(getattr(s, op)(), np.float64) diff --git a/pandas/tests/arrays/boolean/test_repr.py b/pandas/tests/arrays/boolean/test_repr.py new file mode 100644 index 00000000..0ee904b1 --- /dev/null +++ b/pandas/tests/arrays/boolean/test_repr.py @@ -0,0 +1,13 @@ +import pandas as pd + + +def test_repr(): + df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) + expected = " A\n0 True\n1 False\n2 " + assert repr(df) == expected + + expected = "0 True\n1 False\n2 \nName: A, dtype: boolean" + assert repr(df.A) == expected + + expected = "\n[True, False, ]\nLength: 3, dtype: boolean" + assert repr(df.A.array) == expected diff --git a/pandas/tests/arrays/categorical/__init__.py b/pandas/tests/arrays/categorical/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/categorical/conftest.py b/pandas/tests/arrays/categorical/conftest.py new file mode 100644 index 00000000..d5b49e3e --- /dev/null +++ b/pandas/tests/arrays/categorical/conftest.py @@ -0,0 +1,15 @@ +import pytest + +from pandas import Categorical + + +@pytest.fixture(params=[True, False]) +def allow_fill(request): + """Boolean 'allow_fill' parameter for Categorical.take""" + return request.param + + +@pytest.fixture +def factor(): + """Fixture returning a Categorical object""" + return Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py new file mode 100644 index 00000000..5b0004a3 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -0,0 +1,83 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("ordered", [True, False]) +@pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]]) +def test_factorize(categories, ordered): + cat = pd.Categorical( + ["b", "b", "a", "c", None], categories=categories, ordered=ordered + ) + codes, uniques = pd.factorize(cat) + expected_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp) + expected_uniques = pd.Categorical( + ["b", "a", "c"], categories=categories, ordered=ordered + ) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_factorized_sort(): + cat = pd.Categorical(["b", "b", None, "a"]) + codes, uniques = pd.factorize(cat, sort=True) + expected_codes = np.array([1, 1, -1, 0], dtype=np.intp) + expected_uniques = pd.Categorical(["a", "b"]) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_factorized_sort_ordered(): + cat = pd.Categorical( + ["b", "b", None, "a"], categories=["c", "b", "a"], ordered=True + ) + + codes, uniques = 
pd.factorize(cat, sort=True) + expected_codes = np.array([0, 0, -1, 1], dtype=np.intp) + expected_uniques = pd.Categorical( + ["b", "a"], categories=["c", "b", "a"], ordered=True + ) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_isin_cats(): + # GH2003 + cat = pd.Categorical(["a", "b", np.nan]) + + result = cat.isin(["a", np.nan]) + expected = np.array([True, False, True], dtype=bool) + tm.assert_numpy_array_equal(expected, result) + + result = cat.isin(["a", "c"]) + expected = np.array([True, False, False], dtype=bool) + tm.assert_numpy_array_equal(expected, result) + + +@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])]) +def test_isin_empty(empty): + s = pd.Categorical(["a", "b"]) + expected = np.array([False, False], dtype=bool) + + result = s.isin(empty) + tm.assert_numpy_array_equal(expected, result) + + +def test_diff(): + s = pd.Series([1, 2, 3], dtype="category") + with tm.assert_produces_warning(FutureWarning): + result = s.diff() + expected = pd.Series([np.nan, 1, 1]) + tm.assert_series_equal(result, expected) + + expected = expected.to_frame(name="A") + df = s.to_frame(name="A") + with tm.assert_produces_warning(FutureWarning): + result = df.diff() + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py new file mode 100644 index 00000000..1a8dbe25 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -0,0 +1,384 @@ +import re +import sys + +import numpy as np +import pytest + +from pandas.compat import PYPY + +from pandas import ( + Categorical, + CategoricalDtype, + Index, + NaT, + Series, + date_range, +) +import pandas._testing as tm +from pandas.api.types import is_scalar + + +class TestCategoricalAnalytics: + @pytest.mark.parametrize("aggregation", ["min", "max"]) + def test_min_max_not_ordered_raises(self, aggregation): + # unordered cats have no min/max + cat = Categorical(["a", "b", "c", "d"], ordered=False) + msg = f"Categorical is not ordered for operation {aggregation}" + agg_func = getattr(cat, aggregation) + + with pytest.raises(TypeError, match=msg): + agg_func() + + ufunc = np.minimum if aggregation == "min" else np.maximum + with pytest.raises(TypeError, match=msg): + ufunc.reduce(cat) + + def test_min_max_ordered(self, index_or_series_or_array): + cat = Categorical(["a", "b", "c", "d"], ordered=True) + obj = index_or_series_or_array(cat) + _min = obj.min() + _max = obj.max() + assert _min == "a" + assert _max == "d" + + assert np.minimum.reduce(obj) == "a" + assert np.maximum.reduce(obj) == "d" + # TODO: raises if we pass axis=0 (on Index and Categorical, not Series) + + cat = Categorical( + ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True + ) + obj = index_or_series_or_array(cat) + _min = obj.min() + _max = obj.max() + assert _min == "d" + assert _max == "a" + assert np.minimum.reduce(obj) == "d" + assert np.maximum.reduce(obj) == "a" + + @pytest.mark.parametrize( + "categories,expected", + [ + (list("ABC"), np.NaN), + ([1, 2, 3], np.NaN), + pytest.param( + Series(date_range("2020-01-01", periods=3), dtype="category"), + NaT, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/29962" + ), + ), + ], + ) + @pytest.mark.parametrize("aggregation", ["min", "max"]) + def test_min_max_ordered_empty(self, categories, expected, aggregation): + # GH 30227 + cat = Categorical([], 
categories=categories, ordered=True) + + agg_func = getattr(cat, aggregation) + result = agg_func() + assert result is expected + + @pytest.mark.parametrize( + "values, categories", + [(["a", "b", "c", np.nan], list("cba")), ([1, 2, 3, np.nan], [3, 2, 1])], + ) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("function", ["min", "max"]) + def test_min_max_with_nan(self, values, categories, function, skipna): + # GH 25303 + cat = Categorical(values, categories=categories, ordered=True) + result = getattr(cat, function)(skipna=skipna) + + if skipna is False: + assert result is np.nan + else: + expected = categories[0] if function == "min" else categories[2] + assert result == expected + + @pytest.mark.parametrize("function", ["min", "max"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_only_nan(self, function, skipna): + # https://github.com/pandas-dev/pandas/issues/33450 + cat = Categorical([np.nan], categories=[1, 2], ordered=True) + result = getattr(cat, function)(skipna=skipna) + assert result is np.nan + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_deprecate_numeric_only_min_max(self, method): + # GH 25303 + cat = Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + with tm.assert_produces_warning(expected_warning=FutureWarning): + getattr(cat, method)(numeric_only=True) + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_numpy_min_max_raises(self, method): + cat = Categorical(["a", "b", "c", "b"], ordered=False) + msg = ( + f"Categorical is not ordered for operation {method}\n" + "you can use .as_ordered() to change the Categorical to an ordered one" + ) + method = getattr(np, method) + with pytest.raises(TypeError, match=re.escape(msg)): + method(cat) + + @pytest.mark.parametrize("kwarg", ["axis", "out", "keepdims"]) + @pytest.mark.parametrize("method", ["min", "max"]) + def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg): + cat = Categorical(["a", "b", "c", "b"], ordered=True) + msg = ( + f"the '{kwarg}' parameter is not supported in the pandas implementation " + f"of {method}" + ) + if kwarg == "axis": + msg = r"`axis` must be fewer than the number of dimensions \(1\)" + kwargs = {kwarg: 42} + method = getattr(np, method) + with pytest.raises(ValueError, match=msg): + method(cat, **kwargs) + + @pytest.mark.parametrize("method, expected", [("min", "a"), ("max", "c")]) + def test_numpy_min_max_axis_equals_none(self, method, expected): + cat = Categorical(["a", "b", "c", "b"], ordered=True) + method = getattr(np, method) + result = method(cat, axis=None) + assert result == expected + + @pytest.mark.parametrize( + "values,categories,exp_mode", + [ + ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]), + ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]), + ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]), + ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]), + ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), + ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), + ], + ) + def test_mode(self, values, categories, exp_mode): + s = Categorical(values, categories=categories, ordered=True) + msg = "Use Series.mode instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = s.mode() + exp = Categorical(exp_mode, categories=categories, ordered=True) + tm.assert_categorical_equal(res, exp) + + def test_searchsorted(self, ordered): + # https://github.com/pandas-dev/pandas/issues/8420 + # 
https://github.com/pandas-dev/pandas/issues/14522 + + cat = Categorical( + ["cheese", "milk", "apple", "bread", "bread"], + categories=["cheese", "milk", "apple", "bread"], + ordered=ordered, + ) + ser = Series(cat) + + # Searching for single item argument, side='left' (default) + res_cat = cat.searchsorted("apple") + assert res_cat == 2 + assert is_scalar(res_cat) + + res_ser = ser.searchsorted("apple") + assert res_ser == 2 + assert is_scalar(res_ser) + + # Searching for single item array, side='left' (default) + res_cat = cat.searchsorted(["bread"]) + res_ser = ser.searchsorted(["bread"]) + exp = np.array([3], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, exp) + + # Searching for several items array, side='right' + res_cat = cat.searchsorted(["apple", "bread"], side="right") + res_ser = ser.searchsorted(["apple", "bread"], side="right") + exp = np.array([3, 5], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, exp) + + # Searching for a single value that is not from the Categorical + with pytest.raises(TypeError, match="cucumber"): + cat.searchsorted("cucumber") + with pytest.raises(TypeError, match="cucumber"): + ser.searchsorted("cucumber") + + # Searching for multiple values one of each is not from the Categorical + msg = ( + "Cannot setitem on a Categorical with a new category, " + "set the categories first" + ) + with pytest.raises(TypeError, match=msg): + cat.searchsorted(["bread", "cucumber"]) + with pytest.raises(TypeError, match=msg): + ser.searchsorted(["bread", "cucumber"]) + + def test_unique(self, ordered): + # GH38140 + dtype = CategoricalDtype(["a", "b", "c"], ordered=ordered) + + # categories are reordered based on value when ordered=False + cat = Categorical(["a", "b", "c"], dtype=dtype) + res = cat.unique() + tm.assert_categorical_equal(res, cat) + + cat = Categorical(["a", "b", "a", "a"], dtype=dtype) + res = cat.unique() + tm.assert_categorical_equal(res, Categorical(["a", "b"], dtype=dtype)) + + cat = Categorical(["c", "a", "b", "a", "a"], dtype=dtype) + res = cat.unique() + exp_cat = Categorical(["c", "a", "b"], dtype=dtype) + tm.assert_categorical_equal(res, exp_cat) + + # nan must be removed + cat = Categorical(["b", np.nan, "b", np.nan, "a"], dtype=dtype) + res = cat.unique() + exp_cat = Categorical(["b", np.nan, "a"], dtype=dtype) + tm.assert_categorical_equal(res, exp_cat) + + def test_unique_index_series(self, ordered): + # GH38140 + dtype = CategoricalDtype([3, 2, 1], ordered=ordered) + + c = Categorical([3, 1, 2, 2, 1], dtype=dtype) + # Categorical.unique sorts categories by appearance order + # if ordered=False + exp = Categorical([3, 1, 2], dtype=dtype) + tm.assert_categorical_equal(c.unique(), exp) + + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + c = Categorical([1, 1, 2, 2], dtype=dtype) + exp = Categorical([1, 2], dtype=dtype) + tm.assert_categorical_equal(c.unique(), exp) + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + def test_shift(self): + # GH 9416 + cat = Categorical(["a", "b", "c", "d", "a"]) + + # shift forward + sp1 = cat.shift(1) + xp1 = Categorical([np.nan, "a", "b", "c", "d"]) + tm.assert_categorical_equal(sp1, xp1) + tm.assert_categorical_equal(cat[:-1], sp1[1:]) + + # shift back + sn2 = cat.shift(-2) + xp2 = Categorical( + ["c", "d", "a", np.nan, np.nan], categories=["a", "b", "c", "d"] + ) + 
tm.assert_categorical_equal(sn2, xp2) + tm.assert_categorical_equal(cat[2:], sn2[:-2]) + + # shift by zero + tm.assert_categorical_equal(cat, cat.shift(0)) + + def test_nbytes(self): + cat = Categorical([1, 2, 3]) + exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories + assert cat.nbytes == exp + + def test_memory_usage(self): + cat = Categorical([1, 2, 3]) + + # .categories is an index, so we include the hashtable + assert 0 < cat.nbytes <= cat.memory_usage() + assert 0 < cat.nbytes <= cat.memory_usage(deep=True) + + cat = Categorical(["foo", "foo", "bar"]) + assert cat.memory_usage(deep=True) > cat.nbytes + + if not PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) + assert abs(diff) < 100 + + def test_map(self): + c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) + result = c.map(lambda x: x.lower()) + exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_categorical_equal(result, exp) + + c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False) + result = c.map(lambda x: x.lower()) + exp = Categorical(list("ababc"), categories=list("abc"), ordered=False) + tm.assert_categorical_equal(result, exp) + + result = c.map(lambda x: 1) + # GH 12766: Return an index not an array + tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) + + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_inplace_raises(self, value): + cat = Categorical(["A", "B", "B", "C", "A"]) + msg = ( + 'For argument "inplace" expected type bool, ' + f"received type {type(value).__name__}" + ) + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning( + FutureWarning, match="Use rename_categories" + ): + cat.set_ordered(value=True, inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning( + FutureWarning, match="Use rename_categories" + ): + cat.as_ordered(inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning( + FutureWarning, match="Use rename_categories" + ): + cat.as_unordered(inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.set_categories(["X", "Y", "Z"], rename=True, inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.rename_categories(["X", "Y", "Z"], inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.reorder_categories(["X", "Y", "Z"], ordered=True, inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.add_categories(new_categories=["D", "E", "F"], inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.remove_categories(removals=["D", "E", "F"], inplace=value) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.remove_unused_categories(inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.sort_values(inplace=value) + + def test_quantile_empty(self): + # make 
sure we have correct itemsize on resulting codes + cat = Categorical(["A", "B"]) + idx = Index([0.0, 0.5]) + result = cat[:0]._quantile(idx, interpolation="linear") + assert result._codes.dtype == np.int8 + + expected = cat.take([-1, -1], allow_fill=True) + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py new file mode 100644 index 00000000..f0669f52 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_api.py @@ -0,0 +1,579 @@ +import re + +import numpy as np +import pytest + +from pandas.compat import PY311 + +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.arrays.categorical import recode_for_categories + + +class TestCategoricalAPI: + def test_ordered_api(self): + # GH 9347 + cat1 = Categorical(list("acb"), ordered=False) + tm.assert_index_equal(cat1.categories, Index(["a", "b", "c"])) + assert not cat1.ordered + + cat2 = Categorical(list("acb"), categories=list("bca"), ordered=False) + tm.assert_index_equal(cat2.categories, Index(["b", "c", "a"])) + assert not cat2.ordered + + cat3 = Categorical(list("acb"), ordered=True) + tm.assert_index_equal(cat3.categories, Index(["a", "b", "c"])) + assert cat3.ordered + + cat4 = Categorical(list("acb"), categories=list("bca"), ordered=True) + tm.assert_index_equal(cat4.categories, Index(["b", "c", "a"])) + assert cat4.ordered + + def test_set_ordered(self): + msg = ( + "The `inplace` parameter in pandas.Categorical.set_ordered is " + "deprecated and will be removed in a future version. setting " + "ordered-ness on categories will always return a new Categorical object" + ) + cat = Categorical(["a", "b", "c", "a"], ordered=True) + cat2 = cat.as_unordered() + assert not cat2.ordered + cat2 = cat.as_ordered() + assert cat2.ordered + with tm.assert_produces_warning(FutureWarning, match=msg): + cat2.as_unordered(inplace=True) + assert not cat2.ordered + with tm.assert_produces_warning(FutureWarning, match=msg): + cat2.as_ordered(inplace=True) + assert cat2.ordered + + assert cat2.set_ordered(True).ordered + assert not cat2.set_ordered(False).ordered + with tm.assert_produces_warning(FutureWarning, match=msg): + cat2.set_ordered(True, inplace=True) + assert cat2.ordered + with tm.assert_produces_warning(FutureWarning, match=msg): + cat2.set_ordered(False, inplace=True) + assert not cat2.ordered + + # removed in 0.19.0 + msg = ( + "property 'ordered' of 'Categorical' object has no setter" + if PY311 + else "can't set attribute" + ) + with pytest.raises(AttributeError, match=msg): + cat.ordered = True + with pytest.raises(AttributeError, match=msg): + cat.ordered = False + + def test_rename_categories(self): + cat = Categorical(["a", "b", "c", "a"]) + + # inplace=False: the old one must not be changed + res = cat.rename_categories([1, 2, 3]) + tm.assert_numpy_array_equal( + res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) + ) + tm.assert_index_equal(res.categories, Index([1, 2, 3])) + + exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + exp_cat = Index(["a", "b", "c"]) + tm.assert_index_equal(cat.categories, exp_cat) + + # GH18862 (let rename_categories take callables) + result = cat.rename_categories(lambda x: x.upper()) + expected = Categorical(["A", "B", "C", "A"]) + tm.assert_categorical_equal(result, expected) + + # and now inplace + with tm.assert_produces_warning(FutureWarning): 
+ # issue #37643 inplace kwarg deprecated + res = cat.rename_categories([1, 2, 3], inplace=True) + + assert res is None + tm.assert_numpy_array_equal( + cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) + ) + tm.assert_index_equal(cat.categories, Index([1, 2, 3])) + + @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) + def test_rename_categories_wrong_length_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"]) + msg = ( + "new categories need to have the same number of items as the " + "old categories!" + ) + with pytest.raises(ValueError, match=msg): + cat.rename_categories(new_categories) + + def test_rename_categories_series(self): + # https://github.com/pandas-dev/pandas/issues/17981 + c = Categorical(["a", "b"]) + result = c.rename_categories(Series([0, 1], index=["a", "b"])) + expected = Categorical([0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_rename_categories_dict(self): + # GH 17336 + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}) + expected = Index([4, 3, 2, 1]) + tm.assert_index_equal(res.categories, expected) + + # Test for inplace + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}, inplace=True) + + assert res is None + tm.assert_index_equal(cat.categories, expected) + + # Test for dicts of smaller length + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 1, "c": 3}) + + expected = Index([1, "b", 3, "d"]) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with bigger length + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}) + expected = Index([1, 2, 3, 4]) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with no items from old categories + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"f": 1, "g": 3}) + + expected = Index(["a", "b", "c", "d"]) + tm.assert_index_equal(res.categories, expected) + + def test_reorder_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical( + ["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True + ) + + # first inplace == False + res = cat.reorder_categories(["c", "b", "a"]) + # cat must be the same as before + tm.assert_categorical_equal(cat, old) + # only res is changed + tm.assert_categorical_equal(res, new) + + # inplace == True + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.reorder_categories(["c", "b", "a"], inplace=True) + + assert res is None + tm.assert_categorical_equal(cat, new) + + @pytest.mark.parametrize( + "new_categories", + [ + ["a"], # not all "old" included in "new" + ["a", "b", "d"], # still not all "old" in "new" + ["a", "b", "c", "d"], # all "old" included in "new", but too long + ], + ) + def test_reorder_categories_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + msg = "items in new_categories are not the same as in old categories" + with pytest.raises(ValueError, match=msg): + cat.reorder_categories(new_categories) + + def test_add_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical( + ["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True + ) + + # first inplace == False + res = cat.add_categories("d") + 
tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + res = cat.add_categories(["d"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + # inplace == True + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.add_categories("d", inplace=True) + + tm.assert_categorical_equal(cat, new) + assert res is None + + # GH 9927 + cat = Categorical(list("abc"), ordered=True) + expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) + # test with Series, np.array, index, list + res = cat.add_categories(Series(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(np.array(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(Index(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(["d", "e"]) + tm.assert_categorical_equal(res, expected) + + def test_add_categories_existing_raises(self): + # new is in old categories + cat = Categorical(["a", "b", "c", "d"], ordered=True) + msg = re.escape("new categories must not include old categories: {'d'}") + with pytest.raises(ValueError, match=msg): + cat.add_categories(["d"]) + + def test_set_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + exp_categories = Index(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) + + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.set_categories(["c", "b", "a"], inplace=True) + + tm.assert_index_equal(cat.categories, exp_categories) + tm.assert_numpy_array_equal(cat.__array__(), exp_values) + assert res is None + + res = cat.set_categories(["a", "b", "c"]) + # cat must be the same as before + tm.assert_index_equal(cat.categories, exp_categories) + tm.assert_numpy_array_equal(cat.__array__(), exp_values) + # only res is changed + exp_categories_back = Index(["a", "b", "c"]) + tm.assert_index_equal(res.categories, exp_categories_back) + tm.assert_numpy_array_equal(res.__array__(), exp_values) + + # not all "old" included in "new" -> all not included ones are now + # np.nan + cat = Categorical(["a", "b", "c", "a"], ordered=True) + res = cat.set_categories(["a"]) + tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8)) + + # still not all "old" in "new" + res = cat.set_categories(["a", "b", "d"]) + tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8)) + tm.assert_index_equal(res.categories, Index(["a", "b", "d"])) + + # all "old" included in "new" + cat = cat.set_categories(["a", "b", "c", "d"]) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_index_equal(cat.categories, exp_categories) + + # internals... + c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8)) + tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) + + exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) + tm.assert_numpy_array_equal(np.asarray(c), exp) + + # all "pointers" to '4' must be changed from 3 to 0,... 
+ c = c.set_categories([4, 3, 2, 1]) + + # positions are changed + tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8)) + + # categories are now in new order + tm.assert_index_equal(c.categories, Index([4, 3, 2, 1])) + + # output is the same + exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) + tm.assert_numpy_array_equal(np.asarray(c), exp) + assert c.min() == 4 + assert c.max() == 1 + + # set_categories should set the ordering if specified + c2 = c.set_categories([4, 3, 2, 1], ordered=False) + assert not c2.ordered + + tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) + + # set_categories should pass thru the ordering + c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) + assert not c2.ordered + + tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) + + def test_to_dense_deprecated(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + + with tm.assert_produces_warning(FutureWarning): + cat.to_dense() + + @pytest.mark.parametrize( + "values, categories, new_categories", + [ + # No NaNs, same cats, same order + (["a", "b", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["a", "b", "a"], ["a", "b"], ["b", "a"]), + # Same, unsorted + (["b", "a", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["b", "a", "a"], ["a", "b"], ["b", "a"]), + # NaNs + (["a", "b", "c"], ["a", "b"], ["a", "b"]), + (["a", "b", "c"], ["a", "b"], ["b", "a"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + # Introduce NaNs + (["a", "b", "c"], ["a", "b"], ["a"]), + (["a", "b", "c"], ["a", "b"], ["b"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + # No overlap + (["a", "b", "c"], ["a", "b"], ["d", "e"]), + ], + ) + @pytest.mark.parametrize("ordered", [True, False]) + def test_set_categories_many(self, values, categories, new_categories, ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c.set_categories(new_categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_set_categories_rename_less(self): + # GH 24675 + cat = Categorical(["A", "B"]) + result = cat.set_categories(["A"], rename=True) + expected = Categorical(["A", np.nan]) + tm.assert_categorical_equal(result, expected) + + def test_set_categories_private(self): + cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) + cat._set_categories(["a", "c", "d", "e"]) + expected = Categorical(["a", "c", "d"], categories=list("acde")) + tm.assert_categorical_equal(cat, expected) + + # fastpath + cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) + cat._set_categories(["a", "c", "d", "e"], fastpath=True) + expected = Categorical(["a", "c", "d"], categories=list("acde")) + tm.assert_categorical_equal(cat, expected) + + def test_remove_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True) + + # first inplace == False + res = cat.remove_categories("c") + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + res = cat.remove_categories(["c"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + # inplace == True + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = cat.remove_categories("c", inplace=True) + + tm.assert_categorical_equal(cat, new) 
+ assert res is None + + @pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]]) + def test_remove_categories_raises(self, removals): + cat = Categorical(["a", "b", "a"]) + message = re.escape("removals must all be in old categories: {'c'}") + + with pytest.raises(ValueError, match=message): + cat.remove_categories(removals) + + def test_remove_unused_categories(self): + c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) + exp_categories_all = Index(["a", "b", "c", "d", "e"]) + exp_categories_dropped = Index(["a", "b", "c", "d"]) + + tm.assert_index_equal(c.categories, exp_categories_all) + + res = c.remove_unused_categories() + tm.assert_index_equal(res.categories, exp_categories_dropped) + tm.assert_index_equal(c.categories, exp_categories_all) + + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + res = c.remove_unused_categories(inplace=True) + + tm.assert_index_equal(c.categories, exp_categories_dropped) + assert res is None + + # with NaN values (GH11599) + c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"]) + res = c.remove_unused_categories() + tm.assert_index_equal(res.categories, Index(np.array(["a", "b", "c"]))) + exp_codes = np.array([0, 1, 2, -1], dtype=np.int8) + tm.assert_numpy_array_equal(res.codes, exp_codes) + tm.assert_index_equal(c.categories, exp_categories_all) + + val = ["F", np.nan, "D", "B", "D", "F", np.nan] + cat = Categorical(values=val, categories=list("ABCDEFG")) + out = cat.remove_unused_categories() + tm.assert_index_equal(out.categories, Index(["B", "D", "F"])) + exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) + tm.assert_numpy_array_equal(out.codes, exp_codes) + assert out.tolist() == val + + alpha = list("abcdefghijklmnopqrstuvwxyz") + val = np.random.choice(alpha[::2], 10000).astype("object") + val[np.random.choice(len(val), 100)] = np.nan + + cat = Categorical(values=val, categories=alpha) + out = cat.remove_unused_categories() + assert out.tolist() == val.tolist() + + +class TestCategoricalAPIWithFactor: + def test_describe(self, factor): + # string type + desc = factor.describe() + assert factor.ordered + exp_index = CategoricalIndex( + ["a", "b", "c"], name="categories", ordered=factor.ordered + ) + expected = DataFrame( + {"counts": [3, 2, 3], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0]}, index=exp_index + ) + tm.assert_frame_equal(desc, expected) + + # check unused categories + cat = factor.copy() + + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.set_categories(["a", "b", "c", "d"], inplace=True) + + desc = cat.describe() + + exp_index = CategoricalIndex( + list("abcd"), ordered=factor.ordered, name="categories" + ) + expected = DataFrame( + {"counts": [3, 2, 3, 0], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0, 0]}, + index=exp_index, + ) + tm.assert_frame_equal(desc, expected) + + # check an integer one + cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) + desc = cat.describe() + exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered, name="categories") + expected = DataFrame( + {"counts": [5, 3, 3], "freqs": [5 / 11.0, 3 / 11.0, 3 / 11.0]}, + index=exp_index, + ) + tm.assert_frame_equal(desc, expected) + + # https://github.com/pandas-dev/pandas/issues/3678 + # describe should work with NaN + cat = Categorical([np.nan, 1, 2, 2]) + desc = cat.describe() + expected = DataFrame( + {"counts": [1, 2, 1], "freqs": [1 / 4.0, 2 / 4.0, 1 / 4.0]}, + index=CategoricalIndex( + [1, 2, 
np.nan], categories=[1, 2], name="categories" + ), + ) + tm.assert_frame_equal(desc, expected) + + def test_set_categories_inplace(self, factor): + cat = factor.copy() + + with tm.assert_produces_warning(FutureWarning): + # issue #37643 inplace kwarg deprecated + cat.set_categories(["a", "b", "c", "d"], inplace=True) + + tm.assert_index_equal(cat.categories, Index(["a", "b", "c", "d"])) + + def test_codes_setter_deprecated(self): + cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) + new_codes = cat._codes + 1 + with tm.assert_produces_warning(FutureWarning): + # GH#40606 + cat._codes = new_codes + + assert cat._codes is new_codes + + +class TestPrivateCategoricalAPI: + def test_codes_immutable(self): + + # Codes should be read only + c = Categorical(["a", "b", "c", "a", np.nan]) + exp = np.array([0, 1, 2, 0, -1], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + + # Assignments to codes should raise + msg = ( + "property 'codes' of 'Categorical' object has no setter" + if PY311 + else "can't set attribute" + ) + with pytest.raises(AttributeError, match=msg): + c.codes = np.array([0, 1, 2, 0, 1], dtype="int8") + + # changes in the codes array should raise + codes = c.codes + + with pytest.raises(ValueError, match="assignment destination is read-only"): + codes[4] = 1 + + # But even after getting the codes, the original array should still be + # writeable! + c[4] = "a" + exp = np.array([0, 1, 2, 0, 0], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + c._codes[4] = 2 + exp = np.array([0, 1, 2, 0, 2], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + + @pytest.mark.parametrize( + "codes, old, new, expected", + [ + ([0, 1], ["a", "b"], ["a", "b"], [0, 1]), + ([0, 1], ["b", "a"], ["b", "a"], [0, 1]), + ([0, 1], ["a", "b"], ["b", "a"], [1, 0]), + ([0, 1], ["b", "a"], ["a", "b"], [1, 0]), + ([0, 1, 0, 1], ["a", "b"], ["a", "b", "c"], [0, 1, 0, 1]), + ([0, 1, 2, 2], ["a", "b", "c"], ["a", "b"], [0, 1, -1, -1]), + ([0, 1, -1], ["a", "b", "c"], ["a", "b", "c"], [0, 1, -1]), + ([0, 1, -1], ["a", "b", "c"], ["b"], [-1, 0, -1]), + ([0, 1, -1], ["a", "b", "c"], ["d"], [-1, -1, -1]), + ([0, 1, -1], ["a", "b", "c"], [], [-1, -1, -1]), + ([-1, -1], [], ["a", "b"], [-1, -1]), + ([1, 0], ["b", "a"], ["a", "b"], [0, 1]), + ], + ) + def test_recode_to_categories(self, codes, old, new, expected): + codes = np.asanyarray(codes, dtype=np.int8) + expected = np.asanyarray(expected, dtype=np.int8) + old = Index(old) + new = Index(new) + result = recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) + + def test_recode_to_categories_large(self): + N = 1000 + codes = np.arange(N) + old = Index(codes) + expected = np.arange(N - 1, -1, -1, dtype=np.int16) + new = Index(expected) + result = recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py new file mode 100644 index 00000000..205e3950 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -0,0 +1,99 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + NaT, + Timestamp, + array, + to_datetime, +) +import pandas._testing as tm + + +class TestAstype: + def test_astype_str_int_categories_to_nullable_int(self): + # GH#39616 + dtype = CategoricalDtype([str(i) for i in range(5)]) + codes = np.random.randint(5, size=20) + arr = Categorical.from_codes(codes, dtype=dtype) + + res = arr.astype("Int64") + expected 
= array(codes, dtype="Int64") + tm.assert_extension_array_equal(res, expected) + + def test_astype_str_int_categories_to_nullable_float(self): + # GH#39616 + dtype = CategoricalDtype([str(i / 2) for i in range(5)]) + codes = np.random.randint(5, size=20) + arr = Categorical.from_codes(codes, dtype=dtype) + + res = arr.astype("Float64") + expected = array(codes, dtype="Float64") / 2 + tm.assert_extension_array_equal(res, expected) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_astype(self, ordered): + # string + cat = Categorical(list("abbaaccc"), ordered=ordered) + result = cat.astype(object) + expected = np.array(cat) + tm.assert_numpy_array_equal(result, expected) + + msg = r"Cannot cast object dtype to float64" + with pytest.raises(ValueError, match=msg): + cat.astype(float) + + # numeric + cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered) + result = cat.astype(object) + expected = np.array(cat, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = cat.astype(int) + expected = np.array(cat, dtype="int") + tm.assert_numpy_array_equal(result, expected) + + result = cat.astype(float) + expected = np.array(cat, dtype=float) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype_ordered", [True, False]) + @pytest.mark.parametrize("cat_ordered", [True, False]) + def test_astype_category(self, dtype_ordered, cat_ordered): + # GH#10696/GH#18593 + data = list("abcaacbab") + cat = Categorical(data, categories=list("bac"), ordered=cat_ordered) + + # standard categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = cat.astype(dtype) + expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered) + tm.assert_categorical_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(list("adc"), dtype_ordered) + result = cat.astype(dtype) + expected = Categorical(data, dtype=dtype) + tm.assert_categorical_equal(result, expected) + + if dtype_ordered is False: + # dtype='category' can't specify ordered, so only test once + result = cat.astype("category") + expected = cat + tm.assert_categorical_equal(result, expected) + + def test_astype_object_datetime_categories(self): + # GH#40754 + cat = Categorical(to_datetime(["2021-03-27", NaT])) + result = cat.astype(object) + expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + def test_astype_object_timestamp_categories(self): + # GH#18024 + cat = Categorical([Timestamp("2014-01-01")]) + result = cat.astype(object) + expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object") + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py new file mode 100644 index 00000000..24f4dd4a --- /dev/null +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -0,0 +1,772 @@ +from datetime import ( + date, + datetime, +) + +import numpy as np +import pytest + +from pandas.compat import ( + IS64, + is_platform_windows, +) + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api 
import Int64Index + + +class TestCategoricalConstructors: + def test_categorical_scalar_deprecated(self): + # GH#38433 + with tm.assert_produces_warning(FutureWarning): + Categorical("A", categories=["A", "B"]) + + def test_categorical_1d_only(self): + # ndim > 1 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + Categorical(np.array([list("abcd")])) + + def test_validate_ordered(self): + # see gh-14058 + exp_msg = "'ordered' must either be 'True' or 'False'" + exp_err = TypeError + + # This should be a boolean. + ordered = np.array([0, 1, 2]) + + with pytest.raises(exp_err, match=exp_msg): + Categorical([1, 2, 3], ordered=ordered) + + with pytest.raises(exp_err, match=exp_msg): + Categorical.from_codes( + [0, 0, 1], categories=["a", "b", "c"], ordered=ordered + ) + + def test_constructor_empty(self): + # GH 17248 + c = Categorical([]) + expected = Index([]) + tm.assert_index_equal(c.categories, expected) + + c = Categorical([], categories=[1, 2, 3]) + expected = Int64Index([1, 2, 3]) + tm.assert_index_equal(c.categories, expected) + + def test_constructor_empty_boolean(self): + # see gh-22702 + cat = Categorical([], categories=[True, False]) + categories = sorted(cat.categories.tolist()) + assert categories == [False, True] + + def test_constructor_tuples(self): + values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object) + result = Categorical(values) + expected = Index([(1,), (1, 2)], tupleize_cols=False) + tm.assert_index_equal(result.categories, expected) + assert result.ordered is False + + def test_constructor_tuples_datetimes(self): + # numpy will auto reshape when all of the tuples are the + # same len, so add an extra one with 2 items and slice it off + values = np.array( + [ + (Timestamp("2010-01-01"),), + (Timestamp("2010-01-02"),), + (Timestamp("2010-01-01"),), + (Timestamp("2010-01-02"),), + ("a", "b"), + ], + dtype=object, + )[:-1] + result = Categorical(values) + expected = Index( + [(Timestamp("2010-01-01"),), (Timestamp("2010-01-02"),)], + tupleize_cols=False, + ) + tm.assert_index_equal(result.categories, expected) + + def test_constructor_unsortable(self): + + # it works! + arr = np.array([1, 2, 3, datetime.now()], dtype="O") + factor = Categorical(arr, ordered=False) + assert not factor.ordered + + # this however will raise as cannot be sorted + msg = ( + "'values' is not ordered, please explicitly specify the " + "categories order by passing in a categories argument." 
+ ) + with pytest.raises(TypeError, match=msg): + Categorical(arr, ordered=True) + + def test_constructor_interval(self): + result = Categorical( + [Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True + ) + ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) + exp = Categorical(ii, ordered=True) + tm.assert_categorical_equal(result, exp) + tm.assert_index_equal(result.categories, ii) + + def test_constructor(self): + + exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) + c1 = Categorical(exp_arr) + tm.assert_numpy_array_equal(c1.__array__(), exp_arr) + c2 = Categorical(exp_arr, categories=["a", "b", "c"]) + tm.assert_numpy_array_equal(c2.__array__(), exp_arr) + c2 = Categorical(exp_arr, categories=["c", "b", "a"]) + tm.assert_numpy_array_equal(c2.__array__(), exp_arr) + + # categories must be unique + msg = "Categorical categories must be unique" + with pytest.raises(ValueError, match=msg): + Categorical([1, 2], [1, 2, 2]) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ["a", "b", "b"]) + + # The default should be unordered + c1 = Categorical(["a", "b", "c", "a"]) + assert not c1.ordered + + # Categorical as input + c1 = Categorical(["a", "b", "c", "a"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1, categories=["a", "b", "c"]) + tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) + tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) + + # Series of dtype category + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(Series(c1)) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(Series(c1)) + tm.assert_categorical_equal(c1, c2) + + # Series + c1 = Categorical(["a", "b", "c", "a"]) + c2 = Categorical(Series(["a", "b", "c", "a"])) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) + tm.assert_categorical_equal(c1, c2) + + # This should result in integer categories, not float! + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + assert is_integer_dtype(cat.categories) + + # https://github.com/pandas-dev/pandas/issues/3678 + cat = Categorical([np.nan, 1, 2, 3]) + assert is_integer_dtype(cat.categories) + + # this should result in floats + cat = Categorical([np.nan, 1, 2.0, 3]) + assert is_float_dtype(cat.categories) + + cat = Categorical([np.nan, 1.0, 2.0, 3.0]) + assert is_float_dtype(cat.categories) + + # This doesn't work -> this would probably need some kind of "remember + # the original type" feature to try to cast the array interface result + # to... 
+ + # vals = np.asarray(cat[cat.notna()]) + # assert is_integer_dtype(vals) + + # corner cases + cat = Categorical([1]) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + cat = Categorical(["a"]) + assert len(cat.categories) == 1 + assert cat.categories[0] == "a" + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + with tm.assert_produces_warning(FutureWarning): + # GH#38433 + cat = Categorical(1) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + # two arrays + # - when the first is an integer dtype and the second is not + # - when the resulting codes are all -1/NaN + with tm.assert_produces_warning(None): + Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) + + with tm.assert_produces_warning(None): + Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5]) + + # the next one are from the old docs + with tm.assert_produces_warning(None): + Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) + cat = Categorical([1, 2], categories=[1, 2, 3]) + + # this is a legitimate constructor + with tm.assert_produces_warning(None): + Categorical(np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True) + + def test_constructor_with_existing_categories(self): + # GH25318: constructing with pd.Series used to bogusly skip recoding + # categories + c0 = Categorical(["a", "b", "c", "a"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"]) + + c2 = Categorical(c0, categories=c1.categories) + tm.assert_categorical_equal(c1, c2) + + c3 = Categorical(Series(c0), categories=c1.categories) + tm.assert_categorical_equal(c1, c3) + + def test_constructor_not_sequence(self): + # https://github.com/pandas-dev/pandas/issues/16022 + msg = r"^Parameter 'categories' must be list-like, was" + with pytest.raises(TypeError, match=msg): + Categorical(["a", "b"], categories="a") + + def test_constructor_with_null(self): + + # Cannot have NaN in categories + msg = "Categorical categories cannot be null" + with pytest.raises(ValueError, match=msg): + Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"]) + + with pytest.raises(ValueError, match=msg): + Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"]) + + with pytest.raises(ValueError, match=msg): + Categorical( + DatetimeIndex(["nat", "20160101"]), + categories=[NaT, Timestamp("20160101")], + ) + + def test_constructor_with_index(self): + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + tm.assert_categorical_equal(ci.values, Categorical(ci)) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + tm.assert_categorical_equal( + ci.values, Categorical(ci.astype(object), categories=ci.categories) + ) + + def test_constructor_with_generator(self): + # This was raising an Error in isna(single_val).any() because isna + # returned a scalar for a generator + + exp = Categorical([0, 1, 2]) + cat = Categorical(x for x in [0, 1, 2]) + tm.assert_categorical_equal(cat, exp) + cat = Categorical(range(3)) + tm.assert_categorical_equal(cat, exp) + + MultiIndex.from_product([range(5), ["a", "b", "c"]]) + + # check that categories accept generators and sequences + cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) + tm.assert_categorical_equal(cat, exp) + cat = Categorical([0, 1, 2], categories=range(3)) + tm.assert_categorical_equal(cat, exp) + + def test_constructor_with_rangeindex(self): + # RangeIndex is preserved in Categories + rng = 
Index(range(3)) + + cat = Categorical(rng) + tm.assert_index_equal(cat.categories, rng, exact=True) + + cat = Categorical([1, 2, 0], categories=rng) + tm.assert_index_equal(cat.categories, rng, exact=True) + + @pytest.mark.parametrize( + "dtl", + [ + date_range("1995-01-01 00:00:00", periods=5, freq="s"), + date_range("1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"), + timedelta_range("1 day", periods=5, freq="s"), + ], + ) + def test_constructor_with_datetimelike(self, dtl): + # see gh-12077 + # constructor with a datetimelike and NaT + + s = Series(dtl) + c = Categorical(s) + + expected = type(dtl)(s) + expected._data.freq = None + + tm.assert_index_equal(c.categories, expected) + tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8")) + + # with NaT + s2 = s.copy() + s2.iloc[-1] = NaT + c = Categorical(s2) + + expected = type(dtl)(s2.dropna()) + expected._data.freq = None + + tm.assert_index_equal(c.categories, expected) + + exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) + tm.assert_numpy_array_equal(c.codes, exp) + + result = repr(c) + assert "NaT" in result + + def test_constructor_from_index_series_datetimetz(self): + idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") + idx = idx._with_freq(None) # freq not preserved in result.categories + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_date_objects(self): + # we dont cast date objects to timestamps, matching Index constructor + v = date.today() + + cat = Categorical([v, v]) + assert cat.categories.dtype == object + assert type(cat.categories[0]) is date + + def test_constructor_from_index_series_timedelta(self): + idx = timedelta_range("1 days", freq="D", periods=3) + idx = idx._with_freq(None) # freq not preserved in result.categories + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_from_index_series_period(self): + idx = period_range("2015-01-01", freq="D", periods=3) + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + @pytest.mark.parametrize( + "values", + [ + np.array([1.0, 1.2, 1.8, np.nan]), + np.array([1, 2, 3], dtype="int64"), + ["a", "b", "c", np.nan], + [pd.Period("2014-01"), pd.Period("2014-02"), NaT], + [Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT], + [ + Timestamp("2014-01-01", tz="US/Eastern"), + Timestamp("2014-01-02", tz="US/Eastern"), + NaT, + ], + ], + ) + def test_constructor_invariant(self, values): + # GH 14190 + c = Categorical(values) + c2 = Categorical(c) + tm.assert_categorical_equal(c, c2) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_constructor_with_dtype(self, ordered): + categories = ["b", "a", "c"] + dtype = CategoricalDtype(categories, ordered=ordered) + result = Categorical(["a", "b", "a", "c"], dtype=dtype) + expected = Categorical( + ["a", "b", "a", "c"], categories=categories, ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + assert result.ordered is ordered + + def test_constructor_dtype_and_others_raises(self): + dtype = CategoricalDtype(["a", "b"], ordered=True) + msg = "Cannot specify `categories` or `ordered` together with `dtype`." 
+ with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], categories=["a", "b"], dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ordered=True, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ordered=False, dtype=dtype) + + @pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]]) + @pytest.mark.parametrize("ordered", [True, False]) + def test_constructor_str_category(self, categories, ordered): + result = Categorical( + ["a", "b"], categories=categories, ordered=ordered, dtype="category" + ) + expected = Categorical(["a", "b"], categories=categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_constructor_str_unknown(self): + with pytest.raises(ValueError, match="Unknown dtype"): + Categorical([1, 2], dtype="foo") + + def test_constructor_np_strs(self): + # GH#31499 Hastable.map_locations needs to work on np.str_ objects + cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) + assert all(isinstance(x, np.str_) for x in cat.categories) + + def test_constructor_from_categorical_with_dtype(self): + dtype = CategoricalDtype(["a", "b", "c"], ordered=True) + values = Categorical(["a", "b", "d"]) + result = Categorical(values, dtype=dtype) + # We use dtype.categories, not values.categories + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "c"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_constructor_from_categorical_with_unknown_dtype(self): + dtype = CategoricalDtype(None, ordered=True) + values = Categorical(["a", "b", "d"]) + result = Categorical(values, dtype=dtype) + # We use values.categories, not dtype.categories + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "d"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_constructor_from_categorical_string(self): + values = Categorical(["a", "b", "d"]) + # use categories, ordered + result = Categorical( + values, categories=["a", "b", "c"], ordered=True, dtype="category" + ) + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "c"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + # No string + result = Categorical(values, categories=["a", "b", "c"], ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_constructor_with_categorical_categories(self): + # GH17884 + expected = Categorical(["a", "b"], categories=["a", "b", "c"]) + + result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list]) + def test_construction_with_null(self, klass, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/31927 + values = klass(["a", nulls_fixture, "b"]) + result = Categorical(values) + + dtype = CategoricalDtype(["a", "b"]) + codes = [0, -1, 1] + expected = Categorical.from_codes(codes=codes, dtype=dtype) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_nullable_int_categories(self, any_numeric_ea_dtype): + # GH#39649 + cats = pd.array(range(5), dtype=any_numeric_ea_dtype) + codes = np.random.randint(5, size=3) + dtype = CategoricalDtype(cats) + arr = Categorical.from_codes(codes, dtype=dtype) + assert arr.categories.dtype == cats.dtype + 
tm.assert_index_equal(arr.categories, Index(cats)) + + def test_from_codes_empty(self): + cat = ["a", "b", "c"] + result = Categorical.from_codes([], categories=cat) + expected = Categorical([], categories=cat) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_too_few_categories(self): + dtype = CategoricalDtype(categories=[1, 2]) + msg = "codes need to be between " + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([1, 2], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([1, 2], dtype=dtype) + + def test_from_codes_non_int_codes(self): + dtype = CategoricalDtype(categories=[1, 2]) + msg = "codes need to be array-like integers" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(["a"], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(["a"], dtype=dtype) + + def test_from_codes_non_unique_categories(self): + with pytest.raises(ValueError, match="Categorical categories must be unique"): + Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) + + def test_from_codes_nan_cat_included(self): + with pytest.raises(ValueError, match="Categorical categories cannot be null"): + Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) + + def test_from_codes_too_negative(self): + dtype = CategoricalDtype(categories=["a", "b", "c"]) + msg = r"codes need to be between -1 and len\(categories\)-1" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([-2, 1, 2], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([-2, 1, 2], dtype=dtype) + + def test_from_codes(self): + dtype = CategoricalDtype(categories=["a", "b", "c"]) + exp = Categorical(["a", "b", "c"], ordered=False) + res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) + tm.assert_categorical_equal(exp, res) + + res = Categorical.from_codes([0, 1, 2], dtype=dtype) + tm.assert_categorical_equal(exp, res) + + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_categorical_categories(self, klass): + # GH17884 + expected = Categorical(["a", "b"], categories=["a", "b", "c"]) + + result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_non_unique_categorical_categories(self, klass): + with pytest.raises(ValueError, match="Categorical categories must be unique"): + Categorical.from_codes([0, 1], klass(["a", "b", "a"])) + + def test_from_codes_with_nan_code(self): + # GH21767 + codes = [1, 2, np.nan] + dtype = CategoricalDtype(categories=["a", "b", "c"]) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, categories=dtype.categories) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype=dtype) + + @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) + def test_from_codes_with_float(self, codes): + # GH21767 + # float codes should raise even if values are equal to integers + dtype = CategoricalDtype(categories=["a", "b", "c"]) + + msg = "codes need to be array-like integers" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, dtype=dtype) + + def 
test_from_codes_with_dtype_raises(self): + msg = "Cannot specify" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes( + [0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"]) + ) + + with pytest.raises(ValueError, match=msg): + Categorical.from_codes( + [0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"]) + ) + + def test_from_codes_neither(self): + msg = "Both were None" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([0, 1]) + + def test_from_codes_with_nullable_int(self): + codes = pd.array([0, 1], dtype="Int64") + categories = ["a", "b"] + + result = Categorical.from_codes(codes, categories=categories) + expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_with_nullable_int_na_raises(self): + codes = pd.array([0, None], dtype="Int64") + categories = ["a", "b"] + + msg = "codes cannot contain NA values" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, categories=categories) + + @pytest.mark.parametrize("dtype", [None, "category"]) + def test_from_inferred_categories(self, dtype): + cats = ["a", "b"] + codes = np.array([0, 0, 1, 1], dtype="i8") + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes(codes, cats) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, "category"]) + def test_from_inferred_categories_sorts(self, dtype): + cats = ["b", "a"] + codes = np.array([0, 1, 1, 1], dtype="i8") + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_from_inferred_categories_dtype(self): + cats = ["a", "b", "d"] + codes = np.array([0, 1, 0, 2], dtype="i8") + dtype = CategoricalDtype(["c", "b", "a"], ordered=True) + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical( + ["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_from_inferred_categories_coerces(self): + cats = ["1", "2", "bad"] + codes = np.array([0, 0, 1, 2], dtype="i8") + dtype = CategoricalDtype([1, 2]) + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical([1, 1, 2, np.nan]) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("ordered", [None, True, False]) + def test_construction_with_ordered(self, ordered): + # GH 9347, 9190 + cat = Categorical([0, 1, 2], ordered=ordered) + assert cat.ordered == bool(ordered) + + def test_constructor_imaginary(self): + values = [1, 2, 3 + 1j] + c1 = Categorical(values) + tm.assert_index_equal(c1.categories, Index(values)) + tm.assert_numpy_array_equal(np.array(c1), np.array(values)) + + def test_constructor_string_and_tuples(self): + # GH 21416 + c = Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object)) + expected_index = Index([("a", "b"), ("b", "a"), "c"]) + assert c.categories.equals(expected_index) + + def test_interval(self): + idx = pd.interval_range(0, 10, periods=10) + cat = Categorical(idx, categories=idx) + expected_codes = np.arange(10, dtype="int8") + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # infer categories + cat = Categorical(idx) + tm.assert_numpy_array_equal(cat.codes, expected_codes) + 
tm.assert_index_equal(cat.categories, idx) + + # list values + cat = Categorical(list(idx)) + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # list values, categories + cat = Categorical(list(idx), categories=list(idx)) + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # shuffled + values = idx.take([1, 2, 0]) + cat = Categorical(values, categories=idx) + tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="int8")) + tm.assert_index_equal(cat.categories, idx) + + # extra + values = pd.interval_range(8, 11, periods=3) + cat = Categorical(values, categories=idx) + expected_codes = np.array([8, 9, -1], dtype="int8") + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + # overlapping + idx = IntervalIndex([Interval(0, 2), Interval(0, 1)]) + cat = Categorical(idx, categories=idx) + expected_codes = np.array([0, 1], dtype="int8") + tm.assert_numpy_array_equal(cat.codes, expected_codes) + tm.assert_index_equal(cat.categories, idx) + + def test_categorical_extension_array_nullable(self, nulls_fixture): + # GH: + arr = pd.arrays.StringArray._from_sequence([nulls_fixture] * 2) + result = Categorical(arr) + assert arr.dtype == result.categories.dtype + expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype)) + tm.assert_categorical_equal(result, expected) + + def test_from_sequence_copy(self): + cat = Categorical(np.arange(5).repeat(2)) + result = Categorical._from_sequence(cat, dtype=None, copy=False) + + # more generally, we'd be OK with a view + assert result._codes is cat._codes + + result = Categorical._from_sequence(cat, dtype=None, copy=True) + + assert not tm.shares_memory(result, cat) + + @pytest.mark.xfail( + not IS64 or is_platform_windows(), + reason="Incorrectly raising in astype_overflowsafe", + ) + def test_constructor_datetime64_non_nano(self): + categories = np.arange(10).view("M8[D]") + values = categories[::2].copy() + + cat = Categorical(values, categories=categories) + assert (cat == values).all() + + def test_constructor_preserves_freq(self): + # GH33830 freq retention in categorical + dti = date_range("2016-01-01", periods=5) + + expected = dti.freq + + cat = Categorical(dti) + result = cat.categories.freq + + assert expected == result diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py new file mode 100644 index 00000000..5acb62c5 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -0,0 +1,136 @@ +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestCategoricalDtypes: + def test_is_dtype_equal_deprecated(self): + # GH#37545 + c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False) + + with tm.assert_produces_warning(FutureWarning): + c1.is_dtype_equal(c1) + + def test_categories_match_up_to_permutation(self): + + # test dtype comparisons between cats + + c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False) + c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False) + c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True) + assert c1._categories_match_up_to_permutation(c1) + assert c2._categories_match_up_to_permutation(c2) + assert c3._categories_match_up_to_permutation(c3) + assert 
c1._categories_match_up_to_permutation(c2) + assert not c1._categories_match_up_to_permutation(c3) + assert not c1._categories_match_up_to_permutation(Index(list("aabca"))) + assert not c1._categories_match_up_to_permutation(c1.astype(object)) + assert c1._categories_match_up_to_permutation(CategoricalIndex(c1)) + assert c1._categories_match_up_to_permutation( + CategoricalIndex(c1, categories=list("cab")) + ) + assert not c1._categories_match_up_to_permutation( + CategoricalIndex(c1, ordered=True) + ) + + # GH 16659 + s1 = Series(c1) + s2 = Series(c2) + s3 = Series(c3) + assert c1._categories_match_up_to_permutation(s1) + assert c2._categories_match_up_to_permutation(s2) + assert c3._categories_match_up_to_permutation(s3) + assert c1._categories_match_up_to_permutation(s2) + assert not c1._categories_match_up_to_permutation(s3) + assert not c1._categories_match_up_to_permutation(s1.astype(object)) + + def test_set_dtype_same(self): + c = Categorical(["a", "b", "c"]) + result = c._set_dtype(CategoricalDtype(["a", "b", "c"])) + tm.assert_categorical_equal(result, c) + + def test_set_dtype_new_categories(self): + c = Categorical(["a", "b", "c"]) + result = c._set_dtype(CategoricalDtype(list("abcd"))) + tm.assert_numpy_array_equal(result.codes, c.codes) + tm.assert_index_equal(result.dtype.categories, Index(list("abcd"))) + + @pytest.mark.parametrize( + "values, categories, new_categories", + [ + # No NaNs, same cats, same order + (["a", "b", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["a", "b", "a"], ["a", "b"], ["b", "a"]), + # Same, unsorted + (["b", "a", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["b", "a", "a"], ["a", "b"], ["b", "a"]), + # NaNs + (["a", "b", "c"], ["a", "b"], ["a", "b"]), + (["a", "b", "c"], ["a", "b"], ["b", "a"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + # Introduce NaNs + (["a", "b", "c"], ["a", "b"], ["a"]), + (["a", "b", "c"], ["a", "b"], ["b"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + # No overlap + (["a", "b", "c"], ["a", "b"], ["d", "e"]), + ], + ) + @pytest.mark.parametrize("ordered", [True, False]) + def test_set_dtype_many(self, values, categories, new_categories, ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c._set_dtype(expected.dtype) + tm.assert_categorical_equal(result, expected) + + def test_set_dtype_no_overlap(self): + c = Categorical(["a", "b", "c"], ["d", "e"]) + result = c._set_dtype(CategoricalDtype(["a", "b"])) + expected = Categorical([None, None, None], categories=["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_codes_dtypes(self): + + # GH 8453 + result = Categorical(["foo", "bar", "baz"]) + assert result.codes.dtype == "int8" + + result = Categorical([f"foo{i:05d}" for i in range(400)]) + assert result.codes.dtype == "int16" + + result = Categorical([f"foo{i:05d}" for i in range(40000)]) + assert result.codes.dtype == "int32" + + # adding cats + result = Categorical(["foo", "bar", "baz"]) + assert result.codes.dtype == "int8" + result = result.add_categories([f"foo{i:05d}" for i in range(400)]) + assert result.codes.dtype == "int16" + + # removing cats + result = result.remove_categories([f"foo{i:05d}" for i in range(300)]) + assert result.codes.dtype == "int8" + + def test_iter_python_types(self): + # GH-19909 + cat = Categorical([1, 2]) + assert isinstance(list(cat)[0], int) + assert 
isinstance(cat.tolist()[0], int) + + def test_iter_python_types_datetime(self): + cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")]) + assert isinstance(list(cat)[0], Timestamp) + assert isinstance(cat.tolist()[0], Timestamp) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py new file mode 100644 index 00000000..94e96664 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -0,0 +1,393 @@ +import math + +import numpy as np +import pytest + +from pandas import ( + NA, + Categorical, + CategoricalIndex, + Index, + Interval, + IntervalIndex, + NaT, + PeriodIndex, + Series, + Timedelta, + Timestamp, +) +import pandas._testing as tm +import pandas.core.common as com + + +class TestCategoricalIndexingWithFactor: + def test_getitem(self, factor): + assert factor[0] == "a" + assert factor[-1] == "c" + + subf = factor[[0, 1, 2]] + tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8)) + + subf = factor[np.asarray(factor) == "c"] + tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8)) + + def test_setitem(self, factor): + + # int/positional + c = factor.copy() + c[0] = "b" + assert c[0] == "b" + c[-1] = "a" + assert c[-1] == "a" + + # boolean + c = factor.copy() + indexer = np.zeros(len(c), dtype="bool") + indexer[0] = True + indexer[-1] = True + c[indexer] = "c" + expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True) + + tm.assert_categorical_equal(c, expected) + + @pytest.mark.parametrize( + "other", + [Categorical(["b", "a"]), Categorical(["b", "a"], categories=["b", "a"])], + ) + def test_setitem_same_but_unordered(self, other): + # GH-24142 + target = Categorical(["a", "b"], categories=["a", "b"]) + mask = np.array([True, False]) + target[mask] = other[mask] + expected = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_categorical_equal(target, expected) + + @pytest.mark.parametrize( + "other", + [ + Categorical(["b", "a"], categories=["b", "a", "c"]), + Categorical(["b", "a"], categories=["a", "b", "c"]), + Categorical(["a", "a"], categories=["a"]), + Categorical(["b", "b"], categories=["b"]), + ], + ) + def test_setitem_different_unordered_raises(self, other): + # GH-24142 + target = Categorical(["a", "b"], categories=["a", "b"]) + mask = np.array([True, False]) + msg = "Cannot set a Categorical with another, without identical categories" + with pytest.raises(TypeError, match=msg): + target[mask] = other[mask] + + @pytest.mark.parametrize( + "other", + [ + Categorical(["b", "a"]), + Categorical(["b", "a"], categories=["b", "a"], ordered=True), + Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True), + ], + ) + def test_setitem_same_ordered_raises(self, other): + # Gh-24142 + target = Categorical(["a", "b"], categories=["a", "b"], ordered=True) + mask = np.array([True, False]) + msg = "Cannot set a Categorical with another, without identical categories" + with pytest.raises(TypeError, match=msg): + target[mask] = other[mask] + + def test_setitem_tuple(self): + # GH#20439 + cat = Categorical([(0, 1), (0, 2), (0, 1)]) + + # This should not raise + cat[1] = cat[0] + assert cat[1] == (0, 1) + + def test_setitem_listlike(self): + + # GH#9469 + # properly coerce the input indexers + np.random.seed(1) + cat = Categorical( + np.random.randint(0, 5, size=150000).astype(np.int8) + ).add_categories([-1000]) + indexer = np.array([100000]).astype(np.int64) + cat[indexer] = -1000 + + # we are asserting the code result 
here + # which maps to the -1000 category + result = cat.codes[np.array([100000]).astype(np.int64)] + tm.assert_numpy_array_equal(result, np.array([5], dtype="int8")) + + +class TestCategoricalIndexing: + def test_getitem_slice(self): + cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) + sliced = cat[3] + assert sliced == "d" + + sliced = cat[3:5] + expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"]) + tm.assert_categorical_equal(sliced, expected) + + def test_getitem_listlike(self): + + # GH 9469 + # properly coerce the input indexers + np.random.seed(1) + c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)) + result = c.codes[np.array([100000]).astype(np.int64)] + expected = c[np.array([100000]).astype(np.int64)].codes + tm.assert_numpy_array_equal(result, expected) + + def test_periodindex(self): + idx1 = PeriodIndex( + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M" + ) + + cat1 = Categorical(idx1) + str(cat1) + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8) + exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + tm.assert_numpy_array_equal(cat1._codes, exp_arr) + tm.assert_index_equal(cat1.categories, exp_idx) + + idx2 = PeriodIndex( + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M" + ) + cat2 = Categorical(idx2, ordered=True) + str(cat2) + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8) + exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + tm.assert_numpy_array_equal(cat2._codes, exp_arr) + tm.assert_index_equal(cat2.categories, exp_idx2) + + idx3 = PeriodIndex( + [ + "2013-12", + "2013-11", + "2013-10", + "2013-09", + "2013-08", + "2013-07", + "2013-05", + ], + freq="M", + ) + cat3 = Categorical(idx3, ordered=True) + exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8) + exp_idx = PeriodIndex( + [ + "2013-05", + "2013-07", + "2013-08", + "2013-09", + "2013-10", + "2013-11", + "2013-12", + ], + freq="M", + ) + tm.assert_numpy_array_equal(cat3._codes, exp_arr) + tm.assert_index_equal(cat3.categories, exp_idx) + + def test_categories_assignments(self): + cat = Categorical(["a", "b", "c", "a"]) + exp = np.array([1, 2, 3, 1], dtype=np.int64) + with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): + cat.categories = [1, 2, 3] + tm.assert_numpy_array_equal(cat.__array__(), exp) + tm.assert_index_equal(cat.categories, Index([1, 2, 3])) + + @pytest.mark.parametrize( + "null_val", + [None, np.nan, NaT, NA, math.nan, "NaT", "nat", "NAT", "nan", "NaN", "NAN"], + ) + def test_periodindex_on_null_types(self, null_val): + # GH 46673 + result = PeriodIndex(["2022-04-06", "2022-04-07", null_val], freq="D") + expected = PeriodIndex(["2022-04-06", "2022-04-07", "NaT"], dtype="period[D]") + assert result[2] is NaT + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) + def test_categories_assignments_wrong_length_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"]) + msg = ( + "new categories need to have the same number of items " + "as the old categories!" 
+ ) + with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): + with pytest.raises(ValueError, match=msg): + cat.categories = new_categories + + # Combinations of sorted/unique: + @pytest.mark.parametrize( + "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]] + ) + # Combinations of missing/unique + @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]]) + @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex]) + @pytest.mark.parametrize("dtype", [None, "category", "key"]) + def test_get_indexer_non_unique(self, idx_values, key_values, key_class, dtype): + # GH 21448 + key = key_class(key_values, categories=range(1, 5)) + + if dtype == "key": + dtype = key.dtype + + # Test for flat index and CategoricalIndex with same/different cats: + idx = Index(idx_values, dtype=dtype) + expected, exp_miss = idx.get_indexer_non_unique(key_values) + result, res_miss = idx.get_indexer_non_unique(key) + + tm.assert_numpy_array_equal(expected, result) + tm.assert_numpy_array_equal(exp_miss, res_miss) + + exp_unique = idx.unique().get_indexer(key_values) + res_unique = idx.unique().get_indexer(key) + tm.assert_numpy_array_equal(res_unique, exp_unique) + + def test_where_unobserved_nan(self): + ser = Series(Categorical(["a", "b"])) + result = ser.where([True, False]) + expected = Series(Categorical(["a", None], categories=["a", "b"])) + tm.assert_series_equal(result, expected) + + # all NA + ser = Series(Categorical(["a", "b"])) + result = ser.where([False, False]) + expected = Series(Categorical([None, None], categories=["a", "b"])) + tm.assert_series_equal(result, expected) + + def test_where_unobserved_categories(self): + ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])) + result = ser.where([True, True, False], other="b") + expected = Series(Categorical(["a", "b", "b"], categories=ser.cat.categories)) + tm.assert_series_equal(result, expected) + + def test_where_other_categorical(self): + ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])) + other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"]) + result = ser.where([True, False, True], other) + expected = Series(Categorical(["a", "c", "c"], dtype=ser.dtype)) + tm.assert_series_equal(result, expected) + + def test_where_new_category_raises(self): + ser = Series(Categorical(["a", "b", "c"])) + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(TypeError, match=msg): + ser.where([True, False, True], "d") + + def test_where_ordered_differs_rasies(self): + ser = Series( + Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True) + ) + other = Categorical( + ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True + ) + with pytest.raises(TypeError, match="without identical categories"): + ser.where([True, False, True], other) + + +class TestContains: + def test_contains(self): + # GH#21508 + cat = Categorical(list("aabbca"), categories=list("cab")) + + assert "b" in cat + assert "z" not in cat + assert np.nan not in cat + with pytest.raises(TypeError, match="unhashable type: 'list'"): + assert [1] in cat + + # assert codes NOT in index + assert 0 not in cat + assert 1 not in cat + + cat = Categorical(list("aabbca") + [np.nan], categories=list("cab")) + assert np.nan in cat + + @pytest.mark.parametrize( + "item, expected", + [ + (Interval(0, 1), True), + (1.5, True), + (Interval(0.5, 1.5), False), + ("a", False), + (Timestamp(1), False), + (Timedelta(1), False), + ], + 
ids=str, + ) + def test_contains_interval(self, item, expected): + # GH#23705 + cat = Categorical(IntervalIndex.from_breaks(range(3))) + result = item in cat + assert result is expected + + def test_contains_list(self): + # GH#21729 + cat = Categorical([1, 2, 3]) + + assert "a" not in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a"] in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a", "b"] in cat + + +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean(index): + ser = Series(range(3)) + idx = Categorical([True, False, True]) + if index: + idx = CategoricalIndex(idx) + + assert com.is_bool_indexer(idx) + result = ser[idx] + expected = ser[idx.astype("object")] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean_na_treated_as_false(index): + # https://github.com/pandas-dev/pandas/issues/31503 + ser = Series(range(3)) + idx = Categorical([True, False, None]) + if index: + idx = CategoricalIndex(idx) + + result = ser[idx] + expected = ser[idx.fillna(False)] + + tm.assert_series_equal(result, expected) + + +@pytest.fixture +def non_coercible_categorical(monkeypatch): + """ + Monkeypatch Categorical.__array__ to ensure no implicit conversion. + + Raises + ------ + ValueError + When Categorical.__array__ is called. + """ + # TODO(Categorical): identify other places where this may be + # useful and move to a conftest.py + def array(self, dtype=None): + raise ValueError("I cannot be converted.") + + with monkeypatch.context() as m: + m.setattr(Categorical, "__array__", array) + yield + + +def test_series_at(): + arr = Categorical(["a", "b", "c"]) + ser = Series(arr) + result = ser.at[0] + assert result == "a" diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py new file mode 100644 index 00000000..fb5330a9 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -0,0 +1,213 @@ +import collections + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + isna, +) +import pandas._testing as tm + + +class TestCategoricalMissing: + def test_isna(self): + exp = np.array([False, False, True]) + cat = Categorical(["a", "b", np.nan]) + res = cat.isna() + + tm.assert_numpy_array_equal(res, exp) + + def test_na_flags_int_categories(self): + # #1457 + + categories = list(range(10)) + labels = np.random.randint(0, 10, 20) + labels[::5] = -1 + + cat = Categorical(labels, categories, fastpath=True) + repr(cat) + + tm.assert_numpy_array_equal(isna(cat), labels == -1) + + def test_nan_handling(self): + + # Nans are represented as -1 in codes + c = Categorical(["a", "b", np.nan, "a"]) + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) + c[1] = np.nan + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8)) + + # Adding nan to categories should make assigned nan point to the + # category! 
+ c = Categorical(["a", "b", np.nan, "a"]) + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) + + def test_set_dtype_nans(self): + c = Categorical(["a", "b", np.nan]) + result = c._set_dtype(CategoricalDtype(["a", "c"])) + tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8")) + + def test_set_item_nan(self): + cat = Categorical([1, 2, 3]) + cat[1] = np.nan + + exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(cat, exp) + + @pytest.mark.parametrize( + "fillna_kwargs, msg", + [ + ( + {"value": 1, "method": "ffill"}, + "Cannot specify both 'value' and 'method'.", + ), + ({}, "Must specify a fill 'value' or 'method'."), + ({"method": "bad"}, "Invalid fill method. Expecting .* bad"), + ( + {"value": Series([1, 2, 3, 4, "a"])}, + "Cannot setitem on a Categorical with a new category", + ), + ], + ) + def test_fillna_raises(self, fillna_kwargs, msg): + # https://github.com/pandas-dev/pandas/issues/19682 + # https://github.com/pandas-dev/pandas/issues/13628 + cat = Categorical([1, 2, 3, None, None]) + + if len(fillna_kwargs) == 1 and "value" in fillna_kwargs: + err = TypeError + else: + err = ValueError + + with pytest.raises(err, match=msg): + cat.fillna(**fillna_kwargs) + + @pytest.mark.parametrize("named", [True, False]) + def test_fillna_iterable_category(self, named): + # https://github.com/pandas-dev/pandas/issues/21097 + if named: + Point = collections.namedtuple("Point", "x y") + else: + Point = lambda *args: args # tuple + cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object)) + result = cat.fillna(Point(0, 0)) + expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)]) + + tm.assert_categorical_equal(result, expected) + + # Case where the Point is not among our categories; we want ValueError, + # not NotImplementedError GH#41914 + cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object)) + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(TypeError, match=msg): + cat.fillna(Point(0, 0)) + + def test_fillna_array(self): + # accept Categorical or ndarray value if it holds appropriate values + cat = Categorical(["A", "B", "C", None, None]) + + other = cat.fillna("C") + result = cat.fillna(other) + tm.assert_categorical_equal(result, other) + assert isna(cat[-1]) # didn't modify original inplace + + other = np.array(["A", "B", "C", "B", "A"]) + result = cat.fillna(other) + expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype) + tm.assert_categorical_equal(result, expected) + assert isna(cat[-1]) # didn't modify original inplace + + @pytest.mark.parametrize( + "values, expected", + [ + ([1, 2, 3], np.array([False, False, False])), + ([1, 2, np.nan], np.array([False, False, True])), + ([1, 2, np.inf], np.array([False, False, True])), + ([1, 2, pd.NA], np.array([False, False, True])), + ], + ) + def test_use_inf_as_na(self, values, expected): + # https://github.com/pandas-dev/pandas/issues/33594 + with pd.option_context("mode.use_inf_as_na", True): + cat = Categorical(values) + result = cat.isna() + tm.assert_numpy_array_equal(result, expected) + + result = Series(cat).isna() + expected = Series(expected) + tm.assert_series_equal(result, expected) + + result = DataFrame(cat).isna() + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values, expected", + [ + ([1, 2, 3], np.array([False, False, False])), + ([1, 
2, np.nan], np.array([False, False, True])), + ([1, 2, np.inf], np.array([False, False, True])), + ([1, 2, pd.NA], np.array([False, False, True])), + ], + ) + def test_use_inf_as_na_outside_context(self, values, expected): + # https://github.com/pandas-dev/pandas/issues/33594 + # Using isna directly for Categorical will fail in general here + cat = Categorical(values) + + with pd.option_context("mode.use_inf_as_na", True): + result = isna(cat) + tm.assert_numpy_array_equal(result, expected) + + result = isna(Series(cat)) + expected = Series(expected) + tm.assert_series_equal(result, expected) + + result = isna(DataFrame(cat)) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "a1, a2, categories", + [ + (["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]), + ([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]), + ], + ) + def test_compare_categorical_with_missing(self, a1, a2, categories): + # GH 28384 + cat_type = CategoricalDtype(categories) + + # != + result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type) + expected = Series(a1) != Series(a2) + tm.assert_series_equal(result, expected) + + # == + result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type) + expected = Series(a1) == Series(a2) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "na_value, dtype", + [ + (pd.NaT, "datetime64[ns]"), + (None, "float64"), + (np.nan, "float64"), + (pd.NA, "float64"), + ], + ) + def test_categorical_only_missing_values_no_cast(self, na_value, dtype): + # GH#44900 + result = Categorical([na_value, na_value]) + tm.assert_index_equal(result.categories, Index([], dtype=dtype)) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py new file mode 100644 index 00000000..9642691b --- /dev/null +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -0,0 +1,406 @@ +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestCategoricalOpsWithFactor: + def test_categories_none_comparisons(self): + factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) + tm.assert_categorical_equal(factor, factor) + + def test_comparisons(self, factor): + result = factor[factor == "a"] + expected = factor[np.asarray(factor) == "a"] + tm.assert_categorical_equal(result, expected) + + result = factor[factor != "a"] + expected = factor[np.asarray(factor) != "a"] + tm.assert_categorical_equal(result, expected) + + result = factor[factor < "c"] + expected = factor[np.asarray(factor) < "c"] + tm.assert_categorical_equal(result, expected) + + result = factor[factor > "a"] + expected = factor[np.asarray(factor) > "a"] + tm.assert_categorical_equal(result, expected) + + result = factor[factor >= "b"] + expected = factor[np.asarray(factor) >= "b"] + tm.assert_categorical_equal(result, expected) + + result = factor[factor <= "b"] + expected = factor[np.asarray(factor) <= "b"] + tm.assert_categorical_equal(result, expected) + + n = len(factor) + + other = factor[np.random.permutation(n)] + result = factor == other + expected = np.asarray(factor) == np.asarray(other) + tm.assert_numpy_array_equal(result, expected) + + result = factor == "d" + expected = np.zeros(len(factor), dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + # comparisons with categoricals + cat_rev = Categorical(["a", "b", "c"], categories=["c", "b", 
"a"], ordered=True) + cat_rev_base = Categorical( + ["b", "b", "b"], categories=["c", "b", "a"], ordered=True + ) + cat = Categorical(["a", "b", "c"], ordered=True) + cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True) + + # comparisons need to take categories ordering into account + res_rev = cat_rev > cat_rev_base + exp_rev = np.array([True, False, False]) + tm.assert_numpy_array_equal(res_rev, exp_rev) + + res_rev = cat_rev < cat_rev_base + exp_rev = np.array([False, False, True]) + tm.assert_numpy_array_equal(res_rev, exp_rev) + + res = cat > cat_base + exp = np.array([False, False, True]) + tm.assert_numpy_array_equal(res, exp) + + # Only categories with same categories can be compared + msg = "Categoricals can only be compared if 'categories' are the same" + with pytest.raises(TypeError, match=msg): + cat > cat_rev + + cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"]) + + with pytest.raises(TypeError, match=msg): + cat_rev > cat_rev_base2 + + # Only categories with same ordering information can be compared + cat_unorderd = cat.set_ordered(False) + assert not (cat > cat).any() + + with pytest.raises(TypeError, match=msg): + cat > cat_unorderd + + # comparison (in both directions) with Series will raise + s = Series(["b", "b", "b"]) + msg = ( + "Cannot compare a Categorical for op __gt__ with type " + r"" + ) + with pytest.raises(TypeError, match=msg): + cat > s + with pytest.raises(TypeError, match=msg): + cat_rev > s + with pytest.raises(TypeError, match=msg): + s < cat + with pytest.raises(TypeError, match=msg): + s < cat_rev + + # comparison with numpy.array will raise in both direction, but only on + # newer numpy versions + a = np.array(["b", "b", "b"]) + with pytest.raises(TypeError, match=msg): + cat > a + with pytest.raises(TypeError, match=msg): + cat_rev > a + + # Make sure that unequal comparison take the categories order in + # account + cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True) + exp = np.array([True, False, False]) + res = cat_rev > "b" + tm.assert_numpy_array_equal(res, exp) + + # check that zero-dim array gets unboxed + res = cat_rev > np.array("b") + tm.assert_numpy_array_equal(res, exp) + + +class TestCategoricalOps: + def test_compare_frame(self): + # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame + data = ["a", "b", 2, "a"] + cat = Categorical(data) + + df = DataFrame(cat) + + result = cat == df.T + expected = DataFrame([[True, True, True, True]]) + tm.assert_frame_equal(result, expected) + + result = cat[::-1] != df.T + expected = DataFrame([[False, True, True, False]]) + tm.assert_frame_equal(result, expected) + + def test_compare_frame_raises(self, comparison_op): + # alignment raises unless we transpose + op = comparison_op + cat = Categorical(["a", "b", 2, "a"]) + df = DataFrame(cat) + msg = "Unable to coerce to Series, length must be 1: given 4" + with pytest.raises(ValueError, match=msg): + op(cat, df) + + def test_datetime_categorical_comparison(self): + dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True) + tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True])) + tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True])) + + def test_reflected_comparison_with_scalars(self): + # GH8658 + cat = Categorical([1, 2, 3], ordered=True) + tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True])) + tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True])) + + def 
test_comparison_with_unknown_scalars(self): + # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for unequal comps, but not for equal/not equal + cat = Categorical([1, 2, 3], ordered=True) + + msg = "Invalid comparison between dtype=category and int" + with pytest.raises(TypeError, match=msg): + cat < 4 + with pytest.raises(TypeError, match=msg): + cat > 4 + with pytest.raises(TypeError, match=msg): + 4 < cat + with pytest.raises(TypeError, match=msg): + 4 > cat + + tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False])) + tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) + + def test_comparison_with_tuple(self): + cat = Categorical(np.array(["foo", (0, 1), 3, (0, 1)], dtype=object)) + + result = cat == "foo" + expected = np.array([True, False, False, False], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + result = cat == (0, 1) + expected = np.array([False, True, False, True], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + result = cat != (0, 1) + tm.assert_numpy_array_equal(result, ~expected) + + def test_comparison_of_ordered_categorical_with_nan_to_scalar( + self, compare_operators_no_eq_ne + ): + # https://github.com/pandas-dev/pandas/issues/26504 + # BUG: fix ordered categorical comparison with missing values (#26504 ) + # and following comparisons with scalars in categories with missing + # values should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + scalar = 2 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) + actual = getattr(cat, compare_operators_no_eq_ne)(scalar) + tm.assert_numpy_array_equal(actual, expected) + + def test_comparison_of_ordered_categorical_with_nan_to_listlike( + self, compare_operators_no_eq_ne + ): + # https://github.com/pandas-dev/pandas/issues/26504 + # and following comparisons of missing values in ordered Categorical + # with listlike should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) + actual = getattr(cat, compare_operators_no_eq_ne)(other) + tm.assert_numpy_array_equal(actual, expected) + + @pytest.mark.parametrize( + "data,reverse,base", + [(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])], + ) + def test_comparisons(self, data, reverse, base): + cat_rev = Series(Categorical(data, categories=reverse, ordered=True)) + cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True)) + cat = Series(Categorical(data, ordered=True)) + cat_base = Series( + Categorical(base, categories=cat.cat.categories, ordered=True) + ) + s = Series(base) + a = np.array(base) + + # comparisons need to take categories ordering into account + res_rev = cat_rev > cat_rev_base + exp_rev = Series([True, False, False]) + tm.assert_series_equal(res_rev, exp_rev) + + res_rev = cat_rev < cat_rev_base + exp_rev = Series([False, False, True]) + tm.assert_series_equal(res_rev, exp_rev) + + res = cat > cat_base + exp = Series([False, False, True]) + tm.assert_series_equal(res, exp) + + scalar = base[1] + res = cat > scalar + exp = Series([False, False, True]) 
+ exp2 = cat.values > scalar + tm.assert_series_equal(res, exp) + tm.assert_numpy_array_equal(res.values, exp2) + res_rev = cat_rev > scalar + exp_rev = Series([True, False, False]) + exp_rev2 = cat_rev.values > scalar + tm.assert_series_equal(res_rev, exp_rev) + tm.assert_numpy_array_equal(res_rev.values, exp_rev2) + + # Only categories with same categories can be compared + msg = "Categoricals can only be compared if 'categories' are the same" + with pytest.raises(TypeError, match=msg): + cat > cat_rev + + # categorical cannot be compared to Series or numpy array, and also + # not the other way around + msg = ( + "Cannot compare a Categorical for op __gt__ with type " + r"" + ) + with pytest.raises(TypeError, match=msg): + cat > s + with pytest.raises(TypeError, match=msg): + cat_rev > s + with pytest.raises(TypeError, match=msg): + cat > a + with pytest.raises(TypeError, match=msg): + cat_rev > a + + with pytest.raises(TypeError, match=msg): + s < cat + with pytest.raises(TypeError, match=msg): + s < cat_rev + + with pytest.raises(TypeError, match=msg): + a < cat + with pytest.raises(TypeError, match=msg): + a < cat_rev + + @pytest.mark.parametrize( + "ctor", + [ + lambda *args, **kwargs: Categorical(*args, **kwargs), + lambda *args, **kwargs: Series(Categorical(*args, **kwargs)), + ], + ) + def test_unordered_different_order_equal(self, ctor): + # https://github.com/pandas-dev/pandas/issues/16014 + c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) + c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) + assert (c1 == c2).all() + + c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) + c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) + c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) + c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) + result = c1 == c2 + tm.assert_numpy_array_equal(np.array(result), np.array([True, False])) + + def test_unordered_different_categories_raises(self): + c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False) + c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False) + + with pytest.raises(TypeError, match=("Categoricals can only be compared")): + c1 == c2 + + def test_compare_different_lengths(self): + c1 = Categorical([], categories=["a", "b"]) + c2 = Categorical([], categories=["a"]) + + msg = "Categoricals can only be compared if 'categories' are the same." 
+ with pytest.raises(TypeError, match=msg): + c1 == c2 + + def test_compare_unordered_different_order(self): + # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- + # 349290078 + a = Categorical(["a"], categories=["a", "b"]) + b = Categorical(["b"], categories=["b", "a"]) + assert not a.equals(b) + + def test_numeric_like_ops(self): + + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + + # numeric ops should not succeed + for op, str_rep in [ + ("__add__", r"\+"), + ("__sub__", "-"), + ("__mul__", r"\*"), + ("__truediv__", "/"), + ]: + msg = f"Series cannot perform the operation {str_rep}|unsupported operand" + with pytest.raises(TypeError, match=msg): + getattr(df, op)(df) + + # reduction ops should not succeed (unless specifically defined, e.g. + # min/max) + s = df["value_group"] + for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]: + msg = f"does not support reduction '{op}'" + with pytest.raises(TypeError, match=msg): + getattr(s, op)(numeric_only=False) + + # mad technically works because it takes always the numeric data + + def test_numeric_like_ops_series(self): + # numpy ops + s = Series(Categorical([1, 2, 3, 4])) + with pytest.raises(TypeError, match="does not support reduction 'sum'"): + np.sum(s) + + @pytest.mark.parametrize( + "op, str_rep", + [ + ("__add__", r"\+"), + ("__sub__", "-"), + ("__mul__", r"\*"), + ("__truediv__", "/"), + ], + ) + def test_numeric_like_ops_series_arith(self, op, str_rep): + # numeric ops on a Series + s = Series(Categorical([1, 2, 3, 4])) + msg = f"Series cannot perform the operation {str_rep}|unsupported operand" + with pytest.raises(TypeError, match=msg): + getattr(s, op)(2) + + def test_numeric_like_ops_series_invalid(self): + # invalid ufunc + s = Series(Categorical([1, 2, 3, 4])) + msg = "Object with dtype category cannot perform the numpy op log" + with pytest.raises(TypeError, match=msg): + np.log(s) diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py new file mode 100644 index 00000000..a50b1edd --- /dev/null +++ b/pandas/tests/arrays/categorical/test_replace.py @@ -0,0 +1,72 @@ +import pytest + +import pandas as pd +from pandas import Categorical +import pandas._testing as tm + + +@pytest.mark.parametrize( + "to_replace,value,expected,flip_categories", + [ + # one-to-one + (1, 2, [2, 2, 3], False), + (1, 4, [4, 2, 3], False), + (4, 1, [1, 2, 3], False), + (5, 6, [1, 2, 3], False), + # many-to-one + ([1], 2, [2, 2, 3], False), + ([1, 2], 3, [3, 3, 3], False), + ([1, 2], 4, [4, 4, 3], False), + ((1, 2, 4), 5, [5, 5, 3], False), + ((5, 6), 2, [1, 2, 3], False), + ([1], [2], [2, 2, 3], False), + ([1, 4], [5, 2], [5, 2, 3], False), + # check_categorical sorts categories, which crashes on mixed dtypes + (3, "4", [1, 2, "4"], False), + ([1, 2, "3"], "5", ["5", "5", 3], True), + ], +) +def test_replace_categorical_series(to_replace, value, expected, flip_categories): + # GH 31720 + + ser = pd.Series([1, 2, 3], dtype="category") + result = ser.replace(to_replace, value) + expected = pd.Series(expected, dtype="category") + ser.replace(to_replace, value, inplace=True) + + if flip_categories: + expected = expected.cat.set_categories(expected.cat.categories[::-1]) + + tm.assert_series_equal(expected, 
result, check_category_order=False) + tm.assert_series_equal(expected, ser, check_category_order=False) + + +@pytest.mark.parametrize( + "to_replace, value, result, expected_error_msg", + [ + ("b", "c", ["a", "c"], "Categorical.categories are different"), + ("c", "d", ["a", "b"], None), + # https://github.com/pandas-dev/pandas/issues/33288 + ("a", "a", ["a", "b"], None), + ("b", None, ["a", None], "Categorical.categories length are different"), + ], +) +def test_replace_categorical(to_replace, value, result, expected_error_msg): + # GH#26988 + cat = Categorical(["a", "b"]) + expected = Categorical(result) + with tm.assert_produces_warning(FutureWarning, match="Series.replace"): + # GH#44929 replace->_replace + result = cat.replace(to_replace, value) + + tm.assert_categorical_equal(result, expected) + if to_replace == "b": # the "c" test is supposed to be unchanged + with pytest.raises(AssertionError, match=expected_error_msg): + # ensure non-inplace call does not affect original + tm.assert_categorical_equal(cat, expected) + + with tm.assert_produces_warning(FutureWarning, match="Series.replace"): + # GH#44929 replace->_replace + cat.replace(to_replace, value, inplace=True) + + tm.assert_categorical_equal(cat, expected) diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py new file mode 100644 index 00000000..b44af07c --- /dev/null +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -0,0 +1,533 @@ +import numpy as np + +from pandas import ( + Categorical, + CategoricalIndex, + Series, + date_range, + option_context, + period_range, + timedelta_range, +) + + +class TestCategoricalReprWithFactor: + def test_print(self, factor): + expected = [ + "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", + "Categories (3, object): ['a' < 'b' < 'c']", + ] + expected = "\n".join(expected) + actual = repr(factor) + assert actual == expected + + +class TestCategoricalRepr: + def test_big_print(self): + factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True) + expected = [ + "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']", + "Length: 600", + "Categories (3, object): ['a', 'b', 'c']", + ] + expected = "\n".join(expected) + + actual = repr(factor) + + assert actual == expected + + def test_empty_print(self): + factor = Categorical([], ["a", "b", "c"]) + expected = "[], Categories (3, object): ['a', 'b', 'c']" + actual = repr(factor) + assert actual == expected + + assert expected == actual + factor = Categorical([], ["a", "b", "c"], ordered=True) + expected = "[], Categories (3, object): ['a' < 'b' < 'c']" + actual = repr(factor) + assert expected == actual + + factor = Categorical([], []) + expected = "[], Categories (0, object): []" + assert expected == repr(factor) + + def test_print_none_width(self): + # GH10087 + a = Series(Categorical([1, 2, 3, 4])) + exp = ( + "0 1\n1 2\n2 3\n3 4\n" + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" + ) + + with option_context("display.width", None): + assert exp == repr(a) + + def test_unicode_print(self): + c = Categorical(["aaaaa", "bb", "cccc"] * 20) + expected = """\ +['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc'] +Length: 60 +Categories (3, object): ['aaaaa', 'bb', 'cccc']""" + + assert repr(c) == expected + + c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) + expected = """\ +['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] +Length: 60 +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # 
noqa:E501 + + assert repr(c) == expected + + # unicode option should not affect to Categorical, as it doesn't care + # the repr width + with option_context("display.unicode.east_asian_width", True): + + c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) + expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] +Length: 60 +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa:E501 + + assert repr(c) == expected + + def test_categorical_repr(self): + c = Categorical([1, 2, 3]) + exp = """[1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 4, 5] * 10) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1, 2, 3, 4, 5]""" + + assert repr(c) == exp + + c = Categorical(np.arange(20)) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" + + assert repr(c) == exp + + def test_categorical_repr_ordered(self): + c = Categorical([1, 2, 3], ordered=True) + exp = """[1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" + + assert repr(c) == exp + + c = Categorical(np.arange(20), ordered=True) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0 < 1 < 2 < 3 ... 
16 < 17 < 18 < 19]""" + + assert repr(c) == exp + + def test_categorical_repr_datetime(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx) + + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]" + "" + ) + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]" + ) + + assert repr(c) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + c = Categorical(idx) + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]" + ) + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, " + "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " + "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]" + ) + + assert repr(c) == exp + + def test_categorical_repr_datetime_ordered(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa:E501 + + assert repr(c) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), 
categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa:E501 + + assert repr(c) == exp + + def test_categorical_repr_int_with_nan(self): + c = Categorical([1, 2, np.nan]) + c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]""" + assert repr(c) == c_exp + + s = Series([1, 2, np.nan], dtype="object").astype("category") + s_exp = """0 1\n1 2\n2 NaN +dtype: category +Categories (2, int64): [1, 2]""" + assert repr(s) == s_exp + + def test_categorical_repr_period(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(c) == exp + + idx = period_range("2011-01", freq="M", periods=5) + c = Categorical(idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa:E501 + + assert repr(c) == exp + + def test_categorical_repr_period_ordered(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa:E501 + + assert repr(c) == exp + + idx = period_range("2011-01", freq="M", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa:E501 + 
+ assert repr(c) == exp + + def test_categorical_repr_timedelta(self): + idx = timedelta_range("1 days", periods=5) + c = Categorical(idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa:E501 + + assert repr(c) == exp + + idx = timedelta_range("1 hours", periods=20) + c = Categorical(idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" # noqa:E501 + + assert repr(c) == exp + + def test_categorical_repr_timedelta_ordered(self): + idx = timedelta_range("1 days", periods=5) + c = Categorical(idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa:E501 + + assert repr(c) == exp + + idx = timedelta_range("1 hours", periods=20) + c = Categorical(idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa:E501 + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 
16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa:E501 + + assert repr(c) == exp + + def test_categorical_index_repr(self): + idx = CategoricalIndex(Categorical([1, 2, 3])) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa:E501 + assert repr(idx) == exp + + i = CategoricalIndex(Categorical(np.arange(10))) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + def test_categorical_index_repr_ordered(self): + i = CategoricalIndex(Categorical([1, 2, 3], ordered=True)) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + i = CategoricalIndex(Categorical(np.arange(10), ordered=True)) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=True, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + def test_categorical_index_repr_datetime(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + def test_categorical_index_repr_datetime_ordered(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + i = CategoricalIndex(Categorical(idx.append(idx), ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', + '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', + '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 
09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + def test_categorical_index_repr_period(self): + # test all length + idx = period_range("2011-01-01 09:00", freq="H", periods=1) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=2) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=3) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + i = CategoricalIndex(Categorical(idx.append(idx))) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', + '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', + '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + idx = period_range("2011-01", freq="M", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + def test_categorical_index_repr_period_ordered(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + idx = period_range("2011-01", freq="M", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + def test_categorical_index_repr_timedelta(self): + idx = timedelta_range("1 days", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=False, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = timedelta_range("1 hours", periods=10) + 
i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + def test_categorical_index_repr_timedelta_ordered(self): + idx = timedelta_range("1 days", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=True, dtype='category')""" # noqa:E501 + assert repr(i) == exp + + idx = timedelta_range("1 hours", periods=10) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=True, dtype='category')""" # noqa:E501 + + assert repr(i) == exp + + def test_categorical_str_repr(self): + # GH 33676 + result = repr(Categorical([1, "2", 3, 4])) + expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" + assert result == expected diff --git a/pandas/tests/arrays/categorical/test_sorting.py b/pandas/tests/arrays/categorical/test_sorting.py new file mode 100644 index 00000000..4f65c8df --- /dev/null +++ b/pandas/tests/arrays/categorical/test_sorting.py @@ -0,0 +1,129 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + Index, +) +import pandas._testing as tm + + +class TestCategoricalSort: + def test_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal( + c.argsort(ascending=True), expected, check_dtype=False + ) + + expected = expected[::-1] + tm.assert_numpy_array_equal( + c.argsort(ascending=False), expected, check_dtype=False + ) + + def test_numpy_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) + + tm.assert_numpy_array_equal( + np.argsort(c, kind="mergesort"), expected, check_dtype=False + ) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(c, axis=0) + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(c, order="C") + + def test_sort_values(self): + + # unordered cats are sortable + cat = Categorical(["a", "b", "b", "a"], ordered=False) + cat.sort_values() + + cat = Categorical(["a", "c", "b", "d"], ordered=True) + + # sort_values + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + cat = Categorical( + ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True + ) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, 
cat.categories) + + res = cat.sort_values(ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + # sort (inplace order) + cat1 = cat.copy() + orig_codes = cat1._codes + cat1.sort_values(inplace=True) + assert cat1._codes is orig_codes + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(cat1.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + # reverse + cat = Categorical(["a", "c", "c", "b", "d"], ordered=True) + res = cat.sort_values(ascending=False) + exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) + + def test_sort_values_na_position(self): + # see gh-12882 + cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True) + exp_categories = Index([2, 5]) + + exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) + res = cat.sort_values() # default arguments + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0]) + res = cat.sort_values(ascending=True, na_position="first") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0]) + res = cat.sort_values(ascending=False, na_position="first") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) + res = cat.sort_values(ascending=True, na_position="last") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan]) + res = cat.sort_values(ascending=False, na_position="last") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) + res = cat.sort_values(ascending=False, na_position="last") + exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) + + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) + res = cat.sort_values(ascending=False, na_position="first") + exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) diff --git a/pandas/tests/arrays/categorical/test_subclass.py b/pandas/tests/arrays/categorical/test_subclass.py new file mode 100644 index 00000000..b80d0ff4 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_subclass.py @@ -0,0 +1,22 @@ +from pandas import Categorical +import pandas._testing as tm + + +class TestCategoricalSubclassing: + def test_constructor(self): + sc = tm.SubclassedCategorical(["a", "b", "c"]) + assert isinstance(sc, tm.SubclassedCategorical) + tm.assert_categorical_equal(sc, Categorical(["a", "b", "c"])) + + def test_from_codes(self): + sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ["a", "b", "c"]) + assert isinstance(sc, tm.SubclassedCategorical) + exp = Categorical.from_codes([1, 0, 2], ["a", "b", 
"c"]) + tm.assert_categorical_equal(sc, exp) + + def test_map(self): + sc = tm.SubclassedCategorical(["a", "b", "c"]) + res = sc.map(lambda x: x.upper()) + assert isinstance(res, tm.SubclassedCategorical) + exp = Categorical(["A", "B", "C"]) + tm.assert_categorical_equal(res, exp) diff --git a/pandas/tests/arrays/categorical/test_take.py b/pandas/tests/arrays/categorical/test_take.py new file mode 100644 index 00000000..fbdbea1d --- /dev/null +++ b/pandas/tests/arrays/categorical/test_take.py @@ -0,0 +1,95 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + Index, +) +import pandas._testing as tm + + +class TestTake: + # https://github.com/pandas-dev/pandas/issues/20664 + + def test_take_default_allow_fill(self): + cat = Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + result = cat.take([0, -1]) + + assert result.equals(cat) + + def test_take_positive_no_warning(self): + cat = Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + cat.take([0, 0]) + + def test_take_bounds(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = Categorical(["a", "b", "a"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "index 4 is out of bounds for( axis 0 with)? size 3" + with pytest.raises(IndexError, match=msg): + cat.take([4, 5], allow_fill=allow_fill) + + def test_take_empty(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = Categorical([], categories=["a", "b"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "cannot do a non-empty take from an empty axes" + with pytest.raises(IndexError, match=msg): + cat.take([0], allow_fill=allow_fill) + + def test_positional_take(self, ordered): + cat = Categorical(["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered) + result = cat.take([0, 1, 2], allow_fill=False) + expected = Categorical( + ["a", "a", "b"], categories=cat.categories, ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + + def test_positional_take_unobserved(self, ordered): + cat = Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered) + result = cat.take([1, 0], allow_fill=False) + expected = Categorical(["b", "a"], categories=cat.categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_take_allow_fill(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "a", "b"]) + result = cat.take([0, -1, -1], allow_fill=True) + expected = Categorical(["a", np.nan, np.nan], categories=["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_with_negative_one(self): + # -1 was a category + cat = Categorical([-1, 0, 1]) + result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) + expected = Categorical([-1, -1, 0], categories=[-1, 0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "b", "c"]) + result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) + expected = Categorical(["a", "b", "a"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value_new_raises(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "b", "c"]) + xpr = r"Cannot setitem on a Categorical with a new category \(d\)" + with pytest.raises(TypeError, match=xpr): + cat.take([0, 1, -1], fill_value="d", allow_fill=True) + + def 
test_take_nd_deprecated(self): + cat = Categorical(["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning): + cat.take_nd([0, 1]) + + ci = Index(cat) + with tm.assert_produces_warning(FutureWarning): + ci.take_nd([0, 1]) diff --git a/pandas/tests/arrays/categorical/test_warnings.py b/pandas/tests/arrays/categorical/test_warnings.py new file mode 100644 index 00000000..6ba33477 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_warnings.py @@ -0,0 +1,22 @@ +import pytest + +from pandas.util._test_decorators import async_mark + +import pandas._testing as tm + + +class TestCategoricalWarnings: + @async_mark() + async def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; c = Categorical([])" + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning; + # appears resolved 2021-02-02 + with tm.assert_produces_warning(None): + with provisionalcompleter("ignore"): + list(ip.Completer.completions("c.", 1)) diff --git a/pandas/tests/arrays/datetimes/__init__.py b/pandas/tests/arrays/datetimes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py new file mode 100644 index 00000000..cb2d8f31 --- /dev/null +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -0,0 +1,162 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray +from pandas.core.arrays.datetimes import _sequence_to_dt64ns + + +class TestDatetimeArrayConstructor: + def test_from_sequence_invalid_type(self): + mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) + with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): + DatetimeArray._from_sequence(mi) + + def test_only_1dim_accepted(self): + arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + DatetimeArray(arr.reshape(2, 2, 1)) + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 0-dim + DatetimeArray(arr[[0]].squeeze()) + + def test_freq_validation(self): + # GH#24623 check that invalid instances cannot be created with the + # public constructor + arr = np.arange(5, dtype=np.int64) * 3600 * 10**9 + + msg = ( + "Inferred frequency H from passed values does not " + "conform to passed frequency W-SUN" + ) + with pytest.raises(ValueError, match=msg): + DatetimeArray(arr, freq="W") + + @pytest.mark.parametrize( + "meth", + [ + DatetimeArray._from_sequence, + _sequence_to_dt64ns, + pd.to_datetime, + pd.DatetimeIndex, + ], + ) + def test_mixing_naive_tzaware_raises(self, meth): + # GH#24569 + arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]) + + msg = ( + "Cannot mix tz-aware with tz-naive values|" + "Tz-aware datetime.datetime cannot be converted " + "to datetime64 unless utc=True" + ) + + for obj in [arr, arr[::-1]]: + # check that we raise regardless of whether naive is found + # before aware or vice-versa + with pytest.raises(ValueError, match=msg): + meth(obj) + + def test_from_pandas_array(self): + arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9 + + result = 
DatetimeArray._from_sequence(arr)._with_freq("infer") + + expected = pd.date_range("1970-01-01", periods=5, freq="H")._data + tm.assert_datetime_array_equal(result, expected) + + def test_mismatched_timezone_raises(self): + arr = DatetimeArray( + np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"), + dtype=DatetimeTZDtype(tz="US/Central"), + ) + dtype = DatetimeTZDtype(tz="US/Eastern") + msg = r"dtype=datetime64\[ns.*\] does not match data dtype datetime64\[ns.*\]" + with pytest.raises(TypeError, match=msg): + DatetimeArray(arr, dtype=dtype) + + # also with mismatched tzawareness + with pytest.raises(TypeError, match=msg): + DatetimeArray(arr, dtype=np.dtype("M8[ns]")) + with pytest.raises(TypeError, match=msg): + DatetimeArray(arr.tz_localize(None), dtype=arr.dtype) + + def test_non_array_raises(self): + with pytest.raises(ValueError, match="list"): + DatetimeArray([1, 2, 3]) + + def test_bool_dtype_raises(self): + arr = np.array([1, 2, 3], dtype="bool") + + msg = "Unexpected value for 'dtype': 'bool'. Must be" + with pytest.raises(ValueError, match=msg): + DatetimeArray(arr) + + msg = r"dtype bool cannot be converted to datetime64\[ns\]" + with pytest.raises(TypeError, match=msg): + DatetimeArray._from_sequence(arr) + + with pytest.raises(TypeError, match=msg): + _sequence_to_dt64ns(arr) + + with pytest.raises(TypeError, match=msg): + pd.DatetimeIndex(arr) + + with pytest.raises(TypeError, match=msg): + pd.to_datetime(arr) + + def test_incorrect_dtype_raises(self): + with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): + DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category") + + def test_freq_infer_raises(self): + with pytest.raises(ValueError, match="Frequency inference"): + DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer") + + def test_copy(self): + data = np.array([1, 2, 3], dtype="M8[ns]") + arr = DatetimeArray(data, copy=False) + assert arr._data is data + + arr = DatetimeArray(data, copy=True) + assert arr._data is not data + + +class TestSequenceToDT64NS: + def test_tz_dtype_mismatch_raises(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + with pytest.raises(TypeError, match="data is already tz-aware"): + _sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC")) + + def test_tz_dtype_matches(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + result, _, _ = _sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) + tm.assert_numpy_array_equal(arr._data, result) + + @pytest.mark.parametrize("order", ["F", "C"]) + def test_2d(self, order): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + arr = np.array(dti, dtype=object).reshape(3, 2) + if order == "F": + arr = arr.T + + res = _sequence_to_dt64ns(arr) + expected = _sequence_to_dt64ns(arr.ravel()) + + tm.assert_numpy_array_equal(res[0].ravel(), expected[0]) + assert res[1] == expected[1] + assert res[2] == expected[2] + + res = DatetimeArray._from_sequence(arr) + expected = DatetimeArray._from_sequence(arr.ravel()).reshape(arr.shape) + tm.assert_datetime_array_equal(res, expected) diff --git a/pandas/tests/arrays/datetimes/test_reductions.py b/pandas/tests/arrays/datetimes/test_reductions.py new file mode 100644 index 00000000..d0553f96 --- /dev/null +++ b/pandas/tests/arrays/datetimes/test_reductions.py @@ -0,0 +1,176 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import NaT +import 
pandas._testing as tm +from pandas.core.arrays import DatetimeArray + + +class TestReductions: + @pytest.fixture + def arr1d(self, tz_naive_fixture): + """Fixture returning DatetimeArray with parametrized timezones""" + tz = tz_naive_fixture + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence( + [ + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", + ], + dtype=dtype, + ) + return arr + + def test_min_max(self, arr1d): + arr = arr1d + tz = arr.tz + + result = arr.min() + expected = pd.Timestamp("2000-01-02", tz=tz) + assert result == expected + + result = arr.max() + expected = pd.Timestamp("2000-01-05", tz=tz) + assert result == expected + + result = arr.min(skipna=False) + assert result is NaT + + result = arr.max(skipna=False) + assert result is NaT + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_empty(self, skipna, tz): + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence([], dtype=dtype) + result = arr.min(skipna=skipna) + assert result is NaT + + result = arr.max(skipna=skipna) + assert result is NaT + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_median_empty(self, skipna, tz): + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence([], dtype=dtype) + result = arr.median(skipna=skipna) + assert result is NaT + + arr = arr.reshape(0, 3) + result = arr.median(axis=0, skipna=skipna) + expected = type(arr)._from_sequence([NaT, NaT, NaT], dtype=arr.dtype) + tm.assert_equal(result, expected) + + result = arr.median(axis=1, skipna=skipna) + expected = type(arr)._from_sequence([], dtype=arr.dtype) + tm.assert_equal(result, expected) + + def test_median(self, arr1d): + arr = arr1d + + result = arr.median() + assert result == arr[0] + result = arr.median(skipna=False) + assert result is NaT + + result = arr.dropna().median(skipna=False) + assert result == arr[0] + + result = arr.median(axis=0) + assert result == arr[0] + + def test_median_axis(self, arr1d): + arr = arr1d + assert arr.median(axis=0) == arr.median() + assert arr.median(axis=0, skipna=False) is NaT + + msg = r"abs\(axis\) must be less than ndim" + with pytest.raises(ValueError, match=msg): + arr.median(axis=1) + + @pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning") + def test_median_2d(self, arr1d): + arr = arr1d.reshape(1, -1) + + # axis = None + assert arr.median() == arr1d.median() + assert arr.median(skipna=False) is NaT + + # axis = 0 + result = arr.median(axis=0) + expected = arr1d + tm.assert_equal(result, expected) + + # Since column 3 is all-NaT, we get NaT there with or without skipna + result = arr.median(axis=0, skipna=False) + expected = arr1d + tm.assert_equal(result, expected) + + # axis = 1 + result = arr.median(axis=1) + expected = type(arr)._from_sequence([arr1d.median()]) + tm.assert_equal(result, expected) + + result = arr.median(axis=1, skipna=False) + expected = type(arr)._from_sequence([NaT], dtype=arr.dtype) + tm.assert_equal(result, expected) + + def test_mean(self, arr1d): + arr = arr1d + + # manually verified result + expected = arr[0] + 0.4 * pd.Timedelta(days=1) + + result = arr.mean() + assert result == expected + result = arr.mean(skipna=False) + assert result is NaT + + result = arr.dropna().mean(skipna=False) + 
assert result == expected + + result = arr.mean(axis=0) + assert result == expected + + def test_mean_2d(self): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + dta = dti._data.reshape(3, 2) + + result = dta.mean(axis=0) + expected = dta[1] + tm.assert_datetime_array_equal(result, expected) + + result = dta.mean(axis=1) + expected = dta[:, 0] + pd.Timedelta(hours=12) + tm.assert_datetime_array_equal(result, expected) + + result = dta.mean(axis=None) + expected = dti.mean() + assert result == expected + + @pytest.mark.parametrize("skipna", [True, False]) + def test_mean_empty(self, arr1d, skipna): + arr = arr1d[:0] + + assert arr.mean(skipna=skipna) is NaT + + arr2d = arr.reshape(0, 3) + result = arr2d.mean(axis=0, skipna=skipna) + expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=arr.dtype) + tm.assert_datetime_array_equal(result, expected) + + result = arr2d.mean(axis=1, skipna=skipna) + expected = arr # i.e. 1D, empty + tm.assert_datetime_array_equal(result, expected) + + result = arr2d.mean(axis=None, skipna=skipna) + assert result is NaT diff --git a/pandas/tests/arrays/floating/__init__.py b/pandas/tests/arrays/floating/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/floating/conftest.py b/pandas/tests/arrays/floating/conftest.py new file mode 100644 index 00000000..5e971c66 --- /dev/null +++ b/pandas/tests/arrays/floating/conftest.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) + + +@pytest.fixture(params=[Float32Dtype, Float64Dtype]) +def dtype(request): + """Parametrized fixture returning a float 'dtype'""" + return request.param() + + +@pytest.fixture +def data(dtype): + """Fixture returning 'data' array according to parametrized float 'dtype'""" + return pd.array( + list(np.arange(0.1, 0.9, 0.1)) + + [pd.NA] + + list(np.arange(1, 9.8, 0.1)) + + [pd.NA] + + [9.9, 10.0], + dtype=dtype, + ) + + +@pytest.fixture +def data_missing(dtype): + """ + Fixture returning array with missing data according to parametrized float + 'dtype'. + """ + return pd.array([np.nan, 0.1], dtype=dtype) + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture returning 'data' or 'data_missing' float arrays. + + Used to test dtype conversion with and without missing values. 
+ """ + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py new file mode 100644 index 00000000..ec7419d6 --- /dev/null +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -0,0 +1,232 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray + +# Basic test for the arithmetic array ops +# ----------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "opname, exp", + [ + ("add", [1.1, 2.2, None, None, 5.5]), + ("mul", [0.1, 0.4, None, None, 2.5]), + ("sub", [0.9, 1.8, None, None, 4.5]), + ("truediv", [10.0, 10.0, None, None, 10.0]), + ("floordiv", [9.0, 9.0, None, None, 10.0]), + ("mod", [0.1, 0.2, None, None, 0.0]), + ], + ids=["add", "mul", "sub", "div", "floordiv", "mod"], +) +def test_array_op(dtype, opname, exp): + a = pd.array([1.0, 2.0, None, 4.0, 5.0], dtype=dtype) + b = pd.array([0.1, 0.2, 0.3, None, 0.5], dtype=dtype) + + op = getattr(operator, opname) + + result = op(a, b) + expected = pd.array(exp, dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) +def test_divide_by_zero(dtype, zero, negative): + # TODO pending NA/NaN discussion + # https://github.com/pandas-dev/pandas/issues/32265/ + a = pd.array([0, 1, -1, None], dtype=dtype) + result = a / zero + expected = FloatingArray( + np.array([np.nan, np.inf, -np.inf, np.nan], dtype=dtype.numpy_dtype), + np.array([False, False, False, True]), + ) + if negative: + expected *= -1 + tm.assert_extension_array_equal(result, expected) + + +def test_pow_scalar(dtype): + a = pd.array([-1, 0, 1, None, 2], dtype=dtype) + result = a**0 + expected = pd.array([1, 1, 1, 1, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = a**1 + expected = pd.array([-1, 0, 1, None, 2], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = a**pd.NA + expected = pd.array([None, None, 1, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = a**np.nan + # TODO np.nan should be converted to pd.NA / missing before operation? + expected = FloatingArray( + np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype), + mask=a._mask, + ) + tm.assert_extension_array_equal(result, expected) + + # reversed + a = a[1:] # Can't raise integers to negative powers. 
+ + result = 0**a + expected = pd.array([1, 0, None, 0], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = 1**a + expected = pd.array([1, 1, 1, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = pd.NA**a + expected = pd.array([1, None, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = np.nan**a + expected = FloatingArray( + np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), mask=a._mask + ) + tm.assert_extension_array_equal(result, expected) + + +def test_pow_array(dtype): + a = pd.array([0, 0, 0, 1, 1, 1, None, None, None], dtype=dtype) + b = pd.array([0, 1, None, 0, 1, None, 0, 1, None], dtype=dtype) + result = a**b + expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_rpow_one_to_na(): + # https://github.com/pandas-dev/pandas/issues/22022 + # https://github.com/pandas-dev/pandas/issues/29997 + arr = pd.array([np.nan, np.nan], dtype="Float64") + result = np.array([1.0, 2.0]) ** arr + expected = pd.array([1.0, np.nan], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("other", [0, 0.5]) +def test_arith_zero_dim_ndarray(other): + arr = pd.array([1, None, 2], dtype="Float64") + result = arr + np.array(other) + expected = arr + other + tm.assert_equal(result, expected) + + +# Test generic characteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_values(data, all_arithmetic_operators): + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + + # invalid scalars + msg = "|".join( + [ + r"can only perform ops with numeric values", + r"FloatingArray cannot perform the operation mod", + "unsupported operand type", + "not all arguments converted during string formatting", + "can't multiply sequence by non-int of type 'float'", + "ufunc 'subtract' cannot use operands with types dtype", + r"can only concatenate str \(not \"float\"\) to str", + "ufunc '.*' not supported for the input types, and the inputs could not", + "ufunc '.*' did not contain a loop with signature matching types", + "Concatenation operation is not implemented for NumPy arrays", + ] + ) + with pytest.raises(TypeError, match=msg): + ops("foo") + with pytest.raises(TypeError, match=msg): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + with pytest.raises(TypeError, match=msg): + ops(pd.Series("foo", index=s.index)) + + msg = "|".join( + [ + "can only perform ops with numeric values", + "cannot perform .* with this index type: DatetimeArray", + "Addition/subtraction of integers and integer-arrays " + "with DatetimeArray is no longer supported. 
*", + "unsupported operand type", + "not all arguments converted during string formatting", + "can't multiply sequence by non-int of type 'float'", + "ufunc 'subtract' cannot use operands with types dtype", + r"ufunc 'add' cannot use operands with types dtype\('"], dtype=object) + + tm.assert_numpy_array_equal(a.astype(str), expected) + tm.assert_numpy_array_equal(a.astype("str"), expected) + + +def test_astype_copy(): + arr = pd.array([0.1, 0.2, None], dtype="Float64") + orig = pd.array([0.1, 0.2, None], dtype="Float64") + + # copy=True -> ensure both data and mask are actual copies + result = arr.astype("Float64", copy=True) + assert result is not arr + assert not tm.shares_memory(result, arr) + result[0] = 10 + tm.assert_extension_array_equal(arr, orig) + result[0] = pd.NA + tm.assert_extension_array_equal(arr, orig) + + # copy=False + result = arr.astype("Float64", copy=False) + assert result is arr + assert np.shares_memory(result._data, arr._data) + assert np.shares_memory(result._mask, arr._mask) + result[0] = 10 + assert arr[0] == 10 + result[0] = pd.NA + assert arr[0] is pd.NA + + # astype to different dtype -> always needs a copy -> even with copy=False + # we need to ensure that also the mask is actually copied + arr = pd.array([0.1, 0.2, None], dtype="Float64") + orig = pd.array([0.1, 0.2, None], dtype="Float64") + + result = arr.astype("Float32", copy=False) + assert not tm.shares_memory(result, arr) + result[0] = 10 + tm.assert_extension_array_equal(arr, orig) + result[0] = pd.NA + tm.assert_extension_array_equal(arr, orig) + + +def test_astype_object(dtype): + arr = pd.array([1.0, pd.NA], dtype=dtype) + + result = arr.astype(object) + expected = np.array([1.0, pd.NA], dtype=object) + tm.assert_numpy_array_equal(result, expected) + # check exact element types + assert isinstance(result[0], float) + assert result[1] is pd.NA diff --git a/pandas/tests/arrays/floating/test_comparison.py b/pandas/tests/arrays/floating/test_comparison.py new file mode 100644 index 00000000..a429649f --- /dev/null +++ b/pandas/tests/arrays/floating/test_comparison.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray +from pandas.tests.arrays.masked_shared import ( + ComparisonOps, + NumericOps, +) + + +class TestComparisonOps(NumericOps, ComparisonOps): + @pytest.mark.parametrize("other", [True, False, pd.NA, -1.0, 0.0, 1]) + def test_scalar(self, other, comparison_op, dtype): + ComparisonOps.test_scalar(self, other, comparison_op, dtype) + + def test_compare_with_integerarray(self, comparison_op): + op = comparison_op + a = pd.array([0, 1, None] * 3, dtype="Int64") + b = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype="Float64") + other = b.astype("Int64") + expected = op(a, other) + result = op(a, b) + tm.assert_extension_array_equal(result, expected) + expected = op(other, a) + result = op(b, a) + tm.assert_extension_array_equal(result, expected) + + +def test_equals(): + # GH-30652 + # equals is generally tested in /tests/extension/base/methods, but this + # specifically tests that two arrays of the same class but different dtype + # do not evaluate equal + a1 = pd.array([1, 2, None], dtype="Float64") + a2 = pd.array([1, 2, None], dtype="Float32") + assert a1.equals(a2) is False + + +def test_equals_nan_vs_na(): + # GH#44382 + + mask = np.zeros(3, dtype=bool) + data = np.array([1.0, np.nan, 3.0], dtype=np.float64) + + left = FloatingArray(data, mask) + assert left.equals(left) + 
tm.assert_extension_array_equal(left, left) + + assert left.equals(left.copy()) + assert left.equals(FloatingArray(data.copy(), mask.copy())) + + mask2 = np.array([False, True, False], dtype=bool) + data2 = np.array([1.0, 2.0, 3.0], dtype=np.float64) + right = FloatingArray(data2, mask2) + assert right.equals(right) + tm.assert_extension_array_equal(right, right) + + assert not left.equals(right) + + # with mask[1] = True, the only difference is data[1], which should + # not matter for equals + mask[1] = True + assert left.equals(right) diff --git a/pandas/tests/arrays/floating/test_concat.py b/pandas/tests/arrays/floating/test_concat.py new file mode 100644 index 00000000..dcb02104 --- /dev/null +++ b/pandas/tests/arrays/floating/test_concat.py @@ -0,0 +1,21 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "to_concat_dtypes, result_dtype", + [ + (["Float64", "Float64"], "Float64"), + (["Float32", "Float64"], "Float64"), + (["Float32", "Float32"], "Float32"), + ], +) +def test_concat_series(to_concat_dtypes, result_dtype): + + result = pd.concat([pd.Series([1, 2, pd.NA], dtype=t) for t in to_concat_dtypes]) + expected = pd.concat([pd.Series([1, 2, pd.NA], dtype=object)] * 2).astype( + result_dtype + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py new file mode 100644 index 00000000..2dcd54f4 --- /dev/null +++ b/pandas/tests/arrays/floating/test_construction.py @@ -0,0 +1,203 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) + + +def test_uses_pandas_na(): + a = pd.array([1, None], dtype=Float64Dtype()) + assert a[1] is pd.NA + + +def test_floating_array_constructor(): + values = np.array([1, 2, 3, 4], dtype="float64") + mask = np.array([False, False, False, True], dtype="bool") + + result = FloatingArray(values, mask) + expected = pd.array([1, 2, 3, np.nan], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + tm.assert_numpy_array_equal(result._data, values) + tm.assert_numpy_array_equal(result._mask, mask) + + msg = r".* should be .* numpy array. 
Use the 'pd.array' function instead" + with pytest.raises(TypeError, match=msg): + FloatingArray(values.tolist(), mask) + + with pytest.raises(TypeError, match=msg): + FloatingArray(values, mask.tolist()) + + with pytest.raises(TypeError, match=msg): + FloatingArray(values.astype(int), mask) + + msg = r"__init__\(\) missing 1 required positional argument: 'mask'" + with pytest.raises(TypeError, match=msg): + FloatingArray(values) + + +def test_floating_array_disallows_float16(): + # GH#44715 + arr = np.array([1, 2], dtype=np.float16) + mask = np.array([False, False]) + + msg = "FloatingArray does not support np.float16 dtype" + with pytest.raises(TypeError, match=msg): + FloatingArray(arr, mask) + + +def test_floating_array_disallows_Float16_dtype(request): + # GH#44715 + with pytest.raises(TypeError, match="data type 'Float16' not understood"): + pd.array([1.0, 2.0], dtype="Float16") + + +def test_floating_array_constructor_copy(): + values = np.array([1, 2, 3, 4], dtype="float64") + mask = np.array([False, False, False, True], dtype="bool") + + result = FloatingArray(values, mask) + assert result._data is values + assert result._mask is mask + + result = FloatingArray(values, mask, copy=True) + assert result._data is not values + assert result._mask is not mask + + +def test_to_array(): + result = pd.array([0.1, 0.2, 0.3, 0.4]) + expected = pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "a, b", + [ + ([1, None], [1, pd.NA]), + ([None], [pd.NA]), + ([None, np.nan], [pd.NA, pd.NA]), + ([1, np.nan], [1, pd.NA]), + ([np.nan], [pd.NA]), + ], +) +def test_to_array_none_is_nan(a, b): + result = pd.array(a, dtype="Float64") + expected = pd.array(b, dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +def test_to_array_mixed_integer_float(): + result = pd.array([1, 2.0]) + expected = pd.array([1.0, 2.0], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + result = pd.array([1, None, 2.0]) + expected = pd.array([1.0, None, 2.0], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + ["foo", "bar"], + "foo", + 1, + 1.0, + pd.date_range("20130101", periods=2), + np.array(["foo"]), + [[1, 2], [3, 4]], + [np.nan, {"a": 1}], + # GH#44514 all-NA case used to get quietly swapped out before checking ndim + np.array([pd.NA] * 6, dtype=object).reshape(3, 2), + ], +) +def test_to_array_error(values): + # error in converting existing arrays to FloatingArray + msg = "|".join( + [ + "cannot be converted to FloatingDtype", + "values must be a 1D list-like", + "Cannot pass scalar", + r"float\(\) argument must be a string or a (real )?number, not 'dict'", + "could not convert string to float: 'foo'", + ] + ) + with pytest.raises((TypeError, ValueError), match=msg): + pd.array(values, dtype="Float64") + + +@pytest.mark.parametrize("values", [["1", "2", None], ["1.5", "2", None]]) +def test_construct_from_float_strings(values): + # see also test_to_integer_array_str + expected = pd.array([float(values[0]), 2, None], dtype="Float64") + + res = pd.array(values, dtype="Float64") + tm.assert_extension_array_equal(res, expected) + + res = FloatingArray._from_sequence(values) + tm.assert_extension_array_equal(res, expected) + + +def test_to_array_inferred_dtype(): + # if values has dtype -> respect it + result = pd.array(np.array([1, 2], dtype="float32")) + assert result.dtype == Float32Dtype() + + # if values have no dtype -> always 
float64 + result = pd.array([1.0, 2.0]) + assert result.dtype == Float64Dtype() + + +def test_to_array_dtype_keyword(): + result = pd.array([1, 2], dtype="Float32") + assert result.dtype == Float32Dtype() + + # if values has dtype -> override it + result = pd.array(np.array([1, 2], dtype="float32"), dtype="Float64") + assert result.dtype == Float64Dtype() + + +def test_to_array_integer(): + result = pd.array([1, 2], dtype="Float64") + expected = pd.array([1.0, 2.0], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + # for integer dtypes, the itemsize is not preserved + # TODO can we specify "floating" in general? + result = pd.array(np.array([1, 2], dtype="int32"), dtype="Float64") + assert result.dtype == Float64Dtype() + + +@pytest.mark.parametrize( + "bool_values, values, target_dtype, expected_dtype", + [ + ([False, True], [0, 1], Float64Dtype(), Float64Dtype()), + ([False, True], [0, 1], "Float64", Float64Dtype()), + ([False, True, np.nan], [0, 1, np.nan], Float64Dtype(), Float64Dtype()), + ], +) +def test_to_array_bool(bool_values, values, target_dtype, expected_dtype): + result = pd.array(bool_values, dtype=target_dtype) + assert result.dtype == expected_dtype + expected = pd.array(values, dtype=target_dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_series_from_float(data): + # construct from our dtype & string dtype + dtype = data.dtype + + # from float + expected = pd.Series(data) + result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + # from list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py new file mode 100644 index 00000000..fbdf4198 --- /dev/null +++ b/pandas/tests/arrays/floating/test_function.py @@ -0,0 +1,192 @@ +import numpy as np +import pytest + +from pandas.compat import IS64 + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("ufunc", [np.abs, np.sign]) +# np.sign emits a warning with nans, +@pytest.mark.filterwarnings("ignore:invalid value encountered in sign") +def test_ufuncs_single(ufunc): + a = pd.array([1, 2, -3, np.nan], dtype="Float64") + result = ufunc(a) + expected = pd.array(ufunc(a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt]) +def test_ufuncs_single_float(ufunc): + a = pd.array([1.0, 0.2, 3.0, np.nan], dtype="Float64") + with np.errstate(invalid="ignore"): + result = ufunc(a) + expected = pd.array(ufunc(a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + with np.errstate(invalid="ignore"): + result = ufunc(s) + expected = pd.Series(ufunc(s.astype(float)), dtype="Float64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.add, np.subtract]) +def test_ufuncs_binary_float(ufunc): + # two FloatingArrays + a = pd.array([1, 0.2, -3, np.nan], dtype="Float64") + result = ufunc(a, a) + expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + # FloatingArray with numpy array + arr = np.array([1, 2, 3, 4]) + result = 
ufunc(a, arr) + expected = pd.array(ufunc(a.astype(float), arr), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = pd.array(ufunc(arr, a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + # FloatingArray with scalar + result = ufunc(a, 1) + expected = pd.array(ufunc(a.astype(float), 1), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + result = ufunc(1, a) + expected = pd.array(ufunc(1, a.astype(float)), dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("values", [[0, 1], [0, None]]) +def test_ufunc_reduce_raises(values): + arr = pd.array(values, dtype="Float64") + + res = np.add.reduce(arr) + expected = arr.sum(skipna=False) + tm.assert_almost_equal(res, expected) + + +@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system") +@pytest.mark.parametrize( + "pandasmethname, kwargs", + [ + ("var", {"ddof": 0}), + ("var", {"ddof": 1}), + ("kurtosis", {}), + ("skew", {}), + ("sem", {}), + ], +) +def test_stat_method(pandasmethname, kwargs): + s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64") + pandasmeth = getattr(s, pandasmethname) + result = pandasmeth(**kwargs) + s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64") + pandasmeth = getattr(s2, pandasmethname) + expected = pandasmeth(**kwargs) + assert expected == result + + +def test_value_counts_na(): + arr = pd.array([0.1, 0.2, 0.1, pd.NA], dtype="Float64") + result = arr.value_counts(dropna=False) + idx = pd.Index([0.1, 0.2, pd.NA], dtype=arr.dtype) + assert idx.dtype == arr.dtype + expected = pd.Series([2, 1, 1], index=idx, dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([2, 1], index=idx[:-1], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_value_counts_empty(): + ser = pd.Series([], dtype="Float64") + result = ser.value_counts() + idx = pd.Index([], dtype="Float64") + assert idx.dtype == "Float64" + expected = pd.Series([], index=idx, dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_value_counts_with_normalize(): + ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64") + result = ser.value_counts(normalize=True) + expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + assert expected.index.dtype == ser.dtype + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 4]) +def test_floating_array_sum(skipna, min_count, dtype): + arr = pd.array([1, 2, 3, None], dtype=dtype) + result = arr.sum(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 6.0 + else: + assert result is pd.NA + + +@pytest.mark.parametrize( + "values, expected", [([1, 2, 3], 6.0), ([1, 2, 3, None], 6.0), ([None], 0.0)] +) +def test_floating_array_numpy_sum(values, expected): + arr = pd.array(values, dtype="Float64") + result = np.sum(arr) + assert result == expected + + +@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) +def test_preserve_dtypes(op): + df = pd.DataFrame( + { + "A": ["a", "b", "b"], + "B": [1, None, 3], + "C": pd.array([0.1, None, 3.0], dtype="Float64"), + } + ) + + # op + result = getattr(df.C, op)() + assert isinstance(result, np.float64) + + # groupby + result = getattr(df.groupby("A"), op)() + + expected = pd.DataFrame( + {"B": np.array([1.0, 3.0]), "C": 
pd.array([0.1, 3], dtype="Float64")}, + index=pd.Index(["a", "b"], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("method", ["min", "max"]) +def test_floating_array_min_max(skipna, method, dtype): + arr = pd.array([0.0, 1.0, None], dtype=dtype) + func = getattr(arr, method) + result = func(skipna=skipna) + if skipna: + assert result == (0 if method == "min" else 1) + else: + assert result is pd.NA + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 9]) +def test_floating_array_prod(skipna, min_count, dtype): + arr = pd.array([1.0, 2.0, None], dtype=dtype) + result = arr.prod(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 2 + else: + assert result is pd.NA diff --git a/pandas/tests/arrays/floating/test_repr.py b/pandas/tests/arrays/floating/test_repr.py new file mode 100644 index 00000000..a8868fd9 --- /dev/null +++ b/pandas/tests/arrays/floating/test_repr.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + np.dtype(dtype.type).kind == "f" + assert dtype.name is not None + + +@pytest.mark.parametrize( + "dtype, expected", + [(Float32Dtype(), "Float32Dtype()"), (Float64Dtype(), "Float64Dtype()")], +) +def test_repr_dtype(dtype, expected): + assert repr(dtype) == expected + + +def test_repr_array(): + result = repr(pd.array([1.0, None, 3.0])) + expected = "\n[1.0, , 3.0]\nLength: 3, dtype: Float64" + assert result == expected + + +def test_repr_array_long(): + data = pd.array([1.0, 2.0, None] * 1000) + expected = """ +[ 1.0, 2.0, , 1.0, 2.0, , 1.0, 2.0, , 1.0, + ... 
+ , 1.0, 2.0, , 1.0, 2.0, , 1.0, 2.0, ] +Length: 3000, dtype: Float64""" + result = repr(data) + assert result == expected + + +def test_frame_repr(data_missing): + + df = pd.DataFrame({"A": data_missing}) + result = repr(df) + expected = " A\n0 \n1 0.1" + assert result == expected diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py new file mode 100644 index 00000000..2ed52439 --- /dev/null +++ b/pandas/tests/arrays/floating/test_to_numpy.py @@ -0,0 +1,132 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy(box): + con = pd.Series if box else pd.array + + # default (with or without missing values) -> object dtype + arr = con([0.1, 0.2, 0.3], dtype="Float64") + result = arr.to_numpy() + expected = np.array([0.1, 0.2, 0.3], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([0.1, 0.2, None], dtype="Float64") + result = arr.to_numpy() + expected = np.array([0.1, 0.2, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_float(box): + con = pd.Series if box else pd.array + + # no missing values -> can convert to float, otherwise raises + arr = con([0.1, 0.2, 0.3], dtype="Float64") + result = arr.to_numpy(dtype="float64") + expected = np.array([0.1, 0.2, 0.3], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + arr = con([0.1, 0.2, None], dtype="Float64") + with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"): + result = arr.to_numpy(dtype="float64") + + # need to explicitly specify na_value + result = arr.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([0.1, 0.2, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_int(box): + con = pd.Series if box else pd.array + + # no missing values -> can convert to int, otherwise raises + arr = con([1.0, 2.0, 3.0], dtype="Float64") + result = arr.to_numpy(dtype="int64") + expected = np.array([1, 2, 3], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + arr = con([1.0, 2.0, None], dtype="Float64") + with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): + result = arr.to_numpy(dtype="int64") + + # automatic casting (floors the values) + arr = con([0.1, 0.9, 1.1], dtype="Float64") + result = arr.to_numpy(dtype="int64") + expected = np.array([0, 0, 1], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_na_value(box): + con = pd.Series if box else pd.array + + arr = con([0.0, 1.0, None], dtype="Float64") + result = arr.to_numpy(dtype=object, na_value=None) + expected = np.array([0.0, 1.0, None], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype=bool, na_value=False) + expected = np.array([False, True, False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="int64", na_value=-99) + expected = np.array([0, 1, -99], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_na_value_with_nan(): + # array with both NaN and NA -> only fill NA with `na_value` + arr = 
FloatingArray(np.array([0.0, np.nan, 0.0]), np.array([False, False, True])) + result = arr.to_numpy(dtype="float64", na_value=-1) + expected = np.array([0.0, np.nan, -1.0], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"]) +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_dtype(box, dtype): + con = pd.Series if box else pd.array + arr = con([0.0, 1.0], dtype="Float64") + + result = arr.to_numpy(dtype=dtype) + expected = np.array([0, 1], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"]) +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_na_raises(box, dtype): + con = pd.Series if box else pd.array + arr = con([0.0, 1.0, None], dtype="Float64") + with pytest.raises(ValueError, match=dtype): + arr.to_numpy(dtype=dtype) + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_string(box, dtype): + con = pd.Series if box else pd.array + arr = con([0.0, 1.0, None], dtype="Float64") + + result = arr.to_numpy(dtype="str") + expected = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32") + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_copy(): + # to_numpy can be zero-copy if no missing values + arr = pd.array([0.1, 0.2, 0.3], dtype="Float64") + result = arr.to_numpy(dtype="float64") + result[0] = 10 + tm.assert_extension_array_equal(arr, pd.array([10, 0.2, 0.3], dtype="Float64")) + + arr = pd.array([0.1, 0.2, 0.3], dtype="Float64") + result = arr.to_numpy(dtype="float64", copy=True) + result[0] = 10 + tm.assert_extension_array_equal(arr, pd.array([0.1, 0.2, 0.3], dtype="Float64")) diff --git a/pandas/tests/arrays/integer/__init__.py b/pandas/tests/arrays/integer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/integer/conftest.py b/pandas/tests/arrays/integer/conftest.py new file mode 100644 index 00000000..f73400df --- /dev/null +++ b/pandas/tests/arrays/integer/conftest.py @@ -0,0 +1,68 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) + + +@pytest.fixture( + params=[ + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + ] +) +def dtype(request): + """Parametrized fixture returning integer 'dtype'""" + return request.param() + + +@pytest.fixture +def data(dtype): + """ + Fixture returning 'data' array with valid and missing values according to + parametrized integer 'dtype'. + + Used to test dtype conversion with and without missing values. + """ + return pd.array( + list(range(8)) + [np.nan] + list(range(10, 98)) + [np.nan] + [99, 100], + dtype=dtype, + ) + + +@pytest.fixture +def data_missing(dtype): + """ + Fixture returning array with exactly one NaN and one valid integer, + according to parametrized integer 'dtype'. + + Used to test dtype conversion with and without missing values. + """ + return pd.array([np.nan, 1], dtype=dtype) + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture returning 'data' or 'data_missing' integer arrays. + + Used to test dtype conversion with and without missing values. 
+ """ + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py new file mode 100644 index 00000000..5b9780e3 --- /dev/null +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -0,0 +1,367 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray +import pandas.core.ops as ops + +# Basic test for the arithmetic array ops +# ----------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "opname, exp", + [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])], + ids=["add", "mul"], +) +def test_add_mul(dtype, opname, exp): + a = pd.array([0, 1, None, 3, 4], dtype=dtype) + b = pd.array([1, 2, 3, None, 5], dtype=dtype) + + # array / array + expected = pd.array(exp, dtype=dtype) + + op = getattr(operator, opname) + result = op(a, b) + tm.assert_extension_array_equal(result, expected) + + op = getattr(ops, "r" + opname) + result = op(a, b) + tm.assert_extension_array_equal(result, expected) + + +def test_sub(dtype): + a = pd.array([1, 2, 3, None, 5], dtype=dtype) + b = pd.array([0, 1, None, 3, 4], dtype=dtype) + + result = a - b + expected = pd.array([1, 1, None, None, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_div(dtype): + a = pd.array([1, 2, 3, None, 5], dtype=dtype) + b = pd.array([0, 1, None, 3, 4], dtype=dtype) + + result = a / b + expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) +def test_divide_by_zero(zero, negative): + # https://github.com/pandas-dev/pandas/issues/27398, GH#22793 + a = pd.array([0, 1, -1, None], dtype="Int64") + result = a / zero + expected = FloatingArray( + np.array([np.nan, np.inf, -np.inf, 1], dtype="float64"), + np.array([False, False, False, True]), + ) + if negative: + expected *= -1 + tm.assert_extension_array_equal(result, expected) + + +def test_floordiv(dtype): + a = pd.array([1, 2, 3, None, 5], dtype=dtype) + b = pd.array([0, 1, None, 3, 4], dtype=dtype) + + result = a // b + # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet) + expected = pd.array([0, 2, None, None, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype): + # GH 48223: Aligns with non-masked floordiv + # but differs from numpy + # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740 + ser = pd.Series([0, 1], dtype=any_int_ea_dtype) + result = 1 // ser + expected = pd.Series([np.inf, 1.0], dtype="Float64") + tm.assert_series_equal(result, expected) + + ser_non_nullable = ser.astype(ser.dtype.numpy_dtype) + result = 1 // ser_non_nullable + expected = expected.astype(np.float64) + tm.assert_series_equal(result, expected) + + +def test_mod(dtype): + a = pd.array([1, 2, 3, None, 5], dtype=dtype) + b = pd.array([0, 1, None, 3, 4], dtype=dtype) + + result = a % b + expected = pd.array([0, 0, None, None, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_pow_scalar(): + a = pd.array([-1, 0, 1, None, 2], dtype="Int64") + result = a**0 + expected = pd.array([1, 1, 1, 1, 1], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result 
= a**1 + expected = pd.array([-1, 0, 1, None, 2], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a**pd.NA + expected = pd.array([None, None, 1, None, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a**np.nan + expected = FloatingArray( + np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"), + np.array([False, False, False, True, False]), + ) + tm.assert_extension_array_equal(result, expected) + + # reversed + a = a[1:] # Can't raise integers to negative powers. + + result = 0**a + expected = pd.array([1, 0, None, 0], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = 1**a + expected = pd.array([1, 1, 1, 1], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = pd.NA**a + expected = pd.array([1, None, None, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = np.nan**a + expected = FloatingArray( + np.array([1, np.nan, np.nan, np.nan], dtype="float64"), + np.array([False, False, True, False]), + ) + tm.assert_extension_array_equal(result, expected) + + +def test_pow_array(): + a = pd.array([0, 0, 0, 1, 1, 1, None, None, None]) + b = pd.array([0, 1, None, 0, 1, None, 0, 1, None]) + result = a**b + expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None]) + tm.assert_extension_array_equal(result, expected) + + +def test_rpow_one_to_na(): + # https://github.com/pandas-dev/pandas/issues/22022 + # https://github.com/pandas-dev/pandas/issues/29997 + arr = pd.array([np.nan, np.nan], dtype="Int64") + result = np.array([1.0, 2.0]) ** arr + expected = pd.array([1.0, np.nan], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("other", [0, 0.5]) +def test_numpy_zero_dim_ndarray(other): + arr = pd.array([1, None, 2]) + result = arr + np.array(other) + expected = arr + other + tm.assert_equal(result, expected) + + +# Test generic characteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_values(data, all_arithmetic_operators): + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + + # invalid scalars + msg = "|".join( + [ + r"can only perform ops with numeric values", + r"IntegerArray cannot perform the operation mod", + r"unsupported operand type", + r"can only concatenate str \(not \"int\"\) to str", + "not all arguments converted during string", + "ufunc '.*' not supported for the input types, and the inputs could not", + "ufunc '.*' did not contain a loop with signature matching types", + "Addition/subtraction of integers and integer-arrays with Timestamp", + ] + ) + with pytest.raises(TypeError, match=msg): + ops("foo") + with pytest.raises(TypeError, match=msg): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + str_ser = pd.Series("foo", index=s.index) + # with pytest.raises(TypeError, match=msg): + if all_arithmetic_operators in [ + "__mul__", + "__rmul__", + ]: # (data[~data.isna()] >= 0).all(): + res = ops(str_ser) + expected = pd.Series(["foo" * x for x in data], index=s.index) + tm.assert_series_equal(res, expected) + else: + with pytest.raises(TypeError, match=msg): + ops(str_ser) + + msg = "|".join( + [ + "can only perform ops with numeric values", + "cannot perform .* with this index type: DatetimeArray", + "Addition/subtraction of integers and integer-arrays " + "with DatetimeArray is no longer supported. 
*", + "unsupported operand type", + r"can only concatenate str \(not \"int\"\) to str", + "not all arguments converted during string", + "cannot subtract DatetimeArray from ndarray", + ] + ) + with pytest.raises(TypeError, match=msg): + ops(pd.Series(pd.date_range("20180101", periods=len(s)))) + + +# Various +# ----------------------------------------------------------------------------- + + +# TODO test unsigned overflow + + +def test_arith_coerce_scalar(data, all_arithmetic_operators): + op = tm.get_op_from_name(all_arithmetic_operators) + s = pd.Series(data) + other = 0.01 + + result = op(s, other) + expected = op(s.astype(float), other) + expected = expected.astype("Float64") + + # rmod results in NaN that wasn't NA in original nullable Series -> unmask it + if all_arithmetic_operators == "__rmod__": + mask = (s == 0).fillna(False).to_numpy(bool) + expected.array._mask[mask] = False + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("other", [1.0, np.array(1.0)]) +def test_arithmetic_conversion(all_arithmetic_operators, other): + # if we have a float operand we should have a float result + # if that is equal to an integer + op = tm.get_op_from_name(all_arithmetic_operators) + + s = pd.Series([1, 2, 3], dtype="Int64") + result = op(s, other) + assert result.dtype == "Float64" + + +def test_cross_type_arithmetic(): + + df = pd.DataFrame( + { + "A": pd.Series([1, 2, np.nan], dtype="Int64"), + "B": pd.Series([1, np.nan, 3], dtype="UInt8"), + "C": [1, 2, 3], + } + ) + + result = df.A + df.C + expected = pd.Series([2, 4, np.nan], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = (df.A + df.C) * 3 == 12 + expected = pd.Series([False, True, None], dtype="boolean") + tm.assert_series_equal(result, expected) + + result = df.A + df.B + expected = pd.Series([2, np.nan, np.nan], dtype="Int64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("op", ["mean"]) +def test_reduce_to_float(op): + # some reduce ops always return float, even if the result + # is a rounded number + df = pd.DataFrame( + { + "A": ["a", "b", "b"], + "B": [1, None, 3], + "C": pd.array([1, None, 3], dtype="Int64"), + } + ) + + # op + result = getattr(df.C, op)() + assert isinstance(result, float) + + # groupby + result = getattr(df.groupby("A"), op)() + + expected = pd.DataFrame( + {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")}, + index=pd.Index(["a", "b"], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "source, neg_target, abs_target", + [ + ([1, 2, 3], [-1, -2, -3], [1, 2, 3]), + ([1, 2, None], [-1, -2, None], [1, 2, None]), + ([-1, 0, 1], [1, 0, -1], [1, 0, 1]), + ], +) +def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target): + dtype = any_signed_int_ea_dtype + arr = pd.array(source, dtype=dtype) + neg_result, pos_result, abs_result = -arr, +arr, abs(arr) + neg_target = pd.array(neg_target, dtype=dtype) + abs_target = pd.array(abs_target, dtype=dtype) + + tm.assert_extension_array_equal(neg_result, neg_target) + tm.assert_extension_array_equal(pos_result, arr) + assert not tm.shares_memory(pos_result, arr) + tm.assert_extension_array_equal(abs_result, abs_target) + + +def test_values_multiplying_large_series_by_NA(): + # GH#33701 + + result = pd.NA * pd.Series(np.zeros(10001)) + expected = pd.Series([pd.NA] * 10001) + + tm.assert_series_equal(result, expected) + + +def test_bitwise(dtype): + left = pd.array([1, None, 3, 4], dtype=dtype) + right = pd.array([None, 3, 
5, 4], dtype=dtype) + + result = left | right + expected = pd.array([None, None, 3 | 5, 4 | 4], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = left & right + expected = pd.array([None, None, 3 & 5, 4 & 4], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = left ^ right + expected = pd.array([None, None, 3 ^ 5, 4 ^ 4], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + # TODO: desired behavior when operating with boolean? defer? + + floats = right.astype("Float64") + with pytest.raises(TypeError, match="unsupported operand type"): + left | floats + with pytest.raises(TypeError, match="unsupported operand type"): + left & floats + with pytest.raises(TypeError, match="unsupported operand type"): + left ^ floats diff --git a/pandas/tests/arrays/integer/test_comparison.py b/pandas/tests/arrays/integer/test_comparison.py new file mode 100644 index 00000000..3bbf6866 --- /dev/null +++ b/pandas/tests/arrays/integer/test_comparison.py @@ -0,0 +1,38 @@ +import pytest + +import pandas as pd +from pandas.tests.arrays.masked_shared import ( + ComparisonOps, + NumericOps, +) + + +class TestComparisonOps(NumericOps, ComparisonOps): + @pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1]) + def test_scalar(self, other, comparison_op, dtype): + ComparisonOps.test_scalar(self, other, comparison_op, dtype) + + def test_compare_to_int(self, dtype, comparison_op): + # GH 28930 + op_name = f"__{comparison_op.__name__}__" + s1 = pd.Series([1, None, 3], dtype=dtype) + s2 = pd.Series([1, None, 3], dtype="float") + + method = getattr(s1, op_name) + result = method(2) + + method = getattr(s2, op_name) + expected = method(2).astype("boolean") + expected[s2.isna()] = pd.NA + + self.assert_series_equal(result, expected) + + +def test_equals(): + # GH-30652 + # equals is generally tested in /tests/extension/base/methods, but this + # specifically tests that two arrays of the same class but different dtype + # do not evaluate equal + a1 = pd.array([1, 2, None], dtype="Int64") + a2 = pd.array([1, 2, None], dtype="Int32") + assert a1.equals(a2) is False diff --git a/pandas/tests/arrays/integer/test_concat.py b/pandas/tests/arrays/integer/test_concat.py new file mode 100644 index 00000000..feba574d --- /dev/null +++ b/pandas/tests/arrays/integer/test_concat.py @@ -0,0 +1,69 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "to_concat_dtypes, result_dtype", + [ + (["Int64", "Int64"], "Int64"), + (["UInt64", "UInt64"], "UInt64"), + (["Int8", "Int8"], "Int8"), + (["Int8", "Int16"], "Int16"), + (["UInt8", "Int8"], "Int16"), + (["Int32", "UInt32"], "Int64"), + (["Int64", "UInt64"], "Float64"), + (["Int64", "boolean"], "object"), + (["UInt8", "boolean"], "object"), + ], +) +def test_concat_series(to_concat_dtypes, result_dtype): + # we expect the same dtypes as we would get with non-masked inputs, + # just masked where available. 
+ + result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes]) + expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype( + result_dtype + ) + tm.assert_series_equal(result, expected) + + # order doesn't matter for result + result = pd.concat( + [pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes[::-1]] + ) + expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype( + result_dtype + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "to_concat_dtypes, result_dtype", + [ + (["Int64", "int64"], "Int64"), + (["UInt64", "uint64"], "UInt64"), + (["Int8", "int8"], "Int8"), + (["Int8", "int16"], "Int16"), + (["UInt8", "int8"], "Int16"), + (["Int32", "uint32"], "Int64"), + (["Int64", "uint64"], "Float64"), + (["Int64", "bool"], "object"), + (["UInt8", "bool"], "object"), + ], +) +def test_concat_series_with_numpy(to_concat_dtypes, result_dtype): + # we expect the same dtypes as we would get with non-masked inputs, + # just masked where available. + + s1 = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0]) + s2 = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1])) + result = pd.concat([s1, s2], ignore_index=True) + expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(result_dtype) + tm.assert_series_equal(result, expected) + + # order doesn't matter for result + result = pd.concat([s2, s1], ignore_index=True) + expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(result_dtype) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py new file mode 100644 index 00000000..43ef46dd --- /dev/null +++ b/pandas/tests/arrays/integer/test_construction.py @@ -0,0 +1,236 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_integer +from pandas.core.arrays import IntegerArray +from pandas.core.arrays.integer import ( + Int8Dtype, + Int32Dtype, + Int64Dtype, +) + + +@pytest.fixture(params=[pd.array, IntegerArray._from_sequence]) +def constructor(request): + """Fixture returning parametrized IntegerArray from given sequence. + + Used to test dtype conversions. 
+ """ + return request.param + + +def test_uses_pandas_na(): + a = pd.array([1, None], dtype=Int64Dtype()) + assert a[1] is pd.NA + + +def test_from_dtype_from_float(data): + # construct from our dtype & string dtype + dtype = data.dtype + + # from float + expected = pd.Series(data) + result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + # from int / list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + # from int / array + expected = pd.Series(data).dropna().reset_index(drop=True) + dropped = np.array(data.dropna()).astype(np.dtype(dtype.type)) + result = pd.Series(dropped, dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + +def test_conversions(data_missing): + # astype to object series + df = pd.DataFrame({"A": data_missing}) + result = df["A"].astype("object") + expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A") + tm.assert_series_equal(result, expected) + + # convert to object ndarray + # we assert that we are exactly equal + # including type conversions of scalars + result = df["A"].astype("object").values + expected = np.array([pd.NA, 1], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + for r, e in zip(result, expected): + if pd.isnull(r): + assert pd.isnull(e) + elif is_integer(r): + assert r == e + assert is_integer(e) + else: + assert r == e + assert type(r) == type(e) + + +def test_integer_array_constructor(): + values = np.array([1, 2, 3, 4], dtype="int64") + mask = np.array([False, False, False, True], dtype="bool") + + result = IntegerArray(values, mask) + expected = pd.array([1, 2, 3, np.nan], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + msg = r".* should be .* numpy array. 
Use the 'pd.array' function instead" + with pytest.raises(TypeError, match=msg): + IntegerArray(values.tolist(), mask) + + with pytest.raises(TypeError, match=msg): + IntegerArray(values, mask.tolist()) + + with pytest.raises(TypeError, match=msg): + IntegerArray(values.astype(float), mask) + msg = r"__init__\(\) missing 1 required positional argument: 'mask'" + with pytest.raises(TypeError, match=msg): + IntegerArray(values) + + +def test_integer_array_constructor_copy(): + values = np.array([1, 2, 3, 4], dtype="int64") + mask = np.array([False, False, False, True], dtype="bool") + + result = IntegerArray(values, mask) + assert result._data is values + assert result._mask is mask + + result = IntegerArray(values, mask, copy=True) + assert result._data is not values + assert result._mask is not mask + + +@pytest.mark.parametrize( + "a, b", + [ + ([1, None], [1, np.nan]), + ([None], [np.nan]), + ([None, np.nan], [np.nan, np.nan]), + ([np.nan, np.nan], [np.nan, np.nan]), + ], +) +def test_to_integer_array_none_is_nan(a, b): + result = pd.array(a, dtype="Int64") + expected = pd.array(b, dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + ["foo", "bar"], + "foo", + 1, + 1.0, + pd.date_range("20130101", periods=2), + np.array(["foo"]), + [[1, 2], [3, 4]], + [np.nan, {"a": 1}], + ], +) +def test_to_integer_array_error(values): + # error in converting existing arrays to IntegerArrays + msg = "|".join( + [ + r"cannot be converted to IntegerDtype", + r"invalid literal for int\(\) with base 10:", + r"values must be a 1D list-like", + r"Cannot pass scalar", + r"int\(\) argument must be a string", + ] + ) + with pytest.raises((ValueError, TypeError), match=msg): + pd.array(values, dtype="Int64") + + with pytest.raises((ValueError, TypeError), match=msg): + IntegerArray._from_sequence(values) + + +def test_to_integer_array_inferred_dtype(constructor): + # if values has dtype -> respect it + result = constructor(np.array([1, 2], dtype="int8")) + assert result.dtype == Int8Dtype() + result = constructor(np.array([1, 2], dtype="int32")) + assert result.dtype == Int32Dtype() + + # if values have no dtype -> always int64 + result = constructor([1, 2]) + assert result.dtype == Int64Dtype() + + +def test_to_integer_array_dtype_keyword(constructor): + result = constructor([1, 2], dtype="Int8") + assert result.dtype == Int8Dtype() + + # if values has dtype -> override it + result = constructor(np.array([1, 2], dtype="int8"), dtype="Int32") + assert result.dtype == Int32Dtype() + + +def test_to_integer_array_float(): + result = IntegerArray._from_sequence([1.0, 2.0]) + expected = pd.array([1, 2], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + with pytest.raises(TypeError, match="cannot safely cast non-equivalent"): + IntegerArray._from_sequence([1.5, 2.0]) + + # for float dtypes, the itemsize is not preserved + result = IntegerArray._from_sequence(np.array([1.0, 2.0], dtype="float32")) + assert result.dtype == Int64Dtype() + + +def test_to_integer_array_str(): + result = IntegerArray._from_sequence(["1", "2", None]) + expected = pd.array([1, 2, np.nan], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + with pytest.raises( + ValueError, match=r"invalid literal for int\(\) with base 10: .*" + ): + IntegerArray._from_sequence(["1", "2", ""]) + + with pytest.raises( + ValueError, match=r"invalid literal for int\(\) with base 10: .*" + ): + IntegerArray._from_sequence(["1.5", "2.0"]) + + 
+@pytest.mark.parametrize( + "bool_values, int_values, target_dtype, expected_dtype", + [ + ([False, True], [0, 1], Int64Dtype(), Int64Dtype()), + ([False, True], [0, 1], "Int64", Int64Dtype()), + ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()), + ], +) +def test_to_integer_array_bool( + constructor, bool_values, int_values, target_dtype, expected_dtype +): + result = constructor(bool_values, dtype=target_dtype) + assert result.dtype == expected_dtype + expected = pd.array(int_values, dtype=target_dtype) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values, to_dtype, result_dtype", + [ + (np.array([1], dtype="int64"), None, Int64Dtype), + (np.array([1, np.nan]), None, Int64Dtype), + (np.array([1, np.nan]), "int8", Int8Dtype), + ], +) +def test_to_integer_array(values, to_dtype, result_dtype): + # convert existing arrays to IntegerArrays + result = IntegerArray._from_sequence(values, dtype=to_dtype) + assert result.dtype == result_dtype() + expected = pd.array(values, dtype=result_dtype()) + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py new file mode 100644 index 00000000..1566476c --- /dev/null +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -0,0 +1,297 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.generic import ABCIndex + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.integer import ( + Int8Dtype, + UInt32Dtype, +) + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + if dtype.is_signed_integer: + assert np.dtype(dtype.type).kind == "i" + else: + assert np.dtype(dtype.type).kind == "u" + assert dtype.name is not None + + +@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) +def test_preserve_dtypes(op): + # TODO(#22346): preserve Int64 dtype + # for ops that enable (mean would actually work here + # but generally it is a float return value) + df = pd.DataFrame( + { + "A": ["a", "b", "b"], + "B": [1, None, 3], + "C": pd.array([1, None, 3], dtype="Int64"), + } + ) + + # op + result = getattr(df.C, op)() + if op in {"sum", "prod", "min", "max"}: + assert isinstance(result, np.int64) + else: + assert isinstance(result, int) + + # groupby + result = getattr(df.groupby("A"), op)() + + expected = pd.DataFrame( + {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")}, + index=pd.Index(["a", "b"], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +def test_astype_nansafe(): + # see gh-22343 + arr = pd.array([np.nan, 1, 2], dtype="Int8") + msg = "cannot convert NA to integer" + + with pytest.raises(ValueError, match=msg): + arr.astype("uint32") + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_construct_index(all_data, dropna): + # ensure that we do not coerce to Float64Index, rather + # keep as Index + + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Index(pd.array(other, dtype=all_data.dtype)) + expected = pd.Index(other, dtype=all_data.dtype) + assert all_data.dtype == expected.dtype # dont coerce to object + + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_astype_index(all_data, dropna): + # as an int/uint index to Index + + all_data = all_data[:10] + if dropna: + other = all_data[~all_data.isna()] + else: + other = all_data + + dtype = all_data.dtype + idx = 
pd.Index._with_infer(np.array(other)) + assert isinstance(idx, ABCIndex) + + result = idx.astype(dtype) + expected = idx.astype(object).astype(dtype) + tm.assert_index_equal(result, expected) + + +def test_astype(all_data): + all_data = all_data[:10] + + ints = all_data[~all_data.isna()] + mixed = all_data + dtype = Int8Dtype() + + # coerce to same type - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype) + expected = pd.Series(ints) + tm.assert_series_equal(result, expected) + + # coerce to same other - ints + s = pd.Series(ints) + result = s.astype(dtype) + expected = pd.Series(ints, dtype=dtype) + tm.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype.numpy_dtype) + expected = pd.Series(ints._data.astype(all_data.dtype.numpy_dtype)) + tm.assert_series_equal(result, expected) + + # coerce to same type - mixed + s = pd.Series(mixed) + result = s.astype(all_data.dtype) + expected = pd.Series(mixed) + tm.assert_series_equal(result, expected) + + # coerce to same other - mixed + s = pd.Series(mixed) + result = s.astype(dtype) + expected = pd.Series(mixed, dtype=dtype) + tm.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - mixed + s = pd.Series(mixed) + msg = "cannot convert NA to integer" + with pytest.raises(ValueError, match=msg): + s.astype(all_data.dtype.numpy_dtype) + + # coerce to object + s = pd.Series(mixed) + result = s.astype("object") + expected = pd.Series(np.asarray(mixed)) + tm.assert_series_equal(result, expected) + + +def test_astype_copy(): + arr = pd.array([1, 2, 3, None], dtype="Int64") + orig = pd.array([1, 2, 3, None], dtype="Int64") + + # copy=True -> ensure both data and mask are actual copies + result = arr.astype("Int64", copy=True) + assert result is not arr + assert not tm.shares_memory(result, arr) + result[0] = 10 + tm.assert_extension_array_equal(arr, orig) + result[0] = pd.NA + tm.assert_extension_array_equal(arr, orig) + + # copy=False + result = arr.astype("Int64", copy=False) + assert result is arr + assert np.shares_memory(result._data, arr._data) + assert np.shares_memory(result._mask, arr._mask) + result[0] = 10 + assert arr[0] == 10 + result[0] = pd.NA + assert arr[0] is pd.NA + + # astype to different dtype -> always needs a copy -> even with copy=False + # we need to ensure that also the mask is actually copied + arr = pd.array([1, 2, 3, None], dtype="Int64") + orig = pd.array([1, 2, 3, None], dtype="Int64") + + result = arr.astype("Int32", copy=False) + assert not tm.shares_memory(result, arr) + result[0] = 10 + tm.assert_extension_array_equal(arr, orig) + result[0] = pd.NA + tm.assert_extension_array_equal(arr, orig) + + +def test_astype_to_larger_numpy(): + a = pd.array([1, 2], dtype="Int32") + result = a.astype("int64") + expected = np.array([1, 2], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + a = pd.array([1, 2], dtype="UInt32") + result = a.astype("uint64") + expected = np.array([1, 2], dtype="uint64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"]) +def test_astype_specific_casting(dtype): + s = pd.Series([1, 2, 3], dtype="Int64") + result = s.astype(dtype) + expected = pd.Series([1, 2, 3], dtype=dtype) + tm.assert_series_equal(result, expected) + + s = pd.Series([1, 2, 3, None], dtype="Int64") + result = s.astype(dtype) + expected = pd.Series([1, 2, 3, None], dtype=dtype) + tm.assert_series_equal(result, expected) + + 
+def test_astype_floating(): + arr = pd.array([1, 2, None], dtype="Int64") + result = arr.astype("Float64") + expected = pd.array([1.0, 2.0, None], dtype="Float64") + tm.assert_extension_array_equal(result, expected) + + +def test_astype_dt64(): + # GH#32435 + arr = pd.array([1, 2, 3, pd.NA]) * 10**9 + + result = arr.astype("datetime64[ns]") + + expected = np.array([1, 2, 3, "NaT"], dtype="M8[s]").astype("M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + +def test_construct_cast_invalid(dtype): + + msg = "cannot safely" + arr = [1.2, 2.3, 3.7] + with pytest.raises(TypeError, match=msg): + pd.array(arr, dtype=dtype) + + with pytest.raises(TypeError, match=msg): + pd.Series(arr).astype(dtype) + + arr = [1.2, 2.3, 3.7, np.nan] + with pytest.raises(TypeError, match=msg): + pd.array(arr, dtype=dtype) + + with pytest.raises(TypeError, match=msg): + pd.Series(arr).astype(dtype) + + +@pytest.mark.parametrize("in_series", [True, False]) +def test_to_numpy_na_nan(in_series): + a = pd.array([0, 1, None], dtype="Int64") + if in_series: + a = pd.Series(a) + + result = a.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([0.0, 1.0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + result = a.to_numpy(dtype="int64", na_value=-1) + expected = np.array([0, 1, -1], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = a.to_numpy(dtype="bool", na_value=False) + expected = np.array([False, True, False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("in_series", [True, False]) +@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"]) +def test_to_numpy_dtype(dtype, in_series): + a = pd.array([0, 1], dtype="Int64") + if in_series: + a = pd.Series(a) + + result = a.to_numpy(dtype=dtype) + expected = np.array([0, 1], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["float64", "int64", "bool"]) +def test_to_numpy_na_raises(dtype): + a = pd.array([0, 1, None], dtype="Int64") + with pytest.raises(ValueError, match=dtype): + a.to_numpy(dtype=dtype) + + +def test_astype_str(): + a = pd.array([1, 2, None], dtype="Int64") + expected = np.array(["1", "2", ""], dtype=f"{tm.ENDIAN}U21") + + tm.assert_numpy_array_equal(a.astype(str), expected) + tm.assert_numpy_array_equal(a.astype("str"), expected) + + +def test_astype_boolean(): + # https://github.com/pandas-dev/pandas/issues/31102 + a = pd.array([1, 0, -1, 2, None], dtype="Int64") + result = a.astype("boolean") + expected = pd.array([True, False, True, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py new file mode 100644 index 00000000..73c8d4e6 --- /dev/null +++ b/pandas/tests/arrays/integer/test_function.py @@ -0,0 +1,201 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import FloatingArray + + +@pytest.mark.parametrize("ufunc", [np.abs, np.sign]) +# np.sign emits a warning with nans, +@pytest.mark.filterwarnings("ignore:invalid value encountered in sign") +def test_ufuncs_single_int(ufunc): + a = pd.array([1, 2, -3, np.nan]) + result = ufunc(a) + expected = pd.array(ufunc(a.astype(float)), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(pd.array(ufunc(a.astype(float)), dtype="Int64")) + 
tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt]) +def test_ufuncs_single_float(ufunc): + a = pd.array([1, 2, -3, np.nan]) + with np.errstate(invalid="ignore"): + result = ufunc(a) + expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + with np.errstate(invalid="ignore"): + result = ufunc(s) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.add, np.subtract]) +def test_ufuncs_binary_int(ufunc): + # two IntegerArrays + a = pd.array([1, 2, -3, np.nan]) + result = ufunc(a, a) + expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + # IntegerArray with numpy array + arr = np.array([1, 2, 3, 4]) + result = ufunc(a, arr) + expected = pd.array(ufunc(a.astype(float), arr), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = pd.array(ufunc(arr, a.astype(float)), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + # IntegerArray with scalar + result = ufunc(a, 1) + expected = pd.array(ufunc(a.astype(float), 1), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = ufunc(1, a) + expected = pd.array(ufunc(1, a.astype(float)), dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + +def test_ufunc_binary_output(): + a = pd.array([1, 2, np.nan]) + result = np.modf(a) + expected = np.modf(a.to_numpy(na_value=np.nan, dtype="float")) + expected = (pd.array(expected[0]), pd.array(expected[1])) + + assert isinstance(result, tuple) + assert len(result) == 2 + + for x, y in zip(result, expected): + tm.assert_extension_array_equal(x, y) + + +@pytest.mark.parametrize("values", [[0, 1], [0, None]]) +def test_ufunc_reduce_raises(values): + arr = pd.array(values) + + res = np.add.reduce(arr) + expected = arr.sum(skipna=False) + tm.assert_almost_equal(res, expected) + + +@pytest.mark.parametrize( + "pandasmethname, kwargs", + [ + ("var", {"ddof": 0}), + ("var", {"ddof": 1}), + ("kurtosis", {}), + ("skew", {}), + ("sem", {}), + ], +) +def test_stat_method(pandasmethname, kwargs): + s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") + pandasmeth = getattr(s, pandasmethname) + result = pandasmeth(**kwargs) + s2 = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64") + pandasmeth = getattr(s2, pandasmethname) + expected = pandasmeth(**kwargs) + assert expected == result + + +def test_value_counts_na(): + arr = pd.array([1, 2, 1, pd.NA], dtype="Int64") + result = arr.value_counts(dropna=False) + ex_index = pd.Index([1, 2, pd.NA], dtype="Int64") + assert ex_index.dtype == "Int64" + expected = pd.Series([2, 1, 1], index=ex_index, dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([2, 1], index=arr[:2], dtype="Int64") + assert expected.index.dtype == arr.dtype + tm.assert_series_equal(result, expected) + + +def test_value_counts_empty(): + # https://github.com/pandas-dev/pandas/issues/33317 + ser = pd.Series([], dtype="Int64") + result = ser.value_counts() + idx = pd.Index([], dtype=ser.dtype) + assert idx.dtype == ser.dtype + expected = pd.Series([], index=idx, dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_value_counts_with_normalize(): + # GH 33172 + ser = pd.Series([1, 2, 1, pd.NA], dtype="Int64") + 
result = ser.value_counts(normalize=True) + expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + assert expected.index.dtype == ser.dtype + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 4]) +def test_integer_array_sum(skipna, min_count, any_int_ea_dtype): + dtype = any_int_ea_dtype + arr = pd.array([1, 2, 3, None], dtype=dtype) + result = arr.sum(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 6 + else: + assert result is pd.NA + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("method", ["min", "max"]) +def test_integer_array_min_max(skipna, method, any_int_ea_dtype): + dtype = any_int_ea_dtype + arr = pd.array([0, 1, None], dtype=dtype) + func = getattr(arr, method) + result = func(skipna=skipna) + if skipna: + assert result == (0 if method == "min" else 1) + else: + assert result is pd.NA + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 9]) +def test_integer_array_prod(skipna, min_count, any_int_ea_dtype): + dtype = any_int_ea_dtype + arr = pd.array([1, 2, None], dtype=dtype) + result = arr.prod(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 2 + else: + assert result is pd.NA + + +@pytest.mark.parametrize( + "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)] +) +def test_integer_array_numpy_sum(values, expected): + arr = pd.array(values, dtype="Int64") + result = np.sum(arr) + assert result == expected + + +@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"]) +def test_dataframe_reductions(op): + # https://github.com/pandas-dev/pandas/pull/32867 + # ensure the integers are not cast to float during reductions + df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")}) + result = df.max() + assert isinstance(result["a"], np.int64) + + +# TODO(jreback) - these need testing / are broken + +# shift + +# set_index (destroys type) diff --git a/pandas/tests/arrays/integer/test_indexing.py b/pandas/tests/arrays/integer/test_indexing.py new file mode 100644 index 00000000..4b953d69 --- /dev/null +++ b/pandas/tests/arrays/integer/test_indexing.py @@ -0,0 +1,19 @@ +import pandas as pd +import pandas._testing as tm + + +def test_array_setitem_nullable_boolean_mask(): + # GH 31446 + ser = pd.Series([1, 2], dtype="Int64") + result = ser.where(ser > 1) + expected = pd.Series([pd.NA, 2], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_array_setitem(): + # GH 31446 + arr = pd.Series([1, 2], dtype="Int64").array + arr[arr > 1] = 1 + + expected = pd.array([1, 1], dtype="Int64") + tm.assert_extension_array_equal(arr, expected) diff --git a/pandas/tests/arrays/integer/test_repr.py b/pandas/tests/arrays/integer/test_repr.py new file mode 100644 index 00000000..35d07bda --- /dev/null +++ b/pandas/tests/arrays/integer/test_repr.py @@ -0,0 +1,68 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + if dtype.is_signed_integer: + assert np.dtype(dtype.type).kind == "i" + else: + assert np.dtype(dtype.type).kind == "u" + assert dtype.name is not None + + +@pytest.mark.parametrize( + "dtype, expected", + [ + (Int8Dtype(), "Int8Dtype()"), + (Int16Dtype(), 
"Int16Dtype()"), + (Int32Dtype(), "Int32Dtype()"), + (Int64Dtype(), "Int64Dtype()"), + (UInt8Dtype(), "UInt8Dtype()"), + (UInt16Dtype(), "UInt16Dtype()"), + (UInt32Dtype(), "UInt32Dtype()"), + (UInt64Dtype(), "UInt64Dtype()"), + ], +) +def test_repr_dtype(dtype, expected): + assert repr(dtype) == expected + + +def test_repr_array(): + result = repr(pd.array([1, None, 3])) + expected = "\n[1, , 3]\nLength: 3, dtype: Int64" + assert result == expected + + +def test_repr_array_long(): + data = pd.array([1, 2, None] * 1000) + expected = ( + "\n" + "[ 1, 2, , 1, 2, , 1, 2, , 1,\n" + " ...\n" + " , 1, 2, , 1, 2, , 1, 2, ]\n" + "Length: 3000, dtype: Int64" + ) + result = repr(data) + assert result == expected + + +def test_frame_repr(data_missing): + + df = pd.DataFrame({"A": data_missing}) + result = repr(df) + expected = " A\n0 \n1 1" + assert result == expected diff --git a/pandas/tests/arrays/interval/__init__.py b/pandas/tests/arrays/interval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/interval/test_astype.py b/pandas/tests/arrays/interval/test_astype.py new file mode 100644 index 00000000..d7a2140f --- /dev/null +++ b/pandas/tests/arrays/interval/test_astype.py @@ -0,0 +1,28 @@ +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + Index, + IntervalIndex, +) +import pandas._testing as tm + + +class TestAstype: + @pytest.mark.parametrize("ordered", [True, False]) + def test_astype_categorical_retains_ordered(self, ordered): + index = IntervalIndex.from_breaks(range(5)) + arr = index._data + + dtype = CategoricalDtype(None, ordered=ordered) + + expected = Categorical(list(arr), ordered=ordered) + result = arr.astype(dtype) + assert result.ordered is ordered + tm.assert_categorical_equal(result, expected) + + # test IntervalIndex.astype while we're at it. 
+ result = index.astype(dtype) + expected = Index(expected) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py new file mode 100644 index 00000000..2a6bea32 --- /dev/null +++ b/pandas/tests/arrays/interval/test_interval.py @@ -0,0 +1,415 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Index, + Interval, + IntervalIndex, + Timedelta, + Timestamp, + date_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +@pytest.fixture( + params=[ + (Index([0, 2, 4]), Index([1, 3, 5])), + (Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])), + (timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)), + (date_range("20170101", periods=3), date_range("20170102", periods=3)), + ( + date_range("20170101", periods=3, tz="US/Eastern"), + date_range("20170102", periods=3, tz="US/Eastern"), + ), + ], + ids=lambda x: str(x[0].dtype), +) +def left_right_dtypes(request): + """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +class TestAttributes: + @pytest.mark.parametrize( + "left, right", + [ + (0, 1), + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timestamp("2018-01-02")), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-02", tz="US/Eastern"), + ), + ], + ) + @pytest.mark.parametrize("constructor", [IntervalArray, IntervalIndex]) + def test_is_empty(self, constructor, left, right, closed): + # GH27219 + tuples = [(left, left), (left, right), np.nan] + expected = np.array([closed != "both", False, False]) + result = constructor.from_tuples(tuples, closed=closed).is_empty + tm.assert_numpy_array_equal(result, expected) + + +class TestMethods: + @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) + def test_set_closed(self, closed, new_closed): + # GH 21670 + array = IntervalArray.from_breaks(range(10), closed=closed) + result = array.set_closed(new_closed) + expected = IntervalArray.from_breaks(range(10), closed=new_closed) + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + Interval(0, 1, closed="right"), + IntervalArray.from_breaks([1, 2, 3, 4], closed="right"), + ], + ) + def test_where_raises(self, other): + # GH#45768 The IntervalArray methods raises; the Series method coerces + ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4], closed="left")) + mask = np.array([True, False, True]) + match = "'value.closed' is 'right', expected 'left'." 
+ with pytest.raises(ValueError, match=match): + ser.array._where(mask, other) + + res = ser.where(mask, other=other) + expected = ser.astype(object).where(mask, other) + tm.assert_series_equal(res, expected) + + def test_shift(self): + # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502 + a = IntervalArray.from_breaks([1, 2, 3]) + result = a.shift() + # int -> float + expected = IntervalArray.from_tuples([(np.nan, np.nan), (1.0, 2.0)]) + tm.assert_interval_array_equal(result, expected) + + def test_shift_datetime(self): + # GH#31502, GH#31504 + a = IntervalArray.from_breaks(date_range("2000", periods=4)) + result = a.shift(2) + expected = a.take([-1, -1, 0], allow_fill=True) + tm.assert_interval_array_equal(result, expected) + + result = a.shift(-1) + expected = a.take([1, 2, -1], allow_fill=True) + tm.assert_interval_array_equal(result, expected) + + +class TestSetitem: + def test_set_na(self, left_right_dtypes): + left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) + result = IntervalArray.from_arrays(left, right) + + if result.dtype.subtype.kind not in ["m", "M"]: + msg = "'value' should be an interval type, got <.*NaTType'> instead." + with pytest.raises(TypeError, match=msg): + result[0] = pd.NaT + if result.dtype.subtype.kind in ["i", "u"]: + msg = "Cannot set float NaN to integer-backed IntervalArray" + # GH#45484 TypeError, not ValueError, matches what we get with + # non-NA un-holdable value. + with pytest.raises(TypeError, match=msg): + result[0] = np.NaN + return + + result[0] = np.nan + + expected_left = Index([left._na_value] + list(left[1:])) + expected_right = Index([right._na_value] + list(right[1:])) + expected = IntervalArray.from_arrays(expected_left, expected_right) + + tm.assert_extension_array_equal(result, expected) + + def test_setitem_mismatched_closed(self): + arr = IntervalArray.from_breaks(range(4)) + orig = arr.copy() + other = arr.set_closed("both") + + msg = "'value.closed' is 'both', expected 'right'" + with pytest.raises(ValueError, match=msg): + arr[0] = other[0] + with pytest.raises(ValueError, match=msg): + arr[:1] = other[:1] + with pytest.raises(ValueError, match=msg): + arr[:0] = other[:0] + with pytest.raises(ValueError, match=msg): + arr[:] = other[::-1] + with pytest.raises(ValueError, match=msg): + arr[:] = list(other[::-1]) + with pytest.raises(ValueError, match=msg): + arr[:] = other[::-1].astype(object) + with pytest.raises(ValueError, match=msg): + arr[:] = other[::-1].astype("category") + + # empty list should be no-op + arr[:0] = [] + tm.assert_interval_array_equal(arr, orig) + + +def test_repr(): + # GH 25022 + arr = IntervalArray.from_tuples([(0, 1), (1, 2)]) + result = repr(arr) + expected = ( + "\n" + "[(0, 1], (1, 2]]\n" + "Length: 2, dtype: interval[int64, right]" + ) + assert result == expected + + +class TestReductions: + def test_min_max_invalid_axis(self, left_right_dtypes): + left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) + arr = IntervalArray.from_arrays(left, right) + + msg = "`axis` must be fewer than the number of dimensions" + for axis in [-2, 1]: + with pytest.raises(ValueError, match=msg): + arr.min(axis=axis) + with pytest.raises(ValueError, match=msg): + arr.max(axis=axis) + + msg = "'>=' not supported between" + with pytest.raises(TypeError, match=msg): + arr.min(axis="foo") + with pytest.raises(TypeError, match=msg): + arr.max(axis="foo") + + def test_min_max(self, left_right_dtypes, index_or_series_or_array): + 
# GH#44746 + left, right = left_right_dtypes + left = left.copy(deep=True) + right = right.copy(deep=True) + arr = IntervalArray.from_arrays(left, right) + + # The expected results below are only valid if monotonic + assert left.is_monotonic_increasing + assert Index(arr).is_monotonic_increasing + + MIN = arr[0] + MAX = arr[-1] + + indexer = np.arange(len(arr)) + np.random.shuffle(indexer) + arr = arr.take(indexer) + + arr_na = arr.insert(2, np.nan) + + arr = index_or_series_or_array(arr) + arr_na = index_or_series_or_array(arr_na) + + for skipna in [True, False]: + res = arr.min(skipna=skipna) + assert res == MIN + assert type(res) == type(MIN) + + res = arr.max(skipna=skipna) + assert res == MAX + assert type(res) == type(MAX) + + res = arr_na.min(skipna=False) + assert np.isnan(res) + res = arr_na.max(skipna=False) + assert np.isnan(res) + + res = arr_na.min(skipna=True) + assert res == MIN + assert type(res) == type(MIN) + res = arr_na.max(skipna=True) + assert res == MAX + assert type(res) == type(MAX) + + +# ---------------------------------------------------------------------------- +# Arrow interaction + + +pyarrow_skip = td.skip_if_no("pyarrow") + + +@pyarrow_skip +def test_arrow_extension_type(): + import pyarrow as pa + + from pandas.core.arrays.arrow.extension_types import ArrowIntervalType + + p1 = ArrowIntervalType(pa.int64(), "left") + p2 = ArrowIntervalType(pa.int64(), "left") + p3 = ArrowIntervalType(pa.int64(), "right") + + assert p1.closed == "left" + assert p1 == p2 + assert not p1 == p3 + assert hash(p1) == hash(p2) + assert not hash(p1) == hash(p3) + + +@pyarrow_skip +def test_arrow_array(): + import pyarrow as pa + + from pandas.core.arrays.arrow.extension_types import ArrowIntervalType + + intervals = pd.interval_range(1, 5, freq=1).array + + result = pa.array(intervals) + assert isinstance(result.type, ArrowIntervalType) + assert result.type.closed == intervals.closed + assert result.type.subtype == pa.int64() + assert result.storage.field("left").equals(pa.array([1, 2, 3, 4], type="int64")) + assert result.storage.field("right").equals(pa.array([2, 3, 4, 5], type="int64")) + + expected = pa.array([{"left": i, "right": i + 1} for i in range(1, 5)]) + assert result.storage.equals(expected) + + # convert to its storage type + result = pa.array(intervals, type=expected.type) + assert result.equals(expected) + + # unsupported conversions + with pytest.raises(TypeError, match="Not supported to convert IntervalArray"): + pa.array(intervals, type="float64") + + with pytest.raises(TypeError, match="different 'subtype'"): + pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left")) + + +@pyarrow_skip +def test_arrow_array_missing(): + import pyarrow as pa + + from pandas.core.arrays.arrow.extension_types import ArrowIntervalType + + arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0]) + arr[1] = None + + result = pa.array(arr) + assert isinstance(result.type, ArrowIntervalType) + assert result.type.closed == arr.closed + assert result.type.subtype == pa.float64() + + # fields have missing values (not NaN) + left = pa.array([0.0, None, 2.0], type="float64") + right = pa.array([1.0, None, 3.0], type="float64") + assert result.storage.field("left").equals(left) + assert result.storage.field("right").equals(right) + + # structarray itself also has missing values on the array level + vals = [ + {"left": 0.0, "right": 1.0}, + {"left": None, "right": None}, + {"left": 2.0, "right": 3.0}, + ] + expected = pa.StructArray.from_pandas(vals, mask=np.array([False, True, False])) 
+ assert result.storage.equals(expected) + + +@pyarrow_skip +@pytest.mark.parametrize( + "breaks", + [[0.0, 1.0, 2.0, 3.0], date_range("2017", periods=4, freq="D")], + ids=["float", "datetime64[ns]"], +) +def test_arrow_table_roundtrip(breaks): + import pyarrow as pa + + from pandas.core.arrays.arrow.extension_types import ArrowIntervalType + + arr = IntervalArray.from_breaks(breaks) + arr[1] = None + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowIntervalType) + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.IntervalDtype) + tm.assert_frame_equal(result, df) + + table2 = pa.concat_tables([table, table]) + result = table2.to_pandas() + expected = pd.concat([df, df], ignore_index=True) + tm.assert_frame_equal(result, expected) + + # GH-41040 + table = pa.table( + [pa.chunked_array([], type=table.column(0).type)], schema=table.schema + ) + result = table.to_pandas() + tm.assert_frame_equal(result, expected[0:0]) + + +@pyarrow_skip +@pytest.mark.parametrize( + "breaks", + [[0.0, 1.0, 2.0, 3.0], date_range("2017", periods=4, freq="D")], + ids=["float", "datetime64[ns]"], +) +def test_arrow_table_roundtrip_without_metadata(breaks): + import pyarrow as pa + + arr = IntervalArray.from_breaks(breaks) + arr[1] = None + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + # remove the metadata + table = table.replace_schema_metadata() + assert table.schema.metadata is None + + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.IntervalDtype) + tm.assert_frame_equal(result, df) + + +@pyarrow_skip +def test_from_arrow_from_raw_struct_array(): + # in case pyarrow lost the Interval extension type (eg on parquet roundtrip + # with datetime64[ns] subtype, see GH-45881), still allow conversion + # from arrow to IntervalArray + import pyarrow as pa + + arr = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}]) + dtype = pd.IntervalDtype(np.dtype("int64"), closed="neither") + + result = dtype.__from_arrow__(arr) + expected = IntervalArray.from_breaks( + np.array([0, 1, 2], dtype="int64"), closed="neither" + ) + tm.assert_extension_array_equal(result, expected) + + result = dtype.__from_arrow__(pa.chunked_array([arr])) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"]) +def test_interval_index_subtype(timezone, inclusive_endpoints_fixture): + # GH 46999 + dates = date_range("2022", periods=3, tz=timezone) + dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]" + result = IntervalIndex.from_arrays( + ["2022-01-01", "2022-01-02"], + ["2022-01-02", "2022-01-03"], + closed=inclusive_endpoints_fixture, + dtype=dtype, + ) + expected = IntervalIndex.from_arrays( + dates[:-1], dates[1:], closed=inclusive_endpoints_fixture + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/arrays/interval/test_ops.py b/pandas/tests/arrays/interval/test_ops.py new file mode 100644 index 00000000..4853bec5 --- /dev/null +++ b/pandas/tests/arrays/interval/test_ops.py @@ -0,0 +1,93 @@ +"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" +import numpy as np +import pytest + +from pandas import ( + Interval, + IntervalIndex, + Timedelta, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +@pytest.fixture(params=[IntervalArray, IntervalIndex]) +def constructor(request): + """ + Fixture for testing both interval container classes. 
+ """ + return request.param + + +@pytest.fixture( + params=[ + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timedelta("1 day")), + (0, 1), + ], + ids=lambda x: type(x[0]).__name__, +) +def start_shift(request): + """ + Fixture for generating intervals of different types from a start value + and a shift value that can be added to start to generate an endpoint. + """ + return request.param + + +class TestOverlaps: + def test_overlaps_interval(self, constructor, start_shift, closed, other_closed): + start, shift = start_shift + interval = Interval(start, start + 3 * shift, other_closed) + + # intervals: identical, nested, spanning, partial, adjacent, disjoint + tuples = [ + (start, start + 3 * shift), + (start + shift, start + 2 * shift), + (start - shift, start + 4 * shift), + (start + 2 * shift, start + 4 * shift), + (start + 3 * shift, start + 4 * shift), + (start + 4 * shift, start + 5 * shift), + ] + interval_container = constructor.from_tuples(tuples, closed) + + adjacent = interval.closed_right and interval_container.closed_left + expected = np.array([True, True, True, True, adjacent, False]) + result = interval_container.overlaps(interval) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("other_constructor", [IntervalArray, IntervalIndex]) + def test_overlaps_interval_container(self, constructor, other_constructor): + # TODO: modify this test when implemented + interval_container = constructor.from_breaks(range(5)) + other_container = other_constructor.from_breaks(range(5)) + with pytest.raises(NotImplementedError, match="^$"): + interval_container.overlaps(other_container) + + def test_overlaps_na(self, constructor, start_shift): + """NA values are marked as False""" + start, shift = start_shift + interval = Interval(start, start + shift) + + tuples = [ + (start, start + shift), + np.nan, + (start + 2 * shift, start + 3 * shift), + ] + interval_container = constructor.from_tuples(tuples) + + expected = np.array([True, False, False]) + result = interval_container.overlaps(interval) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")], + ids=lambda x: type(x).__name__, + ) + def test_overlaps_invalid_type(self, constructor, other): + interval_container = constructor.from_breaks(range(5)) + msg = f"`other` must be Interval-like, got {type(other).__name__}" + with pytest.raises(TypeError, match=msg): + interval_container.overlaps(other) diff --git a/pandas/tests/arrays/masked/__init__.py b/pandas/tests/arrays/masked/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py new file mode 100644 index 00000000..f4b571ca --- /dev/null +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -0,0 +1,248 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +# integer dtypes +arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES] +scalars: list[Any] = [2] * len(arrays) +# floating dtypes +arrays += [pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES] +scalars += [0.2, 0.2] +# boolean +arrays += [pd.array([True, False, True, None], dtype="boolean")] +scalars += [False] + + +@pytest.fixture(params=zip(arrays, scalars), ids=[a.dtype.name for a in arrays]) +def data(request): + 
"""Fixture returning parametrized (array, scalar) tuple. + + Used to test equivalence of scalars, numpy arrays with array ops, and the + equivalence of DataFrame and Series ops. + """ + return request.param + + +def check_skip(data, op_name): + if isinstance(data.dtype, pd.BooleanDtype) and "sub" in op_name: + pytest.skip("subtract not implemented for boolean") + + +def is_bool_not_implemented(data, op_name): + # match non-masked behavior + return data.dtype.kind == "b" and op_name.strip("_").lstrip("r") in [ + "pow", + "truediv", + "floordiv", + ] + + +# Test equivalence of scalars, numpy arrays with array ops +# ----------------------------------------------------------------------------- + + +def test_array_scalar_like_equivalence(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + scalar_array = pd.array([scalar] * len(data), dtype=data.dtype) + + # TODO also add len-1 array (np.array([scalar], dtype=data.dtype.numpy_dtype)) + for scalar in [scalar, data.dtype.type(scalar)]: + if is_bool_not_implemented(data, all_arithmetic_operators): + msg = "operator '.*' not implemented for bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + op(data, scalar) + with pytest.raises(NotImplementedError, match=msg): + op(data, scalar_array) + else: + result = op(data, scalar) + expected = op(data, scalar_array) + tm.assert_extension_array_equal(result, expected) + + +def test_array_NA(data, all_arithmetic_operators): + data, _ = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + scalar = pd.NA + scalar_array = pd.array([pd.NA] * len(data), dtype=data.dtype) + + mask = data._mask.copy() + + if is_bool_not_implemented(data, all_arithmetic_operators): + msg = "operator '.*' not implemented for bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + op(data, scalar) + # GH#45421 check op doesn't alter data._mask inplace + tm.assert_numpy_array_equal(mask, data._mask) + return + + result = op(data, scalar) + # GH#45421 check op doesn't alter data._mask inplace + tm.assert_numpy_array_equal(mask, data._mask) + + expected = op(data, scalar_array) + tm.assert_numpy_array_equal(mask, data._mask) + + tm.assert_extension_array_equal(result, expected) + + +def test_numpy_array_equivalence(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + numpy_array = np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype) + pd_array = pd.array(numpy_array, dtype=data.dtype) + + if is_bool_not_implemented(data, all_arithmetic_operators): + msg = "operator '.*' not implemented for bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + op(data, numpy_array) + with pytest.raises(NotImplementedError, match=msg): + op(data, pd_array) + return + + result = op(data, numpy_array) + expected = op(data, pd_array) + tm.assert_extension_array_equal(result, expected) + + +# Test equivalence with Series and DataFrame ops +# ----------------------------------------------------------------------------- + + +def test_frame(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + # DataFrame with scalar + df = pd.DataFrame({"A": data}) + + if is_bool_not_implemented(data, all_arithmetic_operators): + msg = "operator '.*' not implemented for 
bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + op(df, scalar) + with pytest.raises(NotImplementedError, match=msg): + op(data, scalar) + return + + result = op(df, scalar) + expected = pd.DataFrame({"A": op(data, scalar)}) + tm.assert_frame_equal(result, expected) + + +def test_series(data, all_arithmetic_operators): + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + check_skip(data, all_arithmetic_operators) + + ser = pd.Series(data) + + others = [ + scalar, + np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype), + pd.array([scalar] * len(data), dtype=data.dtype), + pd.Series([scalar] * len(data), dtype=data.dtype), + ] + + for other in others: + if is_bool_not_implemented(data, all_arithmetic_operators): + msg = "operator '.*' not implemented for bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + op(ser, other) + + else: + result = op(ser, other) + expected = pd.Series(op(data, other)) + tm.assert_series_equal(result, expected) + + +# Test generic characteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_object(data, all_arithmetic_operators): + data, _ = data + + op = all_arithmetic_operators + opa = getattr(data, op) + + # 2d -> return NotImplemented + result = opa(pd.DataFrame({"A": data})) + assert result is NotImplemented + + msg = r"can only perform ops with 1-d structures" + with pytest.raises(NotImplementedError, match=msg): + opa(np.arange(len(data)).reshape(-1, len(data))) + + +def test_error_len_mismatch(data, all_arithmetic_operators): + # operating with a list-like with non-matching length raises + data, scalar = data + op = tm.get_op_from_name(all_arithmetic_operators) + + other = [scalar] * (len(data) - 1) + + err = ValueError + msg = "|".join( + [ + r"operands could not be broadcast together with shapes \(3,\) \(4,\)", + r"operands could not be broadcast together with shapes \(4,\) \(3,\)", + ] + ) + if data.dtype.kind == "b" and all_arithmetic_operators.strip("_") in [ + "sub", + "rsub", + ]: + err = TypeError + msg = ( + r"numpy boolean subtract, the `\-` operator, is not supported, use " + r"the bitwise_xor, the `\^` operator, or the logical_xor function instead" + ) + elif is_bool_not_implemented(data, all_arithmetic_operators): + msg = "operator '.*' not implemented for bool dtypes" + err = NotImplementedError + + for other in [other, np.array(other)]: + with pytest.raises(err, match=msg): + op(data, other) + + s = pd.Series(data) + with pytest.raises(err, match=msg): + op(s, other) + + +@pytest.mark.parametrize("op", ["__neg__", "__abs__", "__invert__"]) +def test_unary_op_does_not_propagate_mask(data, op): + # https://github.com/pandas-dev/pandas/issues/39943 + data, _ = data + ser = pd.Series(data) + + if op == "__invert__" and data.dtype.kind == "f": + # we follow numpy in raising + msg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=msg): + getattr(ser, op)() + with pytest.raises(TypeError, match=msg): + getattr(data, op)() + with pytest.raises(TypeError, match=msg): + # Check that this is still the numpy behavior + getattr(data._data, op)() + + return + + result = getattr(ser, op)() + expected = result.copy(deep=True) + ser[0] = None + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py new file mode 100644 index 00000000..6b008132 --- /dev/null +++ 
b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -0,0 +1,195 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +pa = pytest.importorskip("pyarrow", minversion="1.0.1") + +from pandas.core.arrays.arrow._arrow_utils import pyarrow_array_to_numpy_and_mask + +arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES] +arrays += [pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES] +arrays += [pd.array([True, False, True, None], dtype="boolean")] + + +@pytest.fixture(params=arrays, ids=[a.dtype.name for a in arrays]) +def data(request): + """ + Fixture returning parametrized array from given dtype, including integer, + float and boolean + """ + return request.param + + +def test_arrow_array(data): + arr = pa.array(data) + expected = pa.array( + data.to_numpy(object, na_value=None), + type=pa.from_numpy_dtype(data.dtype.numpy_dtype), + ) + assert arr.equals(expected) + + +def test_arrow_roundtrip(data): + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == str(data.dtype.numpy_dtype) + result = table.to_pandas() + assert result["a"].dtype == data.dtype + tm.assert_frame_equal(result, df) + + +def test_dataframe_from_arrow_types_mapper(): + def types_mapper(arrow_type): + if pa.types.is_boolean(arrow_type): + return pd.BooleanDtype() + elif pa.types.is_integer(arrow_type): + return pd.Int64Dtype() + + bools_array = pa.array([True, None, False], type=pa.bool_()) + ints_array = pa.array([1, None, 2], type=pa.int64()) + small_ints_array = pa.array([-1, 0, 7], type=pa.int8()) + record_batch = pa.RecordBatch.from_arrays( + [bools_array, ints_array, small_ints_array], ["bools", "ints", "small_ints"] + ) + result = record_batch.to_pandas(types_mapper=types_mapper) + bools = pd.Series([True, None, False], dtype="boolean") + ints = pd.Series([1, None, 2], dtype="Int64") + small_ints = pd.Series([-1, 0, 7], dtype="Int64") + expected = pd.DataFrame({"bools": bools, "ints": ints, "small_ints": small_ints}) + tm.assert_frame_equal(result, expected) + + +def test_arrow_load_from_zero_chunks(data): + # GH-41040 + + df = pd.DataFrame({"a": data[0:0]}) + table = pa.table(df) + assert table.field("a").type == str(data.dtype.numpy_dtype) + table = pa.table( + [pa.chunked_array([], type=table.field("a").type)], schema=table.schema + ) + result = table.to_pandas() + assert result["a"].dtype == data.dtype + tm.assert_frame_equal(result, df) + + +def test_arrow_from_arrow_uint(): + # https://github.com/pandas-dev/pandas/issues/31896 + # possible mismatch in types + + dtype = pd.UInt32Dtype() + result = dtype.__from_arrow__(pa.array([1, 2, 3, 4, None], type="int64")) + expected = pd.array([1, 2, 3, 4, None], dtype="UInt32") + + tm.assert_extension_array_equal(result, expected) + + +def test_arrow_sliced(data): + # https://github.com/pandas-dev/pandas/issues/38525 + + df = pd.DataFrame({"a": data}) + table = pa.table(df) + result = table.slice(2, None).to_pandas() + expected = df.iloc[2:].reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + # no missing values + df2 = df.fillna(data[0]) + table = pa.table(df2) + result = table.slice(2, None).to_pandas() + expected = df2.iloc[2:].reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def np_dtype_to_arrays(any_real_numpy_dtype): + """ + Fixture returning actual and expected dtype, pandas and numpy arrays and + mask from a given numpy dtype + """ + np_dtype = np.dtype(any_real_numpy_dtype) + 
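+ # map the numpy dtype to its pyarrow equivalent so the fixture can build a pa.array with a validity bitmap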
pa_type = pa.from_numpy_dtype(np_dtype) + + # None ensures the creation of a bitmask buffer. + pa_array = pa.array([0, 1, 2, None], type=pa_type) + # Since masked Arrow buffer slots are not required to contain a specific + # value, assert only the first three values of the created np.array + np_expected = np.array([0, 1, 2], dtype=np_dtype) + mask_expected = np.array([True, True, True, False]) + return np_dtype, pa_array, np_expected, mask_expected + + +def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays): + """ + Test conversion from pyarrow array to numpy array. + + Modifies the pyarrow buffer to contain padding and offset, which are + considered valid buffers by pyarrow. + + Also tests empty pyarrow arrays with non empty buffers. + See https://github.com/pandas-dev/pandas/issues/40896 + """ + np_dtype, pa_array, np_expected, mask_expected = np_dtype_to_arrays + data, mask = pyarrow_array_to_numpy_and_mask(pa_array, np_dtype) + tm.assert_numpy_array_equal(data[:3], np_expected) + tm.assert_numpy_array_equal(mask, mask_expected) + + mask_buffer = pa_array.buffers()[0] + data_buffer = pa_array.buffers()[1] + data_buffer_bytes = pa_array.buffers()[1].to_pybytes() + + # Add trailing padding to the buffer. + data_buffer_trail = pa.py_buffer(data_buffer_bytes + b"\x00") + pa_array_trail = pa.Array.from_buffers( + type=pa_array.type, + length=len(pa_array), + buffers=[mask_buffer, data_buffer_trail], + offset=pa_array.offset, + ) + pa_array_trail.validate() + data, mask = pyarrow_array_to_numpy_and_mask(pa_array_trail, np_dtype) + tm.assert_numpy_array_equal(data[:3], np_expected) + tm.assert_numpy_array_equal(mask, mask_expected) + + # Add offset to the buffer. + offset = b"\x00" * (pa_array.type.bit_width // 8) + data_buffer_offset = pa.py_buffer(offset + data_buffer_bytes) + mask_buffer_offset = pa.py_buffer(b"\x0E") + pa_array_offset = pa.Array.from_buffers( + type=pa_array.type, + length=len(pa_array), + buffers=[mask_buffer_offset, data_buffer_offset], + offset=pa_array.offset + 1, + ) + pa_array_offset.validate() + data, mask = pyarrow_array_to_numpy_and_mask(pa_array_offset, np_dtype) + tm.assert_numpy_array_equal(data[:3], np_expected) + tm.assert_numpy_array_equal(mask, mask_expected) + + # Empty array + np_expected_empty = np.array([], dtype=np_dtype) + mask_expected_empty = np.array([], dtype=np.bool_) + + pa_array_offset = pa.Array.from_buffers( + type=pa_array.type, + length=0, + buffers=[mask_buffer, data_buffer], + offset=pa_array.offset, + ) + pa_array_offset.validate() + data, mask = pyarrow_array_to_numpy_and_mask(pa_array_offset, np_dtype) + tm.assert_numpy_array_equal(data[:3], np_expected_empty) + tm.assert_numpy_array_equal(mask, mask_expected_empty) + + +def test_from_arrow_type_error(data): + # ensure that __from_arrow__ returns a TypeError when getting a wrong + # array type + + arr = pa.array(data).cast("string") + with pytest.raises(TypeError, match=None): + # we don't test the exact error message, only the fact that it raises + # a TypeError is relevant + data.dtype.__from_arrow__(arr) diff --git a/pandas/tests/arrays/masked/test_function.py b/pandas/tests/arrays/masked/test_function.py new file mode 100644 index 00000000..9a86ef83 --- /dev/null +++ b/pandas/tests/arrays/masked/test_function.py @@ -0,0 +1,51 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +import pandas._testing as tm + +arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES] +arrays += [ + 
pd.array([0.141, -0.268, 5.895, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES +] + + +@pytest.fixture(params=arrays, ids=[a.dtype.name for a in arrays]) +def data(request): + """ + Fixture returning parametrized 'data' array with different integer and + floating point types + """ + return request.param + + +@pytest.fixture() +def numpy_dtype(data): + """ + Fixture returning numpy dtype from 'data' input array. + """ + # For integer dtype, the numpy conversion must be done to float + if is_integer_dtype(data): + numpy_dtype = float + else: + numpy_dtype = data.dtype.type + return numpy_dtype + + +def test_round(data, numpy_dtype): + # No arguments + result = data.round() + expected = pd.array( + np.round(data.to_numpy(dtype=numpy_dtype, na_value=None)), dtype=data.dtype + ) + tm.assert_extension_array_equal(result, expected) + + # Decimals argument + result = data.round(decimals=2) + expected = pd.array( + np.round(data.to_numpy(dtype=numpy_dtype, na_value=None), decimals=2), + dtype=data.dtype, + ) + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/masked/test_indexing.py b/pandas/tests/arrays/masked/test_indexing.py new file mode 100644 index 00000000..28ee451a --- /dev/null +++ b/pandas/tests/arrays/masked/test_indexing.py @@ -0,0 +1,60 @@ +import re + +import numpy as np +import pytest + +import pandas as pd + + +class TestSetitemValidation: + def _check_setitem_invalid(self, arr, invalid): + msg = f"Invalid value '{str(invalid)}' for dtype {arr.dtype}" + msg = re.escape(msg) + with pytest.raises(TypeError, match=msg): + arr[0] = invalid + + with pytest.raises(TypeError, match=msg): + arr[:] = invalid + + with pytest.raises(TypeError, match=msg): + arr[[0]] = invalid + + # FIXME: don't leave commented-out + # with pytest.raises(TypeError): + # arr[[0]] = [invalid] + + # with pytest.raises(TypeError): + # arr[[0]] = np.array([invalid], dtype=object) + + # Series non-coercion, behavior subject to change + ser = pd.Series(arr) + with pytest.raises(TypeError, match=msg): + ser[0] = invalid + # TODO: so, so many other variants of this... + + _invalid_scalars = [ + 1 + 2j, + "True", + "1", + "1.0", + pd.NaT, + np.datetime64("NaT"), + np.timedelta64("NaT"), + ] + + @pytest.mark.parametrize( + "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)] + ) + def test_setitem_validation_scalar_bool(self, invalid): + arr = pd.array([True, False, None], dtype="boolean") + self._check_setitem_invalid(arr, invalid) + + @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)]) + def test_setitem_validation_scalar_int(self, invalid, any_int_ea_dtype): + arr = pd.array([1, 2, None], dtype=any_int_ea_dtype) + self._check_setitem_invalid(arr, invalid) + + @pytest.mark.parametrize("invalid", _invalid_scalars + [True]) + def test_setitem_validation_scalar_float(self, invalid, float_ea_dtype): + arr = pd.array([1, 2, None], dtype=float_ea_dtype) + self._check_setitem_invalid(arr, invalid) diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py new file mode 100644 index 00000000..6174ae0a --- /dev/null +++ b/pandas/tests/arrays/masked_shared.py @@ -0,0 +1,155 @@ +""" +Tests shared by MaskedArray subclasses. 
+""" +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base import BaseOpsUtil + + +class ComparisonOps(BaseOpsUtil): + def _compare_other(self, data, op, other): + + # array + result = pd.Series(op(data, other)) + expected = pd.Series(op(data._data, other), dtype="boolean") + + # fill the nan locations + expected[data._mask] = pd.NA + + tm.assert_series_equal(result, expected) + + # series + ser = pd.Series(data) + result = op(ser, other) + + expected = op(pd.Series(data._data), other) + + # fill the nan locations + expected[data._mask] = pd.NA + expected = expected.astype("boolean") + + tm.assert_series_equal(result, expected) + + # subclass will override to parametrize 'other' + def test_scalar(self, other, comparison_op, dtype): + op = comparison_op + left = pd.array([1, 0, None], dtype=dtype) + + result = op(left, other) + + if other is pd.NA: + expected = pd.array([None, None, None], dtype="boolean") + else: + values = op(left._data, other) + expected = pd.arrays.BooleanArray(values, left._mask, copy=True) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = pd.NA + tm.assert_extension_array_equal(left, pd.array([1, 0, None], dtype=dtype)) + + +class NumericOps: + # Shared by IntegerArray and FloatingArray, not BooleanArray + + def test_searchsorted_nan(self, dtype): + # The base class casts to object dtype, for which searchsorted returns + # 0 from the left and 10 from the right. + arr = pd.array(range(10), dtype=dtype) + + assert arr.searchsorted(np.nan, side="left") == 10 + assert arr.searchsorted(np.nan, side="right") == 10 + + def test_no_shared_mask(self, data): + result = data + 1 + assert not tm.shares_memory(result, data) + + def test_array(self, comparison_op, dtype): + op = comparison_op + + left = pd.array([0, 1, 2, None, None, None], dtype=dtype) + right = pd.array([0, 1, None, 0, 1, None], dtype=dtype) + + result = op(left, right) + values = op(left._data, right._data) + mask = left._mask | right._mask + + expected = pd.arrays.BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = pd.NA + tm.assert_extension_array_equal( + left, pd.array([0, 1, 2, None, None, None], dtype=dtype) + ) + tm.assert_extension_array_equal( + right, pd.array([0, 1, None, 0, 1, None], dtype=dtype) + ) + + def test_compare_with_booleanarray(self, comparison_op, dtype): + op = comparison_op + + left = pd.array([True, False, None] * 3, dtype="boolean") + right = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype=dtype) + other = pd.array([False] * 3 + [True] * 3 + [None] * 3, dtype="boolean") + + expected = op(left, other) + result = op(left, right) + tm.assert_extension_array_equal(result, expected) + + # reversed op + expected = op(other, left) + result = op(right, left) + tm.assert_extension_array_equal(result, expected) + + def test_compare_to_string(self, dtype): + # GH#28930 + ser = pd.Series([1, None], dtype=dtype) + result = ser == "a" + expected = pd.Series([False, pd.NA], dtype="boolean") + + self.assert_series_equal(result, expected) + + def test_ufunc_with_out(self, dtype): + arr = pd.array([1, 2, 3], dtype=dtype) + arr2 = pd.array([1, 2, pd.NA], dtype=dtype) + + mask = arr == arr + mask2 = arr2 == arr2 + + result = np.zeros(3, dtype=bool) + result |= mask + # If MaskedArray.__array_ufunc__ handled "out" appropriately, + # `result` should still be an ndarray. 
+ assert isinstance(result, np.ndarray) + assert result.all() + + # result |= mask worked because mask could be cast losslessly to + # boolean ndarray. mask2 can't, so this raises + result = np.zeros(3, dtype=bool) + msg = "Specify an appropriate 'na_value' for this dtype" + with pytest.raises(ValueError, match=msg): + result |= mask2 + + # addition + res = np.add(arr, arr2) + expected = pd.array([2, 4, pd.NA], dtype=dtype) + tm.assert_extension_array_equal(res, expected) + + # when passing out=arr, we will modify 'arr' inplace. + res = np.add(arr, arr2, out=arr) + assert res is arr + tm.assert_extension_array_equal(res, expected) + tm.assert_extension_array_equal(arr, expected) + + def test_mul_td64_array(self, dtype): + # GH#45622 + arr = pd.array([1, 2, pd.NA], dtype=dtype) + other = np.arange(3, dtype=np.int64).view("m8[ns]") + + result = arr * other + expected = pd.array([pd.Timedelta(0), pd.Timedelta(2), pd.NaT]) + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/numpy_/__init__.py b/pandas/tests/arrays/numpy_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/numpy_/test_indexing.py b/pandas/tests/arrays/numpy_/test_indexing.py new file mode 100644 index 00000000..225d64ad --- /dev/null +++ b/pandas/tests/arrays/numpy_/test_indexing.py @@ -0,0 +1,41 @@ +import numpy as np + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +import pandas._testing as tm + + +class TestSearchsorted: + def test_searchsorted_string(self, string_dtype): + arr = pd.array(["a", "b", "c"], dtype=string_dtype) + + result = arr.searchsorted("a", side="left") + assert is_scalar(result) + assert result == 0 + + result = arr.searchsorted("a", side="right") + assert is_scalar(result) + assert result == 1 + + def test_searchsorted_numeric_dtypes_scalar(self, any_real_numpy_dtype): + arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype) + result = arr.searchsorted(30) + assert is_scalar(result) + assert result == 2 + + result = arr.searchsorted([30]) + expected = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_searchsorted_numeric_dtypes_vector(self, any_real_numpy_dtype): + arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype) + result = arr.searchsorted([2, 30]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_searchsorted_sorter(self, any_real_numpy_dtype): + arr = pd.array([3, 1, 2], dtype=any_real_numpy_dtype) + result = arr.searchsorted([0, 3], sorter=np.argsort(arr)) + expected = np.array([0, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py new file mode 100644 index 00000000..c748d487 --- /dev/null +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -0,0 +1,324 @@ +""" +Additional tests for PandasArray that aren't covered by +the interface tests. 
+""" +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import PandasDtype + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import PandasArray + + +@pytest.fixture( + params=[ + np.array(["a", "b"], dtype=object), + np.array([0, 1], dtype=float), + np.array([0, 1], dtype=int), + np.array([0, 1 + 2j], dtype=complex), + np.array([True, False], dtype=bool), + np.array([0, 1], dtype="datetime64[ns]"), + np.array([0, 1], dtype="timedelta64[ns]"), + ] +) +def any_numpy_array(request): + """ + Parametrized fixture for NumPy arrays with different dtypes. + + This excludes string and bytes. + """ + return request.param + + +# ---------------------------------------------------------------------------- +# PandasDtype + + +@pytest.mark.parametrize( + "dtype, expected", + [ + ("bool", True), + ("int", True), + ("uint", True), + ("float", True), + ("complex", True), + ("str", False), + ("bytes", False), + ("datetime64[ns]", False), + ("object", False), + ("void", False), + ], +) +def test_is_numeric(dtype, expected): + dtype = PandasDtype(dtype) + assert dtype._is_numeric is expected + + +@pytest.mark.parametrize( + "dtype, expected", + [ + ("bool", True), + ("int", False), + ("uint", False), + ("float", False), + ("complex", False), + ("str", False), + ("bytes", False), + ("datetime64[ns]", False), + ("object", False), + ("void", False), + ], +) +def test_is_boolean(dtype, expected): + dtype = PandasDtype(dtype) + assert dtype._is_boolean is expected + + +def test_repr(): + dtype = PandasDtype(np.dtype("int64")) + assert repr(dtype) == "PandasDtype('int64')" + + +def test_constructor_from_string(): + result = PandasDtype.construct_from_string("int64") + expected = PandasDtype(np.dtype("int64")) + assert result == expected + + +def test_dtype_univalent(any_numpy_dtype): + dtype = PandasDtype(any_numpy_dtype) + + result = PandasDtype(dtype) + assert result == dtype + + +# ---------------------------------------------------------------------------- +# Construction + + +def test_constructor_no_coercion(): + with pytest.raises(ValueError, match="NumPy array"): + PandasArray([1, 2, 3]) + + +def test_series_constructor_with_copy(): + ndarray = np.array([1, 2, 3]) + ser = pd.Series(PandasArray(ndarray), copy=True) + + assert ser.values is not ndarray + + +def test_series_constructor_with_astype(): + ndarray = np.array([1, 2, 3]) + result = pd.Series(PandasArray(ndarray), dtype="float64") + expected = pd.Series([1.0, 2.0, 3.0], dtype="float64") + tm.assert_series_equal(result, expected) + + +def test_from_sequence_dtype(): + arr = np.array([1, 2, 3], dtype="int64") + result = PandasArray._from_sequence(arr, dtype="uint64") + expected = PandasArray(np.array([1, 2, 3], dtype="uint64")) + tm.assert_extension_array_equal(result, expected) + + +def test_constructor_copy(): + arr = np.array([0, 1]) + result = PandasArray(arr, copy=True) + + assert not tm.shares_memory(result, arr) + + +def test_constructor_with_data(any_numpy_array): + nparr = any_numpy_array + arr = PandasArray(nparr) + assert arr.dtype.numpy_dtype == nparr.dtype + + +# ---------------------------------------------------------------------------- +# Conversion + + +def test_to_numpy(): + arr = PandasArray(np.array([1, 2, 3])) + result = arr.to_numpy() + assert result is arr._ndarray + + result = arr.to_numpy(copy=True) + assert result is not arr._ndarray + + result = arr.to_numpy(dtype="f8") + expected = np.array([1, 2, 3], dtype="f8") + tm.assert_numpy_array_equal(result, expected) + + +# 
---------------------------------------------------------------------------- +# Setitem + + +def test_setitem_series(): + ser = pd.Series([1, 2, 3]) + ser.array[0] = 10 + expected = pd.Series([10, 2, 3]) + tm.assert_series_equal(ser, expected) + + +def test_setitem(any_numpy_array): + nparr = any_numpy_array + arr = PandasArray(nparr, copy=True) + + arr[0] = arr[1] + nparr[0] = nparr[1] + + tm.assert_numpy_array_equal(arr.to_numpy(), nparr) + + +# ---------------------------------------------------------------------------- +# Reductions + + +def test_bad_reduce_raises(): + arr = np.array([1, 2, 3], dtype="int64") + arr = PandasArray(arr) + msg = "cannot perform not_a_method with type int" + with pytest.raises(TypeError, match=msg): + arr._reduce(msg) + + +def test_validate_reduction_keyword_args(): + arr = PandasArray(np.array([1, 2, 3])) + msg = "the 'keepdims' parameter is not supported .*all" + with pytest.raises(ValueError, match=msg): + arr.all(keepdims=True) + + +def test_np_max_nested_tuples(): + # case where checking in ufunc.nout works while checking for tuples + # does not + vals = [ + (("j", "k"), ("l", "m")), + (("l", "m"), ("o", "p")), + (("o", "p"), ("j", "k")), + ] + ser = pd.Series(vals) + arr = ser.array + + assert arr.max() is arr[2] + assert ser.max() is arr[2] + + result = np.maximum.reduce(arr) + assert result == arr[2] + + result = np.maximum.reduce(ser) + assert result == arr[2] + + +def test_np_reduce_2d(): + raw = np.arange(12).reshape(4, 3) + arr = PandasArray(raw) + + res = np.maximum.reduce(arr, axis=0) + tm.assert_extension_array_equal(res, arr[-1]) + + alt = arr.max(axis=0) + tm.assert_extension_array_equal(alt, arr[-1]) + + +# ---------------------------------------------------------------------------- +# Ops + + +@pytest.mark.parametrize("ufunc", [np.abs, np.negative, np.positive]) +def test_ufunc_unary(ufunc): + arr = PandasArray(np.array([-1.0, 0.0, 1.0])) + result = ufunc(arr) + expected = PandasArray(ufunc(arr._ndarray)) + tm.assert_extension_array_equal(result, expected) + + # same thing but with the 'out' keyword + out = PandasArray(np.array([-9.0, -9.0, -9.0])) + ufunc(arr, out=out) + tm.assert_extension_array_equal(out, expected) + + +def test_ufunc(): + arr = PandasArray(np.array([-1.0, 0.0, 1.0])) + + r1, r2 = np.divmod(arr, np.add(arr, 2)) + e1, e2 = np.divmod(arr._ndarray, np.add(arr._ndarray, 2)) + e1 = PandasArray(e1) + e2 = PandasArray(e2) + tm.assert_extension_array_equal(r1, e1) + tm.assert_extension_array_equal(r2, e2) + + +def test_basic_binop(): + # Just a basic smoke test. The EA interface tests exercise this + # more thoroughly. + x = PandasArray(np.array([1, 2, 3])) + result = x + x + expected = PandasArray(np.array([2, 4, 6])) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [None, object]) +def test_setitem_object_typecode(dtype): + arr = PandasArray(np.array(["a", "b", "c"], dtype=dtype)) + arr[0] = "t" + expected = PandasArray(np.array(["t", "b", "c"], dtype=dtype)) + tm.assert_extension_array_equal(arr, expected) + + +def test_setitem_no_coercion(): + # https://github.com/pandas-dev/pandas/issues/28150 + arr = PandasArray(np.array([1, 2, 3])) + with pytest.raises(ValueError, match="int"): + arr[0] = "a" + + # With a value that we do coerce, check that we coerce the value + # and not the underlying array. 
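+ # i.e. 2.5 is cast down to the backing integer dtype rather than upcasting the whole array to float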
+ arr[0] = 2.5 + assert isinstance(arr[0], (int, np.integer)), type(arr[0]) + + +def test_setitem_preserves_views(): + # GH#28150, see also extension test of the same name + arr = PandasArray(np.array([1, 2, 3])) + view1 = arr.view() + view2 = arr[:] + view3 = np.asarray(arr) + + arr[0] = 9 + assert view1[0] == 9 + assert view2[0] == 9 + assert view3[0] == 9 + + arr[-1] = 2.5 + view1[-1] = 5 + assert arr[-1] == 5 + + +@pytest.mark.parametrize("dtype", [np.int64, np.uint64]) +def test_quantile_empty(dtype): + # we should get back np.nans, not -1s + arr = PandasArray(np.array([], dtype=dtype)) + idx = pd.Index([0.0, 0.5]) + + result = arr._quantile(idx, interpolation="linear") + expected = PandasArray(np.array([np.nan, np.nan])) + tm.assert_extension_array_equal(result, expected) + + +def test_factorize_unsigned(): + # don't raise when calling factorize on unsigned int PandasArray + arr = np.array([1, 2, 3], dtype=np.uint64) + obj = PandasArray(arr) + + res_codes, res_unique = obj.factorize() + exp_codes, exp_unique = pd.factorize(arr) + + tm.assert_numpy_array_equal(res_codes, exp_codes) + + tm.assert_extension_array_equal(res_unique, PandasArray(exp_unique)) diff --git a/pandas/tests/arrays/period/__init__.py b/pandas/tests/arrays/period/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py new file mode 100644 index 00000000..03fd1465 --- /dev/null +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -0,0 +1,121 @@ +import pytest + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ( + PeriodArray, + period_array, +) + +pa = pytest.importorskip("pyarrow", minversion="1.0.1") + + +def test_arrow_extension_type(): + from pandas.core.arrays.arrow.extension_types import ArrowPeriodType + + p1 = ArrowPeriodType("D") + p2 = ArrowPeriodType("D") + p3 = ArrowPeriodType("M") + + assert p1.freq == "D" + assert p1 == p2 + assert not p1 == p3 + assert hash(p1) == hash(p2) + assert not hash(p1) == hash(p3) + + +@pytest.mark.parametrize( + "data, freq", + [ + (pd.date_range("2017", periods=3), "D"), + (pd.date_range("2017", periods=3, freq="A"), "A-DEC"), + ], +) +def test_arrow_array(data, freq): + from pandas.core.arrays.arrow.extension_types import ArrowPeriodType + + periods = period_array(data, freq=freq) + result = pa.array(periods) + assert isinstance(result.type, ArrowPeriodType) + assert result.type.freq == freq + expected = pa.array(periods.asi8, type="int64") + assert result.storage.equals(expected) + + # convert to its storage type + result = pa.array(periods, type=pa.int64()) + assert result.equals(expected) + + # unsupported conversions + msg = "Not supported to convert PeriodArray to 'double' type" + with pytest.raises(TypeError, match=msg): + pa.array(periods, type="float64") + + with pytest.raises(TypeError, match="different 'freq'"): + pa.array(periods, type=ArrowPeriodType("T")) + + +def test_arrow_array_missing(): + from pandas.core.arrays.arrow.extension_types import ArrowPeriodType + + arr = PeriodArray([1, 2, 3], freq="D") + arr[1] = pd.NaT + + result = pa.array(arr) + assert isinstance(result.type, ArrowPeriodType) + assert result.type.freq == "D" + expected = pa.array([1, None, 3], type="int64") + assert result.storage.equals(expected) + + +def test_arrow_table_roundtrip(): + from pandas.core.arrays.arrow.extension_types import ArrowPeriodType + + arr = PeriodArray([1, 2, 3], 
freq="D") + arr[1] = pd.NaT + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowPeriodType) + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) + + table2 = pa.concat_tables([table, table]) + result = table2.to_pandas() + expected = pd.concat([df, df], ignore_index=True) + tm.assert_frame_equal(result, expected) + + +def test_arrow_load_from_zero_chunks(): + # GH-41040 + + from pandas.core.arrays.arrow.extension_types import ArrowPeriodType + + arr = PeriodArray([], freq="D") + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowPeriodType) + table = pa.table( + [pa.chunked_array([], type=table.column(0).type)], schema=table.schema + ) + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) + + +def test_arrow_table_roundtrip_without_metadata(): + arr = PeriodArray([1, 2, 3], freq="H") + arr[1] = pd.NaT + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + # remove the metadata + table = table.replace_schema_metadata() + assert table.schema.metadata is None + + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/period/test_astype.py b/pandas/tests/arrays/period/test_astype.py new file mode 100644 index 00000000..e9245c9c --- /dev/null +++ b/pandas/tests/arrays/period/test_astype.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import period_array + + +@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) +def test_astype_int(dtype): + # We choose to ignore the sign and size of integers for + # Period/Datetime/Timedelta astype + arr = period_array(["2000", "2001", None], freq="D") + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + warn1 = FutureWarning + else: + expected_dtype = np.dtype("int64") + warn1 = None + + msg_overflow = "will raise if the conversion overflows" + with tm.assert_produces_warning(warn1, match=msg_overflow): + expected = arr.astype(expected_dtype) + + warn = None if dtype == expected_dtype else FutureWarning + msg = " will return exactly the specified dtype" + if warn is None and warn1 is not None: + warn = warn1 + msg = msg_overflow + with tm.assert_produces_warning(warn, match=msg): + result = arr.astype(dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_copies(): + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(np.int64, copy=False) + + # Add the `.base`, since we now use `.asi8` which returns a view. + # We could maybe override it in PeriodArray to return ._data directly. 
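+ # with copy=False the int64 result is a view, so its .base is the PeriodArray's underlying ordinal data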
+ assert result.base is arr._data + + result = arr.astype(np.int64, copy=True) + assert result is not arr._data + tm.assert_numpy_array_equal(result, arr._data.view("i8")) + + +def test_astype_categorical(): + arr = period_array(["2000", "2001", "2001", None], freq="D") + result = arr.astype("category") + categories = pd.PeriodIndex(["2000", "2001"], freq="D") + expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories) + tm.assert_categorical_equal(result, expected) + + +def test_astype_period(): + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(PeriodDtype("M")) + expected = period_array(["2000", "2001", None], freq="M") + tm.assert_period_array_equal(result, expected) + + +@pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"]) +def test_astype_datetime(other): + arr = period_array(["2000", "2001", None], freq="D") + # slice off the [ns] so that the regex matches. + if other == "timedelta64[ns]": + with pytest.raises(TypeError, match=other[:-4]): + arr.astype(other) + + else: + # GH#45038 allow period->dt64 because we allow dt64->period + result = arr.astype(other) + expected = pd.DatetimeIndex(["2000", "2001", pd.NaT])._data + tm.assert_datetime_array_equal(result, expected) diff --git a/pandas/tests/arrays/period/test_constructors.py b/pandas/tests/arrays/period/test_constructors.py new file mode 100644 index 00000000..cf974905 --- /dev/null +++ b/pandas/tests/arrays/period/test_constructors.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs.period import IncompatibleFrequency + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ( + PeriodArray, + period_array, +) + + +@pytest.mark.parametrize( + "data, freq, expected", + [ + ([pd.Period("2017", "D")], None, [17167]), + ([pd.Period("2017", "D")], "D", [17167]), + ([2017], "D", [17167]), + (["2017"], "D", [17167]), + ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]), + ([pd.Period("2017", "D"), None], None, [17167, iNaT]), + (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]), + (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]), + (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]), + ], +) +def test_period_array_ok(data, freq, expected): + result = period_array(data, freq=freq).asi8 + expected = np.asarray(expected, dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + +def test_period_array_readonly_object(): + # https://github.com/pandas-dev/pandas/issues/25403 + pa = period_array([pd.Period("2019-01-01")]) + arr = np.asarray(pa, dtype="object") + arr.setflags(write=False) + + result = period_array(arr) + tm.assert_period_array_equal(result, pa) + + result = pd.Series(arr) + tm.assert_series_equal(result, pd.Series(pa)) + + result = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, pd.DataFrame({"A": pa})) + + +def test_from_datetime64_freq_changes(): + # https://github.com/pandas-dev/pandas/issues/23438 + arr = pd.date_range("2017", periods=3, freq="D") + result = PeriodArray._from_datetime64(arr, freq="M") + expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M") + tm.assert_period_array_equal(result, expected) + + +@pytest.mark.parametrize( + "data, freq, msg", + [ + ( + [pd.Period("2017", "D"), pd.Period("2017", "A")], + None, + "Input has different freq", + ), + ([pd.Period("2017", "D")], "A", "Input has different freq"), + ], +) +def 
test_period_array_raises(data, freq, msg): + with pytest.raises(IncompatibleFrequency, match=msg): + period_array(data, freq) + + +def test_period_array_non_period_series_raies(): + ser = pd.Series([1, 2, 3]) + with pytest.raises(TypeError, match="dtype"): + PeriodArray(ser, freq="D") + + +def test_period_array_freq_mismatch(): + arr = period_array(["2000", "2001"], freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + PeriodArray(arr, freq="M") + + with pytest.raises(IncompatibleFrequency, match="freq"): + PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) + + +def test_from_sequence_disallows_i8(): + arr = period_array(["2000", "2001"], freq="D") + + msg = str(arr[0].ordinal) + with pytest.raises(TypeError, match=msg): + PeriodArray._from_sequence(arr.asi8, dtype=arr.dtype) + + with pytest.raises(TypeError, match=msg): + PeriodArray._from_sequence(list(arr.asi8), dtype=arr.dtype) + + +def test_from_td64nat_sequence_raises(): + # GH#44507 + td = pd.NaT.to_numpy("m8[ns]") + + dtype = pd.period_range("2005-01-01", periods=3, freq="D").dtype + + arr = np.array([None], dtype=object) + arr[0] = td + + msg = "Value must be Period, string, integer, or datetime" + with pytest.raises(ValueError, match=msg): + PeriodArray._from_sequence(arr, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + pd.PeriodIndex(arr, dtype=dtype) + with pytest.raises(ValueError, match=msg): + pd.Index(arr, dtype=dtype) + with pytest.raises(ValueError, match=msg): + pd.array(arr, dtype=dtype) + with pytest.raises(ValueError, match=msg): + pd.Series(arr, dtype=dtype) + with pytest.raises(ValueError, match=msg): + pd.DataFrame(arr, dtype=dtype) diff --git a/pandas/tests/arrays/period/test_reductions.py b/pandas/tests/arrays/period/test_reductions.py new file mode 100644 index 00000000..2889cc78 --- /dev/null +++ b/pandas/tests/arrays/period/test_reductions.py @@ -0,0 +1,42 @@ +import pytest + +import pandas as pd +from pandas.core.arrays import period_array + + +class TestReductions: + def test_min_max(self): + arr = period_array( + [ + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", + ], + freq="D", + ) + + result = arr.min() + expected = pd.Period("2000-01-02", freq="D") + assert result == expected + + result = arr.max() + expected = pd.Period("2000-01-05", freq="D") + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_empty(self, skipna): + arr = period_array([], freq="D") + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = arr.max(skipna=skipna) + assert result is pd.NaT diff --git a/pandas/tests/arrays/sparse/__init__.py b/pandas/tests/arrays/sparse/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py new file mode 100644 index 00000000..36af5d32 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -0,0 +1,236 @@ +import string + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +class TestSeriesAccessor: + def test_to_dense(self): + ser = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]") + result = ser.sparse.to_dense() + expected = pd.Series([0, 1, 0, 10]) + 
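+ # to_dense drops the sparse container and returns a plain dense Series with the same values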
tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("attr", ["npoints", "density", "fill_value", "sp_values"]) + def test_get_attributes(self, attr): + arr = SparseArray([0, 1]) + ser = pd.Series(arr) + + result = getattr(ser.sparse, attr) + expected = getattr(arr, attr) + assert result == expected + + @td.skip_if_no_scipy + def test_from_coo(self): + import scipy.sparse + + row = [0, 3, 1, 0] + col = [0, 3, 1, 2] + data = [4, 5, 7, 9] + # TODO(scipy#13585): Remove dtype when scipy is fixed + # https://github.com/scipy/scipy/issues/13585 + sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int") + result = pd.Series.sparse.from_coo(sp_array) + + index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]]) + expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]") + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize( + "sort_labels, expected_rows, expected_cols, expected_values_pos", + [ + ( + False, + [("b", 2), ("a", 2), ("b", 1), ("a", 1)], + [("z", 1), ("z", 2), ("x", 2), ("z", 0)], + {1: (1, 0), 3: (3, 3)}, + ), + ( + True, + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], + [("x", 2), ("z", 0), ("z", 1), ("z", 2)], + {1: (1, 2), 3: (0, 1)}, + ), + ], + ) + def test_to_coo( + self, sort_labels, expected_rows, expected_cols, expected_values_pos + ): + import scipy.sparse + + values = SparseArray([0, np.nan, 1, 0, None, 3], fill_value=0) + index = pd.MultiIndex.from_tuples( + [ + ("b", 2, "z", 1), + ("a", 2, "z", 2), + ("a", 2, "z", 1), + ("a", 2, "x", 2), + ("b", 1, "z", 1), + ("a", 1, "z", 0), + ] + ) + ss = pd.Series(values, index=index) + + expected_A = np.zeros((4, 4)) + for value, (row, col) in expected_values_pos.items(): + expected_A[row, col] = value + + A, rows, cols = ss.sparse.to_coo( + row_levels=(0, 1), column_levels=(2, 3), sort_labels=sort_labels + ) + assert isinstance(A, scipy.sparse.coo_matrix) + tm.assert_numpy_array_equal(A.toarray(), expected_A) + assert rows == expected_rows + assert cols == expected_cols + + def test_non_sparse_raises(self): + ser = pd.Series([1, 2, 3]) + with pytest.raises(AttributeError, match=".sparse"): + ser.sparse.density + + +class TestFrameAccessor: + def test_accessor_raises(self): + df = pd.DataFrame({"A": [0, 1]}) + with pytest.raises(AttributeError, match="sparse"): + df.sparse + + @pytest.mark.parametrize("format", ["csc", "csr", "coo"]) + @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])]) + @pytest.mark.parametrize("dtype", ["float64", "int64"]) + @td.skip_if_no_scipy + def test_from_spmatrix(self, format, labels, dtype): + import scipy.sparse + + sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item()) + + mat = scipy.sparse.eye(10, format=format, dtype=dtype) + result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels) + expected = pd.DataFrame( + np.eye(10, dtype=dtype), index=labels, columns=labels + ).astype(sp_dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("format", ["csc", "csr", "coo"]) + @td.skip_if_no_scipy + def test_from_spmatrix_including_explicit_zero(self, format): + import scipy.sparse + + mat = scipy.sparse.random(10, 2, density=0.5, format=format) + mat.data[0] = 0 + result = pd.DataFrame.sparse.from_spmatrix(mat) + dtype = SparseDtype("float64", 0.0) + expected = pd.DataFrame(mat.todense()).astype(dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", + [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", 
"a"]], + ) + @td.skip_if_no_scipy + def test_from_spmatrix_columns(self, columns): + import scipy.sparse + + dtype = SparseDtype("float64", 0.0) + + mat = scipy.sparse.random(10, 2, density=0.5) + result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns) + expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)] + ) + @td.skip_if_no_scipy + def test_to_coo(self, colnames): + import scipy.sparse + + df = pd.DataFrame( + {colnames[0]: [0, 1, 0], colnames[1]: [1, 0, 0]}, dtype="Sparse[int64, 0]" + ) + result = df.sparse.to_coo() + expected = scipy.sparse.coo_matrix(np.asarray(df)) + assert (result != expected).nnz == 0 + + @pytest.mark.parametrize("fill_value", [1, np.nan]) + @td.skip_if_no_scipy + def test_to_coo_nonzero_fill_val_raises(self, fill_value): + df = pd.DataFrame( + { + "A": SparseArray( + [fill_value, fill_value, fill_value, 2], fill_value=fill_value + ), + "B": SparseArray( + [fill_value, 2, fill_value, fill_value], fill_value=fill_value + ), + } + ) + with pytest.raises(ValueError, match="fill value must be 0"): + df.sparse.to_coo() + + def test_to_dense(self): + df = pd.DataFrame( + { + "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)), + "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)), + "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)), + }, + index=["b", "a"], + ) + result = df.sparse.to_dense() + expected = pd.DataFrame( + {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"] + ) + tm.assert_frame_equal(result, expected) + + def test_density(self): + df = pd.DataFrame( + { + "A": SparseArray([1, 0, 2, 1], fill_value=0), + "B": SparseArray([0, 1, 1, 1], fill_value=0), + } + ) + res = df.sparse.density + expected = 0.75 + assert res == expected + + @pytest.mark.parametrize("dtype", ["int64", "float64"]) + @pytest.mark.parametrize("dense_index", [True, False]) + @td.skip_if_no_scipy + def test_series_from_coo(self, dtype, dense_index): + import scipy.sparse + + A = scipy.sparse.eye(3, format="coo", dtype=dtype) + result = pd.Series.sparse.from_coo(A, dense_index=dense_index) + index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) + if dense_index: + expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) + + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_series_from_coo_incorrect_format_raises(self): + # gh-26554 + import scipy.sparse + + m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]])) + with pytest.raises( + TypeError, match="Expected coo_matrix. Got csr_matrix instead." 
+ ): + pd.Series.sparse.from_coo(m) + + def test_with_column_named_sparse(self): + # https://github.com/pandas-dev/pandas/issues/30758 + df = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])}) + assert isinstance(df.sparse, pd.core.arrays.sparse.accessor.SparseFrameAccessor) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py new file mode 100644 index 00000000..1a32c995 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -0,0 +1,516 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +@pytest.fixture(params=["integer", "block"]) +def kind(request): + """kind kwarg to pass to SparseArray/SparseSeries""" + return request.param + + +@pytest.fixture(params=[True, False]) +def mix(request): + """ + Fixture returning True or False, determining whether to operate + op(sparse, dense) instead of op(sparse, sparse) + """ + return request.param + + +class TestSparseArrayArithmetics: + def _assert(self, a, b): + # We have to use tm.assert_sp_array_equal. See GH #45126 + tm.assert_numpy_array_equal(a, b) + + def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op): + # Check that arithmetic behavior matches non-Sparse Series arithmetic + + if isinstance(a_dense, np.ndarray): + expected = op(pd.Series(a_dense), b_dense).values + elif isinstance(b_dense, np.ndarray): + expected = op(a_dense, pd.Series(b_dense)).values + else: + raise NotImplementedError + + with np.errstate(invalid="ignore", divide="ignore"): + if mix: + result = op(a, b_dense).to_dense() + else: + result = op(a, b).to_dense() + + self._assert(result, expected) + + def _check_bool_result(self, res): + assert isinstance(res, SparseArray) + assert isinstance(res.dtype, SparseDtype) + assert res.dtype.subtype == np.bool_ + assert isinstance(res.fill_value, bool) + + def _check_comparison_ops(self, a, b, a_dense, b_dense): + with np.errstate(invalid="ignore"): + # Unfortunately, trying to wrap the computation of each expected + # value with np.errstate() is too tedious.
+ # + # sparse & sparse + self._check_bool_result(a == b) + self._assert((a == b).to_dense(), a_dense == b_dense) + + self._check_bool_result(a != b) + self._assert((a != b).to_dense(), a_dense != b_dense) + + self._check_bool_result(a >= b) + self._assert((a >= b).to_dense(), a_dense >= b_dense) + + self._check_bool_result(a <= b) + self._assert((a <= b).to_dense(), a_dense <= b_dense) + + self._check_bool_result(a > b) + self._assert((a > b).to_dense(), a_dense > b_dense) + + self._check_bool_result(a < b) + self._assert((a < b).to_dense(), a_dense < b_dense) + + # sparse & dense + self._check_bool_result(a == b_dense) + self._assert((a == b_dense).to_dense(), a_dense == b_dense) + + self._check_bool_result(a != b_dense) + self._assert((a != b_dense).to_dense(), a_dense != b_dense) + + self._check_bool_result(a >= b_dense) + self._assert((a >= b_dense).to_dense(), a_dense >= b_dense) + + self._check_bool_result(a <= b_dense) + self._assert((a <= b_dense).to_dense(), a_dense <= b_dense) + + self._check_bool_result(a > b_dense) + self._assert((a > b_dense).to_dense(), a_dense > b_dense) + + self._check_bool_result(a < b_dense) + self._assert((a < b_dense).to_dense(), a_dense < b_dense) + + def _check_logical_ops(self, a, b, a_dense, b_dense): + # sparse & sparse + self._check_bool_result(a & b) + self._assert((a & b).to_dense(), a_dense & b_dense) + + self._check_bool_result(a | b) + self._assert((a | b).to_dense(), a_dense | b_dense) + # sparse & dense + self._check_bool_result(a & b_dense) + self._assert((a & b_dense).to_dense(), a_dense & b_dense) + + self._check_bool_result(a | b_dense) + self._assert((a | b_dense).to_dense(), a_dense | b_dense) + + @pytest.mark.parametrize("scalar", [0, 1, 3]) + @pytest.mark.parametrize("fill_value", [None, 0, 2]) + def test_float_scalar( + self, kind, mix, all_arithmetic_functions, fill_value, scalar, request + ): + op = all_arithmetic_functions + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + a = SparseArray(values, kind=kind, fill_value=fill_value) + self._check_numeric_ops(a, scalar, values, scalar, mix, op) + + def test_float_scalar_comparison(self, kind): + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + + a = SparseArray(values, kind=kind) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = SparseArray(values, kind=kind, fill_value=0) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = SparseArray(values, kind=kind, fill_value=2) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + def test_float_same_index_without_nans(self, kind, mix, all_arithmetic_functions): + # when sp_index are the same + op = all_arithmetic_functions + + values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) + rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0]) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind, fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_same_index_with_nans( + self, kind, mix, all_arithmetic_functions, request + ): + # when sp_index are the same + op = all_arithmetic_functions + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = 
SparseArray(values, kind=kind) + b = SparseArray(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_same_index_comparison(self, kind): + # when sp_index are the same + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = SparseArray(values, kind=kind) + b = SparseArray(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) + rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0]) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + def test_float_array(self, kind, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = SparseArray(values, kind=kind) + b = SparseArray(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind, fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = SparseArray(values, kind=kind, fill_value=1) + b = SparseArray(rvalues, kind=kind, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_array_different_kind(self, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = SparseArray(values, kind="integer") + b = SparseArray(rvalues, kind="block") + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = SparseArray(values, kind="integer", fill_value=0) + b = SparseArray(rvalues, kind="block") + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = SparseArray(values, kind="integer", fill_value=0) + b = SparseArray(rvalues, kind="block", fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = SparseArray(values, kind="integer", fill_value=1) + b = SparseArray(rvalues, kind="block", fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_array_comparison(self, kind): + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = SparseArray(values, kind=kind) + b = SparseArray(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + a = SparseArray(values, kind=kind, fill_value=1) + b = SparseArray(rvalues, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + def test_int_array(self, kind, mix, all_arithmetic_functions): + op = 
all_arithmetic_functions + + # have to specify dtype explicitly until fixing GH 667 + dtype = np.int64 + + values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) + rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) + + a = SparseArray(values, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = SparseArray(rvalues, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = SparseArray(rvalues, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = SparseArray(rvalues, fill_value=0, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = SparseArray(values, fill_value=1, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype, fill_value=1) + b = SparseArray(rvalues, fill_value=2, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_int_array_comparison(self, kind): + dtype = "int64" + # int32 NI ATM + + values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) + rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) + + a = SparseArray(values, dtype=dtype, kind=kind) + b = SparseArray(rvalues, dtype=dtype, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0) + b = SparseArray(rvalues, dtype=dtype, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0) + b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + a = SparseArray(values, dtype=dtype, kind=kind, fill_value=1) + b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + @pytest.mark.parametrize("fill_value", [True, False, np.nan]) + def test_bool_same_index(self, kind, fill_value): + # GH 14000 + # when sp_index are the same + values = np.array([True, False, True, True], dtype=np.bool_) + rvalues = np.array([True, False, True, True], dtype=np.bool_) + + a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value) + b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value) + self._check_logical_ops(a, b, values, rvalues) + + @pytest.mark.parametrize("fill_value", [True, False, np.nan]) + def test_bool_array_logical(self, kind, fill_value): + # GH 14000 + # when sp_index are the same + values = np.array([True, False, True, False, True, True], dtype=np.bool_) + rvalues = np.array([True, False, False, True, False, True], dtype=np.bool_) + + a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value) + b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value) + self._check_logical_ops(a, b, values, rvalues) + + def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions, request): + op = all_arithmetic_functions + rdtype = "int64" + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + 
rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) + + a = SparseArray(values, kind=kind) + b = SparseArray(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind, fill_value=0) + assert b.dtype == SparseDtype(rdtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = SparseArray(values, kind=kind, fill_value=1) + b = SparseArray(rvalues, kind=kind, fill_value=2) + assert b.dtype == SparseDtype(rdtype, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_mixed_array_comparison(self, kind): + rdtype = "int64" + # int32 NI ATM + + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) + + a = SparseArray(values, kind=kind) + b = SparseArray(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + self._check_comparison_ops(a, b, values, rvalues) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind, fill_value=0) + assert b.dtype == SparseDtype(rdtype) + self._check_comparison_ops(a, b, values, rvalues) + + a = SparseArray(values, kind=kind, fill_value=1) + b = SparseArray(rvalues, kind=kind, fill_value=2) + assert b.dtype == SparseDtype(rdtype, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + def test_xor(self): + s = SparseArray([True, True, False, False]) + t = SparseArray([True, False, True, False]) + result = s ^ t + sp_index = pd.core.arrays.sparse.IntIndex(4, np.array([0, 1, 2], dtype="int32")) + expected = SparseArray([False, True, True], sparse_index=sp_index) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("op", [operator.eq, operator.add]) +def test_with_list(op): + arr = SparseArray([0, 1], fill_value=0) + result = op(arr, [0, 1]) + expected = op(arr, SparseArray([0, 1])) + tm.assert_sp_array_equal(result, expected) + + +def test_with_dataframe(): + # GH#27910 + arr = SparseArray([0, 1], fill_value=0) + df = pd.DataFrame([[1, 2], [3, 4]]) + result = arr.__add__(df) + assert result is NotImplemented + + +def test_with_zerodim_ndarray(): + # GH#27910 + arr = SparseArray([0, 1], fill_value=0) + + result = arr * np.array(2) + expected = arr * 2 + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.abs, np.exp]) +@pytest.mark.parametrize( + "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])] +) +def test_ufuncs(ufunc, arr): + result = ufunc(arr) + fill_value = ufunc(arr.fill_value) + expected = SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize( + "a, b", + [ + (SparseArray([0, 0, 0]), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 
2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + ], +) +@pytest.mark.parametrize("ufunc", [np.add, np.greater]) +def test_binary_ufuncs(ufunc, a, b): + # can't say anything about fill value here. + result = ufunc(a, b) + expected = ufunc(np.asarray(a), np.asarray(b)) + assert isinstance(result, SparseArray) + tm.assert_numpy_array_equal(np.asarray(result), expected) + + +def test_ndarray_inplace(): + sparray = SparseArray([0, 2, 0, 0]) + ndarray = np.array([0, 1, 2, 3]) + ndarray += sparray + expected = np.array([0, 3, 2, 3]) + tm.assert_numpy_array_equal(ndarray, expected) + + +def test_sparray_inplace(): + sparray = SparseArray([0, 2, 0, 0]) + ndarray = np.array([0, 1, 2, 3]) + sparray += ndarray + expected = SparseArray([0, 3, 2, 3], fill_value=0) + tm.assert_sp_array_equal(sparray, expected) + + +@pytest.mark.parametrize("cons", [list, np.array, SparseArray]) +def test_mismatched_length_cmp_op(cons): + left = SparseArray([True, True]) + right = cons([True, True, True]) + with pytest.raises(ValueError, match="operands have mismatched length"): + left & right + + +@pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "floordiv", "pow"]) +@pytest.mark.parametrize("fill_value", [np.nan, 3]) +def test_binary_operators(op, fill_value): + op = getattr(operator, op) + data1 = np.random.randn(20) + data2 = np.random.randn(20) + + data1[::2] = fill_value + data2[::3] = fill_value + + first = SparseArray(data1, fill_value=fill_value) + second = SparseArray(data2, fill_value=fill_value) + + with np.errstate(all="ignore"): + res = op(first, second) + exp = SparseArray( + op(first.to_dense(), second.to_dense()), fill_value=first.fill_value + ) + assert isinstance(res, SparseArray) + tm.assert_almost_equal(res.to_dense(), exp.to_dense()) + + res2 = op(first, second.to_dense()) + assert isinstance(res2, SparseArray) + tm.assert_sp_array_equal(res, res2) + + res3 = op(first.to_dense(), second) + assert isinstance(res3, SparseArray) + tm.assert_sp_array_equal(res, res3) + + res4 = op(first, 4) + assert isinstance(res4, SparseArray) + + # Ignore this if the actual op raises (e.g. pow). 
+ try: + exp = op(first.to_dense(), 4) + exp_fv = op(first.fill_value, 4) + except ValueError: + pass + else: + tm.assert_almost_equal(res4.fill_value, exp_fv) + tm.assert_almost_equal(res4.to_dense(), exp) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py new file mode 100644 index 00000000..9b78eb34 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_array.py @@ -0,0 +1,485 @@ +import re +import warnings + +import numpy as np +import pytest + +from pandas._libs.sparse import IntIndex + +import pandas as pd +from pandas import isna +import pandas._testing as tm +from pandas.core.api import Int64Index +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +@pytest.fixture +def arr_data(): + """Fixture returning numpy array with valid and missing entries""" + return np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) + + +@pytest.fixture +def arr(arr_data): + """Fixture returning SparseArray from 'arr_data'""" + return SparseArray(arr_data) + + +@pytest.fixture +def zarr(): + """Fixture returning SparseArray with integer entries and 'fill_value=0'""" + return SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) + + +class TestSparseArray: + @pytest.mark.parametrize("fill_value", [0, None, np.nan]) + def test_shift_fill_value(self, fill_value): + # GH #24128 + sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0) + res = sparse.shift(1, fill_value=fill_value) + if isna(fill_value): + fill_value = res.dtype.na_value + exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0) + tm.assert_sp_array_equal(res, exp) + + def test_set_fill_value(self): + arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan) + arr.fill_value = 2 + assert arr.fill_value == 2 + + arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64) + arr.fill_value = 2 + assert arr.fill_value == 2 + + # TODO: this seems fine? You can construct an integer + # sparsearray with NaN fill value, why not update one? 
+ # coerces to int + # msg = "unable to set fill_value 3\\.1 to int64 dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = 3.1 + assert arr.fill_value == 3.1 + + # msg = "unable to set fill_value nan to int64 dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = np.nan + assert np.isnan(arr.fill_value) + + arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_) + arr.fill_value = True + assert arr.fill_value + + # FIXME: don't leave commented-out + # coerces to bool + # TODO: we can construct a sparse array of bool + # type and use any value as fill_value + # msg = "fill_value must be True, False or nan" + # with pytest.raises(ValueError, match=msg): + # arr.fill_value = 0 + + # msg = "unable to set fill_value nan to bool dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = np.nan + assert np.isnan(arr.fill_value) + + @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]) + def test_set_fill_invalid_non_scalar(self, val): + arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_) + msg = "fill_value must be a scalar" + + with pytest.raises(ValueError, match=msg): + arr.fill_value = val + + def test_copy(self, arr): + arr2 = arr.copy() + assert arr2.sp_values is not arr.sp_values + assert arr2.sp_index is arr.sp_index + + def test_values_asarray(self, arr_data, arr): + tm.assert_almost_equal(arr.to_dense(), arr_data) + + @pytest.mark.parametrize( + "data,shape,dtype", + [ + ([0, 0, 0, 0, 0], (5,), None), + ([], (0,), None), + ([0], (1,), None), + (["A", "A", np.nan, "B"], (4,), object), + ], + ) + def test_shape(self, data, shape, dtype): + # GH 21126 + out = SparseArray(data, dtype=dtype) + assert out.shape == shape + + @pytest.mark.parametrize( + "vals", + [ + [np.nan, np.nan, np.nan, np.nan, np.nan], + [1, np.nan, np.nan, 3, np.nan], + [1, np.nan, 0, 3, 0], + ], + ) + @pytest.mark.parametrize("fill_value", [None, 0]) + def test_dense_repr(self, vals, fill_value): + vals = np.array(vals) + arr = SparseArray(vals, fill_value=fill_value) + + res = arr.to_dense() + tm.assert_numpy_array_equal(res, vals) + + @pytest.mark.parametrize("fix", ["arr", "zarr"]) + def test_pickle(self, fix, request): + obj = request.getfixturevalue(fix) + unpickled = tm.round_trip_pickle(obj) + tm.assert_sp_array_equal(unpickled, obj) + + def test_generator_warnings(self): + sp_arr = SparseArray([1, 2, 3]) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings(action="always", category=DeprecationWarning) + warnings.filterwarnings(action="always", category=PendingDeprecationWarning) + for _ in sp_arr: + pass + assert len(w) == 0 + + def test_where_retain_fill_value(self): + # GH#45691 don't lose fill_value on _where + arr = SparseArray([np.nan, 1.0], fill_value=0) + + mask = np.array([True, False]) + + res = arr._where(~mask, 1) + exp = SparseArray([1, 1.0], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + ser = pd.Series(arr) + res = ser.where(~mask, 1) + tm.assert_series_equal(res, pd.Series(exp)) + + def test_fillna(self): + s = SparseArray([1, np.nan, np.nan, 3, np.nan]) + res = s.fillna(-1) + exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, 0, 3, 0]) + res = s.fillna(-1) + exp = 
SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([np.nan, np.nan, np.nan, np.nan]) + res = s.fillna(-1) + exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + # float dtype's fill_value is np.nan, replaced by -1 + s = SparseArray([0.0, 0.0, 0.0, 0.0]) + res = s.fillna(-1) + exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + # int dtype shouldn't have missing. No changes. + s = SparseArray([0, 0, 0, 0]) + assert s.dtype == SparseDtype(np.int64) + assert s.fill_value == 0 + res = s.fillna(-1) + tm.assert_sp_array_equal(res, s) + + s = SparseArray([0, 0, 0, 0], fill_value=0) + assert s.dtype == SparseDtype(np.int64) + assert s.fill_value == 0 + res = s.fillna(-1) + exp = SparseArray([0, 0, 0, 0], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + # fill_value can be nan if there is no missing hole. + # only fill_value will be changed + s = SparseArray([0, 0, 0, 0], fill_value=np.nan) + assert s.dtype == SparseDtype(np.int64, fill_value=np.nan) + assert np.isnan(s.fill_value) + res = s.fillna(-1) + exp = SparseArray([0, 0, 0, 0], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + def test_fillna_overlap(self): + s = SparseArray([1, np.nan, np.nan, 3, np.nan]) + # filling with existing value doesn't replace existing value with + # fill_value, i.e. existing 3 remains in sp_values + res = s.fillna(3) + exp = np.array([1, 3, 3, 3, 3], dtype=np.float64) + tm.assert_numpy_array_equal(res.to_dense(), exp) + + s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) + res = s.fillna(3) + exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + def test_nonzero(self): + # Tests regression #21172. + sa = SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + expected = np.array([2, 5, 9], dtype=np.int32) + (result,) = sa.nonzero() + tm.assert_numpy_array_equal(expected, result) + + sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + (result,) = sa.nonzero() + tm.assert_numpy_array_equal(expected, result) + + +class TestSparseArrayAnalytics: + @pytest.mark.parametrize( + "data,expected", + [ + ( + np.array([1, 2, 3, 4, 5], dtype=float), # non-null data + SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0])), + ), + ( + np.array([1, 2, np.nan, 4, 5], dtype=float), # null data + SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])), + ), + ], + ) + @pytest.mark.parametrize("numpy", [True, False]) + def test_cumsum(self, data, expected, numpy): + cumsum = np.cumsum if numpy else lambda s: s.cumsum() + + out = cumsum(SparseArray(data)) + tm.assert_sp_array_equal(out, expected) + + out = cumsum(SparseArray(data, fill_value=np.nan)) + tm.assert_sp_array_equal(out, expected) + + out = cumsum(SparseArray(data, fill_value=2)) + tm.assert_sp_array_equal(out, expected) + + if numpy: # numpy compatibility checks. 
+ msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), out=out) + else: + axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. + msg = re.escape(f"axis(={axis}) out of bounds") + with pytest.raises(ValueError, match=msg): + SparseArray(data).cumsum(axis=axis) + + def test_ufunc(self): + # GH 13853 make sure ufunc is applied to fill_value + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray([1, np.nan, 2, np.nan, 2]) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=-1) + exp = SparseArray([1, 1, 2, 2], fill_value=1) + tm.assert_sp_array_equal(abs(sparse), exp) + tm.assert_sp_array_equal(np.abs(sparse), exp) + + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2])) + tm.assert_sp_array_equal(np.sin(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1)) + tm.assert_sp_array_equal(np.sin(sparse), result) + + sparse = SparseArray([1, -1, 0, -2], fill_value=0) + result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0)) + tm.assert_sp_array_equal(np.sin(sparse), result) + + def test_ufunc_args(self): + # GH 13853 make sure ufunc is applied to fill_value, including its arg + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray([2, np.nan, 3, np.nan, -1]) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray([2, 0, 3, -1], fill_value=2) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + sparse = SparseArray([1, -1, 0, -2], fill_value=0) + result = SparseArray([2, 0, 1, -1], fill_value=1) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + @pytest.mark.parametrize("fill_value", [0.0, np.nan]) + def test_modf(self, fill_value): + # https://github.com/pandas-dev/pandas/issues/26946 + sparse = SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value) + r1, r2 = np.modf(sparse) + e1, e2 = np.modf(np.asarray(sparse)) + tm.assert_sp_array_equal(r1, SparseArray(e1, fill_value=fill_value)) + tm.assert_sp_array_equal(r2, SparseArray(e2, fill_value=fill_value)) + + def test_nbytes_integer(self): + arr = SparseArray([1, 0, 0, 0, 2], kind="integer") + result = arr.nbytes + # (2 * 8) + 2 * 4 + assert result == 24 + + def test_nbytes_block(self): + arr = SparseArray([1, 2, 0, 0, 0], kind="block") + result = arr.nbytes + # (2 * 8) + 4 + 4 + # sp_values, blocs, blengths + assert result == 24 + + def test_asarray_datetime64(self): + s = SparseArray(pd.to_datetime(["2012", None, None, "2013"])) + np.asarray(s) + + def test_density(self): + arr = SparseArray([0, 1]) + assert arr.density == 0.5 + + def test_npoints(self): + arr = SparseArray([0, 1]) + assert arr.npoints == 1 + + +def test_setting_fill_value_fillna_still_works(): + # This is why letting users update fill_value / dtype is bad + # astype has the same problem. 
+ arr = SparseArray([1.0, np.nan, 1.0], fill_value=0.0) + arr.fill_value = np.nan + result = arr.isna() + # Can't do direct comparison, since the sp_index will be different + # So let's convert to ndarray and check there. + result = np.asarray(result) + + expected = np.array([False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + +def test_setting_fill_value_updates(): + arr = SparseArray([0.0, np.nan], fill_value=0) + arr.fill_value = np.nan + # use private constructor to get the index right + # otherwise both nans would be un-stored. + expected = SparseArray._simple_new( + sparse_array=np.array([np.nan]), + sparse_index=IntIndex(2, [1]), + dtype=SparseDtype(float, np.nan), + ) + tm.assert_sp_array_equal(arr, expected) + + +@pytest.mark.parametrize( + "arr,fill_value,loc", + [ + ([None, 1, 2], None, 0), + ([0, None, 2], None, 1), + ([0, 1, None], None, 2), + ([0, 1, 1, None, None], None, 3), + ([1, 1, 1, 2], None, -1), + ([], None, -1), + ([None, 1, 0, 0, None, 2], None, 0), + ([None, 1, 0, 0, None, 2], 1, 1), + ([None, 1, 0, 0, None, 2], 2, 5), + ([None, 1, 0, 0, None, 2], 3, -1), + ([None, 0, 0, 1, 2, 1], 0, 1), + ([None, 0, 0, 1, 2, 1], 1, 3), + ], +) +def test_first_fill_value_loc(arr, fill_value, loc): + result = SparseArray(arr, fill_value=fill_value)._first_fill_value_loc() + assert result == loc + + +@pytest.mark.parametrize( + "arr", + [ + [1, 2, np.nan, np.nan], + [1, np.nan, 2, np.nan], + [1, 2, np.nan], + [np.nan, 1, 0, 0, np.nan, 2], + [np.nan, 0, 0, 1, 2, 1], + ], +) +@pytest.mark.parametrize("fill_value", [np.nan, 0, 1]) +def test_unique_na_fill(arr, fill_value): + a = SparseArray(arr, fill_value=fill_value).unique() + b = pd.Series(arr).unique() + assert isinstance(a, SparseArray) + a = np.asarray(a) + tm.assert_numpy_array_equal(a, b) + + +def test_unique_all_sparse(): + # https://github.com/pandas-dev/pandas/issues/23168 + arr = SparseArray([0, 0]) + result = arr.unique() + expected = SparseArray([0]) + tm.assert_sp_array_equal(result, expected) + + +def test_map(): + arr = SparseArray([0, 1, 2]) + expected = SparseArray([10, 11, 12], fill_value=10) + + # dict + result = arr.map({0: 10, 1: 11, 2: 12}) + tm.assert_sp_array_equal(result, expected) + + # series + result = arr.map(pd.Series({0: 10, 1: 11, 2: 12})) + tm.assert_sp_array_equal(result, expected) + + # function + result = arr.map(lambda x: x + 10) + expected = SparseArray([10, 11, 12], fill_value=10) + tm.assert_sp_array_equal(result, expected) + + +def test_map_missing(): + arr = SparseArray([0, 1, 2]) + expected = SparseArray([10, 11, None], fill_value=10) + + result = arr.map({0: 10, 1: 11}) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("fill_value", [np.nan, 1]) +def test_dropna(fill_value): + # GH-28287 + arr = SparseArray([np.nan, 1], fill_value=fill_value) + exp = SparseArray([1.0], fill_value=fill_value) + tm.assert_sp_array_equal(arr.dropna(), exp) + + df = pd.DataFrame({"a": [0, 1], "b": arr}) + expected_df = pd.DataFrame({"a": [1], "b": exp}, index=Int64Index([1])) + tm.assert_equal(df.dropna(), expected_df) + + +def test_drop_duplicates_fill_value(): + # GH 11726 + df = pd.DataFrame(np.zeros((5, 5))).apply(lambda x: SparseArray(x, fill_value=0)) + result = df.drop_duplicates() + expected = pd.DataFrame({i: SparseArray([0.0], fill_value=0) for i in range(5)}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py new file mode 100644 index 
00000000..6761040d --- /dev/null +++ b/pandas/tests/arrays/sparse/test_astype.py @@ -0,0 +1,128 @@ +import numpy as np +import pytest + +from pandas._libs.sparse import IntIndex + +from pandas import Timestamp +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +class TestAstype: + def test_astype(self): + # float -> float + arr = SparseArray([None, None, 0, 2]) + result = arr.astype("Sparse[float32]") + expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32")) + tm.assert_sp_array_equal(result, expected) + + dtype = SparseDtype("float64", fill_value=0) + result = arr.astype(dtype) + expected = SparseArray._simple_new( + np.array([0.0, 2.0], dtype=dtype.subtype), IntIndex(4, [2, 3]), dtype + ) + tm.assert_sp_array_equal(result, expected) + + dtype = SparseDtype("int64", 0) + result = arr.astype(dtype) + expected = SparseArray._simple_new( + np.array([0, 2], dtype=np.int64), IntIndex(4, [2, 3]), dtype + ) + tm.assert_sp_array_equal(result, expected) + + arr = SparseArray([0, np.nan, 0, 1], fill_value=0) + with pytest.raises(ValueError, match="NA"): + arr.astype("Sparse[i8]") + + def test_astype_bool(self): + a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) + with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): + result = a.astype(bool) + expected = SparseArray( + [True, False, False, True], dtype=SparseDtype(bool, False) + ) + tm.assert_sp_array_equal(result, expected) + + # update fill value + result = a.astype(SparseDtype(bool, False)) + expected = SparseArray( + [True, False, False, True], dtype=SparseDtype(bool, False) + ) + tm.assert_sp_array_equal(result, expected) + + def test_astype_all(self, any_real_numpy_dtype): + vals = np.array([1, 2, 3]) + arr = SparseArray(vals, fill_value=1) + typ = np.dtype(any_real_numpy_dtype) + with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): + res = arr.astype(typ) + assert res.dtype == SparseDtype(typ, 1) + assert res.sp_values.dtype == typ + + tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ)) + + @pytest.mark.parametrize( + "arr, dtype, expected", + [ + ( + SparseArray([0, 1]), + "float", + SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)), + ), + (SparseArray([0, 1]), bool, SparseArray([False, True])), + ( + SparseArray([0, 1], fill_value=1), + bool, + SparseArray([False, True], dtype=SparseDtype(bool, True)), + ), + pytest.param( + SparseArray([0, 1]), + "datetime64[ns]", + SparseArray( + np.array([0, 1], dtype="datetime64[ns]"), + dtype=SparseDtype("datetime64[ns]", Timestamp("1970")), + ), + ), + ( + SparseArray([0, 1, 10]), + str, + SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")), + ), + (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])), + ( + SparseArray([0, 1, 0]), + object, + SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)), + ), + ], + ) + def test_astype_more(self, arr, dtype, expected): + + if isinstance(dtype, SparseDtype): + warn = None + else: + warn = FutureWarning + + with tm.assert_produces_warning(warn, match="astype from SparseDtype"): + result = arr.astype(dtype) + tm.assert_sp_array_equal(result, expected) + + def test_astype_nan_raises(self): + arr = SparseArray([1.0, np.nan]) + with pytest.raises(ValueError, match="Cannot convert non-finite"): + msg = "astype from SparseDtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + arr.astype(int) + + def test_astype_copy_false(self): + # GH#34456 bug caused by using .view instead of .astype 
in astype_nansafe + arr = SparseArray([1, 2, 3]) + + dtype = SparseDtype(float, 0) + + result = arr.astype(dtype, copy=False) + expected = SparseArray([1.0, 2.0, 3.0], fill_value=0.0) + tm.assert_sp_array_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_combine_concat.py b/pandas/tests/arrays/sparse/test_combine_concat.py new file mode 100644 index 00000000..0f09af26 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_combine_concat.py @@ -0,0 +1,62 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.sparse import SparseArray + + +class TestSparseArrayConcat: + @pytest.mark.parametrize("kind", ["integer", "block"]) + def test_basic(self, kind): + a = SparseArray([1, 0, 0, 2], kind=kind) + b = SparseArray([1, 0, 2, 2], kind=kind) + + result = SparseArray._concat_same_type([a, b]) + # Can't make any assertions about the sparse index itself + # since we don't merge sparse blocks across arrays + # in to_concat + expected = np.array([1, 2, 1, 2, 2], dtype="int64") + tm.assert_numpy_array_equal(result.sp_values, expected) + assert result.kind == kind + + @pytest.mark.parametrize("kind", ["integer", "block"]) + def test_uses_first_kind(self, kind): + other = "integer" if kind == "block" else "block" + a = SparseArray([1, 0, 0, 2], kind=kind) + b = SparseArray([1, 0, 2, 2], kind=other) + + result = SparseArray._concat_same_type([a, b]) + expected = np.array([1, 2, 1, 2, 2], dtype="int64") + tm.assert_numpy_array_equal(result.sp_values, expected) + assert result.kind == kind + + +@pytest.mark.parametrize( + "other, expected_dtype", + [ + # compatible dtype -> preserve sparse + (pd.Series([3, 4, 5], dtype="int64"), pd.SparseDtype("int64", 0)), + # (pd.Series([3, 4, 5], dtype="Int64"), pd.SparseDtype("int64", 0)), + # incompatible dtype -> Sparse[common dtype] + (pd.Series([1.5, 2.5, 3.5], dtype="float64"), pd.SparseDtype("float64", 0)), + # incompatible dtype -> Sparse[object] dtype + (pd.Series(["a", "b", "c"], dtype=object), pd.SparseDtype(object, 0)), + # categorical with compatible categories -> dtype of the categories + (pd.Series([3, 4, 5], dtype="category"), np.dtype("int64")), + (pd.Series([1.5, 2.5, 3.5], dtype="category"), np.dtype("float64")), + # categorical with incompatible categories -> object dtype + (pd.Series(["a", "b", "c"], dtype="category"), np.dtype(object)), + ], +) +def test_concat_with_non_sparse(other, expected_dtype): + # https://github.com/pandas-dev/pandas/issues/34336 + s_sparse = pd.Series([1, 0, 2], dtype=pd.SparseDtype("int64", 0)) + + result = pd.concat([s_sparse, other], ignore_index=True) + expected = pd.Series(list(s_sparse) + list(other)).astype(expected_dtype) + tm.assert_series_equal(result, expected) + + result = pd.concat([other, s_sparse], ignore_index=True) + expected = pd.Series(list(other) + list(s_sparse)).astype(expected_dtype) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_constructors.py b/pandas/tests/arrays/sparse/test_constructors.py new file mode 100644 index 00000000..e1d401f1 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_constructors.py @@ -0,0 +1,308 @@ +import numpy as np +import pytest + +from pandas._libs.sparse import IntIndex +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import isna +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +class TestConstructors: + def test_constructor_dtype(self): + arr = 
SparseArray([np.nan, 1, 2, np.nan]) + assert arr.dtype == SparseDtype(np.float64, np.nan) + assert arr.dtype.subtype == np.float64 + assert np.isnan(arr.fill_value) + + arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0) + assert arr.dtype == SparseDtype(np.float64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], dtype=np.float64) + assert arr.dtype == SparseDtype(np.float64, np.nan) + assert np.isnan(arr.fill_value) + + arr = SparseArray([0, 1, 2, 4], dtype=np.int64) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], dtype=None) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + def test_constructor_dtype_str(self): + result = SparseArray([1, 2, 3], dtype="int") + expected = SparseArray([1, 2, 3], dtype=int) + tm.assert_sp_array_equal(result, expected) + + def test_constructor_sparse_dtype(self): + result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1)) + expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64) + tm.assert_sp_array_equal(result, expected) + assert result.sp_values.dtype == np.dtype("int64") + + def test_constructor_sparse_dtype_str(self): + result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]") + expected = SparseArray([1, 0, 0, 1], dtype=np.int32) + tm.assert_sp_array_equal(result, expected) + assert result.sp_values.dtype == np.dtype("int32") + + def test_constructor_object_dtype(self): + # GH#11856 + arr = SparseArray(["A", "A", np.nan, "B"], dtype=object) + assert arr.dtype == SparseDtype(object) + assert np.isnan(arr.fill_value) + + arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A") + assert arr.dtype == SparseDtype(object, "A") + assert arr.fill_value == "A" + + def test_constructor_object_dtype_bool_fill(self): + # GH#17574 + data = [False, 0, 100.0, 0.0] + arr = SparseArray(data, dtype=object, fill_value=False) + assert arr.dtype == SparseDtype(object, False) + assert arr.fill_value is False + arr_expected = np.array(data, dtype=object) + it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) + assert np.fromiter(it, dtype=np.bool_).all() + + @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int]) + def test_constructor_na_dtype(self, dtype): + with pytest.raises(ValueError, match="Cannot convert"): + SparseArray([0, 1, np.nan], dtype=dtype) + + def test_constructor_warns_when_losing_timezone(self): + # GH#32501 warn when losing timezone information + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + + expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]")) + + with tm.assert_produces_warning(UserWarning): + result = SparseArray(dti) + + tm.assert_sp_array_equal(result, expected) + + with tm.assert_produces_warning(UserWarning): + result = SparseArray(pd.Series(dti)) + + tm.assert_sp_array_equal(result, expected) + + def test_constructor_spindex_dtype(self): + arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) + # TODO: actionable? 
+ # XXX: Behavior change: specifying SparseIndex no longer changes the + # fill_value + expected = SparseArray([0, 1, 2, 0], kind="integer") + tm.assert_sp_array_equal(arr, expected) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2, 3], + sparse_index=IntIndex(4, [1, 2, 3]), + dtype=np.int64, + fill_value=0, + ) + exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64 + ) + exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2, 3], + sparse_index=IntIndex(4, [1, 2, 3]), + dtype=None, + fill_value=0, + ) + exp = SparseArray([0, 1, 2, 3], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])]) + def test_constructor_spindex_dtype_scalar(self, sparse_index): + # scalar input + arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None) + exp = SparseArray([1], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None) + exp = SparseArray([1], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + def test_constructor_spindex_dtype_scalar_broadcasts(self): + arr = SparseArray( + data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None + ) + exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + @pytest.mark.parametrize( + "data, fill_value", + [ + (np.array([1, 2]), 0), + (np.array([1.0, 2.0]), np.nan), + ([True, False], False), + ([pd.Timestamp("2017-01-01")], pd.NaT), + ], + ) + def test_constructor_inferred_fill_value(self, data, fill_value): + result = SparseArray(data).fill_value + + if isna(fill_value): + assert isna(result) + else: + assert result == fill_value + + @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) + @pytest.mark.parametrize("size", [0, 10]) + @td.skip_if_no_scipy + def test_from_spmatrix(self, size, format): + import scipy.sparse + + mat = scipy.sparse.random(size, 1, density=0.5, format=format) + result = SparseArray.from_spmatrix(mat) + + result = np.asarray(result) + expected = mat.toarray().ravel() + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) + @td.skip_if_no_scipy + def test_from_spmatrix_including_explicit_zero(self, format): + import scipy.sparse + + mat = scipy.sparse.random(10, 1, density=0.5, format=format) + mat.data[0] = 0 + result = SparseArray.from_spmatrix(mat) + + result = np.asarray(result) + expected = mat.toarray().ravel() + tm.assert_numpy_array_equal(result, expected) + + @td.skip_if_no_scipy + def test_from_spmatrix_raises(self): + import scipy.sparse + + mat = scipy.sparse.eye(5, 4, format="csc") + + with pytest.raises(ValueError, match="not '4'"): + SparseArray.from_spmatrix(mat) + + @pytest.mark.parametrize( + "scalar,dtype", + [ + (False, SparseDtype(bool, False)), + (0.0, 
SparseDtype("float64", 0)), + (1, SparseDtype("int64", 1)), + ("z", SparseDtype("object", "z")), + ], + ) + def test_scalar_with_index_infer_dtype(self, scalar, dtype): + # GH#19163 + with tm.assert_produces_warning( + FutureWarning, match="The index argument has been deprecated" + ): + arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar) + exp = SparseArray([scalar, scalar, scalar], fill_value=scalar) + + tm.assert_sp_array_equal(arr, exp) + + assert arr.dtype == dtype + assert exp.dtype == dtype + + def test_constructor_from_too_large_array(self): + with pytest.raises(TypeError, match="expected dimension <= 1 data"): + SparseArray(np.arange(10).reshape((2, 5))) + + def test_constructor_from_sparse(self): + zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) + res = SparseArray(zarr) + assert res.fill_value == 0 + tm.assert_almost_equal(res.sp_values, zarr.sp_values) + + def test_constructor_copy(self): + arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) + arr = SparseArray(arr_data) + + cp = SparseArray(arr, copy=True) + cp.sp_values[:3] = 0 + assert not (arr.sp_values[:3] == 0).any() + + not_copy = SparseArray(arr) + not_copy.sp_values[:3] = 0 + assert (arr.sp_values[:3] == 0).all() + + def test_constructor_bool(self): + # GH#10648 + data = np.array([False, False, True, True, False, False]) + arr = SparseArray(data, fill_value=False, dtype=bool) + + assert arr.dtype == SparseDtype(bool) + tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True])) + # Behavior change: np.asarray densifies. + # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) + tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32)) + + dense = arr.to_dense() + assert dense.dtype == bool + tm.assert_numpy_array_equal(dense, data) + + def test_constructor_bool_fill_value(self): + arr = SparseArray([True, False, True], dtype=None) + assert arr.dtype == SparseDtype(np.bool_) + assert not arr.fill_value + + arr = SparseArray([True, False, True], dtype=np.bool_) + assert arr.dtype == SparseDtype(np.bool_) + assert not arr.fill_value + + arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True) + assert arr.dtype == SparseDtype(np.bool_, True) + assert arr.fill_value + + def test_constructor_float32(self): + # GH#10648 + data = np.array([1.0, np.nan, 3], dtype=np.float32) + arr = SparseArray(data, dtype=np.float32) + + assert arr.dtype == SparseDtype(np.float32) + tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32)) + # Behavior change: np.asarray densifies. 
+ # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) + tm.assert_numpy_array_equal( + arr.sp_index.indices, np.array([0, 2], dtype=np.int32) + ) + + dense = arr.to_dense() + assert dense.dtype == np.float32 + tm.assert_numpy_array_equal(dense, data) diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py new file mode 100644 index 00000000..58fedbd3 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -0,0 +1,209 @@ +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.sparse import SparseDtype + + +@pytest.mark.parametrize( + "dtype, fill_value", + [ + ("int", 0), + ("float", np.nan), + ("bool", False), + ("object", np.nan), + ("datetime64[ns]", np.datetime64("NaT", "ns")), + ("timedelta64[ns]", np.timedelta64("NaT", "ns")), + ], +) +def test_inferred_dtype(dtype, fill_value): + sparse_dtype = SparseDtype(dtype) + result = sparse_dtype.fill_value + if pd.isna(fill_value): + assert pd.isna(result) and type(result) == type(fill_value) + else: + assert result == fill_value + + +def test_from_sparse_dtype(): + dtype = SparseDtype("float", 0) + result = SparseDtype(dtype) + assert result.fill_value == 0 + + +def test_from_sparse_dtype_fill_value(): + dtype = SparseDtype("int", 1) + result = SparseDtype(dtype, fill_value=2) + expected = SparseDtype("int", 2) + assert result == expected + + +@pytest.mark.parametrize( + "dtype, fill_value", + [ + ("int", None), + ("float", None), + ("bool", None), + ("object", None), + ("datetime64[ns]", None), + ("timedelta64[ns]", None), + ("int", np.nan), + ("float", 0), + ], +) +def test_equal(dtype, fill_value): + a = SparseDtype(dtype, fill_value) + b = SparseDtype(dtype, fill_value) + assert a == b + assert b == a + + +def test_nans_equal(): + a = SparseDtype(float, float("nan")) + b = SparseDtype(float, np.nan) + assert a == b + assert b == a + + +@pytest.mark.parametrize( + "a, b", + [ + (SparseDtype("float64"), SparseDtype("float32")), + (SparseDtype("float64"), SparseDtype("float64", 0)), + (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)), + (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)), + (SparseDtype("float64"), np.dtype("float64")), + ], +) +def test_not_equal(a, b): + assert a != b + + +def test_construct_from_string_raises(): + with pytest.raises( + TypeError, match="Cannot construct a 'SparseDtype' from 'not a dtype'" + ): + SparseDtype.construct_from_string("not a dtype") + + +@pytest.mark.parametrize( + "dtype, expected", + [ + (SparseDtype(int), True), + (SparseDtype(float), True), + (SparseDtype(bool), True), + (SparseDtype(object), False), + (SparseDtype(str), False), + ], +) +def test_is_numeric(dtype, expected): + assert dtype._is_numeric is expected + + +def test_str_uses_object(): + result = SparseDtype(str).subtype + assert result == np.dtype("object") + + +@pytest.mark.parametrize( + "string, expected", + [ + ("Sparse[float64]", SparseDtype(np.dtype("float64"))), + ("Sparse[float32]", SparseDtype(np.dtype("float32"))), + ("Sparse[int]", SparseDtype(np.dtype("int"))), + ("Sparse[str]", SparseDtype(np.dtype("str"))), + ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))), + ("Sparse", SparseDtype(np.dtype("float"), np.nan)), + ], +) +def test_construct_from_string(string, expected): + result = SparseDtype.construct_from_string(string) + assert result == expected + + +@pytest.mark.parametrize( + "a, b, expected", + [ + (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True), + 
(SparseDtype(int, 0), SparseDtype(int, 0), True), + (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True), + (SparseDtype(float, 0), SparseDtype(float, np.nan), False), + (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False), + ], +) +def test_hash_equal(a, b, expected): + result = a == b + assert result is expected + + result = hash(a) == hash(b) + assert result is expected + + +@pytest.mark.parametrize( + "string, expected", + [ + ("Sparse[int]", "int"), + ("Sparse[int, 0]", "int"), + ("Sparse[int64]", "int64"), + ("Sparse[int64, 0]", "int64"), + ("Sparse[datetime64[ns], 0]", "datetime64[ns]"), + ], +) +def test_parse_subtype(string, expected): + subtype, _ = SparseDtype._parse_subtype(string) + assert subtype == expected + + +@pytest.mark.parametrize( + "string", ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"] +) +def test_construct_from_string_fill_value_raises(string): + with pytest.raises(TypeError, match="fill_value in the string is not"): + SparseDtype.construct_from_string(string) + + +@pytest.mark.parametrize( + "original, dtype, expected", + [ + (SparseDtype(int, 0), float, SparseDtype(float, 0.0)), + (SparseDtype(int, 1), float, SparseDtype(float, 1.0)), + (SparseDtype(int, 1), str, SparseDtype(object, "1")), + (SparseDtype(float, 1.5), int, SparseDtype(int, 1)), + ], +) +def test_update_dtype(original, dtype, expected): + result = original.update_dtype(dtype) + assert result == expected + + +@pytest.mark.parametrize( + "original, dtype, expected_error_msg", + [ + ( + SparseDtype(float, np.nan), + int, + re.escape("Cannot convert non-finite values (NA or inf) to integer"), + ), + ( + SparseDtype(str, "abc"), + int, + re.escape("invalid literal for int() with base 10: 'abc'"), + ), + ], +) +def test_update_dtype_raises(original, dtype, expected_error_msg): + with pytest.raises(ValueError, match=expected_error_msg): + original.update_dtype(dtype) + + +def test_repr(): + # GH-34352 + result = str(SparseDtype("int64", fill_value=0)) + expected = "Sparse[int64, 0]" + assert result == expected + + result = str(SparseDtype(object, fill_value="0")) + expected = "Sparse[object, '0']" + assert result == expected diff --git a/pandas/tests/arrays/sparse/test_indexing.py b/pandas/tests/arrays/sparse/test_indexing.py new file mode 100644 index 00000000..311a8a04 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_indexing.py @@ -0,0 +1,290 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + +arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) +arr = SparseArray(arr_data) + + +class TestGetitem: + def test_getitem(self): + dense = arr.to_dense() + for i in range(len(arr)): + tm.assert_almost_equal(arr[i], dense[i]) + tm.assert_almost_equal(arr[-i], dense[-i]) + + def test_getitem_arraylike_mask(self): + arr = SparseArray([0, 1, 2]) + result = arr[[True, False, True]] + expected = SparseArray([0, 2]) + tm.assert_sp_array_equal(result, expected) + + @pytest.mark.parametrize( + "slc", + [ + np.s_[:], + np.s_[1:10], + np.s_[1:100], + np.s_[10:1], + np.s_[:-3], + np.s_[-5:-4], + np.s_[:-12], + np.s_[-12:], + np.s_[2:], + np.s_[2::3], + np.s_[::2], + np.s_[::-1], + np.s_[::-2], + np.s_[1:6:2], + np.s_[:-6:-2], + ], + ) + @pytest.mark.parametrize( + "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []] + ) + def test_getslice(self, slc, as_dense): + as_dense = np.array(as_dense) + arr = SparseArray(as_dense) + + result = 
arr[slc] + expected = SparseArray(as_dense[slc]) + + tm.assert_sp_array_equal(result, expected) + + def test_getslice_tuple(self): + dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) + + sparse = SparseArray(dense) + res = sparse[(slice(4, None),)] + exp = SparseArray(dense[4:]) + tm.assert_sp_array_equal(res, exp) + + sparse = SparseArray(dense, fill_value=0) + res = sparse[(slice(4, None),)] + exp = SparseArray(dense[4:], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + msg = "too many indices for array" + with pytest.raises(IndexError, match=msg): + sparse[4:, :] + + with pytest.raises(IndexError, match=msg): + # check numpy compat + dense[4:, :] + + def test_boolean_slice_empty(self): + arr = SparseArray([0, 1, 2]) + res = arr[[False, False, False]] + assert res.dtype == arr.dtype + + def test_getitem_bool_sparse_array(self): + # GH 23122 + spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True) + exp = SparseArray([np.nan, 2, np.nan, 5, 6]) + tm.assert_sp_array_equal(arr[spar_bool], exp) + + spar_bool = ~spar_bool + res = arr[spar_bool] + exp = SparseArray([np.nan, 1, 3, 4, np.nan]) + tm.assert_sp_array_equal(res, exp) + + spar_bool = SparseArray( + [False, True, np.nan] * 3, dtype=np.bool_, fill_value=np.nan + ) + res = arr[spar_bool] + exp = SparseArray([np.nan, 3, 5]) + tm.assert_sp_array_equal(res, exp) + + def test_getitem_bool_sparse_array_as_comparison(self): + # GH 45110 + arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan) + res = arr[arr > 2] + exp = SparseArray([3.0, 4.0], fill_value=np.nan) + tm.assert_sp_array_equal(res, exp) + + def test_get_item(self): + zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) + + assert np.isnan(arr[1]) + assert arr[2] == 1 + assert arr[7] == 5 + + assert zarr[0] == 0 + assert zarr[2] == 1 + assert zarr[7] == 5 + + errmsg = "must be an integer between -10 and 10" + + with pytest.raises(IndexError, match=errmsg): + arr[11] + + with pytest.raises(IndexError, match=errmsg): + arr[-11] + + assert arr[-1] == arr[len(arr) - 1] + + +class TestSetitem: + def test_set_item(self): + arr = SparseArray(arr_data).copy() + + def setitem(): + arr[5] = 3 + + def setslice(): + arr[1:5] = 2 + + with pytest.raises(TypeError, match="assignment via setitem"): + setitem() + + with pytest.raises(TypeError, match="assignment via setitem"): + setslice() + + +class TestTake: + def test_take_scalar_raises(self): + msg = "'indices' must be an array, not a scalar '2'." 
+ with pytest.raises(ValueError, match=msg): + arr.take(2) + + def test_take(self): + exp = SparseArray(np.take(arr_data, [2, 3])) + tm.assert_sp_array_equal(arr.take([2, 3]), exp) + + exp = SparseArray(np.take(arr_data, [0, 1, 2])) + tm.assert_sp_array_equal(arr.take([0, 1, 2]), exp) + + def test_take_all_empty(self): + a = pd.array([0, 0], dtype=SparseDtype("int64")) + result = a.take([0, 1], allow_fill=True, fill_value=np.nan) + tm.assert_sp_array_equal(a, result) + + def test_take_fill_value(self): + data = np.array([1, np.nan, 0, 3, 0]) + sparse = SparseArray(data, fill_value=0) + + exp = SparseArray(np.take(data, [0]), fill_value=0) + tm.assert_sp_array_equal(sparse.take([0]), exp) + + exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0) + tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp) + + def test_take_negative(self): + exp = SparseArray(np.take(arr_data, [-1])) + tm.assert_sp_array_equal(arr.take([-1]), exp) + + exp = SparseArray(np.take(arr_data, [-4, -3, -2])) + tm.assert_sp_array_equal(arr.take([-4, -3, -2]), exp) + + def test_bad_take(self): + with pytest.raises(IndexError, match="bounds"): + arr.take([11]) + + def test_take_filling(self): + # similar tests as GH 12631 + sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4]) + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([np.nan, np.nan, 4]) + tm.assert_sp_array_equal(result, expected) + + # TODO: actionable? + # XXX: test change: fill_value=True -> allow_fill=True + result = sparse.take(np.array([1, 0, -1]), allow_fill=True) + expected = SparseArray([np.nan, np.nan, np.nan]) + tm.assert_sp_array_equal(result, expected) + + # allow_fill=False + result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = SparseArray([np.nan, np.nan, 4]) + tm.assert_sp_array_equal(result, expected) + + msg = "Invalid value in 'indices'" + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -2]), allow_fill=True) + + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -5]), allow_fill=True) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), allow_fill=True) + + def test_take_filling_fill_value(self): + # same tests as GH#12631 + sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0) + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([0, np.nan, 4], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + # fill_value + result = sparse.take(np.array([1, 0, -1]), allow_fill=True) + # TODO: actionable? + # XXX: behavior change. + # the old way of filling self.fill_value doesn't follow EA rules. + # It's supposed to be self.dtype.na_value (nan in this case) + expected = SparseArray([0, np.nan, np.nan], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + # allow_fill=False + result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = SparseArray([0, np.nan, 4], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + msg = "Invalid value in 'indices'." 
+ with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -2]), allow_fill=True) + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -5]), allow_fill=True) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), fill_value=True) + + @pytest.mark.parametrize("kind", ["block", "integer"]) + def test_take_filling_all_nan(self, kind): + sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind) + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([np.nan, np.nan, np.nan], kind=kind) + tm.assert_sp_array_equal(result, expected) + + result = sparse.take(np.array([1, 0, -1]), fill_value=True) + expected = SparseArray([np.nan, np.nan, np.nan], kind=kind) + tm.assert_sp_array_equal(result, expected) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), fill_value=True) + + +class TestWhere: + def test_where_retain_fill_value(self): + # GH#45691 don't lose fill_value on _where + arr = SparseArray([np.nan, 1.0], fill_value=0) + + mask = np.array([True, False]) + + res = arr._where(~mask, 1) + exp = SparseArray([1, 1.0], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + ser = pd.Series(arr) + res = ser.where(~mask, 1) + tm.assert_series_equal(res, pd.Series(exp)) diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py new file mode 100644 index 00000000..35d58c29 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -0,0 +1,553 @@ +import operator + +import numpy as np +import pytest + +import pandas._libs.sparse as splib +import pandas.util._test_decorators as td + +from pandas import Series +import pandas._testing as tm +from pandas.core.arrays.sparse import ( + BlockIndex, + IntIndex, + make_sparse_index, +) + +TEST_LENGTH = 20 + +plain_case = [ + [0, 7, 15], + [3, 5, 5], + [2, 9, 14], + [2, 3, 5], + [2, 9, 15], + [1, 3, 4], +] +delete_blocks = [ + [0, 5], + [4, 4], + [1], + [4], + [1], + [3], +] +split_blocks = [ + [0], + [10], + [0, 5], + [3, 7], + [0, 5], + [3, 5], +] +skip_block = [ + [10], + [5], + [0, 12], + [5, 3], + [12], + [3], +] + +no_intersect = [ + [0, 10], + [4, 6], + [5, 17], + [4, 2], + [], + [], +] + +one_empty = [ + [0], + [5], + [], + [], + [], + [], +] + +both_empty = [ # type: ignore[var-annotated] + [], + [], + [], + [], + [], + [], +] + +CASES = [plain_case, delete_blocks, split_blocks, skip_block, no_intersect, one_empty] +IDS = [ + "plain_case", + "delete_blocks", + "split_blocks", + "skip_block", + "no_intersect", + "one_empty", +] + + +class TestSparseIndexUnion: + @pytest.mark.parametrize( + "xloc, xlen, yloc, ylen, eloc, elen", + [ + [[0], [5], [5], [4], [0], [9]], + [[0, 10], [5, 5], [2, 17], [5, 2], [0, 10, 17], [7, 5, 2]], + [[1], [5], [3], [5], [1], [7]], + [[2, 10], [4, 4], [4], [8], [2], [12]], + [[0, 5], [3, 5], [0], [7], [0], [10]], + [[2, 10], [4, 4], [4, 13], [8, 4], [2], [15]], + [[2], [15], [4, 9, 14], [3, 2, 2], [2], [15]], + [[0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]], + ], + ) + def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen): + # Case 1 + # 
x: ---- + # y: ---- + # r: -------- + # Case 2 + # x: ----- ----- + # y: ----- -- + # Case 3 + # x: ------ + # y: ------- + # r: ---------- + # Case 4 + # x: ------ ----- + # y: ------- + # r: ------------- + # Case 5 + # x: --- ----- + # y: ------- + # r: ------------- + # Case 6 + # x: ------ ----- + # y: ------- --- + # r: ------------- + # Case 7 + # x: ---------------------- + # y: ---- ---- --- + # r: ---------------------- + # Case 8 + # x: ---- --- + # y: --- --- + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + bresult = xindex.make_union(yindex) + assert isinstance(bresult, BlockIndex) + tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32)) + tm.assert_numpy_array_equal(bresult.blengths, np.array(elen, dtype=np.int32)) + + ixindex = xindex.to_int_index() + iyindex = yindex.to_int_index() + iresult = ixindex.make_union(iyindex) + assert isinstance(iresult, IntIndex) + tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices) + + def test_int_index_make_union(self): + a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32)) + b = IntIndex(5, np.array([0, 2], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([], dtype=np.int32)) + b = IntIndex(5, np.array([0, 2], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 2], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([], dtype=np.int32)) + b = IntIndex(5, np.array([], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) + b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([0, 1], dtype=np.int32)) + b = IntIndex(4, np.array([0, 1], dtype=np.int32)) + + msg = "Indices must reference same underlying length" + with pytest.raises(ValueError, match=msg): + a.make_union(b) + + +class TestSparseIndexIntersect: + @td.skip_if_windows + @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS) + def test_intersect(self, xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + expected = BlockIndex(TEST_LENGTH, eloc, elen) + longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) + + result = xindex.intersect(yindex) + assert result.equals(expected) + result = xindex.to_int_index().intersect(yindex.to_int_index()) + assert result.equals(expected.to_int_index()) + + msg = "Indices must reference same underlying length" + with pytest.raises(Exception, match=msg): + xindex.intersect(longer_index) + with pytest.raises(Exception, match=msg): + xindex.to_int_index().intersect(longer_index.to_int_index()) + + def test_intersect_empty(self): + xindex = IntIndex(4, np.array([], dtype=np.int32)) + yindex = IntIndex(4, np.array([2, 3], dtype=np.int32)) + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) + + xindex = xindex.to_block_index() + yindex = yindex.to_block_index() + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) + + @pytest.mark.parametrize( + "case", + [ + IntIndex(5, np.array([1, 2], dtype=np.int32)), # type: ignore[arg-type] + IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), # type: 
ignore[arg-type] + IntIndex(0, np.array([], dtype=np.int32)), # type: ignore[arg-type] + IntIndex(5, np.array([], dtype=np.int32)), # type: ignore[arg-type] + ], + ) + def test_intersect_identical(self, case): + assert case.intersect(case).equals(case) + case = case.to_block_index() + assert case.intersect(case).equals(case) + + +class TestSparseIndexCommon: + def test_int_internal(self): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) + + idx = make_sparse_index( + 4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer" + ) + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) + + def test_block_internal(self): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 + tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32)) + + @pytest.mark.parametrize("kind", ["integer", "block"]) + def test_lookup(self, kind): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == -1 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 0 + assert idx.lookup(3) == 1 + assert idx.lookup(4) == -1 + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind) + + for i in range(-1, 5): + assert idx.lookup(i) == -1 + + idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == 1 + assert idx.lookup(2) == 2 + assert idx.lookup(3) == 3 + assert idx.lookup(4) == -1 + + idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 1 + assert idx.lookup(3) == 2 + assert idx.lookup(4) == -1 + + @pytest.mark.parametrize("kind", ["integer", "block"]) + def test_lookup_array(self, kind): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, -1, 0], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = 
idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 0, -1, 1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) + exp = np.array([-1, -1, -1, -1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, 0, 2], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 2, 1, 3], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) + exp = np.array([1, -1, 2, 0], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) + exp = np.array([-1, -1, 1, -1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + @pytest.mark.parametrize( + "idx, expected", + [ + [0, -1], + [5, 0], + [7, 2], + [8, -1], + [9, -1], + [10, -1], + [11, -1], + [12, 3], + [17, 8], + [18, -1], + ], + ) + def test_lookup_basics(self, idx, expected): + bindex = BlockIndex(20, [5, 12], [3, 6]) + assert bindex.lookup(idx) == expected + + iindex = bindex.to_int_index() + assert iindex.lookup(idx) == expected + + +class TestBlockIndex: + def test_block_internal(self): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 + tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32)) + + @pytest.mark.parametrize("i", [5, 10, 100, 101]) + def test_make_block_boundary(self, i): + idx = make_sparse_index(i, np.arange(0, i, 2, dtype=np.int32), kind="block") + + exp = np.arange(0, i, 2, dtype=np.int32) + tm.assert_numpy_array_equal(idx.blocs, exp) + tm.assert_numpy_array_equal(idx.blengths, np.ones(len(exp), dtype=np.int32)) + + def test_equals(self): + index = BlockIndex(10, [0, 4], [2, 5]) + + assert index.equals(index) + assert not index.equals(BlockIndex(10, [0, 4], [2, 6])) + + def test_check_integrity(self): + locs = [] + lengths = [] + + # 0-length OK + BlockIndex(0, locs, lengths) + + # also OK even though empty + BlockIndex(1, locs, lengths) + + msg = "Block 0 extends beyond end" + with pytest.raises(ValueError, match=msg): + BlockIndex(10, [5], [10]) + + msg = "Block 
0 overlaps" + with pytest.raises(ValueError, match=msg): + BlockIndex(10, [2, 5], [5, 3]) + + def test_to_int_index(self): + locs = [0, 10] + lengths = [4, 6] + exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15] + + block = BlockIndex(20, locs, lengths) + dense = block.to_int_index() + + tm.assert_numpy_array_equal(dense.indices, np.array(exp_inds, dtype=np.int32)) + + def test_to_block_index(self): + index = BlockIndex(10, [0, 5], [4, 5]) + assert index.to_block_index() is index + + +class TestIntIndex: + def test_check_integrity(self): + + # Too many indices than specified in self.length + msg = "Too many indices" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=1, indices=[1, 2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # All indices must be less than the length. + msg = "All indices must be less than the length" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 2, 5]) + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 2, 6]) + + # Indices must be strictly ascending. + msg = "Indices must be strictly increasing" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 3, 2]) + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 3, 3]) + + def test_int_internal(self): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) + + idx = make_sparse_index( + 4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer" + ) + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) + + def test_equals(self): + index = IntIndex(10, [0, 1, 2, 3, 4]) + assert index.equals(index) + assert not index.equals(IntIndex(10, [0, 1, 2, 3])) + + @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS) + def test_to_block_index(self, xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + + # see if survive the round trip + xbindex = xindex.to_int_index().to_block_index() + ybindex = yindex.to_int_index().to_block_index() + assert isinstance(xbindex, BlockIndex) + assert xbindex.equals(xindex) + assert ybindex.equals(yindex) + + def test_to_int_index(self): + index = IntIndex(10, [2, 3, 4, 5, 6]) + assert index.to_int_index() is index + + +class TestSparseOperators: + @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"]) + @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS) + def test_op(self, opname, xloc, xlen, yloc, ylen, eloc, elen): + sparse_op = getattr(splib, f"sparse_{opname}_float64") + python_op = getattr(operator, opname) + + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + + xdindex = xindex.to_int_index() + ydindex = yindex.to_int_index() + + x = 
np.arange(xindex.npoints) * 10.0 + 1 + y = np.arange(yindex.npoints) * 100.0 + 1 + + xfill = 0 + yfill = 2 + + result_block_vals, rb_index, bfill = sparse_op( + x, xindex, xfill, y, yindex, yfill + ) + result_int_vals, ri_index, ifill = sparse_op( + x, xdindex, xfill, y, ydindex, yfill + ) + + assert rb_index.to_int_index().equals(ri_index) + tm.assert_numpy_array_equal(result_block_vals, result_int_vals) + assert bfill == ifill + + # check versus Series... + xseries = Series(x, xdindex.indices) + xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill) + + yseries = Series(y, ydindex.indices) + yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill) + + series_result = python_op(xseries, yseries) + series_result = series_result.reindex(ri_index.indices) + + tm.assert_numpy_array_equal(result_block_vals, series_result.values) + tm.assert_numpy_array_equal(result_int_vals, series_result.values) diff --git a/pandas/tests/arrays/sparse/test_reductions.py b/pandas/tests/arrays/sparse/test_reductions.py new file mode 100644 index 00000000..5d6d65dd --- /dev/null +++ b/pandas/tests/arrays/sparse/test_reductions.py @@ -0,0 +1,308 @@ +import numpy as np +import pytest + +from pandas import ( + NaT, + Timestamp, + isna, +) +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) + + +class TestReductions: + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([True, True, True], True, False), + ([1, 2, 1], 1, 0), + ([1.0, 2.0, 1.0], 1.0, 0.0), + ], + ) + def test_all(self, data, pos, neg): + # GH#17570 + out = SparseArray(data).all() + assert out + + out = SparseArray(data, fill_value=pos).all() + assert out + + data[1] = neg + out = SparseArray(data).all() + assert not out + + out = SparseArray(data, fill_value=pos).all() + assert not out + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([True, True, True], True, False), + ([1, 2, 1], 1, 0), + ([1.0, 2.0, 1.0], 1.0, 0.0), + ], + ) + def test_numpy_all(self, data, pos, neg): + # GH#17570 + out = np.all(SparseArray(data)) + assert out + + out = np.all(SparseArray(data, fill_value=pos)) + assert out + + data[1] = neg + out = np.all(SparseArray(data)) + assert not out + + out = np.all(SparseArray(data, fill_value=pos)) + assert not out + + # raises with a different message on py2. 
+ msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.all(SparseArray(data), out=np.array([])) + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([False, True, False], True, False), + ([0, 2, 0], 2, 0), + ([0.0, 2.0, 0.0], 2.0, 0.0), + ], + ) + def test_any(self, data, pos, neg): + # GH#17570 + out = SparseArray(data).any() + assert out + + out = SparseArray(data, fill_value=pos).any() + assert out + + data[1] = neg + out = SparseArray(data).any() + assert not out + + out = SparseArray(data, fill_value=pos).any() + assert not out + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([False, True, False], True, False), + ([0, 2, 0], 2, 0), + ([0.0, 2.0, 0.0], 2.0, 0.0), + ], + ) + def test_numpy_any(self, data, pos, neg): + # GH#17570 + out = np.any(SparseArray(data)) + assert out + + out = np.any(SparseArray(data, fill_value=pos)) + assert out + + data[1] = neg + out = np.any(SparseArray(data)) + assert not out + + out = np.any(SparseArray(data, fill_value=pos)) + assert not out + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.any(SparseArray(data), out=out) + + def test_sum(self): + data = np.arange(10).astype(float) + out = SparseArray(data).sum() + assert out == 45.0 + + data[5] = np.nan + out = SparseArray(data, fill_value=2).sum() + assert out == 40.0 + + out = SparseArray(data, fill_value=np.nan).sum() + assert out == 40.0 + + @pytest.mark.parametrize( + "arr", + [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])], + ) + @pytest.mark.parametrize("fill_value", [0, 1, np.nan]) + @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)]) + def test_sum_min_count(self, arr, fill_value, min_count, expected): + # GH#25777 + sparray = SparseArray(arr, fill_value=fill_value) + result = sparray.sum(min_count=min_count) + if np.isnan(expected): + assert np.isnan(result) + else: + assert result == expected + + def test_bool_sum_min_count(self): + spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True) + res = spar_bool.sum(min_count=1) + assert res == 5 + res = spar_bool.sum(min_count=11) + assert isna(res) + + def test_numpy_sum(self): + data = np.arange(10).astype(float) + out = np.sum(SparseArray(data)) + assert out == 45.0 + + data[5] = np.nan + out = np.sum(SparseArray(data, fill_value=2)) + assert out == 40.0 + + out = np.sum(SparseArray(data, fill_value=np.nan)) + assert out == 40.0 + + msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), out=out) + + def test_mean(self): + data = np.arange(10).astype(float) + out = SparseArray(data).mean() + assert out == 4.5 + + data[5] = np.nan + out = SparseArray(data).mean() + assert out == 40.0 / 9 + + def test_numpy_mean(self): + data = np.arange(10).astype(float) + out = np.mean(SparseArray(data)) + assert out == 4.5 + + data[5] = np.nan + out = np.mean(SparseArray(data)) + assert out == 40.0 / 9 + + msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), out=out) + + +class TestMinMax: + @pytest.mark.parametrize( + "raw_data,max_expected,min_expected", + [ + (np.arange(5.0), [4], [0]), + (-np.arange(5.0), [0], [-4]), 
+ (np.array([0, 1, 2, np.nan, 4]), [4], [0]), + (np.array([np.nan] * 5), [np.nan], [np.nan]), + (np.array([]), [np.nan], [np.nan]), + ], + ) + def test_nan_fill_value(self, raw_data, max_expected, min_expected): + arr = SparseArray(raw_data) + max_result = arr.max() + min_result = arr.min() + assert max_result in max_expected + assert min_result in min_expected + + max_result = arr.max(skipna=False) + min_result = arr.min(skipna=False) + if np.isnan(raw_data).any(): + assert np.isnan(max_result) + assert np.isnan(min_result) + else: + assert max_result in max_expected + assert min_result in min_expected + + @pytest.mark.parametrize( + "fill_value,max_expected,min_expected", + [ + (100, 100, 0), + (-100, 1, -100), + ], + ) + def test_fill_value(self, fill_value, max_expected, min_expected): + arr = SparseArray( + np.array([fill_value, 0, 1]), dtype=SparseDtype("int", fill_value) + ) + max_result = arr.max() + assert max_result == max_expected + + min_result = arr.min() + assert min_result == min_expected + + def test_only_fill_value(self): + fv = 100 + arr = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv)) + assert len(arr._valid_sp_values) == 0 + + assert arr.max() == fv + assert arr.min() == fv + assert arr.max(skipna=False) == fv + assert arr.min(skipna=False) == fv + + @pytest.mark.parametrize("func", ["min", "max"]) + @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])]) + @pytest.mark.parametrize( + "dtype,expected", + [ + (SparseDtype(np.float64, np.nan), np.nan), + (SparseDtype(np.float64, 5.0), np.nan), + (SparseDtype("datetime64[ns]", NaT), NaT), + (SparseDtype("datetime64[ns]", Timestamp("2018-05-05")), NaT), + ], + ) + def test_na_value_if_no_valid_values(self, func, data, dtype, expected): + arr = SparseArray(data, dtype=dtype) + result = getattr(arr, func)() + if expected is NaT: + # TODO: pin down whether we wrap datetime64("NaT") + assert result is NaT or np.isnat(result) + else: + assert np.isnan(result) + + +class TestArgmaxArgmin: + @pytest.mark.parametrize( + "arr,argmax_expected,argmin_expected", + [ + (SparseArray([1, 2, 0, 1, 2]), 1, 2), + (SparseArray([-1, -2, 0, -1, -2]), 2, 1), + (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2), + (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10), + (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10), + (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10), + (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0), + (SparseArray([1] + [0] * 10, fill_value=0), 0, 1), + (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0), + (SparseArray([1] + [0] * 10, fill_value=1), 0, 1), + ], + ) + def test_argmax_argmin(self, arr, argmax_expected, argmin_expected): + argmax_result = arr.argmax() + argmin_result = arr.argmin() + assert argmax_result == argmax_expected + assert argmin_result == argmin_expected + + @pytest.mark.parametrize( + "arr,method", + [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")], + ) + def test_empty_array(self, arr, method): + msg = f"attempt to get {method} of an empty sequence" + with pytest.raises(ValueError, match=msg): + arr.argmax() if method == "argmax" else arr.argmin() diff --git 
a/pandas/tests/arrays/sparse/test_unary.py b/pandas/tests/arrays/sparse/test_unary.py new file mode 100644 index 00000000..605023a4 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_unary.py @@ -0,0 +1,72 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import SparseArray + + +@pytest.mark.parametrize("fill_value", [0, np.nan]) +@pytest.mark.parametrize("op", [operator.pos, operator.neg]) +def test_unary_op(op, fill_value): + arr = np.array([0, 1, np.nan, 2]) + sparray = SparseArray(arr, fill_value=fill_value) + result = op(sparray) + expected = SparseArray(op(arr), fill_value=op(fill_value)) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("fill_value", [True, False]) +def test_invert(fill_value): + arr = np.array([True, False, False, True]) + sparray = SparseArray(arr, fill_value=fill_value) + result = ~sparray + expected = SparseArray(~arr, fill_value=not fill_value) + tm.assert_sp_array_equal(result, expected) + + result = ~pd.Series(sparray) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + result = ~pd.DataFrame({"A": sparray}) + expected = pd.DataFrame({"A": expected}) + tm.assert_frame_equal(result, expected) + + +class TestUnaryMethods: + def test_neg_operator(self): + arr = SparseArray([-1, -2, np.nan, 3], fill_value=np.nan, dtype=np.int8) + res = -arr + exp = SparseArray([1, 2, np.nan, -3], fill_value=np.nan, dtype=np.int8) + tm.assert_sp_array_equal(exp, res) + + arr = SparseArray([-1, -2, 1, 3], fill_value=-1, dtype=np.int8) + res = -arr + exp = SparseArray([1, 2, -1, -3], fill_value=1, dtype=np.int8) + tm.assert_sp_array_equal(exp, res) + + def test_abs_operator(self): + arr = SparseArray([-1, -2, np.nan, 3], fill_value=np.nan, dtype=np.int8) + res = abs(arr) + exp = SparseArray([1, 2, np.nan, 3], fill_value=np.nan, dtype=np.int8) + tm.assert_sp_array_equal(exp, res) + + arr = SparseArray([-1, -2, 1, 3], fill_value=-1, dtype=np.int8) + res = abs(arr) + exp = SparseArray([1, 2, 1, 3], fill_value=1, dtype=np.int8) + tm.assert_sp_array_equal(exp, res) + + def test_invert_operator(self): + arr = SparseArray([False, True, False, True], fill_value=False, dtype=np.bool_) + exp = SparseArray( + np.invert([False, True, False, True]), fill_value=True, dtype=np.bool_ + ) + res = ~arr + tm.assert_sp_array_equal(exp, res) + + arr = SparseArray([0, 1, 0, 2, 3, 0], fill_value=0, dtype=np.int32) + res = ~arr + exp = SparseArray([-1, -2, -1, -3, -4, -1], fill_value=-1, dtype=np.int32) + tm.assert_sp_array_equal(exp, res) diff --git a/pandas/tests/arrays/string_/__init__.py b/pandas/tests/arrays/string_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py new file mode 100644 index 00000000..6a17a56a --- /dev/null +++ b/pandas/tests/arrays/string_/test_string.py @@ -0,0 +1,613 @@ +""" +This module tests the functionality of StringArray and ArrowStringArray. 
+Tests for the str accessors are in pandas/tests/strings/test_string_array.py
+"""
+import numpy as np
+import pytest
+
+from pandas.compat import (
+    pa_version_under2p0,
+    pa_version_under6p0,
+)
+from pandas.errors import PerformanceWarning
+import pandas.util._test_decorators as td
+
+from pandas.core.dtypes.common import is_dtype_equal
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays.string_arrow import ArrowStringArray
+
+
+@pytest.fixture
+def dtype(string_storage):
+    """Fixture giving StringDtype from parametrized 'string_storage'"""
+    return pd.StringDtype(storage=string_storage)
+
+
+@pytest.fixture
+def cls(dtype):
+    """Fixture giving array type from parametrized 'dtype'"""
+    return dtype.construct_array_type()
+
+
+def test_repr(dtype):
+    df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype=dtype)})
+    expected = "      A\n0     a\n1  <NA>\n2     b"
+    assert repr(df) == expected
+
+    expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
+    assert repr(df.A) == expected
+
+    arr_name = "ArrowStringArray" if dtype.storage == "pyarrow" else "StringArray"
+    expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
+    assert repr(df.A.array) == expected
+
+
+def test_none_to_nan(cls):
+    a = cls._from_sequence(["a", None, "b"])
+    assert a[1] is not None
+    assert a[1] is pd.NA
+
+
+def test_setitem_validates(cls):
+    arr = cls._from_sequence(["a", "b"])
+
+    if cls is pd.arrays.StringArray:
+        msg = "Cannot set non-string value '10' into a StringArray."
+    else:
+        msg = "Scalar must be NA or str"
+    with pytest.raises(ValueError, match=msg):
+        arr[0] = 10
+
+    if cls is pd.arrays.StringArray:
+        msg = "Must provide strings."
+    else:
+        msg = "Scalar must be NA or str"
+    with pytest.raises(ValueError, match=msg):
+        arr[:] = np.array([1, 2])
+
+
+def test_setitem_with_scalar_string(dtype):
+    # is_float_dtype considers some strings, like 'd', to be floats
+    # which can cause issues.
+ arr = pd.array(["a", "c"], dtype=dtype) + arr[0] = "d" + expected = pd.array(["d", "c"], dtype=dtype) + tm.assert_extension_array_equal(arr, expected) + + +def test_astype_roundtrip(dtype, request): + if dtype.storage == "pyarrow": + reason = "ValueError: Could not convert object to NumPy datetime" + mark = pytest.mark.xfail(reason=reason, raises=ValueError) + request.node.add_marker(mark) + else: + mark = pytest.mark.xfail( + reason="GH#36153 casting from StringArray to dt64 fails", raises=ValueError + ) + request.node.add_marker(mark) + + ser = pd.Series(pd.date_range("2000", periods=12)) + ser[0] = None + + casted = ser.astype(dtype) + assert is_dtype_equal(casted.dtype, dtype) + + result = casted.astype("datetime64[ns]") + tm.assert_series_equal(result, ser) + + +def test_add(dtype, request): + if dtype.storage == "pyarrow": + reason = ( + "unsupported operand type(s) for +: 'ArrowStringArray' and " + "'ArrowStringArray'" + ) + mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) + request.node.add_marker(mark) + + a = pd.Series(["a", "b", "c", None, None], dtype=dtype) + b = pd.Series(["x", "y", None, "z", None], dtype=dtype) + + result = a + b + expected = pd.Series(["ax", "by", None, None, None], dtype=dtype) + tm.assert_series_equal(result, expected) + + result = a.add(b) + tm.assert_series_equal(result, expected) + + result = a.radd(b) + expected = pd.Series(["xa", "yb", None, None, None], dtype=dtype) + tm.assert_series_equal(result, expected) + + result = a.add(b, fill_value="-") + expected = pd.Series(["ax", "by", "c-", "-z", None], dtype=dtype) + tm.assert_series_equal(result, expected) + + +def test_add_2d(dtype, request): + if dtype.storage == "pyarrow": + reason = "Failed: DID NOT RAISE " + mark = pytest.mark.xfail(raises=None, reason=reason) + request.node.add_marker(mark) + + a = pd.array(["a", "b", "c"], dtype=dtype) + b = np.array([["a", "b", "c"]], dtype=object) + with pytest.raises(ValueError, match="3 != 1"): + a + b + + s = pd.Series(a) + with pytest.raises(ValueError, match="3 != 1"): + s + b + + +def test_add_sequence(dtype, request): + if dtype.storage == "pyarrow": + reason = "unsupported operand type(s) for +: 'ArrowStringArray' and 'list'" + mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) + request.node.add_marker(mark) + + a = pd.array(["a", "b", None, None], dtype=dtype) + other = ["x", None, "y", None] + + result = a + other + expected = pd.array(["ax", None, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = other + a + expected = pd.array(["xa", None, None, None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_mul(dtype, request): + if dtype.storage == "pyarrow": + reason = "unsupported operand type(s) for *: 'ArrowStringArray' and 'int'" + mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) + request.node.add_marker(mark) + + a = pd.array(["a", "b", None], dtype=dtype) + result = a * 2 + expected = pd.array(["aa", "bb", None], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + result = 2 * a + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.xfail(reason="GH-28527") +def test_add_strings(dtype): + arr = pd.array(["a", "b", "c", "d"], dtype=dtype) + df = pd.DataFrame([["t", "u", "v", "w"]]) + assert arr.__add__(df) is NotImplemented + + result = arr + df + expected = pd.DataFrame([["at", "bu", "cv", "dw"]]).astype(dtype) + tm.assert_frame_equal(result, expected) + + result = df + arr + expected = 
pd.DataFrame([["ta", "ub", "vc", "wd"]]).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.xfail(reason="GH-28527")
+def test_add_frame(dtype):
+    arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
+    df = pd.DataFrame([["x", np.nan, "y", np.nan]])
+
+    assert arr.__add__(df) is NotImplemented
+
+    result = arr + df
+    expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+
+    result = df + arr
+    expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_comparison_methods_scalar(comparison_op, dtype):
+    op_name = f"__{comparison_op.__name__}__"
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    other = "a"
+    result = getattr(a, op_name)(other)
+    expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
+    expected = pd.array(expected, dtype="boolean")
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
+    op_name = f"__{comparison_op.__name__}__"
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    result = getattr(a, op_name)(pd.NA)
+    expected = pd.array([None, None, None], dtype="boolean")
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_comparison_methods_scalar_not_string(comparison_op, dtype):
+    op_name = f"__{comparison_op.__name__}__"
+
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    other = 42
+
+    if op_name not in ["__eq__", "__ne__"]:
+        with pytest.raises(TypeError, match="not supported between"):
+            getattr(a, op_name)(other)
+
+        return
+
+    result = getattr(a, op_name)(other)
+    expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[
+        op_name
+    ]
+    expected = pd.array(expected_data, dtype="boolean")
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_comparison_methods_array(comparison_op, dtype):
+
+    op_name = f"__{comparison_op.__name__}__"
+
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    other = [None, None, "c"]
+    result = getattr(a, op_name)(other)
+    expected = np.empty_like(a, dtype="object")
+    expected[-1] = getattr(other[-1], op_name)(a[-1])
+    expected = pd.array(expected, dtype="boolean")
+    tm.assert_extension_array_equal(result, expected)
+
+    result = getattr(a, op_name)(pd.NA)
+    expected = pd.array([None, None, None], dtype="boolean")
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_constructor_raises(cls):
+    if cls is pd.arrays.StringArray:
+        msg = "StringArray requires a sequence of strings or pandas.NA"
+    else:
+        msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArray"
+
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", "b"], dtype="S1"))
+
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array([]))
+
+    if cls is pd.arrays.StringArray:
+        # GH#45057 np.nan and None do NOT raise, as they are considered valid NAs
+        # for string dtype
+        cls(np.array(["a", np.nan], dtype=object))
+        cls(np.array(["a", None], dtype=object))
+    else:
+        with pytest.raises(ValueError, match=msg):
+            cls(np.array(["a", np.nan], dtype=object))
+        with pytest.raises(ValueError, match=msg):
+            cls(np.array(["a", None], dtype=object))
+
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", pd.NaT], dtype=object))
+
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", np.datetime64("NaT", "ns")], dtype=object))
+
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", np.timedelta64("NaT", "ns")], dtype=object))
+
+
+@pytest.mark.parametrize("na", 
[np.nan, np.float64("nan"), float("nan"), None, pd.NA]) +def test_constructor_nan_like(na): + expected = pd.arrays.StringArray(np.array(["a", pd.NA])) + tm.assert_extension_array_equal( + pd.arrays.StringArray(np.array(["a", na], dtype="object")), expected + ) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_from_sequence_no_mutate(copy, cls, request): + if cls is ArrowStringArray and copy is False: + mark = pytest.mark.xfail( + raises=AssertionError, reason="numpy array are different" + ) + request.node.add_marker(mark) + + nan_arr = np.array(["a", np.nan], dtype=object) + na_arr = np.array(["a", pd.NA], dtype=object) + + result = cls._from_sequence(nan_arr, copy=copy) + + if cls is ArrowStringArray: + import pyarrow as pa + + expected = cls(pa.array(na_arr, type=pa.string(), from_pandas=True)) + else: + expected = cls(na_arr) + + tm.assert_extension_array_equal(result, expected) + + expected = nan_arr if copy else na_arr + tm.assert_numpy_array_equal(nan_arr, expected) + + +def test_astype_int(dtype): + arr = pd.array(["1", "2", "3"], dtype=dtype) + result = arr.astype("int64") + expected = np.array([1, 2, 3], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + arr = pd.array(["1", pd.NA, "3"], dtype=dtype) + msg = r"int\(\) argument must be a string, a bytes-like object or a( real)? number" + with pytest.raises(TypeError, match=msg): + arr.astype("int64") + + +def test_astype_nullable_int(dtype): + arr = pd.array(["1", pd.NA, "3"], dtype=dtype) + + result = arr.astype("Int64") + expected = pd.array([1, pd.NA, 3], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + +def test_astype_float(dtype, any_float_dtype): + # Don't compare arrays (37974) + ser = pd.Series(["1.1", pd.NA, "3.3"], dtype=dtype) + result = ser.astype(any_float_dtype) + expected = pd.Series([1.1, np.nan, 3.3], dtype=any_float_dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.xfail(reason="Not implemented StringArray.sum") +def test_reduce(skipna, dtype): + arr = pd.Series(["a", "b", "c"], dtype=dtype) + result = arr.sum(skipna=skipna) + assert result == "abc" + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.xfail(reason="Not implemented StringArray.sum") +def test_reduce_missing(skipna, dtype): + arr = pd.Series([None, "a", None, "b", "c", None], dtype=dtype) + result = arr.sum(skipna=skipna) + if skipna: + assert result == "abc" + else: + assert pd.isna(result) + + +@pytest.mark.parametrize("method", ["min", "max"]) +@pytest.mark.parametrize("skipna", [True, False]) +def test_min_max(method, skipna, dtype, request): + if dtype.storage == "pyarrow" and pa_version_under6p0: + reason = "'ArrowStringArray' object has no attribute 'max'" + mark = pytest.mark.xfail(raises=TypeError, reason=reason) + request.node.add_marker(mark) + + arr = pd.Series(["a", "b", "c", None], dtype=dtype) + result = getattr(arr, method)(skipna=skipna) + if skipna: + expected = "a" if method == "min" else "c" + assert result == expected + else: + assert result is pd.NA + + +@pytest.mark.parametrize("method", ["min", "max"]) +@pytest.mark.parametrize("box", [pd.Series, pd.array]) +def test_min_max_numpy(method, box, dtype, request): + if dtype.storage == "pyarrow" and (pa_version_under6p0 or box is pd.array): + if box is pd.array: + reason = "'<=' not supported between instances of 'str' and 'NoneType'" + else: + reason = "'ArrowStringArray' object has no attribute 'max'" + mark = 
pytest.mark.xfail(raises=TypeError, reason=reason) + request.node.add_marker(mark) + + arr = box(["a", "b", "c", None], dtype=dtype) + result = getattr(np, method)(arr) + expected = "a" if method == "min" else "c" + assert result == expected + + +def test_fillna_args(dtype, request): + # GH 37987 + + if dtype.storage == "pyarrow": + reason = ( + "Regex pattern \"Cannot set non-string value '1' into " + "a StringArray.\" does not match 'Scalar must be NA or str'" + ) + mark = pytest.mark.xfail(raises=AssertionError, reason=reason) + request.node.add_marker(mark) + + arr = pd.array(["a", pd.NA], dtype=dtype) + + res = arr.fillna(value="b") + expected = pd.array(["a", "b"], dtype=dtype) + tm.assert_extension_array_equal(res, expected) + + res = arr.fillna(value=np.str_("b")) + expected = pd.array(["a", "b"], dtype=dtype) + tm.assert_extension_array_equal(res, expected) + + msg = "Cannot set non-string value '1' into a StringArray." + with pytest.raises(ValueError, match=msg): + arr.fillna(value=1) + + +@td.skip_if_no("pyarrow") +def test_arrow_array(dtype): + # protocol added in 0.15.0 + import pyarrow as pa + + data = pd.array(["a", "b", "c"], dtype=dtype) + arr = pa.array(data) + expected = pa.array(list(data), type=pa.string(), from_pandas=True) + if dtype.storage == "pyarrow": + expected = pa.chunked_array(expected) + + assert arr.equals(expected) + + +@td.skip_if_no("pyarrow") +def test_arrow_roundtrip(dtype, string_storage2): + # roundtrip possible from arrow 1.0.0 + import pyarrow as pa + + data = pd.array(["a", "b", None], dtype=dtype) + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == "string" + with pd.option_context("string_storage", string_storage2): + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.StringDtype) + expected = df.astype(f"string[{string_storage2}]") + tm.assert_frame_equal(result, expected) + # ensure the missing value is represented by NA and not np.nan or None + assert result.loc[2, "a"] is pd.NA + + +@td.skip_if_no("pyarrow") +def test_arrow_load_from_zero_chunks(dtype, string_storage2): + # GH-41040 + import pyarrow as pa + + data = pd.array([], dtype=dtype) + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == "string" + # Instantiate the same table with no chunks at all + table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema) + with pd.option_context("string_storage", string_storage2): + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.StringDtype) + expected = df.astype(f"string[{string_storage2}]") + tm.assert_frame_equal(result, expected) + + +def test_value_counts_na(dtype): + arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype) + result = arr.value_counts(dropna=False) + expected = pd.Series([2, 1, 1], index=arr[[0, 1, 3]], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([2, 1], index=arr[:2], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_value_counts_with_normalize(dtype): + ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype) + result = ser.value_counts(normalize=True) + expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "values, expected", + [ + (["a", "b", "c"], np.array([False, False, False])), + (["a", "b", None], np.array([False, False, True])), + ], +) +def test_use_inf_as_na(values, expected, dtype): + # 
https://github.com/pandas-dev/pandas/issues/33655 + values = pd.array(values, dtype=dtype) + with pd.option_context("mode.use_inf_as_na", True): + result = values.isna() + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(values).isna() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + result = pd.DataFrame(values).isna() + expected = pd.DataFrame(expected) + tm.assert_frame_equal(result, expected) + + +def test_memory_usage(dtype): + # GH 33963 + + if dtype.storage == "pyarrow": + pytest.skip(f"not applicable for {dtype.storage}") + + series = pd.Series(["a", "b", "c"], dtype=dtype) + + assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) + + +@pytest.mark.parametrize("float_dtype", [np.float16, np.float32, np.float64]) +def test_astype_from_float_dtype(float_dtype, dtype): + # https://github.com/pandas-dev/pandas/issues/36451 + ser = pd.Series([0.1], dtype=float_dtype) + result = ser.astype(dtype) + expected = pd.Series(["0.1"], dtype=dtype) + tm.assert_series_equal(result, expected) + + +def test_to_numpy_returns_pdna_default(dtype): + arr = pd.array(["a", pd.NA, "b"], dtype=dtype) + result = np.array(arr) + expected = np.array(["a", pd.NA, "b"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_na_value(dtype, nulls_fixture): + na_value = nulls_fixture + arr = pd.array(["a", pd.NA, "b"], dtype=dtype) + result = arr.to_numpy(na_value=na_value) + expected = np.array(["a", na_value, "b"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +def test_isin(dtype, fixed_now_ts): + s = pd.Series(["a", "b", None], dtype=dtype) + + with tm.maybe_produces_warning( + PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0 + ): + result = s.isin(["a", "c"]) + expected = pd.Series([True, False, False]) + tm.assert_series_equal(result, expected) + + with tm.maybe_produces_warning( + PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0 + ): + result = s.isin(["a", pd.NA]) + expected = pd.Series([True, False, True]) + tm.assert_series_equal(result, expected) + + with tm.maybe_produces_warning( + PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0 + ): + result = s.isin([]) + expected = pd.Series([False, False, False]) + tm.assert_series_equal(result, expected) + + with tm.maybe_produces_warning( + PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0 + ): + result = s.isin(["a", fixed_now_ts]) + expected = pd.Series([True, False, False]) + tm.assert_series_equal(result, expected) + + +def test_setitem_scalar_with_mask_validation(dtype): + # https://github.com/pandas-dev/pandas/issues/47628 + # setting None with a boolean mask (through _putmaks) should still result + # in pd.NA values in the underlying array + ser = pd.Series(["a", "b", "c"], dtype=dtype) + mask = np.array([False, True, False]) + + ser[mask] = None + assert ser.array[1] is pd.NA + + # for other non-string we should also raise an error + ser = pd.Series(["a", "b", "c"], dtype=dtype) + if type(ser.array) is pd.arrays.StringArray: + msg = "Cannot set non-string value" + else: + msg = "Scalar must be NA or str" + with pytest.raises(ValueError, match=msg): + ser[mask] = 1 diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py new file mode 100644 index 00000000..f43cf298 --- /dev/null +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -0,0 +1,199 @@ +import re + +import numpy as np +import pytest + +from 
pandas.compat import pa_version_under1p01 + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.string_ import ( + StringArray, + StringDtype, +) +from pandas.core.arrays.string_arrow import ArrowStringArray + +skip_if_no_pyarrow = pytest.mark.skipif( + pa_version_under1p01, + reason="pyarrow>=1.0.0 is required for PyArrow backed StringArray", +) + + +@skip_if_no_pyarrow +def test_eq_all_na(): + a = pd.array([pd.NA, pd.NA], dtype=StringDtype("pyarrow")) + result = a == a + expected = pd.array([pd.NA, pd.NA], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_config(string_storage): + with pd.option_context("string_storage", string_storage): + assert StringDtype().storage == string_storage + result = pd.array(["a", "b"]) + assert result.dtype.storage == string_storage + + expected = ( + StringDtype(string_storage).construct_array_type()._from_sequence(["a", "b"]) + ) + tm.assert_equal(result, expected) + + +def test_config_bad_storage_raises(): + msg = re.escape("Value must be one of python|pyarrow") + with pytest.raises(ValueError, match=msg): + pd.options.mode.string_storage = "foo" + + +@skip_if_no_pyarrow +@pytest.mark.parametrize("chunked", [True, False]) +@pytest.mark.parametrize("array", ["numpy", "pyarrow"]) +def test_constructor_not_string_type_raises(array, chunked): + import pyarrow as pa + + array = pa if array == "pyarrow" else np + + arr = array.array([1, 2, 3]) + if chunked: + if array is np: + pytest.skip("chunked not applicable to numpy array") + arr = pa.chunked_array(arr) + if array is np: + msg = "Unsupported type '' for ArrowExtensionArray" + else: + msg = re.escape( + "ArrowStringArray requires a PyArrow (chunked) array of string type" + ) + with pytest.raises(ValueError, match=msg): + ArrowStringArray(arr) + + +@skip_if_no_pyarrow +def test_from_sequence_wrong_dtype_raises(): + with pd.option_context("string_storage", "python"): + ArrowStringArray._from_sequence(["a", None, "c"], dtype="string") + + with pd.option_context("string_storage", "pyarrow"): + ArrowStringArray._from_sequence(["a", None, "c"], dtype="string") + + with pytest.raises(AssertionError, match=None): + ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[python]") + + ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]") + + with pytest.raises(AssertionError, match=None): + with pd.option_context("string_storage", "python"): + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + + with pd.option_context("string_storage", "pyarrow"): + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + + with pytest.raises(AssertionError, match=None): + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python")) + + ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow")) + + with pd.option_context("string_storage", "python"): + StringArray._from_sequence(["a", None, "c"], dtype="string") + + with pd.option_context("string_storage", "pyarrow"): + StringArray._from_sequence(["a", None, "c"], dtype="string") + + StringArray._from_sequence(["a", None, "c"], dtype="string[python]") + + with pytest.raises(AssertionError, match=None): + StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]") + + with pd.option_context("string_storage", "python"): + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + + with pytest.raises(AssertionError, match=None): + with pd.option_context("string_storage", "pyarrow"): + 
StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) + + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python")) + + with pytest.raises(AssertionError, match=None): + StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow")) + + +@pytest.mark.skipif( + not pa_version_under1p01, + reason="pyarrow is installed", +) +def test_pyarrow_not_installed_raises(): + msg = re.escape("pyarrow>=1.0.0 is required for PyArrow backed") + + with pytest.raises(ImportError, match=msg): + StringDtype(storage="pyarrow") + + with pytest.raises(ImportError, match=msg): + ArrowStringArray([]) + + with pytest.raises(ImportError, match=msg): + ArrowStringArray._from_sequence(["a", None, "b"]) + + +@skip_if_no_pyarrow +@pytest.mark.parametrize("multiple_chunks", [False, True]) +@pytest.mark.parametrize( + "key, value, expected", + [ + (-1, "XX", ["a", "b", "c", "d", "XX"]), + (1, "XX", ["a", "XX", "c", "d", "e"]), + (1, None, ["a", None, "c", "d", "e"]), + (1, pd.NA, ["a", None, "c", "d", "e"]), + ([1, 3], "XX", ["a", "XX", "c", "XX", "e"]), + ([1, 3], ["XX", "YY"], ["a", "XX", "c", "YY", "e"]), + ([1, 3], ["XX", None], ["a", "XX", "c", None, "e"]), + ([1, 3], ["XX", pd.NA], ["a", "XX", "c", None, "e"]), + ([0, -1], ["XX", "YY"], ["XX", "b", "c", "d", "YY"]), + ([-1, 0], ["XX", "YY"], ["YY", "b", "c", "d", "XX"]), + (slice(3, None), "XX", ["a", "b", "c", "XX", "XX"]), + (slice(2, 4), ["XX", "YY"], ["a", "b", "XX", "YY", "e"]), + (slice(3, 1, -1), ["XX", "YY"], ["a", "b", "YY", "XX", "e"]), + (slice(None), "XX", ["XX", "XX", "XX", "XX", "XX"]), + ([False, True, False, True, False], ["XX", "YY"], ["a", "XX", "c", "YY", "e"]), + ], +) +def test_setitem(multiple_chunks, key, value, expected): + import pyarrow as pa + + result = pa.array(list("abcde")) + expected = pa.array(expected) + + if multiple_chunks: + result = pa.chunked_array([result[:3], result[3:]]) + expected = pa.chunked_array([expected[:3], expected[3:]]) + + result = ArrowStringArray(result) + expected = ArrowStringArray(expected) + + result[key] = value + tm.assert_equal(result, expected) + assert result._data.num_chunks == expected._data.num_chunks + + +@skip_if_no_pyarrow +def test_setitem_invalid_indexer_raises(): + import pyarrow as pa + + arr = ArrowStringArray(pa.array(list("abcde"))) + + with pytest.raises(IndexError, match=None): + arr[5] = "foo" + + with pytest.raises(IndexError, match=None): + arr[-6] = "foo" + + with pytest.raises(IndexError, match=None): + arr[[0, 5]] = "foo" + + with pytest.raises(IndexError, match=None): + arr[[0, -6]] = "foo" + + with pytest.raises(IndexError, match=None): + arr[[True, True, False]] = "foo" + + with pytest.raises(ValueError, match=None): + arr[[0, 1]] = ["foo", "bar", "baz"] diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py new file mode 100644 index 00000000..9f8c277f --- /dev/null +++ b/pandas/tests/arrays/test_array.py @@ -0,0 +1,393 @@ +import datetime +import decimal + +import numpy as np +import pytest +import pytz + +from pandas.core.dtypes.base import _registry as registry + +import pandas as pd +import pandas._testing as tm +from pandas.api.extensions import register_extension_dtype +from pandas.arrays import ( + BooleanArray, + DatetimeArray, + FloatingArray, + IntegerArray, + IntervalArray, + SparseArray, + TimedeltaArray, +) +from pandas.core.arrays import ( + PandasArray, + period_array, +) +from pandas.tests.extension.decimal import ( + DecimalArray, + DecimalDtype, + to_decimal, +) + + +@pytest.mark.parametrize( 
+ "data, dtype, expected", + [ + # Basic NumPy defaults. + ([1, 2], None, IntegerArray._from_sequence([1, 2])), + ([1, 2], object, PandasArray(np.array([1, 2], dtype=object))), + ( + [1, 2], + np.dtype("float32"), + PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))), + ), + (np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2])), + ( + np.array([1.0, 2.0], dtype="float64"), + None, + FloatingArray._from_sequence([1.0, 2.0]), + ), + # String alias passes through to NumPy + ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))), + ([1, 2], "int64", PandasArray(np.array([1, 2], dtype=np.int64))), + # GH#44715 FloatingArray does not support float16, so fall back to PandasArray + ( + np.array([1, 2], dtype=np.float16), + None, + PandasArray(np.array([1, 2], dtype=np.float16)), + ), + # idempotency with e.g. pd.array(pd.array([1, 2], dtype="int64")) + ( + PandasArray(np.array([1, 2], dtype=np.int32)), + None, + PandasArray(np.array([1, 2], dtype=np.int32)), + ), + # Period alias + ( + [pd.Period("2000", "D"), pd.Period("2001", "D")], + "Period[D]", + period_array(["2000", "2001"], freq="D"), + ), + # Period dtype + ( + [pd.Period("2000", "D")], + pd.PeriodDtype("D"), + period_array(["2000"], freq="D"), + ), + # Datetime (naive) + ( + [1, 2], + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + ), + ( + np.array([1, 2], dtype="datetime64[ns]"), + None, + DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + ), + ( + pd.DatetimeIndex(["2000", "2001"]), + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + pd.DatetimeIndex(["2000", "2001"]), + None, + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + ["2000", "2001"], + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(["2000", "2001"]), + ), + # Datetime (tz-aware) + ( + ["2000", "2001"], + pd.DatetimeTZDtype(tz="CET"), + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") + ), + ), + # Timedelta + ( + ["1H", "2H"], + np.dtype("timedelta64[ns]"), + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + pd.TimedeltaIndex(["1H", "2H"]), + np.dtype("timedelta64[ns]"), + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + pd.TimedeltaIndex(["1H", "2H"]), + None, + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + # Category + (["a", "b"], "category", pd.Categorical(["a", "b"])), + ( + ["a", "b"], + pd.CategoricalDtype(None, ordered=True), + pd.Categorical(["a", "b"], ordered=True), + ), + # Interval + ( + [pd.Interval(1, 2), pd.Interval(3, 4)], + "interval", + IntervalArray.from_tuples([(1, 2), (3, 4)]), + ), + # Sparse + ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), + # IntegerNA + ([1, None], "Int16", pd.array([1, None], dtype="Int16")), + (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + # String + ( + ["a", None], + "string", + pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + ), + ( + ["a", None], + pd.StringDtype(), + pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + ), + # Boolean + ([True, None], "boolean", BooleanArray._from_sequence([True, None])), + ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])), + # Index + (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + # Series[EA] returns the EA + ( + pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), + None, + pd.Categorical(["a", "b"], 
categories=["a", "b", "c"]), + ), + # "3rd party" EAs work + ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])), + # pass an ExtensionArray, but a different dtype + ( + period_array(["2000", "2001"], freq="D"), + "category", + pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]), + ), + ], +) +def test_array(data, dtype, expected): + result = pd.array(data, dtype=dtype) + tm.assert_equal(result, expected) + + +def test_array_copy(): + a = np.array([1, 2]) + # default is to copy + b = pd.array(a, dtype=a.dtype) + assert not tm.shares_memory(a, b) + + # copy=True + b = pd.array(a, dtype=a.dtype, copy=True) + assert not tm.shares_memory(a, b) + + # copy=False + b = pd.array(a, dtype=a.dtype, copy=False) + assert tm.shares_memory(a, b) + + +cet = pytz.timezone("CET") + + +@pytest.mark.parametrize( + "data, expected", + [ + # period + ( + [pd.Period("2000", "D"), pd.Period("2001", "D")], + period_array(["2000", "2001"], freq="D"), + ), + # interval + ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])), + # datetime + ( + [pd.Timestamp("2000"), pd.Timestamp("2001")], + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + np.array([1, 2], dtype="M8[ns]"), + DatetimeArray(np.array([1, 2], dtype="M8[ns]")), + ), + ( + np.array([1, 2], dtype="M8[us]"), + DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")), + ), + # datetimetz + ( + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") + ), + ), + ( + [ + datetime.datetime(2000, 1, 1, tzinfo=cet), + datetime.datetime(2001, 1, 1, tzinfo=cet), + ], + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet) + ), + ), + # timedelta + ( + [pd.Timedelta("1H"), pd.Timedelta("2H")], + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + np.array([1, 2], dtype="m8[ns]"), + TimedeltaArray(np.array([1, 2], dtype="m8[ns]")), + ), + ( + np.array([1, 2], dtype="m8[us]"), + TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")), + ), + # integer + ([1, 2], IntegerArray._from_sequence([1, 2])), + ([1, None], IntegerArray._from_sequence([1, None])), + ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])), + ([1, np.nan], IntegerArray._from_sequence([1, np.nan])), + # float + ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2])), + ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA])), + ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA])), + ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA])), + # integer-like float + ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0])), + ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA])), + ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA])), + ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA])), + # mixed-integer-float + ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0])), + ([1, np.nan, 2.0], FloatingArray._from_sequence([1.0, None, 2.0])), + # string + ( + ["a", "b"], + pd.StringDtype().construct_array_type()._from_sequence(["a", "b"]), + ), + ( + ["a", None], + pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + ), + # Boolean + ([True, False], BooleanArray._from_sequence([True, False])), + ([True, None], BooleanArray._from_sequence([True, None])), + ], +) +def test_array_inference(data, expected): + result = pd.array(data) + tm.assert_equal(result, 
expected) + + +@pytest.mark.parametrize( + "data", + [ + # mix of frequencies + [pd.Period("2000", "D"), pd.Period("2001", "A")], + # mix of closed + [pd.Interval(0, 1, closed="left"), pd.Interval(1, 2, closed="right")], + # Mix of timezones + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")], + # Mix of tz-aware and tz-naive + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")], + np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]), + ], +) +def test_array_inference_fails(data): + result = pd.array(data) + expected = PandasArray(np.array(data, dtype=object)) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("data", [np.array(0)]) +def test_nd_raises(data): + with pytest.raises(ValueError, match="PandasArray must be 1-dimensional"): + pd.array(data, dtype="int64") + + +def test_scalar_raises(): + with pytest.raises(ValueError, match="Cannot pass scalar '1'"): + pd.array(1) + + +def test_bounds_check(): + # GH21796 + with pytest.raises( + TypeError, match=r"cannot safely cast non-equivalent int(32|64) to uint16" + ): + pd.array([-1, 2, 3], dtype="UInt16") + + +# --------------------------------------------------------------------------- +# A couple dummy classes to ensure that Series and Indexes are unboxed before +# getting to the EA classes. + + +@register_extension_dtype +class DecimalDtype2(DecimalDtype): + name = "decimal2" + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return DecimalArray2 + + +class DecimalArray2(DecimalArray): + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + if isinstance(scalars, (pd.Series, pd.Index)): + raise TypeError("scalars should not be of type pd.Series or pd.Index") + + return super()._from_sequence(scalars, dtype=dtype, copy=copy) + + +def test_array_unboxes(index_or_series): + box = index_or_series + + data = box([decimal.Decimal("1"), decimal.Decimal("2")]) + # make sure it works + with pytest.raises( + TypeError, match="scalars should not be of type pd.Series or pd.Index" + ): + DecimalArray2._from_sequence(data) + + result = pd.array(data, dtype="decimal2") + expected = DecimalArray2._from_sequence(data.values) + tm.assert_equal(result, expected) + + +@pytest.fixture +def registry_without_decimal(): + """Fixture yielding 'registry' with no DecimalDtype entries""" + idx = registry.dtypes.index(DecimalDtype) + registry.dtypes.pop(idx) + yield + registry.dtypes.append(DecimalDtype) + + +def test_array_not_registered(registry_without_decimal): + # check we aren't on it + assert registry.find("decimal") is None + data = [decimal.Decimal("1"), decimal.Decimal("2")] + + result = pd.array(data, dtype=DecimalDtype) + expected = DecimalArray._from_sequence(data) + tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py new file mode 100644 index 00000000..ea895e56 --- /dev/null +++ b/pandas/tests/arrays/test_datetimelike.py @@ -0,0 +1,1459 @@ +from __future__ import annotations + +import re + +import numpy as np +import pytest + +from pandas._libs import ( + NaT, + OutOfBoundsDatetime, + Timestamp, +) +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DatetimeIndex, + Period, + PeriodIndex, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + PandasArray, + PeriodArray, + TimedeltaArray, +) +from 
pandas.core.arrays.datetimes import _sequence_to_dt64ns +from pandas.core.arrays.timedeltas import sequence_to_td64ns + + +# TODO: more freq variants +@pytest.fixture(params=["D", "B", "W", "M", "Q", "Y"]) +def freqstr(request): + """Fixture returning parametrized frequency in string format.""" + return request.param + + +@pytest.fixture +def period_index(freqstr): + """ + A fixture to provide PeriodIndex objects with different frequencies. + + Most PeriodArray behavior is already tested in PeriodIndex tests, + so here we just test that the PeriodArray behavior matches + the PeriodIndex behavior. + """ + # TODO: non-monotone indexes; NaTs, different start dates + pi = pd.period_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr) + return pi + + +@pytest.fixture +def datetime_index(freqstr): + """ + A fixture to provide DatetimeIndex objects with different frequencies. + + Most DatetimeArray behavior is already tested in DatetimeIndex tests, + so here we just test that the DatetimeArray behavior matches + the DatetimeIndex behavior. + """ + # TODO: non-monotone indexes; NaTs, different start dates, timezones + dti = pd.date_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr) + return dti + + +@pytest.fixture +def timedelta_index(): + """ + A fixture to provide TimedeltaIndex objects with different frequencies. + Most TimedeltaArray behavior is already tested in TimedeltaIndex tests, + so here we just test that the TimedeltaArray behavior matches + the TimedeltaIndex behavior. + """ + # TODO: flesh this out + return TimedeltaIndex(["1 Day", "3 Hours", "NaT"]) + + +class SharedTests: + index_cls: type[DatetimeIndex | PeriodIndex | TimedeltaIndex] + + @pytest.fixture + def arr1d(self): + """Fixture returning DatetimeArray with daily frequency.""" + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + return arr + + def test_compare_len1_raises(self, arr1d): + # make sure we raise when comparing with different lengths, specific + # to the case where one has length-1, which numpy would broadcast + arr = arr1d + idx = self.index_cls(arr) + + with pytest.raises(ValueError, match="Lengths must match"): + arr == arr[:1] + + # test the index classes while we're at it, GH#23078 + with pytest.raises(ValueError, match="Lengths must match"): + idx <= idx[[0]] + + @pytest.mark.parametrize( + "result", + [ + pd.date_range("2020", periods=3), + pd.date_range("2020", periods=3, tz="UTC"), + pd.timedelta_range("0 days", periods=3), + pd.period_range("2020Q1", periods=3, freq="Q"), + ], + ) + def test_compare_with_Categorical(self, result): + expected = pd.Categorical(result) + assert all(result == expected) + assert not any(result != expected) + + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize("as_index", [True, False]) + def test_compare_categorical_dtype(self, arr1d, as_index, reverse, ordered): + other = pd.Categorical(arr1d, ordered=ordered) + if as_index: + other = pd.CategoricalIndex(other) + + left, right = arr1d, other + if reverse: + left, right = right, left + + ones = np.ones(arr1d.shape, dtype=bool) + zeros = ~ones + + result = left == right + tm.assert_numpy_array_equal(result, ones) + + result = left != right + tm.assert_numpy_array_equal(result, zeros) + + if not reverse and not as_index: + # Otherwise Categorical raises TypeError bc it is not ordered + # TODO: we should probably get the same behavior regardless? 
+ result = left < right + tm.assert_numpy_array_equal(result, zeros) + + result = left <= right + tm.assert_numpy_array_equal(result, ones) + + result = left > right + tm.assert_numpy_array_equal(result, zeros) + + result = left >= right + tm.assert_numpy_array_equal(result, ones) + + def test_take(self): + data = np.arange(100, dtype="i8") * 24 * 3600 * 10**9 + np.random.shuffle(data) + + freq = None if self.array_cls is not PeriodArray else "D" + + arr = self.array_cls(data, freq=freq) + idx = self.index_cls._simple_new(arr) + + takers = [1, 4, 94] + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + takers = np.array([1, 4, 94]) + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + @pytest.mark.parametrize("fill_value", [2, 2.0, Timestamp(2021, 1, 1, 12).time]) + def test_take_fill_raises(self, fill_value): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + + arr = self.array_cls(data, freq="D") + + msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got" + with pytest.raises(TypeError, match=msg): + arr.take([0, 1], allow_fill=True, fill_value=fill_value) + + def test_take_fill(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + + arr = self.array_cls(data, freq="D") + + result = arr.take([-1, 1], allow_fill=True, fill_value=None) + assert result[0] is NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) + assert result[0] is NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=NaT) + assert result[0] is NaT + + def test_take_fill_str(self, arr1d): + # Cast str fill_value matching other fill_value-taking methods + result = arr1d.take([-1, 1], allow_fill=True, fill_value=str(arr1d[-1])) + expected = arr1d[[-1, 1]] + tm.assert_equal(result, expected) + + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got" + with pytest.raises(TypeError, match=msg): + arr1d.take([-1, 1], allow_fill=True, fill_value="foo") + + def test_concat_same_type(self, arr1d): + arr = arr1d + idx = self.index_cls(arr) + idx = idx.insert(0, NaT) + arr = self.array_cls(idx) + + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) + arr2 = arr.astype(object) + expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]), None) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_unbox_scalar(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + result = arr._unbox_scalar(arr[0]) + expected = arr._data.dtype.type + assert isinstance(result, expected) + + result = arr._unbox_scalar(NaT) + assert isinstance(result, expected) + + msg = f"'value' should be a {self.scalar_type.__name__}." 
+ with pytest.raises(ValueError, match=msg): + arr._unbox_scalar("foo") + + def test_check_compatible_with(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + + arr._check_compatible_with(arr[0]) + arr._check_compatible_with(arr[:1]) + arr._check_compatible_with(NaT) + + def test_scalar_from_string(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + result = arr._scalar_from_string(str(arr[0])) + assert result == arr[0] + + def test_reduce_invalid(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + + msg = "does not support reduction 'not a method'" + with pytest.raises(TypeError, match=msg): + arr._reduce("not a method") + + @pytest.mark.parametrize("method", ["pad", "backfill"]) + def test_fillna_method_doesnt_change_orig(self, method): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + arr[4] = NaT + + fill_value = arr[3] if method == "pad" else arr[5] + + result = arr.fillna(method=method) + assert result[4] == fill_value + + # check that the original was not changed + assert arr[4] is NaT + + def test_searchsorted(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + + # scalar + result = arr.searchsorted(arr[1]) + assert result == 1 + + result = arr.searchsorted(arr[2], side="right") + assert result == 3 + + # own-type + result = arr.searchsorted(arr[1:3]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = arr.searchsorted(arr[1:3], side="right") + expected = np.array([2, 3], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + # GH#29884 match numpy convention on whether NaT goes + # at the end or the beginning + result = arr.searchsorted(NaT) + assert result == 10 + + @pytest.mark.parametrize("box", [None, "index", "series"]) + def test_searchsorted_castable_strings(self, arr1d, box, request, string_storage): + if isinstance(arr1d, DatetimeArray): + tz = arr1d.tz + ts1, ts2 = arr1d[1:3] + if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): + # If we have e.g. tzutc(), when we cast to string and parse + # back we get pytz.UTC, and then consider them different timezones + # so incorrectly raise. + mark = pytest.mark.xfail( + raises=TypeError, reason="timezone comparisons inconsistent" + ) + request.node.add_marker(mark) + + arr = arr1d + if box is None: + pass + elif box == "index": + # Test the equivalent Index.searchsorted method while we're here + arr = self.index_cls(arr) + else: + # Test the equivalent Series.searchsorted method while we're here + arr = pd.Series(arr) + + # scalar + result = arr.searchsorted(str(arr[1])) + assert result == 1 + + result = arr.searchsorted(str(arr[2]), side="right") + assert result == 3 + + result = arr.searchsorted([str(x) for x in arr[1:3]]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises( + TypeError, + match=re.escape( + f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', " + "or array of those. Got 'str' instead." + ), + ): + arr.searchsorted("foo") + + arr_type = "StringArray" if string_storage == "python" else "ArrowStringArray" + + with pd.option_context("string_storage", string_storage): + with pytest.raises( + TypeError, + match=re.escape( + f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', " + f"or array of those. 
Got '{arr_type}' instead." + ), + ): + arr.searchsorted([str(arr[1]), "baz"]) + + def test_getitem_near_implementation_bounds(self): + # We only check tz-naive for DTA bc the bounds are slightly different + # for other tzs + i8vals = np.asarray([NaT.value + n for n in range(1, 5)], dtype="i8") + arr = self.array_cls(i8vals, freq="ns") + arr[0] # should not raise OutOfBoundsDatetime + + index = pd.Index(arr) + index[0] # should not raise OutOfBoundsDatetime + + ser = pd.Series(arr) + ser[0] # should not raise OutOfBoundsDatetime + + def test_getitem_2d(self, arr1d): + # 2d slicing on a 1D array + expected = type(arr1d)(arr1d._data[:, np.newaxis], dtype=arr1d.dtype) + result = arr1d[:, np.newaxis] + tm.assert_equal(result, expected) + + # Lookup on a 2D array + arr2d = expected + expected = type(arr2d)(arr2d._data[:3, 0], dtype=arr2d.dtype) + result = arr2d[:3, 0] + tm.assert_equal(result, expected) + + # Scalar lookup + result = arr2d[-1, 0] + expected = arr1d[-1] + assert result == expected + + def test_iter_2d(self, arr1d): + data2d = arr1d._data[:3, np.newaxis] + arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) + result = list(arr2d) + assert len(result) == 3 + for x in result: + assert isinstance(x, type(arr1d)) + assert x.ndim == 1 + assert x.dtype == arr1d.dtype + + def test_repr_2d(self, arr1d): + data2d = arr1d._data[:3, np.newaxis] + arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) + + result = repr(arr2d) + + if isinstance(arr2d, TimedeltaArray): + expected = ( + f"<{type(arr2d).__name__}>\n" + "[\n" + f"['{arr1d[0]._repr_base()}'],\n" + f"['{arr1d[1]._repr_base()}'],\n" + f"['{arr1d[2]._repr_base()}']\n" + "]\n" + f"Shape: (3, 1), dtype: {arr1d.dtype}" + ) + else: + expected = ( + f"<{type(arr2d).__name__}>\n" + "[\n" + f"['{arr1d[0]}'],\n" + f"['{arr1d[1]}'],\n" + f"['{arr1d[2]}']\n" + "]\n" + f"Shape: (3, 1), dtype: {arr1d.dtype}" + ) + + assert result == expected + + def test_setitem(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + + arr[0] = arr[1] + expected = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + expected[0] = expected[1] + + tm.assert_numpy_array_equal(arr.asi8, expected) + + arr[:2] = arr[-2:] + expected[:2] = expected[-2:] + tm.assert_numpy_array_equal(arr.asi8, expected) + + @pytest.mark.parametrize( + "box", + [ + pd.Index, + pd.Series, + np.array, + list, + PandasArray, + ], + ) + def test_setitem_object_dtype(self, box, arr1d): + + expected = arr1d.copy()[::-1] + if expected.dtype.kind in ["m", "M"]: + expected = expected._with_freq(None) + + vals = expected + if box is list: + vals = list(vals) + elif box is np.array: + # if we do np.array(x).astype(object) then dt64 and td64 cast to ints + vals = np.array(vals.astype(object)) + elif box is PandasArray: + vals = box(np.asarray(vals, dtype=object)) + else: + vals = box(vals).astype(object) + + arr1d[:] = vals + + tm.assert_equal(arr1d, expected) + + def test_setitem_strs(self, arr1d, request): + # Check that we parse strs in both scalar and listlike + if isinstance(arr1d, DatetimeArray): + tz = arr1d.tz + ts1, ts2 = arr1d[-2:] + if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): + # If we have e.g. tzutc(), when we cast to string and parse + # back we get pytz.UTC, and then consider them different timezones + # so incorrectly raise. 
+ mark = pytest.mark.xfail( + raises=TypeError, reason="timezone comparisons inconsistent" + ) + request.node.add_marker(mark) + + # Setting list-like of strs + expected = arr1d.copy() + expected[[0, 1]] = arr1d[-2:] + + result = arr1d.copy() + result[:2] = [str(x) for x in arr1d[-2:]] + tm.assert_equal(result, expected) + + # Same thing but now for just a scalar str + expected = arr1d.copy() + expected[0] = arr1d[-1] + + result = arr1d.copy() + result[0] = str(arr1d[-1]) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_setitem_categorical(self, arr1d, as_index): + expected = arr1d.copy()[::-1] + if not isinstance(expected, PeriodArray): + expected = expected._with_freq(None) + + cat = pd.Categorical(arr1d) + if as_index: + cat = pd.CategoricalIndex(cat) + + arr1d[:] = cat[::-1] + + tm.assert_equal(arr1d, expected) + + def test_setitem_raises(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + val = arr[0] + + with pytest.raises(IndexError, match="index 12 is out of bounds"): + arr[12] = val + + with pytest.raises(TypeError, match="value should be a.* 'object'"): + arr[0] = object() + + msg = "cannot set using a list-like indexer with a different length" + with pytest.raises(ValueError, match=msg): + # GH#36339 + arr[[]] = [arr[1]] + + msg = "cannot set using a slice indexer with a different length than" + with pytest.raises(ValueError, match=msg): + # GH#36339 + arr[1:1] = arr[:3] + + @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series]) + def test_setitem_numeric_raises(self, arr1d, box): + # We dont case e.g. int64 to our own dtype for setitem + + msg = ( + f"value should be a '{arr1d._scalar_type.__name__}', " + "'NaT', or array of those. 
Got" + ) + with pytest.raises(TypeError, match=msg): + arr1d[:2] = box([0, 1]) + + with pytest.raises(TypeError, match=msg): + arr1d[:2] = box([0.0, 1.0]) + + def test_inplace_arithmetic(self): + # GH#24115 check that iadd and isub are actually in-place + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + + expected = arr + pd.Timedelta(days=1) + arr += pd.Timedelta(days=1) + tm.assert_equal(arr, expected) + + expected = arr - pd.Timedelta(days=1) + arr -= pd.Timedelta(days=1) + tm.assert_equal(arr, expected) + + def test_shift_fill_int_deprecated(self): + # GH#31971 + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq="D") + + msg = "Passing to shift" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = arr.shift(1, fill_value=1) + + expected = arr.copy() + if self.array_cls is PeriodArray: + fill_val = arr._scalar_type._from_ordinal(1, freq=arr.freq) + else: + fill_val = arr._scalar_type(1) + expected[0] = fill_val + expected[1:] = arr[:-1] + tm.assert_equal(result, expected) + + def test_median(self, arr1d): + arr = arr1d + if len(arr) % 2 == 0: + # make it easier to define `expected` + arr = arr[:-1] + + expected = arr[len(arr) // 2] + + result = arr.median() + assert type(result) is type(expected) + assert result == expected + + arr[len(arr) // 2] = NaT + if not isinstance(expected, Period): + expected = arr[len(arr) // 2 - 1 : len(arr) // 2 + 2].mean() + + assert arr.median(skipna=False) is NaT + + result = arr.median() + assert type(result) is type(expected) + assert result == expected + + assert arr[:0].median() is NaT + assert arr[:0].median(skipna=False) is NaT + + # 2d Case + arr2 = arr.reshape(-1, 1) + + result = arr2.median(axis=None) + assert type(result) is type(expected) + assert result == expected + + assert arr2.median(axis=None, skipna=False) is NaT + + result = arr2.median(axis=0) + expected2 = type(arr)._from_sequence([expected], dtype=arr.dtype) + tm.assert_equal(result, expected2) + + result = arr2.median(axis=0, skipna=False) + expected2 = type(arr)._from_sequence([NaT], dtype=arr.dtype) + tm.assert_equal(result, expected2) + + result = arr2.median(axis=1) + tm.assert_equal(result, arr) + + result = arr2.median(axis=1, skipna=False) + tm.assert_equal(result, arr) + + def test_from_integer_array(self): + arr = np.array([1, 2, 3], dtype=np.int64) + expected = self.array_cls(arr, dtype=self.example_dtype) + + data = pd.array(arr, dtype="Int64") + result = self.array_cls(data, dtype=self.example_dtype) + + tm.assert_extension_array_equal(result, expected) + + +class TestDatetimeArray(SharedTests): + index_cls = DatetimeIndex + array_cls = DatetimeArray + scalar_type = Timestamp + example_dtype = "M8[ns]" + + @pytest.fixture + def arr1d(self, tz_naive_fixture, freqstr): + """ + Fixture returning DatetimeArray with parametrized frequency and + timezones + """ + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01 01:01:00", periods=5, freq=freqstr, tz=tz) + dta = dti._data + return dta + + def test_round(self, arr1d): + # GH#24064 + dti = self.index_cls(arr1d) + + result = dti.round(freq="2T") + expected = dti - pd.Timedelta(minutes=1) + expected = expected._with_freq(None) + tm.assert_index_equal(result, expected) + + dta = dti._data + result = dta.round(freq="2T") + expected = expected._data._with_freq(None) + tm.assert_datetime_array_equal(result, expected) + + def test_array_interface(self, datetime_index): + arr = DatetimeArray(datetime_index) + + # default asarray 
gives the same underlying data (for tz naive) + result = np.asarray(arr) + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + + # specifying M8[ns] gives the same result as default + result = np.asarray(arr, dtype="datetime64[ns]") + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="datetime64[ns]", copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="datetime64[ns]") + assert result is not expected + tm.assert_numpy_array_equal(result, expected) + + # to object dtype + result = np.asarray(arr, dtype=object) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to other dtype always copies + result = np.asarray(arr, dtype="int64") + assert result is not arr.asi8 + assert not np.may_share_memory(arr, result) + expected = arr.asi8.copy() + tm.assert_numpy_array_equal(result, expected) + + # other dtypes handled by numpy + for dtype in ["float64", str]: + result = np.asarray(arr, dtype=dtype) + expected = np.asarray(arr).astype(dtype) + tm.assert_numpy_array_equal(result, expected) + + def test_array_object_dtype(self, arr1d): + # GH#23524 + arr = arr1d + dti = self.index_cls(arr1d) + + expected = np.array(list(dti)) + + result = np.array(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # also test the DatetimeIndex method while we're at it + result = np.array(dti, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_array_tz(self, arr1d): + # GH#23524 + arr = arr1d + dti = self.index_cls(arr1d) + + expected = dti.asi8.view("M8[ns]") + result = np.array(arr, dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + result = np.array(arr, dtype="datetime64[ns]") + tm.assert_numpy_array_equal(result, expected) + + # check that we are not making copies when setting copy=False + result = np.array(arr, dtype="M8[ns]", copy=False) + assert result.base is expected.base + assert result.base is not None + result = np.array(arr, dtype="datetime64[ns]", copy=False) + assert result.base is expected.base + assert result.base is not None + + def test_array_i8_dtype(self, arr1d): + arr = arr1d + dti = self.index_cls(arr1d) + + expected = dti.asi8 + result = np.array(arr, dtype="i8") + tm.assert_numpy_array_equal(result, expected) + + result = np.array(arr, dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + # check that we are still making copies when setting copy=False + result = np.array(arr, dtype="i8", copy=False) + assert result.base is not expected.base + assert result.base is None + + def test_from_array_keeps_base(self): + # Ensure that DatetimeArray._data.base isn't lost. 
+ arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") + dta = DatetimeArray(arr) + + assert dta._data is arr + dta = DatetimeArray(arr[:0]) + assert dta._data.base is arr + + def test_from_dti(self, arr1d): + arr = arr1d + dti = self.index_cls(arr1d) + assert list(dti) == list(arr) + + # Check that Index.__new__ knows what to do with DatetimeArray + dti2 = pd.Index(arr) + assert isinstance(dti2, DatetimeIndex) + assert list(dti2) == list(arr) + + def test_astype_object(self, arr1d): + arr = arr1d + dti = self.index_cls(arr1d) + + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(dti) + + def test_to_perioddelta(self, datetime_index, freqstr): + # GH#23113 + dti = datetime_index + arr = DatetimeArray(dti) + + msg = "to_perioddelta is deprecated and will be removed" + with tm.assert_produces_warning(FutureWarning, match=msg): + # Deprecation GH#34853 + expected = dti.to_perioddelta(freq=freqstr) + with tm.assert_produces_warning(FutureWarning, match=msg): + # stacklevel is chosen to be "correct" for DatetimeIndex, not + # DatetimeArray + result = arr.to_perioddelta(freq=freqstr) + assert isinstance(result, TimedeltaArray) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + def test_to_period(self, datetime_index, freqstr): + dti = datetime_index + arr = DatetimeArray(dti) + + expected = dti.to_period(freq=freqstr) + result = arr.to_period(freq=freqstr) + assert isinstance(result, PeriodArray) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + def test_to_period_2d(self, arr1d): + arr2d = arr1d.reshape(1, -1) + + warn = None if arr1d.tz is None else UserWarning + with tm.assert_produces_warning(warn): + result = arr2d.to_period("D") + expected = arr1d.to_period("D").reshape(1, -1) + tm.assert_period_array_equal(result, expected) + + @pytest.mark.parametrize("propname", DatetimeArray._bool_ops) + def test_bool_properties(self, arr1d, propname): + # in this case _bool_ops is just `is_leap_year` + dti = self.index_cls(arr1d) + arr = arr1d + assert dti.freq == arr.freq + + result = getattr(arr, propname) + expected = np.array(getattr(dti, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("propname", DatetimeArray._field_ops) + def test_int_properties(self, arr1d, propname): + warn = None + msg = "weekofyear and week have been deprecated, please use" + if propname in ["week", "weekofyear"]: + # GH#33595 Deprecate week and weekofyear + warn = FutureWarning + + dti = self.index_cls(arr1d) + arr = arr1d + + with tm.assert_produces_warning(warn, match=msg): + result = getattr(arr, propname) + expected = np.array(getattr(dti, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + def test_take_fill_valid(self, arr1d, fixed_now_ts): + arr = arr1d + dti = self.index_cls(arr1d) + + now = fixed_now_ts.tz_localize(dti.tz) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. 
Got" + with pytest.raises(TypeError, match=msg): + # fill_value Timedelta invalid + arr.take([-1, 1], allow_fill=True, fill_value=now - now) + + with pytest.raises(TypeError, match=msg): + # fill_value Period invalid + arr.take([-1, 1], allow_fill=True, fill_value=Period("2014Q1")) + + tz = None if dti.tz is not None else "US/Eastern" + now = fixed_now_ts.tz_localize(tz) + msg = "Cannot compare tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + # Timestamp with mismatched tz-awareness + arr.take([-1, 1], allow_fill=True, fill_value=now) + + value = NaT.value + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got" + with pytest.raises(TypeError, match=msg): + # require NaT, not iNaT, as it could be confused with an integer + arr.take([-1, 1], allow_fill=True, fill_value=value) + + value = np.timedelta64("NaT", "ns") + with pytest.raises(TypeError, match=msg): + # require appropriate-dtype if we have a NA value + arr.take([-1, 1], allow_fill=True, fill_value=value) + + if arr.tz is not None: + # GH#37356 + # Assuming here that arr1d fixture does not include Australia/Melbourne + value = fixed_now_ts.tz_localize("Australia/Melbourne") + msg = "Timezones don't match. .* != 'Australia/Melbourne'" + with pytest.raises(ValueError, match=msg): + # require tz match, not just tzawareness match + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezone" + ): + result = arr.take([-1, 1], allow_fill=True, fill_value=value) + + # once deprecation is enforced + # expected = arr.take([-1, 1], allow_fill=True, + # fill_value=value.tz_convert(arr.dtype.tz)) + # tm.assert_equal(result, expected) + + def test_concat_same_type_invalid(self, arr1d): + # different timezones + arr = arr1d + + if arr.tz is None: + other = arr.tz_localize("UTC") + else: + other = arr.tz_localize(None) + + with pytest.raises(ValueError, match="to_concat must have the same"): + arr._concat_same_type([arr, other]) + + def test_concat_same_type_different_freq(self): + # we *can* concatenate DTI with different freqs. 
+ a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central")) + b = DatetimeArray(pd.date_range("2000", periods=2, freq="H", tz="US/Central")) + result = DatetimeArray._concat_same_type([a, b]) + expected = DatetimeArray( + pd.to_datetime( + [ + "2000-01-01 00:00:00", + "2000-01-02 00:00:00", + "2000-01-01 00:00:00", + "2000-01-01 01:00:00", + ] + ).tz_localize("US/Central") + ) + + tm.assert_datetime_array_equal(result, expected) + + def test_strftime(self, arr1d): + arr = arr1d + + result = arr.strftime("%Y %b") + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_strftime_nat(self): + # GH 29578 + arr = DatetimeArray(DatetimeIndex(["2019-01-01", NaT])) + + result = arr.strftime("%Y-%m-%d") + expected = np.array(["2019-01-01", np.nan], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +class TestTimedeltaArray(SharedTests): + index_cls = TimedeltaIndex + array_cls = TimedeltaArray + scalar_type = pd.Timedelta + example_dtype = "m8[ns]" + + def test_from_tdi(self): + tdi = TimedeltaIndex(["1 Day", "3 Hours"]) + arr = TimedeltaArray(tdi) + assert list(arr) == list(tdi) + + # Check that Index.__new__ knows what to do with TimedeltaArray + tdi2 = pd.Index(arr) + assert isinstance(tdi2, TimedeltaIndex) + assert list(tdi2) == list(arr) + + def test_astype_object(self): + tdi = TimedeltaIndex(["1 Day", "3 Hours"]) + arr = TimedeltaArray(tdi) + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(tdi) + + def test_to_pytimedelta(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + expected = tdi.to_pytimedelta() + result = arr.to_pytimedelta() + + tm.assert_numpy_array_equal(result, expected) + + def test_total_seconds(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + expected = tdi.total_seconds() + result = arr.total_seconds() + + tm.assert_numpy_array_equal(result, expected.values) + + @pytest.mark.parametrize("propname", TimedeltaArray._field_ops) + def test_int_properties(self, timedelta_index, propname): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + result = getattr(arr, propname) + expected = np.array(getattr(tdi, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self, timedelta_index): + arr = TimedeltaArray(timedelta_index) + + # default asarray gives the same underlying data + result = np.asarray(arr) + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + + # specifying m8[ns] gives the same result as default + result = np.asarray(arr, dtype="timedelta64[ns]") + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="timedelta64[ns]", copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="timedelta64[ns]") + assert result is not expected + tm.assert_numpy_array_equal(result, expected) + + # to object dtype + result = np.asarray(arr, dtype=object) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to other dtype always copies + result = np.asarray(arr, dtype="int64") + assert result is not arr.asi8 + assert not np.may_share_memory(arr, 
result) + expected = arr.asi8.copy() + tm.assert_numpy_array_equal(result, expected) + + # other dtypes handled by numpy + for dtype in ["float64", str]: + result = np.asarray(arr, dtype=dtype) + expected = np.asarray(arr).astype(dtype) + tm.assert_numpy_array_equal(result, expected) + + def test_take_fill_valid(self, timedelta_index, fixed_now_ts): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + td1 = pd.Timedelta(days=1) + result = arr.take([-1, 1], allow_fill=True, fill_value=td1) + assert result[0] == td1 + + value = fixed_now_ts + msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got" + with pytest.raises(TypeError, match=msg): + # fill_value Timestamp invalid + arr.take([0, 1], allow_fill=True, fill_value=value) + + value = fixed_now_ts.to_period("D") + with pytest.raises(TypeError, match=msg): + # fill_value Period invalid + arr.take([0, 1], allow_fill=True, fill_value=value) + + value = np.datetime64("NaT", "ns") + with pytest.raises(TypeError, match=msg): + # require appropriate-dtype if we have a NA value + arr.take([-1, 1], allow_fill=True, fill_value=value) + + +class TestPeriodArray(SharedTests): + index_cls = PeriodIndex + array_cls = PeriodArray + scalar_type = Period + example_dtype = PeriodIndex([], freq="W").dtype + + @pytest.fixture + def arr1d(self, period_index): + """ + Fixture returning DatetimeArray from parametrized PeriodIndex objects + """ + return period_index._data + + def test_from_pi(self, arr1d): + pi = self.index_cls(arr1d) + arr = arr1d + assert list(arr) == list(pi) + + # Check that Index.__new__ knows what to do with PeriodArray + pi2 = pd.Index(arr) + assert isinstance(pi2, PeriodIndex) + assert list(pi2) == list(arr) + + def test_astype_object(self, arr1d): + pi = self.index_cls(arr1d) + arr = arr1d + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(pi) + + def test_take_fill_valid(self, arr1d): + arr = arr1d + + value = NaT.value + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. 
Got" + with pytest.raises(TypeError, match=msg): + # require NaT, not iNaT, as it could be confused with an integer + arr.take([-1, 1], allow_fill=True, fill_value=value) + + value = np.timedelta64("NaT", "ns") + with pytest.raises(TypeError, match=msg): + # require appropriate-dtype if we have a NA value + arr.take([-1, 1], allow_fill=True, fill_value=value) + + @pytest.mark.parametrize("how", ["S", "E"]) + def test_to_timestamp(self, how, arr1d): + pi = self.index_cls(arr1d) + arr = arr1d + + expected = DatetimeArray(pi.to_timestamp(how=how)) + result = arr.to_timestamp(how=how) + assert isinstance(result, DatetimeArray) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + def test_to_timestamp_roundtrip_bday(self): + # Case where infer_freq inside would choose "D" instead of "B" + dta = pd.date_range("2021-10-18", periods=3, freq="B")._data + parr = dta.to_period() + result = parr.to_timestamp() + assert result.freq == "B" + tm.assert_extension_array_equal(result, dta) + + dta2 = dta[::2] + parr2 = dta2.to_period() + result2 = parr2.to_timestamp() + assert result2.freq == "2B" + tm.assert_extension_array_equal(result2, dta2) + + parr3 = dta.to_period("2B") + result3 = parr3.to_timestamp() + assert result3.freq == "B" + tm.assert_extension_array_equal(result3, dta) + + def test_to_timestamp_out_of_bounds(self): + # GH#19643 previously overflowed silently + pi = pd.period_range("1500", freq="Y", periods=3) + msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + pi.to_timestamp() + + with pytest.raises(OutOfBoundsDatetime, match=msg): + pi._data.to_timestamp() + + @pytest.mark.parametrize("propname", PeriodArray._bool_ops) + def test_bool_properties(self, arr1d, propname): + # in this case _bool_ops is just `is_leap_year` + pi = self.index_cls(arr1d) + arr = arr1d + + result = getattr(arr, propname) + expected = np.array(getattr(pi, propname)) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("propname", PeriodArray._field_ops) + def test_int_properties(self, arr1d, propname): + pi = self.index_cls(arr1d) + arr = arr1d + + result = getattr(arr, propname) + expected = np.array(getattr(pi, propname)) + + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self, arr1d): + arr = arr1d + + # default asarray gives objects + result = np.asarray(arr) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to object dtype (same as default) + result = np.asarray(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(arr, dtype="int64") + tm.assert_numpy_array_equal(result, arr.asi8) + + # to other dtypes + msg = r"float\(\) argument must be a string or a( real)? 
number, not 'Period'" + with pytest.raises(TypeError, match=msg): + np.asarray(arr, dtype="float64") + + result = np.asarray(arr, dtype="S20") + expected = np.asarray(arr).astype("S20") + tm.assert_numpy_array_equal(result, expected) + + def test_strftime(self, arr1d): + arr = arr1d + + result = arr.strftime("%Y") + expected = np.array([per.strftime("%Y") for per in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_strftime_nat(self): + # GH 29578 + arr = PeriodArray(PeriodIndex(["2019-01-01", NaT], dtype="period[D]")) + + result = arr.strftime("%Y-%m-%d") + expected = np.array(["2019-01-01", np.nan], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "arr,casting_nats", + [ + ( + TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + (NaT, np.timedelta64("NaT", "ns")), + ), + ( + pd.date_range("2000-01-01", periods=3, freq="D")._data, + (NaT, np.datetime64("NaT", "ns")), + ), + (pd.period_range("2000-01-01", periods=3, freq="D")._data, (NaT,)), + ], + ids=lambda x: type(x).__name__, +) +def test_casting_nat_setitem_array(arr, casting_nats): + expected = type(arr)._from_sequence([NaT, arr[1], arr[2]]) + + for nat in casting_nats: + arr = arr.copy() + arr[0] = nat + tm.assert_equal(arr, expected) + + +@pytest.mark.parametrize( + "arr,non_casting_nats", + [ + ( + TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + (np.datetime64("NaT", "ns"), NaT.value), + ), + ( + pd.date_range("2000-01-01", periods=3, freq="D")._data, + (np.timedelta64("NaT", "ns"), NaT.value), + ), + ( + pd.period_range("2000-01-01", periods=3, freq="D")._data, + (np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns"), NaT.value), + ), + ], + ids=lambda x: type(x).__name__, +) +def test_invalid_nat_setitem_array(arr, non_casting_nats): + msg = ( + "value should be a '(Timestamp|Timedelta|Period)', 'NaT', or array of those. " + "Got '(timedelta64|datetime64|int)' instead." 
+ ) + + for nat in non_casting_nats: + with pytest.raises(TypeError, match=msg): + arr[0] = nat + + +@pytest.mark.parametrize( + "arr", + [ + pd.date_range("2000", periods=4).array, + pd.timedelta_range("2000", periods=4).array, + ], +) +def test_to_numpy_extra(arr): + arr[0] = NaT + original = arr.copy() + + result = arr.to_numpy() + assert np.isnan(result[0]) + + result = arr.to_numpy(dtype="int64") + assert result[0] == -9223372036854775808 + + result = arr.to_numpy(dtype="int64", na_value=0) + assert result[0] == 0 + + result = arr.to_numpy(na_value=arr[1].to_numpy()) + assert result[0] == result[1] + + result = arr.to_numpy(na_value=arr[1].to_numpy(copy=False)) + assert result[0] == result[1] + + tm.assert_equal(arr, original) + + +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize( + "values", + [ + pd.to_datetime(["2020-01-01", "2020-02-01"]), + TimedeltaIndex([1, 2], unit="D"), + PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), + ], +) +@pytest.mark.parametrize( + "klass", + [ + list, + np.array, + pd.array, + pd.Series, + pd.Index, + pd.Categorical, + pd.CategoricalIndex, + ], +) +def test_searchsorted_datetimelike_with_listlike(values, klass, as_index): + # https://github.com/pandas-dev/pandas/issues/32762 + if not as_index: + values = values._data + + result = values.searchsorted(klass(values)) + expected = np.array([0, 1], dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + pd.to_datetime(["2020-01-01", "2020-02-01"]), + TimedeltaIndex([1, 2], unit="D"), + PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), + ], +) +@pytest.mark.parametrize( + "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2] +) +def test_searchsorted_datetimelike_with_listlike_invalid_dtype(values, arg): + # https://github.com/pandas-dev/pandas/issues/32762 + msg = "[Unexpected type|Cannot compare]" + with pytest.raises(TypeError, match=msg): + values.searchsorted(arg) + + +@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series]) +def test_period_index_construction_from_strings(klass): + # https://github.com/pandas-dev/pandas/issues/26109 + strings = ["2020Q1", "2020Q2"] * 2 + data = klass(strings) + result = PeriodIndex(data, freq="Q") + expected = PeriodIndex([Period(s) for s in strings]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) +def test_from_pandas_array(dtype): + # GH#24615 + data = np.array([1, 2, 3], dtype=dtype) + arr = PandasArray(data) + + cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype] + + result = cls(arr) + expected = cls(data) + tm.assert_extension_array_equal(result, expected) + + result = cls._from_sequence(arr) + expected = cls._from_sequence(data) + tm.assert_extension_array_equal(result, expected) + + func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype] + result = func(arr)[0] + expected = func(data)[0] + tm.assert_equal(result, expected) + + func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype] + result = func(arr).array + expected = func(data).array + tm.assert_equal(result, expected) + + # Let's check the Indexes while we're here + idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype] + result = idx_cls(arr) + expected = idx_cls(data) + tm.assert_index_equal(result, expected) + + +@pytest.fixture( + params=[ + "memoryview", + "array", + pytest.param("dask", marks=td.skip_if_no("dask.array")), + pytest.param("xarray", 
marks=td.skip_if_no("xarray")), + ] +) +def array_likes(request): + """ + Fixture giving a numpy array and a parametrized 'data' object, which can + be a memoryview, array, dask or xarray object created from the numpy array. + """ + # GH#24539 recognize e.g xarray, dask, ... + arr = np.array([1, 2, 3], dtype=np.int64) + + name = request.param + if name == "memoryview": + data = memoryview(arr) + elif name == "array": + # stdlib array + import array + + data = array.array("i", arr) + elif name == "dask": + import dask.array + + data = dask.array.array(arr) + elif name == "xarray": + import xarray as xr + + data = xr.DataArray(arr) + + return arr, data + + +@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) +def test_from_obscure_array(dtype, array_likes): + # GH#24539 recognize e.g xarray, dask, ... + # Note: we dont do this for PeriodArray bc _from_sequence won't accept + # an array of integers + # TODO: could check with arraylike of Period objects + arr, data = array_likes + + cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype] + + expected = cls(arr) + result = cls._from_sequence(data) + tm.assert_extension_array_equal(result, expected) + + func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype] + result = func(arr)[0] + expected = func(data)[0] + tm.assert_equal(result, expected) + + if not isinstance(data, memoryview): + # FIXME(GH#44431) these raise on memoryview and attempted fix + # fails on py3.10 + func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype] + result = func(arr).array + expected = func(data).array + tm.assert_equal(result, expected) + + # Let's check the Indexes while we're here + idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype] + result = idx_cls(arr) + expected = idx_cls(data) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py new file mode 100644 index 00000000..af1a292a --- /dev/null +++ b/pandas/tests/arrays/test_datetimes.py @@ -0,0 +1,641 @@ +""" +Tests for DatetimeArray +""" +import operator + +import numpy as np +import pytest + +from pandas._libs.tslibs import tz_compare +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray + + +class TestNonNano: + @pytest.fixture(params=["s", "ms", "us"]) + def unit(self, request): + """Fixture returning parametrized time units""" + return request.param + + @pytest.fixture + def reso(self, unit): + """Fixture returning datetime resolution for a given time unit""" + return { + "s": NpyDatetimeUnit.NPY_FR_s.value, + "ms": NpyDatetimeUnit.NPY_FR_ms.value, + "us": NpyDatetimeUnit.NPY_FR_us.value, + }[unit] + + @pytest.fixture + def dtype(self, unit, tz_naive_fixture): + tz = tz_naive_fixture + if tz is None: + return np.dtype(f"datetime64[{unit}]") + else: + return DatetimeTZDtype(unit=unit, tz=tz) + + @pytest.fixture + def dta_dti(self, unit, dtype): + tz = getattr(dtype, "tz", None) + + dti = pd.date_range("2016-01-01", periods=55, freq="D", tz=tz) + if tz is None: + arr = np.asarray(dti).astype(f"M8[{unit}]") + else: + arr = np.asarray(dti.tz_convert("UTC").tz_localize(None)).astype( + f"M8[{unit}]" + ) + + dta = DatetimeArray._simple_new(arr, dtype=dtype) + return dta, dti + + @pytest.fixture + def dta(self, dta_dti): + dta, dti = dta_dti + return dta + + def test_non_nano(self, unit, reso, dtype): + arr = np.arange(5, 
dtype=np.int64).view(f"M8[{unit}]") + dta = DatetimeArray._simple_new(arr, dtype=dtype) + + assert dta.dtype == dtype + assert dta[0]._reso == reso + assert tz_compare(dta.tz, dta[0].tz) + assert (dta[0] == dta[:1]).all() + + @pytest.mark.filterwarnings( + "ignore:weekofyear and week have been deprecated:FutureWarning" + ) + @pytest.mark.parametrize( + "field", DatetimeArray._field_ops + DatetimeArray._bool_ops + ) + def test_fields(self, unit, reso, field, dtype, dta_dti): + dta, dti = dta_dti + + # FIXME: assert (dti == dta).all() + + res = getattr(dta, field) + expected = getattr(dti._data, field) + tm.assert_numpy_array_equal(res, expected) + + def test_normalize(self, unit): + dti = pd.date_range("2016-01-01 06:00:00", periods=55, freq="D") + arr = np.asarray(dti).astype(f"M8[{unit}]") + + dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) + + assert not dta.is_normalized + + # TODO: simplify once we can just .astype to other unit + exp = np.asarray(dti.normalize()).astype(f"M8[{unit}]") + expected = DatetimeArray._simple_new(exp, dtype=exp.dtype) + + res = dta.normalize() + tm.assert_extension_array_equal(res, expected) + + def test_simple_new_requires_match(self, unit): + arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") + dtype = DatetimeTZDtype(unit, "UTC") + + dta = DatetimeArray._simple_new(arr, dtype=dtype) + assert dta.dtype == dtype + + wrong = DatetimeTZDtype("ns", "UTC") + with pytest.raises(AssertionError, match=""): + DatetimeArray._simple_new(arr, dtype=wrong) + + def test_std_non_nano(self, unit): + dti = pd.date_range("2016-01-01", periods=55, freq="D") + arr = np.asarray(dti).astype(f"M8[{unit}]") + + dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) + + # we should match the nano-reso std, but floored to our reso. 
+ res = dta.std() + assert res._reso == dta._reso + assert res == dti.std().floor(unit) + + @pytest.mark.filterwarnings("ignore:Converting to PeriodArray.*:UserWarning") + def test_to_period(self, dta_dti): + dta, dti = dta_dti + result = dta.to_period("D") + expected = dti._data.to_period("D") + + tm.assert_extension_array_equal(result, expected) + + def test_iter(self, dta): + res = next(iter(dta)) + expected = dta[0] + + assert type(res) is pd.Timestamp + assert res.value == expected.value + assert res._reso == expected._reso + assert res == expected + + def test_astype_object(self, dta): + result = dta.astype(object) + assert all(x._reso == dta._reso for x in result) + assert all(x == y for x, y in zip(result, dta)) + + def test_to_pydatetime(self, dta_dti): + dta, dti = dta_dti + + result = dta.to_pydatetime() + expected = dti.to_pydatetime() + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("meth", ["time", "timetz", "date"]) + def test_time_date(self, dta_dti, meth): + dta, dti = dta_dti + + result = getattr(dta, meth) + expected = getattr(dti, meth) + tm.assert_numpy_array_equal(result, expected) + + def test_format_native_types(self, unit, reso, dtype, dta_dti): + # In this case we should get the same formatted values with our nano + # version dti._data as we do with the non-nano dta + dta, dti = dta_dti + + res = dta._format_native_types() + exp = dti._data._format_native_types() + tm.assert_numpy_array_equal(res, exp) + + def test_repr(self, dta_dti, unit): + dta, dti = dta_dti + + assert repr(dta) == repr(dti._data).replace("[ns", f"[{unit}") + + # TODO: tests with td64 + def test_compare_mismatched_resolutions(self, comparison_op): + # comparison that numpy gets wrong bc of silent overflows + op = comparison_op + + iinfo = np.iinfo(np.int64) + vals = np.array([iinfo.min, iinfo.min + 1, iinfo.max], dtype=np.int64) + + # Construct so that arr2[1] < arr[1] < arr[2] < arr2[2] + arr = np.array(vals).view("M8[ns]") + arr2 = arr.view("M8[s]") + + left = DatetimeArray._simple_new(arr, dtype=arr.dtype) + right = DatetimeArray._simple_new(arr2, dtype=arr2.dtype) + + if comparison_op is operator.eq: + expected = np.array([False, False, False]) + elif comparison_op is operator.ne: + expected = np.array([True, True, True]) + elif comparison_op in [operator.lt, operator.le]: + expected = np.array([False, False, True]) + else: + expected = np.array([False, True, False]) + + result = op(left, right) + tm.assert_numpy_array_equal(result, expected) + + result = op(left[1], right) + tm.assert_numpy_array_equal(result, expected) + + if op not in [operator.eq, operator.ne]: + # check that numpy still gets this wrong; if it is fixed we may be + # able to remove compare_mismatched_resolutions + np_res = op(left._ndarray, right._ndarray) + tm.assert_numpy_array_equal(np_res[1:], ~expected[1:]) + + +class TestDatetimeArrayComparisons: + # TODO: merge this into tests/arithmetic/test_datetime64 once it is + # sufficiently robust + + def test_cmp_dt64_arraylike_tznaive(self, comparison_op): + # arbitrary tz-naive DatetimeIndex + op = comparison_op + + dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None) + arr = DatetimeArray(dti) + assert arr.freq == dti.freq + assert arr.tz == dti.tz + + right = dti + + expected = np.ones(len(arr), dtype=bool) + if comparison_op.__name__ in ["ne", "gt", "lt"]: + # for these the comparisons should be all-False + expected = ~expected + + result = op(arr, arr) + tm.assert_numpy_array_equal(result, expected) + for other in [ + right, + 
np.array(right), + list(right), + tuple(right), + right.astype(object), + ]: + result = op(arr, other) + tm.assert_numpy_array_equal(result, expected) + + result = op(other, arr) + tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeArray: + def test_astype_non_nano_tznaive(self): + dti = pd.date_range("2016-01-01", periods=3) + + res = dti.astype("M8[s]") + assert res.dtype == "M8[s]" + + dta = dti._data + res = dta.astype("M8[s]") + assert res.dtype == "M8[s]" + assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray + + def test_astype_non_nano_tzaware(self): + dti = pd.date_range("2016-01-01", periods=3, tz="UTC") + + res = dti.astype("M8[s, US/Pacific]") + assert res.dtype == "M8[s, US/Pacific]" + + dta = dti._data + res = dta.astype("M8[s, US/Pacific]") + assert res.dtype == "M8[s, US/Pacific]" + + # from non-nano to non-nano, preserving reso + res2 = res.astype("M8[s, UTC]") + assert res2.dtype == "M8[s, UTC]" + assert not tm.shares_memory(res2, res) + + res3 = res.astype("M8[s, UTC]", copy=False) + assert res2.dtype == "M8[s, UTC]" + assert tm.shares_memory(res3, res) + + def test_astype_to_same(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) + assert result is arr + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"]) + @pytest.mark.parametrize( + "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"] + ) + def test_astype_copies(self, dtype, other): + # https://github.com/pandas-dev/pandas/pull/32490 + ser = pd.Series([1, 2], dtype=dtype) + orig = ser.copy() + + warn = None + if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"): + # deprecated in favor of tz_localize + warn = FutureWarning + + with tm.assert_produces_warning(warn): + t = ser.astype(other) + t[:] = pd.NaT + tm.assert_series_equal(ser, orig) + + @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) + def test_astype_int(self, dtype): + arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + expected = arr.astype(expected_dtype) + + warn = None + if dtype != expected_dtype: + warn = FutureWarning + msg = " will return exactly the specified dtype" + with tm.assert_produces_warning(warn, match=msg): + result = arr.astype(dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + def test_tz_setter_raises(self): + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) + with pytest.raises(AttributeError, match="tz_localize"): + arr.tz = "UTC" + + def test_setitem_str_impute_tz(self, tz_naive_fixture): + # Like for getitem, if we are passed a naive-like string, we impute + # our own timezone. 
+ tz = tz_naive_fixture + + data = np.array([1, 2, 3], dtype="M8[ns]") + dtype = data.dtype if tz is None else DatetimeTZDtype(tz=tz) + arr = DatetimeArray(data, dtype=dtype) + expected = arr.copy() + + ts = pd.Timestamp("2020-09-08 16:50").tz_localize(tz) + setter = str(ts.tz_localize(None)) + + # Setting a scalar tznaive string + expected[0] = ts + arr[0] = setter + tm.assert_equal(arr, expected) + + # Setting a listlike of tznaive strings + expected[1] = ts + arr[:2] = [setter, setter] + tm.assert_equal(arr, expected) + + def test_setitem_different_tz_raises(self): + data = np.array([1, 2, 3], dtype="M8[ns]") + arr = DatetimeArray(data, copy=False, dtype=DatetimeTZDtype(tz="US/Central")) + with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): + arr[0] = pd.Timestamp("2000") + + ts = pd.Timestamp("2000", tz="US/Eastern") + with pytest.raises(ValueError, match="US/Central"): + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezones" + ): + arr[0] = ts + # once deprecation is enforced + # assert arr[0] == ts.tz_convert("US/Central") + + def test_setitem_clears_freq(self): + a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central")) + a[0] = pd.Timestamp("2000", tz="US/Central") + assert a.freq is None + + @pytest.mark.parametrize( + "obj", + [ + pd.Timestamp("2021-01-01"), + pd.Timestamp("2021-01-01").to_datetime64(), + pd.Timestamp("2021-01-01").to_pydatetime(), + ], + ) + def test_setitem_objects(self, obj): + # make sure we accept datetime64 and datetime in addition to Timestamp + dti = pd.date_range("2000", periods=2, freq="D") + arr = dti._data + + arr[0] = obj + assert arr[0] == obj + + def test_repeat_preserves_tz(self): + dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") + arr = DatetimeArray(dti) + + repeated = arr.repeat([1, 1]) + + # preserves tz and values, but not freq + expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype) + tm.assert_equal(repeated, expected) + + def test_value_counts_preserves_tz(self): + dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") + arr = DatetimeArray(dti).repeat([4, 3]) + + result = arr.value_counts() + + # Note: not tm.assert_index_equal, since `freq`s do not match + assert result.index.equals(dti) + + arr[-2] = pd.NaT + result = arr.value_counts(dropna=False) + expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("method", ["pad", "backfill"]) + def test_fillna_preserves_tz(self, method): + dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central") + arr = DatetimeArray(dti, copy=True) + arr[2] = pd.NaT + + fill_val = dti[1] if method == "pad" else dti[3] + expected = DatetimeArray._from_sequence( + [dti[0], dti[1], fill_val, dti[3], dti[4]], + dtype=DatetimeTZDtype(tz="US/Central"), + ) + + result = arr.fillna(method=method) + tm.assert_extension_array_equal(result, expected) + + # assert that arr and dti were not modified in-place + assert arr[2] is pd.NaT + assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central") + + def test_fillna_2d(self): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + dta = dti._data.reshape(3, 2).copy() + dta[0, 1] = pd.NaT + dta[1, 0] = pd.NaT + + res1 = dta.fillna(method="pad") + expected1 = dta.copy() + expected1[1, 0] = dta[0, 0] + tm.assert_extension_array_equal(res1, expected1) + + res2 = dta.fillna(method="backfill") + expected2 = dta.copy() + expected2 = dta.copy() + expected2[1, 0] = dta[2, 
0] + expected2[0, 1] = dta[1, 1] + tm.assert_extension_array_equal(res2, expected2) + + # with different ordering for underlying ndarray; behavior should + # be unchanged + dta2 = dta._from_backing_data(dta._ndarray.copy(order="F")) + assert dta2._ndarray.flags["F_CONTIGUOUS"] + assert not dta2._ndarray.flags["C_CONTIGUOUS"] + tm.assert_extension_array_equal(dta, dta2) + + res3 = dta2.fillna(method="pad") + tm.assert_extension_array_equal(res3, expected1) + + res4 = dta2.fillna(method="backfill") + tm.assert_extension_array_equal(res4, expected2) + + # test the DataFrame method while we're here + df = pd.DataFrame(dta) + res = df.fillna(method="pad") + expected = pd.DataFrame(expected1) + tm.assert_frame_equal(res, expected) + + res = df.fillna(method="backfill") + expected = pd.DataFrame(expected2) + tm.assert_frame_equal(res, expected) + + def test_array_interface_tz(self): + tz = "US/Central" + data = DatetimeArray(pd.date_range("2017", periods=2, tz=tz)) + result = np.asarray(data) + + expected = np.array( + [ + pd.Timestamp("2017-01-01T00:00:00", tz=tz), + pd.Timestamp("2017-01-02T00:00:00", tz=tz), + ], + dtype=object, + ) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype="M8[ns]") + + expected = np.array( + ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]" + ) + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self): + data = DatetimeArray(pd.date_range("2017", periods=2)) + expected = np.array( + ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]" + ) + + result = np.asarray(data) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype=object) + expected = np.array( + [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")], + dtype=object, + ) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("index", [True, False]) + def test_searchsorted_different_tz(self, index): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = DatetimeArray(data, freq="D").tz_localize("Asia/Tokyo") + if index: + arr = pd.Index(arr) + + expected = arr.searchsorted(arr[2]) + result = arr.searchsorted(arr[2].tz_convert("UTC")) + assert result == expected + + expected = arr.searchsorted(arr[2:6]) + result = arr.searchsorted(arr[2:6].tz_convert("UTC")) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("index", [True, False]) + def test_searchsorted_tzawareness_compat(self, index): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = DatetimeArray(data, freq="D") + if index: + arr = pd.Index(arr) + + mismatch = arr.tz_localize("Asia/Tokyo") + + msg = "Cannot compare tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + arr.searchsorted(mismatch[0]) + with pytest.raises(TypeError, match=msg): + arr.searchsorted(mismatch) + + with pytest.raises(TypeError, match=msg): + mismatch.searchsorted(arr[0]) + with pytest.raises(TypeError, match=msg): + mismatch.searchsorted(arr) + + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + 1.0, + np.timedelta64("NaT"), + pd.Timedelta(days=2), + "invalid", + np.arange(10, dtype="i8") * 24 * 3600 * 10**9, + np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10**9, + pd.Timestamp("2021-01-01").to_period("D"), + ], + ) + @pytest.mark.parametrize("index", [True, False]) + def test_searchsorted_invalid_types(self, other, index): + data = 
np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = DatetimeArray(data, freq="D") + if index: + arr = pd.Index(arr) + + msg = "|".join( + [ + "searchsorted requires compatible dtype or scalar", + "value should be a 'Timestamp', 'NaT', or array of those. Got", + ] + ) + with pytest.raises(TypeError, match=msg): + arr.searchsorted(other) + + def test_shift_fill_value(self): + dti = pd.date_range("2016-01-01", periods=3) + + dta = dti._data + expected = DatetimeArray(np.roll(dta._data, 1)) + + fv = dta[-1] + for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]: + result = dta.shift(1, fill_value=fill_value) + tm.assert_datetime_array_equal(result, expected) + + dta = dta.tz_localize("UTC") + expected = expected.tz_localize("UTC") + fv = dta[-1] + for fill_value in [fv, fv.to_pydatetime()]: + result = dta.shift(1, fill_value=fill_value) + tm.assert_datetime_array_equal(result, expected) + + def test_shift_value_tzawareness_mismatch(self): + dti = pd.date_range("2016-01-01", periods=3) + + dta = dti._data + + fv = dta[-1].tz_localize("UTC") + for invalid in [fv, fv.to_pydatetime()]: + with pytest.raises(TypeError, match="Cannot compare"): + dta.shift(1, fill_value=invalid) + + dta = dta.tz_localize("UTC") + fv = dta[-1].tz_localize(None) + for invalid in [fv, fv.to_pydatetime(), fv.to_datetime64()]: + with pytest.raises(TypeError, match="Cannot compare"): + dta.shift(1, fill_value=invalid) + + def test_shift_requires_tzmatch(self): + # since filling is setitem-like, we require a matching timezone, + # not just matching tzawawreness + dti = pd.date_range("2016-01-01", periods=3, tz="UTC") + dta = dti._data + + fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific") + + msg = "Timezones don't match. 'UTC' != 'US/Pacific'" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezones" + ): + dta.shift(1, fill_value=fill_value) + + # once deprecation is enforced + # expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC")) + # tm.assert_equal(result, expected) + + def test_tz_localize_t2d(self): + dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific") + dta = dti._data.reshape(3, 4) + result = dta.tz_localize(None) + + expected = dta.ravel().tz_localize(None).reshape(dta.shape) + tm.assert_datetime_array_equal(result, expected) + + roundtrip = expected.tz_localize("US/Pacific") + tm.assert_datetime_array_equal(roundtrip, dta) diff --git a/pandas/tests/arrays/test_ndarray_backed.py b/pandas/tests/arrays/test_ndarray_backed.py new file mode 100644 index 00000000..c48fb7e7 --- /dev/null +++ b/pandas/tests/arrays/test_ndarray_backed.py @@ -0,0 +1,75 @@ +""" +Tests for subclasses of NDArrayBackedExtensionArray +""" +import numpy as np + +from pandas import ( + CategoricalIndex, + date_range, +) +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + PandasArray, + TimedeltaArray, +) + + +class TestEmpty: + def test_empty_categorical(self): + ci = CategoricalIndex(["a", "b", "c"], ordered=True) + dtype = ci.dtype + + # case with int8 codes + shape = (4,) + result = Categorical._empty(shape, dtype=dtype) + assert isinstance(result, Categorical) + assert result.shape == shape + assert result._ndarray.dtype == np.int8 + + # case where repr would segfault if we didn't override base implementation + result = Categorical._empty((4096,), dtype=dtype) + assert isinstance(result, Categorical) + assert result.shape == (4096,) + assert result._ndarray.dtype == np.int8 + repr(result) + + # case with int16 
codes + ci = CategoricalIndex(list(range(512)) * 4, ordered=False) + dtype = ci.dtype + result = Categorical._empty(shape, dtype=dtype) + assert isinstance(result, Categorical) + assert result.shape == shape + assert result._ndarray.dtype == np.int16 + + def test_empty_dt64tz(self): + dti = date_range("2016-01-01", periods=2, tz="Asia/Tokyo") + dtype = dti.dtype + + shape = (0,) + result = DatetimeArray._empty(shape, dtype=dtype) + assert result.dtype == dtype + assert isinstance(result, DatetimeArray) + assert result.shape == shape + + def test_empty_dt64(self): + shape = (3, 9) + result = DatetimeArray._empty(shape, dtype="datetime64[ns]") + assert isinstance(result, DatetimeArray) + assert result.shape == shape + + def test_empty_td64(self): + shape = (3, 9) + result = TimedeltaArray._empty(shape, dtype="m8[ns]") + assert isinstance(result, TimedeltaArray) + assert result.shape == shape + + def test_empty_pandas_array(self): + arr = PandasArray(np.array([1, 2])) + dtype = arr.dtype + + shape = (3, 9) + result = PandasArray._empty(shape, dtype=dtype) + assert isinstance(result, PandasArray) + assert result.dtype == dtype + assert result.shape == shape diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py new file mode 100644 index 00000000..a4b442ff --- /dev/null +++ b/pandas/tests/arrays/test_period.py @@ -0,0 +1,182 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs.period import IncompatibleFrequency + +from pandas.core.dtypes.base import _registry as registry +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ( + PeriodArray, + period_array, +) + +# ---------------------------------------------------------------------------- +# Dtype + + +def test_registered(): + assert PeriodDtype in registry.dtypes + result = registry.find("Period[D]") + expected = PeriodDtype("D") + assert result == expected + + +# ---------------------------------------------------------------------------- +# period_array + + +def test_asi8(): + result = period_array(["2000", "2001", None], freq="D").asi8 + expected = np.array([10957, 11323, iNaT]) + tm.assert_numpy_array_equal(result, expected) + + +def test_take_raises(): + arr = period_array(["2000", "2001"], freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W")) + + msg = "value should be a 'Period' or 'NaT'. 
Got 'str' instead" + with pytest.raises(TypeError, match=msg): + arr.take([0, -1], allow_fill=True, fill_value="foo") + + +def test_fillna_raises(): + arr = period_array(["2000", "2001", "2002"], freq="D") + with pytest.raises(ValueError, match="Length"): + arr.fillna(arr[:2]) + + +def test_fillna_copies(): + arr = period_array(["2000", "2001", "2002"], freq="D") + result = arr.fillna(pd.Period("2000", "D")) + assert result is not arr + + +# ---------------------------------------------------------------------------- +# setitem + + +@pytest.mark.parametrize( + "key, value, expected", + [ + ([0], pd.Period("2000", "D"), [10957, 1, 2]), + ([0], None, [iNaT, 1, 2]), + ([0], np.nan, [iNaT, 1, 2]), + ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3), + ( + [0, 1, 2], + [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")], + [10957, 11323, 11688], + ), + ], +) +def test_setitem(key, value, expected): + arr = PeriodArray(np.arange(3), freq="D") + expected = PeriodArray(expected, freq="D") + arr[key] = value + tm.assert_period_array_equal(arr, expected) + + +def test_setitem_raises_incompatible_freq(): + arr = PeriodArray(np.arange(3), freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr[0] = pd.Period("2000", freq="A") + + other = period_array(["2000", "2001"], freq="A") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr[[0, 1]] = other + + +def test_setitem_raises_length(): + arr = PeriodArray(np.arange(3), freq="D") + with pytest.raises(ValueError, match="length"): + arr[[0, 1]] = [pd.Period("2000", freq="D")] + + +def test_setitem_raises_type(): + arr = PeriodArray(np.arange(3), freq="D") + with pytest.raises(TypeError, match="int"): + arr[0] = 1 + + +# ---------------------------------------------------------------------------- +# Ops + + +def test_sub_period(): + arr = period_array(["2000", "2001"], freq="D") + other = pd.Period("2000", freq="M") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr - other + + +def test_sub_period_overflow(): + # GH#47538 + dti = pd.date_range("1677-09-22", periods=2, freq="D") + pi = dti.to_period("ns") + + per = pd.Period._from_ordinal(10**14, pi.freq) + + with pytest.raises(OverflowError, match="Overflow in int64 addition"): + pi - per + + with pytest.raises(OverflowError, match="Overflow in int64 addition"): + per - pi + + +# ---------------------------------------------------------------------------- +# Methods + + +@pytest.mark.parametrize( + "other", + [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")], +) +def test_where_different_freq_raises(other): + # GH#45768 The PeriodArray method raises, the Series method coerces + ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D")) + cond = np.array([True, False, True]) + + with pytest.raises(IncompatibleFrequency, match="freq"): + ser.array._where(cond, other) + + res = ser.where(cond, other) + expected = ser.astype(object).where(cond, other) + tm.assert_series_equal(res, expected) + + +# ---------------------------------------------------------------------------- +# Printing + + +def test_repr_small(): + arr = period_array(["2000", "2001"], freq="D") + result = str(arr) + expected = ( + "\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]" + ) + assert result == expected + + +def test_repr_large(): + arr = period_array(["2000", "2001"] * 500, freq="D") + result = str(arr) + expected = ( + "\n" + "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " + "'2000-01-01',\n" + " '2001-01-01', 
'2000-01-01', '2001-01-01', '2000-01-01', " + "'2001-01-01',\n" + " ...\n" + " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " + "'2000-01-01',\n" + " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " + "'2001-01-01']\n" + "Length: 1000, dtype: period[D]" + ) + assert result == expected diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py new file mode 100644 index 00000000..b3b79bd9 --- /dev/null +++ b/pandas/tests/arrays/test_timedeltas.py @@ -0,0 +1,293 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit + +import pandas as pd +from pandas import Timedelta +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, +) + + +class TestNonNano: + @pytest.fixture(params=["s", "ms", "us"]) + def unit(self, request): + return request.param + + @pytest.fixture + def reso(self, unit): + if unit == "s": + return NpyDatetimeUnit.NPY_FR_s.value + elif unit == "ms": + return NpyDatetimeUnit.NPY_FR_ms.value + elif unit == "us": + return NpyDatetimeUnit.NPY_FR_us.value + else: + raise NotImplementedError(unit) + + @pytest.fixture + def tda(self, unit): + arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") + return TimedeltaArray._simple_new(arr, dtype=arr.dtype) + + def test_non_nano(self, unit, reso): + arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") + tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype) + + assert tda.dtype == arr.dtype + assert tda[0]._reso == reso + + @pytest.mark.parametrize("field", TimedeltaArray._field_ops) + def test_fields(self, tda, field): + as_nano = tda._ndarray.astype("m8[ns]") + tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype) + + result = getattr(tda, field) + expected = getattr(tda_nano, field) + tm.assert_numpy_array_equal(result, expected) + + def test_to_pytimedelta(self, tda): + as_nano = tda._ndarray.astype("m8[ns]") + tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype) + + result = tda.to_pytimedelta() + expected = tda_nano.to_pytimedelta() + tm.assert_numpy_array_equal(result, expected) + + def test_total_seconds(self, unit, tda): + as_nano = tda._ndarray.astype("m8[ns]") + tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype) + + result = tda.total_seconds() + expected = tda_nano.total_seconds() + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "nat", [np.datetime64("NaT", "ns"), np.datetime64("NaT", "us")] + ) + def test_add_nat_datetimelike_scalar(self, nat, tda): + result = tda + nat + assert isinstance(result, DatetimeArray) + assert result._reso == tda._reso + assert result.isna().all() + + result = nat + tda + assert isinstance(result, DatetimeArray) + assert result._reso == tda._reso + assert result.isna().all() + + def test_add_pdnat(self, tda): + result = tda + pd.NaT + assert isinstance(result, TimedeltaArray) + assert result._reso == tda._reso + assert result.isna().all() + + result = pd.NaT + tda + assert isinstance(result, TimedeltaArray) + assert result._reso == tda._reso + assert result.isna().all() + + # TODO: 2022-07-11 this is the only test that gets to DTA.tz_convert + # or tz_localize with non-nano; implement tests specific to that. 
+ def test_add_datetimelike_scalar(self, tda, tz_naive_fixture): + ts = pd.Timestamp("2016-01-01", tz=tz_naive_fixture) + + msg = "with mis-matched resolutions" + with pytest.raises(NotImplementedError, match=msg): + # mismatched reso -> check that we don't give an incorrect result + tda + ts + with pytest.raises(NotImplementedError, match=msg): + # mismatched reso -> check that we don't give an incorrect result + ts + tda + + ts = ts._as_unit(tda._unit) + + exp_values = tda._ndarray + ts.asm8 + expected = ( + DatetimeArray._simple_new(exp_values, dtype=exp_values.dtype) + .tz_localize("UTC") + .tz_convert(ts.tz) + ) + + result = tda + ts + tm.assert_extension_array_equal(result, expected) + + result = ts + tda + tm.assert_extension_array_equal(result, expected) + + def test_mul_scalar(self, tda): + other = 2 + result = tda * other + expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) + tm.assert_extension_array_equal(result, expected) + assert result._reso == tda._reso + + def test_mul_listlike(self, tda): + other = np.arange(len(tda)) + result = tda * other + expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) + tm.assert_extension_array_equal(result, expected) + assert result._reso == tda._reso + + def test_mul_listlike_object(self, tda): + other = np.arange(len(tda)) + result = tda * other.astype(object) + expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) + tm.assert_extension_array_equal(result, expected) + assert result._reso == tda._reso + + def test_div_numeric_scalar(self, tda): + other = 2 + result = tda / other + expected = TimedeltaArray._simple_new(tda._ndarray / other, dtype=tda.dtype) + tm.assert_extension_array_equal(result, expected) + assert result._reso == tda._reso + + def test_div_td_scalar(self, tda): + other = timedelta(seconds=1) + result = tda / other + expected = tda._ndarray / np.timedelta64(1, "s") + tm.assert_numpy_array_equal(result, expected) + + def test_div_numeric_array(self, tda): + other = np.arange(len(tda)) + result = tda / other + expected = TimedeltaArray._simple_new(tda._ndarray / other, dtype=tda.dtype) + tm.assert_extension_array_equal(result, expected) + assert result._reso == tda._reso + + def test_div_td_array(self, tda): + other = tda._ndarray + tda._ndarray[-1] + result = tda / other + expected = tda._ndarray / other + tm.assert_numpy_array_equal(result, expected) + + +class TestTimedeltaArray: + @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) + def test_astype_int(self, dtype): + arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")]) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + expected = arr.astype(expected_dtype) + + warn = None + if dtype != expected_dtype: + warn = FutureWarning + msg = " will return exactly the specified dtype" + with tm.assert_produces_warning(warn, match=msg): + result = arr.astype(dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + def test_setitem_clears_freq(self): + a = TimedeltaArray(pd.timedelta_range("1H", periods=2, freq="H")) + a[0] = Timedelta("1H") + assert a.freq is None + + @pytest.mark.parametrize( + "obj", + [ + Timedelta(seconds=1), + Timedelta(seconds=1).to_timedelta64(), + Timedelta(seconds=1).to_pytimedelta(), + ], + ) + def test_setitem_objects(self, obj): + # make sure we accept timedelta64 and timedelta in addition to Timedelta + tdi = 
pd.timedelta_range("2 Days", periods=4, freq="H") + arr = TimedeltaArray(tdi, freq=tdi.freq) + + arr[0] = obj + assert arr[0] == Timedelta(seconds=1) + + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + 1.0, + np.datetime64("NaT"), + pd.Timestamp("2021-01-01"), + "invalid", + np.arange(10, dtype="i8") * 24 * 3600 * 10**9, + (np.arange(10) * 24 * 3600 * 10**9).view("datetime64[ns]"), + pd.Timestamp("2021-01-01").to_period("D"), + ], + ) + @pytest.mark.parametrize("index", [True, False]) + def test_searchsorted_invalid_types(self, other, index): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + arr = TimedeltaArray(data, freq="D") + if index: + arr = pd.Index(arr) + + msg = "|".join( + [ + "searchsorted requires compatible dtype or scalar", + "value should be a 'Timedelta', 'NaT', or array of those. Got", + ] + ) + with pytest.raises(TypeError, match=msg): + arr.searchsorted(other) + + +class TestUnaryOps: + def test_abs(self): + vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + evals = np.array([3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]") + expected = TimedeltaArray(evals) + + result = abs(arr) + tm.assert_timedelta_array_equal(result, expected) + + result2 = np.abs(arr) + tm.assert_timedelta_array_equal(result2, expected) + + def test_pos(self): + vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + result = +arr + tm.assert_timedelta_array_equal(result, arr) + assert not tm.shares_memory(result, arr) + + result2 = np.positive(arr) + tm.assert_timedelta_array_equal(result2, arr) + assert not tm.shares_memory(result2, arr) + + def test_neg(self): + vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + evals = np.array([3600 * 10**9, "NaT", -7200 * 10**9], dtype="m8[ns]") + expected = TimedeltaArray(evals) + + result = -arr + tm.assert_timedelta_array_equal(result, expected) + + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) + + def test_neg_freq(self): + tdi = pd.timedelta_range("2 Days", periods=4, freq="H") + arr = TimedeltaArray(tdi, freq=tdi.freq) + + expected = TimedeltaArray(-tdi._data, freq=-tdi.freq) + + result = -arr + tm.assert_timedelta_array_equal(result, expected) + + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) diff --git a/pandas/tests/arrays/timedeltas/__init__.py b/pandas/tests/arrays/timedeltas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/timedeltas/test_constructors.py b/pandas/tests/arrays/timedeltas/test_constructors.py new file mode 100644 index 00000000..d24fabfe --- /dev/null +++ b/pandas/tests/arrays/timedeltas/test_constructors.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest + +from pandas.core.arrays import TimedeltaArray + + +class TestTimedeltaArrayConstructor: + def test_only_1dim_accepted(self): + # GH#25282 + arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]") + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + TimedeltaArray(arr.reshape(2, 2, 1)) + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 0-dim + TimedeltaArray(arr[[0]].squeeze()) + + def test_freq_validation(self): + # ensure that the public constructor cannot create an invalid instance + arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10**9 + + msg = ( + "Inferred frequency None from passed values 
does not " + "conform to passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaArray(arr.view("timedelta64[ns]"), freq="D") + + def test_non_array_raises(self): + with pytest.raises(ValueError, match="list"): + TimedeltaArray([1, 2, 3]) + + def test_other_type_raises(self): + with pytest.raises(ValueError, match="dtype bool cannot be converted"): + TimedeltaArray(np.array([1, 2, 3], dtype="bool")) + + def test_incorrect_dtype_raises(self): + # TODO: why TypeError for 'category' but ValueError for i8? + with pytest.raises( + ValueError, match=r"category cannot be converted to timedelta64\[ns\]" + ): + TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category") + + with pytest.raises( + ValueError, match=r"dtype int64 cannot be converted to timedelta64\[ns\]" + ): + TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64")) + + def test_copy(self): + data = np.array([1, 2, 3], dtype="m8[ns]") + arr = TimedeltaArray(data, copy=False) + assert arr._data is data + + arr = TimedeltaArray(data, copy=True) + assert arr._data is not data + assert arr._data.base is not data + + def test_from_sequence_dtype(self): + msg = "dtype .*object.* cannot be converted to timedelta64" + with pytest.raises(ValueError, match=msg): + TimedeltaArray._from_sequence([], dtype=object) diff --git a/pandas/tests/arrays/timedeltas/test_reductions.py b/pandas/tests/arrays/timedeltas/test_reductions.py new file mode 100644 index 00000000..72d45f5b --- /dev/null +++ b/pandas/tests/arrays/timedeltas/test_reductions.py @@ -0,0 +1,215 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Timedelta +import pandas._testing as tm +from pandas.core import nanops +from pandas.core.arrays import TimedeltaArray + + +class TestReductions: + @pytest.mark.parametrize("name", ["std", "min", "max", "median", "mean"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_reductions_empty(self, name, skipna): + tdi = pd.TimedeltaIndex([]) + arr = tdi.array + + result = getattr(tdi, name)(skipna=skipna) + assert result is pd.NaT + + result = getattr(arr, name)(skipna=skipna) + assert result is pd.NaT + + @pytest.mark.parametrize("skipna", [True, False]) + def test_sum_empty(self, skipna): + tdi = pd.TimedeltaIndex([]) + arr = tdi.array + + result = tdi.sum(skipna=skipna) + assert isinstance(result, Timedelta) + assert result == Timedelta(0) + + result = arr.sum(skipna=skipna) + assert isinstance(result, Timedelta) + assert result == Timedelta(0) + + def test_min_max(self): + arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"]) + + result = arr.min() + expected = Timedelta("2H") + assert result == expected + + result = arr.max() + expected = Timedelta("5H") + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + def test_sum(self): + tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"]) + arr = tdi.array + + result = arr.sum(skipna=True) + expected = Timedelta(hours=17) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.sum(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + result = arr.sum(skipna=False) + assert result is pd.NaT + + result = tdi.sum(skipna=False) + assert result is pd.NaT + + result = arr.sum(min_count=9) + assert result is pd.NaT + + result = tdi.sum(min_count=9) + assert result is pd.NaT + + result = arr.sum(min_count=1) + assert isinstance(result, 
Timedelta) + assert result == expected + + result = tdi.sum(min_count=1) + assert isinstance(result, Timedelta) + assert result == expected + + def test_npsum(self): + # GH#25282, GH#25335 np.sum should return a Timedelta, not timedelta64 + tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"]) + arr = tdi.array + + result = np.sum(tdi) + expected = Timedelta(hours=17) + assert isinstance(result, Timedelta) + assert result == expected + + result = np.sum(arr) + assert isinstance(result, Timedelta) + assert result == expected + + def test_sum_2d_skipna_false(self): + arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2) + arr[-1, -1] = "Nat" + + tda = TimedeltaArray(arr) + + result = tda.sum(skipna=False) + assert result is pd.NaT + + result = tda.sum(axis=0, skipna=False) + expected = pd.TimedeltaIndex([Timedelta(seconds=12), pd.NaT])._values + tm.assert_timedelta_array_equal(result, expected) + + result = tda.sum(axis=1, skipna=False) + expected = pd.TimedeltaIndex( + [ + Timedelta(seconds=1), + Timedelta(seconds=5), + Timedelta(seconds=9), + pd.NaT, + ] + )._values + tm.assert_timedelta_array_equal(result, expected) + + # Adding a Timestamp makes this a test for DatetimeArray.std + @pytest.mark.parametrize( + "add", + [ + Timedelta(0), + pd.Timestamp("2021-01-01"), + pd.Timestamp("2021-01-01", tz="UTC"), + pd.Timestamp("2021-01-01", tz="Asia/Tokyo"), + ], + ) + def test_std(self, add): + tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + add + arr = tdi.array + + result = arr.std(skipna=True) + expected = Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.std(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + if getattr(arr, "tz", None) is None: + result = nanops.nanstd(np.asarray(arr), skipna=True) + assert isinstance(result, np.timedelta64) + assert result == expected + + result = arr.std(skipna=False) + assert result is pd.NaT + + result = tdi.std(skipna=False) + assert result is pd.NaT + + if getattr(arr, "tz", None) is None: + result = nanops.nanstd(np.asarray(arr), skipna=False) + assert isinstance(result, np.timedelta64) + assert np.isnat(result) + + def test_median(self): + tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) + arr = tdi.array + + result = arr.median(skipna=True) + expected = Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + result = tdi.median(skipna=True) + assert isinstance(result, Timedelta) + assert result == expected + + result = arr.median(skipna=False) + assert result is pd.NaT + + result = tdi.median(skipna=False) + assert result is pd.NaT + + def test_mean(self): + tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) + arr = tdi._data + + # manually verified result + expected = Timedelta(arr.dropna()._ndarray.mean()) + + result = arr.mean() + assert result == expected + result = arr.mean(skipna=False) + assert result is pd.NaT + + result = arr.dropna().mean(skipna=False) + assert result == expected + + result = arr.mean(axis=0) + assert result == expected + + def test_mean_2d(self): + tdi = pd.timedelta_range("14 days", periods=6) + tda = tdi._data.reshape(3, 2) + + result = tda.mean(axis=0) + expected = tda[1] + tm.assert_timedelta_array_equal(result, expected) + + result = tda.mean(axis=1) + expected = tda[:, 0] + Timedelta(hours=12) + tm.assert_timedelta_array_equal(result, expected) + + result = tda.mean(axis=None) + expected = tdi.mean() + assert result == 
expected diff --git a/pandas/tests/base/__init__.py b/pandas/tests/base/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/base/common.py b/pandas/tests/base/common.py new file mode 100644 index 00000000..b09710a9 --- /dev/null +++ b/pandas/tests/base/common.py @@ -0,0 +1,9 @@ +from typing import Any + +from pandas import Index + + +def allow_na_ops(obj: Any) -> bool: + """Whether to skip test cases including NaN""" + is_bool_index = isinstance(obj, Index) and obj.is_boolean() + return not is_bool_index and obj._can_hold_na diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py new file mode 100644 index 00000000..858eaacd --- /dev/null +++ b/pandas/tests/base/test_constructors.py @@ -0,0 +1,173 @@ +from datetime import datetime +import sys + +import numpy as np +import pytest + +from pandas.compat import PYPY + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.accessor import PandasDelegate +from pandas.core.base import ( + NoNewAttributesMixin, + PandasObject, +) + + +@pytest.fixture( + params=[ + Series, + lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], + lambda x, **kwargs: DataFrame(x, **kwargs)[0], + Index, + ], + ids=["Series", "DataFrame-dict", "DataFrame-array", "Index"], +) +def constructor(request): + return request.param + + +class TestPandasDelegate: + class Delegator: + _properties = ["foo"] + _methods = ["bar"] + + def _set_foo(self, value): + self.foo = value + + def _get_foo(self): + return self.foo + + foo = property(_get_foo, _set_foo, doc="foo property") + + def bar(self, *args, **kwargs): + """a test bar method""" + pass + + class Delegate(PandasDelegate, PandasObject): + def __init__(self, obj) -> None: + self.obj = obj + + def test_invalid_delegation(self): + # these show that in order for the delegation to work + # the _delegate_* methods need to be overridden to not raise + # a TypeError + + self.Delegate._add_delegate_accessors( + delegate=self.Delegator, + accessors=self.Delegator._properties, + typ="property", + ) + self.Delegate._add_delegate_accessors( + delegate=self.Delegator, accessors=self.Delegator._methods, typ="method" + ) + + delegate = self.Delegate(self.Delegator()) + + msg = "You cannot access the property foo" + with pytest.raises(TypeError, match=msg): + delegate.foo + + msg = "The property foo cannot be set" + with pytest.raises(TypeError, match=msg): + delegate.foo = 5 + + msg = "You cannot access the property foo" + with pytest.raises(TypeError, match=msg): + delegate.foo() + + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") + def test_memory_usage(self): + # Delegate does not implement memory_usage. 
+ # Check that we fall back to in-built `__sizeof__` + # GH 12924 + delegate = self.Delegate(self.Delegator()) + sys.getsizeof(delegate) + + +class TestNoNewAttributesMixin: + def test_mixin(self): + class T(NoNewAttributesMixin): + pass + + t = T() + assert not hasattr(t, "__frozen") + + t.a = "test" + assert t.a == "test" + + t._freeze() + assert "__frozen" in dir(t) + assert getattr(t, "__frozen") + msg = "You cannot add any new attribute" + with pytest.raises(AttributeError, match=msg): + t.b = "test" + + assert not hasattr(t, "b") + + +class TestConstruction: + # test certain constructor behaviours on dtype inference across Series, + # Index and DataFrame + + @pytest.mark.parametrize( + "klass", + [ + Series, + lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], + lambda x, **kwargs: DataFrame(x, **kwargs)[0], + Index, + ], + ) + @pytest.mark.parametrize( + "a", + [ + np.array(["2263-01-01"], dtype="datetime64[D]"), + np.array([datetime(2263, 1, 1)], dtype=object), + np.array([np.datetime64("2263-01-01", "D")], dtype=object), + np.array(["2263-01-01"], dtype=object), + ], + ids=[ + "datetime64[D]", + "object-datetime.datetime", + "object-numpy-scalar", + "object-string", + ], + ) + def test_constructor_datetime_outofbound(self, a, klass): + # GH-26853 (+ bug GH-26206 out of bound non-ns unit) + + # No dtype specified (dtype inference) + # datetime64[non-ns] raise error, other cases result in object dtype + # and preserve original data + if a.dtype.kind == "M": + msg = "Out of bounds" + with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg): + klass(a) + else: + result = klass(a) + assert result.dtype == "object" + tm.assert_numpy_array_equal(result.to_numpy(), a) + + # Explicit dtype specified + # Forced conversion fails for all -> all cases raise error + msg = "Out of bounds|Out of bounds .* present at position 0" + with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg): + klass(a, dtype="datetime64[ns]") + + def test_constructor_datetime_nonns(self, constructor): + arr = np.array(["2020-01-01T00:00:00.000000"], dtype="datetime64[us]") + expected = constructor(pd.to_datetime(["2020-01-01"])) + result = constructor(arr) + tm.assert_equal(result, expected) + + # https://github.com/pandas-dev/pandas/issues/34843 + arr.flags.writeable = False + result = constructor(arr) + tm.assert_equal(result, expected) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py new file mode 100644 index 00000000..599aaae4 --- /dev/null +++ b/pandas/tests/base/test_conversion.py @@ -0,0 +1,552 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + IntervalArray, + PandasArray, + PeriodArray, + SparseArray, + TimedeltaArray, +) + + +class TestToIterable: + # test that we convert an iterable to python types + + dtypes = [ + ("int8", int), + ("int16", int), + ("int32", int), + ("int64", int), + ("uint8", int), + ("uint16", int), + ("uint32", int), + ("uint64", int), + ("float16", float), + ("float32", float), + ("float64", float), + ("datetime64[ns]", Timestamp), + ("datetime64[ns, US/Eastern]", Timestamp), + ("timedelta64[ns]", Timedelta), + ] + + @pytest.mark.parametrize("dtype, rdtype", dtypes) + @pytest.mark.parametrize( + 
"method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_iterable(self, index_or_series, method, dtype, rdtype): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([1], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + "dtype, rdtype, obj", + [ + ("object", object, "a"), + ("object", int, 1), + ("category", object, "a"), + ("category", int, 1), + ], + ) + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_iterable_object_and_category( + self, index_or_series, method, dtype, rdtype, obj + ): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([obj], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize("dtype, rdtype", dtypes) + def test_iterable_items(self, dtype, rdtype): + # gh-13258 + # test if items yields the correct boxed scalars + # this only applies to series + s = Series([1], dtype=dtype) + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + "dtype, rdtype", dtypes + [("object", int), ("category", int)] + ) + def test_iterable_map(self, index_or_series, dtype, rdtype): + # gh-13236 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([1], dtype=dtype) + result = s.map(type)[0] + if not isinstance(rdtype, tuple): + rdtype = (rdtype,) + assert result in rdtype + + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_categorial_datetimelike(self, method): + i = CategoricalIndex([Timestamp("1999-12-31"), Timestamp("2000-12-31")]) + + result = method(i)[0] + assert isinstance(result, Timestamp) + + def test_iter_box(self): + vals = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] + s = Series(vals) + assert s.dtype == "datetime64[ns]" + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz is None + assert res == exp + + vals = [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = Series(vals) + + assert s.dtype == "datetime64[ns, US/Eastern]" + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz == exp.tz + assert res == exp + + # timedelta + vals = [Timedelta("1 days"), Timedelta("2 days")] + s = Series(vals) + assert s.dtype == "timedelta64[ns]" + for res, exp in zip(s, vals): + assert isinstance(res, Timedelta) + assert res == exp + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = Series(vals) + assert s.dtype == "Period[M]" + for res, exp in zip(s, vals): + assert isinstance(res, pd.Period) + assert res.freq == "M" + assert res == exp + + +@pytest.mark.parametrize( + "arr, expected_type, dtype", + [ + (np.array([0, 1], dtype=np.int64), np.ndarray, "int64"), + (np.array(["a", "b"]), np.ndarray, "object"), + (pd.Categorical(["a", "b"]), pd.Categorical, "category"), + ( + pd.DatetimeIndex(["2017", "2018"], 
tz="US/Central"), + DatetimeArray, + "datetime64[ns, US/Central]", + ), + ( + pd.PeriodIndex([2018, 2019], freq="A"), + PeriodArray, + pd.core.dtypes.dtypes.PeriodDtype("A-DEC"), + ), + (pd.IntervalIndex.from_breaks([0, 1, 2]), IntervalArray, "interval"), + ( + pd.DatetimeIndex(["2017", "2018"]), + DatetimeArray, + "datetime64[ns]", + ), + ( + pd.TimedeltaIndex([10**10]), + TimedeltaArray, + "m8[ns]", + ), + ], +) +def test_values_consistent(arr, expected_type, dtype): + l_values = Series(arr)._values + r_values = pd.Index(arr)._values + assert type(l_values) is expected_type + assert type(l_values) is type(r_values) + + tm.assert_equal(l_values, r_values) + + +@pytest.mark.parametrize("arr", [np.array([1, 2, 3])]) +def test_numpy_array(arr): + ser = Series(arr) + result = ser.array + expected = PandasArray(arr) + tm.assert_extension_array_equal(result, expected) + + +def test_numpy_array_all_dtypes(any_numpy_dtype): + ser = Series(dtype=any_numpy_dtype) + result = ser.array + if is_datetime64_dtype(any_numpy_dtype): + assert isinstance(result, DatetimeArray) + elif is_timedelta64_dtype(any_numpy_dtype): + assert isinstance(result, TimedeltaArray) + else: + assert isinstance(result, PandasArray) + + +@pytest.mark.parametrize( + "arr, attr", + [ + (pd.Categorical(["a", "b"]), "_codes"), + (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), + (pd.array([0, np.nan], dtype="Int64"), "_data"), + (IntervalArray.from_breaks([0, 1]), "_left"), + (SparseArray([0, 1]), "_sparse_values"), + (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), + # tz-aware Datetime + ( + DatetimeArray( + np.array( + ["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]" + ), + dtype=DatetimeTZDtype(tz="US/Central"), + ), + "_data", + ), + ], +) +def test_array(arr, attr, index_or_series, request): + box = index_or_series + warn = None + if arr.dtype.name in ("Sparse[int64, 0]") and box is pd.Index: + mark = pytest.mark.xfail(reason="Index cannot yet store sparse dtype") + request.node.add_marker(mark) + warn = FutureWarning + + with tm.assert_produces_warning(warn): + result = box(arr, copy=False).array + + if attr: + arr = getattr(arr, attr) + result = getattr(result, attr) + + assert result is arr + + +def test_array_multiindex_raises(): + idx = pd.MultiIndex.from_product([["A"], ["a", "b"]]) + msg = "MultiIndex has no single backing array" + with pytest.raises(ValueError, match=msg): + idx.array + + +@pytest.mark.parametrize( + "arr, expected", + [ + (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)), + (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)), + ( + pd.core.arrays.period_array(["2000", "2001"], freq="D"), + np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]), + ), + (pd.array([0, np.nan], dtype="Int64"), np.array([0, pd.NA], dtype=object)), + ( + IntervalArray.from_breaks([0, 1, 2]), + np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), + ), + (SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), + # tz-naive datetime + ( + DatetimeArray(np.array(["2000", "2001"], dtype="M8[ns]")), + np.array(["2000", "2001"], dtype="M8[ns]"), + ), + # tz-aware stays tz`-aware + ( + DatetimeArray( + np.array( + ["2000-01-01T06:00:00", "2000-01-02T06:00:00"], dtype="M8[ns]" + ), + dtype=DatetimeTZDtype(tz="US/Central"), + ), + np.array( + [ + Timestamp("2000-01-01", tz="US/Central"), + Timestamp("2000-01-02", tz="US/Central"), + ] + ), + ), + # Timedelta + ( + TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), 
freq="H"), + np.array([0, 3600000000000], dtype="m8[ns]"), + ), + # GH#26406 tz is preserved in Categorical[dt64tz] + ( + pd.Categorical(date_range("2016-01-01", periods=2, tz="US/Pacific")), + np.array( + [ + Timestamp("2016-01-01", tz="US/Pacific"), + Timestamp("2016-01-02", tz="US/Pacific"), + ] + ), + ), + ], +) +def test_to_numpy(arr, expected, index_or_series_or_array, request): + box = index_or_series_or_array + + warn = None + if index_or_series_or_array is pd.Index and isinstance(arr, SparseArray): + warn = FutureWarning + with tm.assert_produces_warning(warn): + thing = box(arr) + + if arr.dtype.name == "int64" and box is pd.array: + mark = pytest.mark.xfail(reason="thing is Int64 and to_numpy() returns object") + request.node.add_marker(mark) + + result = thing.to_numpy() + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(thing) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("as_series", [True, False]) +@pytest.mark.parametrize( + "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] +) +def test_to_numpy_copy(arr, as_series): + obj = pd.Index(arr, copy=False) + if as_series: + obj = Series(obj.values, copy=False) + + # no copy by default + result = obj.to_numpy() + assert np.shares_memory(arr, result) is True + + result = obj.to_numpy(copy=False) + assert np.shares_memory(arr, result) is True + + # copy=True + result = obj.to_numpy(copy=True) + assert np.shares_memory(arr, result) is False + + +@pytest.mark.parametrize("as_series", [True, False]) +def test_to_numpy_dtype(as_series): + tz = "US/Eastern" + obj = pd.DatetimeIndex(["2000", "2001"], tz=tz) + if as_series: + obj = Series(obj) + + # preserve tz by default + result = obj.to_numpy() + expected = np.array( + [Timestamp("2000", tz=tz), Timestamp("2001", tz=tz)], dtype=object + ) + tm.assert_numpy_array_equal(result, expected) + + result = obj.to_numpy(dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = obj.to_numpy(dtype="M8[ns]") + expected = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values, dtype, na_value, expected", + [ + ([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]), + ( + [Timestamp("2000"), Timestamp("2000"), pd.NaT], + None, + Timestamp("2000"), + [np.datetime64("2000-01-01T00:00:00.000000000")] * 3, + ), + ], +) +def test_to_numpy_na_value_numpy_dtype( + index_or_series, values, dtype, na_value, expected +): + obj = index_or_series(values) + result = obj.to_numpy(dtype=dtype, na_value=na_value) + expected = np.array(expected) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "data, multiindex, dtype, na_value, expected", + [ + ( + [1, 2, None, 4], + [(0, "a"), (0, "b"), (1, "b"), (1, "c")], + float, + None, + [1.0, 2.0, np.nan, 4.0], + ), + ( + [1, 2, None, 4], + [(0, "a"), (0, "b"), (1, "b"), (1, "c")], + float, + np.nan, + [1.0, 2.0, np.nan, 4.0], + ), + ( + [1.0, 2.0, np.nan, 4.0], + [("a", 0), ("a", 1), ("a", 2), ("b", 0)], + int, + 0, + [1, 2, 0, 4], + ), + ( + [Timestamp("2000"), Timestamp("2000"), pd.NaT], + [(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))], + None, + Timestamp("2000"), + [np.datetime64("2000-01-01T00:00:00.000000000")] * 3, + ), + ], +) +def test_to_numpy_multiindex_series_na_value( + data, multiindex, dtype, na_value, expected +): + index = pd.MultiIndex.from_tuples(multiindex) + series = Series(data, index=index) + result = 
series.to_numpy(dtype=dtype, na_value=na_value) + expected = np.array(expected) + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_kwargs_raises(): + # numpy + s = Series([1, 2, 3]) + msg = r"to_numpy\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + s.to_numpy(foo=True) + + # extension + s = Series([1, 2, 3], dtype="Int64") + with pytest.raises(TypeError, match=msg): + s.to_numpy(foo=True) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": [1, 2, None]}, + {"a": np.array([1, 2, 3]), "b": np.array([1, 2, np.nan])}, + {"a": pd.array([1, 2, 3]), "b": pd.array([1, 2, None])}, + ], +) +@pytest.mark.parametrize("dtype, na_value", [(float, np.nan), (object, None)]) +def test_to_numpy_dataframe_na_value(data, dtype, na_value): + # https://github.com/pandas-dev/pandas/issues/33820 + df = pd.DataFrame(data) + result = df.to_numpy(dtype=dtype, na_value=na_value) + expected = np.array([[1, 1], [2, 2], [3, na_value]], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "data, expected", + [ + ( + {"a": pd.array([1, 2, None])}, + np.array([[1.0], [2.0], [np.nan]], dtype=float), + ), + ( + {"a": [1, 2, 3], "b": [1, 2, 3]}, + np.array([[1, 1], [2, 2], [3, 3]], dtype=float), + ), + ], +) +def test_to_numpy_dataframe_single_block(data, expected): + # https://github.com/pandas-dev/pandas/issues/33820 + df = pd.DataFrame(data) + result = df.to_numpy(dtype=float, na_value=np.nan) + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_dataframe_single_block_no_mutate(): + # https://github.com/pandas-dev/pandas/issues/33820 + result = pd.DataFrame(np.array([1.0, 2.0, np.nan])) + expected = pd.DataFrame(np.array([1.0, 2.0, np.nan])) + result.to_numpy(na_value=0.0) + tm.assert_frame_equal(result, expected) + + +class TestAsArray: + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_asarray_object_dt64(self, tz): + ser = Series(date_range("2000", periods=2, tz=tz)) + + with tm.assert_produces_warning(None): + # Future behavior (for tzaware case) with no warning + result = np.asarray(ser, dtype=object) + + expected = np.array( + [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)] + ) + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_naive(self): + # This shouldn't produce a warning. 
+ ser = Series(date_range("2000", periods=2)) + expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") + result = np.asarray(ser) + + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_aware(self): + tz = "US/Central" + ser = Series(date_range("2000", periods=2, tz=tz)) + expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]") + result = np.asarray(ser, dtype="datetime64[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Old behavior with no warning + result = np.asarray(ser, dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/base/test_fillna.py b/pandas/tests/base/test_fillna.py new file mode 100644 index 00000000..7300d301 --- /dev/null +++ b/pandas/tests/base/test_fillna.py @@ -0,0 +1,60 @@ +""" +Though Index.fillna and Series.fillna has separate impl, +test here to confirm these works as the same +""" + +import numpy as np +import pytest + +from pandas import MultiIndex +import pandas._testing as tm +from pandas.tests.base.common import allow_na_ops + + +def test_fillna(index_or_series_obj): + # GH 11343 + obj = index_or_series_obj + + if isinstance(obj, MultiIndex): + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + obj.fillna(0) + return + + # values will not be changed + fill_value = obj.values[0] if len(obj) > 0 else 0 + result = obj.fillna(fill_value) + + tm.assert_equal(obj, result) + + # check shallow_copied + assert obj is not result + + +@pytest.mark.parametrize("null_obj", [np.nan, None]) +def test_fillna_null(null_obj, index_or_series_obj): + # GH 11343 + obj = index_or_series_obj + klass = type(obj) + + if not allow_na_ops(obj): + pytest.skip(f"{klass} doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(obj, MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj._values + fill_value = values[0] + expected = values.copy() + values[0:2] = null_obj + expected[0:2] = fill_value + + expected = klass(expected) + obj = klass(values) + + result = obj.fillna(fill_value) + tm.assert_equal(result, expected) + + # check shallow_copied + assert obj is not result diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py new file mode 100644 index 00000000..a55c013a --- /dev/null +++ b/pandas/tests/base/test_misc.py @@ -0,0 +1,198 @@ +import sys + +import numpy as np +import pytest + +from pandas.compat import ( + IS64, + PYPY, +) + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dtype_equal, + is_object_dtype, +) + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm + + +def test_isnull_notnull_docstrings(): + # GH#41855 make sure its clear these are aliases + doc = pd.DataFrame.notnull.__doc__ + assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n") + doc = pd.DataFrame.isnull.__doc__ + assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n") + + doc = Series.notnull.__doc__ + assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n") + doc = Series.isnull.__doc__ + assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n") + + +@pytest.mark.parametrize( + "op_name, op", + [ + ("add", "+"), + ("sub", "-"), + ("mul", "*"), + ("mod", "%"), + ("pow", "**"), + ("truediv", "/"), + ("floordiv", "//"), + ], +) +def test_binary_ops_docstring(frame_or_series, op_name, op): + # not using the 
all_arithmetic_functions fixture with _get_opstr + # as _get_opstr is used internally in the dynamic implementation of the docstring + klass = frame_or_series + + operand1 = klass.__name__.lower() + operand2 = "other" + expected_str = " ".join([operand1, op, operand2]) + assert expected_str in getattr(klass, op_name).__doc__ + + # reverse version of the binary ops + expected_str = " ".join([operand2, op, operand1]) + assert expected_str in getattr(klass, "r" + op_name).__doc__ + + +def test_ndarray_compat_properties(index_or_series_obj): + obj = index_or_series_obj + + # Check that we work. + for p in ["shape", "dtype", "T", "nbytes"]: + assert getattr(obj, p, None) is not None + + # deprecated properties + for p in ["strides", "itemsize", "base", "data"]: + assert not hasattr(obj, p) + + msg = "can only convert an array of size 1 to a Python scalar" + with pytest.raises(ValueError, match=msg): + obj.item() # len > 1 + + assert obj.ndim == 1 + assert obj.size == len(obj) + + assert Index([1]).item() == 1 + assert Series([1]).item() == 1 + + +def test_array_wrap_compat(): + # Note: at time of dask 2022.01.0, this is still used by eg dask + # (https://github.com/dask/dask/issues/8580). + # This test is a small dummy ensuring coverage + orig = Series([1, 2, 3], dtype="int64", index=["a", "b", "c"]) + with tm.assert_produces_warning(DeprecationWarning): + result = orig.__array_wrap__(np.array([2, 4, 6], dtype="int64")) + expected = orig * 2 + tm.assert_series_equal(result, expected) + + +@pytest.mark.skipif(PYPY, reason="not relevant for PyPy") +def test_memory_usage(index_or_series_obj): + obj = index_or_series_obj + + res = obj.memory_usage() + res_deep = obj.memory_usage(deep=True) + + is_ser = isinstance(obj, Series) + is_object = is_object_dtype(obj) or ( + isinstance(obj, Series) and is_object_dtype(obj.index) + ) + is_categorical = is_categorical_dtype(obj.dtype) or ( + isinstance(obj, Series) and is_categorical_dtype(obj.index.dtype) + ) + is_object_string = is_dtype_equal(obj, "string[python]") or ( + is_ser and is_dtype_equal(obj.index.dtype, "string[python]") + ) + + if len(obj) == 0: + if isinstance(obj, Index): + expected = 0 + else: + expected = 108 if IS64 else 64 + assert res_deep == res == expected + elif is_object or is_categorical or is_object_string: + # only deep will pick them up + assert res_deep > res + else: + assert res == res_deep + + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = res_deep - sys.getsizeof(obj) + assert abs(diff) < 100 + + +def test_memory_usage_components_series(series_with_simple_index): + series = series_with_simple_index + total_usage = series.memory_usage(index=True) + non_index_usage = series.memory_usage(index=False) + index_usage = series.index.memory_usage() + assert total_usage == non_index_usage + index_usage + + +@pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES) +def test_memory_usage_components_narrow_series(dtype): + series = tm.make_rand_series(name="a", dtype=dtype) + total_usage = series.memory_usage(index=True) + non_index_usage = series.memory_usage(index=False) + index_usage = series.index.memory_usage() + assert total_usage == non_index_usage + index_usage + + +def test_searchsorted(request, index_or_series_obj): + # numpy.searchsorted calls obj.searchsorted under the hood. 
+ # See gh-12238 + obj = index_or_series_obj + + if isinstance(obj, pd.MultiIndex): + # See gh-14833 + request.node.add_marker( + pytest.mark.xfail( + reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833" + ) + ) + elif obj.dtype.kind == "c" and isinstance(obj, Index): + # TODO: Should Series cases also raise? Looks like they use numpy + # comparison semantics https://github.com/numpy/numpy/issues/15981 + mark = pytest.mark.xfail(reason="complex objects are not comparable") + request.node.add_marker(mark) + + max_obj = max(obj, default=0) + index = np.searchsorted(obj, max_obj) + assert 0 <= index <= len(obj) + + index = np.searchsorted(obj, max_obj, sorter=range(len(obj))) + assert 0 <= index <= len(obj) + + +def test_access_by_position(index_flat): + index = index_flat + + if len(index) == 0: + pytest.skip("Test doesn't make sense on empty data") + + series = Series(index) + assert index[0] == series.iloc[0] + assert index[5] == series.iloc[5] + assert index[-1] == series.iloc[-1] + + size = len(index) + assert index[-1] == index[size - 1] + + msg = f"index {size} is out of bounds for axis 0 with size {size}" + if is_dtype_equal(index.dtype, "string[pyarrow]"): + msg = "index out of bounds" + with pytest.raises(IndexError, match=msg): + index[size] + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + series.iloc[size] diff --git a/pandas/tests/base/test_transpose.py b/pandas/tests/base/test_transpose.py new file mode 100644 index 00000000..246f33d2 --- /dev/null +++ b/pandas/tests/base/test_transpose.py @@ -0,0 +1,56 @@ +import numpy as np +import pytest + +from pandas import ( + CategoricalDtype, + DataFrame, +) +import pandas._testing as tm + + +def test_transpose(index_or_series_obj): + obj = index_or_series_obj + tm.assert_equal(obj.transpose(), obj) + + +def test_transpose_non_default_axes(index_or_series_obj): + msg = "the 'axes' parameter is not supported" + obj = index_or_series_obj + with pytest.raises(ValueError, match=msg): + obj.transpose(1) + with pytest.raises(ValueError, match=msg): + obj.transpose(axes=1) + + +def test_numpy_transpose(index_or_series_obj): + msg = "the 'axes' parameter is not supported" + obj = index_or_series_obj + tm.assert_equal(np.transpose(obj), obj) + + with pytest.raises(ValueError, match=msg): + np.transpose(obj, axes=1) + + +@pytest.mark.parametrize( + "data, transposed_data, index, columns, dtype", + [ + ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], int), + ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], CategoricalDtype([1, 2])), + ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], int), + ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], CategoricalDtype([1, 2])), + ([[1, 2], [3, 4]], [[1, 3], [2, 4]], ["a", "a"], ["b", "b"], int), + ( + [[1, 2], [3, 4]], + [[1, 3], [2, 4]], + ["a", "a"], + ["b", "b"], + CategoricalDtype([1, 2, 3, 4]), + ), + ], +) +def test_duplicate_labels(data, transposed_data, index, columns, dtype): + # GH 42380 + df = DataFrame(data, index=index, columns=columns, dtype=dtype) + result = df.T + expected = DataFrame(transposed_data, index=columns, columns=index, dtype=dtype) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py new file mode 100644 index 00000000..eac1e356 --- /dev/null +++ b/pandas/tests/base/test_unique.py @@ -0,0 +1,158 @@ +import numpy as np +import pytest + +from pandas.compat import pa_version_under2p0 +from pandas.errors import PerformanceWarning + +from pandas.core.dtypes.common import 
is_datetime64tz_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.api import NumericIndex +from pandas.tests.base.common import allow_na_ops + + +def test_unique(index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + ): + result = obj.unique() + + # dict.fromkeys preserves the order + unique_values = list(dict.fromkeys(obj.values)) + if isinstance(obj, pd.MultiIndex): + expected = pd.MultiIndex.from_tuples(unique_values) + expected.names = obj.names + tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index: + expected = NumericIndex(unique_values, dtype=obj.dtype) + tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.Index): + expected = pd.Index(unique_values, dtype=obj.dtype) + if is_datetime64tz_dtype(obj.dtype): + expected = expected.normalize() + tm.assert_index_equal(result, expected, exact=True) + else: + expected = np.array(unique_values) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("null_obj", [np.nan, None]) +def test_unique_null(null_obj, index_or_series_obj): + obj = index_or_series_obj + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(obj, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj._values + values[0:2] = null_obj + + klass = type(obj) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + ): + result = obj.unique() + + unique_values_raw = dict.fromkeys(obj.values) + # because np.nan == np.nan is False, but None == None is True + # np.nan would be duplicated, whereas None wouldn't + unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] + unique_values = [null_obj] + unique_values_not_null + + if isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index: + expected = NumericIndex(unique_values, dtype=obj.dtype) + tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.Index): + expected = pd.Index(unique_values, dtype=obj.dtype) + if is_datetime64tz_dtype(obj.dtype): + result = result.normalize() + expected = expected.normalize() + tm.assert_index_equal(result, expected, exact=True) + else: + expected = np.array(unique_values, dtype=obj.dtype) + tm.assert_numpy_array_equal(result, expected) + + +def test_nunique(index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + ): + expected = len(obj.unique()) + assert obj.nunique(dropna=False) == expected + + +@pytest.mark.parametrize("null_obj", [np.nan, None]) +def test_nunique_null(null_obj, index_or_series_obj): + obj = index_or_series_obj + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif isinstance(obj, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj._values + values[0:2] = null_obj + + klass = type(obj) + 
repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) + + if isinstance(obj, pd.CategoricalIndex): + assert obj.nunique() == len(obj.categories) + assert obj.nunique(dropna=False) == len(obj.categories) + 1 + else: + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + ): + num_unique_values = len(obj.unique()) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + ): + assert obj.nunique() == max(0, num_unique_values - 1) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + ): + assert obj.nunique(dropna=False) == max(0, num_unique_values) + + +@pytest.mark.single_cpu +@pytest.mark.xfail( + reason="Flaky in the CI. Remove once CI has a single build: GH 44584", strict=False +) +def test_unique_bad_unicode(index_or_series): + # regression test for #34550 + uval = "\ud83d" # smiley emoji + + obj = index_or_series([uval] * 2) + result = obj.unique() + + if isinstance(obj, pd.Index): + expected = pd.Index(["\ud83d"], dtype=object) + tm.assert_index_equal(result, expected, exact=True) + else: + expected = np.array(["\ud83d"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_nunique_dropna(dropna): + # GH37566 + ser = pd.Series(["yes", "yes", pd.NA, np.nan, None, pd.NaT]) + res = ser.nunique(dropna) + assert res == 1 if dropna else 5 diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py new file mode 100644 index 00000000..af654669 --- /dev/null +++ b/pandas/tests/base/test_value_counts.py @@ -0,0 +1,317 @@ +import collections +from datetime import timedelta + +import numpy as np +import pytest + +from pandas.compat import pa_version_under7p0 +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + Interval, + IntervalIndex, + Series, + Timedelta, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.tests.base.common import allow_na_ops + + +def test_value_counts(index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) + result = obj.value_counts() + + counter = collections.Counter(obj) + expected = Series(dict(counter.most_common()), dtype=np.int64, name=obj.name) + expected.index = expected.index.astype(obj.dtype) + if isinstance(obj, pd.MultiIndex): + expected.index = Index(expected.index) + + if not isinstance(result.dtype, np.dtype): + # i.e IntegerDtype + expected = expected.astype("Int64") + + # TODO(GH#32514): Order of entries with the same count is inconsistent + # on CI (gh-32449) + if obj.duplicated().any(): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", + ): + result = result.sort_index() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", + ): + expected = expected.sort_index() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("null_obj", [np.nan, None]) +def test_value_counts_null(null_obj, index_or_series_obj): + orig = index_or_series_obj + obj = orig.copy() + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif len(obj) < 1: + 
pytest.skip("Test doesn't make sense on empty data") + elif isinstance(orig, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj._values + values[0:2] = null_obj + + klass = type(obj) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) + + # because np.nan == np.nan is False, but None == None is True + # np.nan would be duplicated, whereas None wouldn't + counter = collections.Counter(obj.dropna()) + expected = Series(dict(counter.most_common()), dtype=np.int64) + expected.index = expected.index.astype(obj.dtype) + + result = obj.value_counts() + if obj.duplicated().any(): + # TODO(GH#32514): + # Order of entries with the same count is inconsistent on CI (gh-32449) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", + ): + expected = expected.sort_index() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", + ): + result = result.sort_index() + + if not isinstance(result.dtype, np.dtype): + # i.e IntegerDtype + expected = expected.astype("Int64") + tm.assert_series_equal(result, expected) + + expected[null_obj] = 3 + + result = obj.value_counts(dropna=False) + if obj.duplicated().any(): + # TODO(GH#32514): + # Order of entries with the same count is inconsistent on CI (gh-32449) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", + ): + expected = expected.sort_index() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", + ): + result = result.sort_index() + tm.assert_series_equal(result, expected) + + +def test_value_counts_inferred(index_or_series): + klass = index_or_series + s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] + s = klass(s_values) + expected = Series([4, 3, 2, 1], index=["b", "a", "d", "c"]) + tm.assert_series_equal(s.value_counts(), expected) + + if isinstance(s, Index): + exp = Index(np.unique(np.array(s_values, dtype=np.object_))) + tm.assert_index_equal(s.unique(), exp) + else: + exp = np.unique(np.array(s_values, dtype=np.object_)) + tm.assert_numpy_array_equal(s.unique(), exp) + + assert s.nunique() == 4 + # don't sort, have to sort after the fact as not sorting is + # platform-dep + hist = s.value_counts(sort=False).sort_values() + expected = Series([3, 1, 4, 2], index=list("acbd")).sort_values() + tm.assert_series_equal(hist, expected) + + # sort ascending + hist = s.value_counts(ascending=True) + expected = Series([1, 2, 3, 4], index=list("cdab")) + tm.assert_series_equal(hist, expected) + + # relative histogram. 
+ hist = s.value_counts(normalize=True) + expected = Series([0.4, 0.3, 0.2, 0.1], index=["b", "a", "d", "c"]) + tm.assert_series_equal(hist, expected) + + +def test_value_counts_bins(index_or_series): + klass = index_or_series + s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] + s = klass(s_values) + + # bins + msg = "bins argument only works with numeric data" + with pytest.raises(TypeError, match=msg): + s.value_counts(bins=1) + + s1 = Series([1, 1, 2, 3]) + res1 = s1.value_counts(bins=1) + exp1 = Series({Interval(0.997, 3.0): 4}) + tm.assert_series_equal(res1, exp1) + res1n = s1.value_counts(bins=1, normalize=True) + exp1n = Series({Interval(0.997, 3.0): 1.0}) + tm.assert_series_equal(res1n, exp1n) + + if isinstance(s1, Index): + tm.assert_index_equal(s1.unique(), Index([1, 2, 3])) + else: + exp = np.array([1, 2, 3], dtype=np.int64) + tm.assert_numpy_array_equal(s1.unique(), exp) + + assert s1.nunique() == 3 + + # these return the same + res4 = s1.value_counts(bins=4, dropna=True) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) + tm.assert_series_equal(res4, exp4) + + res4 = s1.value_counts(bins=4, dropna=False) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) + tm.assert_series_equal(res4, exp4) + + res4n = s1.value_counts(bins=4, normalize=True) + exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 1, 3, 2])) + tm.assert_series_equal(res4n, exp4n) + + # handle NA's properly + s_values = ["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"] + s = klass(s_values) + expected = Series([4, 3, 2], index=["b", "a", "d"]) + tm.assert_series_equal(s.value_counts(), expected) + + if isinstance(s, Index): + exp = Index(["a", "b", np.nan, "d"]) + tm.assert_index_equal(s.unique(), exp) + else: + exp = np.array(["a", "b", np.nan, "d"], dtype=object) + tm.assert_numpy_array_equal(s.unique(), exp) + assert s.nunique() == 3 + + s = klass({}) if klass is dict else klass({}, dtype=object) + expected = Series([], dtype=np.int64) + tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) + # returned dtype differs depending on original + if isinstance(s, Index): + tm.assert_index_equal(s.unique(), Index([]), exact=False) + else: + tm.assert_numpy_array_equal(s.unique(), np.array([]), check_dtype=False) + + assert s.nunique() == 0 + + +def test_value_counts_datetime64(index_or_series): + klass = index_or_series + + # GH 3002, datetime64[ns] + # don't test names though + df = pd.DataFrame( + { + "person_id": ["xxyyzz", "xxyyzz", "xxyyzz", "xxyyww", "foofoo", "foofoo"], + "dt": pd.to_datetime( + [ + "2010-01-01", + "2010-01-01", + "2010-01-01", + "2009-01-01", + "2008-09-09", + "2008-09-09", + ] + ), + "food": ["PIE", "GUM", "EGG", "EGG", "PIE", "GUM"], + } + ) + + s = klass(df["dt"].copy()) + s.name = None + idx = pd.to_datetime( + ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"] + ) + expected_s = Series([3, 2, 1], index=idx) + tm.assert_series_equal(s.value_counts(), expected_s) + + expected = np.array( + ["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"], + dtype="datetime64[ns]", + ) + if isinstance(s, Index): + tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) + else: + tm.assert_numpy_array_equal(s.unique(), expected) + + assert s.nunique() == 3 + + # with NaT + s = df["dt"].copy() + s = klass(list(s.values) + [pd.NaT] * 4) + + result 
= s.value_counts() + assert result.index.dtype == "datetime64[ns]" + tm.assert_series_equal(result, expected_s) + + result = s.value_counts(dropna=False) + expected_s = pd.concat([Series([4], index=DatetimeIndex([pd.NaT])), expected_s]) + tm.assert_series_equal(result, expected_s) + + assert s.dtype == "datetime64[ns]" + unique = s.unique() + assert unique.dtype == "datetime64[ns]" + + # numpy_array_equal cannot compare pd.NaT + if isinstance(s, Index): + exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) + tm.assert_index_equal(unique, exp_idx) + else: + tm.assert_numpy_array_equal(unique[:3], expected) + assert pd.isna(unique[3]) + + assert s.nunique() == 3 + assert s.nunique(dropna=False) == 4 + + # timedelta64[ns] + td = df.dt - df.dt + timedelta(1) + td = klass(td, name="dt") + + result = td.value_counts() + expected_s = Series([6], index=[Timedelta("1day")], name="dt") + tm.assert_series_equal(result, expected_s) + + expected = TimedeltaIndex(["1 days"], name="dt") + if isinstance(td, Index): + tm.assert_index_equal(td.unique(), expected) + else: + tm.assert_numpy_array_equal(td.unique(), expected.values) + + td2 = timedelta(1) + (df.dt - df.dt) + td2 = klass(td2, name="dt") + result2 = td2.value_counts() + tm.assert_series_equal(result2, expected_s) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_value_counts_with_nan(dropna, index_or_series): + # GH31944 + klass = index_or_series + values = [True, pd.NA, np.nan] + obj = klass(values) + res = obj.value_counts(dropna=dropna) + if dropna is True: + expected = Series([1], index=Index([True], dtype=obj.dtype)) + else: + expected = Series([1, 1, 1], index=[True, pd.NA, np.nan]) + tm.assert_series_equal(res, expected) diff --git a/pandas/tests/computation/__init__.py b/pandas/tests/computation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py new file mode 100644 index 00000000..cfc08426 --- /dev/null +++ b/pandas/tests/computation/test_compat.py @@ -0,0 +1,32 @@ +import pytest + +from pandas.compat._optional import VERSIONS + +import pandas as pd +from pandas.core.computation.engines import ENGINES +import pandas.core.computation.expr as expr +from pandas.util.version import Version + + +def test_compat(): + # test we have compat with our version of numexpr + + from pandas.core.computation.check import NUMEXPR_INSTALLED + + ne = pytest.importorskip("numexpr") + + ver = ne.__version__ + if Version(ver) < Version(VERSIONS["numexpr"]): + assert not NUMEXPR_INSTALLED + else: + assert NUMEXPR_INSTALLED + + +@pytest.mark.parametrize("engine", ENGINES) +@pytest.mark.parametrize("parser", expr.PARSERS) +def test_invalid_numexpr_version(engine, parser): + if engine == "numexpr": + pytest.importorskip("numexpr") + a, b = 1, 2 # noqa:F841 + res = pd.eval("a + b", engine=engine, parser=parser) + assert res == 3 diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py new file mode 100644 index 00000000..b3b80e8e --- /dev/null +++ b/pandas/tests/computation/test_eval.py @@ -0,0 +1,1941 @@ +from __future__ import annotations + +from functools import reduce +from itertools import product +import operator +import random +import warnings + +import numpy as np +import pytest + +from pandas.errors import ( + NumExprClobberingError, + PerformanceWarning, + UndefinedVariableError, +) +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import ( + is_bool, + is_float, + 
is_list_like, + is_scalar, +) + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.computation import pytables +from pandas.core.computation.engines import ENGINES +import pandas.core.computation.expr as expr +from pandas.core.computation.expr import ( + BaseExprVisitor, + PandasExprVisitor, + PythonExprVisitor, +) +from pandas.core.computation.expressions import ( + NUMEXPR_INSTALLED, + USE_NUMEXPR, +) +from pandas.core.computation.ops import ( + ARITH_OPS_SYMS, + SPECIAL_CASE_ARITH_OPS_SYMS, + _binary_math_ops, + _binary_ops_dict, + _unary_math_ops, +) +from pandas.core.computation.scope import DEFAULT_GLOBALS + + +@pytest.fixture( + params=( + pytest.param( + engine, + marks=[ + pytest.mark.skipif( + engine == "numexpr" and not USE_NUMEXPR, + reason=f"numexpr enabled->{USE_NUMEXPR}, " + f"installed->{NUMEXPR_INSTALLED}", + ), + td.skip_if_no_ne, + ], + ) + for engine in ENGINES + ) +) +def engine(request): + return request.param + + +@pytest.fixture(params=expr.PARSERS) +def parser(request): + return request.param + + +@pytest.fixture(params=list(_unary_math_ops) if NUMEXPR_INSTALLED else []) +def unary_fns_for_ne(request): + return request.param + + +def _eval_single_bin(lhs, cmp1, rhs, engine): + c = _binary_ops_dict[cmp1] + if ENGINES[engine].has_neg_frac: + try: + return c(lhs, rhs) + except ValueError as e: + if str(e).startswith( + "negative number cannot be raised to a fractional power" + ): + return np.nan + raise + return c(lhs, rhs) + + +# TODO: using range(5) here is a kludge +@pytest.fixture( + params=list(range(5)), + ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"], +) +def lhs(request): + + nan_df1 = DataFrame(np.random.rand(10, 5)) + nan_df1[nan_df1 > 0.5] = np.nan + + opts = ( + DataFrame(np.random.randn(10, 5)), + Series(np.random.randn(5)), + Series([1, 2, np.nan, np.nan, 5]), + nan_df1, + np.random.randn(), + ) + return opts[request.param] + + +rhs = lhs +midhs = lhs + + +class TestEval: + @pytest.mark.parametrize( + "cmp1", + ["!=", "==", "<=", ">=", "<", ">"], + ids=["ne", "eq", "le", "ge", "lt", "gt"], + ) + @pytest.mark.parametrize("cmp2", [">", "<"], ids=["gt", "lt"]) + @pytest.mark.parametrize("binop", expr.BOOL_OPS_SYMS) + def test_complex_cmp_ops(self, cmp1, cmp2, binop, lhs, rhs, engine, parser): + if parser == "python" and binop in ["and", "or"]: + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" + pd.eval(ex, engine=engine, parser=parser) + return + + lhs_new = _eval_single_bin(lhs, cmp1, rhs, engine) + rhs_new = _eval_single_bin(lhs, cmp2, rhs, engine) + expected = _eval_single_bin(lhs_new, binop, rhs_new, engine) + + ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" + result = pd.eval(ex, engine=engine, parser=parser) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("cmp_op", expr.CMP_OPS_SYMS) + def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser): + lhs = lhs < 0 + rhs = rhs < 0 + + if parser == "python" and cmp_op in ["in", "not in"]: + msg = "'(In|NotIn)' nodes are not implemented" + + with pytest.raises(NotImplementedError, match=msg): + ex = f"lhs {cmp_op} rhs" + pd.eval(ex, engine=engine, parser=parser) + return + + ex = f"lhs {cmp_op} rhs" + msg = "|".join( + [ + r"only list-like( or dict-like)? 
objects are allowed to be " + r"passed to (DataFrame\.)?isin\(\), you passed a " + r"(\[|')bool(\]|')", + "argument of type 'bool' is not iterable", + ] + ) + if cmp_op in ("in", "not in") and not is_list_like(rhs): + with pytest.raises(TypeError, match=msg): + pd.eval( + ex, + engine=engine, + parser=parser, + local_dict={"lhs": lhs, "rhs": rhs}, + ) + else: + expected = _eval_single_bin(lhs, cmp_op, rhs, engine) + result = pd.eval(ex, engine=engine, parser=parser) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("op", expr.CMP_OPS_SYMS) + def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser): + if parser == "python" and op in ["in", "not in"]: + + msg = "'(In|NotIn)' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + ex = f"~(lhs {op} rhs)" + pd.eval(ex, engine=engine, parser=parser) + return + + if ( + is_float(lhs) + and not is_float(rhs) + and op in ["in", "not in"] + and engine == "python" + and parser == "pandas" + ): + mark = pytest.mark.xfail( + reason="Looks like expected is negative, unclear whether " + "expected is incorrect or result is incorrect" + ) + request.node.add_marker(mark) + skip_these = ["in", "not in"] + ex = f"~(lhs {op} rhs)" + + msg = "|".join( + [ + r"only list-like( or dict-like)? objects are allowed to be " + r"passed to (DataFrame\.)?isin\(\), you passed a " + r"(\[|')float(\]|')", + "argument of type 'float' is not iterable", + ] + ) + if is_scalar(rhs) and op in skip_these: + with pytest.raises(TypeError, match=msg): + pd.eval( + ex, + engine=engine, + parser=parser, + local_dict={"lhs": lhs, "rhs": rhs}, + ) + else: + # compound + if is_scalar(lhs) and is_scalar(rhs): + lhs, rhs = map(lambda x: np.array([x]), (lhs, rhs)) + expected = _eval_single_bin(lhs, op, rhs, engine) + if is_scalar(expected): + expected = not expected + else: + expected = ~expected + result = pd.eval(ex, engine=engine, parser=parser) + tm.assert_almost_equal(expected, result) + + @pytest.mark.parametrize("cmp1", ["<", ">"]) + @pytest.mark.parametrize("cmp2", ["<", ">"]) + def test_chained_cmp_op(self, cmp1, cmp2, lhs, midhs, rhs, engine, parser): + mid = midhs + if parser == "python": + ex1 = f"lhs {cmp1} mid {cmp2} rhs" + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(ex1, engine=engine, parser=parser) + return + + lhs_new = _eval_single_bin(lhs, cmp1, mid, engine) + rhs_new = _eval_single_bin(mid, cmp2, rhs, engine) + + if lhs_new is not None and rhs_new is not None: + ex1 = f"lhs {cmp1} mid {cmp2} rhs" + ex2 = f"lhs {cmp1} mid and mid {cmp2} rhs" + ex3 = f"(lhs {cmp1} mid) & (mid {cmp2} rhs)" + expected = _eval_single_bin(lhs_new, "&", rhs_new, engine) + + for ex in (ex1, ex2, ex3): + result = pd.eval(ex, engine=engine, parser=parser) + + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize( + "arith1", sorted(set(ARITH_OPS_SYMS).difference(SPECIAL_CASE_ARITH_OPS_SYMS)) + ) + def test_binary_arith_ops(self, arith1, lhs, rhs, engine, parser): + ex = f"lhs {arith1} rhs" + result = pd.eval(ex, engine=engine, parser=parser) + expected = _eval_single_bin(lhs, arith1, rhs, engine) + + tm.assert_almost_equal(result, expected) + ex = f"lhs {arith1} rhs {arith1} rhs" + result = pd.eval(ex, engine=engine, parser=parser) + nlhs = _eval_single_bin(lhs, arith1, rhs, engine) + try: + nlhs, ghs = nlhs.align(rhs) + except (ValueError, TypeError, AttributeError): + # ValueError: series frame or frame series align + # TypeError, AttributeError: series or 
frame with scalar align + return + else: + if engine == "numexpr": + import numexpr as ne + + # direct numpy comparison + expected = ne.evaluate(f"nlhs {arith1} ghs") + # Update assert statement due to unreliable numerical + # precision component (GH37328) + # TODO: update testing code so that assert_almost_equal statement + # can be replaced again by the assert_numpy_array_equal statement + tm.assert_almost_equal(result.values, expected) + else: + expected = eval(f"nlhs {arith1} ghs") + tm.assert_almost_equal(result, expected) + + # modulus, pow, and floor division require special casing + + def test_modulus(self, lhs, rhs, engine, parser): + ex = r"lhs % rhs" + result = pd.eval(ex, engine=engine, parser=parser) + expected = lhs % rhs + tm.assert_almost_equal(result, expected) + + if engine == "numexpr": + import numexpr as ne + + expected = ne.evaluate(r"expected % rhs") + if isinstance(result, (DataFrame, Series)): + tm.assert_almost_equal(result.values, expected) + else: + tm.assert_almost_equal(result, expected.item()) + else: + expected = _eval_single_bin(expected, "%", rhs, engine) + tm.assert_almost_equal(result, expected) + + def test_floor_division(self, lhs, rhs, engine, parser): + ex = "lhs // rhs" + + if engine == "python": + res = pd.eval(ex, engine=engine, parser=parser) + expected = lhs // rhs + tm.assert_equal(res, expected) + else: + msg = ( + r"unsupported operand type\(s\) for //: 'VariableNode' and " + "'VariableNode'" + ) + with pytest.raises(TypeError, match=msg): + pd.eval( + ex, + local_dict={"lhs": lhs, "rhs": rhs}, + engine=engine, + parser=parser, + ) + + @td.skip_if_windows + def test_pow(self, lhs, rhs, engine, parser): + # odd failure on win32 platform, so skip + ex = "lhs ** rhs" + expected = _eval_single_bin(lhs, "**", rhs, engine) + result = pd.eval(ex, engine=engine, parser=parser) + + if ( + is_scalar(lhs) + and is_scalar(rhs) + and isinstance(expected, (complex, np.complexfloating)) + and np.isnan(result) + ): + msg = "(DataFrame.columns|numpy array) are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(result, expected) + else: + tm.assert_almost_equal(result, expected) + + ex = "(lhs ** rhs) ** rhs" + result = pd.eval(ex, engine=engine, parser=parser) + + middle = _eval_single_bin(lhs, "**", rhs, engine) + expected = _eval_single_bin(middle, "**", rhs, engine) + tm.assert_almost_equal(result, expected) + + def check_single_invert_op(self, lhs, engine, parser): + # simple + try: + elb = lhs.astype(bool) + except AttributeError: + elb = np.array([bool(lhs)]) + expected = ~elb + result = pd.eval("~elb", engine=engine, parser=parser) + tm.assert_almost_equal(expected, result) + + def test_frame_invert(self, engine, parser): + expr = "~lhs" + + # ~ ## + # frame + # float always raises + lhs = DataFrame(np.random.randn(5, 2)) + if engine == "numexpr": + msg = "couldn't find matching opcode for 'invert_dd'" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + else: + msg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + + # int raises on numexpr + lhs = DataFrame(np.random.randint(5, size=(5, 2))) + if engine == "numexpr": + msg = "couldn't find matching opcode for 'invert" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + else: + expect = ~lhs + result = pd.eval(expr, engine=engine, parser=parser) + 
tm.assert_frame_equal(expect, result) + + # bool always works + lhs = DataFrame(np.random.rand(5, 2) > 0.5) + expect = ~lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_frame_equal(expect, result) + + # object raises + lhs = DataFrame({"b": ["a", 1, 2.0], "c": np.random.rand(3) > 0.5}) + if engine == "numexpr": + with pytest.raises(ValueError, match="unknown type object"): + pd.eval(expr, engine=engine, parser=parser) + else: + msg = "bad operand type for unary ~: 'str'" + with pytest.raises(TypeError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + + def test_series_invert(self, engine, parser): + # ~ #### + expr = "~lhs" + + # series + # float raises + lhs = Series(np.random.randn(5)) + if engine == "numexpr": + msg = "couldn't find matching opcode for 'invert_dd'" + with pytest.raises(NotImplementedError, match=msg): + result = pd.eval(expr, engine=engine, parser=parser) + else: + msg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + + # int raises on numexpr + lhs = Series(np.random.randint(5, size=5)) + if engine == "numexpr": + msg = "couldn't find matching opcode for 'invert" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + else: + expect = ~lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_series_equal(expect, result) + + # bool + lhs = Series(np.random.rand(5) > 0.5) + expect = ~lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_series_equal(expect, result) + + # float + # int + # bool + + # object + lhs = Series(["a", 1, 2.0]) + if engine == "numexpr": + with pytest.raises(ValueError, match="unknown type object"): + pd.eval(expr, engine=engine, parser=parser) + else: + msg = "bad operand type for unary ~: 'str'" + with pytest.raises(TypeError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + + def test_frame_negate(self, engine, parser): + expr = "-lhs" + + # float + lhs = DataFrame(np.random.randn(5, 2)) + expect = -lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_frame_equal(expect, result) + + # int + lhs = DataFrame(np.random.randint(5, size=(5, 2))) + expect = -lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_frame_equal(expect, result) + + # bool doesn't work with numexpr but works elsewhere + lhs = DataFrame(np.random.rand(5, 2) > 0.5) + if engine == "numexpr": + msg = "couldn't find matching opcode for 'neg_bb'" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + else: + expect = -lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_frame_equal(expect, result) + + def test_series_negate(self, engine, parser): + expr = "-lhs" + + # float + lhs = Series(np.random.randn(5)) + expect = -lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_series_equal(expect, result) + + # int + lhs = Series(np.random.randint(5, size=5)) + expect = -lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_series_equal(expect, result) + + # bool doesn't work with numexpr but works elsewhere + lhs = Series(np.random.rand(5) > 0.5) + if engine == "numexpr": + msg = "couldn't find matching opcode for 'neg_bb'" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(expr, engine=engine, parser=parser) + else: + expect = -lhs + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_series_equal(expect, result) 
+ + @pytest.mark.parametrize( + "lhs", + [ + # Float + DataFrame(np.random.randn(5, 2)), + # Int + DataFrame(np.random.randint(5, size=(5, 2))), + # bool doesn't work with numexpr but works elsewhere + DataFrame(np.random.rand(5, 2) > 0.5), + ], + ) + def test_frame_pos(self, lhs, engine, parser): + expr = "+lhs" + expect = lhs + + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_frame_equal(expect, result) + + @pytest.mark.parametrize( + "lhs", + [ + # Float + Series(np.random.randn(5)), + # Int + Series(np.random.randint(5, size=5)), + # bool doesn't work with numexpr but works elsewhere + Series(np.random.rand(5) > 0.5), + ], + ) + def test_series_pos(self, lhs, engine, parser): + expr = "+lhs" + expect = lhs + + result = pd.eval(expr, engine=engine, parser=parser) + tm.assert_series_equal(expect, result) + + def test_scalar_unary(self, engine, parser): + msg = "bad operand type for unary ~: 'float'" + with pytest.raises(TypeError, match=msg): + pd.eval("~1.0", engine=engine, parser=parser) + + assert pd.eval("-1.0", parser=parser, engine=engine) == -1.0 + assert pd.eval("+1.0", parser=parser, engine=engine) == +1.0 + assert pd.eval("~1", parser=parser, engine=engine) == ~1 + assert pd.eval("-1", parser=parser, engine=engine) == -1 + assert pd.eval("+1", parser=parser, engine=engine) == +1 + assert pd.eval("~True", parser=parser, engine=engine) == ~True + assert pd.eval("~False", parser=parser, engine=engine) == ~False + assert pd.eval("-True", parser=parser, engine=engine) == -True + assert pd.eval("-False", parser=parser, engine=engine) == -False + assert pd.eval("+True", parser=parser, engine=engine) == +True + assert pd.eval("+False", parser=parser, engine=engine) == +False + + def test_unary_in_array(self): + # GH 11235 + # TODO: 2022-01-29: result return list with numexpr 2.7.3 in CI + # but cannot reproduce locally + result = np.array( + pd.eval( + "[-True, True, ~True, +True," + "-False, False, ~False, +False," + "-37, 37, ~37, +37]" + ), + dtype=np.object_, + ) + expected = np.array( + [ + -True, + True, + ~True, + +True, + -False, + False, + ~False, + +False, + -37, + 37, + ~37, + +37, + ], + dtype=np.object_, + ) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", [np.float32, np.float64]) + @pytest.mark.parametrize("expr", ["x < -0.1", "-5 > x"]) + def test_float_comparison_bin_op(self, dtype, expr): + # GH 16363 + df = DataFrame({"x": np.array([0], dtype=dtype)}) + res = df.eval(expr) + assert res.values == np.array([False]) + + @pytest.mark.parametrize( + "ex", + ( + "1 or 2", + "1 and 2", + "a and b", + "a or b", + "1 or 2 and (3 + 2) > 3", + "2 * x > 2 or 1 and 2", + "2 * df > 3 and 1 or a", + ), + ) + def test_disallow_scalar_bool_ops(self, ex, engine, parser): + x, a, b = np.random.randn(3), 1, 2 # noqa:F841 + df = DataFrame(np.random.randn(3, 2)) # noqa:F841 + + msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(ex, engine=engine, parser=parser) + + def test_identical(self, engine, parser): + # see gh-10546 + x = 1 + result = pd.eval("x", engine=engine, parser=parser) + assert result == 1 + assert is_scalar(result) + + x = 1.5 + result = pd.eval("x", engine=engine, parser=parser) + assert result == 1.5 + assert is_scalar(result) + + x = False + result = pd.eval("x", engine=engine, parser=parser) + assert not result + assert is_bool(result) + assert is_scalar(result) + + x = np.array([1]) + result = pd.eval("x", engine=engine, 
parser=parser) + tm.assert_numpy_array_equal(result, np.array([1])) + assert result.shape == (1,) + + x = np.array([1.5]) + result = pd.eval("x", engine=engine, parser=parser) + tm.assert_numpy_array_equal(result, np.array([1.5])) + assert result.shape == (1,) + + x = np.array([False]) # noqa:F841 + result = pd.eval("x", engine=engine, parser=parser) + tm.assert_numpy_array_equal(result, np.array([False])) + assert result.shape == (1,) + + def test_line_continuation(self, engine, parser): + # GH 11149 + exp = """1 + 2 * \ + 5 - 1 + 2 """ + result = pd.eval(exp, engine=engine, parser=parser) + assert result == 12 + + def test_float_truncation(self, engine, parser): + # GH 14241 + exp = "1000000000.006" + result = pd.eval(exp, engine=engine, parser=parser) + expected = np.float64(exp) + assert result == expected + + df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) + cutoff = 1000000000.0006 + result = df.query(f"A < {cutoff:.4f}") + assert result.empty + + cutoff = 1000000000.0010 + result = df.query(f"A > {cutoff:.4f}") + expected = df.loc[[1, 2], :] + tm.assert_frame_equal(expected, result) + + exact = 1000000000.0011 + result = df.query(f"A == {exact:.4f}") + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + def test_disallow_python_keywords(self): + # GH 18221 + df = DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"]) + msg = "Python keyword not valid identifier in numexpr query" + with pytest.raises(SyntaxError, match=msg): + df.query("class == 0") + + df = DataFrame() + df.index.name = "lambda" + with pytest.raises(SyntaxError, match=msg): + df.query("lambda == 0") + + def test_true_false_logic(self): + # GH 25823 + assert pd.eval("not True") == -2 + assert pd.eval("not False") == -1 + assert pd.eval("True and not True") == 0 + + def test_and_logic_string_match(self): + # GH 25823 + event = Series({"a": "hello"}) + assert pd.eval(f"{event.str.match('hello').a}") + assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}") + + +f = lambda *args, **kwargs: np.random.randn() + + +# ------------------------------------- +# gh-12388: Typecasting rules consistency with python + + +class TestTypeCasting: + @pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"]) + # maybe someday... 
numexpr has too many upcasting rules now + # chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])) + @pytest.mark.parametrize("dt", [np.float32, np.float64]) + @pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")]) + def test_binop_typecasting(self, engine, parser, op, dt, left_right): + df = tm.makeCustomDataframe(5, 3, data_gen_f=f, dtype=dt) + left, right = left_right + s = f"{left} {op} {right}" + res = pd.eval(s, engine=engine, parser=parser) + assert df.values.dtype == dt + assert res.values.dtype == dt + tm.assert_frame_equal(res, eval(s)) + + +# ------------------------------------- +# Basic and complex alignment + + +def should_warn(*args): + not_mono = not any(map(operator.attrgetter("is_monotonic_increasing"), args)) + only_one_dt = reduce( + operator.xor, map(lambda x: issubclass(x.dtype.type, np.datetime64), args) + ) + return not_mono and only_one_dt + + +class TestAlignment: + + index_types = ["i", "s", "dt"] + lhs_index_types = index_types + ["s"] # 'p' + + def test_align_nested_unary_op(self, engine, parser): + s = "df * ~2" + df = tm.makeCustomDataframe(5, 3, data_gen_f=f) + res = pd.eval(s, engine=engine, parser=parser) + tm.assert_frame_equal(res, df * ~2) + + @pytest.mark.parametrize("lr_idx_type", lhs_index_types) + @pytest.mark.parametrize("rr_idx_type", index_types) + @pytest.mark.parametrize("c_idx_type", index_types) + def test_basic_frame_alignment( + self, engine, parser, lr_idx_type, rr_idx_type, c_idx_type + ): + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=lr_idx_type, c_idx_type=c_idx_type + ) + df2 = tm.makeCustomDataframe( + 20, 10, data_gen_f=f, r_idx_type=rr_idx_type, c_idx_type=c_idx_type + ) + # only warns if not monotonic and not sortable + if should_warn(df.index, df2.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df + df2", engine=engine, parser=parser) + else: + res = pd.eval("df + df2", engine=engine, parser=parser) + tm.assert_frame_equal(res, df + df2) + + @pytest.mark.parametrize("r_idx_type", lhs_index_types) + @pytest.mark.parametrize("c_idx_type", lhs_index_types) + def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type): + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + res = pd.eval("df < 2", engine=engine, parser=parser) + tm.assert_frame_equal(res, df < 2) + + df3 = DataFrame(np.random.randn(*df.shape), index=df.index, columns=df.columns) + res = pd.eval("df < df3", engine=engine, parser=parser) + tm.assert_frame_equal(res, df < df3) + + @pytest.mark.parametrize("r1", lhs_index_types) + @pytest.mark.parametrize("c1", index_types) + @pytest.mark.parametrize("r2", index_types) + @pytest.mark.parametrize("c2", index_types) + def test_medium_complex_frame_alignment(self, engine, parser, r1, c1, r2, c2): + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + + df = tm.makeCustomDataframe( + 3, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1 + ) + df2 = tm.makeCustomDataframe( + 4, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 + ) + df3 = tm.makeCustomDataframe( + 5, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 + ) + if should_warn(df.index, df2.index, df3.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df + df2 + df3", engine=engine, parser=parser) + else: + res = pd.eval("df + df2 + df3", engine=engine, parser=parser) + 
tm.assert_frame_equal(res, df + df2 + df3) + + @pytest.mark.parametrize("index_name", ["index", "columns"]) + @pytest.mark.parametrize("c_idx_type", index_types) + @pytest.mark.parametrize("r_idx_type", lhs_index_types) + def test_basic_frame_series_alignment( + self, engine, parser, index_name, r_idx_type, c_idx_type + ): + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + if should_warn(df.index, s.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df + s", engine=engine, parser=parser) + else: + res = pd.eval("df + s", engine=engine, parser=parser) + + if r_idx_type == "dt" or c_idx_type == "dt": + expected = df.add(s) if engine == "numexpr" else df + s + else: + expected = df + s + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize("index_name", ["index", "columns"]) + @pytest.mark.parametrize( + "r_idx_type, c_idx_type", + list(product(["i", "s"], ["i", "s"])) + [("dt", "dt")], + ) + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + def test_basic_series_frame_alignment( + self, request, engine, parser, index_name, r_idx_type, c_idx_type + ): + if ( + engine == "numexpr" + and parser == "pandas" + and index_name == "index" + and r_idx_type == "i" + and c_idx_type == "s" + ): + reason = ( + f"Flaky column ordering when engine={engine}, " + f"parser={parser}, index_name={index_name}, " + f"r_idx_type={r_idx_type}, c_idx_type={c_idx_type}" + ) + request.node.add_marker(pytest.mark.xfail(reason=reason, strict=False)) + df = tm.makeCustomDataframe( + 10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + if should_warn(s.index, df.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("s + df", engine=engine, parser=parser) + else: + res = pd.eval("s + df", engine=engine, parser=parser) + + if r_idx_type == "dt" or c_idx_type == "dt": + expected = df.add(s) if engine == "numexpr" else s + df + else: + expected = s + df + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize("c_idx_type", index_types) + @pytest.mark.parametrize("r_idx_type", lhs_index_types) + @pytest.mark.parametrize("index_name", ["index", "columns"]) + @pytest.mark.parametrize("op", ["+", "*"]) + def test_series_frame_commutativity( + self, engine, parser, index_name, op, r_idx_type, c_idx_type + ): + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + lhs = f"s {op} df" + rhs = f"df {op} s" + if should_warn(df.index, s.index): + with tm.assert_produces_warning(RuntimeWarning): + a = pd.eval(lhs, engine=engine, parser=parser) + with tm.assert_produces_warning(RuntimeWarning): + b = pd.eval(rhs, engine=engine, parser=parser) + else: + a = pd.eval(lhs, engine=engine, parser=parser) + b = pd.eval(rhs, engine=engine, parser=parser) + + if r_idx_type != "dt" and c_idx_type != "dt": + if engine == "numexpr": + tm.assert_frame_equal(a, b) + + @pytest.mark.parametrize("r1", lhs_index_types) + @pytest.mark.parametrize("c1", index_types) + @pytest.mark.parametrize("r2", index_types) + 
@pytest.mark.parametrize("c2", index_types) + def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): + n = 3 + m1 = 5 + m2 = 2 * m1 + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + + index_name = random.choice(["index", "columns"]) + obj_name = random.choice(["df", "df2"]) + + df = tm.makeCustomDataframe( + m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1 + ) + df2 = tm.makeCustomDataframe( + m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 + ) + index = getattr(locals().get(obj_name), index_name) + ser = Series(np.random.randn(n), index[:n]) + + if r2 == "dt" or c2 == "dt": + if engine == "numexpr": + expected2 = df2.add(ser) + else: + expected2 = df2 + ser + else: + expected2 = df2 + ser + + if r1 == "dt" or c1 == "dt": + if engine == "numexpr": + expected = expected2.add(df) + else: + expected = expected2 + df + else: + expected = expected2 + df + + if should_warn(df2.index, ser.index, df.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df2 + ser + df", engine=engine, parser=parser) + else: + res = pd.eval("df2 + ser + df", engine=engine, parser=parser) + assert res.shape == expected.shape + tm.assert_frame_equal(res, expected) + + def test_performance_warning_for_poor_alignment(self, engine, parser): + df = DataFrame(np.random.randn(1000, 10)) + s = Series(np.random.randn(10000)) + if engine == "numexpr": + seen = PerformanceWarning + else: + seen = False + + with tm.assert_produces_warning(seen): + pd.eval("df + s", engine=engine, parser=parser) + + s = Series(np.random.randn(1000)) + with tm.assert_produces_warning(False): + pd.eval("df + s", engine=engine, parser=parser) + + df = DataFrame(np.random.randn(10, 10000)) + s = Series(np.random.randn(10000)) + with tm.assert_produces_warning(False): + pd.eval("df + s", engine=engine, parser=parser) + + df = DataFrame(np.random.randn(10, 10)) + s = Series(np.random.randn(10000)) + + is_python_engine = engine == "python" + + if not is_python_engine: + wrn = PerformanceWarning + else: + wrn = False + + with tm.assert_produces_warning(wrn) as w: + pd.eval("df + s", engine=engine, parser=parser) + + if not is_python_engine: + assert len(w) == 1 + msg = str(w[0].message) + logged = np.log10(s.size - df.shape[1]) + expected = ( + f"Alignment difference on axis 1 is larger " + f"than an order of magnitude on term 'df', " + f"by more than {logged:.4g}; performance may suffer." 
+ ) + assert msg == expected + + +# ------------------------------------ +# Slightly more complex ops + + +class TestOperations: + def eval(self, *args, **kwargs): + kwargs["level"] = kwargs.pop("level", 0) + 1 + return pd.eval(*args, **kwargs) + + def test_simple_arith_ops(self, engine, parser): + exclude_arith = [] + if parser == "python": + exclude_arith = ["in", "not in"] + + arith_ops = [ + op + for op in expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS + if op not in exclude_arith + ] + + ops = (op for op in arith_ops if op != "//") + + for op in ops: + ex = f"1 {op} 1" + ex2 = f"x {op} 1" + ex3 = f"1 {op} (x + 1)" + + if op in ("in", "not in"): + msg = "argument of type 'int' is not iterable" + with pytest.raises(TypeError, match=msg): + pd.eval(ex, engine=engine, parser=parser) + else: + expec = _eval_single_bin(1, op, 1, engine) + x = self.eval(ex, engine=engine, parser=parser) + assert x == expec + + expec = _eval_single_bin(x, op, 1, engine) + y = self.eval(ex2, local_dict={"x": x}, engine=engine, parser=parser) + assert y == expec + + expec = _eval_single_bin(1, op, x + 1, engine) + y = self.eval(ex3, local_dict={"x": x}, engine=engine, parser=parser) + assert y == expec + + @pytest.mark.parametrize("rhs", [True, False]) + @pytest.mark.parametrize("lhs", [True, False]) + @pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS) + def test_simple_bool_ops(self, rhs, lhs, op): + ex = f"{lhs} {op} {rhs}" + + if parser == "python" and op in ["and", "or"]: + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + self.eval(ex) + return + + res = self.eval(ex) + exp = eval(ex) + assert res == exp + + @pytest.mark.parametrize("rhs", [True, False]) + @pytest.mark.parametrize("lhs", [True, False]) + @pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS) + def test_bool_ops_with_constants(self, rhs, lhs, op): + ex = f"{lhs} {op} {rhs}" + + if parser == "python" and op in ["and", "or"]: + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + self.eval(ex) + return + + res = self.eval(ex) + exp = eval(ex) + assert res == exp + + def test_4d_ndarray_fails(self): + x = np.random.randn(3, 4, 5, 6) + y = Series(np.random.randn(10)) + msg = "N-dimensional objects, where N > 2, are not supported with eval" + with pytest.raises(NotImplementedError, match=msg): + self.eval("x + y", local_dict={"x": x, "y": y}) + + def test_constant(self): + x = self.eval("1") + assert x == 1 + + def test_single_variable(self): + df = DataFrame(np.random.randn(10, 2)) + df2 = self.eval("df", local_dict={"df": df}) + tm.assert_frame_equal(df, df2) + + def test_truediv(self): + s = np.array([1]) # noqa:F841 + ex = "s / 1" + + # FutureWarning: The `truediv` parameter in pd.eval is deprecated and will be + # removed in a future version. 
+ with tm.assert_produces_warning(FutureWarning): + res = self.eval(ex, truediv=False) + tm.assert_numpy_array_equal(res, np.array([1.0])) + + with tm.assert_produces_warning(FutureWarning): + res = self.eval(ex, truediv=True) + tm.assert_numpy_array_equal(res, np.array([1.0])) + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("1 / 2", truediv=True) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("1 / 2", truediv=False) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("s / 2", truediv=False) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("s / 2", truediv=True) + expec = 0.5 + assert res == expec + + def test_failing_subscript_with_name_error(self): + df = DataFrame(np.random.randn(5, 3)) # noqa:F841 + with pytest.raises(NameError, match="name 'x' is not defined"): + self.eval("df[x > 2] > 2") + + def test_lhs_expression_subscript(self): + df = DataFrame(np.random.randn(5, 3)) + result = self.eval("(df + 1)[df > 2]", local_dict={"df": df}) + expected = (df + 1)[df > 2] + tm.assert_frame_equal(result, expected) + + def test_attr_expression(self): + df = DataFrame(np.random.randn(5, 3), columns=list("abc")) + expr1 = "df.a < df.b" + expec1 = df.a < df.b + expr2 = "df.a + df.b + df.c" + expec2 = df.a + df.b + df.c + expr3 = "df.a + df.b + df.c[df.b < 0]" + expec3 = df.a + df.b + df.c[df.b < 0] + exprs = expr1, expr2, expr3 + expecs = expec1, expec2, expec3 + for e, expec in zip(exprs, expecs): + tm.assert_series_equal(expec, self.eval(e, local_dict={"df": df})) + + def test_assignment_fails(self): + df = DataFrame(np.random.randn(5, 3), columns=list("abc")) + df2 = DataFrame(np.random.randn(5, 3)) + expr1 = "df = df2" + msg = "cannot assign without a target object" + with pytest.raises(ValueError, match=msg): + self.eval(expr1, local_dict={"df": df, "df2": df2}) + + def test_assignment_column_multiple_raise(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + # multiple assignees + with pytest.raises(SyntaxError, match="invalid syntax"): + df.eval("d c = a + b") + + def test_assignment_column_invalid_assign(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + # invalid assignees + msg = "left hand side of an assignment must be a single name" + with pytest.raises(SyntaxError, match=msg): + df.eval("d,c = a + b") + + def test_assignment_column_invalid_assign_function_call(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + msg = "cannot assign to function call" + with pytest.raises(SyntaxError, match=msg): + df.eval('Timestamp("20131001") = a + b') + + def test_assignment_single_assign_existing(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + # single assignment - existing variable + expected = df.copy() + expected["a"] = expected["a"] + expected["b"] + df.eval("a = a + b", inplace=True) + tm.assert_frame_equal(df, expected) + + def test_assignment_single_assign_new(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + # single assignment - new variable + expected = df.copy() + expected["c"] = expected["a"] + expected["b"] + df.eval("c = a + b", inplace=True) + tm.assert_frame_equal(df, expected) + + def test_assignment_single_assign_local_overlap(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = df.copy() + a = 1 # noqa:F841 + df.eval("a = 1 + b", inplace=True) + + expected = df.copy() + 
expected["a"] = 1 + expected["b"] + tm.assert_frame_equal(df, expected) + + def test_assignment_single_assign_name(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + + a = 1 # noqa:F841 + old_a = df.a.copy() + df.eval("a = a + b", inplace=True) + result = old_a + df.b + tm.assert_series_equal(result, df.a, check_names=False) + assert result.name is None + + def test_assignment_multiple_raises(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + # multiple assignment + df.eval("c = a + b", inplace=True) + msg = "can only assign a single expression" + with pytest.raises(SyntaxError, match=msg): + df.eval("c = a = b") + + def test_assignment_explicit(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + # explicit targets + self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True) + expected = df.copy() + expected["c"] = expected["a"] + expected["b"] + tm.assert_frame_equal(df, expected) + + def test_column_in(self): + # GH 11235 + df = DataFrame({"a": [11], "b": [-32]}) + result = df.eval("a in [11, -32]") + expected = Series([True]) + # TODO: 2022-01-29: Name check failed with numexpr 2.7.3 in CI + # but cannot reproduce locally + tm.assert_series_equal(result, expected, check_names=False) + + @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.") + def test_assignment_not_inplace(self): + # see gh-9297 + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + + actual = df.eval("c = a + b", inplace=False) + assert actual is not None + + expected = df.copy() + expected["c"] = expected["a"] + expected["b"] + tm.assert_frame_equal(df, expected) + + def test_multi_line_expression(self): + # GH 11149 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + expected["c"] = expected["a"] + expected["b"] + expected["d"] = expected["c"] + expected["b"] + answer = df.eval( + """ + c = a + b + d = c + b""", + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + expected["a"] = expected["a"] - 1 + expected["e"] = expected["a"] + 2 + answer = df.eval( + """ + a = a - 1 + e = a + 2""", + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + # multi-line not valid if not all assignments + msg = "Multi-line expressions are only valid if all expressions contain" + with pytest.raises(ValueError, match=msg): + df.eval( + """ + a = b + 2 + b - 2""", + inplace=False, + ) + + def test_multi_line_expression_not_inplace(self): + # GH 11149 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + expected["c"] = expected["a"] + expected["b"] + expected["d"] = expected["c"] + expected["b"] + df = df.eval( + """ + c = a + b + d = c + b""", + inplace=False, + ) + tm.assert_frame_equal(expected, df) + + expected["a"] = expected["a"] - 1 + expected["e"] = expected["a"] + 2 + df = df.eval( + """ + a = a - 1 + e = a + 2""", + inplace=False, + ) + tm.assert_frame_equal(expected, df) + + def test_multi_line_expression_local_variable(self): + # GH 15342 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + local_var = 7 + expected["c"] = expected["a"] * local_var + expected["d"] = expected["c"] + local_var + answer = df.eval( + """ + c = a * @local_var + d = c + @local_var + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + def test_multi_line_expression_callable_local_variable(self): + # 26426 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + def local_func(a, b): + 
return b + + expected = df.copy() + expected["c"] = expected["a"] * local_func(1, 7) + expected["d"] = expected["c"] + local_func(1, 7) + answer = df.eval( + """ + c = a * @local_func(1, 7) + d = c + @local_func(1, 7) + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + def test_multi_line_expression_callable_local_variable_with_kwargs(self): + # 26426 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + def local_func(a, b): + return b + + expected = df.copy() + expected["c"] = expected["a"] * local_func(b=7, a=1) + expected["d"] = expected["c"] + local_func(b=7, a=1) + answer = df.eval( + """ + c = a * @local_func(b=7, a=1) + d = c + @local_func(b=7, a=1) + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert answer is None + + def test_assignment_in_query(self): + # GH 8664 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + msg = "cannot assign without a target object" + with pytest.raises(ValueError, match=msg): + df.query("a = 1") + tm.assert_frame_equal(df, df_orig) + + def test_query_inplace(self): + # see gh-11149 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + expected = expected[expected["a"] == 2] + df.query("a == 2", inplace=True) + tm.assert_frame_equal(expected, df) + + df = {} + expected = {"a": 3} + + self.eval("a = 1 + 2", target=df, inplace=True) + tm.assert_dict_equal(df, expected) + + @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], np.array([]), (1, 3)]) + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_cannot_item_assign(self, invalid_target): + msg = "Cannot assign expression output to target" + expression = "a = 1 + 2" + + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=invalid_target, inplace=True) + + if hasattr(invalid_target, "copy"): + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) + def test_cannot_copy_item(self, invalid_target): + msg = "Cannot return a copy of the target" + expression = "a = 1 + 2" + + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("target", [1, "cat", [1, 2], np.array([]), (1, 3), {1: 2}]) + def test_inplace_no_assignment(self, target): + expression = "1 + 2" + + assert self.eval(expression, target=target, inplace=False) == 3 + + msg = "Cannot operate inplace if there is no assignment" + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=target, inplace=True) + + def test_basic_period_index_boolean_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + + e = df < 2 + r = self.eval("df < 2", local_dict={"df": df}) + x = df < 2 + + tm.assert_frame_equal(r, e) + tm.assert_frame_equal(x, e) + + def test_basic_period_index_subscript_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + r = self.eval("df[df < 2 + 3]", local_dict={"df": df}) + e = df[df < 2 + 3] + tm.assert_frame_equal(r, e) + + def test_nested_period_index_subscript_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + r = self.eval("df[df[df < 2] < 2] + df * 2", local_dict={"df": df}) + e = df[df[df < 2] < 2] + df * 2 + tm.assert_frame_equal(r, e) + + def test_date_boolean(self, engine, parser): + df = DataFrame(np.random.randn(5, 3)) + 
df["dates1"] = date_range("1/1/2012", periods=5) + res = self.eval( + "df.dates1 < 20130101", + local_dict={"df": df}, + engine=engine, + parser=parser, + ) + expec = df.dates1 < "20130101" + tm.assert_series_equal(res, expec, check_names=False) + + def test_simple_in_ops(self, engine, parser): + if parser != "python": + res = pd.eval("1 in [1, 2]", engine=engine, parser=parser) + assert res + + res = pd.eval("2 in (1, 2)", engine=engine, parser=parser) + assert res + + res = pd.eval("3 in (1, 2)", engine=engine, parser=parser) + assert not res + + res = pd.eval("3 not in (1, 2)", engine=engine, parser=parser) + assert res + + res = pd.eval("[3] not in (1, 2)", engine=engine, parser=parser) + assert res + + res = pd.eval("[3] in ([3], 2)", engine=engine, parser=parser) + assert res + + res = pd.eval("[[3]] in [[[3]], 2]", engine=engine, parser=parser) + assert res + + res = pd.eval("(3,) in [(3,), 2]", engine=engine, parser=parser) + assert res + + res = pd.eval("(3,) not in [(3,), 2]", engine=engine, parser=parser) + assert not res + + res = pd.eval("[(3,)] in [[(3,)], 2]", engine=engine, parser=parser) + assert res + else: + msg = "'In' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval("1 in [1, 2]", engine=engine, parser=parser) + with pytest.raises(NotImplementedError, match=msg): + pd.eval("2 in (1, 2)", engine=engine, parser=parser) + with pytest.raises(NotImplementedError, match=msg): + pd.eval("3 in (1, 2)", engine=engine, parser=parser) + with pytest.raises(NotImplementedError, match=msg): + pd.eval("[(3,)] in (1, 2, [(3,)])", engine=engine, parser=parser) + msg = "'NotIn' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval("3 not in (1, 2)", engine=engine, parser=parser) + with pytest.raises(NotImplementedError, match=msg): + pd.eval("[3] not in (1, 2, [[3]])", engine=engine, parser=parser) + + def test_check_many_exprs(self, engine, parser): + a = 1 # noqa:F841 + expr = " * ".join("a" * 33) + expected = 1 + res = pd.eval(expr, engine=engine, parser=parser) + assert res == expected + + @pytest.mark.parametrize( + "expr", + [ + "df > 2 and df > 3", + "df > 2 or df > 3", + "not df > 2", + ], + ) + def test_fails_and_or_not(self, expr, engine, parser): + df = DataFrame(np.random.randn(5, 3)) + if parser == "python": + msg = "'BoolOp' nodes are not implemented" + if "not" in expr: + msg = "'Not' nodes are not implemented" + + with pytest.raises(NotImplementedError, match=msg): + pd.eval( + expr, + local_dict={"df": df}, + parser=parser, + engine=engine, + ) + else: + # smoke-test, should not raise + pd.eval( + expr, + local_dict={"df": df}, + parser=parser, + engine=engine, + ) + + @pytest.mark.parametrize("char", ["|", "&"]) + def test_fails_ampersand_pipe(self, char, engine, parser): + df = DataFrame(np.random.randn(5, 3)) # noqa:F841 + ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)" + if parser == "python": + msg = "cannot evaluate scalar only bool ops" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(ex, parser=parser, engine=engine) + else: + # smoke-test, should not raise + pd.eval(ex, parser=parser, engine=engine) + + +class TestMath: + def eval(self, *args, **kwargs): + kwargs["level"] = kwargs.pop("level", 0) + 1 + return pd.eval(*args, **kwargs) + + def test_unary_functions(self, unary_fns_for_ne): + df = DataFrame({"a": np.random.randn(10)}) + a = df.a + + fn = unary_fns_for_ne + + expr = f"{fn}(a)" + got = self.eval(expr) + with np.errstate(all="ignore"): + expect = getattr(np, 
fn)(a) + tm.assert_series_equal(got, expect, check_names=False) + + @pytest.mark.parametrize("fn", _binary_math_ops) + def test_binary_functions(self, fn): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + a = df.a + b = df.b + + expr = f"{fn}(a, b)" + got = self.eval(expr) + with np.errstate(all="ignore"): + expect = getattr(np, fn)(a, b) + tm.assert_almost_equal(got, expect, check_names=False) + + def test_df_use_case(self, engine, parser): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval( + "e = arctan2(sin(a), b)", + engine=engine, + parser=parser, + inplace=True, + ) + got = df.e + expect = np.arctan2(np.sin(df.a), df.b) + tm.assert_series_equal(got, expect, check_names=False) + + def test_df_arithmetic_subexpression(self, engine, parser): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True) + got = df.e + expect = np.sin(df.a + df.b) + tm.assert_series_equal(got, expect, check_names=False) + + @pytest.mark.parametrize( + "dtype, expect_dtype", + [ + (np.int32, np.float64), + (np.int64, np.float64), + (np.float32, np.float32), + (np.float64, np.float64), + pytest.param(np.complex128, np.complex128, marks=td.skip_if_windows), + ], + ) + def test_result_types(self, dtype, expect_dtype, engine, parser): + # xref https://github.com/pandas-dev/pandas/issues/12293 + # this fails on Windows, apparently a floating point precision issue + + # Did not test complex64 because DataFrame is converting it to + # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952 + df = DataFrame({"a": np.random.randn(10).astype(dtype)}) + assert df.a.dtype == dtype + df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True) + got = df.b + expect = np.sin(df.a) + assert expect.dtype == got.dtype + assert expect_dtype == got.dtype + tm.assert_series_equal(got, expect, check_names=False) + + def test_undefined_func(self, engine, parser): + df = DataFrame({"a": np.random.randn(10)}) + msg = '"mysin" is not a supported function' + + with pytest.raises(ValueError, match=msg): + df.eval("mysin(a)", engine=engine, parser=parser) + + def test_keyword_arg(self, engine, parser): + df = DataFrame({"a": np.random.randn(10)}) + msg = 'Function "sin" does not support keyword arguments' + + with pytest.raises(TypeError, match=msg): + df.eval("sin(x=a)", engine=engine, parser=parser) + + +_var_s = np.random.randn(10) + + +class TestScope: + def test_global_scope(self, engine, parser): + e = "_var_s * 2" + tm.assert_numpy_array_equal( + _var_s * 2, pd.eval(e, engine=engine, parser=parser) + ) + + def test_no_new_locals(self, engine, parser): + x = 1 + lcls = locals().copy() + pd.eval("x + 1", local_dict=lcls, engine=engine, parser=parser) + lcls2 = locals().copy() + lcls2.pop("lcls") + assert lcls == lcls2 + + def test_no_new_globals(self, engine, parser): + x = 1 # noqa:F841 + gbls = globals().copy() + pd.eval("x + 1", engine=engine, parser=parser) + gbls2 = globals().copy() + assert gbls == gbls2 + + def test_empty_locals(self, engine, parser): + # GH 47084 + x = 1 # noqa: F841 + msg = "name 'x' is not defined" + with pytest.raises(UndefinedVariableError, match=msg): + pd.eval("x + 1", engine=engine, parser=parser, local_dict={}) + + def test_empty_globals(self, engine, parser): + # GH 47084 + msg = "name '_var_s' is not defined" + e = "_var_s * 2" + with pytest.raises(UndefinedVariableError, match=msg): + pd.eval(e, engine=engine, parser=parser, global_dict={}) + 
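The TestScope cases above exercise how pd.eval() resolves names: by default it inspects the calling frame, while an explicit local_dict/global_dict replaces that lookup. The following standalone sketch (not part of the imported test file) mirrors that behaviour; the engine/parser strings and the local_dict keyword are the documented pd.eval parameters, whereas scope_demo() and its variable names are purely illustrative.

import numpy as np
import pandas as pd


def scope_demo() -> None:
    x = 10  # a function-local name, analogous to the locals used in TestScope

    # By default pd.eval() inspects the calling frame, so "x" resolves here.
    assert pd.eval("x + 1", engine="python", parser="pandas") == 11

    # An explicit (empty) local_dict hides frame locals, which is the behaviour
    # test_empty_locals asserts via UndefinedVariableError.
    try:
        pd.eval("x + 1", engine="python", parser="pandas", local_dict={})
    except Exception as err:
        print(type(err).__name__, err)

    # Inside DataFrame.eval()/query() (pandas parser only), the "@" prefix is
    # what reaches function locals; top-level pd.eval() rejects it, as
    # test_invalid_local_variable_reference below expects.
    df = pd.DataFrame({"a": np.arange(5)})
    threshold = 2
    print(df.query("a > @threshold", engine="python"))


scope_demo()

The module-level tests that follow cover the complementary error paths: unknown engine or parser names raise KeyError, and the numexpr-specific cases are skipped when numexpr is unavailable.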
+ +@td.skip_if_no_ne +def test_invalid_engine(): + msg = "Invalid engine 'asdf' passed" + with pytest.raises(KeyError, match=msg): + pd.eval("x + y", local_dict={"x": 1, "y": 2}, engine="asdf") + + +@td.skip_if_no_ne +@pytest.mark.parametrize( + ("use_numexpr", "expected"), + ( + (True, "numexpr"), + (False, "python"), + ), +) +def test_numexpr_option_respected(use_numexpr, expected): + # GH 32556 + from pandas.core.computation.eval import _check_engine + + with pd.option_context("compute.use_numexpr", use_numexpr): + result = _check_engine(None) + assert result == expected + + +@td.skip_if_no_ne +def test_numexpr_option_incompatible_op(): + # GH 32556 + with pd.option_context("compute.use_numexpr", False): + df = DataFrame( + {"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]} + ) + result = df.query("A.isnull()") + expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5]) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_ne +def test_invalid_parser(): + msg = "Invalid parser 'asdf' passed" + with pytest.raises(KeyError, match=msg): + pd.eval("x + y", local_dict={"x": 1, "y": 2}, parser="asdf") + + +_parsers: dict[str, type[BaseExprVisitor]] = { + "python": PythonExprVisitor, + "pytables": pytables.PyTablesExprVisitor, + "pandas": PandasExprVisitor, +} + + +@pytest.mark.parametrize("engine", ENGINES) +@pytest.mark.parametrize("parser", _parsers) +def test_disallowed_nodes(engine, parser): + VisitorClass = _parsers[parser] + inst = VisitorClass("x + 1", engine, parser) + + for ops in VisitorClass.unsupported_nodes: + + msg = "nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + getattr(inst, ops)() + + +def test_syntax_error_exprs(engine, parser): + e = "s +" + with pytest.raises(SyntaxError, match="invalid syntax"): + pd.eval(e, engine=engine, parser=parser) + + +def test_name_error_exprs(engine, parser): + e = "s + t" + msg = "name 's' is not defined" + with pytest.raises(NameError, match=msg): + pd.eval(e, engine=engine, parser=parser) + + +@pytest.mark.parametrize("express", ["a + @b", "@a + b", "@a + @b"]) +def test_invalid_local_variable_reference(engine, parser, express): + a, b = 1, 2 # noqa:F841 + + if parser != "pandas": + with pytest.raises(SyntaxError, match="The '@' prefix is only"): + pd.eval(express, engine=engine, parser=parser) + else: + with pytest.raises(SyntaxError, match="The '@' prefix is not"): + pd.eval(express, engine=engine, parser=parser) + + +def test_numexpr_builtin_raises(engine, parser): + sin, dotted_line = 1, 2 + if engine == "numexpr": + msg = "Variables in expression .+" + with pytest.raises(NumExprClobberingError, match=msg): + pd.eval("sin + dotted_line", engine=engine, parser=parser) + else: + res = pd.eval("sin + dotted_line", engine=engine, parser=parser) + assert res == sin + dotted_line + + +def test_bad_resolver_raises(engine, parser): + cannot_resolve = 42, 3.0 + with pytest.raises(TypeError, match="Resolver of type .+"): + pd.eval("1 + 2", resolvers=cannot_resolve, engine=engine, parser=parser) + + +def test_empty_string_raises(engine, parser): + # GH 13139 + with pytest.raises(ValueError, match="expr cannot be an empty string"): + pd.eval("", engine=engine, parser=parser) + + +def test_more_than_one_expression_raises(engine, parser): + with pytest.raises(SyntaxError, match="only a single expression is allowed"): + pd.eval("1 + 1; 2 + 2", engine=engine, parser=parser) + + +@pytest.mark.parametrize("cmp", ("and", "or")) +@pytest.mark.parametrize("lhs", (int, float)) 
+@pytest.mark.parametrize("rhs", (int, float)) +def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): + gen = {int: lambda: np.random.randint(10), float: np.random.randn} + + mid = gen[lhs]() # noqa:F841 + lhs = gen[lhs]() + rhs = gen[rhs]() + + ex1 = f"lhs {cmp} mid {cmp} rhs" + ex2 = f"lhs {cmp} mid and mid {cmp} rhs" + ex3 = f"(lhs {cmp} mid) & (mid {cmp} rhs)" + for ex in (ex1, ex2, ex3): + msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" + with pytest.raises(NotImplementedError, match=msg): + pd.eval(ex, engine=engine, parser=parser) + + +@pytest.mark.parametrize( + "other", + [ + "'x'", + "...", + ], +) +def test_equals_various(other): + df = DataFrame({"A": ["a", "b", "c"]}) + result = df.eval(f"A == {other}") + expected = Series([False, False, False], name="A") + if USE_NUMEXPR: + # https://github.com/pandas-dev/pandas/issues/10239 + # lose name with numexpr engine. Remove when that's fixed. + expected.name = None + tm.assert_series_equal(result, expected) + + +def test_inf(engine, parser): + s = "inf + 1" + expected = np.inf + result = pd.eval(s, engine=engine, parser=parser) + assert result == expected + + +def test_truediv_deprecated(engine, parser): + # GH#29182 + match = "The `truediv` parameter in pd.eval is deprecated" + + with tm.assert_produces_warning(FutureWarning) as m: + pd.eval("1+1", engine=engine, parser=parser, truediv=True) + + assert len(m) == 1 + assert match in str(m[0].message) + + with tm.assert_produces_warning(FutureWarning) as m: + pd.eval("1+1", engine=engine, parser=parser, truediv=False) + + assert len(m) == 1 + assert match in str(m[0].message) + + +@pytest.mark.parametrize("column", ["Temp(°C)", "Capacitance(μF)"]) +def test_query_token(engine, column): + # See: https://github.com/pandas-dev/pandas/pull/42826 + df = DataFrame(np.random.randn(5, 2), columns=[column, "b"]) + expected = df[df[column] > 5] + query_string = f"`{column}` > 5" + result = df.query(query_string, engine=engine) + tm.assert_frame_equal(result, expected) + + +def test_negate_lt_eq_le(engine, parser): + df = DataFrame([[0, 10], [1, 20]], columns=["cat", "count"]) + expected = df[~(df.cat > 0)] + + result = df.query("~(cat > 0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + if parser == "python": + msg = "'Not' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query("not (cat > 0)", engine=engine, parser=parser) + else: + result = df.query("not (cat > 0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "column", + DEFAULT_GLOBALS.keys(), +) +def test_eval_no_support_column_name(request, column): + # GH 44603 + if column in ["True", "False", "inf", "Inf"]: + request.node.add_marker( + pytest.mark.xfail( + raises=KeyError, + reason=f"GH 47859 DataFrame eval not supported with {column}", + ) + ) + + df = DataFrame(np.random.randint(0, 100, size=(10, 2)), columns=[column, "col1"]) + expected = df[df[column] > 6] + result = df.query(f"{column}>6") + + tm.assert_frame_equal(result, expected) + + +@td.skip_array_manager_not_yet_implemented +def test_set_inplace(using_copy_on_write): + # https://github.com/pandas-dev/pandas/issues/47449 + # Ensure we don't only update the DataFrame inplace, but also the actual + # column values, such that references to this column also get updated + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result_view = df[:] + ser = df["A"] + df.eval("A = B + C", inplace=True) + expected = 
DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]}) + tm.assert_frame_equal(df, expected) + if not using_copy_on_write: + tm.assert_series_equal(ser, expected["A"]) + tm.assert_series_equal(result_view["A"], expected["A"]) + else: + expected = Series([1, 2, 3], name="A") + tm.assert_series_equal(ser, expected) + tm.assert_series_equal(result_view["A"], expected) + + +class TestValidate: + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, value): + + msg = 'For argument "inplace" expected type bool, received type' + with pytest.raises(ValueError, match=msg): + pd.eval("2+2", inplace=value) diff --git a/pandas/tests/config/__init__.py b/pandas/tests/config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py new file mode 100644 index 00000000..cc394bbb --- /dev/null +++ b/pandas/tests/config/test_config.py @@ -0,0 +1,477 @@ +import warnings + +import pytest + +from pandas._config import config as cf +from pandas._config.config import OptionError + +import pandas as pd + + +class TestConfig: + @classmethod + def setup_class(cls): + from copy import deepcopy + + cls.cf = cf + cls.gc = deepcopy(getattr(cls.cf, "_global_config")) + cls.do = deepcopy(getattr(cls.cf, "_deprecated_options")) + cls.ro = deepcopy(getattr(cls.cf, "_registered_options")) + + def setup_method(self): + setattr(self.cf, "_global_config", {}) + setattr(self.cf, "options", self.cf.DictWrapper(self.cf._global_config)) + setattr(self.cf, "_deprecated_options", {}) + setattr(self.cf, "_registered_options", {}) + + # Our test fixture in conftest.py sets "chained_assignment" + # to "raise" only after all test methods have been setup. + # However, after this setup, there is no longer any + # "chained_assignment" option, so re-register it. + self.cf.register_option("chained_assignment", "raise") + + def teardown_method(self): + setattr(self.cf, "_global_config", self.gc) + setattr(self.cf, "_deprecated_options", self.do) + setattr(self.cf, "_registered_options", self.ro) + + def test_api(self): + + # the pandas object exposes the user API + assert hasattr(pd, "get_option") + assert hasattr(pd, "set_option") + assert hasattr(pd, "reset_option") + assert hasattr(pd, "describe_option") + + def test_is_one_of_factory(self): + v = self.cf.is_one_of_factory([None, 12]) + + v(12) + v(None) + msg = r"Value must be one of None\|12" + with pytest.raises(ValueError, match=msg): + v(1.1) + + def test_register_option(self): + self.cf.register_option("a", 1, "doc") + + # can't register an already registered option + msg = "Option 'a' has already been registered" + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a", 1, "doc") + + # can't register an already registered option + msg = "Path prefix to option 'a' is already an option" + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a.b.c.d1", 1, "doc") + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a.b.c.d2", 1, "doc") + + # no python keywords + msg = "for is a python keyword" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("for", 0) + with pytest.raises(ValueError, match=msg): + self.cf.register_option("a.for.b", 0) + # must be valid identifier (ensure attribute access works) + msg = "oh my goddess! 
is not a valid identifier" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("Oh my Goddess!", 0) + + # we can register options several levels deep + # without predefining the intermediate steps + # and we can define differently named options + # in the same namespace + self.cf.register_option("k.b.c.d1", 1, "doc") + self.cf.register_option("k.b.c.d2", 1, "doc") + + def test_describe_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b", 1, "doc2") + self.cf.deprecate_option("b") + + self.cf.register_option("c.d.e1", 1, "doc3") + self.cf.register_option("c.d.e2", 1, "doc4") + self.cf.register_option("f", 1) + self.cf.register_option("g.h", 1) + self.cf.register_option("k", 2) + self.cf.deprecate_option("g.h", rkey="k") + self.cf.register_option("l", "foo") + + # non-existent keys raise KeyError + msg = r"No such keys\(s\)" + with pytest.raises(OptionError, match=msg): + self.cf.describe_option("no.such.key") + + # we can get the description for any key we registered + assert "doc" in self.cf.describe_option("a", _print_desc=False) + assert "doc2" in self.cf.describe_option("b", _print_desc=False) + assert "precated" in self.cf.describe_option("b", _print_desc=False) + assert "doc3" in self.cf.describe_option("c.d.e1", _print_desc=False) + assert "doc4" in self.cf.describe_option("c.d.e2", _print_desc=False) + + # if no doc is specified we get a default message + # saying "description not available" + assert "available" in self.cf.describe_option("f", _print_desc=False) + assert "available" in self.cf.describe_option("g.h", _print_desc=False) + assert "precated" in self.cf.describe_option("g.h", _print_desc=False) + assert "k" in self.cf.describe_option("g.h", _print_desc=False) + + # default is reported + assert "foo" in self.cf.describe_option("l", _print_desc=False) + # current value is reported + assert "bar" not in self.cf.describe_option("l", _print_desc=False) + self.cf.set_option("l", "bar") + assert "bar" in self.cf.describe_option("l", _print_desc=False) + + def test_case_insensitive(self): + self.cf.register_option("KanBAN", 1, "doc") + + assert "doc" in self.cf.describe_option("kanbaN", _print_desc=False) + assert self.cf.get_option("kanBaN") == 1 + self.cf.set_option("KanBan", 2) + assert self.cf.get_option("kAnBaN") == 2 + + # gets of non-existent keys fail + msg = r"No such keys\(s\): 'no_such_option'" + with pytest.raises(OptionError, match=msg): + self.cf.get_option("no_such_option") + self.cf.deprecate_option("KanBan") + + assert self.cf._is_deprecated("kAnBaN") + + def test_get_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + # gets of existing keys succeed + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + # gets of non-existent keys fail + msg = r"No such keys\(s\): 'no_such_option'" + with pytest.raises(OptionError, match=msg): + self.cf.get_option("no_such_option") + + def test_set_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + self.cf.set_option("b.b", 1.1) + + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == 
"wurld" + assert self.cf.get_option("b.b") == 1.1 + + msg = r"No such keys\(s\): 'no.such.key'" + with pytest.raises(OptionError, match=msg): + self.cf.set_option("no.such.key", None) + + def test_set_option_empty_args(self): + msg = "Must provide an even number of non-keyword arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option() + + def test_set_option_uneven_args(self): + msg = "Must provide an even number of non-keyword arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a.b", 2, "b.c") + + def test_set_option_invalid_single_argument_type(self): + msg = "Must provide an even number of non-keyword arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option(2) + + def test_set_option_multiple(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + self.cf.set_option("a", "2", "b.c", None, "b.b", 10.0) + + assert self.cf.get_option("a") == "2" + assert self.cf.get_option("b.c") is None + assert self.cf.get_option("b.b") == 10.0 + + def test_validation(self): + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("d", 1, "doc", validator=self.cf.is_nonnegative_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_text) + + msg = "Value must have type ''" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("a.b.c.d2", "NO", "doc", validator=self.cf.is_int) + + self.cf.set_option("a", 2) # int is_int + self.cf.set_option("b.c", "wurld") # str is_str + self.cf.set_option("d", 2) + self.cf.set_option("d", None) # non-negative int can be None + + # None not is_int + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a", None) + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a", "ab") + + msg = "Value must be a nonnegative integer or None" + with pytest.raises(ValueError, match=msg): + self.cf.register_option( + "a.b.c.d3", "NO", "doc", validator=self.cf.is_nonnegative_int + ) + with pytest.raises(ValueError, match=msg): + self.cf.register_option( + "a.b.c.d3", -2, "doc", validator=self.cf.is_nonnegative_int + ) + + msg = r"Value must be an instance of \|" + with pytest.raises(ValueError, match=msg): + self.cf.set_option("b.c", 1) + + validator = self.cf.is_one_of_factory([None, self.cf.is_callable]) + self.cf.register_option("b", lambda: None, "doc", validator=validator) + self.cf.set_option("b", "%.1f".format) # Formatter is callable + self.cf.set_option("b", None) # Formatter is none (default) + with pytest.raises(ValueError, match="Value must be a callable"): + self.cf.set_option("b", "%.1f") + + def test_reset_option(self): + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_str) + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == "wurld" + + self.cf.reset_option("a") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "wurld" + self.cf.reset_option("b.c") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + def test_reset_option_all(self): + self.cf.register_option("a", 1, "doc", 
validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_str) + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == "wurld" + + self.cf.reset_option("all") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + def test_deprecate_option(self): + # we can deprecate non-existent options + self.cf.deprecate_option("foo") + + assert self.cf._is_deprecated("foo") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with pytest.raises(KeyError, match="No such keys.s.: 'foo'"): + self.cf.get_option("foo") + assert len(w) == 1 # should have raised one warning + assert "deprecated" in str(w[-1]) # we get the default message + + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("foo", "hullo", "doc2") + + self.cf.deprecate_option("a", removal_ver="nifty_ver") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.get_option("a") + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the default message + assert "nifty_ver" in str(w[-1]) # with the removal_ver quoted + + msg = "Option 'a' has already been defined as deprecated" + with pytest.raises(OptionError, match=msg): + self.cf.deprecate_option("a") + + self.cf.deprecate_option("b.c", "zounds!") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.get_option("b.c") + + assert len(w) == 1 # should have raised one warning + assert "zounds!" 
in str(w[-1]) # we get the custom message + + # test rerouting keys + self.cf.register_option("d.a", "foo", "doc2") + self.cf.register_option("d.dep", "bar", "doc2") + assert self.cf.get_option("d.a") == "foo" + assert self.cf.get_option("d.dep") == "bar" + + self.cf.deprecate_option("d.dep", rkey="d.a") # reroute d.dep to d.a + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert self.cf.get_option("d.dep") == "foo" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.set_option("d.dep", "baz") # should overwrite "d.a" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert self.cf.get_option("d.dep") == "baz" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + def test_config_prefix(self): + with self.cf.config_prefix("base"): + self.cf.register_option("a", 1, "doc1") + self.cf.register_option("b", 2, "doc2") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b") == 2 + + self.cf.set_option("a", 3) + self.cf.set_option("b", 4) + assert self.cf.get_option("a") == 3 + assert self.cf.get_option("b") == 4 + + assert self.cf.get_option("base.a") == 3 + assert self.cf.get_option("base.b") == 4 + assert "doc1" in self.cf.describe_option("base.a", _print_desc=False) + assert "doc2" in self.cf.describe_option("base.b", _print_desc=False) + + self.cf.reset_option("base.a") + self.cf.reset_option("base.b") + + with self.cf.config_prefix("base"): + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b") == 2 + + def test_callback(self): + k = [None] + v = [None] + + def callback(key): + k.append(key) + v.append(self.cf.get_option(key)) + + self.cf.register_option("d.a", "foo", cb=callback) + self.cf.register_option("d.b", "foo", cb=callback) + + del k[-1], v[-1] + self.cf.set_option("d.a", "fooz") + assert k[-1] == "d.a" + assert v[-1] == "fooz" + + del k[-1], v[-1] + self.cf.set_option("d.b", "boo") + assert k[-1] == "d.b" + assert v[-1] == "boo" + + del k[-1], v[-1] + self.cf.reset_option("d.b") + assert k[-1] == "d.b" + + def test_set_ContextManager(self): + def eq(val): + assert self.cf.get_option("a") == val + + self.cf.register_option("a", 0) + eq(0) + with self.cf.option_context("a", 15): + eq(15) + with self.cf.option_context("a", 25): + eq(25) + eq(15) + eq(0) + + self.cf.set_option("a", 17) + eq(17) + + # Test that option_context can be used as a decorator too (#34253). 
+ @self.cf.option_context("a", 123) + def f(): + eq(123) + + f() + + def test_attribute_access(self): + holder = [] + + def f3(key): + holder.append(True) + + self.cf.register_option("a", 0) + self.cf.register_option("c", 0, cb=f3) + options = self.cf.options + + assert options.a == 0 + with self.cf.option_context("a", 15): + assert options.a == 15 + + options.a = 500 + assert self.cf.get_option("a") == 500 + + self.cf.reset_option("a") + assert options.a == self.cf.get_option("a", 0) + + msg = "You can only set the value of existing options" + with pytest.raises(OptionError, match=msg): + options.b = 1 + with pytest.raises(OptionError, match=msg): + options.display = 1 + + # make sure callback kicks when using this form of setting + options.c = 1 + assert len(holder) == 1 + + def test_option_context_scope(self): + # Ensure that creating a context does not affect the existing + # environment as it is supposed to be used with the `with` statement. + # See https://github.com/pandas-dev/pandas/issues/8514 + + original_value = 60 + context_value = 10 + option_name = "a" + + self.cf.register_option(option_name, original_value) + + # Ensure creating contexts didn't affect the current context. + ctx = self.cf.option_context(option_name, context_value) + assert self.cf.get_option(option_name) == original_value + + # Ensure the correct value is available inside the context. + with ctx: + assert self.cf.get_option(option_name) == context_value + + # Ensure the current context is reset + assert self.cf.get_option(option_name) == original_value + + def test_dictwrapper_getattr(self): + options = self.cf.options + # GH 19789 + with pytest.raises(OptionError, match="No such option"): + options.bananas + assert not hasattr(options, "bananas") diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py new file mode 100644 index 00000000..f972a9ee --- /dev/null +++ b/pandas/tests/config/test_localization.py @@ -0,0 +1,142 @@ +import codecs +import locale +import os + +import pytest + +from pandas._config.localization import ( + can_set_locale, + get_locales, + set_locale, +) + +import pandas as pd + +_all_locales = get_locales() or [] +_current_locale = locale.setlocale(locale.LC_ALL) # getlocale() is wrong, see GH#46595 + +# Don't run any of these tests if we have no locales. +pytestmark = pytest.mark.skipif(not _all_locales, reason="Need locales") + +_skip_if_only_one_locale = pytest.mark.skipif( + len(_all_locales) <= 1, reason="Need multiple locales for meaningful test" +) + + +def _get_current_locale(lc_var: int = locale.LC_ALL) -> str: + # getlocale is not always compliant with setlocale, use setlocale. GH#46595 + return locale.setlocale(lc_var) + + +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_can_set_current_locale(lc_var): + # Can set the current locale + before_locale = _get_current_locale(lc_var) + assert can_set_locale(before_locale, lc_var=lc_var) + after_locale = _get_current_locale(lc_var) + assert before_locale == after_locale + + +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_can_set_locale_valid_set(lc_var): + # Can set the default locale. 
+ before_locale = _get_current_locale(lc_var) + assert can_set_locale("", lc_var=lc_var) + after_locale = _get_current_locale(lc_var) + assert before_locale == after_locale + + +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_can_set_locale_invalid_set(lc_var): + # Cannot set an invalid locale. + before_locale = _get_current_locale(lc_var) + assert not can_set_locale("non-existent_locale", lc_var=lc_var) + after_locale = _get_current_locale(lc_var) + assert before_locale == after_locale + + +@pytest.mark.parametrize( + "lang,enc", + [ + ("it_CH", "UTF-8"), + ("en_US", "ascii"), + ("zh_CN", "GB2312"), + ("it_IT", "ISO-8859-1"), + ], +) +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_can_set_locale_no_leak(lang, enc, lc_var): + # Test that can_set_locale does not leak even when returning False. See GH#46595 + before_locale = _get_current_locale(lc_var) + can_set_locale((lang, enc), locale.LC_ALL) + after_locale = _get_current_locale(lc_var) + assert before_locale == after_locale + + +def test_can_set_locale_invalid_get(monkeypatch): + # see GH#22129 + # In some cases, an invalid locale can be set, + # but a subsequent getlocale() raises a ValueError. + + def mock_get_locale(): + raise ValueError() + + with monkeypatch.context() as m: + m.setattr(locale, "getlocale", mock_get_locale) + assert not can_set_locale("") + + +def test_get_locales_at_least_one(): + # see GH#9744 + assert len(_all_locales) > 0 + + +@_skip_if_only_one_locale +def test_get_locales_prefix(): + first_locale = _all_locales[0] + assert len(get_locales(prefix=first_locale[:2])) > 0 + + +@_skip_if_only_one_locale +@pytest.mark.parametrize( + "lang,enc", + [ + ("it_CH", "UTF-8"), + ("en_US", "ascii"), + ("zh_CN", "GB2312"), + ("it_IT", "ISO-8859-1"), + ], +) +def test_set_locale(lang, enc): + before_locale = _get_current_locale() + + enc = codecs.lookup(enc).name + new_locale = lang, enc + + if not can_set_locale(new_locale): + msg = "unsupported locale setting" + + with pytest.raises(locale.Error, match=msg): + with set_locale(new_locale): + pass + else: + with set_locale(new_locale) as normalized_locale: + new_lang, new_enc = normalized_locale.split(".") + new_enc = codecs.lookup(enc).name + + normalized_locale = new_lang, new_enc + assert normalized_locale == new_locale + + # Once we exit the "with" statement, locale should be back to what it was. 
+ after_locale = _get_current_locale() + assert before_locale == after_locale + + +def test_encoding_detected(): + system_locale = os.environ.get("LC_ALL") + system_encoding = system_locale.split(".")[-1] if system_locale else "utf-8" + + assert ( + codecs.lookup(pd.options.display.encoding).name + == codecs.lookup(system_encoding).name + ) diff --git a/pandas/tests/construction/__init__.py b/pandas/tests/construction/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/construction/test_extract_array.py b/pandas/tests/construction/test_extract_array.py new file mode 100644 index 00000000..4dd3eda8 --- /dev/null +++ b/pandas/tests/construction/test_extract_array.py @@ -0,0 +1,18 @@ +from pandas import Index +import pandas._testing as tm +from pandas.core.construction import extract_array + + +def test_extract_array_rangeindex(): + ri = Index(range(5)) + + expected = ri._values + res = extract_array(ri, extract_numpy=True, extract_range=True) + tm.assert_numpy_array_equal(res, expected) + res = extract_array(ri, extract_numpy=False, extract_range=True) + tm.assert_numpy_array_equal(res, expected) + + res = extract_array(ri, extract_numpy=True, extract_range=False) + tm.assert_index_equal(res, ri) + res = extract_array(ri, extract_numpy=False, extract_range=False) + tm.assert_index_equal(res, ri) diff --git a/pandas/tests/copy_view/__init__.py b/pandas/tests/copy_view/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py new file mode 100644 index 00000000..444c6ff2 --- /dev/null +++ b/pandas/tests/copy_view/test_indexing.py @@ -0,0 +1,802 @@ +import numpy as np +import pytest + +from pandas.errors import SettingWithCopyWarning + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm +from pandas.tests.copy_view.util import get_array + +# ----------------------------------------------------------------------------- +# Indexing operations taking subset + modifying the subset/parent + + +def test_subset_column_selection(using_copy_on_write): + # Case: taking a subset of the columns of a DataFrame + # + afterwards modifying the subset + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + + subset = df[["a", "c"]] + + if using_copy_on_write: + # the subset shares memory ... + assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + # ... but uses CoW when being modified + subset.iloc[0, 0] = 0 + else: + assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + # INFO this no longer raise warning since pandas 1.4 + # with pd.option_context("chained_assignment", "warn"): + # with tm.assert_produces_warning(SettingWithCopyWarning): + subset.iloc[0, 0] = 0 + + assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + + expected = DataFrame({"a": [0, 2, 3], "c": [0.1, 0.2, 0.3]}) + tm.assert_frame_equal(subset, expected) + tm.assert_frame_equal(df, df_orig) + + +def test_subset_column_selection_modify_parent(using_copy_on_write): + # Case: taking a subset of the columns of a DataFrame + # + afterwards modifying the parent + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + + subset = df[["a", "c"]] + if using_copy_on_write: + # the subset shares memory ... + assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + # ... 
but parent uses CoW parent when it is modified + df.iloc[0, 0] = 0 + + assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + if using_copy_on_write: + # different column/block still shares memory + assert np.shares_memory(get_array(subset, "c"), get_array(df, "c")) + + expected = DataFrame({"a": [1, 2, 3], "c": [0.1, 0.2, 0.3]}) + tm.assert_frame_equal(subset, expected) + + +def test_subset_row_slice(using_copy_on_write): + # Case: taking a subset of the rows of a DataFrame using a slice + # + afterwards modifying the subset + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + + subset = df[1:3] + subset._mgr._verify_integrity() + + assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + + if using_copy_on_write: + subset.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + + else: + # INFO this no longer raise warning since pandas 1.4 + # with pd.option_context("chained_assignment", "warn"): + # with tm.assert_produces_warning(SettingWithCopyWarning): + subset.iloc[0, 0] = 0 + + subset._mgr._verify_integrity() + + expected = DataFrame({"a": [0, 3], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)) + tm.assert_frame_equal(subset, expected) + if using_copy_on_write: + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) + else: + # original parent dataframe is actually updated + df_orig.iloc[1, 0] = 0 + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_column_slice(using_copy_on_write, using_array_manager, dtype): + # Case: taking a subset of the columns of a DataFrame using a slice + # + afterwards modifying the subset + single_block = (dtype == "int64") and not using_array_manager + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + subset = df.iloc[:, 1:] + subset._mgr._verify_integrity() + + if using_copy_on_write: + assert np.shares_memory(get_array(subset, "b"), get_array(df, "b")) + + subset.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b")) + + else: + # we only get a warning in case of a single block + warn = SettingWithCopyWarning if single_block else None + with pd.option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(warn): + subset.iloc[0, 0] = 0 + + expected = DataFrame({"b": [0, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}) + tm.assert_frame_equal(subset, expected) + # original parent dataframe is not modified (also not for BlockManager case, + # except for single block) + if not using_copy_on_write and (using_array_manager or single_block): + df_orig.iloc[0, 1] = 0 + tm.assert_frame_equal(df, df_orig) + else: + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +@pytest.mark.parametrize( + "row_indexer", + [slice(1, 2), np.array([False, True, True]), np.array([1, 2])], + ids=["slice", "mask", "array"], +) +@pytest.mark.parametrize( + "column_indexer", + [slice("b", "c"), np.array([False, True, True]), ["b", "c"]], + ids=["slice", "mask", "array"], +) +def test_subset_loc_rows_columns( + dtype, row_indexer, column_indexer, using_array_manager, using_copy_on_write +): + # Case: taking a subset of the rows+columns of a DataFrame using .loc + # + afterwards modifying the subset + # Generic test for several 
combinations of row/column indexers, not all + # of those could actually return a view / need CoW (so this test is not + # checking memory sharing, only ensuring subsequent mutation doesn't + # affect the parent dataframe) + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + subset = df.loc[row_indexer, column_indexer] + + # modifying the subset never modifies the parent + subset.iloc[0, 0] = 0 + + expected = DataFrame( + {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) + ) + tm.assert_frame_equal(subset, expected) + # a few corner cases _do_ actually modify the parent (with both row and column + # slice, and in case of ArrayManager or BlockManager with single block) + if ( + isinstance(row_indexer, slice) + and isinstance(column_indexer, slice) + and (using_array_manager or (dtype == "int64" and not using_copy_on_write)) + ): + df_orig.iloc[1, 1] = 0 + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +@pytest.mark.parametrize( + "row_indexer", + [slice(1, 3), np.array([False, True, True]), np.array([1, 2])], + ids=["slice", "mask", "array"], +) +@pytest.mark.parametrize( + "column_indexer", + [slice(1, 3), np.array([False, True, True]), [1, 2]], + ids=["slice", "mask", "array"], +) +def test_subset_iloc_rows_columns( + dtype, row_indexer, column_indexer, using_array_manager, using_copy_on_write +): + # Case: taking a subset of the rows+columns of a DataFrame using .iloc + # + afterwards modifying the subset + # Generic test for several combinations of row/column indexers, not all + # of those could actually return a view / need CoW (so this test is not + # checking memory sharing, only ensuring subsequent mutation doesn't + # affect the parent dataframe) + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + subset = df.iloc[row_indexer, column_indexer] + + # modifying the subset never modifies the parent + subset.iloc[0, 0] = 0 + + expected = DataFrame( + {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) + ) + tm.assert_frame_equal(subset, expected) + # a few corner cases _do_ actually modify the parent (with both row and column + # slice, and in case of ArrayManager or BlockManager with single block) + if ( + isinstance(row_indexer, slice) + and isinstance(column_indexer, slice) + and (using_array_manager or (dtype == "int64" and not using_copy_on_write)) + ): + df_orig.iloc[1, 1] = 0 + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "indexer", + [slice(0, 2), np.array([True, True, False]), np.array([0, 1])], + ids=["slice", "mask", "array"], +) +def test_subset_set_with_row_indexer(indexer_si, indexer, using_copy_on_write): + # Case: setting values with a row indexer on a viewing subset + # subset[indexer] = value and subset.iloc[indexer] = value + df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]}) + df_orig = df.copy() + subset = df[1:4] + + if ( + indexer_si is tm.setitem + and isinstance(indexer, np.ndarray) + and indexer.dtype == "int" + ): + pytest.skip("setitem with labels selects on columns") + + if using_copy_on_write: + indexer_si(subset)[indexer] = 0 + else: + # INFO iloc no longer raises warning since pandas 1.4 + warn = SettingWithCopyWarning if indexer_si is tm.setitem else None + with pd.option_context("chained_assignment", "warn"): + with 
tm.assert_produces_warning(warn): + indexer_si(subset)[indexer] = 0 + + expected = DataFrame( + {"a": [0, 0, 4], "b": [0, 0, 7], "c": [0.0, 0.0, 0.4]}, index=range(1, 4) + ) + tm.assert_frame_equal(subset, expected) + if using_copy_on_write: + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) + else: + # original parent dataframe is actually updated + df_orig[1:3] = 0 + tm.assert_frame_equal(df, df_orig) + + +def test_subset_set_with_mask(using_copy_on_write): + # Case: setting values with a mask on a viewing subset: subset[mask] = value + df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]}) + df_orig = df.copy() + subset = df[1:4] + + mask = subset > 3 + + if using_copy_on_write: + subset[mask] = 0 + else: + with pd.option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(SettingWithCopyWarning): + subset[mask] = 0 + + expected = DataFrame( + {"a": [2, 3, 0], "b": [0, 0, 0], "c": [0.20, 0.3, 0.4]}, index=range(1, 4) + ) + tm.assert_frame_equal(subset, expected) + if using_copy_on_write: + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) + else: + # original parent dataframe is actually updated + df_orig.loc[3, "a"] = 0 + df_orig.loc[1:3, "b"] = 0 + tm.assert_frame_equal(df, df_orig) + + +def test_subset_set_column(using_copy_on_write): + # Case: setting a single column on a viewing subset -> subset[col] = value + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + subset = df[1:3] + + if using_copy_on_write: + subset["a"] = np.array([10, 11], dtype="int64") + else: + with pd.option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(SettingWithCopyWarning): + subset["a"] = np.array([10, 11], dtype="int64") + + subset._mgr._verify_integrity() + expected = DataFrame( + {"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3) + ) + tm.assert_frame_equal(subset, expected) + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dtype): + # Case: setting a single column with loc on a viewing subset + # -> subset.loc[:, col] = value + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + subset = df[1:3] + + if using_copy_on_write: + subset.loc[:, "a"] = np.array([10, 11], dtype="int64") + else: + with pd.option_context("chained_assignment", "warn"): + # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those + # warnings and only assert the SettingWithCopyWarning + raise_on_extra_warnings = False if using_array_manager else True + with tm.assert_produces_warning( + SettingWithCopyWarning, + raise_on_extra_warnings=raise_on_extra_warnings, + ): + subset.loc[:, "a"] = np.array([10, 11], dtype="int64") + + subset._mgr._verify_integrity() + expected = DataFrame( + {"a": [10, 11], "b": [5, 6], "c": np.array([8, 9], dtype=dtype)}, + index=range(1, 3), + ) + tm.assert_frame_equal(subset, expected) + if using_copy_on_write or using_array_manager: + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) + else: + # original parent dataframe is actually updated + df_orig.loc[1:3, "a"] = np.array([10, 11], dtype="int64") + tm.assert_frame_equal(df, df_orig) + + +def test_subset_set_column_with_loc2(using_copy_on_write, 
using_array_manager): + # Case: setting a single column with loc on a viewing subset + # -> subset.loc[:, col] = value + # separate test for case of DataFrame of a single column -> takes a separate + # code path + df = DataFrame({"a": [1, 2, 3]}) + df_orig = df.copy() + subset = df[1:3] + + if using_copy_on_write: + subset.loc[:, "a"] = 0 + else: + with pd.option_context("chained_assignment", "warn"): + # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those + # warnings and only assert the SettingWithCopyWarning + raise_on_extra_warnings = False if using_array_manager else True + with tm.assert_produces_warning( + SettingWithCopyWarning, + raise_on_extra_warnings=raise_on_extra_warnings, + ): + subset.loc[:, "a"] = 0 + + subset._mgr._verify_integrity() + expected = DataFrame({"a": [0, 0]}, index=range(1, 3)) + tm.assert_frame_equal(subset, expected) + if using_copy_on_write or using_array_manager: + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) + else: + # original parent dataframe is actually updated + df_orig.loc[1:3, "a"] = 0 + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_set_columns(using_copy_on_write, dtype): + # Case: setting multiple columns on a viewing subset + # -> subset[[col1, col2]] = value + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + subset = df[1:3] + + if using_copy_on_write: + subset[["a", "c"]] = 0 + else: + with pd.option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(SettingWithCopyWarning): + subset[["a", "c"]] = 0 + + subset._mgr._verify_integrity() + if using_copy_on_write: + # first and third column should certainly have no references anymore + assert all(subset._mgr._has_no_reference(i) for i in [0, 2]) + expected = DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3)) + tm.assert_frame_equal(subset, expected) + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "indexer", + [slice("a", "b"), np.array([True, True, False]), ["a", "b"]], + ids=["slice", "mask", "array"], +) +def test_subset_set_with_column_indexer( + indexer, using_copy_on_write, using_array_manager +): + # Case: setting multiple columns with a column indexer on a viewing subset + # -> subset.loc[:, [col1, col2]] = value + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + df_orig = df.copy() + subset = df[1:3] + + if using_copy_on_write: + subset.loc[:, indexer] = 0 + else: + with pd.option_context("chained_assignment", "warn"): + # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those + # warnings and only assert the SettingWithCopyWarning + with tm.assert_produces_warning( + SettingWithCopyWarning, raise_on_extra_warnings=False + ): + subset.loc[:, indexer] = 0 + + subset._mgr._verify_integrity() + expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3)) + # TODO full row slice .loc[:, idx] update inplace instead of overwrite? + expected["b"] = expected["b"].astype("int64") + tm.assert_frame_equal(subset, expected) + if using_copy_on_write or using_array_manager: + tm.assert_frame_equal(df, df_orig) + else: + # In the mixed case with BlockManager, only one of the two columns is + # mutated in the parent frame .. 
+ df_orig.loc[1:2, ["a"]] = 0 + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "method", + [ + lambda df: df[["a", "b"]][0:2], + lambda df: df[0:2][["a", "b"]], + lambda df: df[["a", "b"]].iloc[0:2], + lambda df: df[["a", "b"]].loc[0:1], + lambda df: df[0:2].iloc[:, 0:2], + lambda df: df[0:2].loc[:, "a":"b"], # type: ignore[misc] + ], + ids=[ + "row-getitem-slice", + "column-getitem", + "row-iloc-slice", + "row-loc-slice", + "column-iloc-slice", + "column-loc-slice", + ], +) +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_chained_getitem( + request, method, dtype, using_copy_on_write, using_array_manager +): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + # when not using CoW, it depends on whether we have a single block or not + # and whether we are slicing the columns -> in that case we have a view + subset_is_view = request.node.callspec.id in ( + "single-block-column-iloc-slice", + "single-block-column-loc-slice", + ) or ( + request.node.callspec.id + in ("mixed-block-column-iloc-slice", "mixed-block-column-loc-slice") + and using_array_manager + ) + + # modify subset -> don't modify parent + subset = method(df) + subset.iloc[0, 0] = 0 + if using_copy_on_write or (not subset_is_view): + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify parent -> don't modify subset + subset = method(df) + df.iloc[0, 0] = 0 + expected = DataFrame({"a": [1, 2], "b": [4, 5]}) + if using_copy_on_write or not subset_is_view: + tm.assert_frame_equal(subset, expected) + else: + assert subset.iloc[0, 0] == 0 + + +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_chained_getitem_column(dtype, using_copy_on_write): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + # modify subset -> don't modify parent + subset = df[:]["a"][0:2] + df._clear_item_cache() + subset.iloc[0] = 0 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify parent -> don't modify subset + subset = df[:]["a"][0:2] + df._clear_item_cache() + df.iloc[0, 0] = 0 + expected = Series([1, 2], name="a") + if using_copy_on_write: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + +@pytest.mark.parametrize( + "method", + [ + lambda s: s["a":"c"]["a":"b"], # type: ignore[misc] + lambda s: s.iloc[0:3].iloc[0:2], + lambda s: s.loc["a":"c"].loc["a":"b"], # type: ignore[misc] + lambda s: s.loc["a":"c"] # type: ignore[misc] + .iloc[0:3] + .iloc[0:2] + .loc["a":"b"] # type: ignore[misc] + .iloc[0:1], + ], + ids=["getitem", "iloc", "loc", "long-chain"], +) +def test_subset_chained_getitem_series(method, using_copy_on_write): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + s = Series([1, 2, 3], index=["a", "b", "c"]) + s_orig = s.copy() + + # modify subset -> don't modify parent + subset = method(s) + subset.iloc[0] = 0 + if using_copy_on_write: + tm.assert_series_equal(s, s_orig) + else: + assert s.iloc[0] == 0 + 
+ # modify parent -> don't modify subset + subset = s.iloc[0:3].iloc[0:2] + s.iloc[0] = 0 + expected = Series([1, 2], index=["a", "b"]) + if using_copy_on_write: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + +def test_subset_chained_single_block_row(using_copy_on_write, using_array_manager): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df_orig = df.copy() + + # modify subset -> don't modify parent + subset = df[:].iloc[0].iloc[0:2] + subset.iloc[0] = 0 + if using_copy_on_write or using_array_manager: + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify parent -> don't modify subset + subset = df[:].iloc[0].iloc[0:2] + df.iloc[0, 0] = 0 + expected = Series([1, 4], index=["a", "b"], name=0) + if using_copy_on_write or using_array_manager: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + +# TODO add more tests modifying the parent + + +# ----------------------------------------------------------------------------- +# Series -- Indexing operations taking subset + modifying the subset/parent + + +def test_series_getitem_slice(using_copy_on_write): + # Case: taking a slice of a Series + afterwards modifying the subset + s = Series([1, 2, 3], index=["a", "b", "c"]) + s_orig = s.copy() + + subset = s[:] + assert np.shares_memory(subset.values, s.values) + + subset.iloc[0] = 0 + + if using_copy_on_write: + assert not np.shares_memory(subset.values, s.values) + + expected = Series([0, 2, 3], index=["a", "b", "c"]) + tm.assert_series_equal(subset, expected) + + if using_copy_on_write: + # original parent series is not modified (CoW) + tm.assert_series_equal(s, s_orig) + else: + # original parent series is actually updated + assert s.iloc[0] == 0 + + +@pytest.mark.parametrize( + "indexer", + [slice(0, 2), np.array([True, True, False]), np.array([0, 1])], + ids=["slice", "mask", "array"], +) +def test_series_subset_set_with_indexer(indexer_si, indexer, using_copy_on_write): + # Case: setting values in a viewing Series with an indexer + s = Series([1, 2, 3], index=["a", "b", "c"]) + s_orig = s.copy() + subset = s[:] + + indexer_si(subset)[indexer] = 0 + expected = Series([0, 0, 3], index=["a", "b", "c"]) + tm.assert_series_equal(subset, expected) + + if using_copy_on_write: + tm.assert_series_equal(s, s_orig) + else: + tm.assert_series_equal(s, expected) + + +# ----------------------------------------------------------------------------- +# del operator + + +def test_del_frame(using_copy_on_write): + # Case: deleting a column with `del` on a viewing child dataframe should + # not modify parent + update the references + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df[:] + + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + + del df2["b"] + + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df2, df_orig[["a", "c"]]) + df2._mgr._verify_integrity() + + # TODO in theory modifying column "b" of the parent wouldn't need a CoW + # but the weakref is still alive and so we still perform CoW + + df2.loc[0, "a"] = 100 + if using_copy_on_write: + # modifying child after deleting a column still doesn't update parent + tm.assert_frame_equal(df, df_orig) + else: + assert df.loc[0, "a"] == 100 + + +def test_del_series(): + s = Series([1, 2, 3], index=["a", "b", "c"]) + s_orig = s.copy() + s2 = s[:] + + assert np.shares_memory(s.values, 
s2.values) + + del s2["a"] + + assert not np.shares_memory(s.values, s2.values) + tm.assert_series_equal(s, s_orig) + tm.assert_series_equal(s2, s_orig[["b", "c"]]) + + # modifying s2 doesn't need copy on write (due to `del`, s2 is backed by new array) + values = s2.values + s2.loc["b"] = 100 + assert values[0] == 100 + + +# ----------------------------------------------------------------------------- +# Accessing column as Series + + +def test_column_as_series(using_copy_on_write, using_array_manager): + # Case: selecting a single column now also uses Copy-on-Write + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + + s = df["a"] + + assert np.shares_memory(s.values, get_array(df, "a")) + + if using_copy_on_write or using_array_manager: + s[0] = 0 + else: + with pd.option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(SettingWithCopyWarning): + s[0] = 0 + + expected = Series([0, 2, 3], name="a") + tm.assert_series_equal(s, expected) + if using_copy_on_write: + # assert not np.shares_memory(s.values, get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + # ensure cached series on getitem is not the changed series + tm.assert_series_equal(df["a"], df_orig["a"]) + else: + df_orig.iloc[0, 0] = 0 + tm.assert_frame_equal(df, df_orig) + + +def test_column_as_series_set_with_upcast(using_copy_on_write, using_array_manager): + # Case: selecting a single column now also uses Copy-on-Write -> when + # setting a value causes an upcast, we don't need to update the parent + # DataFrame through the cache mechanism + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + + s = df["a"] + if using_copy_on_write or using_array_manager: + s[0] = "foo" + else: + with pd.option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(SettingWithCopyWarning): + s[0] = "foo" + + expected = Series(["foo", 2, 3], dtype=object, name="a") + tm.assert_series_equal(s, expected) + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + # ensure cached series on getitem is not the changed series + tm.assert_series_equal(df["a"], df_orig["a"]) + else: + df_orig["a"] = expected + tm.assert_frame_equal(df, df_orig) + + +# TODO add tests for other indexing methods on the Series + + +def test_dataframe_add_column_from_series(): + # Case: adding a new column to a DataFrame from an existing column/series + # -> always already takes a copy on assignment + # (no change in behaviour here) + # TODO can we achieve the same behaviour with Copy-on-Write? 
+ df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + + s = Series([10, 11, 12]) + df["new"] = s + assert not np.shares_memory(get_array(df, "new"), s.values) + + # editing series -> doesn't modify column in frame + s[0] = 0 + expected = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "new": [10, 11, 12]}) + tm.assert_frame_equal(df, expected) + + # editing column in frame -> doesn't modify series + df.loc[2, "new"] = 100 + expected_s = Series([0, 11, 12]) + tm.assert_series_equal(s, expected_s) + + +# TODO add tests for constructors diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py new file mode 100644 index 00000000..1938a1c5 --- /dev/null +++ b/pandas/tests/copy_view/test_internals.py @@ -0,0 +1,95 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame +from pandas.tests.copy_view.util import get_array + + +@td.skip_array_manager_invalid_test +def test_consolidate(using_copy_on_write): + + # create unconsolidated DataFrame + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + df["c"] = [4, 5, 6] + + # take a viewing subset + subset = df[:] + + # each block of subset references a block of df + assert subset._mgr.refs is not None and all( + ref is not None for ref in subset._mgr.refs + ) + + # consolidate the two int64 blocks + subset._consolidate_inplace() + + # the float64 block still references the parent one because it still a view + assert subset._mgr.refs[0] is not None + # equivalent of assert np.shares_memory(df["b"].values, subset["b"].values) + # but avoids caching df["b"] + assert np.shares_memory(get_array(df, "b"), get_array(subset, "b")) + + # the new consolidated int64 block does not reference another + assert subset._mgr.refs[1] is None + + # the parent dataframe now also only is linked for the float column + assert df._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(1) + assert df._mgr._has_no_reference(2) + + # and modifying subset still doesn't modify parent + if using_copy_on_write: + subset.iloc[0, 1] = 0.0 + assert df._mgr._has_no_reference(1) + assert df.loc[0, "b"] == 0.1 + + +@td.skip_array_manager_invalid_test +def test_clear_parent(using_copy_on_write): + # ensure to clear parent reference if we are no longer viewing data from parent + if not using_copy_on_write: + pytest.skip("test only relevant when using copy-on-write") + + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + subset = df[:] + assert subset._mgr.parent is not None + + # replacing existing columns loses the references to the parent df + subset["a"] = 0 + assert subset._mgr.parent is not None + # when losing the last reference, also the parent should be reset + subset["b"] = 0 + assert subset._mgr.parent is None + + +@pytest.mark.single_cpu +@td.skip_array_manager_invalid_test +def test_switch_options(): + # ensure we can switch the value of the option within one session + # (assuming data is constructed after switching) + + # using the option_context to ensure we set back to global option value + # after running the test + with pd.option_context("mode.copy_on_write", False): + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + subset = df[:] + subset.iloc[0, 0] = 0 + # df updated with CoW disabled + assert df.iloc[0, 0] == 0 + + pd.options.mode.copy_on_write = True + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + subset = df[:] + subset.iloc[0, 0] = 0 + # df not updated with CoW enabled + assert df.iloc[0, 0] == 1 + + 
pd.options.mode.copy_on_write = False + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + subset = df[:] + subset.iloc[0, 0] = 0 + # df updated with CoW disabled + assert df.iloc[0, 0] == 0 diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py new file mode 100644 index 00000000..0b366f37 --- /dev/null +++ b/pandas/tests/copy_view/test_methods.py @@ -0,0 +1,231 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm +from pandas.tests.copy_view.util import get_array + + +def test_copy(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_copy = df.copy() + + # the deep copy doesn't share memory + assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) + if using_copy_on_write: + assert df_copy._mgr.refs is None + + # mutating copy doesn't mutate original + df_copy.iloc[0, 0] = 0 + assert df.iloc[0, 0] == 1 + + +def test_copy_shallow(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_copy = df.copy(deep=False) + + # the shallow copy still shares memory + assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) + if using_copy_on_write: + assert df_copy._mgr.refs is not None + + if using_copy_on_write: + # mutating shallow copy doesn't mutate original + df_copy.iloc[0, 0] = 0 + assert df.iloc[0, 0] == 1 + # mutating triggered a copy-on-write -> no longer shares memory + assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) + # but still shares memory for the other columns/blocks + assert np.shares_memory(get_array(df_copy, "c"), get_array(df, "c")) + else: + # mutating shallow copy does mutate original + df_copy.iloc[0, 0] = 0 + assert df.iloc[0, 0] == 0 + # and still shares memory + assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) + + +# ----------------------------------------------------------------------------- +# DataFrame methods returning new DataFrame using shallow copy + + +def test_reset_index(using_copy_on_write): + # Case: resetting the index (i.e. 
adding a new column) + mutating the + # resulting dataframe + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=[10, 11, 12] + ) + df_orig = df.copy() + df2 = df.reset_index() + df2._mgr._verify_integrity() + + if using_copy_on_write: + # still shares memory (df2 is a shallow copy) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + # mutating df2 triggers a copy-on-write for that column / block + df2.iloc[0, 2] = 0 + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) + + +def test_rename_columns(using_copy_on_write): + # Case: renaming columns returns a new dataframe + # + afterwards modifying the result + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.rename(columns=str.upper) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "C"), get_array(df, "c")) + expected = DataFrame({"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]}) + tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df, df_orig) + + +def test_rename_columns_modify_parent(using_copy_on_write): + # Case: renaming columns returns a new dataframe + # + afterwards modifying the original (parent) dataframe + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df2 = df.rename(columns=str.upper) + df2_orig = df2.copy() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + df.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "C"), get_array(df, "c")) + expected = DataFrame({"a": [0, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df2, df2_orig) + + +def test_reindex_columns(using_copy_on_write): + # Case: reindexing the column returns a new dataframe + # + afterwards modifying the result + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.reindex(columns=["a", "c"]) + + if using_copy_on_write: + # still shares memory (df2 is a shallow copy) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + # mutating df2 triggers a copy-on-write for that column + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) + + +def test_select_dtypes(using_copy_on_write): + # Case: selecting columns using `select_dtypes()` returns a new dataframe + # + afterwards modifying the result + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.select_dtypes("int64") + df2._mgr._verify_integrity() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + # 
mutating df2 triggers a copy-on-write for that column/block + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_to_frame(using_copy_on_write): + # Case: converting a Series to a DataFrame with to_frame + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + + df = ser[:].to_frame() + + # currently this always returns a "view" + assert np.shares_memory(ser.values, get_array(df, 0)) + + df.iloc[0, 0] = 0 + + if using_copy_on_write: + # mutating df triggers a copy-on-write for that column + assert not np.shares_memory(ser.values, get_array(df, 0)) + tm.assert_series_equal(ser, ser_orig) + else: + # but currently to_frame() actually returns a view -> mutates parent + expected = ser_orig.copy() + expected.iloc[0] = 0 + tm.assert_series_equal(ser, expected) + + # modify original series -> don't modify dataframe + df = ser[:].to_frame() + ser.iloc[0] = 0 + + if using_copy_on_write: + tm.assert_frame_equal(df, ser_orig.to_frame()) + else: + expected = ser_orig.copy().to_frame() + expected.iloc[0, 0] = 0 + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize( + "method, idx", + [ + (lambda df: df.copy(deep=False).copy(deep=False), 0), + (lambda df: df.reset_index().reset_index(), 2), + (lambda df: df.rename(columns=str.upper).rename(columns=str.lower), 0), + (lambda df: df.copy(deep=False).select_dtypes(include="number"), 0), + ], + ids=["shallow-copy", "reset_index", "rename", "select_dtypes"], +) +def test_chained_methods(request, method, idx, using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + + # when not using CoW, only the copy() variant actually gives a view + df2_is_view = not using_copy_on_write and request.node.callspec.id == "shallow-copy" + + # modify df2 -> don't modify df + df2 = method(df) + df2.iloc[0, idx] = 0 + if not df2_is_view: + tm.assert_frame_equal(df, df_orig) + + # modify df -> don't modify df2 + df2 = method(df) + df.iloc[0, 0] = 0 + if not df2_is_view: + tm.assert_frame_equal(df2.iloc[:, idx:], df_orig) + + +def test_putmask(using_copy_on_write): + df = DataFrame({"a": [1, 2], "b": 1, "c": 2}) + view = df[:] + df_orig = df.copy() + df[df == df] = 5 + + if using_copy_on_write: + assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) + tm.assert_frame_equal(view, df_orig) + else: + # Without CoW the original will be modified + assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) + assert view.iloc[0, 0] == 5 diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py new file mode 100644 index 00000000..9e0d350d --- /dev/null +++ b/pandas/tests/copy_view/test_setitem.py @@ -0,0 +1,91 @@ +import numpy as np + +from pandas import ( + DataFrame, + Index, + RangeIndex, + Series, +) +import pandas._testing as tm + +# ----------------------------------------------------------------------------- +# Copy/view behaviour for the values that are set in a DataFrame + + +def test_set_column_with_array(): + # Case: setting an array as a new column (df[col] = arr) copies that data + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + arr = np.array([1, 2, 3], dtype="int64") + + df["c"] = arr + + # the array data is copied + assert not np.shares_memory(df["c"].values, arr) + # and thus modifying the array does not modify the DataFrame + arr[0] = 0 + tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c")) + + +def
test_set_column_with_series(using_copy_on_write): + # Case: setting a series as a new column (df[col] = s) copies that data + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + ser = Series([1, 2, 3]) + + df["c"] = ser + + if using_copy_on_write: + # TODO(CoW) with CoW we can delay the copy + # assert np.shares_memory(df["c"].values, ser.values) + assert not np.shares_memory(df["c"].values, ser.values) + else: + # the series data is copied + assert not np.shares_memory(df["c"].values, ser.values) + + # and modifying the series does not modify the DataFrame + ser.iloc[0] = 0 + assert ser.iloc[0] == 0 + tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c")) + + +def test_set_column_with_index(using_copy_on_write): + # Case: setting an index as a new column (df[col] = idx) copies that data + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + idx = Index([1, 2, 3]) + + df["c"] = idx + + # the index data is copied + assert not np.shares_memory(df["c"].values, idx.values) + + # and thus modifying the index does not modify the DataFrame + idx.values[0] = 0 + tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c")) + + idx = RangeIndex(1, 4) + arr = idx.values + + df["d"] = idx + + assert not np.shares_memory(df["d"].values, arr) + arr[0] = 0 + tm.assert_series_equal(df["d"], Series([1, 2, 3], name="d")) + + +def test_set_columns_with_dataframe(using_copy_on_write): + # Case: setting a DataFrame as new columns copies that data + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}) + + df[["c", "d"]] = df2 + + if using_copy_on_write: + # TODO(CoW) with CoW we can delay the copy + # assert np.shares_memory(df["c"].values, df2["c"].values) + assert not np.shares_memory(df["c"].values, df2["c"].values) + else: + # the data is copied + assert not np.shares_memory(df["c"].values, df2["c"].values) + + # and modifying the set DataFrame does not modify the original DataFrame + df2.iloc[0, 0] = 0 + tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c")) diff --git a/pandas/tests/copy_view/util.py b/pandas/tests/copy_view/util.py new file mode 100644 index 00000000..9e358c7e --- /dev/null +++ b/pandas/tests/copy_view/util.py @@ -0,0 +1,11 @@ +def get_array(df, col): + """ + Helper method to get array for a DataFrame column. + + Equivalent of df[col].values, but without going through normal getitem, + which triggers tracking references / CoW (and we might be testing that + this is done by some other operation). 
+ """ + icol = df.columns.get_loc(col) + assert isinstance(icol, int) + return df._get_column_array(icol) diff --git a/pandas/tests/dtypes/__init__.py b/pandas/tests/dtypes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/dtypes/cast/__init__.py b/pandas/tests/dtypes/cast/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py new file mode 100644 index 00000000..3b7d76ea --- /dev/null +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -0,0 +1,79 @@ +import numpy as np + +from pandas.core.dtypes.cast import can_hold_element + + +def test_can_hold_element_range(any_int_numpy_dtype): + # GH#44261 + dtype = np.dtype(any_int_numpy_dtype) + arr = np.array([], dtype=dtype) + + rng = range(2, 127) + assert can_hold_element(arr, rng) + + # negatives -> can't be held by uint dtypes + rng = range(-2, 127) + if dtype.kind == "i": + assert can_hold_element(arr, rng) + else: + assert not can_hold_element(arr, rng) + + rng = range(2, 255) + if dtype == "int8": + assert not can_hold_element(arr, rng) + else: + assert can_hold_element(arr, rng) + + rng = range(-255, 65537) + if dtype.kind == "u": + assert not can_hold_element(arr, rng) + elif dtype.itemsize < 4: + assert not can_hold_element(arr, rng) + else: + assert can_hold_element(arr, rng) + + # empty + rng = range(-(10**10), -(10**10)) + assert len(rng) == 0 + # assert can_hold_element(arr, rng) + + rng = range(10**10, 10**10) + assert len(rng) == 0 + assert can_hold_element(arr, rng) + + +def test_can_hold_element_int_values_float_ndarray(): + arr = np.array([], dtype=np.int64) + + element = np.array([1.0, 2.0]) + assert can_hold_element(arr, element) + + assert not can_hold_element(arr, element + 0.5) + + # integer but not losslessly castable to int64 + element = np.array([3, 2**65], dtype=np.float64) + assert not can_hold_element(arr, element) + + +def test_can_hold_element_int8_int(): + arr = np.array([], dtype=np.int8) + + element = 2 + assert can_hold_element(arr, element) + assert can_hold_element(arr, np.int8(element)) + assert can_hold_element(arr, np.uint8(element)) + assert can_hold_element(arr, np.int16(element)) + assert can_hold_element(arr, np.uint16(element)) + assert can_hold_element(arr, np.int32(element)) + assert can_hold_element(arr, np.uint32(element)) + assert can_hold_element(arr, np.int64(element)) + assert can_hold_element(arr, np.uint64(element)) + + element = 2**9 + assert not can_hold_element(arr, element) + assert not can_hold_element(arr, np.int16(element)) + assert not can_hold_element(arr, np.uint16(element)) + assert not can_hold_element(arr, np.int32(element)) + assert not can_hold_element(arr, np.uint32(element)) + assert not can_hold_element(arr, np.int64(element)) + assert not can_hold_element(arr, np.uint64(element)) diff --git a/pandas/tests/dtypes/cast/test_construct_from_scalar.py b/pandas/tests/dtypes/cast/test_construct_from_scalar.py new file mode 100644 index 00000000..0ce04ce2 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_construct_from_scalar.py @@ -0,0 +1,55 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar +from pandas.core.dtypes.dtypes import CategoricalDtype + +from pandas import ( + Categorical, + Timedelta, +) +import pandas._testing as tm + + +def test_cast_1d_array_like_from_scalar_categorical(): + # see gh-19565 + # + # Categorical result from scalar did not maintain + # 
categories and ordering of the passed dtype. + cats = ["a", "b", "c"] + cat_type = CategoricalDtype(categories=cats, ordered=False) + expected = Categorical(["a", "a"], categories=cats) + + result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type) + tm.assert_categorical_equal(result, expected) + + +def test_cast_1d_array_like_from_timestamp(fixed_now_ts): + # check we dont lose nanoseconds + ts = fixed_now_ts + Timedelta(1) + res = construct_1d_arraylike_from_scalar(ts, 2, np.dtype("M8[ns]")) + assert res[0] == ts + + +def test_cast_1d_array_like_from_timedelta(): + # check we dont lose nanoseconds + td = Timedelta(1) + res = construct_1d_arraylike_from_scalar(td, 2, np.dtype("m8[ns]")) + assert res[0] == td + + +def test_cast_1d_array_like_mismatched_datetimelike(): + td = np.timedelta64("NaT", "ns") + dt = np.datetime64("NaT", "ns") + + with pytest.raises(TypeError, match="Cannot cast"): + construct_1d_arraylike_from_scalar(td, 2, dt.dtype) + + with pytest.raises(TypeError, match="Cannot cast"): + construct_1d_arraylike_from_scalar(np.timedelta64(4, "ns"), 2, dt.dtype) + + with pytest.raises(TypeError, match="Cannot cast"): + construct_1d_arraylike_from_scalar(dt, 2, td.dtype) + + with pytest.raises(TypeError, match="Cannot cast"): + construct_1d_arraylike_from_scalar(np.datetime64(4, "ns"), 2, td.dtype) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py new file mode 100644 index 00000000..10085ddd --- /dev/null +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -0,0 +1,30 @@ +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.construction import sanitize_array + + +@pytest.mark.parametrize( + "values, dtype, expected", + [ + ([1, 2, 3], None, np.array([1, 2, 3], dtype=np.int64)), + (np.array([1, 2, 3]), None, np.array([1, 2, 3])), + (["1", "2", None], None, np.array(["1", "2", None])), + (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])), + ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])), + ], +) +def test_construct_1d_ndarray_preserving_na(values, dtype, expected): + result = sanitize_array(values, index=None, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"]) +def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): + arr = np.arange(5, dtype=np.int64).view(dtype) + expected = np.array(list(arr), dtype=object) + assert all(isinstance(x, type(arr[0])) for x in expected) + + result = sanitize_array(arr, index=None, dtype=np.dtype(object)) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/dtypes/cast/test_construct_object_arr.py b/pandas/tests/dtypes/cast/test_construct_object_arr.py new file mode 100644 index 00000000..cb44f91f --- /dev/null +++ b/pandas/tests/dtypes/cast/test_construct_object_arr.py @@ -0,0 +1,20 @@ +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + + +@pytest.mark.parametrize("datum1", [1, 2.0, "3", (4, 5), [6, 7], None]) +@pytest.mark.parametrize("datum2", [8, 9.0, "10", (11, 12), [13, 14], None]) +def test_cast_1d_array(datum1, datum2): + data = [datum1, datum2] + result = construct_1d_object_array_from_listlike(data) + + # Direct comparison fails: https://github.com/numpy/numpy/issues/10218 + assert result.dtype == "object" + assert list(result) == data + + +@pytest.mark.parametrize("val", [1, 2.0, None]) +def test_cast_1d_array_invalid_scalar(val): + with 
pytest.raises(TypeError, match="has no len()"): + construct_1d_object_array_from_listlike(val) diff --git a/pandas/tests/dtypes/cast/test_dict_compat.py b/pandas/tests/dtypes/cast/test_dict_compat.py new file mode 100644 index 00000000..13dc82d7 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_dict_compat.py @@ -0,0 +1,14 @@ +import numpy as np + +from pandas.core.dtypes.cast import dict_compat + +from pandas import Timestamp + + +def test_dict_compat(): + data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2} + data_unchanged = {1: 2, 3: 4, 5: 6} + expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2} + assert dict_compat(data_datetime64) == expected + assert dict_compat(expected) == expected + assert dict_compat(data_unchanged) == data_unchanged diff --git a/pandas/tests/dtypes/cast/test_downcast.py b/pandas/tests/dtypes/cast/test_downcast.py new file mode 100644 index 00000000..c01eac74 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_downcast.py @@ -0,0 +1,97 @@ +import decimal + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_downcast_to_dtype + +from pandas import ( + Series, + Timedelta, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "arr,dtype,expected", + [ + ( + np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), + "infer", + np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), + ), + ( + np.array([8.0, 8.0, 8.0, 8.0, 8.9999999999995]), + "infer", + np.array([8, 8, 8, 8, 9], dtype=np.int64), + ), + ( + np.array([8.0, 8.0, 8.0, 8.0, 9.0000000000005]), + "infer", + np.array([8, 8, 8, 8, 9], dtype=np.int64), + ), + ( + # This is a judgement call, but we do _not_ downcast Decimal + # objects + np.array([decimal.Decimal(0.0)]), + "int64", + np.array([decimal.Decimal(0.0)]), + ), + ( + # GH#45837 + np.array([Timedelta(days=1), Timedelta(days=2)], dtype=object), + "infer", + np.array([1, 2], dtype="m8[D]").astype("m8[ns]"), + ), + # TODO: similar for dt64, dt64tz, Period, Interval? + ], +) +def test_downcast(arr, expected, dtype): + result = maybe_downcast_to_dtype(arr, dtype) + tm.assert_numpy_array_equal(result, expected) + + +def test_downcast_booleans(): + # see gh-16875: coercing of booleans. 
+ ser = Series([True, True, False]) + result = maybe_downcast_to_dtype(ser, np.dtype(np.float64)) + + expected = ser + tm.assert_series_equal(result, expected) + + +def test_downcast_conversion_no_nan(any_real_numpy_dtype): + dtype = any_real_numpy_dtype + expected = np.array([1, 2]) + arr = np.array([1.0, 2.0], dtype=dtype) + + result = maybe_downcast_to_dtype(arr, "infer") + tm.assert_almost_equal(result, expected, check_dtype=False) + + +def test_downcast_conversion_nan(float_numpy_dtype): + dtype = float_numpy_dtype + data = [1.0, 2.0, np.nan] + + expected = np.array(data, dtype=dtype) + arr = np.array(data, dtype=dtype) + + result = maybe_downcast_to_dtype(arr, "infer") + tm.assert_almost_equal(result, expected) + + +def test_downcast_conversion_empty(any_real_numpy_dtype): + dtype = any_real_numpy_dtype + arr = np.array([], dtype=dtype) + result = maybe_downcast_to_dtype(arr, np.dtype("int64")) + tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) + + +@pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64]) +def test_datetime_likes_nan(klass): + dtype = klass.__name__ + "[ns]" + arr = np.array([1, 2, np.nan]) + + exp = np.array([1, 2, klass("NaT")], dtype) + res = maybe_downcast_to_dtype(arr, dtype) + tm.assert_numpy_array_equal(res, exp) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py new file mode 100644 index 00000000..8484b552 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -0,0 +1,173 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) + +from pandas import ( + Categorical, + Index, +) + + +@pytest.mark.parametrize( + "source_dtypes,expected_common_dtype", + [ + ((np.int64,), np.int64), + ((np.uint64,), np.uint64), + ((np.float32,), np.float32), + ((object,), object), + # Into ints. + ((np.int16, np.int64), np.int64), + ((np.int32, np.uint32), np.int64), + ((np.uint16, np.uint64), np.uint64), + # Into floats. + ((np.float16, np.float32), np.float32), + ((np.float16, np.int16), np.float32), + ((np.float32, np.int16), np.float32), + ((np.uint64, np.int64), np.float64), + ((np.int16, np.float64), np.float64), + ((np.float16, np.int64), np.float64), + # Into others. + ((np.complex128, np.int32), np.complex128), + ((object, np.float32), object), + ((object, np.int16), object), + # Bool with int. + ((np.dtype("bool"), np.int64), object), + ((np.dtype("bool"), np.int32), object), + ((np.dtype("bool"), np.int16), object), + ((np.dtype("bool"), np.int8), object), + ((np.dtype("bool"), np.uint64), object), + ((np.dtype("bool"), np.uint32), object), + ((np.dtype("bool"), np.uint16), object), + ((np.dtype("bool"), np.uint8), object), + # Bool with float. 
+ ((np.dtype("bool"), np.float64), object), + ((np.dtype("bool"), np.float32), object), + ( + (np.dtype("datetime64[ns]"), np.dtype("datetime64[ns]")), + np.dtype("datetime64[ns]"), + ), + ( + (np.dtype("timedelta64[ns]"), np.dtype("timedelta64[ns]")), + np.dtype("timedelta64[ns]"), + ), + ( + (np.dtype("datetime64[ns]"), np.dtype("datetime64[ms]")), + np.dtype("datetime64[ns]"), + ), + ( + (np.dtype("timedelta64[ms]"), np.dtype("timedelta64[ns]")), + np.dtype("timedelta64[ns]"), + ), + ((np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")), object), + ((np.dtype("datetime64[ns]"), np.int64), object), + ], +) +def test_numpy_dtypes(source_dtypes, expected_common_dtype): + assert find_common_type(source_dtypes) == expected_common_dtype + + +def test_raises_empty_input(): + with pytest.raises(ValueError, match="no types given"): + find_common_type([]) + + +@pytest.mark.parametrize( + "dtypes,exp_type", + [ + ([CategoricalDtype()], "category"), + ([object, CategoricalDtype()], object), + ([CategoricalDtype(), CategoricalDtype()], "category"), + ], +) +def test_categorical_dtype(dtypes, exp_type): + assert find_common_type(dtypes) == exp_type + + +def test_datetimetz_dtype_match(): + dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern") + assert find_common_type([dtype, dtype]) == "datetime64[ns, US/Eastern]" + + +@pytest.mark.parametrize( + "dtype2", + [ + DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"), + np.dtype("datetime64[ns]"), + object, + np.int64, + ], +) +def test_datetimetz_dtype_mismatch(dtype2): + dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern") + assert find_common_type([dtype, dtype2]) == object + assert find_common_type([dtype2, dtype]) == object + + +def test_period_dtype_match(): + dtype = PeriodDtype(freq="D") + assert find_common_type([dtype, dtype]) == "period[D]" + + +@pytest.mark.parametrize( + "dtype2", + [ + DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"), + PeriodDtype(freq="2D"), + PeriodDtype(freq="H"), + np.dtype("datetime64[ns]"), + object, + np.int64, + ], +) +def test_period_dtype_mismatch(dtype2): + dtype = PeriodDtype(freq="D") + assert find_common_type([dtype, dtype2]) == object + assert find_common_type([dtype2, dtype]) == object + + +interval_dtypes = [ + IntervalDtype(np.int64, "right"), + IntervalDtype(np.float64, "right"), + IntervalDtype(np.uint64, "right"), + IntervalDtype(DatetimeTZDtype(unit="ns", tz="US/Eastern"), "right"), + IntervalDtype("M8[ns]", "right"), + IntervalDtype("m8[ns]", "right"), +] + + +@pytest.mark.parametrize("left", interval_dtypes) +@pytest.mark.parametrize("right", interval_dtypes) +def test_interval_dtype(left, right): + result = find_common_type([left, right]) + + if left is right: + assert result is left + + elif left.subtype.kind in ["i", "u", "f"]: + # i.e. 
numeric + if right.subtype.kind in ["i", "u", "f"]: + # both numeric -> common numeric subtype + expected = IntervalDtype(np.float64, "right") + assert result == expected + else: + assert result == object + + else: + assert result == object + + +@pytest.mark.parametrize("dtype", interval_dtypes) +def test_interval_dtype_with_categorical(dtype): + obj = Index([], dtype=dtype) + + cat = Categorical([], categories=obj) + + result = find_common_type([dtype, cat.dtype]) + assert result == dtype diff --git a/pandas/tests/dtypes/cast/test_infer_datetimelike.py b/pandas/tests/dtypes/cast/test_infer_datetimelike.py new file mode 100644 index 00000000..3c3844e6 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_infer_datetimelike.py @@ -0,0 +1,28 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + Series, + Timestamp, +) + + +@pytest.mark.parametrize( + "data,exp_size", + [ + # see gh-16362. + ([[NaT, "a", "b", 0], [NaT, "b", "c", 1]], 8), + ([[NaT, "a", 0], [NaT, "b", 1]], 6), + ], +) +def test_maybe_infer_to_datetimelike_df_construct(data, exp_size): + result = DataFrame(np.array(data)) + assert result.size == exp_size + + +def test_maybe_infer_to_datetimelike_ser_construct(): + # see gh-19671. + result = Series(["M1701", Timestamp("20130101")]) + assert result.dtype.kind == "O" diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py new file mode 100644 index 00000000..902130bf --- /dev/null +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -0,0 +1,208 @@ +from datetime import ( + date, + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import ( + infer_dtype_from, + infer_dtype_from_array, + infer_dtype_from_scalar, +) +from pandas.core.dtypes.common import is_dtype_equal + +from pandas import ( + Categorical, + Interval, + Period, + Series, + Timedelta, + Timestamp, + date_range, +) + + +@pytest.fixture(params=[True, False]) +def pandas_dtype(request): + return request.param + + +def test_infer_dtype_from_int_scalar(any_int_numpy_dtype): + # Test that infer_dtype_from_scalar is + # returning correct dtype for int and float. 
+ data = np.dtype(any_int_numpy_dtype).type(12) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == type(data) + + +def test_infer_dtype_from_float_scalar(float_numpy_dtype): + float_numpy_dtype = np.dtype(float_numpy_dtype).type + data = float_numpy_dtype(12) + + dtype, val = infer_dtype_from_scalar(data) + assert dtype == float_numpy_dtype + + +@pytest.mark.parametrize( + "data,exp_dtype", [(12, np.int64), (np.float_(12), np.float64)] +) +def test_infer_dtype_from_python_scalar(data, exp_dtype): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == exp_dtype + + +@pytest.mark.parametrize("bool_val", [True, False]) +def test_infer_dtype_from_boolean(bool_val): + dtype, val = infer_dtype_from_scalar(bool_val) + assert dtype == np.bool_ + + +def test_infer_dtype_from_complex(complex_dtype): + data = np.dtype(complex_dtype).type(1) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.complex_ + + +@pytest.mark.parametrize( + "data", [np.datetime64(1, "ns"), Timestamp(1), datetime(2000, 1, 1, 0, 0)] +) +def test_infer_dtype_from_datetime(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == "M8[ns]" + + +@pytest.mark.parametrize("data", [np.timedelta64(1, "ns"), Timedelta(1), timedelta(1)]) +def test_infer_dtype_from_timedelta(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == "m8[ns]" + + +@pytest.mark.parametrize("freq", ["M", "D"]) +def test_infer_dtype_from_period(freq, pandas_dtype): + p = Period("2011-01-01", freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=pandas_dtype) + + if pandas_dtype: + exp_dtype = f"period[{freq}]" + else: + exp_dtype = np.object_ + + assert dtype == exp_dtype + assert val == p + + +@pytest.mark.parametrize( + "data", [date(2000, 1, 1), "foo", Timestamp(1, tz="US/Eastern")] +) +def test_infer_dtype_misc(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.object_ + + +@pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"]) +def test_infer_from_scalar_tz(tz, pandas_dtype): + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=pandas_dtype) + + if pandas_dtype: + exp_dtype = f"datetime64[ns, {tz}]" + else: + exp_dtype = np.object_ + + assert dtype == exp_dtype + assert val == dt + + +@pytest.mark.parametrize( + "left, right, subtype", + [ + (0, 1, "int64"), + (0.0, 1.0, "float64"), + (Timestamp(0), Timestamp(1), "datetime64[ns]"), + (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"), + (Timedelta(0), Timedelta(1), "timedelta64[ns]"), + ], +) +def test_infer_from_interval(left, right, subtype, closed, pandas_dtype): + # GH 30337 + interval = Interval(left, right, closed) + result_dtype, result_value = infer_dtype_from_scalar(interval, pandas_dtype) + expected_dtype = f"interval[{subtype}, {closed}]" if pandas_dtype else np.object_ + assert result_dtype == expected_dtype + assert result_value == interval + + +def test_infer_dtype_from_scalar_errors(): + msg = "invalid ndarray passed to infer_dtype_from_scalar" + + with pytest.raises(ValueError, match=msg): + infer_dtype_from_scalar(np.array([1])) + + +@pytest.mark.parametrize( + "value, expected, pandas_dtype", + [ + ("foo", np.object_, False), + (b"foo", np.object_, False), + (1, np.int64, False), + (1.5, np.float_, False), + (np.datetime64("2016-01-01"), np.dtype("M8[ns]"), False), + (Timestamp("20160101"), np.dtype("M8[ns]"), False), + (Timestamp("20160101", tz="UTC"), np.object_, False), + (Timestamp("20160101", tz="UTC"), "datetime64[ns, UTC]", 
True), + ], +) +def test_infer_dtype_from_scalar(value, expected, pandas_dtype): + dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=pandas_dtype) + assert is_dtype_equal(dtype, expected) + + with pytest.raises(TypeError, match="must be list-like"): + infer_dtype_from_array(value, pandas_dtype=pandas_dtype) + + +@pytest.mark.parametrize( + "arr, expected, pandas_dtype", + [ + ([1], np.int_, False), + (np.array([1], dtype=np.int64), np.int64, False), + ([np.nan, 1, ""], np.object_, False), + (np.array([[1.0, 2.0]]), np.float_, False), + (Categorical(list("aabc")), np.object_, False), + (Categorical([1, 2, 3]), np.int64, False), + (Categorical(list("aabc")), "category", True), + (Categorical([1, 2, 3]), "category", True), + (date_range("20160101", periods=3), np.dtype("=M8[ns]"), False), + ( + date_range("20160101", periods=3, tz="US/Eastern"), + "datetime64[ns, US/Eastern]", + True, + ), + (Series([1.0, 2, 3]), np.float64, False), + (Series(list("abc")), np.object_, False), + ( + Series(date_range("20160101", periods=3, tz="US/Eastern")), + "datetime64[ns, US/Eastern]", + True, + ), + ], +) +def test_infer_dtype_from_array(arr, expected, pandas_dtype): + dtype, _ = infer_dtype_from_array(arr, pandas_dtype=pandas_dtype) + assert is_dtype_equal(dtype, expected) + + +@pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) +def test_infer_dtype_from_scalar_zerodim_datetimelike(cls): + # ndarray.item() can incorrectly return int instead of td64/dt64 + val = cls(1234, "ns") + arr = np.array(val) + + dtype, res = infer_dtype_from_scalar(arr) + assert dtype.type is cls + assert isinstance(res, cls) + + dtype, res = infer_dtype_from(arr) + assert dtype.type is cls diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py new file mode 100644 index 00000000..3f62f31d --- /dev/null +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -0,0 +1,40 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_box_native + +from pandas import ( + Interval, + Period, + Timedelta, + Timestamp, +) + + +@pytest.mark.parametrize( + "obj,expected_dtype", + [ + (b"\x00\x10", bytes), + (int(4), int), + (np.uint(4), int), + (np.int32(-4), int), + (np.uint8(4), int), + (float(454.98), float), + (np.float16(0.4), float), + (np.float64(1.4), float), + (np.bool_(False), bool), + (datetime(2005, 2, 25), datetime), + (np.datetime64("2005-02-25"), Timestamp), + (Timestamp("2005-02-25"), Timestamp), + (np.timedelta64(1, "D"), Timedelta), + (Timedelta(1, "D"), Timedelta), + (Interval(0, 1), Interval), + (Period("4Q2005"), Period), + ], +) +def test_maybe_box_native(obj, expected_dtype): + boxed_obj = maybe_box_native(obj) + result_dtype = type(boxed_obj) + assert result_dtype is expected_dtype diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py new file mode 100644 index 00000000..02bd03f5 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -0,0 +1,582 @@ +""" +These test the method maybe_promote from core/dtypes/cast.py +""" + +import datetime +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._libs.tslibs import NaT + +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import ( + is_complex_dtype, + is_datetime64_dtype, + is_datetime_or_timedelta_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, +) +from 
pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture( + params=[ + bool, + "uint8", + "int32", + "uint64", + "float32", + "float64", + "complex64", + "complex128", + "M8[ns]", + "m8[ns]", + str, + bytes, + object, + ] +) +def any_numpy_dtype_reduced(request): + """ + Parameterized fixture for numpy dtypes, reduced from any_numpy_dtype. + + * bool + * 'uint8' + * 'int32' + * 'uint64' + * 'float32' + * 'float64' + * 'complex64' + * 'complex128' + * 'M8[ns]' + * 'm8[ns]' + * str + * bytes + * object + """ + return request.param + + +def _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar=None): + """ + Auxiliary function to unify testing of scalar/array promotion. + + Parameters + ---------- + dtype : dtype + The dtype to pass on as the first argument to maybe_promote. + fill_value : scalar + The value to pass on as the second argument to maybe_promote as + a scalar. + expected_dtype : dtype + The expected dtype returned by maybe_promote (by design this is the + same regardless of whether fill_value was passed as a scalar or in an + array!). + exp_val_for_scalar : scalar + The expected value for the (potentially upcast) fill_value returned by + maybe_promote. + """ + assert is_scalar(fill_value) + + # here, we pass on fill_value as a scalar directly; the expected value + # returned from maybe_promote is fill_value, potentially upcast to the + # returned dtype. + result_dtype, result_fill_value = maybe_promote(dtype, fill_value) + expected_fill_value = exp_val_for_scalar + + assert result_dtype == expected_dtype + _assert_match(result_fill_value, expected_fill_value) + + +def _assert_match(result_fill_value, expected_fill_value): + # GH#23982/25425 require the same type in addition to equality/NA-ness + res_type = type(result_fill_value) + ex_type = type(expected_fill_value) + + if hasattr(result_fill_value, "dtype"): + # Compare types in a way that is robust to platform-specific + # idiosyncrasies where e.g. sometimes we get "ulonglong" as an alias + # for "uint64" or "intc" as an alias for "int32" + assert result_fill_value.dtype.kind == expected_fill_value.dtype.kind + assert result_fill_value.dtype.itemsize == expected_fill_value.dtype.itemsize + else: + # On some builds, type comparison fails, e.g.
np.int32 != np.int32 + assert res_type == ex_type or res_type.__name__ == ex_type.__name__ + + match_value = result_fill_value == expected_fill_value + if match_value is pd.NA: + match_value = False + + # Note: type check above ensures that we have the _same_ NA value + # for missing values, None == None (which is checked + # through match_value above), but np.nan != np.nan and pd.NaT != pd.NaT + match_missing = isna(result_fill_value) and isna(expected_fill_value) + + assert match_value or match_missing + + +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # size 8 + ("int8", 1, "int8"), + ("int8", np.iinfo("int8").max + 1, "int16"), + ("int8", np.iinfo("int16").max + 1, "int32"), + ("int8", np.iinfo("int32").max + 1, "int64"), + ("int8", np.iinfo("int64").max + 1, "object"), + ("int8", -1, "int8"), + ("int8", np.iinfo("int8").min - 1, "int16"), + ("int8", np.iinfo("int16").min - 1, "int32"), + ("int8", np.iinfo("int32").min - 1, "int64"), + ("int8", np.iinfo("int64").min - 1, "object"), + # keep signed-ness as long as possible + ("uint8", 1, "uint8"), + ("uint8", np.iinfo("int8").max + 1, "uint8"), + ("uint8", np.iinfo("uint8").max + 1, "uint16"), + ("uint8", np.iinfo("int16").max + 1, "uint16"), + ("uint8", np.iinfo("uint16").max + 1, "uint32"), + ("uint8", np.iinfo("int32").max + 1, "uint32"), + ("uint8", np.iinfo("uint32").max + 1, "uint64"), + ("uint8", np.iinfo("int64").max + 1, "uint64"), + ("uint8", np.iinfo("uint64").max + 1, "object"), + # max of uint8 cannot be contained in int8 + ("uint8", -1, "int16"), + ("uint8", np.iinfo("int8").min - 1, "int16"), + ("uint8", np.iinfo("int16").min - 1, "int32"), + ("uint8", np.iinfo("int32").min - 1, "int64"), + ("uint8", np.iinfo("int64").min - 1, "object"), + # size 16 + ("int16", 1, "int16"), + ("int16", np.iinfo("int8").max + 1, "int16"), + ("int16", np.iinfo("int16").max + 1, "int32"), + ("int16", np.iinfo("int32").max + 1, "int64"), + ("int16", np.iinfo("int64").max + 1, "object"), + ("int16", -1, "int16"), + ("int16", np.iinfo("int8").min - 1, "int16"), + ("int16", np.iinfo("int16").min - 1, "int32"), + ("int16", np.iinfo("int32").min - 1, "int64"), + ("int16", np.iinfo("int64").min - 1, "object"), + ("uint16", 1, "uint16"), + ("uint16", np.iinfo("int8").max + 1, "uint16"), + ("uint16", np.iinfo("uint8").max + 1, "uint16"), + ("uint16", np.iinfo("int16").max + 1, "uint16"), + ("uint16", np.iinfo("uint16").max + 1, "uint32"), + ("uint16", np.iinfo("int32").max + 1, "uint32"), + ("uint16", np.iinfo("uint32").max + 1, "uint64"), + ("uint16", np.iinfo("int64").max + 1, "uint64"), + ("uint16", np.iinfo("uint64").max + 1, "object"), + ("uint16", -1, "int32"), + ("uint16", np.iinfo("int8").min - 1, "int32"), + ("uint16", np.iinfo("int16").min - 1, "int32"), + ("uint16", np.iinfo("int32").min - 1, "int64"), + ("uint16", np.iinfo("int64").min - 1, "object"), + # size 32 + ("int32", 1, "int32"), + ("int32", np.iinfo("int8").max + 1, "int32"), + ("int32", np.iinfo("int16").max + 1, "int32"), + ("int32", np.iinfo("int32").max + 1, "int64"), + ("int32", np.iinfo("int64").max + 1, "object"), + ("int32", -1, "int32"), + ("int32", np.iinfo("int8").min - 1, "int32"), + ("int32", np.iinfo("int16").min - 1, "int32"), + ("int32", np.iinfo("int32").min - 1, "int64"), + ("int32", np.iinfo("int64").min - 1, "object"), + ("uint32", 1, "uint32"), + ("uint32", np.iinfo("int8").max + 1, "uint32"), + ("uint32", np.iinfo("uint8").max + 1, "uint32"), + ("uint32", np.iinfo("int16").max + 1, "uint32"), + ("uint32", np.iinfo("uint16").max + 1, 
"uint32"), + ("uint32", np.iinfo("int32").max + 1, "uint32"), + ("uint32", np.iinfo("uint32").max + 1, "uint64"), + ("uint32", np.iinfo("int64").max + 1, "uint64"), + ("uint32", np.iinfo("uint64").max + 1, "object"), + ("uint32", -1, "int64"), + ("uint32", np.iinfo("int8").min - 1, "int64"), + ("uint32", np.iinfo("int16").min - 1, "int64"), + ("uint32", np.iinfo("int32").min - 1, "int64"), + ("uint32", np.iinfo("int64").min - 1, "object"), + # size 64 + ("int64", 1, "int64"), + ("int64", np.iinfo("int8").max + 1, "int64"), + ("int64", np.iinfo("int16").max + 1, "int64"), + ("int64", np.iinfo("int32").max + 1, "int64"), + ("int64", np.iinfo("int64").max + 1, "object"), + ("int64", -1, "int64"), + ("int64", np.iinfo("int8").min - 1, "int64"), + ("int64", np.iinfo("int16").min - 1, "int64"), + ("int64", np.iinfo("int32").min - 1, "int64"), + ("int64", np.iinfo("int64").min - 1, "object"), + ("uint64", 1, "uint64"), + ("uint64", np.iinfo("int8").max + 1, "uint64"), + ("uint64", np.iinfo("uint8").max + 1, "uint64"), + ("uint64", np.iinfo("int16").max + 1, "uint64"), + ("uint64", np.iinfo("uint16").max + 1, "uint64"), + ("uint64", np.iinfo("int32").max + 1, "uint64"), + ("uint64", np.iinfo("uint32").max + 1, "uint64"), + ("uint64", np.iinfo("int64").max + 1, "uint64"), + ("uint64", np.iinfo("uint64").max + 1, "object"), + ("uint64", -1, "object"), + ("uint64", np.iinfo("int8").min - 1, "object"), + ("uint64", np.iinfo("int16").min - 1, "object"), + ("uint64", np.iinfo("int32").min - 1, "object"), + ("uint64", np.iinfo("int64").min - 1, "object"), + ], +) +def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype): + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + + # output is not a generic int, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_int_with_float(any_int_numpy_dtype, float_numpy_dtype): + dtype = np.dtype(any_int_numpy_dtype) + fill_dtype = np.dtype(float_numpy_dtype) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling int with float always upcasts to float64 + expected_dtype = np.float64 + # fill_value can be different float type + exp_val_for_scalar = np.float64(fill_value) + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype): + + dtype = np.dtype(float_numpy_dtype) + fill_dtype = np.dtype(any_int_numpy_dtype) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling float with int always keeps float dtype + # because: np.finfo('float32').max > np.iinfo('uint64').max + expected_dtype = dtype + # output is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # float filled with float + ("float32", 1, "float32"), + ("float32", np.finfo("float32").max * 1.1, "float64"), + ("float64", 1, "float64"), + ("float64", np.finfo("float32").max * 1.1, "float64"), + # complex filled with float + ("complex64", 1, "complex64"), + ("complex64", np.finfo("float32").max * 1.1, "complex128"), + ("complex128", 1, "complex128"), + ("complex128", 
np.finfo("float32").max * 1.1, "complex128"), + # float filled with complex + ("float32", 1 + 1j, "complex64"), + ("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("float64", 1 + 1j, "complex128"), + ("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + # complex filled with complex + ("complex64", 1 + 1j, "complex64"), + ("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("complex128", 1 + 1j, "complex128"), + ("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ], +) +def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype): + + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + + # output is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_bool_with_any(any_numpy_dtype_reduced): + dtype = np.dtype(bool) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling bool with anything but bool casts to object + expected_dtype = np.dtype(object) if fill_dtype != bool else fill_dtype + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_bool(any_numpy_dtype_reduced): + dtype = np.dtype(any_numpy_dtype_reduced) + fill_value = True + + # filling anything but bool with bool casts to object + expected_dtype = np.dtype(object) if dtype != bool else dtype + # output is not a generic bool, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(bytes_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # we never use bytes dtype internally, always promote to object + expected_dtype = np.dtype(np.object_) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_bytes(any_numpy_dtype_reduced): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype + fill_value = b"abc" + + # we never use bytes dtype internally, always promote to object + expected_dtype = np.dtype(np.object_) + # output is not a generic bytes, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(datetime64_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling datetime with anything but datetime casts to object + if is_datetime64_dtype(fill_dtype): + expected_dtype = dtype + # for datetime dtypes, scalar values get cast to to_datetime64 + exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + 
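+# ---------------------------------------------------------------------------
+# Editorial illustration (not part of the upstream test suite): a minimal
+# sketch of the promotion behaviour the cases above and below exercise.  The
+# helper name is arbitrary and it only assumes behaviour of maybe_promote
+# that the surrounding tests already assert.
+def _illustrate_maybe_promote():
+    # int8 cannot hold 128, so the dtype is widened to int16 and the fill
+    # value comes back as an int16 scalar.
+    dtype, fill = maybe_promote(np.dtype("int8"), np.iinfo("int8").max + 1)
+    assert dtype == np.dtype("int16") and fill == 128
+
+    # integer dtypes cannot hold NaN, so filling with np.nan promotes to
+    # float64 and the fill value stays NaN.
+    dtype, fill = maybe_promote(np.dtype("int64"), np.nan)
+    assert dtype == np.dtype("float64") and np.isnan(fill)
+
+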
+@pytest.mark.parametrize(
+    "fill_value",
+    [
+        pd.Timestamp("now"),
+        np.datetime64("now"),
+        datetime.datetime.now(),
+        datetime.date.today(),
+    ],
+    ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"],
+)
+def test_maybe_promote_any_with_datetime64(any_numpy_dtype_reduced, fill_value):
+    dtype = np.dtype(any_numpy_dtype_reduced)
+
+    # filling anything but datetime with datetime casts to object
+    if is_datetime64_dtype(dtype):
+        expected_dtype = dtype
+        # for datetime dtypes, scalar values get cast to np.datetime64 via pd.Timestamp
+        exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64()
+    else:
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+
+    warn = None
+    msg = "Using a `date` object for fill_value"
+    if type(fill_value) is datetime.date and dtype.kind == "M":
+        # Casting date to dt64 is deprecated
+        warn = FutureWarning
+
+    with tm.assert_produces_warning(warn, match=msg):
+        # stacklevel is chosen to make sense when called from higher-level functions
+        _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+
+
+@pytest.mark.parametrize(
+    "fill_value",
+    [
+        pd.Timestamp("now"),
+        np.datetime64("now"),
+        datetime.datetime.now(),
+        datetime.date.today(),
+    ],
+    ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"],
+)
+def test_maybe_promote_any_numpy_dtype_with_datetimetz(
+    any_numpy_dtype_reduced, tz_aware_fixture, fill_value
+):
+    dtype = np.dtype(any_numpy_dtype_reduced)
+    fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture)
+
+    fill_value = pd.Series([fill_value], dtype=fill_dtype)[0]
+
+    # filling any numpy dtype with datetimetz casts to object
+    expected_dtype = np.dtype(object)
+    exp_val_for_scalar = fill_value
+
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+
+
+def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype_reduced):
+    dtype = np.dtype(timedelta64_dtype)
+    fill_dtype = np.dtype(any_numpy_dtype_reduced)
+
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+
+    # filling timedelta with anything but timedelta casts to object
+    if is_timedelta64_dtype(fill_dtype):
+        expected_dtype = dtype
+        # for timedelta dtypes, scalar values get cast to np.timedelta64 via pd.Timedelta
+        exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64()
+    else:
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+
+
+@pytest.mark.parametrize(
+    "fill_value",
+    [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)],
+    ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"],
+)
+def test_maybe_promote_any_with_timedelta64(any_numpy_dtype_reduced, fill_value):
+    dtype = np.dtype(any_numpy_dtype_reduced)
+
+    # filling anything but timedelta with timedelta casts to object
+    if is_timedelta64_dtype(dtype):
+        expected_dtype = dtype
+        # for timedelta dtypes, scalar values get cast to np.timedelta64 via pd.Timedelta
+        exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64()
+    else:
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+
+
+def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype_reduced):
+    dtype = np.dtype(string_dtype)
+    fill_dtype = np.dtype(any_numpy_dtype_reduced)
+
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+
+    # filling string with
anything casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_string(any_numpy_dtype_reduced): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype + fill_value = "abc" + + # filling anything with a string casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(object_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling object with anything stays object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_object(any_numpy_dtype_reduced): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of object dtype from a scalar value (i.e. passing + # dtypes.common.is_scalar), which can however not be cast to int/float etc. + fill_value = pd.DateOffset(1) + + # filling object with anything stays object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced, nulls_fixture): + fill_value = nulls_fixture + dtype = np.dtype(any_numpy_dtype_reduced) + + if isinstance(fill_value, Decimal): + # Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture) + # this is the existing behavior in maybe_promote, + # hinges on is_valid_na_for_dtype + if dtype.kind in ["i", "u", "f", "c"]: + if dtype.kind in ["i", "u"]: + expected_dtype = np.dtype(np.float64) + else: + expected_dtype = dtype + exp_val_for_scalar = np.nan + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + elif is_integer_dtype(dtype) and fill_value is not NaT: + # integer + other missing value (np.nan / None) casts to float + expected_dtype = np.float64 + exp_val_for_scalar = np.nan + elif is_object_dtype(dtype) and fill_value is NaT: + # inserting into object does not cast the value + # but *does* cast None to np.nan + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + elif is_datetime_or_timedelta_dtype(dtype): + # datetime / timedelta cast all missing values to dtyped-NaT + expected_dtype = dtype + exp_val_for_scalar = dtype.type("NaT", "ns") + elif fill_value is NaT: + # NaT upcasts everything that's not datetime/timedelta to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = NaT + elif is_float_dtype(dtype) or is_complex_dtype(dtype): + # float / complex + missing value (!= NaT) stays the same + expected_dtype = dtype + exp_val_for_scalar = np.nan + else: + # all other cases cast to object, and use np.nan as missing value + expected_dtype = np.dtype(object) + if fill_value is pd.NA: + exp_val_for_scalar = pd.NA + else: + exp_val_for_scalar = np.nan + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py new file mode 100644 index 00000000..984655c6 --- /dev/null +++ b/pandas/tests/dtypes/test_common.py @@ -0,0 +1,803 @@ +from __future__ import annotations + +from 
datetime import datetime + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.astype import astype_nansafe +import pandas.core.dtypes.common as com +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + CategoricalDtypeType, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import isna + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import pandas_dtype +from pandas.arrays import SparseArray + + +# EA & Actual Dtypes +def to_ea_dtypes(dtypes): + """convert list of string dtypes to EA dtype""" + return [getattr(pd, dt + "Dtype") for dt in dtypes] + + +def to_numpy_dtypes(dtypes): + """convert list of string dtypes to numpy dtype""" + return [getattr(np, dt) for dt in dtypes if isinstance(dt, str)] + + +class TestPandasDtype: + + # Passing invalid dtype, both as a string or object, must raise TypeError + # Per issue GH15520 + @pytest.mark.parametrize("box", [pd.Timestamp, "pd.Timestamp", list]) + def test_invalid_dtype_error(self, box): + with pytest.raises(TypeError, match="not understood"): + com.pandas_dtype(box) + + @pytest.mark.parametrize( + "dtype", + [ + object, + "float64", + np.object_, + np.dtype("object"), + "O", + np.float64, + float, + np.dtype("float64"), + ], + ) + def test_pandas_dtype_valid(self, dtype): + assert com.pandas_dtype(dtype) == dtype + + @pytest.mark.parametrize( + "dtype", ["M8[ns]", "m8[ns]", "object", "float64", "int64"] + ) + def test_numpy_dtype(self, dtype): + assert com.pandas_dtype(dtype) == np.dtype(dtype) + + def test_numpy_string_dtype(self): + # do not parse freq-like string as period dtype + assert com.pandas_dtype("U") == np.dtype("U") + assert com.pandas_dtype("S") == np.dtype("S") + + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[ns, US/Eastern]", + "datetime64[ns, Asia/Tokyo]", + "datetime64[ns, UTC]", + # GH#33885 check that the M8 alias is understood + "M8[ns, US/Eastern]", + "M8[ns, Asia/Tokyo]", + "M8[ns, UTC]", + ], + ) + def test_datetimetz_dtype(self, dtype): + assert com.pandas_dtype(dtype) == DatetimeTZDtype.construct_from_string(dtype) + assert com.pandas_dtype(dtype) == dtype + + def test_categorical_dtype(self): + assert com.pandas_dtype("category") == CategoricalDtype() + + @pytest.mark.parametrize( + "dtype", + [ + "period[D]", + "period[3M]", + "period[U]", + "Period[D]", + "Period[3M]", + "Period[U]", + ], + ) + def test_period_dtype(self, dtype): + assert com.pandas_dtype(dtype) is PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == dtype + + +dtypes = { + "datetime_tz": com.pandas_dtype("datetime64[ns, US/Eastern]"), + "datetime": com.pandas_dtype("datetime64[ns]"), + "timedelta": com.pandas_dtype("timedelta64[ns]"), + "period": PeriodDtype("D"), + "integer": np.dtype(np.int64), + "float": np.dtype(np.float64), + "object": np.dtype(object), + "category": com.pandas_dtype("category"), + "string": pd.StringDtype(), +} + + +@pytest.mark.parametrize("name1,dtype1", list(dtypes.items()), ids=lambda x: str(x)) +@pytest.mark.parametrize("name2,dtype2", list(dtypes.items()), ids=lambda x: str(x)) +def test_dtype_equal(name1, dtype1, name2, dtype2): + + # match equal to self, but not equal to other + assert com.is_dtype_equal(dtype1, dtype1) + if name1 != name2: + assert not com.is_dtype_equal(dtype1, dtype2) + + +@pytest.mark.parametrize("name,dtype", list(dtypes.items()), ids=lambda x: str(x)) +def test_pyarrow_string_import_error(name, 
dtype): + # GH-44276 + assert not com.is_dtype_equal(dtype, "string[pyarrow]") + + +@pytest.mark.parametrize( + "dtype1,dtype2", + [ + (np.int8, np.int64), + (np.int16, np.int64), + (np.int32, np.int64), + (np.float32, np.float64), + (PeriodDtype("D"), PeriodDtype("2D")), # PeriodType + ( + com.pandas_dtype("datetime64[ns, US/Eastern]"), + com.pandas_dtype("datetime64[ns, CET]"), + ), # Datetime + (None, None), # gh-15941: no exception should be raised. + ], +) +def test_dtype_equal_strict(dtype1, dtype2): + assert not com.is_dtype_equal(dtype1, dtype2) + + +def get_is_dtype_funcs(): + """ + Get all functions in pandas.core.dtypes.common that + begin with 'is_' and end with 'dtype' + + """ + fnames = [f for f in dir(com) if (f.startswith("is_") and f.endswith("dtype"))] + fnames.remove("is_string_or_object_np_dtype") # fastpath requires np.dtype obj + return [getattr(com, fname) for fname in fnames] + + +@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__) +def test_get_dtype_error_catch(func): + # see gh-15941 + # + # No exception should be raised. + + assert not func(None) + + +def test_is_object(): + assert com.is_object_dtype(object) + assert com.is_object_dtype(np.array([], dtype=object)) + + assert not com.is_object_dtype(int) + assert not com.is_object_dtype(np.array([], dtype=int)) + assert not com.is_object_dtype([1, 2, 3]) + + +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) +def test_is_sparse(check_scipy): + assert com.is_sparse(SparseArray([1, 2, 3])) + + assert not com.is_sparse(np.array([1, 2, 3])) + + if check_scipy: + import scipy.sparse + + assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3])) + + +@td.skip_if_no_scipy +def test_is_scipy_sparse(): + from scipy.sparse import bsr_matrix + + assert com.is_scipy_sparse(bsr_matrix([1, 2, 3])) + + assert not com.is_scipy_sparse(SparseArray([1, 2, 3])) + + +def test_is_categorical(): + cat = pd.Categorical([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + assert com.is_categorical(cat) + assert com.is_categorical(pd.Series(cat)) + assert com.is_categorical(pd.CategoricalIndex([1, 2, 3])) + + assert not com.is_categorical([1, 2, 3]) + + +def test_is_categorical_deprecation(): + # GH#33385 + with tm.assert_produces_warning(FutureWarning): + com.is_categorical([1, 2, 3]) + + +def test_is_datetime64_dtype(): + assert not com.is_datetime64_dtype(object) + assert not com.is_datetime64_dtype([1, 2, 3]) + assert not com.is_datetime64_dtype(np.array([], dtype=int)) + + assert com.is_datetime64_dtype(np.datetime64) + assert com.is_datetime64_dtype(np.array([], dtype=np.datetime64)) + + +def test_is_datetime64tz_dtype(): + assert not com.is_datetime64tz_dtype(object) + assert not com.is_datetime64tz_dtype([1, 2, 3]) + assert not com.is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) + assert com.is_datetime64tz_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + +def test_is_timedelta64_dtype(): + assert not com.is_timedelta64_dtype(object) + assert not com.is_timedelta64_dtype(None) + assert not com.is_timedelta64_dtype([1, 2, 3]) + assert not com.is_timedelta64_dtype(np.array([], dtype=np.datetime64)) + assert not com.is_timedelta64_dtype("0 days") + assert not com.is_timedelta64_dtype("0 days 00:00:00") + assert not com.is_timedelta64_dtype(["0 days 00:00:00"]) + assert not com.is_timedelta64_dtype("NO DATE") + + assert com.is_timedelta64_dtype(np.timedelta64) + assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + 
assert com.is_timedelta64_dtype(pd.to_timedelta(["0 days", "1 days"])) + + +def test_is_period_dtype(): + assert not com.is_period_dtype(object) + assert not com.is_period_dtype([1, 2, 3]) + assert not com.is_period_dtype(pd.Period("2017-01-01")) + + assert com.is_period_dtype(PeriodDtype(freq="D")) + assert com.is_period_dtype(pd.PeriodIndex([], freq="A")) + + +def test_is_interval_dtype(): + assert not com.is_interval_dtype(object) + assert not com.is_interval_dtype([1, 2, 3]) + + assert com.is_interval_dtype(IntervalDtype()) + + interval = pd.Interval(1, 2, closed="right") + assert not com.is_interval_dtype(interval) + assert com.is_interval_dtype(pd.IntervalIndex([interval])) + + +def test_is_categorical_dtype(): + assert not com.is_categorical_dtype(object) + assert not com.is_categorical_dtype([1, 2, 3]) + + assert com.is_categorical_dtype(CategoricalDtype()) + assert com.is_categorical_dtype(pd.Categorical([1, 2, 3])) + assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + + +def test_is_string_dtype(): + assert not com.is_string_dtype(int) + assert not com.is_string_dtype(pd.Series([1, 2])) + + assert com.is_string_dtype(str) + assert com.is_string_dtype(object) + assert com.is_string_dtype(np.array(["a", "b"])) + assert com.is_string_dtype(pd.StringDtype()) + + +def test_is_string_dtype_nullable(nullable_string_dtype): + assert com.is_string_dtype(pd.array(["a", "b"], dtype=nullable_string_dtype)) + + +integer_dtypes: list = [] + + +@pytest.mark.parametrize( + "dtype", + integer_dtypes + + [pd.Series([1, 2])] + + tm.ALL_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.ALL_INT_NUMPY_DTYPES) + + tm.ALL_INT_EA_DTYPES + + to_ea_dtypes(tm.ALL_INT_EA_DTYPES), +) +def test_is_integer_dtype(dtype): + assert com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ], +) +def test_is_not_integer_dtype(dtype): + assert not com.is_integer_dtype(dtype) + + +signed_integer_dtypes: list = [] + + +@pytest.mark.parametrize( + "dtype", + signed_integer_dtypes + + [pd.Series([1, 2])] + + tm.SIGNED_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.SIGNED_INT_NUMPY_DTYPES) + + tm.SIGNED_INT_EA_DTYPES + + to_ea_dtypes(tm.SIGNED_INT_EA_DTYPES), +) +def test_is_signed_integer_dtype(dtype): + assert com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ] + + tm.UNSIGNED_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.UNSIGNED_INT_NUMPY_DTYPES) + + tm.UNSIGNED_INT_EA_DTYPES + + to_ea_dtypes(tm.UNSIGNED_INT_EA_DTYPES), +) +def test_is_not_signed_integer_dtype(dtype): + assert not com.is_signed_integer_dtype(dtype) + + +unsigned_integer_dtypes: list = [] + + +@pytest.mark.parametrize( + "dtype", + unsigned_integer_dtypes + + [pd.Series([1, 2], dtype=np.uint32)] + + tm.UNSIGNED_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.UNSIGNED_INT_NUMPY_DTYPES) + + tm.UNSIGNED_INT_EA_DTYPES + + to_ea_dtypes(tm.UNSIGNED_INT_EA_DTYPES), +) +def test_is_unsigned_integer_dtype(dtype): + assert com.is_unsigned_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ] + + tm.SIGNED_INT_NUMPY_DTYPES + + to_numpy_dtypes(tm.SIGNED_INT_NUMPY_DTYPES) + + tm.SIGNED_INT_EA_DTYPES + + 
to_ea_dtypes(tm.SIGNED_INT_EA_DTYPES), +) +def test_is_not_unsigned_integer_dtype(dtype): + assert not com.is_unsigned_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", [np.int64, np.array([1, 2], dtype=np.int64), "Int64", pd.Int64Dtype] +) +def test_is_int64_dtype(dtype): + assert com.is_int64_dtype(dtype) + + +def test_type_comparison_with_numeric_ea_dtype(any_numeric_ea_dtype): + # GH#43038 + assert pandas_dtype(any_numeric_ea_dtype) == any_numeric_ea_dtype + + +def test_type_comparison_with_real_numpy_dtype(any_real_numpy_dtype): + # GH#43038 + assert pandas_dtype(any_real_numpy_dtype) == any_real_numpy_dtype + + +def test_type_comparison_with_signed_int_ea_dtype_and_signed_int_numpy_dtype( + any_signed_int_ea_dtype, any_signed_int_numpy_dtype +): + # GH#43038 + assert not pandas_dtype(any_signed_int_ea_dtype) == any_signed_int_numpy_dtype + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.int32, + np.uint64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([1, 2], dtype=np.uint32), + "int8", + "Int8", + pd.Int8Dtype, + ], +) +def test_is_not_int64_dtype(dtype): + assert not com.is_int64_dtype(dtype) + + +def test_is_datetime64_any_dtype(): + assert not com.is_datetime64_any_dtype(int) + assert not com.is_datetime64_any_dtype(str) + assert not com.is_datetime64_any_dtype(np.array([1, 2])) + assert not com.is_datetime64_any_dtype(np.array(["a", "b"])) + + assert com.is_datetime64_any_dtype(np.datetime64) + assert com.is_datetime64_any_dtype(np.array([], dtype=np.datetime64)) + assert com.is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_any_dtype( + pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]") + ) + + +def test_is_datetime64_ns_dtype(): + assert not com.is_datetime64_ns_dtype(int) + assert not com.is_datetime64_ns_dtype(str) + assert not com.is_datetime64_ns_dtype(np.datetime64) + assert not com.is_datetime64_ns_dtype(np.array([1, 2])) + assert not com.is_datetime64_ns_dtype(np.array(["a", "b"])) + assert not com.is_datetime64_ns_dtype(np.array([], dtype=np.datetime64)) + + # This datetime array has the wrong unit (ps instead of ns) + assert not com.is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) + + assert com.is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_ns_dtype( + pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]")) + ) + + # non-nano dt64tz + assert not com.is_datetime64_ns_dtype(DatetimeTZDtype("us", "US/Eastern")) + + +def test_is_timedelta64_ns_dtype(): + assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]")) + assert not com.is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + + assert com.is_timedelta64_ns_dtype(np.dtype("m8[ns]")) + assert com.is_timedelta64_ns_dtype(np.array([1, 2], dtype="m8[ns]")) + + +def test_is_datetime_or_timedelta_dtype(): + assert not com.is_datetime_or_timedelta_dtype(int) + assert not com.is_datetime_or_timedelta_dtype(str) + assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + assert not com.is_datetime_or_timedelta_dtype(np.array(["a", "b"])) + + # TODO(jreback), this is slightly suspect + assert not com.is_datetime_or_timedelta_dtype(DatetimeTZDtype("ns", "US/Eastern")) + + assert com.is_datetime_or_timedelta_dtype(np.datetime64) + assert com.is_datetime_or_timedelta_dtype(np.timedelta64) + assert com.is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + assert com.is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + + +def 
test_is_numeric_v_string_like(): + assert not com.is_numeric_v_string_like(np.array([1]), 1) + assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) + assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + + assert com.is_numeric_v_string_like(np.array([1]), "foo") + assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + + +def test_is_datetimelike_v_numeric(): + dt = np.datetime64(datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_numeric(1, 1) + assert not com.is_datetimelike_v_numeric(dt, dt) + assert not com.is_datetimelike_v_numeric(np.array([1]), np.array([2])) + assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), 1) + assert com.is_datetimelike_v_numeric(np.array([1]), dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + + +def test_needs_i8_conversion(): + assert not com.needs_i8_conversion(str) + assert not com.needs_i8_conversion(np.int64) + assert not com.needs_i8_conversion(pd.Series([1, 2])) + assert not com.needs_i8_conversion(np.array(["a", "b"])) + + assert com.needs_i8_conversion(np.datetime64) + assert com.needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + assert com.needs_i8_conversion(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + +def test_is_numeric_dtype(): + assert not com.is_numeric_dtype(str) + assert not com.is_numeric_dtype(np.datetime64) + assert not com.is_numeric_dtype(np.timedelta64) + assert not com.is_numeric_dtype(np.array(["a", "b"])) + assert not com.is_numeric_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_numeric_dtype(int) + assert com.is_numeric_dtype(float) + assert com.is_numeric_dtype(np.uint64) + assert com.is_numeric_dtype(pd.Series([1, 2])) + assert com.is_numeric_dtype(pd.Index([1, 2.0])) + + +def test_is_float_dtype(): + assert not com.is_float_dtype(str) + assert not com.is_float_dtype(int) + assert not com.is_float_dtype(pd.Series([1, 2])) + assert not com.is_float_dtype(np.array(["a", "b"])) + + assert com.is_float_dtype(float) + assert com.is_float_dtype(pd.Index([1, 2.0])) + + +def test_is_bool_dtype(): + assert not com.is_bool_dtype(int) + assert not com.is_bool_dtype(str) + assert not com.is_bool_dtype(pd.Series([1, 2])) + assert not com.is_bool_dtype(pd.Series(["a", "b"], dtype="category")) + assert not com.is_bool_dtype(np.array(["a", "b"])) + assert not com.is_bool_dtype(pd.Index(["a", "b"])) + assert not com.is_bool_dtype("Int64") + + assert com.is_bool_dtype(bool) + assert com.is_bool_dtype(np.bool_) + assert com.is_bool_dtype(pd.Series([True, False], dtype="category")) + assert com.is_bool_dtype(np.array([True, False])) + assert com.is_bool_dtype(pd.Index([True, False])) + + assert com.is_bool_dtype(pd.BooleanDtype()) + assert com.is_bool_dtype(pd.array([True, False, None], dtype="boolean")) + assert com.is_bool_dtype("boolean") + + +def test_is_bool_dtype_numpy_error(): + # GH39010 + assert not com.is_bool_dtype("0 - Name") + + +@pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning") +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) +def test_is_extension_type(check_scipy): + assert not com.is_extension_type([1, 2, 3]) + assert not com.is_extension_type(np.array([1, 2, 3])) + assert not 
com.is_extension_type(pd.DatetimeIndex([1, 2, 3]))
+
+    cat = pd.Categorical([1, 2, 3])
+    assert com.is_extension_type(cat)
+    assert com.is_extension_type(pd.Series(cat))
+    assert com.is_extension_type(SparseArray([1, 2, 3]))
+    assert com.is_extension_type(pd.DatetimeIndex(["2000"], tz="US/Eastern"))
+
+    dtype = DatetimeTZDtype("ns", tz="US/Eastern")
+    s = pd.Series([], dtype=dtype)
+    assert com.is_extension_type(s)
+
+    if check_scipy:
+        import scipy.sparse
+
+        assert not com.is_extension_type(scipy.sparse.bsr_matrix([1, 2, 3]))
+
+
+def test_is_extension_type_deprecation():
+    with tm.assert_produces_warning(FutureWarning):
+        com.is_extension_type([1, 2, 3])
+
+
+@pytest.mark.parametrize(
+    "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
+)
+def test_is_extension_array_dtype(check_scipy):
+    assert not com.is_extension_array_dtype([1, 2, 3])
+    assert not com.is_extension_array_dtype(np.array([1, 2, 3]))
+    assert not com.is_extension_array_dtype(pd.DatetimeIndex([1, 2, 3]))
+
+    cat = pd.Categorical([1, 2, 3])
+    assert com.is_extension_array_dtype(cat)
+    assert com.is_extension_array_dtype(pd.Series(cat))
+    assert com.is_extension_array_dtype(SparseArray([1, 2, 3]))
+    assert com.is_extension_array_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern"))
+
+    dtype = DatetimeTZDtype("ns", tz="US/Eastern")
+    s = pd.Series([], dtype=dtype)
+    assert com.is_extension_array_dtype(s)
+
+    if check_scipy:
+        import scipy.sparse
+
+        assert not com.is_extension_array_dtype(scipy.sparse.bsr_matrix([1, 2, 3]))
+
+
+def test_is_complex_dtype():
+    assert not com.is_complex_dtype(int)
+    assert not com.is_complex_dtype(str)
+    assert not com.is_complex_dtype(pd.Series([1, 2]))
+    assert not com.is_complex_dtype(np.array(["a", "b"]))
+
+    assert com.is_complex_dtype(np.complex_)
+    assert com.is_complex_dtype(complex)
+    assert com.is_complex_dtype(np.array([1 + 1j, 5]))
+
+
+@pytest.mark.parametrize(
+    "input_param,result",
+    [
+        (int, np.dtype(int)),
+        ("int32", np.dtype("int32")),
+        (float, np.dtype(float)),
+        ("float64", np.dtype("float64")),
+        (np.dtype("float64"), np.dtype("float64")),
+        (str, np.dtype(str)),
+        (pd.Series([1, 2], dtype=np.dtype("int16")), np.dtype("int16")),
+        (pd.Series(["a", "b"]), np.dtype(object)),
+        (pd.Index([1, 2]), np.dtype("int64")),
+        (pd.Index(["a", "b"]), np.dtype(object)),
+        ("category", "category"),
+        (pd.Categorical(["a", "b"]).dtype, CategoricalDtype(["a", "b"])),
+        (pd.Categorical(["a", "b"]), CategoricalDtype(["a", "b"])),
+        (pd.CategoricalIndex(["a", "b"]).dtype, CategoricalDtype(["a", "b"])),
+        (pd.CategoricalIndex(["a", "b"]), CategoricalDtype(["a", "b"])),
+        (CategoricalDtype(), CategoricalDtype()),
+        (pd.DatetimeIndex([1, 2]), np.dtype("=M8[ns]")),
+        (pd.DatetimeIndex([1, 2]).dtype, np.dtype("=M8[ns]")),
+        (" df.two.sum()
+
+    with catch_warnings(record=True) as w:
+        # successfully modify column in place
+        # this should not raise a warning
+        df.one += 1
+        assert len(w) == 0
+        assert df.one.iloc[0] == 2
+
+    with catch_warnings(record=True) as w:
+        # successfully add an attribute to a series
+        # this should not raise a warning
+        df.two.not_an_index = [1, 2]
+        assert len(w) == 0
+
+    with tm.assert_produces_warning(UserWarning):
+        # warn when setting column to nonexistent name
+        df.four = df.two + 2
+        assert df.four.sum() > df.two.sum()
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
new file mode 100644
index 00000000..948d14c1
--- /dev/null
+++ b/pandas/tests/dtypes/test_inference.py
@@ -0,0 +1,2018 @@
+"""
+These
test the public routines exposed in types/common.py +related to inference and not otherwise tested in types/test_common.py + +""" +import collections +from collections import namedtuple +from datetime import ( + date, + datetime, + time, + timedelta, +) +from decimal import Decimal +from fractions import Fraction +from io import StringIO +import itertools +from numbers import Number +import re +import sys +from typing import ( + Generic, + TypeVar, +) + +import numpy as np +import pytest +import pytz + +from pandas._libs import ( + lib, + missing as libmissing, + ops as libops, +) +import pandas.util._test_decorators as td + +from pandas.core.dtypes import inference +from pandas.core.dtypes.common import ( + ensure_int32, + is_bool, + is_complex, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_float, + is_integer, + is_number, + is_scalar, + is_scipy_sparse, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DateOffset, + DatetimeIndex, + Index, + Interval, + Period, + PeriodIndex, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import ( + BooleanArray, + FloatingArray, + IntegerArray, +) + + +@pytest.fixture(params=[True, False], ids=str) +def coerce(request): + return request.param + + +class MockNumpyLikeArray: + """ + A class which is numpy-like (e.g. Pint's Quantity) but not actually numpy + + The key is that it is not actually a numpy array so + ``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other + important properties are that the class defines a :meth:`__iter__` method + (so that ``isinstance(abc.Iterable)`` returns ``True``) and has a + :meth:`ndim` property, as pandas special-cases 0-dimensional arrays in some + cases. + + We expect pandas to behave with respect to such duck arrays exactly as + with real numpy arrays. In particular, a 0-dimensional duck array is *NOT* + a scalar (`is_scalar(np.array(1)) == False`), but it is not list-like either. 
+ """ + + def __init__(self, values) -> None: + self._values = values + + def __iter__(self): + iter_values = iter(self._values) + + def it_outer(): + yield from iter_values + + return it_outer() + + def __len__(self): + return len(self._values) + + def __array__(self, t=None): + return np.asarray(self._values, dtype=t) + + @property + def ndim(self): + return self._values.ndim + + @property + def dtype(self): + return self._values.dtype + + @property + def size(self): + return self._values.size + + @property + def shape(self): + return self._values.shape + + +# collect all objects to be tested for list-like-ness; use tuples of objects, +# whether they are list-like or not (special casing for sets), and their ID +ll_params = [ + ([1], True, "list"), + ([], True, "list-empty"), + ((1,), True, "tuple"), + ((), True, "tuple-empty"), + ({"a": 1}, True, "dict"), + ({}, True, "dict-empty"), + ({"a", 1}, "set", "set"), + (set(), "set", "set-empty"), + (frozenset({"a", 1}), "set", "frozenset"), + (frozenset(), "set", "frozenset-empty"), + (iter([1, 2]), True, "iterator"), + (iter([]), True, "iterator-empty"), + ((x for x in [1, 2]), True, "generator"), + ((_ for _ in []), True, "generator-empty"), + (Series([1]), True, "Series"), + (Series([], dtype=object), True, "Series-empty"), + (Series(["a"]).str, True, "StringMethods"), + (Series([], dtype="O").str, True, "StringMethods-empty"), + (Index([1]), True, "Index"), + (Index([]), True, "Index-empty"), + (DataFrame([[1]]), True, "DataFrame"), + (DataFrame(), True, "DataFrame-empty"), + (np.ndarray((2,) * 1), True, "ndarray-1d"), + (np.array([]), True, "ndarray-1d-empty"), + (np.ndarray((2,) * 2), True, "ndarray-2d"), + (np.array([[]]), True, "ndarray-2d-empty"), + (np.ndarray((2,) * 3), True, "ndarray-3d"), + (np.array([[[]]]), True, "ndarray-3d-empty"), + (np.ndarray((2,) * 4), True, "ndarray-4d"), + (np.array([[[[]]]]), True, "ndarray-4d-empty"), + (np.array(2), False, "ndarray-0d"), + (MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"), + (MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"), + (MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"), + (MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"), + (MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"), + (MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"), + (1, False, "int"), + (b"123", False, "bytes"), + (b"", False, "bytes-empty"), + ("123", False, "string"), + ("", False, "string-empty"), + (str, False, "string-type"), + (object(), False, "object"), + (np.nan, False, "NaN"), + (None, False, "None"), +] +objs, expected, ids = zip(*ll_params) + + +@pytest.fixture(params=zip(objs, expected), ids=ids) +def maybe_list_like(request): + return request.param + + +def test_is_list_like(maybe_list_like): + obj, expected = maybe_list_like + expected = True if expected == "set" else expected + assert inference.is_list_like(obj) == expected + + +def test_is_list_like_disallow_sets(maybe_list_like): + obj, expected = maybe_list_like + expected = False if expected == "set" else expected + assert inference.is_list_like(obj, allow_sets=False) == expected + + +def test_is_list_like_recursion(): + # GH 33721 + # interpreter would crash with SIGABRT + def foo(): + inference.is_list_like([]) + foo() + + rec_limit 
= sys.getrecursionlimit() + try: + # Limit to avoid stack overflow on Windows CI + sys.setrecursionlimit(100) + with tm.external_error_raised(RecursionError): + foo() + finally: + sys.setrecursionlimit(rec_limit) + + +def test_is_list_like_iter_is_none(): + # GH 43373 + # is_list_like was yielding false positives with __iter__ == None + class NotListLike: + def __getitem__(self, item): + return self + + __iter__ = None + + assert not inference.is_list_like(NotListLike()) + + +def test_is_list_like_generic(): + # GH 49649 + # is_list_like was yielding false positives for Generic classes in python 3.11 + T = TypeVar("T") + + class MyDataFrame(DataFrame, Generic[T]): + ... + + tstc = MyDataFrame[int] + tst = MyDataFrame[int]({"x": [1, 2, 3]}) + + assert not inference.is_list_like(tstc) + assert isinstance(tst, DataFrame) + assert inference.is_list_like(tst) + + +def test_is_sequence(): + is_seq = inference.is_sequence + assert is_seq((1, 2)) + assert is_seq([1, 2]) + assert not is_seq("abcd") + assert not is_seq(np.int64) + + class A: + def __getitem__(self, item): + return 1 + + assert not is_seq(A()) + + +def test_is_array_like(): + assert inference.is_array_like(Series([], dtype=object)) + assert inference.is_array_like(Series([1, 2])) + assert inference.is_array_like(np.array(["a", "b"])) + assert inference.is_array_like(Index(["2016-01-01"])) + assert inference.is_array_like(np.array([2, 3])) + assert inference.is_array_like(MockNumpyLikeArray(np.array([2, 3]))) + + class DtypeList(list): + dtype = "special" + + assert inference.is_array_like(DtypeList()) + + assert not inference.is_array_like([1, 2, 3]) + assert not inference.is_array_like(()) + assert not inference.is_array_like("foo") + assert not inference.is_array_like(123) + + +@pytest.mark.parametrize( + "inner", + [ + [], + [1], + (1,), + (1, 2), + {"a": 1}, + {1, "a"}, + Series([1]), + Series([], dtype=object), + Series(["a"]).str, + (x for x in range(5)), + ], +) +@pytest.mark.parametrize("outer", [list, Series, np.array, tuple]) +def test_is_nested_list_like_passes(inner, outer): + result = outer([inner for _ in range(5)]) + assert inference.is_list_like(result) + + +@pytest.mark.parametrize( + "obj", + [ + "abc", + [], + [1], + (1,), + ["a"], + "a", + {"a"}, + [1, 2, 3], + Series([1]), + DataFrame({"A": [1]}), + ([1, 2] for _ in range(5)), + ], +) +def test_is_nested_list_like_fails(obj): + assert not inference.is_nested_list_like(obj) + + +@pytest.mark.parametrize("ll", [{}, {"A": 1}, Series([1]), collections.defaultdict()]) +def test_is_dict_like_passes(ll): + assert inference.is_dict_like(ll) + + +@pytest.mark.parametrize( + "ll", + [ + "1", + 1, + [1, 2], + (1, 2), + range(2), + Index([1]), + dict, + collections.defaultdict, + Series, + ], +) +def test_is_dict_like_fails(ll): + assert not inference.is_dict_like(ll) + + +@pytest.mark.parametrize("has_keys", [True, False]) +@pytest.mark.parametrize("has_getitem", [True, False]) +@pytest.mark.parametrize("has_contains", [True, False]) +def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains): + class DictLike: + def __init__(self, d) -> None: + self.d = d + + if has_keys: + + def keys(self): + return self.d.keys() + + if has_getitem: + + def __getitem__(self, key): + return self.d.__getitem__(key) + + if has_contains: + + def __contains__(self, key) -> bool: + return self.d.__contains__(key) + + d = DictLike({1: 2}) + result = inference.is_dict_like(d) + expected = has_keys and has_getitem and has_contains + + assert result is expected + + +def 
test_is_file_like(): + class MockFile: + pass + + is_file = inference.is_file_like + + data = StringIO("data") + assert is_file(data) + + # No read / write attributes + # No iterator attributes + m = MockFile() + assert not is_file(m) + + MockFile.write = lambda self: 0 + + # Write attribute but not an iterator + m = MockFile() + assert not is_file(m) + + # gh-16530: Valid iterator just means we have the + # __iter__ attribute for our purposes. + MockFile.__iter__ = lambda self: self + + # Valid write-only file + m = MockFile() + assert is_file(m) + + del MockFile.write + MockFile.read = lambda self: 0 + + # Valid read-only file + m = MockFile() + assert is_file(m) + + # Iterator but no read / write attributes + data = [1, 2, 3] + assert not is_file(data) + + +test_tuple = collections.namedtuple("test_tuple", ["a", "b", "c"]) + + +@pytest.mark.parametrize("ll", [test_tuple(1, 2, 3)]) +def test_is_names_tuple_passes(ll): + assert inference.is_named_tuple(ll) + + +@pytest.mark.parametrize("ll", [(1, 2, 3), "a", Series({"pi": 3.14})]) +def test_is_names_tuple_fails(ll): + assert not inference.is_named_tuple(ll) + + +def test_is_hashable(): + + # all new-style classes are hashable by default + class HashableClass: + pass + + class UnhashableClass1: + __hash__ = None + + class UnhashableClass2: + def __hash__(self): + raise TypeError("Not hashable") + + hashable = (1, 3.14, np.float64(3.14), "a", (), (1,), HashableClass()) + not_hashable = ([], UnhashableClass1()) + abc_hashable_not_really_hashable = (([],), UnhashableClass2()) + + for i in hashable: + assert inference.is_hashable(i) + for i in not_hashable: + assert not inference.is_hashable(i) + for i in abc_hashable_not_really_hashable: + assert not inference.is_hashable(i) + + # numpy.array is no longer collections.abc.Hashable as of + # https://github.com/numpy/numpy/pull/5326, just test + # is_hashable() + assert not inference.is_hashable(np.array([])) + + +@pytest.mark.parametrize("ll", [re.compile("ad")]) +def test_is_re_passes(ll): + assert inference.is_re(ll) + + +@pytest.mark.parametrize("ll", ["x", 2, 3, object()]) +def test_is_re_fails(ll): + assert not inference.is_re(ll) + + +@pytest.mark.parametrize( + "ll", [r"a", "x", r"asdf", re.compile("adsf"), r"\u2233\s*", re.compile(r"")] +) +def test_is_recompilable_passes(ll): + assert inference.is_re_compilable(ll) + + +@pytest.mark.parametrize("ll", [1, [], object()]) +def test_is_recompilable_fails(ll): + assert not inference.is_re_compilable(ll) + + +class TestInference: + @pytest.mark.parametrize( + "arr", + [ + np.array(list("abc"), dtype="S1"), + np.array(list("abc"), dtype="S1").astype(object), + [b"a", np.nan, b"c"], + ], + ) + def test_infer_dtype_bytes(self, arr): + result = lib.infer_dtype(arr, skipna=True) + assert result == "bytes" + + @pytest.mark.parametrize( + "value, expected", + [ + (float("inf"), True), + (np.inf, True), + (-np.inf, False), + (1, False), + ("a", False), + ], + ) + def test_isposinf_scalar(self, value, expected): + # GH 11352 + result = libmissing.isposinf_scalar(value) + assert result is expected + + @pytest.mark.parametrize( + "value, expected", + [ + (float("-inf"), True), + (-np.inf, True), + (np.inf, False), + (1, False), + ("a", False), + ], + ) + def test_isneginf_scalar(self, value, expected): + result = libmissing.isneginf_scalar(value) + assert result is expected + + @pytest.mark.parametrize( + "convert_to_masked_nullable, exp", + [ + ( + True, + BooleanArray( + np.array([True, False], dtype="bool"), np.array([False, True]) + ), + ), + 
(False, np.array([True, np.nan], dtype="object")), + ], + ) + def test_maybe_convert_nullable_boolean(self, convert_to_masked_nullable, exp): + # GH 40687 + arr = np.array([True, np.NaN], dtype=object) + result = libops.maybe_convert_bool( + arr, set(), convert_to_masked_nullable=convert_to_masked_nullable + ) + if convert_to_masked_nullable: + tm.assert_extension_array_equal(BooleanArray(*result), exp) + else: + result = result[0] + tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + @pytest.mark.parametrize("coerce_numeric", [True, False]) + @pytest.mark.parametrize( + "infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"] + ) + @pytest.mark.parametrize("prefix", ["", "-", "+"]) + def test_maybe_convert_numeric_infinities( + self, coerce_numeric, infinity, prefix, convert_to_masked_nullable + ): + # see gh-13274 + result, _ = lib.maybe_convert_numeric( + np.array([prefix + infinity], dtype=object), + na_values={"", "NULL", "nan"}, + coerce_numeric=coerce_numeric, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + expected = np.array([np.inf if prefix in ["", "+"] else -np.inf]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_maybe_convert_numeric_infinities_raises(self, convert_to_masked_nullable): + msg = "Unable to parse string" + with pytest.raises(ValueError, match=msg): + lib.maybe_convert_numeric( + np.array(["foo_inf"], dtype=object), + na_values={"", "NULL", "nan"}, + coerce_numeric=False, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_maybe_convert_numeric_post_floatify_nan( + self, coerce, convert_to_masked_nullable + ): + # see gh-13314 + data = np.array(["1.200", "-999.000", "4.500"], dtype=object) + expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) + nan_values = {-999, -999.0} + + out = lib.maybe_convert_numeric( + data, + nan_values, + coerce, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + if convert_to_masked_nullable: + expected = FloatingArray(expected, np.isnan(expected)) + tm.assert_extension_array_equal(expected, FloatingArray(*out)) + else: + out = out[0] + tm.assert_numpy_array_equal(out, expected) + + def test_convert_infs(self): + arr = np.array(["inf", "inf", "inf"], dtype="O") + result, _ = lib.maybe_convert_numeric(arr, set(), False) + assert result.dtype == np.float64 + + arr = np.array(["-inf", "-inf", "-inf"], dtype="O") + result, _ = lib.maybe_convert_numeric(arr, set(), False) + assert result.dtype == np.float64 + + def test_scientific_no_exponent(self): + # See PR 12215 + arr = np.array(["42E", "2E", "99e", "6e"], dtype="O") + result, _ = lib.maybe_convert_numeric(arr, set(), False, True) + assert np.all(np.isnan(result)) + + def test_convert_non_hashable(self): + # GH13324 + # make sure that we are handing non-hashables + arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object) + result, _ = lib.maybe_convert_numeric(arr, set(), False, True) + tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) + + def test_convert_numeric_uint64(self): + arr = np.array([2**63], dtype=object) + exp = np.array([2**63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set())[0], exp) + + arr = np.array([str(2**63)], dtype=object) + exp = np.array([2**63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, 
set())[0], exp) + + arr = np.array([np.uint64(2**63)], dtype=object) + exp = np.array([2**63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set())[0], exp) + + @pytest.mark.parametrize( + "arr", + [ + np.array([2**63, np.nan], dtype=object), + np.array([str(2**63), np.nan], dtype=object), + np.array([np.nan, 2**63], dtype=object), + np.array([np.nan, str(2**63)], dtype=object), + ], + ) + def test_convert_numeric_uint64_nan(self, coerce, arr): + expected = arr.astype(float) if coerce else arr.copy() + result, _ = lib.maybe_convert_numeric(arr, set(), coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_convert_numeric_uint64_nan_values( + self, coerce, convert_to_masked_nullable + ): + arr = np.array([2**63, 2**63 + 1], dtype=object) + na_values = {2**63} + + expected = ( + np.array([np.nan, 2**63 + 1], dtype=float) if coerce else arr.copy() + ) + result = lib.maybe_convert_numeric( + arr, + na_values, + coerce_numeric=coerce, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + if convert_to_masked_nullable and coerce: + expected = IntegerArray( + np.array([0, 2**63 + 1], dtype="u8"), + np.array([True, False], dtype="bool"), + ) + result = IntegerArray(*result) + else: + result = result[0] # discard mask + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize( + "case", + [ + np.array([2**63, -1], dtype=object), + np.array([str(2**63), -1], dtype=object), + np.array([str(2**63), str(-1)], dtype=object), + np.array([-1, 2**63], dtype=object), + np.array([-1, str(2**63)], dtype=object), + np.array([str(-1), str(2**63)], dtype=object), + ], + ) + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_convert_numeric_int64_uint64( + self, case, coerce, convert_to_masked_nullable + ): + expected = case.astype(float) if coerce else case.copy() + result, _ = lib.maybe_convert_numeric( + case, + set(), + coerce_numeric=coerce, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("convert_to_masked_nullable", [True, False]) + def test_convert_numeric_string_uint64(self, convert_to_masked_nullable): + # GH32394 + result = lib.maybe_convert_numeric( + np.array(["uint64"], dtype=object), + set(), + coerce_numeric=True, + convert_to_masked_nullable=convert_to_masked_nullable, + ) + if convert_to_masked_nullable: + result = FloatingArray(*result) + else: + result = result[0] + assert np.isnan(result) + + @pytest.mark.parametrize("value", [-(2**63) - 1, 2**64]) + def test_convert_int_overflow(self, value): + # see gh-18584 + arr = np.array([value], dtype=object) + result = lib.maybe_convert_objects(arr) + tm.assert_numpy_array_equal(arr, result) + + @pytest.mark.parametrize( + "value, expected_dtype", + [ + # see gh-4471 + ([2**63], np.uint64), + # NumPy bug: can't compare uint64 to int64, as that + # results in both casting to float64, so we should + # make sure that this function is robust against it + ([np.uint64(2**63)], np.uint64), + ([2, -1], np.int64), + ([2**63, -1], object), + # GH#47294 + ([np.uint8(1)], np.uint8), + ([np.uint16(1)], np.uint16), + ([np.uint32(1)], np.uint32), + ([np.uint64(1)], np.uint64), + ([np.uint8(2), np.uint16(1)], np.uint16), + ([np.uint32(2), np.uint16(1)], np.uint32), + ([np.uint32(2), -1], object), + ([np.uint32(2), 1], np.uint64), + ([np.uint32(2), np.int32(1)], object), + ], + ) + def 
test_maybe_convert_objects_uint(self, value, expected_dtype): + arr = np.array(value, dtype=object) + exp = np.array(value, dtype=expected_dtype) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + def test_maybe_convert_objects_datetime(self): + # GH27438 + arr = np.array( + [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object + ) + exp = arr.copy() + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + tm.assert_numpy_array_equal(out, exp) + + arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object) + exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]") + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + tm.assert_numpy_array_equal(out, exp) + + # with convert_timedelta=True, the nan is a valid NA value for td64 + arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object) + exp = exp[::-1] + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + tm.assert_numpy_array_equal(out, exp) + + def test_maybe_convert_objects_dtype_if_all_nat(self): + arr = np.array([pd.NaT, pd.NaT], dtype=object) + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + # no dtype_if_all_nat passed -> we dont guess + tm.assert_numpy_array_equal(out, arr) + + out = lib.maybe_convert_objects( + arr, + convert_datetime=True, + convert_timedelta=True, + dtype_if_all_nat=np.dtype("timedelta64[ns]"), + ) + exp = np.array(["NaT", "NaT"], dtype="timedelta64[ns]") + tm.assert_numpy_array_equal(out, exp) + + out = lib.maybe_convert_objects( + arr, + convert_datetime=True, + convert_timedelta=True, + dtype_if_all_nat=np.dtype("datetime64[ns]"), + ) + exp = np.array(["NaT", "NaT"], dtype="datetime64[ns]") + tm.assert_numpy_array_equal(out, exp) + + def test_maybe_convert_objects_dtype_if_all_nat_invalid(self): + # we accept datetime64[ns], timedelta64[ns], and EADtype + arr = np.array([pd.NaT, pd.NaT], dtype=object) + + with pytest.raises(ValueError, match="int64"): + lib.maybe_convert_objects( + arr, + convert_datetime=True, + convert_timedelta=True, + dtype_if_all_nat=np.dtype("int64"), + ) + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_maybe_convert_objects_datetime_overflow_safe(self, dtype): + stamp = datetime(2363, 10, 4) # Enterprise-D launch date + if dtype == "timedelta64[ns]": + stamp = stamp - datetime(1970, 1, 1) + arr = np.array([stamp], dtype=object) + + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) + # no OutOfBoundsDatetime/OutOfBoundsTimedeltas + tm.assert_numpy_array_equal(out, arr) + + def test_maybe_convert_objects_mixed_datetimes(self): + ts = Timestamp("now") + vals = [ts, ts.to_pydatetime(), ts.to_datetime64(), pd.NaT, np.nan, None] + + for data in itertools.permutations(vals): + data = np.array(list(data), dtype=object) + expected = DatetimeIndex(data)._data._ndarray + result = lib.maybe_convert_objects(data, convert_datetime=True) + tm.assert_numpy_array_equal(result, expected) + + def test_maybe_convert_objects_timedelta64_nat(self): + obj = np.timedelta64("NaT", "ns") + arr = np.array([obj], dtype=object) + assert arr[0] is obj + + result = lib.maybe_convert_objects(arr, convert_timedelta=True) + + expected = np.array([obj], dtype="m8[ns]") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "exp", + [ + IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, 
True])), + IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])), + ], + ) + def test_maybe_convert_objects_nullable_integer(self, exp): + # GH27335 + arr = np.array([2, np.NaN], dtype=object) + result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=True) + + tm.assert_extension_array_equal(result, exp) + + @pytest.mark.parametrize( + "convert_to_masked_nullable, exp", + [ + (True, IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True]))), + (False, np.array([2, np.nan], dtype="float64")), + ], + ) + def test_maybe_convert_numeric_nullable_integer( + self, convert_to_masked_nullable, exp + ): + # GH 40687 + arr = np.array([2, np.NaN], dtype=object) + result = lib.maybe_convert_numeric( + arr, set(), convert_to_masked_nullable=convert_to_masked_nullable + ) + if convert_to_masked_nullable: + result = IntegerArray(*result) + tm.assert_extension_array_equal(result, exp) + else: + result = result[0] + tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize( + "convert_to_masked_nullable, exp", + [ + ( + True, + FloatingArray( + np.array([2.0, 0.0], dtype="float64"), np.array([False, True]) + ), + ), + (False, np.array([2.0, np.nan], dtype="float64")), + ], + ) + def test_maybe_convert_numeric_floating_array( + self, convert_to_masked_nullable, exp + ): + # GH 40687 + arr = np.array([2.0, np.nan], dtype=object) + result = lib.maybe_convert_numeric( + arr, set(), convert_to_masked_nullable=convert_to_masked_nullable + ) + if convert_to_masked_nullable: + tm.assert_extension_array_equal(FloatingArray(*result), exp) + else: + result = result[0] + tm.assert_numpy_array_equal(result, exp) + + def test_maybe_convert_objects_bool_nan(self): + # GH32146 + ind = Index([True, False, np.nan], dtype=object) + exp = np.array([True, False, np.nan], dtype=object) + out = lib.maybe_convert_objects(ind.values, safe=1) + tm.assert_numpy_array_equal(out, exp) + + @pytest.mark.parametrize( + "data0", + [ + True, + 1, + 1.0, + 1.0 + 1.0j, + np.int8(1), + np.int16(1), + np.int32(1), + np.int64(1), + np.float16(1), + np.float32(1), + np.float64(1), + np.complex64(1), + np.complex128(1), + ], + ) + @pytest.mark.parametrize( + "data1", + [ + True, + 1, + 1.0, + 1.0 + 1.0j, + np.int8(1), + np.int16(1), + np.int32(1), + np.int64(1), + np.float16(1), + np.float32(1), + np.float64(1), + np.complex64(1), + np.complex128(1), + ], + ) + def test_maybe_convert_objects_itemsize(self, data0, data1): + # GH 40908 + data = [data0, data1] + arr = np.array(data, dtype="object") + + common_kind = np.find_common_type( + [type(data0), type(data1)], scalar_types=[] + ).kind + kind0 = "python" if not hasattr(data0, "dtype") else data0.dtype.kind + kind1 = "python" if not hasattr(data1, "dtype") else data1.dtype.kind + if kind0 != "python" and kind1 != "python": + kind = common_kind + itemsize = max(data0.dtype.itemsize, data1.dtype.itemsize) + elif is_bool(data0) or is_bool(data1): + kind = "bool" if (is_bool(data0) and is_bool(data1)) else "object" + itemsize = "" + elif is_complex(data0) or is_complex(data1): + kind = common_kind + itemsize = 16 + else: + kind = common_kind + itemsize = 8 + + expected = np.array(data, dtype=f"{kind}{itemsize}") + result = lib.maybe_convert_objects(arr) + tm.assert_numpy_array_equal(result, expected) + + def test_mixed_dtypes_remain_object_array(self): + # GH14956 + arr = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) + result = lib.maybe_convert_objects(arr, convert_datetime=True) + tm.assert_numpy_array_equal(result, arr) + + 
@pytest.mark.parametrize( + "idx", + [ + pd.IntervalIndex.from_breaks(range(5), closed="both"), + pd.period_range("2016-01-01", periods=3, freq="D"), + ], + ) + def test_maybe_convert_objects_ea(self, idx): + + result = lib.maybe_convert_objects( + np.array(idx, dtype=object), + convert_period=True, + convert_interval=True, + ) + tm.assert_extension_array_equal(result, idx._data) + + +class TestTypeInference: + + # Dummy class used for testing with Python objects + class Dummy: + pass + + def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype): + # see pandas/conftest.py + inferred_dtype, values = any_skipna_inferred_dtype + + # make sure the inferred dtype of the fixture is as requested + assert inferred_dtype == lib.infer_dtype(values, skipna=True) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_length_zero(self, skipna): + result = lib.infer_dtype(np.array([], dtype="i4"), skipna=skipna) + assert result == "integer" + + result = lib.infer_dtype([], skipna=skipna) + assert result == "empty" + + # GH 18004 + arr = np.array([np.array([], dtype=object), np.array([], dtype=object)]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "empty" + + def test_integers(self): + arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "integer" + + arr = np.array([1, 2, 3, np.int64(4), np.int32(5), "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed-integer" + + arr = np.array([1, 2, 3, 4, 5], dtype="i4") + result = lib.infer_dtype(arr, skipna=True) + assert result == "integer" + + @pytest.mark.parametrize( + "arr, skipna", + [ + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False), + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True), + ], + ) + def test_integer_na(self, arr, skipna): + # GH 27392 + result = lib.infer_dtype(arr, skipna=skipna) + expected = "integer" if skipna else "integer-na" + assert result == expected + + def test_infer_dtype_skipna_default(self): + # infer_dtype `skipna` default deprecated in GH#24050, + # changed to True in GH#29876 + arr = np.array([1, 2, 3, np.nan], dtype=object) + + result = lib.infer_dtype(arr) + assert result == "integer" + + def test_bools(self): + arr = np.array([True, False, True, True, True], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([np.bool_(True), np.bool_(False)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([True, False, True, "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed" + + arr = np.array([True, False, True], dtype=bool) + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([True, np.nan, False], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + result = lib.infer_dtype(arr, skipna=False) + assert result == "mixed" + + def test_floats(self): + arr = np.array([1.0, 2.0, 3.0, np.float64(4), np.float32(5)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + arr = np.array([1, 2, 3, np.float64(4), np.float32(5), "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed-integer" + + arr = np.array([1, 2, 3, 4, 5], dtype="f4") + result = 
lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + arr = np.array([1, 2, 3, 4, 5], dtype="f8") + result = lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + def test_decimals(self): + # GH15690 + arr = np.array([Decimal(1), Decimal(2), Decimal(3)]) + result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + arr = np.array([1.0, 2.0, Decimal(3)]) + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed" + + result = lib.infer_dtype(arr[::-1], skipna=True) + assert result == "mixed" + + arr = np.array([Decimal(1), Decimal("NaN"), Decimal(3)]) + result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + # complex is compatible with nan, so skipna has no effect + @pytest.mark.parametrize("skipna", [True, False]) + def test_complex(self, skipna): + # gets cast to complex on array construction + arr = np.array([1.0, 2.0, 1 + 1j]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + arr = np.array([1.0, 2.0, 1 + 1j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "mixed" + + result = lib.infer_dtype(arr[::-1], skipna=skipna) + assert result == "mixed" + + # gets cast to complex on array construction + arr = np.array([1, np.nan, 1 + 1j]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + arr = np.array([1.0, np.nan, 1 + 1j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "mixed" + + # complex with nans stays complex + arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + # test smaller complex dtype; will pass through _try_infer_map fastpath + arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype=np.complex64) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + def test_string(self): + pass + + def test_unicode(self): + arr = ["a", np.nan, "c"] + result = lib.infer_dtype(arr, skipna=False) + # This currently returns "mixed", but it's not clear that's optimal. 
+ # This could also return "string" or "mixed-string" + assert result == "mixed" + + # even though we use skipna, we are only skipping those NAs that are + # considered matching by is_string_array + arr = ["a", np.nan, "c"] + result = lib.infer_dtype(arr, skipna=True) + assert result == "string" + + arr = ["a", pd.NA, "c"] + result = lib.infer_dtype(arr, skipna=True) + assert result == "string" + + arr = ["a", pd.NaT, "c"] + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed" + + arr = ["a", "c"] + result = lib.infer_dtype(arr, skipna=False) + assert result == "string" + + @pytest.mark.parametrize( + "dtype, missing, skipna, expected", + [ + (float, np.nan, False, "floating"), + (float, np.nan, True, "floating"), + (object, np.nan, False, "floating"), + (object, np.nan, True, "empty"), + (object, None, False, "mixed"), + (object, None, True, "empty"), + ], + ) + @pytest.mark.parametrize("box", [Series, np.array]) + def test_object_empty(self, box, missing, dtype, skipna, expected): + # GH 23421 + arr = box([missing, missing], dtype=dtype) + + result = lib.infer_dtype(arr, skipna=skipna) + assert result == expected + + def test_datetime(self): + + dates = [datetime(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + assert index.inferred_type == "datetime64" + + def test_infer_dtype_datetime64(self): + arr = np.array( + [np.datetime64("2011-01-01"), np.datetime64("2011-01-01")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" + + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + def test_infer_dtype_datetime64_with_na(self, na_value): + # starts with nan + arr = np.array([na_value, np.datetime64("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" + + arr = np.array([na_value, np.datetime64("2011-01-02"), na_value]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" + + @pytest.mark.parametrize( + "arr", + [ + np.array( + [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object + ), + np.array( + [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object + ), + np.array([np.datetime64("2011-01-01"), Timestamp("2011-01-02")]), + np.array([Timestamp("2011-01-02"), np.datetime64("2011-01-01")]), + np.array([np.nan, Timestamp("2011-01-02"), 1.1]), + np.array([np.nan, "2011-01-01", Timestamp("2011-01-02")], dtype=object), + np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object), + np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object), + ], + ) + def test_infer_datetimelike_dtype_mixed(self, arr): + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + def test_infer_dtype_mixed_integer(self): + arr = np.array([np.nan, Timestamp("2011-01-02"), 1]) + assert lib.infer_dtype(arr, skipna=True) == "mixed-integer" + + @pytest.mark.parametrize( + "arr", + [ + np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]), + np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]), + np.array([datetime(2011, 1, 1), Timestamp("2011-01-02")]), + ], + ) + def test_infer_dtype_datetime(self, arr): + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + @pytest.mark.parametrize( + "time_stamp", [Timestamp("2011-01-01"), datetime(2011, 1, 1)] + ) + def test_infer_dtype_datetime_with_na(self, na_value, time_stamp): + # starts with nan + arr = np.array([na_value, time_stamp]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + arr = np.array([na_value, time_stamp, na_value]) + assert lib.infer_dtype(arr, 
skipna=True) == "datetime" + + @pytest.mark.parametrize( + "arr", + [ + np.array([Timedelta("1 days"), Timedelta("2 days")]), + np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object), + np.array([timedelta(1), timedelta(2)]), + ], + ) + def test_infer_dtype_timedelta(self, arr): + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + @pytest.mark.parametrize( + "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1)] + ) + def test_infer_dtype_timedelta_with_na(self, na_value, delta): + # starts with nan + arr = np.array([na_value, delta]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([na_value, delta, na_value]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + def test_infer_dtype_period(self): + # GH 13664 + arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="D")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + # non-homogeneous freqs -> mixed + arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="M")]) + assert lib.infer_dtype(arr, skipna=True) == "mixed" + + @pytest.mark.parametrize("klass", [pd.array, Series, Index]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype_period_array(self, klass, skipna): + # https://github.com/pandas-dev/pandas/issues/23553 + values = klass( + [ + Period("2011-01-01", freq="D"), + Period("2011-01-02", freq="D"), + pd.NaT, + ] + ) + assert lib.infer_dtype(values, skipna=skipna) == "period" + + # periods but mixed freq + values = klass( + [ + Period("2011-01-01", freq="D"), + Period("2011-01-02", freq="M"), + pd.NaT, + ] + ) + # with pd.array this becomes PandasArray which ends up as "unknown-array" + exp = "unknown-array" if klass is pd.array else "mixed" + assert lib.infer_dtype(values, skipna=skipna) == exp + + def test_infer_dtype_period_mixed(self): + arr = np.array( + [Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array( + [np.datetime64("nat"), Period("2011-01", freq="M")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + def test_infer_dtype_period_with_na(self, na_value): + # starts with nan + arr = np.array([na_value, Period("2011-01", freq="D")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + arr = np.array([na_value, Period("2011-01", freq="D"), na_value]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + @pytest.mark.parametrize( + "data", + [ + [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)], + [Timestamp("20170612"), Timestamp("20170311")], + [ + Timestamp("20170612", tz="US/Eastern"), + Timestamp("20170311", tz="US/Eastern"), + ], + [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], + [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], + [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)], + ], + ) + def test_infer_datetimelike_array_datetime(self, data): + assert lib.infer_datetimelike_array(data) == ("datetime", False) + + @pytest.mark.parametrize( + "data", + [ + [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], + [timedelta(2017, 6, 12), date(2017, 3, 11)], + [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], + [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)], + ], + ) + def test_infer_datetimelike_array_timedelta(self, data): + assert lib.infer_datetimelike_array(data) == ("timedelta", False) + + 
def test_infer_datetimelike_array_date(self): + arr = [date(2017, 6, 12), date(2017, 3, 11)] + assert lib.infer_datetimelike_array(arr) == ("date", False) + + @pytest.mark.parametrize( + "data", + [ + ["2017-06-12", "2017-03-11"], + [20170612, 20170311], + [20170612.5, 20170311.8], + [Dummy(), Dummy()], + [Timestamp("20170612"), Timestamp("20170311", tz="US/Eastern")], + [Timestamp("20170612"), 20170311], + [timedelta(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], + ], + ) + def test_infer_datetimelike_array_mixed(self, data): + assert lib.infer_datetimelike_array(data)[0] == "mixed" + + @pytest.mark.parametrize( + "first, expected", + [ + [[None], "mixed"], + [[np.nan], "mixed"], + [[pd.NaT], "nat"], + [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"], + [[np.datetime64("2017-06-12"), pd.NaT], "datetime"], + [[date(2017, 6, 12), pd.NaT], "date"], + [[timedelta(2017, 6, 12), pd.NaT], "timedelta"], + [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"], + ], + ) + @pytest.mark.parametrize("second", [None, np.nan]) + def test_infer_datetimelike_array_nan_nat_like(self, first, second, expected): + first.append(second) + assert lib.infer_datetimelike_array(first) == (expected, False) + + def test_infer_dtype_all_nan_nat_like(self): + arr = np.array([np.nan, np.nan]) + assert lib.infer_dtype(arr, skipna=True) == "floating" + + # nan and None mix are result in mixed + arr = np.array([np.nan, np.nan, None]) + assert lib.infer_dtype(arr, skipna=True) == "empty" + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([None, np.nan, np.nan]) + assert lib.infer_dtype(arr, skipna=True) == "empty" + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + # pd.NaT + arr = np.array([pd.NaT]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([pd.NaT, np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([np.nan, pd.NaT]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([np.nan, pd.NaT, np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([None, pd.NaT, None]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + # np.datetime64(nat) + arr = np.array([np.datetime64("nat")]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.datetime64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + arr = np.array([pd.NaT, n, np.datetime64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + arr = np.array([np.timedelta64("nat")], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.timedelta64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + arr = np.array([pd.NaT, n, np.timedelta64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + # datetime / timedelta mixed + arr = np.array([pd.NaT, np.datetime64("nat"), np.timedelta64("nat"), np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([np.timedelta64("nat"), np.datetime64("nat")], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + def test_is_datetimelike_array_all_nan_nat_like(self): + arr = np.array([np.nan, pd.NaT, np.datetime64("nat")]) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT, 
np.timedelta64("nat")]) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT, np.datetime64("nat"), np.timedelta64("nat")]) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT]) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, np.nan], dtype=object) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + assert lib.is_datetime_with_singletz_array( + np.array( + [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130102", tz="US/Eastern"), + ], + dtype=object, + ) + ) + assert not lib.is_datetime_with_singletz_array( + np.array( + [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130102", tz="CET"), + ], + dtype=object, + ) + ) + + @pytest.mark.parametrize( + "func", + [ + "is_datetime_array", + "is_datetime64_array", + "is_bool_array", + "is_timedelta_or_timedelta64_array", + "is_date_array", + "is_time_array", + "is_interval_array", + ], + ) + def test_other_dtypes_for_array(self, func): + func = getattr(lib, func) + arr = np.array(["foo", "bar"]) + assert not func(arr) + assert not func(arr.reshape(2, 1)) + + arr = np.array([1, 2]) + assert not func(arr) + assert not func(arr.reshape(2, 1)) + + def test_date(self): + + dates = [date(2012, 1, day) for day in range(1, 20)] + index = Index(dates) + assert index.inferred_type == "date" + + dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan] + result = lib.infer_dtype(dates, skipna=False) + assert result == "mixed" + + result = lib.infer_dtype(dates, skipna=True) + assert result == "date" + + @pytest.mark.parametrize( + "values", + [ + [date(2020, 1, 1), Timestamp("2020-01-01")], + [Timestamp("2020-01-01"), date(2020, 1, 1)], + [date(2020, 1, 1), pd.NaT], + [pd.NaT, date(2020, 1, 1)], + ], + ) + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype_date_order_invariant(self, values, skipna): + # https://github.com/pandas-dev/pandas/issues/33741 + result = lib.infer_dtype(values, skipna=skipna) + assert result == "date" + + def test_is_numeric_array(self): + + assert lib.is_float_array(np.array([1, 2.0])) + assert lib.is_float_array(np.array([1, 2.0, np.nan])) + assert not lib.is_float_array(np.array([1, 2])) + + assert lib.is_integer_array(np.array([1, 2])) + assert not lib.is_integer_array(np.array([1, 2.0])) + + def test_is_string_array(self): + # We should only be accepting pd.NA, np.nan, + # other floating point nans e.g. float('nan')] + # when skipna is True. + assert lib.is_string_array(np.array(["foo", "bar"])) + assert not lib.is_string_array( + np.array(["foo", "bar", pd.NA], dtype=object), skipna=False + ) + assert lib.is_string_array( + np.array(["foo", "bar", pd.NA], dtype=object), skipna=True + ) + # we allow NaN/None in the StringArray constructor, so its allowed here + assert lib.is_string_array( + np.array(["foo", "bar", None], dtype=object), skipna=True + ) + assert lib.is_string_array( + np.array(["foo", "bar", np.nan], dtype=object), skipna=True + ) + # But not e.g. 
datetimelike or Decimal NAs + assert not lib.is_string_array( + np.array(["foo", "bar", pd.NaT], dtype=object), skipna=True + ) + assert not lib.is_string_array( + np.array(["foo", "bar", np.datetime64("NaT")], dtype=object), skipna=True + ) + assert not lib.is_string_array( + np.array(["foo", "bar", Decimal("NaN")], dtype=object), skipna=True + ) + + assert not lib.is_string_array( + np.array(["foo", "bar", None], dtype=object), skipna=False + ) + assert not lib.is_string_array( + np.array(["foo", "bar", np.nan], dtype=object), skipna=False + ) + assert not lib.is_string_array(np.array([1, 2])) + + def test_to_object_array_tuples(self): + r = (5, 6) + values = [r] + lib.to_object_array_tuples(values) + + # make sure record array works + record = namedtuple("record", "x y") + r = record(5, 6) + values = [r] + lib.to_object_array_tuples(values) + + def test_object(self): + + # GH 7431 + # cannot infer more than this as only a single element + arr = np.array([None], dtype="O") + result = lib.infer_dtype(arr, skipna=False) + assert result == "mixed" + result = lib.infer_dtype(arr, skipna=True) + assert result == "empty" + + def test_to_object_array_width(self): + # see gh-13320 + rows = [[1, 2, 3], [4, 5, 6]] + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows, min_width=1) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array( + [[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object + ) + out = lib.to_object_array(rows, min_width=5) + tm.assert_numpy_array_equal(out, expected) + + def test_is_period(self): + assert lib.is_period(Period("2011-01", freq="M")) + assert not lib.is_period(PeriodIndex(["2011-01"], freq="M")) + assert not lib.is_period(Timestamp("2011-01")) + assert not lib.is_period(1) + assert not lib.is_period(np.nan) + + def test_categorical(self): + + # GH 8974 + arr = Categorical(list("abc")) + result = lib.infer_dtype(arr, skipna=True) + assert result == "categorical" + + result = lib.infer_dtype(Series(arr), skipna=True) + assert result == "categorical" + + arr = Categorical(list("abc"), categories=["cegfab"], ordered=True) + result = lib.infer_dtype(arr, skipna=True) + assert result == "categorical" + + result = lib.infer_dtype(Series(arr), skipna=True) + assert result == "categorical" + + @pytest.mark.parametrize("asobject", [True, False]) + def test_interval(self, asobject): + idx = pd.IntervalIndex.from_breaks(range(5), closed="both") + if asobject: + idx = idx.astype(object) + + inferred = lib.infer_dtype(idx, skipna=False) + assert inferred == "interval" + + inferred = lib.infer_dtype(idx._data, skipna=False) + assert inferred == "interval" + + inferred = lib.infer_dtype(Series(idx, dtype=idx.dtype), skipna=False) + assert inferred == "interval" + + @pytest.mark.parametrize("value", [Timestamp(0), Timedelta(0), 0, 0.0]) + def test_interval_mismatched_closed(self, value): + + first = Interval(value, value, closed="left") + second = Interval(value, value, closed="right") + + # if closed match, we should infer "interval" + arr = np.array([first, first], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "interval" + + # if closed dont match, we should _not_ get "interval" + arr2 = np.array([first, second], dtype=object) + assert lib.infer_dtype(arr2, skipna=False) == "mixed" + + def test_interval_mismatched_subtype(self): + first = Interval(0, 1, closed="left") + second = Interval(Timestamp(0), 
Timestamp(1), closed="left") + third = Interval(Timedelta(0), Timedelta(1), closed="left") + + arr = np.array([first, second]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([second, third]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([first, third]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + # float vs int subdtype are compatible + flt_interval = Interval(1.5, 2.5, closed="left") + arr = np.array([first, flt_interval], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "interval" + + @pytest.mark.parametrize("klass", [pd.array, Series]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]]) + def test_string_dtype(self, data, skipna, klass, nullable_string_dtype): + # StringArray + val = klass(data, dtype=nullable_string_dtype) + inferred = lib.infer_dtype(val, skipna=skipna) + assert inferred == "string" + + @pytest.mark.parametrize("klass", [pd.array, Series]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]]) + def test_boolean_dtype(self, data, skipna, klass): + # BooleanArray + val = klass(data, dtype="boolean") + inferred = lib.infer_dtype(val, skipna=skipna) + assert inferred == "boolean" + + +class TestNumberScalar: + def test_is_number(self): + + assert is_number(True) + assert is_number(1) + assert is_number(1.1) + assert is_number(1 + 3j) + assert is_number(np.int64(1)) + assert is_number(np.float64(1.1)) + assert is_number(np.complex128(1 + 3j)) + assert is_number(np.nan) + + assert not is_number(None) + assert not is_number("x") + assert not is_number(datetime(2011, 1, 1)) + assert not is_number(np.datetime64("2011-01-01")) + assert not is_number(Timestamp("2011-01-01")) + assert not is_number(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_number(timedelta(1000)) + assert not is_number(Timedelta("1 days")) + + # questionable + assert not is_number(np.bool_(False)) + assert is_number(np.timedelta64(1, "D")) + + def test_is_bool(self): + assert is_bool(True) + assert is_bool(False) + assert is_bool(np.bool_(False)) + + assert not is_bool(1) + assert not is_bool(1.1) + assert not is_bool(1 + 3j) + assert not is_bool(np.int64(1)) + assert not is_bool(np.float64(1.1)) + assert not is_bool(np.complex128(1 + 3j)) + assert not is_bool(np.nan) + assert not is_bool(None) + assert not is_bool("x") + assert not is_bool(datetime(2011, 1, 1)) + assert not is_bool(np.datetime64("2011-01-01")) + assert not is_bool(Timestamp("2011-01-01")) + assert not is_bool(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_bool(timedelta(1000)) + assert not is_bool(np.timedelta64(1, "D")) + assert not is_bool(Timedelta("1 days")) + + def test_is_integer(self): + assert is_integer(1) + assert is_integer(np.int64(1)) + + assert not is_integer(True) + assert not is_integer(1.1) + assert not is_integer(1 + 3j) + assert not is_integer(False) + assert not is_integer(np.bool_(False)) + assert not is_integer(np.float64(1.1)) + assert not is_integer(np.complex128(1 + 3j)) + assert not is_integer(np.nan) + assert not is_integer(None) + assert not is_integer("x") + assert not is_integer(datetime(2011, 1, 1)) + assert not is_integer(np.datetime64("2011-01-01")) + assert not is_integer(Timestamp("2011-01-01")) + assert not is_integer(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_integer(timedelta(1000)) + assert not is_integer(Timedelta("1 days")) + assert 
not is_integer(np.timedelta64(1, "D")) + + def test_is_float(self): + assert is_float(1.1) + assert is_float(np.float64(1.1)) + assert is_float(np.nan) + + assert not is_float(True) + assert not is_float(1) + assert not is_float(1 + 3j) + assert not is_float(False) + assert not is_float(np.bool_(False)) + assert not is_float(np.int64(1)) + assert not is_float(np.complex128(1 + 3j)) + assert not is_float(None) + assert not is_float("x") + assert not is_float(datetime(2011, 1, 1)) + assert not is_float(np.datetime64("2011-01-01")) + assert not is_float(Timestamp("2011-01-01")) + assert not is_float(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_float(timedelta(1000)) + assert not is_float(np.timedelta64(1, "D")) + assert not is_float(Timedelta("1 days")) + + def test_is_datetime_dtypes(self): + + ts = pd.date_range("20130101", periods=3) + tsa = pd.date_range("20130101", periods=3, tz="US/Eastern") + + assert is_datetime64_dtype("datetime64") + assert is_datetime64_dtype("datetime64[ns]") + assert is_datetime64_dtype(ts) + assert not is_datetime64_dtype(tsa) + + assert not is_datetime64_ns_dtype("datetime64") + assert is_datetime64_ns_dtype("datetime64[ns]") + assert is_datetime64_ns_dtype(ts) + assert is_datetime64_ns_dtype(tsa) + + assert is_datetime64_any_dtype("datetime64") + assert is_datetime64_any_dtype("datetime64[ns]") + assert is_datetime64_any_dtype(ts) + assert is_datetime64_any_dtype(tsa) + + assert not is_datetime64tz_dtype("datetime64") + assert not is_datetime64tz_dtype("datetime64[ns]") + assert not is_datetime64tz_dtype(ts) + assert is_datetime64tz_dtype(tsa) + + @pytest.mark.parametrize("tz", ["US/Eastern", "UTC"]) + def test_is_datetime_dtypes_with_tz(self, tz): + dtype = f"datetime64[ns, {tz}]" + assert not is_datetime64_dtype(dtype) + assert is_datetime64tz_dtype(dtype) + assert is_datetime64_ns_dtype(dtype) + assert is_datetime64_any_dtype(dtype) + + def test_is_timedelta(self): + assert is_timedelta64_dtype("timedelta64") + assert is_timedelta64_dtype("timedelta64[ns]") + assert not is_timedelta64_ns_dtype("timedelta64") + assert is_timedelta64_ns_dtype("timedelta64[ns]") + + tdi = TimedeltaIndex([1e14, 2e14], dtype="timedelta64[ns]") + assert is_timedelta64_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]")) + + # Conversion to Int64Index: + assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64")) + assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]")) + + +class TestIsScalar: + def test_is_scalar_builtin_scalars(self): + assert is_scalar(None) + assert is_scalar(True) + assert is_scalar(False) + assert is_scalar(Fraction()) + assert is_scalar(0.0) + assert is_scalar(1) + assert is_scalar(complex(2)) + assert is_scalar(float("NaN")) + assert is_scalar(np.nan) + assert is_scalar("foobar") + assert is_scalar(b"foobar") + assert is_scalar(datetime(2014, 1, 1)) + assert is_scalar(date(2014, 1, 1)) + assert is_scalar(time(12, 0)) + assert is_scalar(timedelta(hours=1)) + assert is_scalar(pd.NaT) + assert is_scalar(pd.NA) + + def test_is_scalar_builtin_nonscalars(self): + assert not is_scalar({}) + assert not is_scalar([]) + assert not is_scalar([1]) + assert not is_scalar(()) + assert not is_scalar((1,)) + assert not is_scalar(slice(None)) + assert not is_scalar(Ellipsis) + + def test_is_scalar_numpy_array_scalars(self): + assert is_scalar(np.int64(1)) + assert is_scalar(np.float64(1.0)) + assert is_scalar(np.int32(1)) + assert is_scalar(np.complex64(2)) + assert 
is_scalar(np.object_("foobar")) + assert is_scalar(np.str_("foobar")) + assert is_scalar(np.unicode_("foobar")) + assert is_scalar(np.bytes_(b"foobar")) + assert is_scalar(np.datetime64("2014-01-01")) + assert is_scalar(np.timedelta64(1, "h")) + + @pytest.mark.parametrize( + "zerodim", + [ + np.array(1), + np.array("foobar"), + np.array(np.datetime64("2014-01-01")), + np.array(np.timedelta64(1, "h")), + np.array(np.datetime64("NaT")), + ], + ) + def test_is_scalar_numpy_zerodim_arrays(self, zerodim): + assert not is_scalar(zerodim) + assert is_scalar(lib.item_from_zerodim(zerodim)) + + @pytest.mark.parametrize("arr", [np.array([]), np.array([[]])]) + def test_is_scalar_numpy_arrays(self, arr): + assert not is_scalar(arr) + assert not is_scalar(MockNumpyLikeArray(arr)) + + def test_is_scalar_pandas_scalars(self): + assert is_scalar(Timestamp("2014-01-01")) + assert is_scalar(Timedelta(hours=1)) + assert is_scalar(Period("2014-01-01")) + assert is_scalar(Interval(left=0, right=1)) + assert is_scalar(DateOffset(days=1)) + assert is_scalar(pd.offsets.Minute(3)) + + def test_is_scalar_pandas_containers(self): + assert not is_scalar(Series(dtype=object)) + assert not is_scalar(Series([1])) + assert not is_scalar(DataFrame()) + assert not is_scalar(DataFrame([[1]])) + assert not is_scalar(Index([])) + assert not is_scalar(Index([1])) + assert not is_scalar(Categorical([])) + assert not is_scalar(DatetimeIndex([])._data) + assert not is_scalar(TimedeltaIndex([])._data) + assert not is_scalar(DatetimeIndex([])._data.to_period("D")) + assert not is_scalar(pd.array([1, 2, 3])) + + def test_is_scalar_number(self): + # Number() is not recognied by PyNumber_Check, so by extension + # is not recognized by is_scalar, but instances of non-abstract + # subclasses are. 
+ + class Numeric(Number): + def __init__(self, value) -> None: + self.value = value + + def __int__(self): + return self.value + + num = Numeric(1) + assert is_scalar(num) + + +@pytest.mark.parametrize("unit", ["ms", "us", "ns"]) +def test_datetimeindex_from_empty_datetime64_array(unit): + idx = DatetimeIndex(np.array([], dtype=f"datetime64[{unit}]")) + assert len(idx) == 0 + + +def test_nan_to_nat_conversions(): + + df = DataFrame( + {"A": np.asarray(range(10), dtype="float64"), "B": Timestamp("20010101")} + ) + df.iloc[3:6, :] = np.nan + result = df.loc[4, "B"] + assert result is pd.NaT + + s = df["B"].copy() + s[8:9] = np.nan + assert s[8] is pd.NaT + + +@td.skip_if_no_scipy +@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") +def test_is_scipy_sparse(spmatrix): + assert is_scipy_sparse(spmatrix([[0, 1]])) + assert not is_scipy_sparse(np.array([1])) + + +def test_ensure_int32(): + values = np.arange(10, dtype=np.int32) + result = ensure_int32(values) + assert result.dtype == np.int32 + + values = np.arange(10, dtype=np.int64) + result = ensure_int32(values) + assert result.dtype == np.int32 diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py new file mode 100644 index 00000000..9a242e94 --- /dev/null +++ b/pandas/tests/dtypes/test_missing.py @@ -0,0 +1,856 @@ +from contextlib import nullcontext +from datetime import datetime +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._config import config as cf + +from pandas._libs import missing as libmissing +from pandas._libs.tslibs import iNaT +from pandas.compat import is_numpy_dev + +from pandas.core.dtypes.common import ( + is_float, + is_scalar, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import ( + array_equivalent, + is_valid_na_for_dtype, + isna, + isnull, + na_value_for_dtype, + notna, + notnull, +) + +import pandas as pd +from pandas import ( + DatetimeIndex, + NaT, + Series, + TimedeltaIndex, + date_range, +) +import pandas._testing as tm +from pandas.core.api import Float64Index + +fix_now = pd.Timestamp("2021-01-01") +fix_utcnow = pd.Timestamp("2021-01-01", tz="UTC") + + +@pytest.mark.parametrize("notna_f", [notna, notnull]) +def test_notna_notnull(notna_f): + assert notna_f(1.0) + assert not notna_f(None) + assert not notna_f(np.NaN) + + with cf.option_context("mode.use_inf_as_na", False): + assert notna_f(np.inf) + assert notna_f(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notna_f(arr) + assert result.all() + + with cf.option_context("mode.use_inf_as_na", True): + assert not notna_f(np.inf) + assert not notna_f(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notna_f(arr) + assert result.sum() == 2 + + +@pytest.mark.parametrize("null_func", [notna, notnull, isna, isnull]) +@pytest.mark.parametrize( + "ser", + [ + tm.makeFloatSeries(), + tm.makeStringSeries(), + tm.makeObjectSeries(), + tm.makeTimeSeries(), + tm.makePeriodSeries(), + ], +) +def test_null_check_is_series(null_func, ser): + with cf.option_context("mode.use_inf_as_na", False): + assert isinstance(null_func(ser), Series) + + +class TestIsNA: + def test_0d_array(self): + assert isna(np.array(np.nan)) + assert not isna(np.array(0.0)) + assert not isna(np.array(0)) + # test object dtype + assert isna(np.array(np.nan, dtype=object)) + assert not isna(np.array(0.0, dtype=object)) + assert not isna(np.array(0, dtype=object)) + + 
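For orientation, the missing-value checks exercised in this class are available publicly as pd.isna and pd.notna. The following is a small illustrative sketch (not part of the imported test file) summarising behaviour asserted in the tests above; the list-like case follows the GH20675 assertion in test_isna_lists.

    # Illustrative sketch only -- summarises behaviour asserted by the tests above.
    import numpy as np
    import pandas as pd

    # Scalars: None, NaN and NaT are missing; ordinary numbers and strings are not.
    assert pd.isna(None)
    assert pd.isna(np.nan)
    assert pd.isna(pd.NaT)
    assert not pd.isna(1.0)
    assert not pd.isna("foo")

    # List-like input returns a boolean ndarray; notna is the complement.
    assert pd.isna([np.nan, "world"]).tolist() == [True, False]
    assert pd.notna([np.nan, "world"]).tolist() == [False, True]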
@pytest.mark.parametrize("shape", [(4, 0), (4,)]) + def test_empty_object(self, shape): + arr = np.empty(shape=shape, dtype=object) + result = isna(arr) + expected = np.ones(shape=shape, dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("isna_f", [isna, isnull]) + def test_isna_isnull(self, isna_f): + assert not isna_f(1.0) + assert isna_f(None) + assert isna_f(np.NaN) + assert float("nan") + assert not isna_f(np.inf) + assert not isna_f(-np.inf) + + # type + assert not isna_f(type(Series(dtype=object))) + assert not isna_f(type(Series(dtype=np.float64))) + assert not isna_f(type(pd.DataFrame())) + + @pytest.mark.parametrize("isna_f", [isna, isnull]) + @pytest.mark.parametrize( + "df", + [ + tm.makeTimeDataFrame(), + tm.makePeriodFrame(), + tm.makeMixedDataFrame(), + ], + ) + def test_isna_isnull_frame(self, isna_f, df): + # frame + result = isna_f(df) + expected = df.apply(isna_f) + tm.assert_frame_equal(result, expected) + + def test_isna_lists(self): + result = isna([[False]]) + exp = np.array([[False]]) + tm.assert_numpy_array_equal(result, exp) + + result = isna([[1], [2]]) + exp = np.array([[False], [False]]) + tm.assert_numpy_array_equal(result, exp) + + # list of strings / unicode + result = isna(["foo", "bar"]) + exp = np.array([False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = isna(["foo", "bar"]) + exp = np.array([False, False]) + tm.assert_numpy_array_equal(result, exp) + + # GH20675 + result = isna([np.NaN, "world"]) + exp = np.array([True, False]) + tm.assert_numpy_array_equal(result, exp) + + def test_isna_nat(self): + result = isna([NaT]) + exp = np.array([True]) + tm.assert_numpy_array_equal(result, exp) + + result = isna(np.array([NaT], dtype=object)) + exp = np.array([True]) + tm.assert_numpy_array_equal(result, exp) + + def test_isna_numpy_nat(self): + arr = np.array( + [ + NaT, + np.datetime64("NaT"), + np.timedelta64("NaT"), + np.datetime64("NaT", "s"), + ] + ) + result = isna(arr) + expected = np.array([True] * 4) + tm.assert_numpy_array_equal(result, expected) + + def test_isna_datetime(self): + assert not isna(datetime.now()) + assert notna(datetime.now()) + + idx = date_range("1/1/1990", periods=20) + exp = np.ones(len(idx), dtype=bool) + tm.assert_numpy_array_equal(notna(idx), exp) + + idx = np.asarray(idx) + idx[0] = iNaT + idx = DatetimeIndex(idx) + mask = isna(idx) + assert mask[0] + exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + # GH 9129 + pidx = idx.to_period(freq="M") + mask = isna(pidx) + assert mask[0] + exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + mask = isna(pidx[1:]) + exp = np.zeros(len(mask), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + def test_isna_old_datetimelike(self): + # isna_old should work for dt64tz, td64, and period, not just tznaive + dti = date_range("2016-01-01", periods=3) + dta = dti._data + dta[-1] = NaT + expected = np.array([False, False, True], dtype=bool) + + objs = [dta, dta.tz_localize("US/Eastern"), dta - dta, dta.to_period("D")] + + for obj in objs: + with cf.option_context("mode.use_inf_as_na", True): + result = isna(obj) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "value, expected", + [ + (np.complex128(np.nan), True), + (np.float64(1), False), + (np.array([1, 1 + 0j, np.nan, 3]), np.array([False, False, True, False])), + ( + np.array([1, 1 + 0j, np.nan, 3], dtype=object), + np.array([False, 
False, True, False]), + ), + ( + np.array([1, 1 + 0j, np.nan, 3]).astype(object), + np.array([False, False, True, False]), + ), + ], + ) + def test_complex(self, value, expected): + result = isna(value) + if is_scalar(result): + assert result is expected + else: + tm.assert_numpy_array_equal(result, expected) + + def test_datetime_other_units(self): + idx = DatetimeIndex(["2011-01-01", "NaT", "2011-01-02"]) + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) + + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ], + ) + def test_datetime_other_units_astype(self, dtype): + idx = DatetimeIndex(["2011-01-01", "NaT", "2011-01-02"]) + values = idx.values.astype(dtype) + + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) + + exp = Series([False, True, False]) + s = Series(values) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = Series(values, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + def test_timedelta_other_units(self): + idx = TimedeltaIndex(["1 days", "NaT", "2 days"]) + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) + + @pytest.mark.parametrize( + "dtype", + [ + "timedelta64[D]", + "timedelta64[h]", + "timedelta64[m]", + "timedelta64[s]", + "timedelta64[ms]", + "timedelta64[us]", + "timedelta64[ns]", + ], + ) + def test_timedelta_other_units_dtype(self, dtype): + idx = TimedeltaIndex(["1 days", "NaT", "2 days"]) + values = idx.values.astype(dtype) + + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) + + exp = Series([False, True, False]) + s = Series(values) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = Series(values, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + def test_period(self): + idx = pd.PeriodIndex(["2011-01", "NaT", "2012-01"], freq="M") + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + + exp = Series([False, True, False]) + s = Series(idx) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = Series(idx, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + def test_decimal(self): + # scalars GH#23530 + a = Decimal(1.0) + assert isna(a) is False + assert notna(a) is True + + b = Decimal("NaN") + assert isna(b) is True + assert notna(b) is False + + # array + arr = np.array([a, b]) + expected = np.array([False, True]) + result = isna(arr) + tm.assert_numpy_array_equal(result, expected) + + result = notna(arr) + tm.assert_numpy_array_equal(result, ~expected) + + # series + ser = Series(arr) + expected = Series(expected) + result = isna(ser) + tm.assert_series_equal(result, expected) + + result = notna(ser) + tm.assert_series_equal(result, ~expected) + + # index + idx = 
pd.Index(arr) + expected = np.array([False, True]) + result = isna(idx) + tm.assert_numpy_array_equal(result, expected) + + result = notna(idx) + tm.assert_numpy_array_equal(result, ~expected) + + +@pytest.mark.parametrize("dtype_equal", [True, False]) +def test_array_equivalent(dtype_equal): + assert array_equivalent( + np.array([np.nan, np.nan]), np.array([np.nan, np.nan]), dtype_equal=dtype_equal + ) + assert array_equivalent( + np.array([np.nan, 1, np.nan]), + np.array([np.nan, 1, np.nan]), + dtype_equal=dtype_equal, + ) + assert array_equivalent( + np.array([np.nan, None], dtype="object"), + np.array([np.nan, None], dtype="object"), + dtype_equal=dtype_equal, + ) + # Check the handling of nested arrays in array_equivalent_object + assert array_equivalent( + np.array([np.array([np.nan, None], dtype="object"), None], dtype="object"), + np.array([np.array([np.nan, None], dtype="object"), None], dtype="object"), + dtype_equal=dtype_equal, + ) + assert array_equivalent( + np.array([np.nan, 1 + 1j], dtype="complex"), + np.array([np.nan, 1 + 1j], dtype="complex"), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + np.array([np.nan, 1 + 1j], dtype="complex"), + np.array([np.nan, 1 + 2j], dtype="complex"), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + np.array([np.nan, 1, np.nan]), + np.array([np.nan, 2, np.nan]), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + np.array(["a", "b", "c", "d"]), np.array(["e", "e"]), dtype_equal=dtype_equal + ) + assert array_equivalent( + Float64Index([0, np.nan]), Float64Index([0, np.nan]), dtype_equal=dtype_equal + ) + assert not array_equivalent( + Float64Index([0, np.nan]), Float64Index([1, np.nan]), dtype_equal=dtype_equal + ) + assert array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal + ) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]), dtype_equal=dtype_equal + ) + assert array_equivalent( + TimedeltaIndex([0, np.nan]), + TimedeltaIndex([0, np.nan]), + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + TimedeltaIndex([0, np.nan]), + TimedeltaIndex([1, np.nan]), + dtype_equal=dtype_equal, + ) + + msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times" + with tm.assert_produces_warning(FutureWarning, match=msg): + dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern") + dti2 = DatetimeIndex([0, np.nan], tz="CET") + dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern") + + assert array_equivalent( + dti1, + dti1, + dtype_equal=dtype_equal, + ) + assert not array_equivalent( + dti1, + dti3, + dtype_equal=dtype_equal, + ) + # The rest are not dtype_equal + assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1) + assert not array_equivalent( + dti2, + dti1, + ) + + assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) + + +@pytest.mark.parametrize( + "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None] +) +def test_array_equivalent_series(val): + arr = np.array([1, 2]) + msg = "elementwise comparison failed" + cm = ( + # stacklevel is chosen to make sense when called from .equals + tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False) + if isinstance(val, str) and not is_numpy_dev + else nullcontext() + ) + with cm: + assert not array_equivalent(Series([arr, arr]), Series([arr, val])) + + +def test_array_equivalent_different_dtype_but_equal(): + # Unclear if this is exposed anywhere in the public-facing API + assert 
array_equivalent(np.array([1, 2]), np.array([1.0, 2.0])) + + +@pytest.mark.parametrize( + "lvalue, rvalue", + [ + # There are 3 variants for each of lvalue and rvalue. We include all + # three for the tz-naive `now` and exclude the datetim64 variant + # for utcnow because it drops tzinfo. + (fix_now, fix_utcnow), + (fix_now.to_datetime64(), fix_utcnow), + (fix_now.to_pydatetime(), fix_utcnow), + (fix_now, fix_utcnow), + (fix_now.to_datetime64(), fix_utcnow.to_pydatetime()), + (fix_now.to_pydatetime(), fix_utcnow.to_pydatetime()), + ], +) +def test_array_equivalent_tzawareness(lvalue, rvalue): + # we shouldn't raise if comparing tzaware and tznaive datetimes + left = np.array([lvalue], dtype=object) + right = np.array([rvalue], dtype=object) + + assert not array_equivalent(left, right, strict_nan=True) + assert not array_equivalent(left, right, strict_nan=False) + + +def test_array_equivalent_compat(): + # see gh-13388 + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + assert array_equivalent(m, n, strict_nan=True) + assert array_equivalent(m, n, strict_nan=False) + + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (4, 3)], dtype=[("a", int), ("b", float)]) + assert not array_equivalent(m, n, strict_nan=True) + assert not array_equivalent(m, n, strict_nan=False) + + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (3, 4)], dtype=[("b", int), ("a", float)]) + assert not array_equivalent(m, n, strict_nan=True) + assert not array_equivalent(m, n, strict_nan=False) + + +@pytest.mark.parametrize("dtype", ["O", "S", "U"]) +def test_array_equivalent_str(dtype): + assert array_equivalent( + np.array(["A", "B"], dtype=dtype), np.array(["A", "B"], dtype=dtype) + ) + assert not array_equivalent( + np.array(["A", "B"], dtype=dtype), np.array(["A", "X"], dtype=dtype) + ) + + +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested(strict_nan): + # reached in groupby aggregations, make sure we use np.any when checking + # if the comparison is truthy + left = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object) + right = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object) + + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.empty(2, dtype=object) + left[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])] + right = np.empty(2, dtype=object) + right[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])] + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.array([np.array([50, 50, 50]), np.array([40, 40])], dtype=object) + right = np.array([50, 40]) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested2(strict_nan): + # more than one level of nesting + left = np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ) + right = np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ) + assert array_equivalent(left, right, 
strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.array([np.array([np.array([50, 50, 50])], dtype=object)], dtype=object) + right = np.array([50]) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested_list(strict_nan): + left = np.array([[50, 70, 90], [20, 30]], dtype=object) + right = np.array([[50, 70, 90], [20, 30]], dtype=object) + + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.array([[50, 50, 50], [40, 40]], dtype=object) + right = np.array([50, 40]) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.xfail(reason="failing") +@pytest.mark.parametrize("strict_nan", [True, False]) +def test_array_equivalent_nested_mixed_list(strict_nan): + # mixed arrays / lists in left and right + # https://github.com/pandas-dev/pandas/issues/50360 + left = np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object) + right = np.array([[1, 2, 3], [4, 5]], dtype=object) + + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + # multiple levels of nesting + left = np.array( + [ + np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object), + np.array([np.array([6]), np.array([7, 8]), np.array([9])], dtype=object), + ], + dtype=object, + ) + right = np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + # same-length lists + subarr = np.empty(2, dtype=object) + subarr[:] = [ + np.array([None, "b"], dtype=object), + np.array(["c", "d"], dtype=object), + ] + left = np.array([subarr, None], dtype=object) + right = np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + +@pytest.mark.xfail(reason="failing") +@pytest.mark.parametrize("strict_nan", [True, False]) +def test_array_equivalent_nested_dicts(strict_nan): + left = np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object) + right = np.array( + [{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object + ) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + right2 = np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object) + assert array_equivalent(left, right2, strict_nan=strict_nan) + assert not array_equivalent(left, right2[::-1], strict_nan=strict_nan) + + +@pytest.mark.parametrize( + "dtype, na_value", + [ + # Datetime-like + (np.dtype("M8[ns]"), np.datetime64("NaT", "ns")), + (np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")), + (DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]"), NaT), + (PeriodDtype("M"), NaT), + # Integer + ("u1", 0), + ("u2", 0), + ("u4", 0), + ("u8", 0), + ("i1", 0), + ("i2", 0), + ("i4", 0), + ("i8", 0), + # Bool + ("bool", False), + # Float + ("f2", np.nan), + ("f4", np.nan), + ("f8", np.nan), + # Object + ("O", np.nan), + # Interval + (IntervalDtype(), np.nan), + ], +) +def test_na_value_for_dtype(dtype, na_value): + result = na_value_for_dtype(dtype) + # identify check doesn't work 
for datetime64/timedelta64("NaT") bc they + # are not singletons + assert result is na_value or ( + isna(result) and isna(na_value) and type(result) is type(na_value) + ) + + +class TestNAObj: + def _check_behavior(self, arr, expected): + result = libmissing.isnaobj(arr) + tm.assert_numpy_array_equal(result, expected) + result = libmissing.isnaobj(arr, inf_as_na=True) + tm.assert_numpy_array_equal(result, expected) + + arr = np.atleast_2d(arr) + expected = np.atleast_2d(expected) + + result = libmissing.isnaobj2d(arr) + tm.assert_numpy_array_equal(result, expected) + result = libmissing.isnaobj2d(arr, inf_as_na=True) + tm.assert_numpy_array_equal(result, expected) + + def test_basic(self): + arr = np.array([1, None, "foo", -5.1, NaT, np.nan]) + expected = np.array([False, True, False, False, True, True]) + + self._check_behavior(arr, expected) + + def test_non_obj_dtype(self): + arr = np.array([1, 3, np.nan, 5], dtype=float) + expected = np.array([False, False, True, False]) + + self._check_behavior(arr, expected) + + def test_empty_arr(self): + arr = np.array([]) + expected = np.array([], dtype=bool) + + self._check_behavior(arr, expected) + + def test_empty_str_inp(self): + arr = np.array([""]) # empty but not na + expected = np.array([False]) + + self._check_behavior(arr, expected) + + def test_empty_like(self): + # see gh-13717: no segfaults! + arr = np.empty_like([None]) + expected = np.array([True]) + + self._check_behavior(arr, expected) + + +m8_units = ["as", "ps", "ns", "us", "ms", "s", "m", "h", "D", "W", "M", "Y"] + +na_vals = ( + [ + None, + NaT, + float("NaN"), + complex("NaN"), + np.nan, + np.float64("NaN"), + np.float32("NaN"), + np.complex64(np.nan), + np.complex128(np.nan), + np.datetime64("NaT"), + np.timedelta64("NaT"), + ] + + [np.datetime64("NaT", unit) for unit in m8_units] + + [np.timedelta64("NaT", unit) for unit in m8_units] +) + +inf_vals = [ + float("inf"), + float("-inf"), + complex("inf"), + complex("-inf"), + np.inf, + np.NINF, +] + +int_na_vals = [ + # Values that match iNaT, which we treat as null in specific cases + np.int64(NaT.value), + int(NaT.value), +] + +sometimes_na_vals = [Decimal("NaN")] + +never_na_vals = [ + # float/complex values that when viewed as int64 match iNaT + -0.0, + np.float64("-0.0"), + -0j, + np.complex64(-0j), +] + + +class TestLibMissing: + @pytest.mark.parametrize("func", [libmissing.checknull, isna]) + @pytest.mark.parametrize( + "value", na_vals + sometimes_na_vals # type: ignore[operator] + ) + def test_checknull_na_vals(self, func, value): + assert func(value) + + @pytest.mark.parametrize("func", [libmissing.checknull, isna]) + @pytest.mark.parametrize("value", inf_vals) + def test_checknull_inf_vals(self, func, value): + assert not func(value) + + @pytest.mark.parametrize("func", [libmissing.checknull, isna]) + @pytest.mark.parametrize("value", int_na_vals) + def test_checknull_intna_vals(self, func, value): + assert not func(value) + + @pytest.mark.parametrize("func", [libmissing.checknull, isna]) + @pytest.mark.parametrize("value", never_na_vals) + def test_checknull_never_na_vals(self, func, value): + assert not func(value) + + @pytest.mark.parametrize( + "value", na_vals + sometimes_na_vals # type: ignore[operator] + ) + def test_checknull_old_na_vals(self, value): + assert libmissing.checknull(value, inf_as_na=True) + + @pytest.mark.parametrize("value", inf_vals) + def test_checknull_old_inf_vals(self, value): + assert libmissing.checknull(value, inf_as_na=True) + + @pytest.mark.parametrize("value", int_na_vals) + def 
test_checknull_old_intna_vals(self, value): + assert not libmissing.checknull(value, inf_as_na=True) + + @pytest.mark.parametrize("value", int_na_vals) + def test_checknull_old_never_na_vals(self, value): + assert not libmissing.checknull(value, inf_as_na=True) + + def test_is_matching_na(self, nulls_fixture, nulls_fixture2): + left = nulls_fixture + right = nulls_fixture2 + + assert libmissing.is_matching_na(left, left) + + if left is right: + assert libmissing.is_matching_na(left, right) + elif is_float(left) and is_float(right): + # np.nan vs float("NaN") we consider as matching + assert libmissing.is_matching_na(left, right) + elif type(left) is type(right): + # e.g. both Decimal("NaN") + assert libmissing.is_matching_na(left, right) + else: + assert not libmissing.is_matching_na(left, right) + + def test_is_matching_na_nan_matches_none(self): + + assert not libmissing.is_matching_na(None, np.nan) + assert not libmissing.is_matching_na(np.nan, None) + + assert libmissing.is_matching_na(None, np.nan, nan_matches_none=True) + assert libmissing.is_matching_na(np.nan, None, nan_matches_none=True) + + +class TestIsValidNAForDtype: + def test_is_valid_na_for_dtype_interval(self): + dtype = IntervalDtype("int64", "left") + assert not is_valid_na_for_dtype(NaT, dtype) + + dtype = IntervalDtype("datetime64[ns]", "both") + assert not is_valid_na_for_dtype(NaT, dtype) + + def test_is_valid_na_for_dtype_categorical(self): + dtype = CategoricalDtype(categories=[0, 1, 2]) + assert is_valid_na_for_dtype(np.nan, dtype) + + assert not is_valid_na_for_dtype(NaT, dtype) + assert not is_valid_na_for_dtype(np.datetime64("NaT", "ns"), dtype) + assert not is_valid_na_for_dtype(np.timedelta64("NaT", "ns"), dtype) diff --git a/pandas/tests/extension/__init__.py b/pandas/tests/extension/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/extension/array_with_attr/__init__.py b/pandas/tests/extension/array_with_attr/__init__.py new file mode 100644 index 00000000..49da6af0 --- /dev/null +++ b/pandas/tests/extension/array_with_attr/__init__.py @@ -0,0 +1,6 @@ +from pandas.tests.extension.array_with_attr.array import ( + FloatAttrArray, + FloatAttrDtype, +) + +__all__ = ["FloatAttrArray", "FloatAttrDtype"] diff --git a/pandas/tests/extension/array_with_attr/array.py b/pandas/tests/extension/array_with_attr/array.py new file mode 100644 index 00000000..d9327ca9 --- /dev/null +++ b/pandas/tests/extension/array_with_attr/array.py @@ -0,0 +1,84 @@ +""" +Test extension array that has custom attribute information (not stored on the dtype). + +""" +from __future__ import annotations + +import numbers + +import numpy as np + +from pandas._typing import type_t + +from pandas.core.dtypes.base import ExtensionDtype + +import pandas as pd +from pandas.core.arrays import ExtensionArray + + +class FloatAttrDtype(ExtensionDtype): + type = float + name = "float_attr" + na_value = np.nan + + @classmethod + def construct_array_type(cls) -> type_t[FloatAttrArray]: + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + return FloatAttrArray + + +class FloatAttrArray(ExtensionArray): + dtype = FloatAttrDtype() + __array_priority__ = 1000 + + def __init__(self, values, attr=None) -> None: + if not isinstance(values, np.ndarray): + raise TypeError("Need to pass a numpy array of float64 dtype as values") + if not values.dtype == "float64": + raise TypeError("Need to pass a numpy array of float64 dtype as values") + self.data = values + self.attr = attr + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + data = np.array(scalars, dtype="float64", copy=copy) + return cls(data) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.data[item] + else: + # slice, list-like, mask + item = pd.api.indexers.check_array_indexer(self, item) + return type(self)(self.data[item], self.attr) + + def __len__(self) -> int: + return len(self.data) + + def isna(self): + return np.isnan(self.data) + + def take(self, indexer, allow_fill=False, fill_value=None): + from pandas.api.extensions import take + + data = self.data + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill) + return type(self)(result, self.attr) + + def copy(self): + return type(self)(self.data.copy(), self.attr) + + @classmethod + def _concat_same_type(cls, to_concat): + data = np.concatenate([x.data for x in to_concat]) + attr = to_concat[0].attr if len(to_concat) else None + return cls(data, attr) diff --git a/pandas/tests/extension/array_with_attr/test_array_with_attr.py b/pandas/tests/extension/array_with_attr/test_array_with_attr.py new file mode 100644 index 00000000..3735fe40 --- /dev/null +++ b/pandas/tests/extension/array_with_attr/test_array_with_attr.py @@ -0,0 +1,33 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.array_with_attr import FloatAttrArray + + +def test_concat_with_all_na(): + # https://github.com/pandas-dev/pandas/pull/47762 + # ensure that attribute of the column array is preserved (when it gets + # preserved in reindexing the array) during merge/concat + arr = FloatAttrArray(np.array([np.nan, np.nan], dtype="float64"), attr="test") + + df1 = pd.DataFrame({"col": arr, "key": [0, 1]}) + df2 = pd.DataFrame({"key": [0, 1], "col2": [1, 2]}) + result = pd.merge(df1, df2, on="key") + expected = pd.DataFrame({"col": arr, "key": [0, 1], "col2": [1, 2]}) + tm.assert_frame_equal(result, expected) + assert result["col"].array.attr == "test" + + df1 = pd.DataFrame({"col": arr, "key": [0, 1]}) + df2 = pd.DataFrame({"key": [0, 2], "col2": [1, 2]}) + result = pd.merge(df1, df2, on="key") + expected = pd.DataFrame({"col": arr.take([0]), "key": [0], "col2": [1]}) + tm.assert_frame_equal(result, expected) + assert result["col"].array.attr == "test" + + result = pd.concat([df1.set_index("key"), df2.set_index("key")], axis=1) + expected = pd.DataFrame( + {"col": arr.take([0, 1, -1]), "col2": [1, np.nan, 2], "key": [0, 1, 2]} + ).set_index("key") + tm.assert_frame_equal(result, expected) + assert result["col"].array.attr == "test" diff --git a/pandas/tests/extension/arrow/__init__.py b/pandas/tests/extension/arrow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py new file mode 100644 index 00000000..26b94ebe --- /dev/null +++ b/pandas/tests/extension/arrow/arrays.py @@ -0,0 +1,197 @@ +""" +Rudimentary Apache 
Arrow-backed ExtensionArray. + +At the moment, just a boolean array / type is implemented. +Eventually, we'll want to parametrize the type and support +multiple dtypes. Not all methods are implemented yet, and the +current implementation is not efficient. +""" +from __future__ import annotations + +import itertools +import operator + +import numpy as np +import pyarrow as pa + +from pandas._typing import type_t + +import pandas as pd +from pandas.api.extensions import ( + ExtensionDtype, + register_extension_dtype, + take, +) +from pandas.api.types import is_scalar +from pandas.core.arrays.arrow import ArrowExtensionArray as _ArrowExtensionArray +from pandas.core.construction import extract_array + + +@register_extension_dtype +class ArrowBoolDtype(ExtensionDtype): + + type = np.bool_ + kind = "b" + name = "arrow_bool" + na_value = pa.NULL + + @classmethod + def construct_array_type(cls) -> type_t[ArrowBoolArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ArrowBoolArray + + @property + def _is_boolean(self) -> bool: + return True + + +@register_extension_dtype +class ArrowStringDtype(ExtensionDtype): + + type = str + kind = "U" + name = "arrow_string" + na_value = pa.NULL + + @classmethod + def construct_array_type(cls) -> type_t[ArrowStringArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ArrowStringArray + + +class ArrowExtensionArray(_ArrowExtensionArray): + _data: pa.ChunkedArray + + @classmethod + def _from_sequence(cls, values, dtype=None, copy=False): + # TODO: respect dtype, copy + + if isinstance(values, cls): + # in particular for empty cases the pa.array(np.asarray(...)) + # does not round-trip + return cls(values._data) + + elif not len(values): + if isinstance(values, list): + dtype = bool if cls is ArrowBoolArray else str + values = np.array([], dtype=dtype) + + arr = pa.chunked_array([pa.array(np.asarray(values))]) + return cls(arr) + + def __repr__(self): + return f"{type(self).__name__}({repr(self._data)})" + + def __contains__(self, obj) -> bool: + if obj is None or obj is self.dtype.na_value: + # None -> EA.__contains__ only checks for self._dtype.na_value, not + # any compatible NA value. + # self.dtype.na_value -> isn't recognized by pd.isna + return bool(self.isna().any()) + return bool(super().__contains__(obj)) + + def __getitem__(self, item): + if is_scalar(item): + return self._data.to_pandas()[item] + else: + vals = self._data.to_pandas()[item] + return type(self)._from_sequence(vals) + + def astype(self, dtype, copy=True): + # needed to fix this astype for the Series constructor. + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + return super().astype(dtype, copy) + + @property + def dtype(self): + return self._dtype + + def _logical_method(self, other, op): + if not isinstance(other, type(self)): + raise NotImplementedError() + + result = op(np.array(self._data), np.array(other._data)) + return ArrowBoolArray( + pa.chunked_array([pa.array(result, mask=pd.isna(self._data.to_pandas()))]) + ) + + def __eq__(self, other): + if not isinstance(other, type(self)): + # TODO: use some pyarrow function here? 
+ return np.asarray(self).__eq__(other) + + return self._logical_method(other, operator.eq) + + def take(self, indices, allow_fill=False, fill_value=None): + data = self._data.to_pandas() + data = extract_array(data, extract_numpy=True) + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + + @classmethod + def _concat_same_type(cls, to_concat): + chunks = list(itertools.chain.from_iterable(x._data.chunks for x in to_concat)) + arr = pa.chunked_array(chunks) + return cls(arr) + + def __invert__(self): + return type(self)._from_sequence(~self._data.to_pandas()) + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + if skipna: + arr = self[~self.isna()] + else: + arr = self + + try: + op = getattr(arr, name) + except AttributeError as err: + raise TypeError from err + return op(**kwargs) + + def any(self, axis=0, out=None): + # Explicitly return a plain bool to reproduce GH-34660 + return bool(self._data.to_pandas().any()) + + def all(self, axis=0, out=None): + # Explicitly return a plain bool to reproduce GH-34660 + return bool(self._data.to_pandas().all()) + + +class ArrowBoolArray(ArrowExtensionArray): + def __init__(self, values) -> None: + if not isinstance(values, pa.ChunkedArray): + raise ValueError + + assert values.type == pa.bool_() + self._data = values + self._dtype = ArrowBoolDtype() # type: ignore[assignment] + + +class ArrowStringArray(ArrowExtensionArray): + def __init__(self, values) -> None: + if not isinstance(values, pa.ChunkedArray): + raise ValueError + + assert values.type == pa.string() + self._data = values + self._dtype = ArrowStringDtype() # type: ignore[assignment] diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py new file mode 100644 index 00000000..0205b8aa --- /dev/null +++ b/pandas/tests/extension/arrow/test_bool.py @@ -0,0 +1,104 @@ +import numpy as np +import pytest + +from pandas.compat import ( + is_ci_environment, + is_platform_windows, +) + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_bool_dtype +from pandas.tests.extension import base + +pytest.importorskip("pyarrow", minversion="1.0.1") + +from pandas.tests.extension.arrow.arrays import ( # isort:skip + ArrowBoolArray, + ArrowBoolDtype, +) + + +@pytest.fixture +def dtype(): + return ArrowBoolDtype() + + +@pytest.fixture +def data(): + values = np.random.randint(0, 2, size=100, dtype=bool) + values[1] = ~values[0] + return ArrowBoolArray._from_sequence(values) + + +@pytest.fixture +def data_missing(): + return ArrowBoolArray._from_sequence([None, True]) + + +def test_basic_equals(data): + # https://github.com/pandas-dev/pandas/issues/34660 + assert pd.Series(data).equals(pd.Series(data)) + + +class BaseArrowTests: + pass + + +class TestDtype(BaseArrowTests, base.BaseDtypeTests): + pass + + +class TestInterface(BaseArrowTests, base.BaseInterfaceTests): + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + + @pytest.mark.xfail( + raises=AssertionError, + reason="Doesn't recognize data._na_value as NA", + ) + def test_contains(self, data, data_missing): + super().test_contains(data, data_missing) + + +class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): + 
@pytest.mark.xfail(reason="pa.NULL is not recognised as scalar, GH-33899") + def test_series_constructor_no_data_with_index(self, dtype, na_value): + # pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays + super().test_series_constructor_no_data_with_index(dtype, na_value) + + @pytest.mark.xfail(reason="pa.NULL is not recognised as scalar, GH-33899") + def test_series_constructor_scalar_na_with_index(self, dtype, na_value): + # pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays + super().test_series_constructor_scalar_na_with_index(dtype, na_value) + + @pytest.mark.xfail(reason="_from_sequence ignores dtype keyword") + def test_empty(self, dtype): + super().test_empty(dtype) + + +class TestReduce(base.BaseNoReduceTests): + def test_reduce_series_boolean(self): + pass + + +@pytest.mark.skipif( + is_ci_environment() and is_platform_windows(), + reason="Causes stack overflow on Windows CI", +) +class TestReduceBoolean(base.BaseBooleanReduceTests): + pass + + +def test_is_bool_dtype(data): + assert is_bool_dtype(data) + assert pd.core.common.is_bool_indexer(data) + s = pd.Series(range(len(data))) + result = s[data] + expected = s[np.asarray(data)] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/arrow/test_string.py b/pandas/tests/extension/arrow/test_string.py new file mode 100644 index 00000000..67a62978 --- /dev/null +++ b/pandas/tests/extension/arrow/test_string.py @@ -0,0 +1,12 @@ +import pytest + +import pandas as pd + +pytest.importorskip("pyarrow", minversion="1.0.0") + + +def test_constructor_from_list(): + # GH 27673 + result = pd.Series(["E"], dtype=pd.StringDtype(storage="pyarrow")) + assert isinstance(result.dtype, pd.StringDtype) + assert result.dtype.storage == "pyarrow" diff --git a/pandas/tests/extension/arrow/test_timestamp.py b/pandas/tests/extension/arrow/test_timestamp.py new file mode 100644 index 00000000..5b81940e --- /dev/null +++ b/pandas/tests/extension/arrow/test_timestamp.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +import datetime + +import pytest + +from pandas._typing import type_t + +import pandas as pd +from pandas.api.extensions import ( + ExtensionDtype, + register_extension_dtype, +) + +pytest.importorskip("pyarrow", minversion="1.0.1") + +import pyarrow as pa # isort:skip + +from pandas.tests.extension.arrow.arrays import ArrowExtensionArray # isort:skip + + +@register_extension_dtype +class ArrowTimestampUSDtype(ExtensionDtype): + + type = datetime.datetime + kind = "M" + name = "arrow_timestamp_us" + na_value = pa.NULL + + @classmethod + def construct_array_type(cls) -> type_t[ArrowTimestampUSArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ArrowTimestampUSArray + + +class ArrowTimestampUSArray(ArrowExtensionArray): + def __init__(self, values) -> None: + if not isinstance(values, pa.ChunkedArray): + raise ValueError + + assert values.type == pa.timestamp("us") + self._data = values + self._dtype = ArrowTimestampUSDtype() # type: ignore[assignment] + + +def test_constructor_extensionblock(): + # GH 34986 + arr = ArrowTimestampUSArray._from_sequence( + [None, datetime.datetime(2010, 9, 8, 7, 6, 5, 4)] + ) + pd.DataFrame({"timestamp": arr}) diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py new file mode 100644 index 00000000..571ab3dc --- /dev/null +++ b/pandas/tests/extension/base/__init__.py @@ -0,0 +1,71 @@ +""" +Base test suite for extension arrays. 
+ +These tests are intended for third-party libraries to subclass to validate +that their extension arrays and dtypes satisfy the interface. Moving or +renaming the tests should not be done lightly. + +Libraries are expected to implement a few pytest fixtures to provide data +for the tests. The fixtures may be located in either + +* The same module as your test class. +* A ``conftest.py`` in the same directory as your test class. + +The full list of fixtures may be found in the ``conftest.py`` next to this +file. + +.. code-block:: python + + import pytest + from pandas.tests.extension.base import BaseDtypeTests + + + @pytest.fixture + def dtype(): + return MyDtype() + + + class TestMyDtype(BaseDtypeTests): + pass + + +Your class ``TestDtype`` will inherit all the tests defined on +``BaseDtypeTests``. pytest's fixture discover will supply your ``dtype`` +wherever the test requires it. You're free to implement additional tests. + +All the tests in these modules use ``self.assert_frame_equal`` or +``self.assert_series_equal`` for dataframe or series comparisons. By default, +they use the usual ``pandas.testing.assert_frame_equal`` and +``pandas.testing.assert_series_equal``. You can override the checks used +by defining the staticmethods ``assert_frame_equal`` and +``assert_series_equal`` on your base test class. + +""" +from pandas.tests.extension.base.casting import BaseCastingTests # noqa +from pandas.tests.extension.base.constructors import BaseConstructorsTests # noqa +from pandas.tests.extension.base.dim2 import ( # noqa + Dim2CompatTests, + NDArrayBacked2DTests, +) +from pandas.tests.extension.base.dtype import BaseDtypeTests # noqa +from pandas.tests.extension.base.getitem import BaseGetitemTests # noqa +from pandas.tests.extension.base.groupby import BaseGroupbyTests # noqa +from pandas.tests.extension.base.index import BaseIndexTests # noqa +from pandas.tests.extension.base.interface import BaseInterfaceTests # noqa +from pandas.tests.extension.base.io import BaseParsingTests # noqa +from pandas.tests.extension.base.methods import BaseMethodsTests # noqa +from pandas.tests.extension.base.missing import BaseMissingTests # noqa +from pandas.tests.extension.base.ops import ( # noqa + BaseArithmeticOpsTests, + BaseComparisonOpsTests, + BaseOpsUtil, + BaseUnaryOpsTests, +) +from pandas.tests.extension.base.printing import BasePrintingTests # noqa +from pandas.tests.extension.base.reduce import ( # noqa + BaseBooleanReduceTests, + BaseNoReduceTests, + BaseNumericReduceTests, +) +from pandas.tests.extension.base.reshaping import BaseReshapingTests # noqa +from pandas.tests.extension.base.setitem import BaseSetitemTests # noqa diff --git a/pandas/tests/extension/base/base.py b/pandas/tests/extension/base/base.py new file mode 100644 index 00000000..97d8e7c6 --- /dev/null +++ b/pandas/tests/extension/base/base.py @@ -0,0 +1,21 @@ +import pandas._testing as tm + + +class BaseExtensionTests: + # classmethod and different signature is needed + # to make inheritance compliant with mypy + @classmethod + def assert_equal(cls, left, right, **kwargs): + return tm.assert_equal(left, right, **kwargs) + + @classmethod + def assert_series_equal(cls, left, right, *args, **kwargs): + return tm.assert_series_equal(left, right, *args, **kwargs) + + @classmethod + def assert_frame_equal(cls, left, right, *args, **kwargs): + return tm.assert_frame_equal(left, right, *args, **kwargs) + + @classmethod + def assert_extension_array_equal(cls, left, right, *args, **kwargs): + return 
tm.assert_extension_array_equal(left, right, *args, **kwargs) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py new file mode 100644 index 00000000..0eb8123e --- /dev/null +++ b/pandas/tests/extension/base/casting.py @@ -0,0 +1,86 @@ +import numpy as np +import pytest + +from pandas.compat import np_version_under1p21 +import pandas.util._test_decorators as td + +import pandas as pd +from pandas.core.internals import ObjectBlock +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseCastingTests(BaseExtensionTests): + """Casting to and from ExtensionDtypes""" + + def test_astype_object_series(self, all_data): + ser = pd.Series(all_data, name="A") + result = ser.astype(object) + assert result.dtype == np.dtype(object) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], ObjectBlock) + assert isinstance(result._mgr.array, np.ndarray) + assert result._mgr.array.dtype == np.dtype(object) + + def test_astype_object_frame(self, all_data): + df = pd.DataFrame({"A": all_data}) + + result = df.astype(object) + if hasattr(result._mgr, "blocks"): + blk = result._data.blocks[0] + assert isinstance(blk, ObjectBlock), type(blk) + assert isinstance(result._mgr.arrays[0], np.ndarray) + assert result._mgr.arrays[0].dtype == np.dtype(object) + + # earlier numpy raises TypeError on e.g. np.dtype(np.int64) == "Int64" + if not np_version_under1p21: + # check that we can compare the dtypes + comp = result.dtypes == df.dtypes + assert not comp.any() + + def test_tolist(self, data): + result = pd.Series(data).tolist() + expected = list(data) + assert result == expected + + def test_astype_str(self, data): + result = pd.Series(data[:5]).astype(str) + expected = pd.Series([str(x) for x in data[:5]], dtype=str) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "nullable_string_dtype", + [ + "string[python]", + pytest.param( + "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0") + ), + ], + ) + def test_astype_string(self, data, nullable_string_dtype): + # GH-33465 + result = pd.Series(data[:5]).astype(nullable_string_dtype) + expected = pd.Series([str(x) for x in data[:5]], dtype=nullable_string_dtype) + self.assert_series_equal(result, expected) + + def test_to_numpy(self, data): + expected = np.asarray(data) + + result = data.to_numpy() + self.assert_equal(result, expected) + + result = pd.Series(data).to_numpy() + self.assert_equal(result, expected) + + def test_astype_empty_dataframe(self, dtype): + # https://github.com/pandas-dev/pandas/issues/33113 + df = pd.DataFrame() + result = df.astype(dtype) + self.assert_frame_equal(result, df) + + @pytest.mark.parametrize("copy", [True, False]) + def test_astype_own_type(self, data, copy): + # ensure that astype returns the original object for equal dtype and copy=False + # https://github.com/pandas-dev/pandas/issues/28488 + result = data.astype(data.dtype, copy=copy) + assert (result is data) is (not copy) + self.assert_extension_array_equal(result, data) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py new file mode 100644 index 00000000..b9f8f851 --- /dev/null +++ b/pandas/tests/extension/base/constructors.py @@ -0,0 +1,142 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.api.extensions import ExtensionArray +from pandas.core.internals.blocks import EABackedBlock +from pandas.tests.extension.base.base import BaseExtensionTests + + +class 
BaseConstructorsTests(BaseExtensionTests): + def test_from_sequence_from_cls(self, data): + result = type(data)._from_sequence(data, dtype=data.dtype) + self.assert_extension_array_equal(result, data) + + data = data[:0] + result = type(data)._from_sequence(data, dtype=data.dtype) + self.assert_extension_array_equal(result, data) + + def test_array_from_scalars(self, data): + scalars = [data[0], data[1], data[2]] + result = data._from_sequence(scalars) + assert isinstance(result, type(data)) + + def test_series_constructor(self, data): + result = pd.Series(data) + assert result.dtype == data.dtype + assert len(result) == len(data) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], EABackedBlock) + assert result._mgr.array is data + + # Series[EA] is unboxed / boxed correctly + result2 = pd.Series(result) + assert result2.dtype == data.dtype + if hasattr(result._mgr, "blocks"): + assert isinstance(result2._mgr.blocks[0], EABackedBlock) + + def test_series_constructor_no_data_with_index(self, dtype, na_value): + result = pd.Series(index=[1, 2, 3], dtype=dtype) + expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype) + self.assert_series_equal(result, expected) + + # GH 33559 - empty index + result = pd.Series(index=[], dtype=dtype) + expected = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype) + self.assert_series_equal(result, expected) + + def test_series_constructor_scalar_na_with_index(self, dtype, na_value): + result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype) + expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype) + self.assert_series_equal(result, expected) + + def test_series_constructor_scalar_with_index(self, data, dtype): + scalar = data[0] + result = pd.Series(scalar, index=[1, 2, 3], dtype=dtype) + expected = pd.Series([scalar] * 3, index=[1, 2, 3], dtype=dtype) + self.assert_series_equal(result, expected) + + result = pd.Series(scalar, index=["foo"], dtype=dtype) + expected = pd.Series([scalar], index=["foo"], dtype=dtype) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("from_series", [True, False]) + def test_dataframe_constructor_from_dict(self, data, from_series): + if from_series: + data = pd.Series(data) + result = pd.DataFrame({"A": data}) + assert result.dtypes["A"] == data.dtype + assert result.shape == (len(data), 1) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], EABackedBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) + + def test_dataframe_from_series(self, data): + result = pd.DataFrame(pd.Series(data)) + assert result.dtypes[0] == data.dtype + assert result.shape == (len(data), 1) + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], EABackedBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) + + def test_series_given_mismatched_index_raises(self, data): + msg = r"Length of values \(3\) does not match length of index \(5\)" + with pytest.raises(ValueError, match=msg): + pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + + def test_from_dtype(self, data): + # construct from our dtype & string dtype + dtype = data.dtype + + expected = pd.Series(data) + result = pd.Series(list(data), dtype=dtype) + self.assert_series_equal(result, expected) + + result = pd.Series(list(data), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # gh-30280 + + expected = pd.DataFrame(data).astype(dtype) + result = pd.DataFrame(list(data), dtype=dtype) + self.assert_frame_equal(result, 
expected) + + result = pd.DataFrame(list(data), dtype=str(dtype)) + self.assert_frame_equal(result, expected) + + def test_pandas_array(self, data): + # pd.array(extension_array) should be idempotent... + result = pd.array(data) + self.assert_extension_array_equal(result, data) + + def test_pandas_array_dtype(self, data): + # ... but specifying dtype will override idempotency + result = pd.array(data, dtype=np.dtype(object)) + expected = pd.arrays.PandasArray(np.asarray(data, dtype=object)) + self.assert_equal(result, expected) + + def test_construct_empty_dataframe(self, dtype): + # GH 33623 + result = pd.DataFrame(columns=["a"], dtype=dtype) + expected = pd.DataFrame( + {"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object") + ) + self.assert_frame_equal(result, expected) + + def test_empty(self, dtype): + cls = dtype.construct_array_type() + result = cls._empty((4,), dtype=dtype) + assert isinstance(result, cls) + assert result.dtype == dtype + assert result.shape == (4,) + + # GH#19600 method on ExtensionDtype + result2 = dtype.empty((4,)) + assert isinstance(result2, cls) + assert result2.dtype == dtype + assert result2.shape == (4,) + + result2 = dtype.empty(4) + assert isinstance(result2, cls) + assert result2.dtype == dtype + assert result2.shape == (4,) diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py new file mode 100644 index 00000000..1d5a5c45 --- /dev/null +++ b/pandas/tests/extension/base/dim2.py @@ -0,0 +1,304 @@ +""" +Tests for 2D compatibility. +""" +import numpy as np +import pytest + +from pandas._libs.missing import is_matching_na + +import pandas as pd +from pandas.core.arrays.integer import INT_STR_TO_DTYPE +from pandas.tests.extension.base.base import BaseExtensionTests + + +class Dim2CompatTests(BaseExtensionTests): + # Note: these are ONLY for ExtensionArray subclasses that support 2D arrays. + # i.e. not for pyarrow-backed EAs. 
+ + def test_transpose(self, data): + arr2d = data.repeat(2).reshape(-1, 2) + shape = arr2d.shape + assert shape[0] != shape[-1] # otherwise the rest of the test is useless + + assert arr2d.T.shape == shape[::-1] + + def test_frame_from_2d_array(self, data): + arr2d = data.repeat(2).reshape(-1, 2) + + df = pd.DataFrame(arr2d) + expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]}) + self.assert_frame_equal(df, expected) + + def test_swapaxes(self, data): + arr2d = data.repeat(2).reshape(-1, 2) + + result = arr2d.swapaxes(0, 1) + expected = arr2d.T + self.assert_extension_array_equal(result, expected) + + def test_delete_2d(self, data): + arr2d = data.repeat(3).reshape(-1, 3) + + # axis = 0 + result = arr2d.delete(1, axis=0) + expected = data.delete(1).repeat(3).reshape(-1, 3) + self.assert_extension_array_equal(result, expected) + + # axis = 1 + result = arr2d.delete(1, axis=1) + expected = data.repeat(2).reshape(-1, 2) + self.assert_extension_array_equal(result, expected) + + def test_take_2d(self, data): + arr2d = data.reshape(-1, 1) + + result = arr2d.take([0, 0, -1], axis=0) + + expected = data.take([0, 0, -1]).reshape(-1, 1) + self.assert_extension_array_equal(result, expected) + + def test_repr_2d(self, data): + # this could fail in a corner case where an element contained the name + res = repr(data.reshape(1, -1)) + assert res.count(f"<{type(data).__name__}") == 1 + + res = repr(data.reshape(-1, 1)) + assert res.count(f"<{type(data).__name__}") == 1 + + def test_reshape(self, data): + arr2d = data.reshape(-1, 1) + assert arr2d.shape == (data.size, 1) + assert len(arr2d) == len(data) + + arr2d = data.reshape((-1, 1)) + assert arr2d.shape == (data.size, 1) + assert len(arr2d) == len(data) + + with pytest.raises(ValueError): + data.reshape((data.size, 2)) + with pytest.raises(ValueError): + data.reshape(data.size, 2) + + def test_getitem_2d(self, data): + arr2d = data.reshape(1, -1) + + result = arr2d[0] + self.assert_extension_array_equal(result, data) + + with pytest.raises(IndexError): + arr2d[1] + + with pytest.raises(IndexError): + arr2d[-2] + + result = arr2d[:] + self.assert_extension_array_equal(result, arr2d) + + result = arr2d[:, :] + self.assert_extension_array_equal(result, arr2d) + + result = arr2d[:, 0] + expected = data[[0]] + self.assert_extension_array_equal(result, expected) + + # dimension-expanding getitem on 1D + result = data[:, np.newaxis] + self.assert_extension_array_equal(result, arr2d.T) + + def test_iter_2d(self, data): + arr2d = data.reshape(1, -1) + + objs = list(iter(arr2d)) + assert len(objs) == arr2d.shape[0] + + for obj in objs: + assert isinstance(obj, type(data)) + assert obj.dtype == data.dtype + assert obj.ndim == 1 + assert len(obj) == arr2d.shape[1] + + def test_tolist_2d(self, data): + arr2d = data.reshape(1, -1) + + result = arr2d.tolist() + expected = [data.tolist()] + + assert isinstance(result, list) + assert all(isinstance(x, list) for x in result) + + assert result == expected + + def test_concat_2d(self, data): + left = type(data)._concat_same_type([data, data]).reshape(-1, 2) + right = left.copy() + + # axis=0 + result = left._concat_same_type([left, right], axis=0) + expected = data._concat_same_type([data] * 4).reshape(-1, 2) + self.assert_extension_array_equal(result, expected) + + # axis=1 + result = left._concat_same_type([left, right], axis=1) + assert result.shape == (len(data), 4) + self.assert_extension_array_equal(result[:, :2], left) + self.assert_extension_array_equal(result[:, 2:], right) + + # axis > 1 -> invalid + msg 
= "axis 2 is out of bounds for array of dimension 2" + with pytest.raises(ValueError, match=msg): + left._concat_same_type([left, right], axis=2) + + @pytest.mark.parametrize("method", ["backfill", "pad"]) + def test_fillna_2d_method(self, data_missing, method): + arr = data_missing.repeat(2).reshape(2, 2) + assert arr[0].isna().all() + assert not arr[1].isna().any() + + result = arr.fillna(method=method) + + expected = data_missing.fillna(method=method).repeat(2).reshape(2, 2) + self.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis_none(self, data, method): + arr2d = data.reshape(1, -1) + + err_expected = None + err_result = None + try: + expected = getattr(data, method)() + except Exception as err: + # if the 1D reduction is invalid, the 2D reduction should be as well + err_expected = err + try: + result = getattr(arr2d, method)(axis=None) + except Exception as err2: + err_result = err2 + + else: + result = getattr(arr2d, method)(axis=None) + + if err_result is not None or err_expected is not None: + assert type(err_result) == type(err_expected) + return + + assert is_matching_na(result, expected) or result == expected + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis0(self, data, method): + arr2d = data.reshape(1, -1) + + kwargs = {} + if method == "std": + # pass ddof=0 so we get all-zero std instead of all-NA std + kwargs["ddof"] = 0 + + try: + result = getattr(arr2d, method)(axis=0, **kwargs) + except Exception as err: + try: + getattr(data, method)() + except Exception as err2: + assert type(err) == type(err2) + return + else: + raise AssertionError("Both reductions should raise or neither") + + def get_reduction_result_dtype(dtype): + # windows and 32bit builds will in some cases have int32/uint32 + # where other builds will have int64/uint64. + if dtype.itemsize == 8: + return dtype + elif dtype.kind in "ib": + return INT_STR_TO_DTYPE[np.dtype(int).name] + else: + # i.e. 
dtype.kind == "u" + return INT_STR_TO_DTYPE[np.dtype(np.uint).name] + + if method in ["mean", "median", "sum", "prod"]: + # std and var are not dtype-preserving + expected = data + if method in ["sum", "prod"] and data.dtype.kind in "iub": + dtype = get_reduction_result_dtype(data.dtype) + + expected = data.astype(dtype) + if data.dtype.kind == "b" and method in ["sum", "prod"]: + # We get IntegerArray instead of BooleanArray + pass + else: + assert type(expected) == type(data), type(expected) + assert dtype == expected.dtype + + self.assert_extension_array_equal(result, expected) + elif method == "std": + self.assert_extension_array_equal(result, data - data) + # punt on method == "var" + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis1(self, data, method): + arr2d = data.reshape(1, -1) + + try: + result = getattr(arr2d, method)(axis=1) + except Exception as err: + try: + getattr(data, method)() + except Exception as err2: + assert type(err) == type(err2) + return + else: + raise AssertionError("Both reductions should raise or neither") + + # not necessarily type/dtype-preserving, so weaker assertions + assert result.shape == (1,) + expected_scalar = getattr(data, method)() + res = result[0] + assert is_matching_na(res, expected_scalar) or res == expected_scalar + + +class NDArrayBacked2DTests(Dim2CompatTests): + # More specific tests for NDArrayBackedExtensionArray subclasses + + def test_copy_order(self, data): + # We should be matching numpy semantics for the "order" keyword in 'copy' + arr2d = data.repeat(2).reshape(-1, 2) + assert arr2d._ndarray.flags["C_CONTIGUOUS"] + + res = arr2d.copy() + assert res._ndarray.flags["C_CONTIGUOUS"] + + res = arr2d[::2, ::2].copy() + assert res._ndarray.flags["C_CONTIGUOUS"] + + res = arr2d.copy("F") + assert not res._ndarray.flags["C_CONTIGUOUS"] + assert res._ndarray.flags["F_CONTIGUOUS"] + + res = arr2d.copy("K") + assert res._ndarray.flags["C_CONTIGUOUS"] + + res = arr2d.T.copy("K") + assert not res._ndarray.flags["C_CONTIGUOUS"] + assert res._ndarray.flags["F_CONTIGUOUS"] + + # order not accepted by numpy + msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)" + with pytest.raises(ValueError, match=msg): + arr2d.copy("Q") + + # neither contiguity + arr_nc = arr2d[::2] + assert not arr_nc._ndarray.flags["C_CONTIGUOUS"] + assert not arr_nc._ndarray.flags["F_CONTIGUOUS"] + + assert arr_nc.copy()._ndarray.flags["C_CONTIGUOUS"] + assert not arr_nc.copy()._ndarray.flags["F_CONTIGUOUS"] + + assert arr_nc.copy("C")._ndarray.flags["C_CONTIGUOUS"] + assert not arr_nc.copy("C")._ndarray.flags["F_CONTIGUOUS"] + + assert not arr_nc.copy("F")._ndarray.flags["C_CONTIGUOUS"] + assert arr_nc.copy("F")._ndarray.flags["F_CONTIGUOUS"] + + assert arr_nc.copy("K")._ndarray.flags["C_CONTIGUOUS"] + assert not arr_nc.copy("K")._ndarray.flags["F_CONTIGUOUS"] diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py new file mode 100644 index 00000000..ea444301 --- /dev/null +++ b/pandas/tests/extension/base/dtype.py @@ -0,0 +1,137 @@ +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas.api.types import ( + infer_dtype, + is_object_dtype, + is_string_dtype, +) +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseDtypeTests(BaseExtensionTests): + """Base class for ExtensionDtype classes""" + + def test_name(self, dtype): + assert isinstance(dtype.name, str) + + def test_kind(self, dtype): + valid = 
set("biufcmMOSUV") + assert dtype.kind in valid + + def test_construct_from_string_own_name(self, dtype): + result = dtype.construct_from_string(dtype.name) + assert type(result) is type(dtype) + + # check OK as classmethod + result = type(dtype).construct_from_string(dtype.name) + assert type(result) is type(dtype) + + def test_is_dtype_from_name(self, dtype): + result = type(dtype).is_dtype(dtype.name) + assert result is True + + def test_is_dtype_unboxes_dtype(self, data, dtype): + assert dtype.is_dtype(data) is True + + def test_is_dtype_from_self(self, dtype): + result = type(dtype).is_dtype(dtype) + assert result is True + + def test_is_dtype_other_input(self, dtype): + assert dtype.is_dtype([1, 2, 3]) is False + + def test_is_not_string_type(self, dtype): + return not is_string_dtype(dtype) + + def test_is_not_object_type(self, dtype): + return not is_object_dtype(dtype) + + def test_eq_with_str(self, dtype): + assert dtype == dtype.name + assert dtype != dtype.name + "-suffix" + + def test_eq_with_numpy_object(self, dtype): + assert dtype != np.dtype("object") + + def test_eq_with_self(self, dtype): + assert dtype == dtype + assert dtype != object() + + def test_array_type(self, data, dtype): + assert dtype.construct_array_type() is type(data) + + def test_check_dtype(self, data): + dtype = data.dtype + + # check equivalency for using .dtypes + df = pd.DataFrame( + {"A": pd.Series(data, dtype=dtype), "B": data, "C": "foo", "D": 1} + ) + + # TODO(numpy-1.20): This warnings filter and if block can be removed + # once we require numpy>=1.20 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + result = df.dtypes == str(dtype) + # NumPy>=1.20.0, but not pandas.compat.numpy till there + # is a wheel available with this change. 
+ try: + new_numpy_behavior = np.dtype("int64") != "Int64" + except TypeError: + new_numpy_behavior = True + + if dtype.name == "Int64" and not new_numpy_behavior: + expected = pd.Series([True, True, False, True], index=list("ABCD")) + else: + expected = pd.Series([True, True, False, False], index=list("ABCD")) + + self.assert_series_equal(result, expected) + + expected = pd.Series([True, True, False, False], index=list("ABCD")) + result = df.dtypes.apply(str) == str(dtype) + self.assert_series_equal(result, expected) + + def test_hashable(self, dtype): + hash(dtype) # no error + + def test_str(self, dtype): + assert str(dtype) == dtype.name + + def test_eq(self, dtype): + assert dtype == dtype.name + assert dtype != "anonther_type" + + def test_construct_from_string(self, dtype): + dtype_instance = type(dtype).construct_from_string(dtype.name) + assert isinstance(dtype_instance, type(dtype)) + + def test_construct_from_string_another_type_raises(self, dtype): + msg = f"Cannot construct a '{type(dtype).__name__}' from 'another_type'" + with pytest.raises(TypeError, match=msg): + type(dtype).construct_from_string("another_type") + + def test_construct_from_string_wrong_type_raises(self, dtype): + with pytest.raises( + TypeError, + match="'construct_from_string' expects a string, got ", + ): + type(dtype).construct_from_string(0) + + def test_get_common_dtype(self, dtype): + # in practice we will not typically call this with a 1-length list + # (we shortcut to just use that dtype as the common dtype), but + # still testing as good practice to have this working (and it is the + # only case we can test in general) + assert dtype._get_common_dtype([dtype]) == dtype + + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype(self, data, data_missing, skipna): + # only testing that this works without raising an error + res = infer_dtype(data, skipna=skipna) + assert isinstance(res, str) + res = infer_dtype(data_missing, skipna=skipna) + assert isinstance(res, str) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py new file mode 100644 index 00000000..cf51d9d6 --- /dev/null +++ b/pandas/tests/extension/base/getitem.py @@ -0,0 +1,488 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseGetitemTests(BaseExtensionTests): + """Tests for ExtensionArray.__getitem__.""" + + def test_iloc_series(self, data): + ser = pd.Series(data) + result = ser.iloc[:4] + expected = pd.Series(data[:4]) + self.assert_series_equal(result, expected) + + result = ser.iloc[[0, 1, 2, 3]] + self.assert_series_equal(result, expected) + + def test_iloc_frame(self, data): + df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.iloc[:4, [0]] + self.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.iloc[[0, 1, 2, 3], [0]] + self.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name="A") + + # slice -> series + result = df.iloc[:4, 0] + self.assert_series_equal(result, expected) + + # sequence -> series + result = df.iloc[:4, 0] + self.assert_series_equal(result, expected) + + # GH#32959 slice columns with step + result = df.iloc[:, ::2] + self.assert_frame_equal(result, df[["A"]]) + result = df[["B", "A"]].iloc[:, ::2] + self.assert_frame_equal(result, df[["B"]]) + + def test_iloc_frame_single_block(self, data): + # 
GH#32959 null slice along index, slice along columns with single-block + df = pd.DataFrame({"A": data}) + + result = df.iloc[:, :] + self.assert_frame_equal(result, df) + + result = df.iloc[:, :1] + self.assert_frame_equal(result, df) + + result = df.iloc[:, :2] + self.assert_frame_equal(result, df) + + result = df.iloc[:, ::2] + self.assert_frame_equal(result, df) + + result = df.iloc[:, 1:2] + self.assert_frame_equal(result, df.iloc[:, :0]) + + result = df.iloc[:, -1:] + self.assert_frame_equal(result, df) + + def test_loc_series(self, data): + ser = pd.Series(data) + result = ser.loc[:3] + expected = pd.Series(data[:4]) + self.assert_series_equal(result, expected) + + result = ser.loc[[0, 1, 2, 3]] + self.assert_series_equal(result, expected) + + def test_loc_frame(self, data): + df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.loc[:3, ["A"]] + self.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.loc[[0, 1, 2, 3], ["A"]] + self.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name="A") + + # slice -> series + result = df.loc[:3, "A"] + self.assert_series_equal(result, expected) + + # sequence -> series + result = df.loc[:3, "A"] + self.assert_series_equal(result, expected) + + def test_loc_iloc_frame_single_dtype(self, data): + # GH#27110 bug in ExtensionBlock.iget caused df.iloc[n] to incorrectly + # return a scalar + df = pd.DataFrame({"A": data}) + expected = pd.Series([data[2]], index=["A"], name=2, dtype=data.dtype) + + result = df.loc[2] + self.assert_series_equal(result, expected) + + expected = pd.Series( + [data[-1]], index=["A"], name=len(data) - 1, dtype=data.dtype + ) + result = df.iloc[-1] + self.assert_series_equal(result, expected) + + def test_getitem_scalar(self, data): + result = data[0] + assert isinstance(result, data.dtype.type) + + result = pd.Series(data)[0] + assert isinstance(result, data.dtype.type) + + def test_getitem_invalid(self, data): + # TODO: box over scalar, [scalar], (scalar,)? 
+ + msg = ( + r"only integers, slices \(`:`\), ellipsis \(`...`\), numpy.newaxis " + r"\(`None`\) and integer or boolean arrays are valid indices" + ) + with pytest.raises(IndexError, match=msg): + data["foo"] + with pytest.raises(IndexError, match=msg): + data[2.5] + + ub = len(data) + msg = "|".join( + [ + "list index out of range", # json + "index out of bounds", # pyarrow + "Out of bounds access", # Sparse + f"loc must be an integer between -{ub} and {ub}", # Sparse + f"index {ub+1} is out of bounds for axis 0 with size {ub}", + f"index -{ub+1} is out of bounds for axis 0 with size {ub}", + ] + ) + with pytest.raises(IndexError, match=msg): + data[ub + 1] + with pytest.raises(IndexError, match=msg): + data[-ub - 1] + + def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): + result = data_missing[0] + assert na_cmp(result, na_value) + + def test_getitem_empty(self, data): + # Indexing with empty list + result = data[[]] + assert len(result) == 0 + assert isinstance(result, type(data)) + + expected = data[np.array([], dtype="int64")] + self.assert_extension_array_equal(result, expected) + + def test_getitem_mask(self, data): + # Empty mask, raw array + mask = np.zeros(len(data), dtype=bool) + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + # Empty mask, in series + mask = np.zeros(len(data), dtype=bool) + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + # non-empty mask, raw array + mask[0] = True + result = data[mask] + assert len(result) == 1 + assert isinstance(result, type(data)) + + # non-empty mask, in series + result = pd.Series(data)[mask] + assert len(result) == 1 + assert result.dtype == data.dtype + + def test_getitem_mask_raises(self, data): + mask = np.array([True, False]) + msg = f"Boolean index has wrong length: 2 instead of {len(data)}" + with pytest.raises(IndexError, match=msg): + data[mask] + + mask = pd.array(mask, dtype="boolean") + with pytest.raises(IndexError, match=msg): + data[mask] + + def test_getitem_boolean_array_mask(self, data): + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + mask[:5] = True + expected = data.take([0, 1, 2, 3, 4]) + result = data[mask] + self.assert_extension_array_equal(result, expected) + + expected = pd.Series(expected) + result = pd.Series(data)[mask] + self.assert_series_equal(result, expected) + + def test_getitem_boolean_na_treated_as_false(self, data): + # https://github.com/pandas-dev/pandas/issues/31503 + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:2] = pd.NA + mask[2:4] = True + + result = data[mask] + expected = data[mask.fillna(False)] + + self.assert_extension_array_equal(result, expected) + + s = pd.Series(data) + + result = s[mask] + expected = s[mask.fillna(False)] + + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_getitem_integer_array(self, data, idx): + result = data[idx] + assert len(result) == 3 + assert isinstance(result, type(data)) + expected = data.take([0, 1, 2]) + self.assert_extension_array_equal(result, expected) + + expected = pd.Series(expected) + result = pd.Series(data)[idx] + 
self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], + ids=["list", "integer-array"], + ) + def test_getitem_integer_with_missing_raises(self, data, idx): + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + data[idx] + + @pytest.mark.xfail( + reason="Tries label-based and raises KeyError; " + "in some cases raises when calling np.asarray" + ) + @pytest.mark.parametrize( + "idx", + [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], + ids=["list", "integer-array"], + ) + def test_getitem_series_integer_with_missing_raises(self, data, idx): + msg = "Cannot index with an integer indexer containing NA values" + # TODO: this raises KeyError about labels not found (it tries label-based) + + ser = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + with pytest.raises(ValueError, match=msg): + ser[idx] + + def test_getitem_slice(self, data): + # getitem[slice] should return an array + result = data[slice(0)] # empty + assert isinstance(result, type(data)) + + result = data[slice(1)] # scalar + assert isinstance(result, type(data)) + + def test_getitem_ellipsis_and_slice(self, data): + # GH#40353 this is called from getitem_block_index + result = data[..., :] + self.assert_extension_array_equal(result, data) + + result = data[:, ...] + self.assert_extension_array_equal(result, data) + + result = data[..., :3] + self.assert_extension_array_equal(result, data[:3]) + + result = data[:3, ...] + self.assert_extension_array_equal(result, data[:3]) + + result = data[..., ::2] + self.assert_extension_array_equal(result, data[::2]) + + result = data[::2, ...] + self.assert_extension_array_equal(result, data[::2]) + + def test_get(self, data): + # GH 20882 + s = pd.Series(data, index=[2 * i for i in range(len(data))]) + assert s.get(4) == s.iloc[2] + + result = s.get([4, 6]) + expected = s.iloc[[2, 3]] + self.assert_series_equal(result, expected) + + result = s.get(slice(2)) + expected = s.iloc[[0, 1]] + self.assert_series_equal(result, expected) + + assert s.get(-1) is None + assert s.get(s.index.max() + 1) is None + + s = pd.Series(data[:6], index=list("abcdef")) + assert s.get("c") == s.iloc[2] + + result = s.get(slice("b", "d")) + expected = s.iloc[[1, 2, 3]] + self.assert_series_equal(result, expected) + + result = s.get("Z") + assert result is None + + assert s.get(4) == s.iloc[4] + assert s.get(-1) == s.iloc[-1] + assert s.get(len(s)) is None + + # GH 21257 + s = pd.Series(data) + with tm.assert_produces_warning(None): + # GH#45324 make sure we aren't giving a spurious FutureWarning + s2 = s[::2] + assert s2.get(1) is None + + def test_take_sequence(self, data): + result = pd.Series(data)[[0, 1, 3]] + assert result.iloc[0] == data[0] + assert result.iloc[1] == data[1] + assert result.iloc[2] == data[3] + + def test_take(self, data, na_value, na_cmp): + result = data.take([0, -1]) + assert result.dtype == data.dtype + assert result[0] == data[0] + assert result[1] == data[-1] + + result = data.take([0, -1], allow_fill=True, fill_value=na_value) + assert result[0] == data[0] + assert na_cmp(result[1], na_value) + + with pytest.raises(IndexError, match="out of bounds"): + data.take([len(data) + 1]) + + def test_take_empty(self, data, na_value, na_cmp): + empty = data[:0] + + result = empty.take([-1], allow_fill=True) + assert na_cmp(result[0], na_value) + + msg = "cannot do a non-empty take from an empty axes|out of bounds" + 
+ with pytest.raises(IndexError, match=msg): + empty.take([-1]) + + with pytest.raises(IndexError, match="cannot do a non-empty take"): + empty.take([0, 1]) + + def test_take_negative(self, data): + # https://github.com/pandas-dev/pandas/issues/20640 + n = len(data) + result = data.take([0, -n, n - 1, -1]) + expected = data.take([0, 0, n - 1, n - 1]) + self.assert_extension_array_equal(result, expected) + + def test_take_non_na_fill_value(self, data_missing): + fill_value = data_missing[1] # valid + na = data_missing[0] + + arr = data_missing._from_sequence( + [na, fill_value, na], dtype=data_missing.dtype + ) + result = arr.take([-1, 1], fill_value=fill_value, allow_fill=True) + expected = arr.take([1, 1]) + self.assert_extension_array_equal(result, expected) + + def test_take_pandas_style_negative_raises(self, data, na_value): + with pytest.raises(ValueError, match=""): + data.take([0, -2], fill_value=na_value, allow_fill=True) + + @pytest.mark.parametrize("allow_fill", [True, False]) + def test_take_out_of_bounds_raises(self, data, allow_fill): + arr = data[:3] + + with pytest.raises(IndexError, match="out of bounds|out-of-bounds"): + arr.take(np.asarray([0, 3]), allow_fill=allow_fill) + + def test_take_series(self, data): + s = pd.Series(data) + result = s.take([0, -1]) + expected = pd.Series( + data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype), + index=[0, len(data) - 1], + ) + self.assert_series_equal(result, expected) + + def test_reindex(self, data, na_value): + s = pd.Series(data) + result = s.reindex([0, 1, 3]) + expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3]) + self.assert_series_equal(result, expected) + + n = len(data) + result = s.reindex([-1, 0, n]) + expected = pd.Series( + data._from_sequence([na_value, data[0], na_value], dtype=s.dtype), + index=[-1, 0, n], + ) + self.assert_series_equal(result, expected) + + result = s.reindex([n, n + 1]) + expected = pd.Series( + data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1] + ) + self.assert_series_equal(result, expected) + + def test_reindex_non_na_fill_value(self, data_missing): + valid = data_missing[1] + na = data_missing[0] + + arr = data_missing._from_sequence([na, valid], dtype=data_missing.dtype) + ser = pd.Series(arr) + result = ser.reindex([0, 1, 2], fill_value=valid) + expected = pd.Series( + data_missing._from_sequence([na, valid, valid], dtype=data_missing.dtype) + ) + + self.assert_series_equal(result, expected) + + def test_loc_len1(self, data): + # see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim + df = pd.DataFrame({"A": data}) + res = df.loc[[0], "A"] + assert res.ndim == 1 + assert res._mgr.arrays[0].ndim == 1 + if hasattr(res._mgr, "blocks"): + assert res._mgr._block.ndim == 1 + + def test_item(self, data): + # https://github.com/pandas-dev/pandas/pull/30175 + s = pd.Series(data) + result = s[:1].item() + assert result == data[0] + + msg = "can only convert an array of size 1 to a Python scalar" + with pytest.raises(ValueError, match=msg): + s[:0].item() + + with pytest.raises(ValueError, match=msg): + s.item() + + def test_ellipsis_index(self): + # GH42430 1D slices over extension types turn into N-dimensional slices over + # ExtensionArrays + class CapturingStringArray(pd.arrays.StringArray): + """Extend StringArray to capture arguments to __getitem__""" + + def __getitem__(self, item): + self.last_item_arg = item + return super().__getitem__(item) + + df = pd.DataFrame( + {"col1": CapturingStringArray(np.array(["hello", "world"], 
dtype=object))} + ) + _ = df.iloc[:1] + + # String comparison because there's no native way to compare slices. + # Before the fix for GH42430, last_item_arg would get set to the 2D slice + # (Ellipsis, slice(None, 1, None)) + self.assert_equal(str(df["col1"].array.last_item_arg), "slice(None, 1, None)") diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py new file mode 100644 index 00000000..711f1835 --- /dev/null +++ b/pandas/tests/extension/base/groupby.py @@ -0,0 +1,116 @@ +import pytest + +from pandas.core.dtypes.common import is_numeric_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseGroupbyTests(BaseExtensionTests): + """Groupby-specific tests.""" + + def test_grouping_grouper(self, data_for_grouping): + df = pd.DataFrame( + {"A": ["B", "B", None, None, "A", "A", "B", "C"], "B": data_for_grouping} + ) + gr1 = df.groupby("A").grouper.groupings[0] + gr2 = df.groupby("B").grouper.groupings[0] + + tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values) + tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("B", as_index=as_index).A.mean() + _, uniques = pd.factorize(data_for_grouping, sort=True) + + if as_index: + index = pd.Index._with_infer(uniques, name="B") + expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") + self.assert_series_equal(result, expected) + else: + expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0, 4.0]}) + self.assert_frame_equal(result, expected) + + def test_groupby_agg_extension(self, data_for_grouping): + # GH#38980 groupby agg on extension type fails for non-numeric types + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + + expected = df.iloc[[0, 2, 4, 7]] + expected = expected.set_index("A") + + result = df.groupby("A").agg({"B": "first"}) + self.assert_frame_equal(result, expected) + + result = df.groupby("A").agg("first") + self.assert_frame_equal(result, expected) + + result = df.groupby("A").first() + self.assert_frame_equal(result, expected) + + def test_groupby_extension_no_sort(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("B", sort=False).A.mean() + _, index = pd.factorize(data_for_grouping, sort=False) + + index = pd.Index._with_infer(index, name="B") + expected = pd.Series([1.0, 3.0, 4.0], index=index, name="A") + self.assert_series_equal(result, expected) + + def test_groupby_extension_transform(self, data_for_grouping): + valid = data_for_grouping[~data_for_grouping.isna()] + df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid}) + + result = df.groupby("B").A.transform(len) + expected = pd.Series([3, 3, 2, 2, 3, 1], name="A") + + self.assert_series_equal(result, expected) + + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + df.groupby("B", group_keys=False).apply(groupby_apply_op) + df.groupby("B", group_keys=False).A.apply(groupby_apply_op) + df.groupby("A", group_keys=False).apply(groupby_apply_op) + df.groupby("A", group_keys=False).B.apply(groupby_apply_op) + + def test_groupby_apply_identity(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 
1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("A").B.apply(lambda x: x.array) + expected = pd.Series( + [ + df.B.iloc[[0, 1, 6]].array, + df.B.iloc[[2, 3]].array, + df.B.iloc[[4, 5]].array, + df.B.iloc[[7]].array, + ], + index=pd.Index([1, 2, 3, 4], name="A"), + name="B", + ) + self.assert_series_equal(result, expected) + + def test_in_numeric_groupby(self, data_for_grouping): + df = pd.DataFrame( + { + "A": [1, 1, 2, 2, 3, 3, 1, 4], + "B": data_for_grouping, + "C": [1, 1, 1, 1, 1, 1, 1, 1], + } + ) + + dtype = data_for_grouping.dtype + if is_numeric_dtype(dtype) or dtype.name == "decimal": + warn = None + else: + warn = FutureWarning + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby("A").sum().columns + + if data_for_grouping.dtype._is_numeric: + expected = pd.Index(["B", "C"]) + else: + expected = pd.Index(["C"]) + + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/extension/base/index.py b/pandas/tests/extension/base/index.py new file mode 100644 index 00000000..2539c387 --- /dev/null +++ b/pandas/tests/extension/base/index.py @@ -0,0 +1,20 @@ +""" +Tests for Indexes backed by arbitrary ExtensionArrays. +""" +import pandas as pd +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseIndexTests(BaseExtensionTests): + """Tests for Index object backed by an ExtensionArray""" + + def test_index_from_array(self, data): + idx = pd.Index(data) + assert data.dtype == idx.dtype + + def test_index_from_listlike_with_dtype(self, data): + idx = pd.Index(data, dtype=data.dtype) + assert idx.dtype == data.dtype + + idx = pd.Index(list(data), dtype=data.dtype) + assert idx.dtype == data.dtype diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py new file mode 100644 index 00000000..3e8a754c --- /dev/null +++ b/pandas/tests/extension/base/interface.py @@ -0,0 +1,127 @@ +import numpy as np + +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseInterfaceTests(BaseExtensionTests): + """Tests that the basic interface is satisfied.""" + + # ------------------------------------------------------------------------ + # Interface + # ------------------------------------------------------------------------ + + def test_len(self, data): + assert len(data) == 100 + + def test_size(self, data): + assert data.size == 100 + + def test_ndim(self, data): + assert data.ndim == 1 + + def test_can_hold_na_valid(self, data): + # GH-20761 + assert data._can_hold_na is True + + def test_contains(self, data, data_missing): + # GH-37867 + # Tests for membership checks. Membership checks for nan-likes is tricky and + # the settled on rule is: `nan_like in arr` is True if nan_like is + # arr.dtype.na_value and arr.isna().any() is True. Else the check returns False. + + na_value = data.dtype.na_value + # ensure data without missing values + data = data[~data.isna()] + + # first elements are non-missing + assert data[0] in data + assert data_missing[0] in data_missing + + # check the presence of na_value + assert na_value in data_missing + assert na_value not in data + + # the data can never contain other nan-likes than na_value + for na_value_obj in tm.NULL_OBJECTS: + if na_value_obj is na_value or type(na_value_obj) == type(na_value): + # type check for e.g. 
two instances of Decimal("NAN") + continue + assert na_value_obj not in data + assert na_value_obj not in data_missing + + def test_memory_usage(self, data): + s = pd.Series(data) + result = s.memory_usage(index=False) + assert result == s.nbytes + + def test_array_interface(self, data): + result = np.array(data) + assert result[0] == data[0] + + result = np.array(data, dtype=object) + expected = np.array(list(data), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) + assert isinstance(data.dtype, ExtensionDtype) + + def test_no_values_attribute(self, data): + # GH-20735: EA's with .values attribute give problems with internal + # code, disallowing this for now until solved + assert not hasattr(data, "values") + assert not hasattr(data, "_values") + + def test_is_numeric_honored(self, data): + result = pd.Series(data) + if hasattr(result._mgr, "blocks"): + assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric + + def test_isna_extension_array(self, data_missing): + # If your `isna` returns an ExtensionArray, you must also implement + # _reduce. At the *very* least, you must implement any and all + na = data_missing.isna() + if is_extension_array_dtype(na): + assert na._reduce("any") + assert na.any() + + assert not na._reduce("all") + assert not na.all() + + assert na.dtype._is_boolean + + def test_copy(self, data): + # GH#27083 removing deep keyword from EA.copy + assert data[0] != data[1] + result = data.copy() + + data[1] = data[0] + assert result[1] != result[0] + + def test_view(self, data): + # view with no dtype should return a shallow copy, *not* the same + # object + assert data[1] != data[0] + + result = data.view() + assert result is not data + assert type(result) == type(data) + + result[1] = result[0] + assert data[1] == data[0] + + # check specifically that the `dtype` kwarg is accepted + data.view(dtype=None) + + def test_tolist(self, data): + result = data.tolist() + expected = list(data) + assert isinstance(result, list) + assert result == expected diff --git a/pandas/tests/extension/base/io.py b/pandas/tests/extension/base/io.py new file mode 100644 index 00000000..a8c25db3 --- /dev/null +++ b/pandas/tests/extension/base/io.py @@ -0,0 +1,19 @@ +from io import StringIO + +import numpy as np +import pytest + +import pandas as pd +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseParsingTests(BaseExtensionTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))}) + csv_output = df.to_csv(index=False, na_rep=np.nan) + result = pd.read_csv( + StringIO(csv_output), dtype={"with_dtype": str(data.dtype)}, engine=engine + ) + expected = df + self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py new file mode 100644 index 00000000..838c9f5b --- /dev/null +++ b/pandas/tests/extension/base/methods.py @@ -0,0 +1,610 @@ +import inspect +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_bool_dtype +from pandas.core.dtypes.missing import na_value_for_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.sorting import nargsort +from pandas.tests.extension.base.base import 
BaseExtensionTests + + +class BaseMethodsTests(BaseExtensionTests): + """Various Series and DataFrame methods.""" + + def test_value_counts_default_dropna(self, data): + # make sure we have consistent default dropna kwarg + if not hasattr(data, "value_counts"): + pytest.skip(f"value_counts is not implemented for {type(data)}") + sig = inspect.signature(data.value_counts) + kwarg = sig.parameters["dropna"] + assert kwarg.default is True + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = all_data[~all_data.isna()] + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + def test_value_counts_with_normalize(self, data): + # GH 33172 + data = data[:10].unique() + values = np.array(data[~data.isna()]) + ser = pd.Series(data, dtype=data.dtype) + + result = ser.value_counts(normalize=True).sort_index() + + if not isinstance(data, pd.Categorical): + expected = pd.Series([1 / len(values)] * len(values), index=result.index) + else: + expected = pd.Series(0.0, index=result.index) + expected[result > 0] = 1 / len(values) + if na_value_for_dtype(data.dtype) is pd.NA: + # TODO(GH#44692): avoid special-casing + expected = expected.astype("Float64") + + self.assert_series_equal(result, expected) + + def test_count(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + result = df.count(axis="columns") + expected = pd.Series([0, 1]) + self.assert_series_equal(result, expected) + + def test_series_count(self, data_missing): + # GH#26835 + ser = pd.Series(data_missing) + result = ser.count() + expected = 1 + assert result == expected + + def test_apply_simple_series(self, data): + result = pd.Series(data).apply(id) + assert isinstance(result, pd.Series) + + def test_argsort(self, data_for_sorting): + result = pd.Series(data_for_sorting).argsort() + # argsort result gets passed to take, so should be np.intp + expected = pd.Series(np.array([2, 0, 1], dtype=np.intp)) + self.assert_series_equal(result, expected) + + def test_argsort_missing_array(self, data_missing_for_sorting): + result = data_missing_for_sorting.argsort() + # argsort result gets passed to take, so should be np.intp + expected = np.array([2, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_argsort_missing(self, data_missing_for_sorting): + result = pd.Series(data_missing_for_sorting).argsort() + expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) + self.assert_series_equal(result, expected) + + def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): + # GH 24382 + + # data_for_sorting -> [B, C, A] with A < B < C + assert data_for_sorting.argmax() == 1 + assert data_for_sorting.argmin() == 2 + + # with repeated values -> first occurrence + data = data_for_sorting.take([2, 0, 0, 1, 1, 2]) + assert data.argmax() == 3 + assert data.argmin() == 0 + + # with missing values + # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. 
+ assert data_missing_for_sorting.argmax() == 0 + assert data_missing_for_sorting.argmin() == 2 + + @pytest.mark.parametrize("method", ["argmax", "argmin"]) + def test_argmin_argmax_empty_array(self, method, data): + # GH 24382 + err_msg = "attempt to get" + with pytest.raises(ValueError, match=err_msg): + getattr(data[:0], method)() + + @pytest.mark.parametrize("method", ["argmax", "argmin"]) + def test_argmin_argmax_all_na(self, method, data, na_value): + # all missing with skipna=True is the same as empty + err_msg = "attempt to get" + data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype) + with pytest.raises(ValueError, match=err_msg): + getattr(data_na, method)() + + @pytest.mark.parametrize( + "op_name, skipna, expected", + [ + ("idxmax", True, 0), + ("idxmin", True, 2), + ("argmax", True, 0), + ("argmin", True, 2), + ("idxmax", False, np.nan), + ("idxmin", False, np.nan), + ("argmax", False, -1), + ("argmin", False, -1), + ], + ) + def test_argreduce_series( + self, data_missing_for_sorting, op_name, skipna, expected + ): + # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. + ser = pd.Series(data_missing_for_sorting) + result = getattr(ser, op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting): + # GH#38733 + data = data_missing_for_sorting + + with pytest.raises(NotImplementedError, match=""): + data.argmin(skipna=False) + + with pytest.raises(NotImplementedError, match=""): + data.argmax(skipna=False) + + @pytest.mark.parametrize( + "na_position, expected", + [ + ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))), + ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))), + ], + ) + def test_nargsort(self, data_missing_for_sorting, na_position, expected): + # GH 25439 + result = nargsort(data_missing_for_sorting, na_position=na_position) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values(self, data_for_sorting, ascending, sort_by_key): + ser = pd.Series(data_for_sorting) + result = ser.sort_values(ascending=ascending, key=sort_by_key) + expected = ser.iloc[[2, 0, 1]] + if not ascending: + # GH 35922. 
Expect stable sort + if ser.nunique() == 2: + expected = ser.iloc[[0, 1, 2]] + else: + expected = ser.iloc[[1, 0, 2]] + + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_missing( + self, data_missing_for_sorting, ascending, sort_by_key + ): + ser = pd.Series(data_missing_for_sorting) + result = ser.sort_values(ascending=ascending, key=sort_by_key) + if ascending: + expected = ser.iloc[[2, 0, 1]] + else: + expected = ser.iloc[[0, 2, 1]] + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_frame(self, data_for_sorting, ascending): + df = pd.DataFrame({"A": [1, 2, 1], "B": data_for_sorting}) + result = df.sort_values(["A", "B"]) + expected = pd.DataFrame( + {"A": [1, 1, 2], "B": data_for_sorting.take([2, 0, 1])}, index=[2, 0, 1] + ) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) + @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) + def test_unique(self, data, box, method): + duplicated = box(data._from_sequence([data[0], data[0]])) + + result = method(duplicated) + + assert len(result) == 1 + assert isinstance(result, type(data)) + assert result[0] == duplicated[0] + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize(self, data_for_grouping, na_sentinel): + if na_sentinel == -1: + msg = "Specifying `na_sentinel=-1` is deprecated" + else: + msg = "Specifying the specific value to use for `na_sentinel` is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + codes, uniques = pd.factorize(data_for_grouping, na_sentinel=na_sentinel) + expected_codes = np.array( + [0, 0, na_sentinel, na_sentinel, 1, 1, 0, 2], dtype=np.intp + ) + expected_uniques = data_for_grouping.take([0, 4, 7]) + + tm.assert_numpy_array_equal(codes, expected_codes) + self.assert_extension_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize_equivalence(self, data_for_grouping, na_sentinel): + if na_sentinel == -1: + msg = "Specifying `na_sentinel=-1` is deprecated" + else: + msg = "Specifying the specific value to use for `na_sentinel` is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + codes_1, uniques_1 = pd.factorize( + data_for_grouping, na_sentinel=na_sentinel + ) + codes_2, uniques_2 = data_for_grouping.factorize(na_sentinel=na_sentinel) + + tm.assert_numpy_array_equal(codes_1, codes_2) + self.assert_extension_array_equal(uniques_1, uniques_2) + assert len(uniques_1) == len(pd.unique(uniques_1)) + assert uniques_1.dtype == data_for_grouping.dtype + + def test_factorize_empty(self, data): + codes, uniques = pd.factorize(data[:0]) + expected_codes = np.array([], dtype=np.intp) + expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype) + + tm.assert_numpy_array_equal(codes, expected_codes) + self.assert_extension_array_equal(uniques, expected_uniques) + + def test_fillna_copy_frame(self, data_missing): + arr = data_missing.take([1, 1]) + df = pd.DataFrame({"A": arr}) + + filled_val = df.iloc[0, 0] + result = df.fillna(filled_val) + + assert df.A.values is not result.A.values + + def test_fillna_copy_series(self, data_missing): + arr = data_missing.take([1, 1]) + ser = pd.Series(arr) + + filled_val = ser[0] + result = ser.fillna(filled_val) + + assert ser._values is not result._values + assert ser._values is arr + + def test_fillna_length_mismatch(self, 
data_missing): + msg = "Length of 'value' does not match." + with pytest.raises(ValueError, match=msg): + data_missing.fillna(data_missing.take([1])) + + def test_combine_le(self, data_repeated): + # GH 20825 + # Test that combine works when doing a <= (le) comparison + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series( + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))] + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= val for a in list(orig_data1)]) + self.assert_series_equal(result, expected) + + def test_combine_add(self, data_repeated): + # GH 20825 + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + with np.errstate(over="ignore"): + expected = pd.Series( + orig_data1._from_sequence( + [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))] + ) + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series( + orig_data1._from_sequence([a + val for a in list(orig_data1)]) + ) + self.assert_series_equal(result, expected) + + def test_combine_first(self, data): + # https://github.com/pandas-dev/pandas/issues/24147 + a = pd.Series(data[:3]) + b = pd.Series(data[2:5], index=[2, 3, 4]) + result = a.combine_first(b) + expected = pd.Series(data[:5]) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("frame", [True, False]) + @pytest.mark.parametrize( + "periods, indices", + [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])], + ) + def test_container_shift(self, data, frame, periods, indices): + # https://github.com/pandas-dev/pandas/issues/22386 + subset = data[:5] + data = pd.Series(subset, name="A") + expected = pd.Series(subset.take(indices, allow_fill=True), name="A") + + if frame: + result = data.to_frame(name="A").assign(B=1).shift(periods) + expected = pd.concat( + [expected, pd.Series([1] * 5, name="B").shift(periods)], axis=1 + ) + compare = self.assert_frame_equal + else: + result = data.shift(periods) + compare = self.assert_series_equal + + compare(result, expected) + + def test_shift_0_periods(self, data): + # GH#33856 shifting with periods=0 should return a copy, not same obj + result = data.shift(0) + assert data[0] != data[1] # otherwise below is invalid + data[0] = data[1] + assert result[0] != result[1] # i.e. not the same object/view + + @pytest.mark.parametrize("periods", [1, -2]) + def test_diff(self, data, periods): + data = data[:5] + if is_bool_dtype(data.dtype): + op = operator.xor + else: + op = operator.sub + try: + # does this array implement ops? 
+ op(data, data) + except Exception: + pytest.skip(f"{type(data)} does not support diff") + s = pd.Series(data) + result = s.diff(periods) + expected = pd.Series(op(data, data.shift(periods))) + self.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": data, "B": [1.0] * 5}) + result = df.diff(periods) + if periods == 1: + b = [np.nan, 0, 0, 0, 0] + else: + b = [0, 0, 0, np.nan, np.nan] + expected = pd.DataFrame({"A": expected, "B": b}) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "periods, indices", + [[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]], + ) + def test_shift_non_empty_array(self, data, periods, indices): + # https://github.com/pandas-dev/pandas/issues/23911 + subset = data[:2] + result = subset.shift(periods) + expected = subset.take(indices, allow_fill=True) + self.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("periods", [-4, -1, 0, 1, 4]) + def test_shift_empty_array(self, data, periods): + # https://github.com/pandas-dev/pandas/issues/23911 + empty = data[:0] + result = empty.shift(periods) + expected = empty + self.assert_extension_array_equal(result, expected) + + def test_shift_zero_copies(self, data): + # GH#31502 + result = data.shift(0) + assert result is not data + + result = data[:0].shift(2) + assert result is not data + + def test_shift_fill_value(self, data): + arr = data[:4] + fill_value = data[0] + result = arr.shift(1, fill_value=fill_value) + expected = data.take([0, 0, 1, 2]) + self.assert_extension_array_equal(result, expected) + + result = arr.shift(-2, fill_value=fill_value) + expected = data.take([2, 3, 0, 0]) + self.assert_extension_array_equal(result, expected) + + def test_not_hashable(self, data): + # We are in general mutable, so not hashable + with pytest.raises(TypeError, match="unhashable type"): + hash(data) + + def test_hash_pandas_object_works(self, data, as_frame): + # https://github.com/pandas-dev/pandas/issues/23066 + data = pd.Series(data) + if as_frame: + data = data.to_frame() + a = pd.util.hash_pandas_object(data) + b = pd.util.hash_pandas_object(data) + self.assert_equal(a, b) + + def test_searchsorted(self, data_for_sorting, as_series): + b, c, a = data_for_sorting + arr = data_for_sorting.take([2, 0, 1]) # to get [a, b, c] + + if as_series: + arr = pd.Series(arr) + assert arr.searchsorted(a) == 0 + assert arr.searchsorted(a, side="right") == 1 + + assert arr.searchsorted(b) == 1 + assert arr.searchsorted(b, side="right") == 2 + + assert arr.searchsorted(c) == 2 + assert arr.searchsorted(c, side="right") == 3 + + result = arr.searchsorted(arr.take([0, 2])) + expected = np.array([0, 2], dtype=np.intp) + + tm.assert_numpy_array_equal(result, expected) + + # sorter + sorter = np.array([1, 2, 0]) + assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 + + def test_where_series(self, data, na_value, as_frame): + assert data[0] != data[1] + cls = type(data) + a, b = data[:2] + + orig = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype)) + ser = orig.copy() + cond = np.array([True, True, False, False]) + + if as_frame: + ser = ser.to_frame(name="a") + cond = cond.reshape(-1, 1) + + result = ser.where(cond) + expected = pd.Series( + cls._from_sequence([a, a, na_value, na_value], dtype=data.dtype) + ) + + if as_frame: + expected = expected.to_frame(name="a") + self.assert_equal(result, expected) + + ser.mask(~cond, inplace=True) + self.assert_equal(ser, expected) + + # array other + ser = orig.copy() + if as_frame: + ser = 
ser.to_frame(name="a") + cond = np.array([True, False, True, True]) + other = cls._from_sequence([a, b, a, b], dtype=data.dtype) + if as_frame: + other = pd.DataFrame({"a": other}) + cond = pd.DataFrame({"a": cond}) + result = ser.where(cond, other) + expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) + if as_frame: + expected = expected.to_frame(name="a") + self.assert_equal(result, expected) + + ser.mask(~cond, other, inplace=True) + self.assert_equal(ser, expected) + + @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) + def test_repeat(self, data, repeats, as_series, use_numpy): + arr = type(data)._from_sequence(data[:3], dtype=data.dtype) + if as_series: + arr = pd.Series(arr) + + result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats) + + repeats = [repeats] * 3 if isinstance(repeats, int) else repeats + expected = [x for x, n in zip(arr, repeats) for _ in range(n)] + expected = type(data)._from_sequence(expected, dtype=data.dtype) + if as_series: + expected = pd.Series(expected, index=arr.index.repeat(repeats)) + + self.assert_equal(result, expected) + + @pytest.mark.parametrize( + "repeats, kwargs, error, msg", + [ + (2, {"axis": 1}, ValueError, "axis"), + (-1, {}, ValueError, "negative"), + ([1, 2], {}, ValueError, "shape"), + (2, {"foo": "bar"}, TypeError, "'foo'"), + ], + ) + def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): + with pytest.raises(error, match=msg): + if use_numpy: + np.repeat(data, repeats, **kwargs) + else: + data.repeat(repeats, **kwargs) + + def test_delete(self, data): + result = data.delete(0) + expected = data[1:] + self.assert_extension_array_equal(result, expected) + + result = data.delete([1, 3]) + expected = data._concat_same_type([data[[0]], data[[2]], data[4:]]) + self.assert_extension_array_equal(result, expected) + + def test_insert(self, data): + # insert at the beginning + result = data[1:].insert(0, data[0]) + self.assert_extension_array_equal(result, data) + + result = data[1:].insert(-len(data[1:]), data[0]) + self.assert_extension_array_equal(result, data) + + # insert at the middle + result = data[:-1].insert(4, data[-1]) + + taker = np.arange(len(data)) + taker[5:] = taker[4:-1] + taker[4] = len(data) - 1 + expected = data.take(taker) + self.assert_extension_array_equal(result, expected) + + def test_insert_invalid(self, data, invalid_scalar): + item = invalid_scalar + + with pytest.raises((TypeError, ValueError)): + data.insert(0, item) + + with pytest.raises((TypeError, ValueError)): + data.insert(4, item) + + with pytest.raises((TypeError, ValueError)): + data.insert(len(data) - 1, item) + + def test_insert_invalid_loc(self, data): + ub = len(data) + + with pytest.raises(IndexError): + data.insert(ub + 1, data[0]) + + with pytest.raises(IndexError): + data.insert(-ub - 1, data[0]) + + with pytest.raises(TypeError): + # we expect TypeError here instead of IndexError to match np.insert + data.insert(1.5, data[0]) + + @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) + def test_equals(self, data, na_value, as_series, box): + data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype) + data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype) + + data = tm.box_expected(data, box, transpose=False) + data2 = tm.box_expected(data2, box, transpose=False) + data_na = tm.box_expected(data_na, box, transpose=False) + + # we are asserting with `is True/False` explicitly, to test that the + # result is an actual Python bool, and 
not something "truthy" + + assert data.equals(data) is True + assert data.equals(data.copy()) is True + + # unequal other data + assert data.equals(data2) is False + assert data.equals(data_na) is False + + # different length + assert data[:2].equals(data[:3]) is False + + # empty are equal + assert data[:0].equals(data[:0]) is True + + # other types + assert data.equals(None) is False + assert data[[0]].equals(data[0]) is False diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py new file mode 100644 index 00000000..3d43dc47 --- /dev/null +++ b/pandas/tests/extension/base/missing.py @@ -0,0 +1,160 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_sparse +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseMissingTests(BaseExtensionTests): + def test_isna(self, data_missing): + expected = np.array([True, False]) + + result = pd.isna(data_missing) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(data_missing).isna() + expected = pd.Series(expected) + self.assert_series_equal(result, expected) + + # GH 21189 + result = pd.Series(data_missing).drop([0, 1]).isna() + expected = pd.Series([], dtype=bool) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("na_func", ["isna", "notna"]) + def test_isna_returns_copy(self, data_missing, na_func): + result = pd.Series(data_missing) + expected = result.copy() + mask = getattr(result, na_func)() + if is_sparse(mask): + mask = np.array(mask) + + mask[:] = True + self.assert_series_equal(result, expected) + + def test_dropna_array(self, data_missing): + result = data_missing.dropna() + expected = data_missing[[1]] + self.assert_extension_array_equal(result, expected) + + def test_dropna_series(self, data_missing): + ser = pd.Series(data_missing) + result = ser.dropna() + expected = ser.iloc[[1]] + self.assert_series_equal(result, expected) + + def test_dropna_frame(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + + # defaults + result = df.dropna() + expected = df.iloc[[1]] + self.assert_frame_equal(result, expected) + + # axis = 1 + result = df.dropna(axis="columns") + expected = pd.DataFrame(index=[0, 1]) + self.assert_frame_equal(result, expected) + + # multiple + df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]}) + result = df.dropna() + expected = df.iloc[:0] + self.assert_frame_equal(result, expected) + + def test_fillna_scalar(self, data_missing): + valid = data_missing[1] + result = data_missing.fillna(valid) + expected = data_missing.fillna(valid) + self.assert_extension_array_equal(result, expected) + + def test_fillna_limit_pad(self, data_missing): + arr = data_missing.take([1, 0, 0, 0, 1]) + result = pd.Series(arr).fillna(method="ffill", limit=2) + expected = pd.Series(data_missing.take([1, 1, 1, 0, 1])) + self.assert_series_equal(result, expected) + + def test_fillna_limit_backfill(self, data_missing): + arr = data_missing.take([1, 0, 0, 0, 1]) + result = pd.Series(arr).fillna(method="backfill", limit=2) + expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) + self.assert_series_equal(result, expected) + + def test_fillna_no_op_returns_copy(self, data): + data = data[~data.isna()] + + valid = data[0] + result = data.fillna(valid) + assert result is not data + self.assert_extension_array_equal(result, data) + + result = data.fillna(method="backfill") + assert result is not data + self.assert_extension_array_equal(result, data) + + def 
test_fillna_series(self, data_missing): + fill_value = data_missing[1] + ser = pd.Series(data_missing) + + result = ser.fillna(fill_value) + expected = pd.Series( + data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ) + ) + self.assert_series_equal(result, expected) + + # Fill with a series + result = ser.fillna(expected) + self.assert_series_equal(result, expected) + + # Fill with a series not affecting the missing values + result = ser.fillna(ser) + self.assert_series_equal(result, ser) + + def test_fillna_series_method(self, data_missing, fillna_method): + fill_value = data_missing[1] + + if fillna_method == "ffill": + data_missing = data_missing[::-1] + + result = pd.Series(data_missing).fillna(method=fillna_method) + expected = pd.Series( + data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ) + ) + + self.assert_series_equal(result, expected) + + def test_fillna_frame(self, data_missing): + fill_value = data_missing[1] + + result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value) + + expected = pd.DataFrame( + { + "A": data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ), + "B": [1, 2], + } + ) + + self.assert_frame_equal(result, expected) + + def test_fillna_fill_other(self, data): + result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0}) + + expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)}) + + self.assert_frame_equal(result, expected) + + def test_use_inf_as_na_no_effect(self, data_missing): + ser = pd.Series(data_missing) + expected = ser.isna() + with pd.option_context("mode.use_inf_as_na", True): + result = ser.isna() + self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py new file mode 100644 index 00000000..569782e5 --- /dev/null +++ b/pandas/tests/extension/base/ops.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core import ops +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseOpsUtil(BaseExtensionTests): + def get_op_from_name(self, op_name: str): + return tm.get_op_from_name(op_name) + + def check_opname(self, ser: pd.Series, op_name: str, other, exc=Exception): + op = self.get_op_from_name(op_name) + + self._check_op(ser, op, other, op_name, exc) + + def _combine(self, obj, other, op): + if isinstance(obj, pd.DataFrame): + if len(obj.columns) != 1: + raise NotImplementedError + expected = obj.iloc[:, 0].combine(other, op).to_frame() + else: + expected = obj.combine(other, op) + return expected + + def _check_op( + self, ser: pd.Series, op, other, op_name: str, exc=NotImplementedError + ): + if exc is None: + result = op(ser, other) + expected = self._combine(ser, other, op) + assert isinstance(result, type(ser)) + self.assert_equal(result, expected) + else: + with pytest.raises(exc): + op(ser, other) + + def _check_divmod_op(self, ser: pd.Series, op, other, exc=Exception): + # divmod has multiple return values, so check separately + if exc is None: + result_div, result_mod = op(ser, other) + if op is divmod: + expected_div, expected_mod = ser // other, ser % other + else: + expected_div, expected_mod = other // ser, other % ser + self.assert_series_equal(result_div, expected_div) + self.assert_series_equal(result_mod, expected_mod) + else: + with pytest.raises(exc): + divmod(ser, other) + + +class 
BaseArithmeticOpsTests(BaseOpsUtil): + """ + Various Series and DataFrame arithmetic ops methods. + + Subclasses supporting various ops should set the class variables + to indicate that they support ops of that kind + + * series_scalar_exc = TypeError + * frame_scalar_exc = TypeError + * series_array_exc = TypeError + * divmod_exc = TypeError + """ + + series_scalar_exc: type[Exception] | None = TypeError + frame_scalar_exc: type[Exception] | None = TypeError + series_array_exc: type[Exception] | None = TypeError + divmod_exc: type[Exception] | None = TypeError + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # series & scalar + op_name = all_arithmetic_operators + ser = pd.Series(data) + self.check_opname(ser, op_name, ser.iloc[0], exc=self.series_scalar_exc) + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + op_name = all_arithmetic_operators + df = pd.DataFrame({"A": data}) + self.check_opname(df, op_name, data[0], exc=self.frame_scalar_exc) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + # ndarray & other series + op_name = all_arithmetic_operators + ser = pd.Series(data) + self.check_opname( + ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), exc=self.series_array_exc + ) + + def test_divmod(self, data): + ser = pd.Series(data) + self._check_divmod_op(ser, divmod, 1, exc=self.divmod_exc) + self._check_divmod_op(1, ops.rdivmod, ser, exc=self.divmod_exc) + + def test_divmod_series_array(self, data, data_for_twos): + ser = pd.Series(data) + self._check_divmod_op(ser, divmod, data) + + other = data_for_twos + self._check_divmod_op(other, ops.rdivmod, ser) + + other = pd.Series(other) + self._check_divmod_op(other, ops.rdivmod, ser) + + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + result = ser + data + expected = pd.Series(data + data) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_direct_arith_with_ndframe_returns_not_implemented( + self, request, data, box + ): + # EAs should return NotImplemented for ops with Series/DataFrame + # Pandas takes care of unboxing the series and calling the EA's op. 
+ other = pd.Series(data) + if box is pd.DataFrame: + other = other.to_frame() + if not hasattr(data, "__add__"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{type(data).__name__} does not implement add" + ) + ) + result = data.__add__(other) + assert result is NotImplemented + + +class BaseComparisonOpsTests(BaseOpsUtil): + """Various Series and DataFrame comparison ops methods.""" + + def _compare_other(self, ser: pd.Series, data, op, other): + + if op.__name__ in ["eq", "ne"]: + # comparison should match point-wise comparisons + result = op(ser, other) + expected = ser.combine(other, op) + self.assert_series_equal(result, expected) + + else: + exc = None + try: + result = op(ser, other) + except Exception as err: + exc = err + + if exc is None: + # Didn't error, then should match pointwise behavior + expected = ser.combine(other, op) + self.assert_series_equal(result, expected) + else: + with pytest.raises(type(exc)): + ser.combine(other, op) + + def test_compare_scalar(self, data, comparison_op): + ser = pd.Series(data) + self._compare_other(ser, data, comparison_op, 0) + + def test_compare_array(self, data, comparison_op): + ser = pd.Series(data) + other = pd.Series([data[0]] * len(data)) + self._compare_other(ser, data, comparison_op, other) + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box): + # EAs should return NotImplemented for ops with Series/DataFrame + # Pandas takes care of unboxing the series and calling the EA's op. + other = pd.Series(data) + if box is pd.DataFrame: + other = other.to_frame() + + if hasattr(data, "__eq__"): + result = data.__eq__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement __eq__") + + if hasattr(data, "__ne__"): + result = data.__ne__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement __ne__") + + +class BaseUnaryOpsTests(BaseOpsUtil): + def test_invert(self, data): + ser = pd.Series(data, name="name") + result = ~ser + expected = pd.Series(~data, name="name") + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs]) + def test_unary_ufunc_dunder_equivalence(self, data, ufunc): + # the dunder __pos__ works if and only if np.positive works, + # same for __neg__/np.negative and __abs__/np.abs + attr = {np.positive: "__pos__", np.negative: "__neg__", np.abs: "__abs__"}[ + ufunc + ] + + exc = None + try: + result = getattr(data, attr)() + except Exception as err: + exc = err + + # if __pos__ raised, then so should the ufunc + with pytest.raises((type(exc), TypeError)): + ufunc(data) + else: + alt = ufunc(data) + self.assert_extension_array_equal(result, alt) diff --git a/pandas/tests/extension/base/printing.py b/pandas/tests/extension/base/printing.py new file mode 100644 index 00000000..eab75be6 --- /dev/null +++ b/pandas/tests/extension/base/printing.py @@ -0,0 +1,42 @@ +import io + +import pytest + +import pandas as pd +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BasePrintingTests(BaseExtensionTests): + """Tests checking the formatting of your EA when printed.""" + + @pytest.mark.parametrize("size", ["big", "small"]) + def test_array_repr(self, data, size): + if size == "small": + data = data[:5] + else: + data = type(data)._concat_same_type([data] * 5) + + result = repr(data) + assert type(data).__name__ in result + assert f"Length: 
{len(data)}" in result + assert str(data.dtype) in result + if size == "big": + assert "..." in result + + def test_array_repr_unicode(self, data): + result = str(data) + assert isinstance(result, str) + + def test_series_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + + def test_dataframe_repr(self, data): + df = pd.DataFrame({"A": data}) + repr(df) + + def test_dtype_name_in_info(self, data): + buf = io.StringIO() + pd.DataFrame({"A": data}).info(buf=buf) + result = buf.getvalue() + assert data.dtype.name in result diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py new file mode 100644 index 00000000..e363fda6 --- /dev/null +++ b/pandas/tests/extension/base/reduce.py @@ -0,0 +1,69 @@ +import warnings + +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseReduceTests(BaseExtensionTests): + """ + Reduction specific tests. Generally these only + make sense for numeric/boolean operations. + """ + + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + +class BaseNoReduceTests(BaseReduceTests): + """we don't define any reductions""" + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + s = pd.Series(data) + + msg = ( + "[Cc]annot perform|Categorical is not ordered for operation|" + "does not support reduction|" + ) + + with pytest.raises(TypeError, match=msg): + getattr(s, op_name)(skipna=skipna) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna): + op_name = all_boolean_reductions + s = pd.Series(data) + + msg = ( + "[Cc]annot perform|Categorical is not ordered for operation|" + "does not support reduction|" + ) + + with pytest.raises(TypeError, match=msg): + getattr(s, op_name)(skipna=skipna) + + +class BaseNumericReduceTests(BaseReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + s = pd.Series(data) + + # min/max with empty produce numpy warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_reduce(s, op_name, skipna) + + +class BaseBooleanReduceTests(BaseReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_boolean_reductions, skipna): + op_name = all_boolean_reductions + s = pd.Series(data) + self.check_reduce(s, op_name, skipna) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py new file mode 100644 index 00000000..babb2868 --- /dev/null +++ b/pandas/tests/extension/base/reshaping.py @@ -0,0 +1,378 @@ +import itertools + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.extensions import ExtensionArray +from pandas.core.internals.blocks import EABackedBlock +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseReshapingTests(BaseExtensionTests): + """Tests for reshaping and concatenation.""" + + @pytest.mark.parametrize("in_frame", [True, False]) + def test_concat(self, data, in_frame): + wrapped = pd.Series(data) + if in_frame: + wrapped 
= pd.DataFrame(wrapped) + result = pd.concat([wrapped, wrapped], ignore_index=True) + + assert len(result) == len(data) * 2 + + if in_frame: + dtype = result.dtypes[0] + else: + dtype = result.dtype + + assert dtype == data.dtype + if hasattr(result._mgr, "blocks"): + assert isinstance(result._mgr.blocks[0], EABackedBlock) + assert isinstance(result._mgr.arrays[0], ExtensionArray) + + @pytest.mark.parametrize("in_frame", [True, False]) + def test_concat_all_na_block(self, data_missing, in_frame): + valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) + na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3]) + if in_frame: + valid_block = pd.DataFrame({"a": valid_block}) + na_block = pd.DataFrame({"a": na_block}) + result = pd.concat([valid_block, na_block]) + if in_frame: + expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) + self.assert_frame_equal(result, expected) + else: + expected = pd.Series(data_missing.take([1, 1, 0, 0])) + self.assert_series_equal(result, expected) + + def test_concat_mixed_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/20762 + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"A": [1, 2, 3]}) + df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category") + dfs = [df1, df2, df3] + + # dataframes + result = pd.concat(dfs) + expected = pd.concat([x.astype(object) for x in dfs]) + self.assert_frame_equal(result, expected) + + # series + result = pd.concat([x["A"] for x in dfs]) + expected = pd.concat([x["A"].astype(object) for x in dfs]) + self.assert_series_equal(result, expected) + + # simple test for just EA and one other + result = pd.concat([df1, df2.astype(object)]) + expected = pd.concat([df1.astype("object"), df2.astype("object")]) + self.assert_frame_equal(result, expected) + + result = pd.concat([df1["A"], df2["A"].astype(object)]) + expected = pd.concat([df1["A"].astype("object"), df2["A"].astype("object")]) + self.assert_series_equal(result, expected) + + def test_concat_columns(self, data, na_value): + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"B": [1, 2, 3]}) + + expected = pd.DataFrame({"A": data[:3], "B": [1, 2, 3]}) + result = pd.concat([df1, df2], axis=1) + self.assert_frame_equal(result, expected) + result = pd.concat([df1["A"], df2["B"]], axis=1) + self.assert_frame_equal(result, expected) + + # non-aligned + df2 = pd.DataFrame({"B": [1, 2, 3]}, index=[1, 2, 3]) + expected = pd.DataFrame( + { + "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype), + "B": [np.nan, 1, 2, 3], + } + ) + + result = pd.concat([df1, df2], axis=1) + self.assert_frame_equal(result, expected) + result = pd.concat([df1["A"], df2["B"]], axis=1) + self.assert_frame_equal(result, expected) + + def test_concat_extension_arrays_copy_false(self, data, na_value): + # GH 20756 + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"B": data[3:7]}) + expected = pd.DataFrame( + { + "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype), + "B": data[3:7], + } + ) + result = pd.concat([df1, df2], axis=1, copy=False) + self.assert_frame_equal(result, expected) + + def test_concat_with_reindex(self, data): + # GH-33027 + a = pd.DataFrame({"a": data[:5]}) + b = pd.DataFrame({"b": data[:5]}) + result = pd.concat([a, b], ignore_index=True) + expected = pd.DataFrame( + { + "a": data.take(list(range(5)) + ([-1] * 5), allow_fill=True), + "b": data.take(([-1] * 5) + list(range(5)), allow_fill=True), + } + ) + self.assert_frame_equal(result, expected) + + def test_align(self, 
data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(data._from_sequence(list(a) + [na_value], dtype=data.dtype)) + e2 = pd.Series(data._from_sequence([na_value] + list(b), dtype=data.dtype)) + self.assert_series_equal(r1, e1) + self.assert_series_equal(r2, e2) + + def test_align_frame(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({"A": a}).align(pd.DataFrame({"A": b}, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame( + {"A": data._from_sequence(list(a) + [na_value], dtype=data.dtype)} + ) + e2 = pd.DataFrame( + {"A": data._from_sequence([na_value] + list(b), dtype=data.dtype)} + ) + self.assert_frame_equal(r1, e1) + self.assert_frame_equal(r2, e2) + + def test_align_series_frame(self, data, na_value): + # https://github.com/pandas-dev/pandas/issues/20576 + ser = pd.Series(data, name="a") + df = pd.DataFrame({"col": np.arange(len(ser) + 1)}) + r1, r2 = ser.align(df) + + e1 = pd.Series( + data._from_sequence(list(data) + [na_value], dtype=data.dtype), + name=ser.name, + ) + + self.assert_series_equal(r1, e1) + self.assert_frame_equal(r2, df) + + def test_set_frame_expand_regular_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + df["B"] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + self.assert_frame_equal(df, expected) + + def test_set_frame_expand_extension_with_regular(self, data): + df = pd.DataFrame({"A": data}) + df["B"] = [1] * len(data) + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + self.assert_frame_equal(df, expected) + + def test_set_frame_overwrite_object(self, data): + # https://github.com/pandas-dev/pandas/issues/20555 + df = pd.DataFrame({"A": [1] * len(data)}, dtype=object) + df["A"] = data + assert df.dtypes["A"] == data.dtype + + def test_merge(self, data, na_value): + # GH-20743 + df1 = pd.DataFrame({"ext": data[:3], "int1": [1, 2, 3], "key": [0, 1, 2]}) + df2 = pd.DataFrame({"int2": [1, 2, 3, 4], "key": [0, 0, 1, 3]}) + + res = pd.merge(df1, df2) + exp = pd.DataFrame( + { + "int1": [1, 1, 2], + "int2": [1, 2, 3], + "key": [0, 0, 1], + "ext": data._from_sequence( + [data[0], data[0], data[1]], dtype=data.dtype + ), + } + ) + self.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]]) + + res = pd.merge(df1, df2, how="outer") + exp = pd.DataFrame( + { + "int1": [1, 1, 2, 3, np.nan], + "int2": [1, 2, 3, np.nan, 4], + "key": [0, 0, 1, 2, 3], + "ext": data._from_sequence( + [data[0], data[0], data[1], data[2], na_value], dtype=data.dtype + ), + } + ) + self.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]]) + + def test_merge_on_extension_array(self, data): + # GH 23020 + a, b = data[:2] + key = type(data)._from_sequence([a, b], dtype=data.dtype) + + df = pd.DataFrame({"key": key, "val": [1, 2]}) + result = pd.merge(df, df, on="key") + expected = pd.DataFrame({"key": key, "val_x": [1, 2], "val_y": [1, 2]}) + self.assert_frame_equal(result, expected) + + # order + result = pd.merge(df.iloc[[1, 0]], df, on="key") + expected = expected.iloc[[1, 0]].reset_index(drop=True) + self.assert_frame_equal(result, expected) + + def test_merge_on_extension_array_duplicates(self, data): + # GH 23020 + a, b = data[:2] + key = type(data)._from_sequence([a, b, a], dtype=data.dtype) + df1 = pd.DataFrame({"key": key, "val": [1, 2, 3]}) + df2 = pd.DataFrame({"key": key, "val": [1, 2, 3]}) + + 
result = pd.merge(df1, df2, on="key") + expected = pd.DataFrame( + { + "key": key.take([0, 0, 0, 0, 1]), + "val_x": [1, 1, 3, 3, 2], + "val_y": [1, 3, 1, 3, 2], + } + ) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", + [ + ["A", "B"], + pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b")], names=["outer", "inner"] + ), + ], + ) + def test_stack(self, data, columns): + df = pd.DataFrame({"A": data[:5], "B": data[:5]}) + df.columns = columns + result = df.stack() + expected = df.astype(object).stack() + # we need a second astype(object), in case the constructor inferred + # object -> specialized, as is done for period. + expected = expected.astype(object) + + if isinstance(expected, pd.Series): + assert result.dtype == df.iloc[:, 0].dtype + else: + assert all(result.dtypes == df.iloc[:, 0].dtype) + + result = result.astype(object) + self.assert_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + # Two levels, uniform. + pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]), + # non-uniform + pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]), + # three levels, non-uniform + pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]), + pd.MultiIndex.from_tuples( + [ + ("A", "a", 1), + ("A", "b", 0), + ("A", "a", 0), + ("B", "a", 0), + ("B", "c", 1), + ] + ), + ], + ) + @pytest.mark.parametrize("obj", ["series", "frame"]) + def test_unstack(self, data, index, obj): + data = data[: len(index)] + if obj == "series": + ser = pd.Series(data, index=index) + else: + ser = pd.DataFrame({"A": data, "B": data}, index=index) + + n = index.nlevels + levels = list(range(n)) + # [0, 1, 2] + # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + combinations = itertools.chain.from_iterable( + itertools.permutations(levels, i) for i in range(1, n) + ) + + for level in combinations: + result = ser.unstack(level=level) + assert all( + isinstance(result[col].array, type(data)) for col in result.columns + ) + + if obj == "series": + # We should get the same result with to_frame+unstack+droplevel + df = ser.to_frame() + + alt = df.unstack(level=level).droplevel(0, axis=1) + self.assert_frame_equal(result, alt) + + if obj == "series": + is_sparse = isinstance(ser.dtype, pd.SparseDtype) + else: + is_sparse = isinstance(ser.dtypes.iat[0], pd.SparseDtype) + warn = None if not is_sparse else FutureWarning + with tm.assert_produces_warning(warn, match="astype from Sparse"): + obj_ser = ser.astype(object) + + expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value) + if obj == "series" and not is_sparse: + # GH#34457 SparseArray.astype(object) gives Sparse[object] + # instead of np.dtype(object) + assert (expected.dtypes == object).all() + + with tm.assert_produces_warning(warn, match="astype from Sparse"): + result = result.astype(object) + + self.assert_frame_equal(result, expected) + + def test_ravel(self, data): + # as long as EA is 1D-only, ravel is a no-op + result = data.ravel() + assert type(result) == type(data) + + # Check that we have a view, not a copy + result[0] = result[1] + assert data[0] == data[1] + + def test_transpose(self, data): + result = data.transpose() + assert type(result) == type(data) + + # check we get a new object + assert result is not data + + # If we ever _did_ support 2D, shape should be reversed + assert result.shape == data.shape[::-1] + + # Check that we have a view, not a copy + result[0] = result[1] + assert data[0] == data[1] + + def 
test_transpose_frame(self, data): + df = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=["a", "b", "c", "d"]) + result = df.T + expected = pd.DataFrame( + { + "a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype), + "b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype), + "c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype), + "d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype), + }, + index=["A", "B"], + ) + self.assert_frame_equal(result, expected) + self.assert_frame_equal(np.transpose(np.transpose(df)), df) + self.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]]) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py new file mode 100644 index 00000000..83b1679b --- /dev/null +++ b/pandas/tests/extension/base/setitem.py @@ -0,0 +1,443 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseSetitemTests(BaseExtensionTests): + @pytest.fixture( + params=[ + lambda x: x.index, + lambda x: list(x.index), + lambda x: slice(None), + lambda x: slice(0, len(x)), + lambda x: range(len(x)), + lambda x: list(range(len(x))), + lambda x: np.ones(len(x), dtype=bool), + ], + ids=[ + "index", + "list[index]", + "null_slice", + "full_slice", + "range", + "list(range)", + "mask", + ], + ) + def full_indexer(self, request): + """ + Fixture for an indexer to pass to obj.loc to get/set the full length of the + object. + + In some cases, assumes that obj.index is the default RangeIndex. + """ + return request.param + + def test_setitem_scalar_series(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + data[0] = data[1] + assert data[0] == data[1] + + def test_setitem_sequence(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + original = data.copy() + + data[[0, 1]] = [data[1], data[0]] + assert data[0] == original[1] + assert data[1] == original[0] + + def test_setitem_sequence_mismatched_length_raises(self, data, as_array): + ser = pd.Series(data) + original = ser.copy() + value = [data[0]] + if as_array: + value = data._from_sequence(value) + + xpr = "cannot set using a {} indexer with a different length" + with pytest.raises(ValueError, match=xpr.format("list-like")): + ser[[0, 1]] = value + # Ensure no modifications made before the exception + self.assert_series_equal(ser, original) + + with pytest.raises(ValueError, match=xpr.format("slice")): + ser[slice(3)] = value + self.assert_series_equal(ser, original) + + def test_setitem_empty_indexer(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + original = data.copy() + data[np.array([], dtype=int)] = [] + self.assert_equal(data, original) + + def test_setitem_sequence_broadcasts(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + data[[0, 1]] = data[2] + assert data[0] == data[2] + assert data[1] == data[2] + + @pytest.mark.parametrize("setter", ["loc", "iloc"]) + def test_setitem_scalar(self, data, setter): + arr = pd.Series(data) + setter = getattr(arr, setter) + setter[0] = data[1] + assert arr[0] == data[1] + + def test_setitem_loc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.loc[0, "B"] = data[1] + assert df.loc[0, "B"] == data[1] + + def test_setitem_loc_scalar_single(self, data): + df = 
pd.DataFrame({"B": data}) + df.loc[10, "B"] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_loc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.loc[10, "B"] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_iloc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.iloc[0, 1] = data[1] + assert df.loc[0, "B"] == data[1] + + def test_setitem_iloc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.iloc[10, 0] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_iloc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.iloc[10, 1] = data[1] + assert df.loc[10, "B"] == data[1] + + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array", "boolean-array-na"], + ) + def test_setitem_mask(self, data, mask, box_in_series): + arr = data[:5].copy() + expected = arr.take([0, 0, 0, 3, 4]) + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + arr[mask] = data[0] + self.assert_equal(expected, arr) + + def test_setitem_mask_raises(self, data, box_in_series): + # wrong length + mask = np.array([True, False]) + + if box_in_series: + data = pd.Series(data) + + with pytest.raises(IndexError, match="wrong length"): + data[mask] = data[0] + + mask = pd.array(mask, dtype="boolean") + with pytest.raises(IndexError, match="wrong length"): + data[mask] = data[0] + + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series): + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:3] = True + mask[3:5] = pd.NA + + if box_in_series: + data = pd.Series(data) + + data[mask] = data[0] + + assert (data[:3] == data[0]).all() + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series): + arr = data[:5].copy() + expected = data.take([0, 0, 0, 3, 4]) + + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[idx] = arr[0] + self.assert_equal(arr, expected) + + @pytest.mark.parametrize( + "idx, box_in_series", + [ + ([0, 1, 2, pd.NA], False), + pytest.param( + [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948") + ), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + ], + ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ) + def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): + arr = data.copy() + + # TODO(xfail) this raises KeyError about labels not found (it tries label-based) + # for list of labels with Series + if box_in_series: + arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + arr[idx] = arr[0] + + @pytest.mark.parametrize("as_callable", [True, False]) + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_aligned(self, data, as_callable, setter): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + if as_callable: + mask2 = lambda x: mask + else: + mask2 = mask + + if setter: + # loc 
+ target = getattr(ser, setter) + else: + # Series.__setitem__ + target = ser + + target[mask2] = data[5:7] + + ser[mask2] = data[5:7] + assert ser[0] == data[5] + assert ser[1] == data[6] + + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_broadcast(self, data, setter): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + if setter: # loc + target = getattr(ser, setter) + else: # __setitem__ + target = ser + + target[mask] = data[10] + assert ser[0] == data[10] + assert ser[1] == data[10] + + def test_setitem_expand_columns(self, data): + df = pd.DataFrame({"A": data}) + result = df.copy() + result["B"] = 1 + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + self.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[:, "B"] = 1 + self.assert_frame_equal(result, expected) + + # overwrite with new type + result["B"] = data + expected = pd.DataFrame({"A": data, "B": data}) + self.assert_frame_equal(result, expected) + + def test_setitem_expand_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + result = df.copy() + result["B"] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + self.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[:, "B"] = data + self.assert_frame_equal(result, expected) + + def test_setitem_frame_invalid_length(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + xpr = ( + rf"Length of values \({len(data[:5])}\) " + rf"does not match length of index \({len(df)}\)" + ) + with pytest.raises(ValueError, match=xpr): + df["B"] = data[:5] + + def test_setitem_tuple_index(self, data): + ser = pd.Series(data[:2], index=[(0, 0), (0, 1)]) + expected = pd.Series(data.take([1, 1]), index=ser.index) + ser[(0, 0)] = data[1] + self.assert_series_equal(ser, expected) + + def test_setitem_slice(self, data, box_in_series): + arr = data[:5].copy() + expected = data.take([0, 0, 0, 3, 4]) + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[:3] = data[0] + self.assert_equal(arr, expected) + + def test_setitem_loc_iloc_slice(self, data): + arr = data[:5].copy() + s = pd.Series(arr, index=["a", "b", "c", "d", "e"]) + expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index) + + result = s.copy() + result.iloc[:3] = data[0] + self.assert_equal(result, expected) + + result = s.copy() + result.loc[:"c"] = data[0] + self.assert_equal(result, expected) + + def test_setitem_slice_mismatch_length_raises(self, data): + arr = data[:5] + with pytest.raises(ValueError): + arr[:1] = arr[:2] + + def test_setitem_slice_array(self, data): + arr = data[:5].copy() + arr[:5] = data[-5:] + self.assert_extension_array_equal(arr, data[-5:]) + + def test_setitem_scalar_key_sequence_raise(self, data): + arr = data[:5].copy() + with pytest.raises(ValueError): + arr[0] = arr[[0, 1]] + + def test_setitem_preserves_views(self, data): + # GH#28150 setitem shouldn't swap the underlying data + view1 = data.view() + view2 = data[:] + + data[0] = data[1] + assert view1[0] == data[1] + assert view2[0] == data[1] + + def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): + # https://github.com/pandas-dev/pandas/issues/32395 + df = expected = pd.DataFrame({"data": pd.Series(data)}) + result = pd.DataFrame(index=df.index) + + key = full_indexer(df) + result.loc[key, "data"] = df["data"] + + self.assert_frame_equal(result, expected) + + def test_setitem_with_expansion_row(self, data, na_value): + df = 
pd.DataFrame({"data": data[:1]}) + + df.loc[1, "data"] = data[1] + expected = pd.DataFrame({"data": data[:2]}) + self.assert_frame_equal(df, expected) + + # https://github.com/pandas-dev/pandas/issues/47284 + df.loc[2, "data"] = na_value + expected = pd.DataFrame( + {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)} + ) + self.assert_frame_equal(df, expected) + + def test_setitem_series(self, data, full_indexer): + # https://github.com/pandas-dev/pandas/issues/32395 + ser = pd.Series(data, name="data") + result = pd.Series(index=ser.index, dtype=object, name="data") + + # because result has object dtype, the attempt to do setting inplace + # is successful, and object dtype is retained + key = full_indexer(ser) + result.loc[key] = ser + + expected = pd.Series( + data.astype(object), index=ser.index, name="data", dtype=object + ) + self.assert_series_equal(result, expected) + + def test_setitem_frame_2d_values(self, data): + # GH#44514 + df = pd.DataFrame({"A": data}) + + # These dtypes have non-broken implementations of _can_hold_element + has_can_hold_element = isinstance( + data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype) + ) + + # Avoiding using_array_manager fixture + # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410 + using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager) + using_copy_on_write = pd.options.mode.copy_on_write + + blk_data = df._mgr.arrays[0] + + orig = df.copy() + + msg = "will attempt to set the values inplace instead" + warn = None + if has_can_hold_element and not isinstance(data.dtype, PandasDtype): + # PandasDtype excluded because it isn't *really* supported. + warn = DeprecationWarning + + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:] = df + self.assert_frame_equal(df, orig) + + df.iloc[:-1] = df.iloc[:-1] + self.assert_frame_equal(df, orig) + + if isinstance(data.dtype, DatetimeTZDtype): + # no warning bc df.values casts to object dtype + warn = None + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:] = df.values + self.assert_frame_equal(df, orig) + if not using_array_manager and not using_copy_on_write: + # GH#33457 Check that this setting occurred in-place + # FIXME(ArrayManager): this should work there too + assert df._mgr.arrays[0] is blk_data + + df.iloc[:-1] = df.values[:-1] + self.assert_frame_equal(df, orig) + + def test_delitem_series(self, data): + # GH#40763 + ser = pd.Series(data, name="data") + + taker = np.arange(len(ser)) + taker = np.delete(taker, 1) + + expected = ser[taker] + del ser[1] + self.assert_series_equal(ser, expected) + + def test_setitem_invalid(self, data, invalid_scalar): + msg = "" # messages vary by subclass, so we do not test it + with pytest.raises((ValueError, TypeError), match=msg): + data[0] = invalid_scalar + + with pytest.raises((ValueError, TypeError), match=msg): + data[:] = invalid_scalar diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py new file mode 100644 index 00000000..3827ba23 --- /dev/null +++ b/pandas/tests/extension/conftest.py @@ -0,0 +1,195 @@ +import operator + +import pytest + +from pandas import Series + + +@pytest.fixture +def dtype(): + """A fixture providing the ExtensionDtype to validate.""" + raise NotImplementedError + + +@pytest.fixture +def data(): + """ + Length-100 array for this type. 
+ + * data[0] and data[1] should both be non missing + * data[0] and data[1] should not be equal + """ + raise NotImplementedError + + +@pytest.fixture +def data_for_twos(): + """Length-100 array in which all the elements are two.""" + raise NotImplementedError + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid]""" + raise NotImplementedError + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + Parameters + ---------- + data : fixture implementing `data` + + Returns + ------- + Callable[[int], Generator]: + A callable that takes a `count` argument and + returns a generator yielding `count` datasets. + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + +@pytest.fixture +def data_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + """ + raise NotImplementedError + + +@pytest.fixture +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + raise NotImplementedError + + +@pytest.fixture +def na_cmp(): + """ + Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. + + By default, uses ``operator.is_`` + """ + return operator.is_ + + +@pytest.fixture +def na_value(): + """The scalar missing value for this type. Default 'None'""" + return None + + +@pytest.fixture +def data_for_grouping(): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + raise NotImplementedError + + +@pytest.fixture(params=[True, False]) +def box_in_series(request): + """Whether to box the data in a Series""" + return request.param + + +@pytest.fixture( + params=[ + lambda x: 1, + lambda x: [1] * len(x), + lambda x: Series([1] * len(x)), + lambda x: x, + ], + ids=["scalar", "list", "series", "object"], +) +def groupby_apply_op(request): + """ + Functions to test groupby.apply(). + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. + """ + return request.param + + +@pytest.fixture(params=["ffill", "bfill"]) +def fillna_method(request): + """ + Parametrized fixture giving method parameters 'ffill' and 'bfill' for + Series.fillna(method=) testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_array(request): + """ + Boolean fixture to support ExtensionDtype _from_sequence method testing. + """ + return request.param + + +@pytest.fixture +def invalid_scalar(data): + """ + A scalar that *cannot* be held by this ExtensionArray. 
+ + The default should work for most subclasses, but is not guaranteed. + + If the array can hold any item (i.e. object dtype), then use pytest.skip. + """ + return object.__new__(object) diff --git a/pandas/tests/extension/date/__init__.py b/pandas/tests/extension/date/__init__.py new file mode 100644 index 00000000..2a8c7e9f --- /dev/null +++ b/pandas/tests/extension/date/__init__.py @@ -0,0 +1,6 @@ +from pandas.tests.extension.date.array import ( + DateArray, + DateDtype, +) + +__all__ = ["DateArray", "DateDtype"] diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py new file mode 100644 index 00000000..eca935cd --- /dev/null +++ b/pandas/tests/extension/date/array.py @@ -0,0 +1,182 @@ +import datetime as dt +from typing import ( + Any, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +import numpy as np + +from pandas._typing import ( + Dtype, + PositionalIndexer, +) + +from pandas.core.dtypes.dtypes import register_extension_dtype + +from pandas.api.extensions import ( + ExtensionArray, + ExtensionDtype, +) +from pandas.api.types import pandas_dtype + + +@register_extension_dtype +class DateDtype(ExtensionDtype): + @property + def type(self): + return dt.date + + @property + def name(self): + return "DateDtype" + + @classmethod + def construct_from_string(cls, string: str): + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + + if string == cls.__name__: + return cls() + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + @classmethod + def construct_array_type(cls): + return DateArray + + @property + def na_value(self): + return dt.date.min + + def __repr__(self) -> str: + return self.name + + +class DateArray(ExtensionArray): + def __init__( + self, + dates: Union[ + dt.date, + Sequence[dt.date], + Tuple[np.ndarray, np.ndarray, np.ndarray], + np.ndarray, + ], + ) -> None: + if isinstance(dates, dt.date): + self._year = np.array([dates.year]) + self._month = np.array([dates.month]) + self._day = np.array([dates.year]) + return + + ldates = len(dates) + if isinstance(dates, list): + # pre-allocate the arrays since we know the size before hand + self._year = np.zeros(ldates, dtype=np.uint16) # 65535 (0, 9999) + self._month = np.zeros(ldates, dtype=np.uint8) # 255 (1, 31) + self._day = np.zeros(ldates, dtype=np.uint8) # 255 (1, 12) + # populate them + for i, (y, m, d) in enumerate( + map(lambda date: (date.year, date.month, date.day), dates) + ): + self._year[i] = y + self._month[i] = m + self._day[i] = d + + elif isinstance(dates, tuple): + # only support triples + if ldates != 3: + raise ValueError("only triples are valid") + # check if all elements have the same type + if any(map(lambda x: not isinstance(x, np.ndarray), dates)): + raise TypeError("invalid type") + ly, lm, ld = (len(cast(np.ndarray, d)) for d in dates) + if not ly == lm == ld: + raise ValueError( + f"tuple members must have the same length: {(ly, lm, ld)}" + ) + self._year = dates[0].astype(np.uint16) + self._month = dates[1].astype(np.uint8) + self._day = dates[2].astype(np.uint8) + + elif isinstance(dates, np.ndarray) and dates.dtype == "U10": + self._year = np.zeros(ldates, dtype=np.uint16) # 65535 (0, 9999) + self._month = np.zeros(ldates, dtype=np.uint8) # 255 (1, 31) + self._day = np.zeros(ldates, dtype=np.uint8) # 255 (1, 12) + + # error: "object_" object is not iterable + obj = np.char.split(dates, sep="-") + for (i,), (y, m, d) in np.ndenumerate(obj): # type: 
ignore[misc] + self._year[i] = int(y) + self._month[i] = int(m) + self._day[i] = int(d) + + else: + raise TypeError(f"{type(dates)} is not supported") + + @property + def dtype(self) -> ExtensionDtype: + return DateDtype() + + def astype(self, dtype, copy=True): + dtype = pandas_dtype(dtype) + + if isinstance(dtype, DateDtype): + data = self.copy() if copy else self + else: + data = self.to_numpy(dtype=dtype, copy=copy, na_value=dt.date.min) + + return data + + @property + def nbytes(self) -> int: + return self._year.nbytes + self._month.nbytes + self._day.nbytes + + def __len__(self) -> int: + return len(self._year) # all 3 arrays are enforced to have the same length + + def __getitem__(self, item: PositionalIndexer): + if isinstance(item, int): + return dt.date(self._year[item], self._month[item], self._day[item]) + else: + raise NotImplementedError("only ints are supported as indexes") + + def __setitem__(self, key: Union[int, slice, np.ndarray], value: Any): + if not isinstance(key, int): + raise NotImplementedError("only ints are supported as indexes") + + if not isinstance(value, dt.date): + raise TypeError("you can only set datetime.date types") + + self._year[key] = value.year + self._month[key] = value.month + self._day[key] = value.day + + def __repr__(self) -> str: + return f"DateArray{list(zip(self._year, self._month, self._day))}" + + def copy(self) -> "DateArray": + return DateArray((self._year.copy(), self._month.copy(), self._day.copy())) + + def isna(self) -> np.ndarray: + return np.logical_and( + np.logical_and( + self._year == dt.date.min.year, self._month == dt.date.min.month + ), + self._day == dt.date.min.day, + ) + + @classmethod + def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False): + if isinstance(scalars, dt.date): + pass + elif isinstance(scalars, DateArray): + pass + elif isinstance(scalars, np.ndarray): + scalars = scalars.astype("U10") # 10 chars for yyyy-mm-dd + return DateArray(scalars) diff --git a/pandas/tests/extension/decimal/__init__.py b/pandas/tests/extension/decimal/__init__.py new file mode 100644 index 00000000..34727b43 --- /dev/null +++ b/pandas/tests/extension/decimal/__init__.py @@ -0,0 +1,8 @@ +from pandas.tests.extension.decimal.array import ( + DecimalArray, + DecimalDtype, + make_data, + to_decimal, +) + +__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"] diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py new file mode 100644 index 00000000..6eaa90d7 --- /dev/null +++ b/pandas/tests/extension/decimal/array.py @@ -0,0 +1,288 @@ +from __future__ import annotations + +import decimal +import numbers +import random +import sys + +import numpy as np + +from pandas._typing import type_t + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_float, + pandas_dtype, +) + +import pandas as pd +from pandas.api.extensions import ( + no_default, + register_extension_dtype, +) +from pandas.api.types import ( + is_list_like, + is_scalar, +) +from pandas.core import arraylike +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ( + ExtensionArray, + ExtensionScalarOpsMixin, +) +from pandas.core.indexers import check_array_indexer + + +@register_extension_dtype +class DecimalDtype(ExtensionDtype): + type = decimal.Decimal + name = "decimal" + na_value = decimal.Decimal("NaN") + _metadata = ("context",) + + def __init__(self, context=None) -> None: + self.context = context or 
decimal.getcontext() + + def __repr__(self) -> str: + return f"DecimalDtype(context={self.context})" + + @classmethod + def construct_array_type(cls) -> type_t[DecimalArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return DecimalArray + + @property + def _is_numeric(self) -> bool: + return True + + +class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray): + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False, context=None) -> None: + for i, val in enumerate(values): + if is_float(val): + if np.isnan(val): + values[i] = DecimalDtype.na_value + else: + values[i] = DecimalDtype.type(val) + elif not isinstance(val, decimal.Decimal): + raise TypeError("All values must be of type " + str(decimal.Decimal)) + values = np.asarray(values, dtype=object) + + self._data = values + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self.data = self._data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data + self._dtype = DecimalDtype(context) + + @property + def dtype(self): + return self._dtype + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + return cls._from_sequence([decimal.Decimal(x) for x in strings], dtype, copy) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values) + + _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray) + + def to_numpy( + self, + dtype=None, + copy: bool = False, + na_value: object = no_default, + decimals=None, + ) -> np.ndarray: + result = np.asarray(self, dtype=dtype) + if decimals is not None: + result = np.asarray([round(x, decimals) for x in result]) + return result + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # + if not all( + isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs + ): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + # e.g. test_array_ufunc_series_scalar_other + return result + + if "out" in kwargs: + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + def reconstruct(x): + if isinstance(x, (decimal.Decimal, numbers.Number)): + return x + else: + return DecimalArray._from_sequence(x) + + if ufunc.nout > 1: + return tuple(reconstruct(x) for x in result) + else: + return reconstruct(result) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self._data[item] + else: + # array, slice. 
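+            # Editor's sketch (illustrative, not part of the upstream logic):
+            # check_array_indexer normalises the list-like indexer before it is
+            # applied to the underlying object ndarray, e.g. for a length-3
+            # DecimalArray ``arr``:
+            #
+            #   check_array_indexer(arr, [True, False, True])             # length-checked bool ndarray
+            #   check_array_indexer(arr, pd.array([0, 2], dtype="Int64")) # plain integer ndarray, NA rejected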
+ item = pd.api.indexers.check_array_indexer(self, item) + return type(self)(self._data[item]) + + def take(self, indexer, allow_fill=False, fill_value=None): + from pandas.api.extensions import take + + data = self._data + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result) + + def copy(self): + return type(self)(self._data.copy(), dtype=self.dtype) + + def astype(self, dtype, copy=True): + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + dtype = pandas_dtype(dtype) + if isinstance(dtype, type(self.dtype)): + return type(self)(self._data, copy=copy, context=dtype.context) + + return super().astype(dtype, copy=copy) + + def __setitem__(self, key, value): + if is_list_like(value): + if is_scalar(key): + raise ValueError("setting an array element with a sequence.") + value = [decimal.Decimal(v) for v in value] + else: + value = decimal.Decimal(value) + + key = check_array_indexer(self, key) + self._data[key] = value + + def __len__(self) -> int: + return len(self._data) + + def __contains__(self, item) -> bool | np.bool_: + if not isinstance(item, decimal.Decimal): + return False + elif item.is_nan(): + return self.isna().any() + else: + return super().__contains__(item) + + @property + def nbytes(self) -> int: + n = len(self) + if n: + return n * sys.getsizeof(self[0]) + return 0 + + def isna(self): + return np.array([x.is_nan() for x in self._data], dtype=bool) + + @property + def _na_value(self): + return decimal.Decimal("NaN") + + def _formatter(self, boxed=False): + if boxed: + return "Decimal: {}".format + return repr + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate([x._data for x in to_concat])) + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + + if skipna: + # If we don't have any NAs, we can ignore skipna + if self.isna().any(): + other = self[~self.isna()] + return other._reduce(name, **kwargs) + + if name == "sum" and len(self) == 0: + # GH#29630 avoid returning int 0 or np.bool_(False) on old numpy + return decimal.Decimal(0) + + try: + op = getattr(self.data, name) + except AttributeError as err: + raise NotImplementedError( + f"decimal does not support the {name} operation" + ) from err + return op(axis=0) + + def _cmp_method(self, other, op): + # For use with OpsMixin + def convert_values(param): + if isinstance(param, ExtensionArray) or is_list_like(param): + ovalues = param + else: + # Assume it's an object + ovalues = [param] * len(self) + return ovalues + + lvalues = self + rvalues = convert_values(other) + + # If the operator is not defined for the underlying objects, + # a TypeError should be raised + res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] + + return np.asarray(res, dtype=bool) + + def value_counts(self, dropna: bool = True): + from pandas.core.algorithms import value_counts + + return value_counts(self.to_numpy(), dropna=dropna) + + +def to_decimal(values, context=None): + return DecimalArray([decimal.Decimal(x) for x in values], context=context) + + +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] + + +DecimalArray._add_arithmetic_ops() diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py new file mode 100644 index 00000000..c2e42ae2 --- /dev/null +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -0,0 +1,481 @@ +import decimal +import operator + 
+import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import infer_dtype +from pandas.tests.extension import base +from pandas.tests.extension.decimal.array import ( + DecimalArray, + DecimalDtype, + make_data, + to_decimal, +) + + +@pytest.fixture +def dtype(): + return DecimalDtype() + + +@pytest.fixture +def data(): + return DecimalArray(make_data()) + + +@pytest.fixture +def data_for_twos(): + return DecimalArray([decimal.Decimal(2) for _ in range(100)]) + + +@pytest.fixture +def data_missing(): + return DecimalArray([decimal.Decimal("NaN"), decimal.Decimal(1)]) + + +@pytest.fixture +def data_for_sorting(): + return DecimalArray( + [decimal.Decimal("1"), decimal.Decimal("2"), decimal.Decimal("0")] + ) + + +@pytest.fixture +def data_missing_for_sorting(): + return DecimalArray( + [decimal.Decimal("1"), decimal.Decimal("NaN"), decimal.Decimal("0")] + ) + + +@pytest.fixture +def na_cmp(): + return lambda x, y: x.is_nan() and y.is_nan() + + +@pytest.fixture +def na_value(): + return decimal.Decimal("NaN") + + +@pytest.fixture +def data_for_grouping(): + b = decimal.Decimal("1.0") + a = decimal.Decimal("0.0") + c = decimal.Decimal("2.0") + na = decimal.Decimal("NaN") + return DecimalArray([b, b, na, na, a, a, b, c]) + + +class TestDtype(base.BaseDtypeTests): + def test_hashable(self, dtype): + pass + + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype(self, data, data_missing, skipna): + # here overriding base test to ensure we fall back to return + # "unknown-array" for an EA pandas doesn't know + assert infer_dtype(data, skipna=skipna) == "unknown-array" + assert infer_dtype(data_missing, skipna=skipna) == "unknown-array" + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + def test_take_na_value_other_decimal(self): + arr = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + result = arr.take([0, -1], allow_fill=True, fill_value=decimal.Decimal("-1.0")) + expected = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("-1.0")]) + self.assert_extension_array_equal(result, expected) + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class Reduce: + def check_reduce(self, s, op_name, skipna): + + if op_name in ["median", "skew", "kurt"]: + msg = r"decimal does not support the .* operation" + with pytest.raises(NotImplementedError, match=msg): + getattr(s, op_name)(skipna=skipna) + + else: + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(np.asarray(s), op_name)() + tm.assert_almost_equal(result, expected) + + +class TestNumericReduce(Reduce, base.BaseNumericReduceTests): + pass + + +class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna, request): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + vcs = pd.Series(all_data).value_counts(dropna=dropna) + vcs_ex = pd.Series(other).value_counts(dropna=dropna) + + with decimal.localcontext() as ctx: + # avoid raising when comparing Decimal("NAN") < Decimal(2) + ctx.traps[decimal.InvalidOperation] = False + + result = vcs.sort_index() + expected = vcs_ex.sort_index() + + 
tm.assert_series_equal(result, expected) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + def test_groupby_agg_extension(self, data_for_grouping): + super().test_groupby_agg_extension(data_for_grouping) + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestPrinting(base.BasePrintingTests): + def test_series_repr(self, data): + # Overriding this base test to explicitly test that + # the custom _formatter is used + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + assert "Decimal: " in repr(ser) + + +@pytest.mark.xfail( + reason=( + "DecimalArray constructor raises bc _from_sequence wants Decimals, not ints." + "Easy to fix, just need to do it." + ), + raises=TypeError, +) +def test_series_constructor_coerce_data_to_extension_dtype_raises(): + xpr = ( + "Cannot cast data to extension dtype 'decimal'. Pass the " + "extension array directly." + ) + with pytest.raises(ValueError, match=xpr): + pd.Series([0, 1, 2], dtype=DecimalDtype()) + + +def test_series_constructor_with_dtype(): + arr = DecimalArray([decimal.Decimal("10.0")]) + result = pd.Series(arr, dtype=DecimalDtype()) + expected = pd.Series(arr) + tm.assert_series_equal(result, expected) + + result = pd.Series(arr, dtype="int64") + expected = pd.Series([10]) + tm.assert_series_equal(result, expected) + + +def test_dataframe_constructor_with_dtype(): + arr = DecimalArray([decimal.Decimal("10.0")]) + + result = pd.DataFrame({"A": arr}, dtype=DecimalDtype()) + expected = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, expected) + + arr = DecimalArray([decimal.Decimal("10.0")]) + result = pd.DataFrame({"A": arr}, dtype="int64") + expected = pd.DataFrame({"A": [10]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("frame", [True, False]) +def test_astype_dispatches(frame): + # This is a dtype-specific test that ensures Series[decimal].astype + # gets all the way through to ExtensionArray.astype + # Designing a reliable smoke test that works for arbitrary data types + # is difficult. 
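+    # Editor's sketch of the dispatch being exercised (assumes the DecimalArray /
+    # DecimalDtype defined in pandas/tests/extension/decimal/array.py):
+    #
+    #   ser = pd.Series(DecimalArray([decimal.Decimal("1.5")]))
+    #   ser.astype(DecimalDtype(decimal.Context(prec=3)))
+    #   # Series.astype -> ExtensionArray.astype -> DecimalArray.astype, which
+    #   # re-wraps the stored values with the new context instead of taking a
+    #   # NumPy object-array detour.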
+ data = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a") + ctx = decimal.Context() + ctx.prec = 5 + + if frame: + data = data.to_frame() + + result = data.astype(DecimalDtype(ctx)) + + if frame: + result = result["a"] + + assert result.dtype.context.prec == ctx.prec + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + op_name = all_arithmetic_operators + s = pd.Series(data) + + context = decimal.getcontext() + divbyzerotrap = context.traps[decimal.DivisionByZero] + invalidoptrap = context.traps[decimal.InvalidOperation] + context.traps[decimal.DivisionByZero] = 0 + context.traps[decimal.InvalidOperation] = 0 + + # Decimal supports ops with int, but not float + other = pd.Series([int(d * 100) for d in data]) + self.check_opname(s, op_name, other) + + if "mod" not in op_name: + self.check_opname(s, op_name, s * 2) + + self.check_opname(s, op_name, 0) + self.check_opname(s, op_name, 5) + context.traps[decimal.DivisionByZero] = divbyzerotrap + context.traps[decimal.InvalidOperation] = invalidoptrap + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + # We implement divmod + super()._check_divmod_op(s, op, other, exc=None) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def test_compare_scalar(self, data, comparison_op): + s = pd.Series(data) + self._compare_other(s, data, comparison_op, 0.5) + + def test_compare_array(self, data, comparison_op): + s = pd.Series(data) + + alter = np.random.choice([-1, 0, 1], len(data)) + # Randomly double, halve or keep same value + other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] + self._compare_other(s, data, comparison_op, other) + + +class DecimalArrayWithoutFromSequence(DecimalArray): + """Helper class for testing error handling in _from_sequence.""" + + def _from_sequence(cls, scalars, dtype=None, copy=False): + raise KeyError("For the test") + + +class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence): + @classmethod + def _create_arithmetic_method(cls, op): + return cls._create_method(op, coerce_to_dtype=False) + + +DecimalArrayWithoutCoercion._add_arithmetic_ops() + + +def test_combine_from_sequence_raises(monkeypatch): + # https://github.com/pandas-dev/pandas/issues/22850 + cls = DecimalArrayWithoutFromSequence + + @classmethod + def construct_array_type(cls): + return DecimalArrayWithoutFromSequence + + monkeypatch.setattr(DecimalDtype, "construct_array_type", construct_array_type) + + arr = cls([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + ser = pd.Series(arr) + result = ser.combine(ser, operator.add) + + # note: object dtype + expected = pd.Series( + [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object" + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion] +) +def test_scalar_ops_from_sequence_raises(class_): + # op(EA, EA) should return an EA, or an ndarray if it's not possible + # to return an EA with the return values. 
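+    # Editor's sketch (illustrative): because _from_sequence raises for these
+    # helper classes, the op cannot re-wrap its result in an ExtensionArray and
+    # falls back to a plain object ndarray, e.g.
+    #
+    #   arr = DecimalArrayWithoutFromSequence([decimal.Decimal("1"), decimal.Decimal("2")])
+    #   arr + arr   # -> np.array([Decimal("2"), Decimal("4")], dtype=object)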
+ arr = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + result = arr + arr + expected = np.array( + [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object" + ) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "reverse, expected_div, expected_mod", + [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])], +) +def test_divmod_array(reverse, expected_div, expected_mod): + # https://github.com/pandas-dev/pandas/issues/22930 + arr = to_decimal([1, 2, 3, 4]) + if reverse: + div, mod = divmod(2, arr) + else: + div, mod = divmod(arr, 2) + expected_div = to_decimal(expected_div) + expected_mod = to_decimal(expected_mod) + + tm.assert_extension_array_equal(div, expected_div) + tm.assert_extension_array_equal(mod, expected_mod) + + +def test_ufunc_fallback(data): + a = data[:5] + s = pd.Series(a, index=range(3, 8)) + result = np.abs(s) + expected = pd.Series(np.abs(a), index=range(3, 8)) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc(): + a = to_decimal([1, 2, 3]) + result = np.exp(a) + expected = to_decimal(np.exp(a._data)) + tm.assert_extension_array_equal(result, expected) + + +def test_array_ufunc_series(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.exp(s) + expected = pd.Series(to_decimal(np.exp(a._data))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_scalar_other(): + # check _HANDLED_TYPES + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.add(s, decimal.Decimal(1)) + expected = pd.Series(np.add(a, decimal.Decimal(1))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_defer(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + + expected = pd.Series(to_decimal([2, 4, 6])) + r1 = np.add(s, a) + r2 = np.add(a, s) + + tm.assert_series_equal(r1, expected) + tm.assert_series_equal(r2, expected) + + +def test_groupby_agg(): + # Ensure that the result of agg is inferred to be decimal dtype + # https://github.com/pandas-dev/pandas/issues/29141 + + data = make_data()[:5] + df = pd.DataFrame( + {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)} + ) + + # single key, selected column + expected = pd.Series(to_decimal([data[0], data[3]])) + result = df.groupby("id1")["decimals"].agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + result = df["decimals"].groupby(df["id1"]).agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + + # multiple keys, selected column + expected = pd.Series( + to_decimal([data[0], data[1], data[3]]), + index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 1)]), + ) + result = df.groupby(["id1", "id2"])["decimals"].agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + result = df["decimals"].groupby([df["id1"], df["id2"]]).agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + + # multiple columns + expected = pd.DataFrame({"id2": [0, 1], "decimals": to_decimal([data[0], data[3]])}) + result = df.groupby("id1").agg(lambda x: x.iloc[0]) + tm.assert_frame_equal(result, expected, check_names=False) + + +def test_groupby_agg_ea_method(monkeypatch): + # Ensure that the result of agg is inferred to be decimal dtype + # https://github.com/pandas-dev/pandas/issues/29141 + + def DecimalArray__my_sum(self): + return np.sum(np.array(self)) + + monkeypatch.setattr(DecimalArray, "my_sum", DecimalArray__my_sum, raising=False) + + data = make_data()[:5] + df 
= pd.DataFrame({"id": [0, 0, 0, 1, 1], "decimals": DecimalArray(data)}) + expected = pd.Series(to_decimal([data[0] + data[1] + data[2], data[3] + data[4]])) + + result = df.groupby("id")["decimals"].agg(lambda x: x.values.my_sum()) + tm.assert_series_equal(result, expected, check_names=False) + s = pd.Series(DecimalArray(data)) + result = s.groupby(np.array([0, 0, 0, 1, 1])).agg(lambda x: x.values.my_sum()) + tm.assert_series_equal(result, expected, check_names=False) + + +def test_indexing_no_materialize(monkeypatch): + # See https://github.com/pandas-dev/pandas/issues/29708 + # Ensure that indexing operations do not materialize (convert to a numpy + # array) the ExtensionArray unnecessary + + def DecimalArray__array__(self, dtype=None): + raise Exception("tried to convert a DecimalArray to a numpy array") + + monkeypatch.setattr(DecimalArray, "__array__", DecimalArray__array__, raising=False) + + data = make_data() + s = pd.Series(DecimalArray(data)) + df = pd.DataFrame({"a": s, "b": range(len(s))}) + + # ensure the following operations do not raise an error + s[s > 0.5] + df[s > 0.5] + s.at[0] + df.at[0, "a"] + + +def test_to_numpy_keyword(): + # test the extra keyword + values = [decimal.Decimal("1.1111"), decimal.Decimal("2.2222")] + expected = np.array( + [decimal.Decimal("1.11"), decimal.Decimal("2.22")], dtype="object" + ) + a = pd.array(values, dtype="decimal") + result = a.to_numpy(decimals=2) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(a).to_numpy(decimals=2) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/extension/json/__init__.py b/pandas/tests/extension/json/__init__.py new file mode 100644 index 00000000..7ebfd54a --- /dev/null +++ b/pandas/tests/extension/json/__init__.py @@ -0,0 +1,7 @@ +from pandas.tests.extension.json.array import ( + JSONArray, + JSONDtype, + make_data, +) + +__all__ = ["JSONArray", "JSONDtype", "make_data"] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py new file mode 100644 index 00000000..125a0aee --- /dev/null +++ b/pandas/tests/extension/json/array.py @@ -0,0 +1,244 @@ +""" +Test extension array for storing nested data in a pandas container. + +The JSONArray stores lists of dictionaries. The storage mechanism is a list, +not an ndarray. + +Note +---- +We currently store lists of UserDicts. Pandas has a few places +internally that specifically check for dicts, and does non-scalar things +in that case. We *want* the dictionaries to be treated as scalars, so we +hack around pandas by using UserDicts. +""" +from __future__ import annotations + +from collections import ( + UserDict, + abc, +) +import itertools +import numbers +import random +import string +import sys +from typing import ( + Any, + Mapping, +) + +import numpy as np + +from pandas._typing import type_t + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_list_like, + pandas_dtype, +) + +import pandas as pd +from pandas.api.extensions import ( + ExtensionArray, + ExtensionDtype, +) +from pandas.core.indexers import unpack_tuple_and_ellipses + + +class JSONDtype(ExtensionDtype): + type = abc.Mapping + name = "json" + na_value: Mapping[str, Any] = UserDict() + + @classmethod + def construct_array_type(cls) -> type_t[JSONArray]: + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + return JSONArray + + +class JSONArray(ExtensionArray): + dtype = JSONDtype() + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False) -> None: + for val in values: + if not isinstance(val, self.dtype.type): + raise TypeError("All values must be of type " + str(self.dtype.type)) + self.data = values + + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self._data = self.data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars) + + @classmethod + def _from_factorized(cls, values, original): + return cls([UserDict(x) for x in values if x != ()]) + + def __getitem__(self, item): + if isinstance(item, tuple): + item = unpack_tuple_and_ellipses(item) + + if isinstance(item, numbers.Integral): + return self.data[item] + elif isinstance(item, slice) and item == slice(None): + # Make sure we get a view + return type(self)(self.data) + elif isinstance(item, slice): + # slice + return type(self)(self.data[item]) + elif not is_list_like(item): + # e.g. "foo" or 2.5 + # exception message copied from numpy + raise IndexError( + r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis " + r"(`None`) and integer or boolean arrays are valid indices" + ) + else: + item = pd.api.indexers.check_array_indexer(self, item) + if is_bool_dtype(item.dtype): + return self._from_sequence([x for x, m in zip(self, item) if m]) + # integer + return type(self)([self.data[i] for i in item]) + + def __setitem__(self, key, value): + if isinstance(key, numbers.Integral): + self.data[key] = value + else: + if not isinstance(value, (type(self), abc.Sequence)): + # broadcast value + value = itertools.cycle([value]) + + if isinstance(key, np.ndarray) and key.dtype == "bool": + # masking + for i, (k, v) in enumerate(zip(key, value)): + if k: + assert isinstance(v, self.dtype.type) + self.data[i] = v + else: + for k, v in zip(key, value): + assert isinstance(v, self.dtype.type) + self.data[k] = v + + def __len__(self) -> int: + return len(self.data) + + def __eq__(self, other): + return NotImplemented + + def __ne__(self, other): + return NotImplemented + + def __array__(self, dtype=None): + if dtype is None: + dtype = object + if dtype == object: + # on py38 builds it looks like numpy is inferring to a non-1D array + return construct_1d_object_array_from_listlike(list(self)) + return np.asarray(self.data, dtype=dtype) + + @property + def nbytes(self) -> int: + return sys.getsizeof(self.data) + + def isna(self): + return np.array([x == self.dtype.na_value for x in self.data], dtype=bool) + + def take(self, indexer, allow_fill=False, fill_value=None): + # re-implement here, since NumPy has trouble setting + # sized objects like UserDicts into scalar slots of + # an ndarary. + indexer = np.asarray(indexer) + msg = ( + "Index is out of bounds or cannot do a " + "non-empty take from an empty array." 
+ ) + + if allow_fill: + if fill_value is None: + fill_value = self.dtype.na_value + # bounds check + if (indexer < -1).any(): + raise ValueError + try: + output = [ + self.data[loc] if loc != -1 else fill_value for loc in indexer + ] + except IndexError as err: + raise IndexError(msg) from err + else: + try: + output = [self.data[loc] for loc in indexer] + except IndexError as err: + raise IndexError(msg) from err + + return self._from_sequence(output) + + def copy(self): + return type(self)(self.data[:]) + + def astype(self, dtype, copy=True): + # NumPy has issues when all the dicts are the same length. + # np.array([UserDict(...), UserDict(...)]) fails, + # but np.array([{...}, {...}]) works, so cast. + from pandas.core.arrays.string_ import StringDtype + + dtype = pandas_dtype(dtype) + # needed to add this check for the Series constructor + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + elif isinstance(dtype, StringDtype): + value = self.astype(str) # numpy doesn'y like nested dicts + return dtype.construct_array_type()._from_sequence(value, copy=False) + + return np.array([dict(x) for x in self], dtype=dtype, copy=copy) + + def unique(self): + # Parent method doesn't work since np.array will try to infer + # a 2-dim object. + return type(self)([dict(x) for x in {tuple(d.items()) for d in self.data}]) + + @classmethod + def _concat_same_type(cls, to_concat): + data = list(itertools.chain.from_iterable(x.data for x in to_concat)) + return cls(data) + + def _values_for_factorize(self): + frozen = self._values_for_argsort() + if len(frozen) == 0: + # factorize_array expects 1-d array, this is a len-0 2-d array. + frozen = frozen.ravel() + return frozen, () + + def _values_for_argsort(self): + # Bypass NumPy's shape inference to get a (N,) array of tuples. + frozen = [tuple(x.items()) for x in self] + return construct_1d_object_array_from_listlike(frozen) + + +def make_data(): + # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer + return [ + UserDict( + [ + (random.choice(string.ascii_letters), random.randint(0, 100)) + for _ in range(random.randint(0, 10)) + ] + ) + for _ in range(100) + ] diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py new file mode 100644 index 00000000..f3129a89 --- /dev/null +++ b/pandas/tests/extension/json/test_json.py @@ -0,0 +1,390 @@ +import collections +import operator +import sys + +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension import base +from pandas.tests.extension.json.array import ( + JSONArray, + JSONDtype, + make_data, +) + + +@pytest.fixture +def dtype(): + return JSONDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + data = make_data() + + # Why the while loop? NumPy is unable to construct an ndarray from + # equal-length ndarrays. Many of our operations involve coercing the + # EA to an ndarray of objects. To avoid random test failures, we ensure + # that our data is coercible to an ndarray. Several tests deal with only + # the first two elements, so that's what we'll check. 
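+    # Editor's note (illustrative): the problematic case is two leading records
+    # with the same number of keys, e.g. something like
+    #
+    #   np.asarray([collections.UserDict({"a": 1}), collections.UserDict({"b": 2})], dtype=object)
+    #
+    # where NumPy may infer a nested rather than 1-D object layout, so the loop
+    # below simply regenerates the data until len(data[0]) != len(data[1]).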
+ + while len(data[0]) == len(data[1]): + data = make_data() + + return JSONArray(data) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return JSONArray([{}, {"a": 10}]) + + +@pytest.fixture +def data_for_sorting(): + return JSONArray([{"b": 1}, {"c": 4}, {"a": 2, "c": 3}]) + + +@pytest.fixture +def data_missing_for_sorting(): + return JSONArray([{"b": 1}, {}, {"a": 4}]) + + +@pytest.fixture +def na_value(dtype): + return dtype.na_value + + +@pytest.fixture +def na_cmp(): + return operator.eq + + +@pytest.fixture +def data_for_grouping(): + return JSONArray( + [ + {"b": 1}, + {"b": 1}, + {}, + {}, + {"a": 0, "c": 2}, + {"a": 0, "c": 2}, + {"b": 1}, + {"c": 2}, + ] + ) + + +class BaseJSON: + # NumPy doesn't handle an array of equal-length UserDicts. + # The default assert_series_equal eventually does a + # Series.values, which raises. We work around it by + # converting the UserDicts to dicts. + @classmethod + def assert_series_equal(cls, left, right, *args, **kwargs): + if left.dtype.name == "json": + assert left.dtype == right.dtype + left = pd.Series( + JSONArray(left.values.astype(object)), index=left.index, name=left.name + ) + right = pd.Series( + JSONArray(right.values.astype(object)), + index=right.index, + name=right.name, + ) + tm.assert_series_equal(left, right, *args, **kwargs) + + @classmethod + def assert_frame_equal(cls, left, right, *args, **kwargs): + obj_type = kwargs.get("obj", "DataFrame") + tm.assert_index_equal( + left.columns, + right.columns, + exact=kwargs.get("check_column_type", "equiv"), + check_names=kwargs.get("check_names", True), + check_exact=kwargs.get("check_exact", False), + check_categorical=kwargs.get("check_categorical", True), + obj=f"{obj_type}.columns", + ) + + jsons = (left.dtypes == "json").index + + for col in jsons: + cls.assert_series_equal(left[col], right[col], *args, **kwargs) + + left = left.drop(columns=jsons) + right = right.drop(columns=jsons) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestDtype(BaseJSON, base.BaseDtypeTests): + pass + + +class TestInterface(BaseJSON, base.BaseInterfaceTests): + def test_custom_asserts(self): + # This would always trigger the KeyError from trying to put + # an array of equal-length UserDicts inside an ndarray. 
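+        # Editor's sketch of the failure mode the custom asserts avoid
+        # (illustrative, using the JSONArray defined above):
+        #
+        #   left = pd.Series(JSONArray([collections.UserDict({"a": 1})] * 2))
+        #   tm.assert_series_equal(left, left)   # the default path goes through
+        #                                        # Series.values and trips over the
+        #                                        # equal-length UserDicts
+        #
+        # BaseJSON.assert_series_equal instead re-wraps values.astype(object) in a
+        # fresh JSONArray before comparing.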
+ data = JSONArray( + [ + collections.UserDict({"a": 1}), + collections.UserDict({"b": 2}), + collections.UserDict({"c": 3}), + ] + ) + a = pd.Series(data) + self.assert_series_equal(a, a) + self.assert_frame_equal(a.to_frame(), a.to_frame()) + + b = pd.Series(data.take([0, 0, 1])) + msg = r"ExtensionArray are different" + with pytest.raises(AssertionError, match=msg): + self.assert_series_equal(a, b) + + with pytest.raises(AssertionError, match=msg): + self.assert_frame_equal(a.to_frame(), b.to_frame()) + + @pytest.mark.xfail( + reason="comparison method not implemented for JSONArray (GH-37867)" + ) + def test_contains(self, data): + # GH-37867 + super().test_contains(data) + + +class TestConstructors(BaseJSON, base.BaseConstructorsTests): + @pytest.mark.xfail(reason="not implemented constructor from dtype") + def test_from_dtype(self, data): + # construct from our dtype & string dtype + super(self).test_from_dtype(data) + + @pytest.mark.xfail(reason="RecursionError, GH-33900") + def test_series_constructor_no_data_with_index(self, dtype, na_value): + # RecursionError: maximum recursion depth exceeded in comparison + rec_limit = sys.getrecursionlimit() + try: + # Limit to avoid stack overflow on Windows CI + sys.setrecursionlimit(100) + super().test_series_constructor_no_data_with_index(dtype, na_value) + finally: + sys.setrecursionlimit(rec_limit) + + @pytest.mark.xfail(reason="RecursionError, GH-33900") + def test_series_constructor_scalar_na_with_index(self, dtype, na_value): + # RecursionError: maximum recursion depth exceeded in comparison + rec_limit = sys.getrecursionlimit() + try: + # Limit to avoid stack overflow on Windows CI + sys.setrecursionlimit(100) + super().test_series_constructor_scalar_na_with_index(dtype, na_value) + finally: + sys.setrecursionlimit(rec_limit) + + @pytest.mark.xfail(reason="collection as scalar, GH-33901") + def test_series_constructor_scalar_with_index(self, data, dtype): + # TypeError: All values must be of type + rec_limit = sys.getrecursionlimit() + try: + # Limit to avoid stack overflow on Windows CI + sys.setrecursionlimit(100) + super().test_series_constructor_scalar_with_index(data, dtype) + finally: + sys.setrecursionlimit(rec_limit) + + +class TestReshaping(BaseJSON, base.BaseReshapingTests): + @pytest.mark.xfail(reason="Different definitions of NA") + def test_stack(self): + """ + The test does .astype(object).stack(). If we happen to have + any missing values in `data`, then we'll end up with different + rows since we consider `{}` NA, but `.astype(object)` doesn't. + """ + super().test_stack() + + @pytest.mark.xfail(reason="dict for NA") + def test_unstack(self, data, index): + # The base test has NaN for the expected NA value. 
+ # this matches otherwise + return super().test_unstack(data, index) + + +class TestGetitem(BaseJSON, base.BaseGetitemTests): + pass + + +class TestIndex(BaseJSON, base.BaseIndexTests): + pass + + +class TestMissing(BaseJSON, base.BaseMissingTests): + @pytest.mark.xfail(reason="Setting a dict as a scalar") + def test_fillna_series(self): + """We treat dictionaries as a mapping in fillna, not a scalar.""" + super().test_fillna_series() + + @pytest.mark.xfail(reason="Setting a dict as a scalar") + def test_fillna_frame(self): + """We treat dictionaries as a mapping in fillna, not a scalar.""" + super().test_fillna_frame() + + +unhashable = pytest.mark.xfail(reason="Unhashable") + + +class TestReduce(base.BaseNoReduceTests): + pass + + +class TestMethods(BaseJSON, base.BaseMethodsTests): + @unhashable + def test_value_counts(self, all_data, dropna): + super().test_value_counts(all_data, dropna) + + @unhashable + def test_value_counts_with_normalize(self, data): + super().test_value_counts_with_normalize(data) + + @unhashable + def test_sort_values_frame(self): + # TODO (EA.factorize): see if _values_for_factorize allows this. + super().test_sort_values_frame() + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values(self, data_for_sorting, ascending, sort_by_key): + super().test_sort_values(data_for_sorting, ascending, sort_by_key) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_missing( + self, data_missing_for_sorting, ascending, sort_by_key + ): + super().test_sort_values_missing( + data_missing_for_sorting, ascending, sort_by_key + ) + + @pytest.mark.xfail(reason="combine for JSONArray not supported") + def test_combine_le(self, data_repeated): + super().test_combine_le(data_repeated) + + @pytest.mark.xfail(reason="combine for JSONArray not supported") + def test_combine_add(self, data_repeated): + super().test_combine_add(data_repeated) + + @pytest.mark.xfail( + reason="combine for JSONArray not supported - " + "may pass depending on random data", + strict=False, + ) + def test_combine_first(self, data): + super().test_combine_first(data) + + @unhashable + def test_hash_pandas_object_works(self, data, kind): + super().test_hash_pandas_object_works(data, kind) + + @pytest.mark.xfail(reason="broadcasting error") + def test_where_series(self, data, na_value): + # Fails with + # *** ValueError: operands could not be broadcast together + # with shapes (4,) (4,) (0,) + super().test_where_series(data, na_value) + + @pytest.mark.xfail(reason="Can't compare dicts.") + def test_searchsorted(self, data_for_sorting): + super().test_searchsorted(data_for_sorting) + + @pytest.mark.xfail(reason="Can't compare dicts.") + def test_equals(self, data, na_value, as_series): + super().test_equals(data, na_value, as_series) + + +class TestCasting(BaseJSON, base.BaseCastingTests): + @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)") + def test_astype_str(self): + """This currently fails in NumPy on np.array(self, dtype=str) with + + *** ValueError: setting an array element with a sequence + """ + super().test_astype_str() + + +# We intentionally don't run base.BaseSetitemTests because pandas' +# internals has trouble setting sequences of values into scalar positions. + + +class TestGroupby(BaseJSON, base.BaseGroupbyTests): + @unhashable + def test_groupby_extension_transform(self): + """ + This currently fails in Series.name.setter, since the + name must be hashable, but the value is a dictionary. + I think this is what we want, i.e. 
`.name` should be the original + values, and not the values for factorization. + """ + super().test_groupby_extension_transform() + + @unhashable + def test_groupby_extension_apply(self): + """ + This fails in Index._do_unique_check with + + > hash(val) + E TypeError: unhashable type: 'UserDict' with + + I suspect that once we support Index[ExtensionArray], + we'll be able to dispatch unique. + """ + super().test_groupby_extension_apply() + + @unhashable + def test_groupby_extension_agg(self): + """ + This fails when we get to tm.assert_series_equal when left.index + contains dictionaries, which are not hashable. + """ + super().test_groupby_extension_agg() + + @unhashable + def test_groupby_extension_no_sort(self): + """ + This fails when we get to tm.assert_series_equal when left.index + contains dictionaries, which are not hashable. + """ + super().test_groupby_extension_no_sort() + + @pytest.mark.xfail(reason="GH#39098: Converts agg result to object") + def test_groupby_agg_extension(self, data_for_grouping): + super().test_groupby_agg_extension(data_for_grouping) + + +class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests): + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + if len(data[0]) != 1: + mark = pytest.mark.xfail(reason="raises in coercing to Series") + request.node.add_marker(mark) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with pytest.raises(TypeError, match="unsupported"): + ser + data + + @pytest.mark.xfail(reason="not implemented") + def test_divmod_series_array(self): + # GH 23287 + # skipping because it is not implemented + super().test_divmod_series_array() + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super()._check_divmod_op(s, op, other, exc=TypeError) + + +class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): + pass + + +class TestPrinting(BaseJSON, base.BasePrintingTests): + pass diff --git a/pandas/tests/extension/list/__init__.py b/pandas/tests/extension/list/__init__.py new file mode 100644 index 00000000..0f3f2f35 --- /dev/null +++ b/pandas/tests/extension/list/__init__.py @@ -0,0 +1,7 @@ +from pandas.tests.extension.list.array import ( + ListArray, + ListDtype, + make_data, +) + +__all__ = ["ListArray", "ListDtype", "make_data"] diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py new file mode 100644 index 00000000..f281a0f8 --- /dev/null +++ b/pandas/tests/extension/list/array.py @@ -0,0 +1,132 @@ +""" +Test extension array for storing nested data in a pandas container. + +The ListArray stores an ndarray of lists. +""" +from __future__ import annotations + +import numbers +import random +import string + +import numpy as np + +from pandas._typing import type_t + +from pandas.core.dtypes.base import ExtensionDtype + +import pandas as pd +from pandas.api.types import ( + is_object_dtype, + is_string_dtype, +) +from pandas.core.arrays import ExtensionArray + + +class ListDtype(ExtensionDtype): + type = list + name = "list" + na_value = np.nan + + @classmethod + def construct_array_type(cls) -> type_t[ListArray]: + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + return ListArray + + +class ListArray(ExtensionArray): + dtype = ListDtype() + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False) -> None: + if not isinstance(values, np.ndarray): + raise TypeError("Need to pass a numpy array as values") + for val in values: + if not isinstance(val, self.dtype.type) and not pd.isna(val): + raise TypeError("All values must be of type " + str(self.dtype.type)) + self.data = values + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + data = np.empty(len(scalars), dtype=object) + data[:] = scalars + return cls(data) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.data[item] + else: + # slice, list-like, mask + return type(self)(self.data[item]) + + def __len__(self) -> int: + return len(self.data) + + def isna(self): + return np.array( + [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool + ) + + def take(self, indexer, allow_fill=False, fill_value=None): + # re-implement here, since NumPy has trouble setting + # sized objects like UserDicts into scalar slots of + # an ndarary. + indexer = np.asarray(indexer) + msg = ( + "Index is out of bounds or cannot do a " + "non-empty take from an empty array." + ) + + if allow_fill: + if fill_value is None: + fill_value = self.dtype.na_value + # bounds check + if (indexer < -1).any(): + raise ValueError + try: + output = [ + self.data[loc] if loc != -1 else fill_value for loc in indexer + ] + except IndexError as err: + raise IndexError(msg) from err + else: + try: + output = [self.data[loc] for loc in indexer] + except IndexError as err: + raise IndexError(msg) from err + + return self._from_sequence(output) + + def copy(self): + return type(self)(self.data[:]) + + def astype(self, dtype, copy=True): + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # numpy has problems with astype(str) for nested elements + return np.array([str(x) for x in self.data], dtype=dtype) + return np.array(self.data, dtype=dtype, copy=copy) + + @classmethod + def _concat_same_type(cls, to_concat): + data = np.concatenate([x.data for x in to_concat]) + return cls(data) + + +def make_data(): + # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer + data = np.empty(100, dtype=object) + data[:] = [ + [random.choice(string.ascii_letters) for _ in range(random.randint(0, 10))] + for _ in range(100) + ] + return data diff --git a/pandas/tests/extension/list/test_list.py b/pandas/tests/extension/list/test_list.py new file mode 100644 index 00000000..295f0867 --- /dev/null +++ b/pandas/tests/extension/list/test_list.py @@ -0,0 +1,33 @@ +import pytest + +import pandas as pd +from pandas.tests.extension.list.array import ( + ListArray, + ListDtype, + make_data, +) + + +@pytest.fixture +def dtype(): + return ListDtype() + + +@pytest.fixture +def data(): + """Length-100 ListArray for semantics test.""" + data = make_data() + + while len(data[0]) == len(data[1]): + data = make_data() + + return ListArray(data) + + +def test_to_csv(data): + # https://github.com/pandas-dev/pandas/issues/28840 + # array with list-likes fail when doing astype(str) on the numpy array + # which was done in to_native_types + df = pd.DataFrame({"a": data}) + res = df.to_csv() + assert str(data[0]) in res diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py new file mode 100644 index 00000000..b7ddb1f2 --- /dev/null +++ b/pandas/tests/extension/test_arrow.py @@ -0,0 +1,1709 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+""" +from datetime import ( + date, + datetime, + time, + timedelta, +) + +import numpy as np +import pytest + +from pandas.compat import ( + is_ci_environment, + is_platform_windows, + pa_version_under2p0, + pa_version_under3p0, + pa_version_under4p0, + pa_version_under6p0, + pa_version_under7p0, + pa_version_under8p0, + pa_version_under9p0, +) +from pandas.errors import PerformanceWarning + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension import base + +pa = pytest.importorskip("pyarrow", minversion="1.0.1") + +from pandas.core.arrays.arrow.array import ArrowExtensionArray + +from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip + + +@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str) +def dtype(request): + return ArrowDtype(pyarrow_dtype=request.param) + + +@pytest.fixture +def data(dtype): + pa_dtype = dtype.pyarrow_dtype + if pa.types.is_boolean(pa_dtype): + data = [True, False] * 4 + [None] + [True, False] * 44 + [None] + [True, False] + elif pa.types.is_floating(pa_dtype): + data = [1.0, 0.0] * 4 + [None] + [-2.0, -1.0] * 44 + [None] + [0.5, 99.5] + elif pa.types.is_signed_integer(pa_dtype): + data = [1, 0] * 4 + [None] + [-2, -1] * 44 + [None] + [1, 99] + elif pa.types.is_unsigned_integer(pa_dtype): + data = [1, 0] * 4 + [None] + [2, 1] * 44 + [None] + [1, 99] + elif pa.types.is_date(pa_dtype): + data = ( + [date(2022, 1, 1), date(1999, 12, 31)] * 4 + + [None] + + [date(2022, 1, 1), date(2022, 1, 1)] * 44 + + [None] + + [date(1999, 12, 31), date(1999, 12, 31)] + ) + elif pa.types.is_timestamp(pa_dtype): + data = ( + [datetime(2020, 1, 1, 1, 1, 1, 1), datetime(1999, 1, 1, 1, 1, 1, 1)] * 4 + + [None] + + [datetime(2020, 1, 1, 1), datetime(1999, 1, 1, 1)] * 44 + + [None] + + [datetime(2020, 1, 1), datetime(1999, 1, 1)] + ) + elif pa.types.is_duration(pa_dtype): + data = ( + [timedelta(1), timedelta(1, 1)] * 4 + + [None] + + [timedelta(-1), timedelta(0)] * 44 + + [None] + + [timedelta(-10), timedelta(10)] + ) + elif pa.types.is_time(pa_dtype): + data = ( + [time(12, 0), time(0, 12)] * 4 + + [None] + + [time(0, 0), time(1, 1)] * 44 + + [None] + + [time(0, 5), time(5, 0)] + ) + else: + raise NotImplementedError + return pd.array(data, dtype=dtype) + + +@pytest.fixture +def data_missing(data): + """Length-2 array with [NA, Valid]""" + return type(data)._from_sequence([None, data[0]]) + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture returning 'data' or 'data_missing' integer arrays. + + Used to test dtype conversion with and without missing values. + """ + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + +@pytest.fixture +def data_for_grouping(dtype): + """ + Data for factorization, grouping, and unique tests. 
+ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + pa_dtype = dtype.pyarrow_dtype + if pa.types.is_boolean(pa_dtype): + A = False + B = True + C = True + elif pa.types.is_floating(pa_dtype): + A = -1.1 + B = 0.0 + C = 1.1 + elif pa.types.is_signed_integer(pa_dtype): + A = -1 + B = 0 + C = 1 + elif pa.types.is_unsigned_integer(pa_dtype): + A = 0 + B = 1 + C = 10 + elif pa.types.is_date(pa_dtype): + A = date(1999, 12, 31) + B = date(2010, 1, 1) + C = date(2022, 1, 1) + elif pa.types.is_timestamp(pa_dtype): + A = datetime(1999, 1, 1, 1, 1, 1, 1) + B = datetime(2020, 1, 1) + C = datetime(2020, 1, 1, 1) + elif pa.types.is_duration(pa_dtype): + A = timedelta(-1) + B = timedelta(0) + C = timedelta(1, 4) + elif pa.types.is_time(pa_dtype): + A = time(0, 0) + B = time(0, 12) + C = time(12, 12) + else: + raise NotImplementedError + return pd.array([B, B, None, None, A, A, B, C], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(data_for_grouping): + """ + Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + """ + return type(data_for_grouping)._from_sequence( + [data_for_grouping[0], data_for_grouping[7], data_for_grouping[4]] + ) + + +@pytest.fixture +def data_missing_for_sorting(data_for_grouping): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + return type(data_for_grouping)._from_sequence( + [data_for_grouping[0], data_for_grouping[2], data_for_grouping[4]] + ) + + +@pytest.fixture +def data_for_twos(data): + """Length-100 array in which all the elements are two.""" + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): + return pd.array([2] * 100, dtype=data.dtype) + # tests will be xfailed where 2 is not a valid scalar for pa_dtype + return data + + +@pytest.fixture +def na_value(): + """The scalar missing value for this type. 
Default 'None'""" + return pd.NA + + +class TestBaseCasting(base.BaseCastingTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + def test_from_dtype(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz: + if pa_version_under2p0: + request.node.add_marker( + pytest.mark.xfail( + reason=f"timestamp data with tz={pa_dtype.tz} " + "converted to integer when pyarrow < 2.0", + ) + ) + else: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", + ) + ) + super().test_from_dtype(data) + + def test_from_sequence_pa_array(self, data, request): + # https://github.com/pandas-dev/pandas/pull/47034#discussion_r955500784 + # data._data = pa.ChunkedArray + if pa_version_under3p0: + request.node.add_marker( + pytest.mark.xfail( + reason="ChunkedArray has no attribute combine_chunks", + ) + ) + result = type(data)._from_sequence(data._data) + tm.assert_extension_array_equal(result, data) + assert isinstance(result._data, pa.ChunkedArray) + + result = type(data)._from_sequence(data._data.combine_chunks()) + tm.assert_extension_array_equal(result, data) + assert isinstance(result._data, pa.ChunkedArray) + + def test_from_sequence_pa_array_notimplemented(self, request): + if pa_version_under6p0: + request.node.add_marker( + pytest.mark.xfail( + raises=AttributeError, + reason="month_day_nano_interval not implemented by pyarrow.", + ) + ) + with pytest.raises(NotImplementedError, match="Converting strings to"): + ArrowExtensionArray._from_sequence_of_strings( + ["12-1"], dtype=pa.month_day_nano_interval() + ) + + def test_from_sequence_of_strings_pa_array(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa_version_under3p0: + request.node.add_marker( + pytest.mark.xfail( + reason="ChunkedArray has no attribute combine_chunks", + ) + ) + elif pa.types.is_time64(pa_dtype) and pa_dtype.equals("time64[ns]"): + request.node.add_marker( + pytest.mark.xfail( + reason="Nanosecond time parsing not supported.", + ) + ) + elif pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow doesn't support parsing {pa_dtype}", + ) + ) + elif pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason="Iterating over ChunkedArray[bool] returns PyArrow scalars.", + ) + ) + elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: + if pa_version_under7p0: + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow doesn't support string cast from {pa_dtype}", + ) + ) + elif is_platform_windows() and is_ci_environment(): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason=( + "TODO: Set ARROW_TIMEZONE_DATABASE environment variable " + "on CI to path to the tzdata for pyarrow." 
+ ), + ) + ) + elif pa_version_under6p0 and pa.types.is_temporal(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow doesn't support string cast from {pa_dtype}", + ) + ) + pa_array = data._data.cast(pa.string()) + result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) + tm.assert_extension_array_equal(result, data) + + pa_array = pa_array.combine_chunks() + result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) + tm.assert_extension_array_equal(result, data) + + +class TestGetitemTests(base.BaseGetitemTests): + @pytest.mark.xfail( + reason=( + "data.dtype.type return pyarrow.DataType " + "but this (intentionally) returns " + "Python scalars or pd.Na" + ) + ) + def test_getitem_scalar(self, data): + super().test_getitem_scalar(data) + + def test_take_series(self, request, data): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + unit = getattr(data.dtype.pyarrow_dtype, "unit", None) + bad_units = ["ns"] + if pa_version_under2p0: + bad_units.extend(["s", "ms", "us"]) + if pa_version_under3p0 and tz not in (None, "UTC") and unit in bad_units: + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"Not supported by pyarrow < 3.0 " + f"with timestamp type {tz} and {unit}" + ) + ) + ) + super().test_take_series(data) + + def test_reindex(self, request, data, na_value): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + unit = getattr(data.dtype.pyarrow_dtype, "unit", None) + bad_units = ["ns"] + if pa_version_under2p0: + bad_units.extend(["s", "ms", "us"]) + if pa_version_under3p0 and tz not in (None, "UTC") and unit in bad_units: + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"Not supported by pyarrow < 3.0 " + f"with timestamp type {tz} and {unit}" + ) + ) + ) + super().test_reindex(data, na_value) + + def test_loc_iloc_frame_single_dtype(self, request, using_array_manager, data): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + unit = getattr(data.dtype.pyarrow_dtype, "unit", None) + bad_units = ["ns"] + if pa_version_under2p0: + bad_units.extend(["s", "ms", "us"]) + if ( + pa_version_under3p0 + and not using_array_manager + and tz not in (None, "UTC") + and unit in bad_units + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"Not supported by pyarrow < 3.0 " + f"with timestamp type {tz} and {unit}" + ) + ) + ) + super().test_loc_iloc_frame_single_dtype(data) + + +class TestBaseNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, ser, op_name, skipna): + pa_dtype = ser.dtype.pyarrow_dtype + result = getattr(ser, op_name)(skipna=skipna) + if pa.types.is_boolean(pa_dtype): + # Can't convert if ser contains NA + pytest.skip( + "pandas boolean data with NA does not fully support all reductions" + ) + elif pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): + ser = ser.astype("Float64") + expected = getattr(ser, op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_numeric_reductions, skipna, request): + pa_dtype = data.dtype.pyarrow_dtype + xfail_mark = pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_numeric_reductions} is not implemented in " + f"pyarrow={pa.__version__} for {pa_dtype}" + ), + ) + if all_numeric_reductions in {"skew", "kurt"}: + request.node.add_marker(xfail_mark) + elif ( + all_numeric_reductions in {"median", "var", "std", "prod", "max", "min"} + and pa_version_under6p0 + ): + 
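+            # These reductions go through pyarrow compute kernels that this
+            # check only expects to be available from pyarrow >= 6.0, so
+            # older versions are given the generic xfail_mark above.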
request.node.add_marker(xfail_mark) + elif all_numeric_reductions in {"sum", "mean"} and pa_version_under2p0: + request.node.add_marker(xfail_mark) + elif ( + all_numeric_reductions in {"sum", "mean"} + and skipna is False + and pa_version_under6p0 + and (pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype)) + ): + request.node.add_marker( + pytest.mark.xfail( + raises=AssertionError, + reason=( + f"{all_numeric_reductions} with skip_nulls={skipna} did not " + f"return NA for {pa_dtype} with pyarrow={pa.__version__}" + ), + ) + ) + elif not ( + pa.types.is_integer(pa_dtype) + or pa.types.is_floating(pa_dtype) + or pa.types.is_boolean(pa_dtype) + ) and not ( + all_numeric_reductions in {"min", "max"} + and (pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype)) + ): + request.node.add_marker(xfail_mark) + elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { + "std", + "var", + "median", + }: + request.node.add_marker(xfail_mark) + super().test_reduce_series(data, all_numeric_reductions, skipna) + + +class TestBaseBooleanReduce(base.BaseBooleanReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series( + self, data, all_boolean_reductions, skipna, na_value, request + ): + pa_dtype = data.dtype.pyarrow_dtype + xfail_mark = pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_boolean_reductions} is not implemented in " + f"pyarrow={pa.__version__} for {pa_dtype}" + ), + ) + if not pa.types.is_boolean(pa_dtype): + request.node.add_marker(xfail_mark) + elif pa_version_under3p0: + request.node.add_marker(xfail_mark) + op_name = all_boolean_reductions + s = pd.Series(data) + result = getattr(s, op_name)(skipna=skipna) + assert result is (op_name == "any") + + +class TestBaseGroupby(base.BaseGroupbyTests): + def test_groupby_agg_extension(self, data_for_grouping, request): + tz = getattr(data_for_grouping.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}." 
+ ) + ) + super().test_groupby_agg_extension(data_for_grouping) + + def test_groupby_extension_no_sort(self, data_for_grouping, request): + pa_dtype = data_for_grouping.dtype.pyarrow_dtype + if pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{pa_dtype} only has 2 unique possible values", + ) + ) + elif pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow doesn't support factorizing {pa_dtype}", + ) + ) + super().test_groupby_extension_no_sort(data_for_grouping) + + def test_groupby_extension_transform(self, data_for_grouping, request): + pa_dtype = data_for_grouping.dtype.pyarrow_dtype + if pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{pa_dtype} only has 2 unique possible values", + ) + ) + elif pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow doesn't support factorizing {pa_dtype}", + ) + ) + super().test_groupby_extension_transform(data_for_grouping) + + def test_groupby_extension_apply( + self, data_for_grouping, groupby_apply_op, request + ): + pa_dtype = data_for_grouping.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow doesn't support factorizing {pa_dtype}", + ) + ) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) + + def test_in_numeric_groupby(self, data_for_grouping, request): + pa_dtype = data_for_grouping.dtype.pyarrow_dtype + if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason="ArrowExtensionArray doesn't support .sum() yet.", + ) + ) + super().test_in_numeric_groupby(data_for_grouping) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping, request): + pa_dtype = data_for_grouping.dtype.pyarrow_dtype + if pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=ValueError, + reason=f"{pa_dtype} only has 2 unique possible values", + ) + ) + elif pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow doesn't support factorizing {pa_dtype}", + ) + ) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_groupby_extension_agg(as_index, data_for_grouping) + + +class TestBaseDtype(base.BaseDtypeTests): + def test_construct_from_string_own_name(self, dtype, request): + pa_dtype = dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", + ) + ) + super().test_construct_from_string_own_name(dtype) + + def test_is_dtype_from_name(self, dtype, request): + pa_dtype = dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", + ) + ) + super().test_is_dtype_from_name(dtype) + + def test_construct_from_string(self, dtype, request): + pa_dtype = 
dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", + ) + ) + super().test_construct_from_string(dtype) + + def test_construct_from_string_another_type_raises(self, dtype): + msg = r"'another_type' must end with '\[pyarrow\]'" + with pytest.raises(TypeError, match=msg): + type(dtype).construct_from_string("another_type") + + def test_get_common_dtype(self, dtype, request): + pa_dtype = dtype.pyarrow_dtype + if ( + pa.types.is_date(pa_dtype) + or pa.types.is_time(pa_dtype) + or ( + pa.types.is_timestamp(pa_dtype) + and (pa_dtype.unit != "ns" or pa_dtype.tz is not None) + ) + or (pa.types.is_duration(pa_dtype) and pa_dtype.unit != "ns") + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"{pa_dtype} does not have associated numpy " + f"dtype findable by find_common_type" + ) + ) + ) + super().test_get_common_dtype(dtype) + + +class TestBaseIndex(base.BaseIndexTests): + pass + + +class TestBaseInterface(base.BaseInterfaceTests): + def test_contains(self, data, data_missing, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + unit = getattr(data.dtype.pyarrow_dtype, "unit", None) + if pa_version_under2p0 and tz not in (None, "UTC") and unit == "us": + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"Not supported by pyarrow < 2.0 " + f"with timestamp type {tz} and {unit}" + ) + ) + ) + super().test_contains(data, data_missing) + + @pytest.mark.xfail(reason="pyarrow.ChunkedArray does not support views.") + def test_view(self, data): + super().test_view(data) + + +class TestBaseMissing(base.BaseMissingTests): + @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") + def test_dropna_array(self, data_missing): + super().test_dropna_array(data_missing) + + +class TestBasePrinting(base.BasePrintingTests): + pass + + +class TestBaseReshaping(base.BaseReshapingTests): + def test_concat_columns(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_concat_columns(data, na_value) + + def test_concat_extension_arrays_copy_false(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_concat_extension_arrays_copy_false(data, na_value) + + def test_align(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_align(data, na_value) + + def test_align_frame(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_align_frame(data, na_value) + + def test_align_series_frame(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, 
"UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_align_series_frame(data, na_value) + + def test_merge(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_merge(data, na_value) + + def test_ravel(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_ravel(data) + + @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") + def test_transpose(self, data): + super().test_transpose(data) + + def test_transpose_frame(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_transpose_frame(data) + + +class TestBaseSetitem(base.BaseSetitemTests): + def test_setitem_scalar_series(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_setitem_scalar_series(data, box_in_series) + + def test_setitem_sequence(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_sequence(data, box_in_series) + + def test_setitem_sequence_broadcasts(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_sequence_broadcasts(data, box_in_series) + + @pytest.mark.parametrize("setter", ["loc", "iloc"]) + def test_setitem_scalar(self, data, setter, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_scalar(data, setter) + + def test_setitem_loc_scalar_mixed(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_loc_scalar_mixed(data) + + def test_setitem_loc_scalar_single(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_setitem_loc_scalar_single(data) + + def test_setitem_loc_scalar_multiple_homogoneous(self, data, request): + 
tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_loc_scalar_multiple_homogoneous(data) + + def test_setitem_iloc_scalar_mixed(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_iloc_scalar_mixed(data) + + def test_setitem_iloc_scalar_single(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_iloc_scalar_single(data) + + def test_setitem_iloc_scalar_multiple_homogoneous(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_iloc_scalar_multiple_homogoneous(data) + + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array", "boolean-array-na"], + ) + def test_setitem_mask(self, data, mask, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_mask(data, mask, box_in_series) + + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + unit = getattr(data.dtype.pyarrow_dtype, "unit", None) + if pa_version_under2p0 and tz not in (None, "UTC") and unit == "us": + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_mask_boolean_array_with_na(data, box_in_series) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_integer_array(data, idx, box_in_series) + + @pytest.mark.parametrize("as_callable", [True, False]) + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_aligned(self, data, as_callable, setter, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_mask_aligned(data, as_callable, setter) + + @pytest.mark.parametrize("setter", ["loc", None]) + def 
test_setitem_mask_broadcast(self, data, setter, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_mask_broadcast(data, setter) + + def test_setitem_tuple_index(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_tuple_index(data) + + def test_setitem_slice(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_slice(data, box_in_series) + + def test_setitem_loc_iloc_slice(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_setitem_loc_iloc_slice(data) + + def test_setitem_slice_array(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_setitem_slice_array(data) + + def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request): + # Is there a better way to get the full_indexer id "null_slice"? 
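+        # Illustrative only: a parametrized nodeid embeds the fixture ids in
+        # brackets, roughly
+        #   ...::test_setitem_with_expansion_dataframe_column[null_slice-...]
+        # so the substring check below is a cheap way to detect the
+        # "null_slice" full_indexer case.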
+ is_null_slice = "null_slice" in request.node.nodeid + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC") and not is_null_slice: + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_setitem_with_expansion_dataframe_column(data, full_indexer) + + def test_setitem_with_expansion_row(self, data, na_value, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_with_expansion_row(data, na_value) + + def test_setitem_frame_2d_values(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_setitem_frame_2d_values(data) + + @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") + def test_setitem_preserves_views(self, data): + super().test_setitem_preserves_views(data) + + +class TestBaseParsing(base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail(raises=TypeError, reason="GH 47534") + ) + elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"Parameterized types with tz={pa_dtype.tz} not supported.", + ) + ) + super().test_EA_types(engine, data) + + +class TestBaseUnaryOps(base.BaseUnaryOpsTests): + @pytest.mark.xfail( + pa_version_under2p0, + raises=NotImplementedError, + reason="pyarrow.compute.invert not supported in pyarrow<2.0", + ) + def test_invert(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if not pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow.compute.invert does support {pa_dtype}", + ) + ) + super().test_invert(data) + + +class TestBaseMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("periods", [1, -2]) + def test_diff(self, data, periods, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_unsigned_integer(pa_dtype) and periods == 1: + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason=( + f"diff with {pa_dtype} and periods={periods} will overflow" + ), + ) + ) + super().test_diff(data, periods) + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna, request): + pa_dtype = all_data.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"value_count has no kernel for {pa_dtype}", + ) + ) + super().test_value_counts(all_data, dropna) + + def test_value_counts_with_normalize(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"value_count has no pyarrow kernel for {pa_dtype}", + ) + ) + super().test_value_counts_with_normalize(data) + + @pytest.mark.xfail( + pa_version_under6p0, + 
raises=NotImplementedError, + reason="argmin/max only implemented for pyarrow version >= 6.0", + ) + def test_argmin_argmax( + self, data_for_sorting, data_missing_for_sorting, na_value, request + ): + pa_dtype = data_for_sorting.dtype.pyarrow_dtype + if pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{pa_dtype} only has 2 unique possible values", + ) + ) + elif pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"min_max not supported in pyarrow for {pa_dtype}", + ) + ) + super().test_argmin_argmax(data_for_sorting, data_missing_for_sorting, na_value) + + @pytest.mark.parametrize( + "op_name, skipna, expected", + [ + ("idxmax", True, 0), + ("idxmin", True, 2), + ("argmax", True, 0), + ("argmin", True, 2), + ("idxmax", False, np.nan), + ("idxmin", False, np.nan), + ("argmax", False, -1), + ("argmin", False, -1), + ], + ) + def test_argreduce_series( + self, data_missing_for_sorting, op_name, skipna, expected, request + ): + pa_dtype = data_missing_for_sorting.dtype.pyarrow_dtype + if pa_version_under6p0 and skipna: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason="min_max not supported in pyarrow", + ) + ) + elif not pa_version_under6p0 and pa.types.is_duration(pa_dtype) and skipna: + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"min_max not supported in pyarrow for {pa_dtype}", + ) + ) + super().test_argreduce_series( + data_missing_for_sorting, op_name, skipna, expected + ) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request): + pa_dtype = data_for_sorting.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype) and not ascending and not pa_version_under2p0: + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=( + f"unique has no pyarrow kernel " + f"for {pa_dtype} when ascending={ascending}" + ), + ) + ) + super().test_sort_values(data_for_sorting, ascending, sort_by_key) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_frame(self, data_for_sorting, ascending, request): + pa_dtype = data_for_sorting.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=( + f"dictionary_encode has no pyarrow kernel " + f"for {pa_dtype} when ascending={ascending}" + ), + ) + ) + super().test_sort_values_frame(data_for_sorting, ascending) + + @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) + @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) + def test_unique(self, data, box, method, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype) and not pa_version_under2p0: + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"unique has no pyarrow kernel for {pa_dtype}.", + ) + ) + super().test_unique(data, box, method) + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize(self, data_for_grouping, na_sentinel, request): + pa_dtype = data_for_grouping.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}", + ) + ) + elif pa.types.is_boolean(pa_dtype): + request.node.add_marker( + 
pytest.mark.xfail( + reason=f"{pa_dtype} only has 2 unique possible values", + ) + ) + super().test_factorize(data_for_grouping, na_sentinel) + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize_equivalence(self, data_for_grouping, na_sentinel, request): + pa_dtype = data_for_grouping.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}", + ) + ) + super().test_factorize_equivalence(data_for_grouping, na_sentinel) + + def test_factorize_empty(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}", + ) + ) + super().test_factorize_empty(data) + + def test_shift_fill_value(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + tz = getattr(pa_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_shift_fill_value(data) + + @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) + def test_repeat(self, data, repeats, as_series, use_numpy, request): + pa_dtype = data.dtype.pyarrow_dtype + tz = getattr(pa_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC") and repeats != 0: + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"Not supported by pyarrow < 2.0 with " + f"timestamp type {tz} when repeats={repeats}" + ) + ) + ) + super().test_repeat(data, repeats, as_series, use_numpy) + + def test_insert(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + tz = getattr(pa_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_insert(data) + + def test_combine_first(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + tz = getattr(pa_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" + ) + ) + super().test_combine_first(data) + + @pytest.mark.xfail( + reason="result dtype pyarrow[bool] better than expected dtype object" + ) + def test_combine_le(self, data_repeated): + super().test_combine_le(data_repeated) + + def test_combine_add(self, data_repeated, request): + pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype + if pa.types.is_temporal(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason=f"{pa_dtype} cannot be added to {pa_dtype}", + ) + ) + super().test_combine_add(data_repeated) + + def test_searchsorted(self, data_for_sorting, as_series, request): + pa_dtype = data_for_sorting.dtype.pyarrow_dtype + if pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{pa_dtype} only has 2 unique possible values", + ) + ) + super().test_searchsorted(data_for_sorting, as_series) + + def test_where_series(self, data, na_value, as_frame, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_temporal(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"Unsupported cast from double to {pa_dtype}", + ) + ) + 
super().test_where_series(data, na_value, as_frame) + + +class TestBaseArithmeticOps(base.BaseArithmeticOpsTests): + + divmod_exc = NotImplementedError + + def _patch_combine(self, obj, other, op): + # BaseOpsUtil._combine can upcast expected dtype + # (because it generates expected on python scalars) + # while ArrowExtensionArray maintains original type + expected = base.BaseArithmeticOpsTests._combine(self, obj, other, op) + was_frame = False + if isinstance(expected, pd.DataFrame): + was_frame = True + expected_data = expected.iloc[:, 0] + original_dtype = obj.iloc[:, 0].dtype + else: + expected_data = expected + original_dtype = obj.dtype + pa_array = pa.array(expected_data._values).cast(original_dtype.pyarrow_dtype) + pd_array = type(expected_data._values)(pa_array) + if was_frame: + expected = pd.DataFrame( + pd_array, index=expected.index, columns=expected.columns + ) + else: + expected = pd.Series(pd_array) + return expected + + def test_arith_series_with_scalar( + self, data, all_arithmetic_operators, request, monkeypatch + ): + pa_dtype = data.dtype.pyarrow_dtype + + arrow_temporal_supported = not pa_version_under8p0 and ( + all_arithmetic_operators in ("__add__", "__radd__") + and pa.types.is_duration(pa_dtype) + or all_arithmetic_operators in ("__sub__", "__rsub__") + and pa.types.is_temporal(pa_dtype) + ) + if ( + all_arithmetic_operators + in { + "__mod__", + "__rmod__", + } + or pa_version_under2p0 + ): + self.series_scalar_exc = NotImplementedError + elif arrow_temporal_supported: + self.series_scalar_exc = None + elif not ( + pa.types.is_floating(pa_dtype) + or pa.types.is_integer(pa_dtype) + or arrow_temporal_supported + ): + self.series_scalar_exc = pa.ArrowNotImplementedError + else: + self.series_scalar_exc = None + if ( + all_arithmetic_operators == "__rpow__" + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"GH 29997: 1**pandas.NA == 1 while 1**pyarrow.NA == NULL " + f"for {pa_dtype}" + ) + ) + ) + elif arrow_temporal_supported: + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_arithmetic_operators} not supported between" + f"pd.NA and {pa_dtype} Python scalar" + ), + ) + ) + elif ( + all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 + ): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason="divide by 0", + ) + ) + if all_arithmetic_operators == "__floordiv__" and pa.types.is_integer(pa_dtype): + # BaseOpsUtil._combine always returns int64, while ArrowExtensionArray does + # not upcast + monkeypatch.setattr(TestBaseArithmeticOps, "_combine", self._patch_combine) + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_frame_with_scalar( + self, data, all_arithmetic_operators, request, monkeypatch + ): + pa_dtype = data.dtype.pyarrow_dtype + + arrow_temporal_supported = not pa_version_under8p0 and ( + all_arithmetic_operators in ("__add__", "__radd__") + and pa.types.is_duration(pa_dtype) + or all_arithmetic_operators in ("__sub__", "__rsub__") + and pa.types.is_temporal(pa_dtype) + ) + if ( + all_arithmetic_operators + in { + "__mod__", + "__rmod__", + } + or pa_version_under2p0 + ): + self.frame_scalar_exc = NotImplementedError + elif arrow_temporal_supported: + self.frame_scalar_exc = None + elif not (pa.types.is_floating(pa_dtype) or 
pa.types.is_integer(pa_dtype)): + self.frame_scalar_exc = pa.ArrowNotImplementedError + else: + self.frame_scalar_exc = None + if ( + all_arithmetic_operators == "__rpow__" + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"GH 29997: 1**pandas.NA == 1 while 1**pyarrow.NA == NULL " + f"for {pa_dtype}" + ) + ) + ) + elif arrow_temporal_supported: + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_arithmetic_operators} not supported between" + f"pd.NA and {pa_dtype} Python scalar" + ), + ) + ) + elif ( + all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 + ): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason="divide by 0", + ) + ) + if all_arithmetic_operators == "__floordiv__" and pa.types.is_integer(pa_dtype): + # BaseOpsUtil._combine always returns int64, while ArrowExtensionArray does + # not upcast + monkeypatch.setattr(TestBaseArithmeticOps, "_combine", self._patch_combine) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array( + self, data, all_arithmetic_operators, request, monkeypatch + ): + pa_dtype = data.dtype.pyarrow_dtype + + arrow_temporal_supported = not pa_version_under8p0 and ( + all_arithmetic_operators in ("__add__", "__radd__") + and pa.types.is_duration(pa_dtype) + or all_arithmetic_operators in ("__sub__", "__rsub__") + and pa.types.is_temporal(pa_dtype) + ) + if ( + all_arithmetic_operators + in { + "__mod__", + "__rmod__", + } + or pa_version_under2p0 + ): + self.series_array_exc = NotImplementedError + elif arrow_temporal_supported: + self.series_array_exc = None + elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): + self.series_array_exc = pa.ArrowNotImplementedError + else: + self.series_array_exc = None + if ( + all_arithmetic_operators == "__rpow__" + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"GH 29997: 1**pandas.NA == 1 while 1**pyarrow.NA == NULL " + f"for {pa_dtype}" + ) + ) + ) + elif ( + all_arithmetic_operators + in ( + "__sub__", + "__rsub__", + ) + and pa.types.is_unsigned_integer(pa_dtype) + and not pa_version_under2p0 + ): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason=( + f"Implemented pyarrow.compute.subtract_checked " + f"which raises on overflow for {pa_dtype}" + ), + ) + ) + elif arrow_temporal_supported: + request.node.add_marker( + pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_arithmetic_operators} not supported between" + f"pd.NA and {pa_dtype} Python scalar" + ), + ) + ) + elif ( + all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} + and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) + and not pa_version_under2p0 + ): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason="divide by 0", + ) + ) + op_name = all_arithmetic_operators + ser = pd.Series(data) + # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray + # since ser.iloc[0] is a python scalar + other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) + if pa.types.is_floating(pa_dtype) or ( + pa.types.is_integer(pa_dtype) and all_arithmetic_operators != 
"__truediv__" + ): + monkeypatch.setattr(TestBaseArithmeticOps, "_combine", self._patch_combine) + self.check_opname(ser, op_name, other, exc=self.series_array_exc) + + def test_add_series_with_extension_array(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if ( + not ( + pa.types.is_integer(pa_dtype) + or pa.types.is_floating(pa_dtype) + or (not pa_version_under8p0 and pa.types.is_duration(pa_dtype)) + ) + or pa_version_under2p0 + ): + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"add_checked not implemented for {pa_dtype}", + ) + ) + elif pa_dtype.equals("int8"): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason=f"raises on overflow for {pa_dtype}", + ) + ) + super().test_add_series_with_extension_array(data) + + +class TestBaseComparisonOps(base.BaseComparisonOpsTests): + def assert_series_equal(self, left, right, *args, **kwargs): + # Series.combine for "expected" retains bool[pyarrow] dtype + # While "result" return "boolean" dtype + right = pd.Series(right._values.to_numpy(), dtype="boolean") + super().assert_series_equal(left, right, *args, **kwargs) + + def test_compare_array(self, data, comparison_op, na_value, request): + pa_dtype = data.dtype.pyarrow_dtype + ser = pd.Series(data) + # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray + # since ser.iloc[0] is a python scalar + other = pd.Series(pd.array([ser.iloc[0]] * len(ser), dtype=data.dtype)) + if comparison_op.__name__ in ["eq", "ne"]: + # comparison should match point-wise comparisons + result = comparison_op(ser, other) + # Series.combine does not calculate the NA mask correctly + # when comparing over an array + assert result[8] is na_value + assert result[97] is na_value + expected = ser.combine(other, comparison_op) + expected[8] = na_value + expected[97] = na_value + self.assert_series_equal(result, expected) + + else: + exc = None + try: + result = comparison_op(ser, other) + except Exception as err: + exc = err + + if exc is None: + # Didn't error, then should match point-wise behavior + if pa.types.is_temporal(pa_dtype): + # point-wise comparison with pd.NA raises TypeError + assert result[8] is na_value + assert result[97] is na_value + result = result.drop([8, 97]).reset_index(drop=True) + ser = ser.drop([8, 97]) + other = other.drop([8, 97]) + expected = ser.combine(other, comparison_op) + self.assert_series_equal(result, expected) + else: + with pytest.raises(type(exc)): + ser.combine(other, comparison_op) + + def test_invalid_other_comp(self, data, comparison_op): + # GH 48833 + with pytest.raises( + NotImplementedError, match=".* not implemented for " + ): + comparison_op(data, object()) + + +def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): + with pytest.raises(NotImplementedError, match="Passing pyarrow type"): + ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]") + + +@pytest.mark.xfail( + pa_version_under4p0, + raises=NotImplementedError, + reason="quantile only supported for pyarrow version >= 4.0", +) +@pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] +) +@pytest.mark.parametrize("quantile", [0.5, [0.5, 0.5]]) +def test_quantile(data, interpolation, quantile, request): + pa_dtype = data.dtype.pyarrow_dtype + if not (pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype)): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"quantile not supported by pyarrow 
for {pa_dtype}", + ) + ) + data = data.take([0, 0, 0]) + ser = pd.Series(data) + result = ser.quantile(q=quantile, interpolation=interpolation) + if quantile == 0.5: + assert result == data[0] + else: + # Just check the values + result = result.astype("float64[pyarrow]") + expected = pd.Series( + data.take([0, 0]).astype("float64[pyarrow]"), index=[0.5, 0.5] + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail( + pa_version_under6p0, + raises=NotImplementedError, + reason="mode only supported for pyarrow version >= 6.0", +) +@pytest.mark.parametrize("dropna", [True, False]) +@pytest.mark.parametrize( + "take_idx, exp_idx", + [[[0, 0, 2, 2, 4, 4], [4, 0]], [[0, 0, 0, 2, 4, 4], [0]]], + ids=["multi_mode", "single_mode"], +) +def test_mode(data_for_grouping, dropna, take_idx, exp_idx, request): + pa_dtype = data_for_grouping.dtype.pyarrow_dtype + if pa.types.is_temporal(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"mode not supported by pyarrow for {pa_dtype}", + ) + ) + elif ( + pa.types.is_boolean(pa_dtype) + and "multi_mode" in request.node.nodeid + and pa_version_under9p0 + ): + request.node.add_marker( + pytest.mark.xfail( + reason="https://issues.apache.org/jira/browse/ARROW-17096", + ) + ) + data = data_for_grouping.take(take_idx) + ser = pd.Series(data) + result = ser.mode(dropna=dropna) + expected = pd.Series(data_for_grouping.take(exp_idx)) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py new file mode 100644 index 00000000..dd067102 --- /dev/null +++ b/pandas/tests/extension/test_boolean.py @@ -0,0 +1,405 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.boolean import BooleanDtype +from pandas.tests.extension import base + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_for_twos(dtype): + return pd.array(np.ones(100), dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return pd.array([np.nan, True], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return pd.array([True, True, False], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return pd.array([True, np.nan, False], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are pd.NA + return lambda x, y: x is pd.NA and y is pd.NA + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype): + b = True + a = False + na = np.nan + return pd.array([b, b, na, na, a, a, b], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + implements = {"__sub__", "__rsub__"} + + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + exc = None + if op_name.strip("_").lstrip("r") in ["pow", "truediv", "floordiv"]: + # match behavior with non-masked bool dtype + exc = NotImplementedError + super().check_opname(s, op_name, other, exc=exc) + + def _check_op(self, obj, op, other, op_name, exc=NotImplementedError): + if exc is None: + if op_name in self.implements: + msg = r"numpy boolean subtract" + with pytest.raises(TypeError, match=msg): + op(obj, other) + return + + result = op(obj, other) + expected = self._combine(obj, other, op) + + if op_name in ( + "__floordiv__", + "__rfloordiv__", + "__pow__", + "__rpow__", + "__mod__", + "__rmod__", + ): + # combine keeps boolean type + expected = expected.astype("Int8") + elif op_name in ("__truediv__", "__rtruediv__"): + # combine with bools does not generate the correct result + # (numpy behaviour for div is to regard the bools as numeric) + expected = self._combine(obj.astype(float), other, op) + expected = expected.astype("Float64") + if op_name == "__rpow__": + # for rpow, combine does not propagate NaN + expected[result.isna()] = np.nan + self.assert_equal(result, expected) + else: + with pytest.raises(exc): + op(obj, other) + + @pytest.mark.xfail( + reason="Inconsistency between floordiv and divmod; we raise for floordiv " + "but not for divmod. This matches what we do for non-masked bool dtype." + ) + def test_divmod_series_array(self, data, data_for_twos): + super().test_divmod_series_array(data, data_for_twos) + + @pytest.mark.xfail( + reason="Inconsistency between floordiv and divmod; we raise for floordiv " + "but not for divmod. This matches what we do for non-masked bool dtype." 
+ ) + def test_divmod(self, data): + super().test_divmod(data) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize(self, data_for_grouping, na_sentinel): + # override because we only have 2 unique values + if na_sentinel == -1: + msg = "Specifying `na_sentinel=-1` is deprecated" + else: + msg = "Specifying the specific value to use for `na_sentinel` is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + labels, uniques = pd.factorize(data_for_grouping, na_sentinel=na_sentinel) + expected_labels = np.array( + [0, 0, na_sentinel, na_sentinel, 1, 1, 0], dtype=np.intp + ) + expected_uniques = data_for_grouping.take([0, 4]) + + tm.assert_numpy_array_equal(labels, expected_labels) + self.assert_extension_array_equal(uniques, expected_uniques) + + def test_combine_le(self, data_repeated): + # override because expected needs to be boolean instead of bool dtype + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series( + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], + dtype="boolean", + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean") + self.assert_series_equal(result, expected) + + def test_searchsorted(self, data_for_sorting, as_series): + # override because we only have 2 unique values + data_for_sorting = pd.array([True, False], dtype="boolean") + b, a = data_for_sorting + arr = type(data_for_sorting)._from_sequence([a, b]) + + if as_series: + arr = pd.Series(arr) + assert arr.searchsorted(a) == 0 + assert arr.searchsorted(a, side="right") == 1 + + assert arr.searchsorted(b) == 1 + assert arr.searchsorted(b, side="right") == 2 + + result = arr.searchsorted(arr.take([0, 1])) + expected = np.array([0, 1], dtype=np.intp) + + tm.assert_numpy_array_equal(result, expected) + + # sorter + sorter = np.array([1, 0]) + assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 + + def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting): + # override because there are only 2 unique values + + # data_for_sorting -> [B, C, A] with A < B < C -> here True, True, False + assert data_for_sorting.argmax() == 0 + assert data_for_sorting.argmin() == 2 + + # with repeated values -> first occurrence + data = data_for_sorting.take([2, 0, 0, 1, 1, 2]) + assert data.argmax() == 1 + assert data.argmin() == 0 + + # with missing values + # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. + assert data_missing_for_sorting.argmax() == 0 + assert data_missing_for_sorting.argmin() == 2 + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + """ + Groupby-specific tests are overridden because boolean only has 2 + unique values, base tests uses 3 groups. 
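+ The base fixtures assume three ordered groups (A < B < C); with booleans
+ only two distinct groups exist, so the overrides below recompute the
+ expected results for the two-group case.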
+ """ + + def test_grouping_grouper(self, data_for_grouping): + df = pd.DataFrame( + {"A": ["B", "B", None, None, "A", "A", "B"], "B": data_for_grouping} + ) + gr1 = df.groupby("A").grouper.groupings[0] + gr2 = df.groupby("B").grouper.groupings[0] + + tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values) + tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("B", as_index=as_index).A.mean() + _, uniques = pd.factorize(data_for_grouping, sort=True) + + if as_index: + index = pd.Index(uniques, name="B") + expected = pd.Series([3.0, 1.0], index=index, name="A") + self.assert_series_equal(result, expected) + else: + expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0]}) + self.assert_frame_equal(result, expected) + + def test_groupby_agg_extension(self, data_for_grouping): + # GH#38980 groupby agg on extension type fails for non-numeric types + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + + expected = df.iloc[[0, 2, 4]] + expected = expected.set_index("A") + + result = df.groupby("A").agg({"B": "first"}) + self.assert_frame_equal(result, expected) + + result = df.groupby("A").agg("first") + self.assert_frame_equal(result, expected) + + result = df.groupby("A").first() + self.assert_frame_equal(result, expected) + + def test_groupby_extension_no_sort(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("B", sort=False).A.mean() + _, index = pd.factorize(data_for_grouping, sort=False) + + index = pd.Index(index, name="B") + expected = pd.Series([1.0, 3.0], index=index, name="A") + self.assert_series_equal(result, expected) + + def test_groupby_extension_transform(self, data_for_grouping): + valid = data_for_grouping[~data_for_grouping.isna()] + df = pd.DataFrame({"A": [1, 1, 3, 3, 1], "B": valid}) + + result = df.groupby("B").A.transform(len) + expected = pd.Series([3, 3, 2, 2, 3], name="A") + + self.assert_series_equal(result, expected) + + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + df.groupby("B", group_keys=False).apply(groupby_apply_op) + df.groupby("B", group_keys=False).A.apply(groupby_apply_op) + df.groupby("A", group_keys=False).apply(groupby_apply_op) + df.groupby("A", group_keys=False).B.apply(groupby_apply_op) + + def test_groupby_apply_identity(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("A").B.apply(lambda x: x.array) + expected = pd.Series( + [ + df.B.iloc[[0, 1, 6]].array, + df.B.iloc[[2, 3]].array, + df.B.iloc[[4, 5]].array, + ], + index=pd.Index([1, 2, 3], name="A"), + name="B", + ) + self.assert_series_equal(result, expected) + + def test_in_numeric_groupby(self, data_for_grouping): + df = pd.DataFrame( + { + "A": [1, 1, 2, 2, 3, 3, 1], + "B": data_for_grouping, + "C": [1, 1, 1, 1, 1, 1, 1], + } + ) + result = df.groupby("A").sum().columns + + if data_for_grouping.dtype._is_numeric: + expected = pd.Index(["B", "C"]) + else: + expected = pd.Index(["C"]) + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("min_count", [0, 10]) + def test_groupby_sum_mincount(self, data_for_grouping, min_count): + df = pd.DataFrame({"A": [1, 1, 2, 2, 
3, 3, 1], "B": data_for_grouping}) + result = df.groupby("A").sum(min_count=min_count) + if min_count == 0: + expected = pd.DataFrame( + {"B": pd.array([3, 0, 0], dtype="Int64")}, + index=pd.Index([1, 2, 3], name="A"), + ) + tm.assert_frame_equal(result, expected) + else: + expected = pd.DataFrame( + {"B": pd.array([pd.NA] * 3, dtype="Int64")}, + index=pd.Index([1, 2, 3], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +class TestNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + # override parent function to cast to bool for min/max + if np.isnan(expected): + expected = pd.NA + elif op_name in ("min", "max"): + expected = bool(expected) + tm.assert_almost_equal(result, expected) + + +class TestBooleanReduce(base.BaseBooleanReduceTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestUnaryOps(base.BaseUnaryOpsTests): + pass + + +class TestParsing(base.BaseParsingTests): + pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py new file mode 100644 index 00000000..1e17bf33 --- /dev/null +++ b/pandas/tests/extension/test_categorical.py @@ -0,0 +1,310 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import string + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + Timestamp, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype +from pandas.tests.extension import base + + +def make_data(): + while True: + values = np.random.choice(list(string.ascii_letters), size=100) + # ensure we meet the requirements + # 1. first two not null + # 2. first and second are different + if values[0] != values[1]: + break + return values + + +@pytest.fixture +def dtype(): + return CategoricalDtype() + + +@pytest.fixture +def data(): + """Length-100 array for this type. 
+ + * data[0] and data[1] should both be non missing + * data[0] and data[1] should not be equal + """ + return Categorical(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return Categorical([np.nan, "A"]) + + +@pytest.fixture +def data_for_sorting(): + return Categorical(["A", "B", "C"], categories=["C", "A", "B"], ordered=True) + + +@pytest.fixture +def data_missing_for_sorting(): + return Categorical(["A", None, "B"], categories=["B", "A"], ordered=True) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(): + return Categorical(["a", "a", None, None, "b", "b", "a", "c"]) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + @pytest.mark.xfail(reason="Memory usage doesn't match") + def test_memory_usage(self, data): + # Is this deliberate? + super().test_memory_usage(data) + + def test_contains(self, data, data_missing): + # GH-37867 + # na value handling in Categorical.__contains__ is deprecated. + # See base.BaseInterFaceTests.test_contains for more details. + + na_value = data.dtype.na_value + # ensure data without missing values + data = data[~data.isna()] + + # first elements are non-missing + assert data[0] in data + assert data_missing[0] in data_missing + + # check the presence of na_value + assert na_value in data_missing + assert na_value not in data + + # Categoricals can contain other nan-likes than na_value + for na_value_obj in tm.NULL_OBJECTS: + if na_value_obj is na_value: + continue + assert na_value_obj not in data + assert na_value_obj in data_missing # this line differs from super method + + +class TestConstructors(base.BaseConstructorsTests): + def test_empty(self, dtype): + cls = dtype.construct_array_type() + result = cls._empty((4,), dtype=dtype) + + assert isinstance(result, cls) + # the dtype we passed is not initialized, so will not match the + # dtype on our result. + assert result.dtype == CategoricalDtype([]) + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + @pytest.mark.skip(reason="Backwards compatibility") + def test_getitem_scalar(self, data): + # CategoricalDtype.type isn't "correct" since it should + # be a parent of the elements (object). But don't want + # to break things by changing. 
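+ # CategoricalDtype.type likely refers to an internal marker class rather
+ # than to the type of the individual categories, so the base scalar-type
+ # check is skipped here instead of being adjusted.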
+ super().test_getitem_scalar(data) + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestReduce(base.BaseNoReduceTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.xfail(reason="Unobserved categories included") + def test_value_counts(self, all_data, dropna): + return super().test_value_counts(all_data, dropna) + + def test_combine_add(self, data_repeated): + # GH 20825 + # When adding categoricals in combine, result is a string + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + expected = pd.Series( + [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))] + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series([a + val for a in list(orig_data1)]) + self.assert_series_equal(result, expected) + + +class TestCasting(base.BaseCastingTests): + @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) + @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), pd.NaT]]) + def test_cast_nan_to_int(self, cls, values): + # GH 28406 + s = cls(values) + + msg = "Cannot (cast|convert)" + with pytest.raises((ValueError, TypeError), match=msg): + s.astype(int) + + @pytest.mark.parametrize( + "expected", + [ + pd.Series(["2019", "2020"], dtype="datetime64[ns, UTC]"), + pd.Series([0, 0], dtype="timedelta64[ns]"), + pd.Series([pd.Period("2019"), pd.Period("2020")], dtype="period[A-DEC]"), + pd.Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval"), + pd.Series([1, np.nan], dtype="Int64"), + ], + ) + def test_cast_category_to_extension_dtype(self, expected): + # GH 28668 + result = expected.astype("category").astype(expected.dtype) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected", + [ + ( + "datetime64[ns]", + np.array(["2015-01-01T00:00:00.000000000"], dtype="datetime64[ns]"), + ), + ( + "datetime64[ns, MET]", + pd.DatetimeIndex( + [Timestamp("2015-01-01 00:00:00+0100", tz="MET")] + ).array, + ), + ], + ) + def test_consistent_casting(self, dtype, expected): + # GH 28448 + result = Categorical(["2015-01-01"]).astype(dtype) + assert result == expected + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + # frame & scalar + op_name = all_arithmetic_operators + if op_name == "__rmod__": + request.node.add_marker( + pytest.mark.xfail( + reason="rmod never called when string is first argument" + ) + ) + super().test_arith_frame_with_scalar(data, op_name) + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): + op_name = all_arithmetic_operators + if op_name == "__rmod__": + request.node.add_marker( + pytest.mark.xfail( + reason="rmod never called when string is first argument" + ) + ) + super().test_arith_series_with_scalar(data, op_name) + + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with pytest.raises(TypeError, match="cannot perform|unsupported operand"): + ser + data + + def test_divmod_series_array(self): + # GH 23287 + # skipping because it is not implemented + pass + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super()._check_divmod_op(s, op, other, exc=TypeError) + + +class 
TestComparisonOps(base.BaseComparisonOpsTests): + def _compare_other(self, s, data, op, other): + op_name = f"__{op.__name__}__" + if op_name == "__eq__": + result = op(s, other) + expected = s.combine(other, lambda x, y: x == y) + assert (result == expected).all() + + elif op_name == "__ne__": + result = op(s, other) + expected = s.combine(other, lambda x, y: x != y) + assert (result == expected).all() + + else: + msg = "Unordered Categoricals can only compare equality or not" + with pytest.raises(TypeError, match=msg): + op(data, other) + + @pytest.mark.parametrize( + "categories", + [["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]], + ) + def test_not_equal_with_na(self, categories): + # https://github.com/pandas-dev/pandas/issues/32276 + c1 = Categorical.from_codes([-1, 0], categories=categories) + c2 = Categorical.from_codes([0, 1], categories=categories) + + result = c1 != c2 + + assert result.all() + + +class TestParsing(base.BaseParsingTests): + pass + + +class Test2DCompat(base.NDArrayBacked2DTests): + def test_repr_2d(self, data): + # Categorical __repr__ doesn't include "Categorical", so we need + # to special-case + res = repr(data.reshape(1, -1)) + assert res.count("\nCategories") == 1 + + res = repr(data.reshape(-1, 1)) + assert res.count("\nCategories") == 1 diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py new file mode 100644 index 00000000..62bc2501 --- /dev/null +++ b/pandas/tests/extension/test_common.py @@ -0,0 +1,81 @@ +import numpy as np +import pytest + +from pandas.core.dtypes import dtypes +from pandas.core.dtypes.common import is_extension_array_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ExtensionArray + + +class DummyDtype(dtypes.ExtensionDtype): + pass + + +class DummyArray(ExtensionArray): + def __init__(self, data) -> None: + self.data = data + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return DummyDtype() + + def astype(self, dtype, copy=True): + # we don't support anything but a single dtype + if isinstance(dtype, DummyDtype): + if copy: + return type(self)(self.data) + return self + + return np.array(self, dtype=dtype, copy=copy) + + +class TestExtensionArrayDtype: + @pytest.mark.parametrize( + "values", + [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(np.array([1, 2])), + ], + ) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize("values", [np.array([]), pd.Series(np.array([]))]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) + + +def test_astype(): + + arr = DummyArray(np.array([1, 2, 3])) + expected = np.array([1, 2, 3], dtype=object) + + result = arr.astype(object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("object") + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_no_copy(): + arr = DummyArray(np.array([1, 2, 3], dtype=np.int64)) + result = arr.astype(arr.dtype, copy=False) + + assert arr is result + + result = arr.astype(arr.dtype) + assert arr is not result + + +@pytest.mark.parametrize("dtype", [dtypes.CategoricalDtype(), dtypes.IntervalDtype()]) +def test_is_extension_array_dtype(dtype): + assert isinstance(dtype, dtypes.ExtensionDtype) + assert is_extension_array_dtype(dtype) diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py new 
file mode 100644 index 00000000..92796c60 --- /dev/null +++ b/pandas/tests/extension/test_datetime.py @@ -0,0 +1,194 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas.core.arrays import DatetimeArray +from pandas.tests.extension import base + + +@pytest.fixture(params=["US/Central"]) +def dtype(request): + return DatetimeTZDtype(unit="ns", tz=request.param) + + +@pytest.fixture +def data(dtype): + data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), dtype=dtype) + return data + + +@pytest.fixture +def data_missing(dtype): + return DatetimeArray( + np.array(["NaT", "2000-01-01"], dtype="datetime64[ns]"), dtype=dtype + ) + + +@pytest.fixture +def data_for_sorting(dtype): + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + c = pd.Timestamp("2000-01-03") + return DatetimeArray(np.array([b, c, a], dtype="datetime64[ns]"), dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + return DatetimeArray(np.array([b, "NaT", a], dtype="datetime64[ns]"), dtype=dtype) + + +@pytest.fixture +def data_for_grouping(dtype): + """ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + c = pd.Timestamp("2000-01-03") + na = "NaT" + return DatetimeArray( + np.array([b, b, na, na, a, a, b, c], dtype="datetime64[ns]"), dtype=dtype + ) + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return a is pd.NaT and a is b + + return cmp + + +@pytest.fixture +def na_value(): + return pd.NaT + + +# ---------------------------------------------------------------------------- +class BaseDatetimeTests: + pass + + +# ---------------------------------------------------------------------------- +# Tests +class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests): + def test_series_constructor(self, data): + # Series construction drops any .freq attr + data = data._with_freq(None) + super().test_series_constructor(data) + + +class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMethods(BaseDatetimeTests, base.BaseMethodsTests): + def test_combine_add(self, data_repeated): + # Timestamp.__add__(Timestamp) not defined + pass + + +class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): + pass + + +class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests): + implements = {"__sub__", "__rsub__"} + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + if all_arithmetic_operators in self.implements: + df = pd.DataFrame({"A": data}) + self.check_opname(df, all_arithmetic_operators, 
data[0], exc=None) + else: + # ... but not the rest. + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + ser = pd.Series(data) + self.check_opname(ser, all_arithmetic_operators, ser.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_add_series_with_extension_array(self, data): + # Datetime + Datetime not implemented + ser = pd.Series(data) + msg = "cannot add DatetimeArray and DatetimeArray" + with pytest.raises(TypeError, match=msg): + ser + data + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + ser = pd.Series(data) + self.check_opname(ser, all_arithmetic_operators, ser.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_divmod_series_array(self): + # GH 23287 + # skipping because it is not implemented + pass + + +class TestCasting(BaseDatetimeTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests): + pass + + +class TestMissing(BaseDatetimeTests, base.BaseMissingTests): + pass + + +class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests): + pass + + +class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests): + pass + + +class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests): + pass + + +class TestPrinting(BaseDatetimeTests, base.BasePrintingTests): + pass + + +class Test2DCompat(BaseDatetimeTests, base.NDArrayBacked2DTests): + pass diff --git a/pandas/tests/extension/test_extension.py b/pandas/tests/extension/test_extension.py new file mode 100644 index 00000000..a4b1a4b4 --- /dev/null +++ b/pandas/tests/extension/test_extension.py @@ -0,0 +1,40 @@ +""" +Tests for behavior if an author does *not* implement EA methods. 
+""" +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.arrays import ExtensionArray + + +class MyEA(ExtensionArray): + def __init__(self, values) -> None: + self._values = values + + +@pytest.fixture +def data(): + arr = np.arange(10) + return MyEA(arr) + + +class TestExtensionArray: + def test_errors(self, data, all_arithmetic_operators): + # invalid ops + op_name = all_arithmetic_operators + with pytest.raises(AttributeError): + getattr(data, op_name) + + +def test_depr_na_sentinel(): + # GH#46910 + msg = "The `na_sentinel` argument of `MyEA.factorize` is deprecated" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + + class MyEA(ExtensionArray): + def factorize(self, na_sentinel=-1): + pass + + with tm.assert_produces_warning(None): + MyEA() diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py new file mode 100644 index 00000000..13dec96b --- /dev/null +++ b/pandas/tests/extension/test_external_block.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +from pandas._libs.internals import BlockPlacement +import pandas.util._test_decorators as td + +import pandas as pd +from pandas.core.internals import BlockManager +from pandas.core.internals.blocks import ExtensionBlock + +pytestmark = td.skip_array_manager_invalid_test + + +class CustomBlock(ExtensionBlock): + + _holder = np.ndarray + + # Cannot override final attribute "_can_hold_na" + @property # type: ignore[misc] + def _can_hold_na(self) -> bool: + return False + + +@pytest.fixture +def df(): + df1 = pd.DataFrame({"a": [1, 2, 3]}) + blocks = df1._mgr.blocks + values = np.arange(3, dtype="int64") + bp = BlockPlacement(slice(1, 2)) + custom_block = CustomBlock(values, placement=bp, ndim=2) + blocks = blocks + (custom_block,) + block_manager = BlockManager(blocks, [pd.Index(["a", "b"]), df1.index]) + return pd.DataFrame(block_manager) + + +def test_concat_axis1(df): + # GH17954 + df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]}) + res = pd.concat([df, df2], axis=1) + assert isinstance(res._mgr.blocks[1], CustomBlock) diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py new file mode 100644 index 00000000..ff0ff739 --- /dev/null +++ b/pandas/tests/extension/test_floating.py @@ -0,0 +1,215 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_extension_array_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_float_dtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) +from pandas.tests.extension import base + + +def make_data(): + return ( + list(np.arange(0.1, 0.9, 0.1)) + + [pd.NA] + + list(np.arange(1, 9.8, 0.1)) + + [pd.NA] + + [9.9, 10.0] + ) + + +@pytest.fixture(params=[Float32Dtype, Float64Dtype]) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_for_twos(dtype): + return pd.array(np.ones(100) * 2, dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return pd.array([pd.NA, 0.1], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return pd.array([0.1, 0.2, 0.0], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return pd.array([0.1, pd.NA, 0.0], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are pd.NA + return lambda x, y: x is pd.NA and y is pd.NA + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype): + b = 0.1 + a = 0.0 + c = 0.2 + na = pd.NA + return pd.array([b, b, na, na, a, a, b, c], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + sdtype = tm.get_dtype(s) + if ( + hasattr(other, "dtype") + and not is_extension_array_dtype(other.dtype) + and is_float_dtype(other.dtype) + ): + # other is np.float64 and would therefore always result in + # upcasting, so keeping other as same numpy_dtype + other = other.astype(sdtype.numpy_dtype) + + result = op(s, other) + expected = self._combine(s, other, op) + + # combine method result in 'biggest' (float64) dtype + expected = expected.astype(sdtype) + + self.assert_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def _check_divmod_op(self, s, op, other, exc=None): + super()._check_divmod_op(s, op, other, None) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + # TODO: share with IntegerArray? 
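+ # Comparisons on the masked float arrays return BooleanDtype results,
+ # while Series.combine on the raw values does not, so the expected value
+ # built below is cast to "boolean" before being compared.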
+ def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + result = op(s, other) + # Override to do the astype to boolean + expected = s.combine(other, op).astype("boolean") + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def _compare_other(self, s, data, op, other): + op_name = f"__{op.__name__}__" + self.check_opname(s, op_name, other) + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + pass + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + # overwrite to ensure pd.NA is tested instead of np.nan + # https://github.com/pandas-dev/pandas/issues/30958 + result = getattr(s, op_name)(skipna=skipna) + if not skipna and s.isna().any(): + expected = pd.NA + else: + expected = getattr(s.dropna().astype(s.dtype.numpy_dtype), op_name)( + skipna=skipna + ) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") +class TestBooleanReduce(base.BaseBooleanReduceTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestParsing(base.BaseParsingTests): + pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py new file mode 100644 index 00000000..a6cf820d --- /dev/null +++ b/pandas/tests/extension/test_integer.py @@ -0,0 +1,236 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import ( + is_extension_array_dtype, + is_integer_dtype, +) +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) +from pandas.tests.extension import base + + +def make_data(): + return list(range(1, 9)) + [pd.NA] + list(range(10, 98)) + [pd.NA] + [99, 100] + + +@pytest.fixture( + params=[ + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + ] +) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_for_twos(dtype): + return pd.array(np.ones(100) * 2, dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return pd.array([pd.NA, 1], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return pd.array([1, 2, 0], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return pd.array([1, pd.NA, 0], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are pd.NA + return lambda x, y: x is pd.NA and y is pd.NA + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype): + b = 1 + a = 0 + c = 2 + na = pd.NA + return pd.array([b, b, na, na, a, a, b, c], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + sdtype = tm.get_dtype(s) + + if ( + hasattr(other, "dtype") + and not is_extension_array_dtype(other.dtype) + and is_integer_dtype(other.dtype) + and sdtype.is_unsigned_integer + ): + # TODO: comment below is inaccurate; other can be int8, int16, ... + # and the trouble is that e.g. 
if s is UInt8 and other is int8, + # then result is UInt16 + # other is np.int64 and would therefore always result in + # upcasting, so keeping other as same numpy_dtype + other = other.astype(sdtype.numpy_dtype) + + result = op(s, other) + expected = self._combine(s, other, op) + + if op_name in ("__rtruediv__", "__truediv__", "__div__"): + expected = expected.fillna(np.nan).astype("Float64") + else: + # combine method result in 'biggest' (int64) dtype + expected = expected.astype(sdtype) + + self.assert_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def _check_divmod_op(self, s, op, other, exc=None): + super()._check_divmod_op(s, op, other, None) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + result = op(s, other) + # Override to do the astype to boolean + expected = s.combine(other, op).astype("boolean") + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def _compare_other(self, s, data, op, other): + op_name = f"__{op.__name__}__" + self.check_opname(s, op_name, other) + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + # for test_concat_mixed_dtypes test + # concat of an Integer and Int coerces to object dtype + # TODO(jreback) once integrated this would + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + pass + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + # overwrite to ensure pd.NA is tested instead of np.nan + # https://github.com/pandas-dev/pandas/issues/30958 + result = getattr(s, op_name)(skipna=skipna) + if not skipna and s.isna().any(): + expected = pd.NA + else: + expected = getattr(s.dropna().astype("int64"), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") +class TestBooleanReduce(base.BaseBooleanReduceTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestParsing(base.BaseParsingTests): + pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py new file mode 100644 index 00000000..0f916cea --- /dev/null +++ b/pandas/tests/extension/test_interval.py @@ -0,0 +1,188 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). 
+ +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import IntervalDtype + +from pandas import ( + Interval, + Series, +) +from pandas.core.arrays import IntervalArray +from pandas.tests.extension import base + + +def make_data(): + N = 100 + left_array = np.random.uniform(size=N).cumsum() + right_array = left_array + np.random.uniform(size=N) + return [Interval(left, right) for left, right in zip(left_array, right_array)] + + +@pytest.fixture +def dtype(): + return IntervalDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return IntervalArray(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return IntervalArray.from_tuples([None, (0, 1)]) + + +@pytest.fixture +def data_for_sorting(): + return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)]) + + +@pytest.fixture +def data_missing_for_sorting(): + return IntervalArray.from_tuples([(1, 2), None, (0, 1)]) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(): + a = (0, 1) + b = (1, 2) + c = (2, 3) + return IntervalArray.from_tuples([b, b, None, None, a, a, b, c]) + + +class BaseInterval: + pass + + +class TestDtype(BaseInterval, base.BaseDtypeTests): + pass + + +class TestCasting(BaseInterval, base.BaseCastingTests): + pass + + +class TestConstructors(BaseInterval, base.BaseConstructorsTests): + pass + + +class TestGetitem(BaseInterval, base.BaseGetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestGrouping(BaseInterval, base.BaseGroupbyTests): + pass + + +class TestInterface(BaseInterval, base.BaseInterfaceTests): + pass + + +class TestReduce(base.BaseNoReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + ser = Series(data) + + if op_name in ["min", "max"]: + # IntervalArray *does* implement these + assert getattr(ser, op_name)(skipna=skipna) in data + assert getattr(data, op_name)(skipna=skipna) in data + return + + super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) + + +class TestMethods(BaseInterval, base.BaseMethodsTests): + @pytest.mark.xfail(reason="addition is not defined for intervals") + def test_combine_add(self, data_repeated): + super().test_combine_add(data_repeated) + + @pytest.mark.xfail( + reason="Raises with incorrect message bc it disallows *all* listlikes " + "instead of just wrong-length listlikes" + ) + def test_fillna_length_mismatch(self, data_missing): + super().test_fillna_length_mismatch(data_missing) + + +class TestMissing(BaseInterval, base.BaseMissingTests): + # Index.fillna only accepts scalar `value`, so we have to xfail all + # non-scalar fill tests. + unsupported_fill = pytest.mark.xfail( + reason="Unsupported fillna option for Interval." 
+ ) + + @unsupported_fill + def test_fillna_limit_pad(self): + super().test_fillna_limit_pad() + + @unsupported_fill + def test_fillna_series_method(self): + super().test_fillna_series_method() + + @unsupported_fill + def test_fillna_limit_backfill(self): + super().test_fillna_limit_backfill() + + @unsupported_fill + def test_fillna_no_op_returns_copy(self): + super().test_fillna_no_op_returns_copy() + + @unsupported_fill + def test_fillna_series(self): + super().test_fillna_series() + + def test_fillna_non_scalar_raises(self, data_missing): + msg = "can only insert Interval objects and NA into an IntervalArray" + with pytest.raises(TypeError, match=msg): + data_missing.fillna([1, 1]) + + +class TestReshaping(BaseInterval, base.BaseReshapingTests): + pass + + +class TestSetitem(BaseInterval, base.BaseSetitemTests): + pass + + +class TestPrinting(BaseInterval, base.BasePrintingTests): + @pytest.mark.xfail(reason="Interval has custom repr") + def test_array_repr(self, data, size): + super().test_array_repr() + + +class TestParsing(BaseInterval, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + expected_msg = r".*must implement _from_sequence_of_strings.*" + with pytest.raises(NotImplementedError, match=expected_msg): + super().test_EA_types(engine, data) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py new file mode 100644 index 00000000..148059a6 --- /dev/null +++ b/pandas/tests/extension/test_numpy.py @@ -0,0 +1,454 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +Note: we do not bother with base.BaseIndexTests because PandasArray +will never be held in an Index. 
+""" +import numpy as np +import pytest + +from pandas.core.dtypes.cast import can_hold_element +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + PandasDtype, +) + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.numpy_ import PandasArray +from pandas.core.internals import blocks +from pandas.tests.extension import base + + +def _can_hold_element_patched(obj, element) -> bool: + if isinstance(element, PandasArray): + element = element.to_numpy() + return can_hold_element(obj, element) + + +orig_assert_attr_equal = tm.assert_attr_equal + + +def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): + """ + patch tm.assert_attr_equal so PandasDtype("object") is closed enough to + np.dtype("object") + """ + if attr == "dtype": + lattr = getattr(left, "dtype", None) + rattr = getattr(right, "dtype", None) + if isinstance(lattr, PandasDtype) and not isinstance(rattr, PandasDtype): + left = left.astype(lattr.numpy_dtype) + elif isinstance(rattr, PandasDtype) and not isinstance(lattr, PandasDtype): + right = right.astype(rattr.numpy_dtype) + + orig_assert_attr_equal(attr, left, right, obj) + + +@pytest.fixture(params=["float", "object"]) +def dtype(request): + return PandasDtype(np.dtype(request.param)) + + +@pytest.fixture +def allow_in_pandas(monkeypatch): + """ + A monkeypatch to tells pandas to let us in. + + By default, passing a PandasArray to an index / series / frame + constructor will unbox that PandasArray to an ndarray, and treat + it as a non-EA column. We don't want people using EAs without + reason. + + The mechanism for this is a check against ABCPandasArray + in each constructor. + + But, for testing, we need to allow them in pandas. So we patch + the _typ of PandasArray, so that we evade the ABCPandasArray + check. + """ + with monkeypatch.context() as m: + m.setattr(PandasArray, "_typ", "extension") + m.setattr(blocks, "can_hold_element", _can_hold_element_patched) + m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal) + yield + + +@pytest.fixture +def data(allow_in_pandas, dtype): + if dtype.numpy_dtype == "object": + return pd.Series([(i,) for i in range(100)]).array + return PandasArray(np.arange(1, 101, dtype=dtype._dtype)) + + +@pytest.fixture +def data_missing(allow_in_pandas, dtype): + if dtype.numpy_dtype == "object": + return PandasArray(np.array([np.nan, (1,)], dtype=object)) + return PandasArray(np.array([np.nan, 1.0])) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return np.isnan(a) and np.isnan(b) + + return cmp + + +@pytest.fixture +def data_for_sorting(allow_in_pandas, dtype): + """Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + """ + if dtype.numpy_dtype == "object": + # Use an empty tuple for first element, then remove, + # to disable np.array's shape inference. + return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:]) + return PandasArray(np.array([1, 2, 0])) + + +@pytest.fixture +def data_missing_for_sorting(allow_in_pandas, dtype): + """Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + if dtype.numpy_dtype == "object": + return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object)) + return PandasArray(np.array([1, np.nan, 0])) + + +@pytest.fixture +def data_for_grouping(allow_in_pandas, dtype): + """Data for factorization, grouping, and unique tests. 
+ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + if dtype.numpy_dtype == "object": + a, b, c = (1,), (2,), (3,) + else: + a, b, c = np.arange(3) + return PandasArray( + np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype) + ) + + +@pytest.fixture +def skip_numpy_object(dtype, request): + """ + Tests for PandasArray with nested data. Users typically won't create + these objects via `pd.array`, but they can show up through `.array` + on a Series with nested data. Many of the base tests fail, as they aren't + appropriate for nested data. + + This fixture allows these tests to be skipped when used as a usefixtures + marker to either an individual test or a test class. + """ + if dtype == "object": + mark = pytest.mark.xfail(reason="Fails for object dtype") + request.node.add_marker(mark) + + +skip_nested = pytest.mark.usefixtures("skip_numpy_object") + + +class BaseNumPyTests: + @classmethod + def assert_series_equal(cls, left, right, *args, **kwargs): + # base class tests hard-code expected values with numpy dtypes, + # whereas we generally want the corresponding PandasDtype + if ( + isinstance(right, pd.Series) + and not isinstance(right.dtype, ExtensionDtype) + and isinstance(left.dtype, PandasDtype) + ): + right = right.astype(PandasDtype(right.dtype)) + return tm.assert_series_equal(left, right, *args, **kwargs) + + +class TestCasting(BaseNumPyTests, base.BaseCastingTests): + @skip_nested + def test_astype_str(self, data): + # ValueError: setting an array element with a sequence + super().test_astype_str(data) + + +class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests): + @pytest.mark.skip(reason="We don't register our dtype") + # We don't want to register. This test should probably be split in two. + def test_from_dtype(self, data): + pass + + @skip_nested + def test_series_constructor_scalar_with_index(self, data, dtype): + # ValueError: Length of passed values is 1, index implies 3. + super().test_series_constructor_scalar_with_index(data, dtype) + + +class TestDtype(BaseNumPyTests, base.BaseDtypeTests): + def test_check_dtype(self, data, request): + if data.dtype.numpy_dtype == "object": + request.node.add_marker( + pytest.mark.xfail( + reason=f"PandasArray expectedly clashes with a " + f"NumPy name: {data.dtype.numpy_dtype}" + ) + ) + super().test_check_dtype(data) + + +class TestGetitem(BaseNumPyTests, base.BaseGetitemTests): + @skip_nested + def test_getitem_scalar(self, data): + # AssertionError + super().test_getitem_scalar(data) + + +class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) + + +class TestInterface(BaseNumPyTests, base.BaseInterfaceTests): + @skip_nested + def test_array_interface(self, data): + # NumPy array shape inference + super().test_array_interface(data) + + +class TestMethods(BaseNumPyTests, base.BaseMethodsTests): + @skip_nested + def test_shift_fill_value(self, data): + # np.array shape inference. Shift implementation fails. + super().test_shift_fill_value(data) + + @skip_nested + def test_fillna_copy_frame(self, data_missing): + # The "scalar" for this array isn't a scalar. + super().test_fillna_copy_frame(data_missing) + + @skip_nested + def test_fillna_copy_series(self, data_missing): + # The "scalar" for this array isn't a scalar. 
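+ # For the object-dtype parametrization, the data_missing fixture holds a
+ # tuple such as (1,) as its valid value, so any fill value here would look
+ # list-like rather than scalar; hence the skip_nested marker above.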
+ super().test_fillna_copy_series(data_missing) + + @skip_nested + def test_searchsorted(self, data_for_sorting, as_series): + # Test setup fails. + super().test_searchsorted(data_for_sorting, as_series) + + @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype") + def test_diff(self, data, periods): + return super().test_diff(data, periods) + + def test_insert(self, data, request): + if data.dtype.numpy_dtype == object: + mark = pytest.mark.xfail(reason="Dimension mismatch in np.concatenate") + request.node.add_marker(mark) + + super().test_insert(data) + + @skip_nested + def test_insert_invalid(self, data, invalid_scalar): + # PandasArray[object] can hold anything, so skip + super().test_insert_invalid(data, invalid_scalar) + + +class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): + divmod_exc = None + series_scalar_exc = None + frame_scalar_exc = None + series_array_exc = None + + @skip_nested + def test_divmod(self, data): + super().test_divmod(data) + + @skip_nested + def test_divmod_series_array(self, data): + ser = pd.Series(data) + self._check_divmod_op(ser, divmod, data, exc=None) + + @skip_nested + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + opname = all_arithmetic_operators + if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]: + mark = pytest.mark.xfail(reason="Fails for object dtype") + request.node.add_marker(mark) + super().test_arith_series_with_array(data, all_arithmetic_operators) + + @skip_nested + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + +class TestPrinting(BaseNumPyTests, base.BasePrintingTests): + pass + + +class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + # avoid coercing int -> float. Just cast to the actual numpy type. + expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_boolean_reductions, skipna): + super().test_reduce_series(data, all_boolean_reductions, skipna) + + +@skip_nested +class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests): + pass + + +class TestMissing(BaseNumPyTests, base.BaseMissingTests): + @skip_nested + def test_fillna_series(self, data_missing): + # Non-scalar "scalar" values. + super().test_fillna_series(data_missing) + + @skip_nested + def test_fillna_frame(self, data_missing): + # Non-scalar "scalar" values. 
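+        # (data_missing holds [nan, (1,)] for the object case, so the value
+        # passed to DataFrame.fillna would be a tuple; see skip_numpy_object)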
+ super().test_fillna_frame(data_missing) + + +class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): + @pytest.mark.parametrize( + "in_frame", + [ + True, + pytest.param( + False, + marks=pytest.mark.xfail(reason="PandasArray inconsistently extracted"), + ), + ], + ) + def test_concat(self, data, in_frame): + super().test_concat(data, in_frame) + + +class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): + @skip_nested + def test_setitem_invalid(self, data, invalid_scalar): + # object dtype can hold anything, so doesn't raise + super().test_setitem_invalid(data, invalid_scalar) + + @skip_nested + def test_setitem_sequence_broadcasts(self, data, box_in_series): + # ValueError: cannot set using a list-like indexer with a different + # length than the value + super().test_setitem_sequence_broadcasts(data, box_in_series) + + @skip_nested + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_broadcast(self, data, setter): + # ValueError: cannot set using a list-like indexer with a different + # length than the value + super().test_setitem_mask_broadcast(data, setter) + + @skip_nested + def test_setitem_scalar_key_sequence_raise(self, data): + # Failed: DID NOT RAISE + super().test_setitem_scalar_key_sequence_raise(data) + + # TODO: there is some issue with PandasArray, therefore, + # skip the setitem test for now, and fix it later (GH 31446) + + @skip_nested + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array"], + ) + def test_setitem_mask(self, data, mask, box_in_series): + super().test_setitem_mask(data, mask, box_in_series) + + def test_setitem_mask_raises(self, data, box_in_series): + super().test_setitem_mask_raises(data, box_in_series) + + @skip_nested + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series): + super().test_setitem_integer_array(data, idx, box_in_series) + + @pytest.mark.parametrize( + "idx, box_in_series", + [ + ([0, 1, 2, pd.NA], False), + pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + ], + ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ) + def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): + super().test_setitem_integer_with_missing_raises(data, idx, box_in_series) + + @skip_nested + def test_setitem_slice(self, data, box_in_series): + super().test_setitem_slice(data, box_in_series) + + @skip_nested + def test_setitem_loc_iloc_slice(self, data): + super().test_setitem_loc_iloc_slice(data) + + def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): + # https://github.com/pandas-dev/pandas/issues/32395 + df = expected = pd.DataFrame({"data": pd.Series(data)}) + result = pd.DataFrame(index=df.index) + + # because result has object dtype, the attempt to do setting inplace + # is successful, and object dtype is retained + key = full_indexer(df) + result.loc[key, "data"] = df["data"] + + # base class method has expected = df; PandasArray behaves oddly because + # we patch _typ for these tests. 
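+        # For non-object dtypes with a partial indexer, the assigned values end
+        # up stored as a plain ndarray rather than a PandasArray, so the
+        # expected frame below is rebuilt from data.to_numpy().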
+ if data.dtype.numpy_dtype != object: + if not isinstance(key, slice) or key != slice(None): + expected = pd.DataFrame({"data": data.to_numpy()}) + self.assert_frame_equal(result, expected) + + +@skip_nested +class TestParsing(BaseNumPyTests, base.BaseParsingTests): + pass + + +class Test2DCompat(BaseNumPyTests, base.NDArrayBacked2DTests): + pass diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py new file mode 100644 index 00000000..06b372ab --- /dev/null +++ b/pandas/tests/extension/test_period.py @@ -0,0 +1,191 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pytest + +from pandas._libs import iNaT + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +from pandas.core.arrays import PeriodArray +from pandas.tests.extension import base + + +@pytest.fixture(params=["D", "2D"]) +def dtype(request): + return PeriodDtype(freq=request.param) + + +@pytest.fixture +def data(dtype): + return PeriodArray(np.arange(1970, 2070), freq=dtype.freq) + + +@pytest.fixture +def data_for_twos(dtype): + return PeriodArray(np.ones(100) * 2, freq=dtype.freq) + + +@pytest.fixture +def data_for_sorting(dtype): + return PeriodArray([2018, 2019, 2017], freq=dtype.freq) + + +@pytest.fixture +def data_missing(dtype): + return PeriodArray([iNaT, 2017], freq=dtype.freq) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return PeriodArray([2018, iNaT, 2017], freq=dtype.freq) + + +@pytest.fixture +def data_for_grouping(dtype): + B = 2018 + NA = iNaT + A = 2017 + C = 2019 + return PeriodArray([B, B, NA, NA, A, A, B, C], freq=dtype.freq) + + +@pytest.fixture +def na_value(): + return pd.NaT + + +class BasePeriodTests: + pass + + +class TestPeriodDtype(BasePeriodTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BasePeriodTests, base.BaseConstructorsTests): + pass + + +class TestGetitem(BasePeriodTests, base.BaseGetitemTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMethods(BasePeriodTests, base.BaseMethodsTests): + def test_combine_add(self, data_repeated): + # Period + Period is not defined. + pass + + +class TestInterface(BasePeriodTests, base.BaseInterfaceTests): + + pass + + +class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests): + implements = {"__sub__", "__rsub__"} + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + if all_arithmetic_operators in self.implements: + df = pd.DataFrame({"A": data}) + self.check_opname(df, all_arithmetic_operators, data[0], exc=None) + else: + # ... but not the rest. + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # we implement substitution... + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None) + else: + # ... 
but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + super()._check_divmod_op(s, op, other, exc=TypeError) + + def test_add_series_with_extension_array(self, data): + # we don't implement + for Period + s = pd.Series(data) + msg = ( + r"unsupported operand type\(s\) for \+: " + r"\'PeriodArray\' and \'PeriodArray\'" + ) + with pytest.raises(TypeError, match=msg): + s + data + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_direct_arith_with_ndframe_returns_not_implemented(self, data, box): + # Override to use __sub__ instead of __add__ + other = pd.Series(data) + if box is pd.DataFrame: + other = other.to_frame() + + result = data.__sub__(other) + assert result is NotImplemented + + +class TestCasting(BasePeriodTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BasePeriodTests, base.BaseComparisonOpsTests): + pass + + +class TestMissing(BasePeriodTests, base.BaseMissingTests): + pass + + +class TestReshaping(BasePeriodTests, base.BaseReshapingTests): + pass + + +class TestSetitem(BasePeriodTests, base.BaseSetitemTests): + pass + + +class TestGroupby(BasePeriodTests, base.BaseGroupbyTests): + pass + + +class TestPrinting(BasePeriodTests, base.BasePrintingTests): + pass + + +class TestParsing(BasePeriodTests, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + super().test_EA_types(engine, data) + + +class Test2DCompat(BasePeriodTests, base.NDArrayBacked2DTests): + pass diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py new file mode 100644 index 00000000..0a2686a2 --- /dev/null +++ b/pandas/tests/extension/test_sparse.py @@ -0,0 +1,528 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +from pandas.core.dtypes.common import is_object_dtype + +import pandas as pd +from pandas import SparseDtype +import pandas._testing as tm +from pandas.arrays import SparseArray +from pandas.tests.extension import base + + +def make_data(fill_value): + if np.isnan(fill_value): + data = np.random.uniform(size=100) + else: + data = np.random.randint(1, 100, size=100) + if data[0] == data[1]: + data[0] += 1 + + data[2::3] = fill_value + return data + + +@pytest.fixture +def dtype(): + return SparseDtype() + + +@pytest.fixture(params=[0, np.nan]) +def data(request): + """Length-100 PeriodArray for semantics test.""" + res = SparseArray(make_data(request.param), fill_value=request.param) + return res + + +@pytest.fixture +def data_for_twos(): + return SparseArray(np.ones(100) * 2) + + +@pytest.fixture(params=[0, np.nan]) +def data_missing(request): + """Length 2 array with [NA, Valid]""" + return SparseArray([np.nan, 1], fill_value=request.param) + + +@pytest.fixture(params=[0, np.nan]) +def data_repeated(request): + """Return different versions of data for count times""" + + def gen(count): + for _ in range(count): + yield SparseArray(make_data(request.param), fill_value=request.param) + + yield gen + + +@pytest.fixture(params=[0, np.nan]) +def data_for_sorting(request): + return SparseArray([2, 3, 1], fill_value=request.param) + + +@pytest.fixture(params=[0, np.nan]) +def data_missing_for_sorting(request): + return SparseArray([2, np.nan, 1], fill_value=request.param) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def na_cmp(): + return lambda left, right: pd.isna(left) and pd.isna(right) + + +@pytest.fixture(params=[0, np.nan]) +def data_for_grouping(request): + return SparseArray([1, 1, np.nan, np.nan, 2, 2, 1, 3], fill_value=request.param) + + +@pytest.fixture(params=[0, np.nan]) +def data_for_compare(request): + return SparseArray([0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0], fill_value=request.param) + + +class BaseSparseTests: + def _check_unsupported(self, data): + if data.dtype == SparseDtype(int, 0): + pytest.skip("Can't store nan in int array.") + + @pytest.mark.xfail(reason="SparseArray does not support setitem") + def test_ravel(self, data): + super().test_ravel(data) + + +class TestDtype(BaseSparseTests, base.BaseDtypeTests): + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type() is SparseArray + + +class TestInterface(BaseSparseTests, base.BaseInterfaceTests): + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + + +class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): + pass + + +class TestReshaping(BaseSparseTests, base.BaseReshapingTests): + def test_concat_mixed_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/20762 + # This should be the same, aside from concat([sparse, float]) + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"A": [1, 2, 3]}) + df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category") + dfs = [df1, df2, df3] + + # dataframes + result = pd.concat(dfs) + expected = pd.concat( + [x.apply(lambda s: np.asarray(s).astype(object)) for x in dfs] + ) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", + [ + ["A", "B"], + pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b")], 
names=["outer", "inner"] + ), + ], + ) + def test_stack(self, data, columns): + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=False, match="astype from Sparse" + ): + super().test_stack(data, columns) + + def test_concat_columns(self, data, na_value): + self._check_unsupported(data) + super().test_concat_columns(data, na_value) + + def test_concat_extension_arrays_copy_false(self, data, na_value): + self._check_unsupported(data) + super().test_concat_extension_arrays_copy_false(data, na_value) + + def test_align(self, data, na_value): + self._check_unsupported(data) + super().test_align(data, na_value) + + def test_align_frame(self, data, na_value): + self._check_unsupported(data) + super().test_align_frame(data, na_value) + + def test_align_series_frame(self, data, na_value): + self._check_unsupported(data) + super().test_align_series_frame(data, na_value) + + def test_merge(self, data, na_value): + self._check_unsupported(data) + super().test_merge(data, na_value) + + @pytest.mark.xfail(reason="SparseArray does not support setitem") + def test_transpose(self, data): + super().test_transpose(data) + + +class TestGetitem(BaseSparseTests, base.BaseGetitemTests): + def test_get(self, data): + ser = pd.Series(data, index=[2 * i for i in range(len(data))]) + if np.isnan(ser.values.fill_value): + assert np.isnan(ser.get(4)) and np.isnan(ser.iloc[2]) + else: + assert ser.get(4) == ser.iloc[2] + assert ser.get(2) == ser.iloc[1] + + def test_reindex(self, data, na_value): + self._check_unsupported(data) + super().test_reindex(data, na_value) + + +# Skipping TestSetitem, since we don't implement it. + + +class TestIndex(base.BaseIndexTests): + def test_index_from_array(self, data): + msg = "will store that array directly" + with tm.assert_produces_warning(FutureWarning, match=msg): + idx = pd.Index(data) + + if data.dtype.subtype == "f": + assert idx.dtype == np.float64 + elif data.dtype.subtype == "i": + assert idx.dtype == np.int64 + else: + assert idx.dtype == data.dtype.subtype + + # TODO(2.0): should pass once SparseArray is stored directly in Index. 
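+    # Until then pd.Index densifies SparseArray input (after the FutureWarning
+    # checked in test_index_from_array above), which is why that test compares
+    # the resulting Index dtype against the dense subtype, not the sparse dtype.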
+ @pytest.mark.xfail(reason="Index cannot yet store sparse dtype") + def test_index_from_listlike_with_dtype(self, data): + msg = "passing a SparseArray to pd.Index" + with tm.assert_produces_warning(FutureWarning, match=msg): + super().test_index_from_listlike_with_dtype(data) + + +class TestMissing(BaseSparseTests, base.BaseMissingTests): + def test_isna(self, data_missing): + sarr = SparseArray(data_missing) + expected_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value)) + expected = SparseArray([True, False], dtype=expected_dtype) + result = sarr.isna() + tm.assert_sp_array_equal(result, expected) + + # test isna for arr without na + sarr = sarr.fillna(0) + expected_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value)) + expected = SparseArray([False, False], fill_value=False, dtype=expected_dtype) + self.assert_equal(sarr.isna(), expected) + + def test_fillna_limit_pad(self, data_missing): + with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): + super().test_fillna_limit_pad(data_missing) + + def test_fillna_limit_backfill(self, data_missing): + with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): + super().test_fillna_limit_backfill(data_missing) + + def test_fillna_no_op_returns_copy(self, data, request): + if np.isnan(data.fill_value): + request.node.add_marker( + pytest.mark.xfail(reason="returns array with different fill value") + ) + with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): + super().test_fillna_no_op_returns_copy(data) + + def test_fillna_series_method(self, data_missing): + with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): + super().test_fillna_limit_backfill(data_missing) + + @pytest.mark.xfail(reason="Unsupported") + def test_fillna_series(self): + # this one looks doable. + super(self).test_fillna_series() + + def test_fillna_frame(self, data_missing): + # Have to override to specify that fill_value will change. 
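+        # When the original fill_value is NaN, the expected dtype below carries
+        # the filled value as the new fill_value, i.e. SparseDtype(dtype,
+        # fill_value); with a non-NaN fill_value the dtype is left unchanged.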
+ fill_value = data_missing[1] + + result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value) + + if pd.isna(data_missing.fill_value): + dtype = SparseDtype(data_missing.dtype, fill_value) + else: + dtype = data_missing.dtype + + expected = pd.DataFrame( + { + "A": data_missing._from_sequence([fill_value, fill_value], dtype=dtype), + "B": [1, 2], + } + ) + + self.assert_frame_equal(result, expected) + + +class TestMethods(BaseSparseTests, base.BaseMethodsTests): + def test_combine_le(self, data_repeated): + # We return a Series[SparseArray].__le__ returns a + # Series[Sparse[bool]] + # rather than Series[bool] + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series( + SparseArray( + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], + fill_value=False, + ) + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series( + SparseArray([a <= val for a in list(orig_data1)], fill_value=False) + ) + self.assert_series_equal(result, expected) + + def test_fillna_copy_frame(self, data_missing): + arr = data_missing.take([1, 1]) + df = pd.DataFrame({"A": arr}, copy=False) + + filled_val = df.iloc[0, 0] + result = df.fillna(filled_val) + + if hasattr(df._mgr, "blocks"): + assert df.values.base is not result.values.base + assert df.A._values.to_dense() is arr.to_dense() + + def test_fillna_copy_series(self, data_missing): + arr = data_missing.take([1, 1]) + ser = pd.Series(arr) + + filled_val = ser[0] + result = ser.fillna(filled_val) + + assert ser._values is not result._values + assert ser._values.to_dense() is arr.to_dense() + + @pytest.mark.xfail(reason="Not Applicable") + def test_fillna_length_mismatch(self, data_missing): + super().test_fillna_length_mismatch(data_missing) + + def test_where_series(self, data, na_value): + assert data[0] != data[1] + cls = type(data) + a, b = data[:2] + + ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype)) + + cond = np.array([True, True, False, False]) + result = ser.where(cond) + + new_dtype = SparseDtype("float", 0.0) + expected = pd.Series( + cls._from_sequence([a, a, na_value, na_value], dtype=new_dtype) + ) + self.assert_series_equal(result, expected) + + other = cls._from_sequence([a, b, a, b], dtype=data.dtype) + cond = np.array([True, False, True, True]) + result = ser.where(cond, other) + expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) + self.assert_series_equal(result, expected) + + def test_combine_first(self, data, request): + if data.dtype.subtype == "int": + # Right now this is upcasted to float, just like combine_first + # for Series[int] + mark = pytest.mark.xfail( + reason="TODO(SparseArray.__setitem__) will preserve dtype." 
+ ) + request.node.add_marker(mark) + super().test_combine_first(data) + + def test_searchsorted(self, data_for_sorting, as_series): + with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): + super().test_searchsorted(data_for_sorting, as_series) + + def test_shift_0_periods(self, data): + # GH#33856 shifting with periods=0 should return a copy, not same obj + result = data.shift(0) + + data._sparse_values[0] = data._sparse_values[1] + assert result._sparse_values[0] != result._sparse_values[1] + + @pytest.mark.parametrize("method", ["argmax", "argmin"]) + def test_argmin_argmax_all_na(self, method, data, na_value): + # overriding because Sparse[int64, 0] cannot handle na_value + self._check_unsupported(data) + super().test_argmin_argmax_all_na(method, data, na_value) + + @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) + def test_equals(self, data, na_value, as_series, box): + self._check_unsupported(data) + super().test_equals(data, na_value, as_series, box) + + +class TestCasting(BaseSparseTests, base.BaseCastingTests): + def test_astype_object_series(self, all_data): + # Unlike the base class, we do not expect the resulting Block + # to be ObjectBlock / resulting array to be np.dtype("object") + ser = pd.Series(all_data, name="A") + with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): + result = ser.astype(object) + assert is_object_dtype(result.dtype) + assert is_object_dtype(result._mgr.array.dtype) + + def test_astype_object_frame(self, all_data): + # Unlike the base class, we do not expect the resulting Block + # to be ObjectBlock / resulting array to be np.dtype("object") + df = pd.DataFrame({"A": all_data}) + + with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): + result = df.astype(object) + assert is_object_dtype(result._mgr.arrays[0].dtype) + + # check that we can compare the dtypes + comp = result.dtypes == df.dtypes + assert not comp.any() + + def test_astype_str(self, data): + with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): + result = pd.Series(data[:5]).astype(str) + expected_dtype = SparseDtype(str, str(data.fill_value)) + expected = pd.Series([str(x) for x in data[:5]], dtype=expected_dtype) + self.assert_series_equal(result, expected) + + @pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype") + def test_astype_string(self, data): + super().test_astype_string(data) + + +class TestArithmeticOps(BaseSparseTests, base.BaseArithmeticOpsTests): + series_scalar_exc = None + frame_scalar_exc = None + divmod_exc = None + series_array_exc = None + + def _skip_if_different_combine(self, data): + if data.fill_value == 0: + # arith ops call on dtype.fill_value so that the sparsity + # is maintained. Combine can't be called on a dtype in + # general, so we can't make the expected. 
This is tested elsewhere + pytest.skip("Incorrected expected from Series.combine and tested elsewhere") + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + self._skip_if_different_combine(data) + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + self._skip_if_different_combine(data) + super().test_arith_series_with_array(data, all_arithmetic_operators) + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): + if data.dtype.fill_value != 0: + pass + elif all_arithmetic_operators.strip("_") not in [ + "mul", + "rmul", + "floordiv", + "rfloordiv", + "pow", + "mod", + "rmod", + ]: + mark = pytest.mark.xfail(reason="result dtype.fill_value mismatch") + request.node.add_marker(mark) + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + + def _check_divmod_op(self, ser, op, other, exc=NotImplementedError): + # We implement divmod + super()._check_divmod_op(ser, op, other, exc=None) + + +class TestComparisonOps(BaseSparseTests): + def _compare_other(self, data_for_compare: SparseArray, comparison_op, other): + op = comparison_op + + result = op(data_for_compare, other) + assert isinstance(result, SparseArray) + assert result.dtype.subtype == np.bool_ + + if isinstance(other, SparseArray): + fill_value = op(data_for_compare.fill_value, other.fill_value) + else: + fill_value = np.all( + op(np.asarray(data_for_compare.fill_value), np.asarray(other)) + ) + + expected = SparseArray( + op(data_for_compare.to_dense(), np.asarray(other)), + fill_value=fill_value, + dtype=np.bool_, + ) + tm.assert_sp_array_equal(result, expected) + + def test_scalar(self, data_for_compare: SparseArray, comparison_op): + self._compare_other(data_for_compare, comparison_op, 0) + self._compare_other(data_for_compare, comparison_op, 1) + self._compare_other(data_for_compare, comparison_op, -1) + self._compare_other(data_for_compare, comparison_op, np.nan) + + @pytest.mark.xfail(reason="Wrong indices") + def test_array(self, data_for_compare: SparseArray, comparison_op): + arr = np.linspace(-4, 5, 10) + self._compare_other(data_for_compare, comparison_op, arr) + + @pytest.mark.xfail(reason="Wrong indices") + def test_sparse_array(self, data_for_compare: SparseArray, comparison_op): + arr = data_for_compare + 1 + self._compare_other(data_for_compare, comparison_op, arr) + arr = data_for_compare * 2 + self._compare_other(data_for_compare, comparison_op, arr) + + +class TestPrinting(BaseSparseTests, base.BasePrintingTests): + @pytest.mark.xfail(reason="Different repr") + def test_array_repr(self, data, size): + super().test_array_repr(data, size) + + +class TestParsing(BaseSparseTests, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + expected_msg = r".*must implement _from_sequence_of_strings.*" + with pytest.raises(NotImplementedError, match=expected_msg): + with tm.assert_produces_warning(FutureWarning, match="astype from"): + super().test_EA_types(engine, data) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py new file mode 100644 index 00000000..73a2e017 --- /dev/null +++ b/pandas/tests/extension/test_string.py @@ -0,0 +1,393 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. 
+The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import string + +import numpy as np +import pytest + +from pandas.compat import ( + pa_version_under6p0, + pa_version_under7p0, +) +from pandas.errors import PerformanceWarning + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ArrowStringArray +from pandas.core.arrays.string_ import StringDtype +from pandas.tests.extension import base + + +def split_array(arr): + if arr.dtype.storage != "pyarrow": + pytest.skip("only applicable for pyarrow chunked array n/a") + + def _split_array(arr): + import pyarrow as pa + + arrow_array = arr._data + split = len(arrow_array) // 2 + arrow_array = pa.chunked_array( + [*arrow_array[:split].chunks, *arrow_array[split:].chunks] + ) + assert arrow_array.num_chunks == 2 + return type(arr)(arrow_array) + + return _split_array(arr) + + +@pytest.fixture(params=[True, False]) +def chunked(request): + return request.param + + +@pytest.fixture +def dtype(string_storage): + return StringDtype(storage=string_storage) + + +@pytest.fixture +def data(dtype, chunked): + strings = np.random.choice(list(string.ascii_letters), size=100) + while strings[0] == strings[1]: + strings = np.random.choice(list(string.ascii_letters), size=100) + + arr = dtype.construct_array_type()._from_sequence(strings) + return split_array(arr) if chunked else arr + + +@pytest.fixture +def data_missing(dtype, chunked): + """Length 2 array with [NA, Valid]""" + arr = dtype.construct_array_type()._from_sequence([pd.NA, "A"]) + return split_array(arr) if chunked else arr + + +@pytest.fixture +def data_for_sorting(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence(["B", "C", "A"]) + return split_array(arr) if chunked else arr + + +@pytest.fixture +def data_missing_for_sorting(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence(["B", pd.NA, "A"]) + return split_array(arr) if chunked else arr + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence( + ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"] + ) + return split_array(arr) if chunked else arr + + +class TestDtype(base.BaseDtypeTests): + def test_eq_with_str(self, dtype): + assert dtype == f"string[{dtype.storage}]" + super().test_eq_with_str(dtype) + + +class TestInterface(base.BaseInterfaceTests): + def test_view(self, data, request): + if data.dtype.storage == "pyarrow": + mark = pytest.mark.xfail(reason="not implemented") + request.node.add_marker(mark) + super().test_view(data) + + +class TestConstructors(base.BaseConstructorsTests): + def test_from_dtype(self, data): + # base test uses string representation of dtype + pass + + +class TestReshaping(base.BaseReshapingTests): + def test_transpose(self, data, request): + if data.dtype.storage == "pyarrow": + mark = pytest.mark.xfail(reason="not implemented") + request.node.add_marker(mark) + super().test_transpose(data) + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + def 
test_setitem_preserves_views(self, data, request): + if data.dtype.storage == "pyarrow": + mark = pytest.mark.xfail(reason="not implemented") + request.node.add_marker(mark) + super().test_setitem_preserves_views(data) + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestMissing(base.BaseMissingTests): + def test_dropna_array(self, data_missing): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under6p0 and data_missing.dtype.storage == "pyarrow", + ): + result = data_missing.dropna() + expected = data_missing[[1]] + self.assert_extension_array_equal(result, expected) + + +class TestNoReduce(base.BaseNoReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + + if op_name in ["min", "max"]: + return None + + ser = pd.Series(data) + with pytest.raises(TypeError): + getattr(ser, op_name)(skipna=skipna) + + +class TestMethods(base.BaseMethodsTests): + def test_argsort(self, data_for_sorting): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_argsort(data_for_sorting) + + def test_argsort_missing(self, data_missing_for_sorting): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_argsort_missing(data_missing_for_sorting) + + def test_argmin_argmax( + self, data_for_sorting, data_missing_for_sorting, na_value, request + ): + if pa_version_under6p0 and data_missing_for_sorting.dtype.storage == "pyarrow": + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason="min_max not supported in pyarrow", + ) + ) + super().test_argmin_argmax(data_for_sorting, data_missing_for_sorting, na_value) + + @pytest.mark.parametrize( + "op_name, skipna, expected", + [ + ("idxmax", True, 0), + ("idxmin", True, 2), + ("argmax", True, 0), + ("argmin", True, 2), + ("idxmax", False, np.nan), + ("idxmin", False, np.nan), + ("argmax", False, -1), + ("argmin", False, -1), + ], + ) + def test_argreduce_series( + self, data_missing_for_sorting, op_name, skipna, expected, request + ): + if ( + pa_version_under6p0 + and data_missing_for_sorting.dtype.storage == "pyarrow" + and skipna + ): + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason="min_max not supported in pyarrow", + ) + ) + super().test_argreduce_series( + data_missing_for_sorting, op_name, skipna, expected + ) + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna, request): + all_data = all_data[:10] + if dropna: + other = all_data[~all_data.isna()] + else: + other = all_data + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(all_data.dtype, "storage", "") == "pyarrow" + and not (dropna and "data_missing" in request.node.nodeid), + ): + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(other.dtype, "storage", "") == "pyarrow" + and not (dropna and "data_missing" in request.node.nodeid), + ): + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:Falling 
back:pandas.errors.PerformanceWarning") + def test_value_counts_with_normalize(self, data): + super().test_value_counts_with_normalize(data) + + def test_argsort_missing_array(self, data_missing_for_sorting): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_argsort_missing(data_missing_for_sorting) + + @pytest.mark.parametrize( + "na_position, expected", + [ + ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))), + ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))), + ], + ) + def test_nargsort(self, data_missing_for_sorting, na_position, expected): + # GH 25439 + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_nargsort(data_missing_for_sorting, na_position, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values(self, data_for_sorting, ascending, sort_by_key): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_sort_values(data_for_sorting, ascending, sort_by_key) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_missing( + self, data_missing_for_sorting, ascending, sort_by_key + ): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_sort_values_missing( + data_missing_for_sorting, ascending, sort_by_key + ) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_frame(self, data_for_sorting, ascending): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_sort_values_frame(data_for_sorting, ascending) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def _compare_other(self, ser, data, op, other): + op_name = f"__{op.__name__}__" + result = getattr(ser, op_name)(other) + expected = getattr(ser.astype(object), op_name)(other).astype("boolean") + self.assert_series_equal(result, expected) + + def test_compare_scalar(self, data, comparison_op): + ser = pd.Series(data) + self._compare_other(ser, data, comparison_op, "abc") + + +class TestParsing(base.BaseParsingTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestGroupBy(base.BaseGroupbyTests): + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow", + ): + result = df.groupby("B", as_index=as_index).A.mean() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow", + ): + _, uniques = pd.factorize(data_for_grouping, sort=True) + + if as_index: + index = pd.Index._with_infer(uniques, name="B") + expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") + 
self.assert_series_equal(result, expected) + else: + expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0, 4.0]}) + self.assert_frame_equal(result, expected) + + def test_groupby_extension_transform(self, data_for_grouping): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_groupby_extension_transform(data_for_grouping) + + @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) + + +class Test2DCompat(base.Dim2CompatTests): + @pytest.fixture(autouse=True) + def arrow_not_supported(self, data, request): + if isinstance(data, ArrowStringArray): + mark = pytest.mark.xfail( + reason="2D support not implemented for ArrowStringArray" + ) + request.node.add_marker(mark) diff --git a/pandas/tests/frame/__init__.py b/pandas/tests/frame/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py new file mode 100644 index 00000000..a1603ea3 --- /dev/null +++ b/pandas/tests/frame/common.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from pandas import ( + DataFrame, + concat, +) + + +def _check_mixed_float(df, dtype=None): + # float16 are most likely to be upcasted to float32 + dtypes = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"} + if isinstance(dtype, str): + dtypes = {k: dtype for k, v in dtypes.items()} + elif isinstance(dtype, dict): + dtypes.update(dtype) + if dtypes.get("A"): + assert df.dtypes["A"] == dtypes["A"] + if dtypes.get("B"): + assert df.dtypes["B"] == dtypes["B"] + if dtypes.get("C"): + assert df.dtypes["C"] == dtypes["C"] + if dtypes.get("D"): + assert df.dtypes["D"] == dtypes["D"] + + +def _check_mixed_int(df, dtype=None): + dtypes = {"A": "int32", "B": "uint64", "C": "uint8", "D": "int64"} + if isinstance(dtype, str): + dtypes = {k: dtype for k, v in dtypes.items()} + elif isinstance(dtype, dict): + dtypes.update(dtype) + if dtypes.get("A"): + assert df.dtypes["A"] == dtypes["A"] + if dtypes.get("B"): + assert df.dtypes["B"] == dtypes["B"] + if dtypes.get("C"): + assert df.dtypes["C"] == dtypes["C"] + if dtypes.get("D"): + assert df.dtypes["D"] == dtypes["D"] + + +def zip_frames(frames: list[DataFrame], axis: int = 1) -> DataFrame: + """ + take a list of frames, zip them together under the + assumption that these all have the first frames' index/columns. 
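+    With axis=1 the result interleaves columns (column c is taken from each
+    frame in turn); for any other axis the rows are interleaved the same way
+    and stacked into a new DataFrame.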
+ + Returns + ------- + new_frame : DataFrame + """ + if axis == 1: + columns = frames[0].columns + zipped = [f.loc[:, c] for c in columns for f in frames] + return concat(zipped, axis=1) + else: + index = frames[0].index + zipped = [f.loc[i, :] for i in index for f in frames] + return DataFrame(zipped) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py new file mode 100644 index 00000000..8dbed84b --- /dev/null +++ b/pandas/tests/frame/conftest.py @@ -0,0 +1,284 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + date_range, +) +import pandas._testing as tm + + +@pytest.fixture +def float_frame_with_na(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + + A B C D + ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997 + DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872 + neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522 + 0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018 + 3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826 + soujjZ0A08 NaN NaN NaN NaN + 7W6NLGsjB9 NaN NaN NaN NaN + ... ... ... ... ... + uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590 + n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717 + ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189 + uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503 + 3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947 + 2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083 + sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517 + + [30 rows x 4 columns] + """ + df = DataFrame(tm.getSeriesData()) + # set some NAs + df.iloc[5:10] = np.nan + df.iloc[15:20, -2:] = np.nan + return df + + +@pytest.fixture +def bool_frame_with_na(): + """ + Fixture for DataFrame of booleans with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + + A B C D + zBZxY2IDGd False False False False + IhBWBMWllt False True True True + ctjdvZSR6R True False True True + AVTujptmxb False True False True + G9lrImrSWq False False False True + sFFwdIUfz2 NaN NaN NaN NaN + s15ptEJnRb NaN NaN NaN NaN + ... ... ... ... ... + UW41KkDyZ4 True True False False + l9l6XkOdqV True False False False + X2MeZfzDYA False True False False + xWkIKU7vfX False True False True + QOhL6VmpGU False False False True + 22PwkRJdat False True False False + kfboQ3VeIK True False True False + + [30 rows x 4 columns] + """ + df = DataFrame(tm.getSeriesData()) > 0 + df = df.astype(object) + # set some NAs + df.iloc[5:10] = np.nan + df.iloc[15:20, -2:] = np.nan + + # For `any` tests we need to have at least one True before the first NaN + # in each column + for i in range(4): + df.iloc[i, i] = True + return df + + +@pytest.fixture +def float_string_frame(): + """ + Fixture for DataFrame of floats and strings with index of unique strings + + Columns are ['A', 'B', 'C', 'D', 'foo']. + + A B C D foo + w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar + PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar + ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar + 3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar + khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar + LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar + HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar + ... ... ... ... ... ... 
+ 9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar + h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar + mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar + oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar + 9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar + jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar + lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar + + [30 rows x 5 columns] + """ + df = DataFrame(tm.getSeriesData()) + df["foo"] = "bar" + return df + + +@pytest.fixture +def mixed_float_frame(): + """ + Fixture for DataFrame of different float types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993 + KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588 + VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731 + kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607 + CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266 + 0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541 + tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710 + ... ... ... ... ... + 7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237 + 4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612 + B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653 + hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427 + 1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827 + 9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204 + xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502 + + [30 rows x 4 columns] + """ + df = DataFrame(tm.getSeriesData()) + df.A = df.A.astype("float32") + df.B = df.B.astype("float32") + df.C = df.C.astype("float16") + df.D = df.D.astype("float64") + return df + + +@pytest.fixture +def mixed_int_frame(): + """ + Fixture for DataFrame of different int types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + mUrCZ67juP 0 1 2 2 + rw99ACYaKS 0 1 0 0 + 7QsEcpaaVU 0 1 1 1 + xkrimI2pcE 0 1 0 0 + dz01SuzoS8 0 1 255 255 + ccQkqOHX75 -1 1 0 0 + DN0iXaoDLd 0 1 0 0 + ... .. .. ... ... + Dfb141wAaQ 1 1 254 254 + IPD8eQOVu5 0 1 0 0 + CcaKulsCmv 0 1 0 0 + rIBa8gu7E5 0 1 0 0 + RP6peZmh5o 0 1 1 1 + NMb9pipQWQ 0 1 0 0 + PqgbJEzjib 0 1 3 3 + + [30 rows x 4 columns] + """ + df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()}) + df.A = df.A.astype("int32") + df.B = np.ones(len(df.B), dtype="uint64") + df.C = df.C.astype("uint8") + df.D = df.C.astype("int64") + return df + + +@pytest.fixture +def timezone_frame(): + """ + Fixture for DataFrame of date_range Series with different time zones + + Columns are ['A', 'B', 'C']; some entries are missing + + A B C + 0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00 + 1 2013-01-02 NaT NaT + 2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00 + """ + df = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": date_range("20130101", periods=3, tz="US/Eastern"), + "C": date_range("20130101", periods=3, tz="CET"), + } + ) + df.iloc[1, 1] = NaT + df.iloc[1, 2] = NaT + return df + + +@pytest.fixture +def uint64_frame(): + """ + Fixture for DataFrame with uint64 values + + Columns are ['A', 'B'] + """ + return DataFrame( + {"A": np.arange(3), "B": [2**63, 2**63 + 5, 2**63 + 10]}, dtype=np.uint64 + ) + + +@pytest.fixture +def simple_frame(): + """ + Fixture for simple 3x3 DataFrame + + Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. 
+ + one two three + a 1.0 2.0 3.0 + b 4.0 5.0 6.0 + c 7.0 8.0 9.0 + """ + arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]) + + return DataFrame(arr, columns=["one", "two", "three"], index=["a", "b", "c"]) + + +@pytest.fixture +def frame_of_index_cols(): + """ + Fixture for DataFrame of columns that can be used for indexing + + Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; + 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. + + A B C D E (tuple, as, label) + 0 foo one a 0.608477 -0.012500 -1.664297 + 1 foo two b -0.633460 0.249614 -0.364411 + 2 foo three c 0.615256 2.154968 -0.834666 + 3 bar one d 0.234246 1.085675 0.718445 + 4 bar two e 0.533841 -0.005702 -3.533912 + """ + df = DataFrame( + { + "A": ["foo", "foo", "foo", "bar", "bar"], + "B": ["one", "two", "three", "one", "two"], + "C": ["a", "b", "c", "d", "e"], + "D": np.random.randn(5), + "E": np.random.randn(5), + ("tuple", "as", "label"): np.random.randn(5), + } + ) + return df + + +@pytest.fixture( + params=[ + "any", + "all", + "count", + "sum", + "prod", + "max", + "min", + "mean", + "median", + "skew", + "kurt", + "sem", + "var", + "std", + "mad", + ] +) +def reduction_functions(request): + return request.param diff --git a/pandas/tests/frame/constructors/__init__.py b/pandas/tests/frame/constructors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py new file mode 100644 index 00000000..7c2b0096 --- /dev/null +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -0,0 +1,199 @@ +from collections import OrderedDict + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.core.construction import create_series_with_explicit_dtype + + +class TestFromDict: + # Note: these tests are specific to the from_dict method, not for + # passing dictionaries to DataFrame.__init__ + + def test_constructor_list_of_odicts(self): + data = [ + OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]), + OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]), + OrderedDict([["a", 1.5], ["d", 6]]), + OrderedDict(), + OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), + OrderedDict([["b", 3], ["c", 4], ["d", 6]]), + ] + + result = DataFrame(data) + expected = DataFrame.from_dict( + dict(zip(range(len(data)), data)), orient="index" + ) + tm.assert_frame_equal(result, expected.reindex(result.index)) + + def test_constructor_single_row(self): + data = [OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])] + + result = DataFrame(data) + expected = DataFrame.from_dict(dict(zip([0], data)), orient="index").reindex( + result.index + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_series(self): + data = [ + OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), + OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]), + ] + sdict = OrderedDict(zip(["x", "y"], data)) + idx = Index(["a", "b", "c"]) + + # all named + data2 = [ + Series([1.5, 3, 4], idx, dtype="O", name="x"), + Series([1.5, 3, 6], idx, name="y"), + ] + result = DataFrame(data2) + expected = DataFrame.from_dict(sdict, orient="index") + tm.assert_frame_equal(result, expected) + + # some unnamed + data2 = [ + Series([1.5, 3, 4], idx, dtype="O", name="x"), + Series([1.5, 3, 6], idx), + ] + result = DataFrame(data2) + + sdict = OrderedDict(zip(["x", "Unnamed 0"], data)) + expected = DataFrame.from_dict(sdict, 
orient="index") + tm.assert_frame_equal(result, expected) + + # none named + data = [ + OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]), + OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]), + OrderedDict([["a", 1.5], ["d", 6]]), + OrderedDict(), + OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), + OrderedDict([["b", 3], ["c", 4], ["d", 6]]), + ] + data = [ + create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data + ] + + result = DataFrame(data) + sdict = OrderedDict(zip(range(len(data)), data)) + expected = DataFrame.from_dict(sdict, orient="index") + tm.assert_frame_equal(result, expected.reindex(result.index)) + + result2 = DataFrame(data, index=np.arange(6)) + tm.assert_frame_equal(result, result2) + + result = DataFrame([Series(dtype=object)]) + expected = DataFrame(index=[0]) + tm.assert_frame_equal(result, expected) + + data = [ + OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), + OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]), + ] + sdict = OrderedDict(zip(range(len(data)), data)) + + idx = Index(["a", "b", "c"]) + data2 = [Series([1.5, 3, 4], idx, dtype="O"), Series([1.5, 3, 6], idx)] + result = DataFrame(data2) + expected = DataFrame.from_dict(sdict, orient="index") + tm.assert_frame_equal(result, expected) + + def test_constructor_orient(self, float_string_frame): + data_dict = float_string_frame.T._series + recons = DataFrame.from_dict(data_dict, orient="index") + expected = float_string_frame.reindex(index=recons.index) + tm.assert_frame_equal(recons, expected) + + # dict of sequence + a = {"hi": [32, 3, 3], "there": [3, 5, 3]} + rs = DataFrame.from_dict(a, orient="index") + xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) + tm.assert_frame_equal(rs, xp) + + def test_constructor_from_ordered_dict(self): + # GH#8425 + a = OrderedDict( + [ + ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])), + ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])), + ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])), + ] + ) + expected = DataFrame.from_dict(a, orient="columns").T + result = DataFrame.from_dict(a, orient="index") + tm.assert_frame_equal(result, expected) + + def test_from_dict_columns_parameter(self): + # GH#18529 + # Test new columns parameter for from_dict that was added to make + # from_items(..., orient='index', columns=[...]) easier to replicate + result = DataFrame.from_dict( + OrderedDict([("A", [1, 2]), ("B", [4, 5])]), + orient="index", + columns=["one", "two"], + ) + expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=["one", "two"]) + tm.assert_frame_equal(result, expected) + + msg = "cannot use columns parameter with orient='columns'" + with pytest.raises(ValueError, match=msg): + DataFrame.from_dict( + {"A": [1, 2], "B": [4, 5]}, + orient="columns", + columns=["one", "two"], + ) + with pytest.raises(ValueError, match=msg): + DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"]) + + @pytest.mark.parametrize( + "data_dict, keys, orient", + [ + ({}, [], "index"), + ([{("a",): 1}, {("a",): 2}], [("a",)], "columns"), + ([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"), + ([{("a", "b"): 1}], [("a", "b")], "columns"), + ], + ) + def test_constructor_from_dict_tuples(self, data_dict, keys, orient): + # GH#16769 + df = DataFrame.from_dict(data_dict, orient) + + result = df.columns + expected = Index(keys, dtype="object", tupleize_cols=False) + + tm.assert_index_equal(result, expected) + + def test_frame_dict_constructor_empty_series(self): + s1 = Series( + [1, 2, 
3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) + ) + s2 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) + ) + s3 = Series(dtype=object) + + # it works! + DataFrame({"foo": s1, "bar": s2, "baz": s3}) + DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) + + def test_from_dict_scalars_requires_index(self): + msg = "If using all scalar values, you must pass an index" + with pytest.raises(ValueError, match=msg): + DataFrame.from_dict(OrderedDict([("b", 8), ("a", 5), ("a", 6)])) + + def test_from_dict_orient_invalid(self): + msg = ( + "Expected 'index', 'columns' or 'tight' for orient parameter. " + "Got 'abc' instead" + ) + with pytest.raises(ValueError, match=msg): + DataFrame.from_dict({"foo": 1, "baz": 3, "bar": 2}, orient="abc") diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py new file mode 100644 index 00000000..c6d54e28 --- /dev/null +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -0,0 +1,466 @@ +from datetime import datetime +from decimal import Decimal + +import numpy as np +import pytest +import pytz + +from pandas.compat import is_platform_little_endian + +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + Interval, + RangeIndex, + Series, +) +import pandas._testing as tm + + +class TestFromRecords: + def test_from_records_with_datetimes(self): + + # this may fail on certain platforms because of a numpy issue + # related GH#6140 + if not is_platform_little_endian(): + pytest.skip("known failure of test on non-little endian") + + # construction with a null in a recarray + # GH#6140 + expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]}) + + arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])] + dtypes = [("EXPIRY", " None: + self.args = args + + def __getitem__(self, i): + return self.args[i] + + def __iter__(self): + return iter(self.args) + + recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)] + tups = [tuple(rec) for rec in recs] + + result = DataFrame.from_records(recs) + expected = DataFrame.from_records(tups) + tm.assert_frame_equal(result, expected) + + def test_from_records_len0_with_columns(self): + # GH#2633 + result = DataFrame.from_records([], index="foo", columns=["foo", "bar"]) + expected = Index(["bar"]) + + assert len(result) == 0 + assert result.index.name == "foo" + tm.assert_index_equal(result.columns, expected) + + def test_from_records_series_list_dict(self): + # GH#27358 + expected = DataFrame([[{"a": 1, "b": 2}, {"a": 3, "b": 4}]]).T + data = Series([[{"a": 1, "b": 2}], [{"a": 3, "b": 4}]]) + result = DataFrame.from_records(data) + tm.assert_frame_equal(result, expected) + + def test_from_records_series_categorical_index(self): + # GH#32805 + index = CategoricalIndex( + [Interval(-20, -10), Interval(-10, 0), Interval(0, 10)] + ) + series_of_dicts = Series([{"a": 1}, {"a": 2}, {"b": 3}], index=index) + frame = DataFrame.from_records(series_of_dicts, index=index) + expected = DataFrame( + {"a": [1, 2, np.NaN], "b": [np.NaN, np.NaN, 3]}, index=index + ) + tm.assert_frame_equal(frame, expected) + + def test_frame_from_records_utc(self): + rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)} + + # it works + DataFrame.from_records([rec], index="begin_time") + + def test_from_records_to_records(self): + # from numpy documentation + arr = np.zeros((2,), dtype=("i4,f4,a10")) + arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")] + + # TODO(wesm): unused + frame = 
DataFrame.from_records(arr) # noqa + + index = Index(np.arange(len(arr))[::-1]) + indexed_frame = DataFrame.from_records(arr, index=index) + tm.assert_index_equal(indexed_frame.index, index) + + # without names, it should go to last ditch + arr2 = np.zeros((2, 3)) + tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2)) + + # wrong length + msg = "|".join( + [ + r"Length of values \(2\) does not match length of index \(1\)", + ] + ) + with pytest.raises(ValueError, match=msg): + DataFrame.from_records(arr, index=index[:-1]) + + indexed_frame = DataFrame.from_records(arr, index="f1") + + # what to do? + records = indexed_frame.to_records() + assert len(records.dtype.names) == 3 + + records = indexed_frame.to_records(index=False) + assert len(records.dtype.names) == 2 + assert "index" not in records.dtype.names + + def test_from_records_nones(self): + tuples = [(1, 2, None, 3), (1, 2, None, 3), (None, 2, 5, 3)] + + df = DataFrame.from_records(tuples, columns=["a", "b", "c", "d"]) + assert np.isnan(df["c"][0]) + + def test_from_records_iterator(self): + arr = np.array( + [(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5.0, 5.0, 6, 6), (7.0, 7.0, 8, 8)], + dtype=[ + ("x", np.float64), + ("u", np.float32), + ("y", np.int64), + ("z", np.int32), + ], + ) + df = DataFrame.from_records(iter(arr), nrows=2) + xp = DataFrame( + { + "x": np.array([1.0, 3.0], dtype=np.float64), + "u": np.array([1.0, 3.0], dtype=np.float32), + "y": np.array([2, 4], dtype=np.int64), + "z": np.array([2, 4], dtype=np.int32), + } + ) + tm.assert_frame_equal(df.reindex_like(xp), xp) + + # no dtypes specified here, so just compare with the default + arr = [(1.0, 2), (3.0, 4), (5.0, 6), (7.0, 8)] + df = DataFrame.from_records(iter(arr), columns=["x", "y"], nrows=2) + tm.assert_frame_equal(df, xp.reindex(columns=["x", "y"]), check_dtype=False) + + def test_from_records_tuples_generator(self): + def tuple_generator(length): + for i in range(length): + letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + yield (i, letters[i % len(letters)], i / length) + + columns_names = ["Integer", "String", "Float"] + columns = [ + [i[j] for i in tuple_generator(10)] for j in range(len(columns_names)) + ] + data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]} + expected = DataFrame(data, columns=columns_names) + + generator = tuple_generator(10) + result = DataFrame.from_records(generator, columns=columns_names) + tm.assert_frame_equal(result, expected) + + def test_from_records_lists_generator(self): + def list_generator(length): + for i in range(length): + letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + yield [i, letters[i % len(letters)], i / length] + + columns_names = ["Integer", "String", "Float"] + columns = [ + [i[j] for i in list_generator(10)] for j in range(len(columns_names)) + ] + data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]} + expected = DataFrame(data, columns=columns_names) + + generator = list_generator(10) + result = DataFrame.from_records(generator, columns=columns_names) + tm.assert_frame_equal(result, expected) + + def test_from_records_columns_not_modified(self): + tuples = [(1, 2, 3), (1, 2, 3), (2, 5, 3)] + + columns = ["a", "b", "c"] + original_columns = list(columns) + + df = DataFrame.from_records(tuples, columns=columns, index="a") # noqa + + assert columns == original_columns + + def test_from_records_decimal(self): + + tuples = [(Decimal("1.5"),), (Decimal("2.5"),), (None,)] + + df = DataFrame.from_records(tuples, columns=["a"]) + assert df["a"].dtype == object + + df = 
DataFrame.from_records(tuples, columns=["a"], coerce_float=True) + assert df["a"].dtype == np.float64 + assert np.isnan(df["a"].values[-1]) + + def test_from_records_duplicates(self): + result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"]) + + expected = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"]) + + tm.assert_frame_equal(result, expected) + + def test_from_records_set_index_name(self): + def create_dict(order_id): + return { + "order_id": order_id, + "quantity": np.random.randint(1, 10), + "price": np.random.randint(1, 10), + } + + documents = [create_dict(i) for i in range(10)] + # demo missing data + documents.append({"order_id": 10, "quantity": 5}) + + result = DataFrame.from_records(documents, index="order_id") + assert result.index.name == "order_id" + + # MultiIndex + result = DataFrame.from_records(documents, index=["order_id", "quantity"]) + assert result.index.names == ("order_id", "quantity") + + def test_from_records_misc_brokenness(self): + # GH#2179 + + data = {1: ["foo"], 2: ["bar"]} + + result = DataFrame.from_records(data, columns=["a", "b"]) + exp = DataFrame(data, columns=["a", "b"]) + tm.assert_frame_equal(result, exp) + + # overlap in index/index_names + + data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + result = DataFrame.from_records(data, index=["a", "b", "c"]) + exp = DataFrame(data, index=["a", "b", "c"]) + tm.assert_frame_equal(result, exp) + + # GH#2623 + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), "hi"]) # test col upconverts to obj + df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) + result = df2_obj.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("object")], index=["date", "test"] + ) + tm.assert_series_equal(result, expected) + + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), 1]) + df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) + result = df2_obj.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("int64")], index=["date", "test"] + ) + tm.assert_series_equal(result, expected) + + def test_from_records_empty(self): + # GH#3562 + result = DataFrame.from_records([], columns=["a", "b", "c"]) + expected = DataFrame(columns=["a", "b", "c"]) + tm.assert_frame_equal(result, expected) + + result = DataFrame.from_records([], columns=["a", "b", "b"]) + expected = DataFrame(columns=["a", "b", "b"]) + tm.assert_frame_equal(result, expected) + + def test_from_records_empty_with_nonempty_fields_gh3682(self): + a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)]) + df = DataFrame.from_records(a, index="id") + + ex_index = Index([1], name="id") + expected = DataFrame({"value": [2]}, index=ex_index, columns=["value"]) + tm.assert_frame_equal(df, expected) + + b = a[:0] + df2 = DataFrame.from_records(b, index="id") + tm.assert_frame_equal(df2, df.iloc[:0]) + + def test_from_records_empty2(self): + # GH#42456 + dtype = [("prop", int)] + shape = (0, len(dtype)) + arr = np.empty(shape, dtype=dtype) + + result = DataFrame.from_records(arr) + expected = DataFrame({"prop": np.array([], dtype=int)}) + tm.assert_frame_equal(result, expected) + + alt = DataFrame(arr) + tm.assert_frame_equal(alt, expected) diff --git a/pandas/tests/frame/indexing/__init__.py b/pandas/tests/frame/indexing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py new file mode 100644 index 
00000000..c9705581 --- /dev/null +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -0,0 +1,188 @@ +""" +Tests for values coercion in setitem-like operations on DataFrame. + +For the most part, these should be multi-column DataFrames, otherwise +we would share the tests with Series. +""" +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameSetitemCoercion: + @pytest.mark.xfail(reason="Unnecessary cast.") + @pytest.mark.parametrize("consolidate", [True, False]) + def test_loc_setitem_multiindex_columns(self, consolidate): + # GH#18415 Setting values in a single column preserves dtype, + # while setting them in multiple columns did unwanted cast. + + # Note that A here has 2 blocks, below we do the same thing + # with a consolidated frame. + A = DataFrame(np.zeros((6, 5), dtype=np.float32)) + A = pd.concat([A, A], axis=1, keys=[1, 2]) + if consolidate: + A = A._consolidate() + + A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32) + assert (A.dtypes == np.float32).all() + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32) + + assert (A.dtypes == np.float32).all() + + A.loc[:, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32) + assert (A.dtypes == np.float32).all() + + # TODO: i think this isn't about MultiIndex and could be done with iloc? + + +def test_37477(): + # fixed by GH#45121 + orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]}) + expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]}) + + df = orig.copy() + df.at[1, "B"] = 1.2 + tm.assert_frame_equal(df, expected) + + df = orig.copy() + df.loc[1, "B"] = 1.2 + tm.assert_frame_equal(df, expected) + + df = orig.copy() + df.iat[1, 1] = 1.2 + tm.assert_frame_equal(df, expected) + + df = orig.copy() + df.iloc[1, 1] = 1.2 + tm.assert_frame_equal(df, expected) + + +def test_6942(indexer_al): + # check that the .at __setitem__ after setting "Live" actually sets the data + start = Timestamp("2014-04-01") + t1 = Timestamp("2014-04-23 12:42:38.883082") + t2 = Timestamp("2014-04-24 01:33:30.040039") + + dti = date_range(start, periods=1) + orig = DataFrame(index=dti, columns=["timenow", "Live"]) + + df = orig.copy() + indexer_al(df)[start, "timenow"] = t1 + + df["Live"] = True + + df.at[start, "timenow"] = t2 + assert df.iloc[0, 0] == t2 + + +def test_26395(indexer_al): + # .at case fixed by GH#45121 (best guess) + df = DataFrame(index=["A", "B", "C"]) + df["D"] = 0 + + indexer_al(df)["C", "D"] = 2 + expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64) + tm.assert_frame_equal(df, expected) + + indexer_al(df)["C", "D"] = 44.5 + expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64) + tm.assert_frame_equal(df, expected) + + indexer_al(df)["C", "D"] = "hello" + expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object) + tm.assert_frame_equal(df, expected) + + +@pytest.mark.xfail(reason="unwanted upcast") +def test_15231(): + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + df.loc[2] = Series({"a": 5, "b": 6}) + assert (df.dtypes == np.int64).all() + + df.loc[3] = Series({"a": 7}) + + # df["a"] doesn't have any NaNs, should not have been cast + exp_dtypes = Series([np.int64, np.float64], dtype=object, index=["a", "b"]) + tm.assert_series_equal(df.dtypes, exp_dtypes) 
+ + +@pytest.mark.xfail(reason="Unnecessarily upcasts to float64") +def test_iloc_setitem_unnecesssary_float_upcasting(): + # GH#12255 + df = DataFrame( + { + 0: np.array([1, 3], dtype=np.float32), + 1: np.array([2, 4], dtype=np.float32), + 2: ["a", "b"], + } + ) + orig = df.copy() + + values = df[0].values.reshape(2, 1) + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 0:1] = values + + tm.assert_frame_equal(df, orig) + + +@pytest.mark.xfail(reason="unwanted casting to dt64") +def test_12499(): + # TODO: OP in GH#12499 used np.datetim64("NaT") instead of pd.NaT, + # which has consequences for the expected df["two"] (though i think at + # the time it might not have because of a separate bug). See if it makes + # a difference which one we use here. + ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC") + + data = [{"one": 0, "two": ts}] + orig = DataFrame(data) + df = orig.copy() + df.loc[1] = [np.nan, NaT] + + expected = DataFrame( + {"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")} + ) + tm.assert_frame_equal(df, expected) + + data = [{"one": 0, "two": ts}] + df = orig.copy() + df.loc[1, :] = [np.nan, NaT] + tm.assert_frame_equal(df, expected) + + +def test_20476(): + mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) + df = DataFrame(-1, index=range(3), columns=mi) + filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"]) + df["A"] = filler + + expected = DataFrame( + { + 0: [1, 1, 1], + 1: [2, 2, 2], + 2: [3.0, 3.0, 3.0], + 3: [-1, -1, -1], + 4: [-1, -1, -1], + 5: [-1, -1, -1], + } + ) + expected.columns = mi + exp_dtypes = Series( + [np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3, + index=mi, + ) + tm.assert_series_equal(df.dtypes, exp_dtypes) diff --git a/pandas/tests/frame/indexing/test_delitem.py b/pandas/tests/frame/indexing/test_delitem.py new file mode 100644 index 00000000..fa10c9ef --- /dev/null +++ b/pandas/tests/frame/indexing/test_delitem.py @@ -0,0 +1,60 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, +) + + +class TestDataFrameDelItem: + def test_delitem(self, float_frame): + del float_frame["A"] + assert "A" not in float_frame + + def test_delitem_multiindex(self): + midx = MultiIndex.from_product([["A", "B"], [1, 2]]) + df = DataFrame(np.random.randn(4, 4), columns=midx) + assert len(df.columns) == 4 + assert ("A",) in df.columns + assert "A" in df.columns + + result = df["A"] + assert isinstance(result, DataFrame) + del df["A"] + + assert len(df.columns) == 2 + + # A still in the levels, BUT get a KeyError if trying + # to delete + assert ("A",) not in df.columns + with pytest.raises(KeyError, match=re.escape("('A',)")): + del df[("A",)] + + # behavior of dropped/deleted MultiIndex levels changed from + # GH 2770 to GH 19027: MultiIndex no longer '.__contains__' + # levels which are dropped/deleted + assert "A" not in df.columns + with pytest.raises(KeyError, match=re.escape("('A',)")): + del df["A"] + + def test_delitem_corner(self, float_frame): + f = float_frame.copy() + del f["D"] + assert len(f.columns) == 3 + with pytest.raises(KeyError, match=r"^'D'$"): + del f["D"] + del f["B"] + assert len(f.columns) == 2 + + def test_delitem_col_still_multiindex(self): + arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + + df = DataFrame(np.random.randn(3, 
4), columns=index) + del df[("a", "", "")] + assert isinstance(df.columns, MultiIndex) diff --git a/pandas/tests/frame/indexing/test_get.py b/pandas/tests/frame/indexing/test_get.py new file mode 100644 index 00000000..5f2651ee --- /dev/null +++ b/pandas/tests/frame/indexing/test_get.py @@ -0,0 +1,27 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestGet: + def test_get(self, float_frame): + b = float_frame.get("B") + tm.assert_series_equal(b, float_frame["B"]) + + assert float_frame.get("foo") is None + tm.assert_series_equal( + float_frame.get("foo", float_frame["B"]), float_frame["B"] + ) + + @pytest.mark.parametrize( + "df", + [ + DataFrame(), + DataFrame(columns=list("AB")), + DataFrame(columns=list("AB"), index=range(3)), + ], + ) + def test_get_none(self, df): + # see gh-5652 + assert df.get(None) is None diff --git a/pandas/tests/frame/indexing/test_get_value.py b/pandas/tests/frame/indexing/test_get_value.py new file mode 100644 index 00000000..65a1c64a --- /dev/null +++ b/pandas/tests/frame/indexing/test_get_value.py @@ -0,0 +1,22 @@ +import pytest + +from pandas import ( + DataFrame, + MultiIndex, +) + + +class TestGetValue: + def test_get_set_value_no_partial_indexing(self): + # partial w/ MultiIndex raise exception + index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)]) + df = DataFrame(index=index, columns=range(4)) + with pytest.raises(KeyError, match=r"^0$"): + df._get_value(0, 1) + + def test_get_value(self, float_frame): + for idx in float_frame.index: + for col in float_frame.columns: + result = float_frame._get_value(idx, col) + expected = float_frame[col][idx] + assert result == expected diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py new file mode 100644 index 00000000..f5c85bd9 --- /dev/null +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -0,0 +1,477 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + DataFrame, + DateOffset, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, + concat, + date_range, + get_dummies, + period_range, +) +import pandas._testing as tm +from pandas.core.arrays import SparseArray + + +class TestGetitem: + def test_getitem_unused_level_raises(self): + # GH#20410 + mi = MultiIndex( + levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]], + codes=[[1, 0], [1, 0]], + ) + df = DataFrame(-1, index=range(3), columns=mi) + + with pytest.raises(KeyError, match="notevenone"): + df["notevenone"] + + def test_getitem_periodindex(self): + rng = period_range("1/1/2000", periods=5) + df = DataFrame(np.random.randn(10, 5), columns=rng) + + ts = df[rng[0]] + tm.assert_series_equal(ts, df.iloc[:, 0]) + + # GH#1211; smoketest unrelated to the rest of this test + repr(df) + + ts = df["1/1/2000"] + tm.assert_series_equal(ts, df.iloc[:, 0]) + + def test_getitem_list_of_labels_categoricalindex_cols(self): + # GH#16115 + cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) + + expected = DataFrame( + [[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats + ) + dummies = get_dummies(cats) + result = dummies[list(dummies.columns)] + tm.assert_frame_equal(result, expected) + + def test_getitem_sparse_column_return_type_and_dtype(self): + # https://github.com/pandas-dev/pandas/issues/23559 + data = SparseArray([0, 1]) + df = DataFrame({"A": data}) + expected = Series(data, name="A") + result = df["A"] + tm.assert_series_equal(result, 
expected) + + # Also check iloc and loc while we're here + result = df.iloc[:, 0] + tm.assert_series_equal(result, expected) + + result = df.loc[:, "A"] + tm.assert_series_equal(result, expected) + + def test_getitem_string_columns(self): + # GH#46185 + df = DataFrame([[1, 2]], columns=Index(["A", "B"], dtype="string")) + result = df.A + expected = df["A"] + tm.assert_series_equal(result, expected) + + +class TestGetitemListLike: + def test_getitem_list_missing_key(self): + # GH#13822, incorrect error string with non-unique columns when missing + # column is accessed + df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]}) + df.columns = ["x", "x", "z"] + + # Check that we get the correct value in the KeyError + with pytest.raises(KeyError, match=r"\['y'\] not in index"): + df[["x", "y", "z"]] + + def test_getitem_list_duplicates(self): + # GH#1943 + df = DataFrame(np.random.randn(4, 4), columns=list("AABC")) + df.columns.name = "foo" + + result = df[["B", "C"]] + assert result.columns.name == "foo" + + expected = df.iloc[:, 2:] + tm.assert_frame_equal(result, expected) + + def test_getitem_dupe_cols(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\"" + with pytest.raises(KeyError, match=re.escape(msg)): + df[["baf"]] + + @pytest.mark.parametrize( + "idx_type", + [ + list, + iter, + Index, + set, + lambda l: dict(zip(l, range(len(l)))), + lambda l: dict(zip(l, range(len(l)))).keys(), + ], + ids=["list", "iter", "Index", "set", "dict", "dict_keys"], + ) + @pytest.mark.parametrize("levels", [1, 2]) + def test_getitem_listlike(self, idx_type, levels, float_frame): + # GH#21294 + + if levels == 1: + frame, missing = float_frame, "food" + else: + # MultiIndex columns + frame = DataFrame( + np.random.randn(8, 3), + columns=Index( + [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")], + name=("sth", "sth2"), + ), + ) + missing = ("good", "food") + + keys = [frame.columns[1], frame.columns[0]] + idx = idx_type(keys) + idx_check = list(idx_type(keys)) + + if isinstance(idx, (set, dict)): + with tm.assert_produces_warning(FutureWarning): + result = frame[idx] + else: + result = frame[idx] + + expected = frame.loc[:, idx_check] + expected.columns.names = frame.columns.names + + tm.assert_frame_equal(result, expected) + + idx = idx_type(keys + [missing]) + with pytest.raises(KeyError, match="not in index"): + with tm.assert_produces_warning(FutureWarning): + frame[idx] + + def test_getitem_iloc_generator(self): + # GH#39614 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + result = df.iloc[indexer] + expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2]) + tm.assert_frame_equal(result, expected) + + def test_getitem_iloc_two_dimensional_generator(self): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + result = df.iloc[indexer, 1] + expected = Series([5, 6], name="b", index=[1, 2]) + tm.assert_series_equal(result, expected) + + def test_getitem_iloc_dateoffset_days(self): + # GH 46671 + df = DataFrame( + list(range(10)), + index=date_range("01-01-2022", periods=10, freq=DateOffset(days=1)), + ) + result = df.loc["2022-01-01":"2022-01-03"] + expected = DataFrame( + [0, 1, 2], + index=DatetimeIndex( + ["2022-01-01", "2022-01-02", "2022-01-03"], + dtype="datetime64[ns]", + freq=DateOffset(days=1), + ), + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + list(range(10)), + index=date_range( + "01-01-2022", periods=10, 
freq=DateOffset(days=1, hours=2) + ), + ) + result = df.loc["2022-01-01":"2022-01-03"] + expected = DataFrame( + [0, 1, 2], + index=DatetimeIndex( + ["2022-01-01 00:00:00", "2022-01-02 02:00:00", "2022-01-03 04:00:00"], + dtype="datetime64[ns]", + freq=DateOffset(days=1, hours=2), + ), + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + list(range(10)), + index=date_range("01-01-2022", periods=10, freq=DateOffset(minutes=3)), + ) + result = df.loc["2022-01-01":"2022-01-03"] + tm.assert_frame_equal(result, df) + + +class TestGetitemCallable: + def test_getitem_callable(self, float_frame): + # GH#12533 + result = float_frame[lambda x: "A"] + expected = float_frame.loc[:, "A"] + tm.assert_series_equal(result, expected) + + result = float_frame[lambda x: ["A", "B"]] + expected = float_frame.loc[:, ["A", "B"]] + tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]]) + + df = float_frame[:3] + result = df[lambda x: [True, False, True]] + expected = float_frame.iloc[[0, 2], :] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_columns_one_level(self): + # GH#29749 + df = DataFrame([[1, 2]], columns=[["a", "b"]]) + expected = DataFrame([1], columns=[["a"]]) + + result = df["a"] + tm.assert_frame_equal(result, expected) + + result = df.loc[:, "a"] + tm.assert_frame_equal(result, expected) + + +class TestGetitemBooleanMask: + def test_getitem_bool_mask_categorical_index(self): + + df3 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex( + [1, 1, 2, 1, 3, 2], + dtype=CategoricalDtype([3, 2, 1], ordered=True), + name="B", + ), + ) + df4 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex( + [1, 1, 2, 1, 3, 2], + dtype=CategoricalDtype([3, 2, 1], ordered=False), + name="B", + ), + ) + + result = df3[df3.index == "a"] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + result = df4[df4.index == "a"] + expected = df4.iloc[[]] + tm.assert_frame_equal(result, expected) + + result = df3[df3.index == 1] + expected = df3.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df4[df4.index == 1] + expected = df4.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # since we have an ordered categorical + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=True, + # name='B') + result = df3[df3.index < 2] + expected = df3.iloc[[4]] + tm.assert_frame_equal(result, expected) + + result = df3[df3.index > 1] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + # unordered + # cannot be compared + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=False, + # name='B') + msg = "Unordered Categoricals can only compare equality or not" + with pytest.raises(TypeError, match=msg): + df4[df4.index < 2] + with pytest.raises(TypeError, match=msg): + df4[df4.index > 1] + + @pytest.mark.parametrize( + "data1,data2,expected_data", + ( + ( + [[1, 2], [3, 4]], + [[0.5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]], + ), + ( + [[1, 2], [3, 4]], + [[5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]], + ), + ), + ) + def test_getitem_bool_mask_duplicate_columns_mixed_dtypes( + self, + data1, + data2, + expected_data, + ): + # GH#31954 + + df1 = DataFrame(np.array(data1)) + df2 = DataFrame(np.array(data2)) + df = concat([df1, df2], axis=1) + + result = df[df > 2] + + exdict = {i: np.array(col) for i, col in enumerate(expected_data)} + expected = 
DataFrame(exdict).rename(columns={2: 0, 3: 1}) + tm.assert_frame_equal(result, expected) + + @pytest.fixture + def df_dup_cols(self): + dups = ["A", "A", "C", "D"] + df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") + return df + + def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols): + # `df.A > 6` is a DataFrame with a different shape from df + + # boolean with the duplicate raises + df = df_dup_cols + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df[df.A > 6] + + def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols): + # boolean indexing + # GH#4879 + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + expected = df[df.C > 6] + expected.columns = df_dup_cols.columns + + df = df_dup_cols + result = df[df.C > 6] + + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols): + + # where + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + # `df > 6` is a DataFrame with the same shape+alignment as df + expected = df[df > 6] + expected.columns = df_dup_cols.columns + + df = df_dup_cols + result = df[df > 6] + + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + def test_getitem_empty_frame_with_boolean(self): + # Test for issue GH#11859 + + df = DataFrame() + df2 = df[df > 0] + tm.assert_frame_equal(df, df2) + + def test_getitem_returns_view_when_column_is_unique_in_df( + self, using_copy_on_write + ): + # GH#45316 + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + df_orig = df.copy() + view = df["b"] + view.loc[:] = 100 + if using_copy_on_write: + expected = df_orig + else: + expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"]) + tm.assert_frame_equal(df, expected) + + def test_getitem_frozenset_unique_in_column(self): + # GH#41062 + df = DataFrame([[1, 2, 3, 4]], columns=[frozenset(["KEY"]), "B", "C", "C"]) + result = df[frozenset(["KEY"])] + expected = Series([1], name=frozenset(["KEY"])) + tm.assert_series_equal(result, expected) + + +class TestGetitemSlice: + def test_getitem_slice_float64(self, frame_or_series): + values = np.arange(10.0, 50.0, 2) + index = Index(values) + + start, end = values[[5, 15]] + + data = np.random.randn(20, 3) + if frame_or_series is not DataFrame: + data = data[:, 0] + + obj = frame_or_series(data, index=index) + + result = obj[start:end] + expected = obj.iloc[5:16] + tm.assert_equal(result, expected) + + result = obj.loc[start:end] + tm.assert_equal(result, expected) + + def test_getitem_datetime_slice(self): + # GH#43223 + df = DataFrame( + {"a": 0}, + index=DatetimeIndex( + [ + "11.01.2011 22:00", + "11.01.2011 23:00", + "12.01.2011 00:00", + "2011-01-13 00:00", + ] + ), + ) + with tm.assert_produces_warning(FutureWarning): + result = df["2011-01-01":"2011-11-01"] + expected = DataFrame( + {"a": 0}, + index=DatetimeIndex( + ["11.01.2011 22:00", "11.01.2011 23:00", "2011-01-13 00:00"] + ), + ) + tm.assert_frame_equal(result, expected) + + +class TestGetitemDeprecatedIndexers: + @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)]) + ) + with 
tm.assert_produces_warning(FutureWarning): + df[key] diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py new file mode 100644 index 00000000..e2a99348 --- /dev/null +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -0,0 +1,1760 @@ +from collections import namedtuple +from datetime import ( + datetime, + timedelta, +) +import re + +import numpy as np +import pytest + +from pandas._libs import iNaT +from pandas.errors import ( + InvalidIndexError, + SettingWithCopyError, +) +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_integer + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + isna, + notna, +) +import pandas._testing as tm + +# We pass through a TypeError raised by numpy +_slice_msg = "slice indices must be integers or None or have an __index__ method" + + +class TestDataFrameIndexing: + def test_getitem(self, float_frame): + # Slicing + sl = float_frame[:20] + assert len(sl.index) == 20 + + # Column access + for _, series in sl.items(): + assert len(series.index) == 20 + assert tm.equalContents(series.index, sl.index) + + for key, _ in float_frame._series.items(): + assert float_frame[key] is not None + + assert "random" not in float_frame + with pytest.raises(KeyError, match="random"): + float_frame["random"] + + def test_getitem2(self, float_frame): + + df = float_frame.copy() + df["$10"] = np.random.randn(len(df)) + + ad = np.random.randn(len(df)) + df["@awesome_domain"] = ad + + with pytest.raises(KeyError, match=re.escape("'df[\"$10\"]'")): + df.__getitem__('df["$10"]') + + res = df["@awesome_domain"] + tm.assert_numpy_array_equal(ad, res.values) + + def test_setitem_list(self, float_frame): + + float_frame["E"] = "foo" + data = float_frame[["A", "B"]] + float_frame[["B", "A"]] = data + + tm.assert_series_equal(float_frame["B"], data["A"], check_names=False) + tm.assert_series_equal(float_frame["A"], data["B"], check_names=False) + + msg = "Columns must be same length as key" + with pytest.raises(ValueError, match=msg): + data[["A"]] = float_frame[["A", "B"]] + newcolumndata = range(len(data.index) - 1) + msg = ( + rf"Length of values \({len(newcolumndata)}\) " + rf"does not match length of index \({len(data)}\)" + ) + with pytest.raises(ValueError, match=msg): + data["A"] = newcolumndata + + def test_setitem_list2(self): + + df = DataFrame(0, index=range(3), columns=["tt1", "tt2"], dtype=np.int_) + df.loc[1, ["tt1", "tt2"]] = [1, 2] + + result = df.loc[df.index[1], ["tt1", "tt2"]] + expected = Series([1, 2], df.columns, dtype=np.int_, name=1) + tm.assert_series_equal(result, expected) + + df["tt1"] = df["tt2"] = "0" + df.loc[df.index[1], ["tt1", "tt2"]] = ["1", "2"] + result = df.loc[df.index[1], ["tt1", "tt2"]] + expected = Series(["1", "2"], df.columns, name=1) + tm.assert_series_equal(result, expected) + + def test_getitem_boolean(self, mixed_float_frame, mixed_int_frame, datetime_frame): + # boolean indexing + d = datetime_frame.index[10] + indexer = datetime_frame.index > d + indexer_obj = indexer.astype(object) + + subindex = datetime_frame.index[indexer] + subframe = datetime_frame[indexer] + + tm.assert_index_equal(subindex, subframe.index) + with pytest.raises(ValueError, match="Item wrong length"): + datetime_frame[indexer[:-1]] + + subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + with pytest.raises(ValueError, match="Boolean array 
expected"): + datetime_frame[datetime_frame] + + # test that Series work + indexer_obj = Series(indexer_obj, datetime_frame.index) + + subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + # test that Series indexers reindex + # we are producing a warning that since the passed boolean + # key is not the same as the given index, we will reindex + # not sure this is really necessary + with tm.assert_produces_warning(UserWarning): + indexer_obj = indexer_obj.reindex(datetime_frame.index[::-1]) + subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + # test df[df > 0] + for df in [ + datetime_frame, + mixed_float_frame, + mixed_int_frame, + ]: + + data = df._get_numeric_data() + bif = df[df > 0] + bifw = DataFrame( + {c: np.where(data[c] > 0, data[c], np.nan) for c in data.columns}, + index=data.index, + columns=data.columns, + ) + + # add back other columns to compare + for c in df.columns: + if c not in bifw: + bifw[c] = df[c] + bifw = bifw.reindex(columns=df.columns) + + tm.assert_frame_equal(bif, bifw, check_dtype=False) + for c in df.columns: + if bif[c].dtype != bifw[c].dtype: + assert bif[c].dtype == df[c].dtype + + def test_getitem_boolean_casting(self, datetime_frame): + + # don't upcast if we don't need to + df = datetime_frame.copy() + df["E"] = 1 + df["E"] = df["E"].astype("int32") + df["E1"] = df["E"].copy() + df["F"] = 1 + df["F"] = df["F"].astype("int64") + df["F1"] = df["F"].copy() + + casted = df[df > 0] + result = casted.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [np.dtype("int32")] * 2 + + [np.dtype("int64")] * 2, + index=["A", "B", "C", "D", "E", "E1", "F", "F1"], + ) + tm.assert_series_equal(result, expected) + + # int block splitting + df.loc[df.index[1:3], ["E1", "F1"]] = 0 + casted = df[df > 0] + result = casted.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [np.dtype("int32")] + + [np.dtype("float64")] + + [np.dtype("int64")] + + [np.dtype("float64")], + index=["A", "B", "C", "D", "E", "E1", "F", "F1"], + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "lst", [[True, False, True], [True, True, True], [False, False, False]] + ) + def test_getitem_boolean_list(self, lst): + df = DataFrame(np.arange(12).reshape(3, 4)) + result = df[lst] + expected = df.loc[df.index[lst]] + tm.assert_frame_equal(result, expected) + + def test_getitem_boolean_iadd(self): + arr = np.random.randn(5, 5) + + df = DataFrame(arr.copy(), columns=["A", "B", "C", "D", "E"]) + + df[df < 0] += 1 + arr[arr < 0] += 1 + + tm.assert_almost_equal(df.values, arr) + + def test_boolean_index_empty_corner(self): + # #2096 + blah = DataFrame(np.empty([0, 1]), columns=["A"], index=DatetimeIndex([])) + + # both of these should succeed trivially + k = np.array([], bool) + + blah[k] + blah[k] = 0 + + def test_getitem_ix_mixed_integer(self): + df = DataFrame( + np.random.randn(4, 3), index=[1, 10, "C", "E"], columns=[1, 2, 3] + ) + + result = df.iloc[:-1] + expected = df.loc[df.index[:-1]] + tm.assert_frame_equal(result, expected) + + result = df.loc[[1, 10]] + expected = df.loc[Index([1, 10])] + tm.assert_frame_equal(result, expected) + + def test_getitem_ix_mixed_integer2(self): + # 11320 + df = DataFrame( + { + "rna": (1.5, 2.2, 3.2, 4.5), + -1000: [11, 21, 36, 40], + 0: [10, 22, 43, 34], + 1000: [0, 10, 20, 30], + }, + columns=["rna", -1000, 0, 1000], + ) + result = df[[1000]] + expected = df.iloc[:, [3]] + tm.assert_frame_equal(result, expected) + result = df[[-1000]] + 
expected = df.iloc[:, [1]] + tm.assert_frame_equal(result, expected) + + def test_getattr(self, float_frame): + tm.assert_series_equal(float_frame.A, float_frame["A"]) + msg = "'DataFrame' object has no attribute 'NONEXISTENT_NAME'" + with pytest.raises(AttributeError, match=msg): + float_frame.NONEXISTENT_NAME + + def test_setattr_column(self): + df = DataFrame({"foobar": 1}, index=range(10)) + + df.foobar = 5 + assert (df.foobar == 5).all() + + def test_setitem(self, float_frame, using_copy_on_write): + # not sure what else to do here + series = float_frame["A"][::2] + float_frame["col5"] = series + assert "col5" in float_frame + + assert len(series) == 15 + assert len(float_frame) == 30 + + exp = np.ravel(np.column_stack((series.values, [np.nan] * 15))) + exp = Series(exp, index=float_frame.index, name="col5") + tm.assert_series_equal(float_frame["col5"], exp) + + series = float_frame["A"] + float_frame["col6"] = series + tm.assert_series_equal(series, float_frame["col6"], check_names=False) + + # set ndarray + arr = np.random.randn(len(float_frame)) + float_frame["col9"] = arr + assert (float_frame["col9"] == arr).all() + + float_frame["col7"] = 5 + assert (float_frame["col7"] == 5).all() + + float_frame["col0"] = 3.14 + assert (float_frame["col0"] == 3.14).all() + + float_frame["col8"] = "foo" + assert (float_frame["col8"] == "foo").all() + + # this is partially a view (e.g. some blocks are view) + # so raise/warn + smaller = float_frame[:2] + + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + if using_copy_on_write: + # With CoW, adding a new column doesn't raise a warning + smaller["col10"] = ["1", "2"] + else: + with pytest.raises(SettingWithCopyError, match=msg): + smaller["col10"] = ["1", "2"] + + assert smaller["col10"].dtype == np.object_ + assert (smaller["col10"] == ["1", "2"]).all() + + def test_setitem2(self): + # dtype changing GH4204 + df = DataFrame([[0, 0]]) + df.iloc[0] = np.nan + expected = DataFrame([[np.nan, np.nan]]) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[0, 0]]) + df.loc[0] = np.nan + tm.assert_frame_equal(df, expected) + + def test_setitem_boolean(self, float_frame): + df = float_frame.copy() + values = float_frame.values + + df[df["A"] > 0] = 4 + values[values[:, 0] > 0] = 4 + tm.assert_almost_equal(df.values, values) + + # test that column reindexing works + series = df["A"] == 4 + series = series.reindex(df.index[::-1]) + df[series] = 1 + values[values[:, 0] == 4] = 1 + tm.assert_almost_equal(df.values, values) + + df[df > 0] = 5 + values[values > 0] = 5 + tm.assert_almost_equal(df.values, values) + + df[df == 5] = 0 + values[values == 5] = 0 + tm.assert_almost_equal(df.values, values) + + # a df that needs alignment first + df[df[:-1] < 0] = 2 + np.putmask(values[:-1], values[:-1] < 0, 2) + tm.assert_almost_equal(df.values, values) + + # indexed with same shape but rows-reversed df + df[df[::-1] == 2] = 3 + values[values == 2] = 3 + tm.assert_almost_equal(df.values, values) + + msg = "Must pass DataFrame or 2-d ndarray with boolean values only" + with pytest.raises(TypeError, match=msg): + df[df * 0] = 2 + + # index with DataFrame + mask = df > np.abs(df) + expected = df.copy() + df[df > np.abs(df)] = np.nan + expected.values[mask.values] = np.nan + tm.assert_frame_equal(df, expected) + + # set from DataFrame + expected = df.copy() + df[df > np.abs(df)] = df * 2 + np.putmask(expected.values, mask.values, df.values * 2) + tm.assert_frame_equal(df, expected) + + def test_setitem_cast(self, float_frame): + 
float_frame["D"] = float_frame["D"].astype("i8") + assert float_frame["D"].dtype == np.int64 + + # #669, should not cast? + # this is now set to int64, which means a replacement of the column to + # the value dtype (and nothing to do with the existing dtype) + float_frame["B"] = 0 + assert float_frame["B"].dtype == np.int64 + + # cast if pass array of course + float_frame["B"] = np.arange(len(float_frame)) + assert issubclass(float_frame["B"].dtype.type, np.integer) + + float_frame["foo"] = "bar" + float_frame["foo"] = 0 + assert float_frame["foo"].dtype == np.int64 + + float_frame["foo"] = "bar" + float_frame["foo"] = 2.5 + assert float_frame["foo"].dtype == np.float64 + + float_frame["something"] = 0 + assert float_frame["something"].dtype == np.int64 + float_frame["something"] = 2 + assert float_frame["something"].dtype == np.int64 + float_frame["something"] = 2.5 + assert float_frame["something"].dtype == np.float64 + + def test_setitem_corner(self, float_frame): + # corner case + df = DataFrame({"B": [1.0, 2.0, 3.0], "C": ["a", "b", "c"]}, index=np.arange(3)) + del df["B"] + df["B"] = [1.0, 2.0, 3.0] + assert "B" in df + assert len(df.columns) == 2 + + df["A"] = "beginning" + df["E"] = "foo" + df["D"] = "bar" + df[datetime.now()] = "date" + df[datetime.now()] = 5.0 + + # what to do when empty frame with index + dm = DataFrame(index=float_frame.index) + dm["A"] = "foo" + dm["B"] = "bar" + assert len(dm.columns) == 2 + assert dm.values.dtype == np.object_ + + # upcast + dm["C"] = 1 + assert dm["C"].dtype == np.int64 + + dm["E"] = 1.0 + assert dm["E"].dtype == np.float64 + + # set existing column + dm["A"] = "bar" + assert "bar" == dm["A"][0] + + dm = DataFrame(index=np.arange(3)) + dm["A"] = 1 + dm["foo"] = "bar" + del dm["foo"] + dm["foo"] = "bar" + assert dm["foo"].dtype == np.object_ + + dm["coercible"] = ["1", "2", "3"] + assert dm["coercible"].dtype == np.object_ + + def test_setitem_corner2(self): + data = { + "title": ["foobar", "bar", "foobar"] + ["foobar"] * 17, + "cruft": np.random.random(20), + } + + df = DataFrame(data) + ix = df[df["title"] == "bar"].index + + df.loc[ix, ["title"]] = "foobar" + df.loc[ix, ["cruft"]] = 0 + + assert df.loc[1, "title"] == "foobar" + assert df.loc[1, "cruft"] == 0 + + def test_setitem_ambig(self): + # Difficulties with mixed-type data + from decimal import Decimal + + # Created as float type + dm = DataFrame(index=range(3), columns=range(3)) + + coercable_series = Series([Decimal(1) for _ in range(3)], index=range(3)) + uncoercable_series = Series(["foo", "bzr", "baz"], index=range(3)) + + dm[0] = np.ones(3) + assert len(dm.columns) == 3 + + dm[1] = coercable_series + assert len(dm.columns) == 3 + + dm[2] = uncoercable_series + assert len(dm.columns) == 3 + assert dm[2].dtype == np.object_ + + def test_setitem_None(self, float_frame): + # GH #766 + float_frame[None] = float_frame["A"] + tm.assert_series_equal( + float_frame.iloc[:, -1], float_frame["A"], check_names=False + ) + tm.assert_series_equal( + float_frame.loc[:, None], float_frame["A"], check_names=False + ) + tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False) + repr(float_frame) + + def test_loc_setitem_boolean_mask_allfalse(self): + # GH 9596 + df = DataFrame( + {"a": ["1", "2", "3"], "b": ["11", "22", "33"], "c": ["111", "222", "333"]} + ) + + result = df.copy() + result.loc[result.b.isna(), "a"] = result.a + tm.assert_frame_equal(result, df) + + def test_getitem_fancy_slice_integers_step(self): + df = DataFrame(np.random.randn(10, 5)) + + # this is OK + 
result = df.iloc[:8:2] # noqa + df.iloc[:8:2] = np.nan + assert isna(df.iloc[:8:2]).values.all() + + def test_getitem_setitem_integer_slice_keyerrors(self): + df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2)) + + # this is OK + cp = df.copy() + cp.iloc[4:10] = 0 + assert (cp.iloc[4:10] == 0).values.all() + + # so is this + cp = df.copy() + cp.iloc[3:11] = 0 + assert (cp.iloc[3:11] == 0).values.all() + + result = df.iloc[2:6] + result2 = df.loc[3:11] + expected = df.reindex([4, 6, 8, 10]) + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + # non-monotonic, raise KeyError + df2 = df.iloc[list(range(5)) + list(range(5, 10))[::-1]] + with pytest.raises(KeyError, match=r"^3$"): + df2.loc[3:11] + with pytest.raises(KeyError, match=r"^3$"): + df2.loc[3:11] = 0 + + @td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view + def test_fancy_getitem_slice_mixed( + self, float_frame, float_string_frame, using_copy_on_write + ): + + sliced = float_string_frame.iloc[:, -3:] + assert sliced["D"].dtype == np.float64 + + # get view with single block + # setting it triggers setting with copy + original = float_frame.copy() + sliced = float_frame.iloc[:, -3:] + + assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) + + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + if not using_copy_on_write: + with pytest.raises(SettingWithCopyError, match=msg): + sliced.loc[:, "C"] = 4.0 + + assert (float_frame["C"] == 4).all() + else: + sliced.loc[:, "C"] = 4.0 + tm.assert_frame_equal(float_frame, original) + + def test_getitem_setitem_non_ix_labels(self): + df = tm.makeTimeDataFrame() + + start, end = df.index[[5, 10]] + + result = df.loc[start:end] + result2 = df[start:end] + expected = df[5:11] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + result = df.copy() + result.loc[start:end] = 0 + result2 = df.copy() + result2[start:end] = 0 + expected = df.copy() + expected[5:11] = 0 + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_ix_multi_take(self): + df = DataFrame(np.random.randn(3, 2)) + rs = df.loc[df.index == 0, :] + xp = df.reindex([0]) + tm.assert_frame_equal(rs, xp) + + # GH#1321 + df = DataFrame(np.random.randn(3, 2)) + rs = df.loc[df.index == 0, df.columns == 1] + xp = df.reindex(index=[0], columns=[1]) + tm.assert_frame_equal(rs, xp) + + def test_getitem_fancy_scalar(self, float_frame): + f = float_frame + ix = f.loc + + # individual value + for col in f.columns: + ts = f[col] + for idx in f.index[::5]: + assert ix[idx, col] == ts[idx] + + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values + def test_setitem_fancy_scalar(self, float_frame): + f = float_frame + expected = float_frame.copy() + ix = f.loc + + # individual value + for j, col in enumerate(f.columns): + ts = f[col] # noqa + for idx in f.index[::5]: + i = f.index.get_loc(idx) + val = np.random.randn() + expected.values[i, j] = val + + ix[idx, col] = val + tm.assert_frame_equal(f, expected) + + def test_getitem_fancy_boolean(self, float_frame): + f = float_frame + ix = f.loc + + expected = f.reindex(columns=["B", "D"]) + result = ix[:, [False, True, False, True]] + tm.assert_frame_equal(result, expected) + + expected = f.reindex(index=f.index[5:10], columns=["B", "D"]) + result = ix[f.index[5:10], [False, True, False, True]] + tm.assert_frame_equal(result, expected) + + boolvec = f.index > f.index[7] + expected 
= f.reindex(index=f.index[boolvec]) + result = ix[boolvec] + tm.assert_frame_equal(result, expected) + result = ix[boolvec, :] + tm.assert_frame_equal(result, expected) + + result = ix[boolvec, f.columns[2:]] + expected = f.reindex(index=f.index[boolvec], columns=["C", "D"]) + tm.assert_frame_equal(result, expected) + + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values + def test_setitem_fancy_boolean(self, float_frame): + # from 2d, set with booleans + frame = float_frame.copy() + expected = float_frame.copy() + + mask = frame["A"] > 0 + frame.loc[mask] = 0.0 + expected.values[mask.values] = 0.0 + tm.assert_frame_equal(frame, expected) + + frame = float_frame.copy() + expected = float_frame.copy() + frame.loc[mask, ["A", "B"]] = 0.0 + expected.values[mask.values, :2] = 0.0 + tm.assert_frame_equal(frame, expected) + + def test_getitem_fancy_ints(self, float_frame): + result = float_frame.iloc[[1, 4, 7]] + expected = float_frame.loc[float_frame.index[[1, 4, 7]]] + tm.assert_frame_equal(result, expected) + + result = float_frame.iloc[:, [2, 0, 1]] + expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]] + tm.assert_frame_equal(result, expected) + + def test_getitem_setitem_boolean_misaligned(self, float_frame): + # boolean index misaligned labels + mask = float_frame["A"][::-1] > 1 + + result = float_frame.loc[mask] + expected = float_frame.loc[mask[::-1]] + tm.assert_frame_equal(result, expected) + + cp = float_frame.copy() + expected = float_frame.copy() + cp.loc[mask] = 0 + expected.loc[mask] = 0 + tm.assert_frame_equal(cp, expected) + + def test_getitem_setitem_boolean_multi(self): + df = DataFrame(np.random.randn(3, 2)) + + # get + k1 = np.array([True, False, True]) + k2 = np.array([False, True]) + result = df.loc[k1, k2] + expected = df.loc[[0, 2], [1]] + tm.assert_frame_equal(result, expected) + + expected = df.copy() + df.loc[np.array([True, False, True]), np.array([False, True])] = 5 + expected.loc[[0, 2], [1]] = 5 + tm.assert_frame_equal(df, expected) + + def test_getitem_setitem_float_labels(self, using_array_manager): + index = Index([1.5, 2, 3, 4, 5]) + df = DataFrame(np.random.randn(5, 5), index=index) + + result = df.loc[1.5:4] + expected = df.reindex([1.5, 2, 3, 4]) + tm.assert_frame_equal(result, expected) + assert len(result) == 4 + + result = df.loc[4:5] + expected = df.reindex([4, 5]) # reindex with int + tm.assert_frame_equal(result, expected, check_index_type=False) + assert len(result) == 2 + + result = df.loc[4:5] + expected = df.reindex([4.0, 5.0]) # reindex with float + tm.assert_frame_equal(result, expected) + assert len(result) == 2 + + # loc_float changes this to work properly + result = df.loc[1:2] + expected = df.iloc[0:2] + tm.assert_frame_equal(result, expected) + + df.loc[1:2] = 0 + result = df[1:2] + assert (result == 0).all().all() + + # #2727 + index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) + df = DataFrame(np.random.randn(5, 5), index=index) + + # positional slicing only via iloc! 
+ msg = ( + "cannot do positional indexing on Float64Index with " + r"these indexers \[1.0\] of type float" + ) + with pytest.raises(TypeError, match=msg): + df.iloc[1.0:5] + + result = df.iloc[4:5] + expected = df.reindex([5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 1 + + cp = df.copy() + + with pytest.raises(TypeError, match=_slice_msg): + cp.iloc[1.0:5] = 0 + + with pytest.raises(TypeError, match=msg): + result = cp.iloc[1.0:5] == 0 + + assert result.values.all() + assert (cp.iloc[0:1] == df.iloc[0:1]).values.all() + + cp = df.copy() + cp.iloc[4:5] = 0 + assert (cp.iloc[4:5] == 0).values.all() + assert (cp.iloc[0:4] == df.iloc[0:4]).values.all() + + # float slicing + result = df.loc[1.0:5] + expected = df + tm.assert_frame_equal(result, expected) + assert len(result) == 5 + + result = df.loc[1.1:5] + expected = df.reindex([2.5, 3.5, 4.5, 5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 4 + + result = df.loc[4.51:5] + expected = df.reindex([5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 1 + + result = df.loc[1.0:5.0] + expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 5 + + cp = df.copy() + warn = DeprecationWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + with tm.assert_produces_warning(warn, match=msg): + cp.loc[1.0:5.0] = 0 + result = cp.loc[1.0:5.0] + assert (result == 0).values.all() + + def test_setitem_single_column_mixed_datetime(self): + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["foo", "bar", "baz"], + ) + + df["timestamp"] = Timestamp("20010102") + + # check our dtypes + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 3 + [np.dtype("datetime64[ns]")], + index=["foo", "bar", "baz", "timestamp"], + ) + tm.assert_series_equal(result, expected) + + # GH#16674 iNaT is treated as an integer when given by the user + df.loc["b", "timestamp"] = iNaT + assert not isna(df.loc["b", "timestamp"]) + assert df["timestamp"].dtype == np.object_ + assert df.loc["b", "timestamp"] == iNaT + + # allow this syntax (as of GH#3216) + df.loc["c", "timestamp"] = np.nan + assert isna(df.loc["c", "timestamp"]) + + # allow this syntax + df.loc["d", :] = np.nan + assert not isna(df.loc["c", :]).all() + + def test_setitem_mixed_datetime(self): + # GH 9336 + expected = DataFrame( + { + "a": [0, 0, 0, 0, 13, 14], + "b": [ + datetime(2012, 1, 1), + 1, + "x", + "y", + datetime(2013, 1, 1), + datetime(2014, 1, 1), + ], + } + ) + df = DataFrame(0, columns=list("ab"), index=range(6)) + df["b"] = pd.NaT + df.loc[0, "b"] = datetime(2012, 1, 1) + df.loc[1, "b"] = 1 + df.loc[[2, 3], "b"] = "x", "y" + A = np.array( + [ + [13, np.datetime64("2013-01-01T00:00:00")], + [14, np.datetime64("2014-01-01T00:00:00")], + ] + ) + df.loc[[4, 5], ["a", "b"]] = A + tm.assert_frame_equal(df, expected) + + def test_setitem_frame_float(self, float_frame): + piece = float_frame.loc[float_frame.index[:2], ["A", "B"]] + float_frame.loc[float_frame.index[-2] :, ["A", "B"]] = piece.values + result = float_frame.loc[float_frame.index[-2:], ["A", "B"]].values + expected = piece.values + tm.assert_almost_equal(result, expected) + + def test_setitem_frame_mixed(self, float_string_frame): + # GH 3216 + + # already aligned + f = float_string_frame.copy() + piece = DataFrame( + [[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"] + ) + key = (f.index[slice(None, 2)], ["A", "B"]) + f.loc[key] = 
piece + tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values) + + def test_setitem_frame_mixed_rows_unaligned(self, float_string_frame): + # GH#3216 rows unaligned + f = float_string_frame.copy() + piece = DataFrame( + [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]], + index=list(f.index[0:2]) + ["foo", "bar"], + columns=["A", "B"], + ) + key = (f.index[slice(None, 2)], ["A", "B"]) + f.loc[key] = piece + tm.assert_almost_equal( + f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2] + ) + + def test_setitem_frame_mixed_key_unaligned(self, float_string_frame): + # GH#3216 key is unaligned with values + f = float_string_frame.copy() + piece = f.loc[f.index[:2], ["A"]] + piece.index = f.index[-2:] + key = (f.index[slice(-2, None)], ["A", "B"]) + f.loc[key] = piece + piece["B"] = np.nan + tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) + + def test_setitem_frame_mixed_ndarray(self, float_string_frame): + # GH#3216 ndarray + f = float_string_frame.copy() + piece = float_string_frame.loc[f.index[:2], ["A", "B"]] + key = (f.index[slice(-2, None)], ["A", "B"]) + f.loc[key] = piece.values + tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) + + def test_setitem_frame_upcast(self): + # needs upcasting + df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"]) + df2 = df.copy() + df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5 + expected = df.reindex(columns=["A", "B"]) + expected += 0.5 + expected["C"] = df["C"] + tm.assert_frame_equal(df2, expected) + + def test_setitem_frame_align(self, float_frame): + piece = float_frame.loc[float_frame.index[:2], ["A", "B"]] + piece.index = float_frame.index[-2:] + piece.columns = ["A", "B"] + float_frame.loc[float_frame.index[-2:], ["A", "B"]] = piece + result = float_frame.loc[float_frame.index[-2:], ["A", "B"]].values + expected = piece.values + tm.assert_almost_equal(result, expected) + + def test_getitem_setitem_ix_duplicates(self): + # #1201 + df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"]) + + result = df.loc["foo"] + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.loc["bar"] + expected = df.iloc[[2, 4]] + tm.assert_frame_equal(result, expected) + + result = df.loc["baz"] + expected = df.iloc[3] + tm.assert_series_equal(result, expected) + + def test_getitem_ix_boolean_duplicates_multiple(self): + # #1201 + df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"]) + + result = df.loc[["bar"]] + exp = df.iloc[[2, 4]] + tm.assert_frame_equal(result, exp) + + result = df.loc[df[1] > 0] + exp = df[df[1] > 0] + tm.assert_frame_equal(result, exp) + + result = df.loc[df[0] > 0] + exp = df[df[0] > 0] + tm.assert_frame_equal(result, exp) + + @pytest.mark.parametrize("bool_value", [True, False]) + def test_getitem_setitem_ix_bool_keyerror(self, bool_value): + # #2199 + df = DataFrame({"a": [1, 2, 3]}) + message = f"{bool_value}: boolean label can not be used without a boolean index" + with pytest.raises(KeyError, match=message): + df.loc[bool_value] + + msg = "cannot use a single bool to index into setitem" + with pytest.raises(KeyError, match=msg): + df.loc[bool_value] = 0 + + # TODO: rename? remove? 
+ def test_single_element_ix_dont_upcast(self, float_frame): + float_frame["E"] = 1 + assert issubclass(float_frame["E"].dtype.type, (int, np.integer)) + + result = float_frame.loc[float_frame.index[5], "E"] + assert is_integer(result) + + # GH 11617 + df = DataFrame({"a": [1.23]}) + df["b"] = 666 + + result = df.loc[0, "b"] + assert is_integer(result) + + expected = Series([666], [0], name="b") + result = df.loc[[0], "b"] + tm.assert_series_equal(result, expected) + + def test_iloc_row(self): + df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + + result = df.iloc[1] + exp = df.loc[2] + tm.assert_series_equal(result, exp) + + result = df.iloc[2] + exp = df.loc[4] + tm.assert_series_equal(result, exp) + + # slice + result = df.iloc[slice(4, 8)] + expected = df.loc[8:14] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[[1, 2, 4, 6]] + expected = df.reindex(df.index[[1, 2, 4, 6]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_row_slice_view(self, using_array_manager, using_copy_on_write): + df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + original = df.copy() + + # verify slice is view + # setting it makes it raise/warn + subset = df.iloc[slice(4, 8)] + + assert np.shares_memory(df[2], subset[2]) + + exp_col = original[2].copy() + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + if using_copy_on_write: + subset.loc[:, 2] = 0.0 + else: + with pytest.raises(SettingWithCopyError, match=msg): + subset.loc[:, 2] = 0.0 + + # TODO(ArrayManager) verify it is expected that the original didn't change + if not using_array_manager: + exp_col._values[4:8] = 0.0 + tm.assert_series_equal(df[2], exp_col) + + def test_iloc_col(self): + + df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + + result = df.iloc[:, 1] + exp = df.loc[:, 2] + tm.assert_series_equal(result, exp) + + result = df.iloc[:, 2] + exp = df.loc[:, 4] + tm.assert_series_equal(result, exp) + + # slice + result = df.iloc[:, slice(4, 8)] + expected = df.loc[:, 8:14] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[:, [1, 2, 4, 6]] + expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_col_slice_view(self, using_array_manager, using_copy_on_write): + df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + original = df.copy() + subset = df.iloc[:, slice(4, 8)] + + if not using_array_manager and not using_copy_on_write: + # verify slice is view + assert np.shares_memory(df[8]._values, subset[8]._values) + + # and that we are setting a copy + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(SettingWithCopyError, match=msg): + subset.loc[:, 8] = 0.0 + + assert (df[8] == 0).all() + else: + if using_copy_on_write: + # verify slice is view + assert np.shares_memory(df[8]._values, subset[8]._values) + subset[8] = 0.0 + # subset changed + assert (subset[8] == 0).all() + # but df itself did not change (setitem replaces full column) + tm.assert_frame_equal(df, original) + + def test_loc_duplicates(self): + # gh-17105 + + # insert a duplicate element to the index + trange = date_range( + start=Timestamp(year=2017, month=1, day=1), + end=Timestamp(year=2017, month=1, day=5), + ) + + trange = trange.insert(loc=5, item=Timestamp(year=2017, month=1, day=5)) + + df = DataFrame(0, index=trange, columns=["A", "B"]) + bool_idx = np.array([False, False, False, False, False, True]) + + # 
assignment + df.loc[trange[bool_idx], "A"] = 6 + + expected = DataFrame( + {"A": [0, 0, 0, 0, 6, 6], "B": [0, 0, 0, 0, 0, 0]}, index=trange + ) + tm.assert_frame_equal(df, expected) + + # in-place + df = DataFrame(0, index=trange, columns=["A", "B"]) + df.loc[trange[bool_idx], "A"] += 6 + tm.assert_frame_equal(df, expected) + + def test_setitem_with_unaligned_tz_aware_datetime_column(self): + # GH 12981 + # Assignment of unaligned offset-aware datetime series. + # Make sure timezone isn't lost + column = Series(date_range("2015-01-01", periods=3, tz="utc"), name="dates") + df = DataFrame({"dates": column}) + df["dates"] = column[[1, 0, 2]] + tm.assert_series_equal(df["dates"], column) + + df = DataFrame({"dates": column}) + df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]] + tm.assert_series_equal(df["dates"], column) + + def test_loc_setitem_datetimelike_with_inference(self): + # GH 7592 + # assignment of timedeltas with NaT + + one_hour = timedelta(hours=1) + df = DataFrame(index=date_range("20130101", periods=4)) + df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]") + df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]") + df.loc[df.index[:3], "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]") + df.loc[:, "D"] = np.array([4 * one_hour] * 4, dtype="m8[ns]") + df.loc[df.index[:3], "E"] = np.array([5 * one_hour] * 3, dtype="m8[ns]") + df["F"] = np.timedelta64("NaT") + df.loc[df.index[:-1], "F"] = np.array([6 * one_hour] * 3, dtype="m8[ns]") + df.loc[df.index[-3] :, "G"] = date_range("20130101", periods=3) + df["H"] = np.datetime64("NaT") + result = df.dtypes + expected = Series( + [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2, + index=list("ABCDEFGH"), + ) + tm.assert_series_equal(result, expected) + + def test_getitem_boolean_indexing_mixed(self): + df = DataFrame( + { + 0: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 1: { + 35: np.nan, + 40: 0.32632316859446198, + 43: np.nan, + 49: 0.32632316859446198, + 50: 0.39114724480578139, + }, + 2: { + 35: np.nan, + 40: np.nan, + 43: 0.29012581014105987, + 49: np.nan, + 50: np.nan, + }, + 3: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 4: { + 35: 0.34215328467153283, + 40: np.nan, + 43: np.nan, + 49: np.nan, + 50: np.nan, + }, + "y": {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}, + } + ) + + # mixed int/float ok + df2 = df.copy() + df2[df2 > 0.3] = 1 + expected = df.copy() + expected.loc[40, 1] = 1 + expected.loc[49, 1] = 1 + expected.loc[50, 1] = 1 + expected.loc[35, 4] = 1 + tm.assert_frame_equal(df2, expected) + + df["foo"] = "test" + msg = "not supported between instances|unorderable types" + + with pytest.raises(TypeError, match=msg): + df[df > 0.3] = 1 + + def test_type_error_multiindex(self): + # See gh-12218 + mi = MultiIndex.from_product([["x", "y"], [0, 1]], names=[None, "c"]) + dg = DataFrame( + [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index([0, 1], name="i") + ) + with pytest.raises(InvalidIndexError, match="slice"): + dg[:, 0] + + index = Index(range(2), name="i") + columns = MultiIndex( + levels=[["x", "y"], [0, 1]], codes=[[0, 1], [0, 0]], names=[None, "c"] + ) + expected = DataFrame([[1, 2], [3, 4]], columns=columns, index=index) + + result = dg.loc[:, (slice(None), 0)] + tm.assert_frame_equal(result, expected) + + name = ("x", 0) + index = Index(range(2), name="i") + expected = Series([1, 3], index=index, name=name) + + result = dg["x", 0] + tm.assert_series_equal(result, expected) + + def test_getitem_interval_index_partial_indexing(self): + # GH#36490 + df = 
DataFrame( + np.ones((3, 4)), columns=pd.IntervalIndex.from_breaks(np.arange(5)) + ) + + expected = df.iloc[:, 0] + + res = df[0.5] + tm.assert_series_equal(res, expected) + + res = df.loc[:, 0.5] + tm.assert_series_equal(res, expected) + + def test_setitem_array_as_cell_value(self): + # GH#43422 + df = DataFrame(columns=["a", "b"], dtype=object) + df.loc[0] = {"a": np.zeros((2,)), "b": np.zeros((2, 2))} + expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]}) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_nullable_2d_values(self): + df = DataFrame({"A": [1, 2, 3]}, dtype="Int64") + orig = df.copy() + + df.loc[:] = df.values[:, ::-1] + tm.assert_frame_equal(df, orig) + + df.loc[:] = pd.core.arrays.PandasArray(df.values[:, ::-1]) + tm.assert_frame_equal(df, orig) + + df.iloc[:] = df.iloc[:, :] + tm.assert_frame_equal(df, orig) + + def test_getitem_segfault_with_empty_like_object(self): + # GH#46848 + df = DataFrame(np.empty((1, 1), dtype=object)) + df[0] = np.empty_like(df[0]) + # this produces the segfault + df[[0]] + + @pytest.mark.parametrize( + "null", [pd.NaT, pd.NaT.to_numpy("M8[ns]"), pd.NaT.to_numpy("m8[ns]")] + ) + def test_setting_mismatched_na_into_nullable_fails( + self, null, any_numeric_ea_dtype + ): + # GH#44514 don't cast mismatched nulls to pd.NA + df = DataFrame({"A": [1, 2, 3]}, dtype=any_numeric_ea_dtype) + ser = df["A"] + arr = ser._values + + msg = "|".join( + [ + r"timedelta64\[ns\] cannot be converted to (Floating|Integer)Dtype", + r"datetime64\[ns\] cannot be converted to (Floating|Integer)Dtype", + "'values' contains non-numeric NA", + r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}", + ] + ) + with pytest.raises(TypeError, match=msg): + arr[0] = null + + with pytest.raises(TypeError, match=msg): + arr[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + ser[0] = null + + with pytest.raises(TypeError, match=msg): + ser[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + ser.iloc[0] = null + + with pytest.raises(TypeError, match=msg): + ser.iloc[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + df.iloc[0, 0] = null + + with pytest.raises(TypeError, match=msg): + df.iloc[:2, 0] = [null, null] + + # Multi-Block + df2 = df.copy() + df2["B"] = ser.copy() + with pytest.raises(TypeError, match=msg): + df2.iloc[0, 0] = null + + with pytest.raises(TypeError, match=msg): + df2.iloc[:2, 0] = [null, null] + + def test_loc_expand_empty_frame_keep_index_name(self): + # GH#45621 + df = DataFrame(columns=["b"], index=Index([], name="a")) + df.loc[0] = 1 + expected = DataFrame({"b": [1]}, index=Index([0], name="a")) + tm.assert_frame_equal(df, expected) + + def test_loc_expand_empty_frame_keep_midx_names(self): + # GH#46317 + df = DataFrame( + columns=["d"], index=MultiIndex.from_tuples([], names=["a", "b", "c"]) + ) + df.loc[(1, 2, 3)] = "foo" + expected = DataFrame( + {"d": ["foo"]}, + index=MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"]), + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("val", ["x", 1]) + @pytest.mark.parametrize("idxr", ["a", ["a"]]) + def test_loc_setitem_rhs_frame(self, idxr, val): + # GH#47578 + df = DataFrame({"a": [1, 2]}) + with tm.assert_produces_warning(None): + df.loc[:, idxr] = DataFrame({"a": [val, 11]}, index=[1, 2]) + expected = DataFrame({"a": [np.nan, val]}) + tm.assert_frame_equal(df, expected) + + @td.skip_array_manager_invalid_test + def test_iloc_setitem_enlarge_no_warning(self): + # GH#47381 + df = DataFrame(columns=["a", "b"]) 
+ expected = df.copy() + view = df[:] + with tm.assert_produces_warning(None): + df.iloc[:, 0] = np.array([1, 2], dtype=np.float64) + tm.assert_frame_equal(view, expected) + + def test_loc_internals_not_updated_correctly(self): + # GH#47867 all steps are necessary to reproduce the initial bug + df = DataFrame( + {"bool_col": True, "a": 1, "b": 2.5}, + index=MultiIndex.from_arrays([[1, 2], [1, 2]], names=["idx1", "idx2"]), + ) + idx = [(1, 1)] + + df["c"] = 3 + df.loc[idx, "c"] = 0 + + df.loc[idx, "c"] + df.loc[idx, ["a", "b"]] + + df.loc[idx, "c"] = 15 + result = df.loc[idx, "c"] + expected = df = Series( + 15, + index=MultiIndex.from_arrays([[1], [1]], names=["idx1", "idx2"]), + name="c", + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("val", [None, [None], pd.NA, [pd.NA]]) + def test_iloc_setitem_string_list_na(self, val): + # GH#45469 + df = DataFrame({"a": ["a", "b", "c"]}, dtype="string") + df.iloc[[0], :] = val + expected = DataFrame({"a": [pd.NA, "b", "c"]}, dtype="string") + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("val", [None, pd.NA]) + def test_iloc_setitem_string_na(self, val): + # GH#45469 + df = DataFrame({"a": ["a", "b", "c"]}, dtype="string") + df.iloc[0, :] = val + expected = DataFrame({"a": [pd.NA, "b", "c"]}, dtype="string") + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("func", [list, Series, np.array]) + def test_iloc_setitem_ea_null_slice_length_one_list(self, func): + # GH#48016 + df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + df.iloc[:, func([0])] = 5 + expected = DataFrame({"a": [5, 5, 5]}, dtype="Int64") + tm.assert_frame_equal(df, expected) + + def test_loc_named_tuple_for_midx(self): + # GH#48124 + df = DataFrame( + index=MultiIndex.from_product( + [["A", "B"], ["a", "b", "c"]], names=["first", "second"] + ) + ) + indexer_tuple = namedtuple("Indexer", df.index.names) + idxr = indexer_tuple(first="A", second=["a", "b"]) + result = df.loc[idxr, :] + expected = DataFrame( + index=MultiIndex.from_tuples( + [("A", "a"), ("A", "b")], names=["first", "second"] + ) + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("col", [{}, {"name": "a"}]) + def test_loc_setitem_reordering_with_all_true_indexer(self, col): + # GH#48701 + n = 17 + df = DataFrame({**col, "x": range(n), "y": range(n)}) + expected = df.copy() + df.loc[n * [True], ["x", "y"]] = df[["x", "y"]] + tm.assert_frame_equal(df, expected) + + def test_loc_rhs_empty_warning(self): + # GH48480 + df = DataFrame(columns=["a", "b"]) + expected = df.copy() + rhs = DataFrame(columns=["a"]) + with tm.assert_produces_warning(None): + df.loc[:, "a"] = rhs + tm.assert_frame_equal(df, expected) + + +class TestDataFrameIndexingUInt64: + def test_setitem(self, uint64_frame): + + df = uint64_frame + idx = df["A"].rename("foo") + + # setitem + assert "C" not in df.columns + df["C"] = idx + tm.assert_series_equal(df["C"], Series(idx, name="C")) + + assert "D" not in df.columns + df["D"] = "foo" + df["D"] = idx + tm.assert_series_equal(df["D"], Series(idx, name="D")) + del df["D"] + + # With NaN: because uint64 has no NaN element, + # the column should be cast to object. 
+ df2 = df.copy() + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT + result = df2["B"] + tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) + tm.assert_series_equal( + df2.dtypes, + Series( + [np.dtype("uint64"), np.dtype("O"), np.dtype("O")], + index=["A", "B", "C"], + ), + ) + + +def test_object_casting_indexing_wraps_datetimelike(using_array_manager): + # GH#31649, check the indexing methods all the way down the stack + df = DataFrame( + { + "A": [1, 2], + "B": date_range("2000", periods=2), + "C": pd.timedelta_range("1 Day", periods=2), + } + ) + + ser = df.loc[0] + assert isinstance(ser.values[1], Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + ser = df.iloc[0] + assert isinstance(ser.values[1], Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + ser = df.xs(0, axis=0) + assert isinstance(ser.values[1], Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + if using_array_manager: + # remainder of the test checking BlockManager internals + return + + mgr = df._mgr + mgr._rebuild_blknos_and_blklocs() + arr = mgr.fast_xs(0).array + assert isinstance(arr[1], Timestamp) + assert isinstance(arr[2], pd.Timedelta) + + blk = mgr.blocks[mgr.blknos[1]] + assert blk.dtype == "M8[ns]" # we got the right block + val = blk.iget((0, 0)) + assert isinstance(val, Timestamp) + + blk = mgr.blocks[mgr.blknos[2]] + assert blk.dtype == "m8[ns]" # we got the right block + val = blk.iget((0, 0)) + assert isinstance(val, pd.Timedelta) + + +msg1 = r"Cannot setitem on a Categorical with a new category( \(.*\))?, set the" +msg2 = "Cannot set a Categorical with another, without identical categories" + + +class TestLocILocDataFrameCategorical: + @pytest.fixture + def orig(self): + cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 1, 1, 1, 1, 1, 1] + orig = DataFrame({"cats": cats, "values": values}, index=idx) + return orig + + @pytest.fixture + def exp_single_row(self): + # The expected values if we change a single row + cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx1 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values1 = [1, 1, 2, 1, 1, 1, 1] + exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1) + return exp_single_row + + @pytest.fixture + def exp_multi_row(self): + # assign multiple rows (mixed values) (-> array) -> exp_multi_row + # changed multiple rows + cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values2 = [1, 1, 2, 2, 1, 1, 1] + exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2) + return exp_multi_row + + @pytest.fixture + def exp_parts_cats_col(self): + # changed part of the cats column + cats3 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx3 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values3 = [1, 1, 1, 1, 1, 1, 1] + exp_parts_cats_col = DataFrame({"cats": cats3, "values": values3}, index=idx3) + return exp_parts_cats_col + + @pytest.fixture + def exp_single_cats_value(self): + # changed single value in cats col + cats4 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx4 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values4 = [1, 1, 1, 1, 1, 1, 1] + exp_single_cats_value = DataFrame( + {"cats": cats4, "values": values4}, index=idx4 + ) + return exp_single_cats_value + + @pytest.mark.parametrize("indexer", 
[tm.loc, tm.iloc]) + def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer): + # - assign multiple rows (mixed values) -> exp_multi_row + df = orig.copy() + + key = slice(2, 4) + if indexer is tm.loc: + key = slice("j", "k") + + indexer(df)[key, :] = [["b", 2], ["b", 2]] + tm.assert_frame_equal(df, exp_multi_row) + + df = orig.copy() + with pytest.raises(TypeError, match=msg1): + indexer(df)[key, :] = [["c", 2], ["c", 2]] + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc, tm.at, tm.iat]) + def test_loc_iloc_at_iat_setitem_single_value_in_categories( + self, orig, exp_single_cats_value, indexer + ): + # - assign a single value -> exp_single_cats_value + df = orig.copy() + + key = (2, 0) + if indexer in [tm.loc, tm.at]: + key = (df.index[2], df.columns[0]) + + # "b" is among the categories for df["cat"}] + indexer(df)[key] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # "c" is not among the categories for df["cat"] + with pytest.raises(TypeError, match=msg1): + indexer(df)[key] = "c" + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_loc_iloc_setitem_mask_single_value_in_categories( + self, orig, exp_single_cats_value, indexer + ): + # mask with single True + df = orig.copy() + + mask = df.index == "j" + key = 0 + if indexer is tm.loc: + key = df.columns[key] + + indexer(df)[mask, key] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_loc_iloc_setitem_full_row_non_categorical_rhs( + self, orig, exp_single_row, indexer + ): + # - assign a complete row (mixed values) -> exp_single_row + df = orig.copy() + + key = 2 + if indexer is tm.loc: + key = df.index[2] + + # not categorical dtype, but "b" _is_ among the categories for df["cat"] + indexer(df)[key, :] = ["b", 2] + tm.assert_frame_equal(df, exp_single_row) + + # "c" is not among the categories for df["cat"] + with pytest.raises(TypeError, match=msg1): + indexer(df)[key, :] = ["c", 2] + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_loc_iloc_setitem_partial_col_categorical_rhs( + self, orig, exp_parts_cats_col, indexer + ): + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + df = orig.copy() + + key = (slice(2, 4), 0) + if indexer is tm.loc: + key = (slice("j", "k"), df.columns[0]) + + # same categories as we currently have in df["cats"] + compat = Categorical(["b", "b"], categories=["a", "b"]) + indexer(df)[key] = compat + tm.assert_frame_equal(df, exp_parts_cats_col) + + # categories do not match df["cat"]'s, but "b" is among them + semi_compat = Categorical(list("bb"), categories=list("abc")) + with pytest.raises(TypeError, match=msg2): + # different categories but holdable values + # -> not sure if this should fail or pass + indexer(df)[key] = semi_compat + + # categories do not match df["cat"]'s, and "c" is not among them + incompat = Categorical(list("cc"), categories=list("abc")) + with pytest.raises(TypeError, match=msg2): + # different values + indexer(df)[key] = incompat + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_loc_iloc_setitem_non_categorical_rhs( + self, orig, exp_parts_cats_col, indexer + ): + # assign a part of a column with dtype != categorical -> exp_parts_cats_col + df = orig.copy() + + key = (slice(2, 4), 0) + if indexer is tm.loc: + key = (slice("j", "k"), df.columns[0]) + + # "b" is among the categories for df["cat"] + indexer(df)[key] = ["b", "b"] + tm.assert_frame_equal(df, exp_parts_cats_col) + + # "c" 
not part of the categories + with pytest.raises(TypeError, match=msg1): + indexer(df)[key] = ["c", "c"] + + @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc, tm.iloc]) + def test_getitem_preserve_object_index_with_dates(self, indexer): + # https://github.com/pandas-dev/pandas/pull/42950 - when selecting a column + # from dataframe, don't try to infer object dtype index on Series construction + idx = date_range("2012", periods=3).astype(object) + df = DataFrame({0: [1, 2, 3]}, index=idx) + assert df.index.dtype == object + + if indexer is tm.getitem: + ser = indexer(df)[0] + else: + ser = indexer(df)[:, 0] + + assert ser.index.dtype == object + + def test_loc_on_multiindex_one_level(self): + # GH#45779 + df = DataFrame( + data=[[0], [1]], + index=MultiIndex.from_tuples([("a",), ("b",)], names=["first"]), + ) + expected = DataFrame( + data=[[0]], index=MultiIndex.from_tuples([("a",)], names=["first"]) + ) + result = df.loc["a"] + tm.assert_frame_equal(result, expected) + + +class TestDepreactedIndexers: + @pytest.mark.parametrize( + "key", [{1}, {1: 1}, ({1}, "a"), ({1: 1}, "a"), (1, {"a"}), (1, {"a": "a"})] + ) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] + + @pytest.mark.parametrize( + "key", + [ + {1}, + {1: 1}, + (({1}, 2), "a"), + (({1: 1}, 2), "a"), + ((1, 2), {"a"}), + ((1, 2), {"a": "a"}), + ], + ) + def test_getitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], + columns=["a", "b"], + index=MultiIndex.from_tuples([(1, 2), (3, 4)]), + ) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] + + @pytest.mark.parametrize( + "key", [{1}, {1: 1}, ({1}, "a"), ({1: 1}, "a"), (1, {"a"}), (1, {"a": "a"})] + ) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] = 1 + + @pytest.mark.parametrize( + "key", + [ + {1}, + {1: 1}, + (({1}, 2), "a"), + (({1: 1}, 2), "a"), + ((1, 2), {"a"}), + ((1, 2), {"a": "a"}), + ], + ) + def test_setitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], + columns=["a", "b"], + index=MultiIndex.from_tuples([(1, 2), (3, 4)]), + ) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] = 1 diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py new file mode 100644 index 00000000..f67ecf60 --- /dev/null +++ b/pandas/tests/frame/indexing/test_insert.py @@ -0,0 +1,106 @@ +""" +test_insert is specifically for the DataFrame.insert method; not to be +confused with tests with "insert" in their names that are really testing +__setitem__. 
+""" +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm + + +class TestDataFrameInsert: + def test_insert(self): + df = DataFrame( + np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] + ) + + df.insert(0, "foo", df["a"]) + tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"])) + tm.assert_series_equal(df["a"], df["foo"], check_names=False) + + df.insert(2, "bar", df["c"]) + tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"])) + tm.assert_almost_equal(df["c"], df["bar"], check_names=False) + + with pytest.raises(ValueError, match="already exists"): + df.insert(1, "a", df["b"]) + + msg = "cannot insert c, already exists" + with pytest.raises(ValueError, match=msg): + df.insert(1, "c", df["b"]) + + df.columns.name = "some_name" + # preserve columns name field + df.insert(0, "baz", df["c"]) + assert df.columns.name == "some_name" + + def test_insert_column_bug_4032(self): + + # GH#4032, inserting a column and renaming causing errors + df = DataFrame({"b": [1.1, 2.2]}) + + df = df.rename(columns={}) + df.insert(0, "a", [1, 2]) + result = df.rename(columns={}) + + str(result) + expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + df.insert(0, "c", [1.3, 2.3]) + result = df.rename(columns={}) + + str(result) + expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_insert_with_columns_dups(self): + # GH#14291 + df = DataFrame() + df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True) + df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True) + df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True) + exp = DataFrame( + [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"] + ) + tm.assert_frame_equal(df, exp) + + def test_insert_item_cache(self, using_array_manager): + df = DataFrame(np.random.randn(4, 3)) + ser = df[0] + + if using_array_manager: + expected_warning = None + else: + # with BlockManager warn about high fragmentation of single dtype + expected_warning = PerformanceWarning + + with tm.assert_produces_warning(expected_warning): + for n in range(100): + df[n + 3] = df[1] * n + + ser.values[0] = 99 + + assert df.iloc[0, 0] == df[0][0] + + def test_insert_EA_no_warning(self): + # PerformanceWarning about fragmented frame should not be raised when + # using EAs (https://github.com/pandas-dev/pandas/issues/44098) + df = DataFrame(np.random.randint(0, 100, size=(3, 100)), dtype="Int64") + with tm.assert_produces_warning(None): + df["a"] = np.array([1, 2, 3]) + + def test_insert_frame(self): + # GH#42403 + df = DataFrame({"col1": [1, 2], "col2": [3, 4]}) + + msg = r"Expected a 1D array, got an array with shape \(2, 2\)" + with pytest.raises(ValueError, match=msg): + df.insert(1, "newcol", df) diff --git a/pandas/tests/frame/indexing/test_lookup.py b/pandas/tests/frame/indexing/test_lookup.py new file mode 100644 index 00000000..caab5fee --- /dev/null +++ b/pandas/tests/frame/indexing/test_lookup.py @@ -0,0 +1,94 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestLookup: + def test_lookup_float(self, float_frame): + df = float_frame + rows = list(df.index) * len(df.columns) + cols = list(df.columns) * len(df.index) + with tm.assert_produces_warning(FutureWarning): + result = df.lookup(rows, cols) + + 
expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)]) + tm.assert_numpy_array_equal(result, expected) + + def test_lookup_mixed(self, float_string_frame): + df = float_string_frame + rows = list(df.index) * len(df.columns) + cols = list(df.columns) * len(df.index) + with tm.assert_produces_warning(FutureWarning): + result = df.lookup(rows, cols) + + expected = np.array( + [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_ + ) + tm.assert_almost_equal(result, expected) + + def test_lookup_bool(self): + df = DataFrame( + { + "label": ["a", "b", "a", "c"], + "mask_a": [True, True, False, True], + "mask_b": [True, False, False, False], + "mask_c": [False, True, False, True], + } + ) + with tm.assert_produces_warning(FutureWarning): + df["mask"] = df.lookup(df.index, "mask_" + df["label"]) + + exp_mask = np.array( + [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])] + ) + + tm.assert_series_equal(df["mask"], Series(exp_mask, name="mask")) + assert df["mask"].dtype == np.bool_ + + def test_lookup_raises(self, float_frame): + with pytest.raises(KeyError, match="'One or more row labels was not found'"): + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup(["xyz"], ["A"]) + + with pytest.raises(KeyError, match="'One or more column labels was not found'"): + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup([float_frame.index[0]], ["xyz"]) + + with pytest.raises(ValueError, match="same size"): + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup(["a", "b", "c"], ["a"]) + + def test_lookup_requires_unique_axes(self): + # GH#33041 raise with a helpful error message + df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "A"]) + + rows = [0, 1] + cols = ["A", "A"] + + # homogeneous-dtype case + with pytest.raises(ValueError, match="requires unique index and columns"): + with tm.assert_produces_warning(FutureWarning): + df.lookup(rows, cols) + with pytest.raises(ValueError, match="requires unique index and columns"): + with tm.assert_produces_warning(FutureWarning): + df.T.lookup(cols, rows) + + # heterogeneous dtype + df["B"] = 0 + with pytest.raises(ValueError, match="requires unique index and columns"): + with tm.assert_produces_warning(FutureWarning): + df.lookup(rows, cols) + + +def test_lookup_deprecated(): + # GH#18262 + df = DataFrame( + {"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]} + ) + with tm.assert_produces_warning(FutureWarning): + df.lookup(df.index, df["col"]) diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py new file mode 100644 index 00000000..bd7ffe9a --- /dev/null +++ b/pandas/tests/frame/indexing/test_mask.py @@ -0,0 +1,162 @@ +""" +Tests for DataFrame.mask; tests DataFrame.where as a side-effect. 
+""" + +import numpy as np + +from pandas import ( + NA, + DataFrame, + Series, + StringDtype, + Timedelta, + isna, +) +import pandas._testing as tm + + +class TestDataFrameMask: + def test_mask(self): + df = DataFrame(np.random.randn(5, 3)) + cond = df > 0 + + rs = df.where(cond, np.nan) + tm.assert_frame_equal(rs, df.mask(df <= 0)) + tm.assert_frame_equal(rs, df.mask(~cond)) + + other = DataFrame(np.random.randn(5, 3)) + rs = df.where(cond, other) + tm.assert_frame_equal(rs, df.mask(df <= 0, other)) + tm.assert_frame_equal(rs, df.mask(~cond, other)) + + def test_mask2(self): + # see GH#21891 + df = DataFrame([1, 2]) + res = df.mask([[True], [False]]) + + exp = DataFrame([np.nan, 2]) + tm.assert_frame_equal(res, exp) + + def test_mask_inplace(self): + # GH#8801 + df = DataFrame(np.random.randn(5, 3)) + cond = df > 0 + + rdf = df.copy() + + return_value = rdf.where(cond, inplace=True) + assert return_value is None + tm.assert_frame_equal(rdf, df.where(cond)) + tm.assert_frame_equal(rdf, df.mask(~cond)) + + rdf = df.copy() + return_value = rdf.where(cond, -df, inplace=True) + assert return_value is None + tm.assert_frame_equal(rdf, df.where(cond, -df)) + tm.assert_frame_equal(rdf, df.mask(~cond, -df)) + + def test_mask_edge_case_1xN_frame(self): + # GH#4071 + df = DataFrame([[1, 2]]) + res = df.mask(DataFrame([[True, False]])) + expec = DataFrame([[np.nan, 2]]) + tm.assert_frame_equal(res, expec) + + def test_mask_callable(self): + # GH#12533 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.mask(lambda x: x > 4, lambda x: x + 1) + exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.mask(df > 4, df + 1)) + + # return ndarray and scalar + result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99) + exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99)) + + # chain + result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10) + exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10)) + + def test_mask_dtype_bool_conversion(self): + # GH#3733 + df = DataFrame(data=np.random.randn(100, 50)) + df = df.where(df > 0) # create nans + bools = df > 0 + mask = isna(df) + expected = bools.astype(object).mask(mask) + result = bools.mask(mask) + tm.assert_frame_equal(result, expected) + + def test_mask_pos_args_deprecation(self, frame_or_series): + # https://github.com/pandas-dev/pandas/issues/41485 + obj = DataFrame({"a": range(5)}) + expected = DataFrame({"a": [-1, 1, -1, 3, -1]}) + obj = tm.get_obj(obj, frame_or_series) + expected = tm.get_obj(expected, frame_or_series) + + cond = obj % 2 == 0 + msg = ( + r"In a future version of pandas all arguments of " + f"{frame_or_series.__name__}.mask except for " + r"the arguments 'cond' and 'other' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = obj.mask(cond, -1, False) + tm.assert_equal(result, expected) + + +def test_mask_try_cast_deprecated(frame_or_series): + + obj = DataFrame(np.random.randn(4, 3)) + if frame_or_series is not DataFrame: + obj = obj[0] + + mask = obj > 0 + + with tm.assert_produces_warning(FutureWarning): + # try_cast keyword deprecated + obj.mask(mask, -1, try_cast=True) + + +def test_mask_stringdtype(frame_or_series): + # GH 40824 + obj = DataFrame( + {"A": ["foo", "bar", "baz", NA]}, + 
index=["id1", "id2", "id3", "id4"], + dtype=StringDtype(), + ) + filtered_obj = DataFrame( + {"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype() + ) + expected = DataFrame( + {"A": [NA, "this", "that", NA]}, + index=["id1", "id2", "id3", "id4"], + dtype=StringDtype(), + ) + if frame_or_series is Series: + obj = obj["A"] + filtered_obj = filtered_obj["A"] + expected = expected["A"] + + filter_ser = Series([False, True, True, False]) + result = obj.mask(filter_ser, filtered_obj) + + tm.assert_equal(result, expected) + + +def test_mask_where_dtype_timedelta(): + # https://github.com/pandas-dev/pandas/issues/39548 + df = DataFrame([Timedelta(i, unit="d") for i in range(5)]) + + expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]")) + tm.assert_frame_equal(df.mask(df.notna()), expected) + + expected = DataFrame( + [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")] + ) + tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected) diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py new file mode 100644 index 00000000..7b68566b --- /dev/null +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -0,0 +1,68 @@ +import numpy as np + +from pandas.core.dtypes.common import is_float_dtype + +from pandas import ( + DataFrame, + isna, +) + + +class TestSetValue: + def test_set_value(self, float_frame): + for idx in float_frame.index: + for col in float_frame.columns: + float_frame._set_value(idx, col, 1) + assert float_frame[col][idx] == 1 + + def test_set_value_resize(self, float_frame): + + res = float_frame._set_value("foobar", "B", 0) + assert res is None + assert float_frame.index[-1] == "foobar" + assert float_frame._get_value("foobar", "B") == 0 + + float_frame.loc["foobar", "qux"] = 0 + assert float_frame._get_value("foobar", "qux") == 0 + + res = float_frame.copy() + res._set_value("foobar", "baz", "sam") + assert res["baz"].dtype == np.object_ + + res = float_frame.copy() + res._set_value("foobar", "baz", True) + assert res["baz"].dtype == np.object_ + + res = float_frame.copy() + res._set_value("foobar", "baz", 5) + assert is_float_dtype(res["baz"]) + assert isna(res["baz"].drop(["foobar"])).all() + + res._set_value("foobar", "baz", "sam") + assert res.loc["foobar", "baz"] == "sam" + + def test_set_value_with_index_dtype_change(self): + df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC")) + + # this is actually ambiguous as the 2 is interpreted as a positional + # so column is not created + df = df_orig.copy() + df._set_value("C", 2, 1.0) + assert list(df.index) == list(df_orig.index) + ["C"] + # assert list(df.columns) == list(df_orig.columns) + [2] + + df = df_orig.copy() + df.loc["C", 2] = 1.0 + assert list(df.index) == list(df_orig.index) + ["C"] + # assert list(df.columns) == list(df_orig.columns) + [2] + + # create both new + df = df_orig.copy() + df._set_value("C", "D", 1.0) + assert list(df.index) == list(df_orig.index) + ["C"] + assert list(df.columns) == list(df_orig.columns) + ["D"] + + df = df_orig.copy() + df.loc["C", "D"] = 1.0 + assert list(df.index) == list(df_orig.index) + ["C"] + assert list(df.columns) == list(df_orig.columns) + ["D"] diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py new file mode 100644 index 00000000..e33c6d6a --- /dev/null +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -0,0 +1,1255 @@ +from datetime import datetime + +import numpy as np +import pytest + +import 
pandas.util._test_decorators as td + +from pandas.core.dtypes.base import _registry as ea_registry +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_interval_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + MultiIndex, + NaT, + Period, + PeriodIndex, + Series, + Timestamp, + cut, + date_range, + notna, + period_range, +) +import pandas._testing as tm +from pandas.core.arrays import SparseArray + +from pandas.tseries.offsets import BDay + + +class TestDataFrameSetItem: + def test_setitem_str_subclass(self): + # GH#37366 + class mystring(str): + pass + + data = ["2020-10-22 01:21:00+00:00"] + index = DatetimeIndex(data) + df = DataFrame({"a": [1]}, index=index) + df["b"] = 2 + df[mystring("c")] = 3 + expected = DataFrame({"a": [1], "b": [2], mystring("c"): [3]}, index=index) + tm.assert_equal(df, expected) + + @pytest.mark.parametrize( + "dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"] + ) + def test_setitem_dtype(self, dtype, float_frame): + arr = np.random.randn(len(float_frame)) + + float_frame[dtype] = np.array(arr, dtype=dtype) + assert float_frame[dtype].dtype.name == dtype + + def test_setitem_list_not_dataframe(self, float_frame): + data = np.random.randn(len(float_frame), 2) + float_frame[["A", "B"]] = data + tm.assert_almost_equal(float_frame[["A", "B"]].values, data) + + def test_setitem_error_msmgs(self): + + # GH 7432 + df = DataFrame( + {"bar": [1, 2, 3], "baz": ["d", "e", "f"]}, + index=Index(["a", "b", "c"], name="foo"), + ) + ser = Series( + ["g", "h", "i", "j"], + index=Index(["a", "b", "c", "a"], name="foo"), + name="fiz", + ) + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df["newcol"] = ser + + # GH 4107, more descriptive error message + df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"]) + + msg = "Cannot set a DataFrame with multiple columns to the single column gr" + with pytest.raises(ValueError, match=msg): + df["gr"] = df.groupby(["b", "c"]).count() + + def test_setitem_benchmark(self): + # from the vb_suite/frame_methods/frame_insert_columns + N = 10 + K = 5 + df = DataFrame(index=range(N)) + new_col = np.random.randn(N) + for i in range(K): + df[i] = new_col + expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N)) + tm.assert_frame_equal(df, expected) + + def test_setitem_different_dtype(self): + df = DataFrame( + np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] + ) + df.insert(0, "foo", df["a"]) + df.insert(2, "bar", df["c"]) + + # diff dtype + + # new item + df["x"] = df["a"].astype("float32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 5 + [np.dtype("float32")], + index=["foo", "c", "bar", "b", "a", "x"], + ) + tm.assert_series_equal(result, expected) + + # replacing current (in different block) + df["a"] = df["a"].astype("float32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2, + index=["foo", "c", "bar", "b", "a", "x"], + ) + tm.assert_series_equal(result, expected) + + df["y"] = df["a"].astype("int32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 + [np.dtype("int32")], + 
index=["foo", "c", "bar", "b", "a", "x", "y"], + ) + tm.assert_series_equal(result, expected) + + def test_setitem_empty_columns(self): + # GH 13522 + df = DataFrame(index=["A", "B", "C"]) + df["X"] = df.index + df["X"] = ["x", "y", "z"] + exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"]) + tm.assert_frame_equal(df, exp) + + def test_setitem_dt64_index_empty_columns(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + df = DataFrame(index=np.arange(len(rng))) + + df["A"] = rng + assert df["A"].dtype == np.dtype("M8[ns]") + + def test_setitem_timestamp_empty_columns(self): + # GH#19843 + df = DataFrame(index=range(3)) + df["now"] = Timestamp("20130101", tz="UTC") + + expected = DataFrame( + [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"] + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_wrong_length_categorical_dtype_raises(self): + # GH#29523 + cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"]) + df = DataFrame(range(10), columns=["bar"]) + + msg = ( + rf"Length of values \({len(cat)}\) " + rf"does not match length of index \({len(df)}\)" + ) + with pytest.raises(ValueError, match=msg): + df["foo"] = cat + + def test_setitem_with_sparse_value(self): + # GH#8131 + df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) + sp_array = SparseArray([0, 0, 1]) + df["new_column"] = sp_array + + expected = Series(sp_array, name="new_column") + tm.assert_series_equal(df["new_column"], expected) + + def test_setitem_with_unaligned_sparse_value(self): + df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) + sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0]) + + df["new_column"] = sp_series + expected = Series(SparseArray([1, 0, 0]), name="new_column") + tm.assert_series_equal(df["new_column"], expected) + + def test_setitem_period_preserves_dtype(self): + # GH: 26861 + data = [Period("2003-12", "D")] + result = DataFrame([]) + result["a"] = data + + expected = DataFrame({"a": data}) + + tm.assert_frame_equal(result, expected) + + def test_setitem_dict_preserves_dtypes(self): + # https://github.com/pandas-dev/pandas/issues/34573 + expected = DataFrame( + { + "a": Series([0, 1, 2], dtype="int64"), + "b": Series([1, 2, 3], dtype=float), + "c": Series([1, 2, 3], dtype=float), + "d": Series([1, 2, 3], dtype="uint32"), + } + ) + df = DataFrame( + { + "a": Series([], dtype="int64"), + "b": Series([], dtype=float), + "c": Series([], dtype=float), + "d": Series([], dtype="uint32"), + } + ) + for idx, b in enumerate([1, 2, 3]): + df.loc[df.shape[0]] = { + "a": int(idx), + "b": float(b), + "c": float(b), + "d": np.uint32(b), + } + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "obj,dtype", + [ + (Period("2020-01"), PeriodDtype("M")), + (Interval(left=0, right=5), IntervalDtype("int64", "right")), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + ], + ) + def test_setitem_extension_types(self, obj, dtype): + # GH: 34832 + expected = DataFrame({"idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype)}) + + df = DataFrame({"idx": [1, 2, 3]}) + df["obj"] = obj + + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "ea_name", + [ + dtype.name + for dtype in ea_registry.dtypes + # property would require instantiation + if not isinstance(dtype.name, property) + ] + # mypy doesn't allow adding lists of different types + # https://github.com/python/mypy/issues/5492 + + ["datetime64[ns, UTC]", "period[D]"], # type: 
ignore[list-item] + ) + def test_setitem_with_ea_name(self, ea_name): + # GH 38386 + result = DataFrame([0]) + result[ea_name] = [1] + expected = DataFrame({0: [0], ea_name: [1]}) + tm.assert_frame_equal(result, expected) + + def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self): + # GH#7492 + data_ns = np.array([1, "nat"], dtype="datetime64[ns]") + result = Series(data_ns).to_frame() + result["new"] = data_ns + expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + # OutOfBoundsDatetime error shouldn't occur + data_s = np.array([1, "nat"], dtype="datetime64[s]") + result["new"] = data_s + expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) + def test_frame_setitem_datetime64_col_other_units(self, unit): + # Check that non-nano dt64 values get cast to dt64 on setitem + # into a not-yet-existing column + n = 100 + + dtype = np.dtype(f"M8[{unit}]") + vals = np.arange(n, dtype=np.int64).view(dtype) + ex_vals = vals.astype("datetime64[ns]") + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + assert df[unit].dtype == np.dtype("M8[ns]") + assert (df[unit].values == ex_vals).all() + + @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) + def test_frame_setitem_existing_datetime64_col_other_units(self, unit): + # Check that non-nano dt64 values get cast to dt64 on setitem + # into an already-existing dt64 column + n = 100 + + dtype = np.dtype(f"M8[{unit}]") + vals = np.arange(n, dtype=np.int64).view(dtype) + ex_vals = vals.astype("datetime64[ns]") + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]") + + # We overwrite existing dt64 column with new, non-nano dt64 vals + df["dates"] = vals + assert (df["dates"].values == ex_vals).all() + + def test_setitem_dt64tz(self, timezone_frame): + + df = timezone_frame + idx = df["B"].rename("foo") + + # setitem + df["C"] = idx + tm.assert_series_equal(df["C"], Series(idx, name="C")) + + df["D"] = "foo" + df["D"] = idx + tm.assert_series_equal(df["D"], Series(idx, name="D")) + del df["D"] + + # assert that A & C are not sharing the same base (e.g. 
they + # are copies) + v1 = df._mgr.arrays[1] + v2 = df._mgr.arrays[2] + tm.assert_extension_array_equal(v1, v2) + v1base = v1._data.base + v2base = v2._data.base + assert v1base is None or (id(v1base) != id(v2base)) + + # with nan + df2 = df.copy() + df2.iloc[1, 1] = NaT + df2.iloc[1, 2] = NaT + result = df2["B"] + tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) + tm.assert_series_equal(df2.dtypes, df.dtypes) + + def test_setitem_periodindex(self): + rng = period_range("1/1/2000", periods=5, name="index") + df = DataFrame(np.random.randn(5, 3), index=rng) + + df["Index"] = rng + rs = Index(df["Index"]) + tm.assert_index_equal(rs, rng, check_names=False) + assert rs.name == "Index" + assert rng.name == "index" + + rs = df.reset_index().set_index("index") + assert isinstance(rs.index, PeriodIndex) + tm.assert_index_equal(rs.index, rng) + + def test_setitem_complete_column_with_array(self): + # GH#37954 + df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]}) + arr = np.array([[1, 1], [3, 1], [5, 1]]) + df[["c", "d"]] = arr + expected = DataFrame( + { + "a": ["one", "two", "three"], + "b": [1, 2, 3], + "c": [1, 3, 5], + "d": [1, 1, 1], + } + ) + expected["c"] = expected["c"].astype(arr.dtype) + expected["d"] = expected["d"].astype(arr.dtype) + assert expected["c"].dtype == arr.dtype + assert expected["d"].dtype == arr.dtype + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) + def test_setitem_bool_with_numeric_index(self, dtype): + # GH#36319 + cols = Index([1, 2, 3], dtype=dtype) + df = DataFrame(np.random.randn(3, 3), columns=cols) + + df[False] = ["a", "b", "c"] + + expected_cols = Index([1, 2, 3, False], dtype=object) + if dtype == "f8": + expected_cols = Index([1.0, 2.0, 3.0, False], dtype=object) + + tm.assert_index_equal(df.columns, expected_cols) + + @pytest.mark.parametrize("indexer", ["B", ["B"]]) + def test_setitem_frame_length_0_str_key(self, indexer): + # GH#38831 + df = DataFrame(columns=["A", "B"]) + other = DataFrame({"B": [1, 2]}) + df[indexer] = other + expected = DataFrame({"A": [np.nan] * 2, "B": [1, 2]}) + expected["A"] = expected["A"].astype("object") + tm.assert_frame_equal(df, expected) + + def test_setitem_frame_duplicate_columns(self, using_array_manager): + # GH#15695 + warn = DeprecationWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + + cols = ["A", "B", "C"] * 2 + df = DataFrame(index=range(3), columns=cols) + df.loc[0, "A"] = (0, 3) + with tm.assert_produces_warning(warn, match=msg): + df.loc[:, "B"] = (1, 4) + df["C"] = (2, 5) + expected = DataFrame( + [ + [0, 1, 2, 3, 4, 5], + [np.nan, 1, 2, np.nan, 4, 5], + [np.nan, 1, 2, np.nan, 4, 5], + ], + dtype="object", + ) + + if using_array_manager: + # setitem replaces column so changes dtype + + expected.columns = cols + expected["C"] = expected["C"].astype("int64") + # TODO(ArrayManager) .loc still overwrites + expected["B"] = expected["B"].astype("int64") + + else: + # set these with unique columns to be extra-unambiguous + expected[2] = expected[2].astype(np.int64) + expected[5] = expected[5].astype(np.int64) + expected.columns = cols + + tm.assert_frame_equal(df, expected) + + def test_setitem_frame_duplicate_columns_size_mismatch(self): + # GH#39510 + cols = ["A", "B", "C"] * 2 + df = DataFrame(index=range(3), columns=cols) + with pytest.raises(ValueError, match="Columns must be same length as key"): + df[["A"]] = (0, 3, 5) + + df2 = df.iloc[:, :3] # unique columns + with 
pytest.raises(ValueError, match="Columns must be same length as key"): + df2[["A"]] = (0, 3, 5) + + @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]]) + def test_setitem_df_wrong_column_number(self, cols): + # GH#38604 + df = DataFrame([[1, 2, 3]], columns=cols) + rhs = DataFrame([[10, 11]], columns=["d", "e"]) + msg = "Columns must be same length as key" + with pytest.raises(ValueError, match=msg): + df["a"] = rhs + + def test_setitem_listlike_indexer_duplicate_columns(self): + # GH#38604 + df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"]) + rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"]) + df[["a", "b"]] = rhs + expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"]) + tm.assert_frame_equal(df, expected) + + df[["c", "b"]] = rhs + expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"]) + tm.assert_frame_equal(df, expected) + + def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self): + # GH#39403 + df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"]) + rhs = DataFrame([[10, 11]], columns=["a", "b"]) + msg = "Columns must be same length as key" + with pytest.raises(ValueError, match=msg): + df[["a", "b"]] = rhs + + def test_setitem_intervals(self): + + df = DataFrame({"A": range(10)}) + ser = cut(df["A"], 5) + assert isinstance(ser.cat.categories, IntervalIndex) + + # B & D end up as Categoricals + # the remainder are converted to in-line objects + # containing an IntervalIndex.values + df["B"] = ser + df["C"] = np.array(ser) + df["D"] = ser.values + df["E"] = np.array(ser.values) + df["F"] = ser.astype(object) + + assert is_categorical_dtype(df["B"].dtype) + assert is_interval_dtype(df["B"].cat.categories) + assert is_categorical_dtype(df["D"].dtype) + assert is_interval_dtype(df["D"].cat.categories) + + # These go through the Series constructor and so get inferred back + # to IntervalDtype + assert is_interval_dtype(df["C"]) + assert is_interval_dtype(df["E"]) + + # But the Series constructor doesn't do inference on Series objects, + # so setting df["F"] doesn't get cast back to IntervalDtype + assert is_object_dtype(df["F"]) + + # they compare equal as Index + # when converted to numpy objects + c = lambda x: Index(np.array(x)) + tm.assert_index_equal(c(df.B), c(df.B)) + tm.assert_index_equal(c(df.B), c(df.C), check_names=False) + tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + tm.assert_index_equal(c(df.C), c(df.D), check_names=False) + + # B & D are the same Series + tm.assert_series_equal(df["B"], df["B"]) + tm.assert_series_equal(df["B"], df["D"], check_names=False) + + # C & E are the same Series + tm.assert_series_equal(df["C"], df["C"]) + tm.assert_series_equal(df["C"], df["E"], check_names=False) + + def test_setitem_categorical(self): + # GH#35369 + df = DataFrame({"h": Series(list("mn")).astype("category")}) + df.h = df.h.cat.reorder_categories(["n", "m"]) + expected = DataFrame( + {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])} + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_with_empty_listlike(self): + # GH#17101 + index = Index([], name="idx") + result = DataFrame(columns=["A"], index=index) + result["A"] = [] + expected = DataFrame(columns=["A"], index=index) + tm.assert_index_equal(result.index, expected.index) + + @pytest.mark.parametrize( + "cols, values, expected", + [ + (["C", "D", "D", "a"], [1, 2, 3, 4], 4), # with duplicates + (["D", "C", "D", "a"], [1, 2, 3, 4], 4), # mixed order + (["C", "B", "B", "a"], [1, 2, 3, 4], 4), # other duplicate cols + (["C", 
"B", "a"], [1, 2, 3], 3), # no duplicates + (["B", "C", "a"], [3, 2, 1], 1), # alphabetical order + (["C", "a", "B"], [3, 2, 1], 2), # in the middle + ], + ) + def test_setitem_same_column(self, cols, values, expected): + # GH#23239 + df = DataFrame([values], columns=cols) + df["a"] = df["a"] + result = df["a"].values[0] + assert result == expected + + def test_setitem_multi_index(self): + # GH#7655, test that assigning to a sub-frame of a frame + # with multi-index columns aligns both rows and columns + it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"] + + cols = MultiIndex.from_product(it) + index = date_range("20141006", periods=20) + vals = np.random.randint(1, 1000, (len(index), len(cols))) + df = DataFrame(vals, columns=cols, index=index) + + i, j = df.index.values.copy(), it[-1][:] + + np.random.shuffle(i) + df["jim"] = df["jolie"].loc[i, ::-1] + tm.assert_frame_equal(df["jim"], df["jolie"]) + + np.random.shuffle(j) + df[("joe", "first")] = df[("jolie", "last")].loc[i, j] + tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")]) + + np.random.shuffle(j) + df[("joe", "last")] = df[("jolie", "first")].loc[i, j] + tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) + + @pytest.mark.parametrize( + "columns,box,expected", + [ + ( + ["A", "B", "C", "D"], + 7, + DataFrame( + [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["C", "D"], + [7, 8], + DataFrame( + [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["A", "B", "C"], + np.array([7, 8, 9], dtype=np.int64), + DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]), + ), + ( + ["B", "C", "D"], + [[7, 8, 9], [10, 11, 12], [13, 14, 15]], + DataFrame( + [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["C", "A", "D"], + np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64), + DataFrame( + [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["A", "C"], + DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + DataFrame( + [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] + ), + ), + ], + ) + def test_setitem_list_missing_columns(self, columns, box, expected): + # GH#29334 + df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + df[columns] = box + tm.assert_frame_equal(df, expected) + + def test_setitem_list_of_tuples(self, float_frame): + tuples = list(zip(float_frame["A"], float_frame["B"])) + float_frame["tuples"] = tuples + + result = float_frame["tuples"] + expected = Series(tuples, index=float_frame.index, name="tuples") + tm.assert_series_equal(result, expected) + + def test_setitem_iloc_generator(self): + # GH#39614 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + df.iloc[indexer] = 1 + expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]}) + tm.assert_frame_equal(df, expected) + + def test_setitem_iloc_two_dimensional_generator(self): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + indexer = (x for x in [1, 2]) + df.iloc[indexer, 1] = 1 + expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]}) + tm.assert_frame_equal(df, expected) + + def test_setitem_dtypes_bytes_type_to_object(self): + # GH 20734 + index = Series(name="id", dtype="S24") + df = DataFrame(index=index) + df["a"] = Series(name="a", index=index, dtype=np.uint32) + df["b"] = Series(name="b", index=index, dtype="S64") + df["c"] = Series(name="c", 
index=index, dtype="S64") + df["d"] = Series(name="d", index=index, dtype=np.uint8) + result = df.dtypes + expected = Series([np.uint32, object, object, np.uint8], index=list("abcd")) + tm.assert_series_equal(result, expected) + + def test_boolean_mask_nullable_int64(self): + # GH 28928 + result = DataFrame({"a": [3, 4], "b": [5, 6]}).astype( + {"a": "int64", "b": "Int64"} + ) + mask = Series(False, index=result.index) + result.loc[mask, "a"] = result["a"] + result.loc[mask, "b"] = result["b"] + expected = DataFrame({"a": [3, 4], "b": [5, 6]}).astype( + {"a": "int64", "b": "Int64"} + ) + tm.assert_frame_equal(result, expected) + + def test_setitem_ea_dtype_rhs_series(self): + # GH#47425 + df = DataFrame({"a": [1, 2]}) + df["a"] = Series([1, 2], dtype="Int64") + expected = DataFrame({"a": [1, 2]}, dtype="Int64") + tm.assert_frame_equal(df, expected) + + # TODO(ArrayManager) set column with 2d column array, see #44788 + @td.skip_array_manager_not_yet_implemented + def test_setitem_npmatrix_2d(self): + # GH#42376 + # for use-case df["x"] = sparse.random(10, 10).mean(axis=1) + expected = DataFrame( + {"np-array": np.ones(10), "np-matrix": np.ones(10)}, index=np.arange(10) + ) + + a = np.ones((10, 1)) + df = DataFrame(index=np.arange(10)) + df["np-array"] = a + + # Instantiation of `np.matrix` gives PendingDeprecationWarning + with tm.assert_produces_warning(PendingDeprecationWarning): + df["np-matrix"] = np.matrix(a) + + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("vals", [{}, {"d": "a"}]) + def test_setitem_aligning_dict_with_index(self, vals): + # GH#47216 + df = DataFrame({"a": [1, 2], "b": [3, 4], **vals}) + df.loc[:, "a"] = {1: 100, 0: 200} + df.loc[:, "c"] = {0: 5, 1: 6} + df.loc[:, "e"] = {1: 5} + expected = DataFrame( + {"a": [200, 100], "b": [3, 4], **vals, "c": [5, 6], "e": [np.nan, 5]} + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_rhs_dataframe(self): + # GH#47578 + df = DataFrame({"a": [1, 2]}) + df["a"] = DataFrame({"a": [10, 11]}, index=[1, 2]) + expected = DataFrame({"a": [np.nan, 10]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": [1, 2]}) + df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2])) + tm.assert_frame_equal(df, expected) + + def test_setitem_frame_overwrite_with_ea_dtype(self, any_numeric_ea_dtype): + # GH#46896 + df = DataFrame(columns=["a", "b"], data=[[1, 2], [3, 4]]) + df["a"] = DataFrame({"a": [10, 11]}, dtype=any_numeric_ea_dtype) + expected = DataFrame( + { + "a": Series([10, 11], dtype=any_numeric_ea_dtype), + "b": [2, 4], + } + ) + tm.assert_frame_equal(df, expected) + + +class TestSetitemTZAwareValues: + @pytest.fixture + def idx(self): + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + return idx + + @pytest.fixture + def expected(self, idx): + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + assert expected.dtype == idx.dtype + return expected + + def test_setitem_dt64series(self, idx, expected): + # convert to utc + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + df["B"] = idx + + with tm.assert_produces_warning(FutureWarning) as m: + df["B"] = idx.to_series(keep_tz=False, index=[0, 1]) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) + + result = df["B"] + comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B") + tm.assert_series_equal(result, comp) + + def test_setitem_datetimeindex(self, idx, expected): + # setting a DataFrame column with a tzaware DTI retains 
the dtype + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # assign to frame + df["B"] = idx + result = df["B"] + tm.assert_series_equal(result, expected) + + def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # object array of datetimes with a tz + df["B"] = idx.to_pydatetime() + result = df["B"] + tm.assert_series_equal(result, expected) + + +class TestDataFrameSetItemWithExpansion: + def test_setitem_listlike_views(self, using_copy_on_write): + # GH#38148 + df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]}) + + # get one column as a view of df + ser = df["a"] + + # add columns with list-like indexer + df[["c", "d"]] = np.array([[0.1, 0.2], [0.3, 0.4], [0.4, 0.5]]) + + # edit in place the first column to check view semantics + df.iloc[0, 0] = 100 + + if using_copy_on_write: + expected = Series([1, 2, 3], name="a") + else: + expected = Series([100, 2, 3], name="a") + tm.assert_series_equal(ser, expected) + + def test_setitem_string_column_numpy_dtype_raising(self): + # GH#39010 + df = DataFrame([[1, 2], [3, 4]]) + df["0 - Name"] = [5, 6] + expected = DataFrame([[1, 2, 5], [3, 4, 6]], columns=[0, 1, "0 - Name"]) + tm.assert_frame_equal(df, expected) + + def test_setitem_empty_df_duplicate_columns(self, using_copy_on_write): + # GH#38521 + df = DataFrame(columns=["a", "b", "b"], dtype="float64") + df.loc[:, "a"] = list(range(2)) + expected = DataFrame( + [[0, np.nan, np.nan], [1, np.nan, np.nan]], columns=["a", "b", "b"] + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_with_expansion_categorical_dtype(self): + # assignment + df = DataFrame( + {"value": np.array(np.random.randint(0, 10000, 100), dtype="int32")} + ) + labels = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + + df = df.sort_values(by=["value"], ascending=True) + ser = cut(df.value, range(0, 10500, 500), right=False, labels=labels) + cat = ser.values + + # setting with a Categorical + df["D"] = cat + str(df) + + result = df.dtypes + expected = Series( + [np.dtype("int32"), CategoricalDtype(categories=labels, ordered=False)], + index=["value", "D"], + ) + tm.assert_series_equal(result, expected) + + # setting with a Series + df["E"] = ser + str(df) + + result = df.dtypes + expected = Series( + [ + np.dtype("int32"), + CategoricalDtype(categories=labels, ordered=False), + CategoricalDtype(categories=labels, ordered=False), + ], + index=["value", "D", "E"], + ) + tm.assert_series_equal(result, expected) + + result1 = df["D"] + result2 = df["E"] + tm.assert_categorical_equal(result1._mgr.array, cat) + + # sorting + ser.name = "E" + tm.assert_series_equal(result2.sort_index(), ser.sort_index()) + + def test_setitem_scalars_no_index(self): + # GH#16823 / GH#17894 + df = DataFrame() + df["foo"] = 1 + expected = DataFrame(columns=["foo"]).astype(np.int64) + tm.assert_frame_equal(df, expected) + + def test_setitem_newcol_tuple_key(self, float_frame): + assert ( + "A", + "B", + ) not in float_frame.columns + float_frame["A", "B"] = float_frame["A"] + assert ("A", "B") in float_frame.columns + + result = float_frame["A", "B"] + expected = float_frame["A"] + tm.assert_series_equal(result, expected, check_names=False) + + def test_frame_setitem_newcol_timestamp(self): + # GH#2155 + columns = date_range(start="1/1/2012", end="2/1/2012", freq=BDay()) + data = DataFrame(columns=columns, index=range(10)) + t = datetime(2012, 11, 1) + ts = Timestamp(t) + 
data[ts] = np.nan # works, mostly a smoke-test + assert np.isnan(data[ts]).all() + + def test_frame_setitem_rangeindex_into_new_col(self): + # GH#47128 + df = DataFrame({"a": ["a", "b"]}) + df["b"] = df.index + df.loc[[False, True], "b"] = 100 + result = df.loc[[1], :] + expected = DataFrame({"a": ["b"], "b": [100]}, index=[1]) + tm.assert_frame_equal(result, expected) + + def test_setitem_frame_keep_ea_dtype(self, any_numeric_ea_dtype): + # GH#46896 + df = DataFrame(columns=["a", "b"], data=[[1, 2], [3, 4]]) + df["c"] = DataFrame({"a": [10, 11]}, dtype=any_numeric_ea_dtype) + expected = DataFrame( + { + "a": [1, 3], + "b": [2, 4], + "c": Series([10, 11], dtype=any_numeric_ea_dtype), + } + ) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemSlicing: + def test_setitem_slice_position(self): + # GH#31469 + df = DataFrame(np.zeros((100, 1))) + df[-4:] = 1 + arr = np.zeros((100, 1)) + arr[-4:] = 1 + expected = DataFrame(arr) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) + @pytest.mark.parametrize("n", [1, 2, 3]) + def test_setitem_slice_indexer_broadcasting_rhs(self, n, box, indexer): + # GH#40440 + df = DataFrame([[1, 3, 5]] + [[2, 4, 6]] * n, columns=["a", "b", "c"]) + indexer(df)[1:] = box([10, 11, 12]) + expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) + @pytest.mark.parametrize("n", [1, 2, 3]) + def test_setitem_list_indexer_broadcasting_rhs(self, n, box): + # GH#40440 + df = DataFrame([[1, 3, 5]] + [[2, 4, 6]] * n, columns=["a", "b", "c"]) + df.iloc[list(range(1, n + 1))] = box([10, 11, 12]) + expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) + @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) + @pytest.mark.parametrize("n", [1, 2, 3]) + def test_setitem_slice_broadcasting_rhs_mixed_dtypes(self, n, box, indexer): + # GH#40440 + df = DataFrame( + [[1, 3, 5], ["x", "y", "z"]] + [[2, 4, 6]] * n, columns=["a", "b", "c"] + ) + indexer(df)[1:] = box([10, 11, 12]) + expected = DataFrame( + [[1, 3, 5]] + [[10, 11, 12]] * (n + 1), + columns=["a", "b", "c"], + dtype="object", + ) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemCallable: + def test_setitem_callable(self): + # GH#12533 + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) + df[lambda x: "A"] = [11, 12, 13, 14] + + exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) + tm.assert_frame_equal(df, exp) + + def test_setitem_other_callable(self): + # GH#13299 + def inc(x): + return x + 1 + + df = DataFrame([[-1, 1], [1, -1]]) + df[df > 0] = inc + + expected = DataFrame([[-1, inc], [inc, -1]]) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemBooleanMask: + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values + @pytest.mark.parametrize( + "mask_type", + [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values], + ids=["dataframe", "array"], + ) + def test_setitem_boolean_mask(self, mask_type, float_frame): + + # Test for issue #18582 + df = float_frame.copy() + mask = mask_type(df) + + # index with boolean mask + result = df.copy() + result[mask] = np.nan + + expected = df.copy() + expected.values[np.array(mask)] = 
np.nan + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason="Currently empty indexers are treated as all False") + @pytest.mark.parametrize("box", [list, np.array, Series]) + def test_setitem_loc_empty_indexer_raises_with_non_empty_value(self, box): + # GH#37672 + df = DataFrame({"a": ["a"], "b": [1], "c": [1]}) + if box == Series: + indexer = box([], dtype="object") + else: + indexer = box([]) + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df.loc[indexer, ["b"]] = [1] + + @pytest.mark.parametrize("box", [list, np.array, Series]) + def test_setitem_loc_only_false_indexer_dtype_changed(self, box): + # GH#37550 + # Dtype is only changed when value to set is a Series and indexer is + # empty/bool all False + df = DataFrame({"a": ["a"], "b": [1], "c": [1]}) + indexer = box([False]) + df.loc[indexer, ["b"]] = 10 - df["c"] + expected = DataFrame({"a": ["a"], "b": [1], "c": [1]}) + tm.assert_frame_equal(df, expected) + + df.loc[indexer, ["b"]] = 9 + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc]) + def test_setitem_boolean_mask_aligning(self, indexer): + # GH#39931 + df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]}) + expected = df.copy() + mask = df["a"] >= 3 + indexer(df)[mask] = indexer(df)[mask].sort_values("a") + tm.assert_frame_equal(df, expected) + + def test_setitem_mask_categorical(self): + # assign multiple rows (mixed values) (-> array) -> exp_multi_row + # changed multiple rows + cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values2 = [1, 1, 2, 2, 1, 1, 1] + exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2) + + catsf = Categorical( + ["a", "a", "c", "c", "a", "a", "a"], categories=["a", "b", "c"] + ) + idxf = Index(["h", "i", "j", "k", "l", "m", "n"]) + valuesf = [1, 1, 3, 3, 1, 1, 1] + df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf) + + exp_fancy = exp_multi_row.copy() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # issue #37643 inplace kwarg deprecated + return_value = exp_fancy["cats"].cat.set_categories( + ["a", "b", "c"], inplace=True + ) + assert return_value is None + + mask = df["cats"] == "c" + df[mask] = ["b", 2] + # category c is kept in .categories + tm.assert_frame_equal(df, exp_fancy) + + @pytest.mark.parametrize("dtype", ["float", "int64"]) + @pytest.mark.parametrize("kwargs", [{}, {"index": [1]}, {"columns": ["A"]}]) + def test_setitem_empty_frame_with_boolean(self, dtype, kwargs): + # see GH#10126 + kwargs["dtype"] = dtype + df = DataFrame(**kwargs) + + df2 = df.copy() + df[df > df2] = 47 + tm.assert_frame_equal(df, df2) + + def test_setitem_boolean_indexing(self): + idx = list(range(3)) + cols = ["A", "B", "C"] + df1 = DataFrame( + index=idx, + columns=cols, + data=np.array( + [[0.0, 0.5, 1.0], [1.5, 2.0, 2.5], [3.0, 3.5, 4.0]], dtype=float + ), + ) + df2 = DataFrame(index=idx, columns=cols, data=np.ones((len(idx), len(cols)))) + + expected = DataFrame( + index=idx, + columns=cols, + data=np.array([[0.0, 0.5, 1.0], [1.5, 2.0, -1], [-1, -1, -1]], dtype=float), + ) + + df1[df1 > 2.0 * df2] = -1 + tm.assert_frame_equal(df1, expected) + with pytest.raises(ValueError, match="Item wrong length"): + df1[df1.index[:-1] > 2] = -1 + + def test_loc_setitem_all_false_boolean_two_blocks(self): + # GH#40885 + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": "a"}) + expected = 
df.copy() + indexer = Series([False, False], name="c") + df.loc[indexer, ["b"]] = DataFrame({"b": [5, 6]}, index=[0, 1]) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetitemCopyViewSemantics: + def test_setitem_always_copy(self, float_frame): + assert "E" not in float_frame.columns + s = float_frame["A"].copy() + float_frame["E"] = s + + float_frame["E"][5:10] = np.nan + assert notna(s[5:10]).all() + + @pytest.mark.parametrize("consolidate", [True, False]) + def test_setitem_partial_column_inplace( + self, consolidate, using_array_manager, using_copy_on_write + ): + # This setting should be in-place, regardless of whether frame is + # single-block or multi-block + # GH#304 this used to be incorrectly not-inplace, in which case + # we needed to ensure _item_cache was cleared. + + df = DataFrame( + {"x": [1.1, 2.1, 3.1, 4.1], "y": [5.1, 6.1, 7.1, 8.1]}, index=[0, 1, 2, 3] + ) + df.insert(2, "z", np.nan) + if not using_array_manager: + if consolidate: + df._consolidate_inplace() + assert len(df._mgr.blocks) == 1 + else: + assert len(df._mgr.blocks) == 2 + + zvals = df["z"]._values + + df.loc[2:, "z"] = 42 + + expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z") + tm.assert_series_equal(df["z"], expected) + + # check setting occurred in-place + if not using_copy_on_write: + tm.assert_numpy_array_equal(zvals, expected.values) + assert np.shares_memory(zvals, df["z"]._values) + + def test_setitem_duplicate_columns_not_inplace(self): + # GH#39510 + cols = ["A", "B"] * 2 + df = DataFrame(0.0, index=[0], columns=cols) + df_copy = df.copy() + df_view = df[:] + df["B"] = (2, 5) + + expected = DataFrame([[0.0, 2, 0.0, 5]], columns=cols) + tm.assert_frame_equal(df_view, df_copy) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "value", [1, np.array([[1], [1]], dtype="int64"), [[1], [1]]] + ) + def test_setitem_same_dtype_not_inplace(self, value, using_array_manager): + # GH#39510 + cols = ["A", "B"] + df = DataFrame(0, index=[0, 1], columns=cols) + df_copy = df.copy() + df_view = df[:] + df[["B"]] = value + + expected = DataFrame([[0, 1], [0, 1]], columns=cols) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df_view, df_copy) + + @pytest.mark.parametrize("value", [1.0, np.array([[1.0], [1.0]]), [[1.0], [1.0]]]) + def test_setitem_listlike_key_scalar_value_not_inplace(self, value): + # GH#39510 + cols = ["A", "B"] + df = DataFrame(0, index=[0, 1], columns=cols) + df_copy = df.copy() + df_view = df[:] + df[["B"]] = value + + expected = DataFrame([[0, 1.0], [0, 1.0]], columns=cols) + tm.assert_frame_equal(df_view, df_copy) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "indexer", + [ + "a", + ["a"], + pytest.param( + [True, False], + marks=pytest.mark.xfail( + reason="Boolean indexer incorrectly setting inplace", + strict=False, # passing on some builds, no obvious pattern + ), + ), + ], + ) + @pytest.mark.parametrize( + "value, set_value", + [ + (1, 5), + (1.0, 5.0), + (Timestamp("2020-12-31"), Timestamp("2021-12-31")), + ("a", "b"), + ], + ) + def test_setitem_not_operating_inplace(self, value, set_value, indexer): + # GH#43406 + df = DataFrame({"a": value}, index=[0, 1]) + expected = df.copy() + view = df[:] + df[indexer] = set_value + tm.assert_frame_equal(view, expected) + + @td.skip_array_manager_invalid_test + def test_setitem_column_update_inplace(self, using_copy_on_write): + # https://github.com/pandas-dev/pandas/issues/47172 + + labels = [f"c{i}" for i in range(10)] + df = DataFrame({col: 
np.zeros(len(labels)) for col in labels}, index=labels) + values = df._mgr.blocks[0].values + + for label in df.columns: + df[label][label] = 1 + + if not using_copy_on_write: + # diagonal values all updated + assert np.all(values[np.arange(10), np.arange(10)] == 1) + else: + # original dataframe not updated + assert np.all(values[np.arange(10), np.arange(10)] == 0) diff --git a/pandas/tests/frame/indexing/test_take.py b/pandas/tests/frame/indexing/test_take.py new file mode 100644 index 00000000..3b59d3cf --- /dev/null +++ b/pandas/tests/frame/indexing/test_take.py @@ -0,0 +1,88 @@ +import pytest + +import pandas._testing as tm + + +class TestDataFrameTake: + def test_take(self, float_frame): + # homogeneous + order = [3, 1, 2, 0] + for df in [float_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["D", "B", "C", "A"]] + tm.assert_frame_equal(result, expected, check_names=False) + + # negative indices + order = [2, 1, -1] + for df in [float_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + result = df.take(order, axis=0) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["C", "B", "D"]] + tm.assert_frame_equal(result, expected, check_names=False) + + # illegal indices + msg = "indices are out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, 30], axis=0) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, -31], axis=0) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, 5], axis=1) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, -5], axis=1) + + def test_take_mixed_type(self, float_string_frame): + + # mixed-dtype + order = [4, 1, 2, 0, 3] + for df in [float_string_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["foo", "B", "C", "A", "D"]] + tm.assert_frame_equal(result, expected) + + # negative indices + order = [4, 1, -2] + for df in [float_string_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["foo", "B", "D"]] + tm.assert_frame_equal(result, expected) + + def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame): + # by dtype + order = [1, 2, 0, 3] + for df in [mixed_float_frame, mixed_int_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["B", "C", "A", "D"]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py new file mode 100644 index 00000000..c7e0a10c --- /dev/null +++ b/pandas/tests/frame/indexing/test_where.py @@ -0,0 +1,1058 @@ +from datetime import datetime + +from hypothesis import given +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + StringDtype, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm 
+from pandas._testing._hypothesis import OPTIONAL_ONE_OF_ALL + + +@pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"]) +def where_frame(request, float_string_frame, mixed_float_frame, mixed_int_frame): + if request.param == "default": + return DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + if request.param == "float_string": + return float_string_frame + if request.param == "mixed_float": + return mixed_float_frame + if request.param == "mixed_int": + return mixed_int_frame + + +def _safe_add(df): + # only add to the numeric items + def is_ok(s): + return ( + issubclass(s.dtype.type, (np.integer, np.floating)) and s.dtype != "uint8" + ) + + return DataFrame(dict((c, s + 1) if is_ok(s) else (c, s) for c, s in df.items())) + + +class TestDataFrameIndexingWhere: + def test_where_get(self, where_frame, float_string_frame): + def _check_get(df, cond, check_dtypes=True): + other1 = _safe_add(df) + rs = df.where(cond, other1) + rs2 = df.where(cond.values, other1) + for k, v in rs.items(): + exp = Series(np.where(cond[k], df[k], other1[k]), index=v.index) + tm.assert_series_equal(v, exp, check_names=False) + tm.assert_frame_equal(rs, rs2) + + # dtypes + if check_dtypes: + assert (rs.dtypes == df.dtypes).all() + + # check getting + df = where_frame + if df is float_string_frame: + msg = "'>' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): + df > 0 + return + cond = df > 0 + _check_get(df, cond) + + def test_where_upcasting(self): + # upcasting case (GH # 2794) + df = DataFrame( + { + c: Series([1] * 3, dtype=c) + for c in ["float32", "float64", "int32", "int64"] + } + ) + df.iloc[1, :] = 0 + result = df.dtypes + expected = Series( + [ + np.dtype("float32"), + np.dtype("float64"), + np.dtype("int32"), + np.dtype("int64"), + ], + index=["float32", "float64", "int32", "int64"], + ) + + # when we don't preserve boolean casts + # + # expected = Series({ 'float32' : 1, 'float64' : 3 }) + + tm.assert_series_equal(result, expected) + + def test_where_alignment(self, where_frame, float_string_frame): + # aligning + def _check_align(df, cond, other, check_dtypes=True): + rs = df.where(cond, other) + for i, k in enumerate(rs.columns): + result = rs[k] + d = df[k].values + c = cond[k].reindex(df[k].index).fillna(False).values + + if is_scalar(other): + o = other + else: + if isinstance(other, np.ndarray): + o = Series(other[:, i], index=result.index).values + else: + o = other[k].values + + new_values = d if c.all() else np.where(c, d, o) + expected = Series(new_values, index=result.index, name=k) + + # since we can't always have the correct numpy dtype + # as numpy doesn't know how to downcast, don't check + tm.assert_series_equal(result, expected, check_dtype=False) + + # dtypes + # can't check dtype when other is an ndarray + + if check_dtypes and not isinstance(other, np.ndarray): + assert (rs.dtypes == df.dtypes).all() + + df = where_frame + if df is float_string_frame: + msg = "'>' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): + df > 0 + return + + # other is a frame + cond = (df > 0)[1:] + _check_align(df, cond, _safe_add(df)) + + # check other is ndarray + cond = df > 0 + _check_align(df, cond, (_safe_add(df).values)) + + # integers are upcast, so don't check the dtypes + cond = df > 0 + check_dtypes = all(not issubclass(s.type, np.integer) for s in df.dtypes) + _check_align(df, cond, np.nan, check_dtypes=check_dtypes) + + def test_where_invalid(self): + # invalid 
conditions
+        df = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])
+        cond = df > 0
+
+        err1 = (df + 1).values[0:2, :]
+        msg = "other must be the same shape as self when an ndarray"
+        with pytest.raises(ValueError, match=msg):
+            df.where(cond, err1)
+
+        err2 = cond.iloc[:2, :].values
+        other1 = _safe_add(df)
+        msg = "Array conditional must be same shape as self"
+        with pytest.raises(ValueError, match=msg):
+            df.where(err2, other1)
+
+        with pytest.raises(ValueError, match=msg):
+            df.mask(True)
+        with pytest.raises(ValueError, match=msg):
+            df.mask(0)
+
+    def test_where_set(self, where_frame, float_string_frame):
+        # where inplace
+
+        def _check_set(df, cond, check_dtypes=True):
+            dfi = df.copy()
+            econd = cond.reindex_like(df).fillna(True)
+            expected = dfi.mask(~econd)
+
+            return_value = dfi.where(cond, np.nan, inplace=True)
+            assert return_value is None
+            tm.assert_frame_equal(dfi, expected)
+
+            # dtypes (and confirm upcasts)
+            if check_dtypes:
+                for k, v in df.dtypes.items():
+                    if issubclass(v.type, np.integer) and not cond[k].all():
+                        v = np.dtype("float64")
+                    assert dfi[k].dtype == v
+
+        df = where_frame
+        if df is float_string_frame:
+            msg = "'>' not supported between instances of 'str' and 'int'"
+            with pytest.raises(TypeError, match=msg):
+                df > 0
+            return
+
+        cond = df > 0
+        _check_set(df, cond)
+
+        cond = df >= 0
+        _check_set(df, cond)
+
+        # aligning
+        cond = (df >= 0)[1:]
+        _check_set(df, cond)
+
+    def test_where_series_slicing(self):
+        # GH 10218
+        # test DataFrame.where with Series slicing
+        df = DataFrame({"a": range(3), "b": range(4, 7)})
+        result = df.where(df["a"] == 1)
+        expected = df[df["a"] == 1].reindex(df.index)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("klass", [list, tuple, np.array])
+    def test_where_array_like(self, klass):
+        # see gh-15414
+        df = DataFrame({"a": [1, 2, 3]})
+        cond = [[False], [True], [True]]
+        expected = DataFrame({"a": [np.nan, 2, 3]})
+
+        result = df.where(klass(cond))
+        tm.assert_frame_equal(result, expected)
+
+        df["b"] = 2
+        expected["b"] = [2, np.nan, 2]
+        cond = [[False, True], [True, False], [True, True]]
+
+        result = df.where(klass(cond))
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "cond",
+        [
+            [[1], [0], [1]],
+            Series([[2], [5], [7]]),
+            DataFrame({"a": [2, 5, 7]}),
+            [["True"], ["False"], ["True"]],
+            [[Timestamp("2017-01-01")], [pd.NaT], [Timestamp("2017-01-02")]],
+        ],
+    )
+    def test_where_invalid_input_single(self, cond):
+        # see gh-15414: only boolean arrays accepted
+        df = DataFrame({"a": [1, 2, 3]})
+        msg = "Boolean array expected for the condition"
+
+        with pytest.raises(ValueError, match=msg):
+            df.where(cond)
+
+    @pytest.mark.parametrize(
+        "cond",
+        [
+            [[0, 1], [1, 0], [1, 1]],
+            Series([[0, 2], [5, 0], [4, 7]]),
+            [["False", "True"], ["True", "False"], ["True", "True"]],
+            DataFrame({"a": [2, 5, 7], "b": [4, 8, 9]}),
+            [
+                [pd.NaT, Timestamp("2017-01-01")],
+                [Timestamp("2017-01-02"), pd.NaT],
+                [Timestamp("2017-01-03"), Timestamp("2017-01-03")],
+            ],
+        ],
+    )
+    def test_where_invalid_input_multiple(self, cond):
+        # see gh-15414: only boolean arrays accepted
+        df = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]})
+        msg = "Boolean array expected for the condition"
+
+        with pytest.raises(ValueError, match=msg):
+            df.where(cond)
+
+    def test_where_dataframe_col_match(self):
+        df = DataFrame([[1, 2, 3], [4, 5, 6]])
+        cond = DataFrame([[True, False, True], [False, False, True]])
+
+        result = df.where(cond)
+        expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan,
6]]) + tm.assert_frame_equal(result, expected) + + # this *does* align, though has no matching columns + cond.columns = ["a", "b", "c"] + result = df.where(cond) + expected = DataFrame(np.nan, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_where_ndframe_align(self): + msg = "Array conditional must be same shape as self" + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + + cond = [True] + with pytest.raises(ValueError, match=msg): + df.where(cond) + + expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + cond = np.array([False, True, False, True]) + with pytest.raises(ValueError, match=msg): + df.where(cond) + + expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + def test_where_bug(self): + # see gh-2793 + df = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0], "b": [4.0, 3.0, 2.0, 1.0]}, dtype="float64" + ) + expected = DataFrame( + {"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]}, + dtype="float64", + ) + result = df.where(df > 2, np.nan) + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(result > 2, np.nan, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_bug_mixed(self, any_signed_int_numpy_dtype): + # see gh-2793 + df = DataFrame( + { + "a": np.array([1, 2, 3, 4], dtype=any_signed_int_numpy_dtype), + "b": np.array([4.0, 3.0, 2.0, 1.0], dtype="float64"), + } + ) + + expected = DataFrame( + {"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]}, + dtype="float64", + ) + + result = df.where(df > 2, np.nan) + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(result > 2, np.nan, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_bug_transposition(self): + # see gh-7506 + a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]}) + b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]}) + do_not_replace = b.isna() | (a > b) + + expected = a.copy() + expected[~do_not_replace] = b + + result = a.where(do_not_replace, b) + tm.assert_frame_equal(result, expected) + + a = DataFrame({0: [4, 6], 1: [1, 0]}) + b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]}) + do_not_replace = b.isna() | (a > b) + + expected = a.copy() + expected[~do_not_replace] = b + + result = a.where(do_not_replace, b) + tm.assert_frame_equal(result, expected) + + def test_where_datetime(self, using_array_manager): + + # GH 3311 + df = DataFrame( + { + "A": date_range("20130102", periods=5), + "B": date_range("20130104", periods=5), + "C": np.random.randn(5), + } + ) + + stamp = datetime(2013, 1, 3) + msg = "'>' not supported between instances of 'float' and 'datetime.datetime'" + with pytest.raises(TypeError, match=msg): + df > stamp + + result = df[df.iloc[:, :-1] > stamp] + + expected = df.copy() + expected.loc[[0, 1], "A"] = np.nan + + warn = DeprecationWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + with tm.assert_produces_warning(warn, match=msg): + expected.loc[:, "C"] = np.nan + tm.assert_frame_equal(result, expected) + + def test_where_none(self): + # GH 4667 + # setting with None changes dtype + df = DataFrame({"series": Series(range(10))}).astype(float) + df[df > 7] = None + expected = DataFrame( + {"series": Series([0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan])} + ) 
+ tm.assert_frame_equal(df, expected) + + # GH 7656 + df = DataFrame( + [ + {"A": 1, "B": np.nan, "C": "Test"}, + {"A": np.nan, "B": "Test", "C": np.nan}, + ] + ) + msg = "boolean setting on mixed-type" + + with pytest.raises(TypeError, match=msg): + df.where(~isna(df), None, inplace=True) + + def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): + # see gh-21947 + df = DataFrame(columns=["a"]) + cond = df + assert (cond.dtypes == object).all() + + result = df.where(cond) + tm.assert_frame_equal(result, df) + + def test_where_align(self): + def create(): + df = DataFrame(np.random.randn(10, 3)) + df.iloc[3:5, 0] = np.nan + df.iloc[4:6, 1] = np.nan + df.iloc[5:8, 2] = np.nan + return df + + # series + df = create() + expected = df.fillna(df.mean()) + result = df.where(pd.notna(df), df.mean(), axis="columns") + tm.assert_frame_equal(result, expected) + + return_value = df.where(pd.notna(df), df.mean(), inplace=True, axis="columns") + assert return_value is None + tm.assert_frame_equal(df, expected) + + df = create().fillna(0) + expected = df.apply(lambda x, y: x.where(x > 0, y), y=df[0]) + result = df.where(df > 0, df[0], axis="index") + tm.assert_frame_equal(result, expected) + result = df.where(df > 0, df[0], axis="rows") + tm.assert_frame_equal(result, expected) + + # frame + df = create() + expected = df.fillna(1) + result = df.where( + pd.notna(df), DataFrame(1, index=df.index, columns=df.columns) + ) + tm.assert_frame_equal(result, expected) + + def test_where_complex(self): + # GH 6345 + expected = DataFrame([[1 + 1j, 2], [np.nan, 4 + 1j]], columns=["a", "b"]) + df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=["a", "b"]) + df[df.abs() >= 5] = np.nan + tm.assert_frame_equal(df, expected) + + def test_where_axis(self, using_array_manager): + # GH 9736 + df = DataFrame(np.random.randn(2, 2)) + mask = DataFrame([[False, False], [False, False]]) + ser = Series([0, 1]) + + expected = DataFrame([[0, 0], [1, 1]], dtype="float64") + result = df.where(mask, ser, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, ser, axis="index", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[0, 1], [0, 1]], dtype="float64") + result = df.where(mask, ser, axis="columns") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, ser, axis="columns", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_axis_with_upcast(self): + # Upcast needed + df = DataFrame([[1, 2], [3, 4]], dtype="int64") + mask = DataFrame([[False, False], [False, False]]) + ser = Series([0, np.nan]) + + expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype="float64") + result = df.where(mask, ser, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, ser, axis="index", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[0, np.nan], [0, np.nan]]) + result = df.where(mask, ser, axis="columns") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + 0: np.array([0, 0], dtype="int64"), + 1: np.array([np.nan, np.nan], dtype="float64"), + } + ) + result = df.copy() + return_value = result.where(mask, ser, axis="columns", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_axis_multiple_dtypes(self, 
using_array_manager): + # Multiple dtypes (=> multiple Blocks) + df = pd.concat( + [ + DataFrame(np.random.randn(10, 2)), + DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype="int64"), + ], + ignore_index=True, + axis=1, + ) + mask = DataFrame(False, columns=df.columns, index=df.index) + s1 = Series(1, index=df.columns) + s2 = Series(2, index=df.index) + + result = df.where(mask, s1, axis="columns") + expected = DataFrame(1.0, columns=df.columns, index=df.index) + expected[2] = expected[2].astype("int64") + expected[3] = expected[3].astype("int64") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, s1, axis="columns", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + result = df.where(mask, s2, axis="index") + expected = DataFrame(2.0, columns=df.columns, index=df.index) + expected[2] = expected[2].astype("int64") + expected[3] = expected[3].astype("int64") + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result.where(mask, s2, axis="index", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + # DataFrame vs DataFrame + d1 = df.copy().drop(1, axis=0) + expected = df.copy() + expected.loc[1, :] = np.nan + + result = df.where(mask, d1) + tm.assert_frame_equal(result, expected) + result = df.where(mask, d1, axis="index") + tm.assert_frame_equal(result, expected) + result = df.copy() + return_value = result.where(mask, d1, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + result = df.copy() + return_value = result.where(mask, d1, inplace=True, axis="index") + assert return_value is None + tm.assert_frame_equal(result, expected) + + d2 = df.copy().drop(1, axis=1) + expected = df.copy() + warn = DeprecationWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + with tm.assert_produces_warning(warn, match=msg): + expected.loc[:, 1] = np.nan + + result = df.where(mask, d2) + tm.assert_frame_equal(result, expected) + result = df.where(mask, d2, axis="columns") + tm.assert_frame_equal(result, expected) + result = df.copy() + return_value = result.where(mask, d2, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + result = df.copy() + return_value = result.where(mask, d2, inplace=True, axis="columns") + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_where_callable(self): + # GH 12533 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.where(lambda x: x > 4, lambda x: x + 1) + exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.where(df > 4, df + 1)) + + # return ndarray and scalar + result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99) + exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.where(df % 2 == 0, 99)) + + # chain + result = (df + 2).where(lambda x: x > 8, lambda x: x + 10) + exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, (df + 2).where((df + 2) > 8, (df + 2) + 10)) + + def test_where_tz_values(self, tz_naive_fixture, frame_or_series): + obj1 = DataFrame( + DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture), + columns=["date"], + ) + obj2 = DataFrame( + DatetimeIndex(["20150103", "20150104", "20150105"], 
tz=tz_naive_fixture), + columns=["date"], + ) + mask = DataFrame([True, True, False], columns=["date"]) + exp = DataFrame( + DatetimeIndex(["20150101", "20150102", "20150105"], tz=tz_naive_fixture), + columns=["date"], + ) + if frame_or_series is Series: + obj1 = obj1["date"] + obj2 = obj2["date"] + mask = mask["date"] + exp = exp["date"] + + result = obj1.where(mask, obj2) + tm.assert_equal(exp, result) + + def test_df_where_change_dtype(self): + # GH#16979 + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, False], [False, False, True]]) + + result = df.where(mask) + expected = DataFrame( + [[0, np.nan, np.nan], [np.nan, np.nan, 5]], columns=list("ABC") + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("kwargs", [{}, {"other": None}]) + def test_df_where_with_category(self, kwargs): + # GH#16979 + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, False], [False, False, True]]) + + # change type to category + df.A = df.A.astype("category") + df.B = df.B.astype("category") + df.C = df.C.astype("category") + + result = df.where(mask, **kwargs) + A = pd.Categorical([0, np.nan], categories=[0, 3]) + B = pd.Categorical([np.nan, np.nan], categories=[1, 4]) + C = pd.Categorical([np.nan, 5], categories=[2, 5]) + expected = DataFrame({"A": A, "B": B, "C": C}) + + tm.assert_frame_equal(result, expected) + + # Check Series.where while we're here + result = df.A.where(mask[:, 0], **kwargs) + expected = Series(A, name="A") + + tm.assert_series_equal(result, expected) + + def test_where_categorical_filtering(self): + # GH#22609 Verify filtering operations on DataFrames with categorical Series + df = DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"]) + df["b"] = df["b"].astype("category") + + result = df.where(df["a"] > 0) + expected = df.copy() + expected.loc[0, :] = np.nan + + tm.assert_equal(result, expected) + + def test_where_ea_other(self): + # GH#38729/GH#38742 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + arr = pd.array([7, pd.NA, 9]) + ser = Series(arr) + mask = np.ones(df.shape, dtype=bool) + mask[1, :] = False + + # TODO: ideally we would get Int64 instead of object + result = df.where(mask, ser, axis=0) + expected = DataFrame({"A": [1, pd.NA, 3], "B": [4, pd.NA, 6]}).astype(object) + tm.assert_frame_equal(result, expected) + + ser2 = Series(arr[:2], index=["A", "B"]) + expected = DataFrame({"A": [1, 7, 3], "B": [4, pd.NA, 6]}) + expected["B"] = expected["B"].astype(object) + result = df.where(mask, ser2, axis=1) + tm.assert_frame_equal(result, expected) + + def test_where_interval_noop(self): + # GH#44181 + df = DataFrame([pd.Interval(0, 0)]) + res = df.where(df.notna()) + tm.assert_frame_equal(res, df) + + ser = df[0] + res = ser.where(ser.notna()) + tm.assert_series_equal(res, ser) + + def test_where_interval_fullop_downcast(self, frame_or_series): + # GH#45768 + obj = frame_or_series([pd.Interval(0, 0)] * 2) + other = frame_or_series([1.0, 2.0]) + res = obj.where(~obj.notna(), other) + + # since all entries are being changed, we will downcast result + # from object to ints (not floats) + tm.assert_equal(res, other.astype(np.int64)) + + # unlike where, Block.putmask does not downcast + obj.mask(obj.notna(), other, inplace=True) + tm.assert_equal(obj, other.astype(object)) + + @pytest.mark.parametrize( + "dtype", + [ + "timedelta64[ns]", + "datetime64[ns]", + "datetime64[ns, Asia/Tokyo]", + "Period[D]", + ], + ) + def test_where_datetimelike_noop(self, 
dtype): + # GH#45135, analogue to GH#44181 for Period don't raise on no-op + # For td64/dt64/dt64tz we already don't raise, but also are + # checking that we don't unnecessarily upcast to object. + ser = Series(np.arange(3) * 10**9, dtype=np.int64).view(dtype) + df = ser.to_frame() + mask = np.array([False, False, False]) + + res = ser.where(~mask, "foo") + tm.assert_series_equal(res, ser) + + mask2 = mask.reshape(-1, 1) + res2 = df.where(~mask2, "foo") + tm.assert_frame_equal(res2, df) + + res3 = ser.mask(mask, "foo") + tm.assert_series_equal(res3, ser) + + res4 = df.mask(mask2, "foo") + tm.assert_frame_equal(res4, df) + + # opposite case where we are replacing *all* values -> we downcast + # from object dtype # GH#45768 + res5 = df.where(mask2, 4) + expected = DataFrame(4, index=df.index, columns=df.columns) + tm.assert_frame_equal(res5, expected) + + # unlike where, Block.putmask does not downcast + df.mask(~mask2, 4, inplace=True) + tm.assert_frame_equal(df, expected.astype(object)) + + +def test_where_try_cast_deprecated(frame_or_series): + obj = DataFrame(np.random.randn(4, 3)) + obj = tm.get_obj(obj, frame_or_series) + + mask = obj > 0 + + with tm.assert_produces_warning(FutureWarning): + # try_cast keyword deprecated + obj.where(mask, -1, try_cast=False) + + +def test_where_int_downcasting_deprecated(): + # GH#44597 + arr = np.arange(6).astype(np.int16).reshape(3, 2) + df = DataFrame(arr) + + mask = np.zeros(arr.shape, dtype=bool) + mask[:, 0] = True + + res = df.where(mask, 2**17) + + expected = DataFrame({0: arr[:, 0], 1: np.array([2**17] * 3, dtype=np.int32)}) + tm.assert_frame_equal(res, expected) + + +def test_where_copies_with_noop(frame_or_series): + # GH-39595 + result = frame_or_series([1, 2, 3, 4]) + expected = result.copy() + col = result[0] if frame_or_series is DataFrame else result + + where_res = result.where(col < 5) + where_res *= 2 + + tm.assert_equal(result, expected) + + where_res = result.where(col > 5, [1, 2, 3, 4]) + where_res *= 2 + + tm.assert_equal(result, expected) + + +def test_where_string_dtype(frame_or_series): + # GH40824 + obj = frame_or_series( + ["a", "b", "c", "d"], index=["id1", "id2", "id3", "id4"], dtype=StringDtype() + ) + filtered_obj = frame_or_series( + ["b", "c"], index=["id2", "id3"], dtype=StringDtype() + ) + filter_ser = Series([False, True, True, False]) + + result = obj.where(filter_ser, filtered_obj) + expected = frame_or_series( + [pd.NA, "b", "c", pd.NA], + index=["id1", "id2", "id3", "id4"], + dtype=StringDtype(), + ) + tm.assert_equal(result, expected) + + result = obj.mask(~filter_ser, filtered_obj) + tm.assert_equal(result, expected) + + obj.mask(~filter_ser, filtered_obj, inplace=True) + tm.assert_equal(result, expected) + + +def test_where_bool_comparison(): + # GH 10336 + df_mask = DataFrame( + {"AAA": [True] * 4, "BBB": [False] * 4, "CCC": [True, False, True, False]} + ) + result = df_mask.where(df_mask == False) # noqa:E712 + expected = DataFrame( + { + "AAA": np.array([np.nan] * 4, dtype=object), + "BBB": [False] * 4, + "CCC": [np.nan, False, np.nan, False], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_where_none_nan_coerce(): + # GH 15613 + expected = DataFrame( + { + "A": [Timestamp("20130101"), pd.NaT, Timestamp("20130103")], + "B": [1, 2, np.nan], + } + ) + result = expected.where(expected.notnull(), None) + tm.assert_frame_equal(result, expected) + + +def test_where_duplicate_axes_mixed_dtypes(): + # GH 25399, verify manually masking is not affected anymore by dtype of column for + # duplicate 
axes. + result = DataFrame(data=[[0, np.nan]], columns=Index(["A", "A"])) + index, columns = result.axes + mask = DataFrame(data=[[True, True]], columns=columns, index=index) + a = result.astype(object).where(mask) + b = result.astype("f8").where(mask) + c = result.T.where(mask.T).T + d = result.where(mask) # used to fail with "cannot reindex from a duplicate axis" + tm.assert_frame_equal(a.astype("f8"), b.astype("f8")) + tm.assert_frame_equal(b.astype("f8"), c.astype("f8")) + tm.assert_frame_equal(c.astype("f8"), d.astype("f8")) + + +def test_where_non_keyword_deprecation(frame_or_series): + # GH 41485 + obj = frame_or_series(range(5)) + msg = ( + "In a future version of pandas all arguments of " + f"{frame_or_series.__name__}.where except for the arguments 'cond' " + "and 'other' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = obj.where(obj > 1, 10, False) + expected = frame_or_series([10, 10, 2, 3, 4]) + tm.assert_equal(expected, result) + + +def test_where_columns_casting(): + # GH 42295 + + df = DataFrame({"a": [1.0, 2.0], "b": [3, np.nan]}) + expected = df.copy() + result = df.where(pd.notnull(df), None) + # make sure dtypes don't change + tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize("as_cat", [True, False]) +def test_where_period_invalid_na(frame_or_series, as_cat, request): + # GH#44697 + idx = pd.period_range("2016-01-01", periods=3, freq="D") + if as_cat: + idx = idx.astype("category") + obj = frame_or_series(idx) + + # NA value that we should *not* cast to Period dtype + tdnat = pd.NaT.to_numpy("m8[ns]") + + mask = np.array([True, True, False], ndmin=obj.ndim).T + + if as_cat: + msg = ( + r"Cannot setitem on a Categorical with a new category \(NaT\), " + "set the categories first" + ) + else: + msg = "value should be a 'Period'" + + if as_cat: + with pytest.raises(TypeError, match=msg): + obj.where(mask, tdnat) + + with pytest.raises(TypeError, match=msg): + obj.mask(mask, tdnat) + + with pytest.raises(TypeError, match=msg): + obj.mask(mask, tdnat, inplace=True) + + else: + # With PeriodDtype, ser[i] = tdnat coerces instead of raising, + # so for consistency, ser[mask] = tdnat must as well + expected = obj.astype(object).where(mask, tdnat) + result = obj.where(mask, tdnat) + tm.assert_equal(result, expected) + + expected = obj.astype(object).mask(mask, tdnat) + result = obj.mask(mask, tdnat) + tm.assert_equal(result, expected) + + obj.mask(mask, tdnat, inplace=True) + tm.assert_equal(obj, expected) + + +def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype): + # GH#44697 + arr = pd.array([1, 2, 3], dtype=any_numeric_ea_dtype) + obj = frame_or_series(arr) + + mask = np.array([True, True, False], ndmin=obj.ndim).T + + msg = r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}" + + for null in tm.NP_NAT_OBJECTS + [pd.NaT]: + # NaT is an NA value that we should *not* cast to pd.NA dtype + with pytest.raises(TypeError, match=msg): + obj.where(mask, null) + + with pytest.raises(TypeError, match=msg): + obj.mask(mask, null) + + +@given(data=OPTIONAL_ONE_OF_ALL) +def test_where_inplace_casting(data): + # GH 22051 + df = DataFrame({"a": data}) + df_copy = df.where(pd.notnull(df), None).copy() + df.where(pd.notnull(df), None, inplace=True) + tm.assert_equal(df, df_copy) + + +def test_where_downcast_to_td64(): + ser = Series([1, 2, 3]) + + mask = np.array([False, False, False]) + + td = pd.Timedelta(days=1) + + res = ser.where(mask, td) + expected = Series([td, td, td], dtype="m8[ns]") + 
tm.assert_series_equal(res, expected) + + +def _check_where_equivalences(df, mask, other, expected): + # similar to tests.series.indexing.test_setitem.SetitemCastingEquivalences + # but with DataFrame in mind and less fleshed-out + res = df.where(mask, other) + tm.assert_frame_equal(res, expected) + + res = df.mask(~mask, other) + tm.assert_frame_equal(res, expected) + + # Note: frame.mask(~mask, other, inplace=True) takes some more work bc + # Block.putmask does *not* downcast. The change to 'expected' here + # is specific to the cases in test_where_dt64_2d. + df = df.copy() + df.mask(~mask, other, inplace=True) + if not mask.all(): + # with mask.all(), Block.putmask is a no-op, so does not downcast + expected = expected.copy() + expected["A"] = expected["A"].astype(object) + tm.assert_frame_equal(df, expected) + + +def test_where_dt64_2d(): + dti = date_range("2016-01-01", periods=6) + dta = dti._data.reshape(3, 2) + other = dta - dta[0, 0] + + df = DataFrame(dta, columns=["A", "B"]) + + mask = np.asarray(df.isna()) + mask[:, 1] = True + + # setting all of one column, none of the other + expected = DataFrame({"A": other[:, 0], "B": dta[:, 1]}) + _check_where_equivalences(df, mask, other, expected) + + # setting part of one column, none of the other + mask[1, 0] = True + expected = DataFrame( + { + "A": np.array([other[0, 0], dta[1, 0], other[2, 0]], dtype=object), + "B": dta[:, 1], + } + ) + _check_where_equivalences(df, mask, other, expected) + + # setting nothing in either column + mask[:] = True + expected = df + _check_where_equivalences(df, mask, other, expected) + + +def test_where_mask_deprecated(frame_or_series): + # GH 47728 + obj = DataFrame(np.random.randn(4, 3)) + obj = tm.get_obj(obj, frame_or_series) + + mask = obj > 0 + + with tm.assert_produces_warning(FutureWarning): + obj.where(mask, -1, errors="raise") + + with tm.assert_produces_warning(FutureWarning): + obj.mask(mask, -1, errors="raise") + + +def test_where_producing_ea_cond_for_np_dtype(): + # GH#44014 + df = DataFrame({"a": Series([1, pd.NA, 2], dtype="Int64"), "b": [1, 2, 3]}) + result = df.where(lambda x: x.apply(lambda y: y > 1, axis=1)) + expected = DataFrame( + {"a": Series([pd.NA, pd.NA, 2], dtype="Int64"), "b": [np.nan, 2, 3]} + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py new file mode 100644 index 00000000..5951c1dd --- /dev/null +++ b/pandas/tests/frame/indexing/test_xs.py @@ -0,0 +1,419 @@ +import re + +import numpy as np +import pytest + +from pandas.errors import SettingWithCopyError + +from pandas import ( + DataFrame, + Index, + IndexSlice, + MultiIndex, + Series, + concat, +) +import pandas._testing as tm + +from pandas.tseries.offsets import BDay + + +@pytest.fixture +def four_level_index_dataframe(): + arr = np.array( + [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + [-0.6662, -0.5243, -0.358, 0.89145, 2.5838], + ] + ) + index = MultiIndex( + levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]], + codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]], + names=["one", "two", "three", "four"], + ) + return DataFrame(arr, index=index, columns=list("ABCDE")) + + +class TestXS: + def test_xs(self, float_frame, datetime_frame): + idx = float_frame.index[5] + xs = float_frame.xs(idx) + for item, value in xs.items(): + if np.isnan(value): + assert np.isnan(float_frame[item][idx]) + else: + assert value == float_frame[item][idx] + + # mixed-type xs + 
test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + frame = DataFrame(test_data) + xs = frame.xs("1") + assert xs.dtype == np.object_ + assert xs["A"] == 1 + assert xs["B"] == "1" + + with pytest.raises( + KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')") + ): + datetime_frame.xs(datetime_frame.index[0] - BDay()) + + # xs get column + series = float_frame.xs("A", axis=1) + expected = float_frame["A"] + tm.assert_series_equal(series, expected) + + # view is returned if possible + series = float_frame.xs("A", axis=1) + series[:] = 5 + assert (expected == 5).all() + + def test_xs_corner(self): + # pathological mixed-type reordering case + df = DataFrame(index=[0]) + df["A"] = 1.0 + df["B"] = "foo" + df["C"] = 2.0 + df["D"] = "bar" + df["E"] = 3.0 + + xs = df.xs(0) + exp = Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0) + tm.assert_series_equal(xs, exp) + + # no columns but Index(dtype=object) + df = DataFrame(index=["a", "b", "c"]) + result = df.xs("a") + expected = Series([], name="a", index=Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_xs_duplicates(self): + df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"]) + + cross = df.xs("c") + exp = df.iloc[2] + tm.assert_series_equal(cross, exp) + + def test_xs_keep_level(self): + df = DataFrame( + { + "day": {0: "sat", 1: "sun"}, + "flavour": {0: "strawberry", 1: "strawberry"}, + "sales": {0: 10, 1: 12}, + "year": {0: 2008, 1: 2008}, + } + ).set_index(["year", "flavour", "day"]) + result = df.xs("sat", level="day", drop_level=False) + expected = df[:1] + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False) + tm.assert_frame_equal(result, expected) + + def test_xs_view(self, using_array_manager, using_copy_on_write): + # in 0.14 this will return a view if possible a copy otherwise, but + # this is numpy dependent + + dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) + df_orig = dm.copy() + + if using_copy_on_write: + dm.xs(2)[:] = 20 + tm.assert_frame_equal(dm, df_orig) + elif using_array_manager: + # INFO(ArrayManager) with ArrayManager getting a row as a view is + # not possible + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(SettingWithCopyError, match=msg): + dm.xs(2)[:] = 20 + assert not (dm.xs(2) == 20).any() + else: + dm.xs(2)[:] = 20 + assert (dm.xs(2) == 20).all() + + +class TestXSWithMultiIndex: + def test_xs_doc_example(self): + # TODO: more descriptive name + # based on example in advanced.rst + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + + result = df.xs(("one", "bar"), level=("second", "first"), axis=1) + + expected = df.iloc[:, [0]] + tm.assert_frame_equal(result, expected) + + def test_xs_integer_key(self): + # see GH#2107 + dates = range(20111201, 20111205) + ids = list("abcde") + index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) + df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"]) + + result = df.xs(20111201, level="date") + expected = df.loc[20111201, :] + tm.assert_frame_equal(result, expected) + + def 
test_xs_level(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs("two", level="second") + expected = df[df.index.get_level_values(1) == "two"] + expected.index = Index(["foo", "bar", "baz", "qux"], name="first") + tm.assert_frame_equal(result, expected) + + def test_xs_level_eq_2(self): + arr = np.random.randn(3, 5) + index = MultiIndex( + levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]], + codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], + ) + df = DataFrame(arr, index=index) + expected = DataFrame(arr[1:2], index=[["a"], ["b"]]) + result = df.xs("c", level=2) + tm.assert_frame_equal(result, expected) + + def test_xs_setting_with_copy_error( + self, multiindex_dataframe_random_data, using_copy_on_write + ): + # this is a copy in 0.14 + df = multiindex_dataframe_random_data + df_orig = df.copy() + result = df.xs("two", level="second") + + if using_copy_on_write: + result[:] = 10 + else: + # setting this will give a SettingWithCopyError + # as we are trying to write a view + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(SettingWithCopyError, match=msg): + result[:] = 10 + tm.assert_frame_equal(df, df_orig) + + def test_xs_setting_with_copy_error_multiple( + self, four_level_index_dataframe, using_copy_on_write + ): + # this is a copy in 0.14 + df = four_level_index_dataframe + df_orig = df.copy() + result = df.xs(("a", 4), level=["one", "four"]) + + if using_copy_on_write: + result[:] = 10 + else: + # setting this will give a SettingWithCopyError + # as we are trying to write a view + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(SettingWithCopyError, match=msg): + result[:] = 10 + tm.assert_frame_equal(df, df_orig) + + @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])]) + def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data): + # see GH#13719 + frame = multiindex_dataframe_random_data + df = concat([frame] * 2) + assert df.index.is_unique is False + expected = concat([frame.xs("one", level="second")] * 2) + + if isinstance(key, list): + with tm.assert_produces_warning(FutureWarning): + result = df.xs(key, level=level) + else: + result = df.xs(key, level=level) + tm.assert_frame_equal(result, expected) + + def test_xs_missing_values_in_index(self): + # see GH#6574 + # missing values in returned index should be preserved + acc = [ + ("a", "abcde", 1), + ("b", "bbcde", 2), + ("y", "yzcde", 25), + ("z", "xbcde", 24), + ("z", None, 26), + ("z", "zbcde", 25), + ("z", "ybcde", 26), + ] + df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"]) + expected = DataFrame( + {"cnt": [24, 26, 25, 26]}, + index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"), + ) + + result = df.xs("z", level="a1") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "key, level, exp_arr, exp_index", + [ + ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")), + ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")), + ], + ) + def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index): + # see GH#2903 + arr = np.random.randn(4, 4) + index = MultiIndex( + levels=[["a", "b"], ["bar", "foo", "hello", "world"]], + codes=[[0, 0, 1, 1], [0, 1, 2, 3]], + names=["lvl0", "lvl1"], + ) + df = DataFrame(arr, columns=index) + result = df.xs(key, level=level, axis=1) + expected = DataFrame(exp_arr(arr), columns=exp_index) + 
tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "indexer", + [ + lambda df: df.xs(("a", 4), level=["one", "four"]), + lambda df: df.xs("a").xs(4, level="four"), + ], + ) + def test_xs_level_multiple(self, indexer, four_level_index_dataframe): + df = four_level_index_dataframe + expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]] + expected_index = MultiIndex( + levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"] + ) + expected = DataFrame( + expected_values, index=expected_index, columns=list("ABCDE") + ) + result = indexer(df) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")] + ) + def test_xs_level0(self, indexer, four_level_index_dataframe): + df = four_level_index_dataframe + expected_values = [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + ] + expected_index = MultiIndex( + levels=[["b", "q"], [10.0032, 20.0], [4, 5]], + codes=[[0, 1], [0, 1], [1, 0]], + names=["two", "three", "four"], + ) + expected = DataFrame( + expected_values, index=expected_index, columns=list("ABCDE") + ) + + result = indexer(df) + tm.assert_frame_equal(result, expected) + + def test_xs_values(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs(("bar", "two")).values + expected = df.values[4] + tm.assert_almost_equal(result, expected) + + def test_xs_loc_equality(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs(("bar", "two")) + expected = df.loc[("bar", "two")] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("klass", [DataFrame, Series]) + def test_xs_IndexSlice_argument_not_implemented(self, klass): + # GH#35301 + + index = MultiIndex( + levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + ) + + obj = DataFrame(np.random.randn(6, 4), index=index) + if klass is Series: + obj = obj[0] + + expected = obj.iloc[-2:].droplevel(0) + + result = obj.xs(IndexSlice[("foo", "qux", 0), :]) + tm.assert_equal(result, expected) + + result = obj.loc[IndexSlice[("foo", "qux", 0), :]] + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("klass", [DataFrame, Series]) + def test_xs_levels_raises(self, klass): + obj = DataFrame({"A": [1, 2, 3]}) + if klass is Series: + obj = obj["A"] + + msg = "Index must be a MultiIndex" + with pytest.raises(TypeError, match=msg): + obj.xs(0, level="as") + + def test_xs_multiindex_droplevel_false(self): + # GH#19056 + mi = MultiIndex.from_tuples( + [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"] + ) + df = DataFrame([[1, 2, 3]], columns=mi) + result = df.xs("a", axis=1, drop_level=False) + expected = DataFrame( + [[1, 2]], + columns=MultiIndex.from_tuples( + [("a", "x"), ("a", "y")], names=["level1", "level2"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_xs_droplevel_false(self): + # GH#19056 + df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) + result = df.xs("a", axis=1, drop_level=False) + expected = DataFrame({"a": [1]}) + tm.assert_frame_equal(result, expected) + + def test_xs_droplevel_false_view(self, using_array_manager, using_copy_on_write): + # GH#37832 + df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) + result = df.xs("a", axis=1, drop_level=False) + # check that result still views the same data as df + assert np.shares_memory(result.iloc[:, 
0]._values, df.iloc[:, 0]._values) + + df.iloc[0, 0] = 2 + if using_copy_on_write: + # with copy on write the subset is never modified + expected = DataFrame({"a": [1]}) + else: + # modifying original df also modifies result when having a single block + expected = DataFrame({"a": [2]}) + tm.assert_frame_equal(result, expected) + + # with mixed dataframe, modifying the parent doesn't modify result + # TODO the "split" path behaves differently here as with single block + df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"])) + result = df.xs("a", axis=1, drop_level=False) + df.iloc[0, 0] = 2 + if using_copy_on_write: + # with copy on write the subset is never modified + expected = DataFrame({"a": [1]}) + elif using_array_manager: + # Here the behavior is consistent + expected = DataFrame({"a": [2]}) + else: + # FIXME: iloc does not update the array inplace using + # "split" path + expected = DataFrame({"a": [1]}) + tm.assert_frame_equal(result, expected) + + def test_xs_list_indexer_droplevel_false(self): + # GH#41760 + mi = MultiIndex.from_tuples([("x", "m", "a"), ("x", "n", "b"), ("y", "o", "c")]) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi) + with tm.assert_produces_warning(FutureWarning): + with pytest.raises(KeyError, match="y"): + df.xs(["x", "y"], drop_level=False, axis=1) diff --git a/pandas/tests/frame/methods/__init__.py b/pandas/tests/frame/methods/__init__.py new file mode 100644 index 00000000..245594bf --- /dev/null +++ b/pandas/tests/frame/methods/__init__.py @@ -0,0 +1,7 @@ +""" +Test files dedicated to individual (stand-alone) DataFrame methods + +Ideally these files/tests should correspond 1-to-1 with tests.series.methods + +These may also present opportunities for sharing/de-duplicating test code. +""" diff --git a/pandas/tests/frame/methods/test_add_prefix_suffix.py b/pandas/tests/frame/methods/test_add_prefix_suffix.py new file mode 100644 index 00000000..ea75e9ff --- /dev/null +++ b/pandas/tests/frame/methods/test_add_prefix_suffix.py @@ -0,0 +1,20 @@ +from pandas import Index +import pandas._testing as tm + + +def test_add_prefix_suffix(float_frame): + with_prefix = float_frame.add_prefix("foo#") + expected = Index([f"foo#{c}" for c in float_frame.columns]) + tm.assert_index_equal(with_prefix.columns, expected) + + with_suffix = float_frame.add_suffix("#foo") + expected = Index([f"{c}#foo" for c in float_frame.columns]) + tm.assert_index_equal(with_suffix.columns, expected) + + with_pct_prefix = float_frame.add_prefix("%") + expected = Index([f"%{c}" for c in float_frame.columns]) + tm.assert_index_equal(with_pct_prefix.columns, expected) + + with_pct_suffix = float_frame.add_suffix("%") + expected = Index([f"{c}%" for c in float_frame.columns]) + tm.assert_index_equal(with_pct_suffix.columns, expected) diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py new file mode 100644 index 00000000..575db40f --- /dev/null +++ b/pandas/tests/frame/methods/test_align.py @@ -0,0 +1,403 @@ +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameAlign: + def test_frame_align_aware(self): + idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") + idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern") + df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) + df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) + new1, new2 = df1.align(df2) + assert df1.index.tz == 
new1.index.tz + assert df2.index.tz == new2.index.tz + + # different timezones convert to UTC + + # frame with frame + df1_central = df1.tz_convert("US/Central") + new1, new2 = df1.align(df1_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + # frame with Series + new1, new2 = df1.align(df1_central[0], axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + df1[0].align(df1_central, axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + def test_align_float(self, float_frame): + af, bf = float_frame.align(float_frame) + assert af._mgr is not float_frame._mgr + + af, bf = float_frame.align(float_frame, copy=False) + assert af._mgr is float_frame._mgr + + # axis = 0 + other = float_frame.iloc[:-5, :3] + af, bf = float_frame.align(other, axis=0, fill_value=-1) + + tm.assert_index_equal(bf.columns, other.columns) + + # test fill value + join_idx = float_frame.index.join(other.index) + diff_a = float_frame.index.difference(join_idx) + diff_a_vals = af.reindex(diff_a).values + assert (diff_a_vals == -1).all() + + af, bf = float_frame.align(other, join="right", axis=0) + tm.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.index, other.index) + tm.assert_index_equal(af.index, other.index) + + # axis = 1 + other = float_frame.iloc[:-5, :3].copy() + af, bf = float_frame.align(other, axis=1) + tm.assert_index_equal(bf.columns, float_frame.columns) + tm.assert_index_equal(bf.index, other.index) + + # test fill value + join_idx = float_frame.index.join(other.index) + diff_a = float_frame.index.difference(join_idx) + diff_a_vals = af.reindex(diff_a).values + + assert (diff_a_vals == -1).all() + + af, bf = float_frame.align(other, join="inner", axis=1) + tm.assert_index_equal(bf.columns, other.columns) + + af, bf = float_frame.align(other, join="inner", axis=1, method="pad") + tm.assert_index_equal(bf.columns, other.columns) + + af, bf = float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=None + ) + tm.assert_index_equal(bf.index, Index([])) + + af, bf = float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + # Try to align DataFrame to Series along bad axis + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + float_frame.align(af.iloc[0, :3], join="inner", axis=2) + + # align dataframe to series with broadcast or not + idx = float_frame.index + s = Series(range(len(idx)), index=idx) + + left, right = float_frame.align(s, axis=0) + tm.assert_index_equal(left.index, float_frame.index) + tm.assert_index_equal(right.index, float_frame.index) + assert isinstance(right, Series) + + left, right = float_frame.align(s, broadcast_axis=1) + tm.assert_index_equal(left.index, float_frame.index) + expected = {c: s for c in float_frame.columns} + expected = DataFrame( + expected, index=float_frame.index, columns=float_frame.columns + ) + tm.assert_frame_equal(right, expected) + + # see gh-9558 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df[df["a"] == 2] + expected = DataFrame([[2, 5]], index=[1], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + result = df.where(df["a"] == 2, 0) + expected = DataFrame({"a": [0, 2, 0], "b": [0, 5, 0]}) + tm.assert_frame_equal(result, expected) + + def test_align_int(self, int_frame): + # test other non-float types + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf 
= int_frame.align(other, join="inner", axis=1, method="pad") + tm.assert_index_equal(bf.columns, other.columns) + + def test_align_mixed_type(self, float_string_frame): + + af, bf = float_string_frame.align( + float_string_frame, join="inner", axis=1, method="pad" + ) + tm.assert_index_equal(bf.columns, float_string_frame.columns) + + def test_align_mixed_float(self, mixed_float_frame): + # mixed floats/ints + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = mixed_float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + def test_align_mixed_int(self, mixed_int_frame): + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = mixed_int_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + @pytest.mark.parametrize( + "l_ordered,r_ordered,expected", + [ + [True, True, pd.CategoricalIndex], + [True, False, Index], + [False, True, Index], + [False, False, pd.CategoricalIndex], + ], + ) + def test_align_categorical(self, l_ordered, r_ordered, expected): + # GH-28397 + df_1 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": Series(list("aabbca")).astype( + pd.CategoricalDtype(list("cab"), ordered=l_ordered) + ), + } + ).set_index("B") + df_2 = DataFrame( + { + "A": np.arange(5, dtype="int64"), + "B": Series(list("babca")).astype( + pd.CategoricalDtype(list("cab"), ordered=r_ordered) + ), + } + ).set_index("B") + + aligned_1, aligned_2 = df_1.align(df_2) + assert isinstance(aligned_1.index, expected) + assert isinstance(aligned_2.index, expected) + tm.assert_index_equal(aligned_1.index, aligned_2.index) + + def test_align_multiindex(self): + # GH#10665 + # same test cases as test_align_multiindex in test_series.py + + midx = pd.MultiIndex.from_product( + [range(2), range(3), range(2)], names=("a", "b", "c") + ) + idx = Index(range(2), name="b") + df1 = DataFrame(np.arange(12, dtype="int64"), index=midx) + df2 = DataFrame(np.arange(2, dtype="int64"), index=idx) + + # these must be the same results (but flipped) + res1l, res1r = df1.align(df2, join="left") + res2l, res2r = df2.align(df1, join="right") + + expl = df1 + tm.assert_frame_equal(expl, res1l) + tm.assert_frame_equal(expl, res2r) + expr = DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + tm.assert_frame_equal(expr, res1r) + tm.assert_frame_equal(expr, res2l) + + res1l, res1r = df1.align(df2, join="right") + res2l, res2r = df2.align(df1, join="left") + + exp_idx = pd.MultiIndex.from_product( + [range(2), range(2), range(2)], names=("a", "b", "c") + ) + expl = DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + tm.assert_frame_equal(expl, res1l) + tm.assert_frame_equal(expl, res2r) + expr = DataFrame([0, 0, 1, 1] * 2, index=exp_idx) + tm.assert_frame_equal(expr, res1r) + tm.assert_frame_equal(expr, res2l) + + def test_align_series_combinations(self): + df = DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) + s = Series([1, 2, 4], index=list("ABD"), name="x") + + # frame + series + res1, res2 = df.align(s, axis=0) + exp1 = DataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + exp2 = Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x") + + tm.assert_frame_equal(res1, exp1) + tm.assert_series_equal(res2, exp2) + + # series + frame + res1, res2 = s.align(df) + tm.assert_series_equal(res1, exp2) + tm.assert_frame_equal(res2, exp1) + + def 
test_multiindex_align_to_series_with_common_index_level(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 2], name="bar") + + series = Series([1, 2], index=bar_index, name="foo_series") + df = DataFrame( + {"col": np.arange(6)}, + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + + expected_r = Series([1, 2] * 3, index=df.index, name="foo_series") + result_l, result_r = df.align(series, axis=0) + + tm.assert_frame_equal(result_l, df) + tm.assert_series_equal(result_r, expected_r) + + def test_multiindex_align_to_series_with_common_index_level_missing_in_left(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 2], name="bar") + + series = Series( + [1, 2, 3, 4], index=Index([1, 2, 3, 4], name="bar"), name="foo_series" + ) + df = DataFrame( + {"col": np.arange(6)}, + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + + expected_r = Series([1, 2] * 3, index=df.index, name="foo_series") + result_l, result_r = df.align(series, axis=0) + + tm.assert_frame_equal(result_l, df) + tm.assert_series_equal(result_r, expected_r) + + def test_multiindex_align_to_series_with_common_index_level_missing_in_right(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 2, 3, 4], name="bar") + + series = Series([1, 2], index=Index([1, 2], name="bar"), name="foo_series") + df = DataFrame( + {"col": np.arange(12)}, + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + + expected_r = Series( + [1, 2, np.nan, np.nan] * 3, index=df.index, name="foo_series" + ) + result_l, result_r = df.align(series, axis=0) + + tm.assert_frame_equal(result_l, df) + tm.assert_series_equal(result_r, expected_r) + + def test_multiindex_align_to_series_with_common_index_level_missing_in_both(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 3, 4], name="bar") + + series = Series( + [1, 2, 3], index=Index([1, 2, 4], name="bar"), name="foo_series" + ) + df = DataFrame( + {"col": np.arange(9)}, + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + + expected_r = Series([1, np.nan, 3] * 3, index=df.index, name="foo_series") + result_l, result_r = df.align(series, axis=0) + + tm.assert_frame_equal(result_l, df) + tm.assert_series_equal(result_r, expected_r) + + def test_multiindex_align_to_series_with_common_index_level_non_unique_cols(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 2], name="bar") + + series = Series([1, 2], index=bar_index, name="foo_series") + df = DataFrame( + np.arange(18).reshape(6, 3), + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + df.columns = ["cfoo", "cbar", "cfoo"] + + expected = Series([1, 2] * 3, index=df.index, name="foo_series") + result_left, result_right = df.align(series, axis=0) + + tm.assert_series_equal(result_right, expected) + tm.assert_index_equal(result_left.columns, df.columns) + + def test_missing_axis_specification_exception(self): + df = DataFrame(np.arange(50).reshape((10, 5))) + series = Series(np.arange(5)) + + with pytest.raises(ValueError, match=r"axis=0 or 1"): + df.align(series) + + def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): + aa, ab = a.align( + b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis + ) + + join_index, join_columns = None, None + + ea, eb = a, b + if axis is None or axis == 0: + join_index = a.index.join(b.index, how=how) + ea = ea.reindex(index=join_index) + eb = 
eb.reindex(index=join_index) + + if axis is None or axis == 1: + join_columns = a.columns.join(b.columns, how=how) + ea = ea.reindex(columns=join_columns) + eb = eb.reindex(columns=join_columns) + + ea = ea.fillna(axis=fill_axis, method=method, limit=limit) + eb = eb.fillna(axis=fill_axis, method=method, limit=limit) + + tm.assert_frame_equal(aa, ea) + tm.assert_frame_equal(ab, eb) + + @pytest.mark.parametrize("meth", ["pad", "bfill"]) + @pytest.mark.parametrize("ax", [0, 1, None]) + @pytest.mark.parametrize("fax", [0, 1]) + @pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) + def test_align_fill_method(self, how, meth, ax, fax, float_frame): + df = float_frame + self._check_align_fill(df, how, meth, ax, fax) + + def _check_align_fill(self, frame, kind, meth, ax, fax): + left = frame.iloc[0:4, :10] + right = frame.iloc[2:, 6:] + empty = frame.iloc[:0, :0] + + self._check_align(left, right, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + left, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # empty left + self._check_align(empty, right, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + empty, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # empty right + self._check_align(left, empty, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + left, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # both empty + self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py new file mode 100644 index 00000000..f07ffee2 --- /dev/null +++ b/pandas/tests/frame/methods/test_append.py @@ -0,0 +1,292 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestDataFrameAppend: + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") + def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series): + obj = multiindex_dataframe_random_data + obj = tm.get_obj(obj, frame_or_series) + + a = obj[:5] + b = obj[5:] + + result = a.append(b) + tm.assert_equal(result, obj) + + def test_append_empty_list(self): + # GH 28769 + df = DataFrame() + result = df._append([]) + expected = df + tm.assert_frame_equal(result, expected) + assert result is not df + + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + result = df._append([]) + expected = df + tm.assert_frame_equal(result, expected) + assert result is not df # ._append() should return a new object + + def test_append_series_dict(self): + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + series = df.loc[4] + msg = "Indexes have overlapping values" + with pytest.raises(ValueError, match=msg): + df._append(series, verify_integrity=True) + + series.name = None + msg = "Can only append a Series if ignore_index=True" + with pytest.raises(TypeError, match=msg): + df._append(series, verify_integrity=True) + + result = df._append(series[::-1], ignore_index=True) + expected = df._append( + DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True + ) + tm.assert_frame_equal(result, expected) + + # dict + result = df._append(series.to_dict(), ignore_index=True) + 
tm.assert_frame_equal(result, expected) + + result = df._append(series[::-1][:3], ignore_index=True) + expected = df._append( + DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True + ) + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + msg = "Can only append a dict if ignore_index=True" + with pytest.raises(TypeError, match=msg): + df._append(series.to_dict()) + + # can append when name set + row = df.loc[4] + row.name = 5 + result = df._append(row) + expected = df._append(df[-1:], ignore_index=True) + tm.assert_frame_equal(result, expected) + + def test_append_list_of_series_dicts(self): + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + dicts = [x.to_dict() for idx, x in df.iterrows()] + + result = df._append(dicts, ignore_index=True) + expected = df._append(df, ignore_index=True) + tm.assert_frame_equal(result, expected) + + # different columns + dicts = [ + {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, + {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, + ] + result = df._append(dicts, ignore_index=True, sort=True) + expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + def test_append_list_retain_index_name(self): + df = DataFrame( + [[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname") + ) + + serc = Series([5, 6], name="c") + + expected = DataFrame( + [[1, 2], [3, 4], [5, 6]], + index=pd.Index(["a", "b", "c"], name="keepthisname"), + ) + + # append series + result = df._append(serc) + tm.assert_frame_equal(result, expected) + + # append list of series + result = df._append([serc]) + tm.assert_frame_equal(result, expected) + + def test_append_missing_cols(self): + # GH22252 + # exercise the conditional branch in append method where the data + # to be appended is a list and does not contain all columns that are in + # the target DataFrame + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + dicts = [{"foo": 9}, {"bar": 10}] + result = df._append(dicts, ignore_index=True, sort=True) + + expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + def test_append_empty_dataframe(self): + + # Empty df append empty df + df1 = DataFrame() + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Non-empty df append empty df + df1 = DataFrame(np.random.randn(5, 2)) + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Empty df with columns append empty df + df1 = DataFrame(columns=["bar", "foo"]) + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Non-Empty df with columns append empty df + df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + def test_append_dtypes(self, using_array_manager): + + # GH 5754 + # row appends of different dtypes (so need to do by-item) + # can sometimes infer the correct type + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) + df2 = DataFrame() + result = df1._append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) + result = df1._append(df2) + expected = DataFrame({"bar": 
[Timestamp("20130101"), "foo"]}) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) + result = df1._append(df2) + expected = DataFrame( + {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} + ) + if using_array_manager: + # TODO(ArrayManager) decide on exact casting rules in concat + # With ArrayManager, all-NaN float is not ignored + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) + result = df1._append(df2) + expected = DataFrame( + {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} + ) + if using_array_manager: + # With ArrayManager, all-NaN float is not ignored + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": np.nan}, index=range(1)) + df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) + result = df1._append(df2) + expected = DataFrame( + {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} + ) + if using_array_manager: + # With ArrayManager, all-NaN float is not ignored + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) + result = df1._append(df2) + expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] + ) + def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): + # GH 30238 + tz = tz_naive_fixture + df = DataFrame([Timestamp(timestamp, tz=tz)]) + result = df._append(df.iloc[0]).iloc[-1] + expected = Series(Timestamp(timestamp, tz=tz), name=0) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data, dtype", + [ + ([1], pd.Int64Dtype()), + ([1], pd.CategoricalDtype()), + ([pd.Interval(left=0, right=5)], pd.IntervalDtype()), + ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")), + ([1], pd.SparseDtype()), + ], + ) + def test_other_dtypes(self, data, dtype, using_array_manager): + df = DataFrame(data, dtype=dtype) + + warn = None + if using_array_manager and isinstance(dtype, pd.SparseDtype): + warn = FutureWarning + + with tm.assert_produces_warning(warn, match="astype from SparseDtype"): + result = df._append(df.iloc[0]).iloc[-1] + + expected = Series(data, name=0, dtype=dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_append_numpy_bug_1681(self, dtype): + # another datetime64 bug + if dtype == "datetime64[ns]": + index = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + else: + index = timedelta_range("1 days", "10 days", freq="2D") + + df = DataFrame() + other = DataFrame({"A": "foo", "B": index}, index=index) + + result = df._append(other) + assert (result["B"] == index).all() + + @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") + def test_multiindex_column_append_multiple(self): + # GH 29699 + df = DataFrame( + [[1, 11], [2, 12], [3, 13]], + columns=pd.MultiIndex.from_tuples( + [("multi", "col1"), ("multi", "col2")], names=["level1", None] + ), + ) + df2 = df.copy() + for i in range(1, 10): + df[i, "colA"] = 10 + df = 
df._append(df2, ignore_index=True) + result = df["multi"] + expected = DataFrame( + {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} + ) + tm.assert_frame_equal(result, expected) + + def test_append_raises_future_warning(self): + # GH#35407 + df1 = DataFrame([[1, 2], [3, 4]]) + df2 = DataFrame([[5, 6], [7, 8]]) + with tm.assert_produces_warning(FutureWarning): + df1.append(df2) diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py new file mode 100644 index 00000000..07eacb5e --- /dev/null +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -0,0 +1,198 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestAsFreq: + def test_asfreq2(self, frame_or_series): + ts = frame_or_series( + [0.0, 1.0, 2.0], + index=DatetimeIndex( + [ + datetime(2009, 10, 30), + datetime(2009, 11, 30), + datetime(2009, 12, 31), + ], + freq="BM", + ), + ) + + daily_ts = ts.asfreq("B") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_equal(monthly_ts, ts) + + daily_ts = ts.asfreq("B", method="pad") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_equal(monthly_ts, ts) + + daily_ts = ts.asfreq(offsets.BDay()) + monthly_ts = daily_ts.asfreq(offsets.BMonthEnd()) + tm.assert_equal(monthly_ts, ts) + + result = ts[:0].asfreq("M") + assert len(result) == 0 + assert result is not ts + + if frame_or_series is Series: + daily_ts = ts.asfreq("D", fill_value=-1) + result = daily_ts.value_counts().sort_index() + expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() + tm.assert_series_equal(result, expected) + + def test_asfreq_datetimeindex_empty(self, frame_or_series): + # GH#14320 + index = DatetimeIndex(["2016-09-29 11:00"]) + expected = frame_or_series(index=index, dtype=object).asfreq("H") + result = frame_or_series([3], index=index.copy()).asfreq("H") + tm.assert_index_equal(expected.index, result.index) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_tz_aware_asfreq_smoke(self, tz, frame_or_series): + dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) + + obj = frame_or_series(np.random.randn(len(dr)), index=dr) + + # it works! 
+ obj.asfreq("T") + + def test_asfreq_normalize(self, frame_or_series): + rng = date_range("1/1/2000 09:30", periods=20) + norm = date_range("1/1/2000", periods=20) + + vals = np.random.randn(20, 3) + + obj = DataFrame(vals, index=rng) + expected = DataFrame(vals, index=norm) + if frame_or_series is Series: + obj = obj[0] + expected = expected[0] + + result = obj.asfreq("D", normalize=True) + tm.assert_equal(result, expected) + + def test_asfreq_keep_index_name(self, frame_or_series): + # GH#9854 + index_name = "bar" + index = date_range("20130101", periods=20, name=index_name) + obj = DataFrame(list(range(20)), columns=["foo"], index=index) + obj = tm.get_obj(obj, frame_or_series) + + assert index_name == obj.index.name + assert index_name == obj.asfreq("10D").index.name + + def test_asfreq_ts(self, frame_or_series): + index = period_range(freq="A", start="1/1/2001", end="12/31/2010") + obj = DataFrame(np.random.randn(len(index), 3), index=index) + obj = tm.get_obj(obj, frame_or_series) + + result = obj.asfreq("D", how="end") + exp_index = index.asfreq("D", how="end") + assert len(result) == len(obj) + tm.assert_index_equal(result.index, exp_index) + + result = obj.asfreq("D", how="start") + exp_index = index.asfreq("D", how="start") + assert len(result) == len(obj) + tm.assert_index_equal(result.index, exp_index) + + def test_asfreq_resample_set_correct_freq(self, frame_or_series): + # GH#5613 + # we test if .asfreq() and .resample() set the correct value for .freq + dti = to_datetime(["2012-01-01", "2012-01-02", "2012-01-03"]) + obj = DataFrame({"col": [1, 2, 3]}, index=dti) + obj = tm.get_obj(obj, frame_or_series) + + # testing the settings before calling .asfreq() and .resample() + assert obj.index.freq is None + assert obj.index.inferred_freq == "D" + + # does .asfreq() set .freq correctly? + assert obj.asfreq("D").index.freq == "D" + + # does .resample() set .freq correctly? + assert obj.resample("D").asfreq().index.freq == "D" + + def test_asfreq_empty(self, datetime_frame): + # test does not blow up on length-0 DataFrame + zero_length = datetime_frame.reindex([]) + result = zero_length.asfreq("BM") + assert result is not zero_length + + def test_asfreq(self, datetime_frame): + offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) + rule_monthly = datetime_frame.asfreq("BM") + + tm.assert_frame_equal(offset_monthly, rule_monthly) + + filled = rule_monthly.asfreq("B", method="pad") # noqa + # TODO: actually check that this worked. + + # don't forget! 
+ filled_dep = rule_monthly.asfreq("B", method="pad") # noqa + + def test_asfreq_datetimeindex(self): + df = DataFrame( + {"A": [1, 2, 3]}, + index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)], + ) + df = df.asfreq("B") + assert isinstance(df.index, DatetimeIndex) + + ts = df["A"].asfreq("B") + assert isinstance(ts.index, DatetimeIndex) + + def test_asfreq_fillvalue(self): + # test for fill value during upsampling, related to issue 3715 + + # setup + rng = date_range("1/1/2016", periods=10, freq="2S") + ts = Series(np.arange(len(rng)), index=rng) + df = DataFrame({"one": ts}) + + # insert pre-existing missing value + df.loc["2016-01-01 00:00:08", "one"] = None + + actual_df = df.asfreq(freq="1S", fill_value=9.0) + expected_df = df.asfreq(freq="1S").fillna(9.0) + expected_df.loc["2016-01-01 00:00:08", "one"] = None + tm.assert_frame_equal(expected_df, actual_df) + + expected_series = ts.asfreq(freq="1S").fillna(9.0) + actual_series = ts.asfreq(freq="1S", fill_value=9.0) + tm.assert_series_equal(expected_series, actual_series) + + def test_asfreq_with_date_object_index(self, frame_or_series): + rng = date_range("1/1/2000", periods=20) + ts = frame_or_series(np.random.randn(20), index=rng) + + ts2 = ts.copy() + ts2.index = [x.date() for x in ts2.index] + + result = ts2.asfreq("4H", method="ffill") + expected = ts.asfreq("4H", method="ffill") + tm.assert_equal(result, expected) + + def test_asfreq_with_unsorted_index(self, frame_or_series): + # GH#39805 + # Test that rows are not dropped when the datetime index is out of order + index = to_datetime(["2021-01-04", "2021-01-02", "2021-01-03", "2021-01-01"]) + result = frame_or_series(range(4), index=index) + + expected = result.reindex(sorted(index)) + expected.index = expected.index._with_freq("infer") + + result = result.asfreq("D") + tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py new file mode 100644 index 00000000..0b27fe59 --- /dev/null +++ b/pandas/tests/frame/methods/test_asof.py @@ -0,0 +1,195 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import ( + DataFrame, + Period, + Series, + Timestamp, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +@pytest.fixture +def date_range_frame(): + """ + Fixture for DataFrame of ints with date_range index + + Columns are ['A', 'B']. 
+ """ + N = 50 + rng = date_range("1/1/1990", periods=N, freq="53s") + return DataFrame({"A": np.arange(N), "B": np.arange(N)}, index=rng) + + +class TestFrameAsof: + def test_basic(self, date_range_frame): + df = date_range_frame + N = 50 + df.loc[df.index[15:30], "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + result = df.asof(dates) + assert result.notna().all(1).all() + lb = df.index[14] + ub = df.index[30] + + dates = list(dates) + + result = df.asof(dates) + assert result.notna().all(1).all() + + mask = (result.index >= lb) & (result.index < ub) + rs = result[mask] + assert (rs == 14).all(1).all() + + def test_subset(self, date_range_frame): + N = 10 + df = date_range_frame.iloc[:N].copy() + df.loc[df.index[4:8], "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + # with a subset of A should be the same + result = df.asof(dates, subset="A") + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) + + # same with A/B + result = df.asof(dates, subset=["A", "B"]) + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) + + # B gives df.asof + result = df.asof(dates, subset="B") + expected = df.resample("25s", closed="right").ffill().reindex(dates) + expected.iloc[20:] = 9 + # no "missing", so "B" can retain int dtype (df["A"].dtype platform-dependent) + expected["B"] = expected["B"].astype(df["B"].dtype) + + tm.assert_frame_equal(result, expected) + + def test_missing(self, date_range_frame): + # GH 15118 + # no match found - `where` value before earliest date in index + N = 10 + df = date_range_frame.iloc[:N].copy() + + result = df.asof("1989-12-31") + + expected = Series( + index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64 + ) + tm.assert_series_equal(result, expected) + + result = df.asof(to_datetime(["1989-12-31"])) + expected = DataFrame( + index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64" + ) + tm.assert_frame_equal(result, expected) + + # Check that we handle PeriodIndex correctly, dont end up with + # period.ordinal for series name + df = df.to_period("D") + result = df.asof("1989-12-31") + assert isinstance(result.name, Period) + + def test_asof_all_nans(self, frame_or_series): + # GH 15713 + # DataFrame/Series is all nans + result = frame_or_series([np.nan]).asof([0]) + expected = frame_or_series([np.nan]) + tm.assert_equal(result, expected) + + def test_all_nans(self, date_range_frame): + # GH 15713 + # DataFrame is all nans + + # testing non-default indexes, multiple inputs + N = 150 + rng = date_range_frame.index + dates = date_range("1/1/1990", periods=N, freq="25s") + result = DataFrame(np.nan, index=rng, columns=["A"]).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=["A"]) + tm.assert_frame_equal(result, expected) + + # testing multiple columns + dates = date_range("1/1/1990", periods=N, freq="25s") + result = DataFrame(np.nan, index=rng, columns=["A", "B", "C"]).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + # testing scalar input + result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof([3]) + expected = DataFrame(np.nan, index=[3], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof(3) + expected = Series(np.nan, index=["A", "B"], name=3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "stamp,expected", + [ + ( + Timestamp("2018-01-01 
23:22:43.325+00:00"), + Series(2, name=Timestamp("2018-01-01 23:22:43.325+00:00")), + ), + ( + Timestamp("2018-01-01 22:33:20.682+01:00"), + Series(1, name=Timestamp("2018-01-01 22:33:20.682+01:00")), + ), + ], + ) + def test_time_zone_aware_index(self, stamp, expected): + # GH21194 + # Testing awareness of DataFrame index considering different + # UTC and timezone + df = DataFrame( + data=[1, 2], + index=[ + Timestamp("2018-01-01 21:00:05.001+00:00"), + Timestamp("2018-01-01 22:35:10.550+00:00"), + ], + ) + + result = df.asof(stamp) + tm.assert_series_equal(result, expected) + + def test_is_copy(self, date_range_frame): + # GH-27357, GH-30784: ensure the result of asof is an actual copy and + # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings + df = date_range_frame + N = 50 + df.loc[df.index[15:30], "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + result = df.asof(dates) + + with tm.assert_produces_warning(None): + result["C"] = 1 + + def test_asof_periodindex_mismatched_freq(self): + N = 50 + rng = period_range("1/1/1990", periods=N, freq="H") + df = DataFrame(np.random.randn(N), index=rng) + + # Mismatched freq + msg = "Input has different freq" + with pytest.raises(IncompatibleFrequency, match=msg): + df.asof(rng.asfreq("D")) + + def test_asof_preserves_bool_dtype(self): + # GH#16063 was casting bools to floats + dti = date_range("2017-01-01", freq="MS", periods=4) + ser = Series([True, False, True], index=dti[:-1]) + + ts = dti[-1] + res = ser.asof([ts]) + + expected = Series([True], index=[ts]) + tm.assert_series_equal(res, expected) diff --git a/pandas/tests/frame/methods/test_assign.py b/pandas/tests/frame/methods/test_assign.py new file mode 100644 index 00000000..0ae501d4 --- /dev/null +++ b/pandas/tests/frame/methods/test_assign.py @@ -0,0 +1,84 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestAssign: + def test_assign(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + original = df.copy() + result = df.assign(C=df.B / df.A) + expected = df.copy() + expected["C"] = [4, 2.5, 2] + tm.assert_frame_equal(result, expected) + + # lambda syntax + result = df.assign(C=lambda x: x.B / x.A) + tm.assert_frame_equal(result, expected) + + # original is unmodified + tm.assert_frame_equal(df, original) + + # Non-Series array-like + result = df.assign(C=[4, 2.5, 2]) + tm.assert_frame_equal(result, expected) + # original is unmodified + tm.assert_frame_equal(df, original) + + result = df.assign(B=df.B / df.A) + expected = expected.drop("B", axis=1).rename(columns={"C": "B"}) + tm.assert_frame_equal(result, expected) + + # overwrite + result = df.assign(A=df.A + df.B) + expected = df.copy() + expected["A"] = [5, 7, 9] + tm.assert_frame_equal(result, expected) + + # lambda + result = df.assign(A=lambda x: x.A + x.B) + tm.assert_frame_equal(result, expected) + + def test_assign_multiple(self): + df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"]) + result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) + expected = DataFrame( + [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE") + ) + tm.assert_frame_equal(result, expected) + + def test_assign_order(self): + # GH 9818 + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + result = df.assign(D=df.A + df.B, C=df.A - df.B) + + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC")) + tm.assert_frame_equal(result, expected) + result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = 
DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD")) + + tm.assert_frame_equal(result, expected) + + def test_assign_bad(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + # non-keyword argument + msg = r"assign\(\) takes 1 positional argument but 2 were given" + with pytest.raises(TypeError, match=msg): + df.assign(lambda x: x.A) + msg = "'DataFrame' object has no attribute 'C'" + with pytest.raises(AttributeError, match=msg): + df.assign(C=df.A, D=df.A + df.C) + + def test_assign_dependent(self): + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + + result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) + + result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py new file mode 100644 index 00000000..6d343de9 --- /dev/null +++ b/pandas/tests/frame/methods/test_astype.py @@ -0,0 +1,783 @@ +import re + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + CategoricalDtype, + DataFrame, + DatetimeTZDtype, + Interval, + IntervalDtype, + NaT, + Series, + Timedelta, + Timestamp, + concat, + date_range, + option_context, +) +import pandas._testing as tm +from pandas.core.api import UInt64Index + + +def _check_cast(df, v): + """ + Check if all dtypes of df are equal to v + """ + assert all(s.dtype.name == v for _, s in df.items()) + + +class TestAstype: + def test_astype_float(self, float_frame): + casted = float_frame.astype(int) + expected = DataFrame( + float_frame.values.astype(int), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + casted = float_frame.astype(np.int32) + expected = DataFrame( + float_frame.values.astype(np.int32), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + float_frame["foo"] = "5" + casted = float_frame.astype(int) + expected = DataFrame( + float_frame.values.astype(int), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + def test_astype_mixed_float(self, mixed_float_frame): + # mixed casting + casted = mixed_float_frame.reindex(columns=["A", "B"]).astype("float32") + _check_cast(casted, "float32") + + casted = mixed_float_frame.reindex(columns=["A", "B"]).astype("float16") + _check_cast(casted, "float16") + + def test_astype_mixed_type(self, mixed_type_frame): + # mixed casting + mn = mixed_type_frame._get_numeric_data().copy() + mn["little_float"] = np.array(12345.0, dtype="float16") + mn["big_float"] = np.array(123456789101112.0, dtype="float64") + + casted = mn.astype("float64") + _check_cast(casted, "float64") + + casted = mn.astype("int64") + _check_cast(casted, "int64") + + casted = mn.reindex(columns=["little_float"]).astype("float16") + _check_cast(casted, "float16") + + casted = mn.astype("float32") + _check_cast(casted, "float32") + + casted = mn.astype("int32") + _check_cast(casted, "int32") + + # to object + casted = mn.astype("O") + _check_cast(casted, "object") + + def test_astype_with_exclude_string(self, float_frame): + df = float_frame.copy() + expected = float_frame.astype(int) + df["string"] = "foo" + casted = df.astype(int, 
errors="ignore") + + expected["string"] = "foo" + tm.assert_frame_equal(casted, expected) + + df = float_frame.copy() + expected = float_frame.astype(np.int32) + df["string"] = "foo" + casted = df.astype(np.int32, errors="ignore") + + expected["string"] = "foo" + tm.assert_frame_equal(casted, expected) + + def test_astype_with_view_float(self, float_frame): + + # this is the only real reason to do it this way + tf = np.round(float_frame).astype(np.int32) + casted = tf.astype(np.float32, copy=False) + + # TODO(wesm): verification? + tf = float_frame.astype(np.float64) + casted = tf.astype(np.int64, copy=False) # noqa + + def test_astype_with_view_mixed_float(self, mixed_float_frame): + + tf = mixed_float_frame.reindex(columns=["A", "B", "C"]) + + casted = tf.astype(np.int64) + casted = tf.astype(np.float32) # noqa + + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + @pytest.mark.parametrize("val", [np.nan, np.inf]) + def test_astype_cast_nan_inf_int(self, val, dtype): + # see GH#14265 + # + # Check NaN and inf --> raise error when converting to int. + msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" + df = DataFrame([val]) + + with pytest.raises(ValueError, match=msg): + df.astype(dtype) + + def test_astype_str(self): + # see GH#9757 + a = Series(date_range("2010-01-04", periods=5)) + b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) + c = Series([Timedelta(x, unit="d") for x in range(5)]) + d = Series(range(5)) + e = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + + df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e}) + + # Datetime-like + result = df.astype(str) + + expected = DataFrame( + { + "a": list(map(str, map(lambda x: Timestamp(x)._date_repr, a._values))), + "b": list(map(str, map(Timestamp, b._values))), + "c": list(map(lambda x: Timedelta(x)._repr_base(), c._values)), + "d": list(map(str, d._values)), + "e": list(map(str, e._values)), + } + ) + + tm.assert_frame_equal(result, expected) + + def test_astype_str_float(self): + # see GH#11302 + result = DataFrame([np.NaN]).astype(str) + expected = DataFrame(["nan"]) + + tm.assert_frame_equal(result, expected) + result = DataFrame([1.12345678901234567890]).astype(str) + + val = "1.1234567890123457" + expected = DataFrame([val]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): + # GH7271 & GH16717 + a = Series(date_range("2010-01-04", periods=5)) + b = Series(range(5)) + c = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + d = Series(["1.0", "2", "3.14", "4", "5.4"]) + df = DataFrame({"a": a, "b": b, "c": c, "d": d}) + original = df.copy(deep=True) + + # change type of a subset of columns + dt1 = dtype_class({"b": "str", "d": "float32"}) + result = df.astype(dt1) + expected = DataFrame( + { + "a": a, + "b": Series(["0", "1", "2", "3", "4"]), + "c": c, + "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, original) + + dt2 = dtype_class({"b": np.float32, "c": "float32", "d": np.float64}) + result = df.astype(dt2) + expected = DataFrame( + { + "a": a, + "b": Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype="float32"), + "c": Series([0.0, 0.2, 0.4, 0.6, 0.8], dtype="float32"), + "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float64"), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, original) + + # change all columns + dt3 = dtype_class({"a": str, "b": str, "c": str, "d": str}) + 
tm.assert_frame_equal(df.astype(dt3), df.astype(str)) + tm.assert_frame_equal(df, original) + + # error should be raised when using something other than column labels + # in the keys of the dtype dict + dt4 = dtype_class({"b": str, 2: str}) + dt5 = dtype_class({"e": str}) + msg_frame = ( + "Only a column name can be used for the key in a dtype mappings argument. " + "'{}' not found in columns." + ) + with pytest.raises(KeyError, match=msg_frame.format(2)): + df.astype(dt4) + with pytest.raises(KeyError, match=msg_frame.format("e")): + df.astype(dt5) + tm.assert_frame_equal(df, original) + + # if the dtypes provided are the same as the original dtypes, the + # resulting DataFrame should be the same as the original DataFrame + dt6 = dtype_class({col: df[col].dtype for col in df.columns}) + equiv = df.astype(dt6) + tm.assert_frame_equal(df, equiv) + tm.assert_frame_equal(df, original) + + # GH#16717 + # if dtypes provided is empty, the resulting DataFrame + # should be the same as the original DataFrame + dt7 = dtype_class({}) if dtype_class is dict else dtype_class({}, dtype=object) + equiv = df.astype(dt7) + tm.assert_frame_equal(df, equiv) + tm.assert_frame_equal(df, original) + + def test_astype_duplicate_col(self): + a1 = Series([1, 2, 3, 4, 5], name="a") + b = Series([0.1, 0.2, 0.4, 0.6, 0.8], name="b") + a2 = Series([0, 1, 2, 3, 4], name="a") + df = concat([a1, b, a2], axis=1) + + result = df.astype(str) + a1_str = Series(["1", "2", "3", "4", "5"], dtype="str", name="a") + b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype=str, name="b") + a2_str = Series(["0", "1", "2", "3", "4"], dtype="str", name="a") + expected = concat([a1_str, b_str, a2_str], axis=1) + tm.assert_frame_equal(result, expected) + + result = df.astype({"a": "str"}) + expected = concat([a1_str, b, a2_str], axis=1) + tm.assert_frame_equal(result, expected) + + def test_astype_duplicate_col_series_arg(self): + # GH#44417 + vals = np.random.randn(3, 4) + df = DataFrame(vals, columns=["A", "B", "C", "A"]) + dtypes = df.dtypes + dtypes.iloc[0] = str + dtypes.iloc[2] = "Float64" + + result = df.astype(dtypes) + expected = DataFrame( + { + 0: vals[:, 0].astype(str), + 1: vals[:, 1], + 2: pd.array(vals[:, 2], dtype="Float64"), + 3: vals[:, 3], + } + ) + expected.columns = df.columns + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "category", + CategoricalDtype(), + CategoricalDtype(ordered=True), + CategoricalDtype(ordered=False), + CategoricalDtype(categories=list("abcdef")), + CategoricalDtype(categories=list("edba"), ordered=False), + CategoricalDtype(categories=list("edcb"), ordered=True), + ], + ids=repr, + ) + def test_astype_categorical(self, dtype): + # GH#18099 + d = {"A": list("abbc"), "B": list("bccd"), "C": list("cdde")} + df = DataFrame(d) + result = df.astype(dtype) + expected = DataFrame({k: Categorical(d[k], dtype=dtype) for k in d}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("cls", [CategoricalDtype, DatetimeTZDtype, IntervalDtype]) + def test_astype_categoricaldtype_class_raises(self, cls): + df = DataFrame({"A": ["a", "a", "b", "c"]}) + xpr = f"Expected an instance of {cls.__name__}" + with pytest.raises(TypeError, match=xpr): + df.astype({"A": cls}) + + with pytest.raises(TypeError, match=xpr): + df["A"].astype(cls) + + @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) + def test_astype_extension_dtypes(self, dtype): + # GH#22578 + df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) + + expected1 = 
DataFrame( + { + "a": pd.array([1, 3, 5], dtype=dtype), + "b": pd.array([2, 4, 6], dtype=dtype), + } + ) + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + tm.assert_frame_equal(df.astype(dtype).astype("float64"), df) + + df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) + df["b"] = df["b"].astype(dtype) + expected2 = DataFrame( + {"a": [1.0, 3.0, 5.0], "b": pd.array([2, 4, 6], dtype=dtype)} + ) + tm.assert_frame_equal(df, expected2) + + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) + def test_astype_extension_dtypes_1d(self, dtype): + # GH#22578 + df = DataFrame({"a": [1.0, 2.0, 3.0]}) + + expected1 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)}) + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + df = DataFrame({"a": [1.0, 2.0, 3.0]}) + df["a"] = df["a"].astype(dtype) + expected2 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)}) + tm.assert_frame_equal(df, expected2) + + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + @pytest.mark.parametrize("dtype", ["category", "Int64"]) + def test_astype_extension_dtypes_duplicate_col(self, dtype): + # GH#24704 + a1 = Series([0, np.nan, 4], name="a") + a2 = Series([np.nan, 3, 5], name="a") + df = concat([a1, a2], axis=1) + + result = df.astype(dtype) + expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] + ) + def test_astype_column_metadata(self, dtype): + # GH#19920 + columns = UInt64Index([100, 200, 300], name="foo") + df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) + df = df.astype(dtype) + tm.assert_index_equal(df.columns, columns) + + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_from_datetimelike_to_object(self, dtype, unit): + # tests astype to object dtype + # GH#19223 / GH#12425 + dtype = f"{dtype}[{unit}]" + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(object) + assert (result.dtypes == object).all() + + if dtype.startswith("M8"): + assert result.iloc[0, 0] == Timestamp(1, unit=unit) + else: + assert result.iloc[0, 0] == Timedelta(1, unit=unit) + + @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units from numeric origination + # GH#19223 / GH#12425 + dtype = f"{dtype}[{unit}]" + arr = np.array([[1, 2, 3]], dtype=arr_dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_datetime_unit(self, unit): + # tests all units from datetime origination + # GH#19223 + dtype = f"M8[{unit}]" + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + 
@pytest.mark.parametrize("unit", ["ns"]) + def test_astype_to_timedelta_unit_ns(self, unit): + # preserve the timedelta conversion + # GH#19223 + dtype = f"m8[{unit}]" + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"]) + def test_astype_to_timedelta_unit(self, unit): + # coerce to float + # GH#19223 + dtype = f"m8[{unit}]" + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(df.values.astype(dtype).astype(float)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_incorrect_datetimelike(self, unit): + # trying to astype a m to a M, or vice-versa + # GH#19224 + dtype = f"M8[{unit}]" + other = f"m8[{unit}]" + + df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) + msg = "|".join( + [ + # BlockManager path + rf"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]", + # ArrayManager path + "cannot astype a datetimelike from " + rf"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]", + ] + ) + with pytest.raises(TypeError, match=msg): + df.astype(other) + + msg = "|".join( + [ + # BlockManager path + rf"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]", + # ArrayManager path + "cannot astype a timedelta from " + rf"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]", + ] + ) + df = DataFrame(np.array([[1, 2, 3]], dtype=other)) + with pytest.raises(TypeError, match=msg): + df.astype(dtype) + + def test_astype_arg_for_errors(self): + # GH#14878 + + df = DataFrame([1, 2, 3]) + + msg = ( + "Expected value of kwarg 'errors' to be one of " + "['raise', 'ignore']. 
Supplied value is 'True'" + ) + with pytest.raises(ValueError, match=re.escape(msg)): + df.astype(np.float64, errors=True) + + df.astype(np.int8, errors="ignore") + + def test_astype_arg_for_errors_dictlist(self): + # GH#25905 + df = DataFrame( + [ + {"a": "1", "b": "16.5%", "c": "test"}, + {"a": "2.2", "b": "15.3", "c": "another_test"}, + ] + ) + expected = DataFrame( + [ + {"a": 1.0, "b": "16.5%", "c": "test"}, + {"a": 2.2, "b": "15.3", "c": "another_test"}, + ] + ) + type_dict = {"a": "float64", "b": "float64", "c": "object"} + + result = df.astype(dtype=type_dict, errors="ignore") + + tm.assert_frame_equal(result, expected) + + def test_astype_dt64tz(self, timezone_frame): + # astype + expected = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ], + dtype=object, + ).T + expected = DataFrame( + expected, + index=timezone_frame.index, + columns=timezone_frame.columns, + dtype=object, + ) + result = timezone_frame.astype(object) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + # dt64tz->dt64 deprecated + result = timezone_frame.astype("datetime64[ns]") + expected = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": ( + date_range("20130101", periods=3, tz="US/Eastern") + .tz_convert("UTC") + .tz_localize(None) + ), + "C": ( + date_range("20130101", periods=3, tz="CET") + .tz_convert("UTC") + .tz_localize(None) + ), + } + ) + expected.iloc[1, 1] = NaT + expected.iloc[1, 2] = NaT + tm.assert_frame_equal(result, expected) + + def test_astype_dt64tz_to_str(self, timezone_frame): + # str formatting + result = timezone_frame.astype(str) + expected = DataFrame( + [ + [ + "2013-01-01", + "2013-01-01 00:00:00-05:00", + "2013-01-01 00:00:00+01:00", + ], + ["2013-01-02", "NaT", "NaT"], + [ + "2013-01-03", + "2013-01-03 00:00:00-05:00", + "2013-01-03 00:00:00+01:00", + ], + ], + columns=timezone_frame.columns, + ) + tm.assert_frame_equal(result, expected) + + with option_context("display.max_columns", 20): + result = str(timezone_frame) + assert ( + "0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00" + ) in result + assert ( + "1 2013-01-02 NaT NaT" + ) in result + assert ( + "2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00" + ) in result + + def test_astype_empty_dtype_dict(self): + # issue mentioned further down in the following issue's thread + # https://github.com/pandas-dev/pandas/issues/33113 + df = DataFrame() + result = df.astype({}) + tm.assert_frame_equal(result, df) + assert result is not df + + @pytest.mark.parametrize( + "data, dtype", + [ + (["x", "y", "z"], "string[python]"), + pytest.param( + ["x", "y", "z"], + "string[pyarrow]", + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), + (["x", "y", "z"], "category"), + (3 * [Timestamp("2020-01-01", tz="UTC")], None), + (3 * [Interval(0, 1)], None), + ], + ) + @pytest.mark.parametrize("errors", ["raise", "ignore"]) + def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors): + # https://github.com/pandas-dev/pandas/issues/35471 + df = DataFrame(Series(data, dtype=dtype)) + if errors == "ignore": + expected = df + result = df.astype(float, errors=errors) + tm.assert_frame_equal(result, 
expected) + else: + msg = "(Cannot cast)|(could not convert)" + with pytest.raises((ValueError, TypeError), match=msg): + df.astype(float, errors=errors) + + def test_astype_tz_conversion(self): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + df = DataFrame(val) + result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"}) + + expected = df + expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"]) + def test_astype_tz_object_conversion(self, tz): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + expected = DataFrame(val) + + # convert expected to object dtype from other tz str (independently tested) + result = expected.astype({"tz": f"datetime64[ns, {tz}]"}) + result = result.astype({"tz": "object"}) + + # do real test: object dtype to a specified tz, different from construction tz. + result = result.astype({"tz": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(result, expected) + + def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture): + # GH#41409 + tz = tz_naive_fixture + + dti = date_range("2016-01-01", periods=3, tz=tz) + dta = dti._data + dta[0] = NaT + + obj = frame_or_series(dta) + result = obj.astype("string") + + # Check that Series/DataFrame.astype matches DatetimeArray.astype + expected = frame_or_series(dta.astype("string")) + tm.assert_equal(result, expected) + + item = result.iloc[0] + if frame_or_series is DataFrame: + item = item.iloc[0] + assert item is pd.NA + + # For non-NA values, we should match what we get for non-EA str + alt = obj.astype(str) + assert np.all(alt.iloc[1:] == result.iloc[1:]) + + def test_astype_td64_to_string(self, frame_or_series): + # GH#41409 + tdi = pd.timedelta_range("1 Day", periods=3) + obj = frame_or_series(tdi) + + expected = frame_or_series(["1 days", "2 days", "3 days"], dtype="string") + result = obj.astype("string") + tm.assert_equal(result, expected) + + def test_astype_bytes(self): + # GH#39474 + result = DataFrame(["foo", "bar", "baz"]).astype(bytes) + assert result.dtypes[0] == np.dtype("S3") + + @pytest.mark.parametrize( + "index_slice", + [ + np.s_[:2, :2], + np.s_[:1, :2], + np.s_[:2, :1], + np.s_[::2, ::2], + np.s_[::1, ::2], + np.s_[::2, ::1], + ], + ) + def test_astype_noncontiguous(self, index_slice): + # GH#42396 + data = np.arange(16).reshape(4, 4) + df = DataFrame(data) + + result = df.iloc[index_slice].astype("int16") + expected = df.iloc[index_slice] + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_astype_retain_attrs(self, any_numpy_dtype): + # GH#44414 + df = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]}) + df.attrs["Location"] = "Michigan" + + result = df.astype({"a": any_numpy_dtype}).attrs + expected = df.attrs + + tm.assert_dict_equal(expected, result) + + +class TestAstypeCategorical: + def test_astype_from_categorical3(self): + df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]}) + cats = Categorical([1, 2, 3, 4, 5, 6]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + def test_astype_from_categorical4(self): + df = DataFrame( + {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]} + ) + cats = Categorical(["a", "b", "b", "a", "a", "d"]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = 
df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + def test_categorical_astype_to_int(self, any_int_dtype): + # GH#39402 + + df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])}) + df.col1 = df.col1.astype("category") + df.col1 = df.col1.astype(any_int_dtype) + expected = DataFrame({"col1": pd.array([2, 1, 3], dtype=any_int_dtype)}) + tm.assert_frame_equal(df, expected) + + def test_astype_categorical_to_string_missing(self): + # https://github.com/pandas-dev/pandas/issues/41797 + df = DataFrame(["a", "b", np.nan]) + expected = df.astype(str) + cat = df.astype("category") + result = cat.astype(str) + tm.assert_frame_equal(result, expected) + + +class IntegerArrayNoCopy(pd.core.arrays.IntegerArray): + # GH 42501 + + def copy(self): + assert False + + +class Int16DtypeNoCopy(pd.Int16Dtype): + # GH 42501 + + @classmethod + def construct_array_type(cls): + return IntegerArrayNoCopy + + +def test_frame_astype_no_copy(): + # GH 42501 + df = DataFrame({"a": [1, 4, None, 5], "b": [6, 7, 8, 9]}, dtype=object) + result = df.astype({"a": Int16DtypeNoCopy()}, copy=False) + + assert result.a.dtype == pd.Int16Dtype() + assert np.shares_memory(df.b.values, result.b.values) diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py new file mode 100644 index 00000000..8537c32c --- /dev/null +++ b/pandas/tests/frame/methods/test_at_time.py @@ -0,0 +1,124 @@ +from datetime import time + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import timezones + +from pandas import ( + DataFrame, + date_range, +) +import pandas._testing as tm + + +class TestAtTime: + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_at_time(self, tzstr, frame_or_series): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = frame_or_series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + result = ts_local.at_time(time(10, 0)) + expected = ts.at_time(time(10, 0)).tz_localize(tzstr) + tm.assert_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_at_time(self, frame_or_series): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = tm.get_obj(ts, frame_or_series) + rs = ts.at_time(rng[1]) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() + + result = ts.at_time("9:30") + expected = ts.at_time(time(9, 30)) + tm.assert_equal(result, expected) + + def test_at_time_midnight(self, frame_or_series): + # midnight, everything + rng = date_range("1/1/2000", "1/31/2000") + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + ts = tm.get_obj(ts, frame_or_series) + + result = ts.at_time(time(0, 0)) + tm.assert_equal(result, ts) + + def test_at_time_nonexistent(self, frame_or_series): + # time doesn't exist + rng = date_range("1/1/2012", freq="23Min", periods=384) + ts = DataFrame(np.random.randn(len(rng)), rng) + ts = tm.get_obj(ts, frame_or_series) + rs = ts.at_time("16:00") + assert len(rs) == 0 + + @pytest.mark.parametrize( + "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)] + ) + def test_at_time_errors(self, hour): + # GH#24043 + dti = date_range("2018", periods=3, freq="H") + df = DataFrame(list(range(len(dti))), index=dti) + if getattr(hour, "tzinfo", None) is None: + result = df.at_time(hour) + expected = df.iloc[1:2] + 
tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="Index must be timezone"): + df.at_time(hour) + + def test_at_time_tz(self): + # GH#24043 + dti = date_range("2018", periods=3, freq="H", tz="US/Pacific") + df = DataFrame(list(range(len(dti))), index=dti) + result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern"))) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + + def test_at_time_raises(self, frame_or_series): + # GH#20725 + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = tm.get_obj(obj, frame_or_series) + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.at_time("00:00") + + @pytest.mark.parametrize("axis", ["index", "columns", 0, 1]) + def test_at_time_axis(self, axis): + # issue 8839 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + ts.index, ts.columns = rng, rng + + indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] + + if axis in ["index", 0]: + expected = ts.loc[indices, :] + elif axis in ["columns", 1]: + expected = ts.loc[:, indices] + + result = ts.at_time("9:30", axis=axis) + + # Without clearing freq, result has freq 1440T and expected 5T + result.index = result.index._with_freq(None) + expected.index = expected.index._with_freq(None) + tm.assert_frame_equal(result, expected) + + def test_at_time_datetimeindex(self): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + akey = time(12, 0, 0) + ainds = [24, 72, 120, 168] + + result = df.at_time(akey) + expected = df.loc[akey] + expected2 = df.iloc[ainds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 4 diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py new file mode 100644 index 00000000..eb5cfbc2 --- /dev/null +++ b/pandas/tests/frame/methods/test_between_time.py @@ -0,0 +1,289 @@ +from datetime import ( + datetime, + time, +) + +import numpy as np +import pytest + +from pandas._libs.tslibs import timezones +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestBetweenTime: + @td.skip_if_not_us_locale + def test_between_time_formats(self, frame_or_series): + # GH#11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = tm.get_obj(ts, frame_or_series) + + strings = [ + ("2:00", "2:30"), + ("0200", "0230"), + ("2:00am", "2:30am"), + ("0200am", "0230am"), + ("2:00:00", "2:30:00"), + ("020000", "023000"), + ("2:00:00am", "2:30:00am"), + ("020000am", "023000am"), + ] + expected_length = 28 + + for time_string in strings: + assert len(ts.between_time(*time_string)) == expected_length + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_between_time(self, tzstr, frame_or_series): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = Series(np.random.randn(len(rng)), index=rng) + if frame_or_series is DataFrame: + ts = ts.to_frame() + + ts_local = ts.tz_localize(tzstr) + + t1, t2 = time(10, 0), time(11, 0) + result = ts_local.between_time(t1, t2) + expected = ts.between_time(t1, t2).tz_localize(tzstr) + tm.assert_equal(result, expected) + assert 
timezones.tz_compare(result.index.tz, tz) + + def test_between_time_types(self, frame_or_series): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + obj = DataFrame({"A": 0}, index=rng) + obj = tm.get_obj(obj, frame_or_series) + + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + obj.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = tm.get_obj(ts, frame_or_series) + + stime = time(0, 0) + etime = time(1, 0) + inclusive = inclusive_endpoints_fixture + + filtered = ts.between_time(stime, etime, inclusive=inclusive) + exp_len = 13 * 4 + 1 + + if inclusive in ["right", "neither"]: + exp_len -= 5 + if inclusive in ["left", "neither"]: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inclusive in ["left", "both"]: + assert t >= stime + else: + assert t > stime + + if inclusive in ["right", "both"]: + assert t <= etime + else: + assert t < etime + + result = ts.between_time("00:00", "01:00") + expected = ts.between_time(stime, etime) + tm.assert_equal(result, expected) + + # across midnight + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = tm.get_obj(ts, frame_or_series) + stime = time(22, 0) + etime = time(9, 0) + + filtered = ts.between_time(stime, etime, inclusive=inclusive) + exp_len = (12 * 11 + 1) * 4 + 1 + if inclusive in ["right", "neither"]: + exp_len -= 4 + if inclusive in ["left", "neither"]: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inclusive in ["left", "both"]: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inclusive in ["right", "both"]: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t >= stime) + + def test_between_time_raises(self, frame_or_series): + # GH#20725 + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = tm.get_obj(obj, frame_or_series) + + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_axis(self, frame_or_series): + # GH#8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = Series(np.random.randn(len(rng)), index=rng) + if frame_or_series is DataFrame: + ts = ts.to_frame() + + stime, etime = ("08:00:00", "09:00:00") + expected_length = 7 + + assert len(ts.between_time(stime, etime)) == expected_length + assert len(ts.between_time(stime, etime, axis=0)) == expected_length + msg = f"No axis named {ts.ndim} for object type {type(ts).__name__}" + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, axis=ts.ndim) + + def test_between_time_axis_aliases(self, axis): + # GH#8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + stime, etime = ("08:00:00", "09:00:00") + exp_len = 7 + + if axis in ["index", 0]: + ts.index = rng + assert len(ts.between_time(stime, etime)) == exp_len + assert len(ts.between_time(stime, etime, axis=0)) == exp_len + + if axis in ["columns", 1]: + ts.columns = rng + selected = ts.between_time(stime, etime, axis=1).columns + assert len(selected) == exp_len + + def 
test_between_time_axis_raises(self, axis): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + mask = np.arange(0, len(rng)) + rand_data = np.random.randn(len(rng), len(rng)) + ts = DataFrame(rand_data, index=rng, columns=rng) + stime, etime = ("08:00:00", "09:00:00") + + msg = "Index must be DatetimeIndex" + if axis in ["columns", 1]: + ts.index = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime) + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=0) + + if axis in ["index", 0]: + ts.columns = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=1) + + def test_between_time_datetimeindex(self): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + bkey = slice(time(13, 0, 0), time(14, 0, 0)) + binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] + + result = df.between_time(bkey.start, bkey.stop) + expected = df.loc[bkey] + expected2 = df.iloc[binds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 12 + + @pytest.mark.parametrize("include_start", [True, False]) + @pytest.mark.parametrize("include_end", [True, False]) + def test_between_time_warn(self, include_start, include_end, frame_or_series): + # GH40245 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = tm.get_obj(ts, frame_or_series) + + stime = time(0, 0) + etime = time(1, 0) + + match = ( + "`include_start` and `include_end` " + "are deprecated in favour of `inclusive`." + ) + with tm.assert_produces_warning(FutureWarning, match=match): + _ = ts.between_time(stime, etime, include_start, include_end) + + def test_between_time_incorr_arg_inclusive(self): + # GH40245 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + stime = time(0, 0) + etime = time(1, 0) + inclusive = "bad_string" + msg = "Inclusive has to be either 'both', 'neither', 'left' or 'right'" + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, inclusive=inclusive) + + @pytest.mark.parametrize( + "include_start, include_end", [(True, None), (True, True), (None, True)] + ) + def test_between_time_incompatiable_args_given(self, include_start, include_end): + # GH40245 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + stime = time(0, 0) + etime = time(1, 0) + msg = ( + "Deprecated arguments `include_start` and `include_end` cannot be " + "passed if `inclusive` has been given." + ) + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, include_start, include_end, inclusive="left") + + def test_between_time_same_functionality_old_and_new_args(self): + # GH40245 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(0, 0) + etime = time(1, 0) + match = ( + "`include_start` and `include_end` " + "are deprecated in favour of `inclusive`." 
+ ) + + result = ts.between_time(stime, etime) + expected = ts.between_time(stime, etime, inclusive="both") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_start=False) + expected = ts.between_time(stime, etime, inclusive="right") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_end=False) + expected = ts.between_time(stime, etime, inclusive="left") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time( + stime, etime, include_start=False, include_end=False + ) + expected = ts.between_time(stime, etime, inclusive="neither") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_start=True, include_end=True) + expected = ts.between_time(stime, etime, inclusive="both") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py new file mode 100644 index 00000000..c851e65a --- /dev/null +++ b/pandas/tests/frame/methods/test_clip.py @@ -0,0 +1,178 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameClip: + def test_clip(self, float_frame): + median = float_frame.median().median() + original = float_frame.copy() + + double = float_frame.clip(upper=median, lower=median) + assert not (double.values != median).any() + + # Verify that float_frame was not changed inplace + assert (float_frame.values == original.values).all() + + def test_inplace_clip(self, float_frame): + # GH#15388 + median = float_frame.median().median() + frame_copy = float_frame.copy() + + return_value = frame_copy.clip(upper=median, lower=median, inplace=True) + assert return_value is None + assert not (frame_copy.values != median).any() + + def test_dataframe_clip(self): + # GH#2747 + df = DataFrame(np.random.randn(1000, 2)) + + for lb, ub in [(-1, 1), (1, -1)]: + clipped_df = df.clip(lb, ub) + + lb, ub = min(lb, ub), max(ub, lb) + lb_mask = df.values <= lb + ub_mask = df.values >= ub + mask = ~lb_mask & ~ub_mask + assert (clipped_df.values[lb_mask] == lb).all() + assert (clipped_df.values[ub_mask] == ub).all() + assert (clipped_df.values[mask] == df.values[mask]).all() + + def test_clip_mixed_numeric(self): + # clip on mixed integer or floats + # GH#24162, clipping now preserves numeric types per column + df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]}) + result = df.clip(1, 2) + expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]}) + tm.assert_frame_equal(result, expected) + + df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"]) + expected = df.dtypes + result = df.clip(upper=3).dtypes + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + def test_clip_against_series(self, inplace): + # GH#6966 + + df = DataFrame(np.random.randn(1000, 2)) + lb = Series(np.random.randn(1000)) + ub = lb + 1 + + original = df.copy() + clipped_df = df.clip(lb, ub, axis=0, inplace=inplace) + + if inplace: + clipped_df = df + + for i in range(2): + lb_mask = original.iloc[:, i] <= lb + ub_mask = original.iloc[:, i] >= ub + mask = ~lb_mask & ~ub_mask + + result = clipped_df.loc[lb_mask, i] + 
tm.assert_series_equal(result, lb[lb_mask], check_names=False) + assert result.name == i + + result = clipped_df.loc[ub_mask, i] + tm.assert_series_equal(result, ub[ub_mask], check_names=False) + assert result.name == i + + tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])]) + @pytest.mark.parametrize( + "axis,res", + [ + (0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]), + (1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]), + ], + ) + def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): + # GH#15390 + original = simple_frame.copy(deep=True) + + result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace) + + expected = DataFrame(res, columns=original.columns, index=original.index) + if inplace: + result = original + tm.assert_frame_equal(result, expected, check_exact=True) + + @pytest.mark.parametrize("axis", [0, 1, None]) + def test_clip_against_frame(self, axis): + df = DataFrame(np.random.randn(1000, 2)) + lb = DataFrame(np.random.randn(1000, 2)) + ub = lb + 1 + + clipped_df = df.clip(lb, ub, axis=axis) + + lb_mask = df <= lb + ub_mask = df >= ub + mask = ~lb_mask & ~ub_mask + + tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask]) + tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) + tm.assert_frame_equal(clipped_df[mask], df[mask]) + + def test_clip_against_unordered_columns(self): + # GH#20911 + df1 = DataFrame(np.random.randn(1000, 4), columns=["A", "B", "C", "D"]) + df2 = DataFrame(np.random.randn(1000, 4), columns=["D", "A", "B", "C"]) + df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"]) + result_upper = df1.clip(lower=0, upper=df2) + expected_upper = df1.clip(lower=0, upper=df2[df1.columns]) + result_lower = df1.clip(lower=df3, upper=3) + expected_lower = df1.clip(lower=df3[df1.columns], upper=3) + result_lower_upper = df1.clip(lower=df3, upper=df2) + expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns]) + tm.assert_frame_equal(result_upper, expected_upper) + tm.assert_frame_equal(result_lower, expected_lower) + tm.assert_frame_equal(result_lower_upper, expected_lower_upper) + + def test_clip_with_na_args(self, float_frame): + """Should process np.nan argument as None""" + # GH#17276 + tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) + tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) + + # GH#19992 and adjusted in GH#40420 + df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}) + + result = df.clip(lower=[4, 5, np.nan], axis=0) + expected = DataFrame( + {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]} + ) + tm.assert_frame_equal(result, expected) + + result = df.clip(lower=[4, 5, np.nan], axis=1) + expected = DataFrame( + {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]} + ) + tm.assert_frame_equal(result, expected) + + # GH#40420 + data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]} + df = DataFrame(data) + t = Series([2, -4, np.NaN, 6, 3]) + result = df.clip(lower=t, axis=0) + expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]}) + tm.assert_frame_equal(result, expected) + + def test_clip_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.clip except " + r"for the arguments 
'lower' and 'upper' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.clip(0, 1, 0) + expected = DataFrame({"a": [1, 1, 1]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_combine.py b/pandas/tests/frame/methods/test_combine.py new file mode 100644 index 00000000..bc6a67e4 --- /dev/null +++ b/pandas/tests/frame/methods/test_combine.py @@ -0,0 +1,47 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestCombine: + @pytest.mark.parametrize( + "data", + [ + pd.date_range("2000", periods=4), + pd.date_range("2000", periods=4, tz="US/Central"), + pd.period_range("2000", periods=4), + pd.timedelta_range(0, periods=4), + ], + ) + def test_combine_datetlike_udf(self, data): + # GH#23079 + df = pd.DataFrame({"A": data}) + other = df.copy() + df.iloc[1, 0] = None + + def combiner(a, b): + return b + + result = df.combine(other, combiner) + tm.assert_frame_equal(result, other) + + def test_combine_generic(self, float_frame): + df1 = float_frame + df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]] + + combined = df1.combine(df2, np.add) + combined2 = df2.combine(df1, np.add) + assert combined["D"].isna().all() + assert combined2["D"].isna().all() + + chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]] + chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]] + + exp = ( + float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk) + * 2 + ) + tm.assert_frame_equal(chunk, exp) + tm.assert_frame_equal(chunk2, exp) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py new file mode 100644 index 00000000..47ebca0b --- /dev/null +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -0,0 +1,528 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import ( + find_common_type, + is_dtype_equal, +) + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestDataFrameCombineFirst: + def test_combine_first_mixed(self): + a = Series(["a", "b"], index=range(2)) + b = Series(range(2), index=range(2)) + f = DataFrame({"A": a, "B": b}) + + a = Series(["a", "b"], index=range(5, 7)) + b = Series(range(2), index=range(5, 7)) + g = DataFrame({"A": a, "B": b}) + + exp = DataFrame({"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6]) + combined = f.combine_first(g) + tm.assert_frame_equal(combined, exp) + + def test_combine_first(self, float_frame): + # disjoint + head, tail = float_frame[:5], float_frame[5:] + + combined = head.combine_first(tail) + reordered_frame = float_frame.reindex(combined.index) + tm.assert_frame_equal(combined, reordered_frame) + assert tm.equalContents(combined.columns, float_frame.columns) + tm.assert_series_equal(combined["A"], reordered_frame["A"]) + + # same index + fcopy = float_frame.copy() + fcopy["A"] = 1 + del fcopy["C"] + + fcopy2 = float_frame.copy() + fcopy2["B"] = 0 + del fcopy2["D"] + + combined = fcopy.combine_first(fcopy2) + + assert (combined["A"] == 1).all() + tm.assert_series_equal(combined["B"], fcopy["B"]) + tm.assert_series_equal(combined["C"], fcopy2["C"]) + tm.assert_series_equal(combined["D"], fcopy["D"]) + + # overlap + head, tail = reordered_frame[:10].copy(), reordered_frame + head["A"] = 1 + + combined = head.combine_first(tail) + assert (combined["A"][:10] == 1).all() + + # reverse overlap + 
tail.iloc[:10, tail.columns.get_loc("A")] = 0 + combined = tail.combine_first(head) + assert (combined["A"][:10] == 0).all() + + # no overlap + f = float_frame[:10] + g = float_frame[10:] + combined = f.combine_first(g) + tm.assert_series_equal(combined["A"].reindex(f.index), f["A"]) + tm.assert_series_equal(combined["A"].reindex(g.index), g["A"]) + + # corner cases + comb = float_frame.combine_first(DataFrame()) + tm.assert_frame_equal(comb, float_frame) + + comb = DataFrame().combine_first(float_frame) + tm.assert_frame_equal(comb, float_frame) + + comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) + assert "faz" in comb.index + + # #2525 + df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)]) + df2 = DataFrame(columns=["b"]) + result = df.combine_first(df2) + assert "b" in result + + def test_combine_first_mixed_bug(self): + idx = Index(["a", "b", "c", "e"]) + ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) + ser2 = Series(["a", "b", "c", "e"], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + + frame1 = DataFrame({"col0": ser1, "col2": ser2, "col3": ser3}) + + idx = Index(["a", "b", "c", "f"]) + ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) + ser2 = Series(["a", "b", "c", "f"], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + + frame2 = DataFrame({"col1": ser1, "col2": ser2, "col5": ser3}) + + combined = frame1.combine_first(frame2) + assert len(combined.columns) == 5 + + def test_combine_first_same_as_in_update(self): + # gh 3016 (same as in update) + df = DataFrame( + [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + + other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) + result = df.combine_first(other) + tm.assert_frame_equal(result, df) + + df.loc[0, "A"] = np.nan + result = df.combine_first(other) + df.loc[0, "A"] = 45 + tm.assert_frame_equal(result, df) + + def test_combine_first_doc_example(self): + # doc example + df1 = DataFrame( + {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]} + ) + + df2 = DataFrame( + { + "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0], + "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0], + } + ) + + result = df1.combine_first(df2) + expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]}) + tm.assert_frame_equal(result, expected) + + def test_combine_first_return_obj_type_with_bools(self): + # GH3552 + + df1 = DataFrame( + [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]] + ) + df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2]) + + expected = Series([True, True, False], name=2, dtype=bool) + + result_12 = df1.combine_first(df2)[2] + tm.assert_series_equal(result_12, expected) + + result_21 = df2.combine_first(df1)[2] + tm.assert_series_equal(result_21, expected) + + @pytest.mark.parametrize( + "data1, data2, data_expected", + ( + ( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [pd.NaT, pd.NaT, pd.NaT], + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + ( + [pd.NaT, pd.NaT, pd.NaT], + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + ( + [datetime(2000, 1, 2), pd.NaT, pd.NaT], + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + ( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [datetime(2000, 1, 2), pd.NaT, pd.NaT], + 
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + ), + ) + def test_combine_first_convert_datatime_correctly( + self, data1, data2, data_expected + ): + # GH 3593 + + df1, df2 = DataFrame({"a": data1}), DataFrame({"a": data2}) + result = df1.combine_first(df2) + expected = DataFrame({"a": data_expected}) + tm.assert_frame_equal(result, expected) + + def test_combine_first_align_nan(self): + # GH 7509 (not fixed) + dfa = DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) + dfb = DataFrame([[4], [5]], columns=["b"]) + assert dfa["a"].dtype == "datetime64[ns]" + assert dfa["b"].dtype == "int64" + + res = dfa.combine_first(dfb) + exp = DataFrame( + {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2, 5]}, + columns=["a", "b"], + ) + tm.assert_frame_equal(res, exp) + assert res["a"].dtype == "datetime64[ns]" + # TODO: this must be int64 + assert res["b"].dtype == "int64" + + res = dfa.iloc[:0].combine_first(dfb) + exp = DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"]) + tm.assert_frame_equal(res, exp) + # TODO: this must be datetime64 + assert res["a"].dtype == "float64" + # TODO: this must be int64 + assert res["b"].dtype == "int64" + + def test_combine_first_timezone(self): + # see gh-7630 + data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC") + df1 = DataFrame( + columns=["UTCdatetime", "abc"], + data=data1, + index=pd.date_range("20140627", periods=1), + ) + data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC") + df2 = DataFrame( + columns=["UTCdatetime", "xyz"], + data=data2, + index=pd.date_range("20140628", periods=1), + ) + res = df2[["UTCdatetime"]].combine_first(df1) + exp = DataFrame( + { + "UTCdatetime": [ + pd.Timestamp("2010-01-01 01:01", tz="UTC"), + pd.Timestamp("2012-12-12 12:12", tz="UTC"), + ], + "abc": [pd.Timestamp("2010-01-01 01:01:00", tz="UTC"), pd.NaT], + }, + columns=["UTCdatetime", "abc"], + index=pd.date_range("20140627", periods=2, freq="D"), + ) + assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" + assert res["abc"].dtype == "datetime64[ns, UTC]" + + tm.assert_frame_equal(res, exp) + + # see gh-10567 + dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC") + df1 = DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC") + df2 = DataFrame({"DATE": dts2}) + + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["DATE"].dtype == "datetime64[ns, UTC]" + + dts1 = pd.DatetimeIndex( + ["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern" + ) + df1 = DataFrame({"DATE": dts1}, index=[1, 3, 5, 7]) + dts2 = pd.DatetimeIndex( + ["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern" + ) + df2 = DataFrame({"DATE": dts2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.DatetimeIndex( + [ + "2011-01-01", + "2012-01-01", + "NaT", + "2012-01-02", + "2011-01-03", + "2011-01-04", + ], + tz="US/Eastern", + ) + exp = DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + + # different tz + dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern") + df1 = DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-03", "2015-01-05") + df2 = DataFrame({"DATE": dts2}) + + # if df1 doesn't have NaN, keep its dtype + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["DATE"].dtype == "datetime64[ns, US/Eastern]" + + dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern") + df1 = DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-01", 
"2015-01-03") + df2 = DataFrame({"DATE": dts2}) + + res = df1.combine_first(df2) + exp_dts = [ + pd.Timestamp("2015-01-01", tz="US/Eastern"), + pd.Timestamp("2015-01-02", tz="US/Eastern"), + pd.Timestamp("2015-01-03"), + ] + exp = DataFrame({"DATE": exp_dts}) + tm.assert_frame_equal(res, exp) + assert res["DATE"].dtype == "object" + + def test_combine_first_timedelta(self): + data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"]) + df1 = DataFrame({"TD": data1}, index=[1, 3, 5, 7]) + data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"]) + df2 = DataFrame({"TD": data2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.TimedeltaIndex( + ["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"] + ) + exp = DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["TD"].dtype == "timedelta64[ns]" + + def test_combine_first_period(self): + data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M") + df1 = DataFrame({"P": data1}, index=[1, 3, 5, 7]) + data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M") + df2 = DataFrame({"P": data2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.PeriodIndex( + ["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M" + ) + exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["P"].dtype == data1.dtype + + # different freq + dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D") + df2 = DataFrame({"P": dts2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = [ + pd.Period("2011-01", freq="M"), + pd.Period("2012-01-01", freq="D"), + pd.NaT, + pd.Period("2012-01-02", freq="D"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-04", freq="M"), + ] + exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["P"].dtype == "object" + + def test_combine_first_int(self): + # GH14687 - integer series that do no align exactly + + df1 = DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") + df2 = DataFrame({"a": [1, 4]}, dtype="int64") + + result_12 = df1.combine_first(df2) + expected_12 = DataFrame({"a": [0, 1, 3, 5]}) + tm.assert_frame_equal(result_12, expected_12) + + result_21 = df2.combine_first(df1) + expected_21 = DataFrame({"a": [1, 4, 3, 5]}) + tm.assert_frame_equal(result_21, expected_21) + + @pytest.mark.parametrize("val", [1, 1.0]) + def test_combine_first_with_asymmetric_other(self, val): + # see gh-20699 + df1 = DataFrame({"isNum": [val]}) + df2 = DataFrame({"isBool": [True]}) + + res = df1.combine_first(df2) + exp = DataFrame({"isBool": [True], "isNum": [val]}) + + tm.assert_frame_equal(res, exp) + + def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): + # GH: 37519 + df = DataFrame( + {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype + ) + df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype) + df.set_index(["a", "b"], inplace=True) + df2.set_index(["a", "b"], inplace=True) + result = df.combine_first(df2) + expected = DataFrame( + {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype + ).set_index(["a", "b"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "scalar1, scalar2", + [ + (datetime(2020, 1, 1), datetime(2020, 1, 2)), + (pd.Period("2020-01-01", "D"), pd.Period("2020-01-02", "D")), + (pd.Timedelta("89 days"), pd.Timedelta("60 min")), + (pd.Interval(left=0, right=1), pd.Interval(left=2, right=3, 
closed="left")), + ], +) +def test_combine_first_timestamp_bug(scalar1, scalar2, nulls_fixture): + # GH28481 + na_value = nulls_fixture + + frame = DataFrame([[na_value, na_value]], columns=["a", "b"]) + other = DataFrame([[scalar1, scalar2]], columns=["b", "c"]) + + common_dtype = find_common_type([frame.dtypes["b"], other.dtypes["b"]]) + + if is_dtype_equal(common_dtype, "object") or frame.dtypes["b"] == other.dtypes["b"]: + val = scalar1 + else: + val = na_value + + result = frame.combine_first(other) + + expected = DataFrame([[na_value, val, scalar2]], columns=["a", "b", "c"]) + + expected["b"] = expected["b"].astype(common_dtype) + + tm.assert_frame_equal(result, expected) + + +def test_combine_first_timestamp_bug_NaT(): + # GH28481 + frame = DataFrame([[pd.NaT, pd.NaT]], columns=["a", "b"]) + other = DataFrame( + [[datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["b", "c"] + ) + + result = frame.combine_first(other) + expected = DataFrame( + [[pd.NaT, datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["a", "b", "c"] + ) + + tm.assert_frame_equal(result, expected) + + +def test_combine_first_with_nan_multiindex(): + # gh-36562 + + mi1 = MultiIndex.from_arrays( + [["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"] + ) + df = DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1) + mi2 = MultiIndex.from_arrays( + [["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"] + ) + s = Series([1, 2, 3, 4, 5, 6], index=mi2) + res = df.combine_first(DataFrame({"d": s})) + mi_expected = MultiIndex.from_arrays( + [ + ["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan], + [1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6], + ], + names=["a", "b"], + ) + expected = DataFrame( + { + "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1], + "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan], + }, + index=mi_expected, + ) + tm.assert_frame_equal(res, expected) + + +def test_combine_preserve_dtypes(): + # GH7509 + a_column = Series(["a", "b"], index=range(2)) + b_column = Series(range(2), index=range(2)) + df1 = DataFrame({"A": a_column, "B": b_column}) + + c_column = Series(["a", "b"], index=range(5, 7)) + b_column = Series(range(-1, 1), index=range(5, 7)) + df2 = DataFrame({"B": b_column, "C": c_column}) + + expected = DataFrame( + { + "A": ["a", "b", np.nan, np.nan], + "B": [0, 1, -1, 0], + "C": [np.nan, np.nan, "a", "b"], + }, + index=[0, 1, 5, 6], + ) + combined = df1.combine_first(df2) + tm.assert_frame_equal(combined, expected) + + +def test_combine_first_duplicates_rows_for_nan_index_values(): + # GH39881 + df1 = DataFrame( + {"x": [9, 10, 11]}, + index=MultiIndex.from_arrays([[1, 2, 3], [np.nan, 5, 6]], names=["a", "b"]), + ) + + df2 = DataFrame( + {"y": [12, 13, 14]}, + index=MultiIndex.from_arrays([[1, 2, 4], [np.nan, 5, 7]], names=["a", "b"]), + ) + + expected = DataFrame( + { + "x": [9.0, 10.0, 11.0, np.nan], + "y": [12.0, 13.0, np.nan, 14.0], + }, + index=MultiIndex.from_arrays( + [[1, 2, 3, 4], [np.nan, 5.0, 6.0, 7.0]], names=["a", "b"] + ), + ) + combined = df1.combine_first(df2) + tm.assert_frame_equal(combined, expected) + + +def test_combine_first_int64_not_cast_to_float64(): + # GH 28613 + df_1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df_2 = DataFrame({"A": [1, 20, 30], "B": [40, 50, 60], "C": [12, 34, 65]}) + result = df_1.combine_first(df_2) + expected = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [12, 34, 65]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_compare.py 
b/pandas/tests/frame/methods/test_compare.py new file mode 100644 index 00000000..55e5db96 --- /dev/null +++ b/pandas/tests/frame/methods/test_compare.py @@ -0,0 +1,254 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"]) +def test_compare_axis(align_axis): + # GH#30429 + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + result = df.compare(df2, align_axis=align_axis) + + if align_axis in (1, "columns"): + indices = pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]], + index=indices, + columns=columns, + ) + else: + indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + columns = pd.Index(["col1", "col3"]) + expected = pd.DataFrame( + [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]], + index=indices, + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "keep_shape, keep_equal", + [ + (True, False), + (False, True), + (True, True), + # False, False case is already covered in test_compare_axis + ], +) +def test_compare_various_formats(keep_shape, keep_equal): + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal) + + if keep_shape: + indices = pd.Index([0, 1, 2]) + columns = pd.MultiIndex.from_product( + [["col1", "col2", "col3"], ["self", "other"]] + ) + if keep_equal: + expected = pd.DataFrame( + [ + ["a", "c", 1.0, 1.0, 1.0, 1.0], + ["b", "b", 2.0, 2.0, 2.0, 2.0], + ["c", "c", np.nan, np.nan, 3.0, 4.0], + ], + index=indices, + columns=columns, + ) + else: + expected = pd.DataFrame( + [ + ["a", "c", np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 3.0, 4.0], + ], + index=indices, + columns=columns, + ) + else: + indices = pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns + ) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_equal_nulls(): + # We want to make sure two NaNs are considered the same + # and dropped where applicable + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + + result = df.compare(df2) + indices = pd.Index([0]) + columns = pd.MultiIndex.from_product([["col1"], ["self", "other"]]) + expected = pd.DataFrame([["a", "c"]], index=indices, columns=columns) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_non_equal_nulls(): + # We want to make sure the relevant NaNs do not get dropped + # even if the entire row or column are NaNs + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = np.nan + + result = df.compare(df2) + + indices = 
pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan]], + index=indices, + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("align_axis", [0, 1]) +def test_compare_multi_index(align_axis): + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]} + ) + df.columns = pd.MultiIndex.from_arrays([["a", "a", "b"], ["col1", "col2", "col3"]]) + df.index = pd.MultiIndex.from_arrays([["x", "x", "y"], [0, 1, 2]]) + + df2 = df.copy() + df2.iloc[0, 0] = "c" + df2.iloc[2, 2] = 4.0 + + result = df.compare(df2, align_axis=align_axis) + + if align_axis == 0: + indices = pd.MultiIndex.from_arrays( + [["x", "x", "y", "y"], [0, 0, 2, 2], ["self", "other", "self", "other"]] + ) + columns = pd.MultiIndex.from_arrays([["a", "b"], ["col1", "col3"]]) + data = [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]] + else: + indices = pd.MultiIndex.from_arrays([["x", "y"], [0, 2]]) + columns = pd.MultiIndex.from_arrays( + [ + ["a", "a", "b", "b"], + ["col1", "col1", "col3", "col3"], + ["self", "other", "self", "other"], + ] + ) + data = [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]] + + expected = pd.DataFrame(data=data, index=indices, columns=columns) + tm.assert_frame_equal(result, expected) + + +def test_compare_unaligned_objects(): + # test DataFrames with different indices + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"]) + df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"]) + df1.compare(df2) + + # test DataFrames with different shapes + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 = pd.DataFrame(np.ones((3, 3))) + df2 = pd.DataFrame(np.zeros((2, 1))) + df1.compare(df2) + + +def test_compare_result_names(): + # GH 44354 + df1 = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + ) + df2 = pd.DataFrame( + { + "col1": ["c", "b", "c"], + "col2": [1.0, 2.0, np.nan], + "col3": [1.0, 2.0, np.nan], + }, + ) + result = df1.compare(df2, result_names=("left", "right")) + expected = pd.DataFrame( + { + ("col1", "left"): {0: "a", 2: np.nan}, + ("col1", "right"): {0: "c", 2: np.nan}, + ("col3", "left"): {0: np.nan, 2: 3.0}, + ("col3", "right"): {0: np.nan, 2: np.nan}, + } + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "result_names", + [ + [1, 2], + "HK", + {"2": 2, "3": 3}, + 3, + 3.0, + ], +) +def test_invalid_input_result_names(result_names): + # GH 44354 + df1 = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + ) + df2 = pd.DataFrame( + { + "col1": ["c", "b", "c"], + "col2": [1.0, 2.0, np.nan], + "col3": [1.0, 2.0, np.nan], + }, + ) + with pytest.raises( + TypeError, + match=( + f"Passing 'result_names' as a {type(result_names)} is not " + "supported. Provide 'result_names' as a tuple instead." 
+ ), + ): + df1.compare(df2, result_names=result_names) + + +def test_compare_ea_and_np_dtype(): + # GH#44014 + df1 = pd.DataFrame({"a": [4.0, 4], "b": [1.0, 2]}) + df2 = pd.DataFrame({"a": pd.Series([1, pd.NA], dtype="Int64"), "b": [1.0, 2]}) + result = df1.compare(df2, keep_shape=True) + expected = pd.DataFrame( + { + ("a", "self"): [4.0, np.nan], + ("a", "other"): pd.Series([1, pd.NA], dtype="Int64"), + ("b", "self"): np.nan, + ("b", "other"): np.nan, + } + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_convert.py b/pandas/tests/frame/methods/test_convert.py new file mode 100644 index 00000000..118af9f5 --- /dev/null +++ b/pandas/tests/frame/methods/test_convert.py @@ -0,0 +1,59 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestConvert: + def test_convert_objects(self, float_string_frame): + + oops = float_string_frame.T.T + converted = oops._convert(datetime=True) + tm.assert_frame_equal(converted, float_string_frame) + assert converted["A"].dtype == np.float64 + + # force numeric conversion + float_string_frame["H"] = "1." + float_string_frame["I"] = "1" + + # add in some items that will be nan + length = len(float_string_frame) + float_string_frame["J"] = "1." + float_string_frame["K"] = "1" + float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled" + converted = float_string_frame._convert(datetime=True, numeric=True) + assert converted["H"].dtype == "float64" + assert converted["I"].dtype == "int64" + assert converted["J"].dtype == "float64" + assert converted["K"].dtype == "float64" + assert len(converted["J"].dropna()) == length - 5 + assert len(converted["K"].dropna()) == length - 5 + + # via astype + converted = float_string_frame.copy() + converted["H"] = converted["H"].astype("float64") + converted["I"] = converted["I"].astype("int64") + assert converted["H"].dtype == "float64" + assert converted["I"].dtype == "int64" + + # via astype, but errors + converted = float_string_frame.copy() + with pytest.raises(ValueError, match="invalid literal"): + converted["H"].astype("int32") + + def test_convert_mixed_single_column(self): + # GH#4119, not converting a mixed type (e.g.floats and object) + # mixed in a single column + df = DataFrame({"s": Series([1, "na", 3, 4])}) + result = df._convert(datetime=True, numeric=True) + expected = DataFrame({"s": Series([1, np.nan, 3, 4])}) + tm.assert_frame_equal(result, expected) + + def test_convert_objects_no_conversion(self): + mixed1 = DataFrame({"a": [1, 2, 3], "b": [4.0, 5, 6], "c": ["x", "y", "z"]}) + mixed2 = mixed1._convert(datetime=True) + tm.assert_frame_equal(mixed1, mixed2) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py new file mode 100644 index 00000000..ec639ed7 --- /dev/null +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -0,0 +1,43 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestConvertDtypes: + @pytest.mark.parametrize( + "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")] + ) + def test_convert_dtypes(self, convert_integer, expected, string_storage): + # Specific types are tested in tests/series/test_dtypes.py + # Just check that it works for DataFrame here + df = pd.DataFrame( + { + "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), + "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")), + } + ) + with 
pd.option_context("string_storage", string_storage): + result = df.convert_dtypes(True, True, convert_integer, False) + expected = pd.DataFrame( + { + "a": pd.Series([1, 2, 3], dtype=expected), + "b": pd.Series(["x", "y", "z"], dtype=f"string[{string_storage}]"), + } + ) + tm.assert_frame_equal(result, expected) + + def test_convert_empty(self): + # Empty DataFrame can pass convert_dtypes, see GH#40393 + empty_df = pd.DataFrame() + tm.assert_frame_equal(empty_df, empty_df.convert_dtypes()) + + def test_convert_dtypes_retain_column_names(self): + # GH#41435 + df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + df.columns.name = "cols" + + result = df.convert_dtypes() + tm.assert_index_equal(result.columns, df.columns) + assert result.columns.name == "cols" diff --git a/pandas/tests/frame/methods/test_copy.py b/pandas/tests/frame/methods/test_copy.py new file mode 100644 index 00000000..1c0b0755 --- /dev/null +++ b/pandas/tests/frame/methods/test_copy.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + + +class TestCopy: + @pytest.mark.parametrize("attr", ["index", "columns"]) + def test_copy_index_name_checking(self, float_frame, attr): + # don't want to be able to modify the index stored elsewhere after + # making a copy + ind = getattr(float_frame, attr) + ind.name = None + cp = float_frame.copy() + getattr(cp, attr).name = "foo" + assert getattr(float_frame, attr).name is None + + def test_copy_cache(self): + # GH#31784 _item_cache not cleared on copy causes incorrect reads after updates + df = DataFrame({"a": [1]}) + + df["x"] = [0] + df["a"] + + df.copy() + + df["a"].values[0] = -1 + + tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0]})) + + df["y"] = [0] + + assert df["a"].values[0] == -1 + tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]})) + + def test_copy(self, float_frame, float_string_frame): + cop = float_frame.copy() + cop["E"] = cop["A"] + assert "E" not in float_frame + + # copy objects + copy = float_string_frame.copy() + assert copy._mgr is not float_string_frame._mgr + + @td.skip_array_manager_invalid_test + def test_copy_consolidates(self): + # GH#42477 + df = DataFrame( + { + "a": np.random.randint(0, 100, size=55), + "b": np.random.randint(0, 100, size=55), + } + ) + + for i in range(0, 10): + df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55) + + assert len(df._mgr.blocks) == 11 + result = df.copy() + assert len(result._mgr.blocks) == 1 diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py new file mode 100644 index 00000000..43eb96f7 --- /dev/null +++ b/pandas/tests/frame/methods/test_count.py @@ -0,0 +1,39 @@ +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameCount: + def test_count(self): + # corner case + frame = DataFrame() + ct1 = frame.count(1) + assert isinstance(ct1, Series) + + ct2 = frame.count(0) + assert isinstance(ct2, Series) + + # GH#423 + df = DataFrame(index=range(10)) + result = df.count(1) + expected = Series(0, index=df.index) + tm.assert_series_equal(result, expected) + + df = DataFrame(columns=range(10)) + result = df.count(0) + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + df = DataFrame() + result = df.count() + expected = Series(0, index=[]) + tm.assert_series_equal(result, expected) + + def test_count_objects(self, float_string_frame): + dm = 
DataFrame(float_string_frame._series) + df = DataFrame(float_string_frame._series) + + tm.assert_series_equal(dm.count(), df.count()) + tm.assert_series_equal(dm.count(1), df.count(1)) diff --git a/pandas/tests/frame/methods/test_count_with_level_deprecated.py b/pandas/tests/frame/methods/test_count_with_level_deprecated.py new file mode 100644 index 00000000..f6fbc281 --- /dev/null +++ b/pandas/tests/frame/methods/test_count_with_level_deprecated.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +class TestDataFrameCount: + def test_count_multiindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + frame = frame.copy() + frame.index.names = ["a", "b"] + + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level="b") + with tm.assert_produces_warning(FutureWarning): + expected = frame.count(level=1) + tm.assert_frame_equal(result, expected, check_names=False) + + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level="a") + with tm.assert_produces_warning(FutureWarning): + expected = frame.count(level=0) + tm.assert_frame_equal(result, expected, check_names=False) + + msg = "Level x not found" + with pytest.raises(KeyError, match=msg): + with tm.assert_produces_warning(FutureWarning): + frame.count(level="x") + + def test_count_level_corner(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + ser = frame["A"][:0] + with tm.assert_produces_warning(FutureWarning): + result = ser.count(level=0) + expected = Series(0, index=ser.index.levels[0], name="A") + tm.assert_series_equal(result, expected) + + df = frame[:0] + with tm.assert_produces_warning(FutureWarning): + result = df.count(level=0) + expected = ( + DataFrame( + index=ser.index.levels[0].set_names(["first"]), columns=df.columns + ) + .fillna(0) + .astype(np.int64) + ) + tm.assert_frame_equal(result, expected) + + def test_count_index_with_nan(self): + # https://github.com/pandas-dev/pandas/issues/21824 + df = DataFrame( + { + "Person": ["John", "Myla", None, "John", "Myla"], + "Age": [24.0, 5, 21.0, 33, 26], + "Single": [False, True, True, True, False], + } + ) + + # count on row labels + with tm.assert_produces_warning(FutureWarning): + res = df.set_index(["Person", "Single"]).count(level="Person") + expected = DataFrame( + index=Index(["John", "Myla"], name="Person"), + columns=Index(["Age"]), + data=[2, 2], + ) + tm.assert_frame_equal(res, expected) + + # count on column labels + with tm.assert_produces_warning(FutureWarning): + res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) + expected = DataFrame( + columns=Index(["John", "Myla"], name="Person"), + index=Index(["Age"]), + data=[[2, 2]], + ) + tm.assert_frame_equal(res, expected) + + def test_count_level( + self, + multiindex_year_month_day_dataframe_random_data, + multiindex_dataframe_random_data, + ): + ymd = multiindex_year_month_day_dataframe_random_data + frame = multiindex_dataframe_random_data + + def _check_counts(frame, axis=0): + index = frame._get_axis(axis) + for i in range(index.nlevels): + with tm.assert_produces_warning(FutureWarning): + result = frame.count(axis=axis, level=i) + expected = frame.groupby(axis=axis, level=i).count() + expected = expected.reindex_like(result).astype("i8") + tm.assert_frame_equal(result, expected) + + frame.iloc[1, [1, 2]] = np.nan + frame.iloc[7, [0, 1]] = np.nan + ymd.iloc[1, [1, 2]] = np.nan + 
ymd.iloc[7, [0, 1]] = np.nan + + _check_counts(frame) + _check_counts(ymd) + _check_counts(frame.T, axis=1) + _check_counts(ymd.T, axis=1) + + # can't call with level on regular DataFrame + df = tm.makeTimeDataFrame() + with pytest.raises(TypeError, match="hierarchical"): + with tm.assert_produces_warning(FutureWarning): + df.count(level=0) + + frame["D"] = "foo" + with tm.assert_produces_warning(FutureWarning): + result = frame.count(level=0, numeric_only=True) + tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp")) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py new file mode 100644 index 00000000..25ef4971 --- /dev/null +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -0,0 +1,434 @@ +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, + isna, +) +import pandas._testing as tm + + +class TestDataFrameCov: + def test_cov(self, float_frame, float_string_frame): + # min_periods no NAs (corner case) + expected = float_frame.cov() + result = float_frame.cov(min_periods=len(float_frame)) + + tm.assert_frame_equal(expected, result) + + result = float_frame.cov(min_periods=len(float_frame) + 1) + assert isna(result.values).all() + + # with NAs + frame = float_frame.copy() + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan + frame.iloc[5:10, frame.columns.get_loc("B")] = np.nan + result = frame.cov(min_periods=len(frame) - 8) + expected = frame.cov() + expected.loc["A", "B"] = np.nan + expected.loc["B", "A"] = np.nan + tm.assert_frame_equal(result, expected) + + # regular + result = frame.cov() + expected = frame["A"].cov(frame["C"]) + tm.assert_almost_equal(result["A"]["C"], expected) + + # exclude non-numeric types + with tm.assert_produces_warning( + FutureWarning, match="The default value of numeric_only" + ): + result = float_string_frame.cov() + expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov() + tm.assert_frame_equal(result, expected) + + # Single column frame + df = DataFrame(np.linspace(0.0, 1.0, 10)) + result = df.cov() + expected = DataFrame( + np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns + ) + tm.assert_frame_equal(result, expected) + df.loc[0] = np.nan + result = df.cov() + expected = DataFrame( + np.cov(df.values[1:].T).reshape((1, 1)), + index=df.columns, + columns=df.columns, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3]) + def test_cov_ddof(self, test_ddof): + # GH#34611 + np_array1 = np.random.rand(10) + np_array2 = np.random.rand(10) + df = DataFrame({0: np_array1, 1: np_array2}) + result = df.cov(ddof=test_ddof) + expected_np = np.cov(np_array1, np_array2, ddof=test_ddof) + expected = DataFrame(expected_np) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "other_column", [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0])] + ) + def test_cov_nullable_integer(self, other_column): + # https://github.com/pandas-dev/pandas/issues/33803 + data = DataFrame({"a": pd.array([1, 2, None]), "b": other_column}) + result = data.cov() + arr = np.array([[0.5, 0.5], [0.5, 1.0]]) + expected = DataFrame(arr, columns=["a", "b"], index=["a", "b"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("numeric_only", [True, False]) + def test_cov_numeric_only(self, numeric_only): + # when dtypes of pandas series are different + # then ndarray will have dtype=object, + # 
so it need to be properly handled + df = DataFrame({"a": [1, 0], "c": ["x", "y"]}) + expected = DataFrame(0.5, index=["a"], columns=["a"]) + if numeric_only: + result = df.cov(numeric_only=numeric_only) + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="could not convert string to float"): + df.cov(numeric_only=numeric_only) + + +class TestDataFrameCorr: + # DataFrame.corr(), as opposed to DataFrame.corrwith + + @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"]) + @td.skip_if_no_scipy + def test_corr_scipy_method(self, float_frame, method): + float_frame["A"][:5] = np.nan + float_frame["B"][5:10] = np.nan + float_frame["A"][:10] = float_frame["A"][10:20] + + correls = float_frame.corr(method=method) + expected = float_frame["A"].corr(float_frame["C"], method=method) + tm.assert_almost_equal(correls["A"]["C"], expected) + + # --------------------------------------------------------------------- + + def test_corr_non_numeric(self, float_string_frame): + # exclude non-numeric types + with tm.assert_produces_warning( + FutureWarning, match="The default value of numeric_only" + ): + result = float_string_frame.corr() + expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr() + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"]) + def test_corr_nooverlap(self, meth): + # nothing in common + df = DataFrame( + { + "A": [1, 1.5, 1, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan, 1, 1.5, 1], + "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + } + ) + rs = df.corr(meth) + assert isna(rs.loc["A", "B"]) + assert isna(rs.loc["B", "A"]) + assert rs.loc["A", "A"] == 1 + assert rs.loc["B", "B"] == 1 + assert isna(rs.loc["C", "C"]) + + @pytest.mark.parametrize("meth", ["pearson", "spearman"]) + def test_corr_constant(self, meth): + # constant --> all NA + df = DataFrame( + { + "A": [1, 1, 1, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan, 1, 1, 1], + } + ) + rs = df.corr(meth) + assert isna(rs.values).all() + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"]) + def test_corr_int_and_boolean(self, meth): + # when dtypes of pandas series are different + # then ndarray will have dtype=object, + # so it need to be properly handled + df = DataFrame({"a": [True, False], "b": [1, 0]}) + + expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"]) + + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + result = df.corr(meth) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", ["cov", "corr"]) + def test_corr_cov_independent_index_column(self, method): + # GH#14617 + df = DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) + result = getattr(df, method)() + assert result.index is not result.columns + assert result.index.equals(result.columns) + + def test_corr_invalid_method(self): + # GH#22298 + df = DataFrame(np.random.normal(size=(10, 2))) + msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " + with pytest.raises(ValueError, match=msg): + df.corr(method="____") + + def test_corr_int(self): + # dtypes other than float64 GH#1761 + df = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) + + df.cov() + df.corr() + + @td.skip_if_no_scipy + @pytest.mark.parametrize( + "nullable_column", [pd.array([1, 2, 3]), pd.array([1, 2, None])] + ) + @pytest.mark.parametrize( + 
"other_column", + [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, np.nan])], + ) + @pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"]) + def test_corr_nullable_integer(self, nullable_column, other_column, method): + # https://github.com/pandas-dev/pandas/issues/33803 + data = DataFrame({"a": nullable_column, "b": other_column}) + result = data.corr(method=method) + expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_corr_item_cache(self): + # Check that corr does not lead to incorrect entries in item_cache + + df = DataFrame({"A": range(10)}) + df["B"] = range(10)[::-1] + + ser = df["A"] # populate item_cache + assert len(df._mgr.arrays) == 2 # i.e. 2 blocks + + _ = df.corr() + + # Check that the corr didn't break link between ser and df + ser.values[0] = 99 + assert df.loc[0, "A"] == 99 + assert df["A"] is ser + assert df.values[0, 0] == 99 + + @pytest.mark.parametrize("length", [2, 20, 200, 2000]) + def test_corr_for_constant_columns(self, length): + # GH: 37448 + df = DataFrame(length * [[0.4, 0.1]], columns=["A", "B"]) + result = df.corr() + expected = DataFrame( + {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, index=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + def test_calc_corr_small_numbers(self): + # GH: 37452 + df = DataFrame( + {"A": [1.0e-20, 2.0e-20, 3.0e-20], "B": [1.0e-20, 2.0e-20, 3.0e-20]} + ) + result = df.corr() + expected = DataFrame({"A": [1.0, 1.0], "B": [1.0, 1.0]}, index=["A", "B"]) + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"]) + def test_corr_min_periods_greater_than_length(self, method): + df = DataFrame({"A": [1, 2], "B": [1, 2]}) + result = df.corr(method=method, min_periods=3) + expected = DataFrame( + {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, index=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"]) + @pytest.mark.parametrize("numeric_only", [True, False]) + def test_corr_numeric_only(self, meth, numeric_only): + # when dtypes of pandas series are different + # then ndarray will have dtype=object, + # so it need to be properly handled + df = DataFrame({"a": [1, 0], "b": [1, 0], "c": ["x", "y"]}) + expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"]) + if numeric_only: + result = df.corr(meth, numeric_only=numeric_only) + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="could not convert string to float"): + df.corr(meth, numeric_only=numeric_only) + + +class TestDataFrameCorrWith: + def test_corrwith(self, datetime_frame): + a = datetime_frame + noise = Series(np.random.randn(len(a)), index=a.index) + + b = datetime_frame.add(noise, axis=0) + + # make sure order does not matter + b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:]) + del b["B"] + + colcorr = a.corrwith(b, axis=0) + tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"])) + + rowcorr = a.corrwith(b, axis=1) + tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0)) + + dropped = a.corrwith(b, axis=0, drop=True) + tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"])) + assert "B" not in dropped + + dropped = a.corrwith(b, axis=1, drop=True) + assert a.index[-1] not in dropped.index + + # non time-series data + index = ["a", "b", "c", "d", "e"] + columns = ["one", "two", "three", "four"] 
+ df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns) + df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns) + correls = df1.corrwith(df2, axis=1) + for row in index[:4]: + tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row])) + + def test_corrwith_with_objects(self): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame() + cols = ["A", "B", "C", "D"] + + df1["obj"] = "foo" + df2["obj"] = "bar" + + with tm.assert_produces_warning( + FutureWarning, match="The default value of numeric_only" + ): + result = df1.corrwith(df2) + expected = df1.loc[:, cols].corrwith(df2.loc[:, cols]) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="The default value of numeric_only" + ): + result = df1.corrwith(df2, axis=1) + expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) + tm.assert_series_equal(result, expected) + + def test_corrwith_series(self, datetime_frame): + result = datetime_frame.corrwith(datetime_frame["A"]) + expected = datetime_frame.apply(datetime_frame["A"].corr) + + tm.assert_series_equal(result, expected) + + def test_corrwith_matches_corrcoef(self): + df1 = DataFrame(np.arange(10000), columns=["a"]) + df2 = DataFrame(np.arange(10000) ** 2, columns=["a"]) + c1 = df1.corrwith(df2)["a"] + c2 = np.corrcoef(df1["a"], df2["a"])[0][1] + + tm.assert_almost_equal(c1, c2) + assert c1 < 1 + + @pytest.mark.parametrize("numeric_only", [True, False]) + def test_corrwith_mixed_dtypes(self, numeric_only): + # GH#18570 + df = DataFrame( + {"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]} + ) + s = Series([0, 6, 7, 3]) + if numeric_only: + result = df.corrwith(s, numeric_only=numeric_only) + corrs = [df["a"].corr(s), df["b"].corr(s)] + expected = Series(data=corrs, index=["a", "b"]) + tm.assert_series_equal(result, expected) + else: + with pytest.raises( + TypeError, + match=r"unsupported operand type\(s\) for /: 'str' and 'int'", + ): + df.corrwith(s, numeric_only=numeric_only) + + def test_corrwith_index_intersection(self): + df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + + result = df1.corrwith(df2, drop=True).index.sort_values() + expected = df1.columns.intersection(df2.columns).sort_values() + tm.assert_index_equal(result, expected) + + def test_corrwith_index_union(self): + df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + + result = df1.corrwith(df2, drop=False).index.sort_values() + expected = df1.columns.union(df2.columns).sort_values() + tm.assert_index_equal(result, expected) + + def test_corrwith_dup_cols(self): + # GH#21925 + df1 = DataFrame(np.vstack([np.arange(10)] * 3).T) + df2 = df1.copy() + df2 = pd.concat((df2, df2[0]), axis=1) + + result = df1.corrwith(df2) + expected = Series(np.ones(4), index=[0, 0, 1, 2]) + tm.assert_series_equal(result, expected) + + def test_corr_numerical_instabilities(self): + # GH#45640 + df = DataFrame([[0.2, 0.4], [0.4, 0.2]]) + result = df.corr() + expected = DataFrame({0: [1.0, -1.0], 1: [-1.0, 1.0]}) + tm.assert_frame_equal(result - 1, expected - 1, atol=1e-17) + + @td.skip_if_no_scipy + def test_corrwith_spearman(self): + # GH#21925 + df = DataFrame(np.random.random(size=(100, 3))) + result = df.corrwith(df**2, method="spearman") + expected = Series(np.ones(len(result))) + tm.assert_series_equal(result, expected) + + 
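# [Editor's note: illustrative sketch only; not part of the imported patch.]
# The spearman test above and the kendall test below rely on rank correlations
# being invariant under monotone transforms, so corrwith(df**2) is exactly 1.0
# for positive data; scipy is required for these methods, hence the
# skip_if_no_scipy decorators.  A minimal sketch with hypothetical values:
import pandas as pd

sketch = pd.DataFrame({"a": [0.1, 0.5, 0.9], "b": [0.2, 0.4, 0.8]})
sketch.corrwith(sketch**2, method="spearman")  # a -> 1.0, b -> 1.0
# [End editor's note.]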
@td.skip_if_no_scipy + def test_corrwith_kendall(self): + # GH#21925 + df = DataFrame(np.random.random(size=(100, 3))) + result = df.corrwith(df**2, method="kendall") + expected = Series(np.ones(len(result))) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_corrwith_spearman_with_tied_data(self): + # GH#48826 + df1 = DataFrame( + { + "A": [1, np.nan, 7, 8], + "B": [False, True, True, False], + "C": [10, 4, 9, 3], + } + ) + df2 = df1[["B", "C"]] + result = (df1 + 1).corrwith(df2.B, method="spearman") + expected = Series([0.0, 1.0, 0.0], index=["A", "B", "C"]) + tm.assert_series_equal(result, expected) + + df_bool = DataFrame( + {"A": [True, True, False, False], "B": [True, False, False, True]} + ) + ser_bool = Series([True, True, False, True]) + result = df_bool.corrwith(ser_bool) + expected = Series([0.57735, 0.57735], index=["A", "B"]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py new file mode 100644 index 00000000..24d327a1 --- /dev/null +++ b/pandas/tests/frame/methods/test_describe.py @@ -0,0 +1,411 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameDescribe: + def test_describe_bool_in_mixed_frame(self): + df = DataFrame( + { + "string_data": ["a", "b", "c", "d", "e"], + "bool_data": [True, True, False, False, False], + "int_data": [10, 20, 30, 40, 50], + } + ) + + # Integer data are included in .describe() output, + # Boolean and string data are not. + result = df.describe() + expected = DataFrame( + {"int_data": [5, 30, df.int_data.std(), 10, 20, 30, 40, 50]}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + # Top value is a boolean value that is False + result = df.describe(include=["bool"]) + + expected = DataFrame( + {"bool_data": [5, 2, False, 3]}, index=["count", "unique", "top", "freq"] + ) + tm.assert_frame_equal(result, expected) + + def test_describe_empty_object(self): + # GH#27183 + df = DataFrame({"A": [None, None]}, dtype=object) + result = df.describe() + expected = DataFrame( + {"A": [0, 0, np.nan, np.nan]}, + dtype=object, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + result = df.iloc[:0].describe() + tm.assert_frame_equal(result, expected) + + def test_describe_bool_frame(self): + # GH#13891 + df = DataFrame( + { + "bool_data_1": [False, False, True, True], + "bool_data_2": [False, True, True, True], + } + ) + result = df.describe() + expected = DataFrame( + {"bool_data_1": [4, 2, False, 2], "bool_data_2": [4, 2, True, 3]}, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + { + "bool_data": [False, False, True, True, False], + "int_data": [0, 1, 2, 3, 4], + } + ) + result = df.describe() + expected = DataFrame( + {"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]} + ) + result = df.describe() + expected = DataFrame( + {"bool_data": [4, 2, False, 2], "str_data": [4, 3, "a", 2]}, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + def 
test_describe_categorical(self): + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + cat = df + + # Categoricals should not show up together with numerical columns + result = cat.describe() + assert len(result.columns) == 1 + + # In a frame, describe() for the cat should be the same as for string + # arrays (count, unique, top, freq) + + cat = Categorical( + ["a", "b", "b", "b"], categories=["a", "b", "c"], ordered=True + ) + s = Series(cat) + result = s.describe() + expected = Series([4, 2, "b", 3], index=["count", "unique", "top", "freq"]) + tm.assert_series_equal(result, expected) + + cat = Series(Categorical(["a", "b", "c", "c"])) + df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) + result = df3.describe() + tm.assert_numpy_array_equal(result["cat"].values, result["s"].values) + + def test_describe_empty_categorical_column(self): + # GH#26397 + # Ensure the index of an empty categorical DataFrame column + # also contains (count, unique, top, freq) + df = DataFrame({"empty_col": Categorical([])}) + result = df.describe() + expected = DataFrame( + {"empty_col": [0, 0, np.nan, np.nan]}, + index=["count", "unique", "top", "freq"], + dtype="object", + ) + tm.assert_frame_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2, 0]) + assert np.isnan(result.iloc[3, 0]) + + def test_describe_categorical_columns(self): + # GH#11558 + columns = pd.CategoricalIndex(["int1", "int2", "obj"], ordered=True, name="XXX") + df = DataFrame( + { + "int1": [10, 20, 30, 40, 50], + "int2": [10, 20, 30, 40, 50], + "obj": ["A", 0, None, "X", 1], + }, + columns=columns, + ) + result = df.describe() + + exp_columns = pd.CategoricalIndex( + ["int1", "int2"], + categories=["int1", "int2", "obj"], + ordered=True, + name="XXX", + ) + expected = DataFrame( + { + "int1": [5, 30, df.int1.std(), 10, 20, 30, 40, 50], + "int2": [5, 30, df.int2.std(), 10, 20, 30, 40, 50], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + columns=exp_columns, + ) + + tm.assert_frame_equal(result, expected) + tm.assert_categorical_equal(result.columns.values, expected.columns.values) + + def test_describe_datetime_columns(self): + columns = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-03-01"], + freq="MS", + tz="US/Eastern", + name="XXX", + ) + df = DataFrame( + { + 0: [10, 20, 30, 40, 50], + 1: [10, 20, 30, 40, 50], + 2: ["A", 0, None, "X", 1], + } + ) + df.columns = columns + result = df.describe() + + exp_columns = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01"], freq="MS", tz="US/Eastern", name="XXX" + ) + expected = DataFrame( + { + 0: [5, 30, df.iloc[:, 0].std(), 10, 20, 30, 40, 50], + 1: [5, 30, df.iloc[:, 1].std(), 10, 20, 30, 40, 50], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + expected.columns = exp_columns + tm.assert_frame_equal(result, expected) + assert result.columns.freq == "MS" + assert result.columns.tz == expected.columns.tz + + def test_describe_timedelta_values(self): + # GH#6145 + t1 = pd.timedelta_range("1 days", freq="D", periods=5) + t2 = pd.timedelta_range("1 hours", freq="H", periods=5) + df = DataFrame({"t1": t1, "t2": t2}) + + expected = DataFrame( + { + "t1": [ + 5, + pd.Timedelta("3 days"), + df.iloc[:, 0].std(), + pd.Timedelta("1 days"), + 
pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + pd.Timedelta("4 days"), + pd.Timedelta("5 days"), + ], + "t2": [ + 5, + pd.Timedelta("3 hours"), + df.iloc[:, 1].std(), + pd.Timedelta("1 hours"), + pd.Timedelta("2 hours"), + pd.Timedelta("3 hours"), + pd.Timedelta("4 hours"), + pd.Timedelta("5 hours"), + ], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + + result = df.describe() + tm.assert_frame_equal(result, expected) + + exp_repr = ( + " t1 t2\n" + "count 5 5\n" + "mean 3 days 00:00:00 0 days 03:00:00\n" + "std 1 days 13:56:50.394919273 0 days 01:34:52.099788303\n" + "min 1 days 00:00:00 0 days 01:00:00\n" + "25% 2 days 00:00:00 0 days 02:00:00\n" + "50% 3 days 00:00:00 0 days 03:00:00\n" + "75% 4 days 00:00:00 0 days 04:00:00\n" + "max 5 days 00:00:00 0 days 05:00:00" + ) + assert repr(result) == exp_repr + + def test_describe_tz_values(self, tz_naive_fixture): + # GH#21332 + tz = tz_naive_fixture + s1 = Series(range(5)) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s2 = Series(date_range(start, end, tz=tz)) + df = DataFrame({"s1": s1, "s2": s2}) + + expected = DataFrame( + { + "s1": [5, 2, 0, 1, 2, 3, 4, 1.581139], + "s2": [ + 5, + Timestamp(2018, 1, 3).tz_localize(tz), + start.tz_localize(tz), + s2[1], + s2[2], + s2[3], + end.tz_localize(tz), + np.nan, + ], + }, + index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"], + ) + result = df.describe(include="all", datetime_is_numeric=True) + tm.assert_frame_equal(result, expected) + + def test_datetime_is_numeric_includes_datetime(self): + df = DataFrame({"a": date_range("2012", periods=3), "b": [1, 2, 3]}) + result = df.describe(datetime_is_numeric=True) + expected = DataFrame( + { + "a": [ + 3, + Timestamp("2012-01-02"), + Timestamp("2012-01-01"), + Timestamp("2012-01-01T12:00:00"), + Timestamp("2012-01-02"), + Timestamp("2012-01-02T12:00:00"), + Timestamp("2012-01-03"), + np.nan, + ], + "b": [3, 2, 1, 1.5, 2, 2.5, 3, 1], + }, + index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"], + ) + tm.assert_frame_equal(result, expected) + + def test_describe_tz_values2(self): + tz = "CET" + s1 = Series(range(5)) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s2 = Series(date_range(start, end, tz=tz)) + df = DataFrame({"s1": s1, "s2": s2}) + + s1_ = s1.describe() + s2_ = Series( + [ + 5, + 5, + s2.value_counts().index[0], + 1, + start.tz_localize(tz), + end.tz_localize(tz), + ], + index=["count", "unique", "top", "freq", "first", "last"], + ) + idx = [ + "count", + "unique", + "top", + "freq", + "first", + "last", + "mean", + "std", + "min", + "25%", + "50%", + "75%", + "max", + ] + expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).loc[idx] + + with tm.assert_produces_warning(FutureWarning): + result = df.describe(include="all") + tm.assert_frame_equal(result, expected) + + def test_describe_percentiles_integer_idx(self): + # GH#26660 + df = DataFrame({"x": [1]}) + pct = np.linspace(0, 1, 10 + 1) + result = df.describe(percentiles=pct) + + expected = DataFrame( + {"x": [1.0, 1.0, np.NaN, 1.0, *(1.0 for _ in pct), 1.0]}, + index=[ + "count", + "mean", + "std", + "min", + "0%", + "10%", + "20%", + "30%", + "40%", + "50%", + "60%", + "70%", + "80%", + "90%", + "100%", + "max", + ], + ) + tm.assert_frame_equal(result, expected) + + def test_describe_does_not_raise_error_for_dictlike_elements(self): + # GH#32409 + df = DataFrame([{"test": {"a": "1"}}, {"test": {"a": "2"}}]) + expected = DataFrame( + {"test": [2, 2, {"a": "1"}, 1]}, index=["count", 
"unique", "top", "freq"] + ) + result = df.describe() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("exclude", ["x", "y", ["x", "y"], ["x", "z"]]) + def test_describe_when_include_all_exclude_not_allowed(self, exclude): + """ + When include is 'all', then setting exclude != None is not allowed. + """ + df = DataFrame({"x": [1], "y": [2], "z": [3]}) + msg = "exclude must be None when include is 'all'" + with pytest.raises(ValueError, match=msg): + df.describe(include="all", exclude=exclude) + + def test_describe_with_duplicate_columns(self): + df = DataFrame( + [[1, 1, 1], [2, 2, 2], [3, 3, 3]], + columns=["bar", "a", "a"], + dtype="float64", + ) + result = df.describe() + ser = df.iloc[:, 0].describe() + expected = pd.concat([ser, ser, ser], keys=df.columns, axis=1) + tm.assert_frame_equal(result, expected) + + def test_ea_with_na(self, any_numeric_ea_dtype): + # GH#48778 + + df = DataFrame({"a": [1, pd.NA, pd.NA], "b": pd.NA}, dtype=any_numeric_ea_dtype) + result = df.describe() + expected = DataFrame( + {"a": [1.0, 1.0, pd.NA] + [1.0] * 5, "b": [0.0] + [pd.NA] * 7}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + dtype="Float64", + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py new file mode 100644 index 00000000..9a9fea34 --- /dev/null +++ b/pandas/tests/frame/methods/test_diff.py @@ -0,0 +1,305 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameDiff: + def test_diff_requires_integer(self): + df = DataFrame(np.random.randn(2, 2)) + with pytest.raises(ValueError, match="periods must be an integer"): + df.diff(1.5) + + # GH#44572 np.int64 is accepted + @pytest.mark.parametrize("num", [1, np.int64(1)]) + def test_diff(self, datetime_frame, num): + df = datetime_frame + the_diff = df.diff(num) + + expected = df["A"] - df["A"].shift(num) + tm.assert_series_equal(the_diff["A"], expected) + + def test_diff_int_dtype(self): + # int dtype + a = 10_000_000_000_000_000 + b = a + 1 + ser = Series([a, b]) + + rs = DataFrame({"s": ser}).diff() + assert rs.s[1] == 1 + + def test_diff_mixed_numeric(self, datetime_frame): + # mixed numeric + tf = datetime_frame.astype("float32") + the_diff = tf.diff(1) + tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1)) + + def test_diff_axis1_nonconsolidated(self): + # GH#10907 + df = DataFrame({"y": Series([2]), "z": Series([3])}) + df.insert(0, "x", 1) + result = df.diff(axis=1) + expected = DataFrame({"x": np.nan, "y": Series(1), "z": Series(1)}) + tm.assert_frame_equal(result, expected) + + def test_diff_timedelta64_with_nat(self): + # GH#32441 + arr = np.arange(6).reshape(3, 2).astype("timedelta64[ns]") + arr[:, 0] = np.timedelta64("NaT", "ns") + + df = DataFrame(arr) + result = df.diff(1, axis=0) + + expected = DataFrame({0: df[0], 1: [pd.NaT, pd.Timedelta(2), pd.Timedelta(2)]}) + tm.assert_equal(result, expected) + + result = df.diff(0) + expected = df - df + assert expected[0].isna().all() + tm.assert_equal(result, expected) + + result = df.diff(-1, axis=1) + expected = df * np.nan + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_axis0_with_nat(self, tz): + # GH#32441 + dti = pd.DatetimeIndex(["NaT", "2019-01-01", "2019-01-02"], tz=tz) + ser = Series(dti) + + df = ser.to_frame() + + result = df.diff() + 
ex_index = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta(days=1)]) + expected = Series(ex_index).to_frame() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [pytest.param(None, marks=pytest.mark.xfail), "UTC"]) + def test_diff_datetime_with_nat_zero_periods(self, tz): + # diff on NaT values should give NaT, not timedelta64(0) + dti = date_range("2016-01-01", periods=4, tz=tz) + ser = Series(dti) + df = ser.to_frame() + + df[1] = ser.copy() + + with tm.assert_produces_warning(None): + df.iloc[:, 0] = pd.NaT + + expected = df - df + assert expected[0].isna().all() + + result = df.diff(0, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.diff(0, axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_axis0(self, tz): + # GH#18578 + df = DataFrame( + { + 0: date_range("2010", freq="D", periods=2, tz=tz), + 1: date_range("2010", freq="D", periods=2, tz=tz), + } + ) + + result = df.diff(axis=0) + expected = DataFrame( + { + 0: pd.TimedeltaIndex(["NaT", "1 days"]), + 1: pd.TimedeltaIndex(["NaT", "1 days"]), + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_axis1(self, tz): + # GH#18578 + df = DataFrame( + { + 0: date_range("2010", freq="D", periods=2, tz=tz), + 1: date_range("2010", freq="D", periods=2, tz=tz), + } + ) + + result = df.diff(axis=1) + expected = DataFrame( + { + 0: pd.TimedeltaIndex(["NaT", "NaT"]), + 1: pd.TimedeltaIndex(["0 days", "0 days"]), + } + ) + tm.assert_frame_equal(result, expected) + + def test_diff_timedelta(self): + # GH#4533 + df = DataFrame( + { + "time": [Timestamp("20130101 9:01"), Timestamp("20130101 9:02")], + "value": [1.0, 2.0], + } + ) + + res = df.diff() + exp = DataFrame( + [[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"] + ) + tm.assert_frame_equal(res, exp) + + def test_diff_mixed_dtype(self): + df = DataFrame(np.random.randn(5, 3)) + df["A"] = np.array([1, 2, 3, 4, 5], dtype=object) + + result = df.diff() + assert result[0].dtype == np.float64 + + def test_diff_neg_n(self, datetime_frame): + rs = datetime_frame.diff(-1) + xp = datetime_frame - datetime_frame.shift(-1) + tm.assert_frame_equal(rs, xp) + + def test_diff_float_n(self, datetime_frame): + rs = datetime_frame.diff(1.0) + xp = datetime_frame.diff(1) + tm.assert_frame_equal(rs, xp) + + def test_diff_axis(self): + # GH#9727 + df = DataFrame([[1.0, 2.0], [3.0, 4.0]]) + tm.assert_frame_equal( + df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]]) + ) + tm.assert_frame_equal( + df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]]) + ) + + def test_diff_period(self): + # GH#32995 Don't pass an incorrect axis + pi = date_range("2016-01-01", periods=3).to_period("D") + df = DataFrame({"A": pi}) + + result = df.diff(1, axis=1) + + expected = (df - pd.NaT).astype(object) + tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2}) + + result = df.diff(axis=1) + tm.assert_frame_equal(result, expected) + + # GH#21437 mixed-float-dtypes + df = DataFrame( + {"a": np.arange(3, dtype="float32"), "b": np.arange(3, dtype="float64")} + ) + result = df.diff(axis=1) + expected = DataFrame({"a": df["a"] * np.nan, "b": df["b"] * 0}) + 
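# [Editor's note: illustrative sketch only; not part of the imported patch.]
# With axis=1, diff subtracts the previous column, so the first column comes
# back all-NaN and "b" becomes b minus a even though the two columns have
# different float widths, matching the expected frame built above.  A sketch:
import numpy as np
import pandas as pd

sketch = pd.DataFrame(
    {"a": np.arange(3, dtype="float32"), "b": np.arange(3, dtype="float64")}
)
sketch.diff(axis=1)  # column a: all NaN, column b: 0.0 in every row
# [End editor's note.]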
tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes_large_periods(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = df * np.nan + + result = df.diff(axis=1, periods=3) + tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes_negative_periods(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan}) + + result = df.diff(axis=1, periods=-1) + tm.assert_frame_equal(result, expected) + + def test_diff_sparse(self): + # GH#28813 .diff() should work for sparse dataframes as well + sparse_df = DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]") + + result = sparse_df.diff() + expected = DataFrame( + [[np.nan, np.nan], [1.0, -1.0]], dtype=pd.SparseDtype("float", 0.0) + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "axis,expected", + [ + ( + 0, + DataFrame( + { + "a": [np.nan, 0, 1, 0, np.nan, np.nan, np.nan, 0], + "b": [np.nan, 1, np.nan, np.nan, -2, 1, np.nan, np.nan], + "c": np.repeat(np.nan, 8), + "d": [np.nan, 3, 5, 7, 9, 11, 13, 15], + }, + dtype="Int64", + ), + ), + ( + 1, + DataFrame( + { + "a": np.repeat(np.nan, 8), + "b": [0, 1, np.nan, 1, np.nan, np.nan, np.nan, 0], + "c": np.repeat(np.nan, 8), + "d": np.repeat(np.nan, 8), + }, + dtype="Int64", + ), + ), + ], + ) + def test_diff_integer_na(self, axis, expected): + # GH#24171 IntegerNA Support for DataFrame.diff() + df = DataFrame( + { + "a": np.repeat([0, 1, np.nan, 2], 2), + "b": np.tile([0, 1, np.nan, 2], 2), + "c": np.repeat(np.nan, 8), + "d": np.arange(1, 9) ** 2, + }, + dtype="Int64", + ) + + # Test case for default behaviour of diff + result = df.diff(axis=axis) + tm.assert_frame_equal(result, expected) + + def test_diff_readonly(self): + # https://github.com/pandas-dev/pandas/issues/35559 + arr = np.random.randn(5, 2) + arr.flags.writeable = False + df = DataFrame(arr) + result = df.diff() + expected = DataFrame(np.array(df)).diff() + tm.assert_frame_equal(result, expected) + + def test_diff_all_int_dtype(self, any_int_numpy_dtype): + # GH 14773 + df = DataFrame(range(5)) + df = df.astype(any_int_numpy_dtype) + result = df.diff() + expected_dtype = ( + "float32" if any_int_numpy_dtype in ("int8", "int16") else "float64" + ) + expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_dot.py b/pandas/tests/frame/methods/test_dot.py new file mode 100644 index 00000000..555e5f0e --- /dev/null +++ b/pandas/tests/frame/methods/test_dot.py @@ -0,0 +1,131 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class DotSharedTests: + @pytest.fixture + def obj(self): + raise NotImplementedError + + @pytest.fixture + def other(self) -> DataFrame: + """ + other is a DataFrame that is indexed so that obj.dot(other) is valid + """ + raise NotImplementedError + + @pytest.fixture + def expected(self, obj, other) -> DataFrame: + """ + The expected result of obj.dot(other) + """ + raise NotImplementedError + + @classmethod + def reduced_dim_assert(cls, result, expected): + """ + Assertion about results with 1 fewer dimension that self.obj + """ + raise NotImplementedError + + def test_dot_equiv_values_dot(self, obj, other, 
expected): + # `expected` is constructed from obj.values.dot(other.values) + result = obj.dot(other) + tm.assert_equal(result, expected) + + def test_dot_2d_ndarray(self, obj, other, expected): + # Check ndarray argument; in this case we get matching values, + # but index/columns may not match + result = obj.dot(other.values) + assert np.all(result == expected.values) + + def test_dot_1d_ndarray(self, obj, expected): + # can pass correct-length array + row = obj.iloc[0] if obj.ndim == 2 else obj + + result = obj.dot(row.values) + expected = obj.dot(row) + self.reduced_dim_assert(result, expected) + + def test_dot_series(self, obj, other, expected): + # Check series argument + result = obj.dot(other["1"]) + self.reduced_dim_assert(result, expected["1"]) + + def test_dot_series_alignment(self, obj, other, expected): + result = obj.dot(other.iloc[::-1]["1"]) + self.reduced_dim_assert(result, expected["1"]) + + def test_dot_aligns(self, obj, other, expected): + # Check index alignment + other2 = other.iloc[::-1] + result = obj.dot(other2) + tm.assert_equal(result, expected) + + def test_dot_shape_mismatch(self, obj): + msg = "Dot product shape mismatch" + # exception raised is of type Exception + with pytest.raises(Exception, match=msg): + obj.dot(obj.values[:3]) + + def test_dot_misaligned(self, obj, other): + msg = "matrices are not aligned" + with pytest.raises(ValueError, match=msg): + obj.dot(other.T) + + +class TestSeriesDot(DotSharedTests): + @pytest.fixture + def obj(self): + return Series(np.random.randn(4), index=["p", "q", "r", "s"]) + + @pytest.fixture + def other(self): + return DataFrame( + np.random.randn(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"] + ).T + + @pytest.fixture + def expected(self, obj, other): + return Series(np.dot(obj.values, other.values), index=other.columns) + + @classmethod + def reduced_dim_assert(cls, result, expected): + """ + Assertion about results with 1 fewer dimension that self.obj + """ + tm.assert_almost_equal(result, expected) + + +class TestDataFrameDot(DotSharedTests): + @pytest.fixture + def obj(self): + return DataFrame( + np.random.randn(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"] + ) + + @pytest.fixture + def other(self): + return DataFrame( + np.random.randn(4, 2), index=["p", "q", "r", "s"], columns=["1", "2"] + ) + + @pytest.fixture + def expected(self, obj, other): + return DataFrame( + np.dot(obj.values, other.values), index=obj.index, columns=other.columns + ) + + @classmethod + def reduced_dim_assert(cls, result, expected): + """ + Assertion about results with 1 fewer dimension that self.obj + """ + tm.assert_series_equal(result, expected, check_names=False) + assert result.name is None diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py new file mode 100644 index 00000000..50b60f9e --- /dev/null +++ b/pandas/tests/frame/methods/test_drop.py @@ -0,0 +1,551 @@ +import re + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "msg,labels,level", + [ + (r"labels \[4\] not found in level", 4, "a"), + (r"labels \[7\] not found in level", 7, "b"), + ], +) +def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level): + # GH 8594 + mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) + s = Series([10, 20, 30], index=mi) + df = 
DataFrame([10, 20, 30], index=mi) + + with pytest.raises(KeyError, match=msg): + s.drop(labels, level=level) + with pytest.raises(KeyError, match=msg): + df.drop(labels, level=level) + + +@pytest.mark.parametrize("labels,level", [(4, "a"), (7, "b")]) +def test_drop_errors_ignore(labels, level): + # GH 8594 + mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) + s = Series([10, 20, 30], index=mi) + df = DataFrame([10, 20, 30], index=mi) + + expected_s = s.drop(labels, level=level, errors="ignore") + tm.assert_series_equal(s, expected_s) + + expected_df = df.drop(labels, level=level, errors="ignore") + tm.assert_frame_equal(df, expected_df) + + +def test_drop_with_non_unique_datetime_index_and_invalid_keys(): + # GH 30399 + + # define dataframe with unique datetime index + df = DataFrame( + np.random.randn(5, 3), + columns=["a", "b", "c"], + index=pd.date_range("2012", freq="H", periods=5), + ) + # create dataframe with non-unique datetime index + df = df.iloc[[0, 2, 2, 3]].copy() + + with pytest.raises(KeyError, match="not found in axis"): + df.drop(["a", "b"]) # Dropping with labels not exist in the index + + +class TestDataFrameDrop: + def test_drop_names(self): + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + df.index.name, df.columns.name = "first", "second" + df_dropped_b = df.drop("b") + df_dropped_e = df.drop("e", axis=1) + df_inplace_b, df_inplace_e = df.copy(), df.copy() + return_value = df_inplace_b.drop("b", inplace=True) + assert return_value is None + return_value = df_inplace_e.drop("e", axis=1, inplace=True) + assert return_value is None + for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e): + assert obj.index.name == "first" + assert obj.columns.name == "second" + assert list(df.columns) == ["d", "e", "f"] + + msg = r"\['g'\] not found in axis" + with pytest.raises(KeyError, match=msg): + df.drop(["g"]) + with pytest.raises(KeyError, match=msg): + df.drop(["g"], axis=1) + + # errors = 'ignore' + dropped = df.drop(["g"], errors="ignore") + expected = Index(["a", "b", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + dropped = df.drop(["b", "g"], errors="ignore") + expected = Index(["a", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + dropped = df.drop(["g"], axis=1, errors="ignore") + expected = Index(["d", "e", "f"], name="second") + tm.assert_index_equal(dropped.columns, expected) + + dropped = df.drop(["d", "g"], axis=1, errors="ignore") + expected = Index(["e", "f"], name="second") + tm.assert_index_equal(dropped.columns, expected) + + # GH 16398 + dropped = df.drop([], errors="ignore") + expected = Index(["a", "b", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + def test_drop(self): + simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]}) + tm.assert_frame_equal(simple.drop("A", axis=1), simple[["B"]]) + tm.assert_frame_equal(simple.drop(["A", "B"], axis="columns"), simple[[]]) + tm.assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :]) + tm.assert_frame_equal(simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :]) + + with pytest.raises(KeyError, match=r"\[5\] not found in axis"): + simple.drop(5) + with pytest.raises(KeyError, match=r"\['C'\] not found in axis"): + simple.drop("C", axis=1) + with pytest.raises(KeyError, match=r"\[5\] not found in axis"): + simple.drop([1, 5]) + with pytest.raises(KeyError, match=r"\['C'\] not found in axis"): + simple.drop(["A", "C"], axis=1) + + 
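# [Editor's note: illustrative sketch only; not part of the imported patch.]
# drop() raises KeyError for any label missing from the axis, while
# errors="ignore" silently drops whatever does exist; that contrast is what
# the assertions above and below exercise.  A minimal sketch:
import pandas as pd

sketch = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
# sketch.drop(["A", "C"], axis=1)                  # would raise KeyError
sketch.drop(["A", "C"], axis=1, errors="ignore")   # keeps only column "B"
# [End editor's note.]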
# GH 42881 + with pytest.raises(KeyError, match=r"\['C', 'D', 'F'\] not found in axis"): + simple.drop(["C", "D", "F"], axis=1) + + # errors = 'ignore' + tm.assert_frame_equal(simple.drop(5, errors="ignore"), simple) + tm.assert_frame_equal( + simple.drop([0, 5], errors="ignore"), simple.loc[[1, 2, 3], :] + ) + tm.assert_frame_equal(simple.drop("C", axis=1, errors="ignore"), simple) + tm.assert_frame_equal( + simple.drop(["A", "C"], axis=1, errors="ignore"), simple[["B"]] + ) + + # non-unique - wheee! + nu_df = DataFrame( + list(zip(range(3), range(-3, 1), list("abc"))), columns=["a", "a", "b"] + ) + tm.assert_frame_equal(nu_df.drop("a", axis=1), nu_df[["b"]]) + tm.assert_frame_equal(nu_df.drop("b", axis="columns"), nu_df["a"]) + tm.assert_frame_equal(nu_df.drop([]), nu_df) # GH 16398 + + nu_df = nu_df.set_index(Index(["X", "Y", "X"])) + nu_df.columns = list("abc") + tm.assert_frame_equal(nu_df.drop("X", axis="rows"), nu_df.loc[["Y"], :]) + tm.assert_frame_equal(nu_df.drop(["X", "Y"], axis=0), nu_df.loc[[], :]) + + # inplace cache issue + # GH#5628 + df = DataFrame(np.random.randn(10, 3), columns=list("abc")) + expected = df[~(df.b > 0)] + return_value = df.drop(labels=df[df.b > 0].index, inplace=True) + assert return_value is None + tm.assert_frame_equal(df, expected) + + def test_drop_multiindex_not_lexsorted(self): + # GH#11640 + + # define the lexsorted version + lexsorted_mi = MultiIndex.from_tuples( + [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"] + ) + lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) + assert lexsorted_df.columns._is_lexsorted() + + # define the non-lexsorted version + not_lexsorted_df = DataFrame( + columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] + ) + not_lexsorted_df = not_lexsorted_df.pivot_table( + index="a", columns=["b", "c"], values="d" + ) + not_lexsorted_df = not_lexsorted_df.reset_index() + assert not not_lexsorted_df.columns._is_lexsorted() + + # compare the results + tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) + + expected = lexsorted_df.drop("a", axis=1) + with tm.assert_produces_warning(PerformanceWarning): + result = not_lexsorted_df.drop("a", axis=1) + + tm.assert_frame_equal(result, expected) + + def test_drop_api_equivalence(self): + # equivalence of the labels/axis and index/columns API's (GH#12392) + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + + res1 = df.drop("a") + res2 = df.drop(index="a") + tm.assert_frame_equal(res1, res2) + + res1 = df.drop("d", axis=1) + res2 = df.drop(columns="d") + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(labels="e", axis=1) + res2 = df.drop(columns="e") + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(["a"], axis=0) + res2 = df.drop(index=["a"]) + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(["a"], axis=0).drop(["d"], axis=1) + res2 = df.drop(index=["a"], columns=["d"]) + tm.assert_frame_equal(res1, res2) + + msg = "Cannot specify both 'labels' and 'index'/'columns'" + with pytest.raises(ValueError, match=msg): + df.drop(labels="a", index="b") + + with pytest.raises(ValueError, match=msg): + df.drop(labels="a", columns="b") + + msg = "Need to specify at least one of 'labels', 'index' or 'columns'" + with pytest.raises(ValueError, match=msg): + df.drop(axis=1) + + data = [[1, 2, 3], [1, 2, 3]] + + @pytest.mark.parametrize( + "actual", + [ + DataFrame(data=data, index=["a", "a"]), + DataFrame(data=data, index=["a", "b"]), + DataFrame(data=data, index=["a", 
"b"]).set_index([0, 1]), + DataFrame(data=data, index=["a", "a"]).set_index([0, 1]), + ], + ) + def test_raise_on_drop_duplicate_index(self, actual): + + # GH#19186 + level = 0 if isinstance(actual.index, MultiIndex) else None + msg = re.escape("\"['c'] not found in axis\"") + with pytest.raises(KeyError, match=msg): + actual.drop("c", level=level, axis=0) + with pytest.raises(KeyError, match=msg): + actual.T.drop("c", level=level, axis=1) + expected_no_err = actual.drop("c", axis=0, level=level, errors="ignore") + tm.assert_frame_equal(expected_no_err, actual) + expected_no_err = actual.T.drop("c", axis=1, level=level, errors="ignore") + tm.assert_frame_equal(expected_no_err.T, actual) + + @pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 2]]) + @pytest.mark.parametrize("drop_labels", [[], [1], [2]]) + def test_drop_empty_list(self, index, drop_labels): + # GH#21494 + expected_index = [i for i in index if i not in drop_labels] + frame = DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, DataFrame(index=expected_index)) + + @pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]]) + @pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH# 21494 + with pytest.raises(KeyError, match="not found in axis"): + DataFrame(index=index).drop(drop_labels) + + @pytest.mark.parametrize( + "empty_listlike", + [ + [], + {}, + np.array([]), + Series([], dtype="datetime64[ns]"), + Index([]), + DatetimeIndex([]), + ], + ) + def test_drop_empty_listlike_non_unique_datetime_index(self, empty_listlike): + # GH#27994 + data = {"column_a": [5, 10], "column_b": ["one", "two"]} + index = [Timestamp("2021-01-01"), Timestamp("2021-01-01")] + df = DataFrame(data, index=index) + + # Passing empty list-like should return the same DataFrame. 
+ expected = df.copy() + result = df.drop(empty_listlike) + tm.assert_frame_equal(result, expected) + + def test_mixed_depth_drop(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + result = df.drop("a", axis=1) + expected = df.drop([("a", "", "")], axis=1) + tm.assert_frame_equal(expected, result) + + result = df.drop(["top"], axis=1) + expected = df.drop([("top", "OD", "wx")], axis=1) + expected = expected.drop([("top", "OD", "wy")], axis=1) + tm.assert_frame_equal(expected, result) + + result = df.drop(("top", "OD", "wx"), axis=1) + expected = df.drop([("top", "OD", "wx")], axis=1) + tm.assert_frame_equal(expected, result) + + expected = df.drop([("top", "OD", "wy")], axis=1) + expected = df.drop("top", axis=1) + + result = df.drop("result1", level=1, axis=1) + expected = df.drop( + [("routine1", "result1", ""), ("routine2", "result1", "")], axis=1 + ) + tm.assert_frame_equal(expected, result) + + def test_drop_multiindex_other_level_nan(self): + # GH#12754 + df = ( + DataFrame( + { + "A": ["one", "one", "two", "two"], + "B": [np.nan, 0.0, 1.0, 2.0], + "C": ["a", "b", "c", "c"], + "D": [1, 2, 3, 4], + } + ) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.drop("c", level="C") + expected = DataFrame( + [2, 1], + columns=["D"], + index=MultiIndex.from_tuples( + [("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_drop_nonunique(self): + df = DataFrame( + [ + ["x-a", "x", "a", 1.5], + ["x-a", "x", "a", 1.2], + ["z-c", "z", "c", 3.1], + ["x-a", "x", "a", 4.1], + ["x-b", "x", "b", 5.1], + ["x-b", "x", "b", 4.1], + ["x-b", "x", "b", 2.2], + ["y-a", "y", "a", 1.2], + ["z-b", "z", "b", 2.1], + ], + columns=["var1", "var2", "var3", "var4"], + ) + + grp_size = df.groupby("var1").size() + drop_idx = grp_size.loc[grp_size == 1] + + idf = df.set_index(["var1", "var2", "var3"]) + + # it works! 
GH#2101 + result = idf.drop(drop_idx.index, level=0).reset_index() + expected = df[-df.var1.isin(drop_idx.index)] + + result.index = expected.index + + tm.assert_frame_equal(result, expected) + + def test_drop_level(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + result = frame.drop(["bar", "qux"], level="first") + expected = frame.iloc[[0, 1, 2, 5, 6]] + tm.assert_frame_equal(result, expected) + + result = frame.drop(["two"], level="second") + expected = frame.iloc[[0, 2, 3, 6, 7, 9]] + tm.assert_frame_equal(result, expected) + + result = frame.T.drop(["bar", "qux"], axis=1, level="first") + expected = frame.iloc[[0, 1, 2, 5, 6]].T + tm.assert_frame_equal(result, expected) + + result = frame.T.drop(["two"], axis=1, level="second") + expected = frame.iloc[[0, 2, 3, 6, 7, 9]].T + tm.assert_frame_equal(result, expected) + + def test_drop_level_nonunique_datetime(self): + # GH#12701 + idx = Index([2, 3, 4, 4, 5], name="id") + idxdt = pd.to_datetime( + [ + "201603231400", + "201603231500", + "201603231600", + "201603231600", + "201603231700", + ] + ) + df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx) + df["tstamp"] = idxdt + df = df.set_index("tstamp", append=True) + ts = Timestamp("201603231600") + assert df.index.is_unique is False + + result = df.drop(ts, level="tstamp") + expected = df.loc[idx != 4] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [Series, DataFrame]) + def test_drop_tz_aware_timestamp_across_dst(self, box): + # GH#21761 + start = Timestamp("2017-10-29", tz="Europe/Berlin") + end = Timestamp("2017-10-29 04:00:00", tz="Europe/Berlin") + index = pd.date_range(start, end, freq="15min") + data = box(data=[1] * len(index), index=index) + result = data.drop(start) + expected_start = Timestamp("2017-10-29 00:15:00", tz="Europe/Berlin") + expected_idx = pd.date_range(expected_start, end, freq="15min") + expected = box(data=[1] * len(expected_idx), index=expected_idx) + tm.assert_equal(result, expected) + + def test_drop_preserve_names(self): + index = MultiIndex.from_arrays( + [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"] + ) + + df = DataFrame(np.random.randn(6, 3), index=index) + + result = df.drop([(0, 2)]) + assert result.index.names == ("one", "two") + + @pytest.mark.parametrize( + "operation", ["__iadd__", "__isub__", "__imul__", "__ipow__"] + ) + @pytest.mark.parametrize("inplace", [False, True]) + def test_inplace_drop_and_operation(self, operation, inplace): + # GH#30484 + df = DataFrame({"x": range(5)}) + expected = df.copy() + df["y"] = range(5) + y = df["y"] + + with tm.assert_produces_warning(None): + if inplace: + df.drop("y", axis=1, inplace=inplace) + else: + df = df.drop("y", axis=1, inplace=inplace) + + # Perform operation and check result + getattr(y, operation)(1) + tm.assert_frame_equal(df, expected) + + def test_drop_with_non_unique_multiindex(self): + # GH#36293 + mi = MultiIndex.from_arrays([["x", "y", "x"], ["i", "j", "i"]]) + df = DataFrame([1, 2, 3], index=mi) + result = df.drop(index="x") + expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]])) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("indexer", [("a", "a"), [("a", "a")]]) + def test_drop_tuple_with_non_unique_multiindex(self, indexer): + # GH#42771 + idx = MultiIndex.from_product([["a", "b"], ["a", "a"]]) + df = DataFrame({"x": range(len(idx))}, index=idx) + result = df.drop(index=[("a", "a")]) + expected = DataFrame( + {"x": [2, 3]}, 
index=MultiIndex.from_tuples([("b", "a"), ("b", "a")]) + ) + tm.assert_frame_equal(result, expected) + + def test_drop_with_duplicate_columns(self): + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + result = df.drop(["a"], axis=1) + expected = DataFrame([[1], [1], [1]], columns=["bar"]) + tm.assert_frame_equal(result, expected) + result = df.drop("a", axis=1) + tm.assert_frame_equal(result, expected) + + def test_drop_with_duplicate_columns2(self): + # drop buggy GH#6240 + df = DataFrame( + { + "A": np.random.randn(5), + "B": np.random.randn(5), + "C": np.random.randn(5), + "D": ["a", "b", "c", "d", "e"], + } + ) + + expected = df.take([0, 1, 1], axis=1) + df2 = df.take([2, 0, 1, 2, 1], axis=1) + result = df2.drop("C", axis=1) + tm.assert_frame_equal(result, expected) + + def test_drop_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.drop " + r"except for the argument 'labels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.drop("a", 1) + expected = DataFrame(index=[0, 1, 2]) + tm.assert_frame_equal(result, expected) + + def test_drop_inplace_no_leftover_column_reference(self): + # GH 13934 + df = DataFrame({"a": [1, 2, 3]}) + a = df.a + df.drop(["a"], axis=1, inplace=True) + tm.assert_index_equal(df.columns, Index([], dtype="object")) + a -= a.mean() + tm.assert_index_equal(df.columns, Index([], dtype="object")) + + def test_drop_level_missing_label_multiindex(self): + # GH 18561 + df = DataFrame(index=MultiIndex.from_product([range(3), range(3)])) + with pytest.raises(KeyError, match="labels \\[5\\] not found in level"): + df.drop(5, level=0) + + @pytest.mark.parametrize("idx, level", [(["a", "b"], 0), (["a"], None)]) + def test_drop_index_ea_dtype(self, any_numeric_ea_dtype, idx, level): + # GH#45860 + df = DataFrame( + {"a": [1, 2, 2, pd.NA], "b": 100}, dtype=any_numeric_ea_dtype + ).set_index(idx) + result = df.drop(Index([2, pd.NA]), level=level) + expected = DataFrame( + {"a": [1], "b": 100}, dtype=any_numeric_ea_dtype + ).set_index(idx) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py new file mode 100644 index 00000000..cd61f59a --- /dev/null +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -0,0 +1,488 @@ +from datetime import datetime +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + concat, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]]) +def test_drop_duplicates_with_misspelled_column_name(subset): + # GH 19730 + df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) + msg = re.escape("Index(['a'], dtype='object')") + + with pytest.raises(KeyError, match=msg): + df.drop_duplicates(subset) + + +def test_drop_duplicates(): + df = DataFrame( + { + "AAA": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("AAA") + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep="last") + expected = df.loc[[6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep=False) + expected 
= df.loc[[]] + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + expected = df.loc[[0, 1, 2, 3]] + result = df.drop_duplicates(np.array(["AAA", "B"])) + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates(["AAA", "B"]) + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AAA", "B"), keep="last") + expected = df.loc[[0, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AAA", "B"), keep=False) + expected = df.loc[[0]] + tm.assert_frame_equal(result, expected) + + # consider everything + df2 = df.loc[:, ["AAA", "B", "C"]] + + result = df2.drop_duplicates() + # in this case only + expected = df2.drop_duplicates(["AAA", "B"]) + tm.assert_frame_equal(result, expected) + + result = df2.drop_duplicates(keep="last") + expected = df2.drop_duplicates(["AAA", "B"], keep="last") + tm.assert_frame_equal(result, expected) + + result = df2.drop_duplicates(keep=False) + expected = df2.drop_duplicates(["AAA", "B"], keep=False) + tm.assert_frame_equal(result, expected) + + # integers + result = df.drop_duplicates("C") + expected = df.iloc[[0, 2]] + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates("C", keep="last") + expected = df.iloc[[-2, -1]] + tm.assert_frame_equal(result, expected) + + df["E"] = df["C"].astype("int8") + result = df.drop_duplicates("E") + expected = df.iloc[[0, 2]] + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates("E", keep="last") + expected = df.iloc[[-2, -1]] + tm.assert_frame_equal(result, expected) + + # GH 11376 + df = DataFrame({"x": [7, 6, 3, 3, 4, 8, 0], "y": [0, 6, 5, 5, 9, 1, 2]}) + expected = df.loc[df.index != 3] + tm.assert_frame_equal(df.drop_duplicates(), expected) + + df = DataFrame([[1, 0], [0, 2]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + df = DataFrame([[-2, 0], [0, -4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + x = np.iinfo(np.int64).max / 3 * 2 + df = DataFrame([[-x, x], [0, x + 4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + df = DataFrame([[-x, x], [x, x + 4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + # GH 11864 + df = DataFrame([i] * 9 for i in range(16)) + df = concat([df, DataFrame([[1] + [0] * 8])], ignore_index=True) + + for keep in ["first", "last", False]: + assert df.duplicated(keep=keep).sum() == 0 + + +def test_drop_duplicates_with_duplicate_column_names(): + # GH17836 + df = DataFrame([[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"]) + + result0 = df.drop_duplicates() + tm.assert_frame_equal(result0, df) + + result1 = df.drop_duplicates("a") + expected1 = df[:2] + tm.assert_frame_equal(result1, expected1) + + +def test_drop_duplicates_for_take_all(): + df = DataFrame( + { + "AAA": ["foo", "bar", "baz", "bar", "foo", "bar", "qux", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("AAA") + expected = df.iloc[[0, 1, 2, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep="last") + expected = df.iloc[[2, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep=False) + expected = df.iloc[[2, 6]] + tm.assert_frame_equal(result, expected) + + # multiple columns + result = df.drop_duplicates(["AAA", "B"]) + expected = df.iloc[[0, 1, 2, 3, 4, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["AAA", "B"], keep="last") + 
expected = df.iloc[[0, 1, 2, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["AAA", "B"], keep=False) + expected = df.iloc[[0, 1, 2, 6]] + tm.assert_frame_equal(result, expected) + + +def test_drop_duplicates_tuple(): + df = DataFrame( + { + ("AA", "AB"): ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates(("AA", "AB")) + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AA", "AB"), keep="last") + expected = df.loc[[6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AA", "AB"), keep=False) + expected = df.loc[[]] # empty df + assert len(result) == 0 + tm.assert_frame_equal(result, expected) + + # multi column + expected = df.loc[[0, 1, 2, 3]] + result = df.drop_duplicates((("AA", "AB"), "B")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "df", + [ + DataFrame(), + DataFrame(columns=[]), + DataFrame(columns=["A", "B", "C"]), + DataFrame(index=[]), + DataFrame(index=["A", "B", "C"]), + ], +) +def test_drop_duplicates_empty(df): + # GH 20516 + result = df.drop_duplicates() + tm.assert_frame_equal(result, df) + + result = df.copy() + result.drop_duplicates(inplace=True) + tm.assert_frame_equal(result, df) + + +def test_drop_duplicates_NA(): + # none + df = DataFrame( + { + "A": [None, None, "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 1.0, 1, 1.0], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("A") + expected = df.loc[[0, 2, 3]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep="last") + expected = df.loc[[1, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep=False) + expected = df.loc[[]] # empty df + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + result = df.drop_duplicates(["A", "B"]) + expected = df.loc[[0, 2, 3, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["A", "B"], keep="last") + expected = df.loc[[1, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["A", "B"], keep=False) + expected = df.loc[[6]] + tm.assert_frame_equal(result, expected) + + # nan + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 1.0, 1, 1.0], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("C") + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep="last") + expected = df.loc[[3, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep=False) + expected = df.loc[[]] # empty df + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + result = df.drop_duplicates(["C", "B"]) + expected = df.loc[[0, 1, 2, 4]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["C", "B"], keep="last") + expected = df.loc[[1, 3, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["C", "B"], keep=False) + expected = df.loc[[1]] + tm.assert_frame_equal(result, expected) + + +def 
test_drop_duplicates_NA_for_take_all(): + # none + df = DataFrame( + { + "A": [None, None, "foo", "bar", "foo", "baz", "bar", "qux"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 2.0, 3, 1.0], + } + ) + + # single column + result = df.drop_duplicates("A") + expected = df.iloc[[0, 2, 3, 5, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep="last") + expected = df.iloc[[1, 4, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep=False) + expected = df.iloc[[5, 7]] + tm.assert_frame_equal(result, expected) + + # nan + + # single column + result = df.drop_duplicates("C") + expected = df.iloc[[0, 1, 5, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep="last") + expected = df.iloc[[3, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep=False) + expected = df.iloc[[5, 6]] + tm.assert_frame_equal(result, expected) + + +def test_drop_duplicates_inplace(): + orig = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + df = orig.copy() + return_value = df.drop_duplicates("A", inplace=True) + expected = orig[:2] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + df = orig.copy() + return_value = df.drop_duplicates("A", keep="last", inplace=True) + expected = orig.loc[[6, 7]] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + df = orig.copy() + return_value = df.drop_duplicates("A", keep=False, inplace=True) + expected = orig.loc[[]] + result = df + tm.assert_frame_equal(result, expected) + assert len(df) == 0 + assert return_value is None + + # multi column + df = orig.copy() + return_value = df.drop_duplicates(["A", "B"], inplace=True) + expected = orig.loc[[0, 1, 2, 3]] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + df = orig.copy() + return_value = df.drop_duplicates(["A", "B"], keep="last", inplace=True) + expected = orig.loc[[0, 5, 6, 7]] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + df = orig.copy() + return_value = df.drop_duplicates(["A", "B"], keep=False, inplace=True) + expected = orig.loc[[0]] + result = df + tm.assert_frame_equal(result, expected) + assert return_value is None + + # consider everything + orig2 = orig.loc[:, ["A", "B", "C"]].copy() + + df2 = orig2.copy() + return_value = df2.drop_duplicates(inplace=True) + # in this case only + expected = orig2.drop_duplicates(["A", "B"]) + result = df2 + tm.assert_frame_equal(result, expected) + assert return_value is None + + df2 = orig2.copy() + return_value = df2.drop_duplicates(keep="last", inplace=True) + expected = orig2.drop_duplicates(["A", "B"], keep="last") + result = df2 + tm.assert_frame_equal(result, expected) + assert return_value is None + + df2 = orig2.copy() + return_value = df2.drop_duplicates(keep=False, inplace=True) + expected = orig2.drop_duplicates(["A", "B"], keep=False) + result = df2 + tm.assert_frame_equal(result, expected) + assert return_value is None + + +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize( + "origin_dict, output_dict, ignore_index, output_index", + [ + ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]), + ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]), + ({"A": [2, 2, 3], "B": [2, 2, 4]}, 
{"A": [2, 3], "B": [2, 4]}, True, [0, 1]), + ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]), + ], +) +def test_drop_duplicates_ignore_index( + inplace, origin_dict, output_dict, ignore_index, output_index +): + # GH 30114 + df = DataFrame(origin_dict) + expected = DataFrame(output_dict, index=output_index) + + if inplace: + result_df = df.copy() + result_df.drop_duplicates(ignore_index=ignore_index, inplace=inplace) + else: + result_df = df.drop_duplicates(ignore_index=ignore_index, inplace=inplace) + + tm.assert_frame_equal(result_df, expected) + tm.assert_frame_equal(df, DataFrame(origin_dict)) + + +def test_drop_duplicates_null_in_object_column(nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32992 + df = DataFrame([[1, nulls_fixture], [2, "a"]], dtype=object) + result = df.drop_duplicates() + tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize("keep", ["first", "last", False]) +def test_drop_duplicates_series_vs_dataframe(keep): + # GH#14192 + df = DataFrame( + { + "a": [1, 1, 1, "one", "one"], + "b": [2, 2, np.nan, np.nan, np.nan], + "c": [3, 3, np.nan, np.nan, "three"], + "d": [1, 2, 3, 4, 4], + "e": [ + datetime(2015, 1, 1), + datetime(2015, 1, 1), + datetime(2015, 2, 1), + NaT, + NaT, + ], + } + ) + for column in df.columns: + dropped_frame = df[[column]].drop_duplicates(keep=keep) + dropped_series = df[column].drop_duplicates(keep=keep) + tm.assert_frame_equal(dropped_frame, dropped_series.to_frame()) + + +@pytest.mark.parametrize("arg", [[1], 1, "True", [], 0]) +def test_drop_duplicates_non_boolean_ignore_index(arg): + # GH#38274 + df = DataFrame({"a": [1, 2, 1, 3]}) + msg = '^For argument "ignore_index" expected type bool, received type .*.$' + with pytest.raises(ValueError, match=msg): + df.drop_duplicates(ignore_index=arg) + + +def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) + msg = ( + "In a future version of pandas all arguments of " + "DataFrame.drop_duplicates except for the argument 'subset' " + "will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.drop_duplicates(["b", "c"], "last") + expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2]) + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/frame/methods/test_droplevel.py b/pandas/tests/frame/methods/test_droplevel.py new file mode 100644 index 00000000..e1302d4b --- /dev/null +++ b/pandas/tests/frame/methods/test_droplevel.py @@ -0,0 +1,36 @@ +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, +) +import pandas._testing as tm + + +class TestDropLevel: + def test_droplevel(self, frame_or_series): + # GH#20342 + cols = MultiIndex.from_tuples( + [("c", "e"), ("d", "f")], names=["level_1", "level_2"] + ) + mi = MultiIndex.from_tuples([(1, 2), (5, 6), (9, 10)], names=["a", "b"]) + df = DataFrame([[3, 4], [7, 8], [11, 12]], index=mi, columns=cols) + if frame_or_series is not DataFrame: + df = df.iloc[:, 0] + + # test that dropping of a level in index works + expected = df.reset_index("a", drop=True) + result = df.droplevel("a", axis="index") + tm.assert_equal(result, expected) + + if frame_or_series is DataFrame: + # test that dropping of a level in columns works + expected = df.copy() + expected.columns = Index(["c", "d"], name="level_1") + result = df.droplevel("level_2", axis="columns") + tm.assert_equal(result, expected) + else: + # test that droplevel raises ValueError on axis != 
0 + with pytest.raises(ValueError, match="No axis named columns"): + df.droplevel(1, axis="columns") diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py new file mode 100644 index 00000000..53d9f754 --- /dev/null +++ b/pandas/tests/frame/methods/test_dropna.py @@ -0,0 +1,288 @@ +import datetime + +import dateutil +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameMissingData: + def test_dropEmptyRows(self, float_frame): + N = len(float_frame.index) + mat = np.random.randn(N) + mat[:5] = np.nan + + frame = DataFrame({"foo": mat}, index=float_frame.index) + original = Series(mat, index=float_frame.index, name="foo") + expected = original.dropna() + inplace_frame1, inplace_frame2 = frame.copy(), frame.copy() + + smaller_frame = frame.dropna(how="all") + # check that original was preserved + tm.assert_series_equal(frame["foo"], original) + return_value = inplace_frame1.dropna(how="all", inplace=True) + tm.assert_series_equal(smaller_frame["foo"], expected) + tm.assert_series_equal(inplace_frame1["foo"], expected) + assert return_value is None + + smaller_frame = frame.dropna(how="all", subset=["foo"]) + return_value = inplace_frame2.dropna(how="all", subset=["foo"], inplace=True) + tm.assert_series_equal(smaller_frame["foo"], expected) + tm.assert_series_equal(inplace_frame2["foo"], expected) + assert return_value is None + + def test_dropIncompleteRows(self, float_frame): + N = len(float_frame.index) + mat = np.random.randn(N) + mat[:5] = np.nan + + frame = DataFrame({"foo": mat}, index=float_frame.index) + frame["bar"] = 5 + original = Series(mat, index=float_frame.index, name="foo") + inp_frame1, inp_frame2 = frame.copy(), frame.copy() + + smaller_frame = frame.dropna() + tm.assert_series_equal(frame["foo"], original) + return_value = inp_frame1.dropna(inplace=True) + + exp = Series(mat[5:], index=float_frame.index[5:], name="foo") + tm.assert_series_equal(smaller_frame["foo"], exp) + tm.assert_series_equal(inp_frame1["foo"], exp) + assert return_value is None + + samesize_frame = frame.dropna(subset=["bar"]) + tm.assert_series_equal(frame["foo"], original) + assert (frame["bar"] == 5).all() + return_value = inp_frame2.dropna(subset=["bar"], inplace=True) + tm.assert_index_equal(samesize_frame.index, float_frame.index) + tm.assert_index_equal(inp_frame2.index, float_frame.index) + assert return_value is None + + def test_dropna(self): + df = DataFrame(np.random.randn(6, 4)) + df.iloc[:2, 2] = np.nan + + dropped = df.dropna(axis=1) + expected = df.loc[:, [0, 1, 3]] + inp = df.copy() + return_value = inp.dropna(axis=1, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + assert return_value is None + + dropped = df.dropna(axis=0) + expected = df.loc[list(range(2, 6))] + inp = df.copy() + return_value = inp.dropna(axis=0, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + assert return_value is None + + # threshold + dropped = df.dropna(axis=1, thresh=5) + expected = df.loc[:, [0, 1, 3]] + inp = df.copy() + return_value = inp.dropna(axis=1, thresh=5, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + assert return_value is None + + dropped = df.dropna(axis=0, thresh=4) + expected = df.loc[range(2, 6)] + inp = df.copy() + return_value = inp.dropna(axis=0, thresh=4, inplace=True) + 
tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + assert return_value is None + + dropped = df.dropna(axis=1, thresh=4) + tm.assert_frame_equal(dropped, df) + + dropped = df.dropna(axis=1, thresh=3) + tm.assert_frame_equal(dropped, df) + + # subset + dropped = df.dropna(axis=0, subset=[0, 1, 3]) + inp = df.copy() + return_value = inp.dropna(axis=0, subset=[0, 1, 3], inplace=True) + tm.assert_frame_equal(dropped, df) + tm.assert_frame_equal(inp, df) + assert return_value is None + + # all + dropped = df.dropna(axis=1, how="all") + tm.assert_frame_equal(dropped, df) + + df[2] = np.nan + dropped = df.dropna(axis=1, how="all") + expected = df.loc[:, [0, 1, 3]] + tm.assert_frame_equal(dropped, expected) + + # bad input + msg = "No axis named 3 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.dropna(axis=3) + + def test_drop_and_dropna_caching(self): + # tst that cacher updates + original = Series([1, 2, np.nan], name="A") + expected = Series([1, 2], dtype=original.dtype, name="A") + df = DataFrame({"A": original.values.copy()}) + df2 = df.copy() + df["A"].dropna() + tm.assert_series_equal(df["A"], original) + + ser = df["A"] + return_value = ser.dropna(inplace=True) + tm.assert_series_equal(ser, expected) + tm.assert_series_equal(df["A"], original) + assert return_value is None + + df2["A"].drop([1]) + tm.assert_series_equal(df2["A"], original) + + ser = df2["A"] + return_value = ser.drop([1], inplace=True) + tm.assert_series_equal(ser, original.drop([1])) + tm.assert_series_equal(df2["A"], original) + assert return_value is None + + def test_dropna_corner(self, float_frame): + # bad input + msg = "invalid how option: foo" + with pytest.raises(ValueError, match=msg): + float_frame.dropna(how="foo") + # non-existent column - 8303 + with pytest.raises(KeyError, match=r"^\['X'\]$"): + float_frame.dropna(subset=["A", "X"]) + + def test_dropna_multiple_axes(self): + df = DataFrame( + [ + [1, np.nan, 2, 3], + [4, np.nan, 5, 6], + [np.nan, np.nan, np.nan, np.nan], + [7, np.nan, 8, 9], + ] + ) + + # GH20987 + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=[0, 1]) + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=(0, 1)) + + inp = df.copy() + with pytest.raises(TypeError, match="supplying multiple axes"): + inp.dropna(how="all", axis=(0, 1), inplace=True) + + def test_dropna_tz_aware_datetime(self): + # GH13407 + df = DataFrame() + dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc()) + dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc()) + df["Time"] = [dt1] + result = df.dropna(axis=0) + expected = DataFrame({"Time": [dt1]}) + tm.assert_frame_equal(result, expected) + + # Ex2 + df = DataFrame({"Time": [dt1, None, np.nan, dt2]}) + result = df.dropna(axis=0) + expected = DataFrame([dt1, dt2], columns=["Time"], index=[0, 3]) + tm.assert_frame_equal(result, expected) + + def test_dropna_categorical_interval_index(self): + # GH 25087 + ii = pd.IntervalIndex.from_breaks([0, 2.78, 3.14, 6.28]) + ci = pd.CategoricalIndex(ii) + df = DataFrame({"A": list("abc")}, index=ci) + + expected = df + result = df.dropna() + tm.assert_frame_equal(result, expected) + + def test_dropna_with_duplicate_columns(self): + df = DataFrame( + { + "A": np.random.randn(5), + "B": np.random.randn(5), + "C": np.random.randn(5), + "D": ["a", "b", "c", "d", "e"], + } + ) + df.iloc[2, [0, 1, 2]] = np.nan + df.iloc[0, 0] = np.nan + df.iloc[1, 1] = np.nan + msg = "will 
attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.iloc[:, 3] = np.nan + expected = df.dropna(subset=["A", "B", "C"], how="all") + expected.columns = ["A", "A", "B", "C"] + + df.columns = ["A", "A", "B", "C"] + + result = df.dropna(subset=["A", "C"], how="all") + tm.assert_frame_equal(result, expected) + + def test_dropna_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.dropna " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.dropna(1) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_set_single_column_subset(self): + # GH 41021 + df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.NaN, 5]}) + expected = DataFrame( + {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=[0, 2] + ) + result = df.dropna(subset="C") + tm.assert_frame_equal(result, expected) + + def test_single_column_not_present_in_axis(self): + # GH 41021 + df = DataFrame({"A": [1, 2, 3]}) + + # Column not present + with pytest.raises(KeyError, match="['D']"): + df.dropna(subset="D", axis=0) + + def test_subset_is_nparray(self): + # GH 41021 + df = DataFrame({"A": [1, 2, np.NaN], "B": list("abc"), "C": [4, np.NaN, 5]}) + expected = DataFrame({"A": [1.0], "B": ["a"], "C": [4.0]}) + result = df.dropna(subset=np.array(["A", "C"])) + tm.assert_frame_equal(result, expected) + + def test_no_nans_in_frame(self, axis): + # GH#41965 + df = DataFrame([[1, 2], [3, 4]], columns=pd.RangeIndex(0, 2)) + expected = df.copy() + result = df.dropna(axis=axis) + tm.assert_frame_equal(result, expected, check_index_type=True) + + def test_how_thresh_param_incompatible(self): + # GH46575 + df = DataFrame([1, 2, pd.NA]) + msg = "You cannot set both the how and thresh arguments at the same time" + with pytest.raises(TypeError, match=msg): + df.dropna(how="all", thresh=2) + + with pytest.raises(TypeError, match=msg): + df.dropna(how="any", thresh=2) + + with pytest.raises(TypeError, match=msg): + df.dropna(how=None, thresh=None) diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py new file mode 100644 index 00000000..87e6ed5b --- /dev/null +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -0,0 +1,155 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, + option_context, +) +import pandas._testing as tm + + +def _check_cast(df, v): + """ + Check if all dtypes of df are equal to v + """ + assert all(s.dtype.name == v for _, s in df.items()) + + +class TestDataFrameDataTypes: + def test_empty_frame_dtypes(self): + empty_df = DataFrame() + tm.assert_series_equal(empty_df.dtypes, Series(dtype=object)) + + nocols_df = DataFrame(index=[1, 2, 3]) + tm.assert_series_equal(nocols_df.dtypes, Series(dtype=object)) + + norows_df = DataFrame(columns=list("abc")) + tm.assert_series_equal(norows_df.dtypes, Series(object, index=list("abc"))) + + norows_int_df = DataFrame(columns=list("abc")).astype(np.int32) + tm.assert_series_equal( + norows_int_df.dtypes, Series(np.dtype("int32"), index=list("abc")) + ) + + df = DataFrame({"a": 1, "b": True, "c": 1.0}, index=[1, 2, 3]) + ex_dtypes = Series({"a": np.int64, "b": np.bool_, "c": np.float64}) + 
tm.assert_series_equal(df.dtypes, ex_dtypes) + + # same but for empty slice of df + tm.assert_series_equal(df[:0].dtypes, ex_dtypes) + + def test_datetime_with_tz_dtypes(self): + tzframe = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": date_range("20130101", periods=3, tz="US/Eastern"), + "C": date_range("20130101", periods=3, tz="CET"), + } + ) + tzframe.iloc[1, 1] = pd.NaT + tzframe.iloc[1, 2] = pd.NaT + result = tzframe.dtypes.sort_index() + expected = Series( + [ + np.dtype("datetime64[ns]"), + DatetimeTZDtype("ns", "US/Eastern"), + DatetimeTZDtype("ns", "CET"), + ], + ["A", "B", "C"], + ) + + tm.assert_series_equal(result, expected) + + def test_dtypes_are_correct_after_column_slice(self): + # GH6525 + df = DataFrame(index=range(5), columns=list("abc"), dtype=np.float_) + tm.assert_series_equal( + df.dtypes, + Series({"a": np.float_, "b": np.float_, "c": np.float_}), + ) + tm.assert_series_equal(df.iloc[:, 2:].dtypes, Series({"c": np.float_})) + tm.assert_series_equal( + df.dtypes, + Series({"a": np.float_, "b": np.float_, "c": np.float_}), + ) + + @pytest.mark.parametrize( + "data", + [pd.NA, True], + ) + def test_dtypes_are_correct_after_groupby_last(self, data): + # GH46409 + df = DataFrame( + {"id": [1, 2, 3, 4], "test": [True, pd.NA, data, False]} + ).convert_dtypes() + result = df.groupby("id").last().test + expected = df.set_index("id").test + assert result.dtype == pd.BooleanDtype() + tm.assert_series_equal(expected, result) + + def test_dtypes_gh8722(self, float_string_frame): + float_string_frame["bool"] = float_string_frame["A"] > 0 + result = float_string_frame.dtypes + expected = Series( + {k: v.dtype for k, v in float_string_frame.items()}, index=result.index + ) + tm.assert_series_equal(result, expected) + + # compat, GH 8722 + with option_context("use_inf_as_na", True): + df = DataFrame([[1]]) + result = df.dtypes + tm.assert_series_equal(result, Series({0: np.dtype("int64")})) + + def test_dtypes_timedeltas(self): + df = DataFrame( + { + "A": Series(date_range("2012-1-1", periods=3, freq="D")), + "B": Series([timedelta(days=i) for i in range(3)]), + } + ) + result = df.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")], index=list("AB") + ) + tm.assert_series_equal(result, expected) + + df["C"] = df["A"] + df["B"] + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + ], + index=list("ABC"), + ) + tm.assert_series_equal(result, expected) + + # mixed int types + df["D"] = 1 + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + np.dtype("int64"), + ], + index=list("ABCD"), + ) + tm.assert_series_equal(result, expected) + + def test_frame_apply_np_array_return_type(self): + # GH 35517 + df = DataFrame([["foo"]]) + result = df.apply(lambda col: np.array("bar")) + expected = Series(["bar"]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_duplicated.py b/pandas/tests/frame/methods/test_duplicated.py new file mode 100644 index 00000000..9d46a8ab --- /dev/null +++ b/pandas/tests/frame/methods/test_duplicated.py @@ -0,0 +1,113 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]]) +def test_duplicated_with_misspelled_column_name(subset): + # GH 19730 + df = 
DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) + msg = re.escape("Index(['a'], dtype='object')") + + with pytest.raises(KeyError, match=msg): + df.duplicated(subset) + + +@pytest.mark.slow +def test_duplicated_do_not_fail_on_wide_dataframes(): + # gh-21524 + # Given the wide dataframe with a lot of columns + # with different (important!) values + data = {f"col_{i:02d}": np.random.randint(0, 1000, 30000) for i in range(100)} + df = DataFrame(data).T + result = df.duplicated() + + # Then duplicates produce the bool Series as a result and don't fail during + # calculation. Actual values doesn't matter here, though usually it's all + # False in this case + assert isinstance(result, Series) + assert result.dtype == np.bool_ + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True])), + ("last", Series([True, True, False, False, False])), + (False, Series([True, True, True, False, True])), + ], +) +def test_duplicated_keep(keep, expected): + df = DataFrame({"A": [0, 1, 1, 2, 0], "B": ["a", "b", "b", "c", "a"]}) + + result = df.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(reason="GH#21720; nan/None falsely considered equal") +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True])), + ("last", Series([True, True, False, False, False])), + (False, Series([True, True, True, False, True])), + ], +) +def test_duplicated_nan_none(keep, expected): + df = DataFrame({"C": [np.nan, 3, 3, None, np.nan], "x": 1}, dtype=object) + + result = df.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("subset", [None, ["A", "B"], "A"]) +def test_duplicated_subset(subset, keep): + df = DataFrame( + { + "A": [0, 1, 1, 2, 0], + "B": ["a", "b", "b", "c", "a"], + "C": [np.nan, 3, 3, None, np.nan], + } + ) + + if subset is None: + subset = list(df.columns) + elif isinstance(subset, str): + # need to have a DataFrame, not a Series + # -> select columns with singleton list, not string + subset = [subset] + + expected = df[subset].duplicated(keep=keep) + result = df.duplicated(keep=keep, subset=subset) + tm.assert_series_equal(result, expected) + + +def test_duplicated_on_empty_frame(): + # GH 25184 + + df = DataFrame(columns=["a", "b"]) + dupes = df.duplicated("a") + + result = df[dupes] + expected = df.copy() + tm.assert_frame_equal(result, expected) + + +def test_frame_datetime64_duplicated(): + dates = date_range("2010-07-01", end="2010-08-05") + + tst = DataFrame({"symbol": "AAA", "date": dates}) + result = tst.duplicated(["date", "symbol"]) + assert (-result).all() + + tst = DataFrame({"date": dates}) + result = tst.date.duplicated() + assert (-result).all() diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py new file mode 100644 index 00000000..dddd6c6d --- /dev/null +++ b/pandas/tests/frame/methods/test_equals.py @@ -0,0 +1,82 @@ +import numpy as np + +from pandas import ( + DataFrame, + date_range, +) +import pandas._testing as tm + + +class TestEquals: + def test_dataframe_not_equal(self): + # see GH#28839 + df1 = DataFrame({"a": [1, 2], "b": ["s", "d"]}) + df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]}) + assert df1.equals(df2) is False + + def test_equals_different_blocks(self, using_array_manager): + # GH#9330 + df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]}) + df1 = df0.reset_index()[["A", "B", "C"]] + if not using_array_manager: + # this assert 
verifies that the above operations have + # induced a block rearrangement + assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype + + # do the real tests + tm.assert_frame_equal(df0, df1) + assert df0.equals(df1) + assert df1.equals(df0) + + def test_equals(self): + # Add object dtype column with nans + index = np.random.random(10) + df1 = DataFrame(np.random.random(10), index=index, columns=["floats"]) + df1["text"] = "the sky is so blue. we could use more chocolate.".split() + df1["start"] = date_range("2000-1-1", periods=10, freq="T") + df1["end"] = date_range("2000-1-1", periods=10, freq="D") + df1["diff"] = df1["end"] - df1["start"] + df1["bool"] = np.arange(10) % 3 == 0 + df1.loc[::2] = np.nan + df2 = df1.copy() + assert df1["text"].equals(df2["text"]) + assert df1["start"].equals(df2["start"]) + assert df1["end"].equals(df2["end"]) + assert df1["diff"].equals(df2["diff"]) + assert df1["bool"].equals(df2["bool"]) + assert df1.equals(df2) + assert not df1.equals(object) + + # different dtype + different = df1.copy() + different["floats"] = different["floats"].astype("float32") + assert not df1.equals(different) + + # different index + different_index = -index + different = df2.set_index(different_index) + assert not df1.equals(different) + + # different columns + different = df2.copy() + different.columns = df2.columns[::-1] + assert not df1.equals(different) + + # DatetimeIndex + index = date_range("2000-1-1", periods=10, freq="T") + df1 = df1.set_index(index) + df2 = df1.copy() + assert df1.equals(df2) + + # MultiIndex + df3 = df1.set_index(["text"], append=True) + df2 = df1.set_index(["text"], append=True) + assert df3.equals(df2) + + df2 = df1.set_index(["floats"], append=True) + assert not df3.equals(df2) + + # NaN in index + df3 = df1.set_index(["floats"], append=True) + df2 = df1.set_index(["floats"], append=True) + assert df3.equals(df2) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py new file mode 100644 index 00000000..8716a181 --- /dev/null +++ b/pandas/tests/frame/methods/test_explode.py @@ -0,0 +1,277 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_error(): + df = pd.DataFrame( + {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} + ) + with pytest.raises( + ValueError, match="column must be a scalar, tuple, or list thereof" + ): + df.explode([list("AA")]) + + with pytest.raises(ValueError, match="column must be unique"): + df.explode(list("AA")) + + df.columns = list("AA") + with pytest.raises(ValueError, match="columns must be unique"): + df.explode("A") + + +@pytest.mark.parametrize( + "input_subset, error_message", + [ + ( + list("AC"), + "columns must have matching element counts", + ), + ( + [], + "column must be nonempty", + ), + ( + list("AC"), + "columns must have matching element counts", + ), + ], +) +def test_error_multi_columns(input_subset, error_message): + # GH 39240 + df = pd.DataFrame( + { + "A": [[0, 1, 2], np.nan, [], (3, 4)], + "B": 1, + "C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]], + }, + index=list("abcd"), + ) + with pytest.raises(ValueError, match=error_message): + df.explode(input_subset) + + +@pytest.mark.parametrize( + "scalar", + ["a", 0, 1.5, pd.Timedelta("1 days"), pd.Timestamp("2019-12-31")], +) +def test_basic(scalar): + df = pd.DataFrame( + {scalar: pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} + ) + result = df.explode(scalar) + expected = pd.DataFrame( + { + scalar: 
pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object + ), + "B": 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_multi_index_rows(): + df = pd.DataFrame( + {"A": np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), "B": 1}, + index=pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]), + ) + + result = df.explode("A") + expected = pd.DataFrame( + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], + index=pd.MultiIndex.from_tuples( + [ + ("a", 1), + ("a", 1), + ("a", 1), + ("a", 2), + ("b", 1), + ("b", 2), + ("b", 2), + ] + ), + dtype=object, + ), + "B": 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_multi_index_columns(): + df = pd.DataFrame( + {("A", 1): np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), ("A", 2): 1} + ) + + result = df.explode(("A", 1)) + expected = pd.DataFrame( + { + ("A", 1): pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], + index=pd.Index([0, 0, 0, 1, 2, 3, 3]), + dtype=object, + ), + ("A", 2): 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_usecase(): + # explode a single column + # gh-10511 + df = pd.DataFrame( + [[11, range(5), 10], [22, range(3), 20]], columns=list("ABC") + ).set_index("C") + result = df.explode("B") + + expected = pd.DataFrame( + { + "A": [11, 11, 11, 11, 11, 22, 22, 22], + "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object), + "C": [10, 10, 10, 10, 10, 20, 20, 20], + }, + columns=list("ABC"), + ).set_index("C") + + tm.assert_frame_equal(result, expected) + + # gh-8517 + df = pd.DataFrame( + [["2014-01-01", "Alice", "A B"], ["2014-01-02", "Bob", "C D"]], + columns=["dt", "name", "text"], + ) + result = df.assign(text=df.text.str.split(" ")).explode("text") + expected = pd.DataFrame( + [ + ["2014-01-01", "Alice", "A"], + ["2014-01-01", "Alice", "B"], + ["2014-01-02", "Bob", "C"], + ["2014-01-02", "Bob", "D"], + ], + columns=["dt", "name", "text"], + index=[0, 0, 1, 1], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_dict, input_index, expected_dict, expected_index", + [ + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + [0, 0], + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + [0, 0, 0, 0], + ), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.Index([0, 0], name="my_index"), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.Index([0, 0, 0, 0], name="my_index"), + ), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0], [1, 1]], names=["my_first_index", "my_second_index"] + ), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0, 0, 0], [1, 1, 1, 1]], + names=["my_first_index", "my_second_index"], + ), + ), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.MultiIndex.from_arrays([[0, 0], [1, 1]], names=["my_index", None]), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0, 0, 0], [1, 1, 1, 1]], names=["my_index", None] + ), + ), + ], +) +def test_duplicate_index(input_dict, input_index, expected_dict, expected_index): + # GH 28005 + df = pd.DataFrame(input_dict, index=input_index) + result = df.explode("col1") + expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object) + tm.assert_frame_equal(result, expected) + + +def test_ignore_index(): + # GH 34932 + df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}) + result = df.explode("values", 
ignore_index=True) + expected = pd.DataFrame( + {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3] + ) + tm.assert_frame_equal(result, expected) + + +def test_explode_sets(): + # https://github.com/pandas-dev/pandas/issues/35614 + df = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1]) + result = df.explode(column="a").sort_values(by="a") + expected = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_subset, expected_dict, expected_index", + [ + ( + list("AC"), + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4, np.nan], + index=list("aaabcdde"), + dtype=object, + ), + "B": 1, + "C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan], + }, + list("aaabcdde"), + ), + ( + list("A"), + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4, np.nan], + index=list("aaabcdde"), + dtype=object, + ), + "B": 1, + "C": [ + ["a", "b", "c"], + ["a", "b", "c"], + ["a", "b", "c"], + "foo", + [], + ["d", "e"], + ["d", "e"], + np.nan, + ], + }, + list("aaabcdde"), + ), + ], +) +def test_multi_columns(input_subset, expected_dict, expected_index): + # GH 39240 + df = pd.DataFrame( + { + "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan], + "B": 1, + "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan], + }, + index=list("abcde"), + ) + result = df.explode(input_subset) + expected = pd.DataFrame(expected_dict, expected_index) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py new file mode 100644 index 00000000..ccd564b4 --- /dev/null +++ b/pandas/tests/frame/methods/test_fillna.py @@ -0,0 +1,808 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + NaT, + PeriodIndex, + Series, + TimedeltaIndex, + Timestamp, + date_range, + to_datetime, +) +import pandas._testing as tm +from pandas.tests.frame.common import _check_mixed_float + + +class TestFillNA: + @td.skip_array_manager_not_yet_implemented + def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): + df = DataFrame( + {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]} + ) + df.columns = ["A", "A", "A"] + orig = df[:] + + df.fillna({"A": 2}, inplace=True) + # The first and third columns can be set inplace, while the second cannot. + + expected = DataFrame( + {"A": [2.0] * 3, "B": [2, Timestamp(1), 2], "C": [2, "foo", 2]} + ) + expected.columns = ["A", "A", "A"] + tm.assert_frame_equal(df, expected) + + # TODO: what's the expected/desired behavior with CoW? + if not using_copy_on_write: + assert tm.shares_memory(df.iloc[:, 0], orig.iloc[:, 0]) + assert not tm.shares_memory(df.iloc[:, 1], orig.iloc[:, 1]) + if not using_copy_on_write: + assert tm.shares_memory(df.iloc[:, 2], orig.iloc[:, 2]) + + @td.skip_array_manager_not_yet_implemented + def test_fillna_on_column_view(self, using_copy_on_write): + # GH#46149 avoid unnecessary copies + arr = np.full((40, 50), np.nan) + df = DataFrame(arr) + + df[0].fillna(-1, inplace=True) + if using_copy_on_write: + assert np.isnan(arr[:, 0]).all() + else: + assert (arr[:, 0] == -1).all() + + # i.e. 
we didn't create a new 49-column block + assert len(df._mgr.arrays) == 1 + assert np.shares_memory(df.values, arr) + + def test_fillna_datetime(self, datetime_frame): + tf = datetime_frame + tf.loc[tf.index[:5], "A"] = np.nan + tf.loc[tf.index[-5:], "A"] = np.nan + + zero_filled = datetime_frame.fillna(0) + assert (zero_filled.loc[zero_filled.index[:5], "A"] == 0).all() + + padded = datetime_frame.fillna(method="pad") + assert np.isnan(padded.loc[padded.index[:5], "A"]).all() + assert ( + padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"] + ).all() + + msg = "Must specify a fill 'value' or 'method'" + with pytest.raises(ValueError, match=msg): + datetime_frame.fillna() + msg = "Cannot specify both 'value' and 'method'" + with pytest.raises(ValueError, match=msg): + datetime_frame.fillna(5, method="ffill") + + def test_fillna_mixed_type(self, float_string_frame): + + mf = float_string_frame + mf.loc[mf.index[5:20], "foo"] = np.nan + mf.loc[mf.index[-10:], "A"] = np.nan + # TODO: make stronger assertion here, GH 25640 + mf.fillna(value=0) + mf.fillna(method="pad") + + def test_fillna_mixed_float(self, mixed_float_frame): + + # mixed numeric (but no float16) + mf = mixed_float_frame.reindex(columns=["A", "B", "D"]) + mf.loc[mf.index[-10:], "A"] = np.nan + result = mf.fillna(value=0) + _check_mixed_float(result, dtype={"C": None}) + + result = mf.fillna(method="pad") + _check_mixed_float(result, dtype={"C": None}) + + def test_fillna_empty(self): + # empty frame (GH#2778) + df = DataFrame(columns=["x"]) + for m in ["pad", "backfill"]: + df.x.fillna(method=m, inplace=True) + df.x.fillna(method=m) + + def test_fillna_different_dtype(self): + # with different dtype (GH#3386) + df = DataFrame( + [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]] + ) + + result = df.fillna({2: "foo"}) + expected = DataFrame( + [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]] + ) + tm.assert_frame_equal(result, expected) + + return_value = df.fillna({2: "foo"}, inplace=True) + tm.assert_frame_equal(df, expected) + assert return_value is None + + def test_fillna_limit_and_value(self): + # limit and value + df = DataFrame(np.random.randn(10, 3)) + df.iloc[2:7, 0] = np.nan + df.iloc[3:5, 2] = np.nan + + expected = df.copy() + expected.iloc[2, 0] = 999 + expected.iloc[3, 2] = 999 + result = df.fillna(999, limit=1) + tm.assert_frame_equal(result, expected) + + def test_fillna_datelike(self): + # with datelike + # GH#6344 + df = DataFrame( + { + "Date": [NaT, Timestamp("2014-1-1")], + "Date2": [Timestamp("2013-1-1"), NaT], + } + ) + + expected = df.copy() + expected["Date"] = expected["Date"].fillna(df.loc[df.index[0], "Date2"]) + result = df.fillna(value={"Date": df["Date2"]}) + tm.assert_frame_equal(result, expected) + + def test_fillna_tzaware(self): + # with timezone + # GH#15855 + df = DataFrame({"A": [Timestamp("2012-11-11 00:00:00+01:00"), NaT]}) + exp = DataFrame( + { + "A": [ + Timestamp("2012-11-11 00:00:00+01:00"), + Timestamp("2012-11-11 00:00:00+01:00"), + ] + } + ) + tm.assert_frame_equal(df.fillna(method="pad"), exp) + + df = DataFrame({"A": [NaT, Timestamp("2012-11-11 00:00:00+01:00")]}) + exp = DataFrame( + { + "A": [ + Timestamp("2012-11-11 00:00:00+01:00"), + Timestamp("2012-11-11 00:00:00+01:00"), + ] + } + ) + tm.assert_frame_equal(df.fillna(method="bfill"), exp) + + def test_fillna_tzaware_different_column(self): + # with timezone in another column + # GH#15522 + df = DataFrame( + { + "A": date_range("20130101", periods=4, 
tz="US/Eastern"), + "B": [1, 2, np.nan, np.nan], + } + ) + result = df.fillna(method="pad") + expected = DataFrame( + { + "A": date_range("20130101", periods=4, tz="US/Eastern"), + "B": [1.0, 2.0, 2.0, 2.0], + } + ) + tm.assert_frame_equal(result, expected) + + def test_na_actions_categorical(self): + + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + vals = ["a", "b", np.nan, "d"] + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3]) + vals2 = ["a", "b", "b", "d"] + df_exp_fill = DataFrame({"cats": cat2, "vals": vals2}) + cat3 = Categorical([1, 2, 3], categories=[1, 2, 3]) + vals3 = ["a", "b", np.nan] + df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3}) + cat4 = Categorical([1, 2], categories=[1, 2, 3]) + vals4 = ["a", "b"] + df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4}) + + # fillna + res = df.fillna(value={"cats": 3, "vals": "b"}) + tm.assert_frame_equal(res, df_exp_fill) + + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(TypeError, match=msg): + df.fillna(value={"cats": 4, "vals": "c"}) + + res = df.fillna(method="pad") + tm.assert_frame_equal(res, df_exp_fill) + + # dropna + res = df.dropna(subset=["cats"]) + tm.assert_frame_equal(res, df_exp_drop_cats) + + res = df.dropna() + tm.assert_frame_equal(res, df_exp_drop_all) + + # make sure that fillna takes missing values into account + c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) + df = DataFrame({"cats": c, "vals": [1, 2, 3]}) + + cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) + df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) + + res = df.fillna("a") + tm.assert_frame_equal(res, df_exp) + + def test_fillna_categorical_nan(self): + # GH#14021 + # np.nan should always be a valid filler + cat = Categorical([np.nan, 2, np.nan]) + val = Categorical([np.nan, np.nan, np.nan]) + df = DataFrame({"cats": cat, "vals": val}) + + # GH#32950 df.median() is poorly behaved because there is no + # Categorical.median + median = Series({"cats": 2.0, "vals": np.nan}) + + res = df.fillna(median) + v_exp = [np.nan, np.nan, np.nan] + df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category") + tm.assert_frame_equal(res, df_exp) + + result = df.cats.fillna(np.nan) + tm.assert_series_equal(result, df.cats) + + result = df.vals.fillna(np.nan) + tm.assert_series_equal(result, df.vals) + + idx = DatetimeIndex( + ["2011-01-01 09:00", "2016-01-01 23:45", "2011-01-01 09:00", NaT, NaT] + ) + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) + + idx = PeriodIndex(["2011-01", "2011-01", "2011-01", NaT, NaT], freq="M") + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) + + idx = TimedeltaIndex(["1 days", "2 days", "1 days", NaT, NaT]) + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) + + def test_fillna_downcast(self): + # GH#15277 + # infer int64 from float64 + df = DataFrame({"a": [1.0, np.nan]}) + result = df.fillna(0, downcast="infer") + expected = DataFrame({"a": [1, 0]}) + tm.assert_frame_equal(result, expected) + + # infer int64 from float64 when fillna value is a dict + df = DataFrame({"a": [1.0, np.nan]}) + result = df.fillna({"a": 0}, downcast="infer") + expected = DataFrame({"a": [1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_fillna_downcast_false(self, frame_or_series): + # GH#45603 preserve object dtype with downcast=False + obj = frame_or_series([1, 
2, 3], dtype="object") + result = obj.fillna("", downcast=False) + tm.assert_equal(result, obj) + + def test_fillna_downcast_noop(self, frame_or_series): + # GH#45423 + # Two relevant paths: + # 1) not _can_hold_na (e.g. integer) + # 2) _can_hold_na + noop + not can_hold_element + + obj = frame_or_series([1, 2, 3], dtype=np.int64) + res = obj.fillna("foo", downcast=np.dtype(np.int32)) + expected = obj.astype(np.int32) + tm.assert_equal(res, expected) + + obj2 = obj.astype(np.float64) + res2 = obj2.fillna("foo", downcast="infer") + expected2 = obj # get back int64 + tm.assert_equal(res2, expected2) + + res3 = obj2.fillna("foo", downcast=np.dtype(np.int32)) + tm.assert_equal(res3, expected) + + @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) + def test_fillna_dictlike_value_duplicate_colnames(self, columns): + # GH#43476 + df = DataFrame(np.nan, index=[0, 1], columns=columns) + with tm.assert_produces_warning(None): + result = df.fillna({"A": 0}) + + expected = df.copy() + expected["A"] = 0.0 + tm.assert_frame_equal(result, expected) + + def test_fillna_dtype_conversion(self): + # make sure that fillna on an empty frame works + df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) + result = df.dtypes + expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + result = df.fillna(1) + expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) + tm.assert_frame_equal(result, expected) + + # empty block + df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64") + result = df.fillna("nan") + expected = DataFrame("nan", index=range(3), columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0]) + def test_fillna_dtype_conversion_equiv_replace(self, val): + df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]}) + expected = df.replace(np.nan, val) + result = df.fillna(val) + tm.assert_frame_equal(result, expected) + + def test_fillna_datetime_columns(self): + # GH#7095 + df = DataFrame( + { + "A": [-1, -2, np.nan], + "B": date_range("20130101", periods=3), + "C": ["foo", "bar", None], + "D": ["foo2", "bar2", None], + }, + index=date_range("20130110", periods=3), + ) + result = df.fillna("?") + expected = DataFrame( + { + "A": [-1, -2, "?"], + "B": date_range("20130101", periods=3), + "C": ["foo", "bar", "?"], + "D": ["foo2", "bar2", "?"], + }, + index=date_range("20130110", periods=3), + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + { + "A": [-1, -2, np.nan], + "B": [Timestamp("2013-01-01"), Timestamp("2013-01-02"), NaT], + "C": ["foo", "bar", None], + "D": ["foo2", "bar2", None], + }, + index=date_range("20130110", periods=3), + ) + result = df.fillna("?") + expected = DataFrame( + { + "A": [-1, -2, "?"], + "B": [Timestamp("2013-01-01"), Timestamp("2013-01-02"), "?"], + "C": ["foo", "bar", "?"], + "D": ["foo2", "bar2", "?"], + }, + index=date_range("20130110", periods=3), + ) + tm.assert_frame_equal(result, expected) + + def test_ffill(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tm.assert_frame_equal( + datetime_frame.ffill(), datetime_frame.fillna(method="ffill") + ) + + def test_ffill_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.ffill " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, 
match=msg): + result = df.ffill(0) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_bfill(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tm.assert_frame_equal( + datetime_frame.bfill(), datetime_frame.fillna(method="bfill") + ) + + def test_bfill_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.bfill " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.bfill(0) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_frame_pad_backfill_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index, method="pad", limit=5) + + expected = df[:2].reindex(index).fillna(method="pad") + expected.iloc[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index, method="backfill", limit=5) + + expected = df[-2:].reindex(index).fillna(method="backfill") + expected.iloc[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index) + result = result.fillna(method="pad", limit=5) + + expected = df[:2].reindex(index).fillna(method="pad") + expected.iloc[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index) + result = result.fillna(method="backfill", limit=5) + + expected = df[-2:].reindex(index).fillna(method="backfill") + expected.iloc[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_fillna_skip_certain_blocks(self): + # don't try to fill boolean, int blocks + + df = DataFrame(np.random.randn(10, 4).astype(int)) + + # it works! 
+ df.fillna(np.nan) + + @pytest.mark.parametrize("type", [int, float]) + def test_fillna_positive_limit(self, type): + df = DataFrame(np.random.randn(10, 4)).astype(type) + + msg = "Limit must be greater than 0" + with pytest.raises(ValueError, match=msg): + df.fillna(0, limit=-5) + + @pytest.mark.parametrize("type", [int, float]) + def test_fillna_integer_limit(self, type): + df = DataFrame(np.random.randn(10, 4)).astype(type) + + msg = "Limit must be an integer" + with pytest.raises(ValueError, match=msg): + df.fillna(0, limit=0.5) + + def test_fillna_inplace(self): + df = DataFrame(np.random.randn(10, 4)) + df[1][:4] = np.nan + df[3][-4:] = np.nan + + expected = df.fillna(value=0) + assert expected is not df + + df.fillna(value=0, inplace=True) + tm.assert_frame_equal(df, expected) + + expected = df.fillna(value={0: 0}, inplace=True) + assert expected is None + + df[1][:4] = np.nan + df[3][-4:] = np.nan + expected = df.fillna(method="ffill") + assert expected is not df + + df.fillna(method="ffill", inplace=True) + tm.assert_frame_equal(df, expected) + + def test_fillna_dict_series(self): + df = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + } + ) + + result = df.fillna({"a": 0, "b": 5}) + + expected = df.copy() + expected["a"] = expected["a"].fillna(0) + expected["b"] = expected["b"].fillna(5) + tm.assert_frame_equal(result, expected) + + # it works + result = df.fillna({"a": 0, "b": 5, "d": 7}) + + # Series treated same as dict + result = df.fillna(df.max()) + expected = df.fillna(df.max().to_dict()) + tm.assert_frame_equal(result, expected) + + # disable this for now + with pytest.raises(NotImplementedError, match="column by column"): + df.fillna(df.max(1), axis=1) + + def test_fillna_dataframe(self): + # GH#8377 + df = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + }, + index=list("VWXYZ"), + ) + + # df2 may have different index and columns + df2 = DataFrame( + { + "a": [np.nan, 10, 20, 30, 40], + "b": [50, 60, 70, 80, 90], + "foo": ["bar"] * 5, + }, + index=list("VWXuZ"), + ) + + result = df.fillna(df2) + + # only those columns and indices which are shared get filled + expected = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, 40], + "b": [1, 2, 3, np.nan, 90], + "c": [np.nan, 1, 2, 3, 4], + }, + index=list("VWXYZ"), + ) + + tm.assert_frame_equal(result, expected) + + def test_fillna_columns(self): + df = DataFrame(np.random.randn(10, 10)) + df.values[:, ::2] = np.nan + + result = df.fillna(method="ffill", axis=1) + expected = df.T.fillna(method="pad").T + tm.assert_frame_equal(result, expected) + + df.insert(6, "foo", 5) + result = df.fillna(method="ffill", axis=1) + expected = df.astype(float).fillna(method="ffill", axis=1) + tm.assert_frame_equal(result, expected) + + def test_fillna_invalid_method(self, float_frame): + with pytest.raises(ValueError, match="ffil"): + float_frame.fillna(method="ffil") + + def test_fillna_invalid_value(self, float_frame): + # list + msg = '"value" parameter must be a scalar or dict, but you passed a "{}"' + with pytest.raises(TypeError, match=msg.format("list")): + float_frame.fillna([1, 2]) + # tuple + with pytest.raises(TypeError, match=msg.format("tuple")): + float_frame.fillna((1, 2)) + # frame with series + msg = ( + '"value" parameter must be a scalar, dict or Series, but you ' + 'passed a "DataFrame"' + ) + with pytest.raises(TypeError, match=msg): + float_frame.iloc[:, 0].fillna(float_frame) + + def 
test_fillna_col_reordering(self): + cols = ["COL." + str(i) for i in range(5, 0, -1)] + data = np.random.rand(20, 5) + df = DataFrame(index=range(20), columns=cols, data=data) + filled = df.fillna(method="ffill") + assert df.columns.tolist() == filled.columns.tolist() + + def test_fill_corner(self, float_frame, float_string_frame): + mf = float_string_frame + mf.loc[mf.index[5:20], "foo"] = np.nan + mf.loc[mf.index[-10:], "A"] = np.nan + + filled = float_string_frame.fillna(value=0) + assert (filled.loc[filled.index[5:20], "foo"] == 0).all() + del float_string_frame["foo"] + + empty_float = float_frame.reindex(columns=[]) + + # TODO(wesm): unused? + result = empty_float.fillna(value=0) # noqa + + def test_fillna_downcast_dict(self): + # GH#40809 + df = DataFrame({"col1": [1, np.nan]}) + result = df.fillna({"col1": 2}, downcast={"col1": "int64"}) + expected = DataFrame({"col1": [1, 2]}) + tm.assert_frame_equal(result, expected) + + def test_fillna_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3, np.nan]}, dtype=float) + msg = ( + r"In a future version of pandas all arguments of DataFrame.fillna " + r"except for the argument 'value' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.fillna(0, None, None) + expected = DataFrame({"a": [1, 2, 3, 0]}, dtype=float) + tm.assert_frame_equal(result, expected) + + def test_fillna_with_columns_and_limit(self): + # GH40989 + df = DataFrame( + [ + [np.nan, 2, np.nan, 0], + [3, 4, np.nan, 1], + [np.nan, np.nan, np.nan, 5], + [np.nan, 3, np.nan, 4], + ], + columns=list("ABCD"), + ) + result = df.fillna(axis=1, value=100, limit=1) + result2 = df.fillna(axis=1, value=100, limit=2) + + expected = DataFrame( + { + "A": Series([100, 3, 100, 100], dtype="float64"), + "B": [2, 4, np.nan, 3], + "C": [np.nan, 100, np.nan, np.nan], + "D": Series([0, 1, 5, 4], dtype="float64"), + }, + index=[0, 1, 2, 3], + ) + expected2 = DataFrame( + { + "A": Series([100, 3, 100, 100], dtype="float64"), + "B": Series([2, 4, 100, 3], dtype="float64"), + "C": [100, 100, np.nan, 100], + "D": Series([0, 1, 5, 4], dtype="float64"), + }, + index=[0, 1, 2, 3], + ) + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected2) + + def test_fillna_datetime_inplace(self): + # GH#48863 + df = DataFrame( + { + "date1": to_datetime(["2018-05-30", None]), + "date2": to_datetime(["2018-09-30", None]), + } + ) + expected = df.copy() + df.fillna(np.nan, inplace=True) + tm.assert_frame_equal(df, expected) + + def test_fillna_inplace_with_columns_limit_and_value(self): + # GH40989 + df = DataFrame( + [ + [np.nan, 2, np.nan, 0], + [3, 4, np.nan, 1], + [np.nan, np.nan, np.nan, 5], + [np.nan, 3, np.nan, 4], + ], + columns=list("ABCD"), + ) + + expected = df.fillna(axis=1, value=100, limit=1) + assert expected is not df + + df.fillna(axis=1, value=100, limit=1, inplace=True) + tm.assert_frame_equal(df, expected) + + @td.skip_array_manager_invalid_test + @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) + def test_inplace_dict_update_view(self, val, using_copy_on_write): + # GH#47188 + df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) + df_orig = df.copy() + result_view = df[:] + df.fillna(val, inplace=True) + expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) + tm.assert_frame_equal(df, expected) + if using_copy_on_write: + tm.assert_frame_equal(result_view, df_orig) + else: + tm.assert_frame_equal(result_view, expected) + + def 
test_single_block_df_with_horizontal_axis(self): + # GH 47713 + df = DataFrame( + { + "col1": [5, 0, np.nan, 10, np.nan], + "col2": [7, np.nan, np.nan, 5, 3], + "col3": [12, np.nan, 1, 2, 0], + "col4": [np.nan, 1, 1, np.nan, 18], + } + ) + result = df.fillna(50, limit=1, axis=1) + expected = DataFrame( + [ + [5.0, 7.0, 12.0, 50.0], + [0.0, 50.0, np.nan, 1.0], + [50.0, np.nan, 1.0, 1.0], + [10.0, 5.0, 2.0, 50.0], + [50.0, 3.0, 0.0, 18.0], + ], + columns=["col1", "col2", "col3", "col4"], + ) + tm.assert_frame_equal(result, expected) + + def test_fillna_with_multi_index_frame(self): + # GH 47649 + pdf = DataFrame( + { + ("x", "a"): [np.nan, 2.0, 3.0], + ("x", "b"): [1.0, 2.0, np.nan], + ("y", "c"): [1.0, 2.0, np.nan], + } + ) + expected = DataFrame( + { + ("x", "a"): [-1.0, 2.0, 3.0], + ("x", "b"): [1.0, 2.0, -1.0], + ("y", "c"): [1.0, 2.0, np.nan], + } + ) + tm.assert_frame_equal(pdf.fillna({"x": -1}), expected) + tm.assert_frame_equal(pdf.fillna({"x": -1, ("x", "b"): -2}), expected) + + expected = DataFrame( + { + ("x", "a"): [-1.0, 2.0, 3.0], + ("x", "b"): [1.0, 2.0, -2.0], + ("y", "c"): [1.0, 2.0, np.nan], + } + ) + tm.assert_frame_equal(pdf.fillna({("x", "b"): -2, "x": -1}), expected) + + +def test_fillna_nonconsolidated_frame(): + # https://github.com/pandas-dev/pandas/issues/36495 + df = DataFrame( + [ + [1, 1, 1, 1.0], + [2, 2, 2, 2.0], + [3, 3, 3, 3.0], + ], + columns=["i1", "i2", "i3", "f1"], + ) + df_nonconsol = df.pivot(index="i1", columns="i2") + result = df_nonconsol.fillna(0) + assert result.isna().sum().sum() == 0 + + +def test_fillna_nones_inplace(): + # GH 48480 + df = DataFrame( + [[None, None], [None, None]], + columns=["A", "B"], + ) + with tm.assert_produces_warning(False): + df.fillna(value={"A": 1, "B": 2}, inplace=True) + + expected = DataFrame([[1, 2], [1, 2]], columns=["A", "B"]) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/methods/test_filter.py b/pandas/tests/frame/methods/test_filter.py new file mode 100644 index 00000000..af77db40 --- /dev/null +++ b/pandas/tests/frame/methods/test_filter.py @@ -0,0 +1,139 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + + +class TestDataFrameFilter: + def test_filter(self, float_frame, float_string_frame): + # Items + filtered = float_frame.filter(["A", "B", "E"]) + assert len(filtered.columns) == 2 + assert "E" not in filtered + + filtered = float_frame.filter(["A", "B", "E"], axis="columns") + assert len(filtered.columns) == 2 + assert "E" not in filtered + + # Other axis + idx = float_frame.index[0:4] + filtered = float_frame.filter(idx, axis="index") + expected = float_frame.reindex(index=idx) + tm.assert_frame_equal(filtered, expected) + + # like + fcopy = float_frame.copy() + fcopy["AA"] = 1 + + filtered = fcopy.filter(like="A") + assert len(filtered.columns) == 2 + assert "AA" in filtered + + # like with ints in column names + df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"]) + filtered = df.filter(like="_") + assert len(filtered.columns) == 2 + + # regex with ints in column names + # from PR #10384 + df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"]) + expected = DataFrame( + 0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object) + ) + filtered = df.filter(regex="^[0-9]+$") + tm.assert_frame_equal(filtered, expected) + + expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"]) + # shouldn't remove anything + filtered = expected.filter(regex="^[0-9]+$") + 
tm.assert_frame_equal(filtered, expected) + + # pass in None + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter() + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter(items=None) + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter(axis=1) + + # test mutually exclusive arguments + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$", like="bbi") + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$", axis=1) + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$") + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], like="bbi", axis=0) + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], like="bbi") + + # objects + filtered = float_string_frame.filter(like="foo") + assert "foo" in filtered + + # unicode columns, won't ascii-encode + df = float_frame.rename(columns={"B": "\u2202"}) + filtered = df.filter(like="C") + assert "C" in filtered + + def test_filter_regex_search(self, float_frame): + fcopy = float_frame.copy() + fcopy["AA"] = 1 + + # regex + filtered = fcopy.filter(regex="[A]+") + assert len(filtered.columns) == 2 + assert "AA" in filtered + + # doesn't have to be at beginning + df = DataFrame( + {"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]} + ) + + result = df.filter(regex="BB") + exp = df[[x for x in df.columns if "BB" in x]] + tm.assert_frame_equal(result, exp) + + @pytest.mark.parametrize( + "name,expected", + [ + ("a", DataFrame({"a": [1, 2]})), + ("a", DataFrame({"a": [1, 2]})), + ("あ", DataFrame({"あ": [3, 4]})), + ], + ) + def test_filter_unicode(self, name, expected): + # GH13101 + df = DataFrame({"a": [1, 2], "あ": [3, 4]}) + + tm.assert_frame_equal(df.filter(like=name), expected) + tm.assert_frame_equal(df.filter(regex=name), expected) + + @pytest.mark.parametrize("name", ["a", "a"]) + def test_filter_bytestring(self, name): + # GH13101 + df = DataFrame({b"a": [1, 2], b"b": [3, 4]}) + expected = DataFrame({b"a": [1, 2]}) + + tm.assert_frame_equal(df.filter(like=name), expected) + tm.assert_frame_equal(df.filter(regex=name), expected) + + def test_filter_corner(self): + empty = DataFrame() + + result = empty.filter([]) + tm.assert_frame_equal(result, empty) + + result = empty.filter(like="foo") + tm.assert_frame_equal(result, empty) + + def test_filter_regex_non_string(self): + # GH#5798 trying to filter on non-string columns should drop, + # not raise + df = DataFrame(np.random.random((3, 2)), columns=["STRING", 123]) + result = df.filter(regex="STRING") + expected = df[["STRING"]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py new file mode 100644 index 00000000..6b112115 --- /dev/null +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -0,0 +1,88 @@ +""" +Note: includes tests for `last` +""" +import pytest + +from pandas import ( + DataFrame, + bdate_range, +) +import pandas._testing as tm + + +class TestFirst: + def test_first_subset(self, frame_or_series): + ts = tm.makeTimeDataFrame(freq="12h") + ts = tm.get_obj(ts, frame_or_series) + result = ts.first("10d") + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(freq="D") + ts = tm.get_obj(ts, frame_or_series) + result = 
ts.first("10d") + assert len(result) == 10 + + result = ts.first("3M") + expected = ts[:"3/31/2000"] + tm.assert_equal(result, expected) + + result = ts.first("21D") + expected = ts[:21] + tm.assert_equal(result, expected) + + result = ts[:0].first("3M") + tm.assert_equal(result, ts[:0]) + + def test_first_last_raises(self, frame_or_series): + # GH#20725 + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = tm.get_obj(obj, frame_or_series) + + msg = "'first' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.first("1D") + + msg = "'last' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.last("1D") + + def test_last_subset(self, frame_or_series): + ts = tm.makeTimeDataFrame(freq="12h") + ts = tm.get_obj(ts, frame_or_series) + result = ts.last("10d") + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(nper=30, freq="D") + ts = tm.get_obj(ts, frame_or_series) + result = ts.last("10d") + assert len(result) == 10 + + result = ts.last("21D") + expected = ts["2000-01-10":] + tm.assert_equal(result, expected) + + result = ts.last("21D") + expected = ts[-21:] + tm.assert_equal(result, expected) + + result = ts[:0].last("3M") + tm.assert_equal(result, ts[:0]) + + @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) + def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods): + # GH#29623 + x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) + result = x.first("1M") + expected = frame_or_series( + [1] * periods, index=bdate_range(start, periods=periods) + ) + tm.assert_equal(result, expected) + + def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): + # GH#29623 + x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) + result = x.first("2M") + expected = frame_or_series( + [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") + ) + tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_first_valid_index.py b/pandas/tests/frame/methods/test_first_valid_index.py new file mode 100644 index 00000000..e4cbd892 --- /dev/null +++ b/pandas/tests/frame/methods/test_first_valid_index.py @@ -0,0 +1,94 @@ +""" +Includes test for last_valid_index. 
+""" +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestFirstValidIndex: + def test_first_valid_index_single_nan(self, frame_or_series): + # GH#9752 Series/DataFrame should both return None, not raise + obj = frame_or_series([np.nan]) + + assert obj.first_valid_index() is None + assert obj.iloc[:0].first_valid_index() is None + + @pytest.mark.parametrize( + "empty", [DataFrame(), Series(dtype=object), Series([], index=[], dtype=object)] + ) + def test_first_valid_index_empty(self, empty): + # GH#12800 + assert empty.last_valid_index() is None + assert empty.first_valid_index() is None + + @pytest.mark.parametrize( + "data,idx,expected_first,expected_last", + [ + ({"A": [1, 2, 3]}, [1, 1, 2], 1, 2), + ({"A": [1, 2, 3]}, [1, 2, 2], 1, 2), + ({"A": [1, 2, 3, 4]}, ["d", "d", "d", "d"], "d", "d"), + ({"A": [1, np.nan, 3]}, [1, 1, 2], 1, 2), + ({"A": [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), + ({"A": [1, np.nan, 3]}, [1, 2, 2], 1, 2), + ], + ) + def test_first_last_valid_frame(self, data, idx, expected_first, expected_last): + # GH#21441 + df = DataFrame(data, index=idx) + assert expected_first == df.first_valid_index() + assert expected_last == df.last_valid_index() + + @pytest.mark.parametrize("index_func", [tm.makeStringIndex, tm.makeDateIndex]) + def test_first_last_valid(self, index_func): + N = 30 + index = index_func(N) + mat = np.random.randn(N) + mat[:5] = np.nan + mat[-5:] = np.nan + + frame = DataFrame({"foo": mat}, index=index) + assert frame.first_valid_index() == frame.index[5] + assert frame.last_valid_index() == frame.index[-6] + + ser = frame["foo"] + assert ser.first_valid_index() == frame.index[5] + assert ser.last_valid_index() == frame.index[-6] + + @pytest.mark.parametrize("index_func", [tm.makeStringIndex, tm.makeDateIndex]) + def test_first_last_valid_all_nan(self, index_func): + # GH#17400: no valid entries + index = index_func(30) + frame = DataFrame(np.nan, columns=["foo"], index=index) + + assert frame.last_valid_index() is None + assert frame.first_valid_index() is None + + ser = frame["foo"] + assert ser.first_valid_index() is None + assert ser.last_valid_index() is None + + @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") + def test_first_last_valid_preserves_freq(self): + # GH#20499: its preserves freq with holes + index = date_range("20110101", periods=30, freq="B") + frame = DataFrame(np.nan, columns=["foo"], index=index) + + frame.iloc[1] = 1 + frame.iloc[-2] = 1 + assert frame.first_valid_index() == frame.index[1] + assert frame.last_valid_index() == frame.index[-2] + assert frame.first_valid_index().freq == frame.index.freq + assert frame.last_valid_index().freq == frame.index.freq + + ts = frame["foo"] + assert ts.first_valid_index() == ts.index[1] + assert ts.last_valid_index() == ts.index[-2] + assert ts.first_valid_index().freq == ts.index.freq + assert ts.last_valid_index().freq == ts.index.freq diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py new file mode 100644 index 00000000..8628b76f --- /dev/null +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -0,0 +1,103 @@ +import numpy as np + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +class TestGetNumericData: + def test_get_numeric_data_preserve_dtype(self): + # get 
the numeric data + obj = DataFrame({"A": [1, "2", 3.0]}) + result = obj._get_numeric_data() + expected = DataFrame(index=[0, 1, 2], dtype=object) + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data(self): + + datetime64name = np.dtype("M8[ns]").name + objectname = np.dtype(np.object_).name + + df = DataFrame( + {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [ + np.dtype("float64"), + np.dtype("int64"), + np.dtype(objectname), + np.dtype(datetime64name), + ], + index=["a", "b", "c", "f"], + ) + tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + "d": np.array([1.0] * 10, dtype="float32"), + "e": np.array([1] * 10, dtype="int32"), + "f": np.array([1] * 10, dtype="int16"), + "g": Timestamp("20010102"), + }, + index=np.arange(10), + ) + + result = df._get_numeric_data() + expected = df.loc[:, ["a", "b", "d", "e", "f"]] + tm.assert_frame_equal(result, expected) + + only_obj = df.loc[:, ["c", "g"]] + result = only_obj._get_numeric_data() + expected = df.loc[:, []] + tm.assert_frame_equal(result, expected) + + df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]}) + result = df._get_numeric_data() + expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]}) + tm.assert_frame_equal(result, expected) + + df = result.copy() + result = df._get_numeric_data() + expected = df + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data_mixed_dtype(self): + # numeric and object columns + + df = DataFrame( + { + "a": [1, 2, 3], + "b": [True, False, True], + "c": ["foo", "bar", "baz"], + "d": [None, None, None], + "e": [3.14, 0.577, 2.773], + } + ) + result = df._get_numeric_data() + tm.assert_index_equal(result.columns, Index(["a", "b", "e"])) + + def test_get_numeric_data_extension_dtype(self): + # GH#22290 + df = DataFrame( + { + "A": pd.array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"), + "B": Categorical(list("abcabc")), + "C": pd.array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"), + "D": IntervalArray.from_breaks(range(7)), + } + ) + result = df._get_numeric_data() + expected = df.loc[:, ["A", "C"]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py new file mode 100644 index 00000000..99cb7840 --- /dev/null +++ b/pandas/tests/frame/methods/test_head_tail.py @@ -0,0 +1,57 @@ +import numpy as np + +from pandas import DataFrame +import pandas._testing as tm + + +def test_head_tail_generic(index, frame_or_series): + # GH#5370 + + ndim = 2 if frame_or_series is DataFrame else 1 + shape = (len(index),) * ndim + vals = np.random.randn(*shape) + obj = frame_or_series(vals, index=index) + + tm.assert_equal(obj.head(), obj.iloc[:5]) + tm.assert_equal(obj.tail(), obj.iloc[-5:]) + + # 0-len + tm.assert_equal(obj.head(0), obj.iloc[0:0]) + tm.assert_equal(obj.tail(0), obj.iloc[0:0]) + + # bounded + tm.assert_equal(obj.head(len(obj) + 1), obj) + tm.assert_equal(obj.tail(len(obj) + 1), obj) + + # neg index + tm.assert_equal(obj.head(-3), obj.head(len(index) - 3)) + tm.assert_equal(obj.tail(-3), obj.tail(len(index) - 3)) + + +def test_head_tail(float_frame): + tm.assert_frame_equal(float_frame.head(), float_frame[:5]) + tm.assert_frame_equal(float_frame.tail(), float_frame[-5:]) + + tm.assert_frame_equal(float_frame.head(0), float_frame[0:0]) + tm.assert_frame_equal(float_frame.tail(0), float_frame[0:0]) + + 
tm.assert_frame_equal(float_frame.head(-1), float_frame[:-1]) + tm.assert_frame_equal(float_frame.tail(-1), float_frame[1:]) + tm.assert_frame_equal(float_frame.head(1), float_frame[:1]) + tm.assert_frame_equal(float_frame.tail(1), float_frame[-1:]) + # with a float index + df = float_frame.copy() + df.index = np.arange(len(float_frame)) + 0.1 + tm.assert_frame_equal(df.head(), df.iloc[:5]) + tm.assert_frame_equal(df.tail(), df.iloc[-5:]) + tm.assert_frame_equal(df.head(0), df[0:0]) + tm.assert_frame_equal(df.tail(0), df[0:0]) + tm.assert_frame_equal(df.head(-1), df.iloc[:-1]) + tm.assert_frame_equal(df.tail(-1), df.iloc[1:]) + + +def test_head_tail_empty(): + # test empty dataframe + empty_df = DataFrame() + tm.assert_frame_equal(empty_df.tail(), empty_df) + tm.assert_frame_equal(empty_df.head(), empty_df) diff --git a/pandas/tests/frame/methods/test_infer_objects.py b/pandas/tests/frame/methods/test_infer_objects.py new file mode 100644 index 00000000..a824a615 --- /dev/null +++ b/pandas/tests/frame/methods/test_infer_objects.py @@ -0,0 +1,42 @@ +from datetime import datetime + +from pandas import DataFrame +import pandas._testing as tm + + +class TestInferObjects: + def test_infer_objects(self): + # GH#11221 + df = DataFrame( + { + "a": ["a", 1, 2, 3], + "b": ["b", 2.0, 3.0, 4.1], + "c": [ + "c", + datetime(2016, 1, 1), + datetime(2016, 1, 2), + datetime(2016, 1, 3), + ], + "d": [1, 2, 3, "d"], + }, + columns=["a", "b", "c", "d"], + ) + df = df.iloc[1:].infer_objects() + + assert df["a"].dtype == "int64" + assert df["b"].dtype == "float64" + assert df["c"].dtype == "M8[ns]" + assert df["d"].dtype == "object" + + expected = DataFrame( + { + "a": [1, 2, 3], + "b": [2.0, 3.0, 4.1], + "c": [datetime(2016, 1, 1), datetime(2016, 1, 2), datetime(2016, 1, 3)], + "d": [2, 3, "d"], + }, + columns=["a", "b", "c", "d"], + ) + # reconstruct frame to verify inference is same + result = df.reset_index(drop=True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py new file mode 100644 index 00000000..7d6cf43c --- /dev/null +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -0,0 +1,411 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameInterpolate: + def test_interpolate_inplace(self, frame_or_series, using_array_manager, request): + # GH#44749 + if using_array_manager and frame_or_series is DataFrame: + mark = pytest.mark.xfail(reason=".values-based in-place check is invalid") + request.node.add_marker(mark) + + obj = frame_or_series([1, np.nan, 2]) + orig = obj.values + + obj.interpolate(inplace=True) + expected = frame_or_series([1, 1.5, 2]) + tm.assert_equal(obj, expected) + + # check we operated *actually* inplace + assert np.shares_memory(orig, obj.values) + assert orig.squeeze()[1] == 1.5 + + def test_interp_basic(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + expected = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0], + "B": [1.0, 4.0, 9.0, 9.0], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + result = df.interpolate() + tm.assert_frame_equal(result, expected) + + # check we didn't operate inplace GH#45791 + cvalues = df["C"]._values + dvalues = df["D"].values + assert not np.shares_memory(cvalues, result["C"]._values) + assert not np.shares_memory(dvalues, 
result["D"]._values) + + res = df.interpolate(inplace=True) + assert res is None + tm.assert_frame_equal(df, expected) + + # check we DID operate inplace + assert np.shares_memory(df["C"]._values, cvalues) + assert np.shares_memory(df["D"]._values, dvalues) + + def test_interp_basic_with_non_range_index(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + expected = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0], + "B": [1.0, 4.0, 9.0, 9.0], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + + result = df.set_index("C").interpolate() + expected = df.set_index("C") + expected.loc[3, "A"] = 3 + expected.loc[5, "B"] = 9 + tm.assert_frame_equal(result, expected) + + def test_interp_empty(self): + # https://github.com/pandas-dev/pandas/issues/35598 + df = DataFrame() + result = df.interpolate() + assert result is not df + expected = df + tm.assert_frame_equal(result, expected) + + def test_interp_bad_method(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + msg = ( + r"method must be one of \['linear', 'time', 'index', 'values', " + r"'nearest', 'zero', 'slinear', 'quadratic', 'cubic', " + r"'barycentric', 'krogh', 'spline', 'polynomial', " + r"'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima', " + r"'cubicspline'\]. Got 'not_a_method' instead." + ) + with pytest.raises(ValueError, match=msg): + df.interpolate(method="not_a_method") + + def test_interp_combo(self): + df = DataFrame( + { + "A": [1.0, 2.0, np.nan, 4.0], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + + result = df["A"].interpolate() + expected = Series([1.0, 2.0, 3.0, 4.0], name="A") + tm.assert_series_equal(result, expected) + + result = df["A"].interpolate(downcast="infer") + expected = Series([1, 2, 3, 4], name="A") + tm.assert_series_equal(result, expected) + + def test_interp_nan_idx(self): + df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]}) + df = df.set_index("A") + msg = ( + "Interpolation with NaNs in the index has not been implemented. " + "Try filling those NaNs before interpolating." + ) + with pytest.raises(NotImplementedError, match=msg): + df.interpolate(method="values") + + @td.skip_if_no_scipy + def test_interp_various(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + df = df.set_index("C") + expected = df.copy() + result = df.interpolate(method="polynomial", order=1) + + expected.loc[3, "A"] = 2.66666667 + expected.loc[13, "A"] = 5.76923076 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="cubic") + # GH #15662. 
+ expected.loc[3, "A"] = 2.81547781 + expected.loc[13, "A"] = 5.52964175 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="nearest") + expected.loc[3, "A"] = 2 + expected.loc[13, "A"] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + result = df.interpolate(method="quadratic") + expected.loc[3, "A"] = 2.82150771 + expected.loc[13, "A"] = 6.12648668 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="slinear") + expected.loc[3, "A"] = 2.66666667 + expected.loc[13, "A"] = 5.76923077 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="zero") + expected.loc[3, "A"] = 2.0 + expected.loc[13, "A"] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + @td.skip_if_no_scipy + def test_interp_alt_scipy(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + result = df.interpolate(method="barycentric") + expected = df.copy() + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="barycentric", downcast="infer") + tm.assert_frame_equal(result, expected.astype(np.int64)) + + result = df.interpolate(method="krogh") + expectedk = df.copy() + expectedk["A"] = expected["A"] + tm.assert_frame_equal(result, expectedk) + + result = df.interpolate(method="pchip") + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6.0 + + tm.assert_frame_equal(result, expected) + + def test_interp_rowwise(self): + df = DataFrame( + { + 0: [1, 2, np.nan, 4], + 1: [2, 3, 4, np.nan], + 2: [np.nan, 4, 5, 6], + 3: [4, np.nan, 6, 7], + 4: [1, 2, 3, 4], + } + ) + result = df.interpolate(axis=1) + expected = df.copy() + expected.loc[3, 1] = 5 + expected.loc[0, 2] = 3 + expected.loc[1, 3] = 3 + expected[4] = expected[4].astype(np.float64) + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=1, method="values") + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=0) + expected = df.interpolate() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "axis_name, axis_number", + [ + pytest.param("rows", 0, id="rows_0"), + pytest.param("index", 0, id="index_0"), + pytest.param("columns", 1, id="columns_1"), + ], + ) + def test_interp_axis_names(self, axis_name, axis_number): + # GH 29132: test axis names + data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]} + + df = DataFrame(data, dtype=np.float64) + result = df.interpolate(axis=axis_name, method="linear") + expected = df.interpolate(axis=axis_number, method="linear") + tm.assert_frame_equal(result, expected) + + def test_rowwise_alt(self): + df = DataFrame( + { + 0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64], + 1: [1, 2, 3, 4, 3, 2, 1, 0, -1], + } + ) + df.interpolate(axis=0) + # TODO: assert something? 
+ + @pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] + ) + def test_interp_leading_nans(self, check_scipy): + df = DataFrame( + {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]} + ) + result = df.interpolate() + expected = df.copy() + expected.loc[3, "B"] = -3.75 + tm.assert_frame_equal(result, expected) + + if check_scipy: + result = df.interpolate(method="polynomial", order=1) + tm.assert_frame_equal(result, expected) + + def test_interp_raise_on_only_mixed(self, axis): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": ["a", "b", "c", "d"], + "C": [np.nan, 2, 5, 7], + "D": [np.nan, np.nan, 9, 9], + "E": [1, 2, 3, 4], + } + ) + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + with pytest.raises(TypeError, match=msg): + df.astype("object").interpolate(axis=axis) + + def test_interp_raise_on_all_object_dtype(self): + # GH 22985 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + with pytest.raises(TypeError, match=msg): + df.interpolate() + + def test_interp_inplace(self, using_copy_on_write): + # TODO(CoW) inplace keyword (it is still mutating the parent) + if using_copy_on_write: + pytest.skip("CoW: inplace keyword not yet handled") + df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) + expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) + result = df.copy() + return_value = result["a"].interpolate(inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + result = df.copy() + return_value = result["a"].interpolate(inplace=True, downcast="infer") + assert return_value is None + tm.assert_frame_equal(result, expected.astype("int64")) + + def test_interp_inplace_row(self): + # GH 10395 + result = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]} + ) + expected = result.interpolate(method="linear", axis=1, inplace=False) + return_value = result.interpolate(method="linear", axis=1, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_interp_ignore_all_good(self): + # GH + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 2, 3, 4], + "C": [1.0, 2.0, np.nan, 4.0], + "D": [1.0, 2.0, 3.0, 4.0], + } + ) + expected = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="float64"), + "B": np.array([1, 2, 3, 4], dtype="int64"), + "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"), + "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"), + } + ) + + result = df.interpolate(downcast=None) + tm.assert_frame_equal(result, expected) + + # all good + result = df[["B", "D"]].interpolate(downcast=None) + tm.assert_frame_equal(result, df[["B", "D"]]) + + def test_interp_time_inplace_axis(self): + # GH 9687 + periods = 5 + idx = date_range(start="2014-01-01", periods=periods) + data = np.random.rand(periods, periods) + data[data < 0.5] = np.nan + expected = DataFrame(index=idx, columns=idx, data=data) + + result = expected.interpolate(axis=0, method="time") + return_value = expected.interpolate(axis=0, method="time", inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("axis_name, axis_number", [("index", 0), ("columns", 1)]) + def test_interp_string_axis(self, axis_name, axis_number): + # 
https://github.com/pandas-dev/pandas/issues/25190 + x = np.linspace(0, 100, 1000) + y = np.sin(x) + df = DataFrame( + data=np.tile(y, (10, 1)), index=np.arange(10), columns=x + ).reindex(columns=x * 1.005) + result = df.interpolate(method="linear", axis=axis_name) + expected = df.interpolate(method="linear", axis=axis_number) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"]) + def test_interp_fillna_methods(self, request, axis, method, using_array_manager): + # GH 12918 + if using_array_manager and (axis == 1 or axis == "columns"): + # TODO(ArrayManager) support axis=1 + td.mark_array_manager_not_yet_implemented(request) + + df = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], + "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], + "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], + } + ) + expected = df.fillna(axis=axis, method=method) + result = df.interpolate(method=method, axis=axis) + tm.assert_frame_equal(result, expected) + + def test_interpolate_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.interpolate " + r"except for the argument 'method' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.interpolate("pad", 0) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_is_homogeneous_dtype.py b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py new file mode 100644 index 00000000..a5f285d3 --- /dev/null +++ b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py @@ -0,0 +1,57 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + Categorical, + DataFrame, +) + +# _is_homogeneous_type always returns True for ArrayManager +pytestmark = td.skip_array_manager_invalid_test + + +@pytest.mark.parametrize( + "data, expected", + [ + # empty + (DataFrame(), True), + # multi-same + (DataFrame({"A": [1, 2], "B": [1, 2]}), True), + # multi-object + ( + DataFrame( + { + "A": np.array([1, 2], dtype=object), + "B": np.array(["a", "b"], dtype=object), + } + ), + True, + ), + # multi-extension + ( + DataFrame({"A": Categorical(["a", "b"]), "B": Categorical(["a", "b"])}), + True, + ), + # differ types + (DataFrame({"A": [1, 2], "B": [1.0, 2.0]}), False), + # differ sizes + ( + DataFrame( + { + "A": np.array([1, 2], dtype=np.int32), + "B": np.array([1, 2], dtype=np.int64), + } + ), + False, + ), + # multi-extension differ + ( + DataFrame({"A": Categorical(["a", "b"]), "B": Categorical(["b", "c"])}), + False, + ), + ], +) +def test_is_homogeneous_type(data, expected): + assert data._is_homogeneous_type is expected diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py new file mode 100644 index 00000000..e924963f --- /dev/null +++ b/pandas/tests/frame/methods/test_isin.py @@ -0,0 +1,219 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestDataFrameIsIn: + def test_isin(self): + # GH#4211 + df = DataFrame( + { + "vals": [1, 2, 3, 4], + "ids": ["a", "b", "f", "n"], + "ids2": ["a", "n", "c", "n"], + }, + index=["foo", "bar", "baz", "qux"], + ) + other = ["a", "b", "c"] + + result = df.isin(other) + expected = DataFrame([df.loc[s].isin(other) for s in df.index]) + 
tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_isin_empty(self, empty): + # GH#16991 + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) + expected = DataFrame(False, df.index, df.columns) + + result = df.isin(empty) + tm.assert_frame_equal(result, expected) + + def test_isin_dict(self): + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) + d = {"A": ["a"]} + + expected = DataFrame(False, df.index, df.columns) + expected.loc[0, "A"] = True + + result = df.isin(d) + tm.assert_frame_equal(result, expected) + + # non unique columns + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) + df.columns = ["A", "A"] + expected = DataFrame(False, df.index, df.columns) + expected.loc[0, "A"] = True + result = df.isin(d) + tm.assert_frame_equal(result, expected) + + def test_isin_with_string_scalar(self): + # GH#4763 + df = DataFrame( + { + "vals": [1, 2, 3, 4], + "ids": ["a", "b", "f", "n"], + "ids2": ["a", "n", "c", "n"], + }, + index=["foo", "bar", "baz", "qux"], + ) + msg = ( + r"only list-like or dict-like objects are allowed " + r"to be passed to DataFrame.isin\(\), you passed a 'str'" + ) + with pytest.raises(TypeError, match=msg): + df.isin("a") + + with pytest.raises(TypeError, match=msg): + df.isin("aaa") + + def test_isin_df(self): + df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}) + df2 = DataFrame({"A": [0, 2, 12, 4], "B": [2, np.nan, 4, 5]}) + expected = DataFrame(False, df1.index, df1.columns) + result = df1.isin(df2) + expected.loc[[1, 3], "A"] = True + expected.loc[[0, 2], "B"] = True + tm.assert_frame_equal(result, expected) + + # partial overlapping columns + df2.columns = ["A", "C"] + result = df1.isin(df2) + expected["B"] = False + tm.assert_frame_equal(result, expected) + + def test_isin_tuples(self): + # GH#16394 + df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "f"]}) + df["C"] = list(zip(df["A"], df["B"])) + result = df["C"].isin([(1, "a")]) + tm.assert_series_equal(result, Series([True, False, False], name="C")) + + def test_isin_df_dupe_values(self): + df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}) + # just cols duped + df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=["B", "B"]) + msg = r"cannot compute isin with a duplicate axis\." 
+ with pytest.raises(ValueError, match=msg): + df1.isin(df2) + + # just index duped + df2 = DataFrame( + [[0, 2], [12, 4], [2, np.nan], [4, 5]], + columns=["A", "B"], + index=[0, 0, 1, 1], + ) + with pytest.raises(ValueError, match=msg): + df1.isin(df2) + + # cols and index: + df2.columns = ["B", "B"] + with pytest.raises(ValueError, match=msg): + df1.isin(df2) + + def test_isin_dupe_self(self): + other = DataFrame({"A": [1, 0, 1, 0], "B": [1, 1, 0, 0]}) + df = DataFrame([[1, 1], [1, 0], [0, 0]], columns=["A", "A"]) + result = df.isin(other) + expected = DataFrame(False, index=df.index, columns=df.columns) + expected.loc[0] = True + expected.iloc[1, 1] = True + tm.assert_frame_equal(result, expected) + + def test_isin_against_series(self): + df = DataFrame( + {"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}, index=["a", "b", "c", "d"] + ) + s = Series([1, 3, 11, 4], index=["a", "b", "c", "d"]) + expected = DataFrame(False, index=df.index, columns=df.columns) + expected.loc["a", "A"] = True + expected.loc["d"] = True + result = df.isin(s) + tm.assert_frame_equal(result, expected) + + def test_isin_multiIndex(self): + idx = MultiIndex.from_tuples( + [ + (0, "a", "foo"), + (0, "a", "bar"), + (0, "b", "bar"), + (0, "b", "baz"), + (2, "a", "foo"), + (2, "a", "bar"), + (2, "c", "bar"), + (2, "c", "baz"), + (1, "b", "foo"), + (1, "b", "bar"), + (1, "c", "bar"), + (1, "c", "baz"), + ] + ) + df1 = DataFrame({"A": np.ones(12), "B": np.zeros(12)}, index=idx) + df2 = DataFrame( + { + "A": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], + "B": [1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1], + } + ) + # against regular index + expected = DataFrame(False, index=df1.index, columns=df1.columns) + result = df1.isin(df2) + tm.assert_frame_equal(result, expected) + + df2.index = idx + expected = df2.values.astype(bool) + expected[:, 1] = ~expected[:, 1] + expected = DataFrame(expected, columns=["A", "B"], index=idx) + + result = df1.isin(df2) + tm.assert_frame_equal(result, expected) + + def test_isin_empty_datetimelike(self): + # GH#15473 + df1_ts = DataFrame({"date": pd.to_datetime(["2014-01-01", "2014-01-02"])}) + df1_td = DataFrame({"date": [pd.Timedelta(1, "s"), pd.Timedelta(2, "s")]}) + df2 = DataFrame({"date": []}) + df3 = DataFrame() + + expected = DataFrame({"date": [False, False]}) + + result = df1_ts.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_ts.isin(df3) + tm.assert_frame_equal(result, expected) + + result = df1_td.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_td.isin(df3) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values", + [ + DataFrame({"a": [1, 2, 3]}, dtype="category"), + Series([1, 2, 3], dtype="category"), + ], + ) + def test_isin_category_frame(self, values): + # GH#34256 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = DataFrame({"a": [True, True, True], "b": [False, False, False]}) + + result = df.isin(values) + tm.assert_frame_equal(result, expected) + + def test_isin_read_only(self): + # https://github.com/pandas-dev/pandas/issues/37174 + arr = np.array([1, 2, 3]) + arr.setflags(write=False) + df = DataFrame([1, 2, 3]) + result = df.isin(arr) + expected = DataFrame([True, True, True]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py new file mode 100644 index 00000000..7db26f7e --- /dev/null +++ b/pandas/tests/frame/methods/test_join.py @@ -0,0 +1,564 @@ +from datetime import datetime + +import numpy as np +import pytest + +from 
pandas.errors import MergeError + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + date_range, + period_range, +) +import pandas._testing as tm +from pandas.core.reshape.concat import concat + + +@pytest.fixture +def frame_with_period_index(): + return DataFrame( + data=np.arange(20).reshape(4, 5), + columns=list("abcde"), + index=period_range(start="2000", freq="A", periods=4), + ) + + +@pytest.fixture +def left(): + return DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right(): + return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) + + +@pytest.fixture +def left_no_dup(): + return DataFrame( + {"a": ["a", "b", "c", "d"], "b": ["cat", "dog", "weasel", "horse"]}, + index=range(4), + ) + + +@pytest.fixture +def right_no_dup(): + return DataFrame( + { + "a": ["a", "b", "c", "d", "e"], + "c": ["meow", "bark", "um... weasel noise?", "nay", "chirp"], + }, + index=range(5), + ).set_index("a") + + +@pytest.fixture +def left_w_dups(left_no_dup): + return concat( + [left_no_dup, DataFrame({"a": ["a"], "b": ["cow"]}, index=[3])], sort=True + ) + + +@pytest.fixture +def right_w_dups(right_no_dup): + return concat( + [right_no_dup, DataFrame({"a": ["e"], "c": ["moo"]}, index=[3])] + ).set_index("a") + + +@pytest.mark.parametrize( + "how, sort, expected", + [ + ("inner", False, DataFrame({"a": [20, 10], "b": [200, 100]}, index=[2, 1])), + ("inner", True, DataFrame({"a": [10, 20], "b": [100, 200]}, index=[1, 2])), + ( + "left", + False, + DataFrame({"a": [20, 10, 0], "b": [200, 100, np.nan]}, index=[2, 1, 0]), + ), + ( + "left", + True, + DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), + ), + ( + "right", + False, + DataFrame({"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2]), + ), + ( + "right", + True, + DataFrame({"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3]), + ), + ( + "outer", + False, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ( + "outer", + True, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ], +) +def test_join(left, right, how, sort, expected): + + result = left.join(right, how=how, sort=sort, validate="1:1") + tm.assert_frame_equal(result, expected) + + +def test_suffix_on_list_join(): + first = DataFrame({"key": [1, 2, 3, 4, 5]}) + second = DataFrame({"key": [1, 8, 3, 2, 5], "v1": [1, 2, 3, 4, 5]}) + third = DataFrame({"keys": [5, 2, 3, 4, 1], "v2": [1, 2, 3, 4, 5]}) + + # check proper errors are raised + msg = "Suffixes not supported when joining multiple DataFrames" + with pytest.raises(ValueError, match=msg): + first.join([second], lsuffix="y") + with pytest.raises(ValueError, match=msg): + first.join([second, third], rsuffix="x") + with pytest.raises(ValueError, match=msg): + first.join([second, third], lsuffix="y", rsuffix="x") + with pytest.raises(ValueError, match="Indexes have overlapping values"): + first.join([second, third]) + + # no errors should be raised + arr_joined = first.join([third]) + norm_joined = first.join(third) + tm.assert_frame_equal(arr_joined, norm_joined) + + +def test_join_invalid_validate(left_no_dup, right_no_dup): + # GH 46622 + # Check invalid arguments + msg = "Not a valid argument for validate" + with pytest.raises(ValueError, match=msg): + left_no_dup.merge(right_no_dup, on="a", validate="invalid") + + +def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups): + # GH 46622 + # Dups on right allowed by 
one_to_many constraint + left_no_dup.join( + right_w_dups, + on="a", + validate="one_to_many", + ) + + # Dups on right not allowed by one_to_one constraint + msg = "Merge keys are not unique in right dataset; not a one-to-one merge" + with pytest.raises(MergeError, match=msg): + left_no_dup.join( + right_w_dups, + on="a", + validate="one_to_one", + ) + + +def test_join_on_single_col_dup_on_left(left_w_dups, right_no_dup): + # GH 46622 + # Dups on left allowed by many_to_one constraint + left_w_dups.join( + right_no_dup, + on="a", + validate="many_to_one", + ) + + # Dups on left not allowed by one_to_one constraint + msg = "Merge keys are not unique in left dataset; not a one-to-one merge" + with pytest.raises(MergeError, match=msg): + left_w_dups.join( + right_no_dup, + on="a", + validate="one_to_one", + ) + + +def test_join_on_single_col_dup_on_both(left_w_dups, right_w_dups): + # GH 46622 + # Dups on both allowed by many_to_many constraint + left_w_dups.join(right_w_dups, on="a", validate="many_to_many") + + # Dups on both not allowed by many_to_one constraint + msg = "Merge keys are not unique in right dataset; not a many-to-one merge" + with pytest.raises(MergeError, match=msg): + left_w_dups.join( + right_w_dups, + on="a", + validate="many_to_one", + ) + + # Dups on both not allowed by one_to_many constraint + msg = "Merge keys are not unique in left dataset; not a one-to-many merge" + with pytest.raises(MergeError, match=msg): + left_w_dups.join( + right_w_dups, + on="a", + validate="one_to_many", + ) + + +def test_join_on_multi_col_check_dup(): + # GH 46622 + # Two column join, dups in both, but jointly no dups + left = DataFrame( + { + "a": ["a", "a", "b", "b"], + "b": [0, 1, 0, 1], + "c": ["cat", "dog", "weasel", "horse"], + }, + index=range(4), + ).set_index(["a", "b"]) + + right = DataFrame( + { + "a": ["a", "a", "b"], + "b": [0, 1, 0], + "d": ["meow", "bark", "um... weasel noise?"], + }, + index=range(3), + ).set_index(["a", "b"]) + + expected_multi = DataFrame( + { + "a": ["a", "a", "b"], + "b": [0, 1, 0], + "c": ["cat", "dog", "weasel"], + "d": ["meow", "bark", "um... 
weasel noise?"], + }, + index=range(3), + ).set_index(["a", "b"]) + + # Jointly no dups allowed by one_to_one constraint + result = left.join(right, how="inner", validate="1:1") + tm.assert_frame_equal(result, expected_multi) + + +def test_join_index(float_frame): + # left / right + + f = float_frame.loc[float_frame.index[:10], ["A", "B"]] + f2 = float_frame.loc[float_frame.index[5:], ["C", "D"]].iloc[::-1] + + joined = f.join(f2) + tm.assert_index_equal(f.index, joined.index) + expected_columns = Index(["A", "B", "C", "D"]) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how="left") + tm.assert_index_equal(joined.index, f.index) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how="right") + tm.assert_index_equal(joined.index, f2.index) + tm.assert_index_equal(joined.columns, expected_columns) + + # inner + + joined = f.join(f2, how="inner") + tm.assert_index_equal(joined.index, f.index[5:10]) + tm.assert_index_equal(joined.columns, expected_columns) + + # outer + + joined = f.join(f2, how="outer") + tm.assert_index_equal(joined.index, float_frame.index.sort_values()) + tm.assert_index_equal(joined.columns, expected_columns) + + with pytest.raises(ValueError, match="join method"): + f.join(f2, how="foo") + + # corner case - overlapping columns + msg = "columns overlap but no suffix" + for how in ("outer", "left", "inner"): + with pytest.raises(ValueError, match=msg): + float_frame.join(float_frame, how=how) + + +def test_join_index_more(float_frame): + af = float_frame.loc[:, ["A", "B"]] + bf = float_frame.loc[::2, ["C", "D"]] + + expected = af.copy() + expected["C"] = float_frame["C"][::2] + expected["D"] = float_frame["D"][::2] + + result = af.join(bf) + tm.assert_frame_equal(result, expected) + + result = af.join(bf, how="right") + tm.assert_frame_equal(result, expected[::2]) + + result = bf.join(af, how="right") + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + +def test_join_index_series(float_frame): + df = float_frame.copy() + ser = df.pop(float_frame.columns[-1]) + joined = df.join(ser) + + tm.assert_frame_equal(joined, float_frame) + + ser.name = None + with pytest.raises(ValueError, match="must have a name"): + df.join(ser) + + +def test_join_overlap(float_frame): + df1 = float_frame.loc[:, ["A", "B", "C"]] + df2 = float_frame.loc[:, ["B", "C", "D"]] + + joined = df1.join(df2, lsuffix="_df1", rsuffix="_df2") + df1_suf = df1.loc[:, ["B", "C"]].add_suffix("_df1") + df2_suf = df2.loc[:, ["B", "C"]].add_suffix("_df2") + + no_overlap = float_frame.loc[:, ["A", "D"]] + expected = df1_suf.join(df2_suf).join(no_overlap) + + # column order not necessarily sorted + tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) + + +def test_join_period_index(frame_with_period_index): + other = frame_with_period_index.rename(columns=lambda key: f"{key}{key}") + + joined_values = np.concatenate([frame_with_period_index.values] * 2, axis=1) + + joined_cols = frame_with_period_index.columns.append(other.columns) + + joined = frame_with_period_index.join(other) + expected = DataFrame( + data=joined_values, columns=joined_cols, index=frame_with_period_index.index + ) + + tm.assert_frame_equal(joined, expected) + + +def test_join_left_sequence_non_unique_index(): + # https://github.com/pandas-dev/pandas/issues/19607 + df1 = DataFrame({"a": [0, 10, 20]}, index=[1, 2, 3]) + df2 = DataFrame({"b": [100, 200, 300]}, index=[4, 3, 2]) + df3 = DataFrame({"c": [400, 500, 600]}, index=[2, 2, 4]) + + joined = 
df1.join([df2, df3], how="left") + + expected = DataFrame( + { + "a": [0, 10, 10, 20], + "b": [np.nan, 300, 300, 200], + "c": [np.nan, 400, 500, np.nan], + }, + index=[1, 2, 2, 3], + ) + + tm.assert_frame_equal(joined, expected) + + +def test_join_list_series(float_frame): + # GH#46850 + # Join a DataFrame with a list containing both a Series and a DataFrame + left = float_frame.A.to_frame() + right = [float_frame.B, float_frame[["C", "D"]]] + result = left.join(right) + tm.assert_frame_equal(result, float_frame) + + +@pytest.mark.parametrize("sort_kw", [True, False]) +def test_suppress_future_warning_with_sort_kw(sort_kw): + a = DataFrame({"col1": [1, 2]}, index=["c", "a"]) + + b = DataFrame({"col2": [4, 5]}, index=["b", "a"]) + + c = DataFrame({"col3": [7, 8]}, index=["a", "b"]) + + expected = DataFrame( + { + "col1": {"a": 2.0, "b": float("nan"), "c": 1.0}, + "col2": {"a": 5.0, "b": 4.0, "c": float("nan")}, + "col3": {"a": 7.0, "b": 8.0, "c": float("nan")}, + } + ) + if sort_kw is False: + expected = expected.reindex(index=["c", "a", "b"]) + + with tm.assert_produces_warning(None): + result = a.join([b, c], how="outer", sort=sort_kw) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameJoin: + def test_join(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + a = frame.loc[frame.index[:5], ["A"]] + b = frame.loc[frame.index[2:], ["B", "C"]] + + joined = a.join(b, how="outer").reindex(frame.index) + expected = frame.copy().values + expected[np.isnan(joined.values)] = np.nan + expected = DataFrame(expected, index=frame.index, columns=frame.columns) + + assert not np.isnan(joined.values).all() + + tm.assert_frame_equal(joined, expected) + + def test_join_segfault(self): + # GH#1532 + df1 = DataFrame({"a": [1, 1], "b": [1, 2], "x": [1, 2]}) + df2 = DataFrame({"a": [2, 2], "b": [1, 2], "y": [1, 2]}) + df1 = df1.set_index(["a", "b"]) + df2 = df2.set_index(["a", "b"]) + # it works! 
+ for how in ["left", "right", "outer"]: + df1.join(df2, how=how) + + def test_join_str_datetime(self): + str_dates = ["20120209", "20120222"] + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + A = DataFrame(str_dates, index=range(2), columns=["aa"]) + C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) + + tst = A.join(C, on="aa") + + assert len(tst.columns) == 3 + + def test_join_multiindex_leftright(self): + # GH 10741 + df1 = DataFrame( + [ + ["a", "x", 0.471780], + ["a", "y", 0.774908], + ["a", "z", 0.563634], + ["b", "x", -0.353756], + ["b", "y", 0.368062], + ["b", "z", -1.721840], + ["c", "x", 1], + ["c", "y", 2], + ["c", "z", 3], + ], + columns=["first", "second", "value1"], + ).set_index(["first", "second"]) + + df2 = DataFrame([["a", 10], ["b", 20]], columns=["first", "value2"]).set_index( + ["first"] + ) + + exp = DataFrame( + [ + [0.471780, 10], + [0.774908, 10], + [0.563634, 10], + [-0.353756, 20], + [0.368062, 20], + [-1.721840, 20], + [1.000000, np.nan], + [2.000000, np.nan], + [3.000000, np.nan], + ], + index=df1.index, + columns=["value1", "value2"], + ) + + # these must be the same results (but columns are flipped) + tm.assert_frame_equal(df1.join(df2, how="left"), exp) + tm.assert_frame_equal(df2.join(df1, how="right"), exp[["value2", "value1"]]) + + exp_idx = MultiIndex.from_product( + [["a", "b"], ["x", "y", "z"]], names=["first", "second"] + ) + exp = DataFrame( + [ + [0.471780, 10], + [0.774908, 10], + [0.563634, 10], + [-0.353756, 20], + [0.368062, 20], + [-1.721840, 20], + ], + index=exp_idx, + columns=["value1", "value2"], + ) + + tm.assert_frame_equal(df1.join(df2, how="right"), exp) + tm.assert_frame_equal(df2.join(df1, how="left"), exp[["value2", "value1"]]) + + def test_join_multiindex_dates(self): + # GH 33692 + date = pd.Timestamp(2000, 1, 1).date() + + df1_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) + df1 = DataFrame({"col1": [0]}, index=df1_index) + df2_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) + df2 = DataFrame({"col2": [0]}, index=df2_index) + df3_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) + df3 = DataFrame({"col3": [0]}, index=df3_index) + + result = df1.join([df2, df3]) + + expected_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"]) + expected = DataFrame( + {"col1": [0], "col2": [0], "col3": [0]}, index=expected_index + ) + + tm.assert_equal(result, expected) + + def test_merge_join_different_levels(self): + # GH#9455 + + # first dataframe + df1 = DataFrame(columns=["a", "b"], data=[[1, 11], [0, 22]]) + + # second dataframe + columns = MultiIndex.from_tuples([("a", ""), ("c", "c1")]) + df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]]) + + # merge + columns = ["a", "b", ("c", "c1")] + expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]]) + with tm.assert_produces_warning(FutureWarning): + result = pd.merge(df1, df2, on="a") + tm.assert_frame_equal(result, expected) + + # join, see discussion in GH#12219 + columns = ["a", "b", ("a", ""), ("c", "c1")] + expected = DataFrame(columns=columns, data=[[1, 11, 0, 44], [0, 22, 1, 33]]) + msg = "merging between different levels is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + # stacklevel is chosen to be correct for pd.merge, not DataFrame.join + result = df1.join(df2, on="a") + tm.assert_frame_equal(result, expected) + + def test_frame_join_tzaware(self): + test1 = DataFrame( + np.zeros((6, 3)), + index=date_range( + "2012-11-15 
00:00:00", periods=6, freq="100L", tz="US/Central" + ), + ) + test2 = DataFrame( + np.zeros((3, 3)), + index=date_range( + "2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central" + ), + columns=range(3, 6), + ) + + result = test1.join(test2, how="outer") + expected = test1.index.union(test2.index) + + tm.assert_index_equal(result.index, expected) + assert result.index.tz.zone == "US/Central" diff --git a/pandas/tests/frame/methods/test_matmul.py b/pandas/tests/frame/methods/test_matmul.py new file mode 100644 index 00000000..702ab391 --- /dev/null +++ b/pandas/tests/frame/methods/test_matmul.py @@ -0,0 +1,86 @@ +import operator + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +class TestMatMul: + def test_matmul(self): + # matmul test is for GH#10259 + a = DataFrame( + np.random.randn(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"] + ) + b = DataFrame( + np.random.randn(4, 2), index=["p", "q", "r", "s"], columns=["one", "two"] + ) + + # DataFrame @ DataFrame + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # DataFrame @ Series + result = operator.matmul(a, b.one) + expected = Series(np.dot(a.values, b.one.values), index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + # np.array @ DataFrame + result = operator.matmul(a.values, b) + assert isinstance(result, DataFrame) + assert result.columns.equals(b.columns) + assert result.index.equals(Index(range(3))) + expected = np.dot(a.values, b.values) + tm.assert_almost_equal(result.values, expected) + + # nested list @ DataFrame (__rmatmul__) + result = operator.matmul(a.values.tolist(), b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_almost_equal(result.values, expected.values) + + # mixed dtype DataFrame @ DataFrame + a["q"] = a.q.round().astype(int) + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # different dtypes DataFrame @ DataFrame + a = a.astype(int) + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # unaligned + df = DataFrame(np.random.randn(3, 4), index=[1, 2, 3], columns=range(4)) + df2 = DataFrame(np.random.randn(5, 3), index=range(5), columns=[1, 2, 3]) + + with pytest.raises(ValueError, match="aligned"): + operator.matmul(df, df2) + + def test_matmul_message_shapes(self): + # GH#21581 exception message should reflect original shapes, + # not transposed shapes + a = np.random.rand(10, 4) + b = np.random.rand(5, 3) + + df = DataFrame(b) + + msg = r"shapes \(10, 4\) and \(5, 3\) not aligned" + with pytest.raises(ValueError, match=msg): + a @ df + with pytest.raises(ValueError, match=msg): + a.tolist() @ df diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py new file mode 100644 index 00000000..a317dae5 --- /dev/null +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -0,0 +1,239 @@ +""" +Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo" +but are implicitly also testing nsmallest_foo. 
+""" +from string import ascii_lowercase + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture +def df_duplicates(): + return pd.DataFrame( + {"a": [1, 2, 3, 4, 4], "b": [1, 1, 1, 1, 1], "c": [0, 1, 2, 5, 4]}, + index=[0, 0, 1, 1, 1], + ) + + +@pytest.fixture +def df_strings(): + return pd.DataFrame( + { + "a": np.random.permutation(10), + "b": list(ascii_lowercase[:10]), + "c": np.random.permutation(10).astype("float64"), + } + ) + + +@pytest.fixture +def df_main_dtypes(): + return pd.DataFrame( + { + "group": [1, 1, 2], + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "category_string": pd.Series(list("abc")).astype("category"), + "category_int": [7, 8, 9], + "datetime": pd.date_range("20130101", periods=3), + "datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + }, + columns=[ + "group", + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ], + ) + + +class TestNLargestNSmallest: + + # ---------------------------------------------------------------------- + # Top / bottom + @pytest.mark.parametrize( + "order", + [ + ["a"], + ["c"], + ["a", "b"], + ["a", "c"], + ["b", "a"], + ["b", "c"], + ["a", "b", "c"], + ["c", "a", "b"], + ["c", "b", "a"], + ["b", "c", "a"], + ["b", "a", "c"], + # dups! + ["b", "c", "c"], + ], + ) + @pytest.mark.parametrize("n", range(1, 11)) + def test_nlargest_n(self, df_strings, nselect_method, n, order): + # GH#10393 + df = df_strings + if "b" in order: + + error_msg = ( + f"Column 'b' has dtype object, " + f"cannot use method '{nselect_method}' with this dtype" + ) + with pytest.raises(TypeError, match=error_msg): + getattr(df, nselect_method)(n, order) + else: + ascending = nselect_method == "nsmallest" + result = getattr(df, nselect_method)(n, order) + expected = df.sort_values(order, ascending=ascending).head(n) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", [["group", "category_string"], ["group", "string"]] + ) + def test_nlargest_error(self, df_main_dtypes, nselect_method, columns): + df = df_main_dtypes + col = columns[1] + error_msg = ( + f"Column '{col}' has dtype {df[col].dtype}, " + f"cannot use method '{nselect_method}' with this dtype" + ) + # escape some characters that may be in the repr + error_msg = ( + error_msg.replace("(", "\\(") + .replace(")", "\\)") + .replace("[", "\\[") + .replace("]", "\\]") + ) + with pytest.raises(TypeError, match=error_msg): + getattr(df, nselect_method)(2, columns) + + def test_nlargest_all_dtypes(self, df_main_dtypes): + df = df_main_dtypes + df.nsmallest(2, list(set(df) - {"category_string", "string"})) + df.nlargest(2, list(set(df) - {"category_string", "string"})) + + def test_nlargest_duplicates_on_starter_columns(self): + # regression test for GH#22752 + + df = pd.DataFrame({"a": [2, 2, 2, 1, 1, 1], "b": [1, 2, 3, 3, 2, 1]}) + + result = df.nlargest(4, columns=["a", "b"]) + expected = pd.DataFrame( + {"a": [2, 2, 2, 1], "b": [3, 2, 1, 3]}, index=[2, 1, 0, 3] + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(4, columns=["a", "b"]) + expected = pd.DataFrame( + {"a": [1, 1, 1, 2], "b": [1, 2, 3, 1]}, index=[5, 4, 3, 0] + ) + tm.assert_frame_equal(result, expected) + + def test_nlargest_n_identical_values(self): + # GH#15297 + df = pd.DataFrame({"a": [1] * 5, "b": [1, 2, 3, 4, 5]}) + + result = df.nlargest(3, "a") + expected = 
pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=[0, 1, 2]) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(3, "a") + expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "order", + [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]], + ) + @pytest.mark.parametrize("n", range(1, 6)) + def test_nlargest_n_duplicate_index(self, df_duplicates, n, order): + # GH#13412 + + df = df_duplicates + result = df.nsmallest(n, order) + expected = df.sort_values(order).head(n) + tm.assert_frame_equal(result, expected) + + result = df.nlargest(n, order) + expected = df.sort_values(order, ascending=False).head(n) + tm.assert_frame_equal(result, expected) + + def test_nlargest_duplicate_keep_all_ties(self): + # GH#16818 + df = pd.DataFrame( + {"a": [5, 4, 4, 2, 3, 3, 3, 3], "b": [10, 9, 8, 7, 5, 50, 10, 20]} + ) + result = df.nlargest(4, "a", keep="all") + expected = pd.DataFrame( + { + "a": {0: 5, 1: 4, 2: 4, 4: 3, 5: 3, 6: 3, 7: 3}, + "b": {0: 10, 1: 9, 2: 8, 4: 5, 5: 50, 6: 10, 7: 20}, + } + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(2, "a", keep="all") + expected = pd.DataFrame( + { + "a": {3: 2, 4: 3, 5: 3, 6: 3, 7: 3}, + "b": {3: 7, 4: 5, 5: 50, 6: 10, 7: 20}, + } + ) + tm.assert_frame_equal(result, expected) + + def test_nlargest_multiindex_column_lookup(self): + # Check whether tuples are correctly treated as multi-level lookups. + # GH#23033 + df = pd.DataFrame( + columns=pd.MultiIndex.from_product([["x"], ["a", "b"]]), + data=[[0.33, 0.13], [0.86, 0.25], [0.25, 0.70], [0.85, 0.91]], + ) + + # nsmallest + result = df.nsmallest(3, ("x", "a")) + expected = df.iloc[[2, 0, 3]] + tm.assert_frame_equal(result, expected) + + # nlargest + result = df.nlargest(3, ("x", "b")) + expected = df.iloc[[3, 2, 1]] + tm.assert_frame_equal(result, expected) + + def test_nlargest_nan(self): + # GH#43060 + df = pd.DataFrame([np.nan, np.nan, 0, 1, 2, 3]) + result = df.nlargest(5, 0) + expected = df.sort_values(0, ascending=False).head(5) + tm.assert_frame_equal(result, expected) + + def test_nsmallest_nan_after_n_element(self): + # GH#46589 + df = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5, None, 7], + "b": [7, 6, 5, 4, 3, 2, 1], + "c": [1, 1, 2, 2, 3, 3, 3], + }, + index=range(7), + ) + result = df.nsmallest(5, columns=["a", "b"]) + expected = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "b": [7, 6, 5, 4, 3], + "c": [1, 1, 2, 2, 3], + }, + index=range(5), + ).astype({"a": "float"}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py new file mode 100644 index 00000000..8749218d --- /dev/null +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -0,0 +1,120 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFramePctChange: + @pytest.mark.parametrize( + "periods,fill_method,limit,exp", + [ + (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), + (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), + (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]), + (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]), + (-1, "ffill", None, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, 0, np.nan]), + (-1, "ffill", 1, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, np.nan, np.nan]), + (-1, "bfill", None, [0, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), + (-1, "bfill", 1, [np.nan, 0, 
-0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), + ], + ) + @pytest.mark.parametrize("klass", [DataFrame, Series]) + def test_pct_change_with_nas(self, periods, fill_method, limit, exp, klass): + vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] + obj = klass(vals) + + res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) + tm.assert_equal(res, klass(exp)) + + def test_pct_change_numeric(self): + # GH#11150 + pnl = DataFrame( + [np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(0, 40, 10)] + ).astype(np.float64) + pnl.iat[1, 0] = np.nan + pnl.iat[1, 1] = np.nan + pnl.iat[2, 3] = 60 + + for axis in range(2): + expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 + result = pnl.pct_change(axis=axis, fill_method="pad") + + tm.assert_frame_equal(result, expected) + + def test_pct_change(self, datetime_frame): + rs = datetime_frame.pct_change(fill_method=None) + tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) + + rs = datetime_frame.pct_change(2) + filled = datetime_frame.fillna(method="pad") + tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) + + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + filled = datetime_frame.fillna(method="bfill", limit=1) + tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) + + rs = datetime_frame.pct_change(freq="5D") + filled = datetime_frame.fillna(method="pad") + tm.assert_frame_equal( + rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) + ) + + def test_pct_change_shift_over_nas(self): + s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) + + df = DataFrame({"a": s, "b": s}) + + chg = df.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) + edf = DataFrame({"a": expected, "b": expected}) + tm.assert_frame_equal(chg, edf) + + @pytest.mark.parametrize( + "freq, periods, fill_method, limit", + [ + ("5B", 5, None, None), + ("3B", 3, None, None), + ("3B", 3, "bfill", None), + ("7B", 7, "pad", 1), + ("7B", 7, "bfill", 3), + ("14B", 14, None, None), + ], + ) + def test_pct_change_periods_freq( + self, datetime_frame, freq, periods, fill_method, limit + ): + # GH#7292 + rs_freq = datetime_frame.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + rs_periods = datetime_frame.pct_change( + periods, fill_method=fill_method, limit=limit + ) + tm.assert_frame_equal(rs_freq, rs_periods) + + empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) + rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + tm.assert_frame_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + data = DataFrame( + {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 + ) + result = data.pct_change(fill_method=fill_method) + if fill_method is None: + second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] + else: + second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0] + expected = DataFrame( + {0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], 1: second_column}, + index=["a", "b"] * 3, + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_pipe.py b/pandas/tests/frame/methods/test_pipe.py new file mode 100644 index 00000000..5bcc4360 --- /dev/null +++ b/pandas/tests/frame/methods/test_pipe.py @@ -0,0 +1,39 @@ +import pytest + +from pandas import ( + DataFrame, + Series, 
+) +import pandas._testing as tm + + +class TestPipe: + def test_pipe(self, frame_or_series): + obj = DataFrame({"A": [1, 2, 3]}) + expected = DataFrame({"A": [1, 4, 9]}) + if frame_or_series is Series: + obj = obj["A"] + expected = expected["A"] + + f = lambda x, y: x**y + result = obj.pipe(f, 2) + tm.assert_equal(result, expected) + + def test_pipe_tuple(self, frame_or_series): + obj = DataFrame({"A": [1, 2, 3]}) + obj = tm.get_obj(obj, frame_or_series) + + f = lambda x, y: y + result = obj.pipe((f, "y"), 0) + tm.assert_equal(result, obj) + + def test_pipe_tuple_error(self, frame_or_series): + obj = DataFrame({"A": [1, 2, 3]}) + obj = tm.get_obj(obj, frame_or_series) + + f = lambda x, y: y + + msg = "y is both the pipe target and a keyword argument" + + with pytest.raises(ValueError, match=msg): + obj.pipe((f, "y"), x=1, y=0) diff --git a/pandas/tests/frame/methods/test_pop.py b/pandas/tests/frame/methods/test_pop.py new file mode 100644 index 00000000..a4f99b82 --- /dev/null +++ b/pandas/tests/frame/methods/test_pop.py @@ -0,0 +1,71 @@ +import numpy as np + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestDataFramePop: + def test_pop(self, float_frame): + float_frame.columns.name = "baz" + + float_frame.pop("A") + assert "A" not in float_frame + + float_frame["foo"] = "bar" + float_frame.pop("foo") + assert "foo" not in float_frame + assert float_frame.columns.name == "baz" + + # gh-10912: inplace ops cause caching issue + a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"], index=["X", "Y"]) + b = a.pop("B") + b += 1 + + # original frame + expected = DataFrame([[1, 3], [4, 6]], columns=["A", "C"], index=["X", "Y"]) + tm.assert_frame_equal(a, expected) + + # result + expected = Series([2, 5], index=["X", "Y"], name="B") + 1 + tm.assert_series_equal(b, expected) + + def test_pop_non_unique_cols(self): + df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]}) + df.columns = ["a", "b", "a"] + + res = df.pop("a") + assert type(res) == DataFrame + assert len(res) == 2 + assert len(df.columns) == 1 + assert "b" in df.columns + assert "a" not in df.columns + assert len(df.index) == 2 + + def test_mixed_depth_pop(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + df1 = df.copy() + df2 = df.copy() + result = df1.pop("a") + expected = df2.pop(("a", "", "")) + tm.assert_series_equal(expected, result, check_names=False) + tm.assert_frame_equal(df1, df2) + assert result.name == "a" + + expected = df1["top"] + df1 = df1.drop(["top"], axis=1) + result = df2.pop("top") + tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(df1, df2) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py new file mode 100644 index 00000000..139360d3 --- /dev/null +++ b/pandas/tests/frame/methods/test_quantile.py @@ -0,0 +1,1020 @@ +import numpy as np +import pytest + +from pandas.compat.numpy import ( + np_percentile_argname, + np_version_under1p21, +) + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, +) +import pandas._testing as tm + + +@pytest.fixture( + params=[["linear", "single"], ["nearest", "table"]], ids=lambda x: "-".join(x) +) +def interp_method(request): + """(interpolation, method) arguments for 
quantile""" + return request.param + + +class TestDataFrameQuantile: + @pytest.mark.parametrize( + "non_num_col", + [ + pd.date_range("2014-01-01", periods=3, freq="m"), + ["a", "b", "c"], + [DataFrame, Series, Timestamp], + ], + ) + def test_numeric_only_default_false_warning( + self, non_num_col, interp_method, request, using_array_manager + ): + # GH #7308 + interpolation, method = interp_method + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}) + df["C"] = non_num_col + + expected = Series( + [2.0, 3.0], + index=["A", "B"], + name=0.5, + ) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + with tm.assert_produces_warning(FutureWarning, match="numeric_only"): + result = df.quantile(0.5, interpolation=interpolation, method=method) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "df,expected", + [ + [ + DataFrame( + { + 0: Series(pd.arrays.SparseArray([1, 2])), + 1: Series(pd.arrays.SparseArray([3, 4])), + } + ), + Series([1.5, 3.5], name=0.5), + ], + [ + DataFrame(Series([0.0, None, 1.0, 2.0], dtype="Sparse[float]")), + Series([1.0], name=0.5), + ], + ], + ) + def test_quantile_sparse(self, df, expected): + # GH#17198 + # GH#24600 + result = df.quantile() + + tm.assert_series_equal(result, expected) + + def test_quantile( + self, datetime_frame, interp_method, using_array_manager, request + ): + interpolation, method = interp_method + df = datetime_frame + result = df.quantile( + 0.1, axis=0, numeric_only=True, interpolation=interpolation, method=method + ) + expected = Series( + [np.percentile(df[col], 10) for col in df.columns], + index=df.columns, + name=0.1, + ) + if interpolation == "linear": + # np.percentile values only comparable to linear interpolation + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result.index, expected.index) + request.node.add_marker( + pytest.mark.xfail( + using_array_manager, reason="Name set incorrectly for arraymanager" + ) + ) + assert result.name == expected.name + + result = df.quantile( + 0.9, axis=1, numeric_only=True, interpolation=interpolation, method=method + ) + expected = Series( + [np.percentile(df.loc[date], 90) for date in df.index], + index=df.index, + name=0.9, + ) + if interpolation == "linear": + # np.percentile values only comparable to linear interpolation + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result.index, expected.index) + request.node.add_marker( + pytest.mark.xfail( + using_array_manager, reason="Name set incorrectly for arraymanager" + ) + ) + assert result.name == expected.name + + def test_empty(self, interp_method): + interpolation, method = interp_method + q = DataFrame({"x": [], "y": []}).quantile( + 0.1, axis=0, numeric_only=True, interpolation=interpolation, method=method + ) + assert np.isnan(q["x"]) and np.isnan(q["y"]) + + def test_non_numeric_exclusion(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) + rs = df.quantile( + 0.5, numeric_only=True, interpolation=interpolation, method=method + ) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + xp = df.median().rename(0.5) + if interpolation == "nearest": + xp = (xp + 0.5).astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + 
pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + tm.assert_series_equal(rs, xp) + + def test_axis(self, interp_method, request, using_array_manager): + # axis + interpolation, method = interp_method + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1, interpolation=interpolation, method=method) + expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + tm.assert_series_equal(result, expected) + + result = df.quantile( + [0.5, 0.75], axis=1, interpolation=interpolation, method=method + ) + expected = DataFrame( + {1: [1.5, 1.75], 2: [2.5, 2.75], 3: [3.5, 3.75]}, index=[0.5, 0.75] + ) + if interpolation == "nearest": + expected.iloc[0, :] -= 0.5 + expected.iloc[1, :] += 0.25 + expected = expected.astype(np.int64) + tm.assert_frame_equal(result, expected, check_index_type=True) + + def test_axis_numeric_only_true(self, interp_method, request, using_array_manager): + # We may want to break API in the future to change this + # so that we exclude non-numeric along the same axis + # See GH #7312 + interpolation, method = interp_method + df = DataFrame([[1, 2, 3], ["a", "b", 4]]) + result = df.quantile( + 0.5, axis=1, numeric_only=True, interpolation=interpolation, method=method + ) + expected = Series([3.0, 4.0], index=[0, 1], name=0.5) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + tm.assert_series_equal(result, expected) + + def test_quantile_date_range(self, interp_method, request, using_array_manager): + # GH 2460 + interpolation, method = interp_method + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + ser = Series(dti) + df = DataFrame(ser) + + result = df.quantile( + numeric_only=False, interpolation=interpolation, method=method + ) + expected = Series( + ["2016-01-02 00:00:00"], name=0.5, dtype="datetime64[ns, US/Pacific]" + ) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + + tm.assert_series_equal(result, expected) + + def test_quantile_axis_mixed(self, interp_method, request, using_array_manager): + + # mixed on axis=1 + interpolation, method = interp_method + df = DataFrame( + { + "A": [1, 2, 3], + "B": [2.0, 3.0, 4.0], + "C": pd.date_range("20130101", periods=3), + "D": ["foo", "bar", "baz"], + } + ) + result = df.quantile( + 0.5, axis=1, numeric_only=True, interpolation=interpolation, method=method + ) + expected = Series([1.5, 2.5, 3.5], name=0.5) + if interpolation == "nearest": + expected -= 0.5 + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + tm.assert_series_equal(result, expected) + + # must raise + msg = "'<' not supported between instances of 'Timestamp' and 'float'" + with pytest.raises(TypeError, match=msg): + df.quantile(0.5, axis=1, numeric_only=False) + + def test_quantile_axis_parameter(self, interp_method, request, using_array_manager): + # GH 9543/9544 + interpolation, method = interp_method + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + df = 
DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + + result = df.quantile(0.5, axis=0, interpolation=interpolation, method=method) + + expected = Series([2.0, 3.0], index=["A", "B"], name=0.5) + if interpolation == "nearest": + expected = expected.astype(np.int64) + tm.assert_series_equal(result, expected) + + expected = df.quantile( + 0.5, axis="index", interpolation=interpolation, method=method + ) + if interpolation == "nearest": + expected = expected.astype(np.int64) + tm.assert_series_equal(result, expected) + + result = df.quantile(0.5, axis=1, interpolation=interpolation, method=method) + + expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) + if interpolation == "nearest": + expected = expected.astype(np.int64) + tm.assert_series_equal(result, expected) + + result = df.quantile( + 0.5, axis="columns", interpolation=interpolation, method=method + ) + tm.assert_series_equal(result, expected) + + msg = "No axis named -1 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.quantile(0.1, axis=-1, interpolation=interpolation, method=method) + msg = "No axis named column for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.quantile(0.1, axis="column") + + def test_quantile_interpolation(self): + # see gh-10174 + + # interpolation method other than default linear + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1, interpolation="nearest") + expected = Series([1, 2, 3], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + + # cross-check interpolation=nearest results in original dtype + exp = np.percentile( + np.array([[1, 2, 3], [2, 3, 4]]), + 0.5, + axis=0, + **{np_percentile_argname: "nearest"}, + ) + expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="int64") + tm.assert_series_equal(result, expected) + + # float + df = DataFrame({"A": [1.0, 2.0, 3.0], "B": [2.0, 3.0, 4.0]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1, interpolation="nearest") + expected = Series([1.0, 2.0, 3.0], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + exp = np.percentile( + np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]]), + 0.5, + axis=0, + **{np_percentile_argname: "nearest"}, + ) + expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="float64") + tm.assert_series_equal(result, expected) + + # axis + result = df.quantile([0.5, 0.75], axis=1, interpolation="lower") + expected = DataFrame( + {1: [1.0, 1.0], 2: [2.0, 2.0], 3: [3.0, 3.0]}, index=[0.5, 0.75] + ) + tm.assert_frame_equal(result, expected) + + # test degenerate case + df = DataFrame({"x": [], "y": []}) + q = df.quantile(0.1, axis=0, interpolation="higher") + assert np.isnan(q["x"]) and np.isnan(q["y"]) + + # multi + df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) + result = df.quantile([0.25, 0.5], interpolation="midpoint") + + # https://github.com/numpy/numpy/issues/7163 + expected = DataFrame( + [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], + index=[0.25, 0.5], + columns=["a", "b", "c"], + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_interpolation_datetime(self, datetime_frame): + # see gh-10174 + + # interpolation = linear (default case) + df = datetime_frame + q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear") + assert q["A"] == np.percentile(df["A"], 10) + + def test_quantile_interpolation_int(self, int_frame): + # see gh-10174 + + df = int_frame + # interpolation = linear (default case) + q = df.quantile(0.1) + 
assert q["A"] == np.percentile(df["A"], 10) + + # test with and without interpolation keyword + q1 = df.quantile(0.1, axis=0, interpolation="linear") + assert q1["A"] == np.percentile(df["A"], 10) + tm.assert_series_equal(q, q1) + + def test_quantile_multi(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) + result = df.quantile([0.25, 0.5], interpolation=interpolation, method=method) + expected = DataFrame( + [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], + index=[0.25, 0.5], + columns=["a", "b", "c"], + ) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_multi_axis_1(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) + result = df.quantile( + [0.25, 0.5], axis=1, interpolation=interpolation, method=method + ) + expected = DataFrame( + [[1.0, 2.0, 3.0]] * 2, index=[0.25, 0.5], columns=[0, 1, 2] + ) + if interpolation == "nearest": + expected = expected.astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_multi_empty(self, interp_method): + interpolation, method = interp_method + result = DataFrame({"x": [], "y": []}).quantile( + [0.1, 0.9], axis=0, interpolation=interpolation, method=method + ) + expected = DataFrame( + {"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9] + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_datetime(self): + df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) + + # exclude datetime + result = df.quantile(0.5, numeric_only=True) + expected = Series([2.5], index=["b"], name=0.5) + tm.assert_series_equal(result, expected) + + # datetime + result = df.quantile(0.5, numeric_only=False) + expected = Series( + [Timestamp("2010-07-02 12:00:00"), 2.5], index=["a", "b"], name=0.5 + ) + tm.assert_series_equal(result, expected) + + # datetime w/ multi + result = df.quantile([0.5], numeric_only=False) + expected = DataFrame( + [[Timestamp("2010-07-02 12:00:00"), 2.5]], index=[0.5], columns=["a", "b"] + ) + tm.assert_frame_equal(result, expected) + + # axis = 1 + df["c"] = pd.to_datetime(["2011", "2012"]) + result = df[["a", "c"]].quantile(0.5, axis=1, numeric_only=False) + expected = Series( + [Timestamp("2010-07-02 12:00:00"), Timestamp("2011-07-02 12:00:00")], + index=[0, 1], + name=0.5, + ) + tm.assert_series_equal(result, expected) + + result = df[["a", "c"]].quantile([0.5], axis=1, numeric_only=False) + expected = DataFrame( + [[Timestamp("2010-07-02 12:00:00"), Timestamp("2011-07-02 12:00:00")]], + index=[0.5], + columns=[0, 1], + ) + tm.assert_frame_equal(result, expected) + + # empty when numeric_only=True + result = df[["a", "c"]].quantile(0.5, numeric_only=True) + expected = Series([], index=[], dtype=np.float64, name=0.5) + tm.assert_series_equal(result, expected) + + result = df[["a", "c"]].quantile([0.5], numeric_only=True) + expected = DataFrame(index=[0.5]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[ns]", + "datetime64[ns, US/Pacific]", + 
"timedelta64[ns]", + "Period[D]", + ], + ) + def test_quantile_dt64_empty(self, dtype, interp_method): + # GH#41544 + interpolation, method = interp_method + df = DataFrame(columns=["a", "b"], dtype=dtype) + + res = df.quantile( + 0.5, axis=1, numeric_only=False, interpolation=interpolation, method=method + ) + expected = Series([], index=[], name=0.5, dtype=dtype) + tm.assert_series_equal(res, expected) + + # no columns in result, so no dtype preservation + res = df.quantile( + [0.5], + axis=1, + numeric_only=False, + interpolation=interpolation, + method=method, + ) + expected = DataFrame(index=[0.5]) + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize("invalid", [-1, 2, [0.5, -1], [0.5, 2]]) + def test_quantile_invalid(self, invalid, datetime_frame, interp_method): + msg = "percentiles should all be in the interval \\[0, 1\\]" + interpolation, method = interp_method + with pytest.raises(ValueError, match=msg): + datetime_frame.quantile(invalid, interpolation=interpolation, method=method) + + def test_quantile_box(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + df = DataFrame( + { + "A": [ + Timestamp("2011-01-01"), + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ], + "B": [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-03", tz="US/Eastern"), + ], + "C": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + } + ) + + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) + + exp = Series( + [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + ], + name=0.5, + index=["A", "B", "C"], + ) + tm.assert_series_equal(res, exp) + + res = df.quantile( + [0.5], numeric_only=False, interpolation=interpolation, method=method + ) + exp = DataFrame( + [ + [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + ] + ], + index=[0.5], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(res, exp) + + def test_quantile_box_nat(self): + # DatetimeLikeBlock may be consolidated and contain NaT in different loc + df = DataFrame( + { + "A": [ + Timestamp("2011-01-01"), + pd.NaT, + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ], + "a": [ + Timestamp("2011-01-01"), + Timestamp("2011-01-02"), + pd.NaT, + Timestamp("2011-01-03"), + ], + "B": [ + Timestamp("2011-01-01", tz="US/Eastern"), + pd.NaT, + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-03", tz="US/Eastern"), + ], + "b": [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.NaT, + Timestamp("2011-01-03", tz="US/Eastern"), + ], + "C": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + pd.NaT, + ], + "c": [ + pd.NaT, + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + }, + columns=list("AaBbCc"), + ) + + res = df.quantile(0.5, numeric_only=False) + exp = Series( + [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02"), + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + pd.Timedelta("2 days"), + ], + name=0.5, + index=list("AaBbCc"), + ) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = DataFrame( + 
[ + [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02"), + Timestamp("2011-01-02", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + pd.Timedelta("2 days"), + ] + ], + index=[0.5], + columns=list("AaBbCc"), + ) + tm.assert_frame_equal(res, exp) + + def test_quantile_nan(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + # GH 14357 - float block where some cols have missing values + df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)}) + df.iloc[-1, 1] = np.nan + + res = df.quantile(0.5, interpolation=interpolation, method=method) + exp = Series( + [3.0, 2.5 if interpolation == "linear" else 3.0], index=["a", "b"], name=0.5 + ) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5, 0.75], interpolation=interpolation, method=method) + exp = DataFrame( + { + "a": [3.0, 4.0], + "b": [2.5, 3.25] if interpolation == "linear" else [3.0, 4.0], + }, + index=[0.5, 0.75], + ) + tm.assert_frame_equal(res, exp) + + res = df.quantile(0.5, axis=1, interpolation=interpolation, method=method) + exp = Series(np.arange(1.0, 6.0), name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile( + [0.5, 0.75], axis=1, interpolation=interpolation, method=method + ) + exp = DataFrame([np.arange(1.0, 6.0)] * 2, index=[0.5, 0.75]) + if interpolation == "nearest": + exp.iloc[1, -1] = np.nan + tm.assert_frame_equal(res, exp) + + # full-nan column + df["b"] = np.nan + + res = df.quantile(0.5, interpolation=interpolation, method=method) + exp = Series([3.0, np.nan], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5, 0.75], interpolation=interpolation, method=method) + exp = DataFrame({"a": [3.0, 4.0], "b": [np.nan, np.nan]}, index=[0.5, 0.75]) + tm.assert_frame_equal(res, exp) + + def test_quantile_nat(self, interp_method, request, using_array_manager): + interpolation, method = interp_method + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + # full NaT column + df = DataFrame({"a": [pd.NaT, pd.NaT, pd.NaT]}) + + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) + exp = Series([pd.NaT], index=["a"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile( + [0.5], numeric_only=False, interpolation=interpolation, method=method + ) + exp = DataFrame({"a": [pd.NaT]}, index=[0.5]) + tm.assert_frame_equal(res, exp) + + # mixed non-null / full null column + df = DataFrame( + { + "a": [ + Timestamp("2012-01-01"), + Timestamp("2012-01-02"), + Timestamp("2012-01-03"), + ], + "b": [pd.NaT, pd.NaT, pd.NaT], + } + ) + + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) + exp = Series([Timestamp("2012-01-02"), pd.NaT], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile( + [0.5], numeric_only=False, interpolation=interpolation, method=method + ) + exp = DataFrame( + [[Timestamp("2012-01-02"), pd.NaT]], index=[0.5], columns=["a", "b"] + ) + tm.assert_frame_equal(res, exp) + + def test_quantile_empty_no_rows_floats(self, interp_method): + interpolation, method = interp_method + + df = DataFrame(columns=["a", "b"], dtype="float64") + + res = df.quantile(0.5, interpolation=interpolation, method=method) + exp = Series([np.nan, np.nan], 
index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], interpolation=interpolation, method=method) + exp = DataFrame([[np.nan, np.nan]], columns=["a", "b"], index=[0.5]) + tm.assert_frame_equal(res, exp) + + res = df.quantile(0.5, axis=1, interpolation=interpolation, method=method) + exp = Series([], index=[], dtype="float64", name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], axis=1, interpolation=interpolation, method=method) + exp = DataFrame(columns=[], index=[0.5]) + tm.assert_frame_equal(res, exp) + + def test_quantile_empty_no_rows_ints(self, interp_method): + interpolation, method = interp_method + df = DataFrame(columns=["a", "b"], dtype="int64") + + res = df.quantile(0.5, interpolation=interpolation, method=method) + exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + def test_quantile_empty_no_rows_dt64(self, interp_method): + interpolation, method = interp_method + # datetimes + df = DataFrame(columns=["a", "b"], dtype="datetime64[ns]") + + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) + exp = Series( + [pd.NaT, pd.NaT], index=["a", "b"], dtype="datetime64[ns]", name=0.5 + ) + tm.assert_series_equal(res, exp) + + # Mixed dt64/dt64tz + df["a"] = df["a"].dt.tz_localize("US/Central") + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) + exp = exp.astype(object) + tm.assert_series_equal(res, exp) + + # both dt64tz + df["b"] = df["b"].dt.tz_localize("US/Central") + res = df.quantile( + 0.5, numeric_only=False, interpolation=interpolation, method=method + ) + exp = exp.astype(df["b"].dtype) + tm.assert_series_equal(res, exp) + + def test_quantile_empty_no_columns(self, interp_method): + # GH#23925 _get_numeric_data may drop all columns + interpolation, method = interp_method + df = DataFrame(pd.date_range("1/1/18", periods=5)) + df.columns.name = "captain tightpants" + result = df.quantile( + 0.5, numeric_only=True, interpolation=interpolation, method=method + ) + expected = Series([], index=[], name=0.5, dtype=np.float64) + expected.index.name = "captain tightpants" + tm.assert_series_equal(result, expected) + + result = df.quantile( + [0.5], numeric_only=True, interpolation=interpolation, method=method + ) + expected = DataFrame([], index=[0.5], columns=[]) + expected.columns.name = "captain tightpants" + tm.assert_frame_equal(result, expected) + + def test_quantile_item_cache(self, using_array_manager, interp_method): + # previous behavior incorrect retained an invalid _item_cache entry + interpolation, method = interp_method + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df["D"] = df["A"] * 2 + ser = df["A"] + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + df.quantile(numeric_only=False, interpolation=interpolation, method=method) + ser.values[0] = 99 + + assert df.iloc[0, 0] == df["A"][0] + + def test_invalid_method(self): + with pytest.raises(ValueError, match="Invalid method: foo"): + DataFrame(range(1)).quantile(0.5, method="foo") + + def test_table_invalid_interpolation(self): + with pytest.raises(ValueError, match="Invalid interpolation: foo"): + DataFrame(range(1)).quantile(0.5, method="table", interpolation="foo") + + +class TestQuantileExtensionDtype: + # TODO: tests for axis=1? + # TODO: empty case? 
+ + @pytest.fixture( + params=[ + pytest.param( + pd.IntervalIndex.from_breaks(range(10)), + marks=pytest.mark.xfail(reason="raises when trying to add Intervals"), + ), + pd.period_range("2016-01-01", periods=9, freq="D"), + pd.date_range("2016-01-01", periods=9, tz="US/Pacific"), + pd.timedelta_range("1 Day", periods=9), + pd.array(np.arange(9), dtype="Int64"), + pd.array(np.arange(9), dtype="Float64"), + ], + ids=lambda x: str(x.dtype), + ) + def index(self, request): + # NB: not actually an Index object + idx = request.param + idx.name = "A" + return idx + + @pytest.fixture + def obj(self, index, frame_or_series): + # bc index is not always an Index (yet), we need to re-patch .name + obj = frame_or_series(index).copy() + + if frame_or_series is Series: + obj.name = "A" + else: + obj.columns = ["A"] + return obj + + def compute_quantile(self, obj, qs): + if isinstance(obj, Series): + result = obj.quantile(qs) + else: + result = obj.quantile(qs, numeric_only=False) + return result + + def test_quantile_ea(self, request, obj, index): + + # result should be invariant to shuffling + indexer = np.arange(len(index), dtype=np.intp) + np.random.shuffle(indexer) + obj = obj.iloc[indexer] + + qs = [0.5, 0, 1] + result = self.compute_quantile(obj, qs) + + if np_version_under1p21 and index.dtype == "timedelta64[ns]": + msg = "failed on Numpy 1.20.3; TypeError: data type 'Int64' not understood" + mark = pytest.mark.xfail(reason=msg, raises=TypeError) + request.node.add_marker(mark) + + exp_dtype = index.dtype + if index.dtype == "Int64": + # match non-nullable casting behavior + exp_dtype = "Float64" + + # expected here assumes len(index) == 9 + expected = Series( + [index[4], index[0], index[-1]], dtype=exp_dtype, index=qs, name="A" + ) + expected = type(obj)(expected) + + tm.assert_equal(result, expected) + + def test_quantile_ea_with_na(self, obj, index): + + obj.iloc[0] = index._na_value + obj.iloc[-1] = index._na_value + + # result should be invariant to shuffling + indexer = np.arange(len(index), dtype=np.intp) + np.random.shuffle(indexer) + obj = obj.iloc[indexer] + + qs = [0.5, 0, 1] + result = self.compute_quantile(obj, qs) + + # expected here assumes len(index) == 9 + expected = Series( + [index[4], index[1], index[-2]], dtype=index.dtype, index=qs, name="A" + ) + expected = type(obj)(expected) + tm.assert_equal(result, expected) + + # TODO(GH#39763): filtering can be removed after GH#39763 is fixed + @pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning") + def test_quantile_ea_all_na(self, request, obj, index): + obj.iloc[:] = index._na_value + + # TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed + obj = obj.astype(index.dtype) + assert np.all(obj.dtypes == index.dtype) + + # result should be invariant to shuffling + indexer = np.arange(len(index), dtype=np.intp) + np.random.shuffle(indexer) + obj = obj.iloc[indexer] + + qs = [0.5, 0, 1] + result = self.compute_quantile(obj, qs) + + expected = index.take([-1, -1, -1], allow_fill=True, fill_value=index._na_value) + expected = Series(expected, index=qs, name="A") + expected = type(obj)(expected) + tm.assert_equal(result, expected) + + def test_quantile_ea_scalar(self, request, obj, index): + # scalar qs + + # result should be invariant to shuffling + indexer = np.arange(len(index), dtype=np.intp) + np.random.shuffle(indexer) + obj = obj.iloc[indexer] + + qs = 0.5 + result = self.compute_quantile(obj, qs) + + if np_version_under1p21 and index.dtype == "timedelta64[ns]": + msg = "failed on 
Numpy 1.20.3; TypeError: data type 'Int64' not understood" + mark = pytest.mark.xfail(reason=msg, raises=TypeError) + request.node.add_marker(mark) + + exp_dtype = index.dtype + if index.dtype == "Int64": + exp_dtype = "Float64" + + expected = Series({"A": index[4]}, dtype=exp_dtype, name=0.5) + if isinstance(obj, Series): + expected = expected["A"] + assert result == expected + else: + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected_data, expected_index, axis", + [ + ["float64", [], [], 1], + ["int64", [], [], 1], + ["float64", [np.nan, np.nan], ["a", "b"], 0], + ["int64", [np.nan, np.nan], ["a", "b"], 0], + ], + ) + def test_empty_numeric(self, dtype, expected_data, expected_index, axis): + # GH 14564 + df = DataFrame(columns=["a", "b"], dtype=dtype) + result = df.quantile(0.5, axis=axis) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype="float64" + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected_data, expected_index, axis, expected_dtype", + [ + ["datetime64[ns]", [], [], 1, "datetime64[ns]"], + ["datetime64[ns]", [pd.NaT, pd.NaT], ["a", "b"], 0, "datetime64[ns]"], + ], + ) + def test_empty_datelike( + self, dtype, expected_data, expected_index, axis, expected_dtype + ): + # GH 14564 + df = DataFrame(columns=["a", "b"], dtype=dtype) + result = df.quantile(0.5, axis=axis, numeric_only=False) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype=expected_dtype + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "expected_data, expected_index, axis", + [ + [[np.nan, np.nan], range(2), 1], + [[], [], 0], + ], + ) + def test_datelike_numeric_only(self, expected_data, expected_index, axis): + # GH 14564 + df = DataFrame( + { + "a": pd.to_datetime(["2010", "2011"]), + "b": [0, 5], + "c": pd.to_datetime(["2011", "2012"]), + } + ) + result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype=np.float64 + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py new file mode 100644 index 00000000..7b2f7908 --- /dev/null +++ b/pandas/tests/frame/methods/test_rank.py @@ -0,0 +1,497 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas._libs.algos import ( + Infinity, + NegInfinity, +) +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestRank: + s = Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]) + df = DataFrame({"A": s, "B": s}) + + results = { + "average": np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5]), + "min": np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5]), + "max": np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6]), + "first": np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6]), + "dense": np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]), + } + + @pytest.fixture(params=["average", "min", "max", "first", "dense"]) + def method(self, request): + """ + Fixture for trying all rank methods + """ + return request.param + + @td.skip_if_no_scipy + def test_rank(self, float_frame): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + float_frame["A"][::2] = np.nan + float_frame["B"][::3] = np.nan + float_frame["C"][::4] = np.nan + float_frame["D"][::5] = np.nan + + ranks0 = 
float_frame.rank() + ranks1 = float_frame.rank(1) + mask = np.isnan(float_frame.values) + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp0[mask] = np.nan + + exp1 = np.apply_along_axis(rankdata, 1, fvals) + exp1[mask] = np.nan + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # integers + df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) + + result = df.rank() + exp = df.astype(float).rank() + tm.assert_frame_equal(result, exp) + + result = df.rank(1) + exp = df.astype(float).rank(1) + tm.assert_frame_equal(result, exp) + + def test_rank2(self): + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 + result = df.rank(1, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = df.rank(0) / 2.0 + result = df.rank(0, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([["b", "c", "a"], ["a", "c", "b"]]) + expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame([["b", np.nan, "a"], ["a", "c", "b"]]) + expected = DataFrame([[2.0, np.nan, 1.0], [1.0, 3.0, 2.0]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, np.nan, 1.0], [1.0, 1.0, 2.0]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + # f7u12, this does not work without extensive workaround + data = [ + [datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)], + ] + df = DataFrame(data) + + # check the rank + expected = DataFrame([[2.0, np.nan, 1.0], [2.0, 3.0, 1.0]]) + result = df.rank(1, numeric_only=False, ascending=True) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[1.0, np.nan, 2.0], [2.0, 1.0, 3.0]]) + result = df.rank(1, numeric_only=False, ascending=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, 1e60, 1e80, 1e-30]}) + exp = DataFrame({"a": [3.5, 1.0, 3.5, 5.0, 6.0, 7.0, 2.0]}) + tm.assert_frame_equal(df.rank(), exp) + + def test_rank_does_not_mutate(self): + # GH#18521 + # Check rank does not mutate DataFrame + df = DataFrame(np.random.randn(10, 3), dtype="float64") + expected = df.copy() + df.rank() + result = df + tm.assert_frame_equal(result, expected) + + def test_rank_mixed_frame(self, float_string_frame): + float_string_frame["datetime"] = datetime.now() + float_string_frame["timedelta"] = timedelta(days=1, seconds=1) + + with tm.assert_produces_warning(FutureWarning, match="numeric_only=None"): + float_string_frame.rank(numeric_only=None) + with tm.assert_produces_warning(FutureWarning, match="Dropping of nuisance"): + result = float_string_frame.rank(1) + expected = float_string_frame.rank(1, numeric_only=True) + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + def test_rank_na_option(self, float_frame): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + float_frame["A"][::2] = np.nan + float_frame["B"][::3] = np.nan + float_frame["C"][::4] = np.nan + float_frame["D"][::5] = np.nan + + # bottom + ranks0 = float_frame.rank(na_option="bottom") + ranks1 = float_frame.rank(1, 
na_option="bottom") + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp1 = np.apply_along_axis(rankdata, 1, fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # top + ranks0 = float_frame.rank(na_option="top") + ranks1 = float_frame.rank(1, na_option="top") + + fval0 = float_frame.fillna((float_frame.min() - 1).to_dict()).values + fval1 = float_frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fval0) + exp1 = np.apply_along_axis(rankdata, 1, fval1) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # bottom + ranks0 = float_frame.rank(na_option="top", ascending=False) + ranks1 = float_frame.rank(1, na_option="top", ascending=False) + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fvals) + exp1 = np.apply_along_axis(rankdata, 1, -fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # top + ranks0 = float_frame.rank(na_option="bottom", ascending=False) + ranks1 = float_frame.rank(1, na_option="bottom", ascending=False) + + fval0 = float_frame.fillna((float_frame.min() - 1).to_dict()).values + fval1 = float_frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fval0) + exp1 = np.apply_along_axis(rankdata, 1, -fval1) + + tm.assert_numpy_array_equal(ranks0.values, exp0) + tm.assert_numpy_array_equal(ranks1.values, exp1) + + # bad values throw error + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + + with pytest.raises(ValueError, match=msg): + float_frame.rank(na_option="bad", ascending=False) + + # invalid type + with pytest.raises(ValueError, match=msg): + float_frame.rank(na_option=True, ascending=False) + + def test_rank_axis(self): + # check if using axes' names gives the same result + df = DataFrame([[2, 1], [4, 3]]) + tm.assert_frame_equal(df.rank(axis=0), df.rank(axis="index")) + tm.assert_frame_equal(df.rank(axis=1), df.rank(axis="columns")) + + @td.skip_if_no_scipy + def test_rank_methods_frame(self): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + xs = np.random.randint(0, 21, (100, 26)) + xs = (xs - 10.0) / 10.0 + cols = [chr(ord("z") - i) for i in range(xs.shape[1])] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + df = DataFrame(vals, columns=cols) + + for ax in [0, 1]: + for m in ["average", "min", "max", "first", "dense"]: + result = df.rank(axis=ax, method=m) + sprank = np.apply_along_axis( + rankdata, ax, vals, m if m != "first" else "ordinal" + ) + sprank = sprank.astype(np.float64) + expected = DataFrame(sprank, columns=cols).astype("float64") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) + @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") + def test_rank_descending(self, method, dtype): + if "i" in dtype: + df = self.df.dropna().astype(dtype) + else: + df = self.df.astype(dtype) + + res = df.rank(ascending=False) + expected = (df.max() - df).rank() + tm.assert_frame_equal(res, expected) + + expected = (df.max() - df).rank(method=method) + + if dtype != "O": + res2 = df.rank(method=method, ascending=False, numeric_only=True) + tm.assert_frame_equal(res2, expected) + + res3 = df.rank(method=method, 
ascending=False, numeric_only=False) + tm.assert_frame_equal(res3, expected) + + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize("dtype", [None, object]) + def test_rank_2d_tie_methods(self, method, axis, dtype): + df = self.df + + def _check2d(df, expected, method="average", axis=0): + exp_df = DataFrame({"A": expected, "B": expected}) + + if axis == 1: + df = df.T + exp_df = exp_df.T + + result = df.rank(method=method, axis=axis) + tm.assert_frame_equal(result, exp_df) + + frame = df if dtype is None else df.astype(dtype) + _check2d(frame, self.results[method], method=method, axis=axis) + + @pytest.mark.parametrize( + "method,exp", + [ + ("dense", [[1.0, 1.0, 1.0], [1.0, 0.5, 2.0 / 3], [1.0, 0.5, 1.0 / 3]]), + ( + "min", + [ + [1.0 / 3, 1.0, 1.0], + [1.0 / 3, 1.0 / 3, 2.0 / 3], + [1.0 / 3, 1.0 / 3, 1.0 / 3], + ], + ), + ( + "max", + [[1.0, 1.0, 1.0], [1.0, 2.0 / 3, 2.0 / 3], [1.0, 2.0 / 3, 1.0 / 3]], + ), + ( + "average", + [[2.0 / 3, 1.0, 1.0], [2.0 / 3, 0.5, 2.0 / 3], [2.0 / 3, 0.5, 1.0 / 3]], + ), + ( + "first", + [ + [1.0 / 3, 1.0, 1.0], + [2.0 / 3, 1.0 / 3, 2.0 / 3], + [3.0 / 3, 2.0 / 3, 1.0 / 3], + ], + ), + ], + ) + def test_rank_pct_true(self, method, exp): + # see gh-15630. + + df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) + result = df.rank(method=method, pct=True) + + expected = DataFrame(exp) + tm.assert_frame_equal(result, expected) + + @pytest.mark.single_cpu + def test_pct_max_many_rows(self): + # GH 18271 + df = DataFrame( + {"A": np.arange(2**24 + 1), "B": np.arange(2**24 + 1, 0, -1)} + ) + result = df.rank(pct=True).max() + assert (result == 1).all() + + @pytest.mark.parametrize( + "contents,dtype", + [ + ( + [ + -np.inf, + -50, + -1, + -1e-20, + -1e-25, + -1e-50, + 0, + 1e-40, + 1e-20, + 1e-10, + 2, + 40, + np.inf, + ], + "float64", + ), + ( + [ + -np.inf, + -50, + -1, + -1e-20, + -1e-25, + -1e-45, + 0, + 1e-40, + 1e-20, + 1e-10, + 2, + 40, + np.inf, + ], + "float32", + ), + ([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max], "uint8"), + ( + [ + np.iinfo(np.int64).min, + -100, + 0, + 1, + 9999, + 100000, + 1e10, + np.iinfo(np.int64).max, + ], + "int64", + ), + ([NegInfinity(), "1", "A", "BA", "Ba", "C", Infinity()], "object"), + ( + [datetime(2001, 1, 1), datetime(2001, 1, 2), datetime(2001, 1, 5)], + "datetime64", + ), + ], + ) + def test_rank_inf_and_nan(self, contents, dtype, frame_or_series): + dtype_na_map = { + "float64": np.nan, + "float32": np.nan, + "object": None, + "datetime64": np.datetime64("nat"), + } + # Insert nans at random positions if underlying dtype has missing + # value. Then adjust the expected order by adding nans accordingly + # This is for testing whether rank calculation is affected + # when values are interwined with nan values. 
+ values = np.array(contents, dtype=dtype) + exp_order = np.array(range(len(values)), dtype="float64") + 1.0 + if dtype in dtype_na_map: + na_value = dtype_na_map[dtype] + nan_indices = np.random.choice(range(len(values)), 5) + values = np.insert(values, nan_indices, na_value) + exp_order = np.insert(exp_order, nan_indices, np.nan) + + # Shuffle the testing array and expected results in the same way + random_order = np.random.permutation(len(values)) + obj = frame_or_series(values[random_order]) + expected = frame_or_series(exp_order[random_order], dtype="float64") + result = obj.rank() + tm.assert_equal(result, expected) + + def test_df_series_inf_nan_consistency(self): + # GH#32593 + index = [5, 4, 3, 2, 1, 6, 7, 8, 9, 10] + col1 = [5, 4, 3, 5, 8, 5, 2, 1, 6, 6] + col2 = [5, 4, np.nan, 5, 8, 5, np.inf, np.nan, 6, -np.inf] + df = DataFrame( + data={ + "col1": col1, + "col2": col2, + }, + index=index, + dtype="f8", + ) + df_result = df.rank() + + series_result = df.copy() + series_result["col1"] = df["col1"].rank() + series_result["col2"] = df["col2"].rank() + + tm.assert_frame_equal(df_result, series_result) + + def test_rank_both_inf(self): + # GH#32593 + df = DataFrame({"a": [-np.inf, 0, np.inf]}) + expected = DataFrame({"a": [1.0, 2.0, 3.0]}) + result = df.rank() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "na_option,ascending,expected", + [ + ("top", True, [3.0, 1.0, 2.0]), + ("top", False, [2.0, 1.0, 3.0]), + ("bottom", True, [2.0, 3.0, 1.0]), + ("bottom", False, [1.0, 3.0, 2.0]), + ], + ) + def test_rank_inf_nans_na_option( + self, frame_or_series, method, na_option, ascending, expected + ): + obj = frame_or_series([np.inf, np.nan, -np.inf]) + result = obj.rank(method=method, na_option=na_option, ascending=ascending) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "na_option,ascending,expected", + [ + ("bottom", True, [1.0, 2.0, 4.0, 3.0]), + ("bottom", False, [1.0, 2.0, 4.0, 3.0]), + ("top", True, [2.0, 3.0, 1.0, 4.0]), + ("top", False, [2.0, 3.0, 1.0, 4.0]), + ], + ) + def test_rank_object_first(self, frame_or_series, na_option, ascending, expected): + obj = frame_or_series(["foo", "foo", None, "foo"]) + result = obj.rank(method="first", na_option=na_option, ascending=ascending) + expected = frame_or_series(expected) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "data,expected", + [ + ({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})), + ({"a": [1, 2, "a"]}, DataFrame(index=range(3))), + ], + ) + def test_rank_mixed_axis_zero(self, data, expected): + df = DataFrame(data) + msg = "Dropping of nuisance columns" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.rank() + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py new file mode 100644 index 00000000..8575e789 --- /dev/null +++ b/pandas/tests/frame/methods/test_reindex.py @@ -0,0 +1,1225 @@ +from datetime import ( + datetime, + timedelta, +) +import inspect + +import numpy as np +import pytest + +from pandas._libs.tslibs.timezones import dateutil_gettz as gettz +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, + date_range, + isna, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +import pandas.core.common as com + + +class 
TestReindexSetIndex: + # Tests that check both reindex and set_index + + def test_dti_set_index_reindex_datetimeindex(self): + # GH#6631 + df = DataFrame(np.random.random(6)) + idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") + idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.reindex(idx2) + tm.assert_index_equal(df.index, idx2) + + def test_dti_set_index_reindex_freq_with_tz(self): + # GH#11314 with tz + index = date_range( + datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" + ) + df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index) + new_index = date_range( + datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" + ) + + result = df.set_index(new_index) + assert result.index.freq == index.freq + + def test_set_reset_index_intervalindex(self): + + df = DataFrame({"A": range(10)}) + ser = pd.cut(df.A, 5) + df["B"] = ser + df = df.set_index("B") + + df = df.reset_index() + + def test_setitem_reset_index_dtypes(self): + # GH 22060 + df = DataFrame(columns=["a", "b", "c"]).astype( + {"a": "datetime64[ns]", "b": np.int64, "c": np.float64} + ) + df1 = df.set_index(["a"]) + df1["d"] = [] + result = df1.reset_index() + expected = DataFrame(columns=["a", "b", "c", "d"], index=range(0)).astype( + {"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.float64} + ) + tm.assert_frame_equal(result, expected) + + df2 = df.set_index(["a", "b"]) + df2["d"] = [] + result = df2.reset_index() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "timezone, year, month, day, hour", + [["America/Chicago", 2013, 11, 3, 1], ["America/Santiago", 2021, 4, 3, 23]], + ) + def test_reindex_timestamp_with_fold(self, timezone, year, month, day, hour): + # see gh-40817 + test_timezone = gettz(timezone) + transition_1 = pd.Timestamp( + year=year, + month=month, + day=day, + hour=hour, + minute=0, + fold=0, + tzinfo=test_timezone, + ) + transition_2 = pd.Timestamp( + year=year, + month=month, + day=day, + hour=hour, + minute=0, + fold=1, + tzinfo=test_timezone, + ) + df = ( + DataFrame({"index": [transition_1, transition_2], "vals": ["a", "b"]}) + .set_index("index") + .reindex(["1", "2"]) + ) + tm.assert_frame_equal( + df, + DataFrame({"index": ["1", "2"], "vals": [None, None]}).set_index("index"), + ) + + +class TestDataFrameSelectReindex: + # These are specific reindex-based tests; other indexing tests should go in + # test_indexing + + def test_reindex_copies(self): + # based on asv time_reindex_axis1 + N = 10 + df = DataFrame(np.random.randn(N * 10, N)) + cols = np.arange(N) + np.random.shuffle(cols) + + result = df.reindex(columns=cols, copy=True) + assert not np.shares_memory(result[0]._values, df[0]._values) + + # pass both columns and index + result2 = df.reindex(columns=cols, index=df.index, copy=True) + assert not np.shares_memory(result2[0]._values, df[0]._values) + + @td.skip_array_manager_not_yet_implemented + def test_reindex_date_fill_value(self): + # passing date to dt64 is deprecated + arr = date_range("2016-01-01", periods=6).values.reshape(3, 2) + df = DataFrame(arr, columns=["A", "B"], index=range(3)) + + ts = df.iloc[0, 0] + fv = ts.date() + + with tm.assert_produces_warning(FutureWarning): + res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv) + + expected = DataFrame( + {"A": df["A"].tolist() + [ts], "B": df["B"].tolist() + [ts], "C": [ts] * 4} + ) + tm.assert_frame_equal(res, 
expected) + + # only reindexing rows + with tm.assert_produces_warning(FutureWarning): + res = df.reindex(index=range(4), fill_value=fv) + tm.assert_frame_equal(res, expected[["A", "B"]]) + + # same with a datetime-castable str + res = df.reindex( + index=range(4), columns=["A", "B", "C"], fill_value="2016-01-01" + ) + tm.assert_frame_equal(res, expected) + + def test_reindex_with_multi_index(self): + # https://github.com/pandas-dev/pandas/issues/29896 + # tests for reindexing a multi-indexed DataFrame with a new MultiIndex + # + # confirms that we can reindex a multi-indexed DataFrame with a new + # MultiIndex object correctly when using no filling, backfilling, and + # padding + # + # The DataFrame, `df`, used in this test is: + # c + # a b + # -1 0 A + # 1 B + # 2 C + # 3 D + # 4 E + # 5 F + # 6 G + # 0 0 A + # 1 B + # 2 C + # 3 D + # 4 E + # 5 F + # 6 G + # 1 0 A + # 1 B + # 2 C + # 3 D + # 4 E + # 5 F + # 6 G + # + # and the other MultiIndex, `new_multi_index`, is: + # 0: 0 0.5 + # 1: 2.0 + # 2: 5.0 + # 3: 5.8 + df = DataFrame( + { + "a": [-1] * 7 + [0] * 7 + [1] * 7, + "b": list(range(7)) * 3, + "c": ["A", "B", "C", "D", "E", "F", "G"] * 3, + } + ).set_index(["a", "b"]) + new_index = [0.5, 2.0, 5.0, 5.8] + new_multi_index = MultiIndex.from_product([[0], new_index], names=["a", "b"]) + + # reindexing w/o a `method` value + reindexed = df.reindex(new_multi_index) + expected = DataFrame( + {"a": [0] * 4, "b": new_index, "c": [np.nan, "C", "F", np.nan]} + ).set_index(["a", "b"]) + tm.assert_frame_equal(expected, reindexed) + + # reindexing with backfilling + expected = DataFrame( + {"a": [0] * 4, "b": new_index, "c": ["B", "C", "F", "G"]} + ).set_index(["a", "b"]) + reindexed_with_backfilling = df.reindex(new_multi_index, method="bfill") + tm.assert_frame_equal(expected, reindexed_with_backfilling) + + reindexed_with_backfilling = df.reindex(new_multi_index, method="backfill") + tm.assert_frame_equal(expected, reindexed_with_backfilling) + + # reindexing with padding + expected = DataFrame( + {"a": [0] * 4, "b": new_index, "c": ["A", "C", "F", "F"]} + ).set_index(["a", "b"]) + reindexed_with_padding = df.reindex(new_multi_index, method="pad") + tm.assert_frame_equal(expected, reindexed_with_padding) + + reindexed_with_padding = df.reindex(new_multi_index, method="ffill") + tm.assert_frame_equal(expected, reindexed_with_padding) + + @pytest.mark.parametrize( + "method,expected_values", + [ + ("nearest", [0, 1, 1, 2]), + ("pad", [np.nan, 0, 1, 1]), + ("backfill", [0, 1, 2, 2]), + ], + ) + def test_reindex_methods(self, method, expected_values): + df = DataFrame({"x": list(range(5))}) + target = np.array([-0.1, 0.9, 1.1, 1.5]) + + expected = DataFrame({"x": expected_values}, index=target) + actual = df.reindex(target, method=method) + tm.assert_frame_equal(expected, actual) + + actual = df.reindex(target, method=method, tolerance=1) + tm.assert_frame_equal(expected, actual) + actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1]) + tm.assert_frame_equal(expected, actual) + + e2 = expected[::-1] + actual = df.reindex(target[::-1], method=method) + tm.assert_frame_equal(e2, actual) + + new_order = [3, 0, 2, 1] + e2 = expected.iloc[new_order] + actual = df.reindex(target[new_order], method=method) + tm.assert_frame_equal(e2, actual) + + switched_method = ( + "pad" if method == "backfill" else "backfill" if method == "pad" else method + ) + actual = df[::-1].reindex(target, method=switched_method) + tm.assert_frame_equal(expected, actual) + + def 
test_reindex_methods_nearest_special(self): + df = DataFrame({"x": list(range(5))}) + target = np.array([-0.1, 0.9, 1.1, 1.5]) + + expected = DataFrame({"x": [0, 1, 1, np.nan]}, index=target) + actual = df.reindex(target, method="nearest", tolerance=0.2) + tm.assert_frame_equal(expected, actual) + + expected = DataFrame({"x": [0, np.nan, 1, np.nan]}, index=target) + actual = df.reindex(target, method="nearest", tolerance=[0.5, 0.01, 0.4, 0.1]) + tm.assert_frame_equal(expected, actual) + + def test_reindex_nearest_tz(self, tz_aware_fixture): + # GH26683 + tz = tz_aware_fixture + idx = date_range("2019-01-01", periods=5, tz=tz) + df = DataFrame({"x": list(range(5))}, index=idx) + + expected = df.head(3) + actual = df.reindex(idx[:3], method="nearest") + tm.assert_frame_equal(expected, actual) + + def test_reindex_nearest_tz_empty_frame(self): + # https://github.com/pandas-dev/pandas/issues/31964 + dti = pd.DatetimeIndex(["2016-06-26 14:27:26+00:00"]) + df = DataFrame(index=pd.DatetimeIndex(["2016-07-04 14:00:59+00:00"])) + expected = DataFrame(index=dti) + result = df.reindex(dti, method="nearest") + tm.assert_frame_equal(result, expected) + + def test_reindex_frame_add_nat(self): + rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") + df = DataFrame({"A": np.random.randn(len(rng)), "B": rng}) + + result = df.reindex(range(15)) + assert np.issubdtype(result["B"].dtype, np.dtype("M8[ns]")) + + mask = com.isna(result)["B"] + assert mask[-5:].all() + assert not mask[:-5].any() + + @pytest.mark.parametrize( + "method, exp_values", + [("ffill", [0, 1, 2, 3]), ("bfill", [1.0, 2.0, 3.0, np.nan])], + ) + def test_reindex_frame_tz_ffill_bfill(self, frame_or_series, method, exp_values): + # GH#38566 + obj = frame_or_series( + [0, 1, 2, 3], + index=date_range("2020-01-01 00:00:00", periods=4, freq="H", tz="UTC"), + ) + new_index = date_range("2020-01-01 00:01:00", periods=4, freq="H", tz="UTC") + result = obj.reindex(new_index, method=method, tolerance=pd.Timedelta("1 hour")) + expected = frame_or_series(exp_values, index=new_index) + tm.assert_equal(result, expected) + + def test_reindex_limit(self): + # GH 28631 + data = [["A", "A", "A"], ["B", "B", "B"], ["C", "C", "C"], ["D", "D", "D"]] + exp_data = [ + ["A", "A", "A"], + ["B", "B", "B"], + ["C", "C", "C"], + ["D", "D", "D"], + ["D", "D", "D"], + [np.nan, np.nan, np.nan], + ] + df = DataFrame(data) + result = df.reindex([0, 1, 2, 3, 4, 5], method="ffill", limit=1) + expected = DataFrame(exp_data) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "idx, check_index_type", + [ + [["C", "B", "A"], True], + [["F", "C", "A", "D"], True], + [["A"], True], + [["A", "B", "C"], True], + [["C", "A", "B"], True], + [["C", "B"], True], + [["C", "A"], True], + [["A", "B"], True], + [["B", "A", "C"], True], + # reindex by these causes different MultiIndex levels + [["D", "F"], False], + [["A", "C", "B"], False], + ], + ) + def test_reindex_level_verify_first_level(self, idx, check_index_type): + df = DataFrame( + { + "jim": list("B" * 4 + "A" * 2 + "C" * 3), + "joe": list("abcdeabcd")[::-1], + "jolie": [10, 20, 30] * 3, + "joline": np.random.randint(0, 1000, 9), + } + ) + icol = ["jim", "joe", "jolie"] + + def f(val): + return np.nonzero((df["jim"] == val).to_numpy())[0] + + i = np.concatenate(list(map(f, idx))) + left = df.set_index(icol).reindex(idx, level="jim") + right = df.iloc[i].set_index(icol) + tm.assert_frame_equal(left, right, check_index_type=check_index_type) + + @pytest.mark.parametrize( + "idx", + [ + ("mid",), + 
("mid", "btm"), + ("mid", "btm", "top"), + ("mid",), + ("mid", "top"), + ("mid", "top", "btm"), + ("btm",), + ("btm", "mid"), + ("btm", "mid", "top"), + ("btm",), + ("btm", "top"), + ("btm", "top", "mid"), + ("top",), + ("top", "mid"), + ("top", "mid", "btm"), + ("top",), + ("top", "btm"), + ("top", "btm", "mid"), + ], + ) + def test_reindex_level_verify_first_level_repeats(self, idx): + df = DataFrame( + { + "jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7, + "joe": ["3rd"] * 2 + + ["1st"] * 3 + + ["2nd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["2nd"] * 2, + # this needs to be jointly unique with jim and joe or + # reindexing will fail ~1.5% of the time, this works + # out to needing unique groups of same size as joe + "jolie": np.concatenate( + [ + np.random.choice(1000, x, replace=False) + for x in [2, 3, 3, 2, 3, 2, 3, 2] + ] + ), + "joline": np.random.randn(20).round(3) * 10, + } + ) + icol = ["jim", "joe", "jolie"] + + def f(val): + return np.nonzero((df["jim"] == val).to_numpy())[0] + + i = np.concatenate(list(map(f, idx))) + left = df.set_index(icol).reindex(idx, level="jim") + right = df.iloc[i].set_index(icol) + tm.assert_frame_equal(left, right) + + @pytest.mark.parametrize( + "idx, indexer", + [ + [ + ["1st", "2nd", "3rd"], + [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 15, 16, 17], + ], + [ + ["3rd", "2nd", "1st"], + [0, 1, 2, 3, 4, 10, 11, 12, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 13, 14], + ], + [["2nd", "3rd"], [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]], + [["3rd", "1st"], [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]], + ], + ) + def test_reindex_level_verify_repeats(self, idx, indexer): + df = DataFrame( + { + "jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7, + "joe": ["3rd"] * 2 + + ["1st"] * 3 + + ["2nd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["2nd"] * 2, + # this needs to be jointly unique with jim and joe or + # reindexing will fail ~1.5% of the time, this works + # out to needing unique groups of same size as joe + "jolie": np.concatenate( + [ + np.random.choice(1000, x, replace=False) + for x in [2, 3, 3, 2, 3, 2, 3, 2] + ] + ), + "joline": np.random.randn(20).round(3) * 10, + } + ) + icol = ["jim", "joe", "jolie"] + left = df.set_index(icol).reindex(idx, level="joe") + right = df.iloc[indexer].set_index(icol) + tm.assert_frame_equal(left, right) + + @pytest.mark.parametrize( + "idx, indexer, check_index_type", + [ + [list("abcde"), [3, 2, 1, 0, 5, 4, 8, 7, 6], True], + [list("abcd"), [3, 2, 1, 0, 5, 8, 7, 6], True], + [list("abc"), [3, 2, 1, 8, 7, 6], True], + [list("eca"), [1, 3, 4, 6, 8], True], + [list("edc"), [0, 1, 4, 5, 6], True], + [list("eadbc"), [3, 0, 2, 1, 4, 5, 8, 7, 6], True], + [list("edwq"), [0, 4, 5], True], + [list("wq"), [], False], + ], + ) + def test_reindex_level_verify(self, idx, indexer, check_index_type): + df = DataFrame( + { + "jim": list("B" * 4 + "A" * 2 + "C" * 3), + "joe": list("abcdeabcd")[::-1], + "jolie": [10, 20, 30] * 3, + "joline": np.random.randint(0, 1000, 9), + } + ) + icol = ["jim", "joe", "jolie"] + left = df.set_index(icol).reindex(idx, level="joe") + right = df.iloc[indexer].set_index(icol) + tm.assert_frame_equal(left, right, check_index_type=check_index_type) + + def test_non_monotonic_reindex_methods(self): + dr = date_range("2013-08-01", periods=6, freq="B") + data = np.random.randn(6, 1) + df = DataFrame(data, index=dr, columns=list("A")) + df_rev = DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A")) + # index is 
not monotonic increasing or decreasing + msg = "index must be monotonic increasing or decreasing" + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="pad") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="ffill") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="bfill") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="nearest") + + def test_reindex_sparse(self): + # https://github.com/pandas-dev/pandas/issues/35286 + df = DataFrame( + {"A": [0, 1], "B": pd.array([0, 1], dtype=pd.SparseDtype("int64", 0))} + ) + result = df.reindex([0, 2]) + expected = DataFrame( + { + "A": [0.0, np.nan], + "B": pd.array([0.0, np.nan], dtype=pd.SparseDtype("float64", 0.0)), + }, + index=[0, 2], + ) + tm.assert_frame_equal(result, expected) + + def test_reindex(self, float_frame): + datetime_series = tm.makeTimeSeries(nper=30) + + newFrame = float_frame.reindex(datetime_series.index) + + for col in newFrame.columns: + for idx, val in newFrame[col].items(): + if idx in float_frame.index: + if np.isnan(val): + assert np.isnan(float_frame[col][idx]) + else: + assert val == float_frame[col][idx] + else: + assert np.isnan(val) + + for col, series in newFrame.items(): + assert tm.equalContents(series.index, newFrame.index) + emptyFrame = float_frame.reindex(Index([])) + assert len(emptyFrame.index) == 0 + + # Cython code should be unit-tested directly + nonContigFrame = float_frame.reindex(datetime_series.index[::2]) + + for col in nonContigFrame.columns: + for idx, val in nonContigFrame[col].items(): + if idx in float_frame.index: + if np.isnan(val): + assert np.isnan(float_frame[col][idx]) + else: + assert val == float_frame[col][idx] + else: + assert np.isnan(val) + + for col, series in nonContigFrame.items(): + assert tm.equalContents(series.index, nonContigFrame.index) + + # corner cases + + # Same index, copies values but not index if copy=False + newFrame = float_frame.reindex(float_frame.index, copy=False) + assert newFrame.index is float_frame.index + + # length zero + newFrame = float_frame.reindex([]) + assert newFrame.empty + assert len(newFrame.columns) == len(float_frame.columns) + + # length zero with columns reindexed with non-empty index + newFrame = float_frame.reindex([]) + newFrame = newFrame.reindex(float_frame.index) + assert len(newFrame.index) == len(float_frame.index) + assert len(newFrame.columns) == len(float_frame.columns) + + # pass non-Index + newFrame = float_frame.reindex(list(datetime_series.index)) + expected = datetime_series.index._with_freq(None) + tm.assert_index_equal(newFrame.index, expected) + + # copy with no axes + result = float_frame.reindex() + tm.assert_frame_equal(result, float_frame) + assert result is not float_frame + + def test_reindex_nan(self): + df = DataFrame( + [[1, 2], [3, 5], [7, 11], [9, 23]], + index=[2, np.nan, 1, 5], + columns=["joe", "jim"], + ) + + i, j = [np.nan, 5, 5, np.nan, 1, 2, np.nan], [1, 3, 3, 1, 2, 0, 1] + tm.assert_frame_equal(df.reindex(i), df.iloc[j]) + + df.index = df.index.astype("object") + tm.assert_frame_equal(df.reindex(i), df.iloc[j], check_index_type=False) + + # GH10388 + df = DataFrame( + { + "other": ["a", "b", np.nan, "c"], + "date": ["2015-03-22", np.nan, "2012-01-08", np.nan], + "amount": [2, 3, 4, 5], + } + ) + + df["date"] = pd.to_datetime(df.date) + df["delta"] = (pd.to_datetime("2015-06-18") - df["date"]).shift(1) + + left = df.set_index(["delta", "other", "date"]).reset_index() + right = 
df.reindex(columns=["delta", "other", "date", "amount"]) + tm.assert_frame_equal(left, right) + + def test_reindex_name_remains(self): + s = Series(np.random.rand(10)) + df = DataFrame(s, index=np.arange(len(s))) + i = Series(np.arange(10), name="iname") + + df = df.reindex(i) + assert df.index.name == "iname" + + df = df.reindex(Index(np.arange(10), name="tmpname")) + assert df.index.name == "tmpname" + + s = Series(np.random.rand(10)) + df = DataFrame(s.T, index=np.arange(len(s))) + i = Series(np.arange(10), name="iname") + df = df.reindex(columns=i) + assert df.columns.name == "iname" + + def test_reindex_int(self, int_frame): + smaller = int_frame.reindex(int_frame.index[::2]) + + assert smaller["A"].dtype == np.int64 + + bigger = smaller.reindex(int_frame.index) + assert bigger["A"].dtype == np.float64 + + smaller = int_frame.reindex(columns=["A", "B"]) + assert smaller["A"].dtype == np.int64 + + def test_reindex_columns(self, float_frame): + new_frame = float_frame.reindex(columns=["A", "B", "E"]) + + tm.assert_series_equal(new_frame["B"], float_frame["B"]) + assert np.isnan(new_frame["E"]).all() + assert "C" not in new_frame + + # Length zero + new_frame = float_frame.reindex(columns=[]) + assert new_frame.empty + + def test_reindex_columns_method(self): + + # GH 14992, reindexing over columns ignored method + df = DataFrame( + data=[[11, 12, 13], [21, 22, 23], [31, 32, 33]], + index=[1, 2, 4], + columns=[1, 2, 4], + dtype=float, + ) + + # default method + result = df.reindex(columns=range(6)) + expected = DataFrame( + data=[ + [np.nan, 11, 12, np.nan, 13, np.nan], + [np.nan, 21, 22, np.nan, 23, np.nan], + [np.nan, 31, 32, np.nan, 33, np.nan], + ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + # method='ffill' + result = df.reindex(columns=range(6), method="ffill") + expected = DataFrame( + data=[ + [np.nan, 11, 12, 12, 13, 13], + [np.nan, 21, 22, 22, 23, 23], + [np.nan, 31, 32, 32, 33, 33], + ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + # method='bfill' + result = df.reindex(columns=range(6), method="bfill") + expected = DataFrame( + data=[ + [11, 11, 12, 13, 13, np.nan], + [21, 21, 22, 23, 23, np.nan], + [31, 31, 32, 33, 33, np.nan], + ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + def test_reindex_axes(self): + # GH 3317, reindexing by both axes loses freq of the index + df = DataFrame( + np.ones((3, 3)), + index=[datetime(2012, 1, 1), datetime(2012, 1, 2), datetime(2012, 1, 3)], + columns=["a", "b", "c"], + ) + time_freq = date_range("2012-01-01", "2012-01-03", freq="d") + some_cols = ["a", "b"] + + index_freq = df.reindex(index=time_freq).index.freq + both_freq = df.reindex(index=time_freq, columns=some_cols).index.freq + seq_freq = df.reindex(index=time_freq).reindex(columns=some_cols).index.freq + assert index_freq == both_freq + assert index_freq == seq_freq + + def test_reindex_fill_value(self): + df = DataFrame(np.random.randn(10, 4)) + + # axis=0 + result = df.reindex(list(range(15))) + assert np.isnan(result.values[-5:]).all() + + result = df.reindex(range(15), fill_value=0) + expected = df.reindex(range(15)).fillna(0) + tm.assert_frame_equal(result, expected) + + # axis=1 + result = df.reindex(columns=range(5), fill_value=0.0) + expected = df.copy() + expected[4] = 0.0 + tm.assert_frame_equal(result, expected) + + result = df.reindex(columns=range(5), fill_value=0) + expected = df.copy() + 
expected[4] = 0 + tm.assert_frame_equal(result, expected) + + result = df.reindex(columns=range(5), fill_value="foo") + expected = df.copy() + expected[4] = "foo" + tm.assert_frame_equal(result, expected) + + # other dtypes + df["foo"] = "foo" + result = df.reindex(range(15), fill_value=0) + expected = df.reindex(range(15)).fillna(0) + tm.assert_frame_equal(result, expected) + + def test_reindex_dups(self): + + # GH4746, reindex on duplicate index error messages + arr = np.random.randn(10) + df = DataFrame(arr, index=[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]) + + # set index is ok + result = df.copy() + result.index = list(range(len(df))) + expected = DataFrame(arr, index=list(range(len(df)))) + tm.assert_frame_equal(result, expected) + + # reindex fails + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(index=list(range(len(df)))) + + def test_reindex_with_duplicate_columns(self): + + # reindex is invalid! + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(columns=["bar"]) + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(columns=["bar", "foo"]) + + def test_reindex_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = DataFrame( + {"A": [1, 2, np.nan], "B": [4, 5, np.nan]}, index=[0, 1, 3] + ) + result = df.reindex([0, 1, 3]) + tm.assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis=0) + tm.assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis="index") + tm.assert_frame_equal(result, expected) + + def test_reindex_positional_warns(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = DataFrame({"A": [1.0, 2], "B": [4.0, 5], "C": [np.nan, np.nan]}) + with tm.assert_produces_warning(FutureWarning): + result = df.reindex([0, 1], ["A", "B", "C"]) + + tm.assert_frame_equal(result, expected) + + def test_reindex_axis_style_raises(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex([0, 1], ["A"], axis=1) + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex([0, 1], ["A"], axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(columns=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], columns=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify all"): + df.reindex([0, 1], [0], ["A"]) + + # Mixing styles + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="columns") + + # Duplicates + with 
pytest.raises(TypeError, match="multiple values"): + df.reindex([0, 1], labels=[0, 1]) + + def test_reindex_single_named_indexer(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}) + result = df.reindex([0, 1], columns=["A"]) + expected = DataFrame({"A": [1, 2]}) + tm.assert_frame_equal(result, expected) + + def test_reindex_api_equivalence(self): + # https://github.com/pandas-dev/pandas/issues/12392 + # equivalence of the labels/axis and index/columns API's + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + + res1 = df.reindex(["b", "a"]) + res2 = df.reindex(index=["b", "a"]) + res3 = df.reindex(labels=["b", "a"]) + res4 = df.reindex(labels=["b", "a"], axis=0) + res5 = df.reindex(["b", "a"], axis=0) + for res in [res2, res3, res4, res5]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(columns=["e", "d"]) + res2 = df.reindex(["e", "d"], axis=1) + res3 = df.reindex(labels=["e", "d"], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + with tm.assert_produces_warning(FutureWarning) as m: + res1 = df.reindex(["b", "a"], ["e", "d"]) + assert "reindex" in str(m[0].message) + res2 = df.reindex(columns=["e", "d"], index=["b", "a"]) + res3 = df.reindex(labels=["b", "a"], axis=0).reindex(labels=["e", "d"], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + def test_reindex_boolean(self): + frame = DataFrame( + np.ones((10, 2), dtype=bool), index=np.arange(0, 20, 2), columns=[0, 2] + ) + + reindexed = frame.reindex(np.arange(10)) + assert reindexed.values.dtype == np.object_ + assert isna(reindexed[0][1]) + + reindexed = frame.reindex(columns=range(3)) + assert reindexed.values.dtype == np.object_ + assert isna(reindexed[1]).all() + + def test_reindex_objects(self, float_string_frame): + reindexed = float_string_frame.reindex(columns=["foo", "A", "B"]) + assert "foo" in reindexed + + reindexed = float_string_frame.reindex(columns=["A", "B"]) + assert "foo" not in reindexed + + def test_reindex_corner(self, int_frame): + index = Index(["a", "b", "c"]) + dm = DataFrame({}).reindex(index=[1, 2, 3]) + reindexed = dm.reindex(columns=index) + tm.assert_index_equal(reindexed.columns, index) + + # ints are weird + smaller = int_frame.reindex(columns=["A", "B", "E"]) + assert smaller["E"].dtype == np.float64 + + def test_reindex_with_nans(self): + df = DataFrame( + [[1, 2], [3, 4], [np.nan, np.nan], [7, 8], [9, 10]], + columns=["a", "b"], + index=[100.0, 101.0, np.nan, 102.0, 103.0], + ) + + result = df.reindex(index=[101.0, 102.0, 103.0]) + expected = df.iloc[[1, 3, 4]] + tm.assert_frame_equal(result, expected) + + result = df.reindex(index=[103.0]) + expected = df.iloc[[4]] + tm.assert_frame_equal(result, expected) + + result = df.reindex(index=[101.0]) + expected = df.iloc[[1]] + tm.assert_frame_equal(result, expected) + + def test_reindex_multi(self): + df = DataFrame(np.random.randn(3, 3)) + + result = df.reindex(index=range(4), columns=range(4)) + expected = df.reindex(list(range(4))).reindex(columns=range(4)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randint(0, 10, (3, 3))) + + result = df.reindex(index=range(4), columns=range(4)) + expected = df.reindex(list(range(4))).reindex(columns=range(4)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randint(0, 10, (3, 3))) + + result = df.reindex(index=range(2), columns=range(2)) + expected = 
df.reindex(range(2)).reindex(columns=range(2)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randn(5, 3) + 1j, columns=["a", "b", "c"]) + + result = df.reindex(index=[0, 1], columns=["a", "b"]) + expected = df.reindex([0, 1]).reindex(columns=["a", "b"]) + + tm.assert_frame_equal(result, expected) + + def test_reindex_multi_categorical_time(self): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [ + Categorical(["a", "b", "c"]), + Categorical(date_range("2012-01-01", periods=3, freq="H")), + ] + ) + df = DataFrame({"a": range(len(midx))}, index=midx) + df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]] + + result = df2.reindex(midx) + expected = DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx) + tm.assert_frame_equal(result, expected) + + def test_reindex_with_categoricalindex(self): + df = DataFrame( + { + "A": np.arange(3, dtype="int64"), + }, + index=CategoricalIndex(list("abc"), dtype=CDT(list("cabe")), name="B"), + ) + + # reindexing + # convert to a regular index + result = df.reindex(["a", "b", "e"]) + expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index( + "B" + ) + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b"]) + expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["e"]) + expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["d"]) + expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + # since we are actually reindexing with a Categorical + # then return a Categorical + cats = list("cabe") + + result = df.reindex(Categorical(["a", "e"], categories=cats)) + expected = DataFrame( + {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(Categorical(["a"], categories=cats)) + expected = DataFrame( + {"A": [0], "B": Series(list("a")).astype(CDT(cats))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b", "e"]) + expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index( + "B" + ) + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b"]) + expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["e"]) + expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + # give back the type of categorical that we received + result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True)) + expected = DataFrame( + {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(Categorical(["a", "d"], categories=["a", "d"])) + expected = DataFrame( + {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + df2 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + 
index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"), + ) + # passed duplicate indexers are not allowed + msg = "cannot reindex on an axis with duplicate labels" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df2.reindex(["a", "b"]) + + # args NotImplemented ATM + msg = r"argument {} is not implemented for CategoricalIndex\.reindex" + with pytest.raises(NotImplementedError, match=msg.format("method")): + df.reindex(["a"], method="ffill") + with pytest.raises(NotImplementedError, match=msg.format("level")): + df.reindex(["a"], level=1) + with pytest.raises(NotImplementedError, match=msg.format("limit")): + df.reindex(["a"], limit=2) + + def test_reindex_signature(self): + sig = inspect.signature(DataFrame.reindex) + parameters = set(sig.parameters) + assert parameters == { + "self", + "labels", + "index", + "columns", + "axis", + "limit", + "copy", + "level", + "method", + "fill_value", + "tolerance", + } + + def test_reindex_multiindex_ffill_added_rows(self): + # GH#23693 + # reindex added rows with nan values even when fill method was specified + mi = MultiIndex.from_tuples([("a", "b"), ("d", "e")]) + df = DataFrame([[0, 7], [3, 4]], index=mi, columns=["x", "y"]) + mi2 = MultiIndex.from_tuples([("a", "b"), ("d", "e"), ("h", "i")]) + result = df.reindex(mi2, axis=0, method="ffill") + expected = DataFrame([[0, 7], [3, 4], [3, 4]], index=mi2, columns=["x", "y"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "kwargs", + [ + {"method": "pad", "tolerance": timedelta(seconds=9)}, + {"method": "backfill", "tolerance": timedelta(seconds=9)}, + {"method": "nearest"}, + {"method": None}, + ], + ) + def test_reindex_empty_frame(self, kwargs): + # GH#27315 + idx = date_range(start="2020", freq="30s", periods=3) + df = DataFrame([], index=Index([], name="time"), columns=["a"]) + result = df.reindex(idx, **kwargs) + expected = DataFrame({"a": [pd.NA] * 3}, index=idx) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "src_idx", + [ + Index([]), + CategoricalIndex([]), + ], + ) + @pytest.mark.parametrize( + "cat_idx", + [ + # No duplicates + Index([]), + CategoricalIndex([]), + Index(["A", "B"]), + CategoricalIndex(["A", "B"]), + # Duplicates: GH#38906 + Index(["A", "A"]), + CategoricalIndex(["A", "A"]), + ], + ) + def test_reindex_empty(self, src_idx, cat_idx): + df = DataFrame(columns=src_idx, index=["K"], dtype="f8") + + result = df.reindex(columns=cat_idx) + expected = DataFrame(index=["K"], columns=cat_idx, dtype="f8") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"]) + def test_reindex_datetimelike_to_object(self, dtype): + # GH#39755 dont cast dt64/td64 to ints + mi = MultiIndex.from_product([list("ABCDE"), range(2)]) + + dti = date_range("2016-01-01", periods=10) + fv = np.timedelta64("NaT", "ns") + if dtype == "m8[ns]": + dti = dti - dti[0] + fv = np.datetime64("NaT", "ns") + + ser = Series(dti, index=mi) + ser[::3] = pd.NaT + + df = ser.unstack() + + index = df.index.append(Index([1])) + columns = df.columns.append(Index(["foo"])) + + res = df.reindex(index=index, columns=columns, fill_value=fv) + + expected = DataFrame( + { + 0: df[0].tolist() + [fv], + 1: df[1].tolist() + [fv], + "foo": np.array(["NaT"] * 6, dtype=fv.dtype), + }, + index=index, + ) + assert (res.dtypes[[0, 1]] == object).all() + assert res.iloc[0, 0] is pd.NaT + assert res.iloc[-1, 0] is fv + assert res.iloc[-1, 1] is fv + 
tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize( + "index_df,index_res,index_exp", + [ + ( + CategoricalIndex([], categories=["A"]), + Index(["A"]), + Index(["A"]), + ), + ( + CategoricalIndex([], categories=["A"]), + Index(["B"]), + Index(["B"]), + ), + ( + CategoricalIndex([], categories=["A"]), + CategoricalIndex(["A"]), + CategoricalIndex(["A"]), + ), + ( + CategoricalIndex([], categories=["A"]), + CategoricalIndex(["B"]), + CategoricalIndex(["B"]), + ), + ], + ) + def test_reindex_not_category(self, index_df, index_res, index_exp): + # GH#28690 + df = DataFrame(index=index_df) + result = df.reindex(index=index_res) + expected = DataFrame(index=index_exp) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reindex_like.py b/pandas/tests/frame/methods/test_reindex_like.py new file mode 100644 index 00000000..ce68ec28 --- /dev/null +++ b/pandas/tests/frame/methods/test_reindex_like.py @@ -0,0 +1,39 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestDataFrameReindexLike: + def test_reindex_like(self, float_frame): + other = float_frame.reindex(index=float_frame.index[:10], columns=["C", "B"]) + + tm.assert_frame_equal(other, float_frame.reindex_like(other)) + + @pytest.mark.parametrize( + "method,expected_values", + [ + ("nearest", [0, 1, 1, 2]), + ("pad", [np.nan, 0, 1, 1]), + ("backfill", [0, 1, 2, 2]), + ], + ) + def test_reindex_like_methods(self, method, expected_values): + df = DataFrame({"x": list(range(5))}) + + result = df.reindex_like(df, method=method, tolerance=0) + tm.assert_frame_equal(df, result) + result = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) + tm.assert_frame_equal(df, result) + + def test_reindex_like_subclass(self): + # https://github.com/pandas-dev/pandas/issues/31925 + class MyDataFrame(DataFrame): + pass + + expected = DataFrame() + df = MyDataFrame() + result = df.reindex_like(expected) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py new file mode 100644 index 00000000..405518c3 --- /dev/null +++ b/pandas/tests/frame/methods/test_rename.py @@ -0,0 +1,430 @@ +from collections import ChainMap +import inspect + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + merge, +) +import pandas._testing as tm + + +class TestRename: + def test_rename_signature(self): + sig = inspect.signature(DataFrame.rename) + parameters = set(sig.parameters) + assert parameters == { + "self", + "mapper", + "index", + "columns", + "axis", + "inplace", + "copy", + "level", + "errors", + } + + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_rename_mi(self, klass): + obj = klass( + [11, 21, 31], + index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]), + ) + obj.rename(str.lower) + + def test_rename(self, float_frame): + mapping = {"A": "a", "B": "b", "C": "c", "D": "d"} + + renamed = float_frame.rename(columns=mapping) + renamed2 = float_frame.rename(columns=str.lower) + + tm.assert_frame_equal(renamed, renamed2) + tm.assert_frame_equal( + renamed2.rename(columns=str.upper), float_frame, check_names=False + ) + + # index + data = {"A": {"foo": 0, "bar": 1}} + + # gets sorted alphabetical + df = DataFrame(data) + renamed = df.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) + + 
renamed = df.rename(index=str.upper) + tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) + + # have to pass something + with pytest.raises(TypeError, match="must pass an index to rename"): + float_frame.rename() + + # partial columns + renamed = float_frame.rename(columns={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"])) + + # other axis + renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"])) + + # index with name + index = Index(["foo", "bar"], name="name") + renamer = DataFrame(data, index=index) + renamed = renamer.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name")) + assert renamed.index.name == renamer.index.name + + @pytest.mark.parametrize( + "args,kwargs", + [ + ((ChainMap({"A": "a"}, {"B": "b"}),), {"axis": "columns"}), + ((), {"columns": ChainMap({"A": "a"}, {"B": "b"})}), + ], + ) + def test_rename_chainmap(self, args, kwargs): + # see gh-23859 + colAData = range(1, 11) + colBdata = np.random.randn(10) + + df = DataFrame({"A": colAData, "B": colBdata}) + result = df.rename(*args, **kwargs) + + expected = DataFrame({"a": colAData, "b": colBdata}) + tm.assert_frame_equal(result, expected) + + def test_rename_multiindex(self): + + tuples_index = [("foo1", "bar1"), ("foo2", "bar2")] + tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")] + index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"]) + columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"]) + df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) + + # + # without specifying level -> across all levels + + renamed = df.rename( + index={"foo1": "foo3", "bar2": "bar3"}, + columns={"fizz1": "fizz3", "buzz2": "buzz3"}, + ) + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"] + ) + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + tm.assert_index_equal(renamed.index, new_index) + tm.assert_index_equal(renamed.columns, new_columns) + assert renamed.index.names == df.index.names + assert renamed.columns.names == df.columns.names + + # + # with specifying a level (GH13766) + + # dict + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # function + func = str.upper + new_columns = MultiIndex.from_tuples( + [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns=func, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"] + ) + 
renamed = df.rename(columns=func, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # index + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"] + ) + renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) + tm.assert_index_equal(renamed.index, new_index) + + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem copy/view + def test_rename_nocopy(self, float_frame, using_copy_on_write): + renamed = float_frame.rename(columns={"C": "foo"}, copy=False) + + assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) + + # TODO(CoW) this also shouldn't warn in case of CoW, but the heuristic + # checking if the array shares memory doesn't work if CoW happened + with tm.assert_produces_warning( + DeprecationWarning if using_copy_on_write else None + ): + # This loc setitem already happens inplace, so no warning + # that this will change in the future + renamed.loc[:, "foo"] = 1.0 + if using_copy_on_write: + assert not (float_frame["C"] == 1.0).all() + else: + assert (float_frame["C"] == 1.0).all() + + def test_rename_inplace(self, float_frame): + float_frame.rename(columns={"C": "foo"}) + assert "C" in float_frame + assert "foo" not in float_frame + + c_values = float_frame["C"] + float_frame = float_frame.copy() + return_value = float_frame.rename(columns={"C": "foo"}, inplace=True) + assert return_value is None + + assert "C" not in float_frame + assert "foo" in float_frame + # GH 44153 + # Used to be id(float_frame["foo"]) != c_id, but flaky in the CI + assert float_frame["foo"] is not c_values + + def test_rename_bug(self): + # GH 5344 + # rename set ref_locs, and set_index was not resetting + df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]}) + df = df.rename(columns={0: "a"}) + df = df.rename(columns={1: "b"}) + df = df.set_index(["a", "b"]) + df.columns = ["2001-01-01"] + expected = DataFrame( + [[1], [2]], + index=MultiIndex.from_tuples( + [("foo", "bah"), ("bar", "bas")], names=["a", "b"] + ), + columns=["2001-01-01"], + ) + tm.assert_frame_equal(df, expected) + + def test_rename_bug2(self): + # GH 19497 + # rename was changing Index to MultiIndex if Index contained tuples + + df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"]) + df = df.rename({(1, 1): (5, 4)}, axis="index") + expected = DataFrame( + data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"] + ) + tm.assert_frame_equal(df, expected) + + def test_rename_errors_raises(self): + df = DataFrame(columns=["A", "B", "C", "D"]) + with pytest.raises(KeyError, match="'E'] not found in axis"): + df.rename(columns={"A": "a", "E": "e"}, errors="raise") + + @pytest.mark.parametrize( + "mapper, errors, expected_columns", + [ + ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]), + ({"A": "a"}, "raise", ["a", "B", "C", "D"]), + (str.lower, "raise", ["a", "b", "c", "d"]), + ], + ) + def test_rename_errors(self, mapper, errors, expected_columns): + # GH 13473 + # rename now works with errors parameter + df = DataFrame(columns=["A", "B", "C", "D"]) + result = df.rename(columns=mapper, errors=errors) + expected = DataFrame(columns=expected_columns) + tm.assert_frame_equal(result, expected) + + def test_rename_objects(self, float_string_frame): + renamed = float_string_frame.rename(columns=str.upper) + + assert "FOO" in renamed + assert "foo" not in renamed + + def 
test_rename_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"]) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + + result = df.rename(str.lower, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="columns") + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis="columns") + tm.assert_frame_equal(result, expected) + + # Index + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + result = df.rename(str.lower, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename(mapper=str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + def test_rename_mapper_multi(self): + df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index( + ["A", "B"] + ) + result = df.rename(str.upper) + expected = df.rename(index=str.upper) + tm.assert_frame_equal(result, expected) + + def test_rename_positional_named(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + result = df.rename(index=str.lower, columns=str.upper) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + tm.assert_frame_equal(result, expected) + + def test_rename_axis_style_raises(self): + # see gh-12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"]) + + # Named target and axis + over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=1) + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(columns=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=0) + + # Multiple targets and axis + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, axis="columns") + + # Too many targets + over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, columns=str.lower) + + # Duplicates + with pytest.raises(TypeError, match="multiple values"): + df.rename(id, mapper=id) + + def test_rename_positional_raises(self): + # GH 29136 + df = DataFrame(columns=["A", "B"]) + msg = r"rename\(\) takes from 1 to 2 positional arguments" + + with pytest.raises(TypeError, match=msg): + df.rename(None, str.lower) + + def test_rename_no_mappings_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "must pass an index to rename" + with pytest.raises(TypeError, match=msg): + df.rename() + + with pytest.raises(TypeError, match=msg): + df.rename(None, index=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None, index=None) + + def test_rename_mapper_and_positional_arguments_raises(self): + # GH 29136 + df = DataFrame([[1]]) + 
msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=msg): + df.rename({}, index={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}, index={}) + + def test_rename_with_duplicate_columns(self): + # GH#4403 + df4 = DataFrame( + {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]}, + index=MultiIndex.from_tuples( + [(600809, 20130331)], names=["STK_ID", "RPT_Date"] + ), + ) + + df5 = DataFrame( + { + "RPT_Date": [20120930, 20121231, 20130331], + "STK_ID": [600809] * 3, + "STK_Name": ["饡驦", "饡驦", "饡驦"], + "TClose": [38.05, 41.66, 30.01], + }, + index=MultiIndex.from_tuples( + [(600809, 20120930), (600809, 20121231), (600809, 20130331)], + names=["STK_ID", "RPT_Date"], + ), + ) + # TODO: can we construct this without merge? + k = merge(df4, df5, how="inner", left_index=True, right_index=True) + result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"}) + str(result) + result.dtypes + + expected = DataFrame( + [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]], + columns=[ + "RT", + "TClose", + "TExg", + "RPT_Date", + "STK_ID", + "STK_Name", + "QT_Close", + ], + ).set_index(["STK_ID", "RPT_Date"], drop=False) + tm.assert_frame_equal(result, expected) + + def test_rename_boolean_index(self): + df = DataFrame(np.arange(15).reshape(3, 5), columns=[False, True, 2, 3, 4]) + mapper = {0: "foo", 1: "bar", 2: "bah"} + res = df.rename(index=mapper) + exp = DataFrame( + np.arange(15).reshape(3, 5), + columns=[False, True, 2, 3, 4], + index=["foo", "bar", "bah"], + ) + tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/frame/methods/test_rename_axis.py b/pandas/tests/frame/methods/test_rename_axis.py new file mode 100644 index 00000000..dd4a77c6 --- /dev/null +++ b/pandas/tests/frame/methods/test_rename_axis.py @@ -0,0 +1,111 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, +) +import pandas._testing as tm + + +class TestDataFrameRenameAxis: + def test_rename_axis_inplace(self, float_frame): + # GH#15704 + expected = float_frame.rename_axis("foo") + result = float_frame.copy() + return_value = no_return = result.rename_axis("foo", inplace=True) + assert return_value is None + + assert no_return is None + tm.assert_frame_equal(result, expected) + + expected = float_frame.rename_axis("bar", axis=1) + result = float_frame.copy() + return_value = no_return = result.rename_axis("bar", axis=1, inplace=True) + assert return_value is None + + assert no_return is None + tm.assert_frame_equal(result, expected) + + def test_rename_axis_raises(self): + # GH#17833 + df = DataFrame({"A": [1, 2], "B": [1, 2]}) + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis(id, axis=0) + + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis({0: 10, 1: 20}, axis=0) + + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis(id, axis=1) + + with pytest.raises(ValueError, match="Use `.rename`"): + df["A"].rename_axis(id) + + def test_rename_axis_mapper(self): + # GH#19978 + mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"]) + df = DataFrame( + {"x": list(range(len(mi))), "y": [i * 10 for i in range(len(mi))]}, index=mi + ) + + # Test for rename of the Index object of columns + result = df.rename_axis("cols", axis=1) + tm.assert_index_equal(result.columns, Index(["x", "y"], name="cols")) + + # Test for rename of the Index object 
of columns using dict + result = result.rename_axis(columns={"cols": "new"}, axis=1) + tm.assert_index_equal(result.columns, Index(["x", "y"], name="new")) + + # Test for renaming index using dict + result = df.rename_axis(index={"ll": "foo"}) + assert result.index.names == ["foo", "nn"] + + # Test for renaming index using a function + result = df.rename_axis(index=str.upper, axis=0) + assert result.index.names == ["LL", "NN"] + + # Test for renaming index providing complete list + result = df.rename_axis(index=["foo", "goo"]) + assert result.index.names == ["foo", "goo"] + + # Test for changing index and columns at same time + sdf = df.reset_index().set_index("nn").drop(columns=["ll", "y"]) + result = sdf.rename_axis(index="foo", columns="meh") + assert result.index.name == "foo" + assert result.columns.name == "meh" + + # Test different error cases + with pytest.raises(TypeError, match="Must pass"): + df.rename_axis(index="wrong") + + with pytest.raises(ValueError, match="Length of names"): + df.rename_axis(index=["wrong"]) + + with pytest.raises(TypeError, match="bogus"): + df.rename_axis(bogus=None) + + @pytest.mark.parametrize( + "kwargs, rename_index, rename_columns", + [ + ({"mapper": None, "axis": 0}, True, False), + ({"mapper": None, "axis": 1}, False, True), + ({"index": None}, True, False), + ({"columns": None}, False, True), + ({"index": None, "columns": None}, True, True), + ({}, False, False), + ], + ) + def test_rename_axis_none(self, kwargs, rename_index, rename_columns): + # GH 25034 + index = Index(list("abc"), name="foo") + columns = Index(["col1", "col2"], name="bar") + data = np.arange(6).reshape(3, 2) + df = DataFrame(data, index, columns) + + result = df.rename_axis(**kwargs) + expected_index = index.rename(None) if rename_index else index + expected_columns = columns.rename(None) if rename_columns else columns + expected = DataFrame(data, expected_index, expected_columns) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reorder_levels.py b/pandas/tests/frame/methods/test_reorder_levels.py new file mode 100644 index 00000000..9080bdbe --- /dev/null +++ b/pandas/tests/frame/methods/test_reorder_levels.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, +) +import pandas._testing as tm + + +class TestReorderLevels: + def test_reorder_levels(self, frame_or_series): + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + names=["L0", "L1", "L2"], + ) + df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index) + obj = tm.get_obj(df, frame_or_series) + + # no change, position + result = obj.reorder_levels([0, 1, 2]) + tm.assert_equal(obj, result) + + # no change, labels + result = obj.reorder_levels(["L0", "L1", "L2"]) + tm.assert_equal(obj, result) + + # rotate, position + result = obj.reorder_levels([1, 2, 0]) + e_idx = MultiIndex( + levels=[["one", "two", "three"], [0, 1], ["bar"]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]], + names=["L1", "L2", "L0"], + ) + expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) + expected = tm.get_obj(expected, frame_or_series) + tm.assert_equal(result, expected) + + result = obj.reorder_levels([0, 0, 0]) + e_idx = MultiIndex( + levels=[["bar"], ["bar"], ["bar"]], + codes=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], + names=["L0", "L0", "L0"], + ) + expected = DataFrame({"A": 
np.arange(6), "B": np.arange(6)}, index=e_idx) + expected = tm.get_obj(expected, frame_or_series) + tm.assert_equal(result, expected) + + result = obj.reorder_levels(["L0", "L0", "L0"]) + tm.assert_equal(result, expected) + + def test_reorder_levels_swaplevel_equivalence( + self, multiindex_year_month_day_dataframe_random_data + ): + + ymd = multiindex_year_month_day_dataframe_random_data + + result = ymd.reorder_levels(["month", "day", "year"]) + expected = ymd.swaplevel(0, 1).swaplevel(1, 2) + tm.assert_frame_equal(result, expected) + + result = ymd["A"].reorder_levels(["month", "day", "year"]) + expected = ymd["A"].swaplevel(0, 1).swaplevel(1, 2) + tm.assert_series_equal(result, expected) + + result = ymd.T.reorder_levels(["month", "day", "year"], axis=1) + expected = ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) + tm.assert_frame_equal(result, expected) + + with pytest.raises(TypeError, match="hierarchical axis"): + ymd.reorder_levels([1, 2], axis=1) + + with pytest.raises(IndexError, match="Too many levels"): + ymd.index.reorder_levels([1, 2, 3]) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py new file mode 100644 index 00000000..f4de6856 --- /dev/null +++ b/pandas/tests/frame/methods/test_replace.py @@ -0,0 +1,1587 @@ +from __future__ import annotations + +from datetime import datetime +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +@pytest.fixture +def mix_ab() -> dict[str, list[int | str]]: + return {"a": list(range(4)), "b": list("ab..")} + + +@pytest.fixture +def mix_abc() -> dict[str, list[float | str]]: + return {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} + + +class TestDataFrameReplace: + def test_replace_inplace(self, datetime_frame, float_string_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tsframe = datetime_frame.copy() + return_value = tsframe.replace(np.nan, 0, inplace=True) + assert return_value is None + tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) + + # mixed type + mf = float_string_frame + mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan + mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan + + result = float_string_frame.replace(np.nan, 0) + expected = float_string_frame.fillna(value=0) + tm.assert_frame_equal(result, expected) + + tsframe = datetime_frame.copy() + return_value = tsframe.replace([np.nan], [0], inplace=True) + assert return_value is None + tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) + + @pytest.mark.parametrize( + "to_replace,values,expected", + [ + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + ( + [r"\s*\.\s*", r"e|f|g"], + [np.nan, "crap"], + { + "a": ["a", "b", np.nan, np.nan], + "b": ["crap"] * 3 + ["h"], + "c": ["h", "crap", "l", "o"], + }, + ), + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + ( + [r"\s*(\.)\s*", r"(e|f|g)"], + [r"\1\1", r"\1_crap"], + { + "a": ["a", "b", "..", ".."], + "b": ["e_crap", "f_crap", "g_crap", "h"], + "c": ["h", "e_crap", "l", "o"], + }, + ), + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + ( + [r"\s*(\.)\s*", r"e"], + [r"\1\1", r"crap"], + { + "a": ["a", "b", "..", ".."], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + }, + ), + ], + ) + @pytest.mark.parametrize("inplace", [True, False]) + 
@pytest.mark.parametrize("use_value_regex_args", [True, False]) + def test_regex_replace_list_obj( + self, to_replace, values, expected, inplace, use_value_regex_args + ): + df = DataFrame({"a": list("ab.."), "b": list("efgh"), "c": list("helo")}) + + if use_value_regex_args: + result = df.replace(value=values, regex=to_replace, inplace=inplace) + else: + result = df.replace(to_replace, values, regex=True, inplace=inplace) + + if inplace: + assert result is None + result = df + + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + def test_regex_replace_list_mixed(self, mix_ab): + # mixed frame to make sure this doesn't break things + dfmix = DataFrame(mix_ab) + + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + to_replace_res = [r"\s*\.\s*", r"a"] + values = [np.nan, "crap"] + mix2 = {"a": list(range(4)), "b": list("ab.."), "c": list("halo")} + dfmix2 = DataFrame(mix2) + res = dfmix2.replace(to_replace_res, values, regex=True) + expec = DataFrame( + { + "a": mix2["a"], + "b": ["crap", "b", np.nan, np.nan], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + to_replace_res = [r"\s*(\.)\s*", r"(a|b)"] + values = [r"\1\1", r"\1_crap"] + res = dfmix.replace(to_replace_res, values, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.replace(to_replace_res, values, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.replace(regex=to_replace_res, value=values) + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_list_mixed_inplace(self, mix_ab): + dfmix = DataFrame(mix_ab) + # the same inplace + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + to_replace_res = [r"\s*\.\s*", r"a"] + values = [np.nan, "crap"] + res = dfmix.copy() + return_value = res.replace(to_replace_res, values, inplace=True, regex=True) + assert return_value is None + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b", np.nan, np.nan]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + to_replace_res = [r"\s*(\.)\s*", r"(a|b)"] + values = [r"\1\1", r"\1_crap"] + res = dfmix.copy() + return_value = res.replace(to_replace_res, values, inplace=True, regex=True) + assert return_value is None + expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.copy() + return_value = res.replace(to_replace_res, values, inplace=True, regex=True) + assert return_value is None + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.copy() + return_value = res.replace(regex=to_replace_res, 
value=values, inplace=True) + assert return_value is None + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_dict_mixed(self, mix_abc): + dfmix = DataFrame(mix_abc) + + # dicts + # single dict {re1: v1}, search the whole frame + # need test for this... + + # list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole + # frame + res = dfmix.replace({"b": r"\s*\.\s*"}, {"b": np.nan}, regex=True) + res2 = dfmix.copy() + return_value = res2.replace( + {"b": r"\s*\.\s*"}, {"b": np.nan}, inplace=True, regex=True + ) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the + # whole frame + res = dfmix.replace({"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, regex=True) + res2 = dfmix.copy() + return_value = res2.replace( + {"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, inplace=True, regex=True + ) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + res = dfmix.replace(regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}) + res2 = dfmix.copy() + return_value = res2.replace( + regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}, inplace=True + ) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + # scalar -> dict + # to_replace regex, {value: value} + expec = DataFrame( + {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]} + ) + res = dfmix.replace("a", {"b": np.nan}, regex=True) + res2 = dfmix.copy() + return_value = res2.replace("a", {"b": np.nan}, regex=True, inplace=True) + assert return_value is None + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + res = dfmix.replace("a", {"b": np.nan}, regex=True) + res2 = dfmix.copy() + return_value = res2.replace(regex="a", value={"b": np.nan}, inplace=True) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + def test_regex_replace_dict_nested(self, mix_abc): + # nested dicts will not work until this is implemented for Series + dfmix = DataFrame(mix_abc) + res = dfmix.replace({"b": {r"\s*\.\s*": np.nan}}, regex=True) + res2 = dfmix.copy() + res4 = dfmix.copy() + return_value = res2.replace( + {"b": {r"\s*\.\s*": np.nan}}, inplace=True, regex=True + ) + assert return_value is None + res3 = dfmix.replace(regex={"b": {r"\s*\.\s*": np.nan}}) + return_value = res4.replace(regex={"b": {r"\s*\.\s*": np.nan}}, inplace=True) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + tm.assert_frame_equal(res4, expec) + + def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype): + # GH 25259 + dtype = any_string_dtype + df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype) + expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype) + result = df.replace({"a": "."}, regex=True) + 
tm.assert_frame_equal(result, expected) + + def test_regex_replace_dict_nested_gh4115(self): + df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2}) + expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2}) + result = df.replace({"Type": {"Q": 0, "T": 1}}) + tm.assert_frame_equal(result, expected) + + def test_regex_replace_list_to_scalar(self, mix_abc): + df = DataFrame(mix_abc) + expec = DataFrame( + { + "a": mix_abc["a"], + "b": np.array([np.nan] * 4), + "c": [np.nan, np.nan, np.nan, "d"], + } + ) + res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True) + res2 = df.copy() + res3 = df.copy() + return_value = res2.replace( + [r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True + ) + assert return_value is None + return_value = res3.replace( + regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True + ) + assert return_value is None + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_str_to_numeric(self, mix_abc): + # what happens when you try to replace a numeric value with a regex? + df = DataFrame(mix_abc) + res = df.replace(r"\s*\.\s*", 0, regex=True) + res2 = df.copy() + return_value = res2.replace(r"\s*\.\s*", 0, inplace=True, regex=True) + assert return_value is None + res3 = df.copy() + return_value = res3.replace(regex=r"\s*\.\s*", value=0, inplace=True) + assert return_value is None + expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]}) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_regex_list_to_numeric(self, mix_abc): + df = DataFrame(mix_abc) + res = df.replace([r"\s*\.\s*", "b"], 0, regex=True) + res2 = df.copy() + return_value = res2.replace([r"\s*\.\s*", "b"], 0, regex=True, inplace=True) + assert return_value is None + res3 = df.copy() + return_value = res3.replace(regex=[r"\s*\.\s*", "b"], value=0, inplace=True) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", 0, 0, 0], "c": ["a", 0, np.nan, "d"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_series_of_regexes(self, mix_abc): + df = DataFrame(mix_abc) + s1 = Series({"b": r"\s*\.\s*"}) + s2 = Series({"b": np.nan}) + res = df.replace(s1, s2, regex=True) + res2 = df.copy() + return_value = res2.replace(s1, s2, inplace=True, regex=True) + assert return_value is None + res3 = df.copy() + return_value = res3.replace(regex=s1, value=s2, inplace=True) + assert return_value is None + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_numeric_to_object_conversion(self, mix_abc): + df = DataFrame(mix_abc) + expec = DataFrame({"a": ["a", 1, 2, 3], "b": mix_abc["b"], "c": mix_abc["c"]}) + res = df.replace(0, "a") + tm.assert_frame_equal(res, expec) + assert res.a.dtype == np.object_ + + @pytest.mark.parametrize( + "to_replace", [{"": np.nan, ",": ""}, {",": "", "": np.nan}] + ) + def test_joint_simple_replace_and_regex_replace(self, to_replace): + # GH-39338 + df = DataFrame( + { + "col1": ["1,000", "a", "3"], + "col2": ["a", "", "b"], + "col3": ["a", "b", "c"], + } + ) + result = df.replace(regex=to_replace) + expected = DataFrame( + { + "col1": ["1000", "a", "3"], + "col2": ["a", np.nan, "b"], + "col3": ["a", 
"b", "c"], + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"]) + def test_replace_regex_metachar(self, metachar): + df = DataFrame({"a": [metachar, "else"]}) + result = df.replace({"a": {metachar: "paren"}}) + expected = DataFrame({"a": ["paren", "else"]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "data,to_replace,expected", + [ + (["xax", "xbx"], {"a": "c", "b": "d"}, ["xcx", "xdx"]), + (["d", "", ""], {r"^\s*$": pd.NA}, ["d", pd.NA, pd.NA]), + ], + ) + def test_regex_replace_string_types( + self, data, to_replace, expected, frame_or_series, any_string_dtype + ): + # GH-41333, GH-35977 + dtype = any_string_dtype + obj = frame_or_series(data, dtype=dtype) + result = obj.replace(to_replace, regex=True) + expected = frame_or_series(expected, dtype=dtype) + + tm.assert_equal(result, expected) + + def test_replace(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + zero_filled = datetime_frame.replace(np.nan, -1e8) + tm.assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8)) + tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame) + + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + datetime_frame["B"][:5] = -1e8 + + # empty + df = DataFrame(index=["a", "b"]) + tm.assert_frame_equal(df, df.replace(5, 7)) + + # GH 11698 + # test for mixed data types. + df = DataFrame( + [("-", pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] + ) + df1 = df.replace("-", np.nan) + expected_df = DataFrame( + [(np.nan, pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] + ) + tm.assert_frame_equal(df1, expected_df) + + def test_replace_list(self): + obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")} + dfobj = DataFrame(obj) + + # lists of regexes and values + # list of [v1, v2, ..., vN] -> [v1, v2, ..., vN] + to_replace_res = [r".", r"e"] + values = [np.nan, "crap"] + res = dfobj.replace(to_replace_res, values) + expec = DataFrame( + { + "a": ["a", "b", np.nan, np.nan], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [v1, v2, ..., vN] -> [v1, v2, .., vN] + to_replace_res = [r".", r"f"] + values = [r"..", r"crap"] + res = dfobj.replace(to_replace_res, values) + expec = DataFrame( + { + "a": ["a", "b", "..", ".."], + "b": ["e", "crap", "g", "h"], + "c": ["h", "e", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + def test_replace_with_empty_list(self, frame_or_series): + # GH 21977 + ser = Series([["a", "b"], [], np.nan, [1]]) + obj = DataFrame({"col": ser}) + obj = tm.get_obj(obj, frame_or_series) + expected = obj + result = obj.replace([], np.nan) + tm.assert_equal(result, expected) + + # GH 19266 + msg = ( + "NumPy boolean array indexing assignment cannot assign {size} " + "input values to the 1 output values where the mask is true" + ) + with pytest.raises(ValueError, match=msg.format(size=0)): + obj.replace({np.nan: []}) + with pytest.raises(ValueError, match=msg.format(size=2)): + obj.replace({np.nan: ["dummy", "alt"]}) + + def test_replace_series_dict(self): + # from GH 3064 + df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}}) + result = df.replace(0, {"zero": 0.5, "one": 1.0}) + expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 2.0, "b": 1.0}}) + tm.assert_frame_equal(result, expected) + + result = df.replace(0, df.mean()) + tm.assert_frame_equal(result, 
expected) + + # series to series/dict + df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}}) + s = Series({"zero": 0.0, "one": 2.0}) + result = df.replace(s, {"zero": 0.5, "one": 1.0}) + expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 1.0, "b": 0.0}}) + tm.assert_frame_equal(result, expected) + + result = df.replace(s, df.mean()) + tm.assert_frame_equal(result, expected) + + def test_replace_convert(self): + # gh 3907 + df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]]) + m = {"foo": 1, "bar": 2, "bah": 3} + rep = df.replace(m) + expec = Series([np.int64] * 3) + res = rep.dtypes + tm.assert_series_equal(expec, res) + + def test_replace_mixed(self, float_string_frame): + mf = float_string_frame + mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan + mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan + + result = float_string_frame.replace(np.nan, -18) + expected = float_string_frame.fillna(value=-18) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result.replace(-18, np.nan), float_string_frame) + + result = float_string_frame.replace(np.nan, -1e8) + expected = float_string_frame.fillna(value=-1e8) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) + + def test_replace_mixed_int_block_upcasting(self): + + # int block upcasting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0.5, 1], dtype="float64"), + } + ) + result = df.replace(0, 0.5) + tm.assert_frame_equal(result, expected) + + return_value = df.replace(0, 0.5, inplace=True) + assert return_value is None + tm.assert_frame_equal(df, expected) + + def test_replace_mixed_int_block_splitting(self): + + # int block splitting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], dtype="int64"), + "C": Series([1, 2], dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0.5, 1], dtype="float64"), + "C": Series([1, 2], dtype="int64"), + } + ) + result = df.replace(0, 0.5) + tm.assert_frame_equal(result, expected) + + def test_replace_mixed2(self): + + # to object block upcasting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1, "foo"], dtype="object"), + "B": Series([0, 1], dtype="int64"), + } + ) + result = df.replace(2, "foo") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + "A": Series(["foo", "bar"], dtype="object"), + "B": Series([0, "foo"], dtype="object"), + } + ) + result = df.replace([1, 2], ["foo", "bar"]) + tm.assert_frame_equal(result, expected) + + def test_replace_mixed3(self): + # test case from + df = DataFrame( + {"A": Series([3, 0], dtype="int64"), "B": Series([0, 3], dtype="int64")} + ) + result = df.replace(3, df.mean().to_dict()) + expected = df.copy().astype("float64") + m = df.mean() + expected.iloc[0, 0] = m[0] + expected.iloc[1, 1] = m[1] + tm.assert_frame_equal(result, expected) + + def test_replace_nullable_int_with_string_doesnt_cast(self): + # GH#25438 don't cast df['a'] to float64 + df = DataFrame({"a": [1, 2, 3, np.nan], "b": ["some", "strings", "here", "he"]}) + df["a"] = df["a"].astype("Int64") + + res = df.replace("", np.nan) + tm.assert_series_equal(res["a"], df["a"]) + + @pytest.mark.parametrize("dtype", ["boolean", 
"Int64", "Float64"]) + def test_replace_with_nullable_column(self, dtype): + # GH-44499 + nullable_ser = Series([1, 0, 1], dtype=dtype) + df = DataFrame({"A": ["A", "B", "x"], "B": nullable_ser}) + result = df.replace("x", "X") + expected = DataFrame({"A": ["A", "B", "X"], "B": nullable_ser}) + tm.assert_frame_equal(result, expected) + + def test_replace_simple_nested_dict(self): + df = DataFrame({"col": range(1, 5)}) + expected = DataFrame({"col": ["a", 2, 3, "b"]}) + + result = df.replace({"col": {1: "a", 4: "b"}}) + tm.assert_frame_equal(expected, result) + + # in this case, should be the same as the not nested version + result = df.replace({1: "a", 4: "b"}) + tm.assert_frame_equal(expected, result) + + def test_replace_simple_nested_dict_with_nonexistent_value(self): + df = DataFrame({"col": range(1, 5)}) + expected = DataFrame({"col": ["a", 2, 3, "b"]}) + + result = df.replace({-1: "-", 1: "a", 4: "b"}) + tm.assert_frame_equal(expected, result) + + result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}}) + tm.assert_frame_equal(expected, result) + + def test_replace_NA_with_None(self): + # gh-45601 + df = DataFrame({"value": [42, None]}).astype({"value": "Int64"}) + result = df.replace({pd.NA: None}) + expected = DataFrame({"value": [42, None]}, dtype=object) + tm.assert_frame_equal(result, expected) + + def test_replace_NAT_with_None(self): + # gh-45836 + df = DataFrame([pd.NaT, pd.NaT]) + result = df.replace({pd.NaT: None, np.NaN: None}) + expected = DataFrame([None, None]) + tm.assert_frame_equal(result, expected) + + def test_replace_with_None_keeps_categorical(self): + # gh-46634 + cat_series = Series(["b", "b", "b", "d"], dtype="category") + df = DataFrame( + { + "id": Series([5, 4, 3, 2], dtype="float64"), + "col": cat_series, + } + ) + result = df.replace({3: None}) + + expected = DataFrame( + { + "id": Series([5.0, 4.0, None, 2.0], dtype="object"), + "col": cat_series, + } + ) + tm.assert_frame_equal(result, expected) + + def test_replace_value_is_none(self, datetime_frame): + orig_value = datetime_frame.iloc[0, 0] + orig2 = datetime_frame.iloc[1, 0] + + datetime_frame.iloc[0, 0] = np.nan + datetime_frame.iloc[1, 0] = 1 + + result = datetime_frame.replace(to_replace={np.nan: 0}) + expected = datetime_frame.T.replace(to_replace={np.nan: 0}).T + tm.assert_frame_equal(result, expected) + + result = datetime_frame.replace(to_replace={np.nan: 0, 1: -1e8}) + tsframe = datetime_frame.copy() + tsframe.iloc[0, 0] = 0 + tsframe.iloc[1, 0] = -1e8 + expected = tsframe + tm.assert_frame_equal(expected, result) + datetime_frame.iloc[0, 0] = orig_value + datetime_frame.iloc[1, 0] = orig2 + + def test_replace_for_new_dtypes(self, datetime_frame): + + # dtypes + tsframe = datetime_frame.copy().astype(np.float32) + tsframe["A"][:5] = np.nan + tsframe["A"][-5:] = np.nan + + zero_filled = tsframe.replace(np.nan, -1e8) + tm.assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) + tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe) + + tsframe["A"][:5] = np.nan + tsframe["A"][-5:] = np.nan + tsframe["B"][:5] = -1e8 + + b = tsframe["B"] + b[b == -1e8] = np.nan + tsframe["B"] = b + result = tsframe.fillna(method="bfill") + tm.assert_frame_equal(result, tsframe.fillna(method="bfill")) + + @pytest.mark.parametrize( + "frame, to_replace, value, expected", + [ + (DataFrame({"ints": [1, 2, 3]}), 1, 0, DataFrame({"ints": [0, 2, 3]})), + ( + DataFrame({"ints": [1, 2, 3]}, dtype=np.int32), + 1, + 0, + DataFrame({"ints": [0, 2, 3]}, dtype=np.int32), + ), + ( + DataFrame({"ints": [1, 2, 3]}, 
dtype=np.int16), + 1, + 0, + DataFrame({"ints": [0, 2, 3]}, dtype=np.int16), + ), + ( + DataFrame({"bools": [True, False, True]}), + False, + True, + DataFrame({"bools": [True, True, True]}), + ), + ( + DataFrame({"complex": [1j, 2j, 3j]}), + 1j, + 0, + DataFrame({"complex": [0j, 2j, 3j]}), + ), + ( + DataFrame( + { + "datetime64": Index( + [ + datetime(2018, 5, 28), + datetime(2018, 7, 28), + datetime(2018, 5, 28), + ] + ) + } + ), + datetime(2018, 5, 28), + datetime(2018, 7, 28), + DataFrame({"datetime64": Index([datetime(2018, 7, 28)] * 3)}), + ), + # GH 20380 + ( + DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["foo"]}), + "foo", + "bar", + DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["bar"]}), + ), + # GH 36782 + ( + DataFrame({"dt": [datetime(2920, 10, 1)]}), + datetime(2920, 10, 1), + datetime(2020, 10, 1), + DataFrame({"dt": [datetime(2020, 10, 1)]}), + ), + ( + DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": [0, np.nan, 2], + } + ), + Timestamp("20130102", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ), + ), + # GH 35376 + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1.0, + 5, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1, + 5, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1.0, + 5.0, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1, + 5.0, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ], + ) + def test_replace_dtypes(self, frame, to_replace, value, expected): + result = getattr(frame, "replace")(to_replace, value) + tm.assert_frame_equal(result, expected) + + def test_replace_input_formats_listlike(self): + # both dicts + to_rep = {"A": np.nan, "B": 0, "C": ""} + values = {"A": 0, "B": -1, "C": "missing"} + df = DataFrame( + {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + filled = df.replace(to_rep, values) + expected = {k: v.replace(to_rep[k], values[k]) for k, v in df.items()} + tm.assert_frame_equal(filled, DataFrame(expected)) + + result = df.replace([0, 2, 5], [5, 2, 0]) + expected = DataFrame( + {"A": [np.nan, 5, np.inf], "B": [5, 2, 0], "C": ["", "asdf", "fd"]} + ) + tm.assert_frame_equal(result, expected) + + # scalar to dict + values = {"A": 0, "B": -1, "C": "missing"} + df = DataFrame( + {"A": [np.nan, 0, np.nan], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + filled = df.replace(np.nan, values) + expected = {k: v.replace(np.nan, values[k]) for k, v in df.items()} + tm.assert_frame_equal(filled, DataFrame(expected)) + + # list to list + to_rep = [np.nan, 0, ""] + values = [-2, -1, "missing"] + result = df.replace(to_rep, values) + expected = df.copy() + for i in range(len(to_rep)): + return_value = expected.replace(to_rep[i], values[i], inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + msg = r"Replacement lists must match in length\. 
Expecting 3 got 2" + with pytest.raises(ValueError, match=msg): + df.replace(to_rep, values[1:]) + + def test_replace_input_formats_scalar(self): + df = DataFrame( + {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + + # dict to scalar + to_rep = {"A": np.nan, "B": 0, "C": ""} + filled = df.replace(to_rep, 0) + expected = {k: v.replace(to_rep[k], 0) for k, v in df.items()} + tm.assert_frame_equal(filled, DataFrame(expected)) + + msg = "value argument must be scalar, dict, or Series" + with pytest.raises(TypeError, match=msg): + df.replace(to_rep, [np.nan, 0, ""]) + + # list to scalar + to_rep = [np.nan, 0, ""] + result = df.replace(to_rep, -1) + expected = df.copy() + for i in range(len(to_rep)): + return_value = expected.replace(to_rep[i], -1, inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) + + def test_replace_limit(self): + # TODO + pass + + def test_replace_dict_no_regex(self): + answer = Series( + { + 0: "Strongly Agree", + 1: "Agree", + 2: "Neutral", + 3: "Disagree", + 4: "Strongly Disagree", + } + ) + weights = { + "Agree": 4, + "Disagree": 2, + "Neutral": 3, + "Strongly Agree": 5, + "Strongly Disagree": 1, + } + expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) + result = answer.replace(weights) + tm.assert_series_equal(result, expected) + + def test_replace_series_no_regex(self): + answer = Series( + { + 0: "Strongly Agree", + 1: "Agree", + 2: "Neutral", + 3: "Disagree", + 4: "Strongly Disagree", + } + ) + weights = Series( + { + "Agree": 4, + "Disagree": 2, + "Neutral": 3, + "Strongly Agree": 5, + "Strongly Disagree": 1, + } + ) + expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) + result = answer.replace(weights) + tm.assert_series_equal(result, expected) + + def test_replace_dict_tuple_list_ordering_remains_the_same(self): + df = DataFrame({"A": [np.nan, 1]}) + res1 = df.replace(to_replace={np.nan: 0, 1: -1e8}) + res2 = df.replace(to_replace=(1, np.nan), value=[-1e8, 0]) + res3 = df.replace(to_replace=[1, np.nan], value=[-1e8, 0]) + + expected = DataFrame({"A": [0, -1e8]}) + tm.assert_frame_equal(res1, res2) + tm.assert_frame_equal(res2, res3) + tm.assert_frame_equal(res3, expected) + + def test_replace_doesnt_replace_without_regex(self): + df = DataFrame( + { + "fol": [1, 2, 2, 3], + "T_opp": ["0", "vr", "0", "0"], + "T_Dir": ["0", "0", "0", "bt"], + "T_Enh": ["vo", "0", "0", "0"], + } + ) + res = df.replace({r"\D": 1}) + tm.assert_frame_equal(df, res) + + def test_replace_bool_with_string(self): + df = DataFrame({"a": [True, False], "b": list("ab")}) + result = df.replace(True, "a") + expected = DataFrame({"a": ["a", False], "b": df.b}) + tm.assert_frame_equal(result, expected) + + def test_replace_pure_bool_with_string_no_op(self): + df = DataFrame(np.random.rand(2, 2) > 0.5) + result = df.replace("asdf", "fdsa") + tm.assert_frame_equal(df, result) + + def test_replace_bool_with_bool(self): + df = DataFrame(np.random.rand(2, 2) > 0.5) + result = df.replace(False, True) + expected = DataFrame(np.ones((2, 2), dtype=bool)) + tm.assert_frame_equal(result, expected) + + def test_replace_with_dict_with_bool_keys(self): + df = DataFrame({0: [True, False], 1: [False, True]}) + result = df.replace({"asdf": "asdb", True: "yes"}) + expected = DataFrame({0: ["yes", False], 1: [False, "yes"]}) + tm.assert_frame_equal(result, expected) + + def test_replace_dict_strings_vs_ints(self): + # GH#34789 + df = DataFrame({"Y0": [1, 2], "Y1": [3, 4]}) + result = df.replace({"replace_string": "test"}) + + tm.assert_frame_equal(result, df) 
+ + result = df["Y0"].replace({"replace_string": "test"}) + tm.assert_series_equal(result, df["Y0"]) + + def test_replace_truthy(self): + df = DataFrame({"a": [True, True]}) + r = df.replace([np.inf, -np.inf], np.nan) + e = df + tm.assert_frame_equal(r, e) + + def test_nested_dict_overlapping_keys_replace_int(self): + # GH 27660 keep behaviour consistent for simple dictionary and + # nested dictionary replacement + df = DataFrame({"a": list(range(1, 5))}) + + result = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) + expected = df.replace(dict(zip(range(1, 5), range(2, 6)))) + tm.assert_frame_equal(result, expected) + + def test_nested_dict_overlapping_keys_replace_str(self): + # GH 27660 + a = np.arange(1, 5) + astr = a.astype(str) + bstr = np.arange(2, 6).astype(str) + df = DataFrame({"a": astr}) + result = df.replace(dict(zip(astr, bstr))) + expected = df.replace({"a": dict(zip(astr, bstr))}) + tm.assert_frame_equal(result, expected) + + def test_replace_swapping_bug(self): + df = DataFrame({"a": [True, False, True]}) + res = df.replace({"a": {True: "Y", False: "N"}}) + expect = DataFrame({"a": ["Y", "N", "Y"]}) + tm.assert_frame_equal(res, expect) + + df = DataFrame({"a": [0, 1, 0]}) + res = df.replace({"a": {0: "Y", 1: "N"}}) + expect = DataFrame({"a": ["Y", "N", "Y"]}) + tm.assert_frame_equal(res, expect) + + def test_replace_period(self): + d = { + "fname": { + "out_augmented_AUG_2011.json": pd.Period(year=2011, month=8, freq="M"), + "out_augmented_JAN_2011.json": pd.Period(year=2011, month=1, freq="M"), + "out_augmented_MAY_2012.json": pd.Period(year=2012, month=5, freq="M"), + "out_augmented_SUBSIDY_WEEK.json": pd.Period( + year=2011, month=4, freq="M" + ), + "out_augmented_AUG_2012.json": pd.Period(year=2012, month=8, freq="M"), + "out_augmented_MAY_2011.json": pd.Period(year=2011, month=5, freq="M"), + "out_augmented_SEP_2013.json": pd.Period(year=2013, month=9, freq="M"), + } + } + + df = DataFrame( + [ + "out_augmented_AUG_2012.json", + "out_augmented_SEP_2013.json", + "out_augmented_SUBSIDY_WEEK.json", + "out_augmented_MAY_2012.json", + "out_augmented_MAY_2011.json", + "out_augmented_AUG_2011.json", + "out_augmented_JAN_2011.json", + ], + columns=["fname"], + ) + assert set(df.fname.values) == set(d["fname"].keys()) + + expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]}) + assert expected.dtypes[0] == "Period[M]" + result = df.replace(d) + tm.assert_frame_equal(result, expected) + + def test_replace_datetime(self): + d = { + "fname": { + "out_augmented_AUG_2011.json": Timestamp("2011-08"), + "out_augmented_JAN_2011.json": Timestamp("2011-01"), + "out_augmented_MAY_2012.json": Timestamp("2012-05"), + "out_augmented_SUBSIDY_WEEK.json": Timestamp("2011-04"), + "out_augmented_AUG_2012.json": Timestamp("2012-08"), + "out_augmented_MAY_2011.json": Timestamp("2011-05"), + "out_augmented_SEP_2013.json": Timestamp("2013-09"), + } + } + + df = DataFrame( + [ + "out_augmented_AUG_2012.json", + "out_augmented_SEP_2013.json", + "out_augmented_SUBSIDY_WEEK.json", + "out_augmented_MAY_2012.json", + "out_augmented_MAY_2011.json", + "out_augmented_AUG_2011.json", + "out_augmented_JAN_2011.json", + ], + columns=["fname"], + ) + assert set(df.fname.values) == set(d["fname"].keys()) + expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]}) + result = df.replace(d) + tm.assert_frame_equal(result, expected) + + def test_replace_datetimetz(self): + + # GH 11326 + # behaving poorly when presented with a datetime64[ns, tz] + df = DataFrame( + { + "A": 
date_range("20130101", periods=3, tz="US/Eastern"), + "B": [0, np.nan, 2], + } + ) + result = df.replace(np.nan, 1) + expected = DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": Series([0, 1, 2], dtype="float64"), + } + ) + tm.assert_frame_equal(result, expected) + + result = df.fillna(1) + tm.assert_frame_equal(result, expected) + + result = df.replace(0, np.nan) + expected = DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": [np.nan, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.replace( + Timestamp("20130102", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + ) + expected = DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.iloc[1, 0] = np.nan + result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Eastern")) + tm.assert_frame_equal(result, expected) + + # coerce to object + result = df.copy() + result.iloc[1, 0] = np.nan + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = result.replace( + {"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific") + ) + expected = DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Pacific"), + # once deprecation is enforced + # Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.iloc[1, 0] = np.nan + result = result.replace({"A": np.nan}, Timestamp("20130104")) + expected = DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + def test_replace_with_empty_dictlike(self, mix_abc): + # GH 15289 + df = DataFrame(mix_abc) + tm.assert_frame_equal(df, df.replace({})) + tm.assert_frame_equal(df, df.replace(Series([], dtype=object))) + + tm.assert_frame_equal(df, df.replace({"b": {}})) + tm.assert_frame_equal(df, df.replace(Series({"b": {}}))) + + @pytest.mark.parametrize( + "to_replace, method, expected", + [ + (0, "bfill", {"A": [1, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}), + ( + np.nan, + "bfill", + {"A": [0, 1, 2], "B": [5.0, 7.0, 7.0], "C": ["a", "b", "c"]}, + ), + ("d", "ffill", {"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}), + ( + [0, 2], + "bfill", + {"A": [1, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}, + ), + ( + [1, 2], + "pad", + {"A": [0, 0, 0], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}, + ), + ( + (1, 2), + "bfill", + {"A": [0, 2, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}, + ), + ( + ["b", "c"], + "ffill", + {"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "a", "a"]}, + ), + ], + ) + def test_replace_method(self, to_replace, method, expected): + # GH 19632 + df = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}) + + result = df.replace(to_replace=to_replace, value=None, method=method) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "replace_dict, final_data", + [({"a": 1, "b": 1}, [[3, 3], [2, 2]]), ({"a": 1, "b": 2}, [[3, 1], [2, 3]])], + ) + def test_categorical_replace_with_dict(self, replace_dict, final_data): 
+ # GH 26988 + df = DataFrame([[1, 1], [2, 2]], columns=["a", "b"], dtype="category") + + final_data = np.array(final_data) + + a = pd.Categorical(final_data[:, 0], categories=[3, 2]) + + ex_cat = [3, 2] if replace_dict["b"] == 1 else [1, 3] + b = pd.Categorical(final_data[:, 1], categories=ex_cat) + + expected = DataFrame({"a": a, "b": b}) + result = df.replace(replace_dict, 3) + tm.assert_frame_equal(result, expected) + msg = ( + r"Attributes of DataFrame.iloc\[:, 0\] \(column name=\"a\"\) are " + "different" + ) + with pytest.raises(AssertionError, match=msg): + # ensure non-inplace call does not affect original + tm.assert_frame_equal(df, expected) + return_value = df.replace(replace_dict, 3, inplace=True) + assert return_value is None + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "df, to_replace, exp", + [ + ( + {"col1": [1, 2, 3], "col2": [4, 5, 6]}, + {4: 5, 5: 6, 6: 7}, + {"col1": [1, 2, 3], "col2": [5, 6, 7]}, + ), + ( + {"col1": [1, 2, 3], "col2": ["4", "5", "6"]}, + {"4": "5", "5": "6", "6": "7"}, + {"col1": [1, 2, 3], "col2": ["5", "6", "7"]}, + ), + ], + ) + def test_replace_commutative(self, df, to_replace, exp): + # GH 16051 + # DataFrame.replace() overwrites when values are non-numeric + # also added to data frame whilst issue was for series + + df = DataFrame(df) + + expected = DataFrame(exp) + result = df.replace(to_replace) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "replacer", + [ + Timestamp("20170827"), + np.int8(1), + np.int16(1), + np.float32(1), + np.float64(1), + ], + ) + def test_replace_replacer_dtype(self, request, replacer): + # GH26632 + df = DataFrame(["a"]) + result = df.replace({"a": replacer, "b": replacer}) + expected = DataFrame([replacer]) + tm.assert_frame_equal(result, expected) + + def test_replace_after_convert_dtypes(self): + # GH31517 + df = DataFrame({"grp": [1, 2, 3, 4, 5]}, dtype="Int64") + result = df.replace(1, 10) + expected = DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") + tm.assert_frame_equal(result, expected) + + def test_replace_invalid_to_replace(self): + # GH 18634 + # API: replace() should raise an exception if invalid argument is given + df = DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}) + msg = ( + r"Expecting 'to_replace' to be either a scalar, array-like, " + r"dict or None, got invalid type.*" + ) + with pytest.raises(TypeError, match=msg): + df.replace(lambda x: x.strip()) + + @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"]) + @pytest.mark.parametrize("value", [np.nan, pd.NA]) + def test_replace_no_replacement_dtypes(self, dtype, value): + # https://github.com/pandas-dev/pandas/issues/32988 + df = DataFrame(np.eye(2), dtype=dtype) + result = df.replace(to_replace=[None, -np.inf, np.inf], value=value) + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("replacement", [np.nan, 5]) + def test_replace_with_duplicate_columns(self, replacement): + # GH 24798 + result = DataFrame({"A": [1, 2, 3], "A1": [4, 5, 6], "B": [7, 8, 9]}) + result.columns = list("AAB") + + expected = DataFrame( + {"A": [1, 2, 3], "A1": [4, 5, 6], "B": [replacement, 8, 9]} + ) + expected.columns = list("AAB") + + result["B"] = result["B"].replace(7, replacement) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) + def test_replace_ea_ignore_float(self, frame_or_series, value): + # GH#34871 + obj = DataFrame({"Per": [value] * 3}) + obj = tm.get_obj(obj, 
frame_or_series) + + expected = obj.copy() + result = obj.replace(1.0, 0.0) + tm.assert_equal(expected, result) + + def test_replace_value_category_type(self): + """ + Test for #23305: to ensure category dtypes are maintained + after replace with direct values + """ + + # create input data + input_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "d"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "cat2", "cat3", "cat4"], + "col5": ["obj1", "obj2", "obj3", "obj4"], + } + # explicitly cast columns as category and order them + input_df = DataFrame(data=input_dict).astype( + {"col2": "category", "col4": "category"} + ) + input_df["col2"] = input_df["col2"].cat.reorder_categories( + ["a", "b", "c", "d"], ordered=True + ) + input_df["col4"] = input_df["col4"].cat.reorder_categories( + ["cat1", "cat2", "cat3", "cat4"], ordered=True + ) + + # create expected dataframe + expected_dict = { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "z"], + "col3": [1.5, 2.5, 3.5, 4.5], + "col4": ["cat1", "catX", "cat3", "cat4"], + "col5": ["obj9", "obj2", "obj3", "obj4"], + } + # explicitly cast columns as category and order them + expected = DataFrame(data=expected_dict).astype( + {"col2": "category", "col4": "category"} + ) + expected["col2"] = expected["col2"].cat.reorder_categories( + ["a", "b", "c", "z"], ordered=True + ) + expected["col4"] = expected["col4"].cat.reorder_categories( + ["cat1", "catX", "cat3", "cat4"], ordered=True + ) + + # replace values in input dataframe + input_df = input_df.replace("d", "z") + input_df = input_df.replace("obj1", "obj9") + result = input_df.replace("cat2", "catX") + + tm.assert_frame_equal(result, expected) + + def test_replace_dict_category_type(self): + """ + Test to ensure category dtypes are maintained + after replace with dict values + """ + # GH#35268, GH#44940 + + # create input dataframe + input_dict = {"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]} + # explicitly cast columns as category + input_df = DataFrame(data=input_dict).astype( + {"col1": "category", "col2": "category", "col3": "category"} + ) + + # create expected dataframe + expected_dict = {"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]} + # explicitly cast columns as category + expected = DataFrame(data=expected_dict).astype( + {"col1": "category", "col2": "category", "col3": "category"} + ) + + # replace values in input dataframe using a dict + result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"}) + + tm.assert_frame_equal(result, expected) + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + df = DataFrame(["a", "b", "c"]) + regex = re.compile("^a$") + result = df.replace({regex: "z"}, regex=True) + expected = DataFrame(["z", "b", "c"]) + tm.assert_frame_equal(result, expected) + + def test_replace_intervals(self): + # https://github.com/pandas-dev/pandas/issues/35931 + df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]}) + result = df.replace({"a": {pd.Interval(0, 1): "x"}}) + expected = DataFrame({"a": ["x", "x"]}) + tm.assert_frame_equal(result, expected) + + def test_replace_unicode(self): + # GH: 16784 + columns_values_map = {"positive": {"正面": 1, "中立": 1, "负面": 0}} + df1 = DataFrame({"positive": np.ones(3)}) + result = df1.replace(columns_values_map) + expected = DataFrame({"positive": np.ones(3)}) + tm.assert_frame_equal(result, expected) + + def test_replace_bytes(self, frame_or_series): + # GH#38900 + obj = frame_or_series(["o"]).astype("|S") + expected = obj.copy() + obj = 
obj.replace({None: np.nan}) + tm.assert_equal(obj, expected) + + @pytest.mark.parametrize( + "data, to_replace, value, expected", + [ + ([1], [1.0], [0], [0]), + ([1], [1], [0], [0]), + ([1.0], [1.0], [0], [0.0]), + ([1.0], [1], [0], [0.0]), + ], + ) + @pytest.mark.parametrize("box", [list, tuple, np.array]) + def test_replace_list_with_mixed_type( + self, data, to_replace, value, expected, box, frame_or_series + ): + # GH#40371 + obj = frame_or_series(data) + expected = frame_or_series(expected) + result = obj.replace(box(to_replace), value) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("val", [2, np.nan, 2.0]) + def test_replace_value_none_dtype_numeric(self, val): + # GH#48231 + df = DataFrame({"a": [1, val]}) + result = df.replace(val, None) + expected = DataFrame({"a": [1, None]}, dtype=object) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1, val]}) + result = df.replace({val: None}) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameReplaceRegex: + @pytest.mark.parametrize( + "data", + [ + {"a": list("ab.."), "b": list("efgh")}, + {"a": list("ab.."), "b": list(range(4))}, + ], + ) + @pytest.mark.parametrize( + "to_replace,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")] + ) + @pytest.mark.parametrize("compile_regex", [True, False]) + @pytest.mark.parametrize("regex_kwarg", [True, False]) + @pytest.mark.parametrize("inplace", [True, False]) + def test_regex_replace_scalar( + self, data, to_replace, value, compile_regex, regex_kwarg, inplace + ): + df = DataFrame(data) + expected = df.copy() + + if compile_regex: + to_replace = re.compile(to_replace) + + if regex_kwarg: + regex = to_replace + to_replace = None + else: + regex = True + + result = df.replace(to_replace, value, inplace=inplace, regex=regex) + + if inplace: + assert result is None + result = df + + if value is np.nan: + expected_replace_val = np.nan + else: + expected_replace_val = "..." 
+ + expected.loc[expected["a"] == ".", "a"] = expected_replace_val + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("regex", [False, True]) + def test_replace_regex_dtype_frame(self, regex): + # GH-48644 + df1 = DataFrame({"A": ["0"], "B": ["0"]}) + expected_df1 = DataFrame({"A": [1], "B": [1]}) + result_df1 = df1.replace(to_replace="0", value=1, regex=regex) + tm.assert_frame_equal(result_df1, expected_df1) + + df2 = DataFrame({"A": ["0"], "B": ["1"]}) + expected_df2 = DataFrame({"A": [1], "B": ["1"]}) + result_df2 = df2.replace(to_replace="0", value=1, regex=regex) + tm.assert_frame_equal(result_df2, expected_df2) + + def test_replace_with_value_also_being_replaced(self): + # GH46306 + df = DataFrame({"A": [0, 1, 2], "B": [1, 0, 2]}) + result = df.replace({0: 1, 1: np.nan}) + expected = DataFrame({"A": [1, np.nan, 2], "B": [np.nan, 1, 2]}) + tm.assert_frame_equal(result, expected) + + def test_replace_categorical_no_replacement(self): + # GH#46672 + df = DataFrame( + { + "a": ["one", "two", None, "three"], + "b": ["one", None, "two", "three"], + }, + dtype="category", + ) + expected = df.copy() + + result = df.replace(to_replace=[".", "def"], value=["_", None]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py new file mode 100644 index 00000000..37431bc2 --- /dev/null +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -0,0 +1,795 @@ +from datetime import datetime +from itertools import product + +import numpy as np +import pytest + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + Interval, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + Timestamp, + cut, + date_range, +) +import pandas._testing as tm + + +@pytest.fixture() +def multiindex_df(): + levels = [["A", ""], ["B", "b"]] + return DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + + +class TestResetIndex: + def test_reset_index_empty_rangeindex(self): + # GH#45230 + df = DataFrame( + columns=["brand"], dtype=np.int64, index=RangeIndex(0, 0, 1, name="foo") + ) + + df2 = df.set_index([df.index, "brand"]) + + result = df2.reset_index([1], drop=True) + tm.assert_frame_equal(result, df[[]], check_index_type=True) + + def test_set_reset(self): + + idx = Index([2**63, 2**63 + 5, 2**63 + 10], name="foo") + + # set/reset + df = DataFrame({"A": [0, 1, 2]}, index=idx) + result = df.reset_index() + assert result["foo"].dtype == np.dtype("uint64") + + df = result.set_index("foo") + tm.assert_index_equal(df.index, idx) + + def test_set_index_reset_index_dt64tz(self): + + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + + # set/reset + df = DataFrame({"A": [0, 1, 2]}, index=idx) + result = df.reset_index() + assert result["foo"].dtype == "datetime64[ns, US/Eastern]" + + df = result.set_index("foo") + tm.assert_index_equal(df.index, idx) + + def test_reset_index_tz(self, tz_aware_fixture): + # GH 3950 + # reset_index with single level + tz = tz_aware_fixture + idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") + df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) + + expected = DataFrame( + { + "idx": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "a": range(5), + "b": ["A", "B", "C", "D", "E"], + }, + 
columns=["idx", "a", "b"], + ) + expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) + tm.assert_frame_equal(df.reset_index(), expected) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_frame_reset_index_tzaware_index(self, tz): + dr = date_range("2012-06-02", periods=10, tz=tz) + df = DataFrame(np.random.randn(len(dr)), dr) + roundtripped = df.reset_index().set_index("index") + xp = df.index.tz + rs = roundtripped.index.tz + assert xp == rs + + def test_reset_index_with_intervals(self): + idx = IntervalIndex.from_breaks(np.arange(11), name="x") + original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]] + + result = original.set_index("x") + expected = DataFrame({"y": np.arange(10)}, index=idx) + tm.assert_frame_equal(result, expected) + + result2 = result.reset_index() + tm.assert_frame_equal(result2, original) + + def test_reset_index(self, float_frame): + stacked = float_frame.stack()[::2] + stacked = DataFrame({"foo": stacked, "bar": stacked}) + + names = ["first", "second"] + stacked.index.names = names + deleveled = stacked.reset_index() + for i, (lev, level_codes) in enumerate( + zip(stacked.index.levels, stacked.index.codes) + ): + values = lev.take(level_codes) + name = names[i] + tm.assert_index_equal(values, Index(deleveled[name])) + + stacked.index.names = [None, None] + deleveled2 = stacked.reset_index() + tm.assert_series_equal( + deleveled["first"], deleveled2["level_0"], check_names=False + ) + tm.assert_series_equal( + deleveled["second"], deleveled2["level_1"], check_names=False + ) + + # default name assigned + rdf = float_frame.reset_index() + exp = Series(float_frame.index.values, name="index") + tm.assert_series_equal(rdf["index"], exp) + + # default name assigned, corner case + df = float_frame.copy() + df["index"] = "foo" + rdf = df.reset_index() + exp = Series(float_frame.index.values, name="level_0") + tm.assert_series_equal(rdf["level_0"], exp) + + # but this is ok + float_frame.index.name = "index" + deleveled = float_frame.reset_index() + tm.assert_series_equal(deleveled["index"], Series(float_frame.index)) + tm.assert_index_equal(deleveled.index, Index(range(len(deleveled))), exact=True) + + # preserve column names + float_frame.columns.name = "columns" + reset = float_frame.reset_index() + assert reset.columns.name == "columns" + + # only remove certain columns + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index(["A", "B"]) + + tm.assert_frame_equal(rs, float_frame) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index()) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index()) + + rs = df.reset_index("A") + xp = float_frame.reset_index().set_index(["index", "B"]) + tm.assert_frame_equal(rs, xp) + + # test resetting in place + df = float_frame.copy() + reset = float_frame.reset_index() + return_value = df.reset_index(inplace=True) + assert return_value is None + tm.assert_frame_equal(df, reset) + + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index("A", drop=True) + xp = float_frame.copy() + del xp["A"] + xp = xp.set_index(["B"], append=True) + tm.assert_frame_equal(rs, xp) + + def test_reset_index_name(self): + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + index=Index(range(2), name="x"), + ) + assert df.reset_index().index.name is None + assert df.reset_index(drop=True).index.name is None + return_value = 
df.reset_index(inplace=True) + assert return_value is None + assert df.index.name is None + + @pytest.mark.parametrize("levels", [["A", "B"], [0, 1]]) + def test_reset_index_level(self, levels): + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) + + # With MultiIndex + result = df.set_index(["A", "B"]).reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C", "D"]]) + + # With single-level Index (GH 16263) + result = df.set_index("A").reset_index(level=levels[0]) + tm.assert_frame_equal(result, df) + + result = df.set_index("A").reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A"]).reset_index(level=levels[0], drop=True) + tm.assert_frame_equal(result, df[["B", "C", "D"]]) + + @pytest.mark.parametrize("idx_lev", [["A", "B"], ["A"]]) + def test_reset_index_level_missing(self, idx_lev): + # Missing levels - for both MultiIndex and single-level Index: + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) + + with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"): + df.set_index(idx_lev).reset_index(level=["A", "E"]) + with pytest.raises(IndexError, match="Too many levels"): + df.set_index(idx_lev).reset_index(level=[0, 1, 2]) + + def test_reset_index_right_dtype(self): + time = np.arange(0.0, 10, np.sqrt(2) / 2) + s1 = Series( + (9.81 * time**2) / 2, index=Index(time, name="time"), name="speed" + ) + df = DataFrame(s1) + + reset = s1.reset_index() + assert reset["time"].dtype == np.float64 + + reset = df.reset_index() + assert reset["time"].dtype == np.float64 + + def test_reset_index_multiindex_col(self): + vals = np.random.randn(3, 3).astype(object) + idx = ["x", "y", "z"] + full = np.hstack(([[x] for x in idx], vals)) + df = DataFrame( + vals, + Index(idx, name="a"), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index() + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_fill=None) + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_level=1, col_fill="blah") + xp = DataFrame( + full, columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + df = DataFrame( + vals, + MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index("a") + xp = DataFrame( + full, + Index([0, 1, 2], name="d"), + columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill=None) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill="blah", col_level=1) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + def test_reset_index_multiindex_nan(self): + # GH#6322, testing 
reset_index on MultiIndexes + # when we have a nan or all nan + df = DataFrame( + {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]}) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + { + "A": ["a", "b", "c"], + "B": [np.nan, np.nan, np.nan], + "C": np.random.rand(3), + } + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + @pytest.mark.parametrize( + "name", + [ + None, + "foo", + 2, + 3.0, + pd.Timedelta(6), + Timestamp("2012-12-30", tz="UTC"), + "2012-12-31", + ], + ) + def test_reset_index_with_datetimeindex_cols(self, name): + # GH#5818 + warn = None + if isinstance(name, Timestamp) and name.tz is not None: + # _deprecate_mismatched_indexing + warn = FutureWarning + + df = DataFrame( + [[1, 2], [3, 4]], + columns=date_range("1/1/2013", "1/2/2013"), + index=["A", "B"], + ) + df.index.name = name + + with tm.assert_produces_warning(warn): + result = df.reset_index() + + item = name if name is not None else "index" + columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) + if isinstance(item, str) and item == "2012-12-31": + columns = columns.astype("datetime64[ns]") + else: + assert columns.dtype == object + + expected = DataFrame( + [["A", 1, 2], ["B", 3, 4]], + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + def test_reset_index_range(self): + # GH#12071 + df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2)) + result = df.reset_index() + assert isinstance(result.index, RangeIndex) + expected = DataFrame( + [[0, 0, 0], [1, 1, 1]], + columns=["index", "A", "B"], + index=RangeIndex(stop=2), + ) + tm.assert_frame_equal(result, expected) + + def test_reset_index_multiindex_columns(self, multiindex_df): + result = multiindex_df[["B"]].rename_axis("A").reset_index() + tm.assert_frame_equal(result, multiindex_df) + + # GH#16120: already existing column + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + multiindex_df.rename_axis("A").reset_index() + + # GH#16164: multiindex (tuple) full key + result = multiindex_df.set_index([("A", "")]).reset_index() + tm.assert_frame_equal(result, multiindex_df) + + # with additional (unnamed) index level + idx_col = DataFrame( + [[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")]) + ) + expected = pd.concat([idx_col, multiindex_df[[("B", "b"), ("A", "")]]], axis=1) + result = multiindex_df.set_index([("B", "b")], append=True).reset_index() + tm.assert_frame_equal(result, expected) + + # with index name which is a too long tuple... + msg = "Item must have length equal to number of levels." + with pytest.raises(ValueError, match=msg): + multiindex_df.rename_axis([("C", "c", "i")]).reset_index() + + # or too short... + levels = [["A", "a", ""], ["B", "b", "i"]] + df2 = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + idx_col = DataFrame( + [[0], [1]], columns=MultiIndex.from_tuples([("C", "c", "ii")]) + ) + expected = pd.concat([idx_col, df2], axis=1) + result = df2.rename_axis([("C", "c")]).reset_index(col_fill="ii") + tm.assert_frame_equal(result, expected) + + # ... 
which is incompatible with col_fill=None + with pytest.raises( + ValueError, + match=( + "col_fill=None is incompatible with " + r"incomplete column name \('C', 'c'\)" + ), + ): + df2.rename_axis([("C", "c")]).reset_index(col_fill=None) + + # with col_level != 0 + result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("flag", [False, True]) + @pytest.mark.parametrize("allow_duplicates", [False, True]) + def test_reset_index_duplicate_columns_allow( + self, multiindex_df, flag, allow_duplicates + ): + # GH#44755 reset_index with duplicate column labels + df = multiindex_df.rename_axis("A") + df = df.set_flags(allows_duplicate_labels=flag) + + if flag and allow_duplicates: + result = df.reset_index(allow_duplicates=allow_duplicates) + levels = [["A", ""], ["A", ""], ["B", "b"]] + expected = DataFrame( + [[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels) + ) + tm.assert_frame_equal(result, expected) + else: + if not flag and allow_duplicates: + msg = "Cannot specify 'allow_duplicates=True' when " + "'self.flags.allows_duplicate_labels' is False" + else: + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + df.reset_index(allow_duplicates=allow_duplicates) + + @pytest.mark.parametrize("flag", [False, True]) + def test_reset_index_duplicate_columns_default(self, multiindex_df, flag): + df = multiindex_df.rename_axis("A") + df = df.set_flags(allows_duplicate_labels=flag) + + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + df.reset_index() + + @pytest.mark.parametrize("allow_duplicates", ["bad value"]) + def test_reset_index_allow_duplicates_check(self, multiindex_df, allow_duplicates): + with pytest.raises(ValueError, match="expected type bool"): + multiindex_df.reset_index(allow_duplicates=allow_duplicates) + + @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") + def test_reset_index_datetime(self, tz_naive_fixture): + # GH#3950 + tz = tz_naive_fixture + idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx2 = Index(range(5), name="idx2", dtype="int64") + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) + + expected = DataFrame( + { + "idx1": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + + tm.assert_frame_equal(df.reset_index(), expected) + + idx3 = date_range( + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" + ) + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) + + expected = DataFrame( + { + "idx1": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "idx3": [ + datetime(2012, 1, 1), + datetime(2012, 2, 1), + datetime(2012, 3, 1), + datetime(2012, 4, 1), + datetime(2012, 5, 1), + ], + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "idx3", "a", "b"], 
+ ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected["idx3"] = expected["idx3"].apply( + lambda d: Timestamp(d, tz="Europe/Paris") + ) + tm.assert_frame_equal(df.reset_index(), expected) + + # GH#7793 + idx = MultiIndex.from_product( + [["a", "b"], date_range("20130101", periods=3, tz=tz)] + ) + df = DataFrame( + np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx + ) + + expected = DataFrame( + { + "level_0": "a a a b b b".split(), + "level_1": [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + datetime(2013, 1, 3), + ] + * 2, + "a": np.arange(6, dtype="int64"), + }, + columns=["level_0", "level_1", "a"], + ) + expected["level_1"] = expected["level_1"].apply(lambda d: Timestamp(d, tz=tz)) + result = df.reset_index() + tm.assert_frame_equal(result, expected) + + def test_reset_index_period(self): + # GH#7746 + idx = MultiIndex.from_product( + [pd.period_range("20130101", periods=3, freq="M"), list("abc")], + names=["month", "feature"], + ) + + df = DataFrame( + np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"] + ) + expected = DataFrame( + { + "month": ( + [pd.Period("2013-01", freq="M")] * 3 + + [pd.Period("2013-02", freq="M")] * 3 + + [pd.Period("2013-03", freq="M")] * 3 + ), + "feature": ["a", "b", "c"] * 3, + "a": np.arange(9, dtype="int64"), + }, + columns=["month", "feature", "a"], + ) + result = df.reset_index() + tm.assert_frame_equal(result, expected) + + def test_reset_index_delevel_infer_dtype(self): + tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1])) + index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index) + deleveled = df.reset_index() + assert is_integer_dtype(deleveled["prm1"]) + assert is_float_dtype(deleveled["prm2"]) + + def test_reset_index_with_drop( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + deleveled = ymd.reset_index(drop=True) + assert len(deleveled.columns) == len(ymd.columns) + assert deleveled.index.name == ymd.index.name + + @pytest.mark.parametrize( + "ix_data, exp_data", + [ + ( + [(pd.NaT, 1), (pd.NaT, 2)], + {"a": [pd.NaT, pd.NaT], "b": [1, 2], "x": [11, 12]}, + ), + ( + [(pd.NaT, 1), (Timestamp("2020-01-01"), 2)], + {"a": [pd.NaT, Timestamp("2020-01-01")], "b": [1, 2], "x": [11, 12]}, + ), + ( + [(pd.NaT, 1), (pd.Timedelta(123, "d"), 2)], + {"a": [pd.NaT, pd.Timedelta(123, "d")], "b": [1, 2], "x": [11, 12]}, + ), + ], + ) + def test_reset_index_nat_multiindex(self, ix_data, exp_data): + # GH#36541: that reset_index() does not raise ValueError + ix = MultiIndex.from_tuples(ix_data, names=["a", "b"]) + result = DataFrame({"x": [11, 12]}, index=ix) + result = result.reset_index() + + expected = DataFrame(exp_data) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "codes", ([[0, 0, 1, 1], [0, 1, 0, 1]], [[0, 0, -1, 1], [0, 1, 0, 1]]) + ) + def test_rest_index_multiindex_categorical_with_missing_values(self, codes): + # GH#24206 + + index = MultiIndex( + [CategoricalIndex(["A", "B"]), CategoricalIndex(["a", "b"])], codes + ) + data = {"col": range(len(index))} + df = DataFrame(data=data, index=index) + + expected = DataFrame( + { + "level_0": Categorical.from_codes(codes[0], categories=["A", "B"]), + "level_1": Categorical.from_codes(codes[1], categories=["a", "b"]), + "col": range(4), + } + ) + + res = df.reset_index() + tm.assert_frame_equal(res, expected) + + # roundtrip + res = 
expected.set_index(["level_0", "level_1"]).reset_index() + tm.assert_frame_equal(res, expected) + + +@pytest.mark.parametrize( + "array, dtype", + [ + (["a", "b"], object), + ( + pd.period_range("12-1-2000", periods=2, freq="Q-DEC"), + pd.PeriodDtype(freq="Q-DEC"), + ), + ], +) +def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype): + # GH 19602 - Preserve dtype on empty DataFrame with MultiIndex + idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array]) + result = DataFrame(index=idx)[:0].reset_index().dtypes + expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype}) + tm.assert_series_equal(result, expected) + + +def test_reset_index_empty_frame_with_datetime64_multiindex(): + # https://github.com/pandas-dev/pandas/issues/35606 + idx = MultiIndex( + levels=[[Timestamp("2020-07-20 00:00:00")], [3, 4]], + codes=[[], []], + names=["a", "b"], + ) + df = DataFrame(index=idx, columns=["c", "d"]) + result = df.reset_index() + expected = DataFrame( + columns=list("abcd"), index=RangeIndex(start=0, stop=0, step=1) + ) + expected["a"] = expected["a"].astype("datetime64[ns]") + expected["b"] = expected["b"].astype("int64") + tm.assert_frame_equal(result, expected) + + +def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby(): + # https://github.com/pandas-dev/pandas/issues/35657 + df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": pd.to_datetime("2020-01-01")}) + df = df.head(0).groupby(["c2", "c3"])[["c1"]].sum() + result = df.reset_index() + expected = DataFrame( + columns=["c2", "c3", "c1"], index=RangeIndex(start=0, stop=0, step=1) + ) + expected["c3"] = expected["c3"].astype("datetime64[ns]") + expected["c1"] = expected["c1"].astype("float64") + tm.assert_frame_equal(result, expected) + + +def test_reset_index_multiindex_nat(): + # GH 11479 + idx = range(3) + tstamp = date_range("2015-07-01", freq="D", periods=3) + df = DataFrame({"id": idx, "tstamp": tstamp, "a": list("abc")}) + df.loc[2, "tstamp"] = pd.NaT + result = df.set_index(["id", "tstamp"]).reset_index("id") + expected = DataFrame( + {"id": range(3), "a": list("abc")}, + index=pd.DatetimeIndex(["2015-07-01", "2015-07-02", "NaT"], name="tstamp"), + ) + tm.assert_frame_equal(result, expected) + + +def test_drop_pos_args_deprecation(): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}).set_index("a") + msg = ( + r"In a future version of pandas all arguments of DataFrame\.reset_index " + r"except for the argument 'level' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.reset_index("a", False) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + +def test_reset_index_interval_columns_object_cast(): + # GH 19136 + df = DataFrame( + np.eye(2), index=Index([1, 2], name="Year"), columns=cut([1, 2], [0, 1, 2]) + ) + result = df.reset_index() + expected = DataFrame( + [[1, 1.0, 0.0], [2, 0.0, 1.0]], + columns=Index(["Year", Interval(0, 1), Interval(1, 2)]), + ) + tm.assert_frame_equal(result, expected) + + +def test_reset_index_rename(float_frame): + # GH 6878 + result = float_frame.reset_index(names="new_name") + expected = Series(float_frame.index.values, name="new_name") + tm.assert_series_equal(result["new_name"], expected) + + result = float_frame.reset_index(names=123) + expected = Series(float_frame.index.values, name=123) + tm.assert_series_equal(result[123], expected) + + +def test_reset_index_rename_multiindex(float_frame): + # GH 6878 + stacked_df = 
float_frame.stack()[::2] + stacked_df = DataFrame({"foo": stacked_df, "bar": stacked_df}) + + names = ["first", "second"] + stacked_df.index.names = names + + result = stacked_df.reset_index() + expected = stacked_df.reset_index(names=["new_first", "new_second"]) + tm.assert_series_equal(result["first"], expected["new_first"], check_names=False) + tm.assert_series_equal(result["second"], expected["new_second"], check_names=False) + + +def test_errorreset_index_rename(float_frame): + # GH 6878 + stacked_df = float_frame.stack()[::2] + stacked_df = DataFrame({"first": stacked_df, "second": stacked_df}) + + with pytest.raises( + ValueError, match="Index names must be str or 1-dimensional list" + ): + stacked_df.reset_index(names={"first": "new_first", "second": "new_second"}) + + with pytest.raises(IndexError, match="list index out of range"): + stacked_df.reset_index(names=["new_first"]) diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py new file mode 100644 index 00000000..dd920694 --- /dev/null +++ b/pandas/tests/frame/methods/test_round.py @@ -0,0 +1,218 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameRound: + def test_round(self): + # GH#2665 + + # Test that rounding an empty DataFrame does nothing + df = DataFrame() + tm.assert_frame_equal(df, df.round()) + + # Here's the test frame we'll be working with + df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. decimals=0) + expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) + tm.assert_frame_equal(df.round(), expected_rounded) + + # Round with an integer + decimals = 2 + expected_rounded = DataFrame( + {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} + ) + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # This should also work with np.round (since np.round dispatches to + # df.round) + tm.assert_frame_equal(np.round(df, decimals), expected_rounded) + + # Round with a list + round_list = [1, 2] + msg = "decimals must be an integer, a dict-like or a Series" + with pytest.raises(TypeError, match=msg): + df.round(round_list) + + # Round with a dictionary + expected_rounded = DataFrame( + {"col1": [1.1, 2.1, 3.1], "col2": [1.23, 2.23, 3.23]} + ) + round_dict = {"col1": 1, "col2": 2} + tm.assert_frame_equal(df.round(round_dict), expected_rounded) + + # Incomplete dict + expected_partially_rounded = DataFrame( + {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} + ) + partial_round_dict = {"col2": 1} + tm.assert_frame_equal(df.round(partial_round_dict), expected_partially_rounded) + + # Dict with unknown elements + wrong_round_dict = {"col3": 2, "col2": 1} + tm.assert_frame_equal(df.round(wrong_round_dict), expected_partially_rounded) + + # float input to `decimals` + non_int_round_dict = {"col1": 1, "col2": 0.5} + msg = "Values in decimals must be integers" + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_dict) + + # String input + non_int_round_dict = {"col1": 1, "col2": "foo"} + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_Series) + + # List input + non_int_round_dict = {"col1": 1, "col2": [1, 2]} + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_dict) + + 
non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_Series) + + # Non integer Series inputs + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_Series) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError, match=msg): + df.round(non_int_round_Series) + + # Negative numbers + negative_round_dict = {"col1": -1, "col2": -2} + big_df = df * 100 + expected_neg_rounded = DataFrame( + {"col1": [110.0, 210, 310], "col2": [100.0, 200, 300]} + ) + tm.assert_frame_equal(big_df.round(negative_round_dict), expected_neg_rounded) + + # nan in Series round + nan_round_Series = Series({"col1": np.nan, "col2": 1}) + + with pytest.raises(TypeError, match=msg): + df.round(nan_round_Series) + + # Make sure this doesn't break existing Series.round + tm.assert_series_equal(df["col1"].round(1), expected_rounded["col1"]) + + # named columns + # GH#11986 + decimals = 2 + expected_rounded = DataFrame( + {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} + ) + df.columns.name = "cols" + expected_rounded.columns.name = "cols" + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # interaction of named columns & series + tm.assert_series_equal(df["col1"].round(decimals), expected_rounded["col1"]) + tm.assert_series_equal(df.round(decimals)["col1"], expected_rounded["col1"]) + + def test_round_numpy(self): + # GH#12600 + df = DataFrame([[1.53, 1.36], [0.06, 7.01]]) + out = np.round(df, decimals=0) + expected = DataFrame([[2.0, 1.0], [0.0, 7.0]]) + tm.assert_frame_equal(out, expected) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.round(df, decimals=0, out=df) + + def test_round_numpy_with_nan(self): + # See GH#14197 + df = Series([1.53, np.nan, 0.06]).to_frame() + with tm.assert_produces_warning(None): + result = df.round() + expected = Series([2.0, np.nan, 0.0]).to_frame() + tm.assert_frame_equal(result, expected) + + def test_round_mixed_type(self): + # GH#11885 + df = DataFrame( + { + "col1": [1.1, 2.2, 3.3, 4.4], + "col2": ["1", "a", "c", "f"], + "col3": date_range("20111111", periods=4), + } + ) + round_0 = DataFrame( + { + "col1": [1.0, 2.0, 3.0, 4.0], + "col2": ["1", "a", "c", "f"], + "col3": date_range("20111111", periods=4), + } + ) + tm.assert_frame_equal(df.round(), round_0) + tm.assert_frame_equal(df.round(1), df) + tm.assert_frame_equal(df.round({"col1": 1}), df) + tm.assert_frame_equal(df.round({"col1": 0}), round_0) + tm.assert_frame_equal(df.round({"col1": 0, "col2": 1}), round_0) + tm.assert_frame_equal(df.round({"col3": 1}), df) + + def test_round_with_duplicate_columns(self): + # GH#11611 + + df = DataFrame( + np.random.random([3, 3]), + columns=["A", "B", "C"], + index=["first", "second", "third"], + ) + + dfs = pd.concat((df, df), axis=1) + rounded = dfs.round() + tm.assert_index_equal(rounded.index, dfs.index) + + decimals = Series([1, 0, 2], index=["A", "B", "A"]) + msg = "Index of decimals must be unique" + with pytest.raises(ValueError, match=msg): + df.round(decimals) + + def test_round_builtin(self): + # GH#11763 + # Here's the test frame we'll be working with + df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. 
decimals=0) + expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) + tm.assert_frame_equal(round(df), expected_rounded) + + def test_round_nonunique_categorical(self): + # See GH#21809 + idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) + df = DataFrame(np.random.rand(6, 3), columns=list("abc")) + + expected = df.round(3) + expected.index = idx + + df_categorical = df.copy().set_index(idx) + assert df_categorical.shape == (6, 3) + result = df_categorical.round(3) + assert result.shape == (6, 3) + + tm.assert_frame_equal(result, expected) + + def test_round_interval_category_columns(self): + # GH#30063 + columns = pd.CategoricalIndex(pd.interval_range(0, 2)) + df = DataFrame([[0.66, 1.1], [0.3, 0.25]], columns=columns) + + result = df.round() + expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py new file mode 100644 index 00000000..901987a9 --- /dev/null +++ b/pandas/tests/frame/methods/test_sample.py @@ -0,0 +1,365 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +import pandas.core.common as com + + +class TestSample: + @pytest.fixture(params=[Series, DataFrame]) + def obj(self, request): + klass = request.param + if klass is Series: + arr = np.random.randn(10) + else: + arr = np.random.randn(10, 10) + return klass(arr, dtype=None) + + @pytest.mark.parametrize("test", list(range(10))) + def test_sample(self, test, obj): + # Fixes issue: 2419 + # Check behavior of random_state argument + # Check for stability when receives seed or random state -- run 10 + # times. + + seed = np.random.randint(0, 100) + tm.assert_equal( + obj.sample(n=4, random_state=seed), obj.sample(n=4, random_state=seed) + ) + + tm.assert_equal( + obj.sample(frac=0.7, random_state=seed), + obj.sample(frac=0.7, random_state=seed), + ) + + tm.assert_equal( + obj.sample(n=4, random_state=np.random.RandomState(test)), + obj.sample(n=4, random_state=np.random.RandomState(test)), + ) + + tm.assert_equal( + obj.sample(frac=0.7, random_state=np.random.RandomState(test)), + obj.sample(frac=0.7, random_state=np.random.RandomState(test)), + ) + + tm.assert_equal( + obj.sample(frac=2, replace=True, random_state=np.random.RandomState(test)), + obj.sample(frac=2, replace=True, random_state=np.random.RandomState(test)), + ) + + os1, os2 = [], [] + for _ in range(2): + np.random.seed(test) + os1.append(obj.sample(n=4)) + os2.append(obj.sample(frac=0.7)) + tm.assert_equal(*os1) + tm.assert_equal(*os2) + + def test_sample_lengths(self, obj): + # Check lengths are right + assert len(obj.sample(n=4) == 4) + assert len(obj.sample(frac=0.34) == 3) + assert len(obj.sample(frac=0.36) == 4) + + def test_sample_invalid_random_state(self, obj): + # Check for error when random_state argument invalid. + msg = ( + "random_state must be an integer, array-like, a BitGenerator, Generator, " + "a numpy RandomState, or None" + ) + with pytest.raises(ValueError, match=msg): + obj.sample(random_state="a_string") + + def test_sample_wont_accept_n_and_frac(self, obj): + # Giving both frac and N throws error + msg = "Please enter a value for `frac` OR `n`, not both" + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, frac=0.3) + + def test_sample_requires_positive_n_frac(self, obj): + with pytest.raises( + ValueError, + match="A negative number of rows requested. 
Please provide `n` >= 0", + ): + obj.sample(n=-3) + with pytest.raises( + ValueError, + match="A negative number of rows requested. Please provide `frac` >= 0", + ): + obj.sample(frac=-0.3) + + def test_sample_requires_integer_n(self, obj): + # Make sure float values of `n` give error + with pytest.raises(ValueError, match="Only integers accepted as `n` values"): + obj.sample(n=3.2) + + def test_sample_invalid_weight_lengths(self, obj): + # Weight length must be right + msg = "Weights and axis to be sampled must be of same length" + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, weights=[0, 1]) + + with pytest.raises(ValueError, match=msg): + bad_weights = [0.5] * 11 + obj.sample(n=3, weights=bad_weights) + + with pytest.raises(ValueError, match="Fewer non-zero entries in p than size"): + bad_weight_series = Series([0, 0, 0.2]) + obj.sample(n=4, weights=bad_weight_series) + + def test_sample_negative_weights(self, obj): + # Check won't accept negative weights + bad_weights = [-0.1] * 10 + msg = "weight vector many not include negative values" + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, weights=bad_weights) + + def test_sample_inf_weights(self, obj): + # Check inf and -inf throw errors: + + weights_with_inf = [0.1] * 10 + weights_with_inf[0] = np.inf + msg = "weight vector may not include `inf` values" + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, weights=weights_with_inf) + + weights_with_ninf = [0.1] * 10 + weights_with_ninf[0] = -np.inf + with pytest.raises(ValueError, match=msg): + obj.sample(n=3, weights=weights_with_ninf) + + def test_sample_zero_weights(self, obj): + # All zeros raises errors + + zero_weights = [0] * 10 + with pytest.raises(ValueError, match="Invalid weights: weights sum to zero"): + obj.sample(n=3, weights=zero_weights) + + def test_sample_missing_weights(self, obj): + # All missing weights + + nan_weights = [np.nan] * 10 + with pytest.raises(ValueError, match="Invalid weights: weights sum to zero"): + obj.sample(n=3, weights=nan_weights) + + def test_sample_none_weights(self, obj): + # Check None are also replaced by zeros. 
+ weights_with_None = [None] * 10 + weights_with_None[5] = 0.5 + tm.assert_equal( + obj.sample(n=1, axis=0, weights=weights_with_None), obj.iloc[5:6] + ) + + @pytest.mark.parametrize( + "func_str,arg", + [ + ("np.array", [2, 3, 1, 0]), + ("np.random.MT19937", 3), + ("np.random.PCG64", 11), + ], + ) + def test_sample_random_state(self, func_str, arg, frame_or_series): + # GH#32503 + obj = DataFrame({"col1": range(10, 20), "col2": range(20, 30)}) + obj = tm.get_obj(obj, frame_or_series) + result = obj.sample(n=3, random_state=eval(func_str)(arg)) + expected = obj.sample(n=3, random_state=com.random_state(eval(func_str)(arg))) + tm.assert_equal(result, expected) + + def test_sample_generator(self, frame_or_series): + # GH#38100 + obj = frame_or_series(np.arange(100)) + rng = np.random.default_rng() + + # Consecutive calls should advance the seed + result1 = obj.sample(n=50, random_state=rng) + result2 = obj.sample(n=50, random_state=rng) + assert not (result1.index.values == result2.index.values).all() + + # Matching generator initialization must give same result + # Consecutive calls should advance the seed + result1 = obj.sample(n=50, random_state=np.random.default_rng(11)) + result2 = obj.sample(n=50, random_state=np.random.default_rng(11)) + tm.assert_equal(result1, result2) + + def test_sample_upsampling_without_replacement(self, frame_or_series): + # GH#27451 + + obj = DataFrame({"A": list("abc")}) + obj = tm.get_obj(obj, frame_or_series) + + msg = ( + "Replace has to be set to `True` when " + "upsampling the population `frac` > 1." + ) + with pytest.raises(ValueError, match=msg): + obj.sample(frac=2, replace=False) + + +class TestSampleDataFrame: + # Tests which are relevant only for DataFrame, so these are + # as fully parametrized as they can get. + + def test_sample(self): + # GH#2419 + # additional specific object based tests + + # A few dataframe test with degenerate weights. + easy_weight_list = [0] * 10 + easy_weight_list[5] = 1 + + df = DataFrame( + { + "col1": range(10, 20), + "col2": range(20, 30), + "colString": ["a"] * 10, + "easyweights": easy_weight_list, + } + ) + sample1 = df.sample(n=1, weights="easyweights") + tm.assert_frame_equal(sample1, df.iloc[5:6]) + + # Ensure proper error if string given as weight for Series or + # DataFrame with axis = 1. + ser = Series(range(10)) + msg = "Strings cannot be passed as weights when sampling from a Series." + with pytest.raises(ValueError, match=msg): + ser.sample(n=3, weights="weight_column") + + msg = ( + "Strings can only be passed to weights when sampling from rows on a " + "DataFrame" + ) + with pytest.raises(ValueError, match=msg): + df.sample(n=1, weights="weight_column", axis=1) + + # Check weighting key error + with pytest.raises( + KeyError, match="'String passed to weights not a valid column'" + ): + df.sample(n=3, weights="not_a_real_column_name") + + # Check that re-normalizes weights that don't sum to one. 
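+        # a single non-zero weight of 0.5 is renormalized to 1, so row 0 is always selected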
+ weights_less_than_1 = [0] * 10 + weights_less_than_1[0] = 0.5 + tm.assert_frame_equal(df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) + + ### + # Test axis argument + ### + + # Test axis argument + df = DataFrame({"col1": range(10), "col2": ["a"] * 10}) + second_column_weight = [0, 1] + tm.assert_frame_equal( + df.sample(n=1, axis=1, weights=second_column_weight), df[["col2"]] + ) + + # Different axis arg types + tm.assert_frame_equal( + df.sample(n=1, axis="columns", weights=second_column_weight), df[["col2"]] + ) + + weight = [0] * 10 + weight[5] = 0.5 + tm.assert_frame_equal(df.sample(n=1, axis="rows", weights=weight), df.iloc[5:6]) + tm.assert_frame_equal( + df.sample(n=1, axis="index", weights=weight), df.iloc[5:6] + ) + + # Check out of range axis values + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.sample(n=1, axis=2) + + msg = "No axis named not_a_name for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.sample(n=1, axis="not_a_name") + + ser = Series(range(10)) + with pytest.raises(ValueError, match="No axis named 1 for object type Series"): + ser.sample(n=1, axis=1) + + # Test weight length compared to correct axis + msg = "Weights and axis to be sampled must be of same length" + with pytest.raises(ValueError, match=msg): + df.sample(n=1, axis=1, weights=[0.5] * 10) + + def test_sample_axis1(self): + # Check weights with axis = 1 + easy_weight_list = [0] * 3 + easy_weight_list[2] = 1 + + df = DataFrame( + {"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10} + ) + sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) + tm.assert_frame_equal(sample1, df[["colString"]]) + + # Test default axes + tm.assert_frame_equal( + df.sample(n=3, random_state=42), df.sample(n=3, axis=0, random_state=42) + ) + + def test_sample_aligns_weights_with_frame(self): + + # Test that function aligns weights with frame + df = DataFrame({"col1": [5, 6, 7], "col2": ["a", "b", "c"]}, index=[9, 5, 3]) + ser = Series([1, 0, 0], index=[3, 5, 9]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=ser)) + + # Weights have index values to be dropped because not in + # sampled DataFrame + ser2 = Series([0.001, 0, 10000], index=[3, 5, 10]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=ser2)) + + # Weights have empty values to be filed with zeros + ser3 = Series([0.01, 0], index=[3, 5]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=ser3)) + + # No overlap in weight and sampled DataFrame indices + ser4 = Series([1, 0], index=[1, 2]) + + with pytest.raises(ValueError, match="Invalid weights: weights sum to zero"): + df.sample(1, weights=ser4) + + def test_sample_is_copy(self): + # GH#27357, GH#30784: ensure the result of sample is an actual copy and + # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df2 = df.sample(3) + + with tm.assert_produces_warning(None): + df2["d"] = 1 + + def test_sample_does_not_modify_weights(self): + # GH-42843 + result = np.array([np.nan, 1, np.nan]) + expected = result.copy() + ser = Series([1, 2, 3]) + + # Test numpy array weights won't be modified in place + ser.sample(weights=result) + tm.assert_numpy_array_equal(result, expected) + + # Test DataFrame column won't be modified in place + df = DataFrame({"values": [1, 1, 1], "weights": [1, np.nan, np.nan]}) + expected = df["weights"].copy() + + df.sample(frac=1.0, replace=True, weights="weights") + 
result = df["weights"] + tm.assert_series_equal(result, expected) + + def test_sample_ignore_index(self): + # GH 38581 + df = DataFrame( + {"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10} + ) + result = df.sample(3, ignore_index=True) + expected_index = Index(range(3)) + tm.assert_index_equal(result.index, expected_index, exact=True) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py new file mode 100644 index 00000000..9284e0c0 --- /dev/null +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -0,0 +1,467 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import ExtensionDtype + +import pandas as pd +from pandas import ( + DataFrame, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import ExtensionArray + + +class DummyDtype(ExtensionDtype): + type = int + + def __init__(self, numeric) -> None: + self._numeric = numeric + + @property + def name(self): + return "Dummy" + + @property + def _is_numeric(self): + return self._numeric + + +class DummyArray(ExtensionArray): + def __init__(self, data, dtype) -> None: + self.data = data + self._dtype = dtype + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return self._dtype + + def __len__(self) -> int: + return len(self.data) + + def __getitem__(self, item): + pass + + def copy(self): + return self + + +class TestSelectDtypes: + def test_select_dtypes_include_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=[np.number]) + ei = df[["b", "c", "d", "k"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number], exclude=["timedelta"]) + ei = df[["b", "c", "d"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, "category"], exclude=["timedelta"]) + ei = df[["b", "c", "d", "f"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetime"]) + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetime64"]) + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetimetz"]) + ei = df[["h", "i"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(include=["period"]) + + def test_select_dtypes_exclude_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + } + ) + re = df.select_dtypes(exclude=[np.number]) + ee = df[["a", "e"]] + tm.assert_frame_equal(re, ee) + + def test_select_dtypes_exclude_include_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6, dtype="u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + exclude = (np.datetime64,) + include = np.bool_, "integer" + r = df.select_dtypes(include=include, exclude=exclude) + e = 
df[["b", "c", "e"]] + tm.assert_frame_equal(r, e) + + exclude = ("datetime",) + include = "bool", "int64", "int32" + r = df.select_dtypes(include=include, exclude=exclude) + e = df[["b", "e"]] + tm.assert_frame_equal(r, e) + + @pytest.mark.parametrize( + "include", [(np.bool_, "int"), (np.bool_, "integer"), ("bool", int)] + ) + def test_select_dtypes_exclude_include_int(self, include): + # Fix select_dtypes(include='int') for Windows, FYI #36596 + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6, dtype="int32"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + exclude = (np.datetime64,) + result = df.select_dtypes(include=include, exclude=exclude) + expected = df[["b", "c", "e"]] + tm.assert_frame_equal(result, expected) + + def test_select_dtypes_include_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number) + ei = df[["b", "c", "d", "k"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="datetime") + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="datetime64") + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="category") + ei = df[["f"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(include="period") + + def test_select_dtypes_exclude_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(exclude=np.number) + ei = df[["a", "e", "f", "g", "h", "i", "j"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(exclude="category") + ei = df[["a", "b", "c", "d", "e", "g", "h", "i", "j", "k"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(exclude="period") + + def test_select_dtypes_include_exclude_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number, exclude="floating") + ei = df[["b", "c", "k"]] + tm.assert_frame_equal(ri, ei) + + def test_select_dtypes_include_exclude_mixed_scalars_lists(self): + df = DataFrame( + { 
+ "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number, exclude=["floating", "timedelta"]) + ei = df[["b", "c"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, "category"], exclude="floating") + ei = df[["b", "c", "f", "k"]] + tm.assert_frame_equal(ri, ei) + + def test_select_dtypes_duplicate_columns(self): + # GH20839 + df = DataFrame( + { + "a": ["a", "b", "c"], + "b": [1, 2, 3], + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + df.columns = ["a", "a", "b", "b", "b", "c"] + + expected = DataFrame( + {"a": list(range(1, 4)), "b": np.arange(3, 6).astype("u1")} + ) + + result = df.select_dtypes(include=[np.number], exclude=["floating"]) + tm.assert_frame_equal(result, expected) + + def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + df["g"] = df.f.diff() + assert not hasattr(np, "u8") + r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"]) + e = df[["a", "b"]] + tm.assert_frame_equal(r, e) + + r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"]) + e = df[["a", "b", "g"]] + tm.assert_frame_equal(r, e) + + def test_select_dtypes_empty(self): + df = DataFrame({"a": list("abc"), "b": list(range(1, 4))}) + msg = "at least one of include or exclude must be nonempty" + with pytest.raises(ValueError, match=msg): + df.select_dtypes() + + def test_select_dtypes_bad_datetime64(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + with pytest.raises(ValueError, match=".+ is too specific"): + df.select_dtypes(include=["datetime64[D]"]) + + with pytest.raises(ValueError, match=".+ is too specific"): + df.select_dtypes(exclude=["datetime64[as]"]) + + def test_select_dtypes_datetime_with_tz(self): + + df2 = DataFrame( + { + "A": Timestamp("20130102", tz="US/Eastern"), + "B": Timestamp("20130603", tz="CET"), + }, + index=range(5), + ) + df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) + result = df3.select_dtypes(include=["datetime64[ns]"]) + expected = df3.reindex(columns=[]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"] + ) + @pytest.mark.parametrize("arg", ["include", "exclude"]) + def test_select_dtypes_str_raises(self, dtype, arg): + df = DataFrame( + { + "a": list("abc"), + "g": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + msg = "string dtypes are not allowed" + kwargs = {arg: [dtype]} + + with 
pytest.raises(TypeError, match=msg): + df.select_dtypes(**kwargs) + + def test_select_dtypes_bad_arg_raises(self): + df = DataFrame( + { + "a": list("abc"), + "g": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + + msg = "data type.*not understood" + with pytest.raises(TypeError, match=msg): + df.select_dtypes(["blargy, blarg, blarg"]) + + def test_select_dtypes_typecodes(self): + # GH 11990 + df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random()) + expected = df + FLOAT_TYPES = list(np.typecodes["AllFloat"]) + tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) + + @pytest.mark.parametrize( + "arr,expected", + ( + (np.array([1, 2], dtype=np.int32), True), + (pd.array([1, 2], dtype="Int32"), True), + (DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True), + (DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False), + ), + ) + def test_select_dtypes_numeric(self, arr, expected): + # GH 35340 + + df = DataFrame(arr) + is_selected = df.select_dtypes(np.number).shape == df.shape + assert is_selected == expected + + def test_select_dtypes_numeric_nullable_string(self, nullable_string_dtype): + arr = pd.array(["a", "b"], dtype=nullable_string_dtype) + df = DataFrame(arr) + is_selected = df.select_dtypes(np.number).shape == df.shape + assert not is_selected + + @pytest.mark.parametrize( + "expected, float_dtypes", + [ + [ + DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)} + ).astype(dtype={"A": float, "B": np.float64, "C": np.float32}), + float, + ], + [ + DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)} + ).astype(dtype={"A": float, "B": np.float64, "C": np.float32}), + "float", + ], + [DataFrame({"C": range(10, 7, -1)}, dtype=np.float32), np.float32], + [ + DataFrame({"A": range(3), "B": range(5, 8)}).astype( + dtype={"A": float, "B": np.float64} + ), + np.float64, + ], + ], + ) + def test_select_dtypes_float_dtype(self, expected, float_dtypes): + # GH#42452 + dtype_dict = {"A": float, "B": np.float64, "C": np.float32} + df = DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)}, + ) + df = df.astype(dtype_dict) + result = df.select_dtypes(include=float_dtypes) + tm.assert_frame_equal(result, expected) + + def test_np_bool_ea_boolean_include_number(self): + # GH 46870 + df = DataFrame( + { + "a": [1, 2, 3], + "b": pd.Series([True, False, True], dtype="boolean"), + "c": np.array([True, False, True]), + "d": pd.Categorical([True, False, True]), + "e": pd.arrays.SparseArray([True, False, True]), + } + ) + result = df.select_dtypes(include="number") + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_select_dtypes_no_view(self): + # https://github.com/pandas-dev/pandas/issues/48090 + # result of this method is not a view on the original dataframe + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + result = df.select_dtypes(include=["number"]) + result.iloc[0, 0] = 0 + tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py new file mode 100644 index 00000000..f105a38e --- /dev/null +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -0,0 +1,193 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class SharedSetAxisTests: + 
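+    """Tests shared by DataFrame.set_axis and Series.set_axis; subclasses provide the 'obj' fixture."""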
@pytest.fixture + def obj(self): + raise NotImplementedError("Implemented by subclasses") + + def test_set_axis(self, obj): + # GH14636; this tests setting index for both Series and DataFrame + new_index = list("abcd")[: len(obj)] + + expected = obj.copy() + expected.index = new_index + + # inplace=False + msg = "set_axis 'inplace' keyword is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = obj.set_axis(new_index, axis=0, inplace=False) + tm.assert_equal(expected, result) + + def test_set_axis_copy(self, obj): + # Test copy keyword GH#47932 + new_index = list("abcd")[: len(obj)] + + orig = obj.iloc[:] + expected = obj.copy() + expected.index = new_index + + with pytest.raises( + ValueError, match="Cannot specify both inplace=True and copy=True" + ): + with tm.assert_produces_warning(FutureWarning): + obj.set_axis(new_index, axis=0, inplace=True, copy=True) + + result = obj.set_axis(new_index, axis=0, copy=True) + tm.assert_equal(expected, result) + assert result is not obj + # check we DID make a copy + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) + else: + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) + + result = obj.set_axis(new_index, axis=0, copy=False) + tm.assert_equal(expected, result) + assert result is not obj + # check we did NOT make a copy + if obj.ndim == 1: + assert tm.shares_memory(result, obj) + else: + assert all( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) + + # copy defaults to True + result = obj.set_axis(new_index, axis=0) + tm.assert_equal(expected, result) + assert result is not obj + # check we DID make a copy + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) + else: + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) + + # Do this last since it alters obj inplace + with tm.assert_produces_warning(FutureWarning): + res = obj.set_axis(new_index, inplace=True, copy=False) + assert res is None + tm.assert_equal(expected, obj) + # check we did NOT make a copy + if obj.ndim == 1: + assert tm.shares_memory(obj, orig) + else: + assert all( + tm.shares_memory(obj.iloc[:, i], orig.iloc[:, i]) + for i in range(obj.shape[1]) + ) + + @pytest.mark.parametrize("axis", [0, "index", 1, "columns"]) + def test_set_axis_inplace_axis(self, axis, obj): + # GH#14636 + if obj.ndim == 1 and axis in [1, "columns"]: + # Series only has [0, "index"] + return + + new_index = list("abcd")[: len(obj)] + + expected = obj.copy() + if axis in [0, "index"]: + expected.index = new_index + else: + expected.columns = new_index + + result = obj.copy() + with tm.assert_produces_warning(FutureWarning): + result.set_axis(new_index, axis=axis, inplace=True) + tm.assert_equal(result, expected) + + def test_set_axis_unnamed_kwarg_warns(self, obj): + # omitting the "axis" parameter + new_index = list("abcd")[: len(obj)] + + expected = obj.copy() + expected.index = new_index + + with tm.assert_produces_warning( + FutureWarning, match="set_axis 'inplace' keyword" + ): + result = obj.set_axis(new_index, inplace=False) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("axis", [3, "foo"]) + def test_set_axis_invalid_axis_name(self, axis, obj): + # wrong values for the "axis" parameter + with pytest.raises(ValueError, match="No axis named"): + obj.set_axis(list("abc"), axis=axis) + + def test_set_axis_setattr_index_not_collection(self, obj): + # wrong type + msg = ( + 
r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, None was passed" + ) + with pytest.raises(TypeError, match=msg): + obj.index = None + + def test_set_axis_setattr_index_wrong_length(self, obj): + # wrong length + msg = ( + f"Length mismatch: Expected axis has {len(obj)} elements, " + f"new values have {len(obj)-1} elements" + ) + with pytest.raises(ValueError, match=msg): + obj.index = np.arange(len(obj) - 1) + + if obj.ndim == 2: + with pytest.raises(ValueError, match="Length mismatch"): + obj.columns = obj.columns[::2] + + +class TestDataFrameSetAxis(SharedSetAxisTests): + @pytest.fixture + def obj(self): + df = DataFrame( + {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2], "C": [4.4, 5.5, 6.6]}, + index=[2010, 2011, 2012], + ) + return df + + +class TestSeriesSetAxis(SharedSetAxisTests): + @pytest.fixture + def obj(self): + ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64") + return ser + + +def test_nonkeyword_arguments_deprecation_warning(): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.set_axis " + r"except for the argument 'labels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.set_axis([1, 2, 4], 0) + expected = DataFrame({"a": [1, 2, 3]}, index=[1, 2, 4]) + tm.assert_frame_equal(result, expected) + + ser = Series([1, 2, 3]) + msg = ( + r"In a future version of pandas all arguments of Series\.set_axis " + r"except for the argument 'labels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.set_axis([1, 2, 4], 0) + expected = Series([1, 2, 3], index=[1, 2, 4]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py new file mode 100644 index 00000000..4c39cf99 --- /dev/null +++ b/pandas/tests/frame/methods/test_set_index.py @@ -0,0 +1,718 @@ +""" +See also: test_reindex.py:TestReindexSetIndex +""" + +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +class TestSetIndex: + def test_set_index_multiindex(self): + # segfault in GH#3308 + d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]} + df = DataFrame(d) + tuples = [(0, 1), (0, 2), (1, 2)] + df["tuples"] = tuples + + index = MultiIndex.from_tuples(df["tuples"]) + # it works! 
+ df.set_index(index) + + def test_set_index_empty_column(self): + # GH#1971 + df = DataFrame( + [ + {"a": 1, "p": 0}, + {"a": 2, "m": 10}, + {"a": 3, "m": 11, "p": 20}, + {"a": 4, "m": 12, "p": 21}, + ], + columns=["a", "m", "p", "x"], + ) + + result = df.set_index(["a", "x"]) + + expected = df[["m", "p"]] + expected.index = MultiIndex.from_arrays([df["a"], df["x"]], names=["a", "x"]) + tm.assert_frame_equal(result, expected) + + def test_set_index_empty_dataframe(self): + # GH#38419 + df1 = DataFrame( + {"a": Series(dtype="datetime64[ns]"), "b": Series(dtype="int64"), "c": []} + ) + + df2 = df1.set_index(["a", "b"]) + result = df2.index.to_frame().dtypes + expected = df1[["a", "b"]].dtypes + tm.assert_series_equal(result, expected) + + def test_set_index_multiindexcolumns(self): + columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)]) + df = DataFrame(np.random.randn(3, 3), columns=columns) + + result = df.set_index(df.columns[0]) + + expected = df.iloc[:, 1:] + expected.index = df.iloc[:, 0].values + expected.index.names = [df.columns[0]] + tm.assert_frame_equal(result, expected) + + def test_set_index_timezone(self): + # GH#12358 + # tz-aware Series should retain the tz + idx = DatetimeIndex(["2014-01-01 10:10:10"], tz="UTC").tz_convert("Europe/Rome") + df = DataFrame({"A": idx}) + assert df.set_index(idx).index[0].hour == 11 + assert DatetimeIndex(Series(df.A))[0].hour == 11 + assert df.set_index(df.A).index[0].hour == 11 + + def test_set_index_cast_datetimeindex(self): + df = DataFrame( + { + "A": [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], + "B": np.random.randn(1000), + } + ) + + idf = df.set_index("A") + assert isinstance(idf.index, DatetimeIndex) + + def test_set_index_dst(self): + di = date_range("2006-10-29 00:00:00", periods=3, freq="H", tz="US/Pacific") + + df = DataFrame(data={"a": [0, 1, 2], "b": [3, 4, 5]}, index=di).reset_index() + # single level + res = df.set_index("index") + exp = DataFrame( + data={"a": [0, 1, 2], "b": [3, 4, 5]}, + index=Index(di, name="index"), + ) + exp.index = exp.index._with_freq(None) + tm.assert_frame_equal(res, exp) + + # GH#12920 + res = df.set_index(["index", "a"]) + exp_index = MultiIndex.from_arrays([di, [0, 1, 2]], names=["index", "a"]) + exp = DataFrame({"b": [3, 4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp) + + def test_set_index(self, float_string_frame): + df = float_string_frame + idx = Index(np.arange(len(df))[::-1]) + + df = df.set_index(idx) + tm.assert_index_equal(df.index, idx) + with pytest.raises(ValueError, match="Length mismatch"): + df.set_index(idx[::2]) + + def test_set_index_names(self): + df = tm.makeDataFrame() + df.index.name = "name" + + assert df.set_index(df.index).index.names == ["name"] + + mi = MultiIndex.from_arrays(df[["A", "B"]].T.values, names=["A", "B"]) + mi2 = MultiIndex.from_arrays( + df[["A", "B", "A", "B"]].T.values, names=["A", "B", "C", "D"] + ) + + df = df.set_index(["A", "B"]) + + assert df.set_index(df.index).index.names == ["A", "B"] + + # Check that set_index isn't converting a MultiIndex into an Index + assert isinstance(df.set_index(df.index).index, MultiIndex) + + # Check actual equality + tm.assert_index_equal(df.set_index(df.index).index, mi) + + idx2 = df.index.rename(["C", "D"]) + + # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather + # than a pair of tuples + assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex) + + # Check equality + tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) + + def 
test_set_index_cast(self): + # issue casting an index then set_index + df = DataFrame( + {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012] + ) + df2 = df.set_index(df.index.astype(np.int32)) + tm.assert_frame_equal(df, df2) + + # A has duplicate values, C does not + @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")]) + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_drop_inplace(self, frame_of_index_cols, drop, inplace, keys): + df = frame_of_index_cols + + if isinstance(keys, list): + idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys) + else: + idx = Index(df[keys], name=keys) + expected = df.drop(keys, axis=1) if drop else df + expected.index = idx + + if inplace: + result = df.copy() + return_value = result.set_index(keys, drop=drop, inplace=True) + assert return_value is None + else: + result = df.set_index(keys, drop=drop) + + tm.assert_frame_equal(result, expected) + + # A has duplicate values, C does not + @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_append(self, frame_of_index_cols, drop, keys): + df = frame_of_index_cols + + keys = keys if isinstance(keys, list) else [keys] + idx = MultiIndex.from_arrays( + [df.index] + [df[x] for x in keys], names=[None] + keys + ) + expected = df.drop(keys, axis=1) if drop else df.copy() + expected.index = idx + + result = df.set_index(keys, drop=drop, append=True) + + tm.assert_frame_equal(result, expected) + + # A has duplicate values, C does not + @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_append_to_multiindex(self, frame_of_index_cols, drop, keys): + # append to existing multiindex + df = frame_of_index_cols.set_index(["D"], drop=drop, append=True) + + keys = keys if isinstance(keys, list) else [keys] + expected = frame_of_index_cols.set_index(["D"] + keys, drop=drop, append=True) + + result = df.set_index(keys, drop=drop, append=True) + + tm.assert_frame_equal(result, expected) + + def test_set_index_after_mutation(self): + # GH#1590 + df = DataFrame({"val": [0, 1, 2], "key": ["a", "b", "c"]}) + expected = DataFrame({"val": [1, 2]}, Index(["b", "c"], name="key")) + + df2 = df.loc[df.index.map(lambda indx: indx >= 1)] + result = df2.set_index("key") + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # Add list-of-list constructor because list is ambiguous -> lambda + # also test index name if append=True (name is duplicate here for B) + @pytest.mark.parametrize( + "box", + [ + Series, + Index, + np.array, + list, + lambda x: [list(x)], + lambda x: MultiIndex.from_arrays([x]), + ], + ) + @pytest.mark.parametrize( + "append, index_name", [(True, None), (True, "B"), (True, "test"), (False, None)] + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_single_array( + self, frame_of_index_cols, drop, append, index_name, box + ): + df = frame_of_index_cols + df.index.name = index_name + + key = box(df["B"]) + if box == list: + # list of strings gets interpreted as list of keys + msg = "['one', 'two', 'three', 'one', 'two']" + with pytest.raises(KeyError, match=msg): + df.set_index(key, drop=drop, append=append) + else: + # np.array/list-of-list "forget" the name of B + name_mi = getattr(key, "names", None) 
+ name = [getattr(key, "name", None)] if name_mi is None else name_mi + + result = df.set_index(key, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, nothing is dropped + expected = df.set_index(["B"], drop=False, append=append) + expected.index.names = [index_name] + name if append else name + + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # also test index name if append=True (name is duplicate here for A & B) + @pytest.mark.parametrize( + "box", [Series, Index, np.array, list, lambda x: MultiIndex.from_arrays([x])] + ) + @pytest.mark.parametrize( + "append, index_name", + [(True, None), (True, "A"), (True, "B"), (True, "test"), (False, None)], + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_arrays( + self, frame_of_index_cols, drop, append, index_name, box + ): + df = frame_of_index_cols + df.index.name = index_name + + keys = ["A", box(df["B"])] + # np.array/list "forget" the name of B + names = ["A", None if box in [np.array, list, tuple, iter] else "B"] + + result = df.set_index(keys, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, only A is dropped, if at all + expected = df.set_index(["A", "B"], drop=False, append=append) + expected = expected.drop("A", axis=1) if drop else expected + expected.index.names = [index_name] + names if append else names + + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # We also emulate a "constructor" for the label -> lambda + # also test index name if append=True (name is duplicate here for A) + @pytest.mark.parametrize( + "box2", + [ + Series, + Index, + np.array, + list, + iter, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name, + ], + ) + @pytest.mark.parametrize( + "box1", + [ + Series, + Index, + np.array, + list, + iter, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name, + ], + ) + @pytest.mark.parametrize( + "append, index_name", [(True, None), (True, "A"), (True, "test"), (False, None)] + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_arrays_duplicate( + self, frame_of_index_cols, drop, append, index_name, box1, box2 + ): + df = frame_of_index_cols + df.index.name = index_name + + keys = [box1(df["A"]), box2(df["A"])] + result = df.set_index(keys, drop=drop, append=append) + + # if either box is iter, it has been consumed; re-read + keys = [box1(df["A"]), box2(df["A"])] + + # need to adapt first drop for case that both keys are 'A' -- + # cannot drop the same column twice; + # plain == would give ambiguous Boolean error for containers + first_drop = ( + False + if ( + isinstance(keys[0], str) + and keys[0] == "A" + and isinstance(keys[1], str) + and keys[1] == "A" + ) + else drop + ) + # to test against already-tested behaviour, we add sequentially, + # hence second append always True; must wrap keys in list, otherwise + # box = list would be interpreted as keys + expected = df.set_index([keys[0]], drop=first_drop, append=append) + expected = expected.set_index([keys[1]], drop=drop, append=True) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_multiindex(self, frame_of_index_cols, drop, append): + df = frame_of_index_cols + keys = MultiIndex.from_arrays([df["A"], df["B"]], 
names=["A", "B"]) + + result = df.set_index(keys, drop=drop, append=append) + + # setting with a MultiIndex will never drop columns + expected = df.set_index(["A", "B"], drop=False, append=append) + + tm.assert_frame_equal(result, expected) + + def test_construction_with_categorical_index(self): + ci = tm.makeCategoricalIndex(10) + ci.name = "B" + + # with Categorical + df = DataFrame({"A": np.random.randn(10), "B": ci.values}) + idf = df.set_index("B") + tm.assert_index_equal(idf.index, ci) + + # from a CategoricalIndex + df = DataFrame({"A": np.random.randn(10), "B": ci}) + idf = df.set_index("B") + tm.assert_index_equal(idf.index, ci) + + # round-trip + idf = idf.reset_index().set_index("B") + tm.assert_index_equal(idf.index, ci) + + def test_set_index_preserve_categorical_dtype(self): + # GH#13743, GH#13854 + df = DataFrame( + { + "A": [1, 2, 1, 1, 2], + "B": [10, 16, 22, 28, 34], + "C1": Categorical(list("abaab"), categories=list("bac"), ordered=False), + "C2": Categorical(list("abaab"), categories=list("bac"), ordered=True), + } + ) + for cols in ["C1", "C2", ["A", "C1"], ["A", "C2"], ["C1", "C2"]]: + result = df.set_index(cols).reset_index() + result = result.reindex(columns=df.columns) + tm.assert_frame_equal(result, df) + + def test_set_index_datetime(self): + # GH#3950 + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "datetime": [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + "value": range(6), + } + ) + df.index = to_datetime(df.pop("datetime"), utc=True) + df.index = df.index.tz_convert("US/Pacific") + + expected = DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + name="datetime", + ) + expected = expected.tz_localize("UTC").tz_convert("US/Pacific") + + df = df.set_index("label", append=True) + tm.assert_index_equal(df.index.levels[0], expected) + tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) + assert df.index.names == ["datetime", "label"] + + df = df.swaplevel(0, 1) + tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) + tm.assert_index_equal(df.index.levels[1], expected) + assert df.index.names == ["label", "datetime"] + + df = DataFrame(np.random.random(6)) + idx1 = DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + tz="US/Eastern", + ) + idx2 = DatetimeIndex( + [ + "2012-04-01 09:00", + "2012-04-01 09:00", + "2012-04-01 09:00", + "2012-04-02 09:00", + "2012-04-02 09:00", + "2012-04-02 09:00", + ], + tz="US/Eastern", + ) + idx3 = date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo") + idx3 = idx3._with_freq(None) + + df = df.set_index(idx1) + df = df.set_index(idx2, append=True) + df = df.set_index(idx3, append=True) + + expected1 = DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="US/Eastern", + ) + expected2 = DatetimeIndex( + ["2012-04-01 09:00", "2012-04-02 09:00"], tz="US/Eastern" + ) + + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) + + # GH#7092 + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) + + def test_set_index_period(self): + # GH#6631 
+ df = DataFrame(np.random.random(6)) + idx1 = period_range("2011-01-01", periods=3, freq="M") + idx1 = idx1.append(idx1) + idx2 = period_range("2013-01-01 09:00", periods=2, freq="H") + idx2 = idx2.append(idx2).append(idx2) + idx3 = period_range("2005", periods=6, freq="A") + + df = df.set_index(idx1) + df = df.set_index(idx2, append=True) + df = df.set_index(idx3, append=True) + + expected1 = period_range("2011-01-01", periods=3, freq="M") + expected2 = period_range("2013-01-01 09:00", periods=2, freq="H") + + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) + + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) + + +class TestSetIndexInvalid: + def test_set_index_verify_integrity(self, frame_of_index_cols): + df = frame_of_index_cols + + with pytest.raises(ValueError, match="Index has duplicate keys"): + df.set_index("A", verify_integrity=True) + # with MultiIndex + with pytest.raises(ValueError, match="Index has duplicate keys"): + df.set_index([df["A"], df["A"]], verify_integrity=True) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): + df = frame_of_index_cols + + with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"): + # column names are A-E, as well as one tuple + df.set_index(["foo", "bar", "baz"], drop=drop, append=append) + + # non-existent key in list with arrays + with pytest.raises(KeyError, match="X"): + df.set_index([df["A"], df["B"], "X"], drop=drop, append=append) + + msg = "[('foo', 'foo', 'foo', 'bar', 'bar')]" + # tuples always raise KeyError + with pytest.raises(KeyError, match=msg): + df.set_index(tuple(df["A"]), drop=drop, append=append) + + # also within a list + with pytest.raises(KeyError, match=msg): + df.set_index(["A", df["A"], tuple(df["A"])], drop=drop, append=append) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + @pytest.mark.parametrize("box", [set], ids=["set"]) + def test_set_index_raise_on_type(self, frame_of_index_cols, box, drop, append): + df = frame_of_index_cols + + msg = 'The parameter "keys" may be a column key, .*' + # forbidden type, e.g. set + with pytest.raises(TypeError, match=msg): + df.set_index(box(df["A"]), drop=drop, append=append) + + # forbidden type in list, e.g. 
set + with pytest.raises(TypeError, match=msg): + df.set_index(["A", df["A"], box(df["A"])], drop=drop, append=append) + + # MultiIndex constructor does not work directly on Series -> lambda + @pytest.mark.parametrize( + "box", + [Series, Index, np.array, iter, lambda x: MultiIndex.from_arrays([x])], + ids=["Series", "Index", "np.array", "iter", "MultiIndex"], + ) + @pytest.mark.parametrize("length", [4, 6], ids=["too_short", "too_long"]) + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_raise_on_len( + self, frame_of_index_cols, box, length, drop, append + ): + # GH 24984 + df = frame_of_index_cols # has length 5 + + values = np.random.randint(0, 10, (length,)) + + msg = "Length mismatch: Expected 5 rows, received array of length.*" + + # wrong length directly + with pytest.raises(ValueError, match=msg): + df.set_index(box(values), drop=drop, append=append) + + # wrong length in list + with pytest.raises(ValueError, match=msg): + df.set_index(["A", df.A, box(values)], drop=drop, append=append) + + +class TestSetIndexCustomLabelType: + def test_set_index_custom_label_type(self): + # GH#24969 + + class Thing: + def __init__(self, name, color) -> None: + self.name = name + self.color = color + + def __str__(self) -> str: + return f"<Thing {repr(self.name)}>" + + # necessary for pretty KeyError + __repr__ = __str__ + + thing1 = Thing("One", "red") + thing2 = Thing("Two", "blue") + df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) + expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2)) + + # use custom label directly + result = df.set_index(thing2) + tm.assert_frame_equal(result, expected) + + # custom label wrapped in list + result = df.set_index([thing2]) + tm.assert_frame_equal(result, expected) + + # missing key + thing3 = Thing("Three", "pink") + msg = "<Thing 'Three'>" + with pytest.raises(KeyError, match=msg): + # missing label directly + df.set_index(thing3) + + with pytest.raises(KeyError, match=msg): + # missing label in list + df.set_index([thing3]) + + def test_set_index_custom_label_hashable_iterable(self): + # GH#24969 + + # actual example discussed in GH 24984 was e.g. for shapely.geometry + # objects (e.g.
a collection of Points) that can be both hashable and + # iterable; using frozenset as a stand-in for testing here + + class Thing(frozenset): + # need to stabilize repr for KeyError (due to random order in sets) + def __repr__(self) -> str: + tmp = sorted(self) + joined_reprs = ", ".join(map(repr, tmp)) + # double curly brace prints one brace in format string + return f"frozenset({{{joined_reprs}}})" + + thing1 = Thing(["One", "red"]) + thing2 = Thing(["Two", "blue"]) + df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) + expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2)) + + # use custom label directly + result = df.set_index(thing2) + tm.assert_frame_equal(result, expected) + + # custom label wrapped in list + result = df.set_index([thing2]) + tm.assert_frame_equal(result, expected) + + # missing key + thing3 = Thing(["Three", "pink"]) + msg = r"frozenset\(\{'Three', 'pink'\}\)" + with pytest.raises(KeyError, match=msg): + # missing label directly + df.set_index(thing3) + + with pytest.raises(KeyError, match=msg): + # missing label in list + df.set_index([thing3]) + + def test_set_index_custom_label_type_raises(self): + # GH#24969 + + # purposefully inherit from something unhashable + class Thing(set): + def __init__(self, name, color) -> None: + self.name = name + self.color = color + + def __str__(self) -> str: + return f"<Thing {repr(self.name)}>" + + thing1 = Thing("One", "red") + thing2 = Thing("Two", "blue") + df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2]) + + msg = 'The parameter "keys" may be a column key, .*' + + with pytest.raises(TypeError, match=msg): + # use custom label directly + df.set_index(thing2) + + with pytest.raises(TypeError, match=msg): + # custom label wrapped in list + df.set_index([thing2]) + + def test_set_index_periodindex(self): + # GH#6631 + df = DataFrame(np.random.random(6)) + idx1 = period_range("2011/01/01", periods=6, freq="M") + idx2 = period_range("2013", periods=6, freq="A") + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.set_index(idx2) + tm.assert_index_equal(df.index, idx2) + + def test_drop_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.set_index " + r"except for the argument 'keys' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.set_index("a", True) + expected = DataFrame(index=Index([1, 2, 3], name="a")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py new file mode 100644 index 00000000..9b4dcf58 --- /dev/null +++ b/pandas/tests/frame/methods/test_shift.py @@ -0,0 +1,691 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + NaT, + Series, + date_range, + offsets, +) +import pandas._testing as tm + + +class TestDataFrameShift: + @pytest.mark.parametrize( + "input_data, output_data", + [(np.empty(shape=(0,)), []), (np.ones(shape=(2,)), [np.nan, 1.0])], + ) + def test_shift_non_writable_array(self, input_data, output_data, frame_or_series): + # GH21049 Verify whether non writable numpy array is shiftable + input_data.setflags(write=False) + + result = frame_or_series(input_data).shift(1) + if frame_or_series is not Series: + # need to explicitly specify columns in the empty case + expected = frame_or_series(
output_data, + index=range(len(output_data)), + columns=range(1), + dtype="float64", + ) + else: + expected = frame_or_series(output_data, dtype="float64") + + tm.assert_equal(result, expected) + + def test_shift_mismatched_freq(self, frame_or_series): + ts = frame_or_series( + np.random.randn(5), index=date_range("1/1/2000", periods=5, freq="H") + ) + + result = ts.shift(1, freq="5T") + exp_index = ts.index.shift(1, freq="5T") + tm.assert_index_equal(result.index, exp_index) + + # GH#1063, multiple of same base + result = ts.shift(1, freq="4H") + exp_index = ts.index + offsets.Hour(4) + tm.assert_index_equal(result.index, exp_index) + + @pytest.mark.parametrize( + "obj", + [ + Series([np.arange(5)]), + date_range("1/1/2011", periods=24, freq="H"), + Series(range(5), index=date_range("2017", periods=5)), + ], + ) + @pytest.mark.parametrize("shift_size", [0, 1, 2]) + def test_shift_always_copy(self, obj, shift_size, frame_or_series): + # GH#22397 + if frame_or_series is not Series: + obj = obj.to_frame() + assert obj.shift(shift_size) is not obj + + def test_shift_object_non_scalar_fill(self): + # shift requires scalar fill_value except for object dtype + ser = Series(range(3)) + with pytest.raises(ValueError, match="fill_value must be a scalar"): + ser.shift(1, fill_value=[]) + + df = ser.to_frame() + with pytest.raises(ValueError, match="fill_value must be a scalar"): + df.shift(1, fill_value=np.arange(3)) + + obj_ser = ser.astype(object) + result = obj_ser.shift(1, fill_value={}) + assert result[0] == {} + + obj_df = obj_ser.to_frame() + result = obj_df.shift(1, fill_value={}) + assert result.iloc[0, 0] == {} + + def test_shift_int(self, datetime_frame, frame_or_series): + ts = tm.get_obj(datetime_frame, frame_or_series).astype(int) + shifted = ts.shift(1) + expected = ts.astype(float).shift(1) + tm.assert_equal(shifted, expected) + + @pytest.mark.parametrize("dtype", ["int32", "int64"]) + def test_shift_32bit_take(self, frame_or_series, dtype): + # 32-bit taking + # GH#8129 + index = date_range("2000-01-01", periods=5) + arr = np.arange(5, dtype=dtype) + s1 = frame_or_series(arr, index=index) + p = arr[1] + result = s1.shift(periods=p) + expected = frame_or_series([np.nan, 0, 1, 2, 3], index=index) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("periods", [1, 2, 3, 4]) + def test_shift_preserve_freqstr(self, periods, frame_or_series): + # GH#21275 + obj = frame_or_series( + range(periods), + index=date_range("2016-1-1 00:00:00", periods=periods, freq="H"), + ) + + result = obj.shift(1, "2H") + + expected = frame_or_series( + range(periods), + index=date_range("2016-1-1 02:00:00", periods=periods, freq="H"), + ) + tm.assert_equal(result, expected) + + def test_shift_dst(self, frame_or_series): + # GH#13926 + dates = date_range("2016-11-06", freq="H", periods=10, tz="US/Eastern") + obj = frame_or_series(dates) + + res = obj.shift(0) + tm.assert_equal(res, obj) + assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]" + + res = obj.shift(1) + exp_vals = [NaT] + dates.astype(object).values.tolist()[:9] + exp = frame_or_series(exp_vals) + tm.assert_equal(res, exp) + assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]" + + res = obj.shift(-2) + exp_vals = dates.astype(object).values.tolist()[2:] + [NaT, NaT] + exp = frame_or_series(exp_vals) + tm.assert_equal(res, exp) + assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]" + + @pytest.mark.parametrize("ex", [10, -10, 20, -20]) + def test_shift_dst_beyond(self, frame_or_series, ex): + # GH#13926 + dates = 
date_range("2016-11-06", freq="H", periods=10, tz="US/Eastern") + obj = frame_or_series(dates) + res = obj.shift(ex) + exp = frame_or_series([NaT] * 10, dtype="datetime64[ns, US/Eastern]") + tm.assert_equal(res, exp) + assert tm.get_dtype(res) == "datetime64[ns, US/Eastern]" + + def test_shift_by_zero(self, datetime_frame, frame_or_series): + # shift by 0 + obj = tm.get_obj(datetime_frame, frame_or_series) + unshifted = obj.shift(0) + tm.assert_equal(unshifted, obj) + + def test_shift(self, datetime_frame): + # naive shift + ser = datetime_frame["A"] + + shifted = datetime_frame.shift(5) + tm.assert_index_equal(shifted.index, datetime_frame.index) + + shifted_ser = ser.shift(5) + tm.assert_series_equal(shifted["A"], shifted_ser) + + shifted = datetime_frame.shift(-5) + tm.assert_index_equal(shifted.index, datetime_frame.index) + + shifted_ser = ser.shift(-5) + tm.assert_series_equal(shifted["A"], shifted_ser) + + unshifted = datetime_frame.shift(5).shift(-5) + tm.assert_numpy_array_equal( + unshifted.dropna().values, datetime_frame.values[:-5] + ) + + unshifted_ser = ser.shift(5).shift(-5) + tm.assert_numpy_array_equal(unshifted_ser.dropna().values, ser.values[:-5]) + + def test_shift_by_offset(self, datetime_frame, frame_or_series): + # shift by DateOffset + obj = tm.get_obj(datetime_frame, frame_or_series) + offset = offsets.BDay() + + shifted = obj.shift(5, freq=offset) + assert len(shifted) == len(obj) + unshifted = shifted.shift(-5, freq=offset) + tm.assert_equal(unshifted, obj) + + shifted2 = obj.shift(5, freq="B") + tm.assert_equal(shifted, shifted2) + + unshifted = obj.shift(0, freq=offset) + tm.assert_equal(unshifted, obj) + + d = obj.index[0] + shifted_d = d + offset * 5 + if frame_or_series is DataFrame: + tm.assert_series_equal(obj.xs(d), shifted.xs(shifted_d), check_names=False) + else: + tm.assert_almost_equal(obj.at[d], shifted.at[shifted_d]) + + def test_shift_with_periodindex(self, frame_or_series): + # Shifting with PeriodIndex + ps = tm.makePeriodFrame() + ps = tm.get_obj(ps, frame_or_series) + + shifted = ps.shift(1) + unshifted = shifted.shift(-1) + tm.assert_index_equal(shifted.index, ps.index) + tm.assert_index_equal(unshifted.index, ps.index) + if frame_or_series is DataFrame: + tm.assert_numpy_array_equal( + unshifted.iloc[:, 0].dropna().values, ps.iloc[:-1, 0].values + ) + else: + tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) + + shifted2 = ps.shift(1, "B") + shifted3 = ps.shift(1, offsets.BDay()) + tm.assert_equal(shifted2, shifted3) + tm.assert_equal(ps, shifted2.shift(-1, "B")) + + msg = "does not match PeriodIndex freq" + with pytest.raises(ValueError, match=msg): + ps.shift(freq="D") + + # legacy support + shifted4 = ps.shift(1, freq="B") + tm.assert_equal(shifted2, shifted4) + + shifted5 = ps.shift(1, freq=offsets.BDay()) + tm.assert_equal(shifted5, shifted4) + + def test_shift_other_axis(self): + # shift other axis + # GH#6371 + df = DataFrame(np.random.rand(10, 5)) + expected = pd.concat( + [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], + ignore_index=True, + axis=1, + ) + result = df.shift(1, axis=1) + tm.assert_frame_equal(result, expected) + + def test_shift_named_axis(self): + # shift named axis + df = DataFrame(np.random.rand(10, 5)) + expected = pd.concat( + [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], + ignore_index=True, + axis=1, + ) + result = df.shift(1, axis="columns") + tm.assert_frame_equal(result, expected) + + def test_shift_other_axis_with_freq(self, datetime_frame): + 
obj = datetime_frame.T + offset = offsets.BDay() + + # GH#47039 + shifted = obj.shift(5, freq=offset, axis=1) + assert len(shifted) == len(obj) + unshifted = shifted.shift(-5, freq=offset, axis=1) + tm.assert_equal(unshifted, obj) + + def test_shift_bool(self): + df = DataFrame({"high": [True, False], "low": [False, False]}) + rs = df.shift(1) + xp = DataFrame( + np.array([[np.nan, np.nan], [True, False]], dtype=object), + columns=["high", "low"], + ) + tm.assert_frame_equal(rs, xp) + + def test_shift_categorical1(self, frame_or_series): + # GH#9416 + obj = frame_or_series(["a", "b", "c", "d"], dtype="category") + + rt = obj.shift(1).shift(-1) + tm.assert_equal(obj.iloc[:-1], rt.dropna()) + + def get_cat_values(ndframe): + # For Series we could just do ._values; for DataFrame + # we may be able to do this if we ever have 2D Categoricals + return ndframe._mgr.arrays[0] + + cat = get_cat_values(obj) + + sp1 = obj.shift(1) + tm.assert_index_equal(obj.index, sp1.index) + assert np.all(get_cat_values(sp1).codes[:1] == -1) + assert np.all(cat.codes[:-1] == get_cat_values(sp1).codes[1:]) + + sn2 = obj.shift(-2) + tm.assert_index_equal(obj.index, sn2.index) + assert np.all(get_cat_values(sn2).codes[-2:] == -1) + assert np.all(cat.codes[2:] == get_cat_values(sn2).codes[:-2]) + + tm.assert_index_equal(cat.categories, get_cat_values(sp1).categories) + tm.assert_index_equal(cat.categories, get_cat_values(sn2).categories) + + def test_shift_categorical(self): + # GH#9416 + s1 = Series(["a", "b", "c"], dtype="category") + s2 = Series(["A", "B", "C"], dtype="category") + df = DataFrame({"one": s1, "two": s2}) + rs = df.shift(1) + xp = DataFrame({"one": s1.shift(1), "two": s2.shift(1)}) + tm.assert_frame_equal(rs, xp) + + def test_shift_categorical_fill_value(self, frame_or_series): + ts = frame_or_series(["a", "b", "c", "d"], dtype="category") + res = ts.shift(1, fill_value="a") + expected = frame_or_series( + pd.Categorical( + ["a", "a", "b", "c"], categories=["a", "b", "c", "d"], ordered=False + ) + ) + tm.assert_equal(res, expected) + + # check for incorrect fill_value + msg = r"Cannot setitem on a Categorical with a new category \(f\)" + with pytest.raises(TypeError, match=msg): + ts.shift(1, fill_value="f") + + def test_shift_fill_value(self, frame_or_series): + # GH#24128 + dti = date_range("1/1/2000", periods=5, freq="H") + + ts = frame_or_series([1.0, 2.0, 3.0, 4.0, 5.0], index=dti) + exp = frame_or_series([0.0, 1.0, 2.0, 3.0, 4.0], index=dti) + # check that fill value works + result = ts.shift(1, fill_value=0.0) + tm.assert_equal(result, exp) + + exp = frame_or_series([0.0, 0.0, 1.0, 2.0, 3.0], index=dti) + result = ts.shift(2, fill_value=0.0) + tm.assert_equal(result, exp) + + ts = frame_or_series([1, 2, 3]) + res = ts.shift(2, fill_value=0) + assert tm.get_dtype(res) == tm.get_dtype(ts) + + # retain integer dtype + obj = frame_or_series([1, 2, 3, 4, 5], index=dti) + exp = frame_or_series([0, 1, 2, 3, 4], index=dti) + result = obj.shift(1, fill_value=0) + tm.assert_equal(result, exp) + + exp = frame_or_series([0, 0, 1, 2, 3], index=dti) + result = obj.shift(2, fill_value=0) + tm.assert_equal(result, exp) + + def test_shift_empty(self): + # Regression test for GH#8019 + df = DataFrame({"foo": []}) + rs = df.shift(-1) + + tm.assert_frame_equal(df, rs) + + def test_shift_duplicate_columns(self, using_array_manager): + # GH#9092; verify that position-based shifting works + # in the presence of duplicate columns + column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] + data = np.random.randn(20, 
5) + + warn = None + if using_array_manager: + warn = DeprecationWarning + + shifted = [] + for columns in column_lists: + df = DataFrame(data.copy(), columns=columns) + for s in range(5): + msg = "will attempt to set the values inplace" + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:, s] = df.iloc[:, s].shift(s + 1) + df.columns = range(5) + shifted.append(df) + + # sanity check the base case + nulls = shifted[0].isna().sum() + tm.assert_series_equal(nulls, Series(range(1, 6), dtype="int64")) + + # check all answers are the same + tm.assert_frame_equal(shifted[0], shifted[1]) + tm.assert_frame_equal(shifted[0], shifted[2]) + + def test_shift_axis1_multiple_blocks(self, using_array_manager): + # GH#35488 + df1 = DataFrame(np.random.randint(1000, size=(5, 3))) + df2 = DataFrame(np.random.randint(1000, size=(5, 2))) + df3 = pd.concat([df1, df2], axis=1) + if not using_array_manager: + assert len(df3._mgr.blocks) == 2 + + result = df3.shift(2, axis=1) + + expected = df3.take([-1, -1, 0, 1, 2], axis=1) + expected.iloc[:, :2] = np.nan + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + # Case with periods < 0 + # rebuild df3 because `take` call above consolidated + df3 = pd.concat([df1, df2], axis=1) + if not using_array_manager: + assert len(df3._mgr.blocks) == 2 + result = df3.shift(-2, axis=1) + + expected = df3.take([2, 3, 4, -1, -1], axis=1) + expected.iloc[:, -2:] = np.nan + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support + def test_shift_axis1_multiple_blocks_with_int_fill(self): + # GH#42719 + df1 = DataFrame(np.random.randint(1000, size=(5, 3))) + df2 = DataFrame(np.random.randint(1000, size=(5, 2))) + df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) + result = df3.shift(2, axis=1, fill_value=np.int_(0)) + assert len(df3._mgr.blocks) == 2 + + expected = df3.take([-1, -1, 0, 1], axis=1) + expected.iloc[:, :2] = np.int_(0) + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + # Case with periods < 0 + df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) + result = df3.shift(-2, axis=1, fill_value=np.int_(0)) + assert len(df3._mgr.blocks) == 2 + + expected = df3.take([2, 3, -1, -1], axis=1) + expected.iloc[:, -2:] = np.int_(0) + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") + def test_tshift(self, datetime_frame, frame_or_series): + # TODO(2.0): remove this test when tshift deprecation is enforced + + # PeriodIndex + ps = tm.makePeriodFrame() + ps = tm.get_obj(ps, frame_or_series) + shifted = ps.tshift(1) + unshifted = shifted.tshift(-1) + + tm.assert_equal(unshifted, ps) + + shifted2 = ps.tshift(freq="B") + tm.assert_equal(shifted, shifted2) + + shifted3 = ps.tshift(freq=offsets.BDay()) + tm.assert_equal(shifted, shifted3) + + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.tshift(freq="M") + + # DatetimeIndex + dtobj = tm.get_obj(datetime_frame, frame_or_series) + shifted = dtobj.tshift(1) + unshifted = shifted.tshift(-1) + + tm.assert_equal(dtobj, unshifted) + + shifted2 = dtobj.tshift(freq=dtobj.index.freq) + tm.assert_equal(shifted, shifted2) + + inferred_ts = DataFrame( + datetime_frame.values, + Index(np.asarray(datetime_frame.index)), + columns=datetime_frame.columns, + ) + inferred_ts = tm.get_obj(inferred_ts, 
frame_or_series) + shifted = inferred_ts.tshift(1) + + expected = dtobj.tshift(1) + expected.index = expected.index._with_freq(None) + tm.assert_equal(shifted, expected) + + unshifted = shifted.tshift(-1) + tm.assert_equal(unshifted, inferred_ts) + + no_freq = dtobj.iloc[[0, 5, 7]] + msg = "Freq was not set in the index hence cannot be inferred" + with pytest.raises(ValueError, match=msg): + no_freq.tshift() + + def test_tshift_deprecated(self, datetime_frame, frame_or_series): + # GH#11631 + dtobj = tm.get_obj(datetime_frame, frame_or_series) + with tm.assert_produces_warning(FutureWarning): + dtobj.tshift() + + def test_period_index_frame_shift_with_freq(self, frame_or_series): + ps = tm.makePeriodFrame() + ps = tm.get_obj(ps, frame_or_series) + + shifted = ps.shift(1, freq="infer") + unshifted = shifted.shift(-1, freq="infer") + tm.assert_equal(unshifted, ps) + + shifted2 = ps.shift(freq="B") + tm.assert_equal(shifted, shifted2) + + shifted3 = ps.shift(freq=offsets.BDay()) + tm.assert_equal(shifted, shifted3) + + def test_datetime_frame_shift_with_freq(self, datetime_frame, frame_or_series): + dtobj = tm.get_obj(datetime_frame, frame_or_series) + shifted = dtobj.shift(1, freq="infer") + unshifted = shifted.shift(-1, freq="infer") + tm.assert_equal(dtobj, unshifted) + + shifted2 = dtobj.shift(freq=dtobj.index.freq) + tm.assert_equal(shifted, shifted2) + + inferred_ts = DataFrame( + datetime_frame.values, + Index(np.asarray(datetime_frame.index)), + columns=datetime_frame.columns, + ) + inferred_ts = tm.get_obj(inferred_ts, frame_or_series) + shifted = inferred_ts.shift(1, freq="infer") + expected = dtobj.shift(1, freq="infer") + expected.index = expected.index._with_freq(None) + tm.assert_equal(shifted, expected) + + unshifted = shifted.shift(-1, freq="infer") + tm.assert_equal(unshifted, inferred_ts) + + def test_period_index_frame_shift_with_freq_error(self, frame_or_series): + ps = tm.makePeriodFrame() + ps = tm.get_obj(ps, frame_or_series) + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.shift(freq="M") + + def test_datetime_frame_shift_with_freq_error( + self, datetime_frame, frame_or_series + ): + dtobj = tm.get_obj(datetime_frame, frame_or_series) + no_freq = dtobj.iloc[[0, 5, 7]] + msg = "Freq was not set in the index hence cannot be inferred" + with pytest.raises(ValueError, match=msg): + no_freq.shift(freq="infer") + + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support + def test_shift_dt64values_int_fill_deprecated(self): + # GH#31971 + ser = Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]) + + with tm.assert_produces_warning(FutureWarning): + result = ser.shift(1, fill_value=0) + expected = Series([pd.Timestamp(0), ser[0]]) + tm.assert_series_equal(result, expected) + + df = ser.to_frame() + with tm.assert_produces_warning(FutureWarning): + result = df.shift(1, fill_value=0) + expected = expected.to_frame() + tm.assert_frame_equal(result, expected) + + # axis = 1 + df2 = DataFrame({"A": ser, "B": ser}) + df2._consolidate_inplace() + + with tm.assert_produces_warning(FutureWarning): + result = df2.shift(1, axis=1, fill_value=0) + + expected = DataFrame({"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}) + tm.assert_frame_equal(result, expected) + + # same thing but not consolidated + # This isn't great that we get different behavior, but + # that will go away when the deprecation is enforced + df3 = DataFrame({"A": ser}) + df3["B"] = ser + assert len(df3._mgr.arrays) == 
2 + result = df3.shift(1, axis=1, fill_value=0) + expected = DataFrame({"A": [0, 0], "B": df2["A"]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "as_cat", + [ + pytest.param( + True, + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + False, + ], + ) + @pytest.mark.parametrize( + "vals", + [ + date_range("2020-01-01", periods=2), + date_range("2020-01-01", periods=2, tz="US/Pacific"), + pd.period_range("2020-01-01", periods=2, freq="D"), + pd.timedelta_range("2020 Days", periods=2, freq="D"), + pd.interval_range(0, 3, periods=2), + pytest.param( + pd.array([1, 2], dtype="Int64"), + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + pytest.param( + pd.array([1, 2], dtype="Float32"), + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + ], + ids=lambda x: str(x.dtype), + ) + # TODO(2.0): remove filtering + @pytest.mark.filterwarnings("ignore:Index.ravel.*:FutureWarning") + def test_shift_dt64values_axis1_invalid_fill(self, vals, as_cat): + # GH#44564 + ser = Series(vals) + if as_cat: + ser = ser.astype("category") + + df = DataFrame({"A": ser}) + result = df.shift(-1, axis=1, fill_value="foo") + expected = DataFrame({"A": ["foo", "foo"]}) + tm.assert_frame_equal(result, expected) + + # same thing but multiple blocks + df2 = DataFrame({"A": ser, "B": ser}) + df2._consolidate_inplace() + + result = df2.shift(-1, axis=1, fill_value="foo") + expected = DataFrame({"A": df2["B"], "B": ["foo", "foo"]}) + tm.assert_frame_equal(result, expected) + + # same thing but not consolidated + df3 = DataFrame({"A": ser}) + df3["B"] = ser + assert len(df3._mgr.arrays) == 2 + result = df3.shift(-1, axis=1, fill_value="foo") + tm.assert_frame_equal(result, expected) + + def test_shift_axis1_categorical_columns(self): + # GH#38434 + ci = CategoricalIndex(["a", "b", "c"]) + df = DataFrame( + {"a": [1, 3], "b": [2, 4], "c": [5, 6]}, index=ci[:-1], columns=ci + ) + result = df.shift(axis=1) + + expected = DataFrame( + {"a": [np.nan, np.nan], "b": [1, 3], "c": [2, 4]}, index=ci[:-1], columns=ci + ) + tm.assert_frame_equal(result, expected) + + # periods != 1 + result = df.shift(2, axis=1) + expected = DataFrame( + {"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 3]}, + index=ci[:-1], + columns=ci, + ) + tm.assert_frame_equal(result, expected) + + def test_shift_axis1_many_periods(self): + # GH#44978 periods > len(columns) + df = DataFrame(np.random.rand(5, 3)) + shifted = df.shift(6, axis=1, fill_value=None) + + expected = df * np.nan + tm.assert_frame_equal(shifted, expected) + + shifted2 = df.shift(-6, axis=1, fill_value=None) + tm.assert_frame_equal(shifted2, expected) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py new file mode 100644 index 00000000..5d1cc3d4 --- /dev/null +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -0,0 +1,927 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalDtype, + CategoricalIndex, + DataFrame, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestDataFrameSortIndex: + def test_sort_index_and_reconstruction_doc_example(self): + # doc example + df = DataFrame( + {"value": [1, 2, 3, 4]}, + index=MultiIndex( + levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + assert df.index._is_lexsorted() + assert not 
df.index.is_monotonic_increasing + + # sort it + expected = DataFrame( + {"value": [2, 1, 4, 3]}, + index=MultiIndex( + levels=[["a", "b"], ["aa", "bb"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + result = df.sort_index() + assert result.index.is_monotonic_increasing + tm.assert_frame_equal(result, expected) + + # reconstruct + result = df.sort_index().copy() + result.index = result.index._sort_levels_monotonic() + assert result.index.is_monotonic_increasing + tm.assert_frame_equal(result, expected) + + # FIXME: the FutureWarning is issued on a setitem-with-expansion + # which will *not* change behavior, so should not get a warning. + @pytest.mark.filterwarnings("ignore:.*will attempt to set.*:FutureWarning") + def test_sort_index_non_existent_label_multiindex(self): + # GH#12261 + df = DataFrame(0, columns=[], index=MultiIndex.from_product([[], []])) + df.loc["b", "2"] = 1 + df.loc["a", "3"] = 1 + result = df.sort_index().index.is_monotonic_increasing + assert result is True + + def test_sort_index_reorder_on_ops(self): + # GH#15687 + df = DataFrame( + np.random.randn(8, 2), + index=MultiIndex.from_product( + [["a", "b"], ["big", "small"], ["red", "blu"]], + names=["letter", "size", "color"], + ), + columns=["near", "far"], + ) + df = df.sort_index() + + def my_func(group): + group.index = ["newz", "newa"] + return group + + result = df.groupby(level=["letter", "size"]).apply(my_func).sort_index() + expected = MultiIndex.from_product( + [["a", "b"], ["big", "small"], ["newa", "newz"]], + names=["letter", "size", None], + ) + + tm.assert_index_equal(result.index, expected) + + def test_sort_index_nan_multiindex(self): + # GH#14784 + # incorrect sorting w.r.t. nans + tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]] + mi = MultiIndex.from_tuples(tuples) + + df = DataFrame(np.arange(16).reshape(4, 4), index=mi, columns=list("ABCD")) + s = Series(np.arange(4), index=mi) + + df2 = DataFrame( + { + "date": pd.DatetimeIndex( + [ + "20121002", + "20121007", + "20130130", + "20130202", + "20130305", + "20121002", + "20121207", + "20130130", + "20130202", + "20130305", + "20130202", + "20130305", + ] + ), + "user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + "whole_cost": [ + 1790, + np.nan, + 280, + 259, + np.nan, + 623, + 90, + 312, + np.nan, + 301, + 359, + 801, + ], + "cost": [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12], + } + ).set_index(["date", "user_id"]) + + # sorting frame, default nan position is last + result = df.sort_index() + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position last + result = df.sort_index(na_position="last") + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position first + result = df.sort_index(na_position="first") + expected = df.iloc[[1, 2, 3, 0], :] + tm.assert_frame_equal(result, expected) + + # sorting frame with removed rows + result = df2.dropna().sort_index() + expected = df2.sort_index().dropna() + tm.assert_frame_equal(result, expected) + + # sorting series, default nan position is last + result = s.sort_index() + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position last + result = s.sort_index(na_position="last") + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position first + result = s.sort_index(na_position="first") + expected = s.iloc[[1, 2, 3, 0]] + tm.assert_series_equal(result, expected) + + def 
test_sort_index_nan(self): + # GH#3917 + + # Test DataFrame with nan label + df = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}, + index=[1, 2, 3, 4, 5, 6, np.nan], + ) + + # NaN label, ascending=True, na_position='last' + sorted_df = df.sort_index(kind="quicksort", ascending=True, na_position="last") + expected = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}, + index=[1, 2, 3, 4, 5, 6, np.nan], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=True, na_position='first' + sorted_df = df.sort_index(na_position="first") + expected = DataFrame( + {"A": [4, 1, 2, np.nan, 1, 6, 8], "B": [5, 9, np.nan, 5, 2, 5, 4]}, + index=[np.nan, 1, 2, 3, 4, 5, 6], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=False, na_position='last' + sorted_df = df.sort_index(kind="quicksort", ascending=False) + expected = DataFrame( + {"A": [8, 6, 1, np.nan, 2, 1, 4], "B": [4, 5, 2, 5, np.nan, 9, 5]}, + index=[6, 5, 4, 3, 2, 1, np.nan], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=False, na_position='first' + sorted_df = df.sort_index( + kind="quicksort", ascending=False, na_position="first" + ) + expected = DataFrame( + {"A": [4, 8, 6, 1, np.nan, 2, 1], "B": [5, 4, 5, 2, 5, np.nan, 9]}, + index=[np.nan, 6, 5, 4, 3, 2, 1], + ) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_index_multi_index(self): + # GH#25775, testing that sorting by index works with a multi-index. + df = DataFrame( + {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")} + ) + result = df.set_index(list("abc")).sort_index(level=list("ba")) + + expected = DataFrame( + {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")} + ) + expected = expected.set_index(list("abc")) + + tm.assert_frame_equal(result, expected) + + def test_sort_index_inplace(self): + frame = DataFrame( + np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + ) + + # axis=0 + unordered = frame.loc[[3, 2, 4, 1]] + a_values = unordered["A"] + df = unordered.copy() + return_value = df.sort_index(inplace=True) + assert return_value is None + expected = frame + tm.assert_frame_equal(df, expected) + # GH 44153 related + # Used to be a_id != id(df["A"]), but flaky in the CI + assert a_values is not df["A"] + + df = unordered.copy() + return_value = df.sort_index(ascending=False, inplace=True) + assert return_value is None + expected = frame[::-1] + tm.assert_frame_equal(df, expected) + + # axis=1 + unordered = frame.loc[:, ["D", "B", "C", "A"]] + df = unordered.copy() + return_value = df.sort_index(axis=1, inplace=True) + assert return_value is None + expected = frame + tm.assert_frame_equal(df, expected) + + df = unordered.copy() + return_value = df.sort_index(axis=1, ascending=False, inplace=True) + assert return_value is None + expected = frame.iloc[:, ::-1] + tm.assert_frame_equal(df, expected) + + def test_sort_index_different_sortorder(self): + A = np.arange(20).repeat(5) + B = np.tile(np.arange(5), 20) + + indexer = np.random.permutation(100) + A = A.take(indexer) + B = B.take(indexer) + + df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) + + ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) + expected = df.take(ex_indexer) + + # test with multiindex, too + idf = df.set_index(["A", "B"]) + + result = idf.sort_index(ascending=[1, 0]) + expected = idf.take(ex_indexer) + tm.assert_frame_equal(result, expected) + + # also, Series! 
+ result = idf["C"].sort_index(ascending=[1, 0]) + tm.assert_series_equal(result, expected["C"]) + + def test_sort_index_level(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + df = DataFrame([[1, 2], [3, 4]], mi) + + result = df.sort_index(level="A", sort_remaining=False) + expected = df + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["A", "B"], sort_remaining=False) + expected = df + tm.assert_frame_equal(result, expected) + + # Error thrown by sort_index when + # first index is sorted last (GH#26053) + result = df.sort_index(level=["C", "B", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["B", "C", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["C", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + def test_sort_index_categorical_index(self): + + df = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": Series(list("aabbca")).astype(CategoricalDtype(list("cab"))), + } + ).set_index("B") + + result = df.sort_index() + expected = df.iloc[[4, 0, 1, 5, 2, 3]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(ascending=False) + expected = df.iloc[[2, 3, 0, 1, 5, 4]] + tm.assert_frame_equal(result, expected) + + def test_sort_index(self): + # GH#13496 + + frame = DataFrame( + np.arange(16).reshape(4, 4), + index=[1, 2, 3, 4], + columns=["A", "B", "C", "D"], + ) + + # axis=0 : sort rows by index labels + unordered = frame.loc[[3, 2, 4, 1]] + result = unordered.sort_index(axis=0) + expected = frame + tm.assert_frame_equal(result, expected) + + result = unordered.sort_index(ascending=False) + expected = frame[::-1] + tm.assert_frame_equal(result, expected) + + # axis=1 : sort columns by column names + unordered = frame.iloc[:, [2, 1, 3, 0]] + result = unordered.sort_index(axis=1) + tm.assert_frame_equal(result, frame) + + result = unordered.sort_index(axis=1, ascending=False) + expected = frame.iloc[:, ::-1] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("level", ["A", 0]) # GH#21052 + def test_sort_index_multiindex(self, level): + # GH#13496 + + # sort rows by specified level of multi-index + mi = MultiIndex.from_tuples( + [[2, 1, 3], [2, 1, 2], [1, 1, 1]], names=list("ABC") + ) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi) + + expected_mi = MultiIndex.from_tuples( + [[1, 1, 1], [2, 1, 2], [2, 1, 3]], names=list("ABC") + ) + expected = DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi) + result = df.sort_index(level=level) + tm.assert_frame_equal(result, expected) + + # sort_remaining=False + expected_mi = MultiIndex.from_tuples( + [[1, 1, 1], [2, 1, 3], [2, 1, 2]], names=list("ABC") + ) + expected = DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi) + result = df.sort_index(level=level, sort_remaining=False) + tm.assert_frame_equal(result, expected) + + def test_sort_index_intervalindex(self): + # this is a de-facto sort via unstack + # confirming that we sort in the order of the bins + y = Series(np.random.randn(100)) + x1 = Series(np.sign(np.random.randn(100))) + x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) + model = pd.concat([y, x1, x2], axis=1, keys=["Y", "X1", "X2"]) + + result = model.groupby(["X1", "X2"], observed=True).mean().unstack() + expected = IntervalIndex.from_tuples( + [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed="right" + ) + result = 
result.columns.levels[1].categories + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, False, [5, 3, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, False, [2, 3, 5]), + ], + ) + def test_sort_index_ignore_index( + self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114 + original_index = [2, 5, 3] + df = DataFrame(original_dict, index=original_index) + expected_df = DataFrame(sorted_dict, index=output_index) + kwargs = { + "ascending": ascending, + "ignore_index": ignore_index, + "inplace": inplace, + } + + if inplace: + result_df = df.copy() + result_df.sort_index(**kwargs) + else: + result_df = df.sort_index(**kwargs) + + tm.assert_frame_equal(result_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index)) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("ignore_index", [True, False]) + def test_respect_ignore_index(self, inplace, ignore_index): + # GH 43591 + df = DataFrame({"a": [1, 2, 3]}, index=RangeIndex(4, -1, -2)) + result = df.sort_index( + ascending=False, ignore_index=ignore_index, inplace=inplace + ) + + if inplace: + result = df + if ignore_index: + expected = DataFrame({"a": [1, 2, 3]}) + else: + expected = DataFrame({"a": [1, 2, 3]}, index=RangeIndex(4, -1, -2)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + False, + MultiIndex.from_tuples([(2, 1), (3, 4)], names=list("AB")), + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + False, + MultiIndex.from_tuples([(3, 4), (2, 1)], names=list("AB")), + ), + ], + ) + def test_sort_index_ignore_index_multi_index( + self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114, this is to test ignore_index on MulitIndex of index + mi = MultiIndex.from_tuples([(2, 1), (3, 4)], names=list("AB")) + df = DataFrame(original_dict, index=mi) + expected_df = DataFrame(sorted_dict, index=output_index) + + kwargs = { + "ascending": ascending, + "ignore_index": ignore_index, + "inplace": inplace, + } + + if inplace: + result_df = df.copy() + result_df.sort_index(**kwargs) + else: + result_df = df.sort_index(**kwargs) + + tm.assert_frame_equal(result_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=mi)) + + def test_sort_index_categorical_multiindex(self): + # GH#15058 + df = DataFrame( + { + "a": range(6), + "l1": pd.Categorical( + ["a", "a", "b", "b", "c", "c"], + categories=["c", "a", "b"], + ordered=True, + ), + "l2": [0, 1, 0, 1, 0, 1], + } + ) + result = df.set_index(["l1", "l2"]).sort_index() + expected = DataFrame( + [4, 5, 0, 1, 2, 3], + columns=["a"], + index=MultiIndex( + levels=[ + CategoricalIndex( + ["c", "a", "b"], + categories=["c", "a", "b"], + ordered=True, + name="l1", + 
dtype="category", + ), + [0, 1], + ], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=["l1", "l2"], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_sort_index_and_reconstruction(self): + + # GH#15622 + # lexsortedness should be identical + # across MultiIndex construction methods + + df = DataFrame([[1, 1], [2, 2]], index=list("ab")) + expected = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_tuples( + [(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")] + ), + ) + assert expected.index._is_lexsorted() + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_product([[0.5, 0.8], list("ab")]), + ) + result = result.sort_index() + assert result.index.is_monotonic_increasing + + tm.assert_frame_equal(result, expected) + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex( + levels=[[0.5, 0.8], ["a", "b"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + result = result.sort_index() + assert result.index._is_lexsorted() + + tm.assert_frame_equal(result, expected) + + concatted = pd.concat([df, df], keys=[0.8, 0.5]) + result = concatted.sort_index() + + assert result.index.is_monotonic_increasing + + tm.assert_frame_equal(result, expected) + + # GH#14015 + df = DataFrame( + [[1, 2], [6, 7]], + columns=MultiIndex.from_tuples( + [(0, "20160811 12:00:00"), (0, "20160809 12:00:00")], + names=["l1", "Date"], + ), + ) + + df.columns = df.columns.set_levels( + pd.to_datetime(df.columns.levels[1]), level=1 + ) + assert not df.columns.is_monotonic_increasing + result = df.sort_index(axis=1) + assert result.columns.is_monotonic_increasing + result = df.sort_index(axis=1, level=1) + assert result.columns.is_monotonic_increasing + + # TODO: better name, de-duplicate with test_sort_index_level above + def test_sort_index_level2(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + df = frame.copy() + df.index = np.arange(len(df)) + + # axis=1 + + # series + a_sorted = frame["A"].sort_index(level=0) + + # preserve names + assert a_sorted.index.names == frame.index.names + + # inplace + rs = frame.copy() + return_value = rs.sort_index(level=0, inplace=True) + assert return_value is None + tm.assert_frame_equal(rs, frame.sort_index(level=0)) + + def test_sort_index_level_large_cardinality(self): + + # GH#2684 (int64) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000).astype("int64"), index=index) + + # it works! + result = df.sort_index(level=0) + assert result.index._lexsort_depth == 3 + + # GH#2684 (int32) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000).astype("int32"), index=index) + + # it works! 
+ result = df.sort_index(level=0) + assert (result.dtypes.values == df.dtypes.values).all() + assert result.index._lexsort_depth == 3 + + def test_sort_index_level_by_name(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + frame.index.names = ["first", "second"] + result = frame.sort_index(level="second") + expected = frame.sort_index(level=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level_mixed(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + sorted_before = frame.sort_index(level=1) + + df = frame.copy() + df["foo"] = "bar" + sorted_after = df.sort_index(level=1) + tm.assert_frame_equal(sorted_before, sorted_after.drop(["foo"], axis=1)) + + dft = frame.T + sorted_before = dft.sort_index(level=1, axis=1) + dft["foo", "three"] = "bar" + + sorted_after = dft.sort_index(level=1, axis=1) + tm.assert_frame_equal( + sorted_before.drop([("foo", "three")], axis=1), + sorted_after.drop([("foo", "three")], axis=1), + ) + + def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + result = frame.sort_index() + assert result.index.names == frame.index.names + + @pytest.mark.parametrize( + "gen,extra", + [ + ([1.0, 3.0, 2.0, 5.0], 4.0), + ([1, 3, 2, 5], 4), + ( + [ + Timestamp("20130101"), + Timestamp("20130103"), + Timestamp("20130102"), + Timestamp("20130105"), + ], + Timestamp("20130104"), + ), + (["1one", "3one", "2one", "5one"], "4one"), + ], + ) + def test_sort_index_multilevel_repr_8017(self, gen, extra): + + np.random.seed(0) + data = np.random.randn(3, 4) + + columns = MultiIndex.from_tuples([("red", i) for i in gen]) + df = DataFrame(data, index=list("def"), columns=columns) + df2 = pd.concat( + [ + df, + DataFrame( + "world", + index=list("def"), + columns=MultiIndex.from_tuples([("red", extra)]), + ), + ], + axis=1, + ) + + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. 
only 1 header of read + assert str(df2).splitlines()[0].split() == ["red"] + + # GH 8017 + # sorting fails after columns added + + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) + + # setitem then sort + result = df.copy() + result[("red", extra)] = "world" + + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "categories", + [ + pytest.param(["a", "b", "c"], id="str"), + pytest.param( + [pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(2, 3)], + id="pd.Interval", + ), + ], + ) + def test_sort_index_with_categories(self, categories): + # GH#23452 + df = DataFrame( + {"foo": range(len(categories))}, + index=CategoricalIndex( + data=categories, categories=categories, ordered=True + ), + ) + df.index = df.index.reorder_categories(df.index.categories[::-1]) + result = df.sort_index() + expected = DataFrame( + {"foo": reversed(range(len(categories)))}, + index=CategoricalIndex( + data=categories[::-1], categories=categories[::-1], ordered=True + ), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "ascending", + [ + None, + [True, None], + [False, "True"], + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + # GH 39434 + df = DataFrame(np.arange(64)) + length = len(df.index) + df.index = [(i - length / 2) % length for i in range(length)] + match = 'For argument "ascending" expected type bool' + with pytest.raises(ValueError, match=match): + df.sort_index(axis=0, ascending=ascending, na_position="first") + + def test_sort_index_use_inf_as_na(self): + # GH 29687 + expected = DataFrame( + {"col1": [1, 2, 3], "col2": [3, 4, 5]}, + index=pd.date_range("2020", periods=3), + ) + with pd.option_context("mode.use_inf_as_na", True): + result = expected.sort_index() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "ascending", + [(True, False), [True, False]], + ) + def test_sort_index_ascending_tuple(self, ascending): + df = DataFrame( + { + "legs": [4, 2, 4, 2, 2], + }, + index=MultiIndex.from_tuples( + [ + ("mammal", "dog"), + ("bird", "duck"), + ("mammal", "horse"), + ("bird", "penguin"), + ("mammal", "kangaroo"), + ], + names=["class", "animal"], + ), + ) + + # parameter `ascending`` is a tuple + result = df.sort_index(level=(0, 1), ascending=ascending) + + expected = DataFrame( + { + "legs": [2, 2, 2, 4, 4], + }, + index=MultiIndex.from_tuples( + [ + ("bird", "penguin"), + ("bird", "duck"), + ("mammal", "kangaroo"), + ("mammal", "horse"), + ("mammal", "dog"), + ], + names=["class", "animal"], + ), + ) + + tm.assert_frame_equal(result, expected) + + +class TestDataFrameSortIndexKey: + def test_sort_multi_index_key(self): + # GH 25775, testing that sorting by index works with a multi-index. 
+ df = DataFrame( + {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")} + ).set_index(list("abc")) + + result = df.sort_index(level=list("ac"), key=lambda x: x) + + expected = DataFrame( + {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")} + ).set_index(list("abc")) + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=list("ac"), key=lambda x: -x) + expected = DataFrame( + {"a": [3, 2, 1], "b": [0, 0, 0], "c": [0, 2, 1], "d": list("acb")} + ).set_index(list("abc")) + + tm.assert_frame_equal(result, expected) + + def test_sort_index_key(self): # issue 27237 + df = DataFrame(np.arange(6, dtype="int64"), index=list("aaBBca")) + + result = df.sort_index() + expected = df.iloc[[2, 3, 0, 1, 5, 4]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(key=lambda x: x.str.lower()) + expected = df.iloc[[0, 1, 5, 2, 3, 4]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(key=lambda x: x.str.lower(), ascending=False) + expected = df.iloc[[4, 2, 3, 0, 1, 5]] + tm.assert_frame_equal(result, expected) + + def test_sort_index_key_int(self): + df = DataFrame(np.arange(6, dtype="int64"), index=np.arange(6, dtype="int64")) + + result = df.sort_index() + tm.assert_frame_equal(result, df) + + result = df.sort_index(key=lambda x: -x) + expected = df.sort_index(ascending=False) + tm.assert_frame_equal(result, expected) + + result = df.sort_index(key=lambda x: 2 * x) + tm.assert_frame_equal(result, df) + + def test_sort_multi_index_key_str(self): + # GH 25775, testing that sorting by index works with a multi-index. + df = DataFrame( + {"a": ["B", "a", "C"], "b": [0, 1, 0], "c": list("abc"), "d": [0, 1, 2]} + ).set_index(list("abc")) + + result = df.sort_index(level="a", key=lambda x: x.str.lower()) + + expected = DataFrame( + {"a": ["a", "B", "C"], "b": [1, 0, 0], "c": list("bac"), "d": [1, 0, 2]} + ).set_index(list("abc")) + tm.assert_frame_equal(result, expected) + + result = df.sort_index( + level=list("abc"), # can refer to names + key=lambda x: x.str.lower() if x.name in ["a", "c"] else -x, + ) + + expected = DataFrame( + {"a": ["a", "B", "C"], "b": [1, 0, 0], "c": list("bac"), "d": [1, 0, 2]} + ).set_index(list("abc")) + tm.assert_frame_equal(result, expected) + + def test_changes_length_raises(self): + df = DataFrame({"A": [1, 2, 3]}) + with pytest.raises(ValueError, match="change the shape"): + df.sort_index(key=lambda x: x[:1]) + + def test_sort_index_multiindex_sparse_column(self): + # GH 29735, testing that sort_index on a multiindexed frame with sparse + # columns fills with 0. 
+ expected = DataFrame( + { + i: pd.array([0.0, 0.0, 0.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)) + for i in range(0, 4) + }, + index=MultiIndex.from_product([[1, 2], [1, 2]]), + ) + + result = expected.sort_index(level=0) + + tm.assert_frame_equal(result, expected) + + def test_sort_index_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame.sort_index " + r"will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.sort_index(1) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py new file mode 100644 index 00000000..51b59026 --- /dev/null +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -0,0 +1,893 @@ +import random + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + NaT, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameSortValues: + @pytest.mark.parametrize("dtype", [np.uint8, bool]) + def test_sort_values_sparse_no_warning(self, dtype): + # GH#45618 + # TODO(2.0): test will be unnecessary + ser = pd.Series(Categorical(["a", "b", "a"], categories=["a", "b", "c"])) + df = pd.get_dummies(ser, dtype=dtype, sparse=True) + + with tm.assert_produces_warning(None): + # No warnings about constructing Index from SparseArray + df.sort_values(by=df.columns.tolist()) + + def test_sort_values(self): + frame = DataFrame( + [[1, 1, 2], [3, 1, 0], [4, 5, 6]], index=[1, 2, 3], columns=list("ABC") + ) + + # by column (axis=0) + sorted_df = frame.sort_values(by="A") + indexer = frame["A"].argsort().values + expected = frame.loc[frame.index[indexer]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by="A", ascending=False) + indexer = indexer[::-1] + expected = frame.loc[frame.index[indexer]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by="A", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + # GH4839 + sorted_df = frame.sort_values(by=["A"], ascending=[False]) + tm.assert_frame_equal(sorted_df, expected) + + # multiple bys + sorted_df = frame.sort_values(by=["B", "C"]) + expected = frame.loc[[2, 1, 3]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=["B", "C"], ascending=False) + tm.assert_frame_equal(sorted_df, expected[::-1]) + + sorted_df = frame.sort_values(by=["B", "A"], ascending=[True, False]) + tm.assert_frame_equal(sorted_df, expected) + + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + frame.sort_values(by=["A", "B"], axis=2, inplace=True) + + # by row (axis=1): GH#10806 + sorted_df = frame.sort_values(by=3, axis=1) + expected = frame + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=3, axis=1, ascending=False) + expected = frame.reindex(columns=["C", "B", "A"]) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 2], axis="columns") + expected = frame.reindex(columns=["B", "A", "C"]) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=[True, False]) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=False) + expected = 
frame.reindex(columns=["C", "B", "A"]) + tm.assert_frame_equal(sorted_df, expected) + + msg = r"Length of ascending \(5\) != length of by \(2\)" + with pytest.raises(ValueError, match=msg): + frame.sort_values(by=["A", "B"], axis=0, ascending=[True] * 5) + + def test_sort_values_by_empty_list(self): + # https://github.com/pandas-dev/pandas/issues/40258 + expected = DataFrame({"a": [1, 4, 2, 5, 3, 6]}) + result = expected.sort_values(by=[]) + tm.assert_frame_equal(result, expected) + assert result is not expected + + def test_sort_values_inplace(self): + frame = DataFrame( + np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + ) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values(by="A", inplace=True) + assert return_value is None + expected = frame.sort_values(by="A") + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values(by=1, axis=1, inplace=True) + assert return_value is None + expected = frame.sort_values(by=1, axis=1) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values(by="A", ascending=False, inplace=True) + assert return_value is None + expected = frame.sort_values(by="A", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values( + by=["A", "B"], ascending=False, inplace=True + ) + assert return_value is None + expected = frame.sort_values(by=["A", "B"], ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_multicolumn(self): + A = np.arange(5).repeat(20) + B = np.tile(np.arange(5), 20) + random.shuffle(A) + random.shuffle(B) + frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) + + result = frame.sort_values(by=["A", "B"]) + indexer = np.lexsort((frame["B"], frame["A"])) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + result = frame.sort_values(by=["A", "B"], ascending=False) + indexer = np.lexsort( + (frame["B"].rank(ascending=False), frame["A"].rank(ascending=False)) + ) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + result = frame.sort_values(by=["B", "A"]) + indexer = np.lexsort((frame["A"], frame["B"])) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + def test_sort_values_multicolumn_uint64(self): + # GH#9918 + # uint64 multicolumn sort + + df = DataFrame( + { + "a": pd.Series([18446637057563306014, 1162265347240853609]), + "b": pd.Series([1, 2]), + } + ) + df["a"] = df["a"].astype(np.uint64) + result = df.sort_values(["a", "b"]) + + expected = DataFrame( + { + "a": pd.Series([18446637057563306014, 1162265347240853609]), + "b": pd.Series([1, 2]), + }, + index=pd.Index([1, 0]), + ) + + tm.assert_frame_equal(result, expected) + + def test_sort_values_nan(self): + # GH#3917 + df = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]} + ) + + # sort one column only + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 9, 2, np.nan, 5, 5, 4]}, + index=[2, 0, 3, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A"], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + expected = DataFrame( + {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 9, 2]}, + index=[2, 5, 4, 6, 1, 0, 3], + ) + sorted_df = df.sort_values(["A"], na_position="first", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + expected = df.reindex(columns=["B", "A"]) + sorted_df = 
df.sort_values(by=1, axis=1, na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='last', order + expected = DataFrame( + {"A": [1, 1, 2, 4, 6, 8, np.nan], "B": [2, 9, np.nan, 5, 5, 4, 5]}, + index=[3, 0, 1, 6, 4, 5, 2], + ) + sorted_df = df.sort_values(["A", "B"]) + tm.assert_frame_equal(sorted_df, expected) + + # na_position='first', order + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 2, 9, np.nan, 5, 5, 4]}, + index=[2, 3, 0, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A", "B"], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='first', not order + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 9, 2, np.nan, 5, 5, 4]}, + index=[2, 0, 3, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A", "B"], ascending=[1, 0], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='last', not order + expected = DataFrame( + {"A": [8, 6, 4, 2, 1, 1, np.nan], "B": [4, 5, 5, np.nan, 2, 9, 5]}, + index=[5, 4, 6, 1, 3, 0, 2], + ) + sorted_df = df.sort_values(["A", "B"], ascending=[0, 1], na_position="last") + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_stable_descending_sort(self): + # GH#6399 + df = DataFrame( + [[2, "first"], [2, "second"], [1, "a"], [1, "b"]], + columns=["sort_col", "order"], + ) + sorted_df = df.sort_values(by="sort_col", kind="mergesort", ascending=False) + tm.assert_frame_equal(df, sorted_df) + + @pytest.mark.parametrize( + "expected_idx_non_na, ascending", + [ + [ + [3, 4, 5, 0, 1, 8, 6, 9, 7, 10, 13, 14], + [True, True], + ], + [ + [0, 3, 4, 5, 1, 8, 6, 7, 10, 13, 14, 9], + [True, False], + ], + [ + [9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0], + [False, True], + ], + [ + [7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5], + [False, False], + ], + ], + ) + @pytest.mark.parametrize("na_position", ["first", "last"]) + def test_sort_values_stable_multicolumn_sort( + self, expected_idx_non_na, ascending, na_position + ): + # GH#38426 Clarify sort_values with mult. 
columns / labels is stable + df = DataFrame( + { + "A": [1, 2, np.nan, 1, 1, 1, 6, 8, 4, 8, 8, np.nan, np.nan, 8, 8], + "B": [9, np.nan, 5, 2, 2, 2, 5, 4, 5, 3, 4, np.nan, np.nan, 4, 4], + } + ) + # All rows with NaN in col "B" only have unique values in "A", therefore, + # only the rows with NaNs in "A" have to be treated individually: + expected_idx = ( + [11, 12, 2] + expected_idx_non_na + if na_position == "first" + else expected_idx_non_na + [2, 11, 12] + ) + expected = df.take(expected_idx) + sorted_df = df.sort_values( + ["A", "B"], ascending=ascending, na_position=na_position + ) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_stable_categorial(self): + # GH#16793 + df = DataFrame({"x": Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True)}) + expected = df.copy() + sorted_df = df.sort_values("x", kind="mergesort") + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_datetimes(self): + + # GH#3461, argsort / lexsort differences for a datetime column + df = DataFrame( + ["a", "a", "a", "b", "c", "d", "e", "f", "g"], + columns=["A"], + index=date_range("20130101", periods=9), + ) + dts = [ + Timestamp(x) + for x in [ + "2004-02-11", + "2004-01-21", + "2004-01-26", + "2005-09-20", + "2010-10-04", + "2009-05-12", + "2008-11-12", + "2010-09-28", + "2010-09-28", + ] + ] + df["B"] = dts[::2] + dts[1::2] + df["C"] = 2.0 + df["A1"] = 3.0 + + df1 = df.sort_values(by="A") + df2 = df.sort_values(by=["A"]) + tm.assert_frame_equal(df1, df2) + + df1 = df.sort_values(by="B") + df2 = df.sort_values(by=["B"]) + tm.assert_frame_equal(df1, df2) + + df1 = df.sort_values(by="B") + + df2 = df.sort_values(by=["C", "B"]) + tm.assert_frame_equal(df1, df2) + + def test_sort_values_frame_column_inplace_sort_exception(self, float_frame): + s = float_frame["A"] + with pytest.raises(ValueError, match="This Series is a view"): + s.sort_values(inplace=True) + + cp = s.copy() + cp.sort_values() # it works! + + def test_sort_values_nat_values_in_int_column(self): + + # GH#14922: "sorting with large float and multiple columns incorrect" + + # cause was that the int64 value NaT was considered as "na". Which is + # only correct for datetime64 columns. 
+ + int_values = (2, int(NaT.value)) + float_values = (2.0, -1.797693e308) + + df = DataFrame( + {"int": int_values, "float": float_values}, columns=["int", "float"] + ) + + df_reversed = DataFrame( + {"int": int_values[::-1], "float": float_values[::-1]}, + columns=["int", "float"], + index=[1, 0], + ) + + # NaT is not a "na" for int64 columns, so na_position must not + # influence the result: + df_sorted = df.sort_values(["int", "float"], na_position="last") + tm.assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["int", "float"], na_position="first") + tm.assert_frame_equal(df_sorted, df_reversed) + + # reverse sorting order + df_sorted = df.sort_values(["int", "float"], ascending=False) + tm.assert_frame_equal(df_sorted, df) + + # and now check if NaT is still considered as "na" for datetime64 + # columns: + df = DataFrame( + {"datetime": [Timestamp("2016-01-01"), NaT], "float": float_values}, + columns=["datetime", "float"], + ) + + df_reversed = DataFrame( + {"datetime": [NaT, Timestamp("2016-01-01")], "float": float_values[::-1]}, + columns=["datetime", "float"], + index=[1, 0], + ) + + df_sorted = df.sort_values(["datetime", "float"], na_position="first") + tm.assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["datetime", "float"], na_position="last") + tm.assert_frame_equal(df_sorted, df) + + # Ascending should not affect the results. + df_sorted = df.sort_values(["datetime", "float"], ascending=False) + tm.assert_frame_equal(df_sorted, df) + + def test_sort_nat(self): + # GH 16836 + + d1 = [Timestamp(x) for x in ["2016-01-01", "2015-01-01", np.nan, "2016-01-01"]] + d2 = [ + Timestamp(x) + for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"] + ] + df = DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) + + d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]] + d4 = [ + Timestamp(x) + for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"] + ] + expected = DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) + sorted_df = df.sort_values(by=["a", "b"]) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_na_position_with_categories(self): + # GH#22556 + # Positioning missing value properly when column is Categorical. 
+ categories = ["A", "B", "C"] + category_indices = [0, 2, 4] + list_of_nans = [np.nan, np.nan] + na_indices = [1, 3] + na_position_first = "first" + na_position_last = "last" + column_name = "c" + + reversed_categories = sorted(categories, reverse=True) + reversed_category_indices = sorted(category_indices, reverse=True) + reversed_na_indices = sorted(na_indices) + + df = DataFrame( + { + column_name: Categorical( + ["A", np.nan, "B", np.nan, "C"], categories=categories, ordered=True + ) + } + ) + # sort ascending with na first + result = df.sort_values( + by=column_name, ascending=True, na_position=na_position_first + ) + expected = DataFrame( + { + column_name: Categorical( + list_of_nans + categories, categories=categories, ordered=True + ) + }, + index=na_indices + category_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort ascending with na last + result = df.sort_values( + by=column_name, ascending=True, na_position=na_position_last + ) + expected = DataFrame( + { + column_name: Categorical( + categories + list_of_nans, categories=categories, ordered=True + ) + }, + index=category_indices + na_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort descending with na first + result = df.sort_values( + by=column_name, ascending=False, na_position=na_position_first + ) + expected = DataFrame( + { + column_name: Categorical( + list_of_nans + reversed_categories, + categories=categories, + ordered=True, + ) + }, + index=reversed_na_indices + reversed_category_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort descending with na last + result = df.sort_values( + by=column_name, ascending=False, na_position=na_position_last + ) + expected = DataFrame( + { + column_name: Categorical( + reversed_categories + list_of_nans, + categories=categories, + ordered=True, + ) + }, + index=reversed_category_indices + reversed_na_indices, + ) + + tm.assert_frame_equal(result, expected) + + def test_sort_values_nat(self): + + # GH#16836 + + d1 = [Timestamp(x) for x in ["2016-01-01", "2015-01-01", np.nan, "2016-01-01"]] + d2 = [ + Timestamp(x) + for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"] + ] + df = DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) + + d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]] + d4 = [ + Timestamp(x) + for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"] + ] + expected = DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) + sorted_df = df.sort_values(by=["a", "b"]) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_na_position_with_categories_raises(self): + df = DataFrame( + { + "c": Categorical( + ["A", np.nan, "B", np.nan, "C"], + categories=["A", "B", "C"], + ordered=True, + ) + } + ) + + with pytest.raises(ValueError, match="invalid na_position: bad_position"): + df.sort_values(by="c", ascending=False, na_position="bad_position") + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + True, + [0, 1, 2], + ), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + False, + [2, 1, 0], + ), + ], + ) + def test_sort_values_ignore_index( + self, inplace, original_dict, sorted_dict, ignore_index, output_index + ): + # GH 30114 + df = 
DataFrame(original_dict) + expected = DataFrame(sorted_dict, index=output_index) + kwargs = {"ignore_index": ignore_index, "inplace": inplace} + + if inplace: + result_df = df.copy() + result_df.sort_values("A", ascending=False, **kwargs) + else: + result_df = df.sort_values("A", ascending=False, **kwargs) + + tm.assert_frame_equal(result_df, expected) + tm.assert_frame_equal(df, DataFrame(original_dict)) + + def test_sort_values_nat_na_position_default(self): + # GH 13230 + expected = DataFrame( + { + "A": [1, 2, 3, 4, 4], + "date": pd.DatetimeIndex( + [ + "2010-01-01 09:00:00", + "2010-01-01 09:00:01", + "2010-01-01 09:00:02", + "2010-01-01 09:00:03", + "NaT", + ] + ), + } + ) + result = expected.sort_values(["A", "date"]) + tm.assert_frame_equal(result, expected) + + def test_sort_values_item_cache(self, using_array_manager): + # previous behavior incorrect retained an invalid _item_cache entry + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df["D"] = df["A"] * 2 + ser = df["A"] + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + df.sort_values(by="A") + ser.values[0] = 99 + + assert df.iloc[0, 0] == df["A"][0] + + def test_sort_values_reshaping(self): + # GH 39426 + values = list(range(21)) + expected = DataFrame([values], columns=values) + df = expected.sort_values(expected.index[0], axis=1, ignore_index=True) + + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSortKey: # test key sorting (issue 27237) + def test_sort_values_inplace_key(self, sort_by_key): + frame = DataFrame( + np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + ) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values(by="A", inplace=True, key=sort_by_key) + assert return_value is None + expected = frame.sort_values(by="A", key=sort_by_key) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values( + by=1, axis=1, inplace=True, key=sort_by_key + ) + assert return_value is None + expected = frame.sort_values(by=1, axis=1, key=sort_by_key) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + return_value = sorted_df.sort_values( + by="A", ascending=False, inplace=True, key=sort_by_key + ) + assert return_value is None + expected = frame.sort_values(by="A", ascending=False, key=sort_by_key) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values( + by=["A", "B"], ascending=False, inplace=True, key=sort_by_key + ) + expected = frame.sort_values(by=["A", "B"], ascending=False, key=sort_by_key) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_key(self): + df = DataFrame(np.array([0, 5, np.nan, 3, 2, np.nan])) + + result = df.sort_values(0) + expected = df.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(0, key=lambda x: x + 5) + expected = df.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(0, key=lambda x: -x, ascending=False) + expected = df.iloc[[0, 4, 3, 1, 2, 5]] + tm.assert_frame_equal(result, expected) + + def test_sort_values_by_key(self): + df = DataFrame( + { + "a": np.array([0, 3, np.nan, 3, 2, np.nan]), + "b": np.array([0, 2, np.nan, 5, 2, np.nan]), + } + ) + + result = df.sort_values("a", key=lambda x: -x) + expected = df.iloc[[1, 3, 4, 0, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(by=["a", "b"], key=lambda x: -x) + expected = df.iloc[[3, 1, 4, 0, 2, 5]] + 
tm.assert_frame_equal(result, expected) + + result = df.sort_values(by=["a", "b"], key=lambda x: -x, ascending=False) + expected = df.iloc[[0, 4, 1, 3, 2, 5]] + tm.assert_frame_equal(result, expected) + + def test_sort_values_by_key_by_name(self): + df = DataFrame( + { + "a": np.array([0, 3, np.nan, 3, 2, np.nan]), + "b": np.array([0, 2, np.nan, 5, 2, np.nan]), + } + ) + + def key(col): + if col.name == "a": + return -col + else: + return col + + result = df.sort_values(by="a", key=key) + expected = df.iloc[[1, 3, 4, 0, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(by=["a"], key=key) + expected = df.iloc[[1, 3, 4, 0, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(by="b", key=key) + expected = df.iloc[[0, 1, 4, 3, 2, 5]] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(by=["a", "b"], key=key) + expected = df.iloc[[1, 3, 4, 0, 2, 5]] + tm.assert_frame_equal(result, expected) + + def test_sort_values_key_string(self): + df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]])) + + result = df.sort_values(1) + expected = df[::-1] + tm.assert_frame_equal(result, expected) + + result = df.sort_values([0, 1], key=lambda col: col.str.lower()) + tm.assert_frame_equal(result, df) + + result = df.sort_values( + [0, 1], key=lambda col: col.str.lower(), ascending=False + ) + expected = df.sort_values(1, key=lambda col: col.str.lower(), ascending=False) + tm.assert_frame_equal(result, expected) + + def test_sort_values_key_empty(self, sort_by_key): + df = DataFrame(np.array([])) + + df.sort_values(0, key=sort_by_key) + df.sort_index(key=sort_by_key) + + def test_changes_length_raises(self): + df = DataFrame({"A": [1, 2, 3]}) + with pytest.raises(ValueError, match="change the shape"): + df.sort_values("A", key=lambda x: x[:1]) + + def test_sort_values_key_axes(self): + df = DataFrame({0: ["Hello", "goodbye"], 1: [0, 1]}) + + result = df.sort_values(0, key=lambda col: col.str.lower()) + expected = df[::-1] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(1, key=lambda col: -col) + expected = df[::-1] + tm.assert_frame_equal(result, expected) + + def test_sort_values_key_dict_axis(self): + df = DataFrame({0: ["Hello", 0], 1: ["goodbye", 1]}) + + result = df.sort_values(0, key=lambda col: col.str.lower(), axis=1) + expected = df.loc[:, ::-1] + tm.assert_frame_equal(result, expected) + + result = df.sort_values(1, key=lambda col: -col, axis=1) + expected = df.loc[:, ::-1] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_sort_values_key_casts_to_categorical(self, ordered): + # https://github.com/pandas-dev/pandas/issues/36383 + categories = ["c", "b", "a"] + df = DataFrame({"x": [1, 1, 1], "y": ["a", "b", "c"]}) + + def sorter(key): + if key.name == "y": + return pd.Series( + Categorical(key, categories=categories, ordered=ordered) + ) + return key + + result = df.sort_values(by=["x", "y"], key=sorter) + expected = DataFrame( + {"x": [1, 1, 1], "y": ["c", "b", "a"]}, index=pd.Index([2, 1, 0]) + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def df_none(): + return DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 2, 2, 1, 1], + "A": np.arange(6, 0, -1), + ("B", 5): ["one", "one", "two", "two", "one", "one"], + } + ) + + +@pytest.fixture(params=[["outer"], ["outer", "inner"]]) +def df_idx(request, df_none): + levels = request.param + return df_none.set_index(levels) + + +@pytest.fixture( + 
params=[ + "inner", # index level + ["outer"], # list of index level + "A", # column + [("B", 5)], # list of column + ["inner", "outer"], # two index levels + [("B", 5), "outer"], # index level and column + ["A", ("B", 5)], # Two columns + ["inner", "outer"], # two index levels and column + ] +) +def sort_names(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def ascending(request): + return request.param + + +class TestSortValuesLevelAsStr: + def test_sort_index_level_and_column_label( + self, df_none, df_idx, sort_names, ascending + ): + # GH#14353 + + # Get index levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on columns and the setting index + expected = df_none.sort_values( + by=sort_names, ascending=ascending, axis=0 + ).set_index(levels) + + # Compute result sorting on mix on columns and index levels + result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0) + + tm.assert_frame_equal(result, expected) + + def test_sort_column_level_and_index_label( + self, df_none, df_idx, sort_names, ascending + ): + # GH#14353 + + # Get levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on axis=0, setting index levels, and then + # transposing. For some cases this will result in a frame with + # multiple column levels + expected = ( + df_none.sort_values(by=sort_names, ascending=ascending, axis=0) + .set_index(levels) + .T + ) + + # Compute result by transposing and sorting on axis=1. + result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1) + + tm.assert_frame_equal(result, expected) + + def test_sort_values_pos_args_deprecation(self): + # https://github.com/pandas-dev/pandas/issues/41485 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + r"In a future version of pandas all arguments of DataFrame\.sort_values " + r"except for the argument 'by' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.sort_values("a", 0) + expected = DataFrame({"a": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + def test_sort_values_validate_ascending_for_value_error(self): + # GH41634 + df = DataFrame({"D": [23, 7, 21]}) + + msg = 'For argument "ascending" expected type bool, received type str.' 
+ with pytest.raises(ValueError, match=msg): + df.sort_values(by="D", ascending="False") + + @pytest.mark.parametrize("ascending", [False, 0, 1, True]) + def test_sort_values_validate_ascending_functional(self, ascending): + df = DataFrame({"D": [23, 7, 21]}) + indexer = df["D"].argsort().values + + if not ascending: + indexer = indexer[::-1] + + expected = df.loc[df.index[indexer]] + result = df.sort_values(by="D", ascending=ascending) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py new file mode 100644 index 00000000..306f7b2b --- /dev/null +++ b/pandas/tests/frame/methods/test_swapaxes.py @@ -0,0 +1,22 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestSwapAxes: + def test_swapaxes(self): + df = DataFrame(np.random.randn(10, 5)) + tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) + tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) + + def test_swapaxes_noop(self): + df = DataFrame(np.random.randn(10, 5)) + tm.assert_frame_equal(df, df.swapaxes(0, 0)) + + def test_swapaxes_invalid_axis(self): + df = DataFrame(np.random.randn(10, 5)) + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.swapaxes(2, 5) diff --git a/pandas/tests/frame/methods/test_swaplevel.py b/pandas/tests/frame/methods/test_swaplevel.py new file mode 100644 index 00000000..5511ac7d --- /dev/null +++ b/pandas/tests/frame/methods/test_swaplevel.py @@ -0,0 +1,36 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestSwaplevel: + def test_swaplevel(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + swapped = frame["A"].swaplevel() + swapped2 = frame["A"].swaplevel(0) + swapped3 = frame["A"].swaplevel(0, 1) + swapped4 = frame["A"].swaplevel("first", "second") + assert not swapped.index.equals(frame.index) + tm.assert_series_equal(swapped, swapped2) + tm.assert_series_equal(swapped, swapped3) + tm.assert_series_equal(swapped, swapped4) + + back = swapped.swaplevel() + back2 = swapped.swaplevel(0) + back3 = swapped.swaplevel(0, 1) + back4 = swapped.swaplevel("second", "first") + assert back.index.equals(frame.index) + tm.assert_series_equal(back, back2) + tm.assert_series_equal(back, back3) + tm.assert_series_equal(back, back4) + + ft = frame.T + swapped = ft.swaplevel("first", "second", axis=1) + exp = frame.swaplevel("first", "second").T + tm.assert_frame_equal(swapped, exp) + + msg = "Can only swap levels on a hierarchical axis." 
+ with pytest.raises(TypeError, match=msg): + DataFrame(range(3)).swaplevel() diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py new file mode 100644 index 00000000..1933278e --- /dev/null +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -0,0 +1,1328 @@ +import csv +from io import StringIO +import os + +import numpy as np +import pytest + +from pandas.errors import ParserError + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + read_csv, + to_datetime, +) +import pandas._testing as tm +import pandas.core.common as com + +from pandas.io.common import get_handle + + +class TestDataFrameToCSV: + def read_csv(self, path, **kwargs): + params = {"index_col": 0, "parse_dates": True} + params.update(**kwargs) + + return read_csv(path, **params) + + def test_to_csv_from_csv1(self, float_frame, datetime_frame): + + with tm.ensure_clean("__tmp_to_csv_from_csv1__") as path: + float_frame["A"][:5] = np.nan + + float_frame.to_csv(path) + float_frame.to_csv(path, columns=["A", "B"]) + float_frame.to_csv(path, header=False) + float_frame.to_csv(path, index=False) + + # test roundtrip + # freq does not roundtrip + datetime_frame.index = datetime_frame.index._with_freq(None) + datetime_frame.to_csv(path) + recons = self.read_csv(path) + tm.assert_frame_equal(datetime_frame, recons) + + datetime_frame.to_csv(path, index_label="index") + recons = self.read_csv(path, index_col=None) + + assert len(recons.columns) == len(datetime_frame.columns) + 1 + + # no index + datetime_frame.to_csv(path, index=False) + recons = self.read_csv(path, index_col=None) + tm.assert_almost_equal(datetime_frame.values, recons.values) + + # corner case + dm = DataFrame( + { + "s1": Series(range(3), index=np.arange(3)), + "s2": Series(range(2), index=np.arange(2)), + } + ) + dm.to_csv(path) + + recons = self.read_csv(path) + tm.assert_frame_equal(dm, recons) + + def test_to_csv_from_csv2(self, float_frame): + + with tm.ensure_clean("__tmp_to_csv_from_csv2__") as path: + + # duplicate index + df = DataFrame( + np.random.randn(3, 3), index=["a", "a", "b"], columns=["x", "y", "z"] + ) + df.to_csv(path) + result = self.read_csv(path) + tm.assert_frame_equal(result, df) + + midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) + df = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"]) + + df.to_csv(path) + result = self.read_csv(path, index_col=[0, 1, 2], parse_dates=False) + tm.assert_frame_equal(result, df, check_names=False) + + # column aliases + col_aliases = Index(["AA", "X", "Y", "Z"]) + float_frame.to_csv(path, header=col_aliases) + + rs = self.read_csv(path) + xp = float_frame.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) + + msg = "Writing 4 cols but got 2 aliases" + with pytest.raises(ValueError, match=msg): + float_frame.to_csv(path, header=["AA", "X"]) + + def test_to_csv_from_csv3(self): + + with tm.ensure_clean("__tmp_to_csv_from_csv3__") as path: + df1 = DataFrame(np.random.randn(3, 1)) + df2 = DataFrame(np.random.randn(3, 1)) + + df1.to_csv(path) + df2.to_csv(path, mode="a", header=False) + xp = pd.concat([df1, df2]) + rs = read_csv(path, index_col=0) + rs.columns = [int(label) for label in rs.columns] + xp.columns = [int(label) for label in xp.columns] + tm.assert_frame_equal(xp, rs) + + def test_to_csv_from_csv4(self): + + with tm.ensure_clean("__tmp_to_csv_from_csv4__") as path: + # GH 10833 (TimedeltaIndex formatting) + dt = 
pd.Timedelta(seconds=1) + df = DataFrame( + {"dt_data": [i * dt for i in range(3)]}, + index=Index([i * dt for i in range(3)], name="dt_index"), + ) + df.to_csv(path) + + result = read_csv(path, index_col="dt_index") + result.index = pd.to_timedelta(result.index) + result["dt_data"] = pd.to_timedelta(result["dt_data"]) + + tm.assert_frame_equal(df, result, check_index_type=True) + + def test_to_csv_from_csv5(self, timezone_frame): + + # tz, 8260 + with tm.ensure_clean("__tmp_to_csv_from_csv5__") as path: + + timezone_frame.to_csv(path) + result = read_csv(path, index_col=0, parse_dates=["A"]) + + converter = ( + lambda c: to_datetime(result[c]) + .dt.tz_convert("UTC") + .dt.tz_convert(timezone_frame[c].dt.tz) + ) + result["B"] = converter("B") + result["C"] = converter("C") + tm.assert_frame_equal(result, timezone_frame) + + def test_to_csv_cols_reordering(self): + # GH3454 + chunksize = 5 + N = int(chunksize * 2.5) + + df = tm.makeCustomDataframe(N, 3) + cs = df.columns + cols = [cs[2], cs[0]] + + with tm.ensure_clean() as path: + df.to_csv(path, columns=cols, chunksize=chunksize) + rs_c = read_csv(path, index_col=0) + + tm.assert_frame_equal(df[cols], rs_c, check_names=False) + + @pytest.mark.parametrize("cols", [None, ["b", "a"]]) + def test_to_csv_new_dupe_cols(self, cols): + chunksize = 5 + N = int(chunksize * 2.5) + + # dupe cols + df = tm.makeCustomDataframe(N, 3) + df.columns = ["a", "a", "b"] + with tm.ensure_clean() as path: + df.to_csv(path, columns=cols, chunksize=chunksize) + rs_c = read_csv(path, index_col=0) + + # we wrote them in a different order + # so compare them in that order + if cols is not None: + + if df.columns.is_unique: + rs_c.columns = cols + else: + indexer, missing = df.columns.get_indexer_non_unique(cols) + rs_c.columns = df.columns.take(indexer) + + for c in cols: + obj_df = df[c] + obj_rs = rs_c[c] + if isinstance(obj_df, Series): + tm.assert_series_equal(obj_df, obj_rs) + else: + tm.assert_frame_equal(obj_df, obj_rs, check_names=False) + + # wrote in the same order + else: + rs_c.columns = df.columns + tm.assert_frame_equal(df, rs_c, check_names=False) + + @pytest.mark.slow + def test_to_csv_dtnat(self): + # GH3437 + def make_dtnat_arr(n, nnat=None): + if nnat is None: + nnat = int(n * 0.1) # 10% + s = list(date_range("2000", freq="5min", periods=n)) + if nnat: + for i in np.random.randint(0, len(s), nnat): + s[i] = NaT + i = np.random.randint(100) + s[-i] = NaT + s[i] = NaT + return s + + chunksize = 1000 + s1 = make_dtnat_arr(chunksize + 5) + s2 = make_dtnat_arr(chunksize + 5, 0) + + with tm.ensure_clean("1.csv") as pth: + df = DataFrame({"a": s1, "b": s2}) + df.to_csv(pth, chunksize=chunksize) + + recons = self.read_csv(pth).apply(to_datetime) + tm.assert_frame_equal(df, recons, check_names=False) + + def _return_result_expected( + self, + df, + chunksize, + r_dtype=None, + c_dtype=None, + rnlvl=None, + cnlvl=None, + dupe_col=False, + ): + kwargs = {"parse_dates": False} + if cnlvl: + if rnlvl is not None: + kwargs["index_col"] = list(range(rnlvl)) + kwargs["header"] = list(range(cnlvl)) + + with tm.ensure_clean("__tmp_to_csv_moar__") as path: + df.to_csv(path, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(path, **kwargs) + else: + kwargs["header"] = 0 + + with tm.ensure_clean("__tmp_to_csv_moar__") as path: + df.to_csv(path, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(path, **kwargs) + + def _to_uni(x): + if not isinstance(x, str): + return x.decode("utf8") + return x + + if dupe_col: + # read_Csv disambiguates the 
columns by + # labeling them dupe.1,dupe.2, etc'. monkey patch columns + recons.columns = df.columns + if rnlvl and not cnlvl: + delta_lvl = [recons.iloc[:, i].values for i in range(rnlvl - 1)] + ix = MultiIndex.from_arrays([list(recons.index)] + delta_lvl) + recons.index = ix + recons = recons.iloc[:, rnlvl - 1 :] + + type_map = {"i": "i", "f": "f", "s": "O", "u": "O", "dt": "O", "p": "O"} + if r_dtype: + if r_dtype == "u": # unicode + r_dtype = "O" + recons.index = np.array( + [_to_uni(label) for label in recons.index], dtype=r_dtype + ) + df.index = np.array( + [_to_uni(label) for label in df.index], dtype=r_dtype + ) + elif r_dtype == "dt": # unicode + r_dtype = "O" + recons.index = np.array( + [Timestamp(label) for label in recons.index], dtype=r_dtype + ) + df.index = np.array( + [Timestamp(label) for label in df.index], dtype=r_dtype + ) + elif r_dtype == "p": + r_dtype = "O" + idx_list = to_datetime(recons.index) + recons.index = np.array( + [Timestamp(label) for label in idx_list], dtype=r_dtype + ) + df.index = np.array( + list(map(Timestamp, df.index.to_timestamp())), dtype=r_dtype + ) + else: + r_dtype = type_map.get(r_dtype) + recons.index = np.array(recons.index, dtype=r_dtype) + df.index = np.array(df.index, dtype=r_dtype) + if c_dtype: + if c_dtype == "u": + c_dtype = "O" + recons.columns = np.array( + [_to_uni(label) for label in recons.columns], dtype=c_dtype + ) + df.columns = np.array( + [_to_uni(label) for label in df.columns], dtype=c_dtype + ) + elif c_dtype == "dt": + c_dtype = "O" + recons.columns = np.array( + [Timestamp(label) for label in recons.columns], dtype=c_dtype + ) + df.columns = np.array( + [Timestamp(label) for label in df.columns], dtype=c_dtype + ) + elif c_dtype == "p": + c_dtype = "O" + col_list = to_datetime(recons.columns) + recons.columns = np.array( + [Timestamp(label) for label in col_list], dtype=c_dtype + ) + col_list = df.columns.to_timestamp() + df.columns = np.array( + [Timestamp(label) for label in col_list], dtype=c_dtype + ) + else: + c_dtype = type_map.get(c_dtype) + recons.columns = np.array(recons.columns, dtype=c_dtype) + df.columns = np.array(df.columns, dtype=c_dtype) + return df, recons + + @pytest.mark.slow + @pytest.mark.parametrize( + "nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251] + ) + def test_to_csv_nrows(self, nrows): + df = tm.makeCustomDataframe(nrows, 4, r_idx_type="dt", c_idx_type="s") + result, expected = self._return_result_expected(df, 1000, "dt", "s") + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.slow + @pytest.mark.parametrize( + "nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251] + ) + @pytest.mark.parametrize( + "r_idx_type, c_idx_type", [("i", "i"), ("s", "s"), ("s", "dt"), ("p", "p")] + ) + @pytest.mark.parametrize("ncols", [1, 2, 3, 4]) + def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols): + df = tm.makeCustomDataframe( + nrows, ncols, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + result, expected = self._return_result_expected( + df, + 1000, + r_idx_type, + c_idx_type, + ) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.slow + @pytest.mark.parametrize( + "nrows", [10, 98, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251] + ) + @pytest.mark.parametrize("ncols", [1, 2, 3, 4]) + def test_to_csv_idx_ncols(self, nrows, ncols): + df = tm.makeCustomDataframe(nrows, ncols) + result, expected = self._return_result_expected(df, 1000) + tm.assert_frame_equal(result, 
expected, check_names=False) + + @pytest.mark.slow + @pytest.mark.parametrize("nrows", [10, 98, 99, 100, 101, 102]) + def test_to_csv_dup_cols(self, nrows): + df = tm.makeCustomDataframe(nrows, 3) + cols = list(df.columns) + cols[:2] = ["dupe", "dupe"] + cols[-2:] = ["dupe", "dupe"] + ix = list(df.index) + ix[:2] = ["rdupe", "rdupe"] + ix[-2:] = ["rdupe", "rdupe"] + df.index = ix + df.columns = cols + result, expected = self._return_result_expected(df, 1000, dupe_col=True) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.slow + def test_to_csv_empty(self): + df = DataFrame(index=np.arange(10)) + result, expected = self._return_result_expected(df, 1000) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.slow + def test_to_csv_chunksize(self): + chunksize = 1000 + df = tm.makeCustomDataframe(chunksize // 2 + 1, 2, r_idx_nlevels=2) + result, expected = self._return_result_expected(df, chunksize, rnlvl=2) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.slow + @pytest.mark.parametrize( + "nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251] + ) + @pytest.mark.parametrize("ncols", [2, 3, 4]) + @pytest.mark.parametrize( + "df_params, func_params", + [ + [{"r_idx_nlevels": 2}, {"rnlvl": 2}], + [{"c_idx_nlevels": 2}, {"cnlvl": 2}], + [{"r_idx_nlevels": 2, "c_idx_nlevels": 2}, {"rnlvl": 2, "cnlvl": 2}], + ], + ) + def test_to_csv_params(self, nrows, df_params, func_params, ncols): + df = tm.makeCustomDataframe(nrows, ncols, **df_params) + result, expected = self._return_result_expected(df, 1000, **func_params) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_to_csv_from_csv_w_some_infs(self, float_frame): + + # test roundtrip with inf, -inf, nan, as full columns and mix + float_frame["G"] = np.nan + f = lambda x: [np.inf, np.nan][np.random.rand() < 0.5] + float_frame["H"] = float_frame.index.map(f) + + with tm.ensure_clean() as path: + float_frame.to_csv(path) + recons = self.read_csv(path) + + tm.assert_frame_equal(float_frame, recons) + tm.assert_frame_equal(np.isinf(float_frame), np.isinf(recons)) + + def test_to_csv_from_csv_w_all_infs(self, float_frame): + + # test roundtrip with inf, -inf, nan, as full columns and mix + float_frame["E"] = np.inf + float_frame["F"] = -np.inf + + with tm.ensure_clean() as path: + float_frame.to_csv(path) + recons = self.read_csv(path) + + tm.assert_frame_equal(float_frame, recons) + tm.assert_frame_equal(np.isinf(float_frame), np.isinf(recons)) + + def test_to_csv_no_index(self): + # GH 3624, after appending columns, to_csv fails + with tm.ensure_clean("__tmp_to_csv_no_index__") as path: + df = DataFrame({"c1": [1, 2, 3], "c2": [4, 5, 6]}) + df.to_csv(path, index=False) + result = read_csv(path) + tm.assert_frame_equal(df, result) + df["c3"] = Series([7, 8, 9], dtype="int64") + df.to_csv(path, index=False) + result = read_csv(path) + tm.assert_frame_equal(df, result) + + def test_to_csv_with_mix_columns(self): + # gh-11637: incorrect output when a mix of integer and string column + # names passed as columns parameter in to_csv + + df = DataFrame({0: ["a", "b", "c"], 1: ["aa", "bb", "cc"]}) + df["test"] = "txt" + assert df.to_csv() == df.to_csv(columns=[0, 1, "test"]) + + def test_to_csv_headers(self): + # GH6186, the presence or absence of `index` incorrectly + # causes to_csv to have different header semantics. 
+ from_df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + to_df = DataFrame([[1, 2], [3, 4]], columns=["X", "Y"]) + with tm.ensure_clean("__tmp_to_csv_headers__") as path: + from_df.to_csv(path, header=["X", "Y"]) + recons = self.read_csv(path) + + tm.assert_frame_equal(to_df, recons) + + from_df.to_csv(path, index=False, header=["X", "Y"]) + recons = self.read_csv(path) + + return_value = recons.reset_index(inplace=True) + assert return_value is None + tm.assert_frame_equal(to_df, recons) + + def test_to_csv_multiindex(self, float_frame, datetime_frame): + + frame = float_frame + old_index = frame.index + arrays = np.arange(len(old_index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + + frame.to_csv(path, header=False) + frame.to_csv(path, columns=["A", "B"]) + + # round trip + frame.to_csv(path) + + df = self.read_csv(path, index_col=[0, 1], parse_dates=False) + + # TODO to_csv drops column name + tm.assert_frame_equal(frame, df, check_names=False) + assert frame.index.names == df.index.names + + # needed if setUp becomes a class method + float_frame.index = old_index + + # try multiindex with dates + tsframe = datetime_frame + old_index = tsframe.index + new_index = [old_index, np.arange(len(old_index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.to_csv(path, index_label=["time", "foo"]) + recons = self.read_csv(path, index_col=[0, 1]) + + # TODO to_csv drops column name + tm.assert_frame_equal(tsframe, recons, check_names=False) + + # do not load index + tsframe.to_csv(path) + recons = self.read_csv(path, index_col=None) + assert len(recons.columns) == len(tsframe.columns) + 2 + + # no index + tsframe.to_csv(path, index=False) + recons = self.read_csv(path, index_col=None) + tm.assert_almost_equal(recons.values, datetime_frame.values) + + # needed if setUp becomes class method + datetime_frame.index = old_index + + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + # GH3571, GH1651, GH3141 + + def _make_frame(names=None): + if names is True: + names = ["first", "second"] + return DataFrame( + np.random.randint(0, 10, size=(3, 3)), + columns=MultiIndex.from_tuples( + [("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names + ), + dtype="int64", + ) + + # column & index are multi-index + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) + tm.assert_frame_equal(df, result) + + # column is mi + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=0) + tm.assert_frame_equal(df, result) + + # dup column names? 
+ df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) + tm.assert_frame_equal(df, result) + + # writing with no index + df = _make_frame() + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + tm.assert_frame_equal(df, result) + + # we lose the names here + df = _make_frame(True) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + assert com.all_none(*result.columns.names) + result.columns.names = df.columns.names + tm.assert_frame_equal(df, result) + + # whatsnew example + df = _make_frame() + df.to_csv(path) + result = read_csv(path, header=[0, 1], index_col=[0]) + tm.assert_frame_equal(df, result) + + df = _make_frame(True) + df.to_csv(path) + result = read_csv(path, header=[0, 1], index_col=[0]) + tm.assert_frame_equal(df, result) + + # invalid options + df = _make_frame(True) + df.to_csv(path) + + for i in [6, 7]: + msg = f"len of {i}, but only 5 lines in file" + with pytest.raises(ParserError, match=msg): + read_csv(path, header=list(range(i)), index_col=0) + + # write with cols + msg = "cannot specify cols with a MultiIndex" + with pytest.raises(TypeError, match=msg): + df.to_csv(path, columns=["foo", "bar"]) + + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + # empty + tsframe[:0].to_csv(path) + recons = self.read_csv(path) + + exp = tsframe[:0] + exp.index = [] + + tm.assert_index_equal(recons.columns, exp.columns) + assert len(recons) == 0 + + def test_to_csv_interval_index(self): + # GH 28210 + df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3)) + + with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + df.to_csv(path) + result = self.read_csv(path, index_col=0) + + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = df.copy() + expected.index = expected.index.astype(str) + + tm.assert_frame_equal(result, expected) + + def test_to_csv_float32_nanrep(self): + df = DataFrame(np.random.randn(1, 4).astype(np.float32)) + df[1] = np.nan + + with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: + df.to_csv(path, na_rep=999) + + with open(path) as f: + lines = f.readlines() + assert lines[1].split(",")[2] == "999" + + def test_to_csv_withcommas(self): + + # Commas inside fields should be correctly escaped when saving as CSV. 
+ df = DataFrame({"A": [1, 2, 3], "B": ["5,6", "7,8", "9,0"]}) + + with tm.ensure_clean("__tmp_to_csv_withcommas__.csv") as path: + df.to_csv(path) + df2 = self.read_csv(path) + tm.assert_frame_equal(df2, df) + + def test_to_csv_mixed(self): + def create_cols(name): + return [f"{name}{i:03d}" for i in range(5)] + + df_float = DataFrame( + np.random.randn(100, 5), dtype="float64", columns=create_cols("float") + ) + df_int = DataFrame( + np.random.randn(100, 5).astype("int64"), + dtype="int64", + columns=create_cols("int"), + ) + df_bool = DataFrame(True, index=df_float.index, columns=create_cols("bool")) + df_object = DataFrame( + "foo", index=df_float.index, columns=create_cols("object") + ) + df_dt = DataFrame( + Timestamp("20010101"), index=df_float.index, columns=create_cols("date") + ) + + # add in some nans + df_float.iloc[30:50, 1:3] = np.nan + + # ## this is a bug in read_csv right now #### + # df_dt.loc[30:50,1:3] = np.nan + + df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) + + # dtype + dtypes = {} + for n, dtype in [ + ("float", np.float64), + ("int", np.int64), + ("bool", np.bool_), + ("object", object), + ]: + for c in create_cols(n): + dtypes[c] = dtype + + with tm.ensure_clean() as filename: + df.to_csv(filename) + rs = read_csv( + filename, index_col=0, dtype=dtypes, parse_dates=create_cols("date") + ) + tm.assert_frame_equal(rs, df) + + def test_to_csv_dups_cols(self): + + df = DataFrame( + np.random.randn(1000, 30), + columns=list(range(15)) + list(range(15)), + dtype="float64", + ) + + with tm.ensure_clean() as filename: + df.to_csv(filename) # single dtype, fine + result = read_csv(filename, index_col=0) + result.columns = df.columns + tm.assert_frame_equal(result, df) + + df_float = DataFrame(np.random.randn(1000, 3), dtype="float64") + df_int = DataFrame(np.random.randn(1000, 3)).astype("int64") + df_bool = DataFrame(True, index=df_float.index, columns=range(3)) + df_object = DataFrame("foo", index=df_float.index, columns=range(3)) + df_dt = DataFrame(Timestamp("20010101"), index=df_float.index, columns=range(3)) + df = pd.concat( + [df_float, df_int, df_bool, df_object, df_dt], axis=1, ignore_index=True + ) + + df.columns = [0, 1, 2] * 5 + + with tm.ensure_clean() as filename: + df.to_csv(filename) + result = read_csv(filename, index_col=0) + + # date cols + for i in ["0.4", "1.4", "2.4"]: + result[i] = to_datetime(result[i]) + + result.columns = df.columns + tm.assert_frame_equal(result, df) + + # GH3457 + + N = 10 + df = tm.makeCustomDataframe(N, 3) + df.columns = ["a", "a", "b"] + + with tm.ensure_clean() as filename: + df.to_csv(filename) + + # read_csv will rename the dups columns + result = read_csv(filename, index_col=0) + result = result.rename(columns={"a.1": "a"}) + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("chunksize", [10000, 50000, 100000]) + def test_to_csv_chunking(self, chunksize): + + aa = DataFrame({"A": range(100000)}) + aa["B"] = aa.A + 1.0 + aa["C"] = aa.A + 2.0 + aa["D"] = aa.A + 3.0 + + with tm.ensure_clean() as filename: + aa.to_csv(filename, chunksize=chunksize) + rs = read_csv(filename, index_col=0) + tm.assert_frame_equal(rs, aa) + + @pytest.mark.slow + def test_to_csv_wide_frame_formatting(self): + # Issue #8621 + df = DataFrame(np.random.randn(1, 100010), columns=None, index=None) + with tm.ensure_clean() as filename: + df.to_csv(filename, header=False, index=False) + rs = read_csv(filename, header=None) + tm.assert_frame_equal(rs, df) + + def test_to_csv_bug(self): + f1 = StringIO("a,1.0\nb,2.0") 
+ df = self.read_csv(f1, header=None) + newdf = DataFrame({"t": df[df.columns[0]]}) + + with tm.ensure_clean() as path: + newdf.to_csv(path) + + recons = read_csv(path, index_col=0) + # don't check_names as t != 1 + tm.assert_frame_equal(recons, newdf, check_names=False) + + def test_to_csv_unicode(self): + + df = DataFrame({"c/\u03c3": [1, 2, 3]}) + with tm.ensure_clean() as path: + + df.to_csv(path, encoding="UTF-8") + df2 = read_csv(path, index_col=0, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + df.to_csv(path, encoding="UTF-8", index=False) + df2 = read_csv(path, index_col=None, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + def test_to_csv_unicode_index_col(self): + buf = StringIO("") + df = DataFrame( + [["\u05d0", "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], + columns=["\u05d0", "\u05d1", "\u05d2", "\u05d3"], + index=["\u05d0", "\u05d1"], + ) + + df.to_csv(buf, encoding="UTF-8") + buf.seek(0) + + df2 = read_csv(buf, index_col=0, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + def test_to_csv_stringio(self, float_frame): + buf = StringIO() + float_frame.to_csv(buf) + buf.seek(0) + recons = read_csv(buf, index_col=0) + tm.assert_frame_equal(recons, float_frame) + + def test_to_csv_float_format(self): + + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + + with tm.ensure_clean() as filename: + + df.to_csv(filename, float_format="%.2f") + + rs = read_csv(filename, index_col=0) + xp = DataFrame( + [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + tm.assert_frame_equal(rs, xp) + + def test_to_csv_float_format_over_decimal(self): + # GH#47436 + df = DataFrame({"a": [0.5, 1.0]}) + result = df.to_csv( + decimal=",", + float_format=lambda x: np.format_float_positional(x, trim="-"), + index=False, + ) + expected_rows = ["a", "0.5", "1"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_unicodewriter_quoting(self): + df = DataFrame({"A": [1, 2, 3], "B": ["foo", "bar", "baz"]}) + + buf = StringIO() + df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC, encoding="utf-8") + + result = buf.getvalue() + expected_rows = ['"A","B"', '1,"foo"', '2,"bar"', '3,"baz"'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + @pytest.mark.parametrize("encoding", [None, "utf-8"]) + def test_to_csv_quote_none(self, encoding): + # GH4328 + df = DataFrame({"A": ["hello", '{"hello"}']}) + buf = StringIO() + df.to_csv(buf, quoting=csv.QUOTE_NONE, encoding=encoding, index=False) + + result = buf.getvalue() + expected_rows = ["A", "hello", '{"hello"}'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_index_no_leading_comma(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) + + buf = StringIO() + df.to_csv(buf, index_label=False) + + expected_rows = ["A,B", "one,1,4", "two,2,5", "three,3,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert buf.getvalue() == expected + + def test_to_csv_lineterminators(self): + # see gh-20353 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) + + with tm.ensure_clean() as path: + # case 1: CRLF as line terminator + df.to_csv(path, lineterminator="\r\n") + expected = b",A,B\r\none,1,4\r\ntwo,2,5\r\nthree,3,6\r\n" + + with open(path, mode="rb") as f: + assert f.read() == 
expected + + with tm.ensure_clean() as path: + # case 2: LF as line terminator + df.to_csv(path, lineterminator="\n") + expected = b",A,B\none,1,4\ntwo,2,5\nthree,3,6\n" + + with open(path, mode="rb") as f: + assert f.read() == expected + + with tm.ensure_clean() as path: + # case 3: The default line terminator(=os.linesep)(gh-21406) + df.to_csv(path) + os_linesep = os.linesep.encode("utf-8") + expected = ( + b",A,B" + + os_linesep + + b"one,1,4" + + os_linesep + + b"two,2,5" + + os_linesep + + b"three,3,6" + + os_linesep + ) + + with open(path, mode="rb") as f: + assert f.read() == expected + + def test_to_csv_from_csv_categorical(self): + + # CSV with categoricals should result in the same output + # as when one would add a "normal" Series/DataFrame. + s = Series(pd.Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) + s2 = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) + res = StringIO() + + s.to_csv(res, header=False) + exp = StringIO() + + s2.to_csv(exp, header=False) + assert res.getvalue() == exp.getvalue() + + df = DataFrame({"s": s}) + df2 = DataFrame({"s": s2}) + + res = StringIO() + df.to_csv(res) + + exp = StringIO() + df2.to_csv(exp) + + assert res.getvalue() == exp.getvalue() + + def test_to_csv_path_is_none(self, float_frame): + # GH 8215 + # Make sure we return string for consistency with + # Series.to_csv() + csv_str = float_frame.to_csv(path_or_buf=None) + assert isinstance(csv_str, str) + recons = read_csv(StringIO(csv_str), index_col=0) + tm.assert_frame_equal(float_frame, recons) + + @pytest.mark.parametrize( + "df,encoding", + [ + ( + DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ), + None, + ), + # GH 21241, 21118 + (DataFrame([["abc", "def", "ghi"]], columns=["X", "Y", "Z"]), "ascii"), + (DataFrame(5 * [[123, "你好", "世界"]], columns=["X", "Y", "Z"]), "gb2312"), + ( + DataFrame(5 * [[123, "Γειά σου", "Κόσμε"]], columns=["X", "Y", "Z"]), + "cp737", + ), + ], + ) + def test_to_csv_compression(self, df, encoding, compression): + + with tm.ensure_clean() as filename: + + df.to_csv(filename, compression=compression, encoding=encoding) + # test the round trip - to_csv -> read_csv + result = read_csv( + filename, compression=compression, index_col=0, encoding=encoding + ) + tm.assert_frame_equal(df, result) + + # test the round trip using file handle - to_csv -> read_csv + with get_handle( + filename, "w", compression=compression, encoding=encoding + ) as handles: + df.to_csv(handles.handle, encoding=encoding) + assert not handles.handle.closed + + result = read_csv( + filename, + compression=compression, + encoding=encoding, + index_col=0, + ).squeeze("columns") + tm.assert_frame_equal(df, result) + + # explicitly make sure file is compressed + with tm.decompress_file(filename, compression) as fh: + text = fh.read().decode(encoding or "utf8") + for col in df.columns: + assert col in text + + with tm.decompress_file(filename, compression) as fh: + tm.assert_frame_equal(df, read_csv(fh, index_col=0, encoding=encoding)) + + def test_to_csv_date_format(self, datetime_frame): + with tm.ensure_clean("__tmp_to_csv_date_format__") as path: + dt_index = datetime_frame.index + datetime_frame = DataFrame( + {"A": dt_index, "B": dt_index.shift(1)}, index=dt_index + ) + datetime_frame.to_csv(path, date_format="%Y%m%d") + + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) + + datetime_frame_int = datetime_frame.applymap( + lambda x: int(x.strftime("%Y%m%d")) + ) 
+ datetime_frame_int.index = datetime_frame_int.index.map( + lambda x: int(x.strftime("%Y%m%d")) + ) + + tm.assert_frame_equal(test, datetime_frame_int) + + datetime_frame.to_csv(path, date_format="%Y-%m-%d") + + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) + datetime_frame_str = datetime_frame.applymap( + lambda x: x.strftime("%Y-%m-%d") + ) + datetime_frame_str.index = datetime_frame_str.index.map( + lambda x: x.strftime("%Y-%m-%d") + ) + + tm.assert_frame_equal(test, datetime_frame_str) + + # Check that columns get converted + datetime_frame_columns = datetime_frame.T + datetime_frame_columns.to_csv(path, date_format="%Y%m%d") + + test = read_csv(path, index_col=0) + + datetime_frame_columns = datetime_frame_columns.applymap( + lambda x: int(x.strftime("%Y%m%d")) + ) + # Columns don't get converted to ints by read_csv + datetime_frame_columns.columns = datetime_frame_columns.columns.map( + lambda x: x.strftime("%Y%m%d") + ) + + tm.assert_frame_equal(test, datetime_frame_columns) + + # test NaTs + nat_index = to_datetime( + ["NaT"] * 10 + ["2000-01-01", "1/1/2000", "1-1-2000"] + ) + nat_frame = DataFrame({"A": nat_index}, index=nat_index) + nat_frame.to_csv(path, date_format="%Y-%m-%d") + + test = read_csv(path, parse_dates=[0, 1], index_col=0) + + tm.assert_frame_equal(test, nat_frame) + + @pytest.mark.parametrize("td", [pd.Timedelta(0), pd.Timedelta("10s")]) + def test_to_csv_with_dst_transitions(self, td): + + with tm.ensure_clean("csv_date_format_with_dst") as path: + # make sure we are not failing on transitions + times = date_range( + "2013-10-26 23:00", + "2013-10-27 01:00", + tz="Europe/London", + freq="H", + ambiguous="infer", + ) + i = times + td + i = i._with_freq(None) # freq is not preserved by read_csv + time_range = np.array(range(len(i)), dtype="int64") + df = DataFrame({"A": time_range}, index=i) + df.to_csv(path, index=True) + # we have to reconvert the index as we + # don't parse the tz's + result = read_csv(path, index_col=0) + result.index = to_datetime(result.index, utc=True).tz_convert( + "Europe/London" + ) + tm.assert_frame_equal(result, df) + + def test_to_csv_with_dst_transitions_with_pickle(self): + # GH11619 + idx = date_range("2015-01-01", "2015-12-31", freq="H", tz="Europe/Paris") + idx = idx._with_freq(None) # freq does not round-trip + idx._data._freq = None # otherwise there is trouble on unpickle + df = DataFrame({"values": 1, "idx": idx}, index=idx) + with tm.ensure_clean("csv_date_format_with_dst") as path: + df.to_csv(path, index=True) + result = read_csv(path, index_col=0) + result.index = to_datetime(result.index, utc=True).tz_convert( + "Europe/Paris" + ) + result["idx"] = to_datetime(result["idx"], utc=True).astype( + "datetime64[ns, Europe/Paris]" + ) + tm.assert_frame_equal(result, df) + + # assert working + df.astype(str) + + with tm.ensure_clean("csv_date_format_with_dst") as path: + df.to_pickle(path) + result = pd.read_pickle(path) + tm.assert_frame_equal(result, df) + + def test_to_csv_quoting(self): + df = DataFrame( + { + "c_bool": [True, False], + "c_float": [1.0, 3.2], + "c_int": [42, np.nan], + "c_string": ["a", "b,c"], + } + ) + + expected_rows = [ + ",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", + '1,False,3.2,,"b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv() + assert result == expected + + result = df.to_csv(quoting=None) + assert result == expected + + expected_rows = [ + ",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", 
+ '1,False,3.2,,"b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv(quoting=csv.QUOTE_MINIMAL) + assert result == expected + + expected_rows = [ + '"","c_bool","c_float","c_int","c_string"', + '"0","True","1.0","42.0","a"', + '"1","False","3.2","","b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv(quoting=csv.QUOTE_ALL) + assert result == expected + + # see gh-12922, gh-13259: make sure changes to + # the formatters do not break this behaviour + expected_rows = [ + '"","c_bool","c_float","c_int","c_string"', + '0,True,1.0,42.0,"a"', + '1,False,3.2,"","b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC) + assert result == expected + + msg = "need to escape, but no escapechar set" + with pytest.raises(csv.Error, match=msg): + df.to_csv(quoting=csv.QUOTE_NONE) + + with pytest.raises(csv.Error, match=msg): + df.to_csv(quoting=csv.QUOTE_NONE, escapechar=None) + + expected_rows = [ + ",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", + "1,False,3.2,,b!,c", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar="!") + assert result == expected + + expected_rows = [ + ",c_bool,c_ffloat,c_int,c_string", + "0,True,1.0,42.0,a", + "1,False,3.2,,bf,c", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar="f") + assert result == expected + + # see gh-3503: quoting Windows line terminators + # presents with encoding? + text_rows = ["a,b,c", '1,"test \r\n",3'] + text = tm.convert_rows_list_to_csv_str(text_rows) + df = read_csv(StringIO(text)) + + buf = StringIO() + df.to_csv(buf, encoding="utf-8", index=False) + assert buf.getvalue() == text + + # xref gh-7791: make sure the quoting parameter is passed through + # with multi-indexes + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}) + df = df.set_index(["a", "b"]) + + expected_rows = ['"a","b","c"', '"1","3","5"', '"2","4","6"'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(quoting=csv.QUOTE_ALL) == expected + + def test_period_index_date_overflow(self): + # see gh-15982 + + dates = ["1990-01-01", "2000-01-01", "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected_rows = [",0", "1990-01-01,4", "2000-01-01,5", "3005-01-01,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + date_format = "%m-%d-%Y" + result = df.to_csv(date_format=date_format) + + expected_rows = [",0", "01-01-1990,4", "01-01-2000,5", "01-01-3005,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + # Overflow with pd.NaT + dates = ["1990-01-01", NaT, "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected_rows = [",0", "1990-01-01,4", ",5", "3005-01-01,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_multi_index_header(self): + # see gh-5539 + columns = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + df.columns = columns + + header = ["a", "b", "c", "d"] + result = df.to_csv(header=header) + + expected_rows = [",a,b,c,d", "0,1,2,3,4", "1,5,6,7,8"] + expected = 
tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_single_level_multi_index(self): + # see gh-26303 + index = Index([(1,), (2,), (3,)]) + df = DataFrame([[1, 2, 3]], columns=index) + df = df.reindex(columns=[(1,), (3,)]) + expected = ",1,3\n0,1,3\n" + result = df.to_csv(lineterminator="\n") + tm.assert_almost_equal(result, expected) + + def test_gz_lineend(self): + # GH 25311 + df = DataFrame({"a": [1, 2]}) + expected_rows = ["a", "1", "2"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + with tm.ensure_clean("__test_gz_lineend.csv.gz") as path: + df.to_csv(path, index=False) + with tm.decompress_file(path, compression="gzip") as f: + result = f.read().decode("utf-8") + + assert result == expected + + def test_to_csv_numpy_16_bug(self): + frame = DataFrame({"a": date_range("1/1/2000", periods=10)}) + + buf = StringIO() + frame.to_csv(buf) + + result = buf.getvalue() + assert "2000-01-01" in result + + def test_to_csv_na_quoting(self): + # GH 15891 + # Normalize carriage return for Windows OS + result = ( + DataFrame([None, None]) + .to_csv(None, header=False, index=False, na_rep="") + .replace("\r\n", "\n") + ) + expected = '""\n""\n' + assert result == expected + + def test_to_csv_categorical_and_ea(self): + # GH#46812 + df = DataFrame({"a": "x", "b": [1, pd.NA]}) + df["b"] = df["b"].astype("Int16") + df["b"] = df["b"].astype("category") + result = df.to_csv() + expected_rows = [",a,b", "0,x,1", "1,x,"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_categorical_and_interval(self): + # GH#46297 + df = DataFrame( + { + "a": [ + pd.Interval( + Timestamp("2020-01-01"), + Timestamp("2020-01-02"), + closed="both", + ) + ] + } + ) + df["a"] = df["a"].astype("category") + result = df.to_csv() + expected_rows = [",a", '0,"[2020-01-01, 2020-01-02]"'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py new file mode 100644 index 00000000..6d5c32ca --- /dev/null +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -0,0 +1,423 @@ +from collections import ( + OrderedDict, + defaultdict, +) +from datetime import datetime + +import numpy as np +import pytest +import pytz + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm + + +class TestDataFrameToDict: + def test_to_dict_timestamp(self): + + # GH#11247 + # split/records producing np.datetime64 rather than Timestamps + # on datetime64[ns] dtypes only + + tsmp = Timestamp("20130101") + test_data = DataFrame({"A": [tsmp, tsmp], "B": [tsmp, tsmp]}) + test_data_mixed = DataFrame({"A": [tsmp, tsmp], "B": [1, 2]}) + + expected_records = [{"A": tsmp, "B": tsmp}, {"A": tsmp, "B": tsmp}] + expected_records_mixed = [{"A": tsmp, "B": 1}, {"A": tsmp, "B": 2}] + + assert test_data.to_dict(orient="records") == expected_records + assert test_data_mixed.to_dict(orient="records") == expected_records_mixed + + expected_series = { + "A": Series([tsmp, tsmp], name="A"), + "B": Series([tsmp, tsmp], name="B"), + } + expected_series_mixed = { + "A": Series([tsmp, tsmp], name="A"), + "B": Series([1, 2], name="B"), + } + + tm.assert_dict_equal(test_data.to_dict(orient="series"), expected_series) + tm.assert_dict_equal( + test_data_mixed.to_dict(orient="series"), expected_series_mixed + ) + + expected_split = { + "index": [0, 1], + "data": [[tsmp, tsmp], [tsmp, 
tsmp]], + "columns": ["A", "B"], + } + expected_split_mixed = { + "index": [0, 1], + "data": [[tsmp, 1], [tsmp, 2]], + "columns": ["A", "B"], + } + + tm.assert_dict_equal(test_data.to_dict(orient="split"), expected_split) + tm.assert_dict_equal( + test_data_mixed.to_dict(orient="split"), expected_split_mixed + ) + + def test_to_dict_index_not_unique_with_index_orient(self): + # GH#22801 + # Data loss when indexes are not unique. Raise ValueError. + df = DataFrame({"a": [1, 2], "b": [0.5, 0.75]}, index=["A", "A"]) + msg = "DataFrame index must be unique for orient='index'" + with pytest.raises(ValueError, match=msg): + df.to_dict(orient="index") + + def test_to_dict_invalid_orient(self): + df = DataFrame({"A": [0, 1]}) + msg = "orient 'xinvalid' not understood" + with pytest.raises(ValueError, match=msg): + df.to_dict(orient="xinvalid") + + @pytest.mark.parametrize("orient", ["d", "l", "r", "sp", "s", "i"]) + def test_to_dict_short_orient_warns(self, orient): + # GH#32515 + df = DataFrame({"A": [0, 1]}) + msg = "Using short name for 'orient' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.to_dict(orient=orient) + + @pytest.mark.parametrize("mapping", [dict, defaultdict(list), OrderedDict]) + def test_to_dict(self, mapping): + # orient= should only take the listed options + # see GH#32515 + test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + + # GH#16122 + recons_data = DataFrame(test_data).to_dict(into=mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][k2] + + recons_data = DataFrame(test_data).to_dict("list", mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][int(k2) - 1] + + recons_data = DataFrame(test_data).to_dict("series", mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][k2] + + recons_data = DataFrame(test_data).to_dict("split", mapping) + expected_split = { + "columns": ["A", "B"], + "index": ["1", "2", "3"], + "data": [[1.0, "1"], [2.0, "2"], [np.nan, "3"]], + } + tm.assert_dict_equal(recons_data, expected_split) + + recons_data = DataFrame(test_data).to_dict("records", mapping) + expected_records = [ + {"A": 1.0, "B": "1"}, + {"A": 2.0, "B": "2"}, + {"A": np.nan, "B": "3"}, + ] + assert isinstance(recons_data, list) + assert len(recons_data) == 3 + for left, right in zip(recons_data, expected_records): + tm.assert_dict_equal(left, right) + + # GH#10844 + recons_data = DataFrame(test_data).to_dict("index") + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k2][k] + + df = DataFrame(test_data) + df["duped"] = df[df.columns[0]] + recons_data = df.to_dict("index") + comp_data = test_data.copy() + comp_data["duped"] = comp_data[df.columns[0]] + for k, v in comp_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k2][k] + + @pytest.mark.parametrize("mapping", [list, defaultdict, []]) + def test_to_dict_errors(self, mapping): + # GH#16122 + df = DataFrame(np.random.randn(3, 3)) + msg = "|".join( + [ + "unsupported type: ", + r"to_dict\(\) only accepts initialized defaultdicts", + ] + ) + with pytest.raises(TypeError, match=msg): + df.to_dict(into=mapping) + + def test_to_dict_not_unique_warning(self): + # GH#16927: When converting to a dict, if a column has a non-unique name + # it will be dropped, throwing a warning. 
+ df = DataFrame([[1, 2, 3]], columns=["a", "a", "b"]) + with tm.assert_produces_warning(UserWarning): + df.to_dict() + + # orient - orient argument to to_dict function + # item_getter - function for extracting value from + # the resulting dict using column name and index + @pytest.mark.parametrize( + "orient,item_getter", + [ + ("dict", lambda d, col, idx: d[col][idx]), + ("records", lambda d, col, idx: d[idx][col]), + ("list", lambda d, col, idx: d[col][idx]), + ("split", lambda d, col, idx: d["data"][idx][d["columns"].index(col)]), + ("index", lambda d, col, idx: d[idx][col]), + ], + ) + def test_to_dict_box_scalars(self, orient, item_getter): + # GH#14216, GH#23753 + # make sure that we are boxing properly + df = DataFrame({"a": [1, 2], "b": [0.1, 0.2]}) + result = df.to_dict(orient=orient) + assert isinstance(item_getter(result, "a", 0), int) + assert isinstance(item_getter(result, "b", 0), float) + + def test_to_dict_tz(self): + # GH#18372 When converting to dict with orient='records' columns of + # datetime that are tz-aware were not converted to required arrays + data = [ + (datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=pytz.utc),), + (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc),), + ] + df = DataFrame(list(data), columns=["d"]) + + result = df.to_dict(orient="records") + expected = [ + {"d": Timestamp("2017-11-18 21:53:00.219225+0000", tz=pytz.utc)}, + {"d": Timestamp("2017-11-18 22:06:30.061810+0000", tz=pytz.utc)}, + ] + tm.assert_dict_equal(result[0], expected[0]) + tm.assert_dict_equal(result[1], expected[1]) + + @pytest.mark.parametrize( + "into, expected", + [ + ( + dict, + { + 0: {"int_col": 1, "float_col": 1.0}, + 1: {"int_col": 2, "float_col": 2.0}, + 2: {"int_col": 3, "float_col": 3.0}, + }, + ), + ( + OrderedDict, + OrderedDict( + [ + (0, {"int_col": 1, "float_col": 1.0}), + (1, {"int_col": 2, "float_col": 2.0}), + (2, {"int_col": 3, "float_col": 3.0}), + ] + ), + ), + ( + defaultdict(dict), + defaultdict( + dict, + { + 0: {"int_col": 1, "float_col": 1.0}, + 1: {"int_col": 2, "float_col": 2.0}, + 2: {"int_col": 3, "float_col": 3.0}, + }, + ), + ), + ], + ) + def test_to_dict_index_dtypes(self, into, expected): + # GH#18580 + # When using to_dict(orient='index') on a dataframe with int + # and float columns only the int columns were cast to float + + df = DataFrame({"int_col": [1, 2, 3], "float_col": [1.0, 2.0, 3.0]}) + + result = df.to_dict(orient="index", into=into) + cols = ["int_col", "float_col"] + result = DataFrame.from_dict(result, orient="index")[cols] + expected = DataFrame.from_dict(expected, orient="index")[cols] + tm.assert_frame_equal(result, expected) + + def test_to_dict_numeric_names(self): + # GH#24940 + df = DataFrame({str(i): [i] for i in range(5)}) + result = set(df.to_dict("records")[0].keys()) + expected = set(df.columns) + assert result == expected + + def test_to_dict_wide(self): + # GH#24939 + df = DataFrame({(f"A_{i:d}"): [i] for i in range(256)}) + result = df.to_dict("records")[0] + expected = {f"A_{i:d}": i for i in range(256)} + assert result == expected + + @pytest.mark.parametrize( + "data,dtype", + ( + ([True, True, False], bool), + [ + [ + datetime(2018, 1, 1), + datetime(2019, 2, 2), + datetime(2020, 3, 3), + ], + Timestamp, + ], + [[1.0, 2.0, 3.0], float], + [[1, 2, 3], int], + [["X", "Y", "Z"], str], + ), + ) + def test_to_dict_orient_dtype(self, data, dtype): + # GH22620 & GH21256 + + df = DataFrame({"a": data}) + d = df.to_dict(orient="records") + assert all(type(record["a"]) is dtype for record in d) + + 
@pytest.mark.parametrize( + "data,expected_dtype", + ( + [np.uint64(2), int], + [np.int64(-9), int], + [np.float64(1.1), float], + [np.bool_(True), bool], + [np.datetime64("2005-02-25"), Timestamp], + ), + ) + def test_to_dict_scalar_constructor_orient_dtype(self, data, expected_dtype): + # GH22620 & GH21256 + + df = DataFrame({"a": data}, index=[0]) + d = df.to_dict(orient="records") + result = type(d[0]["a"]) + assert result is expected_dtype + + def test_to_dict_mixed_numeric_frame(self): + # GH 12859 + df = DataFrame({"a": [1.0], "b": [9.0]}) + result = df.reset_index().to_dict("records") + expected = [{"index": 0, "a": 1.0, "b": 9.0}] + assert result == expected + + @pytest.mark.parametrize( + "index", + [ + None, + Index(["aa", "bb"]), + Index(["aa", "bb"], name="cc"), + MultiIndex.from_tuples([("a", "b"), ("a", "c")]), + MultiIndex.from_tuples([("a", "b"), ("a", "c")], names=["n1", "n2"]), + ], + ) + @pytest.mark.parametrize( + "columns", + [ + ["x", "y"], + Index(["x", "y"]), + Index(["x", "y"], name="z"), + MultiIndex.from_tuples([("x", 1), ("y", 2)]), + MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["z1", "z2"]), + ], + ) + def test_to_dict_orient_tight(self, index, columns): + df = DataFrame.from_records( + [[1, 3], [2, 4]], + columns=columns, + index=index, + ) + roundtrip = DataFrame.from_dict(df.to_dict(orient="tight"), orient="tight") + + tm.assert_frame_equal(df, roundtrip) + + @pytest.mark.parametrize( + "orient", + ["dict", "list", "split", "records", "index", "tight"], + ) + @pytest.mark.parametrize( + "data,expected_types", + ( + ( + { + "a": [np.int64(1), 1, np.int64(3)], + "b": [np.float64(1.0), 2.0, np.float64(3.0)], + "c": [np.float64(1.0), 2, np.int64(3)], + "d": [np.float64(1.0), "a", np.int64(3)], + "e": [np.float64(1.0), ["a"], np.int64(3)], + "f": [np.float64(1.0), ("a",), np.int64(3)], + }, + { + "a": [int, int, int], + "b": [float, float, float], + "c": [float, float, float], + "d": [float, str, int], + "e": [float, list, int], + "f": [float, tuple, int], + }, + ), + ( + { + "a": [1, 2, 3], + "b": [1.1, 2.2, 3.3], + }, + { + "a": [int, int, int], + "b": [float, float, float], + }, + ), + ), + ) + def test_to_dict_returns_native_types(self, orient, data, expected_types): + # GH 46751 + # Tests we get back native types for all orient types + df = DataFrame(data) + result = df.to_dict(orient) + if orient == "dict": + assertion_iterator = ( + (i, key, value) + for key, index_value_map in result.items() + for i, value in index_value_map.items() + ) + elif orient == "list": + assertion_iterator = ( + (i, key, value) + for key, values in result.items() + for i, value in enumerate(values) + ) + elif orient in {"split", "tight"}: + assertion_iterator = ( + (i, key, result["data"][i][j]) + for i in result["index"] + for j, key in enumerate(result["columns"]) + ) + elif orient == "records": + assertion_iterator = ( + (i, key, value) + for i, record in enumerate(result) + for key, value in record.items() + ) + elif orient == "index": + assertion_iterator = ( + (i, key, value) + for i, record in result.items() + for key, value in record.items() + ) + + for i, key, value in assertion_iterator: + assert value == data[key][i] + assert type(value) is expected_types[key][i] diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py new file mode 100644 index 00000000..eb9b7861 --- /dev/null +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -0,0 +1,77 @@ +import numpy as np + +import 
pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + MultiIndex, +) +import pandas._testing as tm +from pandas.core.arrays import PandasArray + +pytestmark = td.skip_array_manager_invalid_test + + +class TestToDictOfBlocks: + def test_copy_blocks(self, float_frame): + # GH#9607 + df = DataFrame(float_frame, copy=True) + column = df.columns[0] + + # use the default copy=True, change a column + blocks = df._to_dict_of_blocks(copy=True) + for _df in blocks.values(): + if column in _df: + _df.loc[:, column] = _df[column] + 1 + + # make sure we did not change the original DataFrame + assert not _df[column].equals(df[column]) + + def test_no_copy_blocks(self, float_frame, using_copy_on_write): + # GH#9607 + df = DataFrame(float_frame, copy=True) + column = df.columns[0] + + # use the copy=False, change a column + blocks = df._to_dict_of_blocks(copy=False) + for _df in blocks.values(): + if column in _df: + _df.loc[:, column] = _df[column] + 1 + + if not using_copy_on_write: + # make sure we did change the original DataFrame + assert _df[column].equals(df[column]) + else: + assert not _df[column].equals(df[column]) + + +def test_to_dict_of_blocks_item_cache(): + # Calling to_dict_of_blocks should not poison item_cache + df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + df["c"] = PandasArray(np.array([1, 2, None, 3], dtype=object)) + mgr = df._mgr + assert len(mgr.blocks) == 3 # i.e. not consolidated + + ser = df["b"] # populations item_cache["b"] + + df._to_dict_of_blocks() + + # Check that the to_dict_of_blocks didn't break link between ser and df + ser.values[0] = "foo" + assert df.loc[0, "b"] == "foo" + + assert df["b"] is ser + + +def test_set_change_dtype_slice(): + # GH#8850 + cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")]) + df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) + df["2nd"] = df["2nd"] * 2.0 + + blocks = df._to_dict_of_blocks() + assert sorted(blocks.keys()) == ["float64", "int64"] + tm.assert_frame_equal( + blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]) + ) + tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:])) diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py new file mode 100644 index 00000000..532f7c87 --- /dev/null +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -0,0 +1,38 @@ +import numpy as np + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Timestamp, +) +import pandas._testing as tm + + +class TestToNumpy: + def test_to_numpy(self): + df = DataFrame({"A": [1, 2], "B": [3, 4.5]}) + expected = np.array([[1, 3], [2, 4.5]]) + result = df.to_numpy() + tm.assert_numpy_array_equal(result, expected) + + def test_to_numpy_dtype(self): + df = DataFrame({"A": [1, 2], "B": [3, 4.5]}) + expected = np.array([[1, 3], [2, 4]], dtype="int64") + result = df.to_numpy(dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + @td.skip_array_manager_invalid_test + def test_to_numpy_copy(self): + arr = np.random.randn(4, 3) + df = DataFrame(arr) + assert df.values.base is arr + assert df.to_numpy(copy=False).base is arr + assert df.to_numpy(copy=True).base is not arr + + def test_to_numpy_mixed_dtype_to_str(self): + # https://github.com/pandas-dev/pandas/issues/35455 + df = DataFrame([[Timestamp("2020-01-01 00:00:00"), 100.0]]) + result = df.to_numpy(dtype=str) + expected = np.array([["2020-01-01 00:00:00", "100.0"]], dtype=str) + tm.assert_numpy_array_equal(result, 
expected) diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py new file mode 100644 index 00000000..cd1b4b61 --- /dev/null +++ b/pandas/tests/frame/methods/test_to_period.py @@ -0,0 +1,85 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + PeriodIndex, + Series, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestToPeriod: + def test_to_period(self, frame_or_series): + K = 5 + + dr = date_range("1/1/2000", "1/1/2001", freq="D") + obj = DataFrame( + np.random.randn(len(dr), K), index=dr, columns=["A", "B", "C", "D", "E"] + ) + obj["mix"] = "a" + obj = tm.get_obj(obj, frame_or_series) + + pts = obj.to_period() + exp = obj.copy() + exp.index = period_range("1/1/2000", "1/1/2001") + tm.assert_equal(pts, exp) + + pts = obj.to_period("M") + exp.index = exp.index.asfreq("M") + tm.assert_equal(pts, exp) + + def test_to_period_without_freq(self, frame_or_series): + # GH#7606 without freq + idx = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]) + exp_idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" + ) + + obj = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) + obj = tm.get_obj(obj, frame_or_series) + expected = obj.copy() + expected.index = exp_idx + tm.assert_equal(obj.to_period(), expected) + + if frame_or_series is DataFrame: + expected = obj.copy() + expected.columns = exp_idx + tm.assert_frame_equal(obj.to_period(axis=1), expected) + + def test_to_period_columns(self): + dr = date_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), 5), index=dr) + df["mix"] = "a" + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = period_range("1/1/2000", "1/1/2001") + tm.assert_frame_equal(pts, exp) + + pts = df.to_period("M", axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) + + def test_to_period_invalid_axis(self): + dr = date_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), 5), index=dr) + df["mix"] = "a" + + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.to_period(axis=2) + + def test_to_period_raises(self, index, frame_or_series): + # https://github.com/pandas-dev/pandas/issues/33327 + obj = Series(index=index, dtype=object) + if frame_or_series is DataFrame: + obj = obj.to_frame() + + if not isinstance(index, DatetimeIndex): + msg = f"unsupported Type {type(index).__name__}" + with pytest.raises(TypeError, match=msg): + obj.to_period() diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py new file mode 100644 index 00000000..32cccddc --- /dev/null +++ b/pandas/tests/frame/methods/test_to_records.py @@ -0,0 +1,510 @@ +from collections import abc + +import numpy as np +import pytest + +from pandas import ( + CategoricalDtype, + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameToRecords: + def test_to_records_timeseries(self): + index = date_range("1/1/2000", periods=10) + df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"]) + + result = df.to_records() + assert result["index"].dtype == "M8[ns]" + + result = df.to_records(index=False) + + def test_to_records_dt64(self): + df = DataFrame( + [["one", "two", "three"], ["four", "five", "six"]], + index=date_range("2012-01-01", "2012-01-02"), + ) + + expected = df.index.values[0] + result = 
df.to_records()["index"][0] + assert expected == result + + def test_to_records_dt64tz_column(self): + # GH#32535 dont less tz in to_records + df = DataFrame({"A": date_range("2012-01-01", "2012-01-02", tz="US/Eastern")}) + + result = df.to_records() + + assert result.dtype["A"] == object + val = result[0][1] + assert isinstance(val, Timestamp) + assert val == df.loc[0, "A"] + + def test_to_records_with_multindex(self): + # GH#3189 + index = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + data = np.zeros((8, 4)) + df = DataFrame(data, index=index) + r = df.to_records(index=True)["level_0"] + assert "bar" in r + assert "one" not in r + + def test_to_records_with_Mapping_type(self): + import email + from email.parser import Parser + + abc.Mapping.register(email.message.Message) + + headers = Parser().parsestr( + "From: \n" + "To: \n" + "Subject: Test message\n" + "\n" + "Body would go here\n" + ) + + frame = DataFrame.from_records([headers]) + all(x in frame for x in ["Type", "Subject", "From"]) + + def test_to_records_floats(self): + df = DataFrame(np.random.rand(10, 10)) + df.to_records() + + def test_to_records_index_name(self): + df = DataFrame(np.random.randn(3, 3)) + df.index.name = "X" + rs = df.to_records() + assert "X" in rs.dtype.fields + + df = DataFrame(np.random.randn(3, 3)) + rs = df.to_records() + assert "index" in rs.dtype.fields + + df.index = MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")]) + df.index.names = ["A", None] + result = df.to_records() + expected = np.rec.fromarrays( + [np.array(["a", "a", "b"]), np.array(["x", "y", "z"])] + + [np.asarray(df.iloc[:, i]) for i in range(3)], + dtype={ + "names": ["A", "level_1", "0", "1", "2"], + "formats": ["O", "O", " default to array dtypes. + ( + {}, + np.rec.array( + [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), + ("C", "O"), + ], + ), + ), + # Should have no effect in this case. + ( + {"index": True}, + np.rec.array( + [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), + ("C", "O"), + ], + ), + ), + # Column dtype applied across the board. Index unaffected. + ( + {"column_dtypes": f"{tm.ENDIAN}U4"}, + np.rec.array( + [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}U4"), + ("B", f"{tm.ENDIAN}U4"), + ("C", f"{tm.ENDIAN}U4"), + ], + ), + ), + # Index dtype applied across the board. Columns unaffected. + ( + {"index_dtypes": f"{tm.ENDIAN}U1"}, + np.rec.array( + [("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}U1"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), + ("C", "O"), + ], + ), + ), + # Pass in a type instance. + ( + {"column_dtypes": str}, + np.rec.array( + [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}U"), + ("B", f"{tm.ENDIAN}U"), + ("C", f"{tm.ENDIAN}U"), + ], + ), + ), + # Pass in a dtype instance. + ( + {"column_dtypes": np.dtype("unicode")}, + np.rec.array( + [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}U"), + ("B", f"{tm.ENDIAN}U"), + ("C", f"{tm.ENDIAN}U"), + ], + ), + ), + # Pass in a dictionary (name-only). 
+ ( + { + "column_dtypes": { + "A": np.int8, + "B": np.float32, + "C": f"{tm.ENDIAN}U2", + } + }, + np.rec.array( + [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}i8"), + ("A", "i1"), + ("B", f"{tm.ENDIAN}f4"), + ("C", f"{tm.ENDIAN}U2"), + ], + ), + ), + # Pass in a dictionary (indices-only). + ( + {"index_dtypes": {0: "int16"}}, + np.rec.array( + [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], + dtype=[ + ("index", "i2"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), + ("C", "O"), + ], + ), + ), + # Ignore index mappings if index is not True. + ( + {"index": False, "index_dtypes": f"{tm.ENDIAN}U2"}, + np.rec.array( + [(1, 0.2, "a"), (2, 1.5, "bc")], + dtype=[ + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), + ("C", "O"), + ], + ), + ), + # Non-existent names / indices in mapping should not error. + ( + {"index_dtypes": {0: "int16", "not-there": "float32"}}, + np.rec.array( + [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], + dtype=[ + ("index", "i2"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), + ("C", "O"), + ], + ), + ), + # Names / indices not in mapping default to array dtype. + ( + {"column_dtypes": {"A": np.int8, "B": np.float32}}, + np.rec.array( + [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}i8"), + ("A", "i1"), + ("B", f"{tm.ENDIAN}f4"), + ("C", "O"), + ], + ), + ), + # Names / indices not in dtype mapping default to array dtype. + ( + {"column_dtypes": {"A": np.dtype("int8"), "B": np.dtype("float32")}}, + np.rec.array( + [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}i8"), + ("A", "i1"), + ("B", f"{tm.ENDIAN}f4"), + ("C", "O"), + ], + ), + ), + # Mixture of everything. + ( + { + "column_dtypes": {"A": np.int8, "B": np.float32}, + "index_dtypes": f"{tm.ENDIAN}U2", + }, + np.rec.array( + [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}U2"), + ("A", "i1"), + ("B", f"{tm.ENDIAN}f4"), + ("C", "O"), + ], + ), + ), + # Invalid dype values. + ( + {"index": False, "column_dtypes": []}, + (ValueError, "Invalid dtype \\[\\] specified for column A"), + ), + ( + {"index": False, "column_dtypes": {"A": "int32", "B": 5}}, + (ValueError, "Invalid dtype 5 specified for column B"), + ), + # Numpy can't handle EA types, so check error is raised + ( + { + "index": False, + "column_dtypes": {"A": "int32", "B": CategoricalDtype(["a", "b"])}, + }, + (ValueError, "Invalid dtype category specified for column B"), + ), + # Check that bad types raise + ( + {"index": False, "column_dtypes": {"A": "int32", "B": "foo"}}, + (TypeError, "data type [\"']foo[\"'] not understood"), + ), + ], + ) + def test_to_records_dtype(self, kwargs, expected): + # see GH#18146 + df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]}) + + if not isinstance(expected, np.recarray): + with pytest.raises(expected[0], match=expected[1]): + df.to_records(**kwargs) + else: + result = df.to_records(**kwargs) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize( + "df,kwargs,expected", + [ + # MultiIndex in the index. + ( + DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("abc") + ).set_index(["a", "b"]), + {"column_dtypes": "float64", "index_dtypes": {0: "int32", 1: "int8"}}, + np.rec.array( + [(1, 2, 3.0), (4, 5, 6.0), (7, 8, 9.0)], + dtype=[ + ("a", f"{tm.ENDIAN}i4"), + ("b", "i1"), + ("c", f"{tm.ENDIAN}f8"), + ], + ), + ), + # MultiIndex in the columns. 
+ ( + DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + columns=MultiIndex.from_tuples( + [("a", "d"), ("b", "e"), ("c", "f")] + ), + ), + { + "column_dtypes": {0: f"{tm.ENDIAN}U1", 2: "float32"}, + "index_dtypes": "float32", + }, + np.rec.array( + [(0.0, "1", 2, 3.0), (1.0, "4", 5, 6.0), (2.0, "7", 8, 9.0)], + dtype=[ + ("index", f"{tm.ENDIAN}f4"), + ("('a', 'd')", f"{tm.ENDIAN}U1"), + ("('b', 'e')", f"{tm.ENDIAN}i8"), + ("('c', 'f')", f"{tm.ENDIAN}f4"), + ], + ), + ), + # MultiIndex in both the columns and index. + ( + DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + columns=MultiIndex.from_tuples( + [("a", "d"), ("b", "e"), ("c", "f")], names=list("ab") + ), + index=MultiIndex.from_tuples( + [("d", -4), ("d", -5), ("f", -6)], names=list("cd") + ), + ), + { + "column_dtypes": "float64", + "index_dtypes": {0: f"{tm.ENDIAN}U2", 1: "int8"}, + }, + np.rec.array( + [ + ("d", -4, 1.0, 2.0, 3.0), + ("d", -5, 4.0, 5.0, 6.0), + ("f", -6, 7, 8, 9.0), + ], + dtype=[ + ("c", f"{tm.ENDIAN}U2"), + ("d", "i1"), + ("('a', 'd')", f"{tm.ENDIAN}f8"), + ("('b', 'e')", f"{tm.ENDIAN}f8"), + ("('c', 'f')", f"{tm.ENDIAN}f8"), + ], + ), + ), + ], + ) + def test_to_records_dtype_mi(self, df, kwargs, expected): + # see GH#18146 + result = df.to_records(**kwargs) + tm.assert_almost_equal(result, expected) + + def test_to_records_dict_like(self): + # see GH#18146 + class DictLike: + def __init__(self, **kwargs) -> None: + self.d = kwargs.copy() + + def __getitem__(self, key): + return self.d.__getitem__(key) + + def __contains__(self, key) -> bool: + return key in self.d + + def keys(self): + return self.d.keys() + + df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]}) + + dtype_mappings = { + "column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}), + "index_dtypes": f"{tm.ENDIAN}U2", + } + + result = df.to_records(**dtype_mappings) + expected = np.rec.array( + [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], + dtype=[ + ("index", f"{tm.ENDIAN}U2"), + ("A", "i1"), + ("B", f"{tm.ENDIAN}f4"), + ("C", "O"), + ], + ) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("tz", ["UTC", "GMT", "US/Eastern"]) + def test_to_records_datetimeindex_with_tz(self, tz): + # GH#13937 + dr = date_range("2016-01-01", periods=10, freq="S", tz=tz) + + df = DataFrame({"datetime": dr}, index=dr) + + expected = df.to_records() + result = df.tz_convert("UTC").to_records() + + # both converted to UTC, so they are equal + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py new file mode 100644 index 00000000..acbb51fe --- /dev/null +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -0,0 +1,153 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + PeriodIndex, + Series, + Timedelta, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +def _get_with_delta(delta, freq="A-DEC"): + return date_range( + to_datetime("1/1/2001") + delta, + to_datetime("12/31/2009") + delta, + freq=freq, + ) + + +class TestToTimestamp: + def test_to_timestamp(self, frame_or_series): + K = 5 + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + obj = DataFrame( + np.random.randn(len(index), K), + index=index, + columns=["A", "B", "C", "D", "E"], + ) + obj["mix"] = "a" + obj = tm.get_obj(obj, frame_or_series) + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + exp_index = exp_index + 
Timedelta(1, "D") - Timedelta(1, "ns") + result = obj.to_timestamp("D", "end") + tm.assert_index_equal(result.index, exp_index) + tm.assert_numpy_array_equal(result.values, obj.values) + if frame_or_series is Series: + assert result.name == "A" + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = obj.to_timestamp("D", "start") + tm.assert_index_equal(result.index, exp_index) + + result = obj.to_timestamp(how="start") + tm.assert_index_equal(result.index, exp_index) + + delta = timedelta(hours=23) + result = obj.to_timestamp("H", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = obj.to_timestamp("T", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + result = obj.to_timestamp("S", "end") + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + def test_to_timestamp_columns(self): + K = 5 + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + df = DataFrame( + np.random.randn(len(index), K), + index=index, + columns=["A", "B", "C", "D", "E"], + ) + df["mix"] = "a" + + # columns + df = df.T + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + result = df.to_timestamp("D", "end", axis=1) + tm.assert_index_equal(result.columns, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = df.to_timestamp("D", "start", axis=1) + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23) + result = df.to_timestamp("H", "end", axis=1) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp("T", "end", axis=1) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + result = df.to_timestamp("S", "end", axis=1) + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + result1 = df.to_timestamp("5t", axis=1) + result2 = df.to_timestamp("t", axis=1) + expected = date_range("2001-01-01", "2009-01-01", freq="AS") + assert isinstance(result1.columns, DatetimeIndex) + assert isinstance(result2.columns, DatetimeIndex) + tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) + tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) + # PeriodIndex.to_timestamp always use 'infer' + assert result1.columns.freqstr == "AS-JAN" + assert result2.columns.freqstr == "AS-JAN" + + def to_timestamp_invalid_axis(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + obj = DataFrame(np.random.randn(len(index), 5), index=index) + + # invalid axis + with pytest.raises(ValueError, match="axis"): + obj.to_timestamp(axis=2) + + def test_to_timestamp_hourly(self, frame_or_series): + + index = period_range(freq="H", 
start="1/1/2001", end="1/2/2001") + obj = Series(1, index=index, name="foo") + if frame_or_series is not Series: + obj = obj.to_frame() + + exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="H") + result = obj.to_timestamp(how="end") + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + if frame_or_series is Series: + assert result.name == "foo" + + def test_to_timestamp_raises(self, index, frame_or_series): + # GH#33327 + obj = frame_or_series(index=index, dtype=object) + + if not isinstance(index, PeriodIndex): + msg = f"unsupported Type {type(index).__name__}" + with pytest.raises(TypeError, match=msg): + obj.to_timestamp() diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py new file mode 100644 index 00000000..7fca752f --- /dev/null +++ b/pandas/tests/frame/methods/test_transpose.py @@ -0,0 +1,118 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + DatetimeIndex, + date_range, +) +import pandas._testing as tm + + +class TestTranspose: + def test_transpose_empty_preserves_datetimeindex(self): + # GH#41382 + df = DataFrame(index=DatetimeIndex([])) + + expected = DatetimeIndex([], dtype="datetime64[ns]", freq=None) + + result1 = df.T.sum().index + result2 = df.sum(axis=1).index + + tm.assert_index_equal(result1, expected) + tm.assert_index_equal(result2, expected) + + def test_transpose_tzaware_1col_single_tz(self): + # GH#26825 + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") + + df = DataFrame(dti) + assert (df.dtypes == dti.dtype).all() + res = df.T + assert (res.dtypes == dti.dtype).all() + + def test_transpose_tzaware_2col_single_tz(self): + # GH#26825 + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") + + df3 = DataFrame({"A": dti, "B": dti}) + assert (df3.dtypes == dti.dtype).all() + res3 = df3.T + assert (res3.dtypes == dti.dtype).all() + + def test_transpose_tzaware_2col_mixed_tz(self): + # GH#26825 + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti2 = dti.tz_convert("US/Pacific") + + df4 = DataFrame({"A": dti, "B": dti2}) + assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() + assert (df4.T.dtypes == object).all() + tm.assert_frame_equal(df4.T.T, df4) + + @pytest.mark.parametrize("tz", [None, "America/New_York"]) + def test_transpose_preserves_dtindex_equality_with_dst(self, tz): + # GH#19970 + idx = date_range("20161101", "20161130", freq="4H", tz=tz) + df = DataFrame({"a": range(len(idx)), "b": range(len(idx))}, index=idx) + result = df.T == df.T + expected = DataFrame(True, index=list("ab"), columns=idx) + tm.assert_frame_equal(result, expected) + + def test_transpose_object_to_tzaware_mixed_tz(self): + # GH#26825 + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti2 = dti.tz_convert("US/Pacific") + + # mixed all-tzaware dtypes + df2 = DataFrame([dti, dti2]) + assert (df2.dtypes == object).all() + res2 = df2.T + assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() + + def test_transpose_uint64(self, uint64_frame): + + result = uint64_frame.T + expected = DataFrame(uint64_frame.values.T) + expected.index = ["A", "B"] + tm.assert_frame_equal(result, expected) + + def test_transpose_float(self, float_frame): + frame = float_frame + dft = frame.T + for idx, series in dft.items(): + for col, value in series.items(): + if np.isnan(value): + assert np.isnan(frame[col][idx]) + else: + assert value == frame[col][idx] + + # mixed 
type + index, data = tm.getMixedTypeDict() + mixed = DataFrame(data, index=index) + + mixed_T = mixed.T + for col, s in mixed_T.items(): + assert s.dtype == np.object_ + + @td.skip_array_manager_invalid_test + def test_transpose_get_view(self, float_frame): + dft = float_frame.T + dft.values[:, 5:10] = 5 + + assert (float_frame.values[5:10] == 5).all() + + @td.skip_array_manager_invalid_test + def test_transpose_get_view_dt64tzget_view(self): + dti = date_range("2016-01-01", periods=6, tz="US/Pacific") + arr = dti._data.reshape(3, 2) + df = DataFrame(arr) + assert df._mgr.nblocks == 1 + + result = df.T + assert result._mgr.nblocks == 1 + + rtrip = result._mgr.blocks[0].values + assert np.shares_memory(arr._ndarray, rtrip._ndarray) diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py new file mode 100644 index 00000000..bfee3edc --- /dev/null +++ b/pandas/tests/frame/methods/test_truncate.py @@ -0,0 +1,156 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + + +class TestDataFrameTruncate: + def test_truncate(self, datetime_frame, frame_or_series): + ts = datetime_frame[::3] + ts = tm.get_obj(ts, frame_or_series) + + start, end = datetime_frame.index[3], datetime_frame.index[6] + + start_missing = datetime_frame.index[2] + end_missing = datetime_frame.index[7] + + # neither specified + truncated = ts.truncate() + tm.assert_equal(truncated, ts) + + # both specified + expected = ts[1:3] + + truncated = ts.truncate(start, end) + tm.assert_equal(truncated, expected) + + truncated = ts.truncate(start_missing, end_missing) + tm.assert_equal(truncated, expected) + + # start specified + expected = ts[1:] + + truncated = ts.truncate(before=start) + tm.assert_equal(truncated, expected) + + truncated = ts.truncate(before=start_missing) + tm.assert_equal(truncated, expected) + + # end specified + expected = ts[:3] + + truncated = ts.truncate(after=end) + tm.assert_equal(truncated, expected) + + truncated = ts.truncate(after=end_missing) + tm.assert_equal(truncated, expected) + + # corner case, empty series/frame returned + truncated = ts.truncate(after=ts.index[0] - ts.index.freq) + assert len(truncated) == 0 + + truncated = ts.truncate(before=ts.index[-1] + ts.index.freq) + assert len(truncated) == 0 + + msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00" + with pytest.raises(ValueError, match=msg): + ts.truncate( + before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq + ) + + def test_truncate_copy(self, datetime_frame): + index = datetime_frame.index + truncated = datetime_frame.truncate(index[5], index[10]) + truncated.values[:] = 5.0 + assert not (datetime_frame.values[5:11] == 5).any() + + def test_truncate_nonsortedindex(self, frame_or_series): + # GH#17935 + + obj = DataFrame({"A": ["a", "b", "c", "d", "e"]}, index=[5, 3, 2, 9, 0]) + obj = tm.get_obj(obj, frame_or_series) + + msg = "truncate requires a sorted index" + with pytest.raises(ValueError, match=msg): + obj.truncate(before=3, after=9) + + def test_sort_values_nonsortedindex(self): + rng = date_range("2011-01-01", "2012-01-01", freq="W") + ts = DataFrame( + {"A": np.random.randn(len(rng)), "B": np.random.randn(len(rng))}, index=rng + ) + + decreasing = ts.sort_values("A", ascending=False) + + msg = "truncate requires a sorted index" + with pytest.raises(ValueError, match=msg): + 
decreasing.truncate(before="2011-11", after="2011-12") + + def test_truncate_nonsortedindex_axis1(self): + # GH#17935 + + df = DataFrame( + { + 3: np.random.randn(5), + 20: np.random.randn(5), + 2: np.random.randn(5), + 0: np.random.randn(5), + }, + columns=[3, 20, 2, 0], + ) + msg = "truncate requires a sorted index" + with pytest.raises(ValueError, match=msg): + df.truncate(before=2, after=20, axis=1) + + @pytest.mark.parametrize( + "before, after, indices", + [(1, 2, [2, 1]), (None, 2, [2, 1, 0]), (1, None, [3, 2, 1])], + ) + @pytest.mark.parametrize("klass", [Int64Index, DatetimeIndex]) + def test_truncate_decreasing_index( + self, before, after, indices, klass, frame_or_series + ): + # https://github.com/pandas-dev/pandas/issues/33756 + idx = klass([3, 2, 1, 0]) + if klass is DatetimeIndex: + before = pd.Timestamp(before) if before is not None else None + after = pd.Timestamp(after) if after is not None else None + indices = [pd.Timestamp(i) for i in indices] + values = frame_or_series(range(len(idx)), index=idx) + result = values.truncate(before=before, after=after) + expected = values.loc[indices] + tm.assert_equal(result, expected) + + def test_truncate_multiindex(self, frame_or_series): + # GH 34564 + mi = pd.MultiIndex.from_product([[1, 2, 3, 4], ["A", "B"]], names=["L1", "L2"]) + s1 = DataFrame(range(mi.shape[0]), index=mi, columns=["col"]) + s1 = tm.get_obj(s1, frame_or_series) + + result = s1.truncate(before=2, after=3) + + df = DataFrame.from_dict( + {"L1": [2, 2, 3, 3], "L2": ["A", "B", "A", "B"], "col": [2, 3, 4, 5]} + ) + expected = df.set_index(["L1", "L2"]) + expected = tm.get_obj(expected, frame_or_series) + + tm.assert_equal(result, expected) + + def test_truncate_index_only_one_unique_value(self, frame_or_series): + # GH 42365 + obj = Series(0, index=date_range("2021-06-30", "2021-06-30")).repeat(5) + if frame_or_series is DataFrame: + obj = obj.to_frame(name="a") + + truncated = obj.truncate("2021-06-28", "2021-07-01") + + tm.assert_equal(truncated, obj) diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py new file mode 100644 index 00000000..c5f68707 --- /dev/null +++ b/pandas/tests/frame/methods/test_tz_convert.py @@ -0,0 +1,132 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm + + +class TestTZConvert: + def test_tz_convert(self, frame_or_series): + rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") + + obj = DataFrame({"a": 1}, index=rng) + obj = tm.get_obj(obj, frame_or_series) + + result = obj.tz_convert("Europe/Berlin") + expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) + expected = tm.get_obj(expected, frame_or_series) + + assert result.index.tz.zone == "Europe/Berlin" + tm.assert_equal(result, expected) + + def test_tz_convert_axis1(self): + rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") + + obj = DataFrame({"a": 1}, index=rng) + + obj = obj.T + result = obj.tz_convert("Europe/Berlin", axis=1) + assert result.columns.tz.zone == "Europe/Berlin" + + expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) + + tm.assert_equal(result, expected.T) + + def test_tz_convert_naive(self, frame_or_series): + # can't convert tz-naive + rng = date_range("1/1/2011", periods=200, freq="D") + ts = Series(1, index=rng) + ts = frame_or_series(ts) + + with pytest.raises(TypeError, match="Cannot convert tz-naive"): + ts.tz_convert("US/Eastern") + + 
@pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"]) + def test_tz_convert_and_localize(self, fn): + l0 = date_range("20140701", periods=5, freq="D") + l1 = date_range("20140701", periods=5, freq="D") + + int_idx = Index(range(5)) + + if fn == "tz_convert": + l0 = l0.tz_localize("UTC") + l1 = l1.tz_localize("UTC") + + for idx in [l0, l1]: + + l0_expected = getattr(idx, fn)("US/Pacific") + l1_expected = getattr(idx, fn)("US/Pacific") + + df1 = DataFrame(np.ones(5), index=l0) + df1 = getattr(df1, fn)("US/Pacific") + tm.assert_index_equal(df1.index, l0_expected) + + # MultiIndex + # GH7846 + df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) + + # freq is not preserved in MultiIndex construction + l1_expected = l1_expected._with_freq(None) + l0_expected = l0_expected._with_freq(None) + l1 = l1._with_freq(None) + l0 = l0._with_freq(None) + + df3 = getattr(df2, fn)("US/Pacific", level=0) + assert not df3.index.levels[0].equals(l0) + tm.assert_index_equal(df3.index.levels[0], l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1) + assert not df3.index.levels[1].equals(l1_expected) + + df3 = getattr(df2, fn)("US/Pacific", level=1) + tm.assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) + + # TODO: untested + df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa + + tm.assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + # Bad Inputs + + # Not DatetimeIndex / PeriodIndex + with pytest.raises(TypeError, match="DatetimeIndex"): + df = DataFrame(index=int_idx) + getattr(df, fn)("US/Pacific") + + # Not DatetimeIndex / PeriodIndex + with pytest.raises(TypeError, match="DatetimeIndex"): + df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) + getattr(df, fn)("US/Pacific", level=0) + + # Invalid level + with pytest.raises(ValueError, match="not valid"): + df = DataFrame(index=l0) + getattr(df, fn)("US/Pacific", level=1) + + @pytest.mark.parametrize("copy", [True, False]) + def test_tz_convert_copy_inplace_mutate(self, copy, frame_or_series): + # GH#6326 + obj = frame_or_series( + np.arange(0, 5), + index=date_range("20131027", periods=5, freq="1H", tz="Europe/Berlin"), + ) + orig = obj.copy() + result = obj.tz_convert("UTC", copy=copy) + expected = frame_or_series(np.arange(0, 5), index=obj.index.tz_convert("UTC")) + tm.assert_equal(result, expected) + tm.assert_equal(obj, orig) + assert result.index is not obj.index + assert result is not obj diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py new file mode 100644 index 00000000..43c6eb45 --- /dev/null +++ b/pandas/tests/frame/methods/test_tz_localize.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestTZLocalize: + # See also: + # test_tz_convert_and_localize in test_tz_convert + + def test_tz_localize(self, frame_or_series): + rng = date_range("1/1/2011", periods=100, freq="H") + + obj = DataFrame({"a": 1}, index=rng) + obj = tm.get_obj(obj, frame_or_series) + + result = obj.tz_localize("utc") + expected = DataFrame({"a": 1}, rng.tz_localize("UTC")) + expected = tm.get_obj(expected, 
frame_or_series) + + assert result.index.tz.zone == "UTC" + tm.assert_equal(result, expected) + + def test_tz_localize_axis1(self): + rng = date_range("1/1/2011", periods=100, freq="H") + + df = DataFrame({"a": 1}, index=rng) + + df = df.T + result = df.tz_localize("utc", axis=1) + assert result.columns.tz.zone == "UTC" + + expected = DataFrame({"a": 1}, rng.tz_localize("UTC")) + + tm.assert_frame_equal(result, expected.T) + + def test_tz_localize_naive(self, frame_or_series): + + # Can't localize if already tz-aware + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + ts = Series(1, index=rng) + ts = frame_or_series(ts) + + with pytest.raises(TypeError, match="Already tz-aware"): + ts.tz_localize("US/Eastern") + + @pytest.mark.parametrize("copy", [True, False]) + def test_tz_localize_copy_inplace_mutate(self, copy, frame_or_series): + # GH#6326 + obj = frame_or_series( + np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=None) + ) + orig = obj.copy() + result = obj.tz_localize("UTC", copy=copy) + expected = frame_or_series( + np.arange(0, 5), + index=date_range("20131027", periods=5, freq="1H", tz="UTC"), + ) + tm.assert_equal(result, expected) + tm.assert_equal(obj, orig) + assert result.index is not obj.index + assert result is not obj diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py new file mode 100644 index 00000000..29034363 --- /dev/null +++ b/pandas/tests/frame/methods/test_update.py @@ -0,0 +1,169 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrameUpdate: + def test_update_nan(self): + # #15593 #15617 + # test 1 + df1 = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) + df2 = DataFrame({"A": [None, 2, 3]}) + expected = df1.copy() + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) + + # test 2 + df1 = DataFrame({"A": [1.0, None, 3], "B": date_range("2000", periods=3)}) + df2 = DataFrame({"A": [None, 2, 3]}) + expected = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) + + def test_update(self): + df = DataFrame( + [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other) + + expected = DataFrame( + [[1.5, np.nan, 3], [3.6, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] + ) + tm.assert_frame_equal(df, expected) + + def test_update_dtypes(self): + + # gh 3016 + df = DataFrame( + [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + + other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) + df.update(other) + + expected = DataFrame( + [[45.0, 45.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + tm.assert_frame_equal(df, expected) + + def test_update_nooverwrite(self): + df = DataFrame( + [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other, overwrite=False) + + expected = DataFrame( + [[1.5, np.nan, 3], [1.5, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 3.0]] + ) + tm.assert_frame_equal(df, expected) + + def test_update_filtered(self): + df = DataFrame( + [[1.5, np.nan, 3.0], 
[1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other, filter_func=lambda x: x > 2) + + expected = DataFrame( + [[1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "bad_kwarg, exception, msg", + [ + # errors must be 'ignore' or 'raise' + ({"errors": "something"}, ValueError, "The parameter errors must.*"), + ({"join": "inner"}, NotImplementedError, "Only left join is supported"), + ], + ) + def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg): + df = DataFrame([[1.5, 1, 3.0]]) + with pytest.raises(exception, match=msg): + df.update(df, **bad_kwarg) + + def test_update_raise_on_overlap(self): + df = DataFrame( + [[1.5, 1, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[2.0, np.nan], [np.nan, 7]], index=[1, 3], columns=[1, 2]) + with pytest.raises(ValueError, match="Data overlaps"): + df.update(other, errors="raise") + + def test_update_from_non_df(self): + d = {"a": Series([1, 2, 3, 4]), "b": Series([5, 6, 7, 8])} + df = DataFrame(d) + + d["a"] = Series([5, 6, 7, 8]) + df.update(d) + + expected = DataFrame(d) + + tm.assert_frame_equal(df, expected) + + d = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} + df = DataFrame(d) + + d["a"] = [5, 6, 7, 8] + df.update(d) + + expected = DataFrame(d) + + tm.assert_frame_equal(df, expected) + + def test_update_datetime_tz(self): + # GH 25807 + result = DataFrame([pd.Timestamp("2019", tz="UTC")]) + with tm.assert_produces_warning(None): + result.update(result) + expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) + tm.assert_frame_equal(result, expected) + + def test_update_with_different_dtype(self, using_copy_on_write): + # GH#3217 + df = DataFrame({"a": [1, 3], "b": [np.nan, 2]}) + df["c"] = np.nan + if using_copy_on_write: + df.update({"c": Series(["foo"], index=[0])}) + else: + df["c"].update(Series(["foo"], index=[0])) + + expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]}) + tm.assert_frame_equal(df, expected) + + @td.skip_array_manager_invalid_test + def test_update_modify_view(self, using_copy_on_write): + # GH#47188 + df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}) + df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}) + df2_orig = df2.copy() + result_view = df2[:] + df2.update(df) + expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}) + tm.assert_frame_equal(df2, expected) + if using_copy_on_write: + tm.assert_frame_equal(result_view, df2_orig) + else: + tm.assert_frame_equal(result_view, expected) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py new file mode 100644 index 00000000..6e852884 --- /dev/null +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -0,0 +1,146 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def test_data_frame_value_counts_unsorted(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(sort=False) + expected = pd.Series( + data=[1, 2, 1], + index=pd.MultiIndex.from_arrays( + [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_ascending(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + 
index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(ascending=True) + expected = pd.Series( + data=[1, 1, 2], + index=pd.MultiIndex.from_arrays( + [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_default(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts() + expected = pd.Series( + data=[2, 1, 1], + index=pd.MultiIndex.from_arrays( + [(4, 2, 6), (0, 2, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_normalize(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(normalize=True) + expected = pd.Series( + data=[0.5, 0.25, 0.25], + index=pd.MultiIndex.from_arrays( + [(4, 2, 6), (0, 2, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_single_col_default(): + df = pd.DataFrame({"num_legs": [2, 4, 4, 6]}) + + result = df.value_counts() + expected = pd.Series( + data=[2, 1, 1], + index=pd.MultiIndex.from_arrays([[4, 2, 6]], names=["num_legs"]), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_empty(): + df_no_cols = pd.DataFrame() + + result = df_no_cols.value_counts() + expected = pd.Series([], dtype=np.int64) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_empty_normalize(): + df_no_cols = pd.DataFrame() + + result = df_no_cols.value_counts(normalize=True) + expected = pd.Series([], dtype=np.float64) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_dropna_true(nulls_fixture): + # GH 41334 + df = pd.DataFrame( + { + "first_name": ["John", "Anne", "John", "Beth"], + "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], + }, + ) + result = df.value_counts() + expected = pd.Series( + data=[1, 1], + index=pd.MultiIndex.from_arrays( + [("Beth", "John"), ("Louise", "Smith")], names=["first_name", "middle_name"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_dropna_false(nulls_fixture): + # GH 41334 + df = pd.DataFrame( + { + "first_name": ["John", "Anne", "John", "Beth"], + "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], + }, + ) + + result = df.value_counts(dropna=False) + expected = pd.Series( + data=[1, 1, 1, 1], + index=pd.MultiIndex( + levels=[ + pd.Index(["Anne", "Beth", "John"]), + pd.Index(["Louise", "Smith", nulls_fixture]), + ], + codes=[[0, 1, 2, 2], [2, 0, 1, 2]], + names=["first_name", "middle_name"], + ), + ) + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py new file mode 100644 index 00000000..f755b0ad --- /dev/null +++ b/pandas/tests/frame/methods/test_values.py @@ -0,0 +1,273 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + NaT, + Series, + Timestamp, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestDataFrameValues: + @td.skip_array_manager_invalid_test + def test_values(self, float_frame): + float_frame.values[:, 0] = 5.0 + assert (float_frame.values[:, 0] == 5).all() + + def test_more_values(self, float_string_frame): + values = 
float_string_frame.values + assert values.shape[1] == len(float_string_frame.columns) + + def test_values_mixed_dtypes(self, float_frame, float_string_frame): + frame = float_frame + arr = frame.values + + frame_cols = frame.columns + for i, row in enumerate(arr): + for j, value in enumerate(row): + col = frame_cols[j] + if np.isnan(value): + assert np.isnan(frame[col][i]) + else: + assert value == frame[col][i] + + # mixed type + arr = float_string_frame[["foo", "A"]].values + assert arr[0, 0] == "bar" + + df = DataFrame({"complex": [1j, 2j, 3j], "real": [1, 2, 3]}) + arr = df.values + assert arr[0, 0] == 1j + + def test_values_duplicates(self): + df = DataFrame( + [[1, 2, "a", "b"], [1, 2, "a", "b"]], columns=["one", "one", "two", "two"] + ) + + result = df.values + expected = np.array([[1, 2, "a", "b"], [1, 2, "a", "b"]], dtype=object) + + tm.assert_numpy_array_equal(result, expected) + + def test_values_with_duplicate_columns(self): + df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"]) + result = df.values + expected = np.array([[1, 2.5], [3, 4.5]]) + assert (result == expected).all().all() + + @pytest.mark.parametrize("constructor", [date_range, period_range]) + def test_values_casts_datetimelike_to_object(self, constructor): + series = Series(constructor("2000-01-01", periods=10, freq="D")) + + expected = series.astype("object") + + df = DataFrame({"a": series, "b": np.random.randn(len(series))}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + df = DataFrame({"a": series, "b": ["foo"] * len(series)}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + def test_frame_values_with_tz(self): + tz = "US/Central" + df = DataFrame({"A": date_range("2000", periods=4, tz=tz)}) + result = df.values + expected = np.array( + [ + [Timestamp("2000-01-01", tz=tz)], + [Timestamp("2000-01-02", tz=tz)], + [Timestamp("2000-01-03", tz=tz)], + [Timestamp("2000-01-04", tz=tz)], + ] + ) + tm.assert_numpy_array_equal(result, expected) + + # two columns, homogeneous + + df["B"] = df["A"] + result = df.values + expected = np.concatenate([expected, expected], axis=1) + tm.assert_numpy_array_equal(result, expected) + + # three columns, heterogeneous + est = "US/Eastern" + df["C"] = df["A"].dt.tz_convert(est) + + new = np.array( + [ + [Timestamp("2000-01-01T01:00:00", tz=est)], + [Timestamp("2000-01-02T01:00:00", tz=est)], + [Timestamp("2000-01-03T01:00:00", tz=est)], + [Timestamp("2000-01-04T01:00:00", tz=est)], + ] + ) + expected = np.concatenate([expected, new], axis=1) + result = df.values + tm.assert_numpy_array_equal(result, expected) + + def test_interleave_with_tzaware(self, timezone_frame): + + # interleave with object + result = timezone_frame.assign(D="foo").values + expected = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ["foo", "foo", "foo"], + ], + dtype=object, + ).T + tm.assert_numpy_array_equal(result, expected) + + # interleave with only datetime64[ns] + result = timezone_frame.values + expected = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", 
tz="US/Eastern"), + NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ], + dtype=object, + ).T + tm.assert_numpy_array_equal(result, expected) + + def test_values_interleave_non_unique_cols(self): + df = DataFrame( + [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], + columns=["x", "x"], + index=[1, 2], + ) + + df_unique = df.copy() + df_unique.columns = ["x", "y"] + assert df_unique.values.shape == df.values.shape + tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) + tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) + + def test_values_numeric_cols(self, float_frame): + float_frame["foo"] = "bar" + + values = float_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + def test_values_lcd(self, mixed_float_frame, mixed_int_frame): + + # mixed lcd + values = mixed_float_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + values = mixed_float_frame[["A", "B", "C"]].values + assert values.dtype == np.float32 + + values = mixed_float_frame[["C"]].values + assert values.dtype == np.float16 + + # GH#10364 + # B uint64 forces float because there are other signed int types + values = mixed_int_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + values = mixed_int_frame[["A", "D"]].values + assert values.dtype == np.int64 + + # B uint64 forces float because there are other signed int types + values = mixed_int_frame[["A", "B", "C"]].values + assert values.dtype == np.float64 + + # as B and C are both unsigned, no forcing to float is needed + values = mixed_int_frame[["B", "C"]].values + assert values.dtype == np.uint64 + + values = mixed_int_frame[["A", "C"]].values + assert values.dtype == np.int32 + + values = mixed_int_frame[["C", "D"]].values + assert values.dtype == np.int64 + + values = mixed_int_frame[["A"]].values + assert values.dtype == np.int32 + + values = mixed_int_frame[["C"]].values + assert values.dtype == np.uint8 + + +class TestPrivateValues: + @td.skip_array_manager_invalid_test + def test_private_values_dt64tz(self): + dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1) + + df = DataFrame(dta, columns=["A"]) + tm.assert_equal(df._values, dta) + + # we have a view + assert np.shares_memory(df._values._ndarray, dta._ndarray) + + # TimedeltaArray + tda = dta - dta + df2 = df - df + tm.assert_equal(df2._values, tda) + + @td.skip_array_manager_invalid_test + def test_private_values_dt64tz_multicol(self): + dta = date_range("2000", periods=8, tz="US/Central")._data.reshape(-1, 2) + + df = DataFrame(dta, columns=["A", "B"]) + tm.assert_equal(df._values, dta) + + # we have a view + assert np.shares_memory(df._values._ndarray, dta._ndarray) + + # TimedeltaArray + tda = dta - dta + df2 = df - df + tm.assert_equal(df2._values, tda) + + def test_private_values_dt64_multiblock(self, using_array_manager, request): + if using_array_manager: + mark = pytest.mark.xfail(reason="returns ndarray") + request.node.add_marker(mark) + + dta = date_range("2000", periods=8)._data + + df = DataFrame({"A": dta[:4]}, copy=False) + df["B"] = dta[4:] + + assert len(df._mgr.arrays) == 2 + + result = df._values + expected = dta.reshape(2, 4).T + tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py new file mode 100644 index 00000000..c68171ab --- /dev/null +++ 
b/pandas/tests/frame/test_alter_axes.py @@ -0,0 +1,30 @@ +from datetime import datetime + +import pytz + +from pandas import DataFrame +import pandas._testing as tm + + +class TestDataFrameAlterAxes: + # Tests for setting index/columns attributes directly (i.e. __setattr__) + + def test_set_axis_setattr_index(self): + # GH 6785 + # set the index manually + + df = DataFrame([{"ts": datetime(2014, 4, 1, tzinfo=pytz.utc), "foo": 1}]) + expected = df.set_index("ts") + df.index = df["ts"] + df.pop("ts") + tm.assert_frame_equal(df, expected) + + # Renaming + + def test_assign_columns(self, float_frame): + float_frame["hi"] = "there" + + df = float_frame.copy() + df.columns = ["foo", "bar", "baz", "quux", "foo2"] + tm.assert_series_equal(float_frame["C"], df["baz"], check_names=False) + tm.assert_series_equal(float_frame["hi"], df["foo2"], check_names=False) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py new file mode 100644 index 00000000..bc6c6765 --- /dev/null +++ b/pandas/tests/frame/test_api.py @@ -0,0 +1,389 @@ +from copy import deepcopy +import inspect +import pydoc + +import numpy as np +import pytest + +from pandas._config.config import option_context + +import pandas.util._test_decorators as td +from pandas.util._test_decorators import ( + async_mark, + skip_if_no, +) + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestDataFrameMisc: + def test_getitem_pop_assign_name(self, float_frame): + s = float_frame["A"] + assert s.name == "A" + + s = float_frame.pop("A") + assert s.name == "A" + + s = float_frame.loc[:, "B"] + assert s.name == "B" + + s2 = s.loc[:] + assert s2.name == "B" + + def test_get_axis(self, float_frame): + f = float_frame + assert f._get_axis_number(0) == 0 + assert f._get_axis_number(1) == 1 + assert f._get_axis_number("index") == 0 + assert f._get_axis_number("rows") == 0 + assert f._get_axis_number("columns") == 1 + + assert f._get_axis_name(0) == "index" + assert f._get_axis_name(1) == "columns" + assert f._get_axis_name("index") == "index" + assert f._get_axis_name("rows") == "index" + assert f._get_axis_name("columns") == "columns" + + assert f._get_axis(0) is f.index + assert f._get_axis(1) is f.columns + + with pytest.raises(ValueError, match="No axis named"): + f._get_axis_number(2) + + with pytest.raises(ValueError, match="No axis.*foo"): + f._get_axis_name("foo") + + with pytest.raises(ValueError, match="No axis.*None"): + f._get_axis_name(None) + + with pytest.raises(ValueError, match="No axis named"): + f._get_axis_number(None) + + def test_column_contains_raises(self, float_frame): + with pytest.raises(TypeError, match="unhashable type: 'Index'"): + float_frame.columns in float_frame + + def test_tab_completion(self): + # DataFrame whose columns are identifiers shall have them in __dir__. + df = DataFrame([list("abcd"), list("efgh")], columns=list("ABCD")) + for key in list("ABCD"): + assert key in dir(df) + assert isinstance(df.__getitem__("A"), Series) + + # DataFrame whose first-level columns are identifiers shall have + # them in __dir__. + df = DataFrame( + [list("abcd"), list("efgh")], + columns=pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH"))), + ) + for key in list("ABCD"): + assert key in dir(df) + for key in list("EFGH"): + assert key not in dir(df) + assert isinstance(df.__getitem__("A"), DataFrame) + + def test_display_max_dir_items(self): + # display.max_dir_items increaes the number of columns that are in __dir__. 
+ columns = ["a" + str(i) for i in range(420)] + values = [range(420), range(420)] + df = DataFrame(values, columns=columns) + + # The default value for display.max_dir_items is 100 + assert "a99" in dir(df) + assert "a100" not in dir(df) + + with option_context("display.max_dir_items", 300): + df = DataFrame(values, columns=columns) + assert "a299" in dir(df) + assert "a300" not in dir(df) + + with option_context("display.max_dir_items", None): + df = DataFrame(values, columns=columns) + assert "a419" in dir(df) + + def test_not_hashable(self): + empty_frame = DataFrame() + + df = DataFrame([1]) + msg = "unhashable type: 'DataFrame'" + with pytest.raises(TypeError, match=msg): + hash(df) + with pytest.raises(TypeError, match=msg): + hash(empty_frame) + + def test_column_name_contains_unicode_surrogate(self): + # GH 25509 + colname = "\ud83d" + df = DataFrame({colname: []}) + # this should not crash + assert colname not in dir(df) + assert df.columns[0] == colname + + def test_new_empty_index(self): + df1 = DataFrame(np.random.randn(0, 3)) + df2 = DataFrame(np.random.randn(0, 3)) + df1.index.name = "foo" + assert df2.index.name is None + + def test_get_agg_axis(self, float_frame): + cols = float_frame._get_agg_axis(0) + assert cols is float_frame.columns + + idx = float_frame._get_agg_axis(1) + assert idx is float_frame.index + + msg = r"Axis must be 0 or 1 \(got 2\)" + with pytest.raises(ValueError, match=msg): + float_frame._get_agg_axis(2) + + def test_empty(self, float_frame, float_string_frame): + empty_frame = DataFrame() + assert empty_frame.empty + + assert not float_frame.empty + assert not float_string_frame.empty + + # corner case + df = DataFrame({"A": [1.0, 2.0, 3.0], "B": ["a", "b", "c"]}, index=np.arange(3)) + del df["A"] + assert not df.empty + + def test_len(self, float_frame): + assert len(float_frame) == len(float_frame.index) + + # single block corner case + arr = float_frame[["A", "B"]].values + expected = float_frame.reindex(columns=["A", "B"]).values + tm.assert_almost_equal(arr, expected) + + def test_axis_aliases(self, float_frame): + f = float_frame + + # reg name + expected = f.sum(axis=0) + result = f.sum(axis="index") + tm.assert_series_equal(result, expected) + + expected = f.sum(axis=1) + result = f.sum(axis="columns") + tm.assert_series_equal(result, expected) + + def test_class_axis(self): + # GH 18147 + # no exception and no empty docstring + assert pydoc.getdoc(DataFrame.index) + assert pydoc.getdoc(DataFrame.columns) + + def test_series_put_names(self, float_string_frame): + series = float_string_frame._series + for k, v in series.items(): + assert v.name == k + + def test_empty_nonzero(self): + df = DataFrame([1, 2, 3]) + assert not df.empty + df = DataFrame(index=[1], columns=[1]) + assert not df.empty + df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna() + assert df.empty + assert df.T.empty + + @pytest.mark.parametrize( + "df", + [ + DataFrame(), + DataFrame(index=[1]), + DataFrame(columns=[1]), + DataFrame({1: []}), + ], + ) + def test_empty_like(self, df): + assert df.empty + assert df.T.empty + + def test_with_datetimelikes(self): + + df = DataFrame( + { + "A": date_range("20130101", periods=10), + "B": timedelta_range("1 day", periods=10), + } + ) + t = df.T + + result = t.dtypes.value_counts() + expected = Series({np.dtype("object"): 10}) + tm.assert_series_equal(result, expected) + + def test_deepcopy(self, float_frame): + cp = deepcopy(float_frame) + series = cp["A"] + series[:] = 10 + for idx, value in series.items(): + assert 
float_frame["A"][idx] != value + + def test_inplace_return_self(self): + # GH 1893 + + data = DataFrame( + {"a": ["foo", "bar", "baz", "qux"], "b": [0, 0, 1, 1], "c": [1, 2, 3, 4]} + ) + + def _check_f(base, f): + result = f(base) + assert result is None + + # -----DataFrame----- + + # set_index + f = lambda x: x.set_index("a", inplace=True) + _check_f(data.copy(), f) + + # reset_index + f = lambda x: x.reset_index(inplace=True) + _check_f(data.set_index("a"), f) + + # drop_duplicates + f = lambda x: x.drop_duplicates(inplace=True) + _check_f(data.copy(), f) + + # sort + f = lambda x: x.sort_values("b", inplace=True) + _check_f(data.copy(), f) + + # sort_index + f = lambda x: x.sort_index(inplace=True) + _check_f(data.copy(), f) + + # fillna + f = lambda x: x.fillna(0, inplace=True) + _check_f(data.copy(), f) + + # replace + f = lambda x: x.replace(1, 0, inplace=True) + _check_f(data.copy(), f) + + # rename + f = lambda x: x.rename({1: "foo"}, inplace=True) + _check_f(data.copy(), f) + + # -----Series----- + d = data.copy()["c"] + + # reset_index + f = lambda x: x.reset_index(inplace=True, drop=True) + _check_f(data.set_index("a")["c"], f) + + # fillna + f = lambda x: x.fillna(0, inplace=True) + _check_f(d.copy(), f) + + # replace + f = lambda x: x.replace(1, 0, inplace=True) + _check_f(d.copy(), f) + + # rename + f = lambda x: x.rename({1: "foo"}, inplace=True) + _check_f(d.copy(), f) + + @async_mark() + @td.check_file_leaks + async def test_tab_complete_warning(self, ip, frame_or_series): + # GH 16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + if frame_or_series is DataFrame: + code = "from pandas import DataFrame; obj = DataFrame()" + else: + code = "from pandas import Series; obj = Series(dtype=object)" + + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning; + # appears resolved 2021-02-02 + with tm.assert_produces_warning(None): + with provisionalcompleter("ignore"): + list(ip.Completer.completions("obj.", 1)) + + def test_attrs(self): + df = DataFrame({"A": [2, 3]}) + assert df.attrs == {} + df.attrs["version"] = 1 + + result = df.rename(columns=str) + assert result.attrs == {"version": 1} + + @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) + def test_set_flags( + self, allows_duplicate_labels, frame_or_series, using_copy_on_write + ): + obj = DataFrame({"A": [1, 2]}) + key = (0, 0) + if frame_or_series is Series: + obj = obj["A"] + key = 0 + + result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels) + + if allows_duplicate_labels is None: + # We don't update when it's not provided + assert result.flags.allows_duplicate_labels is True + else: + assert result.flags.allows_duplicate_labels is allows_duplicate_labels + + # We made a copy + assert obj is not result + + # We didn't mutate obj + assert obj.flags.allows_duplicate_labels is True + + # But we didn't copy data + if frame_or_series is Series: + assert np.may_share_memory(obj.values, result.values) + else: + assert np.may_share_memory(obj["A"].values, result["A"].values) + + result.iloc[key] = 0 + if using_copy_on_write: + assert obj.iloc[key] == 1 + else: + assert obj.iloc[key] == 0 + # set back to 1 for test below + result.iloc[key] = 1 + + # Now we do copy. 
+ result = obj.set_flags( + copy=True, allows_duplicate_labels=allows_duplicate_labels + ) + result.iloc[key] = 10 + assert obj.iloc[key] == 1 + + def test_constructor_expanddim(self): + # GH#33628 accessing _constructor_expanddim should not raise NotImplementedError + # GH38782 pandas has no container higher than DataFrame (two-dim), so + # DataFrame._constructor_expand_dim, doesn't make sense, so is removed. + df = DataFrame() + + msg = "'DataFrame' object has no attribute '_constructor_expanddim'" + with pytest.raises(AttributeError, match=msg): + df._constructor_expanddim(np.arange(27).reshape(3, 3, 3)) + + @skip_if_no("jinja2") + def test_inspect_getmembers(self): + # GH38740 + df = DataFrame() + with tm.assert_produces_warning(None): + inspect.getmembers(df) + + def test_dataframe_iteritems_deprecated(self): + df = DataFrame([1]) + with tm.assert_produces_warning(FutureWarning): + next(df.iteritems()) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py new file mode 100644 index 00000000..93c4b44d --- /dev/null +++ b/pandas/tests/frame/test_arithmetic.py @@ -0,0 +1,2085 @@ +from collections import deque +from datetime import datetime +from enum import Enum +import functools +import operator +import re + +import numpy as np +import pytest +import pytz + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +import pandas.core.common as com +from pandas.core.computation import expressions as expr +from pandas.core.computation.expressions import ( + _MIN_ELEMENTS, + NUMEXPR_INSTALLED, +) +from pandas.tests.frame.common import ( + _check_mixed_float, + _check_mixed_int, +) + + +@pytest.fixture( + autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] +) +def switch_numexpr_min_elements(request): + _MIN_ELEMENTS = expr._MIN_ELEMENTS + expr._MIN_ELEMENTS = request.param + yield request.param + expr._MIN_ELEMENTS = _MIN_ELEMENTS + + +class DummyElement: + def __init__(self, value, dtype) -> None: + self.value = value + self.dtype = np.dtype(dtype) + + def __array__(self): + return np.array(self.value, dtype=self.dtype) + + def __str__(self) -> str: + return f"DummyElement({self.value}, {self.dtype})" + + def __repr__(self) -> str: + return str(self) + + def astype(self, dtype, copy=False): + self.dtype = dtype + return self + + def view(self, dtype): + return type(self)(self.value.view(dtype), dtype) + + def any(self, axis=None): + return bool(self.value) + + +# ------------------------------------------------------------------- +# Comparisons + + +class TestFrameComparisons: + # Specifically _not_ flex-comparisons + + def test_comparison_with_categorical_dtype(self): + # GH#12564 + + df = DataFrame({"A": ["foo", "bar", "baz"]}) + exp = DataFrame({"A": [True, False, False]}) + + res = df == "foo" + tm.assert_frame_equal(res, exp) + + # casting to categorical shouldn't affect the result + df["A"] = df["A"].astype("category") + + res = df == "foo" + tm.assert_frame_equal(res, exp) + + def test_frame_in_list(self): + # GH#12689 this should raise at the DataFrame level, not blocks + df = DataFrame(np.random.randn(6, 4), columns=list("ABCD")) + msg = "The truth value of a DataFrame is ambiguous" + with pytest.raises(ValueError, match=msg): + df in [None] + + @pytest.mark.parametrize( + "arg, arg2", + [ + [ + { + "a": np.random.randint(10, size=10), + "b": pd.date_range("20010101", periods=10), + }, + { + "a": 
np.random.randint(10, size=10), + "b": np.random.randint(10, size=10), + }, + ], + [ + { + "a": np.random.randint(10, size=10), + "b": np.random.randint(10, size=10), + }, + { + "a": np.random.randint(10, size=10), + "b": pd.date_range("20010101", periods=10), + }, + ], + [ + { + "a": pd.date_range("20010101", periods=10), + "b": pd.date_range("20010101", periods=10), + }, + { + "a": np.random.randint(10, size=10), + "b": np.random.randint(10, size=10), + }, + ], + [ + { + "a": np.random.randint(10, size=10), + "b": pd.date_range("20010101", periods=10), + }, + { + "a": pd.date_range("20010101", periods=10), + "b": pd.date_range("20010101", periods=10), + }, + ], + ], + ) + def test_comparison_invalid(self, arg, arg2): + # GH4968 + # invalid date/int comparisons + x = DataFrame(arg) + y = DataFrame(arg2) + # we expect the result to match Series comparisons for + # == and !=, inequalities should raise + result = x == y + expected = DataFrame( + {col: x[col] == y[col] for col in x.columns}, + index=x.index, + columns=x.columns, + ) + tm.assert_frame_equal(result, expected) + + result = x != y + expected = DataFrame( + {col: x[col] != y[col] for col in x.columns}, + index=x.index, + columns=x.columns, + ) + tm.assert_frame_equal(result, expected) + + msgs = [ + r"Invalid comparison between dtype=datetime64\[ns\] and ndarray", + "invalid type promotion", + ( + # npdev 1.20.0 + r"The DTypes and " + r" do not have a common DType." + ), + ] + msg = "|".join(msgs) + with pytest.raises(TypeError, match=msg): + x >= y + with pytest.raises(TypeError, match=msg): + x > y + with pytest.raises(TypeError, match=msg): + x < y + with pytest.raises(TypeError, match=msg): + x <= y + + @pytest.mark.parametrize( + "left, right", + [ + ("gt", "lt"), + ("lt", "gt"), + ("ge", "le"), + ("le", "ge"), + ("eq", "eq"), + ("ne", "ne"), + ], + ) + def test_timestamp_compare(self, left, right): + # make sure we can compare Timestamps on the right AND left hand side + # GH#4982 + df = DataFrame( + { + "dates1": pd.date_range("20010101", periods=10), + "dates2": pd.date_range("20010102", periods=10), + "intcol": np.random.randint(1000000000, size=10), + "floatcol": np.random.randn(10), + "stringcol": list(tm.rands(10)), + } + ) + df.loc[np.random.rand(len(df)) > 0.5, "dates2"] = pd.NaT + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + if left in ["eq", "ne"]: + expected = left_f(df, pd.Timestamp("20010109")) + result = right_f(pd.Timestamp("20010109"), df) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "'(<|>)=?' not supported between " + "instances of 'numpy.ndarray' and 'Timestamp'" + ) + with pytest.raises(TypeError, match=msg): + left_f(df, pd.Timestamp("20010109")) + with pytest.raises(TypeError, match=msg): + right_f(pd.Timestamp("20010109"), df) + # nats + if left in ["eq", "ne"]: + expected = left_f(df, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), df) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "'(<|>)=?' 
not supported between " + "instances of 'numpy.ndarray' and 'NaTType'" + ) + with pytest.raises(TypeError, match=msg): + left_f(df, pd.Timestamp("nat")) + with pytest.raises(TypeError, match=msg): + right_f(pd.Timestamp("nat"), df) + + def test_mixed_comparison(self): + # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, + # not raise TypeError + # (this appears to be fixed before GH#22163, not sure when) + df = DataFrame([["1989-08-01", 1], ["1989-08-01", 2]]) + other = DataFrame([["a", "b"], ["c", "d"]]) + + result = df == other + assert not result.any().any() + + result = df != other + assert result.all().all() + + def test_df_boolean_comparison_error(self): + # GH#4576, GH#22880 + # comparing DataFrame against list/tuple with len(obj) matching + # len(df.columns) is supported as of GH#22800 + df = DataFrame(np.arange(6).reshape((3, 2))) + + expected = DataFrame([[False, False], [True, False], [False, False]]) + + result = df == (2, 2) + tm.assert_frame_equal(result, expected) + + result = df == [2, 2] + tm.assert_frame_equal(result, expected) + + def test_df_float_none_comparison(self): + df = DataFrame(np.random.randn(8, 3), index=range(8), columns=["A", "B", "C"]) + + result = df.__eq__(None) + assert not result.any().any() + + def test_df_string_comparison(self): + df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) + mask_a = df.a > 1 + tm.assert_frame_equal(df[mask_a], df.loc[1:1, :]) + tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :]) + + mask_b = df.b == "foo" + tm.assert_frame_equal(df[mask_b], df.loc[0:0, :]) + tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :]) + + +class TestFrameFlexComparisons: + # TODO: test_bool_flex_frame needs a better name + @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_bool_flex_frame(self, op): + data = np.random.randn(5, 3) + other_data = np.random.randn(5, 3) + df = DataFrame(data) + other = DataFrame(other_data) + ndim_5 = np.ones(df.shape + (1, 3)) + + # DataFrame + assert df.eq(df).values.all() + assert not df.ne(df).values.any() + f = getattr(df, op) + o = getattr(operator, op) + # No NAs + tm.assert_frame_equal(f(other), o(df, other)) + # Unaligned + part_o = other.loc[3:, 1:].copy() + rs = f(part_o) + xp = o(df, part_o.reindex(index=df.index, columns=df.columns)) + tm.assert_frame_equal(rs, xp) + # ndarray + tm.assert_frame_equal(f(other.values), o(df, other.values)) + # scalar + tm.assert_frame_equal(f(0), o(df, 0)) + # NAs + msg = "Unable to coerce to Series/DataFrame" + tm.assert_frame_equal(f(np.nan), o(df, np.nan)) + with pytest.raises(ValueError, match=msg): + f(ndim_5) + + @pytest.mark.parametrize("box", [np.array, Series]) + def test_bool_flex_series(self, box): + # Series + # list/tuple + data = np.random.randn(5, 3) + df = DataFrame(data) + idx_ser = box(np.random.randn(5)) + col_ser = box(np.random.randn(3)) + + idx_eq = df.eq(idx_ser, axis=0) + col_eq = df.eq(col_ser) + idx_ne = df.ne(idx_ser, axis=0) + col_ne = df.ne(col_ser) + tm.assert_frame_equal(col_eq, df == Series(col_ser)) + tm.assert_frame_equal(col_eq, -col_ne) + tm.assert_frame_equal(idx_eq, -idx_ne) + tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) + tm.assert_frame_equal(col_eq, df.eq(list(col_ser))) + tm.assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0)) + tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0)) + + idx_gt = df.gt(idx_ser, axis=0) + col_gt = df.gt(col_ser) + idx_le = df.le(idx_ser, axis=0) + col_le = df.le(col_ser) + + tm.assert_frame_equal(col_gt, df > Series(col_ser)) + 
tm.assert_frame_equal(col_gt, -col_le) + tm.assert_frame_equal(idx_gt, -idx_le) + tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) + + idx_ge = df.ge(idx_ser, axis=0) + col_ge = df.ge(col_ser) + idx_lt = df.lt(idx_ser, axis=0) + col_lt = df.lt(col_ser) + tm.assert_frame_equal(col_ge, df >= Series(col_ser)) + tm.assert_frame_equal(col_ge, -col_lt) + tm.assert_frame_equal(idx_ge, -idx_lt) + tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) + + idx_ser = Series(np.random.randn(5)) + col_ser = Series(np.random.randn(3)) + + def test_bool_flex_frame_na(self): + df = DataFrame(np.random.randn(5, 3)) + # NA + df.loc[0, 0] = np.nan + rs = df.eq(df) + assert not rs.loc[0, 0] + rs = df.ne(df) + assert rs.loc[0, 0] + rs = df.gt(df) + assert not rs.loc[0, 0] + rs = df.lt(df) + assert not rs.loc[0, 0] + rs = df.ge(df) + assert not rs.loc[0, 0] + rs = df.le(df) + assert not rs.loc[0, 0] + + def test_bool_flex_frame_complex_dtype(self): + # complex + arr = np.array([np.nan, 1, 6, np.nan]) + arr2 = np.array([2j, np.nan, 7, None]) + df = DataFrame({"a": arr}) + df2 = DataFrame({"a": arr2}) + + msg = "|".join( + [ + "'>' not supported between instances of '.*' and 'complex'", + r"unorderable types: .*complex\(\)", # PY35 + ] + ) + with pytest.raises(TypeError, match=msg): + # inequalities are not well-defined for complex numbers + df.gt(df2) + with pytest.raises(TypeError, match=msg): + # regression test that we get the same behavior for Series + df["a"].gt(df2["a"]) + with pytest.raises(TypeError, match=msg): + # Check that we match numpy behavior here + df.values > df2.values + + rs = df.ne(df2) + assert rs.values.all() + + arr3 = np.array([2j, np.nan, None]) + df3 = DataFrame({"a": arr3}) + + with pytest.raises(TypeError, match=msg): + # inequalities are not well-defined for complex numbers + df3.gt(2j) + with pytest.raises(TypeError, match=msg): + # regression test that we get the same behavior for Series + df3["a"].gt(2j) + with pytest.raises(TypeError, match=msg): + # Check that we match numpy behavior here + df3.values > 2j + + def test_bool_flex_frame_object_dtype(self): + # corner, dtype=object + df1 = DataFrame({"col": ["foo", np.nan, "bar"]}) + df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]}) + result = df1.ne(df2) + exp = DataFrame({"col": [False, True, False]}) + tm.assert_frame_equal(result, exp) + + def test_flex_comparison_nat(self): + # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, + # and _definitely_ not be NaN + df = DataFrame([pd.NaT]) + + result = df == pd.NaT + # result.iloc[0, 0] is a np.bool_ object + assert result.iloc[0, 0].item() is False + + result = df.eq(pd.NaT) + assert result.iloc[0, 0].item() is False + + result = df != pd.NaT + assert result.iloc[0, 0].item() is True + + result = df.ne(pd.NaT) + assert result.iloc[0, 0].item() is True + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_df_flex_cmp_constant_return_types(self, opname): + # GH 15077, non-empty DataFrame + df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) + const = 2 + + result = getattr(df, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, Series([2], index=[np.dtype(bool)])) + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_df_flex_cmp_constant_return_types_empty(self, opname): + # GH 15077 empty DataFrame + df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) + const = 2 + + empty = df.iloc[:0] + result = getattr(empty, opname)(const).dtypes.value_counts() + 
tm.assert_series_equal(result, Series([2], index=[np.dtype(bool)])) + + def test_df_flex_cmp_ea_dtype_with_ndarray_series(self): + ii = pd.IntervalIndex.from_breaks([1, 2, 3]) + df = DataFrame({"A": ii, "B": ii}) + + ser = Series([0, 0]) + res = df.eq(ser, axis=0) + + expected = DataFrame({"A": [False, False], "B": [False, False]}) + tm.assert_frame_equal(res, expected) + + ser2 = Series([1, 2], index=["A", "B"]) + res2 = df.eq(ser2, axis=1) + tm.assert_frame_equal(res2, expected) + + +# ------------------------------------------------------------------- +# Arithmetic + + +class TestFrameFlexArithmetic: + def test_floordiv_axis0(self): + # make sure we df.floordiv(ser, axis=0) matches column-wise result + arr = np.arange(3) + ser = Series(arr) + df = DataFrame({"A": ser, "B": ser}) + + result = df.floordiv(ser, axis=0) + + expected = DataFrame({col: df[col] // ser for col in df.columns}) + + tm.assert_frame_equal(result, expected) + + result2 = df.floordiv(ser.values, axis=0) + tm.assert_frame_equal(result2, expected) + + @pytest.mark.skipif(not NUMEXPR_INSTALLED, reason="numexpr not installed") + @pytest.mark.parametrize("opname", ["floordiv", "pow"]) + def test_floordiv_axis0_numexpr_path(self, opname): + # case that goes through numexpr and has to fall back to masked_arith_op + op = getattr(operator, opname) + + arr = np.arange(_MIN_ELEMENTS + 100).reshape(_MIN_ELEMENTS // 100 + 1, -1) * 100 + df = DataFrame(arr) + df["C"] = 1.0 + + ser = df[0] + result = getattr(df, opname)(ser, axis=0) + + expected = DataFrame({col: op(df[col], ser) for col in df.columns}) + tm.assert_frame_equal(result, expected) + + result2 = getattr(df, opname)(ser.values, axis=0) + tm.assert_frame_equal(result2, expected) + + def test_df_add_td64_columnwise(self): + # GH 22534 Check that column-wise addition broadcasts correctly + dti = pd.date_range("2016-01-01", periods=10) + tdi = pd.timedelta_range("1", periods=10) + tser = Series(tdi) + df = DataFrame({0: dti, 1: tdi}) + + result = df.add(tser, axis=0) + expected = DataFrame({0: dti + tdi, 1: tdi + tdi}) + tm.assert_frame_equal(result, expected) + + def test_df_add_flex_filled_mixed_dtypes(self): + # GH 19611 + dti = pd.date_range("2016-01-01", periods=3) + ser = Series(["1 Day", "NaT", "2 Days"], dtype="timedelta64[ns]") + df = DataFrame({"A": dti, "B": ser}) + other = DataFrame({"A": ser, "B": ser}) + fill = pd.Timedelta(days=1).to_timedelta64() + result = df.add(other, fill_value=fill) + + expected = DataFrame( + { + "A": Series( + ["2016-01-02", "2016-01-03", "2016-01-05"], dtype="datetime64[ns]" + ), + "B": ser * 2, + } + ) + tm.assert_frame_equal(result, expected) + + def test_arith_flex_frame( + self, all_arithmetic_operators, float_frame, mixed_float_frame + ): + # one instance of parametrized fixture + op = all_arithmetic_operators + + def f(x, y): + # r-versions not in operator-stdlib; get op without "r" and invert + if op.startswith("__r"): + return getattr(operator, op.replace("__r", "__"))(y, x) + return getattr(operator, op)(x, y) + + result = getattr(float_frame, op)(2 * float_frame) + expected = f(float_frame, 2 * float_frame) + tm.assert_frame_equal(result, expected) + + # vs mix float + result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) + expected = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, expected) + _check_mixed_float(result, dtype={"C": None}) + + @pytest.mark.parametrize("op", ["__add__", "__sub__", "__mul__"]) + def test_arith_flex_frame_mixed( + self, + op, + int_frame, + mixed_int_frame, 
+ mixed_float_frame, + switch_numexpr_min_elements, + ): + f = getattr(operator, op) + + # vs mix int + result = getattr(mixed_int_frame, op)(2 + mixed_int_frame) + expected = f(mixed_int_frame, 2 + mixed_int_frame) + + # no overflow in the uint + dtype = None + if op in ["__sub__"]: + dtype = {"B": "uint64", "C": None} + elif op in ["__add__", "__mul__"]: + dtype = {"C": None} + if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0: + # when using numexpr, the casting rules are slightly different: + # in the `2 + mixed_int_frame` operation, int32 column becomes + # and int64 column (not preserving dtype in operation with Python + # scalar), and then the int32/int64 combo results in int64 result + dtype["A"] = (2 + mixed_int_frame)["A"].dtype + tm.assert_frame_equal(result, expected) + _check_mixed_int(result, dtype=dtype) + + # vs mix float + result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) + expected = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, expected) + _check_mixed_float(result, dtype={"C": None}) + + # vs plain int + result = getattr(int_frame, op)(2 * int_frame) + expected = f(int_frame, 2 * int_frame) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dim", range(3, 6)) + def test_arith_flex_frame_raise(self, all_arithmetic_operators, float_frame, dim): + # one instance of parametrized fixture + op = all_arithmetic_operators + + # Check that arrays with dim >= 3 raise + arr = np.ones((1,) * dim) + msg = "Unable to coerce to Series/DataFrame" + with pytest.raises(ValueError, match=msg): + getattr(float_frame, op)(arr) + + def test_arith_flex_frame_corner(self, float_frame): + + const_add = float_frame.add(1) + tm.assert_frame_equal(const_add, float_frame + 1) + + # corner cases + result = float_frame.add(float_frame[:0]) + tm.assert_frame_equal(result, float_frame * np.nan) + + result = float_frame[:0].add(float_frame) + tm.assert_frame_equal(result, float_frame * np.nan) + + with pytest.raises(NotImplementedError, match="fill_value"): + float_frame.add(float_frame.iloc[0], fill_value=3) + + with pytest.raises(NotImplementedError, match="fill_value"): + float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) + + @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"]) + def test_arith_flex_series_ops(self, simple_frame, op): + # after arithmetic refactor, add truediv here + df = simple_frame + + row = df.xs("a") + col = df["two"] + f = getattr(df, op) + op = getattr(operator, op) + tm.assert_frame_equal(f(row), op(df, row)) + tm.assert_frame_equal(f(col, axis=0), op(df.T, col).T) + + def test_arith_flex_series(self, simple_frame): + df = simple_frame + + row = df.xs("a") + col = df["two"] + # special case for some reason + tm.assert_frame_equal(df.add(row, axis=None), df + row) + + # cases which will be refactored after big arithmetic refactor + tm.assert_frame_equal(df.div(row), df / row) + tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) + + @pytest.mark.parametrize("dtype", ["int64", "float64"]) + def test_arith_flex_series_broadcasting(self, dtype): + # broadcasting issue in GH 7325 + df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype=dtype) + expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + result = df.div(df[0], axis="index") + tm.assert_frame_equal(result, expected) + + def test_arith_flex_zero_len_raises(self): + # GH 19522 passing fill_value to frame flex arith methods should + # raise even in the zero-length special cases + ser_len0 = Series([], 
dtype=object) + df_len0 = DataFrame(columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + + with pytest.raises(NotImplementedError, match="fill_value"): + df.add(ser_len0, fill_value="E") + + with pytest.raises(NotImplementedError, match="fill_value"): + df_len0.sub(df["A"], axis=None, fill_value=3) + + def test_flex_add_scalar_fill_value(self): + # GH#12723 + dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float") + df = DataFrame({"foo": dat}, index=range(6)) + + exp = df.fillna(0).add(2) + res = df.add(2, fill_value=0) + tm.assert_frame_equal(res, exp) + + def test_sub_alignment_with_duplicate_index(self): + # GH#5185 dup aligning operations should work + df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) + df2 = DataFrame([1, 2, 3], index=[1, 2, 3]) + expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3]) + result = df1.sub(df2) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"]) + def test_arithmetic_with_duplicate_columns(self, op): + # operations + df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)}) + expected = getattr(df, op)(df) + expected.columns = ["A", "A"] + df.columns = ["A", "A"] + result = getattr(df, op)(df) + tm.assert_frame_equal(result, expected) + str(result) + result.dtypes + + @pytest.mark.parametrize("level", [0, None]) + def test_broadcast_multiindex(self, level): + # GH34388 + df1 = DataFrame({"A": [0, 1, 2], "B": [1, 2, 3]}) + df1.columns = df1.columns.set_names("L1") + + df2 = DataFrame({("A", "C"): [0, 0, 0], ("A", "D"): [0, 0, 0]}) + df2.columns = df2.columns.set_names(["L1", "L2"]) + + result = df1.add(df2, level=level) + expected = DataFrame({("A", "C"): [0, 1, 2], ("A", "D"): [0, 1, 2]}) + expected.columns = expected.columns.set_names(["L1", "L2"]) + + tm.assert_frame_equal(result, expected) + + def test_frame_multiindex_operations(self): + # GH 43321 + df = DataFrame( + {2010: [1, 2, 3], 2020: [3, 4, 5]}, + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + + series = Series( + [0.4], + index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]), + ) + + expected = DataFrame( + {2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]}, + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + result = df.add(series, axis=0) + + tm.assert_frame_equal(result, expected) + + def test_frame_multiindex_operations_series_index_to_frame_index(self): + # GH 43321 + df = DataFrame( + {2010: [1], 2020: [3]}, + index=MultiIndex.from_product([["a"], ["b"]], names=["scen", "mod"]), + ) + + series = Series( + [10.0, 20.0, 30.0], + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + + expected = DataFrame( + {2010: [11.0, 21, 31.0], 2020: [13.0, 23.0, 33.0]}, + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + result = df.add(series, axis=0) + + tm.assert_frame_equal(result, expected) + + def test_frame_multiindex_operations_no_align(self): + df = DataFrame( + {2010: [1, 2, 3], 2020: [3, 4, 5]}, + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + + series = Series( + [0.4], + index=MultiIndex.from_product([["c"], ["a"]], names=["mod", "scen"]), + ) + + expected = DataFrame( + {2010: np.nan, 2020: np.nan}, + index=MultiIndex.from_tuples( + [ + ("a", "b", 0), + ("a", "b", 1), + ("a", "b", 2), + ("a", "c", np.nan), + ], + 
names=["scen", "mod", "id"], + ), + ) + result = df.add(series, axis=0) + + tm.assert_frame_equal(result, expected) + + def test_frame_multiindex_operations_part_align(self): + df = DataFrame( + {2010: [1, 2, 3], 2020: [3, 4, 5]}, + index=MultiIndex.from_tuples( + [ + ("a", "b", 0), + ("a", "b", 1), + ("a", "c", 2), + ], + names=["scen", "mod", "id"], + ), + ) + + series = Series( + [0.4], + index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]), + ) + + expected = DataFrame( + {2010: [1.4, 2.4, np.nan], 2020: [3.4, 4.4, np.nan]}, + index=MultiIndex.from_tuples( + [ + ("a", "b", 0), + ("a", "b", 1), + ("a", "c", 2), + ], + names=["scen", "mod", "id"], + ), + ) + result = df.add(series, axis=0) + + tm.assert_frame_equal(result, expected) + + +class TestFrameArithmetic: + def test_td64_op_nat_casting(self): + # Make sure we don't accidentally treat timedelta64(NaT) as datetime64 + # when calling dispatch_to_series in DataFrame arithmetic + ser = Series(["NaT", "NaT"], dtype="timedelta64[ns]") + df = DataFrame([[1, 2], [3, 4]]) + + result = df * ser + expected = DataFrame({0: ser, 1: ser}) + tm.assert_frame_equal(result, expected) + + def test_df_add_2d_array_rowlike_broadcasts(self): + # GH#23000 + arr = np.arange(6).reshape(3, 2) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + expected = DataFrame( + [[2, 4], [4, 6], [6, 8]], + columns=df.columns, + index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype, + ) + result = df + rowlike + tm.assert_frame_equal(result, expected) + result = rowlike + df + tm.assert_frame_equal(result, expected) + + def test_df_add_2d_array_collike_broadcasts(self): + # GH#23000 + arr = np.arange(6).reshape(3, 2) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + expected = DataFrame( + [[1, 2], [5, 6], [9, 10]], + columns=df.columns, + index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype, + ) + result = df + collike + tm.assert_frame_equal(result, expected) + result = collike + df + tm.assert_frame_equal(result, expected) + + def test_df_arith_2d_array_rowlike_broadcasts( + self, request, all_arithmetic_operators, using_array_manager + ): + # GH#23000 + opname = all_arithmetic_operators + + if using_array_manager and opname in ("__rmod__", "__rfloordiv__"): + # TODO(ArrayManager) decide on dtypes + td.mark_array_manager_not_yet_implemented(request) + + arr = np.arange(6).reshape(3, 2) + df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + exvals = [ + getattr(df.loc["A"], opname)(rowlike.squeeze()), + getattr(df.loc["B"], opname)(rowlike.squeeze()), + getattr(df.loc["C"], opname)(rowlike.squeeze()), + ] + + expected = DataFrame(exvals, columns=df.columns, index=df.index) + + result = getattr(df, opname)(rowlike) + tm.assert_frame_equal(result, expected) + + def test_df_arith_2d_array_collike_broadcasts( + self, request, all_arithmetic_operators, using_array_manager + ): + # GH#23000 + opname = all_arithmetic_operators + + if using_array_manager and opname in ("__rmod__", "__rfloordiv__"): + # TODO(ArrayManager) decide on dtypes + td.mark_array_manager_not_yet_implemented(request) + + arr = np.arange(6).reshape(3, 2) + df = 
DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + exvals = { + True: getattr(df[True], opname)(collike.squeeze()), + False: getattr(df[False], opname)(collike.squeeze()), + } + + dtype = None + if opname in ["__rmod__", "__rfloordiv__"]: + # Series ops may return mixed int/float dtypes in cases where + # DataFrame op will return all-float. So we upcast `expected` + dtype = np.common_type(*(x.values for x in exvals.values())) + + expected = DataFrame(exvals, columns=df.columns, index=df.index, dtype=dtype) + + result = getattr(df, opname)(collike) + tm.assert_frame_equal(result, expected) + + def test_df_bool_mul_int(self): + # GH 22047, GH 22163 multiplication by 1 should result in int dtype, + # not object dtype + df = DataFrame([[False, True], [False, False]]) + result = df * 1 + + # On appveyor this comes back as np.int32 instead of np.int64, + # so we check dtype.kind instead of just dtype + kinds = result.dtypes.apply(lambda x: x.kind) + assert (kinds == "i").all() + + result = 1 * df + kinds = result.dtypes.apply(lambda x: x.kind) + assert (kinds == "i").all() + + def test_arith_mixed(self): + + left = DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]}) + + result = left + left + expected = DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("col", ["A", "B"]) + def test_arith_getitem_commute(self, all_arithmetic_functions, col): + df = DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]}) + result = all_arithmetic_functions(df, 1)[col] + expected = all_arithmetic_functions(df[col], 1) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "values", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3), deque([1, 2])] + ) + def test_arith_alignment_non_pandas_object(self, values): + # GH#17901 + df = DataFrame({"A": [1, 1], "B": [1, 1]}) + expected = DataFrame({"A": [2, 2], "B": [3, 3]}) + result = df + values + tm.assert_frame_equal(result, expected) + + def test_arith_non_pandas_object(self): + df = DataFrame( + np.arange(1, 10, dtype="f8").reshape(3, 3), + columns=["one", "two", "three"], + index=["a", "b", "c"], + ) + + val1 = df.xs("a").values + added = DataFrame(df.values + val1, index=df.index, columns=df.columns) + tm.assert_frame_equal(df + val1, added) + + added = DataFrame((df.values.T + val1).T, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val1, axis=0), added) + + val2 = list(df["two"]) + + added = DataFrame(df.values + val2, index=df.index, columns=df.columns) + tm.assert_frame_equal(df + val2, added) + + added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val2, axis="index"), added) + + val3 = np.random.rand(*df.shape) + added = DataFrame(df.values + val3, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val3), added) + + def test_operations_with_interval_categories_index(self, all_arithmetic_operators): + # GH#27415 + op = all_arithmetic_operators + ind = pd.CategoricalIndex(pd.interval_range(start=0.0, end=2.0)) + data = [1, 2] + df = DataFrame([data], columns=ind) + num = 10 + result = getattr(df, op)(num) + expected = DataFrame([[getattr(n, op)(num) for n in data]], columns=ind) + tm.assert_frame_equal(result, expected) + + def test_frame_with_frame_reindex(self): + # GH#31623 + df = DataFrame( + { + "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")], + "bar": [pd.Timestamp("2018"), 
pd.Timestamp("2021")], + }, + columns=["foo", "bar"], + ) + df2 = df[["foo"]] + + result = df - df2 + + expected = DataFrame( + {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]}, + columns=["bar", "foo"], + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "value, dtype", + [ + (1, "i8"), + (1.0, "f8"), + (2**63, "f8"), + (1j, "complex128"), + (2**63, "complex128"), + (True, "bool"), + (np.timedelta64(20, "ns"), " b + tm.assert_frame_equal(result, expected) + + result = df.values > b + tm.assert_numpy_array_equal(result, expected.values) + + msg1d = "Unable to coerce to Series, length must be 2: given 3" + msg2d = "Unable to coerce to DataFrame, shape must be" + msg2db = "operands could not be broadcast together with shapes" + with pytest.raises(ValueError, match=msg1d): + # wrong shape + df > lst + + with pytest.raises(ValueError, match=msg1d): + # wrong shape + df > tup + + # broadcasts like ndarray (GH#23000) + result = df > b_r + tm.assert_frame_equal(result, expected) + + result = df.values > b_r + tm.assert_numpy_array_equal(result, expected.values) + + with pytest.raises(ValueError, match=msg2d): + df > b_c + + with pytest.raises(ValueError, match=msg2db): + df.values > b_c + + # == + expected = DataFrame([[False, False], [True, False], [False, False]]) + result = df == b + tm.assert_frame_equal(result, expected) + + with pytest.raises(ValueError, match=msg1d): + df == lst + + with pytest.raises(ValueError, match=msg1d): + df == tup + + # broadcasts like ndarray (GH#23000) + result = df == b_r + tm.assert_frame_equal(result, expected) + + result = df.values == b_r + tm.assert_numpy_array_equal(result, expected.values) + + with pytest.raises(ValueError, match=msg2d): + df == b_c + + assert df.values.shape != b_c.shape + + # with alignment + df = DataFrame( + np.arange(6).reshape((3, 2)), columns=list("AB"), index=list("abc") + ) + expected.index = df.index + expected.columns = df.columns + + with pytest.raises(ValueError, match=msg1d): + df == lst + + with pytest.raises(ValueError, match=msg1d): + df == tup + + def test_inplace_ops_alignment(self): + + # inplace ops / ops alignment + # GH 8511 + + columns = list("abcdefg") + X_orig = DataFrame( + np.arange(10 * len(columns)).reshape(-1, len(columns)), + columns=columns, + index=range(10), + ) + Z = 100 * X_orig.iloc[:, 1:-1].copy() + block1 = list("bedcf") + subs = list("bcdef") + + # add + X = X_orig.copy() + result1 = (X[block1] + Z).reindex(columns=subs) + + X[block1] += Z + result2 = X.reindex(columns=subs) + + X = X_orig.copy() + result3 = (X[block1] + Z[block1]).reindex(columns=subs) + + X[block1] += Z[block1] + result4 = X.reindex(columns=subs) + + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + tm.assert_frame_equal(result1, result4) + + # sub + X = X_orig.copy() + result1 = (X[block1] - Z).reindex(columns=subs) + + X[block1] -= Z + result2 = X.reindex(columns=subs) + + X = X_orig.copy() + result3 = (X[block1] - Z[block1]).reindex(columns=subs) + + X[block1] -= Z[block1] + result4 = X.reindex(columns=subs) + + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + tm.assert_frame_equal(result1, result4) + + def test_inplace_ops_identity(self): + + # GH 5104 + # make sure that we are actually changing the object + s_orig = Series([1, 2, 3]) + df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5)) + + # no dtype change + s = s_orig.copy() + s2 = s + s += 1 + tm.assert_series_equal(s, s2) + 
tm.assert_series_equal(s_orig + 1, s) + assert s is s2 + assert s._mgr is s2._mgr + + df = df_orig.copy() + df2 = df + df += 1 + tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df_orig + 1, df) + assert df is df2 + assert df._mgr is df2._mgr + + # dtype change + s = s_orig.copy() + s2 = s + s += 1.5 + tm.assert_series_equal(s, s2) + tm.assert_series_equal(s_orig + 1.5, s) + + df = df_orig.copy() + df2 = df + df += 1.5 + tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df_orig + 1.5, df) + assert df is df2 + assert df._mgr is df2._mgr + + # mixed dtype + arr = np.random.randint(0, 10, size=5) + df_orig = DataFrame({"A": arr.copy(), "B": "foo"}) + df = df_orig.copy() + df2 = df + df["A"] += 1 + expected = DataFrame({"A": arr.copy() + 1, "B": "foo"}) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df2, expected) + assert df._mgr is df2._mgr + + df = df_orig.copy() + df2 = df + df["A"] += 1.5 + expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"}) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df2, expected) + assert df._mgr is df2._mgr + + @pytest.mark.parametrize( + "op", + [ + "add", + "and", + "div", + "floordiv", + "mod", + "mul", + "or", + "pow", + "sub", + "truediv", + "xor", + ], + ) + def test_inplace_ops_identity2(self, op): + + if op == "div": + return + + df = DataFrame({"a": [1.0, 2.0, 3.0], "b": [1, 2, 3]}) + + operand = 2 + if op in ("and", "or", "xor"): + # cannot use floats for boolean ops + df["a"] = [True, False, True] + + df_copy = df.copy() + iop = f"__i{op}__" + op = f"__{op}__" + + # no id change and value is correct + getattr(df, iop)(operand) + expected = getattr(df_copy, op)(operand) + tm.assert_frame_equal(df, expected) + expected = id(df) + assert id(df) == expected + + @pytest.mark.parametrize( + "val", + [ + [1, 2, 3], + (1, 2, 3), + np.array([1, 2, 3], dtype=np.int64), + range(1, 4), + ], + ) + def test_alignment_non_pandas(self, val): + index = ["A", "B", "C"] + columns = ["X", "Y", "Z"] + df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + + align = pd.core.ops.align_method_FRAME + + expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index) + tm.assert_frame_equal(align(df, val, "index")[1], expected) + + expected = DataFrame( + {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index + ) + tm.assert_frame_equal(align(df, val, "columns")[1], expected) + + @pytest.mark.parametrize("val", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]) + def test_alignment_non_pandas_length_mismatch(self, val): + index = ["A", "B", "C"] + columns = ["X", "Y", "Z"] + df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + + align = pd.core.ops.align_method_FRAME + # length mismatch + msg = "Unable to coerce to Series, length must be 3: given 2" + with pytest.raises(ValueError, match=msg): + align(df, val, "index") + + with pytest.raises(ValueError, match=msg): + align(df, val, "columns") + + def test_alignment_non_pandas_index_columns(self): + index = ["A", "B", "C"] + columns = ["X", "Y", "Z"] + df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + + align = pd.core.ops.align_method_FRAME + val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + tm.assert_frame_equal( + align(df, val, "index")[1], + DataFrame(val, index=df.index, columns=df.columns), + ) + tm.assert_frame_equal( + align(df, val, "columns")[1], + DataFrame(val, index=df.index, columns=df.columns), + ) + + # shape mismatch + msg = "Unable to coerce to DataFrame, shape must be" + val = np.array([[1, 2, 3], 
[4, 5, 6]]) + with pytest.raises(ValueError, match=msg): + align(df, val, "index") + + with pytest.raises(ValueError, match=msg): + align(df, val, "columns") + + val = np.zeros((3, 3, 3)) + msg = re.escape( + "Unable to coerce to Series/DataFrame, dimension must be <= 2: (3, 3, 3)" + ) + with pytest.raises(ValueError, match=msg): + align(df, val, "index") + with pytest.raises(ValueError, match=msg): + align(df, val, "columns") + + def test_no_warning(self, all_arithmetic_operators): + df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) + b = df["B"] + with tm.assert_produces_warning(None): + getattr(df, all_arithmetic_operators)(b) + + def test_dunder_methods_binary(self, all_arithmetic_operators): + # GH#??? frame.__foo__ should only accept one argument + df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) + b = df["B"] + with pytest.raises(TypeError, match="takes 2 positional arguments"): + getattr(df, all_arithmetic_operators)(b, 0) + + def test_align_int_fill_bug(self): + # GH#910 + X = np.arange(10 * 10, dtype="float64").reshape(10, 10) + Y = np.ones((10, 1), dtype=int) + + df1 = DataFrame(X) + df1["0.X"] = Y.squeeze() + + df2 = df1.astype(float) + + result = df1 - df1.mean() + expected = df2 - df2.mean() + tm.assert_frame_equal(result, expected) + + +def test_pow_with_realignment(): + # GH#32685 pow has special semantics for operating with null values + left = DataFrame({"A": [0, 1, 2]}) + right = DataFrame(index=[0, 1, 2]) + + result = left**right + expected = DataFrame({"A": [np.nan, 1.0, np.nan]}) + tm.assert_frame_equal(result, expected) + + +# TODO: move to tests.arithmetic and parametrize +def test_pow_nan_with_zero(): + left = DataFrame({"A": [np.nan, np.nan, np.nan]}) + right = DataFrame({"A": [0, 0, 0]}) + + expected = DataFrame({"A": [1.0, 1.0, 1.0]}) + + result = left**right + tm.assert_frame_equal(result, expected) + + result = left["A"] ** right["A"] + tm.assert_series_equal(result, expected["A"]) + + +def test_dataframe_series_extension_dtypes(): + # https://github.com/pandas-dev/pandas/issues/34311 + df = DataFrame(np.random.randint(0, 100, (10, 3)), columns=["a", "b", "c"]) + ser = Series([1, 2, 3], index=["a", "b", "c"]) + + expected = df.to_numpy("int64") + ser.to_numpy("int64").reshape(-1, 3) + expected = DataFrame(expected, columns=df.columns, dtype="Int64") + + df_ea = df.astype("Int64") + result = df_ea + ser + tm.assert_frame_equal(result, expected) + result = df_ea + ser.astype("Int64") + tm.assert_frame_equal(result, expected) + + +def test_dataframe_blockwise_slicelike(): + # GH#34367 + arr = np.random.randint(0, 1000, (100, 10)) + df1 = DataFrame(arr) + df2 = df1.copy() + df2.iloc[0, [1, 3, 7]] = np.nan + + df3 = df1.copy() + df3.iloc[0, [5]] = np.nan + + df4 = df1.copy() + df4.iloc[0, np.arange(2, 5)] = np.nan + df5 = df1.copy() + df5.iloc[0, np.arange(4, 7)] = np.nan + + for left, right in [(df1, df2), (df2, df3), (df4, df5)]: + res = left + right + + expected = DataFrame({i: left[i] + right[i] for i in left.columns}) + tm.assert_frame_equal(res, expected) + + +@pytest.mark.parametrize( + "df, col_dtype", + [ + (DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"), + (DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")), "object"), + ], +) +def test_dataframe_operation_with_non_numeric_types(df, col_dtype): + # GH #22663 + expected = DataFrame([[0.0, np.nan], [3.0, np.nan]], columns=list("ab")) + expected = expected.astype({"b": col_dtype}) + result = df + Series([-1.0], index=list("a")) + tm.assert_frame_equal(result, expected) + + +def 
test_arith_reindex_with_duplicates(): + # https://github.com/pandas-dev/pandas/issues/35194 + df1 = DataFrame(data=[[0]], columns=["second"]) + df2 = DataFrame(data=[[0, 0, 0]], columns=["first", "second", "second"]) + result = df1 + df2 + expected = DataFrame([[np.nan, 0, 0]], columns=["first", "second", "second"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("to_add", [[Series([1, 1])], [Series([1, 1]), Series([1, 1])]]) +def test_arith_list_of_arraylike_raise(to_add): + # GH 36702. Raise when trying to add list of array-like to DataFrame + df = DataFrame({"x": [1, 2], "y": [1, 2]}) + + msg = f"Unable to coerce list of {type(to_add[0])} to Series/DataFrame" + with pytest.raises(ValueError, match=msg): + df + to_add + with pytest.raises(ValueError, match=msg): + to_add + df + + +def test_inplace_arithmetic_series_update(): + # https://github.com/pandas-dev/pandas/issues/36373 + df = DataFrame({"A": [1, 2, 3]}) + series = df["A"] + vals = series._values + + series += 1 + assert series._values is vals + + expected = DataFrame({"A": [2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + +def test_arithemetic_multiindex_align(): + """ + Regression test for: https://github.com/pandas-dev/pandas/issues/33765 + """ + df1 = DataFrame( + [[1]], + index=["a"], + columns=MultiIndex.from_product([[0], [1]], names=["a", "b"]), + ) + df2 = DataFrame([[1]], index=["a"], columns=Index([0], name="a")) + expected = DataFrame( + [[0]], + index=["a"], + columns=MultiIndex.from_product([[0], [1]], names=["a", "b"]), + ) + result = df1 - df2 + tm.assert_frame_equal(result, expected) + + +def test_bool_frame_mult_float(): + # GH 18549 + df = DataFrame(True, list("ab"), list("cd")) + result = df * 1.0 + expected = DataFrame(np.ones((2, 2)), list("ab"), list("cd")) + tm.assert_frame_equal(result, expected) + + +def test_frame_sub_nullable_int(any_int_ea_dtype): + # GH 32822 + series1 = Series([1, 2, None], dtype=any_int_ea_dtype) + series2 = Series([1, 2, 3], dtype=any_int_ea_dtype) + expected = DataFrame([0, 0, None], dtype=any_int_ea_dtype) + result = series1.to_frame() - series2.to_frame() + tm.assert_frame_equal(result, expected) + + +def test_frame_op_subclass_nonclass_constructor(): + # GH#43201 subclass._constructor is a function, not the subclass itself + + class SubclassedSeries(Series): + @property + def _constructor(self): + return SubclassedSeries + + @property + def _constructor_expanddim(self): + return SubclassedDataFrame + + class SubclassedDataFrame(DataFrame): + _metadata = ["my_extra_data"] + + def __init__(self, my_extra_data, *args, **kwargs): + self.my_extra_data = my_extra_data + super().__init__(*args, **kwargs) + + @property + def _constructor(self): + return functools.partial(type(self), self.my_extra_data) + + @property + def _constructor_sliced(self): + return SubclassedSeries + + sdf = SubclassedDataFrame("some_data", {"A": [1, 2, 3], "B": [4, 5, 6]}) + result = sdf * 2 + expected = SubclassedDataFrame("some_data", {"A": [2, 4, 6], "B": [8, 10, 12]}) + tm.assert_frame_equal(result, expected) + + result = sdf + sdf + tm.assert_frame_equal(result, expected) + + +def test_enum_column_equality(): + Cols = Enum("Cols", "col1 col2") + + q1 = DataFrame({Cols.col1: [1, 2, 3]}) + q2 = DataFrame({Cols.col1: [1, 2, 3]}) + + result = q1[Cols.col1] == q2[Cols.col1] + expected = Series([True, True, True], name=Cols.col1) + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py new file 
mode 100644 index 00000000..46c712cf --- /dev/null +++ b/pandas/tests/frame/test_block_internals.py @@ -0,0 +1,440 @@ +from datetime import ( + datetime, + timedelta, +) +import itertools + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, + Timestamp, + compat, + date_range, + option_context, +) +import pandas._testing as tm +from pandas.core.internals import ( + NumericBlock, + ObjectBlock, +) + +# Segregated collection of methods that require the BlockManager internal data +# structure + + +# TODO(ArrayManager) check which of those tests need to be rewritten to test the +# equivalent for ArrayManager +pytestmark = td.skip_array_manager_invalid_test + + +class TestDataFrameBlockInternals: + def test_setitem_invalidates_datetime_index_freq(self): + # GH#24096 altering a datetime64tz column inplace invalidates the + # `freq` attribute on the underlying DatetimeIndex + + dti = date_range("20130101", periods=3, tz="US/Eastern") + ts = dti[1] + + df = DataFrame({"B": dti}) + assert df["B"]._values.freq is None + + df.iloc[1, 0] = pd.NaT + assert df["B"]._values.freq is None + + # check that the DatetimeIndex was not altered in place + assert dti.freq == "D" + assert dti[1] == ts + + def test_cast_internals(self, float_frame): + casted = DataFrame(float_frame._mgr, dtype=int) + expected = DataFrame(float_frame._series, dtype=int) + tm.assert_frame_equal(casted, expected) + + casted = DataFrame(float_frame._mgr, dtype=np.int32) + expected = DataFrame(float_frame._series, dtype=np.int32) + tm.assert_frame_equal(casted, expected) + + def test_consolidate(self, float_frame): + float_frame["E"] = 7.0 + consolidated = float_frame._consolidate() + assert len(consolidated._mgr.blocks) == 1 + + # Ensure copy, do I want this? 
+ recons = consolidated._consolidate() + assert recons is not consolidated + tm.assert_frame_equal(recons, consolidated) + + float_frame["F"] = 8.0 + assert len(float_frame._mgr.blocks) == 3 + + return_value = float_frame._consolidate_inplace() + assert return_value is None + assert len(float_frame._mgr.blocks) == 1 + + def test_consolidate_inplace(self, float_frame): + frame = float_frame.copy() # noqa + + # triggers in-place consolidation + for letter in range(ord("A"), ord("Z")): + float_frame[chr(letter)] = chr(letter) + + def test_values_consolidate(self, float_frame): + float_frame["E"] = 7.0 + assert not float_frame._mgr.is_consolidated() + _ = float_frame.values + assert float_frame._mgr.is_consolidated() + + def test_modify_values(self, float_frame): + float_frame.values[5] = 5 + assert (float_frame.values[5] == 5).all() + + # unconsolidated + float_frame["E"] = 7.0 + col = float_frame["E"] + float_frame.values[6] = 6 + assert (float_frame.values[6] == 6).all() + + # check that item_cache was cleared + assert float_frame["E"] is not col + assert (col == 7).all() + + def test_boolean_set_uncons(self, float_frame): + float_frame["E"] = 7.0 + + expected = float_frame.values.copy() + expected[expected > 1] = 2 + + float_frame[float_frame > 1] = 2 + tm.assert_almost_equal(expected, float_frame.values) + + def test_constructor_with_convert(self): + # this is actually mostly a test of lib.maybe_convert_objects + # #2845 + df = DataFrame({"A": [2**63 - 1]}) + result = df["A"] + expected = Series(np.asarray([2**63 - 1], np.int64), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2**63]}) + result = df["A"] + expected = Series(np.asarray([2**63], np.uint64), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [datetime(2005, 1, 1), True]}) + result = df["A"] + expected = Series( + np.asarray([datetime(2005, 1, 1), True], np.object_), name="A" + ) + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [None, 1]}) + result = df["A"] + expected = Series(np.asarray([np.nan, 1], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0, 2]}) + result = df["A"] + expected = Series(np.asarray([1.0, 2], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, 3]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, 3], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, 3.0]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, 3.0], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, True]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, True], np.object_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0, None]}) + result = df["A"] + expected = Series(np.asarray([1.0, np.nan], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, None]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, np.nan], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2.0, 1, True, None]}) + result = df["A"] + expected = Series(np.asarray([2.0, 1, True, None], np.object_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2.0, 1, datetime(2006, 1, 1), None]}) + result = df["A"] + expected = Series( + np.asarray([2.0, 1, datetime(2006, 1, 1), None], np.object_), 
name="A" + ) + tm.assert_series_equal(result, expected) + + def test_construction_with_mixed(self, float_string_frame): + # test construction edge cases with mixed types + + # f7u12, this does not work without extensive workaround + data = [ + [datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)], + ] + df = DataFrame(data) + + # check dtypes + result = df.dtypes + expected = Series({"datetime64[ns]": 3}) + + # mixed-type frames + float_string_frame["datetime"] = datetime.now() + float_string_frame["timedelta"] = timedelta(days=1, seconds=1) + assert float_string_frame["datetime"].dtype == "M8[ns]" + assert float_string_frame["timedelta"].dtype == "m8[ns]" + result = float_string_frame.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [ + np.dtype("object"), + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + ], + index=list("ABCD") + ["foo", "datetime", "timedelta"], + ) + tm.assert_series_equal(result, expected) + + def test_construction_with_conversions(self): + + # convert from a numpy array of non-ns timedelta64 + arr = np.array([1, 2, 3], dtype="timedelta64[s]") + df = DataFrame(index=range(3)) + df["A"] = arr + expected = DataFrame( + {"A": pd.timedelta_range("00:00:01", periods=3, freq="s")}, index=range(3) + ) + tm.assert_frame_equal(df, expected) + + expected = DataFrame( + { + "dt1": Timestamp("20130101"), + "dt2": date_range("20130101", periods=3), + # 'dt3' : date_range('20130101 00:00:01',periods=3,freq='s'), + }, + index=range(3), + ) + + df = DataFrame(index=range(3)) + df["dt1"] = np.datetime64("2013-01-01") + df["dt2"] = np.array( + ["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]" + ) + + # df['dt3'] = np.array(['2013-01-01 00:00:01','2013-01-01 + # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]') + + tm.assert_frame_equal(df, expected) + + def test_constructor_compound_dtypes(self): + # GH 5191 + # compound dtypes should raise not-implementederror + + def f(dtype): + data = list(itertools.repeat((datetime(2001, 1, 1), "aa", 20), 9)) + return DataFrame(data=data, columns=["A", "B", "C"], dtype=dtype) + + msg = "compound dtypes are not implemented in the DataFrame constructor" + with pytest.raises(NotImplementedError, match=msg): + f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) + + # these work (though results may be unexpected) + depr_msg = "either all columns will be cast to that dtype, or a TypeError will" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + f("int64") + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + f("float64") + + # 10822 + # invalid error message on dt inference + if not compat.is_platform_windows(): + f("M8[ns]") + + def test_pickle(self, float_string_frame, timezone_frame): + empty_frame = DataFrame() + + unpickled = tm.round_trip_pickle(float_string_frame) + tm.assert_frame_equal(float_string_frame, unpickled) + + # buglet + float_string_frame._mgr.ndim + + # empty + unpickled = tm.round_trip_pickle(empty_frame) + repr(unpickled) + + # tz frame + unpickled = tm.round_trip_pickle(timezone_frame) + tm.assert_frame_equal(timezone_frame, unpickled) + + def test_consolidate_datetime64(self): + # numpy vstack bug + + df = DataFrame( + { + "starting": pd.to_datetime( + [ + "2012-06-21 00:00", + "2012-06-23 07:00", + "2012-06-23 16:30", + "2012-06-25 08:00", + "2012-06-26 12:00", + ] + ), + "ending": pd.to_datetime( + [ + "2012-06-23 07:00", + "2012-06-23 16:30", + "2012-06-25 08:00", + "2012-06-26 
12:00", + "2012-06-27 08:00", + ] + ), + "measure": [77, 65, 77, 0, 77], + } + ) + + ser_starting = df.starting + ser_starting.index = ser_starting.values + ser_starting = ser_starting.tz_localize("US/Eastern") + ser_starting = ser_starting.tz_convert("UTC") + ser_starting.index.name = "starting" + + ser_ending = df.ending + ser_ending.index = ser_ending.values + ser_ending = ser_ending.tz_localize("US/Eastern") + ser_ending = ser_ending.tz_convert("UTC") + ser_ending.index.name = "ending" + + df.starting = ser_starting.index + df.ending = ser_ending.index + + tm.assert_index_equal(pd.DatetimeIndex(df.starting), ser_starting.index) + tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index) + + def test_is_mixed_type(self, float_frame, float_string_frame): + assert not float_frame._is_mixed_type + assert float_string_frame._is_mixed_type + + def test_stale_cached_series_bug_473(self, using_copy_on_write): + + # this is chained, but ok + with option_context("chained_assignment", None): + Y = DataFrame( + np.random.random((4, 4)), + index=("a", "b", "c", "d"), + columns=("e", "f", "g", "h"), + ) + repr(Y) + Y["e"] = Y["e"].astype("object") + Y["g"]["c"] = np.NaN + repr(Y) + result = Y.sum() # noqa + exp = Y["g"].sum() # noqa + if using_copy_on_write: + assert not pd.isna(Y["g"]["c"]) + else: + assert pd.isna(Y["g"]["c"]) + + def test_strange_column_corruption_issue(self, using_copy_on_write): + # TODO(wesm): Unclear how exactly this is related to internal matters + df = DataFrame(index=[0, 1]) + df[0] = np.nan + wasCol = {} + + with tm.assert_produces_warning(PerformanceWarning): + for i, dt in enumerate(df.index): + for col in range(100, 200): + if col not in wasCol: + wasCol[col] = 1 + df[col] = np.nan + if using_copy_on_write: + df.loc[dt, col] = i + else: + df[col][dt] = i + + myid = 100 + + first = len(df.loc[pd.isna(df[myid]), [myid]]) + second = len(df.loc[pd.isna(df[myid]), [myid]]) + assert first == second == 0 + + def test_constructor_no_pandas_array(self): + # Ensure that PandasArray isn't allowed inside Series + # See https://github.com/pandas-dev/pandas/issues/23995 for more. 
+ arr = Series([1, 2, 3]).array + result = DataFrame({"A": arr}) + expected = DataFrame({"A": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + assert isinstance(result._mgr.blocks[0], NumericBlock) + + def test_add_column_with_pandas_array(self): + # GH 26390 + df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)) + df2 = DataFrame( + { + "a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"], + "c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)), + } + ) + assert type(df["c"]._mgr.blocks[0]) == ObjectBlock + assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock + tm.assert_frame_equal(df, df2) + + +def test_update_inplace_sets_valid_block_values(using_copy_on_write): + # https://github.com/pandas-dev/pandas/issues/33457 + df = DataFrame({"a": Series([1, 2, None], dtype="category")}) + + # inplace update of a single column + df["a"].fillna(1, inplace=True) + + # check we haven't put a Series into any block.values + assert isinstance(df._mgr.blocks[0].values, Categorical) + + if not using_copy_on_write: + # smoketest for OP bug from GH#35731 + assert df.isnull().sum().sum() == 0 + + +def test_nonconsolidated_item_cache_take(): + # https://github.com/pandas-dev/pandas/issues/35521 + + # create non-consolidated dataframe with object dtype columns + df = DataFrame() + df["col1"] = Series(["a"], dtype=object) + df["col2"] = Series([0], dtype=object) + + # access column (item cache) + df["col1"] == "A" + # take operation + # (regression was that this consolidated but didn't reset item cache, + # resulting in an invalid cache and the .at operation not working properly) + df[df["col2"] == 0] + + # now setting value should update actual dataframe + df.at[0, "col1"] = "A" + + expected = DataFrame({"col1": ["A"], "col2": [0]}, dtype=object) + tm.assert_frame_equal(df, expected) + assert df.at[0, "col1"] == "A" diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py new file mode 100644 index 00000000..16021fac --- /dev/null +++ b/pandas/tests/frame/test_constructors.py @@ -0,0 +1,3181 @@ +from collections import ( + OrderedDict, + abc, +) +from datetime import ( + date, + datetime, + timedelta, +) +import functools +import itertools +import re +import warnings + +import numpy as np +import numpy.ma as ma +import numpy.ma.mrecords as mrecords +import pytest +import pytz + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_integer_dtype +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Index, + Interval, + MultiIndex, + Period, + RangeIndex, + Series, + Timedelta, + Timestamp, + cut, + date_range, + isna, +) +import pandas._testing as tm +from pandas.arrays import ( + DatetimeArray, + IntervalArray, + PeriodArray, + SparseArray, + TimedeltaArray, +) +from pandas.core.api import Int64Index + +MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] +MIXED_INT_DTYPES = [ + "uint8", + "uint16", + "uint32", + "uint64", + "int8", + "int16", + "int32", + "int64", +] + + +class TestDataFrameConstructors: + def test_constructor_from_ndarray_with_str_dtype(self): + # If we don't ravel/reshape around ensure_str_array, we end up + # with an array of strings each of which is e.g. 
"[0 1 2]" + arr = np.arange(12).reshape(4, 3) + df = DataFrame(arr, dtype=str) + expected = DataFrame(arr.astype(str)) + tm.assert_frame_equal(df, expected) + + def test_constructor_from_2d_datetimearray(self, using_array_manager): + dti = date_range("2016-01-01", periods=6, tz="US/Pacific") + dta = dti._data.reshape(3, 2) + + df = DataFrame(dta) + expected = DataFrame({0: dta[:, 0], 1: dta[:, 1]}) + tm.assert_frame_equal(df, expected) + if not using_array_manager: + # GH#44724 big performance hit if we de-consolidate + assert len(df._mgr.blocks) == 1 + + def test_constructor_dict_with_tzaware_scalar(self): + # GH#42505 + dt = Timestamp("2019-11-03 01:00:00-0700").tz_convert("America/Los_Angeles") + + df = DataFrame({"dt": dt}, index=[0]) + expected = DataFrame({"dt": [dt]}) + tm.assert_frame_equal(df, expected) + + # Non-homogeneous + df = DataFrame({"dt": dt, "value": [1]}) + expected = DataFrame({"dt": [dt], "value": [1]}) + tm.assert_frame_equal(df, expected) + + def test_construct_ndarray_with_nas_and_int_dtype(self): + # GH#26919 match Series by not casting np.nan to meaningless int + arr = np.array([[1, np.nan], [2, 3]]) + with tm.assert_produces_warning(FutureWarning): + df = DataFrame(arr, dtype="i8") + assert df.values.dtype == arr.dtype + assert isna(df.iloc[0, 1]) + + # check this matches Series behavior + with tm.assert_produces_warning(FutureWarning): + ser = Series(arr[0], dtype="i8", name=0) + expected = df.iloc[0] + tm.assert_series_equal(ser, expected) + + def test_construct_from_list_of_datetimes(self): + df = DataFrame([datetime.now(), datetime.now()]) + assert df[0].dtype == np.dtype("M8[ns]") + + def test_constructor_from_tzaware_datetimeindex(self): + # don't cast a DatetimeIndex WITH a tz, leave as object + # GH#6032 + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + assert expected.dtype == idx.dtype + + # convert index to series + result = Series(idx) + tm.assert_series_equal(result, expected) + + def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series): + # GH#39462 + nat = np.datetime64("NaT", "ns") + arr = np.array([nat], dtype=object) + if frame_or_series is DataFrame: + arr = arr.reshape(1, 1) + + msg = "|".join( + [ + "Could not convert object to NumPy timedelta", + "Invalid type for timedelta scalar: ", + ] + ) + with pytest.raises(ValueError, match=msg): + frame_or_series(arr, dtype="m8[ns]") + + @pytest.mark.parametrize("kind", ["m", "M"]) + def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series): + # with dtype=object, we should cast dt64 values to Timestamps, not pydatetimes + if kind == "M": + dtype = "M8[ns]" + scalar_type = Timestamp + else: + dtype = "m8[ns]" + scalar_type = Timedelta + + arr = np.arange(6, dtype="i8").view(dtype).reshape(3, 2) + if frame_or_series is Series: + arr = arr[:, 0] + + obj = frame_or_series(arr, dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + # go through a different path in internals.construction + obj = frame_or_series(frame_or_series(arr), dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + obj = frame_or_series(frame_or_series(arr), dtype=PandasDtype(object)) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + if frame_or_series is DataFrame: 
+ # other paths through internals.construction + sers = [Series(x) for x in arr] + obj = frame_or_series(sers, dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + def test_series_with_name_not_matching_column(self): + # GH#9232 + x = Series(range(5), name=1) + y = Series(range(5), name=0) + + result = DataFrame(x, columns=[0]) + expected = DataFrame([], columns=[0]) + tm.assert_frame_equal(result, expected) + + result = DataFrame(y, columns=[1]) + expected = DataFrame([], columns=[1]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "constructor", + [ + lambda: DataFrame(), + lambda: DataFrame(None), + lambda: DataFrame({}), + lambda: DataFrame(()), + lambda: DataFrame([]), + lambda: DataFrame(_ for _ in []), + lambda: DataFrame(range(0)), + lambda: DataFrame(data=None), + lambda: DataFrame(data={}), + lambda: DataFrame(data=()), + lambda: DataFrame(data=[]), + lambda: DataFrame(data=(_ for _ in [])), + lambda: DataFrame(data=range(0)), + ], + ) + def test_empty_constructor(self, constructor): + expected = DataFrame() + result = constructor() + assert len(result.index) == 0 + assert len(result.columns) == 0 + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "emptylike,expected_index,expected_columns", + [ + ([[]], RangeIndex(1), RangeIndex(0)), + ([[], []], RangeIndex(2), RangeIndex(0)), + ([(_ for _ in [])], RangeIndex(1), RangeIndex(0)), + ], + ) + def test_emptylike_constructor(self, emptylike, expected_index, expected_columns): + expected = DataFrame(index=expected_index, columns=expected_columns) + result = DataFrame(emptylike) + tm.assert_frame_equal(result, expected) + + def test_constructor_mixed(self, float_string_frame): + index, data = tm.getMixedTypeDict() + + # TODO(wesm), incomplete test? 
+ indexed_frame = DataFrame(data, index=index) # noqa + unindexed_frame = DataFrame(data) # noqa + + assert float_string_frame["foo"].dtype == np.object_ + + def test_constructor_cast_failure(self): + msg = "either all columns will be cast to that dtype, or a TypeError will" + with tm.assert_produces_warning(FutureWarning, match=msg): + foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64) + assert foo["a"].dtype == object + + # GH 3010, constructing with odd arrays + df = DataFrame(np.ones((4, 2))) + + # this is ok + df["foo"] = np.ones((4, 2)).tolist() + + # this is not ok + msg = "Expected a 1D array, got an array with shape \\(4, 2\\)" + with pytest.raises(ValueError, match=msg): + df["test"] = np.ones((4, 2)) + + # this is ok + df["foo2"] = np.ones((4, 2)).tolist() + + def test_constructor_dtype_copy(self): + orig_df = DataFrame({"col1": [1.0], "col2": [2.0], "col3": [3.0]}) + + new_df = DataFrame(orig_df, dtype=float, copy=True) + + new_df["col1"] = 200.0 + assert orig_df["col1"][0] == 1.0 + + def test_constructor_dtype_nocast_view_dataframe(self, using_copy_on_write): + df = DataFrame([[1, 2]]) + should_be_view = DataFrame(df, dtype=df[0].dtype) + if using_copy_on_write: + # INFO(CoW) doesn't mutate original + should_be_view.iloc[0, 0] = 99 + assert df.values[0, 0] == 1 + else: + should_be_view[0][0] = 99 + assert df.values[0, 0] == 99 + + def test_constructor_dtype_nocast_view_2d_array( + self, using_array_manager, using_copy_on_write + ): + df = DataFrame([[1, 2], [3, 4]], dtype="int64") + if not using_array_manager and not using_copy_on_write: + should_be_view = DataFrame(df.values, dtype=df[0].dtype) + should_be_view[0][0] = 97 + assert df.values[0, 0] == 97 + else: + # INFO(ArrayManager) DataFrame(ndarray) doesn't necessarily preserve + # a view on the array to ensure contiguous 1D arrays + df2 = DataFrame(df.values, dtype=df[0].dtype) + assert df2._mgr.arrays[0].flags.c_contiguous + + @td.skip_array_manager_invalid_test + def test_1d_object_array_does_not_copy(self): + # https://github.com/pandas-dev/pandas/issues/39272 + arr = np.array(["a", "b"], dtype="object") + df = DataFrame(arr) + assert np.shares_memory(df.values, arr) + + @td.skip_array_manager_invalid_test + def test_2d_object_array_does_not_copy(self): + # https://github.com/pandas-dev/pandas/issues/39272 + arr = np.array([["a", "b"], ["c", "d"]], dtype="object") + df = DataFrame(arr) + assert np.shares_memory(df.values, arr) + + def test_constructor_dtype_list_data(self): + df = DataFrame([[1, "2"], [None, "a"]], dtype=object) + assert df.loc[1, 0] is None + assert df.loc[0, 1] == "2" + + def test_constructor_list_of_2d_raises(self): + # https://github.com/pandas-dev/pandas/issues/32289 + a = DataFrame() + b = np.empty((0, 0)) + with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"): + DataFrame([a]) + + with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"): + DataFrame([b]) + + a = DataFrame({"A": [1, 2]}) + with pytest.raises(ValueError, match=r"shape=\(2, 2, 1\)"): + DataFrame([a, a]) + + @pytest.mark.parametrize( + "typ, ad", + [ + # mixed floating and integer coexist in the same frame + ["float", {}], + # add lots of types + ["float", {"A": 1, "B": "foo", "C": "bar"}], + # GH 622 + ["int", {}], + ], + ) + def test_constructor_mixed_dtypes(self, typ, ad): + if typ == "int": + dtypes = MIXED_INT_DTYPES + arrays = [np.array(np.random.rand(10), dtype=d) for d in dtypes] + elif typ == "float": + dtypes = MIXED_FLOAT_DTYPES + arrays = [np.array(np.random.randint(10, size=10), dtype=d) for d in dtypes] + 
+ for d, a in zip(dtypes, arrays): + assert a.dtype == d + ad.update({d: a for d, a in zip(dtypes, arrays)}) + df = DataFrame(ad) + + dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES + for d in dtypes: + if d in df: + assert df.dtypes[d] == d + + def test_constructor_complex_dtypes(self): + # GH10952 + a = np.random.rand(10).astype(np.complex64) + b = np.random.rand(10).astype(np.complex128) + + df = DataFrame({"a": a, "b": b}) + assert a.dtype == df.a.dtype + assert b.dtype == df.b.dtype + + def test_constructor_dtype_str_na_values(self, string_dtype): + # https://github.com/pandas-dev/pandas/issues/21083 + df = DataFrame({"A": ["x", None]}, dtype=string_dtype) + result = df.isna() + expected = DataFrame({"A": [False, True]}) + tm.assert_frame_equal(result, expected) + assert df.iloc[1, 0] is None + + df = DataFrame({"A": ["x", np.nan]}, dtype=string_dtype) + assert np.isnan(df.iloc[1, 0]) + + def test_constructor_rec(self, float_frame): + rec = float_frame.to_records(index=False) + rec.dtype.names = list(rec.dtype.names)[::-1] + + index = float_frame.index + + df = DataFrame(rec) + tm.assert_index_equal(df.columns, Index(rec.dtype.names)) + + df2 = DataFrame(rec, index=index) + tm.assert_index_equal(df2.columns, Index(rec.dtype.names)) + tm.assert_index_equal(df2.index, index) + + # case with columns != the ones we would infer from the data + rng = np.arange(len(rec))[::-1] + df3 = DataFrame(rec, index=rng, columns=["C", "B"]) + expected = DataFrame(rec, index=rng).reindex(columns=["C", "B"]) + tm.assert_frame_equal(df3, expected) + + def test_constructor_bool(self): + df = DataFrame({0: np.ones(10, dtype=bool), 1: np.zeros(10, dtype=bool)}) + assert df.values.dtype == np.bool_ + + def test_constructor_overflow_int64(self): + # see gh-14881 + values = np.array([2**64 - i for i in range(1, 10)], dtype=np.uint64) + + result = DataFrame({"a": values}) + assert result["a"].dtype == np.uint64 + + # see gh-2355 + data_scores = [ + (6311132704823138710, 273), + (2685045978526272070, 23), + (8921811264899370420, 45), + (17019687244989530680, 270), + (9930107427299601010, 273), + ] + dtype = [("uid", "u8"), ("score", "u8")] + data = np.zeros((len(data_scores),), dtype=dtype) + data[:] = data_scores + df_crawls = DataFrame(data) + assert df_crawls["uid"].dtype == np.uint64 + + @pytest.mark.parametrize( + "values", + [ + np.array([2**64], dtype=object), + np.array([2**65]), + [2**64 + 1], + np.array([-(2**63) - 4], dtype=object), + np.array([-(2**64) - 1]), + [-(2**65) - 2], + ], + ) + def test_constructor_int_overflow(self, values): + # see gh-18584 + value = values[0] + result = DataFrame(values) + + assert result[0].dtype == object + assert result[0][0] == value + + @pytest.mark.parametrize( + "values", + [ + np.array([1], dtype=np.uint16), + np.array([1], dtype=np.uint32), + np.array([1], dtype=np.uint64), + [np.uint16(1)], + [np.uint32(1)], + [np.uint64(1)], + ], + ) + def test_constructor_numpy_uints(self, values): + # GH#47294 + value = values[0] + result = DataFrame(values) + + assert result[0].dtype == value.dtype + assert result[0][0] == value + + def test_constructor_ordereddict(self): + import random + + nitems = 100 + nums = list(range(nitems)) + random.shuffle(nums) + expected = [f"A{i:d}" for i in nums] + df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) + assert expected == list(df.columns) + + def test_constructor_dict(self): + datetime_series = tm.makeTimeSeries(nper=30) + # test expects index shifted by 5 + datetime_series_short = tm.makeTimeSeries(nper=30)[5:] + + frame = 
DataFrame({"col1": datetime_series, "col2": datetime_series_short}) + + # col2 is padded with NaN + assert len(datetime_series) == 30 + assert len(datetime_series_short) == 25 + + tm.assert_series_equal(frame["col1"], datetime_series.rename("col1")) + + exp = Series( + np.concatenate([[np.nan] * 5, datetime_series_short.values]), + index=datetime_series.index, + name="col2", + ) + tm.assert_series_equal(exp, frame["col2"]) + + frame = DataFrame( + {"col1": datetime_series, "col2": datetime_series_short}, + columns=["col2", "col3", "col4"], + ) + + assert len(frame) == len(datetime_series_short) + assert "col1" not in frame + assert isna(frame["col3"]).all() + + # Corner cases + assert len(DataFrame()) == 0 + + # mix dict and array, wrong size - no spec for which error should raise + # first + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." + with pytest.raises(ValueError, match=msg): + DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + + def test_constructor_dict_length1(self): + # Length-one dict micro-optimization + frame = DataFrame({"A": {"1": 1, "2": 2}}) + tm.assert_index_equal(frame.index, Index(["1", "2"])) + + def test_constructor_dict_with_index(self): + # empty dict plus index + idx = Index([0, 1, 2]) + frame = DataFrame({}, index=idx) + assert frame.index is idx + + def test_constructor_dict_with_index_and_columns(self): + # empty dict with index and columns + idx = Index([0, 1, 2]) + frame = DataFrame({}, index=idx, columns=idx) + assert frame.index is idx + assert frame.columns is idx + assert len(frame._series) == 3 + + def test_constructor_dict_of_empty_lists(self): + # with dict of empty list and Series + frame = DataFrame({"A": [], "B": []}, columns=["A", "B"]) + tm.assert_index_equal(frame.index, RangeIndex(0), exact=True) + + def test_constructor_dict_with_none(self): + # GH 14381 + # Dict with None value + frame_none = DataFrame({"a": None}, index=[0]) + frame_none_list = DataFrame({"a": [None]}, index=[0]) + assert frame_none._get_value(0, "a") is None + assert frame_none_list._get_value(0, "a") is None + tm.assert_frame_equal(frame_none, frame_none_list) + + def test_constructor_dict_errors(self): + # GH10856 + # dict with scalar values should raise error, even if columns passed + msg = "If using all scalar values, you must pass an index" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": 0.7}) + + with pytest.raises(ValueError, match=msg): + DataFrame({"a": 0.7}, columns=["a"]) + + @pytest.mark.parametrize("scalar", [2, np.nan, None, "D"]) + def test_constructor_invalid_items_unused(self, scalar): + # No error if invalid (scalar) value is in fact not used: + result = DataFrame({"a": scalar}, columns=["b"]) + expected = DataFrame(columns=["b"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) + def test_constructor_dict_nan_key(self, value): + # GH 18455 + cols = [1, value, 3] + idx = ["a", value] + values = [[0, 3], [1, 4], [2, 5]] + data = {cols[c]: Series(values[c], index=idx) for c in range(3)} + result = DataFrame(data).sort_values(1).sort_values("a", axis=1) + expected = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols + ) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx).sort_values("a", axis=1) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx, columns=cols) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("value", [np.nan, None, 
float("nan")]) + def test_constructor_dict_nan_tuple_key(self, value): + # GH 18455 + cols = Index([(11, 21), (value, 22), (13, value)]) + idx = Index([("a", value), (value, 2)]) + values = [[0, 3], [1, 4], [2, 5]] + data = {cols[c]: Series(values[c], index=idx) for c in range(3)} + result = DataFrame(data).sort_values((11, 21)).sort_values(("a", value), axis=1) + expected = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols + ) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx).sort_values(("a", value), axis=1) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx, columns=cols) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_order_insertion(self): + datetime_series = tm.makeTimeSeries(nper=30) + datetime_series_short = tm.makeTimeSeries(nper=25) + + # GH19018 + # initialization ordering: by insertion order if python>= 3.6 + d = {"b": datetime_series_short, "a": datetime_series} + frame = DataFrame(data=d) + expected = DataFrame(data=d, columns=list("ba")) + tm.assert_frame_equal(frame, expected) + + def test_constructor_dict_nan_key_and_columns(self): + # GH 16894 + result = DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2]) + expected = DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2]) + tm.assert_frame_equal(result, expected) + + def test_constructor_multi_index(self): + # GH 4078 + # construction error with mi and all-nan frame + tuples = [(2, 3), (3, 3), (3, 3)] + mi = MultiIndex.from_tuples(tuples) + df = DataFrame(index=mi, columns=mi) + assert isna(df).values.ravel().all() + + tuples = [(3, 3), (2, 3), (3, 3)] + mi = MultiIndex.from_tuples(tuples) + df = DataFrame(index=mi, columns=mi) + assert isna(df).values.ravel().all() + + def test_constructor_2d_index(self): + # GH 25416 + # handling of 2d index in construction + df = DataFrame([[1]], columns=[[1]], index=[1, 2]) + expected = DataFrame( + [1, 1], + index=Int64Index([1, 2], dtype="int64"), + columns=MultiIndex(levels=[[1]], codes=[[0]]), + ) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1]], columns=[[1]], index=[[1, 2]]) + expected = DataFrame( + [1, 1], + index=MultiIndex(levels=[[1, 2]], codes=[[0, 1]]), + columns=MultiIndex(levels=[[1]], codes=[[0]]), + ) + tm.assert_frame_equal(df, expected) + + def test_constructor_error_msgs(self): + msg = "Empty data passed with indices specified." + # passing an empty array with columns specified. + with pytest.raises(ValueError, match=msg): + DataFrame(np.empty(0), columns=list("abc")) + + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." 
+ # mix dict and array, wrong size + with pytest.raises(ValueError, match=msg): + DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + + # wrong size ndarray, GH 3105 + msg = r"Shape of passed values is \(4, 3\), indices imply \(3, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame( + np.arange(12).reshape((4, 3)), + columns=["foo", "bar", "baz"], + index=date_range("2000-01-01", periods=3), + ) + + arr = np.array([[4, 5, 6]]) + msg = r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)" + with pytest.raises(ValueError, match=msg): + DataFrame(index=[0], columns=range(0, 4), data=arr) + + arr = np.array([4, 5, 6]) + msg = r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)" + with pytest.raises(ValueError, match=msg): + DataFrame(index=[0], columns=range(0, 4), data=arr) + + # higher dim raise exception + with pytest.raises(ValueError, match="Must pass 2-d input"): + DataFrame(np.zeros((3, 3, 3)), columns=["A", "B", "C"], index=[1]) + + # wrong size axis labels + msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame(np.random.rand(2, 3), columns=["A", "B", "C"], index=[1]) + + msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" + with pytest.raises(ValueError, match=msg): + DataFrame(np.random.rand(2, 3), columns=["A", "B"], index=[1, 2]) + + # gh-26429 + msg = "2 columns passed, passed data had 10 columns" + with pytest.raises(ValueError, match=msg): + DataFrame((range(10), range(10, 20)), columns=("ones", "twos")) + + msg = "If using all scalar values, you must pass an index" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": False, "b": True}) + + def test_constructor_subclass_dict(self, dict_subclass): + # Test for passing dict subclass to constructor + data = { + "col1": dict_subclass((x, 10.0 * x) for x in range(10)), + "col2": dict_subclass((x, 20.0 * x) for x in range(10)), + } + df = DataFrame(data) + refdf = DataFrame({col: dict(val.items()) for col, val in data.items()}) + tm.assert_frame_equal(refdf, df) + + data = dict_subclass(data.items()) + df = DataFrame(data) + tm.assert_frame_equal(refdf, df) + + def test_constructor_defaultdict(self, float_frame): + # try with defaultdict + from collections import defaultdict + + data = {} + float_frame["B"][:10] = np.nan + for k, v in float_frame.items(): + dct = defaultdict(dict) + dct.update(v.to_dict()) + data[k] = dct + frame = DataFrame(data) + expected = frame.reindex(index=float_frame.index) + tm.assert_frame_equal(float_frame, expected) + + def test_constructor_dict_block(self): + expected = np.array([[4.0, 3.0, 2.0, 1.0]]) + df = DataFrame( + {"d": [4.0], "c": [3.0], "b": [2.0], "a": [1.0]}, + columns=["d", "c", "b", "a"], + ) + tm.assert_numpy_array_equal(df.values, expected) + + def test_constructor_dict_cast(self): + # cast float tests + test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + frame = DataFrame(test_data, dtype=float) + assert len(frame) == 3 + assert frame["B"].dtype == np.float64 + assert frame["A"].dtype == np.float64 + + frame = DataFrame(test_data) + assert len(frame) == 3 + assert frame["B"].dtype == np.object_ + assert frame["A"].dtype == np.float64 + + def test_constructor_dict_cast2(self): + # can't cast to float + test_data = { + "A": dict(zip(range(20), tm.makeStringIndex(20))), + "B": dict(zip(range(15), np.random.randn(15))), + } + msg = "either all columns will be cast to that dtype, or a TypeError will" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + frame = DataFrame(test_data, dtype=float) + + assert len(frame) == 20 + assert frame["A"].dtype == np.object_ + assert frame["B"].dtype == np.float64 + + def test_constructor_dict_dont_upcast(self): + d = {"Col1": {"Row1": "A String", "Row2": np.nan}} + df = DataFrame(d) + assert isinstance(df["Col1"]["Row2"], float) + + def test_constructor_dict_dont_upcast2(self): + dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2]) + assert isinstance(dm[1][1], int) + + def test_constructor_dict_of_tuples(self): + # GH #1491 + data = {"a": (1, 2, 3), "b": (4, 5, 6)} + + result = DataFrame(data) + expected = DataFrame({k: list(v) for k, v in data.items()}) + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_constructor_dict_of_ranges(self): + # GH 26356 + data = {"a": range(3), "b": range(3, 6)} + + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_of_iterators(self): + # GH 26349 + data = {"a": iter(range(3)), "b": reversed(range(3))} + + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_of_generators(self): + # GH 26349 + data = {"a": (i for i in (range(3))), "b": (i for i in reversed(range(3)))} + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_multiindex(self): + d = { + ("a", "a"): {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2}, + ("b", "a"): {("i", "i"): 6, ("i", "j"): 5, ("j", "i"): 4}, + ("b", "c"): {("i", "i"): 7, ("i", "j"): 8, ("j", "i"): 9}, + } + _d = sorted(d.items()) + df = DataFrame(d) + expected = DataFrame( + [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) + ).T + expected.index = MultiIndex.from_tuples(expected.index) + tm.assert_frame_equal( + df, + expected, + ) + + d["z"] = {"y": 123.0, ("i", "i"): 111, ("i", "j"): 111, ("j", "i"): 111} + _d.insert(0, ("z", d["z"])) + expected = DataFrame( + [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False) + ).T + expected.index = Index(expected.index, tupleize_cols=False) + df = DataFrame(d) + df = df.reindex(columns=expected.columns, index=expected.index) + tm.assert_frame_equal(df, expected) + + def test_constructor_dict_datetime64_index(self): + # GH 10160 + dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"] + + def create_data(constructor): + return {i: {constructor(s): 2 * i} for i, s in enumerate(dates_as_str)} + + data_datetime64 = create_data(np.datetime64) + data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d")) + data_Timestamp = create_data(Timestamp) + + expected = DataFrame( + [ + {0: 0, 1: None, 2: None, 3: None}, + {0: None, 1: 2, 2: None, 3: None}, + {0: None, 1: None, 2: 4, 3: None}, + {0: None, 1: None, 2: None, 3: 6}, + ], + index=[Timestamp(dt) for dt in dates_as_str], + ) + + result_datetime64 = DataFrame(data_datetime64) + result_datetime = DataFrame(data_datetime) + result_Timestamp = DataFrame(data_Timestamp) + tm.assert_frame_equal(result_datetime64, expected) + tm.assert_frame_equal(result_datetime, expected) + tm.assert_frame_equal(result_Timestamp, expected) + + def test_constructor_dict_timedelta64_index(self): + # GH 10160 + td_as_int = [1, 2, 3, 4] + + def create_data(constructor): + return {i: {constructor(s): 2 * i} for i, s in 
enumerate(td_as_int)} + + data_timedelta64 = create_data(lambda x: np.timedelta64(x, "D")) + data_timedelta = create_data(lambda x: timedelta(days=x)) + data_Timedelta = create_data(lambda x: Timedelta(x, "D")) + + expected = DataFrame( + [ + {0: 0, 1: None, 2: None, 3: None}, + {0: None, 1: 2, 2: None, 3: None}, + {0: None, 1: None, 2: 4, 3: None}, + {0: None, 1: None, 2: None, 3: 6}, + ], + index=[Timedelta(td, "D") for td in td_as_int], + ) + + result_timedelta64 = DataFrame(data_timedelta64) + result_timedelta = DataFrame(data_timedelta) + result_Timedelta = DataFrame(data_Timedelta) + tm.assert_frame_equal(result_timedelta64, expected) + tm.assert_frame_equal(result_timedelta, expected) + tm.assert_frame_equal(result_Timedelta, expected) + + def test_constructor_period_dict(self): + # PeriodIndex + a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M") + b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D") + df = DataFrame({"a": a, "b": b}) + assert df["a"].dtype == a.dtype + assert df["b"].dtype == b.dtype + + # list of periods + df = DataFrame({"a": a.astype(object).tolist(), "b": b.astype(object).tolist()}) + assert df["a"].dtype == a.dtype + assert df["b"].dtype == b.dtype + + def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): + ea_scalar, ea_dtype = ea_scalar_and_dtype + df = DataFrame({"a": ea_scalar}, index=[0]) + assert df["a"].dtype == ea_dtype + + expected = DataFrame(index=[0], columns=["a"], data=ea_scalar) + + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "data,dtype", + [ + (Period("2020-01"), PeriodDtype("M")), + (Interval(left=0, right=5), IntervalDtype("int64", "right")), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + ], + ) + def test_constructor_extension_scalar_data(self, data, dtype): + # GH 34832 + df = DataFrame(index=[0, 1], columns=["a", "b"], data=data) + + assert df["a"].dtype == dtype + assert df["b"].dtype == dtype + + arr = pd.array([data] * 2, dtype=dtype) + expected = DataFrame({"a": arr, "b": arr}) + + tm.assert_frame_equal(df, expected) + + def test_nested_dict_frame_constructor(self): + rng = pd.period_range("1/1/2000", periods=5) + df = DataFrame(np.random.randn(10, 5), columns=rng) + + data = {} + for col in df.columns: + for row in df.index: + data.setdefault(col, {})[row] = df._get_value(row, col) + + result = DataFrame(data, columns=rng) + tm.assert_frame_equal(result, df) + + data = {} + for col in df.columns: + for row in df.index: + data.setdefault(row, {})[col] = df._get_value(row, col) + + result = DataFrame(data, index=rng).T + tm.assert_frame_equal(result, df) + + def _check_basic_constructor(self, empty): + # mat: 2d matrix with shape (3, 2) to input. empty - makes sized + # objects + mat = empty((2, 3), dtype=float) + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + + # 1-D input + frame = DataFrame(empty((3,)), columns=["A"], index=[1, 2, 3]) + assert len(frame.index) == 3 + assert len(frame.columns) == 1 + + warn = None if empty is np.ones else FutureWarning + with tm.assert_produces_warning(warn): + frame = DataFrame( + mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64 + ) + if empty is np.ones: + # passing dtype casts + assert frame.values.dtype == np.int64 + else: + # i.e. ma.masked_all + # Since we have NaNs, refuse to cast to int dtype, which would take NaN + # to meaningless integers. This matches Series behavior. 
GH#26919 + assert frame.isna().all().all() + assert frame.values.dtype == np.float64 + assert isna(frame.values).all() + + # wrong size axis labels + msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame(mat, columns=["A", "B", "C"], index=[1]) + msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" + with pytest.raises(ValueError, match=msg): + DataFrame(mat, columns=["A", "B"], index=[1, 2]) + + # higher dim raise exception + with pytest.raises(ValueError, match="Must pass 2-d input"): + DataFrame(empty((3, 3, 3)), columns=["A", "B", "C"], index=[1]) + + # automatic labeling + frame = DataFrame(mat) + tm.assert_index_equal(frame.index, Index(range(2)), exact=True) + tm.assert_index_equal(frame.columns, Index(range(3)), exact=True) + + frame = DataFrame(mat, index=[1, 2]) + tm.assert_index_equal(frame.columns, Index(range(3)), exact=True) + + frame = DataFrame(mat, columns=["A", "B", "C"]) + tm.assert_index_equal(frame.index, Index(range(2)), exact=True) + + # 0-length axis + frame = DataFrame(empty((0, 3))) + assert len(frame.index) == 0 + + frame = DataFrame(empty((3, 0))) + assert len(frame.columns) == 0 + + def test_constructor_ndarray(self): + self._check_basic_constructor(np.ones) + + frame = DataFrame(["foo", "bar"], index=[0, 1], columns=["A"]) + assert len(frame) == 2 + + def test_constructor_maskedarray(self): + self._check_basic_constructor(ma.masked_all) + + # Check non-masked values + mat = ma.masked_all((2, 3), dtype=float) + mat[0, 0] = 1.0 + mat[1, 2] = 2.0 + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + assert 1.0 == frame["A"][1] + assert 2.0 == frame["C"][2] + + # what is this even checking?? + mat = ma.masked_all((2, 3), dtype=float) + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + assert np.all(~np.asarray(frame == frame)) + + def test_constructor_maskedarray_nonfloat(self): + # masked int promoted to float + mat = ma.masked_all((2, 3), dtype=int) + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert np.all(~np.asarray(frame == frame)) + + # cast type + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.float64) + assert frame.values.dtype == np.float64 + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0, 0] = 1 + mat2[1, 2] = 2 + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert 1 == frame["A"][1] + assert 2 == frame["C"][2] + + # masked np.datetime64 stays (use NaT as null) + mat = ma.masked_all((2, 3), dtype="M8[ns]") + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert isna(frame).values.all() + + # cast type + msg = r"datetime64\[ns\] values and dtype=int64" + with tm.assert_produces_warning(FutureWarning, match=msg): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + category=DeprecationWarning, + message="elementwise comparison failed", + ) + frame = DataFrame( + mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64 + ) + assert frame.values.dtype == np.int64 + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0, 0] = 1 + mat2[1, 2] = 2 + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert 1 == frame["A"].view("i8")[1] + assert 2 == frame["C"].view("i8")[2] + + # masked bool promoted to object + mat = ma.masked_all((2, 3), dtype=bool) + # 2-D 
input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert np.all(~np.asarray(frame == frame)) + + # cast type + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=object) + assert frame.values.dtype == object + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0, 0] = True + mat2[1, 2] = False + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert frame["A"][1] is True + assert frame["C"][2] is False + + def test_constructor_maskedarray_hardened(self): + # Check numpy masked arrays with hard masks -- from GH24574 + mat_hard = ma.masked_all((2, 2), dtype=float).harden_mask() + result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2]) + expected = DataFrame( + {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, + columns=["A", "B"], + index=[1, 2], + dtype=float, + ) + tm.assert_frame_equal(result, expected) + # Check case where mask is hard but no data are masked + mat_hard = ma.ones((2, 2), dtype=float).harden_mask() + result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2]) + expected = DataFrame( + {"A": [1.0, 1.0], "B": [1.0, 1.0]}, + columns=["A", "B"], + index=[1, 2], + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_maskedrecarray_dtype(self): + # Ensure constructor honors dtype + data = np.ma.array( + np.ma.zeros(5, dtype=[("date", " None: + self._lst = lst + + def __getitem__(self, n): + return self._lst.__getitem__(n) + + def __len__(self): + return self._lst.__len__() + + lst_containers = [DummyContainer([1, "a"]), DummyContainer([2, "b"])] + columns = ["num", "str"] + result = DataFrame(lst_containers, columns=columns) + expected = DataFrame([[1, "a"], [2, "b"]], columns=columns) + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_constructor_stdlib_array(self): + # GH 4297 + # support Array + import array + + result = DataFrame({"A": array.array("i", range(10))}) + expected = DataFrame({"A": list(range(10))}) + tm.assert_frame_equal(result, expected, check_dtype=False) + + expected = DataFrame([list(range(10)), list(range(10))]) + result = DataFrame([array.array("i", range(10)), array.array("i", range(10))]) + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_constructor_range(self): + # GH26342 + result = DataFrame(range(10)) + expected = DataFrame(list(range(10))) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_ranges(self): + result = DataFrame([range(10), range(10)]) + expected = DataFrame([list(range(10)), list(range(10))]) + tm.assert_frame_equal(result, expected) + + def test_constructor_iterable(self): + # GH 21987 + class Iter: + def __iter__(self): + for i in range(10): + yield [1, 2, 3] + + expected = DataFrame([[1, 2, 3]] * 10) + result = DataFrame(Iter()) + tm.assert_frame_equal(result, expected) + + def test_constructor_iterator(self): + result = DataFrame(iter(range(10))) + expected = DataFrame(list(range(10))) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_iterators(self): + result = DataFrame([iter(range(10)), iter(range(10))]) + expected = DataFrame([list(range(10)), list(range(10))]) + tm.assert_frame_equal(result, expected) + + def test_constructor_generator(self): + # related #2305 + + gen1 = (i for i in range(10)) + gen2 = (i for i in range(10)) + + expected = DataFrame([list(range(10)), list(range(10))]) + result = DataFrame([gen1, gen2]) + tm.assert_frame_equal(result, expected) + + gen 
= ([i, "a"] for i in range(10)) + result = DataFrame(gen) + expected = DataFrame({0: range(10), 1: "a"}) + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_constructor_list_of_dicts(self): + + result = DataFrame([{}]) + expected = DataFrame(index=[0]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dict_type", [dict, OrderedDict]) + def test_constructor_ordered_dict_preserve_order(self, dict_type): + # see gh-13304 + expected = DataFrame([[2, 1]], columns=["b", "a"]) + + data = dict_type() + data["b"] = [2] + data["a"] = [1] + + result = DataFrame(data) + tm.assert_frame_equal(result, expected) + + data = dict_type() + data["b"] = 2 + data["a"] = 1 + + result = DataFrame([data]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dict_type", [dict, OrderedDict]) + def test_constructor_ordered_dict_conflicting_orders(self, dict_type): + # the first dict element sets the ordering for the DataFrame, + # even if there are conflicting orders from subsequent ones + row_one = dict_type() + row_one["b"] = 2 + row_one["a"] = 1 + + row_two = dict_type() + row_two["a"] = 1 + row_two["b"] = 2 + + row_three = {"b": 2, "a": 1} + + expected = DataFrame([[2, 1], [2, 1]], columns=["b", "a"]) + result = DataFrame([row_one, row_two]) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2, 1], [2, 1], [2, 1]], columns=["b", "a"]) + result = DataFrame([row_one, row_two, row_three]) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_series_aligned_index(self): + series = [Series(i, index=["b", "a", "c"], name=str(i)) for i in range(3)] + result = DataFrame(series) + expected = DataFrame( + {"b": [0, 1, 2], "a": [0, 1, 2], "c": [0, 1, 2]}, + columns=["b", "a", "c"], + index=["0", "1", "2"], + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_derived_dicts(self): + class CustomDict(dict): + pass + + d = {"a": 1.5, "b": 3} + + data_custom = [CustomDict(d)] + data = [d] + + result_custom = DataFrame(data_custom) + result = DataFrame(data) + tm.assert_frame_equal(result, result_custom) + + def test_constructor_ragged(self): + data = {"A": np.random.randn(10), "B": np.random.randn(8)} + with pytest.raises(ValueError, match="All arrays must be of the same length"): + DataFrame(data) + + def test_constructor_scalar(self): + idx = Index(range(3)) + df = DataFrame({"a": 0}, index=idx) + expected = DataFrame({"a": [0, 0, 0]}, index=idx) + tm.assert_frame_equal(df, expected, check_dtype=False) + + def test_constructor_Series_copy_bug(self, float_frame): + df = DataFrame(float_frame["A"], index=float_frame.index, columns=["A"]) + df.copy() + + def test_constructor_mixed_dict_and_Series(self): + data = {} + data["A"] = {"foo": 1, "bar": 2, "baz": 3} + data["B"] = Series([4, 3, 2, 1], index=["bar", "qux", "baz", "foo"]) + + result = DataFrame(data) + assert result.index.is_monotonic_increasing + + # ordering ambiguous, raise exception + with pytest.raises(ValueError, match="ambiguous ordering"): + DataFrame({"A": ["a", "b"], "B": {"a": "a", "b": "b"}}) + + # this is OK though + result = DataFrame({"A": ["a", "b"], "B": Series(["a", "b"], index=["a", "b"])}) + expected = DataFrame({"A": ["a", "b"], "B": ["a", "b"]}, index=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_constructor_mixed_type_rows(self): + # Issue 25075 + data = [[1, 2], (3, 4)] + result = DataFrame(data) + expected = DataFrame([[1, 2], [3, 4]]) + tm.assert_frame_equal(result, expected) + + 
@pytest.mark.parametrize( + "tuples,lists", + [ + ((), []), + ((()), []), + (((), ()), [(), ()]), + (((), ()), [[], []]), + (([], []), [[], []]), + (([1], [2]), [[1], [2]]), # GH 32776 + (([1, 2, 3], [4, 5, 6]), [[1, 2, 3], [4, 5, 6]]), + ], + ) + def test_constructor_tuple(self, tuples, lists): + # GH 25691 + result = DataFrame(tuples) + expected = DataFrame(lists) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_tuples(self): + result = DataFrame({"A": [(1, 2), (3, 4)]}) + expected = DataFrame({"A": Series([(1, 2), (3, 4)])}) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_namedtuples(self): + # GH11181 + from collections import namedtuple + + named_tuple = namedtuple("Pandas", list("ab")) + tuples = [named_tuple(1, 3), named_tuple(2, 4)] + expected = DataFrame({"a": [1, 2], "b": [3, 4]}) + result = DataFrame(tuples) + tm.assert_frame_equal(result, expected) + + # with columns + expected = DataFrame({"y": [1, 2], "z": [3, 4]}) + result = DataFrame(tuples, columns=["y", "z"]) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_dataclasses(self): + # GH21910 + from dataclasses import make_dataclass + + Point = make_dataclass("Point", [("x", int), ("y", int)]) + + data = [Point(0, 3), Point(1, 3)] + expected = DataFrame({"x": [0, 1], "y": [3, 3]}) + result = DataFrame(data) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_dataclasses_with_varying_types(self): + # GH21910 + from dataclasses import make_dataclass + + # varying types + Point = make_dataclass("Point", [("x", int), ("y", int)]) + HLine = make_dataclass("HLine", [("x0", int), ("x1", int), ("y", int)]) + + data = [Point(0, 3), HLine(1, 3, 3)] + + expected = DataFrame( + {"x": [0, np.nan], "y": [3, 3], "x0": [np.nan, 1], "x1": [np.nan, 3]} + ) + result = DataFrame(data) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_dataclasses_error_thrown(self): + # GH21910 + from dataclasses import make_dataclass + + Point = make_dataclass("Point", [("x", int), ("y", int)]) + + # expect TypeError + msg = "asdict() should be called on dataclass instances" + with pytest.raises(TypeError, match=re.escape(msg)): + DataFrame([Point(0, 0), {"x": 1, "y": 0}]) + + def test_constructor_list_of_dict_order(self): + # GH10056 + data = [ + {"First": 1, "Second": 4, "Third": 7, "Fourth": 10}, + {"Second": 5, "First": 2, "Fourth": 11, "Third": 8}, + {"Second": 6, "First": 3, "Fourth": 12, "Third": 9, "YYY": 14, "XXX": 13}, + ] + expected = DataFrame( + { + "First": [1, 2, 3], + "Second": [4, 5, 6], + "Third": [7, 8, 9], + "Fourth": [10, 11, 12], + "YYY": [None, None, 14], + "XXX": [None, None, 13], + } + ) + result = DataFrame(data) + tm.assert_frame_equal(result, expected) + + def test_constructor_Series_named(self): + a = Series([1, 2, 3], index=["a", "b", "c"], name="x") + df = DataFrame(a) + assert df.columns[0] == "x" + tm.assert_index_equal(df.index, a.index) + + # ndarray like + arr = np.random.randn(10) + s = Series(arr, name="x") + df = DataFrame(s) + expected = DataFrame({"x": s}) + tm.assert_frame_equal(df, expected) + + s = Series(arr, index=range(3, 13)) + df = DataFrame(s) + expected = DataFrame({0: s}) + tm.assert_frame_equal(df, expected) + + msg = r"Shape of passed values is \(10, 1\), indices imply \(10, 2\)" + with pytest.raises(ValueError, match=msg): + DataFrame(s, columns=[1, 2]) + + # #2234 + a = Series([], name="x", dtype=object) + df = DataFrame(a) + assert df.columns[0] == "x" + + # series with name and w/o 
+ s1 = Series(arr, name="x") + df = DataFrame([s1, arr]).T + expected = DataFrame({"x": s1, "Unnamed 0": arr}, columns=["x", "Unnamed 0"]) + tm.assert_frame_equal(df, expected) + + # this is a bit non-intuitive here; the series collapse down to arrays + df = DataFrame([arr, s1]).T + expected = DataFrame({1: s1, 0: arr}, columns=[0, 1]) + tm.assert_frame_equal(df, expected) + + def test_constructor_Series_named_and_columns(self): + # GH 9232 validation + + s0 = Series(range(5), name=0) + s1 = Series(range(5), name=1) + + # matching name and column gives standard frame + tm.assert_frame_equal(DataFrame(s0, columns=[0]), s0.to_frame()) + tm.assert_frame_equal(DataFrame(s1, columns=[1]), s1.to_frame()) + + # non-matching produces empty frame + assert DataFrame(s0, columns=[1]).empty + assert DataFrame(s1, columns=[0]).empty + + def test_constructor_Series_differently_indexed(self): + # name + s1 = Series([1, 2, 3], index=["a", "b", "c"], name="x") + + # no name + s2 = Series([1, 2, 3], index=["a", "b", "c"]) + + other_index = Index(["a", "b"]) + + df1 = DataFrame(s1, index=other_index) + exp1 = DataFrame(s1.reindex(other_index)) + assert df1.columns[0] == "x" + tm.assert_frame_equal(df1, exp1) + + df2 = DataFrame(s2, index=other_index) + exp2 = DataFrame(s2.reindex(other_index)) + assert df2.columns[0] == 0 + tm.assert_index_equal(df2.index, other_index) + tm.assert_frame_equal(df2, exp2) + + @pytest.mark.parametrize( + "name_in1,name_in2,name_in3,name_out", + [ + ("idx", "idx", "idx", "idx"), + ("idx", "idx", None, None), + ("idx", None, None, None), + ("idx1", "idx2", None, None), + ("idx1", "idx1", "idx2", None), + ("idx1", "idx2", "idx3", None), + (None, None, None, None), + ], + ) + def test_constructor_index_names(self, name_in1, name_in2, name_in3, name_out): + # GH13475 + indices = [ + Index(["a", "b", "c"], name=name_in1), + Index(["b", "c", "d"], name=name_in2), + Index(["c", "d", "e"], name=name_in3), + ] + series = { + c: Series([0, 1, 2], index=i) for i, c in zip(indices, ["x", "y", "z"]) + } + result = DataFrame(series) + + exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out) + expected = DataFrame( + { + "x": [0, 1, 2, np.nan, np.nan], + "y": [np.nan, 0, 1, 2, np.nan], + "z": [np.nan, np.nan, 0, 1, 2], + }, + index=exp_ind, + ) + + tm.assert_frame_equal(result, expected) + + def test_constructor_manager_resize(self, float_frame): + index = list(float_frame.index[:5]) + columns = list(float_frame.columns[:3]) + + result = DataFrame(float_frame._mgr, index=index, columns=columns) + tm.assert_index_equal(result.index, Index(index)) + tm.assert_index_equal(result.columns, Index(columns)) + + def test_constructor_mix_series_nonseries(self, float_frame): + df = DataFrame( + {"A": float_frame["A"], "B": list(float_frame["B"])}, columns=["A", "B"] + ) + tm.assert_frame_equal(df, float_frame.loc[:, ["A", "B"]]) + + msg = "does not match index length" + with pytest.raises(ValueError, match=msg): + DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]}) + + def test_constructor_miscast_na_int_dtype(self): + msg = "float-dtype values containing NaN and an integer dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64) + expected = DataFrame([[np.nan, 1], [1, 0]]) + tm.assert_frame_equal(df, expected) + + def test_constructor_column_duplicates(self): + # it works! 
#2079 + df = DataFrame([[8, 5]], columns=["a", "a"]) + edf = DataFrame([[8, 5]]) + edf.columns = ["a", "a"] + + tm.assert_frame_equal(df, edf) + + idf = DataFrame.from_records([(8, 5)], columns=["a", "a"]) + + tm.assert_frame_equal(idf, edf) + + def test_constructor_empty_with_string_dtype(self): + # GH 9428 + expected = DataFrame(index=[0, 1], columns=[0, 1], dtype=object) + + df = DataFrame(index=[0, 1], columns=[0, 1], dtype=str) + tm.assert_frame_equal(df, expected) + df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.str_) + tm.assert_frame_equal(df, expected) + df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.unicode_) + tm.assert_frame_equal(df, expected) + df = DataFrame(index=[0, 1], columns=[0, 1], dtype="U5") + tm.assert_frame_equal(df, expected) + + def test_constructor_empty_with_string_extension(self, nullable_string_dtype): + # GH 34915 + expected = DataFrame(index=[], columns=["c1"], dtype=nullable_string_dtype) + df = DataFrame(columns=["c1"], dtype=nullable_string_dtype) + tm.assert_frame_equal(df, expected) + + def test_constructor_single_value(self): + # expecting single value upcasting here + df = DataFrame(0.0, index=[1, 2, 3], columns=["a", "b", "c"]) + tm.assert_frame_equal( + df, DataFrame(np.zeros(df.shape).astype("float64"), df.index, df.columns) + ) + + df = DataFrame(0, index=[1, 2, 3], columns=["a", "b", "c"]) + tm.assert_frame_equal( + df, DataFrame(np.zeros(df.shape).astype("int64"), df.index, df.columns) + ) + + df = DataFrame("a", index=[1, 2], columns=["a", "c"]) + tm.assert_frame_equal( + df, + DataFrame( + np.array([["a", "a"], ["a", "a"]], dtype=object), + index=[1, 2], + columns=["a", "c"], + ), + ) + + msg = "DataFrame constructor not properly called!" + with pytest.raises(ValueError, match=msg): + DataFrame("a", [1, 2]) + with pytest.raises(ValueError, match=msg): + DataFrame("a", columns=["a", "c"]) + + msg = "incompatible data and dtype" + with pytest.raises(TypeError, match=msg): + DataFrame("a", [1, 2], ["a", "c"], float) + + def test_constructor_with_datetimes(self): + intname = np.dtype(np.int_).name + floatname = np.dtype(np.float_).name + datetime64name = np.dtype("M8[ns]").name + objectname = np.dtype(np.object_).name + + # single item + df = DataFrame( + { + "A": 1, + "B": "foo", + "C": "bar", + "D": Timestamp("20010101"), + "E": datetime(2001, 1, 2, 0, 0), + }, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [np.dtype("int64")] + + [np.dtype(objectname)] * 2 + + [np.dtype(datetime64name)] * 2, + index=list("ABCDE"), + ) + tm.assert_series_equal(result, expected) + + # check with ndarray construction ndim==0 (e.g. 
we are passing a ndim 0 + # ndarray with a dtype specified) + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + floatname: np.array(1.0, dtype=floatname), + intname: np.array(1, dtype=intname), + }, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [np.dtype("float64")] + + [np.dtype("int64")] + + [np.dtype("object")] + + [np.dtype("float64")] + + [np.dtype(intname)], + index=["a", "b", "c", floatname, intname], + ) + tm.assert_series_equal(result, expected) + + # check with ndarray construction ndim>0 + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + floatname: np.array([1.0] * 10, dtype=floatname), + intname: np.array([1] * 10, dtype=intname), + }, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [np.dtype("float64")] + + [np.dtype("int64")] + + [np.dtype("object")] + + [np.dtype("float64")] + + [np.dtype(intname)], + index=["a", "b", "c", floatname, intname], + ) + tm.assert_series_equal(result, expected) + + def test_constructor_with_datetimes1(self): + + # GH 2809 + ind = date_range(start="2000-01-01", freq="D", periods=10) + datetimes = [ts.to_pydatetime() for ts in ind] + datetime_s = Series(datetimes) + assert datetime_s.dtype == "M8[ns]" + + def test_constructor_with_datetimes2(self): + # GH 2810 + ind = date_range(start="2000-01-01", freq="D", periods=10) + datetimes = [ts.to_pydatetime() for ts in ind] + dates = [ts.date() for ts in ind] + df = DataFrame(datetimes, columns=["datetimes"]) + df["dates"] = dates + result = df.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("object")], + index=["datetimes", "dates"], + ) + tm.assert_series_equal(result, expected) + + def test_constructor_with_datetimes3(self): + # GH 7594 + # don't coerce tz-aware + tz = pytz.timezone("US/Eastern") + dt = tz.localize(datetime(2012, 1, 1)) + + df = DataFrame({"End Date": dt}, index=[0]) + assert df.iat[0, 0] == dt + tm.assert_series_equal( + df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) + ) + + df = DataFrame([{"End Date": dt}]) + assert df.iat[0, 0] == dt + tm.assert_series_equal( + df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) + ) + + def test_constructor_with_datetimes4(self): + # tz-aware (UTC and other tz's) + # GH 8411 + dr = date_range("20130101", periods=3) + df = DataFrame({"value": dr}) + assert df.iat[0, 0].tz is None + dr = date_range("20130101", periods=3, tz="UTC") + df = DataFrame({"value": dr}) + assert str(df.iat[0, 0].tz) == "UTC" + dr = date_range("20130101", periods=3, tz="US/Eastern") + df = DataFrame({"value": dr}) + assert str(df.iat[0, 0].tz) == "US/Eastern" + + def test_constructor_with_datetimes5(self): + # GH 7822 + # preserver an index with a tz on dict construction + i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern") + + expected = DataFrame({"a": i.to_series().reset_index(drop=True)}) + df = DataFrame() + df["a"] = i + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": i}) + tm.assert_frame_equal(df, expected) + + def test_constructor_with_datetimes6(self): + # multiples + i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern") + i_no_tz = date_range("1/1/2011", periods=5, freq="10s") + df = DataFrame({"a": i, "b": i_no_tz}) + expected = DataFrame({"a": i.to_series().reset_index(drop=True), "b": i_no_tz}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "arr", + [ + np.array([None, None, None, None, datetime.now(), None]), + np.array([None, None, datetime.now(), None]), + [[np.datetime64("NaT")], 
[None]], + [[np.datetime64("NaT")], [pd.NaT]], + [[None], [np.datetime64("NaT")]], + [[None], [pd.NaT]], + [[pd.NaT], [np.datetime64("NaT")]], + [[pd.NaT], [None]], + ], + ) + def test_constructor_datetimes_with_nulls(self, arr): + # gh-15869, GH#11220 + result = DataFrame(arr).dtypes + expected = Series([np.dtype("datetime64[ns]")]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[M]", + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ], + ) + def test_constructor_datetimes_non_ns(self, order, dtype): + na = np.array( + [ + ["2015-01-01", "2015-01-02", "2015-01-03"], + ["2017-01-01", "2017-01-02", "2017-02-03"], + ], + dtype=dtype, + order=order, + ) + df = DataFrame(na) + expected = DataFrame( + [ + ["2015-01-01", "2015-01-02", "2015-01-03"], + ["2017-01-01", "2017-01-02", "2017-02-03"], + ] + ) + expected = expected.astype(dtype=dtype) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) + @pytest.mark.parametrize( + "dtype", + [ + "timedelta64[D]", + "timedelta64[h]", + "timedelta64[m]", + "timedelta64[s]", + "timedelta64[ms]", + "timedelta64[us]", + "timedelta64[ns]", + ], + ) + def test_constructor_timedelta_non_ns(self, order, dtype): + na = np.array( + [ + [np.timedelta64(1, "D"), np.timedelta64(2, "D")], + [np.timedelta64(4, "D"), np.timedelta64(5, "D")], + ], + dtype=dtype, + order=order, + ) + df = DataFrame(na).astype("timedelta64[ns]") + expected = DataFrame( + [ + [Timedelta(1, "D"), Timedelta(2, "D")], + [Timedelta(4, "D"), Timedelta(5, "D")], + ], + ) + tm.assert_frame_equal(df, expected) + + def test_constructor_for_list_with_dtypes(self): + # test list of lists/ndarrays + df = DataFrame([np.arange(5) for x in range(5)]) + result = df.dtypes + expected = Series([np.dtype("int")] * 5) + tm.assert_series_equal(result, expected) + + df = DataFrame([np.array(np.arange(5), dtype="int32") for x in range(5)]) + result = df.dtypes + expected = Series([np.dtype("int32")] * 5) + tm.assert_series_equal(result, expected) + + # overflow issue? 
(we always expected int64 upcasting here) + df = DataFrame({"a": [2**31, 2**31 + 1]}) + assert df.dtypes.iloc[0] == np.dtype("int64") + + # GH #2751 (construction with no index specified), make sure we cast to + # platform values + df = DataFrame([1, 2]) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame([1.0, 2.0]) + assert df.dtypes.iloc[0] == np.dtype("float64") + + df = DataFrame({"a": [1, 2]}) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame({"a": [1.0, 2.0]}) + assert df.dtypes.iloc[0] == np.dtype("float64") + + df = DataFrame({"a": 1}, index=range(3)) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame({"a": 1.0}, index=range(3)) + assert df.dtypes.iloc[0] == np.dtype("float64") + + # with object list + df = DataFrame( + { + "a": [1, 2, 4, 7], + "b": [1.2, 2.3, 5.1, 6.3], + "c": list("abcd"), + "d": [datetime(2000, 1, 1) for i in range(4)], + "e": [1.0, 2, 4.0, 7], + } + ) + result = df.dtypes + expected = Series( + [ + np.dtype("int64"), + np.dtype("float64"), + np.dtype("object"), + np.dtype("datetime64[ns]"), + np.dtype("float64"), + ], + index=list("abcde"), + ) + tm.assert_series_equal(result, expected) + + def test_constructor_frame_copy(self, float_frame): + cop = DataFrame(float_frame, copy=True) + cop["A"] = 5 + assert (cop["A"] == 5).all() + assert not (float_frame["A"] == 5).all() + + def test_constructor_ndarray_copy(self, float_frame, using_array_manager): + if not using_array_manager: + df = DataFrame(float_frame.values) + + float_frame.values[5] = 5 + assert (df.values[5] == 5).all() + + df = DataFrame(float_frame.values, copy=True) + float_frame.values[6] = 6 + assert not (df.values[6] == 6).all() + else: + arr = float_frame.values.copy() + # default: copy to ensure contiguous arrays + df = DataFrame(arr) + assert df._mgr.arrays[0].flags.c_contiguous + arr[0, 0] = 100 + assert df.iloc[0, 0] != 100 + + # manually specify copy=False + df = DataFrame(arr, copy=False) + assert not df._mgr.arrays[0].flags.c_contiguous + arr[0, 0] = 1000 + assert df.iloc[0, 0] == 1000 + + def test_constructor_series_copy(self, float_frame): + series = float_frame._series + + df = DataFrame({"A": series["A"]}, copy=True) + df["A"][:] = 5 + + assert not (series["A"] == 5).all() + + @pytest.mark.parametrize( + "df", + [ + DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]), + DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1.1, 2.2, np.nan]), + DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]), + DataFrame( + [[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan] + ), + DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1, 2, 2]), + ], + ) + def test_constructor_with_nas(self, df): + # GH 5016 + # na's in indices + # GH 21428 (non-unique columns) + + for i in range(len(df.columns)): + df.iloc[:, i] + + indexer = np.arange(len(df.columns))[isna(df.columns)] + + # No NaN found -> error + if len(indexer) == 0: + with pytest.raises(KeyError, match="^nan$"): + df.loc[:, np.nan] + # single nan should result in Series + elif len(indexer) == 1: + tm.assert_series_equal(df.iloc[:, indexer[0]], df.loc[:, np.nan]) + # multiple nans should result in DataFrame + else: + tm.assert_frame_equal(df.iloc[:, indexer], df.loc[:, np.nan]) + + def test_constructor_lists_to_object_dtype(self): + # from #1074 + d = DataFrame({"a": [np.nan, False]}) + assert d["a"].dtype == np.object_ + assert not d["a"][1] + + def test_constructor_ndarray_categorical_dtype(self): + cat = Categorical(["A", "B", "C"]) + arr = 
np.array(cat).reshape(-1, 1) + arr = np.broadcast_to(arr, (3, 4)) + + result = DataFrame(arr, dtype=cat.dtype) + + expected = DataFrame({0: cat, 1: cat, 2: cat, 3: cat}) + tm.assert_frame_equal(result, expected) + + def test_constructor_categorical(self): + + # GH8626 + + # dict creation + df = DataFrame({"A": list("abc")}, dtype="category") + expected = Series(list("abc"), dtype="category", name="A") + tm.assert_series_equal(df["A"], expected) + + # to_frame + s = Series(list("abc"), dtype="category") + result = s.to_frame() + expected = Series(list("abc"), dtype="category", name=0) + tm.assert_series_equal(result[0], expected) + result = s.to_frame(name="foo") + expected = Series(list("abc"), dtype="category", name="foo") + tm.assert_series_equal(result["foo"], expected) + + # list-like creation + df = DataFrame(list("abc"), dtype="category") + expected = Series(list("abc"), dtype="category", name=0) + tm.assert_series_equal(df[0], expected) + + def test_construct_from_1item_list_of_categorical(self): + # ndim != 1 + msg = "will be changed to match the behavior" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc"))]) + expected = DataFrame({0: Series(list("abc"), dtype="category")}) + tm.assert_frame_equal(df, expected) + + def test_construct_from_list_of_categoricals(self): + msg = "will be changed to match the behavior" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) + expected = DataFrame( + { + 0: Series(list("abc"), dtype="category"), + 1: Series(list("abd"), dtype="category"), + }, + columns=[0, 1], + ) + tm.assert_frame_equal(df, expected) + + def test_from_nested_listlike_mixed_types(self): + # mixed + msg = "will be changed to match the behavior" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc")), list("def")]) + expected = DataFrame( + {0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1] + ) + tm.assert_frame_equal(df, expected) + + def test_construct_from_listlikes_mismatched_lengths(self): + # invalid (shape) + msg = "|".join( + [ + r"Length of values \(6\) does not match length of index \(3\)", + ] + ) + msg2 = "will be changed to match the behavior" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match=msg2): + DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + + def test_constructor_categorical_series(self): + + items = [1, 2, 3, 1] + exp = Series(items).astype("category") + res = Series(items, dtype="category") + tm.assert_series_equal(res, exp) + + items = ["a", "b", "c", "a"] + exp = Series(items).astype("category") + res = Series(items, dtype="category") + tm.assert_series_equal(res, exp) + + # insert into frame with different index + # GH 8076 + index = date_range("20000101", periods=3) + expected = Series( + Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"]) + ) + expected.index = index + + expected = DataFrame({"x": expected}) + df = DataFrame({"x": Series(["a", "b", "c"], dtype="category")}, index=index) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "dtype", + tm.ALL_INT_NUMPY_DTYPES + + tm.ALL_INT_EA_DTYPES + + tm.FLOAT_NUMPY_DTYPES + + tm.COMPLEX_DTYPES + + tm.DATETIME64_DTYPES + + tm.TIMEDELTA64_DTYPES + + tm.BOOL_DTYPES, + ) + def test_check_dtype_empty_numeric_column(self, dtype): + # GH24386: Ensure dtypes are set correctly for an empty 
DataFrame. + # Empty DataFrame is generated via dictionary data with non-overlapping columns. + data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) + + assert data.b.dtype == dtype + + @pytest.mark.parametrize( + "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES + ) + def test_check_dtype_empty_string_column(self, request, dtype, using_array_manager): + # GH24386: Ensure dtypes are set correctly for an empty DataFrame. + # Empty DataFrame is generated via dictionary data with non-overlapping columns. + data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) + + if using_array_manager and dtype in tm.BYTES_DTYPES: + # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype + td.mark_array_manager_not_yet_implemented(request) + + assert data.b.dtype.name == "object" + + def test_to_frame_with_falsey_names(self): + # GH 16114 + result = Series(name=0, dtype=object).to_frame().dtypes + expected = Series({0: object}) + tm.assert_series_equal(result, expected) + + result = DataFrame(Series(name=0, dtype=object)).dtypes + tm.assert_series_equal(result, expected) + + @pytest.mark.arm_slow + @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) + def test_constructor_range_dtype(self, dtype): + expected = DataFrame({"A": [0, 1, 2, 3, 4]}, dtype=dtype or "int64") + + # GH 26342 + result = DataFrame(range(5), columns=["A"], dtype=dtype) + tm.assert_frame_equal(result, expected) + + # GH 16804 + result = DataFrame({"A": range(5)}, dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_frame_from_list_subclass(self): + # GH21226 + class List(list): + pass + + expected = DataFrame([[1, 2, 3], [4, 5, 6]]) + result = DataFrame(List([List([1, 2, 3]), List([4, 5, 6])])) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "extension_arr", + [ + Categorical(list("aabbc")), + SparseArray([1, np.nan, np.nan, np.nan]), + IntervalArray([Interval(0, 1), Interval(1, 5)]), + PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")), + ], + ) + def test_constructor_with_extension_array(self, extension_arr): + # GH11363 + expected = DataFrame(Series(extension_arr)) + result = DataFrame(extension_arr) + tm.assert_frame_equal(result, expected) + + def test_datetime_date_tuple_columns_from_dict(self): + # GH 10863 + v = date.today() + tup = v, v + result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup]) + expected = DataFrame([0, 1, 2], columns=Index(Series([tup]))) + tm.assert_frame_equal(result, expected) + + def test_construct_with_two_categoricalindex_series(self): + # GH 14600 + s1 = Series([39, 6, 4], index=CategoricalIndex(["female", "male", "unknown"])) + s2 = Series( + [2, 152, 2, 242, 150], + index=CategoricalIndex(["f", "female", "m", "male", "unknown"]), + ) + result = DataFrame([s1, s2]) + expected = DataFrame( + np.array([[39, 6, 4, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]), + columns=["female", "male", "unknown", "f", "m"], + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_series_nonexact_categoricalindex(self): + # GH 42424 + ser = Series(range(0, 100)) + ser1 = cut(ser, 10).value_counts().head(5) + ser2 = cut(ser, 10).value_counts().tail(5) + result = DataFrame({"1": ser1, "2": ser2}) + index = CategoricalIndex( + [ + Interval(-0.099, 9.9, closed="right"), + Interval(9.9, 19.8, closed="right"), + Interval(19.8, 29.7, closed="right"), + Interval(29.7, 39.6, closed="right"), + Interval(39.6, 49.5, closed="right"), + Interval(49.5, 59.4, closed="right"), + 
Interval(59.4, 69.3, closed="right"), + Interval(69.3, 79.2, closed="right"), + Interval(79.2, 89.1, closed="right"), + Interval(89.1, 99, closed="right"), + ], + ordered=True, + ) + expected = DataFrame( + {"1": [10] * 5 + [np.nan] * 5, "2": [np.nan] * 5 + [10] * 5}, index=index + ) + tm.assert_frame_equal(expected, result) + + def test_from_M8_structured(self): + dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] + arr = np.array(dates, dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")]) + df = DataFrame(arr) + + assert df["Date"][0] == dates[0][0] + assert df["Forecasting"][0] == dates[0][1] + + s = Series(arr["Date"]) + assert isinstance(s[0], Timestamp) + assert s[0] == dates[0][0] + + def test_from_datetime_subclass(self): + # GH21142 Verify whether Datetime subclasses are also of dtype datetime + class DatetimeSubclass(datetime): + pass + + data = DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]}) + assert data.datetime.dtype == "datetime64[ns]" + + def test_with_mismatched_index_length_raises(self): + # GH#33437 + dti = date_range("2016-01-01", periods=3, tz="US/Pacific") + msg = "Shape of passed values|Passed arrays should have the same length" + with pytest.raises(ValueError, match=msg): + DataFrame(dti, index=range(4)) + + def test_frame_ctor_datetime64_column(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + dates = np.asarray(rng) + + df = DataFrame({"A": np.random.randn(len(rng)), "B": dates}) + assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]")) + + def test_dataframe_constructor_infer_multiindex(self): + index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]] + + multi = DataFrame( + np.random.randn(4, 4), + index=[np.array(x) for x in index_lists], + ) + assert isinstance(multi.index, MultiIndex) + assert not isinstance(multi.columns, MultiIndex) + + multi = DataFrame(np.random.randn(4, 4), columns=index_lists) + assert isinstance(multi.columns, MultiIndex) + + @pytest.mark.parametrize( + "input_vals", + [ + ([1, 2]), + (["1", "2"]), + (list(date_range("1/1/2011", periods=2, freq="H"))), + (list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))), + ([Interval(left=0, right=5)]), + ], + ) + def test_constructor_list_str(self, input_vals, string_dtype): + # GH#16605 + # Ensure that data elements are converted to strings when + # dtype is str, 'str', or 'U' + + result = DataFrame({"A": input_vals}, dtype=string_dtype) + expected = DataFrame({"A": input_vals}).astype({"A": string_dtype}) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_str_na(self, string_dtype): + + result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype) + expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("copy", [False, True]) + def test_dict_nocopy( + self, + request, + copy, + any_numeric_ea_dtype, + any_numpy_dtype, + using_array_manager, + using_copy_on_write, + ): + if ( + using_array_manager + and not copy + and not (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES)) + ): + # TODO(ArrayManager) properly honor copy keyword for dict input + td.mark_array_manager_not_yet_implemented(request) + + a = np.array([1, 2], dtype=any_numpy_dtype) + b = np.array([3, 4], dtype=any_numpy_dtype) + if b.dtype.kind in ["S", "U"]: + # These get cast, making the checks below more cumbersome + return + + c = pd.array([1, 2], dtype=any_numeric_ea_dtype) + c_orig = c.copy() + df = DataFrame({"a": a, "b": b, "c": c}, 
copy=copy) + + def get_base(obj): + if isinstance(obj, np.ndarray): + return obj.base + elif isinstance(obj.dtype, np.dtype): + # i.e. DatetimeArray, TimedeltaArray + return obj._ndarray.base + else: + raise TypeError + + def check_views(c_only: bool = False): + # written to work for either BlockManager or ArrayManager + + # Check that the underlying data behind df["c"] is still `c` + # after setting with iloc. Since we don't know which entry in + # df._mgr.arrays corresponds to df["c"], we just check that exactly + # one of these arrays is `c`. GH#38939 + assert sum(x is c for x in df._mgr.arrays) == 1 + if c_only: + # If we ever stop consolidating in setitem_with_indexer, + # this will become unnecessary. + return + + assert ( + sum( + get_base(x) is a + for x in df._mgr.arrays + if isinstance(x.dtype, np.dtype) + ) + == 1 + ) + assert ( + sum( + get_base(x) is b + for x in df._mgr.arrays + if isinstance(x.dtype, np.dtype) + ) + == 1 + ) + + if not copy: + # constructor preserves views + check_views() + + # TODO: most of the rest of this test belongs in indexing tests + df.iloc[0, 0] = 0 + df.iloc[0, 1] = 0 + if not copy: + check_views(True) + + # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve + # view, so we have to check in the other direction + with tm.assert_produces_warning( + DeprecationWarning, match="will attempt to set" + ): + df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) + assert df.dtypes.iloc[2] == c.dtype + if not copy and not using_copy_on_write: + check_views(True) + + if copy: + if a.dtype.kind == "M": + assert a[0] == a.dtype.type(1, "ns") + assert b[0] == b.dtype.type(3, "ns") + else: + assert a[0] == a.dtype.type(1) + assert b[0] == b.dtype.type(3) + # FIXME(GH#35417): enable after GH#35417 + assert c[0] == c_orig[0] # i.e. df.iloc[0, 2]=45 did *not* update c + elif not using_copy_on_write: + # TODO: we can call check_views if we stop consolidating + # in setitem_with_indexer + assert c[0] == 45 # i.e. df.iloc[0, 2]=45 *did* update c + # TODO: we can check b[0] == 0 if we stop consolidating in + # setitem_with_indexer (except for datetimelike?) 
+ + def test_from_series_with_name_with_columns(self): + # GH 7893 + result = DataFrame(Series(1, name="foo"), columns=["bar"]) + expected = DataFrame(columns=["bar"]) + tm.assert_frame_equal(result, expected) + + def test_nested_list_columns(self): + # GH 14467 + result = DataFrame( + [[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]] + ) + expected = DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]), + ) + tm.assert_frame_equal(result, expected) + + def test_from_2d_object_array_of_periods_or_intervals(self): + # Period analogue to GH#26825 + pi = pd.period_range("2016-04-05", periods=3) + data = pi._data.astype(object).reshape(1, -1) + df = DataFrame(data) + assert df.shape == (1, 3) + assert (df.dtypes == pi.dtype).all() + assert (df == pi).all().all() + + ii = pd.IntervalIndex.from_breaks([3, 4, 5, 6]) + data2 = ii._data.astype(object).reshape(1, -1) + df2 = DataFrame(data2) + assert df2.shape == (1, 3) + assert (df2.dtypes == ii.dtype).all() + assert (df2 == ii).all().all() + + # mixed + data3 = np.r_[data, data2, data, data2].T + df3 = DataFrame(data3) + expected = DataFrame({0: pi, 1: ii, 2: pi, 3: ii}) + tm.assert_frame_equal(df3, expected) + + @pytest.mark.parametrize( + "col_a, col_b", + [ + ([[1], [2]], np.array([[1], [2]])), + (np.array([[1], [2]]), [[1], [2]]), + (np.array([[1], [2]]), np.array([[1], [2]])), + ], + ) + def test_error_from_2darray(self, col_a, col_b): + msg = "Per-column arrays must each be 1-dimensional" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": col_a, "b": col_b}) + + def test_from_dict_with_missing_copy_false(self): + # GH#45369 filled columns should not be views of one another + df = DataFrame(index=[1, 2, 3], columns=["a", "b", "c"], copy=False) + assert not np.shares_memory(df["a"]._values, df["b"]._values) + + df.iloc[0, 0] = 0 + expected = DataFrame( + { + "a": [0, np.nan, np.nan], + "b": [np.nan, np.nan, np.nan], + "c": [np.nan, np.nan, np.nan], + }, + index=[1, 2, 3], + dtype=object, + ) + tm.assert_frame_equal(df, expected) + + def test_construction_empty_array_multi_column_raises(self): + # GH#46822 + msg = "Empty data passed with indices specified." 
+ with pytest.raises(ValueError, match=msg): + DataFrame(data=np.array([]), columns=["a", "b"]) + + +class TestDataFrameConstructorIndexInference: + def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): + rng1 = pd.period_range("1/1/1999", "1/1/2012", freq="M") + s1 = Series(np.random.randn(len(rng1)), rng1) + + rng2 = pd.period_range("1/1/1980", "12/1/2001", freq="M") + s2 = Series(np.random.randn(len(rng2)), rng2) + df = DataFrame({"s1": s1, "s2": s2}) + + exp = pd.period_range("1/1/1980", "1/1/2012", freq="M") + tm.assert_index_equal(df.index, exp) + + def test_frame_from_dict_with_mixed_tzaware_indexes(self): + # GH#44091 + dti = date_range("2016-01-01", periods=3) + + ser1 = Series(range(3), index=dti) + ser2 = Series(range(3), index=dti.tz_localize("UTC")) + ser3 = Series(range(3), index=dti.tz_localize("US/Central")) + ser4 = Series(range(3)) + + # no tz-naive, but we do have mixed tzs and a non-DTI + df1 = DataFrame({"A": ser2, "B": ser3, "C": ser4}) + exp_index = Index( + list(ser2.index) + list(ser3.index) + list(ser4.index), dtype=object + ) + tm.assert_index_equal(df1.index, exp_index) + + df2 = DataFrame({"A": ser2, "C": ser4, "B": ser3}) + exp_index3 = Index( + list(ser2.index) + list(ser4.index) + list(ser3.index), dtype=object + ) + tm.assert_index_equal(df2.index, exp_index3) + + df3 = DataFrame({"B": ser3, "A": ser2, "C": ser4}) + exp_index3 = Index( + list(ser3.index) + list(ser2.index) + list(ser4.index), dtype=object + ) + tm.assert_index_equal(df3.index, exp_index3) + + df4 = DataFrame({"C": ser4, "B": ser3, "A": ser2}) + exp_index4 = Index( + list(ser4.index) + list(ser3.index) + list(ser2.index), dtype=object + ) + tm.assert_index_equal(df4.index, exp_index4) + + # TODO: not clear if these raising is desired (no extant tests), + # but this is de facto behavior 2021-12-22 + msg = "Cannot join tz-naive with tz-aware DatetimeIndex" + with pytest.raises(TypeError, match=msg): + DataFrame({"A": ser2, "B": ser3, "C": ser4, "D": ser1}) + with pytest.raises(TypeError, match=msg): + DataFrame({"A": ser2, "B": ser3, "D": ser1}) + with pytest.raises(TypeError, match=msg): + DataFrame({"D": ser1, "A": ser2, "B": ser3}) + + +class TestDataFrameConstructorWithDtypeCoercion: + def test_floating_values_integer_dtype(self): + # GH#40110 make DataFrame behavior with arraylike floating data and + # inty dtype match Series behavior + + arr = np.random.randn(10, 5) + + msg = "if they cannot be cast losslessly" + with tm.assert_produces_warning(FutureWarning, match=msg): + DataFrame(arr, dtype="i8") + + with tm.assert_produces_warning(None): + # if they can be cast losslessly, no warning + DataFrame(arr.round(), dtype="i8") + + # with NaNs, we go through a different path with a different warning + arr[0, 0] = np.nan + msg = "passing float-dtype values containing NaN" + with tm.assert_produces_warning(FutureWarning, match=msg): + DataFrame(arr, dtype="i8") + with tm.assert_produces_warning(FutureWarning, match=msg): + Series(arr[0], dtype="i8") + # The future (raising) behavior matches what we would get via astype: + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + DataFrame(arr).astype("i8") + with pytest.raises(ValueError, match=msg): + Series(arr[0]).astype("i8") + + +class TestDataFrameConstructorWithDatetimeTZ: + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_construction_preserves_tzaware_dtypes(self, tz): + # after GH#7822 + # these retain the timezones on dict 
construction + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + dr_tz = dr.tz_localize(tz) + df = DataFrame({"A": "foo", "B": dr_tz}, index=dr) + tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo) + assert df["B"].dtype == tz_expected + + # GH#2810 (with timezones) + datetimes_naive = [ts.to_pydatetime() for ts in dr] + datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] + df = DataFrame({"dr": dr}) + df["dr_tz"] = dr_tz + df["datetimes_naive"] = datetimes_naive + df["datetimes_with_tz"] = datetimes_with_tz + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + DatetimeTZDtype(tz=tz), + np.dtype("datetime64[ns]"), + DatetimeTZDtype(tz=tz), + ], + index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"], + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("pydt", [True, False]) + def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture, pydt): + # GH#25843, GH#41555, GH#33401 + tz = tz_aware_fixture + ts = Timestamp("2019", tz=tz) + if pydt: + ts = ts.to_pydatetime() + ts_naive = Timestamp("2019") + + with tm.assert_produces_warning(FutureWarning): + result = DataFrame({0: [ts]}, dtype="datetime64[ns]") + + expected = DataFrame({0: [ts_naive]}) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = DataFrame({0: ts}, index=[0], dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = DataFrame([ts], dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + df = DataFrame([Series([ts])], dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + df = DataFrame([[ts]], columns=[0], dtype="datetime64[ns]") + tm.assert_equal(df, expected) + + def test_from_dict(self): + + # 8260 + # support datetime64 with tz + + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + dr = date_range("20130110", periods=3) + + # construction + df = DataFrame({"A": idx, "B": dr}) + assert df["A"].dtype, "M8[ns, US/Eastern" + assert df["A"].name == "A" + tm.assert_series_equal(df["A"], Series(idx, name="A")) + tm.assert_series_equal(df["B"], Series(dr, name="B")) + + def test_from_index(self): + + # from index + idx2 = date_range("20130101", periods=3, tz="US/Eastern", name="foo") + df2 = DataFrame(idx2) + tm.assert_series_equal(df2["foo"], Series(idx2, name="foo")) + df2 = DataFrame(Series(idx2)) + tm.assert_series_equal(df2["foo"], Series(idx2, name="foo")) + + idx2 = date_range("20130101", periods=3, tz="US/Eastern") + df2 = DataFrame(idx2) + tm.assert_series_equal(df2[0], Series(idx2, name=0)) + df2 = DataFrame(Series(idx2)) + tm.assert_series_equal(df2[0], Series(idx2, name=0)) + + def test_frame_dict_constructor_datetime64_1680(self): + dr = date_range("1/1/2012", periods=10) + s = Series(dr, index=dr) + + # it works! 
+ DataFrame({"a": "foo", "b": s}, index=dr) + DataFrame({"a": "foo", "b": s.values}, index=dr) + + def test_frame_datetime64_mixed_index_ctor_1681(self): + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + ts = Series(dr) + + # it works! + d = DataFrame({"A": "foo", "B": ts}, index=dr) + assert d["B"].isna().all() + + def test_frame_timeseries_column(self): + # GH19157 + dr = date_range(start="20130101T10:00:00", periods=3, freq="T", tz="US/Eastern") + result = DataFrame(dr, columns=["timestamps"]) + expected = DataFrame( + { + "timestamps": [ + Timestamp("20130101T10:00:00", tz="US/Eastern"), + Timestamp("20130101T10:01:00", tz="US/Eastern"), + Timestamp("20130101T10:02:00", tz="US/Eastern"), + ] + } + ) + tm.assert_frame_equal(result, expected) + + def test_nested_dict_construction(self): + # GH22227 + columns = ["Nevada", "Ohio"] + pop = { + "Nevada": {2001: 2.4, 2002: 2.9}, + "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6}, + } + result = DataFrame(pop, index=[2001, 2002, 2003], columns=columns) + expected = DataFrame( + [(2.4, 1.7), (2.9, 3.6), (np.nan, np.nan)], + columns=columns, + index=Index([2001, 2002, 2003]), + ) + tm.assert_frame_equal(result, expected) + + def test_from_tzaware_object_array(self): + # GH#26825 2D object array of tzaware timestamps should not raise + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") + data = dti._data.astype(object).reshape(1, -1) + df = DataFrame(data) + assert df.shape == (1, 3) + assert (df.dtypes == dti.dtype).all() + assert (df == dti).all().all() + + def test_from_tzaware_mixed_object_array(self): + # GH#26825 + arr = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + pd.NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + pd.NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ], + dtype=object, + ).T + res = DataFrame(arr, columns=["A", "B", "C"]) + + expected_dtypes = [ + "datetime64[ns]", + "datetime64[ns, US/Eastern]", + "datetime64[ns, CET]", + ] + assert (res.dtypes == expected_dtypes).all() + + def test_from_2d_ndarray_with_dtype(self): + # GH#12513 + array_dim2 = np.arange(10).reshape((5, 2)) + df = DataFrame(array_dim2, dtype="datetime64[ns, UTC]") + + expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]") + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("typ", [set, frozenset]) + def test_construction_from_set_raises(self, typ): + # https://github.com/pandas-dev/pandas/issues/32582 + values = typ({1, 2, 3}) + msg = f"'{typ.__name__}' type is unordered" + with pytest.raises(TypeError, match=msg): + DataFrame({"a": values}) + + with pytest.raises(TypeError, match=msg): + Series(values) + + def test_construction_from_ndarray_datetimelike(self): + # ensure the underlying arrays are properly wrapped as EA when + # constructed from 2D ndarray + arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3) + df = DataFrame(arr) + assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays) + + def test_construction_from_ndarray_with_eadtype_mismatched_columns(self): + arr = np.random.randn(10, 2) + dtype = pd.array([2.0]).dtype + msg = r"len\(arrays\) must match len\(columns\)" + with pytest.raises(ValueError, match=msg): + DataFrame(arr, columns=["foo"], dtype=dtype) + + arr2 = pd.array([2.0, 3.0, 4.0]) + with pytest.raises(ValueError, match=msg): + DataFrame(arr2, 
columns=["foo", "bar"]) + + def test_columns_indexes_raise_on_sets(self): + # GH 47215 + data = [[1, 2, 3], [4, 5, 6]] + with pytest.raises(ValueError, match="index cannot be a set"): + DataFrame(data, index={"a", "b"}) + with pytest.raises(ValueError, match="columns cannot be a set"): + DataFrame(data, columns={"a", "b", "c"}) + + +def get1(obj): # TODO: make a helper in tm? + if isinstance(obj, Series): + return obj.iloc[0] + else: + return obj.iloc[0, 0] + + +class TestFromScalar: + @pytest.fixture(params=[list, dict, None]) + def constructor(self, request, frame_or_series): + box = request.param + + extra = {"index": range(2)} + if frame_or_series is DataFrame: + extra["columns"] = ["A"] + + if box is None: + return functools.partial(frame_or_series, **extra) + + elif box is dict: + if frame_or_series is Series: + return lambda x, **kwargs: frame_or_series( + {0: x, 1: x}, **extra, **kwargs + ) + else: + return lambda x, **kwargs: frame_or_series({"A": x}, **extra, **kwargs) + else: + if frame_or_series is Series: + return lambda x, **kwargs: frame_or_series([x, x], **extra, **kwargs) + else: + return lambda x, **kwargs: frame_or_series( + {"A": [x, x]}, **extra, **kwargs + ) + + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_from_nat_scalar(self, dtype, constructor): + obj = constructor(pd.NaT, dtype=dtype) + assert np.all(obj.dtypes == dtype) + assert np.all(obj.isna()) + + def test_from_timedelta_scalar_preserves_nanos(self, constructor): + td = Timedelta(1) + + obj = constructor(td, dtype="m8[ns]") + assert get1(obj) == td + + def test_from_timestamp_scalar_preserves_nanos(self, constructor, fixed_now_ts): + ts = fixed_now_ts + Timedelta(1) + + obj = constructor(ts, dtype="M8[ns]") + assert get1(obj) == ts + + def test_from_timedelta64_scalar_object(self, constructor): + + td = Timedelta(1) + td64 = td.to_timedelta64() + + obj = constructor(td64, dtype=object) + assert isinstance(get1(obj), np.timedelta64) + + @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) + def test_from_scalar_datetimelike_mismatched(self, constructor, cls): + scalar = cls("NaT", "ns") + dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls] + + msg = "Cannot cast" + if cls is np.datetime64: + msg = "|".join( + [ + r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]", + "Cannot cast", + ] + ) + + with pytest.raises(TypeError, match=msg): + constructor(scalar, dtype=dtype) + + scalar = cls(4, "ns") + with pytest.raises(TypeError, match=msg): + constructor(scalar, dtype=dtype) + + @pytest.mark.parametrize("cls", [datetime, np.datetime64]) + def test_from_out_of_bounds_datetime(self, constructor, cls): + scalar = datetime(9999, 1, 1) + if cls is np.datetime64: + scalar = np.datetime64(scalar, "D") + result = constructor(scalar) + + assert type(get1(result)) is cls + + @pytest.mark.parametrize("cls", [timedelta, np.timedelta64]) + def test_from_out_of_bounds_timedelta(self, constructor, cls): + scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1) + if cls is np.timedelta64: + scalar = np.timedelta64(scalar, "D") + result = constructor(scalar) + + assert type(get1(result)) is cls + + def test_tzaware_data_tznaive_dtype(self, constructor): + tz = "US/Eastern" + ts = Timestamp("2019", tz=tz) + ts_naive = Timestamp("2019") + + with tm.assert_produces_warning(FutureWarning, match="Data is timezone-aware"): + result = constructor(ts, dtype="M8[ns]") + + assert np.all(result.dtypes == "M8[ns]") + assert np.all(result == ts_naive) + + +# TODO: better location for 
this test? +class TestAllowNonNano: + # Until 2.0, we do not preserve non-nano dt64/td64 when passed as ndarray, + # but do preserve it when passed as DTA/TDA + + @pytest.fixture(params=[True, False]) + def as_td(self, request): + return request.param + + @pytest.fixture + def arr(self, as_td): + values = np.arange(5).astype(np.int64).view("M8[s]") + if as_td: + values = values - values[0] + return TimedeltaArray._simple_new(values, dtype=values.dtype) + else: + return DatetimeArray._simple_new(values, dtype=values.dtype) + + def test_index_allow_non_nano(self, arr): + idx = Index(arr) + assert idx.dtype == arr.dtype + + def test_dti_tdi_allow_non_nano(self, arr, as_td): + if as_td: + idx = pd.TimedeltaIndex(arr) + else: + idx = DatetimeIndex(arr) + assert idx.dtype == arr.dtype + + def test_series_allow_non_nano(self, arr): + ser = Series(arr) + assert ser.dtype == arr.dtype + + def test_frame_allow_non_nano(self, arr): + df = DataFrame(arr) + assert df.dtypes[0] == arr.dtype + + @pytest.mark.xfail( + # TODO(2.0): xfail should become unnecessary + strict=False, + reason="stack_arrays converts TDA to ndarray, then goes " + "through ensure_wrapped_if_datetimelike", + ) + def test_frame_from_dict_allow_non_nano(self, arr): + df = DataFrame({0: arr}) + assert df.dtypes[0] == arr.dtype diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py new file mode 100644 index 00000000..5bd9c426 --- /dev/null +++ b/pandas/tests/frame/test_cumulative.py @@ -0,0 +1,81 @@ +""" +Tests for DataFrame cumulative operations + +See also +-------- +tests.series.test_cumulative +""" + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameCumulativeOps: + # --------------------------------------------------------------------- + # Cumulative Operations - cumsum, cummax, ... + + def test_cumulative_ops_smoke(self): + # it works + df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + df.cummax() + df.cummin() + df.cumsum() + + dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5)) + # TODO(wesm): do something with this? + dm.cumsum() + + def test_cumprod_smoke(self, datetime_frame): + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan + + # ints + df = datetime_frame.fillna(0).astype(int) + df.cumprod(0) + df.cumprod(1) + + # ints32 + df = datetime_frame.fillna(0).astype(np.int32) + df.cumprod(0) + df.cumprod(1) + + @pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"]) + def test_cumulative_ops_match_series_apply(self, datetime_frame, method): + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan + + # axis = 0 + result = getattr(datetime_frame, method)() + expected = datetime_frame.apply(getattr(Series, method)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = getattr(datetime_frame, method)(axis=1) + expected = datetime_frame.apply(getattr(Series, method), axis=1) + tm.assert_frame_equal(result, expected) + + # fix issue TODO: GH ref? 
+ assert np.shape(result) == np.shape(datetime_frame) + + def test_cumsum_preserve_dtypes(self): + # GH#19296 dont incorrectly upcast to object + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3.0], "C": [True, False, False]}) + + result = df.cumsum() + + expected = DataFrame( + { + "A": Series([1, 3, 6], dtype=np.int64), + "B": Series([1, 3, 6], dtype=np.float64), + "C": df["C"].cumsum(), + } + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_iteration.py b/pandas/tests/frame/test_iteration.py new file mode 100644 index 00000000..c6e5aa6f --- /dev/null +++ b/pandas/tests/frame/test_iteration.py @@ -0,0 +1,162 @@ +import datetime + +import numpy as np + +from pandas.compat import ( + IS64, + is_platform_windows, +) + +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, +) +import pandas._testing as tm + + +class TestIteration: + def test_keys(self, float_frame): + assert float_frame.keys() is float_frame.columns + + def test_iteritems(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + for k, v in df.items(): + assert isinstance(v, DataFrame._constructor_sliced) + + def test_items(self): + # GH#17213, GH#13918 + cols = ["a", "b", "c"] + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) + for c, (k, v) in zip(cols, df.items()): + assert c == k + assert isinstance(v, Series) + assert (df[k] == v).all() + + def test_items_names(self, float_string_frame): + for k, v in float_string_frame.items(): + assert v.name == k + + def test_iter(self, float_frame): + assert tm.equalContents(list(float_frame), float_frame.columns) + + def test_iterrows(self, float_frame, float_string_frame): + for k, v in float_frame.iterrows(): + exp = float_frame.loc[k] + tm.assert_series_equal(v, exp) + + for k, v in float_string_frame.iterrows(): + exp = float_string_frame.loc[k] + tm.assert_series_equal(v, exp) + + def test_iterrows_iso8601(self): + # GH#19671 + s = DataFrame( + { + "non_iso8601": ["M1701", "M1802", "M1903", "M2004"], + "iso8601": date_range("2000-01-01", periods=4, freq="M"), + } + ) + for k, v in s.iterrows(): + exp = s.loc[k] + tm.assert_series_equal(v, exp) + + def test_iterrows_corner(self): + # GH#12222 + df = DataFrame( + { + "a": [datetime.datetime(2015, 1, 1)], + "b": [None], + "c": [None], + "d": [""], + "e": [[]], + "f": [set()], + "g": [{}], + } + ) + expected = Series( + [datetime.datetime(2015, 1, 1), None, None, "", [], set(), {}], + index=list("abcdefg"), + name=0, + dtype="object", + ) + _, result = next(df.iterrows()) + tm.assert_series_equal(result, expected) + + def test_itertuples(self, float_frame): + for i, tup in enumerate(float_frame.itertuples()): + ser = DataFrame._constructor_sliced(tup[1:]) + ser.name = tup[0] + expected = float_frame.iloc[i, :].reset_index(drop=True) + tm.assert_series_equal(ser, expected) + + df = DataFrame( + {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"] + ) + + for tup in df.itertuples(index=False): + assert isinstance(tup[1], int) + + df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + dfaa = df[["a", "a"]] + + assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)] + + # repr with int on 32-bit/windows + if not (is_platform_windows() or not IS64): + assert ( + repr(list(df.itertuples(name=None))) + == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]" + ) + + tup = next(df.itertuples(name="TestName")) + assert tup._fields == ("Index", "a", "b") + assert (tup.Index, tup.a, tup.b) == tup + assert type(tup).__name__ == "TestName" + + df.columns = 
["def", "return"] + tup2 = next(df.itertuples(name="TestName")) + assert tup2 == (0, 1, 4) + assert tup2._fields == ("Index", "_1", "_2") + + df3 = DataFrame({"f" + str(i): [i] for i in range(1024)}) + # will raise SyntaxError if trying to create namedtuple + tup3 = next(df3.itertuples()) + assert isinstance(tup3, tuple) + assert hasattr(tup3, "_fields") + + # GH#28282 + df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}]) + result_254_columns = next(df_254_columns.itertuples(index=False)) + assert isinstance(result_254_columns, tuple) + assert hasattr(result_254_columns, "_fields") + + df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}]) + result_255_columns = next(df_255_columns.itertuples(index=False)) + assert isinstance(result_255_columns, tuple) + assert hasattr(result_255_columns, "_fields") + + def test_sequence_like_with_categorical(self): + + # GH#7839 + # make sure can iterate + df = DataFrame( + {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]} + ) + df["grade"] = Categorical(df["raw_grade"]) + + # basic sequencing testing + result = list(df.grade.values) + expected = np.array(df.grade.values).tolist() + tm.assert_almost_equal(result, expected) + + # iteration + for t in df.itertuples(index=False): + str(t) + + for row, s in df.iterrows(): + str(s) + + for c, col in df.items(): + str(s) diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py new file mode 100644 index 00000000..f509ae52 --- /dev/null +++ b/pandas/tests/frame/test_logical_ops.py @@ -0,0 +1,191 @@ +import operator +import re + +import numpy as np +import pytest + +from pandas import ( + CategoricalIndex, + DataFrame, + Interval, + Series, + isnull, +) +import pandas._testing as tm + + +class TestDataFrameLogicalOperators: + # &, |, ^ + + @pytest.mark.parametrize( + "left, right, op, expected", + [ + ( + [True, False, np.nan], + [True, False, True], + operator.and_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.and_, + [True, False, False], + ), + ( + [True, False, np.nan], + [True, False, True], + operator.or_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.or_, + [True, False, True], + ), + ], + ) + def test_logical_operators_nans(self, left, right, op, expected, frame_or_series): + # GH#13896 + result = op(frame_or_series(left), frame_or_series(right)) + expected = frame_or_series(expected) + + tm.assert_equal(result, expected) + + def test_logical_ops_empty_frame(self): + # GH#5808 + # empty frames, non-mixed dtype + df = DataFrame(index=[1]) + + result = df & df + tm.assert_frame_equal(result, df) + + result = df | df + tm.assert_frame_equal(result, df) + + df2 = DataFrame(index=[1, 2]) + result = df & df2 + tm.assert_frame_equal(result, df2) + + dfa = DataFrame(index=[1], columns=["A"]) + + result = dfa & dfa + expected = DataFrame(False, index=[1], columns=["A"]) + tm.assert_frame_equal(result, expected) + + def test_logical_ops_bool_frame(self): + # GH#5808 + df1a_bool = DataFrame(True, index=[1], columns=["A"]) + + result = df1a_bool & df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + result = df1a_bool | df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + def test_logical_ops_int_frame(self): + # GH#5808 + df1a_int = DataFrame(1, index=[1], columns=["A"]) + df1a_bool = DataFrame(True, index=[1], columns=["A"]) + + result = df1a_int | df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + # Check that 
this matches Series behavior + res_ser = df1a_int["A"] | df1a_bool["A"] + tm.assert_series_equal(res_ser, df1a_bool["A"]) + + def test_logical_ops_invalid(self): + # GH#5808 + + df1 = DataFrame(1.0, index=[1], columns=["A"]) + df2 = DataFrame(True, index=[1], columns=["A"]) + msg = re.escape("unsupported operand type(s) for |: 'float' and 'bool'") + with pytest.raises(TypeError, match=msg): + df1 | df2 + + df1 = DataFrame("foo", index=[1], columns=["A"]) + df2 = DataFrame(True, index=[1], columns=["A"]) + msg = re.escape("unsupported operand type(s) for |: 'str' and 'bool'") + with pytest.raises(TypeError, match=msg): + df1 | df2 + + def test_logical_operators(self): + def _check_bin_op(op): + result = op(df1, df2) + expected = DataFrame( + op(df1.values, df2.values), index=df1.index, columns=df1.columns + ) + assert result.values.dtype == np.bool_ + tm.assert_frame_equal(result, expected) + + def _check_unary_op(op): + result = op(df1) + expected = DataFrame(op(df1.values), index=df1.index, columns=df1.columns) + assert result.values.dtype == np.bool_ + tm.assert_frame_equal(result, expected) + + df1 = { + "a": {"a": True, "b": False, "c": False, "d": True, "e": True}, + "b": {"a": False, "b": True, "c": False, "d": False, "e": False}, + "c": {"a": False, "b": False, "c": True, "d": False, "e": False}, + "d": {"a": True, "b": False, "c": False, "d": True, "e": True}, + "e": {"a": True, "b": False, "c": False, "d": True, "e": True}, + } + + df2 = { + "a": {"a": True, "b": False, "c": True, "d": False, "e": False}, + "b": {"a": False, "b": True, "c": False, "d": False, "e": False}, + "c": {"a": True, "b": False, "c": True, "d": False, "e": False}, + "d": {"a": False, "b": False, "c": False, "d": True, "e": False}, + "e": {"a": False, "b": False, "c": False, "d": False, "e": True}, + } + + df1 = DataFrame(df1) + df2 = DataFrame(df2) + + _check_bin_op(operator.and_) + _check_bin_op(operator.or_) + _check_bin_op(operator.xor) + + _check_unary_op(operator.inv) # TODO: belongs elsewhere + + def test_logical_with_nas(self): + d = DataFrame({"a": [np.nan, False], "b": [True, True]}) + + # GH4947 + # bool comparisons should return bool + result = d["a"] | d["b"] + expected = Series([False, True]) + tm.assert_series_equal(result, expected) + + # GH4604, automatic casting here + result = d["a"].fillna(False) | d["b"] + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + result = d["a"].fillna(False, downcast=False) | d["b"] + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + def test_logical_ops_categorical_columns(self): + # GH#38367 + intervals = [Interval(1, 2), Interval(3, 4)] + data = DataFrame( + [[1, np.nan], [2, np.nan]], + columns=CategoricalIndex( + intervals, categories=intervals + [Interval(5, 6)] + ), + ) + mask = DataFrame( + [[False, False], [False, False]], columns=data.columns, dtype=bool + ) + result = mask | isnull(data) + expected = DataFrame( + [[False, True], [False, True]], + columns=CategoricalIndex( + intervals, categories=intervals + [Interval(5, 6)] + ), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py new file mode 100644 index 00000000..38861a2b --- /dev/null +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -0,0 +1,346 @@ +import numpy as np +import pytest + +from pandas.compat import is_platform_windows + +import pandas as pd +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing 
as tm + + +def check(result, expected=None): + if expected is not None: + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + +class TestDataFrameNonuniqueIndexes: + def test_setattr_columns_vs_construct_with_columns(self): + + # assignment + # GH 3687 + arr = np.random.randn(3, 2) + idx = list(range(2)) + df = DataFrame(arr, columns=["A", "A"]) + df.columns = idx + expected = DataFrame(arr, columns=idx) + check(df, expected) + + def test_setattr_columns_vs_construct_with_columns_datetimeindx(self): + idx = date_range("20130101", periods=4, freq="Q-NOV") + df = DataFrame( + [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=["a", "a", "a", "a"] + ) + df.columns = idx + expected = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=idx) + check(df, expected) + + def test_insert_with_duplicate_columns(self): + # insert + df = DataFrame( + [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], + columns=["foo", "bar", "foo", "hello"], + ) + df["string"] = "bah" + expected = DataFrame( + [[1, 1, 1, 5, "bah"], [1, 1, 2, 5, "bah"], [2, 1, 3, 5, "bah"]], + columns=["foo", "bar", "foo", "hello", "string"], + ) + check(df, expected) + with pytest.raises(ValueError, match="Length of value"): + df.insert(0, "AnotherColumn", range(len(df.index) - 1)) + + # insert same dtype + df["foo2"] = 3 + expected = DataFrame( + [[1, 1, 1, 5, "bah", 3], [1, 1, 2, 5, "bah", 3], [2, 1, 3, 5, "bah", 3]], + columns=["foo", "bar", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + + # set (non-dup) + df["foo2"] = 4 + expected = DataFrame( + [[1, 1, 1, 5, "bah", 4], [1, 1, 2, 5, "bah", 4], [2, 1, 3, 5, "bah", 4]], + columns=["foo", "bar", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + df["foo2"] = 3 + + # delete (non dup) + del df["bar"] + expected = DataFrame( + [[1, 1, 5, "bah", 3], [1, 2, 5, "bah", 3], [2, 3, 5, "bah", 3]], + columns=["foo", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + + # try to delete again (its not consolidated) + del df["hello"] + expected = DataFrame( + [[1, 1, "bah", 3], [1, 2, "bah", 3], [2, 3, "bah", 3]], + columns=["foo", "foo", "string", "foo2"], + ) + check(df, expected) + + # consolidate + df = df._consolidate() + expected = DataFrame( + [[1, 1, "bah", 3], [1, 2, "bah", 3], [2, 3, "bah", 3]], + columns=["foo", "foo", "string", "foo2"], + ) + check(df, expected) + + # insert + df.insert(2, "new_col", 5.0) + expected = DataFrame( + [[1, 1, 5.0, "bah", 3], [1, 2, 5.0, "bah", 3], [2, 3, 5.0, "bah", 3]], + columns=["foo", "foo", "new_col", "string", "foo2"], + ) + check(df, expected) + + # insert a dup + with pytest.raises(ValueError, match="cannot insert"): + df.insert(2, "new_col", 4.0) + + df.insert(2, "new_col", 4.0, allow_duplicates=True) + expected = DataFrame( + [ + [1, 1, 4.0, 5.0, "bah", 3], + [1, 2, 4.0, 5.0, "bah", 3], + [2, 3, 4.0, 5.0, "bah", 3], + ], + columns=["foo", "foo", "new_col", "new_col", "string", "foo2"], + ) + check(df, expected) + + # delete (dup) + del df["foo"] + expected = DataFrame( + [[4.0, 5.0, "bah", 3], [4.0, 5.0, "bah", 3], [4.0, 5.0, "bah", 3]], + columns=["new_col", "new_col", "string", "foo2"], + ) + tm.assert_frame_equal(df, expected) + + def test_dup_across_dtypes(self): + # dup across dtypes + df = DataFrame( + [[1, 1, 1.0, 5], [1, 1, 2.0, 5], [2, 1, 3.0, 5]], + columns=["foo", "bar", "foo", "hello"], + ) + check(df) + + df["foo2"] = 7.0 + expected = DataFrame( + [[1, 1, 1.0, 5, 7.0], [1, 1, 2.0, 5, 7.0], [2, 1, 3.0, 5, 7.0]], + columns=["foo", "bar", "foo", "hello", "foo2"], + ) + 
check(df, expected) + + result = df["foo"] + expected = DataFrame([[1, 1.0], [1, 2.0], [2, 3.0]], columns=["foo", "foo"]) + check(result, expected) + + # multiple replacements + df["foo"] = "string" + expected = DataFrame( + [ + ["string", 1, "string", 5, 7.0], + ["string", 1, "string", 5, 7.0], + ["string", 1, "string", 5, 7.0], + ], + columns=["foo", "bar", "foo", "hello", "foo2"], + ) + check(df, expected) + + del df["foo"] + expected = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "hello", "foo2"] + ) + check(df, expected) + + def test_column_dups_indexes(self): + # check column dups with index equal and not equal to df's index + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "A"], + ) + for index in [df.index, pd.Index(list("edcba"))]: + this_df = df.copy() + expected_ser = Series(index.values, index=this_df.index) + expected_df = DataFrame( + {"A": expected_ser, "B": this_df["B"]}, + columns=["A", "B", "A"], + ) + this_df["A"] = index + check(this_df, expected_df) + + def test_changing_dtypes_with_duplicate_columns(self): + # multiple assignments that change dtypes + # the location indexer is a slice + # GH 6120 + df = DataFrame(np.random.randn(5, 2), columns=["that", "that"]) + expected = DataFrame(1.0, index=range(5), columns=["that", "that"]) + + df["that"] = 1.0 + check(df, expected) + + df = DataFrame(np.random.rand(5, 2), columns=["that", "that"]) + expected = DataFrame(1, index=range(5), columns=["that", "that"]) + + df["that"] = 1 + check(df, expected) + + def test_dup_columns_comparisons(self): + # equality + df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]], columns=["A", "B"]) + df2 = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]], columns=["A", "A"]) + + # not-comparing like-labelled + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 == df2 + + df1r = df1.reindex_like(df2) + result = df1r == df2 + expected = DataFrame( + [[False, True], [True, False], [False, False], [True, False]], + columns=["A", "A"], + ) + tm.assert_frame_equal(result, expected) + + def test_mixed_column_selection(self): + # mixed column selection + # GH 5639 + dfbool = DataFrame( + { + "one": Series([True, True, False], index=["a", "b", "c"]), + "two": Series([False, False, True, False], index=["a", "b", "c", "d"]), + "three": Series([False, True, True, True], index=["a", "b", "c", "d"]), + } + ) + expected = pd.concat([dfbool["one"], dfbool["three"], dfbool["one"]], axis=1) + result = dfbool[["one", "three", "one"]] + check(result, expected) + + def test_multi_axis_dups(self): + # multi-axis dups + # GH 6121 + df = DataFrame( + np.arange(25.0).reshape(5, 5), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "C", "D", "E"], + ) + z = df[["A", "C", "A"]].copy() + expected = z.loc[["a", "c", "a"]] + + df = DataFrame( + np.arange(25.0).reshape(5, 5), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "C", "D", "E"], + ) + z = df[["A", "C", "A"]] + result = z.loc[["a", "c", "a"]] + check(result, expected) + + def test_columns_with_dups(self): + # GH 3468 related + + # basic + df = DataFrame([[1, 2]], columns=["a", "a"]) + df.columns = ["a", "a.1"] + str(df) + expected = DataFrame([[1, 2]], columns=["a", "a.1"]) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3]], columns=["b", "a", "a"]) + df.columns = ["b", "a", "a.1"] + str(df) + expected = DataFrame([[1, 2, 3]], columns=["b", "a", "a.1"]) + tm.assert_frame_equal(df, expected) + + 
def test_columns_with_dup_index(self): + # with a dup index + df = DataFrame([[1, 2]], columns=["a", "a"]) + df.columns = ["b", "b"] + str(df) + expected = DataFrame([[1, 2]], columns=["b", "b"]) + tm.assert_frame_equal(df, expected) + + def test_multi_dtype(self): + # multi-dtype + df = DataFrame( + [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], + columns=["a", "a", "b", "b", "d", "c", "c"], + ) + df.columns = list("ABCDEFG") + str(df) + expected = DataFrame( + [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + def test_multi_dtype2(self): + df = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a", "a", "a"]) + df.columns = ["a", "a.1", "a.2", "a.3"] + str(df) + expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"]) + tm.assert_frame_equal(df, expected) + + def test_dups_across_blocks(self, using_array_manager): + # dups across blocks + df_float = DataFrame(np.random.randn(10, 3), dtype="float64") + df_int = DataFrame(np.random.randn(10, 3).astype("int64")) + df_bool = DataFrame(True, index=df_float.index, columns=df_float.columns) + df_object = DataFrame("foo", index=df_float.index, columns=df_float.columns) + df_dt = DataFrame( + pd.Timestamp("20010101"), index=df_float.index, columns=df_float.columns + ) + df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) + + if not using_array_manager: + assert len(df._mgr.blknos) == len(df.columns) + assert len(df._mgr.blklocs) == len(df.columns) + + # testing iloc + for i in range(len(df.columns)): + df.iloc[:, i] + + def test_dup_columns_across_dtype(self): + # dup columns across dtype GH 2079/2194 + vals = [[1, -1, 2.0], [2, -2, 3.0]] + rs = DataFrame(vals, columns=["A", "A", "B"]) + xp = DataFrame(vals) + xp.columns = ["A", "A", "B"] + tm.assert_frame_equal(rs, xp) + + def test_set_value_by_index(self, using_array_manager): + # See gh-12344 + warn = ( + DeprecationWarning + if using_array_manager and not is_platform_windows() + else None + ) + msg = "will attempt to set the values inplace" + + df = DataFrame(np.arange(9).reshape(3, 3).T) + df.columns = list("AAA") + expected = df.iloc[:, 2] + + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:, 0] = 3 + tm.assert_series_equal(df.iloc[:, 2], expected) + + df = DataFrame(np.arange(9).reshape(3, 3).T) + df.columns = [2, float(2), str(2)] + expected = df.iloc[:, 1] + + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:, 0] = 3 + tm.assert_series_equal(df.iloc[:, 1], expected) diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py new file mode 100644 index 00000000..0b7699e4 --- /dev/null +++ b/pandas/tests/frame/test_npfuncs.py @@ -0,0 +1,28 @@ +""" +Tests for np.foo applied to DataFrame, not necessarily ufuncs. 
+""" +import numpy as np + +from pandas import ( + Categorical, + DataFrame, +) +import pandas._testing as tm + + +class TestAsArray: + def test_asarray_homogenous(self): + df = DataFrame({"A": Categorical([1, 2]), "B": Categorical([1, 2])}) + result = np.asarray(df) + # may change from object in the future + expected = np.array([[1, 1], [2, 2]], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + def test_np_sqrt(self, float_frame): + with np.errstate(all="ignore"): + result = np.sqrt(float_frame) + assert isinstance(result, type(float_frame)) + assert result.index is float_frame.index + assert result.columns is float_frame.columns + + tm.assert_frame_equal(result, float_frame.apply(np.sqrt)) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py new file mode 100644 index 00000000..35335c54 --- /dev/null +++ b/pandas/tests/frame/test_query_eval.py @@ -0,0 +1,1270 @@ +import operator + +import numpy as np +import pytest + +from pandas.errors import UndefinedVariableError +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.computation.check import NUMEXPR_INSTALLED + + +@pytest.fixture(params=["python", "pandas"], ids=lambda x: x) +def parser(request): + return request.param + + +@pytest.fixture( + params=["python", pytest.param("numexpr", marks=td.skip_if_no_ne)], ids=lambda x: x +) +def engine(request): + return request.param + + +def skip_if_no_pandas_parser(parser): + if parser != "pandas": + pytest.skip(f"cannot evaluate with parser {repr(parser)}") + + +class TestCompat: + def setup_method(self): + self.df = DataFrame({"A": [1, 2, 3]}) + self.expected1 = self.df[self.df.A > 0] + self.expected2 = self.df.A + 1 + + def test_query_default(self): + + # GH 12749 + # this should always work, whether NUMEXPR_INSTALLED or not + df = self.df + result = df.query("A>0") + tm.assert_frame_equal(result, self.expected1) + result = df.eval("A+1") + tm.assert_series_equal(result, self.expected2, check_names=False) + + def test_query_None(self): + + df = self.df + result = df.query("A>0", engine=None) + tm.assert_frame_equal(result, self.expected1) + result = df.eval("A+1", engine=None) + tm.assert_series_equal(result, self.expected2, check_names=False) + + def test_query_python(self): + + df = self.df + result = df.query("A>0", engine="python") + tm.assert_frame_equal(result, self.expected1) + result = df.eval("A+1", engine="python") + tm.assert_series_equal(result, self.expected2, check_names=False) + + def test_query_numexpr(self): + + df = self.df + if NUMEXPR_INSTALLED: + result = df.query("A>0", engine="numexpr") + tm.assert_frame_equal(result, self.expected1) + result = df.eval("A+1", engine="numexpr") + tm.assert_series_equal(result, self.expected2, check_names=False) + else: + msg = ( + r"'numexpr' is not installed or an unsupported version. 
" + r"Cannot use engine='numexpr' for query/eval if 'numexpr' is " + r"not installed" + ) + with pytest.raises(ImportError, match=msg): + df.query("A>0", engine="numexpr") + with pytest.raises(ImportError, match=msg): + df.eval("A+1", engine="numexpr") + + +class TestDataFrameEval: + + # smaller hits python, larger hits numexpr + @pytest.mark.parametrize("n", [4, 4000]) + @pytest.mark.parametrize( + "op_str,op,rop", + [ + ("+", "__add__", "__radd__"), + ("-", "__sub__", "__rsub__"), + ("*", "__mul__", "__rmul__"), + ("/", "__truediv__", "__rtruediv__"), + ], + ) + def test_ops(self, op_str, op, rop, n): + + # tst ops and reversed ops in evaluation + # GH7198 + + df = DataFrame(1, index=range(n), columns=list("abcd")) + df.iloc[0] = 2 + m = df.mean() + + base = DataFrame( # noqa:F841 + np.tile(m.values, n).reshape(n, -1), columns=list("abcd") + ) + + expected = eval(f"base {op_str} df") + + # ops as strings + result = eval(f"m {op_str} df") + tm.assert_frame_equal(result, expected) + + # these are commutative + if op in ["+", "*"]: + result = getattr(df, op)(m) + tm.assert_frame_equal(result, expected) + + # these are not + elif op in ["-", "/"]: + result = getattr(df, rop)(m) + tm.assert_frame_equal(result, expected) + + def test_dataframe_sub_numexpr_path(self): + # GH7192: Note we need a large number of rows to ensure this + # goes through the numexpr path + df = DataFrame({"A": np.random.randn(25000)}) + df.iloc[0:5] = np.nan + expected = 1 - np.isnan(df.iloc[0:25]) + result = (1 - np.isnan(df)).iloc[0:25] + tm.assert_frame_equal(result, expected) + + def test_query_non_str(self): + # GH 11485 + df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "b"]}) + + msg = "expr must be a string to be evaluated" + with pytest.raises(ValueError, match=msg): + df.query(lambda x: x.B == "b") + + with pytest.raises(ValueError, match=msg): + df.query(111) + + def test_query_empty_string(self): + # GH 13139 + df = DataFrame({"A": [1, 2, 3]}) + + msg = "expr cannot be an empty string" + with pytest.raises(ValueError, match=msg): + df.query("") + + def test_eval_resolvers_as_list(self): + # GH 14095 + df = DataFrame(np.random.randn(10, 2), columns=list("ab")) + dict1 = {"a": 1} + dict2 = {"b": 2} + assert df.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"] + assert pd.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"] + + def test_eval_resolvers_combined(self): + # GH 34966 + df = DataFrame(np.random.randn(10, 2), columns=list("ab")) + dict1 = {"c": 2} + + # Both input and default index/column resolvers should be usable + result = df.eval("a + b * c", resolvers=[dict1]) + + expected = df["a"] + df["b"] * dict1["c"] + tm.assert_series_equal(result, expected) + + def test_eval_object_dtype_binop(self): + # GH#24883 + df = DataFrame({"a1": ["Y", "N"]}) + res = df.eval("c = ((a1 == 'Y') & True)") + expected = DataFrame({"a1": ["Y", "N"], "c": [True, False]}) + tm.assert_frame_equal(res, expected) + + +class TestDataFrameQueryWithMultiIndex: + def test_query_with_named_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) + a = np.random.choice(["red", "green"], size=10) + b = np.random.choice(["eggs", "ham"], size=10) + index = MultiIndex.from_arrays([a, b], names=["color", "food"]) + df = DataFrame(np.random.randn(10, 2), index=index) + ind = Series( + df.index.get_level_values("color").values, index=index, name="color" + ) + + # equality + res1 = df.query('color == "red"', parser=parser, engine=engine) + res2 = df.query('"red" == color', parser=parser, 
engine=engine) + exp = df[ind == "red"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # inequality + res1 = df.query('color != "red"', parser=parser, engine=engine) + res2 = df.query('"red" != color', parser=parser, engine=engine) + exp = df[ind != "red"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # list equality (really just set membership) + res1 = df.query('color == ["red"]', parser=parser, engine=engine) + res2 = df.query('["red"] == color', parser=parser, engine=engine) + exp = df[ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('color != ["red"]', parser=parser, engine=engine) + res2 = df.query('["red"] != color', parser=parser, engine=engine) + exp = df[~ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # in/not in ops + res1 = df.query('["red"] in color', parser=parser, engine=engine) + res2 = df.query('"red" in color', parser=parser, engine=engine) + exp = df[ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('["red"] not in color', parser=parser, engine=engine) + res2 = df.query('"red" not in color', parser=parser, engine=engine) + exp = df[~ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + def test_query_with_unnamed_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) + a = np.random.choice(["red", "green"], size=10) + b = np.random.choice(["eggs", "ham"], size=10) + index = MultiIndex.from_arrays([a, b]) + df = DataFrame(np.random.randn(10, 2), index=index) + ind = Series(df.index.get_level_values(0).values, index=index) + + res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) + res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine) + exp = df[ind == "red"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # inequality + res1 = df.query('ilevel_0 != "red"', parser=parser, engine=engine) + res2 = df.query('"red" != ilevel_0', parser=parser, engine=engine) + exp = df[ind != "red"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # list equality (really just set membership) + res1 = df.query('ilevel_0 == ["red"]', parser=parser, engine=engine) + res2 = df.query('["red"] == ilevel_0', parser=parser, engine=engine) + exp = df[ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('ilevel_0 != ["red"]', parser=parser, engine=engine) + res2 = df.query('["red"] != ilevel_0', parser=parser, engine=engine) + exp = df[~ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # in/not in ops + res1 = df.query('["red"] in ilevel_0', parser=parser, engine=engine) + res2 = df.query('"red" in ilevel_0', parser=parser, engine=engine) + exp = df[ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('["red"] not in ilevel_0', parser=parser, engine=engine) + res2 = df.query('"red" not in ilevel_0', parser=parser, engine=engine) + exp = df[~ind.isin(["red"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # ## LEVEL 1 + ind = Series(df.index.get_level_values(1).values, index=index) + res1 = df.query('ilevel_1 == "eggs"', parser=parser, engine=engine) + res2 = df.query('"eggs" == ilevel_1', parser=parser, engine=engine) + exp = df[ind == "eggs"] + tm.assert_frame_equal(res1, exp) + 
tm.assert_frame_equal(res2, exp) + + # inequality + res1 = df.query('ilevel_1 != "eggs"', parser=parser, engine=engine) + res2 = df.query('"eggs" != ilevel_1', parser=parser, engine=engine) + exp = df[ind != "eggs"] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # list equality (really just set membership) + res1 = df.query('ilevel_1 == ["eggs"]', parser=parser, engine=engine) + res2 = df.query('["eggs"] == ilevel_1', parser=parser, engine=engine) + exp = df[ind.isin(["eggs"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('ilevel_1 != ["eggs"]', parser=parser, engine=engine) + res2 = df.query('["eggs"] != ilevel_1', parser=parser, engine=engine) + exp = df[~ind.isin(["eggs"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + # in/not in ops + res1 = df.query('["eggs"] in ilevel_1', parser=parser, engine=engine) + res2 = df.query('"eggs" in ilevel_1', parser=parser, engine=engine) + exp = df[ind.isin(["eggs"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + res1 = df.query('["eggs"] not in ilevel_1', parser=parser, engine=engine) + res2 = df.query('"eggs" not in ilevel_1', parser=parser, engine=engine) + exp = df[~ind.isin(["eggs"])] + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) + + def test_query_with_partially_named_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) + a = np.random.choice(["red", "green"], size=10) + b = np.arange(10) + index = MultiIndex.from_arrays([a, b]) + index.names = [None, "rating"] + df = DataFrame(np.random.randn(10, 2), index=index) + res = df.query("rating == 1", parser=parser, engine=engine) + ind = Series( + df.index.get_level_values("rating").values, index=index, name="rating" + ) + exp = df[ind == 1] + tm.assert_frame_equal(res, exp) + + res = df.query("rating != 1", parser=parser, engine=engine) + ind = Series( + df.index.get_level_values("rating").values, index=index, name="rating" + ) + exp = df[ind != 1] + tm.assert_frame_equal(res, exp) + + res = df.query('ilevel_0 == "red"', parser=parser, engine=engine) + ind = Series(df.index.get_level_values(0).values, index=index) + exp = df[ind == "red"] + tm.assert_frame_equal(res, exp) + + res = df.query('ilevel_0 != "red"', parser=parser, engine=engine) + ind = Series(df.index.get_level_values(0).values, index=index) + exp = df[ind != "red"] + tm.assert_frame_equal(res, exp) + + def test_query_multiindex_get_index_resolvers(self): + df = tm.makeCustomDataframe( + 10, 3, r_idx_nlevels=2, r_idx_names=["spam", "eggs"] + ) + resolvers = df._get_index_resolvers() + + def to_series(mi, level): + level_values = mi.get_level_values(level) + s = level_values.to_series() + s.index = mi + return s + + col_series = df.columns.to_series() + expected = { + "index": df.index, + "columns": col_series, + "spam": to_series(df.index, "spam"), + "eggs": to_series(df.index, "eggs"), + "C0": col_series, + } + for k, v in resolvers.items(): + if isinstance(v, Index): + assert v.is_(expected[k]) + elif isinstance(v, Series): + tm.assert_series_equal(v, expected[k]) + else: + raise AssertionError("object must be a Series or Index") + + +@td.skip_if_no_ne +class TestDataFrameQueryNumExprPandas: + @classmethod + def setup_class(cls): + cls.engine = "numexpr" + cls.parser = "pandas" + + @classmethod + def teardown_class(cls): + del cls.engine, cls.parser + + def test_date_query_with_attribute_access(self): + engine, parser = self.engine, self.parser + 
skip_if_no_pandas_parser(parser) + df = DataFrame(np.random.randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + df["dates2"] = date_range("1/1/2013", periods=5) + df["dates3"] = date_range("1/1/2014", periods=5) + res = df.query( + "@df.dates1 < 20130101 < @df.dates3", engine=engine, parser=parser + ) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_query_no_attribute_access(self): + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + df["dates2"] = date_range("1/1/2013", periods=5) + df["dates3"] = date_range("1/1/2014", periods=5) + res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates2"] = date_range("1/1/2013", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT + res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query("index < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.iloc[0, 0] = pd.NaT + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query("index < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT_duplicates(self): + engine, parser = self.engine, self.parser + n = 10 + d = {} + d["dates1"] = date_range("1/1/2012", periods=n) + d["dates3"] = date_range("1/1/2014", periods=n) + df = DataFrame(d) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) + expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_query_with_non_date(self): + engine, parser = self.engine, self.parser + + n = 10 + df = DataFrame( + {"dates": date_range("1/1/2012", periods=n), "nondate": np.arange(n)} + ) + + result = df.query("dates == nondate", parser=parser, engine=engine) + assert len(result) == 0 + + result = df.query("dates != nondate", parser=parser, engine=engine) + tm.assert_frame_equal(result, df) + + msg = r"Invalid comparison between dtype=datetime64\[ns\] and ndarray" + 
for op in ["<", ">", "<=", ">="]: + with pytest.raises(TypeError, match=msg): + df.query(f"dates {op} nondate", parser=parser, engine=engine) + + def test_query_syntax_error(self): + engine, parser = self.engine, self.parser + df = DataFrame({"i": range(10), "+": range(3, 13), "r": range(4, 14)}) + msg = "invalid syntax" + with pytest.raises(SyntaxError, match=msg): + df.query("i - +", engine=engine, parser=parser) + + def test_query_scope(self): + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + + df = DataFrame(np.random.randn(20, 2), columns=list("ab")) + + a, b = 1, 2 # noqa:F841 + res = df.query("a > b", engine=engine, parser=parser) + expected = df[df.a > df.b] + tm.assert_frame_equal(res, expected) + + res = df.query("@a > b", engine=engine, parser=parser) + expected = df[a > df.b] + tm.assert_frame_equal(res, expected) + + # no local variable c + with pytest.raises( + UndefinedVariableError, match="local variable 'c' is not defined" + ): + df.query("@a > b > @c", engine=engine, parser=parser) + + # no column named 'c' + with pytest.raises(UndefinedVariableError, match="name 'c' is not defined"): + df.query("@a > b > c", engine=engine, parser=parser) + + def test_query_doesnt_pickup_local(self): + engine, parser = self.engine, self.parser + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + # we don't pick up the local 'sin' + with pytest.raises(UndefinedVariableError, match="name 'sin' is not defined"): + df.query("sin > 5", engine=engine, parser=parser) + + def test_query_builtin(self): + from pandas.errors import NumExprClobberingError + + engine, parser = self.engine, self.parser + + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + df.index.name = "sin" + msg = "Variables in expression.+" + with pytest.raises(NumExprClobberingError, match=msg): + df.query("sin > 5", engine=engine, parser=parser) + + def test_query(self): + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + + tm.assert_frame_equal( + df.query("a < b", engine=engine, parser=parser), df[df.a < df.b] + ) + tm.assert_frame_equal( + df.query("a + b > b * c", engine=engine, parser=parser), + df[df.a + df.b > df.b * df.c], + ) + + def test_query_index_with_name(self): + engine, parser = self.engine, self.parser + df = DataFrame( + np.random.randint(10, size=(10, 3)), + index=Index(range(10), name="blob"), + columns=["a", "b", "c"], + ) + res = df.query("(blob < 5) & (a < b)", engine=engine, parser=parser) + expec = df[(df.index < 5) & (df.a < df.b)] + tm.assert_frame_equal(res, expec) + + res = df.query("blob < b", engine=engine, parser=parser) + expec = df[df.index < df.b] + + tm.assert_frame_equal(res, expec) + + def test_query_index_without_name(self): + engine, parser = self.engine, self.parser + df = DataFrame( + np.random.randint(10, size=(10, 3)), + index=range(10), + columns=["a", "b", "c"], + ) + + # "index" should refer to the index + res = df.query("index < b", engine=engine, parser=parser) + expec = df[df.index < df.b] + tm.assert_frame_equal(res, expec) + + # test against a scalar + res = df.query("index < 5", engine=engine, parser=parser) + expec = df[df.index < 5] + tm.assert_frame_equal(res, expec) + + def test_nested_scope(self): + engine = self.engine + parser = self.parser + + skip_if_no_pandas_parser(parser) + + df = DataFrame(np.random.randn(5, 3)) + df2 = DataFrame(np.random.randn(5, 3)) + expected = df[(df > 0) & (df2 > 0)] + + result 
= df.query("(@df > 0) & (@df2 > 0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + result = pd.eval("df[df > 0 and df2 > 0]", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + result = pd.eval( + "df[df > 0 and df2 > 0 and df[df > 0] > 0]", engine=engine, parser=parser + ) + expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)] + tm.assert_frame_equal(result, expected) + + result = pd.eval("df[(df>0) & (df2>0)]", engine=engine, parser=parser) + expected = df.query("(@df>0) & (@df2>0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + def test_nested_raises_on_local_self_reference(self): + df = DataFrame(np.random.randn(5, 3)) + + # can't reference ourself b/c we're a local so @ is necessary + with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"): + df.query("df > 0", engine=self.engine, parser=self.parser) + + def test_local_syntax(self): + skip_if_no_pandas_parser(self.parser) + + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(100, 10), columns=list("abcdefghij")) + b = 1 + expect = df[df.a < b] + result = df.query("a < @b", engine=engine, parser=parser) + tm.assert_frame_equal(result, expect) + + expect = df[df.a < df.b] + result = df.query("a < b", engine=engine, parser=parser) + tm.assert_frame_equal(result, expect) + + def test_chained_cmp_and_in(self): + skip_if_no_pandas_parser(self.parser) + engine, parser = self.engine, self.parser + cols = list("abc") + df = DataFrame(np.random.randn(100, len(cols)), columns=cols) + res = df.query( + "a < b < c and a not in b not in c", engine=engine, parser=parser + ) + ind = (df.a < df.b) & (df.b < df.c) & ~df.b.isin(df.a) & ~df.c.isin(df.b) + expec = df[ind] + tm.assert_frame_equal(res, expec) + + def test_local_variable_with_in(self): + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + a = Series(np.random.randint(3, size=15), name="a") + b = Series(np.random.randint(10, size=15), name="b") + df = DataFrame({"a": a, "b": b}) + + expected = df.loc[(df.b - 1).isin(a)] + result = df.query("b - 1 in a", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + b = Series(np.random.randint(10, size=15), name="b") + expected = df.loc[(b - 1).isin(a)] + result = df.query("@b - 1 in a", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + def test_at_inside_string(self): + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + c = 1 # noqa:F841 + df = DataFrame({"a": ["a", "a", "b", "b", "@c", "@c"]}) + result = df.query('a == "@c"', engine=engine, parser=parser) + expected = df[df.a == "@c"] + tm.assert_frame_equal(result, expected) + + def test_query_undefined_local(self): + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + + df = DataFrame(np.random.rand(10, 2), columns=list("ab")) + with pytest.raises( + UndefinedVariableError, match="local variable 'c' is not defined" + ): + df.query("a == @c", engine=engine, parser=parser) + + def test_index_resolvers_come_after_columns_with_the_same_name(self): + n = 1 # noqa:F841 + a = np.r_[20:101:20] + + df = DataFrame({"index": a, "b": np.random.randn(a.size)}) + df.index.name = "index" + result = df.query("index > 5", engine=self.engine, parser=self.parser) + expected = df[df["index"] > 5] + tm.assert_frame_equal(result, expected) + + df = DataFrame({"index": a, "b": np.random.randn(a.size)}) + result = df.query("ilevel_0 > 5", engine=self.engine, 
parser=self.parser) + expected = df.loc[df.index[df.index > 5]] + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": a, "b": np.random.randn(a.size)}) + df.index.name = "a" + result = df.query("a > 5", engine=self.engine, parser=self.parser) + expected = df[df.a > 5] + tm.assert_frame_equal(result, expected) + + result = df.query("index > 5", engine=self.engine, parser=self.parser) + expected = df.loc[df.index[df.index > 5]] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("op, f", [["==", operator.eq], ["!=", operator.ne]]) + def test_inf(self, op, f): + n = 10 + df = DataFrame({"a": np.random.rand(n), "b": np.random.rand(n)}) + df.loc[::2, 0] = np.inf + q = f"a {op} inf" + expected = df[f(df.a, np.inf)] + result = df.query(q, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(result, expected) + + def test_check_tz_aware_index_query(self, tz_aware_fixture): + # https://github.com/pandas-dev/pandas/issues/29463 + tz = tz_aware_fixture + df_index = date_range( + start="2019-01-01", freq="1d", periods=10, tz=tz, name="time" + ) + expected = DataFrame(index=df_index) + df = DataFrame(index=df_index) + result = df.query('"2018-01-03 00:00:00+00" < time') + tm.assert_frame_equal(result, expected) + + expected = DataFrame(df_index) + result = df.reset_index().query('"2018-01-03 00:00:00+00" < time') + tm.assert_frame_equal(result, expected) + + def test_method_calls_in_query(self): + # https://github.com/pandas-dev/pandas/issues/22435 + n = 10 + df = DataFrame({"a": 2 * np.random.rand(n), "b": np.random.rand(n)}) + expected = df[df["a"].astype("int") == 0] + result = df.query( + "a.astype('int') == 0", engine=self.engine, parser=self.parser + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame( + { + "a": np.where(np.random.rand(n) < 0.5, np.nan, np.random.randn(n)), + "b": np.random.randn(n), + } + ) + expected = df[df["a"].notnull()] + result = df.query("a.notnull()", engine=self.engine, parser=self.parser) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_ne +class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "numexpr" + cls.parser = "python" + + def test_date_query_no_attribute_access(self): + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + df["dates2"] = date_range("1/1/2013", periods=5) + df["dates3"] = date_range("1/1/2014", periods=5) + res = df.query( + "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates2"] = date_range("1/1/2013", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT + res = df.query( + "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", 
periods=n) + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query( + "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.iloc[0, 0] = pd.NaT + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + res = df.query( + "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT_duplicates(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + return_value = df.set_index("dates1", inplace=True, drop=True) + assert return_value is None + msg = r"'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query("index < 20130101 < dates3", engine=engine, parser=parser) + + def test_nested_scope(self): + engine = self.engine + parser = self.parser + # smoke test + x = 1 # noqa:F841 + result = pd.eval("x + 1", engine=engine, parser=parser) + assert result == 2 + + df = DataFrame(np.random.randn(5, 3)) + df2 = DataFrame(np.random.randn(5, 3)) + + # don't have the pandas parser + msg = r"The '@' prefix is only supported by the pandas parser" + with pytest.raises(SyntaxError, match=msg): + df.query("(@df>0) & (@df2>0)", engine=engine, parser=parser) + + with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"): + df.query("(df>0) & (df2>0)", engine=engine, parser=parser) + + expected = df[(df > 0) & (df2 > 0)] + result = pd.eval("df[(df > 0) & (df2 > 0)]", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)] + result = pd.eval( + "df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]", engine=engine, parser=parser + ) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "python" + cls.parser = "pandas" + + def test_query_builtin(self): + engine, parser = self.engine, self.parser + + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + df.index.name = "sin" + expected = df[df.index > 5] + result = df.query("sin > 5", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = cls.parser = "python" + + def test_query_builtin(self): + engine, parser = self.engine, self.parser + + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + df.index.name = "sin" + expected = df[df.index > 5] + result = df.query("sin > 5", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryStrings: + def test_str_query_method(self, parser, engine): + df = 
DataFrame(np.random.randn(10, 1), columns=["b"]) + df["strings"] = Series(list("aabbccddee")) + expect = df[df.strings == "a"] + + if parser != "pandas": + col = "strings" + lst = '"a"' + + lhs = [col] * 2 + [lst] * 2 + rhs = lhs[::-1] + + eq, ne = "==", "!=" + ops = 2 * ([eq] + [ne]) + msg = r"'(Not)?In' nodes are not implemented" + + for lhs, op, rhs in zip(lhs, ops, rhs): + ex = f"{lhs} {op} {rhs}" + with pytest.raises(NotImplementedError, match=msg): + df.query( + ex, + engine=engine, + parser=parser, + local_dict={"strings": df.strings}, + ) + else: + res = df.query('"a" == strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('strings == "a"', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + tm.assert_frame_equal(res, df[df.strings.isin(["a"])]) + + expect = df[df.strings != "a"] + res = df.query('strings != "a"', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('"a" != strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + tm.assert_frame_equal(res, df[~df.strings.isin(["a"])]) + + def test_str_list_query_method(self, parser, engine): + df = DataFrame(np.random.randn(10, 1), columns=["b"]) + df["strings"] = Series(list("aabbccddee")) + expect = df[df.strings.isin(["a", "b"])] + + if parser != "pandas": + col = "strings" + lst = '["a", "b"]' + + lhs = [col] * 2 + [lst] * 2 + rhs = lhs[::-1] + + eq, ne = "==", "!=" + ops = 2 * ([eq] + [ne]) + msg = r"'(Not)?In' nodes are not implemented" + + for lhs, op, rhs in zip(lhs, ops, rhs): + ex = f"{lhs} {op} {rhs}" + with pytest.raises(NotImplementedError, match=msg): + df.query(ex, engine=engine, parser=parser) + else: + res = df.query('strings == ["a", "b"]', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('["a", "b"] == strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + expect = df[~df.strings.isin(["a", "b"])] + + res = df.query('strings != ["a", "b"]', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('["a", "b"] != strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + def test_query_with_string_columns(self, parser, engine): + df = DataFrame( + { + "a": list("aaaabbbbcccc"), + "b": list("aabbccddeeff"), + "c": np.random.randint(5, size=12), + "d": np.random.randint(9, size=12), + } + ) + if parser == "pandas": + res = df.query("a in b", parser=parser, engine=engine) + expec = df[df.a.isin(df.b)] + tm.assert_frame_equal(res, expec) + + res = df.query("a in b and c < d", parser=parser, engine=engine) + expec = df[df.a.isin(df.b) & (df.c < df.d)] + tm.assert_frame_equal(res, expec) + else: + msg = r"'(Not)?In' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query("a in b", parser=parser, engine=engine) + + msg = r"'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query("a in b and c < d", parser=parser, engine=engine) + + def test_object_array_eq_ne(self, parser, engine): + df = DataFrame( + { + "a": list("aaaabbbbcccc"), + "b": list("aabbccddeeff"), + "c": np.random.randint(5, size=12), + "d": np.random.randint(9, size=12), + } + ) + res = df.query("a == b", parser=parser, engine=engine) + exp = df[df.a == df.b] + tm.assert_frame_equal(res, exp) + + res = df.query("a != b", parser=parser, engine=engine) + exp = df[df.a != df.b] + tm.assert_frame_equal(res, exp) + + def test_query_with_nested_strings(self, 
parser, engine): + skip_if_no_pandas_parser(parser) + events = [ + f"page {n} {act}" for n in range(1, 4) for act in ["load", "exit"] + ] * 2 + stamps1 = date_range("2014-01-01 0:00:01", freq="30s", periods=6) + stamps2 = date_range("2014-02-01 1:00:01", freq="30s", periods=6) + df = DataFrame( + { + "id": np.arange(1, 7).repeat(2), + "event": events, + "timestamp": stamps1.append(stamps2), + } + ) + + expected = df[df.event == '"page 1 load"'] + res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine) + tm.assert_frame_equal(expected, res) + + def test_query_with_nested_special_character(self, parser, engine): + skip_if_no_pandas_parser(parser) + df = DataFrame({"a": ["a", "b", "test & test"], "b": [1, 2, 3]}) + res = df.query('a == "test & test"', parser=parser, engine=engine) + expec = df[df.a == "test & test"] + tm.assert_frame_equal(res, expec) + + @pytest.mark.parametrize( + "op, func", + [ + ["<", operator.lt], + [">", operator.gt], + ["<=", operator.le], + [">=", operator.ge], + ], + ) + def test_query_lex_compare_strings(self, parser, engine, op, func): + + a = Series(np.random.choice(list("abcde"), 20)) + b = Series(np.arange(a.size)) + df = DataFrame({"X": a, "Y": b}) + + res = df.query(f'X {op} "d"', engine=engine, parser=parser) + expected = df[func(df.X, "d")] + tm.assert_frame_equal(res, expected) + + def test_query_single_element_booleans(self, parser, engine): + columns = "bid", "bidsize", "ask", "asksize" + data = np.random.randint(2, size=(1, len(columns))).astype(bool) + df = DataFrame(data, columns=columns) + res = df.query("bid & ask", engine=engine, parser=parser) + expected = df[df.bid & df.ask] + tm.assert_frame_equal(res, expected) + + def test_query_string_scalar_variable(self, parser, engine): + skip_if_no_pandas_parser(parser) + df = DataFrame( + { + "Symbol": ["BUD US", "BUD US", "IBM US", "IBM US"], + "Price": [109.70, 109.72, 183.30, 183.35], + } + ) + e = df[df.Symbol == "BUD US"] + symb = "BUD US" # noqa:F841 + r = df.query("Symbol == @symb", parser=parser, engine=engine) + tm.assert_frame_equal(e, r) + + +class TestDataFrameEvalWithFrame: + @pytest.fixture + def frame(self): + return DataFrame(np.random.randn(10, 3), columns=list("abc")) + + def test_simple_expr(self, frame, parser, engine): + res = frame.eval("a + b", engine=engine, parser=parser) + expect = frame.a + frame.b + tm.assert_series_equal(res, expect) + + def test_bool_arith_expr(self, frame, parser, engine): + res = frame.eval("a[a < 1] + b", engine=engine, parser=parser) + expect = frame.a[frame.a < 1] + frame.b + tm.assert_series_equal(res, expect) + + @pytest.mark.parametrize("op", ["+", "-", "*", "/"]) + def test_invalid_type_for_operator_raises(self, parser, engine, op): + df = DataFrame({"a": [1, 2], "b": ["c", "d"]}) + msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'" + + with pytest.raises(TypeError, match=msg): + df.eval(f"a {op} b", engine=engine, parser=parser) + + +class TestDataFrameQueryBacktickQuoting: + @pytest.fixture(scope="class") + def df(self): + """ + Yields a dataframe with strings that may or may not need escaping + by backticks. The last two columns cannot be escaped by backticks + and should raise a ValueError. 
+ """ + yield DataFrame( + { + "A": [1, 2, 3], + "B B": [3, 2, 1], + "C C": [4, 5, 6], + "C C": [7, 4, 3], + "C_C": [8, 9, 10], + "D_D D": [11, 1, 101], + "E.E": [6, 3, 5], + "F-F": [8, 1, 10], + "1e1": [2, 4, 8], + "def": [10, 11, 2], + "A (x)": [4, 1, 3], + "B(x)": [1, 1, 5], + "B (x)": [2, 7, 4], + " &^ :!€$?(} > <++*'' ": [2, 5, 6], + "": [10, 11, 1], + " A": [4, 7, 9], + " ": [1, 2, 1], + "it's": [6, 3, 1], + "that's": [9, 1, 8], + "☺": [8, 7, 6], + "foo#bar": [2, 4, 5], + 1: [5, 7, 9], + } + ) + + def test_single_backtick_variable_query(self, df): + res = df.query("1 < `B B`") + expect = df[1 < df["B B"]] + tm.assert_frame_equal(res, expect) + + def test_two_backtick_variables_query(self, df): + res = df.query("1 < `B B` and 4 < `C C`") + expect = df[(1 < df["B B"]) & (4 < df["C C"])] + tm.assert_frame_equal(res, expect) + + def test_single_backtick_variable_expr(self, df): + res = df.eval("A + `B B`") + expect = df["A"] + df["B B"] + tm.assert_series_equal(res, expect) + + def test_two_backtick_variables_expr(self, df): + res = df.eval("`B B` + `C C`") + expect = df["B B"] + df["C C"] + tm.assert_series_equal(res, expect) + + def test_already_underscore_variable(self, df): + res = df.eval("`C_C` + A") + expect = df["C_C"] + df["A"] + tm.assert_series_equal(res, expect) + + def test_same_name_but_underscores(self, df): + res = df.eval("C_C + `C C`") + expect = df["C_C"] + df["C C"] + tm.assert_series_equal(res, expect) + + def test_mixed_underscores_and_spaces(self, df): + res = df.eval("A + `D_D D`") + expect = df["A"] + df["D_D D"] + tm.assert_series_equal(res, expect) + + def test_backtick_quote_name_with_no_spaces(self, df): + res = df.eval("A + `C_C`") + expect = df["A"] + df["C_C"] + tm.assert_series_equal(res, expect) + + def test_special_characters(self, df): + res = df.eval("`E.E` + `F-F` - A") + expect = df["E.E"] + df["F-F"] - df["A"] + tm.assert_series_equal(res, expect) + + def test_start_with_digit(self, df): + res = df.eval("A + `1e1`") + expect = df["A"] + df["1e1"] + tm.assert_series_equal(res, expect) + + def test_keyword(self, df): + res = df.eval("A + `def`") + expect = df["A"] + df["def"] + tm.assert_series_equal(res, expect) + + def test_unneeded_quoting(self, df): + res = df.query("`A` > 2") + expect = df[df["A"] > 2] + tm.assert_frame_equal(res, expect) + + def test_parenthesis(self, df): + res = df.query("`A (x)` > 2") + expect = df[df["A (x)"] > 2] + tm.assert_frame_equal(res, expect) + + def test_empty_string(self, df): + res = df.query("`` > 5") + expect = df[df[""] > 5] + tm.assert_frame_equal(res, expect) + + def test_multiple_spaces(self, df): + res = df.query("`C C` > 5") + expect = df[df["C C"] > 5] + tm.assert_frame_equal(res, expect) + + def test_start_with_spaces(self, df): + res = df.eval("` A` + ` `") + expect = df[" A"] + df[" "] + tm.assert_series_equal(res, expect) + + def test_lots_of_operators_string(self, df): + res = df.query("` &^ :!€$?(} > <++*'' ` > 4") + expect = df[df[" &^ :!€$?(} > <++*'' "] > 4] + tm.assert_frame_equal(res, expect) + + def test_missing_attribute(self, df): + message = "module 'pandas' has no attribute 'thing'" + with pytest.raises(AttributeError, match=message): + df.eval("@pd.thing") + + def test_failing_quote(self, df): + msg = r"(Could not convert ).*( to a valid Python identifier.)" + with pytest.raises(SyntaxError, match=msg): + df.query("`it's` > `that's`") + + def test_failing_character_outside_range(self, df): + msg = r"(Could not convert ).*( to a valid Python identifier.)" + with pytest.raises(SyntaxError, 
match=msg): + df.query("`☺` > 4") + + def test_failing_hashtag(self, df): + msg = "Failed to parse backticks" + with pytest.raises(SyntaxError, match=msg): + df.query("`foo#bar` > 4") + + def test_call_non_named_expression(self, df): + """ + Only attributes and variables ('named functions') can be called. + .__call__() is not an allowed attribute because that would allow + calling anything. + https://github.com/pandas-dev/pandas/pull/32460 + """ + + def func(*_): + return 1 + + funcs = [func] # noqa:F841 + + df.eval("@func()") + + with pytest.raises(TypeError, match="Only named functions are supported"): + df.eval("@funcs[0]()") + + with pytest.raises(TypeError, match="Only named functions are supported"): + df.eval("@funcs[0].__call__()") diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py new file mode 100644 index 00000000..b4d3d1ae --- /dev/null +++ b/pandas/tests/frame/test_reductions.py @@ -0,0 +1,1887 @@ +from datetime import timedelta +from decimal import Decimal +import inspect +import re + +from dateutil.tz import tzlocal +import numpy as np +import pytest + +from pandas._libs import lib +from pandas.compat import is_platform_windows +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_categorical_dtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + isna, + notna, + to_datetime, + to_timedelta, +) +import pandas._testing as tm +import pandas.core.algorithms as algorithms +import pandas.core.nanops as nanops + + +def assert_stat_op_calc( + opname, + alternative, + frame, + has_skipna=True, + check_dtype=True, + check_dates=False, + rtol=1e-5, + atol=1e-8, + skipna_alternative=None, +): + """ + Check that operator opname works as advertised on frame + + Parameters + ---------- + opname : str + Name of the operator to test on frame + alternative : function + Function that opname is tested against; i.e. "frame.opname()" should + equal "alternative(frame)". + frame : DataFrame + The object that the tests are executed on + has_skipna : bool, default True + Whether the method "opname" has the kwarg "skip_na" + check_dtype : bool, default True + Whether the dtypes of the result of "frame.opname()" and + "alternative(frame)" should be checked. + check_dates : bool, default false + Whether opname should be tested on a Datetime Series + rtol : float, default 1e-5 + Relative tolerance. + atol : float, default 1e-8 + Absolute tolerance. 
+ skipna_alternative : function, default None + NaN-safe version of alternative + """ + warn = FutureWarning if opname == "mad" else None + f = getattr(frame, opname) + + if check_dates: + expected_warning = FutureWarning if opname in ["mean", "median"] else None + df = DataFrame({"b": date_range("1/1/2001", periods=2)}) + with tm.assert_produces_warning(expected_warning): + result = getattr(df, opname)() + assert isinstance(result, Series) + + df["a"] = range(len(df)) + with tm.assert_produces_warning(expected_warning): + result = getattr(df, opname)() + assert isinstance(result, Series) + assert len(result) + + if has_skipna: + + def wrapper(x): + return alternative(x.values) + + skipna_wrapper = tm._make_skipna_wrapper(alternative, skipna_alternative) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal( + result0, frame.apply(wrapper), check_dtype=check_dtype, rtol=rtol, atol=atol + ) + tm.assert_series_equal( + result1, + frame.apply(wrapper, axis=1), + rtol=rtol, + atol=atol, + ) + else: + skipna_wrapper = alternative + + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal( + result0, + frame.apply(skipna_wrapper), + check_dtype=check_dtype, + rtol=rtol, + atol=atol, + ) + + if opname in ["sum", "prod"]: + expected = frame.apply(skipna_wrapper, axis=1) + tm.assert_series_equal( + result1, expected, check_dtype=False, rtol=rtol, atol=atol + ) + + # check dtypes + if check_dtype: + lcd_dtype = frame.values.dtype + assert lcd_dtype == result0.dtype + assert lcd_dtype == result1.dtype + + # bad axis + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + with pytest.raises(ValueError, match="No axis named 2"): + f(axis=2) + + # all NA case + if has_skipna: + all_na = frame * np.NaN + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname in ["sum", "prod"]: + unit = 1 if opname == "prod" else 0 # result for empty sum/prod + expected = Series(unit, index=r0.index, dtype=r0.dtype) + tm.assert_series_equal(r0, expected) + expected = Series(unit, index=r1.index, dtype=r1.dtype) + tm.assert_series_equal(r1, expected) + + +class TestDataFrameAnalytics: + + # --------------------------------------------------------------------- + # Reductions + @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize( + "opname", + [ + "count", + "sum", + "mean", + "product", + "median", + "min", + "max", + "nunique", + "mad", + "var", + "std", + "sem", + pytest.param("skew", marks=td.skip_if_no_scipy), + pytest.param("kurt", marks=td.skip_if_no_scipy), + ], + ) + def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname): + warn = FutureWarning if opname == "mad" else None + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): + getattr(float_string_frame, opname)(axis=axis) + if opname not in ("nunique", "mad"): + getattr(float_string_frame, opname)(axis=axis, numeric_only=True) + + @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize( + "opname", + [ + "count", 
+ "sum", + "mean", + "product", + "median", + "min", + "max", + "var", + "std", + "sem", + pytest.param("skew", marks=td.skip_if_no_scipy), + pytest.param("kurt", marks=td.skip_if_no_scipy), + ], + ) + def test_stat_op_api_float_frame(self, float_frame, axis, opname): + getattr(float_frame, opname)(axis=axis, numeric_only=False) + + def test_stat_op_calc(self, float_frame_with_na, mixed_float_frame): + def count(s): + return notna(s).sum() + + def nunique(s): + return len(algorithms.unique1d(s.dropna())) + + def mad(x): + return np.abs(x - x.mean()).mean() + + def var(x): + return np.var(x, ddof=1) + + def std(x): + return np.std(x, ddof=1) + + def sem(x): + return np.std(x, ddof=1) / np.sqrt(len(x)) + + assert_stat_op_calc( + "nunique", + nunique, + float_frame_with_na, + has_skipna=False, + check_dtype=False, + check_dates=True, + ) + + # GH#32571 check_less_precise is needed on apparently-random + # py37-npdev builds and OSX-PY36-min_version builds + # mixed types (with upcasting happening) + assert_stat_op_calc( + "sum", + np.sum, + mixed_float_frame.astype("float32"), + check_dtype=False, + rtol=1e-3, + ) + + assert_stat_op_calc( + "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum + ) + assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True) + assert_stat_op_calc( + "product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod + ) + + assert_stat_op_calc("mad", mad, float_frame_with_na) + assert_stat_op_calc("var", var, float_frame_with_na) + assert_stat_op_calc("std", std, float_frame_with_na) + assert_stat_op_calc("sem", sem, float_frame_with_na) + + assert_stat_op_calc( + "count", + count, + float_frame_with_na, + has_skipna=False, + check_dtype=False, + check_dates=True, + ) + + @td.skip_if_no_scipy + def test_stat_op_calc_skew_kurtosis(self, float_frame_with_na): + def skewness(x): + from scipy.stats import skew + + if len(x) < 3: + return np.nan + return skew(x, bias=False) + + def kurt(x): + from scipy.stats import kurtosis + + if len(x) < 4: + return np.nan + return kurtosis(x, bias=False) + + assert_stat_op_calc("skew", skewness, float_frame_with_na) + assert_stat_op_calc("kurt", kurt, float_frame_with_na) + + # TODO: Ensure warning isn't emitted in the first place + # ignore mean of empty slice and all-NaN + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + def test_median(self, float_frame_with_na, int_frame): + def wrapper(x): + if isna(x).any(): + return np.nan + return np.median(x) + + assert_stat_op_calc("median", wrapper, float_frame_with_na, check_dates=True) + assert_stat_op_calc( + "median", wrapper, int_frame, check_dtype=False, check_dates=True + ) + + @pytest.mark.parametrize( + "method", ["sum", "mean", "prod", "var", "std", "skew", "min", "max"] + ) + @pytest.mark.parametrize( + "df", + [ + DataFrame( + { + "a": [ + -0.00049987540199591344, + -0.0016467257772919831, + 0.00067695870775883013, + ], + "b": [-0, -0, 0.0], + "c": [ + 0.00031111847529610595, + 0.0014902627951905339, + -0.00094099200035979691, + ], + }, + index=["foo", "bar", "baz"], + dtype="O", + ), + DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object), + ], + ) + def test_stat_operators_attempt_obj_array(self, method, df): + # GH#676 + assert df.values.dtype == np.object_ + result = getattr(df, method)(1) + expected = getattr(df.astype("f8"), method)(1) + + if method in ["sum", "prod"]: + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"]) + def 
test_mixed_ops(self, op): + # GH#16116 + df = DataFrame( + { + "int": [1, 2, 3, 4], + "float": [1.0, 2.0, 3.0, 4.0], + "str": ["a", "b", "c", "d"], + } + ) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, op)() + assert len(result) == 2 + + with pd.option_context("use_bottleneck", False): + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, op)() + assert len(result) == 2 + + def test_reduce_mixed_frame(self): + # GH 6806 + df = DataFrame( + { + "bool_data": [True, True, False, False, False], + "int_data": [10, 20, 30, 40, 50], + "string_data": ["a", "b", "c", "d", "e"], + } + ) + df.reindex(columns=["bool_data", "int_data", "string_data"]) + test = df.sum(axis=0) + tm.assert_numpy_array_equal( + test.values, np.array([2, 150, "abcde"], dtype=object) + ) + alt = df.T.sum(axis=1) + tm.assert_series_equal(test, alt) + + def test_nunique(self): + df = DataFrame({"A": [1, 1, 1], "B": [1, 2, 3], "C": [1, np.nan, 3]}) + tm.assert_series_equal(df.nunique(), Series({"A": 1, "B": 3, "C": 2})) + tm.assert_series_equal( + df.nunique(dropna=False), Series({"A": 1, "B": 3, "C": 3}) + ) + tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) + tm.assert_series_equal( + df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}) + ) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_mean_mixed_datetime_numeric(self, tz): + # https://github.com/pandas-dev/pandas/issues/24752 + df = DataFrame({"A": [1, 1], "B": [Timestamp("2000", tz=tz)] * 2}) + with tm.assert_produces_warning(FutureWarning): + result = df.mean() + expected = Series([1.0], index=["A"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_mean_excludes_datetimes(self, tz): + # https://github.com/pandas-dev/pandas/issues/24752 + # Our long-term desired behavior is unclear, but the behavior in + # 0.24.0rc1 was buggy. + df = DataFrame({"A": [Timestamp("2000", tz=tz)] * 2}) + with tm.assert_produces_warning(FutureWarning): + result = df.mean() + + expected = Series(dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_mean_mixed_string_decimal(self): + # GH 11670 + # possible bug when calculating mean of DataFrame? 
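+ # the string column "B" is expected to be dropped as a nuisance column, so the mean covers only "A" and "C"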
+ + d = [ + {"A": 2, "B": None, "C": Decimal("628.00")}, + {"A": 1, "B": None, "C": Decimal("383.00")}, + {"A": 3, "B": None, "C": Decimal("651.00")}, + {"A": 2, "B": None, "C": Decimal("575.00")}, + {"A": 4, "B": None, "C": Decimal("1114.00")}, + {"A": 1, "B": "TEST", "C": Decimal("241.00")}, + {"A": 2, "B": None, "C": Decimal("572.00")}, + {"A": 4, "B": None, "C": Decimal("609.00")}, + {"A": 3, "B": None, "C": Decimal("820.00")}, + {"A": 5, "B": None, "C": Decimal("1223.00")}, + ] + + df = DataFrame(d) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() + expected = Series([2.7, 681.6], index=["A", "C"]) + tm.assert_series_equal(result, expected) + + def test_var_std(self, datetime_frame): + result = datetime_frame.std(ddof=4) + expected = datetime_frame.apply(lambda x: x.std(ddof=4)) + tm.assert_almost_equal(result, expected) + + result = datetime_frame.var(ddof=4) + expected = datetime_frame.apply(lambda x: x.var(ddof=4)) + tm.assert_almost_equal(result, expected) + + arr = np.repeat(np.random.random((1, 1000)), 1000, 0) + result = nanops.nanvar(arr, axis=0) + assert not (result < 0).any() + + with pd.option_context("use_bottleneck", False): + result = nanops.nanvar(arr, axis=0) + assert not (result < 0).any() + + @pytest.mark.parametrize("meth", ["sem", "var", "std"]) + def test_numeric_only_flag(self, meth): + # GH 9201 + df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # set one entry to a number in str format + df1.loc[0, "foo"] = "100" + + df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # set one entry to a non-number str + df2.loc[0, "foo"] = "a" + + result = getattr(df1, meth)(axis=1, numeric_only=True) + expected = getattr(df1[["bar", "baz"]], meth)(axis=1) + tm.assert_series_equal(expected, result) + + result = getattr(df2, meth)(axis=1, numeric_only=True) + expected = getattr(df2[["bar", "baz"]], meth)(axis=1) + tm.assert_series_equal(expected, result) + + # df1 has all numbers, df2 has a letter inside + msg = r"unsupported operand type\(s\) for -: 'float' and 'str'" + with pytest.raises(TypeError, match=msg): + getattr(df1, meth)(axis=1, numeric_only=False) + msg = "could not convert string to float: 'a'" + with pytest.raises(TypeError, match=msg): + getattr(df2, meth)(axis=1, numeric_only=False) + + def test_sem(self, datetime_frame): + result = datetime_frame.sem(ddof=4) + expected = datetime_frame.apply(lambda x: x.std(ddof=4) / np.sqrt(len(x))) + tm.assert_almost_equal(result, expected) + + arr = np.repeat(np.random.random((1, 1000)), 1000, 0) + result = nanops.nansem(arr, axis=0) + assert not (result < 0).any() + + with pd.option_context("use_bottleneck", False): + result = nanops.nansem(arr, axis=0) + assert not (result < 0).any() + + @td.skip_if_no_scipy + def test_kurt(self): + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + df = DataFrame(np.random.randn(6, 3), index=index) + + kurt = df.kurt() + with tm.assert_produces_warning(FutureWarning): + kurt2 = df.kurt(level=0).xs("bar") + tm.assert_series_equal(kurt, kurt2, check_names=False) + assert kurt.name is None + assert kurt2.name == "bar" + + @pytest.mark.parametrize( + "dropna, expected", + [ + ( + True, + { + "A": [12], + "B": [10.0], + "C": [1.0], + "D": ["a"], + "E": Categorical(["a"], categories=["a"]), + "F": to_datetime(["2000-1-2"]), + "G": to_timedelta(["1 days"]), + }, + ), + ( + False, + { + "A": 
[12], + "B": [10.0], + "C": [np.nan], + "D": np.array([np.nan], dtype=object), + "E": Categorical([np.nan], categories=["a"]), + "F": [pd.NaT], + "G": to_timedelta([pd.NaT]), + }, + ), + ( + True, + { + "H": [8, 9, np.nan, np.nan], + "I": [8, 9, np.nan, np.nan], + "J": [1, np.nan, np.nan, np.nan], + "K": Categorical(["a", np.nan, np.nan, np.nan], categories=["a"]), + "L": to_datetime(["2000-1-2", "NaT", "NaT", "NaT"]), + "M": to_timedelta(["1 days", "nan", "nan", "nan"]), + "N": [0, 1, 2, 3], + }, + ), + ( + False, + { + "H": [8, 9, np.nan, np.nan], + "I": [8, 9, np.nan, np.nan], + "J": [1, np.nan, np.nan, np.nan], + "K": Categorical([np.nan, "a", np.nan, np.nan], categories=["a"]), + "L": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]), + "M": to_timedelta(["nan", "1 days", "nan", "nan"]), + "N": [0, 1, 2, 3], + }, + ), + ], + ) + def test_mode_dropna(self, dropna, expected): + + df = DataFrame( + { + "A": [12, 12, 19, 11], + "B": [10, 10, np.nan, 3], + "C": [1, np.nan, np.nan, np.nan], + "D": [np.nan, np.nan, "a", np.nan], + "E": Categorical([np.nan, np.nan, "a", np.nan]), + "F": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]), + "G": to_timedelta(["1 days", "nan", "nan", "nan"]), + "H": [8, 8, 9, 9], + "I": [9, 9, 8, 8], + "J": [1, 1, np.nan, np.nan], + "K": Categorical(["a", np.nan, "a", np.nan]), + "L": to_datetime(["2000-1-2", "2000-1-2", "NaT", "NaT"]), + "M": to_timedelta(["1 days", "nan", "1 days", "nan"]), + "N": np.arange(4, dtype="int64"), + } + ) + + result = df[sorted(expected.keys())].mode(dropna=dropna) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + def test_mode_sortwarning(self): + # Check for the warning that is raised when the mode + # results cannot be sorted + + df = DataFrame({"A": [np.nan, np.nan, "a", "a"]}) + expected = DataFrame({"A": ["a", np.nan]}) + + with tm.assert_produces_warning(UserWarning): + result = df.mode(dropna=False) + result = result.sort_values(by="A").reset_index(drop=True) + + tm.assert_frame_equal(result, expected) + + def test_mode_empty_df(self): + df = DataFrame([], columns=["a", "b"]) + result = df.mode() + expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=int)) + tm.assert_frame_equal(result, expected) + + def test_operators_timedelta64(self): + df = DataFrame( + { + "A": date_range("2012-1-1", periods=3, freq="D"), + "B": date_range("2012-1-2", periods=3, freq="D"), + "C": Timestamp("20120101") - timedelta(minutes=5, seconds=5), + } + ) + + diffs = DataFrame({"A": df["A"] - df["C"], "B": df["A"] - df["B"]}) + + # min + result = diffs.min() + assert result[0] == diffs.loc[0, "A"] + assert result[1] == diffs.loc[0, "B"] + + result = diffs.min(axis=1) + assert (result == diffs.loc[0, "B"]).all() + + # max + result = diffs.max() + assert result[0] == diffs.loc[2, "A"] + assert result[1] == diffs.loc[2, "B"] + + result = diffs.max(axis=1) + assert (result == diffs["A"]).all() + + # abs + result = diffs.abs() + result2 = abs(diffs) + expected = DataFrame({"A": df["A"] - df["C"], "B": df["B"] - df["A"]}) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + # mixed frame + mixed = diffs.copy() + mixed["C"] = "foo" + mixed["D"] = 1 + mixed["E"] = 1.0 + mixed["F"] = Timestamp("20130101") + + # results in an object array + result = mixed.min() + expected = Series( + [ + pd.Timedelta(timedelta(seconds=5 * 60 + 5)), + pd.Timedelta(timedelta(days=-1)), + "foo", + 1, + 1.0, + Timestamp("20130101"), + ], + index=mixed.columns, + ) + tm.assert_series_equal(result, 
expected) + + # excludes numeric + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = mixed.min(axis=1) + expected = Series([1, 1, 1.0], index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + # works when only those columns are selected + result = mixed[["A", "B"]].min(1) + expected = Series([timedelta(days=-1)] * 3) + tm.assert_series_equal(result, expected) + + result = mixed[["A", "B"]].min() + expected = Series( + [timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], index=["A", "B"] + ) + tm.assert_series_equal(result, expected) + + # GH 3106 + df = DataFrame( + { + "time": date_range("20130102", periods=5), + "time2": date_range("20130105", periods=5), + } + ) + df["off1"] = df["time2"] - df["time"] + assert df["off1"].dtype == "timedelta64[ns]" + + df["off2"] = df["time"] - df["time2"] + df._consolidate_inplace() + assert df["off1"].dtype == "timedelta64[ns]" + assert df["off2"].dtype == "timedelta64[ns]" + + def test_std_timedelta64_skipna_false(self): + # GH#37392 + tdi = pd.timedelta_range("1 Day", periods=10) + df = DataFrame({"A": tdi, "B": tdi}, copy=True) + df.iloc[-2, -1] = pd.NaT + + result = df.std(skipna=False) + expected = Series( + [df["A"].std(), pd.NaT], index=["A", "B"], dtype="timedelta64[ns]" + ) + tm.assert_series_equal(result, expected) + + result = df.std(axis=1, skipna=False) + expected = Series([pd.Timedelta(0)] * 8 + [pd.NaT, pd.Timedelta(0)]) + tm.assert_series_equal(result, expected) + + def test_sum_corner(self): + empty_frame = DataFrame() + + axis0 = empty_frame.sum(0) + axis1 = empty_frame.sum(1) + assert isinstance(axis0, Series) + assert isinstance(axis1, Series) + assert len(axis0) == 0 + assert len(axis1) == 0 + + @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)]) + @pytest.mark.parametrize("numeric_only", [None, True, False]) + def test_sum_prod_nanops(self, method, unit, numeric_only): + idx = ["a", "b", "c"] + df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]}) + # The default + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = getattr(df, method)(numeric_only=numeric_only, min_count=1) + expected = Series([unit, unit, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = getattr(df, method)(numeric_only=numeric_only, min_count=0) + expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + result = getattr(df.iloc[1:], method)(numeric_only=numeric_only, min_count=1) + expected = Series([unit, np.nan, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count > 1 + df = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) + result = getattr(df, method)(numeric_only=numeric_only, min_count=5) + expected = Series(result, index=["A", "B"]) + tm.assert_series_equal(result, expected) + + result = getattr(df, method)(numeric_only=numeric_only, min_count=6) + expected = Series(result, index=["A", "B"]) + tm.assert_series_equal(result, expected) + + def test_sum_nanops_timedelta(self): + # prod isn't defined on timedeltas + idx = ["a", "b", "c"] + df = DataFrame({"a": [0, 0], "b": [0, np.nan], "c": [np.nan, np.nan]}) + + df2 = df.apply(to_timedelta) + + # 0 by default + result = df2.sum() + expected = Series([0, 0, 0], dtype="m8[ns]", index=idx) + tm.assert_series_equal(result, expected) + + # min_count=0 
+ result = df2.sum(min_count=0) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df2.sum(min_count=1) + expected = Series([0, 0, np.nan], dtype="m8[ns]", index=idx) + tm.assert_series_equal(result, expected) + + def test_sum_nanops_min_count(self): + # https://github.com/pandas-dev/pandas/issues/39738 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + result = df.sum(min_count=10) + expected = Series([np.nan, np.nan], index=["x", "y"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("float_type", ["float16", "float32", "float64"]) + @pytest.mark.parametrize( + "kwargs, expected_result", + [ + ({"axis": 1, "min_count": 2}, [3.2, 5.3, np.NaN]), + ({"axis": 1, "min_count": 3}, [np.NaN, np.NaN, np.NaN]), + ({"axis": 1, "skipna": False}, [3.2, 5.3, np.NaN]), + ], + ) + def test_sum_nanops_dtype_min_count(self, float_type, kwargs, expected_result): + # GH#46947 + df = DataFrame({"a": [1.0, 2.3, 4.4], "b": [2.2, 3, np.nan]}, dtype=float_type) + result = df.sum(**kwargs) + expected = Series(expected_result).astype(float_type) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("float_type", ["float16", "float32", "float64"]) + @pytest.mark.parametrize( + "kwargs, expected_result", + [ + ({"axis": 1, "min_count": 2}, [2.0, 4.0, np.NaN]), + ({"axis": 1, "min_count": 3}, [np.NaN, np.NaN, np.NaN]), + ({"axis": 1, "skipna": False}, [2.0, 4.0, np.NaN]), + ], + ) + def test_prod_nanops_dtype_min_count(self, float_type, kwargs, expected_result): + # GH#46947 + df = DataFrame( + {"a": [1.0, 2.0, 4.4], "b": [2.0, 2.0, np.nan]}, dtype=float_type + ) + result = df.prod(**kwargs) + expected = Series(expected_result).astype(float_type) + tm.assert_series_equal(result, expected) + + def test_sum_object(self, float_frame): + values = float_frame.values.astype(int) + frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns) + deltas = frame * timedelta(1) + deltas.sum() + + def test_sum_bool(self, float_frame): + # ensure this works, bug report + bools = np.isnan(float_frame) + bools.sum(1) + bools.sum(0) + + def test_sum_mixed_datetime(self): + # GH#30886 + df = DataFrame({"A": date_range("2000", periods=4), "B": [1, 2, 3, 4]}).reindex( + [2, 3, 4] + ) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = df.sum() + + expected = Series({"B": 7.0}) + tm.assert_series_equal(result, expected) + + def test_mean_corner(self, float_frame, float_string_frame): + # unit test when have object data + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + the_mean = float_string_frame.mean(axis=0) + the_sum = float_string_frame.sum(axis=0, numeric_only=True) + tm.assert_index_equal(the_sum.index, the_mean.index) + assert len(the_mean.index) < len(float_string_frame.columns) + + # xs sum mixed type, just want to know it works... 
+ with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + the_mean = float_string_frame.mean(axis=1) + the_sum = float_string_frame.sum(axis=1, numeric_only=True) + tm.assert_index_equal(the_sum.index, the_mean.index) + + # take mean of boolean column + float_frame["bool"] = float_frame["A"] > 0 + means = float_frame.mean(0) + assert means["bool"] == float_frame["bool"].values.mean() + + def test_mean_datetimelike(self): + # GH#24757 check that datetimelike are excluded by default, handled + # correctly with numeric_only=True + + df = DataFrame( + { + "A": np.arange(3), + "B": date_range("2016-01-01", periods=3), + "C": pd.timedelta_range("1D", periods=3), + "D": pd.period_range("2016", periods=3, freq="A"), + } + ) + result = df.mean(numeric_only=True) + expected = Series({"A": 1.0}) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + # in the future datetime columns will be included + result = df.mean() + expected = Series({"A": 1.0, "C": df.loc[1, "C"]}) + tm.assert_series_equal(result, expected) + + def test_mean_datetimelike_numeric_only_false(self): + df = DataFrame( + { + "A": np.arange(3), + "B": date_range("2016-01-01", periods=3), + "C": pd.timedelta_range("1D", periods=3), + } + ) + + # datetime(tz) and timedelta work + result = df.mean(numeric_only=False) + expected = Series({"A": 1, "B": df.loc[1, "B"], "C": df.loc[1, "C"]}) + tm.assert_series_equal(result, expected) + + # mean of period is not allowed + df["D"] = pd.period_range("2016", periods=3, freq="A") + + with pytest.raises(TypeError, match="mean is not implemented for Period"): + df.mean(numeric_only=False) + + def test_mean_extensionarray_numeric_only_true(self): + # https://github.com/pandas-dev/pandas/issues/33256 + arr = np.random.randint(1000, size=(10, 5)) + df = DataFrame(arr, dtype="Int64") + result = df.mean(numeric_only=True) + expected = DataFrame(arr).mean() + tm.assert_series_equal(result, expected) + + def test_stats_mixed_type(self, float_string_frame): + # don't blow up + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + float_string_frame.std(1) + float_string_frame.var(1) + float_string_frame.mean(1) + float_string_frame.skew(1) + + def test_sum_bools(self): + df = DataFrame(index=range(1), columns=range(10)) + bools = isna(df) + assert bools.sum(axis=1)[0] == 10 + + # ---------------------------------------------------------------------- + # Index of max / min + + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("axis", [0, 1]) + def test_idxmin(self, float_frame, int_frame, skipna, axis): + frame = float_frame + frame.iloc[5:10] = np.nan + frame.iloc[15:20, -2:] = np.nan + for df in [frame, int_frame]: + result = df.idxmin(axis=axis, skipna=skipna) + expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("numeric_only", [True, False]) + def test_idxmin_numeric_only(self, numeric_only): + df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")}) + if numeric_only: + result = df.idxmin(numeric_only=numeric_only) + expected = Series([2, 1], index=["a", "b"]) + tm.assert_series_equal(result, expected) + else: + with pytest.raises(TypeError, match="not allowed for this dtype"): + df.idxmin(numeric_only=numeric_only) + + def test_idxmin_axis_2(self, float_frame): + frame = float_frame + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + 
frame.idxmin(axis=2) + + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("axis", [0, 1]) + def test_idxmax(self, float_frame, int_frame, skipna, axis): + frame = float_frame + frame.iloc[5:10] = np.nan + frame.iloc[15:20, -2:] = np.nan + for df in [frame, int_frame]: + result = df.idxmax(axis=axis, skipna=skipna) + expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("numeric_only", [True, False]) + def test_idxmax_numeric_only(self, numeric_only): + df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")}) + if numeric_only: + result = df.idxmax(numeric_only=numeric_only) + expected = Series([1, 0], index=["a", "b"]) + tm.assert_series_equal(result, expected) + else: + with pytest.raises(TypeError, match="not allowed for this dtype"): + df.idxmin(numeric_only=numeric_only) + + def test_idxmax_axis_2(self, float_frame): + frame = float_frame + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + frame.idxmax(axis=2) + + def test_idxmax_mixed_dtype(self): + # don't cast to object, which would raise in nanops + dti = date_range("2016-01-01", periods=3) + + # Copying dti is needed for ArrayManager otherwise when we set + # df.loc[0, 3] = pd.NaT below it edits dti + df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti.copy(deep=True)}) + + result = df.idxmax() + expected = Series([1, 0, 2], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 0], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # with NaTs + df.loc[0, 3] = pd.NaT + result = df.idxmax() + expected = Series([1, 0, 2], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 1], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # with multi-column dt64 block + df[4] = dti[::-1] + df._consolidate_inplace() + + result = df.idxmax() + expected = Series([1, 0, 2, 0], index=[1, 2, 3, 4]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 1, 2], index=[1, 2, 3, 4]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "op, expected_value", + [("idxmax", [0, 4]), ("idxmin", [0, 5])], + ) + def test_idxmax_idxmin_convert_dtypes(self, op, expected_value): + # GH 40346 + df = DataFrame( + { + "ID": [100, 100, 100, 200, 200, 200], + "value": [0, 0, 0, 1, 2, 0], + }, + dtype="Int64", + ) + df = df.groupby("ID") + + result = getattr(df, op)() + expected = DataFrame( + {"value": expected_value}, + index=Index([100, 200], name="ID", dtype="Int64"), + ) + tm.assert_frame_equal(result, expected) + + def test_idxmax_dt64_multicolumn_axis1(self): + dti = date_range("2016-01-01", periods=3) + df = DataFrame({3: dti, 4: dti[::-1]}, copy=True) + df.iloc[0, 0] = pd.NaT + + df._consolidate_inplace() + + result = df.idxmax(axis=1) + expected = Series([4, 3, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin(axis=1) + expected = Series([4, 3, 4]) + tm.assert_series_equal(result, expected) + + # ---------------------------------------------------------------------- + # Logical reductions + + @pytest.mark.parametrize("opname", ["any", "all"]) + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize("bool_only", [False, True]) + def test_any_all_mixed_float(self, opname, axis, bool_only, float_string_frame): + # make sure op works on mixed-type frame + mixed = 
float_string_frame + mixed["_bool_"] = np.random.randn(len(mixed)) > 0.5 + + getattr(mixed, opname)(axis=axis, bool_only=bool_only) + + @pytest.mark.parametrize("opname", ["any", "all"]) + @pytest.mark.parametrize("axis", [0, 1]) + def test_any_all_bool_with_na(self, opname, axis, bool_frame_with_na): + getattr(bool_frame_with_na, opname)(axis=axis, bool_only=False) + + @pytest.mark.parametrize("opname", ["any", "all"]) + def test_any_all_bool_frame(self, opname, bool_frame_with_na): + # GH#12863: numpy gives back non-boolean data for object type + # so fill NaNs to compare with pandas behavior + frame = bool_frame_with_na.fillna(True) + alternative = getattr(np, opname) + f = getattr(frame, opname) + + def skipna_wrapper(x): + nona = x.dropna().values + return alternative(nona) + + def wrapper(x): + return alternative(x.values) + + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + + tm.assert_series_equal(result0, frame.apply(wrapper)) + tm.assert_series_equal(result1, frame.apply(wrapper, axis=1)) + + result0 = f(axis=0) + result1 = f(axis=1) + + tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) + tm.assert_series_equal( + result1, frame.apply(skipna_wrapper, axis=1), check_dtype=False + ) + + # bad axis + with pytest.raises(ValueError, match="No axis named 2"): + f(axis=2) + + # all NA case + all_na = frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname == "any": + assert not r0.any() + assert not r1.any() + else: + assert r0.all() + assert r1.all() + + def test_any_all_extra(self): + df = DataFrame( + { + "A": [True, False, False], + "B": [True, True, False], + "C": [True, True, True], + }, + index=["a", "b", "c"], + ) + result = df[["A", "B"]].any(axis=1) + expected = Series([True, True, False], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + result = df[["A", "B"]].any(axis=1, bool_only=True) + tm.assert_series_equal(result, expected) + + result = df.all(1) + expected = Series([True, False, False], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + result = df.all(1, bool_only=True) + tm.assert_series_equal(result, expected) + + # Axis is None + result = df.all(axis=None).item() + assert result is False + + result = df.any(axis=None).item() + assert result is True + + result = df[["C"]].all(axis=None).item() + assert result is True + + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_any_all_object_dtype(self, axis, bool_agg_func, skipna): + # GH#35450 + df = DataFrame( + data=[ + [1, np.nan, np.nan, True], + [np.nan, 2, np.nan, True], + [np.nan, np.nan, np.nan, True], + [np.nan, np.nan, "5", np.nan], + ] + ) + result = getattr(df, bool_agg_func)(axis=axis, skipna=skipna) + expected = Series([True, True, True, True]) + tm.assert_series_equal(result, expected) + + def test_any_datetime(self): + + # GH 23070 + float_data = [1, np.nan, 3, np.nan] + datetime_data = [ + Timestamp("1960-02-15"), + Timestamp("1960-02-16"), + pd.NaT, + pd.NaT, + ] + df = DataFrame({"A": float_data, "B": datetime_data}) + + result = df.any(axis=1) + expected = Series([True, True, True, False]) + tm.assert_series_equal(result, expected) + + def test_any_all_bool_only(self): + + # GH 25101 + df = DataFrame( + {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]} + ) + + result = df.all(bool_only=True) + expected = Series(dtype=np.bool_) + 
tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "col1": [1, 2, 3], + "col2": [4, 5, 6], + "col3": [None, None, None], + "col4": [False, False, True], + } + ) + + result = df.all(bool_only=True) + expected = Series({"col4": False}) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "func, data, expected", + [ + (np.any, {}, False), + (np.all, {}, True), + (np.any, {"A": []}, False), + (np.all, {"A": []}, True), + (np.any, {"A": [False, False]}, False), + (np.all, {"A": [False, False]}, False), + (np.any, {"A": [True, False]}, True), + (np.all, {"A": [True, False]}, False), + (np.any, {"A": [True, True]}, True), + (np.all, {"A": [True, True]}, True), + (np.any, {"A": [False], "B": [False]}, False), + (np.all, {"A": [False], "B": [False]}, False), + (np.any, {"A": [False, False], "B": [False, True]}, True), + (np.all, {"A": [False, False], "B": [False, True]}, False), + # other types + (np.all, {"A": Series([0.0, 1.0], dtype="float")}, False), + (np.any, {"A": Series([0.0, 1.0], dtype="float")}, True), + (np.all, {"A": Series([0, 1], dtype=int)}, False), + (np.any, {"A": Series([0, 1], dtype=int)}, True), + pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns]")}, False), + pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, False), + pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns]")}, True), + pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, True), + pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns]")}, True), + pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True), + pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns]")}, True), + pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True), + pytest.param(np.all, {"A": Series([0, 1], dtype="m8[ns]")}, False), + pytest.param(np.any, {"A": Series([0, 1], dtype="m8[ns]")}, True), + pytest.param(np.all, {"A": Series([1, 2], dtype="m8[ns]")}, True), + pytest.param(np.any, {"A": Series([1, 2], dtype="m8[ns]")}, True), + # np.all on Categorical raises, so the reduction drops the + # column, so all is being done on an empty Series, so is True + (np.all, {"A": Series([0, 1], dtype="category")}, True), + (np.any, {"A": Series([0, 1], dtype="category")}, False), + (np.all, {"A": Series([1, 2], dtype="category")}, True), + (np.any, {"A": Series([1, 2], dtype="category")}, False), + # Mix GH#21484 + pytest.param( + np.all, + { + "A": Series([10, 20], dtype="M8[ns]"), + "B": Series([10, 20], dtype="m8[ns]"), + }, + True, + ), + ], + ) + def test_any_all_np_func(self, func, data, expected): + # GH 19976 + data = DataFrame(data) + + warn = None + if any(is_categorical_dtype(x) for x in data.dtypes): + warn = FutureWarning + + with tm.assert_produces_warning( + warn, match="Select only valid columns", check_stacklevel=False + ): + result = func(data) + assert isinstance(result, np.bool_) + assert result.item() is expected + + # method version + with tm.assert_produces_warning( + warn, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(DataFrame(data), func.__name__)(axis=None) + assert isinstance(result, np.bool_) + assert result.item() is expected + + def test_any_all_object(self): + # GH 19976 + result = np.all(DataFrame(columns=["a", "b"])).item() + assert result is True + + result = np.any(DataFrame(columns=["a", "b"])).item() + assert result is False + + def test_any_all_object_bool_only(self): + msg = "object-dtype columns with all-bool values" + + df = DataFrame({"A": ["foo", 2], "B": [True, 
False]}).astype(object) + df._consolidate_inplace() + df["C"] = Series([True, True]) + + # Categorical of bools is _not_ considered booly + df["D"] = df["C"].astype("category") + + # The underlying bug is in DataFrame._get_bool_data, so we check + # that while we're here + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df._get_bool_data() + expected = df[["B", "C"]] + tm.assert_frame_equal(res, expected) + + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df.all(bool_only=True, axis=0) + expected = Series([False, True], index=["B", "C"]) + tm.assert_series_equal(res, expected) + + # operating on a subset of columns should not produce a _larger_ Series + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df[["B", "C"]].all(bool_only=True, axis=0) + tm.assert_series_equal(res, expected) + + with tm.assert_produces_warning(FutureWarning, match=msg): + assert not df.all(bool_only=True, axis=None) + + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df.any(bool_only=True, axis=0) + expected = Series([True, True], index=["B", "C"]) + tm.assert_series_equal(res, expected) + + # operating on a subset of columns should not produce a _larger_ Series + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df[["B", "C"]].any(bool_only=True, axis=0) + tm.assert_series_equal(res, expected) + + with tm.assert_produces_warning(FutureWarning, match=msg): + assert df.any(bool_only=True, axis=None) + + @pytest.mark.parametrize("method", ["any", "all"]) + def test_any_all_level_axis_none_raises(self, method): + df = DataFrame( + {"A": 1}, + index=MultiIndex.from_product( + [["A", "B"], ["a", "b"]], names=["out", "in"] + ), + ) + xpr = "Must specify 'axis' when aggregating by level." + with pytest.raises(ValueError, match=xpr): + with tm.assert_produces_warning(FutureWarning): + getattr(df, method)(axis=None, level="out") + + # --------------------------------------------------------------------- + # Unsorted + + def test_series_broadcasting(self): + # smoke test for numpy warnings + # GH 16378, GH 16306 + df = DataFrame([1.0, 1.0, 1.0]) + df_nan = DataFrame({"A": [np.nan, 2.0, np.nan]}) + s = Series([1, 1, 1]) + s_nan = Series([np.nan, np.nan, 1]) + + with tm.assert_produces_warning(None): + df_nan.clip(lower=s, axis=0) + for op in ["lt", "le", "gt", "ge", "eq", "ne"]: + getattr(df, op)(s_nan, axis=0) + + +class TestDataFrameReductions: + def test_min_max_dt64_with_NaT(self): + # Both NaT and Timestamp are in DataFrame. + df = DataFrame({"foo": [pd.NaT, pd.NaT, Timestamp("2012-05-01")]}) + + res = df.min() + exp = Series([Timestamp("2012-05-01")], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = Series([Timestamp("2012-05-01")], index=["foo"]) + tm.assert_series_equal(res, exp) + + # GH12941, only NaTs are in DataFrame. 
+ df = DataFrame({"foo": [pd.NaT, pd.NaT]}) + + res = df.min() + exp = Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) + + def test_min_max_dt64_with_NaT_skipna_false(self, request, tz_naive_fixture): + # GH#36907 + tz = tz_naive_fixture + if isinstance(tz, tzlocal) and is_platform_windows(): + pytest.skip( + "GH#37659 OSError raised within tzlocal bc Windows " + "chokes in times before 1970-01-01" + ) + + df = DataFrame( + { + "a": [ + Timestamp("2020-01-01 08:00:00", tz=tz), + Timestamp("1920-02-01 09:00:00", tz=tz), + ], + "b": [Timestamp("2020-02-01 08:00:00", tz=tz), pd.NaT], + } + ) + res = df.min(axis=1, skipna=False) + expected = Series([df.loc[0, "a"], pd.NaT]) + assert expected.dtype == df["a"].dtype + + tm.assert_series_equal(res, expected) + + res = df.max(axis=1, skipna=False) + expected = Series([df.loc[0, "b"], pd.NaT]) + assert expected.dtype == df["a"].dtype + + tm.assert_series_equal(res, expected) + + def test_min_max_dt64_api_consistency_with_NaT(self): + # Calling the following sum functions returned an error for dataframes but + # returned NaT for series. These tests check that the API is consistent in + # min/max calls on empty Series/DataFrames. See GH:33704 for more + # information + df = DataFrame({"x": to_datetime([])}) + expected_dt_series = Series(to_datetime([])) + # check axis 0 + assert (df.min(axis=0).x is pd.NaT) == (expected_dt_series.min() is pd.NaT) + assert (df.max(axis=0).x is pd.NaT) == (expected_dt_series.max() is pd.NaT) + + # check axis 1 + tm.assert_series_equal(df.min(axis=1), expected_dt_series) + tm.assert_series_equal(df.max(axis=1), expected_dt_series) + + def test_min_max_dt64_api_consistency_empty_df(self): + # check DataFrame/Series api consistency when calling min/max on an empty + # DataFrame/Series. 
+ df = DataFrame({"x": []}) + expected_float_series = Series([], dtype=float) + # check axis 0 + assert np.isnan(df.min(axis=0).x) == np.isnan(expected_float_series.min()) + assert np.isnan(df.max(axis=0).x) == np.isnan(expected_float_series.max()) + # check axis 1 + tm.assert_series_equal(df.min(axis=1), expected_float_series) + tm.assert_series_equal(df.min(axis=1), expected_float_series) + + @pytest.mark.parametrize( + "initial", + ["2018-10-08 13:36:45+00:00", "2018-10-08 13:36:45+03:00"], # Non-UTC timezone + ) + @pytest.mark.parametrize("method", ["min", "max"]) + def test_preserve_timezone(self, initial: str, method): + # GH 28552 + initial_dt = to_datetime(initial) + expected = Series([initial_dt]) + df = DataFrame([expected]) + result = getattr(df, method)(axis=1) + tm.assert_series_equal(result, expected) + + def test_frame_any_all_with_level(self): + df = DataFrame( + {"data": [False, False, True, False, True, False, True]}, + index=[ + ["one", "one", "two", "one", "two", "two", "two"], + [0, 1, 0, 2, 1, 2, 3], + ], + ) + + with tm.assert_produces_warning(FutureWarning, match="Using the level"): + result = df.any(level=0) + ex = DataFrame({"data": [False, True]}, index=["one", "two"]) + tm.assert_frame_equal(result, ex) + + with tm.assert_produces_warning(FutureWarning, match="Using the level"): + result = df.all(level=0) + ex = DataFrame({"data": [False, False]}, index=["one", "two"]) + tm.assert_frame_equal(result, ex) + + def test_frame_any_with_timedelta(self): + # GH#17667 + df = DataFrame( + { + "a": Series([0, 0]), + "t": Series([to_timedelta(0, "s"), to_timedelta(1, "ms")]), + } + ) + + result = df.any(axis=0) + expected = Series(data=[False, True], index=["a", "t"]) + tm.assert_series_equal(result, expected) + + result = df.any(axis=1) + expected = Series(data=[False, True]) + tm.assert_series_equal(result, expected) + + def test_reductions_deprecation_skipna_none(self, frame_or_series): + # GH#44580 + obj = frame_or_series([1, 2, 3]) + with tm.assert_produces_warning( + FutureWarning, match="skipna", raise_on_extra_warnings=False + ): + obj.mad(skipna=None) + + def test_reductions_deprecation_level_argument( + self, frame_or_series, reduction_functions + ): + # GH#39983 + obj = frame_or_series( + [1, 2, 3], index=MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]]) + ) + with tm.assert_produces_warning(FutureWarning, match="level"): + getattr(obj, reduction_functions)(level=0) + + def test_reductions_skipna_none_raises( + self, request, frame_or_series, reduction_functions + ): + if reduction_functions == "count": + request.node.add_marker( + pytest.mark.xfail(reason="Count does not accept skipna") + ) + elif reduction_functions == "mad": + pytest.skip("Mad is deprecated: GH#11787") + obj = frame_or_series([1, 2, 3]) + msg = 'For argument "skipna" expected type bool, received type NoneType.' 
+ with pytest.raises(ValueError, match=msg): + getattr(obj, reduction_functions)(skipna=None) + + +class TestNuisanceColumns: + @pytest.mark.parametrize("method", ["any", "all"]) + def test_any_all_categorical_dtype_nuisance_column(self, method): + # GH#36076 DataFrame should match Series behavior + ser = Series([0, 1], dtype="category", name="A") + df = ser.to_frame() + + # Double-check the Series behavior is to raise + with pytest.raises(TypeError, match="does not support reduction"): + getattr(ser, method)() + + with pytest.raises(TypeError, match="does not support reduction"): + getattr(np, method)(ser) + + with pytest.raises(TypeError, match="does not support reduction"): + getattr(df, method)(bool_only=False) + + # With bool_only=None, operating on this column raises and is ignored, + # so we expect an empty result. + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)(bool_only=None) + expected = Series([], index=Index([]), dtype=bool) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df, axis=0) + tm.assert_series_equal(result, expected) + + def test_median_categorical_dtype_nuisance_column(self): + # GH#21020 DataFrame.median should match Series.median + df = DataFrame({"A": Categorical([1, 2, 2, 2, 3])}) + ser = df["A"] + + # Double-check the Series behavior is to raise + with pytest.raises(TypeError, match="does not support reduction"): + ser.median() + + with pytest.raises(TypeError, match="does not support reduction"): + df.median(numeric_only=False) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.median() + expected = Series([], index=Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + # same thing, but with an additional non-categorical column + df["B"] = df["A"].astype(int) + + with pytest.raises(TypeError, match="does not support reduction"): + df.median(numeric_only=False) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.median() + expected = Series([2.0], index=["B"]) + tm.assert_series_equal(result, expected) + + # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead + # of expected.values + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): + # GH#28949 DataFrame.min should behave like Series.min + cat = Categorical(["a", "b", "c", "b"], ordered=False) + ser = Series(cat) + df = ser.to_frame("A") + + # Double-check the Series behavior + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(ser, method)() + + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(np, method)(ser) + + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(df, method)(numeric_only=False) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)() + expected = Series([], index=Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df) + tm.assert_series_equal(result, expected) + + # same thing, but with an additional non-categorical column + df["B"] = 
df["A"].astype(object) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)() + if method == "min": + expected = Series(["a"], index=["B"]) + else: + expected = Series(["c"], index=["B"]) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df) + tm.assert_series_equal(result, expected) + + def test_reduction_object_block_splits_nuisance_columns(self): + # GH#37827 + df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object) + + # We should only exclude "B", not "A" + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() + expected = Series([1.0], index=["A"]) + tm.assert_series_equal(result, expected) + + # Same behavior but heterogeneous dtype + df["C"] = df["A"].astype(int) + 4 + + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() + expected = Series([1.0, 5.0], index=["A", "C"]) + tm.assert_series_equal(result, expected) + + +def test_sum_timedelta64_skipna_false(): + # GH#17235 + arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2) + arr[-1, -1] = "Nat" + + df = DataFrame(arr) + + result = df.sum(skipna=False) + expected = Series([pd.Timedelta(seconds=12), pd.NaT]) + tm.assert_series_equal(result, expected) + + result = df.sum(axis=0, skipna=False) + tm.assert_series_equal(result, expected) + + result = df.sum(axis=1, skipna=False) + expected = Series( + [ + pd.Timedelta(seconds=1), + pd.Timedelta(seconds=5), + pd.Timedelta(seconds=9), + pd.NaT, + ] + ) + tm.assert_series_equal(result, expected) + + +def test_mixed_frame_with_integer_sum(): + # https://github.com/pandas-dev/pandas/issues/34520 + df = DataFrame([["a", 1]], columns=list("ab")) + df = df.astype({"b": "Int64"}) + result = df.sum() + expected = Series(["a", 1], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("numeric_only", [True, False, None]) +@pytest.mark.parametrize("method", ["min", "max"]) +def test_minmax_extensionarray(method, numeric_only): + # https://github.com/pandas-dev/pandas/issues/32651 + int64_info = np.iinfo("int64") + ser = Series([int64_info.max, None, int64_info.min], dtype=pd.Int64Dtype()) + df = DataFrame({"Int64": ser}) + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series( + [getattr(int64_info, method)], index=Index(["Int64"], dtype="object") + ) + tm.assert_series_equal(result, expected) + + +def test_mad_nullable_integer(any_signed_int_ea_dtype): + # GH#33036 + df = DataFrame(np.random.randn(100, 4).astype(np.int64)) + df2 = df.astype(any_signed_int_ea_dtype) + + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad() + expected = df.mad() + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad(axis=1) + expected = df.mad(axis=1) + tm.assert_series_equal(result, expected) + + # case with NAs present + df2.iloc[::2, 1] = pd.NA + + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad() + expected = df.mad() + expected[1] = df.iloc[1::2, 1].mad() + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is 
deprecated" + ): + result = df2.mad(axis=1) + expected = df.mad(axis=1) + expected[::2] = df.T.loc[[0, 2, 3], ::2].mad() + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(reason="GH#42895 caused by lack of 2D EA") +def test_mad_nullable_integer_all_na(any_signed_int_ea_dtype): + # GH#33036 + df = DataFrame(np.random.randn(100, 4).astype(np.int64)) + df2 = df.astype(any_signed_int_ea_dtype) + + # case with all-NA row/column + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df2.iloc[:, 1] = pd.NA # FIXME(GH#44199): this doesn't operate in-place + df2.iloc[:, 1] = pd.array([pd.NA] * len(df2), dtype=any_signed_int_ea_dtype) + + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad() + expected = df.mad() + + expected[1] = pd.NA + expected = expected.astype("Float64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("meth", ["max", "min", "sum", "mean", "median"]) +def test_groupby_regular_arithmetic_equivalent(meth): + # GH#40660 + df = DataFrame( + {"a": [pd.Timedelta(hours=6), pd.Timedelta(hours=7)], "b": [12.1, 13.3]} + ) + expected = df.copy() + + with tm.assert_produces_warning(FutureWarning): + result = getattr(df, meth)(level=0) + tm.assert_frame_equal(result, expected) + + result = getattr(df.groupby(level=0), meth)(numeric_only=False) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("ts_value", [Timestamp("2000-01-01"), pd.NaT]) +def test_frame_mixed_numeric_object_with_timestamp(ts_value): + # GH 13912 + df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]}) + with tm.assert_produces_warning( + FutureWarning, match="The default value of numeric_only" + ): + result = df.sum() + expected = Series([1, 1.1, "foo"], index=list("abc")) + tm.assert_series_equal(result, expected) + + +def test_prod_sum_min_count_mixed_object(): + # https://github.com/pandas-dev/pandas/issues/41074 + df = DataFrame([1, "a", True]) + + result = df.prod(axis=0, min_count=1, numeric_only=False) + expected = Series(["a"]) + tm.assert_series_equal(result, expected) + + msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'") + with pytest.raises(TypeError, match=msg): + df.sum(axis=0, min_count=1, numeric_only=False) + + +@pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"]) +def test_reduction_axis_none_deprecation(method): + # GH#21597 deprecate axis=None defaulting to axis=0 so that we can change it + # to reducing over all axes. 
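+ # Sketch of the transition under test: df.mean(axis=None) currently warns
+ # (FutureWarning about computing a scalar over the entire DataFrame) but
+ # still returns the same Series as df.mean() / df.mean(axis=0); eventually
+ # axis=None is meant to reduce over both axes to a single scalar.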
+ + df = DataFrame(np.random.randn(4, 4)) + meth = getattr(df, method) + + msg = f"scalar {method} over the entire DataFrame" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = meth(axis=None) + with tm.assert_produces_warning(None): + expected = meth() + tm.assert_series_equal(res, expected) + tm.assert_series_equal(res, meth(axis=0)) + + +@pytest.mark.parametrize( + "kernel", + [ + "corr", + "corrwith", + "count", + "cov", + "idxmax", + "idxmin", + "kurt", + "kurt", + "max", + "mean", + "median", + "min", + "mode", + "prod", + "prod", + "quantile", + "sem", + "skew", + "std", + "sum", + "var", + ], +) +def test_numeric_only_deprecation(kernel): + # GH#46852 + df = DataFrame({"a": [1, 2, 3], "b": object}) + args = (df,) if kernel == "corrwith" else () + signature = inspect.signature(getattr(DataFrame, kernel)) + default = signature.parameters["numeric_only"].default + assert default is not True + + if kernel in ("idxmax", "idxmin"): + # kernels that default to numeric_only=False and fail on nuisance columns + assert default is False + with pytest.raises(TypeError, match="not allowed for this dtype"): + getattr(df, kernel)(*args) + else: + if default is None or default is lib.no_default: + expected = getattr(df[["a"]], kernel)(*args) + warn = FutureWarning + else: + # default must be False and works on any nuisance columns + expected = getattr(df, kernel)(*args) + if kernel == "mode": + assert "b" in expected.columns + else: + assert "b" in expected.index + warn = None + msg = f"The default value of numeric_only in DataFrame.{kernel}" + with tm.assert_produces_warning(warn, match=msg): + result = getattr(df, kernel)(*args) + tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py new file mode 100644 index 00000000..86c8e36c --- /dev/null +++ b/pandas/tests/frame/test_repr_info.py @@ -0,0 +1,365 @@ +from datetime import ( + datetime, + timedelta, +) +from io import StringIO +import warnings + +import numpy as np +import pytest + +from pandas import ( + NA, + Categorical, + DataFrame, + MultiIndex, + NaT, + PeriodIndex, + Series, + Timestamp, + date_range, + option_context, + period_range, +) +import pandas._testing as tm + +import pandas.io.formats.format as fmt + + +class TestDataFrameReprInfoEtc: + def test_repr_bytes_61_lines(self): + # GH#12857 + lets = list("ACDEFGHIJKLMNOP") + slen = 50 + nseqs = 1000 + words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)] + df = DataFrame(words).astype("U1") + assert (df.dtypes == object).all() + + # smoke tests; at one point this raised with 61 but not 60 + repr(df) + repr(df.iloc[:60, :]) + repr(df.iloc[:61, :]) + + def test_repr_unicode_level_names(self, frame_or_series): + index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) + + obj = DataFrame(np.random.randn(2, 4), index=index) + obj = tm.get_obj(obj, frame_or_series) + repr(obj) + + def test_assign_index_sequences(self): + # GH#2200 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( + ["a", "b"] + ) + index = list(df.index) + index[0] = ("faz", "boo") + df.index = index + repr(df) + + # this travels an improper code path + index[0] = ["faz", "boo"] + df.index = index + repr(df) + + def test_repr_with_mi_nat(self): + df = DataFrame({"X": [1, 2]}, index=[[NaT, Timestamp("20130101")], ["a", "b"]]) + result = repr(df) + expected = " X\nNaT a 1\n2013-01-01 b 2" + assert result == expected + + def test_repr_with_different_nulls(self): + # 
GH45263 + df = DataFrame([1, 2, 3, 4], [True, None, np.nan, NaT]) + result = repr(df) + expected = """ 0 +True 1 +None 2 +NaN 3 +NaT 4""" + assert result == expected + + def test_repr_with_different_nulls_cols(self): + # GH45263 + d = {np.nan: [1, 2], None: [3, 4], NaT: [6, 7], True: [8, 9]} + df = DataFrame(data=d) + result = repr(df) + expected = """ NaN None NaT True +0 1 3 6 8 +1 2 4 7 9""" + assert result == expected + + def test_multiindex_na_repr(self): + # only an issue with long columns + df3 = DataFrame( + { + "A" * 30: {("A", "A0006000", "nuit"): "A0006000"}, + "B" * 30: {("A", "A0006000", "nuit"): np.nan}, + "C" * 30: {("A", "A0006000", "nuit"): np.nan}, + "D" * 30: {("A", "A0006000", "nuit"): np.nan}, + "E" * 30: {("A", "A0006000", "nuit"): "A"}, + "F" * 30: {("A", "A0006000", "nuit"): np.nan}, + } + ) + + idf = df3.set_index(["A" * 30, "C" * 30]) + repr(idf) + + def test_repr_name_coincide(self): + index = MultiIndex.from_tuples( + [("a", 0, "foo"), ("b", 1, "bar")], names=["a", "b", "c"] + ) + + df = DataFrame({"value": [0, 1]}, index=index) + + lines = repr(df).split("\n") + assert lines[2].startswith("a 0 foo") + + def test_repr_to_string( + self, + multiindex_year_month_day_dataframe_random_data, + multiindex_dataframe_random_data, + ): + ymd = multiindex_year_month_day_dataframe_random_data + frame = multiindex_dataframe_random_data + + repr(frame) + repr(ymd) + repr(frame.T) + repr(ymd.T) + + buf = StringIO() + frame.to_string(buf=buf) + ymd.to_string(buf=buf) + frame.T.to_string(buf=buf) + ymd.T.to_string(buf=buf) + + def test_repr_empty(self): + # empty + repr(DataFrame()) + + # empty with index + frame = DataFrame(index=np.arange(1000)) + repr(frame) + + def test_repr_mixed(self, float_string_frame): + buf = StringIO() + + # mixed + repr(float_string_frame) + float_string_frame.info(verbose=False, buf=buf) + + @pytest.mark.slow + def test_repr_mixed_big(self): + # big mixed + biggie = DataFrame( + {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, index=range(200) + ) + biggie.loc[:20, "A"] = np.nan + biggie.loc[:20, "B"] = np.nan + + repr(biggie) + + def test_repr(self, float_frame): + buf = StringIO() + + # small one + repr(float_frame) + float_frame.info(verbose=False, buf=buf) + + # even smaller + float_frame.reindex(columns=["A"]).info(verbose=False, buf=buf) + float_frame.reindex(columns=["A", "B"]).info(verbose=False, buf=buf) + + # exhausting cases in DataFrame.info + + # columns but no index + no_index = DataFrame(columns=[0, 1, 3]) + repr(no_index) + + # no columns or index + DataFrame().info(buf=buf) + + df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"]) + assert "\t" not in repr(df) + assert "\r" not in repr(df) + assert "a\n" not in repr(df) + + def test_repr_dimensions(self): + df = DataFrame([[1, 2], [3, 4]]) + with option_context("display.show_dimensions", True): + assert "2 rows x 2 columns" in repr(df) + + with option_context("display.show_dimensions", False): + assert "2 rows x 2 columns" not in repr(df) + + with option_context("display.show_dimensions", "truncate"): + assert "2 rows x 2 columns" not in repr(df) + + @pytest.mark.slow + def test_repr_big(self): + # big one + biggie = DataFrame(np.zeros((200, 4)), columns=range(4), index=range(200)) + repr(biggie) + + def test_repr_unsortable(self, float_frame): + # columns are not sortable + + warn_filters = warnings.filters + warnings.filterwarnings("ignore", category=FutureWarning, module=".*format") + + unsortable = DataFrame( + { + "foo": [1] * 50, + datetime.today(): 
[1] * 50, + "bar": ["bar"] * 50, + datetime.today() + timedelta(1): ["bar"] * 50, + }, + index=np.arange(50), + ) + repr(unsortable) + + fmt.set_option("display.precision", 3) + repr(float_frame) + + fmt.set_option("display.max_rows", 10, "display.max_columns", 2) + repr(float_frame) + + fmt.set_option("display.max_rows", 1000, "display.max_columns", 1000) + repr(float_frame) + + tm.reset_display_options() + + warnings.filters = warn_filters + + def test_repr_unicode(self): + uval = "\u03c3\u03c3\u03c3\u03c3" + + df = DataFrame({"A": [uval, uval]}) + + result = repr(df) + ex_top = " A" + assert result.split("\n")[0].rstrip() == ex_top + + df = DataFrame({"A": [uval, uval]}) + result = repr(df) + assert result.split("\n")[0].rstrip() == ex_top + + def test_unicode_string_with_unicode(self): + df = DataFrame({"A": ["\u05d0"]}) + str(df) + + def test_repr_unicode_columns(self): + df = DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_str_to_bytes_raises(self): + # GH 26447 + df = DataFrame({"A": ["abc"]}) + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(df) + + def test_very_wide_info_repr(self): + df = DataFrame(np.random.randn(10, 20), columns=tm.rands_array(10, 20)) + repr(df) + + def test_repr_column_name_unicode_truncation_bug(self): + # #1906 + df = DataFrame( + { + "Id": [7117434], + "StringCol": ( + "Is it possible to modify drop plot code" + "so that the output graph is displayed " + "in iphone simulator, Is it possible to " + "modify drop plot code so that the " + "output graph is \xe2\x80\xa8displayed " + "in iphone simulator.Now we are adding " + "the CSV file externally. I want to Call " + "the File through the code.." + ), + } + ) + + with option_context("display.max_columns", 20): + assert "StringCol" in repr(df) + + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_latex_repr(self): + result = r"""\begin{tabular}{llll} +\toprule +{} & 0 & 1 & 2 \\ +\midrule +0 & $\alpha$ & b & c \\ +1 & 1 & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + with option_context("display.latex.escape", False, "display.latex.repr", True): + df = DataFrame([[r"$\alpha$", "b", "c"], [1, 2, 3]]) + assert result == df._repr_latex_() + + # GH 12182 + assert df._repr_latex_() is None + + def test_repr_categorical_dates_periods(self): + # normal DataFrame + dt = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + p = period_range("2011-01", freq="M", periods=5) + df = DataFrame({"dt": dt, "p": p}) + exp = """ dt p +0 2011-01-01 09:00:00-05:00 2011-01 +1 2011-01-01 10:00:00-05:00 2011-02 +2 2011-01-01 11:00:00-05:00 2011-03 +3 2011-01-01 12:00:00-05:00 2011-04 +4 2011-01-01 13:00:00-05:00 2011-05""" + + assert repr(df) == exp + + df2 = DataFrame({"dt": Categorical(dt), "p": Categorical(p)}) + assert repr(df2) == exp + + @pytest.mark.parametrize("arg", [np.datetime64, np.timedelta64]) + @pytest.mark.parametrize( + "box, expected", + [[Series, "0 NaT\ndtype: object"], [DataFrame, " 0\n0 NaT"]], + ) + def test_repr_np_nat_with_object(self, arg, box, expected): + # GH 25445 + result = repr(box([arg("NaT")], dtype=object)) + assert result == expected + + def test_frame_datetime64_pre1900_repr(self): + df = DataFrame({"year": date_range("1/1/1700", periods=50, freq="A-DEC")}) + # it works! 
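+ # (smoke test only: constructing and repr-ing a frame of pre-1900
+ # datetime64 values should simply not raise)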
+ repr(df) + + def test_frame_to_string_with_periodindex(self): + index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") + frame = DataFrame(np.random.randn(3, 4), index=index) + + # it works! + frame.to_string() + + def test_to_string_ea_na_in_multiindex(self): + # GH#47986 + df = DataFrame( + {"a": [1, 2]}, + index=MultiIndex.from_arrays([Series([NA, 1], dtype="Int64")]), + ) + + result = df.to_string() + expected = """ a + 1 +1 2""" + assert result == expected + + def test_datetime64tz_slice_non_truncate(self): + # GH 30263 + df = DataFrame({"x": date_range("2019", periods=10, tz="UTC")}) + expected = repr(df) + df = df.iloc[:, :5] + result = repr(df) + assert result == expected diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py new file mode 100644 index 00000000..e2255980 --- /dev/null +++ b/pandas/tests/frame/test_stack_unstack.py @@ -0,0 +1,2185 @@ +from datetime import datetime +from io import StringIO +import itertools + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Period, + Series, + Timedelta, + date_range, +) +import pandas._testing as tm +from pandas.core.reshape import reshape as reshape_lib + + +class TestDataFrameReshape: + def test_stack_unstack(self, float_frame, using_array_manager): + warn = DeprecationWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + + df = float_frame.copy() + with tm.assert_produces_warning(warn, match=msg): + df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) + + stacked = df.stack() + stacked_df = DataFrame({"foo": stacked, "bar": stacked}) + + unstacked = stacked.unstack() + unstacked_df = stacked_df.unstack() + + tm.assert_frame_equal(unstacked, df) + tm.assert_frame_equal(unstacked_df["bar"], df) + + unstacked_cols = stacked.unstack(0) + unstacked_cols_df = stacked_df.unstack(0) + tm.assert_frame_equal(unstacked_cols.T, df) + tm.assert_frame_equal(unstacked_cols_df["bar"].T, df) + + def test_stack_mixed_level(self): + # GH 18310 + levels = [range(3), [3, "a", "b"], [1, 2]] + + # flat columns: + df = DataFrame(1, index=levels[0], columns=levels[1]) + result = df.stack() + expected = Series(1, index=MultiIndex.from_product(levels[:2])) + tm.assert_series_equal(result, expected) + + # MultiIndex columns: + df = DataFrame(1, index=levels[0], columns=MultiIndex.from_product(levels[1:])) + result = df.stack(1) + expected = DataFrame( + 1, index=MultiIndex.from_product([levels[0], levels[2]]), columns=levels[1] + ) + tm.assert_frame_equal(result, expected) + + # as above, but used labels in level are actually of homogeneous type + result = df[["a", "b"]].stack(1) + expected = expected[["a", "b"]] + tm.assert_frame_equal(result, expected) + + def test_unstack_not_consolidated(self, using_array_manager): + # Gh#34708 + df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) + df2 = df[["x"]] + df2["y"] = df["y"] + if not using_array_manager: + assert len(df2._mgr.blocks) == 2 + + res = df2.unstack() + expected = df.unstack() + tm.assert_series_equal(res, expected) + + def test_unstack_fill(self): + + # GH #9746: fill_value keyword argument for Series + # and DataFrame unstack + + # From a series + data = Series([1, 2, 4, 5], dtype=np.int16) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack(fill_value=-1) + expected = DataFrame( + {"a": [1, -1, 5], "b": [2, 4, -1]}, 
index=["x", "y", "z"], dtype=np.int16 + ) + tm.assert_frame_equal(result, expected) + + # From a series with incorrect data type for fill_value + result = data.unstack(fill_value=0.5) + expected = DataFrame( + {"a": [1, 0.5, 5], "b": [2, 4, 0.5]}, index=["x", "y", "z"], dtype=float + ) + tm.assert_frame_equal(result, expected) + + # GH #13971: fill_value when unstacking multiple levels: + df = DataFrame( + {"x": ["a", "a", "b"], "y": ["j", "k", "j"], "z": [0, 1, 2], "w": [0, 1, 2]} + ).set_index(["x", "y", "z"]) + unstacked = df.unstack(["x", "y"], fill_value=0) + key = ("w", "b", "j") + expected = unstacked[key] + result = Series([0, 0, 2], index=unstacked.index, name=key) + tm.assert_series_equal(result, expected) + + stacked = unstacked.stack(["x", "y"]) + stacked.index = stacked.index.reorder_levels(df.index.names) + # Workaround for GH #17886 (unnecessarily casts to float): + stacked = stacked.astype(np.int64) + result = stacked.loc[df.index] + tm.assert_frame_equal(result, df) + + # From a series + s = df["w"] + result = s.unstack(["x", "y"], fill_value=0) + expected = unstacked["w"] + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame(self): + + # From a dataframe + rows = [[1, 2], [3, 4], [5, 6], [7, 8]] + df = DataFrame(rows, columns=list("AB"), dtype=np.int32) + df.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = df.unstack(fill_value=-1) + + rows = [[1, 3, 2, 4], [-1, 5, -1, 6], [7, -1, 8, -1]] + expected = DataFrame(rows, index=list("xyz"), dtype=np.int32) + expected.columns = MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ) + tm.assert_frame_equal(result, expected) + + # From a mixed type dataframe + df["A"] = df["A"].astype(np.int16) + df["B"] = df["B"].astype(np.float64) + + result = df.unstack(fill_value=-1) + expected["A"] = expected["A"].astype(np.int16) + expected["B"] = expected["B"].astype(np.float64) + tm.assert_frame_equal(result, expected) + + # From a dataframe with incorrect data type for fill_value + result = df.unstack(fill_value=0.5) + + rows = [[1, 3, 2, 4], [0.5, 5, 0.5, 6], [7, 0.5, 8, 0.5]] + expected = DataFrame(rows, index=list("xyz"), dtype=float) + expected.columns = MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_datetime(self): + + # Test unstacking with date times + dv = date_range("2012-01-01", periods=4).values + data = Series(dv) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [dv[0], pd.NaT, dv[3]], "b": [dv[1], dv[2], pd.NaT]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=dv[0]) + expected = DataFrame( + {"a": [dv[0], dv[0], dv[3]], "b": [dv[1], dv[2], dv[0]]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_timedelta(self): + + # Test unstacking with time deltas + td = [Timedelta(days=i) for i in range(4)] + data = Series(td) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [td[0], pd.NaT, td[3]], "b": [td[1], td[2], pd.NaT]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=td[1]) + expected = DataFrame( + {"a": [td[0], td[1], td[3]], "b": [td[1], td[2], 
td[1]]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_period(self): + + # Test unstacking with period + periods = [ + Period("2012-01"), + Period("2012-02"), + Period("2012-03"), + Period("2012-04"), + ] + data = Series(periods) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [periods[0], None, periods[3]], "b": [periods[1], periods[2], None]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=periods[1]) + expected = DataFrame( + { + "a": [periods[0], periods[1], periods[3]], + "b": [periods[1], periods[2], periods[1]], + }, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_categorical(self): + + # Test unstacking with categorical + data = Series(["a", "b", "c", "a"], dtype="category") + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + # By default missing values will be NaN + result = data.unstack() + expected = DataFrame( + { + "a": pd.Categorical(list("axa"), categories=list("abc")), + "b": pd.Categorical(list("bcx"), categories=list("abc")), + }, + index=list("xyz"), + ) + tm.assert_frame_equal(result, expected) + + # Fill with non-category results in a ValueError + msg = r"Cannot setitem on a Categorical with a new category \(d\)" + with pytest.raises(TypeError, match=msg): + data.unstack(fill_value="d") + + # Fill with category value replaces missing values as expected + result = data.unstack(fill_value="c") + expected = DataFrame( + { + "a": pd.Categorical(list("aca"), categories=list("abc")), + "b": pd.Categorical(list("bcc"), categories=list("abc")), + }, + index=list("xyz"), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_tuplename_in_multiindex(self): + # GH 19966 + idx = MultiIndex.from_product( + [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] + ) + df = DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx) + result = df.unstack(("A", "a")) + + expected = DataFrame( + [[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]], + columns=MultiIndex.from_tuples( + [ + ("d", "a"), + ("d", "b"), + ("d", "c"), + ("e", "a"), + ("e", "b"), + ("e", "c"), + ], + names=[None, ("A", "a")], + ), + index=Index([1, 2, 3], name=("B", "b")), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "unstack_idx, expected_values, expected_index, expected_columns", + [ + ( + ("A", "a"), + [[1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2]], + MultiIndex.from_tuples( + [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] + ), + MultiIndex.from_tuples( + [("d", "a"), ("d", "b"), ("e", "a"), ("e", "b")], + names=[None, ("A", "a")], + ), + ), + ( + (("A", "a"), "B"), + [[1, 1, 1, 1, 2, 2, 2, 2], [1, 1, 1, 1, 2, 2, 2, 2]], + Index([3, 4], name="C"), + MultiIndex.from_tuples( + [ + ("d", "a", 1), + ("d", "a", 2), + ("d", "b", 1), + ("d", "b", 2), + ("e", "a", 1), + ("e", "a", 2), + ("e", "b", 1), + ("e", "b", 2), + ], + names=[None, ("A", "a"), "B"], + ), + ), + ], + ) + def test_unstack_mixed_type_name_in_multiindex( + self, unstack_idx, expected_values, expected_index, expected_columns + ): + # GH 19966 + idx = MultiIndex.from_product( + [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] + ) + df = DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx) + result = df.unstack(unstack_idx) + + expected = DataFrame( + 
expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_preserve_dtypes(self): + # Checks fix for #11847 + df = DataFrame( + { + "state": ["IL", "MI", "NC"], + "index": ["a", "b", "c"], + "some_categories": Series(["a", "b", "c"]).astype("category"), + "A": np.random.rand(3), + "B": 1, + "C": "foo", + "D": pd.Timestamp("20010102"), + "E": Series([1.0, 50.0, 100.0]).astype("float32"), + "F": Series([3.0, 4.0, 5.0]).astype("float64"), + "G": False, + "H": Series([1, 200, 923442]).astype("int8"), + } + ) + + def unstack_and_compare(df, column_name): + unstacked1 = df.unstack([column_name]) + unstacked2 = df.unstack(column_name) + tm.assert_frame_equal(unstacked1, unstacked2) + + df1 = df.set_index(["state", "index"]) + unstack_and_compare(df1, "index") + + df1 = df.set_index(["state", "some_categories"]) + unstack_and_compare(df1, "some_categories") + + df1 = df.set_index(["F", "C"]) + unstack_and_compare(df1, "F") + + df1 = df.set_index(["G", "B", "state"]) + unstack_and_compare(df1, "B") + + df1 = df.set_index(["E", "A"]) + unstack_and_compare(df1, "E") + + df1 = df.set_index(["state", "index"]) + s = df1["A"] + unstack_and_compare(s, "index") + + def test_stack_ints(self): + columns = MultiIndex.from_tuples(list(itertools.product(range(3), repeat=3))) + df = DataFrame(np.random.randn(30, 27), columns=columns) + + tm.assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1)) + tm.assert_frame_equal( + df.stack(level=[-2, -1]), df.stack(level=1).stack(level=1) + ) + + df_named = df.copy() + return_value = df_named.columns.set_names(range(3), inplace=True) + assert return_value is None + + tm.assert_frame_equal( + df_named.stack(level=[1, 2]), df_named.stack(level=1).stack(level=1) + ) + + def test_stack_mixed_levels(self): + columns = MultiIndex.from_tuples( + [ + ("A", "cat", "long"), + ("B", "cat", "long"), + ("A", "dog", "short"), + ("B", "dog", "short"), + ], + names=["exp", "animal", "hair_length"], + ) + df = DataFrame(np.random.randn(4, 4), columns=columns) + + animal_hair_stacked = df.stack(level=["animal", "hair_length"]) + exp_hair_stacked = df.stack(level=["exp", "hair_length"]) + + # GH #8584: Need to check that stacking works when a number + # is passed that is both a level name and in the range of + # the level numbers + df2 = df.copy() + df2.columns.names = ["exp", "animal", 1] + tm.assert_frame_equal( + df2.stack(level=["animal", 1]), animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=["exp", 1]), exp_hair_stacked, check_names=False + ) + + # When mixed types are passed and the ints are not level + # names, raise + msg = ( + "level should contain all level names or all level numbers, not " + "a mixture of the two" + ) + with pytest.raises(ValueError, match=msg): + df2.stack(level=["animal", 0]) + + # GH #8584: Having 0 in the level names could raise a + # strange error about lexsort depth + df3 = df.copy() + df3.columns.names = ["exp", "animal", 0] + tm.assert_frame_equal( + df3.stack(level=["animal", 0]), animal_hair_stacked, check_names=False + ) + + def test_stack_int_level_names(self): + columns = MultiIndex.from_tuples( + [ + ("A", "cat", "long"), + ("B", "cat", "long"), + ("A", "dog", "short"), + ("B", "dog", "short"), + ], + names=["exp", "animal", "hair_length"], + ) + df = DataFrame(np.random.randn(4, 4), columns=columns) + + exp_animal_stacked = df.stack(level=["exp", "animal"]) + animal_hair_stacked = df.stack(level=["animal", 
"hair_length"]) + exp_hair_stacked = df.stack(level=["exp", "hair_length"]) + + df2 = df.copy() + df2.columns.names = [0, 1, 2] + tm.assert_frame_equal( + df2.stack(level=[1, 2]), animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=[0, 1]), exp_animal_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=[0, 2]), exp_hair_stacked, check_names=False + ) + + # Out-of-order int column names + df3 = df.copy() + df3.columns.names = [2, 0, 1] + tm.assert_frame_equal( + df3.stack(level=[0, 1]), animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df3.stack(level=[2, 0]), exp_animal_stacked, check_names=False + ) + tm.assert_frame_equal( + df3.stack(level=[2, 1]), exp_hair_stacked, check_names=False + ) + + def test_unstack_bool(self): + df = DataFrame( + [False, False], + index=MultiIndex.from_arrays([["a", "b"], ["c", "l"]]), + columns=["col"], + ) + rs = df.unstack() + xp = DataFrame( + np.array([[False, np.nan], [np.nan, False]], dtype=object), + index=["a", "b"], + columns=MultiIndex.from_arrays([["col", "col"], ["c", "l"]]), + ) + tm.assert_frame_equal(rs, xp) + + def test_unstack_level_binding(self): + # GH9856 + mi = MultiIndex( + levels=[["foo", "bar"], ["one", "two"], ["a", "b"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]], + names=["first", "second", "third"], + ) + s = Series(0, index=mi) + result = s.unstack([1, 2]).stack(0) + + expected_mi = MultiIndex( + levels=[["foo", "bar"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["first", "second"], + ) + + expected = DataFrame( + np.array( + [[np.nan, 0], [0, np.nan], [np.nan, 0], [0, np.nan]], dtype=np.float64 + ), + index=expected_mi, + columns=Index(["a", "b"], name="third"), + ) + + tm.assert_frame_equal(result, expected) + + def test_unstack_to_series(self, float_frame): + # check reversibility + data = float_frame.unstack() + + assert isinstance(data, Series) + undo = data.unstack().T + tm.assert_frame_equal(undo, float_frame) + + # check NA handling + data = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) + data.index = Index(["a", "b", "c"]) + result = data.unstack() + + midx = MultiIndex( + levels=[["x", "y"], ["a", "b", "c"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + ) + expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=midx) + + tm.assert_series_equal(result, expected) + + # check composability of unstack + old_data = data.copy() + for _ in range(4): + data = data.unstack() + tm.assert_frame_equal(old_data, data) + + def test_unstack_dtypes(self): + + # GH 2929 + rows = [[1, 1, 3, 4], [1, 2, 3, 4], [2, 1, 3, 4], [2, 2, 3, 4]] + + df = DataFrame(rows, columns=list("ABCD")) + result = df.dtypes + expected = Series([np.dtype("int64")] * 4, index=list("ABCD")) + tm.assert_series_equal(result, expected) + + # single dtype + df2 = df.set_index(["A", "B"]) + df3 = df2.unstack("B") + result = df3.dtypes + expected = Series( + [np.dtype("int64")] * 4, + index=MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + + # mixed + df2 = df.set_index(["A", "B"]) + df2["C"] = 3.0 + df3 = df2.unstack("B") + result = df3.dtypes + expected = Series( + [np.dtype("float64")] * 2 + [np.dtype("int64")] * 2, + index=MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + df2["D"] = "foo" + df3 = df2.unstack("B") + result = df3.dtypes + expected = Series( + 
[np.dtype("float64")] * 2 + [np.dtype("object")] * 2, + index=MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "c, d", + ( + (np.zeros(5), np.zeros(5)), + (np.arange(5, dtype="f8"), np.arange(5, 10, dtype="f8")), + ), + ) + def test_unstack_dtypes_mixed_date(self, c, d): + # GH7405 + df = DataFrame( + { + "A": ["a"] * 5, + "C": c, + "D": d, + "B": date_range("2012-01-01", periods=5), + } + ) + + right = df.iloc[:3].copy(deep=True) + + df = df.set_index(["A", "B"]) + df["D"] = df["D"].astype("int64") + + left = df.iloc[:3].unstack(0) + right = right.set_index(["A", "B"]).unstack(0) + right[("D", "a")] = right[("D", "a")].astype("int64") + + assert left.shape == (3, 2) + tm.assert_frame_equal(left, right) + + def test_unstack_non_unique_index_names(self): + idx = MultiIndex.from_tuples([("a", "b"), ("c", "d")], names=["c1", "c1"]) + df = DataFrame([1, 2], index=idx) + msg = "The name c1 occurs multiple times, use a level number" + with pytest.raises(ValueError, match=msg): + df.unstack("c1") + + with pytest.raises(ValueError, match=msg): + df.T.stack("c1") + + def test_unstack_unused_levels(self): + # GH 17845: unused codes in index make unstack() cast int to float + idx = MultiIndex.from_product([["a"], ["A", "B", "C", "D"]])[:-1] + df = DataFrame([[1, 0]] * 3, index=idx) + + result = df.unstack() + exp_col = MultiIndex.from_product([[0, 1], ["A", "B", "C"]]) + expected = DataFrame([[1, 1, 1, 0, 0, 0]], index=["a"], columns=exp_col) + tm.assert_frame_equal(result, expected) + assert (result.columns.levels[1] == idx.levels[1]).all() + + # Unused items on both levels + levels = [[0, 1, 7], [0, 1, 2, 3]] + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] + idx = MultiIndex(levels, codes) + block = np.arange(4).reshape(2, 2) + df = DataFrame(np.concatenate([block, block + 4]), index=idx) + result = df.unstack() + expected = DataFrame( + np.concatenate([block * 2, block * 2 + 1], axis=1), columns=idx + ) + tm.assert_frame_equal(result, expected) + assert (result.columns.levels[1] == idx.levels[1]).all() + + @pytest.mark.parametrize( + "level, idces, col_level, idx_level", + ( + (0, [13, 16, 6, 9, 2, 5, 8, 11], [np.nan, "a", 2], [np.nan, 5, 1]), + (1, [8, 11, 1, 4, 12, 15, 13, 16], [np.nan, 5, 1], [np.nan, "a", 2]), + ), + ) + def test_unstack_unused_levels_mixed_with_nan( + self, level, idces, col_level, idx_level + ): + # With mixed dtype and NaN + levels = [["a", 2, "c"], [1, 3, 5, 7]] + codes = [[0, -1, 1, 1], [0, 2, -1, 2]] + idx = MultiIndex(levels, codes) + data = np.arange(8) + df = DataFrame(data.reshape(4, 2), index=idx) + + result = df.unstack(level=level) + exp_data = np.zeros(18) * np.nan + exp_data[idces] = data + cols = MultiIndex.from_product([[0, 1], col_level]) + expected = DataFrame(exp_data.reshape(3, 6), index=idx_level, columns=cols) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("cols", [["A", "C"], slice(None)]) + def test_unstack_unused_level(self, cols): + # GH 18562 : unused codes on the unstacked level + df = DataFrame([[2010, "a", "I"], [2011, "b", "II"]], columns=["A", "B", "C"]) + + ind = df.set_index(["A", "B", "C"], drop=False) + selection = ind.loc[(slice(None), slice(None), "I"), cols] + result = selection.unstack() + + expected = ind.iloc[[0]][cols] + expected.columns = MultiIndex.from_product( + [expected.columns, ["I"]], names=[None, "C"] + ) + expected.index = expected.index.droplevel("C") + tm.assert_frame_equal(result, expected) 
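+ # Minimal usage sketch of the fill_value behaviour exercised above
+ # (hypothetical `s`, not executed by the suite): for a Series with a
+ # two-level MultiIndex,
+ #     s.unstack()             # missing cells become NaN, ints upcast to float
+ #     s.unstack(fill_value=0) # missing cells become 0 and a matching
+ #                             # integer dtype is preserved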
+ + def test_unstack_long_index(self): + # PH 32624: Error when using a lot of indices to unstack. + # The error occurred only, if a lot of indices are used. + df = DataFrame( + [[1]], + columns=MultiIndex.from_tuples([[0]], names=["c1"]), + index=MultiIndex.from_tuples( + [[0, 0, 1, 0, 0, 0, 1]], + names=["i1", "i2", "i3", "i4", "i5", "i6", "i7"], + ), + ) + result = df.unstack(["i2", "i3", "i4", "i5", "i6", "i7"]) + expected = DataFrame( + [[1]], + columns=MultiIndex.from_tuples( + [[0, 0, 1, 0, 0, 0, 1]], + names=["c1", "i2", "i3", "i4", "i5", "i6", "i7"], + ), + index=Index([0], name="i1"), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_multi_level_cols(self): + # PH 24729: Unstack a df with multi level columns + df = DataFrame( + [[0.0, 0.0], [0.0, 0.0]], + columns=MultiIndex.from_tuples( + [["B", "C"], ["B", "D"]], names=["c1", "c2"] + ), + index=MultiIndex.from_tuples( + [[10, 20, 30], [10, 20, 40]], names=["i1", "i2", "i3"] + ), + ) + assert df.unstack(["i2", "i1"]).columns.names[-2:] == ["i2", "i1"] + + def test_unstack_multi_level_rows_and_cols(self): + # PH 28306: Unstack df with multi level cols and rows + df = DataFrame( + [[1, 2], [3, 4], [-1, -2], [-3, -4]], + columns=MultiIndex.from_tuples([["a", "b", "c"], ["d", "e", "f"]]), + index=MultiIndex.from_tuples( + [ + ["m1", "P3", 222], + ["m1", "A5", 111], + ["m2", "P3", 222], + ["m2", "A5", 111], + ], + names=["i1", "i2", "i3"], + ), + ) + result = df.unstack(["i3", "i2"]) + expected = df.unstack(["i3"]).unstack(["i2"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("idx", [("jim", "joe"), ("joe", "jim")]) + @pytest.mark.parametrize("lev", list(range(2))) + def test_unstack_nan_index1(self, idx, lev): + # GH7466 + def cast(val): + val_str = "" if val != val else val + return f"{val_str:1}" + + df = DataFrame( + { + "jim": ["a", "b", np.nan, "d"], + "joe": ["w", "x", "y", "z"], + "jolie": ["a.w", "b.x", " .y", "d.z"], + } + ) + + left = df.set_index(["jim", "joe"]).unstack()["jolie"] + right = df.set_index(["joe", "jim"]).unstack()["jolie"].T + tm.assert_frame_equal(left, right) + + mi = df.set_index(list(idx)) + udf = mi.unstack(level=lev) + assert udf.notna().values.sum() == len(df) + mk_list = lambda a: list(a) if isinstance(a, tuple) else [a] + rows, cols = udf["jolie"].notna().values.nonzero() + for i, j in zip(rows, cols): + left = sorted(udf["jolie"].iloc[i, j].split(".")) + right = mk_list(udf["jolie"].index[i]) + mk_list(udf["jolie"].columns[j]) + right = sorted(map(cast, right)) + assert left == right + + @pytest.mark.parametrize("idx", itertools.permutations(["1st", "2nd", "3rd"])) + @pytest.mark.parametrize("lev", list(range(3))) + @pytest.mark.parametrize("col", ["4th", "5th"]) + def test_unstack_nan_index_repeats(self, idx, lev, col): + def cast(val): + val_str = "" if val != val else val + return f"{val_str:1}" + + df = DataFrame( + { + "1st": ["d"] * 3 + + [np.nan] * 5 + + ["a"] * 2 + + ["c"] * 3 + + ["e"] * 2 + + ["b"] * 5, + "2nd": ["y"] * 2 + + ["w"] * 3 + + [np.nan] * 3 + + ["z"] * 4 + + [np.nan] * 3 + + ["x"] * 3 + + [np.nan] * 2, + "3rd": [ + 67, + 39, + 53, + 72, + 57, + 80, + 31, + 18, + 11, + 30, + 59, + 50, + 62, + 59, + 76, + 52, + 14, + 53, + 60, + 51, + ], + } + ) + + df["4th"], df["5th"] = ( + df.apply(lambda r: ".".join(map(cast, r)), axis=1), + df.apply(lambda r: ".".join(map(cast, r.iloc[::-1])), axis=1), + ) + + mi = df.set_index(list(idx)) + udf = mi.unstack(level=lev) + assert udf.notna().values.sum() == 2 * len(df) + mk_list = lambda a: list(a) if 
isinstance(a, tuple) else [a] + rows, cols = udf[col].notna().values.nonzero() + for i, j in zip(rows, cols): + left = sorted(udf[col].iloc[i, j].split(".")) + right = mk_list(udf[col].index[i]) + mk_list(udf[col].columns[j]) + right = sorted(map(cast, right)) + assert left == right + + def test_unstack_nan_index2(self): + # GH7403 + df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)}) + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [ + [3, 0, 1, 2, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 4, 5, 6, 7], + ] + vals = list(map(list, zip(*vals))) + idx = Index([np.nan, 0, 1, 2, 4, 5, 6, 7], name="B") + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)}) + df.iloc[2, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [[2, np.nan], [0, 4], [1, 5], [np.nan, 6], [3, 7]] + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + idx = Index([np.nan, 0, 1, 2, 3], name="B") + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)}) + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [[3, np.nan], [0, 4], [1, 5], [2, 6], [np.nan, 7]] + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + idx = Index([np.nan, 0, 1, 2, 3], name="B") + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + def test_unstack_nan_index3(self, using_array_manager): + # GH7401 + df = DataFrame( + { + "A": list("aaaaabbbbb"), + "B": (date_range("2012-01-01", periods=5).tolist() * 2), + "C": np.arange(10), + } + ) + + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack() + + vals = np.array([[3, 0, 1, 2, np.nan, 4], [np.nan, 5, 6, 7, 8, 9]]) + idx = Index(["a", "b"], name="A") + cols = MultiIndex( + levels=[["C"], date_range("2012-01-01", periods=5)], + codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, "B"], + ) + + right = DataFrame(vals, columns=cols, index=idx) + if using_array_manager: + # INFO(ArrayManager) with ArrayManager preserve dtype where possible + cols = right.columns[[1, 2, 3, 5]] + right[cols] = right[cols].astype(df["C"].dtype) + tm.assert_frame_equal(left, right) + + def test_unstack_nan_index4(self): + # GH4862 + vals = [ + ["Hg", np.nan, np.nan, 680585148], + ["U", 0.0, np.nan, 680585148], + ["Pb", 7.07e-06, np.nan, 680585148], + ["Sn", 2.3614e-05, 0.0133, 680607017], + ["Ag", 0.0, 0.0133, 680607017], + ["Hg", -0.00015, 0.0133, 680607017], + ] + df = DataFrame( + vals, + columns=["agent", "change", "dosage", "s_id"], + index=[17263, 17264, 17265, 17266, 17267, 17268], + ) + + left = df.copy().set_index(["s_id", "dosage", "agent"]).unstack() + + vals = [ + [np.nan, np.nan, 7.07e-06, np.nan, 0.0], + [0.0, -0.00015, np.nan, 2.3614e-05, np.nan], + ] + + idx = MultiIndex( + levels=[[680585148, 680607017], [0.0133]], + codes=[[0, 1], [-1, 0]], + names=["s_id", "dosage"], + ) + + cols = MultiIndex( + levels=[["change"], ["Ag", "Hg", "Pb", "Sn", "U"]], + codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]], + names=[None, "agent"], + ) + + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + left = 
df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"]) + tm.assert_frame_equal(left.unstack(), right) + + def test_unstack_nan_index5(self): + # GH9497 - multiple unstack with nulls + df = DataFrame( + { + "1st": [1, 2, 1, 2, 1, 2], + "2nd": date_range("2014-02-01", periods=6, freq="D"), + "jim": 100 + np.arange(6), + "joe": (np.random.randn(6) * 10).round(2), + } + ) + + df["3rd"] = df["2nd"] - pd.Timestamp("2014-02-02") + df.loc[1, "2nd"] = df.loc[3, "2nd"] = np.nan + df.loc[1, "3rd"] = df.loc[4, "3rd"] = np.nan + + left = df.set_index(["1st", "2nd", "3rd"]).unstack(["2nd", "3rd"]) + assert left.notna().values.sum() == 2 * len(df) + + for col in ["jim", "joe"]: + for _, r in df.iterrows(): + key = r["1st"], (col, r["2nd"], r["3rd"]) + assert r[col] == left.loc[key] + + def test_stack_datetime_column_multiIndex(self): + # GH 8039 + t = datetime(2014, 1, 1) + df = DataFrame([1, 2, 3, 4], columns=MultiIndex.from_tuples([(t, "A", "B")])) + result = df.stack() + + eidx = MultiIndex.from_product([(0, 1, 2, 3), ("B",)]) + ecols = MultiIndex.from_tuples([(t, "A")]) + expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "multiindex_columns", + [ + [0, 1, 2, 3, 4], + [0, 1, 2, 3], + [0, 1, 2, 4], + [0, 1, 2], + [1, 2, 3], + [2, 3, 4], + [0, 1], + [0, 2], + [0, 3], + [0], + [2], + [4], + [4, 3, 2, 1, 0], + [3, 2, 1, 0], + [4, 2, 1, 0], + [2, 1, 0], + [3, 2, 1], + [4, 3, 2], + [1, 0], + [2, 0], + [3, 0], + ], + ) + @pytest.mark.parametrize("level", (-1, 0, 1, [0, 1], [1, 0])) + def test_stack_partial_multiIndex(self, multiindex_columns, level): + # GH 8844 + full_multiindex = MultiIndex.from_tuples( + [("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")], + names=["Upper", "Lower"], + ) + multiindex = full_multiindex[multiindex_columns] + df = DataFrame( + np.arange(3 * len(multiindex)).reshape(3, len(multiindex)), + columns=multiindex, + ) + result = df.stack(level=level, dropna=False) + + if isinstance(level, int): + # Stacking a single level should not make any all-NaN rows, + # so df.stack(level=level, dropna=False) should be the same + # as df.stack(level=level, dropna=True). 
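+ # (all-NaN rows can only appear when stacking multiple levels creates
+ # level combinations that were absent from the original columns, hence
+ # dropna has no effect for a single level)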
+ expected = df.stack(level=level, dropna=True) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + df.columns = MultiIndex.from_tuples( + df.columns.to_numpy(), names=df.columns.names + ) + expected = df.stack(level=level, dropna=False) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + def test_stack_full_multiIndex(self): + # GH 8844 + full_multiindex = MultiIndex.from_tuples( + [("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")], + names=["Upper", "Lower"], + ) + df = DataFrame(np.arange(6).reshape(2, 3), columns=full_multiindex[[0, 1, 3]]) + result = df.stack(dropna=False) + expected = DataFrame( + [[0, 2], [1, np.nan], [3, 5], [4, np.nan]], + index=MultiIndex( + levels=[[0, 1], ["u", "x", "y", "z"]], + codes=[[0, 0, 1, 1], [1, 3, 1, 3]], + names=[None, "Lower"], + ), + columns=Index(["B", "C"], name="Upper"), + ) + expected["B"] = expected["B"].astype(df.dtypes[0]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [False, True]) + @pytest.mark.parametrize("labels", [list("yxz"), list("yxy")]) + def test_stack_preserve_categorical_dtype(self, ordered, labels): + # GH13854 + cidx = pd.CategoricalIndex(labels, categories=list("xyz"), ordered=ordered) + df = DataFrame([[10, 11, 12]], columns=cidx) + result = df.stack() + + # `MultiIndex.from_product` preserves categorical dtype - + # it's tested elsewhere. + midx = MultiIndex.from_product([df.index, cidx]) + expected = Series([10, 11, 12], index=midx) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ordered", [False, True]) + @pytest.mark.parametrize( + "labels,data", + [ + (list("xyz"), [10, 11, 12, 13, 14, 15]), + (list("zyx"), [14, 15, 12, 13, 10, 11]), + ], + ) + def test_stack_multi_preserve_categorical_dtype(self, ordered, labels, data): + # GH-36991 + cidx = pd.CategoricalIndex(labels, categories=sorted(labels), ordered=ordered) + cidx2 = pd.CategoricalIndex(["u", "v"], ordered=ordered) + midx = MultiIndex.from_product([cidx, cidx2]) + df = DataFrame([sorted(data)], columns=midx) + result = df.stack([0, 1]) + + s_cidx = pd.CategoricalIndex(sorted(labels), ordered=ordered) + expected = Series(data, index=MultiIndex.from_product([[0], s_cidx, cidx2])) + + tm.assert_series_equal(result, expected) + + def test_stack_preserve_categorical_dtype_values(self): + # GH-23077 + cat = pd.Categorical(["a", "a", "b", "c"]) + df = DataFrame({"A": cat, "B": cat}) + result = df.stack() + index = MultiIndex.from_product([[0, 1, 2, 3], ["A", "B"]]) + expected = Series( + pd.Categorical(["a", "a", "a", "a", "b", "b", "c", "c"]), index=index + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index, columns", + [ + ([0, 0, 1, 1], MultiIndex.from_product([[1, 2], ["a", "b"]])), + ([0, 0, 2, 3], MultiIndex.from_product([[1, 2], ["a", "b"]])), + ([0, 1, 2, 3], MultiIndex.from_product([[1, 2], ["a", "b"]])), + ], + ) + def test_stack_multi_columns_non_unique_index(self, index, columns): + # GH-28301 + df = DataFrame(index=index, columns=columns).fillna(1) + stacked = df.stack() + new_index = MultiIndex.from_tuples(stacked.index.to_numpy()) + expected = DataFrame( + stacked.to_numpy(), index=new_index, columns=stacked.columns + ) + tm.assert_frame_equal(stacked, expected) + stacked_codes = np.asarray(stacked.index.codes) + expected_codes = np.asarray(new_index.codes) + 
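+ # also compare the integer codes: even with a non-unique row index the
+ # stacked MultiIndex should be encoded the same way as one rebuilt directly
+ # from the stacked tuples (GH-28301)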
tm.assert_numpy_array_equal(stacked_codes, expected_codes) + + @pytest.mark.parametrize("level", [0, 1]) + def test_unstack_mixed_extension_types(self, level): + index = MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 1)], names=["a", "b"]) + df = DataFrame( + { + "A": pd.array([0, 1, None], dtype="Int64"), + "B": pd.Categorical(["a", "a", "b"]), + }, + index=index, + ) + + result = df.unstack(level=level) + expected = df.astype(object).unstack(level=level) + + expected_dtypes = Series( + [df.A.dtype] * 2 + [df.B.dtype] * 2, index=result.columns + ) + tm.assert_series_equal(result.dtypes, expected_dtypes) + tm.assert_frame_equal(result.astype(object), expected) + + @pytest.mark.parametrize("level", [0, "baz"]) + def test_unstack_swaplevel_sortlevel(self, level): + # GH 20994 + mi = MultiIndex.from_product([[0], ["d", "c"]], names=["bar", "baz"]) + df = DataFrame([[0, 2], [1, 3]], index=mi, columns=["B", "A"]) + df.columns.name = "foo" + + expected = DataFrame( + [[3, 1, 2, 0]], + columns=MultiIndex.from_tuples( + [("c", "A"), ("c", "B"), ("d", "A"), ("d", "B")], names=["baz", "foo"] + ), + ) + expected.index.name = "bar" + + result = df.unstack().swaplevel(axis=1).sort_index(axis=1, level=level) + tm.assert_frame_equal(result, expected) + + +def test_unstack_fill_frame_object(): + # GH12815 Test unstacking with object. + data = Series(["a", "b", "c", "a"], dtype="object") + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + # By default missing values will be NaN + result = data.unstack() + expected = DataFrame( + {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]}, index=list("xyz") + ) + tm.assert_frame_equal(result, expected) + + # Fill with any value replaces missing values as expected + result = data.unstack(fill_value="d") + expected = DataFrame( + {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz") + ) + tm.assert_frame_equal(result, expected) + + +def test_unstack_timezone_aware_values(): + # GH 18338 + df = DataFrame( + { + "timestamp": [pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC")], + "a": ["a"], + "b": ["b"], + "c": ["c"], + }, + columns=["timestamp", "a", "b", "c"], + ) + result = df.set_index(["a", "b"]).unstack() + expected = DataFrame( + [[pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC"), "c"]], + index=Index(["a"], name="a"), + columns=MultiIndex( + levels=[["timestamp", "c"], ["b"]], + codes=[[0, 1], [0, 0]], + names=[None, "b"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_stack_timezone_aware_values(): + # GH 19420 + ts = date_range(freq="D", start="20180101", end="20180103", tz="America/New_York") + df = DataFrame({"A": ts}, index=["a", "b", "c"]) + result = df.stack() + expected = Series( + ts, + index=MultiIndex(levels=[["a", "b", "c"], ["A"]], codes=[[0, 1, 2], [0, 0, 0]]), + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_stack_empty_frame(dropna): + # GH 36113 + expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + result = DataFrame(dtype=np.float64).stack(dropna=dropna) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +@pytest.mark.parametrize("fill_value", [None, 0]) +def test_stack_unstack_empty_frame(dropna, fill_value): + # GH 36113 + result = ( + DataFrame(dtype=np.int64).stack(dropna=dropna).unstack(fill_value=fill_value) + ) + expected = DataFrame(dtype=np.int64) + tm.assert_frame_equal(result, expected) + + +def 
test_unstack_single_index_series(): + # GH 36113 + msg = r"index must be a MultiIndex to unstack.*" + with pytest.raises(ValueError, match=msg): + Series(dtype=np.int64).unstack() + + +def test_unstacking_multi_index_df(): + # see gh-30740 + df = DataFrame( + { + "name": ["Alice", "Bob"], + "score": [9.5, 8], + "employed": [False, True], + "kids": [0, 0], + "gender": ["female", "male"], + } + ) + df = df.set_index(["name", "employed", "kids", "gender"]) + df = df.unstack(["gender"], fill_value=0) + expected = df.unstack("employed", fill_value=0).unstack("kids", fill_value=0) + result = df.unstack(["employed", "kids"], fill_value=0) + expected = DataFrame( + [[9.5, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 8.0]], + index=Index(["Alice", "Bob"], name="name"), + columns=MultiIndex.from_tuples( + [ + ("score", "female", False, 0), + ("score", "female", True, 0), + ("score", "male", False, 0), + ("score", "male", True, 0), + ], + names=[None, "gender", "employed", "kids"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_stack_positional_level_duplicate_column_names(): + # https://github.com/pandas-dev/pandas/issues/36353 + columns = MultiIndex.from_product([("x", "y"), ("y", "z")], names=["a", "a"]) + df = DataFrame([[1, 1, 1, 1]], columns=columns) + result = df.stack(0) + + new_columns = Index(["y", "z"], name="a") + new_index = MultiIndex.from_tuples([(0, "x"), (0, "y")], names=[None, "a"]) + expected = DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns) + + tm.assert_frame_equal(result, expected) + + +def test_unstack_non_slice_like_blocks(using_array_manager): + # Case where the mgr_locs of a DataFrame's underlying blocks are not slice-like + + mi = MultiIndex.from_product([range(5), ["A", "B", "C"]]) + df = DataFrame(np.random.randn(15, 4), index=mi) + df[1] = df[1].astype(np.int64) + if not using_array_manager: + assert any(not x.mgr_locs.is_slice_like for x in df._mgr.blocks) + + res = df.unstack() + + expected = pd.concat([df[n].unstack() for n in range(4)], keys=range(4), axis=1) + tm.assert_frame_equal(res, expected) + + +class TestStackUnstackMultiLevel: + def test_unstack(self, multiindex_year_month_day_dataframe_random_data): + # just check that it works for now + ymd = multiindex_year_month_day_dataframe_random_data + + unstacked = ymd.unstack() + unstacked.unstack() + + # test that ints work + ymd.astype(int).unstack() + + # test that int32 work + ymd.astype(np.int32).unstack() + + @pytest.mark.parametrize( + "result_rows,result_columns,index_product,expected_row", + [ + ( + [[1, 1, None, None, 30.0, None], [2, 2, None, None, 30.0, None]], + ["ix1", "ix2", "col1", "col2", "col3", "col4"], + 2, + [None, None, 30.0, None], + ), + ( + [[1, 1, None, None, 30.0], [2, 2, None, None, 30.0]], + ["ix1", "ix2", "col1", "col2", "col3"], + 2, + [None, None, 30.0], + ), + ( + [[1, 1, None, None, 30.0], [2, None, None, None, 30.0]], + ["ix1", "ix2", "col1", "col2", "col3"], + None, + [None, None, 30.0], + ), + ], + ) + def test_unstack_partial( + self, result_rows, result_columns, index_product, expected_row + ): + # check for regressions on this issue: + # https://github.com/pandas-dev/pandas/issues/19351 + # make sure DataFrame.unstack() works when its run on a subset of the DataFrame + # and the Index levels contain values that are not present in the subset + result = DataFrame(result_rows, columns=result_columns).set_index( + ["ix1", "ix2"] + ) + result = result.iloc[1:2].unstack("ix2") + expected = DataFrame( + [expected_row], + columns=MultiIndex.from_product( + 
[result_columns[2:], [index_product]], names=[None, "ix2"] + ), + index=Index([2], name="ix1"), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_multiple_no_empty_columns(self): + index = MultiIndex.from_tuples( + [(0, "foo", 0), (0, "bar", 0), (1, "baz", 1), (1, "qux", 1)] + ) + + s = Series(np.random.randn(4), index=index) + + unstacked = s.unstack([1, 2]) + expected = unstacked.dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected) + + def test_stack(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + + # regular roundtrip + unstacked = ymd.unstack() + restacked = unstacked.stack() + tm.assert_frame_equal(restacked, ymd) + + unlexsorted = ymd.sort_index(level=2) + + unstacked = unlexsorted.unstack(2) + restacked = unstacked.stack() + tm.assert_frame_equal(restacked.sort_index(level=0), ymd) + + unlexsorted = unlexsorted[::-1] + unstacked = unlexsorted.unstack(1) + restacked = unstacked.stack().swaplevel(1, 2) + tm.assert_frame_equal(restacked.sort_index(level=0), ymd) + + unlexsorted = unlexsorted.swaplevel(0, 1) + unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) + restacked = unstacked.stack(0).swaplevel(1, 2) + tm.assert_frame_equal(restacked.sort_index(level=0), ymd) + + # columns unsorted + unstacked = ymd.unstack() + unstacked = unstacked.sort_index(axis=1, ascending=False) + restacked = unstacked.stack() + tm.assert_frame_equal(restacked, ymd) + + # more than 2 levels in the columns + unstacked = ymd.unstack(1).unstack(1) + + result = unstacked.stack(1) + expected = ymd.unstack() + tm.assert_frame_equal(result, expected) + + result = unstacked.stack(2) + expected = ymd.unstack(1) + tm.assert_frame_equal(result, expected) + + result = unstacked.stack(0) + expected = ymd.stack().unstack(1).unstack(1) + tm.assert_frame_equal(result, expected) + + # not all levels present in each echelon + unstacked = ymd.unstack(2).loc[:, ::3] + stacked = unstacked.stack().stack() + ymd_stacked = ymd.stack() + tm.assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) + + # stack with negative number + result = ymd.unstack(0).stack(-2) + expected = ymd.unstack(0).stack(0) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "idx, columns, exp_idx", + [ + [ + list("abab"), + ["1st", "2nd", "3rd"], + MultiIndex( + levels=[["a", "b"], ["1st", "2nd", "3rd"]], + codes=[ + np.tile(np.arange(2).repeat(3), 2), + np.tile(np.arange(3), 4), + ], + ), + ], + [ + list("abab"), + ["1st", "2nd", "1st"], + MultiIndex( + levels=[["a", "b"], ["1st", "2nd"]], + codes=[np.tile(np.arange(2).repeat(3), 2), np.tile([0, 1, 0], 4)], + ), + ], + [ + MultiIndex.from_tuples((("a", 2), ("b", 1), ("a", 1), ("b", 2))), + ["1st", "2nd", "1st"], + MultiIndex( + levels=[["a", "b"], [1, 2], ["1st", "2nd"]], + codes=[ + np.tile(np.arange(2).repeat(3), 2), + np.repeat([1, 0, 1], [3, 6, 3]), + np.tile([0, 1, 0], 4), + ], + ), + ], + ], + ) + def test_stack_duplicate_index(self, idx, columns, exp_idx): + # GH10417 + df = DataFrame( + np.arange(12).reshape(4, 3), + index=idx, + columns=columns, + ) + result = df.stack() + expected = Series(np.arange(12), index=exp_idx) + tm.assert_series_equal(result, expected) + assert result.index.is_unique is False + li, ri = result.index, expected.index + tm.assert_index_equal(li, ri) + + def test_unstack_odd_failure(self): + data = """day,time,smoker,sum,len +Fri,Dinner,No,8.25,3. 
+Fri,Dinner,Yes,27.03,9 +Fri,Lunch,No,3.0,1 +Fri,Lunch,Yes,13.68,6 +Sat,Dinner,No,139.63,45 +Sat,Dinner,Yes,120.77,42 +Sun,Dinner,No,180.57,57 +Sun,Dinner,Yes,66.82,19 +Thu,Dinner,No,3.0,1 +Thu,Lunch,No,117.32,44 +Thu,Lunch,Yes,51.51,17""" + + df = pd.read_csv(StringIO(data)).set_index(["day", "time", "smoker"]) + + # it works, #2100 + result = df.unstack(2) + + recons = result.stack() + tm.assert_frame_equal(recons, df) + + def test_stack_mixed_dtype(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + df = frame.T + df["foo", "four"] = "foo" + df = df.sort_index(level=1, axis=1) + + stacked = df.stack() + result = df["foo"].stack().sort_index() + tm.assert_series_equal(stacked["foo"], result, check_names=False) + assert result.name is None + assert stacked["bar"].dtype == np.float_ + + def test_unstack_bug(self): + df = DataFrame( + { + "state": ["naive", "naive", "naive", "active", "active", "active"], + "exp": ["a", "b", "b", "b", "a", "a"], + "barcode": [1, 2, 3, 4, 1, 3], + "v": ["hi", "hi", "bye", "bye", "bye", "peace"], + "extra": np.arange(6.0), + } + ) + + result = df.groupby(["state", "exp", "barcode", "v"]).apply(len) + + unstacked = result.unstack() + restacked = unstacked.stack() + tm.assert_series_equal(restacked, result.reindex(restacked.index).astype(float)) + + def test_stack_unstack_preserve_names(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + unstacked = frame.unstack() + assert unstacked.index.name == "first" + assert unstacked.columns.names == ["exp", "second"] + + restacked = unstacked.stack() + assert restacked.index.names == frame.index.names + + @pytest.mark.parametrize("method", ["stack", "unstack"]) + def test_stack_unstack_wrong_level_name( + self, method, multiindex_dataframe_random_data + ): + # GH 18303 - wrong level name should raise + frame = multiindex_dataframe_random_data + + # A DataFrame with flat axes: + df = frame.loc["foo"] + + with pytest.raises(KeyError, match="does not match index name"): + getattr(df, method)("mistake") + + if method == "unstack": + # Same on a Series: + s = df.iloc[:, 0] + with pytest.raises(KeyError, match="does not match index name"): + getattr(s, method)("mistake") + + def test_unstack_level_name(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + result = frame.unstack("second") + expected = frame.unstack(level=1) + tm.assert_frame_equal(result, expected) + + def test_stack_level_name(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + unstacked = frame.unstack("second") + result = unstacked.stack("exp") + expected = frame.unstack().stack(0) + tm.assert_frame_equal(result, expected) + + result = frame.stack("exp") + expected = frame.stack() + tm.assert_series_equal(result, expected) + + def test_stack_unstack_multiple( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + unstacked = ymd.unstack(["year", "month"]) + expected = ymd.unstack("year").unstack("month") + tm.assert_frame_equal(unstacked, expected) + assert unstacked.columns.names == expected.columns.names + + # series + s = ymd["A"] + s_unstacked = s.unstack(["year", "month"]) + tm.assert_frame_equal(s_unstacked, expected["A"]) + + restacked = unstacked.stack(["year", "month"]) + restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) + restacked = restacked.sort_index(level=0) + + tm.assert_frame_equal(restacked, ymd) + assert restacked.index.names == 
ymd.index.names + + # GH #451 + unstacked = ymd.unstack([1, 2]) + expected = ymd.unstack(1).unstack(1).dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected) + + unstacked = ymd.unstack([2, 1]) + expected = ymd.unstack(2).unstack(1).dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) + + def test_stack_names_and_numbers( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + unstacked = ymd.unstack(["year", "month"]) + + # Can't use mixture of names and numbers to stack + with pytest.raises(ValueError, match="level should contain"): + unstacked.stack([0, "month"]) + + def test_stack_multiple_out_of_bounds( + self, multiindex_year_month_day_dataframe_random_data + ): + # nlevels == 3 + ymd = multiindex_year_month_day_dataframe_random_data + + unstacked = ymd.unstack(["year", "month"]) + + with pytest.raises(IndexError, match="Too many levels"): + unstacked.stack([2, 3]) + with pytest.raises(IndexError, match="not a valid level number"): + unstacked.stack([-4, -3]) + + def test_unstack_period_series(self): + # GH4342 + idx1 = pd.PeriodIndex( + ["2013-01", "2013-01", "2013-02", "2013-02", "2013-03", "2013-03"], + freq="M", + name="period", + ) + idx2 = Index(["A", "B"] * 3, name="str") + value = [1, 2, 3, 4, 5, 6] + + idx = MultiIndex.from_arrays([idx1, idx2]) + s = Series(value, index=idx) + + result1 = s.unstack() + result2 = s.unstack(level=1) + result3 = s.unstack(level=0) + + e_idx = pd.PeriodIndex( + ["2013-01", "2013-02", "2013-03"], freq="M", name="period" + ) + expected = DataFrame( + {"A": [1, 3, 5], "B": [2, 4, 6]}, index=e_idx, columns=["A", "B"] + ) + expected.columns.name = "str" + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) + + idx1 = pd.PeriodIndex( + ["2013-01", "2013-01", "2013-02", "2013-02", "2013-03", "2013-03"], + freq="M", + name="period1", + ) + + idx2 = pd.PeriodIndex( + ["2013-12", "2013-11", "2013-10", "2013-09", "2013-08", "2013-07"], + freq="M", + name="period2", + ) + idx = MultiIndex.from_arrays([idx1, idx2]) + s = Series(value, index=idx) + + result1 = s.unstack() + result2 = s.unstack(level=1) + result3 = s.unstack(level=0) + + e_idx = pd.PeriodIndex( + ["2013-01", "2013-02", "2013-03"], freq="M", name="period1" + ) + e_cols = pd.PeriodIndex( + ["2013-07", "2013-08", "2013-09", "2013-10", "2013-11", "2013-12"], + freq="M", + name="period2", + ) + expected = DataFrame( + [ + [np.nan, np.nan, np.nan, np.nan, 2, 1], + [np.nan, np.nan, 4, 3, np.nan, np.nan], + [6, 5, np.nan, np.nan, np.nan, np.nan], + ], + index=e_idx, + columns=e_cols, + ) + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) + + def test_unstack_period_frame(self): + # GH4342 + idx1 = pd.PeriodIndex( + ["2014-01", "2014-02", "2014-02", "2014-02", "2014-01", "2014-01"], + freq="M", + name="period1", + ) + idx2 = pd.PeriodIndex( + ["2013-12", "2013-12", "2014-02", "2013-10", "2013-10", "2014-02"], + freq="M", + name="period2", + ) + value = {"A": [1, 2, 3, 4, 5, 6], "B": [6, 5, 4, 3, 2, 1]} + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame(value, index=idx) + + result1 = df.unstack() + result2 = df.unstack(level=1) + result3 = df.unstack(level=0) + + e_1 = pd.PeriodIndex(["2014-01", "2014-02"], freq="M", name="period1") + e_2 = pd.PeriodIndex( + ["2013-10", "2013-12", "2014-02", "2013-10", 
"2013-12", "2014-02"], + freq="M", + name="period2", + ) + e_cols = MultiIndex.from_arrays(["A A A B B B".split(), e_2]) + expected = DataFrame( + [[5, 1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], index=e_1, columns=e_cols + ) + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + e_1 = pd.PeriodIndex( + ["2014-01", "2014-02", "2014-01", "2014-02"], freq="M", name="period1" + ) + e_2 = pd.PeriodIndex( + ["2013-10", "2013-12", "2014-02"], freq="M", name="period2" + ) + e_cols = MultiIndex.from_arrays(["A A B B".split(), e_1]) + expected = DataFrame( + [[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols + ) + + tm.assert_frame_equal(result3, expected) + + def test_stack_multiple_bug(self): + # bug when some uniques are not present in the data GH#3170 + id_col = ([1] * 3) + ([2] * 3) + name = (["a"] * 3) + (["b"] * 3) + date = pd.to_datetime(["2013-01-03", "2013-01-04", "2013-01-05"] * 2) + var1 = np.random.randint(0, 100, 6) + df = DataFrame({"ID": id_col, "NAME": name, "DATE": date, "VAR1": var1}) + + multi = df.set_index(["DATE", "ID"]) + multi.columns.name = "Params" + unst = multi.unstack("ID") + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + down = unst.resample("W-THU").mean() + + rs = down.stack("ID") + xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID") + xp.columns.name = "Params" + tm.assert_frame_equal(rs, xp) + + def test_stack_dropna(self): + # GH#3997 + df = DataFrame({"A": ["a1", "a2"], "B": ["b1", "b2"], "C": [1, 1]}) + df = df.set_index(["A", "B"]) + + stacked = df.unstack().stack(dropna=False) + assert len(stacked) > len(stacked.dropna()) + + stacked = df.unstack().stack(dropna=True) + tm.assert_frame_equal(stacked, stacked.dropna()) + + def test_unstack_multiple_hierarchical(self): + df = DataFrame( + index=[ + [0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1], + [0, 1, 0, 1, 0, 1, 0, 1], + ], + columns=[[0, 0, 1, 1], [0, 1, 0, 1]], + ) + + df.index.names = ["a", "b", "c"] + df.columns.names = ["d", "e"] + + # it works! + df.unstack(["b", "c"]) + + def test_unstack_sparse_keyspace(self): + # memory problems with naive impl GH#2278 + # Generate Long File & Test Pivot + NUM_ROWS = 1000 + + df = DataFrame( + { + "A": np.random.randint(100, size=NUM_ROWS), + "B": np.random.randint(300, size=NUM_ROWS), + "C": np.random.randint(-7, 7, size=NUM_ROWS), + "D": np.random.randint(-19, 19, size=NUM_ROWS), + "E": np.random.randint(3000, size=NUM_ROWS), + "F": np.random.randn(NUM_ROWS), + } + ) + + idf = df.set_index(["A", "B", "C", "D", "E"]) + + # it works! 
is sufficient + idf.unstack("E") + + def test_unstack_unobserved_keys(self): + # related to GH#2278 refactoring + levels = [[0, 1], [0, 1, 2, 3]] + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] + + index = MultiIndex(levels, codes) + + df = DataFrame(np.random.randn(4, 2), index=index) + + result = df.unstack() + assert len(result.columns) == 4 + + recons = result.stack() + tm.assert_frame_equal(recons, df) + + @pytest.mark.slow + def test_unstack_number_of_levels_larger_than_int32(self, monkeypatch): + # GH#20601 + # GH 26314: Change ValueError to PerformanceWarning + + class MockUnstacker(reshape_lib._Unstacker): + def __init__(self, *args, **kwargs) -> None: + # __init__ will raise the warning + super().__init__(*args, **kwargs) + raise Exception("Don't compute final result.") + + with monkeypatch.context() as m: + m.setattr(reshape_lib, "_Unstacker", MockUnstacker) + df = DataFrame( + np.random.randn(2**16, 2), + index=[np.arange(2**16), np.arange(2**16)], + ) + msg = "The following operation may generate" + with tm.assert_produces_warning(PerformanceWarning, match=msg): + with pytest.raises(Exception, match="Don't compute final result."): + df.unstack() + + @pytest.mark.parametrize( + "levels", + itertools.chain.from_iterable( + itertools.product(itertools.permutations([0, 1, 2], width), repeat=2) + for width in [2, 3] + ), + ) + @pytest.mark.parametrize("stack_lev", range(2)) + def test_stack_order_with_unsorted_levels(self, levels, stack_lev): + # GH#16323 + # deep check for 1-row case + columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + df = DataFrame(columns=columns, data=[range(4)]) + df_stacked = df.stack(stack_lev) + assert all( + df.loc[row, col] + == df_stacked.loc[(row, col[stack_lev]), col[1 - stack_lev]] + for row in df.index + for col in df.columns + ) + + def test_stack_order_with_unsorted_levels_multi_row(self): + # GH#16323 + + # check multi-row case + mi = MultiIndex( + levels=[["A", "C", "B"], ["B", "A", "C"]], + codes=[np.repeat(range(3), 3), np.tile(range(3), 3)], + ) + df = DataFrame( + columns=mi, index=range(5), data=np.arange(5 * len(mi)).reshape(5, -1) + ) + assert all( + df.loc[row, col] == df.stack(0).loc[(row, col[0]), col[1]] + for row in df.index + for col in df.columns + ) + + def test_stack_unstack_unordered_multiindex(self): + # GH# 18265 + values = np.arange(5) + data = np.vstack( + [ + [f"b{x}" for x in values], # b0, b1, .. + [f"a{x}" for x in values], # a0, a1, .. 
+ ] + ) + df = DataFrame(data.T, columns=["b", "a"]) + df.columns.name = "first" + second_level_dict = {"x": df} + multi_level_df = pd.concat(second_level_dict, axis=1) + multi_level_df.columns.names = ["second", "first"] + df = multi_level_df.reindex(sorted(multi_level_df.columns), axis=1) + result = df.stack(["first", "second"]).unstack(["first", "second"]) + expected = DataFrame( + [["a0", "b0"], ["a1", "b1"], ["a2", "b2"], ["a3", "b3"], ["a4", "b4"]], + index=[0, 1, 2, 3, 4], + columns=MultiIndex.from_tuples( + [("a", "x"), ("b", "x")], names=["first", "second"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_preserve_types( + self, multiindex_year_month_day_dataframe_random_data + ): + # GH#403 + ymd = multiindex_year_month_day_dataframe_random_data + ymd["E"] = "foo" + ymd["F"] = 2 + + unstacked = ymd.unstack("month") + assert unstacked["A", 1].dtype == np.float64 + assert unstacked["E", 1].dtype == np.object_ + assert unstacked["F", 1].dtype == np.float64 + + def test_unstack_group_index_overflow(self): + codes = np.tile(np.arange(500), 2) + level = np.arange(500) + + index = MultiIndex( + levels=[level] * 8 + [[0, 1]], + codes=[codes] * 8 + [np.arange(2).repeat(500)], + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack() + assert result.shape == (500, 2) + + # test roundtrip + stacked = result.stack() + tm.assert_series_equal(s, stacked.reindex(s.index)) + + # put it at beginning + index = MultiIndex( + levels=[[0, 1]] + [level] * 8, + codes=[np.arange(2).repeat(500)] + [codes] * 8, + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack(0) + assert result.shape == (500, 2) + + # put it in middle + index = MultiIndex( + levels=[level] * 4 + [[0, 1]] + [level] * 4, + codes=([codes] * 4 + [np.arange(2).repeat(500)] + [codes] * 4), + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack(4) + assert result.shape == (500, 2) + + def test_unstack_with_missing_int_cast_to_float(self, using_array_manager): + # https://github.com/pandas-dev/pandas/issues/37115 + df = DataFrame( + { + "a": ["A", "A", "B"], + "b": ["ca", "cb", "cb"], + "v": [10] * 3, + } + ).set_index(["a", "b"]) + + # add another int column to get 2 blocks + df["is_"] = 1 + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + result = df.unstack("b") + result[("is_", "ca")] = result[("is_", "ca")].fillna(0) + + expected = DataFrame( + [[10.0, 10.0, 1.0, 1.0], [np.nan, 10.0, 0.0, 1.0]], + index=Index(["A", "B"], dtype="object", name="a"), + columns=MultiIndex.from_tuples( + [("v", "ca"), ("v", "cb"), ("is_", "ca"), ("is_", "cb")], + names=[None, "b"], + ), + ) + if using_array_manager: + # INFO(ArrayManager) with ArrayManager preserve dtype where possible + expected[("v", "cb")] = expected[("v", "cb")].astype("int64") + expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64") + tm.assert_frame_equal(result, expected) + + def test_unstack_with_level_has_nan(self): + # GH 37510 + df1 = DataFrame( + { + "L1": [1, 2, 3, 4], + "L2": [3, 4, 1, 2], + "L3": [1, 1, 1, 1], + "x": [1, 2, 3, 4], + } + ) + df1 = df1.set_index(["L1", "L2", "L3"]) + new_levels = ["n1", "n2", "n3", None] + df1.index = df1.index.set_levels(levels=new_levels, level="L1") + df1.index = df1.index.set_levels(levels=new_levels, level="L2") + + result = df1.unstack("L3")[("x", 1)].sort_index().index + expected = MultiIndex( + levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]], + codes=[[0, 1, 2, 3], [2, 3, 0, 1]], + names=["L1", "L2"], + ) + + 
tm.assert_index_equal(result, expected) + + def test_stack_nan_in_multiindex_columns(self): + # GH#39481 + df = DataFrame( + np.zeros([1, 5]), + columns=MultiIndex.from_tuples( + [ + (0, None, None), + (0, 2, 0), + (0, 2, 1), + (0, 3, 0), + (0, 3, 1), + ], + ), + ) + result = df.stack(2) + expected = DataFrame( + [[0.0, np.nan, np.nan], [np.nan, 0.0, 0.0], [np.nan, 0.0, 0.0]], + index=Index([(0, None), (0, 0), (0, 1)]), + columns=Index([(0, None), (0, 2), (0, 3)]), + ) + tm.assert_frame_equal(result, expected) + + def test_multi_level_stack_categorical(self): + # GH 15239 + midx = MultiIndex.from_arrays( + [ + ["A"] * 2 + ["B"] * 2, + pd.Categorical(list("abab")), + pd.Categorical(list("ccdd")), + ] + ) + df = DataFrame(np.arange(8).reshape(2, 4), columns=midx) + result = df.stack([1, 2]) + expected = DataFrame( + [ + [0, np.nan], + [np.nan, 2], + [1, np.nan], + [np.nan, 3], + [4, np.nan], + [np.nan, 6], + [5, np.nan], + [np.nan, 7], + ], + columns=["A", "B"], + index=MultiIndex.from_arrays( + [ + [0] * 4 + [1] * 4, + pd.Categorical(list("aabbaabb")), + pd.Categorical(list("cdcdcdcd")), + ] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_stack_nan_level(self): + # GH 9406 + df_nan = DataFrame( + np.arange(4).reshape(2, 2), + columns=MultiIndex.from_tuples( + [("A", np.nan), ("B", "b")], names=["Upper", "Lower"] + ), + index=Index([0, 1], name="Num"), + dtype=np.float64, + ) + result = df_nan.stack() + expected = DataFrame( + [[0.0, np.nan], [np.nan, 1], [2.0, np.nan], [np.nan, 3.0]], + columns=Index(["A", "B"], name="Upper"), + index=MultiIndex.from_tuples( + [(0, np.nan), (0, "b"), (1, np.nan), (1, "b")], names=["Num", "Lower"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_categorical_columns(self): + # GH 14018 + idx = MultiIndex.from_product([["A"], [0, 1]]) + df = DataFrame({"cat": pd.Categorical(["a", "b"])}, index=idx) + result = df.unstack() + expected = DataFrame( + { + 0: pd.Categorical(["a"], categories=["a", "b"]), + 1: pd.Categorical(["b"], categories=["a", "b"]), + }, + index=["A"], + ) + expected.columns = MultiIndex.from_tuples([("cat", 0), ("cat", 1)]) + tm.assert_frame_equal(result, expected) + + def test_stack_unsorted(self): + # GH 16925 + PAE = ["ITA", "FRA"] + VAR = ["A1", "A2"] + TYP = ["CRT", "DBT", "NET"] + MI = MultiIndex.from_product([PAE, VAR, TYP], names=["PAE", "VAR", "TYP"]) + + V = list(range(len(MI))) + DF = DataFrame(data=V, index=MI, columns=["VALUE"]) + + DF = DF.unstack(["VAR", "TYP"]) + DF.columns = DF.columns.droplevel(0) + DF.loc[:, ("A0", "NET")] = 9999 + + result = DF.stack(["VAR", "TYP"]).sort_index() + expected = DF.sort_index(axis=1).stack(["VAR", "TYP"]).sort_index() + tm.assert_series_equal(result, expected) + + def test_stack_nullable_dtype(self): + # GH#43561 + columns = MultiIndex.from_product( + [["54511", "54515"], ["r", "t_mean"]], names=["station", "element"] + ) + index = Index([1, 2, 3], name="time") + + arr = np.array([[50, 226, 10, 215], [10, 215, 9, 220], [305, 232, 111, 220]]) + df = DataFrame(arr, columns=columns, index=index, dtype=pd.Int64Dtype()) + + result = df.stack("station") + + expected = df.astype(np.int64).stack("station").astype(pd.Int64Dtype()) + tm.assert_frame_equal(result, expected) + + # non-homogeneous case + df[df.columns[0]] = df[df.columns[0]].astype(pd.Float64Dtype()) + result = df.stack("station") + + # TODO(EA2D): we get object dtype because DataFrame.values can't + # be an EA + expected = df.astype(object).stack("station") + tm.assert_frame_equal(result, expected) 
diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py new file mode 100644 index 00000000..d5331b10 --- /dev/null +++ b/pandas/tests/frame/test_subclass.py @@ -0,0 +1,747 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.fixture() +def gpd_style_subclass_df(): + class SubclassedDataFrame(DataFrame): + @property + def _constructor(self): + return SubclassedDataFrame + + return SubclassedDataFrame({"a": [1, 2, 3]}) + + +class TestDataFrameSubclassing: + def test_frame_subclassing_and_slicing(self): + # Subclass frame and ensure it returns the right class on slicing it + # In reference to PR 9632 + + class CustomSeries(Series): + @property + def _constructor(self): + return CustomSeries + + def custom_series_function(self): + return "OK" + + class CustomDataFrame(DataFrame): + """ + Subclasses pandas DF, fills DF with simulation results, adds some + custom plotting functions. + """ + + def __init__(self, *args, **kw) -> None: + super().__init__(*args, **kw) + + @property + def _constructor(self): + return CustomDataFrame + + _constructor_sliced = CustomSeries + + def custom_frame_function(self): + return "OK" + + data = {"col1": range(10), "col2": range(10)} + cdf = CustomDataFrame(data) + + # Did we get back our own DF class? + assert isinstance(cdf, CustomDataFrame) + + # Do we get back our own Series class after selecting a column? + cdf_series = cdf.col1 + assert isinstance(cdf_series, CustomSeries) + assert cdf_series.custom_series_function() == "OK" + + # Do we get back our own DF class after slicing row-wise? + cdf_rows = cdf[1:5] + assert isinstance(cdf_rows, CustomDataFrame) + assert cdf_rows.custom_frame_function() == "OK" + + # Make sure sliced part of multi-index frame is custom class + mcol = MultiIndex.from_tuples([("A", "A"), ("A", "B")]) + cdf_multi = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) + assert isinstance(cdf_multi["A"], CustomDataFrame) + + mcol = MultiIndex.from_tuples([("A", ""), ("B", "")]) + cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) + assert isinstance(cdf_multi2["A"], CustomSeries) + + def test_dataframe_metadata(self): + df = tm.SubclassedDataFrame( + {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"] + ) + df.testattr = "XXX" + + assert df.testattr == "XXX" + assert df[["X"]].testattr == "XXX" + assert df.loc[["a", "b"], :].testattr == "XXX" + assert df.iloc[[0, 1], :].testattr == "XXX" + + # see gh-9776 + assert df.iloc[0:1, :].testattr == "XXX" + + # see gh-10553 + unpickled = tm.round_trip_pickle(df) + tm.assert_frame_equal(df, unpickled) + assert df._metadata == unpickled._metadata + assert df.testattr == unpickled.testattr + + def test_indexing_sliced(self): + # GH 11559 + df = tm.SubclassedDataFrame( + {"X": [1, 2, 3], "Y": [4, 5, 6], "Z": [7, 8, 9]}, index=["a", "b", "c"] + ) + res = df.loc[:, "X"] + exp = tm.SubclassedSeries([1, 2, 3], index=list("abc"), name="X") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.iloc[:, 1] + exp = tm.SubclassedSeries([4, 5, 6], index=list("abc"), name="Y") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc[:, "Z"] + exp = tm.SubclassedSeries([7, 8, 9], index=list("abc"), name="Z") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc["a", :] + exp = 
tm.SubclassedSeries([1, 4, 7], index=list("XYZ"), name="a") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.iloc[1, :] + exp = tm.SubclassedSeries([2, 5, 8], index=list("XYZ"), name="b") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc["c", :] + exp = tm.SubclassedSeries([3, 6, 9], index=list("XYZ"), name="c") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + def test_subclass_attr_err_propagation(self): + # GH 11808 + class A(DataFrame): + @property + def bar(self): + return self.i_dont_exist + + with pytest.raises(AttributeError, match=".*i_dont_exist.*"): + A().bar + + def test_subclass_align(self): + # GH 12983 + df1 = tm.SubclassedDataFrame( + {"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE") + ) + df2 = tm.SubclassedDataFrame( + {"c": [1, 2, 4], "d": [1, 2, 4]}, index=list("ABD") + ) + + res1, res2 = df1.align(df2, axis=0) + exp1 = tm.SubclassedDataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + exp2 = tm.SubclassedDataFrame( + {"c": [1, 2, np.nan, 4, np.nan], "d": [1, 2, np.nan, 4, np.nan]}, + index=list("ABCDE"), + ) + assert isinstance(res1, tm.SubclassedDataFrame) + tm.assert_frame_equal(res1, exp1) + assert isinstance(res2, tm.SubclassedDataFrame) + tm.assert_frame_equal(res2, exp2) + + res1, res2 = df1.a.align(df2.c) + assert isinstance(res1, tm.SubclassedSeries) + tm.assert_series_equal(res1, exp1.a) + assert isinstance(res2, tm.SubclassedSeries) + tm.assert_series_equal(res2, exp2.c) + + def test_subclass_align_combinations(self): + # GH 12983 + df = tm.SubclassedDataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) + s = tm.SubclassedSeries([1, 2, 4], index=list("ABD"), name="x") + + # frame + series + res1, res2 = df.align(s, axis=0) + exp1 = tm.SubclassedDataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + # name is lost when + exp2 = tm.SubclassedSeries( + [1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x" + ) + + assert isinstance(res1, tm.SubclassedDataFrame) + tm.assert_frame_equal(res1, exp1) + assert isinstance(res2, tm.SubclassedSeries) + tm.assert_series_equal(res2, exp2) + + # series + frame + res1, res2 = s.align(df) + assert isinstance(res1, tm.SubclassedSeries) + tm.assert_series_equal(res1, exp2) + assert isinstance(res2, tm.SubclassedDataFrame) + tm.assert_frame_equal(res2, exp1) + + def test_subclass_iterrows(self): + # GH 13977 + df = tm.SubclassedDataFrame({"a": [1]}) + for i, row in df.iterrows(): + assert isinstance(row, tm.SubclassedSeries) + tm.assert_series_equal(row, df.loc[i]) + + def test_subclass_stack(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["a", "b", "c"], + columns=["X", "Y", "Z"], + ) + + res = df.stack() + exp = tm.SubclassedSeries( + [1, 2, 3, 4, 5, 6, 7, 8, 9], index=[list("aaabbbccc"), list("XYZXYZXYZ")] + ) + + tm.assert_series_equal(res, exp) + + def test_subclass_stack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 12], + [11, 13], + [20, 22], + [21, 23], + [30, 32], + [31, 33], + [40, 42], + [41, 
43], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))), + names=["aaa", "ccc", "yyy"], + ), + columns=Index(["W", "X"], name="www"), + ) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack("yyy") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10, 11], + [12, 13], + [20, 21], + [22, 23], + [30, 31], + [32, 33], + [40, 41], + [42, 43], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))), + names=["aaa", "ccc", "www"], + ), + columns=Index(["y", "z"], name="yyy"), + ) + + res = df.stack("www") + tm.assert_frame_equal(res, exp) + + def test_subclass_stack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 12.0], + [11, 13.0], + [20, 22.0], + [21, 23.0], + [30, 32.0], + [31, 33.0], + [40, 42.0], + [41, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))), + names=["aaa", "ccc", "yyy"], + ), + columns=Index(["W", "X"], name="www"), + ) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack("yyy") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10.0, 11.0], + [12.0, 13.0], + [20.0, 21.0], + [22.0, 23.0], + [30.0, 31.0], + [32.0, 33.0], + [40.0, 41.0], + [42.0, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))), + names=["aaa", "ccc", "www"], + ), + columns=Index(["y", "z"], name="yyy"), + ) + + res = df.stack("www") + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["a", "b", "c"], + columns=["X", "Y", "Z"], + ) + + res = df.unstack() + exp = tm.SubclassedSeries( + [1, 4, 7, 2, 5, 8, 3, 6, 9], index=[list("XXXYYYZZZ"), list("abcabcabc")] + ) + + tm.assert_series_equal(res, exp) + + def test_subclass_unstack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [[10, 20, 11, 21, 12, 22, 13, 23], [30, 40, 31, 41, 32, 42, 33, 43]], + index=Index(["A", "B"], name="aaa"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))), + names=["www", "yyy", "ccc"], + ), + ) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack("ccc") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [[10, 30, 11, 31, 12, 32, 13, 33], [20, 40, 21, 41, 22, 42, 23, 43]], + index=Index(["c", "d"], name="ccc"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))), + names=["www", "yyy", "aaa"], + ), + ) + + res = df.unstack("aaa") + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + 
[30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0], + [30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0], + ], + index=Index(["A", "B"], name="aaa"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))), + names=["www", "yyy", "ccc"], + ), + ) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack("ccc") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0], + [20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0], + ], + index=Index(["c", "d"], name="ccc"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))), + names=["www", "yyy", "aaa"], + ), + ) + + res = df.unstack("aaa") + tm.assert_frame_equal(res, exp) + + def test_subclass_pivot(self): + # GH 15564 + df = tm.SubclassedDataFrame( + { + "index": ["A", "B", "C", "C", "B", "A"], + "columns": ["One", "One", "One", "Two", "Two", "Two"], + "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], + } + ) + + pivoted = df.pivot(index="index", columns="columns", values="values") + + expected = tm.SubclassedDataFrame( + { + "One": {"A": 1.0, "B": 2.0, "C": 3.0}, + "Two": {"A": 1.0, "B": 2.0, "C": 3.0}, + } + ) + + expected.index.name, expected.columns.name = "index", "columns" + + tm.assert_frame_equal(pivoted, expected) + + def test_subclassed_melt(self): + # GH 15564 + cheese = tm.SubclassedDataFrame( + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) + + melted = pd.melt(cheese, id_vars=["first", "last"]) + + expected = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 5.5], + ["Mary", "Bo", "height", 6.0], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + tm.assert_frame_equal(melted, expected) + + def test_subclassed_wide_to_long(self): + # GH 9762 + + np.random.seed(123) + x = np.random.randn(3) + df = tm.SubclassedDataFrame( + { + "A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), x)), + } + ) + + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A": ["a", "b", "c", "d", "e", "f"], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = tm.SubclassedDataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year") + + tm.assert_frame_equal(long_frame, expected) + + def test_subclassed_apply(self): + # GH 19822 + + def check_row_subclass(row): + assert isinstance(row, tm.SubclassedSeries) + + def stretch(row): + if row["variable"] == "height": + row["value"] += 0.5 + return row + + df = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 5.5], + ["Mary", "Bo", "height", 6.0], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + df.apply(lambda x: check_row_subclass(x)) + df.apply(lambda x: check_row_subclass(x), axis=1) + + expected = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 6.0], + 
["Mary", "Bo", "height", 6.5], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + result = df.apply(lambda x: stretch(x), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedDataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]) + + result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedSeries([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + assert not isinstance(result, tm.SubclassedDataFrame) + tm.assert_series_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:.*None will no longer:FutureWarning") + def test_subclassed_reductions(self, all_reductions): + # GH 25596 + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = getattr(df, all_reductions)() + assert isinstance(result, tm.SubclassedSeries) + + def test_subclassed_count(self): + + df = tm.SubclassedDataFrame( + { + "Person": ["John", "Myla", "Lewis", "John", "Myla"], + "Age": [24.0, np.nan, 21.0, 33, 26], + "Single": [False, True, True, True, False], + } + ) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame({"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]}) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + with tm.assert_produces_warning(FutureWarning): + result = df.count(level=1) + assert isinstance(result, tm.SubclassedDataFrame) + + df = tm.SubclassedDataFrame() + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + def test_isin(self): + + df = tm.SubclassedDataFrame( + {"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"] + ) + result = df.isin([0, 2]) + assert isinstance(result, tm.SubclassedDataFrame) + + def test_duplicated(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame() + result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) + + @pytest.mark.parametrize("idx_method", ["idxmax", "idxmin"]) + def test_idx(self, idx_method): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = getattr(df, idx_method)() + assert isinstance(result, tm.SubclassedSeries) + + def test_dot(self): + + df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + s = tm.SubclassedSeries([1, 1, 2, 1]) + result = df.dot(s) + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + s = tm.SubclassedDataFrame([1, 1, 2, 1]) + result = df.dot(s) + assert isinstance(result, tm.SubclassedDataFrame) + + def test_memory_usage(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = 
df.memory_usage() + assert isinstance(result, tm.SubclassedSeries) + + result = df.memory_usage(index=False) + assert isinstance(result, tm.SubclassedSeries) + + @td.skip_if_no_scipy + def test_corrwith(self): + index = ["a", "b", "c", "d", "e"] + columns = ["one", "two", "three", "four"] + df1 = tm.SubclassedDataFrame( + np.random.randn(5, 4), index=index, columns=columns + ) + df2 = tm.SubclassedDataFrame( + np.random.randn(4, 4), index=index[:4], columns=columns + ) + correls = df1.corrwith(df2, axis=1, drop=True, method="kendall") + + assert isinstance(correls, (tm.SubclassedSeries)) + + def test_asof(self): + + N = 3 + rng = pd.date_range("1/1/1990", periods=N, freq="53s") + df = tm.SubclassedDataFrame( + { + "A": [np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan], + "C": [np.nan, np.nan, np.nan], + }, + index=rng, + ) + + result = df.asof(rng[-2:]) + assert isinstance(result, tm.SubclassedDataFrame) + + result = df.asof(rng[-2]) + assert isinstance(result, tm.SubclassedSeries) + + result = df.asof("1989-12-31") + assert isinstance(result, tm.SubclassedSeries) + + def test_idxmin_preserves_subclass(self): + # GH 28330 + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.idxmin() + assert isinstance(result, tm.SubclassedSeries) + + def test_idxmax_preserves_subclass(self): + # GH 28330 + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.idxmax() + assert isinstance(result, tm.SubclassedSeries) + + def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df): + # GH 43668 + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.convert_dtypes() + assert isinstance(result, tm.SubclassedDataFrame) + + result = gpd_style_subclass_df.convert_dtypes() + assert isinstance(result, type(gpd_style_subclass_df)) + + def test_astype_preserves_subclass(self): + # GH#40810 + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + + result = df.astype({"A": np.int64, "B": np.int32, "C": np.float64}) + assert isinstance(result, tm.SubclassedDataFrame) + + def test_equals_subclass(self): + # https://github.com/pandas-dev/pandas/pull/34402 + # allow subclass in both directions + df1 = DataFrame({"a": [1, 2, 3]}) + df2 = tm.SubclassedDataFrame({"a": [1, 2, 3]}) + assert df1.equals(df2) + assert df2.equals(df1) + + def test_replace_list_method(self): + # https://github.com/pandas-dev/pandas/pull/46018 + df = tm.SubclassedDataFrame({"A": [0, 1, 2]}) + result = df.replace([1, 2], method="ffill") + expected = tm.SubclassedDataFrame({"A": [0, 0, 0]}) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py new file mode 100644 index 00000000..b8842243 --- /dev/null +++ b/pandas/tests/frame/test_ufunc.py @@ -0,0 +1,303 @@ +from functools import partial + +import numpy as np +import pytest + +from pandas.compat.numpy import np_version_gte1p22 +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_extension_array_dtype + +dtypes = [ + "int64", + "Int64", + {"A": "int64", "B": "Int64"}, +] + + +@pytest.mark.parametrize("dtype", dtypes) +def test_unary_unary(dtype): + # unary input, unary output + values = np.array([[-1, -1], [1, 1]], dtype="int64") + df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype) + result = np.positive(df) + 
expected = pd.DataFrame( + np.positive(values), index=df.index, columns=df.columns + ).astype(dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", dtypes) +def test_unary_binary(request, dtype): + # unary input, binary output + if is_extension_array_dtype(dtype) or isinstance(dtype, dict): + request.node.add_marker( + pytest.mark.xfail( + reason="Extension / mixed with multiple outputs not implemented." + ) + ) + + values = np.array([[-1, -1], [1, 1]], dtype="int64") + df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype) + result_pandas = np.modf(df) + assert isinstance(result_pandas, tuple) + assert len(result_pandas) == 2 + expected_numpy = np.modf(values) + + for result, b in zip(result_pandas, expected_numpy): + expected = pd.DataFrame(b, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", dtypes) +def test_binary_input_dispatch_binop(dtype): + # binop ufuncs are dispatched to our dunder methods. + values = np.array([[-1, -1], [1, 1]], dtype="int64") + df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype) + result = np.add(df, df) + expected = pd.DataFrame( + np.add(values, values), index=df.index, columns=df.columns + ).astype(dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func,arg,expected", + [ + (np.add, 1, [2, 3, 4, 5]), + ( + partial(np.add, where=[[False, True], [True, False]]), + np.array([[1, 1], [1, 1]]), + [0, 3, 4, 0], + ), + (np.power, np.array([[1, 1], [2, 2]]), [1, 2, 9, 16]), + (np.subtract, 2, [-1, 0, 1, 2]), + ( + partial(np.negative, where=np.array([[False, True], [True, False]])), + None, + [0, -2, -3, 0], + ), + ], +) +def test_ufunc_passes_args(func, arg, expected): + # GH#40662 + arr = np.array([[1, 2], [3, 4]]) + df = pd.DataFrame(arr) + result_inplace = np.zeros_like(arr) + # 1-argument ufunc + if arg is None: + result = func(df, out=result_inplace) + else: + result = func(df, arg, out=result_inplace) + + expected = np.array(expected).reshape(2, 2) + tm.assert_numpy_array_equal(result_inplace, expected) + + expected = pd.DataFrame(expected) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype_a", dtypes) +@pytest.mark.parametrize("dtype_b", dtypes) +def test_binary_input_aligns_columns(request, dtype_a, dtype_b): + if ( + is_extension_array_dtype(dtype_a) + or isinstance(dtype_a, dict) + or is_extension_array_dtype(dtype_b) + or isinstance(dtype_b, dict) + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Extension / mixed with multiple inputs not implemented." 
+ ) + ) + + df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}).astype(dtype_a) + + if isinstance(dtype_a, dict) and isinstance(dtype_b, dict): + dtype_b["C"] = dtype_b.pop("B") + + df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b) + with tm.assert_produces_warning(FutureWarning): + result = np.heaviside(df1, df2) + # Expected future behaviour: + # expected = np.heaviside( + # np.array([[1, 3, np.nan], [2, 4, np.nan]]), + # np.array([[1, np.nan, 3], [2, np.nan, 4]]), + # ) + # expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"]) + expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + # ensure the expected is the same when applying with numpy array + result = np.heaviside(df1, df2.values) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", dtypes) +def test_binary_input_aligns_index(request, dtype): + if is_extension_array_dtype(dtype) or isinstance(dtype, dict): + request.node.add_marker( + pytest.mark.xfail( + reason="Extension / mixed with multiple inputs not implemented." + ) + ) + df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype) + df2 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype) + with tm.assert_produces_warning(FutureWarning): + result = np.heaviside(df1, df2) + # Expected future behaviour: + # expected = np.heaviside( + # np.array([[1, 3], [3, 4], [np.nan, np.nan]]), + # np.array([[1, 3], [np.nan, np.nan], [3, 4]]), + # ) + # # TODO(FloatArray): this will be Float64Dtype. + # expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"]) + expected = pd.DataFrame( + [[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"] + ) + tm.assert_frame_equal(result, expected) + + # ensure the expected is the same when applying with numpy array + result = np.heaviside(df1, df2.values) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:Calling a ufunc on non-aligned:FutureWarning") +def test_binary_frame_series_raises(): + # We don't currently implement + df = pd.DataFrame({"A": [1, 2]}) + # with pytest.raises(NotImplementedError, match="logaddexp"): + with pytest.raises(ValueError, match=""): + np.logaddexp(df, df["A"]) + + # with pytest.raises(NotImplementedError, match="logaddexp"): + with pytest.raises(ValueError, match=""): + np.logaddexp(df["A"], df) + + +def test_unary_accumulate_axis(): + # https://github.com/pandas-dev/pandas/issues/39259 + df = pd.DataFrame({"a": [1, 3, 2, 4]}) + result = np.maximum.accumulate(df) + expected = pd.DataFrame({"a": [1, 3, 3, 4]}) + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame({"a": [1, 3, 2, 4], "b": [0.1, 4.0, 3.0, 2.0]}) + result = np.maximum.accumulate(df) + # in theory could preserve int dtype for default axis=0 + expected = pd.DataFrame({"a": [1.0, 3.0, 3.0, 4.0], "b": [0.1, 4.0, 4.0, 4.0]}) + tm.assert_frame_equal(result, expected) + + result = np.maximum.accumulate(df, axis=0) + tm.assert_frame_equal(result, expected) + + result = np.maximum.accumulate(df, axis=1) + expected = pd.DataFrame({"a": [1.0, 3.0, 2.0, 4.0], "b": [1.0, 4.0, 3.0, 4.0]}) + tm.assert_frame_equal(result, expected) + + +def test_frame_outer_deprecated(): + df = pd.DataFrame({"A": [1, 2]}) + with tm.assert_produces_warning(FutureWarning): + np.subtract.outer(df, df) + + +def test_alignment_deprecation(): + # https://github.com/pandas-dev/pandas/issues/39184 + df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = 
pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) + s1 = pd.Series([1, 2], index=["a", "b"]) + s2 = pd.Series([1, 2], index=["b", "c"]) + + # binary dataframe / dataframe + expected = pd.DataFrame({"a": [2, 4, 6], "b": [8, 10, 12]}) + + with tm.assert_produces_warning(None): + # aligned -> no warning! + result = np.add(df1, df1) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + # non-aligned -> warns + result = np.add(df1, df2) + tm.assert_frame_equal(result, expected) + + result = np.add(df1, df2.values) + tm.assert_frame_equal(result, expected) + + result = np.add(df1.values, df2) + expected = pd.DataFrame({"b": [2, 4, 6], "c": [8, 10, 12]}) + tm.assert_frame_equal(result, expected) + + # binary dataframe / series + expected = pd.DataFrame({"a": [2, 3, 4], "b": [6, 7, 8]}) + + with tm.assert_produces_warning(None): + # aligned -> no warning! + result = np.add(df1, s1) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = np.add(df1, s2) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = np.add(s2, df1) + tm.assert_frame_equal(result, expected) + + result = np.add(df1, s2.values) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_alignment_deprecation_many_inputs(request): + # https://github.com/pandas-dev/pandas/issues/39184 + # test that the deprecation also works with > 2 inputs -> using a numba + # written ufunc for this because numpy itself doesn't have such ufuncs + from numba import ( + float64, + vectorize, + ) + + if np_version_gte1p22: + mark = pytest.mark.filterwarnings( + "ignore:`np.MachAr` is deprecated.*:DeprecationWarning" + ) + request.node.add_marker(mark) + + @vectorize([float64(float64, float64, float64)]) + def my_ufunc(x, y, z): + return x + y + z + + df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) + df3 = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]}) + + with tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1, df2, df3) + expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + # all aligned -> no warning + with tm.assert_produces_warning(None): + result = my_ufunc(df1, df1, df1) + tm.assert_frame_equal(result, expected) + + # mixed frame / arrays + with tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1, df2, df3.values) + tm.assert_frame_equal(result, expected) + + # single frame -> no warning + with tm.assert_produces_warning(None): + result = my_ufunc(df1, df2.values, df3.values) + tm.assert_frame_equal(result, expected) + + # takes indices of first frame + with tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1.values, df2, df3) + expected = expected.set_axis(["b", "c"], axis=1) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py new file mode 100644 index 00000000..9caadd09 --- /dev/null +++ b/pandas/tests/frame/test_unary.py @@ -0,0 +1,184 @@ +from decimal import Decimal + +import numpy as np +import pytest + +from pandas.compat import is_numpy_dev + +import pandas as pd +import pandas._testing as tm + + +class TestDataFrameUnaryOperators: + # __pos__, __neg__, __invert__ + + @pytest.mark.parametrize( + "df,expected", + [ + (pd.DataFrame({"a": [-1, 1]}), pd.DataFrame({"a": [1, -1]})), + (pd.DataFrame({"a": [False, True]}), 
pd.DataFrame({"a": [True, False]})), + ( + pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}), + pd.DataFrame({"a": pd.Series(pd.to_timedelta([1, -1]))}), + ), + ], + ) + def test_neg_numeric(self, df, expected): + tm.assert_frame_equal(-df, expected) + tm.assert_series_equal(-df["a"], expected["a"]) + + @pytest.mark.parametrize( + "df, expected", + [ + (np.array([1, 2], dtype=object), np.array([-1, -2], dtype=object)), + ([Decimal("1.0"), Decimal("2.0")], [Decimal("-1.0"), Decimal("-2.0")]), + ], + ) + def test_neg_object(self, df, expected): + # GH#21380 + df = pd.DataFrame({"a": df}) + expected = pd.DataFrame({"a": expected}) + tm.assert_frame_equal(-df, expected) + tm.assert_series_equal(-df["a"], expected["a"]) + + @pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"a": ["a", "b"]}), + pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])}), + ], + ) + def test_neg_raises(self, df): + msg = ( + "bad operand type for unary -: 'str'|" + r"bad operand type for unary -: 'DatetimeArray'" + ) + with pytest.raises(TypeError, match=msg): + (-df) + with pytest.raises(TypeError, match=msg): + (-df["a"]) + + def test_invert(self, float_frame): + df = float_frame + + tm.assert_frame_equal(-(df < 0), ~(df < 0)) + + def test_invert_mixed(self): + shape = (10, 5) + df = pd.concat( + [ + pd.DataFrame(np.zeros(shape, dtype="bool")), + pd.DataFrame(np.zeros(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + result = ~df + expected = pd.concat( + [ + pd.DataFrame(np.ones(shape, dtype="bool")), + pd.DataFrame(-np.ones(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"a": [-1, 1]}), + pd.DataFrame({"a": [False, True]}), + pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}), + ], + ) + def test_pos_numeric(self, df): + # GH#16073 + tm.assert_frame_equal(+df, df) + tm.assert_series_equal(+df["a"], df["a"]) + + @pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"a": np.array([-1, 2], dtype=object)}), + pd.DataFrame({"a": [Decimal("-1.0"), Decimal("2.0")]}), + ], + ) + def test_pos_object(self, df): + # GH#21380 + tm.assert_frame_equal(+df, df) + tm.assert_series_equal(+df["a"], df["a"]) + + @pytest.mark.parametrize( + "df", + [ + pytest.param( + pd.DataFrame({"a": ["a", "b"]}), + marks=[pytest.mark.filterwarnings("ignore")], + ), + ], + ) + def test_pos_object_raises(self, df): + # GH#21380 + if is_numpy_dev: + with pytest.raises( + TypeError, match=r"^bad operand type for unary \+: \'str\'$" + ): + tm.assert_frame_equal(+df, df) + else: + tm.assert_series_equal(+df["a"], df["a"]) + + @pytest.mark.parametrize( + "df", [pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])})] + ) + def test_pos_raises(self, df): + msg = r"bad operand type for unary \+: 'DatetimeArray'" + with pytest.raises(TypeError, match=msg): + (+df) + with pytest.raises(TypeError, match=msg): + (+df["a"]) + + def test_unary_nullable(self): + df = pd.DataFrame( + { + "a": pd.array([1, -2, 3, pd.NA], dtype="Int64"), + "b": pd.array([4.0, -5.0, 6.0, pd.NA], dtype="Float32"), + "c": pd.array([True, False, False, pd.NA], dtype="boolean"), + # include numpy bool to make sure bool-vs-boolean behavior + # is consistent in non-NA locations + "d": np.array([True, False, False, True]), + } + ) + + result = +df + res_ufunc = np.positive(df) + expected = df + # TODO: assert that we have copies? 
+ tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) + + result = -df + res_ufunc = np.negative(df) + expected = pd.DataFrame( + { + "a": pd.array([-1, 2, -3, pd.NA], dtype="Int64"), + "b": pd.array([-4.0, 5.0, -6.0, pd.NA], dtype="Float32"), + "c": pd.array([False, True, True, pd.NA], dtype="boolean"), + "d": np.array([False, True, True, False]), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) + + result = abs(df) + res_ufunc = np.abs(df) + expected = pd.DataFrame( + { + "a": pd.array([1, 2, 3, pd.NA], dtype="Int64"), + "b": pd.array([4.0, 5.0, 6.0, pd.NA], dtype="Float32"), + "c": pd.array([True, False, False, pd.NA], dtype="boolean"), + "d": np.array([True, False, False, True]), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py new file mode 100644 index 00000000..e99e0a68 --- /dev/null +++ b/pandas/tests/frame/test_validate.py @@ -0,0 +1,41 @@ +import pytest + +from pandas.core.frame import DataFrame + + +@pytest.fixture +def dataframe(): + return DataFrame({"a": [1, 2], "b": [3, 4]}) + + +class TestDataFrameValidate: + """Tests for error handling related to data types of method arguments.""" + + @pytest.mark.parametrize( + "func", + [ + "query", + "eval", + "set_index", + "reset_index", + "dropna", + "drop_duplicates", + "sort_values", + ], + ) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, dataframe, func, inplace): + msg = 'For argument "inplace" expected type bool' + kwargs = {"inplace": inplace} + + if func == "query": + kwargs["expr"] = "a > b" + elif func == "eval": + kwargs["expr"] = "a + b" + elif func == "set_index": + kwargs["keys"] = ["a"] + elif func == "sort_values": + kwargs["by"] = ["a"] + + with pytest.raises(ValueError, match=msg): + getattr(dataframe, func)(**kwargs) diff --git a/pandas/tests/generic/__init__.py b/pandas/tests/generic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py new file mode 100644 index 00000000..0546534d --- /dev/null +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -0,0 +1,454 @@ +"""Tests dealing with the NDFrame.allows_duplicates.""" +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +not_implemented = pytest.mark.xfail(reason="Not implemented.") + +# ---------------------------------------------------------------------------- +# Preservation + + +class TestPreserves: + @pytest.mark.parametrize( + "cls, data", + [ + (pd.Series, np.array([])), + (pd.Series, [1, 2]), + (pd.DataFrame, {}), + (pd.DataFrame, {"A": [1, 2]}), + ], + ) + def test_construction_ok(self, cls, data): + result = cls(data) + assert result.flags.allows_duplicate_labels is True + + result = cls(data).set_flags(allows_duplicate_labels=False) + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize( + "func", + [ + operator.itemgetter(["a"]), + operator.methodcaller("add", 1), + operator.methodcaller("rename", str.upper), + operator.methodcaller("rename", "name"), + operator.methodcaller("abs"), + np.abs, + ], + ) + def test_preserved_series(self, func): + s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + assert func(s).flags.allows_duplicate_labels is False + + 
@pytest.mark.parametrize( + "other", [pd.Series(0, index=["a", "b", "c"]), pd.Series(0, index=["a", "b"])] + ) + # TODO: frame + @not_implemented + def test_align(self, other): + s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + a, b = s.align(other) + assert a.flags.allows_duplicate_labels is False + assert b.flags.allows_duplicate_labels is False + + def test_preserved_frame(self): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + assert df.loc[["a"]].flags.allows_duplicate_labels is False + assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False + + def test_to_frame(self): + ser = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False) + assert ser.to_frame().flags.allows_duplicate_labels is False + + @pytest.mark.parametrize("func", ["add", "sub"]) + @pytest.mark.parametrize( + "frame", [False, pytest.param(True, marks=not_implemented)] + ) + @pytest.mark.parametrize("other", [1, pd.Series([1, 2], name="A")]) + def test_binops(self, func, other, frame): + df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + if frame: + df = df.to_frame() + if isinstance(other, pd.Series) and frame: + other = other.to_frame() + func = operator.methodcaller(func, other) + assert df.flags.allows_duplicate_labels is False + assert func(df).flags.allows_duplicate_labels is False + + def test_preserve_getitem(self): + df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) + assert df[["A"]].flags.allows_duplicate_labels is False + assert df["A"].flags.allows_duplicate_labels is False + assert df.loc[0].flags.allows_duplicate_labels is False + assert df.loc[[0]].flags.allows_duplicate_labels is False + assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False + + @pytest.mark.xfail(reason="Unclear behavior.") + def test_ndframe_getitem_caching_issue(self): + # NDFrame.__getitem__ will cache the first df['A']. May need to + # invalidate that cache? Update the cached entries? 
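+        # In other words: after toggling the flag on the parent frame, a fresh
+        # df["A"] lookup below should reflect the new value instead of the
+        # Series cached by the first access.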
+ df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False) + assert df["A"].flags.allows_duplicate_labels is False + df.flags.allows_duplicate_labels = True + assert df["A"].flags.allows_duplicate_labels is True + + @pytest.mark.parametrize( + "objs, kwargs", + [ + # Series + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["c", "d"]).set_flags( + allows_duplicate_labels=False + ), + ], + {}, + ), + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"ignore_index": True}, + ), + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ), + # Frame + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"A": [1, 2]}, index=["c", "d"]).set_flags( + allows_duplicate_labels=False + ), + ], + {}, + ), + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"ignore_index": True}, + ), + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ), + # Series / Frame + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series([1, 2], index=["a", "b"], name="B",).set_flags( + allows_duplicate_labels=False, + ), + ], + {"axis": 1}, + ), + ], + ) + def test_concat(self, objs, kwargs): + result = pd.concat(objs, **kwargs) + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize( + "left, right, kwargs, expected", + [ + # false false false + pytest.param( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]).set_flags( + allows_duplicate_labels=False + ), + {"left_index": True, "right_index": True}, + False, + marks=not_implemented, + ), + # false true false + pytest.param( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + {"left_index": True, "right_index": True}, + False, + marks=not_implemented, + ), + # true true true + ( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + {"left_index": True, "right_index": True}, + True, + ), + ], + ) + def test_merge(self, left, right, kwargs, expected): + result = pd.merge(left, right, **kwargs) + assert result.flags.allows_duplicate_labels is expected + + @not_implemented + def test_groupby(self): + # XXX: This is under tested + # TODO: + # - apply + # - transform + # - Should passing a grouper that disallows duplicates propagate? 
+ df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False) + result = df.groupby([0, 0, 1]).agg("count") + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize("frame", [True, False]) + @not_implemented + def test_window(self, frame): + df = pd.Series( + 1, + index=pd.date_range("2000", periods=12), + name="A", + allows_duplicate_labels=False, + ) + if frame: + df = df.to_frame() + assert df.rolling(3).mean().flags.allows_duplicate_labels is False + assert df.ewm(3).mean().flags.allows_duplicate_labels is False + assert df.expanding(3).mean().flags.allows_duplicate_labels is False + + +# ---------------------------------------------------------------------------- +# Raises + + +class TestRaises: + @pytest.mark.parametrize( + "cls, axes", + [ + (pd.Series, {"index": ["a", "a"], "dtype": float}), + (pd.DataFrame, {"index": ["a", "a"]}), + (pd.DataFrame, {"index": ["a", "a"], "columns": ["b", "b"]}), + (pd.DataFrame, {"columns": ["b", "b"]}), + ], + ) + def test_set_flags_with_duplicates(self, cls, axes): + result = cls(**axes) + assert result.flags.allows_duplicate_labels is True + + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + cls(**axes).set_flags(allows_duplicate_labels=False) + + @pytest.mark.parametrize( + "data", + [ + pd.Series(index=[0, 0], dtype=float), + pd.DataFrame(index=[0, 0]), + pd.DataFrame(columns=[0, 0]), + ], + ) + def test_setting_allows_duplicate_labels_raises(self, data): + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + data.flags.allows_duplicate_labels = False + + assert data.flags.allows_duplicate_labels is True + + def test_series_raises(self): + a = pd.Series(0, index=["a", "b"]) + b = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.concat([a, b]) + + @pytest.mark.parametrize( + "getter, target", + [ + (operator.itemgetter(["A", "A"]), None), + # loc + (operator.itemgetter(["a", "a"]), "loc"), + pytest.param(operator.itemgetter(("a", ["A", "A"])), "loc"), + (operator.itemgetter((["a", "a"], "A")), "loc"), + # iloc + (operator.itemgetter([0, 0]), "iloc"), + pytest.param(operator.itemgetter((0, [0, 0])), "iloc"), + pytest.param(operator.itemgetter(([0, 0], 0)), "iloc"), + ], + ) + def test_getitem_raises(self, getter, target): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + if target: + # df, df.loc, or df.iloc + target = getattr(df, target) + else: + target = df + + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + getter(target) + + @pytest.mark.parametrize( + "objs, kwargs", + [ + ( + [ + pd.Series(1, index=[0, 1], name="a").set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=[0, 1], name="a").set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ) + ], + ) + def test_concat_raises(self, objs, kwargs): + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.concat(objs, **kwargs) + + @not_implemented + def test_merge_raises(self): + a = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags( + allows_duplicate_labels=False + ) + b = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"]) + msg = "Index has duplicates." 
+ with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.merge(a, b, left_index=True, right_index=True) + + +@pytest.mark.parametrize( + "idx", + [ + pd.Index([1, 1]), + pd.Index(["a", "a"]), + pd.Index([1.1, 1.1]), + pd.PeriodIndex([pd.Period("2000", "D")] * 2), + pd.DatetimeIndex([pd.Timestamp("2000")] * 2), + pd.TimedeltaIndex([pd.Timedelta("1D")] * 2), + pd.CategoricalIndex(["a", "a"]), + pd.IntervalIndex([pd.Interval(0, 1)] * 2), + pd.MultiIndex.from_tuples([("a", 1), ("a", 1)]), + ], + ids=lambda x: type(x).__name__, +) +def test_raises_basic(idx): + msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.Series(1, index=idx).set_flags(allows_duplicate_labels=False) + + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.DataFrame({"A": [1, 1]}, index=idx).set_flags(allows_duplicate_labels=False) + + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): + pd.DataFrame([[1, 2]], columns=idx).set_flags(allows_duplicate_labels=False) + + +def test_format_duplicate_labels_message(): + idx = pd.Index(["a", "b", "a", "b", "c"]) + result = idx._format_duplicate_message() + expected = pd.DataFrame( + {"positions": [[0, 2], [1, 3]]}, index=pd.Index(["a", "b"], name="label") + ) + tm.assert_frame_equal(result, expected) + + +def test_format_duplicate_labels_message_multi(): + idx = pd.MultiIndex.from_product([["A"], ["a", "b", "a", "b", "c"]]) + result = idx._format_duplicate_message() + expected = pd.DataFrame( + {"positions": [[0, 2], [1, 3]]}, + index=pd.MultiIndex.from_product([["A"], ["a", "b"]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_dataframe_insert_raises(): + df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) + msg = "Cannot specify" + with pytest.raises(ValueError, match=msg): + df.insert(0, "A", [3, 4], allow_duplicates=True) + + +@pytest.mark.parametrize( + "method, frame_only", + [ + (operator.methodcaller("set_index", "A", inplace=True), True), + (operator.methodcaller("set_axis", ["A", "B"], inplace=True), False), + (operator.methodcaller("reset_index", inplace=True), True), + (operator.methodcaller("rename", lambda x: x, inplace=True), False), + ], +) +def test_inplace_raises(method, frame_only): + df = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags( + allows_duplicate_labels=False + ) + s = df["A"] + s.flags.allows_duplicate_labels = False + msg = "Cannot specify" + + warn_msg = "Series.set_axis 'inplace' keyword" + if "set_axis" in str(method): + warn = FutureWarning + else: + warn = None + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(warn, match=warn_msg): + method(df) + if not frame_only: + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(warn, match=warn_msg): + method(s) + + +def test_pickle(): + a = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False) + b = tm.round_trip_pickle(a) + tm.assert_series_equal(a, b) + + a = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False) + b = tm.round_trip_pickle(a) + tm.assert_frame_equal(a, b) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py new file mode 100644 index 00000000..dddab05a --- /dev/null +++ b/pandas/tests/generic/test_finalize.py @@ -0,0 +1,773 @@ +""" +An exhaustive list of pandas methods exercising NDFrame.__finalize__. +""" +import operator +import re + +import numpy as np +import pytest + +import pandas as pd + +# TODO: +# * Binary methods (mul, div, etc.) 
+# * Binary outputs (align, etc.) +# * top-level methods (concat, merge, get_dummies, etc.) +# * window +# * cumulative reductions + +not_implemented_mark = pytest.mark.xfail(reason="not implemented") + +mi = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["A", "B"]) + +frame_data = ({"A": [1]},) +frame_mi_data = ({"A": [1, 2, 3, 4]}, mi) + + +# Tuple of +# - Callable: Constructor (Series, DataFrame) +# - Tuple: Constructor args +# - Callable: pass the constructed value with attrs set to this. + +_all_methods = [ + ( + pd.Series, + (np.array([0], dtype="float64")), + operator.methodcaller("view", "int64"), + ), + (pd.Series, ([0],), operator.methodcaller("take", [])), + (pd.Series, ([0],), operator.methodcaller("__getitem__", [True])), + (pd.Series, ([0],), operator.methodcaller("repeat", 2)), + (pd.Series, ([0],), operator.methodcaller("reset_index")), + (pd.Series, ([0],), operator.methodcaller("reset_index", drop=True)), + (pd.Series, ([0],), operator.methodcaller("to_frame")), + (pd.Series, ([0, 0],), operator.methodcaller("drop_duplicates")), + (pd.Series, ([0, 0],), operator.methodcaller("duplicated")), + (pd.Series, ([0, 0],), operator.methodcaller("round")), + (pd.Series, ([0, 0],), operator.methodcaller("rename", lambda x: x + 1)), + (pd.Series, ([0, 0],), operator.methodcaller("rename", "name")), + (pd.Series, ([0, 0],), operator.methodcaller("set_axis", ["a", "b"])), + (pd.Series, ([0, 0],), operator.methodcaller("reindex", [1, 0])), + (pd.Series, ([0, 0],), operator.methodcaller("drop", [0])), + (pd.Series, (pd.array([0, pd.NA]),), operator.methodcaller("fillna", 0)), + (pd.Series, ([0, 0],), operator.methodcaller("replace", {0: 1})), + (pd.Series, ([0, 0],), operator.methodcaller("shift")), + (pd.Series, ([0, 0],), operator.methodcaller("isin", [0, 1])), + (pd.Series, ([0, 0],), operator.methodcaller("between", 0, 2)), + (pd.Series, ([0, 0],), operator.methodcaller("isna")), + (pd.Series, ([0, 0],), operator.methodcaller("isnull")), + (pd.Series, ([0, 0],), operator.methodcaller("notna")), + (pd.Series, ([0, 0],), operator.methodcaller("notnull")), + (pd.Series, ([1],), operator.methodcaller("add", pd.Series([1]))), + # TODO: mul, div, etc. 
+ ( + pd.Series, + ([0], pd.period_range("2000", periods=1)), + operator.methodcaller("to_timestamp"), + ), + ( + pd.Series, + ([0], pd.date_range("2000", periods=1)), + operator.methodcaller("to_period"), + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("dot", pd.DataFrame(index=["A"])), + ), + marks=pytest.mark.xfail(reason="Implement binary finalize"), + ), + (pd.DataFrame, frame_data, operator.methodcaller("transpose")), + (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")), + (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", ["A"])), + (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))), + (pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])), + (pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")), + (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1", engine="python")), + (pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")), + (pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)), + (pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])), + (pd.DataFrame, frame_data, operator.methodcaller("reindex", [0, 1])), + (pd.DataFrame, frame_data, operator.methodcaller("drop", columns=["A"])), + (pd.DataFrame, frame_data, operator.methodcaller("drop", index=[0])), + (pd.DataFrame, frame_data, operator.methodcaller("rename", columns={"A": "a"})), + (pd.DataFrame, frame_data, operator.methodcaller("rename", index=lambda x: x)), + (pd.DataFrame, frame_data, operator.methodcaller("fillna", "A")), + (pd.DataFrame, frame_data, operator.methodcaller("fillna", method="ffill")), + (pd.DataFrame, frame_data, operator.methodcaller("set_index", "A")), + (pd.DataFrame, frame_data, operator.methodcaller("reset_index")), + (pd.DataFrame, frame_data, operator.methodcaller("isna")), + (pd.DataFrame, frame_data, operator.methodcaller("isnull")), + (pd.DataFrame, frame_data, operator.methodcaller("notna")), + (pd.DataFrame, frame_data, operator.methodcaller("notnull")), + (pd.DataFrame, frame_data, operator.methodcaller("dropna")), + (pd.DataFrame, frame_data, operator.methodcaller("drop_duplicates")), + (pd.DataFrame, frame_data, operator.methodcaller("duplicated")), + (pd.DataFrame, frame_data, operator.methodcaller("sort_values", by="A")), + (pd.DataFrame, frame_data, operator.methodcaller("sort_index")), + (pd.DataFrame, frame_data, operator.methodcaller("nlargest", 1, "A")), + (pd.DataFrame, frame_data, operator.methodcaller("nsmallest", 1, "A")), + (pd.DataFrame, frame_mi_data, operator.methodcaller("swaplevel")), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("add", pd.DataFrame(*frame_data)), + ), + marks=not_implemented_mark, + ), + # TODO: div, mul, etc. 
+ pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("combine", pd.DataFrame(*frame_data), operator.add), + ), + marks=not_implemented_mark, + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("combine_first", pd.DataFrame(*frame_data)), + ), + marks=not_implemented_mark, + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("update", pd.DataFrame(*frame_data)), + ), + marks=not_implemented_mark, + ), + (pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")), + ( + pd.DataFrame, + ({"A": [1], "B": [1]},), + operator.methodcaller("pivot_table", columns="A"), + ), + ( + pd.DataFrame, + ({"A": [1], "B": [1]},), + operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]), + ), + (pd.DataFrame, frame_data, operator.methodcaller("stack")), + (pd.DataFrame, frame_data, operator.methodcaller("explode", "A")), + (pd.DataFrame, frame_mi_data, operator.methodcaller("unstack")), + ( + pd.DataFrame, + ({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]},), + operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]), + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("applymap", lambda x: x)) + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("append", pd.DataFrame({"A": [1]})), + ), + marks=pytest.mark.filterwarnings( + "ignore:.*append method is deprecated.*:FutureWarning" + ), + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("append", pd.DataFrame({"B": [1]})), + ), + marks=pytest.mark.filterwarnings( + "ignore:.*append method is deprecated.*:FutureWarning" + ), + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("merge", pd.DataFrame({"A": [1]})), + ), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("round", 2)), + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("corr")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("cov")), + marks=[ + not_implemented_mark, + pytest.mark.filterwarnings("ignore::RuntimeWarning"), + ], + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("corrwith", pd.DataFrame(*frame_data)), + ), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("count")), + ), + pytest.param( + (pd.DataFrame, frame_mi_data, operator.methodcaller("count", level="A")), + marks=[ + pytest.mark.filterwarnings("ignore:Using the level keyword:FutureWarning"), + ], + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("nunique")), + ), + (pd.DataFrame, frame_data, operator.methodcaller("idxmin")), + (pd.DataFrame, frame_data, operator.methodcaller("idxmax")), + (pd.DataFrame, frame_data, operator.methodcaller("mode")), + pytest.param( + (pd.Series, [0], operator.methodcaller("mode")), + marks=not_implemented_mark, + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("quantile", numeric_only=True), + ), + ), + pytest.param( + ( + pd.DataFrame, + frame_data, + operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True), + ), + ), + pytest.param( + ( + pd.DataFrame, + ({"A": [pd.Timedelta(days=1), pd.Timedelta(days=2)]},), + operator.methodcaller("quantile", numeric_only=False), + ), + ), + pytest.param( + ( + pd.DataFrame, + ({"A": [np.datetime64("2022-01-01"), np.datetime64("2022-01-02")]},), + operator.methodcaller("quantile", 
numeric_only=True), + ), + ), + ( + pd.DataFrame, + ({"A": [1]}, [pd.Period("2000", "D")]), + operator.methodcaller("to_timestamp"), + ), + ( + pd.DataFrame, + ({"A": [1]}, [pd.Timestamp("2000")]), + operator.methodcaller("to_period", freq="D"), + ), + pytest.param( + (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])), + ), + pytest.param( + (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", pd.Series([1]))), + ), + pytest.param( + ( + pd.DataFrame, + frame_mi_data, + operator.methodcaller("isin", pd.DataFrame({"A": [1]})), + ), + ), + (pd.DataFrame, frame_data, operator.methodcaller("swapaxes", 0, 1)), + (pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")), + (pd.DataFrame, frame_data, operator.methodcaller("pop", "A")), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("squeeze")), + marks=not_implemented_mark, + ), + (pd.Series, ([1, 2],), operator.methodcaller("squeeze")), + (pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")), + (pd.DataFrame, frame_data, operator.methodcaller("rename_axis", columns="a")), + # Unary ops + (pd.DataFrame, frame_data, operator.neg), + (pd.Series, [1], operator.neg), + (pd.DataFrame, frame_data, operator.pos), + (pd.Series, [1], operator.pos), + (pd.DataFrame, frame_data, operator.inv), + (pd.Series, [1], operator.inv), + (pd.DataFrame, frame_data, abs), + (pd.Series, [1], abs), + pytest.param((pd.DataFrame, frame_data, round)), + (pd.Series, [1], round), + (pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])), + (pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")), + (pd.Series, (1, mi), operator.methodcaller("xs", "a")), + (pd.DataFrame, frame_data, operator.methodcaller("get", "A")), + ( + pd.DataFrame, + frame_data, + operator.methodcaller("reindex_like", pd.DataFrame({"A": [1, 2, 3]})), + ), + ( + pd.Series, + frame_data, + operator.methodcaller("reindex_like", pd.Series([0, 1, 2])), + ), + (pd.DataFrame, frame_data, operator.methodcaller("add_prefix", "_")), + (pd.DataFrame, frame_data, operator.methodcaller("add_suffix", "_")), + (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_prefix", "_")), + (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_suffix", "_")), + (pd.Series, ([3, 2],), operator.methodcaller("sort_values")), + (pd.Series, ([1] * 10,), operator.methodcaller("head")), + (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("head")), + (pd.Series, ([1] * 10,), operator.methodcaller("tail")), + (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("tail")), + (pd.Series, ([1, 2],), operator.methodcaller("sample", n=2, replace=True)), + (pd.DataFrame, (frame_data,), operator.methodcaller("sample", n=2, replace=True)), + (pd.Series, ([1, 2],), operator.methodcaller("astype", float)), + (pd.DataFrame, frame_data, operator.methodcaller("astype", float)), + (pd.Series, ([1, 2],), operator.methodcaller("copy")), + (pd.DataFrame, frame_data, operator.methodcaller("copy")), + (pd.Series, ([1, 2], None, object), operator.methodcaller("infer_objects")), + ( + pd.DataFrame, + ({"A": np.array([1, 2], dtype=object)},), + operator.methodcaller("infer_objects"), + ), + (pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")), + (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")), + (pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")), + (pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")), + (pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)), + (pd.DataFrame, 
frame_data, operator.methodcaller("clip", lower=1)), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("asfreq", "H"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("asfreq", "H"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("at_time", "12:00"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("at_time", "12:00"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("between_time", "12:00", "13:00"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("between_time", "12:00", "13:00"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("first", "3D"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("first", "3D"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("last", "3D"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("last", "3D"), + ), + (pd.Series, ([1, 2],), operator.methodcaller("rank")), + (pd.DataFrame, frame_data, operator.methodcaller("rank")), + (pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))), + (pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))), + (pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))), + (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))), + pytest.param( + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("tshift"), + ), + marks=pytest.mark.filterwarnings("ignore::FutureWarning"), + ), + pytest.param( + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("tshift"), + ), + marks=pytest.mark.filterwarnings("ignore::FutureWarning"), + ), + (pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)), + (pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)), + ( + pd.Series, + (1, pd.date_range("2000", periods=4, tz="UTC")), + operator.methodcaller("tz_convert", "CET"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4, tz="UTC")), + operator.methodcaller("tz_convert", "CET"), + ), + ( + pd.Series, + (1, pd.date_range("2000", periods=4)), + operator.methodcaller("tz_localize", "CET"), + ), + ( + pd.DataFrame, + ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + operator.methodcaller("tz_localize", "CET"), + ), + pytest.param( + (pd.Series, ([1, 2],), operator.methodcaller("describe")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("describe")), + marks=not_implemented_mark, + ), + (pd.Series, ([1, 2],), operator.methodcaller("pct_change")), + (pd.DataFrame, frame_data, operator.methodcaller("pct_change")), + (pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())), + pytest.param( + ( + pd.DataFrame, + frame_mi_data, + operator.methodcaller("transform", lambda x: x - x.min()), + ), + ), + (pd.Series, ([1],), operator.methodcaller("apply", lambda x: x)), + pytest.param( + (pd.DataFrame, frame_mi_data, operator.methodcaller("apply", lambda x: x)), + ), + # Cumulative reductions + (pd.Series, ([1],), operator.methodcaller("cumsum")), + (pd.DataFrame, frame_data, 
operator.methodcaller("cumsum")), + # Reductions + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("any")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("sum")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("std")), + marks=not_implemented_mark, + ), + pytest.param( + (pd.DataFrame, frame_data, operator.methodcaller("mean")), + marks=not_implemented_mark, + ), +] + + +def idfn(x): + xpr = re.compile(r"'(.*)?'") + m = xpr.search(str(x)) + if m: + return m.group(1) + else: + return str(x) + + +@pytest.fixture(params=_all_methods, ids=lambda x: idfn(x[-1])) +def ndframe_method(request): + """ + An NDFrame method returning an NDFrame. + """ + return request.param + + +def test_finalize_called(ndframe_method): + cls, init_args, method = ndframe_method + ndframe = cls(*init_args) + + ndframe.attrs = {"a": 1} + result = method(ndframe) + + assert result.attrs == {"a": 1} + + +@not_implemented_mark +def test_finalize_called_eval_numexpr(): + pytest.importorskip("numexpr") + df = pd.DataFrame({"A": [1, 2]}) + df.attrs["A"] = 1 + result = df.eval("A + 1", engine="numexpr") + assert result.attrs == {"A": 1} + + +# ---------------------------------------------------------------------------- +# Binary operations + + +@pytest.mark.parametrize("annotate", ["left", "right", "both"]) +@pytest.mark.parametrize( + "args", + [ + (1, pd.Series([1])), + (1, pd.DataFrame({"A": [1]})), + (pd.Series([1]), 1), + (pd.DataFrame({"A": [1]}), 1), + (pd.Series([1]), pd.Series([1])), + (pd.DataFrame({"A": [1]}), pd.DataFrame({"A": [1]})), + (pd.Series([1]), pd.DataFrame({"A": [1]})), + (pd.DataFrame({"A": [1]}), pd.Series([1])), + ], +) +def test_binops(request, args, annotate, all_arithmetic_functions): + # This generates 326 tests... Is that needed? 
+ left, right = args + if annotate == "both" and isinstance(left, int) or isinstance(right, int): + return + + if isinstance(left, pd.DataFrame) or isinstance(right, pd.DataFrame): + request.node.add_marker(pytest.mark.xfail(reason="not implemented")) + + if annotate in {"left", "both"} and not isinstance(left, int): + left.attrs = {"a": 1} + if annotate in {"left", "both"} and not isinstance(right, int): + right.attrs = {"a": 1} + + result = all_arithmetic_functions(left, right) + assert result.attrs == {"a": 1} + + +# ---------------------------------------------------------------------------- +# Accessors + + +@pytest.mark.parametrize( + "method", + [ + operator.methodcaller("capitalize"), + operator.methodcaller("casefold"), + operator.methodcaller("cat", ["a"]), + operator.methodcaller("contains", "a"), + operator.methodcaller("count", "a"), + operator.methodcaller("encode", "utf-8"), + operator.methodcaller("endswith", "a"), + operator.methodcaller("extract", r"(\w)(\d)"), + operator.methodcaller("extract", r"(\w)(\d)", expand=False), + operator.methodcaller("find", "a"), + operator.methodcaller("findall", "a"), + operator.methodcaller("get", 0), + operator.methodcaller("index", "a"), + operator.methodcaller("len"), + operator.methodcaller("ljust", 4), + operator.methodcaller("lower"), + operator.methodcaller("lstrip"), + operator.methodcaller("match", r"\w"), + operator.methodcaller("normalize", "NFC"), + operator.methodcaller("pad", 4), + operator.methodcaller("partition", "a"), + operator.methodcaller("repeat", 2), + operator.methodcaller("replace", "a", "b"), + operator.methodcaller("rfind", "a"), + operator.methodcaller("rindex", "a"), + operator.methodcaller("rjust", 4), + operator.methodcaller("rpartition", "a"), + operator.methodcaller("rstrip"), + operator.methodcaller("slice", 4), + operator.methodcaller("slice_replace", 1, repl="a"), + operator.methodcaller("startswith", "a"), + operator.methodcaller("strip"), + operator.methodcaller("swapcase"), + operator.methodcaller("translate", {"a": "b"}), + operator.methodcaller("upper"), + operator.methodcaller("wrap", 4), + operator.methodcaller("zfill", 4), + operator.methodcaller("isalnum"), + operator.methodcaller("isalpha"), + operator.methodcaller("isdigit"), + operator.methodcaller("isspace"), + operator.methodcaller("islower"), + operator.methodcaller("isupper"), + operator.methodcaller("istitle"), + operator.methodcaller("isnumeric"), + operator.methodcaller("isdecimal"), + operator.methodcaller("get_dummies"), + ], + ids=idfn, +) +def test_string_method(method): + s = pd.Series(["a1"]) + s.attrs = {"a": 1} + result = method(s.str) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "method", + [ + operator.methodcaller("to_period"), + operator.methodcaller("tz_localize", "CET"), + operator.methodcaller("normalize"), + operator.methodcaller("strftime", "%Y"), + operator.methodcaller("round", "H"), + operator.methodcaller("floor", "H"), + operator.methodcaller("ceil", "H"), + operator.methodcaller("month_name"), + operator.methodcaller("day_name"), + ], + ids=idfn, +) +def test_datetime_method(method): + s = pd.Series(pd.date_range("2000", periods=4)) + s.attrs = {"a": 1} + result = method(s.dt) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "attr", + [ + "date", + "time", + "timetz", + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "is_month_start", + 
"is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + "daysinmonth", + "days_in_month", + ], +) +def test_datetime_property(attr): + s = pd.Series(pd.date_range("2000", periods=4)) + s.attrs = {"a": 1} + result = getattr(s.dt, attr) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "attr", ["days", "seconds", "microseconds", "nanoseconds", "components"] +) +def test_timedelta_property(attr): + s = pd.Series(pd.timedelta_range("2000", periods=4)) + s.attrs = {"a": 1} + result = getattr(s.dt, attr) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize("method", [operator.methodcaller("total_seconds")]) +def test_timedelta_methods(method): + s = pd.Series(pd.timedelta_range("2000", periods=4)) + s.attrs = {"a": 1} + result = method(s.dt) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "method", + [ + operator.methodcaller("add_categories", ["c"]), + operator.methodcaller("as_ordered"), + operator.methodcaller("as_unordered"), + lambda x: getattr(x, "codes"), + operator.methodcaller("remove_categories", "a"), + operator.methodcaller("remove_unused_categories"), + operator.methodcaller("rename_categories", {"a": "A", "b": "B"}), + operator.methodcaller("reorder_categories", ["b", "a"]), + operator.methodcaller("set_categories", ["A", "B"]), + ], +) +@not_implemented_mark +def test_categorical_accessor(method): + s = pd.Series(["a", "b"], dtype="category") + s.attrs = {"a": 1} + result = method(s.cat) + assert result.attrs == {"a": 1} + + +# ---------------------------------------------------------------------------- +# Groupby + + +@pytest.mark.parametrize( + "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})] +) +@pytest.mark.parametrize( + "method", + [ + operator.methodcaller("sum"), + lambda x: x.apply(lambda y: y), + lambda x: x.agg("sum"), + lambda x: x.agg("mean"), + lambda x: x.agg("median"), + ], +) +def test_groupby_finalize(obj, method): + obj.attrs = {"a": 1} + result = method(obj.groupby([0, 0], group_keys=False)) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})] +) +@pytest.mark.parametrize( + "method", + [ + lambda x: x.agg(["sum", "count"]), + lambda x: x.agg("std"), + lambda x: x.agg("var"), + lambda x: x.agg("sem"), + lambda x: x.agg("size"), + lambda x: x.agg("ohlc"), + lambda x: x.agg("describe"), + ], +) +@not_implemented_mark +def test_groupby_finalize_not_implemented(obj, method): + obj.attrs = {"a": 1} + result = method(obj.groupby([0, 0])) + assert result.attrs == {"a": 1} + + +def test_finalize_frame_series_name(): + # https://github.com/pandas-dev/pandas/pull/37186/files#r506978889 + # ensure we don't copy the column `name` to the Series. 
+ df = pd.DataFrame({"name": [1, 2]}) + result = pd.Series([1, 2]).__finalize__(df) + assert result.name is None diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py new file mode 100644 index 00000000..b4a3a60e --- /dev/null +++ b/pandas/tests/generic/test_frame.py @@ -0,0 +1,200 @@ +from copy import deepcopy +from operator import methodcaller + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDataFrame: + @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"]) + def test_set_axis_name(self, func): + df = DataFrame([[1, 2], [3, 4]]) + + result = methodcaller(func, "foo")(df) + assert df.index.name is None + assert result.index.name == "foo" + + result = methodcaller(func, "cols", axis=1)(df) + assert df.columns.name is None + assert result.columns.name == "cols" + + @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"]) + def test_set_axis_name_mi(self, func): + df = DataFrame( + np.empty((3, 3)), + index=MultiIndex.from_tuples([("A", x) for x in list("aBc")]), + columns=MultiIndex.from_tuples([("C", x) for x in list("xyz")]), + ) + + level_names = ["L1", "L2"] + + result = methodcaller(func, level_names)(df) + assert result.index.names == level_names + assert result.columns.names == [None, None] + + result = methodcaller(func, level_names, axis=1)(df) + assert result.columns.names == ["L1", "L2"] + assert result.index.names == [None, None] + + def test_nonzero_single_element(self): + + # allow single item via bool method + df = DataFrame([[True]]) + assert df.bool() + + df = DataFrame([[False]]) + assert not df.bool() + + df = DataFrame([[False, False]]) + msg = "The truth value of a DataFrame is ambiguous" + with pytest.raises(ValueError, match=msg): + df.bool() + with pytest.raises(ValueError, match=msg): + bool(df) + + def test_metadata_propagation_indiv_groupby(self): + # groupby + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").sum() + tm.assert_metadata_equivalent(df, result) + + def test_metadata_propagation_indiv_resample(self): + # resample + df = DataFrame( + np.random.randn(1000, 2), + index=date_range("20130101", periods=1000, freq="s"), + ) + result = df.resample("1T") + tm.assert_metadata_equivalent(df, result) + + def test_metadata_propagation_indiv(self, monkeypatch): + # merging with override + # GH 6923 + + def finalize(self, other, method=None, **kwargs): + + for name in self._metadata: + if method == "merge": + left, right = other.left, other.right + value = getattr(left, name, "") + "|" + getattr(right, name, "") + object.__setattr__(self, name, value) + elif method == "concat": + value = "+".join( + [getattr(o, name) for o in other.objs if getattr(o, name, None)] + ) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, "")) + + return self + + with monkeypatch.context() as m: + m.setattr(DataFrame, "_metadata", ["filename"]) + m.setattr(DataFrame, "__finalize__", finalize) + + np.random.seed(10) + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["c", "d"]) + 
DataFrame._metadata = ["filename"] + df1.filename = "fname1.csv" + df2.filename = "fname2.csv" + + result = df1.merge(df2, left_on=["a"], right_on=["c"], how="inner") + assert result.filename == "fname1.csv|fname2.csv" + + # concat + # GH#6927 + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list("ab")) + df1.filename = "foo" + + result = pd.concat([df1, df1]) + assert result.filename == "foo+foo" + + def test_set_attribute(self): + # Test for consistent setattr behavior when an attribute and a column + # have the same name (Issue #8994) + df = DataFrame({"x": [1, 2, 3]}) + + df.y = 2 + df["y"] = [2, 4, 6] + df.y = 5 + + assert df.y == 5 + tm.assert_series_equal(df["y"], Series([2, 4, 6], name="y")) + + def test_deepcopy_empty(self): + # This test covers empty frame copying with non-empty column sets + # as reported in issue GH15370 + empty_frame = DataFrame(data=[], index=[], columns=["A"]) + empty_frame_copy = deepcopy(empty_frame) + + tm.assert_frame_equal(empty_frame_copy, empty_frame) + + +# formerly in Generic but only test DataFrame +class TestDataFrame2: + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, value): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + msg = 'For argument "inplace" expected type bool, received type' + with pytest.raises(ValueError, match=msg): + super(DataFrame, df).rename_axis( + mapper={"a": "x", "b": "y"}, axis=1, inplace=value + ) + + with pytest.raises(ValueError, match=msg): + super(DataFrame, df).drop("a", axis=1, inplace=value) + + with pytest.raises(ValueError, match=msg): + super(DataFrame, df).fillna(value=0, inplace=value) + + with pytest.raises(ValueError, match=msg): + super(DataFrame, df).replace(to_replace=1, value=7, inplace=value) + + with pytest.raises(ValueError, match=msg): + super(DataFrame, df).interpolate(inplace=value) + + with pytest.raises(ValueError, match=msg): + super(DataFrame, df)._where(cond=df.a > 2, inplace=value) + + with pytest.raises(ValueError, match=msg): + super(DataFrame, df).mask(cond=df.a > 2, inplace=value) + + def test_unexpected_keyword(self): + # GH8597 + df = DataFrame(np.random.randn(5, 2), columns=["jim", "joe"]) + ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) + ts = df["joe"].copy() + ts[2] = np.nan + + msg = "unexpected keyword" + with pytest.raises(TypeError, match=msg): + df.drop("joe", axis=1, in_place=True) + + with pytest.raises(TypeError, match=msg): + df.reindex([1, 0], inplace=True) + + with pytest.raises(TypeError, match=msg): + ca.fillna(0, inplace=True) + + with pytest.raises(TypeError, match=msg): + ts.fillna(0, in_place=True) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py new file mode 100644 index 00000000..80d9bde7 --- /dev/null +++ b/pandas/tests/generic/test_generic.py @@ -0,0 +1,487 @@ +from copy import ( + copy, + deepcopy, +) + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_scalar + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + +# ---------------------------------------------------------------------- +# Generic types test cases + + +def construct(box, shape, value=None, dtype=None, **kwargs): + """ + construct an object for the given shape + if value is specified use that if its a scalar + if value is an array, repeat it as needed + """ + if isinstance(shape, int): + shape = tuple([shape] * box._AXIS_LEN) + if value is not None: + if is_scalar(value): + if value == "empty": + arr = None + dtype = np.float64 + + # 
remove the info axis + kwargs.pop(box._info_axis_name, None) + else: + arr = np.empty(shape, dtype=dtype) + arr.fill(value) + else: + fshape = np.prod(shape) + arr = value.ravel() + new_shape = fshape / arr.shape[0] + if fshape % arr.shape[0] != 0: + raise Exception("invalid value passed in construct") + + arr = np.repeat(arr, new_shape).reshape(shape) + else: + arr = np.random.randn(*shape) + return box(arr, dtype=dtype, **kwargs) + + +class Generic: + @pytest.mark.parametrize( + "func", + [ + str.lower, + {x: x.lower() for x in list("ABCD")}, + Series({x: x.lower() for x in list("ABCD")}), + ], + ) + def test_rename(self, frame_or_series, func): + + # single axis + idx = list("ABCD") + + for axis in frame_or_series._AXIS_ORDERS: + kwargs = {axis: idx} + obj = construct(4, **kwargs) + + # rename a single axis + result = obj.rename(**{axis: func}) + expected = obj.copy() + setattr(expected, axis, list("abcd")) + tm.assert_equal(result, expected) + + def test_get_numeric_data(self, frame_or_series): + + n = 4 + kwargs = { + frame_or_series._get_axis_name(i): list(range(n)) + for i in range(frame_or_series._AXIS_LEN) + } + + # get the numeric data + o = construct(n, **kwargs) + result = o._get_numeric_data() + tm.assert_equal(result, o) + + # non-inclusion + result = o._get_bool_data() + expected = construct(n, value="empty", **kwargs) + if isinstance(o, DataFrame): + # preserve columns dtype + expected.columns = o.columns[:0] + tm.assert_equal(result, expected) + + # get the bool data + arr = np.array([True, True, False, True]) + o = construct(n, value=arr, **kwargs) + result = o._get_numeric_data() + tm.assert_equal(result, o) + + def test_nonzero(self, frame_or_series): + + # GH 4633 + # look at the boolean/nonzero behavior for objects + obj = construct(frame_or_series, shape=4) + msg = f"The truth value of a {frame_or_series.__name__} is ambiguous" + with pytest.raises(ValueError, match=msg): + bool(obj == 0) + with pytest.raises(ValueError, match=msg): + bool(obj == 1) + with pytest.raises(ValueError, match=msg): + bool(obj) + + obj = construct(frame_or_series, shape=4, value=1) + with pytest.raises(ValueError, match=msg): + bool(obj == 0) + with pytest.raises(ValueError, match=msg): + bool(obj == 1) + with pytest.raises(ValueError, match=msg): + bool(obj) + + obj = construct(frame_or_series, shape=4, value=np.nan) + with pytest.raises(ValueError, match=msg): + bool(obj == 0) + with pytest.raises(ValueError, match=msg): + bool(obj == 1) + with pytest.raises(ValueError, match=msg): + bool(obj) + + # empty + obj = construct(frame_or_series, shape=0) + with pytest.raises(ValueError, match=msg): + bool(obj) + + # invalid behaviors + + obj1 = construct(frame_or_series, shape=4, value=1) + obj2 = construct(frame_or_series, shape=4, value=1) + + with pytest.raises(ValueError, match=msg): + if obj1: + pass + + with pytest.raises(ValueError, match=msg): + obj1 and obj2 + with pytest.raises(ValueError, match=msg): + obj1 or obj2 + with pytest.raises(ValueError, match=msg): + not obj1 + + def test_frame_or_series_compound_dtypes(self, frame_or_series): + # see gh-5191 + # Compound dtypes should raise NotImplementedError. 
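+        # "Compound" means a structured/record dtype such as
+        # [("A", "datetime64[h]"), ("B", "str"), ("C", "int32")] as passed
+        # below; the simple dtypes further down are still accepted.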
+ + def f(dtype): + return construct(frame_or_series, shape=3, value=1, dtype=dtype) + + msg = ( + "compound dtypes are not implemented " + f"in the {frame_or_series.__name__} frame_or_series" + ) + + with pytest.raises(NotImplementedError, match=msg): + f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) + + # these work (though results may be unexpected) + f("int64") + f("float64") + f("M8[ns]") + + def test_metadata_propagation(self, frame_or_series): + # check that the metadata matches up on the resulting ops + + o = construct(frame_or_series, shape=3) + o.name = "foo" + o2 = construct(frame_or_series, shape=3) + o2.name = "bar" + + # ---------- + # preserving + # ---------- + + # simple ops with scalars + for op in ["__add__", "__sub__", "__truediv__", "__mul__"]: + result = getattr(o, op)(1) + tm.assert_metadata_equivalent(o, result) + + # ops with like + for op in ["__add__", "__sub__", "__truediv__", "__mul__"]: + result = getattr(o, op)(o) + tm.assert_metadata_equivalent(o, result) + + # simple boolean + for op in ["__eq__", "__le__", "__ge__"]: + v1 = getattr(o, op)(o) + tm.assert_metadata_equivalent(o, v1) + tm.assert_metadata_equivalent(o, v1 & v1) + tm.assert_metadata_equivalent(o, v1 | v1) + + # combine_first + result = o.combine_first(o2) + tm.assert_metadata_equivalent(o, result) + + # --------------------------- + # non-preserving (by default) + # --------------------------- + + # add non-like + result = o + o2 + tm.assert_metadata_equivalent(result) + + # simple boolean + for op in ["__eq__", "__le__", "__ge__"]: + + # this is a name matching op + v1 = getattr(o, op)(o) + v2 = getattr(o, op)(o2) + tm.assert_metadata_equivalent(v2) + tm.assert_metadata_equivalent(v1 & v2) + tm.assert_metadata_equivalent(v1 | v2) + + def test_size_compat(self, frame_or_series): + # GH8846 + # size property should be defined + + o = construct(frame_or_series, shape=10) + assert o.size == np.prod(o.shape) + assert o.size == 10 ** len(o.axes) + + def test_split_compat(self, frame_or_series): + # xref GH8846 + o = construct(frame_or_series, shape=10) + assert len(np.array_split(o, 5)) == 5 + assert len(np.array_split(o, 2)) == 2 + + # See gh-12301 + def test_stat_unexpected_keyword(self, frame_or_series): + obj = construct(frame_or_series, 5) + starwars = "Star Wars" + errmsg = "unexpected keyword" + + with pytest.raises(TypeError, match=errmsg): + obj.max(epic=starwars) # stat_function + with pytest.raises(TypeError, match=errmsg): + obj.var(epic=starwars) # stat_function_ddof + with pytest.raises(TypeError, match=errmsg): + obj.sum(epic=starwars) # cum_function + with pytest.raises(TypeError, match=errmsg): + obj.any(epic=starwars) # logical_function + + @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"]) + def test_api_compat(self, func, frame_or_series): + + # GH 12021 + # compat for __name__, __qualname__ + + obj = (frame_or_series, 5) + f = getattr(obj, func) + assert f.__name__ == func + assert f.__qualname__.endswith(func) + + def test_stat_non_defaults_args(self, frame_or_series): + obj = construct(frame_or_series, 5) + out = np.array([0]) + errmsg = "the 'out' parameter is not supported" + + with pytest.raises(ValueError, match=errmsg): + obj.max(out=out) # stat_function + with pytest.raises(ValueError, match=errmsg): + obj.var(out=out) # stat_function_ddof + with pytest.raises(ValueError, match=errmsg): + obj.sum(out=out) # cum_function + with pytest.raises(ValueError, match=errmsg): + obj.any(out=out) # logical_function + + def test_truncate_out_of_bounds(self, 
frame_or_series): + # GH11382 + + # small + shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1)) + small = construct(frame_or_series, shape, dtype="int8", value=1) + tm.assert_equal(small.truncate(), small) + tm.assert_equal(small.truncate(before=0, after=3e3), small) + tm.assert_equal(small.truncate(before=-1, after=2e3), small) + + # big + shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1)) + big = construct(frame_or_series, shape, dtype="int8", value=1) + tm.assert_equal(big.truncate(), big) + tm.assert_equal(big.truncate(before=0, after=3e6), big) + tm.assert_equal(big.truncate(before=-1, after=2e6), big) + + @pytest.mark.parametrize( + "func", + [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)], + ) + @pytest.mark.parametrize("shape", [0, 1, 2]) + def test_copy_and_deepcopy(self, frame_or_series, shape, func): + # GH 15444 + obj = construct(frame_or_series, shape) + obj_copy = func(obj) + assert obj_copy is not obj + tm.assert_equal(obj_copy, obj) + + +class TestNDFrame: + # tests that don't fit elsewhere + + @pytest.mark.parametrize( + "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()] + ) + def test_squeeze_series_noop(self, ser): + # noop + tm.assert_series_equal(ser.squeeze(), ser) + + def test_squeeze_frame_noop(self): + # noop + df = tm.makeTimeDataFrame() + tm.assert_frame_equal(df.squeeze(), df) + + def test_squeeze_frame_reindex(self): + # squeezing + df = tm.makeTimeDataFrame().reindex(columns=["A"]) + tm.assert_series_equal(df.squeeze(), df["A"]) + + def test_squeeze_0_len_dim(self): + # don't fail with 0 length dimensions GH11229 & GH8999 + empty_series = Series([], name="five", dtype=np.float64) + empty_frame = DataFrame([empty_series]) + tm.assert_series_equal(empty_series, empty_series.squeeze()) + tm.assert_series_equal(empty_series, empty_frame.squeeze()) + + def test_squeeze_axis(self): + # axis argument + df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] + assert df.shape == (1, 1) + tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) + tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0]) + assert df.squeeze() == df.iloc[0, 0] + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.squeeze(axis=2) + msg = "No axis named x for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.squeeze(axis="x") + + def test_squeeze_axis_len_3(self): + df = tm.makeTimeDataFrame(3) + tm.assert_frame_equal(df.squeeze(axis=0), df) + + def test_numpy_squeeze(self): + s = tm.makeFloatSeries() + tm.assert_series_equal(np.squeeze(s), s) + + df = tm.makeTimeDataFrame().reindex(columns=["A"]) + tm.assert_series_equal(np.squeeze(df), df["A"]) + + @pytest.mark.parametrize( + "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()] + ) + def test_transpose_series(self, ser): + # calls implementation in pandas/core/base.py + tm.assert_series_equal(ser.transpose(), ser) + + def test_transpose_frame(self): + df = tm.makeTimeDataFrame() + tm.assert_frame_equal(df.transpose().transpose(), df) + + def test_numpy_transpose(self, frame_or_series): + + obj = tm.makeTimeDataFrame() + obj = tm.get_obj(obj, frame_or_series) + + if frame_or_series is Series: + # 1D -> np.transpose is no-op + tm.assert_series_equal(np.transpose(obj), obj) + + # round-trip preserved + tm.assert_equal(np.transpose(np.transpose(obj)), obj) + + msg 
= "the 'axes' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.transpose(obj, axes=1) + + @pytest.mark.parametrize( + "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()] + ) + def test_take_series(self, ser): + indices = [1, 5, -2, 6, 3, -1] + out = ser.take(indices) + expected = Series( + data=ser.values.take(indices), + index=ser.index.take(indices), + dtype=ser.dtype, + ) + tm.assert_series_equal(out, expected) + + def test_take_frame(self): + indices = [1, 5, -2, 6, 3, -1] + df = tm.makeTimeDataFrame() + out = df.take(indices) + expected = DataFrame( + data=df.values.take(indices, axis=0), + index=df.index.take(indices), + columns=df.columns, + ) + tm.assert_frame_equal(out, expected) + + def test_take_invalid_kwargs(self, frame_or_series): + indices = [-3, 2, 0, 1] + + obj = tm.makeTimeDataFrame() + obj = tm.get_obj(obj, frame_or_series) + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + obj.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + obj.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + obj.take(indices, mode="clip") + + @pytest.mark.parametrize("is_copy", [True, False]) + def test_depr_take_kwarg_is_copy(self, is_copy, frame_or_series): + # GH 27357 + obj = DataFrame({"A": [1, 2, 3]}) + obj = tm.get_obj(obj, frame_or_series) + + msg = ( + "is_copy is deprecated and will be removed in a future version. " + "'take' always returns a copy, so there is no need to specify this." + ) + with tm.assert_produces_warning(FutureWarning) as w: + obj.take([0, 1], is_copy=is_copy) + + assert w[0].message.args[0] == msg + + def test_axis_classmethods(self, frame_or_series): + box = frame_or_series + obj = box(dtype=object) + values = box._AXIS_TO_AXIS_NUMBER.keys() + for v in values: + assert obj._get_axis_number(v) == box._get_axis_number(v) + assert obj._get_axis_name(v) == box._get_axis_name(v) + assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v) + + def test_axis_names_deprecated(self, frame_or_series): + # GH33637 + box = frame_or_series + obj = box(dtype=object) + msg = "_AXIS_NAMES has been deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + obj._AXIS_NAMES + + def test_axis_numbers_deprecated(self, frame_or_series): + # GH33637 + box = frame_or_series + obj = box(dtype=object) + msg = "_AXIS_NUMBERS has been deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + obj._AXIS_NUMBERS + + def test_flags_identity(self, frame_or_series): + obj = Series([1, 2]) + if frame_or_series is DataFrame: + obj = obj.to_frame() + + assert obj.flags is obj.flags + obj2 = obj.copy() + assert obj2.flags is not obj.flags + + def test_slice_shift_deprecated(self, frame_or_series): + # GH 37601 + obj = DataFrame({"A": [1, 2, 3, 4]}) + obj = tm.get_obj(obj, frame_or_series) + + with tm.assert_produces_warning(FutureWarning): + obj.slice_shift() diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py new file mode 100644 index 00000000..d1c85d77 --- /dev/null +++ b/pandas/tests/generic/test_label_or_level_utils.py @@ -0,0 +1,349 @@ +import pytest + +from pandas.core.dtypes.missing import array_equivalent + +import pandas as pd + + +# Fixtures +# ======== +@pytest.fixture +def df(): + """DataFrame with columns 'L1', 'L2', and 'L3'""" + return 
pd.DataFrame({"L1": [1, 2, 3], "L2": [11, 12, 13], "L3": ["A", "B", "C"]}) + + +@pytest.fixture(params=[[], ["L1"], ["L1", "L2"], ["L1", "L2", "L3"]]) +def df_levels(request, df): + """DataFrame with columns or index levels 'L1', 'L2', and 'L3'""" + levels = request.param + + if levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture +def df_ambig(df): + """DataFrame with levels 'L1' and 'L2' and labels 'L1' and 'L3'""" + df = df.set_index(["L1", "L2"]) + + df["L1"] = df["L3"] + + return df + + +@pytest.fixture +def df_duplabels(df): + """DataFrame with level 'L1' and labels 'L2', 'L3', and 'L2'""" + df = df.set_index(["L1"]) + df = pd.concat([df, df["L2"]], axis=1) + + return df + + +# Test is label/level reference +# ============================= +def get_labels_levels(df_levels): + expected_labels = list(df_levels.columns) + expected_levels = [name for name in df_levels.index.names if name is not None] + return expected_labels, expected_levels + + +def assert_label_reference(frame, labels, axis): + for label in labels: + assert frame._is_label_reference(label, axis=axis) + assert not frame._is_level_reference(label, axis=axis) + assert frame._is_label_or_level_reference(label, axis=axis) + + +def assert_level_reference(frame, levels, axis): + for level in levels: + assert frame._is_level_reference(level, axis=axis) + assert not frame._is_label_reference(level, axis=axis) + assert frame._is_label_or_level_reference(level, axis=axis) + + +# DataFrame +# --------- +def test_is_level_or_label_reference_df_simple(df_levels, axis): + + axis = df_levels._get_axis_number(axis) + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + # Transpose frame if axis == 1 + if axis == 1: + df_levels = df_levels.T + + # Perform checks + assert_level_reference(df_levels, expected_levels, axis=axis) + assert_label_reference(df_levels, expected_labels, axis=axis) + + +def test_is_level_reference_df_ambig(df_ambig, axis): + + axis = df_ambig._get_axis_number(axis) + + # Transpose frame if axis == 1 + if axis == 1: + df_ambig = df_ambig.T + + # df has both an on-axis level and off-axis label named L1 + # Therefore L1 should reference the label, not the level + assert_label_reference(df_ambig, ["L1"], axis=axis) + + # df has an on-axis level named L2 and it is not ambiguous + # Therefore L2 is an level reference + assert_level_reference(df_ambig, ["L2"], axis=axis) + + # df has a column named L3 and it not an level reference + assert_label_reference(df_ambig, ["L3"], axis=axis) + + +# Series +# ------ +def test_is_level_reference_series_simple_axis0(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + assert_level_reference(s, ["L1"], axis=0) + assert not s._is_level_reference("L2") + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + assert_level_reference(s, ["L1", "L2"], axis=0) + assert not s._is_level_reference("L3") + + +def test_is_level_reference_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + + with pytest.raises(ValueError, match="No axis named 1"): + s._is_level_reference("L1", axis=1) + + +# Test _check_label_or_level_ambiguity_df +# ======================================= + +# DataFrame +# --------- +def test_check_label_or_level_ambiguity_df(df_ambig, axis): + + axis = df_ambig._get_axis_number(axis) + # Transpose frame if axis == 1 + if axis == 1: + df_ambig = df_ambig.T + msg = "'L1' is both a column level and an index label" + + else: + msg = 
"'L1' is both an index level and a column label" + # df_ambig has both an on-axis level and off-axis label named L1 + # Therefore, L1 is ambiguous. + with pytest.raises(ValueError, match=msg): + df_ambig._check_label_or_level_ambiguity("L1", axis=axis) + + # df_ambig has an on-axis level named L2,, and it is not ambiguous. + df_ambig._check_label_or_level_ambiguity("L2", axis=axis) + + # df_ambig has an off-axis label named L3, and it is not ambiguous + assert not df_ambig._check_label_or_level_ambiguity("L3", axis=axis) + + +# Series +# ------ +def test_check_label_or_level_ambiguity_series(df): + + # A series has no columns and therefore references are never ambiguous + + # Make series with L1 as index + s = df.set_index("L1").L2 + s._check_label_or_level_ambiguity("L1", axis=0) + s._check_label_or_level_ambiguity("L2", axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + s._check_label_or_level_ambiguity("L1", axis=0) + s._check_label_or_level_ambiguity("L2", axis=0) + s._check_label_or_level_ambiguity("L3", axis=0) + + +def test_check_label_or_level_ambiguity_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + + with pytest.raises(ValueError, match="No axis named 1"): + s._check_label_or_level_ambiguity("L1", axis=1) + + +# Test _get_label_or_level_values +# =============================== +def assert_label_values(frame, labels, axis): + axis = frame._get_axis_number(axis) + for label in labels: + if axis == 0: + expected = frame[label]._values + else: + expected = frame.loc[label]._values + + result = frame._get_label_or_level_values(label, axis=axis) + assert array_equivalent(expected, result) + + +def assert_level_values(frame, levels, axis): + axis = frame._get_axis_number(axis) + for level in levels: + if axis == 0: + expected = frame.index.get_level_values(level=level)._values + else: + expected = frame.columns.get_level_values(level=level)._values + + result = frame._get_label_or_level_values(level, axis=axis) + assert array_equivalent(expected, result) + + +# DataFrame +# --------- +def test_get_label_or_level_values_df_simple(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + axis = df_levels._get_axis_number(axis) + # Transpose frame if axis == 1 + if axis == 1: + df_levels = df_levels.T + + # Perform checks + assert_label_values(df_levels, expected_labels, axis=axis) + assert_level_values(df_levels, expected_levels, axis=axis) + + +def test_get_label_or_level_values_df_ambig(df_ambig, axis): + + axis = df_ambig._get_axis_number(axis) + # Transpose frame if axis == 1 + if axis == 1: + df_ambig = df_ambig.T + + # df has an on-axis level named L2, and it is not ambiguous. + assert_level_values(df_ambig, ["L2"], axis=axis) + + # df has an off-axis label named L3, and it is not ambiguous. 
+ assert_label_values(df_ambig, ["L3"], axis=axis) + + +def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): + + axis = df_duplabels._get_axis_number(axis) + # Transpose frame if axis == 1 + if axis == 1: + df_duplabels = df_duplabels.T + + # df has unambiguous level 'L1' + assert_level_values(df_duplabels, ["L1"], axis=axis) + + # df has unique label 'L3' + assert_label_values(df_duplabels, ["L3"], axis=axis) + + # df has duplicate labels 'L2' + if axis == 0: + expected_msg = "The column label 'L2' is not unique" + else: + expected_msg = "The index label 'L2' is not unique" + + with pytest.raises(ValueError, match=expected_msg): + assert_label_values(df_duplabels, ["L2"], axis=axis) + + +# Series +# ------ +def test_get_label_or_level_values_series_axis0(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + assert_level_values(s, ["L1"], axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + assert_level_values(s, ["L1", "L2"], axis=0) + + +def test_get_label_or_level_values_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + + with pytest.raises(ValueError, match="No axis named 1"): + s._get_label_or_level_values("L1", axis=1) + + +# Test _drop_labels_or_levels +# =========================== +def assert_labels_dropped(frame, labels, axis): + axis = frame._get_axis_number(axis) + for label in labels: + df_dropped = frame._drop_labels_or_levels(label, axis=axis) + + if axis == 0: + assert label in frame.columns + assert label not in df_dropped.columns + else: + assert label in frame.index + assert label not in df_dropped.index + + +def assert_levels_dropped(frame, levels, axis): + axis = frame._get_axis_number(axis) + for level in levels: + df_dropped = frame._drop_labels_or_levels(level, axis=axis) + + if axis == 0: + assert level in frame.index.names + assert level not in df_dropped.index.names + else: + assert level in frame.columns.names + assert level not in df_dropped.columns.names + + +# DataFrame +# --------- +def test_drop_labels_or_levels_df(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + axis = df_levels._get_axis_number(axis) + # Transpose frame if axis == 1 + if axis == 1: + df_levels = df_levels.T + + # Perform checks + assert_labels_dropped(df_levels, expected_labels, axis=axis) + assert_levels_dropped(df_levels, expected_levels, axis=axis) + + with pytest.raises(ValueError, match="not valid labels or levels"): + df_levels._drop_labels_or_levels("L4", axis=axis) + + +# Series +# ------ +def test_drop_labels_or_levels_series(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + assert_levels_dropped(s, ["L1"], axis=0) + + with pytest.raises(ValueError, match="not valid labels or levels"): + s._drop_labels_or_levels("L4", axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + assert_levels_dropped(s, ["L1", "L2"], axis=0) + + with pytest.raises(ValueError, match="not valid labels or levels"): + s._drop_labels_or_levels("L4", axis=0) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py new file mode 100644 index 00000000..dd2380e2 --- /dev/null +++ b/pandas/tests/generic/test_series.py @@ -0,0 +1,144 @@ +from operator import methodcaller + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm + + +class TestSeries: + 
@pytest.mark.parametrize("func", ["rename_axis", "_set_axis_name"]) + def test_set_axis_name_mi(self, func): + ser = Series( + [11, 21, 31], + index=MultiIndex.from_tuples( + [("A", x) for x in ["a", "B", "c"]], names=["l1", "l2"] + ), + ) + + result = methodcaller(func, ["L1", "L2"])(ser) + assert ser.index.name is None + assert ser.index.names == ["l1", "l2"] + assert result.index.name is None + assert result.index.names == ["L1", "L2"] + + def test_set_axis_name_raises(self): + ser = Series([1]) + msg = "No axis named 1 for object type Series" + with pytest.raises(ValueError, match=msg): + ser._set_axis_name(name="a", axis=1) + + def test_get_bool_data_preserve_dtype(self): + ser = Series([True, False, True]) + result = ser._get_bool_data() + tm.assert_series_equal(result, ser) + + def test_nonzero_single_element(self): + + # allow single item via bool method + ser = Series([True]) + assert ser.bool() + + ser = Series([False]) + assert not ser.bool() + + @pytest.mark.parametrize("data", [np.nan, pd.NaT, True, False]) + def test_nonzero_single_element_raise_1(self, data): + # single item nan to raise + series = Series([data]) + + msg = "The truth value of a Series is ambiguous" + with pytest.raises(ValueError, match=msg): + bool(series) + + @pytest.mark.parametrize("data", [np.nan, pd.NaT]) + def test_nonzero_single_element_raise_2(self, data): + series = Series([data]) + + msg = "bool cannot act on a non-boolean single element Series" + with pytest.raises(ValueError, match=msg): + series.bool() + + @pytest.mark.parametrize("data", [(True, True), (False, False)]) + def test_nonzero_multiple_element_raise(self, data): + # multiple bool are still an error + series = Series([data]) + + msg = "The truth value of a Series is ambiguous" + with pytest.raises(ValueError, match=msg): + bool(series) + with pytest.raises(ValueError, match=msg): + series.bool() + + @pytest.mark.parametrize("data", [1, 0, "a", 0.0]) + def test_nonbool_single_element_raise(self, data): + # single non-bool are an error + series = Series([data]) + + msg = "The truth value of a Series is ambiguous" + with pytest.raises(ValueError, match=msg): + bool(series) + + msg = "bool cannot act on a non-boolean single element Series" + with pytest.raises(ValueError, match=msg): + series.bool() + + def test_metadata_propagation_indiv_resample(self): + # resample + ts = Series( + np.random.rand(1000), + index=date_range("20130101", periods=1000, freq="s"), + name="foo", + ) + result = ts.resample("1T").mean() + tm.assert_metadata_equivalent(ts, result) + + result = ts.resample("1T").min() + tm.assert_metadata_equivalent(ts, result) + + result = ts.resample("1T").apply(lambda x: x.sum()) + tm.assert_metadata_equivalent(ts, result) + + def test_metadata_propagation_indiv(self, monkeypatch): + # check that the metadata matches up on the resulting ops + + ser = Series(range(3), range(3)) + ser.name = "foo" + ser2 = Series(range(3), range(3)) + ser2.name = "bar" + + result = ser.T + tm.assert_metadata_equivalent(ser, result) + + def finalize(self, other, method=None, **kwargs): + for name in self._metadata: + if method == "concat" and name == "filename": + value = "+".join( + [ + getattr(obj, name) + for obj in other.objs + if getattr(obj, name, None) + ] + ) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, None)) + + return self + + with monkeypatch.context() as m: + m.setattr(Series, "_metadata", ["name", "filename"]) + m.setattr(Series, "__finalize__", finalize) + + ser.filename =
"foo" + ser2.filename = "bar" + + result = pd.concat([ser, ser2]) + assert result.filename == "foo+bar" + assert result.name is None diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py new file mode 100644 index 00000000..1fbd82f0 --- /dev/null +++ b/pandas/tests/generic/test_to_xarray.py @@ -0,0 +1,128 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm + + +@td.skip_if_no("xarray") +class TestDataFrameToXArray: + @pytest.fixture + def df(self): + return DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": Categorical(list("abc")), + "g": date_range("20130101", periods=3), + "h": date_range("20130101", periods=3, tz="US/Eastern"), + } + ) + + def test_to_xarray_index_types(self, index_flat, df): + index = index_flat + # MultiIndex is tested in test_to_xarray_with_multiindex + if len(index) == 0: + pytest.skip("Test doesn't make sense for empty index") + + from xarray import Dataset + + df.index = index[:3] + df.index.name = "foo" + df.columns.name = "bar" + result = df.to_xarray() + assert result.dims["foo"] == 3 + assert len(result.coords) == 1 + assert len(result.data_vars) == 8 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, Dataset) + + # idempotency + # datetimes w/tz are preserved + # column names are lost + expected = df.copy() + expected["f"] = expected["f"].astype(object) + expected.columns.name = None + tm.assert_frame_equal(result.to_dataframe(), expected) + + def test_to_xarray_empty(self, df): + from xarray import Dataset + + df.index.name = "foo" + result = df[0:0].to_xarray() + assert result.dims["foo"] == 0 + assert isinstance(result, Dataset) + + def test_to_xarray_with_multiindex(self, df): + from xarray import Dataset + + # MultiIndex + df.index = MultiIndex.from_product([["a"], range(3)], names=["one", "two"]) + result = df.to_xarray() + assert result.dims["one"] == 1 + assert result.dims["two"] == 3 + assert len(result.coords) == 2 + assert len(result.data_vars) == 8 + tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) + assert isinstance(result, Dataset) + + result = result.to_dataframe() + expected = df.copy() + expected["f"] = expected["f"].astype(object) + expected.columns.name = None + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("xarray") +class TestSeriesToXArray: + def test_to_xarray_index_types(self, index_flat): + index = index_flat + # MultiIndex is tested in test_to_xarray_with_multiindex + + from xarray import DataArray + + ser = Series(range(len(index)), index=index, dtype="int64") + ser.index.name = "foo" + result = ser.to_xarray() + repr(result) + assert len(result) == len(index) + assert len(result.coords) == 1 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, DataArray) + + # idempotency + tm.assert_series_equal(result.to_series(), ser) + + def test_to_xarray_empty(self): + from xarray import DataArray + + ser = Series([], dtype=object) + ser.index.name = "foo" + result = ser.to_xarray() + assert len(result) == 0 + assert len(result.coords) == 1 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, DataArray) + + def test_to_xarray_with_multiindex(self): + from xarray import DataArray + + 
mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"]) + ser = Series(range(6), dtype="int64", index=mi) + result = ser.to_xarray() + assert len(result) == 2 + tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) + assert isinstance(result, DataArray) + res = result.to_series() + tm.assert_series_equal(res, ser) diff --git a/pandas/tests/groupby/__init__.py b/pandas/tests/groupby/__init__.py new file mode 100644 index 00000000..c63aa568 --- /dev/null +++ b/pandas/tests/groupby/__init__.py @@ -0,0 +1,27 @@ +def get_groupby_method_args(name, obj): + """ + Get required arguments for a groupby method. + + When parametrizing a test over groupby methods (e.g. "sum", "mean", "fillna"), + it is often the case that arguments are required for certain methods. + + Parameters + ---------- + name: str + Name of the method. + obj: Series or DataFrame + pandas object that is being grouped. + + Returns + ------- + A tuple of required arguments for the method. + """ + if name in ("nth", "fillna", "take"): + return (0,) + if name == "quantile": + return (0.5,) + if name == "corrwith": + return (obj,) + if name == "tshift": + return (0, 0) + return () diff --git a/pandas/tests/groupby/aggregate/__init__.py b/pandas/tests/groupby/aggregate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py new file mode 100644 index 00000000..4e8cc2cb --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -0,0 +1,1468 @@ +""" +test .agg behavior / note that .apply is tested generally in test_groupby.py +""" +import datetime +import functools +from functools import partial +import re + +import numpy as np +import pytest + +from pandas.errors import SpecificationError + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, + to_datetime, +) +import pandas._testing as tm +from pandas.core.groupby.grouper import Grouping + + +def test_groupby_agg_no_extra_calls(): + # GH#31760 + df = DataFrame({"key": ["a", "b", "c", "c"], "value": [1, 2, 3, 4]}) + gb = df.groupby("key")["value"] + + def dummy_func(x): + assert len(x) != 0 + return x.sum() + + gb.agg(dummy_func) + + +def test_agg_regression1(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_must_agg(df): + grouped = df.groupby("A")["C"] + + msg = "Must produce aggregated value" + with pytest.raises(Exception, match=msg): + grouped.agg(lambda x: x.describe()) + with pytest.raises(Exception, match=msg): + grouped.agg(lambda x: x.index[:2]) + + +def test_agg_ser_multi_key(df): + + f = lambda x: x.sum() + results = df.C.groupby([df.A, df.B]).aggregate(f) + expected = df.groupby(["A", "B"]).sum()["C"] + tm.assert_series_equal(results, expected) + + +def test_groupby_aggregation_mixed_dtype(): + # GH 6212 + expected = DataFrame( + { + "v1": [5, 5, 7, np.nan, 3, 3, 4, 1], + "v2": [55, 55, 77, np.nan, 33, 33, 44, 11], + }, + index=MultiIndex.from_tuples( + [ + (1, 95), + (1, 99), + (2, 95), + (2, 99), + ("big", "damp"), + ("blue", "dry"), + ("red", "red"), + ("red", "wet"), + ], + names=["by1", "by2"], + ), + ) + + df = DataFrame( + { + "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + "by1": ["red", "blue", 1, 
2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + "by2": [ + "wet", + "dry", + 99, + 95, + np.nan, + "damp", + 95, + 99, + "red", + 99, + np.nan, + np.nan, + ], + } + ) + + g = df.groupby(["by1", "by2"]) + result = g[["v1", "v2"]].mean() + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_multi_level_column(): + # GH 29772 + lst = [ + [True, True, True, False], + [True, False, np.nan, False], + [True, True, np.nan, False], + [True, True, np.nan, False], + ] + df = DataFrame( + data=lst, + columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]), + ) + + gb = df.groupby(level=1, axis=1) + result = gb.sum(numeric_only=False) + expected = DataFrame({0: [2.0, True, True, True], 1: [1, 0, 1, 1]}) + + tm.assert_frame_equal(result, expected) + + +def test_agg_apply_corner(ts, tsframe): + # nothing to group, all NA + grouped = ts.groupby(ts * np.nan, group_keys=False) + assert ts.dtype == np.float64 + + # groupby float64 values results in Float64Index + exp = Series([], dtype=np.float64, index=Index([], dtype=np.float64)) + tm.assert_series_equal(grouped.sum(), exp) + tm.assert_series_equal(grouped.agg(np.sum), exp) + tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) + + # DataFrame + grouped = tsframe.groupby(tsframe["A"] * np.nan, group_keys=False) + exp_df = DataFrame( + columns=tsframe.columns, + dtype=float, + index=Index([], name="A", dtype=np.float64), + ) + tm.assert_frame_equal(grouped.sum(), exp_df) + tm.assert_frame_equal(grouped.agg(np.sum), exp_df) + tm.assert_frame_equal(grouped.apply(np.sum), exp_df) + + +def test_agg_grouping_is_list_tuple(ts): + df = tm.makeTimeDataFrame() + + grouped = df.groupby(lambda x: x.year) + grouper = grouped.grouper.groupings[0].grouping_vector + grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_python_multiindex(mframe): + grouped = mframe.groupby(["A", "B"]) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "groupbyfunc", [lambda x: x.weekday(), [lambda x: x.month, lambda x: x.weekday()]] +) +def test_aggregate_str_func(tsframe, groupbyfunc): + grouped = tsframe.groupby(groupbyfunc) + + # single series + result = grouped["A"].agg("std") + expected = grouped["A"].std() + tm.assert_series_equal(result, expected) + + # group frame by function name + result = grouped.aggregate("var") + expected = grouped.var() + tm.assert_frame_equal(result, expected) + + # group frame by function dict + result = grouped.agg({"A": "var", "B": "std", "C": "mean", "D": "sem"}) + expected = DataFrame( + { + "A": grouped["A"].var(), + "B": grouped["B"].std(), + "C": grouped["C"].mean(), + "D": grouped["D"].sem(), + } + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func): + gb = df.groupby(level=0) + if reduction_func in ("idxmax", "idxmin"): + error = TypeError + msg = "reduction operation '.*' not allowed for this dtype" + else: + error = ValueError + msg = f"Operation {reduction_func} does not support axis=1" + with pytest.raises(error, match=msg): + gb.agg(reduction_func, axis=1) + + +@pytest.mark.parametrize( + "func, expected, dtype, result_dtype_dict", + [ + 
("sum", [5, 7, 9], "int64", {}), + ("std", [4.5**0.5] * 3, int, {"i": float, "j": float, "k": float}), + ("var", [4.5] * 3, int, {"i": float, "j": float, "k": float}), + ("sum", [5, 7, 9], "Int64", {"j": "int64"}), + ("std", [4.5**0.5] * 3, "Int64", {"i": float, "j": float, "k": float}), + ("var", [4.5] * 3, "Int64", {"i": "float64", "j": "float64", "k": "float64"}), + ], +) +def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype_dict): + # GH#43209 + df = DataFrame( + [[1, 2, 3, 4, 5, 6]] * 3, + columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]), + ).astype({("a", "j"): dtype, ("b", "j"): dtype}) + warn = FutureWarning if func == "std" else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby(level=1, axis=1).agg(func) + expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype( + result_dtype_dict + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func, expected_data, result_dtype_dict", + [ + ("sum", [[2, 4], [10, 12], [18, 20]], {10: "int64", 20: "int64"}), + # std should ideally return Int64 / Float64 #43330 + ("std", [[2**0.5] * 2] * 3, "float64"), + ("var", [[2] * 2] * 3, {10: "float64", 20: "float64"}), + ], +) +def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict): + # GH#43209 + df = DataFrame( + np.arange(12).reshape(3, 4), + index=Index([0, 1, 0], name="y"), + columns=Index([10, 20, 10, 20], name="x"), + dtype="int64", + ).astype({10: "Int64"}) + warn = FutureWarning if func == "std" else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby("x", axis=1).agg(func) + expected = DataFrame( + data=expected_data, + index=Index([0, 1, 0], name="y"), + columns=Index([10, 20], name="x"), + ).astype(result_dtype_dict) + tm.assert_frame_equal(result, expected) + + +def test_aggregate_item_by_item(df): + grouped = df.groupby("A") + + aggfun = lambda ser: ser.size + result = grouped.agg(aggfun) + foo = (df.A == "foo").sum() + bar = (df.A == "bar").sum() + K = len(result.columns) + + # GH5782 + exp = Series(np.array([foo] * K), index=list("BCD"), name="foo") + tm.assert_series_equal(result.xs("foo"), exp) + + exp = Series(np.array([bar] * K), index=list("BCD"), name="bar") + tm.assert_almost_equal(result.xs("bar"), exp) + + def aggfun(ser): + return ser.size + + result = DataFrame().groupby(df.A).agg(aggfun) + assert isinstance(result, DataFrame) + assert len(result) == 0 + + +def test_wrap_agg_out(three_group): + grouped = three_group.groupby(["A", "B"]) + + def func(ser): + if ser.dtype == object: + raise TypeError + else: + return ser.sum() + + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = grouped.aggregate(func) + exp_grouped = three_group.loc[:, three_group.columns != "C"] + expected = exp_grouped.groupby(["A", "B"]).aggregate(func) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_maintain_order(df): + # GH #610 + funcs = [("mean", np.mean), ("max", np.max), ("min", np.min)] + result = df.groupby("A")["C"].agg(funcs) + exp_cols = Index(["mean", "max", "min"]) + + tm.assert_index_equal(result.columns, exp_cols) + + +def test_agg_multiple_functions_same_name(): + # GH 30880 + df = DataFrame( + np.random.randn(1000, 3), + index=pd.date_range("1/1/2012", freq="S", periods=1000), + columns=["A", "B", "C"], + ) + result = df.resample("3T").agg( + {"A": [partial(np.quantile, 
q=0.9999), partial(np.quantile, q=0.1111)]} + ) + expected_index = pd.date_range("1/1/2012", freq="3T", periods=6) + expected_columns = MultiIndex.from_tuples([("A", "quantile"), ("A", "quantile")]) + expected_values = np.array( + [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]] + ).T + expected = DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_same_name_with_ohlc_present(): + # GH 30880 + # ohlc expands dimensions, so different test to the above is required. + df = DataFrame( + np.random.randn(1000, 3), + index=pd.date_range("1/1/2012", freq="S", periods=1000, name="dti"), + columns=Index(["A", "B", "C"], name="alpha"), + ) + result = df.resample("3T").agg( + {"A": ["ohlc", partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]} + ) + expected_index = pd.date_range("1/1/2012", freq="3T", periods=6, name="dti") + expected_columns = MultiIndex.from_tuples( + [ + ("A", "ohlc", "open"), + ("A", "ohlc", "high"), + ("A", "ohlc", "low"), + ("A", "ohlc", "close"), + ("A", "quantile", "A"), + ("A", "quantile", "A"), + ], + names=["alpha", None, None], + ) + non_ohlc_expected_values = np.array( + [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]] + ).T + expected_values = np.hstack([df.resample("3T").A.ohlc(), non_ohlc_expected_values]) + expected = DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + +def test_multiple_functions_tuples_and_non_tuples(df): + # #1359 + funcs = [("foo", "mean"), "std"] + ex_funcs = [("foo", "mean"), ("std", "std")] + + result = df.groupby("A")["C"].agg(funcs) + expected = df.groupby("A")["C"].agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning( + FutureWarning, match=r"\['B'\] did not aggregate successfully" + ): + result = df.groupby("A").agg(funcs) + with tm.assert_produces_warning( + FutureWarning, match=r"\['B'\] did not aggregate successfully" + ): + expected = df.groupby("A").agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + +def test_more_flexible_frame_multi_function(df): + grouped = df.groupby("A") + + exmean = grouped.agg({"C": np.mean, "D": np.mean}) + exstd = grouped.agg({"C": np.std, "D": np.std}) + + expected = concat([exmean, exstd], keys=["mean", "std"], axis=1) + expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) + + d = {"C": [np.mean, np.std], "D": [np.mean, np.std]} + result = grouped.aggregate(d) + + tm.assert_frame_equal(result, expected) + + # be careful + result = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + expected = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + tm.assert_frame_equal(result, expected) + + def foo(x): + return np.mean(x) + + def bar(x): + return np.std(x, ddof=1) + + # this uses column selection & renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + d = {"C": np.mean, "D": {"foo": np.mean, "bar": np.std}} + grouped.aggregate(d) + + # But without renaming, these functions are OK + d = {"C": [np.mean], "D": [foo, bar]} + grouped.aggregate(d) + + +def test_multi_function_flexible_mix(df): + # GH #1268 + grouped = df.groupby("A") + + # Expected + d = {"C": {"foo": "mean", "bar": "std"}, "D": {"sum": "sum"}} + # this uses column selection & renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + 
grouped.aggregate(d) + + # Test 1 + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} + # this uses column selection & renaming + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate(d) + + # Test 2 + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} + # this uses column selection & renaming + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate(d) + + +def test_groupby_agg_coercing_bools(): + # issue 14873 + dat = DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) + gp = dat.groupby("a") + + index = Index([1, 2], name="a") + + result = gp["b"].aggregate(lambda x: (x != 0).all()) + expected = Series([False, True], index=index, name="b") + tm.assert_series_equal(result, expected) + + result = gp["c"].aggregate(lambda x: x.isnull().all()) + expected = Series([True, False], index=index, name="c") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "op", + [ + lambda x: x.sum(), + lambda x: x.cumsum(), + lambda x: x.transform("sum"), + lambda x: x.transform("cumsum"), + lambda x: x.agg("sum"), + lambda x: x.agg("cumsum"), + ], +) +def test_bool_agg_dtype(op): + # GH 7001 + # Bool sum aggregations result in int + df = DataFrame({"a": [1, 1], "b": [False, True]}) + s = df.set_index("a")["b"] + + result = op(df.groupby("a"))["b"].dtype + assert is_integer_dtype(result) + + result = op(s.groupby("a")).dtype + assert is_integer_dtype(result) + + +@pytest.mark.parametrize( + "keys, agg_index", + [ + (["a"], Index([1], name="a")), + (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), + ], +) +@pytest.mark.parametrize( + "input_dtype", ["bool", "int32", "int64", "float32", "float64"] +) +@pytest.mark.parametrize( + "result_dtype", ["bool", "int32", "int64", "float32", "float64"] +) +@pytest.mark.parametrize("method", ["apply", "aggregate", "transform"]) +def test_callable_result_dtype_frame( + keys, agg_index, input_dtype, result_dtype, method +): + # GH 21240 + df = DataFrame({"a": [1], "b": [2], "c": [True]}) + df["c"] = df["c"].astype(input_dtype) + op = getattr(df.groupby(keys)[["c"]], method) + result = op(lambda x: x.astype(result_dtype).iloc[0]) + expected_index = pd.RangeIndex(0, 1) if method == "transform" else agg_index + expected = DataFrame({"c": [df["c"].iloc[0]]}, index=expected_index).astype( + result_dtype + ) + if method == "apply": + expected.columns.names = [0] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "keys, agg_index", + [ + (["a"], Index([1], name="a")), + (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), + ], +) +@pytest.mark.parametrize("input", [True, 1, 1.0]) +@pytest.mark.parametrize("dtype", [bool, int, float]) +@pytest.mark.parametrize("method", ["apply", "aggregate", "transform"]) +def test_callable_result_dtype_series(keys, agg_index, input, dtype, method): + # GH 21240 + df = DataFrame({"a": [1], "b": [2], "c": [input]}) + op = getattr(df.groupby(keys)["c"], method) + result = op(lambda x: x.astype(dtype).iloc[0]) + expected_index = pd.RangeIndex(0, 1) if method == "transform" else agg_index + expected = Series([df["c"].iloc[0]], index=expected_index, name="c").astype(dtype) + tm.assert_series_equal(result, expected) + + +def test_order_aggregate_multiple_funcs(): + # GH 25692 + df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) + + res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + result = res.columns.levels[1] + + expected = Index(["sum", "max", "mean", "ohlc", "min"]) + + 
tm.assert_index_equal(result, expected) + + +def test_ohlc_ea_dtypes(any_numeric_ea_dtype): + # GH#37493 + df = DataFrame( + {"a": [1, 1, 2, 3, 4, 4], "b": [22, 11, pd.NA, 10, 20, pd.NA]}, + dtype=any_numeric_ea_dtype, + ) + result = df.groupby("a").ohlc() + expected = DataFrame( + [[22, 22, 11, 11], [pd.NA] * 4, [10] * 4, [20] * 4], + columns=MultiIndex.from_product([["b"], ["open", "high", "low", "close"]]), + index=Index([1, 2, 3, 4], dtype=any_numeric_ea_dtype, name="a"), + dtype=any_numeric_ea_dtype, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [np.int64, np.uint64]) +@pytest.mark.parametrize("how", ["first", "last", "min", "max", "mean", "median"]) +def test_uint64_type_handling(dtype, how): + # GH 26310 + df = DataFrame({"x": 6903052872240755750, "y": [1, 2]}) + expected = df.groupby("y").agg({"x": how}) + df.x = df.x.astype(dtype) + result = df.groupby("y").agg({"x": how}) + if how not in ("mean", "median"): + # mean and median always result in floats + result.x = result.x.astype(np.int64) + tm.assert_frame_equal(result, expected, check_exact=True) + + +def test_func_duplicates_raises(): + # GH28426 + msg = "Function names" + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").agg(["min", "min"]) + + +@pytest.mark.parametrize( + "index", + [ + pd.CategoricalIndex(list("abc")), + pd.interval_range(0, 3), + pd.period_range("2020", periods=3, freq="D"), + MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + ], +) +def test_agg_index_has_complex_internals(index): + # GH 31223 + df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) + result = df.groupby("group").agg({"value": Series.nunique}) + expected = DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group") + tm.assert_frame_equal(result, expected) + + +def test_agg_split_block(): + # https://github.com/pandas-dev/pandas/issues/31522 + df = DataFrame( + { + "key1": ["a", "a", "b", "b", "a"], + "key2": ["one", "two", "one", "two", "one"], + "key3": ["three", "three", "three", "six", "six"], + } + ) + result = df.groupby("key1").min() + expected = DataFrame( + {"key2": ["one", "one"], "key3": ["six", "six"]}, + index=Index(["a", "b"], name="key1"), + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_split_object_part_datetime(): + # https://github.com/pandas-dev/pandas/pull/31616 + df = DataFrame( + { + "A": pd.date_range("2000", periods=4), + "B": ["a", "b", "c", "d"], + "C": [1, 2, 3, 4], + "D": ["b", "c", "d", "e"], + "E": pd.date_range("2000", periods=4), + "F": [1, 2, 3, 4], + } + ).astype(object) + result = df.groupby([0, 0, 0, 0]).min() + expected = DataFrame( + { + "A": [pd.Timestamp("2000")], + "B": ["a"], + "C": [1], + "D": ["b"], + "E": [pd.Timestamp("2000")], + "F": [1], + } + ) + tm.assert_frame_equal(result, expected) + + +class TestNamedAggregationSeries: + def test_series_named_agg(self): + df = Series([1, 2, 3, 4]) + gr = df.groupby([0, 0, 1, 1]) + result = gr.agg(a="sum", b="min") + expected = DataFrame( + {"a": [3, 7], "b": [1, 3]}, columns=["a", "b"], index=[0, 1] + ) + tm.assert_frame_equal(result, expected) + + result = gr.agg(b="min", a="sum") + expected = expected[["b", "a"]] + tm.assert_frame_equal(result, expected) + + def test_no_args_raises(self): + gr = Series([1, 2]).groupby([0, 1]) + with pytest.raises(TypeError, match="Must provide"): + gr.agg() + + # but we do allow this + result = gr.agg([]) + expected = DataFrame() + tm.assert_frame_equal(result, 
expected) + + def test_series_named_agg_duplicates_no_raises(self): + # GH28426 + gr = Series([1, 2, 3]).groupby([0, 0, 1]) + grouped = gr.agg(a="sum", b="sum") + expected = DataFrame({"a": [3, 3], "b": [3, 3]}) + tm.assert_frame_equal(expected, grouped) + + def test_mangled(self): + gr = Series([1, 2, 3]).groupby([0, 0, 1]) + result = gr.agg(a=lambda x: 0, b=lambda x: 1) + expected = DataFrame({"a": [0, 0], "b": [1, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "inp", + [ + pd.NamedAgg(column="anything", aggfunc="min"), + ("anything", "min"), + ["anything", "min"], + ], + ) + def test_named_agg_nametuple(self, inp): + # GH34422 + s = Series([1, 1, 2, 2, 3, 3, 4, 5]) + msg = f"func is expected but received {type(inp).__name__}" + with pytest.raises(TypeError, match=msg): + s.groupby(s.values).agg(a=inp) + + +class TestNamedAggregationDataFrame: + def test_agg_relabel(self): + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + result = df.groupby("group").agg(a_max=("A", "max"), b_max=("B", "max")) + expected = DataFrame( + {"a_max": [1, 3], "b_max": [6, 8]}, + index=Index(["a", "b"], name="group"), + columns=["a_max", "b_max"], + ) + tm.assert_frame_equal(result, expected) + + # order invariance + p98 = functools.partial(np.percentile, q=98) + result = df.groupby("group").agg( + b_min=("B", "min"), + a_min=("A", min), + a_mean=("A", np.mean), + a_max=("A", "max"), + b_max=("B", "max"), + a_98=("A", p98), + ) + expected = DataFrame( + { + "b_min": [5, 7], + "a_min": [0, 2], + "a_mean": [0.5, 2.5], + "a_max": [1, 3], + "b_max": [6, 8], + "a_98": [0.98, 2.98], + }, + index=Index(["a", "b"], name="group"), + columns=["b_min", "a_min", "a_mean", "a_max", "b_max", "a_98"], + ) + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_non_identifier(self): + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + + result = df.groupby("group").agg(**{"my col": ("A", "max")}) + expected = DataFrame({"my col": [1, 3]}, index=Index(["a", "b"], name="group")) + tm.assert_frame_equal(result, expected) + + def test_duplicate_no_raises(self): + # GH 28426, if use same input function on same column, + # no error should raise + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + + grouped = df.groupby("A").agg(a=("B", "min"), b=("B", "min")) + expected = DataFrame({"a": [1, 3], "b": [1, 3]}, index=Index([0, 1], name="A")) + tm.assert_frame_equal(grouped, expected) + + quant50 = functools.partial(np.percentile, q=50) + quant70 = functools.partial(np.percentile, q=70) + quant50.__name__ = "quant50" + quant70.__name__ = "quant70" + + test = DataFrame({"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]}) + + grouped = test.groupby("col1").agg( + quantile_50=("col2", quant50), quantile_70=("col2", quant70) + ) + expected = DataFrame( + {"quantile_50": [1.5, 4.0], "quantile_70": [1.7, 4.4]}, + index=Index(["a", "b"], name="col1"), + ) + tm.assert_frame_equal(grouped, expected) + + def test_agg_relabel_with_level(self): + df = DataFrame( + {"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}, + index=MultiIndex.from_product([["A", "B"], ["a", "b"]]), + ) + result = df.groupby(level=0).agg( + aa=("A", "max"), bb=("A", "min"), cc=("B", "mean") + ) + expected = DataFrame( + {"aa": [0, 1], "bb": [0, 1], "cc": [1.5, 3.5]}, index=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_other_raises(self): + df = DataFrame({"A": [0, 0, 1], "B": [1, 2, 3]}) + grouped = 
df.groupby("A") + match = "Must provide" + with pytest.raises(TypeError, match=match): + grouped.agg(foo=1) + + with pytest.raises(TypeError, match=match): + grouped.agg() + + with pytest.raises(TypeError, match=match): + grouped.agg(a=("B", "max"), b=(1, 2, 3)) + + def test_missing_raises(self): + df = DataFrame({"A": [0, 1], "B": [1, 2]}) + match = re.escape("Column(s) ['C'] do not exist") + with pytest.raises(KeyError, match=match): + df.groupby("A").agg(c=("C", "sum")) + + def test_agg_namedtuple(self): + df = DataFrame({"A": [0, 1], "B": [1, 2]}) + result = df.groupby("A").agg( + b=pd.NamedAgg("B", "sum"), c=pd.NamedAgg(column="B", aggfunc="count") + ) + expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count")) + tm.assert_frame_equal(result, expected) + + def test_mangled(self): + df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) + result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1)) + expected = DataFrame({"b": [0, 0], "c": [1, 1]}, index=Index([0, 1], name="A")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3", + [ + ( + (("y", "A"), "max"), + (("y", "A"), np.min), + (("y", "B"), "mean"), + [1, 3], + [0, 2], + [5.5, 7.5], + ), + ( + (("y", "A"), lambda x: max(x)), + (("y", "A"), lambda x: 1), + (("y", "B"), "mean"), + [1, 3], + [1, 1], + [5.5, 7.5], + ), + ( + pd.NamedAgg(("y", "A"), "max"), + pd.NamedAgg(("y", "B"), np.mean), + pd.NamedAgg(("y", "A"), lambda x: 1), + [1, 3], + [5.5, 7.5], + [1, 1], + ), + ], +) +def test_agg_relabel_multiindex_column( + agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3 +): + # GH 29422, add tests for multiindex column cases + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + idx = Index(["a", "b"], name=("x", "group")) + + result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max")) + expected = DataFrame({"a_max": [1, 3]}, index=idx) + tm.assert_frame_equal(result, expected) + + result = df.groupby(("x", "group")).agg( + col_1=agg_col1, col_2=agg_col2, col_3=agg_col3 + ) + expected = DataFrame( + {"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_relabel_multiindex_raises_not_exist(): + # GH 29422, add test for raises scenario when aggregate column does not exist + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + + with pytest.raises(KeyError, match="do not exist"): + df.groupby(("x", "group")).agg(a=(("Y", "a"), "max")) + + +def test_agg_relabel_multiindex_duplicates(): + # GH29422, add test for raises scenario when getting duplicates + # GH28426, after this change, duplicates should also work if the relabelling is + # different + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + + result = df.groupby(("x", "group")).agg( + a=(("y", "A"), "min"), b=(("y", "A"), "min") + ) + idx = Index(["a", "b"], name=("x", "group")) + expected = DataFrame({"a": [0, 2], "b": [0, 2]}, index=idx) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [{"c": ["min"]}, {"b": [], "c": ["min"]}]) +def 
test_groupby_aggregate_empty_key(kwargs): + # GH: 32580 + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) + result = df.groupby("a").agg(kwargs) + expected = DataFrame( + [1, 4], + index=Index([1, 2], dtype="int64", name="a"), + columns=MultiIndex.from_tuples([["c", "min"]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregate_empty_key_empty_return(): + # GH: 32580 Check if everything works, when return is empty + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) + result = df.groupby("a").agg({"b": []}) + expected = DataFrame(columns=MultiIndex(levels=[["b"], []], codes=[[], []])) + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregate_empty_with_multiindex_frame(): + # GH 39178 + df = DataFrame(columns=["a", "b", "c"]) + result = df.groupby(["a", "b"], group_keys=False).agg(d=("c", list)) + expected = DataFrame( + columns=["d"], index=MultiIndex([[], []], [[], []], names=["a", "b"]) + ) + tm.assert_frame_equal(result, expected) + + +def test_grouby_agg_loses_results_with_as_index_false_relabel(): + # GH 32240: When the aggregate function relabels column names and + # as_index=False is specified, the results are dropped. + + df = DataFrame( + {"key": ["x", "y", "z", "x", "y", "z"], "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]} + ) + + grouped = df.groupby("key", as_index=False) + result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + expected = DataFrame({"key": ["x", "y", "z"], "min_val": [1.0, 0.8, 0.75]}) + tm.assert_frame_equal(result, expected) + + +def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex(): + # GH 32240: When the aggregate function relabels column names and + # as_index=False is specified, the results are dropped. Check if + # multiindex is returned in the right order + + df = DataFrame( + { + "key": ["x", "y", "x", "y", "x", "x"], + "key1": ["a", "b", "c", "b", "a", "c"], + "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75], + } + ) + + grouped = df.groupby(["key", "key1"], as_index=False) + result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min")) + expected = DataFrame( + {"key": ["x", "x", "y"], "key1": ["a", "c", "b"], "min_val": [1.0, 0.75, 0.8]} + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)] +) +def test_multiindex_custom_func(func): + # GH 31777 + data = [[1, 4, 2], [5, 7, 1]] + df = DataFrame( + data, + columns=MultiIndex.from_arrays( + [[1, 1, 2], [3, 4, 3]], names=["Sisko", "Janeway"] + ), + ) + result = df.groupby(np.array([0, 1])).agg(func) + expected_dict = { + (1, 3): {0: 1.0, 1: 5.0}, + (1, 4): {0: 4.0, 1: 7.0}, + (2, 3): {0: 2.0, 1: 1.0}, + } + expected = DataFrame(expected_dict) + expected.columns = df.columns + tm.assert_frame_equal(result, expected) + + +def myfunc(s): + return np.percentile(s, q=0.90) + + +@pytest.mark.parametrize("func", [lambda s: np.percentile(s, q=0.90), myfunc]) +def test_lambda_named_agg(func): + # see gh-28467 + animals = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + + result = animals.groupby("kind").agg( + mean_height=("height", "mean"), perc90=("height", func) + ) + expected = DataFrame( + [[9.3, 9.1036], [20.0, 6.252]], + columns=["mean_height", "perc90"], + index=Index(["cat", "dog"], name="kind"), + ) + + tm.assert_frame_equal(result, expected) + + +def test_aggregate_mixed_types(): + # GH 16916 + df = DataFrame( + 
data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc") + ) + df["grouping"] = ["group 1", "group 1", 2] + result = df.groupby("grouping").aggregate(lambda x: x.tolist()) + expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]] + expected = DataFrame( + expected_data, + index=Index([2, "group 1"], dtype="object", name="grouping"), + columns=Index(["X", "Y", "Z"], dtype="object"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail(reason="Not implemented;see GH 31256") +def test_aggregate_udf_na_extension_type(): + # https://github.com/pandas-dev/pandas/pull/31359 + # This is currently failing to cast back to Int64Dtype. + # The presence of the NA causes two problems + # 1. NA is not an instance of Int64Dtype.type (numpy.int64) + # 2. The presence of an NA forces object type, so the non-NA values is + # a Python int rather than a NumPy int64. Python ints aren't + # instances of numpy.int64. + def aggfunc(x): + if all(x > 2): + return 1 + else: + return pd.NA + + df = DataFrame({"A": pd.array([1, 2, 3])}) + result = df.groupby([1, 1, 2]).agg(aggfunc) + expected = DataFrame({"A": pd.array([1, pd.NA], dtype="Int64")}, index=[1, 2]) + tm.assert_frame_equal(result, expected) + + +class TestLambdaMangling: + def test_basic(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]}) + + expected = DataFrame( + {("B", ""): [0, 0], ("B", ""): [1, 1]}, + index=Index([0, 1], name="A"), + ) + tm.assert_frame_equal(result, expected) + + def test_mangle_series_groupby(self): + gr = Series([1, 2, 3, 4]).groupby([0, 0, 1, 1]) + result = gr.agg([lambda x: 0, lambda x: 1]) + expected = DataFrame({"": [0, 0], "": [1, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason="GH-26611. 
kwargs for multi-agg.") + @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") + def test_with_kwargs(self): + f1 = lambda x, y, b=1: x.sum() + y + b + f2 = lambda x, y, b=2: x.sum() + y * b + result = Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0) + expected = DataFrame({"<lambda_0>": [4], "<lambda_1>": [6]}) + tm.assert_frame_equal(result, expected) + + result = Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10) + expected = DataFrame({"<lambda_0>": [13], "<lambda_1>": [30]}) + tm.assert_frame_equal(result, expected) + + def test_agg_with_one_lambda(self): + # GH 25719, write tests for DataFrameGroupby.agg with only one lambda + df = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + + columns = ["height_sqr_min", "height_max", "weight_max"] + expected = DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + }, + index=Index(["cat", "dog"], name="kind"), + columns=columns, + ) + + # check pd.NamedAgg case + result1 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x**2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + ) + tm.assert_frame_equal(result1, expected) + + # check agg(key=(col, aggfunc)) case + result2 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x**2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + ) + tm.assert_frame_equal(result2, expected) + + def test_agg_multiple_lambda(self): + # GH25719, test for DataFrameGroupby.agg with multiple lambdas + # with mixed aggfunc + df = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + columns = [ + "height_sqr_min", + "height_max", + "weight_max", + "height_max_2", + "weight_min", + ] + expected = DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + "height_max_2": [9.5, 34.0], + "weight_min": [7.9, 7.5], + }, + index=Index(["cat", "dog"], name="kind"), + columns=columns, + ) + + # check agg(key=(col, aggfunc)) case + result1 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x**2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + height_max_2=("height", lambda x: np.max(x)), + weight_min=("weight", lambda x: np.min(x)), + ) + tm.assert_frame_equal(result1, expected) + + # check pd.NamedAgg case + result2 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x**2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + height_max_2=pd.NamedAgg(column="height", aggfunc=lambda x: np.max(x)), + weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)), + ) + tm.assert_frame_equal(result2, expected) + + +def test_groupby_get_by_index(): + # GH 33439 + df = DataFrame({"A": ["S", "W", "W"], "B": [1.0, 1.0, 2.0]}) + res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])}) + expected = DataFrame({"A": ["S", "W"], "B": [1.0, 2.0]}).set_index("A") + tm.assert_frame_equal(res, expected) + + +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}), + ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}), + ({"nr": "min"}, {"nr": [1, 5]}), + ], +) +def 
test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): + # test single aggregations on ordered categorical cols GH27800 + + # create the result dataframe + input_df = DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + expected_df = DataFrame(data=exp_data, index=cat_index) + + if "cat_ord" in expected_df: + # ordered categorical columns should be preserved + dtype = input_df["cat_ord"].dtype + expected_df["cat_ord"] = expected_df["cat_ord"].astype(dtype) + + tm.assert_frame_equal(result_df, expected_df) + + +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]), + ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]), + ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]), + ], +) +def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): + # test combined aggregations on ordered categorical cols GH27800 + + # create the result dataframe + input_df = DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + # unpack the grp_col_dict to create the multi-index tuple + # this tuple will be used to create the expected dataframe index + multi_index_list = [] + for k, v in grp_col_dict.items(): + if isinstance(v, list): + for value in v: + multi_index_list.append([k, value]) + else: + multi_index_list.append([k, v]) + multi_index = MultiIndex.from_tuples(tuple(multi_index_list)) + + expected_df = DataFrame(data=exp_data, columns=multi_index, index=cat_index) + for col in expected_df.columns: + if isinstance(col, tuple) and "cat_ord" in col: + # ordered categorical should be preserved + expected_df[col] = expected_df[col].astype(input_df["cat_ord"].dtype) + + tm.assert_frame_equal(result_df, expected_df) + + +def test_nonagg_agg(): + # GH 35490 - Single/Multiple agg of non-agg function give same results + # TODO: agg should raise for functions that don't aggregate + df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]}) + g = df.groupby("a") + + result = g.agg(["cumsum"]) + result.columns = result.columns.droplevel(-1) + expected = g.agg("cumsum") + + tm.assert_frame_equal(result, expected) + + +def test_aggregate_datetime_objects(): + # https://github.com/pandas-dev/pandas/issues/36003 + # ensure we don't raise an error but keep object dtype for out-of-bounds + # datetimes + df = DataFrame( + { + "A": ["X", "Y"], + "B": [ + datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + datetime.datetime(3005, 1, 1, 10, 30, 23, 540000), + ], + } + ) + result = df.groupby("A").B.max() + expected = df.set_index("A")["B"] + tm.assert_series_equal(result, expected) + + +def test_groupby_index_object_dtype(): + # GH 40014 + df = DataFrame({"c0": ["x", "x", "x"], "c1": ["x", "x", "y"], "p": [0, 1, 
2]}) + df.index = df.index.astype("O") + grouped = df.groupby(["c0", "c1"]) + res = grouped.p.agg(lambda x: all(x > 0)) + # Check that providing a user-defined function in agg() + # produces the correct index shape when using an object-typed index. + expected_index = MultiIndex.from_tuples( + [("x", "x"), ("x", "y")], names=("c0", "c1") + ) + expected = Series([False, True], index=expected_index, name="p") + tm.assert_series_equal(res, expected) + + +def test_timeseries_groupby_agg(): + # GH#43290 + + def func(ser): + if ser.isna().all(): + return None + return np.sum(ser) + + df = DataFrame([1.0], index=[pd.Timestamp("2018-01-16 00:00:00+00:00")]) + res = df.groupby(lambda x: 1).agg(func) + + expected = DataFrame([[1.0]], index=[1]) + tm.assert_frame_equal(res, expected) + + +def test_groupby_aggregate_directory(reduction_func): + # GH#32793 + if reduction_func in ["corrwith", "nth"]: + return None + warn = FutureWarning if reduction_func == "mad" else None + + obj = DataFrame([[0, 1], [0, np.nan]]) + + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result_reduced_series = obj.groupby(0).agg(reduction_func) + result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) + + if reduction_func in ["size", "ngroup"]: + # names are different: None / 1 + tm.assert_series_equal( + result_reduced_series, result_reduced_frame[1], check_names=False + ) + else: + tm.assert_frame_equal(result_reduced_series, result_reduced_frame) + tm.assert_series_equal( + result_reduced_series.dtypes, result_reduced_frame.dtypes + ) + + +def test_group_mean_timedelta_nat(): + # GH43132 + data = Series(["1 day", "3 days", "NaT"], dtype="timedelta64[ns]") + expected = Series(["2 days"], dtype="timedelta64[ns]") + + result = data.groupby([0, 0, 0]).mean() + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "input_data, expected_output", + [ + ( # no timezone + ["2021-01-01T00:00", "NaT", "2021-01-01T02:00"], + ["2021-01-01T01:00"], + ), + ( # timezone + ["2021-01-01T00:00-0100", "NaT", "2021-01-01T02:00-0100"], + ["2021-01-01T01:00-0100"], + ), + ], +) +def test_group_mean_datetime64_nat(input_data, expected_output): + # GH43132 + data = to_datetime(Series(input_data)) + expected = to_datetime(Series(expected_output)) + + result = data.groupby([0, 0, 0]).mean() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func, output", [("mean", [8 + 18j, 10 + 22j]), ("sum", [40 + 90j, 50 + 110j])] +) +def test_groupby_complex(func, output): + # GH#43701 + data = Series(np.arange(20).reshape(10, 2).dot([1, 2j])) + result = data.groupby(data.index % 2).agg(func) + expected = Series(output) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max", "var"]) +def test_groupby_complex_raises(func): + # GH#43701 + data = Series(np.arange(20).reshape(10, 2).dot([1, 2j])) + msg = "No matching signature found" + with pytest.raises(TypeError, match=msg): + data.groupby(data.index % 2).agg(func) + + +@pytest.mark.parametrize( + "func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}] +) +def test_multi_axis_1_raises(func): + # GH#46995 + df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]}) + gb = df.groupby("a", axis=1) + with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"): + gb.agg(func) + + +@pytest.mark.parametrize( + "test, constant", + [ + ([[20, "A"], [20, "B"], [10, "C"]], {0: [10, 20], 1: ["C", ["A", "B"]]}), + ([[20, "A"], [20, "B"], [30, 
"C"]], {0: [20, 30], 1: [["A", "B"], "C"]}), + ([["a", 1], ["a", 1], ["b", 2], ["b", 3]], {0: ["a", "b"], 1: [1, [2, 3]]}), + pytest.param( + [["a", 1], ["a", 2], ["b", 3], ["b", 3]], + {0: ["a", "b"], 1: [[1, 2], 3]}, + marks=pytest.mark.xfail, + ), + ], +) +def test_agg_of_mode_list(test, constant): + # GH#25581 + df1 = DataFrame(test) + result = df1.groupby(0).agg(Series.mode) + # Mode usually only returns 1 value, but can return a list in the case of a tie. + + expected = DataFrame(constant) + expected = expected.set_index(0) + + tm.assert_frame_equal(result, expected) + + +def test_numeric_only_warning_numpy(): + # GH#50538 + df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]}) + gb = df.groupby("a") + msg = "The operation 0 else np.nan), + ("var", lambda x: np.var(x, ddof=1)), + ("min", np.min), + ("max", np.max), + ], +) +def test_cython_agg_empty_buckets(op, targop, observed): + df = DataFrame([11, 12, 13]) + grps = range(0, 55, 5) + + # calling _cython_agg_general directly, instead of via the user API + # which sets different values for min_count, so do that here. + g = df.groupby(pd.cut(df[0], grps), observed=observed) + result = g._cython_agg_general(op, alt=None, numeric_only=True) + + g = df.groupby(pd.cut(df[0], grps), observed=observed) + expected = g.agg(lambda x: targop(x)) + tm.assert_frame_equal(result, expected) + + +def test_cython_agg_empty_buckets_nanops(observed): + # GH-18869 can't call nanops on empty groups, so hardcode expected + # for these + df = DataFrame([11, 12, 13], columns=["a"]) + grps = range(0, 25, 5) + # add / sum + result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( + "sum", alt=None, numeric_only=True + ) + intervals = pd.interval_range(0, 20, freq=5) + expected = DataFrame( + {"a": [0, 0, 36, 0]}, + index=pd.CategoricalIndex(intervals, name="a", ordered=True), + ) + if observed: + expected = expected[expected.a != 0] + + tm.assert_frame_equal(result, expected) + + # prod + result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( + "prod", alt=None, numeric_only=True + ) + expected = DataFrame( + {"a": [1, 1, 1716, 1]}, + index=pd.CategoricalIndex(intervals, name="a", ordered=True), + ) + if observed: + expected = expected[expected.a != 1] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", ["first", "last", "max", "min"]) +@pytest.mark.parametrize( + "data", [Timestamp("2016-10-14 21:00:44.557"), Timedelta("17088 days 21:00:44.557")] +) +def test_cython_with_timestamp_and_nat(op, data): + # https://github.com/pandas-dev/pandas/issues/19526 + df = DataFrame({"a": [0, 1], "b": [data, NaT]}) + index = Index([0, 1], name="a") + + # We will group by a and test the cython aggregations + expected = DataFrame({"b": [data, NaT]}, index=index) + + result = df.groupby("a").aggregate(op) + tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize( + "agg", + [ + "min", + "max", + "count", + "sum", + "prod", + "var", + "mean", + "median", + "ohlc", + "cumprod", + "cumsum", + "shift", + "any", + "all", + "quantile", + "first", + "last", + "rank", + "cummin", + "cummax", + ], +) +def test_read_only_buffer_source_agg(agg): + # https://github.com/pandas-dev/pandas/issues/36014 + df = DataFrame( + { + "sepal_length": [5.1, 4.9, 4.7, 4.6, 5.0], + "species": ["setosa", "setosa", "setosa", "setosa", "setosa"], + } + ) + df._mgr.arrays[0].flags.writeable = False + + result = df.groupby(["species"]).agg({"sepal_length": agg}) + expected = 
df.copy().groupby(["species"]).agg({"sepal_length": agg}) + + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "op_name", + [ + "count", + "sum", + "std", + "var", + "sem", + "mean", + "median", + "prod", + "min", + "max", + ], +) +def test_cython_agg_nullable_int(op_name): + # ensure that the cython-based aggregations don't fail for nullable dtype + # (eg https://github.com/pandas-dev/pandas/issues/37415) + df = DataFrame( + { + "A": ["A", "B"] * 5, + "B": pd.array([1, 2, 3, 4, 5, 6, 7, 8, 9, pd.NA], dtype="Int64"), + } + ) + result = getattr(df.groupby("A")["B"], op_name)() + df2 = df.assign(B=df["B"].astype("float64")) + expected = getattr(df2.groupby("A")["B"], op_name)() + + if op_name != "count": + # the result is not yet consistently using Int64/Float64 dtype, + # so for now just checking the values by casting to float + result = result.astype("float64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("with_na", [True, False]) +@pytest.mark.parametrize( + "op_name, action", + [ + # ("count", "always_int"), + ("sum", "large_int"), + # ("std", "always_float"), + ("var", "always_float"), + # ("sem", "always_float"), + ("mean", "always_float"), + ("median", "always_float"), + ("prod", "large_int"), + ("min", "preserve"), + ("max", "preserve"), + ("first", "preserve"), + ("last", "preserve"), + ], +) +@pytest.mark.parametrize( + "data", + [ + pd.array([1, 2, 3, 4], dtype="Int64"), + pd.array([1, 2, 3, 4], dtype="Int8"), + pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float32"), + pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64"), + pd.array([True, True, False, False], dtype="boolean"), + ], +) +def test_cython_agg_EA_known_dtypes(data, op_name, action, with_na): + if with_na: + data[3] = pd.NA + + df = DataFrame({"key": ["a", "a", "b", "b"], "col": data}) + grouped = df.groupby("key") + + if action == "always_int": + # always Int64 + expected_dtype = pd.Int64Dtype() + elif action == "large_int": + # for any int/bool use Int64, for float preserve dtype + if is_float_dtype(data.dtype): + expected_dtype = data.dtype + elif is_integer_dtype(data.dtype): + # match the numpy dtype we'd get with the non-nullable analogue + expected_dtype = data.dtype + else: + expected_dtype = pd.Int64Dtype() + elif action == "always_float": + # for any int/bool use Float64, for float preserve dtype + if is_float_dtype(data.dtype): + expected_dtype = data.dtype + else: + expected_dtype = pd.Float64Dtype() + elif action == "preserve": + expected_dtype = data.dtype + + result = getattr(grouped, op_name)() + assert result["col"].dtype == expected_dtype + + result = grouped.aggregate(op_name) + assert result["col"].dtype == expected_dtype + + result = getattr(grouped["col"], op_name)() + assert result.dtype == expected_dtype + + result = grouped["col"].aggregate(op_name) + assert result.dtype == expected_dtype diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py new file mode 100644 index 00000000..0b2fb56a --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -0,0 +1,241 @@ +import numpy as np +import pytest + +from pandas.errors import NumbaUtilError +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Index, + NamedAgg, + Series, + option_context, +) +import pandas._testing as tm + + +@td.skip_if_no("numba") +def test_correct_function_signature(): + def incorrect_function(x): + return sum(x) * 2.7 + + data = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 
5.0]}, + columns=["key", "data"], + ) + with pytest.raises(NumbaUtilError, match="The first 2"): + data.groupby("key").agg(incorrect_function, engine="numba") + + with pytest.raises(NumbaUtilError, match="The first 2"): + data.groupby("key")["data"].agg(incorrect_function, engine="numba") + + +@td.skip_if_no("numba") +def test_check_nopython_kwargs(): + def incorrect_function(values, index): + return sum(values) * 2.7 + + data = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + with pytest.raises(NumbaUtilError, match="numba does not support"): + data.groupby("key").agg(incorrect_function, engine="numba", a=1) + + with pytest.raises(NumbaUtilError, match="numba does not support"): + data.groupby("key")["data"].agg(incorrect_function, engine="numba", a=1) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +@pytest.mark.parametrize("jit", [True, False]) +@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) +def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): + def func_numba(values, index): + return np.mean(values) * 2.7 + + if jit: + # Test accepted jitted functions + import numba + + func_numba = numba.jit(func_numba) + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + grouped = data.groupby(0) + if pandas_obj == "Series": + grouped = grouped[1] + + result = grouped.agg(func_numba, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.agg(lambda x: np.mean(x) * 2.7, engine="cython") + + tm.assert_equal(result, expected) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +@pytest.mark.parametrize("jit", [True, False]) +@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) +def test_cache(jit, pandas_obj, nogil, parallel, nopython): + # Test that the functions are cached correctly if we switch functions + def func_1(values, index): + return np.mean(values) - 3.4 + + def func_2(values, index): + return np.mean(values) * 2.7 + + if jit: + import numba + + func_1 = numba.jit(func_1) + func_2 = numba.jit(func_2) + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + grouped = data.groupby(0) + if pandas_obj == "Series": + grouped = grouped[1] + + result = grouped.agg(func_1, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.agg(lambda x: np.mean(x) - 3.4, engine="cython") + tm.assert_equal(result, expected) + + # Add func_2 to the cache + result = grouped.agg(func_2, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.agg(lambda x: np.mean(x) * 2.7, engine="cython") + tm.assert_equal(result, expected) + + # Retest func_1 which should use the cache + result = grouped.agg(func_1, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.agg(lambda x: np.mean(x) - 3.4, engine="cython") + tm.assert_equal(result, expected) + + +@td.skip_if_no("numba") +def test_use_global_config(): + def func_1(values, index): + return np.mean(values) - 3.4 + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + grouped = data.groupby(0) + expected = 
grouped.agg(func_1, engine="numba") + with option_context("compute.use_numba", True): + result = grouped.agg(func_1, engine=None) + tm.assert_frame_equal(expected, result) + + +@td.skip_if_no("numba") +@pytest.mark.parametrize( + "agg_func", + [ + ["min", "max"], + "min", + {"B": ["min", "max"], "C": "sum"}, + NamedAgg(column="B", aggfunc="min"), + ], +) +def test_multifunc_notimplimented(agg_func): + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + grouped = data.groupby(0) + with pytest.raises(NotImplementedError, match="Numba engine can"): + grouped.agg(agg_func, engine="numba") + + with pytest.raises(NotImplementedError, match="Numba engine can"): + grouped[1].agg(agg_func, engine="numba") + + +@td.skip_if_no("numba") +def test_args_not_cached(): + # GH 41647 + def sum_last(values, index, n): + return values[-n:].sum() + + df = DataFrame({"id": [0, 0, 1, 1], "x": [1, 1, 1, 1]}) + grouped_x = df.groupby("id")["x"] + result = grouped_x.agg(sum_last, 1, engine="numba") + expected = Series([1.0] * 2, name="x", index=Index([0, 1], name="id")) + tm.assert_series_equal(result, expected) + + result = grouped_x.agg(sum_last, 2, engine="numba") + expected = Series([2.0] * 2, name="x", index=Index([0, 1], name="id")) + tm.assert_series_equal(result, expected) + + +@td.skip_if_no("numba") +def test_index_data_correctly_passed(): + # GH 43133 + def f(values, index): + return np.mean(index) + + df = DataFrame({"group": ["A", "A", "B"], "v": [4, 5, 6]}, index=[-1, -2, -3]) + result = df.groupby("group").aggregate(f, engine="numba") + expected = DataFrame( + [-1.5, -3.0], columns=["v"], index=Index(["A", "B"], name="group") + ) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_engine_kwargs_not_cached(): + # If the user passes a different set of engine_kwargs don't return the same + # jitted function + nogil = True + parallel = False + nopython = True + + def func_kwargs(values, index): + return nogil + parallel + nopython + + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + df = DataFrame({"value": [0, 0, 0]}) + result = df.groupby(level=0).aggregate( + func_kwargs, engine="numba", engine_kwargs=engine_kwargs + ) + expected = DataFrame({"value": [2.0, 2.0, 2.0]}) + tm.assert_frame_equal(result, expected) + + nogil = False + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + result = df.groupby(level=0).aggregate( + func_kwargs, engine="numba", engine_kwargs=engine_kwargs + ) + expected = DataFrame({"value": [1.0, 1.0, 1.0]}) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore") +def test_multiindex_one_key(nogil, parallel, nopython): + def numba_func(values, index): + return 1 + + df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + result = df.groupby("A").agg( + numba_func, engine="numba", engine_kwargs=engine_kwargs + ) + expected = DataFrame([1.0], index=Index([1], name="A"), columns=["C"]) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_multiindex_multi_key_not_supported(nogil, parallel, nopython): + def numba_func(values, index): + return 1 + + df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + with pytest.raises(NotImplementedError, match="More than 1 grouping labels"): + 
df.groupby(["A", "B"]).agg( + numba_func, engine="numba", engine_kwargs=engine_kwargs + ) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py new file mode 100644 index 00000000..f84abece --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -0,0 +1,674 @@ +""" +test all other .agg behavior +""" + +import datetime as dt +from functools import partial + +import numpy as np +import pytest + +from pandas.errors import SpecificationError + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + PeriodIndex, + Series, + date_range, + period_range, +) +import pandas._testing as tm + +from pandas.io.formats.printing import pprint_thing + + +def test_agg_api(): + # GH 6337 + # https://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error + # different api for agg when passed custom function with mixed frame + + df = DataFrame( + { + "data1": np.random.randn(5), + "data2": np.random.randn(5), + "key1": ["a", "a", "b", "b", "a"], + "key2": ["one", "two", "one", "two", "one"], + } + ) + grouped = df.groupby("key1") + + def peak_to_peak(arr): + return arr.max() - arr.min() + + with tm.assert_produces_warning( + FutureWarning, + match=r"\['key2'\] did not aggregate successfully", + ): + expected = grouped.agg([peak_to_peak]) + expected.columns = ["data1", "data2"] + + with tm.assert_produces_warning( + FutureWarning, + match=r"\['key2'\] did not aggregate successfully", + ): + result = grouped.agg(peak_to_peak) + tm.assert_frame_equal(result, expected) + + +def test_agg_datetimes_mixed(): + data = [[1, "2012-01-01", 1.0], [2, "2012-01-02", 2.0], [3, None, 3.0]] + + df1 = DataFrame( + { + "key": [x[0] for x in data], + "date": [x[1] for x in data], + "value": [x[2] for x in data], + } + ) + + data = [ + [ + row[0], + (dt.datetime.strptime(row[1], "%Y-%m-%d").date() if row[1] else None), + row[2], + ] + for row in data + ] + + df2 = DataFrame( + { + "key": [x[0] for x in data], + "date": [x[1] for x in data], + "value": [x[2] for x in data], + } + ) + + df1["weights"] = df1["value"] / df1["value"].sum() + gb1 = df1.groupby("date").aggregate(np.sum) + + df2["weights"] = df1["value"] / df1["value"].sum() + gb2 = df2.groupby("date").aggregate(np.sum) + + assert len(gb1) == len(gb2) + + +def test_agg_period_index(): + prng = period_range("2012-1-1", freq="M", periods=3) + df = DataFrame(np.random.randn(3, 2), index=prng) + rs = df.groupby(level=0).sum() + assert isinstance(rs.index, PeriodIndex) + + # GH 3579 + index = period_range(start="1999-01", periods=5, freq="M") + s1 = Series(np.random.rand(len(index)), index=index) + s2 = Series(np.random.rand(len(index)), index=index) + df = DataFrame.from_dict({"s1": s1, "s2": s2}) + grouped = df.groupby(df.index.month) + list(grouped) + + +def test_agg_dict_parameter_cast_result_dtypes(): + # GH 12821 + + df = DataFrame( + { + "class": ["A", "A", "B", "B", "C", "C", "D", "D"], + "time": date_range("1/1/2011", periods=8, freq="H"), + } + ) + df.loc[[0, 1, 2, 5], "time"] = None + + # test for `first` function + exp = df.loc[[0, 3, 4, 6]].set_index("class") + grouped = df.groupby("class") + tm.assert_frame_equal(grouped.first(), exp) + tm.assert_frame_equal(grouped.agg("first"), exp) + tm.assert_frame_equal(grouped.agg({"time": "first"}), exp) + tm.assert_series_equal(grouped.time.first(), exp["time"]) + tm.assert_series_equal(grouped.time.agg("first"), exp["time"]) + + # test for `last` function + exp = df.loc[[0, 3, 4, 7]].set_index("class") + 
grouped = df.groupby("class") + tm.assert_frame_equal(grouped.last(), exp) + tm.assert_frame_equal(grouped.agg("last"), exp) + tm.assert_frame_equal(grouped.agg({"time": "last"}), exp) + tm.assert_series_equal(grouped.time.last(), exp["time"]) + tm.assert_series_equal(grouped.time.agg("last"), exp["time"]) + + # count + exp = Series([2, 2, 2, 2], index=Index(list("ABCD"), name="class"), name="time") + tm.assert_series_equal(grouped.time.agg(len), exp) + tm.assert_series_equal(grouped.time.size(), exp) + + exp = Series([0, 1, 1, 2], index=Index(list("ABCD"), name="class"), name="time") + tm.assert_series_equal(grouped.time.count(), exp) + + +def test_agg_cast_results_dtypes(): + # similar to GH12821 + # xref #11444 + u = [dt.datetime(2015, x + 1, 1) for x in range(12)] + v = list("aaabbbbbbccd") + df = DataFrame({"X": v, "Y": u}) + + result = df.groupby("X")["Y"].agg(len) + expected = df.groupby("X")["Y"].count() + tm.assert_series_equal(result, expected) + + +def test_aggregate_float64_no_int64(): + # see gh-11199 + df = DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 4, 5], "c": [1, 2, 3, 4, 5]}) + + expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a"]].mean() + tm.assert_frame_equal(result, expected) + + expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a", "c"]].mean() + tm.assert_frame_equal(result, expected) + + +def test_aggregate_api_consistency(): + # GH 9052 + # make sure that the aggregates via dict + # are consistent + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + grouped = df.groupby(["A", "B"]) + c_mean = grouped["C"].mean() + c_sum = grouped["C"].sum() + d_mean = grouped["D"].mean() + d_sum = grouped["D"].sum() + + result = grouped["D"].agg(["sum", "mean"]) + expected = pd.concat([d_sum, d_mean], axis=1) + expected.columns = ["sum", "mean"] + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg([np.sum, np.mean]) + expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped[["D", "C"]].agg([np.sum, np.mean]) + expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) + expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg({"C": "mean", "D": "sum"}) + expected = pd.concat([d_sum, c_mean], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg({"C": ["mean", "sum"], "D": ["mean", "sum"]}) + expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]]) + + msg = r"Column\(s\) \['r', 'r2'\] do not exist" + with pytest.raises(KeyError, match=msg): + grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean}) + + +def test_agg_dict_renaming_deprecation(): + # 15931 + df = DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)}) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").agg( + {"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}} + ) + + msg = r"Column\(s\) \['ma'\] do 
not exist" + with pytest.raises(KeyError, match=msg): + df.groupby("A")[["B", "C"]].agg({"ma": "max"}) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").B.agg({"foo": "count"}) + + +def test_agg_compat(): + # GH 12334 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + g = df.groupby(["A", "B"]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"C": ["sum", "std"]}) + + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"C": "sum", "D": "std"}) + + +def test_agg_nested_dicts(): + # API change for disallowing these types of nested dicts + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + g = df.groupby(["A", "B"]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + g.aggregate({"r1": {"C": ["mean", "sum"]}, "r2": {"D": ["mean", "sum"]}}) + + with pytest.raises(SpecificationError, match=msg): + g.agg({"C": {"ra": ["mean", "std"]}, "D": {"rb": ["mean", "std"]}}) + + # same name as the original column + # GH9052 + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"result1": np.sum, "result2": np.mean}) + + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"D": np.sum, "result2": np.mean}) + + +def test_agg_item_by_item_raise_typeerror(): + df = DataFrame(np.random.randint(10, size=(20, 10))) + + def raiseException(df): + pprint_thing("----------------------------------------") + pprint_thing(df.to_string()) + raise TypeError("test") + + with pytest.raises(TypeError, match="test"): + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + df.groupby(0).agg(raiseException) + + +def test_series_agg_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + + result = grouped.agg(np.sum) + expected = grouped.sum() + tm.assert_series_equal(result, expected) + + +def test_series_agg_multi_pure_python(): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + def bad(x): + assert len(x.values.base) > 0 + return "foo" + + result = data.groupby(["A", "B"]).agg(bad) + expected = data.groupby(["A", "B"]).agg(lambda x: "foo") + tm.assert_frame_equal(result, expected) + + +def test_agg_consistency(): + # agg with ([]) and () not consistent + # GH 6715 + def P1(a): + return np.percentile(a.dropna(), q=1) + + df = DataFrame( + { + "col1": [1, 2, 3, 4], + "col2": [10, 25, 26, 31], + "date": [ + dt.date(2013, 2, 10), + dt.date(2013, 2, 10), + dt.date(2013, 2, 11), + dt.date(2013, 2, 11), + ], + } + ) + + g = df.groupby("date") + + expected = g.agg([P1]) + expected.columns = expected.columns.levels[0] + + result = g.agg(P1) + tm.assert_frame_equal(result, expected) + + +def test_agg_callables(): 
+ # GH 7929 + df = DataFrame({"foo": [1, 2], "bar": [3, 4]}).astype(np.int64) + + class fn_class: + def __call__(self, x): + return sum(x) + + equiv_callables = [ + sum, + np.sum, + lambda x: sum(x), + lambda x: x.sum(), + partial(sum), + fn_class(), + ] + + expected = df.groupby("foo").agg(sum) + for ecall in equiv_callables: + result = df.groupby("foo").agg(ecall) + tm.assert_frame_equal(result, expected) + + +def test_agg_over_numpy_arrays(): + # GH 3788 + df = DataFrame( + [ + [1, np.array([10, 20, 30])], + [1, np.array([40, 50, 60])], + [2, np.array([20, 30, 40])], + ], + columns=["category", "arraydata"], + ) + gb = df.groupby("category") + + expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] + expected_index = Index([1, 2], name="category") + expected_column = ["arraydata"] + expected = DataFrame(expected_data, index=expected_index, columns=expected_column) + + alt = gb.sum(numeric_only=False) + tm.assert_frame_equal(alt, expected) + + result = gb.agg("sum", numeric_only=False) + tm.assert_frame_equal(result, expected) + + # FIXME: the original version of this test called `gb.agg(sum)` + # and that raises TypeError if `numeric_only=False` is passed + + +@pytest.mark.parametrize("as_period", [True, False]) +def test_agg_tzaware_non_datetime_result(as_period): + # discussed in GH#29589, fixed in GH#29641, operating on tzaware values + # with function that is not dtype-preserving + dti = date_range("2012-01-01", periods=4, tz="UTC") + if as_period: + dti = dti.tz_localize(None).to_period("D") + + df = DataFrame({"a": [0, 0, 1, 1], "b": dti}) + gb = df.groupby("a") + + # Case that _does_ preserve the dtype + result = gb["b"].agg(lambda x: x.iloc[0]) + expected = Series(dti[::2], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + # Cases that do _not_ preserve the dtype + result = gb["b"].agg(lambda x: x.iloc[0].year) + expected = Series([2012, 2012], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + result = gb["b"].agg(lambda x: x.iloc[-1] - x.iloc[0]) + expected = Series([pd.Timedelta(days=1), pd.Timedelta(days=1)], name="b") + expected.index.name = "a" + if as_period: + expected = Series([pd.offsets.Day(1), pd.offsets.Day(1)], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + +def test_agg_timezone_round_trip(): + # GH 15426 + ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific") + df = DataFrame({"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]}) + + result1 = df.groupby("a")["b"].agg(np.min).iloc[0] + result2 = df.groupby("a")["b"].agg(lambda x: np.min(x)).iloc[0] + result3 = df.groupby("a")["b"].min().iloc[0] + + assert result1 == ts + assert result2 == ts + assert result3 == ts + + dates = [ + pd.Timestamp(f"2016-01-0{i:d} 12:00:00", tz="US/Pacific") for i in range(1, 5) + ] + df = DataFrame({"A": ["a", "b"] * 2, "B": dates}) + grouped = df.groupby("A") + + ts = df["B"].iloc[0] + assert ts == grouped.nth(0)["B"].iloc[0] + assert ts == grouped.head(1)["B"].iloc[0] + assert ts == grouped.first()["B"].iloc[0] + + # GH#27110 applying iloc should return a DataFrame + assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1] + + ts = df["B"].iloc[2] + assert ts == grouped.last()["B"].iloc[0] + + # GH#27110 applying iloc should return a DataFrame + assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1] + + +def test_sum_uint64_overflow(): + # see gh-14758 + # Convert to uint64 and don't overflow + df = DataFrame([[1, 2], [3, 4], [5, 6]], 
dtype=object) + df = df + 9223372036854775807 + + index = Index( + [9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64 + ) + expected = DataFrame( + {1: [9223372036854775809, 9223372036854775811, 9223372036854775813]}, + index=index, + ) + + expected.index.name = 0 + result = df.groupby(0).sum(numeric_only=False) + tm.assert_frame_equal(result, expected) + + # out column is non-numeric, so with numeric_only=True it is dropped + result2 = df.groupby(0).sum(numeric_only=True) + expected2 = expected[[]] + tm.assert_frame_equal(result2, expected2) + + +@pytest.mark.parametrize( + "structure, expected", + [ + (tuple, DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), + (list, DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), + ( + lambda x: tuple(x), + DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}), + ), + ( + lambda x: list(x), + DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}), + ), + ], +) +def test_agg_structs_dataframe(structure, expected): + df = DataFrame( + {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} + ) + + result = df.groupby(["A", "B"]).aggregate(structure) + expected.index.names = ["A", "B"] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "structure, expected", + [ + (tuple, Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")), + (list, Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")), + (lambda x: tuple(x), Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")), + (lambda x: list(x), Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")), + ], +) +def test_agg_structs_series(structure, expected): + # Issue #18079 + df = DataFrame( + {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} + ) + + result = df.groupby("A")["C"].aggregate(structure) + expected.index.name = "A" + tm.assert_series_equal(result, expected) + + +def test_agg_category_nansum(observed): + categories = ["a", "b", "c"] + df = DataFrame( + {"A": pd.Categorical(["a", "a", "b"], categories=categories), "B": [1, 2, 3]} + ) + result = df.groupby("A", observed=observed).B.agg(np.nansum) + expected = Series( + [3, 3, 0], + index=pd.CategoricalIndex(["a", "b", "c"], categories=categories, name="A"), + name="B", + ) + if observed: + expected = expected[expected != 0] + tm.assert_series_equal(result, expected) + + +def test_agg_list_like_func(): + # GH 18473 + df = DataFrame({"A": [str(x) for x in range(3)], "B": [str(x) for x in range(3)]}) + grouped = df.groupby("A", as_index=False, sort=False) + result = grouped.agg({"B": lambda x: list(x)}) + expected = DataFrame( + {"A": [str(x) for x in range(3)], "B": [[str(x)] for x in range(3)]} + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_lambda_with_timezone(): + # GH 23683 + df = DataFrame( + { + "tag": [1, 1], + "date": [ + pd.Timestamp("2018-01-01", tz="UTC"), + pd.Timestamp("2018-01-02", tz="UTC"), + ], + } + ) + result = df.groupby("tag").agg({"date": lambda e: e.head(1)}) + expected = DataFrame( + [pd.Timestamp("2018-01-01", tz="UTC")], + index=Index([1], name="tag"), + columns=["date"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "err_cls", + [ + NotImplementedError, + RuntimeError, + KeyError, + IndexError, + OSError, + ValueError, + ArithmeticError, + AttributeError, + ], +) +def test_groupby_agg_err_catching(err_cls): + # make sure we suppress anything other than TypeError or AssertionError + # in _python_agg_general + + # Use a non-standard EA to make sure we 
don't go down ndarray paths + from pandas.tests.extension.decimal.array import ( + DecimalArray, + make_data, + to_decimal, + ) + + data = make_data()[:5] + df = DataFrame( + {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)} + ) + + expected = Series(to_decimal([data[0], data[3]])) + + def weird_func(x): + # weird function that raise something other than TypeError or IndexError + # in _python_agg_general + if len(x) == 0: + raise err_cls + return x.iloc[0] + + result = df["decimals"].groupby(df["id1"]).agg(weird_func) + tm.assert_series_equal(result, expected, check_names=False) diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py new file mode 100644 index 00000000..7e7b97d9 --- /dev/null +++ b/pandas/tests/groupby/conftest.py @@ -0,0 +1,204 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm +from pandas.core.groupby.base import ( + reduction_kernels, + transformation_kernels, +) + + +@pytest.fixture(params=[True, False]) +def sort(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def as_index(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def dropna(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def observed(request): + return request.param + + +@pytest.fixture +def mframe(multiindex_dataframe_random_data): + return multiindex_dataframe_random_data + + +@pytest.fixture +def df(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + + +@pytest.fixture +def ts(): + return tm.makeTimeSeries() + + +@pytest.fixture +def tsd(): + return tm.getTimeSeriesData() + + +@pytest.fixture +def tsframe(tsd): + return DataFrame(tsd) + + +@pytest.fixture +def df_mixed_floats(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.array(np.random.randn(8), dtype="float32"), + } + ) + + +@pytest.fixture +def three_group(): + return DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + +@pytest.fixture() +def slice_test_df(): + data = [ + [0, "a", "a0_at_0"], + [1, "b", "b0_at_1"], + [2, "a", "a1_at_2"], + [3, "b", "b1_at_3"], + [4, "c", "c0_at_4"], + [5, "a", "a2_at_5"], + [6, "a", "a3_at_6"], + [7, "a", "a4_at_7"], + ] + df = DataFrame(data, columns=["Index", "Group", "Value"]) + return df.set_index("Index") + + +@pytest.fixture() +def slice_test_grouped(slice_test_df): + return slice_test_df.groupby("Group", as_index=False) + + +@pytest.fixture(params=sorted(reduction_kernels)) +def reduction_func(request): + """ + yields the string names of all groupby reduction functions, one at a time. 
+ """ + return request.param + + +@pytest.fixture(params=sorted(transformation_kernels)) +def transformation_func(request): + """yields the string names of all groupby transformation functions.""" + return request.param + + +@pytest.fixture(params=sorted(reduction_kernels) + sorted(transformation_kernels)) +def groupby_func(request): + """yields both aggregation and transformation functions.""" + return request.param + + +@pytest.fixture(params=[True, False]) +def parallel(request): + """parallel keyword argument for numba.jit""" + return request.param + + +# Can parameterize nogil & nopython over True | False, but limiting per +# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472 + + +@pytest.fixture(params=[False]) +def nogil(request): + """nogil keyword argument for numba.jit""" + return request.param + + +@pytest.fixture(params=[True]) +def nopython(request): + """nopython keyword argument for numba.jit""" + return request.param + + +@pytest.fixture( + params=[ + ("mean", {}), + ("var", {"ddof": 1}), + ("var", {"ddof": 0}), + ("std", {"ddof": 1}), + ("std", {"ddof": 0}), + ("sum", {}), + ("min", {}), + ("max", {}), + ], + ids=["mean", "var_1", "var_0", "std_1", "std_0", "sum", "min", "max"], +) +def numba_supported_reductions(request): + """reductions supported with engine='numba'""" + return request.param diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py new file mode 100644 index 00000000..e541abb3 --- /dev/null +++ b/pandas/tests/groupby/test_allowlist.py @@ -0,0 +1,454 @@ +""" +test methods relating to generic function evaluation +the so-called white/black lists +""" + +from string import ascii_lowercase + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.groupby.base import ( + groupby_other_methods, + reduction_kernels, + transformation_kernels, +) + +AGG_FUNCTIONS = [ + "sum", + "prod", + "min", + "max", + "median", + "mean", + "skew", + "mad", + "std", + "var", + "sem", +] +AGG_FUNCTIONS_WITH_SKIPNA = ["skew", "mad"] + +df_allowlist = [ + "quantile", + "fillna", + "mad", + "take", + "idxmax", + "idxmin", + "tshift", + "skew", + "plot", + "hist", + "dtypes", + "corrwith", + "corr", + "cov", + "diff", +] + + +@pytest.fixture(params=df_allowlist) +def df_allowlist_fixture(request): + return request.param + + +s_allowlist = [ + "quantile", + "fillna", + "mad", + "take", + "idxmax", + "idxmin", + "tshift", + "skew", + "plot", + "hist", + "dtype", + "corr", + "cov", + "diff", + "unique", + "nlargest", + "nsmallest", + "is_monotonic_increasing", + "is_monotonic_decreasing", +] + + +@pytest.fixture(params=s_allowlist) +def s_allowlist_fixture(request): + return request.param + + +@pytest.fixture +def df(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + + +@pytest.fixture +def df_letters(): + letters = np.array(list(ascii_lowercase)) + N = 10 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame( + { + "floats": N / 10 * Series(np.random.random(N)), + "letters": Series(random_letters), + } + ) + return df + + +@pytest.mark.parametrize("allowlist", [df_allowlist, s_allowlist]) +def test_groupby_allowlist(df_letters, allowlist): + df = df_letters + if allowlist == df_allowlist: + # dataframe + obj = df_letters + else: + obj = 
df_letters["floats"] + + gb = obj.groupby(df.letters) + + assert set(allowlist) == set(gb._apply_allowlist) + + +def check_allowlist(obj, df, m): + # check the obj for a particular allowlist m + + gb = obj.groupby(df.letters) + + f = getattr(type(gb), m) + + # name + try: + n = f.__name__ + except AttributeError: + return + assert n == m + + # qualname + try: + n = f.__qualname__ + except AttributeError: + return + assert n.endswith(m) + + +def test_groupby_series_allowlist(df_letters, s_allowlist_fixture): + m = s_allowlist_fixture + df = df_letters + check_allowlist(df.letters, df, m) + + +def test_groupby_frame_allowlist(df_letters, df_allowlist_fixture): + m = df_allowlist_fixture + df = df_letters + check_allowlist(df, df, m) + + +@pytest.fixture +def raw_frame(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + df.iloc[1, [1, 2]] = np.nan + df.iloc[7, [0, 1]] = np.nan + return df + + +@pytest.mark.parametrize("op", AGG_FUNCTIONS) +@pytest.mark.parametrize("level", [0, 1]) +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): + # GH6944 + # GH 17537 + # explicitly test the allowlist methods + warn = FutureWarning if op == "mad" else None + + if axis == 0: + frame = raw_frame + else: + frame = raw_frame.T + + if op in AGG_FUNCTIONS_WITH_SKIPNA: + grouped = frame.groupby(level=level, axis=axis, sort=sort) + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): + result = getattr(grouped, op)(skipna=skipna) + with tm.assert_produces_warning(FutureWarning): + expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + if sort: + expected = expected.sort_index(axis=axis, level=level) + tm.assert_frame_equal(result, expected) + else: + grouped = frame.groupby(level=level, axis=axis, sort=sort) + with tm.assert_produces_warning(FutureWarning): + result = getattr(grouped, op)() + expected = getattr(frame, op)(level=level, axis=axis) + if sort: + expected = expected.sort_index(axis=axis, level=level) + tm.assert_frame_equal(result, expected) + + +def test_groupby_blocklist(df_letters): + df = df_letters + s = df_letters.floats + + blocklist = [ + "eval", + "query", + "abs", + "where", + "mask", + "align", + "groupby", + "clip", + "astype", + "at", + "combine", + "consolidate", + "convert_objects", + ] + to_methods = [method for method in dir(df) if method.startswith("to_")] + + blocklist.extend(to_methods) + + for bl in blocklist: + for obj in (df, s): + gb = obj.groupby(df.letters) + + # e.g., to_csv + defined_but_not_allowed = ( + f"(?:^Cannot.+{repr(bl)}.+'{type(gb).__name__}'.+try " + f"using the 'apply' method$)" + ) + + # e.g., query, eval + not_defined = ( + f"(?:^'{type(gb).__name__}' object has no attribute {repr(bl)}$)" + ) + + msg = f"{defined_but_not_allowed}|{not_defined}" + + with pytest.raises(AttributeError, match=msg): + getattr(gb, bl) + + +def test_tab_completion(mframe): + grp = mframe.groupby(level="second") + results = {v for v in dir(grp) if not v.startswith("_")} + expected = { + "A", + "B", + "C", + "agg", + "aggregate", + "apply", + "boxplot", + "filter", + "first", + "get_group", + "groups", + "hist", + "indices", + "last", + "max", + "mean", + "median", + "min", + "ngroups", + "nth", + "ohlc", + "plot", + "prod", + "size", + "std", + "sum", + "transform", + "var", + "sem", + "count", + "nunique", + 
"head", + "describe", + "cummax", + "quantile", + "rank", + "cumprod", + "tail", + "resample", + "cummin", + "fillna", + "cumsum", + "cumcount", + "ngroup", + "all", + "shift", + "skew", + "take", + "tshift", + "pct_change", + "any", + "mad", + "corr", + "corrwith", + "cov", + "dtypes", + "ndim", + "diff", + "idxmax", + "idxmin", + "ffill", + "bfill", + "pad", + "backfill", + "rolling", + "expanding", + "pipe", + "sample", + "ewm", + "value_counts", + } + assert results == expected + + +def test_groupby_function_rename(mframe): + grp = mframe.groupby(level="second") + for name in ["sum", "prod", "min", "max", "first", "last"]: + f = getattr(grp, name) + assert f.__name__ == name + + +@pytest.mark.parametrize( + "method", + [ + "count", + "corr", + "cummax", + "cummin", + "cumprod", + "describe", + "rank", + "quantile", + "diff", + "shift", + "all", + "any", + "idxmin", + "idxmax", + "ffill", + "bfill", + "pct_change", + ], +) +def test_groupby_selection_with_methods(df, method): + # some methods which require DatetimeIndex + rng = date_range("2014", periods=len(df)) + df.index = rng + + g = df.groupby(["A"])[["C"]] + g_exp = df[["C"]].groupby(df["A"]) + # TODO check groupby with > 1 col ? + + res = getattr(g, method)() + exp = getattr(g_exp, method)() + + # should always be frames! + tm.assert_frame_equal(res, exp) + + +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") +def test_groupby_selection_tshift_raises(df): + rng = date_range("2014", periods=len(df)) + df.index = rng + + g = df.groupby(["A"])[["C"]] + + # check that the index cache is cleared + with pytest.raises(ValueError, match="Freq was not set in the index"): + # GH#35937 + g.tshift() + + +def test_groupby_selection_other_methods(df): + # some methods which require DatetimeIndex + rng = date_range("2014", periods=len(df)) + df.columns.name = "foo" + df.index = rng + + g = df.groupby(["A"])[["C"]] + g_exp = df[["C"]].groupby(df["A"]) + + # methods which aren't just .foo() + tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0)) + tm.assert_frame_equal(g.dtypes, g_exp.dtypes) + tm.assert_frame_equal(g.apply(lambda x: x.sum()), g_exp.apply(lambda x: x.sum())) + + tm.assert_frame_equal(g.resample("D").mean(), g_exp.resample("D").mean()) + tm.assert_frame_equal(g.resample("D").ohlc(), g_exp.resample("D").ohlc()) + + tm.assert_frame_equal( + g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3) + ) + + +def test_all_methods_categorized(mframe): + grp = mframe.groupby(mframe.iloc[:, 0]) + names = {_ for _ in dir(grp) if not _.startswith("_")} - set(mframe.columns) + new_names = set(names) + new_names -= reduction_kernels + new_names -= transformation_kernels + new_names -= groupby_other_methods + + assert not (reduction_kernels & transformation_kernels) + assert not (reduction_kernels & groupby_other_methods) + assert not (transformation_kernels & groupby_other_methods) + + # new public method? + if new_names: + msg = f""" +There are uncategorized methods defined on the Grouper class: +{new_names}. + +Was a new method recently added? + +Every public method On Grouper must appear in exactly one the +following three lists defined in pandas.core.groupby.base: +- `reduction_kernels` +- `transformation_kernels` +- `groupby_other_methods` +see the comments in pandas/core/groupby/base.py for guidance on +how to fix this test. + """ + raise AssertionError(msg) + + # removed a public method? 
+ all_categorized = reduction_kernels | transformation_kernels | groupby_other_methods + print(names) + print(all_categorized) + if not (names == all_categorized): + msg = f""" +Some methods which are supposed to be on the Grouper class +are missing: +{all_categorized - names}. + +They're still defined in one of the lists that live in pandas/core/groupby/base.py. +If you removed a method, you should update them +""" + raise AssertionError(msg) diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py new file mode 100644 index 00000000..3f61a4ec --- /dev/null +++ b/pandas/tests/groupby/test_any_all.py @@ -0,0 +1,190 @@ +import builtins + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + isna, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("agg_func", ["any", "all"]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize( + "vals", + [ + ["foo", "bar", "baz"], + ["foo", "", ""], + ["", "", ""], + [1, 2, 3], + [1, 0, 0], + [0, 0, 0], + [1.0, 2.0, 3.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [True, True, True], + [True, False, False], + [False, False, False], + [np.nan, np.nan, np.nan], + ], +) +def test_groupby_bool_aggs(agg_func, skipna, vals): + df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) + + # Figure out expectation using Python builtin + exp = getattr(builtins, agg_func)(vals) + + # edge case for missing data with skipna and 'any' + if skipna and all(isna(vals)) and agg_func == "any": + exp = False + + exp_df = DataFrame([exp] * 2, columns=["val"], index=Index(["a", "b"], name="key")) + result = getattr(df.groupby("key"), agg_func)(skipna=skipna) + tm.assert_frame_equal(result, exp_df) + + +def test_any(): + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) + expected = DataFrame( + [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] + ) + expected.index.name = "A" + result = df.groupby("A").any() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_bool_aggs_dup_column_labels(bool_agg_func): + # 21668 + df = DataFrame([[True, True]], columns=["a", "a"]) + grp_by = df.groupby([0]) + result = getattr(grp_by, bool_agg_func)() + + expected = df + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize( + "data", + [ + [False, False, False], + [True, True, True], + [pd.NA, pd.NA, pd.NA], + [False, pd.NA, False], + [True, pd.NA, True], + [True, pd.NA, False], + ], +) +def test_masked_kleene_logic(bool_agg_func, skipna, data): + # GH#37506 + ser = Series(data, dtype="boolean") + + # The result should match aggregating on the whole series. 
Correctness + # there is verified in test_reductions.py::test_any_all_boolean_kleene_logic + expected_data = getattr(ser, bool_agg_func)(skipna=skipna) + expected = Series(expected_data, dtype="boolean") + + result = ser.groupby([0, 0, 0]).agg(bool_agg_func, skipna=skipna) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype1,dtype2,exp_col1,exp_col2", + [ + ( + "float", + "Float64", + np.array([True], dtype=bool), + pd.array([pd.NA], dtype="boolean"), + ), + ( + "Int64", + "float", + pd.array([pd.NA], dtype="boolean"), + np.array([True], dtype=bool), + ), + ( + "Int64", + "Int64", + pd.array([pd.NA], dtype="boolean"), + pd.array([pd.NA], dtype="boolean"), + ), + ( + "Float64", + "boolean", + pd.array([pd.NA], dtype="boolean"), + pd.array([pd.NA], dtype="boolean"), + ), + ], +) +def test_masked_mixed_types(dtype1, dtype2, exp_col1, exp_col2): + # GH#37506 + data = [1.0, np.nan] + df = DataFrame( + {"col1": pd.array(data, dtype=dtype1), "col2": pd.array(data, dtype=dtype2)} + ) + result = df.groupby([1, 1]).agg("all", skipna=False) + + expected = DataFrame({"col1": exp_col1, "col2": exp_col2}, index=[1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"]) +@pytest.mark.parametrize("skipna", [True, False]) +def test_masked_bool_aggs_skipna(bool_agg_func, dtype, skipna, frame_or_series): + # GH#40585 + obj = frame_or_series([pd.NA, 1], dtype=dtype) + expected_res = True + if not skipna and bool_agg_func == "all": + expected_res = pd.NA + expected = frame_or_series([expected_res], index=[1], dtype="boolean") + + result = obj.groupby([1, 1]).agg(bool_agg_func, skipna=skipna) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "bool_agg_func,data,expected_res", + [ + ("any", [pd.NA, np.nan], False), + ("any", [pd.NA, 1, np.nan], True), + ("all", [pd.NA, pd.NaT], True), + ("all", [pd.NA, False, pd.NaT], False), + ], +) +def test_object_type_missing_vals(bool_agg_func, data, expected_res, frame_or_series): + # GH#37501 + obj = frame_or_series(data, dtype=object) + result = obj.groupby([1] * len(data)).agg(bool_agg_func) + expected = frame_or_series([expected_res], index=[1], dtype="bool") + tm.assert_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_object_NA_raises_with_skipna_false(bool_agg_func): + # GH#37501 + ser = Series([pd.NA], dtype=object) + with pytest.raises(TypeError, match="boolean value of NA is ambiguous"): + ser.groupby([1]).agg(bool_agg_func, skipna=False) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_empty(frame_or_series, bool_agg_func): + # GH 45231 + kwargs = {"columns": ["a"]} if frame_or_series is DataFrame else {"name": "a"} + obj = frame_or_series(**kwargs, dtype=object) + result = getattr(obj.groupby(obj.index), bool_agg_func)() + expected = frame_or_series(**kwargs, dtype=bool) + tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py new file mode 100644 index 00000000..b6c16c0d --- /dev/null +++ b/pandas/tests/groupby/test_apply.py @@ -0,0 +1,1372 @@ +from datetime import ( + date, + datetime, +) +from io import StringIO + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + bdate_range, +) +import pandas._testing as 
tm +from pandas.core.api import Int64Index +from pandas.tests.groupby import get_groupby_method_args + + +def test_apply_issues(): + # GH 5788 + + s = """2011.05.16,00:00,1.40893 +2011.05.16,01:00,1.40760 +2011.05.16,02:00,1.40750 +2011.05.16,03:00,1.40649 +2011.05.17,02:00,1.40893 +2011.05.17,03:00,1.40760 +2011.05.17,04:00,1.40750 +2011.05.17,05:00,1.40649 +2011.05.18,02:00,1.40893 +2011.05.18,03:00,1.40760 +2011.05.18,04:00,1.40750 +2011.05.18,05:00,1.40649""" + + df = pd.read_csv( + StringIO(s), + header=None, + names=["date", "time", "value"], + parse_dates=[["date", "time"]], + ) + df = df.set_index("date_time") + + expected = df.groupby(df.index.date).idxmax() + result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) + tm.assert_frame_equal(result, expected) + + # GH 5789 + # don't auto coerce dates + df = pd.read_csv(StringIO(s), header=None, names=["date", "time", "value"]) + exp_idx = Index( + ["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date" + ) + expected = Series(["00:00", "02:00", "02:00"], index=exp_idx) + result = df.groupby("date", group_keys=False).apply( + lambda x: x["time"][x["value"].idxmax()] + ) + tm.assert_series_equal(result, expected) + + +def test_apply_trivial(): + # GH 20066 + # trivial apply: ignore input and return a constant dataframe. + df = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", "object"]) + result = df.groupby([str(x) for x in df.dtypes], axis=1).apply( + lambda x: df.iloc[1:] + ) + + tm.assert_frame_equal(result, expected) + + +def test_apply_trivial_fail(): + # GH 20066 + df = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + expected = pd.concat([df, df], axis=1, keys=["float64", "object"]) + result = df.groupby([str(x) for x in df.dtypes], axis=1, group_keys=True).apply( + lambda x: df + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "df, group_names", + [ + (DataFrame({"a": [1, 1, 1, 2, 3], "b": ["a", "a", "a", "b", "c"]}), [1, 2, 3]), + (DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]}), [0, 1]), + (DataFrame({"a": [1]}), [1]), + (DataFrame({"a": [1, 1, 1, 2, 2, 1, 1, 2], "b": range(8)}), [1, 2]), + (DataFrame({"a": [1, 2, 3, 1, 2, 3], "two": [4, 5, 6, 7, 8, 9]}), [1, 2, 3]), + ( + DataFrame( + { + "a": list("aaabbbcccc"), + "B": [3, 4, 3, 6, 5, 2, 1, 9, 5, 4], + "C": [4, 0, 2, 2, 2, 7, 8, 6, 2, 8], + } + ), + ["a", "b", "c"], + ), + (DataFrame([[1, 2, 3], [2, 2, 3]], columns=["a", "b", "c"]), [1, 2]), + ], + ids=[ + "GH2936", + "GH7739 & GH10519", + "GH10519", + "GH2656", + "GH12155", + "GH20084", + "GH21417", + ], +) +def test_group_apply_once_per_group(df, group_names): + # GH2936, GH7739, GH10519, GH2656, GH12155, GH20084, GH21417 + + # This test should ensure that a function is only evaluated + # once per group. 
Previously the function has been evaluated twice + # on the first group to check if the Cython index slider is safe to use + # This test ensures that the side effect (append to list) is only triggered + # once per group + + names = [] + # cannot parameterize over the functions since they need external + # `names` to detect side effects + + def f_copy(group): + # this takes the fast apply path + names.append(group.name) + return group.copy() + + def f_nocopy(group): + # this takes the slow apply path + names.append(group.name) + return group + + def f_scalar(group): + # GH7739, GH2656 + names.append(group.name) + return 0 + + def f_none(group): + # GH10519, GH12155, GH21417 + names.append(group.name) + return None + + def f_constant_df(group): + # GH2936, GH20084 + names.append(group.name) + return DataFrame({"a": [1], "b": [1]}) + + for func in [f_copy, f_nocopy, f_scalar, f_none, f_constant_df]: + del names[:] + + df.groupby("a", group_keys=False).apply(func) + assert names == group_names + + +def test_group_apply_once_per_group2(capsys): + # GH: 31111 + # groupby-apply need to execute len(set(group_by_columns)) times + + expected = 2 # Number of times `apply` should call a function for the current test + + df = DataFrame( + { + "group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], + "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"], + }, + index=["0", "2", "4", "6", "8", "10", "12", "14"], + ) + + df.groupby("group_by_column", group_keys=False).apply( + lambda df: print("function_called") + ) + + result = capsys.readouterr().out.count("function_called") + # If `groupby` behaves unexpectedly, this test will break + assert result == expected + + +def test_apply_fast_slow_identical(): + # GH 31613 + + df = DataFrame({"A": [0, 0, 1], "b": range(3)}) + + # For simple index structures we check for fast/slow apply using + # an identity check on in/output + def slow(group): + return group + + def fast(group): + return group.copy() + + fast_df = df.groupby("A", group_keys=False).apply(fast) + slow_df = df.groupby("A", group_keys=False).apply(slow) + + tm.assert_frame_equal(fast_df, slow_df) + + +@pytest.mark.parametrize( + "func", + [ + lambda x: x, + lambda x: x[:], + lambda x: x.copy(deep=False), + lambda x: x.copy(deep=True), + ], +) +def test_groupby_apply_identity_maybecopy_index_identical(func): + # GH 14927 + # Whether the function returns a copy of the input data or not should not + # have an impact on the index structure of the result since this is not + # transparent to the user + + df = DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) + + result = df.groupby("g", group_keys=False).apply(func) + tm.assert_frame_equal(result, df) + + +def test_apply_with_mixed_dtype(): + # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 + df = DataFrame( + { + "foo1": np.random.randn(6), + "foo2": ["one", "two", "two", "three", "one", "two"], + } + ) + result = df.apply(lambda x: x, axis=1).dtypes + expected = df.dtypes + tm.assert_series_equal(result, expected) + + # GH 3610 incorrect dtype conversion with as_index=False + df = DataFrame({"c1": [1, 2, 6, 6, 8]}) + df["c2"] = df.c1 / 2.0 + result1 = df.groupby("c2").mean().reset_index().c2 + result2 = df.groupby("c2", as_index=False).mean().c2 + tm.assert_series_equal(result1, result2) + + +def test_groupby_as_index_apply(): + # GH #4648 and #3417 + df = DataFrame( + { + "item_id": ["b", "b", "a", "c", "a", "b"], + "user_id": [1, 2, 1, 1, 3, 1], + "time": range(6), + } + ) + + g_as = df.groupby("user_id", as_index=True) + 
g_not_as = df.groupby("user_id", as_index=False) + + res_as = g_as.head(2).index + res_not_as = g_not_as.head(2).index + exp = Index([0, 1, 2, 4]) + tm.assert_index_equal(res_as, exp) + tm.assert_index_equal(res_not_as, exp) + + res_as_apply = g_as.apply(lambda x: x.head(2)).index + res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index + + # apply doesn't maintain the original ordering + # changed in GH5610 as the as_index=False returns a MI here + exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (2, 4)]) + tp = [(1, 0), (1, 2), (2, 1), (3, 4)] + exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None]) + + tm.assert_index_equal(res_as_apply, exp_as_apply) + tm.assert_index_equal(res_not_as_apply, exp_not_as_apply) + + ind = Index(list("abcde")) + df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind) + res = df.groupby(0, as_index=False, group_keys=False).apply(lambda x: x).index + tm.assert_index_equal(res, ind) + + +def test_apply_concat_preserve_names(three_group): + grouped = three_group.groupby(["A", "B"]) + + def desc(group): + result = group.describe() + result.index.name = "stat" + return result + + def desc2(group): + result = group.describe() + result.index.name = "stat" + result = result[: len(group)] + # weirdo + return result + + def desc3(group): + result = group.describe() + + # names are different + result.index.name = f"stat_{len(group):d}" + + result = result[: len(group)] + # weirdo + return result + + result = grouped.apply(desc) + assert result.index.names == ("A", "B", "stat") + + result2 = grouped.apply(desc2) + assert result2.index.names == ("A", "B", "stat") + + result3 = grouped.apply(desc3) + assert result3.index.names == ("A", "B", None) + + +def test_apply_series_to_frame(): + def f(piece): + with np.errstate(invalid="ignore"): + logged = np.log(piece) + return DataFrame( + {"value": piece, "demeaned": piece - piece.mean(), "logged": logged} + ) + + dr = bdate_range("1/1/2000", periods=100) + ts = Series(np.random.randn(100), index=dr) + + grouped = ts.groupby(lambda x: x.month, group_keys=False) + result = grouped.apply(f) + + assert isinstance(result, DataFrame) + assert not hasattr(result, "name") # GH49907 + tm.assert_index_equal(result.index, ts.index) + + +def test_apply_series_yield_constant(df): + result = df.groupby(["A", "B"])["C"].apply(len) + assert result.index.names[:2] == ("A", "B") + + +def test_apply_frame_yield_constant(df): + # GH13568 + result = df.groupby(["A", "B"]).apply(len) + assert isinstance(result, Series) + assert result.name is None + + result = df.groupby(["A", "B"])[["C", "D"]].apply(len) + assert isinstance(result, Series) + assert result.name is None + + +def test_apply_frame_to_series(df): + grouped = df.groupby(["A", "B"]) + result = grouped.apply(len) + expected = grouped.count()["C"] + tm.assert_index_equal(result.index, expected.index) + tm.assert_numpy_array_equal(result.values, expected.values) + + +def test_apply_frame_not_as_index_column_name(df): + # GH 35964 - path within _wrap_applied_output not hit by a test + grouped = df.groupby(["A", "B"], as_index=False) + result = grouped.apply(len) + expected = grouped.count().rename(columns={"C": np.nan}).drop(columns="D") + # TODO(GH#34306): Use assert_frame_equal when column name is not np.nan + tm.assert_index_equal(result.index, expected.index) + tm.assert_numpy_array_equal(result.values, expected.values) + + +def test_apply_frame_concat_series(): + def trans(group): + return 
group.groupby("B")["C"].sum().sort_values().iloc[:2] + + def trans2(group): + grouped = group.groupby(df.reindex(group.index)["B"]) + return grouped.sum().sort_values().iloc[:2] + + df = DataFrame( + { + "A": np.random.randint(0, 5, 1000), + "B": np.random.randint(0, 5, 1000), + "C": np.random.randn(1000), + } + ) + + result = df.groupby("A").apply(trans) + exp = df.groupby("A")["C"].apply(trans2) + tm.assert_series_equal(result, exp, check_names=False) + assert result.name == "C" + + +def test_apply_transform(ts): + grouped = ts.groupby(lambda x: x.month, group_keys=False) + result = grouped.apply(lambda x: x * 2) + expected = grouped.transform(lambda x: x * 2) + tm.assert_series_equal(result, expected) + + +def test_apply_multikey_corner(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + + def f(group): + return group.sort_values("A")[-5:] + + result = grouped.apply(f) + for key, group in grouped: + tm.assert_frame_equal(result.loc[key], f(group)) + + +@pytest.mark.parametrize("group_keys", [True, False]) +def test_apply_chunk_view(group_keys): + # Low level tinkering could be unsafe, make sure not + df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) + + result = df.groupby("key", group_keys=group_keys).apply(lambda x: x.iloc[:2]) + expected = df.take([0, 1, 3, 4, 6, 7]) + if group_keys: + expected.index = MultiIndex.from_arrays( + [[1, 1, 2, 2, 3, 3], expected.index], names=["key", None] + ) + + tm.assert_frame_equal(result, expected) + + +def test_apply_no_name_column_conflict(): + df = DataFrame( + { + "name": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], + "name2": [0, 0, 0, 1, 1, 1, 0, 0, 1, 1], + "value": range(9, -1, -1), + } + ) + + # it works! #2605 + grouped = df.groupby(["name", "name2"]) + grouped.apply(lambda x: x.sort_values("value", inplace=True)) + + +def test_apply_typecast_fail(): + df = DataFrame( + { + "d": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0], + "c": np.tile(["a", "b", "c"], 2), + "v": np.arange(1.0, 7.0), + } + ) + + def f(group): + v = group["v"] + group["v2"] = (v - v.min()) / (v.max() - v.min()) + return group + + result = df.groupby("d", group_keys=False).apply(f) + + expected = df.copy() + expected["v2"] = np.tile([0.0, 0.5, 1], 2) + + tm.assert_frame_equal(result, expected) + + +def test_apply_multiindex_fail(): + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]]) + df = DataFrame( + { + "d": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0], + "c": np.tile(["a", "b", "c"], 2), + "v": np.arange(1.0, 7.0), + }, + index=index, + ) + + def f(group): + v = group["v"] + group["v2"] = (v - v.min()) / (v.max() - v.min()) + return group + + result = df.groupby("d", group_keys=False).apply(f) + + expected = df.copy() + expected["v2"] = np.tile([0.0, 0.5, 1], 2) + + tm.assert_frame_equal(result, expected) + + +def test_apply_corner(tsframe): + result = tsframe.groupby(lambda x: x.year, group_keys=False).apply(lambda x: x * 2) + expected = tsframe * 2 + tm.assert_frame_equal(result, expected) + + +def test_apply_without_copy(): + # GH 5545 + # returning a non-copy in an applied function fails + + data = DataFrame( + { + "id_field": [100, 100, 200, 300], + "category": ["a", "b", "c", "c"], + "value": [1, 2, 3, 4], + } + ) + + def filt1(x): + if x.shape[0] == 1: + return x.copy() + else: + return x[x.category == "c"] + + def filt2(x): + if x.shape[0] == 1: + return x + else: + return x[x.category == "c"] + + expected = data.groupby("id_field").apply(filt1) + result = data.groupby("id_field").apply(filt2) + tm.assert_frame_equal(result, 
expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +def test_apply_with_duplicated_non_sorted_axis(test_series): + # GH 30667 + df = DataFrame( + [["x", "p"], ["x", "p"], ["x", "o"]], columns=["X", "Y"], index=[1, 2, 2] + ) + if test_series: + ser = df.set_index("Y")["X"] + result = ser.groupby(level=0, group_keys=False).apply(lambda x: x) + + # not expecting the order to remain the same for duplicated axis + result = result.sort_index() + expected = ser.sort_index() + tm.assert_series_equal(result, expected) + else: + result = df.groupby("Y", group_keys=False).apply(lambda x: x) + + # not expecting the order to remain the same for duplicated axis + result = result.sort_values("Y") + expected = df.sort_values("Y") + tm.assert_frame_equal(result, expected) + + +def test_apply_reindex_values(): + # GH: 26209 + # reindexing from a single column of a groupby object with duplicate indices caused + # a ValueError (cannot reindex from duplicate axis) in 0.24.2, the problem was + # solved in #30679 + values = [1, 2, 3, 4] + indices = [1, 1, 2, 2] + df = DataFrame({"group": ["Group1", "Group2"] * 2, "value": values}, index=indices) + expected = Series(values, index=indices, name="value") + + def reindex_helper(x): + return x.reindex(np.arange(x.index.min(), x.index.max() + 1)) + + # the following group by raised a ValueError + result = df.groupby("group", group_keys=False).value.apply(reindex_helper) + tm.assert_series_equal(expected, result) + + +def test_apply_corner_cases(): + # #535, can't use sliding iterator + + N = 1000 + labels = np.random.randint(0, 100, size=N) + df = DataFrame( + { + "key": labels, + "value1": np.random.randn(N), + "value2": ["foo", "bar", "baz", "qux"] * (N // 4), + } + ) + + grouped = df.groupby("key", group_keys=False) + + def f(g): + g["value3"] = g["value1"] * 2 + return g + + result = grouped.apply(f) + assert "value3" in result + + +def test_apply_numeric_coercion_when_datetime(): + # In the past, group-by/apply operations have been over-eager + # in converting dtypes to numeric, in the presence of datetime + # columns. Various GH issues were filed, the reproductions + # for which are here. 
+ + # GH 15670 + df = DataFrame( + {"Number": [1, 2], "Date": ["2017-03-02"] * 2, "Str": ["foo", "inf"]} + ) + expected = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) + df.Date = pd.to_datetime(df.Date) + result = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) + tm.assert_series_equal(result["Str"], expected["Str"]) + + # GH 15421 + df = DataFrame( + {"A": [10, 20, 30], "B": ["foo", "3", "4"], "T": [pd.Timestamp("12:31:22")] * 3} + ) + + def get_B(g): + return g.iloc[0][["B"]] + + result = df.groupby("A").apply(get_B)["B"] + expected = df.B + expected.index = df.A + tm.assert_series_equal(result, expected) + + # GH 14423 + def predictions(tool): + out = Series(index=["p1", "p2", "useTime"], dtype=object) + if "step1" in list(tool.State): + out["p1"] = str(tool[tool.State == "step1"].Machine.values[0]) + if "step2" in list(tool.State): + out["p2"] = str(tool[tool.State == "step2"].Machine.values[0]) + out["useTime"] = str(tool[tool.State == "step2"].oTime.values[0]) + return out + + df1 = DataFrame( + { + "Key": ["B", "B", "A", "A"], + "State": ["step1", "step2", "step1", "step2"], + "oTime": ["", "2016-09-19 05:24:33", "", "2016-09-19 23:59:04"], + "Machine": ["23", "36L", "36R", "36R"], + } + ) + df2 = df1.copy() + df2.oTime = pd.to_datetime(df2.oTime) + expected = df1.groupby("Key").apply(predictions).p1 + result = df2.groupby("Key").apply(predictions).p1 + tm.assert_series_equal(expected, result) + + +def test_apply_aggregating_timedelta_and_datetime(): + # Regression test for GH 15562 + # The following groupby caused ValueErrors and IndexErrors pre 0.20.0 + + df = DataFrame( + { + "clientid": ["A", "B", "C"], + "datetime": [np.datetime64("2017-02-01 00:00:00")] * 3, + } + ) + df["time_delta_zero"] = df.datetime - df.datetime + result = df.groupby("clientid").apply( + lambda ddf: Series( + {"clientid_age": ddf.time_delta_zero.min(), "date": ddf.datetime.min()} + ) + ) + expected = DataFrame( + { + "clientid": ["A", "B", "C"], + "clientid_age": [np.timedelta64(0, "D")] * 3, + "date": [np.datetime64("2017-02-01 00:00:00")] * 3, + } + ).set_index("clientid") + + tm.assert_frame_equal(result, expected) + + +def test_apply_groupby_datetimeindex(): + # GH 26182 + # groupby apply failed on dataframe with DatetimeIndex + + data = [["A", 10], ["B", 20], ["B", 30], ["C", 40], ["C", 50]] + df = DataFrame( + data, columns=["Name", "Value"], index=pd.date_range("2020-09-01", "2020-09-05") + ) + + result = df.groupby("Name").sum() + + expected = DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]}) + expected.set_index("Name", inplace=True) + + tm.assert_frame_equal(result, expected) + + +def test_time_field_bug(): + # Test a fix for the following error related to GH issue 11324 When + # non-key fields in a group-by dataframe contained time-based fields + # that were not returned by the apply function, an exception would be + # raised. 
+ + df = DataFrame({"a": 1, "b": [datetime.now() for nn in range(10)]}) + + def func_with_no_date(batch): + return Series({"c": 2}) + + def func_with_date(batch): + return Series({"b": datetime(2015, 1, 1), "c": 2}) + + dfg_no_conversion = df.groupby(by=["a"]).apply(func_with_no_date) + dfg_no_conversion_expected = DataFrame({"c": 2}, index=[1]) + dfg_no_conversion_expected.index.name = "a" + + dfg_conversion = df.groupby(by=["a"]).apply(func_with_date) + dfg_conversion_expected = DataFrame({"b": datetime(2015, 1, 1), "c": 2}, index=[1]) + dfg_conversion_expected.index.name = "a" + + tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) + tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected) + + +def test_gb_apply_list_of_unequal_len_arrays(): + + # GH1738 + df = DataFrame( + { + "group1": ["a", "a", "a", "b", "b", "b", "a", "a", "a", "b", "b", "b"], + "group2": ["c", "c", "d", "d", "d", "e", "c", "c", "d", "d", "d", "e"], + "weight": [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2], + "value": [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3], + } + ) + df = df.set_index(["group1", "group2"]) + df_grouped = df.groupby(level=["group1", "group2"], sort=True) + + def noddy(value, weight): + out = np.array(value * weight).repeat(3) + return out + + # the kernel function returns arrays of unequal length + # pandas sniffs the first one, sees it's an array and not + # a list, and assumed the rest are of equal length + # and so tries a vstack + + # don't die + df_grouped.apply(lambda x: noddy(x.value, x.weight)) + + +def test_groupby_apply_all_none(): + # Tests to make sure no errors if apply function returns all None + # values. Issue 9684. + test_df = DataFrame({"groups": [0, 0, 1, 1], "random_vars": [8, 7, 4, 5]}) + + def test_func(x): + pass + + result = test_df.groupby("groups").apply(test_func) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + + +def test_groupby_apply_none_first(): + # GH 12824. Tests if apply returns None first. 
+ test_df1 = DataFrame({"groups": [1, 1, 1, 2], "vars": [0, 1, 2, 3]}) + test_df2 = DataFrame({"groups": [1, 2, 2, 2], "vars": [0, 1, 2, 3]}) + + def test_func(x): + if x.shape[0] < 2: + return None + return x.iloc[[0, -1]] + + result1 = test_df1.groupby("groups").apply(test_func) + result2 = test_df2.groupby("groups").apply(test_func) + index1 = MultiIndex.from_arrays([[1, 1], [0, 2]], names=["groups", None]) + index2 = MultiIndex.from_arrays([[2, 2], [1, 3]], names=["groups", None]) + expected1 = DataFrame({"groups": [1, 1], "vars": [0, 2]}, index=index1) + expected2 = DataFrame({"groups": [2, 2], "vars": [1, 3]}, index=index2) + tm.assert_frame_equal(result1, expected1) + tm.assert_frame_equal(result2, expected2) + + +def test_groupby_apply_return_empty_chunk(): + # GH 22221: apply filter which returns some empty groups + df = DataFrame({"value": [0, 1], "group": ["filled", "empty"]}) + groups = df.groupby("group") + result = groups.apply(lambda group: group[group.value != 1]["value"]) + expected = Series( + [0], + name="value", + index=MultiIndex.from_product( + [["empty", "filled"], [0]], names=["group", None] + ).drop("empty"), + ) + tm.assert_series_equal(result, expected) + + +def test_apply_with_mixed_types(): + # gh-20949 + df = DataFrame({"A": "a a b".split(), "B": [1, 2, 3], "C": [4, 6, 5]}) + g = df.groupby("A", group_keys=False) + + result = g.transform(lambda x: x / x.sum()) + expected = DataFrame({"B": [1 / 3.0, 2 / 3.0, 1], "C": [0.4, 0.6, 1.0]}) + tm.assert_frame_equal(result, expected) + + result = g.apply(lambda x: x / x.sum()) + tm.assert_frame_equal(result, expected) + + +def test_func_returns_object(): + # GH 28652 + df = DataFrame({"a": [1, 2]}, index=Int64Index([1, 2])) + result = df.groupby("a").apply(lambda g: g.index) + expected = Series( + [Int64Index([1]), Int64Index([2])], index=Int64Index([1, 2], name="a") + ) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "group_column_dtlike", + [datetime.today(), datetime.today().date(), datetime.today().time()], +) +def test_apply_datetime_issue(group_column_dtlike): + # GH-28247 + # groupby-apply throws an error if one of the columns in the DataFrame + # is a datetime object and the column labels are different from + # standard int values in range(len(num_columns)) + + df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) + result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42])) + + expected = DataFrame( + ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] + ) + tm.assert_frame_equal(result, expected) + + +def test_apply_series_return_dataframe_groups(): + # GH 10078 + tdf = DataFrame( + { + "day": { + 0: pd.Timestamp("2015-02-24 00:00:00"), + 1: pd.Timestamp("2015-02-24 00:00:00"), + 2: pd.Timestamp("2015-02-24 00:00:00"), + 3: pd.Timestamp("2015-02-24 00:00:00"), + 4: pd.Timestamp("2015-02-24 00:00:00"), + }, + "userAgent": { + 0: "some UA string", + 1: "some UA string", + 2: "some UA string", + 3: "another UA string", + 4: "some UA string", + }, + "userId": { + 0: "17661101", + 1: "17661101", + 2: "17661101", + 3: "17661101", + 4: "17661101", + }, + } + ) + + def most_common_values(df): + return Series({c: s.value_counts().index[0] for c, s in df.items()}) + + result = tdf.groupby("day").apply(most_common_values)["userId"] + expected = Series( + ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId" + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("category", [False, True]) +def 
test_apply_multi_level_name(category): + # https://github.com/pandas-dev/pandas/issues/31068 + b = [1, 2] * 5 + if category: + b = pd.Categorical(b, categories=[1, 2, 3]) + expected_index = pd.CategoricalIndex([1, 2], categories=[1, 2, 3], name="B") + else: + expected_index = Index([1, 2], name="B") + df = DataFrame( + {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))} + ).set_index(["A", "B"]) + result = df.groupby("B").apply(lambda x: x.sum()) + expected = DataFrame({"C": [20, 25], "D": [20, 25]}, index=expected_index) + tm.assert_frame_equal(result, expected) + assert df.index.names == ["A", "B"] + + +def test_groupby_apply_datetime_result_dtypes(): + # GH 14849 + data = DataFrame.from_records( + [ + (pd.Timestamp(2016, 1, 1), "red", "dark", 1, "8"), + (pd.Timestamp(2015, 1, 1), "green", "stormy", 2, "9"), + (pd.Timestamp(2014, 1, 1), "blue", "bright", 3, "10"), + (pd.Timestamp(2013, 1, 1), "blue", "calm", 4, "potato"), + ], + columns=["observation", "color", "mood", "intensity", "score"], + ) + result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes + expected = Series( + [np.dtype("datetime64[ns]"), object, object, np.int64, object], + index=["observation", "color", "mood", "intensity", "score"], + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "index", + [ + pd.CategoricalIndex(list("abc")), + pd.interval_range(0, 3), + pd.period_range("2020", periods=3, freq="D"), + MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + ], +) +def test_apply_index_has_complex_internals(index): + # GH 31248 + df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) + result = df.groupby("group", group_keys=False).apply(lambda x: x) + tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize( + "function, expected_values", + [ + (lambda x: x.index.to_list(), [[0, 1], [2, 3]]), + (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), + (lambda x: tuple(x.index.to_list()), [(0, 1), (2, 3)]), + ( + lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, + [{0: 0, 1: 1}, {0: 2, 1: 3}], + ), + ( + lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], + [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], + ), + ], +) +def test_apply_function_returns_non_pandas_non_scalar(function, expected_values): + # GH 31441 + df = DataFrame(["A", "A", "B", "B"], columns=["groups"]) + result = df.groupby("groups").apply(function) + expected = Series(expected_values, index=Index(["A", "B"], name="groups")) + tm.assert_series_equal(result, expected) + + +def test_apply_function_returns_numpy_array(): + # GH 31605 + def fct(group): + return group["B"].values.flatten() + + df = DataFrame({"A": ["a", "a", "b", "none"], "B": [1, 2, 3, np.nan]}) + + result = df.groupby("A").apply(fct) + expected = Series( + [[1.0, 2.0], [3.0], [np.nan]], index=Index(["a", "b", "none"], name="A") + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("function", [lambda gr: gr.index, lambda gr: gr.index + 1 - 1]) +def test_apply_function_index_return(function): + # GH: 22541 + df = DataFrame([1, 2, 2, 2, 1, 2, 3, 1, 3, 1], columns=["id"]) + result = df.groupby("id").apply(function) + expected = Series( + [Index([0, 4, 7, 9]), Index([1, 2, 3, 5]), Index([6, 8])], + index=Index([1, 2, 3], name="id"), + ) + tm.assert_series_equal(result, expected) + + +def test_apply_function_with_indexing_return_column(): + # GH: 7002 + df = DataFrame( + { + "foo1": ["one", "two", "two", "three", "one", "two"], + "foo2": [1, 2, 4, 4, 5, 6], + } + ) + with 
tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) + expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "udf", + [(lambda x: x.copy()), (lambda x: x.copy().rename(lambda y: y + 1))], +) +@pytest.mark.parametrize("group_keys", [True, False]) +def test_apply_result_type(group_keys, udf): + # https://github.com/pandas-dev/pandas/issues/34809 + # We'd like to control whether the group keys end up in the index + # regardless of whether the UDF happens to be a transform. + df = DataFrame({"A": ["a", "b"], "B": [1, 2]}) + df_result = df.groupby("A", group_keys=group_keys).apply(udf) + series_result = df.B.groupby(df.A, group_keys=group_keys).apply(udf) + + if group_keys: + assert df_result.index.nlevels == 2 + assert series_result.index.nlevels == 2 + else: + assert df_result.index.nlevels == 1 + assert series_result.index.nlevels == 1 + + +def test_result_order_group_keys_false(): + # GH 34998 + # apply result order should not depend on whether index is the same or just equal + df = DataFrame({"A": [2, 1, 2], "B": [1, 2, 3]}) + result = df.groupby("A", group_keys=False).apply(lambda x: x) + expected = df.groupby("A", group_keys=False).apply(lambda x: x.copy()) + tm.assert_frame_equal(result, expected) + + +def test_groupby_apply_group_keys_warns(): + df = DataFrame({"A": [0, 1, 1], "B": [1, 2, 3]}) + msg = "Not prepending group keys to the result index" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").apply(lambda x: x) + + tm.assert_frame_equal(result, df) + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A")["B"].apply(lambda x: x) + + tm.assert_series_equal(result, df["B"]) + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df["B"].groupby(df["A"]).apply(lambda x: x) + + tm.assert_series_equal(result, df["B"]) + + +def test_apply_with_timezones_aware(): + # GH: 27212 + dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2 + index_no_tz = pd.DatetimeIndex(dates) + index_tz = pd.DatetimeIndex(dates, tz="UTC") + df1 = DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_no_tz}) + df2 = DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_tz}) + + result1 = df1.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy()) + result2 = df2.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy()) + + tm.assert_frame_equal(result1, result2) + + +def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): + # GH #34656 + # GH #34271 + warn = FutureWarning if reduction_func == "mad" else None + + df = DataFrame( + { + "a": [99, 99, 99, 88, 88, 88], + "b": [1, 2, 3, 4, 5, 6], + "c": [10, 20, 30, 40, 50, 60], + } + ) + + expected = DataFrame( + {"a": [264, 297], "b": [15, 6], "c": [150, 60]}, + index=Index([88, 99], name="a"), + ) + + # Check output when no other methods are called before .apply() + grp = df.groupby(by="a") + result = grp.apply(sum) + tm.assert_frame_equal(result, expected) + + # Check output when another method is called before .apply() + grp = df.groupby(by="a") + args = get_groupby_method_args(reduction_func, df) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + _ = getattr(grp, reduction_func)(*args) + result = grp.apply(sum) + tm.assert_frame_equal(result, expected) + + +def 
test_apply_with_date_in_multiindex_does_not_convert_to_timestamp(): + # GH 29617 + + df = DataFrame( + { + "A": ["a", "a", "a", "b"], + "B": [ + date(2020, 1, 10), + date(2020, 1, 10), + date(2020, 2, 10), + date(2020, 2, 10), + ], + "C": [1, 2, 3, 4], + }, + index=Index([100, 101, 102, 103], name="idx"), + ) + + grp = df.groupby(["A", "B"]) + result = grp.apply(lambda x: x.head(1)) + + expected = df.iloc[[0, 2, 3]] + expected = expected.reset_index() + expected.index = MultiIndex.from_frame(expected[["A", "B", "idx"]]) + expected = expected.drop(columns="idx") + + tm.assert_frame_equal(result, expected) + for val in result.index.levels[1]: + assert type(val) is date + + +def test_apply_by_cols_equals_apply_by_rows_transposed(): + # GH 16646 + # Operating on the columns, or transposing and operating on the rows + # should give the same result. There was previously a bug where the + # by_rows operation would work fine, but by_cols would throw a ValueError + + df = DataFrame( + np.random.random([6, 4]), + columns=MultiIndex.from_product([["A", "B"], [1, 2]]), + ) + + by_rows = df.T.groupby(axis=0, level=0).apply( + lambda x: x.droplevel(axis=0, level=0) + ) + by_cols = df.groupby(axis=1, level=0).apply(lambda x: x.droplevel(axis=1, level=0)) + + tm.assert_frame_equal(by_cols, by_rows.T) + tm.assert_frame_equal(by_cols, df) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_apply_dropna_with_indexed_same(dropna): + # GH 38227 + # GH#43205 + df = DataFrame( + { + "col": [1, 2, 3, 4, 5], + "group": ["a", np.nan, np.nan, "b", "b"], + }, + index=list("xxyxz"), + ) + result = df.groupby("group", dropna=dropna, group_keys=False).apply(lambda x: x) + expected = df.dropna() if dropna else df.iloc[[0, 3, 1, 2, 4]] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "as_index, expected", + [ + [ + False, + DataFrame( + [[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None], dtype=object) + ), + ], + [ + True, + Series( + [1, 1], index=MultiIndex.from_tuples([(1, 1), (2, 2)], names=["a", "b"]) + ), + ], + ], +) +def test_apply_as_index_constant_lambda(as_index, expected): + # GH 13217 + df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 1, 2, 2], "c": [1, 1, 1, 1]}) + result = df.groupby(["a", "b"], as_index=as_index).apply(lambda x: 1) + tm.assert_equal(result, expected) + + +def test_sort_index_groups(): + # GH 20420 + df = DataFrame( + {"A": [1, 2, 3, 4, 5], "B": [6, 7, 8, 9, 0], "C": [1, 1, 1, 2, 2]}, + index=range(5), + ) + result = df.groupby("C").apply(lambda x: x.A.sort_index()) + expected = Series( + range(1, 6), + index=MultiIndex.from_tuples( + [(1, 0), (1, 1), (1, 2), (2, 3), (2, 4)], names=["C", None] + ), + name="A", + ) + tm.assert_series_equal(result, expected) + + +def test_positional_slice_groups_datetimelike(): + # GH 21651 + expected = DataFrame( + { + "date": pd.date_range("2010-01-01", freq="12H", periods=5), + "vals": range(5), + "let": list("abcde"), + } + ) + result = expected.groupby( + [expected.let, expected.date.dt.date], group_keys=False + ).apply(lambda x: x.iloc[0:]) + tm.assert_frame_equal(result, expected) + + +def test_groupby_apply_shape_cache_safety(): + # GH#42702 this fails if we cache_readonly Block.shape + df = DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3], "C": [4, 6, 5]}) + gb = df.groupby("A") + result = gb[["B", "C"]].apply(lambda x: x.astype(float).max() - x.min()) + + expected = DataFrame( + {"B": [1.0, 0.0], "C": [2.0, 0.0]}, index=Index(["a", "b"], name="A") + ) + tm.assert_frame_equal(result, expected) + + 
+@pytest.mark.parametrize("dropna", [True, False]) +def test_apply_na(dropna): + # GH#28984 + df = DataFrame( + {"grp": [1, 1, 2, 2], "y": [1, 0, 2, 5], "z": [1, 2, np.nan, np.nan]} + ) + dfgrp = df.groupby("grp", dropna=dropna) + result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, "z")) + expected = dfgrp.apply(lambda x: x.sort_values("z", ascending=False).head(1)) + tm.assert_frame_equal(result, expected) + + +def test_apply_empty_string_nan_coerce_bug(): + # GH#24903 + result = ( + DataFrame( + { + "a": [1, 1, 2, 2], + "b": ["", "", "", ""], + "c": pd.to_datetime([1, 2, 3, 4], unit="s"), + } + ) + .groupby(["a", "b"]) + .apply(lambda df: df.iloc[-1]) + ) + expected = DataFrame( + [[1, "", pd.to_datetime(2, unit="s")], [2, "", pd.to_datetime(4, unit="s")]], + columns=["a", "b", "c"], + index=MultiIndex.from_tuples([(1, ""), (2, "")], names=["a", "b"]), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index_values", [[1, 2, 3], [1.0, 2.0, 3.0]]) +def test_apply_index_key_error_bug(index_values): + # GH 44310 + result = DataFrame( + { + "a": ["aa", "a2", "a3"], + "b": [1, 2, 3], + }, + index=Index(index_values), + ) + expected = DataFrame( + { + "b_mean": [2.0, 3.0, 1.0], + }, + index=Index(["a2", "a3", "aa"], name="a"), + ) + result = result.groupby("a").apply( + lambda df: Series([df["b"].mean()], index=["b_mean"]) + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "arg,idx", + [ + [ + [ + 1, + 2, + 3, + ], + [ + 0.1, + 0.3, + 0.2, + ], + ], + [ + [ + 1, + 2, + 3, + ], + [ + 0.1, + 0.2, + 0.3, + ], + ], + [ + [ + 1, + 4, + 3, + ], + [ + 0.1, + 0.4, + 0.2, + ], + ], + ], +) +def test_apply_nonmonotonic_float_index(arg, idx): + # GH 34455 + expected = DataFrame({"col": arg}, index=idx) + result = expected.groupby("col", group_keys=False).apply(lambda x: x) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("args, kwargs", [([True], {}), ([], {"numeric_only": True})]) +def test_apply_str_with_args(df, args, kwargs): + # GH#46479 + gb = df.groupby("A") + result = gb.apply("sum", *args, **kwargs) + expected = gb.sum(numeric_only=True) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("name", ["some_name", None]) +def test_result_name_when_one_group(name): + # GH 46369 + ser = Series([1, 2], name=name) + result = ser.groupby(["a", "a"], group_keys=False).apply(lambda x: x) + expected = Series([1, 2], name=name) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, op", + [ + ("apply", lambda gb: gb.values[-1]), + ("apply", lambda gb: gb["b"].iloc[0]), + ("agg", "mad"), + ("agg", "skew"), + ("agg", "prod"), + ("agg", "sum"), + ], +) +def test_empty_df(method, op): + # GH 47985 + empty_df = DataFrame({"a": [], "b": []}) + gb = empty_df.groupby("a", group_keys=True) + group = getattr(gb, "b") + + result = getattr(group, method)(op) + expected = Series( + [], name="b", dtype="float64", index=Index([], dtype="float64", name="a") + ) + + tm.assert_series_equal(result, expected) + + +def test_numeric_only_warning_numpy(): + # GH#50538 + df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]}) + gb = df.groupby("a") + msg = "The operation float + df = DataFrame({"A": [None] * 3, "B": Categorical(["train", "train", "test"])}) + result = df.groupby("A").first()["B"] + expected = Series( + Categorical([], categories=["test", "train"]), + index=Series([], dtype="object", name="A"), + name="B", + ) + tm.assert_series_equal(result, expected) + + +def test_sort(): + 
+ # https://stackoverflow.com/questions/23814368/sorting-pandas- + # categorical-labels-after-groupby + # This should result in a properly sorted Series so that the plot + # has a sorted x axis + # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') + + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = [f"{i} - {i+499}" for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + + res = df.groupby(["value_group"], observed=False)["value_group"].count() + exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] + exp.index = CategoricalIndex(exp.index, name=exp.index.name) + tm.assert_series_equal(res, exp) + + +def test_sort2(): + # dataframe groupby sort was being ignored # GH 8868 + df = DataFrame( + [ + ["(7.5, 10]", 10, 10], + ["(7.5, 10]", 8, 20], + ["(2.5, 5]", 5, 30], + ["(5, 7.5]", 6, 40], + ["(2.5, 5]", 4, 50], + ["(0, 2.5]", 1, 60], + ["(5, 7.5]", 7, 70], + ], + columns=["range", "foo", "bar"], + ) + df["range"] = Categorical(df["range"], ordered=True) + index = CategoricalIndex( + ["(0, 2.5]", "(2.5, 5]", "(5, 7.5]", "(7.5, 10]"], name="range", ordered=True + ) + expected_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"], index=index + ) + + col = "range" + result_sort = df.groupby(col, sort=True, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + # when categories is ordered, group is ordered by category's order + expected_sort = result_sort + result_sort = df.groupby(col, sort=False, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + df["range"] = Categorical(df["range"], ordered=False) + index = CategoricalIndex( + ["(0, 2.5]", "(2.5, 5]", "(5, 7.5]", "(7.5, 10]"], name="range" + ) + expected_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"], index=index + ) + + index = CategoricalIndex( + ["(7.5, 10]", "(2.5, 5]", "(5, 7.5]", "(0, 2.5]"], + categories=["(7.5, 10]", "(2.5, 5]", "(5, 7.5]", "(0, 2.5]"], + name="range", + ) + expected_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], index=index, columns=["foo", "bar"] + ) + + col = "range" + + # this is an unordered categorical, but we allow this #### + result_sort = df.groupby(col, sort=True, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + result_nosort = df.groupby(col, sort=False, observed=False).first() + tm.assert_frame_equal(result_nosort, expected_nosort) + + +def test_sort_datetimelike(): + # GH10505 + + # use same data as test_groupby_sort_categorical, which category is + # corresponding to datetime.month + df = DataFrame( + { + "dt": [ + datetime(2011, 7, 1), + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 2, 1), + datetime(2011, 1, 1), + datetime(2011, 5, 1), + ], + "foo": [10, 8, 5, 6, 4, 1, 7], + "bar": [10, 20, 30, 40, 50, 60, 70], + }, + columns=["dt", "foo", "bar"], + ) + + # ordered=True + df["dt"] = Categorical(df["dt"], ordered=True) + index = [ + datetime(2011, 1, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 7, 1), + ] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"] + ) + result_sort.index = CategoricalIndex(index, name="dt", ordered=True) + + index = [ + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), 
+ datetime(2011, 1, 1), + ] + result_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], columns=["foo", "bar"] + ) + result_nosort.index = CategoricalIndex( + index, categories=index, name="dt", ordered=True + ) + + col = "dt" + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=True, observed=False).first() + ) + + # when categories is ordered, group is ordered by category's order + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=False, observed=False).first() + ) + + # ordered = False + df["dt"] = Categorical(df["dt"], ordered=False) + index = [ + datetime(2011, 1, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 7, 1), + ] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"] + ) + result_sort.index = CategoricalIndex(index, name="dt") + + index = [ + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 1, 1), + ] + result_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], columns=["foo", "bar"] + ) + result_nosort.index = CategoricalIndex(index, categories=index, name="dt") + + col = "dt" + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=True, observed=False).first() + ) + tm.assert_frame_equal( + result_nosort, df.groupby(col, sort=False, observed=False).first() + ) + + +def test_empty_sum(): + # https://github.com/pandas-dev/pandas/issues/18678 + df = DataFrame( + {"A": Categorical(["a", "a", "b"], categories=["a", "b", "c"]), "B": [1, 2, 1]} + ) + expected_idx = CategoricalIndex(["a", "b", "c"], name="A") + + # 0 by default + result = df.groupby("A", observed=False).B.sum() + expected = Series([3, 1, 0], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A", observed=False).B.sum(min_count=0) + expected = Series([3, 1, 0], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A", observed=False).B.sum(min_count=1) + expected = Series([3, 1, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count>1 + result = df.groupby("A", observed=False).B.sum(min_count=2) + expected = Series([3, np.nan, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + +def test_empty_prod(): + # https://github.com/pandas-dev/pandas/issues/18678 + df = DataFrame( + {"A": Categorical(["a", "a", "b"], categories=["a", "b", "c"]), "B": [1, 2, 1]} + ) + + expected_idx = CategoricalIndex(["a", "b", "c"], name="A") + + # 1 by default + result = df.groupby("A", observed=False).B.prod() + expected = Series([2, 1, 1], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A", observed=False).B.prod(min_count=0) + expected = Series([2, 1, 1], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A", observed=False).B.prod(min_count=1) + expected = Series([2, 1, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + +def test_groupby_multiindex_categorical_datetime(): + # https://github.com/pandas-dev/pandas/issues/21390 + + df = DataFrame( + { + "key1": Categorical(list("abcbabcba")), + "key2": Categorical( + list(pd.date_range("2018-06-01 00", freq="1T", periods=3)) * 3 + ), + "values": np.arange(9), + } + ) + result = df.groupby(["key1", "key2"]).mean() + + idx = MultiIndex.from_product( + [ + Categorical(["a", "b", "c"]), + Categorical(pd.date_range("2018-06-01 00", freq="1T", 
periods=3)), + ], + names=["key1", "key2"], + ) + expected = DataFrame({"values": [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "as_index, expected", + [ + ( + True, + Series( + index=MultiIndex.from_arrays( + [Series([1, 1, 2], dtype="category"), [1, 2, 2]], names=["a", "b"] + ), + data=[1, 2, 3], + name="x", + ), + ), + ( + False, + DataFrame( + { + "a": Series([1, 1, 2], dtype="category"), + "b": [1, 2, 2], + "x": [1, 2, 3], + } + ), + ), + ], +) +def test_groupby_agg_observed_true_single_column(as_index, expected): + # GH-23970 + df = DataFrame( + {"a": Series([1, 1, 2], dtype="category"), "b": [1, 2, 2], "x": [1, 2, 3]} + ) + + result = df.groupby(["a", "b"], as_index=as_index, observed=True)["x"].sum() + + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("fill_value", [None, np.nan, pd.NaT]) +def test_shift(fill_value): + ct = Categorical( + ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False + ) + expected = Categorical( + [None, "a", "b", "c"], categories=["a", "b", "c", "d"], ordered=False + ) + res = ct.shift(1, fill_value=fill_value) + tm.assert_equal(res, expected) + + +@pytest.fixture +def df_cat(df): + """ + DataFrame with multiple categorical columns and a column of integers. + Shortened so as not to contain all possible combinations of categories. + Useful for testing `observed` kwarg functionality on GroupBy objects. + + Parameters + ---------- + df: DataFrame + Non-categorical, longer DataFrame from another fixture, used to derive + this one + + Returns + ------- + df_cat: DataFrame + """ + df_cat = df.copy()[:4] # leave out some groups + df_cat["A"] = df_cat["A"].astype("category") + df_cat["B"] = df_cat["B"].astype("category") + df_cat["C"] = Series([1, 2, 3, 4]) + df_cat = df_cat.drop(["D"], axis=1) + return df_cat + + +@pytest.mark.parametrize("operation", ["agg", "apply"]) +def test_seriesgroupby_observed_true(df_cat, operation): + # GH 24880 + lev_a = Index(["foo", "foo", "bar", "bar"], dtype=df_cat["A"].dtype, name="A") + lev_b = Index(["one", "two", "one", "three"], dtype=df_cat["B"].dtype, name="B") + index = MultiIndex.from_arrays([lev_a, lev_b]) + expected = Series(data=[1, 3, 2, 4], index=index, name="C") + + grouped = df_cat.groupby(["A", "B"], observed=True)["C"] + result = getattr(grouped, operation)(sum) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("operation", ["agg", "apply"]) +@pytest.mark.parametrize("observed", [False, None]) +def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): + # GH 24880 + index, _ = MultiIndex.from_product( + [ + CategoricalIndex(["bar", "foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + ], + names=["A", "B"], + ).sortlevel() + + expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C") + if operation == "agg": + expected = expected.fillna(0, downcast="infer") + grouped = df_cat.groupby(["A", "B"], observed=observed)["C"] + result = getattr(grouped, operation)(sum) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "observed, index, data", + [ + ( + True, + MultiIndex.from_arrays( + [ + Index(["foo"] * 4 + ["bar"] * 4, dtype="category", name="A"), + Index( + ["one", "one", "two", "two", "one", "one", "three", "three"], + dtype="category", + name="B", + ), + Index(["min", "max"] * 4), + ] + ), + [1, 1, 3, 3, 2, 2, 4, 4], + ), + ( + False, + MultiIndex.from_product( + [ + CategoricalIndex(["bar", 
"foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + Index(["min", "max"]), + ], + names=["A", "B", None], + ), + [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3], + ), + ( + None, + MultiIndex.from_product( + [ + CategoricalIndex(["bar", "foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + Index(["min", "max"]), + ], + names=["A", "B", None], + ), + [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3], + ), + ], +) +def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): + # GH 24880 + expected = Series(data=data, index=index, name="C") + result = df_cat.groupby(["A", "B"], observed=observed)["C"].apply( + lambda x: {"min": x.min(), "max": x.max()} + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_categorical_series_dataframe_consistent(df_cat): + # GH 20416 + expected = df_cat.groupby(["A", "B"])["C"].mean() + result = df_cat.groupby(["A", "B"]).mean()["C"] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])]) +def test_groupby_categorical_axis_1(code): + # GH 13420 + df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]}) + cat = Categorical.from_codes(code, categories=list("abc")) + result = df.groupby(cat, axis=1).mean() + expected = df.T.groupby(cat, axis=0).mean().T + tm.assert_frame_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") +def test_groupby_cat_preserves_structure(observed, ordered): + # GH 28787 + df = DataFrame( + {"Name": Categorical(["Bob", "Greg"], ordered=ordered), "Item": [1, 2]}, + columns=["Name", "Item"], + ) + expected = df.copy() + + result = ( + df.groupby("Name", observed=observed) + .agg(DataFrame.sum, skipna=True) + .reset_index() + ) + + tm.assert_frame_equal(result, expected) + + +def test_get_nonexistent_category(): + # Accessing a Category that is not in the dataframe + df = DataFrame({"var": ["a", "a", "b", "b"], "val": range(4)}) + with pytest.raises(KeyError, match="'vau'"): + df.groupby("var").apply( + lambda rows: DataFrame( + {"var": [rows.iloc[-1]["var"]], "val": [rows.iloc[-1]["vau"]]} + ) + ) + + +def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request): + # GH 17605 + if reduction_func == "ngroup": + pytest.skip("ngroup is not truly a reduction") + + if reduction_func == "corrwith": # GH 32293 + mark = pytest.mark.xfail( + reason="TODO: implemented SeriesGroupBy.corrwith. 
See GH 32293" + ) + request.node.add_marker(mark) + warn = FutureWarning if reduction_func == "mad" else None + + df = DataFrame( + { + "cat_1": Categorical(list("AABB"), categories=list("ABCD")), + "cat_2": Categorical(list("AB") * 2, categories=list("ABCD")), + "value": [0.1] * 4, + } + ) + args = get_groupby_method_args(reduction_func, df) + + expected_length = 4 if observed else 16 + + series_groupby = df.groupby(["cat_1", "cat_2"], observed=observed)["value"] + agg = getattr(series_groupby, reduction_func) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = agg(*args) + + assert len(result) == expected_length + + +def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( + reduction_func, request +): + # GH 17605 + # Tests whether the unobserved categories in the result contain 0 or NaN + + if reduction_func == "ngroup": + pytest.skip("ngroup is not truly a reduction") + + if reduction_func == "corrwith": # GH 32293 + mark = pytest.mark.xfail( + reason="TODO: implemented SeriesGroupBy.corrwith. See GH 32293" + ) + request.node.add_marker(mark) + warn = FutureWarning if reduction_func == "mad" else None + + df = DataFrame( + { + "cat_1": Categorical(list("AABB"), categories=list("ABC")), + "cat_2": Categorical(list("AB") * 2, categories=list("ABC")), + "value": [0.1] * 4, + } + ) + unobserved = [tuple("AC"), tuple("BC"), tuple("CA"), tuple("CB"), tuple("CC")] + args = get_groupby_method_args(reduction_func, df) + + series_groupby = df.groupby(["cat_1", "cat_2"], observed=False)["value"] + agg = getattr(series_groupby, reduction_func) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = agg(*args) + + zero_or_nan = _results_for_groupbys_with_missing_categories[reduction_func] + + for idx in unobserved: + val = result.loc[idx] + assert (pd.isna(zero_or_nan) and pd.isna(val)) or (val == zero_or_nan) + + # If we expect unobserved values to be zero, we also expect the dtype to be int. + # Except for .sum(). If the observed categories sum to dtype=float (i.e. their + # sums have decimals), then the zeros for the missing categories should also be + # floats. 
+ if zero_or_nan == 0 and reduction_func != "sum": + assert np.issubdtype(result.dtype, np.integer) + + +def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_func): + # GH 23865 + # GH 27075 + # Ensure that df.groupby, when 'by' is two Categorical variables, + # does not return the categories that are not in df when observed=True + if reduction_func == "ngroup": + pytest.skip("ngroup does not return the Categories on the index") + warn = FutureWarning if reduction_func == "mad" else None + + df = DataFrame( + { + "cat_1": Categorical(list("AABB"), categories=list("ABC")), + "cat_2": Categorical(list("1111"), categories=list("12")), + "value": [0.1, 0.1, 0.1, 0.1], + } + ) + unobserved_cats = [("A", "2"), ("B", "2"), ("C", "1"), ("C", "2")] + + df_grp = df.groupby(["cat_1", "cat_2"], observed=True) + + args = get_groupby_method_args(reduction_func, df) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + res = getattr(df_grp, reduction_func)(*args) + + for cat in unobserved_cats: + assert cat not in res.index + + +@pytest.mark.parametrize("observed", [False, None]) +def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( + reduction_func, observed +): + # GH 23865 + # GH 27075 + # Ensure that df.groupby, when 'by' is two Categorical variables, + # returns the categories that are not in df when observed=False/None + + if reduction_func == "ngroup": + pytest.skip("ngroup does not return the Categories on the index") + warn = FutureWarning if reduction_func == "mad" else None + + df = DataFrame( + { + "cat_1": Categorical(list("AABB"), categories=list("ABC")), + "cat_2": Categorical(list("1111"), categories=list("12")), + "value": [0.1, 0.1, 0.1, 0.1], + } + ) + unobserved_cats = [("A", "2"), ("B", "2"), ("C", "1"), ("C", "2")] + + df_grp = df.groupby(["cat_1", "cat_2"], observed=observed) + + args = get_groupby_method_args(reduction_func, df) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + res = getattr(df_grp, reduction_func)(*args) + + expected = _results_for_groupbys_with_missing_categories[reduction_func] + + if expected is np.nan: + assert res.loc[unobserved_cats].isnull().all().all() + else: + assert (res.loc[unobserved_cats] == expected).all().all() + + +def test_series_groupby_categorical_aggregation_getitem(): + # GH 8870 + d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]} + df = DataFrame(d) + cat = pd.cut(df["foo"], np.linspace(0, 20, 5)) + df["range"] = cat + groups = df.groupby(["range", "baz"], as_index=True, sort=True) + result = groups["foo"].agg("mean") + expected = groups.agg("mean")["foo"] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func, expected_values", + [(Series.nunique, [1, 1, 2]), (Series.count, [1, 2, 2])], +) +def test_groupby_agg_categorical_columns(func, expected_values): + # 31256 + df = DataFrame( + { + "id": [0, 1, 2, 3, 4], + "groups": [0, 1, 1, 2, 2], + "value": Categorical([0, 0, 0, 0, 1]), + } + ).set_index("id") + result = df.groupby("groups").agg(func) + + expected = DataFrame( + {"value": expected_values}, index=Index([0, 1, 2], name="groups") + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_agg_non_numeric(): + df = DataFrame({"A": Categorical(["a", "a", "b"], categories=["a", "b", "c"])}) + expected = DataFrame({"A": [2, 1]}, index=[1, 2]) + + result = df.groupby([1, 2, 1]).agg(Series.nunique) + tm.assert_frame_equal(result, expected) + + result = df.groupby([1, 2, 
1]).nunique() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["first", "last"]) +def test_groupby_first_returned_categorical_instead_of_dataframe(func): + # GH 28641: groupby drops index, when grouping over categorical column with + # first/last. Renamed Categorical instead of DataFrame previously. + df = DataFrame({"A": [1997], "B": Series(["b"], dtype="category").cat.as_ordered()}) + df_grouped = df.groupby("A")["B"] + result = getattr(df_grouped, func)() + + # ordered categorical dtype should be preserved + expected = Series( + ["b"], index=Index([1997], name="A"), name="B", dtype=df["B"].dtype + ) + tm.assert_series_equal(result, expected) + + +def test_read_only_category_no_sort(): + # GH33410 + cats = np.array([1, 2]) + cats.flags.writeable = False + df = DataFrame( + {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))} + ) + expected = DataFrame( + data={"a": [2.0, 6.0]}, index=CategoricalIndex([1, 2], name="b") + ) + result = df.groupby("b", sort=False).mean() + tm.assert_frame_equal(result, expected) + + +def test_sorted_missing_category_values(): + # GH 28597 + df = DataFrame( + { + "foo": [ + "small", + "large", + "large", + "large", + "medium", + "large", + "large", + "medium", + ], + "bar": ["C", "A", "A", "C", "A", "C", "A", "C"], + } + ) + df["foo"] = ( + df["foo"] + .astype("category") + .cat.set_categories(["tiny", "small", "medium", "large"], ordered=True) + ) + + expected = DataFrame( + { + "tiny": {"A": 0, "C": 0}, + "small": {"A": 0, "C": 1}, + "medium": {"A": 1, "C": 1}, + "large": {"A": 3, "C": 2}, + } + ) + expected = expected.rename_axis("bar", axis="index") + expected.columns = CategoricalIndex( + ["tiny", "small", "medium", "large"], + categories=["tiny", "small", "medium", "large"], + ordered=True, + name="foo", + dtype="category", + ) + + result = df.groupby(["bar", "foo"]).size().unstack() + + tm.assert_frame_equal(result, expected) + + +def test_agg_cython_category_not_implemented_fallback(): + # https://github.com/pandas-dev/pandas/issues/31450 + df = DataFrame({"col_num": [1, 1, 2, 3]}) + df["col_cat"] = df["col_num"].astype("category") + + result = df.groupby("col_num").col_cat.first() + + # ordered categorical dtype should definitely be preserved; + # this is unordered, so is less-clear case (if anything, it should raise) + expected = Series( + [1, 2, 3], + index=Index([1, 2, 3], name="col_num"), + name="col_cat", + dtype=df["col_cat"].dtype, + ) + tm.assert_series_equal(result, expected) + + result = df.groupby("col_num").agg({"col_cat": "first"}) + expected = expected.to_frame() + tm.assert_frame_equal(result, expected) + + +def test_aggregate_categorical_with_isnan(): + # GH 29837 + df = DataFrame( + { + "A": [1, 1, 1, 1], + "B": [1, 2, 1, 2], + "numerical_col": [0.1, 0.2, np.nan, 0.3], + "object_col": ["foo", "bar", "foo", "fee"], + "categorical_col": ["foo", "bar", "foo", "fee"], + } + ) + + df = df.astype({"categorical_col": "category"}) + + result = df.groupby(["A", "B"]).agg(lambda df: df.isna().sum()) + index = MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B")) + expected = DataFrame( + data={ + "numerical_col": [1, 0], + "object_col": [0, 0], + "categorical_col": [0, 0], + }, + index=index, + ) + tm.assert_frame_equal(result, expected) + + +def test_categorical_transform(): + # GH 29037 + df = DataFrame( + { + "package_id": [1, 1, 1, 2, 2, 3], + "status": [ + "Waiting", + "OnTheWay", + "Delivered", + "Waiting", + "OnTheWay", + "Waiting", + ], + } + ) + + delivery_status_type = 
pd.CategoricalDtype( + categories=["Waiting", "OnTheWay", "Delivered"], ordered=True + ) + df["status"] = df["status"].astype(delivery_status_type) + df["last_status"] = df.groupby("package_id")["status"].transform(max) + result = df.copy() + + expected = DataFrame( + { + "package_id": [1, 1, 1, 2, 2, 3], + "status": [ + "Waiting", + "OnTheWay", + "Delivered", + "Waiting", + "OnTheWay", + "Waiting", + ], + "last_status": [ + "Delivered", + "Delivered", + "Delivered", + "OnTheWay", + "OnTheWay", + "Waiting", + ], + } + ) + + expected["status"] = expected["status"].astype(delivery_status_type) + + # .transform(max) should preserve ordered categoricals + expected["last_status"] = expected["last_status"].astype(delivery_status_type) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["first", "last"]) +def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals( + func: str, observed: bool +): + # GH 34951 + cat = Categorical([0, 0, 1, 1]) + val = [0, 1, 1, 0] + df = DataFrame({"a": cat, "b": cat, "c": val}) + + cat2 = Categorical([0, 1]) + idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"]) + expected_dict = { + "first": Series([0, np.NaN, np.NaN, 1], idx, name="c"), + "last": Series([1, np.NaN, np.NaN, 0], idx, name="c"), + } + + expected = expected_dict[func] + if observed: + expected = expected.dropna().astype(np.int64) + + srs_grp = df.groupby(["a", "b"], observed=observed)["c"] + result = getattr(srs_grp, func)() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["first", "last"]) +def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals( + func: str, observed: bool +): + # GH 34951 + cat = Categorical([0, 0, 1, 1]) + val = [0, 1, 1, 0] + df = DataFrame({"a": cat, "b": cat, "c": val}) + + cat2 = Categorical([0, 1]) + idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"]) + expected_dict = { + "first": Series([0, np.NaN, np.NaN, 1], idx, name="c"), + "last": Series([1, np.NaN, np.NaN, 0], idx, name="c"), + } + + expected = expected_dict[func].to_frame() + if observed: + expected = expected.dropna().astype(np.int64) + + df_grp = df.groupby(["a", "b"], observed=observed) + result = getattr(df_grp, func)() + tm.assert_frame_equal(result, expected) + + +def test_groupby_categorical_indices_unused_categories(): + # GH#38642 + df = DataFrame( + { + "key": Categorical(["b", "b", "a"], categories=["a", "b", "c"]), + "col": range(3), + } + ) + grouped = df.groupby("key", sort=False) + result = grouped.indices + expected = { + "b": np.array([0, 1], dtype="intp"), + "a": np.array([2], dtype="intp"), + "c": np.array([], dtype="intp"), + } + assert result.keys() == expected.keys() + for key in result.keys(): + tm.assert_numpy_array_equal(result[key], expected[key]) + + +@pytest.mark.parametrize("func", ["first", "last"]) +def test_groupby_last_first_preserve_categoricaldtype(func): + # GH#33090 + df = DataFrame({"a": [1, 2, 3]}) + df["b"] = df["a"].astype("category") + result = getattr(df.groupby("a")["b"], func)() + expected = Series( + Categorical([1, 2, 3]), name="b", index=Index([1, 2, 3], name="a") + ) + tm.assert_series_equal(expected, result) + + +def test_groupby_categorical_observed_nunique(): + # GH#45128 + df = DataFrame({"a": [1, 2], "b": [1, 2], "c": [10, 11]}) + df = df.astype(dtype={"a": "category", "b": "category"}) + result = df.groupby(["a", "b"], observed=True).nunique()["c"] + expected = Series( + [1, 1], + index=MultiIndex.from_arrays( + [CategoricalIndex([1, 2], name="a"), 
CategoricalIndex([1, 2], name="b")] + ), + name="c", + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_categorical_aggregate_functions(): + # GH#37275 + dtype = pd.CategoricalDtype(categories=["small", "big"], ordered=True) + df = DataFrame( + [[1, "small"], [1, "big"], [2, "small"]], columns=["grp", "description"] + ).astype({"description": dtype}) + + result = df.groupby("grp")["description"].max() + expected = Series( + ["big", "small"], + index=Index([1, 2], name="grp"), + name="description", + dtype=pd.CategoricalDtype(categories=["small", "big"], ordered=True), + ) + + tm.assert_series_equal(result, expected) + + +def test_groupby_categorical_dropna(observed, dropna): + # GH#48645 - dropna should have no impact on the result when there are no NA values + cat = Categorical([1, 2], categories=[1, 2, 3]) + df = DataFrame({"x": Categorical([1, 2], categories=[1, 2, 3]), "y": [3, 4]}) + gb = df.groupby("x", observed=observed, dropna=dropna) + result = gb.sum() + + if observed: + expected = DataFrame({"y": [3, 4]}, index=cat) + else: + index = CategoricalIndex([1, 2, 3], [1, 2, 3]) + expected = DataFrame({"y": [3, 4, 0]}, index=index) + expected.index.name = "x" + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py new file mode 100644 index 00000000..7e7f1a62 --- /dev/null +++ b/pandas/tests/groupby/test_counting.py @@ -0,0 +1,377 @@ +from itertools import product +from string import ascii_lowercase + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Period, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestCounting: + def test_cumcount(self): + df = DataFrame([["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"]) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3]) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_empty(self): + ge = DataFrame().groupby(level=0) + se = Series(dtype=object).groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype="int64") + + tm.assert_series_equal(e, ge.cumcount()) + tm.assert_series_equal(e, se.cumcount()) + + def test_cumcount_dupe_index(self): + df = DataFrame( + [["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=[0] * 5 + ) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame([["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=mi) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=mi) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_groupby_not_col(self): + df = DataFrame( + [["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=[0] * 5 + ) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_ngroup(self): + df = DataFrame({"A": list("aaaba")}) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 0, 0, 1, 0]) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def 
test_ngroup_distinct(self): + df = DataFrame({"A": list("abcde")}) + g = df.groupby("A") + sg = g.A + + expected = Series(range(5), dtype="int64") + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_one_group(self): + df = DataFrame({"A": [0] * 5}) + g = df.groupby("A") + sg = g.A + + expected = Series([0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_empty(self): + ge = DataFrame().groupby(level=0) + se = Series(dtype=object).groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype="int64") + + tm.assert_series_equal(e, ge.ngroup()) + tm.assert_series_equal(e, se.ngroup()) + + def test_ngroup_series_matches_frame(self): + df = DataFrame({"A": list("aaaba")}) + s = Series(list("aaaba")) + + tm.assert_series_equal(df.groupby(s).ngroup(), s.groupby(s).ngroup()) + + def test_ngroup_dupe_index(self): + df = DataFrame({"A": list("aaaba")}, index=[0] * 5) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame({"A": list("aaaba")}, index=mi) + g = df.groupby("A") + sg = g.A + expected = Series([0, 0, 0, 1, 0], index=mi) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_groupby_not_col(self): + df = DataFrame({"A": list("aaaba")}, index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_descending(self): + df = DataFrame(["a", "a", "b", "a", "b"], columns=["A"]) + g = df.groupby(["A"]) + + ascending = Series([0, 0, 1, 0, 1]) + descending = Series([1, 1, 0, 1, 0]) + + tm.assert_series_equal(descending, (g.ngroups - 1) - ascending) + tm.assert_series_equal(ascending, g.ngroup(ascending=True)) + tm.assert_series_equal(descending, g.ngroup(ascending=False)) + + def test_ngroup_matches_cumcount(self): + # verify one manually-worked out case works + df = DataFrame( + [["a", "x"], ["a", "y"], ["b", "x"], ["a", "x"], ["b", "y"]], + columns=["A", "X"], + ) + g = df.groupby(["A", "X"]) + g_ngroup = g.ngroup() + g_cumcount = g.cumcount() + expected_ngroup = Series([0, 1, 2, 0, 3]) + expected_cumcount = Series([0, 0, 0, 1, 0]) + + tm.assert_series_equal(g_ngroup, expected_ngroup) + tm.assert_series_equal(g_cumcount, expected_cumcount) + + def test_ngroup_cumcount_pair(self): + # brute force comparison for all small series + for p in product(range(3), repeat=4): + df = DataFrame({"a": p}) + g = df.groupby(["a"]) + + order = sorted(set(p)) + ngroupd = [order.index(val) for val in p] + cumcounted = [p[:i].count(val) for i, val in enumerate(p)] + + tm.assert_series_equal(g.ngroup(), Series(ngroupd)) + tm.assert_series_equal(g.cumcount(), Series(cumcounted)) + + def test_ngroup_respects_groupby_order(self, sort): + np.random.seed(0) + df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) + g = df.groupby("a", sort=sort) + df["group_id"] = -1 + df["group_index"] = -1 + + for i, (_, group) in enumerate(g): + df.loc[group.index, "group_id"] = i + for j, ind in enumerate(group.index): + df.loc[ind, "group_index"] = j + + 
tm.assert_series_equal(Series(df["group_id"].values), g.ngroup()) + tm.assert_series_equal(Series(df["group_index"].values), g.cumcount()) + + @pytest.mark.parametrize( + "datetimelike", + [ + [Timestamp(f"2016-05-{i:02d} 20:09:25+00:00") for i in range(1, 4)], + [Timestamp(f"2016-05-{i:02d} 20:09:25") for i in range(1, 4)], + [Timestamp(f"2016-05-{i:02d} 20:09:25", tz="UTC") for i in range(1, 4)], + [Timedelta(x, unit="h") for x in range(1, 4)], + [Period(freq="2W", year=2017, month=x) for x in range(1, 4)], + ], + ) + def test_count_with_datetimelike(self, datetimelike): + # test for #13393, where DataframeGroupBy.count() fails + # when counting a datetimelike column. + + df = DataFrame({"x": ["a", "a", "b"], "y": datetimelike}) + res = df.groupby("x").count() + expected = DataFrame({"y": [2, 1]}, index=["a", "b"]) + expected.index.name = "x" + tm.assert_frame_equal(expected, res) + + def test_count_with_only_nans_in_first_group(self): + # GH21956 + df = DataFrame({"A": [np.nan, np.nan], "B": ["a", "b"], "C": [1, 2]}) + result = df.groupby(["A", "B"]).C.count() + mi = MultiIndex(levels=[[], ["a", "b"]], codes=[[], []], names=["A", "B"]) + expected = Series([], index=mi, dtype=np.int64, name="C") + tm.assert_series_equal(result, expected, check_index_type=False) + + def test_count_groupby_column_with_nan_in_groupby_column(self): + # https://github.com/pandas-dev/pandas/issues/32841 + df = DataFrame({"A": [1, 1, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]}) + res = df.groupby(["B"]).count() + expected = DataFrame( + index=Index([0.0, 3.0, 4.0, 5.0], name="B"), data={"A": [1, 1, 1, 1]} + ) + tm.assert_frame_equal(expected, res) + + def test_groupby_count_dateparseerror(self): + dr = date_range(start="1/1/2012", freq="5min", periods=10) + + # BAD Example, datetimes first + ser = Series(np.arange(10), index=[dr, np.arange(10)]) + grouped = ser.groupby(lambda x: x[1] % 2 == 0) + result = grouped.count() + + ser = Series(np.arange(10), index=[np.arange(10), dr]) + grouped = ser.groupby(lambda x: x[0] % 2 == 0) + expected = grouped.count() + + tm.assert_series_equal(result, expected) + + +def test_groupby_timedelta_cython_count(): + df = DataFrame( + {"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")} + ) + expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delt") + result = df.groupby("g").delt.count() + tm.assert_series_equal(expected, result) + + +def test_count(): + n = 1 << 15 + dr = date_range("2015-08-30", periods=n // 10, freq="T") + + df = DataFrame( + { + "1st": np.random.choice(list(ascii_lowercase), n), + "2nd": np.random.randint(0, 5, n), + "3rd": np.random.randn(n).round(3), + "4th": np.random.randint(-10, 10, n), + "5th": np.random.choice(dr, n), + "6th": np.random.randn(n).round(3), + "7th": np.random.randn(n).round(3), + "8th": np.random.choice(dr, n) - np.random.choice(dr, 1), + "9th": np.random.choice(list(ascii_lowercase), n), + } + ) + + for col in df.columns.drop(["1st", "2nd", "4th"]): + df.loc[np.random.choice(n, n // 10), col] = np.nan + + df["9th"] = df["9th"].astype("category") + + for key in ["1st", "2nd", ["1st", "2nd"]]: + left = df.groupby(key).count() + right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1) + tm.assert_frame_equal(left, right) + + +def test_count_non_nulls(): + # GH#5610 + # count counts non-nulls + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, np.nan]], + columns=["A", "B", "C"], + ) + + count_as = df.groupby("A").count() + count_not_as = df.groupby("A", as_index=False).count() + + expected 
= DataFrame([[1, 2], [0, 0]], columns=["B", "C"], index=[1, 3]) + expected.index.name = "A" + tm.assert_frame_equal(count_not_as, expected.reset_index()) + tm.assert_frame_equal(count_as, expected) + + count_B = df.groupby("A")["B"].count() + tm.assert_series_equal(count_B, expected["B"]) + + +def test_count_object(): + df = DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3}) + result = df.groupby("c").a.count() + expected = Series([3, 3], index=Index([2, 3], name="c"), name="a") + tm.assert_series_equal(result, expected) + + df = DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3}) + result = df.groupby("c").a.count() + expected = Series([1, 3], index=Index([2, 3], name="c"), name="a") + tm.assert_series_equal(result, expected) + + +def test_count_cross_type(): + # GH8169 + vals = np.hstack( + (np.random.randint(0, 5, (100, 2)), np.random.randint(0, 2, (100, 2))) + ) + + df = DataFrame(vals, columns=["a", "b", "c", "d"]) + df[df == 2] = np.nan + expected = df.groupby(["c", "d"]).count() + + for t in ["float32", "object"]: + df["a"] = df["a"].astype(t) + df["b"] = df["b"].astype(t) + result = df.groupby(["c", "d"]).count() + tm.assert_frame_equal(result, expected) + + +def test_lower_int_prec_count(): + df = DataFrame( + { + "a": np.array([0, 1, 2, 100], np.int8), + "b": np.array([1, 2, 3, 6], np.uint32), + "c": np.array([4, 5, 6, 8], np.int16), + "grp": list("ab" * 2), + } + ) + result = df.groupby("grp").count() + expected = DataFrame( + {"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=Index(list("ab"), name="grp") + ) + tm.assert_frame_equal(result, expected) + + +def test_count_uses_size_on_exception(): + class RaisingObjectException(Exception): + pass + + class RaisingObject: + def __init__(self, msg="I will raise inside Cython") -> None: + super().__init__() + self.msg = msg + + def __eq__(self, other): + # gets called in Cython to check that raising calls the method + raise RaisingObjectException(self.msg) + + df = DataFrame({"a": [RaisingObject() for _ in range(4)], "grp": list("ab" * 2)}) + result = df.groupby("grp").count() + expected = DataFrame({"a": [2, 2]}, index=Index(list("ab"), name="grp")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py new file mode 100644 index 00000000..b4051456 --- /dev/null +++ b/pandas/tests/groupby/test_filters.py @@ -0,0 +1,614 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, +) +import pandas._testing as tm + + +def test_filter_series(): + s = Series([1, 3, 20, 5, 22, 24, 7]) + expected_odd = Series([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = Series([20, 22, 24], index=[2, 4, 5]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() < 10), expected_odd) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 10), expected_even) + # Test dropna=False. 
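+    # With dropna=False, rows from filtered-out groups are kept as NaN instead of
+    # being dropped, so the result stays aligned with the original index.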
+ tm.assert_series_equal( + grouped.filter(lambda x: x.mean() < 10, dropna=False), + expected_odd.reindex(s.index), + ) + tm.assert_series_equal( + grouped.filter(lambda x: x.mean() > 10, dropna=False), + expected_even.reindex(s.index), + ) + + +def test_filter_single_column_df(): + df = DataFrame([1, 3, 20, 5, 22, 24, 7]) + expected_odd = DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = DataFrame([20, 22, 24], index=[2, 4, 5]) + grouper = df[0].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + tm.assert_frame_equal(grouped.filter(lambda x: x.mean() < 10), expected_odd) + tm.assert_frame_equal(grouped.filter(lambda x: x.mean() > 10), expected_even) + # Test dropna=False. + tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() < 10, dropna=False), + expected_odd.reindex(df.index), + ) + tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() > 10, dropna=False), + expected_even.reindex(df.index), + ) + + +def test_filter_multi_column_df(): + df = DataFrame({"A": [1, 12, 12, 1], "B": [1, 1, 1, 1]}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + expected = DataFrame({"A": [12, 12], "B": [1, 1]}, index=[1, 2]) + tm.assert_frame_equal( + grouped.filter(lambda x: x["A"].sum() - x["B"].sum() > 10), expected + ) + + +def test_filter_mixed_df(): + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + expected = DataFrame({"A": [12, 12], "B": ["b", "c"]}, index=[1, 2]) + tm.assert_frame_equal(grouped.filter(lambda x: x["A"].sum() > 10), expected) + + +def test_filter_out_all_groups(): + s = Series([1, 3, 20, 5, 22, 24, 7]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]]) + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + tm.assert_frame_equal(grouped.filter(lambda x: x["A"].sum() > 1000), df.loc[[]]) + + +def test_filter_out_no_groups(): + s = Series([1, 3, 20, 5, 22, 24, 7]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + filtered = grouped.filter(lambda x: x.mean() > 0) + tm.assert_series_equal(filtered, s) + df = DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + filtered = grouped.filter(lambda x: x["A"].mean() > 0) + tm.assert_frame_equal(filtered, df) + + +def test_filter_out_all_groups_in_df(): + # GH12768 + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) + res = df.groupby("a") + res = res.filter(lambda x: x["b"].sum() > 5, dropna=False) + expected = DataFrame({"a": [np.nan] * 3, "b": [np.nan] * 3}) + tm.assert_frame_equal(expected, res) + + df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) + res = df.groupby("a") + res = res.filter(lambda x: x["b"].sum() > 5, dropna=True) + expected = DataFrame({"a": [], "b": []}, dtype="int64") + tm.assert_frame_equal(expected, res) + + +def test_filter_condition_raises(): + def raise_if_sum_is_zero(x): + if x.sum() == 0: + raise ValueError + else: + return x.sum() > 0 + + s = Series([-1, 0, 1, 2]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + grouped.filter(raise_if_sum_is_zero) + + +def test_filter_with_axis_in_groupby(): + # issue 11041 + index = pd.MultiIndex.from_product([range(10), [0, 1]]) + data = 
DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype="int64") + result = data.groupby(level=0, axis=1).filter(lambda x: x.iloc[0, 0] > 10) + expected = data.iloc[:, 12:20] + tm.assert_frame_equal(result, expected) + + +def test_filter_bad_shapes(): + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + s = df["B"] + g_df = df.groupby("B") + g_s = s.groupby(s) + + f = lambda x: x + msg = "filter function returned a DataFrame, but expected a scalar bool" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + f = lambda x: x == 1 + msg = "filter function returned a DataFrame, but expected a scalar bool" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + f = lambda x: np.outer(x, x) + msg = "can't multiply sequence by non-int of type 'str'" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + +def test_filter_nan_is_false(): + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + s = df["B"] + g_df = df.groupby(df["B"]) + g_s = s.groupby(s) + + f = lambda x: np.nan + tm.assert_frame_equal(g_df.filter(f), df.loc[[]]) + tm.assert_series_equal(g_s.filter(f), s[[]]) + + +def test_filter_against_workaround(): + np.random.seed(0) + # Series of ints + s = Series(np.random.randint(0, 100, 1000)) + grouper = s.apply(lambda x: np.round(x, -1)) + grouped = s.groupby(grouper) + f = lambda x: x.mean() > 10 + + old_way = s[grouped.transform(f).astype("bool")] + new_way = grouped.filter(f) + tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) + + # Series of floats + s = 100 * Series(np.random.random(1000)) + grouper = s.apply(lambda x: np.round(x, -1)) + grouped = s.groupby(grouper) + f = lambda x: x.mean() > 10 + old_way = s[grouped.transform(f).astype("bool")] + new_way = grouped.filter(f) + tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) + + # Set up DataFrame of ints, floats, strings. + from string import ascii_lowercase + + letters = np.array(list(ascii_lowercase)) + N = 1000 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame( + { + "ints": Series(np.random.randint(0, 100, N)), + "floats": N / 10 * Series(np.random.random(N)), + "letters": Series(random_letters), + } + ) + + # Group by ints; filter on floats. + grouped = df.groupby("ints") + old_way = df[grouped.floats.transform(lambda x: x.mean() > N / 20).astype("bool")] + new_way = grouped.filter(lambda x: x["floats"].mean() > N / 20) + tm.assert_frame_equal(new_way, old_way) + + # Group by floats (rounded); filter on strings. + grouper = df.floats.apply(lambda x: np.round(x, -1)) + grouped = df.groupby(grouper) + old_way = df[grouped.letters.transform(lambda x: len(x) < N / 10).astype("bool")] + new_way = grouped.filter(lambda x: len(x.letters) < N / 10) + tm.assert_frame_equal(new_way, old_way) + + # Group by strings; filter on ints. 
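+    # transform() broadcasts each group's boolean result back to every row, so
+    # masking with it is equivalent to (and a workaround for) groupby.filter().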
+ grouped = df.groupby("letters") + old_way = df[grouped.ints.transform(lambda x: x.mean() > N / 20).astype("bool")] + new_way = grouped.filter(lambda x: x["ints"].mean() > N / 20) + tm.assert_frame_equal(new_way, old_way) + + +def test_filter_using_len(): + # BUG GH4447 + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + grouped = df.groupby("B") + actual = grouped.filter(lambda x: len(x) > 2) + expected = DataFrame( + {"A": np.arange(2, 6), "B": list("bbbb"), "C": np.arange(2, 6)}, + index=np.arange(2, 6), + ) + tm.assert_frame_equal(actual, expected) + + actual = grouped.filter(lambda x: len(x) > 4) + expected = df.loc[[]] + tm.assert_frame_equal(actual, expected) + + # Series have always worked properly, but we'll test anyway. + s = df["B"] + grouped = s.groupby(s) + actual = grouped.filter(lambda x: len(x) > 2) + expected = Series(4 * ["b"], index=np.arange(2, 6), name="B") + tm.assert_series_equal(actual, expected) + + actual = grouped.filter(lambda x: len(x) > 4) + expected = s[[]] + tm.assert_series_equal(actual, expected) + + +def test_filter_maintains_ordering(): + # Simple case: index is sequential. #4621 + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]} + ) + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + # Now index is sequentially decreasing. + df.index = np.arange(len(df) - 1, -1, -1) + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + # Index is shuffled. 
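+    # Surviving rows should come back in their original (shuffled) order.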
+ SHUFFLED = [4, 6, 7, 2, 1, 0, 5, 3] + df.index = df.index[SHUFFLED] + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + +def test_filter_multiple_timestamp(): + # GH 10114 + df = DataFrame( + { + "A": np.arange(5, dtype="int64"), + "B": ["foo", "bar", "foo", "bar", "bar"], + "C": Timestamp("20130101"), + } + ) + + grouped = df.groupby(["B", "C"]) + + result = grouped["A"].filter(lambda x: True) + tm.assert_series_equal(df["A"], result) + + result = grouped["A"].transform(len) + expected = Series([2, 3, 2, 3, 3], name="A") + tm.assert_series_equal(result, expected) + + result = grouped.filter(lambda x: True) + tm.assert_frame_equal(df, result) + + result = grouped.transform("sum") + expected = DataFrame({"A": [2, 8, 2, 8, 8]}) + tm.assert_frame_equal(result, expected) + + result = grouped.transform(len) + expected = DataFrame({"A": [2, 3, 2, 3, 3]}) + tm.assert_frame_equal(result, expected) + + +def test_filter_and_transform_with_non_unique_int_index(): + # GH4620 + index = [1, 1, 1, 2, 1, 1, 0, 1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_multiple_non_unique_int_index(): + # GH4620 + index = [1, 1, 1, 2, 0, 0, 0, 1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_float_index(): + # GH4620 + index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float) + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_timestamp_index(): + # GH4620 + t0 = Timestamp("2013-09-30 00:05:00") + t1 = Timestamp("2013-10-30 00:05:00") + t2 = Timestamp("2013-11-30 00:05:00") + index = [t1, t1, t1, t2, t1, t1, t0, t1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_string_index(): + # GH4620 + index = list("bbbcbbab") + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_has_access_to_grouped_cols(): + df = DataFrame([[1, 2], [1, 3], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + # previously didn't have access to col A #???? 
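+    # i.e. the grouping column "A" is part of each sub-frame passed to the callable.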
+ filt = g.filter(lambda x: x["A"].sum() == 2) + tm.assert_frame_equal(filt, df.iloc[[0, 1]]) + + +def test_filter_enforces_scalarness(): + df = DataFrame( + [ + ["best", "a", "x"], + ["worst", "b", "y"], + ["best", "c", "x"], + ["best", "d", "y"], + ["worst", "d", "y"], + ["worst", "d", "y"], + ["best", "d", "z"], + ], + columns=["a", "b", "c"], + ) + with pytest.raises(TypeError, match="filter function returned a.*"): + df.groupby("c").filter(lambda g: g["a"] == "best") + + +def test_filter_non_bool_raises(): + df = DataFrame( + [ + ["best", "a", 1], + ["worst", "b", 1], + ["best", "c", 1], + ["best", "d", 1], + ["worst", "d", 1], + ["worst", "d", 1], + ["best", "d", 1], + ], + columns=["a", "b", "c"], + ) + with pytest.raises(TypeError, match="filter function returned a.*"): + df.groupby("a").filter(lambda g: g.c.mean()) + + +def test_filter_dropna_with_empty_groups(): + # GH 10780 + data = Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3)) + groupped = data.groupby(level=0) + result_false = groupped.filter(lambda x: x.mean() > 1, dropna=False) + expected_false = Series([np.nan] * 9, index=np.repeat([1, 2, 3], 3)) + tm.assert_series_equal(result_false, expected_false) + + result_true = groupped.filter(lambda x: x.mean() > 1, dropna=True) + expected_true = Series(index=pd.Index([], dtype=int), dtype=np.float64) + tm.assert_series_equal(result_true, expected_true) + + +def test_filter_consistent_result_before_after_agg_func(): + # GH 17091 + df = DataFrame({"data": range(6), "key": list("ABCABC")}) + grouper = df.groupby("key") + result = grouper.filter(lambda x: True) + expected = DataFrame({"data": range(6), "key": list("ABCABC")}) + tm.assert_frame_equal(result, expected) + + grouper.sum() + result = grouper.filter(lambda x: True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_frame_value_counts.py b/pandas/tests/groupby/test_frame_value_counts.py new file mode 100644 index 00000000..8255fbab --- /dev/null +++ b/pandas/tests/groupby/test_frame_value_counts.py @@ -0,0 +1,783 @@ +import numpy as np +import pytest + +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.fixture +def education_df(): + return DataFrame( + { + "gender": ["male", "male", "female", "male", "female", "male"], + "education": ["low", "medium", "high", "low", "high", "low"], + "country": ["US", "FR", "US", "FR", "FR", "FR"], + } + ) + + +def test_axis(education_df): + gp = education_df.groupby("country", axis=1) + with pytest.raises(NotImplementedError, match="axis"): + gp.value_counts() + + +def test_bad_subset(education_df): + gp = education_df.groupby("country") + with pytest.raises(ValueError, match="subset"): + gp.value_counts(subset=["country"]) + + +def test_basic(education_df): + # gh43564 + result = education_df.groupby("country")[["gender", "education"]].value_counts( + normalize=True + ) + expected = Series( + data=[0.5, 0.25, 0.25, 0.5, 0.5], + index=MultiIndex.from_tuples( + [ + ("FR", "male", "low"), + ("FR", "female", "high"), + ("FR", "male", "medium"), + ("US", "female", "high"), + ("US", "male", "low"), + ], + names=["country", "gender", "education"], + ), + ) + tm.assert_series_equal(result, expected) + + +def _frame_value_counts(df, keys, normalize, sort, ascending): + return df[keys].value_counts(normalize=normalize, sort=sort, ascending=ascending) + + +@pytest.mark.parametrize("groupby", ["column", "array", "function"]) +@pytest.mark.parametrize("normalize", [True, False]) 
+@pytest.mark.parametrize( + "sort, ascending", + [ + (False, None), + (True, True), + (True, False), + ], +) +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("frame", [True, False]) +def test_against_frame_and_seriesgroupby( + education_df, groupby, normalize, sort, ascending, as_index, frame +): + # test all parameters: + # - Use column, array or function as by= parameter + # - Whether or not to normalize + # - Whether or not to sort and how + # - Whether or not to use the groupby as an index + # - 3-way compare against: + # - apply with :meth:`~DataFrame.value_counts` + # - `~SeriesGroupBy.value_counts` + by = { + "column": "country", + "array": education_df["country"].values, + "function": lambda x: education_df["country"][x] == "US", + }[groupby] + + gp = education_df.groupby(by=by, as_index=as_index) + result = gp[["gender", "education"]].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) + if frame: + # compare against apply with DataFrame value_counts + expected = gp.apply( + _frame_value_counts, ["gender", "education"], normalize, sort, ascending + ) + + if as_index: + tm.assert_series_equal(result, expected) + else: + name = "proportion" if normalize else "count" + expected = expected.reset_index().rename({0: name}, axis=1) + if groupby == "column": + expected = expected.rename({"level_0": "country"}, axis=1) + expected["country"] = np.where(expected["country"], "US", "FR") + elif groupby == "function": + expected["level_0"] = expected["level_0"] == 1 + else: + expected["level_0"] = np.where(expected["level_0"], "US", "FR") + tm.assert_frame_equal(result, expected) + else: + # compare against SeriesGroupBy value_counts + education_df["both"] = education_df["gender"] + "-" + education_df["education"] + expected = gp["both"].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) + expected.name = None + if as_index: + index_frame = expected.index.to_frame(index=False) + index_frame["gender"] = index_frame["both"].str.split("-").str.get(0) + index_frame["education"] = index_frame["both"].str.split("-").str.get(1) + del index_frame["both"] + index_frame = index_frame.rename({0: None}, axis=1) + expected.index = MultiIndex.from_frame(index_frame) + tm.assert_series_equal(result, expected) + else: + expected.insert(1, "gender", expected["both"].str.split("-").str.get(0)) + expected.insert(2, "education", expected["both"].str.split("-").str.get(1)) + del expected["both"] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize( + "sort, ascending, expected_rows, expected_count, expected_group_size", + [ + (False, None, [0, 1, 2, 3, 4], [1, 1, 1, 2, 1], [1, 3, 1, 3, 1]), + (True, False, [4, 3, 1, 2, 0], [1, 2, 1, 1, 1], [1, 3, 3, 1, 1]), + (True, True, [4, 1, 3, 2, 0], [1, 1, 2, 1, 1], [1, 3, 3, 1, 1]), + ], +) +def test_compound( + education_df, + normalize, + sort, + ascending, + expected_rows, + expected_count, + expected_group_size, +): + # Multiple groupby keys and as_index=False + gp = education_df.groupby(["country", "gender"], as_index=False, sort=False) + result = gp["education"].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) + expected = DataFrame() + for column in ["country", "gender", "education"]: + expected[column] = [education_df[column][row] for row in expected_rows] + if normalize: + expected["proportion"] = expected_count + expected["proportion"] /= expected_group_size + else: + expected["count"] = 
expected_count + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def animals_df(): + return DataFrame( + {"key": [1, 1, 1, 1], "num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + +@pytest.mark.parametrize( + "sort, ascending, normalize, expected_data, expected_index", + [ + (False, None, False, [1, 2, 1], [(1, 1, 1), (2, 4, 6), (2, 0, 0)]), + (True, True, False, [1, 1, 2], [(1, 1, 1), (2, 6, 4), (2, 0, 0)]), + (True, False, False, [2, 1, 1], [(1, 1, 1), (4, 2, 6), (0, 2, 0)]), + (True, False, True, [0.5, 0.25, 0.25], [(1, 1, 1), (4, 2, 6), (0, 2, 0)]), + ], +) +def test_data_frame_value_counts( + animals_df, sort, ascending, normalize, expected_data, expected_index +): + # 3-way compare with :meth:`~DataFrame.value_counts` + # Tests from frame/methods/test_value_counts.py + result_frame = animals_df.value_counts( + sort=sort, ascending=ascending, normalize=normalize + ) + expected = Series( + data=expected_data, + index=MultiIndex.from_arrays( + expected_index, names=["key", "num_legs", "num_wings"] + ), + ) + tm.assert_series_equal(result_frame, expected) + + result_frame_groupby = animals_df.groupby("key").value_counts( + sort=sort, ascending=ascending, normalize=normalize + ) + + tm.assert_series_equal(result_frame_groupby, expected) + + +@pytest.fixture +def nulls_df(): + n = np.nan + return DataFrame( + { + "A": [1, 1, n, 4, n, 6, 6, 6, 6], + "B": [1, 1, 3, n, n, 6, 6, 6, 6], + "C": [1, 2, 3, 4, 5, 6, n, 8, n], + "D": [1, 2, 3, 4, 5, 6, 7, n, n], + } + ) + + +@pytest.mark.parametrize( + "group_dropna, count_dropna, expected_rows, expected_values", + [ + ( + False, + False, + [0, 1, 3, 5, 7, 6, 8, 2, 4], + [0.5, 0.5, 1.0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0], + ), + (False, True, [0, 1, 3, 5, 2, 4], [0.5, 0.5, 1.0, 1.0, 1.0, 1.0]), + (True, False, [0, 1, 5, 7, 6, 8], [0.5, 0.5, 0.25, 0.25, 0.25, 0.25]), + (True, True, [0, 1, 5], [0.5, 0.5, 1.0]), + ], +) +def test_dropna_combinations( + nulls_df, group_dropna, count_dropna, expected_rows, expected_values +): + gp = nulls_df.groupby(["A", "B"], dropna=group_dropna) + result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) + columns = DataFrame() + for column in nulls_df.columns: + columns[column] = [nulls_df[column][row] for row in expected_rows] + index = MultiIndex.from_frame(columns) + expected = Series(data=expected_values, index=index) + tm.assert_series_equal(result, expected) + + +@pytest.fixture +def names_with_nulls_df(nulls_fixture): + return DataFrame( + { + "key": [1, 1, 1, 1], + "first_name": ["John", "Anne", "John", "Beth"], + "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], + }, + ) + + +@pytest.mark.parametrize( + "dropna, expected_data, expected_index", + [ + ( + True, + [1, 1], + MultiIndex.from_arrays( + [(1, 1), ("Beth", "John"), ("Louise", "Smith")], + names=["key", "first_name", "middle_name"], + ), + ), + ( + False, + [1, 1, 1, 1], + MultiIndex( + levels=[ + Index([1]), + Index(["Anne", "Beth", "John"]), + Index(["Louise", "Smith", np.nan]), + ], + codes=[[0, 0, 0, 0], [0, 1, 2, 2], [2, 0, 1, 2]], + names=["key", "first_name", "middle_name"], + ), + ), + ], +) +@pytest.mark.parametrize("normalize", [False, True]) +def test_data_frame_value_counts_dropna( + names_with_nulls_df, dropna, normalize, expected_data, expected_index +): + # GH 41334 + # 3-way compare with :meth:`~DataFrame.value_counts` + # Tests with nulls from frame/methods/test_value_counts.py + result_frame = names_with_nulls_df.value_counts(dropna=dropna, 
normalize=normalize) + expected = Series( + data=expected_data, + index=expected_index, + ) + if normalize: + expected /= float(len(expected_data)) + + tm.assert_series_equal(result_frame, expected) + + result_frame_groupby = names_with_nulls_df.groupby("key").value_counts( + dropna=dropna, normalize=normalize + ) + + tm.assert_series_equal(result_frame_groupby, expected) + + +@pytest.mark.parametrize("as_index", [False, True]) +@pytest.mark.parametrize("observed", [False, True]) +@pytest.mark.parametrize( + "normalize, expected_data", + [ + (False, np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64)), + ( + True, + np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]), + ), + ], +) +def test_categorical_single_grouper_with_only_observed_categories( + education_df, as_index, observed, normalize, expected_data +): + + # Test single categorical grouper with only observed grouping categories + # when non-groupers are also categorical + + gp = education_df.astype("category").groupby( + "country", as_index=as_index, observed=observed + ) + result = gp.value_counts(normalize=normalize) + + expected_index = MultiIndex.from_tuples( + [ + ("FR", "male", "low"), + ("FR", "female", "high"), + ("FR", "male", "medium"), + ("FR", "female", "low"), + ("FR", "female", "medium"), + ("FR", "male", "high"), + ("US", "female", "high"), + ("US", "male", "low"), + ("US", "female", "low"), + ("US", "female", "medium"), + ("US", "male", "high"), + ("US", "male", "medium"), + ], + names=["country", "gender", "education"], + ) + + expected_series = Series( + data=expected_data, + index=expected_index, + ) + for i in range(3): + expected_series.index = expected_series.index.set_levels( + CategoricalIndex(expected_series.index.levels[i]), level=i + ) + + if as_index: + tm.assert_series_equal(result, expected_series) + else: + expected = expected_series.reset_index( + name="proportion" if normalize else "count" + ) + tm.assert_frame_equal(result, expected) + + +def assert_categorical_single_grouper( + education_df, as_index, observed, expected_index, normalize, expected_data +): + # Test single categorical grouper when non-groupers are also categorical + education_df = education_df.copy().astype("category") + + # Add non-observed grouping categories + education_df["country"] = education_df["country"].cat.add_categories(["ASIA"]) + + gp = education_df.groupby("country", as_index=as_index, observed=observed) + result = gp.value_counts(normalize=normalize) + + expected_series = Series( + data=expected_data, + index=MultiIndex.from_tuples( + expected_index, + names=["country", "gender", "education"], + ), + ) + for i in range(3): + index_level = CategoricalIndex(expected_series.index.levels[i]) + if i == 0: + index_level = index_level.set_categories( + education_df["country"].cat.categories + ) + expected_series.index = expected_series.index.set_levels(index_level, level=i) + + if as_index: + tm.assert_series_equal(result, expected_series) + else: + expected = expected_series.reset_index( + name="proportion" if normalize else "count" + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize( + "normalize, expected_data", + [ + (False, np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64)), + ( + True, + np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]), + ), + ], +) +def test_categorical_single_grouper_observed_true( + education_df, as_index, normalize, expected_data +): + # GH#46357 + + 
expected_index = [ + ("FR", "male", "low"), + ("FR", "female", "high"), + ("FR", "male", "medium"), + ("FR", "female", "low"), + ("FR", "female", "medium"), + ("FR", "male", "high"), + ("US", "female", "high"), + ("US", "male", "low"), + ("US", "female", "low"), + ("US", "female", "medium"), + ("US", "male", "high"), + ("US", "male", "medium"), + ] + + assert_categorical_single_grouper( + education_df=education_df, + as_index=as_index, + observed=True, + expected_index=expected_index, + normalize=normalize, + expected_data=expected_data, + ) + + +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize( + "normalize, expected_data", + [ + ( + False, + np.array( + [2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int64 + ), + ), + ( + True, + np.array( + [ + 0.5, + 0.25, + 0.25, + 0.0, + 0.0, + 0.0, + 0.5, + 0.5, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] + ), + ), + ], +) +def test_categorical_single_grouper_observed_false( + education_df, as_index, normalize, expected_data +): + # GH#46357 + + expected_index = [ + ("FR", "male", "low"), + ("FR", "female", "high"), + ("FR", "male", "medium"), + ("FR", "female", "low"), + ("FR", "male", "high"), + ("FR", "female", "medium"), + ("US", "female", "high"), + ("US", "male", "low"), + ("US", "male", "medium"), + ("US", "male", "high"), + ("US", "female", "medium"), + ("US", "female", "low"), + ("ASIA", "male", "low"), + ("ASIA", "male", "high"), + ("ASIA", "female", "medium"), + ("ASIA", "female", "low"), + ("ASIA", "female", "high"), + ("ASIA", "male", "medium"), + ] + + assert_categorical_single_grouper( + education_df=education_df, + as_index=as_index, + observed=False, + expected_index=expected_index, + normalize=normalize, + expected_data=expected_data, + ) + + +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize( + "observed, expected_index", + [ + ( + False, + [ + ("FR", "high", "female"), + ("FR", "high", "male"), + ("FR", "low", "male"), + ("FR", "low", "female"), + ("FR", "medium", "male"), + ("FR", "medium", "female"), + ("US", "high", "female"), + ("US", "high", "male"), + ("US", "low", "male"), + ("US", "low", "female"), + ("US", "medium", "female"), + ("US", "medium", "male"), + ], + ), + ( + True, + [ + ("FR", "high", "female"), + ("FR", "low", "male"), + ("FR", "medium", "male"), + ("US", "high", "female"), + ("US", "low", "male"), + ], + ), + ], +) +@pytest.mark.parametrize( + "normalize, expected_data", + [ + (False, np.array([1, 0, 2, 0, 1, 0, 1, 0, 1, 0, 0, 0], dtype=np.int64)), + ( + True, + # NaN values corresponds to non-observed groups + np.array([1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]), + ), + ], +) +def test_categorical_multiple_groupers( + education_df, as_index, observed, expected_index, normalize, expected_data +): + # GH#46357 + + # Test multiple categorical groupers when non-groupers are non-categorical + education_df = education_df.copy() + education_df["country"] = education_df["country"].astype("category") + education_df["education"] = education_df["education"].astype("category") + + gp = education_df.groupby( + ["country", "education"], as_index=as_index, observed=observed + ) + result = gp.value_counts(normalize=normalize) + + expected_series = Series( + data=expected_data[expected_data > 0.0] if observed else expected_data, + index=MultiIndex.from_tuples( + expected_index, + names=["country", "education", "gender"], + ), + ) + for i in range(2): + expected_series.index = 
expected_series.index.set_levels( + CategoricalIndex(expected_series.index.levels[i]), level=i + ) + + if as_index: + tm.assert_series_equal(result, expected_series) + else: + expected = expected_series.reset_index( + name="proportion" if normalize else "count" + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("as_index", [False, True]) +@pytest.mark.parametrize("observed", [False, True]) +@pytest.mark.parametrize( + "normalize, expected_data", + [ + (False, np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64)), + ( + True, + # NaN values corresponds to non-observed groups + np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]), + ), + ], +) +def test_categorical_non_groupers( + education_df, as_index, observed, normalize, expected_data +): + # GH#46357 Test non-observed categories are included in the result, + # regardless of `observed` + education_df = education_df.copy() + education_df["gender"] = education_df["gender"].astype("category") + education_df["education"] = education_df["education"].astype("category") + + gp = education_df.groupby("country", as_index=as_index, observed=observed) + result = gp.value_counts(normalize=normalize) + + expected_index = [ + ("FR", "male", "low"), + ("FR", "female", "high"), + ("FR", "male", "medium"), + ("FR", "female", "low"), + ("FR", "female", "medium"), + ("FR", "male", "high"), + ("US", "female", "high"), + ("US", "male", "low"), + ("US", "female", "low"), + ("US", "female", "medium"), + ("US", "male", "high"), + ("US", "male", "medium"), + ] + expected_series = Series( + data=expected_data, + index=MultiIndex.from_tuples( + expected_index, + names=["country", "gender", "education"], + ), + ) + for i in range(1, 3): + expected_series.index = expected_series.index.set_levels( + CategoricalIndex(expected_series.index.levels[i]), level=i + ) + + if as_index: + tm.assert_series_equal(result, expected_series) + else: + expected = expected_series.reset_index( + name="proportion" if normalize else "count" + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "normalize, expected_label, expected_values", + [ + (False, "count", [1, 1, 1]), + (True, "proportion", [0.5, 0.5, 1.0]), + ], +) +def test_mixed_groupings(normalize, expected_label, expected_values): + # Test multiple groupings + df = DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) + gp = df.groupby([[4, 5, 4], "A", lambda i: 7 if i == 1 else 8], as_index=False) + result = gp.value_counts(sort=True, normalize=normalize) + expected = DataFrame( + { + "level_0": [4, 4, 5], + "A": [1, 1, 2], + "level_2": [8, 8, 7], + "B": [1, 3, 2], + expected_label: expected_values, + } + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "test, columns, expected_names", + [ + ("repeat", list("abbde"), ["a", None, "d", "b", "b", "e"]), + ("level", list("abcd") + ["level_1"], ["a", None, "d", "b", "c", "level_1"]), + ], +) +@pytest.mark.parametrize("as_index", [False, True]) +def test_column_label_duplicates(test, columns, expected_names, as_index): + # GH 44992 + # Test for duplicate input column labels and generated duplicate labels + df = DataFrame([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]], columns=columns) + expected_data = [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)] + result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts() + if as_index: + expected = Series( + data=(1, 1), + index=MultiIndex.from_tuples( + expected_data, + names=expected_names, + ), + ) + tm.assert_series_equal(result, expected) + 
else: + expected_data = [list(row) + [1] for row in expected_data] + expected_columns = list(expected_names) + expected_columns[1] = "level_1" + expected_columns.append("count") + expected = DataFrame(expected_data, columns=expected_columns) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "normalize, expected_label", + [ + (False, "count"), + (True, "proportion"), + ], +) +def test_result_label_duplicates(normalize, expected_label): + # Test for result column label duplicating an input column label + gb = DataFrame([[1, 2, 3]], columns=["a", "b", expected_label]).groupby( + "a", as_index=False + ) + msg = f"Column label '{expected_label}' is duplicate of result column" + with pytest.raises(ValueError, match=msg): + gb.value_counts(normalize=normalize) + + +def test_ambiguous_grouping(): + # Test that groupby is not confused by groupings length equal to row count + df = DataFrame({"a": [1, 1]}) + gb = df.groupby([1, 1]) + result = gb.value_counts() + expected = Series([2], index=MultiIndex.from_tuples([[1, 1]], names=[None, "a"])) + tm.assert_series_equal(result, expected) + + +def test_subset_overlaps_gb_key_raises(): + # GH 46383 + df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) + msg = "Keys {'c1'} in subset cannot be in the groupby column keys." + with pytest.raises(ValueError, match=msg): + df.groupby("c1").value_counts(subset=["c1"]) + + +def test_subset_doesnt_exist_in_frame(): + # GH 46383 + df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) + msg = "Keys {'c3'} in subset do not exist in the DataFrame." + with pytest.raises(ValueError, match=msg): + df.groupby("c1").value_counts(subset=["c3"]) + + +def test_subset(): + # GH 46383 + df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) + result = df.groupby(level=0).value_counts(subset=["c2"]) + expected = Series( + [1, 2], index=MultiIndex.from_arrays([[0, 1], ["x", "y"]], names=[None, "c2"]) + ) + tm.assert_series_equal(result, expected) + + +def test_subset_duplicate_columns(): + # GH 46383 + df = DataFrame( + [["a", "x", "x"], ["b", "y", "y"], ["b", "y", "y"]], + index=[0, 1, 1], + columns=["c1", "c2", "c2"], + ) + result = df.groupby(level=0).value_counts(subset=["c2"]) + expected = Series( + [1, 2], + index=MultiIndex.from_arrays( + [[0, 1], ["x", "y"], ["x", "y"]], names=[None, "c2", "c2"] + ), + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py new file mode 100644 index 00000000..d813a284 --- /dev/null +++ b/pandas/tests/groupby/test_function.py @@ -0,0 +1,1603 @@ +import builtins +from io import StringIO + +import numpy as np +import pytest + +from pandas._libs import lib +from pandas.errors import UnsupportedFunctionCall + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +import pandas.core.nanops as nanops +from pandas.tests.groupby import get_groupby_method_args +from pandas.util import _test_decorators as td + + +@pytest.fixture( + params=[np.int32, np.int64, np.float32, np.float64, "Int64", "Float64"], + ids=["np.int32", "np.int64", "np.float32", "np.float64", "Int64", "Float64"], +) +def dtypes_for_minmax(request): + """ + Fixture of dtypes with min and max values used for testing + cummin and cummax + """ + dtype = request.param + + np_type = dtype + if dtype == "Int64": + np_type = np.int64 + elif dtype == "Float64": + 
np_type = np.float64 + + min_val = ( + np.iinfo(np_type).min + if np.dtype(np_type).kind == "i" + else np.finfo(np_type).min + ) + max_val = ( + np.iinfo(np_type).max + if np.dtype(np_type).kind == "i" + else np.finfo(np_type).max + ) + + return (dtype, min_val, max_val) + + +def test_intercept_builtin_sum(): + s = Series([1.0, 2.0, np.nan, 3.0]) + grouped = s.groupby([0, 1, 2, 2]) + + result = grouped.agg(builtins.sum) + result2 = grouped.apply(builtins.sum) + expected = grouped.sum() + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + +@pytest.mark.parametrize("f", [max, min, sum]) +@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key +def test_builtins_apply(keys, f): + # see gh-8155 + df = DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"]) + df["jolie"] = np.random.randn(1000) + + gb = df.groupby(keys) + + fname = f.__name__ + result = gb.apply(f) + ngroups = len(df.drop_duplicates(subset=keys)) + + assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))" + assert result.shape == (ngroups, 3), assert_msg + + npfunc = getattr(np, fname) # numpy's equivalent function + if f in [max, min]: + warn = FutureWarning + else: + warn = None + msg = "scalar (max|min) over the entire DataFrame" + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + # stacklevel can be thrown off because (i think) the stack + # goes through some of numpy's C code. + expected = gb.apply(npfunc) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(None): + expected2 = gb.apply(lambda x: npfunc(x, axis=0)) + tm.assert_frame_equal(result, expected2) + + if f != sum: + expected = gb.agg(fname).reset_index() + expected.set_index(keys, inplace=True, drop=False) + tm.assert_frame_equal(result, expected, check_dtype=False) + + tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)()) + + +class TestNumericOnly: + # make sure that we are passing thru kwargs to our agg functions + + @pytest.fixture + def df(self): + # GH3668 + # GH5724 + df = DataFrame( + { + "group": [1, 1, 2], + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "category_string": Series(list("abc")).astype("category"), + "category_int": [7, 8, 9], + "datetime": date_range("20130101", periods=3), + "datetimetz": date_range("20130101", periods=3, tz="US/Eastern"), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + }, + columns=[ + "group", + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ], + ) + return df + + @pytest.mark.parametrize("method", ["mean", "median"]) + def test_averages(self, df, method): + # mean / median + expected_columns_numeric = Index(["int", "float", "category_int"]) + + gb = df.groupby("group") + expected = DataFrame( + { + "category_int": [7.5, 9], + "float": [4.5, 6.0], + "timedelta": [pd.Timedelta("1.5s"), pd.Timedelta("3s")], + "int": [1.5, 3], + "datetime": [ + Timestamp("2013-01-01 12:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + "datetimetz": [ + Timestamp("2013-01-01 12:00:00", tz="US/Eastern"), + Timestamp("2013-01-03 00:00:00", tz="US/Eastern"), + ], + }, + index=Index([1, 2], name="group"), + columns=[ + "int", + "float", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ], + ) + + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = getattr(gb, method)(numeric_only=False) + 
tm.assert_frame_equal(result.reindex_like(expected), expected) + + expected_columns = expected.columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_extrema(self, df, method): + # TODO: min, max *should* handle + # categorical (ordered) dtype + + expected_columns = Index( + [ + "int", + "float", + "string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ] + ) + expected_columns_numeric = expected_columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["first", "last"]) + def test_first_last(self, df, method): + + expected_columns = Index( + [ + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ] + ) + expected_columns_numeric = expected_columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["sum", "cumsum"]) + def test_sum_cumsum(self, df, method): + + expected_columns_numeric = Index(["int", "float", "category_int"]) + expected_columns = Index( + ["int", "float", "string", "category_int", "timedelta"] + ) + if method == "cumsum": + # cumsum loses string + expected_columns = Index(["int", "float", "category_int", "timedelta"]) + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["prod", "cumprod"]) + def test_prod_cumprod(self, df, method): + + expected_columns = Index(["int", "float", "category_int"]) + expected_columns_numeric = expected_columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + @pytest.mark.parametrize("method", ["cummin", "cummax"]) + def test_cummin_cummax(self, df, method): + # like min, max, but don't include strings + expected_columns = Index( + ["int", "float", "category_int", "datetime", "datetimetz", "timedelta"] + ) + + # GH#15561: numeric_only=False set by default like min/max + expected_columns_numeric = expected_columns + + self._check(df, method, expected_columns, expected_columns_numeric) + + def _check(self, df, method, expected_columns, expected_columns_numeric): + gb = df.groupby("group") + + # cummin, cummax dont have numeric_only kwarg, always use False + warn = None + if method in ["cummin", "cummax"]: + # these dont have numeric_only kwarg, always use False + warn = FutureWarning + elif method in ["min", "max"]: + # these have numeric_only kwarg, but default to False + warn = FutureWarning + + with tm.assert_produces_warning( + warn, match="Dropping invalid columns", raise_on_extra_warnings=False + ): + result = getattr(gb, method)() + + tm.assert_index_equal(result.columns, expected_columns_numeric) + + # GH#41475 deprecated silently ignoring nuisance columns + warn = None + if len(expected_columns) < len(gb._obj_with_exclusions.columns): + warn = FutureWarning + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = getattr(gb, method)(numeric_only=False) + + tm.assert_index_equal(result.columns, expected_columns) + + +class TestGroupByNonCythonPaths: + # GH#5610 non-cython calls should not include the grouper + # Tests for code not expected to go through cython paths. 
+ + @pytest.fixture + def df(self): + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) + return df + + @pytest.fixture + def gb(self, df): + gb = df.groupby("A") + return gb + + @pytest.fixture + def gni(self, df): + gni = df.groupby("A", as_index=False) + return gni + + # TODO: non-unique columns, as_index=False + def test_idxmax(self, gb): + # object dtype so idxmax goes through _aggregate_item_by_item + # GH#5610 + # non-cython calls should not include the grouper + expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) + expected.index.name = "A" + msg = "The default value of numeric_only in DataFrameGroupBy.idxmax" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.idxmax() + tm.assert_frame_equal(result, expected) + + def test_idxmin(self, gb): + # object dtype so idxmax goes through _aggregate_item_by_item + # GH#5610 + # non-cython calls should not include the grouper + expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) + expected.index.name = "A" + msg = "The default value of numeric_only in DataFrameGroupBy.idxmin" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.idxmin() + tm.assert_frame_equal(result, expected) + + def test_mad(self, gb, gni): + # mad + expected = DataFrame([[0], [np.nan]], columns=["B"], index=[1, 3]) + expected.index.name = "A" + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = gb.mad() + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[1, 0.0], [3, np.nan]], columns=["A", "B"], index=[0, 1]) + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = gni.mad() + tm.assert_frame_equal(result, expected) + + def test_describe(self, df, gb, gni): + # describe + expected_index = Index([1, 3], name="A") + expected_col = MultiIndex( + levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], + codes=[[0] * 8, list(range(8))], + ) + expected = DataFrame( + [ + [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], + [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + ], + index=expected_index, + columns=expected_col, + ) + result = gb.describe() + tm.assert_frame_equal(result, expected) + + expected = pd.concat( + [ + df[df.A == 1].describe().unstack().to_frame().T, + df[df.A == 3].describe().unstack().to_frame().T, + ] + ) + expected.index = Index([0, 1]) + result = gni.describe() + tm.assert_frame_equal(result, expected) + + +def test_cython_api2(): + + # this takes the fast apply path + + # cumsum (GH5614) + df = DataFrame([[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]], columns=["A", "B", "C"]) + expected = DataFrame([[2, np.nan], [np.nan, 9], [4, 9]], columns=["B", "C"]) + result = df.groupby("A").cumsum() + tm.assert_frame_equal(result, expected) + + # GH 5755 - cumsum is a transformer and should ignore as_index + result = df.groupby("A", as_index=False).cumsum() + tm.assert_frame_equal(result, expected) + + # GH 13994 + result = df.groupby("A").cumsum(axis=1) + expected = df.cumsum(axis=1) + tm.assert_frame_equal(result, expected) + result = df.groupby("A").cumprod(axis=1) + expected = df.cumprod(axis=1) + tm.assert_frame_equal(result, expected) + + +def test_cython_median(): + df = DataFrame(np.random.randn(1000)) + df.values[::2] = np.nan + + labels = np.random.randint(0, 50, size=1000).astype(float) + labels[::17] = np.nan + + result = df.groupby(labels).median() + exp = 
df.groupby(labels).agg(nanops.nanmedian) + tm.assert_frame_equal(result, exp) + + df = DataFrame(np.random.randn(1000, 5)) + rs = df.groupby(labels).agg(np.median) + xp = df.groupby(labels).median() + tm.assert_frame_equal(rs, xp) + + +def test_median_empty_bins(observed): + df = DataFrame(np.random.randint(0, 44, 500)) + + grps = range(0, 55, 5) + bins = pd.cut(df[0], grps) + + result = df.groupby(bins, observed=observed).median() + expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype", ["int8", "int16", "int32", "int64", "float32", "float64", "uint64"] +) +@pytest.mark.parametrize( + "method,data", + [ + ("first", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), + ("last", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), + ("min", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), + ("max", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), + ("nth", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}], "args": [1]}), + ("count", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 2}], "out_type": "int64"}), + ], +) +def test_groupby_non_arithmetic_agg_types(dtype, method, data): + # GH9311, GH6620 + df = DataFrame( + [{"a": 1, "b": 1}, {"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 2, "b": 4}] + ) + + df["b"] = df.b.astype(dtype) + + if "args" not in data: + data["args"] = [] + + if "out_type" in data: + out_type = data["out_type"] + else: + out_type = dtype + + exp = data["df"] + df_out = DataFrame(exp) + + df_out["b"] = df_out.b.astype(out_type) + df_out.set_index("a", inplace=True) + + grpd = df.groupby("a") + t = getattr(grpd, method)(*data["args"]) + tm.assert_frame_equal(t, df_out) + + +@pytest.mark.parametrize( + "i", + [ + ( + Timestamp("2011-01-15 12:50:28.502376"), + Timestamp("2011-01-20 12:50:28.593448"), + ), + (24650000000000001, 24650000000000002), + ], +) +def test_groupby_non_arithmetic_agg_int_like_precision(i): + # see gh-6620, gh-9311 + df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}]) + + grp_exp = { + "first": {"expected": i[0]}, + "last": {"expected": i[1]}, + "min": {"expected": i[0]}, + "max": {"expected": i[1]}, + "nth": {"expected": i[1], "args": [1]}, + "count": {"expected": 2}, + } + + for method, data in grp_exp.items(): + if "args" not in data: + data["args"] = [] + + grouped = df.groupby("a") + res = getattr(grouped, method)(*data["args"]) + + assert res.iloc[0].b == data["expected"] + + +@pytest.mark.parametrize( + "func, values", + [ + ("idxmin", {"c_int": [0, 2], "c_float": [1, 3], "c_date": [1, 2]}), + ("idxmax", {"c_int": [1, 3], "c_float": [0, 2], "c_date": [0, 3]}), + ], +) +@pytest.mark.parametrize("numeric_only", [True, False]) +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") +def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): + # GH 25444 + df = DataFrame( + { + "name": ["A", "A", "B", "B"], + "c_int": [1, 2, 3, 4], + "c_float": [4.02, 3.03, 2.04, 1.05], + "c_date": ["2019", "2018", "2016", "2017"], + } + ) + df["c_date"] = pd.to_datetime(df["c_date"]) + df["c_date_tz"] = df["c_date"].dt.tz_localize("US/Pacific") + df["c_timedelta"] = df["c_date"] - df["c_date"].iloc[0] + df["c_period"] = df["c_date"].dt.to_period("W") + df["c_Integer"] = df["c_int"].astype("Int64") + df["c_Floating"] = df["c_float"].astype("Float64") + + result = getattr(df.groupby("name"), func)(numeric_only=numeric_only) + + expected = DataFrame(values, index=Index(["A", "B"], name="name")) + if numeric_only: + expected = expected.drop(columns=["c_date"]) + 
else: + expected["c_date_tz"] = expected["c_date"] + expected["c_timedelta"] = expected["c_date"] + expected["c_period"] = expected["c_date"] + expected["c_Integer"] = expected["c_int"] + expected["c_Floating"] = expected["c_float"] + + tm.assert_frame_equal(result, expected) + + +def test_idxmin_idxmax_axis1(): + df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) + df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] + + gb = df.groupby("A") + + res = gb.idxmax(axis=1) + + alt = df.iloc[:, 1:].idxmax(axis=1) + indexer = res.index.get_level_values(1) + + tm.assert_series_equal(alt[indexer], res.droplevel("A")) + + df["E"] = date_range("2016-01-01", periods=10) + gb2 = df.groupby("A") + + msg = "reduction operation 'argmax' not allowed for this dtype" + with pytest.raises(TypeError, match=msg): + gb2.idxmax(axis=1) + + +@pytest.mark.parametrize("numeric_only", [True, False, None]) +def test_axis1_numeric_only(request, groupby_func, numeric_only): + if groupby_func in ("idxmax", "idxmin"): + pytest.skip("idxmax and idx_min tested in test_idxmin_idxmax_axis1") + if groupby_func in ("mad", "tshift"): + pytest.skip("mad and tshift are deprecated") + if groupby_func in ("corrwith", "skew"): + msg = "GH#47723 groupby.corrwith and skew do not correctly implement axis=1" + request.node.add_marker(pytest.mark.xfail(reason=msg)) + + df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) + df["E"] = "x" + groups = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] + gb = df.groupby(groups) + method = getattr(gb, groupby_func) + args = get_groupby_method_args(groupby_func, df) + kwargs = {"axis": 1} + if numeric_only is not None: + # when numeric_only is None we don't pass any argument + kwargs["numeric_only"] = numeric_only + + # Functions without numeric_only and axis args + no_args = ("cumprod", "cumsum", "diff", "fillna", "pct_change", "rank", "shift") + # Functions with axis args + has_axis = ( + "cumprod", + "cumsum", + "diff", + "pct_change", + "rank", + "shift", + "cummax", + "cummin", + "idxmin", + "idxmax", + "fillna", + ) + if numeric_only is not None and groupby_func in no_args: + msg = "got an unexpected keyword argument 'numeric_only'" + with pytest.raises(TypeError, match=msg): + method(*args, **kwargs) + elif groupby_func not in has_axis: + msg = "got an unexpected keyword argument 'axis'" + warn = FutureWarning if groupby_func == "skew" and not numeric_only else None + with tm.assert_produces_warning(warn, match="Dropping of nuisance columns"): + with pytest.raises(TypeError, match=msg): + method(*args, **kwargs) + # fillna and shift are successful even on object dtypes + elif (numeric_only is None or not numeric_only) and groupby_func not in ( + "fillna", + "shift", + ): + msgs = ( + # cummax, cummin, rank + "not supported between instances of", + # cumprod + "can't multiply sequence by non-int of type 'float'", + # cumsum, diff, pct_change + "unsupported operand type", + ) + with pytest.raises(TypeError, match=f"({'|'.join(msgs)})"): + method(*args, **kwargs) + else: + result = method(*args, **kwargs) + + df_expected = df.drop(columns="E").T if numeric_only else df.T + expected = getattr(df_expected, groupby_func)(*args).T + if groupby_func == "shift" and not numeric_only: + # shift with axis=1 leaves the leftmost column as numeric + # but transposing for expected gives us object dtype + expected = expected.astype(float) + + tm.assert_equal(result, expected) + + +def test_groupby_cumprod(): + # GH 4095 + df = DataFrame({"key": ["b"] * 10, "value": 2}) + + actual = 
df.groupby("key")["value"].cumprod() + expected = df.groupby("key", group_keys=False)["value"].apply(lambda x: x.cumprod()) + expected.name = "value" + tm.assert_series_equal(actual, expected) + + df = DataFrame({"key": ["b"] * 100, "value": 2}) + actual = df.groupby("key")["value"].cumprod() + # if overflows, groupby product casts to float + # while numpy passes back invalid values + df["value"] = df["value"].astype(float) + expected = df.groupby("key", group_keys=False)["value"].apply(lambda x: x.cumprod()) + expected.name = "value" + tm.assert_series_equal(actual, expected) + + +def test_groupby_cumprod_nan_influences_other_columns(): + # GH#48064 + df = DataFrame( + { + "a": 1, + "b": [1, np.nan, 2], + "c": [1, 2, 3.0], + } + ) + result = df.groupby("a").cumprod(numeric_only=True, skipna=False) + expected = DataFrame({"b": [1, np.nan, np.nan], "c": [1, 2, 6.0]}) + tm.assert_frame_equal(result, expected) + + +def scipy_sem(*args, **kwargs): + from scipy.stats import sem + + return sem(*args, ddof=1, **kwargs) + + +@pytest.mark.parametrize( + "op,targop", + [ + ("mean", np.mean), + ("median", np.median), + ("std", np.std), + ("var", np.var), + ("sum", np.sum), + ("prod", np.prod), + ("min", np.min), + ("max", np.max), + ("first", lambda x: x.iloc[0]), + ("last", lambda x: x.iloc[-1]), + ("count", np.size), + pytest.param("sem", scipy_sem, marks=td.skip_if_no_scipy), + ], +) +def test_ops_general(op, targop): + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + result = getattr(df.groupby(labels), op)() + expected = df.groupby(labels).agg(targop) + tm.assert_frame_equal(result, expected) + + +def test_max_nan_bug(): + raw = """,Date,app,File +-04-23,2013-04-23 00:00:00,,log080001.log +-05-06,2013-05-06 00:00:00,,log.log +-05-07,2013-05-07 00:00:00,OE,xlsx""" + + df = pd.read_csv(StringIO(raw), parse_dates=[0]) + gb = df.groupby("Date") + r = gb[["File"]].max() + e = gb["File"].max().to_frame() + tm.assert_frame_equal(r, e) + assert not r["File"].isna().any() + + +def test_nlargest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list("a" * 5 + "b" * 5)) + gb = a.groupby(b) + r = gb.nlargest(3) + e = Series( + [7, 5, 3, 10, 9, 6], + index=MultiIndex.from_arrays([list("aaabbb"), [3, 2, 1, 9, 5, 8]]), + ) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series( + [3, 2, 1, 3, 3, 2], + index=MultiIndex.from_arrays([list("aaabbb"), [2, 3, 1, 6, 5, 7]]), + ) + tm.assert_series_equal(gb.nlargest(3, keep="last"), e) + + +def test_nlargest_mi_grouper(): + # see gh-21411 + npr = np.random.RandomState(123456789) + + dts = date_range("20180101", periods=10) + iterables = [dts, ["one", "two"]] + + idx = MultiIndex.from_product(iterables, names=["first", "second"]) + s = Series(npr.randn(20), index=idx) + + result = s.groupby("first").nlargest(1) + + exp_idx = MultiIndex.from_tuples( + [ + (dts[0], dts[0], "one"), + (dts[1], dts[1], "one"), + (dts[2], dts[2], "one"), + (dts[3], dts[3], "two"), + (dts[4], dts[4], "one"), + (dts[5], dts[5], "one"), + (dts[6], dts[6], "one"), + (dts[7], dts[7], "one"), + (dts[8], dts[8], "two"), + (dts[9], dts[9], "one"), + ], + names=["first", "first", "second"], + ) + + exp_values = [ + 2.2129019979039612, + 1.8417114045748335, + 0.858963679564603, + 1.3759151378258088, + 0.9430284594687134, + 0.5296914208183142, + 0.8318045593815487, + -0.8476703342910327, + 0.3804446884133735, + -0.8028845810770998, + ] + + expected = Series(exp_values, 
index=exp_idx) + tm.assert_series_equal(result, expected, check_exact=False, rtol=1e-3) + + +def test_nsmallest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list("a" * 5 + "b" * 5)) + gb = a.groupby(b) + r = gb.nsmallest(3) + e = Series( + [1, 2, 3, 0, 4, 6], + index=MultiIndex.from_arrays([list("aaabbb"), [0, 4, 1, 6, 7, 8]]), + ) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series( + [0, 1, 1, 0, 1, 2], + index=MultiIndex.from_arrays([list("aaabbb"), [4, 1, 0, 9, 8, 7]]), + ) + tm.assert_series_equal(gb.nsmallest(3, keep="last"), e) + + +@pytest.mark.parametrize( + "data, groups", + [([0, 1, 2, 3], [0, 0, 1, 1]), ([0], [0])], +) +@pytest.mark.parametrize("method", ["nlargest", "nsmallest"]) +def test_nlargest_and_smallest_noop(data, groups, method): + # GH 15272, GH 16345, GH 29129 + # Test nlargest/smallest when it results in a noop, + # i.e. input is sorted and group size <= n + if method == "nlargest": + data = list(reversed(data)) + ser = Series(data, name="a") + result = getattr(ser.groupby(groups), method)(n=2) + expected = Series(data, index=MultiIndex.from_arrays([groups, ser.index]), name="a") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["cumprod", "cumsum"]) +def test_numpy_compat(func): + # see gh-12811 + df = DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) + g = df.groupby("A") + + msg = "numpy operations are not valid with groupby" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(foo=1) + + +def test_cummin(dtypes_for_minmax): + dtype = dtypes_for_minmax[0] + min_val = dtypes_for_minmax[1] + + # GH 15048 + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) + expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] + + df = base_df.astype(dtype) + + expected = DataFrame({"B": expected_mins}).astype(dtype) + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + result = df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test w/ min value for dtype + df.loc[[2, 6], "B"] = min_val + df.loc[[1, 5], "B"] = min_val + 1 + expected.loc[[2, 3, 6, 7], "B"] = min_val + expected.loc[[1, 5], "B"] = min_val + 1 # should not be rounded to min_val + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected, check_exact=True) + expected = ( + df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() + ) + tm.assert_frame_equal(result, expected, check_exact=True) + + # Test nan in some values + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) + result = base_df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + expected = ( + base_df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() + ) + tm.assert_frame_equal(result, expected) + + # GH 15561 + df = DataFrame({"a": [1], "b": pd.to_datetime(["2001"])}) + expected = Series(pd.to_datetime("2001"), index=[0], name="b") + + result = df.groupby("a")["b"].cummin() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = DataFrame({"a": [1, 2, 1], "b": [1, 2, 2]}) + result = df.groupby("a").b.cummin() + expected = Series([1, 2, 1], name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) 
+@pytest.mark.parametrize("dtype", ["UInt64", "Int64", "Float64", "float", "boolean"]) +def test_cummin_max_all_nan_column(method, dtype): + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + base_df["B"] = base_df["B"].astype(dtype) + grouped = base_df.groupby("A") + + expected = DataFrame({"B": [np.nan] * 8}, dtype=dtype) + result = getattr(grouped, method)() + tm.assert_frame_equal(expected, result) + + result = getattr(grouped["B"], method)().to_frame() + tm.assert_frame_equal(expected, result) + + +def test_cummax(dtypes_for_minmax): + dtype = dtypes_for_minmax[0] + max_val = dtypes_for_minmax[2] + + # GH 15048 + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) + expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] + + df = base_df.astype(dtype) + + expected = DataFrame({"B": expected_maxs}).astype(dtype) + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + result = df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test w/ max value for dtype + df.loc[[2, 6], "B"] = max_val + expected.loc[[2, 3, 6, 7], "B"] = max_val + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = ( + df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() + ) + tm.assert_frame_equal(result, expected) + + # Test nan in some values + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) + result = base_df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = ( + base_df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() + ) + tm.assert_frame_equal(result, expected) + + # GH 15561 + df = DataFrame({"a": [1], "b": pd.to_datetime(["2001"])}) + expected = Series(pd.to_datetime("2001"), index=[0], name="b") + + result = df.groupby("a")["b"].cummax() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = DataFrame({"a": [1, 2, 1], "b": [2, 1, 1]}) + result = df.groupby("a").b.cummax() + expected = Series([2, 1, 2], name="b") + tm.assert_series_equal(result, expected) + + +def test_cummax_i8_at_implementation_bound(): + # the minimum value used to be treated as NPY_NAT+1 instead of NPY_NAT + # for int64 dtype GH#46382 + ser = Series([pd.NaT.value + n for n in range(5)]) + df = DataFrame({"A": 1, "B": ser, "C": ser.view("M8[ns]")}) + gb = df.groupby("A") + + res = gb.cummax() + exp = df[["B", "C"]] + tm.assert_frame_equal(res, exp) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"]) +@pytest.mark.parametrize( + "groups,expected_data", + [ + ([1, 1, 1], [1, None, None]), + ([1, 2, 3], [1, None, 2]), + ([1, 3, 3], [1, None, None]), + ], +) +def test_cummin_max_skipna(method, dtype, groups, expected_data): + # GH-34047 + df = DataFrame({"a": Series([1, None, 2], dtype=dtype)}) + orig = df.copy() + gb = df.groupby(groups)["a"] + + result = getattr(gb, method)(skipna=False) + expected = Series(expected_data, dtype=dtype, name="a") + + # check we didn't accidentally alter df + tm.assert_frame_equal(df, orig) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +def test_cummin_max_skipna_multiple_cols(method): + # Ensure missing value in "a" doesn't cause "b" to be nan-filled + df = DataFrame({"a": [np.nan, 2.0, 2.0], "b": [2.0, 2.0, 2.0]}) + gb = df.groupby([1, 1, 1])[["a", "b"]] + + 
result = getattr(gb, method)(skipna=False) + expected = DataFrame({"a": [np.nan, np.nan, np.nan], "b": [2.0, 2.0, 2.0]}) + + tm.assert_frame_equal(result, expected) + + +@td.skip_if_32bit +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize( + "dtype,val", [("UInt64", np.iinfo("uint64").max), ("Int64", 2**53 + 1)] +) +def test_nullable_int_not_cast_as_float(method, dtype, val): + data = [val, pd.NA] + df = DataFrame({"grp": [1, 1], "b": data}, dtype=dtype) + grouped = df.groupby("grp") + + result = grouped.transform(method) + expected = DataFrame({"b": data}, dtype=dtype) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "in_vals, out_vals", + [ + # Basics: strictly increasing (T), strictly decreasing (F), + # abs val increasing (F), non-strictly increasing (T) + ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], [True, False, False, True]), + # Test with inf vals + ( + [1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf], + [True, False, True, False], + ), + # Test with nan vals; should always be False + ( + [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False], + ), + ], +) +def test_is_monotonic_increasing(in_vals, out_vals): + # GH 17015 + source_dict = { + "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"], + "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"], + "C": in_vals, + } + df = DataFrame(source_dict) + result = df.groupby("B").C.is_monotonic_increasing + index = Index(list("abcd"), name="B") + expected = Series(index=index, data=out_vals, name="C") + tm.assert_series_equal(result, expected) + + # Also check result equal to manually taking x.is_monotonic_increasing. + expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "in_vals, out_vals", + [ + # Basics: strictly decreasing (T), strictly increasing (F), + # abs val decreasing (F), non-strictly increasing (T) + ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], [True, False, False, True]), + # Test with inf vals + ( + [np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf], + [True, True, False, True], + ), + # Test with nan vals; should always be False + ( + [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False], + ), + ], +) +def test_is_monotonic_decreasing(in_vals, out_vals): + # GH 17015 + source_dict = { + "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"], + "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"], + "C": in_vals, + } + + df = DataFrame(source_dict) + result = df.groupby("B").C.is_monotonic_decreasing + index = Index(list("abcd"), name="B") + expected = Series(index=index, data=out_vals, name="C") + tm.assert_series_equal(result, expected) + + +# describe +# -------------------------------- + + +def test_apply_describe_bug(mframe): + grouped = mframe.groupby(level="first") + grouped.describe() # it works! 
+ + +def test_series_describe_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + tm.assert_series_equal(result["mean"], grouped.mean(), check_names=False) + tm.assert_series_equal(result["std"], grouped.std(), check_names=False) + tm.assert_series_equal(result["min"], grouped.min(), check_names=False) + + +def test_series_describe_single(): + ts = tm.makeTimeSeries() + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x.describe()) + expected = grouped.describe().stack() + tm.assert_series_equal(result, expected) + + +def test_series_index_name(df): + grouped = df.loc[:, ["C"]].groupby(df["A"]) + result = grouped.agg(lambda x: x.mean()) + assert result.index.name == "A" + + +def test_frame_describe_multikey(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + desc_groups = [] + for col in tsframe: + group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels + group_col = MultiIndex( + levels=[[col], group.columns], + codes=[[0] * len(group.columns), range(len(group.columns))], + ) + group = DataFrame(group.values, columns=group_col, index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) + + groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) + result = groupedT.describe() + expected = tsframe.describe().T + # reverting the change from https://github.com/pandas-dev/pandas/pull/35441/ + expected.index = MultiIndex( + levels=[[0, 1], expected.index], + codes=[[0, 0, 1, 1], range(len(expected.index))], + ) + tm.assert_frame_equal(result, expected) + + +def test_frame_describe_tupleindex(): + + # GH 14848 - regression from 0.19.0 to 0.19.1 + df1 = DataFrame( + { + "x": [1, 2, 3, 4, 5] * 3, + "y": [10, 20, 30, 40, 50] * 3, + "z": [100, 200, 300, 400, 500] * 3, + } + ) + df1["k"] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 + df2 = df1.rename(columns={"k": "key"}) + msg = "Names should be list-like for a MultiIndex" + with pytest.raises(ValueError, match=msg): + df1.groupby("k").describe() + with pytest.raises(ValueError, match=msg): + df2.groupby("key").describe() + + +def test_frame_describe_unstacked_format(): + # GH 4792 + prices = { + Timestamp("2011-01-06 10:59:05", tz=None): 24990, + Timestamp("2011-01-06 12:43:33", tz=None): 25499, + Timestamp("2011-01-06 12:54:09", tz=None): 25499, + } + volumes = { + Timestamp("2011-01-06 10:59:05", tz=None): 1500000000, + Timestamp("2011-01-06 12:43:33", tz=None): 5000000000, + Timestamp("2011-01-06 12:54:09", tz=None): 100000000, + } + df = DataFrame({"PRICE": prices, "VOLUME": volumes}) + result = df.groupby("PRICE").VOLUME.describe() + data = [ + df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist(), + ] + expected = DataFrame( + data, + index=Index([24990, 25499], name="PRICE"), + columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.filterwarnings( + "ignore:" + "indexing past lexsort depth may impact performance:" + "pandas.errors.PerformanceWarning" +) +@pytest.mark.parametrize("as_index", [True, False]) +def test_describe_with_duplicate_output_column_names(as_index): + # GH 35314 + df = DataFrame( + { + "a": [99, 99, 99, 88, 88, 88], + "b": [1, 2, 3, 4, 5, 6], + "c": [10, 20, 30, 40, 50, 60], + }, + columns=["a", "b", "b"], + 
copy=False, + ) + + expected = ( + DataFrame.from_records( + [ + ("a", "count", 3.0, 3.0), + ("a", "mean", 88.0, 99.0), + ("a", "std", 0.0, 0.0), + ("a", "min", 88.0, 99.0), + ("a", "25%", 88.0, 99.0), + ("a", "50%", 88.0, 99.0), + ("a", "75%", 88.0, 99.0), + ("a", "max", 88.0, 99.0), + ("b", "count", 3.0, 3.0), + ("b", "mean", 5.0, 2.0), + ("b", "std", 1.0, 1.0), + ("b", "min", 4.0, 1.0), + ("b", "25%", 4.5, 1.5), + ("b", "50%", 5.0, 2.0), + ("b", "75%", 5.5, 2.5), + ("b", "max", 6.0, 3.0), + ("b", "count", 3.0, 3.0), + ("b", "mean", 5.0, 2.0), + ("b", "std", 1.0, 1.0), + ("b", "min", 4.0, 1.0), + ("b", "25%", 4.5, 1.5), + ("b", "50%", 5.0, 2.0), + ("b", "75%", 5.5, 2.5), + ("b", "max", 6.0, 3.0), + ], + ) + .set_index([0, 1]) + .T + ) + expected.columns.names = [None, None] + expected.index = Index([88, 99], name="a") + + if as_index: + expected = expected.drop(columns=["a"], level=0) + else: + expected = expected.reset_index(drop=True) + + result = df.groupby("a", as_index=as_index).describe() + + tm.assert_frame_equal(result, expected) + + +def test_groupby_mean_no_overflow(): + # Regression test for (#22487) + df = DataFrame( + { + "user": ["A", "A", "A", "A", "A"], + "connections": [4970, 4749, 4719, 4704, 18446744073699999744], + } + ) + assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840 + + +@pytest.mark.parametrize( + "values", + [ + { + "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], + }, + {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, + ], +) +@pytest.mark.parametrize("function", ["mean", "median", "var"]) +def test_apply_to_nullable_integer_returns_float(values, function): + # https://github.com/pandas-dev/pandas/issues/32219 + output = 0.5 if function == "var" else 1.5 + arr = np.array([output] * 3, dtype=float) + idx = Index([1, 2, 3], name="a", dtype="Int64") + expected = DataFrame({"b": arr}, index=idx).astype("Float64") + + groups = DataFrame(values, dtype="Int64").groupby("a") + + result = getattr(groups, function)() + tm.assert_frame_equal(result, expected) + + result = groups.agg(function) + tm.assert_frame_equal(result, expected) + + result = groups.agg([function]) + expected.columns = MultiIndex.from_tuples([("b", function)]) + tm.assert_frame_equal(result, expected) + + +def test_groupby_sum_below_mincount_nullable_integer(): + # https://github.com/pandas-dev/pandas/issues/32861 + df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64") + grouped = df.groupby("a") + idx = Index([0, 1, 2], name="a", dtype="Int64") + + result = grouped["b"].sum(min_count=2) + expected = Series([pd.NA] * 3, dtype="Int64", index=idx, name="b") + tm.assert_series_equal(result, expected) + + result = grouped.sum(min_count=2) + expected = DataFrame({"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx) + tm.assert_frame_equal(result, expected) + + +def test_mean_on_timedelta(): + # GH 17382 + df = DataFrame({"time": pd.to_timedelta(range(10)), "cat": ["A", "B"] * 5}) + result = df.groupby("cat")["time"].mean() + expected = Series( + pd.to_timedelta([4, 5]), name="time", index=Index(["A", "B"], name="cat") + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_sum_timedelta_with_nat(): + # GH#42659 + df = DataFrame( + { + "a": [1, 1, 2, 2], + "b": [pd.Timedelta("1d"), pd.Timedelta("2d"), pd.Timedelta("3d"), pd.NaT], + } + ) + td3 = pd.Timedelta(days=3) + + gb = df.groupby("a") + + res = gb.sum() + expected = DataFrame({"b": [td3, td3]}, index=Index([1, 2], name="a")) + 
tm.assert_frame_equal(res, expected) + + res = gb["b"].sum() + tm.assert_series_equal(res, expected["b"]) + + res = gb["b"].sum(min_count=2) + expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index) + tm.assert_series_equal(res, expected) + + +@pytest.mark.parametrize( + "kernel, numeric_only_default, drops_nuisance, has_arg", + [ + ("all", False, False, False), + ("any", False, False, False), + ("bfill", False, False, False), + ("corr", True, False, True), + ("corrwith", True, False, True), + ("cov", True, False, True), + ("cummax", False, True, True), + ("cummin", False, True, True), + ("cumprod", True, True, True), + ("cumsum", True, True, True), + ("diff", False, False, False), + ("ffill", False, False, False), + ("fillna", False, False, False), + ("first", False, False, True), + ("idxmax", True, False, True), + ("idxmin", True, False, True), + ("last", False, False, True), + ("max", False, True, True), + ("mean", True, True, True), + ("median", True, True, True), + ("min", False, True, True), + ("nth", False, False, False), + ("nunique", False, False, False), + ("pct_change", False, False, False), + ("prod", True, True, True), + ("quantile", True, False, True), + ("sem", True, True, True), + ("skew", True, False, True), + ("std", True, True, True), + ("sum", True, True, True), + ("var", True, False, True), + ], +) +@pytest.mark.parametrize("numeric_only", [True, False, lib.no_default]) +@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) +def test_deprecate_numeric_only( + kernel, numeric_only_default, drops_nuisance, has_arg, numeric_only, keys +): + # GH#46072 + # drops_nuisance: Whether the op drops nuisance columns even when numeric_only=False + # has_arg: Whether the op has a numeric_only arg + df = DataFrame({"a1": [1, 1], "a2": [2, 2], "a3": [5, 6], "b": 2 * [object]}) + + args = get_groupby_method_args(kernel, df) + kwargs = {} if numeric_only is lib.no_default else {"numeric_only": numeric_only} + + gb = df.groupby(keys) + method = getattr(gb, kernel) + if has_arg and ( + # Cases where b does not appear in the result + numeric_only is True + or (numeric_only is lib.no_default and numeric_only_default) + or drops_nuisance + ): + if numeric_only is True or (not numeric_only_default and not drops_nuisance): + warn = None + else: + warn = FutureWarning + if numeric_only is lib.no_default and numeric_only_default: + msg = f"The default value of numeric_only in DataFrameGroupBy.{kernel}" + else: + msg = f"Dropping invalid columns in DataFrameGroupBy.{kernel}" + with tm.assert_produces_warning(warn, match=msg): + result = method(*args, **kwargs) + + assert "b" not in result.columns + elif ( + # kernels that work on any dtype and have numeric_only arg + kernel in ("first", "last") + or ( + # kernels that work on any dtype and don't have numeric_only arg + kernel in ("any", "all", "bfill", "ffill", "fillna", "nth", "nunique") + and numeric_only is lib.no_default + ) + ): + result = method(*args, **kwargs) + assert "b" in result.columns + elif has_arg: + assert numeric_only is not True + assert numeric_only is not lib.no_default or numeric_only_default is False + assert not drops_nuisance + # kernels that are successful on any dtype were above; this will fail + msg = ( + "(not allowed for this dtype" + "|must be a string or a number" + "|cannot be performed against 'object' dtypes" + "|must be a string or a real number" + "|unsupported operand type)" + ) + with pytest.raises(TypeError, match=msg): + method(*args, **kwargs) + elif not has_arg and 
numeric_only is not lib.no_default: + with pytest.raises( + TypeError, match="got an unexpected keyword argument 'numeric_only'" + ): + method(*args, **kwargs) + else: + assert kernel in ("diff", "pct_change") + assert numeric_only is lib.no_default + # Doesn't have numeric_only argument and fails on nuisance columns + with pytest.raises(TypeError, match=r"unsupported operand type"): + method(*args, **kwargs) + + +@pytest.mark.parametrize("dtype", [bool, int, float, object]) +def test_deprecate_numeric_only_series(dtype, groupby_func, request): + # GH#46560 + if groupby_func in ("backfill", "mad", "pad", "tshift"): + pytest.skip("method is deprecated") + elif groupby_func == "corrwith": + msg = "corrwith is not implemented on SeriesGroupBy" + request.node.add_marker(pytest.mark.xfail(reason=msg)) + + grouper = [0, 0, 1] + + ser = Series([1, 0, 0], dtype=dtype) + gb = ser.groupby(grouper) + method = getattr(gb, groupby_func) + + expected_ser = Series([1, 0, 0]) + expected_gb = expected_ser.groupby(grouper) + expected_method = getattr(expected_gb, groupby_func) + + args = get_groupby_method_args(groupby_func, ser) + + fails_on_numeric_object = ( + "corr", + "cov", + "cummax", + "cummin", + "cumprod", + "cumsum", + "idxmax", + "idxmin", + "quantile", + ) + # ops that give an object result on object input + obj_result = ( + "first", + "last", + "nth", + "bfill", + "ffill", + "shift", + "sum", + "diff", + "pct_change", + ) + + # Test default behavior; kernels that fail may be enabled in the future but kernels + # that succeed should not be allowed to fail (without deprecation, at least) + if groupby_func in fails_on_numeric_object and dtype is object: + if groupby_func in ("idxmax", "idxmin"): + msg = "not allowed for this dtype" + elif groupby_func == "quantile": + msg = "cannot be performed against 'object' dtypes" + else: + msg = "is not supported for object dtype" + with pytest.raises(TypeError, match=msg): + method(*args) + elif dtype is object: + result = method(*args) + expected = expected_method(*args) + if groupby_func in obj_result: + expected = expected.astype(object) + tm.assert_series_equal(result, expected) + + has_numeric_only = ( + "first", + "last", + "max", + "mean", + "median", + "min", + "prod", + "quantile", + "sem", + "skew", + "std", + "sum", + "var", + "cummax", + "cummin", + "cumprod", + "cumsum", + ) + if groupby_func not in has_numeric_only: + msg = "got an unexpected keyword argument 'numeric_only'" + with pytest.raises(TypeError, match=msg): + method(*args, numeric_only=True) + elif dtype is object: + err_category = NotImplementedError + err_msg = f"{groupby_func} does not implement numeric_only" + if groupby_func.startswith("cum"): + # cum ops already exhibit future behavior + warn_category = None + warn_msg = "" + err_category = TypeError + err_msg = f"{groupby_func} is not supported for object dtype" + elif groupby_func == "skew": + warn_category = FutureWarning + warn_msg = "will raise a TypeError in the future" + else: + warn_category = FutureWarning + warn_msg = "This will raise a TypeError" + + with tm.assert_produces_warning(warn_category, match=warn_msg): + with pytest.raises(err_category, match=err_msg): + method(*args, numeric_only=True) + else: + result = method(*args, numeric_only=True) + expected = method(*args, numeric_only=False) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [int, float, object]) +@pytest.mark.parametrize( + "kwargs", + [ + {"percentiles": [0.10, 0.20, 0.30], "include": "all", "exclude": None}, + 
{"percentiles": [0.10, 0.20, 0.30], "include": None, "exclude": ["int"]}, + {"percentiles": [0.10, 0.20, 0.30], "include": ["int"], "exclude": None}, + ], +) +def test_groupby_empty_dataset(dtype, kwargs): + # GH#41575 + df = DataFrame([[1, 2, 3]], columns=["A", "B", "C"], dtype=dtype) + df["B"] = df["B"].astype(int) + df["C"] = df["C"].astype(float) + + result = df.iloc[:0].groupby("A").describe(**kwargs) + expected = df.groupby("A").describe(**kwargs).reset_index(drop=True).iloc[:0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:0].groupby("A").B.describe(**kwargs) + expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0] + expected.index = Index([]) + tm.assert_frame_equal(result, expected) + + +def test_corrwith_with_1_axis(): + # GH 47723 + df = DataFrame({"a": [1, 1, 2], "b": [3, 7, 4]}) + result = df.groupby("a").corrwith(df, axis=1) + index = Index( + data=[(1, 0), (1, 1), (1, 2), (2, 2), (2, 0), (2, 1)], + name=("a", None), + ) + expected = Series([np.nan] * 6, index=index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:.* is deprecated:FutureWarning") +def test_multiindex_group_all_columns_when_empty(groupby_func): + # GH 32464 + df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) + gb = df.groupby(["a", "b", "c"], group_keys=False) + method = getattr(gb, groupby_func) + args = get_groupby_method_args(groupby_func, df) + + result = method(*args).index + expected = df.index + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py new file mode 100644 index 00000000..f6bbb9ed --- /dev/null +++ b/pandas/tests/groupby/test_groupby.py @@ -0,0 +1,2923 @@ +from datetime import datetime +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._libs import lib +from pandas.compat import IS64 +from pandas.errors import ( + PerformanceWarning, + SpecificationError, +) + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Grouper, + Index, + MultiIndex, + RangeIndex, + Series, + Timedelta, + Timestamp, + date_range, + to_datetime, +) +import pandas._testing as tm +from pandas.core.arrays import BooleanArray +import pandas.core.common as com +from pandas.core.groupby.base import maybe_normalize_deprecated_kernels +from pandas.tests.groupby import get_groupby_method_args + + +def test_repr(): + # GH18203 + result = repr(Grouper(key="A", level="B")) + expected = "Grouper(key='A', level='B', axis=0, sort=False, dropna=True)" + assert result == expected + + +@pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"]) +def test_basic(dtype): + + data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) + + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) + + grouped = data.groupby(lambda x: x // 3, group_keys=False) + + for k, v in grouped: + assert len(v) == 3 + + agged = grouped.aggregate(np.mean) + assert agged[1] == 1 + + tm.assert_series_equal(agged, grouped.agg(np.mean)) # shorthand + tm.assert_series_equal(agged, grouped.mean()) + tm.assert_series_equal(grouped.agg(np.sum), grouped.sum()) + + expected = grouped.apply(lambda x: x * x.sum()) + transformed = grouped.transform(lambda x: x * x.sum()) + assert transformed[7] == 12 + tm.assert_series_equal(transformed, expected) + + value_grouped = data.groupby(data) + tm.assert_series_equal( + value_grouped.aggregate(np.mean), agged, check_index_type=False + ) + + # complex agg + 
agged = grouped.aggregate([np.mean, np.std]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate({"one": np.mean, "two": np.std}) + + group_constants = {0: 10, 1: 20, 2: 30} + agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) + assert agged[1] == 21 + + # corner cases + msg = "Must produce aggregated value" + # exception raised is type Exception + with pytest.raises(Exception, match=msg): + grouped.aggregate(lambda x: x * 2) + + +def test_groupby_nonobject_dtype(mframe, df_mixed_floats): + key = mframe.index.codes[0] + grouped = mframe.groupby(key) + result = grouped.sum() + + expected = mframe.groupby(key.astype("O")).sum() + tm.assert_frame_equal(result, expected) + + # GH 3911, mixed frame non-conversion + df = df_mixed_floats.copy() + df["value"] = range(len(df)) + + def max_value(group): + return group.loc[group["value"].idxmax()] + + applied = df.groupby("A").apply(max_value) + result = applied.dtypes + expected = df.dtypes + tm.assert_series_equal(result, expected) + + +def test_groupby_return_type(): + + # GH2893, return a reduced type + df1 = DataFrame( + [ + {"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 2, "val2": 27}, + {"val1": 2, "val2": 12}, + ] + ) + + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() + + with tm.assert_produces_warning(FutureWarning): + result = df1.groupby("val1", squeeze=True).apply(func) + assert isinstance(result, Series) + + df2 = DataFrame( + [ + {"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 1, "val2": 27}, + {"val1": 1, "val2": 12}, + ] + ) + + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() + + with tm.assert_produces_warning(FutureWarning): + result = df2.groupby("val1", squeeze=True).apply(func) + assert isinstance(result, Series) + + # GH3596, return a consistent type (regression in 0.11 from 0.10.1) + df = DataFrame([[1, 1], [1, 1]], columns=["X", "Y"]) + with tm.assert_produces_warning(FutureWarning): + result = df.groupby("X", squeeze=False).count() + assert isinstance(result, DataFrame) + + +def test_inconsistent_return_type(): + # GH5592 + # inconsistent return type + df = DataFrame( + { + "A": ["Tiger", "Tiger", "Tiger", "Lamb", "Lamb", "Pony", "Pony"], + "B": Series(np.arange(7), dtype="int64"), + "C": date_range("20130101", periods=7), + } + ) + + def f(grp): + return grp.iloc[0] + + expected = df.groupby("A").first()[["B"]] + result = df.groupby("A").apply(f)[["B"]] + tm.assert_frame_equal(result, expected) + + def f(grp): + if grp.name == "Tiger": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["B"]] + e = expected.copy() + e.loc["Tiger"] = np.nan + tm.assert_frame_equal(result, e) + + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["B"]] + e = expected.copy() + e.loc["Pony"] = np.nan + tm.assert_frame_equal(result, e) + + # 5592 revisited, with datetimes + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["C"]] + e = df.groupby("A").first()[["C"]] + e.loc["Pony"] = pd.NaT + tm.assert_frame_equal(result, e) + + # scalar outputs + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0].loc["C"] + + result = df.groupby("A").apply(f) + e = df.groupby("A").first()["C"].copy() + e.loc["Pony"] = np.nan + e.name = None + tm.assert_series_equal(result, e) + + +def test_pass_args_kwargs(ts, tsframe): + def f(x, q=None, axis=0): + return 
np.percentile(x, q, axis=axis) + + g = lambda x: np.percentile(x, 80, axis=0) + + # Series + ts_grouped = ts.groupby(lambda x: x.month) + agg_result = ts_grouped.agg(np.percentile, 80, axis=0) + apply_result = ts_grouped.apply(np.percentile, 80, axis=0) + trans_result = ts_grouped.transform(np.percentile, 80, axis=0) + + agg_expected = ts_grouped.quantile(0.8) + trans_expected = ts_grouped.transform(g) + + tm.assert_series_equal(apply_result, agg_expected) + tm.assert_series_equal(agg_result, agg_expected) + tm.assert_series_equal(trans_result, trans_expected) + + agg_result = ts_grouped.agg(f, q=80) + apply_result = ts_grouped.apply(f, q=80) + trans_result = ts_grouped.transform(f, q=80) + tm.assert_series_equal(agg_result, agg_expected) + tm.assert_series_equal(apply_result, agg_expected) + tm.assert_series_equal(trans_result, trans_expected) + + # DataFrame + for as_index in [True, False]: + df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index) + agg_result = df_grouped.agg(np.percentile, 80, axis=0) + apply_result = df_grouped.apply(DataFrame.quantile, 0.8) + expected = df_grouped.quantile(0.8) + tm.assert_frame_equal(apply_result, expected, check_names=False) + tm.assert_frame_equal(agg_result, expected) + + apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8]) + expected_seq = df_grouped.quantile([0.4, 0.8]) + tm.assert_frame_equal(apply_result, expected_seq, check_names=False) + + agg_result = df_grouped.agg(f, q=80) + apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) + tm.assert_frame_equal(agg_result, expected) + tm.assert_frame_equal(apply_result, expected, check_names=False) + + +@pytest.mark.parametrize("as_index", [True, False]) +def test_pass_args_kwargs_duplicate_columns(tsframe, as_index): + # go through _aggregate_frame with self.axis == 0 and duplicate columns + tsframe.columns = ["A", "B", "A", "C"] + gb = tsframe.groupby(lambda x: x.month, as_index=as_index) + + res = gb.agg(np.percentile, 80, axis=0) + + ex_data = { + 1: tsframe[tsframe.index.month == 1].quantile(0.8), + 2: tsframe[tsframe.index.month == 2].quantile(0.8), + } + expected = DataFrame(ex_data).T + if not as_index: + # TODO: try to get this more consistent? 
+ expected.index = Index(range(2)) + + tm.assert_frame_equal(res, expected) + + +def test_len(): + df = tm.makeTimeDataFrame() + grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) + assert len(grouped) == len(df) + + grouped = df.groupby([lambda x: x.year, lambda x: x.month]) + expected = len({(x.year, x.month) for x in df.index}) + assert len(grouped) == expected + + # issue 11016 + df = DataFrame({"a": [np.nan] * 3, "b": [1, 2, 3]}) + assert len(df.groupby("a")) == 0 + assert len(df.groupby("b")) == 3 + assert len(df.groupby(["a", "b"])) == 3 + + +def test_basic_regression(): + # regression + result = Series([1.0 * x for x in list(range(1, 10)) * 10]) + + data = np.random.random(1100) * 10.0 + groupings = Series(data) + + grouped = result.groupby(groupings) + grouped.mean() + + +@pytest.mark.parametrize( + "dtype", ["float64", "float32", "int64", "int32", "int16", "int8"] +) +def test_with_na_groups(dtype): + index = Index(np.arange(10)) + values = Series(np.ones(10), index, dtype=dtype) + labels = Series( + [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"], + index=index, + ) + + # this SHOULD be an int + grouped = values.groupby(labels) + agged = grouped.agg(len) + expected = Series([4, 2], index=["bar", "foo"]) + + tm.assert_series_equal(agged, expected, check_dtype=False) + + # assert issubclass(agged.dtype.type, np.integer) + + # explicitly return a float from my function + def f(x): + return float(len(x)) + + agged = grouped.agg(f) + expected = Series([4.0, 2.0], index=["bar", "foo"]) + + tm.assert_series_equal(agged, expected) + + +def test_indices_concatenation_order(): + + # GH 2808 + + def f1(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2, names=["b", "c"]) + res = DataFrame(columns=["a"], index=multiindex) + return res + else: + y = y.set_index(["b", "c"]) + return y + + def f2(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + return DataFrame() + else: + y = y.set_index(["b", "c"]) + return y + + def f3(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + multiindex = MultiIndex( + levels=[[]] * 2, codes=[[]] * 2, names=["foo", "bar"] + ) + res = DataFrame(columns=["a", "b"], index=multiindex) + return res + else: + return y + + df = DataFrame({"a": [1, 2, 2, 2], "b": range(4), "c": range(5, 9)}) + + df2 = DataFrame({"a": [3, 2, 2, 2], "b": range(4), "c": range(5, 9)}) + + # correct result + result1 = df.groupby("a").apply(f1) + result2 = df2.groupby("a").apply(f1) + tm.assert_frame_equal(result1, result2) + + # should fail (not the same number of levels) + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, match=msg): + df.groupby("a").apply(f2) + with pytest.raises(AssertionError, match=msg): + df2.groupby("a").apply(f2) + + # should fail (incorrect shape) + with pytest.raises(AssertionError, match=msg): + df.groupby("a").apply(f3) + with pytest.raises(AssertionError, match=msg): + df2.groupby("a").apply(f3) + + +def test_attr_wrapper(ts): + grouped = ts.groupby(lambda x: x.weekday()) + + result = grouped.std() + expected = grouped.agg(lambda x: np.std(x, ddof=1)) + tm.assert_series_equal(result, expected) + + # this is pretty cool + result = grouped.describe() + expected = {name: gp.describe() for name, gp in grouped} + expected = DataFrame(expected).T + tm.assert_frame_equal(result, expected) + + # get attribute + result = grouped.dtype + expected = grouped.agg(lambda x: x.dtype) + tm.assert_series_equal(result, 
expected) + + # make sure raises error + msg = "'SeriesGroupBy' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + getattr(grouped, "foo") + + +def test_frame_groupby(tsframe): + grouped = tsframe.groupby(lambda x: x.weekday()) + + # aggregate + aggregated = grouped.aggregate(np.mean) + assert len(aggregated) == 5 + assert len(aggregated.columns) == 4 + + # by string + tscopy = tsframe.copy() + tscopy["weekday"] = [x.weekday() for x in tscopy.index] + stragged = tscopy.groupby("weekday").aggregate(np.mean) + tm.assert_frame_equal(stragged, aggregated, check_names=False) + + # transform + grouped = tsframe.head(30).groupby(lambda x: x.weekday()) + transformed = grouped.transform(lambda x: x - x.mean()) + assert len(transformed) == 30 + assert len(transformed.columns) == 4 + + # transform propagate + transformed = grouped.transform(lambda x: x.mean()) + for name, group in grouped: + mean = group.mean() + for idx in group.index: + tm.assert_series_equal(transformed.xs(idx), mean, check_names=False) + + # iterate + for weekday, group in grouped: + assert group.index[0].weekday() == weekday + + # groups / group_indices + groups = grouped.groups + indices = grouped.indices + + for k, v in groups.items(): + samething = tsframe.index.take(indices[k]) + assert (samething == v).all() + + +def test_frame_groupby_columns(tsframe): + mapping = {"A": 0, "B": 0, "C": 1, "D": 1} + grouped = tsframe.groupby(mapping, axis=1) + + # aggregate + aggregated = grouped.aggregate(np.mean) + assert len(aggregated) == len(tsframe) + assert len(aggregated.columns) == 2 + + # transform + tf = lambda x: x - x.mean() + groupedT = tsframe.T.groupby(mapping, axis=0) + tm.assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) + + # iterate + for k, v in grouped: + assert len(v.columns) == 2 + + +def test_frame_set_name_single(df): + grouped = df.groupby("A") + + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.mean() + assert result.index.name == "A" + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A", as_index=False).mean() + assert result.index.name != "A" + + # GH#50538 + msg = "The operation groupby preserves dtype whereas reset_index casts to int64 + expected["a"] = expected["a"].astype(df["a"].dtype) + + g = df.groupby("a", as_index=False) + + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = getattr(g, reduction_func)() + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = g.agg(reduction_func) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = getattr(g["b"], reduction_func)() + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = g["b"].agg(reduction_func) + tm.assert_frame_equal(result, expected) + + +def test_as_index_series_return_frame(df): + grouped = df.groupby("A", as_index=False) + grouped2 = df.groupby(["A", "B"], as_index=False) + + # GH#50538 + msg = "The operation 0 + return Decimal(str(x.mean())) + + grouped = s.groupby(labels) + + result = grouped.agg(convert_fast) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + result = grouped.agg(convert_force_pure) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + +def 
test_groupby_dtype_inference_empty(): + # GH 6733 + df = DataFrame({"x": [], "range": np.arange(0, dtype="int64")}) + assert df["x"].dtype == np.float64 + + result = df.groupby("x").first() + exp_index = Index([], name="x", dtype=np.float64) + expected = DataFrame({"range": Series([], index=exp_index, dtype="int64")}) + tm.assert_frame_equal(result, expected, by_blocks=True) + + +def test_groupby_uint64_float_conversion(): + #  GH: 30859 groupby converts uint64 to floats sometimes + df = DataFrame({"first": [1], "second": [1], "value": [16148277970000000000]}) + result = df.groupby(["first", "second"])["value"].max() + expected = Series( + [16148277970000000000], + MultiIndex.from_product([[1], [1]], names=["first", "second"]), + name="value", + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_list_infer_array_like(df): + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(list(df["A"])).mean() + expected = df.groupby(df["A"]).mean() + tm.assert_frame_equal(result, expected, check_names=False) + + with pytest.raises(KeyError, match=r"^'foo'$"): + df.groupby(list(df["A"][:-1])) + + # pathological case of ambiguity + df = DataFrame({"foo": [0, 1], "bar": [3, 4], "val": np.random.randn(2)}) + + result = df.groupby(["foo", "bar"]).mean() + expected = df.groupby([df["foo"], df["bar"]]).mean()[["val"]] + + +def test_groupby_keys_same_size_as_index(): + # GH 11185 + freq = "s" + index = date_range( + start=Timestamp("2015-09-29T11:34:44-0700"), periods=2, freq=freq + ) + df = DataFrame([["A", 10], ["B", 15]], columns=["metric", "values"], index=index) + result = df.groupby([Grouper(level=0, freq=freq), "metric"]).mean() + expected = df.set_index([df.index, "metric"]).astype(float) + + tm.assert_frame_equal(result, expected) + + +def test_groupby_one_row(): + # GH 11741 + msg = r"^'Z'$" + df1 = DataFrame(np.random.randn(1, 4), columns=list("ABCD")) + with pytest.raises(KeyError, match=msg): + df1.groupby("Z") + df2 = DataFrame(np.random.randn(2, 4), columns=list("ABCD")) + with pytest.raises(KeyError, match=msg): + df2.groupby("Z") + + +def test_groupby_nat_exclude(): + # GH 6992 + df = DataFrame( + { + "values": np.random.randn(8), + "dt": [ + np.nan, + Timestamp("2013-01-01"), + np.nan, + Timestamp("2013-02-01"), + np.nan, + Timestamp("2013-02-01"), + np.nan, + Timestamp("2013-01-01"), + ], + "str": [np.nan, "a", np.nan, "a", np.nan, "a", np.nan, "b"], + } + ) + grouped = df.groupby("dt") + + expected = [Index([1, 7]), Index([3, 5])] + keys = sorted(grouped.groups.keys()) + assert len(keys) == 2 + for k, e in zip(keys, expected): + # grouped.groups keys are np.datetime64 with system tz + # not to be affected by tz, only compare values + tm.assert_index_equal(grouped.groups[k], e) + + # confirm obj is not filtered + tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df) + assert grouped.ngroups == 2 + + expected = { + Timestamp("2013-01-01 00:00:00"): np.array([1, 7], dtype=np.intp), + Timestamp("2013-02-01 00:00:00"): np.array([3, 5], dtype=np.intp), + } + + for k in grouped.indices: + tm.assert_numpy_array_equal(grouped.indices[k], expected[k]) + + tm.assert_frame_equal(grouped.get_group(Timestamp("2013-01-01")), df.iloc[[1, 7]]) + tm.assert_frame_equal(grouped.get_group(Timestamp("2013-02-01")), df.iloc[[3, 5]]) + + with pytest.raises(KeyError, match=r"^NaT$"): + grouped.get_group(pd.NaT) + + nan_df = DataFrame( + {"nan": [np.nan, np.nan, np.nan], "nat": [pd.NaT, pd.NaT, pd.NaT]} + ) + assert
nan_df["nan"].dtype == "float64" + assert nan_df["nat"].dtype == "datetime64[ns]" + + for key in ["nan", "nat"]: + grouped = nan_df.groupby(key) + assert grouped.groups == {} + assert grouped.ngroups == 0 + assert grouped.indices == {} + with pytest.raises(KeyError, match=r"^nan$"): + grouped.get_group(np.nan) + with pytest.raises(KeyError, match=r"^NaT$"): + grouped.get_group(pd.NaT) + + +def test_groupby_two_group_keys_all_nan(): + # GH #36842: Grouping over two group keys shouldn't raise an error + df = DataFrame({"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 2]}) + result = df.groupby(["a", "b"]).indices + assert result == {} + + +def test_groupby_2d_malformed(): + d = DataFrame(index=range(2)) + d["group"] = ["g1", "g2"] + d["zeros"] = [0, 0] + d["ones"] = [1, 1] + d["label"] = ["l1", "l2"] + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + tmp = d.groupby(["group"]).mean() + res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) + tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) + tm.assert_numpy_array_equal(tmp.values, res_values) + + +def test_int32_overflow(): + B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000))) + A = np.arange(25000) + df = DataFrame({"A": A, "B": B, "C": A, "D": B, "E": np.random.randn(25000)}) + + left = df.groupby(["A", "B", "C", "D"]).sum() + right = df.groupby(["D", "C", "B", "A"]).sum() + assert len(left) == len(right) + + +def test_groupby_sort_multi(): + df = DataFrame( + { + "a": ["foo", "bar", "baz"], + "b": [3, 2, 1], + "c": [0, 1, 2], + "d": np.random.randn(3), + } + ) + + tups = [tuple(row) for row in df[["a", "b", "c"]].values] + tups = com.asarray_tuplesafe(tups) + result = df.groupby(["a", "b", "c"], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]]) + + tups = [tuple(row) for row in df[["c", "a", "b"]].values] + tups = com.asarray_tuplesafe(tups) + result = df.groupby(["c", "a", "b"], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups) + + tups = [tuple(x) for x in df[["b", "c", "a"]].values] + tups = com.asarray_tuplesafe(tups) + result = df.groupby(["b", "c", "a"], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]]) + + df = DataFrame( + {"a": [0, 1, 2, 0, 1, 2], "b": [0, 0, 0, 1, 1, 1], "d": np.random.randn(6)} + ) + grouped = df.groupby(["a", "b"])["d"] + result = grouped.sum() + + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): + tups = [tuple(row) for row in df[keys].values] + tups = com.asarray_tuplesafe(tups) + expected = f(df.groupby(tups)[field]) + for k, v in expected.items(): + assert result[k] == v + + _check_groupby(df, result, ["a", "b"], "d") + + +def test_dont_clobber_name_column(): + df = DataFrame( + {"key": ["a", "a", "a", "b", "b", "b"], "name": ["foo", "bar", "baz"] * 2} + ) + + result = df.groupby("key", group_keys=False).apply(lambda x: x) + tm.assert_frame_equal(result, df) + + +def test_skip_group_keys(): + + tsf = tm.makeTimeDataFrame() + + grouped = tsf.groupby(lambda x: x.month, group_keys=False) + result = grouped.apply(lambda x: x.sort_values(by="A")[:3]) + + pieces = [group.sort_values(by="A")[:3] for key, group in grouped] + + expected = pd.concat(pieces) + tm.assert_frame_equal(result, expected) + + grouped = tsf["A"].groupby(lambda x: x.month, group_keys=False) + result = grouped.apply(lambda x: x.sort_values()[:3]) + + pieces = [group.sort_values()[:3] for key, group in grouped] + + expected = pd.concat(pieces) + 
tm.assert_series_equal(result, expected) + + +def test_no_nonsense_name(float_frame): + # GH #995 + s = float_frame["C"].copy() + s.name = None + + result = s.groupby(float_frame["A"]).agg(np.sum) + assert result.name is None + + +def test_multifunc_sum_bug(): + # GH #1065 + x = DataFrame(np.arange(9).reshape(3, 3)) + x["test"] = 0 + x["fl"] = [1.3, 1.5, 1.6] + + grouped = x.groupby("test") + result = grouped.agg({"fl": "sum", 2: "size"}) + assert result["fl"].dtype == np.float64 + + +def test_handle_dict_return_value(df): + def f(group): + return {"max": group.max(), "min": group.min()} + + def g(group): + return Series({"max": group.max(), "min": group.min()}) + + result = df.groupby("A")["C"].apply(f) + expected = df.groupby("A")["C"].apply(g) + + assert isinstance(result, Series) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("grouper", ["A", ["A", "B"]]) +def test_set_group_name(df, grouper): + def f(group): + assert group.name is not None + return group + + def freduce(group): + assert group.name is not None + return group.sum() + + def foo(x): + return freduce(x) + + grouped = df.groupby(grouper, group_keys=False) + + # make sure all these work + grouped.apply(f) + grouped.aggregate(freduce) + grouped.aggregate({"C": freduce, "D": freduce}) + grouped.transform(f) + + grouped["C"].apply(f) + grouped["C"].aggregate(freduce) + grouped["C"].aggregate([freduce, foo]) + grouped["C"].transform(f) + + +def test_group_name_available_in_inference_pass(): + # gh-15062 + df = DataFrame({"a": [0, 0, 1, 1, 2, 2], "b": np.arange(6)}) + + names = [] + + def f(group): + names.append(group.name) + return group.copy() + + df.groupby("a", sort=False, group_keys=False).apply(f) + + expected_names = [0, 1, 2] + assert names == expected_names + + +def test_no_dummy_key_names(df): + # see gh-1291 + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(df["A"].values).sum() + assert result.index.name is None + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([df["A"].values, df["B"].values]).sum() + assert result.index.names == (None, None) + + +def test_groupby_sort_multiindex_series(): + # series multiindex groupby sort argument was not being passed through + # _compress_group_index + # GH 9444 + index = MultiIndex( + levels=[[1, 2], [1, 2]], + codes=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]], + names=["a", "b"], + ) + mseries = Series([0, 1, 2, 3, 4, 5], index=index) + index = MultiIndex( + levels=[[1, 2], [1, 2]], codes=[[0, 0, 1], [1, 0, 0]], names=["a", "b"] + ) + mseries_result = Series([0, 2, 4], index=index) + + result = mseries.groupby(level=["a", "b"], sort=False).first() + tm.assert_series_equal(result, mseries_result) + result = mseries.groupby(level=["a", "b"], sort=True).first() + tm.assert_series_equal(result, mseries_result.sort_index()) + + +def test_groupby_reindex_inside_function(): + + periods = 1000 + ind = date_range(start="2012/1/1", freq="5min", periods=periods) + df = DataFrame({"high": np.arange(periods), "low": np.arange(periods)}, index=ind) + + def agg_before(func, fix=False): + """ + Run an aggregate func on the subset of data. 
+ """ + + def _func(data): + d = data.loc[data.index.map(lambda x: x.hour < 11)].dropna() + if fix: + data[data.index[0]] + if len(d) == 0: + return None + return func(d) + + return _func + + grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) + closure_bad = grouped.agg({"high": agg_before(np.max)}) + closure_good = grouped.agg({"high": agg_before(np.max, True)}) + + tm.assert_frame_equal(closure_bad, closure_good) + + +def test_groupby_multiindex_missing_pair(): + # GH9049 + df = DataFrame( + { + "group1": ["a", "a", "a", "b"], + "group2": ["c", "c", "d", "c"], + "value": [1, 1, 1, 5], + } + ) + df = df.set_index(["group1", "group2"]) + df_grouped = df.groupby(level=["group1", "group2"], sort=True) + + res = df_grouped.agg("sum") + idx = MultiIndex.from_tuples( + [("a", "c"), ("a", "d"), ("b", "c")], names=["group1", "group2"] + ) + exp = DataFrame([[2], [1], [5]], index=idx, columns=["value"]) + + tm.assert_frame_equal(res, exp) + + +def test_groupby_multiindex_not_lexsorted(): + # GH 11640 + + # define the lexsorted version + lexsorted_mi = MultiIndex.from_tuples( + [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"] + ) + lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) + assert lexsorted_df.columns._is_lexsorted() + + # define the non-lexsorted version + not_lexsorted_df = DataFrame( + columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] + ) + not_lexsorted_df = not_lexsorted_df.pivot_table( + index="a", columns=["b", "c"], values="d" + ) + not_lexsorted_df = not_lexsorted_df.reset_index() + assert not not_lexsorted_df.columns._is_lexsorted() + + # compare the results + tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) + + expected = lexsorted_df.groupby("a").mean() + with tm.assert_produces_warning(PerformanceWarning): + result = not_lexsorted_df.groupby("a").mean() + tm.assert_frame_equal(expected, result) + + # a transforming function should work regardless of sort + # GH 14776 + df = DataFrame( + {"x": ["a", "a", "b", "a"], "y": [1, 1, 2, 2], "z": [1, 2, 3, 4]} + ).set_index(["x", "y"]) + assert not df.index._is_lexsorted() + + for level in [0, 1, [0, 1]]: + for sort in [False, True]: + result = df.groupby(level=level, sort=sort, group_keys=False).apply( + DataFrame.drop_duplicates + ) + expected = df + tm.assert_frame_equal(expected, result) + + result = ( + df.sort_index() + .groupby(level=level, sort=sort, group_keys=False) + .apply(DataFrame.drop_duplicates) + ) + expected = df.sort_index() + tm.assert_frame_equal(expected, result) + + +def test_index_label_overlaps_location(): + # checking we don't have any label/location confusion in the + # wake of GH5375 + df = DataFrame(list("ABCDE"), index=[2, 0, 2, 1, 1]) + g = df.groupby(list("ababb")) + actual = g.filter(lambda x: len(x) > 2) + expected = df.iloc[[1, 3, 4]] + tm.assert_frame_equal(actual, expected) + + ser = df[0] + g = ser.groupby(list("ababb")) + actual = g.filter(lambda x: len(x) > 2) + expected = ser.take([1, 3, 4]) + tm.assert_series_equal(actual, expected) + + # and again, with a generic Index of floats + df.index = df.index.astype(float) + g = df.groupby(list("ababb")) + actual = g.filter(lambda x: len(x) > 2) + expected = df.iloc[[1, 3, 4]] + tm.assert_frame_equal(actual, expected) + + ser = df[0] + g = ser.groupby(list("ababb")) + actual = g.filter(lambda x: len(x) > 2) + expected = ser.take([1, 3, 4]) + tm.assert_series_equal(actual, expected) + + +def test_transform_doesnt_clobber_ints(): + # GH 7972 + n = 6 + x = np.arange(n) + df = DataFrame({"a": 
x // 2, "b": 2.0 * x, "c": 3.0 * x}) + df2 = DataFrame({"a": x // 2 * 1.0, "b": 2.0 * x, "c": 3.0 * x}) + + gb = df.groupby("a") + result = gb.transform("mean") + + gb2 = df2.groupby("a") + expected = gb2.transform("mean") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "sort_column", + ["ints", "floats", "strings", ["ints", "floats"], ["ints", "strings"]], +) +@pytest.mark.parametrize( + "group_column", ["int_groups", "string_groups", ["int_groups", "string_groups"]] +) +def test_groupby_preserves_sort(sort_column, group_column): + # Test to ensure that groupby always preserves sort order of original + # object. Issue #8588 and #9651 + + df = DataFrame( + { + "int_groups": [3, 1, 0, 1, 0, 3, 3, 3], + "string_groups": ["z", "a", "z", "a", "a", "g", "g", "g"], + "ints": [8, 7, 4, 5, 2, 9, 1, 1], + "floats": [2.3, 5.3, 6.2, -2.4, 2.2, 1.1, 1.1, 5], + "strings": ["z", "d", "a", "e", "word", "word2", "42", "47"], + } + ) + + # Try sorting on different types and with different group types + + df = df.sort_values(by=sort_column) + g = df.groupby(group_column) + + def test_sort(x): + tm.assert_frame_equal(x, x.sort_values(by=sort_column)) + + g.apply(test_sort) + + +def test_pivot_table_values_key_error(): + # This test is designed to replicate the error in issue #14938 + df = DataFrame( + { + "eventDate": date_range(datetime.today(), periods=20, freq="M").tolist(), + "thename": range(0, 20), + } + ) + + df["year"] = df.set_index("eventDate").index.year + df["month"] = df.set_index("eventDate").index.month + + with pytest.raises(KeyError, match="'badname'"): + df.reset_index().pivot_table( + index="year", columns="month", values="badname", aggfunc="count" + ) + + +@pytest.mark.parametrize("columns", ["C", ["C"]]) +@pytest.mark.parametrize("keys", [["A"], ["A", "B"]]) +@pytest.mark.parametrize( + "values", + [ + [True], + [0], + [0.0], + ["a"], + Categorical([0]), + [to_datetime(0)], + date_range(0, 1, 1, tz="US/Eastern"), + pd.array([0], dtype="Int64"), + pd.array([0], dtype="Float64"), + pd.array([False], dtype="boolean"), + ], + ids=[ + "bool", + "int", + "float", + "str", + "cat", + "dt64", + "dt64tz", + "Int64", + "Float64", + "boolean", + ], +) +@pytest.mark.parametrize("method", ["attr", "agg", "apply"]) +@pytest.mark.parametrize( + "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"] +) +@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") +def test_empty_groupby(columns, keys, values, method, op, request, using_array_manager): + # GH8093 & GH26411 + override_dtype = None + + if ( + isinstance(values, Categorical) + and not isinstance(columns, list) + and op in ["sum", "prod", "skew", "mad"] + ): + # handled below GH#41291 + + if using_array_manager and op == "mad": + right_msg = "Cannot interpret 'CategoricalDtype.* as a data type" + msg = "Regex pattern \"'Categorical' does not implement.*" + right_msg + mark = pytest.mark.xfail(raises=AssertionError, match=msg) + request.node.add_marker(mark) + + elif ( + isinstance(values, Categorical) + and len(keys) == 1 + and op in ["idxmax", "idxmin"] + ): + mark = pytest.mark.xfail( + raises=ValueError, match="attempt to get arg(min|max) of an empty sequence" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, Categorical) + and len(keys) == 1 + and not isinstance(columns, list) + ): + mark = pytest.mark.xfail( + raises=TypeError, match="'Categorical' does not implement" + ) + 
request.node.add_marker(mark) + elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]: + mark = pytest.mark.xfail( + raises=AssertionError, match="(DataFrame|Series) are different" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, Categorical) + and len(keys) == 2 + and op in ["min", "max", "sum"] + ): + mark = pytest.mark.xfail( + raises=AssertionError, match="(DataFrame|Series) are different" + ) + request.node.add_marker(mark) + + elif ( + op == "mad" + and not isinstance(columns, list) + and isinstance(values, pd.DatetimeIndex) + and values.tz is not None + and using_array_manager + ): + mark = pytest.mark.xfail( + raises=TypeError, + match=r"Cannot interpret 'datetime64\[ns, US/Eastern\]' as a data type", + ) + request.node.add_marker(mark) + + elif isinstance(values, BooleanArray) and op in ["sum", "prod"]: + # We expect to get Int64 back for these + override_dtype = "Int64" + + if isinstance(values[0], bool) and op in ("prod", "sum"): + # sum/product of bools is an integer + override_dtype = "int64" + + df = DataFrame({"A": values, "B": values, "C": values}, columns=list("ABC")) + + if hasattr(values, "dtype"): + # check that we did the construction right + assert (df.dtypes == values.dtype).all() + + df = df.iloc[:0] + + gb = df.groupby(keys, group_keys=False)[columns] + + def get_result(): + warn = FutureWarning if op == "mad" else None + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): + if method == "attr": + return getattr(gb, op)() + else: + return getattr(gb, method)(op) + + if columns == "C": + # i.e. SeriesGroupBy + if op in ["prod", "sum", "skew"]: + # ops that require more than just ordered-ness + if df.dtypes[0].kind == "M": + # GH#41291 + # datetime64 -> prod and sum are invalid + if op == "skew": + msg = "does not support reduction 'skew'" + else: + msg = "datetime64 type does not support" + with pytest.raises(TypeError, match=msg): + get_result() + + return + if op in ["prod", "sum", "skew", "mad"]: + if isinstance(values, Categorical): + # GH#41291 + if op == "mad": + # mad calls mean, which Categorical doesn't implement + msg = "does not support reduction 'mean'" + elif op == "skew": + msg = f"does not support reduction '{op}'" + else: + msg = "category type does not support" + with pytest.raises(TypeError, match=msg): + get_result() + + return + else: + # ie. DataFrameGroupBy + if op in ["prod", "sum"]: + # ops that require more than just ordered-ness + if df.dtypes[0].kind == "M": + # GH#41291 + # datetime64 -> prod and sum are invalid + result = get_result() + + # with numeric_only=True, these are dropped, and we get + # an empty DataFrame back + expected = df.set_index(keys)[[]] + tm.assert_equal(result, expected) + return + + elif isinstance(values, Categorical): + # GH#41291 + # Categorical doesn't implement sum or prod + result = get_result() + + # with numeric_only=True, these are dropped, and we get + # an empty DataFrame back + expected = df.set_index(keys)[[]] + if len(keys) != 1 and op == "prod": + # TODO: why just prod and not sum? 
+ # Categorical is special without 'observed=True' + lev = Categorical([0], dtype=values.dtype) + mi = MultiIndex.from_product([lev, lev], names=["A", "B"]) + expected = DataFrame([], columns=[], index=mi) + + tm.assert_equal(result, expected) + return + + elif df.dtypes[0] == object: + # FIXME: the test is actually wrong here, xref #41341 + result = get_result() + # In this case we have list-of-list, will raise TypeError, + # and subsequently be dropped as nuisance columns + expected = df.set_index(keys)[[]] + tm.assert_equal(result, expected) + return + + if ( + op in ["mad", "min", "max", "skew"] + and isinstance(values, Categorical) + and len(keys) == 1 + ): + # Categorical doesn't implement, so with numeric_only=True + # these are dropped and we get an empty DataFrame back + result = get_result() + expected = df.set_index(keys)[[]] + + # with numeric_only=True, these are dropped, and we get + # an empty DataFrame back + if len(keys) != 1: + # Categorical is special without 'observed=True' + lev = Categorical([0], dtype=values.dtype) + mi = MultiIndex.from_product([lev, lev], names=keys) + expected = DataFrame([], columns=[], index=mi) + else: + # all columns are dropped, but we end up with one row + # Categorical is special without 'observed=True' + lev = Categorical([0], dtype=values.dtype) + ci = Index(lev, name=keys[0]) + expected = DataFrame([], columns=[], index=ci) + # expected = df.set_index(keys)[columns] + + tm.assert_equal(result, expected) + return + + result = get_result() + expected = df.set_index(keys)[columns] + if override_dtype is not None: + expected = expected.astype(override_dtype) + if len(keys) == 1: + expected.index.name = keys[0] + tm.assert_equal(result, expected) + + +def test_empty_groupby_apply_nonunique_columns(): + # GH#44417 + df = DataFrame(np.random.randn(0, 4)) + df[3] = df[3].astype(np.int64) + df.columns = [0, 1, 2, 0] + gb = df.groupby(df[1], group_keys=False) + res = gb.apply(lambda x: x) + assert (res.dtypes == df.dtypes).all() + + +def test_tuple_as_grouping(): + # https://github.com/pandas-dev/pandas/issues/18314 + df = DataFrame( + { + ("a", "b"): [1, 1, 1, 1], + "a": [2, 2, 2, 2], + "b": [2, 2, 2, 2], + "c": [1, 1, 1, 1], + } + ) + + with pytest.raises(KeyError, match=r"('a', 'b')"): + df[["a", "b", "c"]].groupby(("a", "b")) + + result = df.groupby(("a", "b"))["c"].sum() + expected = Series([4], name="c", index=Index([1], name=("a", "b"))) + tm.assert_series_equal(result, expected) + + +def test_tuple_correct_keyerror(): + # https://github.com/pandas-dev/pandas/issues/18798 + df = DataFrame(1, index=range(3), columns=MultiIndex.from_product([[1, 2], [3, 4]])) + with pytest.raises(KeyError, match=r"^\(7, 8\)$"): + df.groupby((7, 8)).mean() + + +def test_groupby_agg_ohlc_non_first(): + # GH 21716 + df = DataFrame( + [[1], [1]], + columns=Index(["foo"], name="mycols"), + index=date_range("2018-01-01", periods=2, freq="D", name="dti"), + ) + + expected = DataFrame( + [[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]], + columns=MultiIndex.from_tuples( + ( + ("foo", "sum", "foo"), + ("foo", "ohlc", "open"), + ("foo", "ohlc", "high"), + ("foo", "ohlc", "low"), + ("foo", "ohlc", "close"), + ), + names=["mycols", None, None], + ), + index=date_range("2018-01-01", periods=2, freq="D", name="dti"), + ) + + result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) + + tm.assert_frame_equal(result, expected) + + +def test_groupby_multiindex_nat(): + # GH 9236 + values = [ + (pd.NaT, "a"), + (datetime(2012, 1, 2), "a"), + (datetime(2012, 1, 2), "b"), + (datetime(2012, 
1, 3), "a"), + ] + mi = MultiIndex.from_tuples(values, names=["date", None]) + ser = Series([3, 2, 2.5, 4], index=mi) + + result = ser.groupby(level=1).mean() + expected = Series([3.0, 2.5], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + +def test_groupby_empty_list_raises(): + # GH 5289 + values = zip(range(10), range(10)) + df = DataFrame(values, columns=["apple", "b"]) + msg = "Grouper and axis must be same length" + with pytest.raises(ValueError, match=msg): + df.groupby([[]]) + + +def test_groupby_multiindex_series_keys_len_equal_group_axis(): + # GH 25704 + index_array = [["x", "x"], ["a", "b"], ["k", "k"]] + index_names = ["first", "second", "third"] + ri = MultiIndex.from_arrays(index_array, names=index_names) + s = Series(data=[1, 2], index=ri) + result = s.groupby(["first", "third"]).sum() + + index_array = [["x"], ["k"]] + index_names = ["first", "third"] + ei = MultiIndex.from_arrays(index_array, names=index_names) + expected = Series([3], index=ei) + + tm.assert_series_equal(result, expected) + + +def test_groupby_groups_in_BaseGrouper(): + # GH 26326 + # Test if DataFrame grouped with a pandas.Grouper has correct groups + mi = MultiIndex.from_product([["A", "B"], ["C", "D"]], names=["alpha", "beta"]) + df = DataFrame({"foo": [1, 2, 1, 2], "bar": [1, 2, 3, 4]}, index=mi) + result = df.groupby([Grouper(level="alpha"), "beta"]) + expected = df.groupby(["alpha", "beta"]) + assert result.groups == expected.groups + + result = df.groupby(["beta", Grouper(level="alpha")]) + expected = df.groupby(["beta", "alpha"]) + assert result.groups == expected.groups + + +@pytest.mark.parametrize("group_name", ["x", ["x"]]) +def test_groupby_axis_1(group_name): + # GH 27614 + df = DataFrame( + np.arange(12).reshape(3, 4), index=[0, 1, 0], columns=[10, 20, 10, 20] + ) + df.index.name = "y" + df.columns.name = "x" + + results = df.groupby(group_name, axis=1).sum() + expected = df.T.groupby(group_name).sum().T + tm.assert_frame_equal(results, expected) + + # test on MI column + iterables = [["bar", "baz", "foo"], ["one", "two"]] + mi = MultiIndex.from_product(iterables=iterables, names=["x", "x1"]) + df = DataFrame(np.arange(18).reshape(3, 6), index=[0, 1, 0], columns=mi) + results = df.groupby(group_name, axis=1).sum() + expected = df.T.groupby(group_name).sum().T + tm.assert_frame_equal(results, expected) + + +@pytest.mark.parametrize( + "op, expected", + [ + ( + "shift", + { + "time": [ + None, + None, + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + None, + None, + ] + }, + ), + ( + "bfill", + { + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ] + }, + ), + ( + "ffill", + { + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ] + }, + ), + ], +) +def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected): + # GH19995, GH27992: Check that timezone does not drop in shift, bfill, and ffill + tz = tz_naive_fixture + data = { + "id": ["A", "B", "A", "B", "A", "B"], + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + None, + None, + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ], + } + df = DataFrame(data).assign(time=lambda x: 
x.time.dt.tz_localize(tz)) + + grouped = df.groupby("id") + result = getattr(grouped, op)() + expected = DataFrame(expected).assign(time=lambda x: x.time.dt.tz_localize(tz)) + tm.assert_frame_equal(result, expected) + + +def test_groupby_only_none_group(): + # see GH21624 + # this was crashing with "ValueError: Length of passed values is 1, index implies 0" + df = DataFrame({"g": [None], "x": 1}) + actual = df.groupby("g")["x"].transform("sum") + expected = Series([np.nan], name="x") + + tm.assert_series_equal(actual, expected) + + +def test_groupby_duplicate_index(): + # GH#29189 the groupby call here used to raise + ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0]) + gb = ser.groupby(level=0) + + result = gb.mean() + expected = Series([2, 5.5, 8], index=[2.0, 4.0, 5.0]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:.*is deprecated.*:FutureWarning") +def test_group_on_empty_multiindex(transformation_func, request): + # GH 47787 + # With one row, those are transforms so the schema should be the same + if transformation_func == "tshift": + mark = pytest.mark.xfail(raises=NotImplementedError) + request.node.add_marker(mark) + df = DataFrame( + data=[[1, Timestamp("today"), 3, 4]], + columns=["col_1", "col_2", "col_3", "col_4"], + ) + df["col_3"] = df["col_3"].astype(int) + df["col_4"] = df["col_4"].astype(int) + df = df.set_index(["col_1", "col_2"]) + if transformation_func == "fillna": + args = ("ffill",) + elif transformation_func == "tshift": + args = (1, "D") + else: + args = () + result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args) + expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0] + if transformation_func in ("diff", "shift"): + expected = expected.astype(int) + tm.assert_equal(result, expected) + + result = ( + df["col_3"].iloc[:0].groupby(["col_1"]).transform(transformation_func, *args) + ) + expected = ( + df["col_3"].groupby(["col_1"]).transform(transformation_func, *args).iloc[:0] + ) + if transformation_func in ("diff", "shift"): + expected = expected.astype(int) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "idx", + [ + Index(["a", "a"], name="foo"), + MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]), + ], +) +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") +def test_dup_labels_output_shape(groupby_func, idx): + if groupby_func in {"size", "ngroup", "cumcount"}: + pytest.skip(f"Not applicable for {groupby_func}") + # TODO(2.0) Remove after pad/backfill deprecation enforced + groupby_func = maybe_normalize_deprecated_kernels(groupby_func) + warn = FutureWarning if groupby_func in ("mad", "tshift") else None + + df = DataFrame([[1, 1]], columns=idx) + grp_by = df.groupby([0]) + + if groupby_func == "tshift": + df.index = [Timestamp("today")] + # args.extend([1, "D"]) + args = get_groupby_method_args(groupby_func, df) + + with tm.assert_produces_warning(warn, match="is deprecated"): + result = getattr(grp_by, groupby_func)(*args) + + assert result.shape == (1, 2) + tm.assert_index_equal(result.columns, idx) + + +def test_groupby_crash_on_nunique(axis): + # Fix following 30253 + dti = date_range("2016-01-01", periods=2, name="foo") + df = DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) + df.columns.names = ("bar", "baz") + df.index = dti + + axis_number = df._get_axis_number(axis) + if not axis_number: + df = df.T + + gb = df.groupby(axis=axis_number, level=0) + result = gb.nunique() + + 
expected = DataFrame({"A": [1, 2], "D": [1, 1]}, index=dti) + expected.columns.name = "bar" + if not axis_number: + expected = expected.T + + tm.assert_frame_equal(result, expected) + + if axis_number == 0: + # same thing, but empty columns + gb2 = df[[]].groupby(axis=axis_number, level=0) + exp = expected[[]] + else: + # same thing, but empty rows + gb2 = df.loc[[]].groupby(axis=axis_number, level=0) + # default for empty when we can't infer a dtype is float64 + exp = expected.loc[[]].astype(np.float64) + + res = gb2.nunique() + tm.assert_frame_equal(res, exp) + + +def test_groupby_list_level(): + # GH 9790 + expected = DataFrame(np.arange(0, 9).reshape(3, 3), dtype=float) + result = expected.groupby(level=[0]).mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "max_seq_items, expected", + [ + (5, "{0: [0], 1: [1], 2: [2], 3: [3], 4: [4]}"), + (4, "{0: [0], 1: [1], 2: [2], 3: [3], ...}"), + (1, "{0: [0], ...}"), + ], +) +def test_groups_repr_truncates(max_seq_items, expected): + # GH 1135 + df = DataFrame(np.random.randn(5, 1)) + df["a"] = df.index + + with pd.option_context("display.max_seq_items", max_seq_items): + result = df.groupby("a").groups.__repr__() + assert result == expected + + result = df.groupby(np.array(df.a)).groups.__repr__() + assert result == expected + + +def test_group_on_two_row_multiindex_returns_one_tuple_key(): + # GH 18451 + df = DataFrame([{"a": 1, "b": 2, "c": 99}, {"a": 1, "b": 2, "c": 88}]) + df = df.set_index(["a", "b"]) + + grp = df.groupby(["a", "b"]) + result = grp.indices + expected = {(1, 2): np.array([0, 1], dtype=np.int64)} + + assert len(result) == 1 + key = (1, 2) + assert (result[key] == expected[key]).all() + + +@pytest.mark.parametrize( + "klass, attr, value", + [ + (DataFrame, "level", "a"), + (DataFrame, "as_index", False), + (DataFrame, "sort", False), + (DataFrame, "group_keys", False), + (DataFrame, "squeeze", True), + (DataFrame, "observed", True), + (DataFrame, "dropna", False), + pytest.param( + Series, + "axis", + 1, + marks=pytest.mark.xfail( + reason="GH 35443: Attribute currently not passed on to series" + ), + ), + (Series, "level", "a"), + (Series, "as_index", False), + (Series, "sort", False), + (Series, "group_keys", False), + (Series, "squeeze", True), + (Series, "observed", True), + (Series, "dropna", False), + ], +) +@pytest.mark.filterwarnings( + "ignore:The `squeeze` parameter is deprecated:FutureWarning" +) +def test_subsetting_columns_keeps_attrs(klass, attr, value): + # GH 9959 - When subsetting columns, don't drop attributes + df = DataFrame({"a": [1], "b": [2], "c": [3]}) + if attr != "axis": + df = df.set_index("a") + + expected = df.groupby("a", **{attr: value}) + result = expected[["b"]] if klass is DataFrame else expected["b"] + assert getattr(result, attr) == getattr(expected, attr) + + +def test_subsetting_columns_axis_1(): + # GH 37725 + g = DataFrame({"A": [1], "B": [2], "C": [3]}).groupby([0, 0, 1], axis=1) + match = "Cannot subset columns when using axis=1" + with pytest.raises(ValueError, match=match): + g[["A", "B"]].sum() + + +@pytest.mark.parametrize("func", ["sum", "any", "shift"]) +def test_groupby_column_index_name_lost(func): + # GH: 29764 groupby loses index sometimes + expected = Index(["a"], name="idx") + df = DataFrame([[1]], columns=expected) + df_grouped = df.groupby([1]) + result = getattr(df_grouped, func)().columns + tm.assert_index_equal(result, expected) + + +def test_groupby_duplicate_columns(): + # GH: 31735 + df = DataFrame( + {"A": ["f", "e", "g", "h"], 
"B": ["a", "b", "c", "d"], "C": [1, 2, 3, 4]} + ).astype(object) + df.columns = ["A", "B", "B"] + result = df.groupby([0, 0, 0, 0]).min() + expected = DataFrame([["e", "a", 1]], columns=["A", "B", "B"]) + tm.assert_frame_equal(result, expected) + + +def test_groupby_series_with_tuple_name(): + # GH 37755 + ser = Series([1, 2, 3, 4], index=[1, 1, 2, 2], name=("a", "a")) + ser.index.name = ("b", "b") + result = ser.groupby(level=0).last() + expected = Series([2, 4], index=[1, 2], name=("a", "a")) + expected.index.name = ("b", "b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(not IS64, reason="GH#38778: fail on 32-bit system") +@pytest.mark.parametrize( + "func, values", [("sum", [97.0, 98.0]), ("mean", [24.25, 24.5])] +) +def test_groupby_numerical_stability_sum_mean(func, values): + # GH#38778 + data = [1e16, 1e16, 97, 98, -5e15, -5e15, -5e15, -5e15] + df = DataFrame({"group": [1, 2] * 4, "a": data, "b": data}) + result = getattr(df.groupby("group"), func)() + expected = DataFrame({"a": values, "b": values}, index=Index([1, 2], name="group")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail(not IS64, reason="GH#38778: fail on 32-bit system") +def test_groupby_numerical_stability_cumsum(): + # GH#38934 + data = [1e16, 1e16, 97, 98, -5e15, -5e15, -5e15, -5e15] + df = DataFrame({"group": [1, 2] * 4, "a": data, "b": data}) + result = df.groupby("group").cumsum() + exp_data = ( + [1e16] * 2 + [1e16 + 96, 1e16 + 98] + [5e15 + 97, 5e15 + 98] + [97.0, 98.0] + ) + expected = DataFrame({"a": exp_data, "b": exp_data}) + tm.assert_frame_equal(result, expected, check_exact=True) + + +def test_groupby_cumsum_skipna_false(): + # GH#46216 don't propagate np.nan above the diagonal + arr = np.random.randn(5, 5) + df = DataFrame(arr) + for i in range(5): + df.iloc[i, i] = np.nan + + df["A"] = 1 + gb = df.groupby("A") + + res = gb.cumsum(skipna=False) + + expected = df[[0, 1, 2, 3, 4]].cumsum(skipna=False) + tm.assert_frame_equal(res, expected) + + +def test_groupby_cumsum_timedelta64(): + # GH#46216 don't ignore is_datetimelike in libgroupby.group_cumsum + dti = date_range("2016-01-01", periods=5) + ser = Series(dti) - dti[0] + ser[2] = pd.NaT + + df = DataFrame({"A": 1, "B": ser}) + gb = df.groupby("A") + + res = gb.cumsum(numeric_only=False, skipna=True) + exp = DataFrame({"B": [ser[0], ser[1], pd.NaT, ser[4], ser[4] * 2]}) + tm.assert_frame_equal(res, exp) + + res = gb.cumsum(numeric_only=False, skipna=False) + exp = DataFrame({"B": [ser[0], ser[1], pd.NaT, pd.NaT, pd.NaT]}) + tm.assert_frame_equal(res, exp) + + +def test_groupby_mean_duplicate_index(rand_series_with_duplicate_datetimeindex): + dups = rand_series_with_duplicate_datetimeindex + result = dups.groupby(level=0).mean() + expected = dups.groupby(dups.index).mean() + tm.assert_series_equal(result, expected) + + +def test_groupby_all_nan_groups_drop(): + # GH 15036 + s = Series([1, 2, 3], [np.nan, np.nan, np.nan]) + result = s.groupby(s.index).sum() + expected = Series([], index=Index([], dtype=np.float64), dtype=np.int64) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("numeric_only", [True, False]) +def test_groupby_empty_multi_column(as_index, numeric_only): + # GH 15106 & GH 41998 + df = DataFrame(data=[], columns=["A", "B", "C"]) + gb = df.groupby(["A", "B"], as_index=as_index) + result = gb.sum(numeric_only=numeric_only) + if as_index: + index = MultiIndex([[], []], [[], []], names=["A", "B"]) + columns = ["C"] if not numeric_only else [] + else: + index = RangeIndex(0) + 
columns = ["A", "B", "C"] if not numeric_only else ["A", "B"] + expected = DataFrame([], columns=columns, index=index) + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_non_numeric_dtype(): + # GH #43108 + df = DataFrame( + [["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"] + ) + + expected = DataFrame( + { + "v": [[1, 1], [10, 20]], + }, + index=Index(["M", "W"], dtype="object", name="MW"), + ) + + gb = df.groupby(by=["MW"]) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_multi_non_numeric_dtype(): + # GH #42395 + df = DataFrame( + { + "x": [1, 0, 1, 1, 0], + "y": [Timedelta(i, "days") for i in range(1, 6)], + "z": [Timedelta(i * 10, "days") for i in range(1, 6)], + } + ) + + expected = DataFrame( + { + "y": [Timedelta(i, "days") for i in range(7, 9)], + "z": [Timedelta(i * 10, "days") for i in range(7, 9)], + }, + index=Index([0, 1], dtype="int64", name="x"), + ) + + gb = df.groupby(by=["x"]) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_numeric_with_non_numeric_dtype(): + # GH #43108 + df = DataFrame( + { + "x": [1, 0, 1, 1, 0], + "y": [Timedelta(i, "days") for i in range(1, 6)], + "z": list(range(1, 6)), + } + ) + + expected = DataFrame( + {"z": [7, 8]}, + index=Index([0, 1], dtype="int64", name="x"), + ) + + gb = df.groupby(by=["x"]) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.sum() + tm.assert_frame_equal(result, expected) + + +def test_groupby_filtered_df_std(): + # GH 16174 + dicts = [ + {"filter_col": False, "groupby_col": True, "bool_col": True, "float_col": 10.5}, + {"filter_col": True, "groupby_col": True, "bool_col": True, "float_col": 20.5}, + {"filter_col": True, "groupby_col": True, "bool_col": True, "float_col": 30.5}, + ] + df = DataFrame(dicts) + + df_filter = df[df["filter_col"] == True] # noqa:E712 + dfgb = df_filter.groupby("groupby_col") + result = dfgb.std() + expected = DataFrame( + [[0.0, 0.0, 7.071068]], + columns=["filter_col", "bool_col", "float_col"], + index=Index([True], name="groupby_col"), + ) + tm.assert_frame_equal(result, expected) + + +def test_datetime_categorical_multikey_groupby_indices(): + # GH 26859 + df = DataFrame( + { + "a": Series(list("abc")), + "b": Series( + to_datetime(["2018-01-01", "2018-02-01", "2018-03-01"]), + dtype="category", + ), + "c": Categorical.from_codes([-1, 0, 1], categories=[0, 1]), + } + ) + result = df.groupby(["a", "b"]).indices + expected = { + ("a", Timestamp("2018-01-01 00:00:00")): np.array([0]), + ("b", Timestamp("2018-02-01 00:00:00")): np.array([1]), + ("c", Timestamp("2018-03-01 00:00:00")): np.array([2]), + } + assert result == expected + + +def test_rolling_wrong_param_min_period(): + # GH34037 + name_l = ["Alice"] * 5 + ["Bob"] * 5 + val_l = [np.nan, np.nan, 1, 2, 3] + [np.nan, 1, 2, 3, 4] + test_df = DataFrame([name_l, val_l]).T + test_df.columns = ["name", "val"] + + result_error_msg = r"__init__\(\) got an unexpected keyword argument 'min_period'" + with pytest.raises(TypeError, match=result_error_msg): + test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum() + + +def test_pad_backfill_deprecation(): + # GH 33396 + s = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning, match="backfill"): + s.groupby(level=0).backfill() + with tm.assert_produces_warning(FutureWarning, match="pad"): + s.groupby(level=0).pad() + + +def 
test_by_column_values_with_same_starting_value(): + # GH29635 + df = DataFrame( + { + "Name": ["Thomas", "Thomas", "Thomas John"], + "Credit": [1200, 1300, 900], + "Mood": ["sad", "happy", "happy"], + } + ) + aggregate_details = {"Mood": Series.mode, "Credit": "sum"} + + result = df.groupby(["Name"]).agg(aggregate_details) + expected_result = DataFrame( + { + "Mood": [["happy", "sad"], "happy"], + "Credit": [2500, 900], + "Name": ["Thomas", "Thomas John"], + } + ).set_index("Name") + + tm.assert_frame_equal(result, expected_result) + + +def test_groupby_none_in_first_mi_level(): + # GH#47348 + arr = [[None, 1, 0, 1], [2, 3, 2, 3]] + ser = Series(1, index=MultiIndex.from_arrays(arr, names=["a", "b"])) + result = ser.groupby(level=[0, 1]).sum() + expected = Series( + [1, 2], MultiIndex.from_tuples([(0.0, 2), (1.0, 3)], names=["a", "b"]) + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_none_column_name(): + # GH#47348 + df = DataFrame({None: [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [4, 5, 6, 7]}) + result = df.groupby(by=[None]).sum() + expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None)) + tm.assert_frame_equal(result, expected) + + +def test_single_element_list_grouping(): + # GH 42795 + df = DataFrame( + {"a": [np.nan, 1], "b": [np.nan, 5], "c": [np.nan, 2]}, index=["x", "y"] + ) + msg = ( + "In a future version of pandas, a length 1 " + "tuple will be returned when iterating over " + "a groupby with a grouper equal to a list of " + "length 1. Don't supply a list with a single grouper " + "to avoid this warning." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + values, _ = next(iter(df.groupby(["a"]))) + + +@pytest.mark.parametrize("func", ["sum", "cumsum", "prod"]) +def test_groupby_avoid_casting_to_float(func): + # GH#37493 + val = 922337203685477580 + df = DataFrame({"a": 1, "b": [val]}) + result = getattr(df.groupby("a"), func)() - val + expected = DataFrame({"b": [0]}, index=Index([1], name="a")) + if func == "cumsum": + expected = expected.reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func, val", [("sum", 3), ("prod", 2)]) +def test_groupby_sum_support_mask(any_numeric_ea_dtype, func, val): + # GH#37493 + df = DataFrame({"a": 1, "b": [1, 2, pd.NA]}, dtype=any_numeric_ea_dtype) + result = getattr(df.groupby("a"), func)() + expected = DataFrame( + {"b": [val]}, + index=Index([1], name="a", dtype=any_numeric_ea_dtype), + dtype=any_numeric_ea_dtype, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("val, dtype", [(111, "int"), (222, "uint")]) +def test_groupby_overflow(val, dtype): + # GH#37493 + df = DataFrame({"a": 1, "b": [val, val]}, dtype=f"{dtype}8") + result = df.groupby("a").sum() + expected = DataFrame( + {"b": [val * 2]}, + index=Index([1], name="a", dtype=f"{dtype}64"), + dtype=f"{dtype}64", + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a").cumsum() + expected = DataFrame({"b": [val, val * 2]}, dtype=f"{dtype}64") + tm.assert_frame_equal(result, expected) + + result = df.groupby("a").prod() + expected = DataFrame( + {"b": [val * val]}, + index=Index([1], name="a", dtype=f"{dtype}64"), + dtype=f"{dtype}64", + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("skipna, val", [(True, 3), (False, pd.NA)]) +def test_groupby_cumsum_mask(any_numeric_ea_dtype, skipna, val): + # GH#37493 + df = DataFrame({"a": 1, "b": [1, pd.NA, 2]}, dtype=any_numeric_ea_dtype) + result = 
df.groupby("a").cumsum(skipna=skipna) + expected = DataFrame( + {"b": [1, pd.NA, val]}, + dtype=any_numeric_ea_dtype, + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py new file mode 100644 index 00000000..ee660dd0 --- /dev/null +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -0,0 +1,500 @@ +import numpy as np +import pytest + +from pandas.compat.pyarrow import pa_version_under1p01 + +from pandas.core.dtypes.missing import na_value_for_dtype + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "dropna, tuples, outputs", + [ + ( + True, + [["A", "B"], ["B", "A"]], + {"c": [13.0, 123.23], "d": [13.0, 123.0], "e": [13.0, 1.0]}, + ), + ( + False, + [["A", "B"], ["A", np.nan], ["B", "A"]], + { + "c": [13.0, 12.3, 123.23], + "d": [13.0, 233.0, 123.0], + "e": [13.0, 12.0, 1.0], + }, + ), + ], +) +def test_groupby_dropna_multi_index_dataframe_nan_in_one_group( + dropna, tuples, outputs, nulls_fixture +): + # GH 3729 this is to test that NA is in one group + df_list = [ + ["A", "B", 12, 12, 12], + ["A", nulls_fixture, 12.3, 233.0, 12], + ["B", "A", 123.23, 123, 1], + ["A", "B", 1, 1, 1.0], + ] + df = pd.DataFrame(df_list, columns=["a", "b", "c", "d", "e"]) + grouped = df.groupby(["a", "b"], dropna=dropna).sum() + + mi = pd.MultiIndex.from_tuples(tuples, names=list("ab")) + + # Since right now, by default MI will drop NA from levels when we create MI + # via `from_*`, so we need to add NA for level manually afterwards. + if not dropna: + mi = mi.set_levels(["A", "B", np.nan], level="b") + expected = pd.DataFrame(outputs, index=mi) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.parametrize( + "dropna, tuples, outputs", + [ + ( + True, + [["A", "B"], ["B", "A"]], + {"c": [12.0, 123.23], "d": [12.0, 123.0], "e": [12.0, 1.0]}, + ), + ( + False, + [["A", "B"], ["A", np.nan], ["B", "A"], [np.nan, "B"]], + { + "c": [12.0, 13.3, 123.23, 1.0], + "d": [12.0, 234.0, 123.0, 1.0], + "e": [12.0, 13.0, 1.0, 1.0], + }, + ), + ], +) +def test_groupby_dropna_multi_index_dataframe_nan_in_two_groups( + dropna, tuples, outputs, nulls_fixture, nulls_fixture2 +): + # GH 3729 this is to test that NA in different groups with different representations + df_list = [ + ["A", "B", 12, 12, 12], + ["A", nulls_fixture, 12.3, 233.0, 12], + ["B", "A", 123.23, 123, 1], + [nulls_fixture2, "B", 1, 1, 1.0], + ["A", nulls_fixture2, 1, 1, 1.0], + ] + df = pd.DataFrame(df_list, columns=["a", "b", "c", "d", "e"]) + grouped = df.groupby(["a", "b"], dropna=dropna).sum() + + mi = pd.MultiIndex.from_tuples(tuples, names=list("ab")) + + # Since right now, by default MI will drop NA from levels when we create MI + # via `from_*`, so we need to add NA for level manually afterwards. 
+ if not dropna: + mi = mi.set_levels([["A", "B", np.nan], ["A", "B", np.nan]]) + expected = pd.DataFrame(outputs, index=mi) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.parametrize( + "dropna, idx, outputs", + [ + (True, ["A", "B"], {"b": [123.23, 13.0], "c": [123.0, 13.0], "d": [1.0, 13.0]}), + ( + False, + ["A", "B", np.nan], + { + "b": [123.23, 13.0, 12.3], + "c": [123.0, 13.0, 233.0], + "d": [1.0, 13.0, 12.0], + }, + ), + ], +) +def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs): + # GH 3729 + df_list = [ + ["B", 12, 12, 12], + [None, 12.3, 233.0, 12], + ["A", 123.23, 123, 1], + ["B", 1, 1, 1.0], + ] + df = pd.DataFrame(df_list, columns=["a", "b", "c", "d"]) + grouped = df.groupby("a", dropna=dropna).sum() + + expected = pd.DataFrame(outputs, index=pd.Index(idx, dtype="object", name="a")) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.parametrize( + "dropna, idx, expected", + [ + (True, ["a", "a", "b", np.nan], pd.Series([3, 3], index=["a", "b"])), + ( + False, + ["a", "a", "b", np.nan], + pd.Series([3, 3, 3], index=["a", "b", np.nan]), + ), + ], +) +def test_groupby_dropna_series_level(dropna, idx, expected): + ser = pd.Series([1, 2, 3, 3], index=idx) + + result = ser.groupby(level=0, dropna=dropna).sum() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dropna, expected", + [ + (True, pd.Series([210.0, 350.0], index=["a", "b"], name="Max Speed")), + ( + False, + pd.Series([210.0, 350.0, 20.0], index=["a", "b", np.nan], name="Max Speed"), + ), + ], +) +def test_groupby_dropna_series_by(dropna, expected): + ser = pd.Series( + [390.0, 350.0, 30.0, 20.0], + index=["Falcon", "Falcon", "Parrot", "Parrot"], + name="Max Speed", + ) + + result = ser.groupby(["a", "b", "a", np.nan], dropna=dropna).mean() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", (False, True)) +def test_grouper_dropna_propagation(dropna): + # GH 36604 + df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]}) + gb = df.groupby("A", dropna=dropna) + assert gb.grouper.dropna == dropna + + +@pytest.mark.parametrize( + "index", + [ + pd.RangeIndex(0, 4), + list("abcd"), + pd.MultiIndex.from_product([(1, 2), ("R", "B")], names=["num", "col"]), + ], +) +def test_groupby_dataframe_slice_then_transform(dropna, index): + # GH35014 & GH35612 + expected_data = {"B": [2, 2, 1, np.nan if dropna else 1]} + + df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]}, index=index) + gb = df.groupby("A", dropna=dropna) + + result = gb.transform(len) + expected = pd.DataFrame(expected_data, index=index) + tm.assert_frame_equal(result, expected) + + result = gb[["B"]].transform(len) + expected = pd.DataFrame(expected_data, index=index) + tm.assert_frame_equal(result, expected) + + result = gb["B"].transform(len) + expected = pd.Series(expected_data["B"], index=index, name="B") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dropna, tuples, outputs", + [ + ( + True, + [["A", "B"], ["B", "A"]], + {"c": [13.0, 123.23], "d": [12.0, 123.0], "e": [1.0, 1.0]}, + ), + ( + False, + [["A", "B"], ["A", np.nan], ["B", "A"]], + { + "c": [13.0, 12.3, 123.23], + "d": [12.0, 233.0, 123.0], + "e": [1.0, 12.0, 1.0], + }, + ), + ], +) +def test_groupby_dropna_multi_index_dataframe_agg(dropna, tuples, outputs): + # GH 3729 + df_list = [ + ["A", "B", 12, 12, 12], + ["A", None, 12.3, 233.0, 12], + ["B", "A", 123.23, 123, 1], + ["A", "B", 1, 1, 1.0], + ] + df = pd.DataFrame(df_list, columns=["a", "b", "c", 
"d", "e"]) + agg_dict = {"c": sum, "d": max, "e": "min"} + grouped = df.groupby(["a", "b"], dropna=dropna).agg(agg_dict) + + mi = pd.MultiIndex.from_tuples(tuples, names=list("ab")) + + # Since right now, by default MI will drop NA from levels when we create MI + # via `from_*`, so we need to add NA for level manually afterwards. + if not dropna: + mi = mi.set_levels(["A", "B", np.nan], level="b") + expected = pd.DataFrame(outputs, index=mi) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.arm_slow +@pytest.mark.parametrize( + "datetime1, datetime2", + [ + (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")), + (pd.Timedelta("-2 days"), pd.Timedelta("-1 days")), + (pd.Period("2020-01-01"), pd.Period("2020-02-01")), + ], +) +@pytest.mark.parametrize("dropna, values", [(True, [12, 3]), (False, [12, 3, 6])]) +def test_groupby_dropna_datetime_like_data( + dropna, values, datetime1, datetime2, unique_nulls_fixture, unique_nulls_fixture2 +): + # 3729 + df = pd.DataFrame( + { + "values": [1, 2, 3, 4, 5, 6], + "dt": [ + datetime1, + unique_nulls_fixture, + datetime2, + unique_nulls_fixture2, + datetime1, + datetime1, + ], + } + ) + + if dropna: + indexes = [datetime1, datetime2] + else: + indexes = [datetime1, datetime2, np.nan] + + grouped = df.groupby("dt", dropna=dropna).agg({"values": sum}) + expected = pd.DataFrame({"values": values}, index=pd.Index(indexes, name="dt")) + + tm.assert_frame_equal(grouped, expected) + + +@pytest.mark.parametrize( + "dropna, data, selected_data, levels", + [ + pytest.param( + False, + {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]}, + {"values": [0, 1, 0, 0]}, + ["a", "b", np.nan], + id="dropna_false_has_nan", + ), + pytest.param( + True, + {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]}, + {"values": [0, 1, 0]}, + None, + id="dropna_true_has_nan", + ), + pytest.param( + # no nan in "groups"; dropna=True|False should be same. + False, + {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]}, + {"values": [0, 1, 0, 0]}, + None, + id="dropna_false_no_nan", + ), + pytest.param( + # no nan in "groups"; dropna=True|False should be same. + True, + {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]}, + {"values": [0, 1, 0, 0]}, + None, + id="dropna_true_no_nan", + ), + ], +) +def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, levels): + # GH 35889 + + df = pd.DataFrame(data) + gb = df.groupby("groups", dropna=dropna) + result = gb.apply(lambda grp: pd.DataFrame({"values": range(len(grp))})) + + mi_tuples = tuple(zip(data["groups"], selected_data["values"])) + mi = pd.MultiIndex.from_tuples(mi_tuples, names=["groups", None]) + # Since right now, by default MI will drop NA from levels when we create MI + # via `from_*`, so we need to add NA for level manually afterwards. 
+ if not dropna and levels: + mi = mi.set_levels(levels, level="groups") + + expected = pd.DataFrame(selected_data, index=mi) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("input_index", [None, ["a"], ["a", "b"]]) +@pytest.mark.parametrize("keys", [["a"], ["a", "b"]]) +@pytest.mark.parametrize("series", [True, False]) +def test_groupby_dropna_with_multiindex_input(input_index, keys, series): + # GH#46783 + obj = pd.DataFrame( + { + "a": [1, np.nan], + "b": [1, 1], + "c": [2, 3], + } + ) + + expected = obj.set_index(keys) + if series: + expected = expected["c"] + elif input_index == ["a", "b"] and keys == ["a"]: + # Column b should not be aggregated + expected = expected[["c"]] + + if input_index is not None: + obj = obj.set_index(input_index) + gb = obj.groupby(keys, dropna=False) + if series: + gb = gb["c"] + result = gb.sum() + + tm.assert_equal(result, expected) + + +def test_groupby_nan_included(): + # GH 35646 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = pd.DataFrame(data) + grouped = df.groupby("group", dropna=False) + result = grouped.indices + dtype = np.intp + expected = { + "g1": np.array([0, 2], dtype=dtype), + "g2": np.array([3], dtype=dtype), + np.nan: np.array([1, 4], dtype=dtype), + } + for result_values, expected_values in zip(result.values(), expected.values()): + tm.assert_numpy_array_equal(result_values, expected_values) + assert np.isnan(list(result.keys())[2]) + assert list(result.keys())[0:2] == ["g1", "g2"] + + +def test_groupby_drop_nan_with_multi_index(): + # GH 39895 + df = pd.DataFrame([[np.nan, 0, 1]], columns=["a", "b", "c"]) + df = df.set_index(["a", "b"]) + result = df.groupby(["a", "b"], dropna=False).first() + expected = df + tm.assert_frame_equal(result, expected) + + +# sequence_index enumerates all strings made up of x, y, z of length 4 +@pytest.mark.parametrize("sequence_index", range(3**4)) +@pytest.mark.parametrize( + "dtype", + [ + None, + "UInt8", + "Int8", + "UInt16", + "Int16", + "UInt32", + "Int32", + "UInt64", + "Int64", + "Float32", + "Int64", + "Float64", + "category", + "string", + pytest.param( + "string[pyarrow]", + marks=pytest.mark.skipif( + pa_version_under1p01, reason="pyarrow is not installed" + ), + ), + "datetime64[ns]", + "period[d]", + "Sparse[float]", + ], +) +@pytest.mark.parametrize("test_series", [True, False]) +def test_no_sort_keep_na(request, sequence_index, dtype, test_series): + # GH#46584, GH#48794 + + # Convert sequence_index into a string sequence, e.g. 5 becomes "xxyz" + # This sequence is used for the grouper. 
+ sequence = "".join( + [{0: "x", 1: "y", 2: "z"}[sequence_index // (3**k) % 3] for k in range(4)] + ) + + if dtype == "category" and "z" in sequence: + # Only xfail when nulls are present + msg = "dropna=False not correct for categorical, GH#48645" + request.node.add_marker(pytest.mark.xfail(reason=msg)) + + # Unique values to use for grouper, depends on dtype + if dtype in ("string", "string[pyarrow]"): + uniques = {"x": "x", "y": "y", "z": pd.NA} + elif dtype in ("datetime64[ns]", "period[d]"): + uniques = {"x": "2016-01-01", "y": "2017-01-01", "z": pd.NA} + else: + uniques = {"x": 1, "y": 2, "z": np.nan} + + df = pd.DataFrame( + { + "key": pd.Series([uniques[label] for label in sequence], dtype=dtype), + "a": [0, 1, 2, 3], + } + ) + gb = df.groupby("key", dropna=False, sort=False) + if test_series: + gb = gb["a"] + result = gb.sum() + + # Manually compute the groupby sum, use the labels "x", "y", and "z" to avoid + # issues with hashing np.nan + summed = {} + for idx, label in enumerate(sequence): + summed[label] = summed.get(label, 0) + idx + if dtype == "category": + index = pd.CategoricalIndex( + [uniques[e] for e in summed], + list({uniques[k]: 0 for k in sequence if not pd.isnull(uniques[k])}), + name="key", + ) + elif isinstance(dtype, str) and dtype.startswith("Sparse"): + index = pd.Index( + pd.array([uniques[label] for label in summed], dtype=dtype), name="key" + ) + else: + index = pd.Index([uniques[label] for label in summed], dtype=dtype, name="key") + expected = pd.Series(summed.values(), index=index, name="a", dtype=None) + if not test_series: + expected = expected.to_frame() + + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +@pytest.mark.parametrize("dtype", [object, None]) +def test_null_is_null_for_dtype( + sort, dtype, nulls_fixture, nulls_fixture2, test_series +): + # GH#48506 - groups should always result in using the null for the dtype + df = pd.DataFrame({"a": [1, 2]}) + groups = pd.Series([nulls_fixture, nulls_fixture2], dtype=dtype) + obj = df["a"] if test_series else df + gb = obj.groupby(groups, dropna=False, sort=sort) + result = gb.sum() + index = pd.Index([na_value_for_dtype(groups.dtype)]) + expected = pd.DataFrame({"a": [3]}, index=index) + if test_series: + tm.assert_series_equal(result, expected["a"]) + else: + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py new file mode 100644 index 00000000..7ffee412 --- /dev/null +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -0,0 +1,156 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + NaT, + Series, + Timedelta, + Timestamp, +) +import pandas._testing as tm + + +def test_group_shift_with_null_key(): + # This test is designed to replicate the segfault in issue #13813. + n_rows = 1200 + + # Generate a moderately large dataframe with occasional missing + # values in column `B`, and then group by [`A`, `B`]. This should + # force `-1` in `labels` array of `g.grouper.group_info` exactly + # at those places, where the group-by key is partially missing. 
+ df = DataFrame( + [(i % 12, i % 3 if i % 3 else np.nan, i) for i in range(n_rows)], + dtype=float, + columns=["A", "B", "Z"], + index=None, + ) + g = df.groupby(["A", "B"]) + + expected = DataFrame( + [(i + 12 if i % 3 and i < n_rows - 12 else np.nan) for i in range(n_rows)], + dtype=float, + columns=["Z"], + index=None, + ) + result = g.shift(-1) + + tm.assert_frame_equal(result, expected) + + +def test_group_shift_with_fill_value(): + # GH #24128 + n_rows = 24 + df = DataFrame( + [(i % 12, i % 3, i) for i in range(n_rows)], + dtype=float, + columns=["A", "B", "Z"], + index=None, + ) + g = df.groupby(["A", "B"]) + + expected = DataFrame( + [(i + 12 if i < n_rows - 12 else 0) for i in range(n_rows)], + dtype=float, + columns=["Z"], + index=None, + ) + result = g.shift(-1, fill_value=0) + + tm.assert_frame_equal(result, expected) + + +def test_group_shift_lose_timezone(): + # GH 30134 + now_dt = Timestamp.utcnow() + df = DataFrame({"a": [1, 1], "date": now_dt}) + result = df.groupby("a").shift(0).iloc[0] + expected = Series({"date": now_dt}, name=result.name) + tm.assert_series_equal(result, expected) + + +def test_group_diff_real_series(any_real_numpy_dtype): + df = DataFrame( + {"a": [1, 2, 3, 3, 2], "b": [1, 2, 3, 4, 5]}, + dtype=any_real_numpy_dtype, + ) + result = df.groupby("a")["b"].diff() + exp_dtype = "float" + if any_real_numpy_dtype in ["int8", "int16", "float32"]: + exp_dtype = "float32" + expected = Series([np.nan, np.nan, np.nan, 1.0, 3.0], dtype=exp_dtype, name="b") + tm.assert_series_equal(result, expected) + + +def test_group_diff_real_frame(any_real_numpy_dtype): + df = DataFrame( + { + "a": [1, 2, 3, 3, 2], + "b": [1, 2, 3, 4, 5], + "c": [1, 2, 3, 4, 6], + }, + dtype=any_real_numpy_dtype, + ) + result = df.groupby("a").diff() + exp_dtype = "float" + if any_real_numpy_dtype in ["int8", "int16", "float32"]: + exp_dtype = "float32" + expected = DataFrame( + { + "b": [np.nan, np.nan, np.nan, 1.0, 3.0], + "c": [np.nan, np.nan, np.nan, 1.0, 4.0], + }, + dtype=exp_dtype, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + [ + Timestamp("2013-01-01"), + Timestamp("2013-01-02"), + Timestamp("2013-01-03"), + ], + [Timedelta("5 days"), Timedelta("6 days"), Timedelta("7 days")], + ], +) +def test_group_diff_datetimelike(data): + df = DataFrame({"a": [1, 2, 2], "b": data}) + result = df.groupby("a")["b"].diff() + expected = Series([NaT, NaT, Timedelta("1 days")], name="b") + tm.assert_series_equal(result, expected) + + +def test_group_diff_bool(): + df = DataFrame({"a": [1, 2, 3, 3, 2], "b": [True, True, False, False, True]}) + result = df.groupby("a")["b"].diff() + expected = Series([np.nan, np.nan, np.nan, False, False], name="b") + tm.assert_series_equal(result, expected) + + +def test_group_diff_object_raises(object_dtype): + df = DataFrame( + {"a": ["foo", "bar", "bar"], "b": ["baz", "foo", "foo"]}, dtype=object_dtype + ) + with pytest.raises(TypeError, match=r"unsupported operand type\(s\) for -"): + df.groupby("a")["b"].diff() + + +def test_empty_shift_with_fill(): + # GH 41264, single-index check + df = DataFrame(columns=["a", "b", "c"]) + shifted = df.groupby(["a"]).shift(1) + shifted_with_fill = df.groupby(["a"]).shift(1, fill_value=0) + tm.assert_frame_equal(shifted, shifted_with_fill) + tm.assert_index_equal(shifted.index, shifted_with_fill.index) + + +def test_multindex_empty_shift_with_fill(): + # GH 41264, multi-index check + df = DataFrame(columns=["a", "b", "c"]) + shifted = df.groupby(["a", "b"]).shift(1) + shifted_with_fill 
= df.groupby(["a", "b"]).shift(1, fill_value=0) + tm.assert_frame_equal(shifted, shifted_with_fill) + tm.assert_index_equal(shifted.index, shifted_with_fill.index) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py new file mode 100644 index 00000000..fddf0c86 --- /dev/null +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -0,0 +1,113 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.groupby.base import maybe_normalize_deprecated_kernels +from pandas.tests.groupby import get_groupby_method_args + + +@pytest.mark.parametrize( + "obj", + [ + tm.SubclassedDataFrame({"A": np.arange(0, 10)}), + tm.SubclassedSeries(np.arange(0, 10), name="A"), + ], +) +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") +def test_groupby_preserves_subclass(obj, groupby_func): + # GH28330 -- preserve subclass through groupby operations + + if isinstance(obj, Series) and groupby_func in {"corrwith"}: + pytest.skip(f"Not applicable for Series and {groupby_func}") + # TODO(2.0) Remove after pad/backfill deprecation enforced + groupby_func = maybe_normalize_deprecated_kernels(groupby_func) + warn = FutureWarning if groupby_func in ("mad", "tshift") else None + + grouped = obj.groupby(np.arange(0, 10)) + + # Groups should preserve subclass type + assert isinstance(grouped.get_group(0), type(obj)) + + args = get_groupby_method_args(groupby_func, obj) + + with tm.assert_produces_warning(warn, match="is deprecated"): + result1 = getattr(grouped, groupby_func)(*args) + result2 = grouped.agg(groupby_func, *args) + + # Reduction or transformation kernels should preserve type + slices = {"ngroup", "cumcount", "size"} + if isinstance(obj, DataFrame) and groupby_func in slices: + assert isinstance(result1, tm.SubclassedSeries) + else: + assert isinstance(result1, type(obj)) + + # Confirm .agg() groupby operations return same results + if isinstance(result1, DataFrame): + tm.assert_frame_equal(result1, result2) + else: + tm.assert_series_equal(result1, result2) + + +def test_groupby_preserves_metadata(): + # GH-37343 + custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]}) + assert "testattr" in custom_df._metadata + custom_df.testattr = "hello" + for _, group_df in custom_df.groupby("c"): + assert group_df.testattr == "hello" + + # GH-45314 + def func(group): + assert isinstance(group, tm.SubclassedDataFrame) + assert hasattr(group, "testattr") + return group.testattr + + result = custom_df.groupby("c").apply(func) + expected = tm.SubclassedSeries(["hello"] * 3, index=Index([7, 8, 9], name="c")) + tm.assert_series_equal(result, expected) + + def func2(group): + assert isinstance(group, tm.SubclassedSeries) + assert hasattr(group, "testattr") + return group.testattr + + custom_series = tm.SubclassedSeries([1, 2, 3]) + custom_series.testattr = "hello" + result = custom_series.groupby(custom_df["c"]).apply(func2) + tm.assert_series_equal(result, expected) + result = custom_series.groupby(custom_df["c"]).agg(func2) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("obj", [DataFrame, tm.SubclassedDataFrame]) +def test_groupby_resample_preserves_subclass(obj): + # GH28330 -- preserve subclass through groupby.resample() + + df = obj( + { + "Buyer": "Carl Carl Carl Carl Joe Carl".split(), + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + datetime(2013, 9, 1, 13, 0), + 
datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ], + } + ) + df = df.set_index("Date") + + # Confirm groupby.resample() preserves dataframe type + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("Buyer").resample("5D").sum() + assert isinstance(result, obj) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py new file mode 100644 index 00000000..728575a8 --- /dev/null +++ b/pandas/tests/groupby/test_grouping.py @@ -0,0 +1,1026 @@ +""" test where we are determining what we are grouping, or getting groups """ + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) +from pandas.core.groupby.grouper import Grouping + +# selection +# -------------------------------- + + +class TestSelection: + def test_select_bad_cols(self): + df = DataFrame([[1, 2]], columns=["A", "B"]) + g = df.groupby("A") + with pytest.raises(KeyError, match="\"Columns not found: 'C'\""): + g[["C"]] + + with pytest.raises(KeyError, match="^[^A]+$"): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[["A", "C"]] + + def test_groupby_duplicated_column_errormsg(self): + # GH7511 + df = DataFrame( + columns=["A", "B", "A", "C"], data=[range(4), range(2, 6), range(0, 8, 2)] + ) + + msg = "Grouper for 'A' not 1-dimensional" + with pytest.raises(ValueError, match=msg): + df.groupby("A") + with pytest.raises(ValueError, match=msg): + df.groupby(["A", "B"]) + + grouped = df.groupby("B") + c = grouped.count() + assert c.columns.nlevels == 1 + assert c.columns.size == 3 + + def test_column_select_via_attr(self, df): + result = df.groupby("A").C.sum() + expected = df.groupby("A")["C"].sum() + tm.assert_series_equal(result, expected) + + df["mean"] = 1.5 + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").mean() + expected = df.groupby("A").agg(np.mean) + tm.assert_frame_equal(result, expected) + + def test_getitem_list_of_columns(self): + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + "E": np.random.randn(8), + } + ) + + result = df.groupby("A")[["C", "D"]].mean() + result2 = df.groupby("A")[df.columns[2:4]].mean() + + expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean() + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_getitem_numeric_column_names(self): + # GH #13731 + df = DataFrame( + { + 0: list("abcd") * 2, + 2: np.random.randn(8), + 4: np.random.randn(8), + 6: np.random.randn(8), + } + ) + result = df.groupby(0)[df.columns[1:3]].mean() + result2 = df.groupby(0)[[2, 4]].mean() + + expected = df.loc[:, [0, 2, 4]].groupby(0).mean() + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + # per GH 23566 this should raise a FutureWarning + with tm.assert_produces_warning(FutureWarning): + df.groupby(0)[2, 4].mean() + + def test_getitem_single_list_of_columns(self, df): + # per GH 23566 this should raise a FutureWarning 
+ with tm.assert_produces_warning(FutureWarning): + df.groupby("A")["C", "D"].mean() + + def test_getitem_single_column(self): + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + "E": np.random.randn(8), + } + ) + + result = df.groupby("A")["C"].mean() + + as_frame = df.loc[:, ["A", "C"]].groupby("A").mean() + as_series = as_frame.iloc[:, 0] + expected = as_series + + tm.assert_series_equal(result, expected) + + def test_indices_grouped_by_tuple_with_lambda(self): + # GH 36158 + df = DataFrame( + {"Tuples": ((x, y) for x in [0, 1] for y in np.random.randint(3, 5, 5))} + ) + + gb = df.groupby("Tuples") + gb_lambda = df.groupby(lambda x: df.iloc[x, 0]) + + expected = gb.indices + result = gb_lambda.indices + + tm.assert_dict_equal(result, expected) + + +# grouping +# -------------------------------- + + +class TestGrouping: + @pytest.mark.parametrize( + "index", + [ + tm.makeFloatIndex, + tm.makeStringIndex, + tm.makeIntIndex, + tm.makeDateIndex, + tm.makePeriodIndex, + ], + ) + def test_grouper_index_types(self, index): + # related GH5375 + # groupby misbehaving when using a Floatlike index + df = DataFrame(np.arange(10).reshape(5, 2), columns=list("AB")) + + df.index = index(len(df)) + df.groupby(list("abcde"), group_keys=False).apply(lambda x: x) + + df.index = list(reversed(df.index.tolist())) + df.groupby(list("abcde"), group_keys=False).apply(lambda x: x) + + def test_grouper_multilevel_freq(self): + + # GH 7885 + # with level and freq specified in a pd.Grouper + from datetime import ( + date, + timedelta, + ) + + d0 = date.today() - timedelta(days=14) + dates = date_range(d0, date.today()) + date_index = MultiIndex.from_product([dates, dates], names=["foo", "bar"]) + df = DataFrame(np.random.randint(0, 100, 225), index=date_index) + + # Check string level + expected = ( + df.reset_index() + .groupby([pd.Grouper(key="foo", freq="W"), pd.Grouper(key="bar", freq="W")]) + .sum() + ) + # reset index changes columns dtype to object + expected.columns = Index([0], dtype="int64") + + result = df.groupby( + [pd.Grouper(level="foo", freq="W"), pd.Grouper(level="bar", freq="W")] + ).sum() + tm.assert_frame_equal(result, expected) + + # Check integer level + result = df.groupby( + [pd.Grouper(level=0, freq="W"), pd.Grouper(level=1, freq="W")] + ).sum() + tm.assert_frame_equal(result, expected) + + def test_grouper_creation_bug(self): + + # GH 8795 + df = DataFrame({"A": [0, 0, 1, 1, 2, 2], "B": [1, 2, 3, 4, 5, 6]}) + g = df.groupby("A") + expected = g.sum() + + g = df.groupby(pd.Grouper(key="A")) + result = g.sum() + tm.assert_frame_equal(result, expected) + + g = df.groupby(pd.Grouper(key="A", axis=0)) + result = g.sum() + tm.assert_frame_equal(result, expected) + + result = g.apply(lambda x: x.sum()) + expected["A"] = [0, 2, 4] + expected = expected.loc[:, ["A", "B"]] + tm.assert_frame_equal(result, expected) + + # GH14334 + # pd.Grouper(key=...) 
may be passed in a list + df = DataFrame( + {"A": [0, 0, 0, 1, 1, 1], "B": [1, 1, 2, 2, 3, 3], "C": [1, 2, 3, 4, 5, 6]} + ) + # Group by single column + expected = df.groupby("A").sum() + g = df.groupby([pd.Grouper(key="A")]) + result = g.sum() + tm.assert_frame_equal(result, expected) + + # Group by two columns + # using a combination of strings and Grouper objects + expected = df.groupby(["A", "B"]).sum() + + # Group with two Grouper objects + g = df.groupby([pd.Grouper(key="A"), pd.Grouper(key="B")]) + result = g.sum() + tm.assert_frame_equal(result, expected) + + # Group with a string and a Grouper object + g = df.groupby(["A", pd.Grouper(key="B")]) + result = g.sum() + tm.assert_frame_equal(result, expected) + + # Group with a Grouper object and a string + g = df.groupby([pd.Grouper(key="A"), "B"]) + result = g.sum() + tm.assert_frame_equal(result, expected) + + # GH8866 + s = Series( + np.arange(8, dtype="int64"), + index=MultiIndex.from_product( + [list("ab"), range(2), date_range("20130101", periods=2)], + names=["one", "two", "three"], + ), + ) + result = s.groupby(pd.Grouper(level="three", freq="M")).sum() + expected = Series( + [28], + index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"), + ) + tm.assert_series_equal(result, expected) + + # just specifying a level breaks + result = s.groupby(pd.Grouper(level="one")).sum() + expected = s.groupby(level="one").sum() + tm.assert_series_equal(result, expected) + + def test_grouper_column_and_index(self): + # GH 14327 + + # Grouping a multi-index frame by a column and an index level should + # be equivalent to resetting the index and grouping by two columns + idx = MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("a", 3), ("b", 1), ("b", 2), ("b", 3)] + ) + idx.names = ["outer", "inner"] + df_multi = DataFrame( + {"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]}, + index=idx, + ) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_multi.reset_index().groupby(["B", "inner"]).mean() + tm.assert_frame_equal(result, expected) + + # Test the reverse grouping order + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_multi.reset_index().groupby(["inner", "B"]).mean() + tm.assert_frame_equal(result, expected) + + # Grouping a single-index frame by a column and the index should + # be equivalent to resetting the index and grouping by two columns + df_single = df_multi.reset_index("outer") + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_single.reset_index().groupby(["B", "inner"]).mean() + tm.assert_frame_equal(result, expected) + + # Test the reverse grouping order + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_single.reset_index().groupby(["inner", "B"]).mean() + tm.assert_frame_equal(result, expected) + + def test_groupby_levels_and_columns(self): + # GH9344, GH9049 + idx_names = ["x", "y"] + idx = MultiIndex.from_tuples([(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names) + df = DataFrame(np.arange(12).reshape(-1, 3), index=idx) + + by_levels = df.groupby(level=idx_names).mean() + # reset_index changes columns dtype to object + by_columns = 
df.reset_index().groupby(idx_names).mean() + + # without casting, by_columns.columns is object-dtype + by_columns.columns = by_columns.columns.astype(np.int64) + tm.assert_frame_equal(by_levels, by_columns) + + def test_groupby_categorical_index_and_columns(self, observed): + # GH18432, adapted for GH25871 + columns = ["A", "B", "A", "B"] + categories = ["B", "A"] + data = np.array( + [[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]], int + ) + cat_columns = CategoricalIndex(columns, categories=categories, ordered=True) + df = DataFrame(data=data, columns=cat_columns) + result = df.groupby(axis=1, level=0, observed=observed).sum() + expected_data = np.array([[4, 2], [4, 2], [4, 2], [4, 2], [4, 2]], int) + expected_columns = CategoricalIndex( + categories, categories=categories, ordered=True + ) + expected = DataFrame(data=expected_data, columns=expected_columns) + tm.assert_frame_equal(result, expected) + + # test transposed version + df = DataFrame(data.T, index=cat_columns) + result = df.groupby(axis=0, level=0, observed=observed).sum() + expected = DataFrame(data=expected_data.T, index=expected_columns) + tm.assert_frame_equal(result, expected) + + def test_grouper_getting_correct_binner(self): + + # GH 10063 + # using a non-time-based grouper and a time-based grouper + # and specifying levels + df = DataFrame( + {"A": 1}, + index=MultiIndex.from_product( + [list("ab"), date_range("20130101", periods=80)], names=["one", "two"] + ), + ) + result = df.groupby( + [pd.Grouper(level="one"), pd.Grouper(level="two", freq="M")] + ).sum() + expected = DataFrame( + {"A": [31, 28, 21, 31, 28, 21]}, + index=MultiIndex.from_product( + [list("ab"), date_range("20130101", freq="M", periods=3)], + names=["one", "two"], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_grouper_iter(self, df): + assert sorted(df.groupby("A").grouper) == ["bar", "foo"] + + def test_empty_groups(self, df): + # see gh-1048 + with pytest.raises(ValueError, match="No group keys passed!"): + df.groupby([]) + + def test_groupby_grouper(self, df): + grouped = df.groupby("A") + + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(grouped.grouper).mean() + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + def test_groupby_dict_mapping(self): + # GH #679 + from pandas import Series + + s = Series({"T1": 5}) + result = s.groupby({"T1": "T2"}).agg(sum) + expected = s.groupby(["T2"]).agg(sum) + tm.assert_series_equal(result, expected) + + s = Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")) + mapping = {"a": 0, "b": 0, "c": 1, "d": 1} + + result = s.groupby(mapping).mean() + result2 = s.groupby(mapping).agg(np.mean) + expected = s.groupby([0, 0, 1, 1]).mean() + expected2 = s.groupby([0, 0, 1, 1]).mean() + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, result2) + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize( + "index", + [ + [0, 1, 2, 3], + ["a", "b", "c", "d"], + [Timestamp(2021, 7, 28 + i) for i in range(4)], + ], + ) + def test_groupby_series_named_with_tuple(self, frame_or_series, index): + # GH 42731 + obj = frame_or_series([1, 2, 3, 4], index=index) + groups = Series([1, 0, 1, 0], index=index, name=("a", "a")) + result = obj.groupby(groups).last() + expected = frame_or_series([4, 3]) + expected.index.name = ("a", "a") + tm.assert_equal(result, expected) + + def test_groupby_grouper_f_sanity_checked(self): + dates = date_range("01-Jan-2013", 
periods=12, freq="MS") + ts = Series(np.random.randn(12), index=dates) + + # GH3035 + # index.map is used to apply grouper to the index + # if it fails on the elements, map tries it on the entire index as + # a sequence. That can yield invalid results that cause trouble + # down the line. + # the surprise comes from using key[0:6] rather than str(key)[0:6] + # when the elements are Timestamp. + # the result is Index[0:6], very confusing. + + msg = r"Grouper result violates len\(labels\) == len\(data\)" + with pytest.raises(AssertionError, match=msg): + ts.groupby(lambda key: key[0:6]) + + def test_grouping_error_on_multidim_input(self, df): + msg = "Grouper for '' not 1-dimensional" + with pytest.raises(ValueError, match=msg): + Grouping(df.index, df[["A", "A"]]) + + def test_multiindex_passthru(self): + + # GH 7997 + # regression from 0.14.1 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df.columns = MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) + + result = df.groupby(axis=1, level=[0, 1]).first() + tm.assert_frame_equal(result, df) + + def test_multiindex_negative_level(self, mframe): + # GH 13901 + result = mframe.groupby(level=-1).sum() + expected = mframe.groupby(level="second").sum() + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=-2).sum() + expected = mframe.groupby(level="first").sum() + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=[-2, -1]).sum() + expected = mframe.sort_index() + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=[-1, "first"]).sum() + expected = mframe.groupby(level=["second", "first"]).sum() + tm.assert_frame_equal(result, expected) + + def test_multifunc_select_col_integer_cols(self, df): + df.columns = np.arange(len(df.columns)) + + # it works! 
+ df.groupby(1, as_index=False)[2].agg({"Q": np.mean}) + + def test_multiindex_columns_empty_level(self): + lst = [["count", "values"], ["to filter", ""]] + midx = MultiIndex.from_tuples(lst) + + df = DataFrame([[1, "A"]], columns=midx) + + grouped = df.groupby("to filter").groups + assert grouped["A"] == [0] + + grouped = df.groupby([("to filter", "")]).groups + assert grouped["A"] == [0] + + df = DataFrame([[1, "A"], [2, "B"]], columns=midx) + + expected = df.groupby("to filter").groups + result = df.groupby([("to filter", "")]).groups + assert result == expected + + df = DataFrame([[1, "A"], [2, "A"]], columns=midx) + + expected = df.groupby("to filter").groups + result = df.groupby([("to filter", "")]).groups + tm.assert_dict_equal(result, expected) + + def test_groupby_multiindex_tuple(self): + # GH 17979 + df = DataFrame( + [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], + columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]), + ) + expected = df.groupby([("b", 1)]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + df2 = DataFrame( + df.values, + columns=MultiIndex.from_arrays( + [["a", "b", "b", "c"], ["d", "d", "e", "e"]] + ), + ) + expected = df2.groupby([("b", "d")]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"]) + expected = df3.groupby([("b", "d")]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + @pytest.mark.parametrize("sort", [True, False]) + def test_groupby_level(self, sort, mframe, df): + # GH 17537 + frame = mframe + deleveled = frame.reset_index() + + result0 = frame.groupby(level=0, sort=sort).sum() + result1 = frame.groupby(level=1, sort=sort).sum() + + expected0 = frame.groupby(deleveled["first"].values, sort=sort).sum() + expected1 = frame.groupby(deleveled["second"].values, sort=sort).sum() + + expected0.index.name = "first" + expected1.index.name = "second" + + assert result0.index.name == "first" + assert result1.index.name == "second" + + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) + assert result0.index.name == frame.index.names[0] + assert result1.index.name == frame.index.names[1] + + # groupby level name + result0 = frame.groupby(level="first", sort=sort).sum() + result1 = frame.groupby(level="second", sort=sort).sum() + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) + + # axis=1 + + result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum() + result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum() + tm.assert_frame_equal(result0, expected0.T) + tm.assert_frame_equal(result1, expected1.T) + + # raise exception for non-MultiIndex + msg = "level > 0 or level < -1 only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + df.groupby(level=1) + + def test_groupby_level_index_names(self, axis): + # GH4014 this used to raise ValueError since 'exp'>1 (in py2) + df = DataFrame({"exp": ["A"] * 3 + ["B"] * 3, "var1": range(6)}).set_index( + "exp" + ) + if axis in (1, "columns"): + df = df.T + df.groupby(level="exp", axis=axis) + msg = f"level name foo is not the name of the {df._get_axis_name(axis)}" + with pytest.raises(ValueError, match=msg): + df.groupby(level="foo", axis=axis) + + @pytest.mark.parametrize("sort", [True, False]) + def test_groupby_level_with_nas(self, sort): + # GH 17537 + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + 
codes=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], + ) + + # factorizing doesn't confuse things + s = Series(np.arange(8.0), index=index) + result = s.groupby(level=0, sort=sort).sum() + expected = Series([6.0, 22.0], index=[0, 1]) + tm.assert_series_equal(result, expected) + + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], + ) + + # factorizing doesn't confuse things + s = Series(np.arange(8.0), index=index) + result = s.groupby(level=0, sort=sort).sum() + expected = Series([6.0, 18.0], index=[0.0, 1.0]) + tm.assert_series_equal(result, expected) + + def test_groupby_args(self, mframe): + # PR8618 and issue 8015 + frame = mframe + + msg = "You have to supply one of 'by' and 'level'" + with pytest.raises(TypeError, match=msg): + frame.groupby() + + msg = "You have to supply one of 'by' and 'level'" + with pytest.raises(TypeError, match=msg): + frame.groupby(by=None, level=None) + + @pytest.mark.parametrize( + "sort,labels", + [ + [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]], + [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]], + ], + ) + def test_level_preserve_order(self, sort, labels, mframe): + # GH 17537 + grouped = mframe.groupby(level=0, sort=sort) + exp_labels = np.array(labels, np.intp) + tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels) + + def test_grouping_labels(self, mframe): + grouped = mframe.groupby(mframe.index.get_level_values(0)) + exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) + tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels) + + def test_list_grouper_with_nat(self): + # GH 14715 + df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")}) + df.iloc[-1] = pd.NaT + grouper = pd.Grouper(key="date", freq="AS") + + # Grouper in a list grouping + result = df.groupby([grouper]) + expected = {Timestamp("2011-01-01"): Index(list(range(364)))} + tm.assert_dict_equal(result.groups, expected) + + # Test case without a list + result = df.groupby(grouper) + expected = {Timestamp("2011-01-01"): 365} + tm.assert_dict_equal(result.groups, expected) + + @pytest.mark.parametrize( + "func,expected", + [ + ( + "transform", + Series(name=2, dtype=np.float64, index=Index([])), + ), + ( + "agg", + Series(name=2, dtype=np.float64, index=Float64Index([], name=1)), + ), + ( + "apply", + Series(name=2, dtype=np.float64, index=Float64Index([], name=1)), + ), + ], + ) + def test_evaluate_with_empty_groups(self, func, expected): + # 26208 + # test transform'ing empty groups + # (not testing other agg fns, because they return + # different index objects. 
+ df = DataFrame({1: [], 2: []}) + g = df.groupby(1, group_keys=False) + result = getattr(g[2], func)(lambda x: x) + tm.assert_series_equal(result, expected) + + def test_groupby_empty(self): + # https://github.com/pandas-dev/pandas/issues/27190 + s = Series([], name="name", dtype="float64") + gr = s.groupby([]) + + result = gr.mean() + tm.assert_series_equal(result, s) + + # check group properties + assert len(gr.grouper.groupings) == 1 + tm.assert_numpy_array_equal( + gr.grouper.group_info[0], np.array([], dtype=np.dtype(np.intp)) + ) + + tm.assert_numpy_array_equal( + gr.grouper.group_info[1], np.array([], dtype=np.dtype(np.intp)) + ) + + assert gr.grouper.group_info[2] == 0 + + # check name + assert s.groupby(s).grouper.names == ["name"] + + def test_groupby_level_index_value_all_na(self): + # issue 20519 + df = DataFrame( + [["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"] + ).set_index(["A", "B"]) + result = df.groupby(level=["A", "B"]).sum() + expected = DataFrame( + data=[], + index=MultiIndex( + levels=[Index(["x"], dtype="object"), Index([], dtype="float64")], + codes=[[], []], + names=["A", "B"], + ), + columns=["C"], + dtype="int64", + ) + tm.assert_frame_equal(result, expected) + + def test_groupby_multiindex_level_empty(self): + # https://github.com/pandas-dev/pandas/issues/31670 + df = DataFrame( + [[123, "a", 1.0], [123, "b", 2.0]], columns=["id", "category", "value"] + ) + df = df.set_index(["id", "category"]) + empty = df[df.value < 0] + result = empty.groupby("id").sum() + expected = DataFrame( + dtype="float64", columns=["value"], index=Int64Index([], name="id") + ) + tm.assert_frame_equal(result, expected) + + +# get_group +# -------------------------------- + + +class TestGetGroup: + def test_get_group(self): + # GH 5267 + # be datelike friendly + df = DataFrame( + { + "DATE": pd.to_datetime( + [ + "10-Oct-2013", + "10-Oct-2013", + "10-Oct-2013", + "11-Oct-2013", + "11-Oct-2013", + "11-Oct-2013", + ] + ), + "label": ["foo", "foo", "bar", "foo", "foo", "bar"], + "VAL": [1, 2, 3, 4, 5, 6], + } + ) + + g = df.groupby("DATE") + key = list(g.groups)[0] + result1 = g.get_group(key) + result2 = g.get_group(Timestamp(key).to_pydatetime()) + result3 = g.get_group(str(Timestamp(key))) + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + + g = df.groupby(["DATE", "label"]) + + key = list(g.groups)[0] + result1 = g.get_group(key) + result2 = g.get_group((Timestamp(key[0]).to_pydatetime(), key[1])) + result3 = g.get_group((str(Timestamp(key[0])), key[1])) + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + + # must pass a same-length tuple with multiple keys + msg = "must supply a tuple to get_group with multiple grouping keys" + with pytest.raises(ValueError, match=msg): + g.get_group("foo") + with pytest.raises(ValueError, match=msg): + g.get_group("foo") + msg = "must supply a same-length tuple to get_group with multiple grouping keys" + with pytest.raises(ValueError, match=msg): + g.get_group(("foo", "bar", "baz")) + + def test_get_group_empty_bins(self, observed): + + d = DataFrame([3, 1, 7, 6]) + bins = [0, 5, 10, 15] + g = d.groupby(pd.cut(d[0], bins), observed=observed) + + # TODO: should prob allow a str of Interval work as well + # IOW '(0, 5]' + result = g.get_group(pd.Interval(0, 5)) + expected = DataFrame([3, 1], index=[0, 1]) + tm.assert_frame_equal(result, expected) + + msg = r"Interval\(10, 15, closed='right'\)" + with pytest.raises(KeyError, match=msg): + g.get_group(pd.Interval(10, 
15)) + + def test_get_group_grouped_by_tuple(self): + # GH 8121 + df = DataFrame([[(1,), (1, 2), (1,), (1, 2)]], index=["ids"]).T + gr = df.groupby("ids") + expected = DataFrame({"ids": [(1,), (1,)]}, index=[0, 2]) + result = gr.get_group((1,)) + tm.assert_frame_equal(result, expected) + + dt = pd.to_datetime(["2010-01-01", "2010-01-02", "2010-01-01", "2010-01-02"]) + df = DataFrame({"ids": [(x,) for x in dt]}) + gr = df.groupby("ids") + result = gr.get_group(("2010-01-01",)) + expected = DataFrame({"ids": [(dt[0],), (dt[0],)]}, index=[0, 2]) + tm.assert_frame_equal(result, expected) + + def test_get_group_grouped_by_tuple_with_lambda(self): + # GH 36158 + df = DataFrame( + {"Tuples": ((x, y) for x in [0, 1] for y in np.random.randint(3, 5, 5))} + ) + + gb = df.groupby("Tuples") + gb_lambda = df.groupby(lambda x: df.iloc[x, 0]) + + expected = gb.get_group(list(gb.groups.keys())[0]) + result = gb_lambda.get_group(list(gb_lambda.groups.keys())[0]) + + tm.assert_frame_equal(result, expected) + + def test_groupby_with_empty(self): + index = pd.DatetimeIndex(()) + data = () + series = Series(data, index, dtype=object) + grouper = pd.Grouper(freq="D") + grouped = series.groupby(grouper) + assert next(iter(grouped), None) is None + + def test_groupby_with_single_column(self): + df = DataFrame({"a": list("abssbab")}) + tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) + # GH 13530 + exp = DataFrame(index=Index(["a", "b", "s"], name="a")) + tm.assert_frame_equal(df.groupby("a").count(), exp) + tm.assert_frame_equal(df.groupby("a").sum(), exp) + tm.assert_frame_equal(df.groupby("a").nth(1), exp) + + def test_gb_key_len_equal_axis_len(self): + # GH16843 + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby + df = DataFrame( + [["foo", "bar", "B", 1], ["foo", "bar", "B", 2], ["foo", "baz", "C", 3]], + columns=["first", "second", "third", "one"], + ) + df = df.set_index(["first", "second"]) + df = df.groupby(["first", "second", "third"]).size() + assert df.loc[("foo", "bar", "B")] == 2 + assert df.loc[("foo", "baz", "C")] == 1 + + +# groups & iteration +# -------------------------------- + + +class TestIteration: + def test_groups(self, df): + grouped = df.groupby(["A"]) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in grouped.groups.items(): + assert (df.loc[v]["A"] == k).all() + + grouped = df.groupby(["A", "B"]) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in grouped.groups.items(): + assert (df.loc[v]["A"] == k[0]).all() + assert (df.loc[v]["B"] == k[1]).all() + + def test_grouping_is_iterable(self, tsframe): + # this code path isn't used anywhere else + # not sure it's useful + grouped = tsframe.groupby([lambda x: x.weekday(), lambda x: x.year]) + + # test it works + for g in grouped.grouper.groupings[0]: + pass + + def test_multi_iter(self): + s = Series(np.arange(6)) + k1 = np.array(["a", "a", "a", "b", "b", "b"]) + k2 = np.array(["1", "2", "1", "2", "1", "2"]) + + grouped = s.groupby([k1, k2]) + + iterated = list(grouped) + expected = [ + ("a", "1", s[[0, 2]]), + ("a", "2", s[[1]]), + ("b", "1", s[[4]]), + ("b", "2", s[[3, 5]]), + ] + for i, ((one, two), three) in enumerate(iterated): + e1, e2, e3 = expected[i] + assert e1 == one + assert e2 == two + tm.assert_series_equal(three, e3) + + def test_multi_iter_frame(self, three_group): + k1 = np.array(["b", "b", "b", "a", "a", "a"]) + k2 = np.array(["1", "2", "1", "2", "1", 
"2"]) + df = DataFrame( + {"v1": np.random.randn(6), "v2": np.random.randn(6), "k1": k1, "k2": k2}, + index=["one", "two", "three", "four", "five", "six"], + ) + + grouped = df.groupby(["k1", "k2"]) + + # things get sorted! + iterated = list(grouped) + idx = df.index + expected = [ + ("a", "1", df.loc[idx[[4]]]), + ("a", "2", df.loc[idx[[3, 5]]]), + ("b", "1", df.loc[idx[[0, 2]]]), + ("b", "2", df.loc[idx[[1]]]), + ] + for i, ((one, two), three) in enumerate(iterated): + e1, e2, e3 = expected[i] + assert e1 == one + assert e2 == two + tm.assert_frame_equal(three, e3) + + # don't iterate through groups with no data + df["k1"] = np.array(["b", "b", "b", "a", "a", "a"]) + df["k2"] = np.array(["1", "1", "1", "2", "2", "2"]) + grouped = df.groupby(["k1", "k2"]) + groups = {key: gp for key, gp in grouped} + assert len(groups) == 2 + + # axis = 1 + three_levels = three_group.groupby(["A", "B", "C"]).mean() + grouped = three_levels.T.groupby(axis=1, level=(1, 2)) + for key, group in grouped: + pass + + def test_dictify(self, df): + dict(iter(df.groupby("A"))) + dict(iter(df.groupby(["A", "B"]))) + dict(iter(df["C"].groupby(df["A"]))) + dict(iter(df["C"].groupby([df["A"], df["B"]]))) + dict(iter(df.groupby("A")["C"])) + dict(iter(df.groupby(["A", "B"])["C"])) + + def test_groupby_with_small_elem(self): + # GH 8542 + # length=2 + df = DataFrame( + {"event": ["start", "start"], "change": [1234, 5678]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups + + res = grouped.get_group((Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0], :]) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + + df = DataFrame( + {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-09-15"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups + + res = grouped.get_group((Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0, 2], :]) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + + # length=3 + df = DataFrame( + {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-08-05"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 3 + assert grouped.ngroups == 3 + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups + assert (Timestamp("2014-08-31"), "start") in grouped.groups + + res = grouped.get_group((Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0], :]) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + res = grouped.get_group((Timestamp("2014-08-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[2], :]) + + def test_grouping_string_repr(self): + # GH 13394 + mi = MultiIndex.from_arrays([list("AAB"), list("aba")]) + df = DataFrame([[1, 2, 3]], columns=mi) + gr = 
df.groupby(df[("A", "a")]) + + result = gr.grouper.groupings[0].__repr__() + expected = "Grouping(('A', 'a'))" + assert result == expected diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py new file mode 100644 index 00000000..501a2198 --- /dev/null +++ b/pandas/tests/groupby/test_index_as_string.py @@ -0,0 +1,85 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture(params=[["inner"], ["inner", "outer"]]) +def frame(request): + levels = request.param + df = pd.DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 3, 1, 2, 3], + "A": np.arange(6), + "B": ["one", "one", "two", "two", "one", "one"], + } + ) + if levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture() +def series(): + df = pd.DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 3, 1, 2, 3], + "A": np.arange(6), + "B": ["one", "one", "two", "two", "one", "one"], + } + ) + s = df.set_index(["outer", "inner", "B"])["A"] + + return s + + +@pytest.mark.parametrize( + "key_strs,groupers", + [ + ("inner", pd.Grouper(level="inner")), # Index name + (["inner"], [pd.Grouper(level="inner")]), # List of index name + (["B", "inner"], ["B", pd.Grouper(level="inner")]), # Column and index + (["inner", "B"], [pd.Grouper(level="inner"), "B"]), # Index and column + ], +) +def test_grouper_index_level_as_string(frame, key_strs, groupers): + warn = FutureWarning if "B" not in key_strs or "outer" in frame.columns else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = frame.groupby(key_strs).mean() + expected = frame.groupby(groupers).mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "levels", + [ + "inner", + "outer", + "B", + ["inner"], + ["outer"], + ["B"], + ["inner", "outer"], + ["outer", "inner"], + ["inner", "outer", "B"], + ["B", "outer", "inner"], + ], +) +def test_grouper_index_level_as_string_series(series, levels): + + # Compute expected result + if isinstance(levels, list): + groupers = [pd.Grouper(level=lv) for lv in levels] + else: + groupers = pd.Grouper(level=levels) + + expected = series.groupby(groupers).mean() + + # Compute and check result + result = series.groupby(levels).mean() + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py new file mode 100644 index 00000000..06b77c8f --- /dev/null +++ b/pandas/tests/groupby/test_indexing.py @@ -0,0 +1,332 @@ +# Test GroupBy._positional_selector positional grouped indexing GH#42864 + +import random + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "arg, expected_rows", + [ + [0, [0, 1, 4]], + [2, [5]], + [5, []], + [-1, [3, 4, 7]], + [-2, [1, 6]], + [-6, []], + ], +) +def test_int(slice_test_df, slice_test_grouped, arg, expected_rows): + # Test single integer + result = slice_test_grouped._positional_selector[arg] + expected = slice_test_df.iloc[expected_rows] + + tm.assert_frame_equal(result, expected) + + +def test_slice(slice_test_df, slice_test_grouped): + # Test single slice + result = slice_test_grouped._positional_selector[0:3:2] + expected = slice_test_df.iloc[[0, 1, 4, 5]] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "arg, expected_rows", + [ + [[0, 2], [0, 1, 4, 5]], + [[0, 2, -1], [0, 1, 3, 4, 5, 7]], + [range(0, 3, 2), [0, 1, 4, 
5]], + [{0, 2}, [0, 1, 4, 5]], + ], + ids=[ + "list", + "negative", + "range", + "set", + ], +) +def test_list(slice_test_df, slice_test_grouped, arg, expected_rows): + # Test lists of integers and integer valued iterables + result = slice_test_grouped._positional_selector[arg] + expected = slice_test_df.iloc[expected_rows] + + tm.assert_frame_equal(result, expected) + + +def test_ints(slice_test_df, slice_test_grouped): + # Test tuple of ints + result = slice_test_grouped._positional_selector[0, 2, -1] + expected = slice_test_df.iloc[[0, 1, 3, 4, 5, 7]] + + tm.assert_frame_equal(result, expected) + + +def test_slices(slice_test_df, slice_test_grouped): + # Test tuple of slices + result = slice_test_grouped._positional_selector[:2, -2:] + expected = slice_test_df.iloc[[0, 1, 2, 3, 4, 6, 7]] + + tm.assert_frame_equal(result, expected) + + +def test_mix(slice_test_df, slice_test_grouped): + # Test mixed tuple of ints and slices + result = slice_test_grouped._positional_selector[0, 1, -2:] + expected = slice_test_df.iloc[[0, 1, 2, 3, 4, 6, 7]] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "arg, expected_rows", + [ + [0, [0, 1, 4]], + [[0, 2, -1], [0, 1, 3, 4, 5, 7]], + [(slice(None, 2), slice(-2, None)), [0, 1, 2, 3, 4, 6, 7]], + ], +) +def test_as_index(slice_test_df, arg, expected_rows): + # Test the default as_index behaviour + result = slice_test_df.groupby("Group", sort=False)._positional_selector[arg] + expected = slice_test_df.iloc[expected_rows] + + tm.assert_frame_equal(result, expected) + + +def test_doc_examples(): + # Test the examples in the documentation + df = pd.DataFrame( + [["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]], columns=["A", "B"] + ) + + grouped = df.groupby("A", as_index=False) + + result = grouped._positional_selector[1:2] + expected = pd.DataFrame([["a", 2], ["b", 5]], columns=["A", "B"], index=[1, 4]) + + tm.assert_frame_equal(result, expected) + + result = grouped._positional_selector[1, -1] + expected = pd.DataFrame( + [["a", 2], ["a", 3], ["b", 5]], columns=["A", "B"], index=[1, 2, 4] + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.fixture() +def multiindex_data(): + ndates = 100 + nitems = 20 + dates = pd.date_range("20130101", periods=ndates, freq="D") + items = [f"item {i}" for i in range(nitems)] + + data = {} + for date in dates: + nitems_for_date = nitems - random.randint(0, 12) + levels = [ + (item, random.randint(0, 10000) / 100, random.randint(0, 10000) / 100) + for item in items[:nitems_for_date] + ] + levels.sort(key=lambda x: x[1]) + data[date] = levels + + return data + + +def _make_df_from_data(data): + rows = {} + for date in data: + for level in data[date]: + rows[(date, level[0])] = {"A": level[1], "B": level[2]} + + df = pd.DataFrame.from_dict(rows, orient="index") + df.index.names = ("Date", "Item") + return df + + +def test_multiindex(multiindex_data): + # Test the multiindex mentioned as the use-case in the documentation + df = _make_df_from_data(multiindex_data) + result = df.groupby("Date", as_index=False).nth(slice(3, -3)) + + sliced = {date: multiindex_data[date][3:-3] for date in multiindex_data} + expected = _make_df_from_data(sliced) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("arg", [1, 5, 30, 1000, -1, -5, -30, -1000]) +@pytest.mark.parametrize("method", ["head", "tail"]) +@pytest.mark.parametrize("simulated", [True, False]) +def test_against_head_and_tail(arg, method, simulated): + # Test gives the same results as grouped head and tail + n_groups = 100 
+ n_rows_per_group = 30 + + data = { + "group": [ + f"group {g}" for j in range(n_rows_per_group) for g in range(n_groups) + ], + "value": [ + f"group {g} row {j}" + for j in range(n_rows_per_group) + for g in range(n_groups) + ], + } + df = pd.DataFrame(data) + grouped = df.groupby("group", as_index=False) + size = arg if arg >= 0 else n_rows_per_group + arg + + if method == "head": + result = grouped._positional_selector[:arg] + + if simulated: + indices = [] + for j in range(size): + for i in range(n_groups): + if j * n_groups + i < n_groups * n_rows_per_group: + indices.append(j * n_groups + i) + + expected = df.iloc[indices] + + else: + expected = grouped.head(arg) + + else: + result = grouped._positional_selector[-arg:] + + if simulated: + indices = [] + for j in range(size): + for i in range(n_groups): + if (n_rows_per_group + j - size) * n_groups + i >= 0: + indices.append((n_rows_per_group + j - size) * n_groups + i) + + expected = df.iloc[indices] + + else: + expected = grouped.tail(arg) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("start", [None, 0, 1, 10, -1, -10]) +@pytest.mark.parametrize("stop", [None, 0, 1, 10, -1, -10]) +@pytest.mark.parametrize("step", [None, 1, 5]) +def test_against_df_iloc(start, stop, step): + # Test that a single group gives the same results as DataFrame.iloc + n_rows = 30 + + data = { + "group": ["group 0"] * n_rows, + "value": list(range(n_rows)), + } + df = pd.DataFrame(data) + grouped = df.groupby("group", as_index=False) + + result = grouped._positional_selector[start:stop:step] + expected = df.iloc[start:stop:step] + + tm.assert_frame_equal(result, expected) + + +def test_series(): + # Test grouped Series + ser = pd.Series([1, 2, 3, 4, 5], index=["a", "a", "a", "b", "b"]) + grouped = ser.groupby(level=0) + result = grouped._positional_selector[1:2] + expected = pd.Series([2, 5], index=["a", "b"]) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("step", [1, 2, 3, 4, 5]) +def test_step(step): + # Test slice with various step values + data = [["x", f"x{i}"] for i in range(5)] + data += [["y", f"y{i}"] for i in range(4)] + data += [["z", f"z{i}"] for i in range(3)] + df = pd.DataFrame(data, columns=["A", "B"]) + + grouped = df.groupby("A", as_index=False) + + result = grouped._positional_selector[::step] + + data = [["x", f"x{i}"] for i in range(0, 5, step)] + data += [["y", f"y{i}"] for i in range(0, 4, step)] + data += [["z", f"z{i}"] for i in range(0, 3, step)] + + index = [0 + i for i in range(0, 5, step)] + index += [5 + i for i in range(0, 4, step)] + index += [9 + i for i in range(0, 3, step)] + + expected = pd.DataFrame(data, columns=["A", "B"], index=index) + + tm.assert_frame_equal(result, expected) + + +@pytest.fixture() +def column_group_df(): + return pd.DataFrame( + [[0, 1, 2, 3, 4, 5, 6], [0, 0, 1, 0, 1, 0, 2]], + columns=["A", "B", "C", "D", "E", "F", "G"], + ) + + +def test_column_axis(column_group_df): + g = column_group_df.groupby(column_group_df.iloc[1], axis=1) + result = g._positional_selector[1:-1] + expected = column_group_df.iloc[:, [1, 3]] + + tm.assert_frame_equal(result, expected) + + +def test_columns_on_iter(): + # GitHub issue #44821 + df = pd.DataFrame({k: range(10) for k in "ABC"}) + + # Group-by and select columns + cols = ["A", "B"] + for _, dg in df.groupby(df.A < 4)[cols]: + tm.assert_index_equal(dg.columns, pd.Index(cols)) + assert "C" not in dg.columns + + +@pytest.mark.parametrize("func", [list, pd.Index, pd.Series, np.array]) +def 
test_groupby_duplicated_columns(func): + # GH#44924 + df = pd.DataFrame( + { + "A": [1, 2], + "B": [3, 3], + "C": ["G", "G"], + } + ) + result = df.groupby("C")[func(["A", "B", "A"])].mean() + expected = pd.DataFrame( + [[1.5, 3.0, 1.5]], columns=["A", "B", "A"], index=pd.Index(["G"], name="C") + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_get_nonexisting_groups(): + # GH#32492 + df = pd.DataFrame( + data={ + "A": ["a1", "a2", None], + "B": ["b1", "b2", "b1"], + "val": [1, 2, 3], + } + ) + grps = df.groupby(by=["A", "B"]) + + msg = "('a2', 'b1')" + with pytest.raises(KeyError, match=msg): + grps.get_group(("a2", "b1")) diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py new file mode 100644 index 00000000..24abbd0f --- /dev/null +++ b/pandas/tests/groupby/test_libgroupby.py @@ -0,0 +1,284 @@ +import numpy as np +import pytest + +from pandas._libs import groupby as libgroupby +from pandas._libs.groupby import ( + group_cumprod_float64, + group_cumsum, + group_mean, + group_var, +) + +from pandas.core.dtypes.common import ensure_platform_int + +from pandas import isna +import pandas._testing as tm + + +class GroupVarTestMixin: + def test_group_var_generic_1d(self): + prng = np.random.RandomState(1234) + + out = (np.nan * np.ones((5, 1))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * prng.rand(15, 1).astype(self.dtype) + labels = np.tile(np.arange(5), (3,)).astype("intp") + + expected_out = ( + np.squeeze(values).reshape((5, 3), order="F").std(axis=1, ddof=1) ** 2 + )[:, np.newaxis] + expected_counts = counts + 3 + + self.algo(out, counts, values, labels) + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_1d_flat_labels(self): + prng = np.random.RandomState(1234) + + out = (np.nan * np.ones((1, 1))).astype(self.dtype) + counts = np.zeros(1, dtype="int64") + values = 10 * prng.rand(5, 1).astype(self.dtype) + labels = np.zeros(5, dtype="intp") + + expected_out = np.array([[values.std(ddof=1) ** 2]]) + expected_counts = counts + 5 + + self.algo(out, counts, values, labels) + + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_2d_all_finite(self): + prng = np.random.RandomState(1234) + + out = (np.nan * np.ones((5, 2))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * prng.rand(10, 2).astype(self.dtype) + labels = np.tile(np.arange(5), (2,)).astype("intp") + + expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2 + expected_counts = counts + 2 + + self.algo(out, counts, values, labels) + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_2d_some_nan(self): + prng = np.random.RandomState(1234) + + out = (np.nan * np.ones((5, 2))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * prng.rand(10, 2).astype(self.dtype) + values[:, 1] = np.nan + labels = np.tile(np.arange(5), (2,)).astype("intp") + + expected_out = np.vstack( + [ + values[:, 0].reshape(5, 2, order="F").std(ddof=1, axis=1) ** 2, + np.nan * np.ones(5), + ] + ).T.astype(self.dtype) + expected_counts = counts + 2 + + self.algo(out, counts, values, labels) + tm.assert_almost_equal(out, expected_out, rtol=0.5e-06) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_constant(self): + # Regression test from GH 10448. 
+ + out = np.array([[np.nan]], dtype=self.dtype) + counts = np.array([0], dtype="int64") + values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype) + labels = np.zeros(3, dtype="intp") + + self.algo(out, counts, values, labels) + + assert counts[0] == 3 + assert out[0, 0] >= 0 + tm.assert_almost_equal(out[0, 0], 0.0) + + +class TestGroupVarFloat64(GroupVarTestMixin): + __test__ = True + + algo = staticmethod(group_var) + dtype = np.float64 + rtol = 1e-5 + + def test_group_var_large_inputs(self): + prng = np.random.RandomState(1234) + + out = np.array([[np.nan]], dtype=self.dtype) + counts = np.array([0], dtype="int64") + values = (prng.rand(10**6) + 10**12).astype(self.dtype) + values.shape = (10**6, 1) + labels = np.zeros(10**6, dtype="intp") + + self.algo(out, counts, values, labels) + + assert counts[0] == 10**6 + tm.assert_almost_equal(out[0, 0], 1.0 / 12, rtol=0.5e-3) + + +class TestGroupVarFloat32(GroupVarTestMixin): + __test__ = True + + algo = staticmethod(group_var) + dtype = np.float32 + rtol = 1e-2 + + +@pytest.mark.parametrize("dtype", ["float32", "float64"]) +def test_group_ohlc(dtype): + obj = np.array(np.random.randn(20), dtype=dtype) + + bins = np.array([6, 12, 20]) + out = np.zeros((3, 4), dtype) + counts = np.zeros(len(out), dtype=np.int64) + labels = ensure_platform_int(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) + + func = libgroupby.group_ohlc + func(out, counts, obj[:, None], labels) + + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])]) + + tm.assert_almost_equal(out, expected) + tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64)) + + obj[:6] = np.nan + func(out, counts, obj[:, None], labels) + expected[0] = np.nan + tm.assert_almost_equal(out, expected) + + +def _check_cython_group_transform_cumulative(pd_op, np_op, dtype): + """ + Check a group transform that executes a cumulative function. + + Parameters + ---------- + pd_op : callable + The pandas cumulative function. + np_op : callable + The analogous one in NumPy. + dtype : type + The specified dtype of the data. 
+ """ + is_datetimelike = False + + data = np.array([[1], [2], [3], [4]], dtype=dtype) + answer = np.zeros_like(data) + + labels = np.array([0, 0, 0, 0], dtype=np.intp) + ngroups = 1 + pd_op(answer, data, labels, ngroups, is_datetimelike) + + tm.assert_numpy_array_equal(np_op(data), answer[:, 0], check_dtype=False) + + +@pytest.mark.parametrize("np_dtype", ["int64", "uint64", "float32", "float64"]) +def test_cython_group_transform_cumsum(np_dtype): + # see gh-4095 + dtype = np.dtype(np_dtype).type + pd_op, np_op = group_cumsum, np.cumsum + _check_cython_group_transform_cumulative(pd_op, np_op, dtype) + + +def test_cython_group_transform_cumprod(): + # see gh-4095 + dtype = np.float64 + pd_op, np_op = group_cumprod_float64, np.cumproduct + _check_cython_group_transform_cumulative(pd_op, np_op, dtype) + + +def test_cython_group_transform_algos(): + # see gh-4095 + is_datetimelike = False + + # with nans + labels = np.array([0, 0, 0, 0, 0], dtype=np.intp) + ngroups = 1 + + data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64") + actual = np.zeros_like(data) + actual.fill(np.nan) + group_cumprod_float64(actual, data, labels, ngroups, is_datetimelike) + expected = np.array([1, 2, 6, np.nan, 24], dtype="float64") + tm.assert_numpy_array_equal(actual[:, 0], expected) + + actual = np.zeros_like(data) + actual.fill(np.nan) + group_cumsum(actual, data, labels, ngroups, is_datetimelike) + expected = np.array([1, 3, 6, np.nan, 10], dtype="float64") + tm.assert_numpy_array_equal(actual[:, 0], expected) + + # timedelta + is_datetimelike = True + data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None] + actual = np.zeros_like(data, dtype="int64") + group_cumsum(actual, data.view("int64"), labels, ngroups, is_datetimelike) + expected = np.array( + [ + np.timedelta64(1, "ns"), + np.timedelta64(2, "ns"), + np.timedelta64(3, "ns"), + np.timedelta64(4, "ns"), + np.timedelta64(5, "ns"), + ] + ) + tm.assert_numpy_array_equal(actual[:, 0].view("m8[ns]"), expected) + + +def test_cython_group_mean_datetimelike(): + actual = np.zeros(shape=(1, 1), dtype="float64") + counts = np.array([0], dtype="int64") + data = ( + np.array( + [np.timedelta64(2, "ns"), np.timedelta64(4, "ns"), np.timedelta64("NaT")], + dtype="m8[ns]", + )[:, None] + .view("int64") + .astype("float64") + ) + labels = np.zeros(len(data), dtype=np.intp) + + group_mean(actual, counts, data, labels, is_datetimelike=True) + + tm.assert_numpy_array_equal(actual[:, 0], np.array([3], dtype="float64")) + + +def test_cython_group_mean_wrong_min_count(): + actual = np.zeros(shape=(1, 1), dtype="float64") + counts = np.zeros(1, dtype="int64") + data = np.zeros(1, dtype="float64")[:, None] + labels = np.zeros(1, dtype=np.intp) + + with pytest.raises(AssertionError, match="min_count"): + group_mean(actual, counts, data, labels, is_datetimelike=True, min_count=0) + + +def test_cython_group_mean_not_datetimelike_but_has_NaT_values(): + actual = np.zeros(shape=(1, 1), dtype="float64") + counts = np.array([0], dtype="int64") + data = ( + np.array( + [np.timedelta64("NaT"), np.timedelta64("NaT")], + dtype="m8[ns]", + )[:, None] + .view("int64") + .astype("float64") + ) + labels = np.zeros(len(data), dtype=np.intp) + + group_mean(actual, counts, data, labels, is_datetimelike=False) + + tm.assert_numpy_array_equal( + actual[:, 0], np.array(np.divide(np.add(data[0], data[1]), 2), dtype="float64") + ) diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py new file mode 100644 index 00000000..b26ee057 --- 
/dev/null +++ b/pandas/tests/groupby/test_min_max.py @@ -0,0 +1,244 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + + +def test_max_min_non_numeric(): + # #2700 + aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]}) + + result = aa.groupby("nn").max() + assert "ss" in result + + result = aa.groupby("nn").max(numeric_only=False) + assert "ss" in result + + result = aa.groupby("nn").min() + assert "ss" in result + + result = aa.groupby("nn").min(numeric_only=False) + assert "ss" in result + + +def test_max_min_object_multiple_columns(using_array_manager): + # GH#41111 case where the aggregation is valid for some columns but not + # others; we split object blocks column-wise, consistent with + # DataFrame._reduce + + df = DataFrame( + { + "A": [1, 1, 2, 2, 3], + "B": [1, "foo", 2, "bar", False], + "C": ["a", "b", "c", "d", "e"], + } + ) + df._consolidate_inplace() # should already be consolidate, but double-check + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + gb = df.groupby("A") + + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = gb.max(numeric_only=False) + # "max" is valid for column "C" but not for "B" + ei = Index([1, 2, 3], name="A") + expected = DataFrame({"C": ["b", "d", "e"]}, index=ei) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = gb.min(numeric_only=False) + # "min" is valid for column "C" but not for "B" + ei = Index([1, 2, 3], name="A") + expected = DataFrame({"C": ["a", "c", "e"]}, index=ei) + tm.assert_frame_equal(result, expected) + + +def test_min_date_with_nans(): + # GH26321 + dates = pd.to_datetime( + Series(["2019-05-09", "2019-05-09", "2019-05-09"]), format="%Y-%m-%d" + ).dt.date + df = DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates}) + + result = df.groupby("b", as_index=False)["c"].min()["c"] + expected = pd.to_datetime( + Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d" + ).dt.date + tm.assert_series_equal(result, expected) + + result = df.groupby("b")["c"].min() + expected.index.name = "b" + tm.assert_series_equal(result, expected) + + +def test_max_inat(): + # GH#40767 dont interpret iNaT as NaN + ser = Series([1, iNaT]) + gb = ser.groupby([1, 1]) + + result = gb.max(min_count=2) + expected = Series({1: 1}, dtype=np.int64) + tm.assert_series_equal(result, expected, check_exact=True) + + result = gb.min(min_count=2) + expected = Series({1: iNaT}, dtype=np.int64) + tm.assert_series_equal(result, expected, check_exact=True) + + # not enough entries -> gets masked to NaN + result = gb.min(min_count=3) + expected = Series({1: np.nan}) + tm.assert_series_equal(result, expected, check_exact=True) + + +def test_max_inat_not_all_na(): + # GH#40767 dont interpret iNaT as NaN + + # make sure we dont round iNaT+1 to iNaT + ser = Series([1, iNaT, 2, iNaT + 1]) + gb = ser.groupby([1, 2, 3, 3]) + result = gb.min(min_count=2) + + # Note: in converting to float64, the iNaT + 1 maps to iNaT, i.e. 
is lossy + expected = Series({1: np.nan, 2: np.nan, 3: iNaT + 1}) + tm.assert_series_equal(result, expected, check_exact=True) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_column(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a")["b"], func)() + idx = Int64Index([1, 2], name="a") + expected = Series(periods, index=idx, name="b") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_frame(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a"), func)() + idx = Int64Index([1, 2], name="a") + expected = DataFrame({"b": periods}, index=idx) + + tm.assert_frame_equal(result, expected) + + +def test_aggregate_numeric_object_dtype(): + # https://github.com/pandas-dev/pandas/issues/39329 + # simplified case: multiple object columns where one is all-NaN + # -> gets split as the all-NaN is inferred as float + df = DataFrame( + {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4}, + ).astype(object) + result = df.groupby("key").min() + expected = DataFrame( + {"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]} + ).set_index("key") + tm.assert_frame_equal(result, expected) + + # same but with numbers + df = DataFrame( + {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)}, + ).astype(object) + result = df.groupby("key").min() + expected = DataFrame( + {"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]} + ).set_index("key") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_aggregate_categorical_lost_index(func: str): + # GH: 28641 groupby drops index, when grouping over categorical column with min/max + ds = Series(["b"], dtype="category").cat.as_ordered() + df = DataFrame({"A": [1997], "B": ds}) + result = df.groupby("A").agg({"B": func}) + expected = DataFrame({"B": ["b"]}, index=Index([1997], name="A")) + + # ordered categorical dtype should be preserved + expected["B"] = expected["B"].astype(ds.dtype) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["Int64", "Int32", "Float64", "Float32", "boolean"]) +def test_groupby_min_max_nullable(dtype): + if dtype == "Int64": + # GH#41743 avoid precision loss + ts = 1618556707013635762 + elif dtype == "boolean": + ts = 0 + else: + ts = 4.0 + + df = DataFrame({"id": [2, 2], "ts": [ts, ts + 1]}) + df["ts"] = df["ts"].astype(dtype) + + gb = df.groupby("id") + + result = gb.min() + expected = df.iloc[:1].set_index("id") + tm.assert_frame_equal(result, expected) + + res_max = gb.max() + expected_max = df.iloc[1:].set_index("id") + tm.assert_frame_equal(res_max, expected_max) + + result2 = gb.min(min_count=3) + expected2 = DataFrame({"ts": [pd.NA]}, index=expected.index, dtype=dtype) + tm.assert_frame_equal(result2, expected2) + + res_max2 = gb.max(min_count=3) + tm.assert_frame_equal(res_max2, expected2) + + # Case with NA values + df2 = DataFrame({"id": [2, 2, 2], "ts": [ts, pd.NA, ts + 1]}) + df2["ts"] = df2["ts"].astype(dtype) + gb2 = df2.groupby("id") + + result3 = gb2.min() + tm.assert_frame_equal(result3, expected) + + res_max3 = gb2.max() + tm.assert_frame_equal(res_max3, expected_max) + + result4 = gb2.min(min_count=100) + 
tm.assert_frame_equal(result4, expected2) + + res_max4 = gb2.max(min_count=100) + tm.assert_frame_equal(res_max4, expected2) + + +def test_min_max_nullable_uint64_empty_group(): + # don't raise NotImplementedError from libgroupby + cat = pd.Categorical([0] * 10, categories=[0, 1]) + df = DataFrame({"A": cat, "B": pd.array(np.arange(10, dtype=np.uint64))}) + gb = df.groupby("A") + + res = gb.min() + + idx = pd.CategoricalIndex([0, 1], dtype=cat.dtype, name="A") + expected = DataFrame({"B": pd.array([0, pd.NA], dtype="UInt64")}, index=idx) + tm.assert_frame_equal(res, expected) + + res = gb.max() + expected.iloc[0, 0] = 9 + tm.assert_frame_equal(res, expected) diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py new file mode 100644 index 00000000..76da8dfe --- /dev/null +++ b/pandas/tests/groupby/test_missing.py @@ -0,0 +1,155 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + date_range, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("func", ["ffill", "bfill"]) +def test_groupby_column_index_name_lost_fill_funcs(func): + # GH: 29764 groupby loses index sometimes + df = DataFrame( + [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]], + columns=Index(["type", "a", "b"], name="idx"), + ) + df_grouped = df.groupby(["type"])[["a", "b"]] + result = getattr(df_grouped, func)().columns + expected = Index(["a", "b"], name="idx") + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("func", ["ffill", "bfill"]) +def test_groupby_fill_duplicate_column_names(func): + # GH: 25610 ValueError with duplicate column names + df1 = DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]}) + df2 = DataFrame({"field1": [1, np.nan, 4]}) + df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"]) + expected = DataFrame( + [[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"] + ) + result = getattr(df_grouped, func)() + tm.assert_frame_equal(result, expected) + + +def test_ffill_missing_arguments(): + # GH 14955 + df = DataFrame({"a": [1, 2], "b": [1, 1]}) + with pytest.raises(ValueError, match="Must specify a fill"): + df.groupby("b").fillna() + + +@pytest.mark.parametrize( + "method, expected", [("ffill", [None, "a", "a"]), ("bfill", ["a", "a", None])] +) +def test_fillna_with_string_dtype(method, expected): + # GH 40250 + df = DataFrame({"a": pd.array([None, "a", None], dtype="string"), "b": [0, 0, 0]}) + grp = df.groupby("b") + result = grp.fillna(method=method) + expected = DataFrame({"a": pd.array(expected, dtype="string")}) + tm.assert_frame_equal(result, expected) + + +def test_fill_consistency(): + + # GH9221 + # pass thru keyword arguments to the generated wrapper + # are set if the passed kw is None (only) + df = DataFrame( + index=pd.MultiIndex.from_product( + [["value1", "value2"], date_range("2014-01-01", "2014-01-06")] + ), + columns=Index(["1", "2"], name="id"), + ) + df["1"] = [ + np.nan, + 1, + np.nan, + np.nan, + 11, + np.nan, + np.nan, + 2, + np.nan, + np.nan, + 22, + np.nan, + ] + df["2"] = [ + np.nan, + 3, + np.nan, + np.nan, + 33, + np.nan, + np.nan, + 4, + np.nan, + np.nan, + 44, + np.nan, + ] + + expected = df.groupby(level=0, axis=0).fillna(method="ffill") + result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["ffill", "bfill"]) +@pytest.mark.parametrize("dropna", [True, False]) +@pytest.mark.parametrize("has_nan_group", [True, False]) +def 
test_ffill_handles_nan_groups(dropna, method, has_nan_group): + # GH 34725 + + df_without_nan_rows = DataFrame([(1, 0.1), (2, 0.2)]) + + ridx = [-1, 0, -1, -1, 1, -1] + df = df_without_nan_rows.reindex(ridx).reset_index(drop=True) + + group_b = np.nan if has_nan_group else "b" + df["group_col"] = pd.Series(["a"] * 3 + [group_b] * 3) + + grouped = df.groupby(by="group_col", dropna=dropna) + result = getattr(grouped, method)(limit=None) + + expected_rows = { + ("ffill", True, True): [-1, 0, 0, -1, -1, -1], + ("ffill", True, False): [-1, 0, 0, -1, 1, 1], + ("ffill", False, True): [-1, 0, 0, -1, 1, 1], + ("ffill", False, False): [-1, 0, 0, -1, 1, 1], + ("bfill", True, True): [0, 0, -1, -1, -1, -1], + ("bfill", True, False): [0, 0, -1, 1, 1, -1], + ("bfill", False, True): [0, 0, -1, 1, 1, -1], + ("bfill", False, False): [0, 0, -1, 1, 1, -1], + } + + ridx = expected_rows.get((method, dropna, has_nan_group)) + expected = df_without_nan_rows.reindex(ridx).reset_index(drop=True) + # columns are a 'take' on df.columns, which are object dtype + expected.columns = expected.columns.astype(object) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("min_count, value", [(2, np.nan), (-1, 1.0)]) +@pytest.mark.parametrize("func", ["first", "last", "max", "min"]) +def test_min_count(func, min_count, value): + # GH#37821 + df = DataFrame({"a": [1] * 3, "b": [1, np.nan, np.nan], "c": [np.nan] * 3}) + result = getattr(df.groupby("a"), func)(min_count=min_count) + expected = DataFrame({"b": [value], "c": [np.nan]}, index=Index([1], name="a")) + tm.assert_frame_equal(result, expected) + + +def test_indices_with_missing(): + # GH 9304 + df = DataFrame({"a": [1, 1, np.nan], "b": [2, 3, 4], "c": [5, 6, 7]}) + g = df.groupby(["a", "b"]) + result = g.indices + expected = {(1.0, 2): np.array([0]), (1.0, 3): np.array([1])} + assert result == expected diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py new file mode 100644 index 00000000..187c8007 --- /dev/null +++ b/pandas/tests/groupby/test_nth.py @@ -0,0 +1,843 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + isna, +) +import pandas._testing as tm + + +def test_first_last_nth(df): + # tests for first / last / nth + grouped = df.groupby("A") + first = grouped.first() + expected = df.loc[[1, 0], ["B", "C", "D"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(first, expected) + + nth = grouped.nth(0) + tm.assert_frame_equal(nth, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ["B", "C", "D"]] + expected.index = Index(["bar", "foo"], name="A") + tm.assert_frame_equal(last, expected) + + nth = grouped.nth(-1) + tm.assert_frame_equal(nth, expected) + + nth = grouped.nth(1) + expected = df.loc[[2, 3], ["B", "C", "D"]].copy() + expected.index = Index(["foo", "bar"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(nth, expected) + + # it works! 
+ grouped["B"].first() + grouped["B"].last() + grouped["B"].nth(0) + + df.loc[df["A"] == "foo", "B"] = np.nan + assert isna(grouped["B"].first()["foo"]) + assert isna(grouped["B"].last()["foo"]) + assert isna(grouped["B"].nth(0)["foo"]) + + # v0.14.0 whatsnew + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + result = g.first() + expected = df.iloc[[1, 2]].set_index("A") + tm.assert_frame_equal(result, expected) + + expected = df.iloc[[1, 2]].set_index("A") + result = g.nth(0, dropna="any") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_na_object(method, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32123 + groups = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby("a") + result = getattr(groups, method)() + + if method == "first": + values = [1, 3] + else: + values = [2, 3] + + values = np.array(values, dtype=result["b"].dtype) + idx = Index([1, 2], name="a") + expected = DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index", [0, -1]) +def test_nth_with_na_object(index, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32123 + groups = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby("a") + result = groups.nth(index) + + if index == 0: + values = [1, 3] + else: + values = [2, nulls_fixture] + + values = np.array(values, dtype=result["b"].dtype) + idx = Index([1, 2], name="a") + expected = DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_None(method): + # https://github.com/pandas-dev/pandas/issues/32800 + # None should be preserved as object dtype + df = DataFrame.from_dict({"id": ["a"], "value": [None]}) + groups = df.groupby("id", as_index=False) + result = getattr(groups, method)() + + tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize("method", ["first", "last"]) +@pytest.mark.parametrize( + "df, expected", + [ + ( + DataFrame({"id": "a", "value": [None, "foo", np.nan]}), + DataFrame({"value": ["foo"]}, index=Index(["a"], name="id")), + ), + ( + DataFrame({"id": "a", "value": [np.nan]}, dtype=object), + DataFrame({"value": [None]}, index=Index(["a"], name="id")), + ), + ], +) +def test_first_last_with_None_expanded(method, df, expected): + # GH 32800, 38286 + result = getattr(df.groupby("id"), method)() + tm.assert_frame_equal(result, expected) + + +def test_first_last_nth_dtypes(df_mixed_floats): + + df = df_mixed_floats.copy() + df["E"] = True + df["F"] = 1 + + # tests for first / last / nth + grouped = df.groupby("A") + first = grouped.first() + expected = df.loc[[1, 0], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(first, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(last, expected) + + nth = grouped.nth(1) + expected = df.loc[[3, 2], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(nth, expected) + + # GH 2763, first/last shifting dtypes + idx = list(range(10)) + idx.append(9) + s = Series(data=range(11), index=idx, name="IntCol") + assert s.dtype == "int64" + f = 
s.groupby(level=0).first() + assert f.dtype == "int64" + + +def test_first_last_nth_nan_dtype(): + # GH 33591 + df = DataFrame({"data": ["A"], "nans": Series([np.nan], dtype=object)}) + + grouped = df.groupby("data") + expected = df.set_index("data").nans + tm.assert_series_equal(grouped.nans.first(), expected) + tm.assert_series_equal(grouped.nans.last(), expected) + tm.assert_series_equal(grouped.nans.nth(-1), expected) + tm.assert_series_equal(grouped.nans.nth(0), expected) + + +def test_first_strings_timestamps(): + # GH 11244 + test = DataFrame( + { + Timestamp("2012-01-01 00:00:00"): ["a", "b"], + Timestamp("2012-01-02 00:00:00"): ["c", "d"], + "name": ["e", "e"], + "aaaa": ["f", "g"], + } + ) + result = test.groupby("name").first() + expected = DataFrame( + [["a", "c", "f"]], + columns=Index([Timestamp("2012-01-01"), Timestamp("2012-01-02"), "aaaa"]), + index=Index(["e"], name="name"), + ) + tm.assert_frame_equal(result, expected) + + +def test_nth(): + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + + tm.assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index("A")) + tm.assert_frame_equal(g.nth(1), df.iloc[[1]].set_index("A")) + tm.assert_frame_equal(g.nth(2), df.loc[[]].set_index("A")) + tm.assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index("A")) + tm.assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index("A")) + tm.assert_frame_equal(g.nth(-3), df.loc[[]].set_index("A")) + tm.assert_series_equal(g.B.nth(0), df.set_index("A").B.iloc[[0, 2]]) + tm.assert_series_equal(g.B.nth(1), df.set_index("A").B.iloc[[1]]) + tm.assert_frame_equal(g[["B"]].nth(0), df.loc[[0, 2], ["A", "B"]].set_index("A")) + + exp = df.set_index("A") + tm.assert_frame_equal(g.nth(0, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(-1, dropna="any"), exp.iloc[[1, 2]]) + + exp["B"] = np.nan + tm.assert_frame_equal(g.nth(7, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(2, dropna="any"), exp.iloc[[1, 2]]) + + # out of bounds, regression from 0.13.1 + # GH 6621 + df = DataFrame( + { + "color": {0: "green", 1: "green", 2: "red", 3: "red", 4: "red"}, + "food": {0: "ham", 1: "eggs", 2: "eggs", 3: "ham", 4: "pork"}, + "two": { + 0: 1.5456590000000001, + 1: -0.070345000000000005, + 2: -2.4004539999999999, + 3: 0.46206000000000003, + 4: 0.52350799999999997, + }, + "one": { + 0: 0.56573799999999996, + 1: -0.9742360000000001, + 2: 1.033801, + 3: -0.78543499999999999, + 4: 0.70422799999999997, + }, + } + ).set_index(["color", "food"]) + + result = df.groupby(level=0, as_index=False).nth(2) + expected = df.iloc[[-1]] + tm.assert_frame_equal(result, expected) + + result = df.groupby(level=0, as_index=False).nth(3) + expected = df.loc[[]] + tm.assert_frame_equal(result, expected) + + # GH 7559 + # from the vbench + df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype="int64") + s = df[1] + g = df[0] + expected = s.groupby(g).first() + expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) + tm.assert_series_equal(expected2, expected, check_names=False) + assert expected.name == 1 + assert expected2.name == 1 + + # validate first + v = s[g == 1].iloc[0] + assert expected.iloc[0] == v + assert expected2.iloc[0] == v + + # this is NOT the same as .first (as sorted is default!) 
+ # as it keeps the order in the series (and not the group order) + # related GH 7287 + expected = s.groupby(g, sort=False).first() + result = s.groupby(g, sort=False).nth(0, dropna="all") + tm.assert_series_equal(result, expected) + + with pytest.raises(ValueError, match="For a DataFrame"): + s.groupby(g, sort=False).nth(0, dropna=True) + + # doc example + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + result = g.B.nth(0, dropna="all") + expected = g.B.first() + tm.assert_series_equal(result, expected) + + # test multiple nth values + df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], columns=["A", "B"]) + g = df.groupby("A") + + tm.assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index("A")) + tm.assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index("A")) + tm.assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index("A")) + + business_dates = pd.date_range(start="4/1/2014", end="6/30/2014", freq="B") + df = DataFrame(1, index=business_dates, columns=["a", "b"]) + # get the first, fourth and last two business days for each month + key = [df.index.year, df.index.month] + result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) + expected_dates = pd.to_datetime( + [ + "2014/4/1", + "2014/4/4", + "2014/4/29", + "2014/4/30", + "2014/5/1", + "2014/5/6", + "2014/5/29", + "2014/5/30", + "2014/6/2", + "2014/6/5", + "2014/6/27", + "2014/6/30", + ] + ) + expected = DataFrame(1, columns=["a", "b"], index=expected_dates) + tm.assert_frame_equal(result, expected) + + +def test_nth_multi_index(three_group): + # PR 9090, related to issue 8979 + # test nth on MultiIndex, should match .first() + grouped = three_group.groupby(["A", "B"]) + result = grouped.nth(0) + expected = grouped.first() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, expected_first, expected_last", + [ + ( + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + ), + ( + { + "id": ["A", "B", "A"], + "time": [ + Timestamp("2012-01-01 13:00:00", tz="America/New_York"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + Timestamp("2012-03-01 12:00:00", tz="Europe/London"), + ], + "foo": [1, 2, 3], + }, + { + "id": ["A", "B"], + "time": [ + Timestamp("2012-01-01 13:00:00", tz="America/New_York"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + ], + "foo": [1, 2], + }, + { + "id": ["A", "B"], + "time": [ + Timestamp("2012-03-01 12:00:00", tz="Europe/London"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + ], + "foo": [3, 2], + }, + ), + ], +) +def test_first_last_tz(data, expected_first, expected_last): + # GH15884 + # Test that the timezone is retained when calling first + # or last on groupby with as_index=False + + df = DataFrame(data) + + result = df.groupby("id", as_index=False).first() + expected = DataFrame(expected_first) + cols = ["id", "time", "foo"] + tm.assert_frame_equal(result[cols], expected[cols]) 
+ + result = df.groupby("id", as_index=False)["time"].first() + tm.assert_frame_equal(result, expected[["id", "time"]]) + + result = df.groupby("id", as_index=False).last() + expected = DataFrame(expected_last) + cols = ["id", "time", "foo"] + tm.assert_frame_equal(result[cols], expected[cols]) + + result = df.groupby("id", as_index=False)["time"].last() + tm.assert_frame_equal(result, expected[["id", "time"]]) + + +@pytest.mark.parametrize( + "method, ts, alpha", + [ + ["first", Timestamp("2013-01-01", tz="US/Eastern"), "a"], + ["last", Timestamp("2013-01-02", tz="US/Eastern"), "b"], + ], +) +def test_first_last_tz_multi_column(method, ts, alpha): + # GH 21603 + category_string = Series(list("abc")).astype("category") + df = DataFrame( + { + "group": [1, 1, 2], + "category_string": category_string, + "datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + } + ) + result = getattr(df.groupby("group"), method)() + expected = DataFrame( + { + "category_string": pd.Categorical( + [alpha, "c"], dtype=category_string.dtype + ), + "datetimetz": [ts, Timestamp("2013-01-03", tz="US/Eastern")], + }, + index=Index([1, 2], name="group"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + pd.array([True, False], dtype="boolean"), + pd.array([1, 2], dtype="Int64"), + pd.to_datetime(["2020-01-01", "2020-02-01"]), + pd.to_timedelta([1, 2], unit="D"), + ], +) +@pytest.mark.parametrize("function", ["first", "last", "min", "max"]) +def test_first_last_extension_array_keeps_dtype(values, function): + # https://github.com/pandas-dev/pandas/issues/33071 + # https://github.com/pandas-dev/pandas/issues/32194 + df = DataFrame({"a": [1, 2], "b": values}) + grouped = df.groupby("a") + idx = Index([1, 2], name="a") + expected_series = Series(values, name="b", index=idx) + expected_frame = DataFrame({"b": values}, index=idx) + + result_series = getattr(grouped["b"], function)() + tm.assert_series_equal(result_series, expected_series) + + result_frame = grouped.agg({"b": function}) + tm.assert_frame_equal(result_frame, expected_frame) + + +def test_nth_multi_index_as_expected(): + # PR 9090, related to issue 8979 + # test nth on MultiIndex + three_group = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + } + ) + grouped = three_group.groupby(["A", "B"]) + result = grouped.nth(0) + expected = DataFrame( + {"C": ["dull", "dull", "dull", "dull"]}, + index=MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo"], ["one", "two", "one", "two"]], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "op, n, expected_rows", + [ + ("head", -1, [0]), + ("head", 0, []), + ("head", 1, [0, 2]), + ("head", 7, [0, 1, 2]), + ("tail", -1, [1]), + ("tail", 0, []), + ("tail", 1, [1, 2]), + ("tail", 7, [0, 1, 2]), + ], +) +@pytest.mark.parametrize("columns", [None, [], ["A"], ["B"], ["A", "B"]]) +@pytest.mark.parametrize("as_index", [True, False]) +def test_groupby_head_tail(op, n, expected_rows, columns, as_index): + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A", as_index=as_index) + expected = df.iloc[expected_rows] + if columns is not None: + g = g[columns] + expected = 
expected[columns] + result = getattr(g, op)(n) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "op, n, expected_cols", + [ + ("head", -1, [0]), + ("head", 0, []), + ("head", 1, [0, 2]), + ("head", 7, [0, 1, 2]), + ("tail", -1, [1]), + ("tail", 0, []), + ("tail", 1, [1, 2]), + ("tail", 7, [0, 1, 2]), + ], +) +def test_groupby_head_tail_axis_1(op, n, expected_cols): + # GH 9772 + df = DataFrame( + [[1, 2, 3], [1, 4, 5], [2, 6, 7], [3, 8, 9]], columns=["A", "B", "C"] + ) + g = df.groupby([0, 0, 1], axis=1) + expected = df.iloc[:, expected_cols] + result = getattr(g, op)(n) + tm.assert_frame_equal(result, expected) + + +def test_group_selection_cache(): + # GH 12839 nth, head, and tail should return same result consistently + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + expected = df.iloc[[0, 2]].set_index("A") + + g = df.groupby("A") + result1 = g.head(n=2) + result2 = g.nth(0) + tm.assert_frame_equal(result1, df) + tm.assert_frame_equal(result2, expected) + + g = df.groupby("A") + result1 = g.tail(n=2) + result2 = g.nth(0) + tm.assert_frame_equal(result1, df) + tm.assert_frame_equal(result2, expected) + + g = df.groupby("A") + result1 = g.nth(0) + result2 = g.head(n=2) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, df) + + g = df.groupby("A") + result1 = g.nth(0) + result2 = g.tail(n=2) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, df) + + +def test_nth_empty(): + # GH 16064 + df = DataFrame(index=[0], columns=["a", "b", "c"]) + result = df.groupby("a").nth(10) + expected = DataFrame(index=Index([], name="a"), columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["a", "b"]).nth(10) + expected = DataFrame( + index=MultiIndex([[], []], [[], []], names=["a", "b"]), columns=["c"] + ) + tm.assert_frame_equal(result, expected) + + +def test_nth_column_order(): + # GH 20760 + # Check that nth preserves column order + df = DataFrame( + [[1, "b", 100], [1, "a", 50], [1, "a", np.nan], [2, "c", 200], [2, "d", 150]], + columns=["A", "C", "B"], + ) + result = df.groupby("A").nth(0) + expected = DataFrame( + [["b", 100.0], ["c", 200.0]], columns=["C", "B"], index=Index([1, 2], name="A") + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby("A").nth(-1, dropna="any") + expected = DataFrame( + [["a", 50.0], ["d", 150.0]], columns=["C", "B"], index=Index([1, 2], name="A") + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [None, "any", "all"]) +def test_nth_nan_in_grouper(dropna): + # GH 26011 + df = DataFrame( + [[np.nan, 0, 1], ["abc", 2, 3], [np.nan, 4, 5], ["def", 6, 7], [np.nan, 8, 9]], + columns=list("abc"), + ) + result = df.groupby("a").nth(0, dropna=dropna) + expected = DataFrame( + [[2, 3], [6, 7]], columns=list("bc"), index=Index(["abc", "def"], name="a") + ) + + tm.assert_frame_equal(result, expected) + + +def test_first_categorical_and_datetime_data_nat(): + # GH 20520 + df = DataFrame( + { + "group": ["first", "first", "second", "third", "third"], + "time": 5 * [np.datetime64("NaT")], + "categories": Series(["a", "b", "c", "a", "b"], dtype="category"), + } + ) + result = df.groupby("group").first() + expected = DataFrame( + { + "time": 3 * [np.datetime64("NaT")], + "categories": Series(["a", "c", "a"]).astype( + pd.CategoricalDtype(["a", "b", "c"]) + ), + } + ) + expected.index = Index(["first", "second", "third"], name="group") + tm.assert_frame_equal(result, expected) + + +def 
test_first_multi_key_groupby_categorical(): + # GH 22512 + df = DataFrame( + { + "A": [1, 1, 1, 2, 2], + "B": [100, 100, 200, 100, 100], + "C": ["apple", "orange", "mango", "mango", "orange"], + "D": ["jupiter", "mercury", "mars", "venus", "venus"], + } + ) + df = df.astype({"D": "category"}) + result = df.groupby(by=["A", "B"]).first() + expected = DataFrame( + { + "C": ["apple", "mango", "mango"], + "D": Series(["jupiter", "mars", "venus"]).astype( + pd.CategoricalDtype(["jupiter", "mars", "mercury", "venus"]) + ), + } + ) + expected.index = MultiIndex.from_tuples( + [(1, 100), (1, 200), (2, 100)], names=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["first", "last", "nth"]) +def test_groupby_last_first_nth_with_none(method, nulls_fixture): + # GH29645 + expected = Series(["y"]) + data = Series( + [nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture], + index=[0, 0, 0, 0, 0], + ).groupby(level=0) + + if method == "nth": + result = getattr(data, method)(3) + else: + result = getattr(data, method)() + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "arg, expected_rows", + [ + [slice(None, 3, 2), [0, 1, 4, 5]], + [slice(None, -2), [0, 2, 5]], + [[slice(None, 2), slice(-2, None)], [0, 1, 2, 3, 4, 6, 7]], + [[0, 1, slice(-2, None)], [0, 1, 2, 3, 4, 6, 7]], + ], +) +def test_slice(slice_test_df, slice_test_grouped, arg, expected_rows): + # Test slices GH #42947 + + result = slice_test_grouped.nth[arg] + equivalent = slice_test_grouped.nth(arg) + expected = slice_test_df.iloc[expected_rows] + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(equivalent, expected) + + +def test_nth_indexed(slice_test_df, slice_test_grouped): + # Test index notation GH #44688 + + result = slice_test_grouped.nth[0, 1, -2:] + equivalent = slice_test_grouped.nth([0, 1, slice(-2, None)]) + expected = slice_test_df.iloc[[0, 1, 2, 3, 4, 6, 7]] + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(equivalent, expected) + + +def test_invalid_argument(slice_test_grouped): + # Test for error on invalid argument + + with pytest.raises(TypeError, match="Invalid index"): + slice_test_grouped.nth(3.14) + + +def test_negative_step(slice_test_grouped): + # Test for error on negative slice step + + with pytest.raises(ValueError, match="Invalid step"): + slice_test_grouped.nth(slice(None, None, -1)) + + +def test_np_ints(slice_test_df, slice_test_grouped): + # Test np ints work + + result = slice_test_grouped.nth(np.array([0, 1])) + expected = slice_test_df.iloc[[0, 1, 2, 3, 4]] + tm.assert_frame_equal(result, expected) + + +def test_groupby_nth_with_column_axis(): + # GH43926 + df = DataFrame( + [ + [4, 5, 6], + [8, 8, 7], + ], + index=["z", "y"], + columns=["C", "B", "A"], + ) + result = df.groupby(df.iloc[1], axis=1).nth(0) + expected = DataFrame( + [ + [6, 4], + [7, 8], + ], + index=["z", "y"], + columns=[7, 8], + ) + expected.columns.name = "y" + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "start, stop, expected_values, expected_columns", + [ + (None, None, [0, 1, 2, 3, 4], [5, 5, 5, 6, 6]), + (None, 1, [0, 3], [5, 6]), + (None, 9, [0, 1, 2, 3, 4], [5, 5, 5, 6, 6]), + (None, -1, [0, 1, 3], [5, 5, 6]), + (1, None, [1, 2, 4], [5, 5, 6]), + (1, -1, [1], [5]), + (-1, None, [2, 4], [5, 6]), + (-1, 2, [4], [6]), + ], +) +@pytest.mark.parametrize("method", ["call", "index"]) +def test_nth_slices_with_column_axis( + start, stop, expected_values, expected_columns, method +): + df = 
DataFrame([range(5)], columns=[list("ABCDE")]) + gb = df.groupby([5, 5, 5, 6, 6], axis=1) + result = { + "call": lambda start, stop: gb.nth(slice(start, stop)), + "index": lambda start, stop: gb.nth[start:stop], + }[method](start, stop) + expected = DataFrame([expected_values], columns=expected_columns) + tm.assert_frame_equal(result, expected) + + +def test_head_tail_dropna_true(): + # GH#45089 + df = DataFrame( + [["a", "z"], ["b", np.nan], ["c", np.nan], ["c", np.nan]], columns=["X", "Y"] + ) + expected = DataFrame([["a", "z"]], columns=["X", "Y"]) + + result = df.groupby(["X", "Y"]).head(n=1) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["X", "Y"]).tail(n=1) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["X", "Y"]).nth(n=0).reset_index() + tm.assert_frame_equal(result, expected) + + +def test_head_tail_dropna_false(): + # GH#45089 + df = DataFrame([["a", "z"], ["b", np.nan], ["c", np.nan]], columns=["X", "Y"]) + expected = DataFrame([["a", "z"], ["b", np.nan], ["c", np.nan]], columns=["X", "Y"]) + + result = df.groupby(["X", "Y"], dropna=False).head(n=1) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["X", "Y"], dropna=False).tail(n=1) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["X", "Y"], dropna=False).nth(n=0).reset_index() + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_numba.py b/pandas/tests/groupby/test_numba.py new file mode 100644 index 00000000..4eb7b6a7 --- /dev/null +++ b/pandas/tests/groupby/test_numba.py @@ -0,0 +1,73 @@ +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +class TestEngine: + def test_cython_vs_numba_frame( + self, sort, nogil, parallel, nopython, numba_supported_reductions + ): + func, kwargs = numba_supported_reductions + df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + gb = df.groupby("a", sort=sort) + result = getattr(gb, func)( + engine="numba", engine_kwargs=engine_kwargs, **kwargs + ) + expected = getattr(gb, func)(**kwargs) + # check_dtype can be removed if GH 44952 is addressed + check_dtype = func not in ("sum", "min", "max") + tm.assert_frame_equal(result, expected, check_dtype=check_dtype) + + def test_cython_vs_numba_getitem( + self, sort, nogil, parallel, nopython, numba_supported_reductions + ): + func, kwargs = numba_supported_reductions + df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + gb = df.groupby("a", sort=sort)["c"] + result = getattr(gb, func)( + engine="numba", engine_kwargs=engine_kwargs, **kwargs + ) + expected = getattr(gb, func)(**kwargs) + # check_dtype can be removed if GH 44952 is addressed + check_dtype = func not in ("sum", "min", "max") + tm.assert_series_equal(result, expected, check_dtype=check_dtype) + + def test_cython_vs_numba_series( + self, sort, nogil, parallel, nopython, numba_supported_reductions + ): + func, kwargs = numba_supported_reductions + ser = Series(range(3), index=[1, 2, 1], name="foo") + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + gb = ser.groupby(level=0, sort=sort) + result = getattr(gb, func)( + engine="numba", 
engine_kwargs=engine_kwargs, **kwargs + ) + expected = getattr(gb, func)(**kwargs) + # check_dtype can be removed if GH 44952 is addressed + check_dtype = func not in ("sum", "min", "max") + tm.assert_series_equal(result, expected, check_dtype=check_dtype) + + def test_as_index_false_unsupported(self, numba_supported_reductions): + func, kwargs = numba_supported_reductions + df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) + gb = df.groupby("a", as_index=False) + with pytest.raises(NotImplementedError, match="as_index=False"): + getattr(gb, func)(engine="numba", **kwargs) + + def test_axis_1_unsupported(self, numba_supported_reductions): + func, kwargs = numba_supported_reductions + df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) + gb = df.groupby("a", axis=1) + with pytest.raises(NotImplementedError, match="axis=1"): + getattr(gb, func)(engine="numba", **kwargs) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py new file mode 100644 index 00000000..6656fd56 --- /dev/null +++ b/pandas/tests/groupby/test_nunique.py @@ -0,0 +1,184 @@ +import datetime as dt +from string import ascii_lowercase + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +@pytest.mark.slow +@pytest.mark.parametrize("n", 10 ** np.arange(2, 6)) +@pytest.mark.parametrize("m", [10, 100, 1000]) +@pytest.mark.parametrize("sort", [False, True]) +@pytest.mark.parametrize("dropna", [False, True]) +def test_series_groupby_nunique(n, m, sort, dropna): + def check_nunique(df, keys, as_index=True): + original_df = df.copy() + gr = df.groupby(keys, as_index=as_index, sort=sort) + left = gr["julie"].nunique(dropna=dropna) + + gr = df.groupby(keys, as_index=as_index, sort=sort) + right = gr["julie"].apply(Series.nunique, dropna=dropna) + if not as_index: + right = right.reset_index(drop=True) + + if as_index: + tm.assert_series_equal(left, right, check_names=False) + else: + tm.assert_frame_equal(left, right, check_names=False) + tm.assert_frame_equal(df, original_df) + + days = date_range("2015-08-23", periods=10) + + frame = DataFrame( + { + "jim": np.random.choice(list(ascii_lowercase), n), + "joe": np.random.choice(days, n), + "julie": np.random.randint(0, m, n), + } + ) + + check_nunique(frame, ["jim"]) + check_nunique(frame, ["jim", "joe"]) + + frame.loc[1::17, "jim"] = None + frame.loc[3::37, "joe"] = None + frame.loc[7::19, "julie"] = None + frame.loc[8::19, "julie"] = None + frame.loc[9::19, "julie"] = None + + check_nunique(frame, ["jim"]) + check_nunique(frame, ["jim", "joe"]) + check_nunique(frame, ["jim"], as_index=False) + check_nunique(frame, ["jim", "joe"], as_index=False) + + +def test_nunique(): + df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")}) + + expected = DataFrame({"A": list("abc"), "B": [1, 2, 1], "C": [1, 1, 2]}) + result = df.groupby("A", as_index=False).nunique() + tm.assert_frame_equal(result, expected) + + # as_index + expected.index = list("abc") + expected.index.name = "A" + expected = expected.drop(columns="A") + result = df.groupby("A").nunique() + tm.assert_frame_equal(result, expected) + + # with na + result = df.replace({"x": None}).groupby("A").nunique(dropna=False) + tm.assert_frame_equal(result, expected) + + # dropna + expected = DataFrame({"B": [1] * 3, "C": [1] * 3}, index=list("abc")) + expected.index.name = "A" + result = df.replace({"x": 
None}).groupby("A").nunique() + tm.assert_frame_equal(result, expected) + + +def test_nunique_with_object(): + # GH 11077 + data = DataFrame( + [ + [100, 1, "Alice"], + [200, 2, "Bob"], + [300, 3, "Charlie"], + [-400, 4, "Dan"], + [500, 5, "Edith"], + ], + columns=["amount", "id", "name"], + ) + + result = data.groupby(["id", "amount"])["name"].nunique() + index = MultiIndex.from_arrays([data.id, data.amount]) + expected = Series([1] * 5, name="name", index=index) + tm.assert_series_equal(result, expected) + + +def test_nunique_with_empty_series(): + # GH 12553 + data = Series(name="name", dtype=object) + result = data.groupby(level=0).nunique() + expected = Series(name="name", dtype="int64") + tm.assert_series_equal(result, expected) + + +def test_nunique_with_timegrouper(): + # GH 13453 + test = DataFrame( + { + "time": [ + Timestamp("2016-06-28 09:35:35"), + Timestamp("2016-06-28 16:09:30"), + Timestamp("2016-06-28 16:46:28"), + ], + "data": ["1", "2", "3"], + } + ).set_index("time") + result = test.groupby(pd.Grouper(freq="h"))["data"].nunique() + expected = test.groupby(pd.Grouper(freq="h"))["data"].apply(Series.nunique) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "key, data, dropna, expected", + [ + ( + ["x", "x", "x"], + [Timestamp("2019-01-01"), NaT, Timestamp("2019-01-01")], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "y", "y"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + False, + Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "x", "y"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + False, + Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ], +) +def test_nunique_with_NaT(key, data, dropna, expected): + # GH 27951 + df = DataFrame({"key": key, "data": data}) + result = df.groupby(["key"])["data"].nunique(dropna=dropna) + tm.assert_series_equal(result, expected) + + +def test_nunique_preserves_column_level_names(): + # GH 23222 + test = DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) + result = test.groupby([0, 0, 0]).nunique() + expected = DataFrame([2], columns=test.columns) + tm.assert_frame_equal(result, expected) + + +def test_nunique_transform_with_datetime(): + # GH 35109 - transform with nunique on datetimes results in integers + df = DataFrame(date_range("2008-12-31", "2009-01-02"), columns=["date"]) + result = df.groupby([0, 0, 1])["date"].transform("nunique") + expected = Series([2, 2, 1], name="date") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py new file mode 100644 index 00000000..4f58bcb5 --- /dev/null +++ b/pandas/tests/groupby/test_pipe.py @@ -0,0 +1,84 @@ +import numpy as np + +import pandas as pd +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + + +def test_pipe(): + # Test the pipe method of DataFrameGroupBy. 
+ # Issue #17871 + + random_state = np.random.RandomState(1234567890) + + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": random_state.randn(8), + "C": random_state.randn(8), + } + ) + + def f(dfgb): + return dfgb.B.max() - dfgb.C.min().min() + + def square(srs): + return srs**2 + + # Note that the transformations are + # GroupBy -> Series + # Series -> Series + # This then chains the GroupBy.pipe and the + # NDFrame.pipe methods + result = df.groupby("A").pipe(f).pipe(square) + + index = Index(["bar", "foo"], dtype="object", name="A") + expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index) + + tm.assert_series_equal(expected, result) + + +def test_pipe_args(): + # Test passing args to the pipe method of DataFrameGroupBy. + # Issue #17871 + + df = DataFrame( + { + "group": ["A", "A", "B", "B", "C"], + "x": [1.0, 2.0, 3.0, 2.0, 5.0], + "y": [10.0, 100.0, 1000.0, -100.0, -1000.0], + } + ) + + def f(dfgb, arg1): + return dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False).groupby( + dfgb.grouper + ) + + def g(dfgb, arg2): + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + return dfgb.sum() / dfgb.sum().sum() + arg2 + + def h(df, arg3): + return df.x + df.y - arg3 + + result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100) + + # Assert the results here + index = Index(["A", "B", "C"], name="group") + expected = pd.Series([-79.5160891089, -78.4839108911, -80], index=index) + + tm.assert_series_equal(expected, result) + + # test SeriesGroupby.pipe + ser = pd.Series([1, 1, 2, 2, 3, 3]) + result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count()) + + expected = pd.Series([4, 8, 12], index=Int64Index([1, 2, 3])) + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py new file mode 100644 index 00000000..2b7e71d9 --- /dev/null +++ b/pandas/tests/groupby/test_quantile.py @@ -0,0 +1,380 @@ +import numpy as np +import pytest + +from pandas._libs import lib + +import pandas as pd +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] +) +@pytest.mark.parametrize( + "a_vals,b_vals", + [ + # Ints + ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]), + ([1, 2, 3, 4], [4, 3, 2, 1]), + ([1, 2, 3, 4, 5], [4, 3, 2, 1]), + # Floats + ([1.0, 2.0, 3.0, 4.0, 5.0], [5.0, 4.0, 3.0, 2.0, 1.0]), + # Missing data + ([1.0, np.nan, 3.0, np.nan, 5.0], [5.0, np.nan, 3.0, np.nan, 1.0]), + ([np.nan, 4.0, np.nan, 2.0, np.nan], [np.nan, 4.0, np.nan, 2.0, np.nan]), + # Timestamps + ( + list(pd.date_range("1/1/18", freq="D", periods=5)), + list(pd.date_range("1/1/18", freq="D", periods=5))[::-1], + ), + # All NA + ([np.nan] * 5, [np.nan] * 5), + ], +) +@pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1]) +def test_quantile(interpolation, a_vals, b_vals, q, request): + if interpolation == "nearest" and q == 0.5 and b_vals == [4, 3, 2, 1]: + request.node.add_marker( + pytest.mark.xfail( + reason="Unclear numpy expectation for nearest " + "result with equidistant data" + ) + ) + + a_expected = pd.Series(a_vals).quantile(q, interpolation=interpolation) + b_expected = pd.Series(b_vals).quantile(q, interpolation=interpolation) + + df = DataFrame( + {"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": a_vals + b_vals} + ) + + expected = DataFrame( + [a_expected, b_expected], columns=["val"], 
index=Index(["a", "b"], name="key") + ) + result = df.groupby("key").quantile(q, interpolation=interpolation) + + tm.assert_frame_equal(result, expected) + + +def test_quantile_array(): + # https://github.com/pandas-dev/pandas/issues/27526 + df = DataFrame({"A": [0, 1, 2, 3, 4]}) + result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25]) + + index = pd.MultiIndex.from_product([[0, 1], [0.25]]) + expected = DataFrame({"A": [0.25, 2.50]}, index=index) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]}) + index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]]) + + result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75]) + expected = DataFrame( + {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array2(): + # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959 + df = DataFrame( + np.random.RandomState(0).randint(0, 5, size=(10, 3)), columns=list("ABC") + ) + result = df.groupby("A").quantile([0.3, 0.7]) + expected = DataFrame( + { + "B": [0.9, 2.1, 2.2, 3.4, 1.6, 2.4, 2.3, 2.7, 0.0, 0.0], + "C": [1.2, 2.8, 1.8, 3.0, 0.0, 0.0, 1.9, 3.1, 3.0, 3.0], + }, + index=pd.MultiIndex.from_product( + [[0, 1, 2, 3, 4], [0.3, 0.7]], names=["A", None] + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array_no_sort(): + df = DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]}) + result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75]) + expected = DataFrame( + {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]}, + index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]), + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25]) + expected = DataFrame( + {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]}, + index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array_multiple_levels(): + df = DataFrame( + {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]} + ) + result = df.groupby(["c", "d"]).quantile([0.25, 0.75]) + index = pd.MultiIndex.from_tuples( + [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)], + names=["c", "d", None], + ) + expected = DataFrame( + {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("frame_size", [(2, 3), (100, 10)]) +@pytest.mark.parametrize("groupby", [[0], [0, 1]]) +@pytest.mark.parametrize("q", [[0.5, 0.6]]) +def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, q): + # GH30289 + nrow, ncol = frame_size + df = DataFrame(np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol)) + + idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q] + idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [ + list(range(len(q))) * min(nrow, 4) + ] + expected_index = pd.MultiIndex( + levels=idx_levels, codes=idx_codes, names=groupby + [None] + ) + expected_values = [ + [float(x)] * (ncol - len(groupby)) for x in range(min(nrow, 4)) for _ in q + ] + expected_columns = [x for x in range(ncol) if x not in groupby] + expected = DataFrame( + expected_values, index=expected_index, columns=expected_columns + ) + result = df.groupby(groupby).quantile(q) + + tm.assert_frame_equal(result, expected) + + +def test_quantile_raises(): + df = 
DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]) + + with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"): + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + df.groupby("key").quantile() + + +def test_quantile_out_of_bounds_q_raises(): + # https://github.com/pandas-dev/pandas/issues/27470 + df = DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": range(6)}) + g = df.groupby([0, 0, 0, 1, 1, 1]) + with pytest.raises(ValueError, match="Got '50.0' instead"): + g.quantile(50) + + with pytest.raises(ValueError, match="Got '-1.0' instead"): + g.quantile(-1) + + +def test_quantile_missing_group_values_no_segfaults(): + # GH 28662 + data = np.array([1.0, np.nan, 1.0]) + df = DataFrame({"key": data, "val": range(3)}) + + # Random segfaults; would have been guaranteed in loop + grp = df.groupby("key") + for _ in range(100): + grp.quantile() + + +@pytest.mark.parametrize( + "key, val, expected_key, expected_val", + [ + ([1.0, np.nan, 3.0, np.nan], range(4), [1.0, 3.0], [0.0, 2.0]), + ([1.0, np.nan, 2.0, 2.0], range(4), [1.0, 2.0], [0.0, 2.5]), + (["a", "b", "b", np.nan], range(4), ["a", "b"], [0, 1.5]), + ([0], [42], [0], [42.0]), + ([], [], np.array([], dtype="float64"), np.array([], dtype="float64")), + ], +) +def test_quantile_missing_group_values_correct_results( + key, val, expected_key, expected_val +): + # GH 28662, GH 33200, GH 33569 + df = DataFrame({"key": key, "val": val}) + + expected = DataFrame( + expected_val, index=Index(expected_key, name="key"), columns=["val"] + ) + + grp = df.groupby("key") + + result = grp.quantile(0.5) + tm.assert_frame_equal(result, expected) + + result = grp.quantile() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + pd.array([1, 0, None] * 2, dtype="Int64"), + pd.array([True, False, None] * 2, dtype="boolean"), + ], +) +@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) +def test_groupby_quantile_nullable_array(values, q): + # https://github.com/pandas-dev/pandas/issues/33136 + df = DataFrame({"a": ["x"] * 3 + ["y"] * 3, "b": values}) + result = df.groupby("a")["b"].quantile(q) + + if isinstance(q, list): + idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None]) + true_quantiles = [0.0, 0.5, 1.0] + else: + idx = Index(["x", "y"], name="a") + true_quantiles = [0.5] + + expected = pd.Series(true_quantiles * 2, index=idx, name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) +@pytest.mark.parametrize("numeric_only", [lib.no_default, True, False]) +def test_groupby_quantile_skips_invalid_dtype(q, numeric_only): + df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) + + if numeric_only is lib.no_default or numeric_only: + warn = FutureWarning if numeric_only is lib.no_default else None + msg = "The default value of numeric_only in DataFrameGroupBy.quantile" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby("a").quantile(q, numeric_only=numeric_only) + + expected = df.groupby("a")[["b"]].quantile(q) + tm.assert_frame_equal(result, expected) + else: + with pytest.raises( + TypeError, match="'quantile' cannot be performed against 'object' dtypes!" 
+ ): + df.groupby("a").quantile(q, numeric_only=numeric_only) + + +def test_groupby_quantile_NA_float(any_float_dtype): + # GH#42849 + df = DataFrame({"x": [1, 1], "y": [0.2, np.nan]}, dtype=any_float_dtype) + result = df.groupby("x")["y"].quantile(0.5) + exp_index = Index([1.0], dtype=any_float_dtype, name="x") + expected = pd.Series([0.2], dtype=float, index=exp_index, name="y") + tm.assert_series_equal(expected, result) + + result = df.groupby("x")["y"].quantile([0.5, 0.75]) + expected = pd.Series( + [0.2] * 2, + index=pd.MultiIndex.from_product((exp_index, [0.5, 0.75]), names=["x", None]), + name="y", + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_quantile_NA_int(any_int_ea_dtype): + # GH#42849 + df = DataFrame({"x": [1, 1], "y": [2, 5]}, dtype=any_int_ea_dtype) + result = df.groupby("x")["y"].quantile(0.5) + expected = pd.Series( + [3.5], dtype=float, index=Index([1], name="x", dtype=any_int_ea_dtype), name="y" + ) + tm.assert_series_equal(expected, result) + + result = df.groupby("x").quantile(0.5) + expected = DataFrame({"y": 3.5}, index=Index([1], name="x", dtype=any_int_ea_dtype)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["Float64", "Float32"]) +def test_groupby_quantile_allNA_column(dtype): + # GH#42849 + df = DataFrame({"x": [1, 1], "y": [pd.NA] * 2}, dtype=dtype) + result = df.groupby("x")["y"].quantile(0.5) + expected = pd.Series( + [np.nan], dtype=float, index=Index([1.0], dtype=dtype), name="y" + ) + expected.index.name = "x" + tm.assert_series_equal(expected, result) + + +def test_groupby_timedelta_quantile(): + # GH: 29485 + df = DataFrame( + {"value": pd.to_timedelta(np.arange(4), unit="s"), "group": [1, 1, 2, 2]} + ) + result = df.groupby("group").quantile(0.99) + expected = DataFrame( + { + "value": [ + pd.Timedelta("0 days 00:00:00.990000"), + pd.Timedelta("0 days 00:00:02.990000"), + ] + }, + index=Index([1, 2], name="group"), + ) + tm.assert_frame_equal(result, expected) + + +def test_columns_groupby_quantile(): + # GH 33795 + df = DataFrame( + np.arange(12).reshape(3, -1), + index=list("XYZ"), + columns=pd.Series(list("ABAB"), name="col"), + ) + result = df.groupby("col", axis=1).quantile(q=[0.8, 0.2]) + expected = DataFrame( + [ + [1.6, 0.4, 2.6, 1.4], + [5.6, 4.4, 6.6, 5.4], + [9.6, 8.4, 10.6, 9.4], + ], + index=list("XYZ"), + columns=pd.MultiIndex.from_tuples( + [("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None] + ), + ) + + tm.assert_frame_equal(result, expected) + + +def test_timestamp_groupby_quantile(): + # GH 33168 + df = DataFrame( + { + "timestamp": pd.date_range( + start="2020-04-19 00:00:00", freq="1T", periods=100, tz="UTC" + ).floor("1H"), + "category": list(range(1, 101)), + "value": list(range(101, 201)), + } + ) + + result = df.groupby("timestamp").quantile([0.2, 0.8]) + + expected = DataFrame( + [ + {"category": 12.8, "value": 112.8}, + {"category": 48.2, "value": 148.2}, + {"category": 68.8, "value": 168.8}, + {"category": 92.2, "value": 192.2}, + ], + index=pd.MultiIndex.from_tuples( + [ + (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.2), + (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.8), + (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.2), + (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.8), + ], + names=("timestamp", None), + ), + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py new file mode 100644 index 00000000..8bbe38d3 --- /dev/null +++ b/pandas/tests/groupby/test_rank.py @@ -0,0 
+1,681 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + NaT, + Series, + concat, +) +import pandas._testing as tm + + +def test_rank_apply(): + lev1 = tm.rands_array(10, 100) + lev2 = tm.rands_array(10, 130) + lab1 = np.random.randint(0, 100, size=500) + lab2 = np.random.randint(0, 130, size=500) + + df = DataFrame( + { + "value": np.random.randn(500), + "key1": lev1.take(lab1), + "key2": lev2.take(lab2), + } + ) + + result = df.groupby(["key1", "key2"]).value.rank() + + expected = [piece.value.rank() for key, piece in df.groupby(["key1", "key2"])] + expected = concat(expected, axis=0) + expected = expected.reindex(result.index) + tm.assert_series_equal(result, expected) + + result = df.groupby(["key1", "key2"]).value.rank(pct=True) + + expected = [ + piece.value.rank(pct=True) for key, piece in df.groupby(["key1", "key2"]) + ] + expected = concat(expected, axis=0) + expected = expected.reindex(result.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", + [ + np.array([2, 2, 8, 2, 6], dtype=dtype) + for dtype in ["i8", "i4", "i2", "i1", "u8", "u4", "u2", "u1", "f8", "f4", "f2"] + ] + + [ + [ + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-08"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-06"), + ], + [ + pd.Timestamp("2018-01-02", tz="US/Pacific"), + pd.Timestamp("2018-01-02", tz="US/Pacific"), + pd.Timestamp("2018-01-08", tz="US/Pacific"), + pd.Timestamp("2018-01-02", tz="US/Pacific"), + pd.Timestamp("2018-01-06", tz="US/Pacific"), + ], + [ + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-08") - pd.Timestamp(0), + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-06") - pd.Timestamp(0), + ], + [ + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-08").to_period("D"), + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-06").to_period("D"), + ], + ], + ids=lambda x: type(x[0]), +) +@pytest.mark.parametrize( + "ties_method,ascending,pct,exp", + [ + ("average", True, False, [2.0, 2.0, 5.0, 2.0, 4.0]), + ("average", True, True, [0.4, 0.4, 1.0, 0.4, 0.8]), + ("average", False, False, [4.0, 4.0, 1.0, 4.0, 2.0]), + ("average", False, True, [0.8, 0.8, 0.2, 0.8, 0.4]), + ("min", True, False, [1.0, 1.0, 5.0, 1.0, 4.0]), + ("min", True, True, [0.2, 0.2, 1.0, 0.2, 0.8]), + ("min", False, False, [3.0, 3.0, 1.0, 3.0, 2.0]), + ("min", False, True, [0.6, 0.6, 0.2, 0.6, 0.4]), + ("max", True, False, [3.0, 3.0, 5.0, 3.0, 4.0]), + ("max", True, True, [0.6, 0.6, 1.0, 0.6, 0.8]), + ("max", False, False, [5.0, 5.0, 1.0, 5.0, 2.0]), + ("max", False, True, [1.0, 1.0, 0.2, 1.0, 0.4]), + ("first", True, False, [1.0, 2.0, 5.0, 3.0, 4.0]), + ("first", True, True, [0.2, 0.4, 1.0, 0.6, 0.8]), + ("first", False, False, [3.0, 4.0, 1.0, 5.0, 2.0]), + ("first", False, True, [0.6, 0.8, 0.2, 1.0, 0.4]), + ("dense", True, False, [1.0, 1.0, 3.0, 1.0, 2.0]), + ("dense", True, True, [1.0 / 3.0, 1.0 / 3.0, 3.0 / 3.0, 1.0 / 3.0, 2.0 / 3.0]), + ("dense", False, False, [3.0, 3.0, 1.0, 3.0, 2.0]), + ("dense", False, True, [3.0 / 3.0, 3.0 / 3.0, 1.0 / 3.0, 3.0 / 3.0, 2.0 / 3.0]), + ], +) +def test_rank_args(grps, vals, ties_method, ascending, pct, exp): + key = np.repeat(grps, len(vals)) + + orig_vals = vals + vals = list(vals) * len(grps) + if 
isinstance(orig_vals, np.ndarray): + vals = np.array(vals, dtype=orig_vals.dtype) + + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank(method=ties_method, ascending=ascending, pct=pct) + + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", [[-np.inf, -np.inf, np.nan, 1.0, np.nan, np.inf, np.inf]] +) +@pytest.mark.parametrize( + "ties_method,ascending,na_option,exp", + [ + ("average", True, "keep", [1.5, 1.5, np.nan, 3, np.nan, 4.5, 4.5]), + ("average", True, "top", [3.5, 3.5, 1.5, 5.0, 1.5, 6.5, 6.5]), + ("average", True, "bottom", [1.5, 1.5, 6.5, 3.0, 6.5, 4.5, 4.5]), + ("average", False, "keep", [4.5, 4.5, np.nan, 3, np.nan, 1.5, 1.5]), + ("average", False, "top", [6.5, 6.5, 1.5, 5.0, 1.5, 3.5, 3.5]), + ("average", False, "bottom", [4.5, 4.5, 6.5, 3.0, 6.5, 1.5, 1.5]), + ("min", True, "keep", [1.0, 1.0, np.nan, 3.0, np.nan, 4.0, 4.0]), + ("min", True, "top", [3.0, 3.0, 1.0, 5.0, 1.0, 6.0, 6.0]), + ("min", True, "bottom", [1.0, 1.0, 6.0, 3.0, 6.0, 4.0, 4.0]), + ("min", False, "keep", [4.0, 4.0, np.nan, 3.0, np.nan, 1.0, 1.0]), + ("min", False, "top", [6.0, 6.0, 1.0, 5.0, 1.0, 3.0, 3.0]), + ("min", False, "bottom", [4.0, 4.0, 6.0, 3.0, 6.0, 1.0, 1.0]), + ("max", True, "keep", [2.0, 2.0, np.nan, 3.0, np.nan, 5.0, 5.0]), + ("max", True, "top", [4.0, 4.0, 2.0, 5.0, 2.0, 7.0, 7.0]), + ("max", True, "bottom", [2.0, 2.0, 7.0, 3.0, 7.0, 5.0, 5.0]), + ("max", False, "keep", [5.0, 5.0, np.nan, 3.0, np.nan, 2.0, 2.0]), + ("max", False, "top", [7.0, 7.0, 2.0, 5.0, 2.0, 4.0, 4.0]), + ("max", False, "bottom", [5.0, 5.0, 7.0, 3.0, 7.0, 2.0, 2.0]), + ("first", True, "keep", [1.0, 2.0, np.nan, 3.0, np.nan, 4.0, 5.0]), + ("first", True, "top", [3.0, 4.0, 1.0, 5.0, 2.0, 6.0, 7.0]), + ("first", True, "bottom", [1.0, 2.0, 6.0, 3.0, 7.0, 4.0, 5.0]), + ("first", False, "keep", [4.0, 5.0, np.nan, 3.0, np.nan, 1.0, 2.0]), + ("first", False, "top", [6.0, 7.0, 1.0, 5.0, 2.0, 3.0, 4.0]), + ("first", False, "bottom", [4.0, 5.0, 6.0, 3.0, 7.0, 1.0, 2.0]), + ("dense", True, "keep", [1.0, 1.0, np.nan, 2.0, np.nan, 3.0, 3.0]), + ("dense", True, "top", [2.0, 2.0, 1.0, 3.0, 1.0, 4.0, 4.0]), + ("dense", True, "bottom", [1.0, 1.0, 4.0, 2.0, 4.0, 3.0, 3.0]), + ("dense", False, "keep", [3.0, 3.0, np.nan, 2.0, np.nan, 1.0, 1.0]), + ("dense", False, "top", [4.0, 4.0, 1.0, 3.0, 1.0, 2.0, 2.0]), + ("dense", False, "bottom", [3.0, 3.0, 4.0, 2.0, 4.0, 1.0, 1.0]), + ], +) +def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp): + # GH 20561 + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option + ) + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", + [ + np.array([2, 2, np.nan, 8, 2, 6, np.nan, np.nan], dtype=dtype) + for dtype in ["f8", "f4", "f2"] + ] + + [ + [ + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-02"), + np.nan, + pd.Timestamp("2018-01-08"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-06"), + np.nan, + np.nan, + ], + [ + pd.Timestamp("2018-01-02", tz="US/Pacific"), + pd.Timestamp("2018-01-02", tz="US/Pacific"), + np.nan, + pd.Timestamp("2018-01-08", tz="US/Pacific"), + pd.Timestamp("2018-01-02", tz="US/Pacific"), + 
pd.Timestamp("2018-01-06", tz="US/Pacific"), + np.nan, + np.nan, + ], + [ + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + np.nan, + pd.Timestamp("2018-01-08") - pd.Timestamp(0), + pd.Timestamp("2018-01-02") - pd.Timestamp(0), + pd.Timestamp("2018-01-06") - pd.Timestamp(0), + np.nan, + np.nan, + ], + [ + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-02").to_period("D"), + np.nan, + pd.Timestamp("2018-01-08").to_period("D"), + pd.Timestamp("2018-01-02").to_period("D"), + pd.Timestamp("2018-01-06").to_period("D"), + np.nan, + np.nan, + ], + ], + ids=lambda x: type(x[0]), +) +@pytest.mark.parametrize( + "ties_method,ascending,na_option,pct,exp", + [ + ( + "average", + True, + "keep", + False, + [2.0, 2.0, np.nan, 5.0, 2.0, 4.0, np.nan, np.nan], + ), + ( + "average", + True, + "keep", + True, + [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan], + ), + ( + "average", + False, + "keep", + False, + [4.0, 4.0, np.nan, 1.0, 4.0, 2.0, np.nan, np.nan], + ), + ( + "average", + False, + "keep", + True, + [0.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan], + ), + ("min", True, "keep", False, [1.0, 1.0, np.nan, 5.0, 1.0, 4.0, np.nan, np.nan]), + ("min", True, "keep", True, [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]), + ( + "min", + False, + "keep", + False, + [3.0, 3.0, np.nan, 1.0, 3.0, 2.0, np.nan, np.nan], + ), + ("min", False, "keep", True, [0.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ("max", True, "keep", False, [3.0, 3.0, np.nan, 5.0, 3.0, 4.0, np.nan, np.nan]), + ("max", True, "keep", True, [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ( + "max", + False, + "keep", + False, + [5.0, 5.0, np.nan, 1.0, 5.0, 2.0, np.nan, np.nan], + ), + ("max", False, "keep", True, [1.0, 1.0, np.nan, 0.2, 1.0, 0.4, np.nan, np.nan]), + ( + "first", + True, + "keep", + False, + [1.0, 2.0, np.nan, 5.0, 3.0, 4.0, np.nan, np.nan], + ), + ( + "first", + True, + "keep", + True, + [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan], + ), + ( + "first", + False, + "keep", + False, + [3.0, 4.0, np.nan, 1.0, 5.0, 2.0, np.nan, np.nan], + ), + ( + "first", + False, + "keep", + True, + [0.6, 0.8, np.nan, 0.2, 1.0, 0.4, np.nan, np.nan], + ), + ( + "dense", + True, + "keep", + False, + [1.0, 1.0, np.nan, 3.0, 1.0, 2.0, np.nan, np.nan], + ), + ( + "dense", + True, + "keep", + True, + [ + 1.0 / 3.0, + 1.0 / 3.0, + np.nan, + 3.0 / 3.0, + 1.0 / 3.0, + 2.0 / 3.0, + np.nan, + np.nan, + ], + ), + ( + "dense", + False, + "keep", + False, + [3.0, 3.0, np.nan, 1.0, 3.0, 2.0, np.nan, np.nan], + ), + ( + "dense", + False, + "keep", + True, + [ + 3.0 / 3.0, + 3.0 / 3.0, + np.nan, + 1.0 / 3.0, + 3.0 / 3.0, + 2.0 / 3.0, + np.nan, + np.nan, + ], + ), + ("average", True, "bottom", False, [2.0, 2.0, 7.0, 5.0, 2.0, 4.0, 7.0, 7.0]), + ( + "average", + True, + "bottom", + True, + [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875], + ), + ("average", False, "bottom", False, [4.0, 4.0, 7.0, 1.0, 4.0, 2.0, 7.0, 7.0]), + ( + "average", + False, + "bottom", + True, + [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875], + ), + ("min", True, "bottom", False, [1.0, 1.0, 6.0, 5.0, 1.0, 4.0, 6.0, 6.0]), + ( + "min", + True, + "bottom", + True, + [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75], + ), + ("min", False, "bottom", False, [3.0, 3.0, 6.0, 1.0, 3.0, 2.0, 6.0, 6.0]), + ( + "min", + False, + "bottom", + True, + [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75], + ), + ("max", True, "bottom", False, [3.0, 3.0, 8.0, 5.0, 3.0, 4.0, 8.0, 8.0]), + ("max", True, 
"bottom", True, [0.375, 0.375, 1.0, 0.625, 0.375, 0.5, 1.0, 1.0]), + ("max", False, "bottom", False, [5.0, 5.0, 8.0, 1.0, 5.0, 2.0, 8.0, 8.0]), + ( + "max", + False, + "bottom", + True, + [0.625, 0.625, 1.0, 0.125, 0.625, 0.25, 1.0, 1.0], + ), + ("first", True, "bottom", False, [1.0, 2.0, 6.0, 5.0, 3.0, 4.0, 7.0, 8.0]), + ( + "first", + True, + "bottom", + True, + [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.0], + ), + ("first", False, "bottom", False, [3.0, 4.0, 6.0, 1.0, 5.0, 2.0, 7.0, 8.0]), + ( + "first", + False, + "bottom", + True, + [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.0], + ), + ("dense", True, "bottom", False, [1.0, 1.0, 4.0, 3.0, 1.0, 2.0, 4.0, 4.0]), + ("dense", True, "bottom", True, [0.25, 0.25, 1.0, 0.75, 0.25, 0.5, 1.0, 1.0]), + ("dense", False, "bottom", False, [3.0, 3.0, 4.0, 1.0, 3.0, 2.0, 4.0, 4.0]), + ("dense", False, "bottom", True, [0.75, 0.75, 1.0, 0.25, 0.75, 0.5, 1.0, 1.0]), + ], +) +def test_rank_args_missing(grps, vals, ties_method, ascending, na_option, pct, exp): + key = np.repeat(grps, len(vals)) + + orig_vals = vals + vals = list(vals) * len(grps) + if isinstance(orig_vals, np.ndarray): + vals = np.array(vals, dtype=orig_vals.dtype) + + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize( + "pct,exp", [(False, [3.0, 3.0, 3.0, 3.0, 3.0]), (True, [0.6, 0.6, 0.6, 0.6, 0.6])] +) +def test_rank_resets_each_group(pct, exp): + df = DataFrame( + {"key": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"], "val": [1] * 10} + ) + result = df.groupby("key").rank(pct=pct) + exp_df = DataFrame(exp * 2, columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize( + "dtype", ["int64", "int32", "uint64", "uint32", "float64", "float32"] +) +@pytest.mark.parametrize("upper", [True, False]) +def test_rank_avg_even_vals(dtype, upper): + if upper: + # use IntegerDtype/FloatingDtype + dtype = dtype[0].upper() + dtype[1:] + dtype = dtype.replace("Ui", "UI") + df = DataFrame({"key": ["a"] * 4, "val": [1] * 4}) + df["val"] = df["val"].astype(dtype) + assert df["val"].dtype == dtype + + result = df.groupby("key").rank() + exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=["val"]) + if upper: + exp_df = exp_df.astype("Float64") + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("ties_method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize( + "vals", [["bar", "bar", "foo", "bar", "baz"], ["bar", np.nan, "foo", np.nan, "baz"]] +) +def test_rank_object_dtype(ties_method, ascending, na_option, pct, vals): + df = DataFrame({"key": ["foo"] * 5, "val": vals}) + mask = df["val"].isna() + + gb = df.groupby("key") + res = gb.rank(method=ties_method, ascending=ascending, na_option=na_option, pct=pct) + + # construct our expected by using numeric values with the same ordering + if mask.any(): + df2 = DataFrame({"key": ["foo"] * 5, "val": [0, np.nan, 2, np.nan, 1]}) + else: + df2 = DataFrame({"key": ["foo"] * 5, "val": [0, 0, 2, 0, 1]}) + + gb2 = df2.groupby("key") + alt = gb2.rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + tm.assert_frame_equal(res, alt) + + 
+@pytest.mark.parametrize("na_option", [True, "bad", 1]) +@pytest.mark.parametrize("ties_method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize( + "vals", + [ + ["bar", "bar", "foo", "bar", "baz"], + ["bar", np.nan, "foo", np.nan, "baz"], + [1, np.nan, 2, np.nan, 3], + ], +) +def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals): + df = DataFrame({"key": ["foo"] * 5, "val": vals}) + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + + with pytest.raises(ValueError, match=msg): + df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + +def test_rank_empty_group(): + # see gh-22519 + column = "A" + df = DataFrame({"A": [0, 1, 0], "B": [1.0, np.nan, 2.0]}) + + result = df.groupby(column).B.rank(pct=True) + expected = Series([0.5, np.nan, 1.0], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby(column).rank(pct=True) + expected = DataFrame({"B": [0.5, np.nan, 1.0]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_key,input_value,output_value", + [ + ([1, 2], [1, 1], [1.0, 1.0]), + ([1, 1, 2, 2], [1, 2, 1, 2], [0.5, 1.0, 0.5, 1.0]), + ([1, 1, 2, 2], [1, 2, 1, np.nan], [0.5, 1.0, 1.0, np.nan]), + ([1, 1, 2], [1, 2, np.nan], [0.5, 1.0, np.nan]), + ], +) +def test_rank_zero_div(input_key, input_value, output_value): + # GH 23666 + df = DataFrame({"A": input_key, "B": input_value}) + + result = df.groupby("A").rank(method="dense", pct=True) + expected = DataFrame({"B": output_value}) + tm.assert_frame_equal(result, expected) + + +def test_rank_min_int(): + # GH-32859 + df = DataFrame( + { + "grp": [1, 1, 2], + "int_col": [ + np.iinfo(np.int64).min, + np.iinfo(np.int64).max, + np.iinfo(np.int64).min, + ], + "datetimelike": [NaT, datetime(2001, 1, 1), NaT], + } + ) + + result = df.groupby("grp").rank() + expected = DataFrame( + {"int_col": [1.0, 2.0, 1.0], "datetimelike": [np.NaN, 1.0, np.NaN]} + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("use_nan", [True, False]) +def test_rank_pct_equal_values_on_group_transition(use_nan): + # GH#40518 + fill_value = np.nan if use_nan else 3 + df = DataFrame( + [ + [-1, 1], + [-1, 2], + [1, fill_value], + [-1, fill_value], + ], + columns=["group", "val"], + ) + result = df.groupby(["group"])["val"].rank( + method="dense", + pct=True, + ) + if use_nan: + expected = Series([0.5, 1, np.nan, np.nan], name="val") + else: + expected = Series([1 / 3, 2 / 3, 1, 1], name="val") + + tm.assert_series_equal(result, expected) + + +def test_rank_multiindex(): + # GH27721 + df = concat( + { + "a": DataFrame({"col1": [3, 4], "col2": [1, 2]}), + "b": DataFrame({"col3": [5, 6], "col4": [7, 8]}), + }, + axis=1, + ) + + gb = df.groupby(level=0, axis=1) + result = gb.rank(axis=1) + + expected = concat( + [ + df["a"].rank(axis=1), + df["b"].rank(axis=1), + ], + axis=1, + keys=["a", "b"], + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_axis0_rank_axis1(): + # GH#41320 + df = DataFrame( + {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]}, + index=["a", "a", "b", "b"], + ) + gb = df.groupby(level=0, axis=0) + + res = gb.rank(axis=1) + + # This should match what we get when "manually" operating group-by-group + expected = concat([df.loc["a"].rank(axis=1), df.loc["b"].rank(axis=1)], axis=0) + tm.assert_frame_equal(res, expected) + + # check that we haven't 
accidentally written a case that coincidentally + # matches rank(axis=0) + alt = gb.rank(axis=0) + assert not alt.equals(expected) + + +def test_groupby_axis0_cummax_axis1(): + # case where groupby axis is 0 and axis keyword in transform is 1 + + # df has mixed dtype -> multiple blocks + df = DataFrame( + {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]}, + index=["a", "a", "b", "b"], + ) + gb = df.groupby(level=0, axis=0) + + cmax = gb.cummax(axis=1) + expected = df[[0, 1]].astype(np.float64) + expected[2] = expected[1] + tm.assert_frame_equal(cmax, expected) + + +def test_non_unique_index(): + # GH 16577 + df = DataFrame( + {"A": [1.0, 2.0, 3.0, np.nan], "value": 1.0}, + index=[pd.Timestamp("20170101", tz="US/Eastern")] * 4, + ) + result = df.groupby([df.index, "A"]).value.rank(ascending=True, pct=True) + expected = Series( + [1.0, 1.0, 1.0, np.nan], + index=[pd.Timestamp("20170101", tz="US/Eastern")] * 4, + name="value", + ) + tm.assert_series_equal(result, expected) + + +def test_rank_categorical(): + cat = pd.Categorical(["a", "a", "b", np.nan, "c", "b"], ordered=True) + cat2 = pd.Categorical([1, 2, 3, np.nan, 4, 5], ordered=True) + + df = DataFrame({"col1": [0, 1, 0, 1, 0, 1], "col2": cat, "col3": cat2}) + + gb = df.groupby("col1") + + res = gb.rank() + + expected = df.astype(object).groupby("col1").rank() + tm.assert_frame_equal(res, expected) diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py new file mode 100644 index 00000000..9153fac0 --- /dev/null +++ b/pandas/tests/groupby/test_sample.py @@ -0,0 +1,144 @@ +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("n, frac", [(2, None), (None, 0.2)]) +def test_groupby_sample_balanced_groups_shape(n, frac): + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values}) + + result = df.groupby("a").sample(n=n, frac=frac) + values = [1] * 2 + [2] * 2 + expected = DataFrame({"a": values, "b": values}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=n, frac=frac) + expected = Series(values, name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_unbalanced_groups_shape(): + values = [1] * 10 + [2] * 20 + df = DataFrame({"a": values, "b": values}) + + result = df.groupby("a").sample(n=5) + values = [1] * 5 + [2] * 5 + expected = DataFrame({"a": values, "b": values}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=5) + expected = Series(values, name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_index_value_spans_groups(): + values = [1] * 3 + [2] * 3 + df = DataFrame({"a": values, "b": values}, index=[1, 2, 2, 2, 2, 2]) + + result = df.groupby("a").sample(n=2) + values = [1] * 2 + [2] * 2 + expected = DataFrame({"a": values, "b": values}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=2) + expected = Series(values, name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_n_and_frac_raises(): + df = DataFrame({"a": [1, 2], "b": [1, 2]}) + msg = "Please enter a value for `frac` OR `n`, not both" + + with pytest.raises(ValueError, match=msg): + df.groupby("a").sample(n=1, frac=1.0) + + with pytest.raises(ValueError, match=msg): + df.groupby("a")["b"].sample(n=1, frac=1.0) + + +def 
test_groupby_sample_frac_gt_one_without_replacement_raises(): + df = DataFrame({"a": [1, 2], "b": [1, 2]}) + msg = "Replace has to be set to `True` when upsampling the population `frac` > 1." + + with pytest.raises(ValueError, match=msg): + df.groupby("a").sample(frac=1.5, replace=False) + + with pytest.raises(ValueError, match=msg): + df.groupby("a")["b"].sample(frac=1.5, replace=False) + + +@pytest.mark.parametrize("n", [-1, 1.5]) +def test_groupby_sample_invalid_n_raises(n): + df = DataFrame({"a": [1, 2], "b": [1, 2]}) + + if n < 0: + msg = "A negative number of rows requested. Please provide `n` >= 0." + else: + msg = "Only integers accepted as `n` values" + + with pytest.raises(ValueError, match=msg): + df.groupby("a").sample(n=n) + + with pytest.raises(ValueError, match=msg): + df.groupby("a")["b"].sample(n=n) + + +def test_groupby_sample_oversample(): + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values}) + + result = df.groupby("a").sample(frac=2.0, replace=True) + values = [1] * 20 + [2] * 20 + expected = DataFrame({"a": values, "b": values}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(frac=2.0, replace=True) + expected = Series(values, name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_without_n_or_frac(): + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values}) + + result = df.groupby("a").sample(n=None, frac=None) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=result.index) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=None, frac=None) + expected = Series([1, 2], name="b", index=result.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "index, expected_index", + [(["w", "x", "y", "z"], ["w", "w", "y", "y"]), ([3, 4, 5, 6], [3, 3, 5, 5])], +) +def test_groupby_sample_with_weights(index, expected_index): + # GH 39927 - tests for integer index needed + values = [1] * 2 + [2] * 2 + df = DataFrame({"a": values, "b": values}, index=Index(index)) + + result = df.groupby("a").sample(n=2, replace=True, weights=[1, 0, 1, 0]) + expected = DataFrame({"a": values, "b": values}, index=Index(expected_index)) + tm.assert_frame_equal(result, expected) + + result = df.groupby("a")["b"].sample(n=2, replace=True, weights=[1, 0, 1, 0]) + expected = Series(values, name="b", index=Index(expected_index)) + tm.assert_series_equal(result, expected) + + +def test_groupby_sample_with_selections(): + # GH 39928 + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values, "c": values}) + + result = df.groupby("a")[["b", "c"]].sample(n=None, frac=None) + expected = DataFrame({"b": [1, 2], "c": [1, 2]}, index=result.index) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py new file mode 100644 index 00000000..92012436 --- /dev/null +++ b/pandas/tests/groupby/test_size.py @@ -0,0 +1,90 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + PeriodIndex, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) +def test_size(df, by): + grouped = df.groupby(by=by) + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + +@pytest.mark.parametrize( + "by", + [ + [0, 0, 0, 0], + [0, 1, 1, 1], + [1, 0, 1, 1], + [0, None, None, None], + pytest.param([None, None, None, None], 
marks=pytest.mark.xfail), + ], +) +def test_size_axis_1(df, axis_1, by, sort, dropna): + # GH#45715 + counts = {key: sum(value == key for value in by) for key in dict.fromkeys(by)} + if dropna: + counts = {key: value for key, value in counts.items() if key is not None} + expected = Series(counts) + if sort: + expected = expected.sort_index() + grouped = df.groupby(by=by, axis=axis_1, sort=sort, dropna=dropna) + result = grouped.size() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) +@pytest.mark.parametrize("sort", [True, False]) +def test_size_sort(sort, by): + df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC")) + left = df.groupby(by=by, sort=sort).size() + right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0]) + tm.assert_series_equal(left, right, check_names=False) + + +def test_size_series_dataframe(): + # https://github.com/pandas-dev/pandas/issues/11699 + df = DataFrame(columns=["A", "B"]) + out = Series(dtype="int64", index=Index([], name="A")) + tm.assert_series_equal(df.groupby("A").size(), out) + + +def test_size_groupby_all_null(): + # https://github.com/pandas-dev/pandas/issues/23050 + # Assert no 'Value Error : Length of passed values is 2, index implies 0' + df = DataFrame({"A": [None, None]}) # all-null groups + result = df.groupby("A").size() + expected = Series(dtype="int64", index=Index([], name="A")) + tm.assert_series_equal(result, expected) + + +def test_size_period_index(): + # https://github.com/pandas-dev/pandas/issues/34010 + ser = Series([1], index=PeriodIndex(["2000"], name="A", freq="D")) + grp = ser.groupby(level="A") + result = grp.size() + tm.assert_series_equal(result, ser) + + +@pytest.mark.parametrize("as_index", [True, False]) +def test_size_on_categorical(as_index): + df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"]) + df["A"] = df["A"].astype("category") + result = df.groupby(["A", "B"], as_index=as_index).size() + + expected = DataFrame( + [[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"] + ) + expected["A"] = expected["A"].astype("category") + if as_index: + expected = expected.set_index(["A", "B"])["size"].rename(None) + + tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py new file mode 100644 index 00000000..ae725cbb --- /dev/null +++ b/pandas/tests/groupby/test_timegrouper.py @@ -0,0 +1,928 @@ +""" test with the TimeGrouper / grouping with datetimes """ + +from datetime import datetime +from io import StringIO + +import numpy as np +import pytest +import pytz + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + offsets, +) +import pandas._testing as tm +from pandas.core.groupby.grouper import Grouper +from pandas.core.groupby.ops import BinGrouper + + +@pytest.fixture +def frame_for_truncated_bingrouper(): + """ + DataFrame used by groupby_with_truncated_bingrouper, made into + a separate fixture for easier re-use in + test_groupby_apply_timegrouper_with_nat_apply_squeeze + """ + df = DataFrame( + { + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + Timestamp(2013, 9, 1, 13, 0), + Timestamp(2013, 9, 1, 13, 5), + Timestamp(2013, 10, 1, 20, 0), + Timestamp(2013, 10, 3, 10, 0), + pd.NaT, + Timestamp(2013, 9, 2, 14, 0), + ], + } + ) + return df + + +@pytest.fixture +def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper): + 
""" + GroupBy object such that gb.grouper is a BinGrouper and + len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq) + + Aggregations on this groupby should have + + dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date") + + As either the index or an index level. + """ + df = frame_for_truncated_bingrouper + + tdg = Grouper(key="Date", freq="5D") + gb = df.groupby(tdg) + + # check we're testing the case we're interested in + assert len(gb.grouper.result_index) != len(gb.grouper.group_keys_seq) + + return gb + + +class TestGroupBy: + def test_groupby_with_timegrouper(self): + # GH 4161 + # TimeGrouper requires a sorted index + # also verifies that the resultant index has the correct name + df_original = DataFrame( + { + "Buyer": "Carl Carl Carl Carl Joe Carl".split(), + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ], + } + ) + + # GH 6908 change target column's order + df_reordered = df_original.sort_values(by="Quantity") + + for df in [df_original, df_reordered]: + df = df.set_index(["Date"]) + + expected = DataFrame( + {"Quantity": 0}, + index=date_range( + "20130901", "20131205", freq="5D", name="Date", inclusive="left" + ), + ) + expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") + + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result1 = df.resample("5D").sum() + tm.assert_frame_equal(result1, expected) + + df_sorted = df.sort_index() + with tm.assert_produces_warning(FutureWarning, match=msg): + result2 = df_sorted.groupby(Grouper(freq="5D")).sum() + tm.assert_frame_equal(result2, expected) + + with tm.assert_produces_warning(FutureWarning, match=msg): + result3 = df.groupby(Grouper(freq="5D")).sum() + tm.assert_frame_equal(result3, expected) + + @pytest.mark.parametrize("should_sort", [True, False]) + def test_groupby_with_timegrouper_methods(self, should_sort): + # GH 3881 + # make sure API of timegrouper conforms + + df = DataFrame( + { + "Branch": "A A A A A B".split(), + "Buyer": "Carl Mark Carl Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 8, 9, 3], + "Date": [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ], + } + ) + + if should_sort: + df = df.sort_values(by="Quantity", ascending=False) + + df = df.set_index("Date", drop=False) + g = df.groupby(Grouper(freq="6M")) + assert g.group_keys + + assert isinstance(g.grouper, BinGrouper) + groups = g.groups + assert isinstance(groups, dict) + assert len(groups) == 3 + + def test_timegrouper_with_reg_groups(self): + + # GH 3794 + # allow combination of timegrouper/reg groups + + df_original = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ], + } + ).set_index("Date") + + df_sorted = df_original.sort_values(by="Quantity", ascending=False) + + for df in [df_original, df_sorted]: + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), 
+ "Quantity": [10, 18, 3], + "Date": [ + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([Grouper(freq="A"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + "Buyer": "Carl Mark Carl Joe".split(), + "Quantity": [1, 3, 9, 18], + "Date": [ + datetime(2013, 1, 1, 0, 0), + datetime(2013, 1, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + df_original = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 10, 1, 13, 0), + datetime(2013, 10, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 2, 12, 0), + datetime(2013, 10, 2, 14, 0), + ], + } + ).set_index("Date") + + df_sorted = df_original.sort_values(by="Quantity", ascending=False) + for df in [df_original, df_sorted]: + + expected = DataFrame( + { + "Buyer": "Carl Joe Mark Carl Joe".split(), + "Quantity": [6, 8, 3, 4, 10], + "Date": [ + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 2, 0, 0), + datetime(2013, 10, 2, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + + warn_msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum() + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), + "Quantity": [10, 18, 3], + "Date": [ + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + tm.assert_frame_equal(result, expected) + + # passing the name + df = df.reset_index() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match="'The grouper name foo is not found'"): + df.groupby([Grouper(freq="1M", key="foo"), "Buyer"]).sum() + + # passing the level + df = df.set_index("Date") + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + with pytest.raises(ValueError, match="The level foo is not valid"): + df.groupby([Grouper(freq="1M", level="foo"), "Buyer"]).sum() + + # multi names + df = df.copy() + df["Date"] = df.index + offsets.MonthEnd(2) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), + "Quantity": 
[10, 18, 3], + "Date": [ + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + tm.assert_frame_equal(result, expected) + + # error as we have both a level and a name! + msg = "The Grouper cannot specify both a key and a level!" + with pytest.raises(ValueError, match=msg): + df.groupby( + [Grouper(freq="1M", key="Date", level="Date"), "Buyer"] + ).sum() + + # single groupers + expected = DataFrame( + [[31]], + columns=["Quantity"], + index=DatetimeIndex( + [datetime(2013, 10, 31, 0, 0)], freq=offsets.MonthEnd(), name="Date" + ), + ) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby(Grouper(freq="1M")).sum() + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M")]).sum() + tm.assert_frame_equal(result, expected) + + expected.index = expected.index.shift(1) + assert expected.index.freq == offsets.MonthEnd() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby(Grouper(freq="1M", key="Date")).sum() + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date")]).sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"]) + def test_timegrouper_with_reg_groups_freq(self, freq): + # GH 6764 multiple grouping with/without sort + df = DataFrame( + { + "date": pd.to_datetime( + [ + "20121002", + "20121007", + "20130130", + "20130202", + "20130305", + "20121002", + "20121207", + "20130130", + "20130202", + "20130305", + "20130202", + "20130305", + ] + ), + "user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + "whole_cost": [ + 1790, + 364, + 280, + 259, + 201, + 623, + 90, + 312, + 359, + 301, + 359, + 801, + ], + "cost1": [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12], + } + ).set_index("date") + + expected = ( + df.groupby("user_id")["whole_cost"] + .resample(freq) + .sum(min_count=1) # XXX + .dropna() + .reorder_levels(["date", "user_id"]) + .sort_index() + .astype("int64") + ) + expected.name = "whole_cost" + + result1 = ( + df.sort_index().groupby([Grouper(freq=freq), "user_id"])["whole_cost"].sum() + ) + tm.assert_series_equal(result1, expected) + + result2 = df.groupby([Grouper(freq=freq), "user_id"])["whole_cost"].sum() + tm.assert_series_equal(result2, expected) + + def test_timegrouper_get_group(self): + # GH 6914 + + df_original = DataFrame( + { + "Buyer": "Carl Joe Joe Carl Joe Carl".split(), + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ], + } + ) + df_reordered = df_original.sort_values(by="Quantity") + + # single grouping + expected_list = [ + df_original.iloc[[0, 1, 5]], + df_original.iloc[[2, 3]], + df_original.iloc[[4]], + ] + dt_list = ["2013-09-30", "2013-10-31", "2013-12-31"] + + for df in [df_original, df_reordered]: + grouped = df.groupby(Grouper(freq="M", key="Date")) + for t, expected in zip(dt_list, expected_list): + dt = Timestamp(t) + result = grouped.get_group(dt) + tm.assert_frame_equal(result, expected) + + # multiple grouping + expected_list = [ + df_original.iloc[[1]], + df_original.iloc[[3]], + df_original.iloc[[4]], + ] + g_list = [("Joe", "2013-09-30"), ("Carl", "2013-10-31"), 
("Joe", "2013-12-31")] + + for df in [df_original, df_reordered]: + grouped = df.groupby(["Buyer", Grouper(freq="M", key="Date")]) + for (b, t), expected in zip(g_list, expected_list): + dt = Timestamp(t) + result = grouped.get_group((b, dt)) + tm.assert_frame_equal(result, expected) + + # with index + df_original = df_original.set_index("Date") + df_reordered = df_original.sort_values(by="Quantity") + + expected_list = [ + df_original.iloc[[0, 1, 5]], + df_original.iloc[[2, 3]], + df_original.iloc[[4]], + ] + + for df in [df_original, df_reordered]: + grouped = df.groupby(Grouper(freq="M")) + for t, expected in zip(dt_list, expected_list): + dt = Timestamp(t) + result = grouped.get_group(dt) + tm.assert_frame_equal(result, expected) + + def test_timegrouper_apply_return_type_series(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) + df_dt = df.copy() + df_dt["date"] = pd.to_datetime(df_dt["date"]) + + def sumfunc_series(x): + return Series([x["value"].sum()], ("sum",)) + + expected = df.groupby(Grouper(key="date")).apply(sumfunc_series) + result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_series) + tm.assert_frame_equal( + result.reset_index(drop=True), expected.reset_index(drop=True) + ) + + def test_timegrouper_apply_return_type_value(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) + df_dt = df.copy() + df_dt["date"] = pd.to_datetime(df_dt["date"]) + + def sumfunc_value(x): + return x.value.sum() + + expected = df.groupby(Grouper(key="date")).apply(sumfunc_value) + result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_value) + tm.assert_series_equal( + result.reset_index(drop=True), expected.reset_index(drop=True) + ) + + def test_groupby_groups_datetimeindex(self): + # GH#1430 + periods = 1000 + ind = date_range(start="2012/1/1", freq="5min", periods=periods) + df = DataFrame( + {"high": np.arange(periods), "low": np.arange(periods)}, index=ind + ) + grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) + + # it works! 
+ groups = grouped.groups + assert isinstance(list(groups.keys())[0], datetime) + + # GH#11442 + index = date_range("2015/01/01", periods=5, name="date") + df = DataFrame({"A": [5, 6, 7, 8, 9], "B": [1, 2, 3, 4, 5]}, index=index) + result = df.groupby(level="date").groups + dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"] + expected = { + Timestamp(date): DatetimeIndex([date], name="date") for date in dates + } + tm.assert_dict_equal(result, expected) + + grouped = df.groupby(level="date") + for date in dates: + result = grouped.get_group(date) + data = [[df.loc[date, "A"], df.loc[date, "B"]]] + expected_index = DatetimeIndex([date], name="date", freq="D") + expected = DataFrame(data, columns=list("AB"), index=expected_index) + tm.assert_frame_equal(result, expected) + + def test_groupby_groups_datetimeindex_tz(self): + # GH 3950 + dates = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "datetime": dates, + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + df["datetime"] = df["datetime"].apply(lambda d: Timestamp(d, tz="US/Pacific")) + + exp_idx1 = DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 09:00:00", + ], + tz="US/Pacific", + name="datetime", + ) + exp_idx2 = Index(["a", "b"] * 3, name="label") + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame( + {"value1": [0, 3, 1, 4, 2, 5], "value2": [1, 2, 2, 1, 1, 2]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(["datetime", "label"]).sum() + tm.assert_frame_equal(result, expected) + + # by level + didx = DatetimeIndex(dates, tz="Asia/Tokyo") + df = DataFrame( + {"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]}, + index=didx, + ) + + exp_idx = DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="Asia/Tokyo", + ) + expected = DataFrame( + {"value1": [3, 5, 7], "value2": [2, 4, 6]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(level=0).sum() + tm.assert_frame_equal(result, expected) + + def test_frame_datetime64_handling_groupby(self): + # it works! 
+ df = DataFrame( + [(3, np.datetime64("2012-07-03")), (3, np.datetime64("2012-07-04"))], + columns=["a", "date"], + ) + result = df.groupby("a").first() + assert result["date"][3] == Timestamp("2012-07-03") + + def test_groupby_multi_timezone(self): + + # combining multiple / different timezones yields UTC + + data = """0,2000-01-28 16:47:00,America/Chicago +1,2000-01-29 16:48:00,America/Chicago +2,2000-01-30 16:49:00,America/Los_Angeles +3,2000-01-31 16:50:00,America/Chicago +4,2000-01-01 16:50:00,America/New_York""" + + df = pd.read_csv(StringIO(data), header=None, names=["value", "date", "tz"]) + result = df.groupby("tz", group_keys=False).date.apply( + lambda x: pd.to_datetime(x).dt.tz_localize(x.name) + ) + + expected = Series( + [ + Timestamp("2000-01-28 16:47:00-0600", tz="America/Chicago"), + Timestamp("2000-01-29 16:48:00-0600", tz="America/Chicago"), + Timestamp("2000-01-30 16:49:00-0800", tz="America/Los_Angeles"), + Timestamp("2000-01-31 16:50:00-0600", tz="America/Chicago"), + Timestamp("2000-01-01 16:50:00-0500", tz="America/New_York"), + ], + name="date", + dtype=object, + ) + tm.assert_series_equal(result, expected) + + tz = "America/Chicago" + res_values = df.groupby("tz").date.get_group(tz) + result = pd.to_datetime(res_values).dt.tz_localize(tz) + exp_values = Series( + ["2000-01-28 16:47:00", "2000-01-29 16:48:00", "2000-01-31 16:50:00"], + index=[0, 1, 3], + name="date", + ) + expected = pd.to_datetime(exp_values).dt.tz_localize(tz) + tm.assert_series_equal(result, expected) + + def test_groupby_groups_periods(self): + dates = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "period": [pd.Period(d, freq="H") for d in dates], + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + + exp_idx1 = pd.PeriodIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 09:00:00", + ], + freq="H", + name="period", + ) + exp_idx2 = Index(["a", "b"] * 3, name="label") + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame( + {"value1": [0, 3, 1, 4, 2, 5], "value2": [1, 2, 2, 1, 1, 2]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(["period", "label"]).sum() + tm.assert_frame_equal(result, expected) + + # by level + didx = pd.PeriodIndex(dates, freq="H") + df = DataFrame( + {"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]}, + index=didx, + ) + + exp_idx = pd.PeriodIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + freq="H", + ) + expected = DataFrame( + {"value1": [3, 5, 7], "value2": [2, 4, 6]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(level=0).sum() + tm.assert_frame_equal(result, expected) + + def test_groupby_first_datetime64(self): + df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) + df[1] = df[1].view("M8[ns]") + + assert issubclass(df[1].dtype.type, np.datetime64) + + result = df.groupby(level=0).first() + got_dt = result[1].dtype + assert issubclass(got_dt.type, np.datetime64) + + result = df[1].groupby(level=0).first() + got_dt = result.dtype + assert issubclass(got_dt.type, np.datetime64) + + def test_groupby_max_datetime64(self): + # GH 5869 + # datetimelike dtype conversion from int + df = DataFrame({"A": Timestamp("20130101"), 
"B": np.arange(5)}) + expected = df.groupby("A")["A"].apply(lambda x: x.max()) + result = df.groupby("A")["A"].max() + tm.assert_series_equal(result, expected) + + def test_groupby_datetime64_32_bit(self): + # GH 6410 / numpy 4328 + # 32-bit under 1.9-dev indexing issue + + df = DataFrame({"A": range(2), "B": [Timestamp("2000-01-1")] * 2}) + result = df.groupby("A")["B"].transform(min) + expected = Series([Timestamp("2000-01-1")] * 2, name="B") + tm.assert_series_equal(result, expected) + + def test_groupby_with_timezone_selection(self): + # GH 11616 + # Test that column selection returns output in correct timezone. + np.random.seed(42) + df = DataFrame( + { + "factor": np.random.randint(0, 3, size=60), + "time": date_range("01/01/2000 00:00", periods=60, freq="s", tz="UTC"), + } + ) + df1 = df.groupby("factor").max()["time"] + df2 = df.groupby("factor")["time"].max() + tm.assert_series_equal(df1, df2) + + def test_timezone_info(self): + # see gh-11682: Timezone info lost when broadcasting + # scalar datetime to DataFrame + + df = DataFrame({"a": [1], "b": [datetime.now(pytz.utc)]}) + assert df["b"][0].tzinfo == pytz.utc + df = DataFrame({"a": [1, 2, 3]}) + df["b"] = datetime.now(pytz.utc) + assert df["b"][0].tzinfo == pytz.utc + + def test_datetime_count(self): + df = DataFrame( + {"a": [1, 2, 3] * 2, "dates": date_range("now", periods=6, freq="T")} + ) + result = df.groupby("a").dates.count() + expected = Series([2, 2, 2], index=Index([1, 2, 3], name="a"), name="dates") + tm.assert_series_equal(result, expected) + + def test_first_last_max_min_on_time_data(self): + # GH 10295 + # Verify that NaT is not in the result of max, min, first and last on + # Dataframe with datetime or timedelta values. + from datetime import timedelta as td + + df_test = DataFrame( + { + "dt": [ + np.nan, + "2015-07-24 10:10", + "2015-07-25 11:11", + "2015-07-23 12:12", + np.nan, + ], + "td": [np.nan, td(days=1), td(days=2), td(days=3), np.nan], + } + ) + df_test.dt = pd.to_datetime(df_test.dt) + df_test["group"] = "A" + df_ref = df_test[df_test.dt.notna()] + + grouped_test = df_test.groupby("group") + grouped_ref = df_ref.groupby("group") + + tm.assert_frame_equal(grouped_ref.max(), grouped_test.max()) + tm.assert_frame_equal(grouped_ref.min(), grouped_test.min()) + tm.assert_frame_equal(grouped_ref.first(), grouped_test.first()) + tm.assert_frame_equal(grouped_ref.last(), grouped_test.last()) + + def test_nunique_with_timegrouper_and_nat(self): + # GH 17575 + test = DataFrame( + { + "time": [ + Timestamp("2016-06-28 09:35:35"), + pd.NaT, + Timestamp("2016-06-28 16:46:28"), + ], + "data": ["1", "2", "3"], + } + ) + + grouper = Grouper(key="time", freq="h") + result = test.groupby(grouper)["data"].nunique() + expected = test[test.time.notnull()].groupby(grouper)["data"].nunique() + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + def test_scalar_call_versus_list_call(self): + # Issue: 17530 + data_frame = { + "location": ["shanghai", "beijing", "shanghai"], + "time": Series( + ["2017-08-09 13:32:23", "2017-08-11 23:23:15", "2017-08-11 22:23:15"], + dtype="datetime64[ns]", + ), + "value": [1, 2, 3], + } + data_frame = DataFrame(data_frame).set_index("time") + grouper = Grouper(freq="D") + + grouped = data_frame.groupby(grouper) + result = grouped.count() + grouped = data_frame.groupby([grouper]) + expected = grouped.count() + + tm.assert_frame_equal(result, expected) + + def test_grouper_period_index(self): + # GH 32108 + periods = 2 + index = pd.period_range( + 
start="2018-01", periods=periods, freq="M", name="Month" + ) + period_series = Series(range(periods), index=index) + result = period_series.groupby(period_series.index.month).sum() + + expected = Series( + range(0, periods), index=Index(range(1, periods + 1), name=index.name) + ) + tm.assert_series_equal(result, expected) + + def test_groupby_apply_timegrouper_with_nat_dict_returns( + self, groupby_with_truncated_bingrouper + ): + # GH#43500 case where gb.grouper.result_index and gb.grouper.group_keys_seq + # have different lengths that goes through the `isinstance(values[0], dict)` + # path + gb = groupby_with_truncated_bingrouper + + res = gb["Quantity"].apply(lambda x: {"foo": len(x)}) + + dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date") + mi = MultiIndex.from_arrays([dti, ["foo"] * len(dti)]) + expected = Series([3, 0, 0, 0, 0, 0, 2], index=mi, name="Quantity") + tm.assert_series_equal(res, expected) + + def test_groupby_apply_timegrouper_with_nat_scalar_returns( + self, groupby_with_truncated_bingrouper + ): + # GH#43500 Previously raised ValueError bc used index with incorrect + # length in wrap_applied_result + gb = groupby_with_truncated_bingrouper + + res = gb["Quantity"].apply(lambda x: x.iloc[0] if len(x) else np.nan) + + dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date") + expected = Series( + [18, np.nan, np.nan, np.nan, np.nan, np.nan, 5], + index=dti._with_freq(None), + name="Quantity", + ) + + tm.assert_series_equal(res, expected) + + def test_groupby_apply_timegrouper_with_nat_apply_squeeze( + self, frame_for_truncated_bingrouper + ): + df = frame_for_truncated_bingrouper + + # We need to create a GroupBy object with only one non-NaT group, + # so use a huge freq so that all non-NaT dates will be grouped together + tdg = Grouper(key="Date", freq="100Y") + + with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"): + gb = df.groupby(tdg, squeeze=True) + + # check that we will go through the singular_series path + # in _wrap_applied_output_series + assert gb.ngroups == 1 + assert gb._selected_obj._get_axis(gb.axis).nlevels == 1 + + # function that returns a Series + res = gb.apply(lambda x: x["Quantity"] * 2) + + key = Timestamp("2013-12-31") + ordering = df["Date"].sort_values().dropna().index + mi = MultiIndex.from_product([[key], ordering], names=["Date", None]) + + ex_values = df["Quantity"].take(ordering).values * 2 + expected = Series(ex_values, index=mi, name="Quantity") + tm.assert_series_equal(res, expected) + + @td.skip_if_no("numba") + def test_groupby_agg_numba_timegrouper_with_nat( + self, groupby_with_truncated_bingrouper + ): + # See discussion in GH#43487 + gb = groupby_with_truncated_bingrouper + + result = gb["Quantity"].aggregate( + lambda values, index: np.nanmean(values), engine="numba" + ) + + expected = gb["Quantity"].aggregate(np.nanmean) + tm.assert_series_equal(result, expected) + + result_df = gb[["Quantity"]].aggregate( + lambda values, index: np.nanmean(values), engine="numba" + ) + expected_df = gb[["Quantity"]].aggregate(np.nanmean) + tm.assert_frame_equal(result_df, expected_df) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py new file mode 100644 index 00000000..577a72d3 --- /dev/null +++ b/pandas/tests/groupby/test_value_counts.py @@ -0,0 +1,193 @@ +""" +these are systematically testing all of the args to value_counts +with different size combinations. 
This is to ensure stability of the sorting +and proper parameter handling +""" + +from itertools import product + +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Grouper, + MultiIndex, + Series, + date_range, + to_datetime, +) +import pandas._testing as tm + + +def tests_value_counts_index_names_category_column(): + # GH44324 Missing name of index category column + df = DataFrame( + { + "gender": ["female"], + "country": ["US"], + } + ) + df["gender"] = df["gender"].astype("category") + result = df.groupby("country")["gender"].value_counts() + + # Construct expected, very specific multiindex + df_mi_expected = DataFrame([["US", "female"]], columns=["country", "gender"]) + df_mi_expected["gender"] = df_mi_expected["gender"].astype("category") + mi_expected = MultiIndex.from_frame(df_mi_expected) + expected = Series([1], index=mi_expected, name="gender") + + tm.assert_series_equal(result, expected) + + +# our starting frame +def seed_df(seed_nans, n, m): + np.random.seed(1234) + days = date_range("2015-08-24", periods=10) + + frame = DataFrame( + { + "1st": np.random.choice(list("abcd"), n), + "2nd": np.random.choice(days, n), + "3rd": np.random.randint(1, m + 1, n), + } + ) + + if seed_nans: + frame.loc[1::11, "1st"] = np.nan + frame.loc[3::17, "2nd"] = np.nan + frame.loc[7::19, "3rd"] = np.nan + frame.loc[8::19, "3rd"] = np.nan + frame.loc[9::19, "3rd"] = np.nan + + return frame + + +# create input df, keys, and the bins +binned = [] +ids = [] +for seed_nans in [True, False]: + for n, m in product((100, 1000), (5, 20)): + + df = seed_df(seed_nans, n, m) + bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2) + keys = "1st", "2nd", ["1st", "2nd"] + for k, b in product(keys, bins): + binned.append((df, k, b, n, m)) + ids.append(f"{k}-{n}-{m}") + + +@pytest.mark.slow +@pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids) +@pytest.mark.parametrize("isort", [True, False]) +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("dropna", [True, False]) +def test_series_groupby_value_counts( + df, keys, bins, n, m, isort, normalize, sort, ascending, dropna +): + def rebuild_index(df): + arr = list(map(df.index.get_level_values, range(df.index.nlevels))) + df.index = MultiIndex.from_arrays(arr, names=df.index.names) + return df + + kwargs = { + "normalize": normalize, + "sort": sort, + "ascending": ascending, + "dropna": dropna, + "bins": bins, + } + + gr = df.groupby(keys, sort=isort) + left = gr["3rd"].value_counts(**kwargs) + + gr = df.groupby(keys, sort=isort) + right = gr["3rd"].apply(Series.value_counts, **kwargs) + right.index.names = right.index.names[:-1] + ["3rd"] + + # have to sort on index because of unstable sort on values + left, right = map(rebuild_index, (left, right)) # xref GH9212 + tm.assert_series_equal(left.sort_index(), right.sort_index()) + + +def test_series_groupby_value_counts_with_grouper(): + # GH28479 + df = DataFrame( + { + "Timestamp": [ + 1565083561, + 1565083561 + 86400, + 1565083561 + 86500, + 1565083561 + 86400 * 2, + 1565083561 + 86400 * 3, + 1565083561 + 86500 * 3, + 1565083561 + 86400 * 4, + ], + "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"], + } + ).drop([3]) + + df["Datetime"] = to_datetime(df["Timestamp"].apply(lambda t: str(t)), unit="s") + dfg = df.groupby(Grouper(freq="1D", key="Datetime")) + + # have to sort on index because 
of unstable sort on values xref GH9212 + result = dfg["Food"].value_counts().sort_index() + expected = dfg["Food"].apply(Series.value_counts).sort_index() + expected.index.names = result.index.names + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) +def test_series_groupby_value_counts_empty(columns): + # GH39172 + df = DataFrame(columns=columns) + dfg = df.groupby(columns[:-1]) + + result = dfg[columns[-1]].value_counts() + expected = Series([], name=columns[-1], dtype=result.dtype) + expected.index = MultiIndex.from_arrays([[]] * len(columns), names=columns) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) +def test_series_groupby_value_counts_one_row(columns): + # GH42618 + df = DataFrame(data=[range(len(columns))], columns=columns) + dfg = df.groupby(columns[:-1]) + + result = dfg[columns[-1]].value_counts() + expected = df.value_counts().rename(columns[-1]) + + tm.assert_series_equal(result, expected) + + +def test_series_groupby_value_counts_on_categorical(): + # GH38672 + + s = Series(Categorical(["a"], categories=["a", "b"])) + result = s.groupby([0]).value_counts() + + expected = Series( + data=[1, 0], + index=MultiIndex.from_arrays( + [ + [0, 0], + CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, dtype="category" + ), + ] + ), + ) + + # Expected: + # 0 a 1 + # b 0 + # dtype: int64 + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/transform/__init__.py b/pandas/tests/groupby/transform/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py new file mode 100644 index 00000000..2b70d732 --- /dev/null +++ b/pandas/tests/groupby/transform/test_numba.py @@ -0,0 +1,229 @@ +import pytest + +from pandas.errors import NumbaUtilError +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Series, + option_context, +) +import pandas._testing as tm + + +@td.skip_if_no("numba") +def test_correct_function_signature(): + def incorrect_function(x): + return x + 1 + + data = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + with pytest.raises(NumbaUtilError, match="The first 2"): + data.groupby("key").transform(incorrect_function, engine="numba") + + with pytest.raises(NumbaUtilError, match="The first 2"): + data.groupby("key")["data"].transform(incorrect_function, engine="numba") + + +@td.skip_if_no("numba") +def test_check_nopython_kwargs(): + def incorrect_function(values, index): + return values + 1 + + data = DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + with pytest.raises(NumbaUtilError, match="numba does not support"): + data.groupby("key").transform(incorrect_function, engine="numba", a=1) + + with pytest.raises(NumbaUtilError, match="numba does not support"): + data.groupby("key")["data"].transform(incorrect_function, engine="numba", a=1) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +@pytest.mark.parametrize("jit", [True, False]) +@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) +def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): + def func(values, index): + return values + 1 + + if jit: + # Test 
accepted jitted functions + import numba + + func = numba.jit(func) + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + grouped = data.groupby(0) + if pandas_obj == "Series": + grouped = grouped[1] + + result = grouped.transform(func, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.transform(lambda x: x + 1, engine="cython") + + tm.assert_equal(result, expected) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +@pytest.mark.parametrize("jit", [True, False]) +@pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) +def test_cache(jit, pandas_obj, nogil, parallel, nopython): + # Test that the functions are cached correctly if we switch functions + def func_1(values, index): + return values + 1 + + def func_2(values, index): + return values * 5 + + if jit: + import numba + + func_1 = numba.jit(func_1) + func_2 = numba.jit(func_2) + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + grouped = data.groupby(0) + if pandas_obj == "Series": + grouped = grouped[1] + + result = grouped.transform(func_1, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.transform(lambda x: x + 1, engine="cython") + tm.assert_equal(result, expected) + + result = grouped.transform(func_2, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.transform(lambda x: x * 5, engine="cython") + tm.assert_equal(result, expected) + + # Retest func_1 which should use the cache + result = grouped.transform(func_1, engine="numba", engine_kwargs=engine_kwargs) + expected = grouped.transform(lambda x: x + 1, engine="cython") + tm.assert_equal(result, expected) + + +@td.skip_if_no("numba") +def test_use_global_config(): + def func_1(values, index): + return values + 1 + + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + grouped = data.groupby(0) + expected = grouped.transform(func_1, engine="numba") + with option_context("compute.use_numba", True): + result = grouped.transform(func_1, engine=None) + tm.assert_frame_equal(expected, result) + + +@td.skip_if_no("numba") +@pytest.mark.parametrize( + "agg_func", [["min", "max"], "min", {"B": ["min", "max"], "C": "sum"}] +) +def test_multifunc_notimplimented(agg_func): + data = DataFrame( + {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] + ) + grouped = data.groupby(0) + with pytest.raises(NotImplementedError, match="Numba engine can"): + grouped.transform(agg_func, engine="numba") + + with pytest.raises(NotImplementedError, match="Numba engine can"): + grouped[1].transform(agg_func, engine="numba") + + +@td.skip_if_no("numba") +def test_args_not_cached(): + # GH 41647 + def sum_last(values, index, n): + return values[-n:].sum() + + df = DataFrame({"id": [0, 0, 1, 1], "x": [1, 1, 1, 1]}) + grouped_x = df.groupby("id")["x"] + result = grouped_x.transform(sum_last, 1, engine="numba") + expected = Series([1.0] * 4, name="x") + tm.assert_series_equal(result, expected) + + result = grouped_x.transform(sum_last, 2, engine="numba") + expected = Series([2.0] * 4, name="x") + tm.assert_series_equal(result, expected) + + +@td.skip_if_no("numba") +def test_index_data_correctly_passed(): + # GH 43133 + 
def f(values, index): + return index - 1 + + df = DataFrame({"group": ["A", "A", "B"], "v": [4, 5, 6]}, index=[-1, -2, -3]) + result = df.groupby("group").transform(f, engine="numba") + expected = DataFrame([-4.0, -3.0, -2.0], columns=["v"], index=[-1, -2, -3]) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_engine_kwargs_not_cached(): + # If the user passes a different set of engine_kwargs don't return the same + # jitted function + nogil = True + parallel = False + nopython = True + + def func_kwargs(values, index): + return nogil + parallel + nopython + + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + df = DataFrame({"value": [0, 0, 0]}) + result = df.groupby(level=0).transform( + func_kwargs, engine="numba", engine_kwargs=engine_kwargs + ) + expected = DataFrame({"value": [2.0, 2.0, 2.0]}) + tm.assert_frame_equal(result, expected) + + nogil = False + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + result = df.groupby(level=0).transform( + func_kwargs, engine="numba", engine_kwargs=engine_kwargs + ) + expected = DataFrame({"value": [1.0, 1.0, 1.0]}) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore") +def test_multiindex_one_key(nogil, parallel, nopython): + def numba_func(values, index): + return 1 + + df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + result = df.groupby("A").transform( + numba_func, engine="numba", engine_kwargs=engine_kwargs + ) + expected = DataFrame([{"A": 1, "B": 2, "C": 1.0}]).set_index(["A", "B"]) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_multiindex_multi_key_not_supported(nogil, parallel, nopython): + def numba_func(values, index): + return 1 + + df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + with pytest.raises(NotImplementedError, match="More than 1 grouping labels"): + df.groupby(["A", "B"]).transform( + numba_func, engine="numba", engine_kwargs=engine_kwargs + ) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py new file mode 100644 index 00000000..5b5b28be --- /dev/null +++ b/pandas/tests/groupby/transform/test_transform.py @@ -0,0 +1,1580 @@ +""" test with the .transform """ +from io import StringIO + +import numpy as np +import pytest + +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_timedelta64_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + Series, + Timestamp, + concat, + date_range, +) +import pandas._testing as tm +from pandas.core.groupby.base import maybe_normalize_deprecated_kernels +from pandas.core.groupby.generic import DataFrameGroupBy +from pandas.tests.groupby import get_groupby_method_args + + +def assert_fp_equal(a, b): + assert (np.abs(a - b) < 1e-12).all() + + +def test_transform(): + data = Series(np.arange(9) // 3, index=np.arange(9)) + + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) + + grouped = data.groupby(lambda x: x // 3) + + transformed = grouped.transform(lambda x: x * x.sum()) + assert transformed[7] == 12 + + # GH 8046 + # make sure that we preserve the input order + + df = DataFrame( + np.arange(6, dtype="int64").reshape(3, 2), columns=["a", "b"], index=[0, 2, 1] + ) + key = [0, 0, 
1] + expected = ( + df.sort_index() + .groupby(key) + .transform(lambda x: x - x.mean()) + .groupby(key) + .mean() + ) + result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(key).mean() + tm.assert_frame_equal(result, expected) + + def demean(arr): + return arr - arr.mean() + + people = DataFrame( + np.random.randn(5, 5), + columns=["a", "b", "c", "d", "e"], + index=["Joe", "Steve", "Wes", "Jim", "Travis"], + ) + key = ["one", "two", "one", "two", "one"] + result = people.groupby(key).transform(demean).groupby(key).mean() + expected = people.groupby(key, group_keys=False).apply(demean).groupby(key).mean() + tm.assert_frame_equal(result, expected) + + # GH 8430 + df = tm.makeTimeDataFrame() + g = df.groupby(pd.Grouper(freq="M")) + g.transform(lambda x: x - 1) + + # GH 9700 + df = DataFrame({"a": range(5, 10), "b": range(5)}) + result = df.groupby("a").transform(max) + expected = DataFrame({"b": range(5)}) + tm.assert_frame_equal(result, expected) + + +def test_transform_fast(): + + df = DataFrame({"id": np.arange(100000) / 3, "val": np.random.randn(100000)}) + + grp = df.groupby("id")["val"] + + values = np.repeat(grp.mean().values, ensure_platform_int(grp.count().values)) + expected = Series(values, index=df.index, name="val") + + result = grp.transform(np.mean) + tm.assert_series_equal(result, expected) + + result = grp.transform("mean") + tm.assert_series_equal(result, expected) + + # GH 12737 + df = DataFrame( + { + "grouping": [0, 1, 1, 3], + "f": [1.1, 2.1, 3.1, 4.5], + "d": date_range("2014-1-1", "2014-1-4"), + "i": [1, 2, 3, 4], + }, + columns=["grouping", "f", "i", "d"], + ) + result = df.groupby("grouping").transform("first") + + dates = [ + Timestamp("2014-1-1"), + Timestamp("2014-1-2"), + Timestamp("2014-1-2"), + Timestamp("2014-1-4"), + ] + expected = DataFrame( + {"f": [1.1, 2.1, 2.1, 4.5], "d": dates, "i": [1, 2, 2, 4]}, + columns=["f", "i", "d"], + ) + tm.assert_frame_equal(result, expected) + + # selection + result = df.groupby("grouping")[["f", "i"]].transform("first") + expected = expected[["f", "i"]] + tm.assert_frame_equal(result, expected) + + # dup columns + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["g", "a", "a"]) + result = df.groupby("g").transform("first") + expected = df.drop("g", axis=1) + tm.assert_frame_equal(result, expected) + + +def test_transform_broadcast(tsframe, ts): + grouped = ts.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + + tm.assert_index_equal(result.index, ts.index) + for _, gp in grouped: + assert_fp_equal(result.reindex(gp.index), gp.mean()) + + grouped = tsframe.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + tm.assert_index_equal(result.index, tsframe.index) + for _, gp in grouped: + agged = gp.mean() + res = result.reindex(gp.index) + for col in tsframe: + assert_fp_equal(res[col], agged[col]) + + # group columns + grouped = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) + result = grouped.transform(np.mean) + tm.assert_index_equal(result.index, tsframe.index) + tm.assert_index_equal(result.columns, tsframe.columns) + for _, gp in grouped: + agged = gp.mean(1) + res = result.reindex(columns=gp.columns) + for idx in gp.index: + assert_fp_equal(res.xs(idx), agged[idx]) + + +def test_transform_axis_1(request, transformation_func): + # GH 36308 + + # TODO(2.0) Remove after pad/backfill deprecation enforced + transformation_func = maybe_normalize_deprecated_kernels(transformation_func) + + if transformation_func == "ngroup": + msg = "ngroup fails with axis=1: #45986" + 
request.node.add_marker(pytest.mark.xfail(reason=msg)) + + warn = FutureWarning if transformation_func == "tshift" else None + + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) + args = get_groupby_method_args(transformation_func, df) + with tm.assert_produces_warning(warn): + result = df.groupby([0, 0, 1], axis=1).transform(transformation_func, *args) + expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T + + if transformation_func in ["diff", "shift"]: + # Result contains nans, so transpose coerces to float + expected["b"] = expected["b"].astype("int64") + + # cumcount returns Series; the rest are DataFrame + tm.assert_equal(result, expected) + + +def test_transform_axis_1_reducer(request, reduction_func): + # GH#45715 + if reduction_func in ( + "corrwith", + "idxmax", + "idxmin", + "ngroup", + "nth", + ): + marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986") + request.node.add_marker(marker) + if reduction_func == "mad": + warn = FutureWarning + msg = "The 'mad' method is deprecated" + elif reduction_func in ("sem", "std"): + warn = FutureWarning + msg = "The default value of numeric_only" + else: + warn = None + msg = "" + + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby([0, 0, 1], axis=1).transform(reduction_func) + warn = FutureWarning if reduction_func == "mad" else None + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T + tm.assert_equal(result, expected) + + +def test_transform_axis_ts(tsframe): + + # make sure that we are setting the axes + # correctly when on axis=0 or 1 + # in the presence of a non-monotonic indexer + # GH12713 + + base = tsframe.iloc[0:5] + r = len(base.index) + c = len(base.columns) + tso = DataFrame( + np.random.randn(r, c), index=base.index, columns=base.columns, dtype="float64" + ) + # monotonic + ts = tso + grouped = ts.groupby(lambda x: x.weekday(), group_keys=False) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: x - x.mean()) + tm.assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1, group_keys=False) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + tm.assert_frame_equal(result, expected) + + # non-monotonic + ts = tso.iloc[[1, 0] + list(range(2, len(base)))] + grouped = ts.groupby(lambda x: x.weekday(), group_keys=False) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: x - x.mean()) + tm.assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1, group_keys=False) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + tm.assert_frame_equal(result, expected) + + +def test_transform_dtype(): + # GH 9807 + # Check transform dtype output is preserved + df = DataFrame([[1, 3], [2, 3]]) + result = df.groupby(1).transform("mean") + expected = DataFrame([[1.5], [1.5]]) + tm.assert_frame_equal(result, expected) + + +def test_transform_bug(): + # GH 5712 + # transforming on a datetime column + df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) + result = df.groupby("A")["B"].transform(lambda x: x.rank(ascending=False)) + expected = Series(np.arange(5, 0, step=-1), name="B", dtype="float64") + tm.assert_series_equal(result, expected) 
+ + +def test_transform_numeric_to_boolean(): + # GH 16875 + # inconsistency in transforming boolean values + expected = Series([True, True], name="A") + + df = DataFrame({"A": [1.1, 2.2], "B": [1, 2]}) + result = df.groupby("B").A.transform(lambda x: True) + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1, 2], "B": [1, 2]}) + result = df.groupby("B").A.transform(lambda x: True) + tm.assert_series_equal(result, expected) + + +def test_transform_datetime_to_timedelta(): + # GH 15429 + # transforming a datetime to timedelta + df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) + expected = Series([Timestamp("20130101") - Timestamp("20130101")] * 5, name="A") + + # this does date math without changing result type in transform + base_time = df["A"][0] + result = ( + df.groupby("A")["A"].transform(lambda x: x.max() - x.min() + base_time) + - base_time + ) + tm.assert_series_equal(result, expected) + + # this does date math and causes the transform to return timedelta + result = df.groupby("A")["A"].transform(lambda x: x.max() - x.min()) + tm.assert_series_equal(result, expected) + + +def test_transform_datetime_to_numeric(): + # GH 10972 + # convert dt to float + df = DataFrame({"a": 1, "b": date_range("2015-01-01", periods=2, freq="D")}) + result = df.groupby("a").b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.mean() + ) + + expected = Series([-0.5, 0.5], name="b") + tm.assert_series_equal(result, expected) + + # convert dt to int + df = DataFrame({"a": 1, "b": date_range("2015-01-01", periods=2, freq="D")}) + result = df.groupby("a").b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.min() + ) + + expected = Series([0, 1], name="b") + tm.assert_series_equal(result, expected) + + +def test_transform_casting(): + # 13046 + data = """ + idx A ID3 DATETIME + 0 B-028 b76cd912ff "2014-10-08 13:43:27" + 1 B-054 4a57ed0b02 "2014-10-08 14:26:19" + 2 B-076 1a682034f8 "2014-10-08 14:29:01" + 3 B-023 b76cd912ff "2014-10-08 18:39:34" + 4 B-023 f88g8d7sds "2014-10-08 18:40:18" + 5 B-033 b76cd912ff "2014-10-08 18:44:30" + 6 B-032 b76cd912ff "2014-10-08 18:46:00" + 7 B-037 b76cd912ff "2014-10-08 18:52:15" + 8 B-046 db959faf02 "2014-10-08 18:59:59" + 9 B-053 b76cd912ff "2014-10-08 19:17:48" + 10 B-065 b76cd912ff "2014-10-08 19:21:38" + """ + df = pd.read_csv( + StringIO(data), sep=r"\s+", index_col=[0], parse_dates=["DATETIME"] + ) + + result = df.groupby("ID3")["DATETIME"].transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.dtype) + + result = df[["ID3", "DATETIME"]].groupby("ID3").transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.DATETIME.dtype) + + +def test_transform_multiple(ts): + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + + grouped.transform(lambda x: x * 2) + grouped.transform(np.mean) + + +def test_dispatch_transform(tsframe): + df = tsframe[::5].reindex(tsframe.index) + + grouped = df.groupby(lambda x: x.month) + + filled = grouped.fillna(method="pad") + fillit = lambda x: x.fillna(method="pad") + expected = df.groupby(lambda x: x.month).transform(fillit) + tm.assert_frame_equal(filled, expected) + + +def test_transform_transformation_func(request, transformation_func): + # GH 30918 + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"], + "B": [1, 2, np.nan, 3, 3, np.nan, 4], + }, + index=date_range("2020-01-01", "2020-01-07"), + ) + # TODO(2.0) Remove after pad/backfill deprecation enforced + transformation_func = maybe_normalize_deprecated_kernels(transformation_func) + if 
transformation_func == "cumcount": + test_op = lambda x: x.transform("cumcount") + mock_op = lambda x: Series(range(len(x)), x.index) + elif transformation_func == "fillna": + test_op = lambda x: x.transform("fillna", value=0) + mock_op = lambda x: x.fillna(value=0) + elif transformation_func == "ngroup": + test_op = lambda x: x.transform("ngroup") + counter = -1 + + def mock_op(x): + nonlocal counter + counter += 1 + return Series(counter, index=x.index) + + elif transformation_func == "tshift": + msg = ( + "Current behavior of groupby.tshift is inconsistent with other " + "transformations. See GH34452 for more details" + ) + request.node.add_marker(pytest.mark.xfail(reason=msg)) + else: + test_op = lambda x: x.transform(transformation_func) + mock_op = lambda x: getattr(x, transformation_func)() + + result = test_op(df.groupby("A")) + # pass the group in same order as iterating `for ... in df.groupby(...)` + # but reorder to match df's index since this is a transform + groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] + expected = concat([mock_op(g) for g in groups]).sort_index() + # sort_index does not preserve the freq + expected = expected.set_axis(df.index) + + if transformation_func in ("cumcount", "ngroup"): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + +def test_transform_select_columns(df): + f = lambda x: x.mean() + result = df.groupby("A")[["C", "D"]].transform(f) + + selection = df[["C", "D"]] + expected = selection.groupby(df["A"]).transform(f) + + tm.assert_frame_equal(result, expected) + + +def test_transform_exclude_nuisance(df): + # case that goes through _transform_item_by_item + + df.columns = ["A", "B", "B", "D"] + + # this also tests orderings in transform between + # series/frame to make sure it's consistent + expected = {} + grouped = df.groupby("A") + + gbc = grouped["B"] + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + expected["B"] = gbc.transform(lambda x: np.mean(x)) + # squeeze 1-column DataFrame down to Series + expected["B"] = expected["B"]["B"] + + assert isinstance(gbc.obj, DataFrame) + assert isinstance(gbc, DataFrameGroupBy) + + expected["D"] = grouped["D"].transform(np.mean) + expected = DataFrame(expected) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("A").transform(lambda x: np.mean(x)) + + tm.assert_frame_equal(result, expected) + + +def test_transform_function_aliases(df): + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").transform("mean") + expected = df.groupby("A").transform(np.mean) + tm.assert_frame_equal(result, expected) + + result = df.groupby("A")["C"].transform("mean") + expected = df.groupby("A")["C"].transform(np.mean) + tm.assert_series_equal(result, expected) + + +def test_series_fast_transform_date(): + # GH 13191 + df = DataFrame( + {"grouping": [np.nan, 1, 1, 3], "d": date_range("2014-1-1", "2014-1-4")} + ) + result = df.groupby("grouping")["d"].transform("first") + dates = [ + pd.NaT, + Timestamp("2014-1-2"), + Timestamp("2014-1-2"), + Timestamp("2014-1-4"), + ] + expected = Series(dates, name="d") + tm.assert_series_equal(result, expected) + + +def test_transform_length(): + # GH 9697 + df = DataFrame({"col1": [1, 1, 2, 2], "col2": [1, 2, 3, np.nan]}) + expected = Series([3.0] * 4) + + def nsum(x): + return np.nansum(x) + + results = [ + df.groupby("col1").transform(sum)["col2"], + 
df.groupby("col1")["col2"].transform(sum), + df.groupby("col1").transform(nsum)["col2"], + df.groupby("col1")["col2"].transform(nsum), + ] + for result in results: + tm.assert_series_equal(result, expected, check_names=False) + + +def test_transform_coercion(): + + # 14457 + # when we are transforming be sure to not coerce + # via assignment + df = DataFrame({"A": ["a", "a", "b", "b"], "B": [0, 1, 3, 4]}) + g = df.groupby("A") + + expected = g.transform(np.mean) + + msg = "will return a scalar mean" + with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): + result = g.transform(lambda x: np.mean(x)) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(None): + result2 = g.transform(lambda x: np.mean(x, axis=0)) + tm.assert_frame_equal(result2, expected) + + +def test_groupby_transform_with_int(): + + # GH 3740, make sure that we might upcast on item-by-item transform + + # floats + df = DataFrame( + { + "A": [1, 1, 1, 2, 2, 2], + "B": Series(1, dtype="float64"), + "C": Series([1, 2, 3, 1, 2, 3], dtype="float64"), + "D": "foo", + } + ) + with np.errstate(all="ignore"): + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + expected = DataFrame( + {"B": np.nan, "C": Series([-1, 0, 1, -1, 0, 1], dtype="float64")} + ) + tm.assert_frame_equal(result, expected) + + # int case + df = DataFrame( + { + "A": [1, 1, 1, 2, 2, 2], + "B": 1, + "C": [1, 2, 3, 1, 2, 3], + "D": "foo", + } + ) + with np.errstate(all="ignore"): + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + expected = DataFrame({"B": np.nan, "C": [-1.0, 0.0, 1.0, -1.0, 0.0, 1.0]}) + tm.assert_frame_equal(result, expected) + + # int that needs float conversion + s = Series([2, 3, 4, 10, 5, -1]) + df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"}) + with np.errstate(all="ignore"): + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + + s1 = s.iloc[0:3] + s1 = (s1 - s1.mean()) / s1.std() + s2 = s.iloc[3:6] + s2 = (s2 - s2.mean()) / s2.std() + expected = DataFrame({"B": np.nan, "C": concat([s1, s2])}) + tm.assert_frame_equal(result, expected) + + # int doesn't get downcasted + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("A").transform(lambda x: x * 2 / 2) + expected = DataFrame({"B": 1.0, "C": [2.0, 3.0, 4.0, 10.0, 5.0, -1.0]}) + tm.assert_frame_equal(result, expected) + + +def test_groupby_transform_with_nan_group(): + # GH 9941 + df = DataFrame({"a": range(10), "b": [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) + result = df.groupby(df.b)["a"].transform(max) + expected = Series([1.0, 1.0, 2.0, 3.0, np.nan, 6.0, 6.0, 9.0, 9.0, 9.0], name="a") + tm.assert_series_equal(result, expected) + + +def test_transform_mixed_type(): + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]]) + df = DataFrame( + { + "d": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0], + "c": np.tile(["a", "b", "c"], 2), + "v": np.arange(1.0, 7.0), + }, + index=index, + ) + + def f(group): + group["g"] = group["d"] * 2 + return group[:1] + + grouped = df.groupby("c") + result = grouped.apply(f) + + assert result["d"].dtype == np.float64 + + # this is by definition a mutating operation! 
+ with pd.option_context("mode.chained_assignment", None): + for key, group in grouped: + res = f(group) + tm.assert_frame_equal(res, result.loc[key]) + + +@pytest.mark.parametrize( + "op, args, targop", + [ + ("cumprod", (), lambda x: x.cumprod()), + ("cumsum", (), lambda x: x.cumsum()), + ("shift", (-1,), lambda x: x.shift(-1)), + ("shift", (1,), lambda x: x.shift()), + ], +) +def test_cython_transform_series(op, args, targop): + # GH 4095 + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) + + # series + for data in [s, s_missing]: + # print(data.head()) + expected = data.groupby(labels).transform(targop) + + tm.assert_series_equal(expected, data.groupby(labels).transform(op, *args)) + tm.assert_series_equal(expected, getattr(data.groupby(labels), op)(*args)) + + +@pytest.mark.parametrize("op", ["cumprod", "cumsum"]) +@pytest.mark.parametrize("skipna", [False, True]) +@pytest.mark.parametrize( + "input, exp", + [ + # When everything is NaN + ({"key": ["b"] * 10, "value": np.nan}, Series([np.nan] * 10, name="value")), + # When there is a single NaN + ( + {"key": ["b"] * 10 + ["a"] * 2, "value": [3] * 3 + [np.nan] + [3] * 8}, + { + ("cumprod", False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0], + ("cumprod", True): [ + 3.0, + 9.0, + 27.0, + np.nan, + 81.0, + 243.0, + 729.0, + 2187.0, + 6561.0, + 19683.0, + 3.0, + 9.0, + ], + ("cumsum", False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0], + ("cumsum", True): [ + 3.0, + 6.0, + 9.0, + np.nan, + 12.0, + 15.0, + 18.0, + 21.0, + 24.0, + 27.0, + 3.0, + 6.0, + ], + }, + ), + ], +) +def test_groupby_cum_skipna(op, skipna, input, exp): + df = DataFrame(input) + result = df.groupby("key")["value"].transform(op, skipna=skipna) + if isinstance(exp, dict): + expected = exp[(op, skipna)] + else: + expected = exp + expected = Series(expected, name="value") + tm.assert_series_equal(expected, result) + + +@pytest.mark.slow +@pytest.mark.parametrize( + "op, args, targop", + [ + ("cumprod", (), lambda x: x.cumprod()), + ("cumsum", (), lambda x: x.cumsum()), + ("shift", (-1,), lambda x: x.shift(-1)), + ("shift", (1,), lambda x: x.shift()), + ], +) +def test_cython_transform_frame(op, args, targop): + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) + strings = list("qwertyuiopasdfghjklz") + strings_missing = strings[:] + strings_missing[5] = np.nan + df = DataFrame( + { + "float": s, + "float_missing": s_missing, + "int": [1, 1, 1, 1, 2] * 200, + "datetime": date_range("1990-1-1", periods=1000), + "timedelta": pd.timedelta_range(1, freq="s", periods=1000), + "string": strings * 50, + "string_missing": strings_missing * 50, + }, + columns=[ + "float", + "float_missing", + "int", + "datetime", + "timedelta", + "string", + "string_missing", + ], + ) + df["cat"] = df["string"].astype("category") + + df2 = df.copy() + df2.index = MultiIndex.from_product([range(100), range(10)]) + + # DataFrame - Single and MultiIndex, + # group by values, index level, columns + for df in [df, df2]: + for gb_target in [ + {"by": labels}, + {"level": 0}, + {"by": "string"}, + ]: # {"by": 'string_missing'}]: + # {"by": ['int','string']}]: + + gb = df.groupby(group_keys=False, **gb_target) + # allowlisted methods set the selection before applying + # bit a of hack to make sure the cythonized shift + # is equivalent to pre 0.17.1 behavior + if op == "shift": + gb._set_group_selection() 
+ + if op != "shift" and "int" not in gb_target: + # numeric apply fastpath promotes dtype so have + # to apply separately and concat + i = gb[["int"]].apply(targop) + f = gb[["float", "float_missing"]].apply(targop) + expected = concat([f, i], axis=1) + else: + expected = gb.apply(targop) + + expected = expected.sort_index(axis=1) + + warn = None if op == "shift" else FutureWarning + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = gb.transform(op, *args).sort_index(axis=1) + tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(warn, match=msg): + result = getattr(gb, op)(*args).sort_index(axis=1) + tm.assert_frame_equal(result, expected) + # individual columns + for c in df: + if ( + c not in ["float", "int", "float_missing"] + and op != "shift" + and not (c == "timedelta" and op == "cumsum") + ): + msg = "|".join( + [ + "does not support .* operations", + ".* is not supported for object dtype", + "is not implemented for this dtype", + ] + ) + with pytest.raises(TypeError, match=msg): + gb[c].transform(op) + with pytest.raises(TypeError, match=msg): + getattr(gb[c], op)() + else: + expected = gb[c].apply(targop) + expected.name = c + tm.assert_series_equal(expected, gb[c].transform(op, *args)) + tm.assert_series_equal(expected, getattr(gb[c], op)(*args)) + + +def test_transform_with_non_scalar_group(): + # GH 10165 + cols = MultiIndex.from_tuples( + [ + ("syn", "A"), + ("mis", "A"), + ("non", "A"), + ("syn", "C"), + ("mis", "C"), + ("non", "C"), + ("syn", "T"), + ("mis", "T"), + ("non", "T"), + ("syn", "G"), + ("mis", "G"), + ("non", "G"), + ] + ) + df = DataFrame( + np.random.randint(1, 10, (4, 12)), columns=cols, index=["A", "C", "G", "T"] + ) + + msg = "transform must return a scalar value for each group.*" + with pytest.raises(ValueError, match=msg): + df.groupby(axis=1, level=1).transform(lambda z: z.div(z.sum(axis=1), axis=0)) + + +@pytest.mark.parametrize( + "cols,expected", + [ + ("a", Series([1, 1, 1], name="a")), + ( + ["a", "c"], + DataFrame({"a": [1, 1, 1], "c": [1, 1, 1]}), + ), + ], +) +@pytest.mark.parametrize("agg_func", ["count", "rank", "size"]) +def test_transform_numeric_ret(cols, expected, agg_func): + # GH#19200 and GH#27469 + df = DataFrame( + {"a": date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)} + ) + result = df.groupby("b")[cols].transform(agg_func) + + if agg_func == "rank": + expected = expected.astype("float") + elif agg_func == "size" and cols == ["a", "c"]: + # transform("size") returns a Series + expected = expected["a"].rename(None) + tm.assert_equal(result, expected) + + +def test_transform_ffill(): + # GH 24211 + data = [["a", 0.0], ["a", float("nan")], ["b", 1.0], ["b", float("nan")]] + df = DataFrame(data, columns=["key", "values"]) + result = df.groupby("key").transform("ffill") + expected = DataFrame({"values": [0.0, 0.0, 1.0, 1.0]}) + tm.assert_frame_equal(result, expected) + result = df.groupby("key")["values"].transform("ffill") + expected = Series([0.0, 0.0, 1.0, 1.0], name="values") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("mix_groupings", [True, False]) +@pytest.mark.parametrize("as_series", [True, False]) +@pytest.mark.parametrize("val1,val2", [("foo", "bar"), (1, 2), (1.0, 2.0)]) +@pytest.mark.parametrize( + "fill_method,limit,exp_vals", + [ + ( + "ffill", + None, + [np.nan, np.nan, "val1", "val1", "val1", "val2", "val2", "val2"], + ), + ("ffill", 1, [np.nan, np.nan, "val1", "val1", np.nan, "val2", "val2", 
np.nan]), + ( + "bfill", + None, + ["val1", "val1", "val1", "val2", "val2", "val2", np.nan, np.nan], + ), + ("bfill", 1, [np.nan, "val1", "val1", np.nan, "val2", "val2", np.nan, np.nan]), + ], +) +def test_group_fill_methods( + mix_groupings, as_series, val1, val2, fill_method, limit, exp_vals +): + vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan] + _exp_vals = list(exp_vals) + # Overwrite placeholder values + for index, exp_val in enumerate(_exp_vals): + if exp_val == "val1": + _exp_vals[index] = val1 + elif exp_val == "val2": + _exp_vals[index] = val2 + + # Need to modify values and expectations depending on the + # Series / DataFrame that we ultimately want to generate + if mix_groupings: # ['a', 'b', 'a, 'b', ...] + keys = ["a", "b"] * len(vals) + + def interweave(list_obj): + temp = [] + for x in list_obj: + temp.extend([x, x]) + + return temp + + _exp_vals = interweave(_exp_vals) + vals = interweave(vals) + else: # ['a', 'a', 'a', ... 'b', 'b', 'b'] + keys = ["a"] * len(vals) + ["b"] * len(vals) + _exp_vals = _exp_vals * 2 + vals = vals * 2 + + df = DataFrame({"key": keys, "val": vals}) + if as_series: + result = getattr(df.groupby("key")["val"], fill_method)(limit=limit) + exp = Series(_exp_vals, name="val") + tm.assert_series_equal(result, exp) + else: + result = getattr(df.groupby("key"), fill_method)(limit=limit) + exp = DataFrame({"val": _exp_vals}) + tm.assert_frame_equal(result, exp) + + +@pytest.mark.parametrize("fill_method", ["ffill", "bfill"]) +def test_pad_stable_sorting(fill_method): + # GH 21207 + x = [0] * 20 + y = [np.nan] * 10 + [1] * 10 + + if fill_method == "bfill": + y = y[::-1] + + df = DataFrame({"x": x, "y": y}) + expected = df.drop("x", axis=1) + + result = getattr(df.groupby("x"), fill_method)() + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +@pytest.mark.parametrize( + "freq", + [ + None, + pytest.param( + "D", + marks=pytest.mark.xfail( + reason="GH#23918 before method uses freq in vectorized approach" + ), + ), + ], +) +@pytest.mark.parametrize("periods", [1, -1]) +@pytest.mark.parametrize("fill_method", ["ffill", "bfill", None]) +@pytest.mark.parametrize("limit", [None, 1]) +def test_pct_change(test_series, freq, periods, fill_method, limit): + # GH 21200, 21621, 30463 + vals = [3, np.nan, np.nan, np.nan, 1, 2, 4, 10, np.nan, 4] + keys = ["a", "b"] + key_v = np.repeat(keys, len(vals)) + df = DataFrame({"key": key_v, "vals": vals * 2}) + + df_g = df + if fill_method is not None: + df_g = getattr(df.groupby("key"), fill_method)(limit=limit) + grp = df_g.groupby(df.key) + + expected = grp["vals"].obj / grp["vals"].shift(periods) - 1 + + if test_series: + result = df.groupby("key")["vals"].pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq + ) + tm.assert_series_equal(result, expected) + else: + result = df.groupby("key").pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq + ) + tm.assert_frame_equal(result, expected.to_frame("vals")) + + +@pytest.mark.parametrize( + "func, expected_status", + [ + ("ffill", ["shrt", "shrt", "lng", np.nan, "shrt", "ntrl", "ntrl"]), + ("bfill", ["shrt", "lng", "lng", "shrt", "shrt", "ntrl", np.nan]), + ], +) +def test_ffill_bfill_non_unique_multilevel(func, expected_status): + # GH 19437 + date = pd.to_datetime( + [ + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-02", + "2018-01-01", + "2018-01-02", + ] + ) + symbol = ["MSFT", "MSFT", "MSFT", "AAPL", "AAPL", "TSLA", 
"TSLA"] + status = ["shrt", np.nan, "lng", np.nan, "shrt", "ntrl", np.nan] + + df = DataFrame({"date": date, "symbol": symbol, "status": status}) + df = df.set_index(["date", "symbol"]) + result = getattr(df.groupby("symbol")["status"], func)() + + index = MultiIndex.from_tuples( + tuples=list(zip(*[date, symbol])), names=["date", "symbol"] + ) + expected = Series(expected_status, index=index, name="status") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", [np.any, np.all]) +def test_any_all_np_func(func): + # GH 20653 + df = DataFrame( + [["foo", True], [np.nan, True], ["foo", True]], columns=["key", "val"] + ) + + exp = Series([True, np.nan, True], name="val") + + res = df.groupby("key")["val"].transform(func) + tm.assert_series_equal(res, exp) + + +def test_groupby_transform_rename(): + # https://github.com/pandas-dev/pandas/issues/23461 + def demean_rename(x): + result = x - x.mean() + + if isinstance(x, Series): + return result + + result = result.rename(columns={c: f"{c}_demeaned" for c in result.columns}) + + return result + + df = DataFrame({"group": list("ababa"), "value": [1, 1, 1, 2, 2]}) + expected = DataFrame({"value": [-1.0 / 3, -0.5, -1.0 / 3, 0.5, 2.0 / 3]}) + + result = df.groupby("group").transform(demean_rename) + tm.assert_frame_equal(result, expected) + result_single = df.groupby("group").value.transform(demean_rename) + tm.assert_series_equal(result_single, expected["value"]) + + +@pytest.mark.parametrize("func", [min, max, np.min, np.max, "first", "last"]) +def test_groupby_transform_timezone_column(func): + # GH 24198 + ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore") + result = DataFrame({"end_time": [ts], "id": [1]}) + result["max_end_time"] = result.groupby("id").end_time.transform(func) + expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func, values", + [ + ("idxmin", ["1/1/2011"] * 2 + ["1/3/2011"] * 7 + ["1/10/2011"]), + ("idxmax", ["1/2/2011"] * 2 + ["1/9/2011"] * 7 + ["1/10/2011"]), + ], +) +def test_groupby_transform_with_datetimes(func, values): + # GH 15306 + dates = date_range("1/1/2011", periods=10, freq="D") + + stocks = DataFrame({"price": np.arange(10.0)}, index=dates) + stocks["week_id"] = dates.isocalendar().week + + result = stocks.groupby(stocks["week_id"])["price"].transform(func) + + expected = Series(data=pd.to_datetime(values), index=dates, name="price") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["cumsum", "cumprod", "cummin", "cummax"]) +def test_transform_absent_categories(func): + # GH 16771 + # cython transforms with more groups than rows + x_vals = [1] + x_cats = range(2) + y = [1] + df = DataFrame({"x": Categorical(x_vals, x_cats), "y": y}) + result = getattr(df.y.groupby(df.x), func)() + expected = df.y + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["ffill", "bfill", "shift"]) +@pytest.mark.parametrize("key, val", [("level", 0), ("by", Series([0]))]) +def test_ffill_not_in_axis(func, key, val): + # GH 21521 + df = DataFrame([[np.nan]]) + result = getattr(df.groupby(**{key: val}), func)() + expected = df + + tm.assert_frame_equal(result, expected) + + +def test_transform_invalid_name_raises(): + # GH#27486 + df = DataFrame({"a": [0, 1, 1, 2]}) + g = df.groupby(["a", "b", "b", "c"]) + with pytest.raises(ValueError, match="not a valid function name"): + g.transform("some_arbitrary_name") + + # method exists 
on the object, but is not a valid transformation/agg + assert hasattr(g, "aggregate") # make sure the method exists + with pytest.raises(ValueError, match="not a valid function name"): + g.transform("aggregate") + + # Test SeriesGroupBy + g = df["a"].groupby(["a", "b", "b", "c"]) + with pytest.raises(ValueError, match="not a valid function name"): + g.transform("some_arbitrary_name") + + +@pytest.mark.parametrize( + "obj", + [ + DataFrame( + {"a": [0, 0, 0, 1, 1, 1], "b": range(6)}, + index=["A", "B", "C", "D", "E", "F"], + ), + Series([0, 0, 0, 1, 1, 1], index=["A", "B", "C", "D", "E", "F"]), + ], +) +def test_transform_agg_by_name(request, reduction_func, obj): + func = reduction_func + warn = FutureWarning if func == "mad" else None + + g = obj.groupby(np.repeat([0, 1], 3)) + + if func == "corrwith" and isinstance(obj, Series): # GH#32293 + request.node.add_marker( + pytest.mark.xfail(reason="TODO: implement SeriesGroupBy.corrwith") + ) + + args = get_groupby_method_args(reduction_func, obj) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = g.transform(func, *args) + + # this is the *definition* of a transformation + tm.assert_index_equal(result.index, obj.index) + + if func not in ("ngroup", "size") and obj.ndim == 2: + # size/ngroup return a Series, unlike other transforms + tm.assert_index_equal(result.columns, obj.columns) + + # verify that values were broadcasted across each group + assert len(set(DataFrame(result).iloc[-3:, -1])) == 1 + + +def test_transform_lambda_with_datetimetz(): + # GH 27496 + df = DataFrame( + { + "time": [ + Timestamp("2010-07-15 03:14:45"), + Timestamp("2010-11-19 18:47:06"), + ], + "timezone": ["Etc/GMT+4", "US/Eastern"], + } + ) + result = df.groupby(["timezone"])["time"].transform( + lambda x: x.dt.tz_localize(x.name) + ) + expected = Series( + [ + Timestamp("2010-07-15 03:14:45", tz="Etc/GMT+4"), + Timestamp("2010-11-19 18:47:06", tz="US/Eastern"), + ], + name="time", + ) + tm.assert_series_equal(result, expected) + + +def test_transform_fastpath_raises(): + # GH#29631 case where fastpath defined in groupby.generic _choose_path + # raises, but slow_path does not + + df = DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]}) + gb = df.groupby("A") + + def func(grp): + # we want a function such that func(frame) fails but func.apply(frame) + # works + if grp.ndim == 2: + # Ensure that fast_path fails + raise NotImplementedError("Don't cross the streams") + return grp * 2 + + # Check that the fastpath raises, see _transform_general + obj = gb._obj_with_exclusions + gen = gb.grouper.get_iterator(obj, axis=gb.axis) + fast_path, slow_path = gb._define_paths(func) + _, group = next(gen) + + with pytest.raises(NotImplementedError, match="Don't cross the streams"): + fast_path(group) + + result = gb.transform(func) + + expected = DataFrame([2, -2, 2, 4], columns=["B"]) + tm.assert_frame_equal(result, expected) + + +def test_transform_lambda_indexing(): + # GH 7883 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "flux", "foo", "flux"], + "B": ["one", "one", "two", "three", "two", "six", "five", "three"], + "C": range(8), + "D": range(8), + "E": range(8), + } + ) + df = df.set_index(["A", "B"]) + df = df.sort_index() + result = df.groupby(level="A").transform(lambda x: x.iloc[-1]) + expected = DataFrame( + { + "C": [3, 3, 7, 7, 4, 4, 4, 4], + "D": [3, 3, 7, 7, 4, 4, 4, 4], + "E": [3, 3, 7, 7, 4, 4, 4, 4], + }, + index=MultiIndex.from_tuples( + [ + ("bar", "one"), + ("bar", "three"), + ("flux", "six"), + 
("flux", "three"), + ("foo", "five"), + ("foo", "one"), + ("foo", "two"), + ("foo", "two"), + ], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_categorical_and_not_categorical_key(observed): + # Checks that groupby-transform, when grouping by both a categorical + # and a non-categorical key, doesn't try to expand the output to include + # non-observed categories but instead matches the input shape. + # GH 32494 + df_with_categorical = DataFrame( + { + "A": Categorical(["a", "b", "a"], categories=["a", "b", "c"]), + "B": [1, 2, 3], + "C": ["a", "b", "a"], + } + ) + df_without_categorical = DataFrame( + {"A": ["a", "b", "a"], "B": [1, 2, 3], "C": ["a", "b", "a"]} + ) + + # DataFrame case + result = df_with_categorical.groupby(["A", "C"], observed=observed).transform("sum") + expected = df_without_categorical.groupby(["A", "C"]).transform("sum") + tm.assert_frame_equal(result, expected) + expected_explicit = DataFrame({"B": [4, 2, 4]}) + tm.assert_frame_equal(result, expected_explicit) + + # Series case + result = df_with_categorical.groupby(["A", "C"], observed=observed)["B"].transform( + "sum" + ) + expected = df_without_categorical.groupby(["A", "C"])["B"].transform("sum") + tm.assert_series_equal(result, expected) + expected_explicit = Series([4, 2, 4], name="B") + tm.assert_series_equal(result, expected_explicit) + + +def test_string_rank_grouping(): + # GH 19354 + df = DataFrame({"A": [1, 1, 2], "B": [1, 2, 3]}) + result = df.groupby("A").transform("rank") + expected = DataFrame({"B": [1.0, 2.0, 1.0]}) + tm.assert_frame_equal(result, expected) + + +def test_transform_cumcount(): + # GH 27472 + df = DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": range(6)}) + grp = df.groupby(np.repeat([0, 1], 3)) + + result = grp.cumcount() + expected = Series([0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(result, expected) + + result = grp.transform("cumcount") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("keys", [["A1"], ["A1", "A2"]]) +def test_null_group_lambda_self(sort, dropna, keys): + # GH 17093 + size = 50 + nulls1 = np.random.choice([False, True], size) + nulls2 = np.random.choice([False, True], size) + # Whether a group contains a null value or not + nulls_grouper = nulls1 if len(keys) == 1 else nulls1 | nulls2 + + a1 = np.random.randint(0, 5, size=size).astype(float) + a1[nulls1] = np.nan + a2 = np.random.randint(0, 5, size=size).astype(float) + a2[nulls2] = np.nan + values = np.random.randint(0, 5, size=a1.shape) + df = DataFrame({"A1": a1, "A2": a2, "B": values}) + + expected_values = values + if dropna and nulls_grouper.any(): + expected_values = expected_values.astype(float) + expected_values[nulls_grouper] = np.nan + expected = DataFrame(expected_values, columns=["B"]) + + gb = df.groupby(keys, dropna=dropna, sort=sort) + result = gb[["B"]].transform(lambda x: x) + tm.assert_frame_equal(result, expected) + + +def test_null_group_str_reducer(request, dropna, reduction_func): + # GH 17093 + if reduction_func == "corrwith": + msg = "incorrectly raises" + request.node.add_marker(pytest.mark.xfail(reason=msg)) + warn = FutureWarning if reduction_func == "mad" else None + + index = [1, 2, 3, 4] # test transform preserves non-standard index + df = DataFrame({"A": [1, 1, np.nan, np.nan], "B": [1, 2, 2, 3]}, index=index) + gb = df.groupby("A", dropna=dropna) + + args = get_groupby_method_args(reduction_func, df) + + # Manually handle reducers that don't fit the generic pattern + # Set expected with dropna=False, then replace if necessary + 
if reduction_func == "first": + expected = DataFrame({"B": [1, 1, 2, 2]}, index=index) + elif reduction_func == "last": + expected = DataFrame({"B": [2, 2, 3, 3]}, index=index) + elif reduction_func == "nth": + expected = DataFrame({"B": [1, 1, 2, 2]}, index=index) + elif reduction_func == "size": + expected = Series([2, 2, 2, 2], index=index) + elif reduction_func == "corrwith": + expected = DataFrame({"B": [1.0, 1.0, 1.0, 1.0]}, index=index) + else: + expected_gb = df.groupby("A", dropna=False) + buffer = [] + for idx, group in expected_gb: + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated" + ): + res = getattr(group["B"], reduction_func)() + buffer.append(Series(res, index=group.index)) + expected = concat(buffer).to_frame("B") + if dropna: + dtype = object if reduction_func in ("any", "all") else float + expected = expected.astype(dtype) + if expected.ndim == 2: + expected.iloc[[2, 3], 0] = np.nan + else: + expected.iloc[[2, 3]] = np.nan + + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = gb.transform(reduction_func, *args) + tm.assert_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") +def test_null_group_str_transformer(request, dropna, transformation_func): + # GH 17093 + if transformation_func == "tshift": + msg = "tshift requires timeseries" + request.node.add_marker(pytest.mark.xfail(reason=msg)) + df = DataFrame({"A": [1, 1, np.nan], "B": [1, 2, 2]}, index=[1, 2, 3]) + args = get_groupby_method_args(transformation_func, df) + gb = df.groupby("A", dropna=dropna) + + buffer = [] + for k, (idx, group) in enumerate(gb): + if transformation_func == "cumcount": + # DataFrame has no cumcount method + res = DataFrame({"B": range(len(group))}, index=group.index) + elif transformation_func == "ngroup": + res = DataFrame(len(group) * [k], index=group.index, columns=["B"]) + else: + res = getattr(group[["B"]], transformation_func)(*args) + buffer.append(res) + if dropna: + dtype = object if transformation_func in ("any", "all") else None + buffer.append(DataFrame([[np.nan]], index=[3], dtype=dtype, columns=["B"])) + expected = concat(buffer) + + if transformation_func in ("cumcount", "ngroup"): + # ngroup/cumcount always returns a Series as it counts the groups, not values + expected = expected["B"].rename(None) + + warn = FutureWarning if transformation_func in ("backfill", "pad") else None + msg = f"{transformation_func} is deprecated" + with tm.assert_produces_warning(warn, match=msg): + result = gb.transform(transformation_func, *args) + + tm.assert_equal(result, expected) + + +def test_null_group_str_reducer_series(request, dropna, reduction_func): + # GH 17093 + if reduction_func == "corrwith": + msg = "corrwith not implemented for SeriesGroupBy" + request.node.add_marker(pytest.mark.xfail(reason=msg)) + warn = FutureWarning if reduction_func == "mad" else None + + # GH 17093 + index = [1, 2, 3, 4] # test transform preserves non-standard index + ser = Series([1, 2, 2, 3], index=index) + gb = ser.groupby([1, 1, np.nan, np.nan], dropna=dropna) + + args = get_groupby_method_args(reduction_func, ser) + + # Manually handle reducers that don't fit the generic pattern + # Set expected with dropna=False, then replace if necessary + if reduction_func == "first": + expected = Series([1, 1, 2, 2], index=index) + elif reduction_func == "last": + expected = Series([2, 2, 3, 3], index=index) + elif reduction_func == "nth": + expected = Series([1, 1, 2, 2], index=index) + elif 
reduction_func == "size": + expected = Series([2, 2, 2, 2], index=index) + elif reduction_func == "corrwith": + expected = Series([1, 1, 2, 2], index=index) + else: + expected_gb = ser.groupby([1, 1, np.nan, np.nan], dropna=False) + buffer = [] + for idx, group in expected_gb: + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated" + ): + res = getattr(group, reduction_func)() + buffer.append(Series(res, index=group.index)) + expected = concat(buffer) + if dropna: + dtype = object if reduction_func in ("any", "all") else float + expected = expected.astype(dtype) + expected.iloc[[2, 3]] = np.nan + + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = gb.transform(reduction_func, *args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") +def test_null_group_str_transformer_series(request, dropna, transformation_func): + # GH 17093 + if transformation_func == "tshift": + msg = "tshift requires timeseries" + request.node.add_marker(pytest.mark.xfail(reason=msg)) + ser = Series([1, 2, 2], index=[1, 2, 3]) + args = get_groupby_method_args(transformation_func, ser) + gb = ser.groupby([1, 1, np.nan], dropna=dropna) + + buffer = [] + for k, (idx, group) in enumerate(gb): + if transformation_func == "cumcount": + # Series has no cumcount method + res = Series(range(len(group)), index=group.index) + elif transformation_func == "ngroup": + res = Series(k, index=group.index) + else: + res = getattr(group, transformation_func)(*args) + buffer.append(res) + if dropna: + dtype = object if transformation_func in ("any", "all") else None + buffer.append(Series([np.nan], index=[3], dtype=dtype)) + expected = concat(buffer) + + warn = FutureWarning if transformation_func in ("backfill", "pad") else None + msg = f"{transformation_func} is deprecated" + with tm.assert_produces_warning(warn, match=msg): + result = gb.transform(transformation_func, *args) + + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "func, series, expected_values", + [ + (Series.sort_values, False, [4, 5, 3, 1, 2]), + (lambda x: x.head(1), False, ValueError), + # SeriesGroupBy already has correct behavior + (Series.sort_values, True, [5, 4, 3, 2, 1]), + (lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, np.nan]), + ], +) +@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) +@pytest.mark.parametrize("keys_in_index", [True, False]) +def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index): + # GH#45648 - transform should align with the input's index + df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]}) + if "a2" in keys: + df["a2"] = df["a1"] + if keys_in_index: + df = df.set_index(keys, append=True) + + gb = df.groupby(keys) + if series: + gb = gb["b"] + + warn = None if series else FutureWarning + msg = "returning a DataFrame in groupby.transform will align" + if expected_values is ValueError: + with tm.assert_produces_warning(warn, match=msg): + with pytest.raises(ValueError, match="Length mismatch"): + gb.transform(func) + else: + with tm.assert_produces_warning(warn, match=msg): + result = gb.transform(func) + expected = DataFrame({"b": expected_values}, index=df.index) + if series: + expected = expected["b"] + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("keys", ["A", ["A", "B"]]) +def test_as_index_no_change(keys, df, groupby_func): + # GH#49834 - as_index should have no impact on DataFrameGroupBy.transform + if keys == "A": 
+ # Column B is string dtype; will fail on some ops + df = df.drop(columns="B") + args = get_groupby_method_args(groupby_func, df) + gb_as_index_true = df.groupby(keys, as_index=True) + gb_as_index_false = df.groupby(keys, as_index=False) + result = gb_as_index_true.transform(groupby_func, *args) + expected = gb_as_index_false.transform(groupby_func, *args) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("func", [np.mean, np.cumprod]) +def test_numeric_only_warning_numpy(func): + # GH#50538 + df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]}) + gb = df.groupby("a") + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + gb.transform(func) + # Ensure users can pass numeric_only + result = gb.transform(func, numeric_only=True) + values = [3.5, 3.5, 5.0] if func == np.mean else [3, 12, 5] + expected = DataFrame({"c": values}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/__init__.py b/pandas/tests/indexes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/base_class/__init__.py b/pandas/tests/indexes/base_class/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/base_class/test_constructors.py b/pandas/tests/indexes/base_class/test_constructors.py new file mode 100644 index 00000000..df04502a --- /dev/null +++ b/pandas/tests/indexes/base_class/test_constructors.py @@ -0,0 +1,50 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +class TestIndexConstructor: + # Tests for the Index constructor, specifically for cases that do + # not return a subclass + + @pytest.mark.parametrize("value", [1, np.int64(1)]) + def test_constructor_corner(self, value): + # corner case + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + f"kind, {value} was passed" + ) + with pytest.raises(TypeError, match=msg): + Index(value) + + @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]]) + def test_construction_list_mixed_tuples(self, index_vals): + # see gh-10697: if we are constructing from a mixed list of tuples, + # make sure that we are independent of the sorting order. 
+ index = Index(index_vals) + assert isinstance(index, Index) + assert not isinstance(index, MultiIndex) + + def test_constructor_wrong_kwargs(self): + # GH #19348 + with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"): + with tm.assert_produces_warning(FutureWarning): + Index([], foo="bar") + + def test_constructor_cast(self): + msg = "could not convert string to float" + with pytest.raises(ValueError, match=msg): + Index(["a", "b", "c"], dtype=float) + + @pytest.mark.parametrize("tuple_list", [[()], [(), ()]]) + def test_construct_empty_tuples(self, tuple_list): + # GH #45608 + result = Index(tuple_list) + expected = MultiIndex.from_tuples(tuple_list) + + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/base_class/test_formats.py b/pandas/tests/indexes/base_class/test_formats.py new file mode 100644 index 00000000..9053d45d --- /dev/null +++ b/pandas/tests/indexes/base_class/test_formats.py @@ -0,0 +1,148 @@ +import numpy as np +import pytest + +import pandas._config.config as cf + +from pandas import Index + + +class TestIndexRendering: + @pytest.mark.parametrize( + "index,expected", + [ + # ASCII + # short + ( + Index(["a", "bb", "ccc"]), + """Index(['a', 'bb', 'ccc'], dtype='object')""", + ), + # multiple lines + ( + Index(["a", "bb", "ccc"] * 10), + "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', " + "'bb', 'ccc', 'a', 'bb', 'ccc',\n" + " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', " + "'bb', 'ccc', 'a', 'bb', 'ccc',\n" + " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n" + " dtype='object')", + ), + # truncated + ( + Index(["a", "bb", "ccc"] * 100), + "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n" + " ...\n" + " 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n" + " dtype='object', length=300)", + ), + # Non-ASCII + # short + ( + Index(["あ", "いい", "ううう"]), + """Index(['あ', 'いい', 'ううう'], dtype='object')""", + ), + # multiple lines + ( + Index(["あ", "いい", "ううう"] * 10), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう'],\n" + " dtype='object')" + ), + ), + # truncated + ( + Index(["あ", "いい", "ううう"] * 100), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ',\n" + " ...\n" + " 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう'],\n" + " dtype='object', length=300)" + ), + ), + ], + ) + def test_string_index_repr(self, index, expected): + result = repr(index) + assert result == expected + + @pytest.mark.parametrize( + "index,expected", + [ + # short + ( + Index(["あ", "いい", "ううう"]), + ("Index(['あ', 'いい', 'ううう'], dtype='object')"), + ), + # multiple lines + ( + Index(["あ", "いい", "ううう"] * 10), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう'],\n" + " dtype='object')" + "" + ), + ), + # truncated + ( + Index(["あ", "いい", "ううう"] * 100), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ',\n" + " ...\n" + " 'ううう', 'あ', 'いい', 'ううう', 'あ', " + "'いい', 'ううう', 'あ', 'いい',\n" + " 'ううう'],\n" + " dtype='object', length=300)" + ), + ), + ], + ) + def test_string_index_repr_with_unicode_option(self, index, expected): + # Enable Unicode option ----------------------------------------- + with 
cf.option_context("display.unicode.east_asian_width", True): + result = repr(index) + assert result == expected + + def test_repr_summary(self): + with cf.option_context("display.max_seq_items", 10): + result = repr(Index(np.arange(1000))) + assert len(result) < 200 + assert "..." in result + + def test_summary_bug(self): + # GH#3869 + ind = Index(["{other}%s", "~:{range}:0"], name="A") + result = ind._summary() + # shouldn't be formatted accidentally. + assert "~:{range}:0" in result + assert "{other}%s" in result + + def test_index_repr_bool_nan(self): + # GH32146 + arr = Index([True, False, np.nan], dtype=object) + exp1 = arr.format() + out1 = ["True", "False", "NaN"] + assert out1 == exp1 + + exp2 = repr(arr) + out2 = "Index([True, False, nan], dtype='object')" + assert out2 == exp2 + + def test_format_different_scalar_lengths(self): + # GH#35439 + idx = Index(["aaaaaaaaa", "b"]) + expected = ["aaaaaaaaa", "b"] + assert idx.format() == expected diff --git a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py new file mode 100644 index 00000000..770fa3f6 --- /dev/null +++ b/pandas/tests/indexes/base_class/test_indexing.py @@ -0,0 +1,86 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + NaT, +) +import pandas._testing as tm + + +class TestGetSliceBounds: + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) + def test_get_slice_bounds_within(self, kind, side, expected): + index = Index(list("abcdef")) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + result = index.get_slice_bound("e", kind=kind, side=side) + assert result == expected + + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side", ["left", "right"]) + @pytest.mark.parametrize( + "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)] + ) + def test_get_slice_bounds_outside(self, kind, side, expected, data, bound): + index = Index(data) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + result = index.get_slice_bound(bound, kind=kind, side=side) + assert result == expected + + def test_get_slice_bounds_invalid_side(self): + with pytest.raises(ValueError, match="Invalid value for side kwarg"): + Index([]).get_slice_bound("a", side="middle") + + +class TestGetIndexerNonUnique: + def test_get_indexer_non_unique_dtype_mismatch(self): + # GH#25459 + indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0])) + tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing) + + +class TestGetLoc: + @pytest.mark.slow # to_flat_index takes a while + def test_get_loc_tuple_monotonic_above_size_cutoff(self): + # Go through the libindex path for which using + # _bin_search vs ndarray.searchsorted makes a difference + + lev = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + dti = pd.date_range("2016-01-01", periods=100) + + mi = pd.MultiIndex.from_product([lev, range(10**3), dti]) + oidx = mi.to_flat_index() + + loc = len(oidx) // 2 + tup = oidx[loc] + + res = oidx.get_loc(tup) + assert res == loc + + def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self): + # case that goes through _maybe_get_bool_indexer + idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object) + + # we dont raise KeyError on nan + res = idx.get_loc(np.nan) + assert res == 1 + + # we only match on 
None, not on np.nan + res = idx.get_loc(None) + expected = np.array([False, False, True, False, False, True]) + tm.assert_numpy_array_equal(res, expected) + + # we don't match at all on mismatched NA + with pytest.raises(KeyError, match="NaT"): + idx.get_loc(NaT) + + +def test_getitem_boolean_ea_indexer(): + # GH#45806 + ser = pd.Series([True, False, pd.NA], dtype="boolean") + result = ser.index[ser] + expected = Index([0]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/base_class/test_pickle.py b/pandas/tests/indexes/base_class/test_pickle.py new file mode 100644 index 00000000..c670921d --- /dev/null +++ b/pandas/tests/indexes/base_class/test_pickle.py @@ -0,0 +1,11 @@ +from pandas import Index +import pandas._testing as tm + + +def test_pickle_preserves_object_dtype(): + # GH#43188, GH#43155 don't infer numeric dtype + index = Index([1, 2, 3], dtype=object) + + result = tm.round_trip_pickle(index) + assert result.dtype == object + tm.assert_index_equal(index, result) diff --git a/pandas/tests/indexes/base_class/test_reshape.py b/pandas/tests/indexes/base_class/test_reshape.py new file mode 100644 index 00000000..547d6266 --- /dev/null +++ b/pandas/tests/indexes/base_class/test_reshape.py @@ -0,0 +1,86 @@ +""" +Tests for ndarray-like method on the base Index class +""" +import numpy as np +import pytest + +from pandas import Index +import pandas._testing as tm + + +class TestReshape: + def test_repeat(self): + repeats = 2 + index = Index([1, 2, 3]) + expected = Index([1, 1, 2, 2, 3, 3]) + + result = index.repeat(repeats) + tm.assert_index_equal(result, expected) + + def test_insert(self): + + # GH 7256 + # validate neg/pos inserts + result = Index(["b", "c", "d"]) + + # test 0th element + tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a")) + + # test Nth element that follows Python list behavior + tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e")) + + # test loc +/- neq (0, -1) + tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z")) + + # test empty + null_index = Index([]) + tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a")) + + def test_insert_missing(self, nulls_fixture): + # GH#22295 + # test there is no mangling of NA values + expected = Index(["a", nulls_fixture, "b", "c"]) + result = Index(list("abc")).insert(1, nulls_fixture) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")] + ) + @pytest.mark.parametrize("loc", [-1, 2]) + def test_insert_datetime_into_object(self, loc, val): + # GH#44509 + idx = Index(["1", "2", "3"]) + result = idx.insert(loc, val) + expected = Index(["1", "2", val, "3"]) + tm.assert_index_equal(result, expected) + assert type(expected[2]) is type(val) + + @pytest.mark.parametrize( + "pos,expected", + [ + (0, Index(["b", "c", "d"], name="index")), + (-1, Index(["a", "b", "c"], name="index")), + ], + ) + def test_delete(self, pos, expected): + index = Index(["a", "b", "c", "d"], name="index") + result = index.delete(pos) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_delete_raises(self): + index = Index(["a", "b", "c", "d"], name="index") + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(IndexError, match=msg): + index.delete(5) + + def test_append_multiple(self): + index = Index(["a", "b", "c", "d", "e", "f"]) + + foos = [index[:2], index[2:4], index[4:]] + result = foos[0].append(foos[1:]) + 
tm.assert_index_equal(result, index) + + # empty + result = index.append([]) + tm.assert_index_equal(result, index) diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py new file mode 100644 index 00000000..87ffe998 --- /dev/null +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -0,0 +1,261 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm +from pandas.core.algorithms import safe_sort + + +class TestIndexSetOps: + @pytest.mark.parametrize( + "method", ["union", "intersection", "difference", "symmetric_difference"] + ) + def test_setops_disallow_true(self, method): + idx1 = Index(["a", "b"]) + idx2 = Index(["b", "c"]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) + + def test_setops_preserve_object_dtype(self): + idx = Index([1, 2, 3], dtype=object) + result = idx.intersection(idx[1:]) + expected = idx[1:] + tm.assert_index_equal(result, expected) + + # if other is not monotonic increasing, intersection goes through + # a different route + result = idx.intersection(idx[1:][::-1]) + tm.assert_index_equal(result, expected) + + result = idx._union(idx[1:], sort=None) + expected = idx + tm.assert_numpy_array_equal(result, expected.values) + + result = idx.union(idx[1:], sort=None) + tm.assert_index_equal(result, expected) + + # if other is not monotonic increasing, _union goes through + # a different route + result = idx._union(idx[1:][::-1], sort=None) + tm.assert_numpy_array_equal(result, expected.values) + + result = idx.union(idx[1:][::-1], sort=None) + tm.assert_index_equal(result, expected) + + def test_union_base(self): + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[3:] + second = index[:5] + + result = first.union(second) + + expected = Index([0, 1, 2, "a", "b", "c"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + def test_union_different_type_base(self, klass): + # GH 10149 + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[3:] + second = index[:5] + + result = first.union(klass(second.values)) + + assert tm.equalContents(result, index) + + def test_union_sort_other_incomparable(self): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = Index([1, pd.Timestamp("2000")]) + # default (sort=None) + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + + tm.assert_index_equal(result, idx) + + # sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1], sort=None) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior") + def test_union_sort_other_incomparable_true(self): + # TODO(GH#25151): decide on True behaviour + # sort=True + idx = Index([1, pd.Timestamp("2000")]) + with pytest.raises(TypeError, match=".*"): + idx.union(idx[:1], sort=True) + + @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior") + def test_intersection_equal_sort_true(self): + # TODO(GH#25151): decide on True behaviour + idx = Index(["c", "a", "b"]) + sorted_ = Index(["a", "b", "c"]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + def test_intersection_base(self, sort): + # (same results for py2 and py3 but sortedness not tested elsewhere) + 
index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:5] + second = index[:3] + + expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1]) + result = first.intersection(second, sort=sort) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + def test_intersection_different_type_base(self, klass, sort): + # GH 10149 + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:5] + second = index[:3] + + result = first.intersection(klass(second.values), sort=sort) + assert tm.equalContents(result, second) + + def test_intersection_nosort(self): + result = Index(["c", "b", "a"]).intersection(["b", "a"]) + expected = Index(["b", "a"]) + tm.assert_index_equal(result, expected) + + def test_intersection_equal_sort(self): + idx = Index(["c", "a", "b"]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + def test_intersection_str_dates(self, sort): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(["aa"], dtype=object) + result = i2.intersection(i1, sort=sort) + + assert len(result) == 0 + + @pytest.mark.parametrize( + "index2,expected_arr", + [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])], + ) + def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): + # non-monotonic non-unique + index1 = Index(["A", "B", "A", "C"]) + expected = Index(expected_arr, dtype="object") + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_difference_base(self, sort): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:4] + second = index[3:] + + result = first.difference(second, sort) + expected = Index([0, "a", 1]) + if sort is None: + expected = Index(safe_sort(expected)) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:4] + second = index[3:] + + result = first.symmetric_difference(second) + expected = Index([0, 1, 2, "a", "c"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "method,expected,sort", + [ + ( + "intersection", + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ), + False, + ), + ( + "intersection", + np.array( + [(1, "A"), (1, "B"), (2, "A"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ), + None, + ), + ( + "union", + np.array( + [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")], + dtype=[("num", int), ("let", "a1")], + ), + None, + ), + ], + ) + def test_tuple_union_bug(self, method, expected, sort): + index1 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ) + ) + index2 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")], + dtype=[("num", int), ("let", "a1")], + ) + ) + + result = getattr(index1, method)(index2, sort=sort) + assert result.ndim == 1 + + expected = Index(expected) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("first_list", [["b", "a"], []]) + @pytest.mark.parametrize("second_list", [["a", "b"], []]) + @pytest.mark.parametrize( + "first_name, second_name, expected_name", + [("A", "B", None), 
(None, "B", None), ("A", None, None)], + ) + def test_union_name_preservation( + self, first_list, second_list, first_name, second_name, expected_name, sort + ): + first = Index(first_list, name=first_name) + second = Index(second_list, name=second_name) + union = first.union(second, sort=sort) + + vals = set(first_list).union(second_list) + + if sort is None and len(first_list) > 0 and len(second_list) > 0: + expected = Index(sorted(vals), name=expected_name) + tm.assert_index_equal(union, expected) + else: + expected = Index(vals, name=expected_name) + tm.equalContents(union, expected) + + @pytest.mark.parametrize( + "diff_type, expected", + [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]], + ) + def test_difference_object_type(self, diff_type, expected): + # GH 13432 + idx1 = Index([0, 1, "A", "B"]) + idx2 = Index([0, 2, "A", "C"]) + result = getattr(idx1, diff_type)(idx2) + expected = Index(expected) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/base_class/test_where.py b/pandas/tests/indexes/base_class/test_where.py new file mode 100644 index 00000000..0c896973 --- /dev/null +++ b/pandas/tests/indexes/base_class/test_where.py @@ -0,0 +1,13 @@ +import numpy as np + +from pandas import Index +import pandas._testing as tm + + +class TestWhere: + def test_where_intlike_str_doesnt_cast_ints(self): + idx = Index(range(3)) + mask = np.array([True, False, True]) + res = idx.where(mask, "2") + expected = Index([0, "2", 2]) + tm.assert_index_equal(res, expected) diff --git a/pandas/tests/indexes/categorical/__init__.py b/pandas/tests/indexes/categorical/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/categorical/test_append.py b/pandas/tests/indexes/categorical/test_append.py new file mode 100644 index 00000000..b48c3219 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_append.py @@ -0,0 +1,62 @@ +import pytest + +from pandas import ( + CategoricalIndex, + Index, +) +import pandas._testing as tm + + +class TestAppend: + @pytest.fixture + def ci(self): + categories = list("cab") + return CategoricalIndex(list("aabbca"), categories=categories, ordered=False) + + def test_append(self, ci): + # append cats with the same categories + result = ci[:3].append(ci[3:]) + tm.assert_index_equal(result, ci, exact=True) + + foos = [ci[:1], ci[1:3], ci[3:]] + result = foos[0].append(foos[1:]) + tm.assert_index_equal(result, ci, exact=True) + + def test_append_empty(self, ci): + # empty + result = ci.append([]) + tm.assert_index_equal(result, ci, exact=True) + + def test_append_mismatched_categories(self, ci): + # appending with different categories or reordered is not ok + msg = "all inputs must be Index" + with pytest.raises(TypeError, match=msg): + ci.append(ci.values.set_categories(list("abcd"))) + with pytest.raises(TypeError, match=msg): + ci.append(ci.values.reorder_categories(list("abc"))) + + def test_append_category_objects(self, ci): + # with objects + result = ci.append(Index(["c", "a"])) + expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_non_categories(self, ci): + # invalid objects -> cast to object via concat_compat + result = ci.append(Index(["a", "d"])) + expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"]) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_object(self, ci): + # GH#14298 - if base object is not categorical -> coerce to object + result = Index(["c", 
"a"]).append(ci) + expected = Index(list("caaabbca")) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_to_another(self): + # hits Index._concat + fst = Index(["a", "b"]) + snd = CategoricalIndex(["d", "e"]) + result = fst.append(snd) + expected = Index(["a", "b", "d", "e"]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py new file mode 100644 index 00000000..854ae8b6 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -0,0 +1,87 @@ +from datetime import date + +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + Index, + IntervalIndex, +) +import pandas._testing as tm + + +class TestAstype: + def test_astype(self): + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + + result = ci.astype(object) + tm.assert_index_equal(result, Index(np.array(ci))) + + # this IS equal, but not the same class + assert result.equals(ci) + assert isinstance(result, Index) + assert not isinstance(result, CategoricalIndex) + + # interval + ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right") + + ci = CategoricalIndex( + Categorical.from_codes([0, 1, -1], categories=ii, ordered=True) + ) + + result = ci.astype("interval") + expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan) + tm.assert_index_equal(result, expected) + + result = IntervalIndex(result.values) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize("dtype_ordered", [True, False]) + @pytest.mark.parametrize("index_ordered", [True, False]) + def test_astype_category(self, name, dtype_ordered, index_ordered): + # GH#18630 + index = CategoricalIndex( + list("aabbca"), categories=list("cab"), ordered=index_ordered + ) + if name: + index = index.rename(name) + + # standard categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = index.astype(dtype) + expected = CategoricalIndex( + index.tolist(), + name=name, + categories=index.categories, + ordered=dtype_ordered, + ) + tm.assert_index_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered) + result = index.astype(dtype) + expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype) + tm.assert_index_equal(result, expected) + + if dtype_ordered is False: + # dtype='category' can't specify ordered, so only test once + result = index.astype("category") + expected = index + tm.assert_index_equal(result, expected) + + def test_categorical_date_roundtrip(self): + # astype to categorical and back should preserve date objects + v = date.today() + + obj = Index([v, v]) + assert obj.dtype == object + + cat = obj.astype("category") + + rtrip = cat.astype(object) + assert rtrip.dtype == object + assert type(rtrip[0]) is date diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py new file mode 100644 index 00000000..06c00123 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_category.py @@ -0,0 +1,408 @@ +import numpy as np +import pytest + +from pandas._libs import index as libindex +from pandas._libs.arrays import NDArrayBacked + +import pandas as pd +from pandas import ( + Categorical, + CategoricalDtype, +) +import pandas._testing as tm +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, +) +from 
pandas.tests.indexes.common import Base + + +class TestCategoricalIndex(Base): + _index_cls = CategoricalIndex + + @pytest.fixture + def simple_index(self) -> CategoricalIndex: + return self._index_cls(list("aabbca"), categories=list("cab"), ordered=False) + + @pytest.fixture + def index(self): + return tm.makeCategoricalIndex(100) + + def create_index(self, *, categories=None, ordered=False): + if categories is None: + categories = list("cab") + return CategoricalIndex(list("aabbca"), categories=categories, ordered=ordered) + + def test_can_hold_identifiers(self): + idx = self.create_index(categories=list("abcd")) + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + def test_pickle_compat_construction(self): + # Once the deprecation is enforced, we can use the parent class's test + with tm.assert_produces_warning(FutureWarning, match="without passing data"): + self._index_cls() + + def test_insert(self, simple_index): + + ci = simple_index + categories = ci.categories + + # test 0th element + result = ci.insert(0, "a") + expected = CategoricalIndex(list("aaabbca"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # test Nth element that follows Python list behavior + result = ci.insert(-1, "a") + expected = CategoricalIndex(list("aabbcaa"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # test empty + result = CategoricalIndex([], categories=categories).insert(0, "a") + expected = CategoricalIndex(["a"], categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # invalid -> cast to object + expected = ci.astype(object).insert(0, "d") + result = ci.insert(0, "d") + tm.assert_index_equal(result, expected, exact=True) + + # GH 18295 (test missing) + expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"]) + for na in (np.nan, pd.NaT, None): + result = CategoricalIndex(list("aabcb")).insert(1, na) + tm.assert_index_equal(result, expected) + + def test_insert_na_mismatched_dtype(self): + ci = CategoricalIndex([0, 1, 1]) + result = ci.insert(0, pd.NaT) + expected = Index([pd.NaT, 0, 1, 1], dtype=object) + tm.assert_index_equal(result, expected) + + def test_delete(self, simple_index): + + ci = simple_index + categories = ci.categories + + result = ci.delete(0) + expected = CategoricalIndex(list("abbca"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + result = ci.delete(-1) + expected = CategoricalIndex(list("aabbc"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + with tm.external_error_raised((IndexError, ValueError)): + # Either depending on NumPy version + ci.delete(10) + + @pytest.mark.parametrize( + "data, non_lexsorted_data", + [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]], + ) + def test_is_monotonic(self, data, non_lexsorted_data): + c = CategoricalIndex(data) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(data, ordered=True) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(data, categories=reversed(data)) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is True + + c = CategoricalIndex(data, categories=reversed(data), ordered=True) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is True + + # test when data is neither monotonic increasing nor decreasing + reordered_data = [data[0], data[2], 
data[1]] + c = CategoricalIndex(reordered_data, categories=reversed(data)) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is False + + # non lexsorted categories + categories = non_lexsorted_data + + c = CategoricalIndex(categories[:2], categories=categories) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(categories[1:3], categories=categories) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + def test_has_duplicates(self): + idx = CategoricalIndex([0, 0, 0], name="foo") + assert idx.is_unique is False + assert idx.has_duplicates is True + + idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo") + assert idx.is_unique is False + assert idx.has_duplicates is True + + idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo") + assert idx.is_unique is True + assert idx.has_duplicates is False + + @pytest.mark.parametrize( + "data, categories, expected", + [ + ( + [1, 1, 1], + [1, 2, 3], + { + "first": np.array([False, True, True]), + "last": np.array([True, True, False]), + False: np.array([True, True, True]), + }, + ), + ( + [1, 1, 1], + list("abc"), + { + "first": np.array([False, True, True]), + "last": np.array([True, True, False]), + False: np.array([True, True, True]), + }, + ), + ( + [2, "a", "b"], + list("abc"), + { + "first": np.zeros(shape=(3), dtype=np.bool_), + "last": np.zeros(shape=(3), dtype=np.bool_), + False: np.zeros(shape=(3), dtype=np.bool_), + }, + ), + ( + list("abb"), + list("abc"), + { + "first": np.array([False, False, True]), + "last": np.array([False, True, False]), + False: np.array([False, True, True]), + }, + ), + ], + ) + def test_drop_duplicates(self, data, categories, expected): + + idx = CategoricalIndex(data, categories=categories, name="foo") + for keep, e in expected.items(): + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e) + e = idx[~e] + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, e) + + @pytest.mark.parametrize( + "data, categories, expected_data", + [ + ([1, 1, 1], [1, 2, 3], [1]), + ([1, 1, 1], list("abc"), [np.nan]), + ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]), + ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]), + ], + ) + def test_unique(self, data, categories, expected_data, ordered): + dtype = CategoricalDtype(categories, ordered=ordered) + + idx = CategoricalIndex(data, dtype=dtype) + expected = CategoricalIndex(expected_data, dtype=dtype) + tm.assert_index_equal(idx.unique(), expected) + + def test_repr_roundtrip(self): + + ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + str(ci) + tm.assert_index_equal(eval(repr(ci)), ci, exact=True) + + # formatting + str(ci) + + # long format + # this is not reprable + ci = CategoricalIndex(np.random.randint(0, 5, size=100)) + str(ci) + + def test_isin(self): + + ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) + tm.assert_numpy_array_equal( + ci.isin(["c"]), np.array([False, False, False, True, False, False]) + ) + tm.assert_numpy_array_equal( + ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False]) + ) + tm.assert_numpy_array_equal( + ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6) + ) + + # mismatched categorical -> coerced to ndarray so doesn't matter + result = ci.isin(ci.set_categories(list("abcdefghi"))) + expected = np.array([True] * 6) + tm.assert_numpy_array_equal(result, expected) + + result = ci.isin(ci.set_categories(list("defghi"))) + expected 
= np.array([False] * 5 + [True]) + tm.assert_numpy_array_equal(result, expected) + + def test_identical(self): + + ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True) + assert ci1.identical(ci1) + assert ci1.identical(ci1.copy()) + assert not ci1.identical(ci2) + + def test_ensure_copied_data(self, index): + # gh-12309: Check the "copy" argument of each + # Index.__new__ is honored. + # + # Must be tested separately from other indexes because + # self.values is not an ndarray. + + result = CategoricalIndex(index.values, copy=True) + tm.assert_index_equal(index, result) + assert not np.shares_memory(result._data._codes, index._data._codes) + + result = CategoricalIndex(index.values, copy=False) + assert result._data._codes is index._data._codes + + def test_frame_repr(self): + df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"])) + result = repr(df) + expected = " A\na 1\nb 2\nc 3" + assert result == expected + + def test_reindex_base(self): + # See test_reindex.py + pass + + def test_map_str(self): + # See test_map.py + pass + + +class TestCategoricalIndex2: + # Tests that are not overriding a test in Base + + def test_view_i8(self): + # GH#25464 + ci = tm.makeCategoricalIndex(100) + msg = "When changing to a larger dtype, its size must be a divisor" + with pytest.raises(ValueError, match=msg): + ci.view("i8") + with pytest.raises(ValueError, match=msg): + ci._data.view("i8") + + ci = ci[:-4] # length divisible by 8 + + res = ci.view("i8") + expected = ci._data.codes.view("i8") + tm.assert_numpy_array_equal(res, expected) + + cat = ci._data + tm.assert_numpy_array_equal(cat.view("i8"), expected) + + @pytest.mark.parametrize( + "dtype, engine_type", + [ + (np.int8, libindex.Int8Engine), + (np.int16, libindex.Int16Engine), + (np.int32, libindex.Int32Engine), + (np.int64, libindex.Int64Engine), + ], + ) + def test_engine_type(self, dtype, engine_type): + if dtype != np.int64: + # num. of uniques required to push CategoricalIndex.codes to a + # dtype (128 categories required for .codes dtype to be int16 etc.) 
+ num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype] + ci = CategoricalIndex(range(num_uniques)) + else: + # having 2**32 - 2**31 categories would be very memory-intensive, + # so we cheat a bit with the dtype + ci = CategoricalIndex(range(32768)) # == 2**16 - 2**(16 - 1) + arr = ci.values._ndarray.astype("int64") + NDArrayBacked.__init__(ci._data, arr, ci.dtype) + assert np.issubdtype(ci.codes.dtype, dtype) + assert isinstance(ci._engine, engine_type) + + @pytest.mark.parametrize( + "func,op_name", + [ + (lambda idx: idx - idx, "__sub__"), + (lambda idx: idx + idx, "__add__"), + (lambda idx: idx - ["a", "b"], "__sub__"), + (lambda idx: idx + ["a", "b"], "__add__"), + (lambda idx: ["a", "b"] - idx, "__rsub__"), + (lambda idx: ["a", "b"] + idx, "__radd__"), + ], + ) + def test_disallow_addsub_ops(self, func, op_name): + # GH 10039 + # set ops (+/-) raise TypeError + idx = Index(Categorical(["a", "b"])) + cat_or_list = "'(Categorical|list)' and '(Categorical|list)'" + msg = "|".join( + [ + f"cannot perform {op_name} with this index type: CategoricalIndex", + "can only concatenate list", + rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}", + ] + ) + with pytest.raises(TypeError, match=msg): + func(idx) + + def test_method_delegation(self): + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.set_categories(list("cab")) + tm.assert_index_equal( + result, CategoricalIndex(list("aabbca"), categories=list("cab")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.rename_categories(list("efg")) + tm.assert_index_equal( + result, CategoricalIndex(list("ffggef"), categories=list("efg")) + ) + + # GH18862 (let rename_categories take callables) + result = ci.rename_categories(lambda x: x.upper()) + tm.assert_index_equal( + result, CategoricalIndex(list("AABBCA"), categories=list("CAB")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.add_categories(["d"]) + tm.assert_index_equal( + result, CategoricalIndex(list("aabbca"), categories=list("cabd")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.remove_categories(["c"]) + tm.assert_index_equal( + result, + CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")), + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.as_unordered() + tm.assert_index_equal(result, ci) + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.as_ordered() + tm.assert_index_equal( + result, + CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True), + ) + + # invalid + msg = "cannot use inplace with CategoricalIndex" + with pytest.raises(ValueError, match=msg): + ci.set_categories(list("cab"), inplace=True) diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py new file mode 100644 index 00000000..98da8038 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_constructors.py @@ -0,0 +1,159 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + Index, +) +import pandas._testing as tm + + +class TestCategoricalIndexConstructors: + def test_construction_without_data_deprecated(self): + # Once the deprecation is enforced, we can add this case to + # test_construction_disallows_scalar + msg = "without passing data" + with tm.assert_produces_warning(FutureWarning, match=msg): + 
CategoricalIndex(categories=list("abcd"), ordered=False) + + def test_construction_disallows_scalar(self): + msg = "must be called with a collection of some kind" + with pytest.raises(TypeError, match=msg): + CategoricalIndex(data=1, categories=list("abcd"), ordered=False) + + def test_construction(self): + + ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False) + categories = ci.categories + + result = Index(ci) + tm.assert_index_equal(result, ci, exact=True) + assert not result.ordered + + result = Index(ci.values) + tm.assert_index_equal(result, ci, exact=True) + assert not result.ordered + + # empty + result = CategoricalIndex([], categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8")) + assert not result.ordered + + # passing categories + result = CategoricalIndex(list("aabbca"), categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + + c = Categorical(list("aabbca")) + result = CategoricalIndex(c) + tm.assert_index_equal(result.categories, Index(list("abc"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(c, categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + ci = CategoricalIndex(c, categories=list("abcd")) + result = CategoricalIndex(ci) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(ci, categories=list("ab")) + tm.assert_index_equal(result.categories, Index(list("ab"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(ci, categories=list("ab"), ordered=True) + tm.assert_index_equal(result.categories, Index(list("ab"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8") + ) + assert result.ordered + + result = CategoricalIndex(ci, categories=list("ab"), ordered=True) + expected = CategoricalIndex( + ci, categories=list("ab"), ordered=True, dtype="category" + ) + tm.assert_index_equal(result, expected, exact=True) + + # turn me to an Index + result = Index(np.array(ci)) + assert isinstance(result, Index) + assert not isinstance(result, CategoricalIndex) + + def test_construction_with_dtype(self): + + # specify dtype + ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False) + + result = Index(np.array(ci), dtype="category") + tm.assert_index_equal(result, ci, exact=True) + + result = Index(np.array(ci).tolist(), dtype="category") + tm.assert_index_equal(result, ci, exact=True) + + # these are generally only equal when the categories are reordered + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + + result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories) + tm.assert_index_equal(result, ci, exact=True) + + # make sure indexes are handled + idx = Index(range(3)) + expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True) + result = CategoricalIndex(idx, categories=idx, ordered=True) + 
tm.assert_index_equal(result, expected, exact=True) + + def test_construction_empty_with_bool_categories(self): + # see GH#22702 + cat = CategoricalIndex([], categories=[True, False]) + categories = sorted(cat.categories.tolist()) + assert categories == [False, True] + + def test_construction_with_categorical_dtype(self): + # construction with CategoricalDtype + # GH#18109 + data, cats, ordered = "a a b b".split(), "c b a".split(), True + dtype = CategoricalDtype(categories=cats, ordered=ordered) + + result = CategoricalIndex(data, dtype=dtype) + expected = CategoricalIndex(data, categories=cats, ordered=ordered) + tm.assert_index_equal(result, expected, exact=True) + + # GH#19032 + result = Index(data, dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) + + # error when combining categories/ordered and dtype kwargs + msg = "Cannot specify `categories` or `ordered` together with `dtype`." + with pytest.raises(ValueError, match=msg): + CategoricalIndex(data, categories=cats, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # passing subclass-specific kwargs to pd.Index + Index(data, categories=cats, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + CategoricalIndex(data, ordered=ordered, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # passing subclass-specific kwargs to pd.Index + Index(data, ordered=ordered, dtype=dtype) diff --git a/pandas/tests/indexes/categorical/test_equals.py b/pandas/tests/indexes/categorical/test_equals.py new file mode 100644 index 00000000..1ed8f3a9 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_equals.py @@ -0,0 +1,90 @@ +import numpy as np +import pytest + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + MultiIndex, +) + + +class TestEquals: + def test_equals_categorical(self): + ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True) + + assert ci1.equals(ci1) + assert not ci1.equals(ci2) + assert ci1.equals(ci1.astype(object)) + assert ci1.astype(object).equals(ci1) + + assert (ci1 == ci1).all() + assert not (ci1 != ci1).all() + assert not (ci1 > ci1).all() + assert not (ci1 < ci1).all() + assert (ci1 <= ci1).all() + assert (ci1 >= ci1).all() + + assert not (ci1 == 1).all() + assert (ci1 == Index(["a", "b"])).all() + assert (ci1 == ci1.values).all() + + # invalid comparisons + with pytest.raises(ValueError, match="Lengths must match"): + ci1 == Index(["a", "b", "c"]) + + msg = "Categoricals can only be compared if 'categories' are the same" + with pytest.raises(TypeError, match=msg): + ci1 == ci2 + with pytest.raises(TypeError, match=msg): + ci1 == Categorical(ci1.values, ordered=False) + with pytest.raises(TypeError, match=msg): + ci1 == Categorical(ci1.values, categories=list("abc")) + + # tests + # make sure that we are testing for category inclusion properly + ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"]) + assert not ci.equals(list("aabca")) + # Same categories, but different order + # Unordered + assert ci.equals(CategoricalIndex(list("aabca"))) + # Ordered + assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True)) + assert ci.equals(ci.copy()) + + ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) + assert not ci.equals(list("aabca")) + assert not ci.equals(CategoricalIndex(list("aabca"))) + assert ci.equals(ci.copy()) + + 
ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) + assert not ci.equals(list("aabca") + [np.nan]) + assert ci.equals(CategoricalIndex(list("aabca") + [np.nan])) + assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True)) + assert ci.equals(ci.copy()) + + def test_equals_categorical_unordered(self): + # https://github.com/pandas-dev/pandas/issues/16603 + a = CategoricalIndex(["A"], categories=["A", "B"]) + b = CategoricalIndex(["A"], categories=["B", "A"]) + c = CategoricalIndex(["C"], categories=["B", "A"]) + assert a.equals(b) + assert not a.equals(c) + assert not b.equals(c) + + def test_equals_non_category(self): + # GH#37667 Case where other contains a value not among ci's + # categories ("D") and also contains np.nan + ci = CategoricalIndex(["A", "B", np.nan, np.nan]) + other = Index(["A", "B", "D", np.nan]) + + assert not ci.equals(other) + + def test_equals_multiindex(self): + # dont raise NotImplementedError when calling is_dtype_compat + + mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)]) + ci = mi.to_flat_index().astype("category") + + assert not ci.equals(mi) diff --git a/pandas/tests/indexes/categorical/test_fillna.py b/pandas/tests/indexes/categorical/test_fillna.py new file mode 100644 index 00000000..09de578f --- /dev/null +++ b/pandas/tests/indexes/categorical/test_fillna.py @@ -0,0 +1,54 @@ +import numpy as np +import pytest + +from pandas import CategoricalIndex +import pandas._testing as tm + + +class TestFillNA: + def test_fillna_categorical(self): + # GH#11343 + idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x") + # fill by value in categories + exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x") + tm.assert_index_equal(idx.fillna(1.0), exp) + + cat = idx._data + + # fill by value not in categories raises TypeError on EA, casts on CI + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(TypeError, match=msg): + cat.fillna(2.0) + + result = idx.fillna(2.0) + expected = idx.astype(object).fillna(2.0) + tm.assert_index_equal(result, expected) + + def test_fillna_copies_with_no_nas(self): + # Nothing to fill, should still get a copy for the Categorical method, + # but OK to get a view on CategoricalIndex method + ci = CategoricalIndex([0, 1, 1]) + result = ci.fillna(0) + assert result is not ci + assert tm.shares_memory(result, ci) + + # But at the EA level we always get a copy. + cat = ci._data + result = cat.fillna(0) + assert result._ndarray is not cat._ndarray + assert result._ndarray.base is None + assert not tm.shares_memory(result, cat) + + def test_fillna_validates_with_no_nas(self): + # We validate the fill value even if fillna is a no-op + ci = CategoricalIndex([2, 3, 3]) + cat = ci._data + + msg = "Cannot setitem on a Categorical with a new category" + res = ci.fillna(False) + # nothing to fill, so we dont cast + tm.assert_index_equal(res, ci) + + # Same check directly on the Categorical + with pytest.raises(TypeError, match=msg): + cat.fillna(False) diff --git a/pandas/tests/indexes/categorical/test_formats.py b/pandas/tests/indexes/categorical/test_formats.py new file mode 100644 index 00000000..d7812888 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_formats.py @@ -0,0 +1,114 @@ +""" +Tests for CategoricalIndex.__repr__ and related methods. 
+""" +import pandas._config.config as cf + +from pandas import CategoricalIndex + + +class TestCategoricalIndexRepr: + def test_format_different_scalar_lengths(self): + # GH#35439 + idx = CategoricalIndex(["aaaaaaaaa", "b"]) + expected = ["aaaaaaaaa", "b"] + assert idx.format() == expected + + def test_string_categorical_index_repr(self): + # short + idx = CategoricalIndex(["a", "bb", "ccc"]) + expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa:E501 + assert repr(idx) == expected + + # multiple lines + idx = CategoricalIndex(["a", "bb", "ccc"] * 10) + expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected + + # truncated + idx = CategoricalIndex(["a", "bb", "ccc"] * 100) + expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + ... + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa:E501 + + assert repr(idx) == expected + + # larger categories + idx = CategoricalIndex(list("abcdefghijklmmo")) + expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', + 'm', 'm', 'o'], + categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected + + # short + idx = CategoricalIndex(["あ", "いい", "ううう"]) + expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa:E501 + assert repr(idx) == expected + + # multiple lines + idx = CategoricalIndex(["あ", "いい", "ううう"] * 10) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected + + # truncated + idx = CategoricalIndex(["あ", "いい", "ううう"] * 100) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + ... 
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa:E501 + + assert repr(idx) == expected + + # larger categories + idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ")) + expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', + 'す', 'せ', 'そ'], + categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected + + # Enable Unicode option ----------------------------------------- + with cf.option_context("display.unicode.east_asian_width", True): + + # short + idx = CategoricalIndex(["あ", "いい", "ううう"]) + expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa:E501 + assert repr(idx) == expected + + # multiple lines + idx = CategoricalIndex(["あ", "いい", "ううう"] * 10) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected + + # truncated + idx = CategoricalIndex(["あ", "いい", "ううう"] * 100) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', + ... + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa:E501 + + assert repr(idx) == expected + + # larger categories + idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ")) + expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', + 'さ', 'し', 'す', 'せ', 'そ'], + categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa:E501 + + assert repr(idx) == expected diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py new file mode 100644 index 00000000..58848645 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -0,0 +1,426 @@ +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +import pandas as pd +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + Timestamp, +) +import pandas._testing as tm + + +class TestTake: + def test_take_fill_value(self): + # GH 12631 + + # numeric category + idx = CategoricalIndex([1, 2, 3], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # object category + idx = CategoricalIndex( + list("CBA"), categories=list("ABC"), ordered=True, name="xxx" + ) + result = idx.take(np.array([1, 0, -1])) + expected = CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + 
tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = CategoricalIndex( + ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + def test_take_fill_value_datetime(self): + + # datetime category + idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") + idx = CategoricalIndex(idx) + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx") + exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) + expected = CategoricalIndex(expected, categories=exp_cats) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + def test_take_invalid_kwargs(self): + idx = CategoricalIndex([1, 2, 3], name="foo") + indices = [1, 0, -1] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +class TestGetLoc: + def test_get_loc(self): + # GH 12531 + cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) + idx1 = Index(list("abcde")) + assert cidx1.get_loc("a") == idx1.get_loc("a") + assert cidx1.get_loc("e") == idx1.get_loc("e") + + for i in [cidx1, idx1]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique + cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc")) + idx2 = Index(list("aacded")) + + # results in bool 
array + res = cidx2.get_loc("d") + tm.assert_numpy_array_equal(res, idx2.get_loc("d")) + tm.assert_numpy_array_equal( + res, np.array([False, False, False, True, False, True]) + ) + # unique element results in scalar + res = cidx2.get_loc("e") + assert res == idx2.get_loc("e") + assert res == 4 + + for i in [cidx2, idx2]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique, sliceable + cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) + idx3 = Index(list("aabbb")) + + # results in slice + res = cidx3.get_loc("a") + assert res == idx3.get_loc("a") + assert res == slice(0, 2, None) + + res = cidx3.get_loc("b") + assert res == idx3.get_loc("b") + assert res == slice(2, 5, None) + + for i in [cidx3, idx3]: + with pytest.raises(KeyError, match="'c'"): + i.get_loc("c") + + def test_get_loc_unique(self): + cidx = CategoricalIndex(list("abc")) + result = cidx.get_loc("b") + assert result == 1 + + def test_get_loc_monotonic_nonunique(self): + cidx = CategoricalIndex(list("abbc")) + result = cidx.get_loc("b") + expected = slice(1, 3, None) + assert result == expected + + def test_get_loc_nonmonotonic_nonunique(self): + cidx = CategoricalIndex(list("abcb")) + result = cidx.get_loc("b") + expected = np.array([False, True, False, True], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + def test_get_loc_nan(self): + # GH#41933 + ci = CategoricalIndex(["A", "B", np.nan]) + res = ci.get_loc(np.nan) + + assert res == 2 + + +class TestGetIndexer: + def test_get_indexer_base(self): + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) + expected = np.arange(len(idx), dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + def test_get_indexer_requires_unique(self): + np.random.seed(123456789) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + oidx = Index(np.array(ci)) + + msg = "Reindexing only valid with uniquely valued Index objects" + + for n in [1, 2, 5, len(ci)]: + finder = oidx[np.random.randint(0, len(ci), size=n)] + + with pytest.raises(InvalidIndexError, match=msg): + ci.get_indexer(finder) + + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. 
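+        # an indexer equal to ci's own values and one that differs should both
+        # raise, because ci itself is not unique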
+ for finder in [list("aabbca"), list("aababca")]: + + with pytest.raises(InvalidIndexError, match=msg): + ci.get_indexer(finder) + + def test_get_indexer_non_unique(self): + + idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) + idx2 = CategoricalIndex(list("abf")) + + for indexer in [idx2, list("abf"), Index(list("abf"))]: + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + idx1.get_indexer(indexer) + + r1, _ = idx1.get_indexer_non_unique(indexer) + expected = np.array([0, 1, 2, -1], dtype=np.intp) + tm.assert_almost_equal(r1, expected) + + def test_get_indexer_method(self): + idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) + idx2 = CategoricalIndex(list("abf")) + + msg = "method pad not yet implemented for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="pad") + msg = "method backfill not yet implemented for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="backfill") + + msg = "method nearest not yet implemented for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="nearest") + + def test_get_indexer_array(self): + arr = np.array( + [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")], + dtype=object, + ) + cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")] + ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category") + result = ci.get_indexer(arr) + expected = np.array([0, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_same_categories_same_order(self): + ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) + + result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"])) + expected = np.array([1, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19551 + ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) + + result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"])) + expected = np.array([1, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_nans_in_index_and_target(self): + # GH 45361 + ci = CategoricalIndex([1, 2, np.nan, 3]) + other1 = [2, 3, 4, np.nan] + res1 = ci.get_indexer(other1) + expected1 = np.array([1, 3, -1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(res1, expected1) + other2 = [1, 4, 2, 3] + res2 = ci.get_indexer(other2) + expected2 = np.array([0, -1, 1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(res2, expected2) + + +class TestWhere: + def test_where(self, listlike_box): + klass = listlike_box + + i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + cond = [True] * len(i) + expected = i + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * (len(i) - 1) + expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories) + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_where_non_categories(self): + ci = CategoricalIndex(["a", "b", "c", "d"]) + mask = np.array([True, False, True, False]) + + result = ci.where(mask, 2) + expected = Index(["a", 2, "c", 2], dtype=object) + tm.assert_index_equal(result, expected) + + msg = "Cannot setitem on a Categorical with a new 
category" + with pytest.raises(TypeError, match=msg): + # Test the Categorical method directly + ci._data._where(mask, 2) + + +class TestContains: + def test_contains(self): + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False) + + assert "a" in ci + assert "z" not in ci + assert "e" not in ci + assert np.nan not in ci + + # assert codes NOT in index + assert 0 not in ci + assert 1 not in ci + + def test_contains_nan(self): + ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef")) + assert np.nan in ci + + @pytest.mark.parametrize("unwrap", [True, False]) + def test_contains_na_dtype(self, unwrap): + dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT) + pi = dti.to_period("D") + tdi = dti - dti[-1] + ci = CategoricalIndex(dti) + + obj = ci + if unwrap: + obj = ci._data + + assert np.nan in obj + assert None in obj + assert pd.NaT in obj + assert np.datetime64("NaT") in obj + assert np.timedelta64("NaT") not in obj + + obj2 = CategoricalIndex(tdi) + if unwrap: + obj2 = obj2._data + + assert np.nan in obj2 + assert None in obj2 + assert pd.NaT in obj2 + assert np.datetime64("NaT") not in obj2 + assert np.timedelta64("NaT") in obj2 + + obj3 = CategoricalIndex(pi) + if unwrap: + obj3 = obj3._data + + assert np.nan in obj3 + assert None in obj3 + assert pd.NaT in obj3 + assert np.datetime64("NaT") not in obj3 + assert np.timedelta64("NaT") not in obj3 + + @pytest.mark.parametrize( + "item, expected", + [ + (pd.Interval(0, 1), True), + (1.5, True), + (pd.Interval(0.5, 1.5), False), + ("a", False), + (Timestamp(1), False), + (pd.Timedelta(1), False), + ], + ids=str, + ) + def test_contains_interval(self, item, expected): + # GH 23705 + ci = CategoricalIndex(IntervalIndex.from_breaks(range(3))) + result = item in ci + assert result is expected + + def test_contains_list(self): + # GH#21729 + idx = CategoricalIndex([1, 2, 3]) + + assert "a" not in idx + + with pytest.raises(TypeError, match="unhashable type"): + ["a"] in idx + + with pytest.raises(TypeError, match="unhashable type"): + ["a", "b"] in idx diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py new file mode 100644 index 00000000..71ee8298 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_map.py @@ -0,0 +1,115 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + Index, + Series, +) +import pandas._testing as tm + + +class TestMap: + @pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], + ) + def test_map_str(self, data, categories, ordered): + # GH 31202 - override base class since we want to maintain categorical/ordered + index = CategoricalIndex(data, categories=categories, ordered=ordered) + result = index.map(str) + expected = CategoricalIndex( + map(str, data), categories=map(str, categories), ordered=ordered + ) + tm.assert_index_equal(result, expected) + + def test_map(self): + ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) + result = ci.map(lambda x: x.lower()) + exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_index_equal(result, exp) + + ci = CategoricalIndex( + list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" + ) + result = ci.map(lambda x: x.lower()) + exp = CategoricalIndex( + list("ababc"), categories=list("bac"), ordered=False, name="XXX" + ) 
+ tm.assert_index_equal(result, exp) + + # GH 12766: Return an index not an array + tm.assert_index_equal( + ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") + ) + + # change categories dtype + ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = ci.map(f) + exp = CategoricalIndex( + [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False + ) + tm.assert_index_equal(result, exp) + + result = ci.map(Series([10, 20, 30], index=["A", "B", "C"])) + tm.assert_index_equal(result, exp) + + result = ci.map({"A": 10, "B": 20, "C": 30}) + tm.assert_index_equal(result, exp) + + def test_map_with_categorical_series(self): + # GH 12756 + a = Index([1, 2, 3, 4]) + b = Series(["even", "odd", "even", "odd"], dtype="category") + c = Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + + @pytest.mark.parametrize( + ("data", "f"), + ( + ([1, 1, np.nan], pd.isna), + ([1, 2, np.nan], pd.isna), + ([1, 1, np.nan], {1: False}), + ([1, 2, np.nan], {1: False, 2: False}), + ([1, 1, np.nan], Series([False, False])), + ([1, 2, np.nan], Series([False, False, False])), + ), + ) + def test_map_with_nan(self, data, f): # GH 24241 + values = pd.Categorical(data) + result = values.map(f) + if data[1] == 1: + expected = pd.Categorical([False, False, np.nan]) + tm.assert_categorical_equal(result, expected) + else: + expected = Index([False, False, np.nan]) + tm.assert_index_equal(result, expected) + + def test_map_with_dict_or_series(self): + orig_values = ["a", "B", 1, "a"] + new_values = ["one", 2, 3.0, "one"] + cur_index = CategoricalIndex(orig_values, name="XXX") + expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"]) + + mapper = Series(new_values[:-1], index=orig_values[:-1]) + result = cur_index.map(mapper) + # Order of categories in result can be different + tm.assert_index_equal(result, expected) + + mapper = {o: n for o, n in zip(orig_values[:-1], new_values[:-1])} + result = cur_index.map(mapper) + # Order of categories in result can be different + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py new file mode 100644 index 00000000..1337eff1 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -0,0 +1,86 @@ +import numpy as np + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Interval, +) +import pandas._testing as tm + + +class TestReindex: + def test_reindex_list_non_unique(self): + # GH#11586 + ci = CategoricalIndex(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(["a", "c"]) + + tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_categorical_non_unique(self): + ci = CategoricalIndex(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(Categorical(["a", "c"])) + + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_list_non_unique_unused_category(self): + ci = 
CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(["a", "c"]) + exp = Index(["a", "a", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_categorical_non_unique_unused_category(self): + ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(Categorical(["a", "c"])) + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_duplicate_target(self): + # See GH25459 + cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"]) + res, indexer = cat.reindex(["a", "c", "c"]) + exp = Index(["a", "c", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + res, indexer = cat.reindex( + CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + ) + exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + def test_reindex_empty_index(self): + # See GH16770 + c = CategoricalIndex([]) + res, indexer = c.reindex(["a", "b"]) + tm.assert_index_equal(res, Index(["a", "b"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) + + def test_reindex_categorical_added_category(self): + # GH 42424 + ci = CategoricalIndex( + [Interval(0, 1, closed="right"), Interval(1, 2, closed="right")], + ordered=True, + ) + ci_add = CategoricalIndex( + [ + Interval(0, 1, closed="right"), + Interval(1, 2, closed="right"), + Interval(2, 3, closed="right"), + Interval(3, 4, closed="right"), + ], + ordered=True, + ) + result, _ = ci.reindex(ci_add) + expected = ci_add + tm.assert_index_equal(expected, result) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py new file mode 100644 index 00000000..3f8c679c --- /dev/null +++ b/pandas/tests/indexes/common.py @@ -0,0 +1,902 @@ +from __future__ import annotations + +from datetime import datetime +import gc + +import numpy as np +import pytest + +from pandas._libs.tslibs import Timestamp + +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_integer_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + isna, +) +import pandas._testing as tm +from pandas.core.api import ( # noqa:F401 + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.arrays import BaseMaskedArray + + +class Base: + """ + Base class for index sub-class tests. 
+ """ + + _index_cls: type[Index] + + @pytest.fixture + def simple_index(self): + raise NotImplementedError("Method not implemented") + + def create_index(self) -> Index: + raise NotImplementedError("Method not implemented") + + def test_pickle_compat_construction(self): + # need an object to create with + msg = "|".join( + [ + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, None was passed", + r"DatetimeIndex\(\) must be called with a collection of some " + r"kind, None was passed", + r"TimedeltaIndex\(\) must be called with a collection of some " + r"kind, None was passed", + r"__new__\(\) missing 1 required positional argument: 'data'", + r"__new__\(\) takes at least 2 arguments \(1 given\)", + ] + ) + with pytest.raises(TypeError, match=msg): + self._index_cls() + + def test_shift(self, simple_index): + + # GH8083 test the base class for shift + idx = simple_index + msg = ( + f"This method is only implemented for DatetimeIndex, PeriodIndex and " + f"TimedeltaIndex; Got type {type(idx).__name__}" + ) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1, 2) + + def test_constructor_name_unhashable(self, simple_index): + # GH#29069 check that name is hashable + # See also same-named test in tests.series.test_constructors + idx = simple_index + with pytest.raises(TypeError, match="Index.name must be a hashable type"): + type(idx)(idx, name=[]) + + def test_create_index_existing_name(self, simple_index): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + expected = simple_index + if not isinstance(expected, MultiIndex): + expected.name = "foo" + result = Index(expected) + tm.assert_index_equal(result, expected) + + result = Index(expected, name="bar") + expected.name = "bar" + tm.assert_index_equal(result, expected) + else: + expected.names = ["foo", "bar"] + result = Index(expected) + tm.assert_index_equal( + result, + Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ), + names=["foo", "bar"], + ), + ) + + result = Index(expected, names=["A", "B"]) + tm.assert_index_equal( + result, + Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ), + names=["A", "B"], + ), + ) + + def test_numeric_compat(self, simple_index): + + idx = simple_index + # Check that this doesn't cover MultiIndex case, if/when it does, + # we can remove multi.test_compat.test_numeric_compat + assert not isinstance(idx, MultiIndex) + if type(idx) is Index: + return + + typ = type(idx._data).__name__ + cls = type(idx).__name__ + lmsg = "|".join( + [ + rf"unsupported operand type\(s\) for \*: '{typ}' and 'int'", + "cannot perform (__mul__|__truediv__|__floordiv__) with " + f"this index type: ({cls}|{typ})", + ] + ) + with pytest.raises(TypeError, match=lmsg): + idx * 1 + rmsg = "|".join( + [ + rf"unsupported operand type\(s\) for \*: 'int' and '{typ}'", + "cannot perform (__rmul__|__rtruediv__|__rfloordiv__) with " + f"this index type: ({cls}|{typ})", + ] + ) + with pytest.raises(TypeError, match=rmsg): + 1 * idx + + div_err = lmsg.replace("*", "/") + with pytest.raises(TypeError, match=div_err): + idx / 1 + div_err = rmsg.replace("*", "/") + with pytest.raises(TypeError, match=div_err): + 1 / idx + + floordiv_err = 
lmsg.replace("*", "//") + with pytest.raises(TypeError, match=floordiv_err): + idx // 1 + floordiv_err = rmsg.replace("*", "//") + with pytest.raises(TypeError, match=floordiv_err): + 1 // idx + + def test_logical_compat(self, simple_index): + idx = simple_index + with pytest.raises(TypeError, match="cannot perform all"): + idx.all() + with pytest.raises(TypeError, match="cannot perform any"): + idx.any() + + def test_repr_roundtrip(self, simple_index): + + idx = simple_index + tm.assert_index_equal(eval(repr(idx)), idx) + + def test_repr_max_seq_item_setting(self, simple_index): + # GH10182 + idx = simple_index + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert "..." not in str(idx) + + def test_ensure_copied_data(self, index): + # Check the "copy" argument of each Index.__new__ is honoured + # GH12309 + init_kwargs = {} + if isinstance(index, PeriodIndex): + # Needs "freq" specification: + init_kwargs["freq"] = index.freq + elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)): + # RangeIndex cannot be initialized from data + # MultiIndex and CategoricalIndex are tested separately + return + elif index.dtype == object and index.inferred_type == "boolean": + init_kwargs["dtype"] = index.dtype + + index_type = type(index) + result = index_type(index.values, copy=True, **init_kwargs) + if is_datetime64tz_dtype(index.dtype): + result = result.tz_localize("UTC").tz_convert(index.tz) + if isinstance(index, (DatetimeIndex, TimedeltaIndex)): + index = index._with_freq(None) + + tm.assert_index_equal(index, result) + + if isinstance(index, PeriodIndex): + # .values an object array of Period, thus copied + result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.asi8, result.asi8, check_same="same") + elif isinstance(index, IntervalIndex): + # checked in test_interval.py + pass + elif type(index) is Index and not isinstance(index.dtype, np.dtype): + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_index_equal(result, index) + + if isinstance(index._values, BaseMaskedArray): + assert np.shares_memory(index._values._data, result._values._data) + tm.assert_numpy_array_equal( + index._values._data, result._values._data, check_same="same" + ) + assert np.shares_memory(index._values._mask, result._values._mask) + tm.assert_numpy_array_equal( + index._values._mask, result._values._mask, check_same="same" + ) + elif index.dtype == "string[python]": + assert np.shares_memory(index._values._ndarray, result._values._ndarray) + tm.assert_numpy_array_equal( + index._values._ndarray, result._values._ndarray, check_same="same" + ) + elif index.dtype == "string[pyarrow]": + assert tm.shares_memory(result._values, index._values) + else: + raise NotImplementedError(index.dtype) + else: + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.values, result.values, check_same="same") + + def test_memory_usage(self, index): + index._engine.clear_mapping() + result = index.memory_usage() + if index.empty: + # we report 0 for no-length + assert result == 0 + return + + # non-zero length + index.get_loc(index[0]) + result2 = index.memory_usage() + result3 = index.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + # Index[EA] has engine but it does not have a Hashtable .mapping + if not isinstance(index, (RangeIndex, IntervalIndex)) and not ( + type(index) is Index and not isinstance(index.dtype, np.dtype) + ): + assert 
result2 > result + + if index.inferred_type == "object": + assert result3 > result2 + + def test_argsort(self, request, index): + # separately tested + if isinstance(index, CategoricalIndex): + return + + result = index.argsort() + expected = np.array(index).argsort() + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + def test_numpy_argsort(self, index): + result = np.argsort(index) + expected = index.argsort() + tm.assert_numpy_array_equal(result, expected) + + result = np.argsort(index, kind="mergesort") + expected = index.argsort(kind="mergesort") + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(index, (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(index, axis=1) + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(index, order=("a", "b")) + + def test_repeat(self, simple_index): + rep = 2 + idx = simple_index.copy() + new_index_cls = Int64Index if isinstance(idx, RangeIndex) else idx._constructor + expected = new_index_cls(idx.values.repeat(rep), name=idx.name) + tm.assert_index_equal(idx.repeat(rep), expected) + + idx = simple_index + rep = np.arange(len(idx)) + expected = new_index_cls(idx.values.repeat(rep), name=idx.name) + tm.assert_index_equal(idx.repeat(rep), expected) + + def test_numpy_repeat(self, simple_index): + rep = 2 + idx = simple_index + expected = idx.repeat(rep) + tm.assert_index_equal(np.repeat(idx, rep), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(idx, rep, axis=0) + + def test_where(self, listlike_box, simple_index): + klass = listlike_box + + idx = simple_index + if isinstance(idx, (DatetimeIndex, TimedeltaIndex)): + # where does not preserve freq + idx = idx._with_freq(None) + + cond = [True] * len(idx) + result = idx.where(klass(cond)) + expected = idx + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * len(idx[1:]) + expected = Index([idx._na_value] + idx[1:].tolist(), dtype=idx.dtype) + result = idx.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_insert_base(self, index): + result = index[1:4] + + if not len(index): + return + + # test 0th element + assert index[0:4].equals(result.insert(0, index[0])) + + def test_insert_out_of_bounds(self, index): + # TypeError/IndexError matches what np.insert raises in these cases + + if len(index) > 0: + err = TypeError + else: + err = IndexError + if len(index) == 0: + # 0 vs 0.5 in error message varies with numpy version + msg = "index (0|0.5) is out of bounds for axis 0 with size 0" + else: + msg = "slice indices must be integers or None or have an __index__ method" + with pytest.raises(err, match=msg): + index.insert(0.5, "foo") + + msg = "|".join( + [ + r"index -?\d+ is out of bounds for axis 0 with size \d+", + "loc must be an integer between", + ] + ) + with pytest.raises(IndexError, match=msg): + index.insert(len(index) + 1, 1) + + with pytest.raises(IndexError, match=msg): + index.insert(-len(index) - 1, 1) + + def test_delete_base(self, index): + if not len(index): + return + + if isinstance(index, 
RangeIndex): + # tested in class + return + + expected = index[1:] + result = index.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = index[:-1] + result = index.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + length = len(index) + msg = f"index {length} is out of bounds for axis 0 with size {length}" + with pytest.raises(IndexError, match=msg): + index.delete(length) + + def test_equals(self, index): + if isinstance(index, IntervalIndex): + # IntervalIndex tested separately, the index.equals(index.astype(object)) + # fails for IntervalIndex + return + + is_ea_idx = type(index) is Index and not isinstance(index.dtype, np.dtype) + + assert index.equals(index) + assert index.equals(index.copy()) + if not is_ea_idx: + # doesn't hold for e.g. IntegerDtype + assert index.equals(index.astype(object)) + + assert not index.equals(list(index)) + assert not index.equals(np.array(index)) + + # Cannot pass in non-int64 dtype to RangeIndex + if not isinstance(index, RangeIndex) and not is_ea_idx: + same_values = Index(index, dtype=object) + assert index.equals(same_values) + assert same_values.equals(index) + + if index.nlevels == 1: + # do not test MultiIndex + assert not index.equals(Series(index)) + + def test_equals_op(self, simple_index): + # GH9947, GH10637 + index_a = simple_index + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + + msg = "Lengths must match|could not be broadcast" + with pytest.raises(ValueError, match=msg): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with pytest.raises(ValueError, match=msg): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with pytest.raises(ValueError, match=msg): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + series_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == 
item, expected3) + # For RangeIndex we can convert to Int64Index + tm.assert_series_equal(series_a == item, Series(expected3)) + + def test_format(self, simple_index): + # GH35439 + idx = simple_index + expected = [str(x) for x in idx] + assert idx.format() == expected + + def test_format_empty(self): + # GH35712 + empty_idx = self._index_cls([]) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] + + def test_fillna(self, index): + # GH 11343 + if len(index) == 0: + return + elif index.dtype == bool: + # can't hold NAs + return + elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype): + return + elif isinstance(index, MultiIndex): + idx = index.copy(deep=True) + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.fillna(idx[0]) + else: + idx = index.copy(deep=True) + result = idx.fillna(idx[0]) + tm.assert_index_equal(result, idx) + assert result is not idx + + msg = "'value' must be a scalar, passed: " + with pytest.raises(TypeError, match=msg): + idx.fillna([idx[0]]) + + idx = index.copy(deep=True) + values = idx._values + + values[1] = np.nan + + idx = type(index)(values) + + msg = "does not support 'downcast'" + with pytest.raises(NotImplementedError, match=msg): + # For now at least, we only raise if there are NAs present + idx.fillna(idx[0], downcast="infer") + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is True + + def test_nulls(self, index): + # this is really a smoke test for the methods + # as these are adequately tested for function elsewhere + if len(index) == 0: + tm.assert_numpy_array_equal(index.isna(), np.array([], dtype=bool)) + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.isna() + elif not index.hasnans: + tm.assert_numpy_array_equal(index.isna(), np.zeros(len(index), dtype=bool)) + tm.assert_numpy_array_equal(index.notna(), np.ones(len(index), dtype=bool)) + else: + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) + + def test_empty(self, simple_index): + # GH 15270 + idx = simple_index + assert not idx.empty + assert idx[:0].empty + + def test_join_self_unique(self, join_type, simple_index): + idx = simple_index + if idx.is_unique: + joined = idx.join(idx, how=join_type) + assert (idx == joined).all() + + def test_map(self, simple_index): + # callable + idx = simple_index + + result = idx.map(lambda x: x) + # For RangeIndex we convert to Int64Index + tm.assert_index_equal(result, idx, exact="equiv") + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: Series(values, index), + ], + ) + def test_map_dictlike(self, mapper, simple_index): + + idx = simple_index + if isinstance(idx, CategoricalIndex): + # TODO(2.0): see if we can avoid skipping once + # CategoricalIndex.reindex is removed. 
+ pytest.skip(f"skipping tests for {type(idx)}") + + identity = mapper(idx.values, idx) + + result = idx.map(identity) + # For RangeIndex we convert to Int64Index + tm.assert_index_equal(result, idx, exact="equiv") + + # empty mappable + dtype = None + if idx._is_backward_compat_public_numeric_index: + new_index_cls = NumericIndex + if idx.dtype.kind == "f": + dtype = idx.dtype + else: + new_index_cls = Float64Index + + expected = new_index_cls([np.nan] * len(idx), dtype=dtype) + result = idx.map(mapper(expected, idx)) + tm.assert_index_equal(result, expected) + + def test_map_str(self, simple_index): + # GH 31202 + idx = simple_index + result = idx.map(str) + expected = Index([str(x) for x in idx], dtype=object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("copy", [True, False]) + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize("ordered", [True, False]) + def test_astype_category(self, copy, name, ordered, simple_index): + # GH 18630 + idx = simple_index + if name: + idx = idx.rename(name) + + # standard categories + dtype = CategoricalDtype(ordered=ordered) + result = idx.astype(dtype, copy=copy) + expected = CategoricalIndex(idx, name=name, ordered=ordered) + tm.assert_index_equal(result, expected, exact=True) + + # non-standard categories + dtype = CategoricalDtype(idx.unique().tolist()[:-1], ordered) + result = idx.astype(dtype, copy=copy) + expected = CategoricalIndex(idx, name=name, dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + result = idx.astype("category", copy=copy) + expected = CategoricalIndex(idx, name=name) + tm.assert_index_equal(result, expected, exact=True) + + def test_is_unique(self, simple_index): + # initialize a unique index + index = simple_index.drop_duplicates() + assert index.is_unique is True + + # empty index should be unique + index_empty = index[:0] + assert index_empty.is_unique is True + + # test basic dupes + index_dup = index.insert(0, index[0]) + assert index_dup.is_unique is False + + # single NA should be unique + index_na = index.insert(0, np.nan) + assert index_na.is_unique is True + + # multiple NA should not be unique + index_na_dup = index_na.insert(0, np.nan) + assert index_na_dup.is_unique is False + + @pytest.mark.arm_slow + def test_engine_reference_cycle(self, simple_index): + # GH27585 + index = simple_index + nrefs_pre = len(gc.get_referrers(index)) + index._engine + assert len(gc.get_referrers(index)) == nrefs_pre + + def test_getitem_2d_deprecated(self, simple_index): + # GH#30588, GH#31479 + idx = simple_index + msg = "Support for multi-dimensional indexing" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = idx[:, None] + + assert isinstance(res, np.ndarray), type(res) + + if not isinstance(idx, RangeIndex): + # GH#44051 RangeIndex already raises + with tm.assert_produces_warning(FutureWarning, match=msg): + res = idx[True] + assert isinstance(res, np.ndarray), type(res) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = idx[False] + assert isinstance(res, np.ndarray), type(res) + else: + msg = "only integers, slices" + with pytest.raises(IndexError, match=msg): + idx[True] + with pytest.raises(IndexError, match=msg): + idx[False] + + def test_copy_shares_cache(self, simple_index): + # GH32898, GH36840 + idx = simple_index + idx.get_loc(idx[0]) # populates the _cache. 
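+        # the copy is expected to reuse the same _cache dict rather than rebuild it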
+ copy = idx.copy() + + assert copy._cache is idx._cache + + def test_shallow_copy_shares_cache(self, simple_index): + # GH32669, GH36840 + idx = simple_index + idx.get_loc(idx[0]) # populates the _cache. + shallow_copy = idx._view() + + assert shallow_copy._cache is idx._cache + + shallow_copy = idx._shallow_copy(idx._data) + assert shallow_copy._cache is not idx._cache + assert shallow_copy._cache == {} + + def test_index_groupby(self, simple_index): + idx = simple_index[:5] + to_groupby = np.array([1, 2, np.nan, 2, 1]) + tm.assert_dict_equal( + idx.groupby(to_groupby), {1.0: idx[[0, 4]], 2.0: idx[[1, 3]]} + ) + + to_groupby = DatetimeIndex( + [ + datetime(2011, 11, 1), + datetime(2011, 12, 1), + pd.NaT, + datetime(2011, 12, 1), + datetime(2011, 11, 1), + ], + tz="UTC", + ).values + + ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")] + expected = {ex_keys[0]: idx[[0, 4]], ex_keys[1]: idx[[1, 3]]} + tm.assert_dict_equal(idx.groupby(to_groupby), expected) + + def test_append_preserves_dtype(self, simple_index): + # In particular NumericIndex with dtype float32 + index = simple_index + N = len(index) + + result = index.append(index) + assert result.dtype == index.dtype + tm.assert_index_equal(result[:N], index, check_exact=True) + tm.assert_index_equal(result[N:], index, check_exact=True) + + alt = index.take(list(range(N)) * 2) + tm.assert_index_equal(result, alt, check_exact=True) + + def test_inv(self, simple_index): + idx = simple_index + + if idx.dtype.kind in ["i", "u"]: + res = ~idx + expected = Index(~idx.values, name=idx.name) + tm.assert_index_equal(res, expected) + + # check that we are matching Series behavior + res2 = ~Series(idx) + # TODO(2.0): once we preserve dtype, check_dtype can be True + tm.assert_series_equal(res2, Series(expected), check_dtype=False) + else: + if idx.dtype.kind == "f": + msg = "ufunc 'invert' not supported for the input types" + else: + msg = "bad operand" + with pytest.raises(TypeError, match=msg): + ~idx + + # check that we get the same behavior with Series + with pytest.raises(TypeError, match=msg): + ~Series(idx) + + +class NumericBase(Base): + """ + Base class for numeric index (incl. RangeIndex) sub-class tests. 
+ """ + + def test_constructor_unwraps_index(self, dtype): + index_cls = self._index_cls + + idx = Index([1, 2], dtype=dtype) + result = index_cls(idx) + expected = np.array([1, 2], dtype=idx.dtype) + tm.assert_numpy_array_equal(result._data, expected) + + def test_where(self): + # Tested in numeric.test_indexing + pass + + def test_can_hold_identifiers(self, simple_index): + idx = simple_index + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + + def test_format(self, simple_index): + # GH35439 + idx = simple_index + max_width = max(len(str(x)) for x in idx) + expected = [str(x).ljust(max_width) for x in idx] + assert idx.format() == expected + + def test_numeric_compat(self): + pass # override Base method + + def test_insert_non_na(self, simple_index): + # GH#43921 inserting an element that we know we can hold should + # not change dtype or type (except for RangeIndex) + index = simple_index + + result = index.insert(0, index[0]) + + cls = type(index) + if cls is RangeIndex: + cls = Int64Index + + expected = cls([index[0]] + list(index), dtype=index.dtype) + tm.assert_index_equal(result, expected, exact=True) + + def test_insert_na(self, nulls_fixture, simple_index): + # GH 18295 (test missing) + index = simple_index + na_val = nulls_fixture + + if na_val is pd.NaT: + expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object) + else: + expected = Float64Index([index[0], np.nan] + list(index[1:])) + + if index._is_backward_compat_public_numeric_index: + # GH#43921 we preserve NumericIndex + if index.dtype.kind == "f": + expected = NumericIndex(expected, dtype=index.dtype) + else: + expected = NumericIndex(expected) + + result = index.insert(1, na_val) + tm.assert_index_equal(result, expected, exact=True) + + def test_arithmetic_explicit_conversions(self): + # GH 8608 + # add/sub are overridden explicitly for Float/Int Index + index_cls = self._index_cls + if index_cls is RangeIndex: + idx = RangeIndex(5) + else: + idx = index_cls(np.arange(5, dtype="int64")) + + # float conversions + arr = np.arange(5, dtype="int64") * 3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx, expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx, expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5, dtype="float64") + result = fidx - a + tm.assert_index_equal(result, expected) + + expected = Float64Index(-arr) + a = np.zeros(5, dtype="float64") + result = a - fidx + tm.assert_index_equal(result, expected) + + def test_invalid_dtype(self, invalid_dtype): + # GH 29539 + dtype = invalid_dtype + msg = rf"Incorrect `dtype` passed: expected \w+(?: \w+)?, received {dtype}" + with pytest.raises(ValueError, match=msg): + self._index_cls([1, 2, 3], dtype=dtype) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py new file mode 100644 index 00000000..1e701945 --- /dev/null +++ b/pandas/tests/indexes/conftest.py @@ -0,0 +1,41 @@ +import numpy as np +import pytest + +from pandas import ( + Series, + array, +) + + +@pytest.fixture(params=[None, False]) +def sort(request): + """ + Valid values for the 'sort' parameter used in the Index + setops methods (intersection, union, etc.) + + Caution: + Don't confuse this one with the "sort" fixture used + for DataFrame.append or concat. That one has + parameters [True, False]. + + We can't combine them as sort=True is not permitted + in the Index setops methods. 
+ """ + return request.param + + +@pytest.fixture(params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]) +def freq_sample(request): + """ + Valid values for 'freq' parameter used to create date_range and + timedelta_range.. + """ + return request.param + + +@pytest.fixture(params=[list, tuple, np.array, array, Series]) +def listlike_box(request): + """ + Types that may be passed as the indexer to searchsorted. + """ + return request.param diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py new file mode 100644 index 00000000..ecdbf01f --- /dev/null +++ b/pandas/tests/indexes/datetimelike.py @@ -0,0 +1,139 @@ +""" generic datetimelike tests """ + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.indexes.common import Base + + +class DatetimeLike(Base): + def test_isin(self, simple_index): + index = simple_index[:4] + result = index.isin(index) + assert result.all() + + result = index.isin(list(index)) + assert result.all() + + result = index.isin([index[2], 5]) + expected = np.array([False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + def test_argsort_matches_array(self, simple_index): + idx = simple_index + idx = idx.insert(1, pd.NaT) + + result = idx.argsort() + expected = idx._data.argsort() + tm.assert_numpy_array_equal(result, expected) + + def test_can_hold_identifiers(self, simple_index): + idx = simple_index + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + + def test_shift_identity(self, simple_index): + + idx = simple_index + tm.assert_index_equal(idx, idx.shift(0)) + + def test_shift_empty(self, simple_index): + # GH#14811 + idx = simple_index[:0] + tm.assert_index_equal(idx, idx.shift(1)) + + def test_str(self, simple_index): + + # test the string repr + idx = simple_index + idx.name = "foo" + assert not (f"length={len(idx)}" in str(idx)) + assert "'foo'" in str(idx) + assert type(idx).__name__ in str(idx) + + if hasattr(idx, "tz"): + if idx.tz is not None: + assert idx.tz in str(idx) + if isinstance(idx, pd.PeriodIndex): + assert f"dtype='period[{idx.freqstr}]'" in str(idx) + else: + assert f"freq='{idx.freqstr}'" in str(idx) + + def test_view(self, simple_index): + idx = simple_index + + idx_view = idx.view("i8") + result = self._index_cls(idx) + tm.assert_index_equal(result, idx) + + idx_view = idx.view(self._index_cls) + result = self._index_cls(idx) + tm.assert_index_equal(result, idx_view) + + def test_map_callable(self, simple_index): + index = simple_index + expected = index + index.freq + result = index.map(lambda x: x + x.freq) + tm.assert_index_equal(result, expected) + + # map to NaT + result = index.map(lambda x: pd.NaT if x == index[0] else x) + expected = pd.Index([pd.NaT] + index[1:].tolist()) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index, dtype=object), + ], + ) + def test_map_dictlike(self, mapper, simple_index): + index = simple_index + expected = index + index.freq + + # don't compare the freqs + if isinstance(expected, (pd.DatetimeIndex, pd.TimedeltaIndex)): + expected = expected._with_freq(None) + + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + expected = pd.Index([pd.NaT] + index[1:].tolist()) + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + # empty map; 
these map to np.nan because we cannot know + # to re-infer things + expected = pd.Index([np.nan] * len(index)) + result = index.map(mapper([], [])) + tm.assert_index_equal(result, expected) + + def test_getitem_preserves_freq(self, simple_index): + index = simple_index + assert index.freq is not None + + result = index[:] + assert result.freq == index.freq + + def test_where_cast_str(self, simple_index): + index = simple_index + + mask = np.ones(len(index), dtype=bool) + mask[-1] = False + + result = index.where(mask, str(index[0])) + expected = index.where(mask, index[0]) + tm.assert_index_equal(result, expected) + + result = index.where(mask, [str(index[0])]) + tm.assert_index_equal(result, expected) + + expected = index.astype(object).where(mask, "foo") + result = index.where(mask, "foo") + tm.assert_index_equal(result, expected) + + result = index.where(mask, ["foo"]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimelike_/__init__.py b/pandas/tests/indexes/datetimelike_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py b/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py new file mode 100644 index 00000000..c56fc84b --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + Series, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class DropDuplicates: + def test_drop_duplicates_metadata(self, idx): + # GH#10115 + result = idx.drop_duplicates() + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq + + idx_dup = idx.append(idx) + result = idx_dup.drop_duplicates() + + expected = idx + if not isinstance(idx, PeriodIndex): + # freq is reset except for PeriodIndex + assert idx_dup.freq is None + assert result.freq is None + expected = idx._with_freq(None) + else: + assert result.freq == expected.freq + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "keep, expected, index", + [ + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10), + ), + ], + ) + def test_drop_duplicates(self, keep, expected, index, idx): + # to check Index/Series compat + idx = idx.append(idx[:5]) + + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) + expected = idx[~expected] + + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) + + result = Series(idx).drop_duplicates(keep=keep) + tm.assert_series_equal(result, Series(expected, index=index)) + + +class TestDropDuplicatesPeriodIndex(DropDuplicates): + @pytest.fixture(params=["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) + def freq(self, request): + return request.param + + @pytest.fixture + def idx(self, freq): + return period_range("2011-01-01", periods=10, freq=freq, name="idx") + + +class TestDropDuplicatesDatetimeIndex(DropDuplicates): + @pytest.fixture + def idx(self, freq_sample): + return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") + + +class TestDropDuplicatesTimedeltaIndex(DropDuplicates): + @pytest.fixture + def idx(self, freq_sample): + return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") diff --git a/pandas/tests/indexes/datetimelike_/test_equals.py 
b/pandas/tests/indexes/datetimelike_/test_equals.py new file mode 100644 index 00000000..39e8270b --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_equals.py @@ -0,0 +1,182 @@ +""" +Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex +""" +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + PeriodIndex, + TimedeltaIndex, + date_range, + period_range, +) +import pandas._testing as tm + + +class EqualsTests: + def test_not_equals_numeric(self, index): + + assert not index.equals(Index(index.asi8)) + assert not index.equals(Index(index.asi8.astype("u8"))) + assert not index.equals(Index(index.asi8).astype("f8")) + + def test_equals(self, index): + assert index.equals(index) + assert index.equals(index.astype(object)) + assert index.equals(CategoricalIndex(index)) + assert index.equals(CategoricalIndex(index.astype(object))) + + def test_not_equals_non_arraylike(self, index): + assert not index.equals(list(index)) + + def test_not_equals_strings(self, index): + + other = Index([str(x) for x in index], dtype=object) + assert not index.equals(other) + assert not index.equals(CategoricalIndex(other)) + + def test_not_equals_misc_strs(self, index): + other = Index(list("abc")) + assert not index.equals(other) + + +class TestPeriodIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return period_range("2013-01-01", periods=5, freq="D") + + # TODO: de-duplicate with other test_equals2 methods + @pytest.mark.parametrize("freq", ["D", "M"]) + def test_equals2(self, freq): + # GH#13107 + idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="H") + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = PeriodIndex._simple_new( + idx._values._simple_new(idx._values.asi8, freq="H") + ) + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + +class TestDatetimeIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return date_range("2013-01-01", periods=5) + + def test_equals2(self): + # GH#13107 + idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"]) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific") + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + 
idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific") + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + # check that we do not raise when comparing with OutOfBounds objects + oob = Index([datetime(2500, 1, 1)] * 3, dtype=object) + assert not idx.equals(oob) + assert not idx2.equals(oob) + assert not idx3.equals(oob) + + # check that we do not raise when comparing with OutOfBounds dt64 + oob2 = oob.map(np.datetime64) + assert not idx.equals(oob2) + assert not idx2.equals(oob2) + assert not idx3.equals(oob2) + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_not_equals_bday(self, freq): + rng = date_range("2009-01-01", "2010-01-01", freq=freq) + assert not rng.equals(list(rng)) + + +class TestTimedeltaIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return tm.makeTimedeltaIndex(10) + + def test_equals2(self): + # GH#13107 + idx = TimedeltaIndex(["1 days", "2 days", "NaT"]) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"]) + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.astype(object).equals(idx2.astype(object)) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # Check that we dont raise OverflowError on comparisons outside the + # implementation range GH#28532 + oob = Index([timedelta(days=10**6)] * 3, dtype=object) + assert not idx.equals(oob) + assert not idx2.equals(oob) + + oob2 = Index([np.timedelta64(x) for x in oob], dtype=object) + assert (oob == oob2).all() + assert not idx.equals(oob2) + assert not idx2.equals(oob2) + + oob3 = oob.map(np.timedelta64) + assert (oob3 == oob).all() + assert not idx.equals(oob3) + assert not idx2.equals(oob3) diff --git a/pandas/tests/indexes/datetimelike_/test_indexing.py b/pandas/tests/indexes/datetimelike_/test_indexing.py new file mode 100644 index 00000000..b64d5421 --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_indexing.py @@ -0,0 +1,46 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, +) +import pandas._testing as tm + +dtlike_dtypes = [ + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + pd.DatetimeTZDtype("ns", "Asia/Tokyo"), + pd.PeriodDtype("ns"), +] + + +@pytest.mark.parametrize("ldtype", dtlike_dtypes) +@pytest.mark.parametrize("rdtype", dtlike_dtypes) +def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype): + + vals = np.tile(3600 * 10**9 * np.arange(3), 2) + + def construct(dtype): + if dtype is dtlike_dtypes[-1]: + # PeriodArray will try to cast ints to strings + return DatetimeIndex(vals).astype(dtype) + return Index(vals, dtype=dtype) + + left = construct(ldtype) + right = construct(rdtype) + + result = left.get_indexer_non_unique(right) + + if ldtype is rdtype: + ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp) + ex2 = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result[0], ex1) + tm.assert_numpy_array_equal(result[1], ex2) + + else: + no_matches = 
np.array([-1] * 6, dtype=np.intp) + missing = np.arange(6, dtype=np.intp) + tm.assert_numpy_array_equal(result[0], no_matches) + tm.assert_numpy_array_equal(result[1], missing) diff --git a/pandas/tests/indexes/datetimelike_/test_is_monotonic.py b/pandas/tests/indexes/datetimelike_/test_is_monotonic.py new file mode 100644 index 00000000..088ccc40 --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_is_monotonic.py @@ -0,0 +1,46 @@ +from pandas import ( + Index, + NaT, + date_range, +) + + +def test_is_monotonic_with_nat(): + # GH#31437 + # PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex, + # in particular never be monotonic when we have NaT + dti = date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + tdi = Index(dti.view("timedelta64[ns]")) + + for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]: + if isinstance(obj, Index): + # i.e. not Engines + assert obj.is_monotonic_increasing + assert obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique + + dti1 = dti.insert(0, NaT) + pi1 = dti1.to_period("D") + tdi1 = Index(dti1.view("timedelta64[ns]")) + + for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]: + if isinstance(obj, Index): + # i.e. not Engines + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique + + dti2 = dti.insert(3, NaT) + pi2 = dti2.to_period("H") + tdi2 = Index(dti2.view("timedelta64[ns]")) + + for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]: + if isinstance(obj, Index): + # i.e. not Engines + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique diff --git a/pandas/tests/indexes/datetimelike_/test_nat.py b/pandas/tests/indexes/datetimelike_/test_nat.py new file mode 100644 index 00000000..50cf29d0 --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_nat.py @@ -0,0 +1,53 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + TimedeltaIndex, +) +import pandas._testing as tm + + +class NATests: + def test_nat(self, index_without_na): + empty_index = index_without_na[:0] + + index_with_na = index_without_na.copy(deep=True) + index_with_na._data[1] = NaT + + assert empty_index._na_value is NaT + assert index_with_na._na_value is NaT + assert index_without_na._na_value is NaT + + idx = index_without_na + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert idx.hasnans is False + + idx = index_with_na + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans is True + + +class TestDatetimeIndexNA(NATests): + @pytest.fixture + def index_without_na(self, tz_naive_fixture): + tz = tz_naive_fixture + return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + + +class TestTimedeltaIndexNA(NATests): + @pytest.fixture + def index_without_na(self): + return TimedeltaIndex(["1 days", "2 days"]) + + +class TestPeriodIndexNA(NATests): + @pytest.fixture + def index_without_na(self): + return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") diff --git a/pandas/tests/indexes/datetimelike_/test_sort_values.py b/pandas/tests/indexes/datetimelike_/test_sort_values.py new file mode 100644 index 00000000..6b7ad79e --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_sort_values.py @@ -0,0 +1,316 @@ +import numpy as 
np +import pytest + +from pandas import ( + DatetimeIndex, + Index, + NaT, + PeriodIndex, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +def check_freq_ascending(ordered, orig, ascending): + """ + Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex + when the original index is generated (or generate-able) with + period_range/date_range/timedelta_range. + """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + if ascending: + assert ordered.freq.n == orig.freq.n + else: + assert ordered.freq.n == -1 * orig.freq.n + + +def check_freq_nonmonotonic(ordered, orig): + """ + Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex + when the original index is _not_ generated (or generate-able) with + period_range/date_range//timedelta_range. + """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + assert ordered.freq is None + + +class TestSortValues: + @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex]) + def non_monotonic_idx(self, request): + if request.param is DatetimeIndex: + return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + elif request.param is PeriodIndex: + dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + return dti.to_period("D") + else: + return TimedeltaIndex( + ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"] + ) + + def test_argmin_argmax(self, non_monotonic_idx): + assert non_monotonic_idx.argmin() == 1 + assert non_monotonic_idx.argmax() == 0 + + def test_sort_values(self, non_monotonic_idx): + idx = non_monotonic_idx + ordered = idx.sort_values() + assert ordered.is_monotonic_increasing + ordered = idx.sort_values(ascending=False) + assert ordered[::-1].is_monotonic_increasing + + ordered, dexer = idx.sort_values(return_indexer=True) + assert ordered.is_monotonic_increasing + tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) + + ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) + assert ordered[::-1].is_monotonic_increasing + tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) + + def check_sort_values_with_freq(self, idx): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + check_freq_ascending(ordered, idx, True) + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + check_freq_ascending(ordered, idx, False) + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp)) + check_freq_ascending(ordered, idx, True) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp)) + check_freq_ascending(ordered, idx, False) + + @pytest.mark.parametrize("freq", ["D", "H"]) + def test_sort_values_with_freq_timedeltaindex(self, freq): + # GH#10295 + idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx") + + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" + ), + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + name="tzidx", + 
tz="Asia/Tokyo", + ), + ], + ) + def test_sort_values_with_freq_datetimeindex(self, idx): + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize("freq", ["D", "2D", "4D"]) + def test_sort_values_with_freq_periodindex(self, freq): + # here with_freq refers to being period_range-like + idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" + ) + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A"), + Index([2011, 2012, 2013], name="idx"), # for compatibility check + ], + ) + def test_sort_values_with_freq_periodindex2(self, idx): + # here with_freq indicates this is period_range-like + self.check_sort_values_with_freq(idx) + + def check_sort_values_without_freq(self, idx, expected): + + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 0, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + def test_sort_values_without_freq_timedeltaindex(self): + # GH#10295 + + idx = TimedeltaIndex( + ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" + ) + expected = TimedeltaIndex( + ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" + ) + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "index_dates,expected_dates", + [ + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ], + ) + def test_sort_values_without_freq_datetimeindex( + self, index_dates, expected_dates, tz_naive_fixture + ): + tz = tz_naive_fixture + + # without freq + idx = DatetimeIndex(index_dates, tz=tz, name="idx") + expected = DatetimeIndex(expected_dates, tz=tz, name="idx") + + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "idx,expected", + [ + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx1", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx1", + ), + ), + ( + PeriodIndex( + [ + "2011-01-01", + 
"2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx2", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx2", + ), + ), + ( + PeriodIndex( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + freq="D", + name="idx3", + ), + PeriodIndex( + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + freq="D", + name="idx3", + ), + ), + ( + PeriodIndex( + ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A" + ), + PeriodIndex( + ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A" + ), + ), + ( + # For compatibility check + Index([2011, 2013, 2015, 2012, 2011], name="idx"), + Index([2011, 2011, 2012, 2013, 2015], name="idx"), + ), + ], + ) + def test_sort_values_without_freq_periodindex(self, idx, expected): + # here without_freq means not generateable by period_range + self.check_sort_values_without_freq(idx, expected) + + def test_sort_values_without_freq_periodindex_nat(self): + # doesn't quite fit into check_sort_values_without_freq + idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") + expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") + + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + +def test_order_stability_compat(): + # GH#35922. sort_values is stable both for normal and datetime-like Index + pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") + iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") + ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) + ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) + tm.assert_numpy_array_equal(indexer1, indexer2) diff --git a/pandas/tests/indexes/datetimelike_/test_value_counts.py b/pandas/tests/indexes/datetimelike_/test_value_counts.py new file mode 100644 index 00000000..f0df6dd6 --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_value_counts.py @@ -0,0 +1,103 @@ +import numpy as np + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + Series, + TimedeltaIndex, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestValueCounts: + # GH#7735 + + def test_value_counts_unique_datetimeindex(self, tz_naive_fixture): + tz = tz_naive_fixture + orig = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) + self._check_value_counts_with_repeats(orig) + + def test_value_counts_unique_timedeltaindex(self): + orig = timedelta_range("1 days 09:00:00", freq="H", periods=10) + self._check_value_counts_with_repeats(orig) + + def test_value_counts_unique_periodindex(self): + orig = period_range("2011-01-01 09:00", freq="H", periods=10) + self._check_value_counts_with_repeats(orig) + + def _check_value_counts_with_repeats(self, orig): + # create repeated values, 'n'th element is repeated by n+1 times + idx = type(orig)( + np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype + ) + + exp_idx = orig[::-1] + if not isinstance(exp_idx, PeriodIndex): + exp_idx = exp_idx._with_freq(None) + expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + tm.assert_index_equal(idx.unique(), orig) + + 
def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture): + tz = tz_naive_fixture + idx = DatetimeIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + tz=tz, + ) + self._check_value_counts_dropna(idx) + + def test_value_counts_unique_timedeltaindex2(self): + idx = TimedeltaIndex( + [ + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 08:00:00", + "1 days 08:00:00", + NaT, + ] + ) + self._check_value_counts_dropna(idx) + + def test_value_counts_unique_periodindex2(self): + idx = PeriodIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + freq="H", + ) + self._check_value_counts_dropna(idx) + + def _check_value_counts_dropna(self, idx): + exp_idx = idx[[2, 3]] + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = idx[[2, 3, -1]] + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) diff --git a/pandas/tests/indexes/datetimes/__init__.py b/pandas/tests/indexes/datetimes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/datetimes/methods/__init__.py b/pandas/tests/indexes/datetimes/methods/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py new file mode 100644 index 00000000..e7823f0c --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -0,0 +1,332 @@ +from datetime import datetime + +import dateutil +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + NaT, + PeriodIndex, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Int64Index, + UInt64Index, +) + + +class TestDatetimeIndex: + def test_astype(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx") + + result = idx.astype(object) + expected = Index( + [Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index( + [1463356800000000000] + [-9223372036854775808] * 3, + dtype=np.int64, + name="idx", + ) + tm.assert_index_equal(result, expected) + + rng = date_range("1/1/2000", periods=10, name="idx") + result = rng.astype("i8") + tm.assert_index_equal(result, Index(rng.asi8, name="idx")) + tm.assert_numpy_array_equal(result.values, rng.asi8) + + def test_astype_uint(self): + arr = date_range("2000", periods=2, name="idx") + expected = UInt64Index( + np.array([946684800000000000, 946771200000000000], dtype="uint64"), + name="idx", + ) + tm.assert_index_equal(arr.astype("uint64"), expected) + + msg = "will return exactly the specified dtype instead of uint64" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = arr.astype("uint32") + tm.assert_index_equal(res, expected) + + def test_astype_with_tz(self): + + # with tz + rng = date_range("1/1/2000", periods=10, tz="US/Eastern") + with tm.assert_produces_warning(FutureWarning): + # deprecated + result = rng.astype("datetime64[ns]") + with tm.assert_produces_warning(FutureWarning): + # check DatetimeArray while 
we're here deprecated + rng._data.astype("datetime64[ns]") + + expected = ( + date_range("1/1/2000", periods=10, tz="US/Eastern") + .tz_convert("UTC") + .tz_localize(None) + ) + tm.assert_index_equal(result, expected) + + def test_astype_tzaware_to_tzaware(self): + # GH 18951: tz-aware to tz-aware + idx = date_range("20170101", periods=4, tz="US/Pacific") + result = idx.astype("datetime64[ns, US/Eastern]") + expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + def test_astype_tznaive_to_tzaware(self): + # GH 18951: tz-naive to tz-aware + idx = date_range("20170101", periods=4) + idx = idx._with_freq(None) # tz_localize does not preserve freq + with tm.assert_produces_warning(FutureWarning): + # dt64->dt64tz deprecated + result = idx.astype("datetime64[ns, US/Eastern]") + with tm.assert_produces_warning(FutureWarning): + # dt64->dt64tz deprecated + idx._data.astype("datetime64[ns, US/Eastern]") + + expected = date_range("20170101", periods=4, tz="US/Eastern") + expected = expected._with_freq(None) + tm.assert_index_equal(result, expected) + + def test_astype_str_nat(self): + # GH 13149, GH 13209 + # verify that we are returning NaT as a string (and not unicode) + + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + result = idx.astype(str) + expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object) + tm.assert_index_equal(result, expected) + + def test_astype_str(self): + # test astype string - #10442 + dti = date_range("2012-01-01", periods=4, name="test_name") + result = dti.astype(str) + expected = Index( + ["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + def test_astype_str_tz_and_name(self): + # test astype string with tz and name + dti = date_range("2012-01-01", periods=3, name="test_name", tz="US/Eastern") + result = dti.astype(str) + expected = Index( + [ + "2012-01-01 00:00:00-05:00", + "2012-01-02 00:00:00-05:00", + "2012-01-03 00:00:00-05:00", + ], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + def test_astype_str_freq_and_name(self): + # test astype string with freqH and name + dti = date_range("1/1/2011", periods=3, freq="H", name="test_name") + result = dti.astype(str) + expected = Index( + ["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + def test_astype_str_freq_and_tz(self): + # test astype string with freqH and timezone + dti = date_range( + "3/6/2012 00:00", periods=2, freq="H", tz="Europe/London", name="test_name" + ) + result = dti.astype(str) + expected = Index( + ["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"], + dtype=object, + name="test_name", + ) + tm.assert_index_equal(result, expected) + + def test_astype_datetime64(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx") + + result = idx.astype("datetime64[ns]") + tm.assert_index_equal(result, idx) + assert result is not idx + + result = idx.astype("datetime64[ns]", copy=False) + tm.assert_index_equal(result, idx) + assert result is idx + + idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST", name="idx") + with tm.assert_produces_warning(FutureWarning): + # dt64tz->dt64 deprecated + result = idx_tz.astype("datetime64[ns]") + expected = DatetimeIndex( + ["2016-05-16 05:00:00", "NaT", "NaT", 
"NaT"], + dtype="datetime64[ns]", + name="idx", + ) + tm.assert_index_equal(result, expected) + + def test_astype_object(self): + rng = date_range("1/1/2000", periods=20) + + casted = rng.astype("O") + exp_values = list(rng) + + tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) + assert casted.tolist() == exp_values + + @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"]) + def test_astype_object_tz(self, tz): + idx = date_range(start="2013-01-01", periods=4, freq="M", name="idx", tz=tz) + expected_list = [ + Timestamp("2013-01-31", tz=tz), + Timestamp("2013-02-28", tz=tz), + Timestamp("2013-03-31", tz=tz), + Timestamp("2013-04-30", tz=tz), + ] + expected = Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = DatetimeIndex( + [datetime(2013, 1, 1), datetime(2013, 1, 2), NaT, datetime(2013, 1, 4)], + name="idx", + ) + expected_list = [ + Timestamp("2013-01-01"), + Timestamp("2013-01-02"), + NaT, + Timestamp("2013-01-04"), + ] + expected = Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + @pytest.mark.parametrize( + "dtype", + [float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"], + ) + def test_astype_raises(self, dtype): + # GH 13149, GH 13209 + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + msg = "Cannot cast DatetimeIndex to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_index_convert_to_datetime_array(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz="US/Eastern") + rng_utc = date_range("20090415", "20090519", tz="utc") + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_explicit_pytz(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern")) + rng_utc = date_range("20090415", "20090519", tz=pytz.utc) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_dateutil(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern") + rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc()) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + @pytest.mark.parametrize( + "tz, dtype", + [["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]], + ) + def test_integer_index_astype_datetime(self, tz, dtype): + # GH 20997, 20964, 24559 + val = [Timestamp("2018-01-01", tz=tz).value] + 
result = Index(val, name="idx").astype(dtype) + expected = DatetimeIndex(["2018-01-01"], tz=tz, name="idx") + tm.assert_index_equal(result, expected) + + def test_dti_astype_period(self): + idx = DatetimeIndex([NaT, "2011-01-01", "2011-02-01"], name="idx") + + res = idx.astype("period[M]") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx") + tm.assert_index_equal(res, exp) + + res = idx.astype("period[3M]") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx") + tm.assert_index_equal(res, exp) + + +class TestAstype: + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_astype_category(self, tz): + obj = date_range("2000", periods=2, tz=tz, name="idx") + result = obj.astype("category") + expected = pd.CategoricalIndex( + [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)], + name="idx", + ) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_astype_array_fallback(self, tz): + obj = date_range("2000", periods=2, tz=tz, name="idx") + result = obj.astype(bool) + expected = Index(np.array([True, True]), name="idx") + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_factorize.py b/pandas/tests/indexes/datetimes/methods/test_factorize.py new file mode 100644 index 00000000..90ad65c4 --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_factorize.py @@ -0,0 +1,107 @@ +import numpy as np + +from pandas import ( + DatetimeIndex, + Index, + date_range, + factorize, +) +import pandas._testing as tm + + +class TestDatetimeIndexFactorize: + def test_factorize(self): + idx1 = DatetimeIndex( + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"] + ) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"]) + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + arr, idx = idx1.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + # tz must be preserved + idx1 = idx1.tz_localize("Asia/Tokyo") + exp_idx = exp_idx.tz_localize("Asia/Tokyo") + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + idx2 = DatetimeIndex( + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"] + ) + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"]) + arr, idx = idx2.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"]) + arr, idx = idx2.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + def test_factorize_preserves_freq(self): + # GH#38120 freq should be preserved + idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo") + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + + arr, idx = idx3.factorize() + 
tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq + + arr, idx = factorize(idx3) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq + + def test_factorize_tz(self, tz_naive_fixture, index_or_series): + tz = tz_naive_fixture + # GH#13750 + base = date_range("2016-11-05", freq="H", periods=100, tz=tz) + idx = base.repeat(5) + + exp_arr = np.arange(100, dtype=np.intp).repeat(5) + + obj = index_or_series(idx) + + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + expected = base._with_freq(None) + tm.assert_index_equal(res, expected) + assert res.freq == expected.freq + + def test_factorize_dst(self, index_or_series): + # GH#13750 + idx = date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern") + obj = index_or_series(idx) + + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + if index_or_series is Index: + assert res.freq == idx.freq + + idx = date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern") + obj = index_or_series(idx) + + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + if index_or_series is Index: + assert res.freq == idx.freq diff --git a/pandas/tests/indexes/datetimes/methods/test_fillna.py b/pandas/tests/indexes/datetimes/methods/test_fillna.py new file mode 100644 index 00000000..5fbe60bb --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_fillna.py @@ -0,0 +1,62 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestDatetimeIndexFillNA: + @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"]) + def test_fillna_datetime64(self, tz): + # GH 11343 + idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"]) + + exp = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"] + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp) + + # tz mismatch + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 10:00", tz=tz), + pd.Timestamp("2011-01-01 11:00"), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp) + + # object + exp = pd.Index( + [pd.Timestamp("2011-01-01 09:00"), "x", pd.Timestamp("2011-01-01 11:00")], + dtype=object, + ) + tm.assert_index_equal(idx.fillna("x"), exp) + + idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz) + + exp = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], tz=tz + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp) + + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00", tz=tz), + pd.Timestamp("2011-01-01 10:00"), + pd.Timestamp("2011-01-01 11:00", tz=tz), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp) + + # object + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00", tz=tz), + "x", + pd.Timestamp("2011-01-01 11:00", tz=tz), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna("x"), exp) diff --git a/pandas/tests/indexes/datetimes/methods/test_insert.py b/pandas/tests/indexes/datetimes/methods/test_insert.py new file mode 100644 index 00000000..592f4240 --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_insert.py @@ -0,0 +1,267 @@ +from datetime import datetime + +import numpy as np 
+import pytest +import pytz + +from pandas import ( + NA, + DatetimeIndex, + Index, + NaT, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestInsert: + @pytest.mark.parametrize("null", [None, np.nan, np.datetime64("NaT"), NaT, NA]) + @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) + def test_insert_nat(self, tz, null): + # GH#16537, GH#18295 (test missing) + + idx = DatetimeIndex(["2017-01-01"], tz=tz) + expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz) + if tz is not None and isinstance(null, np.datetime64): + expected = Index([null, idx[0]], dtype=object) + + res = idx.insert(0, null) + tm.assert_index_equal(res, expected) + + @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) + def test_insert_invalid_na(self, tz): + idx = DatetimeIndex(["2017-01-01"], tz=tz) + + item = np.timedelta64("NaT") + result = idx.insert(0, item) + expected = Index([item] + list(idx), dtype=object) + tm.assert_index_equal(result, expected) + + def test_insert_empty_preserves_freq(self, tz_naive_fixture): + # GH#33573 + tz = tz_naive_fixture + dti = DatetimeIndex([], tz=tz, freq="D") + item = Timestamp("2017-04-05").tz_localize(tz) + + result = dti.insert(0, item) + assert result.freq == dti.freq + + # But not when we insert an item that doesn't conform to freq + dti = DatetimeIndex([], tz=tz, freq="W-THU") + result = dti.insert(0, item) + assert result.freq is None + + def test_insert(self): + idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"], name="idx") + + result = idx.insert(2, datetime(2000, 1, 5)) + exp = DatetimeIndex( + ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx" + ) + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, "inserted") + expected = Index( + [ + datetime(2000, 1, 4), + "inserted", + datetime(2000, 1, 1), + datetime(2000, 1, 2), + ], + name="idx", + ) + assert not isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + idx = date_range("1/1/2000", periods=3, freq="M", name="idx") + + # preserve freq + expected_0 = DatetimeIndex( + ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"], + name="idx", + freq="M", + ) + expected_3 = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"], + name="idx", + freq="M", + ) + + # reset freq to None + expected_1_nofreq = DatetimeIndex( + ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"], + name="idx", + freq=None, + ) + expected_3_nofreq = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"], + name="idx", + freq=None, + ) + + cases = [ + (0, datetime(1999, 12, 31), expected_0), + (-3, datetime(1999, 12, 31), expected_0), + (3, datetime(2000, 4, 30), expected_3), + (1, datetime(2000, 1, 31), expected_1_nofreq), + (3, datetime(2000, 1, 2), expected_3_nofreq), + ] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + # reset freq to None + result = idx.insert(3, datetime(2000, 1, 2)) + expected = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"], + name="idx", + freq=None, + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq is None + + for tz in ["US/Pacific", "Asia/Singapore"]: + idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx") + # preserve freq + expected = 
date_range( + "1/1/2000 09:00", periods=7, freq="H", tz=tz, name="idx" + ) + for d in [ + Timestamp("2000-01-01 15:00", tz=tz), + pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)), + ]: + + result = idx.insert(6, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + expected = DatetimeIndex( + [ + "2000-01-01 09:00", + "2000-01-01 10:00", + "2000-01-01 11:00", + "2000-01-01 12:00", + "2000-01-01 13:00", + "2000-01-01 14:00", + "2000-01-01 10:00", + ], + name="idx", + tz=tz, + freq=None, + ) + # reset freq to None + for d in [ + Timestamp("2000-01-01 10:00", tz=tz), + pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)), + ]: + result = idx.insert(6, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.tz == expected.tz + assert result.freq is None + + # TODO: also changes DataFrame.__setitem__ with expansion + def test_insert_mismatched_tzawareness(self): + # see GH#7299 + idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") + + # mismatched tz-awareness + item = Timestamp("2000-01-04") + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + # mismatched tz-awareness + item = datetime(2000, 1, 4) + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + # TODO: also changes DataFrame.__setitem__ with expansion + def test_insert_mismatched_tz(self): + # see GH#7299 + idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") + + # mismatched tz -> cast to object (could reasonably cast to same tz or UTC) + item = Timestamp("2000-01-04", tz="US/Eastern") + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), + dtype=object, + # once deprecation is enforced + # list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]), + name="idx", + ) + # once deprecation is enforced + # assert expected.dtype == idx.dtype + tm.assert_index_equal(result, expected) + + # mismatched tz -> cast to object (could reasonably cast to same tz) + item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")) + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = idx.insert(3, item) + expected = Index( + list(idx[:3]) + [item] + list(idx[3:]), + dtype=object, + # once deprecation is enforced + # list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]), + name="idx", + ) + # once deprecation is enforced + # assert expected.dtype == idx.dtype + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)] + ) + def test_insert_mismatched_types_raises(self, tz_aware_fixture, item): + # GH#33703 dont cast these to dt64 + tz = tz_aware_fixture + dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz) + + result = dti.insert(1, item) + + if isinstance(item, np.ndarray): + assert item.item() == 0 + expected = Index([dti[0], 0] + list(dti[1:]), dtype=object, name=9) + else: + expected = Index([dti[0], item] + list(dti[1:]), dtype=object, name=9) + + tm.assert_index_equal(result, expected) + + def test_insert_castable_str(self, tz_aware_fixture): + # GH#33703 + tz 
= tz_aware_fixture + dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz) + + value = "2019-11-05" + result = dti.insert(0, value) + + ts = Timestamp(value).tz_localize(tz) + expected = DatetimeIndex([ts] + list(dti), dtype=dti.dtype, name=9) + tm.assert_index_equal(result, expected) + + def test_insert_non_castable_str(self, tz_aware_fixture): + # GH#33703 + tz = tz_aware_fixture + dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz) + + value = "foo" + result = dti.insert(0, value) + + expected = Index(["foo"] + list(dti), dtype=object, name=9) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_isocalendar.py b/pandas/tests/indexes/datetimes/methods/test_isocalendar.py new file mode 100644 index 00000000..128a8b3e --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_isocalendar.py @@ -0,0 +1,20 @@ +from pandas import ( + DataFrame, + DatetimeIndex, +) +import pandas._testing as tm + + +def test_isocalendar_returns_correct_values_close_to_new_year_with_tz(): + # GH#6538: Check that DatetimeIndex and its TimeStamp elements + # return the same weekofyear accessor close to new year w/ tz + dates = ["2013/12/29", "2013/12/30", "2013/12/31"] + dates = DatetimeIndex(dates, tz="Europe/Brussels") + result = dates.isocalendar() + expected_data_frame = DataFrame( + [[2013, 52, 7], [2014, 1, 1], [2014, 1, 2]], + columns=["year", "week", "day"], + index=dates, + dtype="UInt32", + ) + tm.assert_frame_equal(result, expected_data_frame) diff --git a/pandas/tests/indexes/datetimes/methods/test_repeat.py b/pandas/tests/indexes/datetimes/methods/test_repeat.py new file mode 100644 index 00000000..c18109a2 --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_repeat.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestRepeat: + def test_repeat_range(self, tz_naive_fixture): + tz = tz_naive_fixture + rng = date_range("1/1/2000", "1/1/2001") + + result = rng.repeat(5) + assert result.freq is None + assert len(result) == 5 * len(rng) + + index = date_range("2001-01-01", periods=2, freq="D", tz=tz) + exp = DatetimeIndex( + ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz + ) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = date_range("2001-01-01", periods=2, freq="2D", tz=tz) + exp = DatetimeIndex( + ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz + ) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) + exp = DatetimeIndex( + [ + "2001-01-01", + "2001-01-01", + "2001-01-01", + "NaT", + "NaT", + "NaT", + "2003-01-01", + "2003-01-01", + "2003-01-01", + ], + tz=tz, + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + def test_repeat(self, tz_naive_fixture): + tz = tz_naive_fixture + reps = 2 + msg = "the 'axis' parameter is not supported" + + rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 00:30:00", tz=tz), + Timestamp("2016-01-01 00:30:00", tz=tz), + ] + ) + + res = rng.repeat(reps) + tm.assert_index_equal(res, expected_rng) + 
assert res.freq is None + + tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + with pytest.raises(ValueError, match=msg): + np.repeat(rng, reps, axis=1) diff --git a/pandas/tests/indexes/datetimes/methods/test_shift.py b/pandas/tests/indexes/datetimes/methods/test_shift.py new file mode 100644 index 00000000..5a47b36a --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_shift.py @@ -0,0 +1,163 @@ +from datetime import datetime + +import pytest +import pytz + +from pandas.errors import NullFrequencyError + +import pandas as pd +from pandas import ( + DatetimeIndex, + Series, + date_range, +) +import pandas._testing as tm + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestDatetimeIndexShift: + + # ------------------------------------------------------------- + # DatetimeIndex.shift is used in integer addition + + def test_dti_shift_tzaware(self, tz_naive_fixture): + # GH#9903 + tz = tz_naive_fixture + idx = DatetimeIndex([], name="xxx", tz=tz) + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + tm.assert_index_equal(idx.shift(3, freq="H"), idx) + + idx = DatetimeIndex( + ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"], + name="xxx", + tz=tz, + freq="H", + ) + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + exp = DatetimeIndex( + ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"], + name="xxx", + tz=tz, + freq="H", + ) + tm.assert_index_equal(idx.shift(3, freq="H"), exp) + exp = DatetimeIndex( + ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"], + name="xxx", + tz=tz, + freq="H", + ) + tm.assert_index_equal(idx.shift(-3, freq="H"), exp) + + def test_dti_shift_freqs(self): + # test shift for DatetimeIndex and non DatetimeIndex + # GH#8083 + drange = date_range("20130101", periods=5) + result = drange.shift(1) + expected = DatetimeIndex( + ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = DatetimeIndex( + ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + result = drange.shift(3, freq="2D") + expected = DatetimeIndex( + ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_dti_shift_int(self): + rng = date_range("1/1/2000", periods=20) + + result = rng + 5 * rng.freq + expected = rng.shift(5) + tm.assert_index_equal(result, expected) + + result = rng - 5 * rng.freq + expected = rng.shift(-5) + tm.assert_index_equal(result, expected) + + def test_dti_shift_no_freq(self): + # GH#19147 + dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None) + with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"): + dti.shift(2) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_shift_localized(self, tzstr): + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + dr_tz = dr.tz_localize(tzstr) + + result = dr_tz.shift(1, "10T") + assert result.tz == dr_tz.tz + + def test_dti_shift_across_dst(self): + # GH 8616 + idx = date_range("2013-11-03", tz="America/Chicago", periods=7, freq="H") + s = Series(index=idx[:-1], dtype=object) + result = s.shift(freq="H") + expected = Series(index=idx[1:], dtype=object) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "shift, result_time", + [ + [0, "2014-11-14 00:00:00"], + [-1, "2014-11-13 23:00:00"], + [1, 
"2014-11-14 01:00:00"], + ], + ) + def test_dti_shift_near_midnight(self, shift, result_time): + # GH 8616 + dt = datetime(2014, 11, 14, 0) + dt_est = pytz.timezone("EST").localize(dt) + s = Series(data=[1], index=[dt_est]) + result = s.shift(shift, freq="H") + expected = Series(1, index=DatetimeIndex([result_time], tz="EST")) + tm.assert_series_equal(result, expected) + + def test_shift_periods(self): + # GH#22458 : argument 'n' was deprecated in favor of 'periods' + idx = date_range(start=START, end=END, periods=3) + tm.assert_index_equal(idx.shift(periods=0), idx) + tm.assert_index_equal(idx.shift(0), idx) + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_shift_bday(self, freq): + rng = date_range(START, END, freq=freq) + shifted = rng.shift(5) + assert shifted[0] == rng[5] + assert shifted.freq == rng.freq + + shifted = rng.shift(-5) + assert shifted[5] == rng[0] + assert shifted.freq == rng.freq + + shifted = rng.shift(0) + assert shifted[0] == rng[0] + assert shifted.freq == rng.freq + + def test_shift_bmonth(self): + rng = date_range(START, END, freq=pd.offsets.BMonthEnd()) + shifted = rng.shift(1, freq=pd.offsets.BDay()) + assert shifted[0] == rng[0] + pd.offsets.BDay() + + rng = date_range(START, END, freq=pd.offsets.BMonthEnd()) + with tm.assert_produces_warning(pd.errors.PerformanceWarning): + shifted = rng.shift(1, freq=pd.offsets.CDay()) + assert shifted[0] == rng[0] + pd.offsets.CDay() + + def test_shift_empty(self): + # GH#14811 + dti = date_range(start="2016-10-21", end="2016-10-21", freq="BM") + result = dti.shift(1) + tm.assert_index_equal(result, dti) diff --git a/pandas/tests/indexes/datetimes/methods/test_snap.py b/pandas/tests/indexes/datetimes/methods/test_snap.py new file mode 100644 index 00000000..a94d00d9 --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_snap.py @@ -0,0 +1,67 @@ +import pytest + +from pandas import ( + DatetimeIndex, + date_range, +) +import pandas._testing as tm + + +def astype_non_nano(dti_nano, unit): + # TODO(2.0): remove once DTI/DTA.astype supports non-nano + if unit == "ns": + return dti_nano + + dta_nano = dti_nano._data + arr_nano = dta_nano._ndarray + + arr = arr_nano.astype(f"M8[{unit}]") + if dti_nano.tz is None: + dtype = arr.dtype + else: + dtype = type(dti_nano.dtype)(tz=dti_nano.tz, unit=unit) + dta = type(dta_nano)._simple_new(arr, dtype=dtype) + dti = DatetimeIndex(dta, name=dti_nano.name) + assert dti.dtype == dtype + return dti + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"]) +@pytest.mark.parametrize("name", [None, "my_dti"]) +@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) +def test_dti_snap(name, tz, unit): + dti = DatetimeIndex( + [ + "1/1/2002", + "1/2/2002", + "1/3/2002", + "1/4/2002", + "1/5/2002", + "1/6/2002", + "1/7/2002", + ], + name=name, + tz=tz, + freq="D", + ) + dti = astype_non_nano(dti, unit) + + result = dti.snap(freq="W-MON") + expected = date_range("12/31/2001", "1/7/2002", name=name, tz=tz, freq="w-mon") + expected = expected.repeat([3, 4]) + expected = astype_non_nano(expected, unit) + tm.assert_index_equal(result, expected) + assert result.tz == expected.tz + assert result.freq is None + assert expected.freq is None + + result = dti.snap(freq="B") + + expected = date_range("1/1/2002", "1/7/2002", name=name, tz=tz, freq="b") + expected = expected.repeat([1, 1, 1, 2, 2]) + expected = astype_non_nano(expected, unit) + tm.assert_index_equal(result, expected) + assert result.tz == 
expected.tz + assert result.freq is None + assert expected.freq is None diff --git a/pandas/tests/indexes/datetimes/methods/test_to_frame.py b/pandas/tests/indexes/datetimes/methods/test_to_frame.py new file mode 100644 index 00000000..fa5cca1c --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_to_frame.py @@ -0,0 +1,31 @@ +from pandas import ( + DataFrame, + Index, + date_range, +) +import pandas._testing as tm + + +class TestToFrame: + def test_to_frame_datetime_tz(self): + # GH#25809 + idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC") + result = idx.to_frame() + expected = DataFrame(idx, index=idx) + tm.assert_frame_equal(result, expected) + + def test_to_frame_respects_none_name(self): + # GH#44212 if we explicitly pass name=None, then that should be respected, + # not changed to 0 + # GH-45448 this is first deprecated to only change in the future + idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC") + with tm.assert_produces_warning(FutureWarning): + result = idx.to_frame(name=None) + # exp_idx = Index([None], dtype=object) + exp_idx = Index([0]) + tm.assert_index_equal(exp_idx, result.columns) + + with tm.assert_produces_warning(FutureWarning): + result = idx.rename("foo").to_frame(name=None) + exp_idx = Index(["foo"], dtype=object) + tm.assert_index_equal(exp_idx, result.columns) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py new file mode 100644 index 00000000..f6a598bd --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py @@ -0,0 +1,191 @@ +import warnings + +import dateutil.tz +from dateutil.tz import tzlocal +import pytest +import pytz + +from pandas._libs.tslibs.ccalendar import MONTHS +from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG + +from pandas import ( + DatetimeIndex, + Period, + PeriodIndex, + Timestamp, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestToPeriod: + def test_dti_to_period(self): + dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + pi1 = dti.to_period() + pi2 = dti.to_period(freq="D") + pi3 = dti.to_period(freq="3D") + + assert pi1[0] == Period("Jan 2005", freq="M") + assert pi2[0] == Period("1/31/2005", freq="D") + assert pi3[0] == Period("1/31/2005", freq="3D") + + assert pi1[-1] == Period("Nov 2005", freq="M") + assert pi2[-1] == Period("11/30/2005", freq="D") + assert pi3[-1], Period("11/30/2005", freq="3D") + + tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M")) + tm.assert_index_equal( + pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D") + ) + tm.assert_index_equal( + pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D") + ) + + @pytest.mark.parametrize("month", MONTHS) + def test_to_period_quarterly(self, month): + # make sure we can make the round trip + freq = f"Q-{month}" + rng = period_range("1989Q3", "1991Q3", freq=freq) + stamps = rng.to_timestamp() + result = stamps.to_period(freq) + tm.assert_index_equal(rng, result) + + @pytest.mark.parametrize("off", ["BQ", "QS", "BQS"]) + def test_to_period_quarterlyish(self, off): + rng = date_range("01-Jan-2012", periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == "Q-DEC" + + @pytest.mark.parametrize("off", ["BA", "AS", "BAS"]) + def test_to_period_annualish(self, off): + rng = date_range("01-Jan-2012", periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == "A-DEC" + + def test_to_period_monthish(self): + 
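+ # Illustrative sketch (not upstream pandas code): month-anchored offsets
+ # such as "MS" (month start) and "BM" (business month end) are expected to
+ # map to Period frequency "M", e.g.
+ #   date_range("01-Jan-2012", periods=8, freq="MS").to_period().freq == "M"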
offsets = ["MS", "BM"] + for off in offsets: + rng = date_range("01-Jan-2012", periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == "M" + + rng = date_range("01-Jan-2012", periods=8, freq="M") + prng = rng.to_period() + assert prng.freq == "M" + + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + date_range("01-Jan-2012", periods=8, freq="EOM") + + def test_to_period_infer(self): + # https://github.com/pandas-dev/pandas/issues/33358 + rng = date_range( + start="2019-12-22 06:40:00+00:00", + end="2019-12-22 08:45:00+00:00", + freq="5min", + ) + + with tm.assert_produces_warning(None): + # Using simple filter because we are not checking for the warning here + warnings.simplefilter("ignore", UserWarning) + + pi1 = rng.to_period("5min") + + with tm.assert_produces_warning(None): + # Using simple filter because we are not checking for the warning here + warnings.simplefilter("ignore", UserWarning) + + pi2 = rng.to_period() + + tm.assert_index_equal(pi1, pi2) + + def test_period_dt64_round_trip(self): + dti = date_range("1/1/2000", "1/7/2002", freq="B") + pi = dti.to_period() + tm.assert_index_equal(pi.to_timestamp(), dti) + + dti = date_range("1/1/2000", "1/7/2002", freq="B") + pi = dti.to_period(freq="H") + tm.assert_index_equal(pi.to_timestamp(), dti) + + def test_to_period_millisecond(self): + index = DatetimeIndex( + [ + Timestamp("2007-01-01 10:11:12.123456Z"), + Timestamp("2007-01-01 10:11:13.789123Z"), + ] + ) + + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq="L") + assert 2 == len(period) + assert period[0] == Period("2007-01-01 10:11:12.123Z", "L") + assert period[1] == Period("2007-01-01 10:11:13.789Z", "L") + + def test_to_period_microsecond(self): + index = DatetimeIndex( + [ + Timestamp("2007-01-01 10:11:12.123456Z"), + Timestamp("2007-01-01 10:11:13.789123Z"), + ] + ) + + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq="U") + assert 2 == len(period) + assert period[0] == Period("2007-01-01 10:11:12.123456Z", "U") + assert period[1] == Period("2007-01-01 10:11:13.789123Z", "U") + + @pytest.mark.parametrize( + "tz", + ["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()], + ) + def test_to_period_tz(self, tz): + ts = date_range("1/1/2000", "2/1/2000", tz=tz) + + with tm.assert_produces_warning(UserWarning): + # GH#21333 warning that timezone info will be lost + # filter warning about freq deprecation + warnings.filterwarnings("ignore", category=FutureWarning) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + assert result == expected + + expected = date_range("1/1/2000", "2/1/2000").to_period() + + with tm.assert_produces_warning(UserWarning): + # GH#21333 warning that timezone info will be lost + result = ts.to_period() + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"]) + def test_to_period_tz_utc_offset_consistency(self, tz): + # GH#22905 + ts = date_range("1/1/2000", "2/1/2000", tz="Etc/GMT-1") + with tm.assert_produces_warning(UserWarning): + warnings.filterwarnings("ignore", category=FutureWarning) + + result = ts.to_period()[0] + expected = ts[0].to_period() + assert result == expected + + def test_to_period_nofreq(self): + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"]) + msg = "You must pass a freq argument as current index has none." 
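+ # Illustrative sketch (not upstream pandas code): the index above skips
+ # 2000-01-03, so no frequency can be inferred; .to_period() therefore needs
+ # an explicit freq, e.g. idx.to_period("D"), to succeed.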
+ with pytest.raises(ValueError, match=msg): + idx.to_period() + + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer") + assert idx.freqstr == "D" + expected = PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D") + tm.assert_index_equal(idx.to_period(), expected) + + # GH#7606 + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) + assert idx.freqstr is None + tm.assert_index_equal(idx.to_period(), expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_series.py b/pandas/tests/indexes/datetimes/methods/test_to_series.py new file mode 100644 index 00000000..5a216d3c --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_to_series.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Series, +) +import pandas._testing as tm + + +class TestToSeries: + @pytest.fixture + def idx_expected(self): + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + + assert expected.dtype == idx.dtype + return idx, expected + + def test_to_series_keep_tz_deprecated_true(self, idx_expected): + # convert to series while keeping the timezone + idx, expected = idx_expected + + msg = "stop passing 'keep_tz'" + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=True, index=[0, 1]) + assert msg in str(m[0].message) + + tm.assert_series_equal(result, expected) + + def test_to_series_keep_tz_deprecated_false(self, idx_expected): + idx, expected = idx_expected + + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=False, index=[0, 1]) + tm.assert_series_equal(result, expected.dt.tz_convert(None)) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) diff --git a/pandas/tests/indexes/datetimes/test_asof.py b/pandas/tests/indexes/datetimes/test_asof.py new file mode 100644 index 00000000..7adc4003 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_asof.py @@ -0,0 +1,31 @@ +from datetime import timedelta + +from pandas import ( + Index, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm + + +class TestAsOf: + def test_asof_partial(self): + index = date_range("2010-01-01", periods=2, freq="m") + expected = Timestamp("2010-02-28") + result = index.asof("2010-02") + assert result == expected + assert not isinstance(result, Index) + + def test_asof(self): + index = tm.makeDateIndex(100) + + dt = index[0] + assert index.asof(dt) == dt + assert isna(index.asof(dt - timedelta(1))) + + dt = index[-1] + assert index.asof(dt + timedelta(1)) == dt + + dt = index[0].to_pydatetime() + assert isinstance(index.asof(dt), Timestamp) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py new file mode 100644 index 00000000..1d161630 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -0,0 +1,1184 @@ +from datetime import ( + datetime, + timedelta, + timezone, +) +from functools import partial +from operator import attrgetter + +import dateutil +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + astype_overflowsafe, +) +from pandas.compat import PY39 + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + date_range, + offsets, + to_datetime, +) +import pandas._testing as tm +from pandas.core.arrays 
import ( + DatetimeArray, + period_array, +) + +if PY39: + import zoneinfo + + +class TestDatetimeIndex: + @pytest.mark.parametrize( + "dt_cls", [DatetimeIndex, DatetimeArray._from_sequence_not_strict] + ) + def test_freq_validation_with_nat(self, dt_cls): + # GH#11587 make sure we get a useful error message when generate_range + # raises + msg = ( + "Inferred frequency None from passed values does not conform " + "to passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + dt_cls([pd.NaT, Timestamp("2011-01-01")], freq="D") + with pytest.raises(ValueError, match=msg): + dt_cls([pd.NaT, Timestamp("2011-01-01").value], freq="D") + + # TODO: better place for tests shared by DTI/TDI? + @pytest.mark.parametrize( + "index", + [ + date_range("2016-01-01", periods=5, tz="US/Pacific"), + pd.timedelta_range("1 Day", periods=5), + ], + ) + def test_shallow_copy_inherits_array_freq(self, index): + # If we pass a DTA/TDA to shallow_copy and dont specify a freq, + # we should inherit the array's freq, not our own. + array = index._data + + arr = array[[0, 3, 2, 4, 1]] + assert arr.freq is None + + result = index._shallow_copy(arr) + assert result.freq is None + + def test_categorical_preserves_tz(self): + # GH#18664 retain tz when going DTI-->Categorical-->DTI + dti = DatetimeIndex( + [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern" + ) + + for dtobj in [dti, dti._data]: + # works for DatetimeIndex or DatetimeArray + + ci = pd.CategoricalIndex(dtobj) + carr = pd.Categorical(dtobj) + cser = pd.Series(ci) + + for obj in [ci, carr, cser]: + result = DatetimeIndex(obj) + tm.assert_index_equal(result, dti) + + def test_dti_with_period_data_raises(self): + # GH#23675 + data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q") + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(period_array(data)) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(period_array(data)) + + def test_dti_with_timedelta64_data_raises(self): + # GH#23675 deprecated, enforrced in GH#29794 + data = np.array([0], dtype="m8[ns]") + msg = r"timedelta64\[ns\] cannot be converted to datetime64" + with pytest.raises(TypeError, match=msg): + DatetimeIndex(data) + + with pytest.raises(TypeError, match=msg): + to_datetime(data) + + with pytest.raises(TypeError, match=msg): + DatetimeIndex(pd.TimedeltaIndex(data)) + + with pytest.raises(TypeError, match=msg): + to_datetime(pd.TimedeltaIndex(data)) + + def test_constructor_from_sparse_array(self): + # https://github.com/pandas-dev/pandas/issues/35843 + values = [ + Timestamp("2012-05-01T01:00:00.000000"), + Timestamp("2016-05-01T01:00:00.000000"), + ] + arr = pd.arrays.SparseArray(values) + msg = "will store that array directly" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = Index(arr) + expected = DatetimeIndex(values) + tm.assert_index_equal(result, expected) + + def test_construction_caching(self): + + df = pd.DataFrame( + { + "dt": date_range("20130101", periods=3), + "dttz": date_range("20130101", periods=3, tz="US/Eastern"), + "dt_with_null": [ + Timestamp("20130101"), + pd.NaT, + Timestamp("20130103"), + ], + "dtns": date_range("20130101", periods=3, freq="ns"), + } + ) + assert df.dttz.dtype.tz.zone == "US/Eastern" + + @pytest.mark.parametrize( + "kwargs", + [{"tz": 
"dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}], + ) + def test_construction_with_alt(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = date_range("20130101", periods=5, freq="H", tz=tz) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(i, result) + + @pytest.mark.parametrize( + "kwargs", + [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}], + ) + def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = date_range("20130101", periods=5, freq="H", tz=tz) + i = i._with_freq(None) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + + if "tz" in kwargs: + result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(kwargs["tz"]) + + expected = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(result, expected) + + # localize into the provided tz + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC") + expected = i.tz_localize(None).tz_localize("UTC") + tm.assert_index_equal(i2, expected) + + # incompat tz/dtype + msg = "cannot supply both a tz and a dtype with a tz" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific") + + def test_construction_index_with_mixed_timezones(self): + # gh-11488: no tz results in DatetimeIndex + result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx") + exp = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # same tz results in DatetimeIndex + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # same tz results in DatetimeIndex (DST) + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + Timestamp("2011-08-01 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # Different tz results in Index(dtype=object) + result = Index( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + # length = 1 + result = Index([Timestamp("2011-01-01")], name="idx") + exp = 
DatetimeIndex([Timestamp("2011-01-01")], name="idx") + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # length = 1 with tz + result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx") + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx" + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + def test_construction_index_with_mixed_timezones_with_NaT(self): + # see gh-11488 + result = Index( + [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")], + name="idx", + ) + exp = DatetimeIndex( + [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")], + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # Same tz results in DatetimeIndex + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00"), + ], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # same tz results in DatetimeIndex (DST) + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + pd.NaT, + Timestamp("2011-08-01 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # different tz results in Index(dtype=object) + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + # all NaT + result = Index([pd.NaT, pd.NaT], name="idx") + exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx") + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # all NaT with tz + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") + exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") + + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + def test_construction_dti_with_mixed_timezones(self): + # GH 11488 (not changed, 
added explicit tests) + + # no tz results in DatetimeIndex + result = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # same tz results in DatetimeIndex + result = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # same tz results in DatetimeIndex (DST) + result = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + Timestamp("2011-08-01 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # tz mismatch affecting to tz-aware raises TypeError/ValueError + + msg = "cannot be converted to datetime64" + with pytest.raises(ValueError, match=msg): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + + with pytest.raises(ValueError, match=msg): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="Asia/Tokyo", + name="idx", + ) + + with pytest.raises(ValueError, match=msg): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="US/Eastern", + name="idx", + ) + + with pytest.raises(ValueError, match=msg): + # passing tz should results in DatetimeIndex, then mismatch raises + # TypeError + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="Asia/Tokyo", + name="idx", + ) + + def test_construction_base_constructor(self): + arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")] + tm.assert_index_equal(Index(arr), DatetimeIndex(arr)) + tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, Timestamp("2011-01-03")] + tm.assert_index_equal(Index(arr), DatetimeIndex(arr)) + tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr))) + + def test_construction_outofbounds(self): + # GH 13663 + dates = [ + datetime(3000, 1, 1), + datetime(4000, 1, 1), + datetime(5000, 1, 1), + datetime(6000, 1, 1), + ] + exp = Index(dates, dtype=object) + # coerces to object + tm.assert_index_equal(Index(dates), exp) + + msg = "Out of bounds .* present at position 0" + with pytest.raises(OutOfBoundsDatetime, match=msg): + # can't create DatetimeIndex + DatetimeIndex(dates) + + def test_construction_with_ndarray(self): + # GH 5152 + dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)] + data = DatetimeIndex(dates, freq=offsets.BDay()).values + result = DatetimeIndex(data, freq=offsets.BDay()) + expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B") + tm.assert_index_equal(result, expected) + + def test_integer_values_and_tz_interpreted_as_utc(self): + # 
GH-24559 + val = np.datetime64("2000-01-01 00:00:00", "ns") + values = np.array([val.view("i8")]) + + result = DatetimeIndex(values).tz_localize("US/Central") + + expected = DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central") + tm.assert_index_equal(result, expected) + + # but UTC is *not* deprecated. + with tm.assert_produces_warning(None): + result = DatetimeIndex(values, tz="UTC") + expected = DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central") + + def test_constructor_coverage(self): + rng = date_range("1/1/2000", periods=10.5) + exp = date_range("1/1/2000", periods=10) + tm.assert_index_equal(rng, exp) + + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + date_range(start="1/1/2000", periods="foo", freq="D") + + msg = r"DatetimeIndex\(\.\.\.\) must be called with a collection" + with pytest.raises(TypeError, match=msg): + DatetimeIndex("1/1/2000") + + # generator expression + gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) + result = DatetimeIndex(gen) + expected = DatetimeIndex( + [datetime(2000, 1, 1) + timedelta(i) for i in range(10)] + ) + tm.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"]) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype("O")) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # string with NaT + strings = np.array(["2000-01-01", "2000-01-02", "NaT"]) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype("O")) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # non-conforming + msg = ( + "Inferred frequency None from passed values does not conform " + "to passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D") + + msg = ( + "Of the four parameters: start, end, periods, and freq, exactly " + "three must be specified" + ) + with pytest.raises(ValueError, match=msg): + date_range(start="2011-01-01", freq="b") + with pytest.raises(ValueError, match=msg): + date_range(end="2011-01-01", freq="B") + with pytest.raises(ValueError, match=msg): + date_range(periods=10, freq="D") + + @pytest.mark.parametrize("freq", ["AS", "W-SUN"]) + def test_constructor_datetime64_tzformat(self, freq): + # see GH#6572: ISO 8601 format results in pytz.FixedOffset + idx = date_range( + "2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq + ) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(-300), + ) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range( + "2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq + ) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(540), + ) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + # Non ISO 8601 format results in dateutil.tz.tzoffset + idx = date_range("2013/1/1 0:00:00-5:00", "2016/1/1 
23:59:59-5:00", freq=freq) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(-300), + ) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(540), + ) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + def test_constructor_dtype(self): + + # passing a dtype with a tz should localize + idx = DatetimeIndex( + ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]" + ) + expected = DatetimeIndex(["2013-01-01", "2013-01-02"]).tz_localize("US/Eastern") + tm.assert_index_equal(idx, expected) + + idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern") + tm.assert_index_equal(idx, expected) + + # if we already have a tz and its not the same, then raise + idx = DatetimeIndex( + ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]" + ) + + msg = ( + "cannot supply both a tz and a timezone-naive dtype " + r"\(i\.e\. datetime64\[ns\]\)" + ) + with pytest.raises(ValueError, match=msg): + DatetimeIndex(idx, dtype="datetime64[ns]") + + # this is effectively trying to convert tz's + msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET" + with pytest.raises(TypeError, match=msg): + DatetimeIndex(idx, dtype="datetime64[ns, CET]") + msg = "cannot supply both a tz and a dtype with a tz" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]") + + result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]") + tm.assert_index_equal(idx, result) + + @pytest.mark.parametrize("dtype", [object, np.int32, np.int64]) + def test_constructor_invalid_dtype_raises(self, dtype): + # GH 23986 + msg = "Unexpected value for 'dtype'" + with pytest.raises(ValueError, match=msg): + DatetimeIndex([1, 2], dtype=dtype) + + def test_constructor_name(self): + idx = date_range(start="2000-01-01", periods=1, freq="A", name="TEST") + assert idx.name == "TEST" + + def test_000constructor_resolution(self): + # 2252 + t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) + idx = DatetimeIndex([t1]) + + assert idx.nanosecond[0] == t1.nanosecond + + def test_disallow_setting_tz(self): + # GH 3746 + dti = DatetimeIndex(["2010"], tz="UTC") + msg = "Cannot directly set timezone" + with pytest.raises(AttributeError, match=msg): + dti.tz = pytz.timezone("US/Pacific") + + @pytest.mark.parametrize( + "tz", + [ + None, + "America/Los_Angeles", + pytz.timezone("America/Los_Angeles"), + Timestamp("2000", tz="America/Los_Angeles").tz, + ], + ) + def test_constructor_start_end_with_tz(self, tz): + # GH 18595 + start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles") + end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles") + result = date_range(freq="D", start=start, end=end, tz=tz) + expected = DatetimeIndex( + ["2013-01-01 06:00:00", "2013-01-02 06:00:00"], + tz="America/Los_Angeles", + freq="D", + ) + tm.assert_index_equal(result, expected) + # Especially assert that the timezone is consistent for pytz + assert 
pytz.timezone("America/Los_Angeles") is result.tz + + @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"]) + def test_constructor_with_non_normalized_pytz(self, tz): + # GH 18595 + non_norm_tz = Timestamp("2010", tz=tz).tz + result = DatetimeIndex(["2010"], tz=non_norm_tz) + assert pytz.timezone(tz) is result.tz + + def test_constructor_timestamp_near_dst(self): + # GH 20854 + ts = [ + Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"), + Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"), + ] + result = DatetimeIndex(ts) + expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) + @pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list]) + @pytest.mark.parametrize( + "tz, dtype", + [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")], + ) + def test_constructor_with_int_tz(self, klass, box, tz, dtype): + # GH 20997, 20964 + ts = Timestamp("2018-01-01", tz=tz) + result = klass(box([ts.value]), dtype=dtype) + expected = klass([ts]) + assert result == expected + + def test_construction_int_rountrip(self, tz_naive_fixture): + # GH 12619, GH#24559 + tz = tz_naive_fixture + + result = 1293858000000000000 + expected = DatetimeIndex([result], tz=tz).asi8[0] + assert result == expected + + def test_construction_from_replaced_timestamps_with_dst(self): + # GH 18785 + index = date_range( + Timestamp(2000, 1, 1), + Timestamp(2005, 1, 1), + freq="MS", + tz="Australia/Melbourne", + ) + test = pd.DataFrame({"data": range(len(index))}, index=index) + test = test.resample("Y").mean() + result = DatetimeIndex([x.replace(month=6, day=1) for x in test.index]) + expected = DatetimeIndex( + [ + "2000-06-01 00:00:00", + "2001-06-01 00:00:00", + "2002-06-01 00:00:00", + "2003-06-01 00:00:00", + "2004-06-01 00:00:00", + "2005-06-01 00:00:00", + ], + tz="Australia/Melbourne", + ) + tm.assert_index_equal(result, expected) + + def test_construction_with_tz_and_tz_aware_dti(self): + # GH 23579 + dti = date_range("2016-01-01", periods=3, tz="US/Central") + msg = "data is already tz-aware US/Central, unable to set specified tz" + with pytest.raises(TypeError, match=msg): + DatetimeIndex(dti, tz="Asia/Tokyo") + + def test_construction_with_nat_and_tzlocal(self): + tz = dateutil.tz.tzlocal() + result = DatetimeIndex(["2018", "NaT"], tz=tz) + expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT]) + tm.assert_index_equal(result, expected) + + def test_constructor_with_ambiguous_keyword_arg(self): + # GH 35297 + + expected = DatetimeIndex( + ["2020-11-01 01:00:00", "2020-11-02 01:00:00"], + dtype="datetime64[ns, America/New_York]", + freq="D", + ambiguous=False, + ) + + # ambiguous keyword in start + timezone = "America/New_York" + start = Timestamp(year=2020, month=11, day=1, hour=1).tz_localize( + timezone, ambiguous=False + ) + result = date_range(start=start, periods=2, ambiguous=False) + tm.assert_index_equal(result, expected) + + # ambiguous keyword in end + timezone = "America/New_York" + end = Timestamp(year=2020, month=11, day=2, hour=1).tz_localize( + timezone, ambiguous=False + ) + result = date_range(end=end, periods=2, ambiguous=False) + tm.assert_index_equal(result, expected) + + def test_constructor_with_nonexistent_keyword_arg(self): + # GH 35297 + + timezone = "Europe/Warsaw" + + # nonexistent keyword in start + start = Timestamp("2015-03-29 02:30:00").tz_localize( + timezone, 
nonexistent="shift_forward" + ) + result = date_range(start=start, periods=2, freq="H") + expected = DatetimeIndex( + [ + Timestamp("2015-03-29 03:00:00+02:00", tz=timezone), + Timestamp("2015-03-29 04:00:00+02:00", tz=timezone), + ] + ) + + tm.assert_index_equal(result, expected) + + # nonexistent keyword in end + end = Timestamp("2015-03-29 02:30:00").tz_localize( + timezone, nonexistent="shift_forward" + ) + result = date_range(end=end, periods=2, freq="H") + expected = DatetimeIndex( + [ + Timestamp("2015-03-29 01:00:00+01:00", tz=timezone), + Timestamp("2015-03-29 03:00:00+02:00", tz=timezone), + ] + ) + + tm.assert_index_equal(result, expected) + + def test_constructor_no_precision_raises(self): + # GH-24753, GH-24739 + + msg = "with no precision is not allowed" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2000"], dtype="datetime64") + + with pytest.raises(ValueError, match=msg): + Index(["2000"], dtype="datetime64") + + def test_constructor_wrong_precision_raises(self): + msg = "Unexpected value for 'dtype': 'datetime64\\[us\\]'" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2000"], dtype="datetime64[us]") + + def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self): + # GH 27011 + result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object)) + expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT]) + tm.assert_index_equal(result, expected) + + +class TestTimeSeries: + def test_dti_constructor_preserve_dti_freq(self): + rng = date_range("1/1/2000", "1/2/2000", freq="5min") + + rng2 = DatetimeIndex(rng) + assert rng.freq == rng2.freq + + def test_explicit_none_freq(self): + # Explicitly passing freq=None is respected + rng = date_range("1/1/2000", "1/2/2000", freq="5min") + + result = DatetimeIndex(rng, freq=None) + assert result.freq is None + + result = DatetimeIndex(rng._data, freq=None) + assert result.freq is None + + dta = DatetimeArray(rng, freq=None) + assert dta.freq is None + + def test_dti_constructor_years_only(self, tz_naive_fixture): + tz = tz_naive_fixture + # GH 6961 + rng1 = date_range("2014", "2015", freq="M", tz=tz) + expected1 = date_range("2014-01-31", "2014-12-31", freq="M", tz=tz) + + rng2 = date_range("2014", "2015", freq="MS", tz=tz) + expected2 = date_range("2014-01-01", "2015-01-01", freq="MS", tz=tz) + + rng3 = date_range("2014", "2020", freq="A", tz=tz) + expected3 = date_range("2014-12-31", "2019-12-31", freq="A", tz=tz) + + rng4 = date_range("2014", "2020", freq="AS", tz=tz) + expected4 = date_range("2014-01-01", "2020-01-01", freq="AS", tz=tz) + + for rng, expected in [ + (rng1, expected1), + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + tm.assert_index_equal(rng, expected) + + def test_dti_constructor_small_int(self, any_int_numpy_dtype): + # see gh-13721 + exp = DatetimeIndex( + [ + "1970-01-01 00:00:00.00000000", + "1970-01-01 00:00:00.00000001", + "1970-01-01 00:00:00.00000002", + ] + ) + + arr = np.array([0, 10, 20], dtype=any_int_numpy_dtype) + tm.assert_index_equal(DatetimeIndex(arr), exp) + + def test_ctor_str_intraday(self): + rng = DatetimeIndex(["1-1-2000 00:00:01"]) + assert rng[0].second == 1 + + def test_is_(self): + dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + assert dti.is_(dti) + assert dti.is_(dti.view()) + assert not dti.is_(dti.copy()) + + def test_index_cast_datetime64_other_units(self): + arr = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]") + idx = Index(arr) + + assert (idx.values == 
astype_overflowsafe(arr, dtype=np.dtype("M8[ns]"))).all() + + def test_constructor_int64_nocopy(self): + # GH#1624 + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr) + + arr[50:100] = -1 + assert (index.asi8[50:100] == -1).all() + + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr, copy=True) + + arr[50:100] = -1 + assert (index.asi8[50:100] != -1).all() + + @pytest.mark.parametrize( + "freq", ["M", "Q", "A", "D", "B", "BH", "T", "S", "L", "U", "H", "N", "C"] + ) + def test_from_freq_recreate_from_data(self, freq): + org = date_range(start="2001/02/01 09:00", freq=freq, periods=1) + idx = DatetimeIndex(org, freq=freq) + tm.assert_index_equal(idx, org) + + org = date_range( + start="2001/02/01 09:00", freq=freq, tz="US/Pacific", periods=1 + ) + idx = DatetimeIndex(org, freq=freq, tz="US/Pacific") + tm.assert_index_equal(idx, org) + + def test_datetimeindex_constructor_misc(self): + arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"] + msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(arr) + + arr = ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"] + idx1 = DatetimeIndex(arr) + + arr = [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"] + idx2 = DatetimeIndex(arr) + + arr = [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"] + idx3 = DatetimeIndex(arr) + + arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O") + idx4 = DatetimeIndex(arr) + + arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]) + idx5 = DatetimeIndex(arr) + + arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]) + idx6 = DatetimeIndex(arr) + + idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True) + idx8 = DatetimeIndex( + ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True + ) + tm.assert_index_equal(idx7, idx8) + + for other in [idx2, idx3, idx4, idx5, idx6]: + assert (idx1.values == other.values).all() + + sdate = datetime(1999, 12, 25) + edate = datetime(2000, 1, 1) + idx = date_range(start=sdate, freq="1B", periods=20) + assert len(idx) == 20 + assert idx[0] == sdate + 0 * offsets.BDay() + assert idx.freq == "B" + + idx1 = date_range(start=sdate, end=edate, freq="W-SUN") + idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6)) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + idx1 = date_range(start=sdate, end=edate, freq="QS") + idx2 = date_range( + start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1) + ) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + idx1 = date_range(start=sdate, end=edate, freq="BQ") + idx2 = date_range( + start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12) + ) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + def test_pass_datetimeindex_to_index(self): + # Bugs in #1396 + rng = date_range("1/1/2000", "3/1/2000") + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pydatetime(), dtype=object) + + tm.assert_numpy_array_equal(idx.values, expected.values) + + def test_date_range_tuple_freq_raises(self): + # GH#34703 + edate = datetime(2000, 1, 1) + with pytest.raises(TypeError, match="pass as a string instead"): + date_range(end=edate, freq=("D", 5), periods=20) + + +def test_timestamp_constructor_invalid_fold_raise(): + # Test for #25057 + # Valid fold values are only [None, 0, 1] + msg = "Valid values for the fold argument are None, 0, or 1." 
+ with pytest.raises(ValueError, match=msg): + Timestamp(123, fold=2) + + +def test_timestamp_constructor_pytz_fold_raise(): + # Test for #25057 + # pytz doesn't support fold. Check that we raise + # if fold is passed with pytz + msg = "pytz timezones do not support fold. Please use dateutil timezones." + tz = pytz.timezone("Europe/London") + with pytest.raises(ValueError, match=msg): + Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) + + +@pytest.mark.parametrize("fold", [0, 1]) +@pytest.mark.parametrize( + "ts_input", + [ + 1572136200000000000, + 1572136200000000000.0, + np.datetime64(1572136200000000000, "ns"), + "2019-10-27 01:30:00+01:00", + datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), + ], +) +def test_timestamp_constructor_fold_conflict(ts_input, fold): + # Test for #25057 + # Check that we raise on fold conflict + msg = ( + "Cannot pass fold with possibly unambiguous input: int, float, " + "numpy.datetime64, str, or timezone-aware datetime-like. " + "Pass naive datetime-like or build Timestamp from components." + ) + with pytest.raises(ValueError, match=msg): + Timestamp(ts_input=ts_input, fold=fold) + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", None]) +@pytest.mark.parametrize("fold", [0, 1]) +def test_timestamp_constructor_retain_fold(tz, fold): + # Test for #25057 + # Check that we retain fold + ts = Timestamp(year=2019, month=10, day=27, hour=1, minute=30, tz=tz, fold=fold) + result = ts.fold + expected = fold + assert result == expected + + +_tzs = ["dateutil/Europe/London"] +if PY39: + try: + _tzs = ["dateutil/Europe/London", zoneinfo.ZoneInfo("Europe/London")] + except zoneinfo.ZoneInfoNotFoundError: + pass + + +@pytest.mark.parametrize("tz", _tzs) +@pytest.mark.parametrize( + "ts_input,fold_out", + [ + (1572136200000000000, 0), + (1572139800000000000, 1), + ("2019-10-27 01:30:00+01:00", 0), + ("2019-10-27 01:30:00+00:00", 1), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 1), + ], +) +def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): + # Test for #25057 + # Check that we infer fold correctly based on timestamps since utc + # or strings + ts = Timestamp(ts_input, tz=tz) + result = ts.fold + expected = fold_out + assert result == expected + # TODO: belongs in Timestamp tests? 
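+
+# Illustrative sketch (not part of the upstream tests): for the ambiguous wall
+# time 2019-10-27 01:30 in Europe/London, fold selects which of the two UTC
+# instants is meant, mirroring the parametrized cases above and below:
+#   Timestamp(datetime(2019, 10, 27, 1, 30), tz="dateutil/Europe/London", fold=0).value
+#   -> 1572136200000000000  (first occurrence of 01:30, before the DST fall-back)
+#   Timestamp(datetime(2019, 10, 27, 1, 30), tz="dateutil/Europe/London", fold=1).value
+#   -> 1572139800000000000  (second occurrence, after the fall-back)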
+ + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold,value_out", + [ + (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), + ], +) +def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): + # Test for #25057 + # Check that we adjust value for fold correctly + # based on timestamps since utc + ts = Timestamp(ts_input, tz=tz, fold=fold) + result = ts.value + expected = value_out + assert result == expected diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py new file mode 100644 index 00000000..07f57d3f --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -0,0 +1,1161 @@ +""" +test date_range, bdate_range construction from the convenience range functions +""" + +from datetime import ( + datetime, + time, + timedelta, +) + +import numpy as np +import pytest +import pytz +from pytz import timezone + +from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.offsets import ( + BDay, + CDay, + DateOffset, + MonthEnd, + prefix_mapping, +) +from pandas.errors import OutOfBoundsDatetime +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DatetimeIndex, + Timedelta, + Timestamp, + bdate_range, + date_range, + offsets, +) +import pandas._testing as tm +from pandas.core.arrays.datetimes import generate_range + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +def _get_expected_range( + begin_to_match, + end_to_match, + both_range, + inclusive_endpoints, +): + """Helper to get expected range from a both inclusive range""" + left_match = begin_to_match == both_range[0] + right_match = end_to_match == both_range[-1] + + if inclusive_endpoints == "left" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints == "right" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints == "neither" and left_match and right_match: + expected_range = both_range[1:-1] + elif inclusive_endpoints == "neither" and right_match: + expected_range = both_range[:-1] + elif inclusive_endpoints == "neither" and left_match: + expected_range = both_range[1:] + elif inclusive_endpoints == "both": + expected_range = both_range[:] + else: + expected_range = both_range[:] + + return expected_range + + +class TestTimestampEquivDateRange: + # Older tests in TestTimeSeries constructed their `stamp` objects + # using `date_range` instead of the `Timestamp` constructor. + # TestTimestampEquivDateRange checks that these are equivalent in the + # pertinent cases. 
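+ # Illustrative sketch (not upstream pandas code): the equivalence exercised
+ # below is, for example,
+ #   date_range("20090415", "20090519", tz="US/Eastern")[0]
+ #   == Timestamp("20090415", tz="US/Eastern")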
+ + def test_date_range_timestamp_equiv(self): + rng = date_range("20090415", "20090519", tz="US/Eastern") + stamp = rng[0] + + ts = Timestamp("20090415", tz="US/Eastern") + assert ts == stamp + + def test_date_range_timestamp_equiv_dateutil(self): + rng = date_range("20090415", "20090519", tz="dateutil/US/Eastern") + stamp = rng[0] + + ts = Timestamp("20090415", tz="dateutil/US/Eastern") + assert ts == stamp + + def test_date_range_timestamp_equiv_explicit_pytz(self): + rng = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern")) + stamp = rng[0] + + ts = Timestamp("20090415", tz=pytz.timezone("US/Eastern")) + assert ts == stamp + + @td.skip_if_windows + def test_date_range_timestamp_equiv_explicit_dateutil(self): + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz + + rng = date_range("20090415", "20090519", tz=gettz("US/Eastern")) + stamp = rng[0] + + ts = Timestamp("20090415", tz=gettz("US/Eastern")) + assert ts == stamp + + def test_date_range_timestamp_equiv_from_datetime_instance(self): + datetime_instance = datetime(2014, 3, 4) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + timestamp_instance = date_range(datetime_instance, periods=1, freq="D")[0] + + ts = Timestamp(datetime_instance) + assert ts == timestamp_instance + + def test_date_range_timestamp_equiv_preserve_frequency(self): + timestamp_instance = date_range("2014-03-05", periods=1, freq="D")[0] + ts = Timestamp("2014-03-05") + + assert timestamp_instance == ts + + +class TestDateRanges: + @pytest.mark.parametrize("freq", ["N", "U", "L", "T", "S", "H", "D"]) + def test_date_range_edges(self, freq): + # GH#13672 + td = Timedelta(f"1{freq}") + ts = Timestamp("1970-01-01") + + idx = date_range( + start=ts + td, + end=ts + 4 * td, + freq=freq, + ) + exp = DatetimeIndex( + [ts + n * td for n in range(1, 5)], + freq=freq, + ) + tm.assert_index_equal(idx, exp) + + # start after end + idx = date_range( + start=ts + 4 * td, + end=ts + td, + freq=freq, + ) + exp = DatetimeIndex([], freq=freq) + tm.assert_index_equal(idx, exp) + + # start matches end + idx = date_range( + start=ts + td, + end=ts + td, + freq=freq, + ) + exp = DatetimeIndex([ts + td], freq=freq) + tm.assert_index_equal(idx, exp) + + def test_date_range_near_implementation_bound(self): + # GH#??? 
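+ # Illustrative sketch (not upstream pandas code): with a 1-nanosecond step,
+ # the value immediately before Timestamp.min would fall outside the
+ # representable datetime64[ns] range, so date_range(end=Timestamp.min,
+ # periods=2, freq=Timedelta(1)) must raise OutOfBoundsDatetime, as below.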
+ freq = Timedelta(1) + + with pytest.raises(OutOfBoundsDatetime, match="Cannot generate range with"): + date_range(end=Timestamp.min, periods=2, freq=freq) + + def test_date_range_nat(self): + # GH#11587 + msg = "Neither `start` nor `end` can be NaT" + with pytest.raises(ValueError, match=msg): + date_range(start="2016-01-01", end=pd.NaT, freq="D") + with pytest.raises(ValueError, match=msg): + date_range(start=pd.NaT, end="2016-01-01", freq="D") + + def test_date_range_multiplication_overflow(self): + # GH#24255 + # check that overflows in calculating `addend = periods * stride` + # are caught + with tm.assert_produces_warning(None): + # we should _not_ be seeing a overflow RuntimeWarning + dti = date_range(start="1677-09-22", periods=213503, freq="D") + + assert dti[0] == Timestamp("1677-09-22") + assert len(dti) == 213503 + + msg = "Cannot generate range with" + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range("1969-05-04", periods=200000000, freq="30000D") + + def test_date_range_unsigned_overflow_handling(self): + # GH#24255 + # case where `addend = periods * stride` overflows int64 bounds + # but not uint64 bounds + dti = date_range(start="1677-09-22", end="2262-04-11", freq="D") + + dti2 = date_range(start=dti[0], periods=len(dti), freq="D") + assert dti2.equals(dti) + + dti3 = date_range(end=dti[-1], periods=len(dti), freq="D") + assert dti3.equals(dti) + + def test_date_range_int64_overflow_non_recoverable(self): + # GH#24255 + # case with start later than 1970-01-01, overflow int64 but not uint64 + msg = "Cannot generate range with" + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(start="1970-02-01", periods=106752 * 24, freq="H") + + # case with end before 1970-01-01, overflow int64 but not uint64 + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(end="1969-11-14", periods=106752 * 24, freq="H") + + @pytest.mark.slow + def test_date_range_int64_overflow_stride_endpoint_different_signs(self): + # cases where stride * periods overflow int64 and stride/endpoint + # have different signs + start = Timestamp("2262-02-23") + end = Timestamp("1969-11-14") + + expected = date_range(start=start, end=end, freq="-1H") + assert expected[0] == start + assert expected[-1] == end + + dti = date_range(end=end, periods=len(expected), freq="-1H") + tm.assert_index_equal(dti, expected) + + start2 = Timestamp("1970-02-01") + end2 = Timestamp("1677-10-22") + + expected2 = date_range(start=start2, end=end2, freq="-1H") + assert expected2[0] == start2 + assert expected2[-1] == end2 + + dti2 = date_range(start=start2, periods=len(expected2), freq="-1H") + tm.assert_index_equal(dti2, expected2) + + def test_date_range_out_of_bounds(self): + # GH#14187 + msg = "Cannot generate range" + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range("2016-01-01", periods=100000, freq="D") + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(end="1763-10-12", periods=100000, freq="D") + + def test_date_range_gen_error(self): + rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min") + assert len(rng) == 4 + + @pytest.mark.parametrize("freq", ["AS", "YS"]) + def test_begin_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = DatetimeIndex( + ["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"], + freq=freq, + ) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["A", "Y"]) + def test_end_year_alias(self, freq): + # see gh-9313 + rng = 
date_range("1/1/2013", "7/1/2017", freq=freq) + exp = DatetimeIndex( + ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-31"], freq=freq + ) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["BA", "BY"]) + def test_business_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = DatetimeIndex( + ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq=freq + ) + tm.assert_index_equal(rng, exp) + + def test_date_range_negative_freq(self): + # GH 11018 + rng = date_range("2011-12-31", freq="-2A", periods=3) + exp = DatetimeIndex(["2011-12-31", "2009-12-31", "2007-12-31"], freq="-2A") + tm.assert_index_equal(rng, exp) + assert rng.freq == "-2A" + + rng = date_range("2011-01-31", freq="-2M", periods=3) + exp = DatetimeIndex(["2011-01-31", "2010-11-30", "2010-09-30"], freq="-2M") + tm.assert_index_equal(rng, exp) + assert rng.freq == "-2M" + + def test_date_range_bms_bug(self): + # #1645 + rng = date_range("1/1/2000", periods=10, freq="BMS") + + ex_first = Timestamp("2000-01-03") + assert rng[0] == ex_first + + def test_date_range_normalize(self): + snap = datetime.today() + n = 50 + + rng = date_range(snap, periods=n, normalize=False, freq="2D") + + offset = timedelta(2) + values = DatetimeIndex([snap + i * offset for i in range(n)], freq=offset) + + tm.assert_index_equal(rng, values) + + rng = date_range("1/1/2000 08:15", periods=n, normalize=False, freq="B") + the_time = time(8, 15) + for val in rng: + assert val.time() == the_time + + def test_date_range_fy5252(self): + dr = date_range( + start="2013-01-01", + periods=2, + freq=offsets.FY5253(startingMonth=1, weekday=3, variation="nearest"), + ) + assert dr[0] == Timestamp("2013-01-31") + assert dr[1] == Timestamp("2014-01-30") + + def test_date_range_ambiguous_arguments(self): + # #2538 + start = datetime(2011, 1, 1, 5, 3, 40) + end = datetime(2011, 1, 1, 8, 9, 40) + + msg = ( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + with pytest.raises(ValueError, match=msg): + date_range(start, end, periods=10, freq="s") + + def test_date_range_convenience_periods(self): + # GH 20808 + result = date_range("2018-04-24", "2018-04-27", periods=3) + expected = DatetimeIndex( + ["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"], + freq=None, + ) + + tm.assert_index_equal(result, expected) + + # Test if spacing remains linear if tz changes to dst in range + result = date_range( + "2018-04-01 01:00:00", + "2018-04-01 04:00:00", + tz="Australia/Sydney", + periods=3, + ) + expected = DatetimeIndex( + [ + Timestamp("2018-04-01 01:00:00+1100", tz="Australia/Sydney"), + Timestamp("2018-04-01 02:00:00+1000", tz="Australia/Sydney"), + Timestamp("2018-04-01 04:00:00+1000", tz="Australia/Sydney"), + ] + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start,end,result_tz", + [ + ["20180101", "20180103", "US/Eastern"], + [datetime(2018, 1, 1), datetime(2018, 1, 3), "US/Eastern"], + [Timestamp("20180101"), Timestamp("20180103"), "US/Eastern"], + [ + Timestamp("20180101", tz="US/Eastern"), + Timestamp("20180103", tz="US/Eastern"), + "US/Eastern", + ], + [ + Timestamp("20180101", tz="US/Eastern"), + Timestamp("20180103", tz="US/Eastern"), + None, + ], + ], + ) + def test_date_range_linspacing_tz(self, start, end, result_tz): + # GH 20983 + result = date_range(start, end, periods=3, tz=result_tz) + expected = date_range("20180101", periods=3, freq="D", tz="US/Eastern") + 
tm.assert_index_equal(result, expected) + + def test_date_range_businesshour(self): + idx = DatetimeIndex( + [ + "2014-07-04 09:00", + "2014-07-04 10:00", + "2014-07-04 11:00", + "2014-07-04 12:00", + "2014-07-04 13:00", + "2014-07-04 14:00", + "2014-07-04 15:00", + "2014-07-04 16:00", + ], + freq="BH", + ) + rng = date_range("2014-07-04 09:00", "2014-07-04 16:00", freq="BH") + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex(["2014-07-04 16:00", "2014-07-07 09:00"], freq="BH") + rng = date_range("2014-07-04 16:00", "2014-07-07 09:00", freq="BH") + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex( + [ + "2014-07-04 09:00", + "2014-07-04 10:00", + "2014-07-04 11:00", + "2014-07-04 12:00", + "2014-07-04 13:00", + "2014-07-04 14:00", + "2014-07-04 15:00", + "2014-07-04 16:00", + "2014-07-07 09:00", + "2014-07-07 10:00", + "2014-07-07 11:00", + "2014-07-07 12:00", + "2014-07-07 13:00", + "2014-07-07 14:00", + "2014-07-07 15:00", + "2014-07-07 16:00", + "2014-07-08 09:00", + "2014-07-08 10:00", + "2014-07-08 11:00", + "2014-07-08 12:00", + "2014-07-08 13:00", + "2014-07-08 14:00", + "2014-07-08 15:00", + "2014-07-08 16:00", + ], + freq="BH", + ) + rng = date_range("2014-07-04 09:00", "2014-07-08 16:00", freq="BH") + tm.assert_index_equal(idx, rng) + + def test_range_misspecified(self): + # GH #1095 + msg = ( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + + with pytest.raises(ValueError, match=msg): + date_range(start="1/1/2000") + + with pytest.raises(ValueError, match=msg): + date_range(end="1/1/2000") + + with pytest.raises(ValueError, match=msg): + date_range(periods=10) + + with pytest.raises(ValueError, match=msg): + date_range(start="1/1/2000", freq="H") + + with pytest.raises(ValueError, match=msg): + date_range(end="1/1/2000", freq="H") + + with pytest.raises(ValueError, match=msg): + date_range(periods=10, freq="H") + + with pytest.raises(ValueError, match=msg): + date_range() + + def test_compat_replace(self): + # https://github.com/statsmodels/statsmodels/issues/3349 + # replace should take ints/longs for compat + result = date_range(Timestamp("1960-04-01 00:00:00"), periods=76, freq="QS-JAN") + assert len(result) == 76 + + def test_catch_infinite_loop(self): + offset = offsets.DateOffset(minute=5) + # blow up, don't loop forever + msg = "Offset did not increment date" + with pytest.raises(ValueError, match=msg): + date_range(datetime(2011, 11, 11), datetime(2011, 11, 12), freq=offset) + + @pytest.mark.parametrize("periods", (1, 2)) + def test_wom_len(self, periods): + # https://github.com/pandas-dev/pandas/issues/20517 + res = date_range(start="20110101", periods=periods, freq="WOM-1MON") + assert len(res) == periods + + def test_construct_over_dst(self): + # GH 20854 + pre_dst = Timestamp("2010-11-07 01:00:00").tz_localize( + "US/Pacific", ambiguous=True + ) + pst_dst = Timestamp("2010-11-07 01:00:00").tz_localize( + "US/Pacific", ambiguous=False + ) + expect_data = [ + Timestamp("2010-11-07 00:00:00", tz="US/Pacific"), + pre_dst, + pst_dst, + ] + expected = DatetimeIndex(expect_data, freq="H") + result = date_range(start="2010-11-7", periods=3, freq="H", tz="US/Pacific") + tm.assert_index_equal(result, expected) + + def test_construct_with_different_start_end_string_format(self): + # GH 12064 + result = date_range( + "2013-01-01 00:00:00+09:00", "2013/01/01 02:00:00+09:00", freq="H" + ) + expected = DatetimeIndex( + [ + Timestamp("2013-01-01 00:00:00+09:00"), + Timestamp("2013-01-01 01:00:00+09:00"), + 
Timestamp("2013-01-01 02:00:00+09:00"), + ], + freq="H", + ) + tm.assert_index_equal(result, expected) + + def test_error_with_zero_monthends(self): + msg = r"Offset <0 \* MonthEnds> did not increment date" + with pytest.raises(ValueError, match=msg): + date_range("1/1/2000", "1/1/2001", freq=MonthEnd(0)) + + def test_range_bug(self): + # GH #770 + offset = DateOffset(months=3) + result = date_range("2011-1-1", "2012-1-31", freq=offset) + + start = datetime(2011, 1, 1) + expected = DatetimeIndex([start + i * offset for i in range(5)], freq=offset) + tm.assert_index_equal(result, expected) + + def test_range_tz_pytz(self): + # see gh-2906 + tz = timezone("US/Eastern") + start = tz.localize(datetime(2011, 1, 1)) + end = tz.localize(datetime(2011, 1, 3)) + + dr = date_range(start=start, periods=3) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + dr = date_range(end=end, periods=3) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + dr = date_range(start=start, end=end) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + @pytest.mark.parametrize( + "start, end", + [ + [ + Timestamp(datetime(2014, 3, 6), tz="US/Eastern"), + Timestamp(datetime(2014, 3, 12), tz="US/Eastern"), + ], + [ + Timestamp(datetime(2013, 11, 1), tz="US/Eastern"), + Timestamp(datetime(2013, 11, 6), tz="US/Eastern"), + ], + ], + ) + def test_range_tz_dst_straddle_pytz(self, start, end): + dr = date_range(start, end, freq="D") + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start, end, freq="D", tz="US/Eastern") + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range( + start.replace(tzinfo=None), + end.replace(tzinfo=None), + freq="D", + tz="US/Eastern", + ) + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + def test_range_tz_dateutil(self): + # see gh-2906 + + # Use maybe_get_tz to fix filename in tz under dateutil. 
+ from pandas._libs.tslibs.timezones import maybe_get_tz + + tz = lambda x: maybe_get_tz("dateutil/" + x) + + start = datetime(2011, 1, 1, tzinfo=tz("US/Eastern")) + end = datetime(2011, 1, 3, tzinfo=tz("US/Eastern")) + + dr = date_range(start=start, periods=3) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + dr = date_range(end=end, periods=3) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + dr = date_range(start=start, end=end) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed(self, freq, inclusive_endpoints_fixture): + begin = datetime(2011, 1, 1) + end = datetime(2014, 1, 1) + + result_range = date_range( + begin, end, inclusive=inclusive_endpoints_fixture, freq=freq + ) + both_range = date_range(begin, end, inclusive="both", freq=freq) + expected_range = _get_expected_range( + begin, end, both_range, inclusive_endpoints_fixture + ) + + tm.assert_index_equal(expected_range, result_range) + + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed_with_tz_aware_start_end( + self, freq, inclusive_endpoints_fixture + ): + # GH12409, GH12684 + begin = Timestamp("2011/1/1", tz="US/Eastern") + end = Timestamp("2014/1/1", tz="US/Eastern") + + result_range = date_range( + begin, end, inclusive=inclusive_endpoints_fixture, freq=freq + ) + both_range = date_range(begin, end, inclusive="both", freq=freq) + expected_range = _get_expected_range( + begin, + end, + both_range, + inclusive_endpoints_fixture, + ) + + tm.assert_index_equal(expected_range, result_range) + + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_with_tz_closed_with_tz_aware_start_end( + self, freq, inclusive_endpoints_fixture + ): + begin = Timestamp("2011/1/1") + end = Timestamp("2014/1/1") + begintz = Timestamp("2011/1/1", tz="US/Eastern") + endtz = Timestamp("2014/1/1", tz="US/Eastern") + + result_range = date_range( + begin, + end, + inclusive=inclusive_endpoints_fixture, + freq=freq, + tz="US/Eastern", + ) + both_range = date_range( + begin, end, inclusive="both", freq=freq, tz="US/Eastern" + ) + expected_range = _get_expected_range( + begintz, + endtz, + both_range, + inclusive_endpoints_fixture, + ) + + tm.assert_index_equal(expected_range, result_range) + + def test_range_closed_boundary(self, inclusive_endpoints_fixture): + # GH#11804 + right_boundary = date_range( + "2015-09-12", + "2015-12-01", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + left_boundary = date_range( + "2015-09-01", + "2015-09-12", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + both_boundary = date_range( + "2015-09-01", + "2015-12-01", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + neither_boundary = date_range( + "2015-09-11", + "2015-09-12", + freq="QS-MAR", + inclusive=inclusive_endpoints_fixture, + ) + + expected_right = both_boundary + expected_left = both_boundary + expected_both = both_boundary + + if inclusive_endpoints_fixture == "right": + expected_left = both_boundary[1:] + elif inclusive_endpoints_fixture == "left": + expected_right = both_boundary[:-1] + elif inclusive_endpoints_fixture == "both": + expected_right = both_boundary[1:] + expected_left = both_boundary[:-1] + + expected_neither = both_boundary[1:-1] + + tm.assert_index_equal(right_boundary, expected_right) + tm.assert_index_equal(left_boundary, 
expected_left) + tm.assert_index_equal(both_boundary, expected_both) + tm.assert_index_equal(neither_boundary, expected_neither) + + def test_years_only(self): + # GH 6961 + dr = date_range("2014", "2015", freq="M") + assert dr[0] == datetime(2014, 1, 31) + assert dr[-1] == datetime(2014, 12, 31) + + def test_freq_divides_end_in_nanos(self): + # GH 10885 + result_1 = date_range("2005-01-12 10:00", "2005-01-12 16:00", freq="345min") + result_2 = date_range("2005-01-13 10:00", "2005-01-13 16:00", freq="345min") + expected_1 = DatetimeIndex( + ["2005-01-12 10:00:00", "2005-01-12 15:45:00"], + dtype="datetime64[ns]", + freq="345T", + tz=None, + ) + expected_2 = DatetimeIndex( + ["2005-01-13 10:00:00", "2005-01-13 15:45:00"], + dtype="datetime64[ns]", + freq="345T", + tz=None, + ) + tm.assert_index_equal(result_1, expected_1) + tm.assert_index_equal(result_2, expected_2) + + def test_cached_range_bug(self): + rng = date_range("2010-09-01 05:00:00", periods=50, freq=DateOffset(hours=6)) + assert len(rng) == 50 + assert rng[0] == datetime(2010, 9, 1, 5) + + def test_timezone_comparison_bug(self): + # smoke test + start = Timestamp("20130220 10:00", tz="US/Eastern") + result = date_range(start, periods=2, tz="US/Eastern") + assert len(result) == 2 + + def test_timezone_comparison_assert(self): + start = Timestamp("20130220 10:00", tz="US/Eastern") + msg = "Inferred time zone not equal to passed time zone" + with pytest.raises(AssertionError, match=msg): + date_range(start, periods=2, tz="Europe/Berlin") + + def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture): + # GH 23270 + tz = tz_aware_fixture + result = date_range(start="2011-06-01", end="2011-01-01", freq="-1MS", tz=tz) + expected = date_range(end="2011-06-01", start="2011-01-01", freq="1MS", tz=tz)[ + ::-1 + ] + tm.assert_index_equal(result, expected) + + def test_range_where_start_equal_end(self, inclusive_endpoints_fixture): + # GH 43394 + start = "2021-09-02" + end = "2021-09-02" + result = date_range( + start=start, end=end, freq="D", inclusive=inclusive_endpoints_fixture + ) + + both_range = date_range(start=start, end=end, freq="D", inclusive="both") + if inclusive_endpoints_fixture == "neither": + expected = both_range[1:-1] + elif inclusive_endpoints_fixture in ("left", "right", "both"): + expected = both_range[:] + + tm.assert_index_equal(result, expected) + + +class TestDateRangeTZ: + """Tests for date_range with timezones""" + + def test_hongkong_tz_convert(self): + # GH#1673 smoke test + dr = date_range("2012-01-01", "2012-01-10", freq="D", tz="Hongkong") + + # it works! 
+ dr.hour + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_span_dst_transition(self, tzstr): + # GH#1778 + + # Standard -> Daylight Savings Time + dr = date_range("03/06/2012 00:00", periods=200, freq="W-FRI", tz="US/Eastern") + + assert (dr.hour == 0).all() + + dr = date_range("2012-11-02", periods=10, tz=tzstr) + result = dr.hour + expected = pd.Index([0] * 10) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_timezone_str_argument(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + result = date_range("1/1/2000", periods=10, tz=tzstr) + expected = date_range("1/1/2000", periods=10, tz=tz) + + tm.assert_index_equal(result, expected) + + def test_date_range_with_fixedoffset_noname(self): + from pandas.tests.indexes.datetimes.test_timezones import fixed_off_no_name + + off = fixed_off_no_name + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + idx = pd.Index([start, end]) + assert off == idx.tz + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_with_tz(self, tzstr): + stamp = Timestamp("3/11/2012 05:00", tz=tzstr) + assert stamp.hour == 5 + + rng = date_range("3/11/2012 04:00", periods=10, freq="H", tz=tzstr) + + assert stamp == rng[1] + + +class TestGenRangeGeneration: + def test_generate(self): + rng1 = list(generate_range(START, END, offset=BDay())) + rng2 = list(generate_range(START, END, offset="B")) + assert rng1 == rng2 + + def test_generate_cday(self): + rng1 = list(generate_range(START, END, offset=CDay())) + rng2 = list(generate_range(START, END, offset="C")) + assert rng1 == rng2 + + def test_1(self): + rng = list(generate_range(start=datetime(2009, 3, 25), periods=2)) + expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)] + assert rng == expected + + def test_2(self): + rng = list(generate_range(start=datetime(2008, 1, 1), end=datetime(2008, 1, 3))) + expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)] + assert rng == expected + + def test_3(self): + rng = list(generate_range(start=datetime(2008, 1, 5), end=datetime(2008, 1, 6))) + expected = [] + assert rng == expected + + def test_precision_finer_than_offset(self): + # GH#9907 + result1 = date_range( + start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="Q" + ) + result2 = date_range( + start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W" + ) + expected1_list = [ + "2015-06-30 00:00:03", + "2015-09-30 00:00:03", + "2015-12-31 00:00:03", + "2016-03-31 00:00:03", + ] + expected2_list = [ + "2015-04-19 00:00:03", + "2015-04-26 00:00:03", + "2015-05-03 00:00:03", + "2015-05-10 00:00:03", + "2015-05-17 00:00:03", + "2015-05-24 00:00:03", + "2015-05-31 00:00:03", + "2015-06-07 00:00:03", + "2015-06-14 00:00:03", + "2015-06-21 00:00:03", + ] + expected1 = DatetimeIndex( + expected1_list, dtype="datetime64[ns]", freq="Q-DEC", tz=None + ) + expected2 = DatetimeIndex( + expected2_list, dtype="datetime64[ns]", freq="W-SUN", tz=None + ) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + dt1, dt2 = "2017-01-01", "2017-01-01" + tz1, tz2 = "US/Eastern", "Europe/London" + + @pytest.mark.parametrize( + "start,end", + [ + (Timestamp(dt1, tz=tz1), Timestamp(dt2)), + (Timestamp(dt1), Timestamp(dt2, tz=tz2)), + (Timestamp(dt1, tz=tz1), Timestamp(dt2, 
tz=tz2)), + (Timestamp(dt1, tz=tz2), Timestamp(dt2, tz=tz1)), + ], + ) + def test_mismatching_tz_raises_err(self, start, end): + # issue 18488 + msg = "Start and end cannot both be tz-aware with different timezones" + with pytest.raises(TypeError, match=msg): + date_range(start, end) + with pytest.raises(TypeError, match=msg): + date_range(start, end, freq=BDay()) + + +class TestBusinessDateRange: + def test_constructor(self): + bdate_range(START, END, freq=BDay()) + bdate_range(START, periods=20, freq=BDay()) + bdate_range(end=START, periods=20, freq=BDay()) + + msg = "periods must be a number, got B" + with pytest.raises(TypeError, match=msg): + date_range("2011-1-1", "2012-1-1", "B") + + with pytest.raises(TypeError, match=msg): + bdate_range("2011-1-1", "2012-1-1", "B") + + msg = "freq must be specified for bdate_range; use date_range instead" + with pytest.raises(TypeError, match=msg): + bdate_range(START, END, periods=10, freq=None) + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20) + firstDate = end - 19 * BDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = "2007/100/1" + + msg = "could not convert string to Timestamp" + with pytest.raises(ValueError, match=msg): + Timestamp(badly_formed_date) + + with pytest.raises(ValueError, match=msg): + bdate_range(start=badly_formed_date, periods=10) + + with pytest.raises(ValueError, match=msg): + bdate_range(end=badly_formed_date, periods=10) + + with pytest.raises(ValueError, match=msg): + bdate_range(badly_formed_date, badly_formed_date) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range("12/5/2011", "12/5/2011") + rng2 = bdate_range("12/2/2011", "12/5/2011") + assert rng2._data.freq == BDay() + + result = rng1.union(rng2) + assert isinstance(result, DatetimeIndex) + + @pytest.mark.parametrize("inclusive", ["left", "right", "neither", "both"]) + def test_bdays_and_open_boundaries(self, inclusive): + # GH 6673 + start = "2018-07-21" # Saturday + end = "2018-07-29" # Sunday + result = date_range(start, end, freq="B", inclusive=inclusive) + + bday_start = "2018-07-23" # Monday + bday_end = "2018-07-27" # Friday + expected = date_range(bday_start, bday_end, freq="D") + tm.assert_index_equal(result, expected) + # Note: we do _not_ expect the freqs to match here + + def test_bday_near_overflow(self): + # GH#24252 avoid doing unnecessary addition that _would_ overflow + start = Timestamp.max.floor("D").to_pydatetime() + rng = date_range(start, end=None, periods=1, freq="B") + expected = DatetimeIndex([start], freq="B") + tm.assert_index_equal(rng, expected) + + def test_bday_overflow_error(self): + # GH#24252 check that we get OutOfBoundsDatetime and not OverflowError + msg = "Out of bounds nanosecond timestamp" + start = Timestamp.max.floor("D").to_pydatetime() + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(start, periods=2, freq="B") + + +class TestCustomDateRange: + def test_constructor(self): + bdate_range(START, END, freq=CDay()) + bdate_range(START, periods=20, freq=CDay()) + bdate_range(end=START, periods=20, freq=CDay()) + + msg = "periods must be a number, got C" + with pytest.raises(TypeError, match=msg): + date_range("2011-1-1", "2012-1-1", "C") + + with pytest.raises(TypeError, match=msg): + bdate_range("2011-1-1", "2012-1-1", "C") + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20, freq="C") + firstDate = end - 19 * 
CDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C") + rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C") + assert rng2._data.freq == CDay() + + result = rng1.union(rng2) + assert isinstance(result, DatetimeIndex) + + def test_cdaterange(self): + result = bdate_range("2013-05-01", periods=3, freq="C") + expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-03"], freq="C") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + def test_cdaterange_weekmask(self): + result = bdate_range( + "2013-05-01", periods=3, freq="C", weekmask="Sun Mon Tue Wed Thu" + ) + expected = DatetimeIndex( + ["2013-05-01", "2013-05-02", "2013-05-05"], freq=result.freq + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range("2013-05-01", periods=3, weekmask="Sun Mon Tue Wed Thu") + + def test_cdaterange_holidays(self): + result = bdate_range("2013-05-01", periods=3, freq="C", holidays=["2013-05-01"]) + expected = DatetimeIndex( + ["2013-05-02", "2013-05-03", "2013-05-06"], freq=result.freq + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range("2013-05-01", periods=3, holidays=["2013-05-01"]) + + def test_cdaterange_weekmask_and_holidays(self): + result = bdate_range( + "2013-05-01", + periods=3, + freq="C", + weekmask="Sun Mon Tue Wed Thu", + holidays=["2013-05-01"], + ) + expected = DatetimeIndex( + ["2013-05-02", "2013-05-05", "2013-05-06"], freq=result.freq + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range( + "2013-05-01", + periods=3, + weekmask="Sun Mon Tue Wed Thu", + holidays=["2013-05-01"], + ) + + @pytest.mark.parametrize( + "freq", [freq for freq in prefix_mapping if freq.startswith("C")] + ) + def test_all_custom_freq(self, freq): + # should not raise + bdate_range( + START, END, freq=freq, weekmask="Mon Wed Fri", holidays=["2009-03-14"] + ) + + bad_freq = freq + "FOO" + msg = f"invalid custom frequency string: {bad_freq}" + with pytest.raises(ValueError, match=msg): + bdate_range(START, END, freq=bad_freq) + + @pytest.mark.parametrize( + "start_end", + [ + ("2018-01-01T00:00:01.000Z", "2018-01-03T00:00:01.000Z"), + ("2018-01-01T00:00:00.010Z", "2018-01-03T00:00:00.010Z"), + ("2001-01-01T00:00:00.010Z", "2001-01-03T00:00:00.010Z"), + ], + ) + def test_range_with_millisecond_resolution(self, start_end): + # https://github.com/pandas-dev/pandas/issues/24110 + start, end = start_end + result = date_range(start=start, end=end, periods=2, inclusive="left") + expected = DatetimeIndex([start]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start,period,expected", + [ + ("2022-07-23 00:00:00+02:00", 1, ["2022-07-25 00:00:00+02:00"]), + ("2022-07-22 00:00:00+02:00", 1, ["2022-07-22 00:00:00+02:00"]), + ( + 
"2022-07-22 00:00:00+02:00", + 2, + ["2022-07-22 00:00:00+02:00", "2022-07-25 00:00:00+02:00"], + ), + ], + ) + def test_range_with_timezone_and_custombusinessday(self, start, period, expected): + # GH49441 + result = date_range(start=start, periods=period, freq="C") + expected = DatetimeIndex(expected) + tm.assert_index_equal(result, expected) + + +def test_date_range_with_custom_holidays(): + # GH 30593 + freq = offsets.CustomBusinessHour(start="15:00", holidays=["2020-11-26"]) + result = date_range(start="2020-11-25 15:00", periods=4, freq=freq) + expected = DatetimeIndex( + [ + "2020-11-25 15:00:00", + "2020-11-25 16:00:00", + "2020-11-27 15:00:00", + "2020-11-27 16:00:00", + ], + freq=freq, + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py new file mode 100644 index 00000000..5c85221c --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -0,0 +1,168 @@ +from datetime import date + +import dateutil +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Timestamp, + date_range, + offsets, +) +import pandas._testing as tm + + +class TestDatetimeIndex: + def test_time_overflow_for_32bit_machines(self): + # GH8943. On some machines NumPy defaults to np.int32 (for example, + # 32-bit Linux machines). In the function _generate_regular_range + # found in tseries/index.py, `periods` gets multiplied by `strides` + # (which has value 1e9) and since the max value for np.int32 is ~2e9, + # and since those machines won't promote np.int32 to np.int64, we get + # overflow. + periods = np.int_(1000) + + idx1 = date_range(start="2000", periods=periods, freq="S") + assert len(idx1) == periods + + idx2 = date_range(end="2000", periods=periods, freq="S") + assert len(idx2) == periods + + def test_nat(self): + assert DatetimeIndex([np.nan])[0] is pd.NaT + + def test_week_of_month_frequency(self): + # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise + d1 = date(2002, 9, 1) + d2 = date(2013, 10, 27) + d3 = date(2012, 9, 30) + idx1 = DatetimeIndex([d1, d2]) + idx2 = DatetimeIndex([d3]) + result_append = idx1.append(idx2) + expected = DatetimeIndex([d1, d2, d3]) + tm.assert_index_equal(result_append, expected) + result_union = idx1.union(idx2) + expected = DatetimeIndex([d1, d3, d2]) + tm.assert_index_equal(result_union, expected) + + # GH 5115 + result = date_range("2013-1-1", periods=4, freq="WOM-1SAT") + dates = ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"] + expected = DatetimeIndex(dates, freq="WOM-1SAT") + tm.assert_index_equal(result, expected) + + def test_append_nondatetimeindex(self): + rng = date_range("1/1/2000", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timestamp) + + def test_iteration_preserves_tz(self): + # see gh-8890 + index = date_range("2012-01-01", periods=3, freq="H", tz="US/Eastern") + + for i, ts in enumerate(index): + result = ts + expected = index[i] + assert result == expected + + index = date_range( + "2012-01-01", periods=3, freq="H", tz=dateutil.tz.tzoffset(None, -28800) + ) + + for i, ts in enumerate(index): + result = ts + expected = index[i] + assert result._repr_base == expected._repr_base + assert result == expected + + # 9100 + index = DatetimeIndex( + ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"] + ) + for i, ts in enumerate(index): + result = ts + expected = index[i] 
+ assert result._repr_base == expected._repr_base + assert result == expected + + @pytest.mark.parametrize("periods", [0, 9999, 10000, 10001]) + def test_iteration_over_chunksize(self, periods): + # GH21012 + + index = date_range("2000-01-01 00:00:00", periods=periods, freq="min") + num = 0 + for stamp in index: + assert index[num] == stamp + num += 1 + assert num == len(index) + + def test_misc_coverage(self): + rng = date_range("1/1/2000", periods=5) + result = rng.groupby(rng.day) + assert isinstance(list(result.values())[0][0], Timestamp) + + def test_groupby_function_tuple_1677(self): + df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100)) + monthly_group = df.groupby(lambda x: (x.year, x.month)) + + result = monthly_group.mean() + assert isinstance(result.index[0], tuple) + + def assert_index_parameters(self, index): + assert index.freq == "40960N" + assert index.inferred_freq == "40960N" + + def test_ns_index(self): + nsamples = 400 + ns = int(1e9 / 24414) + dtstart = np.datetime64("2012-09-20T00:00:00") + + dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns") + freq = ns * offsets.Nano() + index = DatetimeIndex(dt, freq=freq, name="time") + self.assert_index_parameters(index) + + new_index = date_range(start=index[0], end=index[-1], freq=index.freq) + self.assert_index_parameters(new_index) + + def test_asarray_tz_naive(self): + # This shouldn't produce a warning. + idx = date_range("2000", periods=2) + # M8[ns] by default + result = np.asarray(idx) + + expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + # optionally, object + result = np.asarray(idx, dtype=object) + + expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")]) + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_aware(self): + tz = "US/Central" + idx = date_range("2000", periods=2, tz=tz) + expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]") + result = np.asarray(idx, dtype="datetime64[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Old behavior with no warning + result = np.asarray(idx, dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Future behavior with no warning + expected = np.array( + [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)] + ) + result = np.asarray(idx, dtype=object) + + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py new file mode 100644 index 00000000..31ec8c49 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -0,0 +1,39 @@ +""" generic tests from the Datetimelike class """ +import pytest + +from pandas import ( + DatetimeIndex, + date_range, +) +import pandas._testing as tm +from pandas.tests.indexes.datetimelike import DatetimeLike + + +class TestDatetimeIndex(DatetimeLike): + _index_cls = DatetimeIndex + + @pytest.fixture + def simple_index(self) -> DatetimeIndex: + return date_range("20130101", periods=5) + + @pytest.fixture( + params=[tm.makeDateIndex(10), date_range("20130110", periods=10, freq="-1D")], + ids=["index_inc", "index_dec"], + ) + def index(self, request): + return request.param + + def test_format(self, simple_index): + # GH35439 + idx = simple_index + expected = [f"{x:%Y-%m-%d}" for x in idx] + assert idx.format() == expected + + def test_shift(self): + pass # handled in test_ops + + def test_intersection(self): + pass # handled in 
test_setops + + def test_union(self): + pass # handled in test_setops diff --git a/pandas/tests/indexes/datetimes/test_delete.py b/pandas/tests/indexes/datetimes/test_delete.py new file mode 100644 index 00000000..e9de5a05 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_delete.py @@ -0,0 +1,138 @@ +import pytest + +from pandas import ( + DatetimeIndex, + Series, + date_range, +) +import pandas._testing as tm + + +class TestDelete: + def test_delete(self): + idx = date_range(start="2000-01-01", periods=5, freq="M", name="idx") + + # preserve freq + expected_0 = date_range(start="2000-02-01", periods=4, freq="M", name="idx") + expected_4 = date_range(start="2000-01-01", periods=4, freq="M", name="idx") + + # reset freq to None + expected_1 = DatetimeIndex( + ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"], + freq=None, + name="idx", + ) + + cases = { + 0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + with pytest.raises((IndexError, ValueError), match="out of bounds"): + # either depending on numpy version + idx.delete(5) + + for tz in [None, "Asia/Tokyo", "US/Pacific"]: + idx = date_range( + start="2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz + ) + + expected = date_range( + start="2000-01-01 10:00", periods=9, freq="H", name="idx", tz=tz + ) + result = idx.delete(0) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freqstr == "H" + assert result.tz == expected.tz + + expected = date_range( + start="2000-01-01 09:00", periods=9, freq="H", name="idx", tz=tz + ) + result = idx.delete(-1) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freqstr == "H" + assert result.tz == expected.tz + + def test_delete_slice(self): + idx = date_range(start="2000-01-01", periods=10, freq="D", name="idx") + + # preserve freq + expected_0_2 = date_range(start="2000-01-04", periods=7, freq="D", name="idx") + expected_7_9 = date_range(start="2000-01-01", periods=7, freq="D", name="idx") + + # reset freq to None + expected_3_5 = DatetimeIndex( + [ + "2000-01-01", + "2000-01-02", + "2000-01-03", + "2000-01-07", + "2000-01-08", + "2000-01-09", + "2000-01-10", + ], + freq=None, + name="idx", + ) + + cases = { + (0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + result = idx.delete(slice(n[0], n[-1] + 1)) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + for tz in [None, "Asia/Tokyo", "US/Pacific"]: + ts = Series( + 1, + index=date_range( + "2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz + ), + ) + # preserve freq + result = ts.drop(ts.index[:5]).index + expected = date_range( + "2000-01-01 14:00", periods=5, freq="H", name="idx", tz=tz + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + # reset freq to None + result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index + expected = DatetimeIndex( + [ + "2000-01-01 09:00", + "2000-01-01 11:00", + "2000-01-01 13:00", + "2000-01-01 15:00", + 
"2000-01-01 17:00", + ], + freq=None, + name="idx", + tz=tz, + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py new file mode 100644 index 00000000..197038db --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -0,0 +1,273 @@ +from datetime import datetime + +import dateutil.tz +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + DatetimeIndex, + Series, +) +import pandas._testing as tm + + +def test_to_native_types_method_deprecated(): + index = pd.date_range(freq="1D", periods=3, start="2017-01-01") + expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object) + + with tm.assert_produces_warning(FutureWarning): + result = index.to_native_types() + + tm.assert_numpy_array_equal(result, expected) + + # Make sure slicing works + expected = np.array(["2017-01-01", "2017-01-03"], dtype=object) + + with tm.assert_produces_warning(FutureWarning): + result = index.to_native_types([0, 2]) + + tm.assert_numpy_array_equal(result, expected) + + +def test_to_native_types(): + index = pd.date_range(freq="1D", periods=3, start="2017-01-01") + + # First, with no arguments. + expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object) + + result = index._format_native_types() + tm.assert_numpy_array_equal(result, expected) + + # No NaN values, so na_rep has no effect + result = index._format_native_types(na_rep="pandas") + tm.assert_numpy_array_equal(result, expected) + + # Make sure date formatting works + expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object) + + result = index._format_native_types(date_format="%m-%Y-%d") + tm.assert_numpy_array_equal(result, expected) + + # NULL object handling should work + index = DatetimeIndex(["2017-01-01", pd.NaT, "2017-01-03"]) + expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object) + + result = index._format_native_types() + tm.assert_numpy_array_equal(result, expected) + + expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object) + + result = index._format_native_types(na_rep="pandas") + tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeIndexRendering: + def test_dti_repr_short(self): + dr = pd.date_range(start="1/1/2012", periods=1) + repr(dr) + + dr = pd.date_range(start="1/1/2012", periods=2) + repr(dr) + + dr = pd.date_range(start="1/1/2012", periods=3) + repr(dr) + + @pytest.mark.parametrize("method", ["__repr__", "__str__"]) + def test_dti_representation(self, method): + idxs = [] + idxs.append(DatetimeIndex([], freq="D")) + idxs.append(DatetimeIndex(["2011-01-01"], freq="D")) + idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")) + idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")) + idxs.append( + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + tz="Asia/Tokyo", + ) + ) + idxs.append( + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern" + ) + ) + idxs.append( + DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="UTC") + ) + + exp = [] + exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')") + exp.append( + "DatetimeIndex(['2011-01-01', '2011-01-02'], " 
+ "dtype='datetime64[ns]', freq='D')" + ) + exp.append( + "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " + "dtype='datetime64[ns]', freq='D')" + ) + exp.append( + "DatetimeIndex(['2011-01-01 09:00:00+09:00', " + "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" + ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')" + ) + exp.append( + "DatetimeIndex(['2011-01-01 09:00:00-05:00', " + "'2011-01-01 10:00:00-05:00', 'NaT'], " + "dtype='datetime64[ns, US/Eastern]', freq=None)" + ) + exp.append( + "DatetimeIndex(['2011-01-01 09:00:00+00:00', " + "'2011-01-01 10:00:00+00:00', 'NaT'], " + "dtype='datetime64[ns, UTC]', freq=None)" + "" + ) + + with pd.option_context("display.width", 300): + for indx, expected in zip(idxs, exp): + result = getattr(indx, method)() + assert result == expected + + def test_dti_representation_to_series(self): + idx1 = DatetimeIndex([], freq="D") + idx2 = DatetimeIndex(["2011-01-01"], freq="D") + idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D") + idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + idx5 = DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + tz="Asia/Tokyo", + ) + idx6 = DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern" + ) + idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"]) + + exp1 = """Series([], dtype: datetime64[ns])""" + + exp2 = "0 2011-01-01\ndtype: datetime64[ns]" + + exp3 = "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]" + + exp4 = ( + "0 2011-01-01\n" + "1 2011-01-02\n" + "2 2011-01-03\n" + "dtype: datetime64[ns]" + ) + + exp5 = ( + "0 2011-01-01 09:00:00+09:00\n" + "1 2011-01-01 10:00:00+09:00\n" + "2 2011-01-01 11:00:00+09:00\n" + "dtype: datetime64[ns, Asia/Tokyo]" + ) + + exp6 = ( + "0 2011-01-01 09:00:00-05:00\n" + "1 2011-01-01 10:00:00-05:00\n" + "2 NaT\n" + "dtype: datetime64[ns, US/Eastern]" + ) + + exp7 = ( + "0 2011-01-01 09:00:00\n" + "1 2011-01-02 10:15:00\n" + "dtype: datetime64[ns]" + ) + + with pd.option_context("display.width", 300): + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5, idx6, idx7], + [exp1, exp2, exp3, exp4, exp5, exp6, exp7], + ): + result = repr(Series(idx)) + assert result == expected + + def test_dti_summary(self): + # GH#9116 + idx1 = DatetimeIndex([], freq="D") + idx2 = DatetimeIndex(["2011-01-01"], freq="D") + idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D") + idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + idx5 = DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + tz="Asia/Tokyo", + ) + idx6 = DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern" + ) + + exp1 = "DatetimeIndex: 0 entries\nFreq: D" + + exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D" + + exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D" + + exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D" + + exp5 = ( + "DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " + "to 2011-01-01 11:00:00+09:00\n" + "Freq: H" + ) + + exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" + + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6] + ): + result = idx._summary() + assert result == expected + + def test_dti_business_repr(self): + # only really care that it works + repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))) + + def 
test_dti_business_summary(self): + rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1)) + rng._summary() + rng[2:2]._summary() + + def test_dti_business_summary_pytz(self): + pd.bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc)._summary() + + def test_dti_business_summary_dateutil(self): + pd.bdate_range("1/1/2005", "1/1/2009", tz=dateutil.tz.tzutc())._summary() + + def test_dti_custom_business_repr(self): + # only really care that it works + repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), freq="C")) + + def test_dti_custom_business_summary(self): + rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), freq="C") + rng._summary() + rng[2:2]._summary() + + def test_dti_custom_business_summary_pytz(self): + pd.bdate_range("1/1/2005", "1/1/2009", freq="C", tz=pytz.utc)._summary() + + def test_dti_custom_business_summary_dateutil(self): + pd.bdate_range( + "1/1/2005", "1/1/2009", freq="C", tz=dateutil.tz.tzutc() + )._summary() + + +class TestFormat: + def test_format_with_name_time_info(self): + # bug I fixed 12/20/2011 + dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something") + + formatted = dates.format(name=True) + assert formatted[0] == "something" + + def test_format_datetime_with_time(self): + dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) + + result = dti.format() + expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"] + assert len(result) == 2 + assert result == expected diff --git a/pandas/tests/indexes/datetimes/test_freq_attr.py b/pandas/tests/indexes/datetimes/test_freq_attr.py new file mode 100644 index 00000000..f5821a31 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_freq_attr.py @@ -0,0 +1,61 @@ +import pytest + +from pandas import ( + DatetimeIndex, + date_range, +) + +from pandas.tseries.offsets import ( + BDay, + DateOffset, + Day, + Hour, +) + + +class TestFreq: + def test_freq_setter_errors(self): + # GH#20678 + idx = DatetimeIndex(["20180101", "20180103", "20180105"]) + + # setting with an incompatible freq + msg = ( + "Inferred frequency 2D from passed values does not conform to " + "passed frequency 5D" + ) + with pytest.raises(ValueError, match=msg): + idx._data.freq = "5D" + + # setting with non-freq string + with pytest.raises(ValueError, match="Invalid frequency"): + idx._data.freq = "foo" + + @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) + @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_freq_setter(self, values, freq, tz): + # GH#20678 + idx = DatetimeIndex(values, tz=tz) + + # can set to an offset, converting from string if necessary + idx._data.freq = freq + assert idx.freq == freq + assert isinstance(idx.freq, DateOffset) + + # can reset to None + idx._data.freq = None + assert idx.freq is None + + def test_freq_view_safe(self): + # Setting the freq for one DatetimeIndex shouldn't alter the freq + # for another that views the same data + + dti = date_range("2016-01-01", periods=5) + dta = dti._data + + dti2 = DatetimeIndex(dta)._with_freq(None) + assert dti2.freq is None + + # Original was not altered + assert dti.freq == "D" + assert dta.freq == "D" diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py new file mode 100644 index 00000000..a203fee5 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -0,0 +1,808 @@ +from datetime import ( + date, + datetime, + time, + 
timedelta, +) + +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + bdate_range, + date_range, + notna, +) +import pandas._testing as tm + +from pandas.tseries.frequencies import to_offset + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestGetItem: + def test_getitem_slice_keeps_name(self): + # GH4226 + st = Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles") + et = Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles") + dr = date_range(st, et, freq="H", name="timebucket") + assert dr[1:].name == dr.name + + def test_getitem(self): + idx1 = date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx2 = date_range( + "2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx" + ) + + for idx in [idx1, idx2]: + result = idx[0] + assert result == Timestamp("2011-01-01", tz=idx.tz) + + result = idx[0:5] + expected = date_range( + "2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[0:10:2] + expected = date_range( + "2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[-20:-5:3] + expected = date_range( + "2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[4::-1] + expected = DatetimeIndex( + ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"], + freq="-1D", + tz=idx.tz, + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_dti_business_getitem(self, freq): + rng = bdate_range(START, END, freq=freq) + smaller = rng[:5] + exp = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq) + tm.assert_index_equal(smaller, exp) + assert smaller.freq == exp.freq + assert smaller.freq == rng.freq + + sliced = rng[::5] + assert sliced.freq == to_offset(freq) * 5 + + fancy_indexed = rng[[4, 3, 2, 1, 0]] + assert len(fancy_indexed) == 5 + assert isinstance(fancy_indexed, DatetimeIndex) + assert fancy_indexed.freq is None + + # 32-bit vs. 
64-bit platforms + assert rng[4] == rng[np.int_(4)] + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_dti_business_getitem_matplotlib_hackaround(self, freq): + rng = bdate_range(START, END, freq=freq) + with tm.assert_produces_warning(FutureWarning): + # GH#30588 multi-dimensional indexing deprecated + values = rng[:, None] + expected = rng.values[:, None] + tm.assert_numpy_array_equal(values, expected) + + def test_getitem_int_list(self): + dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + dti2 = dti[[1, 3, 5]] + + v1 = dti2[0] + v2 = dti2[1] + v3 = dti2[2] + + assert v1 == Timestamp("2/28/2005") + assert v2 == Timestamp("4/30/2005") + assert v3 == Timestamp("6/30/2005") + + # getitem with non-slice drops freq + assert dti2.freq is None + + +class TestWhere: + def test_where_doesnt_retain_freq(self): + dti = date_range("20130101", periods=3, freq="D", name="idx") + cond = [True, True, False] + expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx") + + result = dti.where(cond, dti[::-1]) + tm.assert_index_equal(result, expected) + + def test_where_other(self): + # other is ndarray or Index + i = date_range("20130101", periods=3, tz="US/Eastern") + + for arr in [np.nan, pd.NaT]: + result = i.where(notna(i), other=arr) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notna(i2), i2) + tm.assert_index_equal(result, i2) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notna(i2), i2._values) + tm.assert_index_equal(result, i2) + + def test_where_invalid_dtypes(self): + dti = date_range("20130101", periods=3, tz="US/Eastern") + + tail = dti[2:].tolist() + i2 = Index([pd.NaT, pd.NaT] + tail) + + mask = notna(i2) + + # passing tz-naive ndarray to tzaware DTI + result = dti.where(mask, i2.values) + expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object) + tm.assert_index_equal(result, expected) + + # passing tz-aware DTI to tznaive DTI + naive = dti.tz_localize(None) + result = naive.where(mask, i2) + expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object) + tm.assert_index_equal(result, expected) + + pi = i2.tz_localize(None).to_period("D") + result = dti.where(mask, pi) + expected = Index([pi[0], pi[1]] + tail, dtype=object) + tm.assert_index_equal(result, expected) + + tda = i2.asi8.view("timedelta64[ns]") + result = dti.where(mask, tda) + expected = Index([tda[0], tda[1]] + tail, dtype=object) + assert isinstance(expected[0], np.timedelta64) + tm.assert_index_equal(result, expected) + + result = dti.where(mask, i2.asi8) + expected = Index([pd.NaT.value, pd.NaT.value] + tail, dtype=object) + assert isinstance(expected[0], int) + tm.assert_index_equal(result, expected) + + # non-matching scalar + td = pd.Timedelta(days=4) + result = dti.where(mask, td) + expected = Index([td, td] + tail, dtype=object) + assert expected[0] is td + tm.assert_index_equal(result, expected) + + def test_where_mismatched_nat(self, tz_aware_fixture): + tz = tz_aware_fixture + dti = date_range("2013-01-01", periods=3, tz=tz) + cond = np.array([True, False, True]) + + tdnat = np.timedelta64("NaT", "ns") + expected = Index([dti[0], tdnat, dti[2]], dtype=object) + assert expected[1] is tdnat + + result = dti.where(cond, tdnat) + tm.assert_index_equal(result, expected) + + def test_where_tz(self): + i = date_range("20130101", periods=3, tz="US/Eastern") + result = i.where(notna(i)) + expected = i + tm.assert_index_equal(result, 
expected) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notna(i2)) + expected = i2 + tm.assert_index_equal(result, expected) + + +class TestTake: + def test_take_nan_first_datetime(self): + index = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")]) + result = index.take([-1, 0, 1]) + expected = DatetimeIndex([index[-1], index[0], index[1]]) + tm.assert_index_equal(result, expected) + + def test_take(self): + # GH#10295 + idx1 = date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx2 = date_range( + "2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx" + ) + + for idx in [idx1, idx2]: + result = idx.take([0]) + assert result == Timestamp("2011-01-01", tz=idx.tz) + + result = idx.take([0, 1, 2]) + expected = date_range( + "2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = date_range( + "2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([7, 4, 1]) + expected = date_range( + "2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([3, 2, 5]) + expected = DatetimeIndex( + ["2011-01-04", "2011-01-03", "2011-01-06"], + freq=None, + tz=idx.tz, + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq is None + + result = idx.take([-3, 2, 5]) + expected = DatetimeIndex( + ["2011-01-29", "2011-01-03", "2011-01-06"], + freq=None, + tz=idx.tz, + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_take_invalid_kwargs(self): + idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + # TODO: This method came from test_datetime; de-dup with version above + @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"]) + def test_take2(self, tz): + dates = [ + datetime(2010, 1, 1, 14), + datetime(2010, 1, 1, 15), + datetime(2010, 1, 1, 17), + datetime(2010, 1, 1, 21), + ] + + idx = date_range( + start="2010-01-01 09:00", + end="2010-02-01 09:00", + freq="H", + tz=tz, + name="idx", + ) + expected = DatetimeIndex(dates, freq=None, name="idx", tz=tz) + + taken1 = idx.take([5, 6, 8, 12]) + taken2 = idx[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, DatetimeIndex) + assert taken.freq is None + assert taken.tz == expected.tz + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH#12631 + idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], 
name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "out of bounds" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + def test_take_fill_value_with_timezone(self): + idx = DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + result = idx.take(np.array([1, 0, -1])) + expected = DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = DatetimeIndex( + ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "out of bounds" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestGetLoc: + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc_method_exact_match(self, method): + idx = date_range("2000-01-01", periods=3) + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + if method is not None: + assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 + + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc(self): + idx = date_range("2000-01-01", periods=3) + + assert idx.get_loc("2000-01-01", method="nearest") == 0 + assert idx.get_loc("2000-01-01T12", method="nearest") == 1 + + assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) + == 1 + ) + assert ( + idx.get_loc( + "2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D") + ) + == 1 + ) + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 + ) + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") + with pytest.raises(KeyError, match="'2000-01-01T03'"): + idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") + with pytest.raises( + ValueError, match="tolerance size must match target index size" + ): + idx.get_loc( + "2000-01-01", + method="nearest", + tolerance=[ + pd.Timedelta("1day").to_timedelta64(), + pd.Timedelta("1day").to_timedelta64(), + ], + ) + + 
assert idx.get_loc("2000", method="nearest") == slice(0, 3) + assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) + + assert idx.get_loc("1999", method="nearest") == 0 + assert idx.get_loc("2001", method="nearest") == 2 + + with pytest.raises(KeyError, match="'1999'"): + idx.get_loc("1999", method="pad") + with pytest.raises(KeyError, match="'2001'"): + idx.get_loc("2001", method="backfill") + + with pytest.raises(KeyError, match="'foobar'"): + idx.get_loc("foobar") + with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): + idx.get_loc(slice(2)) + + idx = DatetimeIndex(["2000-01-01", "2000-01-04"]) + assert idx.get_loc("2000-01-02", method="nearest") == 0 + assert idx.get_loc("2000-01-03", method="nearest") == 1 + assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) + + def test_get_loc_time_obj(self): + # time indexing + idx = date_range("2000-01-01", periods=24, freq="H") + + result = idx.get_loc(time(12)) + expected = np.array([12]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + result = idx.get_loc(time(12, 30)) + expected = np.array([]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + msg = "cannot yet lookup inexact labels when key is a time object" + with pytest.raises(NotImplementedError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + idx.get_loc(time(12, 30), method="pad") + + def test_get_loc_time_obj2(self): + # GH#8667 + + from pandas._libs.index import _SIZE_CUTOFF + + ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) + key = time(15, 11, 30) + start = key.hour * 3600 + key.minute * 60 + key.second + step = 24 * 3600 + + for n in ns: + idx = date_range("2014-11-26", periods=n, freq="S") + ts = pd.Series(np.random.randn(n), index=idx) + locs = np.arange(start, n, step, dtype=np.intp) + + result = ts.index.get_loc(key) + tm.assert_numpy_array_equal(result, locs) + tm.assert_series_equal(ts[key], ts.iloc[locs]) + + left, right = ts.copy(), ts.copy() + left[key] *= -10 + right.iloc[locs] *= -10 + tm.assert_series_equal(left, right) + + def test_get_loc_time_nat(self): + # GH#35114 + # Case where key's total microseconds happens to match iNaT % 1e6 // 1000 + tic = time(minute=12, second=43, microsecond=145224) + dti = DatetimeIndex([pd.NaT]) + + loc = dti.get_loc(tic) + expected = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(loc, expected) + + def test_get_loc_tz_aware(self): + # https://github.com/pandas-dev/pandas/issues/32140 + dti = date_range( + Timestamp("2019-12-12 00:00:00", tz="US/Eastern"), + Timestamp("2019-12-13 00:00:00", tz="US/Eastern"), + freq="5s", + ) + key = Timestamp("2019-12-12 10:19:25", tz="US/Eastern") + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + result = dti.get_loc(key, method="nearest") + assert result == 7433 + + def test_get_loc_nat(self): + # GH#20464 + index = DatetimeIndex(["1/3/2000", "NaT"]) + assert index.get_loc(pd.NaT) == 1 + + assert index.get_loc(None) == 1 + + assert index.get_loc(np.nan) == 1 + + assert index.get_loc(pd.NA) == 1 + + assert index.get_loc(np.datetime64("NaT")) == 1 + + with pytest.raises(KeyError, match="NaT"): + index.get_loc(np.timedelta64("NaT")) + + @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)]) + def test_get_loc_timedelta_invalid_key(self, key): + # GH#20464 + dti = date_range("1970-01-01", periods=10) + msg = "Cannot index DatetimeIndex with [Tt]imedelta" + with pytest.raises(TypeError, match=msg): + dti.get_loc(key) + 
+ def test_get_loc_reasonable_key_error(self): + # GH#1062 + index = DatetimeIndex(["1/3/2000"]) + with pytest.raises(KeyError, match="2000"): + index.get_loc("1/1/2000") + + def test_get_loc_year_str(self): + rng = date_range("1/1/2000", "1/1/2010") + + result = rng.get_loc("2009") + expected = slice(3288, 3653) + assert result == expected + + +class TestContains: + def test_dti_contains_with_duplicates(self): + d = datetime(2011, 12, 5, 20, 30) + ix = DatetimeIndex([d, d]) + assert d in ix + + @pytest.mark.parametrize( + "vals", + [ + [0, 1, 0], + [0, 0, -1], + [0, -1, -1], + ["2015", "2015", "2016"], + ["2015", "2015", "2014"], + ], + ) + def test_contains_nonunique(self, vals): + # GH#9512 + idx = DatetimeIndex(vals) + assert idx[0] in idx + + +class TestGetIndexer: + def test_get_indexer_date_objs(self): + rng = date_range("1/1/2000", periods=20) + + result = rng.get_indexer(rng.map(lambda x: x.date())) + expected = rng.get_indexer(rng) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer(self): + idx = date_range("2000-01-01", periods=3) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) + + target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")), + np.array([0, -1, 1], dtype=np.intp), + ) + tol_raw = [ + pd.Timedelta("1 hour"), + pd.Timedelta("1 hour"), + pd.Timedelta("1 hour").to_timedelta64(), + ] + tm.assert_numpy_array_equal( + idx.get_indexer( + target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw] + ), + np.array([0, -1, 1], dtype=np.intp), + ) + tol_bad = [ + pd.Timedelta("2 hour").to_timedelta64(), + pd.Timedelta("1 hour").to_timedelta64(), + "foo", + ] + msg = "Could not convert 'foo' to NumPy timedelta" + with pytest.raises(ValueError, match=msg): + idx.get_indexer(target, "nearest", tolerance=tol_bad) + with pytest.raises(ValueError, match="abbreviation w/o a number"): + idx.get_indexer(idx[[0]], method="nearest", tolerance="foo") + + @pytest.mark.parametrize( + "target", + [ + [date(2020, 1, 1), Timestamp("2020-01-02")], + [Timestamp("2020-01-01"), date(2020, 1, 2)], + ], + ) + def test_get_indexer_mixed_dtypes(self, target): + # https://github.com/pandas-dev/pandas/issues/33741 + values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) + result = values.get_indexer(target) + expected = np.array([0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "target, positions", + [ + ([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]), + ([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]), + ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]), + ], + ) + @pytest.mark.filterwarnings("ignore:Comparison of Timestamp.*:FutureWarning") + def test_get_indexer_out_of_bounds_date(self, target, positions): + values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) + + result = values.get_indexer(target) + expected = np.array(positions, dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_pad_requires_monotonicity(self): + rng = date_range("1/1/2000", 
"3/1/2000", freq="B") + + # neither monotonic increasing or decreasing + rng2 = rng[[1, 0, 2]] + + msg = "index must be monotonic increasing or decreasing" + with pytest.raises(ValueError, match=msg): + rng2.get_indexer(rng, method="pad") + + +class TestMaybeCastSliceBound: + def test_maybe_cast_slice_bounds_empty(self): + # GH#14354 + empty_idx = date_range(freq="1H", periods=0, end="2015") + + right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right") + exp = Timestamp("2015-01-02 23:59:59.999999999") + assert right == exp + + left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left") + exp = Timestamp("2015-01-02 00:00:00") + assert left == exp + + def test_maybe_cast_slice_duplicate_monotonic(self): + # https://github.com/pandas-dev/pandas/issues/16515 + idx = DatetimeIndex(["2017", "2017"]) + result = idx._maybe_cast_slice_bound("2017-01-01", "left") + expected = Timestamp("2017-01-01") + assert result == expected + + +class TestGetValue: + def test_get_value(self): + # specifically make sure we have test for np.datetime64 key + dti = date_range("2016-01-01", periods=3) + + arr = np.arange(6, 9) + ser = pd.Series(arr, index=dti) + + key = dti[1] + + with pytest.raises(AttributeError, match="has no attribute '_values'"): + with tm.assert_produces_warning(FutureWarning): + dti.get_value(arr, key) + + with tm.assert_produces_warning(FutureWarning): + result = dti.get_value(ser, key) + assert result == 7 + + with tm.assert_produces_warning(FutureWarning): + result = dti.get_value(ser, key.to_pydatetime()) + assert result == 7 + + with tm.assert_produces_warning(FutureWarning): + result = dti.get_value(ser, key.to_datetime64()) + assert result == 7 + + +class TestGetSliceBounds: + @pytest.mark.parametrize("box", [date, datetime, Timestamp]) + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) + def test_get_slice_bounds_datetime_within( + self, box, kind, side, expected, tz_aware_fixture + ): + # GH 35690 + tz = tz_aware_fixture + index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz) + key = box(year=2000, month=1, day=7) + + warn = None if tz is None else FutureWarning + with tm.assert_produces_warning(warn): + # GH#36148 will require tzawareness-compat + result = index.get_slice_bound(key, kind=kind, side=side) + assert result == expected + + @pytest.mark.parametrize("box", [datetime, Timestamp]) + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side", ["left", "right"]) + @pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)]) + def test_get_slice_bounds_datetime_outside( + self, box, kind, side, year, expected, tz_aware_fixture + ): + # GH 35690 + tz = tz_aware_fixture + index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz) + key = box(year=year, month=1, day=7) + + warn = None if tz is None else FutureWarning + with tm.assert_produces_warning(warn): + # GH#36148 will require tzawareness-compat + result = index.get_slice_bound(key, kind=kind, side=side) + assert result == expected + + @pytest.mark.parametrize("box", [datetime, Timestamp]) + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + def test_slice_datetime_locs(self, box, kind, tz_aware_fixture): + # GH 34077 + tz = tz_aware_fixture + index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz) + key = box(2010, 1, 1) + + warn = None if tz is None else FutureWarning + with tm.assert_produces_warning(warn): + # GH#36148 will require tzawareness-compat 
+ result = index.slice_locs(key, box(2010, 1, 2)) + expected = (0, 1) + assert result == expected + + +class TestIndexerBetweenTime: + def test_indexer_between_time(self): + # GH#11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + @pytest.mark.parametrize("unit", ["us", "ms", "s"]) + def test_indexer_between_time_non_nano(self, unit): + # For simple cases like this, the non-nano indexer_between_time + # should match the nano result + + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + arr_nano = rng._data._ndarray + + arr = arr_nano.astype(f"M8[{unit}]") + + dta = type(rng._data)._simple_new(arr, dtype=arr.dtype) + dti = DatetimeIndex(dta) + assert dti.dtype == arr.dtype + + tic = time(1, 25) + toc = time(2, 29) + + result = dti.indexer_between_time(tic, toc) + expected = rng.indexer_between_time(tic, toc) + tm.assert_numpy_array_equal(result, expected) + + # case with non-zero micros in arguments + tic = time(1, 25, 0, 45678) + toc = time(2, 29, 0, 1234) + + result = dti.indexer_between_time(tic, toc) + expected = rng.indexer_between_time(tic, toc) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py new file mode 100644 index 00000000..9afeb7ce --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -0,0 +1,152 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + date_range, + to_datetime, +) +import pandas._testing as tm + +from pandas.tseries.offsets import ( + BDay, + BMonthEnd, +) + + +class TestJoin: + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="dt", + ) + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_numpy_array_equal(cols.values, joined.values) + + def test_join_self(self, join_type): + index = date_range("1/1/2000", periods=10) + joined = index.join(index, how=join_type) + assert index is joined + + def test_join_with_period_index(self, join_type): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + s = df.iloc[:5, 0] + + expected = df.columns.astype("O").join(s.index, how=join_type) + result = df.columns.join(s.index, how=join_type) + tm.assert_index_equal(expected, result) + + def test_join_object_index(self): + rng = date_range("1/1/2000", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.join(idx, how="outer") + assert isinstance(result[0], Timestamp) + + def test_join_utc_convert(self, join_type): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + left = rng.tz_convert("US/Eastern") + right = rng.tz_convert("Europe/Berlin") + + result = left.join(left[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz + + result = left.join(right[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == "UTC" + + def test_datetimeindex_union_join_empty(self, sort): + dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") + empty = Index([]) + 
+ result = dti.union(empty, sort=sort) + expected = dti.astype("O") + tm.assert_index_equal(result, expected) + + result = dti.join(empty) + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) + + def test_join_nonunique(self): + idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"]) + idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]) + rs = idx1.join(idx2, how="outer") + assert rs.is_monotonic_increasing + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_outer_join(self, freq): + # should just behave as union + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + rng = date_range(start=start, end=end, freq=freq) + + # overlapping + left = rng[:10] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = rng[:5] + right = rng[10:] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + # non-overlapping, no gap + left = rng[:5] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # overlapping, but different offset + other = date_range(start, end, freq=BMonthEnd()) + + the_join = rng.join(other, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + def test_naive_aware_conflicts(self): + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + naive = date_range(start, end, freq=BDay(), tz=None) + aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong") + + msg = "tz-naive.*tz-aware" + with pytest.raises(TypeError, match=msg): + naive.join(aware) + + with pytest.raises(TypeError, match=msg): + aware.join(naive) + + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_join_preserves_freq(self, tz): + # GH#32157 + dti = date_range("2016-01-01", periods=10, tz=tz) + result = dti[:5].join(dti[5:], how="outer") + assert result.freq == dti.freq + tm.assert_index_equal(result, dti) + + result = dti[:5].join(dti[6:], how="outer") + assert result.freq is None + expected = dti.delete(5) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_map.py b/pandas/tests/indexes/datetimes/test_map.py new file mode 100644 index 00000000..45698ef2 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_map.py @@ -0,0 +1,47 @@ +import pytest + +from pandas import ( + DatetimeIndex, + Index, + MultiIndex, + Period, + date_range, +) +import pandas._testing as tm + + +class TestMap: + def test_map(self): + rng = date_range("1/1/2000", periods=10) + + f = lambda x: x.strftime("%Y%m%d") + result = rng.map(f) + exp = Index([f(x) for x in rng], dtype="<U8") + tm.assert_index_equal(result, exp) + + # non-boolean accessors -> return Index + for accessor in DatetimeArray._field_ops: + if accessor in ["week", "weekofyear"]: + # GH#33595 Deprecate week and weekofyear + continue + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == "name" + + # boolean accessors -> return array + for accessor in DatetimeArray._bool_ops: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res = dti[dti.is_leap_year] + exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name") + tm.assert_index_equal(res, exp) + + def test_datetimeindex_accessors2(self): + dti = 
date_range(freq="BQ-FEB", start=datetime(1998, 1, 1), periods=4) + + assert sum(dti.is_quarter_start) == 0 + assert sum(dti.is_quarter_end) == 4 + assert sum(dti.is_year_start) == 0 + assert sum(dti.is_year_end) == 1 + + def test_datetimeindex_accessors3(self): + # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, + bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu") + dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) + msg = "Custom business days is not supported by is_month_start" + with pytest.raises(ValueError, match=msg): + dti.is_month_start + + def test_datetimeindex_accessors4(self): + dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) + + assert dti.is_month_start[0] == 1 + + def test_datetimeindex_accessors5(self): + with tm.assert_produces_warning(FutureWarning, match="The 'freq' argument"): + tests = [ + (Timestamp("2013-06-01", freq="M").is_month_start, 1), + (Timestamp("2013-06-01", freq="BM").is_month_start, 0), + (Timestamp("2013-06-03", freq="M").is_month_start, 0), + (Timestamp("2013-06-03", freq="BM").is_month_start, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_month_end, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_quarter_end, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_year_end, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_month_start, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_quarter_start, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_year_start, 1), + (Timestamp("2013-03-31", freq="QS-FEB").is_month_end, 1), + (Timestamp("2013-03-31", freq="QS-FEB").is_quarter_end, 0), + (Timestamp("2013-03-31", freq="QS-FEB").is_year_end, 0), + (Timestamp("2013-02-01", freq="QS-FEB").is_month_start, 1), + (Timestamp("2013-02-01", freq="QS-FEB").is_quarter_start, 1), + (Timestamp("2013-02-01", freq="QS-FEB").is_year_start, 1), + (Timestamp("2013-06-30", freq="BQ").is_month_end, 0), + (Timestamp("2013-06-30", freq="BQ").is_quarter_end, 0), + (Timestamp("2013-06-30", freq="BQ").is_year_end, 0), + (Timestamp("2013-06-28", freq="BQ").is_month_end, 1), + (Timestamp("2013-06-28", freq="BQ").is_quarter_end, 1), + (Timestamp("2013-06-28", freq="BQ").is_year_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_month_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_quarter_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_year_end, 0), + (Timestamp("2013-06-28", freq="BQS-APR").is_month_end, 1), + (Timestamp("2013-06-28", freq="BQS-APR").is_quarter_end, 1), + (Timestamp("2013-03-29", freq="BQS-APR").is_year_end, 1), + (Timestamp("2013-11-01", freq="AS-NOV").is_year_start, 1), + (Timestamp("2013-10-31", freq="AS-NOV").is_year_end, 1), + (Timestamp("2012-02-01").days_in_month, 29), + (Timestamp("2013-02-01").days_in_month, 28), + ] + + for ts, value in tests: + assert ts == value + + def test_datetimeindex_accessors6(self): + # GH 6538: Check that DatetimeIndex and its TimeStamp elements + # return the same weekofyear accessor close to new year w/ tz + dates = ["2013/12/29", "2013/12/30", "2013/12/31"] + dates = DatetimeIndex(dates, tz="Europe/Brussels") + expected = [52, 1, 1] + assert dates.isocalendar().week.tolist() == expected + assert [d.weekofyear for d in dates] == expected + + # GH 12806 + # error: Unsupported operand types for + ("List[None]" and "List[str]") + @pytest.mark.parametrize( + "time_locale", [None] + (tm.get_locales() or []) # type: ignore[operator] + ) + def test_datetime_name_accessors(self, time_locale): + # Test Monday -> Sunday and January -> December, in that 
sequence + if time_locale is None: + # If the time_locale is None, day-name and month_name should + # return the english attributes + expected_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + expected_months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + ] + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_days = calendar.day_name[:] + expected_months = calendar.month_name[1:] + + # GH#11128 + dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365) + english_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + for day, name, eng_name in zip(range(4, 11), expected_days, english_days): + name = name.capitalize() + assert dti.day_name(locale=time_locale)[day] == name + assert dti.day_name(locale=None)[day] == eng_name + ts = Timestamp(datetime(2016, 4, day)) + assert ts.day_name(locale=time_locale) == name + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.day_name(locale=time_locale)[-1]) + ts = Timestamp(pd.NaT) + assert np.isnan(ts.day_name(locale=time_locale)) + + # GH#12805 + dti = date_range(freq="M", start="2012", end="2013") + result = dti.month_name(locale=time_locale) + expected = Index([month.capitalize() for month in expected_months]) + + # work around different normalization schemes + # https://github.com/pandas-dev/pandas/issues/22342 + result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + + tm.assert_index_equal(result, expected) + + for date, expected in zip(dti, expected_months): + result = date.month_name(locale=time_locale) + expected = expected.capitalize() + + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", expected) + + assert result == expected + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.month_name(locale=time_locale)[-1]) + + def test_nanosecond_field(self): + dti = DatetimeIndex(np.arange(10)) + + tm.assert_index_equal(dti.nanosecond, Index(np.arange(10, dtype=np.int64))) + + +def test_iter_readonly(): + # GH#28055 ints_to_pydatetime with readonly array + arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")]) + arr.setflags(write=False) + dti = pd.to_datetime(arr) + list(dti) + + +def test_week_and_weekofyear_are_deprecated(): + # GH#33595 Deprecate week and weekofyear + idx = date_range(start="2019-12-29", freq="D", periods=4) + with tm.assert_produces_warning(FutureWarning): + idx.week + with tm.assert_produces_warning(FutureWarning): + idx.weekofyear + + +def test_add_timedelta_preserves_freq(): + # GH#37295 should hold for any DTI with freq=None or Tick freq + tz = "Canada/Eastern" + dti = date_range( + start=Timestamp("2019-03-26 00:00:00-0400", tz=tz), + end=Timestamp("2020-10-17 00:00:00-0400", tz=tz), + freq="D", + ) + result = dti + Timedelta(days=1) + assert result.freq == dti.freq diff --git a/pandas/tests/indexes/datetimes/test_npfuncs.py b/pandas/tests/indexes/datetimes/test_npfuncs.py new file mode 100644 index 00000000..301466c0 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_npfuncs.py @@ -0,0 +1,13 @@ +import numpy as np + +from pandas import date_range +import pandas._testing as tm + + +class TestSplit: + def test_split_non_utc(self): + # GH#14042 + indices = date_range("2016-01-01 00:00:00+0200", freq="S", periods=10) + result = np.split(indices, indices_or_sections=[])[0] + expected = 
indices._with_freq(None) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py new file mode 100644 index 00000000..d6ef4198 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -0,0 +1,85 @@ +from datetime import datetime + +from dateutil.tz import tzlocal +import pytest + +from pandas.compat import IS64 + +from pandas import ( + DatetimeIndex, + Index, + bdate_range, + date_range, +) +import pandas._testing as tm + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestDatetimeIndexOps: + @pytest.mark.parametrize( + "freq,expected", + [ + ("A", "day"), + ("Q", "day"), + ("M", "day"), + ("D", "day"), + ("H", "hour"), + ("T", "minute"), + ("S", "second"), + ("L", "millisecond"), + ("U", "microsecond"), + ], + ) + def test_resolution(self, request, tz_naive_fixture, freq, expected): + tz = tz_naive_fixture + if freq == "A" and not IS64 and isinstance(tz, tzlocal): + request.node.add_marker( + pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038") + ) + + idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) + assert idx.resolution == expected + + def test_infer_freq(self, freq_sample): + # GH 11018 + idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) + result = DatetimeIndex(idx.asi8, freq="infer") + tm.assert_index_equal(idx, result) + assert result.freq == freq_sample + + +@pytest.mark.parametrize("freq", ["B", "C"]) +class TestBusinessDatetimeIndex: + @pytest.fixture + def rng(self, freq): + return bdate_range(START, END, freq=freq) + + def test_comparison(self, rng): + d = rng[10] + + comp = rng > d + assert comp[11] + assert not comp[9] + + def test_copy(self, rng): + cp = rng.copy() + repr(cp) + tm.assert_index_equal(cp, rng) + + def test_identical(self, rng): + t1 = rng.copy() + t2 = rng.copy() + assert t1.identical(t2) + + # name + t1 = t1.rename("foo") + assert t1.equals(t2) + assert not t1.identical(t2) + t2 = t2.rename("foo") + assert t1.identical(t2) + + # freq + t2v = Index(t2.values) + assert t1.equals(t2v) + assert not t1.identical(t2v) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py new file mode 100644 index 00000000..8ddcd6a4 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -0,0 +1,459 @@ +""" test partial slicing on Series/Frame """ + +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestSlicing: + def test_string_index_series_name_converted(self): + # GH#1644 + df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10)) + + result = df.loc["1/3/2000"] + assert result.name == df.index[2] + + result = df.T["1/3/2000"] + assert result.name == df.index[2] + + def test_stringified_slice_with_tz(self): + # GH#2658 + start = "2013-01-07" + idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern") + df = DataFrame(np.arange(10), index=idx) + df["2013-01-14 23:44:34.437768-05:00":] # no exception here + + def test_return_type_doesnt_depend_on_monotonicity(self): + # GH#24892 we get Series back regardless of whether our DTI is monotonic + dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3) + ser = Series(range(3), index=dti) + + # non-monotonic index + ser2 = Series(range(3), index=[dti[1], 
dti[0], dti[2]]) + + # key with resolution strictly lower than "min" + key = "2015-5-14 00" + + # monotonic increasing index + result = ser.loc[key] + expected = ser.iloc[1:] + tm.assert_series_equal(result, expected) + + # monotonic decreasing index + result = ser.iloc[::-1].loc[key] + expected = ser.iloc[::-1][:-1] + tm.assert_series_equal(result, expected) + + # non-monotonic index + result2 = ser2.loc[key] + expected2 = ser2.iloc[::2] + tm.assert_series_equal(result2, expected2) + + def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self): + # GH#24892 we get Series back regardless of whether our DTI is monotonic + dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3) + ser = Series(range(3), index=dti) + + # non-monotonic index + ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]]) + + # key with resolution strictly *higher) than "min" + key = "2015-5-14 00:00:00" + + # monotonic increasing index + result = ser.loc[key] + assert result == 1 + + # monotonic decreasing index + result = ser.iloc[::-1].loc[key] + assert result == 1 + + # non-monotonic index + result2 = ser2.loc[key] + assert result2 == 0 + + def test_monotone_DTI_indexing_bug(self): + # GH 19362 + # Testing accessing the first element in a monotonic descending + # partial string indexing. + + df = DataFrame(list(range(5))) + date_list = [ + "2018-01-02", + "2017-02-10", + "2016-03-10", + "2015-03-15", + "2014-03-16", + ] + date_index = DatetimeIndex(date_list) + df["date"] = date_index + expected = DataFrame({0: list(range(5)), "date": date_index}) + tm.assert_frame_equal(df, expected) + + # We get a slice because df.index's resolution is hourly and we + # are slicing with a daily-resolution string. If both were daily, + # we would get a single item back + dti = date_range("20170101 01:00:00", periods=3) + df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1]) + + expected = DataFrame({"A": 1}, index=dti[-1:][::-1]) + result = df.loc["2017-01-03"] + tm.assert_frame_equal(result, expected) + + result2 = df.iloc[::-1].loc["2017-01-03"] + expected2 = expected.iloc[::-1] + tm.assert_frame_equal(result2, expected2) + + def test_slice_year(self): + dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + result = s["2005"] + expected = s[s.index.year == 2005] + tm.assert_series_equal(result, expected) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + result = df.loc["2005"] + expected = df[df.index.year == 2005] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "partial_dtime", + [ + "2019", + "2019Q4", + "Dec 2019", + "2019-12-31", + "2019-12-31 23", + "2019-12-31 23:59", + ], + ) + def test_slice_end_of_period_resolution(self, partial_dtime): + # GH#31064 + dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s") + + ser = Series(range(10), index=dti) + result = ser[partial_dtime] + expected = ser.iloc[:5] + tm.assert_series_equal(result, expected) + + def test_slice_quarter(self): + dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + assert len(s["2001Q1"]) == 90 + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + assert len(df.loc["1Q01"]) == 90 + + def test_slice_month(self): + dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(dti)), index=dti) + assert len(s["2005-11"]) == 30 + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + assert len(df.loc["2005-11"]) == 30 + 
+ tm.assert_series_equal(s["2005-11"], s["11-2005"]) + + def test_partial_slice(self): + rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-05":"2006-02"] + expected = s["20050501":"20060228"] + tm.assert_series_equal(result, expected) + + result = s["2005-05":] + expected = s["20050501":] + tm.assert_series_equal(result, expected) + + result = s[:"2006-02"] + expected = s[:"20060228"] + tm.assert_series_equal(result, expected) + + result = s["2005-1-1"] + assert result == s.iloc[0] + + with pytest.raises(KeyError, match=r"^'2004-12-31'$"): + s["2004-12-31"] + + def test_partial_slice_daily(self): + rng = date_range(freq="H", start=datetime(2005, 1, 31), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-31"] + tm.assert_series_equal(result, s.iloc[:24]) + + with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"): + s["2004-12-31 00"] + + def test_partial_slice_hourly(self): + rng = date_range(freq="T", start=datetime(2005, 1, 1, 20, 0, 0), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-1"] + tm.assert_series_equal(result, s.iloc[: 60 * 4]) + + result = s["2005-1-1 20"] + tm.assert_series_equal(result, s.iloc[:60]) + + assert s["2005-1-1 20:00"] == s.iloc[0] + with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"): + s["2004-12-31 00:15"] + + def test_partial_slice_minutely(self): + rng = date_range(freq="S", start=datetime(2005, 1, 1, 23, 59, 0), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-1 23:59"] + tm.assert_series_equal(result, s.iloc[:60]) + + result = s["2005-1-1"] + tm.assert_series_equal(result, s.iloc[:60]) + + assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0] + with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"): + s["2004-12-31 00:00:00"] + + def test_partial_slice_second_precision(self): + rng = date_range( + start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990), + periods=20, + freq="US", + ) + s = Series(np.arange(20), rng) + + tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10]) + tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10]) + + tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:]) + tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:]) + + assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0] + with pytest.raises(KeyError, match="2005-1-1 00:00:00"): + s["2005-1-1 00:00:00"] + + def test_partial_slicing_dataframe(self): + # GH14856 + # Test various combinations of string slicing resolution vs. 
+ # index resolution + # - If string resolution is less precise than index resolution, + # string is considered a slice + # - If string resolution is equal to or more precise than index + # resolution, string is considered an exact match + formats = [ + "%Y", + "%Y-%m", + "%Y-%m-%d", + "%Y-%m-%d %H", + "%Y-%m-%d %H:%M", + "%Y-%m-%d %H:%M:%S", + ] + resolutions = ["year", "month", "day", "hour", "minute", "second"] + for rnum, resolution in enumerate(resolutions[2:], 2): + # we check only 'day', 'hour', 'minute' and 'second' + unit = Timedelta("1 " + resolution) + middate = datetime(2012, 1, 1, 0, 0, 0) + index = DatetimeIndex([middate - unit, middate, middate + unit]) + values = [1, 2, 3] + df = DataFrame({"a": values}, index, dtype=np.int64) + assert df.index.resolution == resolution + + # Timestamp with the same resolution as index + # Should be exact match for Series (return scalar) + # and raise KeyError for Frame + for timestamp, expected in zip(index, values): + ts_string = timestamp.strftime(formats[rnum]) + # make ts_string as precise as index + result = df["a"][ts_string] + assert isinstance(result, np.int64) + assert result == expected + msg = rf"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df[ts_string] + + # Timestamp with resolution less precise than index + for fmt in formats[:rnum]: + for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]: + ts_string = index[element].strftime(fmt) + + # Series should return slice + result = df["a"][ts_string] + expected = df["a"][theslice] + tm.assert_series_equal(result, expected) + + # Frame should return slice as well + with tm.assert_produces_warning(FutureWarning): + # GH#36179 deprecated this indexing + result = df[ts_string] + expected = df[theslice] + tm.assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than index + # Compatible with existing key + # Should return scalar for Series + # and raise KeyError for Frame + for fmt in formats[rnum + 1 :]: + ts_string = index[1].strftime(fmt) + result = df["a"][ts_string] + assert isinstance(result, np.int64) + assert result == 2 + msg = rf"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df[ts_string] + + # Not compatible with existing key + # Should raise KeyError + for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]: + ts = index[1] + Timedelta("1 " + res) + ts_string = ts.strftime(fmt) + msg = rf"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df["a"][ts_string] + with pytest.raises(KeyError, match=msg): + df[ts_string] + + def test_partial_slicing_with_multiindex(self): + + # GH 4758 + # partial string indexing with a multi-index buggy + df = DataFrame( + { + "ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], + "TICKER": ["ABC", "MNP", "XYZ", "XYZ"], + "val": [1, 2, 3, 4], + }, + index=date_range("2013-06-19 09:30:00", periods=4, freq="5T"), + ) + df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True) + + expected = DataFrame( + [[1]], index=Index(["ABC"], name="TICKER"), columns=["val"] + ) + result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")] + tm.assert_frame_equal(result, expected) + + expected = df_multi.loc[ + (Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC") + ] + result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")] + tm.assert_series_equal(result, expected) + + # partial string indexing on first level, scalar indexing on the other two + result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")] + expected = df_multi.iloc[:1].droplevel([1, 2]) + 
tm.assert_frame_equal(result, expected) + + def test_partial_slicing_with_multiindex_series(self): + # GH 4294 + # partial slice on a series mi + ser = DataFrame( + np.random.rand(1000, 1000), index=date_range("2000-1-1", periods=1000) + ).stack() + + s2 = ser[:-1].copy() + expected = s2["2000-1-4"] + result = s2[Timestamp("2000-1-4")] + tm.assert_series_equal(result, expected) + + result = ser[Timestamp("2000-1-4")] + expected = ser["2000-1-4"] + tm.assert_series_equal(result, expected) + + df2 = DataFrame(ser) + expected = df2.xs("2000-1-4") + result = df2.loc[Timestamp("2000-1-4")] + tm.assert_frame_equal(result, expected) + + def test_partial_slice_doesnt_require_monotonicity(self): + # For historical reasons. + ser = Series(np.arange(10), date_range("2014-01-01", periods=10)) + + nonmonotonic = ser[[3, 5, 4]] + expected = nonmonotonic.iloc[:0] + timestamp = Timestamp("2014-01-10") + with tm.assert_produces_warning(FutureWarning): + result = nonmonotonic["2014-01-10":] + tm.assert_series_equal(result, expected) + + with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic[timestamp:] + + with tm.assert_produces_warning(FutureWarning): + result = nonmonotonic.loc["2014-01-10":] + tm.assert_series_equal(result, expected) + + with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic.loc[timestamp:] + + def test_loc_datetime_length_one(self): + # GH16071 + df = DataFrame( + columns=["1"], + index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"), + ) + result = df.loc[datetime(2016, 10, 1) :] + tm.assert_frame_equal(result, df) + + result = df.loc["2016-10-01T00:00:00":] + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize( + "start", + [ + "2018-12-02 21:50:00+00:00", + Timestamp("2018-12-02 21:50:00+00:00"), + Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(), + ], + ) + @pytest.mark.parametrize( + "end", + [ + "2018-12-02 21:52:00+00:00", + Timestamp("2018-12-02 21:52:00+00:00"), + Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(), + ], + ) + def test_getitem_with_datestring_with_UTC_offset(self, start, end): + # GH 24076 + idx = date_range( + start="2018-12-02 14:50:00-07:00", + end="2018-12-02 14:50:00-07:00", + freq="1min", + ) + df = DataFrame(1, index=idx, columns=["A"]) + result = df[start:end] + expected = df.iloc[0:3, :] + tm.assert_frame_equal(result, expected) + + # GH 16785 + start = str(start) + end = str(end) + with pytest.raises(ValueError, match="Both dates must"): + df[start : end[:-4] + "1:00"] + + with pytest.raises(ValueError, match="The index must be timezone"): + df = df.tz_localize(None) + df[start:end] + + def test_slice_reduce_to_series(self): + # GH 27516 + df = DataFrame({"A": range(24)}, index=date_range("2000", periods=24, freq="M")) + expected = Series( + range(12), index=date_range("2000", periods=12, freq="M"), name="A" + ) + result = df.loc["2000", "A"] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_pickle.py b/pandas/tests/indexes/datetimes/test_pickle.py new file mode 100644 index 00000000..922b4a18 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_pickle.py @@ -0,0 +1,45 @@ +import pytest + +from pandas import ( + NaT, + date_range, + to_datetime, +) +import pandas._testing as tm + + +class TestPickle: + def test_pickle(self): + # GH#4606 + idx = to_datetime(["2013-01-01", NaT, "2014-01-06"]) + idx_p = tm.round_trip_pickle(idx) + assert idx_p[0] == idx[0] + assert idx_p[1] is NaT + assert idx_p[2] == idx[2] + + 
def test_pickle_dont_infer_freq(self): + # GH#11002 + # don't infer freq + idx = date_range("1750-1-1", "2050-1-1", freq="7D") + idx_p = tm.round_trip_pickle(idx) + tm.assert_index_equal(idx, idx_p) + + def test_pickle_after_set_freq(self): + dti = date_range("20130101", periods=3, tz="US/Eastern", name="foo") + dti = dti._with_freq(None) + + res = tm.round_trip_pickle(dti) + tm.assert_index_equal(res, dti) + + def test_roundtrip_pickle_with_tz(self): + # GH#8367 + # round-trip of timezone + index = date_range("20130101", periods=3, tz="US/Eastern", name="foo") + unpickled = tm.round_trip_pickle(index) + tm.assert_index_equal(index, unpickled) + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_pickle_unpickle(self, freq): + rng = date_range("2009-01-01", "2010-01-01", freq=freq) + unpickled = tm.round_trip_pickle(rng) + assert unpickled.freq == freq diff --git a/pandas/tests/indexes/datetimes/test_reindex.py b/pandas/tests/indexes/datetimes/test_reindex.py new file mode 100644 index 00000000..e4911aa3 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_reindex.py @@ -0,0 +1,56 @@ +from datetime import timedelta + +import numpy as np + +from pandas import ( + DatetimeIndex, + date_range, +) +import pandas._testing as tm + + +class TestDatetimeIndexReindex: + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): + # GH#7774 + index = date_range("2013-01-01", periods=3, tz="US/Eastern") + assert str(index.reindex([])[0].tz) == "US/Eastern" + assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern" + + def test_reindex_with_same_tz_nearest(self): + # GH#32740 + rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc") + rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc") + result1, result2 = rng_a.reindex( + rng_b, method="nearest", tolerance=timedelta(seconds=20) + ) + expected_list1 = [ + "2010-01-01 00:00:00", + "2010-01-01 01:05:27.272727272", + "2010-01-01 02:10:54.545454545", + "2010-01-01 03:16:21.818181818", + "2010-01-01 04:21:49.090909090", + "2010-01-01 05:27:16.363636363", + "2010-01-01 06:32:43.636363636", + "2010-01-01 07:38:10.909090909", + "2010-01-01 08:43:38.181818181", + "2010-01-01 09:49:05.454545454", + "2010-01-01 10:54:32.727272727", + "2010-01-01 12:00:00", + "2010-01-01 13:05:27.272727272", + "2010-01-01 14:10:54.545454545", + "2010-01-01 15:16:21.818181818", + "2010-01-01 16:21:49.090909090", + "2010-01-01 17:27:16.363636363", + "2010-01-01 18:32:43.636363636", + "2010-01-01 19:38:10.909090909", + "2010-01-01 20:43:38.181818181", + "2010-01-01 21:49:05.454545454", + "2010-01-01 22:54:32.727272727", + "2010-01-02 00:00:00", + ] + expected1 = DatetimeIndex( + expected_list1, dtype="datetime64[ns, UTC]", freq=None + ) + expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp")) + tm.assert_index_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py new file mode 100644 index 00000000..89059009 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -0,0 +1,363 @@ +""" +Tests for DatetimeIndex methods behaving like their Timestamp counterparts +""" +from datetime import datetime + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + to_offset, +) +from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG + +import pandas as pd +from pandas import ( + DatetimeIndex, + Timestamp, + date_range, +) 
+import pandas._testing as tm +from pandas.core.api import Float64Index + + +class TestDatetimeIndexOps: + def test_dti_time(self): + rng = date_range("1/1/2000", freq="12min", periods=10) + result = pd.Index(rng).time + expected = [t.time() for t in rng] + assert (result == expected).all() + + def test_dti_date(self): + rng = date_range("1/1/2000", freq="12H", periods=10) + result = pd.Index(rng).date + expected = [t.date() for t in rng] + assert (result == expected).all() + + @pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]]) + def test_dti_date_out_of_range(self, data): + # GH#1475 + msg = "Out of bounds .* present at position 0" + with pytest.raises(OutOfBoundsDatetime, match=msg): + DatetimeIndex(data) + + @pytest.mark.parametrize( + "field", + [ + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "days_in_month", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + ], + ) + def test_dti_timestamp_fields(self, field): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + expected = getattr(idx, field)[-1] + + warn = FutureWarning if field.startswith("is_") else None + with tm.assert_produces_warning(warn, match="Timestamp.freq is deprecated"): + result = getattr(Timestamp(idx[-1]), field) + assert result == expected + + def test_dti_timestamp_isocalendar_fields(self): + idx = tm.makeDateIndex(100) + expected = tuple(idx.isocalendar().iloc[-1].to_list()) + result = idx[-1].isocalendar() + assert result == expected + + def test_dti_timestamp_freq_fields(self): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + + msg = "The 'freq' argument in Timestamp is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + ts = Timestamp(idx[-1], idx.freq) + + msg2 = "Timestamp.freq is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg2): + assert idx.freq == ts.freq + + msg3 = "Timestamp.freqstr is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg3): + assert idx.freqstr == ts.freqstr + + # ---------------------------------------------------------------- + # DatetimeIndex.round + + def test_round_daily(self): + dti = date_range("20130101 09:10:11", periods=5) + result = dti.round("D") + expected = date_range("20130101", periods=5) + tm.assert_index_equal(result, expected) + + dti = dti.tz_localize("UTC").tz_convert("US/Eastern") + result = dti.round("D") + expected = date_range("20130101", periods=5).tz_localize("US/Eastern") + tm.assert_index_equal(result, expected) + + result = dti.round("s") + tm.assert_index_equal(result, dti) + + @pytest.mark.parametrize( + "freq, error_msg", + [ + ("Y", " is a non-fixed frequency"), + ("M", " is a non-fixed frequency"), + ("foobar", "Invalid frequency: foobar"), + ], + ) + def test_round_invalid(self, freq, error_msg): + dti = date_range("20130101 09:10:11", periods=5) + dti = dti.tz_localize("UTC").tz_convert("US/Eastern") + with pytest.raises(ValueError, match=error_msg): + dti.round(freq) + + def test_round(self, tz_naive_fixture): + tz = tz_naive_fixture + rng = date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz) + elt = rng[1] + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 01:00:00", tz=tz), + Timestamp("2016-01-01 02:00:00", tz=tz), + Timestamp("2016-01-01 02:00:00", tz=tz), + ] + ) + expected_elt 
= expected_rng[1] + + tm.assert_index_equal(rng.round(freq="H"), expected_rng) + assert elt.round(freq="H") == expected_elt + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + rng.round(freq="foo") + with pytest.raises(ValueError, match=msg): + elt.round(freq="foo") + + msg = " is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + rng.round(freq="M") + with pytest.raises(ValueError, match=msg): + elt.round(freq="M") + + # GH#14440 & GH#15578 + index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz) + result = index.round("ms") + expected = DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz) + tm.assert_index_equal(result, expected) + + for freq in ["us", "ns"]: + tm.assert_index_equal(index, index.round(freq)) + + index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz) + result = index.round("ms") + expected = DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz) + tm.assert_index_equal(result, expected) + + index = DatetimeIndex(["2016-10-17 12:00:00.001501031"]) + result = index.round("10ns") + expected = DatetimeIndex(["2016-10-17 12:00:00.001501030"]) + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(False): + ts = "2016-10-17 12:00:00.001501031" + DatetimeIndex([ts]).round("1010ns") + + def test_no_rounding_occurs(self, tz_naive_fixture): + # GH 21262 + tz = tz_naive_fixture + rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz) + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz), + Timestamp("2016-01-01 00:02:00", tz=tz), + Timestamp("2016-01-01 00:04:00", tz=tz), + Timestamp("2016-01-01 00:06:00", tz=tz), + Timestamp("2016-01-01 00:08:00", tz=tz), + ] + ) + + tm.assert_index_equal(rng.round(freq="2T"), expected_rng) + + @pytest.mark.parametrize( + "test_input, rounder, freq, expected", + [ + (["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]), + (["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]), + ( + ["2117-01-01 00:00:45.000000012"], + "floor", + "10ns", + ["2117-01-01 00:00:45.000000010"], + ), + ( + ["1823-01-01 00:00:01.000000012"], + "ceil", + "10ns", + ["1823-01-01 00:00:01.000000020"], + ), + (["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]), + (["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]), + (["2018-01-01 00:15:00"], "ceil", "15T", ["2018-01-01 00:15:00"]), + (["2018-01-01 00:15:00"], "floor", "15T", ["2018-01-01 00:15:00"]), + (["1823-01-01 03:00:00"], "ceil", "3H", ["1823-01-01 03:00:00"]), + (["1823-01-01 03:00:00"], "floor", "3H", ["1823-01-01 03:00:00"]), + ( + ("NaT", "1823-01-01 00:00:01"), + "floor", + "1s", + ("NaT", "1823-01-01 00:00:01"), + ), + ( + ("NaT", "1823-01-01 00:00:01"), + "ceil", + "1s", + ("NaT", "1823-01-01 00:00:01"), + ), + ], + ) + def test_ceil_floor_edge(self, test_input, rounder, freq, expected): + dt = DatetimeIndex(list(test_input)) + func = getattr(dt, rounder) + result = func(freq) + expected = DatetimeIndex(list(expected)) + assert expected.equals(result) + + @pytest.mark.parametrize( + "start, index_freq, periods", + [("2018-01-01", "12H", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)], + ) + @pytest.mark.parametrize( + "round_freq", + [ + "2ns", + "3ns", + "4ns", + "5ns", + "6ns", + "7ns", + "250ns", + "500ns", + "750ns", + "1us", + "19us", + "250us", + "500us", + "750us", + "1s", + "2s", + "3s", + "12H", + "1D", + ], + ) + def test_round_int64(self, start, index_freq, periods, round_freq): + dt = date_range(start=start, freq=index_freq, 
periods=periods) + unit = to_offset(round_freq).nanos + + # test floor + result = dt.floor(round_freq) + diff = dt.asi8 - result.asi8 + mod = result.asi8 % unit + assert (mod == 0).all(), f"floor not a {round_freq} multiple" + assert (0 <= diff).all() and (diff < unit).all(), "floor error" + + # test ceil + result = dt.ceil(round_freq) + diff = result.asi8 - dt.asi8 + mod = result.asi8 % unit + assert (mod == 0).all(), f"ceil not a {round_freq} multiple" + assert (0 <= diff).all() and (diff < unit).all(), "ceil error" + + # test round + result = dt.round(round_freq) + diff = abs(result.asi8 - dt.asi8) + mod = result.asi8 % unit + assert (mod == 0).all(), f"round not a {round_freq} multiple" + assert (diff <= unit // 2).all(), "round error" + if unit % 2 == 0: + assert ( + result.asi8[diff == unit // 2] % 2 == 0 + ).all(), "round half to even error" + + # ---------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize(self): + rng = date_range("1/1/2000 9:30", periods=10, freq="D") + + result = rng.normalize() + expected = date_range("1/1/2000", periods=10, freq="D") + tm.assert_index_equal(result, expected) + + arr_ns = np.array([1380585623454345752, 1380585612343234312]).astype( + "datetime64[ns]" + ) + rng_ns = DatetimeIndex(arr_ns) + rng_ns_normalized = rng_ns.normalize() + + arr_ns = np.array([1380585600000000000, 1380585600000000000]).astype( + "datetime64[ns]" + ) + expected = DatetimeIndex(arr_ns) + tm.assert_index_equal(rng_ns_normalized, expected) + + assert result.is_normalized + assert not rng.is_normalized + + def test_normalize_nat(self): + dti = DatetimeIndex([pd.NaT, Timestamp("2018-01-01 01:00:00")]) + result = dti.normalize() + expected = DatetimeIndex([pd.NaT, Timestamp("2018-01-01")]) + tm.assert_index_equal(result, expected) + + +class TestDateTimeIndexToJulianDate: + def test_1700(self): + dr = date_range(start=Timestamp("1710-10-01"), periods=5, freq="D") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="D") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="H") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="T") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="S") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py new file mode 100644 index 00000000..be8d70c1 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -0,0 +1,607 @@ +from datetime import datetime + +import numpy as np +import pytest +import pytz + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + bdate_range, + 
date_range, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + +from pandas.tseries.offsets import ( + BMonthEnd, + Minute, + MonthEnd, +) + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +def test_union_many_deprecated(): + dti = date_range("2016-01-01", periods=3) + + with tm.assert_produces_warning(FutureWarning): + dti.union_many([dti, dti]) + + +class TestDatetimeIndexSetOps: + tz = [ + None, + "UTC", + "Asia/Tokyo", + "US/Eastern", + "dateutil/Asia/Singapore", + "dateutil/US/Pacific", + ] + + # TODO: moved from test_datetimelike; dedup with version below + def test_union2(self, sort): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + union = first.union(second, sort=sort) + tm.assert_index_equal(union, everything) + + @pytest.mark.parametrize("box", [np.array, Series, list]) + def test_union3(self, sort, box): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + + # GH 10149 support listlike inputs other than Index objects + expected = first.union(second, sort=sort) + case = box(second.values) + result = first.union(case, sort=sort) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", tz) + def test_union(self, tz, sort): + rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz) + other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz) + expected1 = date_range("1/1/2000", freq="D", periods=10, tz=tz) + expected1_notsorted = DatetimeIndex(list(other1) + list(rng1)) + + rng2 = date_range("1/1/2000", freq="D", periods=5, tz=tz) + other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz) + expected2 = date_range("1/1/2000", freq="D", periods=8, tz=tz) + expected2_notsorted = DatetimeIndex(list(other2) + list(rng2[:3])) + + rng3 = date_range("1/1/2000", freq="D", periods=5, tz=tz) + other3 = DatetimeIndex([], tz=tz) + expected3 = date_range("1/1/2000", freq="D", periods=5, tz=tz) + expected3_notsorted = rng3 + + for rng, other, exp, exp_notsorted in [ + (rng1, other1, expected1, expected1_notsorted), + (rng2, other2, expected2, expected2_notsorted), + (rng3, other3, expected3, expected3_notsorted), + ]: + + result_union = rng.union(other, sort=sort) + tm.assert_index_equal(result_union, exp) + + result_union = other.union(rng, sort=sort) + if sort is None: + tm.assert_index_equal(result_union, exp) + else: + tm.assert_index_equal(result_union, exp_notsorted) + + def test_union_coverage(self, sort): + idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"]) + ordered = DatetimeIndex(idx.sort_values(), freq="infer") + result = ordered.union(idx, sort=sort) + tm.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered, sort=sort) + tm.assert_index_equal(result, ordered) + assert result.freq == ordered.freq + + def test_union_bug_1730(self, sort): + rng_a = date_range("1/1/2012", periods=4, freq="3H") + rng_b = date_range("1/1/2012", periods=4, freq="4H") + + result = rng_a.union(rng_b, sort=sort) + exp = list(rng_a) + list(rng_b[1:]) + if sort is None: + exp = DatetimeIndex(sorted(exp)) + else: + exp = DatetimeIndex(exp) + tm.assert_index_equal(result, exp) + + def test_union_bug_1745(self, sort): + left = DatetimeIndex(["2012-05-11 15:19:49.695000"]) + right = DatetimeIndex( + [ + "2012-05-29 13:04:21.322000", + "2012-05-11 15:27:24.873000", + "2012-05-11 15:31:05.350000", + ] + ) + + result = left.union(right, sort=sort) + exp = DatetimeIndex( + [ + "2012-05-11 15:19:49.695000", + "2012-05-29 13:04:21.322000", + 
"2012-05-11 15:27:24.873000", + "2012-05-11 15:31:05.350000", + ] + ) + if sort is None: + exp = exp.sort_values() + tm.assert_index_equal(result, exp) + + def test_union_bug_4564(self, sort): + from pandas import DateOffset + + left = date_range("2013-01-01", "2013-02-01") + right = left + DateOffset(minutes=15) + + result = left.union(right, sort=sort) + exp = list(left) + list(right) + if sort is None: + exp = DatetimeIndex(sorted(exp)) + else: + exp = DatetimeIndex(exp) + tm.assert_index_equal(result, exp) + + def test_union_freq_both_none(self, sort): + # GH11086 + expected = bdate_range("20150101", periods=10) + expected._data.freq = None + + result = expected.union(expected, sort=sort) + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_union_freq_infer(self): + # When taking the union of two DatetimeIndexes, we infer + # a freq even if the arguments don't have freq. This matches + # TimedeltaIndex behavior. + dti = date_range("2016-01-01", periods=5) + left = dti[[0, 1, 3, 4]] + right = dti[[2, 3, 1]] + + assert left.freq is None + assert right.freq is None + + result = left.union(right) + tm.assert_index_equal(result, dti) + assert result.freq == "D" + + def test_union_dataframe_index(self): + rng1 = date_range("1/1/1999", "1/1/2012", freq="MS") + s1 = Series(np.random.randn(len(rng1)), rng1) + + rng2 = date_range("1/1/1980", "12/1/2001", freq="MS") + s2 = Series(np.random.randn(len(rng2)), rng2) + df = DataFrame({"s1": s1, "s2": s2}) + + exp = date_range("1/1/1980", "1/1/2012", freq="MS") + tm.assert_index_equal(df.index, exp) + + def test_union_with_DatetimeIndex(self, sort): + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D") + # Works + i1.union(i2, sort=sort) + # Fails with "AttributeError: can't set attribute" + i2.union(i1, sort=sort) + + # TODO: moved from test_datetimelike; de-duplicate with version below + def test_intersection2(self): + first = tm.makeDateIndex(10) + second = first[5:] + intersect = first.intersection(second) + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + assert tm.equalContents(result, second) + + third = Index(["a", "b", "c"]) + result = first.intersection(third) + expected = Index([], dtype=object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"] + ) + def test_intersection(self, tz, sort): + # GH 4690 (with tz) + base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx") + + # if target has the same name, it is preserved + rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx") + expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx") + + # if target name is different, it will be reset + rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other") + expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None) + + rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = DatetimeIndex([], freq="D", name="idx") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # non-monotonic + base = DatetimeIndex( + ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx" + ) + + rng2 = DatetimeIndex( + 
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx" + ) + expected2 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name="idx") + + rng3 = DatetimeIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + tz=tz, + name="other", + ) + expected3 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name=None) + + # GH 7880 + rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx") + expected4 = DatetimeIndex([], tz=tz, name="idx") + assert expected4.freq is None + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + # parametrize over both anchored and non-anchored freqs, as they + # have different code paths + @pytest.mark.parametrize("freq", ["T", "B"]) + def test_intersection_empty(self, tz_aware_fixture, freq): + # empty same freq GH2129 + tz = tz_aware_fixture + rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz) + result = rng[0:0].intersection(rng) + assert len(result) == 0 + assert result.freq == rng.freq + + result = rng.intersection(rng[0:0]) + assert len(result) == 0 + assert result.freq == rng.freq + + # no overlap GH#33604 + check_freq = freq != "T" # We don't preserve freq on non-anchored offsets + result = rng[:3].intersection(rng[-3:]) + tm.assert_index_equal(result, rng[:0]) + if check_freq: + # We don't preserve freq on non-anchored offsets + assert result.freq == rng.freq + + # swapped left and right + result = rng[-3:].intersection(rng[:3]) + tm.assert_index_equal(result, rng[:0]) + if check_freq: + # We don't preserve freq on non-anchored offsets + assert result.freq == rng.freq + + def test_intersection_bug_1708(self): + from pandas import DateOffset + + index_1 = date_range("1/1/2012", periods=4, freq="12H") + index_2 = index_1 + DateOffset(hours=1) + + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 + assert len(result) == 0 + + @pytest.mark.parametrize("tz", tz) + def test_difference(self, tz, sort): + rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"] + + rng1 = DatetimeIndex(rng_dates, tz=tz) + other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz) + expected1 = DatetimeIndex(rng_dates, tz=tz) + + rng2 = DatetimeIndex(rng_dates, tz=tz) + other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz) + expected2 = DatetimeIndex(rng_dates[:3], tz=tz) + + rng3 = DatetimeIndex(rng_dates, tz=tz) + other3 = DatetimeIndex([], tz=tz) + expected3 = DatetimeIndex(rng_dates, tz=tz) + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + ]: + result_diff = rng.difference(other, sort) + if sort is None and len(other): + # We dont sort (yet?) 
when empty GH#24959 + expected = expected.sort_values() + tm.assert_index_equal(result_diff, expected) + + def test_difference_freq(self, sort): + # GH14323: difference of DatetimeIndex should not preserve frequency + + index = date_range("20160920", "20160925", freq="D") + other = date_range("20160921", "20160924", freq="D") + expected = DatetimeIndex(["20160920", "20160925"], freq=None) + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = date_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other, sort) + expected = DatetimeIndex(["20160920", "20160921"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + def test_datetimeindex_diff(self, sort): + dti1 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=100) + dti2 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=98) + assert len(dti1.difference(dti2, sort)) == 2 + + @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"]) + def test_setops_preserve_freq(self, tz): + rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz) + + result = rng[:50].union(rng[50:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[30:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[60:100]) + assert result.name == rng.name + assert result.freq is None + assert result.tz == rng.tz + + result = rng[:50].intersection(rng[25:75]) + assert result.name == rng.name + assert result.freqstr == "D" + assert result.tz == rng.tz + + nofreq = DatetimeIndex(list(rng[25:75]), name="other") + result = rng[:50].union(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].intersection(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + + def test_intersection_non_tick_no_fastpath(self): + # GH#42104 + dti = DatetimeIndex( + [ + "2018-12-31", + "2019-03-31", + "2019-06-30", + "2019-09-30", + "2019-12-31", + "2020-03-31", + ], + freq="Q-DEC", + ) + result = dti[::2].intersection(dti[1::2]) + expected = dti[:0] + tm.assert_index_equal(result, expected) + + +class TestBusinessDatetimeIndex: + def test_union(self, sort): + rng = bdate_range(START, END) + # overlapping + left = rng[:10] + right = rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = rng[:5] + right = rng[10:] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, Index) + + # non-overlapping, no gap + left = rng[:5] + right = rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # order does not matter + if sort is None: + tm.assert_index_equal(right.union(left, sort=sort), the_union) + else: + expected = DatetimeIndex(list(right) + list(left)) + tm.assert_index_equal(right.union(left, sort=sort), expected) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = rng.union(rng, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + def test_union_not_cacheable(self, sort): + rng = date_range("1/1/2000", periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_union = rng1.union(rng2, sort=sort) + if 
sort is None: + tm.assert_index_equal(the_union, rng) + else: + expected = DatetimeIndex(list(rng[10:]) + list(rng[:10])) + tm.assert_index_equal(the_union, expected) + + rng1 = rng[10:] + rng2 = rng[15:35] + the_union = rng1.union(rng2, sort=sort) + expected = rng[10:] + tm.assert_index_equal(the_union, expected) + + def test_intersection(self): + rng = date_range("1/1/2000", periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_int = rng1.intersection(rng2) + expected = rng[10:25] + tm.assert_index_equal(the_int, expected) + assert isinstance(the_int, DatetimeIndex) + assert the_int.freq == rng.freq + + the_int = rng1.intersection(rng2.view(DatetimeIndex)) + tm.assert_index_equal(the_int, expected) + + # non-overlapping + the_int = rng[:10].intersection(rng[10:]) + expected = DatetimeIndex([]) + tm.assert_index_equal(the_int, expected) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range("11/30/2011", "12/31/2011") + b = bdate_range("12/10/2011", "12/20/2011") + result = a.intersection(b) + tm.assert_index_equal(result, b) + assert result.freq == b.freq + + def test_intersection_list(self): + # GH#35876 + # values is not an Index -> no name -> retain "a" + values = [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")] + idx = DatetimeIndex(values, name="a") + res = idx.intersection(values) + tm.assert_index_equal(res, idx) + + def test_month_range_union_tz_pytz(self, sort): + from pytz import timezone + + tz = timezone("US/Eastern") + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) + + early_dr.union(late_dr, sort=sort) + + @td.skip_if_windows + def test_month_range_union_tz_dateutil(self, sort): + from pandas._libs.tslibs.timezones import dateutil_gettz + + tz = dateutil_gettz("US/Eastern") + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) + + early_dr.union(late_dr, sort=sort) + + @pytest.mark.parametrize("sort", [False, None]) + def test_intersection_duplicates(self, sort): + # GH#38196 + idx1 = Index( + [ + pd.Timestamp("2019-12-13"), + pd.Timestamp("2019-12-12"), + pd.Timestamp("2019-12-12"), + ] + ) + result = idx1.intersection(idx1, sort=sort) + expected = Index([pd.Timestamp("2019-12-13"), pd.Timestamp("2019-12-12")]) + tm.assert_index_equal(result, expected) + + +class TestCustomDatetimeIndex: + def test_union(self, sort): + # overlapping + rng = bdate_range(START, END, freq="C") + left = rng[:10] + right = rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = rng[:5] + right = rng[10:] + + the_union = left.union(right, sort) + assert isinstance(the_union, Index) + + # non-overlapping, no gap + left = rng[:5] + right = rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # order does not matter + if sort is None: + tm.assert_index_equal(right.union(left, sort=sort), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = rng.union(rng, 
sort=sort) + assert isinstance(the_union, DatetimeIndex) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range("11/30/2011", "12/31/2011", freq="C") + b = bdate_range("12/10/2011", "12/20/2011", freq="C") + result = a.intersection(b) + tm.assert_index_equal(result, b) + assert result.freq == b.freq + + @pytest.mark.parametrize( + "tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)] + ) + def test_intersection_dst_transition(self, tz): + # GH 46702: Europe/Berlin has DST transition + idx1 = date_range("2020-03-27", periods=5, freq="D", tz=tz) + idx2 = date_range("2020-03-30", periods=5, freq="D", tz=tz) + result = idx1.intersection(idx2) + expected = date_range("2020-03-30", periods=2, freq="D", tz=tz) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py new file mode 100644 index 00000000..a07f21f7 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -0,0 +1,1220 @@ +""" +Tests for DatetimeIndex timezone-related methods +""" +from datetime import ( + date, + datetime, + time, + timedelta, + tzinfo, +) + +import dateutil +from dateutil.tz import ( + gettz, + tzlocal, +) +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import ( + conversion, + timezones, +) +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + bdate_range, + date_range, + isna, + to_datetime, +) +import pandas._testing as tm + + +class FixedOffset(tzinfo): + """Fixed offset in minutes east from UTC.""" + + def __init__(self, offset, name) -> None: + self.__offset = timedelta(minutes=offset) + self.__name = name + + def utcoffset(self, dt): + return self.__offset + + def tzname(self, dt): + return self.__name + + def dst(self, dt): + return timedelta(0) + + +fixed_off = FixedOffset(-420, "-07:00") +fixed_off_no_name = FixedOffset(-330, None) + + +class TestDatetimeIndexTimezones: + # ------------------------------------------------------------- + # DatetimeIndex.tz_convert + def test_tz_convert_nat(self): + # GH#5546 + dates = [pd.NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize("US/Pacific") + tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific")) + idx = idx.tz_convert("US/Eastern") + tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Eastern")) + idx = idx.tz_convert("UTC") + tm.assert_index_equal(idx, DatetimeIndex(dates, tz="UTC")) + + dates = ["2010-12-01 00:00", "2010-12-02 00:00", pd.NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize("US/Pacific") + tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific")) + idx = idx.tz_convert("US/Eastern") + expected = ["2010-12-01 03:00", "2010-12-02 03:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern")) + + idx = idx + pd.offsets.Hour(5) + expected = ["2010-12-01 08:00", "2010-12-02 08:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern")) + idx = idx.tz_convert("US/Pacific") + expected = ["2010-12-01 05:00", "2010-12-02 05:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific")) + + idx = idx + np.timedelta64(3, "h") + expected = ["2010-12-01 08:00", "2010-12-02 08:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific")) + + idx = idx.tz_convert("US/Eastern") + expected = ["2010-12-01 11:00", "2010-12-02 11:00", pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern")) 
+ + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_dti_tz_convert_compat_timestamp(self, prefix): + strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] + idx = DatetimeIndex(strdates, tz=prefix + "US/Eastern") + + conv = idx[0].tz_convert(prefix + "US/Pacific") + expected = idx.tz_convert(prefix + "US/Pacific")[0] + + assert conv == expected + + def test_dti_tz_convert_hour_overflow_dst(self): + # Regression test for: + # https://github.com/pandas-dev/pandas/issues/13306 + + # sorted case US/Eastern -> UTC + ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"] + tt = DatetimeIndex(ts).tz_localize("US/Eastern") + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"] + tt = DatetimeIndex(ts).tz_localize("UTC") + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"] + tt = DatetimeIndex(ts).tz_localize("US/Eastern") + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"] + tt = DatetimeIndex(ts).tz_localize("UTC") + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): + # Regression test for GH#13306 + + # sorted case US/Eastern -> UTC + ts = [ + Timestamp("2008-05-12 09:50:00", tz=tz), + Timestamp("2008-12-12 09:50:35", tz=tz), + Timestamp("2009-05-12 09:50:32", tz=tz), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = [ + Timestamp("2008-05-12 13:50:00", tz="UTC"), + Timestamp("2008-12-12 14:50:35", tz="UTC"), + Timestamp("2009-05-12 13:50:32", tz="UTC"), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = [ + Timestamp("2008-05-12 09:50:00", tz=tz), + Timestamp("2008-12-12 09:50:35", tz=tz), + Timestamp("2008-05-12 09:50:32", tz=tz), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = [ + Timestamp("2008-05-12 13:50:00", tz="UTC"), + Timestamp("2008-12-12 14:50:35", tz="UTC"), + Timestamp("2008-05-12 13:50:32", tz="UTC"), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize("freq, n", [("H", 1), ("T", 60), ("S", 3600)]) + def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n): + # Regression test for tslib.tz_convert(vals, tz1, tz2). + # See https://github.com/pandas-dev/pandas/issues/4496 for details. 
+ idx = date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq=freq) + idx = idx.tz_localize("UTC") + idx = idx.tz_convert("Europe/Moscow") + + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + def test_dti_tz_convert_dst(self): + for freq, n in [("H", 1), ("T", 60), ("S", 3600)]: + # Start DST + idx = date_range( + "2014-03-08 23:00", "2014-03-09 09:00", freq=freq, tz="UTC" + ) + idx = idx.tz_convert("US/Eastern") + expected = np.repeat( + np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range( + "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + expected = np.repeat( + np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + # End DST + idx = date_range( + "2014-11-01 23:00", "2014-11-02 09:00", freq=freq, tz="UTC" + ) + idx = idx.tz_convert("US/Eastern") + expected = np.repeat( + np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range( + "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + expected = np.repeat( + np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + # daily + # Start DST + idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC") + idx = idx.tz_convert("US/Eastern") + tm.assert_index_equal(idx.hour, Index([19, 19])) + + idx = date_range( + "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + tm.assert_index_equal(idx.hour, Index([5, 5])) + + # End DST + idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC") + idx = idx.tz_convert("US/Eastern") + tm.assert_index_equal(idx.hour, Index([20, 20])) + + idx = date_range( + "2014-11-01 00:00", "2014-11-02 000:00", freq="D", tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + tm.assert_index_equal(idx.hour, Index([4, 4])) + + def test_tz_convert_roundtrip(self, tz_aware_fixture): + tz = tz_aware_fixture + idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="M", tz="UTC") + exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="M") + + idx2 = date_range(start="2014-01-01", end="2014-12-31", freq="D", tz="UTC") + exp2 = date_range(start="2014-01-01", end="2014-12-31", freq="D") + + idx3 = date_range(start="2014-01-01", end="2014-03-01", freq="H", tz="UTC") + exp3 = date_range(start="2014-01-01", end="2014-03-01", freq="H") + + idx4 = date_range(start="2014-08-01", end="2014-10-31", freq="T", tz="UTC") + exp4 = date_range(start="2014-08-01", end="2014-10-31", freq="T") + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), (idx4, exp4)]: + converted = idx.tz_convert(tz) + reset = converted.tz_convert(None) + tm.assert_index_equal(reset, expected) + assert reset.tzinfo is None + expected = converted.tz_convert("UTC").tz_localize(None) + expected = expected._with_freq("infer") + tm.assert_index_equal(reset, expected) + + def test_dti_tz_convert_tzlocal(self): + # GH#13583 + # tz_convert doesn't affect to internal + dti = date_range(start="2001-01-01", end="2001-03-01", tz="UTC") + dti2 = 
dti.tz_convert(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_convert(None) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + @pytest.mark.parametrize( + "tz", + [ + "US/Eastern", + "dateutil/US/Eastern", + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + ], + ) + def test_dti_tz_convert_utc_to_local_no_modify(self, tz): + rng = date_range("3/11/2012", "3/12/2012", freq="H", tz="utc") + rng_eastern = rng.tz_convert(tz) + + # Values are unmodified + tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8) + + assert timezones.tz_compare(rng_eastern.tz, timezones.maybe_get_tz(tz)) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_tz_convert_unsorted(self, tzstr): + dr = date_range("2012-03-09", freq="H", periods=100, tz="utc") + dr = dr.tz_convert(tzstr) + + result = dr[::-1].hour + exp = dr.hour[::-1] + tm.assert_almost_equal(result, exp) + + # ------------------------------------------------------------- + # DatetimeIndex.tz_localize + + def test_tz_localize_utc_copies(self, utc_fixture): + # GH#46460 + times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"] + index = DatetimeIndex(times) + + res = index.tz_localize(utc_fixture) + assert not tm.shares_memory(res, index) + + res2 = index._data.tz_localize(utc_fixture) + assert not tm.shares_memory(index._data, res2) + + def test_dti_tz_localize_nonexistent_raise_coerce(self): + # GH#13057 + times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"] + index = DatetimeIndex(times) + tz = "US/Eastern" + with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)): + index.tz_localize(tz=tz) + + with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)): + index.tz_localize(tz=tz, nonexistent="raise") + + result = index.tz_localize(tz=tz, nonexistent="NaT") + test_times = ["2015-03-08 01:00-05:00", "NaT", "2015-03-08 03:00-04:00"] + dti = to_datetime(test_times, utc=True) + expected = dti.tz_convert("US/Eastern") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_infer(self, tz): + # November 6, 2011, fall back, repeat 2 AM hour + # With no repeated hours, we cannot infer the transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=pd.offsets.Hour()) + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + dr.tz_localize(tz) + + # With repeated hours, we can infer the transition + dr = date_range( + datetime(2011, 11, 6, 0), periods=5, freq=pd.offsets.Hour(), tz=tz + ) + times = [ + "11/06/2011 00:00", + "11/06/2011 01:00", + "11/06/2011 01:00", + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous="infer") + expected = dr._with_freq(None) + tm.assert_index_equal(expected, localized) + tm.assert_index_equal(expected, DatetimeIndex(times, tz=tz, ambiguous="infer")) + + # When there is no dst transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=pd.offsets.Hour()) + localized = dr.tz_localize(tz) + localized_infer = dr.tz_localize(tz, ambiguous="infer") + tm.assert_index_equal(localized, localized_infer) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_times(self, tz): + # March 13, 2011, spring forward, 
skip from 2 AM to 3 AM + dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=pd.offsets.Hour()) + with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:30:00"): + dr.tz_localize(tz) + + # after dst transition, it works + dr = date_range( + datetime(2011, 3, 13, 3, 30), periods=3, freq=pd.offsets.Hour(), tz=tz + ) + + # November 6, 2011, fall back, repeat 2 AM hour + dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=pd.offsets.Hour()) + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + dr.tz_localize(tz) + + # UTC is OK + dr = date_range( + datetime(2011, 3, 13), periods=48, freq=pd.offsets.Minute(30), tz=pytz.utc + ) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_localize_pass_dates_to_utc(self, tzstr): + strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] + + idx = DatetimeIndex(strdates) + conv = idx.tz_localize(tzstr) + + fromdates = DatetimeIndex(strdates, tz=tzstr) + + assert conv.tz == fromdates.tz + tm.assert_numpy_array_equal(conv.values, fromdates.values) + + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_dti_tz_localize(self, prefix): + tzstr = prefix + "US/Eastern" + dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="L") + dti2 = dti.tz_localize(tzstr) + + dti_utc = date_range( + start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="L", tz="utc" + ) + + tm.assert_numpy_array_equal(dti2.values, dti_utc.values) + + dti3 = dti2.tz_convert(prefix + "US/Pacific") + tm.assert_numpy_array_equal(dti3.values, dti_utc.values) + + dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="L") + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + dti.tz_localize(tzstr) + + dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="L") + with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:00:00"): + dti.tz_localize(tzstr) + + @pytest.mark.parametrize( + "tz", + [ + "US/Eastern", + "dateutil/US/Eastern", + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + ], + ) + def test_dti_tz_localize_utc_conversion(self, tz): + # Localizing to time zone should: + # 1) check for DST ambiguities + # 2) convert to UTC + + rng = date_range("3/10/2012", "3/11/2012", freq="30T") + + converted = rng.tz_localize(tz) + expected_naive = rng + pd.offsets.Hour(5) + tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8) + + # DST ambiguity, this should fail + rng = date_range("3/11/2012", "3/12/2012", freq="30T") + # Is this really how it should fail?? + with pytest.raises(pytz.NonExistentTimeError, match="2012-03-11 02:00:00"): + rng.tz_localize(tz) + + def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): + # note: this tz tests that a tz-naive index can be localized + # and de-localized successfully, when there are no DST transitions + # in the range. 
+ idx = date_range(start="2014-06-01", end="2014-08-30", freq="15T") + tz = tz_aware_fixture + localized = idx.tz_localize(tz) + # can't localize a tz-aware object + with pytest.raises( + TypeError, match="Already tz-aware, use tz_convert to convert" + ): + localized.tz_localize(tz) + reset = localized.tz_localize(None) + assert reset.tzinfo is None + expected = idx._with_freq(None) + tm.assert_index_equal(reset, expected) + + def test_dti_tz_localize_naive(self): + rng = date_range("1/1/2011", periods=100, freq="H") + + conv = rng.tz_localize("US/Pacific") + exp = date_range("1/1/2011", periods=100, freq="H", tz="US/Pacific") + + tm.assert_index_equal(conv, exp._with_freq(None)) + + def test_dti_tz_localize_tzlocal(self): + # GH#13583 + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = int(offset.total_seconds() * 1000000000) + + dti = date_range(start="2001-01-01", end="2001-03-01") + dti2 = dti.tz_localize(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8) + + dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_localize(None) + tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_nat(self, tz): + times = [ + "11/06/2011 00:00", + "11/06/2011 01:00", + "11/06/2011 01:00", + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous="NaT") + + times = [ + "11/06/2011 00:00", + np.NaN, + np.NaN, + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + di_test = DatetimeIndex(times, tz="US/Eastern") + + # left dtype is datetime64[ns, US/Eastern] + # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] + tm.assert_numpy_array_equal(di_test.values, localized.values) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_flags(self, tz): + # November 6, 2011, fall back, repeat 2 AM hour + + # Pass in flags to determine right dst transition + dr = date_range( + datetime(2011, 11, 6, 0), periods=5, freq=pd.offsets.Hour(), tz=tz + ) + times = [ + "11/06/2011 00:00", + "11/06/2011 01:00", + "11/06/2011 01:00", + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + + # Test tz_localize + di = DatetimeIndex(times) + is_dst = [1, 1, 0, 0, 0] + localized = di.tz_localize(tz, ambiguous=is_dst) + expected = dr._with_freq(None) + tm.assert_index_equal(expected, localized) + tm.assert_index_equal(expected, DatetimeIndex(times, tz=tz, ambiguous=is_dst)) + + localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) + tm.assert_index_equal(dr, localized) + + localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype("bool")) + tm.assert_index_equal(dr, localized) + + # Test constructor + localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst) + tm.assert_index_equal(dr, localized) + + # Test duplicate times where inferring the dst fails + times += times + di = DatetimeIndex(times) + + # When the sizes are incompatible, make sure error is raised + msg = "Length of ambiguous bool-array must be the same size as vals" + with pytest.raises(Exception, match=msg): + di.tz_localize(tz, ambiguous=is_dst) + + # When sizes are compatible and there are repeats ('infer' won't work) + is_dst = np.hstack((is_dst, is_dst)) + localized = di.tz_localize(tz, ambiguous=is_dst) + dr = dr.append(dr) + tm.assert_index_equal(dr, localized) + + # When there is no dst 
transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=pd.offsets.Hour()) + is_dst = np.array([1] * 10) + localized = dr.tz_localize(tz) + localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst) + tm.assert_index_equal(localized, localized_is_dst) + + # TODO: belongs outside tz_localize tests? + @pytest.mark.parametrize("tz", ["Europe/London", "dateutil/Europe/London"]) + def test_dti_construction_ambiguous_endpoint(self, tz): + # construction with an ambiguous end-point + # GH#11626 + + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + date_range( + "2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", freq="H" + ) + + times = date_range( + "2013-10-26 23:00", "2013-10-27 01:00", freq="H", tz=tz, ambiguous="infer" + ) + assert times[0] == Timestamp("2013-10-26 23:00", tz=tz) + assert times[-1] == Timestamp("2013-10-27 01:00:00+0000", tz=tz) + + @pytest.mark.parametrize( + "tz, option, expected", + [ + ["US/Pacific", "shift_forward", "2019-03-10 03:00"], + ["dateutil/US/Pacific", "shift_forward", "2019-03-10 03:00"], + ["US/Pacific", "shift_backward", "2019-03-10 01:00"], + ["dateutil/US/Pacific", "shift_backward", "2019-03-10 01:00"], + ["US/Pacific", timedelta(hours=1), "2019-03-10 03:00"], + ], + ) + def test_dti_construction_nonexistent_endpoint(self, tz, option, expected): + # construction with an nonexistent end-point + + with pytest.raises(pytz.NonExistentTimeError, match="2019-03-10 02:00:00"): + date_range( + "2019-03-10 00:00", "2019-03-10 02:00", tz="US/Pacific", freq="H" + ) + + times = date_range( + "2019-03-10 00:00", "2019-03-10 02:00", freq="H", tz=tz, nonexistent=option + ) + assert times[-1] == Timestamp(expected, tz=tz) + + def test_dti_tz_localize_bdate_range(self): + dr = bdate_range("1/1/2009", "1/1/2010") + dr_utc = bdate_range("1/1/2009", "1/1/2010", tz=pytz.utc) + localized = dr.tz_localize(pytz.utc) + tm.assert_index_equal(dr_utc, localized) + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + @pytest.mark.parametrize( + "method, exp", [["NaT", pd.NaT], ["raise", None], ["foo", "invalid"]] + ) + def test_dti_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") + if method == "raise": + with pytest.raises(pytz.NonExistentTimeError, match="2015-03-29 02:00:00"): + dti.tz_localize(tz, nonexistent=method) + elif exp == "invalid": + msg = ( + "The nonexistent argument must be one of " + "'raise', 'NaT', 'shift_forward', 'shift_backward' " + "or a timedelta object" + ) + with pytest.raises(ValueError, match=msg): + dti.tz_localize(tz, nonexistent=method) + else: + result = dti.tz_localize(tz, nonexistent=method) + expected = DatetimeIndex([exp] * n, tz=tz) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start_ts, tz, end_ts, shift", + [ + ["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 01:59:59.999999999", + "backward", + ], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 03:20:00", + timedelta(hours=1), + ], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 01:20:00", + timedelta(hours=-1), + ], + ["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 01:59:59.999999999", + "backward", + ], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 03:33:00", + 
timedelta(hours=1), + ], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 01:33:00", + timedelta(hours=-1), + ], + ], + ) + @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + def test_dti_tz_localize_nonexistent_shift( + self, start_ts, tz, end_ts, shift, tz_type + ): + # GH 8917 + tz = tz_type + tz + if isinstance(shift, str): + shift = "shift_" + shift + dti = DatetimeIndex([Timestamp(start_ts)]) + result = dti.tz_localize(tz, nonexistent=shift) + expected = DatetimeIndex([Timestamp(end_ts)]).tz_localize(tz) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("offset", [-1, 1]) + @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): + # GH 8917 + tz = tz_type + "Europe/Warsaw" + dti = DatetimeIndex([Timestamp("2015-03-29 02:20:00")]) + msg = "The provided timedelta will relocalize on a nonexistent time" + with pytest.raises(ValueError, match=msg): + dti.tz_localize(tz, nonexistent=timedelta(seconds=offset)) + + # ------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize_tz(self): + rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="US/Eastern") + + result = rng.normalize() # does not preserve freq + expected = date_range("1/1/2000", periods=10, freq="D", tz="US/Eastern") + tm.assert_index_equal(result, expected._with_freq(None)) + + assert result.is_normalized + assert not rng.is_normalized + + rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="UTC") + + result = rng.normalize() + expected = date_range("1/1/2000", periods=10, freq="D", tz="UTC") + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal()) + result = rng.normalize() # does not preserve freq + expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal()) + tm.assert_index_equal(result, expected._with_freq(None)) + + assert result.is_normalized + assert not rng.is_normalized + + @td.skip_if_windows + @pytest.mark.parametrize( + "timezone", + [ + "US/Pacific", + "US/Eastern", + "UTC", + "Asia/Kolkata", + "Asia/Shanghai", + "Australia/Canberra", + ], + ) + def test_normalize_tz_local(self, timezone): + # GH#13459 + with tm.set_timezone(timezone): + rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal()) + + result = rng.normalize() + expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal()) + expected = expected._with_freq(None) + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + # ------------------------------------------------------------ + # DatetimeIndex.__new__ + + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_dti_constructor_static_tzinfo(self, prefix): + # it works! 
+ index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + "EST") + index.hour + index[0] + + def test_dti_constructor_with_fixed_tz(self): + off = FixedOffset(420, "+07:00") + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + rng2 = date_range(start, periods=len(rng), tz=off) + tm.assert_index_equal(rng, rng2) + + rng3 = date_range("3/11/2012 05:00:00+07:00", "6/11/2012 05:00:00+07:00") + assert (rng.values == rng3.values).all() + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_convert_datetime_list(self, tzstr): + dr = date_range("2012-06-02", periods=10, tz=tzstr, name="foo") + dr2 = DatetimeIndex(list(dr), name="foo", freq="D") + tm.assert_index_equal(dr, dr2) + + def test_dti_construction_univalent(self): + rng = date_range("03/12/2012 00:00", periods=10, freq="W-FRI", tz="US/Eastern") + rng2 = DatetimeIndex(data=rng, tz="US/Eastern") + tm.assert_index_equal(rng, rng2) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_from_tzaware_datetime(self, tz): + d = [datetime(2012, 8, 19, tzinfo=tz)] + + index = DatetimeIndex(d) + assert timezones.tz_compare(index.tz, tz) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_constructors(self, tzstr): + """Test different DatetimeIndex constructions with timezone + Follow-up of GH#4229 + """ + arr = ["11/10/2005 08:00:00", "11/10/2005 09:00:00"] + + idx1 = to_datetime(arr).tz_localize(tzstr) + idx2 = date_range(start="2005-11-10 08:00:00", freq="H", periods=2, tz=tzstr) + idx2 = idx2._with_freq(None) # the others all have freq=None + idx3 = DatetimeIndex(arr, tz=tzstr) + idx4 = DatetimeIndex(np.array(arr), tz=tzstr) + + for other in [idx2, idx3, idx4]: + tm.assert_index_equal(idx1, other) + + # ------------------------------------------------------------- + # Unsorted + + @pytest.mark.parametrize( + "dtype", + [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"], + ) + def test_date_accessor(self, dtype): + # Regression test for GH#21230 + expected = np.array([date(2018, 6, 4), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:00:00", pd.NaT], dtype=dtype) + result = index.date + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"], + ) + def test_time_accessor(self, dtype): + # Regression test for GH#21267 + expected = np.array([time(10, 20, 30), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], dtype=dtype) + result = index.time + + tm.assert_numpy_array_equal(result, expected) + + def test_timetz_accessor(self, tz_naive_fixture): + # GH21358 + tz = timezones.maybe_get_tz(tz_naive_fixture) + + expected = np.array([time(10, 20, 30, tzinfo=tz), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], tz=tz) + result = index.timetz + + tm.assert_numpy_array_equal(result, expected) + + def test_dti_drop_dont_lose_tz(self): + # GH#2621 + ind = date_range("2012-12-01", periods=10, tz="utc") + ind = ind.drop(ind[-1]) + + assert ind.tz is not None + + def test_dti_tz_conversion_freq(self, tz_naive_fixture): + # GH25241 + t3 = DatetimeIndex(["2019-01-01 10:00"], freq="H") + assert t3.tz_localize(tz=tz_naive_fixture).freq == t3.freq + t4 = DatetimeIndex(["2019-01-02 12:00"], tz="UTC", freq="T") + assert t4.tz_convert(tz="UTC").freq == 
t4.freq + + def test_drop_dst_boundary(self): + # see gh-18031 + tz = "Europe/Brussels" + freq = "15min" + + start = Timestamp("201710290100", tz=tz) + end = Timestamp("201710290300", tz=tz) + index = date_range(start=start, end=end, freq=freq) + + expected = DatetimeIndex( + [ + "201710290115", + "201710290130", + "201710290145", + "201710290200", + "201710290215", + "201710290230", + "201710290245", + "201710290200", + "201710290215", + "201710290230", + "201710290245", + "201710290300", + ], + tz=tz, + freq=freq, + ambiguous=[ + True, + True, + True, + True, + True, + True, + True, + False, + False, + False, + False, + False, + ], + ) + result = index.drop(index[0]) + tm.assert_index_equal(result, expected) + + def test_date_range_localize(self): + rng = date_range("3/11/2012 03:00", periods=15, freq="H", tz="US/Eastern") + rng2 = DatetimeIndex(["3/11/2012 03:00", "3/11/2012 04:00"], tz="US/Eastern") + rng3 = date_range("3/11/2012 03:00", periods=15, freq="H") + rng3 = rng3.tz_localize("US/Eastern") + + tm.assert_index_equal(rng._with_freq(None), rng3) + + # DST transition time + val = rng[0] + exp = Timestamp("3/11/2012 03:00", tz="US/Eastern") + + assert val.hour == 3 + assert exp.hour == 3 + assert val == exp # same UTC value + tm.assert_index_equal(rng[:2], rng2) + + # Right before the DST transition + rng = date_range("3/11/2012 00:00", periods=2, freq="H", tz="US/Eastern") + rng2 = DatetimeIndex( + ["3/11/2012 00:00", "3/11/2012 01:00"], tz="US/Eastern", freq="H" + ) + tm.assert_index_equal(rng, rng2) + exp = Timestamp("3/11/2012 00:00", tz="US/Eastern") + assert exp.hour == 0 + assert rng[0] == exp + exp = Timestamp("3/11/2012 01:00", tz="US/Eastern") + assert exp.hour == 1 + assert rng[1] == exp + + rng = date_range("3/11/2012 00:00", periods=10, freq="H", tz="US/Eastern") + assert rng[2].hour == 3 + + def test_timestamp_equality_different_timezones(self): + utc_range = date_range("1/1/2000", periods=20, tz="UTC") + eastern_range = utc_range.tz_convert("US/Eastern") + berlin_range = utc_range.tz_convert("Europe/Berlin") + + for a, b, c in zip(utc_range, eastern_range, berlin_range): + assert a == b + assert b == c + assert a == c + + assert (utc_range == eastern_range).all() + assert (utc_range == berlin_range).all() + assert (berlin_range == eastern_range).all() + + def test_dti_intersection(self): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + left = rng[10:90][::-1] + right = rng[20:80][::-1] + + assert left.tz == rng.tz + result = left.intersection(right) + assert result.tz == left.tz + + def test_dti_equals_with_tz(self): + left = date_range("1/1/2011", periods=100, freq="H", tz="utc") + right = date_range("1/1/2011", periods=100, freq="H", tz="US/Eastern") + + assert not left.equals(right) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_nat(self, tzstr): + idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT]) + + assert isna(idx[1]) + assert idx[0].tzinfo is not None + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_astype_asobject_tzinfos(self, tzstr): + # GH#1345 + + # dates around a dst transition + rng = date_range("2/13/2010", "5/6/2010", tz=tzstr) + + objs = rng.astype(object) + for i, x in enumerate(objs): + exval = rng[i] + assert x == exval + assert x.tzinfo == exval.tzinfo + + objs = rng.astype(object) + for i, x in enumerate(objs): + exval = rng[i] + assert x == exval + assert x.tzinfo == exval.tzinfo + + @pytest.mark.parametrize("tzstr", 
["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_with_timezone_repr(self, tzstr): + rng = date_range("4/13/2010", "5/6/2010") + + rng_eastern = rng.tz_localize(tzstr) + + rng_repr = repr(rng_eastern) + assert "2010-04-13 00:00:00" in rng_repr + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_take_dont_lose_meta(self, tzstr): + rng = date_range("1/1/2000", periods=20, tz=tzstr) + + result = rng.take(range(5)) + assert result.tz == rng.tz + assert result.freq == rng.freq + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_utc_box_timestamp_and_localize(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("3/11/2012", "3/12/2012", freq="H", tz="utc") + rng_eastern = rng.tz_convert(tzstr) + + expected = rng[-1].astimezone(tz) + + stamp = rng_eastern[-1] + assert stamp == expected + assert stamp.tzinfo == expected.tzinfo + + # right tzinfo + rng = date_range("3/13/2012", "3/14/2012", freq="H", tz="utc") + rng_eastern = rng.tz_convert(tzstr) + # test not valid for dateutil timezones. + # assert 'EDT' in repr(rng_eastern[0].tzinfo) + assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr( + rng_eastern[0].tzinfo + ) + + def test_dti_to_pydatetime(self): + dt = dateutil.parser.parse("2012-06-13T01:39:00Z") + dt = dt.replace(tzinfo=tzlocal()) + + arr = np.array([dt], dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + rng = date_range("2012-11-03 03:00", "2012-11-05 03:00", tz=tzlocal()) + arr = rng.to_pydatetime() + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_dti_to_pydatetime_fizedtz(self): + dates = np.array( + [ + datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off), + ] + ) + dti = DatetimeIndex(dates) + + result = dti.to_pydatetime() + tm.assert_numpy_array_equal(dates, result) + + result = dti._mpl_repr() + tm.assert_numpy_array_equal(dates, result) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Central"), gettz("US/Central")]) + def test_with_tz(self, tz): + # just want it to work + start = datetime(2011, 3, 12, tzinfo=pytz.utc) + dr = bdate_range(start, periods=50, freq=pd.offsets.Hour()) + assert dr.tz is pytz.utc + + # DateRange with naive datetimes + dr = bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc) + dr = bdate_range("1/1/2005", "1/1/2009", tz=tz) + + # normalized + central = dr.tz_convert(tz) + assert central.tz is tz + naive = central[0].to_pydatetime().replace(tzinfo=None) + comp = conversion.localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # compare vs a localized tz + naive = dr[0].to_pydatetime().replace(tzinfo=None) + comp = conversion.localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # datetimes with tzinfo set + dr = bdate_range( + datetime(2005, 1, 1, tzinfo=pytz.utc), datetime(2009, 1, 1, tzinfo=pytz.utc) + ) + msg = "Start and end cannot both be tz-aware with different timezones" + with pytest.raises(Exception, match=msg): + bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), "1/1/2009", tz=tz) + + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_field_access_localize(self, prefix): + strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] + rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern") + assert (rng.hour == 0).all() + + # a more unusual time zone, #1946 + dr = date_range( + "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan" + ) 
+ + expected = Index(np.arange(10, dtype=np.int64)) + tm.assert_index_equal(dr.hour, expected) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_convert_tz_aware_datetime_datetime(self, tz): + # GH#1581 + dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)] + + dates_aware = [conversion.localize_pydatetime(x, tz) for x in dates] + result = DatetimeIndex(dates_aware) + assert timezones.tz_compare(result.tz, tz) + + converted = to_datetime(dates_aware, utc=True) + ex_vals = np.array([Timestamp(x).value for x in dates_aware]) + tm.assert_numpy_array_equal(converted.asi8, ex_vals) + assert converted.tz is pytz.utc + + # Note: not difference, as there is no symmetry requirement there + @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"]) + def test_dti_setop_aware(self, setop): + # non-overlapping + rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central") + + rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern") + + with tm.assert_produces_warning(FutureWarning): + # # GH#39328 will cast both to UTC + result = getattr(rng, setop)(rng2) + + expected = getattr(rng.astype("O"), setop)(rng2.astype("O")) + tm.assert_index_equal(result, expected) + if len(result): + assert result[0].tz.zone == "US/Central" + assert result[-1].tz.zone == "US/Eastern" + + def test_dti_union_mixed(self): + # GH 21671 + rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT]) + rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo") + result = rng.union(rng2) + expected = Index( + [ + Timestamp("2011-01-01"), + pd.NaT, + Timestamp("2012-01-01", tz="Asia/Tokyo"), + Timestamp("2012-01-02", tz="Asia/Tokyo"), + ], + dtype=object, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)] + ) + def test_iteration_preserves_nanoseconds(self, tz): + # GH 19603 + index = DatetimeIndex( + ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz + ) + for i, ts in enumerate(index): + assert ts == index[i] + + +def test_tz_localize_invalidates_freq(): + # we only preserve freq in unambiguous cases + + # if localized to US/Eastern, this crosses a DST transition + dti = date_range("2014-03-08 23:00", "2014-03-09 09:00", freq="H") + assert dti.freq == "H" + + result = dti.tz_localize(None) # no-op + assert result.freq == "H" + + result = dti.tz_localize("UTC") # unambiguous freq preservation + assert result.freq == "H" + + result = dti.tz_localize("US/Eastern", nonexistent="shift_forward") + assert result.freq is None + assert result.inferred_freq is None # i.e. 
we are not _too_ strict here + + # Case where we _can_ keep freq because we're length==1 + dti2 = dti[:1] + result = dti2.tz_localize("US/Eastern") + assert result.freq == "H" diff --git a/pandas/tests/indexes/datetimes/test_unique.py b/pandas/tests/indexes/datetimes/test_unique.py new file mode 100644 index 00000000..68ac770f --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_unique.py @@ -0,0 +1,77 @@ +from datetime import ( + datetime, + timedelta, +) + +from pandas import ( + DatetimeIndex, + NaT, + Timestamp, +) +import pandas._testing as tm + + +def test_unique(tz_naive_fixture): + + idx = DatetimeIndex(["2017"] * 2, tz=tz_naive_fixture) + expected = idx[:1] + + result = idx.unique() + tm.assert_index_equal(result, expected) + # GH#21737 + # Ensure the underlying data is consistent + assert result[0] == expected[0] + + +def test_index_unique(rand_series_with_duplicate_datetimeindex): + dups = rand_series_with_duplicate_datetimeindex + index = dups.index + + uniques = index.unique() + expected = DatetimeIndex( + [ + datetime(2000, 1, 2), + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + ] + ) + assert uniques.dtype == "M8[ns]" # sanity + tm.assert_index_equal(uniques, expected) + assert index.nunique() == 4 + + # GH#2563 + assert isinstance(uniques, DatetimeIndex) + + dups_local = index.tz_localize("US/Eastern") + dups_local.name = "foo" + result = dups_local.unique() + expected = DatetimeIndex(expected, name="foo") + expected = expected.tz_localize("US/Eastern") + assert result.tz is not None + assert result.name == "foo" + tm.assert_index_equal(result, expected) + + +def test_index_unique2(): + # NaT, note this is excluded + arr = [1370745748 + t for t in range(20)] + [NaT.value] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 + + +def test_index_unique3(): + arr = [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20) + ] + [NaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 + + +def test_is_unique_monotonic(rand_series_with_duplicate_datetimeindex): + index = rand_series_with_duplicate_datetimeindex.index + assert not index.is_unique diff --git a/pandas/tests/indexes/interval/__init__.py b/pandas/tests/indexes/interval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py new file mode 100644 index 00000000..c253a745 --- /dev/null +++ b/pandas/tests/indexes/interval/test_astype.py @@ -0,0 +1,245 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + IntervalDtype, +) + +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + NaT, + Timedelta, + Timestamp, + interval_range, +) +import pandas._testing as tm + + +class AstypeTests: + """Tests common to IntervalIndex with any subtype""" + + def test_astype_idempotent(self, index): + result = index.astype("interval") + tm.assert_index_equal(result, index) + + result = index.astype(index.dtype) + tm.assert_index_equal(result, index) + + def test_astype_object(self, index): + result = index.astype(object) + expected = Index(index.values, dtype="object") + tm.assert_index_equal(result, expected) + assert not result.equals(index) + + def test_astype_category(self, index): + result = 
index.astype("category") + expected = CategoricalIndex(index.values) + tm.assert_index_equal(result, expected) + + result = index.astype(CategoricalDtype()) + tm.assert_index_equal(result, expected) + + # non-default params + categories = index.dropna().unique().values[:-1] + dtype = CategoricalDtype(categories=categories, ordered=True) + result = index.astype(dtype) + expected = CategoricalIndex(index.values, categories=categories, ordered=True) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "int64", + "uint64", + "float64", + "complex128", + "period[M]", + "timedelta64", + "timedelta64[ns]", + "datetime64", + "datetime64[ns]", + "datetime64[ns, US/Eastern]", + ], + ) + def test_astype_cannot_cast(self, index, dtype): + msg = "Cannot cast IntervalIndex to dtype" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + def test_astype_invalid_dtype(self, index): + msg = "data type [\"']fake_dtype[\"'] not understood" + with pytest.raises(TypeError, match=msg): + index.astype("fake_dtype") + + +class TestIntSubtype(AstypeTests): + """Tests specific to IntervalIndex with integer-like subtype""" + + indexes = [ + IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")), + IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize( + "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"] + ) + def test_subtype_conversion(self, index, subtype): + dtype = IntervalDtype(subtype, index.closed) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")] + ) + def test_subtype_integer(self, subtype_start, subtype_end): + index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start)) + dtype = IntervalDtype(subtype_end, index.closed) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype_end), + index.right.astype(subtype_end), + closed=index.closed, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="GH#15832") + def test_subtype_integer_errors(self): + # int64 -> uint64 fails with negative values + index = interval_range(-10, 10) + dtype = IntervalDtype("uint64", "right") + + # Until we decide what the exception message _should_ be, we + # assert something that it should _not_ be. 
+ # We should _not_ be getting a message suggesting that the -10 + # has been wrapped around to a large-positive integer + msg = "^(?!(left side of interval must be <= right side))" + with pytest.raises(ValueError, match=msg): + index.astype(dtype) + + +class TestFloatSubtype(AstypeTests): + """Tests specific to IntervalIndex with float subtype""" + + indexes = [ + interval_range(-10.0, 10.0, closed="neither"), + IntervalIndex.from_arrays( + [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both" + ), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer(self, subtype): + index = interval_range(0.0, 10.0) + dtype = IntervalDtype(subtype, "right") + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + # raises with NA + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + index.insert(0, np.nan).astype(dtype) + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer_with_non_integer_borders(self, subtype): + index = interval_range(0.0, 3.0, freq=0.25) + dtype = IntervalDtype(subtype, "right") + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + def test_subtype_integer_errors(self): + # float64 -> uint64 fails with negative values + index = interval_range(-10.0, 10.0) + dtype = IntervalDtype("uint64", "right") + msg = re.escape( + "Cannot convert interval[float64, right] to interval[uint64, right]; " + "subtypes are incompatible" + ) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_subtype_datetimelike(self, index, subtype): + dtype = IntervalDtype(subtype, "right") + msg = "Cannot convert .* to .*; subtypes are incompatible" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + +class TestDatetimelikeSubtype(AstypeTests): + """Tests specific to IntervalIndex with datetime-like subtype""" + + indexes = [ + interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"), + interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT), + interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10), + interval_range(Timedelta("0 days"), periods=10, closed="both"), + interval_range(Timedelta("0 days"), periods=10).insert(2, NaT), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer(self, index, subtype): + dtype = IntervalDtype(subtype, "right") + + warn = None + if index.isna().any() and subtype == "uint64": + warn = FutureWarning + msg = "In a future version, this astype will raise if the conversion overflows" + + with tm.assert_produces_warning(warn, match=msg): + result = index.astype(dtype) + new_left = index.left.astype(subtype) + new_right = index.right.astype(subtype) + + expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed) + tm.assert_index_equal(result, expected) + + def test_subtype_float(self, index): + dtype = IntervalDtype("float64", "right") + msg = "Cannot convert .* to 
.*; subtypes are incompatible" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + def test_subtype_datetimelike(self): + # datetime -> timedelta raises + dtype = IntervalDtype("timedelta64[ns]", "right") + msg = "Cannot convert .* to .*; subtypes are incompatible" + + index = interval_range(Timestamp("2018-01-01"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + # timedelta -> datetime raises + dtype = IntervalDtype("datetime64[ns]", "right") + index = interval_range(Timedelta("0 days"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py new file mode 100644 index 00000000..c44303aa --- /dev/null +++ b/pandas/tests/indexes/interval/test_base.py @@ -0,0 +1,71 @@ +import numpy as np +import pytest + +from pandas import IntervalIndex +import pandas._testing as tm +from pandas.tests.indexes.common import Base + + +class TestBase(Base): + """ + Tests specific to the shared common index tests; unrelated tests should be placed + in test_interval.py or the specific test file (e.g. test_astype.py) + """ + + _index_cls = IntervalIndex + + @pytest.fixture + def simple_index(self) -> IntervalIndex: + return self._index_cls.from_breaks(range(11), closed="right") + + @pytest.fixture + def index(self): + return tm.makeIntervalIndex(10) + + def create_index(self, *, closed="right"): + return IntervalIndex.from_breaks(range(11), closed=closed) + + def test_repr_max_seq_item_setting(self): + # override base test: not a valid repr as we use interval notation + pass + + def test_repr_roundtrip(self): + # override base test: not a valid repr as we use interval notation + pass + + def test_take(self, closed): + index = self.create_index(closed=closed) + + result = index.take(range(10)) + tm.assert_index_equal(result, index) + + result = index.take([0, 0, 1]) + expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed) + tm.assert_index_equal(result, expected) + + def test_where(self, simple_index, listlike_box): + klass = listlike_box + + idx = simple_index + cond = [True] * len(idx) + expected = idx + result = expected.where(klass(cond)) + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * len(idx[1:]) + expected = IntervalIndex([np.nan] + idx[1:].tolist()) + result = idx.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_getitem_2d_deprecated(self, simple_index): + # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable + idx = simple_index + with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): + with tm.assert_produces_warning(FutureWarning): + idx[:, None] + with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): + # GH#44051 + idx[True] + with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): + # GH#44051 + idx[False] diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py new file mode 100644 index 00000000..a71a8f9e --- /dev/null +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -0,0 +1,473 @@ +from functools import partial + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype +from pandas.core.dtypes.dtypes import 
IntervalDtype + +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Interval, + IntervalIndex, + date_range, + notna, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) +from pandas.core.arrays import IntervalArray +import pandas.core.common as com + + +@pytest.fixture(params=[None, "foo"]) +def name(request): + return request.param + + +class ConstructorTests: + """ + Common tests for all variations of IntervalIndex construction. Input data + to be supplied in breaks format, then converted by the subclass method + get_kwargs_from_breaks to the expected format. + """ + + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") + @pytest.mark.parametrize( + "breaks", + [ + [3, 14, 15, 92, 653], + np.arange(10, dtype="int64"), + Int64Index(range(-10, 11)), + Float64Index(np.arange(20, 30, 0.5)), + date_range("20180101", periods=10), + date_range("20180101", periods=10, tz="US/Eastern"), + timedelta_range("1 day", periods=10), + ], + ) + def test_constructor(self, constructor, breaks, closed, name): + result_kwargs = self.get_kwargs_from_breaks(breaks, closed) + result = constructor(closed=closed, name=name, **result_kwargs) + + assert result.closed == closed + assert result.name == name + assert result.dtype.subtype == getattr(breaks, "dtype", "int64") + tm.assert_index_equal(result.left, Index(breaks[:-1])) + tm.assert_index_equal(result.right, Index(breaks[1:])) + + @pytest.mark.parametrize( + "breaks, subtype", + [ + (Int64Index([0, 1, 2, 3, 4]), "float64"), + (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"), + (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"), + (Float64Index([0, 1, 2, 3, 4]), "int64"), + (date_range("2017-01-01", periods=5), "int64"), + (timedelta_range("1 day", periods=5), "int64"), + ], + ) + def test_constructor_dtype(self, constructor, breaks, subtype): + # GH 19262: conversion via dtype parameter + expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) + expected = constructor(**expected_kwargs) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + iv_dtype = IntervalDtype(subtype, "right") + for dtype in (iv_dtype, str(iv_dtype)): + result = constructor(dtype=dtype, **result_kwargs) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "breaks", + [ + Int64Index([0, 1, 2, 3, 4]), + Int64Index([0, 1, 2, 3, 4]), + Int64Index([0, 1, 2, 3, 4]), + Float64Index([0, 1, 2, 3, 4]), + date_range("2017-01-01", periods=5), + timedelta_range("1 day", periods=5), + ], + ) + def test_constructor_pass_closed(self, constructor, breaks): + # not passing closed to IntervalDtype, but to IntervalArray constructor + warn = None + if isinstance(constructor, partial) and constructor.func is Index: + # passing kwargs to Index is deprecated + warn = FutureWarning + + iv_dtype = IntervalDtype(breaks.dtype) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + + for dtype in (iv_dtype, str(iv_dtype)): + with tm.assert_produces_warning(warn): + + result = constructor(dtype=dtype, closed="left", **result_kwargs) + assert result.dtype.closed == "left" + + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") + @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) + def test_constructor_nan(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_subtype = np.float64 + expected_values 
= np.array(breaks[:-1], dtype=object) + + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(np.array(result), expected_values) + + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") + @pytest.mark.parametrize( + "breaks", + [ + [], + np.array([], dtype="int64"), + np.array([], dtype="float64"), + np.array([], dtype="datetime64[ns]"), + np.array([], dtype="timedelta64[ns]"), + ], + ) + def test_constructor_empty(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_values = np.array([], dtype=object) + expected_subtype = getattr(breaks, "dtype", np.int64) + + assert result.empty + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(np.array(result), expected_values) + + @pytest.mark.parametrize( + "breaks", + [ + tuple("0123456789"), + list("abcdefghij"), + np.array(list("abcdefghij"), dtype=object), + np.array(list("abcdefghij"), dtype=" Interval(0.5, 1.5) + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index + expected = np.array([True, True]) + tm.assert_numpy_array_equal(actual, expected) + actual = self.index <= self.index + tm.assert_numpy_array_equal(actual, expected) + actual = self.index >= self.index + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index < self.index + expected = np.array([False, False]) + tm.assert_numpy_array_equal(actual, expected) + actual = self.index > self.index + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index.values + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index.values == self.index + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index <= self.index.values + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index != self.index.values + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index > self.index.values + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index.values > self.index + tm.assert_numpy_array_equal(actual, np.array([False, False])) + + # invalid comparisons + actual = self.index == 0 + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index == self.index.left + tm.assert_numpy_array_equal(actual, np.array([False, False])) + + msg = "|".join( + [ + "not supported between instances of 'int' and '.*.Interval'", + r"Invalid comparison between dtype=interval\[int64, right\] and ", + ] + ) + with pytest.raises(TypeError, match=msg): + self.index > 0 + with pytest.raises(TypeError, match=msg): + self.index <= 0 + with pytest.raises(TypeError, match=msg): + self.index > np.arange(2) + + msg = "Lengths must match to compare" + with pytest.raises(ValueError, match=msg): + self.index > np.arange(3) + + def test_missing_values(self, closed): + idx = Index( + [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)] + ) + idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) + assert idx.equals(idx2) + + msg = ( + "missing values must be missing in the same location both left " + "and right sides" + ) + with pytest.raises(ValueError, match=msg): + IntervalIndex.from_arrays( + 
[np.nan, 0, 1], np.array([0, 1, 2]), closed=closed + ) + + tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) + + def test_sort_values(self, closed): + index = self.create_index(closed=closed) + + result = index.sort_values() + tm.assert_index_equal(result, index) + + result = index.sort_values(ascending=False) + tm.assert_index_equal(result, index[::-1]) + + # with nan + index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) + + result = index.sort_values() + expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) + tm.assert_index_equal(result, expected) + + result = index.sort_values(ascending=False, na_position="first") + expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_datetime(self, tz): + start = Timestamp("2000-01-01", tz=tz) + dates = date_range(start=start, periods=10) + index = IntervalIndex.from_breaks(dates) + + # test mid + start = Timestamp("2000-01-01T12:00", tz=tz) + expected = date_range(start=start, periods=9) + tm.assert_index_equal(index.mid, expected) + + # __contains__ doesn't check individual points + assert Timestamp("2000-01-01", tz=tz) not in index + assert Timestamp("2000-01-01T12", tz=tz) not in index + assert Timestamp("2000-01-02", tz=tz) not in index + iv_true = Interval( + Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz) + ) + iv_false = Interval( + Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz) + ) + assert iv_true in index + assert iv_false not in index + + # .contains does check individual points + assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() + assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() + assert index.contains(Timestamp("2000-01-02", tz=tz)).any() + + # test get_indexer + start = Timestamp("1999-12-31T12:00", tz=tz) + target = date_range(start=start, periods=7, freq="12H") + actual = index.get_indexer(target) + expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") + tm.assert_numpy_array_equal(actual, expected) + + start = Timestamp("2000-01-08T18:00", tz=tz) + target = date_range(start=start, periods=7, freq="6H") + actual = index.get_indexer(target) + expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") + tm.assert_numpy_array_equal(actual, expected) + + def test_append(self, closed): + + index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) + index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) + + result = index1.append(index2) + expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) + tm.assert_index_equal(result, expected) + + result = index1.append([index1, index2]) + expected = IntervalIndex.from_arrays( + [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed + ) + tm.assert_index_equal(result, expected) + + for other_closed in {"left", "right", "both", "neither"} - {closed}: + index_other_closed = IntervalIndex.from_arrays( + [0, 1], [1, 2], closed=other_closed + ) + result = index1.append(index_other_closed) + expected = index1.astype(object).append(index_other_closed.astype(object)) + tm.assert_index_equal(result, expected) + + def test_is_non_overlapping_monotonic(self, closed): + # Should be True in all cases + tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is True + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert 
idx.is_non_overlapping_monotonic is True + + # Should be False in all cases (overlapping) + tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False in all cases (non-monotonic) + tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False for closed='both', otherwise True (GH16560) + if closed == "both": + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is False + else: + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is True + + @pytest.mark.parametrize( + "start, shift, na_value", + [ + (0, 1, np.nan), + (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), + (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), + ], + ) + def test_is_overlapping(self, start, shift, na_value, closed): + # GH 23309 + # see test_interval_tree.py for extensive tests; interface tests here + + # non-overlapping + tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is False + + # non-overlapping with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is False + + # overlapping + tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is True + + # overlapping with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is True + + # common endpoints + tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.is_overlapping + expected = closed == "both" + assert result is expected + + # common endpoints with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.is_overlapping + assert result is expected + + @pytest.mark.parametrize( + "tuples", + [ + list(zip(range(10), range(1, 11))), + list( + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), + ) + ), + list( + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), + ) + ), + ], + ) + def test_to_tuples(self, tuples): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples() + expected = Index(com.asarray_tuplesafe(tuples)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tuples", + [ + list(zip(range(10), range(1, 11))) + [np.nan], + list( + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), + ) + ) + + [np.nan], + list( + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), + ) + ) + + [np.nan], + ], + ) + @pytest.mark.parametrize("na_tuple", [True, False]) + def test_to_tuples_na(self, tuples, na_tuple): + # GH 18756 + idx = 
IntervalIndex.from_tuples(tuples) + result = idx.to_tuples(na_tuple=na_tuple) + + # check the non-NA portion + expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) + result_notna = result[:-1] + tm.assert_index_equal(result_notna, expected_notna) + + # check the NA portion + result_na = result[-1] + if na_tuple: + assert isinstance(result_na, tuple) + assert len(result_na) == 2 + assert all(isna(x) for x in result_na) + else: + assert isna(result_na) + + def test_nbytes(self): + # GH 19209 + left = np.arange(0, 4, dtype="i8") + right = np.arange(1, 5, dtype="i8") + + result = IntervalIndex.from_arrays(left, right).nbytes + expected = 64 # 4 * 8 * 2 + assert result == expected + + @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) + def test_set_closed(self, name, closed, new_closed): + # GH 21670 + index = interval_range(0, 5, closed=closed, name=name) + result = index.set_closed(new_closed) + expected = interval_range(0, 5, closed=new_closed, name=name) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) + def test_set_closed_errors(self, bad_closed): + # GH 21670 + index = interval_range(0, 5) + msg = f"invalid option for 'closed': {bad_closed}" + with pytest.raises(ValueError, match=msg): + index.set_closed(bad_closed) + + def test_is_all_dates(self): + # GH 23576 + year_2017 = Interval( + Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00") + ) + year_2017_index = IntervalIndex([year_2017]) + assert not year_2017_index._is_all_dates + + +def test_dir(): + # GH#27571 dir(interval_index) should not raise + index = IntervalIndex.from_arrays([0, 1], [1, 2]) + result = dir(index) + assert "str" not in result + + +def test_searchsorted_different_argument_classes(listlike_box): + # https://github.com/pandas-dev/pandas/issues/32762 + values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) + result = values.searchsorted(listlike_box(values)) + expected = np.array([0, 1], dtype=result.dtype) + tm.assert_numpy_array_equal(result, expected) + + result = values._data.searchsorted(listlike_box(values)) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2] +) +def test_searchsorted_invalid_argument(arg): + values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) + msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and " + with pytest.raises(TypeError, match=msg): + values.searchsorted(arg) diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py new file mode 100644 index 00000000..2f28c33a --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -0,0 +1,355 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +from pandas import ( + DateOffset, + Interval, + IntervalIndex, + Timedelta, + Timestamp, + date_range, + interval_range, + timedelta_range, +) +import pandas._testing as tm + +from pandas.tseries.offsets import Day + + +@pytest.fixture(scope="class", params=[None, "foo"]) +def name(request): + return request.param + + +class TestIntervalRange: + @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)]) + def test_constructor_numeric(self, closed, name, freq, periods): + start, end = 0, 100 + breaks = np.arange(101, step=freq) + expected = IntervalIndex.from_breaks(breaks, 
name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + @pytest.mark.parametrize( + "freq, periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("M", 11)] + ) + def test_constructor_timestamp(self, closed, name, freq, periods, tz): + start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz) + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + if not breaks.freq.is_anchored() and tz is None: + # matches expected only for non-anchored offsets and tz naive + # (anchored/DST transitions cause unequal spacing in expected) + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "freq, periods", [("D", 100), ("2D12H", 40), ("5D", 20), ("25D", 4)] + ) + def test_constructor_timedelta(self, closed, name, freq, periods): + start, end = Timedelta("0 days"), Timedelta("100 days") + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start, end, freq, expected_endpoint", + [ + (0, 10, 3, 9), + (0, 10, 1.5, 9), + (0.5, 10, 3, 9.5), + (Timedelta("0D"), Timedelta("10D"), "2D4H", Timedelta("8D16H")), + ( + Timestamp("2018-01-01"), + Timestamp("2018-02-09"), + "MS", + Timestamp("2018-02-01"), + ), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-20", 
tz="US/Eastern"), + "5D12H", + Timestamp("2018-01-17 12:00:00", tz="US/Eastern"), + ), + ], + ) + def test_early_truncation(self, start, end, freq, expected_endpoint): + # index truncates early if freq causes end to be skipped + result = interval_range(start=start, end=end, freq=freq) + result_endpoint = result.right[-1] + assert result_endpoint == expected_endpoint + + @pytest.mark.parametrize( + "start, end, freq", + [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)], + ) + def test_no_invalid_float_truncation(self, start, end, freq): + # GH 21161 + if freq is None: + breaks = [0.5, 1.5, 2.5, 3.5, 4.5] + else: + breaks = [0.5, 2.0, 3.5, 5.0, 6.5] + expected = IntervalIndex.from_breaks(breaks) + + result = interval_range(start=start, end=end, periods=4, freq=freq) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start, mid, end", + [ + ( + Timestamp("2018-03-10", tz="US/Eastern"), + Timestamp("2018-03-10 23:30:00", tz="US/Eastern"), + Timestamp("2018-03-12", tz="US/Eastern"), + ), + ( + Timestamp("2018-11-03", tz="US/Eastern"), + Timestamp("2018-11-04 00:30:00", tz="US/Eastern"), + Timestamp("2018-11-05", tz="US/Eastern"), + ), + ], + ) + def test_linspace_dst_transition(self, start, mid, end): + # GH 20976: linspace behavior defined from start/end/periods + # accounts for the hour gained/lost during DST transition + result = interval_range(start=start, end=end, periods=2) + expected = IntervalIndex.from_breaks([start, mid, end]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("freq", [2, 2.0]) + @pytest.mark.parametrize("end", [10, 10.0]) + @pytest.mark.parametrize("start", [0, 0.0]) + def test_float_subtype(self, start, end, freq): + # Has float subtype if any of start/end/freq are float, even if all + # resulting endpoints can safely be upcast to integers + + # defined from start/end/freq + index = interval_range(start=start, end=end, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(start + end + freq) else "float64" + assert result == expected + + # defined from start/periods/freq + index = interval_range(start=start, periods=5, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(start + freq) else "float64" + assert result == expected + + # defined from end/periods/freq + index = interval_range(end=end, periods=5, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(end + freq) else "float64" + assert result == expected + + # GH 20976: linspace behavior defined from start/end/periods + index = interval_range(start=start, end=end, periods=5) + result = index.dtype.subtype + expected = "int64" if is_integer(start + end) else "float64" + assert result == expected + + def test_constructor_coverage(self): + # float value for periods + expected = interval_range(start=0, periods=10) + result = interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + # equivalent timestamp-like start/end + start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15") + expected = interval_range(start=start, end=end) + + result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = [ + "D", + Day(), + Timedelta(days=1), + timedelta(days=1), + DateOffset(days=1), + ] + for freq in equiv_freq: + result = 
interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = interval_range(start=start, end=end) + + result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the four parameters: start, end, periods, and freq, " + "exactly three must be specified" + ) + + with pytest.raises(ValueError, match=msg): + interval_range(start=0) + + with pytest.raises(ValueError, match=msg): + interval_range(end=5) + + with pytest.raises(ValueError, match=msg): + interval_range(periods=2) + + with pytest.raises(ValueError, match=msg): + interval_range() + + # too many params + with pytest.raises(ValueError, match=msg): + interval_range(start=0, end=5, periods=6, freq=1.5) + + # mixed units + msg = "start, end, freq need to be type compatible" + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=Timestamp("20130101"), freq=2) + + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=Timedelta("1 day"), freq=2) + + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timestamp("20130101"), end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D" + ) + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2 + ) + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timedelta("1 day"), end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D" + ) + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2) + + # invalid periods + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + interval_range(start=0, periods="foo") + + # invalid start + msg = "start must be numeric or datetime-like, got foo" + with pytest.raises(ValueError, match=msg): + interval_range(start="foo", periods=10) + + # invalid end + msg = r"end must be numeric or datetime-like, got \(0, 1\]" + with pytest.raises(ValueError, match=msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq for datetime-like + msg = "freq must be numeric or convertible to DateOffset, got foo" + with pytest.raises(ValueError, match=msg): + interval_range(start=0, end=10, freq="foo") + + with pytest.raises(ValueError, match=msg): + interval_range(start=Timestamp("20130101"), periods=10, freq="foo") + + with pytest.raises(ValueError, match=msg): + interval_range(end=Timedelta("1 day"), periods=10, freq="foo") + + # mixed tz + start = Timestamp("2017-01-01", tz="US/Eastern") + end = Timestamp("2017-01-07", tz="US/Pacific") + msg = "Start and end cannot both be tz-aware with different timezones" + with pytest.raises(TypeError, match=msg): + interval_range(start=start, end=end) 
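
The interval_range tests above all revolve around one constructor rule: exactly three of start, end, periods and freq must be supplied, and the endpoints may be numeric, Timestamp or Timedelta as long as they are type-compatible with freq. A minimal illustrative sketch of that public API as exercised by these tests (not taken from the patch itself; reprs are abbreviated in the comments):

    import pandas as pd

    # numeric endpoints: three of the four parameters, closed side chosen explicitly
    idx = pd.interval_range(start=0, end=10, freq=2, closed="left")
    # five left-closed intervals: [0, 2), [2, 4), [4, 6), [6, 8), [8, 10)

    # datetime-like endpoints accept string/offset frequencies the same way
    dti = pd.interval_range(start=pd.Timestamp("2018-01-01"), periods=3, freq="D")
    # three right-closed daily intervals starting at 2018-01-01

    # omitting freq gives linspace-style breaks between start and end (GH 20976)
    lin = pd.interval_range(start=0.0, end=1.0, periods=4)
    # breaks at 0.0, 0.25, 0.5, 0.75, 1.0

Supplying fewer or more than three of the four parameters raises ValueError, and mixing numeric with datetime-like endpoints raises TypeError, which is what test_errors asserts.
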
diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py new file mode 100644 index 00000000..3b9de8d9 --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -0,0 +1,209 @@ +from itertools import permutations + +import numpy as np +import pytest + +from pandas._libs.interval import IntervalTree +from pandas.compat import IS64 + +import pandas._testing as tm + + +def skipif_32bit(param): + """ + Skip parameters in a parametrize on 32bit systems. Specifically used + here to skip leaf_size parameters related to GH 23440. + """ + marks = pytest.mark.skipif(not IS64, reason="GH 23440: int type mismatch on 32bit") + return pytest.param(param, marks=marks) + + +@pytest.fixture(scope="class", params=["int64", "float64", "uint64"]) +def dtype(request): + return request.param + + +@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10]) +def leaf_size(request): + """ + Fixture to specify IntervalTree leaf_size parameter; to be used with the + tree fixture. + """ + return request.param + + +@pytest.fixture( + params=[ + np.arange(5, dtype="int64"), + np.arange(5, dtype="uint64"), + np.arange(5, dtype="float64"), + np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"), + ] +) +def tree(request, leaf_size): + left = request.param + return IntervalTree(left, left + 2, leaf_size=leaf_size) + + +class TestIntervalTree: + def test_get_indexer(self, tree): + result = tree.get_indexer(np.array([1.0, 5.5, 6.5])) + expected = np.array([0, 4, -1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): + tree.get_indexer(np.array([3.0])) + + @pytest.mark.parametrize( + "dtype, target_value, target_dtype", + [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")], + ) + def test_get_indexer_overflow(self, dtype, target_value, target_dtype): + left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype) + tree = IntervalTree(left, right) + + result = tree.get_indexer(np.array([target_value], dtype=target_dtype)) + expected = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_non_unique(self, tree): + indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5])) + + result = indexer[:1] + expected = np.array([0], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = np.sort(indexer[1:3]) + expected = np.array([0, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = np.sort(indexer[3:]) + expected = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = missing + expected = np.array([2], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, target_value, target_dtype", + [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")], + ) + def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype): + left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype) + tree = IntervalTree(left, right) + target = np.array([target_value], dtype=target_dtype) + + result_indexer, result_missing = tree.get_indexer_non_unique(target) + expected_indexer = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + expected_missing = np.array([0], dtype="intp") + tm.assert_numpy_array_equal(result_missing, expected_missing) + + def test_duplicates(self, dtype): + 
left = np.array([0, 0, 0], dtype=dtype) + tree = IntervalTree(left, left + 1) + + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): + tree.get_indexer(np.array([0.5])) + + indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) + result = np.sort(indexer) + expected = np.array([0, 1, 2], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = missing + expected = np.array([], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000] + ) + def test_get_indexer_closed(self, closed, leaf_size): + x = np.arange(1000, dtype="float64") + found = x.astype("intp") + not_found = (-1 * np.ones(1000)).astype("intp") + + tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size) + tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25)) + + expected = found if tree.closed_left else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0)) + + expected = found if tree.closed_right else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5)) + + @pytest.mark.parametrize( + "left, right, expected", + [ + (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True), + (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True), + (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True), + (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False), + (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False), + ], + ) + @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) + def test_is_overlapping(self, closed, order, left, right, expected): + # GH 23309 + tree = IntervalTree(left[order], right[order], closed=closed) + result = tree.is_overlapping + assert result is expected + + @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) + def test_is_overlapping_endpoints(self, closed, order): + """shared endpoints are marked as overlapping""" + # GH 23309 + left, right = np.arange(3, dtype="int64"), np.arange(1, 4) + tree = IntervalTree(left[order], right[order], closed=closed) + result = tree.is_overlapping + expected = closed == "both" + assert result is expected + + @pytest.mark.parametrize( + "left, right", + [ + (np.array([], dtype="int64"), np.array([], dtype="int64")), + (np.array([0], dtype="int64"), np.array([1], dtype="int64")), + (np.array([np.nan]), np.array([np.nan])), + (np.array([np.nan] * 3), np.array([np.nan] * 3)), + ], + ) + def test_is_overlapping_trivial(self, closed, left, right): + # GH 23309 + tree = IntervalTree(left, right, closed=closed) + assert tree.is_overlapping is False + + @pytest.mark.skipif(not IS64, reason="GH 23440") + def test_construction_overflow(self): + # GH 25485 + left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101 + tree = IntervalTree(left, right) + + # pivot should be average of left/right medians + result = tree.root.pivot + expected = (50 + np.iinfo(np.int64).max) / 2 + assert result == expected + + @pytest.mark.xfail(not IS64, reason="GH 23440") + @pytest.mark.parametrize( + "left, right, expected", + [ + ([-np.inf, 1.0], [1.0, 2.0], 0.0), + ([-np.inf, -2.0], [-2.0, -1.0], -2.0), + ([-2.0, -1.0], [-1.0, np.inf], 0.0), + ([1.0, 2.0], [2.0, np.inf], 2.0), + ], + ) + def test_inf_bound_infinite_recursion(self, left, right, expected): + # GH 46658 + + tree = IntervalTree(left * 101, right * 101) + + result = tree.root.pivot + assert result 
== expected diff --git a/pandas/tests/indexes/interval/test_join.py b/pandas/tests/indexes/interval/test_join.py new file mode 100644 index 00000000..2f42c530 --- /dev/null +++ b/pandas/tests/indexes/interval/test_join.py @@ -0,0 +1,44 @@ +import pytest + +from pandas import ( + IntervalIndex, + MultiIndex, + RangeIndex, +) +import pandas._testing as tm + + +@pytest.fixture +def range_index(): + return RangeIndex(3, name="range_index") + + +@pytest.fixture +def interval_index(): + return IntervalIndex.from_tuples( + [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)], name="interval_index" + ) + + +def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index): + # GH-45661 + multi_index = MultiIndex.from_product([interval_index, range_index]) + result = multi_index.join(interval_index) + + tm.assert_index_equal(result, multi_index) + + +def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index): + # GH-45661 + multi_index = MultiIndex.from_product([interval_index, range_index]) + result = interval_index.join(multi_index) + + tm.assert_index_equal(result, multi_index) + + +def test_join_overlapping_interval_to_another_intervalindex(interval_index): + # GH-45661 + flipped_interval_index = interval_index[::-1] + result = interval_index.join(flipped_interval_index) + + tm.assert_index_equal(result, interval_index) diff --git a/pandas/tests/indexes/interval/test_pickle.py b/pandas/tests/indexes/interval/test_pickle.py new file mode 100644 index 00000000..308a90e7 --- /dev/null +++ b/pandas/tests/indexes/interval/test_pickle.py @@ -0,0 +1,13 @@ +import pytest + +from pandas import IntervalIndex +import pandas._testing as tm + + +class TestPickle: + @pytest.mark.parametrize("closed", ["left", "right", "both"]) + def test_pickle_round_trip_closed(self, closed): + # https://github.com/pandas-dev/pandas/issues/35658 + idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed) + result = tm.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py new file mode 100644 index 00000000..059b0b75 --- /dev/null +++ b/pandas/tests/indexes/interval/test_setops.py @@ -0,0 +1,202 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + IntervalIndex, + Timestamp, + interval_range, +) +import pandas._testing as tm + + +def monotonic_index(start, end, dtype="int64", closed="right"): + return IntervalIndex.from_breaks(np.arange(start, end, dtype=dtype), closed=closed) + + +def empty_index(dtype="int64", closed="right"): + return IntervalIndex(np.array([], dtype=dtype), closed=closed) + + +class TestIntervalIndex: + def test_union(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + other = monotonic_index(5, 13, closed=closed) + + expected = monotonic_index(0, 13, closed=closed) + result = index[::-1].union(other, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + result = other[::-1].union(index, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + tm.assert_index_equal(index.union(index, sort=sort), index) + tm.assert_index_equal(index.union(index[:1], sort=sort), index) + + def test_union_empty_result(self, closed, sort): + # GH 19101: empty result, same dtype + index = empty_index(dtype="int64", closed=closed) + result = index.union(index, sort=sort) + tm.assert_index_equal(result, index) + + # 
GH 19101: empty result, different numeric dtypes -> common dtype is f8 + other = empty_index(dtype="float64", closed=closed) + result = index.union(other, sort=sort) + expected = other + tm.assert_index_equal(result, expected) + + other = index.union(index, sort=sort) + tm.assert_index_equal(result, expected) + + other = empty_index(dtype="uint64", closed=closed) + result = index.union(other, sort=sort) + tm.assert_index_equal(result, expected) + + result = other.union(index, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + other = monotonic_index(5, 13, closed=closed) + + expected = monotonic_index(5, 11, closed=closed) + result = index[::-1].intersection(other, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + result = other[::-1].intersection(index, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + tm.assert_index_equal(index.intersection(index, sort=sort), index) + + # GH 26225: nested intervals + index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)]) + other = IntervalIndex.from_tuples([(1, 2), (1, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (1, 3)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 26225 + index = IntervalIndex.from_tuples([(0, 3), (0, 2)]) + other = IntervalIndex.from_tuples([(0, 2), (1, 3)]) + expected = IntervalIndex.from_tuples([(0, 2)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 26225: duplicate nan element + index = IntervalIndex([np.nan, np.nan]) + other = IntervalIndex([np.nan]) + expected = IntervalIndex([np.nan]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + def test_intersection_empty_result(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + + # GH 19101: empty result, same dtype + other = monotonic_index(300, 314, closed=closed) + expected = empty_index(dtype="int64", closed=closed) + result = index.intersection(other, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different numeric dtypes -> common dtype is float64 + other = monotonic_index(300, 314, dtype="float64", closed=closed) + result = index.intersection(other, sort=sort) + expected = other[:0] + tm.assert_index_equal(result, expected) + + other = monotonic_index(300, 314, dtype="uint64", closed=closed) + result = index.intersection(other, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection_duplicates(self): + # GH#38743 + index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)]) + other = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + def test_difference(self, closed, sort): + index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed) + result = index.difference(index[:1], sort=sort) + expected = index[1:] + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, same dtype + result = index.difference(index, sort=sort) + expected = empty_index(dtype="int64", closed=closed) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays( + 
index.left.astype("float64"), index.right, closed=closed + ) + result = index.difference(other, sort=sort) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + result = index[1:].symmetric_difference(index[:-1], sort=sort) + expected = IntervalIndex([index[0], index[-1]]) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + # GH 19101: empty result, same dtype + result = index.symmetric_difference(index, sort=sort) + expected = empty_index(dtype="int64", closed=closed) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays( + index.left.astype("float64"), index.right, closed=closed + ) + result = index.symmetric_difference(other, sort=sort) + expected = empty_index(dtype="float64", closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning") + @pytest.mark.parametrize( + "op_name", ["union", "intersection", "difference", "symmetric_difference"] + ) + def test_set_incompatible_types(self, closed, op_name, sort): + index = monotonic_index(0, 11, closed=closed) + set_op = getattr(index, op_name) + + # TODO: standardize return type of non-union setops type(self vs other) + # non-IntervalIndex + if op_name == "difference": + expected = index + else: + expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3])) + result = set_op(Index([1, 2, 3]), sort=sort) + tm.assert_index_equal(result, expected) + + # mixed closed -> cast to object + for other_closed in {"right", "left", "both", "neither"} - {closed}: + other = monotonic_index(0, 11, closed=other_closed) + expected = getattr(index.astype(object), op_name)(other, sort=sort) + if op_name == "difference": + expected = index + result = set_op(other, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 19016: incompatible dtypes -> cast to object + other = interval_range(Timestamp("20180101"), periods=9, closed=closed) + expected = getattr(index.astype(object), op_name)(other, sort=sort) + if op_name == "difference": + expected = index + result = set_op(other, sort=sort) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/__init__.py b/pandas/tests/indexes/multi/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py new file mode 100644 index 00000000..3cc4fa47 --- /dev/null +++ b/pandas/tests/indexes/multi/conftest.py @@ -0,0 +1,77 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) + + +# Note: identical the "multi" entry in the top-level "index" fixture +@pytest.fixture +def idx(): + # a MultiIndex used to test the general functionality of the + # general functionality of this object + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi + + +@pytest.fixture +def idx_dup(): + # compare tests/indexes/multi/conftest.py + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = 
Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 0, 1, 1]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi + + +@pytest.fixture +def index_names(): + # names that match those in the idx fixture for testing equality of + # names assigned to the idx + return ["first", "second"] + + +@pytest.fixture +def narrow_multi_index(): + """ + Return a MultiIndex that is narrower than the display (<80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) + dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) + return MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"]) + + +@pytest.fixture +def wide_multi_index(): + """ + Return a MultiIndex that is wider than the display (>80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) + dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) + levels = [ci, ci.codes + 9, dti, dti, dti] + names = ["a", "b", "dti_1", "dti_2", "dti_3"] + return MultiIndex.from_arrays(levels, names=names) diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py new file mode 100644 index 00000000..629cd7ea --- /dev/null +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -0,0 +1,260 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + date_range, + period_range, +) +import pandas._testing as tm +from pandas.core.api import UInt64Index + + +def test_shift(idx): + + # GH8083 test the base class for shift + msg = "This method is only implemented for DatetimeIndex, PeriodIndex and " + "TimedeltaIndex; Got type MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1, 2) + + +def test_groupby(idx): + groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = idx.tolist() + exp = {1: labels[:3], 2: labels[3:]} + tm.assert_dict_equal(groups, exp) + + # GH5620 + groups = idx.groupby(idx) + exp = {key: [key] for key in idx} + tm.assert_dict_equal(groups, exp) + + +def test_truncate_multiindex(): + # GH 34564 for MultiIndex level names check + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["L1", "L2"], + ) + + result = index.truncate(before=1) + assert "foo" not in result.levels[0] + assert 1 in result.levels[0] + assert index.names == result.names + + result = index.truncate(after=1) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] + assert index.names == result.names + + result = index.truncate(before=1, after=2) + assert len(result.levels[0]) == 2 + assert index.names == result.names + + msg = "after < before" + with pytest.raises(ValueError, match=msg): + index.truncate(3, 1) + + +# TODO: reshape + + +def test_reorder_levels(idx): + # this blows up + with pytest.raises(IndexError, match="^Too many levels"): + idx.reorder_levels([2, 1, 0]) + + +def test_numpy_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(["foo", "bar"]) + + m = MultiIndex.from_product([numbers, names], names=names) + expected = MultiIndex.from_product([numbers, names.repeat(reps)], 
names=names) + tm.assert_index_equal(np.repeat(m, reps), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(m, reps, axis=1) + + +def test_append_mixed_dtypes(): + # GH 13660 + dti = date_range("2011-01-01", freq="M", periods=3) + dti_tz = date_range("2011-01-01", freq="M", periods=3, tz="US/Eastern") + pi = period_range("2011-01", freq="M", periods=3) + + mi = MultiIndex.from_arrays( + [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi] + ) + assert mi.nlevels == 6 + + res = mi.append(mi) + exp = MultiIndex.from_arrays( + [ + [1, 2, 3, 1, 2, 3], + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ["a", "b", "c", "a", "b", "c"], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi), + ] + ) + tm.assert_index_equal(res, exp) + + other = MultiIndex.from_arrays( + [ + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ] + ) + + res = mi.append(other) + exp = MultiIndex.from_arrays( + [ + [1, 2, 3, "x", "y", "z"], + [1.1, np.nan, 3.3, "x", "y", "z"], + ["a", "b", "c", "x", "y", "z"], + dti.append(Index(["x", "y", "z"])), + dti_tz.append(Index(["x", "y", "z"])), + pi.append(Index(["x", "y", "z"])), + ] + ) + tm.assert_index_equal(res, exp) + + +def test_iter(idx): + result = list(idx) + expected = [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ] + assert result == expected + + +def test_sub(idx): + + first = idx + + # - now raises (previously was set op difference) + msg = "cannot perform __sub__ with this index type: MultiIndex" + with pytest.raises(TypeError, match=msg): + first - idx[-3:] + with pytest.raises(TypeError, match=msg): + idx[-3:] - first + with pytest.raises(TypeError, match=msg): + idx[-3:] - first.tolist() + msg = "cannot perform __rsub__ with this index type: MultiIndex" + with pytest.raises(TypeError, match=msg): + first.tolist() - idx[-3:] + + +def test_map(idx): + # callable + index = idx + + result = index.map(lambda x: x) + tm.assert_index_equal(result, index) + + +@pytest.mark.parametrize( + "mapper", + [ + lambda values, idx: {i: e for e, i in zip(values, idx)}, + lambda values, idx: pd.Series(values, idx), + ], +) +def test_map_dictlike(idx, mapper): + + identity = mapper(idx.values, idx) + + # we don't infer to UInt64 for a dict + if isinstance(idx, UInt64Index) and isinstance(identity, dict): + expected = idx.astype("int64") + else: + expected = idx + + result = idx.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = Index([np.nan] * len(idx)) + result = idx.map(mapper(expected, idx)) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "func", + [ + np.exp, + np.exp2, + np.expm1, + np.log, + np.log2, + np.log10, + np.log1p, + np.sqrt, + np.sin, + np.cos, + np.tan, + np.arcsin, + np.arccos, + np.arctan, + np.sinh, + np.cosh, + np.tanh, + np.arcsinh, + np.arccosh, + np.arctanh, + np.deg2rad, + np.rad2deg, + ], + ids=lambda func: func.__name__, +) +def test_numpy_ufuncs(idx, func): + # test ufuncs of numpy. 
see: + # https://numpy.org/doc/stable/reference/ufuncs.html + + expected_exception = TypeError + msg = ( + "loop of ufunc does not support argument 0 of type tuple which " + f"has no callable {func.__name__} method" + ) + with pytest.raises(expected_exception, match=msg): + func(idx) + + +@pytest.mark.parametrize( + "func", + [np.isfinite, np.isinf, np.isnan, np.signbit], + ids=lambda func: func.__name__, +) +def test_numpy_type_funcs(idx, func): + msg = ( + f"ufunc '{func.__name__}' not supported for the input types, and the inputs " + "could not be safely coerced to any supported types according to " + "the casting rule ''safe''" + ) + with pytest.raises(TypeError, match=msg): + func(idx) diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py new file mode 100644 index 00000000..29908537 --- /dev/null +++ b/pandas/tests/indexes/multi/test_astype.py @@ -0,0 +1,30 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas._testing as tm + + +def test_astype(idx): + expected = idx.copy() + actual = idx.astype("O") + tm.assert_copy(actual.levels, expected.levels) + tm.assert_copy(actual.codes, expected.codes) + assert actual.names == list(expected.names) + + with pytest.raises(TypeError, match="^Setting.*dtype.*object"): + idx.astype(np.dtype(int)) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_astype_category(idx, ordered): + # GH 18630 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + idx.astype(CategoricalDtype(ordered=ordered)) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + with pytest.raises(NotImplementedError, match=msg): + idx.astype("category") diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py new file mode 100644 index 00000000..d50a4405 --- /dev/null +++ b/pandas/tests/indexes/multi/test_compat.py @@ -0,0 +1,98 @@ +import numpy as np +import pytest + +from pandas import MultiIndex +import pandas._testing as tm + + +def test_numeric_compat(idx): + with pytest.raises(TypeError, match="cannot perform __mul__"): + idx * 1 + + with pytest.raises(TypeError, match="cannot perform __rmul__"): + 1 * idx + + div_err = "cannot perform __truediv__" + with pytest.raises(TypeError, match=div_err): + idx / 1 + + div_err = div_err.replace(" __", " __r") + with pytest.raises(TypeError, match=div_err): + 1 / idx + + with pytest.raises(TypeError, match="cannot perform __floordiv__"): + idx // 1 + + with pytest.raises(TypeError, match="cannot perform __rfloordiv__"): + 1 // idx + + +@pytest.mark.parametrize("method", ["all", "any", "__invert__"]) +def test_logical_compat(idx, method): + msg = f"cannot perform {method}" + + with pytest.raises(TypeError, match=msg): + getattr(idx, method)() + + +def test_inplace_mutation_resets_values(): + levels = [["a", "b", "c"], [4]] + levels2 = [[1, 2, 3], ["a"]] + codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + + mi1 = MultiIndex(levels=levels, codes=codes) + mi2 = MultiIndex(levels=levels2, codes=codes) + + # instantiating MultiIndex should not access/cache _.values + assert "_values" not in mi1._cache + assert "_values" not in mi2._cache + + vals = mi1.values.copy() + vals2 = mi2.values.copy() + + # accessing .values should cache ._values + assert mi1._values is mi1._cache["_values"] + assert mi1.values is mi1._cache["_values"] + assert isinstance(mi1._cache["_values"], np.ndarray) + 
+ # Make sure level setting works + new_vals = mi1.set_levels(levels2).values + tm.assert_almost_equal(vals2, new_vals) + + # Non-inplace doesn't drop _values from _cache [implementation detail] + tm.assert_almost_equal(mi1._cache["_values"], vals) + + # ...and values is still same too + tm.assert_almost_equal(mi1.values, vals) + + # Inplace should drop _values from _cache + with tm.assert_produces_warning(FutureWarning): + mi1.set_levels(levels2, inplace=True) + assert "_values" not in mi1._cache + tm.assert_almost_equal(mi1.values, vals2) + + # Make sure label setting works too + codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] + exp_values = np.empty((6,), dtype=object) + exp_values[:] = [(1, "a")] * 6 + + # Must be 1d array of tuples + assert exp_values.shape == (6,) + + new_mi = mi2.set_codes(codes2) + assert "_values" not in new_mi._cache + new_values = new_mi.values + assert "_values" in new_mi._cache + + # Not inplace shouldn't change + tm.assert_almost_equal(mi2._cache["_values"], vals2) + + # Should have correct values + tm.assert_almost_equal(exp_values, new_values) + + # ...and again setting inplace should drop _values from _cache, etc + with tm.assert_produces_warning(FutureWarning): + mi2.set_codes(codes2, inplace=True) + assert "_values" not in mi2._cache + tm.assert_almost_equal(mi2.values, new_values) + assert "_values" in mi2._cache diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py new file mode 100644 index 00000000..7fad59fc --- /dev/null +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -0,0 +1,839 @@ +from datetime import ( + date, + datetime, +) +import itertools + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +def test_constructor_single_level(): + result = MultiIndex( + levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] + ) + assert isinstance(result, MultiIndex) + expected = Index(["foo", "bar", "baz", "qux"], name="first") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["first"] + + +def test_constructor_no_levels(): + msg = "non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=[], codes=[]) + + msg = "Must pass both levels and codes" + with pytest.raises(TypeError, match=msg): + MultiIndex(levels=[]) + with pytest.raises(TypeError, match=msg): + MultiIndex(codes=[]) + + +def test_constructor_nonhashable_names(): + # GH 20527 + levels = [[1, 2], ["one", "two"]] + codes = [[0, 0, 1, 1], [0, 1, 0, 1]] + names = (["foo"], ["bar"]) + msg = r"MultiIndex\.name must be a hashable type" + with pytest.raises(TypeError, match=msg): + MultiIndex(levels=levels, codes=codes, names=names) + + # With .rename() + mi = MultiIndex( + levels=[[1, 2], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=("foo", "bar"), + ) + renamed = [["foor"], ["barr"]] + with pytest.raises(TypeError, match=msg): + mi.rename(names=renamed) + + # With .set_names() + with pytest.raises(TypeError, match=msg): + mi.set_names(names=renamed) + + +def test_constructor_mismatched_codes_levels(idx): + codes = [np.array([1]), np.array([2]), np.array([3])] + levels = ["a"] + + msg = "Length of levels and codes must be the same" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=levels, codes=codes) + + 
length_error = ( + r"On level 0, code max \(3\) >= length of level \(1\)\. " + "NOTE: this index is in an inconsistent state" + ) + label_error = r"Unequal code lengths: \[4, 2\]" + code_value_error = r"On level 0, code value \(-2\) < -1" + + # important to check that it's looking at the right thing. + with pytest.raises(ValueError, match=length_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]]) + + with pytest.raises(ValueError, match=label_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]]) + + # external API + with pytest.raises(ValueError, match=length_error): + idx.copy().set_levels([["a"], ["b"]]) + + with pytest.raises(ValueError, match=label_error): + idx.copy().set_codes([[0, 0, 0, 0], [0, 0]]) + + # test set_codes with verify_integrity=False + # the setting should not raise any value error + idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False) + + # code value smaller than -1 + with pytest.raises(ValueError, match=code_value_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]]) + + +def test_na_levels(): + # GH26408 + # test if codes are re-assigned value -1 for levels + # with missing values (NaN, NaT, None) + result = MultiIndex( + levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]] + ) + expected = MultiIndex( + levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]] + ) + tm.assert_index_equal(result, expected) + + result = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]] + ) + expected = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]] + ) + tm.assert_index_equal(result, expected) + + # verify set_levels and set_codes + result = MultiIndex( + levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]] + ).set_levels([[np.nan, "s", pd.NaT, 128, None]]) + tm.assert_index_equal(result, expected) + + result = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]] + ).set_codes([[0, -1, 1, 2, 3, 4]]) + tm.assert_index_equal(result, expected) + + +def test_copy_in_constructor(): + levels = np.array(["a", "b", "c"]) + codes = np.array([1, 1, 2, 0, 0, 1, 1]) + val = codes[0] + mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True) + assert mi.codes[0][0] == val + codes[0] = 15 + assert mi.codes[0][0] == val + val = levels[0] + levels[0] = "PANDA" + assert mi.levels[0][0] == val + + +# ---------------------------------------------------------------------------- +# from_arrays +# ---------------------------------------------------------------------------- +def test_from_arrays(idx): + arrays = [ + np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes) + ] + + # list of arrays as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + # infer correctly + result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]]) + assert result.levels[0].equals(Index([Timestamp("20130101")])) + assert result.levels[1].equals(Index(["a", "b"])) + + +def test_from_arrays_iterator(idx): + # GH 18434 + arrays = [ + np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes) + ] + + # iterator as input + result = MultiIndex.from_arrays(iter(arrays), names=idx.names) + tm.assert_index_equal(result, idx) + + # invalid iterator input + msg = "Input must be a list / sequence of array-likes." 
+ with pytest.raises(TypeError, match=msg): + MultiIndex.from_arrays(0) + + +def test_from_arrays_tuples(idx): + arrays = tuple( + tuple(np.asarray(lev).take(level_codes)) + for lev, level_codes in zip(idx.levels, idx.codes) + ) + + # tuple of tuples as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + +@pytest.mark.parametrize( + ("idx1", "idx2"), + [ + ( + pd.period_range("2011-01-01", freq="D", periods=3), + pd.period_range("2015-01-01", freq="H", periods=3), + ), + ( + date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"), + date_range("2015-01-01 10:00", freq="H", periods=3, tz="Asia/Tokyo"), + ), + ( + pd.timedelta_range("1 days", freq="D", periods=3), + pd.timedelta_range("2 hours", freq="H", periods=3), + ), + ], +) +def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2): + result = MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_datetimelike_mixed(): + idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") + idx2 = date_range("2015-01-01 10:00", freq="H", periods=3) + idx3 = pd.timedelta_range("1 days", freq="D", periods=3) + idx4 = pd.period_range("2011-01-01", freq="D", periods=3) + + result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + tm.assert_index_equal(result.get_level_values(2), idx3) + tm.assert_index_equal(result.get_level_values(3), idx4) + + result2 = MultiIndex.from_arrays( + [Series(idx1), Series(idx2), Series(idx3), Series(idx4)] + ) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + tm.assert_index_equal(result2.get_level_values(2), idx3) + tm.assert_index_equal(result2.get_level_values(3), idx4) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_categorical(): + # GH13743 + idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False) + idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True) + + result = MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + result3 = MultiIndex.from_arrays([idx1.values, idx2.values]) + tm.assert_index_equal(result3.get_level_values(0), idx1) + tm.assert_index_equal(result3.get_level_values(1), idx2) + + +def test_from_arrays_empty(): + # 0 levels + msg = "Must pass non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays(arrays=[]) + + # 1 level + result = MultiIndex.from_arrays(arrays=[[]], names=["A"]) + assert isinstance(result, MultiIndex) + expected = Index([], name="A") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] + + # N levels + for N in [2, 3]: + arrays = [[]] * N + names = list("ABC")[:N] + result = 
MultiIndex.from_arrays(arrays=arrays, names=names) + expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "invalid_sequence_of_arrays", + [ + 1, + [1], + [1, 2], + [[1], 2], + [1, [2]], + "a", + ["a"], + ["a", "b"], + [["a"], "b"], + (1,), + (1, 2), + ([1], 2), + (1, [2]), + "a", + ("a",), + ("a", "b"), + (["a"], "b"), + [(1,), 2], + [1, (2,)], + [("a",), "b"], + ((1,), 2), + (1, (2,)), + (("a",), "b"), + ], +) +def test_from_arrays_invalid_input(invalid_sequence_of_arrays): + msg = "Input must be a list / sequence of array-likes" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays) + + +@pytest.mark.parametrize( + "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])] +) +def test_from_arrays_different_lengths(idx1, idx2): + # see gh-13599 + msg = "^all arrays must be same length$" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays([idx1, idx2]) + + +def test_from_arrays_respects_none_names(): + # GH27292 + a = Series([1, 2, 3], name="foo") + b = Series(["a", "b", "c"], name="bar") + + result = MultiIndex.from_arrays([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None + ) + + tm.assert_index_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# from_tuples +# ---------------------------------------------------------------------------- +def test_from_tuples(): + msg = "Cannot infer number of levels from empty list" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples([]) + + expected = MultiIndex( + levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + + # input tuples + result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"]) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_iterator(): + # GH 18434 + # input iterator for tuples + expected = MultiIndex( + levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + + result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"]) + tm.assert_index_equal(result, expected) + + # input non-iterables + msg = "Input must be a list / sequence of tuple-likes." 
+ with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples(0) + + +def test_from_tuples_empty(): + # GH 16777 + result = MultiIndex.from_tuples([], names=["a", "b"]) + expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_index_values(idx): + result = MultiIndex.from_tuples(idx) + assert (result.values == idx.values).all() + + +def test_tuples_with_name_string(): + # GH 15110 and GH 14848 + + li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] + msg = "Names should be list-like for a MultiIndex" + with pytest.raises(ValueError, match=msg): + Index(li, name="abc") + with pytest.raises(ValueError, match=msg): + Index(li, name="a") + + +def test_from_tuples_with_tuple_label(): + # GH 15457 + expected = pd.DataFrame( + [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"] + ).set_index(["a", "b"]) + idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b")) + result = pd.DataFrame([2, 3], columns=["c"], index=idx) + tm.assert_frame_equal(expected, result) + + +# ---------------------------------------------------------------------------- +# from_product +# ---------------------------------------------------------------------------- +def test_from_product_empty_zero_levels(): + # 0 levels + msg = "Must pass non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_product([]) + + +def test_from_product_empty_one_level(): + result = MultiIndex.from_product([[]], names=["A"]) + expected = Index([], name="A") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] + + +@pytest.mark.parametrize( + "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])] +) +def test_from_product_empty_two_levels(first, second): + names = ["A", "B"] + result = MultiIndex.from_product([first, second], names=names) + expected = MultiIndex(levels=[first, second], codes=[[], []], names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("N", list(range(4))) +def test_from_product_empty_three_levels(N): + # GH12258 + names = ["A", "B", "C"] + lvl2 = list(range(N)) + result = MultiIndex.from_product([[], lvl2, []], names=names) + expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]] +) +def test_from_product_invalid_input(invalid_input): + msg = r"Input must be a list / sequence of iterables|Input must be list-like" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_product(iterables=invalid_input) + + +def test_from_product_datetimeindex(): + dt_index = date_range("2000-01-01", periods=2) + mi = MultiIndex.from_product([[1, 2], dt_index]) + etalon = construct_1d_object_array_from_listlike( + [ + (1, Timestamp("2000-01-01")), + (1, Timestamp("2000-01-02")), + (2, Timestamp("2000-01-01")), + (2, Timestamp("2000-01-02")), + ] + ) + tm.assert_numpy_array_equal(mi.values, etalon) + + +def test_from_product_rangeindex(): + # RangeIndex is preserved by factorize, so preserved in levels + rng = Index(range(5)) + other = ["a", "b"] + mi = MultiIndex.from_product([rng, other]) + tm.assert_index_equal(mi._levels[0], rng, exact=True) + + +@pytest.mark.parametrize("ordered", [False, True]) +@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values]) +def test_from_product_index_series_categorical(ordered, f): + # 
GH13743 + first = ["foo", "bar"] + + idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered) + expected = pd.CategoricalIndex( + list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered + ) + + result = MultiIndex.from_product([first, f(idx)]) + tm.assert_index_equal(result.get_level_values(1), expected) + + +def test_from_product(): + + first = ["foo", "bar", "buz"] + second = ["a", "b", "c"] + names = ["first", "second"] + result = MultiIndex.from_product([first, second], names=names) + + tuples = [ + ("foo", "a"), + ("foo", "b"), + ("foo", "c"), + ("bar", "a"), + ("bar", "b"), + ("bar", "c"), + ("buz", "a"), + ("buz", "b"), + ("buz", "c"), + ] + expected = MultiIndex.from_tuples(tuples, names=names) + + tm.assert_index_equal(result, expected) + + +def test_from_product_iterator(): + # GH 18434 + first = ["foo", "bar", "buz"] + second = ["a", "b", "c"] + names = ["first", "second"] + tuples = [ + ("foo", "a"), + ("foo", "b"), + ("foo", "c"), + ("bar", "a"), + ("bar", "b"), + ("bar", "c"), + ("buz", "a"), + ("buz", "b"), + ("buz", "c"), + ] + expected = MultiIndex.from_tuples(tuples, names=names) + + # iterator as input + result = MultiIndex.from_product(iter([first, second]), names=names) + tm.assert_index_equal(result, expected) + + # Invalid non-iterable input + msg = "Input must be a list / sequence of iterables." + with pytest.raises(TypeError, match=msg): + MultiIndex.from_product(0) + + +@pytest.mark.parametrize( + "a, b, expected_names", + [ + ( + Series([1, 2, 3], name="foo"), + Series(["a", "b"], name="bar"), + ["foo", "bar"], + ), + (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]), + ([1, 2, 3], ["a", "b"], None), + ], +) +def test_from_product_infer_names(a, b, expected_names): + # GH27292 + result = MultiIndex.from_product([a, b]) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=expected_names, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_respects_none_names(): + # GH27292 + a = Series([1, 2, 3], name="foo") + b = Series(["a", "b"], name="bar") + + result = MultiIndex.from_product([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=None, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_readonly(): + # GH#15286 passing read-only array to from_product + a = np.array(range(3)) + b = ["a", "b"] + expected = MultiIndex.from_product([a, b]) + + a.setflags(write=False) + result = MultiIndex.from_product([a, b]) + tm.assert_index_equal(result, expected) + + +def test_create_index_existing_name(idx): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + index = idx + index.names = ["foo", "bar"] + result = Index(index) + expected = Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ) + ) + tm.assert_index_equal(result, expected) + + result = Index(index, name="A") + expected = Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ), + name="A", + ) + tm.assert_index_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# from_frame +# 
---------------------------------------------------------------------------- +def test_from_frame(): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"] + ) + expected = MultiIndex.from_tuples( + [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"] + ) + result = MultiIndex.from_frame(df) + tm.assert_index_equal(expected, result) + + +@pytest.mark.parametrize( + "non_frame", + [ + Series([1, 2, 3, 4]), + [1, 2, 3, 4], + [[1, 2], [3, 4], [5, 6]], + Index([1, 2, 3, 4]), + np.array([[1, 2], [3, 4], [5, 6]]), + 27, + ], +) +def test_from_frame_error(non_frame): + # GH 22420 + with pytest.raises(TypeError, match="Input must be a DataFrame"): + MultiIndex.from_frame(non_frame) + + +def test_from_frame_dtype_fidelity(): + # GH 22420 + df = pd.DataFrame( + { + "dates": date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } + ) + original_dtypes = df.dtypes.to_dict() + + expected_mi = MultiIndex.from_arrays( + [ + date_range("19910905", periods=6, tz="US/Eastern"), + [1, 1, 1, 2, 2, 2], + pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + ["x", "x", "y", "z", "x", "y"], + ], + names=["dates", "a", "b", "c"], + ) + mi = MultiIndex.from_frame(df) + mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} + + tm.assert_index_equal(expected_mi, mi) + assert original_dtypes == mi_dtypes + + +@pytest.mark.parametrize( + "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])] +) +def test_from_frame_valid_names(names_in, names_out): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], + columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]), + ) + mi = MultiIndex.from_frame(df, names=names_in) + assert mi.names == names_out + + +@pytest.mark.parametrize( + "names,expected_error_msg", + [ + ("bad_input", "Names should be list-like for a MultiIndex"), + (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"), + ], +) +def test_from_frame_invalid_names(names, expected_error_msg): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], + columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]), + ) + with pytest.raises(ValueError, match=expected_error_msg): + MultiIndex.from_frame(df, names=names) + + +def test_index_equal_empty_iterable(): + # #16844 + a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) + b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) + tm.assert_index_equal(a, b) + + +def test_raise_invalid_sortorder(): + # Test that the MultiIndex constructor raise when a incorrect sortorder is given + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + # Correct sortorder + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1 + ) + + +def test_datetimeindex(): + idx1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo" + ) + idx2 = date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") + 
idx = MultiIndex.from_arrays([idx1, idx2]) + + expected1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" + ) + + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) + + # from datetime combos + # GH 7888 + date1 = np.datetime64("today") + date2 = datetime.today() + date3 = Timestamp.today() + + for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + # but NOT date objects, matching Index behavior + date4 = date.today() + index = MultiIndex.from_product([[date4], [date2]]) + assert not isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + +def test_constructor_with_tz(): + + index = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ) + columns = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ) + + result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + result = MultiIndex.from_arrays([Series(index), Series(columns)]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + +def test_multiindex_inference_consistency(): + # check that inference behavior matches the base class + + v = date.today() + + arr = [v, v] + + idx = Index(arr) + assert idx.dtype == object + + mi = MultiIndex.from_arrays([arr]) + lev = mi.levels[0] + assert lev.dtype == object + + mi = MultiIndex.from_product([arr]) + lev = mi.levels[0] + assert lev.dtype == object + + mi = MultiIndex.from_tuples([(x,) for x in arr]) + lev = mi.levels[0] + assert lev.dtype == object + + +def test_dtype_representation(): + # GH#46900 + pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")]) + result = pmidx.dtypes + expected = Series( + ["int64", "object"], index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]) + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py new file mode 100644 index 00000000..3c2ca045 --- /dev/null +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -0,0 +1,164 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, +) +import pandas._testing as tm + + +def test_to_numpy(idx): + result = idx.to_numpy() + exp = idx.values + tm.assert_numpy_array_equal(result, exp) + + +def test_to_frame(): + tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")] + + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")] + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + # See GH-22580 + index = 
MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False, name=["first", "second"]) + expected = DataFrame(tuples) + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=["first", "second"]) + expected.index = index + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + msg = "'name' must be a list / sequence of column names." + with pytest.raises(TypeError, match=msg): + index.to_frame(name="first") + + msg = "'name' should have same length as number of levels on index." + with pytest.raises(ValueError, match=msg): + index.to_frame(name=["first"]) + + # Tests for datetime index + index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)]) + result = index.to_frame(index=False) + expected = DataFrame( + { + 0: np.repeat(np.arange(5, dtype="int64"), 3), + 1: np.tile(pd.date_range("20130101", periods=3), 5), + } + ) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + # See GH-22580 + result = index.to_frame(index=False, name=["first", "second"]) + expected = DataFrame( + { + "first": np.repeat(np.arange(5, dtype="int64"), 3), + "second": np.tile(pd.date_range("20130101", periods=3), 5), + } + ) + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=["first", "second"]) + expected.index = index + tm.assert_frame_equal(result, expected) + + +def test_to_frame_dtype_fidelity(): + # GH 22420 + mi = MultiIndex.from_arrays( + [ + pd.date_range("19910905", periods=6, tz="US/Eastern"), + [1, 1, 1, 2, 2, 2], + pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + ["x", "x", "y", "z", "x", "y"], + ], + names=["dates", "a", "b", "c"], + ) + original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} + + expected_df = DataFrame( + { + "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } + ) + df = mi.to_frame(index=False) + df_dtypes = df.dtypes.to_dict() + + tm.assert_frame_equal(df, expected_df) + assert original_dtypes == df_dtypes + + +def test_to_frame_resulting_column_order(): + # GH 22420 + expected = ["z", 0, "a"] + mi = MultiIndex.from_arrays( + [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected + ) + result = mi.to_frame().columns.tolist() + assert result == expected + + +def test_to_frame_duplicate_labels(): + # GH 45245 + data = [(1, 2), (3, 4)] + names = ["a", "a"] + index = MultiIndex.from_tuples(data, names=names) + with pytest.raises(ValueError, match="Cannot create duplicate column labels"): + index.to_frame() + + result = index.to_frame(allow_duplicates=True) + expected = DataFrame(data, index=index, columns=names) + tm.assert_frame_equal(result, expected) + + names = [None, 0] + index = MultiIndex.from_tuples(data, names=names) + with pytest.raises(ValueError, match="Cannot create duplicate column labels"): + index.to_frame() + + result = index.to_frame(allow_duplicates=True) + expected = DataFrame(data, index=index, columns=[0, 0]) + tm.assert_frame_equal(result, expected) + + +def test_to_flat_index(idx): + expected = pd.Index( + ( + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ), + tupleize_cols=False, + ) + result = idx.to_flat_index() + tm.assert_index_equal(result, expected) diff --git 
a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py new file mode 100644 index 00000000..2b64845c --- /dev/null +++ b/pandas/tests/indexes/multi/test_copy.py @@ -0,0 +1,118 @@ +from copy import ( + copy, + deepcopy, +) + +import pytest + +from pandas import MultiIndex +import pandas._testing as tm + + +def assert_multiindex_copied(copy, original): + # Levels should be (at least, shallow copied) + tm.assert_copy(copy.levels, original.levels) + tm.assert_almost_equal(copy.codes, original.codes) + + # Labels doesn't matter which way copied + tm.assert_almost_equal(copy.codes, original.codes) + assert copy.codes is not original.codes + + # Names doesn't matter which way copied + assert copy.names == original.names + assert copy.names is not original.names + + # Sort order should be copied + assert copy.sortorder == original.sortorder + + +def test_copy(idx): + i_copy = idx.copy() + + assert_multiindex_copied(i_copy, idx) + + +def test_shallow_copy(idx): + i_copy = idx._view() + + assert_multiindex_copied(i_copy, idx) + + +def test_view(idx): + i_view = idx.view() + assert_multiindex_copied(i_view, idx) + + +@pytest.mark.parametrize("func", [copy, deepcopy]) +def test_copy_and_deepcopy(func): + + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = func(idx) + assert idx_copy is not idx + assert idx_copy.equals(idx) + + +@pytest.mark.parametrize("deep", [True, False]) +def test_copy_method(deep): + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = idx.copy(deep=deep) + assert idx_copy.equals(idx) + + +@pytest.mark.parametrize("deep", [True, False]) +@pytest.mark.parametrize( + "kwarg, value", + [ + ("names", ["third", "fourth"]), + ], +) +def test_copy_method_kwargs(deep, kwarg, value): + # gh-12309: Check that the "name" argument as well other kwargs are honored + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = idx.copy(**{kwarg: value, "deep": deep}) + assert getattr(idx_copy, kwarg) == value + + +@pytest.mark.parametrize("deep", [True, False]) +@pytest.mark.parametrize( + "param_name, param_value", + [ + ("levels", [["foo2", "bar2"], ["fizz2", "buzz2"]]), + ("codes", [[1, 0, 0, 0], [1, 1, 0, 0]]), + ], +) +def test_copy_deprecated_parameters(deep, param_name, param_value): + # gh-36685 + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + with tm.assert_produces_warning(FutureWarning): + idx_copy = idx.copy(deep=deep, **{param_name: param_value}) + + assert [list(i) for i in getattr(idx_copy, param_name)] == param_value + + +def test_copy_deep_false_retains_id(): + # GH#47878 + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + + res = idx.copy(deep=False) + assert res._id is idx._id diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py new file mode 100644 index 00000000..47959ec0 --- /dev/null +++ b/pandas/tests/indexes/multi/test_drop.py @@ -0,0 +1,193 @@ +import warnings + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def 
test_drop(idx): + dropped = idx.drop([("foo", "two"), ("qux", "one")]) + + index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")]) + dropped2 = idx.drop(index) + + expected = idx[[0, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped2, expected) + + dropped = idx.drop(["bar"]) + expected = idx[[0, 1, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop("foo") + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + index = MultiIndex.from_tuples([("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop([("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop(index) + with pytest.raises(KeyError, match=r"^'two'$"): + idx.drop(["foo", "two"]) + + # partially correct argument + mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop(mixed_index) + + # error='ignore' + dropped = idx.drop(index, errors="ignore") + expected = idx[[0, 1, 2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(mixed_index, errors="ignore") + expected = idx[[0, 1, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(["foo", "two"], errors="ignore") + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop + dropped = idx.drop(["foo", ("qux", "one")]) + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ["foo", ("qux", "one"), "two"] + with pytest.raises(KeyError, match=r"^'two'$"): + idx.drop(mixed_index) + dropped = idx.drop(mixed_index, errors="ignore") + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + +def test_droplevel_with_names(idx): + index = idx[idx.get_loc("foo")] + dropped = index.droplevel(0) + assert dropped.name == "second" + + index = MultiIndex( + levels=[Index(range(4)), Index(range(4)), Index(range(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + names=["one", "two", "three"], + ) + dropped = index.droplevel(0) + assert dropped.names == ("two", "three") + + dropped = index.droplevel("two") + expected = index.droplevel(1) + assert dropped.equals(expected) + + +def test_droplevel_list(): + index = MultiIndex( + levels=[Index(range(4)), Index(range(4)), Index(range(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + names=["one", "two", "three"], + ) + + dropped = index[:2].droplevel(["three", "one"]) + expected = index[:2].droplevel(2).droplevel(0) + assert dropped.equals(expected) + + dropped = index[:2].droplevel([]) + expected = index[:2] + assert dropped.equals(expected) + + msg = ( + "Cannot remove 3 levels from an index with 3 levels: " + "at least one level must be left" + ) + with pytest.raises(ValueError, match=msg): + index[:2].droplevel(["one", "two", "three"]) + + with pytest.raises(KeyError, match="'Level four not found'"): + index[:2].droplevel(["one", "four"]) + + +def test_drop_not_lexsorted(): + # GH 12078 + + # define the lexsorted version of the multi-index + tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")] + lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"]) + assert lexsorted_mi._is_lexsorted() + + # and the not-lexsorted version + df = pd.DataFrame( + columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 
3], [1, "b2", "c2", 4]] + ) + df = df.pivot_table(index="a", columns=["b", "c"], values="d") + df = df.reset_index() + not_lexsorted_mi = df.columns + assert not not_lexsorted_mi._is_lexsorted() + + # compare the results + tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) + + +def test_drop_with_nan_in_index(nulls_fixture): + # GH#18853 + mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"]) + msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop(pd.Timestamp("2001"), level="date") + + +def test_drop_with_non_monotonic_duplicates(): + # GH#33494 + mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)]) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PerformanceWarning) + result = mi.drop((1, 2)) + expected = MultiIndex.from_tuples([(2, 3)]) + tm.assert_index_equal(result, expected) + + +def test_single_level_drop_partially_missing_elements(): + # GH 37820 + + mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)]) + msg = r"labels \[4\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop(4, level=0) + with pytest.raises(KeyError, match=msg): + mi.drop([1, 4], level=0) + msg = r"labels \[nan\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan], level=0) + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan, 1, 2, 3], level=0) + + mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)]) + msg = r"labels \['a'\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan, 1, "a"], level=0) + + +def test_droplevel_multiindex_one_level(): + # GH#37208 + index = MultiIndex.from_tuples([(2,)], names=("b",)) + result = index.droplevel([]) + expected = Index([2], name="b") + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py new file mode 100644 index 00000000..6ec4d1fa --- /dev/null +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -0,0 +1,339 @@ +from itertools import product + +import numpy as np +import pytest + +from pandas._libs import hashtable + +from pandas import ( + DatetimeIndex, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("names", [None, ["first", "second"]]) +def test_unique(names): + mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names) + + res = mi.unique() + exp = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) + tm.assert_index_equal(res, exp) + + mi = MultiIndex.from_arrays([list("aaaa"), list("abab")], names=names) + res = mi.unique() + exp = MultiIndex.from_arrays([list("aa"), list("ab")], names=mi.names) + tm.assert_index_equal(res, exp) + + mi = MultiIndex.from_arrays([list("aaaa"), list("aaaa")], names=names) + res = mi.unique() + exp = MultiIndex.from_arrays([["a"], ["a"]], names=mi.names) + tm.assert_index_equal(res, exp) + + # GH #20568 - empty MI + mi = MultiIndex.from_arrays([[], []], names=names) + res = mi.unique() + tm.assert_index_equal(mi, res) + + +def test_unique_datetimelike(): + idx1 = DatetimeIndex( + ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"] + ) + idx2 = DatetimeIndex( + ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"], + tz="Asia/Tokyo", + ) + result = MultiIndex.from_arrays([idx1, idx2]).unique() + + eidx1 = 
DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"]) + eidx2 = DatetimeIndex( + ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo" + ) + exp = MultiIndex.from_arrays([eidx1, eidx2]) + tm.assert_index_equal(result, exp) + + +@pytest.mark.parametrize("level", [0, "first", 1, "second"]) +def test_unique_level(idx, level): + # GH #17896 - with level= argument + result = idx.unique(level=level) + expected = idx.get_level_values(level).unique() + tm.assert_index_equal(result, expected) + + # With already unique level + mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"]) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + tm.assert_index_equal(result, expected) + + # With empty MI + mi = MultiIndex.from_arrays([[], []], names=["first", "second"]) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + tm.assert_index_equal(result, expected) + + +def test_duplicate_multiindex_codes(): + # GH 17464 + # Make sure that a MultiIndex with duplicate levels throws a ValueError + msg = r"Level values must be unique: \[[A', ]+\] on level 0" + with pytest.raises(ValueError, match=msg): + mi = MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)]) + + # And that using set_levels with duplicate levels fails + mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + msg = r"Level values must be unique: \[[AB', ]+\] on level 0" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning): + mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]], inplace=True) + + +@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]]) +def test_duplicate_level_names(names): + # GH18872, GH19029 + mi = MultiIndex.from_product([[0, 1]] * 3, names=names) + assert mi.names == names + + # With .rename() + mi = MultiIndex.from_product([[0, 1]] * 3) + mi = mi.rename(names) + assert mi.names == names + + # With .rename(., level=) + mi.rename(names[1], level=1, inplace=True) + mi = mi.rename([names[0], names[2]], level=[0, 2]) + assert mi.names == names + + +def test_duplicate_meta_data(): + # GH 10115 + mi = MultiIndex( + levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] + ) + + for idx in [ + mi, + mi.set_names([None, None]), + mi.set_names([None, "Num"]), + mi.set_names(["Upper", "Num"]), + ]: + assert idx.has_duplicates + assert idx.drop_duplicates().names == idx.names + + +def test_has_duplicates(idx, idx_dup): + # see fixtures + assert idx.is_unique is True + assert idx.has_duplicates is False + assert idx_dup.is_unique is False + assert idx_dup.has_duplicates is True + + mi = MultiIndex( + levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] + ) + assert mi.is_unique is False + assert mi.has_duplicates is True + + # single instance of NaN + mi_nan = MultiIndex( + levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]] + ) + assert mi_nan.is_unique is True + assert mi_nan.has_duplicates is False + + # multiple instances of NaN + mi_nan_dup = MultiIndex( + levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]] + ) + assert mi_nan_dup.is_unique is False + assert mi_nan_dup.has_duplicates is True + + +def test_has_duplicates_from_tuples(): + # GH 9075 + t = [ + ("x", "out", "z", 5, "y", "in", "z", 169), + ("x", "out", "z", 7, "y", "in", "z", 119), + ("x", "out", "z", 9, "y", "in", "z", 135), + ("x", "out", "z", 13, "y", "in", "z", 145), + ("x", "out", "z", 
14, "y", "in", "z", 158), + ("x", "out", "z", 16, "y", "in", "z", 122), + ("x", "out", "z", 17, "y", "in", "z", 160), + ("x", "out", "z", 18, "y", "in", "z", 180), + ("x", "out", "z", 20, "y", "in", "z", 143), + ("x", "out", "z", 21, "y", "in", "z", 128), + ("x", "out", "z", 22, "y", "in", "z", 129), + ("x", "out", "z", 25, "y", "in", "z", 111), + ("x", "out", "z", 28, "y", "in", "z", 114), + ("x", "out", "z", 29, "y", "in", "z", 121), + ("x", "out", "z", 31, "y", "in", "z", 126), + ("x", "out", "z", 32, "y", "in", "z", 155), + ("x", "out", "z", 33, "y", "in", "z", 123), + ("x", "out", "z", 12, "y", "in", "z", 144), + ] + + mi = MultiIndex.from_tuples(t) + assert not mi.has_duplicates + + +@pytest.mark.parametrize("nlevels", [4, 8]) +@pytest.mark.parametrize("with_nulls", [True, False]) +def test_has_duplicates_overflow(nlevels, with_nulls): + # handle int64 overflow if possible + # no overflow with 4 + # overflow possible with 8 + codes = np.tile(np.arange(500), 2) + level = np.arange(500) + + if with_nulls: # inject some null values + codes[500] = -1 # common nan value + codes = [codes.copy() for i in range(nlevels)] + for i in range(nlevels): + codes[i][500 + i - nlevels // 2] = -1 + + codes += [np.array([-1, 1]).repeat(500)] + else: + codes = [codes] * nlevels + [np.arange(2).repeat(500)] + + levels = [level] * nlevels + [[0, 1]] + + # no dups + mi = MultiIndex(levels=levels, codes=codes) + assert not mi.has_duplicates + + # with a dup + if with_nulls: + + def f(a): + return np.insert(a, 1000, a[0]) + + codes = list(map(f, codes)) + mi = MultiIndex(levels=levels, codes=codes) + else: + values = mi.values.tolist() + mi = MultiIndex.from_tuples(values + [values[0]]) + + assert mi.has_duplicates + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", np.array([False, False, False, True, True, False])), + ("last", np.array([False, True, True, False, False, False])), + (False, np.array([False, True, True, True, True, False])), + ], +) +def test_duplicated(idx_dup, keep, expected): + result = idx_dup.duplicated(keep=keep) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.arm_slow +def test_duplicated_large(keep): + # GH 9125 + n, k = 200, 5000 + levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] + codes = [np.random.choice(n, k * n) for lev in levels] + mi = MultiIndex(levels=levels, codes=codes) + + result = mi.duplicated(keep=keep) + expected = hashtable.duplicated(mi.values, keep=keep) + tm.assert_numpy_array_equal(result, expected) + + +def test_duplicated2(): + # TODO: more informative test name + # GH5873 + for a in [101, 102]: + mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) + assert not mi.has_duplicates + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool")) + + for n in range(1, 6): # 1st level shape + for m in range(1, 5): # 2nd level shape + # all possible unique combinations, including nan + codes = product(range(-1, n), range(-1, m)) + mi = MultiIndex( + levels=[list("abcde")[:n], list("WXYZ")[:m]], + codes=np.random.permutation(list(codes)).T, + ) + assert len(mi) == (n + 1) * (m + 1) + assert not mi.has_duplicates + + tm.assert_numpy_array_equal( + mi.duplicated(), np.zeros(len(mi), dtype="bool") + ) + + +def test_duplicated_drop_duplicates(): + # GH#4060 + idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) + + expected = np.array([False, False, False, True, False, False], dtype=bool) + duplicated = idx.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + assert 
duplicated.dtype == bool + expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(), expected) + + expected = np.array([True, False, False, False, False, False]) + duplicated = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected) + + expected = np.array([True, False, False, True, False, False]) + duplicated = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) + + +@pytest.mark.parametrize( + "dtype", + [ + np.complex64, + np.complex128, + ], +) +def test_duplicated_series_complex_numbers(dtype): + # GH 17927 + expected = Series( + [False, False, False, True, False, False, False, True, False, True], + dtype=bool, + ) + result = Series( + [ + np.nan + np.nan * 1j, + 0, + 1j, + 1j, + 1, + 1 + 1j, + 1 + 2j, + 1 + 1j, + np.nan, + np.nan + np.nan * 1j, + ], + dtype=dtype, + ).duplicated() + tm.assert_series_equal(result, expected) + + +def test_multi_drop_duplicates_pos_args_deprecation(): + # GH#41485 + idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]]) + msg = ( + "In a future version of pandas all arguments of " + "MultiIndex.drop_duplicates will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.drop_duplicates("last") + expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]]) + tm.assert_index_equal(expected, result) diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py new file mode 100644 index 00000000..c6567b86 --- /dev/null +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -0,0 +1,298 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +def test_equals(idx): + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.equals(idx.to_flat_index()) + assert idx.equals(idx.to_flat_index().astype("category")) + + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) + + same_values = Index(idx, dtype=object) + assert idx.equals(same_values) + assert same_values.equals(idx) + + if idx.nlevels == 1: + # do not test MultiIndex + assert not idx.equals(Series(idx)) + + +def test_equals_op(idx): + # GH9947, GH10637 + index_a = idx + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + 
series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + series_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_series_equal(series_a == item, Series(expected3)) + + +def test_compare_tuple(): + # GH#21517 + mi = MultiIndex.from_product([[1, 2]] * 2) + + all_false = np.array([False, False, False, False]) + + result = mi == mi[0] + expected = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = mi != mi[0] + tm.assert_numpy_array_equal(result, ~expected) + + result = mi < mi[0] + tm.assert_numpy_array_equal(result, all_false) + + result = mi <= mi[0] + tm.assert_numpy_array_equal(result, expected) + + result = mi > mi[0] + tm.assert_numpy_array_equal(result, ~expected) + + result = mi >= mi[0] + tm.assert_numpy_array_equal(result, ~all_false) + + +def test_compare_tuple_strs(): + # GH#34180 + + mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")]) + + result = mi == ("c", "a") + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = mi == ("c",) + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + +def test_equals_multi(idx): + assert idx.equals(idx) + assert not idx.equals(idx.values) + assert idx.equals(Index(idx.values)) + + assert idx.equal_levels(idx) + assert not idx.equals(idx[:-1]) + assert not idx.equals(idx[-1]) + + # different number of levels + index = MultiIndex( + levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + + index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1]) + assert not index.equals(index2) + assert not index.equal_levels(index2) + + # levels are different + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) + + major_codes = np.array([0, 0, 1, 2, 2, 3]) + minor_codes = np.array([0, 1, 0, 0, 1, 0]) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + assert not idx.equals(index) + assert not idx.equal_levels(index) + + # some of the labels are different + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 
0, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + assert not idx.equals(index) + + +def test_identical(idx): + mi = idx.copy() + mi2 = idx.copy() + assert mi.identical(mi2) + + mi = mi.set_names(["new1", "new2"]) + assert mi.equals(mi2) + assert not mi.identical(mi2) + + mi2 = mi2.set_names(["new1", "new2"]) + assert mi.identical(mi2) + + with tm.assert_produces_warning(FutureWarning): + # subclass-specific keywords to pd.Index + mi3 = Index(mi.tolist(), names=mi.names) + + msg = r"Unexpected keyword arguments {'names'}" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # subclass-specific keywords to pd.Index + Index(mi.tolist(), names=mi.names, tupleize_cols=False) + + mi4 = Index(mi.tolist(), tupleize_cols=False) + assert mi.identical(mi3) + assert not mi.identical(mi4) + assert mi.equals(mi4) + + +def test_equals_operator(idx): + # GH9785 + assert (idx == idx).all() + + +def test_equals_missing_values(): + # make sure take is not using -1 + i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))]) + result = i[0:1].equals(i[0]) + assert not result + result = i[1:2].equals(i[1]) + assert not result + + +def test_equals_missing_values_differently_sorted(): + # GH#38439 + mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) + mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)]) + assert not mi1.equals(mi2) + + mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) + assert mi1.equals(mi2) + + +def test_is_(): + mi = MultiIndex.from_tuples(zip(range(10), range(10))) + assert mi.is_(mi) + assert mi.is_(mi.view()) + assert mi.is_(mi.view().view().view().view()) + mi2 = mi.view() + # names are metadata, they don't change id + mi2.names = ["A", "B"] + assert mi2.is_(mi) + assert mi.is_(mi2) + + assert not mi.is_(mi.set_names(["C", "D"])) + mi2 = mi.view() + mi2.set_names(["E", "F"], inplace=True) + assert mi.is_(mi2) + # levels are inherent properties, they change identity + mi3 = mi2.set_levels([list(range(10)), list(range(10))]) + assert not mi3.is_(mi2) + # shouldn't change + assert mi2.is_(mi) + mi4 = mi3.view() + + # GH 17464 - Remove duplicate MultiIndex levels + with tm.assert_produces_warning(FutureWarning): + mi4.set_levels([list(range(10)), list(range(10))], inplace=True) + assert not mi4.is_(mi3) + mi5 = mi.view() + with tm.assert_produces_warning(FutureWarning): + mi5.set_levels(mi5.levels, inplace=True) + assert not mi5.is_(mi) + + +def test_is_all_dates(idx): + assert not idx._is_all_dates + + +def test_is_numeric(idx): + # MultiIndex is never numeric + assert not idx.is_numeric() + + +def test_multiindex_compare(): + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 + + midx = MultiIndex.from_product([[0, 1]]) + + # Equality self-test: MultiIndex object vs self + expected = Series([True, True]) + result = Series(midx == midx) + tm.assert_series_equal(result, expected) + + # Greater than comparison: MultiIndex object vs self + expected = Series([False, False]) + result = Series(midx > midx) + tm.assert_series_equal(result, expected) + + +def test_equals_ea_int_regular_int(): + # GH#46026 + mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]]) + mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]]) + assert not mi1.equals(mi2) + assert not mi2.equals(mi1) diff --git 
a/pandas/tests/indexes/multi/test_formats.py b/pandas/tests/indexes/multi/test_formats.py new file mode 100644 index 00000000..238a3e78 --- /dev/null +++ b/pandas/tests/indexes/multi/test_formats.py @@ -0,0 +1,229 @@ +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) + + +def test_format(idx): + idx.format() + idx[:0].format() + + +def test_format_integer_names(): + index = MultiIndex( + levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1] + ) + index.format(names=True) + + +def test_format_sparse_config(idx): + warn_filters = warnings.filters + warnings.filterwarnings("ignore", category=FutureWarning, module=".*format") + # GH1538 + with pd.option_context("display.multi_sparse", False): + result = idx.format() + assert result[1] == "foo two" + + warnings.filters = warn_filters + + +def test_format_sparse_display(): + index = MultiIndex( + levels=[[0, 1], [0, 1], [0, 1], [0]], + codes=[ + [0, 0, 0, 1, 1, 1], + [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0], + ], + ) + + result = index.format() + assert result[3] == "1 0 0 0" + + +def test_repr_with_unicode_data(): + with pd.option_context("display.encoding", "UTF-8"): + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + index = pd.DataFrame(d).set_index(["a", "b"]).index + assert "\\" not in repr(index) # we don't want unicode-escaped + + +def test_repr_roundtrip_raises(): + mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"]) + msg = "Must pass both levels and codes" + with pytest.raises(TypeError, match=msg): + eval(repr(mi)) + + +def test_unicode_string_with_unicode(): + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + str(idx) + + +def test_repr_max_seq_item_setting(idx): + # GH10182 + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert "..." not in str(idx) + + +class TestRepr: + def test_unicode_repr_issues(self): + levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, codes=codes) + + repr(index.levels) + repr(index.get_level_values(1)) + + def test_repr_max_seq_items_equal_to_n(self, idx): + # display.max_seq_items == n + with pd.option_context("display.max_seq_items", 6): + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'])""" + assert result == expected + + def test_repr(self, idx): + result = idx[:1].__repr__() + expected = """\ +MultiIndex([('foo', 'one')], + names=['first', 'second'])""" + assert result == expected + + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'])""" + assert result == expected + + with pd.option_context("display.max_seq_items", 5): + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ... + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'], length=6)""" + assert result == expected + + # display.max_seq_items == 1 + with pd.option_context("display.max_seq_items", 1): + result = idx.__repr__() + expected = """\ +MultiIndex([... 
+ ('qux', 'two')], + names=['first', ...], length=6)""" + assert result == expected + + def test_rjust(self, narrow_multi_index): + mi = narrow_multi_index + result = mi[:1].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi[::500].__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:08:20'), + ('abc', 10, '2000-01-01 00:16:40'), + ('abc', 10, '2000-01-01 00:25:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:00:01'), + ( 'a', 9, '2000-01-01 00:00:02'), + ( 'a', 9, '2000-01-01 00:00:03'), + ( 'a', 9, '2000-01-01 00:00:04'), + ( 'a', 9, '2000-01-01 00:00:05'), + ( 'a', 9, '2000-01-01 00:00:06'), + ( 'a', 9, '2000-01-01 00:00:07'), + ( 'a', 9, '2000-01-01 00:00:08'), + ( 'a', 9, '2000-01-01 00:00:09'), + ... + ('abc', 10, '2000-01-01 00:33:10'), + ('abc', 10, '2000-01-01 00:33:11'), + ('abc', 10, '2000-01-01 00:33:12'), + ('abc', 10, '2000-01-01 00:33:13'), + ('abc', 10, '2000-01-01 00:33:14'), + ('abc', 10, '2000-01-01 00:33:15'), + ('abc', 10, '2000-01-01 00:33:16'), + ('abc', 10, '2000-01-01 00:33:17'), + ('abc', 10, '2000-01-01 00:33:18'), + ('abc', 10, '2000-01-01 00:33:19')], + names=['a', 'b', 'dti'], length=2000)""" + assert result == expected + + def test_tuple_width(self, wide_multi_index): + mi = wide_multi_index + result = mi[:1].__repr__() + expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa:E501 + assert result == expected + + result = mi[:10].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...), + ... 
+ ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...), + ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...), + ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...), + ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...), + ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...), + ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...), + ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...), + ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), + ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), + ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" + assert result == expected diff --git a/pandas/tests/indexes/multi/test_get_level_values.py b/pandas/tests/indexes/multi/test_get_level_values.py new file mode 100644 index 00000000..bab6481f --- /dev/null +++ b/pandas/tests/indexes/multi/test_get_level_values.py @@ -0,0 +1,125 @@ +import numpy as np + +import pandas as pd +from pandas import ( + CategoricalIndex, + Index, + MultiIndex, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestGetLevelValues: + def test_get_level_values_box_datetime64(self): + + dates = date_range("1/1/2000", periods=4) + levels = [dates, [0, 1]] + codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] + + index = MultiIndex(levels=levels, codes=codes) + + assert isinstance(index.get_level_values(0)[0], Timestamp) + + +def test_get_level_values(idx): + result = idx.get_level_values(0) + expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first") + tm.assert_index_equal(result, expected) + assert result.name == "first" + + result = idx.get_level_values("first") + expected = idx.get_level_values(0) + tm.assert_index_equal(result, expected) + + # GH 10460 + index = MultiIndex( + levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])], + codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])], + ) + + exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"]) + tm.assert_index_equal(index.get_level_values(0), exp) + exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) + tm.assert_index_equal(index.get_level_values(1), exp) + + +def test_get_level_values_all_na(): + # GH#17924 when level entirely consists of nan + arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = Index([np.nan, np.nan, np.nan], dtype=np.float64) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = Index(["a", np.nan, 1], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_int_with_na(): + # GH#17924 + arrays = [["a", "b", "b"], [1, np.nan, 2]] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([1, np.nan, 2]) + tm.assert_index_equal(result, expected) + + arrays = [["a", "b", "b"], [np.nan, np.nan, 2]] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([np.nan, np.nan, 2]) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_na(): + arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = Index([np.nan, np.nan, np.nan]) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = Index(["a", np.nan, 1]) + tm.assert_index_equal(result, 
expected) + + arrays = [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = pd.DatetimeIndex([0, 1, pd.NaT]) + tm.assert_index_equal(result, expected) + + arrays = [[], []] + index = MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = Index([], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_when_periods(): + # GH33131. See also discussion in GH32669. + # This test can probably be removed when PeriodIndex._engine is removed. + from pandas import ( + Period, + PeriodIndex, + ) + + idx = MultiIndex.from_arrays( + [PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")] + ) + idx2 = MultiIndex.from_arrays( + [idx._get_level_values(level) for level in range(idx.nlevels)] + ) + assert all(x.is_monotonic_increasing for x in idx2.levels) + + +def test_values_loses_freq_of_underlying_index(): + # GH#49054 + idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BM")) + expected = idx.copy(deep=True) + idx2 = Index([1, 2, 3]) + midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]]) + midx.values + assert idx.freq is not None + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py new file mode 100644 index 00000000..42cf0168 --- /dev/null +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -0,0 +1,498 @@ +import numpy as np +import pytest + +from pandas.compat import PY311 + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + MultiIndex, +) +import pandas._testing as tm + + +def assert_matching(actual, expected, check_dtype=False): + # avoid specifying internal representation + # as much as possible + assert len(actual) == len(expected) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) + + +def test_get_level_number_integer(idx): + idx.names = [1, 0] + assert idx._get_level_number(1) == 0 + assert idx._get_level_number(0) == 1 + msg = "Too many levels: Index has only 2 levels, not 3" + with pytest.raises(IndexError, match=msg): + idx._get_level_number(2) + with pytest.raises(KeyError, match="Level fourth not found"): + idx._get_level_number("fourth") + + +def test_get_dtypes(): + # Test MultiIndex.dtypes (# Gh37062) + idx_multitype = MultiIndex.from_product( + [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")], + names=["int", "string", "dt"], + ) + expected = pd.Series( + { + "int": np.dtype("int64"), + "string": np.dtype("O"), + "dt": DatetimeTZDtype(tz="utc"), + } + ) + tm.assert_series_equal(expected, idx_multitype.dtypes) + + +def test_get_dtypes_no_level_name(): + # Test MultiIndex.dtypes (# GH38580 ) + idx_multitype = MultiIndex.from_product( + [ + [1, 2, 3], + ["a", "b", "c"], + pd.date_range("20200101", periods=2, tz="UTC"), + ], + ) + expected = pd.Series( + { + "level_0": np.dtype("int64"), + "level_1": np.dtype("O"), + "level_2": DatetimeTZDtype(tz="utc"), + } + ) + tm.assert_series_equal(expected, idx_multitype.dtypes) + + +def test_get_dtypes_duplicate_level_names(): + # Test MultiIndex.dtypes with non-unique level names (# GH45174) + result = MultiIndex.from_product( + [ + [1, 2, 3], + ["a", "b", "c"], + pd.date_range("20200101", periods=2, tz="UTC"), + ], + names=["A", "A", "A"], + ).dtypes + expected = pd.Series( + 
[np.dtype("int64"), np.dtype("O"), DatetimeTZDtype(tz="utc")], + index=["A", "A", "A"], + ) + tm.assert_series_equal(result, expected) + + +def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + with pytest.raises(IndexError, match="Too many levels"): + frame.index._get_level_number(2) + with pytest.raises(IndexError, match="not a valid level number"): + frame.index._get_level_number(-3) + + +def test_set_name_methods(idx, index_names): + # so long as these are synonyms, we don't need to test set_names + assert idx.rename == idx.set_names + new_names = [name + "SUFFIX" for name in index_names] + ind = idx.set_names(new_names) + assert idx.names == index_names + assert ind.names == new_names + msg = "Length of names must match number of levels in MultiIndex" + with pytest.raises(ValueError, match=msg): + ind.set_names(new_names + new_names) + new_names2 = [name + "SUFFIX2" for name in new_names] + res = ind.set_names(new_names2, inplace=True) + assert res is None + assert ind.names == new_names2 + + # set names for specific level (# GH7792) + ind = idx.set_names(new_names[0], level=0) + assert idx.names == index_names + assert ind.names == [new_names[0], index_names[1]] + + res = ind.set_names(new_names2[0], level=0, inplace=True) + assert res is None + assert ind.names == [new_names2[0], index_names[1]] + + # set names for multiple levels + ind = idx.set_names(new_names, level=[0, 1]) + assert idx.names == index_names + assert ind.names == new_names + + res = ind.set_names(new_names2, level=[0, 1], inplace=True) + assert res is None + assert ind.names == new_names2 + + +def test_set_levels_codes_directly(idx): + # setting levels/codes directly raises AttributeError + + levels = idx.levels + new_levels = [[lev + "a" for lev in level] for level in levels] + + codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] + + msg = "Can't set attribute" + with pytest.raises(AttributeError, match=msg): + idx.levels = new_levels + + msg = ( + "property 'codes' of 'MultiIndex' object has no setter" + if PY311 + else "can't set attribute" + ) + with pytest.raises(AttributeError, match=msg): + idx.codes = new_codes + + +def test_set_levels(idx): + # side note - you probably wouldn't want to use levels and codes + # directly like this - but it is possible. 
+ levels = idx.levels + new_levels = [[lev + "a" for lev in level] for level in levels] + + # level changing [w/o mutation] + ind2 = idx.set_levels(new_levels) + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # level changing [w/ mutation] + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_levels(new_levels, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + + # level changing specific level [w/o mutation] + ind2 = idx.set_levels(new_levels[0], level=0) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(idx.levels, levels) + + ind2 = idx.set_levels(new_levels[1], level=1) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(idx.levels, levels) + + # level changing multiple levels [w/o mutation] + ind2 = idx.set_levels(new_levels, level=[0, 1]) + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # level changing specific level [w/ mutation] + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(idx.levels, levels) + + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(idx.levels, levels) + + # level changing multiple levels [w/ mutation] + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # illegal level changing should not change levels + # GH 13754 + original_index = idx.copy() + for inplace in [True, False]: + with pytest.raises(ValueError, match="^On"): + with tm.assert_produces_warning(FutureWarning): + idx.set_levels(["c"], level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, check_dtype=True) + + with pytest.raises(ValueError, match="^On"): + with tm.assert_produces_warning(FutureWarning): + idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) + assert_matching(idx.codes, original_index.codes, check_dtype=True) + + with pytest.raises(TypeError, match="^Levels"): + with tm.assert_produces_warning(FutureWarning): + idx.set_levels("c", level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, check_dtype=True) + + with pytest.raises(TypeError, match="^Codes"): + with tm.assert_produces_warning(FutureWarning): + idx.set_codes(1, level=0, inplace=inplace) + assert_matching(idx.codes, original_index.codes, check_dtype=True) + + +def test_set_codes(idx): + # side note - you probably wouldn't want to use levels and codes + # directly like this - but it is possible. 
+ codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] + + # changing codes w/o mutation + ind2 = idx.set_codes(new_codes) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # changing label w/ mutation + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_codes(new_codes, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, new_codes) + + # codes changing specific level w/o mutation + ind2 = idx.set_codes(new_codes[0], level=0) + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) + + ind2 = idx.set_codes(new_codes[1], level=1) + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) + + # codes changing multiple levels w/o mutation + ind2 = idx.set_codes(new_codes, level=[0, 1]) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # label changing specific level w/ mutation + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) + + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) + + # codes changing multiple levels [w/ mutation] + ind2 = idx.copy() + with tm.assert_produces_warning(FutureWarning): + inplace_return = ind2.set_codes(new_codes, level=[0, 1], inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # label changing for levels of different magnitude of categories + ind = MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_codes = range(129, -1, -1) + expected = MultiIndex.from_tuples([(0, i) for i in new_codes]) + + # [w/o mutation] + result = ind.set_codes(codes=new_codes, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + with tm.assert_produces_warning(FutureWarning): + result.set_codes(codes=new_codes, level=1, inplace=True) + assert result.equals(expected) + + +def test_set_levels_codes_names_bad_input(idx): + levels, codes = idx.levels, idx.codes + names = idx.names + + with pytest.raises(ValueError, match="Length of levels"): + idx.set_levels([levels[0]]) + + with pytest.raises(ValueError, match="Length of codes"): + idx.set_codes([codes[0]]) + + with pytest.raises(ValueError, match="Length of names"): + idx.set_names([names[0]]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_levels(levels[0]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_codes(codes[0]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list-like"): + idx.set_names(names[0]) + + # should have equal lengths + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_levels(levels[0], level=[0, 1]) + + with pytest.raises(TypeError, match="list-like"): + idx.set_levels(levels, level=0) + + # should 
have equal lengths + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_codes(codes[0], level=[0, 1]) + + with pytest.raises(TypeError, match="list-like"): + idx.set_codes(codes, level=0) + + # should have equal lengths + with pytest.raises(ValueError, match="Length of names"): + idx.set_names(names[0], level=[0, 1]) + + with pytest.raises(TypeError, match="Names must be a"): + idx.set_names(names, level=0) + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_set_names_with_nlevel_1(inplace): + # GH 21149 + # Ensure that .set_names for MultiIndex with + # nlevels == 1 does not raise any errors + expected = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"]) + m = MultiIndex.from_product([[0, 1]]) + result = m.set_names("first", level=0, inplace=inplace) + + if inplace: + result = m + + tm.assert_index_equal(result, expected) + + +def test_multi_set_names_pos_args_deprecation(): + # GH#41485 + idx = MultiIndex.from_product([["python", "cobra"], [2018, 2019]]) + msg = ( + "In a future version of pandas all arguments of MultiIndex.set_names " + "except for the argument 'names' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.set_names(["kind", "year"], None) + expected = MultiIndex( + levels=[["python", "cobra"], [2018, 2019]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["kind", "year"], + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_set_levels_categorical(ordered): + # GH13854 + index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]]) + + cidx = CategoricalIndex(list("bac"), ordered=ordered) + result = index.set_levels(cidx, level=0) + expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes) + tm.assert_index_equal(result, expected) + + result_lvl = result.get_level_values(0) + expected_lvl = CategoricalIndex( + list("bacb"), categories=cidx.categories, ordered=cidx.ordered + ) + tm.assert_index_equal(result_lvl, expected_lvl) + + +def test_set_value_keeps_names(): + # motivating example from #3742 + lev1 = ["hans", "hans", "hans", "grethe", "grethe", "grethe"] + lev2 = ["1", "2", "3"] * 2 + idx = MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"]) + df = pd.DataFrame( + np.random.randn(6, 4), columns=["one", "two", "three", "four"], index=idx + ) + df = df.sort_index() + assert df._is_copy is None + assert df.index.names == ("Name", "Number") + df.at[("grethe", "4"), "one"] = 99.34 + assert df._is_copy is None + assert df.index.names == ("Name", "Number") + + +def test_set_levels_with_iterable(): + # GH23273 + sizes = [1, 2, 3] + colors = ["black"] * 3 + index = MultiIndex.from_arrays([sizes, colors], names=["size", "color"]) + + result = index.set_levels(map(int, ["3", "2", "1"]), level="size") + + expected_sizes = [3, 2, 1] + expected = MultiIndex.from_arrays([expected_sizes, colors], names=["size", "color"]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_set_codes_inplace_deprecated(idx, inplace): + new_codes = idx.codes[1][::-1] + + with tm.assert_produces_warning(FutureWarning): + idx.set_codes(codes=new_codes, level=1, inplace=inplace) + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_set_levels_inplace_deprecated(idx, inplace): + new_level = idx.levels[1].copy() + + with tm.assert_produces_warning(FutureWarning): + idx.set_levels(levels=new_level, level=1, inplace=inplace) + + +def test_set_levels_pos_args_deprecation(): + 
# https://github.com/pandas-dev/pandas/issues/41485 + idx = MultiIndex.from_tuples( + [ + (1, "one"), + (2, "one"), + (3, "one"), + ], + names=["foo", "bar"], + ) + msg = ( + r"In a future version of pandas all arguments of MultiIndex.set_levels except " + r"for the argument 'levels' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.set_levels(["a", "b", "c"], 0) + expected = MultiIndex.from_tuples( + [ + ("a", "one"), + ("b", "one"), + ("c", "one"), + ], + names=["foo", "bar"], + ) + tm.assert_index_equal(result, expected) + + +def test_set_codes_pos_args_depreciation(idx): + # https://github.com/pandas-dev/pandas/issues/41485 + msg = ( + r"In a future version of pandas all arguments of MultiIndex.set_codes except " + r"for the argument 'codes' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.set_codes([[0, 0, 1, 2, 3, 3], [0, 1, 0, 1, 0, 1]], [0, 1]) + expected = MultiIndex.from_tuples( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + names=["first", "second"], + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py new file mode 100644 index 00000000..9626352a --- /dev/null +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -0,0 +1,900 @@ +from datetime import timedelta +import re + +import numpy as np +import pytest + +from pandas.errors import ( + InvalidIndexError, + PerformanceWarning, +) + +import pandas as pd +from pandas import ( + Categorical, + Index, + MultiIndex, + date_range, +) +import pandas._testing as tm + + +class TestSliceLocs: + def test_slice_locs_partial(self, idx): + sorted_idx, _ = idx.sortlevel(0) + + result = sorted_idx.slice_locs(("foo", "two"), ("qux", "one")) + assert result == (1, 5) + + result = sorted_idx.slice_locs(None, ("qux", "one")) + assert result == (0, 5) + + result = sorted_idx.slice_locs(("foo", "two"), None) + assert result == (1, len(sorted_idx)) + + result = sorted_idx.slice_locs("bar", "baz") + assert result == (2, 4) + + def test_slice_locs(self): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + + slob = slice(*idx.slice_locs(df.index[5], df.index[15])) + sliced = stacked[slob] + expected = df[5:16].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + slob = slice( + *idx.slice_locs( + df.index[5] + timedelta(seconds=30), + df.index[15] - timedelta(seconds=30), + ) + ) + sliced = stacked[slob] + expected = df[6:15].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + def test_slice_locs_with_type_mismatch(self): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs((1, 3)) + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) + df = tm.makeCustomDataframe(5, 5) + stacked = df.stack() + idx = stacked.index + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(timedelta(seconds=30)) + # TODO: Try creating a UnicodeDecodeError in exception message + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(df.index[1], (16, "a")) + + def test_slice_locs_not_sorted(self): + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 
3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" + with pytest.raises(KeyError, match=msg): + index.slice_locs((1, 0, 1), (2, 1, 0)) + + # works + sorted_index, _ = index.sortlevel(0) + # should there be a test case here??? + sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) + + def test_slice_locs_not_contained(self): + # some searchsorted action + + index = MultiIndex( + levels=[[0, 2, 4, 6], [0, 2, 4]], + codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]], + ) + + result = index.slice_locs((1, 0), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(1, 5) + assert result == (3, 6) + + result = index.slice_locs((2, 2), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(2, 5) + assert result == (3, 6) + + result = index.slice_locs((1, 0), (6, 3)) + assert result == (3, 8) + + result = index.slice_locs(-1, 10) + assert result == (0, len(index)) + + @pytest.mark.parametrize( + "index_arr,expected,start_idx,end_idx", + [ + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None), + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"), + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")), + ], + ) + def test_slice_locs_with_missing_value( + self, index_arr, expected, start_idx, end_idx + ): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.slice_locs(start=start_idx, end=end_idx) + assert result == expected + + +class TestPutmask: + def test_putmask_with_wrong_mask(self, idx): + # GH18368 + + msg = "putmask: mask and data must be the same size" + with pytest.raises(ValueError, match=msg): + idx.putmask(np.ones(len(idx) + 1, np.bool_), 1) + + with pytest.raises(ValueError, match=msg): + idx.putmask(np.ones(len(idx) - 1, np.bool_), 1) + + with pytest.raises(ValueError, match=msg): + idx.putmask("foo", 1) + + def test_putmask_multiindex_other(self): + # GH#43212 `value` is also a MultiIndex + + left = MultiIndex.from_tuples([(np.nan, 6), (np.nan, 6), ("a", 4)]) + right = MultiIndex.from_tuples([("a", 1), ("a", 1), ("d", 1)]) + mask = np.array([True, True, False]) + + result = left.putmask(mask, right) + + expected = MultiIndex.from_tuples([right[0], right[1], left[2]]) + tm.assert_index_equal(result, expected) + + +class TestGetIndexer: + def test_get_indexer(self): + major_axis = Index(np.arange(4)) + minor_axis = Index(np.arange(2)) + + major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] + + r1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + + r1 = idx2.get_indexer(idx1, method="pad") + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method="pad") + tm.assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method="ffill") + tm.assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method="backfill") + e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method="backfill") + 
tm.assert_almost_equal(r2, e1[::-1]) + + rbfill1 = idx2.get_indexer(idx1, method="bfill") + tm.assert_almost_equal(r1, rbfill1) + + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2.values) + rexp1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, rexp1) + + r1 = idx1.get_indexer([1, 2, 3]) + assert (r1 == [-1, -1, -1]).all() + + # create index with duplicates + idx1 = Index(list(range(10)) + list(range(10))) + idx2 = Index(list(range(20))) + + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + idx1.get_indexer(idx2) + + def test_get_indexer_nearest(self): + midx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + msg = ( + "method='nearest' not implemented yet for MultiIndex; " + "see GitHub issue 9365" + ) + with pytest.raises(NotImplementedError, match=msg): + midx.get_indexer(["a"], method="nearest") + msg = "tolerance not implemented yet for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + midx.get_indexer(["a"], method="pad", tolerance=2) + + def test_get_indexer_categorical_time(self): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [ + Categorical(["a", "b", "c"]), + Categorical(date_range("2012-01-01", periods=3, freq="H")), + ] + ) + result = midx.get_indexer(midx) + tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) + + @pytest.mark.parametrize( + "index_arr,labels,expected", + [ + ( + [[1, np.nan, 2], [3, 4, 5]], + [1, np.nan, 2], + np.array([-1, -1, -1], dtype=np.intp), + ), + ([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)), + ([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)), + ( + [[1, 2, 3], [np.nan, 4, 5]], + [np.nan, 4, 5], + np.array([-1, -1, -1], dtype=np.intp), + ), + ], + ) + def test_get_indexer_with_missing_value(self, index_arr, labels, expected): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.get_indexer(labels) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_methods(self): + # https://github.com/pandas-dev/pandas/issues/29896 + # test getting an indexer for another index with different methods + # confirms that getting an indexer without a filling method, getting an + # indexer and backfilling, and getting an indexer and padding all behave + # correctly in the case where all of the target values fall in between + # several levels in the MultiIndex into which they are getting an indexer + # + # visually, the MultiIndexes used in this test are: + # mult_idx_1: + # 0: -1 0 + # 1: 2 + # 2: 3 + # 3: 4 + # 4: 0 0 + # 5: 2 + # 6: 3 + # 7: 4 + # 8: 1 0 + # 9: 2 + # 10: 3 + # 11: 4 + # + # mult_idx_2: + # 0: 0 1 + # 1: 3 + # 2: 4 + mult_idx_1 = MultiIndex.from_product([[-1, 0, 1], [0, 2, 3, 4]]) + mult_idx_2 = MultiIndex.from_product([[0], [1, 3, 4]]) + + indexer = mult_idx_1.get_indexer(mult_idx_2) + expected = np.array([-1, 6, 7], dtype=indexer.dtype) + tm.assert_almost_equal(expected, indexer) + + backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="backfill") + expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype) + tm.assert_almost_equal(expected, backfill_indexer) + + # ensure the legacy "bfill" option functions identically to "backfill" + backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill") + expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype) + tm.assert_almost_equal(expected, backfill_indexer) + + pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="pad") + expected = 
np.array([4, 6, 7], dtype=pad_indexer.dtype) + tm.assert_almost_equal(expected, pad_indexer) + + # ensure the legacy "ffill" option functions identically to "pad" + pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill") + expected = np.array([4, 6, 7], dtype=pad_indexer.dtype) + tm.assert_almost_equal(expected, pad_indexer) + + def test_get_indexer_three_or_more_levels(self): + # https://github.com/pandas-dev/pandas/issues/29896 + # tests get_indexer() on MultiIndexes with 3+ levels + # visually, these are + # mult_idx_1: + # 0: 1 2 5 + # 1: 7 + # 2: 4 5 + # 3: 7 + # 4: 6 5 + # 5: 7 + # 6: 3 2 5 + # 7: 7 + # 8: 4 5 + # 9: 7 + # 10: 6 5 + # 11: 7 + # + # mult_idx_2: + # 0: 1 1 8 + # 1: 1 5 9 + # 2: 1 6 7 + # 3: 2 1 6 + # 4: 2 7 6 + # 5: 2 7 8 + # 6: 3 6 8 + mult_idx_1 = MultiIndex.from_product([[1, 3], [2, 4, 6], [5, 7]]) + mult_idx_2 = MultiIndex.from_tuples( + [ + (1, 1, 8), + (1, 5, 9), + (1, 6, 7), + (2, 1, 6), + (2, 7, 7), + (2, 7, 8), + (3, 6, 8), + ] + ) + # sanity check + assert mult_idx_1.is_monotonic_increasing + assert mult_idx_1.is_unique + assert mult_idx_2.is_monotonic_increasing + assert mult_idx_2.is_unique + + # show the relationships between the two + assert mult_idx_2[0] < mult_idx_1[0] + assert mult_idx_1[3] < mult_idx_2[1] < mult_idx_1[4] + assert mult_idx_1[5] == mult_idx_2[2] + assert mult_idx_1[5] < mult_idx_2[3] < mult_idx_1[6] + assert mult_idx_1[5] < mult_idx_2[4] < mult_idx_1[6] + assert mult_idx_1[5] < mult_idx_2[5] < mult_idx_1[6] + assert mult_idx_1[-1] < mult_idx_2[6] + + indexer_no_fill = mult_idx_1.get_indexer(mult_idx_2) + expected = np.array([-1, -1, 5, -1, -1, -1, -1], dtype=indexer_no_fill.dtype) + tm.assert_almost_equal(expected, indexer_no_fill) + + # test with backfilling + indexer_backfilled = mult_idx_1.get_indexer(mult_idx_2, method="backfill") + expected = np.array([0, 4, 5, 6, 6, 6, -1], dtype=indexer_backfilled.dtype) + tm.assert_almost_equal(expected, indexer_backfilled) + + # now, the same thing, but forward-filled (aka "padded") + indexer_padded = mult_idx_1.get_indexer(mult_idx_2, method="pad") + expected = np.array([-1, 3, 5, 5, 5, 5, 11], dtype=indexer_padded.dtype) + tm.assert_almost_equal(expected, indexer_padded) + + # now, do the indexing in the other direction + assert mult_idx_2[0] < mult_idx_1[0] < mult_idx_2[1] + assert mult_idx_2[0] < mult_idx_1[1] < mult_idx_2[1] + assert mult_idx_2[0] < mult_idx_1[2] < mult_idx_2[1] + assert mult_idx_2[0] < mult_idx_1[3] < mult_idx_2[1] + assert mult_idx_2[1] < mult_idx_1[4] < mult_idx_2[2] + assert mult_idx_2[2] == mult_idx_1[5] + assert mult_idx_2[5] < mult_idx_1[6] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[7] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[8] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[9] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[10] < mult_idx_2[6] + assert mult_idx_2[5] < mult_idx_1[11] < mult_idx_2[6] + + indexer = mult_idx_2.get_indexer(mult_idx_1) + expected = np.array( + [-1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1], dtype=indexer.dtype + ) + tm.assert_almost_equal(expected, indexer) + + backfill_indexer = mult_idx_2.get_indexer(mult_idx_1, method="bfill") + expected = np.array( + [1, 1, 1, 1, 2, 2, 6, 6, 6, 6, 6, 6], dtype=backfill_indexer.dtype + ) + tm.assert_almost_equal(expected, backfill_indexer) + + pad_indexer = mult_idx_2.get_indexer(mult_idx_1, method="pad") + expected = np.array( + [0, 0, 0, 0, 1, 2, 5, 5, 5, 5, 5, 5], dtype=pad_indexer.dtype + ) + tm.assert_almost_equal(expected, pad_indexer) + + def 
test_get_indexer_crossing_levels(self): + # https://github.com/pandas-dev/pandas/issues/29896 + # tests a corner case with get_indexer() with MultiIndexes where, when we + # need to "carry" across levels, proper tuple ordering is respected + # + # the MultiIndexes used in this test, visually, are: + # mult_idx_1: + # 0: 1 1 1 1 + # 1: 2 + # 2: 2 1 + # 3: 2 + # 4: 1 2 1 1 + # 5: 2 + # 6: 2 1 + # 7: 2 + # 8: 2 1 1 1 + # 9: 2 + # 10: 2 1 + # 11: 2 + # 12: 2 2 1 1 + # 13: 2 + # 14: 2 1 + # 15: 2 + # + # mult_idx_2: + # 0: 1 3 2 2 + # 1: 2 3 2 2 + mult_idx_1 = MultiIndex.from_product([[1, 2]] * 4) + mult_idx_2 = MultiIndex.from_tuples([(1, 3, 2, 2), (2, 3, 2, 2)]) + + # show the tuple orderings, which get_indexer() should respect + assert mult_idx_1[7] < mult_idx_2[0] < mult_idx_1[8] + assert mult_idx_1[-1] < mult_idx_2[1] + + indexer = mult_idx_1.get_indexer(mult_idx_2) + expected = np.array([-1, -1], dtype=indexer.dtype) + tm.assert_almost_equal(expected, indexer) + + backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill") + expected = np.array([8, -1], dtype=backfill_indexer.dtype) + tm.assert_almost_equal(expected, backfill_indexer) + + pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill") + expected = np.array([7, 15], dtype=pad_indexer.dtype) + tm.assert_almost_equal(expected, pad_indexer) + + def test_get_indexer_kwarg_validation(self): + # GH#41918 + mi = MultiIndex.from_product([range(3), ["A", "B"]]) + + msg = "limit argument only valid if doing pad, backfill or nearest" + with pytest.raises(ValueError, match=msg): + mi.get_indexer(mi[:-1], limit=4) + + msg = "tolerance argument only valid if doing pad, backfill or nearest" + with pytest.raises(ValueError, match=msg): + mi.get_indexer(mi[:-1], tolerance="piano") + + +def test_getitem(idx): + # scalar + assert idx[2] == ("bar", "one") + + # slice + result = idx[2:5] + expected = idx[[2, 3, 4]] + assert result.equals(expected) + + # boolean + result = idx[[True, False, True, False, True, True]] + result2 = idx[np.array([True, False, True, False, True, True])] + expected = idx[[0, 2, 4, 5]] + assert result.equals(expected) + assert result2.equals(expected) + + +def test_getitem_group_select(idx): + sorted_idx, _ = idx.sortlevel(0) + assert sorted_idx.get_loc("baz") == slice(3, 4) + assert sorted_idx.get_loc("foo") == slice(0, 2) + + +@pytest.mark.parametrize("ind1", [[True] * 5, Index([True] * 5)]) +@pytest.mark.parametrize( + "ind2", + [[True, False, True, False, False], Index([True, False, True, False, False])], +) +def test_getitem_bool_index_all(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = MultiIndex.from_tuples([(10, 1), (30, 3)]) + tm.assert_index_equal(idx[ind2], expected) + + +@pytest.mark.parametrize("ind1", [[True], Index([True])]) +@pytest.mark.parametrize("ind2", [[False], Index([False])]) +def test_getitem_bool_index_single(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = MultiIndex( + levels=[np.array([], dtype=np.int64), np.array([], dtype=np.int64)], + codes=[[], []], + ) + tm.assert_index_equal(idx[ind2], expected) + + +class TestGetLoc: + def test_get_loc(self, idx): + assert idx.get_loc(("foo", "two")) == 1 + assert idx.get_loc(("baz", "two")) == 3 + with pytest.raises(KeyError, match=r"^10$"): + idx.get_loc(("bar", "two")) + with pytest.raises(KeyError, match=r"^'quux'$"): + idx.get_loc("quux") + + msg = 
"only the default get_loc method is currently supported for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.get_loc("foo", method="nearest") + + # 3 levels + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + with pytest.raises(KeyError, match=r"^\(1, 1\)$"): + index.get_loc((1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) + + def test_get_loc_duplicates(self): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + assert result == expected + + index = Index(["c", "a", "a", "b", "b"]) + rs = index.get_loc("c") + xp = 0 + assert rs == xp + + with pytest.raises(KeyError, match="2"): + index.get_loc(2) + + def test_get_loc_level(self): + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + assert loc == expected + assert new_index.equals(exp_index) + + loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + assert loc == expected + assert new_index is None + + with pytest.raises(KeyError, match=r"^\(2, 2\)$"): + index.get_loc_level((2, 2)) + # GH 22221: unused label + with pytest.raises(KeyError, match=r"^2$"): + index.drop(2).get_loc_level(2) + # Unused label on unsorted level: + with pytest.raises(KeyError, match=r"^2$"): + index.drop(1, level=2).get_loc_level(2, level=2) + + index = MultiIndex( + levels=[[2000], list(range(4))], + codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])], + ) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + @pytest.mark.parametrize("dtype1", [int, float, bool, str]) + @pytest.mark.parametrize("dtype2", [int, float, bool, str]) + def test_get_loc_multiple_dtypes(self, dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), np.array([0, 1]).astype(dtype2)] + idx = MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + @pytest.mark.parametrize("level", [0, 1]) + @pytest.mark.parametrize("dtypes", [[int, float], [float, int]]) + def test_get_loc_implicit_cast(self, level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [["a", "b"], ["c", "d"]] + key = ["b", "d"] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + @pytest.mark.parametrize("dtype", [bool, object]) + def test_get_loc_cast_bool(self, dtype): + # GH 19086 : int is casted to bool, but not vice-versa (for object dtype) + # With bool dtype, we don't cast in either direction. 
+ levels = [Index([False, True], dtype=dtype), np.arange(2, dtype="int64")] + idx = MultiIndex.from_product(levels) + + if dtype is bool: + with pytest.raises(KeyError, match=r"^\(0, 1\)$"): + assert idx.get_loc((0, 1)) == 1 + with pytest.raises(KeyError, match=r"^\(1, 0\)$"): + assert idx.get_loc((1, 0)) == 2 + else: + # We use python object comparisons, which treat 0 == False and 1 == True + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + with pytest.raises(KeyError, match=r"^\(False, True\)$"): + idx.get_loc((False, True)) + with pytest.raises(KeyError, match=r"^\(True, False\)$"): + idx.get_loc((True, False)) + + @pytest.mark.parametrize("level", [0, 1]) + def test_get_loc_nan(self, level, nulls_fixture): + # GH 18485 : NaN in MultiIndex + levels = [["a", "b"], ["c", "d"]] + key = ["b", "d"] + levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) + key[level] = nulls_fixture + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + def test_get_loc_missing_nan(self): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + with pytest.raises(KeyError, match=r"^3$"): + idx.get_loc(3) + with pytest.raises(KeyError, match=r"^nan$"): + idx.get_loc(np.nan) + with pytest.raises(InvalidIndexError, match=r"\[nan\]"): + # listlike/non-hashable raises TypeError + idx.get_loc([np.nan]) + + def test_get_loc_with_values_including_missing_values(self): + # issue 19132 + idx = MultiIndex.from_product([[np.nan, 1]] * 2) + expected = slice(0, 2, None) + assert idx.get_loc(np.nan) == expected + + idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]]) + expected = np.array([True, False, False, True]) + tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected) + + idx = MultiIndex.from_product([[np.nan, 1]] * 3) + expected = slice(2, 4, None) + assert idx.get_loc((np.nan, 1)) == expected + + def test_get_loc_duplicates2(self): + # TODO: de-duplicate with test_get_loc_duplicates above? 
+ index = MultiIndex( + levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=["tag", "day"], + ) + + assert index.get_loc("D") == slice(0, 3) + + def test_get_loc_past_lexsort_depth(self): + # GH#30053 + idx = MultiIndex( + levels=[["a"], [0, 7], [1]], + codes=[[0, 0], [1, 0], [0, 0]], + names=["x", "y", "z"], + sortorder=0, + ) + key = ("a", 7) + + with tm.assert_produces_warning(PerformanceWarning): + # PerformanceWarning: indexing past lexsort depth may impact performance + result = idx.get_loc(key) + + assert result == slice(0, 1, None) + + def test_multiindex_get_loc_list_raises(self): + # GH#35878 + idx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + msg = r"\[\]" + with pytest.raises(InvalidIndexError, match=msg): + idx.get_loc([]) + + def test_get_loc_nested_tuple_raises_keyerror(self): + # raise KeyError, not TypeError + mi = MultiIndex.from_product([range(3), range(4), range(5), range(6)]) + key = ((2, 3, 4), "foo") + + with pytest.raises(KeyError, match=re.escape(str(key))): + mi.get_loc(key) + + +class TestWhere: + def test_where(self): + i = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + + msg = r"\.where is not supported for MultiIndex operations" + with pytest.raises(NotImplementedError, match=msg): + i.where(True) + + def test_where_array_like(self, listlike_box): + mi = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + cond = [False, True] + msg = r"\.where is not supported for MultiIndex operations" + with pytest.raises(NotImplementedError, match=msg): + mi.where(listlike_box(cond)) + + +class TestContains: + def test_contains_top_level(self): + midx = MultiIndex.from_product([["A", "B"], [1, 2]]) + assert "A" in midx + assert "A" not in midx._engine + + def test_contains_with_nat(self): + # MI with a NaT + mi = MultiIndex( + levels=[["C"], date_range("2012-01-01", periods=5)], + codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, "B"], + ) + assert ("C", pd.Timestamp("2012-01-01")) in mi + for val in mi.values: + assert val in mi + + def test_contains(self, idx): + assert ("foo", "two") in idx + assert ("bar", "two") not in idx + assert None not in idx + + def test_contains_with_missing_value(self): + # GH#19132 + idx = MultiIndex.from_arrays([[1, np.nan, 2]]) + assert np.nan in idx + + idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]]) + assert np.nan not in idx + assert (1, np.nan) in idx + + def test_multiindex_contains_dropped(self): + # GH#19027 + # test that dropped MultiIndex levels are not in the MultiIndex + # despite continuing to be in the MultiIndex's levels + idx = MultiIndex.from_product([[1, 2], [3, 4]]) + assert 2 in idx + idx = idx.drop(2) + + # drop implementation keeps 2 in the levels + assert 2 in idx.levels[0] + # but it should no longer be in the index itself + assert 2 not in idx + + # also applies to strings + idx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + assert "a" in idx + idx = idx.drop("a") + assert "a" in idx.levels[0] + assert "a" not in idx + + def test_contains_td64_level(self): + # GH#24570 + tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min") + idx = MultiIndex.from_arrays([tx, np.arange(len(tx))]) + assert tx[0] in idx + assert "element_not_exit" not in idx + assert "0 day 09:30:00" in idx + + @pytest.mark.slow + def test_large_mi_contains(self): + # GH#10645 + result = MultiIndex.from_arrays([range(10**6), range(10**6)]) + assert not (10**6, 0) in result + + +def test_timestamp_multiindex_indexer(): + # 
https://github.com/pandas-dev/pandas/issues/26944 + idx = MultiIndex.from_product( + [ + date_range("2019-01-01T00:15:33", periods=100, freq="H", name="date"), + ["x"], + [3], + ] + ) + df = pd.DataFrame({"foo": np.arange(len(idx))}, idx) + result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"] + qidx = MultiIndex.from_product( + [ + date_range( + start="2019-01-02T00:15:33", + end="2019-01-05T03:15:33", + freq="H", + name="date", + ), + ["x"], + [3], + ] + ) + should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo") + tm.assert_series_equal(result, should_be) + + +@pytest.mark.parametrize( + "index_arr,expected,target,algo", + [ + ([[np.nan, "a", "b"], ["c", "d", "e"]], 0, np.nan, "left"), + ([[np.nan, "a", "b"], ["c", "d", "e"]], 1, (np.nan, "c"), "right"), + ([["a", "b", "c"], ["d", np.nan, "d"]], 1, ("b", np.nan), "left"), + ], +) +def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + result = idx.get_slice_bound(target, side=algo, kind="loc") + assert result == expected + + +@pytest.mark.parametrize( + "index_arr,expected,start_idx,end_idx", + [ + ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 2, None), np.nan, 1), + ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 3, None), np.nan, (2, 5)), + ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), 3), + ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), (3, 5)), + ], +) +def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_idx): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.slice_indexer(start=start_idx, end=end_idx) + assert result == expected + + +def test_pyint_engine(): + # GH#18519 : when combinations of codes cannot be represented in 64 + # bits, the index underlying the MultiIndex engine works with Python + # integers, rather than uint64. + N = 5 + keys = [ + tuple(arr) + for arr in [ + [0] * 10 * N, + [1] * 10 * N, + [2] * 10 * N, + [np.nan] * N + [2] * 9 * N, + [0] * N + [2] * 9 * N, + [np.nan] * N + [2] * 8 * N + [0] * N, + ] + ] + # Each level contains 4 elements (including NaN), so it is represented + # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a + # 64 bit engine and truncating the first levels, the fourth and fifth + # keys would collide; if truncating the last levels, the fifth and + # sixth; if rotating bits rather than shifting, the third and fifth. 
+ + for idx in range(len(keys)): + index = MultiIndex.from_tuples(keys) + assert index.get_loc(keys[idx]) == idx + + expected = np.arange(idx + 1, dtype=np.intp) + result = index.get_indexer([keys[i] for i in expected]) + tm.assert_numpy_array_equal(result, expected) + + # With missing key: + idces = range(len(keys)) + expected = np.array([-1] + list(idces), dtype=np.intp) + missing = tuple([0, 1] * 5 * N) + result = index.get_indexer([missing] + [keys[i] for i in idces]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py new file mode 100644 index 00000000..c3587174 --- /dev/null +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -0,0 +1,280 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas as pd +from pandas import ( + IntervalIndex, + MultiIndex, + RangeIndex, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + + +def test_labels_dtypes(): + + # GH 8456 + i = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + assert i.codes[0].dtype == "int8" + assert i.codes[1].dtype == "int8" + + i = MultiIndex.from_product([["a"], range(40)]) + assert i.codes[1].dtype == "int8" + i = MultiIndex.from_product([["a"], range(400)]) + assert i.codes[1].dtype == "int16" + i = MultiIndex.from_product([["a"], range(40000)]) + assert i.codes[1].dtype == "int32" + + i = MultiIndex.from_product([["a"], range(1000)]) + assert (i.codes[0] >= 0).all() + assert (i.codes[1] >= 0).all() + + +def test_values_boxed(): + tuples = [ + (1, pd.Timestamp("2000-01-01")), + (2, pd.NaT), + (3, pd.Timestamp("2000-01-03")), + (1, pd.Timestamp("2000-01-04")), + (2, pd.Timestamp("2000-01-02")), + (3, pd.Timestamp("2000-01-03")), + ] + result = MultiIndex.from_tuples(tuples) + expected = construct_1d_object_array_from_listlike(tuples) + tm.assert_numpy_array_equal(result.values, expected) + # Check that code branches for boxed values produce identical results + tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + + +def test_values_multiindex_datetimeindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10**18, 10**18 + 5) + naive = pd.DatetimeIndex(ints) + + aware = pd.DatetimeIndex(ints, tz="US/Central") + + idx = MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + +def test_values_multiindex_periodindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq="D") + + idx = MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + + +def 
test_consistency(): + # need to construct an overflow + major_axis = list(range(70000)) + minor_axis = list(range(10)) + + major_codes = np.arange(70000) + minor_codes = np.repeat(range(10), 7000) + + # the fact that is works means it's consistent + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + # inconsistent + major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + assert index.is_unique is False + + +@pytest.mark.slow +def test_hash_collisions(): + # non-smoke test that we don't get hash collisions + + index = MultiIndex.from_product( + [np.arange(1000), np.arange(1000)], names=["one", "two"] + ) + result = index.get_indexer(index.values) + tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp")) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + assert result == i + + +def test_dims(): + pass + + +def test_take_invalid_kwargs(): + vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] + idx = MultiIndex.from_product(vals, names=["str", "dt"]) + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +def test_isna_behavior(idx): + # should not segfault GH5123 + # NOTE: if MI representation changes, may make sense to allow + # isna(MI) + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + pd.isna(idx) + + +def test_large_multiindex_error(): + # GH12527 + df_below_1000000 = pd.DataFrame( + 1, index=MultiIndex.from_product([[1, 2], range(499999)]), columns=["dest"] + ) + with pytest.raises(KeyError, match=r"^\(-1, 0\)$"): + df_below_1000000.loc[(-1, 0), "dest"] + with pytest.raises(KeyError, match=r"^\(3, 0\)$"): + df_below_1000000.loc[(3, 0), "dest"] + df_above_1000000 = pd.DataFrame( + 1, index=MultiIndex.from_product([[1, 2], range(500001)]), columns=["dest"] + ) + with pytest.raises(KeyError, match=r"^\(-1, 0\)$"): + df_above_1000000.loc[(-1, 0), "dest"] + with pytest.raises(KeyError, match=r"^\(3, 0\)$"): + df_above_1000000.loc[(3, 0), "dest"] + + +def test_million_record_attribute_error(): + # GH 18165 + r = list(range(1000000)) + df = pd.DataFrame( + {"a": r, "b": r}, index=MultiIndex.from_tuples([(x, x) for x in r]) + ) + + msg = "'Series' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + df["a"].foo() + + +def test_can_hold_identifiers(idx): + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + +def test_metadata_immutable(idx): + levels, codes = idx.levels, idx.codes + # shouldn't be able to set at either the top level or base level + mutable_regex = re.compile("does not support mutable operations") + with pytest.raises(TypeError, match=mutable_regex): + levels[0] = levels[0] + with pytest.raises(TypeError, match=mutable_regex): + levels[0][0] = levels[0][0] + # ditto for labels + with pytest.raises(TypeError, match=mutable_regex): + codes[0] = codes[0] + with pytest.raises(ValueError, match="assignment destination is read-only"): + codes[0][0] = 
codes[0][0] + # and for names + names = idx.names + with pytest.raises(TypeError, match=mutable_regex): + names[0] = names[0] + + +def test_level_setting_resets_attributes(): + ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + assert ind.is_monotonic_increasing + with tm.assert_produces_warning(FutureWarning): + ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True) + # if this fails, probably didn't reset the cache correctly. + assert not ind.is_monotonic_increasing + + +def test_rangeindex_fallback_coercion_bug(): + # GH 12893 + foo = pd.DataFrame(np.arange(100).reshape((10, 10))) + bar = pd.DataFrame(np.arange(100).reshape((10, 10))) + df = pd.concat({"foo": foo.stack(), "bar": bar.stack()}, axis=1) + df.index.names = ["fizz", "buzz"] + + str(df) + expected = pd.DataFrame( + {"bar": np.arange(100), "foo": np.arange(100)}, + index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]), + ) + tm.assert_frame_equal(df, expected, check_like=True) + + result = df.index.get_level_values("fizz") + expected = Int64Index(np.arange(10), name="fizz").repeat(10) + tm.assert_index_equal(result, expected) + + result = df.index.get_level_values("buzz") + expected = Int64Index(np.tile(np.arange(10), 10), name="buzz") + tm.assert_index_equal(result, expected) + + +def test_memory_usage(idx): + result = idx.memory_usage() + if len(idx): + idx.get_loc(idx[0]) + result2 = idx.memory_usage() + result3 = idx.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(idx, (RangeIndex, IntervalIndex)): + assert result2 > result + + if idx.inferred_type == "object": + assert result3 > result2 + + else: + + # we report 0 for no-length + assert result == 0 + + +def test_nlevels(idx): + assert idx.nlevels == 2 diff --git a/pandas/tests/indexes/multi/test_isin.py b/pandas/tests/indexes/multi/test_isin.py new file mode 100644 index 00000000..69545827 --- /dev/null +++ b/pandas/tests/indexes/multi/test_isin.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +from pandas import MultiIndex +import pandas._testing as tm + + +def test_isin_nan(): + idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([("bar", np.nan)]), np.array([False, True])) + tm.assert_numpy_array_equal( + idx.isin([("bar", float("nan"))]), np.array([False, True]) + ) + + +def test_isin(): + values = [("foo", 2), ("bar", 3), ("quux", 4)] + + idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)]) + result = idx.isin(values) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty, return dtype bool + idx = MultiIndex.from_arrays([[], []]) + result = idx.isin(values) + assert len(result) == 0 + assert result.dtype == np.bool_ + + +def test_isin_level_kwarg(): + idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)]) + + vals_0 = ["foo", "bar", "quux"] + vals_1 = [2, 3, 10] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) + + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) + + msg = "Too many levels: Index has only 2 levels, not 6" + with pytest.raises(IndexError, match=msg): + idx.isin(vals_0, level=5) + msg = "Too many levels: Index has only 2 levels, -5 is not a valid level number" + with 
pytest.raises(IndexError, match=msg): + idx.isin(vals_0, level=-5) + + with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"): + idx.isin(vals_0, level=1.0) + with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"): + idx.isin(vals_1, level=-1.0) + with pytest.raises(KeyError, match="'Level A not found'"): + idx.isin(vals_1, level="A") + + idx.names = ["A", "B"] + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A")) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B")) + + with pytest.raises(KeyError, match="'Level C not found'"): + idx.isin(vals_1, level="C") + + +@pytest.mark.parametrize( + "labels,expected,level", + [ + ([("b", np.nan)], np.array([False, False, True]), None), + ([np.nan, "a"], np.array([True, True, False]), 0), + (["d", np.nan], np.array([False, True, True]), 1), + ], +) +def test_isin_multi_index_with_missing_value(labels, expected, level): + # GH 19132 + midx = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]]) + result = midx.isin(labels, level=level) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py new file mode 100644 index 00000000..7000724a --- /dev/null +++ b/pandas/tests/indexes/multi/test_join.py @@ -0,0 +1,208 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + Interval, + MultiIndex, + Series, + StringDtype, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])] +) +def test_join_level(idx, other, join_type): + join_index, lidx, ridx = other.join( + idx, how=join_type, level="second", return_indexers=True + ) + + exp_level = other.join(idx.levels[1], how=join_type) + assert join_index.levels[0].equals(idx.levels[0]) + assert join_index.levels[1].equals(exp_level) + + # pare down levels + mask = np.array([x[1] in exp_level for x in idx], dtype=bool) + exp_values = idx.values[mask] + tm.assert_numpy_array_equal(join_index.values, exp_values) + + if join_type in ("outer", "inner"): + join_index2, ridx2, lidx2 = idx.join( + other, how=join_type, level="second", return_indexers=True + ) + + assert join_index.equals(join_index2) + tm.assert_numpy_array_equal(lidx, lidx2) + tm.assert_numpy_array_equal(ridx, ridx2) + tm.assert_numpy_array_equal(join_index2.values, exp_values) + + +def test_join_level_corner_case(idx): + # some corner cases + index = Index(["three", "one", "two"]) + result = index.join(idx, level="second") + assert isinstance(result, MultiIndex) + + with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"): + idx.join(idx, level=1) + + +def test_join_self(idx, join_type): + joined = idx.join(idx, how=join_type) + tm.assert_index_equal(joined, idx) + + +def test_join_multi(): + # GH 10665 + midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"]) + idx = Index([1, 2, 5], name="b") + + # inner + jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True) + exp_idx = MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"]) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + 
tm.assert_numpy_array_equal(ridx, exp_ridx) + + # keep MultiIndex + jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True) + exp_ridx = np.array( + [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp + ) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_self_unique(idx, join_type): + if idx.is_unique: + joined = idx.join(idx, how=join_type) + assert (idx == joined).all() + + +def test_join_multi_wrong_order(): + # GH 25760 + # GH 28956 + + midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"]) + + join_idx, lidx, ridx = midx1.join(midx2, return_indexers=True) + + exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp) + + tm.assert_index_equal(midx1, join_idx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_multi_return_indexers(): + # GH 34074 + + midx1 = MultiIndex.from_product([[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"]) + midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + + result = midx1.join(midx2, return_indexers=False) + tm.assert_index_equal(result, midx1) + + +def test_join_overlapping_interval_level(): + # GH 44096 + idx_1 = MultiIndex.from_tuples( + [ + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (1, Interval(2.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), # interval limit is here at 3.0, not at 2.0 + (2, Interval(3.0, 5.0)), + ], + names=["num", "interval"], + ) + + idx_2 = MultiIndex.from_tuples( + [ + (1, Interval(2.0, 5.0)), + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (2, Interval(3.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), + ], + names=["num", "interval"], + ) + + expected = MultiIndex.from_tuples( + [ + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (1, Interval(2.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), + (2, Interval(3.0, 5.0)), + ], + names=["num", "interval"], + ) + result = idx_1.join(idx_2, how="outer") + + tm.assert_index_equal(result, expected) + + +def test_join_midx_ea(): + # GH#49277 + midx = MultiIndex.from_arrays( + [Series([1, 1, 3], dtype="Int64"), Series([1, 2, 3], dtype="Int64")], + names=["a", "b"], + ) + midx2 = MultiIndex.from_arrays( + [Series([1], dtype="Int64"), Series([3], dtype="Int64")], names=["a", "c"] + ) + result = midx.join(midx2, how="inner") + expected = MultiIndex.from_arrays( + [ + Series([1, 1], dtype="Int64"), + Series([1, 2], dtype="Int64"), + Series([3, 3], dtype="Int64"), + ], + names=["a", "b", "c"], + ) + tm.assert_index_equal(result, expected) + + +def test_join_midx_string(): + # GH#49277 + midx = MultiIndex.from_arrays( + [ + Series(["a", "a", "c"], dtype=StringDtype()), + Series(["a", "b", "c"], dtype=StringDtype()), + ], + names=["a", "b"], + ) + midx2 = MultiIndex.from_arrays( + [Series(["a"], dtype=StringDtype()), Series(["c"], dtype=StringDtype())], + names=["a", "c"], + ) + result = midx.join(midx2, how="inner") + expected = MultiIndex.from_arrays( + [ + Series(["a", "a"], dtype=StringDtype()), + Series(["a", "b"], dtype=StringDtype()), + Series(["c", "c"], dtype=StringDtype()), + ], + names=["a", "b", "c"], + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_lexsort.py 
b/pandas/tests/indexes/multi/test_lexsort.py new file mode 100644 index 00000000..0aadbdb5 --- /dev/null +++ b/pandas/tests/indexes/multi/test_lexsort.py @@ -0,0 +1,63 @@ +from pandas import MultiIndex +import pandas._testing as tm + + +class TestIsLexsorted: + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + ) + assert index._is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] + ) + assert not index._is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] + ) + assert not index._is_lexsorted() + assert index._lexsort_depth == 0 + + def test_is_lexsorted_deprecation(self): + # GH 32259 + with tm.assert_produces_warning( + FutureWarning, + match="MultiIndex.is_lexsorted is deprecated as a public function", + ): + MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).is_lexsorted() + + +class TestLexsortDepth: + def test_lexsort_depth(self): + # Test that lexsort_depth return the correct sortorder + # when it was given to the MultiIndex const. + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + assert index._lexsort_depth == 2 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 + ) + assert index._lexsort_depth == 1 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 + ) + assert index._lexsort_depth == 0 + + def test_lexsort_depth_deprecation(self): + # GH 32259 + with tm.assert_produces_warning( + FutureWarning, + match="MultiIndex.lexsort_depth is deprecated as a public function", + ): + MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).lexsort_depth diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py new file mode 100644 index 00000000..cd95802a --- /dev/null +++ b/pandas/tests/indexes/multi/test_missing.py @@ -0,0 +1,112 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import MultiIndex +import pandas._testing as tm + + +def test_fillna(idx): + # GH 11343 + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.fillna(idx[0]) + + +def test_dropna(): + # GH 6194 + idx = MultiIndex.from_arrays( + [ + [1, np.nan, 3, np.nan, 5], + [1, 2, np.nan, np.nan, 5], + ["a", "b", "c", np.nan, "e"], + ] + ) + + exp = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]]) + tm.assert_index_equal(idx.dropna(), exp) + tm.assert_index_equal(idx.dropna(how="any"), exp) + + exp = MultiIndex.from_arrays( + [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]] + ) + tm.assert_index_equal(idx.dropna(how="all"), exp) + + msg = "invalid how option: xxx" + with pytest.raises(ValueError, match=msg): + idx.dropna(how="xxx") + + # GH26408 + # test if missing values are dropped for multiindex constructed + # from codes and values + idx = MultiIndex( + levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]], + codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]], + ) + expected = MultiIndex.from_arrays([["128", 2], ["128", 2]]) + tm.assert_index_equal(idx.dropna(), expected) + tm.assert_index_equal(idx.dropna(how="any"), expected) + + expected = MultiIndex.from_arrays( + [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]] + ) + 
tm.assert_index_equal(idx.dropna(how="all"), expected) + + +def test_nulls(idx): + # this is really a smoke test for the methods + # as these are adequately tested for function elsewhere + + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.isna() + + +@pytest.mark.xfail(reason="isna is not defined for MultiIndex") +def test_hasnans_isnans(idx): + # GH 11343, added tests for hasnans / isnans + index = idx.copy() + + # cases in indices doesn't include NaN + expected = np.array([False] * len(index), dtype=bool) + tm.assert_numpy_array_equal(index._isnan, expected) + assert index.hasnans is False + + index = idx.copy() + values = index.values + values[1] = np.nan + + index = type(idx)(values) + + expected = np.array([False] * len(index), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(index._isnan, expected) + assert index.hasnans is True + + +def test_nan_stays_float(): + + # GH 7031 + idx0 = MultiIndex(levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]) + idx1 = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1]) + idxm = idx0.join(idx1, how="outer") + assert pd.isna(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isna(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() + + +def test_tuples_have_na(): + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], + ) + + assert pd.isna(index[4][0]) + assert pd.isna(index.values[4][0]) diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py new file mode 100644 index 00000000..2b0b3f7c --- /dev/null +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -0,0 +1,188 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + MultiIndex, +) + + +def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex): + # string ordering + mi = lexsorted_two_level_string_multiindex + assert mi.is_monotonic_increasing is False + assert Index(mi.values).is_monotonic_increasing is False + assert mi._is_strictly_monotonic_increasing is False + assert Index(mi.values)._is_strictly_monotonic_increasing is False + + +def test_is_monotonic_increasing(): + i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"]) + assert i.is_monotonic_increasing is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values).is_monotonic_increasing is True + assert i._is_strictly_monotonic_increasing is True + + i = MultiIndex.from_product( + [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] + ) + assert i.is_monotonic_increasing is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic_increasing is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex.from_product( + [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] + ) + assert i.is_monotonic_increasing is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic_increasing is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]]) + assert 
i.is_monotonic_increasing is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic_increasing is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex( + levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_increasing is True + assert Index(i.values).is_monotonic_increasing is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values)._is_strictly_monotonic_increasing is True + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[ + [1, 2, 3, 4], + [ + "gb00b03mlx29", + "lu0197800237", + "nl0000289783", + "nl0000289965", + "nl0000301109", + ], + ], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=["household_id", "asset_id"], + ) + + assert i.is_monotonic_increasing is False + assert i._is_strictly_monotonic_increasing is False + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_increasing is True + assert Index(i.values).is_monotonic_increasing is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values)._is_strictly_monotonic_increasing is True + + +def test_is_monotonic_decreasing(): + i = MultiIndex.from_product( + [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + + i = MultiIndex.from_product( + [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex.from_product( + [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]]) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + # string ordering + i = MultiIndex( + levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex( + levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values)._is_strictly_monotonic_decreasing is True + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[ + [4, 3, 2, 1], + [ + "nl0000301109", + 
"nl0000289965", + "nl0000289783", + "lu0197800237", + "gb00b03mlx29", + ], + ], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=["household_id", "asset_id"], + ) + + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values)._is_strictly_monotonic_decreasing is True + + +def test_is_strictly_monotonic_increasing(): + idx = MultiIndex( + levels=[["bar", "baz"], ["mom", "next"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]] + ) + assert idx.is_monotonic_increasing is True + assert idx._is_strictly_monotonic_increasing is False + + +def test_is_strictly_monotonic_decreasing(): + idx = MultiIndex( + levels=[["baz", "bar"], ["next", "mom"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]] + ) + assert idx.is_monotonic_decreasing is True + assert idx._is_strictly_monotonic_decreasing is False + + +@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"]) +@pytest.mark.parametrize( + "values", + [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]], +) +def test_is_monotonic_with_nans(values, attr): + # GH: 37220 + idx = MultiIndex.from_tuples(values, names=["test"]) + assert getattr(idx, attr) is False diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py new file mode 100644 index 00000000..cfbc90d1 --- /dev/null +++ b/pandas/tests/indexes/multi/test_names.py @@ -0,0 +1,205 @@ +import pytest + +import pandas as pd +from pandas import MultiIndex +import pandas._testing as tm + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_slice_keep_name(): + x = MultiIndex.from_tuples([("a", "b"), (1, 2), ("c", "d")], names=["x", "y"]) + assert x[1:].names == x.names + + +def test_index_name_retained(): + # GH9857 + result = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]}) + result = result.set_index("z") + result.loc[10] = [9, 10] + df_expected = pd.DataFrame( + {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]} + ) + df_expected = df_expected.set_index("z") + tm.assert_frame_equal(result, df_expected) + + +def test_changing_names(idx): + assert [level.name for level in idx.levels] == ["first", "second"] + + view = idx.view() + copy = idx.copy() + shallow_copy = idx._view() + + # changing names should not change level names on object + new_names = [name + "a" for name in idx.names] + idx.names = new_names + check_level_names(idx, ["firsta", "seconda"]) + + # and not on copies + check_level_names(view, ["first", "second"]) + check_level_names(copy, ["first", "second"]) + check_level_names(shallow_copy, ["first", "second"]) + + # and copies shouldn't change original + shallow_copy.names = [name + "c" for name in shallow_copy.names] + check_level_names(idx, ["firsta", "seconda"]) + + +def test_take_preserve_name(idx): + taken = idx.take([3, 0, 1]) + assert taken.names == idx.names + + +def test_copy_names(): + # Check that adding a "names" parameter to the copy is honored + # GH14302 + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"]) + multi_idx1 = multi_idx.copy() + + assert multi_idx.equals(multi_idx1) + assert multi_idx.names == ["MyName1", "MyName2"] + 
assert multi_idx1.names == ["MyName1", "MyName2"] + + multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"]) + + assert multi_idx.equals(multi_idx2) + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx2.names == ["NewName1", "NewName2"] + + multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"]) + + assert multi_idx.equals(multi_idx3) + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx3.names == ["NewName1", "NewName2"] + + # gh-35592 + with pytest.raises(ValueError, match="Length of new names must be 2, got 1"): + multi_idx.copy(names=["mario"]) + + with pytest.raises(TypeError, match="MultiIndex.name must be a hashable type"): + multi_idx.copy(names=[["mario"], ["luigi"]]) + + +def test_names(idx, index_names): + + # names are assigned in setup + assert index_names == ["first", "second"] + level_names = [level.name for level in idx.levels] + assert level_names == index_names + + # setting bad names on existing + index = idx + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", list(index.names) + ["third"]) + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", []) + + # initializing with bad names (should always be equivalent) + major_axis, minor_axis = idx.levels + major_codes, minor_codes = idx.codes + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["first"], + ) + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["first", "second", "third"], + ) + + # names are assigned on index, but not transferred to the levels + index.names = ["a", "b"] + level_names = [level.name for level in index.levels] + assert level_names == ["a", "b"] + + +def test_duplicate_level_names_access_raises(idx): + # GH19029 + idx.names = ["foo", "foo"] + with pytest.raises(ValueError, match="name foo occurs multiple times"): + idx._get_level_number("foo") + + +def test_get_names_from_levels(): + idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"]) + + assert idx.levels[0].name == "a" + assert idx.levels[1].name == "b" + + +def test_setting_names_from_levels_raises(): + idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"]) + with pytest.raises(RuntimeError, match="set_names"): + idx.levels[0].name = "foo" + + with pytest.raises(RuntimeError, match="set_names"): + idx.levels[1].name = "foo" + + new = pd.Series(1, index=idx.levels[0]) + with pytest.raises(RuntimeError, match="set_names"): + new.index.name = "bar" + + assert pd.Index._no_setting_name is False + assert pd.core.api.NumericIndex._no_setting_name is False + assert pd.RangeIndex._no_setting_name is False + + +@pytest.mark.parametrize("func", ["rename", "set_names"]) +@pytest.mark.parametrize( + "rename_dict, exp_names", + [ + ({"x": "z"}, ["z", "y", "z"]), + ({"x": "z", "y": "x"}, ["z", "x", "z"]), + ({"y": "z"}, ["x", "z", "x"]), + ({}, ["x", "y", "x"]), + ({"z": "a"}, ["x", "y", "x"]), + ({"y": "z", "a": "b"}, ["x", "z", "x"]), + ], +) +def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names): + # GH#20421 + mi = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=["x", "y", "x"]) + result = getattr(mi, func)(rename_dict) + expected = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=exp_names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("func", ["rename", 
"set_names"]) +@pytest.mark.parametrize( + "rename_dict, exp_names", + [ + ({"x": "z"}, ["z", "y"]), + ({"x": "z", "y": "x"}, ["z", "x"]), + ({"a": "z"}, ["x", "y"]), + ({}, ["x", "y"]), + ], +) +def test_name_mi_with_dict_like(func, rename_dict, exp_names): + # GH#20421 + mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"]) + result = getattr(mi, func)(rename_dict) + expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=exp_names) + tm.assert_index_equal(result, expected) + + +def test_index_name_with_dict_like_raising(): + # GH#20421 + ix = pd.Index([1, 2]) + msg = "Can only pass dict-like as `names` for MultiIndex." + with pytest.raises(TypeError, match=msg): + ix.set_names({"x": "z"}) + + +def test_multiindex_name_and_level_raising(): + # GH#20421 + mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"]) + with pytest.raises(TypeError, match="Can not pass level for dictlike `names`."): + mi.set_names(names={"x": "z"}, level={"x": "z"}) diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py new file mode 100644 index 00000000..47efc43d --- /dev/null +++ b/pandas/tests/indexes/multi/test_partial_indexing.py @@ -0,0 +1,148 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + IndexSlice, + MultiIndex, + date_range, +) +import pandas._testing as tm + + +@pytest.fixture +def df(): + # c1 + # 2016-01-01 00:00:00 a 0 + # b 1 + # c 2 + # 2016-01-01 12:00:00 a 3 + # b 4 + # c 5 + # 2016-01-02 00:00:00 a 6 + # b 7 + # c 8 + # 2016-01-02 12:00:00 a 9 + # b 10 + # c 11 + # 2016-01-03 00:00:00 a 12 + # b 13 + # c 14 + dr = date_range("2016-01-01", "2016-01-03", freq="12H") + abc = ["a", "b", "c"] + mi = MultiIndex.from_product([dr, abc]) + frame = DataFrame({"c1": range(0, 15)}, index=mi) + return frame + + +def test_partial_string_matching_single_index(df): + # partial string matching on a single index + for df_swap in [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]: + df_swap = df_swap.sort_index() + just_a = df_swap.loc["a"] + result = just_a.loc["2016-01-01"] + expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2] + expected.index = expected.index.droplevel(1) + tm.assert_frame_equal(result, expected) + + +def test_get_loc_partial_timestamp_multiindex(df): + mi = df.index + key = ("2016-01-01", "a") + loc = mi.get_loc(key) + + expected = np.zeros(len(mi), dtype=bool) + expected[[0, 3]] = True + tm.assert_numpy_array_equal(loc, expected) + + key2 = ("2016-01-02", "a") + loc2 = mi.get_loc(key2) + expected2 = np.zeros(len(mi), dtype=bool) + expected2[[6, 9]] = True + tm.assert_numpy_array_equal(loc2, expected2) + + key3 = ("2016-01", "a") + loc3 = mi.get_loc(key3) + expected3 = np.zeros(len(mi), dtype=bool) + expected3[mi.get_level_values(1).get_loc("a")] = True + tm.assert_numpy_array_equal(loc3, expected3) + + key4 = ("2016", "a") + loc4 = mi.get_loc(key4) + expected4 = expected3 + tm.assert_numpy_array_equal(loc4, expected4) + + # non-monotonic + taker = np.arange(len(mi), dtype=np.intp) + taker[::2] = taker[::-2] + mi2 = mi.take(taker) + loc5 = mi2.get_loc(key) + expected5 = np.zeros(len(mi2), dtype=bool) + expected5[[3, 14]] = True + tm.assert_numpy_array_equal(loc5, expected5) + + +def test_partial_string_timestamp_multiindex(df): + # GH10331 + df_swap = df.swaplevel(0, 1).sort_index() + SLC = IndexSlice + + # indexing with IndexSlice + result = df.loc[SLC["2016-01-01":"2016-02-01", :], :] + expected = df + tm.assert_frame_equal(result, expected) + + # match on secondary index + 
result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :] + expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # partial string match on year only + result = df.loc["2016"] + expected = df + tm.assert_frame_equal(result, expected) + + # partial string match on date + result = df.loc["2016-01-01"] + expected = df.iloc[0:6] + tm.assert_frame_equal(result, expected) + + # partial string match on date and hour, from middle + result = df.loc["2016-01-02 12"] + # hourly resolution, same as index.levels[0], so we are _not_ slicing on + # that level, so that level gets dropped + expected = df.iloc[9:12].droplevel(0) + tm.assert_frame_equal(result, expected) + + # partial string match on secondary index + result = df_swap.loc[SLC[:, "2016-01-02"], :] + expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]] + tm.assert_frame_equal(result, expected) + + # tuple selector with partial string match on date + # "2016-01-01" has daily resolution, so _is_ a slice on the first level. + result = df.loc[("2016-01-01", "a"), :] + expected = df.iloc[[0, 3]] + expected = df.iloc[[0, 3]].droplevel(1) + tm.assert_frame_equal(result, expected) + + # Slicing date on first level should break (of course) bc the DTI is the + # second level on df_swap + with pytest.raises(KeyError, match="'2016-01-01'"): + df_swap.loc["2016-01-01"] + + +def test_partial_string_timestamp_multiindex_str_key_raises(df): + # Even though this syntax works on a single index, this is somewhat + # ambiguous and we don't want to extend this behavior forward to work + # in multi-indexes. This would amount to selecting a scalar from a + # column. + with pytest.raises(KeyError, match="'2016-01-01'"): + df["2016-01-01"] + + +def test_partial_string_timestamp_multiindex_daily_resolution(df): + # GH12685 (partial string with daily resolution or below) + result = df.loc[IndexSlice["2013-03":"2013-03", :], :] + expected = df.iloc[118:180] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_pickle.py b/pandas/tests/indexes/multi/test_pickle.py new file mode 100644 index 00000000..1d8b7214 --- /dev/null +++ b/pandas/tests/indexes/multi/test_pickle.py @@ -0,0 +1,10 @@ +import pytest + +from pandas import MultiIndex + + +def test_pickle_compat_construction(): + # this is testing for pickle compat + # need an object to create with + with pytest.raises(TypeError, match="Must pass both levels and codes"): + MultiIndex() diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py new file mode 100644 index 00000000..5d124c19 --- /dev/null +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -0,0 +1,161 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_reindex(idx): + result, indexer = idx.reindex(list(idx[:4])) + assert isinstance(result, MultiIndex) + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] + + result, indexer = idx.reindex(list(idx)) + assert isinstance(result, MultiIndex) + assert indexer is None + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] + + +def test_reindex_level(idx): + index = Index(["one"]) + + target, indexer = idx.reindex(index, level="second") + target2, indexer2 = index.reindex(idx, level="second") + + exp_index = idx.join(index, level="second", how="right") + exp_index2 = idx.join(index, 
level="second", how="left") + + assert target.equals(exp_index) + exp_indexer = np.array([0, 2, 4]) + tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) + + assert target2.equals(exp_index2) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) + + with pytest.raises(TypeError, match="Fill method not supported"): + idx.reindex(idx, method="pad", level="second") + + +def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): + # GH6552 + idx = idx.copy() + target = idx.copy() + idx.names = target.names = [None, None] + + other_dtype = MultiIndex.from_product([[1, 2], [3, 4]]) + + # list & ndarray cases + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] + + idx.names = ["foo", "bar"] + assert idx.reindex([])[0].names == ["foo", "bar"] + assert idx.reindex(np.array([]))[0].names == ["foo", "bar"] + assert idx.reindex(target.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(target.values)[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"] + + +def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): + # GH7774 + idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"]) + assert idx.reindex([], level=0)[0].names == ["foo", "bar"] + assert idx.reindex([], level=1)[0].names == ["foo", "bar"] + + +def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(): + # GH7774 + idx = MultiIndex.from_product([[0, 1], ["a", "b"]]) + assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 + assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_ + + # case with EA levels + cat = pd.Categorical(["foo", "bar"]) + dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific") + mi = MultiIndex.from_product([cat, dti]) + assert mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype + assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype + + +def test_reindex_base(idx): + idx = idx + expected = np.arange(idx.size, dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + +def test_reindex_non_unique(): + idx = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)]) + a = pd.Series(np.arange(4), index=idx) + new_idx = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + msg = "cannot handle a non-unique multi-index!" 
+ with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + a.reindex(new_idx) + + +@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) +def test_reindex_empty_with_level(values): + # GH41170 + idx = MultiIndex.from_arrays(values) + result, result_indexer = idx.reindex(np.array(["b"]), level=0) + expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []]) + expected_indexer = np.array([], dtype=result_indexer.dtype) + tm.assert_index_equal(result, expected) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + +def test_reindex_not_all_tuples(): + keys = [("i", "i"), ("i", "j"), ("j", "i"), "j"] + mi = MultiIndex.from_tuples(keys[:-1]) + idx = Index(keys) + res, indexer = mi.reindex(idx) + + tm.assert_index_equal(res, idx) + expected = np.array([0, 1, 2, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + +def test_reindex_limit_arg_with_multiindex(): + # GH21247 + + idx = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")]) + + df = pd.Series([0.02, 0.01, 0.012], index=idx) + + new_idx = MultiIndex.from_tuples( + [ + (3, "A"), + (3, "B"), + (4, "A"), + (4, "B"), + (4, "C"), + (5, "B"), + (5, "C"), + (6, "B"), + (6, "C"), + ] + ) + + with pytest.raises( + ValueError, + match="limit argument only valid if doing pad, backfill or nearest reindexing", + ): + df.reindex(new_idx, fill_value=0, limit=1) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py new file mode 100644 index 00000000..eed27cd4 --- /dev/null +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -0,0 +1,185 @@ +from datetime import datetime + +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_insert(idx): + # key contained in all levels + new_index = idx.insert(0, ("bar", "two")) + assert new_index.equal_levels(idx) + assert new_index[0] == ("bar", "two") + + # key not contained in all levels + new_index = idx.insert(0, ("abc", "three")) + + exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") + tm.assert_index_equal(new_index.levels[0], exp0) + assert new_index.names == ["first", "second"] + + exp1 = Index(list(idx.levels[1]) + ["three"], name="second") + tm.assert_index_equal(new_index.levels[1], exp1) + assert new_index[0] == ("abc", "three") + + # key wrong length + msg = "Item must have length equal to number of levels" + with pytest.raises(ValueError, match=msg): + idx.insert(0, ("foo2",)) + + left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"]) + left.set_index(["1st", "2nd"], inplace=True) + ts = left["3rd"].copy(deep=True) + + left.loc[("b", "x"), "3rd"] = 2 + left.loc[("b", "a"), "3rd"] = -1 + left.loc[("b", "b"), "3rd"] = 3 + left.loc[("a", "x"), "3rd"] = 4 + left.loc[("a", "w"), "3rd"] = 5 + left.loc[("a", "a"), "3rd"] = 6 + + ts.loc[("b", "x")] = 2 + ts.loc["b", "a"] = -1 + ts.loc[("b", "b")] = 3 + ts.loc["a", "x"] = 4 + ts.loc[("a", "w")] = 5 + ts.loc["a", "a"] = 6 + + right = pd.DataFrame( + [ + ["a", "b", 0], + ["b", "d", 1], + ["b", "x", 2], + ["b", "a", -1], + ["b", "b", 3], + ["a", "x", 4], + ["a", "w", 5], + ["a", "a", 6], + ], + columns=["1st", "2nd", "3rd"], + ) + right.set_index(["1st", "2nd"], inplace=True) + # FIXME data types changes to float because + # of intermediate nan insertion; + tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right["3rd"]) + + +def 
test_insert2(): + # GH9250 + idx = ( + [("test1", i) for i in range(5)] + + [("test2", i) for i in range(6)] + + [("test", 17), ("test", 18)] + ) + + left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(idx[:-2])) + + left.loc[("test", 17)] = 11 + left.loc[("test", 18)] = 12 + + right = pd.Series(np.linspace(0, 12, 13), MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + + +def test_append(idx): + result = idx[:3].append(idx[3:]) + assert result.equals(idx) + + foos = [idx[:1], idx[1:3], idx[3:]] + result = foos[0].append(foos[1:]) + assert result.equals(idx) + + # empty + result = idx.append([]) + assert result.equals(idx) + + +def test_append_index(): + idx1 = Index([1.1, 1.2, 1.3]) + idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo") + idx3 = Index(["A", "B", "C"]) + + midx_lv2 = MultiIndex.from_arrays([idx1, idx2]) + midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3]) + + result = idx1.append(midx_lv2) + + # see gh-7112 + tz = pytz.timezone("Asia/Tokyo") + expected_tuples = [ + (1.1, tz.localize(datetime(2011, 1, 1))), + (1.2, tz.localize(datetime(2011, 1, 2))), + (1.3, tz.localize(datetime(2011, 1, 3))), + ] + expected = Index([1.1, 1.2, 1.3] + expected_tuples) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(idx1) + expected = Index(expected_tuples + [1.1, 1.2, 1.3]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv2) + expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv3) + tm.assert_index_equal(result, expected) + + result = midx_lv3.append(midx_lv2) + expected = Index._simple_new( + np.array( + [ + (1.1, tz.localize(datetime(2011, 1, 1)), "A"), + (1.2, tz.localize(datetime(2011, 1, 2)), "B"), + (1.3, tz.localize(datetime(2011, 1, 3)), "C"), + ] + + expected_tuples, + dtype=object, + ), + None, + ) + tm.assert_index_equal(result, expected) + + +def test_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(["foo", "bar"]) + + m = MultiIndex.from_product([numbers, names], names=names) + expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(m.repeat(reps), expected) + + +def test_insert_base(idx): + + result = idx[1:4] + + # test 0th element + assert idx[0:4].equals(result.insert(0, idx[0])) + + +def test_delete_base(idx): + + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + msg = "index 6 is out of bounds for axis 0 with size 6" + with pytest.raises(IndexError, match=msg): + idx.delete(len(idx)) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py new file mode 100644 index 00000000..d29abc1b --- /dev/null +++ b/pandas/tests/indexes/multi/test_setops.py @@ -0,0 +1,576 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("case", [0.5, "xxx"]) +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) +def test_set_ops_error_cases(idx, case, sort, method): + # non-iterable input + msg = "Input must be Index or array-like" + with pytest.raises(TypeError, match=msg): + getattr(idx, 
method)(case, sort=sort) + + +@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) +def test_intersection_base(idx, sort, klass): + first = idx[2::-1] # first 3 elements reversed + second = idx[:5] + + if klass is not MultiIndex: + second = klass(second.values) + + intersect = first.intersection(second, sort=sort) + if sort is None: + expected = first.sort_values() + else: + expected = first + tm.assert_index_equal(intersect, expected) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.intersection([1, 2, 3], sort=sort) + + +@pytest.mark.arm_slow +@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) +def test_union_base(idx, sort, klass): + first = idx[::-1] + second = idx[:5] + + if klass is not MultiIndex: + second = klass(second.values) + + union = first.union(second, sort=sort) + if sort is None: + expected = first.sort_values() + else: + expected = first + tm.assert_index_equal(union, expected) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.union([1, 2, 3], sort=sort) + + +def test_difference_base(idx, sort): + second = idx[4:] + answer = idx[:4] + result = idx.difference(second, sort=sort) + + if sort is None: + answer = answer.sort_values() + + assert result.equals(answer) + tm.assert_index_equal(result, answer) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = idx.difference(case, sort=sort) + tm.assert_index_equal(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + idx.difference([1, 2, 3], sort=sort) + + +def test_symmetric_difference(idx, sort): + first = idx[1:] + second = idx[:-1] + answer = idx[[-1, 0]] + result = first.symmetric_difference(second, sort=sort) + + if sort is None: + answer = answer.sort_values() + + tm.assert_index_equal(result, answer) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.symmetric_difference(case, sort=sort) + tm.assert_index_equal(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.symmetric_difference([1, 2, 3], sort=sort) + + +def test_multiindex_symmetric_difference(): + # GH 13490 + idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + result = idx ^ idx + assert result.names == idx.names + + idx2 = idx.copy().rename(["A", "B"]) + with tm.assert_produces_warning(FutureWarning): + result = idx ^ idx2 + assert result.names == [None, None] + + +def test_empty(idx): + # GH 15270 + assert not idx.empty + assert idx[:0].empty + + +def test_difference(idx, sort): + + first = idx + result = first.difference(idx[-3:], sort=sort) + vals = idx[:-3].values + + if sort is None: + vals = sorted(vals) + + expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names) + + assert isinstance(result, MultiIndex) + assert result.equals(expected) + assert result.names == idx.names + tm.assert_index_equal(result, expected) + + # empty difference: reflexive + result = idx.difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: superset + result = idx[-3:].difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == 
idx.names + + # empty difference: degenerate + result = idx[:0].difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # names not the same + chunklet = idx[-3:] + chunklet.names = ["foo", "baz"] + result = first.difference(chunklet, sort=sort) + assert result.names == (None, None) + + # empty, but non-equal + result = idx.difference(idx.sortlevel(1)[0], sort=sort) + assert len(result) == 0 + + # raise Exception called with non-MultiIndex + result = first.difference(first.values, sort=sort) + assert result.equals(first[:0]) + + # name from empty array + result = first.difference([], sort=sort) + assert first.equals(result) + assert first.names == result.names + + # name from non-empty array + result = first.difference([("foo", "one")], sort=sort) + expected = MultiIndex.from_tuples( + [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")] + ) + expected.names = first.names + assert first.names == result.names + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3, 4, 5], sort=sort) + + +def test_difference_sort_special(): + # GH-24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + # sort=None, the default + result = idx.difference([]) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_difference_sort_special_true(): + # TODO(GH#25151): decide on True behaviour + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + result = idx.difference([], sort=True) + expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(result, expected) + + +def test_difference_sort_incomparable(): + # GH-24959 + idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) + + other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) + # sort=None, the default + msg = "sort order is undefined for incomparable objects" + with tm.assert_produces_warning(RuntimeWarning, match=msg): + result = idx.difference(other) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.difference(other, sort=False) + tm.assert_index_equal(result, idx) + + +def test_difference_sort_incomparable_true(): + idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) + other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) + + msg = "The 'sort' keyword only takes the values of None or False; True was passed." + with pytest.raises(ValueError, match=msg): + idx.difference(other, sort=True) + + +def test_union(idx, sort): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_union = piece1.union(piece2, sort=sort) + + if sort is None: + tm.assert_index_equal(the_union, idx.sort_values()) + + assert tm.equalContents(the_union, idx) + + # corner case, pass self or empty thing: + the_union = idx.union(idx, sort=sort) + tm.assert_index_equal(the_union, idx) + + the_union = idx.union(idx[:0], sort=sort) + tm.assert_index_equal(the_union, idx) + + tuples = idx.values + result = idx[:4].union(tuples[4:], sort=sort) + if sort is None: + tm.equalContents(result, idx) + else: + assert result.equals(idx) + + +@pytest.mark.xfail( + # This test was commented out from Oct 2011 to Dec 2021, may no longer + # be relevant. 
+ reason="Length of names must match number of levels in MultiIndex", + raises=ValueError, +) +def test_union_with_regular_index(idx): + other = Index(["A", "B", "C"]) + + result = other.union(idx) + assert ("foo", "one") in result + assert "B" in result + + msg = "The values in the array are unorderable" + with tm.assert_produces_warning(RuntimeWarning, match=msg): + result2 = idx.union(other) + assert result.equals(result2) + + +def test_intersection(idx, sort): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_int = piece1.intersection(piece2, sort=sort) + + if sort is None: + tm.assert_index_equal(the_int, idx[3:5]) + assert tm.equalContents(the_int, idx[3:5]) + + # corner case, pass self + the_int = idx.intersection(idx, sort=sort) + tm.assert_index_equal(the_int, idx) + + # empty intersection: disjoint + empty = idx[:2].intersection(idx[2:], sort=sort) + expected = idx[:0] + assert empty.equals(expected) + + tuples = idx.values + result = idx.intersection(tuples) + assert result.equals(idx) + + +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) +def test_setop_with_categorical(idx, sort, method): + other = idx.to_flat_index().astype("category") + res_names = [None] * idx.nlevels + + result = getattr(idx, method)(other, sort=sort) + expected = getattr(idx, method)(idx, sort=sort).rename(res_names) + tm.assert_index_equal(result, expected) + + result = getattr(idx, method)(other[:5], sort=sort) + expected = getattr(idx, method)(idx[:5], sort=sort).rename(res_names) + tm.assert_index_equal(result, expected) + + +def test_intersection_non_object(idx, sort): + other = Index(range(3), name="foo") + + result = idx.intersection(other, sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None) + tm.assert_index_equal(result, expected, exact=True) + + # if we pass a length-0 ndarray (i.e. 
no name, we retain our idx.name) + result = idx.intersection(np.asarray(other)[:0], sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names) + tm.assert_index_equal(result, expected, exact=True) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + # With non-zero length non-index, we try and fail to convert to tuples + idx.intersection(np.asarray(other), sort=sort) + + +def test_intersect_equal_sort(): + # GH-24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_intersect_equal_sort_true(): + # TODO(GH#25151): decide on True behaviour + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + sorted_ = MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + +@pytest.mark.parametrize("slice_", [slice(None), slice(0)]) +def test_union_sort_other_empty(slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_empty_sort(): + # TODO(GH#25151): decide on True behaviour + # # sort=True + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + other = idx[:0] + result = idx.union(other, sort=True) + expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(result, expected) + + +def test_union_sort_other_incomparable(): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) + + # default, sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_incomparable_sort(): + # TODO(GH#25151): decide on True behaviour + # # sort=True + idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) + with pytest.raises(TypeError, match="Cannot compare"): + idx.union(idx[:1], sort=True) + + +def test_union_non_object_dtype_raises(): + # GH#32646 raise NotImplementedError instead of less-informative error + mi = MultiIndex.from_product([["a", "b"], [1, 2]]) + + idx = mi.levels[1] + + msg = "Can only union MultiIndex with MultiIndex or Index of tuples" + with pytest.raises(NotImplementedError, match=msg): + mi.union(idx) + + +def test_union_empty_self_different_names(): + # GH#38423 + mi = MultiIndex.from_arrays([[]]) + mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + result = mi.union(mi2) + expected = MultiIndex.from_arrays([[1, 2], [3, 4]]) + tm.assert_index_equal(result, expected) + + +def test_union_multiindex_empty_rangeindex(): + # GH#41234 + mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + ri = pd.RangeIndex(0) + + result_left = mi.union(ri) + tm.assert_index_equal(mi, result_left, check_names=False) + + result_right = ri.union(mi) + tm.assert_index_equal(mi, result_right, check_names=False) + + +@pytest.mark.parametrize( + "method", ["union", 
"intersection", "difference", "symmetric_difference"] +) +def test_setops_disallow_true(method): + idx1 = MultiIndex.from_product([["a", "b"], [1, 2]]) + idx2 = MultiIndex.from_product([["b", "c"], [1, 2]]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) + + +@pytest.mark.parametrize( + ("tuples", "exp_tuples"), + [ + ([("val1", "test1")], [("val1", "test1")]), + ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]), + ( + [("val2", "test2"), ("val1", "test1")], + [("val2", "test2"), ("val1", "test1")], + ), + ], +) +def test_intersect_with_duplicates(tuples, exp_tuples): + # GH#36915 + left = MultiIndex.from_tuples(tuples, names=["first", "second"]) + right = MultiIndex.from_tuples( + [("val1", "test1"), ("val1", "test1"), ("val2", "test2")], + names=["first", "second"], + ) + result = left.intersection(right) + expected = MultiIndex.from_tuples(exp_tuples, names=["first", "second"]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "data, names, expected", + [ + ((1,), None, [None, None]), + ((1,), ["a"], [None, None]), + ((1,), ["b"], [None, None]), + ((1, 2), ["c", "d"], [None, None]), + ((1, 2), ["b", "a"], [None, None]), + ((1, 2, 3), ["a", "b", "c"], [None, None]), + ((1, 2), ["a", "c"], ["a", None]), + ((1, 2), ["c", "b"], [None, "b"]), + ((1, 2), ["a", "b"], ["a", "b"]), + ((1, 2), [None, "b"], [None, "b"]), + ], +) +def test_maybe_match_names(data, names, expected): + # GH#38323 + mi = MultiIndex.from_tuples([], names=["a", "b"]) + mi2 = MultiIndex.from_tuples([data], names=names) + result = mi._maybe_match_names(mi2) + assert result == expected + + +def test_intersection_equal_different_names(): + # GH#30302 + mi1 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["c", "b"]) + mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + + result = mi1.intersection(mi2) + expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=[None, "b"]) + tm.assert_index_equal(result, expected) + + +def test_intersection_different_names(): + # GH#38323 + mi = MultiIndex.from_arrays([[1], [3]], names=["c", "b"]) + mi2 = MultiIndex.from_arrays([[1], [3]]) + result = mi.intersection(mi2) + tm.assert_index_equal(result, mi2) + + +def test_intersection_with_missing_values_on_both_sides(nulls_fixture): + # GH#38623 + mi1 = MultiIndex.from_arrays([[3, nulls_fixture, 4, nulls_fixture], [1, 2, 4, 2]]) + mi2 = MultiIndex.from_arrays([[3, nulls_fixture, 3], [1, 2, 4]]) + result = mi1.intersection(mi2) + expected = MultiIndex.from_arrays([[3.0, nulls_fixture], [1, 2]]) + tm.assert_index_equal(result, expected) + + +def test_union_nan_got_duplicated(): + # GH#38977 + mi1 = MultiIndex.from_arrays([[1.0, np.nan], [2, 3]]) + mi2 = MultiIndex.from_arrays([[1.0, np.nan, 3.0], [2, 3, 4]]) + result = mi1.union(mi2) + tm.assert_index_equal(result, mi2) + + +def test_union_duplicates(index, request): + # GH#38977 + if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)): + # No duplicates in empty indexes + return + if index.dtype.kind == "c": + mark = pytest.mark.xfail( + reason="sort_values() call raises bc complex objects are not comparable" + ) + request.node.add_marker(mark) + + values = index.unique().values.tolist() + mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) + mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) + result = mi1.union(mi2) + expected = mi2.sort_values() + if mi2.levels[0].dtype == np.uint64 and (mi2.get_level_values(0) < 
2**63).all(): + # GH#47294 - union uses lib.fast_zip, converting data to Python integers + # and loses type information. Result is then unsigned only when values are + # sufficiently large to require unsigned dtype. + expected = expected.set_levels( + [expected.levels[0].astype(int), expected.levels[1]] + ) + tm.assert_index_equal(result, expected) + + result = mi2.union(mi1) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "levels1, levels2, codes1, codes2, names", + [ + ( + [["a", "b", "c"], [0, ""]], + [["c", "d", "b"], [""]], + [[0, 1, 2], [1, 1, 1]], + [[0, 1, 2], [0, 0, 0]], + ["name1", "name2"], + ), + ], +) +def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names): + # GH#25169 + mi1 = MultiIndex(levels=levels1, codes=codes1, names=names) + mi2 = MultiIndex(levels=levels2, codes=codes2, names=names) + mi_int = mi1.intersection(mi2) + + with tm.assert_produces_warning(FutureWarning, match="MultiIndex.lexsort_depth"): + assert mi_int.lexsort_depth == 0 diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py new file mode 100644 index 00000000..6fd1781b --- /dev/null +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -0,0 +1,282 @@ +import random + +import numpy as np +import pytest + +from pandas.errors import ( + PerformanceWarning, + UnsortedIndexError, +) + +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + RangeIndex, +) +import pandas._testing as tm +from pandas.core.indexes.frozen import FrozenList + + +def test_sortlevel(idx): + tuples = list(idx) + random.shuffle(tuples) + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sortlevel_not_sort_remaining(): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + sorted_idx, _ = mi.sortlevel("A", sort_remaining=False) + assert sorted_idx.equals(mi) + + +def test_sortlevel_deterministic(): + tuples = [ + ("bar", "one"), + ("foo", "two"), + ("qux", "two"), + ("foo", "one"), + ("baz", "two"), + ("qux", "one"), + ] + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_numpy_argsort(idx): + result = np.argsort(idx) + expected = idx.argsort() + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if 
isinstance(type(idx), (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, axis=1) + + msg = "the 'kind' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, kind="mergesort") + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, order=("a", "b")) + + +def test_unsortedindex(): + # GH 11897 + mi = MultiIndex.from_tuples( + [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], + names=["one", "two"], + ) + df = DataFrame([[i, 10 * i] for i in range(6)], index=mi, columns=["one", "two"]) + + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)["z", "a"] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + + msg = ( + "MultiIndex slicing requires the index to be lexsorted: " + r"slicing on levels \[1\], lexsort depth 0" + ) + with pytest.raises(UnsortedIndexError, match=msg): + df.loc(axis=0)["z", slice("a")] + df.sort_index(inplace=True) + assert len(df.loc(axis=0)["z", :]) == 2 + + with pytest.raises(KeyError, match="'q'"): + df.loc(axis=0)["q", :] + + +def test_unsortedindex_doc_examples(): + # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex + dfm = DataFrame( + {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)} + ) + + dfm = dfm.set_index(["jim", "joe"]) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, "z")] + + msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)" + with pytest.raises(UnsortedIndexError, match=msg): + dfm.loc[(0, "y"):(1, "z")] + + assert not dfm.index._is_lexsorted() + assert dfm.index._lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, "z")] + dfm.loc[(0, "y"):(1, "z")] + + assert dfm.index._is_lexsorted() + assert dfm.index._lexsort_depth == 2 + + +def test_reconstruct_sort(): + + # starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + assert mi.is_monotonic_increasing + recons = mi._sort_levels_monotonic() + assert recons.is_monotonic_increasing + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex.from_tuples( + [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], + names=["one", "two"], + ) + assert not mi.is_monotonic_increasing + recons = mi._sort_levels_monotonic() + assert not recons.is_monotonic_increasing + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex( + levels=[["b", "d", "a"], [1, 2, 3]], + codes=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=["col1", "col2"], + ) + assert not mi.is_monotonic_increasing + recons = mi._sort_levels_monotonic() + assert not recons.is_monotonic_increasing + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + +def test_reconstruct_remove_unused(): + # xref to GH 2770 + df = DataFrame( + [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]], + columns=["first", "second", "third"], + ) + df2 = df.set_index(["first", "second"], drop=False) + df2 = df2[df2["first"] != "deleteMe"] + + # removed levels are there + expected = MultiIndex( + levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]], + codes=[[1, 2], [1, 2]], + names=["first", "second"], + ) + result = df2.index + 
tm.assert_index_equal(result, expected) + + expected = MultiIndex( + levels=[["keepMe", "keepMeToo"], [2, 3]], + codes=[[0, 1], [0, 1]], + names=["first", "second"], + ) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2.is_(result) + + +@pytest.mark.parametrize( + "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")] +) +def test_remove_unused_levels_large(first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.RandomState(4) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame( + { + "first": rng.randint(0, 1 << 13, size).astype(first_type), + "second": rng.randint(0, 1 << 10, size).astype(second_type), + "third": rng.rand(size), + } + ) + df = df.groupby(["first", "second"]).sum() + df = df[df.third < 0.1] + + result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(["first", "second"]).index + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]]) +@pytest.mark.parametrize( + "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]] +) +def test_remove_unused_nan(level0, level1): + # GH 18417 + mi = MultiIndex(levels=[level0, level1], codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) + + result = mi.remove_unused_levels() + tm.assert_index_equal(result, mi) + for level in 0, 1: + assert "unused" not in result.levels[level] + + +def test_argsort(idx): + result = idx.argsort() + expected = idx.values.argsort() + tm.assert_numpy_array_equal(result, expected) + + +def test_remove_unused_levels_with_nan(): + # GH 37510 + idx = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"]) + idx = idx.set_levels(["a", np.nan], level="id1") + idx = idx.remove_unused_levels() + result = idx.levels + expected = FrozenList([["a", np.nan], [4]]) + assert str(result) == str(expected) diff --git a/pandas/tests/indexes/multi/test_take.py b/pandas/tests/indexes/multi/test_take.py new file mode 100644 index 00000000..f8e7632c --- /dev/null +++ b/pandas/tests/indexes/multi/test_take.py @@ -0,0 +1,79 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_take(idx): + indexer = [4, 3, 0, 2] + result = idx.take(indexer) + expected = idx[indexer] + assert result.equals(expected) + + # GH 10791 + msg = "'MultiIndex' object has no attribute 'freq'" + with pytest.raises(AttributeError, match=msg): + idx.freq + + +def test_take_invalid_kwargs(idx): + idx = idx + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +def test_take_fill_value(): + # GH 12631 + vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] + idx = pd.MultiIndex.from_product(vals, names=["str", "dt"]) + + result = idx.take(np.array([1, 0, 
-1])) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + ("B", pd.Timestamp("2011-01-02")), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + (np.nan, pd.NaT), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + ("B", pd.Timestamp("2011-01-02")), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for( axis 0 with)? size 4" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) diff --git a/pandas/tests/indexes/numeric/__init__.py b/pandas/tests/indexes/numeric/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py new file mode 100644 index 00000000..ee75f56e --- /dev/null +++ b/pandas/tests/indexes/numeric/test_astype.py @@ -0,0 +1,99 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.common import pandas_dtype + +from pandas import Index +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +class TestAstype: + def test_astype_float64_to_uint64(self): + # GH#45309 used to incorrectly return Int64Index + idx = Float64Index([0.0, 5.0, 10.0, 15.0, 20.0]) + result = idx.astype("u8") + expected = UInt64Index([0, 5, 10, 15, 20]) + tm.assert_index_equal(result, expected) + + idx_with_negatives = idx - 10 + with pytest.raises(ValueError, match="losslessly"): + idx_with_negatives.astype(np.uint64) + + def test_astype_float64_to_object(self): + float_index = Float64Index([0.0, 2.5, 5.0, 7.5, 10.0]) + result = float_index.astype(object) + assert result.equals(float_index) + assert float_index.equals(result) + assert isinstance(result, Index) and not isinstance(result, Float64Index) + + def test_astype_float64_mixed_to_object(self): + # mixed int-float + idx = Float64Index([1.5, 2, 3, 4, 5]) + idx.name = "foo" + result = idx.astype(object) + assert result.equals(idx) + assert idx.equals(result) + assert isinstance(result, Index) and not isinstance(result, Float64Index) + + @pytest.mark.parametrize("dtype", ["int16", "int32", "int64"]) + def test_astype_float64_to_int_dtype(self, dtype): + # GH#12881 + # a float astype int + idx = Float64Index([0, 1, 2]) + result = idx.astype(dtype) + expected = Int64Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + idx = Float64Index([0, 1.1, 2]) + result = idx.astype(dtype) + expected = Int64Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["float32", "float64"]) + def test_astype_float64_to_float_dtype(self, dtype): + # GH#12881 + # a float astype int + idx = Float64Index([0, 1, 2]) + result = 
idx.astype(dtype) + expected = idx + tm.assert_index_equal(result, expected) + + idx = Float64Index([0, 1.1, 2]) + result = idx.astype(dtype) + expected = Index(idx.values.astype(dtype)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_cannot_cast_to_datetimelike(self, dtype): + idx = Float64Index([0, 1.1, 2]) + + msg = ( + f"Cannot convert Float64Index to dtype {pandas_dtype(dtype)}; " + f"integer values are required for conversion" + ) + with pytest.raises(TypeError, match=re.escape(msg)): + idx.astype(dtype) + + @pytest.mark.parametrize("dtype", [int, "int16", "int32", "int64"]) + @pytest.mark.parametrize("non_finite", [np.inf, np.nan]) + def test_cannot_cast_inf_to_int(self, non_finite, dtype): + # GH#13149 + idx = Float64Index([1, 2, non_finite]) + + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + idx.astype(dtype) + + def test_astype_from_object(self): + index = Index([1.0, np.nan, 0.2], dtype="object") + result = index.astype(float) + expected = Float64Index([1.0, np.nan, 0.2]) + assert result.dtype == expected.dtype + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py new file mode 100644 index 00000000..1c4cbd21 --- /dev/null +++ b/pandas/tests/indexes/numeric/test_indexing.py @@ -0,0 +1,595 @@ +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +from pandas import ( + Index, + RangeIndex, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +@pytest.fixture +def index_large(): + # large values used in UInt64Index tests where no compat needed with Int64/Float64 + large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + return UInt64Index(large) + + +class TestGetLoc: + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + def test_get_loc(self, method): + index = Index([0, 1, 2]) + warn = None if method is None else FutureWarning + + with tm.assert_produces_warning(warn, match="deprecated"): + assert index.get_loc(1, method=method) == 1 + + if method: + with tm.assert_produces_warning(warn, match="deprecated"): + assert index.get_loc(1, method=method, tolerance=0) == 1 + + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc_raises_bad_label(self, method): + index = Index([0, 1, 2]) + if method: + msg = "not supported between" + err = TypeError + else: + msg = r"\[1, 2\]" + err = InvalidIndexError + + with pytest.raises(err, match=msg): + index.get_loc([1, 2], method=method) + + @pytest.mark.parametrize( + "method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)] + ) + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc_tolerance(self, method, loc): + index = Index([0, 1, 2]) + assert index.get_loc(1.1, method) == loc + assert index.get_loc(1.1, method, tolerance=1) == loc + + @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) + def test_get_loc_outside_tolerance_raises(self, method): + index = Index([0, 1, 2]) + with pytest.raises(KeyError, match="1.1"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc(1.1, method, tolerance=0.05) + + def test_get_loc_bad_tolerance_raises(self): + index = Index([0, 1, 2]) + with 
pytest.raises(ValueError, match="must be numeric"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc(1.1, "nearest", tolerance="invalid") + + def test_get_loc_tolerance_no_method_raises(self): + index = Index([0, 1, 2]) + with pytest.raises(ValueError, match="tolerance .* valid if"): + index.get_loc(1.1, tolerance=1) + + def test_get_loc_raises_missized_tolerance(self): + index = Index([0, 1, 2]) + with pytest.raises(ValueError, match="tolerance size must match"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc(1.1, "nearest", tolerance=[1, 1]) + + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc_float64(self): + idx = Float64Index([0.0, 1.0, 2.0]) + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(1, method) == 1 + if method is not None: + assert idx.get_loc(1, method, tolerance=0) == 1 + + for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: + assert idx.get_loc(1.1, method) == loc + assert idx.get_loc(1.1, method, tolerance=0.9) == loc + + with pytest.raises(KeyError, match="^'foo'$"): + idx.get_loc("foo") + with pytest.raises(KeyError, match=r"^1\.5$"): + idx.get_loc(1.5) + with pytest.raises(KeyError, match=r"^1\.5$"): + idx.get_loc(1.5, method="pad", tolerance=0.1) + with pytest.raises(KeyError, match="^True$"): + idx.get_loc(True) + with pytest.raises(KeyError, match="^False$"): + idx.get_loc(False) + + with pytest.raises(ValueError, match="must be numeric"): + idx.get_loc(1.4, method="nearest", tolerance="foo") + + with pytest.raises(ValueError, match="must contain numeric elements"): + idx.get_loc(1.4, method="nearest", tolerance=np.array(["foo"])) + + with pytest.raises( + ValueError, match="tolerance size must match target index size" + ): + idx.get_loc(1.4, method="nearest", tolerance=np.array([1, 2])) + + def test_get_loc_na(self): + idx = Float64Index([np.nan, 1, 2]) + assert idx.get_loc(1) == 1 + assert idx.get_loc(np.nan) == 0 + + idx = Float64Index([np.nan, 1, np.nan]) + assert idx.get_loc(1) == 1 + + # representable by slice [0:2:2] + msg = "'Cannot get left slice bound for non-unique label: nan'" + with pytest.raises(KeyError, match=msg): + idx.slice_locs(np.nan) + # not representable by slice + idx = Float64Index([np.nan, 1, np.nan, np.nan]) + assert idx.get_loc(1) == 1 + msg = "'Cannot get left slice bound for non-unique label: nan" + with pytest.raises(KeyError, match=msg): + idx.slice_locs(np.nan) + + def test_get_loc_missing_nan(self): + # GH#8569 + idx = Float64Index([1, 2]) + assert idx.get_loc(1) == 0 + with pytest.raises(KeyError, match=r"^3$"): + idx.get_loc(3) + with pytest.raises(KeyError, match="^nan$"): + idx.get_loc(np.nan) + with pytest.raises(InvalidIndexError, match=r"\[nan\]"): + # listlike/non-hashable raises TypeError + idx.get_loc([np.nan]) + + @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]]) + @pytest.mark.parametrize("method", ["nearest", "pad", "backfill"]) + def test_get_loc_float_index_nan_with_method(self, vals, method): + # GH#39382 + idx = Index(vals) + with pytest.raises(KeyError, match="nan"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + idx.get_loc(np.nan, method=method) + + @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) + def test_get_loc_numericindex_none_raises(self, dtype): + # case that goes through searchsorted and key is non-comparable to values + arr = np.arange(10**7, dtype=dtype) + idx = Index(arr) + with 
pytest.raises(KeyError, match="None"): + idx.get_loc(None) + + def test_get_loc_overflows(self): + # unique but non-monotonic goes through IndexEngine.mapping.get_item + idx = Index([0, 2, 1]) + + val = np.iinfo(np.int64).max + 1 + + with pytest.raises(KeyError, match=str(val)): + idx.get_loc(val) + with pytest.raises(KeyError, match=str(val)): + idx._engine.get_loc(val) + + +class TestGetIndexer: + def test_get_indexer(self): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) + + r1 = index1.get_indexer(index2) + e1 = np.array([1, 3, -1], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize( + "expected,method", + [ + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"), + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"), + ], + ) + def test_get_indexer_methods(self, reverse, expected, method): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) + + if reverse: + index1 = index1[::-1] + expected = expected[::-1] + + result = index2.get_indexer(index1, method=method) + tm.assert_almost_equal(result, expected) + + def test_get_indexer_invalid(self): + # GH10411 + index = Index(np.arange(10)) + + with pytest.raises(ValueError, match="tolerance argument"): + index.get_indexer([1, 0], tolerance=1) + + with pytest.raises(ValueError, match="limit argument"): + index.get_indexer([1, 0], limit=1) + + @pytest.mark.parametrize( + "method, tolerance, indexer, expected", + [ + ("pad", None, [0, 5, 9], [0, 5, 9]), + ("backfill", None, [0, 5, 9], [0, 5, 9]), + ("nearest", None, [0, 5, 9], [0, 5, 9]), + ("pad", 0, [0, 5, 9], [0, 5, 9]), + ("backfill", 0, [0, 5, 9], [0, 5, 9]), + ("nearest", 0, [0, 5, 9], [0, 5, 9]), + ("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]), + ("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]), + ("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]), + ("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]), + ("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]), + ("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]), + ("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), + ("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), + ("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]), + ], + ) + def test_get_indexer_nearest(self, method, tolerance, indexer, expected): + index = Index(np.arange(10)) + + actual = index.get_indexer(indexer, method=method, tolerance=tolerance) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + @pytest.mark.parametrize("listtype", [list, tuple, Series, np.array]) + @pytest.mark.parametrize( + "tolerance, expected", + list( + zip( + [[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]], + [[0, 2, -1], [0, -1, -1], [-1, 2, 9]], + ) + ), + ) + def test_get_indexer_nearest_listlike_tolerance( + self, tolerance, expected, listtype + ): + index = Index(np.arange(10)) + + actual = index.get_indexer( + [0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance) + ) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + def test_get_indexer_nearest_error(self): + index = Index(np.arange(10)) + with pytest.raises(ValueError, match="limit argument"): + index.get_indexer([1, 0], method="nearest", limit=1) + + with pytest.raises(ValueError, match="tolerance size must match"): + index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3]) + + @pytest.mark.parametrize( + "method,expected", + [("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])], + ) + def 
test_get_indexer_nearest_decreasing(self, method, expected): + index = Index(np.arange(10))[::-1] + + actual = index.get_indexer([0, 5, 9], method=method) + tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp)) + + actual = index.get_indexer([0.2, 1.8, 8.5], method=method) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + @pytest.mark.parametrize( + "idx_class", [Int64Index, RangeIndex, Float64Index, UInt64Index] + ) + @pytest.mark.parametrize("method", ["get_indexer", "get_indexer_non_unique"]) + def test_get_indexer_numeric_index_boolean_target(self, method, idx_class): + # GH 16877 + + numeric_index = idx_class(RangeIndex(4)) + other = Index([True, False, True]) + + result = getattr(numeric_index, method)(other) + expected = np.array([-1, -1, -1], dtype=np.intp) + if method == "get_indexer": + tm.assert_numpy_array_equal(result, expected) + else: + missing = np.arange(3, dtype=np.intp) + tm.assert_numpy_array_equal(result[0], expected) + tm.assert_numpy_array_equal(result[1], missing) + + @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) + def test_get_indexer_with_method_numeric_vs_bool(self, method): + left = Index([1, 2, 3]) + right = Index([True, False]) + + with pytest.raises(TypeError, match="Cannot compare"): + left.get_indexer(right, method=method) + + with pytest.raises(TypeError, match="Cannot compare"): + right.get_indexer(left, method=method) + + def test_get_indexer_numeric_vs_bool(self): + left = Index([1, 2, 3]) + right = Index([True, False]) + + res = left.get_indexer(right) + expected = -1 * np.ones(len(right), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = right.get_indexer(left) + expected = -1 * np.ones(len(left), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = left.get_indexer_non_unique(right)[0] + expected = -1 * np.ones(len(right), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = right.get_indexer_non_unique(left)[0] + expected = -1 * np.ones(len(left), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + def test_get_indexer_float64(self): + idx = Float64Index([0.0, 1.0, 2.0]) + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = [-0.1, 0.5, 1.1] + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + def test_get_indexer_nan(self): + # GH#7820 + result = Float64Index([1, 2, np.nan]).get_indexer([np.nan]) + expected = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_int64(self): + index = Int64Index(range(0, 20, 2)) + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def 
test_get_indexer_uint64(self, index_large): + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63) + indexer = index_large.get_indexer(target) + expected = np.array([0, -1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63) + indexer = index_large.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63) + indexer = index_large.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + +class TestWhere: + @pytest.mark.parametrize( + "index", + [ + Float64Index(np.arange(5, dtype="float64")), + Int64Index(range(0, 20, 2)), + UInt64Index(np.arange(5, dtype="uint64")), + ], + ) + def test_where(self, listlike_box, index): + cond = [True] * len(index) + expected = index + result = index.where(listlike_box(cond)) + + cond = [False] + [True] * (len(index) - 1) + expected = Float64Index([index._na_value] + index[1:].tolist()) + result = index.where(listlike_box(cond)) + tm.assert_index_equal(result, expected) + + def test_where_uint64(self): + idx = UInt64Index([0, 6, 2]) + mask = np.array([False, True, False]) + other = np.array([1], dtype=np.int64) + + expected = UInt64Index([1, 6, 1]) + + result = idx.where(mask, other) + tm.assert_index_equal(result, expected) + + result = idx.putmask(~mask, other) + tm.assert_index_equal(result, expected) + + def test_where_infers_type_instead_of_trying_to_convert_string_to_float(self): + # GH 32413 + index = Index([1, np.nan]) + cond = index.notna() + other = Index(["a", "b"], dtype="string") + + expected = Index([1.0, "b"]) + result = index.where(cond, other) + + tm.assert_index_equal(result, expected) + + +class TestTake: + @pytest.mark.parametrize("klass", [Float64Index, Int64Index, UInt64Index]) + def test_take_preserve_name(self, klass): + index = klass([1, 2, 3, 4], name="foo") + taken = index.take([3, 0, 1]) + assert index.name == taken.name + + def test_take_fill_value_float64(self): + # GH 12631 + idx = Float64Index([1.0, 2.0, 3.0], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = Float64Index([2.0, 1.0, 3.0], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = Float64Index([2.0, 1.0, np.nan], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = Float64Index([2.0, 1.0, 3.0], name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + @pytest.mark.parametrize("klass", [Int64Index, UInt64Index]) + def test_take_fill_value_ints(self, klass): + # see gh-12631 + idx = klass([1, 2, 3], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = klass([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + name = 
klass.__name__ + msg = f"Unable to fill values because {name} cannot contain NA" + + # fill_value=True + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -1]), fill_value=True) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = klass([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestContains: + @pytest.mark.parametrize("klass", [Float64Index, Int64Index, UInt64Index]) + def test_contains_none(self, klass): + # GH#35788 should return False, not raise TypeError + index = klass([0, 1, 2, 3, 4]) + assert None not in index + + def test_contains_float64_nans(self): + index = Float64Index([1.0, 2.0, np.nan]) + assert np.nan in index + + def test_contains_float64_not_nans(self): + index = Float64Index([1.0, 2.0, np.nan]) + assert 1.0 in index + + +class TestSliceLocs: + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs(self, dtype): + index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(index) + + assert index.slice_locs(start=2) == (2, n) + assert index.slice_locs(start=3) == (3, n) + assert index.slice_locs(3, 8) == (3, 6) + assert index.slice_locs(5, 10) == (3, n) + assert index.slice_locs(end=8) == (0, 6) + assert index.slice_locs(end=9) == (0, 7) + + # reversed + index2 = index[::-1] + assert index2.slice_locs(8, 2) == (2, 6) + assert index2.slice_locs(7, 3) == (2, 5) + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs_float_locs(self, dtype): + index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(index) + assert index.slice_locs(5.0, 10.0) == (3, n) + assert index.slice_locs(4.5, 10.5) == (3, 8) + + index2 = index[::-1] + assert index2.slice_locs(8.5, 1.5) == (2, 6) + assert index2.slice_locs(10.5, -1) == (0, n) + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs_dup_numeric(self, dtype): + index = Index(np.array([10, 12, 12, 14], dtype=dtype)) + assert index.slice_locs(12, 12) == (1, 3) + assert index.slice_locs(11, 13) == (1, 3) + + index2 = index[::-1] + assert index2.slice_locs(12, 12) == (1, 3) + assert index2.slice_locs(13, 11) == (1, 3) + + def test_slice_locs_na(self): + index = Index([np.nan, 1, 2]) + assert index.slice_locs(1) == (1, 3) + assert index.slice_locs(np.nan) == (0, 3) + + index = Index([0, np.nan, np.nan, 1, 2]) + assert index.slice_locs(np.nan) == (1, 5) + + def test_slice_locs_na_raises(self): + index = Index([np.nan, 1, 2]) + with pytest.raises(KeyError, match=""): + index.slice_locs(start=1.5) + + with pytest.raises(KeyError, match=""): + index.slice_locs(end=1.5) + + +class TestGetSliceBounds: + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) + def test_get_slice_bounds_within(self, kind, side, expected): + index = Index(range(6)) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + + result = index.get_slice_bound(4, kind=kind, side=side) + assert result == expected + + @pytest.mark.parametrize("kind", ["getitem", "loc", None]) + @pytest.mark.parametrize("side", ["left", "right"]) + @pytest.mark.parametrize("bound, 
expected", [(-1, 0), (10, 6)]) + def test_get_slice_bounds_outside(self, kind, side, expected, bound): + index = Index(range(6)) + with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): + result = index.get_slice_bound(bound, kind=kind, side=side) + assert result == expected diff --git a/pandas/tests/indexes/numeric/test_join.py b/pandas/tests/indexes/numeric/test_join.py new file mode 100644 index 00000000..9bbe7a64 --- /dev/null +++ b/pandas/tests/indexes/numeric/test_join.py @@ -0,0 +1,392 @@ +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.indexes.api import ( + Index, + Int64Index, + UInt64Index, +) + + +class TestJoinInt64Index: + def test_join_non_unique(self): + left = Index([4, 4, 3, 3]) + + joined, lidx, ridx = left.join(left, return_indexers=True) + + exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) + tm.assert_index_equal(joined, exp_joined) + + exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(lidx, exp_lidx) + + exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + def test_join_inner(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([2, 12]) + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([4, 1], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True) + + res2 = index.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([1, 4], dtype=np.intp) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + eres = index + eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.intp) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + index = Int64Index(range(0, 20, 2)) + other = 
Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) + + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # monotonic + res, lidx, ridx = index.join(other_mono, how="right", return_indexers=True) + eres = other_mono + elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp) + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self): + index = Int64Index(range(0, 20, 2)) + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = index.join(other, how="outer") + outer2 = other.join(index, how="outer") + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index.join(other, how="inner") + inner2 = other.join(index, how="inner") + expected = Index([6, 8, 10]) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index.join(other, how="left") + tm.assert_index_equal(left, index.astype(object)) + + left2 = other.join(index, how="left") + tm.assert_index_equal(left2, other) + + right = index.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index, how="right") + tm.assert_index_equal(right2, index.astype(object)) + + def test_join_outer(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp + ) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="outer", return_indexers=True) + noidx_res = index.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp + ) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + +class TestJoinUInt64Index: + @pytest.fixture + def index_large(self): + # large values used in TestUInt64Index where no compat needed with Int64/Float64 + large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 
+ 25] + return UInt64Index(large) + + def test_join_inner(self, index_large): + other = UInt64Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = UInt64Index(2**63 + np.array([10, 25], dtype="uint64")) + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([5, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="inner", return_indexers=True + ) + + res2 = index_large.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([3, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self, index_large): + other = UInt64Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="left", return_indexers=True) + eres = index_large + eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = UInt64Index(2**63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = UInt64Index(2**63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2**63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self, index_large): + other = UInt64Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp) + + tm.assert_numpy_array_equal(lidx, elidx) + assert isinstance(other, UInt64Index) + tm.assert_index_equal(res, eres) + assert ridx is None + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="right", return_indexers=True + ) + eres = other_mono + elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp) + + assert isinstance(other, UInt64Index) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_index_equal(res, eres) + assert ridx is None + + # non-unique + idx = UInt64Index(2**63 + np.array([1, 
1, 2, 5], dtype="uint64")) + idx2 = UInt64Index(2**63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2**63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self, index_large): + other = Index( + 2**63 + np.array([1, 5, 7, 10, 20], dtype="uint64"), dtype=object + ) + + outer = index_large.join(other, how="outer") + outer2 = other.join(index_large, how="outer") + expected = Index( + 2**63 + np.array([0, 1, 5, 7, 10, 15, 20, 25], dtype="uint64") + ) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index_large.join(other, how="inner") + inner2 = other.join(index_large, how="inner") + expected = Index(2**63 + np.array([10, 20], dtype="uint64")) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index_large.join(other, how="left") + tm.assert_index_equal(left, index_large.astype(object)) + + left2 = other.join(index_large, how="left") + tm.assert_index_equal(left2, other) + + right = index_large.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index_large, how="right") + tm.assert_index_equal(right2, index_large.astype(object)) + + def test_join_outer(self, index_large): + other = UInt64Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index_large.join(other, how="outer", return_indexers=True) + noidx_res = index_large.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = UInt64Index( + 2**63 + np.array([0, 1, 2, 7, 10, 12, 15, 20, 25], dtype="uint64") + ) + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="outer", return_indexers=True + ) + noidx_res = index_large.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py new file mode 100644 index 00000000..23262cb2 --- /dev/null +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -0,0 +1,703 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import Timestamp + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.tests.indexes.common import NumericBase + + +class TestFloatNumericIndex(NumericBase): + _index_cls = NumericIndex + + @pytest.fixture(params=[np.float64, 
np.float32]) + def dtype(self, request): + return request.param + + @pytest.fixture(params=["category", "datetime64", "object"]) + def invalid_dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self, dtype): + values = np.arange(5, dtype=dtype) + return self._index_cls(values) + + @pytest.fixture( + params=[ + [1.5, 2, 3, 4, 5], + [0.0, 2.5, 5.0, 7.5, 10.0], + [5, 4, 3, 2, 1.5], + [10.0, 7.5, 5.0, 2.5, 0.0], + ], + ids=["mixed", "float", "mixed_dec", "float_dec"], + ) + def index(self, request, dtype): + return self._index_cls(request.param, dtype=dtype) + + @pytest.fixture + def mixed_index(self, dtype): + return self._index_cls([1.5, 2, 3, 4, 5], dtype=dtype) + + @pytest.fixture + def float_index(self, dtype): + return self._index_cls([0.0, 2.5, 5.0, 7.5, 10.0], dtype=dtype) + + def test_repr_roundtrip(self, index): + tm.assert_index_equal(eval(repr(index)), index, exact=True) + + def check_is_index(self, idx): + assert isinstance(idx, Index) + assert not isinstance(idx, self._index_cls) + + def check_coerce(self, a, b, is_float_index=True): + assert a.equals(b) + tm.assert_index_equal(a, b, exact=False) + if is_float_index: + assert isinstance(b, self._index_cls) + else: + self.check_is_index(b) + + def test_constructor(self, dtype): + index_cls = self._index_cls + + # explicit construction + index = index_cls([1, 2, 3, 4, 5], dtype=dtype) + + assert isinstance(index, index_cls) + assert index.dtype == dtype + + expected = np.array([1, 2, 3, 4, 5], dtype=dtype) + tm.assert_numpy_array_equal(index.values, expected) + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + # nan handling + result = index_cls([np.nan, np.nan], dtype=dtype) + assert pd.isna(result.values).all() + + result = index_cls(np.array([np.nan]), dtype=dtype) + assert pd.isna(result.values).all() + + def test_constructor_invalid(self): + index_cls = self._index_cls + cls_name = index_cls.__name__ + + # invalid + msg = ( + rf"{cls_name}\(\.\.\.\) must be called with a collection of " + r"some kind, 0\.0 was passed" + ) + with pytest.raises(TypeError, match=msg): + index_cls(0.0) + + # 2021-02-1 we get ValueError in numpy 1.20, but not on all builds + msg = "|".join( + [ + "String dtype not supported, you may need to explicitly cast ", + "could not convert string to float: 'a'", + ] + ) + with pytest.raises((TypeError, ValueError), match=msg): + index_cls(["a", "b", 0.0]) + + msg = f"data is not compatible with {index_cls.__name__}" + with pytest.raises(ValueError, match=msg): + index_cls([Timestamp("20130101")]) + + def test_constructor_coerce(self, mixed_index, float_index): + + self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5])) + self.check_coerce(float_index, Index(np.arange(5) * 2.5)) + + with tm.assert_produces_warning(FutureWarning, match="will not infer"): + result = Index(np.array(np.arange(5) * 2.5, dtype=object)) + self.check_coerce(float_index, result.astype("float64")) + + def 
test_constructor_explicit(self, mixed_index, float_index): + + # these don't auto convert + self.check_coerce( + float_index, Index((np.arange(5) * 2.5), dtype=object), is_float_index=False + ) + self.check_coerce( + mixed_index, Index([1.5, 2, 3, 4, 5], dtype=object), is_float_index=False + ) + + def test_type_coercion_fail(self, any_int_numpy_dtype): + # see gh-15832 + msg = "Trying to coerce float values to integers" + with pytest.raises(ValueError, match=msg): + Index([1, 2, 3.5], dtype=any_int_numpy_dtype) + + def test_type_coercion_valid(self, float_numpy_dtype): + # There is no Float32Index, so we always + # generate Float64Index. + idx = Index([1, 2, 3.5], dtype=float_numpy_dtype) + tm.assert_index_equal(idx, Index([1, 2, 3.5]), exact=True) + + def test_equals_numeric(self): + index_cls = self._index_cls + + idx = index_cls([1.0, 2.0]) + assert idx.equals(idx) + assert idx.identical(idx) + + idx2 = index_cls([1.0, 2.0]) + assert idx.equals(idx2) + + idx = index_cls([1.0, np.nan]) + assert idx.equals(idx) + assert idx.identical(idx) + + idx2 = index_cls([1.0, np.nan]) + assert idx.equals(idx2) + + @pytest.mark.parametrize( + "other", + ( + Int64Index([1, 2]), + Index([1.0, 2.0], dtype=object), + Index([1, 2], dtype=object), + ), + ) + def test_equals_numeric_other_index_type(self, other): + idx = self._index_cls([1.0, 2.0]) + assert idx.equals(other) + assert other.equals(idx) + + @pytest.mark.parametrize( + "vals", + [ + pd.date_range("2016-01-01", periods=3), + pd.timedelta_range("1 Day", periods=3), + ], + ) + def test_lookups_datetimelike_values(self, vals, dtype): + + # If we have datetime64 or timedelta64 values, make sure they are + # wrapped correctly GH#31163 + ser = Series(vals, index=range(3, 6)) + ser.index = ser.index.astype(dtype) + + expected = vals[1] + + with tm.assert_produces_warning(FutureWarning): + result = ser.index.get_value(ser, 4.0) + assert isinstance(result, type(expected)) and result == expected + with tm.assert_produces_warning(FutureWarning): + result = ser.index.get_value(ser, 4) + assert isinstance(result, type(expected)) and result == expected + + result = ser[4.0] + assert isinstance(result, type(expected)) and result == expected + result = ser[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.loc[4.0] + assert isinstance(result, type(expected)) and result == expected + result = ser.loc[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.at[4.0] + assert isinstance(result, type(expected)) and result == expected + # GH#31329 .at[4] should cast to 4.0, matching .loc behavior + result = ser.at[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.iloc[1] + assert isinstance(result, type(expected)) and result == expected + + result = ser.iat[1] + assert isinstance(result, type(expected)) and result == expected + + def test_doesnt_contain_all_the_things(self): + idx = self._index_cls([np.nan]) + assert not idx.isin([0]).item() + assert not idx.isin([1]).item() + assert idx.isin([np.nan]).item() + + def test_nan_multiple_containment(self): + index_cls = self._index_cls + + idx = index_cls([1.0, np.nan]) + tm.assert_numpy_array_equal(idx.isin([1.0]), np.array([True, False])) + tm.assert_numpy_array_equal(idx.isin([2.0, np.pi]), np.array([False, False])) + tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([1.0, np.nan]), np.array([True, True])) + idx = index_cls([1.0, 2.0]) + 
tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, False])) + + def test_fillna_float64(self): + index_cls = self._index_cls + # GH 11343 + idx = Index([1.0, np.nan, 3.0], dtype=float, name="x") + # can't downcast + exp = Index([1.0, 0.1, 3.0], name="x") + tm.assert_index_equal(idx.fillna(0.1), exp, exact=True) + + # downcast + exact = True if index_cls is Int64Index else "equiv" + exp = index_cls([1.0, 2.0, 3.0], name="x") + tm.assert_index_equal(idx.fillna(2), exp, exact=exact) + + # object + exp = Index([1.0, "obj", 3.0], name="x") + tm.assert_index_equal(idx.fillna("obj"), exp, exact=True) + + +class TestFloat64Index(TestFloatNumericIndex): + _index_cls = Float64Index + + @pytest.fixture + def dtype(self, request): + return np.float64 + + @pytest.fixture( + params=["int64", "uint64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + + def test_constructor_from_base_index(self, dtype): + index_cls = self._index_cls + + result = Index(np.array([np.nan], dtype=dtype)) + assert isinstance(result, index_cls) + assert result.dtype == dtype + assert pd.isna(result.values).all() + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, index_cls) + assert index.dtype == np.float64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, index_cls) + assert index.dtype == np.float64 + + +class NumericInt(NumericBase): + def test_view(self, dtype): + index_cls = self._index_cls + + idx = index_cls([], dtype=dtype, name="Foo") + idx_view = idx.view() + assert idx_view.name == "Foo" + + idx_view = idx.view(dtype) + tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True) + + idx_view = idx.view(index_cls) + tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True) + + def test_is_monotonic(self): + index_cls = self._index_cls + + index = index_cls([1, 2, 3, 4]) + assert index.is_monotonic_increasing is True + assert index.is_monotonic_increasing is True + assert index._is_strictly_monotonic_increasing is True + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_decreasing is False + + index = index_cls([4, 3, 2, 1]) + assert index.is_monotonic_increasing is False + assert index._is_strictly_monotonic_increasing is False + assert index._is_strictly_monotonic_decreasing is True + + index = index_cls([1]) + assert index.is_monotonic_increasing is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + def test_is_strictly_monotonic(self): + index_cls = self._index_cls + + index = index_cls([1, 1, 2, 3]) + assert index.is_monotonic_increasing is True + assert index._is_strictly_monotonic_increasing is False + + index = index_cls([3, 2, 1, 1]) + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_decreasing is False + + index = index_cls([1, 1]) + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing + + def test_logical_compat(self, simple_index): + idx = simple_index + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() + + def test_identical(self, simple_index, dtype): + index = 
simple_index + + idx = Index(index.copy()) + assert idx.identical(index) + + same_values_different_type = Index(idx, dtype=object) + assert not idx.identical(same_values_different_type) + + idx = index.astype(dtype=object) + idx = idx.rename("foo") + same_values = Index(idx, dtype=object) + assert same_values.identical(idx) + + assert not idx.identical(index) + assert Index(same_values, name="foo", dtype=object).identical(idx) + + assert not index.astype(dtype=object).identical(index.astype(dtype=dtype)) + + def test_cant_or_shouldnt_cast(self): + msg = ( + "String dtype not supported, " + "you may need to explicitly cast to a numeric type" + ) + # can't + data = ["foo", "bar", "baz"] + with pytest.raises(TypeError, match=msg): + self._index_cls(data) + + # shouldn't + data = ["0", "1", "2"] + with pytest.raises(TypeError, match=msg): + self._index_cls(data) + + def test_view_index(self, simple_index): + index = simple_index + index.view(Index) + + def test_prevent_casting(self, simple_index): + index = simple_index + result = index.astype("O") + assert result.dtype == np.object_ + + +class TestIntNumericIndex(NumericInt): + _index_cls = NumericIndex + + @pytest.fixture(params=[np.int64, np.int32, np.int16, np.int8]) + def dtype(self, request): + return request.param + + @pytest.fixture(params=["category", "datetime64", "object"]) + def invalid_dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self, dtype): + return self._index_cls(range(0, 20, 2), dtype=dtype) + + @pytest.fixture( + params=[range(0, 20, 2), range(19, -1, -1)], ids=["index_inc", "index_dec"] + ) + def index(self, request, dtype): + return self._index_cls(request.param, dtype=dtype) + + def test_constructor(self, dtype): + index_cls = self._index_cls + + # scalar raise Exception + msg = ( + rf"{index_cls.__name__}\(\.\.\.\) must be called with a collection of some " + "kind, 5 was passed" + ) + with pytest.raises(TypeError, match=msg): + index_cls(5) + + # copy + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) + arr = index.values + new_index = index_cls(arr, copy=True) + tm.assert_index_equal(new_index, index, exact=True) + val = arr[0] + 3000 + + # this should not change index + arr[0] = val + assert new_index[0] != val + + if dtype == np.int64: + exact = "equiv" if index_cls != Int64Index else True + + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) + expected = Index([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=exact) + + # from iterable + index = index_cls(iter([-5, 0, 1, 2]), dtype=dtype) + expected = index_cls([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=exact) + + # interpret list-like + expected = index_cls([5, 0], dtype=dtype) + for cls in [Index, index_cls]: + for idx in [ + cls([5, 0], dtype=dtype), + cls(np.array([5, 0]), dtype=dtype), + cls(Series([5, 0]), dtype=dtype), + ]: + tm.assert_index_equal(idx, expected, exact=exact) + + def test_constructor_corner(self, dtype): + index_cls = self._index_cls + + arr = np.array([1, 2, 3, 4], dtype=object) + + index = index_cls(arr, dtype=dtype) + assert index.values.dtype == index.dtype + if dtype == np.int64: + + msg = "will not infer" + with tm.assert_produces_warning(FutureWarning, match=msg): + without_dtype = Index(arr) + + exact = True if index_cls is Int64Index else "equiv" + tm.assert_index_equal(index, without_dtype, exact=exact) + + # preventing casting + arr = np.array([1, "2", 3, "4"], dtype=object) + with 
pytest.raises(TypeError, match="casting"): + index_cls(arr, dtype=dtype) + + def test_constructor_coercion_signed_to_unsigned( + self, + any_unsigned_int_numpy_dtype, + ): + + # see gh-15832 + msg = "Trying to coerce negative values to unsigned integers" + + with pytest.raises(OverflowError, match=msg): + Index([-1], dtype=any_unsigned_int_numpy_dtype) + + def test_constructor_np_signed(self, any_signed_int_numpy_dtype): + # GH#47475 + scalar = np.dtype(any_signed_int_numpy_dtype).type(1) + result = Index([scalar]) + expected = Int64Index([1]) + tm.assert_index_equal(result, expected) + + def test_constructor_np_unsigned(self, any_unsigned_int_numpy_dtype): + # GH#47475 + scalar = np.dtype(any_unsigned_int_numpy_dtype).type(1) + result = Index([scalar]) + expected = UInt64Index([1]) + tm.assert_index_equal(result, expected) + + def test_coerce_list(self): + # coerce things + arr = Index([1, 2, 3, 4]) + assert isinstance(arr, self._index_cls) + + # but not if explicit dtype passed + arr = Index([1, 2, 3, 4], dtype=object) + assert type(arr) is Index + + +class TestInt64Index(TestIntNumericIndex): + _index_cls = Int64Index + + @pytest.fixture + def dtype(self): + return np.int64 + + @pytest.fixture( + params=["float64", "uint64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) + assert isinstance(index, index_cls) + assert index.dtype == np.int64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) + assert isinstance(index, index_cls) + assert index.dtype == np.int64 + + +class TestUIntNumericIndex(NumericInt): + + _index_cls = NumericIndex + + @pytest.fixture(params=[np.uint64]) + def dtype(self, request): + return request.param + + @pytest.fixture(params=["category", "datetime64", "object"]) + def invalid_dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self, dtype): + # compat with shared Int64/Float64 tests + return self._index_cls(np.arange(5, dtype=dtype)) + + @pytest.fixture( + params=[ + [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25], + [2**63 + 25, 2**63 + 20, 2**63 + 15, 2**63 + 10, 2**63], + ], + ids=["index_inc", "index_dec"], + ) + def index(self, request): + return self._index_cls(request.param, dtype=np.uint64) + + +class TestUInt64Index(TestUIntNumericIndex): + + _index_cls = UInt64Index + + @pytest.fixture + def dtype(self): + return np.uint64 + + @pytest.fixture( + params=["int64", "float64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + + def test_constructor(self, dtype): + index_cls = self._index_cls + exact = True if index_cls is UInt64Index else "equiv" + + idx = index_cls([1, 2, 3]) + res = Index([1, 2, 3], dtype=dtype) + tm.assert_index_equal(res, idx, exact=exact) + + idx = index_cls([1, 2**63]) + res = Index([1, 2**63], dtype=dtype) + tm.assert_index_equal(res, idx, exact=exact) + + idx = index_cls([1, 2**63]) + res = Index([1, 2**63]) + tm.assert_index_equal(res, idx, exact=exact) + + idx = Index([-1, 2**63], dtype=object) + res = Index(np.array([-1, 2**63], dtype=object)) + tm.assert_index_equal(res, idx, exact=exact) + + # https://github.com/pandas-dev/pandas/issues/29526 + idx = index_cls([1, 2**63 + 1], dtype=dtype) + res = Index([1, 2**63 + 1], dtype=dtype) + tm.assert_index_equal(res, idx, exact=exact) + + def test_constructor_does_not_cast_to_float(self): + 
# https://github.com/numpy/numpy/issues/19146 + values = [0, np.iinfo(np.uint64).max] + + result = UInt64Index(values) + assert list(result) == values + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) + assert isinstance(index, index_cls) + assert index.dtype == np.uint64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) + assert isinstance(index, index_cls) + assert index.dtype == np.uint64 + + +@pytest.mark.parametrize( + "box", + [list, lambda x: np.array(x, dtype=object), lambda x: Index(x, dtype=object)], +) +def test_uint_index_does_not_convert_to_float64(box): + # https://github.com/pandas-dev/pandas/issues/28279 + # https://github.com/pandas-dev/pandas/issues/28023 + series = Series( + [0, 1, 2, 3, 4, 5], + index=[ + 7606741985629028552, + 17876870360202815256, + 17876870360202815256, + 13106359306506049338, + 8991270399732411471, + 8991270399732411472, + ], + ) + + result = series.loc[box([7606741985629028552, 17876870360202815256])] + + expected = UInt64Index( + [7606741985629028552, 17876870360202815256, 17876870360202815256], + dtype="uint64", + ) + tm.assert_index_equal(result.index, expected) + + tm.assert_equal(result, series.iloc[:3]) + + +def test_float64_index_equals(): + # https://github.com/pandas-dev/pandas/issues/35217 + float_index = Index([1.0, 2, 3]) + string_index = Index(["1", "2", "3"]) + + result = float_index.equals(string_index) + assert result is False + + result = string_index.equals(float_index) + assert result is False + + +def test_map_dtype_inference_unsigned_to_signed(): + # GH#44609 cases where we don't retain dtype + idx = UInt64Index([1, 2, 3]) + result = idx.map(lambda x: -x) + expected = Int64Index([-1, -2, -3]) + tm.assert_index_equal(result, expected) + + +def test_map_dtype_inference_overflows(): + # GH#44609 case where we have to upcast + idx = NumericIndex(np.array([1, 2, 3], dtype=np.int8)) + result = idx.map(lambda x: x * 1000) + # TODO: we could plausibly try to infer down to int16 here + expected = NumericIndex([1000, 2000, 3000], dtype=np.int64) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py new file mode 100644 index 00000000..9f2174c2 --- /dev/null +++ b/pandas/tests/indexes/numeric/test_setops.py @@ -0,0 +1,166 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Index, + Int64Index, + RangeIndex, + UInt64Index, +) + + +@pytest.fixture +def index_large(): + # large values used in TestUInt64Index where no compat needed with Int64/Float64 + large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + return UInt64Index(large) + + +class TestSetOps: + @pytest.mark.parametrize("dtype", ["f8", "u8", "i8"]) + def test_union_non_numeric(self, dtype): + # corner case, non-numeric + index = Index(np.arange(5, dtype=dtype), dtype=dtype) + assert index.dtype == dtype + + other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) + result = index.union(other) + expected = Index(np.concatenate((index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(index) + expected = Index(np.concatenate((other, index))) + tm.assert_index_equal(result, expected) + + def test_intersection(self): + index = Int64Index(range(5)) + + other = Index([1, 2, 3, 4, 5]) + result = 
index.intersection(other) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index) + expected = Index( + np.sort(np.asarray(np.intersect1d(index.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "uint64"]) + def test_int_float_union_dtype(self, dtype): + # https://github.com/pandas-dev/pandas/issues/26778 + # [u]int | float -> float + index = Index([0, 2, 3], dtype=dtype) + other = Float64Index([0.5, 1.5]) + expected = Float64Index([0.0, 0.5, 1.5, 2.0, 3.0]) + result = index.union(other) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_range_float_union_dtype(self): + # https://github.com/pandas-dev/pandas/issues/26778 + index = RangeIndex(start=0, stop=3) + other = Float64Index([0.5, 1.5]) + result = index.union(other) + expected = Float64Index([0.0, 0.5, 1, 1.5, 2.0]) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_float64_index_difference(self): + # https://github.com/pandas-dev/pandas/issues/35217 + float_index = Index([1.0, 2, 3]) + string_index = Index(["1", "2", "3"]) + + result = float_index.difference(string_index) + tm.assert_index_equal(result, float_index) + + result = string_index.difference(float_index) + tm.assert_index_equal(result, string_index) + + def test_intersection_uint64_outside_int64_range(self, index_large): + other = Index([2**63, 2**63 + 5, 2**63 + 10, 2**63 + 15, 2**63 + 20]) + result = index_large.intersection(other) + expected = Index(np.sort(np.intersect1d(index_large.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index_large) + expected = Index( + np.sort(np.asarray(np.intersect1d(index_large.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([4, 7, 6, 5, 3], name="index"), True), + (Index([4, 7, 6, 5, 3], name="other"), False), + ], + ) + def test_intersection_monotonic(self, index2, keeps_name, sort): + index1 = Index([5, 3, 2, 4, 1], name="index") + expected = Index([5, 3, 4]) + + if keeps_name: + expected.name = "index" + + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self, sort): + # smoke + index1 = Index([5, 2, 3, 4], name="index1") + index2 = Index([2, 3, 4, 1]) + result = index1.symmetric_difference(index2, sort=sort) + expected = Index([5, 1]) + assert tm.equalContents(result, expected) + assert result.name is None + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + # __xor__ syntax + with tm.assert_produces_warning(FutureWarning): + expected = index1 ^ index2 + assert tm.equalContents(result, expected) + assert result.name is None + + +class TestSetOpsSort: + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_other_special(self, slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + + idx = Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + @pytest.mark.xfail(reason="Not implemented") + 
@pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_special_true(self, slice_): + # TODO(GH#25151): decide on True behaviour + # sort=True + idx = Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + + result = idx.union(other, sort=True) + expected = Index([0, 1, 2]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/object/__init__.py b/pandas/tests/indexes/object/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/object/test_astype.py b/pandas/tests/indexes/object/test_astype.py new file mode 100644 index 00000000..91e266e8 --- /dev/null +++ b/pandas/tests/indexes/object/test_astype.py @@ -0,0 +1,24 @@ +import pytest + +from pandas import ( + Index, + NaT, +) +import pandas._testing as tm + + +def test_astype_str_from_bytes(): + # https://github.com/pandas-dev/pandas/issues/38607 + idx = Index(["あ", b"a"], dtype="object") + result = idx.astype(str) + expected = Index(["あ", "a"], dtype="object") + tm.assert_index_equal(result, expected) + + +def test_astype_invalid_nas_to_tdt64_raises(): + # GH#45722 don't cast np.datetime64 NaTs to timedelta64 NaT + idx = Index([NaT.asm8] * 2, dtype=object) + + msg = r"Cannot cast Index to dtype timedelta64\[ns\]" + with pytest.raises(TypeError, match=msg): + idx.astype("m8[ns]") diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py new file mode 100644 index 00000000..924a3316 --- /dev/null +++ b/pandas/tests/indexes/object/test_indexing.py @@ -0,0 +1,203 @@ +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._libs.missing import is_matching_na + +import pandas as pd +from pandas import Index +import pandas._testing as tm + + +class TestGetLoc: + def test_get_loc_raises_object_nearest(self): + index = Index(["a", "c"]) + with pytest.raises(TypeError, match="unsupported operand type"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc("a", method="nearest") + + def test_get_loc_raises_object_tolerance(self): + index = Index(["a", "c"]) + with pytest.raises(TypeError, match="unsupported operand type"): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + index.get_loc("a", method="pad", tolerance="invalid") + + +class TestGetIndexer: + @pytest.mark.parametrize( + "method,expected", + [ + ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)), + ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)), + ], + ) + def test_get_indexer_strings(self, method, expected): + index = Index(["b", "c"]) + actual = index.get_indexer(["a", "b", "c", "d"], method=method) + + tm.assert_numpy_array_equal(actual, expected) + + def test_get_indexer_strings_raises(self): + index = Index(["b", "c"]) + + msg = r"unsupported operand type\(s\) for -: 'str' and 'str'" + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="nearest") + + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2) + + with pytest.raises(TypeError, match=msg): + index.get_indexer( + ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] + ) + + def test_get_indexer_with_NA_values( + self, unique_nulls_fixture, unique_nulls_fixture2 + ): + # GH#22332 + # check pairwise, that no pair of na values + # is mangled + if unique_nulls_fixture is unique_nulls_fixture2: + return # skip it, values are not unique + arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object) + 
index = Index(arr, dtype=object) + result = index.get_indexer( + [unique_nulls_fixture, unique_nulls_fixture2, "Unknown"] + ) + expected = np.array([0, 1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +class TestGetIndexerNonUnique: + def test_get_indexer_non_unique_nas(self, nulls_fixture): + # even though this isn't non-unique, this should still work + index = Index(["a", "b", nulls_fixture]) + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + # actually non-unique + index = Index(["a", nulls_fixture, "b", nulls_fixture]) + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + # matching-but-not-identical nans + if is_matching_na(nulls_fixture, float("NaN")): + index = Index(["a", float("NaN"), "b", float("NaN")]) + match_but_not_identical = True + elif is_matching_na(nulls_fixture, Decimal("NaN")): + index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")]) + match_but_not_identical = True + else: + match_but_not_identical = False + + if match_but_not_identical: + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") + def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2): + expected_missing = np.array([], dtype=np.intp) + # matching-but-not-identical nats + if is_matching_na(np_nat_fixture, np_nat_fixture2): + # ensure nats are different objects + index = Index( + np.array( + ["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()], + dtype=object, + ), + dtype=object, + ) + # pass as index to prevent target from being casted to DatetimeIndex + indexer, missing = index.get_indexer_non_unique( + Index([np_nat_fixture], dtype=object) + ) + expected_indexer = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + # dt64nat vs td64nat + else: + try: + np_nat_fixture == np_nat_fixture2 + except (TypeError, OverflowError): + # Numpy will raise on uncomparable types, like + # np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps') + # https://github.com/numpy/numpy/issues/22762 + return + index = Index( + np.array( + [ + "2021-10-02", + np_nat_fixture, + np_nat_fixture2, + np_nat_fixture, + np_nat_fixture2, + ], + dtype=object, + ), + dtype=object, + ) + # pass as index to prevent target from being casted to DatetimeIndex + indexer, missing = index.get_indexer_non_unique( + Index([np_nat_fixture], dtype=object) + ) + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + +class TestSliceLocs: + @pytest.mark.parametrize( + "in_slice,expected", + [ + # error: Slice index must be an integer or None + (pd.IndexSlice[::-1], "yxdcb"), + (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc] + (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc] + (pd.IndexSlice[:"b":-1], 
"yxdcb"), # type: ignore[misc] + (pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc] + (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc] + # absent labels + (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc] + (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc] + (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc] + (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc] + (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc] + (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc] + (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc] + ], + ) + def test_slice_locs_negative_step(self, in_slice, expected): + index = Index(list("bcdxy")) + + s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) + result = index[s_start : s_stop : in_slice.step] + expected = Index(list(expected)) + tm.assert_index_equal(result, expected) + + def test_slice_locs_dup(self): + index = Index(["a", "a", "b", "c", "d", "d"]) + assert index.slice_locs("a", "d") == (0, 6) + assert index.slice_locs(end="d") == (0, 6) + assert index.slice_locs("a", "c") == (0, 4) + assert index.slice_locs("b", "d") == (2, 6) + + index2 = index[::-1] + assert index2.slice_locs("d", "a") == (0, 6) + assert index2.slice_locs(end="a") == (0, 6) + assert index2.slice_locs("d", "b") == (0, 4) + assert index2.slice_locs("c", "a") == (2, 6) diff --git a/pandas/tests/indexes/period/__init__.py b/pandas/tests/indexes/period/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/period/methods/__init__.py b/pandas/tests/indexes/period/methods/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/period/methods/test_asfreq.py b/pandas/tests/indexes/period/methods/test_asfreq.py new file mode 100644 index 00000000..23b88fb6 --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_asfreq.py @@ -0,0 +1,130 @@ +import pytest + +from pandas import ( + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestPeriodIndex: + def test_asfreq(self): + pi1 = period_range(freq="A", start="1/1/2001", end="1/1/2001") + pi2 = period_range(freq="Q", start="1/1/2001", end="1/1/2001") + pi3 = period_range(freq="M", start="1/1/2001", end="1/1/2001") + pi4 = period_range(freq="D", start="1/1/2001", end="1/1/2001") + pi5 = period_range(freq="H", start="1/1/2001", end="1/1/2001 00:00") + pi6 = period_range(freq="Min", start="1/1/2001", end="1/1/2001 00:00") + pi7 = period_range(freq="S", start="1/1/2001", end="1/1/2001 00:00:00") + + assert pi1.asfreq("Q", "S") == pi2 + assert pi1.asfreq("Q", "s") == pi2 + assert pi1.asfreq("M", "start") == pi3 + assert pi1.asfreq("D", "StarT") == pi4 + assert pi1.asfreq("H", "beGIN") == pi5 + assert pi1.asfreq("Min", "S") == pi6 + assert pi1.asfreq("S", "S") == pi7 + + assert pi2.asfreq("A", "S") == pi1 + assert pi2.asfreq("M", "S") == pi3 + assert pi2.asfreq("D", "S") == pi4 + assert pi2.asfreq("H", "S") == pi5 + assert pi2.asfreq("Min", "S") == pi6 + assert pi2.asfreq("S", "S") == pi7 + + assert pi3.asfreq("A", "S") == pi1 + assert pi3.asfreq("Q", "S") == pi2 + assert pi3.asfreq("D", "S") == pi4 + assert pi3.asfreq("H", "S") == pi5 + assert pi3.asfreq("Min", "S") == pi6 + assert pi3.asfreq("S", "S") == pi7 + + assert pi4.asfreq("A", "S") == pi1 + assert pi4.asfreq("Q", "S") == pi2 + assert pi4.asfreq("M", "S") == pi3 + assert 
pi4.asfreq("H", "S") == pi5 + assert pi4.asfreq("Min", "S") == pi6 + assert pi4.asfreq("S", "S") == pi7 + + assert pi5.asfreq("A", "S") == pi1 + assert pi5.asfreq("Q", "S") == pi2 + assert pi5.asfreq("M", "S") == pi3 + assert pi5.asfreq("D", "S") == pi4 + assert pi5.asfreq("Min", "S") == pi6 + assert pi5.asfreq("S", "S") == pi7 + + assert pi6.asfreq("A", "S") == pi1 + assert pi6.asfreq("Q", "S") == pi2 + assert pi6.asfreq("M", "S") == pi3 + assert pi6.asfreq("D", "S") == pi4 + assert pi6.asfreq("H", "S") == pi5 + assert pi6.asfreq("S", "S") == pi7 + + assert pi7.asfreq("A", "S") == pi1 + assert pi7.asfreq("Q", "S") == pi2 + assert pi7.asfreq("M", "S") == pi3 + assert pi7.asfreq("D", "S") == pi4 + assert pi7.asfreq("H", "S") == pi5 + assert pi7.asfreq("Min", "S") == pi6 + + msg = "How must be one of S or E" + with pytest.raises(ValueError, match=msg): + pi7.asfreq("T", "foo") + result1 = pi1.asfreq("3M") + result2 = pi1.asfreq("M") + expected = period_range(freq="M", start="2001-12", end="2001-12") + tm.assert_numpy_array_equal(result1.asi8, expected.asi8) + assert result1.freqstr == "3M" + tm.assert_numpy_array_equal(result2.asi8, expected.asi8) + assert result2.freqstr == "M" + + def test_asfreq_nat(self): + idx = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-04"], freq="M") + result = idx.asfreq(freq="Q") + expected = PeriodIndex(["2011Q1", "2011Q1", "NaT", "2011Q2"], freq="Q") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("freq", ["D", "3D"]) + def test_asfreq_mult_pi(self, freq): + pi = PeriodIndex(["2001-01", "2001-02", "NaT", "2001-03"], freq="2M") + + result = pi.asfreq(freq) + exp = PeriodIndex(["2001-02-28", "2001-03-31", "NaT", "2001-04-30"], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + result = pi.asfreq(freq, how="S") + exp = PeriodIndex(["2001-01-01", "2001-02-01", "NaT", "2001-03-01"], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_asfreq_combined_pi(self): + pi = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="H") + exp = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="25H") + for freq, how in zip(["1D1H", "1H1D"], ["S", "E"]): + result = pi.asfreq(freq, how=how) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + for freq in ["1D1H", "1H1D"]: + pi = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq=freq) + result = pi.asfreq("H") + exp = PeriodIndex(["2001-01-02 00:00", "2001-01-03 02:00", "NaT"], freq="H") + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + pi = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq=freq) + result = pi.asfreq("H", how="S") + exp = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="H") + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_astype_asfreq(self): + pi1 = PeriodIndex(["2011-01-01", "2011-02-01", "2011-03-01"], freq="D") + exp = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="M") + tm.assert_index_equal(pi1.asfreq("M"), exp) + tm.assert_index_equal(pi1.astype("period[M]"), exp) + + exp = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="3M") + tm.assert_index_equal(pi1.asfreq("3M"), exp) + tm.assert_index_equal(pi1.astype("period[3M]"), exp) diff --git a/pandas/tests/indexes/period/methods/test_astype.py b/pandas/tests/indexes/period/methods/test_astype.py new file mode 100644 index 00000000..fbc1d370 --- /dev/null +++ 
b/pandas/tests/indexes/period/methods/test_astype.py @@ -0,0 +1,178 @@ +import numpy as np +import pytest + +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + NaT, + Period, + PeriodIndex, + Timedelta, + period_range, +) +import pandas._testing as tm +from pandas.core.indexes.api import ( + Int64Index, + UInt64Index, +) + + +class TestPeriodIndexAsType: + @pytest.mark.parametrize("dtype", [float, "timedelta64", "timedelta64[ns]"]) + def test_astype_raises(self, dtype): + # GH#13149, GH#13209 + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D") + msg = "Cannot cast PeriodIndex to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_astype_conversion(self): + # GH#13149, GH#13209 + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D", name="idx") + + result = idx.astype(object) + expected = Index( + [Period("2016-05-16", freq="D")] + [Period(NaT, freq="D")] * 3, + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(np.int64) + expected = Int64Index( + [16937] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(str) + expected = Index([str(x) for x in idx], name="idx") + tm.assert_index_equal(result, expected) + + idx = period_range("1990", "2009", freq="A", name="idx") + result = idx.astype("i8") + tm.assert_index_equal(result, Index(idx.asi8, name="idx")) + tm.assert_numpy_array_equal(result.values, idx.asi8) + + def test_astype_uint(self): + arr = period_range("2000", periods=2, name="idx") + expected = UInt64Index(np.array([10957, 10958], dtype="uint64"), name="idx") + tm.assert_index_equal(arr.astype("uint64"), expected) + + msg = "will return exactly the specified dtype instead of uint64" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = arr.astype("uint32") + tm.assert_index_equal(res, expected) + + def test_astype_object(self): + idx = PeriodIndex([], freq="M") + + exp = np.array([], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + idx = PeriodIndex(["2011-01", NaT], freq="M") + + exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object) + idx = PeriodIndex(["2011-01-01", NaT], freq="D") + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + # TODO: de-duplicate this version (from test_ops) with the one above + # (from test_period) + def test_astype_object2(self): + idx = period_range(start="2013-01-01", periods=4, freq="M", name="idx") + expected_list = [ + Period("2013-01-31", freq="M"), + Period("2013-02-28", freq="M"), + Period("2013-03-31", freq="M"), + Period("2013-04-30", freq="M"), + ] + expected = Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert idx.tolist() == expected_list + + idx = PeriodIndex( + ["2013-01-01", "2013-01-02", "NaT", "2013-01-04"], freq="D", name="idx" + ) + expected_list = [ + Period("2013-01-01", freq="D"), + Period("2013-01-02", freq="D"), + Period("NaT", freq="D"), + Period("2013-01-04", freq="D"), + ] + expected = 
Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, expected) + for i in [0, 1, 3]: + assert result[i] == expected[i] + assert result[2] is NaT + assert result.name == expected.name + + result_list = idx.tolist() + for i in [0, 1, 3]: + assert result_list[i] == expected_list[i] + assert result_list[2] is NaT + + def test_astype_category(self): + obj = period_range("2000", periods=2, name="idx") + result = obj.astype("category") + expected = CategoricalIndex( + [Period("2000-01-01", freq="D"), Period("2000-01-02", freq="D")], name="idx" + ) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + def test_astype_array_fallback(self): + obj = period_range("2000", periods=2, name="idx") + result = obj.astype(bool) + expected = Index(np.array([True, True]), name="idx") + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + def test_period_astype_to_timestamp(self): + pi = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="M") + + exp = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], freq="MS") + with tm.assert_produces_warning(FutureWarning): + # how keyword deprecated GH#37982 + res = pi.astype("datetime64[ns]", how="start") + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq + + exp = DatetimeIndex(["2011-01-31", "2011-02-28", "2011-03-31"]) + exp = exp + Timedelta(1, "D") - Timedelta(1, "ns") + with tm.assert_produces_warning(FutureWarning): + # how keyword deprecated GH#37982 + res = pi.astype("datetime64[ns]", how="end") + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq + + exp = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], tz="US/Eastern") + res = pi.astype("datetime64[ns, US/Eastern]") + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq + + exp = DatetimeIndex(["2011-01-31", "2011-02-28", "2011-03-31"], tz="US/Eastern") + exp = exp + Timedelta(1, "D") - Timedelta(1, "ns") + with tm.assert_produces_warning(FutureWarning): + # how keyword deprecated GH#37982 + res = pi.astype("datetime64[ns, US/Eastern]", how="end") + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq diff --git a/pandas/tests/indexes/period/methods/test_factorize.py b/pandas/tests/indexes/period/methods/test_factorize.py new file mode 100644 index 00000000..9e297d6c --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_factorize.py @@ -0,0 +1,52 @@ +import numpy as np + +from pandas import ( + PeriodIndex, + factorize, +) +import pandas._testing as tm + + +class TestFactorize: + def test_factorize(self): + idx1 = PeriodIndex( + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M" + ) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + idx2 = PeriodIndex( + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M" + ) + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + arr, idx = idx2.factorize(sort=True) + tm.assert_numpy_array_equal(arr, 
exp_arr) + tm.assert_index_equal(idx, exp_idx) + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = PeriodIndex(["2014-03", "2014-02", "2014-01"], freq="M") + arr, idx = idx2.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + def test_factorize_complex(self): + # GH 17927 + array = [1, 2, 2 + 1j] + labels, uniques = factorize(array) + + expected_labels = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(labels, expected_labels) + + # Should return a complex dtype in the future + expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object) + tm.assert_numpy_array_equal(uniques, expected_uniques) diff --git a/pandas/tests/indexes/period/methods/test_fillna.py b/pandas/tests/indexes/period/methods/test_fillna.py new file mode 100644 index 00000000..12a07bac --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_fillna.py @@ -0,0 +1,41 @@ +from pandas import ( + Index, + NaT, + Period, + PeriodIndex, +) +import pandas._testing as tm + + +class TestFillNA: + def test_fillna_period(self): + # GH#11343 + idx = PeriodIndex(["2011-01-01 09:00", NaT, "2011-01-01 11:00"], freq="H") + + exp = PeriodIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H" + ) + result = idx.fillna(Period("2011-01-01 10:00", freq="H")) + tm.assert_index_equal(result, exp) + + exp = Index( + [ + Period("2011-01-01 09:00", freq="H"), + "x", + Period("2011-01-01 11:00", freq="H"), + ], + dtype=object, + ) + result = idx.fillna("x") + tm.assert_index_equal(result, exp) + + exp = Index( + [ + Period("2011-01-01 09:00", freq="H"), + Period("2011-01-01", freq="D"), + Period("2011-01-01 11:00", freq="H"), + ], + dtype=object, + ) + result = idx.fillna(Period("2011-01-01", freq="D")) + tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/period/methods/test_insert.py b/pandas/tests/indexes/period/methods/test_insert.py new file mode 100644 index 00000000..32bbe09d --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_insert.py @@ -0,0 +1,18 @@ +import numpy as np +import pytest + +from pandas import ( + NaT, + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestInsert: + @pytest.mark.parametrize("na", [np.nan, NaT, None]) + def test_insert(self, na): + # GH#18295 (test missing) + expected = PeriodIndex(["2017Q1", NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q") + result = period_range("2017Q1", periods=4, freq="Q").insert(1, na) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/methods/test_is_full.py b/pandas/tests/indexes/period/methods/test_is_full.py new file mode 100644 index 00000000..490f199a --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_is_full.py @@ -0,0 +1,23 @@ +import pytest + +from pandas import PeriodIndex + + +def test_is_full(): + index = PeriodIndex([2005, 2007, 2009], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2006, 2007], freq="A") + assert index.is_full + + index = PeriodIndex([2005, 2005, 2007], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2005, 2006], freq="A") + assert index.is_full + + index = PeriodIndex([2006, 2005, 2005], freq="A") + with pytest.raises(ValueError, match="Index is not monotonic"): + index.is_full + + assert index[:0].is_full diff --git a/pandas/tests/indexes/period/methods/test_repeat.py b/pandas/tests/indexes/period/methods/test_repeat.py new file mode 100644 index 00000000..fc344b06 --- /dev/null +++ 
b/pandas/tests/indexes/period/methods/test_repeat.py @@ -0,0 +1,26 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestRepeat: + @pytest.mark.parametrize("use_numpy", [True, False]) + @pytest.mark.parametrize( + "index", + [ + period_range("2000-01-01", periods=3, freq="D"), + period_range("2001-01-01", periods=3, freq="2D"), + PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), + ], + ) + def test_repeat_freqstr(self, index, use_numpy): + # GH#10183 + expected = PeriodIndex([per for per in index for _ in range(3)]) + result = np.repeat(index, 3) if use_numpy else index.repeat(3) + tm.assert_index_equal(result, expected) + assert result.freqstr == index.freqstr diff --git a/pandas/tests/indexes/period/methods/test_shift.py b/pandas/tests/indexes/period/methods/test_shift.py new file mode 100644 index 00000000..730172ca --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_shift.py @@ -0,0 +1,122 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestPeriodIndexShift: + # --------------------------------------------------------------- + # PeriodIndex.shift is used by __add__ and __sub__ + + def test_pi_shift_ndarray(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + result = idx.shift(np.array([1, 2, 3, 4])) + expected = PeriodIndex( + ["2011-02", "2011-04", "NaT", "2011-08"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.shift(np.array([1, -2, 3, -4])) + expected = PeriodIndex( + ["2011-02", "2010-12", "NaT", "2010-12"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + + def test_shift(self): + pi1 = period_range(freq="A", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="A", start="1/1/2002", end="12/1/2010") + + tm.assert_index_equal(pi1.shift(0), pi1) + + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="A", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="A", start="1/1/2000", end="12/1/2008") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = period_range(freq="M", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="M", start="2/1/2001", end="1/1/2010") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="M", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="M", start="12/1/2000", end="11/1/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = period_range(freq="D", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="D", start="1/2/2001", end="12/2/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="D", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="D", start="12/31/2000", end="11/30/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + def test_shift_corner_cases(self): + # GH#9903 + idx = PeriodIndex([], name="xxx", freq="H") + + msg = "`freq` argument is not supported for PeriodArray._time_shift" + with pytest.raises(TypeError, match=msg): + # period shift doesn't accept freq + idx.shift(1, freq="H") + + tm.assert_index_equal(idx.shift(0), idx) + tm.assert_index_equal(idx.shift(3), idx) + + idx = PeriodIndex( + ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"], + 
name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(0), idx) + exp = PeriodIndex( + ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"], + name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(3), exp) + exp = PeriodIndex( + ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"], + name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(-3), exp) + + def test_shift_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + result = idx.shift(1) + expected = PeriodIndex( + ["2011-02", "2011-03", "NaT", "2011-05"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_shift_gh8083(self): + # test shift for PeriodIndex + # GH#8083 + drange = period_range("20130101", periods=5, freq="D") + result = drange.shift(1) + expected = PeriodIndex( + ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_shift_periods(self): + # GH #22458 : argument 'n' was deprecated in favor of 'periods' + idx = period_range(freq="A", start="1/1/2001", end="12/1/2009") + tm.assert_index_equal(idx.shift(periods=0), idx) + tm.assert_index_equal(idx.shift(0), idx) diff --git a/pandas/tests/indexes/period/methods/test_to_timestamp.py b/pandas/tests/indexes/period/methods/test_to_timestamp.py new file mode 100644 index 00000000..164ed3ec --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_to_timestamp.py @@ -0,0 +1,132 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + Timedelta, + Timestamp, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestToTimestamp: + def test_to_timestamp_non_contiguous(self): + # GH#44100 + dti = date_range("2021-10-18", periods=9, freq="B") + pi = dti.to_period() + + result = pi[::2].to_timestamp() + expected = dti[::2] + tm.assert_index_equal(result, expected) + + result = pi._data[::2].to_timestamp() + expected = dti._data[::2] + # TODO: can we get the freq to round-trip? 
+ tm.assert_datetime_array_equal(result, expected, check_freq=False) + + result = pi[::-1].to_timestamp() + expected = dti[::-1] + tm.assert_index_equal(result, expected) + + result = pi._data[::-1].to_timestamp() + expected = dti._data[::-1] + tm.assert_datetime_array_equal(result, expected, check_freq=False) + + result = pi[::2][::-1].to_timestamp() + expected = dti[::2][::-1] + tm.assert_index_equal(result, expected) + + result = pi._data[::2][::-1].to_timestamp() + expected = dti._data[::2][::-1] + tm.assert_datetime_array_equal(result, expected, check_freq=False) + + def test_to_timestamp_freq(self): + idx = period_range("2017", periods=12, freq="A-DEC") + result = idx.to_timestamp() + expected = date_range("2017", periods=12, freq="AS-JAN") + tm.assert_index_equal(result, expected) + + def test_to_timestamp_pi_nat(self): + # GH#7228 + index = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx") + + result = index.to_timestamp("D") + expected = DatetimeIndex( + [NaT, datetime(2011, 1, 1), datetime(2011, 2, 1)], name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.name == "idx" + + result2 = result.to_period(freq="M") + tm.assert_index_equal(result2, index) + assert result2.name == "idx" + + result3 = result.to_period(freq="3M") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx") + tm.assert_index_equal(result3, exp) + assert result3.freqstr == "3M" + + msg = "Frequency must be positive, because it represents span: -2A" + with pytest.raises(ValueError, match=msg): + result.to_period(freq="-2A") + + def test_to_timestamp_preserve_name(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009", name="foo") + assert index.name == "foo" + + conv = index.to_timestamp("D") + assert conv.name == "foo" + + def test_to_timestamp_quarterly_bug(self): + years = np.arange(1960, 2000).repeat(4) + quarters = np.tile(list(range(1, 5)), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + stamps = pindex.to_timestamp("D", "end") + expected = DatetimeIndex([x.to_timestamp("D", "end") for x in pindex]) + tm.assert_index_equal(stamps, expected) + assert stamps.freq == expected.freq + + def test_to_timestamp_pi_mult(self): + idx = PeriodIndex(["2011-01", "NaT", "2011-02"], freq="2M", name="idx") + + result = idx.to_timestamp() + expected = DatetimeIndex(["2011-01-01", "NaT", "2011-02-01"], name="idx") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E") + expected = DatetimeIndex(["2011-02-28", "NaT", "2011-03-31"], name="idx") + expected = expected + Timedelta(1, "D") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + def test_to_timestamp_pi_combined(self): + idx = period_range(start="2011", periods=2, freq="1D1H", name="idx") + + result = idx.to_timestamp() + expected = DatetimeIndex(["2011-01-01 00:00", "2011-01-02 01:00"], name="idx") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E") + expected = DatetimeIndex( + ["2011-01-02 00:59:59", "2011-01-03 01:59:59"], name="idx" + ) + expected = expected + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E", freq="H") + expected = DatetimeIndex(["2011-01-02 00:00", "2011-01-03 01:00"], name="idx") + expected = expected + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + def test_to_timestamp_1703(self): + index = period_range("1/1/2012", periods=4, freq="D") + + result = index.to_timestamp() + assert 
result[0] == Timestamp("1/1/2012") diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py new file mode 100644 index 00000000..5dff5c2a --- /dev/null +++ b/pandas/tests/indexes/period/test_constructors.py @@ -0,0 +1,546 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs.period import IncompatibleFrequency + +from pandas.core.dtypes.dtypes import PeriodDtype + +from pandas import ( + Index, + NaT, + Period, + PeriodIndex, + Series, + date_range, + offsets, + period_range, +) +import pandas._testing as tm +from pandas.core.arrays import PeriodArray + + +class TestPeriodIndex: + def test_construction_base_constructor(self): + # GH 13664 + arr = [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="M")] + tm.assert_index_equal(Index(arr), PeriodIndex(arr)) + tm.assert_index_equal(Index(np.array(arr)), PeriodIndex(np.array(arr))) + + arr = [np.nan, NaT, Period("2011-03", freq="M")] + tm.assert_index_equal(Index(arr), PeriodIndex(arr)) + tm.assert_index_equal(Index(np.array(arr)), PeriodIndex(np.array(arr))) + + arr = [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="D")] + tm.assert_index_equal(Index(arr), Index(arr, dtype=object)) + + tm.assert_index_equal(Index(np.array(arr)), Index(np.array(arr), dtype=object)) + + def test_base_constructor_with_period_dtype(self): + dtype = PeriodDtype("D") + values = ["2011-01-01", "2012-03-04", "2014-05-01"] + result = Index(values, dtype=dtype) + + expected = PeriodIndex(values, dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "values_constructor", [list, np.array, PeriodIndex, PeriodArray._from_sequence] + ) + def test_index_object_dtype(self, values_constructor): + # Index(periods, dtype=object) is an Index (not an PeriodIndex) + periods = [ + Period("2011-01", freq="M"), + NaT, + Period("2011-03", freq="M"), + ] + values = values_constructor(periods) + result = Index(values, dtype=object) + + assert type(result) is Index + tm.assert_numpy_array_equal(result.values, np.array(values)) + + def test_constructor_use_start_freq(self): + # GH #1118 + p = Period("4/2/2012", freq="B") + expected = period_range(start="4/2/2012", periods=10, freq="B") + + index = period_range(start=p, periods=10) + tm.assert_index_equal(index, expected) + + def test_constructor_field_arrays(self): + # GH #1264 + + years = np.arange(1990, 2010).repeat(4)[2:-2] + quarters = np.tile(np.arange(1, 5), 20)[2:-2] + + index = PeriodIndex(year=years, quarter=quarters, freq="Q-DEC") + expected = period_range("1990Q3", "2009Q2", freq="Q-DEC") + tm.assert_index_equal(index, expected) + + index2 = PeriodIndex(year=years, quarter=quarters, freq="2Q-DEC") + tm.assert_numpy_array_equal(index.asi8, index2.asi8) + + index = PeriodIndex(year=years, quarter=quarters) + tm.assert_index_equal(index, expected) + + years = [2007, 2007, 2007] + months = [1, 2] + + msg = "Mismatched Period array lengths" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq="M") + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq="2M") + + years = [2007, 2007, 2007] + months = [1, 2, 3] + idx = PeriodIndex(year=years, month=months, freq="M") + exp = period_range("2007-01", periods=3, freq="M") + tm.assert_index_equal(idx, exp) + + def test_constructor_U(self): + # U was used as undefined period + with pytest.raises(ValueError, match="Invalid frequency: X"): + period_range("2007-1-1", periods=500, freq="X") + + def 
test_constructor_nano(self): + idx = period_range( + start=Period(ordinal=1, freq="N"), end=Period(ordinal=4, freq="N"), freq="N" + ) + exp = PeriodIndex( + [ + Period(ordinal=1, freq="N"), + Period(ordinal=2, freq="N"), + Period(ordinal=3, freq="N"), + Period(ordinal=4, freq="N"), + ], + freq="N", + ) + tm.assert_index_equal(idx, exp) + + def test_constructor_arrays_negative_year(self): + years = np.arange(1960, 2000, dtype=np.int64).repeat(4) + quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + tm.assert_index_equal(pindex.year, Index(years)) + tm.assert_index_equal(pindex.quarter, Index(quarters)) + + def test_constructor_invalid_quarters(self): + msg = "Quarter must be 1 <= q <= 4" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC") + + def test_constructor_corner(self): + result = period_range("2007-01", periods=10.5, freq="M") + exp = period_range("2007-01", periods=10, freq="M") + tm.assert_index_equal(result, exp) + + def test_constructor_fromarraylike(self): + idx = period_range("2007-01", periods=20, freq="M") + + # values is an array of Period, thus can retrieve freq + tm.assert_index_equal(PeriodIndex(idx.values), idx) + tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) + + msg = "freq not specified and cannot be inferred" + with pytest.raises(ValueError, match=msg): + PeriodIndex(idx.asi8) + with pytest.raises(ValueError, match=msg): + PeriodIndex(list(idx.asi8)) + + msg = "'Period' object is not iterable" + with pytest.raises(TypeError, match=msg): + PeriodIndex(data=Period("2007", freq="A")) + + result = PeriodIndex(iter(idx)) + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx) + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx, freq="M") + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx, freq=offsets.MonthEnd()) + tm.assert_index_equal(result, idx) + assert result.freq == "M" + + result = PeriodIndex(idx, freq="2M") + tm.assert_index_equal(result, idx.asfreq("2M")) + assert result.freq == "2M" + + result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) + tm.assert_index_equal(result, idx.asfreq("2M")) + assert result.freq == "2M" + + result = PeriodIndex(idx, freq="D") + exp = idx.asfreq("D", "e") + tm.assert_index_equal(result, exp) + + def test_constructor_datetime64arr(self): + vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) + vals = vals.view(np.dtype("M8[us]")) + + pi = PeriodIndex(vals, freq="D") + + expected = PeriodIndex(vals.astype("M8[ns]"), freq="D") + tm.assert_index_equal(pi, expected) + + @pytest.mark.parametrize("box", [None, "series", "index"]) + def test_constructor_datetime64arr_ok(self, box): + # https://github.com/pandas-dev/pandas/issues/23438 + data = date_range("2017", periods=4, freq="M") + if box is None: + data = data._values + elif box == "series": + data = Series(data) + + result = PeriodIndex(data, freq="D") + expected = PeriodIndex( + ["2017-01-31", "2017-02-28", "2017-03-31", "2017-04-30"], freq="D" + ) + tm.assert_index_equal(result, expected) + + def test_constructor_dtype(self): + # passing a dtype with a tz should localize + idx = PeriodIndex(["2013-01", "2013-03"], dtype="period[M]") + exp = PeriodIndex(["2013-01", "2013-03"], freq="M") + tm.assert_index_equal(idx, exp) + assert idx.dtype == "period[M]" + + idx = PeriodIndex(["2013-01-05", "2013-03-05"], dtype="period[3D]") + exp = PeriodIndex(["2013-01-05", "2013-03-05"], freq="3D") + 
tm.assert_index_equal(idx, exp) + assert idx.dtype == "period[3D]" + + # if we already have a freq and its not the same, then asfreq + # (not changed) + idx = PeriodIndex(["2013-01-01", "2013-01-02"], freq="D") + + res = PeriodIndex(idx, dtype="period[M]") + exp = PeriodIndex(["2013-01", "2013-01"], freq="M") + tm.assert_index_equal(res, exp) + assert res.dtype == "period[M]" + + res = PeriodIndex(idx, freq="M") + tm.assert_index_equal(res, exp) + assert res.dtype == "period[M]" + + msg = "specified freq and dtype are different" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(["2011-01"], freq="M", dtype="period[D]") + + def test_constructor_empty(self): + idx = PeriodIndex([], freq="M") + assert isinstance(idx, PeriodIndex) + assert len(idx) == 0 + assert idx.freq == "M" + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex([]) + + def test_constructor_pi_nat(self): + idx = PeriodIndex( + [Period("2011-01", freq="M"), NaT, Period("2011-01", freq="M")] + ) + exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + np.array([Period("2011-01", freq="M"), NaT, Period("2011-01", freq="M")]) + ) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + [NaT, NaT, Period("2011-01", freq="M"), Period("2011-01", freq="M")] + ) + exp = PeriodIndex(["NaT", "NaT", "2011-01", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + np.array( + [NaT, NaT, Period("2011-01", freq="M"), Period("2011-01", freq="M")] + ) + ) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([NaT, NaT, "2011-01", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex([NaT, NaT]) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(np.array([NaT, NaT])) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(["NaT", "NaT"]) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(np.array(["NaT", "NaT"])) + + def test_constructor_incompat_freq(self): + msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex([Period("2011-01", freq="M"), NaT, Period("2011-01", freq="D")]) + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex( + np.array( + [Period("2011-01", freq="M"), NaT, Period("2011-01", freq="D")] + ) + ) + + # first element is NaT + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex([NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")]) + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex( + np.array( + [NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")] + ) + ) + + def test_constructor_mixed(self): + idx = PeriodIndex(["2011-01", NaT, Period("2011-01", freq="M")]) + exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(["NaT", NaT, Period("2011-01", freq="M")]) + exp = PeriodIndex(["NaT", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([Period("2011-01-01", freq="D"), NaT, "2012-01-01"]) + exp = PeriodIndex(["2011-01-01", "NaT", "2012-01-01"], freq="D") + tm.assert_index_equal(idx, exp) + + def test_constructor_simple_new(self): + idx = period_range("2007-01", name="p", periods=2, freq="M") + + with pytest.raises(AssertionError, match=""): + idx._simple_new(idx, name="p") + + result = 
idx._simple_new(idx._data, name="p") + tm.assert_index_equal(result, idx) + + msg = "Should be numpy array of type i8" + with pytest.raises(AssertionError, match=msg): + # Need ndarray, not Int64Index + type(idx._data)._simple_new(Index(idx.asi8), freq=idx.freq) + + arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq) + result = idx._simple_new(arr, name="p") + tm.assert_index_equal(result, idx) + + def test_constructor_simple_new_empty(self): + # GH13079 + idx = PeriodIndex([], freq="M", name="p") + with pytest.raises(AssertionError, match=""): + idx._simple_new(idx, name="p") + + result = idx._simple_new(idx._data, name="p") + tm.assert_index_equal(result, idx) + + @pytest.mark.parametrize("floats", [[1.1, 2.1], np.array([1.1, 2.1])]) + def test_constructor_floats(self, floats): + with pytest.raises(AssertionError, match="= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for( axis 0 with)? size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestGetValue: + def test_get_value(self): + # GH 17717 + p0 = Period("2017-09-01") + p1 = Period("2017-09-02") + p2 = Period("2017-09-03") + + idx0 = PeriodIndex([p0, p1, p2]) + input0 = Series(np.array([1, 2, 3]), index=idx0) + expected0 = 2 + + with tm.assert_produces_warning(FutureWarning): + result0 = idx0.get_value(input0, p1) + assert result0 == expected0 + + idx1 = PeriodIndex([p1, p1, p2]) + input1 = Series(np.array([1, 2, 3]), index=idx1) + expected1 = input1.iloc[[0, 1]] + + with tm.assert_produces_warning(FutureWarning): + result1 = idx1.get_value(input1, p1) + tm.assert_series_equal(result1, expected1) + + idx2 = PeriodIndex([p1, p2, p1]) + input2 = Series(np.array([1, 2, 3]), index=idx2) + expected2 = input2.iloc[[0, 2]] + + with tm.assert_produces_warning(FutureWarning): + result2 = idx2.get_value(input2, p1) + tm.assert_series_equal(result2, expected2) + + @pytest.mark.parametrize("freq", ["H", "D"]) + def test_get_value_datetime_hourly(self, freq): + # get_loc and get_value should treat datetime objects symmetrically + dti = date_range("2016-01-01", periods=3, freq="MS") + pi = dti.to_period(freq) + ser = Series(range(7, 10), index=pi) + + ts = dti[0] + + assert pi.get_loc(ts) == 0 + with tm.assert_produces_warning(FutureWarning): + assert pi.get_value(ser, ts) == 7 + assert ser[ts] == 7 + assert ser.loc[ts] == 7 + + ts2 = ts + Timedelta(hours=3) + if freq == "H": + with pytest.raises(KeyError, match="2016-01-01 03:00"): + pi.get_loc(ts2) + with pytest.raises(KeyError, match="2016-01-01 03:00"): + with tm.assert_produces_warning(FutureWarning): + pi.get_value(ser, ts2) + with pytest.raises(KeyError, match="2016-01-01 03:00"): + ser[ts2] + with pytest.raises(KeyError, match="2016-01-01 03:00"): + ser.loc[ts2] + else: + assert pi.get_loc(ts2) == 0 + with tm.assert_produces_warning(FutureWarning): + assert pi.get_value(ser, ts2) == 7 + assert ser[ts2] == 7 + assert ser.loc[ts2] == 7 + + def test_get_value_integer(self): + msg = "index 16801 is out of bounds for axis 0 with size 3" + dti = date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + ser = Series(range(3), index=pi) + with pytest.raises(IndexError, match=msg): + with tm.assert_produces_warning(FutureWarning): + pi.get_value(ser, 16801) + + msg = "index 46 is out of bounds for axis 0 with size 3" + pi2 = dti.to_period("Y") # duplicates, 
ordinals are all 46 + ser2 = Series(range(3), index=pi2) + with pytest.raises(IndexError, match=msg): + with tm.assert_produces_warning(FutureWarning): + pi2.get_value(ser2, 46) + + +class TestContains: + def test_contains(self): + # GH 17717 + p0 = Period("2017-09-01") + p1 = Period("2017-09-02") + p2 = Period("2017-09-03") + p3 = Period("2017-09-04") + + ps0 = [p0, p1, p2] + idx0 = PeriodIndex(ps0) + ser = Series(range(6, 9), index=idx0) + + for p in ps0: + assert p in idx0 + assert str(p) in idx0 + + # GH#31172 + # Higher-resolution period-like are _not_ considered as contained + key = "2017-09-01 00:00:01" + assert key not in idx0 + with pytest.raises(KeyError, match=key): + idx0.get_loc(key) + with pytest.raises(KeyError, match=key): + with tm.assert_produces_warning(FutureWarning): + idx0.get_value(ser, key) + + assert "2017-09" in idx0 + + assert p3 not in idx0 + + def test_contains_freq_mismatch(self): + rng = period_range("2007-01", freq="M", periods=10) + + assert Period("2007-01", freq="M") in rng + assert not Period("2007-01", freq="D") in rng + assert not Period("2007-01", freq="2M") in rng + + def test_contains_nat(self): + # see gh-13582 + idx = period_range("2007-01", freq="M", periods=10) + assert NaT not in idx + assert None not in idx + assert float("nan") not in idx + assert np.nan not in idx + + idx = PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M") + assert NaT in idx + assert None in idx + assert float("nan") in idx + assert np.nan in idx + + +class TestAsOfLocs: + def test_asof_locs_mismatched_type(self): + dti = date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + pi2 = dti.to_period("H") + + mask = np.array([0, 1, 0], dtype=bool) + + msg = "must be DatetimeIndex or PeriodIndex" + with pytest.raises(TypeError, match=msg): + pi.asof_locs(Int64Index(pi.asi8), mask) + + with pytest.raises(TypeError, match=msg): + pi.asof_locs(Float64Index(pi.asi8), mask) + + with pytest.raises(TypeError, match=msg): + # TimedeltaIndex + pi.asof_locs(dti - dti, mask) + + msg = "Input has different freq=H" + with pytest.raises(libperiod.IncompatibleFrequency, match=msg): + pi.asof_locs(pi2, mask) diff --git a/pandas/tests/indexes/period/test_join.py b/pandas/tests/indexes/period/test_join.py new file mode 100644 index 00000000..27cba867 --- /dev/null +++ b/pandas/tests/indexes/period/test_join.py @@ -0,0 +1,58 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import ( + Index, + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestJoin: + def test_join_outer_indexer(self): + pi = period_range("1/1/2000", "1/20/2000", freq="D") + + result = pi._outer_indexer(pi) + tm.assert_extension_array_equal(result[0], pi._values) + tm.assert_numpy_array_equal(result[1], np.arange(len(pi), dtype=np.intp)) + tm.assert_numpy_array_equal(result[2], np.arange(len(pi), dtype=np.intp)) + + def test_joins(self, join_type): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + joined = index.join(index[:-5], how=join_type) + + assert isinstance(joined, PeriodIndex) + assert joined.freq == index.freq + + def test_join_self(self, join_type): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + res = index.join(index, how=join_type) + assert index is res + + def test_join_does_not_recur(self): + df = tm.makeCustomDataframe( + 3, + 2, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + ser = df.iloc[:2, 0] + + res = ser.index.join(df.columns, how="outer") + 
expected = Index( + [ser.index[0], ser.index[1], df.columns[0], df.columns[1]], object + ) + tm.assert_index_equal(res, expected) + + def test_join_mismatched_freq_raises(self): + index = period_range("1/1/2000", "1/20/2000", freq="D") + index3 = period_range("1/1/2000", "1/20/2000", freq="2D") + msg = r".*Input has different freq=2D from Period\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + index.join(index3) diff --git a/pandas/tests/indexes/period/test_monotonic.py b/pandas/tests/indexes/period/test_monotonic.py new file mode 100644 index 00000000..15cb8f71 --- /dev/null +++ b/pandas/tests/indexes/period/test_monotonic.py @@ -0,0 +1,42 @@ +from pandas import ( + Period, + PeriodIndex, +) + + +def test_is_monotonic_increasing(): + # GH#17717 + p0 = Period("2017-09-01") + p1 = Period("2017-09-02") + p2 = Period("2017-09-03") + + idx_inc0 = PeriodIndex([p0, p1, p2]) + idx_inc1 = PeriodIndex([p0, p1, p1]) + idx_dec0 = PeriodIndex([p2, p1, p0]) + idx_dec1 = PeriodIndex([p2, p1, p1]) + idx = PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_increasing is True + assert idx_inc1.is_monotonic_increasing is True + assert idx_dec0.is_monotonic_increasing is False + assert idx_dec1.is_monotonic_increasing is False + assert idx.is_monotonic_increasing is False + + +def test_is_monotonic_decreasing(): + # GH#17717 + p0 = Period("2017-09-01") + p1 = Period("2017-09-02") + p2 = Period("2017-09-03") + + idx_inc0 = PeriodIndex([p0, p1, p2]) + idx_inc1 = PeriodIndex([p0, p1, p1]) + idx_dec0 = PeriodIndex([p2, p1, p0]) + idx_dec1 = PeriodIndex([p2, p1, p1]) + idx = PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_decreasing is False + assert idx_inc1.is_monotonic_decreasing is False + assert idx_dec0.is_monotonic_decreasing is True + assert idx_dec1.is_monotonic_decreasing is True + assert idx.is_monotonic_decreasing is False diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py new file mode 100644 index 00000000..2cf1cf0f --- /dev/null +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -0,0 +1,205 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + PeriodIndex, + Series, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestPeriodIndex: + def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): + # monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + original = ts.copy() + + result = ts["2007"] + expected = ts[1:3] + tm.assert_series_equal(result, expected) + result[:] = 1 + if using_copy_on_write: + tm.assert_series_equal(ts, original) + else: + assert (ts[1:3] == 1).all() + + # not monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[idx == "2007"] + tm.assert_series_equal(result, expected) + + def test_getitem_periodindex_quarter_string(self): + pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") + ser = Series(np.random.rand(len(pi)), index=pi).cumsum() + # Todo: fix these accessors! 
+ assert ser["05Q4"] == ser[2] + + def test_pindex_slice_index(self): + pi = period_range(start="1/1/10", end="12/31/12", freq="M") + s = Series(np.random.rand(len(pi)), index=pi) + res = s["2010"] + exp = s[0:12] + tm.assert_series_equal(res, exp) + res = s["2011"] + exp = s[12:24] + tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize("make_range", [date_range, period_range]) + def test_range_slice_day(self, make_range): + # GH#6716 + idx = make_range(start="2013/01/01", freq="D", periods=400) + + msg = "slice indices must be integers or None or have an __index__ method" + # slices against index should raise IndexError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for v in values: + with pytest.raises(TypeError, match=msg): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s["2013/01/02":], s[1:]) + tm.assert_series_equal(s["2013/01/02":"2013/01/05"], s[1:5]) + tm.assert_series_equal(s["2013/02":], s[31:]) + tm.assert_series_equal(s["2014":], s[365:]) + + invalid = ["2013/02/01 9H", "2013/02/01 09:00"] + for v in invalid: + with pytest.raises(TypeError, match=msg): + idx[v:] + + @pytest.mark.parametrize("make_range", [date_range, period_range]) + def test_range_slice_seconds(self, make_range): + # GH#6716 + idx = make_range(start="2013/01/01 09:00:00", freq="S", periods=4000) + msg = "slice indices must be integers or None or have an __index__ method" + + # slices against index should raise IndexError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for v in values: + with pytest.raises(TypeError, match=msg): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s["2013/01/01 09:05":"2013/01/01 09:10"], s[300:660]) + tm.assert_series_equal(s["2013/01/01 10:00":"2013/01/01 10:05"], s[3600:3960]) + tm.assert_series_equal(s["2013/01/01 10H":], s[3600:]) + tm.assert_series_equal(s[:"2013/01/01 09:30"], s[:1860]) + for d in ["2013/01/01", "2013/01", "2013"]: + tm.assert_series_equal(s[d:], s) + + @pytest.mark.parametrize("make_range", [date_range, period_range]) + def test_range_slice_outofbounds(self, make_range): + # GH#5407 + idx = make_range(start="2013/10/01", freq="D", periods=10) + + df = DataFrame({"units": [100 + i for i in range(10)]}, index=idx) + empty = DataFrame(index=type(idx)([], freq="D"), columns=["units"]) + empty["units"] = empty["units"].astype("int64") + + tm.assert_frame_equal(df["2013/09/01":"2013/09/30"], empty) + tm.assert_frame_equal(df["2013/09/30":"2013/10/02"], df.iloc[:2]) + tm.assert_frame_equal(df["2013/10/01":"2013/10/02"], df.iloc[:2]) + tm.assert_frame_equal(df["2013/10/02":"2013/09/30"], empty) + tm.assert_frame_equal(df["2013/10/15":"2013/10/17"], empty) + tm.assert_frame_equal(df["2013-06":"2013-09"], empty) + tm.assert_frame_equal(df["2013-11":"2013-12"], empty) + + @pytest.mark.parametrize("make_range", [date_range, period_range]) + def test_maybe_cast_slice_bound(self, make_range, frame_or_series): + idx = make_range(start="2013/10/01", freq="D", periods=10) + + obj = DataFrame({"units": [100 + i for i in range(10)]}, index=idx) + obj = tm.get_obj(obj, frame_or_series) + + msg = ( + f"cannot do slice indexing on {type(idx).__name__} with " + r"these indexers \[foo\] of type str" + ) + + # Check the lower-level calls are raising where expected. 
+ with pytest.raises(TypeError, match=msg): + idx._maybe_cast_slice_bound("foo", "left") + with pytest.raises(TypeError, match=msg): + idx.get_slice_bound("foo", "left") + + with pytest.raises(TypeError, match=msg): + obj["2013/09/30":"foo"] + with pytest.raises(TypeError, match=msg): + obj["foo":"2013/09/30"] + with pytest.raises(TypeError, match=msg): + obj.loc["2013/09/30":"foo"] + with pytest.raises(TypeError, match=msg): + obj.loc["foo":"2013/09/30"] + + def test_partial_slice_doesnt_require_monotonicity(self): + # See also: DatetimeIndex test ofm the same name + dti = date_range("2014-01-01", periods=30, freq="30D") + pi = dti.to_period("D") + + ser_montonic = Series(np.arange(30), index=pi) + + shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2)) + ser = ser_montonic[shuffler] + nidx = ser.index + + # Manually identified locations of year==2014 + indexer_2014 = np.array( + [0, 1, 2, 3, 4, 5, 6, 15, 16, 17, 18, 19, 20], dtype=np.intp + ) + assert (nidx[indexer_2014].year == 2014).all() + assert not (nidx[~indexer_2014].year == 2014).any() + + result = nidx.get_loc("2014") + tm.assert_numpy_array_equal(result, indexer_2014) + + expected = ser[indexer_2014] + + with tm.assert_produces_warning(FutureWarning): + result = nidx.get_value(ser, "2014") + tm.assert_series_equal(result, expected) + + result = ser.loc["2014"] + tm.assert_series_equal(result, expected) + + result = ser["2014"] + tm.assert_series_equal(result, expected) + + # Manually identified locations where ser.index is within Mat 2015 + indexer_may2015 = np.array([23], dtype=np.intp) + assert nidx[23].year == 2015 and nidx[23].month == 5 + + result = nidx.get_loc("May 2015") + tm.assert_numpy_array_equal(result, indexer_may2015) + + expected = ser[indexer_may2015] + + with tm.assert_produces_warning(FutureWarning): + result = nidx.get_value(ser, "May 2015") + tm.assert_series_equal(result, expected) + + result = ser.loc["May 2015"] + tm.assert_series_equal(result, expected) + + result = ser["May 2015"] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py new file mode 100644 index 00000000..e5c85edf --- /dev/null +++ b/pandas/tests/indexes/period/test_period.py @@ -0,0 +1,353 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs.period import IncompatibleFrequency + +from pandas import ( + Index, + NaT, + Period, + PeriodIndex, + Series, + date_range, + offsets, + period_range, +) +import pandas._testing as tm +from pandas.tests.indexes.datetimelike import DatetimeLike + + +class TestPeriodIndex(DatetimeLike): + _index_cls = PeriodIndex + + @pytest.fixture + def simple_index(self) -> Index: + return period_range("20130101", periods=5, freq="D") + + @pytest.fixture( + params=[ + tm.makePeriodIndex(10), + period_range("20130101", periods=10, freq="D")[::-1], + ], + ids=["index_inc", "index_dec"], + ) + def index(self, request): + return request.param + + def test_where(self): + # This is handled in test_indexing + pass + + def test_make_time_series(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + series = Series(1, index=index) + assert isinstance(series, Series) + + def test_view_asi8(self): + idx = PeriodIndex([], freq="M") + + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = PeriodIndex(["2011-01", NaT], freq="M") + + exp = np.array([492, -9223372036854775808], dtype=np.int64) + 
tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + idx = PeriodIndex(["2011-01-01", NaT], freq="D") + tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + def test_values(self): + idx = PeriodIndex([], freq="M") + + exp = np.array([], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = PeriodIndex(["2011-01", NaT], freq="M") + + exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = PeriodIndex(["2011-01-01", NaT], freq="D") + + exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.asi8, exp) + + def test_period_index_length(self): + pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") + assert len(pi) == 9 + + pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") + assert len(pi) == 4 * 9 + + pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") + assert len(pi) == 12 * 9 + + start = Period("02-Apr-2005", "B") + i1 = period_range(start=start, periods=20) + assert len(i1) == 20 + assert i1.freq == start.freq + assert i1[0] == start + + end_intv = Period("2006-12-31", "W") + i1 = period_range(end=end_intv, periods=10) + assert len(i1) == 10 + assert i1.freq == end_intv.freq + assert i1[-1] == end_intv + + end_intv = Period("2006-12-31", "1w") + i2 = period_range(end=end_intv, periods=10) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq + + msg = "start and end must have same freq" + with pytest.raises(ValueError, match=msg): + period_range(start=start, end=end_intv) + + end_intv = Period("2005-05-01", "B") + i1 = period_range(start=start, end=end_intv) + + msg = ( + "Of the three parameters: start, end, and periods, exactly two " + "must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range(start=start) + + # infer freq from first element + i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) + assert len(i2) == 2 + assert i2[0] == end_intv + + i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) + assert len(i2) == 2 + assert i2[0] == end_intv + + # Mixed freq should fail + vals = [end_intv, Period("2006-12-31", "w")] + msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(vals) + vals = np.array(vals) + with pytest.raises(ValueError, match=msg): + PeriodIndex(vals) + + def test_fields(self): + # year, month, day, hour, minute + # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter + # qyear + pi = period_range(freq="A", start="1/1/2001", end="12/1/2005") + self._check_all_fields(pi) + + pi = period_range(freq="Q", start="1/1/2001", end="12/1/2002") + self._check_all_fields(pi) + + pi = period_range(freq="M", start="1/1/2001", end="1/1/2002") + self._check_all_fields(pi) + + pi = period_range(freq="D", start="12/1/2001", end="6/1/2001") + self._check_all_fields(pi) 
+ + pi = period_range(freq="B", start="12/1/2001", end="6/1/2001") + self._check_all_fields(pi) + + pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00") + self._check_all_fields(pi) + + pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20") + self._check_all_fields(pi) + + pi = period_range( + freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00" + ) + self._check_all_fields(pi) + + end_intv = Period("2006-12-31", "W") + i1 = period_range(end=end_intv, periods=10) + self._check_all_fields(i1) + + def _check_all_fields(self, periodindex): + fields = [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "weekofyear", + "week", + "dayofweek", + "day_of_week", + "dayofyear", + "day_of_year", + "quarter", + "qyear", + "days_in_month", + ] + + periods = list(periodindex) + ser = Series(periodindex) + + for field in fields: + field_idx = getattr(periodindex, field) + assert len(periodindex) == len(field_idx) + for x, val in zip(periods, field_idx): + assert getattr(x, field) == val + + if len(ser) == 0: + continue + + field_s = getattr(ser.dt, field) + assert len(periodindex) == len(field_s) + for x, val in zip(periods, field_s): + assert getattr(x, field) == val + + def test_is_(self): + create_index = lambda: period_range(freq="A", start="1/1/2001", end="12/1/2009") + index = create_index() + assert index.is_(index) + assert not index.is_(create_index()) + assert index.is_(index.view()) + assert index.is_(index.view().view().view().view().view()) + assert index.view().is_(index) + ind2 = index.view() + index.name = "Apple" + assert ind2.is_(index) + assert not index.is_(index[:]) + assert not index.is_(index.asfreq("M")) + assert not index.is_(index.asfreq("A")) + + assert not index.is_(index - 2) + assert not index.is_(index - 0) + + def test_index_unique(self): + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") + expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") + tm.assert_index_equal(idx.unique(), expected) + assert idx.nunique() == 3 + + def test_shift(self): + # This is tested in test_arithmetic + pass + + def test_negative_ordinals(self): + Period(ordinal=-1000, freq="A") + Period(ordinal=0, freq="A") + + idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A") + idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A") + tm.assert_index_equal(idx1, idx2) + + def test_pindex_fieldaccessor_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name" + ) + + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name") + tm.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") + tm.assert_index_equal(idx.month, exp) + + def test_pindex_multiples(self): + expected = PeriodIndex( + ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], + freq="2M", + ) + + pi = period_range(start="1/1/11", end="12/31/11", freq="2M") + tm.assert_index_equal(pi, expected) + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == "2M" + + pi = period_range(start="1/1/11", periods=6, freq="2M") + tm.assert_index_equal(pi, expected) + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == "2M" + + def test_iteration(self): + index = period_range(start="1/1/10", periods=4, freq="B") + + result = list(index) + assert isinstance(result[0], Period) + assert result[0].freq == index.freq + + def test_with_multi_index(self): + # #1705 + index = date_range("1/1/2012", periods=4, freq="12H") + index_as_arrays = 
[index.to_period(freq="D"), index.hour] + + s = Series([0, 1, 2, 3], index_as_arrays) + + assert isinstance(s.index.levels[0], PeriodIndex) + + assert isinstance(s.index.values[0][0], Period) + + def test_map(self): + # test_map_dictlike generally tests + + index = PeriodIndex([2005, 2007, 2009], freq="A") + result = index.map(lambda x: x.ordinal) + exp = Index([x.ordinal for x in index]) + tm.assert_index_equal(result, exp) + + def test_format_empty(self): + # GH35712 + empty_idx = self._index_cls([], freq="A") + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] + + +def test_maybe_convert_timedelta(): + pi = PeriodIndex(["2000", "2001"], freq="D") + offset = offsets.Day(2) + assert pi._maybe_convert_timedelta(offset) == 2 + assert pi._maybe_convert_timedelta(2) == 2 + + offset = offsets.BusinessDay() + msg = r"Input has different freq=B from PeriodIndex\(freq=D\)" + with pytest.raises(ValueError, match=msg): + pi._maybe_convert_timedelta(offset) + + +@pytest.mark.parametrize("array", [True, False]) +def test_dunder_array(array): + obj = PeriodIndex(["2000-01-01", "2001-01-01"], freq="D") + if array: + obj = obj._data + + expected = np.array([obj[0], obj[1]], dtype=object) + result = np.array(obj) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(obj) + tm.assert_numpy_array_equal(result, expected) + + expected = obj.asi8 + for dtype in ["i8", "int64", np.int64]: + result = np.array(obj, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(obj, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + for dtype in ["float64", "int32", "uint64"]: + msg = "argument must be" + with pytest.raises(TypeError, match=msg): + np.array(obj, dtype=dtype) + with pytest.raises(TypeError, match=msg): + np.array(obj, dtype=getattr(np, dtype)) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py new file mode 100644 index 00000000..c94ddf57 --- /dev/null +++ b/pandas/tests/indexes/period/test_period_range.py @@ -0,0 +1,121 @@ +import numpy as np +import pytest + +from pandas import ( + NaT, + Period, + PeriodIndex, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestPeriodRange: + def test_required_arguments(self): + msg = ( + "Of the three parameters: start, end, and periods, exactly two " + "must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range("2011-1-1", "2012-1-1", "B") + + @pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"]) + def test_construction_from_string(self, freq): + # non-empty + expected = date_range( + start="2017-01-01", periods=5, freq=freq, name="foo" + ).to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq=freq, name="foo") + + result = period_range(start=start, periods=0, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + def 
test_construction_from_period(self): + # upsampling + start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q") + expected = date_range( + start="2017-03-31", end="2018-03-31", freq="M", name="foo" + ).to_period() + result = period_range(start=start, end=end, freq="M", name="foo") + tm.assert_index_equal(result, expected) + + # downsampling + start, end = Period("2017-1", freq="M"), Period("2019-12", freq="M") + expected = date_range( + start="2017-01-31", end="2019-12-31", freq="Q", name="foo" + ).to_period() + result = period_range(start=start, end=end, freq="Q", name="foo") + tm.assert_index_equal(result, expected) + + # test for issue # 21793 + start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q") + idx = period_range(start=start, end=end, freq="Q", name="foo") + result = idx == idx.values + expected = np.array([True, True, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty + expected = PeriodIndex([], freq="W", name="foo") + + result = period_range(start=start, periods=0, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the three parameters: start, end, and periods, " + "exactly two must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range(start="2017Q1") + + with pytest.raises(ValueError, match=msg): + period_range(end="2017Q1") + + with pytest.raises(ValueError, match=msg): + period_range(periods=5) + + with pytest.raises(ValueError, match=msg): + period_range() + + # too many params + with pytest.raises(ValueError, match=msg): + period_range(start="2017Q1", end="2018Q1", periods=8, freq="Q") + + # start/end NaT + msg = "start and end must not be NaT" + with pytest.raises(ValueError, match=msg): + period_range(start=NaT, end="2018Q1") + + with pytest.raises(ValueError, match=msg): + period_range(start="2017Q1", end=NaT) + + # invalid periods param + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + period_range(start="2017Q1", periods="foo") diff --git a/pandas/tests/indexes/period/test_pickle.py b/pandas/tests/indexes/period/test_pickle.py new file mode 100644 index 00000000..82f906d1 --- /dev/null +++ b/pandas/tests/indexes/period/test_pickle.py @@ -0,0 +1,26 @@ +import numpy as np +import pytest + +from pandas import ( + NaT, + PeriodIndex, + period_range, +) +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestPickle: + @pytest.mark.parametrize("freq", ["D", "M", "A"]) + def test_pickle_round_trip(self, freq): + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq=freq) + result = tm.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) + + def test_pickle_freq(self): + # GH#2891 + prng = period_range("1/1/2011", "1/1/2012", freq="M") + new_prng = tm.round_trip_pickle(prng) + assert new_prng.freq == offsets.MonthEnd() + assert new_prng.freqstr == "M" diff --git a/pandas/tests/indexes/period/test_resolution.py b/pandas/tests/indexes/period/test_resolution.py new file mode 100644 index 00000000..7ecbde75 --- /dev/null +++ b/pandas/tests/indexes/period/test_resolution.py @@ -0,0 +1,23 @@ +import pytest + +import pandas as pd + + +class TestResolution: + @pytest.mark.parametrize( + "freq,expected", + [ + ("A", "year"), + ("Q", 
"quarter"), + ("M", "month"), + ("D", "day"), + ("H", "hour"), + ("T", "minute"), + ("S", "second"), + ("L", "millisecond"), + ("U", "microsecond"), + ], + ) + def test_resolution(self, freq, expected): + idx = pd.period_range(start="2013-04-01", periods=30, freq=freq) + assert idx.resolution == expected diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py new file mode 100644 index 00000000..a42b8496 --- /dev/null +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -0,0 +1,32 @@ +"""Tests for PeriodIndex behaving like a vectorized Period scalar""" + +from pandas import ( + Timedelta, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestPeriodIndexOps: + def test_start_time(self): + # GH#17157 + index = period_range(freq="M", start="2016-01-01", end="2016-05-31") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="MS") + tm.assert_index_equal(index.start_time, expected_index) + + def test_end_time(self): + # GH#17157 + index = period_range(freq="M", start="2016-01-01", end="2016-05-31") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="M") + expected_index += Timedelta(1, "D") - Timedelta(1, "ns") + tm.assert_index_equal(index.end_time, expected_index) + + def test_end_time_business_friday(self): + # GH#34449 + pi = period_range("1990-01-05", freq="B", periods=1) + result = pi.end_time + + dti = date_range("1990-01-05", freq="D", periods=1)._with_freq(None) + expected = dti + Timedelta(days=1, nanoseconds=-1) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_searchsorted.py b/pandas/tests/indexes/period/test_searchsorted.py new file mode 100644 index 00000000..b9863d1b --- /dev/null +++ b/pandas/tests/indexes/period/test_searchsorted.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import ( + NaT, + Period, + PeriodIndex, +) +import pandas._testing as tm + + +class TestSearchsorted: + @pytest.mark.parametrize("freq", ["D", "2D"]) + def test_searchsorted(self, freq): + pidx = PeriodIndex( + ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"], + freq=freq, + ) + + p1 = Period("2014-01-01", freq=freq) + assert pidx.searchsorted(p1) == 0 + + p2 = Period("2014-01-04", freq=freq) + assert pidx.searchsorted(p2) == 3 + + assert pidx.searchsorted(NaT) == 5 + + msg = "Input has different freq=H from PeriodArray" + with pytest.raises(IncompatibleFrequency, match=msg): + pidx.searchsorted(Period("2014-01-01", freq="H")) + + msg = "Input has different freq=5D from PeriodArray" + with pytest.raises(IncompatibleFrequency, match=msg): + pidx.searchsorted(Period("2014-01-01", freq="5D")) + + def test_searchsorted_different_argument_classes(self, listlike_box): + pidx = PeriodIndex( + ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"], + freq="D", + ) + result = pidx.searchsorted(listlike_box(pidx)) + expected = np.arange(len(pidx), dtype=result.dtype) + tm.assert_numpy_array_equal(result, expected) + + result = pidx._data.searchsorted(listlike_box(pidx)) + tm.assert_numpy_array_equal(result, expected) + + def test_searchsorted_invalid(self): + pidx = PeriodIndex( + ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"], + freq="D", + ) + + other = np.array([0, 1], dtype=np.int64) + + msg = "|".join( + [ + "searchsorted requires compatible dtype or scalar", + "value should be a 'Period', 'NaT', or array of those. 
Got", + ] + ) + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(other) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(other.astype("timedelta64[ns]")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.timedelta64(4)) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.timedelta64("NaT", "ms")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.datetime64(4, "ns")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.datetime64("NaT", "ns")) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py new file mode 100644 index 00000000..bac231ef --- /dev/null +++ b/pandas/tests/indexes/period/test_setops.py @@ -0,0 +1,360 @@ +import numpy as np + +import pandas as pd +from pandas import ( + PeriodIndex, + date_range, + period_range, +) +import pandas._testing as tm + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestPeriodIndex: + def test_union(self, sort): + # union + other1 = period_range("1/1/2000", freq="D", periods=5) + rng1 = period_range("1/6/2000", freq="D", periods=5) + expected1 = PeriodIndex( + [ + "2000-01-06", + "2000-01-07", + "2000-01-08", + "2000-01-09", + "2000-01-10", + "2000-01-01", + "2000-01-02", + "2000-01-03", + "2000-01-04", + "2000-01-05", + ], + freq="D", + ) + + rng2 = period_range("1/1/2000", freq="D", periods=5) + other2 = period_range("1/4/2000", freq="D", periods=5) + expected2 = period_range("1/1/2000", freq="D", periods=8) + + rng3 = period_range("1/1/2000", freq="D", periods=5) + other3 = PeriodIndex([], freq="D") + expected3 = period_range("1/1/2000", freq="D", periods=5) + + rng4 = period_range("2000-01-01 09:00", freq="H", periods=5) + other4 = period_range("2000-01-02 09:00", freq="H", periods=5) + expected4 = PeriodIndex( + [ + "2000-01-01 09:00", + "2000-01-01 10:00", + "2000-01-01 11:00", + "2000-01-01 12:00", + "2000-01-01 13:00", + "2000-01-02 09:00", + "2000-01-02 10:00", + "2000-01-02 11:00", + "2000-01-02 12:00", + "2000-01-02 13:00", + ], + freq="H", + ) + + rng5 = PeriodIndex( + ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"], freq="T" + ) + other5 = PeriodIndex( + ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"], freq="T" + ) + expected5 = PeriodIndex( + [ + "2000-01-01 09:01", + "2000-01-01 09:03", + "2000-01-01 09:05", + "2000-01-01 09:08", + ], + freq="T", + ) + + rng6 = period_range("2000-01-01", freq="M", periods=7) + other6 = period_range("2000-04-01", freq="M", periods=7) + expected6 = period_range("2000-01-01", freq="M", periods=10) + + rng7 = period_range("2003-01-01", freq="A", periods=5) + other7 = period_range("1998-01-01", freq="A", periods=8) + expected7 = PeriodIndex( + [ + "2003", + "2004", + "2005", + "2006", + "2007", + "1998", + "1999", + "2000", + "2001", + "2002", + ], + freq="A", + ) + + rng8 = PeriodIndex( + ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"], freq="D" + ) + other8 = period_range("1/6/2000", freq="D", periods=5) + expected8 = PeriodIndex( + [ + "1/3/2000", + "1/2/2000", + "1/1/2000", + "1/5/2000", + "1/4/2000", + "1/6/2000", + "1/7/2000", + "1/8/2000", + "1/9/2000", + "1/10/2000", + ], + freq="D", + ) + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + (rng4, other4, expected4), + (rng5, other5, expected5), + (rng6, other6, expected6), + (rng7, other7, expected7), + (rng8, other8, expected8), + ]: + + result_union = 
rng.union(other, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result_union, expected) + + def test_union_misc(self, sort): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + result = index[:-5].union(index[10:], sort=sort) + tm.assert_index_equal(result, index) + + # not in order + result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort) + if sort is None: + tm.assert_index_equal(result, index) + assert tm.equalContents(result, index) + + # cast if different frequencies + index = period_range("1/1/2000", "1/20/2000", freq="D") + index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED") + result = index.union(index2, sort=sort) + expected = index.astype(object).union(index2.astype(object), sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection(self, sort): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + result = index[:-5].intersection(index[10:], sort=sort) + tm.assert_index_equal(result, index[10:-5]) + + # not in order + left = _permute(index[:-5]) + right = _permute(index[10:]) + result = left.intersection(right, sort=sort) + if sort is None: + tm.assert_index_equal(result, index[10:-5]) + assert tm.equalContents(result, index[10:-5]) + + # cast if different frequencies + index = period_range("1/1/2000", "1/20/2000", freq="D") + index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED") + + result = index.intersection(index2, sort=sort) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) + + index3 = period_range("1/1/2000", "1/20/2000", freq="2D") + result = index.intersection(index3, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection_cases(self, sort): + base = period_range("6/1/2000", "6/30/2000", freq="D", name="idx") + + # if target has the same name, it is preserved + rng2 = period_range("5/15/2000", "6/20/2000", freq="D", name="idx") + expected2 = period_range("6/1/2000", "6/20/2000", freq="D", name="idx") + + # if target name is different, it will be reset + rng3 = period_range("5/15/2000", "6/20/2000", freq="D", name="other") + expected3 = period_range("6/1/2000", "6/20/2000", freq="D", name=None) + + rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = PeriodIndex([], name="idx", freq="D") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + # non-monotonic + base = PeriodIndex( + ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], + freq="D", + name="idx", + ) + + rng2 = PeriodIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + freq="D", + name="idx", + ) + expected2 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name="idx") + + rng3 = PeriodIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + freq="D", + name="other", + ) + expected3 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name=None) + + rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = PeriodIndex([], freq="D", name="idx") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == "D" + + # empty 
same freq + rng = date_range("6/1/2000", "6/15/2000", freq="T") + result = rng[0:0].intersection(rng) + assert len(result) == 0 + + result = rng.intersection(rng[0:0]) + assert len(result) == 0 + + def test_difference(self, sort): + # diff + period_rng = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"] + rng1 = PeriodIndex(period_rng, freq="D") + other1 = period_range("1/6/2000", freq="D", periods=5) + expected1 = rng1 + + rng2 = PeriodIndex(period_rng, freq="D") + other2 = period_range("1/4/2000", freq="D", periods=5) + expected2 = PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D") + + rng3 = PeriodIndex(period_rng, freq="D") + other3 = PeriodIndex([], freq="D") + expected3 = rng3 + + period_rng = [ + "2000-01-01 10:00", + "2000-01-01 09:00", + "2000-01-01 12:00", + "2000-01-01 11:00", + "2000-01-01 13:00", + ] + rng4 = PeriodIndex(period_rng, freq="H") + other4 = period_range("2000-01-02 09:00", freq="H", periods=5) + expected4 = rng4 + + rng5 = PeriodIndex( + ["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"], freq="T" + ) + other5 = PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T") + expected5 = PeriodIndex(["2000-01-01 09:03"], freq="T") + + period_rng = [ + "2000-02-01", + "2000-01-01", + "2000-06-01", + "2000-07-01", + "2000-05-01", + "2000-03-01", + "2000-04-01", + ] + rng6 = PeriodIndex(period_rng, freq="M") + other6 = period_range("2000-04-01", freq="M", periods=7) + expected6 = PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M") + + period_rng = ["2003", "2007", "2006", "2005", "2004"] + rng7 = PeriodIndex(period_rng, freq="A") + other7 = period_range("1998-01-01", freq="A", periods=8) + expected7 = PeriodIndex(["2007", "2006"], freq="A") + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + (rng4, other4, expected4), + (rng5, other5, expected5), + (rng6, other6, expected6), + (rng7, other7, expected7), + ]: + result_difference = rng.difference(other, sort=sort) + if sort is None and len(other): + # We dont sort (yet?) 
when empty GH#24959 + expected = expected.sort_values() + tm.assert_index_equal(result_difference, expected) + + def test_difference_freq(self, sort): + # GH14323: difference of Period MUST preserve frequency + # but the ability to union results must be preserved + + index = period_range("20160920", "20160925", freq="D") + + other = period_range("20160921", "20160924", freq="D") + expected = PeriodIndex(["20160920", "20160925"], freq="D") + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = period_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other, sort) + expected = PeriodIndex(["20160920", "20160921"], freq="D") + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + def test_intersection_equal_duplicates(self): + # GH#38302 + idx = period_range("2011-01-01", periods=2) + idx_dup = idx.append(idx) + result = idx_dup.intersection(idx_dup) + tm.assert_index_equal(result, idx) + + def test_union_duplicates(self): + # GH#36289 + idx = period_range("2011-01-01", periods=2) + idx_dup = idx.append(idx) + + idx2 = period_range("2011-01-02", periods=2) + idx2_dup = idx2.append(idx2) + result = idx_dup.union(idx2_dup) + + expected = PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-02", + "2011-01-03", + "2011-01-03", + ], + freq="D", + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py new file mode 100644 index 00000000..41b76d6a --- /dev/null +++ b/pandas/tests/indexes/period/test_tools.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest + +from pandas import ( + Period, + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestPeriodRepresentation: + """ + Wish to match NumPy units + """ + + @pytest.mark.parametrize( + "freq, base_date", + [ + ("W-THU", "1970-01-01"), + ("D", "1970-01-01"), + ("B", "1970-01-01"), + ("H", "1970-01-01"), + ("T", "1970-01-01"), + ("S", "1970-01-01"), + ("L", "1970-01-01"), + ("U", "1970-01-01"), + ("N", "1970-01-01"), + ("M", "1970-01"), + ("A", 1970), + ], + ) + def test_freq(self, freq, base_date): + rng = period_range(start=base_date, periods=10, freq=freq) + exp = np.arange(10, dtype=np.int64) + + tm.assert_numpy_array_equal(rng.asi8, exp) + + +class TestPeriodIndexConversion: + def test_tolist(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + rs = index.tolist() + for x in rs: + assert isinstance(x, Period) + + recon = PeriodIndex(rs) + tm.assert_index_equal(index, recon) diff --git a/pandas/tests/indexes/ranges/__init__.py b/pandas/tests/indexes/ranges/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/ranges/test_constructors.py b/pandas/tests/indexes/ranges/test_constructors.py new file mode 100644 index 00000000..c4f26220 --- /dev/null +++ b/pandas/tests/indexes/ranges/test_constructors.py @@ -0,0 +1,167 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + Index, + RangeIndex, + Series, +) +import pandas._testing as tm + + +class TestRangeIndexConstructors: + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize( + "args, kwargs, start, stop, step", + [ + ((5,), {}, 0, 5, 1), + ((1, 5), {}, 1, 5, 1), + ((1, 5, 2), {}, 1, 5, 2), + ((0,), {}, 0, 0, 1), + ((0, 0), {}, 0, 0, 1), + ((), {"start": 0}, 0, 0, 1), + ((), {"stop": 
0}, 0, 0, 1), + ], + ) + def test_constructor(self, args, kwargs, start, stop, step, name): + result = RangeIndex(*args, name=name, **kwargs) + expected = Index(np.arange(start, stop, step, dtype=np.int64), name=name) + assert isinstance(result, RangeIndex) + assert result.name is name + assert result._range == range(start, stop, step) + tm.assert_index_equal(result, expected, exact="equiv") + + def test_constructor_invalid_args(self): + msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers" + with pytest.raises(TypeError, match=msg): + RangeIndex() + + with pytest.raises(TypeError, match=msg): + RangeIndex(name="Foo") + + # we don't allow on a bare Index + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, 0 was passed" + ) + with pytest.raises(TypeError, match=msg): + Index(0) + + @pytest.mark.parametrize( + "args", + [ + Index(["a", "b"]), + Series(["a", "b"]), + np.array(["a", "b"]), + [], + np.arange(0, 10), + np.array([1]), + [1], + ], + ) + def test_constructor_additional_invalid_args(self, args): + msg = f"Value needs to be a scalar value, was type {type(args).__name__}" + with pytest.raises(TypeError, match=msg): + RangeIndex(args) + + @pytest.mark.parametrize("args", ["foo", datetime(2000, 1, 1, 0, 0)]) + def test_constructor_invalid_args_wrong_type(self, args): + msg = f"Wrong type {type(args)} for value {args}" + with pytest.raises(TypeError, match=msg): + RangeIndex(args) + + def test_constructor_same(self): + + # pass thru w and w/o copy + index = RangeIndex(1, 5, 2) + result = RangeIndex(index, copy=False) + assert result.identical(index) + + result = RangeIndex(index, copy=True) + tm.assert_index_equal(result, index, exact=True) + + result = RangeIndex(index) + tm.assert_index_equal(result, index, exact=True) + + with pytest.raises( + ValueError, + match="Incorrect `dtype` passed: expected signed integer, received float64", + ): + RangeIndex(index, dtype="float64") + + def test_constructor_range_object(self): + result = RangeIndex(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) + + def test_constructor_range(self): + + result = RangeIndex.from_range(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = RangeIndex.from_range(range(5, 6)) + expected = RangeIndex(5, 6, 1) + tm.assert_index_equal(result, expected, exact=True) + + # an invalid range + result = RangeIndex.from_range(range(5, 1)) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected, exact=True) + + result = RangeIndex.from_range(range(5)) + expected = RangeIndex(0, 5, 1) + tm.assert_index_equal(result, expected, exact=True) + + result = Index(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) + + msg = ( + r"(RangeIndex.)?from_range\(\) got an unexpected keyword argument( 'copy')?" 
+ ) + with pytest.raises(TypeError, match=msg): + RangeIndex.from_range(range(10), copy=True) + + def test_constructor_name(self): + # GH#12288 + orig = RangeIndex(10) + orig.name = "original" + + copy = RangeIndex(orig) + copy.name = "copy" + + assert orig.name == "original" + assert copy.name == "copy" + + new = Index(copy) + assert new.name == "copy" + + new.name = "new" + assert orig.name == "original" + assert copy.name == "copy" + assert new.name == "new" + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = RangeIndex(1, 5) + assert index.values.dtype == np.int64 + with tm.assert_produces_warning(FutureWarning, match="will not infer"): + expected = Index(arr).astype("int64") + + tm.assert_index_equal(index, expected, exact="equiv") + + # non-int raise Exception + with pytest.raises(TypeError, match=r"Wrong type \<class 'str'\>"): + RangeIndex("1", "10", "1") + with pytest.raises(TypeError, match=r"Wrong type \<class 'float'\>"): + RangeIndex(1.1, 10.2, 1.3) + + # invalid passed type + with pytest.raises( + ValueError, + match="Incorrect `dtype` passed: expected signed integer, received float64", + ): + RangeIndex(1, 5, dtype="float64") diff --git a/pandas/tests/indexes/ranges/test_indexing.py b/pandas/tests/indexes/ranges/test_indexing.py new file mode 100644 index 00000000..f8c3eff0 --- /dev/null +++ b/pandas/tests/indexes/ranges/test_indexing.py @@ -0,0 +1,93 @@ +import numpy as np +import pytest + +from pandas import RangeIndex +import pandas._testing as tm +from pandas.core.api import Int64Index + + +class TestGetIndexer: + def test_get_indexer(self): + index = RangeIndex(start=0, stop=20, step=2) + target = RangeIndex(10) + indexer = index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + index = RangeIndex(start=0, stop=20, step=2) + target = RangeIndex(10) + indexer = index.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + index = RangeIndex(start=0, stop=20, step=2) + target = RangeIndex(10) + indexer = index.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_limit(self): + # GH#28631 + idx = RangeIndex(4) + target = RangeIndex(6) + result = idx.get_indexer(target, method="pad", limit=1) + expected = np.array([0, 1, 2, 3, 3, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("stop", [0, -1, -2]) + def test_get_indexer_decreasing(self, stop): + # GH#28678 + index = RangeIndex(7, stop, -3) + result = index.get_indexer(range(9)) + expected = np.array([-1, 2, -1, -1, 1, -1, -1, 0, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +class TestTake: + def test_take_preserve_name(self): + index = RangeIndex(1, 5, name="foo") + taken = index.take([3, 0, 1]) + assert index.name == taken.name + + def test_take_fill_value(self): + # GH#12631 + idx = RangeIndex(1, 4, name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = Int64Index([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + msg = "Unable to fill values because RangeIndex cannot contain NA" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -1]), fill_value=True) + + # allow_fill=False +
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = Int64Index([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + msg = "Unable to fill values because RangeIndex cannot contain NA" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestWhere: + def test_where_putmask_range_cast(self): + # GH#43240 + idx = RangeIndex(0, 5, name="test") + + mask = np.array([True, True, False, False, False]) + result = idx.putmask(mask, 10) + expected = Int64Index([10, 10, 2, 3, 4], name="test") + tm.assert_index_equal(result, expected) + + result = idx.where(~mask, 10) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py new file mode 100644 index 00000000..ed21996d --- /dev/null +++ b/pandas/tests/indexes/ranges/test_join.py @@ -0,0 +1,178 @@ +import numpy as np + +from pandas import ( + Index, + RangeIndex, +) +import pandas._testing as tm +from pandas.core.indexes.api import Int64Index + + +class TestJoin: + def test_join_outer(self): + # join with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Int64Index( + [0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ) + elidx = np.array( + [0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], + dtype=np.intp, + ) + eridx = np.array( + [-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], + dtype=np.intp, + ) + + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + # Join with non-RangeIndex + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([16, 18]) + elidx = np.array([8, 9], dtype=np.intp) + eridx = np.array([9, 7], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # Join two RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres, exact="equiv") + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + 
+ def test_join_left(self): + # Join with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + eres = index + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # Join withRangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + # Join with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], dtype=np.intp) + + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # Join withRangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + + assert isinstance(other, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + def test_join_non_int_index(self): + index = RangeIndex(start=0, stop=20, step=2) + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = index.join(other, how="outer") + outer2 = other.join(index, how="outer") + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index.join(other, how="inner") + inner2 = other.join(index, how="inner") + expected = Index([6, 8, 10]) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index.join(other, how="left") + tm.assert_index_equal(left, index.astype(object)) + + left2 = other.join(index, how="left") + tm.assert_index_equal(left2, other) + + right = index.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index, how="right") + tm.assert_index_equal(right2, index.astype(object)) + + def test_join_non_unique(self): + index = RangeIndex(start=0, stop=20, step=2) + other = Index([4, 4, 3, 3]) + + res, lidx, ridx = index.join(other, return_indexers=True) + + eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self, join_type): + index = RangeIndex(start=0, stop=20, step=2) + joined = index.join(index, how=join_type) + assert index is joined diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py new file mode 100644 index 00000000..1c65c369 --- /dev/null +++ b/pandas/tests/indexes/ranges/test_range.py @@ -0,0 +1,626 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import ensure_platform_int + +import pandas as pd +import pandas._testing as tm +from pandas.core.indexes.api import ( + Float64Index, + Index, + Int64Index, + RangeIndex, +) +from pandas.tests.indexes.common 
import NumericBase + +# aliases to make some tests easier to read +RI = RangeIndex +I64 = Int64Index +F64 = Float64Index +OI = Index + + +class TestRangeIndex(NumericBase): + _index_cls = RangeIndex + + @pytest.fixture + def dtype(self): + return np.int64 + + @pytest.fixture( + params=["uint64", "float64", "category", "datetime64", "object"], + ) + def invalid_dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self) -> Index: + return self._index_cls(start=0, stop=20, step=2) + + @pytest.fixture( + params=[ + RangeIndex(start=0, stop=20, step=2, name="foo"), + RangeIndex(start=18, stop=-1, step=-2, name="bar"), + ], + ids=["index_inc", "index_dec"], + ) + def index(self, request): + return request.param + + def test_constructor_unwraps_index(self, dtype): + result = self._index_cls(1, 3) + expected = np.array([1, 2], dtype=dtype) + tm.assert_numpy_array_equal(result._data, expected) + + def test_can_hold_identifiers(self, simple_index): + idx = simple_index + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + + def test_too_many_names(self, simple_index): + index = simple_index + with pytest.raises(ValueError, match="^Length"): + index.names = ["roger", "harold"] + + @pytest.mark.parametrize( + "index, start, stop, step", + [ + (RangeIndex(5), 0, 5, 1), + (RangeIndex(0, 5), 0, 5, 1), + (RangeIndex(5, step=2), 0, 5, 2), + (RangeIndex(1, 5, 2), 1, 5, 2), + ], + ) + def test_start_stop_step_attrs(self, index, start, stop, step): + # GH 25710 + assert index.start == start + assert index.stop == stop + assert index.step == step + + @pytest.mark.parametrize("attr_name", ["_start", "_stop", "_step"]) + def test_deprecated_start_stop_step_attrs(self, attr_name, simple_index): + # GH 26581 + idx = simple_index + with tm.assert_produces_warning(FutureWarning): + getattr(idx, attr_name) + + def test_copy(self): + i = RangeIndex(5, name="Foo") + i_copy = i.copy() + assert i_copy is not i + assert i_copy.identical(i) + assert i_copy._range == range(0, 5, 1) + assert i_copy.name == "Foo" + + def test_repr(self): + i = RangeIndex(5, name="Foo") + result = repr(i) + expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')" + assert result == expected + + result = eval(result) + tm.assert_index_equal(result, i, exact=True) + + i = RangeIndex(5, 0, -1) + result = repr(i) + expected = "RangeIndex(start=5, stop=0, step=-1)" + assert result == expected + + result = eval(result) + tm.assert_index_equal(result, i, exact=True) + + def test_insert(self): + + idx = RangeIndex(5, name="Foo") + result = idx[1:4] + + # test 0th element + tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]), exact="equiv") + + # GH 18295 (test missing) + expected = Float64Index([0, np.nan, 1, 2, 3, 4]) + for na in [np.nan, None, pd.NA]: + result = RangeIndex(5).insert(1, na) + tm.assert_index_equal(result, expected) + + result = RangeIndex(5).insert(1, pd.NaT) + expected = Index([0, pd.NaT, 1, 2, 3, 4], dtype=object) + tm.assert_index_equal(result, expected) + + def test_insert_edges_preserves_rangeindex(self): + idx = Index(range(4, 9, 2)) + + result = idx.insert(0, 2) + expected = Index(range(2, 9, 2)) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.insert(3, 10) + expected = Index(range(4, 11, 2)) + tm.assert_index_equal(result, expected, exact=True) + + def test_insert_middle_preserves_rangeindex(self): + # insert in the middle + idx = Index(range(0, 3, 2)) + result = idx.insert(1, 1) + expected = Index(range(3)) + 
tm.assert_index_equal(result, expected, exact=True) + + idx = idx * 2 + result = idx.insert(1, 2) + expected = expected * 2 + tm.assert_index_equal(result, expected, exact=True) + + def test_delete(self): + + idx = RangeIndex(5, name="Foo") + expected = idx[1:] + result = idx.delete(0) + tm.assert_index_equal(result, expected, exact=True) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + tm.assert_index_equal(result, expected, exact=True) + assert result.name == expected.name + + msg = "index 5 is out of bounds for axis 0 with size 5" + with pytest.raises((IndexError, ValueError), match=msg): + # either depending on numpy version + result = idx.delete(len(idx)) + + def test_delete_preserves_rangeindex(self): + idx = Index(range(2), name="foo") + + result = idx.delete([1]) + expected = Index(range(1), name="foo") + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(1) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_preserves_rangeindex_middle(self): + idx = Index(range(3), name="foo") + result = idx.delete(1) + expected = idx[::2] + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(-2) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_preserves_rangeindex_list_at_end(self): + idx = RangeIndex(0, 6, 1) + + loc = [2, 3, 4, 5] + result = idx.delete(loc) + expected = idx[:2] + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_preserves_rangeindex_list_middle(self): + idx = RangeIndex(0, 6, 1) + + loc = [1, 2, 3, 4] + result = idx.delete(loc) + expected = RangeIndex(0, 6, 5) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_all_preserves_rangeindex(self): + idx = RangeIndex(0, 6, 1) + + loc = [0, 1, 2, 3, 4, 5] + result = idx.delete(loc) + expected = idx[:0] + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_not_preserving_rangeindex(self): + idx = RangeIndex(0, 6, 1) + + loc = [0, 3, 5] + result = idx.delete(loc) + expected = Int64Index([1, 2, 4]) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_view(self): + i = RangeIndex(0, name="Foo") + i_view = i.view() + assert i_view.name == "Foo" + + i_view = i.view("i8") + tm.assert_numpy_array_equal(i.values, i_view) + + i_view = i.view(RangeIndex) + tm.assert_index_equal(i, i_view) + + def test_dtype(self, simple_index): + index = simple_index + assert index.dtype == np.int64 + + def test_cache(self): + # GH 26565, GH26617, GH35432 + # This test checks whether _cache has been set. + # Calling RangeIndex._cache["_data"] creates an int64 array of the same length + # as the RangeIndex and stores it in _cache. 
+ idx = RangeIndex(0, 100, 10) + + assert idx._cache == {} + + repr(idx) + assert idx._cache == {} + + str(idx) + assert idx._cache == {} + + idx.get_loc(20) + assert idx._cache == {} + + 90 in idx # True + assert idx._cache == {} + + 91 in idx # False + assert idx._cache == {} + + idx.all() + assert idx._cache == {} + + idx.any() + assert idx._cache == {} + + for _ in idx: + pass + assert idx._cache == {} + + idx.format() + assert idx._cache == {} + + df = pd.DataFrame({"a": range(10)}, index=idx) + + str(df) + assert idx._cache == {} + + df.loc[50] + assert idx._cache == {} + + with pytest.raises(KeyError, match="51"): + df.loc[51] + assert idx._cache == {} + + df.loc[10:50] + assert idx._cache == {} + + df.iloc[5:10] + assert idx._cache == {} + + # idx._cache should contain a _data entry after call to idx._data + idx._data + assert isinstance(idx._data, np.ndarray) + assert idx._data is idx._data # check cached value is reused + assert len(idx._cache) == 1 + expected = np.arange(0, 100, 10, dtype="int64") + tm.assert_numpy_array_equal(idx._cache["_data"], expected) + + def test_is_monotonic(self): + index = RangeIndex(0, 20, 2) + assert index.is_monotonic_increasing is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is False + + index = RangeIndex(4, 0, -1) + assert index.is_monotonic_increasing is False + assert index._is_strictly_monotonic_increasing is False + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(1, 2) + assert index.is_monotonic_increasing is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(2, 1) + assert index.is_monotonic_increasing is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(1, 1) + assert index.is_monotonic_increasing is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + def test_equals_range(self): + equiv_pairs = [ + (RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), + (RangeIndex(0), RangeIndex(1, -1, 3)), + (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)), + (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2)), + ] + for left, right in equiv_pairs: + assert left.equals(right) + assert right.equals(left) + + def test_logical_compat(self, simple_index): + idx = simple_index + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() + + def test_identical(self, simple_index): + index = simple_index + i = Index(index.copy()) + assert i.identical(index) + + # we don't allow object dtype for RangeIndex + if isinstance(index, RangeIndex): + return + + same_values_different_type = Index(i, dtype=object) + assert not i.identical(same_values_different_type) + + i = index.copy(dtype=object) + i = i.rename("foo") + same_values = Index(i, dtype=object) + assert same_values.identical(index.copy(dtype=object)) + + assert not i.identical(index) + assert Index(same_values, name="foo", dtype=object).identical(i) 
+ + assert not index.copy(dtype=object).identical(index.copy(dtype="int64")) + + def test_nbytes(self): + + # memory savings vs int index + idx = RangeIndex(0, 1000) + assert idx.nbytes < Int64Index(idx._values).nbytes / 10 + + # constant memory usage + i2 = RangeIndex(0, 10) + assert idx.nbytes == i2.nbytes + + @pytest.mark.parametrize( + "start,stop,step", + [ + # can't + ("foo", "bar", "baz"), + # shouldn't + ("0", "1", "2"), + ], + ) + def test_cant_or_shouldnt_cast(self, start, stop, step): + msg = f"Wrong type {type(start)} for value {start}" + with pytest.raises(TypeError, match=msg): + RangeIndex(start, stop, step) + + def test_view_index(self, simple_index): + index = simple_index + index.view(Index) + + def test_prevent_casting(self, simple_index): + index = simple_index + result = index.astype("O") + assert result.dtype == np.object_ + + def test_repr_roundtrip(self, simple_index): + index = simple_index + tm.assert_index_equal(eval(repr(index)), index) + + def test_slice_keep_name(self): + idx = RangeIndex(1, 2, name="asdf") + assert idx.name == idx[1:].name + + def test_has_duplicates(self, index): + assert index.is_unique + assert not index.has_duplicates + + def test_extended_gcd(self, simple_index): + index = simple_index + result = index._extended_gcd(6, 10) + assert result[0] == result[1] * 6 + result[2] * 10 + assert 2 == result[0] + + result = index._extended_gcd(10, 6) + assert 2 == result[1] * 10 + result[2] * 6 + assert 2 == result[0] + + def test_min_fitting_element(self): + result = RangeIndex(0, 20, 2)._min_fitting_element(1) + assert 2 == result + + result = RangeIndex(1, 6)._min_fitting_element(1) + assert 1 == result + + result = RangeIndex(18, -2, -2)._min_fitting_element(1) + assert 2 == result + + result = RangeIndex(5, 0, -1)._min_fitting_element(1) + assert 1 == result + + big_num = 500000000000000000000000 + + result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num) + assert big_num == result + + def test_pickle_compat_construction(self): + # RangeIndex() is a valid constructor + pass + + def test_slice_specialised(self, simple_index): + index = simple_index + index.name = "foo" + + # scalar indexing + res = index[1] + expected = 2 + assert res == expected + + res = index[-1] + expected = 18 + assert res == expected + + # slicing + # slice value completion + index_slice = index[:] + expected = index + tm.assert_index_equal(index_slice, expected) + + # positive slice values + index_slice = index[7:10:2] + expected = Index(np.array([14, 18]), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + # negative slice values + index_slice = index[-1:-5:-2] + expected = Index(np.array([18, 14]), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + # stop overshoot + index_slice = index[2:100:4] + expected = Index(np.array([4, 12]), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + # reverse + index_slice = index[::-1] + expected = Index(index.values[::-1], name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + index_slice = index[-8::-1] + expected = Index(np.array([4, 2, 0]), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + index_slice = index[-40::-1] + expected = Index(np.array([], dtype=np.int64), name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + index_slice = index[40::-1] + expected = Index(index.values[40::-1], name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + 
index_slice = index[10::-1] + expected = Index(index.values[::-1], name="foo") + tm.assert_index_equal(index_slice, expected, exact="equiv") + + @pytest.mark.parametrize("step", set(range(-5, 6)) - {0}) + def test_len_specialised(self, step): + # make sure that our len is the same as np.arange calc + start, stop = (0, 5) if step > 0 else (5, 0) + + arr = np.arange(start, stop, step) + index = RangeIndex(start, stop, step) + assert len(index) == len(arr) + + index = RangeIndex(stop, start, step) + assert len(index) == 0 + + @pytest.fixture( + params=[ + ([RI(1, 12, 5)], RI(1, 12, 5)), + ([RI(0, 6, 4)], RI(0, 6, 4)), + ([RI(1, 3), RI(3, 7)], RI(1, 7)), + ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)), + ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)), + ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)), + ([RI(-4, -8), RI(-8, -12)], RI(0, 0)), + ([RI(-4, -8), RI(3, -4)], RI(0, 0)), + ([RI(-4, -8), RI(3, 5)], RI(3, 5)), + ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])), + ([RI(-2), RI(3, 5)], RI(3, 5)), + ([RI(2), RI(2)], I64([0, 1, 0, 1])), + ([RI(2), RI(2, 5), RI(5, 8, 4)], RI(0, 6)), + ([RI(2), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])), + ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)), + ([RI(3), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])), + ([RI(3), F64([-1, 3.1, 15.0])], F64([0, 1, 2, -1, 3.1, 15.0])), + ([RI(3), OI(["a", None, 14])], OI([0, 1, 2, "a", None, 14])), + ([RI(3, 1), OI(["a", None, 14])], OI(["a", None, 14])), + ] + ) + def appends(self, request): + """Inputs and expected outputs for RangeIndex.append test""" + return request.param + + def test_append(self, appends): + # GH16212 + + indices, expected = appends + + result = indices[0].append(indices[1:]) + tm.assert_index_equal(result, expected, exact=True) + + if len(indices) == 2: + # Append single item rather than list + result2 = indices[0].append(indices[1]) + tm.assert_index_equal(result2, expected, exact=True) + + def test_engineless_lookup(self): + # GH 16685 + # Standard lookup on RangeIndex should not require the engine to be + # created + idx = RangeIndex(2, 10, 3) + + assert idx.get_loc(5) == 1 + tm.assert_numpy_array_equal( + idx.get_indexer([2, 8]), ensure_platform_int(np.array([0, 2])) + ) + with pytest.raises(KeyError, match="3"): + idx.get_loc(3) + + assert "_engine" not in idx._cache + + # Different types of scalars can be excluded immediately, no need to + # use the _engine + with pytest.raises(KeyError, match="'a'"): + idx.get_loc("a") + + assert "_engine" not in idx._cache + + def test_format_empty(self): + # GH35712 + empty_idx = self._index_cls(0) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] + + @pytest.mark.parametrize( + "RI", + [ + RangeIndex(0, -1, -1), + RangeIndex(0, 1, 1), + RangeIndex(1, 3, 2), + RangeIndex(0, -1, -2), + RangeIndex(-3, -5, -2), + ], + ) + def test_append_len_one(self, RI): + # GH39401 + result = RI.append([]) + tm.assert_index_equal(result, RI, exact=True) + + @pytest.mark.parametrize("base", [RangeIndex(0, 2), Index([0, 1])]) + def test_isin_range(self, base): + # GH#41151 + values = RangeIndex(0, 1) + result = base.isin(values) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + def test_sort_values_key(self): + # GH#43666 + sort_order = {8: 2, 6: 0, 4: 8, 2: 10, 0: 12} + values = RangeIndex(0, 10, 2) + result = values.sort_values(key=lambda x: x.map(sort_order)) + expected = Index([4, 8, 6, 0, 2], dtype="int64") + tm.assert_index_equal(result, expected, check_exact=True) diff --git 
a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py new file mode 100644 index 00000000..71bd2f55 --- /dev/null +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -0,0 +1,492 @@ +from datetime import ( + datetime, + timedelta, +) + +from hypothesis import ( + assume, + given, + strategies as st, +) +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.indexes.api import ( + Index, + Int64Index, + RangeIndex, + UInt64Index, +) + + +class TestRangeIndexSetOps: + @pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index]) + def test_intersection_mismatched_dtype(self, klass): + # check that we cast to float, not object + index = RangeIndex(start=0, stop=20, step=2, name="foo") + index = klass(index) + + flt = index.astype(np.float64) + + # bc index.equals(flt), we go through fastpath and get RangeIndex back + result = index.intersection(flt) + tm.assert_index_equal(result, index, exact=True) + + result = flt.intersection(index) + tm.assert_index_equal(result, flt, exact=True) + + # neither empty, not-equals + result = index.intersection(flt[1:]) + tm.assert_index_equal(result, flt[1:], exact=True) + + result = flt[1:].intersection(index) + tm.assert_index_equal(result, flt[1:], exact=True) + + # empty other + result = index.intersection(flt[:0]) + tm.assert_index_equal(result, flt[:0], exact=True) + + result = flt[:0].intersection(index) + tm.assert_index_equal(result, flt[:0], exact=True) + + def test_intersection_empty(self, sort, names): + # name retention on empty intersections + index = RangeIndex(start=0, stop=20, step=2, name=names[0]) + + # empty other + result = index.intersection(index[:0].rename(names[1]), sort=sort) + tm.assert_index_equal(result, index[:0].rename(names[2]), exact=True) + + # empty self + result = index[:0].intersection(index.rename(names[1]), sort=sort) + tm.assert_index_equal(result, index[:0].rename(names[2]), exact=True) + + def test_intersection(self, sort): + # intersect with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Index(np.arange(1, 6)) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index, sort=sort) + expected = Index( + np.sort(np.asarray(np.intersect1d(index.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + # intersect with increasing RangeIndex + other = RangeIndex(1, 6) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected, exact="equiv") + + # intersect with decreasing RangeIndex + other = RangeIndex(5, 0, -1) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected, exact="equiv") + + # reversed (GH 17296) + result = other.intersection(index, sort=sort) + tm.assert_index_equal(result, expected, exact="equiv") + + # GH 17296: intersect two decreasing RangeIndexes + first = RangeIndex(10, -2, -2) + other = RangeIndex(5, -4, -1) + expected = first.astype(int).intersection(other.astype(int), sort=sort) + result = first.intersection(other, sort=sort).astype(int) + tm.assert_index_equal(result, expected) + + # reversed + result = other.intersection(first, sort=sort).astype(int) + tm.assert_index_equal(result, expected) + + index = RangeIndex(5, name="foo") + + # intersect 
of non-overlapping indices + other = RangeIndex(5, 10, 1, name="foo") + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1, name="foo") + tm.assert_index_equal(result, expected) + + other = RangeIndex(-1, -5, -1) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected) + + # intersection of empty indices + other = RangeIndex(0, 0, 1) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected) + + result = other.intersection(index, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection_non_overlapping_gcd(self, sort, names): + # intersection of non-overlapping values based on start value and gcd + index = RangeIndex(1, 10, 2, name=names[0]) + other = RangeIndex(0, 10, 4, name=names[1]) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1, name=names[2]) + tm.assert_index_equal(result, expected) + + def test_union_noncomparable(self, sort): + # corner case, non-Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) + result = index.union(other, sort=sort) + expected = Index(np.concatenate((index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(index, sort=sort) + expected = Index(np.concatenate((other, index))) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "idx1, idx2, expected_sorted, expected_notsorted", + [ + ( + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + ), + ( + RangeIndex(0, 10, 1), + RangeIndex(5, 20, 1), + RangeIndex(0, 20, 1), + RangeIndex(0, 20, 1), + ), + ( + RangeIndex(0, 10, 1), + RangeIndex(10, 20, 1), + RangeIndex(0, 20, 1), + RangeIndex(0, 20, 1), + ), + ( + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + ), + ( + RangeIndex(0, -10, -1), + RangeIndex(-10, -20, -1), + RangeIndex(-19, 1, 1), + RangeIndex(0, -20, -1), + ), + ( + RangeIndex(0, 10, 2), + RangeIndex(1, 10, 2), + RangeIndex(0, 10, 1), + Int64Index(list(range(0, 10, 2)) + list(range(1, 10, 2))), + ), + ( + RangeIndex(0, 11, 2), + RangeIndex(1, 12, 2), + RangeIndex(0, 12, 1), + Int64Index(list(range(0, 11, 2)) + list(range(1, 12, 2))), + ), + ( + RangeIndex(0, 21, 4), + RangeIndex(-2, 24, 4), + RangeIndex(-2, 24, 2), + Int64Index(list(range(0, 21, 4)) + list(range(-2, 24, 4))), + ), + ( + RangeIndex(0, -20, -2), + RangeIndex(-1, -21, -2), + RangeIndex(-19, 1, 1), + Int64Index(list(range(0, -20, -2)) + list(range(-1, -21, -2))), + ), + ( + RangeIndex(0, 100, 5), + RangeIndex(0, 100, 20), + RangeIndex(0, 100, 5), + RangeIndex(0, 100, 5), + ), + ( + RangeIndex(0, -100, -5), + RangeIndex(5, -100, -20), + RangeIndex(-95, 10, 5), + Int64Index(list(range(0, -100, -5)) + [5]), + ), + ( + RangeIndex(0, -11, -1), + RangeIndex(1, -12, -4), + RangeIndex(-11, 2, 1), + Int64Index(list(range(0, -11, -1)) + [1, -11]), + ), + (RangeIndex(0), RangeIndex(0), RangeIndex(0), RangeIndex(0)), + ( + RangeIndex(0, -10, -2), + RangeIndex(0), + RangeIndex(0, -10, -2), + RangeIndex(0, -10, -2), + ), + ( + RangeIndex(0, 100, 2), + RangeIndex(100, 150, 200), + RangeIndex(0, 102, 2), + RangeIndex(0, 102, 2), + ), + ( + RangeIndex(0, -100, -2), + RangeIndex(-100, 50, 102), + RangeIndex(-100, 4, 2), + Int64Index(list(range(0, -100, -2)) + [-100, 2]), + ), + ( + RangeIndex(0, -100, -1), + RangeIndex(0, 
-50, -3), + RangeIndex(-99, 1, 1), + RangeIndex(0, -100, -1), + ), + ( + RangeIndex(0, 1, 1), + RangeIndex(5, 6, 10), + RangeIndex(0, 6, 5), + RangeIndex(0, 10, 5), + ), + ( + RangeIndex(0, 10, 5), + RangeIndex(-5, -6, -20), + RangeIndex(-5, 10, 5), + Int64Index([0, 5, -5]), + ), + ( + RangeIndex(0, 3, 1), + RangeIndex(4, 5, 1), + Int64Index([0, 1, 2, 4]), + Int64Index([0, 1, 2, 4]), + ), + ( + RangeIndex(0, 10, 1), + Int64Index([]), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + ), + ( + RangeIndex(0), + Int64Index([1, 5, 6]), + Int64Index([1, 5, 6]), + Int64Index([1, 5, 6]), + ), + # GH 43885 + ( + RangeIndex(0, 10), + RangeIndex(0, 5), + RangeIndex(0, 10), + RangeIndex(0, 10), + ), + ], + ids=lambda x: repr(x) if isinstance(x, RangeIndex) else x, + ) + def test_union_sorted(self, idx1, idx2, expected_sorted, expected_notsorted): + res1 = idx1.union(idx2, sort=None) + tm.assert_index_equal(res1, expected_sorted, exact=True) + + res1 = idx1.union(idx2, sort=False) + tm.assert_index_equal(res1, expected_notsorted, exact=True) + + res2 = idx2.union(idx1, sort=None) + res3 = Int64Index(idx1._values, name=idx1.name).union(idx2, sort=None) + tm.assert_index_equal(res2, expected_sorted, exact=True) + tm.assert_index_equal(res3, expected_sorted, exact="equiv") + + def test_union_same_step_misaligned(self): + # GH#44019 + left = RangeIndex(range(0, 20, 4)) + right = RangeIndex(range(1, 21, 4)) + + result = left.union(right) + expected = Int64Index([0, 1, 4, 5, 8, 9, 12, 13, 16, 17]) + tm.assert_index_equal(result, expected, exact=True) + + def test_difference(self): + # GH#12034 Cases where we operate against another RangeIndex and may + # get back another RangeIndex + obj = RangeIndex.from_range(range(1, 10), name="foo") + + result = obj.difference(obj) + expected = RangeIndex.from_range(range(0), name="foo") + tm.assert_index_equal(result, expected, exact=True) + + result = obj.difference(expected.rename("bar")) + tm.assert_index_equal(result, obj.rename(None), exact=True) + + result = obj.difference(obj[:3]) + tm.assert_index_equal(result, obj[3:], exact=True) + + result = obj.difference(obj[-3:]) + tm.assert_index_equal(result, obj[:-3], exact=True) + + # Flipping the step of 'other' doesn't affect the result, but + # flipping the stepof 'self' does when sort=None + result = obj[::-1].difference(obj[-3:]) + tm.assert_index_equal(result, obj[:-3], exact=True) + + result = obj[::-1].difference(obj[-3:], sort=False) + tm.assert_index_equal(result, obj[:-3][::-1], exact=True) + + result = obj[::-1].difference(obj[-3:][::-1]) + tm.assert_index_equal(result, obj[:-3], exact=True) + + result = obj[::-1].difference(obj[-3:][::-1], sort=False) + tm.assert_index_equal(result, obj[:-3][::-1], exact=True) + + result = obj.difference(obj[2:6]) + expected = Int64Index([1, 2, 7, 8, 9], name="foo") + tm.assert_index_equal(result, expected) + + def test_difference_sort(self): + # GH#44085 ensure we respect the sort keyword + + idx = Index(range(4))[::-1] + other = Index(range(3, 4)) + + result = idx.difference(other) + expected = Index(range(3)) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.difference(other, sort=False) + expected = expected[::-1] + tm.assert_index_equal(result, expected, exact=True) + + # case where the intersection is empty + other = range(10, 12) + result = idx.difference(other, sort=None) + expected = idx[::-1] + tm.assert_index_equal(result, expected, exact=True) + + def test_difference_mismatched_step(self): + obj = RangeIndex.from_range(range(1, 10), 
name="foo") + + result = obj.difference(obj[::2]) + expected = obj[1::2] + tm.assert_index_equal(result, expected, exact=True) + + result = obj[::-1].difference(obj[::2], sort=False) + tm.assert_index_equal(result, expected[::-1], exact=True) + + result = obj.difference(obj[1::2]) + expected = obj[::2] + tm.assert_index_equal(result, expected, exact=True) + + result = obj[::-1].difference(obj[1::2], sort=False) + tm.assert_index_equal(result, expected[::-1], exact=True) + + def test_difference_interior_overlap_endpoints_preserved(self): + left = RangeIndex(range(4)) + right = RangeIndex(range(1, 3)) + + result = left.difference(right) + expected = RangeIndex(0, 4, 3) + assert expected.tolist() == [0, 3] + tm.assert_index_equal(result, expected, exact=True) + + def test_difference_endpoints_overlap_interior_preserved(self): + left = RangeIndex(-8, 20, 7) + right = RangeIndex(13, -9, -3) + + result = left.difference(right) + expected = RangeIndex(-1, 13, 7) + assert expected.tolist() == [-1, 6] + tm.assert_index_equal(result, expected, exact=True) + + def test_difference_interior_non_preserving(self): + # case with intersection of length 1 but RangeIndex is not preserved + idx = Index(range(10)) + + other = idx[3:4] + result = idx.difference(other) + expected = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 9]) + tm.assert_index_equal(result, expected, exact=True) + + # case with other.step / self.step > 2 + other = idx[::3] + result = idx.difference(other) + expected = Int64Index([1, 2, 4, 5, 7, 8]) + tm.assert_index_equal(result, expected, exact=True) + + # cases with only reaching one end of left + obj = Index(range(20)) + other = obj[:10:2] + result = obj.difference(other) + expected = Int64Index([1, 3, 5, 7, 9] + list(range(10, 20))) + tm.assert_index_equal(result, expected, exact=True) + + other = obj[1:11:2] + result = obj.difference(other) + expected = Int64Index([0, 2, 4, 6, 8, 10] + list(range(11, 20))) + tm.assert_index_equal(result, expected, exact=True) + + def test_symmetric_difference(self): + # GH#12034 Cases where we operate against another RangeIndex and may + # get back another RangeIndex + left = RangeIndex.from_range(range(1, 10), name="foo") + + result = left.symmetric_difference(left) + expected = RangeIndex.from_range(range(0), name="foo") + tm.assert_index_equal(result, expected) + + result = left.symmetric_difference(expected.rename("bar")) + tm.assert_index_equal(result, left.rename(None)) + + result = left[:-2].symmetric_difference(left[2:]) + expected = Int64Index([1, 2, 8, 9], name="foo") + tm.assert_index_equal(result, expected) + + right = RangeIndex.from_range(range(10, 15)) + + result = left.symmetric_difference(right) + expected = RangeIndex.from_range(range(1, 15)) + tm.assert_index_equal(result, expected) + + result = left.symmetric_difference(right[1:]) + expected = Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14]) + tm.assert_index_equal(result, expected) + + +def assert_range_or_not_is_rangelike(index): + """ + Check that we either have a RangeIndex or that this index *cannot* + be represented as a RangeIndex. 
+ """ + if not isinstance(index, RangeIndex) and len(index) > 0: + diff = index[:-1] - index[1:] + assert not (diff == diff[0]).all() + + +@given( + st.integers(-20, 20), + st.integers(-20, 20), + st.integers(-20, 20), + st.integers(-20, 20), + st.integers(-20, 20), + st.integers(-20, 20), +) +def test_range_difference(start1, stop1, step1, start2, stop2, step2): + # test that + # a) we match Int64Index.difference and + # b) we return RangeIndex whenever it is possible to do so. + assume(step1 != 0) + assume(step2 != 0) + + left = RangeIndex(start1, stop1, step1) + right = RangeIndex(start2, stop2, step2) + + result = left.difference(right, sort=None) + assert_range_or_not_is_rangelike(result) + + alt = Int64Index(left).difference(Int64Index(right), sort=None) + tm.assert_index_equal(result, alt, exact="equiv") + + result = left.difference(right, sort=False) + assert_range_or_not_is_rangelike(result) + + alt = Int64Index(left).difference(Int64Index(right), sort=False) + tm.assert_index_equal(result, alt, exact="equiv") diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py new file mode 100644 index 00000000..b062dfb7 --- /dev/null +++ b/pandas/tests/indexes/test_any_index.py @@ -0,0 +1,195 @@ +""" +Tests that can be parametrized over _any_ Index object. +""" +import re + +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +import pandas._testing as tm + + +def test_boolean_context_compat(index): + # GH#7897 + with pytest.raises(ValueError, match="The truth value of a"): + if index: + pass + + with pytest.raises(ValueError, match="The truth value of a"): + bool(index) + + +def test_sort(index): + msg = "cannot sort an Index object in-place, use sort_values instead" + with pytest.raises(TypeError, match=msg): + index.sort() + + +def test_hash_error(index): + with pytest.raises(TypeError, match=f"unhashable type: '{type(index).__name__}'"): + hash(index) + + +def test_copy_dtype_deprecated(index): + # GH#35853 + with tm.assert_produces_warning(FutureWarning): + index.copy(dtype=object) + + +def test_mutability(index): + if not len(index): + return + msg = "Index does not support mutable operations" + with pytest.raises(TypeError, match=msg): + index[0] = index[0] + + +def test_map_identity_mapping(index, request): + # GH#12766 + + result = index.map(lambda x: x) + if index.dtype == object and result.dtype == bool: + assert (index == result).all() + # TODO: could work that into the 'exact="equiv"'? + return # FIXME: doesn't belong in this file anymore! 
+ tm.assert_index_equal(result, index, exact="equiv") + + +def test_wrong_number_names(index): + names = index.nlevels * ["apple", "banana", "carrot"] + with pytest.raises(ValueError, match="^Length"): + index.names = names + + +def test_view_preserves_name(index): + assert index.view().name == index.name + + +def test_ravel_deprecation(index): + # GH#19956 ravel returning ndarray is deprecated + with tm.assert_produces_warning(FutureWarning): + index.ravel() + + +def test_is_type_compatible_deprecation(index): + # GH#42113 + msg = "is_type_compatible is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + index.is_type_compatible(index.inferred_type) + + +def test_is_mixed_deprecated(index): + # GH#32922 + msg = "Index.is_mixed is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + index.is_mixed() + + +class TestConversion: + def test_to_series(self, index): + # assert that we are creating a copy of the index + + ser = index.to_series() + assert ser.values is not index.values + assert ser.index is not index + assert ser.name == index.name + + def test_to_series_with_arguments(self, index): + # GH#18699 + + # index kwarg + ser = index.to_series(index=index) + + assert ser.values is not index.values + assert ser.index is index + assert ser.name == index.name + + # name kwarg + ser = index.to_series(name="__test") + + assert ser.values is not index.values + assert ser.index is not index + assert ser.name != index.name + + def test_tolist_matches_list(self, index): + assert index.tolist() == list(index) + + +class TestRoundTrips: + def test_pickle_roundtrip(self, index): + result = tm.round_trip_pickle(index) + tm.assert_index_equal(result, index, exact=True) + if result.nlevels > 1: + # GH#8367 round-trip with timezone + assert index.equal_levels(result) + + def test_pickle_preserves_name(self, index): + original_name, index.name = index.name, "foo" + unpickled = tm.round_trip_pickle(index) + assert index.equals(unpickled) + index.name = original_name + + +class TestIndexing: + def test_get_loc_listlike_raises_invalid_index_error(self, index): + # and never TypeError + key = np.array([0, 1], dtype=np.intp) + + with pytest.raises(InvalidIndexError, match=r"\[0 1\]"): + index.get_loc(key) + + with pytest.raises(InvalidIndexError, match=r"\[False True\]"): + index.get_loc(key.astype(bool)) + + def test_getitem_ellipsis(self, index): + # GH#21282 + result = index[...] 
+ assert result.equals(index) + assert result is not index + + def test_slice_keeps_name(self, index): + assert index.name == index[1:].name + + @pytest.mark.parametrize("item", [101, "no_int", 2.5]) + # FutureWarning from non-tuple sequence of nd indexing + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_getitem_error(self, index, item): + msg = "|".join( + [ + r"index 101 is out of bounds for axis 0 with size [\d]+", + re.escape( + "only integers, slices (`:`), ellipsis (`...`), " + "numpy.newaxis (`None`) and integer or boolean arrays " + "are valid indices" + ), + "index out of bounds", # string[pyarrow] + "Only integers, slices and integer or " + "boolean arrays are valid indices.", # string[pyarrow] + ] + ) + with pytest.raises(IndexError, match=msg): + index[item] + + +class TestRendering: + def test_str(self, index): + # test the string repr + index.name = "foo" + assert "'foo'" in str(index) + assert type(index).__name__ in str(index) + + +class TestReductions: + def test_argmax_axis_invalid(self, index): + # GH#23081 + msg = r"`axis` must be fewer than the number of dimensions \(1\)" + with pytest.raises(ValueError, match=msg): + index.argmax(axis=1) + with pytest.raises(ValueError, match=msg): + index.argmin(axis=2) + with pytest.raises(ValueError, match=msg): + index.min(axis=-2) + with pytest.raises(ValueError, match=msg): + index.max(axis=-3) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py new file mode 100644 index 00000000..43b893b0 --- /dev/null +++ b/pandas/tests/indexes/test_base.py @@ -0,0 +1,1620 @@ +from collections import defaultdict +from datetime import datetime +from io import StringIO +import math +import operator +import re + +import numpy as np +import pytest + +from pandas.compat import IS64 +from pandas.errors import InvalidIndexError +from pandas.util._test_decorators import async_mark + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + DatetimeIndex, + IntervalIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + date_range, + period_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.indexes.api import ( + Index, + MultiIndex, + _get_combined_index, + ensure_index, + ensure_index_from_sequences, +) +from pandas.tests.indexes.common import Base + + +class TestIndex(Base): + _index_cls = Index + + @pytest.fixture + def simple_index(self) -> Index: + return self._index_cls(list("abcde")) + + def test_can_hold_identifiers(self, simple_index): + index = simple_index + key = index[0] + assert index._can_hold_identifiers_and_holds_name(key) is True + + @pytest.mark.parametrize("index", ["datetime"], indirect=True) + def test_new_axis(self, index): + with tm.assert_produces_warning(FutureWarning): + # GH#30588 multi-dimensional indexing deprecated + new_index = index[None, :] + assert new_index.ndim == 2 + assert isinstance(new_index, np.ndarray) + + def test_constructor_regular(self, index): + tm.assert_contains_all(index, index) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_constructor_casting(self, index): + # casting + arr = np.array(index) + new_index = Index(arr) + tm.assert_contains_all(arr, new_index) + tm.assert_index_equal(index, new_index) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_constructor_copy(self, index): + arr = np.array(index) + new_index = Index(arr, copy=True, name="name") + assert 
isinstance(new_index, Index) + assert new_index.name == "name" + tm.assert_numpy_array_equal(arr, new_index.values) + arr[0] = "SOMEBIGLONGSTRING" + assert new_index[0] != "SOMEBIGLONGSTRING" + + @pytest.mark.parametrize("cast_as_obj", [True, False]) + @pytest.mark.parametrize( + "index", + [ + date_range( + "2015-01-01 10:00", + freq="D", + periods=3, + tz="US/Eastern", + name="Green Eggs & Ham", + ), # DTI with tz + date_range("2015-01-01 10:00", freq="D", periods=3), # DTI no tz + pd.timedelta_range("1 days", freq="D", periods=3), # td + period_range("2015-01-01", freq="D", periods=3), # period + ], + ) + def test_constructor_from_index_dtlike(self, cast_as_obj, index): + if cast_as_obj: + result = Index(index.astype(object)) + else: + result = Index(index) + + tm.assert_index_equal(result, index) + + if isinstance(index, DatetimeIndex): + assert result.tz == index.tz + if cast_as_obj: + # GH#23524 check that Index(dti, dtype=object) does not + # incorrectly raise ValueError, and that nanoseconds are not + # dropped + index += pd.Timedelta(nanoseconds=50) + result = Index(index, dtype=object) + assert result.dtype == np.object_ + assert list(result) == list(index) + + @pytest.mark.parametrize( + "index,has_tz", + [ + ( + date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"), + True, + ), # datetimetz + (pd.timedelta_range("1 days", freq="D", periods=3), False), # td + (period_range("2015-01-01", freq="D", periods=3), False), # period + ], + ) + def test_constructor_from_series_dtlike(self, index, has_tz): + result = Index(Series(index)) + tm.assert_index_equal(result, index) + + if has_tz: + assert result.tz == index.tz + + def test_constructor_from_series_freq(self): + # GH 6273 + # create from a series, passing a freq + dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] + expected = DatetimeIndex(dts, freq="MS") + + s = Series(pd.to_datetime(dts)) + result = DatetimeIndex(s, freq="MS") + + tm.assert_index_equal(result, expected) + + def test_constructor_from_frame_series_freq(self): + # GH 6273 + # create from a series, passing a freq + dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] + expected = DatetimeIndex(dts, freq="MS") + + df = DataFrame(np.random.rand(5, 3)) + df["date"] = dts + result = DatetimeIndex(df["date"], freq="MS") + + assert df["date"].dtype == object + expected.name = "date" + tm.assert_index_equal(result, expected) + + expected = Series(dts, name="date") + tm.assert_series_equal(df["date"], expected) + + # GH 6274 + # infer freq of same + freq = pd.infer_freq(df["date"]) + assert freq == "MS" + + def test_constructor_int_dtype_nan(self): + # see gh-15187 + data = [np.nan] + expected = Float64Index(data) + result = Index(data, dtype="float") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "klass,dtype,na_val", + [ + (Float64Index, np.float64, np.nan), + (DatetimeIndex, "datetime64[ns]", pd.NaT), + ], + ) + def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val): + # GH 13467 + na_list = [na_val, na_val] + expected = klass(na_list) + assert expected.dtype == dtype + + result = Index(na_list) + tm.assert_index_equal(result, expected) + + result = Index(np.array(na_list)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "vals,dtype", + [ + ([1, 2, 3, 4, 5], "int"), + ([1.1, np.nan, 2.2, 3.0], "float"), + (["A", "B", "C", np.nan], "obj"), + ], + ) + def test_constructor_simple_new(self, vals, dtype): + index = Index(vals, name=dtype) + result = 
index._simple_new(index.values, dtype) + tm.assert_index_equal(result, index) + + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") + @pytest.mark.parametrize("attr", ["values", "asi8"]) + @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) + def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass): + # Test constructing with a datetimetz dtype + # .values produces numpy datetimes, so these are considered naive + # .asi8 produces integers, so these are considered epoch timestamps + # ^the above will be true in a later version. Right now we `.view` + # the i8 values as NS_DTYPE, effectively treating them as wall times. + index = date_range("2011-01-01", periods=5) + arg = getattr(index, attr) + index = index.tz_localize(tz_naive_fixture) + dtype = index.dtype + + warn = None if tz_naive_fixture is None else FutureWarning + # astype dt64 -> dt64tz deprecated + + if attr == "asi8": + result = DatetimeIndex(arg).tz_localize(tz_naive_fixture) + else: + result = klass(arg, tz=tz_naive_fixture) + tm.assert_index_equal(result, index) + + if attr == "asi8": + with tm.assert_produces_warning(warn): + result = DatetimeIndex(arg).astype(dtype) + else: + result = klass(arg, dtype=dtype) + tm.assert_index_equal(result, index) + + if attr == "asi8": + result = DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture) + else: + result = klass(list(arg), tz=tz_naive_fixture) + tm.assert_index_equal(result, index) + + if attr == "asi8": + with tm.assert_produces_warning(warn): + result = DatetimeIndex(list(arg)).astype(dtype) + else: + result = klass(list(arg), dtype=dtype) + tm.assert_index_equal(result, index) + + @pytest.mark.parametrize("attr", ["values", "asi8"]) + @pytest.mark.parametrize("klass", [Index, TimedeltaIndex]) + def test_constructor_dtypes_timedelta(self, attr, klass): + index = pd.timedelta_range("1 days", periods=5) + index = index._with_freq(None) # won't be preserved by constructors + dtype = index.dtype + + values = getattr(index, attr) + + result = klass(values, dtype=dtype) + tm.assert_index_equal(result, index) + + result = klass(list(values), dtype=dtype) + tm.assert_index_equal(result, index) + + @pytest.mark.parametrize("value", [[], iter([]), (_ for _ in [])]) + @pytest.mark.parametrize( + "klass", + [ + Index, + Float64Index, + Int64Index, + UInt64Index, + CategoricalIndex, + DatetimeIndex, + TimedeltaIndex, + ], + ) + def test_constructor_empty(self, value, klass): + empty = klass(value) + assert isinstance(empty, klass) + assert not len(empty) + + @pytest.mark.parametrize( + "empty,klass", + [ + (PeriodIndex([], freq="B"), PeriodIndex), + (PeriodIndex(iter([]), freq="B"), PeriodIndex), + (PeriodIndex((_ for _ in []), freq="B"), PeriodIndex), + (RangeIndex(step=1), RangeIndex), + (MultiIndex(levels=[[1, 2], ["blue", "red"]], codes=[[], []]), MultiIndex), + ], + ) + def test_constructor_empty_special(self, empty, klass): + assert isinstance(empty, klass) + assert not len(empty) + + @pytest.mark.parametrize( + "index", + [ + "datetime", + "float", + "int", + "period", + "range", + "repeats", + "timedelta", + "tuples", + "uint", + ], + indirect=True, + ) + def test_view_with_args(self, index): + index.view("i8") + + @pytest.mark.parametrize( + "index", + [ + "string", + pytest.param("categorical", marks=pytest.mark.xfail(reason="gh-25464")), + "bool-object", + "bool-dtype", + "empty", + ], + indirect=True, + ) + def test_view_with_args_object_array_raises(self, index): + if index.dtype == bool: + msg = "When changing to a larger 
dtype" + with pytest.raises(ValueError, match=msg): + index.view("i8") + else: + msg = "Cannot change data-type for object array" + with pytest.raises(TypeError, match=msg): + index.view("i8") + + @pytest.mark.parametrize("index", ["int", "range"], indirect=True) + def test_astype(self, index): + casted = index.astype("i8") + + # it works! + casted.get_loc(5) + + # pass on name + index.name = "foobar" + casted = index.astype("i8") + assert casted.name == "foobar" + + def test_equals_object(self): + # same + assert Index(["a", "b", "c"]).equals(Index(["a", "b", "c"])) + + @pytest.mark.parametrize( + "comp", [Index(["a", "b"]), Index(["a", "b", "d"]), ["a", "b", "c"]] + ) + def test_not_equals_object(self, comp): + assert not Index(["a", "b", "c"]).equals(comp) + + def test_identical(self): + + # index + i1 = Index(["a", "b", "c"]) + i2 = Index(["a", "b", "c"]) + + assert i1.identical(i2) + + i1 = i1.rename("foo") + assert i1.equals(i2) + assert not i1.identical(i2) + + i2 = i2.rename("foo") + assert i1.identical(i2) + + i3 = Index([("a", "a"), ("a", "b"), ("b", "a")]) + i4 = Index([("a", "a"), ("a", "b"), ("b", "a")], tupleize_cols=False) + assert not i3.identical(i4) + + def test_is_(self): + ind = Index(range(10)) + assert ind.is_(ind) + assert ind.is_(ind.view().view().view().view()) + assert not ind.is_(Index(range(10))) + assert not ind.is_(ind.copy()) + assert not ind.is_(ind.copy(deep=False)) + assert not ind.is_(ind[:]) + assert not ind.is_(np.array(range(10))) + + # quasi-implementation dependent + assert ind.is_(ind.view()) + ind2 = ind.view() + ind2.name = "bob" + assert ind.is_(ind2) + assert ind2.is_(ind) + # doesn't matter if Indices are *actually* views of underlying data, + assert not ind.is_(Index(ind.values)) + arr = np.array(range(1, 11)) + ind1 = Index(arr, copy=False) + ind2 = Index(arr, copy=False) + assert not ind1.is_(ind2) + + def test_asof_numeric_vs_bool_raises(self): + left = Index([1, 2, 3]) + right = Index([True, False], dtype=object) + + msg = "Cannot compare dtypes int64 and bool" + with pytest.raises(TypeError, match=msg): + left.asof(right[0]) + # TODO: should right.asof(left[0]) also raise? + + with pytest.raises(InvalidIndexError, match=re.escape(str(right))): + left.asof(right) + + with pytest.raises(InvalidIndexError, match=re.escape(str(left))): + right.asof(left) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_booleanindex(self, index): + bool_index = np.ones(len(index), dtype=bool) + bool_index[5:30:2] = False + + sub_index = index[bool_index] + + for i, val in enumerate(sub_index): + assert sub_index.get_loc(val) == i + + sub_index = index[list(bool_index)] + for i, val in enumerate(sub_index): + assert sub_index.get_loc(val) == i + + def test_fancy(self, simple_index): + index = simple_index + sl = index[[1, 2, 3]] + for i in sl: + assert i == sl[sl.get_loc(i)] + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + @pytest.mark.parametrize("dtype", [np.int_, np.bool_]) + def test_empty_fancy(self, index, dtype): + empty_arr = np.array([], dtype=dtype) + empty_index = type(index)([]) + + assert index[[]].identical(empty_index) + assert index[empty_arr].identical(empty_index) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_empty_fancy_raises(self, index): + # DatetimeIndex is excluded, because it overrides getitem and should + # be tested separately. 
+ empty_farr = np.array([], dtype=np.float_) + empty_index = type(index)([]) + + assert index[[]].identical(empty_index) + # np.ndarray only accepts ndarray of int & bool dtypes, so should Index + msg = r"arrays used as indices must be of integer \(or boolean\) type" + with pytest.raises(IndexError, match=msg): + index[empty_farr] + + def test_union_dt_as_obj(self, simple_index): + # TODO: Replace with fixturesult + index = simple_index + date_index = date_range("2019-01-01", periods=10) + first_cat = index.union(date_index) + second_cat = index.union(index) + + appended = np.append(index, date_index.astype("O")) + + assert tm.equalContents(first_cat, appended) + assert tm.equalContents(second_cat, index) + tm.assert_contains_all(index, first_cat) + tm.assert_contains_all(index, second_cat) + tm.assert_contains_all(date_index, first_cat) + + def test_map_with_tuples(self): + # GH 12766 + + # Test that returning a single tuple from an Index + # returns an Index. + index = tm.makeIntIndex(3) + result = tm.makeIntIndex(3).map(lambda x: (x,)) + expected = Index([(i,) for i in index]) + tm.assert_index_equal(result, expected) + + # Test that returning a tuple from a map of a single index + # returns a MultiIndex object. + result = index.map(lambda x: (x, x == 1)) + expected = MultiIndex.from_tuples([(i, i == 1) for i in index]) + tm.assert_index_equal(result, expected) + + def test_map_with_tuples_mi(self): + # Test that returning a single object from a MultiIndex + # returns an Index. + first_level = ["foo", "bar", "baz"] + multi_index = MultiIndex.from_tuples(zip(first_level, [1, 2, 3])) + reduced_index = multi_index.map(lambda x: x[0]) + tm.assert_index_equal(reduced_index, Index(first_level)) + + @pytest.mark.parametrize( + "attr", ["makeDateIndex", "makePeriodIndex", "makeTimedeltaIndex"] + ) + def test_map_tseries_indices_return_index(self, attr): + index = getattr(tm, attr)(10) + expected = Index([1] * 10) + result = index.map(lambda x: 1) + tm.assert_index_equal(expected, result) + + def test_map_tseries_indices_accsr_return_index(self): + date_index = tm.makeDateIndex(24, freq="h", name="hourly") + expected = Int64Index(range(24), name="hourly") + tm.assert_index_equal(expected, date_index.map(lambda x: x.hour), exact=True) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: Series(values, index), + ], + ) + def test_map_dictlike_simple(self, mapper): + # GH 12756 + expected = Index(["foo", "bar", "baz"]) + index = tm.makeIntIndex(3) + result = index.map(mapper(expected.values, index)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: Series(values, index), + ], + ) + def test_map_dictlike(self, index, mapper, request): + # GH 12756 + if isinstance(index, CategoricalIndex): + # Tested in test_categorical + return + elif not index.is_unique: + # Cannot map duplicated index + return + + rng = np.arange(len(index), 0, -1) + + if index.empty: + # to match proper result coercion for uints + expected = Index([]) + elif index._is_backward_compat_public_numeric_index: + expected = index._constructor(rng, dtype=index.dtype) + elif type(index) is Index and index.dtype != object: + # i.e. 
EA-backed, for now just Nullable + expected = Index(rng, dtype=index.dtype) + elif index.dtype.kind == "u": + expected = Index(rng, dtype=index.dtype) + else: + expected = Index(rng) + + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [Series(["foo", 2.0, "baz"], index=[0, 2, -1]), {0: "foo", 2: 2.0, -1: "baz"}], + ) + def test_map_with_non_function_missing_values(self, mapper): + # GH 12756 + expected = Index([2.0, np.nan, "foo"]) + result = Index([2, 1, 0]).map(mapper) + + tm.assert_index_equal(expected, result) + + def test_map_na_exclusion(self): + index = Index([1.5, np.nan, 3, np.nan, 5]) + + result = index.map(lambda x: x * 2, na_action="ignore") + expected = index * 2 + tm.assert_index_equal(result, expected) + + def test_map_defaultdict(self): + index = Index([1, 2, 3]) + default_dict = defaultdict(lambda: "blank") + default_dict[1] = "stuff" + result = index.map(default_dict) + expected = Index(["stuff", "blank", "blank"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("name,expected", [("foo", "foo"), ("bar", None)]) + def test_append_empty_preserve_name(self, name, expected): + left = Index([], name="foo") + right = Index([1, 2, 3], name=name) + + result = left.append(right) + assert result.name == expected + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", False), + ("bool-object", False), + ("bool-dtype", False), + ("categorical", False), + ("int", True), + ("datetime", False), + ("float", True), + ], + indirect=["index"], + ) + def test_is_numeric(self, index, expected): + assert index.is_numeric() is expected + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", True), + ("bool-object", True), + ("bool-dtype", False), + ("categorical", False), + ("int", False), + ("datetime", False), + ("float", False), + ], + indirect=["index"], + ) + def test_is_object(self, index, expected): + assert index.is_object() is expected + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", False), + ("bool-object", False), + ("bool-dtype", False), + ("categorical", False), + ("int", False), + ("datetime", True), + ("float", False), + ], + indirect=["index"], + ) + def test_is_all_dates(self, index, expected): + with tm.assert_produces_warning(FutureWarning): + assert index.is_all_dates is expected + + def test_summary(self, index): + index._summary() + + def test_format_bug(self): + # GH 14626 + # windows has different precision on datetime.datetime.now (it doesn't + # include us since the default for Timestamp shows these but Index + # formatting does not we are skipping) + now = datetime.now() + if not str(now).endswith("000"): + index = Index([now]) + formatted = index.format() + expected = [str(index[0])] + assert formatted == expected + + Index([]).format() + + @pytest.mark.parametrize("vals", [[1, 2.0 + 3.0j, 4.0], ["a", "b", "c"]]) + def test_format_missing(self, vals, nulls_fixture): + # 2845 + vals = list(vals) # Copy for each iteration + vals.append(nulls_fixture) + index = Index(vals, dtype=object) + # TODO: case with complex dtype? 
+ + formatted = index.format() + null_repr = "NaN" if isinstance(nulls_fixture, float) else str(nulls_fixture) + expected = [str(index[0]), str(index[1]), str(index[2]), null_repr] + + assert formatted == expected + assert index[3] is nulls_fixture + + @pytest.mark.parametrize("op", ["any", "all"]) + def test_logical_compat(self, op, simple_index): + index = simple_index + assert getattr(index, op)() == getattr(index.values, op)() + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_drop_by_str_label(self, index): + n = len(index) + drop = index[list(range(5, 10))] + dropped = index.drop(drop) + + expected = index[list(range(5)) + list(range(10, n))] + tm.assert_index_equal(dropped, expected) + + dropped = index.drop(index[0]) + expected = index[1:] + tm.assert_index_equal(dropped, expected) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + @pytest.mark.parametrize("keys", [["foo", "bar"], ["1", "bar"]]) + def test_drop_by_str_label_raises_missing_keys(self, index, keys): + with pytest.raises(KeyError, match=""): + index.drop(keys) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_drop_by_str_label_errors_ignore(self, index): + n = len(index) + drop = index[list(range(5, 10))] + mixed = drop.tolist() + ["foo"] + dropped = index.drop(mixed, errors="ignore") + + expected = index[list(range(5)) + list(range(10, n))] + tm.assert_index_equal(dropped, expected) + + dropped = index.drop(["foo", "bar"], errors="ignore") + expected = index[list(range(n))] + tm.assert_index_equal(dropped, expected) + + def test_drop_by_numeric_label_loc(self): + # TODO: Parametrize numeric and str tests after self.strIndex fixture + index = Index([1, 2, 3]) + dropped = index.drop(1) + expected = Index([2, 3]) + + tm.assert_index_equal(dropped, expected) + + def test_drop_by_numeric_label_raises_missing_keys(self): + index = Index([1, 2, 3]) + with pytest.raises(KeyError, match=""): + index.drop([3, 4]) + + @pytest.mark.parametrize( + "key,expected", [(4, Index([1, 2, 3])), ([3, 4, 5], Index([1, 2]))] + ) + def test_drop_by_numeric_label_errors_ignore(self, key, expected): + index = Index([1, 2, 3]) + dropped = index.drop(key, errors="ignore") + + tm.assert_index_equal(dropped, expected) + + @pytest.mark.parametrize( + "values", + [["a", "b", ("c", "d")], ["a", ("c", "d"), "b"], [("c", "d"), "a", "b"]], + ) + @pytest.mark.parametrize("to_drop", [[("c", "d"), "a"], ["a", ("c", "d")]]) + def test_drop_tuple(self, values, to_drop): + # GH 18304 + index = Index(values) + expected = Index(["b"]) + + result = index.drop(to_drop) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[0]) + for drop_me in to_drop[1], [to_drop[1]]: + result = removed.drop(drop_me) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[1]) + msg = rf"\"\[{re.escape(to_drop[1].__repr__())}\] not found in axis\"" + for drop_me in to_drop[1], [to_drop[1]]: + with pytest.raises(KeyError, match=msg): + removed.drop(drop_me) + + def test_drop_with_duplicates_in_index(self, index): + # GH38051 + if len(index) == 0 or isinstance(index, MultiIndex): + return + if isinstance(index, IntervalIndex) and not IS64: + pytest.skip("Cannot test IntervalIndex with int64 dtype on 32 bit platform") + index = index.unique().repeat(2) + expected = index[2:] + result = index.drop(index[0]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "attr", + [ + "is_monotonic_increasing", + 
"is_monotonic_decreasing", + "_is_strictly_monotonic_increasing", + "_is_strictly_monotonic_decreasing", + ], + ) + def test_is_monotonic_incomparable(self, attr): + index = Index([5, datetime.now(), 7]) + assert not getattr(index, attr) + + def test_set_value_deprecated(self, simple_index): + # GH 28621 + idx = simple_index + arr = np.array([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + idx.set_value(arr, idx[1], 80) + assert arr[1] == 80 + + @pytest.mark.parametrize("values", [["foo", "bar", "quux"], {"foo", "bar", "quux"}]) + @pytest.mark.parametrize( + "index,expected", + [ + (Index(["qux", "baz", "foo", "bar"]), np.array([False, False, True, True])), + (Index([]), np.array([], dtype=bool)), # empty + ], + ) + def test_isin(self, values, index, expected): + result = index.isin(values) + tm.assert_numpy_array_equal(result, expected) + + def test_isin_nan_common_object(self, nulls_fixture, nulls_fixture2): + # Test cartesian product of null fixtures and ensure that we don't + # mangle the various types (save a corner case with PyPy) + + # all nans are the same + if ( + isinstance(nulls_fixture, float) + and isinstance(nulls_fixture2, float) + and math.isnan(nulls_fixture) + and math.isnan(nulls_fixture2) + ): + tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, True]), + ) + + elif nulls_fixture is nulls_fixture2: # should preserve NA type + tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, True]), + ) + + else: + tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, False]), + ) + + def test_isin_nan_common_float64(self, nulls_fixture): + + if nulls_fixture is pd.NaT or nulls_fixture is pd.NA: + # Check 1) that we cannot construct a Float64Index with this value + # and 2) that with an NaN we do not have .isin(nulls_fixture) + msg = "data is not compatible with Float64Index" + with pytest.raises(ValueError, match=msg): + Float64Index([1.0, nulls_fixture]) + + idx = Float64Index([1.0, np.nan]) + assert not idx.isin([nulls_fixture]).any() + return + + idx = Float64Index([1.0, nulls_fixture]) + res = idx.isin([np.nan]) + tm.assert_numpy_array_equal(res, np.array([False, True])) + + # we cannot compare NaT with NaN + res = idx.isin([pd.NaT]) + tm.assert_numpy_array_equal(res, np.array([False, False])) + + @pytest.mark.parametrize("level", [0, -1]) + @pytest.mark.parametrize( + "index", + [ + Index(["qux", "baz", "foo", "bar"]), + # Float64Index overrides isin, so must be checked separately + Float64Index([1.0, 2.0, 3.0, 4.0]), + ], + ) + def test_isin_level_kwarg(self, level, index): + values = index.tolist()[-2:] + ["nonexisting"] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, index.isin(values, level=level)) + + index.name = "foobar" + tm.assert_numpy_array_equal(expected, index.isin(values, level="foobar")) + + def test_isin_level_kwarg_bad_level_raises(self, index): + for level in [10, index.nlevels, -(index.nlevels + 1)]: + with pytest.raises(IndexError, match="Too many levels"): + index.isin([], level=level) + + @pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan]) + def test_isin_level_kwarg_bad_label_raises(self, label, index): + if isinstance(index, MultiIndex): + index = index.rename(["foo", "bar"] + index.names[2:]) + msg = f"'Level {label} not found'" + else: + index = index.rename("foo") + msg = rf"Requested level \({label}\) does not match index name 
\(foo\)" + with pytest.raises(KeyError, match=msg): + index.isin([], level=label) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + index = Index(["a", "b"]) + expected = np.array([False, False]) + + result = index.isin(empty) + tm.assert_numpy_array_equal(expected, result) + + @pytest.mark.parametrize( + "values", + [ + [1, 2, 3, 4], + [1.0, 2.0, 3.0, 4.0], + [True, True, True, True], + ["foo", "bar", "baz", "qux"], + date_range("2018-01-01", freq="D", periods=4), + ], + ) + def test_boolean_cmp(self, values): + index = Index(values) + result = index == values + expected = np.array([True, True, True, True], dtype=bool) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + @pytest.mark.parametrize("name,level", [(None, 0), ("a", "a")]) + def test_get_level_values(self, index, name, level): + expected = index.copy() + if name: + expected.name = name + + result = expected.get_level_values(level) + tm.assert_index_equal(result, expected) + + def test_slice_keep_name(self): + index = Index(["a", "b"], name="asdf") + assert index.name == index[1:].name + + @pytest.mark.parametrize( + "index", + ["string", "datetime", "int", "uint", "float"], + indirect=True, + ) + def test_join_self(self, index, join_type): + joined = index.join(index, how=join_type) + assert index is joined + + @pytest.mark.parametrize("method", ["strip", "rstrip", "lstrip"]) + def test_str_attribute(self, method): + # GH9068 + index = Index([" jack", "jill ", " jesse ", "frank"]) + expected = Index([getattr(str, method)(x) for x in index.values]) + + result = getattr(index.str, method)() + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + Index(range(5)), + tm.makeDateIndex(10), + MultiIndex.from_tuples([("foo", "1"), ("bar", "3")]), + period_range(start="2000", end="2010", freq="A"), + ], + ) + def test_str_attribute_raises(self, index): + with pytest.raises(AttributeError, match="only use .str accessor"): + index.str.repeat(2) + + @pytest.mark.parametrize( + "expand,expected", + [ + (None, Index([["a", "b", "c"], ["d", "e"], ["f"]])), + (False, Index([["a", "b", "c"], ["d", "e"], ["f"]])), + ( + True, + MultiIndex.from_tuples( + [("a", "b", "c"), ("d", "e", np.nan), ("f", np.nan, np.nan)] + ), + ), + ], + ) + def test_str_split(self, expand, expected): + index = Index(["a b c", "d e", "f"]) + if expand is not None: + result = index.str.split(expand=expand) + else: + result = index.str.split() + + tm.assert_index_equal(result, expected) + + def test_str_bool_return(self): + # test boolean case, should return np.array instead of boolean Index + index = Index(["a1", "a2", "b1", "b2"]) + result = index.str.startswith("a") + expected = np.array([True, True, False, False]) + + tm.assert_numpy_array_equal(result, expected) + assert isinstance(result, np.ndarray) + + def test_str_bool_series_indexing(self): + index = Index(["a1", "a2", "b1", "b2"]) + s = Series(range(4), index=index) + + result = s[s.index.str.startswith("a")] + expected = Series(range(2), index=["a1", "a2"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index,expected", [(Index(list("abcd")), True), (Index(range(4)), False)] + ) + def test_tab_completion(self, index, expected): + # GH 9910 + result = "str" in dir(index) + assert result == expected + + def test_indexing_doesnt_change_class(self): + index = Index([1, 2, 3, "a", "b", "c"]) + + assert 
index[1:3].identical(Index([2, 3], dtype=np.object_)) + assert index[[0, 1]].identical(Index([1, 2], dtype=np.object_)) + + def test_outer_join_sort(self): + left_index = Index(np.random.permutation(15)) + right_index = tm.makeDateIndex(10) + + with tm.assert_produces_warning(RuntimeWarning): + result = left_index.join(right_index, how="outer") + + # right_index in this case because DatetimeIndex has join precedence + # over Int64Index + with tm.assert_produces_warning(RuntimeWarning): + expected = right_index.astype(object).union(left_index.astype(object)) + + tm.assert_index_equal(result, expected) + + def test_take_fill_value(self): + # GH 12631 + index = Index(list("ABC"), name="xxx") + result = index.take(np.array([1, 0, -1])) + expected = Index(list("BAC"), name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = index.take(np.array([1, 0, -1]), fill_value=True) + expected = Index(["B", "A", np.nan], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = index.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = Index(["B", "A", "C"], name="xxx") + tm.assert_index_equal(result, expected) + + def test_take_fill_value_none_raises(self): + index = Index(list("ABC"), name="xxx") + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + + with pytest.raises(ValueError, match=msg): + index.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + index.take(np.array([1, 0, -5]), fill_value=True) + + def test_take_bad_bounds_raises(self): + index = Index(list("ABC"), name="xxx") + with pytest.raises(IndexError, match="out of bounds"): + index.take(np.array([1, -5])) + + @pytest.mark.parametrize("name", [None, "foobar"]) + @pytest.mark.parametrize( + "labels", + [ + [], + np.array([]), + ["A", "B", "C"], + ["C", "B", "A"], + np.array(["A", "B", "C"]), + np.array(["C", "B", "A"]), + # Must preserve name even if dtype changes + date_range("20130101", periods=3).values, + date_range("20130101", periods=3).tolist(), + ], + ) + def test_reindex_preserves_name_if_target_is_list_or_ndarray(self, name, labels): + # GH6552 + index = Index([0, 1, 2]) + index.name = name + assert index.reindex(labels)[0].name == name + + @pytest.mark.parametrize("labels", [[], np.array([]), np.array([], dtype=np.int64)]) + def test_reindex_preserves_type_if_target_is_empty_list_or_array(self, labels): + # GH7774 + index = Index(list("abc")) + assert index.reindex(labels)[0].dtype.type == np.object_ + + @pytest.mark.parametrize( + "labels,dtype", + [ + (Int64Index([]), np.int64), + (Float64Index([]), np.float64), + (DatetimeIndex([]), np.datetime64), + ], + ) + def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self, labels, dtype): + # GH7774 + index = Index(list("abc")) + assert index.reindex(labels)[0].dtype.type == dtype + + def test_reindex_no_type_preserve_target_empty_mi(self): + index = Index(list("abc")) + result = index.reindex( + MultiIndex([Int64Index([]), Float64Index([])], [[], []]) + )[0] + assert result.levels[0].dtype.type == np.int64 + assert result.levels[1].dtype.type == np.float64 + + def test_reindex_ignoring_level(self): + # GH#35132 + idx = Index([1, 2, 3], name="x") + idx2 = Index([1, 2, 3, 4], name="x") + expected = Index([1, 2, 3, 4], name="x") + result, _ = idx.reindex(idx2, level="x") + tm.assert_index_equal(result, expected) + + def test_groupby(self): + index = Index(range(5)) + result = index.groupby(np.array([1, 1, 2, 2, 
2])) + expected = {1: Index([0, 1]), 2: Index([2, 3, 4])} + + tm.assert_dict_equal(result, expected) + + @pytest.mark.parametrize( + "mi,expected", + [ + (MultiIndex.from_tuples([(1, 2), (4, 5)]), np.array([True, True])), + (MultiIndex.from_tuples([(1, 2), (4, 6)]), np.array([True, False])), + ], + ) + def test_equals_op_multiindex(self, mi, expected): + # GH9785 + # test comparisons of multiindex + df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + result = df.index == mi + tm.assert_numpy_array_equal(result, expected) + + def test_equals_op_multiindex_identify(self): + df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + result = df.index == df.index + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]), + Index(["foo", "bar", "baz"]), + ], + ) + def test_equals_op_mismatched_multiindex_raises(self, index): + df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + with pytest.raises(ValueError, match="Lengths must match"): + df.index == index + + def test_equals_op_index_vs_mi_same_length(self): + mi = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) + index = Index(["foo", "bar", "baz"]) + + result = mi == index + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta]) + def test_dt_conversion_preserves_name(self, dt_conv): + # GH 10875 + index = Index(["01:02:03", "01:02:04"], name="label") + assert index.name == dt_conv(index).name + + def test_cached_properties_not_settable(self): + index = Index([1, 2, 3]) + with pytest.raises(AttributeError, match="Can't set attribute"): + index.is_unique = False + + @async_mark() + async def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; idx = pd.Index([1, 2])" + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning; + # appears resolved 2021-02-02 + with tm.assert_produces_warning(None): + with provisionalcompleter("ignore"): + list(ip.Completer.completions("idx.", 4)) + + def test_contains_method_removed(self, index): + # GH#30103 method removed for all types except IntervalIndex + if isinstance(index, IntervalIndex): + index.contains(1) + else: + msg = f"'{type(index).__name__}' object has no attribute 'contains'" + with pytest.raises(AttributeError, match=msg): + index.contains(1) + + def test_sortlevel(self): + index = Index([5, 4, 3, 2, 1]) + with pytest.raises(Exception, match="ascending must be a single bool value or"): + index.sortlevel(ascending="True") + + with pytest.raises( + Exception, match="ascending must be a list of bool values of length 1" + ): + index.sortlevel(ascending=[True, True]) + + with pytest.raises(Exception, match="ascending must be a bool value"): + index.sortlevel(ascending=["True"]) + + expected = Index([1, 2, 3, 4, 5]) + result = index.sortlevel(ascending=[True]) + tm.assert_index_equal(result[0], expected) + + expected = Index([1, 2, 3, 4, 5]) + result = index.sortlevel(ascending=True) + tm.assert_index_equal(result[0], expected) + + expected = Index([5, 4, 3, 2, 1]) + result = index.sortlevel(ascending=False) + tm.assert_index_equal(result[0], expected) + + +class TestMixedIntIndex(Base): + # Mostly the tests 
from common.py for which the results differ + # in py2 and py3 because ints and strings are uncomparable in py3 + # (GH 13514) + _index_cls = Index + + @pytest.fixture + def simple_index(self) -> Index: + return self._index_cls([0, "a", 1, "b", 2, "c"]) + + @pytest.fixture(params=[[0, "a", 1, "b", 2, "c"]], ids=["mixedIndex"]) + def index(self, request): + return Index(request.param) + + def test_argsort(self, simple_index): + index = simple_index + with pytest.raises(TypeError, match="'>|<' not supported"): + index.argsort() + + def test_numpy_argsort(self, simple_index): + index = simple_index + with pytest.raises(TypeError, match="'>|<' not supported"): + np.argsort(index) + + def test_copy_name(self, simple_index): + # Check that "name" argument passed at initialization is honoured + # GH12309 + index = simple_index + + first = type(index)(index, copy=True, name="mario") + second = type(first)(first, copy=False) + + # Even though "copy=False", we want a new object. + assert first is not second + tm.assert_index_equal(first, second) + + assert first.name == "mario" + assert second.name == "mario" + + s1 = Series(2, index=first) + s2 = Series(3, index=second[:-1]) + + s3 = s1 * s2 + + assert s3.index.name == "mario" + + def test_copy_name2(self): + # Check that adding a "name" parameter to the copy is honored + # GH14302 + index = Index([1, 2], name="MyName") + index1 = index.copy() + + tm.assert_index_equal(index, index1) + + index2 = index.copy(name="NewName") + tm.assert_index_equal(index, index2, check_names=False) + assert index.name == "MyName" + assert index2.name == "NewName" + + with tm.assert_produces_warning(FutureWarning): + index3 = index.copy(names=["NewName"]) + tm.assert_index_equal(index, index3, check_names=False) + assert index.name == "MyName" + assert index.names == ["MyName"] + assert index3.name == "NewName" + assert index3.names == ["NewName"] + + def test_copy_names_deprecated(self, simple_index): + # GH44916 + with tm.assert_produces_warning(FutureWarning): + simple_index.copy(names=["a"]) + + def test_unique_na(self): + idx = Index([2, np.nan, 2, 1], name="my_index") + expected = Index([2, np.nan, 1], name="my_index") + result = idx.unique() + tm.assert_index_equal(result, expected) + + def test_logical_compat(self, simple_index): + index = simple_index + assert index.all() == index.values.all() + assert index.any() == index.values.any() + + @pytest.mark.parametrize("how", ["any", "all"]) + @pytest.mark.parametrize("dtype", [None, object, "category"]) + @pytest.mark.parametrize( + "vals,expected", + [ + ([1, 2, 3], [1, 2, 3]), + ([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]), + ([1.0, 2.0, np.nan, 3.0], [1.0, 2.0, 3.0]), + (["A", "B", "C"], ["A", "B", "C"]), + (["A", np.nan, "B", "C"], ["A", "B", "C"]), + ], + ) + def test_dropna(self, how, dtype, vals, expected): + # GH 6194 + index = Index(vals, dtype=dtype) + result = index.dropna(how=how) + expected = Index(expected, dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("how", ["any", "all"]) + @pytest.mark.parametrize( + "index,expected", + [ + ( + DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + ), + ( + DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", pd.NaT]), + DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + ), + ( + TimedeltaIndex(["1 days", "2 days", "3 days"]), + TimedeltaIndex(["1 days", "2 days", "3 days"]), + ), + ( + TimedeltaIndex([pd.NaT, "1 days", "2 days", "3 days", pd.NaT]), + 
TimedeltaIndex(["1 days", "2 days", "3 days"]), + ), + ( + PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + ), + ( + PeriodIndex(["2012-02", "2012-04", "NaT", "2012-05"], freq="M"), + PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + ), + ], + ) + def test_dropna_dt_like(self, how, index, expected): + result = index.dropna(how=how) + tm.assert_index_equal(result, expected) + + def test_dropna_invalid_how_raises(self): + msg = "invalid how option: xxx" + with pytest.raises(ValueError, match=msg): + Index([1, 2, 3]).dropna(how="xxx") + + @pytest.mark.parametrize( + "index", + [ + Index([np.nan]), + Index([np.nan, 1]), + Index([1, 2, np.nan]), + Index(["a", "b", np.nan]), + pd.to_datetime(["NaT"]), + pd.to_datetime(["NaT", "2000-01-01"]), + pd.to_datetime(["2000-01-01", "NaT", "2000-01-02"]), + pd.to_timedelta(["1 day", "NaT"]), + ], + ) + def test_is_monotonic_na(self, index): + assert index.is_monotonic_increasing is False + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_increasing is False + assert index._is_strictly_monotonic_decreasing is False + + def test_int_name_format(self, frame_or_series): + index = Index(["a", "b", "c"], name=0) + result = frame_or_series(list(range(3)), index=index) + assert "0" in repr(result) + + def test_str_to_bytes_raises(self): + # GH 26447 + index = Index([str(x) for x in range(10)]) + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(index) + + @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning") + def test_index_with_tuple_bool(self): + # GH34123 + # TODO: also this op right now produces FutureWarning from numpy + # https://github.com/numpy/numpy/issues/11521 + idx = Index([("a", "b"), ("b", "c"), ("c", "a")]) + result = idx == ("c", "a") + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + +class TestIndexUtils: + @pytest.mark.parametrize( + "data, names, expected", + [ + ([[1, 2, 3]], None, Index([1, 2, 3])), + ([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")), + ( + [["a", "a"], ["c", "d"]], + None, + MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]]), + ), + ( + [["a", "a"], ["c", "d"]], + ["L1", "L2"], + MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]], names=["L1", "L2"]), + ), + ], + ) + def test_ensure_index_from_sequences(self, data, names, expected): + result = ensure_index_from_sequences(data, names) + tm.assert_index_equal(result, expected) + + def test_ensure_index_mixed_closed_intervals(self): + # GH27172 + intervals = [ + pd.Interval(0, 1, closed="left"), + pd.Interval(1, 2, closed="right"), + pd.Interval(2, 3, closed="neither"), + pd.Interval(3, 4, closed="both"), + ] + result = ensure_index(intervals) + expected = Index(intervals, dtype=object) + tm.assert_index_equal(result, expected) + + def test_ensure_index_uint64(self): + # with both 0 and a large-uint64, np.array will infer to float64 + # https://github.com/numpy/numpy/issues/19146 + # but a more accurate choice would be uint64 + values = [0, np.iinfo(np.uint64).max] + + result = ensure_index(values) + assert list(result) == values + + expected = Index(values, dtype="uint64") + tm.assert_index_equal(result, expected) + + def test_get_combined_index(self): + result = _get_combined_index([]) + expected = Index([]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "opname", + [ + "eq", + "ne", + "le", + "lt", 
+ "ge", + "gt", + "add", + "radd", + "sub", + "rsub", + "mul", + "rmul", + "truediv", + "rtruediv", + "floordiv", + "rfloordiv", + "pow", + "rpow", + "mod", + "divmod", + ], +) +def test_generated_op_names(opname, index): + opname = f"__{opname}__" + method = getattr(index, opname) + assert method.__name__ == opname + + +@pytest.mark.parametrize("index_maker", tm.index_subclass_makers_generator()) +def test_index_subclass_constructor_wrong_kwargs(index_maker): + # GH #19348 + with pytest.raises(TypeError, match="unexpected keyword argument"): + index_maker(foo="bar") + + +@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") +def test_deprecated_fastpath(): + msg = "[Uu]nexpected keyword argument" + with pytest.raises(TypeError, match=msg): + Index(np.array(["a", "b"], dtype=object), name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + Int64Index(np.array([1, 2, 3], dtype="int64"), name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + RangeIndex(0, 5, 2, name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + CategoricalIndex(["a", "b", "c"], name="test", fastpath=True) + + +def test_shape_of_invalid_index(): + # Currently, it is possible to create "invalid" index objects backed by + # a multi-dimensional array (see https://github.com/pandas-dev/pandas/issues/27125 + # about this). However, as long as this is not solved in general,this test ensures + # that the returned shape is consistent with this underlying array for + # compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775) + idx = Index([0, 1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + # GH#30588 multi-dimensional indexing deprecated + assert idx[:, None].shape == (4, 1) + + +def test_validate_1d_input(): + # GH#27125 check that we do not have >1-dimensional input + msg = "Index data must be 1-dimensional" + + arr = np.arange(8).reshape(2, 2, 2) + with pytest.raises(ValueError, match=msg): + Index(arr) + + with pytest.raises(ValueError, match=msg): + Float64Index(arr.astype(np.float64)) + + with pytest.raises(ValueError, match=msg): + Int64Index(arr.astype(np.int64)) + + with pytest.raises(ValueError, match=msg): + UInt64Index(arr.astype(np.uint64)) + + df = DataFrame(arr.reshape(4, 2)) + with pytest.raises(ValueError, match=msg): + Index(df) + + # GH#13601 trying to assign a multi-dimensional array to an index is not + # allowed + ser = Series(0, range(4)) + with pytest.raises(ValueError, match=msg): + ser.index = np.array([[2, 3]] * 4) + + +@pytest.mark.parametrize( + "klass, extra_kwargs", + [ + [Index, {}], + [Int64Index, {}], + [Float64Index, {}], + [DatetimeIndex, {}], + [TimedeltaIndex, {}], + [NumericIndex, {}], + [PeriodIndex, {"freq": "Y"}], + ], +) +def test_construct_from_memoryview(klass, extra_kwargs): + # GH 13120 + result = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs) + expected = klass(list(range(2000, 2005)), **extra_kwargs) + tm.assert_index_equal(result, expected, exact=True) + + +def test_index_set_names_pos_args_deprecation(): + # GH#41485 + idx = Index([1, 2, 3, 4]) + msg = ( + "In a future version of pandas all arguments of Index.set_names " + "except for the argument 'names' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.set_names("quarter", None) + expected = Index([1, 2, 3, 4], name="quarter") + tm.assert_index_equal(result, expected) + + +def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + idx = Index([1, 
2, 3, 1]) + msg = ( + "In a future version of pandas all arguments of " + "Index.drop_duplicates will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + idx.drop_duplicates("last") + result = idx.drop_duplicates("last") + expected = Index([2, 3, 1]) + tm.assert_index_equal(expected, result) + + +def test_get_attributes_dict_deprecated(): + # https://github.com/pandas-dev/pandas/pull/44028 + idx = Index([1, 2, 3, 1]) + with tm.assert_produces_warning(DeprecationWarning): + attrs = idx._get_attributes_dict() + assert attrs == {"name": None} + + +@pytest.mark.parametrize("op", [operator.lt, operator.gt]) +def test_nan_comparison_same_object(op): + # GH#47105 + idx = Index([np.nan]) + expected = np.array([False]) + + result = op(idx, idx) + tm.assert_numpy_array_equal(result, expected) + + result = op(idx, idx.copy()) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py new file mode 100644 index 00000000..40a10765 --- /dev/null +++ b/pandas/tests/indexes/test_common.py @@ -0,0 +1,514 @@ +""" +Collection of tests asserting things that should be true for +any index subclass except for MultiIndex. Makes use of the `index_flat` +fixture defined in pandas/conftest.py. +""" +import re + +import numpy as np +import pytest + +from pandas.compat import ( + IS64, + pa_version_under7p0, +) + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + DatetimeIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.core.api import NumericIndex + + +class TestCommon: + @pytest.mark.parametrize("name", [None, "new_name"]) + def test_to_frame(self, name, index_flat): + # see GH#15230, GH#22580 + idx = index_flat + + if name: + idx_name = name + else: + idx_name = idx.name or 0 + + df = idx.to_frame(name=idx_name) + + assert df.index is idx + assert len(df.columns) == 1 + assert df.columns[0] == idx_name + assert df[idx_name].values is not idx.values + + df = idx.to_frame(index=False, name=idx_name) + assert df.index is not idx + + def test_droplevel(self, index_flat): + # GH 21115 + # MultiIndex is tested separately in test_multi.py + index = index_flat + + assert index.droplevel([]).equals(index) + + for level in [index.name, [index.name]]: + if isinstance(index.name, tuple) and level is index.name: + # GH 21121 : droplevel with tuple name + continue + msg = ( + "Cannot remove 1 levels from an index with 1 levels: at least one " + "level must be left." + ) + with pytest.raises(ValueError, match=msg): + index.droplevel(level) + + for level in "wrong", ["wrong"]: + with pytest.raises( + KeyError, + match=r"'Requested level \(wrong\) does not match index name \(None\)'", + ): + index.droplevel(level) + + def test_constructor_non_hashable_name(self, index_flat): + # GH 20527 + index = index_flat + + message = "Index.name must be a hashable type" + renamed = [["1"]] + + # With .rename() + with pytest.raises(TypeError, match=message): + index.rename(name=renamed) + + # With .set_names() + with pytest.raises(TypeError, match=message): + index.set_names(names=renamed) + + def test_constructor_unwraps_index(self, index_flat): + a = index_flat + # Passing dtype is necessary for Index([True, False], dtype=object) + # case. 
+ b = type(a)(a, dtype=a.dtype) + tm.assert_equal(a._data, b._data) + + def test_to_flat_index(self, index_flat): + # 22866 + index = index_flat + + result = index.to_flat_index() + tm.assert_index_equal(result, index) + + def test_set_name_methods(self, index_flat): + # MultiIndex tested separately + index = index_flat + new_name = "This is the new name for this index" + + original_name = index.name + new_ind = index.set_names([new_name]) + assert new_ind.name == new_name + assert index.name == original_name + res = index.rename(new_name, inplace=True) + + # should return None + assert res is None + assert index.name == new_name + assert index.names == [new_name] + # FIXME: dont leave commented-out + # with pytest.raises(TypeError, match="list-like"): + # # should still fail even if it would be the right length + # ind.set_names("a") + with pytest.raises(ValueError, match="Level must be None"): + index.set_names("a", level=0) + + # rename in place just leaves tuples and other containers alone + name = ("A", "B") + index.rename(name, inplace=True) + assert index.name == name + assert index.names == [name] + + def test_copy_and_deepcopy(self, index_flat): + from copy import ( + copy, + deepcopy, + ) + + index = index_flat + + for func in (copy, deepcopy): + idx_copy = func(index) + assert idx_copy is not index + assert idx_copy.equals(index) + + new_copy = index.copy(deep=True, name="banana") + assert new_copy.name == "banana" + + def test_copy_name(self, index_flat): + # GH#12309: Check that the "name" argument + # passed at initialization is honored. + index = index_flat + + first = type(index)(index, copy=True, name="mario") + second = type(first)(first, copy=False) + + # Even though "copy=False", we want a new object. + assert first is not second + tm.assert_index_equal(first, second) + + # Not using tm.assert_index_equal() since names differ. + assert index.equals(first) + + assert first.name == "mario" + assert second.name == "mario" + + # TODO: belongs in series arithmetic tests? + s1 = pd.Series(2, index=first) + s2 = pd.Series(3, index=second[:-1]) + # See GH#13365 + s3 = s1 * s2 + assert s3.index.name == "mario" + + def test_copy_name2(self, index_flat): + # GH#35592 + index = index_flat + + assert index.copy(name="mario").name == "mario" + + with pytest.raises(ValueError, match="Length of new names must be 1, got 2"): + index.copy(name=["mario", "luigi"]) + + msg = f"{type(index).__name__}.name must be a hashable type" + with pytest.raises(TypeError, match=msg): + index.copy(name=[["mario"]]) + + def test_unique_level(self, index_flat): + # don't test a MultiIndex here (as its tested separated) + index = index_flat + + # GH 17896 + expected = index.drop_duplicates() + for level in [0, index.name, None]: + result = index.unique(level=level) + tm.assert_index_equal(result, expected) + + msg = "Too many levels: Index has only 1 level, not 4" + with pytest.raises(IndexError, match=msg): + index.unique(level=3) + + msg = ( + rf"Requested level \(wrong\) does not match index name " + rf"\({re.escape(index.name.__repr__())}\)" + ) + with pytest.raises(KeyError, match=msg): + index.unique(level="wrong") + + def test_unique(self, index_flat): + # MultiIndex tested separately + index = index_flat + if not len(index): + pytest.skip("Skip check for empty Index and MultiIndex") + + idx = index[[0] * 5] + idx_unique = index[[0]] + + # We test against `idx_unique`, so first we make sure it's unique + # and doesn't contain nans. 
+ assert idx_unique.is_unique is True + try: + assert idx_unique.hasnans is False + except NotImplementedError: + pass + + result = idx.unique() + tm.assert_index_equal(result, idx_unique) + + # nans: + if not index._can_hold_na: + pytest.skip("Skip na-check if index cannot hold na") + + vals = index._values[[0] * 5] + vals[0] = np.nan + + vals_unique = vals[:2] + idx_nan = index._shallow_copy(vals) + idx_unique_nan = index._shallow_copy(vals_unique) + assert idx_unique_nan.is_unique is True + + assert idx_nan.dtype == index.dtype + assert idx_unique_nan.dtype == index.dtype + + expected = idx_unique_nan + for i in [idx_nan, idx_unique_nan]: + result = i.unique() + tm.assert_index_equal(result, expected) + + def test_searchsorted_monotonic(self, index_flat, request): + # GH17271 + index = index_flat + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(index, pd.IntervalIndex): + mark = pytest.mark.xfail( + reason="IntervalIndex.searchsorted does not support Interval arg", + raises=NotImplementedError, + ) + request.node.add_marker(mark) + + # nothing to test if the index is empty + if index.empty: + pytest.skip("Skip check for empty Index") + value = index[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (index == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(index) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if index.is_monotonic_increasing: + ssm_left = index._searchsorted_monotonic(value, side="left") + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side="right") + assert expected_right == ssm_right + + ss_left = index.searchsorted(value, side="left") + assert expected_left == ss_left + + ss_right = index.searchsorted(value, side="right") + assert expected_right == ss_right + + elif index.is_monotonic_decreasing: + ssm_left = index._searchsorted_monotonic(value, side="left") + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side="right") + assert expected_right == ssm_right + else: + # non-monotonic should raise. 
+ msg = "index must be monotonic increasing or decreasing" + with pytest.raises(ValueError, match=msg): + index._searchsorted_monotonic(value, side="left") + + def test_drop_duplicates(self, index_flat, keep): + # MultiIndex is tested separately + index = index_flat + if isinstance(index, RangeIndex): + pytest.skip( + "RangeIndex is tested in test_drop_duplicates_no_duplicates " + "as it cannot hold duplicates" + ) + if len(index) == 0: + pytest.skip( + "empty index is tested in test_drop_duplicates_no_duplicates " + "as it cannot hold duplicates" + ) + + # make unique index + holder = type(index) + unique_values = list(set(index)) + dtype = index.dtype if isinstance(index, NumericIndex) else None + unique_idx = holder(unique_values, dtype=dtype) + + # make duplicated index + n = len(unique_idx) + duplicated_selection = np.random.choice(n, int(n * 1.5)) + idx = holder(unique_idx.values[duplicated_selection]) + + # Series.duplicated is tested separately + expected_duplicated = ( + pd.Series(duplicated_selection).duplicated(keep=keep).values + ) + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated) + + # Series.drop_duplicates is tested separately + expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep)) + tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped) + + def test_drop_duplicates_no_duplicates(self, index_flat): + # MultiIndex is tested separately + index = index_flat + + # make unique index + if isinstance(index, RangeIndex): + # RangeIndex cannot have duplicates + unique_idx = index + else: + holder = type(index) + unique_values = list(set(index)) + dtype = index.dtype if isinstance(index, NumericIndex) else None + unique_idx = holder(unique_values, dtype=dtype) + + # check on unique index + expected_duplicated = np.array([False] * len(unique_idx), dtype="bool") + tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated) + result_dropped = unique_idx.drop_duplicates() + tm.assert_index_equal(result_dropped, unique_idx) + # validate shallow copy + assert result_dropped is not unique_idx + + def test_drop_duplicates_inplace(self, index): + msg = r"drop_duplicates\(\) got an unexpected keyword argument" + with pytest.raises(TypeError, match=msg): + index.drop_duplicates(inplace=True) + + def test_has_duplicates(self, index_flat): + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates. + index = index_flat + holder = type(index) + if not len(index) or isinstance(index, RangeIndex): + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates. + # RangeIndex is unique by definition. 
+ pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex") + + idx = holder([index[0]] * 5) + assert idx.is_unique is False + assert idx.has_duplicates is True + + @pytest.mark.parametrize( + "dtype", + ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"], + ) + def test_astype_preserves_name(self, index, dtype): + # https://github.com/pandas-dev/pandas/issues/32013 + if isinstance(index, MultiIndex): + index.names = ["idx" + str(i) for i in range(index.nlevels)] + else: + index.name = "idx" + + warn = None + if ( + isinstance(index, DatetimeIndex) + and index.tz is not None + and dtype == "datetime64[ns]" + ): + # This astype is deprecated in favor of tz_localize + warn = FutureWarning + elif index.dtype.kind == "c" and dtype in ["float64", "int64", "uint64"]: + # imaginary components discarded + warn = np.ComplexWarning + + is_pyarrow_str = ( + str(index.dtype) == "string[pyarrow]" + and pa_version_under7p0 + and dtype == "category" + ) + try: + # Some of these conversions cannot succeed so we use a try / except + with tm.assert_produces_warning( + warn, + raise_on_extra_warnings=is_pyarrow_str, + check_stacklevel=False, + ): + result = index.astype(dtype) + except (ValueError, TypeError, NotImplementedError, SystemError): + return + + if isinstance(index, MultiIndex): + assert result.names == index.names + else: + assert result.name == index.name + + def test_asi8_deprecation(self, index): + # GH#37877 + if isinstance(index, (DatetimeIndex, TimedeltaIndex, PeriodIndex)): + warn = None + else: + warn = FutureWarning + + with tm.assert_produces_warning(warn): + index.asi8 + + def test_hasnans_isnans(self, index_flat): + # GH#11343, added tests for hasnans / isnans + index = index_flat + + # cases in indices doesn't include NaN + idx = index.copy(deep=True) + expected = np.array([False] * len(idx), dtype=bool) + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is False + + idx = index.copy(deep=True) + values = idx._values + + if len(index) == 0: + return + elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype): + return + elif index.dtype == bool: + # values[1] = np.nan below casts to True! + return + + values[1] = np.nan + + idx = type(index)(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is True + + +@pytest.mark.parametrize("na_position", [None, "middle"]) +def test_sort_values_invalid_na_position(index_with_missing, na_position): + + with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): + index_with_missing.sort_values(na_position=na_position) + + +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_sort_values_with_missing(index_with_missing, na_position, request): + # GH 35584. 
Test that sort_values works with missing values, + # sort non-missing and place missing according to na_position + + if isinstance(index_with_missing, CategoricalIndex): + request.node.add_marker( + pytest.mark.xfail( + reason="missing value sorting order not well-defined", strict=False + ) + ) + + missing_count = np.sum(index_with_missing.isna()) + not_na_vals = index_with_missing[index_with_missing.notna()].values + sorted_values = np.sort(not_na_vals) + if na_position == "first": + sorted_values = np.concatenate([[None] * missing_count, sorted_values]) + else: + sorted_values = np.concatenate([sorted_values, [None] * missing_count]) + + # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray + expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype) + + result = index_with_missing.sort_values(na_position=na_position) + tm.assert_index_equal(result, expected) + + +def test_ndarray_compat_properties(index): + if isinstance(index, PeriodIndex) and not IS64: + pytest.skip("Overflow") + idx = index + assert idx.T.equals(idx) + assert idx.transpose().equals(idx) + + values = idx.values + + assert idx.shape == values.shape + assert idx.ndim == values.ndim + assert idx.size == values.size + + if not isinstance(index, (RangeIndex, MultiIndex)): + # These two are not backed by an ndarray + assert idx.nbytes == values.nbytes + + # test for validity + idx.nbytes + idx.values.nbytes diff --git a/pandas/tests/indexes/test_engines.py b/pandas/tests/indexes/test_engines.py new file mode 100644 index 00000000..02d8c5b2 --- /dev/null +++ b/pandas/tests/indexes/test_engines.py @@ -0,0 +1,193 @@ +import re + +import numpy as np +import pytest + +from pandas._libs import index as libindex + +import pandas as pd + + +@pytest.fixture( + params=[ + (libindex.Int64Engine, np.int64), + (libindex.Int32Engine, np.int32), + (libindex.Int16Engine, np.int16), + (libindex.Int8Engine, np.int8), + (libindex.UInt64Engine, np.uint64), + (libindex.UInt32Engine, np.uint32), + (libindex.UInt16Engine, np.uint16), + (libindex.UInt8Engine, np.uint8), + (libindex.Float64Engine, np.float64), + (libindex.Float32Engine, np.float32), + ], + ids=lambda x: x[0].__name__, +) +def numeric_indexing_engine_type_and_dtype(request): + return request.param + + +class TestDatetimeEngine: + @pytest.mark.parametrize( + "scalar", + [ + pd.Timedelta(pd.Timestamp("2016-01-01").asm8.view("m8[ns]")), + pd.Timestamp("2016-01-01").value, + pd.Timestamp("2016-01-01").to_pydatetime(), + pd.Timestamp("2016-01-01").to_datetime64(), + ], + ) + def test_not_contains_requires_timestamp(self, scalar): + dti1 = pd.date_range("2016-01-01", periods=3) + dti2 = dti1.insert(1, pd.NaT) # non-monotonic + dti3 = dti1.insert(3, dti1[0]) # non-unique + dti4 = pd.date_range("2016-01-01", freq="ns", periods=2_000_000) + dti5 = dti4.insert(0, dti4[0]) # over size threshold, not unique + + msg = "|".join([re.escape(str(scalar)), re.escape(repr(scalar))]) + for dti in [dti1, dti2, dti3, dti4, dti5]: + with pytest.raises(TypeError, match=msg): + scalar in dti._engine + + with pytest.raises(KeyError, match=msg): + dti._engine.get_loc(scalar) + + +class TestTimedeltaEngine: + @pytest.mark.parametrize( + "scalar", + [ + pd.Timestamp(pd.Timedelta(days=42).asm8.view("datetime64[ns]")), + pd.Timedelta(days=42).value, + pd.Timedelta(days=42).to_pytimedelta(), + pd.Timedelta(days=42).to_timedelta64(), + ], + ) + def test_not_contains_requires_timedelta(self, scalar): + tdi1 = pd.timedelta_range("42 days", freq="9h", periods=1234) + tdi2 = 
tdi1.insert(1, pd.NaT) # non-monotonic + tdi3 = tdi1.insert(3, tdi1[0]) # non-unique + tdi4 = pd.timedelta_range("42 days", freq="ns", periods=2_000_000) + tdi5 = tdi4.insert(0, tdi4[0]) # over size threshold, not unique + + msg = "|".join([re.escape(str(scalar)), re.escape(repr(scalar))]) + for tdi in [tdi1, tdi2, tdi3, tdi4, tdi5]: + with pytest.raises(TypeError, match=msg): + scalar in tdi._engine + + with pytest.raises(KeyError, match=msg): + tdi._engine.get_loc(scalar) + + +class TestNumericEngine: + def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + num = 1000 + arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype) + + # monotonic increasing + engine = engine_type(arr) + assert engine.is_monotonic_increasing is True + assert engine.is_monotonic_decreasing is False + + # monotonic decreasing + engine = engine_type(arr[::-1]) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is True + + # neither monotonic increasing or decreasing + arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype) + engine = engine_type(arr[::-1]) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is False + + def test_is_unique(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + + # unique + arr = np.array([1, 3, 2], dtype=dtype) + engine = engine_type(arr) + assert engine.is_unique is True + + # not unique + arr = np.array([1, 2, 1], dtype=dtype) + engine = engine_type(arr) + assert engine.is_unique is False + + def test_get_loc(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + + # unique + arr = np.array([1, 2, 3], dtype=dtype) + engine = engine_type(arr) + assert engine.get_loc(2) == 1 + + # monotonic + num = 1000 + arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype) + engine = engine_type(arr) + assert engine.get_loc(2) == slice(1000, 2000) + + # not monotonic + arr = np.array([1, 2, 3] * num, dtype=dtype) + engine = engine_type(arr) + expected = np.array([False, True, False] * num, dtype=bool) + result = engine.get_loc(2) + assert (result == expected).all() + + +class TestObjectEngine: + engine_type = libindex.ObjectEngine + dtype = np.object_ + values = list("abc") + + def test_is_monotonic(self): + + num = 1000 + arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype) + + # monotonic increasing + engine = self.engine_type(arr) + assert engine.is_monotonic_increasing is True + assert engine.is_monotonic_decreasing is False + + # monotonic decreasing + engine = self.engine_type(arr[::-1]) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is True + + # neither monotonic increasing or decreasing + arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype) + engine = self.engine_type(arr[::-1]) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is False + + def test_is_unique(self): + # unique + arr = np.array(self.values, dtype=self.dtype) + engine = self.engine_type(arr) + assert engine.is_unique is True + + # not unique + arr = np.array(["a", "b", "a"], dtype=self.dtype) + engine = self.engine_type(arr) + assert engine.is_unique is False + + def test_get_loc(self): + # unique + arr = np.array(self.values, dtype=self.dtype) + engine = self.engine_type(arr) + assert engine.get_loc("b") == 1 + + # 
monotonic + num = 1000 + arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype) + engine = self.engine_type(arr) + assert engine.get_loc("b") == slice(1000, 2000) + + # not monotonic + arr = np.array(self.values * num, dtype=self.dtype) + engine = self.engine_type(arr) + expected = np.array([False, True, False] * num, dtype=bool) + result = engine.get_loc("b") + assert (result == expected).all() diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py new file mode 100644 index 00000000..ace66b5b --- /dev/null +++ b/pandas/tests/indexes/test_frozen.py @@ -0,0 +1,113 @@ +import re + +import pytest + +from pandas.core.indexes.frozen import FrozenList + + +@pytest.fixture +def lst(): + return [1, 2, 3, 4, 5] + + +@pytest.fixture +def container(lst): + return FrozenList(lst) + + +@pytest.fixture +def unicode_container(): + return FrozenList(["\u05d0", "\u05d1", "c"]) + + +class TestFrozenList: + def check_mutable_error(self, *args, **kwargs): + # Pass whatever function you normally would to pytest.raises + # (after the Exception kind). + mutable_regex = re.compile("does not support mutable operations") + msg = "'(_s)?re.(SRE_)?Pattern' object is not callable" + with pytest.raises(TypeError, match=msg): + mutable_regex(*args, **kwargs) + + def test_no_mutable_funcs(self, container): + def setitem(): + container[0] = 5 + + self.check_mutable_error(setitem) + + def setslice(): + container[1:2] = 3 + + self.check_mutable_error(setslice) + + def delitem(): + del container[0] + + self.check_mutable_error(delitem) + + def delslice(): + del container[0:3] + + self.check_mutable_error(delslice) + + mutable_methods = ("extend", "pop", "remove", "insert") + + for meth in mutable_methods: + self.check_mutable_error(getattr(container, meth)) + + def test_slicing_maintains_type(self, container, lst): + result = container[1:2] + expected = lst[1:2] + self.check_result(result, expected) + + def check_result(self, result, expected): + assert isinstance(result, FrozenList) + assert result == expected + + def test_string_methods_dont_fail(self, container): + repr(container) + str(container) + bytes(container) + + def test_tricky_container(self, unicode_container): + repr(unicode_container) + str(unicode_container) + + def test_add(self, container, lst): + result = container + (1, 2, 3) + expected = FrozenList(lst + [1, 2, 3]) + self.check_result(result, expected) + + result = (1, 2, 3) + container + expected = FrozenList([1, 2, 3] + lst) + self.check_result(result, expected) + + def test_iadd(self, container, lst): + q = r = container + + q += [5] + self.check_result(q, lst + [5]) + + # Other shouldn't be mutated. 
+ self.check_result(r, lst) + + def test_union(self, container, lst): + result = container.union((1, 2, 3)) + expected = FrozenList(lst + [1, 2, 3]) + self.check_result(result, expected) + + def test_difference(self, container): + result = container.difference([2]) + expected = FrozenList([1, 3, 4, 5]) + self.check_result(result, expected) + + def test_difference_dupe(self): + result = FrozenList([1, 2, 3, 2]).difference([2]) + expected = FrozenList([1, 3]) + self.check_result(result, expected) + + def test_tricky_container_to_bytes_raises(self, unicode_container): + # GH 26447 + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(unicode_container) diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py new file mode 100644 index 00000000..9a57e3e0 --- /dev/null +++ b/pandas/tests/indexes/test_index_new.py @@ -0,0 +1,374 @@ +""" +Tests for the Index constructor conducting inference. +""" +from datetime import ( + datetime, + timedelta, +) +from decimal import Decimal + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_unsigned_integer_dtype + +from pandas import ( + NA, + Categorical, + CategoricalIndex, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + NaT, + PeriodIndex, + Series, + TimedeltaIndex, + Timestamp, + array, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +class TestIndexConstructorInference: + @pytest.mark.parametrize("na_value", [None, np.nan]) + @pytest.mark.parametrize("vtype", [list, tuple, iter]) + def test_construction_list_tuples_nan(self, na_value, vtype): + # GH#18505 : valid tuples containing NaN + values = [(1, "two"), (3.0, na_value)] + result = Index(vtype(values)) + expected = MultiIndex.from_tuples(values) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"], + ) + def test_constructor_int_dtype_float(self, dtype): + # GH#18400 + if is_unsigned_integer_dtype(dtype): + index_type = UInt64Index + else: + index_type = Int64Index + + expected = index_type([0, 1, 2, 3]) + result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", [[True, False, True], np.array([True, False, True], dtype=bool)] + ) + def test_constructor_dtypes_to_object(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=bool) + else: + index = Index(vals) + + assert type(index) is Index + assert index.dtype == bool + + def test_constructor_categorical_to_object(self): + # GH#32167 Categorical data and dtype=object should return object-dtype + ci = CategoricalIndex(range(5)) + result = Index(ci, dtype=object) + assert not isinstance(result, CategoricalIndex) + + def test_constructor_infer_periodindex(self): + xp = period_range("2012-1-1", freq="M", periods=3) + rs = Index(xp) + tm.assert_index_equal(rs, xp) + assert isinstance(rs, PeriodIndex) + + def test_from_list_of_periods(self): + rng = period_range("1/1/2000", periods=20, freq="D") + periods = list(rng) + + result = Index(periods) + assert isinstance(result, PeriodIndex) + + @pytest.mark.parametrize("pos", [0, 1]) + @pytest.mark.parametrize( + "klass,dtype,ctor", + [ + (DatetimeIndex, "datetime64[ns]", np.datetime64("nat")), + 
(TimedeltaIndex, "timedelta64[ns]", np.timedelta64("nat")), + ], + ) + def test_constructor_infer_nat_dt_like( + self, pos, klass, dtype, ctor, nulls_fixture, request + ): + if isinstance(nulls_fixture, Decimal): + # We dont cast these to datetime64/timedelta64 + return + + expected = klass([NaT, NaT]) + assert expected.dtype == dtype + data = [ctor] + data.insert(pos, nulls_fixture) + + warn = None + if nulls_fixture is NA: + expected = Index([NA, NaT]) + mark = pytest.mark.xfail(reason="Broken with np.NaT ctor; see GH 31884") + request.node.add_marker(mark) + # GH#35942 numpy will emit a DeprecationWarning within the + # assert_index_equal calls. Since we can't do anything + # about it until GH#31884 is fixed, we suppress that warning. + warn = DeprecationWarning + + result = Index(data) + + with tm.assert_produces_warning(warn): + tm.assert_index_equal(result, expected) + + result = Index(np.array(data, dtype=object)) + + with tm.assert_produces_warning(warn): + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("swap_objs", [True, False]) + def test_constructor_mixed_nat_objs_infers_object(self, swap_objs): + # mixed np.datetime64/timedelta64 nat results in object + data = [np.datetime64("nat"), np.timedelta64("nat")] + if swap_objs: + data = data[::-1] + + expected = Index(data, dtype=object) + tm.assert_index_equal(Index(data), expected) + tm.assert_index_equal(Index(np.array(data, dtype=object)), expected) + + @pytest.mark.parametrize("swap_objs", [True, False]) + def test_constructor_datetime_and_datetime64(self, swap_objs): + data = [Timestamp(2021, 6, 8, 9, 42), np.datetime64("now")] + if swap_objs: + data = data[::-1] + expected = DatetimeIndex(data) + + tm.assert_index_equal(Index(data), expected) + tm.assert_index_equal(Index(np.array(data, dtype=object)), expected) + + +class TestDtypeEnforced: + # check we don't silently ignore the dtype keyword + + def test_constructor_object_dtype_with_ea_data(self, any_numeric_ea_dtype): + # GH#45206 + arr = array([0], dtype=any_numeric_ea_dtype) + + idx = Index(arr, dtype=object) + assert idx.dtype == object + + @pytest.mark.parametrize("dtype", [object, "float64", "uint64", "category"]) + def test_constructor_range_values_mismatched_dtype(self, dtype): + rng = Index(range(5)) + + result = Index(rng, dtype=dtype) + assert result.dtype == dtype + + result = Index(range(5), dtype=dtype) + assert result.dtype == dtype + + @pytest.mark.parametrize("dtype", [object, "float64", "uint64", "category"]) + def test_constructor_categorical_values_mismatched_non_ea_dtype(self, dtype): + cat = Categorical([1, 2, 3]) + + result = Index(cat, dtype=dtype) + assert result.dtype == dtype + + def test_constructor_categorical_values_mismatched_dtype(self): + dti = date_range("2016-01-01", periods=3) + cat = Categorical(dti) + result = Index(cat, dti.dtype) + tm.assert_index_equal(result, dti) + + dti2 = dti.tz_localize("Asia/Tokyo") + cat2 = Categorical(dti2) + result = Index(cat2, dti2.dtype) + tm.assert_index_equal(result, dti2) + + ii = IntervalIndex.from_breaks(range(5)) + cat3 = Categorical(ii) + result = Index(cat3, dtype=ii.dtype) + tm.assert_index_equal(result, ii) + + def test_constructor_ea_values_mismatched_categorical_dtype(self): + dti = date_range("2016-01-01", periods=3) + result = Index(dti, dtype="category") + expected = CategoricalIndex(dti) + tm.assert_index_equal(result, expected) + + dti2 = date_range("2016-01-01", periods=3, tz="US/Pacific") + result = Index(dti2, dtype="category") + expected = CategoricalIndex(dti2) 
+ tm.assert_index_equal(result, expected) + + def test_constructor_period_values_mismatched_dtype(self): + pi = period_range("2016-01-01", periods=3, freq="D") + result = Index(pi, dtype="category") + expected = CategoricalIndex(pi) + tm.assert_index_equal(result, expected) + + def test_constructor_timedelta64_values_mismatched_dtype(self): + # check we don't silently ignore the dtype keyword + tdi = timedelta_range("4 Days", periods=5) + result = Index(tdi, dtype="category") + expected = CategoricalIndex(tdi) + tm.assert_index_equal(result, expected) + + def test_constructor_interval_values_mismatched_dtype(self): + dti = date_range("2016-01-01", periods=3) + ii = IntervalIndex.from_breaks(dti) + result = Index(ii, dtype="category") + expected = CategoricalIndex(ii) + tm.assert_index_equal(result, expected) + + def test_constructor_datetime64_values_mismatched_period_dtype(self): + dti = date_range("2016-01-01", periods=3) + result = Index(dti, dtype="Period[D]") + expected = dti.to_period("D") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "uint64"]) + def test_constructor_int_dtype_nan_raises(self, dtype): + # see GH#15187 + data = [np.nan] + msg = "cannot convert" + with pytest.raises(ValueError, match=msg): + Index(data, dtype=dtype) + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + np.array([1, 2, 3]), + np.array([1, 2, 3], dtype=int), + # below should coerce + [1.0, 2.0, 3.0], + np.array([1.0, 2.0, 3.0], dtype=float), + ], + ) + def test_constructor_dtypes_to_int64(self, vals): + index = Index(vals, dtype=int) + assert isinstance(index, Int64Index) + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + [1.0, 2.0, 3.0], + np.array([1.0, 2.0, 3.0]), + np.array([1, 2, 3], dtype=int), + np.array([1.0, 2.0, 3.0], dtype=float), + ], + ) + def test_constructor_dtypes_to_float64(self, vals): + index = Index(vals, dtype=float) + assert isinstance(index, Float64Index) + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + np.array([1, 2, 3], dtype=int), + np.array(["2011-01-01", "2011-01-02"], dtype="datetime64[ns]"), + [datetime(2011, 1, 1), datetime(2011, 1, 2)], + ], + ) + def test_constructor_dtypes_to_categorical(self, vals): + index = Index(vals, dtype="category") + assert isinstance(index, CategoricalIndex) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", + [ + Index(np.array([np.datetime64("2011-01-01"), np.datetime64("2011-01-02")])), + Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]), + ], + ) + def test_constructor_dtypes_to_datetime(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=object) + assert isinstance(index, Index) + assert index.dtype == object + else: + index = Index(vals) + assert isinstance(index, DatetimeIndex) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", + [ + np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")]), + [timedelta(1), timedelta(1)], + ], + ) + def test_constructor_dtypes_to_timedelta(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=object) + assert isinstance(index, Index) + assert index.dtype == object + else: + index = Index(vals) + assert isinstance(index, TimedeltaIndex) + + +class TestIndexConstructorUnwrapping: + # Test passing different arraylike values to pd.Index + + @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) + def test_constructor_from_series_dt64(self, klass): + stamps = [Timestamp("20110101"), Timestamp("20120101"), 
Timestamp("20130101")] + expected = DatetimeIndex(stamps) + ser = Series(stamps) + result = klass(ser) + tm.assert_index_equal(result, expected) + + def test_constructor_no_pandas_array(self): + ser = Series([1, 2, 3]) + result = Index(ser.array) + expected = Index([1, 2, 3]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "array", + [ + np.arange(5), + np.array(["a", "b", "c"]), + date_range("2000-01-01", periods=3).values, + ], + ) + def test_constructor_ndarray_like(self, array): + # GH#5460#issuecomment-44474502 + # it should be possible to convert any object that satisfies the numpy + # ndarray interface directly into an Index + class ArrayLike: + def __init__(self, array) -> None: + self.array = array + + def __array__(self, dtype=None) -> np.ndarray: + return self.array + + expected = Index(array) + result = Index(ArrayLike(array)) + tm.assert_index_equal(result, expected) + + +class TestIndexConstructionErrors: + def test_constructor_overflow_int64(self): + # see GH#15832 + msg = ( + "The elements provided in the data cannot " + "all be casted to the dtype int64" + ) + with pytest.raises(OverflowError, match=msg): + Index([np.iinfo(np.uint64).max - 1], dtype="int64") diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py new file mode 100644 index 00000000..ab934df9 --- /dev/null +++ b/pandas/tests/indexes/test_indexing.py @@ -0,0 +1,373 @@ +""" +test_indexing tests the following Index methods: + __getitem__ + get_loc + get_value + __contains__ + take + where + get_indexer + get_indexer_for + slice_locs + asof_locs + +The corresponding tests.indexes.[index_type].test_indexing files +contain tests for the corresponding methods specific to those Index subclasses. +""" +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +from pandas import ( + NA, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + NaT, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +class TestTake: + def test_take_invalid_kwargs(self, index): + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + index.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + index.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + index.take(indices, mode="clip") + + def test_take(self, index): + indexer = [4, 3, 0, 2] + if len(index) < 5: + # not enough elements; ignore + return + + result = index.take(indexer) + expected = index[indexer] + assert result.equals(expected) + + if not isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # GH 10791 + msg = r"'(.*Index)' object has no attribute 'freq'" + with pytest.raises(AttributeError, match=msg): + index.freq + + def test_take_indexer_type(self): + # GH#42875 + integer_index = Index([0, 1, 2, 3]) + scalar_index = 1 + msg = "Expected indices to be array-like" + with pytest.raises(TypeError, match=msg): + integer_index.take(scalar_index) + + def test_take_minus1_without_fill(self, index): + # -1 does not get treated as NA unless allow_fill=True is passed + if len(index) == 0: + # Test is not applicable + return + + result = index.take([0, 0, -1]) + + expected = index.take([0, 0, len(index) - 1]) + tm.assert_index_equal(result, expected) + + +class 
TestContains: + @pytest.mark.parametrize( + "index,val", + [ + (Index([0, 1, 2]), 2), + (Index([0, 1, "2"]), "2"), + (Index([0, 1, 2, np.inf, 4]), 4), + (Index([0, 1, 2, np.nan, 4]), 4), + (Index([0, 1, 2, np.inf]), np.inf), + (Index([0, 1, 2, np.nan]), np.nan), + ], + ) + def test_index_contains(self, index, val): + assert val in index + + @pytest.mark.parametrize( + "index,val", + [ + (Index([0, 1, 2]), "2"), + (Index([0, 1, "2"]), 2), + (Index([0, 1, 2, np.inf]), 4), + (Index([0, 1, 2, np.nan]), 4), + (Index([0, 1, 2, np.inf]), np.nan), + (Index([0, 1, 2, np.nan]), np.inf), + # Checking if np.inf in Int64Index should not cause an OverflowError + # Related to GH 16957 + (Int64Index([0, 1, 2]), np.inf), + (Int64Index([0, 1, 2]), np.nan), + (UInt64Index([0, 1, 2]), np.inf), + (UInt64Index([0, 1, 2]), np.nan), + ], + ) + def test_index_not_contains(self, index, val): + assert val not in index + + @pytest.mark.parametrize( + "index,val", [(Index([0, 1, "2"]), 0), (Index([0, 1, "2"]), "2")] + ) + def test_mixed_index_contains(self, index, val): + # GH#19860 + assert val in index + + @pytest.mark.parametrize( + "index,val", [(Index([0, 1, "2"]), "1"), (Index([0, 1, "2"]), 2)] + ) + def test_mixed_index_not_contains(self, index, val): + # GH#19860 + assert val not in index + + def test_contains_with_float_index(self): + # GH#22085 + integer_index = Int64Index([0, 1, 2, 3]) + uinteger_index = UInt64Index([0, 1, 2, 3]) + float_index = Float64Index([0.1, 1.1, 2.2, 3.3]) + + for index in (integer_index, uinteger_index): + assert 1.1 not in index + assert 1.0 in index + assert 1 in index + + assert 1.1 in float_index + assert 1.0 not in float_index + assert 1 not in float_index + + def test_contains_requires_hashable_raises(self, index): + if isinstance(index, MultiIndex): + return # TODO: do we want this to raise? + + msg = "unhashable type: 'list'" + with pytest.raises(TypeError, match=msg): + [] in index + + msg = "|".join( + [ + r"unhashable type: 'dict'", + r"must be real number, not dict", + r"an integer is required", + r"\{\}", + r"pandas\._libs\.interval\.IntervalTree' is not iterable", + ] + ) + with pytest.raises(TypeError, match=msg): + {} in index._engine + + +class TestGetValue: + @pytest.mark.parametrize( + "index", ["string", "int", "datetime", "timedelta"], indirect=True + ) + def test_get_value(self, index): + # TODO(2.0): can remove once get_value deprecation is enforced GH#19728 + values = np.random.randn(100) + value = index[67] + + with pytest.raises(AttributeError, match="has no attribute '_values'"): + # Index.get_value requires a Series, not an ndarray + with tm.assert_produces_warning(FutureWarning): + index.get_value(values, value) + + with tm.assert_produces_warning(FutureWarning): + result = index.get_value(Series(values, index=values), value) + tm.assert_almost_equal(result, values[67]) + + +class TestGetLoc: + def test_get_loc_non_hashable(self, index): + # MultiIndex and Index raise TypeError, others InvalidIndexError + + with pytest.raises((TypeError, InvalidIndexError), match="slice"): + index.get_loc(slice(0, 1)) + + def test_get_loc_generator(self, index): + + exc = KeyError + if isinstance( + index, + ( + DatetimeIndex, + TimedeltaIndex, + PeriodIndex, + RangeIndex, + IntervalIndex, + MultiIndex, + ), + ): + # TODO: make these more consistent? 
+ exc = InvalidIndexError + with pytest.raises(exc, match="generator object"): + # MultiIndex specifically checks for generator; others for scalar + index.get_loc(x for x in range(5)) + + def test_get_loc_masked_duplicated_na(self): + # GH#48411 + idx = Index([1, 2, NA, NA], dtype="Int64") + result = idx.get_loc(NA) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + +class TestGetIndexer: + def test_get_indexer_base(self, index): + + if index._index_as_unique: + expected = np.arange(index.size, dtype=np.intp) + actual = index.get_indexer(index) + tm.assert_numpy_array_equal(expected, actual) + else: + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + index.get_indexer(index) + + with pytest.raises(ValueError, match="Invalid fill method"): + index.get_indexer(index, method="invalid") + + def test_get_indexer_consistency(self, index): + # See GH#16819 + + if index._index_as_unique: + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + index.get_indexer(index[0:2]) + + indexer, _ = index.get_indexer_non_unique(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + def test_get_indexer_masked_duplicated_na(self): + # GH#48411 + idx = Index([1, 2, NA, NA], dtype="Int64") + result = idx.get_indexer_for(Index([1, NA], dtype="Int64")) + expected = np.array([0, 2, 3], dtype=result.dtype) + tm.assert_numpy_array_equal(result, expected) + + +class TestConvertSliceIndexer: + def test_convert_almost_null_slice(self, index): + # slice with None at both ends, but not step + + key = slice(None, None, "foo") + + if isinstance(index, IntervalIndex): + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): + index._convert_slice_indexer(key, "loc") + else: + msg = "'>=' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): + index._convert_slice_indexer(key, "loc") + + +class TestPutmask: + def test_putmask_with_wrong_mask(self, index): + # GH#18368 + if not len(index): + return + + fill = index[0] + + msg = "putmask: mask and data must be the same size" + with pytest.raises(ValueError, match=msg): + index.putmask(np.ones(len(index) + 1, np.bool_), fill) + + with pytest.raises(ValueError, match=msg): + index.putmask(np.ones(len(index) - 1, np.bool_), fill) + + with pytest.raises(ValueError, match=msg): + index.putmask("foo", fill) + + +@pytest.mark.parametrize( + "idx", [Index([1, 2, 3]), Index([0.1, 0.2, 0.3]), Index(["a", "b", "c"])] +) +def test_getitem_deprecated_float(idx): + # https://github.com/pandas-dev/pandas/issues/34191 + + with tm.assert_produces_warning(FutureWarning): + result = idx[1.0] + + expected = idx[1] + assert result == expected + + +def test_maybe_cast_slice_bound_kind_deprecated(index): + if not len(index): + return + + with tm.assert_produces_warning(FutureWarning): + # passed as keyword + index._maybe_cast_slice_bound(index[0], "left", kind="loc") + + with tm.assert_produces_warning(FutureWarning): + # pass as positional + index._maybe_cast_slice_bound(index[0], "left", "loc") + + +@pytest.mark.parametrize( + "idx,target,expected", + [ + ([np.nan, "var1", np.nan], [np.nan], np.array([0, 2], dtype=np.intp)), + ( + [np.nan, "var1", 
np.nan], + [np.nan, "var1"], + np.array([0, 2, 1], dtype=np.intp), + ), + ( + np.array([np.nan, "var1", np.nan], dtype=object), + [np.nan], + np.array([0, 2], dtype=np.intp), + ), + ( + DatetimeIndex(["2020-08-05", NaT, NaT]), + [NaT], + np.array([1, 2], dtype=np.intp), + ), + (["a", "b", "a", np.nan], [np.nan], np.array([3], dtype=np.intp)), + ( + np.array(["b", np.nan, float("NaN"), "b"], dtype=object), + Index([np.nan], dtype=object), + np.array([1, 2], dtype=np.intp), + ), + ], +) +def test_get_indexer_non_unique_multiple_nans(idx, target, expected): + # GH 35392 + axis = Index(idx) + actual = axis.get_indexer_for(target) + tm.assert_numpy_array_equal(actual, expected) + + +def test_get_indexer_non_unique_nans_in_object_dtype_target(nulls_fixture): + idx = Index([1.0, 2.0]) + target = Index([1, nulls_fixture], dtype="object") + + result_idx, result_missing = idx.get_indexer_non_unique(target) + tm.assert_numpy_array_equal(result_idx, np.array([0, -1], dtype=np.intp)) + tm.assert_numpy_array_equal(result_missing, np.array([1], dtype=np.intp)) diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py new file mode 100644 index 00000000..453ece35 --- /dev/null +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -0,0 +1,193 @@ +import numpy as np +import pytest + +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + PeriodIndex, + TimedeltaIndex, + isna, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + NumericIndex, +) +from pandas.core.arrays import BooleanArray +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +def test_numpy_ufuncs_out(index): + result = index == index + + out = np.empty(index.shape, dtype=bool) + np.equal(index, index, out=out) + tm.assert_numpy_array_equal(out, result) + + if not index._is_multi: + # same thing on the ExtensionArray + out = np.empty(index.shape, dtype=bool) + np.equal(index.array, index.array, out=out) + tm.assert_numpy_array_equal(out, result) + + +@pytest.mark.parametrize( + "func", + [ + np.exp, + np.exp2, + np.expm1, + np.log, + np.log2, + np.log10, + np.log1p, + np.sqrt, + np.sin, + np.cos, + np.tan, + np.arcsin, + np.arccos, + np.arctan, + np.sinh, + np.cosh, + np.tanh, + np.arcsinh, + np.arccosh, + np.arctanh, + np.deg2rad, + np.rad2deg, + ], + ids=lambda x: x.__name__, +) +def test_numpy_ufuncs_basic(index, func): + # test ufuncs of numpy, see: + # https://numpy.org/doc/stable/reference/ufuncs.html + + if isinstance(index, DatetimeIndexOpsMixin): + with tm.external_error_raised((TypeError, AttributeError)): + with np.errstate(all="ignore"): + func(index) + elif ( + isinstance(index, NumericIndex) + or (not isinstance(index.dtype, np.dtype) and index.dtype._is_numeric) + or (index.dtype.kind == "c" and func not in [np.deg2rad, np.rad2deg]) + or index.dtype == bool + ): + # coerces to float (e.g. np.sin) + with np.errstate(all="ignore"): + result = func(index) + exp = Index(func(index.values), name=index.name) + + tm.assert_index_equal(result, exp) + if type(index) is not Index or index.dtype == bool: + # i.e NumericIndex + assert isinstance(result, Float64Index) + else: + # e.g. 
np.exp with Int64 -> Float64 + assert type(result) is Index + else: + # raise AttributeError or TypeError + if len(index) == 0: + pass + else: + with tm.external_error_raised((TypeError, AttributeError)): + with np.errstate(all="ignore"): + func(index) + + +@pytest.mark.parametrize( + "func", [np.isfinite, np.isinf, np.isnan, np.signbit], ids=lambda x: x.__name__ +) +def test_numpy_ufuncs_other(index, func): + # test ufuncs of numpy, see: + # https://numpy.org/doc/stable/reference/ufuncs.html + if isinstance(index, (DatetimeIndex, TimedeltaIndex)): + + if func in (np.isfinite, np.isinf, np.isnan): + # numpy 1.18 changed isinf and isnan to not raise on dt64/td64 + result = func(index) + assert isinstance(result, np.ndarray) + + out = np.empty(index.shape, dtype=bool) + func(index, out=out) + tm.assert_numpy_array_equal(out, result) + else: + with tm.external_error_raised(TypeError): + func(index) + + elif isinstance(index, PeriodIndex): + with tm.external_error_raised(TypeError): + func(index) + + elif ( + isinstance(index, NumericIndex) + or (not isinstance(index.dtype, np.dtype) and index.dtype._is_numeric) + or (index.dtype.kind == "c" and func is not np.signbit) + or index.dtype == bool + ): + # Results in bool array + result = func(index) + if not isinstance(index.dtype, np.dtype): + # e.g. Int64 we expect to get BooleanArray back + assert isinstance(result, BooleanArray) + else: + assert isinstance(result, np.ndarray) + + out = np.empty(index.shape, dtype=bool) + func(index, out=out) + + if not isinstance(index.dtype, np.dtype): + tm.assert_numpy_array_equal(out, result._data) + else: + tm.assert_numpy_array_equal(out, result) + + else: + if len(index) == 0: + pass + else: + with tm.external_error_raised(TypeError): + func(index) + + +@pytest.mark.parametrize("func", [np.maximum, np.minimum]) +def test_numpy_ufuncs_reductions(index, func, request): + # TODO: overlap with tests.series.test_ufunc.test_reductions + if len(index) == 0: + return + + if repr(index.dtype) == "string[pyarrow]": + mark = pytest.mark.xfail(reason="ArrowStringArray has no min/max") + request.node.add_marker(mark) + + if isinstance(index, CategoricalIndex) and index.dtype.ordered is False: + with pytest.raises(TypeError, match="is not ordered for"): + func.reduce(index) + return + else: + result = func.reduce(index) + + if func is np.maximum: + expected = index.max(skipna=False) + else: + expected = index.min(skipna=False) + # TODO: do we have cases both with and without NAs? + + assert type(result) is type(expected) + if isna(result): + assert isna(expected) + else: + assert result == expected + + +@pytest.mark.parametrize("func", [np.bitwise_and, np.bitwise_or, np.bitwise_xor]) +def test_numpy_ufuncs_bitwise(func): + # https://github.com/pandas-dev/pandas/issues/46769 + idx1 = Index([1, 2, 3, 4], dtype="int64") + idx2 = Index([3, 4, 5, 6], dtype="int64") + + with tm.assert_produces_warning(None): + result = func(idx1, idx2) + + expected = Index(func(idx1.values, idx2.values)) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py new file mode 100644 index 00000000..d2f6a173 --- /dev/null +++ b/pandas/tests/indexes/test_setops.py @@ -0,0 +1,880 @@ +""" +The tests in this package are to ensure the proper resultant dtypes of +set operations. 
+""" +from datetime import datetime +import operator + +import numpy as np +import pytest + +from pandas.compat import pa_version_under7p0 + +from pandas.core.dtypes.cast import find_common_type + +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + MultiIndex, + RangeIndex, + Series, + TimedeltaIndex, + Timestamp, +) +import pandas._testing as tm +from pandas.api.types import ( + is_datetime64tz_dtype, + is_signed_integer_dtype, + pandas_dtype, +) +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +def test_union_same_types(index): + # Union with a non-unique, non-monotonic index raises error + # Only needed for bool index factory + idx1 = index.sort_values() + idx2 = index.sort_values() + assert idx1.union(idx2).dtype == idx1.dtype + + +def test_union_different_types(index_flat, index_flat2, request): + # This test only considers combinations of indices + # GH 23525 + idx1 = index_flat + idx2 = index_flat2 + + if ( + not idx1.is_unique + and not idx2.is_unique + and idx1.dtype.kind == "i" + and idx2.dtype.kind == "b" + ) or ( + not idx2.is_unique + and not idx1.is_unique + and idx2.dtype.kind == "i" + and idx1.dtype.kind == "b" + ): + # Each condition had idx[1|2].is_monotonic_decreasing + # but failed when e.g. + # idx1 = Index( + # [True, True, True, True, True, True, True, True, False, False], dtype='bool' + # ) + # idx2 = Int64Index([0, 0, 1, 1, 2, 2], dtype='int64') + mark = pytest.mark.xfail( + reason="GH#44000 True==1", raises=ValueError, strict=False + ) + request.node.add_marker(mark) + + common_dtype = find_common_type([idx1.dtype, idx2.dtype]) + + warn = None + if not len(idx1) or not len(idx2): + pass + elif ( + idx1.dtype.kind == "c" + and ( + idx2.dtype.kind not in ["i", "u", "f", "c"] + or not isinstance(idx2.dtype, np.dtype) + ) + ) or ( + idx2.dtype.kind == "c" + and ( + idx1.dtype.kind not in ["i", "u", "f", "c"] + or not isinstance(idx1.dtype, np.dtype) + ) + ): + # complex objects non-sortable + warn = RuntimeWarning + + any_uint64 = idx1.dtype == np.uint64 or idx2.dtype == np.uint64 + idx1_signed = is_signed_integer_dtype(idx1.dtype) + idx2_signed = is_signed_integer_dtype(idx2.dtype) + + # Union with a non-unique, non-monotonic index raises error + # This applies to the boolean index + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() + + with tm.assert_produces_warning(warn, match="'<' not supported between"): + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + if any_uint64 and (idx1_signed or idx2_signed): + assert res1.dtype == np.dtype("O") + assert res2.dtype == np.dtype("O") + else: + assert res1.dtype == common_dtype + assert res2.dtype == common_dtype + + +@pytest.mark.parametrize( + "idx_fact1,idx_fact2", + [ + (tm.makeIntIndex, tm.makeRangeIndex), + (tm.makeFloatIndex, tm.makeIntIndex), + (tm.makeFloatIndex, tm.makeRangeIndex), + (tm.makeFloatIndex, tm.makeUIntIndex), + ], +) +def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2): + # GH 23525 + idx1 = idx_fact1(10) + idx2 = idx_fact2(20) + + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + assert res1.dtype in (idx1.dtype, idx2.dtype) + assert res2.dtype in (idx1.dtype, idx2.dtype) + + +@pytest.mark.parametrize( + "left, right, expected", + [ + ("int64", "int64", "int64"), + ("int64", "uint64", "object"), + ("int64", "float64", "float64"), + ("uint64", "float64", "float64"), + ("uint64", "uint64", "uint64"), + ("float64", "float64", "float64"), + ("datetime64[ns]", "int64", "object"), + ("datetime64[ns]", "uint64", "object"), + 
("datetime64[ns]", "float64", "object"), + ("datetime64[ns, CET]", "int64", "object"), + ("datetime64[ns, CET]", "uint64", "object"), + ("datetime64[ns, CET]", "float64", "object"), + ("Period[D]", "int64", "object"), + ("Period[D]", "uint64", "object"), + ("Period[D]", "float64", "object"), + ], +) +@pytest.mark.parametrize("names", [("foo", "foo", "foo"), ("foo", "bar", None)]) +def test_union_dtypes(left, right, expected, names): + left = pandas_dtype(left) + right = pandas_dtype(right) + a = Index([], dtype=left, name=names[0]) + b = Index([], dtype=right, name=names[1]) + result = a.union(b) + assert result.dtype == expected + assert result.name == names[2] + + # Testing name retention + # TODO: pin down desired dtype; do we want it to be commutative? + result = a.intersection(b) + assert result.name == names[2] + + +def test_dunder_inplace_setops_deprecated(index): + # GH#37374 these will become logical ops, not setops + + with tm.assert_produces_warning(FutureWarning): + index |= index + + with tm.assert_produces_warning(FutureWarning): + index &= index + + is_pyarrow = str(index.dtype) == "string[pyarrow]" and pa_version_under7p0 + with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=is_pyarrow): + index ^= index + + +@pytest.mark.parametrize("values", [[1, 2, 2, 3], [3, 3]]) +def test_intersection_duplicates(values): + # GH#31326 + a = Index(values) + b = Index([3, 3]) + result = a.intersection(b) + expected = Index([3]) + tm.assert_index_equal(result, expected) + + +class TestSetOps: + # Set operation tests shared by all indexes in the `index` fixture + @pytest.mark.parametrize("case", [0.5, "xxx"]) + @pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] + ) + def test_set_ops_error_cases(self, case, method, index): + # non-iterable input + msg = "Input must be Index or array-like" + with pytest.raises(TypeError, match=msg): + getattr(index, method)(case) + + def test_intersection_base(self, index): + if isinstance(index, CategoricalIndex): + return + + first = index[:5] + second = index[:3] + intersect = first.intersection(second) + assert tm.equalContents(intersect, second) + + if is_datetime64tz_dtype(index.dtype): + # The second.values below will drop tz, so the rest of this test + # is not applicable. + return + + # GH#10149 + cases = [second.to_numpy(), second.to_series(), second.to_list()] + for case in cases: + result = first.intersection(case) + assert tm.equalContents(result, second) + + if isinstance(index, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.intersection([1, 2, 3]) + + def test_union_base(self, index): + first = index[3:] + second = index[:5] + everything = index + + union = first.union(second) + assert tm.equalContents(union, everything) + + if is_datetime64tz_dtype(index.dtype): + # The second.values below will drop tz, so the rest of this test + # is not applicable. 
+ return + + # GH#10149 + cases = [second.to_numpy(), second.to_series(), second.to_list()] + for case in cases: + result = first.union(case) + assert tm.equalContents(result, everything) + + if isinstance(index, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.union([1, 2, 3]) + + def test_difference_base(self, sort, index): + first = index[2:] + second = index[:4] + if index.is_boolean(): + # i think (TODO: be sure) there assumptions baked in about + # the index fixture that don't hold here? + answer = set(first).difference(set(second)) + elif isinstance(index, CategoricalIndex): + answer = [] + else: + answer = index[4:] + result = first.difference(second, sort) + assert tm.equalContents(result, answer) + + # GH#10149 + cases = [second.to_numpy(), second.to_series(), second.to_list()] + for case in cases: + result = first.difference(case, sort) + assert tm.equalContents(result, answer) + + if isinstance(index, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3], sort) + + def test_symmetric_difference(self, index): + if isinstance(index, CategoricalIndex): + return + if len(index) < 2: + return + if index[0] in index[1:] or index[-1] in index[:-1]: + # index fixture has e.g. an index of bools that does not satisfy this, + # another with [0, 0, 1, 1, 2, 2] + return + + first = index[1:] + second = index[:-1] + answer = index[[0, -1]] + result = first.symmetric_difference(second) + assert tm.equalContents(result, answer) + + # GH#10149 + cases = [second.to_numpy(), second.to_series(), second.to_list()] + for case in cases: + result = first.symmetric_difference(case) + assert tm.equalContents(result, answer) + + if isinstance(index, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.symmetric_difference([1, 2, 3]) + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + (None, None, None), + ], + ) + def test_corner_union(self, index_flat, fname, sname, expected_name): + # GH#9943, GH#9862 + # Test unions with various name combinations + # Do not test MultiIndex or repeats + if not index_flat.is_unique: + pytest.skip("Randomly generated index_flat was not unique.") + index = index_flat + + # Test copy.union(copy) + first = index.copy().set_names(fname) + second = index.copy().set_names(sname) + union = first.union(second) + expected = index.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test copy.union(empty) + first = index.copy().set_names(fname) + second = index.drop(index).set_names(sname) + union = first.union(second) + expected = index.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(copy) + first = index.drop(index).set_names(fname) + second = index.copy().set_names(sname) + union = first.union(second) + expected = index.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(empty) + first = index.drop(index).set_names(fname) + second = index.drop(index).set_names(sname) + union = first.union(second) + expected = index.drop(index).set_names(expected_name) + tm.assert_index_equal(union, expected) + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + 
(None, None, None), + ], + ) + def test_union_unequal(self, index_flat, fname, sname, expected_name): + if not index_flat.is_unique: + pytest.skip("Randomly generated index_flat was not unique.") + index = index_flat + + # test copy.union(subset) - need sort for unicode and string + first = index.copy().set_names(fname) + second = index[1:].set_names(sname) + union = first.union(second).sort_values() + expected = index.set_names(expected_name).sort_values() + tm.assert_index_equal(union, expected) + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + (None, None, None), + ], + ) + def test_corner_intersect(self, index_flat, fname, sname, expected_name): + # GH#35847 + # Test intersections with various name combinations + if not index_flat.is_unique: + pytest.skip("Randomly generated index_flat was not unique.") + index = index_flat + + # Test copy.intersection(copy) + first = index.copy().set_names(fname) + second = index.copy().set_names(sname) + intersect = first.intersection(second) + expected = index.copy().set_names(expected_name) + tm.assert_index_equal(intersect, expected) + + # Test copy.intersection(empty) + first = index.copy().set_names(fname) + second = index.drop(index).set_names(sname) + intersect = first.intersection(second) + expected = index.drop(index).set_names(expected_name) + tm.assert_index_equal(intersect, expected) + + # Test empty.intersection(copy) + first = index.drop(index).set_names(fname) + second = index.copy().set_names(sname) + intersect = first.intersection(second) + expected = index.drop(index).set_names(expected_name) + tm.assert_index_equal(intersect, expected) + + # Test empty.intersection(empty) + first = index.drop(index).set_names(fname) + second = index.drop(index).set_names(sname) + intersect = first.intersection(second) + expected = index.drop(index).set_names(expected_name) + tm.assert_index_equal(intersect, expected) + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + (None, None, None), + ], + ) + def test_intersect_unequal(self, index_flat, fname, sname, expected_name): + if not index_flat.is_unique: + pytest.skip("Randomly generated index_flat was not unique.") + index = index_flat + + # test copy.intersection(subset) - need sort for unicode and string + first = index.copy().set_names(fname) + second = index[1:].set_names(sname) + intersect = first.intersection(second).sort_values() + expected = index[1:].set_names(expected_name).sort_values() + tm.assert_index_equal(intersect, expected) + + def test_intersection_name_retention_with_nameless(self, index): + if isinstance(index, MultiIndex): + index = index.rename(list(range(index.nlevels))) + else: + index = index.rename("foo") + + other = np.asarray(index) + + result = index.intersection(other) + assert result.name == index.name + + # empty other, same dtype + result = index.intersection(other[:0]) + assert result.name == index.name + + # empty `self` + result = index[:0].intersection(other) + assert result.name == index.name + + def test_difference_preserves_type_empty(self, index, sort): + # GH#20040 + # If taking difference of a set and itself, it + # needs to preserve the type of the index + if not index.is_unique: + return + result = index.difference(index, sort=sort) + expected = index[:0] + tm.assert_index_equal(result, expected, exact=True) + + def test_difference_name_retention_equals(self, 
index, names): + if isinstance(index, MultiIndex): + names = [[x] * index.nlevels for x in names] + index = index.rename(names[0]) + other = index.rename(names[1]) + + assert index.equals(other) + + result = index.difference(other) + expected = index[:0].rename(names[2]) + tm.assert_index_equal(result, expected) + + def test_intersection_difference_match_empty(self, index, sort): + # GH#20040 + # Test that the intersection of an index with an + # empty index produces the same index as the difference + # of an index with itself. Test for all types + if not index.is_unique: + return + inter = index.intersection(index[:0]) + diff = index.difference(index, sort=sort) + tm.assert_index_equal(inter, diff, exact=True) + + +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) +def test_setop_with_categorical(index_flat, sort, method): + # MultiIndex tested separately in tests.indexes.multi.test_setops + index = index_flat + + other = index.astype("category") + exact = "equiv" if isinstance(index, RangeIndex) else True + + result = getattr(index, method)(other, sort=sort) + expected = getattr(index, method)(index, sort=sort) + tm.assert_index_equal(result, expected, exact=exact) + + result = getattr(index, method)(other[:5], sort=sort) + expected = getattr(index, method)(index[:5], sort=sort) + tm.assert_index_equal(result, expected, exact=exact) + + +def test_intersection_duplicates_all_indexes(index): + # GH#38743 + if index.empty: + # No duplicates in empty indexes + return + + def check_intersection_commutative(left, right): + assert left.intersection(right).equals(right.intersection(left)) + + idx = index + idx_non_unique = idx[[0, 0, 1, 2]] + + check_intersection_commutative(idx, idx_non_unique) + assert idx.intersection(idx_non_unique).is_unique + + +@pytest.mark.parametrize( + "cls", + [ + Int64Index, + Float64Index, + DatetimeIndex, + CategoricalIndex, + lambda x: CategoricalIndex(x, categories=set(x)), + TimedeltaIndex, + lambda x: Index(x, dtype=object), + UInt64Index, + ], +) +def test_union_duplicate_index_subsets_of_each_other(cls): + # GH#31326 + a = cls([1, 2, 2, 3]) + b = cls([3, 3, 4]) + expected = cls([1, 2, 2, 3, 3, 4]) + if isinstance(a, CategoricalIndex): + expected = Index([1, 2, 2, 3, 3, 4]) + result = a.union(b) + tm.assert_index_equal(result, expected) + result = a.union(b, sort=False) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "cls", + [ + Int64Index, + Float64Index, + DatetimeIndex, + CategoricalIndex, + TimedeltaIndex, + lambda x: Index(x, dtype=object), + ], +) +def test_union_with_duplicate_index_and_non_monotonic(cls): + # GH#36289 + a = cls([1, 0, 0]) + b = cls([0, 1]) + expected = cls([0, 0, 1]) + + result = a.union(b) + tm.assert_index_equal(result, expected) + + result = b.union(a) + tm.assert_index_equal(result, expected) + + +def test_union_duplicate_index_different_dtypes(): + # GH#36289 + a = Index([1, 2, 2, 3]) + b = Index(["1", "0", "0"]) + expected = Index([1, 2, 2, 3, "1", "0", "0"]) + result = a.union(b, sort=False) + tm.assert_index_equal(result, expected) + + +def test_union_same_value_duplicated_in_both(): + # GH#36289 + a = Index([0, 0, 1]) + b = Index([0, 0, 1, 2]) + result = a.union(b) + expected = Index([0, 0, 1, 2]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dup", [1, np.nan]) +def test_union_nan_in_both(dup): + # GH#36289 + a = Index([np.nan, 1, 2, 2]) + b = Index([np.nan, dup, 1, 2]) + result = a.union(b, sort=False) + expected = 
Index([np.nan, dup, 1.0, 2.0, 2.0]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "cls", + [ + Int64Index, + Float64Index, + DatetimeIndex, + TimedeltaIndex, + lambda x: Index(x, dtype=object), + ], +) +def test_union_with_duplicate_index_not_subset_and_non_monotonic(cls): + # GH#36289 + a = cls([1, 0, 2]) + b = cls([0, 0, 1]) + expected = cls([0, 0, 1, 2]) + + result = a.union(b) + tm.assert_index_equal(result, expected) + + result = b.union(a) + tm.assert_index_equal(result, expected) + + +def test_union_int_categorical_with_nan(): + ci = CategoricalIndex([1, 2, np.nan]) + assert ci.categories.dtype.kind == "i" + + idx = Index([1, 2]) + + result = idx.union(ci) + expected = Index([1, 2, np.nan], dtype=np.float64) + tm.assert_index_equal(result, expected) + + result = ci.union(idx) + tm.assert_index_equal(result, expected) + + +class TestSetOpsUnsorted: + # These may eventually belong in a dtype-specific test_setops, or + # parametrized over a more general fixture + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + index1 = Index(dt_dates, dtype=object) + index2 = Index(["aa"], dtype=object) + result = index2.intersection(index1) + + expected = Index([], dtype=object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_intersection(self, index, sort): + first = index[:20] + second = index[:10] + intersect = first.intersection(second, sort=sort) + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # Corner cases + inter = first.intersection(first, sort=sort) + assert inter is first + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name + (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names + (Index([3, 4, 5, 6, 7]), False), + ], + ) + def test_intersection_name_preservation(self, index2, keeps_name, sort): + index1 = Index([1, 2, 3, 4, 5], name="index") + expected = Index([3, 4, 5]) + result = index1.intersection(index2, sort) + + if keeps_name: + expected.name = "index" + + assert result.name == expected.name + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + @pytest.mark.parametrize( + "first_name,second_name,expected_name", + [("A", "A", "A"), ("A", "B", None), (None, "B", None)], + ) + def test_intersection_name_preservation2( + self, index, first_name, second_name, expected_name, sort + ): + first = index[5:20] + second = index[:10] + first.name = first_name + second.name = second_name + intersect = first.intersection(second, sort=sort) + assert intersect.name == expected_name + + def test_chained_union(self, sort): + # Chained unions handles names correctly + i1 = Index([1, 2], name="i1") + i2 = Index([5, 6], name="i2") + i3 = Index([3, 4], name="i3") + union = i1.union(i2.union(i3, sort=sort), sort=sort) + expected = i1.union(i2, sort=sort).union(i3, sort=sort) + tm.assert_index_equal(union, expected) + + j1 = Index([1, 2], name="j1") + j2 = Index([], name="j2") + j3 = Index([], name="j3") + union = j1.union(j2.union(j3, sort=sort), sort=sort) + expected = j1.union(j2, sort=sort).union(j3, sort=sort) + tm.assert_index_equal(union, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union(self, index, sort): + first = index[5:20] + second = index[:10] + everything = index[:20] + + union = 
first.union(second, sort=sort) + if sort is None: + tm.assert_index_equal(union, everything.sort_values()) + assert tm.equalContents(union, everything) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union_from_iterables(self, index, klass, sort): + # GH#10149 + first = index[5:20] + second = index[:10] + everything = index[:20] + + case = klass(second.values) + result = first.union(case, sort=sort) + if sort is None: + tm.assert_index_equal(result, everything.sort_values()) + assert tm.equalContents(result, everything) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union_identity(self, index, sort): + first = index[5:20] + + union = first.union(first, sort=sort) + # i.e. identity is not preserved when sort is True + assert (union is first) is (not sort) + + # This should no longer be the same object, since [] is not consistent, + # both objects will be recast to dtype('O') + union = first.union([], sort=sort) + assert (union is first) is (not sort) + + union = Index([]).union(first, sort=sort) + assert (union is first) is (not sort) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")]) + def test_difference_name_preservation(self, index, second_name, expected, sort): + first = index[5:20] + second = index[:10] + answer = index[10:20] + + first.name = "name" + second.name = second_name + result = first.difference(second, sort=sort) + + assert tm.equalContents(result, answer) + + if expected is None: + assert result.name is None + else: + assert result.name == expected + + def test_difference_empty_arg(self, index, sort): + first = index[5:20] + first.name = "name" + result = first.difference([], sort) + + tm.assert_index_equal(result, first) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_difference_identity(self, index, sort): + first = index[5:20] + first.name = "name" + result = first.difference(first, sort) + + assert len(result) == 0 + assert result.name == first.name + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_difference_sort(self, index, sort): + first = index[5:20] + second = index[:10] + + result = first.difference(second, sort) + expected = index[10:20] + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) + def test_difference_incomparable(self, opname): + a = Index([3, Timestamp("2000"), 1]) + b = Index([2, Timestamp("1999"), 1]) + op = operator.methodcaller(opname, b) + + with tm.assert_produces_warning(RuntimeWarning): + # sort=None, the default + result = op(a) + expected = Index([3, Timestamp("2000"), 2, Timestamp("1999")]) + if opname == "difference": + expected = expected[:2] + tm.assert_index_equal(result, expected) + + # sort=False + op = operator.methodcaller(opname, b, sort=False) + result = op(a) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) + def test_difference_incomparable_true(self, opname): + # TODO(GH#25151): decide on True behaviour + # # sort=True, raises + a = Index([3, Timestamp("2000"), 1]) + b = Index([2, Timestamp("1999"), 1]) + op = operator.methodcaller(opname, b, sort=True) + + with pytest.raises(TypeError, match="Cannot compare"): + 
op(a) + + def test_symmetric_difference_mi(self, sort): + index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) + index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)]) + result = index1.symmetric_difference(index2, sort=sort) + expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)]) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + @pytest.mark.parametrize( + "index2,expected", + [ + (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), + (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])), + ], + ) + def test_symmetric_difference_missing(self, index2, expected, sort): + # GH#13514 change: {nan} - {nan} == {} + # (GH#6444, sorting of nans, is no longer an issue) + index1 = Index([1, np.nan, 2, 3]) + + result = index1.symmetric_difference(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_symmetric_difference_non_index(self, sort): + index1 = Index([1, 2, 3, 4], name="index1") + index2 = np.array([2, 3, 4, 5]) + expected = Index([1, 5]) + result = index1.symmetric_difference(index2, sort=sort) + assert tm.equalContents(result, expected) + assert result.name == "index1" + + result = index1.symmetric_difference(index2, result_name="new_name", sort=sort) + assert tm.equalContents(result, expected) + assert result.name == "new_name" diff --git a/pandas/tests/indexes/test_subclass.py b/pandas/tests/indexes/test_subclass.py new file mode 100644 index 00000000..2ddf3baa --- /dev/null +++ b/pandas/tests/indexes/test_subclass.py @@ -0,0 +1,38 @@ +""" +Tests involving custom Index subclasses +""" +import numpy as np + +from pandas import ( + DataFrame, + Index, +) +import pandas._testing as tm + + +class CustomIndex(Index): + def __new__(cls, data, name=None): + # assert that this index class cannot hold strings + if any(isinstance(val, str) for val in data): + raise TypeError("CustomIndex cannot hold strings") + + if name is None and hasattr(data, "name"): + name = data.name + data = np.array(data, dtype="O") + + return cls._simple_new(data, name) + + +def test_insert_fallback_to_base_index(): + # https://github.com/pandas-dev/pandas/issues/47071 + + idx = CustomIndex([1, 2, 3]) + result = idx.insert(0, "string") + expected = Index(["string", 1, 2, 3], dtype=object) + tm.assert_index_equal(result, expected) + + df = DataFrame( + np.random.randn(2, 3), columns=idx, index=Index([1, 2], name="string") + ) + result = df.reset_index() + tm.assert_index_equal(result.columns, expected) diff --git a/pandas/tests/indexes/timedeltas/__init__.py b/pandas/tests/indexes/timedeltas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/timedeltas/methods/__init__.py b/pandas/tests/indexes/timedeltas/methods/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/timedeltas/methods/test_astype.py b/pandas/tests/indexes/timedeltas/methods/test_astype.py new file mode 100644 index 00000000..aa2f7b7a --- /dev/null +++ b/pandas/tests/indexes/timedeltas/methods/test_astype.py @@ -0,0 +1,131 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + NaT, + Timedelta, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, +) + + +class TestTimedeltaIndex: + def test_astype_object(self): + idx = 
timedelta_range(start="1 days", periods=4, freq="D", name="idx") + expected_list = [ + Timedelta("1 days"), + Timedelta("2 days"), + Timedelta("3 days"), + Timedelta("4 days"), + ] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name="idx") + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = TimedeltaIndex( + [timedelta(days=1), timedelta(days=2), NaT, timedelta(days=4)], name="idx" + ) + expected_list = [ + Timedelta("1 days"), + Timedelta("2 days"), + NaT, + Timedelta("4 days"), + ] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name="idx") + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN], name="idx") + + result = idx.astype(object) + expected = Index( + [Timedelta("1 days 03:46:40")] + [NaT] * 3, dtype=object, name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index( + [100000000000000] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(str) + expected = Index([str(x) for x in idx], name="idx") + tm.assert_index_equal(result, expected) + + rng = timedelta_range("1 days", periods=10) + result = rng.astype("i8") + tm.assert_index_equal(result, Index(rng.asi8)) + tm.assert_numpy_array_equal(rng.asi8, result.values) + + def test_astype_uint(self): + arr = timedelta_range("1H", periods=2) + expected = UInt64Index( + np.array([3600000000000, 90000000000000], dtype="uint64") + ) + tm.assert_index_equal(arr.astype("uint64"), expected) + + msg = "will return exactly the specified dtype instead of uint64" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = arr.astype("uint32") + tm.assert_index_equal(res, expected) + + def test_astype_timedelta64(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN]) + + result = idx.astype("timedelta64") + expected = Float64Index([1e14] + [np.NaN] * 3, dtype="float64") + tm.assert_index_equal(result, expected) + + result = idx.astype("timedelta64[ns]") + tm.assert_index_equal(result, idx) + assert result is not idx + + result = idx.astype("timedelta64[ns]", copy=False) + tm.assert_index_equal(result, idx) + assert result is idx + + @pytest.mark.parametrize("dtype", [float, "datetime64", "datetime64[ns]"]) + def test_astype_raises(self, dtype): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN]) + msg = "Cannot cast TimedeltaIndex to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_astype_category(self): + obj = timedelta_range("1H", periods=2, freq="H") + + result = obj.astype("category") + expected = pd.CategoricalIndex([Timedelta("1H"), Timedelta("2H")]) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + def test_astype_array_fallback(self): + obj = timedelta_range("1H", periods=2) + result = obj.astype(bool) + expected = Index(np.array([True, True])) + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/methods/test_factorize.py b/pandas/tests/indexes/timedeltas/methods/test_factorize.py new file mode 
100644 index 00000000..24ab3888 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/methods/test_factorize.py @@ -0,0 +1,40 @@ +import numpy as np + +from pandas import ( + TimedeltaIndex, + factorize, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndexFactorize: + def test_factorize(self): + idx1 = TimedeltaIndex(["1 day", "1 day", "2 day", "2 day", "3 day", "3 day"]) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = TimedeltaIndex(["1 day", "2 day", "3 day"]) + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + arr, idx = idx1.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq + + def test_factorize_preserves_freq(self): + # GH#38120 freq should be preserved + idx3 = timedelta_range("1 day", periods=4, freq="s") + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq + + arr, idx = factorize(idx3) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq diff --git a/pandas/tests/indexes/timedeltas/methods/test_fillna.py b/pandas/tests/indexes/timedeltas/methods/test_fillna.py new file mode 100644 index 00000000..40aa95d0 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/methods/test_fillna.py @@ -0,0 +1,22 @@ +from pandas import ( + Index, + NaT, + Timedelta, + TimedeltaIndex, +) +import pandas._testing as tm + + +class TestFillNA: + def test_fillna_timedelta(self): + # GH#11343 + idx = TimedeltaIndex(["1 day", NaT, "3 day"]) + + exp = TimedeltaIndex(["1 day", "2 day", "3 day"]) + tm.assert_index_equal(idx.fillna(Timedelta("2 day")), exp) + + exp = TimedeltaIndex(["1 day", "3 hour", "3 day"]) + idx.fillna(Timedelta("3 hour")) + + exp = Index([Timedelta("1 day"), "x", Timedelta("3 day")], dtype=object) + tm.assert_index_equal(idx.fillna("x"), exp) diff --git a/pandas/tests/indexes/timedeltas/methods/test_insert.py b/pandas/tests/indexes/timedeltas/methods/test_insert.py new file mode 100644 index 00000000..c2f22da9 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/methods/test_insert.py @@ -0,0 +1,146 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas._libs import lib + +import pandas as pd +from pandas import ( + Index, + Timedelta, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndexInsert: + def test_insert(self): + + idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + result = idx.insert(2, timedelta(days=5)) + exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx") + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, "inserted") + expected = Index( + [Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")], + name="idx", + ) + assert not isinstance(result, TimedeltaIndex) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + idx = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx") + + # preserve freq + expected_0 = TimedeltaIndex( + ["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], + name="idx", + freq="s", + ) + expected_3 = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"], + name="idx", 
+ freq="s", + ) + + # reset freq to None + expected_1_nofreq = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], + name="idx", + freq=None, + ) + expected_3_nofreq = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"], + name="idx", + freq=None, + ) + + cases = [ + (0, Timedelta("1day"), expected_0), + (-3, Timedelta("1day"), expected_0), + (3, Timedelta("1day 00:00:04"), expected_3), + (1, Timedelta("1day 00:00:01"), expected_1_nofreq), + (3, Timedelta("1day 00:00:05"), expected_3_nofreq), + ] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "null", [None, np.nan, np.timedelta64("NaT"), pd.NaT, pd.NA] + ) + def test_insert_nat(self, null): + # GH 18295 (test missing) + idx = timedelta_range("1day", "3day") + result = idx.insert(1, null) + expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"]) + tm.assert_index_equal(result, expected) + + def test_insert_invalid_na(self): + idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + item = np.datetime64("NaT") + result = idx.insert(0, item) + + expected = Index([item] + list(idx), dtype=object, name="idx") + tm.assert_index_equal(result, expected) + + # Also works if we pass a different dt64nat object + item2 = np.datetime64("NaT") + result = idx.insert(0, item2) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "item", [0, np.int64(0), np.float64(0), np.array(0), np.datetime64(456, "us")] + ) + def test_insert_mismatched_types_raises(self, item): + # GH#33703 dont cast these to td64 + tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + result = tdi.insert(1, item) + + expected = Index( + [tdi[0], lib.item_from_zerodim(item)] + list(tdi[1:]), + dtype=object, + name="idx", + ) + tm.assert_index_equal(result, expected) + + def test_insert_castable_str(self): + idx = timedelta_range("1day", "3day") + + result = idx.insert(0, "1 Day") + + expected = TimedeltaIndex([idx[0]] + list(idx)) + tm.assert_index_equal(result, expected) + + def test_insert_non_castable_str(self): + idx = timedelta_range("1day", "3day") + + result = idx.insert(0, "foo") + + expected = Index(["foo"] + list(idx), dtype=object) + tm.assert_index_equal(result, expected) + + def test_insert_empty(self): + # Corner case inserting with length zero doesn't raise IndexError + # GH#33573 for freq preservation + idx = timedelta_range("1 Day", periods=3) + td = idx[0] + + result = idx[:0].insert(0, td) + assert result.freq == "D" + + with pytest.raises(IndexError, match="loc must be an integer between"): + result = idx[:0].insert(1, td) + + with pytest.raises(IndexError, match="loc must be an integer between"): + result = idx[:0].insert(-1, td) diff --git a/pandas/tests/indexes/timedeltas/methods/test_repeat.py b/pandas/tests/indexes/timedeltas/methods/test_repeat.py new file mode 100644 index 00000000..2a9b58d1 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/methods/test_repeat.py @@ -0,0 +1,34 @@ +import numpy as np + +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestRepeat: + def test_repeat(self): + index = timedelta_range("1 days", periods=2, freq="D") + exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"]) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = 
TimedeltaIndex(["1 days", "NaT", "3 days"]) + exp = TimedeltaIndex( + [ + "1 days", + "1 days", + "1 days", + "NaT", + "NaT", + "NaT", + "3 days", + "3 days", + "3 days", + ] + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None diff --git a/pandas/tests/indexes/timedeltas/methods/test_shift.py b/pandas/tests/indexes/timedeltas/methods/test_shift.py new file mode 100644 index 00000000..9864f735 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/methods/test_shift.py @@ -0,0 +1,77 @@ +import pytest + +from pandas.errors import NullFrequencyError + +import pandas as pd +from pandas import TimedeltaIndex +import pandas._testing as tm + + +class TestTimedeltaIndexShift: + + # ------------------------------------------------------------- + # TimedeltaIndex.shift is used by __add__/__sub__ + + def test_tdi_shift_empty(self): + # GH#9903 + idx = TimedeltaIndex([], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + tm.assert_index_equal(idx.shift(3, freq="H"), idx) + + def test_tdi_shift_hours(self): + # GH#9903 + idx = TimedeltaIndex(["5 hours", "6 hours", "9 hours"], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + exp = TimedeltaIndex(["8 hours", "9 hours", "12 hours"], name="xxx") + tm.assert_index_equal(idx.shift(3, freq="H"), exp) + exp = TimedeltaIndex(["2 hours", "3 hours", "6 hours"], name="xxx") + tm.assert_index_equal(idx.shift(-3, freq="H"), exp) + + def test_tdi_shift_minutes(self): + # GH#9903 + idx = TimedeltaIndex(["5 hours", "6 hours", "9 hours"], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="T"), idx) + exp = TimedeltaIndex(["05:03:00", "06:03:00", "9:03:00"], name="xxx") + tm.assert_index_equal(idx.shift(3, freq="T"), exp) + exp = TimedeltaIndex(["04:57:00", "05:57:00", "8:57:00"], name="xxx") + tm.assert_index_equal(idx.shift(-3, freq="T"), exp) + + def test_tdi_shift_int(self): + # GH#8083 + tdi = pd.to_timedelta(range(5), unit="d") + trange = tdi._with_freq("infer") + pd.offsets.Hour(1) + result = trange.shift(1) + expected = TimedeltaIndex( + [ + "1 days 01:00:00", + "2 days 01:00:00", + "3 days 01:00:00", + "4 days 01:00:00", + "5 days 01:00:00", + ], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_tdi_shift_nonstandard_freq(self): + # GH#8083 + tdi = pd.to_timedelta(range(5), unit="d") + trange = tdi._with_freq("infer") + pd.offsets.Hour(1) + result = trange.shift(3, freq="2D 1s") + expected = TimedeltaIndex( + [ + "6 days 01:00:03", + "7 days 01:00:03", + "8 days 01:00:03", + "9 days 01:00:03", + "10 days 01:00:03", + ], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_shift_no_freq(self): + # GH#19147 + tdi = TimedeltaIndex(["1 days 01:00:00", "2 days 01:00:00"], freq=None) + with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"): + tdi.shift(2) diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py new file mode 100644 index 00000000..2a5b1be7 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -0,0 +1,279 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Timedelta, + TimedeltaIndex, + timedelta_range, + to_timedelta, +) +import pandas._testing as tm +from pandas.core.arrays.timedeltas import ( + TimedeltaArray, + sequence_to_td64ns, +) + + +class TestTimedeltaIndex: + def test_array_of_dt64_nat_raises(self): + # GH#39462 + nat = 
np.datetime64("NaT", "ns") + arr = np.array([nat], dtype=object) + + # TODO: should be TypeError? + msg = "Invalid type for timedelta scalar" + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(arr) + + with pytest.raises(ValueError, match=msg): + TimedeltaArray._from_sequence(arr) + + with pytest.raises(ValueError, match=msg): + sequence_to_td64ns(arr) + + @pytest.mark.parametrize("unit", ["Y", "y", "M"]) + def test_unit_m_y_raises(self, unit): + msg = "Units 'M', 'Y', and 'y' are no longer supported" + with pytest.raises(ValueError, match=msg): + TimedeltaIndex([1, 3, 7], unit) + + def test_int64_nocopy(self): + # GH#23539 check that a copy isn't made when we pass int64 data + # and copy=False + arr = np.arange(10, dtype=np.int64) + tdi = TimedeltaIndex(arr, copy=False) + assert tdi._data._data.base is arr + + def test_infer_from_tdi(self): + # GH#23539 + # fast-path for inferring a frequency if the passed data already + # has one + tdi = timedelta_range("1 second", periods=10**7, freq="1s") + + result = TimedeltaIndex(tdi, freq="infer") + assert result.freq == tdi.freq + + # check that inferred_freq was not called by checking that the + # value has not been cached + assert "inferred_freq" not in getattr(result, "_cache", {}) + + def test_infer_from_tdi_mismatch(self): + # GH#23539 + # fast-path for invalidating a frequency if the passed data already + # has one and it does not match the `freq` input + tdi = timedelta_range("1 second", periods=100, freq="1s") + + msg = ( + "Inferred frequency .* from passed values does " + "not conform to passed frequency" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(tdi, freq="D") + + with pytest.raises(ValueError, match=msg): + # GH#23789 + TimedeltaArray(tdi, freq="D") + + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(tdi._data, freq="D") + + with pytest.raises(ValueError, match=msg): + TimedeltaArray(tdi._data, freq="D") + + def test_dt64_data_invalid(self): + # GH#23539 + # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64] + # raise as of GH#29794 + dti = pd.date_range("2016-01-01", periods=3) + + msg = "cannot be converted to timedelta64" + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(dti.tz_localize("Europe/Brussels")) + + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(dti) + + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(np.asarray(dti)) + + def test_float64_ns_rounded(self): + # GH#23539 without specifying a unit, floats are regarded as nanos, + # and fractional portions are truncated + tdi = TimedeltaIndex([2.3, 9.7]) + expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) + + # integral floats are non-lossy + tdi = TimedeltaIndex([2.0, 9.0]) + expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) + + # NaNs get converted to NaT + tdi = TimedeltaIndex([2.0, np.nan]) + expected = TimedeltaIndex([Timedelta(nanoseconds=2), pd.NaT]) + tm.assert_index_equal(tdi, expected) + + def test_float64_unit_conversion(self): + # GH#23539 + tdi = TimedeltaIndex([1.5, 2.25], unit="D") + expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) + tm.assert_index_equal(tdi, expected) + + def test_construction_base_constructor(self): + arr = [Timedelta("1 days"), pd.NaT, Timedelta("3 days")] + tm.assert_index_equal(pd.Index(arr), TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), TimedeltaIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, Timedelta("1 days")] + 
tm.assert_index_equal(pd.Index(arr), TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), TimedeltaIndex(np.array(arr))) + + def test_constructor(self): + expected = TimedeltaIndex( + [ + "1 days", + "1 days 00:00:05", + "2 days", + "2 days 00:00:02", + "0 days 00:00:03", + ] + ) + result = TimedeltaIndex( + [ + "1 days", + "1 days, 00:00:05", + np.timedelta64(2, "D"), + timedelta(days=2, seconds=2), + pd.offsets.Second(3), + ] + ) + tm.assert_index_equal(result, expected) + + # unicode + result = TimedeltaIndex( + [ + "1 days", + "1 days, 00:00:05", + np.timedelta64(2, "D"), + timedelta(days=2, seconds=2), + pd.offsets.Second(3), + ] + ) + + expected = TimedeltaIndex( + ["0 days 00:00:00", "0 days 00:00:01", "0 days 00:00:02"] + ) + tm.assert_index_equal(TimedeltaIndex(range(3), unit="s"), expected) + expected = TimedeltaIndex( + ["0 days 00:00:00", "0 days 00:00:05", "0 days 00:00:09"] + ) + tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit="s"), expected) + expected = TimedeltaIndex( + ["0 days 00:00:00.400", "0 days 00:00:00.450", "0 days 00:00:01.200"] + ) + tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit="ms"), expected) + + def test_constructor_iso(self): + # GH #21877 + expected = timedelta_range("1s", periods=9, freq="s") + durations = [f"P0DT0H0M{i}S" for i in range(1, 10)] + result = to_timedelta(durations) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + rng = timedelta_range("1 days", periods=10.5) + exp = timedelta_range("1 days", periods=10) + tm.assert_index_equal(rng, exp) + + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + timedelta_range(start="1 days", periods="foo", freq="D") + + msg = ( + r"TimedeltaIndex\(\.\.\.\) must be called with a collection of some kind, " + "'1 days' was passed" + ) + with pytest.raises(TypeError, match=msg): + TimedeltaIndex("1 days") + + # generator expression + gen = (timedelta(i) for i in range(10)) + result = TimedeltaIndex(gen) + expected = TimedeltaIndex([timedelta(i) for i in range(10)]) + tm.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(["1 days", "2 days", "3 days"]) + result = TimedeltaIndex(strings) + expected = to_timedelta([1, 2, 3], unit="d") + tm.assert_index_equal(result, expected) + + from_ints = TimedeltaIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # non-conforming freq + msg = ( + "Inferred frequency None from passed values does not conform to " + "passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(["1 days", "2 days", "4 days"], freq="D") + + msg = ( + "Of the four parameters: start, end, periods, and freq, exactly " + "three must be specified" + ) + with pytest.raises(ValueError, match=msg): + timedelta_range(periods=10, freq="D") + + def test_constructor_name(self): + idx = timedelta_range(start="1 days", periods=1, freq="D", name="TEST") + assert idx.name == "TEST" + + # GH10025 + idx2 = TimedeltaIndex(idx, name="something else") + assert idx2.name == "something else" + + def test_constructor_no_precision_raises(self): + # GH-24753, GH-24739 + + msg = "with no precision is not allowed" + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(["2000"], dtype="timedelta64") + + with pytest.raises(ValueError, match=msg): + pd.Index(["2000"], dtype="timedelta64") + + def test_constructor_wrong_precision_raises(self): + msg = r"dtype timedelta64\[us\] cannot be converted to timedelta64\[ns\]" + with 
pytest.raises(ValueError, match=msg): + TimedeltaIndex(["2000"], dtype="timedelta64[us]") + + def test_explicit_none_freq(self): + # Explicitly passing freq=None is respected + tdi = timedelta_range(1, periods=5) + assert tdi.freq is not None + + result = TimedeltaIndex(tdi, freq=None) + assert result.freq is None + + result = TimedeltaIndex(tdi._data, freq=None) + assert result.freq is None + + tda = TimedeltaArray(tdi, freq=None) + assert tda.freq is None + + def test_from_categorical(self): + tdi = timedelta_range(1, periods=5) + + cat = pd.Categorical(tdi) + + result = TimedeltaIndex(cat) + tm.assert_index_equal(result, tdi) + + ci = pd.CategoricalIndex(tdi) + result = TimedeltaIndex(ci) + tm.assert_index_equal(result, tdi) diff --git a/pandas/tests/indexes/timedeltas/test_delete.py b/pandas/tests/indexes/timedeltas/test_delete.py new file mode 100644 index 00000000..6e6f5470 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_delete.py @@ -0,0 +1,71 @@ +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndexDelete: + def test_delete(self): + idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx") + + # preserve freq + expected_0 = timedelta_range(start="2 Days", periods=4, freq="D", name="idx") + expected_4 = timedelta_range(start="1 Days", periods=4, freq="D", name="idx") + + # reset freq to None + expected_1 = TimedeltaIndex( + ["1 day", "3 day", "4 day", "5 day"], freq=None, name="idx" + ) + + cases = { + 0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + with tm.external_error_raised((IndexError, ValueError)): + # either depending on numpy version + idx.delete(5) + + def test_delete_slice(self): + idx = timedelta_range(start="1 days", periods=10, freq="D", name="idx") + + # preserve freq + expected_0_2 = timedelta_range(start="4 days", periods=7, freq="D", name="idx") + expected_7_9 = timedelta_range(start="1 days", periods=7, freq="D", name="idx") + + # reset freq to None + expected_3_5 = TimedeltaIndex( + ["1 d", "2 d", "3 d", "7 d", "8 d", "9 d", "10d"], freq=None, name="idx" + ) + + cases = { + (0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + result = idx.delete(slice(n[0], n[-1] + 1)) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + def test_delete_doesnt_infer_freq(self): + # GH#30655 behavior matches DatetimeIndex + + tdi = TimedeltaIndex(["1 Day", "2 Days", None, "3 Days", "4 Days"]) + result = tdi.delete(2) + assert result.freq is None diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py new file mode 100644 index 00000000..751f9e4c --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -0,0 +1,93 @@ +import pytest + +import pandas as pd +from pandas import ( + Series, + TimedeltaIndex, +) + + +class TestTimedeltaIndexRendering: + @pytest.mark.parametrize("method", ["__repr__", "__str__"]) + def test_representation(self, method): + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + 
idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = "TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')" + + exp2 = "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')" + + exp3 = "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')" + + exp4 = ( + "TimedeltaIndex(['1 days', '2 days', '3 days'], " + "dtype='timedelta64[ns]', freq='D')" + ) + + exp5 = ( + "TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " + "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)" + ) + + with pd.option_context("display.width", 300): + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = getattr(idx, method)() + assert result == expected + + def test_representation_to_series(self): + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = """Series([], dtype: timedelta64[ns])""" + + exp2 = "0 1 days\ndtype: timedelta64[ns]" + + exp3 = "0 1 days\n1 2 days\ndtype: timedelta64[ns]" + + exp4 = "0 1 days\n1 2 days\n2 3 days\ndtype: timedelta64[ns]" + + exp5 = ( + "0 1 days 00:00:01\n" + "1 2 days 00:00:00\n" + "2 3 days 00:00:00\n" + "dtype: timedelta64[ns]" + ) + + with pd.option_context("display.width", 300): + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = repr(Series(idx)) + assert result == expected + + def test_summary(self): + # GH#9116 + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = "TimedeltaIndex: 0 entries\nFreq: D" + + exp2 = "TimedeltaIndex: 1 entries, 1 days to 1 days\nFreq: D" + + exp3 = "TimedeltaIndex: 2 entries, 1 days to 2 days\nFreq: D" + + exp4 = "TimedeltaIndex: 3 entries, 1 days to 3 days\nFreq: D" + + exp5 = "TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days 00:00:00" + + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = idx._summary() + assert result == expected diff --git a/pandas/tests/indexes/timedeltas/test_freq_attr.py b/pandas/tests/indexes/timedeltas/test_freq_attr.py new file mode 100644 index 00000000..39b9c11a --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_freq_attr.py @@ -0,0 +1,61 @@ +import pytest + +from pandas import TimedeltaIndex + +from pandas.tseries.offsets import ( + DateOffset, + Day, + Hour, +) + + +class TestFreq: + @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []]) + @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)]) + def test_freq_setter(self, values, freq): + # GH#20678 + idx = TimedeltaIndex(values) + + # can set to an offset, converting from string if necessary + idx._data.freq = freq + assert idx.freq == freq + assert isinstance(idx.freq, DateOffset) + + # can reset to None + idx._data.freq = None + assert idx.freq is None + + def test_freq_setter_errors(self): + # GH#20678 + idx = TimedeltaIndex(["0 days", "2 days", "4 days"]) + + # setting with an incompatible freq + msg = ( + "Inferred frequency 2D from 
passed values does not conform to " + "passed frequency 5D" + ) + with pytest.raises(ValueError, match=msg): + idx._data.freq = "5D" + + # setting with a non-fixed frequency + msg = r"<2 \* BusinessDays> is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + idx._data.freq = "2B" + + # setting with non-freq string + with pytest.raises(ValueError, match="Invalid frequency"): + idx._data.freq = "foo" + + def test_freq_view_safe(self): + # Setting the freq for one TimedeltaIndex shouldn't alter the freq + # for another that views the same data + + tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D") + tda = tdi._data + + tdi2 = TimedeltaIndex(tda)._with_freq(None) + assert tdi2.freq is None + + # Original was not altered + assert tdi.freq == "2D" + assert tda.freq == "2D" diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py new file mode 100644 index 00000000..154a6289 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -0,0 +1,360 @@ +from datetime import ( + datetime, + timedelta, +) +import re + +import numpy as np +import pytest + +from pandas import ( + Index, + NaT, + Timedelta, + TimedeltaIndex, + Timestamp, + notna, + offsets, + timedelta_range, + to_timedelta, +) +import pandas._testing as tm + + +class TestGetItem: + def test_getitem_slice_keeps_name(self): + # GH#4226 + tdi = timedelta_range("1d", "5d", freq="H", name="timebucket") + assert tdi[1:].name == tdi.name + + def test_getitem(self): + idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx") + + for idx in [idx1]: + result = idx[0] + assert result == Timedelta("1 day") + + result = idx[0:5] + expected = timedelta_range("1 day", "5 day", freq="D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[0:10:2] + expected = timedelta_range("1 day", "9 day", freq="2D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[-20:-5:3] + expected = timedelta_range("12 day", "24 day", freq="3D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[4::-1] + expected = TimedeltaIndex( + ["5 day", "4 day", "3 day", "2 day", "1 day"], freq="-1D", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "key", + [ + Timestamp("1970-01-01"), + Timestamp("1970-01-02"), + datetime(1970, 1, 1), + Timestamp("1970-01-03").to_datetime64(), + # non-matching NA values + np.datetime64("NaT"), + ], + ) + def test_timestamp_invalid_key(self, key): + # GH#20464 + tdi = timedelta_range(0, periods=10) + with pytest.raises(KeyError, match=re.escape(repr(key))): + tdi.get_loc(key) + + +class TestGetLoc: + @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") + def test_get_loc(self): + idx = to_timedelta(["0 days", "1 days", "2 days"]) + + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1 + + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc(idx[1], method="nearest", tolerance="foo") + + with 
pytest.raises(ValueError, match="tolerance size must match"): + idx.get_loc( + idx[1], + method="nearest", + tolerance=[ + Timedelta(0).to_timedelta64(), + Timedelta(0).to_timedelta64(), + ], + ) + + for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: + assert idx.get_loc("1 day 1 hour", method) == loc + + # GH 16909 + assert idx.get_loc(idx[1].to_timedelta64()) == 1 + + # GH 16896 + assert idx.get_loc("0 days") == 0 + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"]) + + assert tidx.get_loc(NaT) == 1 + assert tidx.get_loc(None) == 1 + assert tidx.get_loc(float("nan")) == 1 + assert tidx.get_loc(np.nan) == 1 + + +class TestGetIndexer: + def test_get_indexer(self): + idx = to_timedelta(["0 days", "1 days", "2 days"]) + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour")) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) + + +class TestWhere: + def test_where_doesnt_retain_freq(self): + tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") + cond = [True, True, False] + expected = TimedeltaIndex([tdi[0], tdi[1], tdi[0]], freq=None, name="idx") + + result = tdi.where(cond, tdi[::-1]) + tm.assert_index_equal(result, expected) + + def test_where_invalid_dtypes(self, fixed_now_ts): + tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") + + tail = tdi[2:].tolist() + i2 = Index([NaT, NaT] + tail) + mask = notna(i2) + + expected = Index([NaT.value, NaT.value] + tail, dtype=object, name="idx") + assert isinstance(expected[0], int) + result = tdi.where(mask, i2.asi8) + tm.assert_index_equal(result, expected) + + ts = i2 + fixed_now_ts + expected = Index([ts[0], ts[1]] + tail, dtype=object, name="idx") + result = tdi.where(mask, ts) + tm.assert_index_equal(result, expected) + + per = (i2 + fixed_now_ts).to_period("D") + expected = Index([per[0], per[1]] + tail, dtype=object, name="idx") + result = tdi.where(mask, per) + tm.assert_index_equal(result, expected) + + ts = fixed_now_ts + expected = Index([ts, ts] + tail, dtype=object, name="idx") + result = tdi.where(mask, ts) + tm.assert_index_equal(result, expected) + + def test_where_mismatched_nat(self): + tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") + cond = np.array([True, False, False]) + + dtnat = np.datetime64("NaT", "ns") + expected = Index([tdi[0], dtnat, dtnat], dtype=object, name="idx") + assert expected[2] is dtnat + result = tdi.where(cond, dtnat) + tm.assert_index_equal(result, expected) + + +class TestTake: + def test_take(self): + # GH 10295 + idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx") + + for idx in [idx1]: + result = idx.take([0]) + assert result == Timedelta("1 day") + + result = idx.take([-1]) + assert result == Timedelta("31 day") + + result = idx.take([0, 1, 2]) + expected = timedelta_range("1 day", "3 day", freq="D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = timedelta_range("1 day", "5 day", 
freq="2D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([7, 4, 1]) + expected = timedelta_range("8 day", "2 day", freq="-3D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([3, 2, 5]) + expected = TimedeltaIndex(["4 day", "3 day", "6 day"], name="idx") + tm.assert_index_equal(result, expected) + assert result.freq is None + + result = idx.take([-3, 2, 5]) + expected = TimedeltaIndex(["29 day", "3 day", "6 day"], name="idx") + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_take_invalid_kwargs(self): + idx = timedelta_range("1 day", "31 day", freq="D", name="idx") + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + def test_take_equiv_getitem(self): + tds = ["1day 02:00:00", "1 day 04:00:00", "1 day 10:00:00"] + idx = timedelta_range(start="1d", end="2d", freq="H", name="idx") + expected = TimedeltaIndex(tds, freq=None, name="idx") + + taken1 = idx.take([2, 4, 10]) + taken2 = idx[[2, 4, 10]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, TimedeltaIndex) + assert taken.freq is None + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH 12631 + idx = TimedeltaIndex(["1 days", "2 days", "3 days"], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = TimedeltaIndex(["2 days", "1 days", "NaT"], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestMaybeCastSliceBound: + @pytest.fixture(params=["increasing", "decreasing", None]) + def monotonic(self, request): + return request.param + + @pytest.fixture + def tdi(self, monotonic): + tdi = timedelta_range("1 Day", periods=10) + if monotonic == "decreasing": + tdi = tdi[::-1] + elif monotonic is None: + taker = np.arange(10, dtype=np.intp) + np.random.shuffle(taker) + tdi = tdi.take(taker) + return tdi + + def test_maybe_cast_slice_bound_invalid_str(self, tdi): + # test the low-level _maybe_cast_slice_bound and that we get the + # expected exception+message all the way up the stack + msg = ( + "cannot do slice indexing on TimedeltaIndex with these " + r"indexers \[foo\] of type str" + ) + with pytest.raises(TypeError, match=msg): + tdi._maybe_cast_slice_bound("foo", side="left") + 
with pytest.raises(TypeError, match=msg): + tdi.get_slice_bound("foo", side="left") + with pytest.raises(TypeError, match=msg): + tdi.slice_locs("foo", None, None) + + def test_slice_invalid_str_with_timedeltaindex( + self, tdi, frame_or_series, indexer_sl + ): + obj = frame_or_series(range(10), index=tdi) + + msg = ( + "cannot do slice indexing on TimedeltaIndex with these " + r"indexers \[foo\] of type str" + ) + with pytest.raises(TypeError, match=msg): + indexer_sl(obj)["foo":] + with pytest.raises(TypeError, match=msg): + indexer_sl(obj)["foo":-1] + with pytest.raises(TypeError, match=msg): + indexer_sl(obj)[:"foo"] + with pytest.raises(TypeError, match=msg): + indexer_sl(obj)[tdi[0] : "foo"] + + +class TestContains: + def test_contains_nonunique(self): + # GH#9512 + for vals in ( + [0, 1, 0], + [0, 0, -1], + [0, -1, -1], + ["00:01:00", "00:01:00", "00:02:00"], + ["00:01:00", "00:01:00", "00:00:01"], + ): + idx = TimedeltaIndex(vals) + assert idx[0] in idx + + def test_contains(self): + # Checking for any NaT-like objects + # GH#13603 + td = to_timedelta(range(5), unit="d") + offsets.Hour(1) + for v in [NaT, None, float("nan"), np.nan]: + assert not (v in td) + + td = to_timedelta([NaT]) + for v in [NaT, None, float("nan"), np.nan]: + assert v in td diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py new file mode 100644 index 00000000..2d8795b4 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -0,0 +1,53 @@ +import numpy as np + +from pandas import ( + Index, + Timedelta, + timedelta_range, +) +import pandas._testing as tm + + +class TestJoin: + def test_append_join_nondatetimeindex(self): + rng = timedelta_range("1 days", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timedelta) + + # it works + rng.join(idx, how="outer") + + def test_join_self(self, join_type): + index = timedelta_range("1 day", periods=10) + joined = index.join(index, how=join_type) + tm.assert_index_equal(index, joined) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="td", + ) + str(df) + + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_index_equal(cols, joined) + + def test_join_preserves_freq(self): + # GH#32157 + tdi = timedelta_range("1 day", periods=10) + result = tdi[:5].join(tdi[5:], how="outer") + assert result.freq == tdi.freq + tm.assert_index_equal(result, tdi) + + result = tdi[:5].join(tdi[6:], how="outer") + assert result.freq is None + expected = tdi.delete(5) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py new file mode 100644 index 00000000..f6013baf --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -0,0 +1,14 @@ +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndexOps: + def test_infer_freq(self, freq_sample): + # GH#11018 + idx = timedelta_range("1", freq=freq_sample, periods=10) + result = TimedeltaIndex(idx.asi8, freq="infer") + tm.assert_index_equal(idx, result) + assert result.freq == freq_sample diff --git a/pandas/tests/indexes/timedeltas/test_pickle.py b/pandas/tests/indexes/timedeltas/test_pickle.py new file mode 100644 index 
00000000..befe7097 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_pickle.py @@ -0,0 +1,11 @@ +from pandas import timedelta_range +import pandas._testing as tm + + +class TestPickle: + def test_pickle_after_set_freq(self): + tdi = timedelta_range("1 day", periods=4, freq="s") + tdi = tdi._with_freq(None) + + res = tm.round_trip_pickle(tdi) + tm.assert_index_equal(res, tdi) diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py new file mode 100644 index 00000000..5e4b228b --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -0,0 +1,142 @@ +""" +Tests for TimedeltaIndex methods behaving like their Timedelta counterparts +""" + +import numpy as np +import pytest + +from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG + +from pandas import ( + Index, + Series, + Timedelta, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestVectorizedTimedelta: + def test_tdi_total_seconds(self): + # GH#10939 + # test index + rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") + expt = [ + 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9, + 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456.0 / 1e9, + ] + tm.assert_almost_equal(rng.total_seconds(), Index(expt)) + + # test Series + ser = Series(rng) + s_expt = Series(expt, index=[0, 1]) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with nat + ser[1] = np.nan + s_expt = Series( + [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9, np.nan], + index=[0, 1], + ) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with both nat + ser = Series([np.nan, np.nan], dtype="timedelta64[ns]") + tm.assert_series_equal( + ser.dt.total_seconds(), Series([np.nan, np.nan], index=[0, 1]) + ) + + def test_tdi_round(self): + td = timedelta_range(start="16801 days", periods=5, freq="30Min") + elt = td[1] + + expected_rng = TimedeltaIndex( + [ + Timedelta("16801 days 00:00:00"), + Timedelta("16801 days 00:00:00"), + Timedelta("16801 days 01:00:00"), + Timedelta("16801 days 02:00:00"), + Timedelta("16801 days 02:00:00"), + ] + ) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq="H"), expected_rng) + assert elt.round(freq="H") == expected_elt + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + td.round(freq="foo") + with pytest.raises(ValueError, match=msg): + elt.round(freq="foo") + + msg = " is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + td.round(freq="M") + with pytest.raises(ValueError, match=msg): + elt.round(freq="M") + + @pytest.mark.parametrize( + "freq,msg", + [ + ("Y", " is a non-fixed frequency"), + ("M", " is a non-fixed frequency"), + ("foobar", "Invalid frequency: foobar"), + ], + ) + def test_tdi_round_invalid(self, freq, msg): + t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us") + + with pytest.raises(ValueError, match=msg): + t1.round(freq) + with pytest.raises(ValueError, match=msg): + # Same test for TimedeltaArray + t1._data.round(freq) + + # TODO: de-duplicate with test_tdi_round + def test_round(self): + t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us") + t2 = -1 * t1 + t1a = timedelta_range("1 days", periods=3, freq="1 min 2 s") + t1c = TimedeltaIndex([1, 1, 1], unit="D") + + # note that negative times round DOWN! 
so don't give whole numbers + for (freq, s1, s2) in [ + ("N", t1, t2), + ("U", t1, t2), + ( + "L", + t1a, + TimedeltaIndex( + ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"] + ), + ), + ( + "S", + t1a, + TimedeltaIndex( + ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"] + ), + ), + ("12T", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])), + ("H", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])), + ("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")), + ]: + + r1 = t1.round(freq) + tm.assert_index_equal(r1, s1) + r2 = t2.round(freq) + tm.assert_index_equal(r2, s2) + + def test_components(self): + rng = timedelta_range("1 days, 10:11:12", periods=2, freq="s") + rng.components + + # with nat + s = Series(rng) + s[1] = np.nan + + result = s.dt.components + assert not result.iloc[0].isna().all() + assert result.iloc[1].isna().all() diff --git a/pandas/tests/indexes/timedeltas/test_searchsorted.py b/pandas/tests/indexes/timedeltas/test_searchsorted.py new file mode 100644 index 00000000..710571ef --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_searchsorted.py @@ -0,0 +1,28 @@ +import numpy as np +import pytest + +from pandas import ( + TimedeltaIndex, + Timestamp, +) +import pandas._testing as tm + + +class TestSearchSorted: + def test_searchsorted_different_argument_classes(self, listlike_box): + idx = TimedeltaIndex(["1 day", "2 days", "3 days"]) + result = idx.searchsorted(listlike_box(idx)) + expected = np.arange(len(idx), dtype=result.dtype) + tm.assert_numpy_array_equal(result, expected) + + result = idx._data.searchsorted(listlike_box(idx)) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2] + ) + def test_searchsorted_invalid_argument_dtype(self, arg): + idx = TimedeltaIndex(["1 day", "2 days", "3 days"]) + msg = "value should be a 'Timedelta', 'NaT', or array of those. 
Got" + with pytest.raises(TypeError, match=msg): + idx.searchsorted(arg) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py new file mode 100644 index 00000000..4574c153 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -0,0 +1,260 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import Int64Index + +from pandas.tseries.offsets import Hour + + +class TestTimedeltaIndex: + def test_union(self): + + i1 = timedelta_range("1day", periods=5) + i2 = timedelta_range("3day", periods=5) + result = i1.union(i2) + expected = timedelta_range("1day", periods=7) + tm.assert_index_equal(result, expected) + + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = timedelta_range(start="1 day", periods=10, freq="D") + i1.union(i2) # Works + i2.union(i1) # Fails with "AttributeError: can't set attribute" + + def test_union_sort_false(self): + tdi = timedelta_range("1day", periods=5) + + left = tdi[3:] + right = tdi[:3] + + # Check that we are testing the desired code path + assert left._can_fast_union(right) + + result = left.union(right) + tm.assert_index_equal(result, tdi) + + result = left.union(right, sort=False) + expected = TimedeltaIndex(["4 Days", "5 Days", "1 Days", "2 Day", "3 Days"]) + tm.assert_index_equal(result, expected) + + def test_union_coverage(self): + + idx = TimedeltaIndex(["3d", "1d", "2d"]) + ordered = TimedeltaIndex(idx.sort_values(), freq="infer") + result = ordered.union(idx) + tm.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered) + tm.assert_index_equal(result, ordered) + assert result.freq == ordered.freq + + def test_union_bug_1730(self): + + rng_a = timedelta_range("1 day", periods=4, freq="3H") + rng_b = timedelta_range("1 day", periods=4, freq="4H") + + result = rng_a.union(rng_b) + exp = TimedeltaIndex(sorted(set(rng_a) | set(rng_b))) + tm.assert_index_equal(result, exp) + + def test_union_bug_1745(self): + + left = TimedeltaIndex(["1 day 15:19:49.695000"]) + right = TimedeltaIndex( + ["2 day 13:04:21.322000", "1 day 15:27:24.873000", "1 day 15:31:05.350000"] + ) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(left) | set(right))) + tm.assert_index_equal(result, exp) + + def test_union_bug_4564(self): + + left = timedelta_range("1 day", "30d") + right = left + pd.offsets.Minute(15) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(left) | set(right))) + tm.assert_index_equal(result, exp) + + def test_union_freq_infer(self): + # When taking the union of two TimedeltaIndexes, we infer + # a freq even if the arguments don't have freq. This matches + # DatetimeIndex behavior. 
+ tdi = timedelta_range("1 Day", periods=5) + left = tdi[[0, 1, 3, 4]] + right = tdi[[2, 3, 1]] + + assert left.freq is None + assert right.freq is None + + result = left.union(right) + tm.assert_index_equal(result, tdi) + assert result.freq == "D" + + def test_intersection_bug_1708(self): + index_1 = timedelta_range("1 day", periods=4, freq="h") + index_2 = index_1 + pd.offsets.Hour(5) + + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 + assert len(result) == 0 + + index_1 = timedelta_range("1 day", periods=4, freq="h") + index_2 = index_1 + pd.offsets.Hour(1) + + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 + expected = timedelta_range("1 day 01:00:00", periods=3, freq="h") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + def test_intersection_equal(self, sort): + # GH 24471 Test intersection outcome given the sort keyword + # for equal indices intersection should return the original index + first = timedelta_range("1 day", periods=4, freq="h") + second = timedelta_range("1 day", periods=4, freq="h") + intersect = first.intersection(second, sort=sort) + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # Corner cases + inter = first.intersection(first, sort=sort) + assert inter is first + + @pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)]) + def test_intersection_zero_length(self, period_1, period_2, sort): + # GH 24471 test for non overlap the intersection should be zero length + index_1 = timedelta_range("1 day", periods=period_1, freq="h") + index_2 = timedelta_range("1 day", periods=period_2, freq="h") + expected = timedelta_range("1 day", periods=0, freq="h") + result = index_1.intersection(index_2, sort=sort) + tm.assert_index_equal(result, expected) + + def test_zero_length_input_index(self, sort): + # GH 24966 test for 0-len intersections are copied + index_1 = timedelta_range("1 day", periods=0, freq="h") + index_2 = timedelta_range("1 day", periods=3, freq="h") + result = index_1.intersection(index_2, sort=sort) + assert index_1 is not result + assert index_2 is not result + tm.assert_copy(result, index_1) + + @pytest.mark.parametrize( + "rng, expected", + # if target has the same name, it is preserved + [ + ( + timedelta_range("1 day", periods=5, freq="h", name="idx"), + timedelta_range("1 day", periods=4, freq="h", name="idx"), + ), + # if target name is different, it will be reset + ( + timedelta_range("1 day", periods=5, freq="h", name="other"), + timedelta_range("1 day", periods=4, freq="h", name=None), + ), + # if no overlap exists return empty index + ( + timedelta_range("1 day", periods=10, freq="h", name="idx")[5:], + TimedeltaIndex([], freq="h", name="idx"), + ), + ], + ) + def test_intersection(self, rng, expected, sort): + # GH 4690 (with tz) + base = timedelta_range("1 day", periods=4, freq="h", name="idx") + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "rng, expected", + # part intersection works + [ + ( + TimedeltaIndex(["5 hour", "2 hour", "4 hour", "9 hour"], name="idx"), + TimedeltaIndex(["2 hour", "4 hour"], name="idx"), + ), + # reordered part intersection + ( + TimedeltaIndex(["2 hour", "5 hour", "5 hour", "1 hour"], name="other"), + TimedeltaIndex(["1 hour", "2 hour"], 
name=None), + ), + # reversed index + ( + TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx")[ + ::-1 + ], + TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx"), + ), + ], + ) + def test_intersection_non_monotonic(self, rng, expected, sort): + # 24471 non-monotonic + base = TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx") + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + # if reversed order, frequency is still the same + if all(base == rng[::-1]) and sort is None: + assert isinstance(result.freq, Hour) + else: + assert result.freq is None + + +class TestTimedeltaIndexDifference: + def test_difference_freq(self, sort): + # GH14323: Difference of TimedeltaIndex should not preserve frequency + + index = timedelta_range("0 days", "5 days", freq="D") + + other = timedelta_range("1 days", "4 days", freq="D") + expected = TimedeltaIndex(["0 days", "5 days"], freq=None) + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other, sort) + expected = TimedeltaIndex(["0 days", "1 days"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + def test_difference_sort(self, sort): + + index = TimedeltaIndex( + ["5 days", "3 days", "2 days", "4 days", "1 days", "0 days"] + ) + + other = timedelta_range("1 days", "4 days", freq="D") + idx_diff = index.difference(other, sort) + + expected = TimedeltaIndex(["5 days", "0 days"], freq=None) + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other, sort) + expected = TimedeltaIndex(["1 days", "0 days"], freq=None) + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py new file mode 100644 index 00000000..6904a847 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -0,0 +1,145 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + NaT, + Series, + Timedelta, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.indexes.api import Int64Index +from pandas.tests.indexes.datetimelike import DatetimeLike + +randn = np.random.randn + + +class TestTimedeltaIndex(DatetimeLike): + _index_cls = TimedeltaIndex + + @pytest.fixture + def simple_index(self) -> TimedeltaIndex: + index = pd.to_timedelta(range(5), unit="d")._with_freq("infer") + assert index.freq == "D" + ret = index + pd.offsets.Hour(1) + assert ret.freq == "D" + return ret + + @pytest.fixture + def index(self): + return tm.makeTimedeltaIndex(10) + + def test_numeric_compat(self): + # Dummy method to override super's version; this test is now done + # in test_arithmetic.py + pass + + def test_shift(self): + pass # this is handled in test_arithmetic.py + + def test_misc_coverage(self): + + rng = timedelta_range("1 day", periods=5) + result = rng.groupby(rng.days) + assert 
isinstance(list(result.values())[0][0], Timedelta) + + def test_map(self): + # test_map_dictlike generally tests + + rng = timedelta_range("1 day", periods=10) + + f = lambda x: x.days + result = rng.map(f) + exp = Int64Index([f(x) for x in rng]) + tm.assert_index_equal(result, exp) + + def test_pass_TimedeltaIndex_to_index(self): + + rng = timedelta_range("1 days", "10 days") + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pytimedelta(), dtype=object) + + tm.assert_numpy_array_equal(idx.values, expected.values) + + def test_fields(self): + rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") + tm.assert_index_equal(rng.days, Index([1, 1], dtype="int64")) + tm.assert_index_equal( + rng.seconds, + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype="int64"), + ) + tm.assert_index_equal( + rng.microseconds, Index([100 * 1000 + 123, 100 * 1000 + 123], dtype="int64") + ) + tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype="int64")) + + msg = "'TimedeltaIndex' object has no attribute '{}'" + with pytest.raises(AttributeError, match=msg.format("hours")): + rng.hours + with pytest.raises(AttributeError, match=msg.format("minutes")): + rng.minutes + with pytest.raises(AttributeError, match=msg.format("milliseconds")): + rng.milliseconds + + # with nat + s = Series(rng) + s[1] = np.nan + + tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1])) + tm.assert_series_equal( + s.dt.seconds, Series([10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1]) + ) + + # preserve name (GH15589) + rng.name = "name" + assert rng.days.name == "name" + + def test_freq_conversion_always_floating(self): + # even if we have no NaTs, we get back float64; this matches TDA and Series + tdi = timedelta_range("1 Day", periods=30) + + res = tdi.astype("m8[s]") + expected = Index((tdi.view("i8") / 10**9).astype(np.float64)) + tm.assert_index_equal(res, expected) + + # check this matches Series and TimedeltaArray + res = tdi._data.astype("m8[s]") + tm.assert_numpy_array_equal(res, expected._values) + + res = tdi.to_series().astype("m8[s]") + tm.assert_numpy_array_equal(res._values, expected._values) + + def test_freq_conversion(self, index_or_series): + + # doc example + + scalar = Timedelta(days=31) + td = index_or_series( + [scalar, scalar, scalar + timedelta(minutes=5, seconds=3), NaT], + dtype="m8[ns]", + ) + + result = td / np.timedelta64(1, "D") + expected = index_or_series( + [31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan] + ) + tm.assert_equal(result, expected) + + result = td.astype("timedelta64[D]") + expected = index_or_series([31, 31, 31, np.nan]) + tm.assert_equal(result, expected) + + result = td / np.timedelta64(1, "s") + expected = index_or_series( + [31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan] + ) + tm.assert_equal(result, expected) + + result = td.astype("timedelta64[s]") + tm.assert_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py new file mode 100644 index 00000000..7277595f --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -0,0 +1,92 @@ +import numpy as np +import pytest + +from pandas import ( + Timedelta, + timedelta_range, + to_timedelta, +) +import pandas._testing as tm + +from pandas.tseries.offsets import ( + Day, + Second, +) + + +class TestTimedeltas: + def test_timedelta_range(self): + + expected = to_timedelta(np.arange(5), unit="D") + result = timedelta_range("0 days", periods=5, 
freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(11), unit="D") + result = timedelta_range("0 days", "10 days", freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day() + result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2) + result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(50), unit="T") * 30 + result = timedelta_range("0 days", freq="30T", periods=50) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "periods, freq", [(3, "2D"), (5, "D"), (6, "19H12T"), (7, "16H"), (9, "12H")] + ) + def test_linspace_behavior(self, periods, freq): + # GH 20976 + result = timedelta_range(start="0 days", end="4 days", periods=periods) + expected = timedelta_range(start="0 days", end="4 days", freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the four parameters: start, end, periods, and freq, " + "exactly three must be specified" + ) + with pytest.raises(ValueError, match=msg): + timedelta_range(start="0 days") + + with pytest.raises(ValueError, match=msg): + timedelta_range(end="5 days") + + with pytest.raises(ValueError, match=msg): + timedelta_range(periods=2) + + with pytest.raises(ValueError, match=msg): + timedelta_range() + + # too many params + with pytest.raises(ValueError, match=msg): + timedelta_range(start="0 days", end="5 days", periods=10, freq="H") + + @pytest.mark.parametrize( + "start, end, freq, expected_periods", + [ + ("1D", "10D", "2D", (10 - 1) // 2 + 1), + ("2D", "30D", "3D", (30 - 2) // 3 + 1), + ("2s", "50s", "5s", (50 - 2) // 5 + 1), + # tests that worked before GH 33498: + ("4D", "16D", "3D", (16 - 4) // 3 + 1), + ("8D", "16D", "40s", (16 * 3600 * 24 - 8 * 3600 * 24) // 40 + 1), + ], + ) + def test_timedelta_range_freq_divide_end(self, start, end, freq, expected_periods): + # GH 33498 only the cases where `(end % freq) == 0` used to fail + res = timedelta_range(start=start, end=end, freq=freq) + assert Timedelta(start) == res[0] + assert Timedelta(end) >= res[-1] + assert len(res) == expected_periods + + def test_timedelta_range_infer_freq(self): + # https://github.com/pandas-dev/pandas/issues/35897 + result = timedelta_range("0s", "1s", periods=31) + assert result.freq is None diff --git a/pandas/tests/indexing/__init__.py b/pandas/tests/indexing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py new file mode 100644 index 00000000..ea9f2584 --- /dev/null +++ b/pandas/tests/indexing/common.py @@ -0,0 +1,190 @@ +""" common utilities """ +import itertools + +import numpy as np + +from pandas import ( + DataFrame, + MultiIndex, + Series, + date_range, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + UInt64Index, +) + + +def _mklbl(prefix, n): + return [f"{prefix}{i}" for i in range(n)] + + +def _axify(obj, key, axis): + # create a tuple accessor + axes = [slice(None)] * obj.ndim + axes[axis] = key + return tuple(axes) + + +class Base: + """indexing comprehensive base class""" + + _kinds = {"series", "frame"} + _typs = { + "ints", + "uints", + "labels", + "mixed", + "ts", + "floats", + "empty", + "ts_rev", + "multi", + } + + def 
setup_method(self): + + self.series_ints = Series(np.random.rand(4), index=np.arange(0, 8, 2)) + self.frame_ints = DataFrame( + np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) + ) + + self.series_uints = Series( + np.random.rand(4), index=UInt64Index(np.arange(0, 8, 2)) + ) + self.frame_uints = DataFrame( + np.random.randn(4, 4), + index=UInt64Index(range(0, 8, 2)), + columns=UInt64Index(range(0, 12, 3)), + ) + + self.series_floats = Series( + np.random.rand(4), index=Float64Index(range(0, 8, 2)) + ) + self.frame_floats = DataFrame( + np.random.randn(4, 4), + index=Float64Index(range(0, 8, 2)), + columns=Float64Index(range(0, 12, 3)), + ) + + m_idces = [ + MultiIndex.from_product([[1, 2], [3, 4]]), + MultiIndex.from_product([[5, 6], [7, 8]]), + MultiIndex.from_product([[9, 10], [11, 12]]), + ] + + self.series_multi = Series(np.random.rand(4), index=m_idces[0]) + self.frame_multi = DataFrame( + np.random.randn(4, 4), index=m_idces[0], columns=m_idces[1] + ) + + self.series_labels = Series(np.random.randn(4), index=list("abcd")) + self.frame_labels = DataFrame( + np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD") + ) + + self.series_mixed = Series(np.random.randn(4), index=[2, 4, "null", 8]) + self.frame_mixed = DataFrame(np.random.randn(4, 4), index=[2, 4, "null", 8]) + + self.series_ts = Series( + np.random.randn(4), index=date_range("20130101", periods=4) + ) + self.frame_ts = DataFrame( + np.random.randn(4, 4), index=date_range("20130101", periods=4) + ) + + dates_rev = date_range("20130101", periods=4).sort_values(ascending=False) + self.series_ts_rev = Series(np.random.randn(4), index=dates_rev) + self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev) + + self.frame_empty = DataFrame() + self.series_empty = Series(dtype=object) + + # form agglomerates + for kind in self._kinds: + d = {} + for typ in self._typs: + d[typ] = getattr(self, f"{kind}_{typ}") + + setattr(self, kind, d) + + def generate_indices(self, f, values=False): + """ + generate the indices + if values is True , use the axis values + is False, use the range + """ + axes = f.axes + if values: + axes = (list(range(len(ax))) for ax in axes) + + return itertools.product(*axes) + + def get_value(self, name, f, i, values=False): + """return the value for the location i""" + # check against values + if values: + return f.values[i] + + elif name == "iat": + return f.iloc[i] + else: + assert name == "at" + return f.loc[i] + + def check_values(self, f, func, values=False): + + if f is None: + return + axes = f.axes + indices = itertools.product(*axes) + + for i in indices: + result = getattr(f, func)[i] + + # check against values + if values: + expected = f.values[i] + else: + expected = f + for a in reversed(i): + expected = expected.__getitem__(a) + + tm.assert_almost_equal(result, expected) + + def check_result(self, method, key, typs=None, axes=None, fails=None): + def _eq(axis, obj, key): + """compare equal for these 2 keys""" + axified = _axify(obj, key, axis) + try: + getattr(obj, method).__getitem__(axified) + + except (IndexError, TypeError, KeyError) as detail: + + # if we are in fails, the ok, otherwise raise it + if fails is not None: + if isinstance(detail, fails): + return + raise + + if typs is None: + typs = self._typs + + if axes is None: + axes = [0, 1] + else: + assert axes in [0, 1] + axes = [axes] + + # check + for kind in self._kinds: + + d = getattr(self, kind) + for ax in axes: + for typ in typs: + assert typ in self._typs + + obj = d[typ] + if ax < 
obj.ndim: + _eq(axis=ax, obj=obj, key=key) diff --git a/pandas/tests/indexing/interval/__init__.py b/pandas/tests/indexing/interval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py new file mode 100644 index 00000000..db3a569d --- /dev/null +++ b/pandas/tests/indexing/interval/test_interval.py @@ -0,0 +1,175 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + IntervalIndex, + Series, +) +import pandas._testing as tm + + +class TestIntervalIndex: + @pytest.fixture + def series_with_interval_index(self): + return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl): + + ser = series_with_interval_index.copy() + + expected = ser.iloc[:3] + tm.assert_series_equal(expected, indexer_sl(ser)[:3]) + tm.assert_series_equal(expected, indexer_sl(ser)[:2.5]) + tm.assert_series_equal(expected, indexer_sl(ser)[0.1:2.5]) + if indexer_sl is tm.loc: + tm.assert_series_equal(expected, ser.loc[-1:3]) + + expected = ser.iloc[1:4] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]]) + + expected = ser.iloc[2:5] + tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2]) + + @pytest.mark.parametrize("direction", ["increasing", "decreasing"]) + def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl): + tpls = [(0, 1), (2, 3), (4, 5)] + if direction == "decreasing": + tpls = tpls[::-1] + + idx = IntervalIndex.from_tuples(tpls, closed=closed) + ser = Series(list("abc"), idx) + + for key, expected in zip(idx.left, ser): + if idx.closed_left: + assert indexer_sl(ser)[key] == expected + else: + with pytest.raises(KeyError, match=str(key)): + indexer_sl(ser)[key] + + for key, expected in zip(idx.right, ser): + if idx.closed_right: + assert indexer_sl(ser)[key] == expected + else: + with pytest.raises(KeyError, match=str(key)): + indexer_sl(ser)[key] + + for key, expected in zip(idx.mid, ser): + assert indexer_sl(ser)[key] == expected + + def test_getitem_non_matching(self, series_with_interval_index, indexer_sl): + ser = series_with_interval_index.copy() + + # this is a departure from our current + # indexing scheme, but simpler + with pytest.raises(KeyError, match=r"\[-1\] not in index"): + indexer_sl(ser)[[-1, 3, 4, 5]] + + with pytest.raises(KeyError, match=r"\[-1\] not in index"): + indexer_sl(ser)[[-1, 3]] + + @pytest.mark.slow + def test_loc_getitem_large_series(self): + ser = Series( + np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001)) + ) + + result1 = ser.loc[:80000] + result2 = ser.loc[0:80000] + result3 = ser.loc[0:80000:1] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + def test_loc_getitem_frame(self): + # CategoricalIndex with IntervalIndex categories + df = DataFrame({"A": range(10)}) + ser = pd.cut(df.A, 5) + df["B"] = ser + df = df.set_index("B") + + result = df.loc[4] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match="10"): + df.loc[10] + + # single list-like + result = df.loc[[4]] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + # non-unique + result = df.loc[[4, 5]] + expected = df.take([4, 5, 4, 5]) + tm.assert_frame_equal(result, expected) + 
+ with pytest.raises(KeyError, match=r"None of \[\[10\]\] are"): + df.loc[[10]] + + # partial missing + with pytest.raises(KeyError, match=r"\[10\] not in index"): + df.loc[[10, 4]] + + def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl): + # GH#41831 + + index = IntervalIndex([np.nan, np.nan]) + key = index[:-1] + + obj = frame_or_series(range(2), index=index) + if frame_or_series is DataFrame and indexer_sl is tm.setitem: + obj = obj.T + + result = indexer_sl(obj)[key] + expected = obj + + tm.assert_equal(result, expected) + + +class TestIntervalIndexInsideMultiIndex: + def test_mi_intervalindex_slicing_with_scalar(self): + # GH#27456 + ii = IntervalIndex.from_arrays( + [0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12], name="MP" + ) + idx = pd.MultiIndex.from_arrays( + [ + pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]), + pd.Index( + ["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"] + ), + ii, + ] + ) + + idx.names = ["Item", "RID", "MP"] + df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]}) + df.index = idx + + query_df = DataFrame( + { + "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"], + "RID": ["RID1", "RID1", "RID1", "RID2", "RID2"], + "MP": [0.2, 1.5, 1.6, 11.1, 10.9], + } + ) + + query_df = query_df.sort_index() + + idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP]) + query_df.index = idx + result = df.value.loc[query_df.index] + + # the IntervalIndex level is indexed with floats, which map to + # the intervals containing them. Matching the behavior we would get + # with _only_ an IntervalIndex, we get an IntervalIndex level back. + sliced_level = ii.take([0, 1, 1, 3, 2]) + expected_index = pd.MultiIndex.from_arrays( + [idx.get_level_values(0), idx.get_level_values(1), sliced_level] + ) + expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py new file mode 100644 index 00000000..4b89232f --- /dev/null +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -0,0 +1,233 @@ +import re + +import numpy as np +import pytest + +from pandas.compat import IS64 + +from pandas import ( + Index, + Interval, + IntervalIndex, + Series, +) +import pandas._testing as tm + + +class TestIntervalIndex: + @pytest.fixture + def series_with_interval_index(self): + return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + def test_loc_with_interval(self, series_with_interval_index, indexer_sl): + + # loc with single label / list of labels: + # - Intervals: only exact matches + # - scalars: those that contain it + + ser = series_with_interval_index.copy() + + expected = 0 + result = indexer_sl(ser)[Interval(0, 1)] + assert result == expected + + expected = ser.iloc[3:5] + result = indexer_sl(ser)[[Interval(3, 4), Interval(4, 5)]] + tm.assert_series_equal(expected, result) + + # missing or not exact + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")): + indexer_sl(ser)[Interval(3, 5, closed="left")] + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + indexer_sl(ser)[Interval(3, 5)] + + with pytest.raises( + KeyError, match=re.escape("Interval(-2, 0, closed='right')") + ): + indexer_sl(ser)[Interval(-2, 0)] + + with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")): + indexer_sl(ser)[Interval(5, 6)] + + def test_loc_with_scalar(self, 
series_with_interval_index, indexer_sl): + + # loc with single label / list of labels: + # - Intervals: only exact matches + # - scalars: those that contain it + + ser = series_with_interval_index.copy() + + assert indexer_sl(ser)[1] == 0 + assert indexer_sl(ser)[1.5] == 1 + assert indexer_sl(ser)[2] == 1 + + expected = ser.iloc[1:4] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]]) + + expected = ser.iloc[[1, 1, 2, 1]] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2, 2.5, 1.5]]) + + expected = ser.iloc[2:5] + tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2]) + + def test_loc_with_slices(self, series_with_interval_index, indexer_sl): + + # loc with slices: + # - Interval objects: only works with exact matches + # - scalars: only works for non-overlapping, monotonic intervals, + # and start/stop select location based on the interval that + # contains them: + # (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop)) + + ser = series_with_interval_index.copy() + + # slice of interval + + expected = ser.iloc[:3] + result = indexer_sl(ser)[Interval(0, 1) : Interval(2, 3)] + tm.assert_series_equal(expected, result) + + expected = ser.iloc[3:] + result = indexer_sl(ser)[Interval(3, 4) :] + tm.assert_series_equal(expected, result) + + msg = "Interval objects are not currently supported" + with pytest.raises(NotImplementedError, match=msg): + indexer_sl(ser)[Interval(3, 6) :] + + with pytest.raises(NotImplementedError, match=msg): + indexer_sl(ser)[Interval(3, 4, closed="left") :] + + def test_slice_step_ne1(self, series_with_interval_index): + # GH#31658 slice of scalar with step != 1 + ser = series_with_interval_index.copy() + expected = ser.iloc[0:4:2] + + result = ser[0:4:2] + tm.assert_series_equal(result, expected) + + result2 = ser[0:4][::2] + tm.assert_series_equal(result2, expected) + + def test_slice_float_start_stop(self, series_with_interval_index): + # GH#31658 slicing with integers is positional, with floats is not + # supported + ser = series_with_interval_index.copy() + + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): + ser[1.5:9.5:2] + + def test_slice_interval_step(self, series_with_interval_index): + # GH#31658 allows for integer step!=1, not Interval step + ser = series_with_interval_index.copy() + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): + ser[0 : 4 : Interval(0, 1)] + + def test_loc_with_overlap(self, indexer_sl): + + idx = IntervalIndex.from_tuples([(1, 5), (3, 7)]) + ser = Series(range(len(idx)), index=idx) + + # scalar + expected = ser + result = indexer_sl(ser)[4] + tm.assert_series_equal(expected, result) + + result = indexer_sl(ser)[[4]] + tm.assert_series_equal(expected, result) + + # interval + expected = 0 + result = indexer_sl(ser)[Interval(1, 5)] + result == expected + + expected = ser + result = indexer_sl(ser)[[Interval(1, 5), Interval(3, 7)]] + tm.assert_series_equal(expected, result) + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + indexer_sl(ser)[Interval(3, 5)] + + msg = r"None of \[\[Interval\(3, 5, closed='right'\)\]\]" + with pytest.raises(KeyError, match=msg): + indexer_sl(ser)[[Interval(3, 5)]] + + # slices with interval (only exact matches) + expected = ser + result = 
indexer_sl(ser)[Interval(1, 5) : Interval(3, 7)] + tm.assert_series_equal(expected, result) + + msg = "'can only get slices from an IntervalIndex if bounds are" + " non-overlapping and all monotonic increasing or decreasing'" + with pytest.raises(KeyError, match=msg): + indexer_sl(ser)[Interval(1, 6) : Interval(3, 8)] + + if indexer_sl is tm.loc: + # slices with scalar raise for overlapping intervals + # TODO KeyError is the appropriate error? + with pytest.raises(KeyError, match=msg): + ser.loc[1:4] + + def test_non_unique(self, indexer_sl): + + idx = IntervalIndex.from_tuples([(1, 3), (3, 7)]) + ser = Series(range(len(idx)), index=idx) + + result = indexer_sl(ser)[Interval(1, 3)] + assert result == 0 + + result = indexer_sl(ser)[[Interval(1, 3)]] + expected = ser.iloc[0:1] + tm.assert_series_equal(expected, result) + + def test_non_unique_moar(self, indexer_sl): + + idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)]) + ser = Series(range(len(idx)), index=idx) + + expected = ser.iloc[[0, 1]] + result = indexer_sl(ser)[Interval(1, 3)] + tm.assert_series_equal(expected, result) + + expected = ser + result = indexer_sl(ser)[Interval(1, 3) :] + tm.assert_series_equal(expected, result) + + expected = ser.iloc[[0, 1]] + result = indexer_sl(ser)[[Interval(1, 3)]] + tm.assert_series_equal(expected, result) + + def test_loc_getitem_missing_key_error_message( + self, frame_or_series, series_with_interval_index + ): + # GH#27365 + ser = series_with_interval_index.copy() + obj = frame_or_series(ser) + with pytest.raises(KeyError, match=r"\[6\]"): + obj.loc[[4, 5, 6]] + + +@pytest.mark.xfail(not IS64, reason="GH 23440") +@pytest.mark.parametrize( + "intervals", + [ + ([Interval(-np.inf, 0.0), Interval(0.0, 1.0)]), + ([Interval(-np.inf, -2.0), Interval(-2.0, -1.0)]), + ([Interval(-1.0, 0.0), Interval(0.0, np.inf)]), + ([Interval(1.0, 2.0), Interval(2.0, np.inf)]), + ], +) +def test_repeating_interval_index_with_infs(intervals): + # GH 46658 + + interval_index = Index(intervals * 51) + + expected = np.arange(1, 102, 2, dtype=np.intp) + result = interval_index.get_indexer_for([intervals[1]]) + + tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/__init__.py b/pandas/tests/indexing/multiindex/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py new file mode 100644 index 00000000..2efb288a --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +from pandas.errors import SettingWithCopyError +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +def test_detect_chained_assignment(using_copy_on_write): + # Inplace ops, originally from: + # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + a = [12, 23] + b = [123, None] + c = [1234, 2345] + d = [12345, 23456] + tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")] + events = { + ("eyes", "left"): a, + ("eyes", "right"): b, + ("ears", "left"): c, + ("ears", "right"): d, + } + multiind = MultiIndex.from_tuples(tuples, names=["part", "side"]) + zed = DataFrame(events, index=["a", "b"], columns=multiind) + + if using_copy_on_write: + zed["eyes"]["right"].fillna(value=555, inplace=True) + else: + msg = "A value is trying to be 
set on a copy of a slice from a DataFrame" + with pytest.raises(SettingWithCopyError, match=msg): + zed["eyes"]["right"].fillna(value=555, inplace=True) + + +@td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view +def test_cache_updating(using_copy_on_write): + # 5216 + # make sure that we don't try to set a dead cache + a = np.random.rand(10, 3) + df = DataFrame(a, columns=["x", "y", "z"]) + df_original = df.copy() + tuples = [(i, j) for i in range(5) for j in range(2)] + index = MultiIndex.from_tuples(tuples) + df.index = index + + # setting via chained assignment + # but actually works, since everything is a view + df.loc[0]["z"].iloc[0] = 1.0 + result = df.loc[(0, 0), "z"] + if using_copy_on_write: + assert result == df_original.loc[0, "z"] + else: + assert result == 1 + + # correct setting + df.loc[(0, 0), "z"] = 2 + result = df.loc[(0, 0), "z"] + assert result == 2 + + +@pytest.mark.slow +def test_indexer_caching(): + # GH5727 + # make sure that indexers are in the _internal_names_set + n = 1000001 + arrays = (range(n), range(n)) + index = MultiIndex.from_tuples(zip(*arrays)) + s = Series(np.zeros(n), index=index) + str(s) + + # setitem + expected = Series(np.ones(n), index=index) + s = Series(np.zeros(n), index=index) + s[s == 0] = 1 + tm.assert_series_equal(s, expected) diff --git a/pandas/tests/indexing/multiindex/test_datetime.py b/pandas/tests/indexing/multiindex/test_datetime.py new file mode 100644 index 00000000..a49cb0bc --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_datetime.py @@ -0,0 +1,50 @@ +from datetime import datetime + +import numpy as np + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Period, + Series, + period_range, + to_datetime, +) +import pandas._testing as tm + + +def test_multiindex_period_datetime(): + # GH4861, using datetime in period of multiindex raises exception + + idx1 = Index(["a", "a", "a", "b", "b"]) + idx2 = period_range("2012-01", periods=len(idx1), freq="M") + s = Series(np.random.randn(len(idx1)), [idx1, idx2]) + + # try Period as index + expected = s.iloc[0] + result = s.loc["a", Period("2012-01")] + assert result == expected + + # try datetime as index + result = s.loc["a", datetime(2012, 1, 1)] + assert result == expected + + +def test_multiindex_datetime_columns(): + # GH35015, using datetime as column indices raises exception + + mi = MultiIndex.from_tuples( + [(to_datetime("02/29/2020"), to_datetime("03/01/2020"))], names=["a", "b"] + ) + + df = DataFrame([], columns=mi) + + expected_df = DataFrame( + [], + columns=MultiIndex.from_arrays( + [[to_datetime("02/29/2020")], [to_datetime("03/01/2020")]], names=["a", "b"] + ), + ) + + tm.assert_frame_equal(df, expected_df) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py new file mode 100644 index 00000000..3790a6e9 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -0,0 +1,394 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.core.indexing import IndexingError + +# ---------------------------------------------------------------------------- +# test indexing of Series with multi-level Index +# ---------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "access_method", + [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)], +) +@pytest.mark.parametrize( + "level1_value, expected", + 
[(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))], +) +def test_series_getitem_multiindex(access_method, level1_value, expected): + + # GH 6018 + # series regression getitem with a multi-index + + mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)], names=["A", "B"]) + ser = Series([1, 2, 3], index=mi) + expected.index.name = "A" + + result = access_method(ser, level1_value) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("level0_value", ["D", "A"]) +def test_series_getitem_duplicates_multiindex(level0_value): + # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise + # the appropriate error, only in PY3 of course! + + index = MultiIndex( + levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=["tag", "day"], + ) + arr = np.random.randn(len(index), 1) + df = DataFrame(arr, index=index, columns=["val"]) + + # confirm indexing on missing value raises KeyError + if level0_value != "A": + with pytest.raises(KeyError, match=r"^'A'$"): + df.val["A"] + + with pytest.raises(KeyError, match=r"^'X'$"): + df.val["X"] + + result = df.val[level0_value] + expected = Series( + arr.ravel()[0:3], name="val", index=Index([26, 37, 57], name="day") + ) + tm.assert_series_equal(result, expected) + + +def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl): + s = multiindex_year_month_day_dataframe_random_data["A"] + expected = s.reindex(s.index[42:65]) + expected.index = expected.index.droplevel(0).droplevel(0) + + result = indexer_sl(s)[2000, 3] + tm.assert_series_equal(result, expected) + + +def test_series_getitem_returns_scalar( + multiindex_year_month_day_dataframe_random_data, indexer_sl +): + s = multiindex_year_month_day_dataframe_random_data["A"] + expected = s.iloc[49] + + result = indexer_sl(s)[2000, 3, 10] + assert result == expected + + +@pytest.mark.parametrize( + "indexer,expected_error,expected_error_msg", + [ + (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"), + (lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s) + (lambda s: s[len(s)], KeyError, ""), # match should include len(s) + ( + lambda s: s.iloc[len(s)], + IndexError, + "single positional indexer is out-of-bounds", + ), + ], +) +def test_series_getitem_indexing_errors( + multiindex_year_month_day_dataframe_random_data, + indexer, + expected_error, + expected_error_msg, +): + s = multiindex_year_month_day_dataframe_random_data["A"] + with pytest.raises(expected_error, match=expected_error_msg): + indexer(s) + + +def test_series_getitem_corner_generator( + multiindex_year_month_day_dataframe_random_data, +): + s = multiindex_year_month_day_dataframe_random_data["A"] + result = s[(x > 0 for x in s)] + expected = s[s > 0] + tm.assert_series_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# test indexing of DataFrame with multi-level Index +# ---------------------------------------------------------------------------- + + +def test_getitem_simple(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data.T + expected = df.values[:, 0] + result = df["foo", "one"].values + tm.assert_almost_equal(result, expected) + + 
+@pytest.mark.parametrize( + "indexer,expected_error_msg", + [ + (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"), + (lambda df: df["foobar"], r"^'foobar'$"), + ], +) +def test_frame_getitem_simple_key_error( + multiindex_dataframe_random_data, indexer, expected_error_msg +): + df = multiindex_dataframe_random_data.T + with pytest.raises(KeyError, match=expected_error_msg): + indexer(df) + + +def test_frame_getitem_multicolumn_empty_level(): + df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]}) + df.columns = [ + ["level1 item1", "level1 item2"], + ["", "level2 item2"], + ["level3 item1", "level3 item2"], + ] + + result = df["level1 item1"] + expected = DataFrame( + [["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"] + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer,expected_slice", + [ + (lambda df: df["foo"], slice(3)), + (lambda df: df["bar"], slice(3, 5)), + (lambda df: df.loc[:, "bar"], slice(3, 5)), + ], +) +def test_frame_getitem_toplevel( + multiindex_dataframe_random_data, indexer, expected_slice +): + df = multiindex_dataframe_random_data.T + expected = df.reindex(columns=df.columns[expected_slice]) + expected.columns = expected.columns.droplevel(0) + result = indexer(df) + tm.assert_frame_equal(result, expected) + + +def test_frame_mixed_depth_get(): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + result = df["a"] + expected = df["a", "", ""].rename("a") + tm.assert_series_equal(result, expected) + + result = df["routine1", "result1"] + expected = df["routine1", "result1", ""] + expected = expected.rename(("routine1", "result1")) + tm.assert_series_equal(result, expected) + + +def test_frame_getitem_nan_multiindex(nulls_fixture): + # GH#29751 + # loc on a multiindex containing nan values + n = nulls_fixture # for code readability + cols = ["a", "b", "c"] + df = DataFrame( + [[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]], + columns=cols, + ).set_index(["a", "b"]) + df["c"] = df["c"].astype("int64") + + idx = (21, n) + result = df.loc[:idx] + expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"]) + expected["c"] = expected["c"].astype("int64") + tm.assert_frame_equal(result, expected) + + result = df.loc[idx:] + expected = DataFrame( + [[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols + ).set_index(["a", "b"]) + expected["c"] = expected["c"].astype("int64") + tm.assert_frame_equal(result, expected) + + idx1, idx2 = (21, n), (31, n) + result = df.loc[idx1:idx2] + expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"]) + expected["c"] = expected["c"].astype("int64") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer,expected", + [ + ( + (["b"], ["bar", np.nan]), + ( + DataFrame( + [[2, 3], [5, 6]], + columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]), + dtype="int64", + ) + ), + ), + ( + (["a", "b"]), + ( + DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples( + [("a", "foo"), ("b", "bar"), ("b", np.nan)] + ), + dtype="int64", + ) + ), + ), + ( + (["b"]), + ( + DataFrame( + [[2, 3], [5, 6]], + columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]), + dtype="int64", + ) + ), + ), + ( + (["b"], ["bar"]), + ( + DataFrame( + [[2], [5]], + 
columns=MultiIndex.from_tuples([("b", "bar")]), + dtype="int64", + ) + ), + ), + ( + (["b"], [np.nan]), + ( + DataFrame( + [[3], [6]], + columns=MultiIndex( + codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]] + ), + dtype="int64", + ) + ), + ), + (("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))), + ], +) +def test_frame_getitem_nan_cols_multiindex( + indexer, + expected, + nulls_fixture, +): + # Slicing MultiIndex including levels with nan values, for more information + # see GH#25154 + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples( + [("a", "foo"), ("b", "bar"), ("b", nulls_fixture)] + ), + dtype="int64", + ) + + result = df.loc[:, indexer] + tm.assert_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# test indexing of DataFrame with multi-level Index with duplicates +# ---------------------------------------------------------------------------- + + +@pytest.fixture +def dataframe_with_duplicate_index(): + """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" + data = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]] + index = ["h1", "h3", "h5"] + columns = MultiIndex( + levels=[["A", "B"], ["A1", "A2", "B1", "B2"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], + names=["main", "sub"], + ) + return DataFrame(data, index=index, columns=columns) + + +@pytest.mark.parametrize( + "indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]] +) +def test_frame_mi_access(dataframe_with_duplicate_index, indexer): + # GH 4145 + df = dataframe_with_duplicate_index + index = Index(["h1", "h3", "h5"]) + columns = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"]) + expected = DataFrame([["a", 1, 1]], index=columns, columns=index).T + + result = indexer(df) + tm.assert_frame_equal(result, expected) + + +def test_frame_mi_access_returns_series(dataframe_with_duplicate_index): + # GH 4146, not returning a block manager when selecting a unique index + # from a duplicate index + # as of 4879, this returns a Series (which is similar to what happens + # with a non-unique) + df = dataframe_with_duplicate_index + expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1") + result = df["A"]["A1"] + tm.assert_series_equal(result, expected) + + +def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index): + # selecting a non_unique from the 2nd level + df = dataframe_with_duplicate_index + expected = DataFrame( + [["d", 4, 4], ["e", 5, 5]], + index=Index(["B2", "B2"], name="sub"), + columns=["h1", "h3", "h5"], + ).T + result = df["A"]["B2"] + tm.assert_frame_equal(result, expected) + + +def test_frame_mi_empty_slice(): + # GH 15454 + df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]])) + result = df[[]] + expected = DataFrame( + index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []]) + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_empty_multiindex(): + # GH#36936 + arrays = [["a", "a", "b", "a"], ["a", "a", "b", "b"]] + index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2")) + df = DataFrame([1, 2, 3, 4], index=index, columns=["value"]) + + # loc on empty multiindex == loc with False mask + empty_multiindex = df.loc[df.loc[:, "value"] == 0, :].index + result = df.loc[empty_multiindex, :] + expected = df.loc[[False] * len(df.index), :] + tm.assert_frame_equal(result, expected) + + # replacing value with loc on empty multiindex + df.loc[df.loc[df.loc[:, "value"] 
== 0].index, "value"] = 5 + result = df + expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py new file mode 100644 index 00000000..db91d5ad --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -0,0 +1,171 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.fixture +def simple_multiindex_dataframe(): + """ + Factory function to create simple 3 x 3 dataframe with + both columns and row MultiIndex using supplied data or + random data by default. + """ + + data = np.random.randn(3, 3) + return DataFrame( + data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] + ) + + +@pytest.mark.parametrize( + "indexer, expected", + [ + ( + lambda df: df.iloc[0], + lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)), + ), + ( + lambda df: df.iloc[2], + lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)), + ), + ( + lambda df: df.iloc[:, 2], + lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)), + ), + ], +) +def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe): + df = simple_multiindex_dataframe + arr = df.values + result = indexer(df) + expected = expected(arr) + tm.assert_series_equal(result, expected) + + +def test_iloc_returns_dataframe(simple_multiindex_dataframe): + df = simple_multiindex_dataframe + result = df.iloc[[0, 1]] + expected = df.xs(4, drop_level=False) + tm.assert_frame_equal(result, expected) + + +def test_iloc_returns_scalar(simple_multiindex_dataframe): + df = simple_multiindex_dataframe + arr = df.values + result = df.iloc[2, 2] + expected = arr[2, 2] + assert result == expected + + +def test_iloc_getitem_multiple_items(): + # GH 5528 + tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]]) + index = MultiIndex.from_tuples(tup) + df = DataFrame(np.random.randn(4, 4), index=index) + result = df.iloc[[2, 3]] + expected = df.xs("b", drop_level=False) + tm.assert_frame_equal(result, expected) + + +def test_iloc_getitem_labels(): + # this is basically regular indexing + arr = np.random.randn(4, 3) + df = DataFrame( + arr, + columns=[["i", "i", "j"], ["A", "A", "B"]], + index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]], + ) + result = df.iloc[2, 2] + expected = arr[2, 2] + assert result == expected + + +def test_frame_getitem_slice(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.iloc[:4] + expected = df[:4] + tm.assert_frame_equal(result, expected) + + +def test_frame_setitem_slice(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + df.iloc[:4] = 0 + + assert (df.values[:4] == 0).all() + assert (df.values[4:] != 0).all() + + +def test_indexing_ambiguity_bug_1678(): + # GH 1678 + columns = MultiIndex.from_tuples( + [("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")] + ) + index = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) + + df = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns) + + result = df.iloc[:, 1] + expected = df.loc[:, ("Ohio", "Red")] + tm.assert_series_equal(result, expected) + + +def test_iloc_integer_locations(): + # GH 13797 + data = [ + ["str00", "str01"], + ["str10", "str11"], + ["str20", "srt21"], + ["str30", "str31"], + ["str40", "str41"], + ] + + index = MultiIndex.from_tuples( + [("CC", 
"A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")] + ) + + expected = DataFrame(data) + df = DataFrame(data, index=index) + + result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)]) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, indexes, values, expected_k", + [ + # test without indexer value in first level of MultiIndex + ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]), + # test like code sample 1 in the issue + ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]), + # test like code sample 2 in the issue + ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]), + # test like code sample 3 in the issue + ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]), + ], +) +def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k): + # GH17148 + df = DataFrame(data=data, columns=["i", "j", "k"]) + df = df.set_index(["i", "j"]) + + series = df.k.copy() + for i, v in zip(indexes, values): + series.iloc[i] += v + + df["k"] = expected_k + expected = df.k + tm.assert_series_equal(series, expected) + + +def test_getitem_iloc(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.iloc[2] + expected = df.xs(df.index[2]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py new file mode 100644 index 00000000..e8c766d4 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -0,0 +1,97 @@ +from typing import ( + Any, + List, +) +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + +m = 50 +n = 1000 +cols = ["jim", "joe", "jolie", "joline", "jolia"] + +vals: List[Any] = [ + np.random.randint(0, 10, n), + np.random.choice(list("abcdefghij"), n), + np.random.choice(pd.date_range("20141009", periods=10).tolist(), n), + np.random.choice(list("ZYXWVUTSRQ"), n), + np.random.randn(n), +] +vals = list(map(tuple, zip(*vals))) + +# bunch of keys for testing +keys: List[Any] = [ + np.random.randint(0, 11, m), + np.random.choice(list("abcdefghijk"), m), + np.random.choice(pd.date_range("20141009", periods=11).tolist(), m), + np.random.choice(list("ZYXWVUTSRQP"), m), +] +keys = list(map(tuple, zip(*keys))) +keys += list(map(lambda t: t[:-1], vals[:: n // m])) + + +# covers both unique index and non-unique index +df = DataFrame(vals, columns=cols) +a = pd.concat([df, df]) +b = df.drop_duplicates(subset=cols[:-1]) + + +def validate(mi, df, key): + # check indexing into a multi-index before & past the lexsort depth + + mask = np.ones(len(df)).astype("bool") + + # test for all partials of this key + for i, k in enumerate(key): + mask &= df.iloc[:, i] == k + + if not mask.any(): + assert key[: i + 1] not in mi.index + continue + + assert key[: i + 1] in mi.index + right = df[mask].copy() + + if i + 1 != len(key): # partial key + return_value = right.drop(cols[: i + 1], axis=1, inplace=True) + assert return_value is None + return_value = right.set_index(cols[i + 1 : -1], inplace=True) + assert return_value is None + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) + + else: # full key + return_value = right.set_index(cols[:-1], inplace=True) + assert return_value is None + if len(right) == 1: # single hit + right = Series( + right["jolia"].values, name=right.index[0], index=["jolia"] + ) + 
tm.assert_series_equal(mi.loc[key[: i + 1]], right) + else: # multi hit + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) + + +@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") +@pytest.mark.parametrize("lexsort_depth", list(range(5))) +@pytest.mark.parametrize("key", keys) +@pytest.mark.parametrize("frame", [a, b]) +def test_multiindex_get_loc(lexsort_depth, key, frame): + # GH7724, GH2646 + + with warnings.catch_warnings(record=True): + if lexsort_depth == 0: + df = frame.copy() + else: + df = frame.sort_values(by=cols[:lexsort_depth]) + + mi = df.set_index(cols[:-1]) + assert not mi.index._lexsort_depth < lexsort_depth + validate(mi, df, key) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py new file mode 100644 index 00000000..5cf04428 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -0,0 +1,962 @@ +import numpy as np +import pytest + +from pandas.errors import ( + IndexingError, + PerformanceWarning, +) + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +@pytest.fixture +def single_level_multiindex(): + """single level MultiIndex""" + return MultiIndex( + levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] + ) + + +@pytest.fixture +def frame_random_data_integer_multi_index(): + levels = [[0, 1], [0, 1, 2]] + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + index = MultiIndex(levels=levels, codes=codes) + return DataFrame(np.random.randn(6, 2), index=index) + + +class TestMultiIndexLoc: + def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + frame.loc[("bar", "two"), "B"] = 5 + assert frame.loc[("bar", "two"), "B"] == 5 + + # with integer labels + df = frame.copy() + df.columns = list(range(3)) + df.loc[("bar", "two"), 1] = 7 + assert df.loc[("bar", "two"), 1] == 7 + + def test_loc_getitem_general(self): + + # GH#2817 + data = { + "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}, + } + df = DataFrame(data).set_index(keys=["col", "year"]) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + return_value = df.sort_index(inplace=True) + assert return_value is None + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"]) + expected = DataFrame({"amount": [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) + + def test_loc_getitem_multiindex_missing_label_raises(self): + # GH#21593 + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match=r"^2$"): + df.loc[2] + + def test_loc_getitem_list_of_tuples_with_multiindex( + self, multiindex_year_month_day_dataframe_random_data + ): + ser = multiindex_year_month_day_dataframe_random_data["A"] + expected = ser.reindex(ser.index[49:51]) + result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_series(self): + # GH14730 + # passing a series as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]]) + x = Series(index=index, data=range(9), 
dtype=np.float64) + y = Series([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]), + dtype=np.float64, + ) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + result = x.loc[[1, 3]] + tm.assert_series_equal(result, expected) + + # GH15424 + y1 = Series([1, 3], index=[1, 2]) + result = x.loc[y1] + tm.assert_series_equal(result, expected) + + empty = Series(data=[], dtype=np.float64) + expected = Series( + [], + index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), + dtype=np.float64, + ) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_array(self): + # GH15434 + # passing an array as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = np.array([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]), + dtype=np.float64, + ) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + # empty array: + empty = np.array([]) + expected = Series( + [], + index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), + dtype="float64", + ) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + # 0-dim array (scalar): + scalar = np.int64(1) + expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64) + result = x.loc[scalar] + tm.assert_series_equal(result, expected) + + def test_loc_multiindex_labels(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[["i", "i", "j"], ["A", "A", "B"]], + index=[["i", "i", "j"], ["X", "X", "Y"]], + ) + + # the first 2 rows + expected = df.iloc[[0, 1]].droplevel(0) + result = df.loc["i"] + tm.assert_frame_equal(result, expected) + + # 2nd (last) column + expected = df.iloc[:, [2]].droplevel(0, axis=1) + result = df.loc[:, "j"] + tm.assert_frame_equal(result, expected) + + # bottom right corner + expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1) + result = df.loc["j"].loc[:, "j"] + tm.assert_frame_equal(result, expected) + + # with a tuple + expected = df.iloc[[0, 1]] + result = df.loc[("i", "X")] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_ints(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + expected = df.iloc[[0, 1]].droplevel(0) + result = df.loc[4] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_missing_label_raises(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match=r"^2$"): + df.loc[2] + + @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])]) + def test_loc_multiindex_list_missing_label(self, key, pos): + # GH 27148 - lists with missing labels _do_ raise + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match="not in index"): + df.loc[key] + + def test_loc_multiindex_too_many_dims_raises(self): + # GH 14885 + s = Series( + range(8), + index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]), + ) + + with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"): + s.loc["a", "b"] + with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"): + s.loc["a", "d", "g"] + with pytest.raises(IndexingError, match="Too many indexers"): + s.loc["a", 
"d", "g", "j"] + + def test_loc_multiindex_indexer_none(self): + + # GH6788 + # multi-index indexer is None (meaning take all) + attributes = ["Attribute" + str(i) for i in range(1)] + attribute_values = ["Value" + str(i) for i in range(5)] + + index = MultiIndex.from_product([attributes, attribute_values]) + df = 0.1 * np.random.randn(10, 1 * 5) + 0.5 + df = DataFrame(df, columns=index) + result = df[attributes] + tm.assert_frame_equal(result, df) + + # GH 7349 + # loc with a multi-index seems to be doing fallback + df = DataFrame( + np.arange(12).reshape(-1, 1), + index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]), + ) + + expected = df.loc[([1, 2],), :] + result = df.loc[[1, 2]] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_incomplete(self): + + # GH 7399 + # incomplete indexers + s = Series( + np.arange(15, dtype="int64"), + MultiIndex.from_product([range(5), ["a", "b", "c"]]), + ) + expected = s.loc[:, "a":"c"] + + result = s.loc[0:4, "a":"c"] + tm.assert_series_equal(result, expected) + + result = s.loc[:4, "a":"c"] + tm.assert_series_equal(result, expected) + + result = s.loc[0:, "a":"c"] + tm.assert_series_equal(result, expected) + + # GH 7400 + # multiindexer getitem with list of indexers skips wrong element + s = Series( + np.arange(15, dtype="int64"), + MultiIndex.from_product([range(5), ["a", "b", "c"]]), + ) + expected = s.iloc[[6, 7, 8, 12, 13, 14]] + result = s.loc[2:4:2, "a":"c"] + tm.assert_series_equal(result, expected) + + def test_get_loc_single_level(self, single_level_multiindex): + single_level = single_level_multiindex + s = Series(np.random.randn(len(single_level)), index=single_level) + for k in single_level.values: + s[k] + + def test_loc_getitem_int_slice(self): + # GH 3053 + # loc should treat integer slices like label slices + + index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[6:8, :] + expected = df + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[20:30, :] + expected = df.iloc[2:] + tm.assert_frame_equal(result, expected) + + # doc examples + result = df.loc[10, :] + expected = df.iloc[0:2] + expected.index = ["a", "b"] + tm.assert_frame_equal(result, expected) + + result = df.loc[:, 10] + expected = df[10] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index) + ) + @pytest.mark.parametrize( + "indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index) + ) + def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2): + # GH #19686 + # .loc should work with nested indexers which can be + # any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices + + def convert_nested_indexer(indexer_type, keys): + if indexer_type == np.ndarray: + return np.array(keys) + if indexer_type == slice: + return slice(*keys) + return indexer_type(keys) + + a = [10, 20, 30] + b = [1, 2, 3] + index = MultiIndex.from_product([a, b]) + df = DataFrame( + np.arange(len(index), dtype="int64"), index=index, columns=["Data"] + ) + + keys = ([10, 20], [2, 3]) + types = (indexer_type_1, indexer_type_2) + + # check indexers with all the combinations of nested objects + # of all the valid types + indexer = tuple( + convert_nested_indexer(indexer_type, k) + for indexer_type, k in zip(types, keys) + ) + if indexer_type_1 is set or 
indexer_type_2 is set: + with tm.assert_produces_warning(FutureWarning): + result = df.loc[indexer, "Data"] + else: + result = df.loc[indexer, "Data"] + expected = Series( + [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys) + ) + + tm.assert_series_equal(result, expected) + + def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series): + # GH#37711 + mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")]) + obj = frame_or_series([1, 2], index=mi) + obj.loc[("a",)] = 0 + expected = frame_or_series([0, 2], index=mi) + tm.assert_equal(obj, expected) + + @pytest.mark.parametrize("indexer", [("a",), ("a")]) + def test_multiindex_one_dimensional_tuple_columns(self, indexer): + # GH#37711 + mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")]) + obj = DataFrame([1, 2], index=mi) + obj.loc[indexer, :] = 0 + expected = DataFrame([0, 2], index=mi) + tm.assert_frame_equal(obj, expected) + + @pytest.mark.parametrize( + "indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)] + ) + def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value): + # GH#39147 + mi = MultiIndex.from_tuples([(1, 2), (3, 4)]) + df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"]) + df.loc[indexer, ["c", "d"]] = 1.0 + expected = DataFrame( + [[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]], + index=mi, + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(df, expected) + + def test_sorted_multiindex_after_union(self): + # GH#44752 + midx = MultiIndex.from_product( + [pd.date_range("20110101", periods=2), Index(["a", "b"])] + ) + ser1 = Series(1, index=midx) + ser2 = Series(1, index=midx[:2]) + df = pd.concat([ser1, ser2], axis=1) + expected = df.copy() + result = df.loc["2011-01-01":"2011-01-02"] + tm.assert_frame_equal(result, expected) + + df = DataFrame({0: ser1, 1: ser2}) + result = df.loc["2011-01-01":"2011-01-02"] + tm.assert_frame_equal(result, expected) + + df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1) + result = df.loc["2011-01-01":"2011-01-02"] + tm.assert_frame_equal(result, expected) + + def test_loc_no_second_level_index(self): + # GH#43599 + df = DataFrame( + index=MultiIndex.from_product([list("ab"), list("cd"), list("e")]), + columns=["Val"], + ) + res = df.loc[np.s_[:, "c", :]] + expected = DataFrame( + index=MultiIndex.from_product([list("ab"), list("e")]), columns=["Val"] + ) + tm.assert_frame_equal(res, expected) + + +@pytest.mark.parametrize( + "indexer, pos", + [ + ([], []), # empty ok + (["A"], slice(3)), + (["A", "D"], []), # "D" isn't present -> raise + (["D", "E"], []), # no values found -> raise + (["D"], []), # same, with single item list: GH 27148 + (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)), + (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)), + ], +) +def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos): + # GH 7866 + # multi-index slicing with missing indexers + idx = MultiIndex.from_product( + [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"] + ) + ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index() + expected = ser.iloc[pos] + + if expected.size == 0 and indexer != []: + with pytest.raises(KeyError, match=str(indexer)): + ser.loc[indexer] + else: + warn = None + msg = "MultiIndex with a nested sequence" + if indexer == (slice(None), ["foo", "bah"]): + # "bah" is not in idx.levels[1], so is ignored, will raise KeyError + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + result = ser.loc[indexer] + tm.assert_series_equal(result, expected) + + 
+@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])]) +def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer): + # GH 8737 + # empty indexer + multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"])) + df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) + df = df.sort_index(level=0, axis=1) + + expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) + result = df.loc[:, columns_indexer] + tm.assert_frame_equal(result, expected) + + +def test_loc_getitem_duplicates_multiindex_non_scalar_type_object(): + # regression from < 0.14.0 + # GH 7914 + df = DataFrame( + [[np.mean, np.median], ["mean", "median"]], + columns=MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")]), + index=["function", "name"], + ) + result = df.loc["function", ("functs", "mean")] + expected = np.mean + assert result == expected + + +def test_loc_getitem_tuple_plus_slice(): + # GH 671 + df = DataFrame( + { + "a": np.arange(10), + "b": np.arange(10), + "c": np.random.randn(10), + "d": np.random.randn(10), + } + ).set_index(["a", "b"]) + expected = df.loc[0, 0] + result = df.loc[(0, 0), :] + tm.assert_series_equal(result, expected) + + +def test_loc_getitem_int(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index + result = df.loc[1] + expected = df[-3:] + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + + +def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index + with pytest.raises(KeyError, match=r"^3$"): + df.loc[3] + + +def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + + # test setup - check key not in dataframe + with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"): + df.loc[("bar", "three"), "B"] + + # in theory should be inserting in a sorted space???? 
+ df.loc[("bar", "three"), "B"] = 0 + expected = 0 + result = df.sort_index().loc[("bar", "three"), "B"] + assert result == expected + + +def test_loc_setitem_single_column_slice(): + # case from https://github.com/pandas-dev/pandas/issues/27841 + df = DataFrame( + "string", + index=list("abcd"), + columns=MultiIndex.from_product([["Main"], ("another", "one")]), + ) + df["labels"] = "a" + df.loc[:, "labels"] = df.index + tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index)) + + # test with non-object block + df = DataFrame( + np.nan, + index=range(4), + columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]), + ) + expected = df.copy() + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[:, "B"] = np.arange(4) + with tm.assert_produces_warning(DeprecationWarning, match=msg): + expected.iloc[:, 2] = np.arange(4) + tm.assert_frame_equal(df, expected) + + +def test_loc_nan_multiindex(): + # GH 5286 + tups = [ + ("Good Things", "C", np.nan), + ("Good Things", "R", np.nan), + ("Bad Things", "C", np.nan), + ("Bad Things", "T", np.nan), + ("Okay Things", "N", "B"), + ("Okay Things", "N", "D"), + ("Okay Things", "B", np.nan), + ("Okay Things", "D", np.nan), + ] + df = DataFrame( + np.ones((8, 4)), + columns=Index(["d1", "d2", "d3", "d4"]), + index=MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"]), + ) + result = df.loc["Good Things"].loc["C"] + expected = DataFrame( + np.ones((1, 4)), + index=Index([np.nan], dtype="object", name="u3"), + columns=Index(["d1", "d2", "d3", "d4"], dtype="object"), + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_period_string_indexing(): + # GH 9892 + a = pd.period_range("2013Q1", "2013Q4", freq="Q") + i = (1111, 2222, 3333) + idx = MultiIndex.from_product((a, i), names=("Period", "CVR")) + df = DataFrame( + index=idx, + columns=( + "OMS", + "OMK", + "RES", + "DRIFT_IND", + "OEVRIG_IND", + "FIN_IND", + "VARE_UD", + "LOEN_UD", + "FIN_UD", + ), + ) + result = df.loc[("2013Q1", 1111), "OMS"] + + alt = df.loc[(a[0], 1111), "OMS"] + assert np.isnan(alt) + + # Because the resolution of the string matches, it is an exact lookup, + # not a slice + assert np.isnan(result) + + # TODO: should it figure this out? 
+ # alt = df.loc["2013Q1", 1111, "OMS"] + # assert np.isnan(alt) + + +def test_loc_datetime_mask_slicing(): + # GH 16699 + dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"]) + m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"]) + df = DataFrame( + data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"] + ) + result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"] + expected = Series( + [3], + name="C1", + index=MultiIndex.from_tuples( + [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))], + names=["Idx1", "Idx2"], + ), + ) + tm.assert_series_equal(result, expected) + + +def test_loc_datetime_series_tuple_slicing(): + # https://github.com/pandas-dev/pandas/issues/35858 + date = pd.Timestamp("2000") + ser = Series( + 1, + index=MultiIndex.from_tuples([("a", date)], names=["a", "b"]), + name="c", + ) + result = ser.loc[:, [date]] + tm.assert_series_equal(result, ser) + + +def test_loc_with_mi_indexer(): + # https://github.com/pandas-dev/pandas/issues/35351 + df = DataFrame( + data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]], + index=MultiIndex.from_tuples( + [(0, 1), (1, 0), (1, 1), (1, 1)], names=["index", "date"] + ), + columns=["author", "price"], + ) + idx = MultiIndex.from_tuples([(0, 1), (1, 1)], names=["index", "date"]) + result = df.loc[idx, :] + expected = DataFrame( + [["a", 1], ["b", 1], ["c", 2]], + index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=["index", "date"]), + columns=["author", "price"], + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_mi_with_level1_named_0(): + # GH#37194 + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + + ser = Series(range(3), index=dti) + df = ser.to_frame() + df[1] = dti + + df2 = df.set_index(0, append=True) + assert df2.index.names == (None, 0) + df2.index.get_loc(dti[0]) # smoke test + + result = df2.loc[dti[0]] + expected = df2.iloc[[0]].droplevel(None) + tm.assert_frame_equal(result, expected) + + ser2 = df2[1] + assert ser2.index.names == (None, 0) + + result = ser2.loc[dti[0]] + expected = ser2.iloc[[0]].droplevel(None) + tm.assert_series_equal(result, expected) + + +def test_getitem_str_slice(datapath): + # GH#15928 + path = datapath("reshape", "merge", "data", "quotes2.csv") + df = pd.read_csv(path, parse_dates=["time"]) + df2 = df.set_index(["ticker", "time"]).sort_index() + + res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0) + expected = df2.loc["AAPL"].loc[slice("2016-05-25 13:30:00"), :] + tm.assert_frame_equal(res, expected) + + +def test_3levels_leading_period_index(): + # GH#24091 + pi = pd.PeriodIndex( + ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"], + name="datetime", + freq="B", + ) + lev2 = ["A", "A", "Z", "W"] + lev3 = ["B", "C", "Q", "F"] + mi = MultiIndex.from_arrays([pi, lev2, lev3]) + + ser = Series(range(4), index=mi, dtype=np.float64) + result = ser.loc[(pi[0], "A", "B")] + assert result == 0.0 + + +class TestKeyErrorsWithMultiIndex: + def test_missing_keys_raises_keyerror(self): + # GH#27420 KeyError, not TypeError + df = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"]) + df2 = df.set_index(["A", "B"]) + + with pytest.raises(KeyError, match="1"): + df2.loc[(1, 6)] + + def test_missing_key_raises_keyerror2(self): + # GH#21168 KeyError, not "IndexingError: Too many indexers" + ser = Series(-1, index=MultiIndex.from_product([[0, 1]] * 2)) + + with pytest.raises(KeyError, match=r"\(0, 3\)"): + ser.loc[0, 3] + + def test_missing_key_combination(self): + # 
GH: 19556 + mi = MultiIndex.from_arrays( + [ + np.array(["a", "a", "b", "b"]), + np.array(["1", "2", "2", "3"]), + np.array(["c", "d", "c", "d"]), + ], + names=["one", "two", "three"], + ) + df = DataFrame(np.random.rand(4, 3), index=mi) + msg = r"\('b', '1', slice\(None, None, None\)\)" + with pytest.raises(KeyError, match=msg): + df.loc[("b", "1", slice(None)), :] + with pytest.raises(KeyError, match=msg): + df.index.get_locs(("b", "1", slice(None))) + with pytest.raises(KeyError, match=r"\('b', '1'\)"): + df.loc[("b", "1"), :] + + +def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data): + df = multiindex_year_month_day_dataframe_random_data + ser = df["A"] + result = ser[2000, 5] + expected = df.loc[2000, 5]["A"] + tm.assert_series_equal(result, expected) + + +def test_loc_with_nan(): + # GH: 27104 + df = DataFrame( + {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]} + ).set_index(["ind1", "ind2"]) + result = df.loc[["a"]] + expected = DataFrame( + {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"]) + ) + tm.assert_frame_equal(result, expected) + + result = df.loc["a"] + expected = DataFrame({"col": [1]}, index=Index([1], name="ind2")) + tm.assert_frame_equal(result, expected) + + +def test_getitem_non_found_tuple(): + # GH: 25236 + df = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"]).set_index( + ["a", "b", "c"] + ) + with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"): + df.loc[(2.0, 2.0, 3.0)] + + +def test_get_loc_datetime_index(): + # GH#24263 + index = pd.date_range("2001-01-01", periods=100) + mi = MultiIndex.from_arrays([index]) + # Check if get_loc matches for Index and MultiIndex + assert mi.get_loc("2001-01") == slice(0, 31, None) + assert index.get_loc("2001-01") == slice(0, 31, None) + + loc = mi[::2].get_loc("2001-01") + expected = index[::2].get_loc("2001-01") + assert loc == expected + + loc = mi.repeat(2).get_loc("2001-01") + expected = index.repeat(2).get_loc("2001-01") + assert loc == expected + + loc = mi.append(mi).get_loc("2001-01") + expected = index.append(index).get_loc("2001-01") + # TODO: standardize return type for MultiIndex.get_loc + tm.assert_numpy_array_equal(loc.nonzero()[0], expected) + + +def test_loc_setitem_indexer_differently_ordered(): + # GH#34603 + mi = MultiIndex.from_product([["a", "b"], [0, 1]]) + df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi) + + indexer = ("a", [1, 0]) + df.loc[indexer, :] = np.array([[9, 10], [11, 12]]) + expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi) + tm.assert_frame_equal(df, expected) + + +def test_loc_getitem_index_differently_ordered_slice_none(): + # GH#31330 + df = DataFrame( + [[1, 2], [3, 4], [5, 6], [7, 8]], + index=[["a", "a", "b", "b"], [1, 2, 1, 2]], + columns=["a", "b"], + ) + result = df.loc[(slice(None), [2, 1]), :] + expected = DataFrame( + [[3, 4], [7, 8], [1, 2], [5, 6]], + index=[["a", "b", "a", "b"], [2, 2, 1, 1]], + columns=["a", "b"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]]) +def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer): + # GH#40978 + df = DataFrame( + [1] * 8, + index=MultiIndex.from_tuples( + [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)] + ), + columns=["a"], + ) + result = df.loc[(slice(None), indexer), :] + expected = DataFrame( + [1] * 8, + index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]], + columns=["a"], + ) + 
tm.assert_frame_equal(result, expected) + + result = df.loc[df.index.isin(indexer, level=1), :] + tm.assert_frame_equal(result, df) + + +def test_loc_getitem_drops_levels_for_one_row_dataframe(): + # GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped + mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"]) + df = DataFrame({"d": [0]}, index=mi) + expected = df.droplevel([0, 2]) + result = df.loc["x", :, "z"] + tm.assert_frame_equal(result, expected) + + ser = Series([0], index=mi) + result = ser.loc["x", :, "z"] + expected = Series([0], index=Index(["y"], name="b")) + tm.assert_series_equal(result, expected) + + +def test_mi_columns_loc_list_label_order(): + # GH 10710 + cols = MultiIndex.from_product([["A", "B", "C"], [1, 2]]) + df = DataFrame(np.zeros((5, 6)), columns=cols) + result = df.loc[:, ["B", "A"]] + expected = DataFrame( + np.zeros((5, 4)), + columns=MultiIndex.from_tuples([("B", 1), ("B", 2), ("A", 1), ("A", 2)]), + ) + tm.assert_frame_equal(result, expected) + + +def test_mi_partial_indexing_list_raises(): + # GH 13501 + frame = DataFrame( + np.arange(12).reshape((4, 3)), + index=[["a", "a", "b", "b"], [1, 2, 1, 2]], + columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]], + ) + frame.index.names = ["key1", "key2"] + frame.columns.names = ["state", "color"] + with pytest.raises(KeyError, match="\\[2\\] not in index"): + frame.loc[["b", 2], "Colorado"] + + +def test_mi_indexing_list_nonexistent_raises(): + # GH 15452 + s = Series(range(4), index=MultiIndex.from_product([[1, 2], ["a", "b"]])) + with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"): + s.loc[["not", "found"]] + + +def test_mi_add_cell_missing_row_non_unique(): + # GH 16018 + result = DataFrame( + [[1, 2, 5, 6], [3, 4, 7, 8]], + index=["a", "a"], + columns=MultiIndex.from_product([[1, 2], ["A", "B"]]), + ) + result.loc["c"] = -1 + result.loc["c", (1, "A")] = 3 + result.loc["d", (1, "A")] = 3 + expected = DataFrame( + [ + [1.0, 2.0, 5.0, 6.0], + [3.0, 4.0, 7.0, 8.0], + [3.0, -1.0, -1, -1], + [3.0, np.nan, np.nan, np.nan], + ], + index=["a", "a", "c", "d"], + columns=MultiIndex.from_product([[1, 2], ["A", "B"]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_get_scalar_casting_to_float(): + # GH#41369 + df = DataFrame( + {"a": 1.0, "b": 2}, index=MultiIndex.from_arrays([[3], [4]], names=["c", "d"]) + ) + result = df.loc[(3, 4), "b"] + assert result == 2 + assert isinstance(result, np.int64) + result = df.loc[[(3, 4)], "b"].iloc[0] + assert result == 2 + assert isinstance(result, np.int64) + + +def test_loc_empty_single_selector_with_names(): + # GH 19517 + idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0]) + s2 = Series(index=idx, dtype=np.float64) + result = s2.loc["a"] + expected = Series([np.nan, np.nan], index=Index(["A", "B"], name=0)) + tm.assert_series_equal(result, expected) + + +def test_loc_keyerror_rightmost_key_missing(): + # GH 20951 + + df = DataFrame( + { + "A": [100, 100, 200, 200, 300, 300], + "B": [10, 10, 20, 21, 31, 33], + "C": range(6), + } + ) + df = df.set_index(["A", "B"]) + with pytest.raises(KeyError, match="^1$"): + df.loc[(100, 1)] + + +def test_multindex_series_loc_with_tuple_label(): + # GH#43908 + mi = MultiIndex.from_tuples([(1, 2), (3, (4, 5))]) + ser = Series([1, 2], index=mi) + result = ser.loc[(3, (4, 5))] + assert result == 2 diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py new file mode 100644 index 
00000000..08e15545 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -0,0 +1,229 @@ +import numpy as np +import pytest + +import pandas._libs.index as _index +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestMultiIndexBasic: + def test_multiindex_perf_warn(self): + df = DataFrame( + { + "jim": [0, 0, 1, 1], + "joe": ["x", "x", "z", "y"], + "jolie": np.random.rand(4), + } + ).set_index(["jim", "joe"]) + + with tm.assert_produces_warning(PerformanceWarning): + df.loc[(1, "z")] + + df = df.iloc[[2, 1, 3, 0]] + with tm.assert_produces_warning(PerformanceWarning): + df.loc[(0,)] + + def test_indexing_over_hashtable_size_cutoff(self): + n = 10000 + + old_cutoff = _index._SIZE_CUTOFF + _index._SIZE_CUTOFF = 20000 + + s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n)))) + + # hai it works! + assert s[("a", 5)] == 5 + assert s[("a", 6)] == 6 + assert s[("a", 7)] == 7 + + _index._SIZE_CUTOFF = old_cutoff + + def test_multi_nan_indexing(self): + # GH 3588 + df = DataFrame( + { + "a": ["R1", "R2", np.nan, "R4"], + "b": ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20], + } + ) + result = df.set_index(["a", "b"], drop=False) + expected = DataFrame( + { + "a": ["R1", "R2", np.nan, "R4"], + "b": ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20], + }, + index=[ + Index(["R1", "R2", np.nan, "R4"], name="a"), + Index(["C1", "C2", "C3", "C4"], name="b"), + ], + ) + tm.assert_frame_equal(result, expected) + + def test_exclusive_nat_column_indexing(self): + # GH 38025 + # test multi indexing when one column exclusively contains NaT values + df = DataFrame( + { + "a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT], + "b": ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20], + } + ) + df = df.set_index(["a", "b"]) + expected = DataFrame( + { + "c": [10, 15, np.nan, 20], + }, + index=[ + Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"), + Index(["C1", "C2", "C3", "C4"], name="b"), + ], + ) + tm.assert_frame_equal(df, expected) + + def test_nested_tuples_duplicates(self): + # GH#30892 + + dti = pd.to_datetime(["20190101", "20190101", "20190102"]) + idx = Index(["a", "a", "c"]) + mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"]) + + df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi) + + expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi) + + df2 = df.copy(deep=True) + df2.loc[(dti[0], "a"), "c2"] = 1.0 + tm.assert_frame_equal(df2, expected) + + df3 = df.copy(deep=True) + df3.loc[[(dti[0], "a")], "c2"] = 1.0 + tm.assert_frame_equal(df3, expected) + + def test_multiindex_with_datatime_level_preserves_freq(self): + # https://github.com/pandas-dev/pandas/issues/35563 + idx = Index(range(2), name="A") + dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B") + mi = MultiIndex.from_product([idx, dti]) + df = DataFrame(np.random.randn(14, 2), index=mi) + result = df.loc[0].index + tm.assert_index_equal(result, dti) + assert result.freq == dti.freq + + def test_multiindex_complex(self): + # GH#42145 + complex_data = [1 + 2j, 4 - 3j, 10 - 1j] + non_complex_data = [3, 4, 5] + result = DataFrame( + { + "x": complex_data, + "y": non_complex_data, + "z": non_complex_data, + } + ) + result.set_index(["x", "y"], inplace=True) + expected = DataFrame( + {"z": non_complex_data}, + index=MultiIndex.from_arrays( + [complex_data, non_complex_data], + names=("x", "y"), + ), + ) + 
tm.assert_frame_equal(result, expected) + + def test_rename_multiindex_with_duplicates(self): + # GH 38015 + mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")]) + df = DataFrame(index=mi) + df = df.rename(index={"A": "Apple"}, level=0) + + mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")]) + expected = DataFrame(index=mi2) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "data_result, data_expected", + [ + ( + [ + [(81.0, np.nan), (np.nan, np.nan)], + [(np.nan, np.nan), (82.0, np.nan)], + [1, 2], + [1, 2], + ], + [ + [(81.0, np.nan), (np.nan, np.nan)], + [(81.0, np.nan), (np.nan, np.nan)], + [1, 2], + [1, 1], + ], + ), + ( + [ + [(81.0, np.nan), (np.nan, np.nan)], + [(np.nan, np.nan), (81.0, np.nan)], + [1, 2], + [1, 2], + ], + [ + [(81.0, np.nan), (np.nan, np.nan)], + [(81.0, np.nan), (np.nan, np.nan)], + [1, 2], + [2, 1], + ], + ), + ], + ) + def test_subtracting_two_series_with_unordered_index_and_all_nan_index( + self, data_result, data_expected + ): + # GH 38439 + a_index_result = MultiIndex.from_tuples(data_result[0]) + b_index_result = MultiIndex.from_tuples(data_result[1]) + a_series_result = Series(data_result[2], index=a_index_result) + b_series_result = Series(data_result[3], index=b_index_result) + result = a_series_result.align(b_series_result) + + a_index_expected = MultiIndex.from_tuples(data_expected[0]) + b_index_expected = MultiIndex.from_tuples(data_expected[1]) + a_series_expected = Series(data_expected[2], index=a_index_expected) + b_series_expected = Series(data_expected[3], index=b_index_expected) + a_series_expected.index = a_series_expected.index.set_levels( + [ + a_series_expected.index.levels[0].astype("float"), + a_series_expected.index.levels[1].astype("float"), + ] + ) + b_series_expected.index = b_series_expected.index.set_levels( + [ + b_series_expected.index.levels[0].astype("float"), + b_series_expected.index.levels[1].astype("float"), + ] + ) + + tm.assert_series_equal(result[0], a_series_expected) + tm.assert_series_equal(result[1], b_series_expected) + + def test_nunique_smoke(self): + # GH 34019 + n = DataFrame([[1, 2], [1, 2]]).set_index([0, 1]).index.nunique() + assert n == 1 + + def test_multiindex_repeated_keys(self): + # GH19414 + tm.assert_series_equal( + Series([1, 2], MultiIndex.from_arrays([["a", "b"]])).loc[ + ["a", "a", "b", "b"] + ], + Series([1, 1, 2, 2], MultiIndex.from_arrays([["a", "a", "b", "b"]])), + ) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py new file mode 100644 index 00000000..cface630 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -0,0 +1,258 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + MultiIndex, + date_range, + to_datetime, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) + + +class TestMultiIndexPartial: + def test_getitem_partial_int(self): + # GH 12416 + # with single item + l1 = [10, 20] + l2 = ["a", "b"] + df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2])) + expected = DataFrame(index=range(2), columns=l2) + result = df[20] + tm.assert_frame_equal(result, expected) + + # with list + expected = DataFrame( + index=range(2), columns=MultiIndex.from_product([l1[1:], l2]) + ) + result = df[[20]] + tm.assert_frame_equal(result, expected) + + # missing item: + with pytest.raises(KeyError, match="1"): + df[1] + with 
pytest.raises(KeyError, match=r"'\[1\] not in index'"): + df[[1]] + + def test_series_slice_partial(self): + pass + + def test_xs_partial( + self, + multiindex_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data, + ): + frame = multiindex_dataframe_random_data + ymd = multiindex_year_month_day_dataframe_random_data + result = frame.xs("foo") + result2 = frame.loc["foo"] + expected = frame.T["foo"].T + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) + + result = ymd.xs((2000, 4)) + expected = ymd.loc[2000, 4] + tm.assert_frame_equal(result, expected) + + # ex from #1796 + index = MultiIndex( + levels=[["foo", "bar"], ["one", "two"], [-1, 1]], + codes=[ + [0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1], + [0, 1, 0, 1, 0, 1, 0, 1], + ], + ) + df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd")) + + with tm.assert_produces_warning(FutureWarning): + result = df.xs(["foo", "one"]) + expected = df.loc["foo", "one"] + tm.assert_frame_equal(result, expected) + + def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + ymd = ymd.T + result = ymd[2000, 2] + + expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1]) + expected.columns = expected.columns.droplevel(0).droplevel(0) + tm.assert_frame_equal(result, expected) + + def test_fancy_slice_partial( + self, + multiindex_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data, + ): + frame = multiindex_dataframe_random_data + result = frame.loc["bar":"baz"] + expected = frame[3:7] + tm.assert_frame_equal(result, expected) + + ymd = multiindex_year_month_day_dataframe_random_data + result = ymd.loc[(2000, 2):(2000, 4)] + lev = ymd.index.codes[1] + expected = ymd[(lev >= 1) & (lev <= 3)] + tm.assert_frame_equal(result, expected) + + def test_getitem_partial_column_select(self): + idx = MultiIndex( + codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], + levels=[["a", "b"], ["x", "y"], ["p", "q"]], + ) + df = DataFrame(np.random.rand(3, 2), index=idx) + + result = df.loc[("a", "y"), :] + expected = df.loc[("a", "y")] + tm.assert_frame_equal(result, expected) + + result = df.loc[("a", "y"), [1, 0]] + expected = df.loc[("a", "y")][[1, 0]] + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match=r"\('a', 'foo'\)"): + df.loc[("a", "foo"), :] + + # TODO(ArrayManager) rewrite test to not use .values + # exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view + @td.skip_array_manager_invalid_test + def test_partial_set( + self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write + ): + # GH #397 + ymd = multiindex_year_month_day_dataframe_random_data + df = ymd.copy() + exp = ymd.copy() + df.loc[2000, 4] = 0 + exp.iloc[65:85] = 0 + tm.assert_frame_equal(df, exp) + + df["A"].loc[2000, 4] = 1 + if not using_copy_on_write: + exp["A"].loc[2000, 4].values[:] = 1 + tm.assert_frame_equal(df, exp) + + df.loc[2000] = 5 + exp.iloc[:100] = 5 + tm.assert_frame_equal(df, exp) + + # this works...for now + df["A"].iloc[14] = 5 + if using_copy_on_write: + df["A"].iloc[14] == exp["A"].iloc[14] + else: + assert df["A"].iloc[14] == 5 + + @pytest.mark.parametrize("dtype", [int, float]) + def test_getitem_intkey_leading_level( + self, multiindex_year_month_day_dataframe_random_data, dtype + ): + # GH#33355 dont fall-back to positional when leading level is int + ymd = multiindex_year_month_day_dataframe_random_data + levels = ymd.index.levels 
+ ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:]) + ser = ymd["A"] + mi = ser.index + assert isinstance(mi, MultiIndex) + if dtype is int: + assert isinstance(mi.levels[0], Int64Index) + else: + assert isinstance(mi.levels[0], Float64Index) + + assert 14 not in mi.levels[0] + assert not mi.levels[0]._should_fallback_to_positional + assert not mi._should_fallback_to_positional + + with pytest.raises(KeyError, match="14"): + ser[14] + with pytest.raises(KeyError, match="14"): + with tm.assert_produces_warning(FutureWarning): + mi.get_value(ser, 14) + + # --------------------------------------------------------------------- + + def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + expected = frame.copy() + result = frame.copy() + result.loc[["foo", "bar"]] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_frame_equal(result, expected) + + expected = frame.copy() + result = frame.copy() + result.loc["foo":"bar"] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_frame_equal(result, expected) + + expected = frame["A"].copy() + result = frame["A"].copy() + result.loc[["foo", "bar"]] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_series_equal(result, expected) + + expected = frame["A"].copy() + result = frame["A"].copy() + result.loc["foo":"bar"] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "indexer, exp_idx, exp_values", + [ + (slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]), + ( + slice(None, "2019-2"), + date_range("2019", periods=2, freq="MS"), + [0, 1, 2, 3], + ), + ], + ) + def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values): + # GH: 25165 + date_idx = date_range("2019", periods=2, freq="MS") + df = DataFrame( + list(range(4)), + index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), + ) + expected = DataFrame( + exp_values, + index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]), + ) + result = df[indexer] + tm.assert_frame_equal(result, expected) + result = df.loc[indexer] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis=0)[indexer] + tm.assert_frame_equal(result, expected) + + result = df.loc[indexer, :] + tm.assert_frame_equal(result, expected) + + df2 = df.swaplevel(0, 1).sort_index() + expected = expected.swaplevel(0, 1).sort_index() + + result = df2.loc[:, indexer, :] + tm.assert_frame_equal(result, expected) + + +def test_loc_getitem_partial_both_axis(): + # gh-12660 + iterables = [["a", "b"], [2, 1]] + columns = MultiIndex.from_product(iterables, names=["col1", "col2"]) + rows = MultiIndex.from_product(iterables, names=["row1", "row2"]) + df = DataFrame(np.random.randn(4, 4), index=rows, columns=columns) + expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1) + result = df.loc["a", "b"] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py new file mode 100644 index 00000000..ac10a6d8 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -0,0 +1,526 @@ +import numpy as np +import pytest + +from pandas.errors import SettingWithCopyError +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + isna, + notna, +) +import pandas._testing as tm + + +def 
assert_equal(a, b): + assert a == b + + +class TestMultiIndexSetItem: + def check(self, target, indexers, value, compare_fn=assert_equal, expected=None): + target.loc[indexers] = value + result = target.loc[indexers] + if expected is None: + expected = value + compare_fn(result, expected) + + def test_setitem_multiindex(self): + # GH#7190 + cols = ["A", "w", "l", "a", "x", "X", "d", "profit"] + index = MultiIndex.from_product( + [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"] + ) + t, n = 0, 2 + + df = DataFrame( + np.nan, + columns=cols, + index=index, + ) + self.check(target=df, indexers=((t, n), "X"), value=0) + + df = DataFrame(-999, columns=cols, index=index) + self.check(target=df, indexers=((t, n), "X"), value=1) + + df = DataFrame(columns=cols, index=index) + self.check(target=df, indexers=((t, n), "X"), value=2) + + # gh-7218: assigning with 0-dim arrays + df = DataFrame(-999, columns=cols, index=index) + self.check( + target=df, + indexers=((t, n), "X"), + value=np.array(3), + expected=3, + ) + + def test_setitem_multiindex2(self): + # GH#5206 + df = DataFrame( + np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float + ) + df["F"] = 99 + row_selection = df["A"] % 2 == 0 + col_selection = ["B", "C"] + df.loc[row_selection, col_selection] = df["F"] + output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"]) + tm.assert_frame_equal(df.loc[row_selection, col_selection], output) + self.check( + target=df, + indexers=(row_selection, col_selection), + value=df["F"], + compare_fn=tm.assert_frame_equal, + expected=output, + ) + + def test_setitem_multiindex3(self): + # GH#11372 + idx = MultiIndex.from_product( + [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")] + ) + cols = MultiIndex.from_product( + [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] + ) + + df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols) + + subidx = MultiIndex.from_tuples( + [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))] + ) + subcols = MultiIndex.from_tuples( + [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))] + ) + + vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) + self.check( + target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # set all columns + vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) + self.check( + target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # identity + copy = df.copy() + self.check( + target=df, + indexers=(df.index, df.columns), + value=df, + compare_fn=tm.assert_frame_equal, + expected=copy, + ) + + # TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in + # all NaNs -> doesn't work in the "split" path (also for BlockManager actually) + @td.skip_array_manager_not_yet_implemented + def test_multiindex_setitem(self): + + # GH 3738 + # setting with a multi-index right hand side + arrays = [ + np.array(["bar", "bar", "baz", "qux", "qux", "bar"]), + np.array(["one", "two", "one", "one", "two", "one"]), + np.arange(0, 6, 1), + ] + + df_orig = DataFrame( + np.random.randn(6, 3), index=arrays, columns=["A", "B", "C"] + ).sort_index() + + expected = df_orig.loc[["bar"]] * 2 + df = df_orig.copy() + df.loc[["bar"]] *= 2 + tm.assert_frame_equal(df.loc[["bar"]], expected) + + # raise because these have differing levels + msg = "cannot align on a multi-index with out specifying the join levels" + with 
pytest.raises(TypeError, match=msg): + df.loc["bar"] *= 2 + + def test_multiindex_setitem2(self): + + # from SO + # https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation + df_orig = DataFrame.from_dict( + { + "price": { + ("DE", "Coal", "Stock"): 2, + ("DE", "Gas", "Stock"): 4, + ("DE", "Elec", "Demand"): 1, + ("FR", "Gas", "Stock"): 5, + ("FR", "Solar", "SupIm"): 0, + ("FR", "Wind", "SupIm"): 0, + } + } + ) + df_orig.index = MultiIndex.from_tuples( + df_orig.index, names=["Sit", "Com", "Type"] + ) + + expected = df_orig.copy() + expected.iloc[[0, 2, 3]] *= 2 + + idx = pd.IndexSlice + df = df_orig.copy() + df.loc[idx[:, :, "Stock"], :] *= 2 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[idx[:, :, "Stock"], "price"] *= 2 + tm.assert_frame_equal(df, expected) + + def test_multiindex_assignment(self): + + # GH3777 part 2 + + # mixed dtype + df = DataFrame( + np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list("abc"), + index=[[4, 4, 8], [8, 10, 12]], + ) + df["d"] = np.nan + arr = np.array([0.0, 1.0]) + + df.loc[4, "d"] = arr + tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) + + def test_multiindex_assignment_single_dtype(self, using_copy_on_write): + # GH3777 part 2b + # single dtype + arr = np.array([0.0, 1.0]) + + df = DataFrame( + np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list("abc"), + index=[[4, 4, 8], [8, 10, 12]], + dtype=np.int64, + ) + view = df["c"].iloc[:2].values + + # arr can be losslessly cast to int, so this setitem is inplace + df.loc[4, "c"] = arr + exp = Series(arr, index=[8, 10], name="c", dtype="int64") + result = df.loc[4, "c"] + tm.assert_series_equal(result, exp) + + # extra check for inplace-ness + if not using_copy_on_write: + tm.assert_numpy_array_equal(view, exp.values) + + # arr + 0.5 cannot be cast losslessly to int, so we upcast + df.loc[4, "c"] = arr + 0.5 + result = df.loc[4, "c"] + exp = exp + 0.5 + tm.assert_series_equal(result, exp) + + # scalar ok + df.loc[4, "c"] = 10 + exp = Series(10, index=[8, 10], name="c", dtype="float64") + tm.assert_series_equal(df.loc[4, "c"], exp) + + # invalid assignments + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df.loc[4, "c"] = [0, 1, 2, 3] + + with pytest.raises(ValueError, match=msg): + df.loc[4, "c"] = [0] + + # But with a length-1 listlike column indexer this behaves like + # `df.loc[4, "c"] = 0 + df.loc[4, ["c"]] = [0] + assert (df.loc[4, "c"] == 0).all() + + def test_groupby_example(self): + # groupby example + NUM_ROWS = 100 + NUM_COLS = 10 + col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())] + index_cols = col_names[:5] + + df = DataFrame( + np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), + dtype=np.int64, + columns=col_names, + ) + df = df.set_index(index_cols).sort_index() + grp = df.groupby(level=index_cols[:4]) + df["new_col"] = np.nan + + # we are actually operating on a copy here + # but in this case, that's ok + for name, df2 in grp: + new_vals = np.arange(df2.shape[0]) + df.loc[name, "new_col"] = new_vals + + def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + s = ymd["A"] + + s[2000, 3] = np.nan + assert isna(s.values[42:65]).all() + assert notna(s.values[:42]).all() + assert notna(s.values[65:]).all() + + s[2000, 3, 10] = np.nan + assert isna(s.iloc[49]) + + with pytest.raises(KeyError, 
match="49"): + # GH#33355 dont fall-back to positional when leading level is int + s[49] + + def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T.copy() + values = df.values + + result = df[df > 0] + expected = df.where(df > 0) + tm.assert_frame_equal(result, expected) + + df[df > 0] = 5 + values[values > 0] = 5 + tm.assert_almost_equal(df.values, values) + + df[df == 5] = 0 + values[values == 5] = 0 + tm.assert_almost_equal(df.values, values) + + # a df that needs alignment first + df[df[:-1] < 0] = 2 + np.putmask(values[:-1], values[:-1] < 0, 2) + tm.assert_almost_equal(df.values, values) + + with pytest.raises(TypeError, match="boolean values only"): + df[df * 0] = 2 + + def test_frame_getitem_setitem_multislice(self): + levels = [["t1", "t2"], ["a", "b", "c"]] + codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] + midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"]) + df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx) + + result = df.loc[:, "value"] + tm.assert_series_equal(df["value"], result) + + result = df.loc[df.index[1:3], "value"] + tm.assert_series_equal(df["value"][1:3], result) + + result = df.loc[:, :] + tm.assert_frame_equal(df, result) + + result = df + df.loc[:, "value"] = 10 + result["value"] = 10 + tm.assert_frame_equal(df, result) + + df.loc[:, :] = 10 + tm.assert_frame_equal(df, result) + + def test_frame_setitem_multi_column(self): + df = DataFrame( + np.random.randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]] + ) + + cp = df.copy() + cp["a"] = cp["b"] + tm.assert_frame_equal(cp["a"], cp["b"]) + + # set with ndarray + cp = df.copy() + cp["a"] = cp["b"].values + tm.assert_frame_equal(cp["a"], cp["b"]) + + def test_frame_setitem_multi_column2(self): + + # --------------------------------------- + # GH#1803 + columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]) + df = DataFrame(index=[1, 3, 5], columns=columns) + + # Works, but adds a column instead of updating the two existing ones + df["A"] = 0.0 # Doesn't work + assert (df["A"].values == 0).all() + + # it broadcasts + df["B", "1"] = [1, 2, 3] + df["A"] = df["B", "1"] + + sliced_a1 = df["A", "1"] + sliced_a2 = df["A", "2"] + sliced_b1 = df["B", "1"] + tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False) + tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False) + assert sliced_a1.name == ("A", "1") + assert sliced_a2.name == ("A", "2") + assert sliced_b1.name == ("B", "1") + + def test_loc_getitem_tuple_plus_columns( + self, multiindex_year_month_day_dataframe_random_data + ): + # GH #1013 + ymd = multiindex_year_month_day_dataframe_random_data + df = ymd[:5] + + result = df.loc[(2000, 1, 6), ["A", "B", "C"]] + expected = df.loc[2000, 1, 6][["A", "B", "C"]] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_setitem_slice_integers(self, frame_or_series): + index = MultiIndex( + levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]] + ) + + obj = DataFrame( + np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"] + ) + obj = tm.get_obj(obj, frame_or_series) + + res = obj.loc[1:2] + exp = obj.reindex(obj.index[2:]) + tm.assert_equal(res, exp) + + obj.loc[1:2] = 7 + assert (obj.loc[1:2] == 7).values.all() + + def test_setitem_change_dtype(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + dft = frame.T + s = dft["foo", "two"] + dft["foo", "two"] = s > s.median() + 
tm.assert_series_equal(dft["foo", "two"], s > s.median()) + # assert isinstance(dft._data.blocks[1].items, MultiIndex) + + reindexed = dft.reindex(columns=[("foo", "two")]) + tm.assert_series_equal(reindexed["foo", "two"], s > s.median()) + + def test_set_column_scalar_with_loc( + self, multiindex_dataframe_random_data, using_copy_on_write + ): + frame = multiindex_dataframe_random_data + subset = frame.index[[1, 4, 5]] + + frame.loc[subset] = 99 + assert (frame.loc[subset].values == 99).all() + + frame_original = frame.copy() + col = frame["B"] + col[subset] = 97 + if using_copy_on_write: + # chained setitem doesn't work with CoW + tm.assert_frame_equal(frame, frame_original) + else: + assert (frame.loc[subset, "B"] == 97).all() + + def test_nonunique_assignment_1750(self): + df = DataFrame( + [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD") + ) + + df = df.set_index(["A", "B"]) + mi = MultiIndex.from_tuples([(1, 1)]) + + df.loc[mi, "C"] = "_" + + assert (df.xs((1, 1))["C"] == "_").all() + + def test_astype_assignment_with_dups(self): + + # GH 4686 + # assignment with dups that has a dtype change + cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")]) + df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object) + index = df.index.copy() + + df["A"] = df["A"].astype(np.float64) + tm.assert_index_equal(df.index, index) + + def test_setitem_nonmonotonic(self): + # https://github.com/pandas-dev/pandas/issues/31449 + index = MultiIndex.from_tuples( + [("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"] + ) + df = DataFrame(data=[0, 1, 2], index=index, columns=["e"]) + df.loc["a", "e"] = np.arange(99, 101, dtype="int64") + expected = DataFrame({"e": [99, 1, 100]}, index=index) + tm.assert_frame_equal(df, expected) + + +class TestSetitemWithExpansionMultiIndex: + def test_setitem_new_column_mixed_depth(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + result = df.copy() + expected = df.copy() + result["b"] = [1, 2, 3, 4] + expected["b", "", ""] = [1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + def test_setitem_new_column_all_na(self): + # GH#1534 + mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")]) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) + s = Series({(1, 1): 1, (1, 2): 2}) + df["new"] = s + assert df["new"].isna().all() + + +@td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values +# is not a view +def test_frame_setitem_view_direct(multiindex_dataframe_random_data): + # this works because we are modifying the underlying array + # really a no-no + df = multiindex_dataframe_random_data.T + df["foo"].values[:] = 0 + assert (df["foo"].values == 0).all() + + +def test_frame_setitem_copy_raises( + multiindex_dataframe_random_data, using_copy_on_write +): + # will raise/warn as its chained assignment + df = multiindex_dataframe_random_data.T + if using_copy_on_write: + # TODO(CoW) it would be nice if this could still warn/raise + df["foo"]["one"] = 2 + else: + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(SettingWithCopyError, match=msg): + df["foo"]["one"] = 2 + + +def test_frame_setitem_copy_no_write( + multiindex_dataframe_random_data, using_copy_on_write +): + frame = 
multiindex_dataframe_random_data.T + expected = frame + df = frame.copy() + if using_copy_on_write: + df["foo"]["one"] = 2 + else: + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(SettingWithCopyError, match=msg): + df["foo"]["one"] = 2 + + result = df + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py new file mode 100644 index 00000000..91ea1f7c --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -0,0 +1,805 @@ +import numpy as np +import pytest + +from pandas.errors import UnsortedIndexError + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.tests.indexing.common import _mklbl + + +class TestMultiIndexSlicers: + def test_per_axis_per_level_getitem(self): + + # GH6134 + # example test case + ix = MultiIndex.from_product( + [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)] + ) + df = DataFrame(np.arange(len(ix.to_numpy())), index=ix) + + result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") + and (c == "C1" or c == "C2" or c == "C3") + ] + ] + result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :] + tm.assert_frame_equal(result, expected) + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples( + [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + + df = DataFrame( + np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns + ) + df = df.sort_index(axis=0).sort_index(axis=1) + + # identity + result = df.loc[(slice(None), slice(None)), :] + tm.assert_frame_equal(result, df) + result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + result = df.loc[:, (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + + # index + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), 1), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # columns + result = df.loc[:, (slice(None), ["foo"])] + expected = df.iloc[:, [1, 3]] + tm.assert_frame_equal(result, expected) + + # both + result = df.loc[(slice(None), 1), (slice(None), ["foo"])] + expected = df.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc["A", "a"] + expected = DataFrame( + {"bar": [1, 5, 9], "foo": [0, 4, 8]}, + index=Index([1, 2, 3], name="two"), + columns=Index(["bar", "foo"], name="lvl1"), + ) + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), [1, 2]), :] + expected = df.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # multi-level series + s = Series(np.arange(len(ix.to_numpy())), index=ix) + result = s.loc["A1":"A3", :, ["C1", "C3"]] + expected = s.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in s.index.values + if (a == "A1" or a == "A2" 
or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_series_equal(result, expected) + + # boolean indexers + result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :] + expected = df.iloc[[2, 3]] + tm.assert_frame_equal(result, expected) + + msg = ( + "cannot index with a boolean indexer " + "that is not the same length as the index" + ) + with pytest.raises(ValueError, match=msg): + df.loc[(slice(None), np.array([True, False])), :] + + with pytest.raises(KeyError, match=r"\[1\] not in index"): + # slice(None) is on the index, [1] is on the columns, but 1 is + # not in the columns, so we raise + # This used to treat [1] as positional GH#16396 + df.loc[slice(None), [1]] + + # not lexsorted + assert df.index._lexsort_depth == 2 + df = df.sort_index(level=1, axis=0) + assert df.index._lexsort_depth == 0 + + msg = ( + "MultiIndex slicing requires the index to be " + r"lexsorted: slicing on levels \[1\], lexsort depth 0" + ) + with pytest.raises(UnsortedIndexError, match=msg): + df.loc[(slice(None), slice("bar")), :] + + # GH 16734: not sorted, but no real slicing + result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :] + tm.assert_frame_equal(result, df.iloc[[1, 3], :]) + + def test_multiindex_slicers_non_unique(self): + + # GH 7106 + # non-unique mi index support + df = ( + DataFrame( + { + "A": ["foo", "foo", "foo", "foo"], + "B": ["a", "a", "a", "a"], + "C": [1, 2, 1, 3], + "D": [1, 2, 3, 4], + } + ) + .set_index(["A", "B", "C"]) + .sort_index() + ) + assert not df.index.is_unique + expected = ( + DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]}) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.loc[(slice(None), slice(None), 1), :] + tm.assert_frame_equal(result, expected) + + # this is equivalent of an xs expression + result = df.xs(1, level=2, drop_level=False) + tm.assert_frame_equal(result, expected) + + df = ( + DataFrame( + { + "A": ["foo", "foo", "foo", "foo"], + "B": ["a", "a", "a", "a"], + "C": [1, 2, 1, 2], + "D": [1, 2, 3, 4], + } + ) + .set_index(["A", "B", "C"]) + .sort_index() + ) + assert not df.index.is_unique + expected = ( + DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]}) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.loc[(slice(None), slice(None), 1), :] + assert not result.index.is_unique + tm.assert_frame_equal(result, expected) + + # GH12896 + # numpy-implementation dependent bug + ints = [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 12, + 13, + 14, + 14, + 16, + 17, + 18, + 19, + 200000, + 200000, + ] + n = len(ints) + idx = MultiIndex.from_arrays([["a"] * n, ints]) + result = Series([1] * n, index=idx) + result = result.sort_index() + result = result.loc[(slice(None), slice(100000))] + expected = Series([1] * (n - 2), index=idx[:-2]).sort_index() + tm.assert_series_equal(result, expected) + + def test_multiindex_slicers_datetimelike(self): + + # GH 7429 + # buggy/inconsistent behavior when slicing with datetime-like + import datetime + + dates = [ + datetime.datetime(2012, 1, 1, 12, 12, 12) + datetime.timedelta(days=i) + for i in range(6) + ] + freq = [1, 2] + index = MultiIndex.from_product([dates, freq], names=["date", "frequency"]) + + df = DataFrame( + np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4), + index=index, + columns=list("ABCD"), + ) + + # multi-axis slicing + idx = pd.IndexSlice + expected = df.iloc[[0, 2, 4], [0, 1]] + result = df.loc[ + ( + slice( + Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12") + ), 
+ slice(1, 1), + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + ( + idx[ + Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12") + ], + idx[1:1], + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + ( + slice( + Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12") + ), + 1, + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + # with strings + result = df.loc[ + (slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + (idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"] + ] + tm.assert_frame_equal(result, expected) + + def test_multiindex_slicers_edges(self): + # GH 8132 + # various edge cases + df = DataFrame( + { + "A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5, + "B": ["B0", "B0", "B1", "B1", "B2"] * 3, + "DATE": [ + "2013-06-11", + "2013-07-02", + "2013-07-09", + "2013-07-30", + "2013-08-06", + "2013-06-11", + "2013-07-02", + "2013-07-09", + "2013-07-30", + "2013-08-06", + "2013-09-03", + "2013-10-01", + "2013-07-09", + "2013-08-06", + "2013-09-03", + ], + "VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2], + } + ) + + df["DATE"] = pd.to_datetime(df["DATE"]) + df1 = df.set_index(["A", "B", "DATE"]) + df1 = df1.sort_index() + + # A1 - Get all values under "A0" and "A1" + result = df1.loc[(slice("A1")), :] + expected = df1.iloc[0:10] + tm.assert_frame_equal(result, expected) + + # A2 - Get all values from the start to "A2" + result = df1.loc[(slice("A2")), :] + expected = df1 + tm.assert_frame_equal(result, expected) + + # A3 - Get all values under "B1" or "B2" + result = df1.loc[(slice(None), slice("B1", "B2")), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]] + tm.assert_frame_equal(result, expected) + + # A4 - Get all values between 2013-07-02 and 2013-07-09 + result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] + tm.assert_frame_equal(result, expected) + + # B1 - Get all values in B0 that are also under A0, A1 and A2 + result = df1.loc[(slice("A2"), slice("B0")), :] + expected = df1.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for + # the As) + result = df1.loc[(slice(None), slice("B2")), :] + expected = df1 + tm.assert_frame_equal(result, expected) + + # B3 - Get all values from B1 to B2 and up to 2013-08-06 + result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]] + tm.assert_frame_equal(result, expected) + + # B4 - Same as A4 but the start of the date slice is not a key. 
+ # shows indexing on a partial selection slice + result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] + tm.assert_frame_equal(result, expected) + + def test_per_axis_per_level_doc_examples(self): + + # test index maker + idx = pd.IndexSlice + + # from indexing.rst / advanced + index = MultiIndex.from_product( + [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + df = DataFrame( + np.arange(len(index) * len(columns), dtype="int64").reshape( + (len(index), len(columns)) + ), + index=index, + columns=columns, + ) + result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + result = df.loc[idx[:, :, ["C1", "C3"]], :] + tm.assert_frame_equal(result, expected) + + # not sorted + msg = ( + "MultiIndex slicing requires the index to be lexsorted: " + r"slicing on levels \[1\], lexsort depth 1" + ) + with pytest.raises(UnsortedIndexError, match=msg): + df.loc["A1", ("a", slice("foo"))] + + # GH 16734: not sorted, but no real slicing + tm.assert_frame_equal( + df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]] + ) + + df = df.sort_index(axis=1) + + # slicing + df.loc["A1", (slice(None), "foo")] + df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")] + + # setitem + df.loc(axis=0)[:, :, ["C1", "C3"]] = -10 + + def test_loc_axis_arguments(self): + + index = MultiIndex.from_product( + [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + df = ( + DataFrame( + np.arange(len(index) * len(columns), dtype="int64").reshape( + (len(index), len(columns)) + ), + index=index, + columns=columns, + ) + .sort_index() + .sort_index(axis=1) + ) + + # axis 0 + result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis="index")[:, :, ["C1", "C3"]] + expected = df.loc[ + [ + ( + a, + b, + c, + d, + ) + for a, b, c, d in df.index.values + if (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + # axis 1 + result = df.loc(axis=1)[:, "foo"] + expected = df.loc[:, (slice(None), "foo")] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis="columns")[:, "foo"] + expected = df.loc[:, (slice(None), "foo")] + tm.assert_frame_equal(result, expected) + + # invalid axis + for i in [-1, 2, "foo"]: + msg = f"No axis named {i} for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.loc(axis=i)[:, :, ["C1", "C3"]] + + def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self): + + # GH29519 + df = DataFrame( + 
np.arange(27).reshape(3, 9), + columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]), + ) + result = df.loc(axis=1)["a1":"a2"] + expected = df.iloc[:, :-3] + + tm.assert_frame_equal(result, expected) + + def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self): + + # GH29519 + df = DataFrame( + np.arange(27).reshape(3, 9), + columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]), + ) + result = df.loc(axis=1)["a1"] + expected = df.iloc[:, :3] + expected.columns = ["b1", "b2", "b3"] + + tm.assert_frame_equal(result, expected) + + def test_loc_ax_single_level_indexer_simple_df(self): + + # GH29519 + # test single level indexing on single index column data frame + df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"]) + result = df.loc(axis=1)["a"] + expected = Series(np.array([0, 3, 6]), name="a") + tm.assert_series_equal(result, expected) + + def test_per_axis_per_level_setitem(self): + + # test index maker + idx = pd.IndexSlice + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples( + [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + + df_orig = DataFrame( + np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns + ) + df_orig = df_orig.sort_index(axis=0).sort_index(axis=1) + + # identity + df = df_orig.copy() + df.loc[(slice(None), slice(None)), :] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc(axis=0)[:, :] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[:, (slice(None), slice(None))] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + # index + df = df_orig.copy() + df.loc[(slice(None), [1]), :] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), 1), :] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc(axis=0)[:, 1] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # columns + df = df_orig.copy() + df.loc[:, (slice(None), ["foo"])] = 100 + expected = df_orig.copy() + expected.iloc[:, [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # both + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[idx[:, 1], idx[:, ["foo"]]] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc["A", "a"] = 100 + expected = df_orig.copy() + expected.iloc[0:3, 0:2] = 100 + tm.assert_frame_equal(df, expected) + + # setting with a list-like + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [[100, 100], [100, 100]], dtype="int64" + ) + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + 
tm.assert_frame_equal(df, expected) + + # not enough values + df = df_orig.copy() + + msg = "setting an array element with a sequence." + with pytest.raises(ValueError, match=msg): + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [[100], [100, 100]], dtype="int64" + ) + + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [100, 100, 100, 100], dtype="int64" + ) + + # with an alignable rhs + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = ( + df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5 + ) + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[ + (slice(None), 1), (slice(None), ["foo"]) + ] + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(df, expected) + + rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy() + rhs.loc[:, ("c", "bah")] = 10 + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(df, expected) + + def test_multiindex_label_slicing_with_negative_step(self): + ser = Series( + np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)]) + ) + SLC = pd.IndexSlice + + tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1]) + + tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1]) + + tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1]) + + tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1]) + tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0]) + + tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1]) + tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1]) + tm.assert_indexing_slices_equivalent( + ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1] + ) + + def test_multiindex_slice_first_level(self): + # GH 12697 + freq = ["a", "b", "c", "d"] + idx = MultiIndex.from_product([freq, np.arange(500)]) + df = DataFrame(list(range(2000)), index=idx, columns=["Test"]) + df_slice = df.loc[pd.IndexSlice[:, 30:70], :] + result = df_slice.loc["a"] + expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71)) + tm.assert_frame_equal(result, expected) + result = df_slice.loc["d"] + expected = DataFrame( + list(range(1530, 1571)), columns=["Test"], index=range(30, 71) + ) + tm.assert_frame_equal(result, expected) + + def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + s = ymd["A"] + result = s[5:] + expected = s.reindex(s.index[5:]) + tm.assert_series_equal(result, expected) + + exp = ymd["A"].copy() + s[5:] = 0 + exp.values[5:] = 0 + tm.assert_numpy_array_equal(s.values, exp.values) + + result = ymd[5:] + expected = 
ymd.reindex(s.index[5:]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, loc, iloc", + [ + # dtype = int, step = -1 + ("int", slice(None, None, -1), slice(None, None, -1)), + ("int", slice(3, None, -1), slice(3, None, -1)), + ("int", slice(None, 1, -1), slice(None, 0, -1)), + ("int", slice(3, 1, -1), slice(3, 0, -1)), + # dtype = int, step = -2 + ("int", slice(None, None, -2), slice(None, None, -2)), + ("int", slice(3, None, -2), slice(3, None, -2)), + ("int", slice(None, 1, -2), slice(None, 0, -2)), + ("int", slice(3, 1, -2), slice(3, 0, -2)), + # dtype = str, step = -1 + ("str", slice(None, None, -1), slice(None, None, -1)), + ("str", slice("d", None, -1), slice(3, None, -1)), + ("str", slice(None, "b", -1), slice(None, 0, -1)), + ("str", slice("d", "b", -1), slice(3, 0, -1)), + # dtype = str, step = -2 + ("str", slice(None, None, -2), slice(None, None, -2)), + ("str", slice("d", None, -2), slice(3, None, -2)), + ("str", slice(None, "b", -2), slice(None, 0, -2)), + ("str", slice("d", "b", -2), slice(3, 0, -2)), + ], + ) + def test_loc_slice_negative_stepsize(self, dtype, loc, iloc): + # GH#38071 + labels = { + "str": list("abcde"), + "int": range(5), + }[dtype] + + mi = MultiIndex.from_arrays([labels] * 2) + df = DataFrame(1.0, index=mi, columns=["A"]) + + SLC = pd.IndexSlice + + expected = df.iloc[iloc, :] + result_get_loc = df.loc[SLC[loc], :] + result_get_locs_level_0 = df.loc[SLC[loc, :], :] + result_get_locs_level_1 = df.loc[SLC[:, loc], :] + + tm.assert_frame_equal(result_get_loc, expected) + tm.assert_frame_equal(result_get_locs_level_0, expected) + tm.assert_frame_equal(result_get_locs_level_1, expected) diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py new file mode 100644 index 00000000..2214aaa9 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_sorted.py @@ -0,0 +1,127 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestMultiIndexSorted: + def test_getitem_multilevel_index_tuple_not_sorted(self): + index_columns = list("abc") + df = DataFrame( + [[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"] + ) + df = df.set_index(index_columns) + query_index = df.index[:1] + rs = df.loc[query_index, "data"] + + xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"]) + xp = Series(["x"], index=xp_idx, name="data") + tm.assert_series_equal(rs, xp) + + def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.sort_index(level=1).T + + # buglet with int typechecking + result = df.iloc[:, : np.int32(3)] + expected = df.reindex(columns=df.columns[:3]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("key", [None, lambda x: x]) + def test_frame_getitem_not_sorted2(self, key): + # 13431 + df = DataFrame( + { + "col1": ["b", "d", "b", "a"], + "col2": [3, 1, 1, 2], + "data": ["one", "two", "three", "four"], + } + ) + + df2 = df.set_index(["col1", "col2"]) + df2_original = df2.copy() + + with tm.assert_produces_warning(FutureWarning): + return_value = df2.index.set_levels( + ["b", "d", "a"], level="col1", inplace=True + ) + assert return_value is None + with tm.assert_produces_warning(FutureWarning): + return_value = df2.index.set_codes([0, 1, 0, 2], level="col1", inplace=True) + assert return_value is None + assert not df2.index.is_monotonic_increasing + + assert 
df2_original.index.equals(df2.index) + expected = df2.sort_index(key=key) + assert expected.index.is_monotonic_increasing + + result = df2.sort_index(level=0, key=key) + assert result.index.is_monotonic_increasing + tm.assert_frame_equal(result, expected) + + def test_sort_values_key(self): + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + index = index.sort_values( # sort by third letter + key=lambda x: x.map(lambda entry: entry[2]) + ) + result = DataFrame(range(8), index=index) + + arrays = [ + ["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + expected = DataFrame(range(8), index=index) + + tm.assert_frame_equal(result, expected) + + def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T + df["foo", "four"] = "foo" + + arrays = [np.array(x) for x in zip(*df.columns.values)] + + result = df["foo"] + result2 = df.loc[:, "foo"] + expected = df.reindex(columns=df.columns[arrays[0] == "foo"]) + expected.columns = expected.columns.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + df = df.T + result = df.xs("foo") + result2 = df.loc["foo"] + expected = df.reindex(df.index[arrays[0] == "foo"]) + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_series_getitem_not_sorted(self): + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + s = Series(np.random.randn(8), index=index) + + arrays = [np.array(x) for x in zip(*index.values)] + + result = s["qux"] + result2 = s.loc["qux"] + expected = s[arrays[0] == "qux"] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py new file mode 100644 index 00000000..adbc0e2f --- /dev/null +++ b/pandas/tests/indexing/test_at.py @@ -0,0 +1,236 @@ +from datetime import ( + datetime, + timezone, +) + +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +from pandas import ( + CategoricalDtype, + CategoricalIndex, + DataFrame, + MultiIndex, + Series, + Timestamp, +) +import pandas._testing as tm + + +def test_at_timezone(): + # https://github.com/pandas-dev/pandas/issues/33544 + result = DataFrame({"foo": [datetime(2000, 1, 1)]}) + result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) + expected = DataFrame( + {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object + ) + tm.assert_frame_equal(result, expected) + + +def test_selection_methods_of_assigned_col(): + # GH 29282 + df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = DataFrame(data={"c": [7, 8, 9]}, index=[2, 1, 0]) + df["c"] = df2["c"] + df.at[1, "c"] = 11 + result = df + expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [9, 11, 7]}) + tm.assert_frame_equal(result, expected) + result = df.at[1, "c"] + assert result == 11 + + result = df["c"] + expected = Series([9, 11, 7], name="c") + tm.assert_series_equal(result, expected) + + 
result = df[["c"]] + expected = DataFrame({"c": [9, 11, 7]}) + tm.assert_frame_equal(result, expected) + + +class TestAtSetItem: + def test_at_setitem_item_cache_cleared(self): + # GH#22372 Note the multi-step construction is necessary to trigger + # the original bug. pandas/issues/22372#issuecomment-413345309 + df = DataFrame(index=[0]) + df["x"] = 1 + df["cost"] = 2 + + # accessing df["cost"] adds "cost" to the _item_cache + df["cost"] + + # This loc[[0]] lookup used to call _consolidate_inplace at the + # BlockManager level, which failed to clear the _item_cache + df.loc[[0]] + + df.at[0, "x"] = 4 + df.at[0, "cost"] = 789 + + expected = DataFrame({"x": [4], "cost": 789}, index=[0]) + tm.assert_frame_equal(df, expected) + + # And in particular, check that the _item_cache has updated correctly. + tm.assert_series_equal(df["cost"], expected["cost"]) + + def test_at_setitem_mixed_index_assignment(self): + # GH#19860 + ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + ser.at["a"] = 11 + assert ser.iat[0] == 11 + ser.at[1] = 22 + assert ser.iat[3] == 22 + + def test_at_setitem_categorical_missing(self): + df = DataFrame( + index=range(3), columns=range(3), dtype=CategoricalDtype(["foo", "bar"]) + ) + df.at[1, 1] = "foo" + + expected = DataFrame( + [ + [np.nan, np.nan, np.nan], + [np.nan, "foo", np.nan], + [np.nan, np.nan, np.nan], + ], + dtype=CategoricalDtype(["foo", "bar"]), + ) + + tm.assert_frame_equal(df, expected) + + def test_at_setitem_multiindex(self): + df = DataFrame( + np.zeros((3, 2), dtype="int64"), + columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]), + ) + df.at[0, "a"] = 10 + expected = DataFrame( + [[10, 10], [0, 0], [0, 0]], + columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]), + ) + tm.assert_frame_equal(df, expected) + + +class TestAtSetItemWithExpansion: + def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture): + # GH#25506 + ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture) + result = Series(ts) + result.at[1] = ts + expected = Series([ts, ts]) + tm.assert_series_equal(result, expected) + + +class TestAtWithDuplicates: + def test_at_with_duplicate_axes_requires_scalar_lookup(self): + # GH#33041 check that falling back to loc doesn't allow non-scalar + # args to slip in + + arr = np.random.randn(6).reshape(3, 2) + df = DataFrame(arr, columns=["A", "A"]) + + msg = "Invalid call for scalar access" + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] + + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] = 1 + + +class TestAtErrors: + # TODO: De-duplicate/parametrize + # test_at_series_raises_key_error2, test_at_frame_raises_key_error2 + + def test_at_series_raises_key_error(self, indexer_al): + # GH#31724 .at should match .loc + + ser = Series([1, 2, 3], index=[3, 2, 1]) + result = indexer_al(ser)[1] + assert result == 3 + + with pytest.raises(KeyError, match="a"): + indexer_al(ser)["a"] + + def test_at_frame_raises_key_error(self, indexer_al): + # GH#31724 .at should match .loc + + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + + result = indexer_al(df)[1, 0] + assert result == 3 + + with pytest.raises(KeyError, match="a"): + indexer_al(df)["a", 0] + + with pytest.raises(KeyError, match="a"): + indexer_al(df)[1, "a"] + + def 
test_at_series_raises_key_error2(self, indexer_al): + # at should not fallback + # GH#7814 + # GH#31724 .at should match .loc + ser = Series([1, 2, 3], index=list("abc")) + result = indexer_al(ser)["a"] + assert result == 1 + + with pytest.raises(KeyError, match="^0$"): + indexer_al(ser)[0] + + def test_at_frame_raises_key_error2(self, indexer_al): + # GH#31724 .at should match .loc + df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) + result = indexer_al(df)["a", "A"] + assert result == 1 + + with pytest.raises(KeyError, match="^0$"): + indexer_al(df)["a", 0] + + def test_at_frame_multiple_columns(self): + # GH#48296 - at shouldn't modify multiple columns + df = DataFrame({"a": [1, 2], "b": [3, 4]}) + new_row = [6, 7] + with pytest.raises( + InvalidIndexError, + match=f"You can only assign a scalar value not a \\{type(new_row)}", + ): + df.at[5] = new_row + + def test_at_getitem_mixed_index_no_fallback(self): + # GH#19860 + ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + with pytest.raises(KeyError, match="^0$"): + ser.at[0] + with pytest.raises(KeyError, match="^4$"): + ser.at[4] + + def test_at_categorical_integers(self): + # CategoricalIndex with integer categories that don't happen to match + # the Categorical's codes + ci = CategoricalIndex([3, 4]) + + arr = np.arange(4).reshape(2, 2) + frame = DataFrame(arr, index=ci) + + for df in [frame, frame.T]: + for key in [0, 1]: + with pytest.raises(KeyError, match=str(key)): + df.at[key, key] + + def test_at_applied_for_rows(self): + # GH#48729 .at should raise InvalidIndexError when assigning rows + df = DataFrame(index=["a"], columns=["col1", "col2"]) + new_row = [123, 15] + with pytest.raises( + InvalidIndexError, + match=f"You can only assign a scalar value not a \\{type(new_row)}", + ): + df.at["a"] = new_row diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py new file mode 100644 index 00000000..b94323e9 --- /dev/null +++ b/pandas/tests/indexing/test_categorical.py @@ -0,0 +1,559 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + Interval, + Series, + Timedelta, + Timestamp, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT + + +@pytest.fixture +def df(): + return DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cab")), name="B"), + ) + + +@pytest.fixture +def df2(): + return DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"), + ) + + +class TestCategoricalIndex: + def test_loc_scalar(self, df): + dtype = CDT(list("cab")) + result = df.loc["a"] + bidx = Series(list("aaa"), name="B").astype(dtype) + assert bidx.dtype == dtype + + expected = DataFrame({"A": [0, 1, 5]}, index=Index(bidx)) + tm.assert_frame_equal(result, expected) + + df = df.copy() + df.loc["a"] = 20 + bidx2 = Series(list("aabbca"), name="B").astype(dtype) + assert bidx2.dtype == dtype + expected = DataFrame( + { + "A": [20, 20, 2, 3, 4, 20], + }, + index=Index(bidx2), + ) + tm.assert_frame_equal(df, expected) + + # value not in the categories + with pytest.raises(KeyError, match=r"^'d'$"): + df.loc["d"] + + df2 = df.copy() + expected = df2.copy() + expected.index = expected.index.astype(object) + expected.loc["d"] = 10 + df2.loc["d"] = 10 + tm.assert_frame_equal(df2, 
expected) + + def test_loc_setitem_with_expansion_non_category(self, df): + # Setting-with-expansion with a new key "d" that is not among caegories + df.loc["a"] = 20 + + # Setting a new row on an existing column + df3 = df.copy() + df3.loc["d", "A"] = 10 + bidx3 = Index(list("aabbcad"), name="B") + expected3 = DataFrame( + { + "A": [20, 20, 2, 3, 4, 20, 10.0], + }, + index=Index(bidx3), + ) + tm.assert_frame_equal(df3, expected3) + + # Settig a new row _and_ new column + df4 = df.copy() + df4.loc["d", "C"] = 10 + expected3 = DataFrame( + { + "A": [20, 20, 2, 3, 4, 20, np.nan], + "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10], + }, + index=Index(bidx3), + ) + tm.assert_frame_equal(df4, expected3) + + def test_loc_getitem_scalar_non_category(self, df): + with pytest.raises(KeyError, match="^1$"): + df.loc[1] + + def test_slicing(self): + cat = Series(Categorical([1, 2, 3, 4])) + reverse = cat[::-1] + exp = np.array([4, 3, 2, 1], dtype=np.int64) + tm.assert_numpy_array_equal(reverse.__array__(), exp) + + df = DataFrame({"value": (np.arange(100) + 1).astype("int64")}) + df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) + + expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10) + result = df.iloc[10] + tm.assert_series_equal(result, expected) + + expected = DataFrame( + {"value": np.arange(11, 21).astype("int64")}, + index=np.arange(10, 20).astype("int64"), + ) + expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) + result = df.iloc[10:20] + tm.assert_frame_equal(result, expected) + + expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8) + result = df.loc[8] + tm.assert_series_equal(result, expected) + + def test_slicing_and_getting_ops(self): + + # systematically test the slicing operations: + # for all slicing ops: + # - returning a dataframe + # - returning a column + # - returning a row + # - returning a single value + + cats = Categorical( + ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"] + ) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 2, 3, 4, 5, 6, 7] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + # the expected values + cats2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + idx2 = Index(["j", "k"]) + values2 = [3, 4] + + # 2:4,: | "j":"k",: + exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2) + + # :,"cats" | :,0 + exp_col = Series(cats, index=idx, name="cats") + + # "j",: | 2,: + exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j") + + # "j","cats | 2,0 + exp_val = "b" + + # iloc + # frame + res_df = df.iloc[2:4, :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"].dtype) + + # row + res_row = df.iloc[2, :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + # col + res_col = df.iloc[:, 0] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col.dtype) + + # single value + res_val = df.iloc[2, 0] + assert res_val == exp_val + + # loc + # frame + res_df = df.loc["j":"k", :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"].dtype) + + # row + res_row = df.loc["j", :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + # col + res_col = df.loc[:, "cats"] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col.dtype) + + # single value + res_val = df.loc["j", "cats"] + assert res_val == exp_val + + # single value + res_val = 
df.loc["j", df.columns[0]] + assert res_val == exp_val + + # iat + res_val = df.iat[2, 0] + assert res_val == exp_val + + # at + res_val = df.at["j", "cats"] + assert res_val == exp_val + + # fancy indexing + exp_fancy = df.iloc[[2]] + + res_fancy = df[df["cats"] == "b"] + tm.assert_frame_equal(res_fancy, exp_fancy) + res_fancy = df[df["values"] == 3] + tm.assert_frame_equal(res_fancy, exp_fancy) + + # get_value + res_val = df.at["j", "cats"] + assert res_val == exp_val + + # i : int, slice, or sequence of integers + res_row = df.iloc[2] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + res_df = df.iloc[slice(2, 4)] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"].dtype) + + res_df = df.iloc[[2, 3]] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"].dtype) + + res_col = df.iloc[:, 0] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col.dtype) + + res_df = df.iloc[:, slice(0, 2)] + tm.assert_frame_equal(res_df, df) + assert is_categorical_dtype(res_df["cats"].dtype) + + res_df = df.iloc[:, [0, 1]] + tm.assert_frame_equal(res_df, df) + assert is_categorical_dtype(res_df["cats"].dtype) + + def test_slicing_doc_examples(self): + + # GH 7918 + cats = Categorical( + ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"] + ) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 2, 2, 2, 3, 4, 5] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + result = df.iloc[2:4, :] + expected = DataFrame( + { + "cats": Categorical(["b", "b"], categories=["a", "b", "c"]), + "values": [2, 2], + }, + index=["j", "k"], + ) + tm.assert_frame_equal(result, expected) + + result = df.iloc[2:4, :].dtypes + expected = Series(["category", "int64"], ["cats", "values"]) + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j", "cats"] + expected = Series( + Categorical(["a", "b", "b"], categories=["a", "b", "c"]), + index=["h", "i", "j"], + name="cats", + ) + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j", df.columns[0:1]] + expected = DataFrame( + {"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])}, + index=["h", "i", "j"], + ) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_listlike_labels(self, df): + # list of labels + result = df.loc[["c", "a"]] + expected = df.iloc[[4, 0, 1, 5]] + tm.assert_frame_equal(result, expected, check_index_type=True) + + def test_loc_getitem_listlike_unused_category(self, df2): + # GH#37901 a label that is in index.categories but not in index + # listlike containing an element in the categories but not in the values + with pytest.raises(KeyError, match=re.escape("['e'] not in index")): + df2.loc[["a", "b", "e"]] + + def test_loc_getitem_label_unused_category(self, df2): + # element in the categories but not in the values + with pytest.raises(KeyError, match=r"^'e'$"): + df2.loc["e"] + + def test_loc_getitem_non_category(self, df2): + # not all labels in the categories + with pytest.raises(KeyError, match=re.escape("['d'] not in index")): + df2.loc[["a", "d"]] + + def test_loc_setitem_expansion_label_unused_category(self, df2): + # assigning with a label that is in the categories but not in the index + df = df2.copy() + df.loc["e"] = 20 + result = df.loc[["a", "b", "e"]] + exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B") + expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index) + tm.assert_frame_equal(result, 
expected) + + def test_loc_listlike_dtypes(self): + # GH 11586 + + # unique categories and codes + index = CategoricalIndex(["a", "b", "c"]) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) + + # unique slice + res = df.loc[["a", "b"]] + exp_index = CategoricalIndex(["a", "b"], categories=index.categories) + exp = DataFrame({"A": [1, 2], "B": [4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[["a", "a", "b"]] + + exp_index = CategoricalIndex(["a", "a", "b"], categories=index.categories) + exp = DataFrame({"A": [1, 1, 2], "B": [4, 4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with pytest.raises(KeyError, match=re.escape("['x'] not in index")): + df.loc[["a", "x"]] + + def test_loc_listlike_dtypes_duplicated_categories_and_codes(self): + # duplicated categories and codes + index = CategoricalIndex(["a", "b", "a"]) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) + + # unique slice + res = df.loc[["a", "b"]] + exp = DataFrame( + {"A": [1, 3, 2], "B": [4, 6, 5]}, index=CategoricalIndex(["a", "a", "b"]) + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[["a", "a", "b"]] + exp = DataFrame( + {"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]}, + index=CategoricalIndex(["a", "a", "a", "a", "b"]), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with pytest.raises(KeyError, match=re.escape("['x'] not in index")): + df.loc[["a", "x"]] + + def test_loc_listlike_dtypes_unused_category(self): + # contains unused category + index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) + + res = df.loc[["a", "b"]] + exp = DataFrame( + {"A": [1, 3, 2], "B": [5, 7, 6]}, + index=CategoricalIndex(["a", "a", "b"], categories=list("abcde")), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[["a", "a", "b"]] + exp = DataFrame( + {"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]}, + index=CategoricalIndex(["a", "a", "a", "a", "b"], categories=list("abcde")), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with pytest.raises(KeyError, match=re.escape("['x'] not in index")): + df.loc[["a", "x"]] + + def test_loc_getitem_listlike_unused_category_raises_keyerror(self): + # key that is an *unused* category raises + index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) + + with pytest.raises(KeyError, match="e"): + # For comparison, check the scalar behavior + df.loc["e"] + + with pytest.raises(KeyError, match=re.escape("['e'] not in index")): + df.loc[["a", "e"]] + + def test_ix_categorical_index(self): + # GH 12531 + df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ")) + cdf = df.copy() + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) + + expect = Series(df.loc["A", :], index=cdf.columns, name="A") + tm.assert_series_equal(cdf.loc["A", :], expect) + + expect = Series(df.loc[:, "X"], index=cdf.index, name="X") + tm.assert_series_equal(cdf.loc[:, "X"], expect) + + exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"]) + expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index) + tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect) + + exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", 
"Z"]) + expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns) + tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) + + def test_ix_categorical_index_non_unique(self): + + # non-unique + df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX")) + cdf = df.copy() + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) + + exp_index = CategoricalIndex(list("AA"), categories=["A", "B"]) + expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index) + tm.assert_frame_equal(cdf.loc["A", :], expect) + + exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"]) + expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns) + tm.assert_frame_equal(cdf.loc[:, "X"], expect) + + expect = DataFrame( + df.loc[["A", "B"], :], + columns=cdf.columns, + index=CategoricalIndex(list("AAB")), + ) + tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect) + + expect = DataFrame( + df.loc[:, ["X", "Y"]], + index=cdf.index, + columns=CategoricalIndex(list("XXY")), + ) + tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) + + def test_loc_slice(self, df): + # GH9748 + msg = ( + "cannot do slice indexing on CategoricalIndex with these " + r"indexers \[1\] of type int" + ) + with pytest.raises(TypeError, match=msg): + df.loc[1:5] + + result = df.loc["b":"c"] + expected = df.iloc[[2, 3, 4]] + tm.assert_frame_equal(result, expected) + + def test_loc_and_at_with_categorical_index(self): + # GH 20629 + df = DataFrame( + [[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"]) + ) + + s = df[0] + assert s.loc["A"] == 1 + assert s.at["A"] == 1 + + assert df.loc["B", 1] == 4 + assert df.at["B", 1] == 4 + + @pytest.mark.parametrize( + "idx_values", + [ + # python types + [1, 2, 3], + [-1, -2, -3], + [1.5, 2.5, 3.5], + [-1.5, -2.5, -3.5], + # numpy int/uint + *(np.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_NUMPY_DTYPES), + # numpy floats + *(np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in tm.FLOAT_NUMPY_DTYPES), + # numpy object + np.array([1, "b", 3.5], dtype=object), + # pandas scalars + [Interval(1, 4), Interval(4, 6), Interval(6, 9)], + [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)], + [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")], + # pandas Integer arrays + *(pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES), + # other pandas arrays + pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array, + pd.date_range("2019-01-01", periods=3).array, + pd.timedelta_range(start="1d", periods=3).array, + ], + ) + def test_loc_getitem_with_non_string_categories(self, idx_values, ordered): + # GH-17569 + cat_idx = CategoricalIndex(idx_values, ordered=ordered) + df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx) + sl = slice(idx_values[0], idx_values[1]) + + # scalar selection + result = df.loc[idx_values[0]] + expected = Series(["foo"], index=["A"], name=idx_values[0]) + tm.assert_series_equal(result, expected) + + # list selection + result = df.loc[idx_values[:2]] + expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) + tm.assert_frame_equal(result, expected) + + # slice selection + result = df.loc[sl] + expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) + tm.assert_frame_equal(result, expected) + + # scalar assignment + result = df.copy() + result.loc[idx_values[0]] = "qux" + expected = DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + # list assignment + result 
= df.copy() + result.loc[idx_values[:2], "A"] = ["qux", "qux2"] + expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + # slice assignment + result = df.copy() + result.loc[sl, "A"] = ["qux", "qux2"] + expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + def test_getitem_categorical_with_nan(self): + # GH#41933 + ci = CategoricalIndex(["A", "B", np.nan]) + + ser = Series(range(3), index=ci) + + assert ser[np.nan] == 2 + assert ser.loc[np.nan] == 2 + + df = DataFrame(ser) + assert df.loc[np.nan, 0] == 2 + assert df.loc[np.nan][0] == 2 diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py new file mode 100644 index 00000000..81914e1b --- /dev/null +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -0,0 +1,595 @@ +from string import ascii_letters as letters + +import numpy as np +import pytest + +from pandas.errors import ( + SettingWithCopyError, + SettingWithCopyWarning, +) +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + option_context, +) +import pandas._testing as tm + +msg = "A value is trying to be set on a copy of a slice from a DataFrame" + + +def random_text(nobs=100): + # Construct a DataFrame where each row is a random slice from 'letters' + idxs = np.random.randint(len(letters), size=(nobs, 2)) + idxs.sort(axis=1) + strings = [letters[x[0] : x[1]] for x in idxs] + + return DataFrame(strings, columns=["letters"]) + + +class TestCaching: + def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): + + # this is chained assignment, but will 'work' + with option_context("chained_assignment", None): + + # #3970 + df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5}) + + # Creates a second float block + df["cc"] = 0.0 + + # caches a reference to the 'bb' series + df["bb"] + + # repr machinery triggers consolidation + repr(df) + + # Assignment to wrong series + df["bb"].iloc[0] = 0.17 + df._clear_item_cache() + if not using_copy_on_write: + tm.assert_almost_equal(df["bb"][0], 0.17) + else: + # with ArrayManager, parent is not mutated with chained assignment + tm.assert_almost_equal(df["bb"][0], 2.2) + + @pytest.mark.parametrize("do_ref", [True, False]) + def test_setitem_cache_updating(self, do_ref): + # GH 5424 + cont = ["one", "two", "three", "four", "five", "six", "seven"] + + df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)}) + + # ref the cache + if do_ref: + df.loc[0, "c"] + + # set it + df.loc[7, "c"] = 1 + + assert df.loc[0, "c"] == 0.0 + assert df.loc[7, "c"] == 1.0 + + def test_setitem_cache_updating_slices(self, using_copy_on_write): + # GH 7084 + # not updating cache on series setting with slices + expected = DataFrame( + {"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014") + ) + out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) + df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]}) + + # loop through df to update out + six = Timestamp("5/7/2014") + eix = Timestamp("5/9/2014") + for ix, row in df.iterrows(): + out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + + # try via a chain indexing + # this actually works + out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) + out_original = 
out.copy() + for ix, row in df.iterrows(): + v = out[row["C"]][six:eix] + row["D"] + out[row["C"]][six:eix] = v + + if not using_copy_on_write: + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + else: + tm.assert_frame_equal(out, out_original) + tm.assert_series_equal(out["A"], out_original["A"]) + + out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) + for ix, row in df.iterrows(): + out.loc[six:eix, row["C"]] += row["D"] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + + def test_altering_series_clears_parent_cache(self): + # GH #33675 + df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) + ser = df["A"] + + assert "A" in df._item_cache + + # Adding a new entry to ser swaps in a new array, so "A" needs to + # be removed from df._item_cache + ser["c"] = 5 + assert len(ser) == 3 + assert "A" not in df._item_cache + assert df["A"] is not ser + assert len(df["A"]) == 2 + + +class TestChaining: + def test_setitem_chained_setfault(self, using_copy_on_write): + + # GH6026 + data = ["right", "left", "left", "left", "right", "left", "timeout"] + mdata = ["right", "left", "left", "left", "right", "left", "none"] + + df = DataFrame({"response": np.array(data)}) + mask = df.response == "timeout" + df.response[mask] = "none" + if using_copy_on_write: + tm.assert_frame_equal(df, DataFrame({"response": data})) + else: + tm.assert_frame_equal(df, DataFrame({"response": mdata})) + + recarray = np.rec.fromarrays([data], names=["response"]) + df = DataFrame(recarray) + mask = df.response == "timeout" + df.response[mask] = "none" + if using_copy_on_write: + tm.assert_frame_equal(df, DataFrame({"response": data})) + else: + tm.assert_frame_equal(df, DataFrame({"response": mdata})) + + df = DataFrame({"response": data, "response1": data}) + df_original = df.copy() + mask = df.response == "timeout" + df.response[mask] = "none" + if using_copy_on_write: + tm.assert_frame_equal(df, df_original) + else: + tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) + + # GH 6056 + expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) + df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) + df["A"].iloc[0] = np.nan + result = df.head() + if using_copy_on_write: + expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) + else: + expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) + df.A.iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + @pytest.mark.arm_slow + def test_detect_chained_assignment(self, using_copy_on_write): + + with option_context("chained_assignment", "raise"): + # work with the chain + expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB")) + df = DataFrame( + np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64" + ) + df_original = df.copy() + assert df._is_copy is None + + df["A"][0] = -5 + df["A"][1] = -6 + if using_copy_on_write: + tm.assert_frame_equal(df, df_original) + else: + tm.assert_frame_equal(df, expected) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_raises( + self, using_array_manager, using_copy_on_write + ): + + # test with the chaining + df = DataFrame( + { + "A": Series(range(2), dtype="int64"), + "B": np.array(np.arange(2, 4), dtype=np.float64), + } + ) + df_original = df.copy() + assert df._is_copy is None + + if 
using_copy_on_write: + df["A"][0] = -5 + df["A"][1] = -6 + tm.assert_frame_equal(df, df_original) + elif not using_array_manager: + with pytest.raises(SettingWithCopyError, match=msg): + df["A"][0] = -5 + + with pytest.raises(SettingWithCopyError, match=msg): + df["A"][1] = np.nan + + assert df["A"]._is_copy is None + else: + # INFO(ArrayManager) for ArrayManager it doesn't matter that it's + # a mixed dataframe + df["A"][0] = -5 + df["A"][1] = -6 + expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB")) + expected["B"] = expected["B"].astype("float64") + tm.assert_frame_equal(df, expected) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_fails(self, using_copy_on_write): + + # Using a copy (the chain), fails + df = DataFrame( + { + "A": Series(range(2), dtype="int64"), + "B": np.array(np.arange(2, 4), dtype=np.float64), + } + ) + + if using_copy_on_write: + # TODO(CoW) can we still warn here? + df.loc[0]["A"] = -5 + else: + with pytest.raises(SettingWithCopyError, match=msg): + df.loc[0]["A"] = -5 + + @pytest.mark.arm_slow + def test_detect_chained_assignment_doc_example(self, using_copy_on_write): + + # Doc example + df = DataFrame( + { + "a": ["one", "one", "two", "three", "two", "one", "six"], + "c": Series(range(7), dtype="int64"), + } + ) + assert df._is_copy is None + + if using_copy_on_write: + # TODO(CoW) can we still warn here? + indexer = df.a.str.startswith("o") + df[indexer]["c"] = 42 + else: + with pytest.raises(SettingWithCopyError, match=msg): + indexer = df.a.str.startswith("o") + df[indexer]["c"] = 42 + + @pytest.mark.arm_slow + def test_detect_chained_assignment_object_dtype( + self, using_array_manager, using_copy_on_write + ): + + expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) + df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) + df_original = df.copy() + + if not using_copy_on_write: + with pytest.raises(SettingWithCopyError, match=msg): + df.loc[0]["A"] = 111 + + if using_copy_on_write: + # TODO(CoW) can we still warn here? 
+ df["A"][0] = 111 + tm.assert_frame_equal(df, df_original) + elif not using_array_manager: + with pytest.raises(SettingWithCopyError, match=msg): + df["A"][0] = 111 + + df.loc[0, "A"] = 111 + tm.assert_frame_equal(df, expected) + else: + # INFO(ArrayManager) for ArrayManager it doesn't matter that it's + # a mixed dataframe + df["A"][0] = 111 + tm.assert_frame_equal(df, expected) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_is_copy_pickle(self): + + # gh-5475: Make sure that is_copy is picked up reconstruction + df = DataFrame({"A": [1, 2]}) + assert df._is_copy is None + + with tm.ensure_clean("__tmp__pickle") as path: + df.to_pickle(path) + df2 = pd.read_pickle(path) + df2["B"] = df2["A"] + df2["B"] = df2["A"] + + @pytest.mark.arm_slow + def test_detect_chained_assignment_setting_entire_column(self): + + # gh-5597: a spurious raise as we are setting the entire column here + + df = random_text(100000) + + # Always a copy + x = df.iloc[[0, 1, 2]] + assert x._is_copy is not None + + x = df.iloc[[0, 1, 2, 4]] + assert x._is_copy is not None + + # Explicitly copy + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.loc[indexer].copy() + + assert df._is_copy is None + df["letters"] = df["letters"].apply(str.lower) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_implicit_take(self): + + # Implicitly take + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.loc[indexer] + + assert df._is_copy is not None + df["letters"] = df["letters"].apply(str.lower) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_implicit_take2(self, using_copy_on_write): + if using_copy_on_write: + pytest.skip("_is_copy is not always set for CoW") + # Implicitly take 2 + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + + df = df.loc[indexer] + assert df._is_copy is not None + df.loc[:, "letters"] = df["letters"].apply(str.lower) + + # Should be ok even though it's a copy! + assert df._is_copy is None + + df["letters"] = df["letters"].apply(str.lower) + assert df._is_copy is None + + @pytest.mark.arm_slow + def test_detect_chained_assignment_str(self): + + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_is_copy(self): + + # an identical take, so no copy + df = DataFrame({"a": [1]}).dropna() + assert df._is_copy is None + df["a"] += 1 + + @pytest.mark.arm_slow + def test_detect_chained_assignment_sorting(self): + + df = DataFrame(np.random.randn(10, 4)) + ser = df.iloc[:, 0].sort_values() + + tm.assert_series_equal(ser, df.iloc[:, 0].sort_values()) + tm.assert_series_equal(ser, df[0].sort_values()) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_false_positives(self): + + # see gh-6025: false positives + df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]}) + str(df) + + df["column1"] = df["column1"] + "b" + str(df) + + df = df[df["column2"] != 8] + str(df) + + df["column1"] = df["column1"] + "c" + str(df) + + @pytest.mark.arm_slow + def test_detect_chained_assignment_undefined_column(self, using_copy_on_write): + + # from SO: + # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc + df = DataFrame(np.arange(0, 9), columns=["count"]) + df["group"] = "b" + df_original = df.copy() + + if using_copy_on_write: + # TODO(CoW) can we still warn here? 
+ df.iloc[0:5]["group"] = "a" + tm.assert_frame_equal(df, df_original) + else: + with pytest.raises(SettingWithCopyError, match=msg): + df.iloc[0:5]["group"] = "a" + + @pytest.mark.arm_slow + def test_detect_chained_assignment_changing_dtype( + self, using_array_manager, using_copy_on_write + ): + + # Mixed type setting but same dtype & changing dtype + df = DataFrame( + { + "A": date_range("20130101", periods=5), + "B": np.random.randn(5), + "C": np.arange(5, dtype="int64"), + "D": ["a", "b", "c", "d", "e"], + } + ) + df_original = df.copy() + + if using_copy_on_write: + df.loc[2]["D"] = "foo" + df.loc[2]["C"] = "foo" + df["C"][2] = "foo" + tm.assert_frame_equal(df, df_original) + + if not using_copy_on_write: + with pytest.raises(SettingWithCopyError, match=msg): + df.loc[2]["D"] = "foo" + + with pytest.raises(SettingWithCopyError, match=msg): + df.loc[2]["C"] = "foo" + + if not using_array_manager: + with pytest.raises(SettingWithCopyError, match=msg): + df["C"][2] = "foo" + else: + # INFO(ArrayManager) for ArrayManager it doesn't matter if it's + # changing the dtype or not + df["C"][2] = "foo" + assert df.loc[2, "C"] == "foo" + + def test_setting_with_copy_bug(self, using_copy_on_write): + + # operating on a copy + df = DataFrame( + {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} + ) + df_original = df.copy() + mask = pd.isna(df.c) + + if using_copy_on_write: + df[["c"]][mask] = df[["b"]][mask] + tm.assert_frame_equal(df, df_original) + else: + with pytest.raises(SettingWithCopyError, match=msg): + df[["c"]][mask] = df[["b"]][mask] + + def test_setting_with_copy_bug_no_warning(self): + # invalid warning as we are returning a new object + # GH 8730 + df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])}) + df2 = df1[["x"]] + + # this should not raise + df2["y"] = ["g", "h", "i"] + + def test_detect_chained_assignment_warnings_errors(self, using_copy_on_write): + df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) + if using_copy_on_write: + df.loc[0]["A"] = 111 + return + + with option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(SettingWithCopyWarning): + df.loc[0]["A"] = 111 + + with option_context("chained_assignment", "raise"): + with pytest.raises(SettingWithCopyError, match=msg): + df.loc[0]["A"] = 111 + + @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})]) + def test_detect_chained_assignment_warning_stacklevel( + self, rhs, using_copy_on_write + ): + # GH#42570 + df = DataFrame(np.arange(25).reshape(5, 5)) + df_original = df.copy() + chained = df.loc[:3] + with option_context("chained_assignment", "warn"): + if not using_copy_on_write: + with tm.assert_produces_warning(SettingWithCopyWarning) as t: + chained[2] = rhs + assert t[0].filename == __file__ + else: + # INFO(CoW) no warning, and original dataframe not changed + with tm.assert_produces_warning(None): + chained[2] = rhs + tm.assert_frame_equal(df, df_original) + + # TODO(ArrayManager) fast_xs with array-like scalars is not yet working + @td.skip_array_manager_not_yet_implemented + def test_chained_getitem_with_lists(self): + + # GH6394 + # Regression in chained getitem indexing with embedded list-like from + # 0.12 + + df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]}) + expected = df["A"].iloc[2] + result = df.loc[2, "A"] + tm.assert_numpy_array_equal(result, expected) + result2 = df.iloc[2]["A"] + tm.assert_numpy_array_equal(result2, expected) + result3 = df["A"].loc[2] + tm.assert_numpy_array_equal(result3, 
expected) + result4 = df["A"].iloc[2] + tm.assert_numpy_array_equal(result4, expected) + + def test_cache_updating(self): + # GH 4939, make sure to update the cache on setitem + + df = tm.makeDataFrame() + df["A"] # cache series + df.loc["Hello Friend"] = df.iloc[0] + assert "Hello Friend" in df["A"].index + assert "Hello Friend" in df["B"].index + + def test_cache_updating2(self): + # 10264 + df = DataFrame( + np.zeros((5, 5), dtype="int64"), + columns=["a", "b", "c", "d", "e"], + index=range(5), + ) + df["f"] = 0 + df.f.values[3] = 1 + + df.f.values[3] = 2 + expected = DataFrame( + np.zeros((5, 6), dtype="int64"), + columns=["a", "b", "c", "d", "e", "f"], + index=range(5), + ) + expected.at[3, "f"] = 2 + tm.assert_frame_equal(df, expected) + expected = Series([0, 0, 0, 2, 0], name="f") + tm.assert_series_equal(df.f, expected) + + def test_iloc_setitem_chained_assignment(self, using_copy_on_write): + # GH#3970 + with option_context("chained_assignment", None): + df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) + df["cc"] = 0.0 + + ck = [True] * len(df) + + df["bb"].iloc[0] = 0.13 + + # GH#3970 this lookup used to break the chained setting to 0.15 + df.iloc[ck] + + df["bb"].iloc[0] = 0.15 + if not using_copy_on_write: + assert df["bb"].iloc[0] == 0.15 + else: + assert df["bb"].iloc[0] == 2.2 + + def test_getitem_loc_assignment_slice_state(self): + # GH 13569 + df = DataFrame({"a": [10, 20, 30]}) + df["a"].loc[4] = 40 + tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]})) + tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a")) diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py new file mode 100644 index 00000000..975a31b8 --- /dev/null +++ b/pandas/tests/indexing/test_check_indexer.py @@ -0,0 +1,105 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.indexers import check_array_indexer + + +@pytest.mark.parametrize( + "indexer, expected", + [ + # integer + ([1, 2], np.array([1, 2], dtype=np.intp)), + (np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)), + (pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)), + (pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)), + # boolean + ([True, False, True], np.array([True, False, True], dtype=np.bool_)), + (np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)), + ( + pd.array([True, False, True], dtype="boolean"), + np.array([True, False, True], dtype=np.bool_), + ), + # other + ([], np.array([], dtype=np.intp)), + ], +) +def test_valid_input(indexer, expected): + arr = np.array([1, 2, 3]) + result = check_array_indexer(arr, indexer) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")] +) +def test_boolean_na_returns_indexer(indexer): + # https://github.com/pandas-dev/pandas/issues/31503 + arr = np.array([1, 2, 3]) + + result = check_array_indexer(arr, indexer) + expected = np.array([True, False, False], dtype=bool) + + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", + [ + [True, False], + pd.array([True, False], dtype="boolean"), + np.array([True, False], dtype=np.bool_), + ], +) +def test_bool_raise_length(indexer): + arr = np.array([1, 2, 3]) + + msg = "Boolean index has wrong length" + with pytest.raises(IndexError, match=msg): + check_array_indexer(arr, indexer) + + +@pytest.mark.parametrize( + "indexer", [[0, 1, None], 
pd.array([0, 1, pd.NA], dtype="Int64")] +) +def test_int_raise_missing_values(indexer): + arr = np.array([1, 2, 3]) + + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + check_array_indexer(arr, indexer) + + +@pytest.mark.parametrize( + "indexer", + [ + [0.0, 1.0], + np.array([1.0, 2.0], dtype="float64"), + np.array([True, False], dtype=object), + pd.Index([True, False], dtype=object), + ], +) +def test_raise_invalid_array_dtypes(indexer): + arr = np.array([1, 2, 3]) + + msg = "arrays used as indices must be of integer or boolean type" + with pytest.raises(IndexError, match=msg): + check_array_indexer(arr, indexer) + + +def test_raise_nullable_string_dtype(nullable_string_dtype): + indexer = pd.array(["a", "b"], dtype=nullable_string_dtype) + arr = np.array([1, 2, 3]) + + msg = "arrays used as indices must be of integer or boolean type" + with pytest.raises(IndexError, match=msg): + check_array_indexer(arr, indexer) + + +@pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)]) +def test_pass_through_non_array_likes(indexer): + arr = np.array([1, 2, 3]) + + result = check_array_indexer(arr, indexer) + assert result == indexer diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py new file mode 100644 index 00000000..2d54a9ba --- /dev/null +++ b/pandas/tests/indexing/test_coercion.py @@ -0,0 +1,945 @@ +from __future__ import annotations + +from datetime import ( + datetime, + timedelta, +) +import itertools + +import numpy as np +import pytest + +from pandas.compat import ( + IS64, + is_platform_windows, +) + +import pandas as pd +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) + +############################################################### +# Index / Series common tests which may trigger dtype coercions +############################################################### + + +@pytest.fixture(autouse=True, scope="class") +def check_comprehensiveness(request): + # Iterate over combination of dtype, method and klass + # and ensure that each are contained within a collected test + cls = request.cls + combos = itertools.product(cls.klasses, cls.dtypes, [cls.method]) + + def has_test(combo): + klass, dtype, method = combo + cls_funcs = request.node.session.items + return any( + klass in x.name and dtype in x.name and method in x.name for x in cls_funcs + ) + + opts = request.config.option + if opts.lf or opts.keyword: + # If we are running with "last-failed" or -k foo, we expect to only + # run a subset of tests. + yield + + else: + + for combo in combos: + if not has_test(combo): + raise AssertionError( + f"test method is not defined: {cls.__name__}, {combo}" + ) + + yield + + +class CoercionBase: + + klasses = ["index", "series"] + dtypes = [ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64", + "datetime64tz", + "timedelta64", + "period", + ] + + @property + def method(self): + raise NotImplementedError(self) + + +class TestSetitemCoercion(CoercionBase): + + method = "setitem" + + # disable comprehensiveness tests, as most of these have been moved to + # tests.series.indexing.test_setitem in SetitemCastingEquivalents subclasses. 
+ klasses: list[str] = [] + + def test_setitem_series_no_coercion_from_values_list(self): + # GH35865 - int casted to str when internally calling np.array(ser.values) + ser = pd.Series(["a", 1]) + ser[:] = list(ser.values) + + expected = pd.Series(["a", 1]) + + tm.assert_series_equal(ser, expected) + + def _assert_setitem_index_conversion( + self, original_series, loc_key, expected_index, expected_dtype + ): + """test index's coercion triggered by assign key""" + temp = original_series.copy() + warn = None + if isinstance(loc_key, int) and temp.index.dtype == np.float64: + # GH#33469 + warn = FutureWarning + with tm.assert_produces_warning(warn): + temp[loc_key] = 5 + exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) + tm.assert_series_equal(temp, exp) + # check dtype explicitly for sure + assert temp.index.dtype == expected_dtype + + temp = original_series.copy() + temp.loc[loc_key] = 5 + exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) + tm.assert_series_equal(temp, exp) + # check dtype explicitly for sure + assert temp.index.dtype == expected_dtype + + @pytest.mark.parametrize( + "val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)] + ) + def test_setitem_index_object(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4], index=list("abcd")) + assert obj.index.dtype == object + + if exp_dtype is IndexError: + temp = obj.copy() + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(exp_dtype, match=msg): + temp[5] = 5 + else: + exp_index = pd.Index(list("abcd") + [val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)] + ) + def test_setitem_index_int64(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4]) + assert obj.index.dtype == np.int64 + + exp_index = pd.Index([0, 1, 2, 3, val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", [(5, IndexError), (5.1, np.float64), ("x", object)] + ) + def test_setitem_index_float64(self, val, exp_dtype, request): + obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1]) + assert obj.index.dtype == np.float64 + + if exp_dtype is IndexError: + # float + int -> int + temp = obj.copy() + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(exp_dtype, match=msg): + # GH#33469 + depr_msg = "Treating integers as positional" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + temp[5] = 5 + mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be float") + request.node.add_marker(mark) + exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_series_period(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_complex128(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_bool(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_datetime64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_datetime64tz(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_setitem_index_timedelta64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test 
not implemented") + def test_setitem_index_period(self): + raise NotImplementedError + + +class TestInsertIndexCoercion(CoercionBase): + + klasses = ["index"] + method = "insert" + + def _assert_insert_conversion(self, original, value, expected, expected_dtype): + """test coercion triggered by insert""" + target = original.copy() + res = target.insert(1, value) + tm.assert_index_equal(res, expected) + assert res.dtype == expected_dtype + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1, object), + (1.1, 1.1, object), + (False, False, object), + ("x", "x", object), + ], + ) + def test_insert_index_object(self, insert, coerced_val, coerced_dtype): + obj = pd.Index(list("abcd")) + assert obj.dtype == object + + exp = pd.Index(["a", coerced_val, "b", "c", "d"]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1, np.int64), + (1.1, 1.1, np.float64), + (False, False, object), # GH#36319 + ("x", "x", object), + ], + ) + def test_insert_index_int64(self, insert, coerced_val, coerced_dtype): + obj = Int64Index([1, 2, 3, 4]) + assert obj.dtype == np.int64 + + exp = pd.Index([1, coerced_val, 2, 3, 4]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1.0, np.float64), + (1.1, 1.1, np.float64), + (False, False, object), # GH#36319 + ("x", "x", object), + ], + ) + def test_insert_index_float64(self, insert, coerced_val, coerced_dtype): + obj = Float64Index([1.0, 2.0, 3.0, 4.0]) + assert obj.dtype == np.float64 + + exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + ], + ids=["datetime64", "datetime64tz"], + ) + @pytest.mark.parametrize( + "insert_value", + [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1], + ) + def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value): + + obj = pd.DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz + ) + assert obj.dtype == exp_dtype + + exp = pd.DatetimeIndex( + ["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"], + tz=fill_val.tz, + ) + self._assert_insert_conversion(obj, fill_val, exp, exp_dtype) + + if fill_val.tz: + + # mismatched tzawareness + ts = pd.Timestamp("2012-01-01") + result = obj.insert(1, ts) + expected = obj.astype(object).insert(1, ts) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + # mismatched tz --> cast to object (could reasonably cast to common tz) + ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = obj.insert(1, ts) + # once deprecation is enforced: + # expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) + # assert expected.dtype == obj.dtype + expected = obj.astype(object).insert(1, ts) + tm.assert_index_equal(result, expected) + + else: + # mismatched tzawareness + ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") + result = obj.insert(1, ts) + expected = obj.astype(object).insert(1, ts) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + item = 1 + result = obj.insert(1, item) + expected = obj.astype(object).insert(1, item) 
+ assert expected[1] == item + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + def test_insert_index_timedelta64(self): + obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"]) + assert obj.dtype == "timedelta64[ns]" + + # timedelta64 + timedelta64 => timedelta64 + exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"]) + self._assert_insert_conversion( + obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]" + ) + + for item in [pd.Timestamp("2012-01-01"), 1]: + result = obj.insert(1, item) + expected = obj.astype(object).insert(1, item) + assert expected.dtype == object + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"), + (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object), + (1, 1, object), + ("x", "x", object), + ], + ) + def test_insert_index_period(self, insert, coerced_val, coerced_dtype): + obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M") + assert obj.dtype == "period[M]" + + data = [ + pd.Period("2011-01", freq="M"), + coerced_val, + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-04", freq="M"), + ] + if isinstance(insert, pd.Period): + exp = pd.PeriodIndex(data, freq="M") + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + # string that can be parsed to appropriate PeriodDtype + self._assert_insert_conversion(obj, str(insert), exp, coerced_dtype) + + else: + result = obj.insert(0, insert) + expected = obj.astype(object).insert(0, insert) + tm.assert_index_equal(result, expected) + + # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M" + # casts that string to Period[M], not clear that is desirable + if not isinstance(insert, pd.Timestamp): + # non-castable string + result = obj.insert(0, str(insert)) + expected = obj.astype(object).insert(0, str(insert)) + tm.assert_index_equal(result, expected) + + msg = r"Unexpected keyword arguments {'freq'}" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(FutureWarning): + # passing keywords to pd.Index + pd.Index(data, freq="M") + + @pytest.mark.xfail(reason="Test not implemented") + def test_insert_index_complex128(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_insert_index_bool(self): + raise NotImplementedError + + +class TestWhereCoercion(CoercionBase): + + method = "where" + _cond = np.array([True, False, True, False]) + + def _assert_where_conversion( + self, original, cond, values, expected, expected_dtype + ): + """test coercion triggered by where""" + target = original.copy() + res = target.where(cond, values) + tm.assert_equal(res, expected) + assert res.dtype == expected_dtype + + def _construct_exp(self, obj, klass, fill_val, exp_dtype): + if fill_val is True: + values = klass([True, False, True, True]) + elif isinstance(fill_val, (datetime, np.datetime64)): + values = pd.date_range(fill_val, periods=4) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + + exp = klass([obj[0], values[1], obj[2], values[3]], dtype=exp_dtype) + return values, exp + + def _run_test(self, obj, fill_val, klass, exp_dtype): + cond = klass(self._cond) + + exp = klass([obj[0], fill_val, obj[2], fill_val], dtype=exp_dtype) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + values, exp = self._construct_exp(obj, klass, fill_val, exp_dtype) + 
self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, object), (1.1, object), (1 + 1j, object), (True, object)], + ) + def test_where_object(self, index_or_series, fill_val, exp_dtype): + klass = index_or_series + obj = klass(list("abcd")) + assert obj.dtype == object + self._run_test(obj, fill_val, klass, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + ) + def test_where_int64(self, index_or_series, fill_val, exp_dtype, request): + klass = index_or_series + + obj = klass([1, 2, 3, 4]) + assert obj.dtype == np.int64 + self._run_test(obj, fill_val, klass, exp_dtype) + + @pytest.mark.parametrize( + "fill_val, exp_dtype", + [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + ) + def test_where_float64(self, index_or_series, fill_val, exp_dtype, request): + klass = index_or_series + + obj = klass([1.1, 2.2, 3.3, 4.4]) + assert obj.dtype == np.float64 + self._run_test(obj, fill_val, klass, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, object), + ], + ) + def test_where_complex128(self, index_or_series, fill_val, exp_dtype): + klass = index_or_series + obj = klass([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j], dtype=np.complex128) + assert obj.dtype == np.complex128 + self._run_test(obj, fill_val, klass, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, object), (1.1, object), (1 + 1j, object), (True, np.bool_)], + ) + def test_where_series_bool(self, fill_val, exp_dtype): + klass = pd.Series # TODO: use index_or_series once we have Index[bool] + + obj = klass([True, False, True, False]) + assert obj.dtype == np.bool_ + self._run_test(obj, fill_val, klass, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), object), + ], + ids=["datetime64", "datetime64tz"], + ) + def test_where_datetime64(self, index_or_series, fill_val, exp_dtype): + klass = index_or_series + + obj = klass(pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None)) + assert obj.dtype == "datetime64[ns]" + + fv = fill_val + # do the check with each of the available datetime scalars + if exp_dtype == "datetime64[ns]": + for scalar in [fv, fv.to_pydatetime(), fv.to_datetime64()]: + self._run_test(obj, scalar, klass, exp_dtype) + else: + for scalar in [fv, fv.to_pydatetime()]: + self._run_test(obj, fill_val, klass, exp_dtype) + + @pytest.mark.xfail(reason="Test not implemented") + def test_where_index_complex128(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_where_index_bool(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_where_series_timedelta64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_where_series_period(self): + raise NotImplementedError + + @pytest.mark.parametrize( + "value", [pd.Timedelta(days=9), timedelta(days=9), np.timedelta64(9, "D")] + ) + def test_where_index_timedelta64(self, value): + tdi = pd.timedelta_range("1 Day", periods=4) + cond = np.array([True, False, False, True]) + + expected = pd.TimedeltaIndex(["1 Day", value, value, "4 Days"]) + result = tdi.where(cond, value) + tm.assert_index_equal(result, expected) + + # 
wrong-dtyped NaT + dtnat = np.datetime64("NaT", "ns") + expected = pd.Index([tdi[0], dtnat, dtnat, tdi[3]], dtype=object) + assert expected[1] is dtnat + + result = tdi.where(cond, dtnat) + tm.assert_index_equal(result, expected) + + def test_where_index_period(self): + dti = pd.date_range("2016-01-01", periods=3, freq="QS") + pi = dti.to_period("Q") + + cond = np.array([False, True, False]) + + # Passing a valid scalar + value = pi[-1] + pi.freq * 10 + expected = pd.PeriodIndex([value, pi[1], value]) + result = pi.where(cond, value) + tm.assert_index_equal(result, expected) + + # Case passing ndarray[object] of Periods + other = np.asarray(pi + pi.freq * 10, dtype=object) + result = pi.where(cond, other) + expected = pd.PeriodIndex([other[0], pi[1], other[2]]) + tm.assert_index_equal(result, expected) + + # Passing a mismatched scalar -> casts to object + td = pd.Timedelta(days=4) + expected = pd.Index([td, pi[1], td], dtype=object) + result = pi.where(cond, td) + tm.assert_index_equal(result, expected) + + per = pd.Period("2020-04-21", "D") + expected = pd.Index([per, pi[1], per], dtype=object) + result = pi.where(cond, per) + tm.assert_index_equal(result, expected) + + +class TestFillnaSeriesCoercion(CoercionBase): + + # not indexing, but place here for consistency + + method = "fillna" + + @pytest.mark.xfail(reason="Test not implemented") + def test_has_comprehensive_tests(self): + raise NotImplementedError + + def _assert_fillna_conversion(self, original, value, expected, expected_dtype): + """test coercion triggered by fillna""" + target = original.copy() + res = target.fillna(value) + tm.assert_equal(res, expected) + assert res.dtype == expected_dtype + + @pytest.mark.parametrize( + "fill_val, fill_dtype", + [(1, object), (1.1, object), (1 + 1j, object), (True, object)], + ) + def test_fillna_object(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass(["a", np.nan, "c", "d"]) + assert obj.dtype == object + + exp = klass(["a", fill_val, "c", "d"]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + ) + def test_fillna_float64(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass([1.1, np.nan, 3.3, 4.4]) + assert obj.dtype == np.float64 + + exp = klass([1.1, fill_val, 3.3, 4.4]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, object), + ], + ) + def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass([1 + 1j, np.nan, 3 + 3j, 4 + 4j], dtype=np.complex128) + assert obj.dtype == np.complex128 + + exp = klass([1 + 1j, fill_val, 3 + 3j, 4 + 4j]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), object), + (1, object), + ("x", object), + ], + ids=["datetime64", "datetime64tz", "object", "object"], + ) + def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass( + [ + pd.Timestamp("2011-01-01"), + pd.NaT, + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + + exp = klass( + [ + 
pd.Timestamp("2011-01-01"), + fill_val, + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + (pd.Timestamp("2012-01-01"), object), + (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), object), + (1, object), + ("x", object), + ], + ) + def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + tz = "US/Eastern" + + obj = klass( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.NaT, + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + assert obj.dtype == "datetime64[ns, US/Eastern]" + + exp = klass( + [ + pd.Timestamp("2011-01-01", tz=tz), + fill_val, + # Once deprecation is enforced, this becomes: + # fill_val.tz_convert(tz) if getattr(fill_val, "tz", None) + # is not None else fill_val, + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + warn = None + if getattr(fill_val, "tz", None) is not None and fill_val.tz != obj[0].tz: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="mismatched timezone"): + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val", + [ + 1, + 1.1, + 1 + 1j, + True, + pd.Interval(1, 2, closed="left"), + pd.Timestamp("2012-01-01", tz="US/Eastern"), + pd.Timestamp("2012-01-01"), + pd.Timedelta(days=1), + pd.Period("2016-01-01", "D"), + ], + ) + def test_fillna_interval(self, index_or_series, fill_val): + ii = pd.interval_range(1.0, 5.0, closed="right").insert(1, np.nan) + assert isinstance(ii.dtype, pd.IntervalDtype) + obj = index_or_series(ii) + + exp = index_or_series([ii[0], fill_val, ii[2], ii[3], ii[4]], dtype=object) + + fill_dtype = object + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_series_int64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_index_int64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_series_bool(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_index_bool(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_series_timedelta64(self): + raise NotImplementedError + + @pytest.mark.parametrize( + "fill_val", + [ + 1, + 1.1, + 1 + 1j, + True, + pd.Interval(1, 2, closed="left"), + pd.Timestamp("2012-01-01", tz="US/Eastern"), + pd.Timestamp("2012-01-01"), + pd.Timedelta(days=1), + pd.Period("2016-01-01", "W"), + ], + ) + def test_fillna_series_period(self, index_or_series, fill_val): + + pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT) + assert isinstance(pi.dtype, pd.PeriodDtype) + obj = index_or_series(pi) + + exp = index_or_series([pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object) + + fill_dtype = object + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_index_timedelta64(self): + raise NotImplementedError + + @pytest.mark.xfail(reason="Test not implemented") + def test_fillna_index_period(self): + raise NotImplementedError + + +class TestReplaceSeriesCoercion(CoercionBase): + + klasses = ["series"] + method = "replace" + + rep: dict[str, 
list] = {} + rep["object"] = ["a", "b"] + rep["int64"] = [4, 5] + rep["float64"] = [1.1, 2.2] + rep["complex128"] = [1 + 1j, 2 + 2j] + rep["bool"] = [True, False] + rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")] + + for tz in ["UTC", "US/Eastern"]: + # to test tz => different tz replacement + key = f"datetime64[ns, {tz}]" + rep[key] = [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-03", tz=tz), + ] + + rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")] + + @pytest.fixture(params=["dict", "series"]) + def how(self, request): + return request.param + + @pytest.fixture( + params=[ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64[ns]", + "datetime64[ns, UTC]", + "datetime64[ns, US/Eastern]", + "timedelta64[ns]", + ] + ) + def from_key(self, request): + return request.param + + @pytest.fixture( + params=[ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64[ns]", + "datetime64[ns, UTC]", + "datetime64[ns, US/Eastern]", + "timedelta64[ns]", + ], + ids=[ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64", + "datetime64tz", + "datetime64tz", + "timedelta64", + ], + ) + def to_key(self, request): + return request.param + + @pytest.fixture + def replacer(self, how, from_key, to_key): + """ + Object we will pass to `Series.replace` + """ + if how == "dict": + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == "series": + replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) + else: + raise ValueError + return replacer + + def test_replace_series(self, how, to_key, from_key, replacer): + index = pd.Index([3, 4], name="xxx") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + if from_key.startswith("datetime") and to_key.startswith("datetime"): + # tested below + return + elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]: + # tested below + return + + result = obj.replace(replacer) + + if (from_key == "float64" and to_key in ("int64")) or ( + from_key == "complex128" and to_key in ("int64", "float64") + ): + + if not IS64 or is_platform_windows(): + pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}") + + # Expected: do not downcast by replacement + exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key) + + else: + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "to_key", + ["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"], + indirect=True, + ) + @pytest.mark.parametrize( + "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True + ) + def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer): + index = pd.Index([3, 4], name="xyz") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + result = obj.replace(replacer) + + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "to_key", + ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], + indirect=True, + ) + @pytest.mark.parametrize( + "from_key", + ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], + indirect=True, + ) + def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer): + index = 
pd.Index([3, 4], name="xyz") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + warn = None + rep_ser = pd.Series(replacer) + if ( + isinstance(obj.dtype, pd.DatetimeTZDtype) + and isinstance(rep_ser.dtype, pd.DatetimeTZDtype) + and obj.dtype != rep_ser.dtype + ): + # mismatched tz DatetimeArray behavior will change to cast + # for setitem-like methods with mismatched tzs GH#44940 + warn = FutureWarning + + msg = "explicitly cast to object" + with tm.assert_produces_warning(warn, match=msg): + result = obj.replace(replacer) + + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + @pytest.mark.xfail(reason="Test not implemented") + def test_replace_series_period(self): + raise NotImplementedError diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py new file mode 100644 index 00000000..8d498b59 --- /dev/null +++ b/pandas/tests/indexing/test_datetime.py @@ -0,0 +1,170 @@ +import pandas as pd +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDatetimeIndex: + def test_get_loc_naive_dti_aware_str_deprecated(self): + # GH#46903 + ts = Timestamp("20130101").value + dti = pd.DatetimeIndex([ts + 50 + i for i in range(100)]) + ser = Series(range(100), index=dti) + + key = "2013-01-01 00:00:00.000000050+0000" + msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = ser[key] + assert res == 0 + + with tm.assert_produces_warning(FutureWarning, match=msg): + loc = dti.get_loc(key) + assert loc == 0 + + def test_indexing_with_datetime_tz(self): + + # GH#8260 + # support datetime64 with tz + + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + dr = date_range("20130110", periods=3) + df = DataFrame({"A": idx, "B": dr}) + df["C"] = idx + df.iloc[1, 1] = pd.NaT + df.iloc[1, 2] = pd.NaT + + expected = Series( + [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], + index=list("ABC"), + dtype="object", + name=1, + ) + + # indexing + result = df.iloc[1] + tm.assert_series_equal(result, expected) + result = df.loc[1] + tm.assert_series_equal(result, expected) + + def test_indexing_fast_xs(self): + # indexing - fast_xs + df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")}) + result = df.iloc[5] + expected = Series( + [Timestamp("2014-01-06 00:00:00+0000", tz="UTC")], index=["a"], name=5 + ) + tm.assert_series_equal(result, expected) + + result = df.loc[5] + tm.assert_series_equal(result, expected) + + # indexing - boolean + result = df[df.a > df.a[3]] + expected = df.iloc[4:] + tm.assert_frame_equal(result, expected) + + def test_consistency_with_tz_aware_scalar(self): + # xef gh-12938 + # various ways of indexing the same tz-aware scalar + df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame() + + df = pd.concat([df, df]).reset_index(drop=True) + expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels") + + result = df[0][0] + assert result == expected + + result = df.iloc[0, 0] + assert result == expected + + result = df.loc[0, 0] + assert result == expected + + result = df.iat[0, 0] + assert result == expected + + result = df.at[0, 0] + assert result == expected + + result = df[0].loc[0] + assert result == expected + + result = df[0].at[0] + assert result == expected + + def 
test_indexing_with_datetimeindex_tz(self, indexer_sl): + + # GH 12050 + # indexing on a series with a datetimeindex with tz + index = date_range("2015-01-01", periods=2, tz="utc") + + ser = Series(range(2), index=index, dtype="int64") + + # list-like indexing + + for sel in (index, list(index)): + # getitem + result = indexer_sl(ser)[sel] + expected = ser.copy() + if sel is not index: + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + + # setitem + result = ser.copy() + indexer_sl(result)[sel] = 1 + expected = Series(1, index=index) + tm.assert_series_equal(result, expected) + + # single element indexing + + # getitem + assert indexer_sl(ser)[index[1]] == 1 + + # setitem + result = ser.copy() + indexer_sl(result)[index[1]] = 5 + expected = Series([0, 5], index=index) + tm.assert_series_equal(result, expected) + + def test_nanosecond_getitem_setitem_with_tz(self): + # GH 11679 + data = ["2016-06-28 08:30:00.123456789"] + index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]") + df = DataFrame({"a": [10]}, index=index) + result = df.loc[df.index[0]] + expected = Series(10, index=["a"], name=df.index[0]) + tm.assert_series_equal(result, expected) + + result = df.copy() + result.loc[df.index[0], "a"] = -1 + expected = DataFrame(-1, index=index, columns=["a"]) + tm.assert_frame_equal(result, expected) + + def test_getitem_str_slice_millisecond_resolution(self, frame_or_series): + # GH#33589 + + keys = [ + "2017-10-25T16:25:04.151", + "2017-10-25T16:25:04.252", + "2017-10-25T16:50:05.237", + "2017-10-25T16:50:05.238", + ] + obj = frame_or_series( + [1, 2, 3, 4], + index=[Timestamp(x) for x in keys], + ) + result = obj[keys[1] : keys[2]] + expected = frame_or_series( + [2, 3], + index=[ + Timestamp(keys[1]), + Timestamp(keys[2]), + ], + ) + tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py new file mode 100644 index 00000000..afc2def7 --- /dev/null +++ b/pandas/tests/indexing/test_floats.py @@ -0,0 +1,694 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + RangeIndex, + Series, +) +import pandas._testing as tm +from pandas.core.api import ( + Float64Index, + Int64Index, +) + + +def gen_obj(klass, index): + if klass is Series: + obj = Series(np.arange(len(index)), index=index) + else: + obj = DataFrame( + np.random.randn(len(index), len(index)), index=index, columns=index + ) + return obj + + +class TestFloatIndexers: + def check(self, result, original, indexer, getitem): + """ + comparator for results + we need to take care if we are indexing on a + Series or a frame + """ + if isinstance(original, Series): + expected = original.iloc[indexer] + else: + if getitem: + expected = original.iloc[:, indexer] + else: + expected = original.iloc[indexer] + + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize( + "index_func", + [ + tm.makeStringIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ], + ) + def test_scalar_non_numeric(self, index_func, frame_or_series, indexer_sl): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + + i = index_func(5) + s = gen_obj(frame_or_series, i) + + # getting + with pytest.raises(KeyError, match="^3.0$"): + indexer_sl(s)[3.0] + + # contains + assert 3.0 not in s + + s2 = s.copy() + indexer_sl(s2)[3.0] = 10 + + if indexer_sl is tm.setitem: + assert 3.0 in s2.axes[-1] + elif indexer_sl is 
tm.loc: + assert 3.0 in s2.axes[0] + else: + assert 3.0 not in s2.axes[0] + assert 3.0 not in s2.axes[-1] + + @pytest.mark.parametrize( + "index_func", + [ + tm.makeStringIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ], + ) + def test_scalar_non_numeric_series_fallback(self, index_func): + # fallsback to position selection, series only + i = index_func(5) + s = Series(np.arange(len(i)), index=i) + s[3] + with pytest.raises(KeyError, match="^3.0$"): + s[3.0] + + def test_scalar_with_mixed(self, indexer_sl): + + s2 = Series([1, 2, 3], index=["a", "b", "c"]) + s3 = Series([1, 2, 3], index=["a", "b", 1.5]) + + # lookup in a pure string index with an invalid indexer + + with pytest.raises(KeyError, match="^1.0$"): + indexer_sl(s2)[1.0] + + with pytest.raises(KeyError, match=r"^1\.0$"): + indexer_sl(s2)[1.0] + + result = indexer_sl(s2)["b"] + expected = 2 + assert result == expected + + # mixed index so we have label + # indexing + with pytest.raises(KeyError, match="^1.0$"): + indexer_sl(s3)[1.0] + + if indexer_sl is not tm.loc: + # __getitem__ falls back to positional + result = s3[1] + expected = 2 + assert result == expected + + with pytest.raises(KeyError, match=r"^1\.0$"): + indexer_sl(s3)[1.0] + + result = indexer_sl(s3)[1.5] + expected = 3 + assert result == expected + + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_scalar_integer(self, index_func, frame_or_series, indexer_sl): + getitem = indexer_sl is not tm.loc + + # test how scalar float indexers work on int indexes + + # integer index + i = index_func(5) + obj = gen_obj(frame_or_series, i) + + # coerce to equal int + + result = indexer_sl(obj)[3.0] + self.check(result, obj, 3, getitem) + + if isinstance(obj, Series): + + def compare(x, y): + assert x == y + + expected = 100 + else: + compare = tm.assert_series_equal + if getitem: + expected = Series(100, index=range(len(obj)), name=3) + else: + expected = Series(100.0, index=range(len(obj)), name=3) + + s2 = obj.copy() + indexer_sl(s2)[3.0] = 100 + + result = indexer_sl(s2)[3.0] + compare(result, expected) + + result = indexer_sl(s2)[3] + compare(result, expected) + + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_scalar_integer_contains_float(self, index_func, frame_or_series): + # contains + # integer index + index = index_func(5) + obj = gen_obj(frame_or_series, index) + + # coerce to equal int + assert 3.0 in obj + + def test_scalar_float(self, frame_or_series): + + # scalar float indexers work on a float index + index = Index(np.arange(5.0)) + s = gen_obj(frame_or_series, index) + + # assert all operations except for iloc are ok + indexer = index[3] + for idxr in [tm.loc, tm.setitem]: + getitem = idxr is not tm.loc + + # getting + result = idxr(s)[indexer] + self.check(result, s, 3, getitem) + + # setting + s2 = s.copy() + + result = idxr(s2)[indexer] + self.check(result, s, 3, getitem) + + # random float is a KeyError + with pytest.raises(KeyError, match=r"^3\.5$"): + idxr(s)[3.5] + + # contains + assert 3.0 in s + + # iloc succeeds with an integer + expected = s.iloc[3] + s2 = s.copy() + + s2.iloc[3] = expected + result = s2.iloc[3] + self.check(result, s, 3, False) + + @pytest.mark.parametrize( + "index_func", + [ + tm.makeStringIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ], + ) + @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + def test_slice_non_numeric(self, index_func, 
idx, frame_or_series, indexer_sli): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + + index = index_func(5) + s = gen_obj(frame_or_series, index) + + # getitem + if indexer_sli is tm.iloc: + msg = ( + "cannot do positional indexing " + rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + else: + msg = ( + "cannot do slice indexing " + rf"on {type(index).__name__} with these indexers " + r"\[(3|4)(\.0)?\] " + r"of type (float|int)" + ) + with pytest.raises(TypeError, match=msg): + indexer_sli(s)[idx] + + # setitem + if indexer_sli is tm.iloc: + # otherwise we keep the same message as above + msg = "slice indices must be integers or None or have an __index__ method" + with pytest.raises(TypeError, match=msg): + indexer_sli(s)[idx] = 0 + + def test_slice_integer(self): + + # same as above, but for Integer based indexes + # these coerce to a like integer + # oob indicates if we are out of bounds + # of positional indexing + for index, oob in [ + (Int64Index(range(5)), False), + (RangeIndex(5), False), + (Int64Index(range(5)) + 10, True), + ]: + + # s is an in-range index + s = Series(range(5), index=index) + + # getitem + for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + result = s.loc[idx] + + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(3, 5) + self.check(result, s, indexer, False) + + # getitem out-of-bounds + for idx in [slice(-6, 6), slice(-6.0, 6.0)]: + + result = s.loc[idx] + + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(-6, 6) + self.check(result, s, indexer, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + rf"on {type(index).__name__} with these indexers \[-6\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[slice(-6.0, 6.0)] + + # getitem odd floats + for idx, res1 in [ + (slice(2.5, 4), slice(3, 5)), + (slice(2, 3.5), slice(2, 4)), + (slice(2.5, 3.5), slice(3, 4)), + ]: + + result = s.loc[idx] + if oob: + res = slice(0, 0) + else: + res = res1 + + self.check(result, s, res, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + rf"on {type(index).__name__} with these indexers \[(2|3)\.5\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] + + @pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]) + def test_integer_positional_indexing(self, idx): + """make sure that we are raising on positional indexing + w.r.t. 
an integer index + """ + s = Series(range(2, 6), index=range(2, 6)) + + result = s[2:4] + expected = s.iloc[2:4] + tm.assert_series_equal(result, expected) + + klass = RangeIndex + msg = ( + "cannot do (slice|positional) indexing " + rf"on {klass.__name__} with these indexers \[(2|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] + with pytest.raises(TypeError, match=msg): + s.iloc[idx] + + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_slice_integer_frame_getitem(self, index_func): + + # similar to above, but on the getitem dim (of a DataFrame) + index = index_func(5) + + s = DataFrame(np.random.randn(5, 2), index=index) + + # getitem + for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: + + result = s.loc[idx] + indexer = slice(0, 2) + self.check(result, s, indexer, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + rf"on {type(index).__name__} with these indexers \[(0|1)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] + + # getitem out-of-bounds + for idx in [slice(-10, 10), slice(-10.0, 10.0)]: + + result = s.loc[idx] + self.check(result, s, slice(-10, 10), True) + + # positional indexing + msg = ( + "cannot do slice indexing " + rf"on {type(index).__name__} with these indexers \[-10\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[slice(-10.0, 10.0)] + + # getitem odd floats + for idx, res in [ + (slice(0.5, 1), slice(1, 2)), + (slice(0, 0.5), slice(0, 1)), + (slice(0.5, 1.5), slice(1, 2)), + ]: + + result = s.loc[idx] + self.check(result, s, res, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + rf"on {type(index).__name__} with these indexers \[0\.5\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] + + @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func): + + # similar to above, but on the getitem dim (of a DataFrame) + index = index_func(5) + + s = DataFrame(np.random.randn(5, 2), index=index) + + # setitem + sc = s.copy() + sc.loc[idx] = 0 + result = sc.loc[idx].values.ravel() + assert (result == 0).all() + + # positional indexing + msg = ( + "cannot do slice indexing " + rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[idx] = 0 + + with pytest.raises(TypeError, match=msg): + s[idx] + + @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + def test_slice_float(self, idx, frame_or_series, indexer_sl): + + # same as above, but for floats + index = Index(np.arange(5.0)) + 0.1 + s = gen_obj(frame_or_series, index) + + expected = s.iloc[3:4] + + # getitem + result = indexer_sl(s)[idx] + assert isinstance(result, type(s)) + tm.assert_equal(result, expected) + + # setitem + s2 = s.copy() + indexer_sl(s2)[idx] = 0 + result = indexer_sl(s2)[idx].values.ravel() + assert (result == 0).all() + + def test_floating_index_doc_example(self): + + index = Index([1.5, 2, 3, 4.5, 5]) + s = Series(range(5), index=index) + assert s[3] == 2 + assert s.loc[3] == 2 + assert s.iloc[3] == 3 + + def test_floating_misc(self, indexer_sl): + + # related 236 + # scalar/slicing of a float index + s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64) + + # label based slicing + result = 
indexer_sl(s)[1.0:3.0] + expected = Series(1, index=[2.5]) + tm.assert_series_equal(result, expected) + + # exact indexing when found + + result = indexer_sl(s)[5.0] + assert result == 2 + + result = indexer_sl(s)[5] + assert result == 2 + + # value not found (and no fallbacking at all) + + # scalar integers + with pytest.raises(KeyError, match=r"^4$"): + indexer_sl(s)[4] + + # fancy floats/integers create the correct entry (as nan) + # fancy tests + expected = Series([2, 0], index=Float64Index([5.0, 0.0])) + for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float + tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) + + expected = Series([2, 0], index=Index([5, 0], dtype="float64")) + for fancy_idx in [[5, 0], np.array([5, 0])]: + tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) + + # all should return the same as we are slicing 'the same' + result1 = indexer_sl(s)[2:5] + result2 = indexer_sl(s)[2.0:5.0] + result3 = indexer_sl(s)[2.0:5] + result4 = indexer_sl(s)[2.1:5] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + tm.assert_series_equal(result1, result4) + + expected = Series([1, 2], index=[2.5, 5.0]) + result = indexer_sl(s)[2:5] + + tm.assert_series_equal(result, expected) + + # list selection + result1 = indexer_sl(s)[[0.0, 5, 10]] + result2 = s.iloc[[0, 2, 4]] + tm.assert_series_equal(result1, result2) + + with pytest.raises(KeyError, match="not in index"): + indexer_sl(s)[[1.6, 5, 10]] + + with pytest.raises(KeyError, match="not in index"): + indexer_sl(s)[[0, 1, 2]] + + result = indexer_sl(s)[[2.5, 5]] + tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0])) + + result = indexer_sl(s)[[2.5]] + tm.assert_series_equal(result, Series([1], index=[2.5])) + + def test_float64index_slicing_bug(self): + # GH 5557, related to slicing a float index + ser = { + 256: 2321.0, + 1: 78.0, + 2: 2716.0, + 3: 0.0, + 4: 369.0, + 5: 0.0, + 6: 269.0, + 7: 0.0, + 8: 0.0, + 9: 0.0, + 10: 3536.0, + 11: 0.0, + 12: 24.0, + 13: 0.0, + 14: 931.0, + 15: 0.0, + 16: 101.0, + 17: 78.0, + 18: 9643.0, + 19: 0.0, + 20: 0.0, + 21: 0.0, + 22: 63761.0, + 23: 0.0, + 24: 446.0, + 25: 0.0, + 26: 34773.0, + 27: 0.0, + 28: 729.0, + 29: 78.0, + 30: 0.0, + 31: 0.0, + 32: 3374.0, + 33: 0.0, + 34: 1391.0, + 35: 0.0, + 36: 361.0, + 37: 0.0, + 38: 61808.0, + 39: 0.0, + 40: 0.0, + 41: 0.0, + 42: 6677.0, + 43: 0.0, + 44: 802.0, + 45: 0.0, + 46: 2691.0, + 47: 0.0, + 48: 3582.0, + 49: 0.0, + 50: 734.0, + 51: 0.0, + 52: 627.0, + 53: 70.0, + 54: 2584.0, + 55: 0.0, + 56: 324.0, + 57: 0.0, + 58: 605.0, + 59: 0.0, + 60: 0.0, + 61: 0.0, + 62: 3989.0, + 63: 10.0, + 64: 42.0, + 65: 0.0, + 66: 904.0, + 67: 0.0, + 68: 88.0, + 69: 70.0, + 70: 8172.0, + 71: 0.0, + 72: 0.0, + 73: 0.0, + 74: 64902.0, + 75: 0.0, + 76: 347.0, + 77: 0.0, + 78: 36605.0, + 79: 0.0, + 80: 379.0, + 81: 70.0, + 82: 0.0, + 83: 0.0, + 84: 3001.0, + 85: 0.0, + 86: 1630.0, + 87: 7.0, + 88: 364.0, + 89: 0.0, + 90: 67404.0, + 91: 9.0, + 92: 0.0, + 93: 0.0, + 94: 7685.0, + 95: 0.0, + 96: 1017.0, + 97: 0.0, + 98: 2831.0, + 99: 0.0, + 100: 2963.0, + 101: 0.0, + 102: 854.0, + 103: 0.0, + 104: 0.0, + 105: 0.0, + 106: 0.0, + 107: 0.0, + 108: 0.0, + 109: 0.0, + 110: 0.0, + 111: 0.0, + 112: 0.0, + 113: 0.0, + 114: 0.0, + 115: 0.0, + 116: 0.0, + 117: 0.0, + 118: 0.0, + 119: 0.0, + 120: 0.0, + 121: 0.0, + 122: 0.0, + 123: 0.0, + 124: 0.0, + 125: 0.0, + 126: 67744.0, + 127: 22.0, + 128: 264.0, + 129: 0.0, + 260: 197.0, + 268: 0.0, + 265: 0.0, + 269: 0.0, + 261: 0.0, + 266: 1198.0, + 267: 0.0, + 262: 2629.0, + 258: 
775.0, + 257: 0.0, + 263: 0.0, + 259: 0.0, + 264: 163.0, + 250: 10326.0, + 251: 0.0, + 252: 1228.0, + 253: 0.0, + 254: 2769.0, + 255: 0.0, + } + + # smoke test for the repr + s = Series(ser) + result = s.value_counts() + str(result) diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py new file mode 100644 index 00000000..91630388 --- /dev/null +++ b/pandas/tests/indexing/test_iat.py @@ -0,0 +1,49 @@ +import numpy as np + +from pandas import ( + DataFrame, + Series, + period_range, +) + + +def test_iat(float_frame): + + for i, row in enumerate(float_frame.index): + for j, col in enumerate(float_frame.columns): + result = float_frame.iat[i, j] + expected = float_frame.at[row, col] + assert result == expected + + +def test_iat_duplicate_columns(): + # https://github.com/pandas-dev/pandas/issues/11754 + df = DataFrame([[1, 2]], columns=["x", "x"]) + assert df.iat[0, 0] == 1 + + +def test_iat_getitem_series_with_period_index(): + # GH#4390, iat incorrectly indexing + index = period_range("1/1/2001", periods=10) + ser = Series(np.random.randn(10), index=index) + expected = ser[index[0]] + result = ser.iat[0] + assert expected == result + + +def test_iat_setitem_item_cache_cleared(indexer_ial, using_copy_on_write): + # GH#45684 + data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)} + df = DataFrame(data).copy() + ser = df["y"] + + # previously this iat setting would split the block and fail to clear + # the item_cache. + indexer_ial(df)[7, 0] = 9999 + + indexer_ial(df)[7, 1] = 1234 + + assert df.iat[7, 1] == 1234 + if not using_copy_on_write: + assert ser.iloc[-1] == 1234 + assert df.iloc[-1, -1] == 1234 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py new file mode 100644 index 00000000..dcc95d9e --- /dev/null +++ b/pandas/tests/indexing/test_iloc.py @@ -0,0 +1,1458 @@ +""" test positional based indexing with iloc """ + +from datetime import datetime +import re +from warnings import ( + catch_warnings, + simplefilter, +) + +import numpy as np +import pytest + +from pandas.errors import IndexingError +import pandas.util._test_decorators as td + +from pandas import ( + NA, + Categorical, + CategoricalDtype, + DataFrame, + Index, + Interval, + NaT, + Series, + Timestamp, + array, + concat, + date_range, + interval_range, + isna, + to_datetime, +) +import pandas._testing as tm +from pandas.api.types import is_scalar +from pandas.tests.indexing.common import Base + +# We pass through the error message from numpy +_slice_iloc_msg = re.escape( + "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean arrays are valid indices" +) + + +class TestiLoc(Base): + @pytest.mark.parametrize("key", [2, -1, [0, 1, 2]]) + def test_iloc_getitem_int_and_list_int(self, key): + self.check_result( + "iloc", + key, + typs=["labels", "mixed", "ts", "floats", "empty"], + fails=IndexError, + ) + + # array of ints (GH5006), make sure that a single indexer is returning + # the correct type + + +class TestiLocBaseIndependent: + """Tests Independent Of Base Class""" + + @pytest.mark.parametrize( + "key", + [ + slice(None), + slice(3), + range(3), + [0, 1, 2], + Index(range(3)), + np.asarray([0, 1, 2]), + ], + ) + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager): + frame = DataFrame({0: range(3)}, dtype=object) + + cat = Categorical(["alpha", "beta", "gamma"]) + + if not using_array_manager: + assert 
frame._mgr.blocks[0]._can_hold_element(cat) + + df = frame.copy() + orig_vals = df.values + + overwrite = isinstance(key, slice) and key == slice(None) + warn = None + if overwrite: + warn = DeprecationWarning + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(warn, match=msg): + indexer(df)[key, 0] = cat + + if overwrite: + # TODO: GH#39986 this probably shouldn't behave differently + expected = DataFrame({0: cat}) + assert not np.shares_memory(df.values, orig_vals) + else: + expected = DataFrame({0: cat}).astype(object) + if not using_array_manager: + assert np.shares_memory(df[0].values, orig_vals) + + tm.assert_frame_equal(df, expected) + + # check we dont have a view on cat (may be undesired GH#39986) + df.iloc[0, 0] = "gamma" + assert cat[0] != "gamma" + + # TODO with mixed dataframe ("split" path), we always overwrite the column + frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) + df = frame.copy() + orig_vals = df.values + with tm.assert_produces_warning(DeprecationWarning, match=msg): + indexer(df)[key, 0] = cat + expected = DataFrame({0: cat, 1: range(3)}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("box", [array, Series]) + def test_iloc_setitem_ea_inplace(self, frame_or_series, box): + # GH#38952 Case with not setting a full column + # IntegerArray without NAs + arr = array([1, 2, 3, 4]) + obj = frame_or_series(arr.to_numpy("i8")) + + if frame_or_series is Series: + values = obj.values + else: + values = obj._mgr.arrays[0] + + if frame_or_series is Series: + obj.iloc[:2] = box(arr[2:]) + else: + obj.iloc[:2, 0] = box(arr[2:]) + + expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8")) + tm.assert_equal(obj, expected) + + # Check that we are actually in-place + if frame_or_series is Series: + assert obj.values is values + else: + assert np.shares_memory(obj[0].values, values) + + def test_is_scalar_access(self): + # GH#32085 index with duplicates doesn't matter for _is_scalar_access + index = Index([1, 2, 1]) + ser = Series(range(3), index=index) + + assert ser.iloc._is_scalar_access((1,)) + + df = ser.to_frame() + assert df.iloc._is_scalar_access((1, 0)) + + def test_iloc_exceeds_bounds(self): + + # GH6296 + # iloc should allow indexers that exceed the bounds + df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE")) + + # lists of positions should raise IndexError! 
+ msg = "positional indexers are out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.iloc[:, [0, 1, 2, 3, 4, 5]] + with pytest.raises(IndexError, match=msg): + df.iloc[[1, 30]] + with pytest.raises(IndexError, match=msg): + df.iloc[[1, -30]] + with pytest.raises(IndexError, match=msg): + df.iloc[[100]] + + s = df["A"] + with pytest.raises(IndexError, match=msg): + s.iloc[[100]] + with pytest.raises(IndexError, match=msg): + s.iloc[[-100]] + + # still raise on a single indexer + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.iloc[30] + with pytest.raises(IndexError, match=msg): + df.iloc[-30] + + # GH10779 + # single positive/negative indexer exceeding Series bounds should raise + # an IndexError + with pytest.raises(IndexError, match=msg): + s.iloc[30] + with pytest.raises(IndexError, match=msg): + s.iloc[-30] + + # slices are ok + result = df.iloc[:, 4:10] # 0 < start < len < stop + expected = df.iloc[:, 4:] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -4:-10] # stop < 0 < start < len + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:, :4:-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:, 4::-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:4] # start < 0 < stop < len + expected = df.iloc[:, :4] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4] # 0 < stop < len < start + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:11] # 0 < len < start < stop + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + # slice bounds exceeding is ok + result = s.iloc[18:30] + expected = s.iloc[18:] + tm.assert_series_equal(result, expected) + + result = s.iloc[30:] + expected = s.iloc[:0] + tm.assert_series_equal(result, expected) + + result = s.iloc[30::-1] + expected = s.iloc[::-1] + tm.assert_series_equal(result, expected) + + # doc example + def check(result, expected): + str(result) + result.dtypes + tm.assert_frame_equal(result, expected) + + dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) + check(dfl.iloc[4:6], dfl.iloc[[4]]) + + msg = "positional indexers are out-of-bounds" + with pytest.raises(IndexError, match=msg): + dfl.iloc[[4, 5, 6]] + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + dfl.iloc[:, 4] + + @pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))]) + @pytest.mark.parametrize( + "index_vals,column_vals", + [ + ([slice(None), ["A", "D"]]), + (["1", "2"], slice(None)), + ([datetime(2019, 1, 1)], slice(None)), + ], + ) + def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals): + # GH 25753 + df = DataFrame( + np.random.randn(len(index), len(columns)), index=index, columns=columns + ) + msg = ".iloc requires numeric indexers, got" + with pytest.raises(IndexError, match=msg): + df.iloc[index_vals, column_vals] + + def test_iloc_getitem_invalid_scalar(self, frame_or_series): + # GH 21982 + + obj = DataFrame(np.arange(100).reshape(10, 10)) + obj = tm.get_obj(obj, 
frame_or_series) + + with pytest.raises(TypeError, match="Cannot index by location index"): + obj.iloc["a"] + + def test_iloc_array_not_mutating_negative_indices(self): + + # GH 21867 + array_with_neg_numbers = np.array([1, 2, -1]) + array_copy = array_with_neg_numbers.copy() + df = DataFrame( + {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]}, + index=[1, 2, 3], + ) + df.iloc[array_with_neg_numbers] + tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) + df.iloc[:, array_with_neg_numbers] + tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) + + def test_iloc_getitem_neg_int_can_reach_first_index(self): + # GH10547 and GH10779 + # negative integers should be able to reach index 0 + df = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]}) + s = df["A"] + + expected = df.iloc[0] + result = df.iloc[-3] + tm.assert_series_equal(result, expected) + + expected = df.iloc[[0]] + result = df.iloc[[-3]] + tm.assert_frame_equal(result, expected) + + expected = s.iloc[0] + result = s.iloc[-3] + assert result == expected + + expected = s.iloc[[0]] + result = s.iloc[[-3]] + tm.assert_series_equal(result, expected) + + # check the length 1 Series case highlighted in GH10547 + expected = Series(["a"], index=["A"]) + result = expected.iloc[[-1]] + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_dups(self): + # GH 6766 + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = concat([df1, df2], axis=1) + + # cross-sectional indexing + result = df.iloc[0, 0] + assert isna(result) + + result = df.iloc[0, :] + expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0) + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_array(self): + df = DataFrame( + [ + {"A": 1, "B": 2, "C": 3}, + {"A": 100, "B": 200, "C": 300}, + {"A": 1000, "B": 2000, "C": 3000}, + ] + ) + + expected = DataFrame([{"A": 1, "B": 2, "C": 3}]) + tm.assert_frame_equal(df.iloc[[0]], expected) + + expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) + tm.assert_frame_equal(df.iloc[[0, 1]], expected) + + expected = DataFrame([{"B": 2, "C": 3}, {"B": 2000, "C": 3000}], index=[0, 2]) + result = df.iloc[[0, 2], [1, 2]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_bool(self): + df = DataFrame( + [ + {"A": 1, "B": 2, "C": 3}, + {"A": 100, "B": 200, "C": 300}, + {"A": 1000, "B": 2000, "C": 3000}, + ] + ) + + expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) + result = df.iloc[[True, True, False]] + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + [{"A": 1, "B": 2, "C": 3}, {"A": 1000, "B": 2000, "C": 3000}], index=[0, 2] + ) + result = df.iloc[lambda x: x.index % 2 == 0] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) + def test_iloc_getitem_bool_diff_len(self, index): + # GH26658 + s = Series([1, 2, 3]) + msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}" + with pytest.raises(IndexError, match=msg): + s.iloc[index] + + def test_iloc_getitem_slice(self): + df = DataFrame( + [ + {"A": 1, "B": 2, "C": 3}, + {"A": 100, "B": 200, "C": 300}, + {"A": 1000, "B": 2000, "C": 3000}, + ] + ) + + expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) + result = df.iloc[:2] + tm.assert_frame_equal(result, expected) + + expected = DataFrame([{"A": 100, "B": 200}], index=[1]) + result = 
df.iloc[1:2, 0:2] + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + [{"A": 1, "C": 3}, {"A": 100, "C": 300}, {"A": 1000, "C": 3000}] + ) + result = df.iloc[:, lambda df: [0, 2]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_slice_dups(self): + + df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) + df2 = DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ) + + # axis=1 + df = concat([df1, df2], axis=1) + tm.assert_frame_equal(df.iloc[:, :4], df1) + tm.assert_frame_equal(df.iloc[:, 4:], df2) + + df = concat([df2, df1], axis=1) + tm.assert_frame_equal(df.iloc[:, :2], df2) + tm.assert_frame_equal(df.iloc[:, 2:], df1) + + exp = concat([df2, df1.iloc[:, [0]]], axis=1) + tm.assert_frame_equal(df.iloc[:, 0:3], exp) + + # axis=0 + df = concat([df, df], axis=0) + tm.assert_frame_equal(df.iloc[0:10, :2], df2) + tm.assert_frame_equal(df.iloc[0:10, 2:], df1) + tm.assert_frame_equal(df.iloc[10:, :2], df2) + tm.assert_frame_equal(df.iloc[10:, 2:], df1) + + def test_iloc_setitem(self): + df = DataFrame( + np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) + ) + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + assert result == 1 + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + # GH5771 + s = Series(0, index=[4, 5, 6]) + s.iloc[1:2] += 1 + expected = Series([0, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + def test_iloc_setitem_axis_argument(self): + # GH45032 + df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]]) + expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]]) + df.iloc(axis=0)[2] = 5 + tm.assert_frame_equal(df, expected) + + df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]]) + expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]]) + df.iloc(axis=1)[2] = 5 + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_list(self): + + # setitem with an iloc list + df = DataFrame( + np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"] + ) + df.iloc[[0, 1], [1, 2]] + df.iloc[[0, 1], [1, 2]] += 100 + + expected = DataFrame( + np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), + index=["A", "B", "C"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_pandas_object(self): + # GH 17193 + s_orig = Series([0, 1, 2, 3]) + expected = Series([0, -1, -2, 3]) + + s = s_orig.copy() + s.iloc[Series([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.iloc[Index([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + + def test_iloc_setitem_dups(self): + + # GH 6766 + # iloc with a mask aligning from another iloc + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = concat([df1, df2], axis=1) + + expected = df.fillna(3) + inds = np.isnan(df.iloc[:, 0]) + mask = inds[inds].index + df.iloc[mask, 0] = df.iloc[mask, 2] + tm.assert_frame_equal(df, expected) + + # del a dup column across blocks + expected = DataFrame({0: [1, 2], 1: [3, 4]}) + expected.columns = ["B", "B"] + del df["A"] + tm.assert_frame_equal(df, expected) + + # assign back to self + df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] + tm.assert_frame_equal(df, expected) + + # reversed x 2 + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 
1]].reset_index(drop=True) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( + self, using_array_manager + ): + # Same as the "assign back to self" check in test_iloc_setitem_dups + # but on a DataFrame with multiple blocks + df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) + + # setting float values that can be held by existing integer arrays + # is inplace + df.iloc[:, 0] = df.iloc[:, 0].astype("f8") + if not using_array_manager: + assert len(df._mgr.blocks) == 1 + + # if the assigned values cannot be held by existing integer arrays, + # we cast + df.iloc[:, 0] = df.iloc[:, 0] + 0.5 + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + expected = df.copy() + + # assign back to self + df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] + + tm.assert_frame_equal(df, expected) + + # TODO: GH#27620 this test used to compare iloc against ix; check if this + # is redundant with another test comparing iloc against loc + def test_iloc_getitem_frame(self): + df = DataFrame( + np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2) + ) + + result = df.iloc[2] + exp = df.loc[4] + tm.assert_series_equal(result, exp) + + result = df.iloc[2, 2] + exp = df.loc[4, 4] + assert result == exp + + # slice + result = df.iloc[4:8] + expected = df.loc[8:14] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 2:3] + expected = df.loc[:, 4:5] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[[0, 1, 3]] + expected = df.loc[[0, 2, 6]] + tm.assert_frame_equal(result, expected) + + result = df.iloc[[0, 1, 3], [0, 1]] + expected = df.loc[[0, 2, 6], [0, 2]] + tm.assert_frame_equal(result, expected) + + # neg indices + result = df.iloc[[-1, 1, 3], [-1, 1]] + expected = df.loc[[18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # dups indices + result = df.iloc[[-1, -1, 1, 3], [-1, 1]] + expected = df.loc[[18, 18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # with index-like + s = Series(index=range(1, 5), dtype=object) + result = df.iloc[s.index] + expected = df.loc[[2, 4, 6, 8]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_labelled_frame(self): + # try with labelled frame + df = DataFrame( + np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") + ) + + result = df.iloc[1, 1] + exp = df.loc["b", "B"] + assert result == exp + + result = df.iloc[:, 2:3] + expected = df.loc[:, ["C"]] + tm.assert_frame_equal(result, expected) + + # negative indexing + result = df.iloc[-1, -1] + exp = df.loc["j", "D"] + assert result == exp + + # out-of-bounds exception + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(IndexError, match=msg): + df.iloc[10, 5] + + # trying to use a label + msg = ( + r"Location based indexing can only have \[integer, integer " + r"slice \(START point is INCLUDED, END point is EXCLUDED\), " + r"listlike of integers, boolean array\] types" + ) + with pytest.raises(ValueError, match=msg): + df.iloc["j", "D"] + + def test_iloc_getitem_doc_issue(self, using_array_manager): + + # multi axis slicing issue with single block + # surfaced in GH 6059 + + arr = np.random.randn(6, 4) + index = date_range("20130101", periods=6) + columns = list("ABCD") + df = DataFrame(arr, index=index, columns=columns) + + # defines ref_locs + df.describe() + + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=columns[0:2]) + 
tm.assert_frame_equal(result, expected) + + # for dups + df.columns = list("aaaa") + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=list("aa")) + tm.assert_frame_equal(result, expected) + + # related + arr = np.random.randn(6, 4) + index = list(range(0, 12, 2)) + columns = list(range(0, 8, 2)) + df = DataFrame(arr, index=index, columns=columns) + + if not using_array_manager: + df._mgr.blocks[0].mgr_locs + result = df.iloc[1:5, 2:4] + str(result) + result.dtypes + expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4]) + tm.assert_frame_equal(result, expected) + + def test_iloc_setitem_series(self): + df = DataFrame( + np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") + ) + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + assert result == 1 + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + s = Series(np.random.randn(10), index=range(0, 20, 2)) + + s.iloc[1] = 1 + result = s.iloc[1] + assert result == 1 + + s.iloc[:4] = 0 + expected = s.iloc[:4] + result = s.iloc[:4] + tm.assert_series_equal(result, expected) + + s = Series([-1] * 6) + s.iloc[0::2] = [0, 2, 4] + s.iloc[1::2] = [1, 3, 5] + result = s + expected = Series([0, 1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + def test_iloc_setitem_list_of_lists(self): + + # GH 7551 + # list-of-list is set incorrectly in mixed vs. single dtyped frames + df = DataFrame( + {"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")} + ) + df.iloc[2:4] = [[10, 11], [12, 13]] + expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame( + {"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")} + ) + df.iloc[2:4] = [["x", 11], ["y", 13]] + expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])]) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_iloc_setitem_with_scalar_index(self, indexer, value): + # GH #19474 + # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". 
+ + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df.iloc[0, indexer] = value + result = df.iloc[0, 0] + + assert is_scalar(result) and result == "Z" + + def test_iloc_mask(self): + + # GH 3631, iloc with a mask (of a series) should raise + df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"]) + mask = df.a % 2 == 0 + msg = "iLocation based boolean indexing cannot use an indexable as a mask" + with pytest.raises(ValueError, match=msg): + df.iloc[mask] + mask.index = range(len(mask)) + msg = "iLocation based boolean indexing on an integer type is not available" + with pytest.raises(NotImplementedError, match=msg): + df.iloc[mask] + + # ndarray ok + result = df.iloc[np.array([True] * len(mask), dtype=bool)] + tm.assert_frame_equal(result, df) + + # the possibilities + locs = np.arange(4) + nums = 2**locs + reps = [bin(num) for num in nums] + df = DataFrame({"locs": locs, "nums": nums}, reps) + + expected = { + (None, ""): "0b1100", + (None, ".loc"): "0b1100", + (None, ".iloc"): "0b1100", + ("index", ""): "0b11", + ("index", ".loc"): "0b11", + ("index", ".iloc"): ( + "iLocation based boolean indexing cannot use an indexable as a mask" + ), + ("locs", ""): "Unalignable boolean Series provided as indexer " + "(index of the boolean Series and of the indexed " + "object do not match).", + ("locs", ".loc"): "Unalignable boolean Series provided as indexer " + "(index of the boolean Series and of the " + "indexed object do not match).", + ("locs", ".iloc"): ( + "iLocation based boolean indexing on an " + "integer type is not available" + ), + } + + # UserWarnings from reindex of a boolean mask + with catch_warnings(record=True): + simplefilter("ignore", UserWarning) + for idx in [None, "index", "locs"]: + mask = (df.nums > 2).values + if idx: + mask = Series(mask, list(reversed(getattr(df, idx)))) + for method in ["", ".loc", ".iloc"]: + try: + if method: + accessor = getattr(df, method[1:]) + else: + accessor = df + answer = str(bin(accessor[mask]["nums"].sum())) + except (ValueError, IndexingError, NotImplementedError) as e: + answer = str(e) + + key = ( + idx, + method, + ) + r = expected.get(key) + if r != answer: + raise AssertionError( + f"[{key}] does not match [{answer}], received [{r}]" + ) + + def test_iloc_non_unique_indexing(self): + + # GH 4017, non-unique indexing (on the axis) + df = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000}) + idx = np.arange(30) * 99 + expected = df.iloc[idx] + + df3 = concat([df, 2 * df, 3 * df]) + result = df3.iloc[idx] + + tm.assert_frame_equal(result, expected) + + df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000}) + df2 = concat([df2, 2 * df2, 3 * df2]) + + with pytest.raises(KeyError, match="not in index"): + df2.loc[idx] + + def test_iloc_empty_list_indexer_is_ok(self): + + df = tm.makeCustomDataframe(5, 2) + # vertical empty + tm.assert_frame_equal( + df.iloc[:, []], + df.iloc[:, :0], + check_index_type=True, + check_column_type=True, + ) + # horizontal empty + tm.assert_frame_equal( + df.iloc[[], :], + df.iloc[:0, :], + check_index_type=True, + check_column_type=True, + ) + # horizontal empty + tm.assert_frame_equal( + df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + + def test_identity_slice_returns_new_object( + self, using_array_manager, using_copy_on_write, request + ): + # GH13873 + if using_array_manager: + mark = pytest.mark.xfail( + reason="setting with .loc[:, 'a'] does not alter inplace" + ) + request.node.add_marker(mark) + + original_df = DataFrame({"a": [1, 2, 3]}) + sliced_df = 
original_df.iloc[:] + assert sliced_df is not original_df + + # should be a shallow copy + assert np.shares_memory(original_df["a"], sliced_df["a"]) + + # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig + # depending on CoW + original_df.loc[:, "a"] = [4, 4, 4] + if using_copy_on_write: + assert (sliced_df["a"] == [1, 2, 3]).all() + else: + assert (sliced_df["a"] == 4).all() + + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.iloc[:] + assert sliced_series is not original_series + + # should also be a shallow copy + original_series[:3] = [7, 8, 9] + if using_copy_on_write: + # shallow copy not updated (CoW) + assert all(sliced_series[:3] == [1, 2, 3]) + else: + assert all(sliced_series[:3] == [7, 8, 9]) + + def test_indexing_zerodim_np_array(self): + # GH24919 + df = DataFrame([[1, 2], [3, 4]]) + result = df.iloc[np.array(0)] + s = Series([1, 2], name=0) + tm.assert_series_equal(result, s) + + def test_series_indexing_zerodim_np_array(self): + # GH24919 + s = Series([1, 2]) + result = s.iloc[np.array(0)] + assert result == 1 + + @td.skip_array_manager_not_yet_implemented + def test_iloc_setitem_categorical_updates_inplace(self, using_copy_on_write): + # Mixed dtype ensures we go through take_split_path in setitem_with_indexer + cat = Categorical(["A", "B", "C"]) + cat_original = cat.copy() + df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False) + + assert tm.shares_memory(df[1], cat) + + # This should modify our original values in-place + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.iloc[:, 0] = cat[::-1] + + if not using_copy_on_write: + assert tm.shares_memory(df[1], cat) + expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"]) + else: + expected = cat_original + + tm.assert_categorical_equal(cat, expected) + + def test_iloc_with_boolean_operation(self): + # GH 20627 + result = DataFrame([[0, 1], [2, 3], [4, 5], [6, np.nan]]) + result.iloc[result.index <= 2] *= 2 + expected = DataFrame([[0, 2], [4, 6], [8, 10], [6, np.nan]]) + tm.assert_frame_equal(result, expected) + + result.iloc[result.index > 2] *= 2 + expected = DataFrame([[0, 2], [4, 6], [8, 10], [12, np.nan]]) + tm.assert_frame_equal(result, expected) + + result.iloc[[True, True, False, False]] *= 2 + expected = DataFrame([[0, 4], [8, 12], [8, 10], [12, np.nan]]) + tm.assert_frame_equal(result, expected) + + result.iloc[[False, False, True, True]] /= 2 + expected = DataFrame([[0, 4.0], [8, 12.0], [4, 5.0], [6, np.nan]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self): + # GH#29521 + df = DataFrame({"x": Categorical("a b c d e".split())}) + result = df.iloc[0] + raw_cat = Categorical(["a"], categories=["a", "b", "c", "d", "e"]) + expected = Series(raw_cat, index=["x"], name=0, dtype="category") + + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_categorical_values(self): + # GH#14580 + # test iloc() on Series with Categorical data + + ser = Series([1, 2, 3]).astype("category") + + # get slice + result = ser.iloc[0:2] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get list of indexes + result = ser.iloc[[0, 1]] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get boolean array + result = ser.iloc[[True, False, False]] + expected = Series([1]).astype(CategoricalDtype([1, 2, 
3])) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("value", [None, NaT, np.nan]) + def test_iloc_setitem_td64_values_cast_na(self, value): + # GH#18586 + series = Series([0, 1, 2], dtype="timedelta64[ns]") + series.iloc[0] = value + expected = Series([NaT, 1, 2], dtype="timedelta64[ns]") + tm.assert_series_equal(series, expected) + + @pytest.mark.parametrize("not_na", [Interval(0, 1), "a", 1.0]) + def test_setitem_mix_of_nan_and_interval(self, not_na, nulls_fixture): + # GH#27937 + dtype = CategoricalDtype(categories=[not_na]) + ser = Series( + [nulls_fixture, nulls_fixture, nulls_fixture, nulls_fixture], dtype=dtype + ) + ser.iloc[:3] = [nulls_fixture, not_na, nulls_fixture] + exp = Series([nulls_fixture, not_na, nulls_fixture, nulls_fixture], dtype=dtype) + tm.assert_series_equal(ser, exp) + + def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): + idx = Index([]) + obj = DataFrame(np.random.randn(len(idx), len(idx)), index=idx, columns=idx) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msg = f"Cannot set values with ndim > {obj.ndim}" + with pytest.raises(ValueError, match=msg): + obj.iloc[nd3] = 0 + + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_iloc_getitem_read_only_values(self, indexer): + # GH#10043 this is fundamentally a test for iloc, but test loc while + # we're here + rw_array = np.eye(10) + rw_df = DataFrame(rw_array) + + ro_array = np.eye(10) + ro_array.setflags(write=False) + ro_df = DataFrame(ro_array) + + tm.assert_frame_equal(indexer(rw_df)[[1, 2, 3]], indexer(ro_df)[[1, 2, 3]]) + tm.assert_frame_equal(indexer(rw_df)[[1]], indexer(ro_df)[[1]]) + tm.assert_series_equal(indexer(rw_df)[1], indexer(ro_df)[1]) + tm.assert_frame_equal(indexer(rw_df)[1:3], indexer(ro_df)[1:3]) + + def test_iloc_getitem_readonly_key(self): + # GH#17192 iloc with read-only array raising TypeError + df = DataFrame({"data": np.ones(100, dtype="float64")}) + indices = np.array([1, 3, 6]) + indices.flags.writeable = False + + result = df.iloc[indices] + expected = df.loc[[1, 3, 6]] + tm.assert_frame_equal(result, expected) + + result = df["data"].iloc[indices] + expected = df["data"].loc[[1, 3, 6]] + tm.assert_series_equal(result, expected) + + def test_iloc_assign_series_to_df_cell(self): + # GH 37593 + df = DataFrame(columns=["a"], index=[0]) + df.iloc[0, 0] = Series([1, 2, 3]) + expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0]) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("klass", [list, np.array]) + def test_iloc_setitem_bool_indexer(self, klass): + # GH#36741 + df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) + indexer = klass([True, False, False]) + df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 + expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [[1], slice(1, 2)]) + def test_iloc_setitem_pure_position_based(self, indexer): + # GH#22046 + df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) + df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df2.iloc[:, indexer] = df1.iloc[:, [0]] + expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) + tm.assert_frame_equal(df2, expected) + + def test_iloc_setitem_dictionary_value(self): + # GH#37728 + df = DataFrame({"x": [1, 2], "y": [2, 2]}) + rhs = {"x": 9, "y": 99} + df.iloc[1] = rhs + expected = DataFrame({"x": [1, 9], "y": [2, 99]}) + tm.assert_frame_equal(df, expected) + + # GH#38335 same thing, 
mixed dtypes + df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]}) + df.iloc[1] = rhs + expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]}) + tm.assert_frame_equal(df, expected) + + def test_iloc_getitem_float_duplicates(self): + df = DataFrame( + np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc") + ) + expect = df.iloc[1:] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:, 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df.index = [1, 0.2, 0.2] + expect = df.iloc[1:] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:, 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df = DataFrame( + np.random.randn(4, 3), index=[1, 0.2, 0.2, 1], columns=list("abc") + ) + expect = df.iloc[1:-1] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:-1, 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df.index = [0.1, 0.2, 2, 0.2] + expect = df.iloc[[1, -1]] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[[1, -1], 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + def test_iloc_setitem_custom_object(self): + # iloc with an object + class TO: + def __init__(self, value) -> None: + self.value = value + + def __str__(self) -> str: + return f"[{self.value}]" + + __repr__ = __str__ + + def __eq__(self, other) -> bool: + return self.value == other.value + + def view(self): + return self + + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = TO(1) + df.iloc[1, 0] = TO(2) + + result = DataFrame(index=[0, 1], columns=[0]) + result.iloc[1, 0] = TO(2) + + tm.assert_frame_equal(result, df) + + # remains object dtype even after setting it back + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = TO(1) + df.iloc[1, 0] = np.nan + result = DataFrame(index=[0, 1], columns=[0]) + + tm.assert_frame_equal(result, df) + + def test_iloc_getitem_with_duplicates(self): + + df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) + + result = df.iloc[0] + assert isinstance(result, Series) + tm.assert_almost_equal(result.values, df.values[0]) + + result = df.T.iloc[:, 0] + assert isinstance(result, Series) + tm.assert_almost_equal(result.values, df.values[0]) + + def test_iloc_getitem_with_duplicates2(self): + # GH#2259 + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2]) + result = df.iloc[:, [0]] + expected = df.take([0], axis=1) + tm.assert_frame_equal(result, expected) + + def test_iloc_interval(self): + # GH#17130 + df = DataFrame({Interval(1, 2): [1, 2]}) + + result = df.iloc[0] + expected = Series({Interval(1, 2): 1}, name=0) + tm.assert_series_equal(result, expected) + + result = df.iloc[:, 0] + expected = Series([1, 2], name=Interval(1, 2)) + tm.assert_series_equal(result, expected) + + result = df.copy() + result.iloc[:, 0] += 1 + expected = DataFrame({Interval(1, 2): [2, 3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("indexing_func", [list, np.array]) + @pytest.mark.parametrize("rhs_func", [list, np.array]) + def test_loc_setitem_boolean_list(self, rhs_func, indexing_func): + # GH#20438 testing specifically list key, not arraylike + ser = Series([0, 1, 2]) + ser.iloc[indexing_func([True, False, True])] = rhs_func([5, 10]) + expected = Series([5, 1, 10]) + tm.assert_series_equal(ser, expected) + + df = DataFrame({"a": [0, 1, 2]}) + df.iloc[indexing_func([True, False, True])] = rhs_func([[5], [10]]) + expected = DataFrame({"a": [5, 1, 10]}) + tm.assert_frame_equal(df, expected) + + def 
test_iloc_getitem_slice_negative_step_ea_block(self): + # GH#44551 + df = DataFrame({"A": [1, 2, 3]}, dtype="Int64") + + res = df.iloc[:, ::-1] + tm.assert_frame_equal(res, df) + + df["B"] = "foo" + res = df.iloc[:, ::-1] + expected = DataFrame({"B": df["B"], "A": df["A"]}) + tm.assert_frame_equal(res, expected) + + def test_iloc_setitem_2d_ndarray_into_ea_block(self): + # GH#44703 + df = DataFrame({"status": ["a", "b", "c"]}, dtype="category") + df.iloc[np.array([0, 1]), np.array([0])] = np.array([["a"], ["a"]]) + + expected = DataFrame({"status": ["a", "a", "c"]}, dtype=df["status"].dtype) + tm.assert_frame_equal(df, expected) + + @td.skip_array_manager_not_yet_implemented + def test_iloc_getitem_int_single_ea_block_view(self): + # GH#45241 + # TODO: make an extension interface test for this? + arr = interval_range(1, 10.0)._values + df = DataFrame(arr) + + # ser should be a *view* on the DataFrame data + ser = df.iloc[2] + + # if we have a view, then changing arr[2] should also change ser[0] + assert arr[2] != arr[-1] # otherwise the rest isn't meaningful + arr[2] = arr[-1] + assert ser[0] == arr[-1] + + def test_iloc_setitem_multicolumn_to_datetime(self, using_array_manager): + + # GH#20511 + df = DataFrame({"A": ["2022-01-01", "2022-01-02"], "B": ["2021", "2022"]}) + + df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])}) + expected = DataFrame( + { + "A": [ + Timestamp("2021-01-01 00:00:00"), + Timestamp("2022-01-01 00:00:00"), + ], + "B": ["2021", "2022"], + } + ) + tm.assert_frame_equal(df, expected, check_dtype=using_array_manager) + + +class TestILocErrors: + # NB: this test should work for _any_ Series we can pass as + # series_with_simple_index + def test_iloc_float_raises(self, series_with_simple_index, frame_or_series): + # GH#4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + # this duplicates the code below + # but is specifically testing for the error + # message + + obj = series_with_simple_index + if frame_or_series is DataFrame: + obj = obj.to_frame() + + msg = "Cannot index by location index with a non-integer key" + with pytest.raises(TypeError, match=msg): + obj.iloc[3.0] + + with pytest.raises(IndexError, match=_slice_iloc_msg): + obj.iloc[3.0] = 0 + + def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): + with pytest.raises(IndexingError, match="Too many indexers"): + float_frame.iloc[:, :, :] + + with pytest.raises(IndexError, match="too many indices for array"): + # GH#32257 we let numpy do validation, get their exception + float_frame.iloc[:, :, :] = 1 + + def test_iloc_frame_indexer(self): + # GH#39004 + df = DataFrame({"a": [1, 2, 3]}) + indexer = DataFrame({"a": [True, False, True]}) + with tm.assert_produces_warning(FutureWarning): + df.iloc[indexer] = 1 + + msg = ( + "DataFrame indexer is not allowed for .iloc\n" + "Consider using .loc for automatic alignment." 
+ ) + with pytest.raises(IndexError, match=msg): + df.iloc[indexer] + + +class TestILocSetItemDuplicateColumns: + def test_iloc_setitem_scalar_duplicate_columns(self): + # GH#15686, duplicate columns and mixed dtype + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = concat([df1, df2], axis=1) + df.iloc[0, 0] = -1 + + assert df.iloc[0, 0] == -1 + assert df.iloc[0, 2] == 3 + assert df.dtypes.iloc[2] == np.int64 + + def test_iloc_setitem_list_duplicate_columns(self): + # GH#22036 setting with same-sized list + df = DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"]) + + df.iloc[:, 2] = ["str3"] + + expected = DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"]) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_series_duplicate_columns(self): + df = DataFrame( + np.arange(8, dtype=np.int64).reshape(2, 4), columns=["A", "B", "A", "B"] + ) + df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64) + assert df.dtypes.iloc[2] == np.int64 + + @pytest.mark.parametrize( + ["dtypes", "init_value", "expected_value"], + [("int64", "0", 0), ("float", "1.2", 1.2)], + ) + def test_iloc_setitem_dtypes_duplicate_columns( + self, dtypes, init_value, expected_value + ): + # GH#22035 + df = DataFrame([[init_value, "str", "str2"]], columns=["a", "b", "b"]) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.iloc[:, 0] = df.iloc[:, 0].astype(dtypes) + + expected_df = DataFrame( + [[expected_value, "str", "str2"]], columns=["a", "b", "b"] + ) + tm.assert_frame_equal(df, expected_df) + + +class TestILocCallable: + def test_frame_iloc_getitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return location + res = df.iloc[lambda x: [1, 3]] + tm.assert_frame_equal(res, df.iloc[[1, 3]]) + + res = df.iloc[lambda x: [1, 3], :] + tm.assert_frame_equal(res, df.iloc[[1, 3], :]) + + res = df.iloc[lambda x: [1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + # mixture + res = df.iloc[[1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[[1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + res = df.iloc[lambda x: [1, 3], 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + def test_frame_iloc_setitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return location + res = df.copy() + res.iloc[lambda x: [1, 3]] = 0 + exp = df.copy() + exp.iloc[[1, 3]] = 0 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], :] = -1 + exp = df.copy() + exp.iloc[[1, 3], :] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: 0] = 5 + exp = df.copy() + exp.iloc[[1, 3], 0] = 5 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: [0]] = 25 + exp = df.copy() + exp.iloc[[1, 3], [0]] = 25 + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.iloc[[1, 3], lambda x: 0] = -3 + exp = df.copy() + exp.iloc[[1, 3], 0] = -3 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[[1, 3], lambda x: [0]] = -5 + exp = df.copy() + exp.iloc[[1, 3], [0]] = -5 + 
tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], 0] = 10 + exp = df.copy() + exp.iloc[[1, 3], 0] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], [0]] = [-5, -5] + exp = df.copy() + exp.iloc[[1, 3], [0]] = [-5, -5] + tm.assert_frame_equal(res, exp) + + +class TestILocSeries: + def test_iloc(self, using_copy_on_write): + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + ser_original = ser.copy() + + for i in range(len(ser)): + result = ser.iloc[i] + exp = ser[ser.index[i]] + tm.assert_almost_equal(result, exp) + + # pass a slice + result = ser.iloc[slice(1, 3)] + expected = ser.loc[2:4] + tm.assert_series_equal(result, expected) + + # test slice is a view + with tm.assert_produces_warning(None): + # GH#45324 make sure we aren't giving a spurious FutureWarning + result[:] = 0 + if using_copy_on_write: + tm.assert_series_equal(ser, ser_original) + else: + assert (ser.iloc[1:3] == 0).all() + + # list of integers + result = ser.iloc[[0, 2, 3, 4, 5]] + expected = ser.reindex(ser.index[[0, 2, 3, 4, 5]]) + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_nonunique(self): + ser = Series([0, 1, 2], index=[0, 1, 0]) + assert ser.iloc[2] == 2 + + def test_iloc_setitem_pure_position_based(self): + # GH#22046 + ser1 = Series([1, 2, 3]) + ser2 = Series([4, 5, 6], index=[1, 0, 2]) + ser1.iloc[1:3] = ser2.iloc[1:3] + expected = Series([1, 5, 6]) + tm.assert_series_equal(ser1, expected) + + def test_iloc_nullable_int64_size_1_nan(self): + # GH 31861 + result = DataFrame({"a": ["test"], "b": [np.nan]}) + result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") + expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexers.py b/pandas/tests/indexing/test_indexers.py new file mode 100644 index 00000000..ddc5c039 --- /dev/null +++ b/pandas/tests/indexing/test_indexers.py @@ -0,0 +1,61 @@ +# Tests aimed at pandas.core.indexers +import numpy as np +import pytest + +from pandas.core.indexers import ( + is_scalar_indexer, + length_of_indexer, + validate_indices, +) + + +def test_length_of_indexer(): + arr = np.zeros(4, dtype=bool) + arr[0] = 1 + result = length_of_indexer(arr) + assert result == 1 + + +def test_is_scalar_indexer(): + indexer = (0, 1) + assert is_scalar_indexer(indexer, 2) + assert not is_scalar_indexer(indexer[0], 2) + + indexer = (np.array([2]), 1) + assert not is_scalar_indexer(indexer, 2) + + indexer = (np.array([2]), np.array([3])) + assert not is_scalar_indexer(indexer, 2) + + indexer = (np.array([2]), np.array([3, 4])) + assert not is_scalar_indexer(indexer, 2) + + assert not is_scalar_indexer(slice(None), 1) + + indexer = 0 + assert is_scalar_indexer(indexer, 1) + + indexer = (0,) + assert is_scalar_indexer(indexer, 1) + + +class TestValidateIndices: + def test_validate_indices_ok(self): + indices = np.asarray([0, 1]) + validate_indices(indices, 2) + validate_indices(indices[:0], 0) + validate_indices(np.array([-1, -1]), 0) + + def test_validate_indices_low(self): + indices = np.asarray([0, -2]) + with pytest.raises(ValueError, match="'indices' contains"): + validate_indices(indices, 2) + + def test_validate_indices_high(self): + indices = np.asarray([0, 1, 2]) + with pytest.raises(IndexError, match="indices are out"): + validate_indices(indices, 2) + + def test_validate_indices_empty(self): + with pytest.raises(IndexError, match="indices are out"): + validate_indices(np.array([0, 1]), 0) 
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py new file mode 100644 index 00000000..210c75b0 --- /dev/null +++ b/pandas/tests/indexing/test_indexing.py @@ -0,0 +1,1114 @@ +""" test fancy indexing & misc """ + +import array +from datetime import datetime +import re +import weakref + +import numpy as np +import pytest + +from pandas.errors import IndexingError + +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) + +import pandas as pd +from pandas import ( + DataFrame, + Index, + NaT, + Series, + date_range, + offsets, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.api import Float64Index +from pandas.tests.indexing.common import _mklbl +from pandas.tests.indexing.test_floats import gen_obj + +# ------------------------------------------------------------------------ +# Indexing test cases + + +class TestFancy: + """pure get/set item & fancy indexing""" + + def test_setitem_ndarray_1d(self): + # GH5508 + + # len of indexer vs length of the 1d ndarray + df = DataFrame(index=Index(np.arange(1, 11))) + df["foo"] = np.zeros(10, dtype=np.float64) + df["bar"] = np.zeros(10, dtype=complex) + + # invalid + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + df.loc[df.index[2:5], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) + + # valid + df.loc[df.index[2:6], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) + + result = df.loc[df.index[2:6], "bar"] + expected = Series( + [2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name="bar" + ) + tm.assert_series_equal(result, expected) + + def test_setitem_ndarray_1d_2(self): + # GH5508 + + # dtype getting changed? + df = DataFrame(index=Index(np.arange(1, 11))) + df["foo"] = np.zeros(10, dtype=np.float64) + df["bar"] = np.zeros(10, dtype=complex) + + msg = "Must have equal len keys and value when setting with an iterable" + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(FutureWarning, match="label-based"): + df[2:5] = np.arange(1, 4) * 1j + + def test_getitem_ndarray_3d( + self, index, frame_or_series, indexer_sli, using_array_manager + ): + # GH 25567 + obj = gen_obj(frame_or_series, index) + idxr = indexer_sli(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msgs = [] + if frame_or_series is Series and indexer_sli in [tm.setitem, tm.iloc]: + msgs.append(r"Wrong number of dimensions. values.ndim > ndim \[3 > 1\]") + if using_array_manager: + msgs.append("Passed array should be 1-dimensional") + if frame_or_series is Series or indexer_sli is tm.iloc: + msgs.append(r"Buffer has wrong number of dimensions \(expected 1, got 3\)") + if using_array_manager: + msgs.append("indexer should be 1-dimensional") + if indexer_sli is tm.loc or ( + frame_or_series is Series and indexer_sli is tm.setitem + ): + msgs.append("Cannot index with multidimensional key") + if frame_or_series is DataFrame and indexer_sli is tm.setitem: + msgs.append("Index data must be 1-dimensional") + if isinstance(index, pd.IntervalIndex) and indexer_sli is tm.iloc: + msgs.append("Index data must be 1-dimensional") + if isinstance(index, (pd.TimedeltaIndex, pd.DatetimeIndex, pd.PeriodIndex)): + msgs.append("Data must be 1-dimensional") + if len(index) == 0 or isinstance(index, pd.MultiIndex): + msgs.append("positional indexers are out-of-bounds") + if type(index) is Index and not isinstance(index._values, np.ndarray): + # e.g. 
Int64 + msgs.append("values must be a 1D array") + + # string[pyarrow] + msgs.append("only handle 1-dimensional arrays") + + msg = "|".join(msgs) + + potential_errors = (IndexError, ValueError, NotImplementedError) + with pytest.raises(potential_errors, match=msg): + idxr[nd3] + + def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): + # GH 25567 + obj = gen_obj(frame_or_series, index) + idxr = indexer_sli(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + if indexer_sli is tm.iloc: + err = ValueError + msg = f"Cannot set values with ndim > {obj.ndim}" + else: + err = ValueError + msg = "|".join( + [ + r"Buffer has wrong number of dimensions \(expected 1, got 3\)", + "Cannot set values with ndim > 1", + "Index data must be 1-dimensional", + "Data must be 1-dimensional", + "Array conditional must be same shape as self", + ] + ) + + with pytest.raises(err, match=msg): + idxr[nd3] = 0 + + def test_getitem_ndarray_0d(self): + # GH#24924 + key = np.array(0) + + # dataframe __getitem__ + df = DataFrame([[1, 2], [3, 4]]) + result = df[key] + expected = Series([1, 3], name=0) + tm.assert_series_equal(result, expected) + + # series __getitem__ + ser = Series([1, 2]) + result = ser[key] + assert result == 1 + + def test_inf_upcast(self): + # GH 16957 + # We should be able to use np.inf as a key + # np.inf should cause an index to convert to float + + # Test with np.inf in rows + df = DataFrame(columns=[0]) + df.loc[1] = 1 + df.loc[2] = 2 + df.loc[np.inf] = 3 + + # make sure we can look up the value + assert df.loc[np.inf, 0] == 3 + + result = df.index + expected = Float64Index([1, 2, np.inf]) + tm.assert_index_equal(result, expected) + + def test_setitem_dtype_upcast(self): + + # GH3216 + df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) + df["c"] = np.nan + assert df["c"].dtype == np.float64 + + df.loc[0, "c"] = "foo" + expected = DataFrame( + [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("val", [3.14, "wxyz"]) + def test_setitem_dtype_upcast2(self, val): + + # GH10280 + df = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + left = df.copy() + left.loc["a", "bar"] = val + right = DataFrame( + [[0, val, 2], [3, 4, 5]], + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + tm.assert_frame_equal(left, right) + assert is_integer_dtype(left["foo"]) + assert is_integer_dtype(left["baz"]) + + def test_setitem_dtype_upcast3(self): + left = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3) / 10.0, + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + left.loc["a", "bar"] = "wxyz" + + right = DataFrame( + [[0, "wxyz", 0.2], [0.3, 0.4, 0.5]], + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + tm.assert_frame_equal(left, right) + assert is_float_dtype(left["foo"]) + assert is_float_dtype(left["baz"]) + + def test_dups_fancy_indexing(self): + + # GH 3455 + + df = tm.makeCustomDataframe(10, 3) + df.columns = ["a", "a", "b"] + result = df[["b", "a"]].columns + expected = Index(["b", "a", "a"]) + tm.assert_index_equal(result, expected) + + def test_dups_fancy_indexing_across_dtypes(self): + + # across dtypes + df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")) + df.head() + str(df) + result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]]) + result.columns = list("aaaaaaa") # GH#3468 + + # GH#3509 smoke tests for indexing with duplicate columns + df.iloc[:, 4] + result.iloc[:, 4] + + 
tm.assert_frame_equal(df, result) + + def test_dups_fancy_indexing_not_in_order(self): + # GH 3561, dups not in selected order + df = DataFrame( + {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")}, + index=["A", "A", "B", "C"], + ) + rows = ["C", "B"] + expected = DataFrame( + {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]}, index=rows + ) + result = df.loc[rows] + tm.assert_frame_equal(result, expected) + + result = df.loc[Index(rows)] + tm.assert_frame_equal(result, expected) + + rows = ["C", "B", "E"] + with pytest.raises(KeyError, match="not in index"): + df.loc[rows] + + # see GH5553, make sure we use the right indexer + rows = ["F", "G", "H", "C", "B", "E"] + with pytest.raises(KeyError, match="not in index"): + df.loc[rows] + + def test_dups_fancy_indexing_only_missing_label(self): + + # List containing only missing label + dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) + with pytest.raises( + KeyError, + match=re.escape( + "\"None of [Index(['E'], dtype='object')] are in the [index]\"" + ), + ): + dfnu.loc[["E"]] + + @pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")]) + def test_dups_fancy_indexing_missing_label(self, vals): + + # GH 4619; duplicate indexer with missing label + df = DataFrame({"A": vals}) + with pytest.raises(KeyError, match="not in index"): + df.loc[[0, 8, 0]] + + def test_dups_fancy_indexing_non_unique(self): + + # non unique with non unique selector + df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) + with pytest.raises(KeyError, match="not in index"): + df.loc[["A", "A", "E"]] + + def test_dups_fancy_indexing2(self): + # GH 5835 + # dups on index and missing values + df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"]) + + with pytest.raises(KeyError, match="not in index"): + df.loc[:, ["A", "B", "C"]] + + def test_dups_fancy_indexing3(self): + + # GH 6504, multi-axis indexing + df = DataFrame( + np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"] + ) + + expected = df.iloc[0:6] + result = df.loc[[1, 2]] + tm.assert_frame_equal(result, expected) + + expected = df + result = df.loc[:, ["a", "b"]] + tm.assert_frame_equal(result, expected) + + expected = df.iloc[0:6, :] + result = df.loc[[1, 2], ["a", "b"]] + tm.assert_frame_equal(result, expected) + + def test_duplicate_int_indexing(self, indexer_sl): + # GH 17347 + ser = Series(range(3), index=[1, 1, 3]) + expected = Series(range(2), index=[1, 1]) + result = indexer_sl(ser)[[1]] + tm.assert_series_equal(result, expected) + + def test_indexing_mixed_frame_bug(self): + + # GH3492 + df = DataFrame( + {"a": {1: "aaa", 2: "bbb", 3: "ccc"}, "b": {1: 111, 2: 222, 3: 333}} + ) + + # this works, new column is created correctly + df["test"] = df["a"].apply(lambda x: "_" if x == "aaa" else x) + + # this does not work, ie column test is not changed + idx = df["test"] == "_" + temp = df.loc[idx, "a"].apply(lambda x: "-----" if x == "aaa" else x) + df.loc[idx, "test"] = temp + assert df.iloc[0, 2] == "-----" + + def test_multitype_list_index_access(self): + # GH 10610 + df = DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) + + with pytest.raises(KeyError, match=re.escape("'[26, -8] not in index'")): + df[[22, 26, -8]] + assert df[21].shape[0] == df.shape[0] + + def test_set_index_nan(self): + + # GH 3586 + df = DataFrame( + { + "PRuid": { + 17: "nonQC", + 18: "nonQC", + 19: "nonQC", + 20: "10", + 21: "11", + 22: "12", + 23: "13", + 24: "24", + 25: "35", + 26: "46", + 27: "47", + 28: "48", 
+ 29: "59", + 30: "10", + }, + "QC": { + 17: 0.0, + 18: 0.0, + 19: 0.0, + 20: np.nan, + 21: np.nan, + 22: np.nan, + 23: np.nan, + 24: 1.0, + 25: np.nan, + 26: np.nan, + 27: np.nan, + 28: np.nan, + 29: np.nan, + 30: np.nan, + }, + "data": { + 17: 7.9544899999999998, + 18: 8.0142609999999994, + 19: 7.8591520000000008, + 20: 0.86140349999999999, + 21: 0.87853110000000001, + 22: 0.8427041999999999, + 23: 0.78587700000000005, + 24: 0.73062459999999996, + 25: 0.81668560000000001, + 26: 0.81927080000000008, + 27: 0.80705009999999999, + 28: 0.81440240000000008, + 29: 0.80140849999999997, + 30: 0.81307740000000006, + }, + "year": { + 17: 2006, + 18: 2007, + 19: 2008, + 20: 1985, + 21: 1985, + 22: 1985, + 23: 1985, + 24: 1985, + 25: 1985, + 26: 1985, + 27: 1985, + 28: 1985, + 29: 1985, + 30: 1986, + }, + } + ).reset_index() + + result = ( + df.set_index(["year", "PRuid", "QC"]) + .reset_index() + .reindex(columns=df.columns) + ) + tm.assert_frame_equal(result, df) + + def test_multi_assign(self): + + # GH 3626, an assignment of a sub-df to a df + df = DataFrame( + { + "FC": ["a", "b", "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": list(range(6)), + "col2": list(range(6, 12)), + } + ) + df.iloc[1, 0] = np.nan + df2 = df.copy() + + mask = ~df2.FC.isna() + cols = ["col1", "col2"] + + dft = df2 * 2 + dft.iloc[3, 3] = np.nan + + expected = DataFrame( + { + "FC": ["a", np.nan, "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": Series([0, 1, 4, 6, 8, 10]), + "col2": [12, 7, 16, np.nan, 20, 22], + } + ) + + # frame on rhs + df2.loc[mask, cols] = dft.loc[mask, cols] + tm.assert_frame_equal(df2, expected) + + # with an ndarray on rhs + # coerces to float64 because values has float64 dtype + # GH 14001 + expected = DataFrame( + { + "FC": ["a", np.nan, "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], + "col2": [12, 7, 16, np.nan, 20, 22], + } + ) + df2 = df.copy() + df2.loc[mask, cols] = dft.loc[mask, cols].values + tm.assert_frame_equal(df2, expected) + + def test_multi_assign_broadcasting_rhs(self): + # broadcasting on the rhs is required + df = DataFrame( + { + "A": [1, 2, 0, 0, 0], + "B": [0, 0, 0, 10, 11], + "C": [0, 0, 0, 10, 11], + "D": [3, 4, 5, 6, 7], + } + ) + + expected = df.copy() + mask = expected["A"] == 0 + for col in ["A", "B"]: + expected.loc[mask, col] = df["D"] + + df.loc[df["A"] == 0, ["A", "B"]] = df["D"] + tm.assert_frame_equal(df, expected) + + def test_setitem_list(self): + + # GH 6043 + # iloc with a list + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = [1, 2, 3] + df.iloc[1, 0] = [1, 2] + + result = DataFrame(index=[0, 1], columns=[0]) + result.iloc[1, 0] = [1, 2] + + tm.assert_frame_equal(result, df) + + def test_string_slice(self): + # GH 14424 + # string indexing against datetimelike with object + # dtype should properly raises KeyError + df = DataFrame([1], Index([pd.Timestamp("2011-01-01")], dtype=object)) + assert df.index._is_all_dates + with pytest.raises(KeyError, match="'2011'"): + df["2011"] + + with pytest.raises(KeyError, match="'2011'"): + df.loc["2011", 0] + + def test_string_slice_empty(self): + # GH 14424 + + df = DataFrame() + assert not df.index._is_all_dates + with pytest.raises(KeyError, match="'2011'"): + df["2011"] + + with pytest.raises(KeyError, match="^0$"): + df.loc["2011", 0] + + def test_astype_assignment(self): + + # GH4312 (iloc) + df_orig = DataFrame( + [["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + + df = df_orig.copy() + msg = "will attempt to set the 
values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) + expected = DataFrame( + [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) + expected = DataFrame( + [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + # GH5702 (loc) + df = df_orig.copy() + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) + expected = DataFrame( + [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) + expected = DataFrame( + [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + def test_astype_assignment_full_replacements(self): + # full replacements / no nans + df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.iloc[:, 0] = df["A"].astype(np.int64) + expected = DataFrame({"A": [1, 2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[:, "A"] = df["A"].astype(np.int64) + expected = DataFrame({"A": [1, 2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc]) + def test_index_type_coercion(self, indexer): + + # GH 11836 + # if we have an index type and set it with something that looks + # to numpy like the same, but is actually, not + # (e.g. 
setting with a float or string '0') + # then we need to coerce to object + + # integer indexes + for s in [Series(range(5)), Series(range(5), index=range(1, 6))]: + + assert s.index.is_integer() + + s2 = s.copy() + indexer(s2)[0.1] = 0 + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 + + s2 = s.copy() + indexer(s2)[0.0] = 0 + exp = s.index + if 0 not in s: + exp = Index(s.index.tolist() + [0]) + tm.assert_index_equal(s2.index, exp) + + s2 = s.copy() + indexer(s2)["0"] = 0 + assert s2.index.is_object() + + for s in [Series(range(5), index=np.arange(5.0))]: + + assert s.index.is_floating() + + s2 = s.copy() + indexer(s2)[0.1] = 0 + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 + + s2 = s.copy() + indexer(s2)[0.0] = 0 + tm.assert_index_equal(s2.index, s.index) + + s2 = s.copy() + indexer(s2)["0"] = 0 + assert s2.index.is_object() + + +class TestMisc: + def test_float_index_to_mixed(self): + df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) + df["a"] = 10 + + expected = DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}) + tm.assert_frame_equal(expected, df) + + def test_float_index_non_scalar_assignment(self): + df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]) + df.loc[df.index[:2]] = 1 + expected = DataFrame({"a": [1, 1, 3], "b": [1, 1, 5]}, index=df.index) + tm.assert_frame_equal(expected, df) + + def test_loc_setitem_fullindex_views(self): + df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]) + df2 = df.copy() + df.loc[df.index] = df.loc[df.index] + tm.assert_frame_equal(df, df2) + + def test_rhs_alignment(self): + # GH8258, tests that both rows & columns are aligned to what is + # assigned to. covers both uniform data-type & multi-type cases + def run_tests(df, rhs, right_loc, right_iloc): + # label, index, slice + lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4) + lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3) + + left = df.copy() + left.loc[lbl_one, lbl_two] = rhs + tm.assert_frame_equal(left, right_loc) + + left = df.copy() + left.iloc[idx_one, idx_two] = rhs + tm.assert_frame_equal(left, right_iloc) + + left = df.copy() + left.iloc[slice_one, slice_two] = rhs + tm.assert_frame_equal(left, right_iloc) + + xs = np.arange(20).reshape(5, 4) + cols = ["jim", "joe", "jolie", "joline"] + df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64") + + # right hand side; permute the indices and multiplpy by -2 + rhs = -2 * df.iloc[3:0:-1, 2:0:-1] + + # expected `right` result; just multiply by -2 + right_iloc = df.copy() + right_iloc["joe"] = [1, 14, 10, 6, 17] + right_iloc["jolie"] = [2, 13, 9, 5, 18] + right_iloc.iloc[1:4, 1:3] *= -2 + right_loc = df.copy() + right_loc.iloc[1:4, 1:3] *= -2 + + # run tests with uniform dtypes + run_tests(df, rhs, right_loc, right_iloc) + + # make frames multi-type & re-run tests + for frame in [df, rhs, right_loc, right_iloc]: + frame["joe"] = frame["joe"].astype("float64") + frame["jolie"] = frame["jolie"].map("@{}".format) + right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0] + right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"] + run_tests(df, rhs, right_loc, right_iloc) + + @pytest.mark.parametrize( + "idx", [_mklbl("A", 20), np.arange(20) + 100, np.linspace(100, 150, 20)] + ) + def test_str_label_slicing_with_negative_step(self, idx): + SLC = pd.IndexSlice + + idx = Index(idx) + ser = Series(np.arange(20), index=idx) + tm.assert_indexing_slices_equivalent(ser, SLC[idx[9] :: -1], SLC[9::-1]) + 
tm.assert_indexing_slices_equivalent(ser, SLC[: idx[9] : -1], SLC[:8:-1]) + tm.assert_indexing_slices_equivalent( + ser, SLC[idx[13] : idx[9] : -1], SLC[13:8:-1] + ) + tm.assert_indexing_slices_equivalent(ser, SLC[idx[9] : idx[13] : -1], SLC[:0]) + + def test_slice_with_zero_step_raises(self, index, indexer_sl, frame_or_series): + obj = frame_or_series(np.arange(len(index)), index=index) + with pytest.raises(ValueError, match="slice step cannot be zero"): + indexer_sl(obj)[::0] + + def test_loc_setitem_indexing_assignment_dict_already_exists(self): + index = Index([-5, 0, 5], name="z") + df = DataFrame({"x": [1, 2, 6], "y": [2, 2, 8]}, index=index) + expected = df.copy() + rhs = {"x": 9, "y": 99} + df.loc[5] = rhs + expected.loc[5] = [9, 99] + tm.assert_frame_equal(df, expected) + + # GH#38335 same thing, mixed dtypes + df = DataFrame({"x": [1, 2, 6], "y": [2.0, 2.0, 8.0]}, index=index) + df.loc[5] = rhs + expected = DataFrame({"x": [1, 2, 9], "y": [2.0, 2.0, 99.0]}, index=index) + tm.assert_frame_equal(df, expected) + + def test_iloc_getitem_indexing_dtypes_on_empty(self): + # Check that .iloc returns correct dtypes GH9983 + df = DataFrame({"a": [1, 2, 3], "b": ["b", "b2", "b3"]}) + df2 = df.iloc[[], :] + + assert df2.loc[:, "a"].dtype == np.int64 + tm.assert_series_equal(df2.loc[:, "a"], df2.iloc[:, 0]) + + @pytest.mark.parametrize("size", [5, 999999, 1000000]) + def test_loc_range_in_series_indexing(self, size): + # range can cause an indexing error + # GH 11652 + s = Series(index=range(size), dtype=np.float64) + s.loc[range(1)] = 42 + tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) + + s.loc[range(2)] = 43 + tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) + + def test_partial_boolean_frame_indexing(self): + # GH 17170 + df = DataFrame( + np.arange(9.0).reshape(3, 3), index=list("abc"), columns=list("ABC") + ) + index_df = DataFrame(1, index=list("ab"), columns=list("AB")) + result = df[index_df.notnull()] + expected = DataFrame( + np.array([[0.0, 1.0, np.nan], [3.0, 4.0, np.nan], [np.nan] * 3]), + index=list("abc"), + columns=list("ABC"), + ) + tm.assert_frame_equal(result, expected) + + def test_no_reference_cycle(self): + df = DataFrame({"a": [0, 1], "b": [2, 3]}) + for name in ("loc", "iloc", "at", "iat"): + getattr(df, name) + wr = weakref.ref(df) + del df + assert wr() is None + + def test_label_indexing_on_nan(self, nulls_fixture): + # GH 32431 + df = Series([1, "{1,2}", 1, nulls_fixture]) + vc = df.value_counts(dropna=False) + result1 = vc.loc[nulls_fixture] + result2 = vc[nulls_fixture] + + expected = 1 + assert result1 == expected + assert result2 == expected + + +class TestDataframeNoneCoercion: + EXPECTED_SINGLE_ROW_RESULTS = [ + # For numeric series, we should coerce to NaN. + ([1, 2, 3], [np.nan, 2, 3]), + ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), + # For datetime series, we should coerce to NaT. + ( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + # For objects, we should preserve the None value. 
+ (["foo", "bar", "baz"], [None, "bar", "baz"]), + ] + + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_coercion_with_loc(self, expected): + start_data, expected_result = expected + + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe.loc[0, ["foo"]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_coercion_with_setitem_and_dataframe(self, expected): + start_data, expected_result = expected + + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_none_coercion_loc_and_dataframe(self, expected): + start_data, expected_result = expected + + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + def test_none_coercion_mixed_dtypes(self): + start_dataframe = DataFrame( + { + "a": [1, 2, 3], + "b": [1.0, 2.0, 3.0], + "c": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + "d": ["a", "b", "c"], + } + ) + start_dataframe.iloc[0] = None + + exp = DataFrame( + { + "a": [np.nan, 2, 3], + "b": [np.nan, 2.0, 3.0], + "c": [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], + "d": [None, "b", "c"], + } + ) + tm.assert_frame_equal(start_dataframe, exp) + + +class TestDatetimelikeCoercion: + def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli): + # dispatching _can_hold_element to underlying DatetimeArray + tz = tz_naive_fixture + + dti = date_range("2016-01-01", periods=3, tz=tz) + ser = Series(dti) + + values = ser._values + + newval = "2018-01-01" + values._validate_setitem_value(newval) + + indexer_sli(ser)[0] = newval + + if tz is None: + # TODO(EA2D): we can make this no-copy in tz-naive case too + assert ser.dtype == dti.dtype + assert ser._values._data is values._data + else: + assert ser._values is values + + @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) + @pytest.mark.parametrize( + "key", [[0, 1], slice(0, 2), np.array([True, True, False])] + ) + def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer_sli, key, box): + # dispatching _can_hold_element to underling DatetimeArray + tz = tz_naive_fixture + + if isinstance(key, slice) and indexer_sli is tm.loc: + key = slice(0, 1) + + dti = date_range("2016-01-01", periods=3, tz=tz) + ser = Series(dti) + + values = ser._values + + newvals = box(["2019-01-01", "2010-01-02"]) + values._validate_setitem_value(newvals) + + indexer_sli(ser)[key] = newvals + + if tz is None: + # TODO(EA2D): we can make this no-copy in tz-naive case too + assert ser.dtype == dti.dtype + assert ser._values._data is values._data + else: + assert ser._values is values + + @pytest.mark.parametrize("scalar", ["3 Days", offsets.Hour(4)]) + def test_setitem_td64_scalar(self, indexer_sli, scalar): + # dispatching _can_hold_element to underling TimedeltaArray + tdi = timedelta_range("1 Day", periods=3) + ser = Series(tdi) + + values = ser._values + values._validate_setitem_value(scalar) + + indexer_sli(ser)[0] = scalar + 
assert ser._values._data is values._data + + @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) + @pytest.mark.parametrize( + "key", [[0, 1], slice(0, 2), np.array([True, True, False])] + ) + def test_setitem_td64_string_values(self, indexer_sli, key, box): + # dispatching _can_hold_element to underling TimedeltaArray + if isinstance(key, slice) and indexer_sli is tm.loc: + key = slice(0, 1) + + tdi = timedelta_range("1 Day", periods=3) + ser = Series(tdi) + + values = ser._values + + newvals = box(["10 Days", "44 hours"]) + values._validate_setitem_value(newvals) + + indexer_sli(ser)[key] = newvals + assert ser._values._data is values._data + + +def test_extension_array_cross_section(): + # A cross-section of a homogeneous EA should be an EA + df = DataFrame( + { + "A": pd.array([1, 2], dtype="Int64"), + "B": pd.array([3, 4], dtype="Int64"), + }, + index=["a", "b"], + ) + expected = Series(pd.array([1, 3], dtype="Int64"), index=["A", "B"], name="a") + result = df.loc["a"] + tm.assert_series_equal(result, expected) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + +def test_extension_array_cross_section_converts(): + # all numeric columns -> numeric series + df = DataFrame( + { + "A": pd.array([1, 2], dtype="Int64"), + "B": np.array([1, 2], dtype="int64"), + }, + index=["a", "b"], + ) + result = df.loc["a"] + expected = Series([1, 1], dtype="Int64", index=["A", "B"], name="a") + tm.assert_series_equal(result, expected) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + # mixed columns -> object series + df = DataFrame( + {"A": pd.array([1, 2], dtype="Int64"), "B": np.array(["a", "b"])}, + index=["a", "b"], + ) + result = df.loc["a"] + expected = Series([1, "a"], dtype=object, index=["A", "B"], name="a") + tm.assert_series_equal(result, expected) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ser, keys", + [(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))], +) +def test_ser_tup_indexer_exceeds_dimensions(ser, keys, indexer_li): + # GH#13831 + exp_err, exp_msg = IndexingError, "Too many indexers" + with pytest.raises(exp_err, match=exp_msg): + indexer_li(ser)[keys] + + if indexer_li == tm.iloc: + # For iloc.__setitem__ we let numpy handle the error reporting. + exp_err, exp_msg = IndexError, "too many indices for array" + + with pytest.raises(exp_err, match=exp_msg): + indexer_li(ser)[keys] = 0 + + +def test_ser_list_indexer_exceeds_dimensions(indexer_li): + # GH#13831 + # Make sure an exception is raised when a tuple exceeds the dimension of the series, + # but not list when a list is used. + ser = Series([10]) + res = indexer_li(ser)[[0, 0]] + exp = Series([10, 10], index=Index([0, 0])) + tm.assert_series_equal(res, exp) + + +@pytest.mark.parametrize( + "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])] +) +def test_scalar_setitem_with_nested_value(value): + # For numeric data, we try to unpack and thus raise for mismatching length + df = DataFrame({"A": [1, 2, 3]}) + msg = "|".join( + [ + "Must have equal len keys and value", + "setting an array element with a sequence", + ] + ) + with pytest.raises(ValueError, match=msg): + df.loc[0, "B"] = value + + # TODO For object dtype this happens as well, but should we rather preserve + # the nested data and set as such? 
+ df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)}) + with pytest.raises(ValueError, match="Must have equal len keys and value"): + df.loc[0, "B"] = value + # if isinstance(value, np.ndarray): + # assert (df.loc[0, "B"] == value).all() + # else: + # assert df.loc[0, "B"] == value + + +@pytest.mark.parametrize( + "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])] +) +def test_scalar_setitem_series_with_nested_value(value, indexer_sli): + # For numeric data, we try to unpack and thus raise for mismatching length + ser = Series([1, 2, 3]) + with pytest.raises(ValueError, match="setting an array element with a sequence"): + indexer_sli(ser)[0] = value + + # but for object dtype we preserve the nested data and set as such + ser = Series([1, "a", "b"], dtype=object) + indexer_sli(ser)[0] = value + if isinstance(value, np.ndarray): + assert (ser.loc[0] == value).all() + else: + assert ser.loc[0] == value + + +@pytest.mark.parametrize( + "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])] +) +def test_scalar_setitem_with_nested_value_length1(value): + # https://github.com/pandas-dev/pandas/issues/46268 + + # For numeric data, assigning length-1 array to scalar position gets unpacked + df = DataFrame({"A": [1, 2, 3]}) + df.loc[0, "B"] = value + expected = DataFrame({"A": [1, 2, 3], "B": [0.0, np.nan, np.nan]}) + tm.assert_frame_equal(df, expected) + + # but for object dtype we preserve the nested data + df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)}) + df.loc[0, "B"] = value + if isinstance(value, np.ndarray): + assert (df.loc[0, "B"] == value).all() + else: + assert df.loc[0, "B"] == value + + +@pytest.mark.parametrize( + "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])] +) +def test_scalar_setitem_series_with_nested_value_length1(value, indexer_sli): + # For numeric data, assigning length-1 array to scalar position gets unpacked + # TODO this only happens in case of ndarray, should we make this consistent + # for all list-likes? 
(as happens for DataFrame.(i)loc, see test above) + ser = Series([1.0, 2.0, 3.0]) + if isinstance(value, np.ndarray): + indexer_sli(ser)[0] = value + expected = Series([0.0, 2.0, 3.0]) + tm.assert_series_equal(ser, expected) + else: + with pytest.raises( + ValueError, match="setting an array element with a sequence" + ): + indexer_sli(ser)[0] = value + + # but for object dtype we preserve the nested data + ser = Series([1, "a", "b"], dtype=object) + indexer_sli(ser)[0] = value + if isinstance(value, np.ndarray): + assert (ser.loc[0] == value).all() + else: + assert ser.loc[0] == value diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py new file mode 100644 index 00000000..235ad3d2 --- /dev/null +++ b/pandas/tests/indexing/test_loc.py @@ -0,0 +1,3221 @@ +""" test label based indexing with loc """ +from collections import namedtuple +from datetime import ( + date, + datetime, + time, + timedelta, +) +import re + +from dateutil.tz import gettz +import numpy as np +import pytest + +from pandas.errors import IndexingError +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Index, + IndexSlice, + MultiIndex, + Period, + PeriodIndex, + Series, + SparseDtype, + Timedelta, + Timestamp, + date_range, + timedelta_range, + to_datetime, + to_timedelta, +) +import pandas._testing as tm +from pandas.api.types import is_scalar +from pandas.core.api import Float64Index +from pandas.core.indexing import _one_ellipsis_message +from pandas.tests.indexing.common import Base + + +@pytest.mark.parametrize( + "series, new_series, expected_ser", + [ + [[np.nan, np.nan, "b"], ["a", np.nan, np.nan], [False, True, True]], + [[np.nan, "b"], ["a", np.nan], [False, True]], + ], +) +def test_not_change_nan_loc(series, new_series, expected_ser): + # GH 28403 + df = DataFrame({"A": series}) + df.loc[:, "A"] = new_series + expected = DataFrame({"A": expected_ser}) + tm.assert_frame_equal(df.isna(), expected) + tm.assert_frame_equal(df.notna(), ~expected) + + +class TestLoc(Base): + def test_loc_getitem_int(self): + + # int label + self.check_result("loc", 2, typs=["labels"], fails=KeyError) + + def test_loc_getitem_label(self): + + # label + self.check_result("loc", "c", typs=["empty"], fails=KeyError) + + @pytest.mark.parametrize( + "key, typs, axes", + [ + ["f", ["ints", "uints", "labels", "mixed", "ts"], None], + ["f", ["floats"], None], + [20, ["ints", "uints", "mixed"], None], + [20, ["labels"], None], + [20, ["ts"], 0], + [20, ["floats"], 0], + ], + ) + def test_loc_getitem_label_out_of_range(self, key, typs, axes): + + # out of range label + self.check_result("loc", key, typs=typs, axes=axes, fails=KeyError) + + @pytest.mark.parametrize( + "key, typs", + [ + [[0, 1, 2], ["ints", "uints", "floats"]], + [[1, 3.0, "A"], ["ints", "uints", "floats"]], + ], + ) + def test_loc_getitem_label_list(self, key, typs): + # list of labels + self.check_result("loc", key, typs=typs, fails=KeyError) + + @pytest.mark.parametrize( + "key, typs, axes", + [ + [[0, 1, 2], ["empty"], None], + [[0, 2, 10], ["ints", "uints", "floats"], 0], + [[3, 6, 7], ["ints", "uints", "floats"], 1], + # GH 17758 - MultiIndex and missing keys + [[(1, 3), (1, 4), (2, 5)], ["multi"], 0], + ], + ) + def test_loc_getitem_label_list_with_missing(self, key, typs, axes): + self.check_result("loc", key, typs=typs, axes=axes, fails=KeyError) + + def test_loc_getitem_label_list_fails(self): + # fails + 
self.check_result( + "loc", [20, 30, 40], typs=["ints", "uints"], axes=1, fails=KeyError + ) + + def test_loc_getitem_label_array_like(self): + # TODO: test something? + # array like + pass + + def test_loc_getitem_bool(self): + # boolean indexers + b = [True, False, True, False] + + self.check_result("loc", b, typs=["empty"], fails=IndexError) + + @pytest.mark.parametrize( + "slc, typs, axes, fails", + [ + [ + slice(1, 3), + ["labels", "mixed", "empty", "ts", "floats"], + None, + TypeError, + ], + [slice("20130102", "20130104"), ["ts"], 1, TypeError], + [slice(2, 8), ["mixed"], 0, TypeError], + [slice(2, 8), ["mixed"], 1, KeyError], + [slice(2, 4, 2), ["mixed"], 0, TypeError], + ], + ) + def test_loc_getitem_label_slice(self, slc, typs, axes, fails): + + # label slices (with ints) + + # real label slices + + # GH 14316 + + self.check_result( + "loc", + slc, + typs=typs, + axes=axes, + fails=fails, + ) + + def test_setitem_from_duplicate_axis(self): + # GH#34034 + df = DataFrame( + [[20, "a"], [200, "a"], [200, "a"]], + columns=["col1", "col2"], + index=[10, 1, 1], + ) + df.loc[1, "col1"] = np.arange(2) + expected = DataFrame( + [[20, "a"], [0, "a"], [1, "a"]], columns=["col1", "col2"], index=[10, 1, 1] + ) + tm.assert_frame_equal(df, expected) + + def test_column_types_consistent(self): + # GH 26779 + df = DataFrame( + data={ + "channel": [1, 2, 3], + "A": ["String 1", np.NaN, "String 2"], + "B": [ + Timestamp("2019-06-11 11:00:00"), + pd.NaT, + Timestamp("2019-06-11 12:00:00"), + ], + } + ) + df2 = DataFrame( + data={"A": ["String 3"], "B": [Timestamp("2019-06-11 12:00:00")]} + ) + # Change Columns A and B to df2.values wherever Column A is NaN + df.loc[df["A"].isna(), ["A", "B"]] = df2.values + expected = DataFrame( + data={ + "channel": [1, 2, 3], + "A": ["String 1", "String 3", "String 2"], + "B": [ + Timestamp("2019-06-11 11:00:00"), + Timestamp("2019-06-11 12:00:00"), + Timestamp("2019-06-11 12:00:00"), + ], + } + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "obj, key, exp", + [ + ( + DataFrame([[1]], columns=Index([False])), + IndexSlice[:, False], + Series([1], name=False), + ), + (Series([1], index=Index([False])), False, [1]), + (DataFrame([[1]], index=Index([False])), False, Series([1], name=False)), + ], + ) + def test_loc_getitem_single_boolean_arg(self, obj, key, exp): + # GH 44322 + res = obj.loc[key] + if isinstance(exp, (DataFrame, Series)): + tm.assert_equal(res, exp) + else: + assert res == exp + + +class TestLocBaseIndependent: + # Tests for loc that do not depend on subclassing Base + def test_loc_npstr(self): + # GH#45580 + df = DataFrame(index=date_range("2021", "2022")) + result = df.loc[np.array(["2021/6/1"])[0] :] + expected = df.iloc[151:] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "msg, key", + [ + (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), + (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), + (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), + ( + r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", + (Period(2018), Period(2016), "bar"), + ), + (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), + ( + r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", + (Period(2017), "foo", Period(2015)), + ), + (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), + ], + ) + def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): + # GH#20684 + """ 
+        parse_time_string returns the parameter if the type is not matched.
+        PeriodIndex.get_loc takes the value returned from parse_time_string as a tuple.
+        If the first argument is a Period and the tuple has 3 items,
+        processing continues rather than raising an exception.
+        """
+        df = DataFrame(
+            {
+                "A": [Period(2019), "x1", "x2"],
+                "B": [Period(2018), Period(2016), "y1"],
+                "C": [Period(2017), "z1", Period(2015)],
+                "V1": [1, 2, 3],
+                "V2": [10, 20, 30],
+            }
+        ).set_index(["A", "B", "C"])
+        with pytest.raises(KeyError, match=msg):
+            df.loc[key]
+
+    def test_loc_getitem_missing_unicode_key(self):
+        df = DataFrame({"a": [1]})
+        with pytest.raises(KeyError, match="\u05d0"):
+            df.loc[:, "\u05d0"]  # should not raise UnicodeEncodeError
+
+    def test_loc_getitem_dups(self):
+        # GH 5678
+        # repeated getitems on a dup index returning an ndarray
+        df = DataFrame(
+            np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)]
+        )
+        expected = df.loc["A", 0]
+        result = df.loc[:, 0].loc["A"]
+        tm.assert_series_equal(result, expected)
+
+    def test_loc_getitem_dups2(self):
+
+        # GH4726
+        # dup indexing with iloc/loc
+        df = DataFrame(
+            [[1, 2, "foo", "bar", Timestamp("20130101")]],
+            columns=["a", "a", "a", "a", "a"],
+            index=[1],
+        )
+        expected = Series(
+            [1, 2, "foo", "bar", Timestamp("20130101")],
+            index=["a", "a", "a", "a", "a"],
+            name=1,
+        )
+
+        result = df.iloc[0]
+        tm.assert_series_equal(result, expected)
+
+        result = df.loc[1]
+        tm.assert_series_equal(result, expected)
+
+    def test_loc_setitem_dups(self):
+
+        # GH 6541
+        df_orig = DataFrame(
+            {
+                "me": list("rttti"),
+                "foo": list("aaade"),
+                "bar": np.arange(5, dtype="float64") * 1.34 + 2,
+                "bar2": np.arange(5, dtype="float64") * -0.34 + 2,
+            }
+        ).set_index("me")
+
+        indexer = (
+            "r",
+            ["bar", "bar2"],
+        )
+        df = df_orig.copy()
+        df.loc[indexer] *= 2.0
+        tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
+
+        indexer = (
+            "r",
+            "bar",
+        )
+        df = df_orig.copy()
+        df.loc[indexer] *= 2.0
+        assert df.loc[indexer] == 2.0 * df_orig.loc[indexer]
+
+        indexer = (
+            "t",
+            ["bar", "bar2"],
+        )
+        df = df_orig.copy()
+        df.loc[indexer] *= 2.0
+        tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
+
+    def test_loc_setitem_slice(self):
+        # GH10503
+
+        # assigning the same type should not change the type
+        df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")})
+        ix = df1["a"] == 1
+        newb1 = df1.loc[ix, "b"] + 1
+        df1.loc[ix, "b"] = newb1
+        expected = DataFrame(
+            {"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")}
+        )
+        tm.assert_frame_equal(df1, expected)
+
+        # assigning a new type should get the inferred type
+        df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64")
+        ix = df1["a"] == 1
+        newb2 = df2.loc[ix, "b"]
+        df1.loc[ix, "b"] = newb2
+        expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64")
+        tm.assert_frame_equal(df2, expected)
+
+    def test_loc_setitem_dtype(self):
+        # GH31340
+        df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]})
+        cols = ["a", "b", "c"]
+        msg = "will attempt to set the values inplace instead"
+        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+            df.loc[:, cols] = df.loc[:, cols].astype("float32")
+
+        expected = DataFrame(
+            {
+                "id": ["A"],
+                "a": np.array([1.2], dtype="float32"),
+                "b": np.array([0.0], dtype="float32"),
+                "c": np.array([-2.5], dtype="float32"),
+            }
+        )  # id is inferred as object
+
+        tm.assert_frame_equal(df, expected)
+
+    def test_getitem_label_list_with_missing(self):
+        s = Series(range(3), index=["a",
"b", "c"]) + + # consistency + with pytest.raises(KeyError, match="not in index"): + s[["a", "d"]] + + s = Series(range(3)) + with pytest.raises(KeyError, match="not in index"): + s[[0, 3]] + + @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) + def test_loc_getitem_bool_diff_len(self, index): + # GH26658 + s = Series([1, 2, 3]) + msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}" + with pytest.raises(IndexError, match=msg): + s.loc[index] + + def test_loc_getitem_int_slice(self): + # TODO: test something here? + pass + + def test_loc_to_fail(self): + + # GH3449 + df = DataFrame( + np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"] + ) + + # raise a KeyError? + msg = ( + r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[1, 2], [1, 2]] + + def test_loc_to_fail2(self): + # GH 7496 + # loc should not fallback + + s = Series(dtype=object) + s.loc[1] = 1 + s.loc["a"] = 2 + + with pytest.raises(KeyError, match=r"^-1$"): + s.loc[-1] + + msg = ( + r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc[[-1, -2]] + + msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\"" + with pytest.raises(KeyError, match=msg): + s.loc[["4"]] + + s.loc[-1] = 3 + with pytest.raises(KeyError, match="not in index"): + s.loc[[-1, -2]] + + s["a"] = 2 + msg = ( + r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc[[-2]] + + del s["a"] + + with pytest.raises(KeyError, match=msg): + s.loc[[-2]] = 0 + + def test_loc_to_fail3(self): + # inconsistency between .loc[values] and .loc[values,:] + # GH 7999 + df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) + + msg = ( + r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[3], :] + + with pytest.raises(KeyError, match=msg): + df.loc[[3]] + + def test_loc_getitem_list_with_fail(self): + # 15747 + # should KeyError if *any* missing labels + + s = Series([1, 2, 3]) + + s.loc[[2]] + + with pytest.raises( + KeyError, + match=re.escape( + "\"None of [Int64Index([3], dtype='int64')] are in the [index]\"" + ), + ): + s.loc[[3]] + + # a non-match and a match + with pytest.raises(KeyError, match="not in index"): + s.loc[[2, 3]] + + def test_loc_index(self): + # gh-17131 + # a boolean index should index like a boolean numpy array + + df = DataFrame( + np.random.random(size=(5, 10)), + index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"], + ) + + mask = df.index.map(lambda x: "alpha" in x) + expected = df.loc[np.array(mask)] + + result = df.loc[mask] + tm.assert_frame_equal(result, expected) + + result = df.loc[mask.values] + tm.assert_frame_equal(result, expected) + + result = df.loc[pd.array(mask, dtype="boolean")] + tm.assert_frame_equal(result, expected) + + def test_loc_general(self): + + df = DataFrame( + np.random.rand(4, 4), + columns=["A", "B", "C", "D"], + index=["A", "B", "C", "D"], + ) + + # want this to work + result = df.loc[:, "A":"B"].iloc[0:2, :] + assert (result.columns == ["A", "B"]).all() + assert (result.index == ["A", "B"]).all() + + # mixed type + result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0] + expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0) + 
tm.assert_series_equal(result, expected) + assert result.dtype == object + + @pytest.fixture + def frame_for_consistency(self): + return DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + + @pytest.mark.parametrize( + "val", + [0, np.array(0, dtype=np.int64), np.array([0, 0, 0, 0, 0], dtype=np.int64)], + ) + def test_loc_setitem_consistency(self, frame_for_consistency, val): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + expected = DataFrame( + { + "date": Series(0, index=range(5), dtype=np.int64), + "val": Series(range(5), dtype=np.int64), + } + ) + df = frame_for_consistency.copy() + df.loc[:, "date"] = val + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + + expected = DataFrame( + { + "date": Series("foo", index=range(5)), + "val": Series(range(5), dtype=np.int64), + } + ) + df = frame_for_consistency.copy() + df.loc[:, "date"] = "foo" + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + expected = DataFrame( + { + "date": Series(1.0, index=range(5)), + "val": Series(range(5), dtype=np.int64), + } + ) + df = frame_for_consistency.copy() + df.loc[:, "date"] = 1.0 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_single_row(self): + # GH 15494 + # setting on frame with single row + df = DataFrame({"date": Series([Timestamp("20180101")])}) + df.loc[:, "date"] = "string" + expected = DataFrame({"date": Series(["string"])}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_empty(self): + # empty (essentially noops) + expected = DataFrame(columns=["x", "y"]) + expected["x"] = expected["x"].astype(np.int64) + df = DataFrame(columns=["x", "y"]) + with tm.assert_produces_warning(None): + df.loc[:, "x"] = 1 + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["x", "y"]) + df["x"] = 1 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_slice_column_len(self): + # .loc[:,column] setting with slice == len of the column + # GH10408 + levels = [ + ["Region_1"] * 4, + ["Site_1", "Site_1", "Site_2", "Site_2"], + [3987227376, 3980680971, 3977723249, 3977723089], + ] + mi = MultiIndex.from_arrays(levels, names=["Region", "Site", "RespondentID"]) + + clevels = [ + ["Respondent", "Respondent", "Respondent", "OtherCat", "OtherCat"], + ["Something", "StartDate", "EndDate", "Yes/No", "SomethingElse"], + ] + cols = MultiIndex.from_arrays(clevels, names=["Level_0", "Level_1"]) + + values = [ + ["A", "5/25/2015 10:59", "5/25/2015 11:22", "Yes", np.nan], + ["A", "5/21/2015 9:40", "5/21/2015 9:52", "Yes", "Yes"], + ["A", "5/20/2015 8:27", "5/20/2015 8:41", "Yes", np.nan], + ["A", "5/20/2015 8:33", "5/20/2015 9:09", "Yes", "No"], + ] + df = DataFrame(values, index=mi, columns=cols) + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[:, ("Respondent", "StartDate")] = to_datetime( + df.loc[:, ("Respondent", "StartDate")] + ) + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[:, ("Respondent", "EndDate")] = to_datetime( + df.loc[:, ("Respondent", "EndDate")] + ) + with tm.assert_produces_warning(None, match=msg): + # Adding a new key -> 
no warning + df.loc[:, ("Respondent", "Duration")] = ( + df.loc[:, ("Respondent", "EndDate")] + - df.loc[:, ("Respondent", "StartDate")] + ) + + with tm.assert_produces_warning(None, match=msg): + # timedelta64[s] -> float64, so this cannot be done inplace, so + # no warning + df.loc[:, ("Respondent", "Duration")] = df.loc[ + :, ("Respondent", "Duration") + ].astype("timedelta64[s]") + + expected = Series( + [1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration") + ) + tm.assert_series_equal(df[("Respondent", "Duration")], expected) + + @pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"]) + def test_loc_assign_non_ns_datetime(self, unit): + # GH 27395, non-ns dtype assignment via .loc should work + # and return the same result when using simple assignment + df = DataFrame( + { + "timestamp": [ + np.datetime64("2017-02-11 12:41:29"), + np.datetime64("1991-11-07 04:22:37"), + ] + } + ) + + df.loc[:, unit] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]") + df["expected"] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]") + expected = Series(df.loc[:, "expected"], name=unit) + tm.assert_series_equal(df.loc[:, unit], expected) + + def test_loc_modify_datetime(self): + # see gh-28837 + df = DataFrame.from_dict( + {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} + ) + + df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True) + + df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] + df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] + + expected = DataFrame( + [ + [1485264372711, "2017-01-24 13:26:12.711", "2017-01-24 13:26:12.711"], + [1485265925110, "2017-01-24 13:52:05.110", "2017-01-24 13:52:05.110"], + [1540215845888, "2018-10-22 13:44:05.888", "2018-10-22 13:44:05.888"], + [1540282121025, "2018-10-23 08:08:41.025", "2018-10-23 08:08:41.025"], + ], + columns=["date", "date_dt", "date_dt_cp"], + ) + + columns = ["date_dt", "date_dt_cp"] + expected[columns] = expected[columns].apply(to_datetime) + + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_with_reindex(self): + # GH#6254 setting issue + df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float) + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + + # setting integer values into a float dataframe with loc is inplace, + # so we retain float dtype + ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float) + expected = DataFrame({"A": ser}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_with_reindex_mixed(self): + # GH#40480 + df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) + df["B"] = "string" + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") + expected = DataFrame({"A": ser}) + expected["B"] = "string" + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_with_inverted_slice(self): + # GH#40480 + df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float) + df["B"] = "string" + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64") + expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3]) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_frame(self): + # GH#6252 setting with an empty frame + keys1 = ["@" + 
str(i) for i in range(5)] + val1 = np.arange(5, dtype="int64") + + keys2 = ["@" + str(i) for i in range(4)] + val2 = np.arange(4, dtype="int64") + + index = list(set(keys1).union(keys2)) + df = DataFrame(index=index) + df["A"] = np.nan + df.loc[keys1, "A"] = val1 + + df["B"] = np.nan + df.loc[keys2, "B"] = val2 + + # Because df["A"] was initialized as float64, setting values into it + # is inplace, so that dtype is retained + sera = Series(val1, index=keys1, dtype=np.float64) + serb = Series(val2, index=keys2) + expected = DataFrame({"A": sera, "B": serb}).reindex(index=index) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame(self): + df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")) + + result = df.iloc[0, 0] + + df.loc["a", "A"] = 1 + result = df.loc["a", "A"] + assert result == 1 + + result = df.iloc[0, 0] + assert result == 1 + + df.loc[:, "B":"D"] = 0 + expected = df.loc[:, "B":"D"] + result = df.iloc[:, 1:] + tm.assert_frame_equal(result, expected) + + def test_loc_setitem_frame_nan_int_coercion_invalid(self): + # GH 8669 + # invalid coercion of nan -> int + df = DataFrame({"A": [1, 2, 3], "B": np.nan}) + df.loc[df.B > df.A, "B"] = df.A + expected = DataFrame({"A": [1, 2, 3], "B": np.nan}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_mixed_labels(self): + # GH 6546 + # setting with mixed labels + df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]}) + + result = df.loc[0, [1, 2]] + expected = Series( + [1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0 + ) + tm.assert_series_equal(result, expected) + + expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]}) + df.loc[0, [1, 2]] = [5, 6] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_multiples(self): + # multiple setting + df = DataFrame( + {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)} + ) + rhs = df.loc[1:2] + rhs.index = df.index[0:2] + df.loc[0:1] = rhs + expected = DataFrame( + {"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)} + ) + tm.assert_frame_equal(df, expected) + + # multiple setting with frame on rhs (with M8) + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + expected = DataFrame( + { + "date": [ + Timestamp("20000101"), + Timestamp("20000102"), + Timestamp("20000101"), + Timestamp("20000102"), + Timestamp("20000103"), + ], + "val": Series([0, 1, 0, 1, 2], dtype=np.int64), + } + ) + rhs = df.loc[0:2] + rhs.index = df.index[2:5] + df.loc[2:4] = rhs + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "indexer", [["A"], slice(None, "A", None), np.array(["A"])] + ) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_loc_setitem_with_scalar_index(self, indexer, value): + # GH #19474 + # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". 
+ + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df.loc[0, indexer] = value + result = df.loc[0, "A"] + + assert is_scalar(result) and result == "Z" + + @pytest.mark.parametrize( + "index,box,expected", + [ + ( + ([0, 2], ["A", "B", "C", "D"]), + 7, + DataFrame( + [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (1, ["C", "D"]), + [7, 8], + DataFrame( + [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (1, ["A", "B", "C"]), + np.array([7, 8, 9], dtype=np.int64), + DataFrame( + [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"] + ), + ), + ( + (slice(1, 3, None), ["B", "C", "D"]), + [[7, 8, 9], [10, 11, 12]], + DataFrame( + [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (slice(1, 3, None), ["C", "A", "D"]), + np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64), + DataFrame( + [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (slice(None, None, None), ["A", "C"]), + DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + DataFrame( + [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] + ), + ), + ], + ) + def test_loc_setitem_missing_columns(self, index, box, expected): + # GH 29334 + df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + + warn = None + if isinstance(index[0], slice) and index[0] == slice(None): + warn = DeprecationWarning + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(warn, match=msg): + df.loc[index] = box + tm.assert_frame_equal(df, expected) + + def test_loc_coercion(self): + + # GH#12411 + df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + def test_loc_coercion2(self): + # GH#12045 + import datetime + + df = DataFrame( + {"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]} + ) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + def test_loc_coercion3(self): + # GH#11594 + df = DataFrame({"text": ["some words"] + [None] * 9}) + expected = df.dtypes + + result = df.iloc[0:2] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[3:] + tm.assert_series_equal(result.dtypes, expected) + + def test_setitem_new_key_tz(self, indexer_sl): + # GH#12862 should not raise on assigning the second value + vals = [ + to_datetime(42).tz_localize("UTC"), + to_datetime(666).tz_localize("UTC"), + ] + expected = Series(vals, index=["foo", "bar"]) + + ser = Series(dtype=object) + indexer_sl(ser)["foo"] = vals[0] + indexer_sl(ser)["bar"] = vals[1] + + tm.assert_series_equal(ser, expected) + + def test_loc_non_unique(self): + # GH3659 + # non-unique indexer with loc slice + # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + + # these are going to raise because the we are non monotonic + df = DataFrame( + {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] + ) + msg = "'Cannot get left slice bound for non-unique label: 1'" + with pytest.raises(KeyError, match=msg): + df.loc[1:] + msg = "'Cannot get left slice bound for non-unique label: 0'" + with 
pytest.raises(KeyError, match=msg): + df.loc[0:] + msg = "'Cannot get left slice bound for non-unique label: 1'" + with pytest.raises(KeyError, match=msg): + df.loc[1:2] + + # monotonic are ok + df = DataFrame( + {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] + ).sort_index(axis=0) + result = df.loc[1:] + expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3]) + tm.assert_frame_equal(result, expected) + + result = df.loc[0:] + tm.assert_frame_equal(result, df) + + result = df.loc[1:2] + expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.arm_slow + @pytest.mark.parametrize("length, l2", [[900, 100], [900000, 100000]]) + def test_loc_non_unique_memory_error(self, length, l2): + + # GH 4280 + # non_unique index with a large selection triggers a memory error + + columns = list("ABCDEFG") + + df = pd.concat( + [ + DataFrame( + np.random.randn(length, len(columns)), + index=np.arange(length), + columns=columns, + ), + DataFrame(np.ones((l2, len(columns))), index=[0] * l2, columns=columns), + ] + ) + + assert df.index.is_unique is False + + mask = np.arange(l2) + result = df.loc[mask] + expected = pd.concat( + [ + df.take([0]), + DataFrame( + np.ones((len(mask), len(columns))), + index=[0] * len(mask), + columns=columns, + ), + df.take(mask[1:]), + ] + ) + tm.assert_frame_equal(result, expected) + + def test_loc_name(self): + # GH 3880 + df = DataFrame([[1, 1], [1, 1]]) + df.index.name = "index_name" + result = df.iloc[[0, 1]].index.name + assert result == "index_name" + + result = df.loc[[0, 1]].index.name + assert result == "index_name" + + def test_loc_empty_list_indexer_is_ok(self): + + df = tm.makeCustomDataframe(5, 2) + # vertical empty + tm.assert_frame_equal( + df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True + ) + # horizontal empty + tm.assert_frame_equal( + df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + # horizontal empty + tm.assert_frame_equal( + df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + + def test_identity_slice_returns_new_object( + self, using_array_manager, request, using_copy_on_write + ): + # GH13873 + if using_array_manager: + mark = pytest.mark.xfail( + reason="setting with .loc[:, 'a'] does not alter inplace" + ) + request.node.add_marker(mark) + + original_df = DataFrame({"a": [1, 2, 3]}) + sliced_df = original_df.loc[:] + assert sliced_df is not original_df + assert original_df[:] is not original_df + + # should be a shallow copy + assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values) + + # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig + # depending on CoW + original_df.loc[:, "a"] = [4, 4, 4] + if using_copy_on_write: + assert (sliced_df["a"] == [1, 2, 3]).all() + else: + assert (sliced_df["a"] == 4).all() + + # These should not return copies + assert original_df is original_df.loc[:, :] + df = DataFrame(np.random.randn(10, 4)) + assert df[0] is df.loc[:, 0] + + # Same tests for Series + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.loc[:] + assert sliced_series is not original_series + assert original_series[:] is not original_series + + original_series[:3] = [7, 8, 9] + if using_copy_on_write: + assert all(sliced_series[:3] == [1, 2, 3]) + else: + assert all(sliced_series[:3] == [7, 8, 9]) + + @pytest.mark.xfail(reason="accidental fix reverted - GH37497") + def 
test_loc_copy_vs_view(self): + # GH 15631 + x = DataFrame(zip(range(3), range(3)), columns=["a", "b"]) + + y = x.copy() + q = y.loc[:, "a"] + q += 2 + + tm.assert_frame_equal(x, y) + + z = x.copy() + q = z.loc[x.index, "a"] + q += 2 + + tm.assert_frame_equal(x, z) + + def test_loc_uint64(self): + # GH20722 + # Test whether loc accept uint64 max value as index. + umax = np.iinfo("uint64").max + ser = Series([1, 2], index=[umax - 1, umax]) + + result = ser.loc[umax - 1] + expected = ser.iloc[0] + assert result == expected + + result = ser.loc[[umax - 1]] + expected = ser.iloc[[0]] + tm.assert_series_equal(result, expected) + + result = ser.loc[[umax - 1, umax]] + tm.assert_series_equal(result, ser) + + def test_loc_uint64_disallow_negative(self): + # GH#41775 + umax = np.iinfo("uint64").max + ser = Series([1, 2], index=[umax - 1, umax]) + + with pytest.raises(KeyError, match="-1"): + # don't wrap around + ser.loc[-1] + + with pytest.raises(KeyError, match="-1"): + # don't wrap around + ser.loc[[-1]] + + # FIXME: warning issued here is false-positive + @pytest.mark.filterwarnings("ignore:.*will attempt to set.*:FutureWarning") + def test_loc_setitem_empty_append_expands_rows(self): + # GH6173, various appends to an empty dataframe + + data = [1, 2, 3] + expected = DataFrame({"x": data, "y": [None] * len(data)}) + + # appends to fit length of data + df = DataFrame(columns=["x", "y"]) + df.loc[:, "x"] = data + tm.assert_frame_equal(df, expected) + + # FIXME: warning issued here is false-positive + @pytest.mark.filterwarnings("ignore:.*will attempt to set.*:FutureWarning") + def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): + # GH#37932 same as test_loc_setitem_empty_append_expands_rows + # but with mixed dtype so we go through take_split_path + data = [1, 2, 3] + expected = DataFrame({"x": data, "y": [None] * len(data)}) + + df = DataFrame(columns=["x", "y"]) + df["x"] = df["x"].astype(np.int64) + df.loc[:, "x"] = data + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_append_single_value(self): + # only appends one value + expected = DataFrame({"x": [1.0], "y": [np.nan]}) + df = DataFrame(columns=["x", "y"], dtype=float) + df.loc[0, "x"] = expected.loc[0, "x"] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_append_raises(self): + # GH6173, various appends to an empty dataframe + + data = [1, 2] + df = DataFrame(columns=["x", "y"]) + df.index = df.index.astype(np.int64) + msg = ( + r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " + r"are in the \[index\]" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[0, 1], "x"] = data + + msg = "|".join( + [ + "cannot copy sequence with size 2 to array axis with dimension 0", + r"could not broadcast input array from shape \(2,\) into shape \(0,\)", + "Must have equal len keys and value when setting with an iterable", + ] + ) + with pytest.raises(ValueError, match=msg): + df.loc[0:2, "x"] = data + + def test_indexing_zerodim_np_array(self): + # GH24924 + df = DataFrame([[1, 2], [3, 4]]) + result = df.loc[np.array(0)] + s = Series([1, 2], name=0) + tm.assert_series_equal(result, s) + + def test_series_indexing_zerodim_np_array(self): + # GH24924 + s = Series([1, 2]) + result = s.loc[np.array(0)] + assert result == 1 + + def test_loc_reverse_assignment(self): + # GH26939 + data = [1, 2, 3, 4, 5, 6] + [None] * 4 + expected = Series(data, index=range(2010, 2020)) + + result = Series(index=range(2010, 2020), dtype=np.float64) + result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1] + + 
tm.assert_series_equal(result, expected) + + def test_loc_setitem_str_to_small_float_conversion_type(self): + # GH#20388 + np.random.seed(13) + col_data = [str(np.random.random() * 1e-12) for _ in range(5)] + result = DataFrame(col_data, columns=["A"]) + expected = DataFrame(col_data, columns=["A"], dtype=object) + tm.assert_frame_equal(result, expected) + + # assigning with loc/iloc attempts to set the values inplace, which + # in this case is successful + result.loc[result.index, "A"] = [float(x) for x in col_data] + expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object) + tm.assert_frame_equal(result, expected) + + # assigning the entire column using __setitem__ swaps in the new array + # GH#??? + result["A"] = [float(x) for x in col_data] + expected = DataFrame(col_data, columns=["A"], dtype=float) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_time_object(self, frame_or_series): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + mask = (rng.hour == 9) & (rng.minute == 30) + + obj = DataFrame(np.random.randn(len(rng), 3), index=rng) + obj = tm.get_obj(obj, frame_or_series) + + result = obj.loc[time(9, 30)] + exp = obj.loc[mask] + tm.assert_equal(result, exp) + + chunk = obj.loc["1/4/2000":] + result = chunk.loc[time(9, 30)] + expected = result[-1:] + + # Without resetting the freqs, these are 5 min and 1440 min, respectively + result.index = result.index._with_freq(None) + expected.index = expected.index._with_freq(None) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) + @td.skip_if_no_scipy + @pytest.mark.filterwarnings( + # TODO(2.0): remove filtering; note only needed for using_array_manager + "ignore:The behavior of .astype from SparseDtype.*FutureWarning" + ) + def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype): + import scipy.sparse + + spmatrix_t = getattr(scipy.sparse, spmatrix_t) + + # The bug is triggered by a sparse matrix with purely sparse columns. So the + # recipe below generates a rectangular matrix of dimension (5, 7) where all the + # diagonal cells are ones, meaning the last two columns are purely sparse. 
+ rows, cols = 5, 7 + spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) + df = DataFrame.sparse.from_spmatrix(spmatrix) + + # regression test for GH#34526 + itr_idx = range(2, rows) + result = df.loc[itr_idx].values + expected = spmatrix.toarray()[itr_idx] + tm.assert_numpy_array_equal(result, expected) + + # regression test for GH#34540 + result = df.loc[itr_idx].dtypes.values + expected = np.full(cols, SparseDtype(dtype, fill_value=0)) + tm.assert_numpy_array_equal(result, expected) + + def test_loc_getitem_listlike_all_retains_sparse(self): + df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))}) + result = df.loc[[0, 1]] + tm.assert_frame_equal(result, df) + + @td.skip_if_no_scipy + def test_loc_getitem_sparse_frame(self): + # GH34687 + from scipy.sparse import eye + + df = DataFrame.sparse.from_spmatrix(eye(5)) + result = df.loc[range(2)] + expected = DataFrame( + [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]], + dtype=SparseDtype("float64", 0.0), + ) + tm.assert_frame_equal(result, expected) + + result = df.loc[range(2)].loc[range(1)] + expected = DataFrame( + [[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=SparseDtype("float64", 0.0) + ) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_sparse_series(self): + # GH34687 + s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0)) + + result = s.loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + + result = s.loc[range(3)].loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("indexer", ["loc", "iloc"]) + def test_getitem_single_row_sparse_df(self, indexer): + # GH#46406 + df = DataFrame([[1.0, 0.0, 1.5], [0.0, 2.0, 0.0]], dtype=SparseDtype(float)) + result = getattr(df, indexer)[0] + expected = Series([1.0, 0.0, 1.5], dtype=SparseDtype(float), name=0) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) + def test_loc_getitem_iterable(self, float_frame, key_type): + idx = key_type(["A", "B", "C"]) + result = float_frame.loc[:, idx] + expected = float_frame.loc[:, ["A", "B", "C"]] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_timedelta_0seconds(self): + # GH#10583 + df = DataFrame(np.random.normal(size=(10, 4))) + df.index = timedelta_range(start="0s", periods=10, freq="s") + expected = df.loc[Timedelta("0s") :, :] + result = df.loc["0s":, :] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "val,expected", [(2**63 - 1, Series([1])), (2**63, Series([2]))] + ) + def test_loc_getitem_uint64_scalar(self, val, expected): + # see GH#19399 + df = DataFrame([1, 2], index=[2**63 - 1, 2**63]) + result = df.loc[val] + + expected.name = val + tm.assert_series_equal(result, expected) + + def test_loc_setitem_int_label_with_float64index(self): + # note labels are floats + ser = Series(["a", "b", "c"], index=[0, 0.5, 1]) + expected = ser.copy() + + ser.loc[1] = "zoo" + expected.iloc[2] = "zoo" + + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize( + "indexer, expected", + [ + # The test name is a misnomer in the 0 case as df.index[indexer] + # is a scalar. 
+ (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), + ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]), + ], + ) + def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): + # GH#16637 + tdi = to_timedelta(range(10), unit="s") + df = DataFrame({"x": range(10)}, dtype="int64", index=tdi) + + df.loc[df.index[indexer], "x"] = 20 + + expected = DataFrame( + expected, + index=tdi, + columns=["x"], + dtype="int64", + ) + + tm.assert_frame_equal(expected, df) + + def test_loc_setitem_categorical_values_partial_column_slice(self): + # Assigning a Category to parts of a int/... column uses the values of + # the Categorical + df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")}) + exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")}) + df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp) + + def test_loc_setitem_single_row_categorical(self): + # GH#25495 + df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) + categories = Categorical(df["Alpha"], categories=["a", "b", "c"]) + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[:, "Alpha"] = categories + + result = df["Alpha"] + expected = Series(categories, index=df.index, name="Alpha") + tm.assert_series_equal(result, expected) + + def test_loc_setitem_datetime_coercion(self): + # GH#1048 + df = DataFrame({"c": [Timestamp("2010-10-01")] * 3}) + df.loc[0:1, "c"] = np.datetime64("2008-08-08") + assert Timestamp("2008-08-08") == df.loc[0, "c"] + assert Timestamp("2008-08-08") == df.loc[1, "c"] + df.loc[2, "c"] = date(2005, 5, 5) + with tm.assert_produces_warning(FutureWarning): + # Comparing Timestamp to date obj is deprecated + assert Timestamp("2005-05-05") == df.loc[2, "c"] + assert Timestamp("2005-05-05").date() == df.loc[2, "c"] + + @pytest.mark.parametrize("idxer", ["var", ["var"]]) + def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): + # GH#11365 + tz = tz_naive_fixture + idx = date_range(start="2015-07-12", periods=3, freq="H", tz=tz) + expected = DataFrame(1.2, index=idx, columns=["var"]) + # if result started off with object dtype, then the .loc.__setitem__ + # below would retain object dtype + result = DataFrame(index=idx, columns=["var"], dtype=np.float64) + result.loc[:, idxer] = expected + tm.assert_frame_equal(result, expected) + + def test_loc_setitem_time_key(self, using_array_manager): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + akey = time(12, 0, 0) + bkey = slice(time(13, 0, 0), time(14, 0, 0)) + ainds = [24, 72, 120, 168] + binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] + + result = df.copy() + result.loc[akey] = 0 + result = result.loc[akey] + expected = df.loc[akey].copy() + expected.loc[:] = 0 + if using_array_manager: + # TODO(ArrayManager) we are still overwriting columns + expected = expected.astype(float) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[akey] = 0 + result.loc[akey] = df.iloc[ainds] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[bkey] = 0 + result = result.loc[bkey] + expected = df.loc[bkey].copy() + expected.loc[:] = 0 + if using_array_manager: + # TODO(ArrayManager) we are still overwriting columns + expected = expected.astype(float) + tm.assert_frame_equal(result, expected) + + 
result = df.copy() + result.loc[bkey] = 0 + result.loc[bkey] = df.iloc[binds] + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("key", ["A", ["A"], ("A", slice(None))]) + def test_loc_setitem_unsorted_multiindex_columns(self, key): + # GH#38601 + mi = MultiIndex.from_tuples([("A", 4), ("B", "3"), ("A", "2")]) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi) + obj = df.copy() + obj.loc[:, key] = np.zeros((2, 2), dtype=int) + expected = DataFrame([[0, 2, 0], [0, 5, 0]], columns=mi) + tm.assert_frame_equal(obj, expected) + + df = df.sort_index(axis=1) + df.loc[:, key] = np.zeros((2, 2), dtype=int) + expected = expected.sort_index(axis=1) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_uint_drop(self, any_int_numpy_dtype): + # see GH#18311 + # assigning series.loc[0] = 4 changed series.dtype to int + series = Series([1, 2, 3], dtype=any_int_numpy_dtype) + series.loc[0] = 4 + expected = Series([4, 2, 3], dtype=any_int_numpy_dtype) + tm.assert_series_equal(series, expected) + + def test_loc_setitem_td64_non_nano(self): + # GH#14155 + ser = Series(10 * [np.timedelta64(10, "m")]) + ser.loc[[1, 2, 3]] = np.timedelta64(20, "m") + expected = Series(10 * [np.timedelta64(10, "m")]) + expected.loc[[1, 2, 3]] = Timedelta(np.timedelta64(20, "m")) + tm.assert_series_equal(ser, expected) + + def test_loc_setitem_2d_to_1d_raises(self): + data = np.random.randn(2, 2) + ser = Series(range(2)) + + msg = "|".join( + [ + r"shape mismatch: value array of shape \(2,2\)", + r"cannot reshape array of size 4 into shape \(2,\)", + ] + ) + with pytest.raises(ValueError, match=msg): + ser.loc[range(2)] = data + + msg = r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)" + with pytest.raises(ValueError, match=msg): + ser.loc[:] = data + + def test_loc_getitem_interval_index(self): + # GH#19977 + index = pd.interval_range(start=0, periods=3) + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] + ) + + expected = 1 + result = df.loc[0.5, "A"] + tm.assert_almost_equal(result, expected) + + def test_loc_getitem_interval_index2(self): + # GH#19977 + index = pd.interval_range(start=0, periods=3, closed="both") + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] + ) + + index_exp = pd.interval_range(start=0, periods=2, freq=1, closed="both") + expected = Series([1, 4], index=index_exp, name="A") + result = df.loc[1, "A"] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tpl", [(1,), (1, 2)]) + def test_loc_getitem_index_single_double_tuples(self, tpl): + # GH#20991 + idx = Index( + [(1,), (1, 2)], + name="A", + tupleize_cols=False, + ) + df = DataFrame(index=idx) + + result = df.loc[[tpl]] + idx = Index([tpl], name="A", tupleize_cols=False) + expected = DataFrame(index=idx) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_index_namedtuple(self): + IndexType = namedtuple("IndexType", ["a", "b"]) + idx1 = IndexType("foo", "bar") + idx2 = IndexType("baz", "bof") + index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) + df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) + + result = df.loc[IndexType("foo", "bar")]["A"] + assert result == 1 + + def test_loc_setitem_single_column_mixed(self): + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["foo", "bar", "baz"], + ) + df["str"] = "qux" + df.loc[df.index[::2], "str"] = np.nan + expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], 
dtype=object) + tm.assert_almost_equal(df["str"].values, expected) + + def test_loc_setitem_cast2(self): + # GH#7704 + # dtype conversion on setting + df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC")) + df["event"] = np.nan + df.loc[10, "event"] = "foo" + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 3 + [np.dtype("object")], + index=["A", "B", "C", "event"], + ) + tm.assert_series_equal(result, expected) + + def test_loc_setitem_cast3(self): + # Test that data type is preserved . GH#5782 + df = DataFrame({"one": np.arange(6, dtype=np.int8)}) + df.loc[1, "one"] = 6 + assert df.dtypes.one == np.dtype(np.int8) + df.one = np.int8(7) + assert df.dtypes.one == np.dtype(np.int8) + + def test_loc_setitem_range_key(self, frame_or_series): + # GH#45479 don't treat range key as positional + obj = frame_or_series(range(5), index=[3, 4, 1, 0, 2]) + + values = [9, 10, 11] + if obj.ndim == 2: + values = [[9], [10], [11]] + + obj.loc[range(3)] = values + + expected = frame_or_series([0, 1, 10, 9, 11], index=obj.index) + tm.assert_equal(obj, expected) + + +class TestLocWithEllipsis: + @pytest.fixture(params=[tm.loc, tm.iloc]) + def indexer(self, request): + # Test iloc while we're here + return request.param + + @pytest.fixture + def obj(self, series_with_simple_index, frame_or_series): + obj = series_with_simple_index + if frame_or_series is not Series: + obj = obj.to_frame() + return obj + + def test_loc_iloc_getitem_ellipsis(self, obj, indexer): + result = indexer(obj)[...] + tm.assert_equal(result, obj) + + def test_loc_iloc_getitem_leading_ellipses(self, series_with_simple_index, indexer): + obj = series_with_simple_index + key = 0 if (indexer is tm.iloc or len(obj) == 0) else obj.index[0] + + if indexer is tm.loc and obj.index.is_boolean(): + # passing [False] will get interpreted as a boolean mask + # TODO: should it? unambiguous when lengths dont match? + return + if indexer is tm.loc and isinstance(obj.index, MultiIndex): + msg = "MultiIndex does not support indexing with Ellipsis" + with pytest.raises(NotImplementedError, match=msg): + result = indexer(obj)[..., [key]] + + elif len(obj) != 0: + result = indexer(obj)[..., [key]] + expected = indexer(obj)[[key]] + tm.assert_series_equal(result, expected) + + key2 = 0 if indexer is tm.iloc else obj.name + df = obj.to_frame() + result = indexer(df)[..., [key2]] + expected = indexer(df)[:, [key2]] + tm.assert_frame_equal(result, expected) + + def test_loc_iloc_getitem_ellipses_only_one_ellipsis(self, obj, indexer): + # GH37750 + key = 0 if (indexer is tm.iloc or len(obj) == 0) else obj.index[0] + + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., ...] + + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., [key], ...] + + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., ..., key] + + # one_ellipsis_message takes precedence over "Too many indexers" + # only when the first key is Ellipsis + with pytest.raises(IndexingError, match="Too many indexers"): + indexer(obj)[key, ..., ...] 
+ + +class TestLocWithMultiIndex: + @pytest.mark.parametrize( + "keys, expected", + [ + (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]), + (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]), + ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]), + ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]), + ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]), + ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]), + ((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]), + ], + ) + @pytest.mark.parametrize("dim", ["index", "columns"]) + def test_loc_getitem_multilevel_index_order(self, dim, keys, expected): + # GH#22797 + # Try to respect order of keys given for MultiIndex.loc + kwargs = {dim: [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]} + df = DataFrame(np.arange(25).reshape(5, 5), **kwargs) + exp_index = MultiIndex.from_arrays(expected) + if dim == "index": + res = df.loc[keys, :] + tm.assert_index_equal(res.index, exp_index) + elif dim == "columns": + res = df.loc[:, keys] + tm.assert_index_equal(res.columns, exp_index) + + def test_loc_preserve_names(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + + result = ymd.loc[2000] + result2 = ymd["A"].loc[2000] + assert result.index.names == ymd.index.names[1:] + assert result2.index.names == ymd.index.names[1:] + + result = ymd.loc[2000, 2] + result2 = ymd["A"].loc[2000, 2] + assert result.index.name == ymd.index.names[2] + assert result2.index.name == ymd.index.names[2] + + def test_loc_getitem_multiindex_nonunique_len_zero(self): + # GH#13691 + mi = MultiIndex.from_product([[0], [1, 1]]) + ser = Series(0, index=mi) + + res = ser.loc[[]] + + expected = ser[:0] + tm.assert_series_equal(res, expected) + + res2 = ser.loc[ser.iloc[0:0]] + tm.assert_series_equal(res2, expected) + + def test_loc_getitem_access_none_value_in_multiindex(self): + # GH#34318: test that you can access a None value using .loc + # through a Multiindex + + ser = Series([None], MultiIndex.from_arrays([["Level1"], ["Level2"]])) + result = ser.loc[("Level1", "Level2")] + assert result is None + + midx = MultiIndex.from_product([["Level1"], ["Level2_a", "Level2_b"]]) + ser = Series([None] * len(midx), dtype=object, index=midx) + result = ser.loc[("Level1", "Level2_a")] + assert result is None + + ser = Series([1] * len(midx), dtype=object, index=midx) + result = ser.loc[("Level1", "Level2_a")] + assert result == 1 + + def test_loc_setitem_multiindex_slice(self): + # GH 34870 + + index = MultiIndex.from_tuples( + zip( + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ), + names=["first", "second"], + ) + + result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index) + result.loc[("baz", "one"):("foo", "two")] = 100 + + expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index) + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self): + times = date_range("2000-01-01", freq="10min", periods=100000) + ser = Series(range(100000), times) + result = ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] + tm.assert_series_equal(result, ser) + + def test_loc_getitem_datetime_string_with_datetimeindex(self): + # GH 16710 + df = DataFrame( + {"a": range(10), "b": range(10)}, + index=date_range("2010-01-01", "2010-01-10"), + ) + result = df.loc[["2010-01-01", "2010-01-05"], ["a", "b"]] + expected = DataFrame( + {"a": [0, 4], "b": [0, 4]}, + 
index=DatetimeIndex(["2010-01-01", "2010-01-05"]), + ) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_sorted_index_level_with_duplicates(self): + # GH#4516 sorting a MultiIndex with duplicates and multiple dtypes + mi = MultiIndex.from_tuples( + [ + ("foo", "bar"), + ("foo", "bar"), + ("bah", "bam"), + ("bah", "bam"), + ("foo", "bar"), + ("bah", "bam"), + ], + names=["A", "B"], + ) + df = DataFrame( + [ + [1.0, 1], + [2.0, 2], + [3.0, 3], + [4.0, 4], + [5.0, 5], + [6.0, 6], + ], + index=mi, + columns=["C", "D"], + ) + df = df.sort_index(level=0) + + expected = DataFrame( + [[1.0, 1], [2.0, 2], [5.0, 5]], columns=["C", "D"], index=mi.take([0, 1, 4]) + ) + + result = df.loc[("foo", "bar")] + tm.assert_frame_equal(result, expected) + + def test_additional_element_to_categorical_series_loc(self): + # GH#47677 + result = Series(["a", "b", "c"], dtype="category") + result.loc[3] = 0 + expected = Series(["a", "b", "c", 0], dtype="object") + tm.assert_series_equal(result, expected) + + def test_additional_categorical_element_loc(self): + # GH#47677 + result = Series(["a", "b", "c"], dtype="category") + result.loc[3] = "a" + expected = Series(["a", "b", "c", "a"], dtype="category") + tm.assert_series_equal(result, expected) + + def test_loc_set_nan_in_categorical_series(self, any_numeric_ea_dtype): + # GH#47677 + srs = Series( + [1, 2, 3], + dtype=CategoricalDtype(Index([1, 2, 3], dtype=any_numeric_ea_dtype)), + ) + # enlarge + srs.loc[3] = np.nan + expected = Series( + [1, 2, 3, np.nan], + dtype=CategoricalDtype(Index([1, 2, 3], dtype=any_numeric_ea_dtype)), + ) + tm.assert_series_equal(srs, expected) + # set into + srs.loc[1] = np.nan + expected = Series( + [1, np.nan, 3, np.nan], + dtype=CategoricalDtype(Index([1, 2, 3], dtype=any_numeric_ea_dtype)), + ) + tm.assert_series_equal(srs, expected) + + @pytest.mark.parametrize("na", (np.nan, pd.NA, None, pd.NaT)) + def test_loc_consistency_series_enlarge_set_into(self, na): + # GH#47677 + srs_enlarge = Series(["a", "b", "c"], dtype="category") + srs_enlarge.loc[3] = na + + srs_setinto = Series(["a", "b", "c", "a"], dtype="category") + srs_setinto.loc[3] = na + + tm.assert_series_equal(srs_enlarge, srs_setinto) + expected = Series(["a", "b", "c", na], dtype="category") + tm.assert_series_equal(srs_enlarge, expected) + + def test_loc_getitem_preserves_index_level_category_dtype(self): + # GH#15166 + df = DataFrame( + data=np.arange(2, 22, 2), + index=MultiIndex( + levels=[CategoricalIndex(["a", "b"]), range(10)], + codes=[[0] * 5 + [1] * 5, range(10)], + names=["Index1", "Index2"], + ), + ) + + expected = CategoricalIndex( + ["a", "b"], + categories=["a", "b"], + ordered=False, + name="Index1", + dtype="category", + ) + + result = df.index.levels[0] + tm.assert_index_equal(result, expected) + + result = df.loc[["a"]].index.levels[0] + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("lt_value", [30, 10]) + def test_loc_multiindex_levels_contain_values_not_in_index_anymore(self, lt_value): + # GH#41170 + df = DataFrame({"a": [12, 23, 34, 45]}, index=[list("aabb"), [0, 1, 2, 3]]) + with pytest.raises(KeyError, match=r"\['b'\] not in index"): + df.loc[df["a"] < lt_value, :].loc[["b"], :] + + def test_loc_multiindex_null_slice_na_level(self): + # GH#42055 + lev1 = np.array([np.nan, np.nan]) + lev2 = ["bar", "baz"] + mi = MultiIndex.from_arrays([lev1, lev2]) + ser = Series([0, 1], index=mi) + result = ser.loc[:, "bar"] + + # TODO: should we have name="bar"? 
+ expected = Series([0], index=[np.nan]) + tm.assert_series_equal(result, expected) + + def test_loc_drops_level(self): + # Based on test_series_varied_multiindex_alignment, where + # this used to fail to drop the first level + mi = MultiIndex.from_product( + [list("ab"), list("xy"), [1, 2]], names=["ab", "xy", "num"] + ) + ser = Series(range(8), index=mi) + + loc_result = ser.loc["a", :, :] + expected = ser.index.droplevel(0)[:4] + tm.assert_index_equal(loc_result.index, expected) + + +class TestLocSetitemWithExpansion: + @pytest.mark.slow + def test_loc_setitem_with_expansion_large_dataframe(self): + # GH#10692 + result = DataFrame({"x": range(10**6)}, dtype="int64") + result.loc[len(result)] = len(result) + 1 + expected = DataFrame({"x": range(10**6 + 1)}, dtype="int64") + tm.assert_frame_equal(result, expected) + + def test_loc_setitem_empty_series(self): + # GH#5226 + + # partially set with an empty object series + ser = Series(dtype=object) + ser.loc[1] = 1 + tm.assert_series_equal(ser, Series([1], index=[1])) + ser.loc[3] = 3 + tm.assert_series_equal(ser, Series([1, 3], index=[1, 3])) + + def test_loc_setitem_empty_series_float(self): + # GH#5226 + + # partially set with an empty object series + ser = Series(dtype=object) + ser.loc[1] = 1.0 + tm.assert_series_equal(ser, Series([1.0], index=[1])) + ser.loc[3] = 3.0 + tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3])) + + def test_loc_setitem_empty_series_str_idx(self): + # GH#5226 + + # partially set with an empty object series + ser = Series(dtype=object) + ser.loc["foo"] = 1 + tm.assert_series_equal(ser, Series([1], index=["foo"])) + ser.loc["bar"] = 3 + tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"])) + ser.loc[3] = 4 + tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3])) + + def test_loc_setitem_incremental_with_dst(self): + # GH#20724 + base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific")) + idxs = [base + timedelta(seconds=i * 900) for i in range(16)] + result = Series([0], index=[idxs[0]]) + for ts in idxs: + result.loc[ts] = 1 + expected = Series(1, index=idxs) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "conv", + [ + lambda x: x, + lambda x: x.to_datetime64(), + lambda x: x.to_pydatetime(), + lambda x: np.datetime64(x), + ], + ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"], + ) + def test_loc_setitem_datetime_keys_cast(self, conv): + # GH#9516 + dt1 = Timestamp("20130101 09:00:00") + dt2 = Timestamp("20130101 10:00:00") + df = DataFrame() + df.loc[conv(dt1), "one"] = 100 + df.loc[conv(dt2), "one"] = 200 + + expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_categorical_column_retains_dtype(self, ordered): + # GH16360 + result = DataFrame({"A": [1]}) + result.loc[:, "B"] = Categorical(["b"], ordered=ordered) + expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)}) + tm.assert_frame_equal(result, expected) + + def test_loc_setitem_with_expansion_and_existing_dst(self): + # GH#18308 + start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") + end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") + ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") + idx = date_range(start, end, inclusive="left", freq="H") + assert ts not in idx # i.e. 
result.loc setitem is with-expansion + + result = DataFrame(index=idx, columns=["value"]) + result.loc[ts, "value"] = 12 + expected = DataFrame( + [np.nan] * len(idx) + [12], + index=idx.append(DatetimeIndex([ts])), + columns=["value"], + dtype=object, + ) + tm.assert_frame_equal(result, expected) + + def test_setitem_with_expansion(self): + # indexing - setting an element + df = DataFrame( + data=to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]), + columns=["time"], + ) + df["new_col"] = ["new", "old"] + df.time = df.set_index("time").index.tz_localize("UTC") + v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific") + + # trying to set a single element on a part of a different timezone + # this converts to object + df2 = df.copy() + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + df2.loc[df2.new_col == "new", "time"] = v + + expected = Series([v[0], df.loc[1, "time"]], name="time") + tm.assert_series_equal(df2.time, expected) + + v = df.loc[df.new_col == "new", "time"] + Timedelta("1s") + df.loc[df.new_col == "new", "time"] = v + tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v) + + def test_loc_setitem_with_expansion_inf_upcast_empty(self): + # Test with np.inf in columns + df = DataFrame() + df.loc[0, 0] = 1 + df.loc[1, 1] = 2 + df.loc[0, np.inf] = 3 + + result = df.columns + expected = Float64Index([0, 1, np.inf]) + tm.assert_index_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:indexing past lexsort depth") + def test_loc_setitem_with_expansion_nonunique_index(self, index): + # GH#40096 + if not len(index): + return + + index = index.repeat(2) # ensure non-unique + N = len(index) + arr = np.arange(N).astype(np.int64) + + orig = DataFrame(arr, index=index, columns=[0]) + + # key that will requiring object-dtype casting in the index + key = "kapow" + assert key not in index # otherwise test is invalid + # TODO: using a tuple key breaks here in many cases + + exp_index = index.insert(len(index), key) + if isinstance(index, MultiIndex): + assert exp_index[-1][0] == key + else: + assert exp_index[-1] == key + exp_data = np.arange(N + 1).astype(np.float64) + expected = DataFrame(exp_data, index=exp_index, columns=[0]) + + # Add new row, but no new columns + df = orig.copy() + df.loc[key, 0] = N + tm.assert_frame_equal(df, expected) + + # add new row on a Series + ser = orig.copy()[0] + ser.loc[key] = N + # the series machinery lets us preserve int dtype instead of float + expected = expected[0].astype(np.int64) + tm.assert_series_equal(ser, expected) + + # add new row and new column + df = orig.copy() + df.loc[key, 1] = N + expected = DataFrame( + {0: list(arr) + [np.nan], 1: [np.nan] * N + [float(N)]}, + index=exp_index, + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "dtype", ["Int32", "Int64", "UInt32", "UInt64", "Float32", "Float64"] + ) + def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype): + # GH#42099 + ser = Series([0, 1, 2, 3], dtype=dtype) + df = DataFrame({"data": ser}) + + result = DataFrame(index=df.index) + result.loc[df.index, "data"] = ser + + tm.assert_frame_equal(result, df) + + result = DataFrame(index=df.index) + result.loc[df.index, "data"] = ser._values + tm.assert_frame_equal(result, df) + + +class TestLocCallable: + def test_frame_loc_getitem_callable(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + # iloc cannot use boolean Series (see GH3635) + + # return bool indexer + res = 
df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + # scalar + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + def test_frame_loc_getitem_callable_mixture(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + def test_frame_loc_getitem_callable_labels(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[lambda x: ["A", "C"], :] + tm.assert_frame_equal(res, df.loc[["A", "C"], :]) + + res = df.loc[lambda x: ["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + # mixture + res = df.loc[["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + res = df.loc[lambda x: ["A", "C"], "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + def test_frame_loc_setitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.copy() + res.loc[lambda x: ["A", "C"]] = -20 + exp = df.copy() + exp.loc[["A", "C"]] = -20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], :] = 20 + exp = df.copy() + exp.loc[["A", "C"], :] = 20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = [5, 10] + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) + exp = df.copy() + exp.loc[["A", "C"], "X"] = np.array([-1, -2]) + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[["A", "C"], lambda x: ["X"]] = 10 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], "X"] = -2 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -2 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], ["X"]] = -4 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = -4 + tm.assert_frame_equal(res, exp) + 
+ +class TestPartialStringSlicing: + def test_loc_getitem_partial_string_slicing_datetimeindex(self): + # GH#35509 + df = DataFrame( + {"col1": ["a", "b", "c"], "col2": [1, 2, 3]}, + index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]), + ) + expected = DataFrame( + {"col1": ["a", "c"], "col2": [1, 3]}, + index=to_datetime(["2020-08-01", "2020-08-05"]), + ) + result = df.loc["2020-08"] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_partial_string_slicing_with_periodindex(self): + pi = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = pi.to_series() + result = ser.loc[:"2017-12"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): + ix = timedelta_range(start="1 day", end="2 days", freq="1H") + ser = ix.to_series() + result = ser.loc[:"1 days"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_str_timedeltaindex(self): + # GH#16896 + df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days")) + expected = df.iloc[0] + sliced = df.loc["0 days"] + tm.assert_series_equal(sliced, expected) + + @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) + def test_loc_getitem_partial_slice_non_monotonicity( + self, tz_aware_fixture, indexer_end, frame_or_series + ): + # GH#33146 + obj = frame_or_series( + [1] * 5, + index=DatetimeIndex( + [ + Timestamp("2019-12-30"), + Timestamp("2020-01-01"), + Timestamp("2019-12-25"), + Timestamp("2020-01-02 23:59:59.999999999"), + Timestamp("2019-12-19"), + ], + tz=tz_aware_fixture, + ), + ) + expected = frame_or_series( + [1] * 2, + index=DatetimeIndex( + [ + Timestamp("2020-01-01"), + Timestamp("2020-01-02 23:59:59.999999999"), + ], + tz=tz_aware_fixture, + ), + ) + indexer = slice("2020-01-01", indexer_end) + + result = obj[indexer] + tm.assert_equal(result, expected) + + result = obj.loc[indexer] + tm.assert_equal(result, expected) + + +class TestLabelSlicing: + def test_loc_getitem_slicing_datetimes_frame(self): + # GH#7523 + + # unique + df_unique = DataFrame( + np.arange(4.0, dtype="float64"), + index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 3, 4]], + ) + + # duplicates + df_dups = DataFrame( + np.arange(5.0, dtype="float64"), + index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]], + ) + + for df in [df_unique, df_dups]: + result = df.loc[datetime(2001, 1, 1, 10) :] + tm.assert_frame_equal(result, df) + result = df.loc[: datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11) :] + expected = df.iloc[1:] + tm.assert_frame_equal(result, expected) + result = df.loc["20010101 11":] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_label_slice_across_dst(self): + # GH#21846 + idx = date_range( + "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" + ) + series2 = Series([0, 1, 2, 3, 4], index=idx) + + t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin") + t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin") + result = series2.loc[t_1:t_2] + expected = Series([2, 3], index=idx[2:4]) + tm.assert_series_equal(result, expected) + + result = series2[t_1] + expected = 2 + assert result == expected + + @pytest.mark.parametrize( + "index", + [ + pd.period_range(start="2017-01-01", end="2018-01-01", freq="M"), + 
timedelta_range(start="1 day", end="2 days", freq="1H"), + ], + ) + def test_loc_getitem_label_slice_period_timedelta(self, index): + ser = index.to_series() + result = ser.loc[: index[-2]] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_slice_floats_inexact(self): + index = [52195.504153, 52196.303147, 52198.369883] + df = DataFrame(np.random.rand(3, 2), index=index) + + s1 = df.loc[52195.1:52196.5] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52196.6] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52198.9] + assert len(s1) == 3 + + def test_loc_getitem_float_slice_float64index(self): + ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) + + assert len(ser.loc[12.0:]) == 8 + assert len(ser.loc[12.5:]) == 7 + + idx = np.arange(10, 20, dtype=float) + idx[2] = 12.2 + ser.index = idx + assert len(ser.loc[12.0:]) == 8 + assert len(ser.loc[12.5:]) == 7 + + @pytest.mark.parametrize( + "start,stop, expected_slice", + [ + [np.timedelta64(0, "ns"), None, slice(0, 11)], + [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)], + [None, np.timedelta64(4, "D"), slice(0, 5)], + ], + ) + def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice): + # GH#20393 + ser = Series(range(11), timedelta_range("0 days", "10 days")) + result = ser.loc[slice(start, stop)] + expected = ser.iloc[expected_slice] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("start", ["2018", "2020"]) + def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series, start): + obj = frame_or_series( + [1, 2, 3], + index=[Timestamp("2016"), Timestamp("2019"), Timestamp("2017")], + ) + with tm.assert_produces_warning(FutureWarning): + obj.loc[start:"2022"] + + @pytest.mark.parametrize("value", [1, 1.5]) + def test_loc_getitem_slice_labels_int_in_object_index(self, frame_or_series, value): + # GH: 26491 + obj = frame_or_series(range(4), index=[value, "first", 2, "third"]) + result = obj.loc[value:"third"] + expected = frame_or_series(range(4), index=[value, "first", 2, "third"]) + tm.assert_equal(result, expected) + + def test_loc_getitem_slice_columns_mixed_dtype(self): + # GH: 20975 + df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0]) + expected = DataFrame( + data=[[2, 3]], index=[0], columns=Index([1, 2], dtype=object) + ) + tm.assert_frame_equal(df.loc[:, 1:], expected) + + +class TestLocBooleanLabelsAndSlices(Base): + @pytest.mark.parametrize("bool_value", [True, False]) + def test_loc_bool_incompatible_index_raises( + self, index, frame_or_series, bool_value + ): + # GH20432 + message = f"{bool_value}: boolean label can not be used without a boolean index" + if index.inferred_type != "boolean": + obj = frame_or_series(index=index, dtype="object") + with pytest.raises(KeyError, match=message): + obj.loc[bool_value] + + @pytest.mark.parametrize("bool_value", [True, False]) + def test_loc_bool_should_not_raise(self, frame_or_series, bool_value): + obj = frame_or_series( + index=Index([True, False], dtype="boolean"), dtype="object" + ) + obj.loc[bool_value] + + def test_loc_bool_slice_raises(self, index, frame_or_series): + # GH20432 + message = ( + r"slice\(True, False, None\): boolean values can not be used in a slice" + ) + obj = frame_or_series(index=index, dtype="object") + with pytest.raises(TypeError, match=message): + obj.loc[True:False] + + +class TestLocBooleanMask: + def test_loc_setitem_bool_mask_timedeltaindex(self): + # GH#14946 + df = DataFrame({"x": range(10)}) + df.index = to_timedelta(range(10), 
unit="s") + conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3] + expected_data = [ + [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], + [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], + [10, 10, 10, 3, 4, 5, 6, 7, 8, 9], + ] + for cond, data in zip(conditions, expected_data): + result = df.copy() + result.loc[cond, "x"] = 10 + + expected = DataFrame( + data, + index=to_timedelta(range(10), unit="s"), + columns=["x"], + dtype="int64", + ) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_loc_setitem_mask_with_datetimeindex_tz(self, tz): + # GH#16889 + # support .loc with alignment and tz-aware DatetimeIndex + mask = np.array([True, False, True, False]) + + idx = date_range("20010101", periods=4, tz=tz) + df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + def test_loc_setitem_mask_and_label_with_datetimeindex(self): + # GH#9478 + # a datetimeindex alignment issue with partial setting + df = DataFrame( + np.arange(6.0).reshape(3, 2), + columns=list("AB"), + index=date_range("1/1/2000", periods=3, freq="1H"), + ) + expected = df.copy() + expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] + + mask = df.A < 1 + df.loc[mask, "C"] = df.loc[mask].index + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_mask_td64_series_value(self): + # GH#23462 key list of bools, value is a Series + td1 = Timedelta(0) + td2 = Timedelta(28767471428571405) + df = DataFrame({"col": Series([td1, td2])}) + df_copy = df.copy() + ser = Series([td1]) + + expected = df["col"].iloc[1].value + df.loc[[True, False]] = ser + result = df["col"].iloc[1].value + + assert expected == result + tm.assert_frame_equal(df, df_copy) + + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values + def test_loc_setitem_boolean_and_column(self, float_frame): + expected = float_frame.copy() + mask = float_frame["A"] > 0 + + float_frame.loc[mask, "B"] = 0 + expected.values[mask.values, 1] = 0 + + tm.assert_frame_equal(float_frame, expected) + + def test_loc_setitem_ndframe_values_alignment(self, using_copy_on_write): + # GH#45501 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df.loc[[False, False, True], ["a"]] = DataFrame( + {"a": [10, 20, 30]}, index=[2, 1, 0] + ) + + expected = DataFrame({"a": [1, 2, 10], "b": [4, 5, 6]}) + tm.assert_frame_equal(df, expected) + + # same thing with Series RHS + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df.loc[[False, False, True], ["a"]] = Series([10, 11, 12], index=[2, 1, 0]) + tm.assert_frame_equal(df, expected) + + # same thing but setting "a" instead of ["a"] + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df.loc[[False, False, True], "a"] = Series([10, 11, 12], index=[2, 1, 0]) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + ser = df["a"] + ser.loc[[False, False, True]] = Series([10, 11, 12], index=[2, 1, 0]) + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + else: + tm.assert_frame_equal(df, expected) + + +class TestLocListlike: + @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list]) + def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box): + # passing a list can include valid categories _or_ NA values + ci = CategoricalIndex(["A", "B", np.nan]) + ser = Series(range(3), index=ci) + + result = 
ser.loc[box(ci)] + tm.assert_series_equal(result, ser) + + result = ser[box(ci)] + tm.assert_series_equal(result, ser) + + result = ser.to_frame().loc[box(ci)] + tm.assert_frame_equal(result, ser.to_frame()) + + ser2 = ser[:-1] + ci2 = ci[1:] + # but if there are no NAs present, this should raise KeyError + msg = "not in index" + with pytest.raises(KeyError, match=msg): + ser2.loc[box(ci2)] + + with pytest.raises(KeyError, match=msg): + ser2[box(ci2)] + + with pytest.raises(KeyError, match=msg): + ser2.to_frame().loc[box(ci2)] + + def test_loc_getitem_series_label_list_missing_values(self): + # gh-11428 + key = np.array( + ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" + ) + ser = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) + with pytest.raises(KeyError, match="not in index"): + ser.loc[key] + + def test_loc_getitem_series_label_list_missing_integer_values(self): + # GH: 25927 + ser = Series( + index=np.array([9730701000001104, 10049011000001109]), + data=np.array([999000011000001104, 999000011000001104]), + ) + with pytest.raises(KeyError, match="not in index"): + ser.loc[np.array([9730701000001104, 10047311000001102])] + + @pytest.mark.parametrize("to_period", [True, False]) + def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period): + # GH#11497 + + idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx") + if to_period: + idx = idx.to_period("D") + ser = Series([0.1, 0.2], index=idx, name="s") + + keys = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] + if to_period: + keys = [x.to_period("D") for x in keys] + result = ser.loc[keys] + exp = Series([0.1, 0.2], index=idx, name="s") + if not to_period: + exp.index = exp.index._with_freq(None) + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02"), + Timestamp("2011-01-01"), + ] + if to_period: + keys = [x.to_period("D") for x in keys] + exp = Series( + [0.2, 0.2, 0.1], index=Index(keys, name="idx", dtype=idx.dtype), name="s" + ) + result = ser.loc[keys] + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [ + Timestamp("2011-01-03"), + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ] + if to_period: + keys = [x.to_period("D") for x in keys] + + with pytest.raises(KeyError, match="not in index"): + ser.loc[keys] + + def test_loc_named_index(self): + # GH 42790 + df = DataFrame( + [[1, 2], [4, 5], [7, 8]], + index=["cobra", "viper", "sidewinder"], + columns=["max_speed", "shield"], + ) + expected = df.iloc[:2] + expected.index.name = "foo" + result = df.loc[Index(["cobra", "viper"], name="foo")] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "columns, column_key, expected_columns", + [ + ([2011, 2012, 2013], [2011, 2012], [0, 1]), + ([2011, 2012, "All"], [2011, 2012], [0, 1]), + ([2011, 2012, "All"], [2011, "All"], [0, 2]), + ], +) +def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns): + # gh-14836 + df = DataFrame(np.random.rand(3, 3), columns=columns, index=list("ABC")) + expected = df.iloc[:, expected_columns] + result = df.loc[["A", "B", "C"], column_key] + + tm.assert_frame_equal(result, expected, check_column_type=True) + + +def test_loc_setitem_float_intindex(): + # GH 8720 + rand_data = np.random.randn(8, 4) + result = DataFrame(rand_data) + result.loc[:, 0.5] = np.nan + expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) + expected = DataFrame(expected_data, columns=[0.0, 
1.0, 2.0, 3.0, 0.5]) + tm.assert_frame_equal(result, expected) + + result = DataFrame(rand_data) + result.loc[:, 0.5] = np.nan + tm.assert_frame_equal(result, expected) + + +def test_loc_axis_1_slice(): + # GH 10586 + cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]] + df = DataFrame( + np.ones((10, 8)), + index=tuple("ABCDEFGHIJ"), + columns=MultiIndex.from_tuples(cols), + ) + result = df.loc(axis=1)[(2014, 9):(2015, 8)] + expected = DataFrame( + np.ones((10, 4)), + index=tuple("ABCDEFGHIJ"), + columns=MultiIndex.from_tuples([(2014, 9), (2014, 10), (2015, 7), (2015, 8)]), + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_set_dataframe_multiindex(): + # GH 14592 + expected = DataFrame( + "a", index=range(2), columns=MultiIndex.from_product([range(2), range(2)]) + ) + result = expected.copy() + result.loc[0, [(0, 1)]] = result.loc[0, [(0, 1)]] + tm.assert_frame_equal(result, expected) + + +def test_loc_mixed_int_float(): + # GH#19456 + ser = Series(range(2), Index([1, 2.0], dtype=object)) + + result = ser.loc[1] + assert result == 0 + + +def test_loc_with_positional_slice_deprecation(): + # GH#31840 + ser = Series(range(4), index=["A", "B", "C", "D"]) + + with tm.assert_produces_warning(FutureWarning): + ser.loc[:3] = 2 + + expected = Series([2, 2, 2, 3], index=["A", "B", "C", "D"]) + tm.assert_series_equal(ser, expected) + + +def test_loc_slice_disallows_positional(): + # GH#16121, GH#24612, GH#31810 + dti = date_range("2016-01-01", periods=3) + df = DataFrame(np.random.random((3, 2)), index=dti) + + ser = df[0] + + msg = ( + "cannot do slice indexing on DatetimeIndex with these " + r"indexers \[1\] of type int" + ) + + for obj in [df, ser]: + with pytest.raises(TypeError, match=msg): + obj.loc[1:3] + + with tm.assert_produces_warning(FutureWarning): + # GH#31840 deprecated incorrect behavior + obj.loc[1:3] = 1 + + with pytest.raises(TypeError, match=msg): + df.loc[1:3, 1] + + with tm.assert_produces_warning(FutureWarning): + # GH#31840 deprecated incorrect behavior + df.loc[1:3, 1] = 2 + + +def test_loc_datetimelike_mismatched_dtypes(): + # GH#32650 dont mix and match datetime/timedelta/period dtypes + + df = DataFrame( + np.random.randn(5, 3), + columns=["a", "b", "c"], + index=date_range("2012", freq="H", periods=5), + ) + # create dataframe with non-unique DatetimeIndex + df = df.iloc[[0, 2, 2, 3]].copy() + + dti = df.index + tdi = pd.TimedeltaIndex(dti.asi8) # matching i8 values + + msg = r"None of \[TimedeltaIndex.* are in the \[index\]" + with pytest.raises(KeyError, match=msg): + df.loc[tdi] + + with pytest.raises(KeyError, match=msg): + df["a"].loc[tdi] + + +def test_loc_with_period_index_indexer(): + # GH#4125 + idx = pd.period_range("2002-01", "2003-12", freq="M") + df = DataFrame(np.random.randn(24, 10), index=idx) + tm.assert_frame_equal(df, df.loc[idx]) + tm.assert_frame_equal(df, df.loc[list(idx)]) + tm.assert_frame_equal(df, df.loc[list(idx)]) + tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) + tm.assert_frame_equal(df, df.loc[list(idx)]) + + +def test_loc_setitem_multiindex_timestamp(): + # GH#13831 + vals = np.random.randn(8, 6) + idx = date_range("1/1/2000", periods=8) + cols = ["A", "B", "C", "D", "E", "F"] + exp = DataFrame(vals, index=idx, columns=cols) + exp.loc[exp.index[1], ("A", "B")] = np.nan + vals[1][0:2] = np.nan + res = DataFrame(vals, index=idx, columns=cols) + tm.assert_frame_equal(res, exp) + + +def test_loc_getitem_multiindex_tuple_level(): + # GH#27591 + lev1 = ["a", "b", "c"] + lev2 = [(0, 1), (1, 0)] + lev3 = [0, 1] + 
cols = MultiIndex.from_product([lev1, lev2, lev3], names=["x", "y", "z"]) + df = DataFrame(6, index=range(5), columns=cols) + + # the lev2[0] here should be treated as a single label, not as a sequence + # of labels + result = df.loc[:, (lev1[0], lev2[0], lev3[0])] + + # TODO: i think this actually should drop levels + expected = df.iloc[:, :1] + tm.assert_frame_equal(result, expected) + + alt = df.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=1) + tm.assert_frame_equal(alt, expected) + + # same thing on a Series + ser = df.iloc[0] + expected2 = ser.iloc[:1] + + alt2 = ser.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=0) + tm.assert_series_equal(alt2, expected2) + + result2 = ser.loc[lev1[0], lev2[0], lev3[0]] + assert result2 == 6 + + +def test_loc_getitem_nullable_index_with_duplicates(): + # GH#34497 + df = DataFrame( + data=np.array([[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, np.nan, np.nan]]).T, + columns=["a", "b", "c"], + dtype="Int64", + ) + df2 = df.set_index("c") + assert df2.index.dtype == "Int64" + + res = df2.loc[1] + expected = Series([1, 5], index=df2.columns, dtype="Int64", name=1) + tm.assert_series_equal(res, expected) + + # pd.NA and duplicates in an object-dtype Index + df2.index = df2.index.astype(object) + res = df2.loc[1] + tm.assert_series_equal(res, expected) + + +@pytest.mark.parametrize("value", [300, np.uint16(300), np.int16(300)]) +def test_loc_setitem_uint8_upcast(value): + # GH#26049 + + df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8") + df.loc[2, "col1"] = value # value that can't be held in uint8 + + expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16") + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (Timestamp("2022-01-06"), "datetime64[ns]"), + (Timestamp("2022-01-07", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + ], +) +def test_loc_setitem_using_datetimelike_str_as_index(fill_val, exp_dtype): + + data = ["2022-01-02", "2022-01-03", "2022-01-04", fill_val.date()] + index = DatetimeIndex(data, tz=fill_val.tz, dtype=exp_dtype) + df = DataFrame([10, 11, 12, 14], columns=["a"], index=index) + # adding new row using an unexisting datetime-like str index + df.loc["2022-01-08", "a"] = 13 + + data.append("2022-01-08") + expected_index = DatetimeIndex(data, dtype=exp_dtype) + tm.assert_index_equal(df.index, expected_index, exact=True) + + +def test_loc_set_int_dtype(): + # GH#23326 + df = DataFrame([list("abc")]) + df.loc[:, "col1"] = 5 + + expected = DataFrame({0: ["a"], 1: ["b"], 2: ["c"], "col1": [5]}) + tm.assert_frame_equal(df, expected) + + +def test_loc_periodindex_3_levels(): + # GH#24091 + p_index = PeriodIndex( + ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"], + name="datetime", + freq="B", + ) + mi_series = DataFrame( + [["A", "B", 1.0], ["A", "C", 2.0], ["Z", "Q", 3.0], ["W", "F", 4.0]], + index=p_index, + columns=["ONE", "TWO", "VALUES"], + ) + mi_series = mi_series.set_index(["ONE", "TWO"], append=True)["VALUES"] + assert mi_series.loc[(p_index[0], "A", "B")] == 1.0 + + +class TestLocSeries: + @pytest.mark.parametrize("val,expected", [(2**63 - 1, 3), (2**63, 4)]) + def test_loc_uint64(self, val, expected): + # see GH#19399 + ser = Series({2**63 - 1: 3, 2**63: 4}) + assert ser.loc[val] == expected + + def test_loc_getitem(self, string_series, datetime_series): + inds = string_series.index[[3, 4, 7]] + tm.assert_series_equal(string_series.loc[inds], string_series.reindex(inds)) + tm.assert_series_equal(string_series.iloc[5::2], 
string_series[5::2]) + + # slice with indices + d1, d2 = datetime_series.index[[5, 15]] + result = datetime_series.loc[d1:d2] + expected = datetime_series.truncate(d1, d2) + tm.assert_series_equal(result, expected) + + # boolean + mask = string_series > string_series.median() + tm.assert_series_equal(string_series.loc[mask], string_series[mask]) + + # ask for index value + assert datetime_series.loc[d1] == datetime_series[d1] + assert datetime_series.loc[d2] == datetime_series[d2] + + def test_loc_getitem_not_monotonic(self, datetime_series): + d1, d2 = datetime_series.index[[5, 15]] + + ts2 = datetime_series[::2][[1, 2, 0]] + + msg = r"Timestamp\('2000-01-10 00:00:00'\)" + with pytest.raises(KeyError, match=msg): + ts2.loc[d1:d2] + with pytest.raises(KeyError, match=msg): + ts2.loc[d1:d2] = 0 + + def test_loc_getitem_setitem_integer_slice_keyerrors(self): + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + + # this is OK + cp = ser.copy() + cp.iloc[4:10] = 0 + assert (cp.iloc[4:10] == 0).all() + + # so is this + cp = ser.copy() + cp.iloc[3:11] = 0 + assert (cp.iloc[3:11] == 0).values.all() + + result = ser.iloc[2:6] + result2 = ser.loc[3:11] + expected = ser.reindex([4, 6, 8, 10]) + + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + # non-monotonic, raise KeyError + s2 = ser.iloc[list(range(5)) + list(range(9, 4, -1))] + with pytest.raises(KeyError, match=r"^3$"): + s2.loc[3:11] + with pytest.raises(KeyError, match=r"^3$"): + s2.loc[3:11] = 0 + + def test_loc_getitem_iterator(self, string_series): + idx = iter(string_series.index[:10]) + result = string_series.loc[idx] + tm.assert_series_equal(result, string_series[:10]) + + def test_loc_setitem_boolean(self, string_series): + mask = string_series > string_series.median() + + result = string_series.copy() + result.loc[mask] = 0 + expected = string_series + expected[mask] = 0 + tm.assert_series_equal(result, expected) + + def test_loc_setitem_corner(self, string_series): + inds = list(string_series.index[[5, 8, 12]]) + string_series.loc[inds] = 5 + msg = r"\['foo'\] not in index" + with pytest.raises(KeyError, match=msg): + string_series.loc[inds + ["foo"]] = 5 + + def test_basic_setitem_with_labels(self, datetime_series): + indices = datetime_series.index[[5, 10, 15]] + + cp = datetime_series.copy() + exp = datetime_series.copy() + cp[indices] = 0 + exp.loc[indices] = 0 + tm.assert_series_equal(cp, exp) + + cp = datetime_series.copy() + exp = datetime_series.copy() + cp[indices[0] : indices[2]] = 0 + exp.loc[indices[0] : indices[2]] = 0 + tm.assert_series_equal(cp, exp) + + def test_loc_setitem_listlike_of_ints(self): + + # integer indexes, be careful + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + inds = [0, 4, 6] + arr_inds = np.array([0, 4, 6]) + + cp = ser.copy() + exp = ser.copy() + ser[inds] = 0 + ser.loc[inds] = 0 + tm.assert_series_equal(cp, exp) + + cp = ser.copy() + exp = ser.copy() + ser[arr_inds] = 0 + ser.loc[arr_inds] = 0 + tm.assert_series_equal(cp, exp) + + inds_notfound = [0, 4, 5, 6] + arr_inds_notfound = np.array([0, 4, 5, 6]) + msg = r"\[5\] not in index" + with pytest.raises(KeyError, match=msg): + ser[inds_notfound] = 0 + with pytest.raises(Exception, match=msg): + ser[arr_inds_notfound] = 0 + + def test_loc_setitem_dt64tz_values(self): + # GH#12089 + ser = Series( + date_range("2011-01-01", periods=3, tz="US/Eastern"), + index=["a", "b", "c"], + ) + s2 = ser.copy() + expected = Timestamp("2011-01-03", tz="US/Eastern") + s2.loc["a"] = expected 
+ result = s2.loc["a"] + assert result == expected + + s2 = ser.copy() + s2.iloc[0] = expected + result = s2.iloc[0] + assert result == expected + + s2 = ser.copy() + s2["a"] = expected + result = s2["a"] + assert result == expected + + @pytest.mark.parametrize("array_fn", [np.array, pd.array, list, tuple]) + @pytest.mark.parametrize("size", [0, 4, 5, 6]) + def test_loc_iloc_setitem_with_listlike(self, size, array_fn): + # GH37748 + # testing insertion, in a Series of size N (here 5), of a listlike object + # of size 0, N-1, N, N+1 + + arr = array_fn([0] * size) + expected = Series([arr, 0, 0, 0, 0], index=list("abcde"), dtype=object) + + ser = Series(0, index=list("abcde"), dtype=object) + ser.loc["a"] = arr + tm.assert_series_equal(ser, expected) + + ser = Series(0, index=list("abcde"), dtype=object) + ser.iloc[0] = arr + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize("indexer", [IndexSlice["A", :], ("A", slice(None))]) + def test_loc_series_getitem_too_many_dimensions(self, indexer): + # GH#35349 + ser = Series( + index=MultiIndex.from_tuples([("A", "0"), ("A", "1"), ("B", "0")]), + data=[21, 22, 23], + ) + msg = "Too many indexers" + with pytest.raises(IndexingError, match=msg): + ser.loc[indexer, :] + + with pytest.raises(IndexingError, match=msg): + ser.loc[indexer, :] = 1 + + def test_loc_setitem(self, string_series): + inds = string_series.index[[3, 4, 7]] + + result = string_series.copy() + result.loc[inds] = 5 + + expected = string_series.copy() + expected[[3, 4, 7]] = 5 + tm.assert_series_equal(result, expected) + + result.iloc[5:10] = 10 + expected[5:10] = 10 + tm.assert_series_equal(result, expected) + + # set slice with indices + d1, d2 = string_series.index[[5, 15]] + result.loc[d1:d2] = 6 + expected[5:16] = 6 # because it's inclusive + tm.assert_series_equal(result, expected) + + # set index value + string_series.loc[d1] = 4 + string_series.loc[d2] = 6 + assert string_series[d1] == 4 + assert string_series[d2] == 6 + + @pytest.mark.parametrize("dtype", ["object", "string"]) + def test_loc_assign_dict_to_row(self, dtype): + # GH41044 + df = DataFrame({"A": ["abc", "def"], "B": ["ghi", "jkl"]}, dtype=dtype) + df.loc[0, :] = {"A": "newA", "B": "newB"} + + expected = DataFrame({"A": ["newA", "def"], "B": ["newB", "jkl"]}, dtype=dtype) + + tm.assert_frame_equal(df, expected) + + @td.skip_array_manager_invalid_test + def test_loc_setitem_dict_timedelta_multiple_set(self): + # GH 16309 + result = DataFrame(columns=["time", "value"]) + result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} + result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} + expected = DataFrame( + [[Timedelta(6, unit="s"), "foo"]], columns=["time", "value"], index=[1] + ) + tm.assert_frame_equal(result, expected) + + def test_loc_set_multiple_items_in_multiple_new_columns(self): + # GH 25594 + df = DataFrame(index=[1, 2], columns=["a"]) + df.loc[1, ["b", "c"]] = [6, 7] + + expected = DataFrame( + { + "a": Series([np.nan, np.nan], dtype="object"), + "b": [6, np.nan], + "c": [7, np.nan], + }, + index=[1, 2], + ) + + tm.assert_frame_equal(df, expected) + + def test_getitem_loc_str_periodindex(self): + # GH#33964 + index = pd.period_range(start="2000", periods=20, freq="B") + series = Series(range(20), index=index) + assert series.loc["2000-01-14"] == 9 + + def test_deprecation_warnings_raised_loc(self): + # GH#48673 + with tm.assert_produces_warning(DeprecationWarning): + values = np.arange(4).reshape(2, 2) + df = DataFrame(values, columns=["a", "b"]) + new = 
np.array([10, 11]).astype(np.int16) + df.loc[:, "a"] = new diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py new file mode 100644 index 00000000..7e54bbc3 --- /dev/null +++ b/pandas/tests/indexing/test_na_indexing.py @@ -0,0 +1,75 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "values, dtype", + [ + ([], "object"), + ([1, 2, 3], "int64"), + ([1.0, 2.0, 3.0], "float64"), + (["a", "b", "c"], "object"), + (["a", "b", "c"], "string"), + ([1, 2, 3], "datetime64[ns]"), + ([1, 2, 3], "datetime64[ns, CET]"), + ([1, 2, 3], "timedelta64[ns]"), + (["2000", "2001", "2002"], "Period[D]"), + ([1, 0, 3], "Sparse"), + ([pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(3, 4)], "interval"), + ], +) +@pytest.mark.parametrize( + "mask", [[True, False, False], [True, True, True], [False, False, False]] +) +@pytest.mark.parametrize("indexer_class", [list, pd.array, pd.Index, pd.Series]) +@pytest.mark.parametrize("frame", [True, False]) +def test_series_mask_boolean(values, dtype, mask, indexer_class, frame): + # In case len(values) < 3 + index = ["a", "b", "c"][: len(values)] + mask = mask[: len(values)] + + obj = pd.Series(values, dtype=dtype, index=index) + if frame: + if len(values) == 0: + # Otherwise obj is an empty DataFrame with shape (0, 1) + obj = pd.DataFrame(dtype=dtype) + else: + obj = obj.to_frame() + + if indexer_class is pd.array: + mask = pd.array(mask, dtype="boolean") + elif indexer_class is pd.Series: + mask = pd.Series(mask, index=obj.index, dtype="boolean") + else: + mask = indexer_class(mask) + + expected = obj[mask] + + result = obj[mask] + tm.assert_equal(result, expected) + + if indexer_class is pd.Series: + msg = "iLocation based boolean indexing cannot use an indexable as a mask" + with pytest.raises(ValueError, match=msg): + result = obj.iloc[mask] + tm.assert_equal(result, expected) + else: + result = obj.iloc[mask] + tm.assert_equal(result, expected) + + result = obj.loc[mask] + tm.assert_equal(result, expected) + + +def test_na_treated_as_false(frame_or_series, indexer_sli): + # https://github.com/pandas-dev/pandas/issues/31503 + obj = frame_or_series([1, 2, 3]) + + mask = pd.array([True, False, None], dtype="boolean") + + result = indexer_sli(obj)[mask] + expected = indexer_sli(obj)[mask.fillna(False)] + + tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py new file mode 100644 index 00000000..f973bdf7 --- /dev/null +++ b/pandas/tests/indexing/test_partial.py @@ -0,0 +1,672 @@ +""" +test setting *parts* of objects both positionally and label based + +TODO: these should be split among the indexer tests +""" + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Period, + Series, + Timestamp, + date_range, + period_range, +) +import pandas._testing as tm + + +class TestEmptyFrameSetitemExpansion: + def test_empty_frame_setitem_index_name_retained(self): + # GH#31368 empty frame has non-None index.name -> retained + df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) + series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) + + df["series"] = series + expected = DataFrame( + {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index") + ) + + tm.assert_frame_equal(df, expected) + + def test_empty_frame_setitem_index_name_inherited(self): + # GH#36527 empty frame has None index.name -> not retained + df = DataFrame() + series = 
Series(1.23, index=pd.RangeIndex(4, name="series_index")) + df["series"] = series + expected = DataFrame( + {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index") + ) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_zerolen_series_columns_align(self): + # columns will align + df = DataFrame(columns=["A", "B"]) + df.loc[0] = Series(1, index=range(4)) + expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64) + tm.assert_frame_equal(df, expected) + + # columns will align + df = DataFrame(columns=["A", "B"]) + df.loc[0] = Series(1, index=["B"]) + + exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") + tm.assert_frame_equal(df, exp) + + def test_loc_setitem_zerolen_list_length_must_match_columns(self): + # list-like must conform + df = DataFrame(columns=["A", "B"]) + + msg = "cannot set a row with mismatched columns" + with pytest.raises(ValueError, match=msg): + df.loc[0] = [1, 2, 3] + + df = DataFrame(columns=["A", "B"]) + df.loc[3] = [6, 7] # length matches len(df.columns) --> OK! + + exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64) + tm.assert_frame_equal(df, exp) + + def test_partial_set_empty_frame(self): + + # partially set with an empty object + # frame + df = DataFrame() + + msg = "cannot set a frame with no defined columns" + + with pytest.raises(ValueError, match=msg): + df.loc[1] = 1 + + with pytest.raises(ValueError, match=msg): + df.loc[1] = Series([1], index=["foo"]) + + msg = "cannot set a frame with no defined index and a scalar" + with pytest.raises(ValueError, match=msg): + df.loc[:, 1] = 1 + + def test_partial_set_empty_frame2(self): + # these work as they don't really change + # anything but the index + # GH#5632 + expected = DataFrame(columns=["foo"], index=Index([], dtype="object")) + + df = DataFrame(index=Index([], dtype="object")) + df["foo"] = Series([], dtype="object") + + tm.assert_frame_equal(df, expected) + + df = DataFrame() + df["foo"] = Series(df.index) + + tm.assert_frame_equal(df, expected) + + df = DataFrame() + df["foo"] = df.index + + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame3(self): + expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) + expected["foo"] = expected["foo"].astype("float64") + + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = [] + + tm.assert_frame_equal(df, expected) + + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = Series(np.arange(len(df)), dtype="float64") + + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame4(self): + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = range(len(df)) + + expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) + # range is int-dtype-like, so we get int64 dtype + expected["foo"] = expected["foo"].astype("int64") + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame5(self): + df = DataFrame() + tm.assert_index_equal(df.columns, Index([], dtype=object)) + df2 = DataFrame() + df2[1] = Series([1], index=["foo"]) + df.loc[:, 1] = Series([1], index=["foo"]) + tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) + tm.assert_frame_equal(df, df2) + + def test_partial_set_empty_frame_no_index(self): + # no index to start + expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0]) + + df = DataFrame(columns=["A", "B"]) + df[0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["A", 
"B"]) + df.loc[:, 0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_row(self): + # GH#5720, GH#5744 + # don't create rows when empty + expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64")) + expected["A"] = expected["A"].astype("int64") + expected["B"] = expected["B"].astype("float64") + expected["New"] = expected["New"].astype("float64") + + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + y["New"] = np.nan + tm.assert_frame_equal(y, expected) + + expected = DataFrame(columns=["a", "b", "c c", "d"]) + expected["d"] = expected["d"].astype("int64") + df = DataFrame(columns=["a", "b", "c c"]) + df["d"] = 3 + tm.assert_frame_equal(df, expected) + tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object)) + + # reindex columns is ok + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + result = y.reindex(columns=["A", "B", "C"]) + expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) + expected["A"] = expected["A"].astype("int64") + expected["B"] = expected["B"].astype("float64") + expected["C"] = expected["C"].astype("float64") + tm.assert_frame_equal(result, expected) + + def test_partial_set_empty_frame_set_series(self): + # GH#5756 + # setting with empty Series + df = DataFrame(Series(dtype=object)) + expected = DataFrame({0: Series(dtype=object)}) + tm.assert_frame_equal(df, expected) + + df = DataFrame(Series(name="foo", dtype=object)) + expected = DataFrame({"foo": Series(dtype=object)}) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_copy_assignment(self): + # GH#5932 + # copy on empty with assignment fails + df = DataFrame(index=[0]) + df = df.copy() + df["a"] = 0 + expected = DataFrame(0, index=[0], columns=["a"]) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_consistencies(self): + # GH#6171 + # consistency on empty frames + df = DataFrame(columns=["x", "y"]) + df["x"] = [1, 2] + expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]}) + tm.assert_frame_equal(df, expected, check_dtype=False) + + df = DataFrame(columns=["x", "y"]) + df["x"] = ["1", "2"] + expected = DataFrame({"x": ["1", "2"], "y": [np.nan, np.nan]}, dtype=object) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["x", "y"]) + df.loc[0, "x"] = 1 + expected = DataFrame({"x": [1], "y": [np.nan]}) + tm.assert_frame_equal(df, expected, check_dtype=False) + + +class TestPartialSetting: + def test_partial_setting(self): + + # GH2578, allow ix and friends to partially set + + # series + s_orig = Series([1, 2, 3]) + + s = s_orig.copy() + s[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s[5] = 5.0 + expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5.0 + expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + # iloc/iat raise + s = s_orig.copy() + + msg = "iloc cannot enlarge its target object" + with pytest.raises(IndexError, match=msg): + s.iloc[3] = 5.0 + + msg = "index 3 is out of bounds for axis 0 with size 3" + with pytest.raises(IndexError, match=msg): + s.iat[3] = 5.0 + + def test_partial_setting_frame(self, using_array_manager): + 
df_orig = DataFrame( + np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64" + ) + + # iloc/iat raise + df = df_orig.copy() + + msg = "iloc cannot enlarge its target object" + with pytest.raises(IndexError, match=msg): + df.iloc[4, 2] = 5.0 + + msg = "index 2 is out of bounds for axis 0 with size 2" + if using_array_manager: + msg = "list index out of range" + with pytest.raises(IndexError, match=msg): + df.iat[4, 2] = 5.0 + + # row setting where it exists + expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) + df = df_orig.copy() + df.iloc[1] = df.iloc[2] + tm.assert_frame_equal(df, expected) + + expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) + df = df_orig.copy() + df.loc[1] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # like 2578, partial setting with dtype preservation + expected = DataFrame(dict({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})) + df = df_orig.copy() + df.loc[3] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # single dtype frame, overwrite + expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]})) + df = df_orig.copy() + df.loc[:, "B"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # mixed dtype frame, overwrite + expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) + df = df_orig.copy() + df["B"] = df["B"].astype(np.float64) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + df.loc[:, "B"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # single dtype frame, partial setting + expected = df_orig.copy() + expected["C"] = df["A"] + df = df_orig.copy() + df.loc[:, "C"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # mixed frame, partial setting + expected = df_orig.copy() + expected["C"] = df["A"] + df = df_orig.copy() + df.loc[:, "C"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + def test_partial_setting2(self): + # GH 8473 + dates = date_range("1/1/2000", periods=8) + df_orig = DataFrame( + np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"] + ) + + expected = pd.concat( + [df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True + ) + df = df_orig.copy() + df.loc[dates[-1] + dates.freq, "A"] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + dates.freq, "A"] = 7 + tm.assert_frame_equal(df, expected) + + exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq) + expected = pd.concat([df_orig, exp_other], axis=1) + + df = df_orig.copy() + df.loc[dates[-1] + dates.freq, 0] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + dates.freq, 0] = 7 + tm.assert_frame_equal(df, expected) + + def test_partial_setting_mixed_dtype(self): + + # in a mixed dtype environment, try to preserve dtypes + # by appending + df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) + + s = df.loc[1].copy() + s.name = 2 + expected = pd.concat([df, DataFrame(s).T.infer_objects()]) + + df.loc[2] = df.loc[1] + tm.assert_frame_equal(df, expected) + + def test_series_partial_set(self): + # partial set with new index + # Regression from GH4825 + ser = Series([0.1, 0.2], index=[1, 2]) + + # loc equiv to .reindex + expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) + with pytest.raises(KeyError, match=r"not in index"): + ser.loc[[3, 2, 3]] + + result = ser.reindex([3, 2, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 
"x"]) + with pytest.raises(KeyError, match="not in index"): + ser.loc[[3, 2, 3, "x"]] + + result = ser.reindex([3, 2, 3, "x"]) + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) + with pytest.raises(KeyError, match="not in index"): + ser.loc[[2, 2, "x", 1]] + + result = ser.reindex([2, 2, "x", 1]) + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing is in the index + msg = ( + r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are " + r"in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + ser.loc[[3, 3, 3]] + + expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) + with pytest.raises(KeyError, match="not in index"): + ser.loc[[2, 2, 3]] + + result = ser.reindex([2, 2, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) + expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[3, 4, 4]] + + result = s.reindex([3, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[5, 3, 3]] + + result = s.reindex([5, 3, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[5, 4, 4]] + + result = s.reindex([5, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]) + expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[7, 2, 2]] + + result = s.reindex([7, 2, 2]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) + with pytest.raises(KeyError, match="not in index"): + s.loc[[4, 5, 5]] + + result = s.reindex([4, 5, 5]) + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_series_partial_set_with_name(self): + # GH 11497 + + idx = Index([1, 2], dtype="int64", name="idx") + ser = Series([0.1, 0.2], index=idx, name="s") + + # loc + with pytest.raises(KeyError, match=r"\[3\] not in index"): + ser.loc[[3, 2, 3]] + + with pytest.raises(KeyError, match=r"not in index"): + ser.loc[[3, 2, 3, "x"]] + + exp_idx = Index([2, 2, 1], dtype="int64", name="idx") + expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s") + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + with pytest.raises(KeyError, match=r"\['x'\] not in index"): + ser.loc[[2, 2, "x", 1]] + + # raises as nothing is in the index + msg = ( + r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', " + r"name='idx'\)\] are in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + ser.loc[[3, 3, 3]] + + with pytest.raises(KeyError, 
match="not in index"): + ser.loc[[2, 2, 3]] + + idx = Index([1, 2, 3], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] + + idx = Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] + + idx = Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] + + idx = Index([4, 5, 6, 7], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] + + idx = Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="not in index"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] + + # iloc + exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx") + expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s") + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + @pytest.mark.parametrize("key", [100, 100.0]) + def test_setitem_with_expansion_numeric_into_datetimeindex(self, key): + # GH#4940 inserting non-strings + orig = tm.makeTimeDataFrame() + df = orig.copy() + + df.loc[key, :] = df.iloc[0] + ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name) + ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0) + expected = DataFrame(ex_data, index=ex_index, columns=orig.columns) + + tm.assert_frame_equal(df, expected) + + def test_partial_set_invalid(self): + + # GH 4940 + # allow only setting of 'valid' values + + orig = tm.makeTimeDataFrame() + + # allow object conversion here + df = orig.copy() + df.loc["a", :] = df.iloc[0] + ser = Series(df.iloc[0], name="a") + exp = pd.concat([orig, DataFrame(ser).T.infer_objects()]) + tm.assert_frame_equal(df, exp) + tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) + assert df.index.dtype == "object" + + @pytest.mark.parametrize( + "idx,labels,expected_idx", + [ + ( + period_range(start="2000", periods=20, freq="D"), + ["2000-01-04", "2000-01-08", "2000-01-12"], + [ + Period("2000-01-04", freq="D"), + Period("2000-01-08", freq="D"), + Period("2000-01-12", freq="D"), + ], + ), + ( + date_range(start="2000", periods=20, freq="D"), + ["2000-01-04", "2000-01-08", "2000-01-12"], + [ + Timestamp("2000-01-04"), + Timestamp("2000-01-08"), + Timestamp("2000-01-12"), + ], + ), + ( + pd.timedelta_range(start="1 day", periods=20), + ["4D", "8D", "12D"], + [pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")], + ), + ], + ) + def test_loc_with_list_of_strings_representing_datetimes( + self, idx, labels, expected_idx, frame_or_series + ): + # GH 11278 + obj = frame_or_series(range(20), index=idx) + + expected_value = [3, 7, 11] + expected = frame_or_series(expected_value, expected_idx) + + tm.assert_equal(expected, obj.loc[labels]) + if frame_or_series is Series: + tm.assert_series_equal(expected, obj[labels]) + + @pytest.mark.parametrize( + "idx,labels", + [ + ( + period_range(start="2000", periods=20, freq="D"), + ["2000-01-04", "2000-01-30"], + ), + ( + date_range(start="2000", periods=20, freq="D"), + ["2000-01-04", "2000-01-30"], + ), + (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]), + ], + ) + def test_loc_with_list_of_strings_representing_datetimes_missing_value( + self, idx, 
labels + ): + # GH 11278 + ser = Series(range(20), index=idx) + df = DataFrame(range(20), index=idx) + msg = r"not in index" + + with pytest.raises(KeyError, match=msg): + ser.loc[labels] + with pytest.raises(KeyError, match=msg): + ser[labels] + with pytest.raises(KeyError, match=msg): + df.loc[labels] + + @pytest.mark.parametrize( + "idx,labels,msg", + [ + ( + period_range(start="2000", periods=20, freq="D"), + ["4D", "8D"], + ( + r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " + r"are in the \[index\]" + ), + ), + ( + date_range(start="2000", periods=20, freq="D"), + ["4D", "8D"], + ( + r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " + r"are in the \[index\]" + ), + ), + ( + pd.timedelta_range(start="1 day", periods=20), + ["2000-01-04", "2000-01-08"], + ( + r"None of \[Index\(\['2000-01-04', '2000-01-08'\], " + r"dtype='object'\)\] are in the \[index\]" + ), + ), + ], + ) + def test_loc_with_list_of_strings_representing_datetimes_not_matched_type( + self, idx, labels, msg + ): + # GH 11278 + ser = Series(range(20), index=idx) + df = DataFrame(range(20), index=idx) + + with pytest.raises(KeyError, match=msg): + ser.loc[labels] + with pytest.raises(KeyError, match=msg): + ser[labels] + with pytest.raises(KeyError, match=msg): + df.loc[labels] + + +class TestStringSlicing: + def test_slice_irregular_datetime_index_with_nan(self): + # GH36953 + index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None]) + df = DataFrame(range(len(index)), index=index) + expected = DataFrame(range(len(index[:3])), index=index[:3]) + result = df["2012-01-01":"2012-01-04"] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py new file mode 100644 index 00000000..552db9c5 --- /dev/null +++ b/pandas/tests/indexing/test_scalar.py @@ -0,0 +1,292 @@ +""" test scalar indexing, including at and iat """ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.tests.indexing.common import Base + + +class TestScalar(Base): + @pytest.mark.parametrize("kind", ["series", "frame"]) + @pytest.mark.parametrize("col", ["ints", "uints"]) + def test_iat_set_ints(self, kind, col): + f = getattr(self, kind)[col] + if f is not None: + indices = self.generate_indices(f, True) + for i in indices: + f.iat[i] = 1 + expected = self.get_value("iat", f, i, True) + tm.assert_almost_equal(expected, 1) + + @pytest.mark.parametrize("kind", ["series", "frame"]) + @pytest.mark.parametrize("col", ["labels", "ts", "floats"]) + def test_iat_set_other(self, kind, col): + f = getattr(self, kind)[col] + if f is not None: + msg = "iAt based indexing can only have integer indexers" + with pytest.raises(ValueError, match=msg): + indices = self.generate_indices(f, False) + for i in indices: + f.iat[i] = 1 + expected = self.get_value("iat", f, i, False) + tm.assert_almost_equal(expected, 1) + + @pytest.mark.parametrize("kind", ["series", "frame"]) + @pytest.mark.parametrize("col", ["ints", "uints", "labels", "ts", "floats"]) + def test_at_set_ints_other(self, kind, col): + f = getattr(self, kind)[col] + if f is not None: + indices = self.generate_indices(f, False) + for i in indices: + f.at[i] = 1 + expected = self.get_value("at", f, i, False) + tm.assert_almost_equal(expected, 1) + + +class TestAtAndiAT: + # at and iat tests that don't need Base class + + def 
test_float_index_at_iat(self): + ser = Series([1, 2, 3], index=[0.1, 0.2, 0.3]) + for el, item in ser.items(): + assert ser.at[el] == item + for i in range(len(ser)): + assert ser.iat[i] == i + 1 + + def test_at_iat_coercion(self): + + # as timestamp is not a tuple! + dates = date_range("1/1/2000", periods=8) + df = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]) + s = df["A"] + + result = s.at[dates[5]] + xp = s.values[5] + assert result == xp + + @pytest.mark.parametrize( + "ser, expected", + [ + [ + Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]"), + Timestamp("2014-02-02"), + ], + [ + Series(["1 days", "2 days"], dtype="timedelta64[ns]"), + Timedelta("2 days"), + ], + ], + ) + def test_iloc_iat_coercion_datelike(self, indexer_ial, ser, expected): + # GH 7729 + # make sure we are boxing the returns + result = indexer_ial(ser)[1] + assert result == expected + + def test_imethods_with_dups(self): + + # GH6493 + # iat/iloc with dups + + s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64") + result = s.iloc[2] + assert result == 2 + result = s.iat[2] + assert result == 2 + + msg = "index 10 is out of bounds for axis 0 with size 5" + with pytest.raises(IndexError, match=msg): + s.iat[10] + msg = "index -10 is out of bounds for axis 0 with size 5" + with pytest.raises(IndexError, match=msg): + s.iat[-10] + + result = s.iloc[[2, 3]] + expected = Series([2, 3], [2, 2], dtype="int64") + tm.assert_series_equal(result, expected) + + df = s.to_frame() + result = df.iloc[2] + expected = Series(2, index=[0], name=2) + tm.assert_series_equal(result, expected) + + result = df.iat[2, 0] + assert result == 2 + + def test_frame_at_with_duplicate_axes(self): + # GH#33041 + arr = np.random.randn(6).reshape(3, 2) + df = DataFrame(arr, columns=["A", "A"]) + + result = df.at[0, "A"] + expected = df.iloc[0] + + tm.assert_series_equal(result, expected) + + result = df.T.at["A", 0] + tm.assert_series_equal(result, expected) + + # setter + df.at[1, "A"] = 2 + expected = Series([2.0, 2.0], index=["A", "A"], name=1) + tm.assert_series_equal(df.iloc[1], expected) + + def test_at_getitem_dt64tz_values(self): + # gh-15822 + df = DataFrame( + { + "name": ["John", "Anderson"], + "date": [ + Timestamp(2017, 3, 13, 13, 32, 56), + Timestamp(2017, 2, 16, 12, 10, 3), + ], + } + ) + df["date"] = df["date"].dt.tz_localize("Asia/Shanghai") + + expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai") + + result = df.loc[0, "date"] + assert result == expected + + result = df.at[0, "date"] + assert result == expected + + def test_mixed_index_at_iat_loc_iloc_series(self): + # GH 19860 + s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + for el, item in s.items(): + assert s.at[el] == s.loc[el] == item + for i in range(len(s)): + assert s.iat[i] == s.iloc[i] == i + 1 + + with pytest.raises(KeyError, match="^4$"): + s.at[4] + with pytest.raises(KeyError, match="^4$"): + s.loc[4] + + def test_mixed_index_at_iat_loc_iloc_dataframe(self): + # GH 19860 + df = DataFrame( + [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2] + ) + for rowIdx, row in df.iterrows(): + for el, item in row.items(): + assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item + + for row in range(2): + for i in range(5): + assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i + + with pytest.raises(KeyError, match="^3$"): + df.at[0, 3] + with pytest.raises(KeyError, match="^3$"): + df.loc[0, 3] + + def test_iat_setter_incompatible_assignment(self): + # GH 23236 + result = 
DataFrame({"a": [0, 1], "b": [4, 5]}) + result.iat[0, 0] = None + expected = DataFrame({"a": [None, 1], "b": [4, 5]}) + tm.assert_frame_equal(result, expected) + + +def test_iat_dont_wrap_object_datetimelike(): + # GH#32809 .iat calls go through DataFrame._get_value, should not + # call maybe_box_datetimelike + dti = date_range("2016-01-01", periods=3) + tdi = dti - dti + ser = Series(dti.to_pydatetime(), dtype=object) + ser2 = Series(tdi.to_pytimedelta(), dtype=object) + df = DataFrame({"A": ser, "B": ser2}) + assert (df.dtypes == object).all() + + for result in [df.at[0, "A"], df.iat[0, 0], df.loc[0, "A"], df.iloc[0, 0]]: + assert result is ser[0] + assert isinstance(result, datetime) + assert not isinstance(result, Timestamp) + + for result in [df.at[1, "B"], df.iat[1, 1], df.loc[1, "B"], df.iloc[1, 1]]: + assert result is ser2[1] + assert isinstance(result, timedelta) + assert not isinstance(result, Timedelta) + + +def test_at_with_tuple_index_get(): + # GH 26989 + # DataFrame.at getter works with Index of tuples + df = DataFrame({"a": [1, 2]}, index=[(1, 2), (3, 4)]) + assert df.index.nlevels == 1 + assert df.at[(1, 2), "a"] == 1 + + # Series.at getter works with Index of tuples + series = df["a"] + assert series.index.nlevels == 1 + assert series.at[(1, 2)] == 1 + + +def test_at_with_tuple_index_set(): + # GH 26989 + # DataFrame.at setter works with Index of tuples + df = DataFrame({"a": [1, 2]}, index=[(1, 2), (3, 4)]) + assert df.index.nlevels == 1 + df.at[(1, 2), "a"] = 2 + assert df.at[(1, 2), "a"] == 2 + + # Series.at setter works with Index of tuples + series = df["a"] + assert series.index.nlevels == 1 + series.at[1, 2] = 3 + assert series.at[1, 2] == 3 + + +class TestMultiIndexScalar: + def test_multiindex_at_get(self): + # GH 26989 + # DataFrame.at and DataFrame.loc getter works with MultiIndex + df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) + assert df.index.nlevels == 2 + assert df.at[(1, 3), "a"] == 1 + assert df.loc[(1, 3), "a"] == 1 + + # Series.at and Series.loc getter works with MultiIndex + series = df["a"] + assert series.index.nlevels == 2 + assert series.at[1, 3] == 1 + assert series.loc[1, 3] == 1 + + def test_multiindex_at_set(self): + # GH 26989 + # DataFrame.at and DataFrame.loc setter works with MultiIndex + df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) + assert df.index.nlevels == 2 + df.at[(1, 3), "a"] = 3 + assert df.at[(1, 3), "a"] == 3 + df.loc[(1, 3), "a"] = 4 + assert df.loc[(1, 3), "a"] == 4 + + # Series.at and Series.loc setter works with MultiIndex + series = df["a"] + assert series.index.nlevels == 2 + series.at[1, 3] = 5 + assert series.at[1, 3] == 5 + series.loc[1, 3] = 6 + assert series.loc[1, 3] == 6 + + def test_multiindex_at_get_one_level(self): + # GH#38053 + s2 = Series((0, 1), index=[[False, True]]) + result = s2.at[False] + assert result == 0 diff --git a/pandas/tests/interchange/__init__.py b/pandas/tests/interchange/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/interchange/conftest.py b/pandas/tests/interchange/conftest.py new file mode 100644 index 00000000..f552ba44 --- /dev/null +++ b/pandas/tests/interchange/conftest.py @@ -0,0 +1,12 @@ +import pytest + +import pandas as pd + + +@pytest.fixture +def df_from_dict(): + def maker(dct, is_categorical=False): + df = pd.DataFrame(dct) + return df.astype("category") if is_categorical else df + + return maker diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py new file mode 100644 index 
00000000..078a1751 --- /dev/null +++ b/pandas/tests/interchange/test_impl.py @@ -0,0 +1,206 @@ +from datetime import datetime +import random + +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.core.interchange.column import PandasColumn +from pandas.core.interchange.dataframe_protocol import ( + ColumnNullType, + DtypeKind, +) +from pandas.core.interchange.from_dataframe import from_dataframe + +test_data_categorical = { + "ordered": pd.Categorical(list("testdata") * 30, ordered=True), + "unordered": pd.Categorical(list("testdata") * 30, ordered=False), +} + +NCOLS, NROWS = 100, 200 + + +def _make_data(make_one): + return { + f"col{int((i - NCOLS / 2) % NCOLS + 1)}": [make_one() for _ in range(NROWS)] + for i in range(NCOLS) + } + + +int_data = _make_data(lambda: random.randint(-100, 100)) +uint_data = _make_data(lambda: random.randint(1, 100)) +bool_data = _make_data(lambda: random.choice([True, False])) +float_data = _make_data(lambda: random.random()) +datetime_data = _make_data( + lambda: datetime( + year=random.randint(1900, 2100), + month=random.randint(1, 12), + day=random.randint(1, 20), + ) +) + +string_data = { + "separator data": [ + "abC|DeF,Hik", + "234,3245.67", + "gSaf,qWer|Gre", + "asd3,4sad|", + np.NaN, + ] +} + + +@pytest.mark.parametrize("data", [("ordered", True), ("unordered", False)]) +def test_categorical_dtype(data): + df = pd.DataFrame({"A": (test_data_categorical[data[0]])}) + + col = df.__dataframe__().get_column_by_name("A") + assert col.dtype[0] == DtypeKind.CATEGORICAL + assert col.null_count == 0 + assert col.describe_null == (ColumnNullType.USE_SENTINEL, -1) + assert col.num_chunks() == 1 + desc_cat = col.describe_categorical + assert desc_cat["is_ordered"] == data[1] + assert desc_cat["is_dictionary"] is True + assert isinstance(desc_cat["categories"], PandasColumn) + tm.assert_series_equal( + desc_cat["categories"]._col, pd.Series(["a", "d", "e", "s", "t"]) + ) + + tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) + + +@pytest.mark.parametrize( + "data", [int_data, uint_data, float_data, bool_data, datetime_data] +) +def test_dataframe(data): + df = pd.DataFrame(data) + + df2 = df.__dataframe__() + + assert df2.num_columns() == NCOLS + assert df2.num_rows() == NROWS + + assert list(df2.column_names()) == list(data.keys()) + + indices = (0, 2) + names = tuple(list(data.keys())[idx] for idx in indices) + + result = from_dataframe(df2.select_columns(indices)) + expected = from_dataframe(df2.select_columns_by_name(names)) + tm.assert_frame_equal(result, expected) + + assert isinstance(result.attrs["_INTERCHANGE_PROTOCOL_BUFFERS"], list) + assert isinstance(expected.attrs["_INTERCHANGE_PROTOCOL_BUFFERS"], list) + + +def test_missing_from_masked(): + df = pd.DataFrame( + { + "x": np.array([1, 2, 3, 4, 0]), + "y": np.array([1.5, 2.5, 3.5, 4.5, 0]), + "z": np.array([True, False, True, True, True]), + } + ) + + df2 = df.__dataframe__() + + rng = np.random.RandomState(42) + dict_null = {col: rng.randint(low=0, high=len(df)) for col in df.columns} + for col, num_nulls in dict_null.items(): + null_idx = df.index[ + rng.choice(np.arange(len(df)), size=num_nulls, replace=False) + ] + df.loc[null_idx, col] = None + + df2 = df.__dataframe__() + + assert df2.get_column_by_name("x").null_count == dict_null["x"] + assert df2.get_column_by_name("y").null_count == dict_null["y"] + assert df2.get_column_by_name("z").null_count == 
dict_null["z"] + + +@pytest.mark.parametrize( + "data", + [ + {"x": [1.5, 2.5, 3.5], "y": [9.2, 10.5, 11.8]}, + {"x": [1, 2, 0], "y": [9.2, 10.5, 11.8]}, + { + "x": np.array([True, True, False]), + "y": np.array([1, 2, 0]), + "z": np.array([9.2, 10.5, 11.8]), + }, + ], +) +def test_mixed_data(data): + df = pd.DataFrame(data) + df2 = df.__dataframe__() + + for col_name in df.columns: + assert df2.get_column_by_name(col_name).null_count == 0 + + +def test_mixed_missing(): + df = pd.DataFrame( + { + "x": np.array([True, None, False, None, True]), + "y": np.array([None, 2, None, 1, 2]), + "z": np.array([9.2, 10.5, None, 11.8, None]), + } + ) + + df2 = df.__dataframe__() + + for col_name in df.columns: + assert df2.get_column_by_name(col_name).null_count == 2 + + +def test_string(): + test_str_data = string_data["separator data"] + [""] + df = pd.DataFrame({"A": test_str_data}) + col = df.__dataframe__().get_column_by_name("A") + + assert col.size() == 6 + assert col.null_count == 1 + assert col.dtype[0] == DtypeKind.STRING + assert col.describe_null == (ColumnNullType.USE_BYTEMASK, 0) + + df_sliced = df[1:] + col = df_sliced.__dataframe__().get_column_by_name("A") + assert col.size() == 5 + assert col.null_count == 1 + assert col.dtype[0] == DtypeKind.STRING + assert col.describe_null == (ColumnNullType.USE_BYTEMASK, 0) + + +def test_nonstring_object(): + df = pd.DataFrame({"A": ["a", 10, 1.0, ()]}) + col = df.__dataframe__().get_column_by_name("A") + with pytest.raises(NotImplementedError, match="not supported yet"): + col.dtype + + +def test_datetime(): + df = pd.DataFrame({"A": [pd.Timestamp("2022-01-01"), pd.NaT]}) + col = df.__dataframe__().get_column_by_name("A") + + assert col.size() == 2 + assert col.null_count == 1 + assert col.dtype[0] == DtypeKind.DATETIME + assert col.describe_null == (ColumnNullType.USE_SENTINEL, iNaT) + + tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) + + +@td.skip_if_np_lt("1.23") +def test_categorical_to_numpy_dlpack(): + # https://github.com/pandas-dev/pandas/issues/48393 + df = pd.DataFrame({"A": pd.Categorical(["a", "b", "a"])}) + col = df.__dataframe__().get_column_by_name("A") + result = np.from_dlpack(col.get_buffers()["data"][0]) + expected = np.array([0, 1, 0], dtype="int8") + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/interchange/test_spec_conformance.py b/pandas/tests/interchange/test_spec_conformance.py new file mode 100644 index 00000000..965938b1 --- /dev/null +++ b/pandas/tests/interchange/test_spec_conformance.py @@ -0,0 +1,164 @@ +""" +A verbatim copy (vendored) of the spec tests. 
+Taken from https://github.com/data-apis/dataframe-api +""" +import ctypes +import math + +import pytest + + +@pytest.mark.parametrize( + "test_data", + [ + {"a": ["foo", "bar"], "b": ["baz", "qux"]}, + {"a": [1.5, 2.5, 3.5], "b": [9.2, 10.5, 11.8]}, + {"A": [1, 2, 3, 4], "B": [1, 2, 3, 4]}, + ], + ids=["str_data", "float_data", "int_data"], +) +def test_only_one_dtype(test_data, df_from_dict): + columns = list(test_data.keys()) + df = df_from_dict(test_data) + dfX = df.__dataframe__() + + column_size = len(test_data[columns[0]]) + for column in columns: + null_count = dfX.get_column_by_name(column).null_count + assert null_count == 0 + assert isinstance(null_count, int) + assert dfX.get_column_by_name(column).size() == column_size + assert dfX.get_column_by_name(column).offset == 0 + + +def test_mixed_dtypes(df_from_dict): + df = df_from_dict( + { + "a": [1, 2, 3], # dtype kind INT = 0 + "b": [3, 4, 5], # dtype kind INT = 0 + "c": [1.5, 2.5, 3.5], # dtype kind FLOAT = 2 + "d": [9, 10, 11], # dtype kind INT = 0 + "e": [True, False, True], # dtype kind BOOLEAN = 20 + "f": ["a", "", "c"], # dtype kind STRING = 21 + } + ) + dfX = df.__dataframe__() + # for meanings of dtype[0] see the spec; we cannot import the spec here as this + # file is expected to be vendored *anywhere*; + # values for dtype[0] are explained above + columns = {"a": 0, "b": 0, "c": 2, "d": 0, "e": 20, "f": 21} + + for column, kind in columns.items(): + colX = dfX.get_column_by_name(column) + assert colX.null_count == 0 + assert isinstance(colX.null_count, int) + assert colX.size() == 3 + assert colX.offset == 0 + + assert colX.dtype[0] == kind + + assert dfX.get_column_by_name("c").dtype[1] == 64 + + +def test_na_float(df_from_dict): + df = df_from_dict({"a": [1.0, math.nan, 2.0]}) + dfX = df.__dataframe__() + colX = dfX.get_column_by_name("a") + assert colX.null_count == 1 + assert isinstance(colX.null_count, int) + + +def test_noncategorical(df_from_dict): + df = df_from_dict({"a": [1, 2, 3]}) + dfX = df.__dataframe__() + colX = dfX.get_column_by_name("a") + with pytest.raises(TypeError, match=".*categorical.*"): + colX.describe_categorical + + +def test_categorical(df_from_dict): + df = df_from_dict( + {"weekday": ["Mon", "Tue", "Mon", "Wed", "Mon", "Thu", "Fri", "Sat", "Sun"]}, + is_categorical=True, + ) + + colX = df.__dataframe__().get_column_by_name("weekday") + categorical = colX.describe_categorical + assert isinstance(categorical["is_ordered"], bool) + assert isinstance(categorical["is_dictionary"], bool) + + +def test_dataframe(df_from_dict): + df = df_from_dict( + {"x": [True, True, False], "y": [1, 2, 0], "z": [9.2, 10.5, 11.8]} + ) + dfX = df.__dataframe__() + + assert dfX.num_columns() == 3 + assert dfX.num_rows() == 3 + assert dfX.num_chunks() == 1 + assert list(dfX.column_names()) == ["x", "y", "z"] + assert list(dfX.select_columns((0, 2)).column_names()) == list( + dfX.select_columns_by_name(("x", "z")).column_names() + ) + + +@pytest.mark.parametrize(["size", "n_chunks"], [(10, 3), (12, 3), (12, 5)]) +def test_df_get_chunks(size, n_chunks, df_from_dict): + df = df_from_dict({"x": list(range(size))}) + dfX = df.__dataframe__() + chunks = list(dfX.get_chunks(n_chunks)) + assert len(chunks) == n_chunks + assert sum(chunk.num_rows() for chunk in chunks) == size + + +@pytest.mark.parametrize(["size", "n_chunks"], [(10, 3), (12, 3), (12, 5)]) +def test_column_get_chunks(size, n_chunks, df_from_dict): + df = df_from_dict({"x": list(range(size))}) + dfX = df.__dataframe__() + chunks = 
list(dfX.get_column(0).get_chunks(n_chunks)) + assert len(chunks) == n_chunks + assert sum(chunk.size() for chunk in chunks) == size + + +def test_get_columns(df_from_dict): + df = df_from_dict({"a": [0, 1], "b": [2.5, 3.5]}) + dfX = df.__dataframe__() + for colX in dfX.get_columns(): + assert colX.size() == 2 + assert colX.num_chunks() == 1 + # for meanings of dtype[0] see the spec; we cannot import the spec here as this + # file is expected to be vendored *anywhere* + assert dfX.get_column(0).dtype[0] == 0 # INT + assert dfX.get_column(1).dtype[0] == 2 # FLOAT + + +def test_buffer(df_from_dict): + arr = [0, 1, -1] + df = df_from_dict({"a": arr}) + dfX = df.__dataframe__() + colX = dfX.get_column(0) + bufX = colX.get_buffers() + + dataBuf, dataDtype = bufX["data"] + + assert dataBuf.bufsize > 0 + assert dataBuf.ptr != 0 + device, _ = dataBuf.__dlpack_device__() + + # for meanings of dtype[0] see the spec; we cannot import the spec here as this + # file is expected to be vendored *anywhere* + assert dataDtype[0] == 0 # INT + + if device == 1: # CPU-only as we're going to directly read memory here + bitwidth = dataDtype[1] + ctype = { + 8: ctypes.c_int8, + 16: ctypes.c_int16, + 32: ctypes.c_int32, + 64: ctypes.c_int64, + }[bitwidth] + + for idx, truth in enumerate(arr): + val = ctype.from_address(dataBuf.ptr + idx * (bitwidth // 8)).value + assert val == truth, f"Buffer at index {idx} mismatch" diff --git a/pandas/tests/interchange/test_utils.py b/pandas/tests/interchange/test_utils.py new file mode 100644 index 00000000..4fd42abb --- /dev/null +++ b/pandas/tests/interchange/test_utils.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.interchange.utils import dtype_to_arrow_c_fmt + +# TODO: use ArrowSchema to get reference C-string. +# At the time, there is no way to access ArrowSchema holding a type format string +# from python. 
The only way to access it is to export the structure to a C-pointer, +# see DataType._export_to_c() method defined in +# https://github.com/apache/arrow/blob/master/python/pyarrow/types.pxi + + +@pytest.mark.parametrize( + "pandas_dtype, c_string", + [ + (np.dtype("bool"), "b"), + (np.dtype("int8"), "c"), + (np.dtype("uint8"), "C"), + (np.dtype("int16"), "s"), + (np.dtype("uint16"), "S"), + (np.dtype("int32"), "i"), + (np.dtype("uint32"), "I"), + (np.dtype("int64"), "l"), + (np.dtype("uint64"), "L"), + (np.dtype("float16"), "e"), + (np.dtype("float32"), "f"), + (np.dtype("float64"), "g"), + (pd.Series(["a"]).dtype, "u"), + ( + pd.Series([0]).astype("datetime64[ns]").dtype, + "tsn:", + ), + (pd.CategoricalDtype(["a"]), "l"), + (np.dtype("O"), "u"), + ], +) +def test_dtype_to_arrow_c_fmt(pandas_dtype, c_string): # PR01 + """Test ``dtype_to_arrow_c_fmt`` utility function.""" + assert dtype_to_arrow_c_fmt(pandas_dtype) == c_string diff --git a/pandas/tests/internals/__init__.py b/pandas/tests/internals/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py new file mode 100644 index 00000000..c759cc16 --- /dev/null +++ b/pandas/tests/internals/test_api.py @@ -0,0 +1,55 @@ +""" +Tests for the pseudo-public API implemented in internals/api.py and exposed +in core.internals +""" + +import pandas as pd +from pandas.core import internals +from pandas.core.internals import api + + +def test_internals_api(): + assert internals.make_block is api.make_block + + +def test_namespace(): + # SUBJECT TO CHANGE + + modules = [ + "blocks", + "concat", + "managers", + "construction", + "array_manager", + "base", + "api", + "ops", + ] + expected = [ + "Block", + "NumericBlock", + "DatetimeTZBlock", + "ExtensionBlock", + "ObjectBlock", + "make_block", + "DataManager", + "ArrayManager", + "BlockManager", + "SingleDataManager", + "SingleBlockManager", + "SingleArrayManager", + "concatenate_managers", + "create_block_manager_from_blocks", + ] + + result = [x for x in dir(internals) if not x.startswith("__")] + assert set(result) == set(expected + modules) + + +def test_make_block_2d_with_dti(): + # GH#41168 + dti = pd.date_range("2012", periods=3, tz="UTC") + blk = api.make_block(dti, placement=[0]) + + assert blk.shape == (1, 3) + assert blk.values.shape == (1, 3) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py new file mode 100644 index 00000000..b30b27f5 --- /dev/null +++ b/pandas/tests/internals/test_internals.py @@ -0,0 +1,1445 @@ +from datetime import ( + date, + datetime, +) +import itertools +import re + +import numpy as np +import pytest + +from pandas._libs.internals import BlockPlacement +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + Series, + Timedelta, + Timestamp, + period_range, +) +import pandas._testing as tm +import pandas.core.algorithms as algos +from pandas.core.arrays import ( + DatetimeArray, + SparseArray, + TimedeltaArray, +) +from pandas.core.internals import ( + BlockManager, + SingleBlockManager, + make_block, +) +from pandas.core.internals.blocks import ( + ensure_block_shape, + new_block, +) + +# this file contains BlockManager specific tests +# TODO(ArrayManager) factor out interleave_dtype tests +pytestmark = td.skip_array_manager_invalid_test + + +@pytest.fixture(params=[new_block, 
make_block]) +def block_maker(request): + """ + Fixture to test both the internal new_block and pseudo-public make_block. + """ + return request.param + + +@pytest.fixture +def mgr(): + return create_mgr( + "a: f8; b: object; c: f8; d: object; e: f8;" + "f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;" + "k: M8[ns, US/Eastern]; l: M8[ns, CET];" + ) + + +def assert_block_equal(left, right): + tm.assert_numpy_array_equal(left.values, right.values) + assert left.dtype == right.dtype + assert isinstance(left.mgr_locs, BlockPlacement) + assert isinstance(right.mgr_locs, BlockPlacement) + tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array) + + +def get_numeric_mat(shape): + arr = np.arange(shape[0]) + return np.lib.stride_tricks.as_strided( + x=arr, shape=shape, strides=(arr.itemsize,) + (0,) * (len(shape) - 1) + ).copy() + + +N = 10 + + +def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_block): + """ + Supported typestr: + + * float, f8, f4, f2 + * int, i8, i4, i2, i1 + * uint, u8, u4, u2, u1 + * complex, c16, c8 + * bool + * object, string, O + * datetime, dt, M8[ns], M8[ns, tz] + * timedelta, td, m8[ns] + * sparse (SparseArray with fill_value=0.0) + * sparse_na (SparseArray with fill_value=np.nan) + * category, category2 + + """ + placement = BlockPlacement(placement) + num_items = len(placement) + + if item_shape is None: + item_shape = (N,) + + shape = (num_items,) + item_shape + + mat = get_numeric_mat(shape) + + if typestr in ( + "float", + "f8", + "f4", + "f2", + "int", + "i8", + "i4", + "i2", + "i1", + "uint", + "u8", + "u4", + "u2", + "u1", + ): + values = mat.astype(typestr) + num_offset + elif typestr in ("complex", "c16", "c8"): + values = 1.0j * (mat.astype(typestr) + num_offset) + elif typestr in ("object", "string", "O"): + values = np.reshape([f"A{i:d}" for i in mat.ravel() + num_offset], shape) + elif typestr in ("b", "bool"): + values = np.ones(shape, dtype=np.bool_) + elif typestr in ("datetime", "dt", "M8[ns]"): + values = (mat * 1e9).astype("M8[ns]") + elif typestr.startswith("M8[ns"): + # datetime with tz + m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr) + assert m is not None, f"incompatible typestr -> {typestr}" + tz = m.groups()[0] + assert num_items == 1, "must have only 1 num items for a tz-aware" + values = DatetimeIndex(np.arange(N) * 10**9, tz=tz)._data + values = ensure_block_shape(values, ndim=len(shape)) + elif typestr in ("timedelta", "td", "m8[ns]"): + values = (mat * 1).astype("m8[ns]") + elif typestr in ("category",): + values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) + elif typestr in ("category2",): + values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"]) + elif typestr in ("sparse", "sparse_na"): + if shape[-1] != 10: + # We also are implicitly assuming this in the category cases above + raise NotImplementedError + + assert all(s == 1 for s in shape[:-1]) + if typestr.endswith("_na"): + fill_value = np.nan + else: + fill_value = 0.0 + values = SparseArray( + [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6], + fill_value=fill_value, + ) + arr = values.sp_values.view() + arr += num_offset - 1 + else: + raise ValueError(f'Unsupported typestr: "{typestr}"') + + return maker(values, placement=placement, ndim=len(shape)) + + +def create_single_mgr(typestr, num_rows=None): + if num_rows is None: + num_rows = N + + return SingleBlockManager( + create_block(typestr, placement=slice(0, num_rows), item_shape=()), + Index(np.arange(num_rows)), + ) + + +def 
create_mgr(descr, item_shape=None): + """ + Construct BlockManager from string description. + + String description syntax looks similar to np.matrix initializer. It looks + like this:: + + a,b,c: f8; d,e,f: i8 + + Rules are rather simple: + + * see list of supported datatypes in `create_block` method + * components are semicolon-separated + * each component is `NAME,NAME,NAME: DTYPE_ID` + * whitespace around colons & semicolons are removed + * components with same DTYPE_ID are combined into single block + * to force multiple blocks with same dtype, use '-SUFFIX':: + + 'a:f8-1; b:f8-2; c:f8-foobar' + + """ + if item_shape is None: + item_shape = (N,) + + offset = 0 + mgr_items = [] + block_placements = {} + for d in descr.split(";"): + d = d.strip() + if not len(d): + continue + names, blockstr = d.partition(":")[::2] + blockstr = blockstr.strip() + names = names.strip().split(",") + + mgr_items.extend(names) + placement = list(np.arange(len(names)) + offset) + try: + block_placements[blockstr].extend(placement) + except KeyError: + block_placements[blockstr] = placement + offset += len(names) + + mgr_items = Index(mgr_items) + + blocks = [] + num_offset = 0 + for blockstr, placement in block_placements.items(): + typestr = blockstr.split("-")[0] + blocks.append( + create_block( + typestr, placement, item_shape=item_shape, num_offset=num_offset + ) + ) + num_offset += len(placement) + + sblocks = sorted(blocks, key=lambda b: b.mgr_locs[0]) + return BlockManager( + tuple(sblocks), + [mgr_items] + [Index(np.arange(n)) for n in item_shape], + ) + + +@pytest.fixture +def fblock(): + return create_block("float", [0, 2, 4]) + + +class TestBlock: + def test_constructor(self): + int32block = create_block("i4", [0]) + assert int32block.dtype == np.int32 + + @pytest.mark.parametrize( + "typ, data", + [ + ["float", [0, 2, 4]], + ["complex", [7]], + ["object", [1, 3]], + ["bool", [5]], + ], + ) + def test_pickle(self, typ, data): + blk = create_block(typ, data) + assert_block_equal(tm.round_trip_pickle(blk), blk) + + def test_mgr_locs(self, fblock): + assert isinstance(fblock.mgr_locs, BlockPlacement) + tm.assert_numpy_array_equal( + fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.intp) + ) + + def test_attrs(self, fblock): + assert fblock.shape == fblock.values.shape + assert fblock.dtype == fblock.values.dtype + assert len(fblock) == len(fblock.values) + + def test_copy(self, fblock): + cop = fblock.copy() + assert cop is not fblock + assert_block_equal(fblock, cop) + + def test_delete(self, fblock): + newb = fblock.copy() + locs = newb.mgr_locs + nb = newb.delete(0) + assert newb.mgr_locs is locs + + assert nb is not newb + + tm.assert_numpy_array_equal( + nb.mgr_locs.as_array, np.array([2, 4], dtype=np.intp) + ) + assert not (newb.values[0] == 1).all() + assert (nb.values[0] == 1).all() + + newb = fblock.copy() + locs = newb.mgr_locs + nb = newb.delete(1) + assert newb.mgr_locs is locs + + tm.assert_numpy_array_equal( + nb.mgr_locs.as_array, np.array([0, 4], dtype=np.intp) + ) + assert not (newb.values[1] == 2).all() + assert (nb.values[1] == 2).all() + + newb = fblock.copy() + locs = newb.mgr_locs + nb = newb.delete(2) + tm.assert_numpy_array_equal( + nb.mgr_locs.as_array, np.array([0, 2], dtype=np.intp) + ) + assert (nb.values[1] == 1).all() + + newb = fblock.copy() + + with pytest.raises(IndexError, match=None): + newb.delete(3) + + def test_delete_datetimelike(self): + # dont use np.delete on values, as that will coerce from DTA/TDA to ndarray + arr = np.arange(20, dtype="i8").reshape(5, 
4).view("m8[ns]") + df = DataFrame(arr) + blk = df._mgr.blocks[0] + assert isinstance(blk.values, TimedeltaArray) + + nb = blk.delete(1) + assert isinstance(nb.values, TimedeltaArray) + + df = DataFrame(arr.view("M8[ns]")) + blk = df._mgr.blocks[0] + assert isinstance(blk.values, DatetimeArray) + + nb = blk.delete([1, 3]) + assert isinstance(nb.values, DatetimeArray) + + def test_split(self): + # GH#37799 + values = np.random.randn(3, 4) + blk = new_block(values, placement=[3, 1, 6], ndim=2) + result = blk._split() + + # check that we get views, not copies + values[:] = -9999 + assert (blk.values == -9999).all() + + assert len(result) == 3 + expected = [ + new_block(values[[0]], placement=[3], ndim=2), + new_block(values[[1]], placement=[1], ndim=2), + new_block(values[[2]], placement=[6], ndim=2), + ] + for res, exp in zip(result, expected): + assert_block_equal(res, exp) + + def test_is_categorical_deprecated(self, fblock): + # GH#40571 + blk = fblock + with tm.assert_produces_warning(DeprecationWarning): + blk.is_categorical + + +class TestBlockManager: + def test_attrs(self): + mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2") + assert mgr.nblocks == 2 + assert len(mgr) == 6 + + def test_duplicate_ref_loc_failure(self): + tmp_mgr = create_mgr("a:bool; a: f8") + + axes, blocks = tmp_mgr.axes, tmp_mgr.blocks + + blocks[0].mgr_locs = BlockPlacement(np.array([0])) + blocks[1].mgr_locs = BlockPlacement(np.array([0])) + + # test trying to create block manager with overlapping ref locs + + msg = "Gaps in blk ref_locs" + + with pytest.raises(AssertionError, match=msg): + mgr = BlockManager(blocks, axes) + mgr._rebuild_blknos_and_blklocs() + + blocks[0].mgr_locs = BlockPlacement(np.array([0])) + blocks[1].mgr_locs = BlockPlacement(np.array([1])) + mgr = BlockManager(blocks, axes) + mgr.iget(1) + + def test_pickle(self, mgr): + + mgr2 = tm.round_trip_pickle(mgr) + tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + + # GH2431 + assert hasattr(mgr2, "_is_consolidated") + assert hasattr(mgr2, "_known_consolidated") + + # reset to False on load + assert not mgr2._is_consolidated + assert not mgr2._known_consolidated + + @pytest.mark.parametrize("mgr_string", ["a,a,a:f8", "a: f8; a: i8"]) + def test_non_unique_pickle(self, mgr_string): + mgr = create_mgr(mgr_string) + mgr2 = tm.round_trip_pickle(mgr) + tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + + def test_categorical_block_pickle(self): + mgr = create_mgr("a: category") + mgr2 = tm.round_trip_pickle(mgr) + tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + + smgr = create_single_mgr("category") + smgr2 = tm.round_trip_pickle(smgr) + tm.assert_series_equal(Series(smgr), Series(smgr2)) + + def test_iget(self): + cols = Index(list("abc")) + values = np.random.rand(3, 3) + block = new_block( + values=values.copy(), + placement=np.arange(3, dtype=np.intp), + ndim=values.ndim, + ) + mgr = BlockManager(blocks=(block,), axes=[cols, Index(np.arange(3))]) + + tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0]) + tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1]) + tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2]) + + def test_set(self): + mgr = create_mgr("a,b,c: int", item_shape=(3,)) + + mgr.insert(len(mgr.items), "d", np.array(["foo"] * 3)) + mgr.iset(1, np.array(["bar"] * 3)) + tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3)) + tm.assert_numpy_array_equal( + mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_) + ) + 
tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3)) + tm.assert_numpy_array_equal( + mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_) + ) + + def test_set_change_dtype(self, mgr): + mgr.insert(len(mgr.items), "baz", np.zeros(N, dtype=bool)) + + mgr.iset(mgr.items.get_loc("baz"), np.repeat("foo", N)) + idx = mgr.items.get_loc("baz") + assert mgr.iget(idx).dtype == np.object_ + + mgr2 = mgr.consolidate() + mgr2.iset(mgr2.items.get_loc("baz"), np.repeat("foo", N)) + idx = mgr2.items.get_loc("baz") + assert mgr2.iget(idx).dtype == np.object_ + + mgr2.insert(len(mgr2.items), "quux", np.random.randn(N).astype(int)) + idx = mgr2.items.get_loc("quux") + assert mgr2.iget(idx).dtype == np.int_ + + mgr2.iset(mgr2.items.get_loc("quux"), np.random.randn(N)) + assert mgr2.iget(idx).dtype == np.float_ + + def test_copy(self, mgr): + cp = mgr.copy(deep=False) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): + + # view assertion + tm.assert_equal(cp_blk.values, blk.values) + if isinstance(blk.values, np.ndarray): + assert cp_blk.values.base is blk.values.base + else: + # DatetimeTZBlock has DatetimeIndex values + assert cp_blk.values._data.base is blk.values._data.base + + # copy(deep=True) consolidates, so the block-wise assertions will + # fail is mgr is not consolidated + mgr._consolidate_inplace() + cp = mgr.copy(deep=True) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): + + bvals = blk.values + cpvals = cp_blk.values + + tm.assert_equal(cpvals, bvals) + + if isinstance(cpvals, np.ndarray): + lbase = cpvals.base + rbase = bvals.base + else: + lbase = cpvals._ndarray.base + rbase = bvals._ndarray.base + + # copy assertion we either have a None for a base or in case of + # some blocks it is an array (e.g. datetimetz), but was copied + if isinstance(cpvals, DatetimeArray): + assert (lbase is None and rbase is None) or (lbase is not rbase) + elif not isinstance(cpvals, np.ndarray): + assert lbase is not rbase + else: + assert lbase is None and rbase is None + + def test_sparse(self): + mgr = create_mgr("a: sparse-1; b: sparse-2") + assert mgr.as_array().dtype == np.float64 + + def test_sparse_mixed(self): + mgr = create_mgr("a: sparse-1; b: sparse-2; c: f8") + assert len(mgr.blocks) == 3 + assert isinstance(mgr, BlockManager) + + @pytest.mark.parametrize( + "mgr_string, dtype", + [("c: f4; d: f2", np.float32), ("c: f4; d: f2; e: f8", np.float64)], + ) + def test_as_array_float(self, mgr_string, dtype): + mgr = create_mgr(mgr_string) + assert mgr.as_array().dtype == dtype + + @pytest.mark.parametrize( + "mgr_string, dtype", + [ + ("a: bool-1; b: bool-2", np.bool_), + ("a: i8-1; b: i8-2; c: i4; d: i2; e: u1", np.int64), + ("c: i4; d: i2; e: u1", np.int32), + ], + ) + def test_as_array_int_bool(self, mgr_string, dtype): + mgr = create_mgr(mgr_string) + assert mgr.as_array().dtype == dtype + + def test_as_array_datetime(self): + mgr = create_mgr("h: datetime-1; g: datetime-2") + assert mgr.as_array().dtype == "M8[ns]" + + def test_as_array_datetime_tz(self): + mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]") + assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]" + assert mgr.iget(1).dtype == "datetime64[ns, CET]" + assert mgr.as_array().dtype == "object" + + @pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"]) + def test_astype(self, t): + # coerce all + mgr = create_mgr("c: f4; d: f2; e: f8") + + t = np.dtype(t) + tmgr = mgr.astype(t) + assert tmgr.iget(0).dtype.type == t + assert tmgr.iget(1).dtype.type == t + assert 
tmgr.iget(2).dtype.type == t + + # mixed + mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8") + + t = np.dtype(t) + tmgr = mgr.astype(t, errors="ignore") + assert tmgr.iget(2).dtype.type == t + assert tmgr.iget(4).dtype.type == t + assert tmgr.iget(5).dtype.type == t + assert tmgr.iget(6).dtype.type == t + + assert tmgr.iget(0).dtype.type == np.object_ + assert tmgr.iget(1).dtype.type == np.object_ + if t != np.int64: + assert tmgr.iget(3).dtype.type == np.datetime64 + else: + assert tmgr.iget(3).dtype.type == t + + def test_convert(self): + def _compare(old_mgr, new_mgr): + """compare the blocks, numeric compare ==, object don't""" + old_blocks = set(old_mgr.blocks) + new_blocks = set(new_mgr.blocks) + assert len(old_blocks) == len(new_blocks) + + # compare non-numeric + for b in old_blocks: + found = False + for nb in new_blocks: + if (b.values == nb.values).all(): + found = True + break + assert found + + for b in new_blocks: + found = False + for ob in old_blocks: + if (b.values == ob.values).all(): + found = True + break + assert found + + # noops + mgr = create_mgr("f: i8; g: f8") + new_mgr = mgr.convert() + _compare(mgr, new_mgr) + + # convert + mgr = create_mgr("a,b,foo: object; f: i8; g: f8") + mgr.iset(0, np.array(["1"] * N, dtype=np.object_)) + mgr.iset(1, np.array(["2."] * N, dtype=np.object_)) + mgr.iset(2, np.array(["foo."] * N, dtype=np.object_)) + new_mgr = mgr.convert(numeric=True) + assert new_mgr.iget(0).dtype == np.int64 + assert new_mgr.iget(1).dtype == np.float64 + assert new_mgr.iget(2).dtype == np.object_ + assert new_mgr.iget(3).dtype == np.int64 + assert new_mgr.iget(4).dtype == np.float64 + + mgr = create_mgr( + "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2" + ) + mgr.iset(0, np.array(["1"] * N, dtype=np.object_)) + mgr.iset(1, np.array(["2."] * N, dtype=np.object_)) + mgr.iset(2, np.array(["foo."] * N, dtype=np.object_)) + new_mgr = mgr.convert(numeric=True) + assert new_mgr.iget(0).dtype == np.int64 + assert new_mgr.iget(1).dtype == np.float64 + assert new_mgr.iget(2).dtype == np.object_ + assert new_mgr.iget(3).dtype == np.int32 + assert new_mgr.iget(4).dtype == np.bool_ + assert new_mgr.iget(5).dtype.type, np.datetime64 + assert new_mgr.iget(6).dtype == np.int64 + assert new_mgr.iget(7).dtype == np.float64 + assert new_mgr.iget(8).dtype == np.float16 + + def test_invalid_ea_block(self): + with pytest.raises(ValueError, match="need to split"): + create_mgr("a: category; b: category") + + with pytest.raises(ValueError, match="need to split"): + create_mgr("a: category2; b: category2") + + def test_interleave(self): + # self + for dtype in ["f8", "i8", "object", "bool", "complex", "M8[ns]", "m8[ns]"]: + mgr = create_mgr(f"a: {dtype}") + assert mgr.as_array().dtype == dtype + mgr = create_mgr(f"a: {dtype}; b: {dtype}") + assert mgr.as_array().dtype == dtype + + @pytest.mark.parametrize( + "mgr_string, dtype", + [ + ("a: category", "i8"), + ("a: category; b: category", "i8"), + ("a: category; b: category2", "object"), + ("a: category2", "object"), + ("a: category2; b: category2", "object"), + ("a: f8", "f8"), + ("a: f8; b: i8", "f8"), + ("a: f4; b: i8", "f8"), + ("a: f4; b: i8; d: object", "object"), + ("a: bool; b: i8", "object"), + ("a: complex", "complex"), + ("a: f8; b: category", "object"), + ("a: M8[ns]; b: category", "object"), + ("a: M8[ns]; b: bool", "object"), + ("a: M8[ns]; b: i8", "object"), + ("a: m8[ns]; b: bool", "object"), + ("a: m8[ns]; b: i8", "object"), + ("a: M8[ns]; b: m8[ns]", "object"), + ], + 
) + def test_interleave_dtype(self, mgr_string, dtype): + # will be converted according the actual dtype of the underlying + mgr = create_mgr("a: category") + assert mgr.as_array().dtype == "i8" + mgr = create_mgr("a: category; b: category2") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: category2") + assert mgr.as_array().dtype == "object" + + # combinations + mgr = create_mgr("a: f8") + assert mgr.as_array().dtype == "f8" + mgr = create_mgr("a: f8; b: i8") + assert mgr.as_array().dtype == "f8" + mgr = create_mgr("a: f4; b: i8") + assert mgr.as_array().dtype == "f8" + mgr = create_mgr("a: f4; b: i8; d: object") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: bool; b: i8") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: complex") + assert mgr.as_array().dtype == "complex" + mgr = create_mgr("a: f8; b: category") + assert mgr.as_array().dtype == "f8" + mgr = create_mgr("a: M8[ns]; b: category") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: M8[ns]; b: bool") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: M8[ns]; b: i8") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: m8[ns]; b: bool") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: m8[ns]; b: i8") + assert mgr.as_array().dtype == "object" + mgr = create_mgr("a: M8[ns]; b: m8[ns]") + assert mgr.as_array().dtype == "object" + + def test_consolidate_ordering_issues(self, mgr): + mgr.iset(mgr.items.get_loc("f"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("d"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("b"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("g"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("h"), np.random.randn(N)) + + # we have datetime/tz blocks in mgr + cons = mgr.consolidate() + assert cons.nblocks == 4 + cons = mgr.consolidate().get_numeric_data() + assert cons.nblocks == 1 + assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement) + tm.assert_numpy_array_equal( + cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.intp) + ) + + def test_reindex_items(self): + # mgr is not consolidated, f8 & f8-2 blocks + mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2") + + reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0) + # reindex_axis does not consolidate_inplace, as that risks failing to + # invalidate _item_cache + assert not reindexed.is_consolidated() + + tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"])) + tm.assert_almost_equal( + mgr.iget(6).internal_values(), reindexed.iget(0).internal_values() + ) + tm.assert_almost_equal( + mgr.iget(2).internal_values(), reindexed.iget(1).internal_values() + ) + tm.assert_almost_equal( + mgr.iget(0).internal_values(), reindexed.iget(2).internal_values() + ) + tm.assert_almost_equal( + mgr.iget(3).internal_values(), reindexed.iget(3).internal_values() + ) + + def test_get_numeric_data(self, using_copy_on_write): + mgr = create_mgr( + "int: int; float: float; complex: complex;" + "str: object; bool: bool; obj: object; dt: datetime", + item_shape=(3,), + ) + mgr.iset(5, np.array([1, 2, 3], dtype=np.object_)) + + numeric = mgr.get_numeric_data() + tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + numeric.iget(numeric.items.get_loc("float")).internal_values(), + ) + + # Check sharing + numeric.iset( + numeric.items.get_loc("float"), + np.array([100.0, 200.0, 300.0]), + 
inplace=True, + ) + if using_copy_on_write: + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([1.0, 1.0, 1.0]), + ) + else: + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([100.0, 200.0, 300.0]), + ) + + numeric2 = mgr.get_numeric_data(copy=True) + tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) + numeric2.iset( + numeric2.items.get_loc("float"), + np.array([1000.0, 2000.0, 3000.0]), + inplace=True, + ) + if using_copy_on_write: + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([1.0, 1.0, 1.0]), + ) + else: + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([100.0, 200.0, 300.0]), + ) + + def test_get_bool_data(self, using_copy_on_write): + msg = "object-dtype columns with all-bool values" + mgr = create_mgr( + "int: int; float: float; complex: complex;" + "str: object; bool: bool; obj: object; dt: datetime", + item_shape=(3,), + ) + mgr.iset(6, np.array([True, False, True], dtype=np.object_)) + + with tm.assert_produces_warning(FutureWarning, match=msg): + bools = mgr.get_bool_data() + tm.assert_index_equal(bools.items, Index(["bool", "dt"])) + tm.assert_almost_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + bools.iget(bools.items.get_loc("bool")).internal_values(), + ) + + bools.iset(0, np.array([True, False, True]), inplace=True) + if using_copy_on_write: + tm.assert_numpy_array_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, True, True]), + ) + else: + tm.assert_numpy_array_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, False, True]), + ) + + # Check sharing + with tm.assert_produces_warning(FutureWarning, match=msg): + bools2 = mgr.get_bool_data(copy=True) + bools2.iset(0, np.array([False, True, False])) + if using_copy_on_write: + tm.assert_numpy_array_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, True, True]), + ) + else: + tm.assert_numpy_array_equal( + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, False, True]), + ) + + def test_unicode_repr_doesnt_raise(self): + repr(create_mgr("b,\u05d0: object")) + + @pytest.mark.parametrize( + "mgr_string", ["a,b,c: i8-1; d,e,f: i8-2", "a,a,a: i8-1; b,b,b: i8-2"] + ) + def test_equals(self, mgr_string): + # unique items + bm1 = create_mgr(mgr_string) + bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) + assert bm1.equals(bm2) + + @pytest.mark.parametrize( + "mgr_string", + [ + "a:i8;b:f8", # basic case + "a:i8;b:f8;c:c8;d:b", # many types + "a:i8;e:dt;f:td;g:string", # more types + "a:i8;b:category;c:category2", # categories + "c:sparse;d:sparse_na;b:f8", # sparse + ], + ) + def test_equals_block_order_different_dtypes(self, mgr_string): + # GH 9330 + bm = create_mgr(mgr_string) + block_perms = itertools.permutations(bm.blocks) + for bm_perm in block_perms: + bm_this = BlockManager(tuple(bm_perm), bm.axes) + assert bm.equals(bm_this) + assert bm_this.equals(bm) + + def test_single_mgr_ctor(self): + mgr = create_single_mgr("f8", num_rows=5) + assert mgr.external_values().tolist() == [0.0, 1.0, 2.0, 3.0, 4.0] + + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, value): + bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2") + + msg = ( + 'For argument "inplace" expected type bool, ' + f"received type {type(value).__name__}." 
+ ) + with pytest.raises(ValueError, match=msg): + bm1.replace_list([1], [2], inplace=value) + + +def _as_array(mgr): + if mgr.ndim == 1: + return mgr.external_values() + return mgr.as_array().T + + +class TestIndexing: + # Nosetests-style data-driven tests. + # + # This test applies different indexing routines to block managers and + # compares the outcome to the result of same operations on np.ndarray. + # + # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of tests + # and are disabled. + + MANAGERS = [ + create_single_mgr("f8", N), + create_single_mgr("i8", N), + # 2-dim + create_mgr("a,b,c,d,e,f: f8", item_shape=(N,)), + create_mgr("a,b,c,d,e,f: i8", item_shape=(N,)), + create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N,)), + create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N,)), + ] + + @pytest.mark.parametrize("mgr", MANAGERS) + def test_get_slice(self, mgr): + def assert_slice_ok(mgr, axis, slobj): + mat = _as_array(mgr) + + # we maybe using an ndarray to test slicing and + # might not be the full length of the axis + if isinstance(slobj, np.ndarray): + ax = mgr.axes[axis] + if len(ax) and len(slobj) and len(slobj) != len(ax): + slobj = np.concatenate( + [slobj, np.zeros(len(ax) - len(slobj), dtype=bool)] + ) + + if isinstance(slobj, slice): + sliced = mgr.get_slice(slobj, axis=axis) + elif mgr.ndim == 1 and axis == 0: + sliced = mgr.getitem_mgr(slobj) + else: + # BlockManager doesn't support non-slice, SingleBlockManager + # doesn't support axis > 0 + return + + mat_slobj = (slice(None),) * axis + (slobj,) + tm.assert_numpy_array_equal( + mat[mat_slobj], _as_array(sliced), check_dtype=False + ) + tm.assert_index_equal(mgr.axes[axis][slobj], sliced.axes[axis]) + + assert mgr.ndim <= 2, mgr.ndim + for ax in range(mgr.ndim): + # slice + assert_slice_ok(mgr, ax, slice(None)) + assert_slice_ok(mgr, ax, slice(3)) + assert_slice_ok(mgr, ax, slice(100)) + assert_slice_ok(mgr, ax, slice(1, 4)) + assert_slice_ok(mgr, ax, slice(3, 0, -2)) + + if mgr.ndim < 2: + # 2D only support slice objects + + # boolean mask + assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_)) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0) + assert_slice_ok( + mgr, ax, np.array([True, True, False], dtype=np.bool_) + ) + + # fancy indexer + assert_slice_ok(mgr, ax, []) + assert_slice_ok(mgr, ax, list(range(mgr.shape[ax]))) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, [0, 1, 2]) + assert_slice_ok(mgr, ax, [-1, -2, -3]) + + @pytest.mark.parametrize("mgr", MANAGERS) + def test_take(self, mgr): + def assert_take_ok(mgr, axis, indexer): + mat = _as_array(mgr) + taken = mgr.take(indexer, axis) + tm.assert_numpy_array_equal( + np.take(mat, indexer, axis), _as_array(taken), check_dtype=False + ) + tm.assert_index_equal(mgr.axes[axis].take(indexer), taken.axes[axis]) + + for ax in range(mgr.ndim): + # take/fancy indexer + assert_take_ok(mgr, ax, indexer=[]) + assert_take_ok(mgr, ax, indexer=[0, 0, 0]) + assert_take_ok(mgr, ax, indexer=list(range(mgr.shape[ax]))) + + if mgr.shape[ax] >= 3: + assert_take_ok(mgr, ax, indexer=[0, 1, 2]) + assert_take_ok(mgr, ax, indexer=[-1, -2, -3]) + + @pytest.mark.parametrize("mgr", MANAGERS) + @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0]) + def test_reindex_axis(self, fill_value, mgr): + def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): + mat = 
_as_array(mgr) + indexer = mgr.axes[axis].get_indexer_for(new_labels) + + reindexed = mgr.reindex_axis(new_labels, axis, fill_value=fill_value) + tm.assert_numpy_array_equal( + algos.take_nd(mat, indexer, axis, fill_value=fill_value), + _as_array(reindexed), + check_dtype=False, + ) + tm.assert_index_equal(reindexed.axes[axis], new_labels) + + for ax in range(mgr.ndim): + assert_reindex_axis_is_ok(mgr, ax, Index([]), fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax], fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][[0, 0, 0]], fill_value) + assert_reindex_axis_is_ok(mgr, ax, Index(["foo", "bar", "baz"]), fill_value) + assert_reindex_axis_is_ok( + mgr, ax, Index(["foo", mgr.axes[ax][0], "baz"]), fill_value + ) + + if mgr.shape[ax] >= 3: + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][:-3], fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][-3::-1], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value + ) + + @pytest.mark.parametrize("mgr", MANAGERS) + @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0]) + def test_reindex_indexer(self, fill_value, mgr): + def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): + mat = _as_array(mgr) + reindexed_mat = algos.take_nd(mat, indexer, axis, fill_value=fill_value) + reindexed = mgr.reindex_indexer( + new_labels, indexer, axis, fill_value=fill_value + ) + tm.assert_numpy_array_equal( + reindexed_mat, _as_array(reindexed), check_dtype=False + ) + tm.assert_index_equal(reindexed.axes[axis], new_labels) + + for ax in range(mgr.ndim): + assert_reindex_indexer_is_ok( + mgr, ax, Index([]), np.array([], dtype=np.intp), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + Index(["foo"] * mgr.shape[ax]), + np.arange(mgr.shape[ax]), + fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax])[::-1], fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, Index(["foo", "bar", "baz"]), np.array([0, 0, 0]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, Index(["foo", "bar", "baz"]), np.array([-1, 0, -1]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + Index(["foo", mgr.axes[ax][0], "baz"]), + np.array([-1, -1, -1]), + fill_value, + ) + + if mgr.shape[ax] >= 3: + assert_reindex_indexer_is_ok( + mgr, + ax, + Index(["foo", "bar", "baz"]), + np.array([0, 1, 2]), + fill_value, + ) + + +class TestBlockPlacement: + @pytest.mark.parametrize( + "slc, expected", + [ + (slice(0, 4), 4), + (slice(0, 4, 2), 2), + (slice(0, 3, 2), 2), + (slice(0, 1, 2), 1), + (slice(1, 0, -1), 1), + ], + ) + def test_slice_len(self, slc, expected): + assert len(BlockPlacement(slc)) == expected + + @pytest.mark.parametrize("slc", [slice(1, 1, 0), slice(1, 2, 0)]) + def test_zero_step_raises(self, slc): + msg = "slice step cannot be zero" + with pytest.raises(ValueError, match=msg): + BlockPlacement(slc) + + def test_slice_canonize_negative_stop(self): + # GH#37524 negative stop is OK with negative step and positive start + slc = slice(3, -1, -2) + + bp = BlockPlacement(slc) + assert bp.indexer == slice(3, None, -2) + + @pytest.mark.parametrize( + "slc", + [ + slice(None, None), + slice(10, None), + slice(None, None, -1), + slice(None, 10, -1), + # These are "unbounded" because negative index will + # change 
depending on container shape. + slice(-1, None), + slice(None, -1), + slice(-1, -1), + slice(-1, None, -1), + slice(None, -1, -1), + slice(-1, -1, -1), + ], + ) + def test_unbounded_slice_raises(self, slc): + msg = "unbounded slice" + with pytest.raises(ValueError, match=msg): + BlockPlacement(slc) + + @pytest.mark.parametrize( + "slc", + [ + slice(0, 0), + slice(100, 0), + slice(100, 100), + slice(100, 100, -1), + slice(0, 100, -1), + ], + ) + def test_not_slice_like_slices(self, slc): + assert not BlockPlacement(slc).is_slice_like + + @pytest.mark.parametrize( + "arr, slc", + [ + ([0], slice(0, 1, 1)), + ([100], slice(100, 101, 1)), + ([0, 1, 2], slice(0, 3, 1)), + ([0, 5, 10], slice(0, 15, 5)), + ([0, 100], slice(0, 200, 100)), + ([2, 1], slice(2, 0, -1)), + ], + ) + def test_array_to_slice_conversion(self, arr, slc): + assert BlockPlacement(arr).as_slice == slc + + @pytest.mark.parametrize( + "arr", + [ + [], + [-1], + [-1, -2, -3], + [-10], + [-1], + [-1, 0, 1, 2], + [-2, 0, 2, 4], + [1, 0, -1], + [1, 1, 1], + ], + ) + def test_not_slice_like_arrays(self, arr): + assert not BlockPlacement(arr).is_slice_like + + @pytest.mark.parametrize( + "slc, expected", + [(slice(0, 3), [0, 1, 2]), (slice(0, 0), []), (slice(3, 0), [])], + ) + def test_slice_iter(self, slc, expected): + assert list(BlockPlacement(slc)) == expected + + @pytest.mark.parametrize( + "slc, arr", + [ + (slice(0, 3), [0, 1, 2]), + (slice(0, 0), []), + (slice(3, 0), []), + (slice(3, 0, -1), [3, 2, 1]), + ], + ) + def test_slice_to_array_conversion(self, slc, arr): + tm.assert_numpy_array_equal( + BlockPlacement(slc).as_array, np.asarray(arr, dtype=np.intp) + ) + + def test_blockplacement_add(self): + bpl = BlockPlacement(slice(0, 5)) + assert bpl.add(1).as_slice == slice(1, 6, 1) + assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2) + assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5] + + @pytest.mark.parametrize( + "val, inc, expected", + [ + (slice(0, 0), 0, []), + (slice(1, 4), 0, [1, 2, 3]), + (slice(3, 0, -1), 0, [3, 2, 1]), + ([1, 2, 4], 0, [1, 2, 4]), + (slice(0, 0), 10, []), + (slice(1, 4), 10, [11, 12, 13]), + (slice(3, 0, -1), 10, [13, 12, 11]), + ([1, 2, 4], 10, [11, 12, 14]), + (slice(0, 0), -1, []), + (slice(1, 4), -1, [0, 1, 2]), + ([1, 2, 4], -1, [0, 1, 3]), + ], + ) + def test_blockplacement_add_int(self, val, inc, expected): + assert list(BlockPlacement(val).add(inc)) == expected + + @pytest.mark.parametrize("val", [slice(1, 4), [1, 2, 4]]) + def test_blockplacement_add_int_raises(self, val): + msg = "iadd causes length change" + with pytest.raises(ValueError, match=msg): + BlockPlacement(val).add(-10) + + +class TestCanHoldElement: + @pytest.fixture( + params=[ + lambda x: x, + lambda x: x.to_series(), + lambda x: x._data, + lambda x: list(x), + lambda x: x.astype(object), + lambda x: np.asarray(x), + lambda x: x[0], + lambda x: x[:0], + ] + ) + def element(self, request): + """ + Functions that take an Index and return an element that should have + blk._can_hold_element(element) for a Block with this index's dtype. 
+ """ + return request.param + + def test_datetime_block_can_hold_element(self): + block = create_block("datetime", [0]) + + assert block._can_hold_element([]) + + # We will check that block._can_hold_element iff arr.__setitem__ works + arr = pd.array(block.values.ravel()) + + # coerce None + assert block._can_hold_element(None) + arr[0] = None + assert arr[0] is pd.NaT + + # coerce different types of datetime objects + vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)] + for val in vals: + assert block._can_hold_element(val) + arr[0] = val + + val = date(2010, 10, 10) + assert not block._can_hold_element(val) + + msg = ( + "value should be a 'Timestamp', 'NaT', " + "or array of those. Got 'date' instead." + ) + with pytest.raises(TypeError, match=msg): + arr[0] = val + + @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) + def test_interval_can_hold_element_emptylist(self, dtype, element): + arr = np.array([1, 3, 4], dtype=dtype) + ii = IntervalIndex.from_breaks(arr) + blk = new_block(ii._data, [1], ndim=2) + + assert blk._can_hold_element([]) + # TODO: check this holds for all blocks + + @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) + def test_interval_can_hold_element(self, dtype, element): + arr = np.array([1, 3, 4, 9], dtype=dtype) + ii = IntervalIndex.from_breaks(arr) + blk = new_block(ii._data, [1], ndim=2) + + elem = element(ii) + self.check_series_setitem(elem, ii, True) + assert blk._can_hold_element(elem) + + # Careful: to get the expected Series-inplace behavior we need + # `elem` to not have the same length as `arr` + ii2 = IntervalIndex.from_breaks(arr[:-1], closed="neither") + elem = element(ii2) + self.check_series_setitem(elem, ii, False) + assert not blk._can_hold_element(elem) + + ii3 = IntervalIndex.from_breaks([Timestamp(1), Timestamp(3), Timestamp(4)]) + elem = element(ii3) + self.check_series_setitem(elem, ii, False) + assert not blk._can_hold_element(elem) + + ii4 = IntervalIndex.from_breaks([Timedelta(1), Timedelta(3), Timedelta(4)]) + elem = element(ii4) + self.check_series_setitem(elem, ii, False) + assert not blk._can_hold_element(elem) + + def test_period_can_hold_element_emptylist(self): + pi = period_range("2016", periods=3, freq="A") + blk = new_block(pi._data.reshape(1, 3), [1], ndim=2) + + assert blk._can_hold_element([]) + + def test_period_can_hold_element(self, element): + pi = period_range("2016", periods=3, freq="A") + + elem = element(pi) + self.check_series_setitem(elem, pi, True) + + # Careful: to get the expected Series-inplace behavior we need + # `elem` to not have the same length as `arr` + pi2 = pi.asfreq("D")[:-1] + elem = element(pi2) + self.check_series_setitem(elem, pi, False) + + dti = pi.to_timestamp("S")[:-1] + elem = element(dti) + self.check_series_setitem(elem, pi, False) + + def check_setting(self, elem, index: Index, inplace: bool): + self.check_series_setitem(elem, index, inplace) + self.check_frame_setitem(elem, index, inplace) + + def check_can_hold_element(self, obj, elem, inplace: bool): + blk = obj._mgr.blocks[0] + if inplace: + assert blk._can_hold_element(elem) + else: + assert not blk._can_hold_element(elem) + + def check_series_setitem(self, elem, index: Index, inplace: bool): + arr = index._data.copy() + ser = Series(arr) + + self.check_can_hold_element(ser, elem, inplace) + + if is_scalar(elem): + ser[0] = elem + else: + ser[: len(elem)] = elem + + if inplace: + assert ser.array is arr # i.e. 
setting was done inplace + else: + assert ser.dtype == object + + def check_frame_setitem(self, elem, index: Index, inplace: bool): + arr = index._data.copy() + df = DataFrame(arr) + + self.check_can_hold_element(df, elem, inplace) + + if is_scalar(elem): + df.iloc[0, 0] = elem + else: + df.iloc[: len(elem), 0] = elem + + if inplace: + # assertion here implies setting was done inplace + assert df._mgr.arrays[0] is arr + else: + assert df.dtypes[0] == object + + +class TestShouldStore: + def test_should_store_categorical(self): + cat = Categorical(["A", "B", "C"]) + df = DataFrame(cat) + blk = df._mgr.blocks[0] + + # matching dtype + assert blk.should_store(cat) + assert blk.should_store(cat[:-1]) + + # different dtype + assert not blk.should_store(cat.as_ordered()) + + # ndarray instead of Categorical + assert not blk.should_store(np.asarray(cat)) + + +def test_validate_ndim(block_maker): + values = np.array([1.0, 2.0]) + placement = slice(2) + msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]" + + with pytest.raises(ValueError, match=msg): + block_maker(values, placement, ndim=2) + + +def test_block_shape(): + idx = Index([0, 1, 2, 3, 4]) + a = Series([1, 2, 3]).reindex(idx) + b = Series(Categorical([1, 2, 3])).reindex(idx) + + assert a._mgr.blocks[0].mgr_locs.indexer == b._mgr.blocks[0].mgr_locs.indexer + + +def test_make_block_no_pandas_array(block_maker): + # https://github.com/pandas-dev/pandas/pull/24866 + arr = pd.arrays.PandasArray(np.array([1, 2])) + + # PandasArray, no dtype + result = block_maker(arr, slice(len(arr)), ndim=arr.ndim) + assert result.dtype.kind in ["i", "u"] + + if block_maker is make_block: + # new_block requires caller to unwrap PandasArray + assert result.is_extension is False + + # PandasArray, PandasDtype + result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim) + assert result.dtype.kind in ["i", "u"] + assert result.is_extension is False + + # new_block no longer taked dtype keyword + # ndarray, PandasDtype + result = block_maker( + arr.to_numpy(), slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim + ) + assert result.dtype.kind in ["i", "u"] + assert result.is_extension is False + + +def test_single_block_manager_fastpath_deprecated(): + # GH#33092 + ser = Series(range(3)) + blk = ser._data.blocks[0] + with tm.assert_produces_warning(FutureWarning): + SingleBlockManager(blk, ser.index, fastpath=True) diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py new file mode 100644 index 00000000..045c3cbb --- /dev/null +++ b/pandas/tests/internals/test_managers.py @@ -0,0 +1,72 @@ +""" +Testing interaction between the different managers (BlockManager, ArrayManager) +""" +from pandas.core.dtypes.missing import array_equivalent + +import pandas as pd +import pandas._testing as tm +from pandas.core.internals import ( + ArrayManager, + BlockManager, + SingleArrayManager, + SingleBlockManager, +) + + +def test_dataframe_creation(): + + with pd.option_context("mode.data_manager", "block"): + df_block = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + assert isinstance(df_block._mgr, BlockManager) + + with pd.option_context("mode.data_manager", "array"): + df_array = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]}) + assert isinstance(df_array._mgr, ArrayManager) + + # also ensure both are seen as equal + tm.assert_frame_equal(df_block, df_array) + + # conversion from one manager to the other + result = df_block._as_manager("block") + assert 
isinstance(result._mgr, BlockManager) + result = df_block._as_manager("array") + assert isinstance(result._mgr, ArrayManager) + tm.assert_frame_equal(result, df_block) + assert all( + array_equivalent(left, right) + for left, right in zip(result._mgr.arrays, df_array._mgr.arrays) + ) + + result = df_array._as_manager("array") + assert isinstance(result._mgr, ArrayManager) + result = df_array._as_manager("block") + assert isinstance(result._mgr, BlockManager) + tm.assert_frame_equal(result, df_array) + assert len(result._mgr.blocks) == 2 + + +def test_series_creation(): + + with pd.option_context("mode.data_manager", "block"): + s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) + assert isinstance(s_block._mgr, SingleBlockManager) + + with pd.option_context("mode.data_manager", "array"): + s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) + assert isinstance(s_array._mgr, SingleArrayManager) + + # also ensure both are seen as equal + tm.assert_series_equal(s_block, s_array) + + # conversion from one manager to the other + result = s_block._as_manager("block") + assert isinstance(result._mgr, SingleBlockManager) + result = s_block._as_manager("array") + assert isinstance(result._mgr, SingleArrayManager) + tm.assert_series_equal(result, s_block) + + result = s_array._as_manager("array") + assert isinstance(result._mgr, SingleArrayManager) + result = s_array._as_manager("block") + assert isinstance(result._mgr, SingleBlockManager) + tm.assert_series_equal(result, s_array) diff --git a/pandas/tests/io/__init__.py b/pandas/tests/io/__init__.py new file mode 100644 index 00000000..3231e38b --- /dev/null +++ b/pandas/tests/io/__init__.py @@ -0,0 +1,27 @@ +import pytest + +pytestmark = [ + # fastparquet + pytest.mark.filterwarnings( + "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning" + ), + pytest.mark.filterwarnings( + "ignore:Block.is_categorical is deprecated:DeprecationWarning" + ), + pytest.mark.filterwarnings( + r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning" + ), + # xlrd + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions:DeprecationWarning" + ), + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions. " + r"Use 'tree.iter\(\)' or 'list\(tree.iter\(\)\)' instead." 
+ ":PendingDeprecationWarning" + ), + # GH 26552 + pytest.mark.filterwarnings( + "ignore:As the xlwt package is no longer maintained:FutureWarning" + ), +] diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py new file mode 100644 index 00000000..522d2520 --- /dev/null +++ b/pandas/tests/io/conftest.py @@ -0,0 +1,214 @@ +import os +import shlex +import subprocess +import time + +import pytest + +from pandas.compat import ( + is_ci_environment, + is_platform_arm, + is_platform_mac, + is_platform_windows, +) +import pandas.util._test_decorators as td + +import pandas._testing as tm + +from pandas.io.parsers import read_csv + + +@pytest.fixture +def tips_file(datapath): + """Path to the tips dataset""" + return datapath("io", "data", "csv", "tips.csv") + + +@pytest.fixture +def jsonl_file(datapath): + """Path to a JSONL dataset""" + return datapath("io", "parser", "data", "items.jsonl") + + +@pytest.fixture +def salaries_table(datapath): + """DataFrame with the salaries dataset""" + return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t") + + +@pytest.fixture +def feather_file(datapath): + return datapath("io", "data", "feather", "feather-0_3_1.feather") + + +@pytest.fixture +def s3so(worker_id): + if is_ci_environment(): + url = "http://localhost:5000/" + else: + worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw") + url = f"http://127.0.0.1:555{worker_id}/" + return {"client_kwargs": {"endpoint_url": url}} + + +@pytest.fixture(scope="session") +def s3_base(worker_id): + """ + Fixture for mocking S3 interaction. + + Sets up moto server in separate process locally + Return url for motoserver/moto CI service + """ + pytest.importorskip("s3fs") + pytest.importorskip("boto3") + + with tm.ensure_safe_environment_variables(): + # temporary workaround as moto fails for botocore >= 1.11 otherwise, + # see https://github.com/spulec/moto/issues/1924 & 1952 + os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key") + os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret") + if is_ci_environment(): + if is_platform_arm() or is_platform_mac() or is_platform_windows(): + # NOT RUN on Windows/MacOS/ARM, only Ubuntu + # - subprocess in CI can cause timeouts + # - Github Actions do not support + # container services for the above OSs + # - CircleCI will probably hit the Docker rate pull limit + pytest.skip( + "S3 tests do not have a corresponding service in " + "Windows, MacOS or ARM platforms" + ) + else: + yield "http://localhost:5000" + else: + requests = pytest.importorskip("requests") + pytest.importorskip("moto", minversion="1.3.14") + pytest.importorskip("flask") # server mode needs flask too + + # Launching moto in server mode, i.e., as a separate process + # with an S3 endpoint on localhost + + worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw") + endpoint_port = f"555{worker_id}" + endpoint_uri = f"http://127.0.0.1:{endpoint_port}/" + + # pipe to null to avoid logging in terminal + with subprocess.Popen( + shlex.split(f"moto_server s3 -p {endpoint_port}"), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) as proc: + + timeout = 5 + while timeout > 0: + try: + # OK to go once server is accepting connections + r = requests.get(endpoint_uri) + if r.ok: + break + except Exception: + pass + timeout -= 0.1 + time.sleep(0.1) + yield endpoint_uri + + proc.terminate() + + +@pytest.fixture +def s3_resource(s3_base, tips_file, jsonl_file, feather_file): + """ + Sets up S3 bucket with contents + + The primary bucket name 
is "pandas-test". The following datasets + are loaded. + + - tips.csv + - tips.csv.gz + - tips.csv.bz2 + - items.jsonl + + A private bucket "cant_get_it" is also created. The boto3 s3 resource + is yielded by the fixture. + """ + import boto3 + import s3fs + + test_s3_files = [ + ("tips#1.csv", tips_file), + ("tips.csv", tips_file), + ("tips.csv.gz", tips_file + ".gz"), + ("tips.csv.bz2", tips_file + ".bz2"), + ("items.jsonl", jsonl_file), + ("simple_dataset.feather", feather_file), + ] + + def add_tips_files(bucket_name): + for s3_key, file_name in test_s3_files: + with open(file_name, "rb") as f: + cli.put_object(Bucket=bucket_name, Key=s3_key, Body=f) + + bucket = "pandas-test" + conn = boto3.resource("s3", endpoint_url=s3_base) + cli = boto3.client("s3", endpoint_url=s3_base) + + try: + cli.create_bucket(Bucket=bucket) + except Exception: + # OK is bucket already exists + pass + try: + cli.create_bucket(Bucket="cant_get_it", ACL="private") + except Exception: + # OK is bucket already exists + pass + timeout = 2 + while not cli.list_buckets()["Buckets"] and timeout > 0: + time.sleep(0.1) + timeout -= 0.1 + + add_tips_files(bucket) + add_tips_files("cant_get_it") + s3fs.S3FileSystem.clear_instance_cache() + yield conn + + s3 = s3fs.S3FileSystem(client_kwargs={"endpoint_url": s3_base}) + + try: + s3.rm(bucket, recursive=True) + except Exception: + pass + try: + s3.rm("cant_get_it", recursive=True) + except Exception: + pass + timeout = 2 + while cli.list_buckets()["Buckets"] and timeout > 0: + time.sleep(0.1) + timeout -= 0.1 + + +_compression_formats_params = [ + (".no_compress", None), + ("", None), + (".gz", "gzip"), + (".GZ", "gzip"), + (".bz2", "bz2"), + (".BZ2", "bz2"), + (".zip", "zip"), + (".ZIP", "zip"), + (".xz", "xz"), + (".XZ", "xz"), + pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")), + pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")), +] + + +@pytest.fixture(params=_compression_formats_params[1:]) +def compression_format(request): + return request.param + + +@pytest.fixture(params=_compression_formats_params) +def compression_ext(request): + return request.param[0] diff --git a/pandas/tests/io/data/csv/banklist.csv b/pandas/tests/io/data/csv/banklist.csv new file mode 100644 index 00000000..e7900830 --- /dev/null +++ b/pandas/tests/io/data/csv/banklist.csv @@ -0,0 +1,507 @@ +Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date +Banks of Wisconsin d/b/a Bank of Kenosha,Kenosha,WI,35386,"North Shore Bank, FSB",31-May-13,31-May-13 +Central Arizona Bank,Scottsdale,AZ,34527,Western State Bank,14-May-13,20-May-13 +Sunrise Bank,Valdosta,GA,58185,Synovus Bank,10-May-13,21-May-13 +Pisgah Community Bank,Asheville,NC,58701,"Capital Bank, N.A.",10-May-13,14-May-13 +Douglas County Bank,Douglasville,GA,21649,Hamilton State Bank,26-Apr-13,16-May-13 +Parkway Bank,Lenoir,NC,57158,"CertusBank, National Association",26-Apr-13,17-May-13 +Chipola Community Bank,Marianna,FL,58034,First Federal Bank of Florida,19-Apr-13,16-May-13 +Heritage Bank of North Florida,Orange Park,FL,26680,FirstAtlantic Bank,19-Apr-13,16-May-13 +First Federal Bank,Lexington,KY,29594,Your Community Bank,19-Apr-13,23-Apr-13 +Gold Canyon Bank,Gold Canyon,AZ,58066,"First Scottsdale Bank, National Association",5-Apr-13,9-Apr-13 +Frontier Bank,LaGrange,GA,16431,HeritageBank of the South,8-Mar-13,26-Mar-13 +Covenant Bank,Chicago,IL,22476,Liberty Bank and Trust Company,15-Feb-13,4-Mar-13 +1st Regents Bank,Andover,MN,57157,First Minnesota Bank,18-Jan-13,28-Feb-13 +Westside 
Community Bank,University Place,WA,33997,Sunwest Bank,11-Jan-13,24-Jan-13 +Community Bank of the Ozarks,Sunrise Beach,MO,27331,Bank of Sullivan,14-Dec-12,24-Jan-13 +Hometown Community Bank,Braselton,GA,57928,"CertusBank, National Association",16-Nov-12,24-Jan-13 +Citizens First National Bank,Princeton,IL,3731,Heartland Bank and Trust Company,2-Nov-12,24-Jan-13 +Heritage Bank of Florida,Lutz,FL,35009,Centennial Bank,2-Nov-12,24-Jan-13 +NOVA Bank,Berwyn,PA,27148,No Acquirer,26-Oct-12,24-Jan-13 +Excel Bank,Sedalia,MO,19189,Simmons First National Bank,19-Oct-12,24-Jan-13 +First East Side Savings Bank,Tamarac,FL,28144,Stearns Bank N.A.,19-Oct-12,24-Jan-13 +GulfSouth Private Bank,Destin,FL,58073,SmartBank,19-Oct-12,24-Jan-13 +First United Bank,Crete,IL,20685,"Old Plank Trail Community Bank, National Association",28-Sep-12,15-Nov-12 +Truman Bank,St. Louis,MO,27316,Simmons First National Bank,14-Sep-12,17-Dec-12 +First Commercial Bank,Bloomington,MN,35246,Republic Bank & Trust Company,7-Sep-12,17-Dec-12 +Waukegan Savings Bank,Waukegan,IL,28243,First Midwest Bank,3-Aug-12,11-Oct-12 +Jasper Banking Company,Jasper,GA,16240,Stearns Bank N.A.,27-Jul-12,17-Dec-12 +Second Federal Savings and Loan Association of Chicago,Chicago,IL,27986,Hinsdale Bank & Trust Company,20-Jul-12,14-Jan-13 +Heartland Bank,Leawood,KS,1361,Metcalf Bank,20-Jul-12,17-Dec-12 +First Cherokee State Bank,Woodstock,GA,32711,Community & Southern Bank,20-Jul-12,31-Oct-12 +Georgia Trust Bank,Buford,GA,57847,Community & Southern Bank,20-Jul-12,17-Dec-12 +The Royal Palm Bank of Florida,Naples,FL,57096,First National Bank of the Gulf Coast,20-Jul-12,7-Jan-13 +Glasgow Savings Bank,Glasgow,MO,1056,Regional Missouri Bank,13-Jul-12,11-Oct-12 +Montgomery Bank & Trust,Ailey,GA,19498,Ameris Bank,6-Jul-12,31-Oct-12 +The Farmers Bank of Lynchburg,Lynchburg,TN,1690,Clayton Bank and Trust,15-Jun-12,31-Oct-12 +Security Exchange Bank,Marietta,GA,35299,Fidelity Bank,15-Jun-12,10-Oct-12 +Putnam State Bank,Palatka,FL,27405,Harbor Community Bank,15-Jun-12,10-Oct-12 +Waccamaw Bank,Whiteville,NC,34515,First Community Bank,8-Jun-12,8-Nov-12 +Farmers' and Traders' State Bank,Shabbona,IL,9257,First State Bank,8-Jun-12,10-Oct-12 +Carolina Federal Savings Bank,Charleston,SC,35372,Bank of North Carolina,8-Jun-12,31-Oct-12 +First Capital Bank,Kingfisher,OK,416,F & M Bank,8-Jun-12,10-Oct-12 +"Alabama Trust Bank, National Association",Sylacauga,AL,35224,Southern States Bank,18-May-12,20-May-13 +"Security Bank, National Association",North Lauderdale,FL,23156,Banesco USA,4-May-12,31-Oct-12 +Palm Desert National Bank,Palm Desert,CA,23632,Pacific Premier Bank,27-Apr-12,17-May-13 +Plantation Federal Bank,Pawleys Island,SC,32503,First Federal Bank,27-Apr-12,17-May-13 +"Inter Savings Bank, fsb D/B/A InterBank, fsb",Maple Grove,MN,31495,Great Southern Bank,27-Apr-12,17-May-13 +HarVest Bank of Maryland,Gaithersburg,MD,57766,Sonabank,27-Apr-12,17-May-13 +Bank of the Eastern Shore,Cambridge,MD,26759,No Acquirer,27-Apr-12,17-Oct-12 +"Fort Lee Federal Savings Bank, FSB",Fort Lee,NJ,35527,Alma Bank,20-Apr-12,17-May-13 +Fidelity Bank,Dearborn,MI,33883,The Huntington National Bank,30-Mar-12,16-May-13 +Premier Bank,Wilmette,IL,35419,International Bank of Chicago,23-Mar-12,17-Oct-12 +Covenant Bank & Trust,Rock Spring,GA,58068,"Stearns Bank, N.A.",23-Mar-12,31-Oct-12 +New City Bank,Chicago,IL,57597,No Acquirer,9-Mar-12,29-Oct-12 +Global Commerce Bank,Doraville,GA,34046,Metro City Bank,2-Mar-12,31-Oct-12 +Home Savings of America,Little Falls,MN,29178,No Acquirer,24-Feb-12,17-Dec-12 
+Central Bank of Georgia,Ellaville,GA,5687,Ameris Bank,24-Feb-12,9-Aug-12 +SCB Bank,Shelbyville,IN,29761,"First Merchants Bank, National Association",10-Feb-12,25-Mar-13 +Charter National Bank and Trust,Hoffman Estates,IL,23187,"Barrington Bank & Trust Company, National Association",10-Feb-12,25-Mar-13 +BankEast,Knoxville,TN,19869,U.S.Bank National Association,27-Jan-12,8-Mar-13 +Patriot Bank Minnesota,Forest Lake,MN,34823,First Resource Bank,27-Jan-12,12-Sep-12 +Tennessee Commerce Bank,Franklin,TN,35296,Republic Bank & Trust Company,27-Jan-12,20-Nov-12 +First Guaranty Bank and Trust Company of Jacksonville,Jacksonville,FL,16579,"CenterState Bank of Florida, N.A.",27-Jan-12,12-Sep-12 +American Eagle Savings Bank,Boothwyn,PA,31581,"Capital Bank, N.A.",20-Jan-12,25-Jan-13 +The First State Bank,Stockbridge,GA,19252,Hamilton State Bank,20-Jan-12,25-Jan-13 +Central Florida State Bank,Belleview,FL,57186,"CenterState Bank of Florida, N.A.",20-Jan-12,25-Jan-13 +Western National Bank,Phoenix,AZ,57917,Washington Federal,16-Dec-11,13-Aug-12 +Premier Community Bank of the Emerald Coast,Crestview,FL,58343,Summit Bank,16-Dec-11,12-Sep-12 +Central Progressive Bank,Lacombe,LA,19657,First NBC Bank,18-Nov-11,13-Aug-12 +Polk County Bank,Johnston,IA,14194,Grinnell State Bank,18-Nov-11,15-Aug-12 +Community Bank of Rockmart,Rockmart,GA,57860,Century Bank of Georgia,10-Nov-11,13-Aug-12 +SunFirst Bank,Saint George,UT,57087,Cache Valley Bank,4-Nov-11,16-Nov-12 +"Mid City Bank, Inc.",Omaha,NE,19397,Premier Bank,4-Nov-11,15-Aug-12 +All American Bank,Des Plaines,IL,57759,International Bank of Chicago,28-Oct-11,15-Aug-12 +Community Banks of Colorado,Greenwood Village,CO,21132,"Bank Midwest, N.A.",21-Oct-11,2-Jan-13 +Community Capital Bank,Jonesboro,GA,57036,State Bank and Trust Company,21-Oct-11,8-Nov-12 +Decatur First Bank,Decatur,GA,34392,Fidelity Bank,21-Oct-11,8-Nov-12 +Old Harbor Bank,Clearwater,FL,57537,1st United Bank,21-Oct-11,8-Nov-12 +Country Bank,Aledo,IL,35395,Blackhawk Bank & Trust,14-Oct-11,15-Aug-12 +First State Bank,Cranford,NJ,58046,Northfield Bank,14-Oct-11,8-Nov-12 +"Blue Ridge Savings Bank, Inc.",Asheville,NC,32347,Bank of North Carolina,14-Oct-11,8-Nov-12 +Piedmont Community Bank,Gray,GA,57256,State Bank and Trust Company,14-Oct-11,22-Jan-13 +Sun Security Bank,Ellington,MO,20115,Great Southern Bank,7-Oct-11,7-Nov-12 +The RiverBank,Wyoming,MN,10216,Central Bank,7-Oct-11,7-Nov-12 +First International Bank,Plano,TX,33513,American First National Bank,30-Sep-11,9-Oct-12 +Citizens Bank of Northern California,Nevada City,CA,33983,Tri Counties Bank,23-Sep-11,9-Oct-12 +Bank of the Commonwealth,Norfolk,VA,20408,Southern Bank and Trust Company,23-Sep-11,9-Oct-12 +The First National Bank of Florida,Milton,FL,25155,CharterBank,9-Sep-11,6-Sep-12 +CreekSide Bank,Woodstock,GA,58226,Georgia Commerce Bank,2-Sep-11,6-Sep-12 +Patriot Bank of Georgia,Cumming,GA,58273,Georgia Commerce Bank,2-Sep-11,2-Nov-12 +First Choice Bank,Geneva,IL,57212,Inland Bank & Trust,19-Aug-11,15-Aug-12 +First Southern National Bank,Statesboro,GA,57239,Heritage Bank of the South,19-Aug-11,2-Nov-12 +Lydian Private Bank,Palm Beach,FL,35356,"Sabadell United Bank, N.A.",19-Aug-11,2-Nov-12 +Public Savings Bank,Huntingdon Valley,PA,34130,"Capital Bank, N.A.",18-Aug-11,15-Aug-12 +The First National Bank of Olathe,Olathe,KS,4744,Enterprise Bank & Trust,12-Aug-11,23-Aug-12 +Bank of Whitman,Colfax,WA,22528,Columbia State Bank,5-Aug-11,16-Aug-12 +Bank of Shorewood,Shorewood,IL,22637,Heartland Bank and Trust Company,5-Aug-11,16-Aug-12 +Integra Bank 
National Association,Evansville,IN,4392,Old National Bank,29-Jul-11,16-Aug-12 +"BankMeridian, N.A.",Columbia,SC,58222,SCBT National Association,29-Jul-11,2-Nov-12 +Virginia Business Bank,Richmond,VA,58283,Xenith Bank,29-Jul-11,9-Oct-12 +Bank of Choice,Greeley,CO,2994,"Bank Midwest, N.A.",22-Jul-11,12-Sep-12 +LandMark Bank of Florida,Sarasota,FL,35244,American Momentum Bank,22-Jul-11,2-Nov-12 +Southshore Community Bank,Apollo Beach,FL,58056,American Momentum Bank,22-Jul-11,2-Nov-12 +Summit Bank,Prescott,AZ,57442,The Foothills Bank,15-Jul-11,16-Aug-12 +First Peoples Bank,Port St. Lucie,FL,34870,"Premier American Bank, N.A.",15-Jul-11,2-Nov-12 +High Trust Bank,Stockbridge,GA,19554,Ameris Bank,15-Jul-11,2-Nov-12 +One Georgia Bank,Atlanta,GA,58238,Ameris Bank,15-Jul-11,2-Nov-12 +Signature Bank,Windsor,CO,57835,Points West Community Bank,8-Jul-11,26-Oct-12 +Colorado Capital Bank,Castle Rock,CO,34522,First-Citizens Bank & Trust Company,8-Jul-11,15-Jan-13 +First Chicago Bank & Trust,Chicago,IL,27935,Northbrook Bank & Trust Company,8-Jul-11,9-Sep-12 +Mountain Heritage Bank,Clayton,GA,57593,First American Bank and Trust Company,24-Jun-11,2-Nov-12 +First Commercial Bank of Tampa Bay,Tampa,FL,27583,Stonegate Bank,17-Jun-11,2-Nov-12 +McIntosh State Bank,Jackson,GA,19237,Hamilton State Bank,17-Jun-11,2-Nov-12 +Atlantic Bank and Trust,Charleston,SC,58420,"First Citizens Bank and Trust Company, Inc.",3-Jun-11,31-Oct-12 +First Heritage Bank,Snohomish,WA,23626,Columbia State Bank,27-May-11,28-Jan-13 +Summit Bank,Burlington,WA,513,Columbia State Bank,20-May-11,22-Jan-13 +First Georgia Banking Company,Franklin,GA,57647,"CertusBank, National Association",20-May-11,13-Nov-12 +Atlantic Southern Bank,Macon,GA,57213,"CertusBank, National Association",20-May-11,31-Oct-12 +Coastal Bank,Cocoa Beach,FL,34898,"Florida Community Bank, a division of Premier American Bank, N.A.",6-May-11,30-Nov-12 +Community Central Bank,Mount Clemens,MI,34234,Talmer Bank & Trust,29-Apr-11,16-Aug-12 +The Park Avenue Bank,Valdosta,GA,19797,Bank of the Ozarks,29-Apr-11,30-Nov-12 +First Choice Community Bank,Dallas,GA,58539,Bank of the Ozarks,29-Apr-11,22-Jan-13 +Cortez Community Bank,Brooksville,FL,57625,"Florida Community Bank, a division of Premier American Bank, N.A.",29-Apr-11,30-Nov-12 +First National Bank of Central Florida,Winter Park,FL,26297,"Florida Community Bank, a division of Premier American Bank, N.A.",29-Apr-11,30-Nov-12 +Heritage Banking Group,Carthage,MS,14273,Trustmark National Bank,15-Apr-11,30-Nov-12 +Rosemount National Bank,Rosemount,MN,24099,Central Bank,15-Apr-11,16-Aug-12 +Superior Bank,Birmingham,AL,17750,"Superior Bank, National Association",15-Apr-11,30-Nov-12 +Nexity Bank,Birmingham,AL,19794,AloStar Bank of Commerce,15-Apr-11,4-Sep-12 +New Horizons Bank,East Ellijay,GA,57705,Citizens South Bank,15-Apr-11,16-Aug-12 +Bartow County Bank,Cartersville,GA,21495,Hamilton State Bank,15-Apr-11,22-Jan-13 +Nevada Commerce Bank,Las Vegas,NV,35418,City National Bank,8-Apr-11,9-Sep-12 +Western Springs National Bank and Trust,Western Springs,IL,10086,Heartland Bank and Trust Company,8-Apr-11,22-Jan-13 +The Bank of Commerce,Wood Dale,IL,34292,Advantage National Bank Group,25-Mar-11,22-Jan-13 +Legacy Bank,Milwaukee,WI,34818,Seaway Bank and Trust Company,11-Mar-11,12-Sep-12 +First National Bank of Davis,Davis,OK,4077,The Pauls Valley National Bank,11-Mar-11,20-Aug-12 +Valley Community Bank,St. 
Charles,IL,34187,First State Bank,25-Feb-11,12-Sep-12 +"San Luis Trust Bank, FSB",San Luis Obispo,CA,34783,First California Bank,18-Feb-11,20-Aug-12 +Charter Oak Bank,Napa,CA,57855,Bank of Marin,18-Feb-11,12-Sep-12 +Citizens Bank of Effingham,Springfield,GA,34601,Heritage Bank of the South,18-Feb-11,2-Nov-12 +Habersham Bank,Clarkesville,GA,151,SCBT National Association,18-Feb-11,2-Nov-12 +Canyon National Bank,Palm Springs,CA,34692,Pacific Premier Bank,11-Feb-11,12-Sep-12 +Badger State Bank,Cassville,WI,13272,Royal Bank,11-Feb-11,12-Sep-12 +Peoples State Bank,Hamtramck,MI,14939,First Michigan Bank,11-Feb-11,22-Jan-13 +Sunshine State Community Bank,Port Orange,FL,35478,"Premier American Bank, N.A.",11-Feb-11,2-Nov-12 +Community First Bank Chicago,Chicago,IL,57948,Northbrook Bank & Trust Company,4-Feb-11,20-Aug-12 +North Georgia Bank,Watkinsville,GA,35242,BankSouth,4-Feb-11,2-Nov-12 +American Trust Bank,Roswell,GA,57432,Renasant Bank,4-Feb-11,31-Oct-12 +First Community Bank,Taos,NM,12261,"U.S. Bank, N.A.",28-Jan-11,12-Sep-12 +FirsTier Bank,Louisville,CO,57646,No Acquirer,28-Jan-11,12-Sep-12 +Evergreen State Bank,Stoughton,WI,5328,McFarland State Bank,28-Jan-11,12-Sep-12 +The First State Bank,Camargo,OK,2303,Bank 7,28-Jan-11,12-Sep-12 +United Western Bank,Denver,CO,31293,First-Citizens Bank & Trust Company,21-Jan-11,12-Sep-12 +The Bank of Asheville,Asheville,NC,34516,First Bank,21-Jan-11,2-Nov-12 +CommunitySouth Bank & Trust,Easley,SC,57868,"CertusBank, National Association",21-Jan-11,2-Nov-12 +Enterprise Banking Company,McDonough,GA,19758,No Acquirer,21-Jan-11,2-Nov-12 +Oglethorpe Bank,Brunswick,GA,57440,Bank of the Ozarks,14-Jan-11,2-Nov-12 +Legacy Bank,Scottsdale,AZ,57820,Enterprise Bank & Trust,7-Jan-11,12-Sep-12 +First Commercial Bank of Florida,Orlando,FL,34965,First Southern Bank,7-Jan-11,2-Nov-12 +Community National Bank,Lino Lakes,MN,23306,Farmers & Merchants Savings Bank,17-Dec-10,20-Aug-12 +First Southern Bank,Batesville,AR,58052,Southern Bank,17-Dec-10,20-Aug-12 +"United Americas Bank, N.A.",Atlanta,GA,35065,State Bank and Trust Company,17-Dec-10,2-Nov-12 +"Appalachian Community Bank, FSB",McCaysville,GA,58495,Peoples Bank of East Tennessee,17-Dec-10,31-Oct-12 +Chestatee State Bank,Dawsonville,GA,34578,Bank of the Ozarks,17-Dec-10,2-Nov-12 +"The Bank of Miami,N.A.",Coral Gables,FL,19040,1st United Bank,17-Dec-10,2-Nov-12 +Earthstar Bank,Southampton,PA,35561,Polonia Bank,10-Dec-10,20-Aug-12 +Paramount Bank,Farmington Hills,MI,34673,Level One Bank,10-Dec-10,20-Aug-12 +First Banking Center,Burlington,WI,5287,First Michigan Bank,19-Nov-10,20-Aug-12 +Allegiance Bank of North America,Bala Cynwyd,PA,35078,VIST Bank,19-Nov-10,20-Aug-12 +Gulf State Community Bank,Carrabelle,FL,20340,Centennial Bank,19-Nov-10,2-Nov-12 +Copper Star Bank,Scottsdale,AZ,35463,"Stearns Bank, N.A.",12-Nov-10,20-Aug-12 +Darby Bank & Trust Co.,Vidalia,GA,14580,Ameris Bank,12-Nov-10,15-Jan-13 +Tifton Banking Company,Tifton,GA,57831,Ameris Bank,12-Nov-10,2-Nov-12 +First Vietnamese American Bank,Westminster,CA,57885,Grandpoint Bank,5-Nov-10,12-Sep-12 +Pierce Commercial Bank,Tacoma,WA,34411,Heritage Bank,5-Nov-10,20-Aug-12 +Western Commercial Bank,Woodland Hills,CA,58087,First California Bank,5-Nov-10,12-Sep-12 +K Bank,Randallstown,MD,31263,Manufacturers and Traders Trust Company (M&T Bank),5-Nov-10,20-Aug-12 +"First Arizona Savings, A FSB",Scottsdale,AZ,32582,No Acquirer,22-Oct-10,20-Aug-12 +Hillcrest Bank,Overland Park,KS,22173,"Hillcrest Bank, N.A.",22-Oct-10,20-Aug-12 +First Suburban National 
Bank,Maywood,IL,16089,Seaway Bank and Trust Company,22-Oct-10,20-Aug-12 +The First National Bank of Barnesville,Barnesville,GA,2119,United Bank,22-Oct-10,2-Nov-12 +The Gordon Bank,Gordon,GA,33904,Morris Bank,22-Oct-10,2-Nov-12 +Progress Bank of Florida,Tampa,FL,32251,Bay Cities Bank,22-Oct-10,2-Nov-12 +First Bank of Jacksonville,Jacksonville,FL,27573,Ameris Bank,22-Oct-10,2-Nov-12 +Premier Bank,Jefferson City,MO,34016,Providence Bank,15-Oct-10,20-Aug-12 +WestBridge Bank and Trust Company,Chesterfield,MO,58205,Midland States Bank,15-Oct-10,20-Aug-12 +"Security Savings Bank, F.S.B.",Olathe,KS,30898,Simmons First National Bank,15-Oct-10,20-Aug-12 +Shoreline Bank,Shoreline,WA,35250,GBC International Bank,1-Oct-10,20-Aug-12 +Wakulla Bank,Crawfordville,FL,21777,Centennial Bank,1-Oct-10,2-Nov-12 +North County Bank,Arlington,WA,35053,Whidbey Island Bank,24-Sep-10,20-Aug-12 +Haven Trust Bank Florida,Ponte Vedra Beach,FL,58308,First Southern Bank,24-Sep-10,5-Nov-12 +Maritime Savings Bank,West Allis,WI,28612,"North Shore Bank, FSB",17-Sep-10,20-Aug-12 +Bramble Savings Bank,Milford,OH,27808,Foundation Bank,17-Sep-10,20-Aug-12 +The Peoples Bank,Winder,GA,182,Community & Southern Bank,17-Sep-10,5-Nov-12 +First Commerce Community Bank,Douglasville,GA,57448,Community & Southern Bank,17-Sep-10,15-Jan-13 +Bank of Ellijay,Ellijay,GA,58197,Community & Southern Bank,17-Sep-10,15-Jan-13 +ISN Bank,Cherry Hill,NJ,57107,Customers Bank,17-Sep-10,22-Aug-12 +Horizon Bank,Bradenton,FL,35061,Bank of the Ozarks,10-Sep-10,5-Nov-12 +Sonoma Valley Bank,Sonoma,CA,27259,Westamerica Bank,20-Aug-10,12-Sep-12 +Los Padres Bank,Solvang,CA,32165,Pacific Western Bank,20-Aug-10,12-Sep-12 +Butte Community Bank,Chico,CA,33219,"Rabobank, N.A.",20-Aug-10,12-Sep-12 +Pacific State Bank,Stockton,CA,27090,"Rabobank, N.A.",20-Aug-10,12-Sep-12 +ShoreBank,Chicago,IL,15640,Urban Partnership Bank,20-Aug-10,16-May-13 +Imperial Savings and Loan Association,Martinsville,VA,31623,"River Community Bank, N.A.",20-Aug-10,24-Aug-12 +Independent National Bank,Ocala,FL,27344,"CenterState Bank of Florida, N.A.",20-Aug-10,5-Nov-12 +Community National Bank at Bartow,Bartow,FL,25266,"CenterState Bank of Florida, N.A.",20-Aug-10,5-Nov-12 +Palos Bank and Trust Company,Palos Heights,IL,17599,First Midwest Bank,13-Aug-10,22-Aug-12 +Ravenswood Bank,Chicago,IL,34231,Northbrook Bank & Trust Company,6-Aug-10,22-Aug-12 +LibertyBank,Eugene,OR,31964,Home Federal Bank,30-Jul-10,22-Aug-12 +The Cowlitz Bank,Longview,WA,22643,Heritage Bank,30-Jul-10,22-Aug-12 +Coastal Community Bank,Panama City Beach,FL,9619,Centennial Bank,30-Jul-10,5-Nov-12 +Bayside Savings Bank,Port Saint Joe,FL,57669,Centennial Bank,30-Jul-10,5-Nov-12 +Northwest Bank & Trust,Acworth,GA,57658,State Bank and Trust Company,30-Jul-10,5-Nov-12 +Home Valley Bank,Cave Junction,OR,23181,South Valley Bank & Trust,23-Jul-10,12-Sep-12 +SouthwestUSA Bank,Las Vegas,NV,35434,Plaza Bank,23-Jul-10,22-Aug-12 +Community Security Bank,New Prague,MN,34486,Roundbank,23-Jul-10,12-Sep-12 +Thunder Bank,Sylvan Grove,KS,10506,The Bennington State Bank,23-Jul-10,13-Sep-12 +Williamsburg First National Bank,Kingstree,SC,17837,"First Citizens Bank and Trust Company, Inc.",23-Jul-10,5-Nov-12 +Crescent Bank and Trust Company,Jasper,GA,27559,Renasant Bank,23-Jul-10,5-Nov-12 +Sterling Bank,Lantana,FL,32536,IBERIABANK,23-Jul-10,5-Nov-12 +"Mainstreet Savings Bank, FSB",Hastings,MI,28136,Commercial Bank,16-Jul-10,13-Sep-12 +Olde Cypress Community Bank,Clewiston,FL,28864,"CenterState Bank of Florida, N.A.",16-Jul-10,5-Nov-12 +Turnberry 
Bank,Aventura,FL,32280,NAFH National Bank,16-Jul-10,5-Nov-12 +Metro Bank of Dade County,Miami,FL,25172,NAFH National Bank,16-Jul-10,5-Nov-12 +First National Bank of the South,Spartanburg,SC,35383,NAFH National Bank,16-Jul-10,5-Nov-12 +Woodlands Bank,Bluffton,SC,32571,Bank of the Ozarks,16-Jul-10,5-Nov-12 +Home National Bank,Blackwell,OK,11636,RCB Bank,9-Jul-10,10-Dec-12 +USA Bank,Port Chester,NY,58072,New Century Bank,9-Jul-10,14-Sep-12 +Ideal Federal Savings Bank,Baltimore,MD,32456,No Acquirer,9-Jul-10,14-Sep-12 +Bay National Bank,Baltimore,MD,35462,"Bay Bank, FSB",9-Jul-10,15-Jan-13 +High Desert State Bank,Albuquerque,NM,35279,First American Bank,25-Jun-10,14-Sep-12 +First National Bank,Savannah,GA,34152,"The Savannah Bank, N.A.",25-Jun-10,5-Nov-12 +Peninsula Bank,Englewood,FL,26563,"Premier American Bank, N.A.",25-Jun-10,5-Nov-12 +Nevada Security Bank,Reno,NV,57110,Umpqua Bank,18-Jun-10,23-Aug-12 +Washington First International Bank,Seattle,WA,32955,East West Bank,11-Jun-10,14-Sep-12 +TierOne Bank,Lincoln,NE,29341,Great Western Bank,4-Jun-10,14-Sep-12 +Arcola Homestead Savings Bank,Arcola,IL,31813,No Acquirer,4-Jun-10,14-Sep-12 +First National Bank,Rosedale,MS,15814,The Jefferson Bank,4-Jun-10,5-Nov-12 +Sun West Bank,Las Vegas,NV,34785,City National Bank,28-May-10,14-Sep-12 +"Granite Community Bank, NA",Granite Bay,CA,57315,Tri Counties Bank,28-May-10,14-Sep-12 +Bank of Florida - Tampa,Tampa,FL,57814,EverBank,28-May-10,5-Nov-12 +Bank of Florida - Southwest,Naples,FL,35106,EverBank,28-May-10,5-Nov-12 +Bank of Florida - Southeast,Fort Lauderdale,FL,57360,EverBank,28-May-10,5-Nov-12 +Pinehurst Bank,Saint Paul,MN,57735,Coulee Bank,21-May-10,26-Oct-12 +Midwest Bank and Trust Company,Elmwood Park,IL,18117,"FirstMerit Bank, N.A.",14-May-10,23-Aug-12 +Southwest Community Bank,Springfield,MO,34255,Simmons First National Bank,14-May-10,23-Aug-12 +New Liberty Bank,Plymouth,MI,35586,Bank of Ann Arbor,14-May-10,23-Aug-12 +Satilla Community Bank,Saint Marys,GA,35114,Ameris Bank,14-May-10,5-Nov-12 +1st Pacific Bank of California,San Diego,CA,35517,City National Bank,7-May-10,13-Dec-12 +Towne Bank of Arizona,Mesa,AZ,57697,Commerce Bank of Arizona,7-May-10,23-Aug-12 +Access Bank,Champlin,MN,16476,PrinsBank,7-May-10,23-Aug-12 +The Bank of Bonifay,Bonifay,FL,14246,First Federal Bank of Florida,7-May-10,5-Nov-12 +Frontier Bank,Everett,WA,22710,"Union Bank, N.A.",30-Apr-10,15-Jan-13 +BC National Banks,Butler,MO,17792,Community First Bank,30-Apr-10,23-Aug-12 +Champion Bank,Creve Coeur,MO,58362,BankLiberty,30-Apr-10,23-Aug-12 +CF Bancorp,Port Huron,MI,30005,First Michigan Bank,30-Apr-10,15-Jan-13 +Westernbank Puerto Rico,Mayaguez,PR,31027,Banco Popular de Puerto Rico,30-Apr-10,5-Nov-12 +R-G Premier Bank of Puerto Rico,Hato Rey,PR,32185,Scotiabank de Puerto Rico,30-Apr-10,5-Nov-12 +Eurobank,San Juan,PR,27150,Oriental Bank and Trust,30-Apr-10,5-Nov-12 +Wheatland Bank,Naperville,IL,58429,Wheaton Bank & Trust,23-Apr-10,23-Aug-12 +Peotone Bank and Trust Company,Peotone,IL,10888,First Midwest Bank,23-Apr-10,23-Aug-12 +Lincoln Park Savings Bank,Chicago,IL,30600,Northbrook Bank & Trust Company,23-Apr-10,23-Aug-12 +New Century Bank,Chicago,IL,34821,"MB Financial Bank, N.A.",23-Apr-10,23-Aug-12 +Citizens Bank and Trust Company of Chicago,Chicago,IL,34658,Republic Bank of Chicago,23-Apr-10,23-Aug-12 +Broadway Bank,Chicago,IL,22853,"MB Financial Bank, N.A.",23-Apr-10,23-Aug-12 +"Amcore Bank, National Association",Rockford,IL,3735,Harris N.A.,23-Apr-10,23-Aug-12 +City Bank,Lynnwood,WA,21521,Whidbey Island 
Bank,16-Apr-10,14-Sep-12 +Tamalpais Bank,San Rafael,CA,33493,"Union Bank, N.A.",16-Apr-10,23-Aug-12 +Innovative Bank,Oakland,CA,23876,Center Bank,16-Apr-10,23-Aug-12 +Butler Bank,Lowell,MA,26619,People's United Bank,16-Apr-10,23-Aug-12 +Riverside National Bank of Florida,Fort Pierce,FL,24067,"TD Bank, N.A.",16-Apr-10,5-Nov-12 +AmericanFirst Bank,Clermont,FL,57724,"TD Bank, N.A.",16-Apr-10,31-Oct-12 +First Federal Bank of North Florida,Palatka,FL,28886,"TD Bank, N.A.",16-Apr-10,15-Jan-13 +Lakeside Community Bank,Sterling Heights,MI,34878,No Acquirer,16-Apr-10,23-Aug-12 +Beach First National Bank,Myrtle Beach,SC,34242,Bank of North Carolina,9-Apr-10,5-Nov-12 +Desert Hills Bank,Phoenix,AZ,57060,New York Community Bank,26-Mar-10,23-Aug-12 +Unity National Bank,Cartersville,GA,34678,Bank of the Ozarks,26-Mar-10,14-Sep-12 +Key West Bank,Key West,FL,34684,Centennial Bank,26-Mar-10,23-Aug-12 +McIntosh Commercial Bank,Carrollton,GA,57399,CharterBank,26-Mar-10,23-Aug-12 +State Bank of Aurora,Aurora,MN,8221,Northern State Bank,19-Mar-10,23-Aug-12 +First Lowndes Bank,Fort Deposit,AL,24957,First Citizens Bank,19-Mar-10,23-Aug-12 +Bank of Hiawassee,Hiawassee,GA,10054,Citizens South Bank,19-Mar-10,23-Aug-12 +Appalachian Community Bank,Ellijay,GA,33989,Community & Southern Bank,19-Mar-10,31-Oct-12 +Advanta Bank Corp.,Draper,UT,33535,No Acquirer,19-Mar-10,14-Sep-12 +Century Security Bank,Duluth,GA,58104,Bank of Upson,19-Mar-10,23-Aug-12 +American National Bank,Parma,OH,18806,The National Bank and Trust Company,19-Mar-10,23-Aug-12 +Statewide Bank,Covington,LA,29561,Home Bank,12-Mar-10,23-Aug-12 +Old Southern Bank,Orlando,FL,58182,Centennial Bank,12-Mar-10,23-Aug-12 +The Park Avenue Bank,New York,NY,27096,Valley National Bank,12-Mar-10,23-Aug-12 +LibertyPointe Bank,New York,NY,58071,Valley National Bank,11-Mar-10,23-Aug-12 +Centennial Bank,Ogden,UT,34430,No Acquirer,5-Mar-10,14-Sep-12 +Waterfield Bank,Germantown,MD,34976,No Acquirer,5-Mar-10,23-Aug-12 +Bank of Illinois,Normal,IL,9268,Heartland Bank and Trust Company,5-Mar-10,23-Aug-12 +Sun American Bank,Boca Raton,FL,27126,First-Citizens Bank & Trust Company,5-Mar-10,23-Aug-12 +Rainier Pacific Bank,Tacoma,WA,38129,Umpqua Bank,26-Feb-10,23-Aug-12 +Carson River Community Bank,Carson City,NV,58352,Heritage Bank of Nevada,26-Feb-10,15-Jan-13 +"La Jolla Bank, FSB",La Jolla,CA,32423,"OneWest Bank, FSB",19-Feb-10,24-Aug-12 +George Washington Savings Bank,Orland Park,IL,29952,"FirstMerit Bank, N.A.",19-Feb-10,24-Aug-12 +The La Coste National Bank,La Coste,TX,3287,Community National Bank,19-Feb-10,14-Sep-12 +Marco Community Bank,Marco Island,FL,57586,Mutual of Omaha Bank,19-Feb-10,24-Aug-12 +1st American State Bank of Minnesota,Hancock,MN,15448,"Community Development Bank, FSB",5-Feb-10,24-Aug-12 +American Marine Bank,Bainbridge Island,WA,16730,Columbia State Bank,29-Jan-10,24-Aug-12 +First Regional Bank,Los Angeles,CA,23011,First-Citizens Bank & Trust Company,29-Jan-10,24-Aug-12 +Community Bank and Trust,Cornelia,GA,5702,SCBT National Association,29-Jan-10,15-Jan-13 +"Marshall Bank, N.A.",Hallock,MN,16133,United Valley Bank,29-Jan-10,23-Aug-12 +Florida Community Bank,Immokalee,FL,5672,"Premier American Bank, N.A.",29-Jan-10,15-Jan-13 +First National Bank of Georgia,Carrollton,GA,16480,Community & Southern Bank,29-Jan-10,13-Dec-12 +Columbia River Bank,The Dalles,OR,22469,Columbia State Bank,22-Jan-10,14-Sep-12 +Evergreen Bank,Seattle,WA,20501,Umpqua Bank,22-Jan-10,15-Jan-13 +Charter Bank,Santa Fe,NM,32498,Charter Bank,22-Jan-10,23-Aug-12 +Bank of 
Leeton,Leeton,MO,8265,"Sunflower Bank, N.A.",22-Jan-10,15-Jan-13 +Premier American Bank,Miami,FL,57147,"Premier American Bank, N.A.",22-Jan-10,13-Dec-12 +Barnes Banking Company,Kaysville,UT,1252,No Acquirer,15-Jan-10,23-Aug-12 +St. Stephen State Bank,St. Stephen,MN,17522,First State Bank of St. Joseph,15-Jan-10,23-Aug-12 +Town Community Bank & Trust,Antioch,IL,34705,First American Bank,15-Jan-10,23-Aug-12 +Horizon Bank,Bellingham,WA,22977,Washington Federal Savings and Loan Association,8-Jan-10,23-Aug-12 +"First Federal Bank of California, F.S.B.",Santa Monica,CA,28536,"OneWest Bank, FSB",18-Dec-09,23-Aug-12 +Imperial Capital Bank,La Jolla,CA,26348,City National Bank,18-Dec-09,5-Sep-12 +Independent Bankers' Bank,Springfield,IL,26820,The Independent BankersBank (TIB),18-Dec-09,23-Aug-12 +New South Federal Savings Bank,Irondale,AL,32276,Beal Bank,18-Dec-09,23-Aug-12 +Citizens State Bank,New Baltimore,MI,1006,No Acquirer,18-Dec-09,5-Nov-12 +Peoples First Community Bank,Panama City,FL,32167,Hancock Bank,18-Dec-09,5-Nov-12 +RockBridge Commercial Bank,Atlanta,GA,58315,No Acquirer,18-Dec-09,5-Nov-12 +SolutionsBank,Overland Park,KS,4731,Arvest Bank,11-Dec-09,23-Aug-12 +"Valley Capital Bank, N.A.",Mesa,AZ,58399,Enterprise Bank & Trust,11-Dec-09,23-Aug-12 +"Republic Federal Bank, N.A.",Miami,FL,22846,1st United Bank,11-Dec-09,5-Nov-12 +Greater Atlantic Bank,Reston,VA,32583,Sonabank,4-Dec-09,5-Nov-12 +Benchmark Bank,Aurora,IL,10440,"MB Financial Bank, N.A.",4-Dec-09,23-Aug-12 +AmTrust Bank,Cleveland,OH,29776,New York Community Bank,4-Dec-09,5-Nov-12 +The Tattnall Bank,Reidsville,GA,12080,Heritage Bank of the South,4-Dec-09,5-Nov-12 +First Security National Bank,Norcross,GA,26290,State Bank and Trust Company,4-Dec-09,5-Nov-12 +The Buckhead Community Bank,Atlanta,GA,34663,State Bank and Trust Company,4-Dec-09,5-Nov-12 +Commerce Bank of Southwest Florida,Fort Myers,FL,58016,Central Bank,20-Nov-09,5-Nov-12 +Pacific Coast National Bank,San Clemente,CA,57914,Sunwest Bank,13-Nov-09,22-Aug-12 +Orion Bank,Naples,FL,22427,IBERIABANK,13-Nov-09,5-Nov-12 +"Century Bank, F.S.B.",Sarasota,FL,32267,IBERIABANK,13-Nov-09,22-Aug-12 +United Commercial Bank,San Francisco,CA,32469,East West Bank,6-Nov-09,5-Nov-12 +Gateway Bank of St. Louis,St. Louis,MO,19450,Central Bank of Kansas City,6-Nov-09,22-Aug-12 +Prosperan Bank,Oakdale,MN,35074,"Alerus Financial, N.A.",6-Nov-09,22-Aug-12 +Home Federal Savings Bank,Detroit,MI,30329,Liberty Bank and Trust Company,6-Nov-09,22-Aug-12 +United Security Bank,Sparta,GA,22286,Ameris Bank,6-Nov-09,15-Jan-13 +North Houston Bank,Houston,TX,18776,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Madisonville State Bank,Madisonville,TX,33782,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Citizens National Bank,Teague,TX,25222,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Park National Bank,Chicago,IL,11677,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Pacific National Bank,San Francisco,CA,30006,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +California National Bank,Los Angeles,CA,34659,U.S. Bank N.A.,30-Oct-09,5-Sep-12 +San Diego National Bank,San Diego,CA,23594,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Community Bank of Lemont,Lemont,IL,35291,U.S. Bank N.A.,30-Oct-09,15-Jan-13 +"Bank USA, N.A.",Phoenix,AZ,32218,U.S. 
Bank N.A.,30-Oct-09,22-Aug-12 +First DuPage Bank,Westmont,IL,35038,First Midwest Bank,23-Oct-09,22-Aug-12 +Riverview Community Bank,Otsego,MN,57525,Central Bank,23-Oct-09,22-Aug-12 +Bank of Elmwood,Racine,WI,18321,Tri City National Bank,23-Oct-09,22-Aug-12 +Flagship National Bank,Bradenton,FL,35044,First Federal Bank of Florida,23-Oct-09,22-Aug-12 +Hillcrest Bank Florida,Naples,FL,58336,Stonegate Bank,23-Oct-09,22-Aug-12 +American United Bank,Lawrenceville,GA,57794,Ameris Bank,23-Oct-09,5-Sep-12 +Partners Bank,Naples,FL,57959,Stonegate Bank,23-Oct-09,15-Jan-13 +San Joaquin Bank,Bakersfield,CA,23266,Citizens Business Bank,16-Oct-09,22-Aug-12 +Southern Colorado National Bank,Pueblo,CO,57263,Legacy Bank,2-Oct-09,5-Sep-12 +Jennings State Bank,Spring Grove,MN,11416,Central Bank,2-Oct-09,21-Aug-12 +Warren Bank,Warren,MI,34824,The Huntington National Bank,2-Oct-09,21-Aug-12 +Georgian Bank,Atlanta,GA,57151,"First Citizens Bank and Trust Company, Inc.",25-Sep-09,21-Aug-12 +"Irwin Union Bank, F.S.B.",Louisville,KY,57068,"First Financial Bank, N.A.",18-Sep-09,5-Sep-12 +Irwin Union Bank and Trust Company,Columbus,IN,10100,"First Financial Bank, N.A.",18-Sep-09,21-Aug-12 +Venture Bank,Lacey,WA,22868,First-Citizens Bank & Trust Company,11-Sep-09,21-Aug-12 +Brickwell Community Bank,Woodbury,MN,57736,CorTrust Bank N.A.,11-Sep-09,15-Jan-13 +"Corus Bank, N.A.",Chicago,IL,13693,"MB Financial Bank, N.A.",11-Sep-09,21-Aug-12 +First State Bank,Flagstaff,AZ,34875,Sunwest Bank,4-Sep-09,15-Jan-13 +Platinum Community Bank,Rolling Meadows,IL,35030,No Acquirer,4-Sep-09,21-Aug-12 +Vantus Bank,Sioux City,IN,27732,Great Southern Bank,4-Sep-09,21-Aug-12 +InBank,Oak Forest,IL,20203,"MB Financial Bank, N.A.",4-Sep-09,21-Aug-12 +First Bank of Kansas City,Kansas City,MO,25231,Great American Bank,4-Sep-09,21-Aug-12 +Affinity Bank,Ventura,CA,27197,Pacific Western Bank,28-Aug-09,21-Aug-12 +Mainstreet Bank,Forest Lake,MN,1909,Central Bank,28-Aug-09,21-Aug-12 +Bradford Bank,Baltimore,MD,28312,Manufacturers and Traders Trust Company (M&T Bank),28-Aug-09,15-Jan-13 +Guaranty Bank,Austin,TX,32618,BBVA Compass,21-Aug-09,21-Aug-12 +CapitalSouth Bank,Birmingham,AL,22130,IBERIABANK,21-Aug-09,15-Jan-13 +First Coweta Bank,Newnan,GA,57702,United Bank,21-Aug-09,15-Jan-13 +ebank,Atlanta,GA,34682,"Stearns Bank, N.A.",21-Aug-09,21-Aug-12 +Community Bank of Nevada,Las Vegas,NV,34043,No Acquirer,14-Aug-09,21-Aug-12 +Community Bank of Arizona,Phoenix,AZ,57645,MidFirst Bank,14-Aug-09,21-Aug-12 +"Union Bank, National Association",Gilbert,AZ,34485,MidFirst Bank,14-Aug-09,21-Aug-12 +Colonial Bank,Montgomery,AL,9609,"Branch Banking & Trust Company, (BB&T)",14-Aug-09,5-Sep-12 +Dwelling House Savings and Loan Association,Pittsburgh,PA,31559,"PNC Bank, N.A.",14-Aug-09,15-Jan-13 +Community First Bank,Prineville,OR,23268,Home Federal Bank,7-Aug-09,15-Jan-13 +Community National Bank of Sarasota County,Venice,FL,27183,"Stearns Bank, N.A.",7-Aug-09,20-Aug-12 +First State Bank,Sarasota,FL,27364,"Stearns Bank, N.A.",7-Aug-09,20-Aug-12 +Mutual Bank,Harvey,IL,18659,United Central Bank,31-Jul-09,20-Aug-12 +First BankAmericano,Elizabeth,NJ,34270,Crown Bank,31-Jul-09,20-Aug-12 +Peoples Community Bank,West Chester,OH,32288,"First Financial Bank, N.A.",31-Jul-09,20-Aug-12 +Integrity Bank,Jupiter,FL,57604,Stonegate Bank,31-Jul-09,20-Aug-12 +First State Bank of Altus,Altus,OK,9873,Herring Bank,31-Jul-09,20-Aug-12 +Security Bank of Jones County,Gray,GA,8486,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Houston County,Perry,GA,27048,State Bank and 
Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Bibb County,Macon,GA,27367,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of North Metro,Woodstock,GA,57105,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of North Fulton,Alpharetta,GA,57430,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Gwinnett County,Suwanee,GA,57346,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Waterford Village Bank,Williamsville,NY,58065,"Evans Bank, N.A.",24-Jul-09,20-Aug-12 +Temecula Valley Bank,Temecula,CA,34341,First-Citizens Bank & Trust Company,17-Jul-09,20-Aug-12 +Vineyard Bank,Rancho Cucamonga,CA,23556,California Bank & Trust,17-Jul-09,20-Aug-12 +BankFirst,Sioux Falls,SD,34103,"Alerus Financial, N.A.",17-Jul-09,20-Aug-12 +First Piedmont Bank,Winder,GA,34594,First American Bank and Trust Company,17-Jul-09,15-Jan-13 +Bank of Wyoming,Thermopolis,WY,22754,Central Bank & Trust,10-Jul-09,20-Aug-12 +Founders Bank,Worth,IL,18390,The PrivateBank and Trust Company,2-Jul-09,20-Aug-12 +Millennium State Bank of Texas,Dallas,TX,57667,State Bank of Texas,2-Jul-09,26-Oct-12 +First National Bank of Danville,Danville,IL,3644,"First Financial Bank, N.A.",2-Jul-09,20-Aug-12 +Elizabeth State Bank,Elizabeth,IL,9262,Galena State Bank and Trust Company,2-Jul-09,20-Aug-12 +Rock River Bank,Oregon,IL,15302,The Harvard State Bank,2-Jul-09,20-Aug-12 +First State Bank of Winchester,Winchester,IL,11710,The First National Bank of Beardstown,2-Jul-09,20-Aug-12 +John Warner Bank,Clinton,IL,12093,State Bank of Lincoln,2-Jul-09,20-Aug-12 +Mirae Bank,Los Angeles,CA,57332,Wilshire State Bank,26-Jun-09,20-Aug-12 +MetroPacific Bank,Irvine,CA,57893,Sunwest Bank,26-Jun-09,20-Aug-12 +Horizon Bank,Pine City,MN,9744,"Stearns Bank, N.A.",26-Jun-09,20-Aug-12 +Neighborhood Community Bank,Newnan,GA,35285,CharterBank,26-Jun-09,20-Aug-12 +Community Bank of West Georgia,Villa Rica,GA,57436,No Acquirer,26-Jun-09,17-Aug-12 +First National Bank of Anthony,Anthony,KS,4614,Bank of Kansas,19-Jun-09,17-Aug-12 +Cooperative Bank,Wilmington,NC,27837,First Bank,19-Jun-09,17-Aug-12 +Southern Community Bank,Fayetteville,GA,35251,United Community Bank,19-Jun-09,17-Aug-12 +Bank of Lincolnwood,Lincolnwood,IL,17309,Republic Bank of Chicago,5-Jun-09,17-Aug-12 +Citizens National Bank,Macomb,IL,5757,Morton Community Bank,22-May-09,4-Sep-12 +Strategic Capital Bank,Champaign,IL,35175,Midland States Bank,22-May-09,4-Sep-12 +"BankUnited, FSB",Coral Gables,FL,32247,BankUnited,21-May-09,17-Aug-12 +Westsound Bank,Bremerton,WA,34843,Kitsap Bank,8-May-09,4-Sep-12 +America West Bank,Layton,UT,35461,Cache Valley Bank,1-May-09,17-Aug-12 +Citizens Community Bank,Ridgewood,NJ,57563,North Jersey Community Bank,1-May-09,4-Sep-12 +"Silverton Bank, NA",Atlanta,GA,26535,No Acquirer,1-May-09,17-Aug-12 +First Bank of Idaho,Ketchum,ID,34396,"U.S. 
Bank, N.A.",24-Apr-09,17-Aug-12 +First Bank of Beverly Hills,Calabasas,CA,32069,No Acquirer,24-Apr-09,4-Sep-12 +Michigan Heritage Bank,Farmington Hills,MI,34369,Level One Bank,24-Apr-09,17-Aug-12 +American Southern Bank,Kennesaw,GA,57943,Bank of North Georgia,24-Apr-09,17-Aug-12 +Great Basin Bank of Nevada,Elko,NV,33824,Nevada State Bank,17-Apr-09,4-Sep-12 +American Sterling Bank,Sugar Creek,MO,8266,Metcalf Bank,17-Apr-09,31-Aug-12 +New Frontier Bank,Greeley,CO,34881,No Acquirer,10-Apr-09,4-Sep-12 +Cape Fear Bank,Wilmington,NC,34639,First Federal Savings and Loan Association,10-Apr-09,17-Aug-12 +Omni National Bank,Atlanta,GA,22238,No Acquirer,27-Mar-09,17-Aug-12 +"TeamBank, NA",Paola,KS,4754,Great Southern Bank,20-Mar-09,17-Aug-12 +Colorado National Bank,Colorado Springs,CO,18896,Herring Bank,20-Mar-09,17-Aug-12 +FirstCity Bank,Stockbridge,GA,18243,No Acquirer,20-Mar-09,17-Aug-12 +Freedom Bank of Georgia,Commerce,GA,57558,Northeast Georgia Bank,6-Mar-09,17-Aug-12 +Security Savings Bank,Henderson,NV,34820,Bank of Nevada,27-Feb-09,7-Sep-12 +Heritage Community Bank,Glenwood,IL,20078,"MB Financial Bank, N.A.",27-Feb-09,17-Aug-12 +Silver Falls Bank,Silverton,OR,35399,Citizens Bank,20-Feb-09,17-Aug-12 +Pinnacle Bank of Oregon,Beaverton,OR,57342,Washington Trust Bank of Spokane,13-Feb-09,17-Aug-12 +Corn Belt Bank & Trust Co.,Pittsfield,IL,16500,The Carlinville National Bank,13-Feb-09,17-Aug-12 +Riverside Bank of the Gulf Coast,Cape Coral,FL,34563,TIB Bank,13-Feb-09,17-Aug-12 +Sherman County Bank,Loup City,NE,5431,Heritage Bank,13-Feb-09,17-Aug-12 +County Bank,Merced,CA,22574,Westamerica Bank,6-Feb-09,4-Sep-12 +Alliance Bank,Culver City,CA,23124,California Bank & Trust,6-Feb-09,16-Aug-12 +FirstBank Financial Services,McDonough,GA,57017,Regions Bank,6-Feb-09,16-Aug-12 +Ocala National Bank,Ocala,FL,26538,"CenterState Bank of Florida, N.A.",30-Jan-09,4-Sep-12 +Suburban FSB,Crofton,MD,30763,Bank of Essex,30-Jan-09,16-Aug-12 +MagnetBank,Salt Lake City,UT,58001,No Acquirer,30-Jan-09,16-Aug-12 +1st Centennial Bank,Redlands,CA,33025,First California Bank,23-Jan-09,16-Aug-12 +Bank of Clark County,Vancouver,WA,34959,Umpqua Bank,16-Jan-09,16-Aug-12 +National Bank of Commerce,Berkeley,IL,19733,Republic Bank of Chicago,16-Jan-09,16-Aug-12 +Sanderson State Bank,Sanderson,TX,11568,The Pecos County State Bank,12-Dec-08,4-Sep-12 +Haven Trust Bank,Duluth,GA,35379,"Branch Banking & Trust Company, (BB&T)",12-Dec-08,16-Aug-12 +First Georgia Community Bank,Jackson,GA,34301,United Bank,5-Dec-08,16-Aug-12 +PFF Bank & Trust,Pomona,CA,28344,"U.S. Bank, N.A.",21-Nov-08,4-Jan-13 +Downey Savings & Loan,Newport Beach,CA,30968,"U.S. 
Bank, N.A.",21-Nov-08,4-Jan-13 +Community Bank,Loganville,GA,16490,Bank of Essex,21-Nov-08,4-Sep-12 +Security Pacific Bank,Los Angeles,CA,23595,Pacific Western Bank,7-Nov-08,28-Aug-12 +"Franklin Bank, SSB",Houston,TX,26870,Prosperity Bank,7-Nov-08,16-Aug-12 +Freedom Bank,Bradenton,FL,57930,Fifth Third Bank,31-Oct-08,16-Aug-12 +Alpha Bank & Trust,Alpharetta,GA,58241,"Stearns Bank, N.A.",24-Oct-08,16-Aug-12 +Meridian Bank,Eldred,IL,13789,National Bank,10-Oct-08,31-May-12 +Main Street Bank,Northville,MI,57654,Monroe Bank & Trust,10-Oct-08,16-Aug-12 +Washington Mutual Bank,Henderson,NV,32633,JP Morgan Chase Bank,25-Sep-08,16-Aug-12 +Ameribank,Northfork,WV,6782,The Citizens Savings Bank,19-Sep-08,16-Aug-12 +Silver State Bank,Henderson,NV,34194,Nevada State Bank,5-Sep-08,16-Aug-12 +Integrity Bank,Alpharetta,GA,35469,Regions Bank,29-Aug-08,16-Aug-12 +Columbian Bank & Trust,Topeka,KS,22728,Citizens Bank & Trust,22-Aug-08,16-Aug-12 +First Priority Bank,Bradenton,FL,57523,SunTrust Bank,1-Aug-08,16-Aug-12 +"First Heritage Bank, NA",Newport Beach,CA,57961,Mutual of Omaha Bank,25-Jul-08,28-Aug-12 +First National Bank of Nevada,Reno,NV,27011,Mutual of Omaha Bank,25-Jul-08,28-Aug-12 +IndyMac Bank,Pasadena,CA,29730,"OneWest Bank, FSB",11-Jul-08,28-Aug-12 +"First Integrity Bank, NA",Staples,MN,12736,First International Bank and Trust,30-May-08,28-Aug-12 +"ANB Financial, NA",Bentonville,AR,33901,Pulaski Bank and Trust Company,9-May-08,28-Aug-12 +Hume Bank,Hume,MO,1971,Security Bank,7-Mar-08,28-Aug-12 +Douglass National Bank,Kansas City,MO,24660,Liberty Bank and Trust Company,25-Jan-08,26-Oct-12 +Miami Valley Bank,Lakeview,OH,16848,The Citizens Banking Company,4-Oct-07,28-Aug-12 +NetBank,Alpharetta,GA,32575,ING DIRECT,28-Sep-07,28-Aug-12 +Metropolitan Savings Bank,Pittsburgh,PA,35353,Allegheny Valley Bank of Pittsburgh,2-Feb-07,27-Oct-10 +Bank of Ephraim,Ephraim,UT,1249,Far West Bank,25-Jun-04,9-Apr-08 +Reliance Bank,White Plains,NY,26778,Union State Bank,19-Mar-04,9-Apr-08 +Guaranty National Bank of Tallahassee,Tallahassee,FL,26838,Hancock Bank of Florida,12-Mar-04,5-Jun-12 +Dollar Savings Bank,Newark,NJ,31330,No Acquirer,14-Feb-04,9-Apr-08 +Pulaski Savings Bank,Philadelphia,PA,27203,Earthstar Bank,14-Nov-03,22-Jul-05 +First National Bank of Blanchardville,Blanchardville,WI,11639,The Park Bank,9-May-03,5-Jun-12 +Southern Pacific Bank,Torrance,CA,27094,Beal Bank,7-Feb-03,20-Oct-08 +Farmers Bank of Cheneyville,Cheneyville,LA,16445,Sabine State Bank & Trust,17-Dec-02,20-Oct-04 +Bank of Alamo,Alamo,TN,9961,No Acquirer,8-Nov-02,18-Mar-05 +AmTrade International Bank,Atlanta,GA,33784,No Acquirer,30-Sep-02,11-Sep-06 +Universal Federal Savings Bank,Chicago,IL,29355,Chicago Community Bank,27-Jun-02,9-Apr-08 +Connecticut Bank of Commerce,Stamford,CT,19183,Hudson United Bank,26-Jun-02,14-Feb-12 +New Century Bank,Shelby Township,MI,34979,No Acquirer,28-Mar-02,18-Mar-05 +Net 1st National Bank,Boca Raton,FL,26652,Bank Leumi USA,1-Mar-02,9-Apr-08 +"NextBank, NA",Phoenix,AZ,22314,No Acquirer,7-Feb-02,27-Aug-10 +Oakwood Deposit Bank Co.,Oakwood,OH,8966,The State Bank & Trust Company,1-Feb-02,25-Oct-12 +Bank of Sierra Blanca,Sierra Blanca,TX,22002,The Security State Bank of Pecos,18-Jan-02,6-Nov-03 +"Hamilton Bank, NA",Miami,FL,24382,Israel Discount Bank of New York,11-Jan-02,5-Jun-12 +Sinclair National Bank,Gravette,AR,34248,Delta Trust & Bank,7-Sep-01,10-Feb-04 +"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB",27-Jul-01,5-Jun-12 +Malta National Bank,Malta,OH,6629,North Valley Bank,3-May-01,18-Nov-02 +First 
Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03 +National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05 +Bank of Honolulu,Honolulu,HI,21029,Bank of the Orient,13-Oct-00,17-Mar-05 diff --git a/pandas/tests/io/data/csv/iris.csv b/pandas/tests/io/data/csv/iris.csv new file mode 100644 index 00000000..c19b9c36 --- /dev/null +++ b/pandas/tests/io/data/csv/iris.csv @@ -0,0 +1,151 @@ +SepalLength,SepalWidth,PetalLength,PetalWidth,Name +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor 
+6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica \ No newline at end of file diff --git a/pandas/tests/io/data/csv/test1.csv b/pandas/tests/io/data/csv/test1.csv new file mode 100644 index 00000000..4bdb6294 --- /dev/null +++ b/pandas/tests/io/data/csv/test1.csv @@ -0,0 +1,8 @@ +index,A,B,C,D +2000-01-03 00:00:00,0.980268513777,3.68573087906,-0.364216805298,-1.15973806169 +2000-01-04 00:00:00,1.04791624281,-0.0412318367011,-0.16181208307,0.212549316967 +2000-01-05 00:00:00,0.498580885705,0.731167677815,-0.537677223318,1.34627041952 +2000-01-06 00:00:00,1.12020151869,1.56762092543,0.00364077397681,0.67525259227 +2000-01-07 00:00:00,-0.487094399463,0.571454623474,-1.6116394093,0.103468562917 +2000-01-10 00:00:00,0.836648671666,0.246461918642,0.588542635376,1.0627820613 +2000-01-11 00:00:00,-0.157160753327,1.34030689438,1.19577795622,-1.09700699751 \ No newline at end of file diff --git a/pandas/tests/io/data/csv/test1.csv.bz2 b/pandas/tests/io/data/csv/test1.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f96f26a8e741907243c32845024b7277f0680005 GIT binary patch literal 307 zcmV-30nGkFT4*^jL0KkKSW1Ji@9)pArt5lpH4IRO00Iz@ zAlz^!=a51W3H02z!+mSBeK%(2i;3qroLGpgc)ZhAta;6*(Kx-+d8k8stbmZHPIKD0 z%{khavXfep_f#A?g|OEiy6GMb+kPrC)~cTlb6Q1R&R1#Y8j1mNvJ1wwH#o39Ih8E! 
zh)Zn6%{yHy5rH`%)11mfB0$%H6FSJnOFqhY6AzMzlFupWZ6*MkRe{ySGvcT?8Y%YR zy=rhGo@XLhosGo#8?jc!M4$nL1(98A8>(wY4d^qqg(%E!y5l#$-h21{F64@Ep&_5g FVD{@glUM)% literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/csv/test1.csv.gz b/pandas/tests/io/data/csv/test1.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..1336db6e2af7e99bbcfc1105a34bfc575dc36e39 GIT binary patch literal 294 zcmV+>0onc^iwFSY)EiX*167eZQpG?FL~~BT0miP@){@9rjsgJ*F~>*q6PvOFYh3mE zsptFW^XqrLuDA8RKAsf70XbmLz{}%ZIJ=;%5X;PE=X|E2vcYxWG`b24pMTd1xj8vqwIDPuw&2SO=Plp!ZwMZ?<$$Gm`X7xbwUQ8jm3vxQ8$JTYzrw3Qs-g=ik%a+b{ s*hQ0nb}~LhhMDH09vE2TyUUuE=GJ?BbgQBhV0VW60b<1z@;U+l0HUsoVE_OC literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/csv/test_mmap.csv b/pandas/tests/io/data/csv/test_mmap.csv new file mode 100644 index 00000000..cc2cd7c3 --- /dev/null +++ b/pandas/tests/io/data/csv/test_mmap.csv @@ -0,0 +1,5 @@ +a,b,c +1,one,I +2,two,II + +3,three,III diff --git a/pandas/tests/io/data/csv/tips.csv b/pandas/tests/io/data/csv/tips.csv new file mode 100644 index 00000000..856a65a6 --- /dev/null +++ b/pandas/tests/io/data/csv/tips.csv @@ -0,0 +1,245 @@ +total_bill,tip,sex,smoker,day,time,size +16.99,1.01,Female,No,Sun,Dinner,2 +10.34,1.66,Male,No,Sun,Dinner,3 +21.01,3.5,Male,No,Sun,Dinner,3 +23.68,3.31,Male,No,Sun,Dinner,2 +24.59,3.61,Female,No,Sun,Dinner,4 +25.29,4.71,Male,No,Sun,Dinner,4 +8.77,2.0,Male,No,Sun,Dinner,2 +26.88,3.12,Male,No,Sun,Dinner,4 +15.04,1.96,Male,No,Sun,Dinner,2 +14.78,3.23,Male,No,Sun,Dinner,2 +10.27,1.71,Male,No,Sun,Dinner,2 +35.26,5.0,Female,No,Sun,Dinner,4 +15.42,1.57,Male,No,Sun,Dinner,2 +18.43,3.0,Male,No,Sun,Dinner,4 +14.83,3.02,Female,No,Sun,Dinner,2 +21.58,3.92,Male,No,Sun,Dinner,2 +10.33,1.67,Female,No,Sun,Dinner,3 +16.29,3.71,Male,No,Sun,Dinner,3 +16.97,3.5,Female,No,Sun,Dinner,3 +20.65,3.35,Male,No,Sat,Dinner,3 +17.92,4.08,Male,No,Sat,Dinner,2 +20.29,2.75,Female,No,Sat,Dinner,2 +15.77,2.23,Female,No,Sat,Dinner,2 +39.42,7.58,Male,No,Sat,Dinner,4 +19.82,3.18,Male,No,Sat,Dinner,2 +17.81,2.34,Male,No,Sat,Dinner,4 +13.37,2.0,Male,No,Sat,Dinner,2 +12.69,2.0,Male,No,Sat,Dinner,2 +21.7,4.3,Male,No,Sat,Dinner,2 +19.65,3.0,Female,No,Sat,Dinner,2 +9.55,1.45,Male,No,Sat,Dinner,2 +18.35,2.5,Male,No,Sat,Dinner,4 +15.06,3.0,Female,No,Sat,Dinner,2 +20.69,2.45,Female,No,Sat,Dinner,4 +17.78,3.27,Male,No,Sat,Dinner,2 +24.06,3.6,Male,No,Sat,Dinner,3 +16.31,2.0,Male,No,Sat,Dinner,3 +16.93,3.07,Female,No,Sat,Dinner,3 +18.69,2.31,Male,No,Sat,Dinner,3 +31.27,5.0,Male,No,Sat,Dinner,3 +16.04,2.24,Male,No,Sat,Dinner,3 +17.46,2.54,Male,No,Sun,Dinner,2 +13.94,3.06,Male,No,Sun,Dinner,2 +9.68,1.32,Male,No,Sun,Dinner,2 +30.4,5.6,Male,No,Sun,Dinner,4 +18.29,3.0,Male,No,Sun,Dinner,2 +22.23,5.0,Male,No,Sun,Dinner,2 +32.4,6.0,Male,No,Sun,Dinner,4 +28.55,2.05,Male,No,Sun,Dinner,3 +18.04,3.0,Male,No,Sun,Dinner,2 +12.54,2.5,Male,No,Sun,Dinner,2 +10.29,2.6,Female,No,Sun,Dinner,2 +34.81,5.2,Female,No,Sun,Dinner,4 +9.94,1.56,Male,No,Sun,Dinner,2 +25.56,4.34,Male,No,Sun,Dinner,4 +19.49,3.51,Male,No,Sun,Dinner,2 +38.01,3.0,Male,Yes,Sat,Dinner,4 +26.41,1.5,Female,No,Sat,Dinner,2 +11.24,1.76,Male,Yes,Sat,Dinner,2 +48.27,6.73,Male,No,Sat,Dinner,4 +20.29,3.21,Male,Yes,Sat,Dinner,2 +13.81,2.0,Male,Yes,Sat,Dinner,2 +11.02,1.98,Male,Yes,Sat,Dinner,2 +18.29,3.76,Male,Yes,Sat,Dinner,4 +17.59,2.64,Male,No,Sat,Dinner,3 +20.08,3.15,Male,No,Sat,Dinner,3 +16.45,2.47,Female,No,Sat,Dinner,2 +3.07,1.0,Female,Yes,Sat,Dinner,1 +20.23,2.01,Male,No,Sat,Dinner,2 +15.01,2.09,Male,Yes,Sat,Dinner,2 +12.02,1.97,Male,No,Sat,Dinner,2 
+17.07,3.0,Female,No,Sat,Dinner,3 +26.86,3.14,Female,Yes,Sat,Dinner,2 +25.28,5.0,Female,Yes,Sat,Dinner,2 +14.73,2.2,Female,No,Sat,Dinner,2 +10.51,1.25,Male,No,Sat,Dinner,2 +17.92,3.08,Male,Yes,Sat,Dinner,2 +27.2,4.0,Male,No,Thur,Lunch,4 +22.76,3.0,Male,No,Thur,Lunch,2 +17.29,2.71,Male,No,Thur,Lunch,2 +19.44,3.0,Male,Yes,Thur,Lunch,2 +16.66,3.4,Male,No,Thur,Lunch,2 +10.07,1.83,Female,No,Thur,Lunch,1 +32.68,5.0,Male,Yes,Thur,Lunch,2 +15.98,2.03,Male,No,Thur,Lunch,2 +34.83,5.17,Female,No,Thur,Lunch,4 +13.03,2.0,Male,No,Thur,Lunch,2 +18.28,4.0,Male,No,Thur,Lunch,2 +24.71,5.85,Male,No,Thur,Lunch,2 +21.16,3.0,Male,No,Thur,Lunch,2 +28.97,3.0,Male,Yes,Fri,Dinner,2 +22.49,3.5,Male,No,Fri,Dinner,2 +5.75,1.0,Female,Yes,Fri,Dinner,2 +16.32,4.3,Female,Yes,Fri,Dinner,2 +22.75,3.25,Female,No,Fri,Dinner,2 +40.17,4.73,Male,Yes,Fri,Dinner,4 +27.28,4.0,Male,Yes,Fri,Dinner,2 +12.03,1.5,Male,Yes,Fri,Dinner,2 +21.01,3.0,Male,Yes,Fri,Dinner,2 +12.46,1.5,Male,No,Fri,Dinner,2 +11.35,2.5,Female,Yes,Fri,Dinner,2 +15.38,3.0,Female,Yes,Fri,Dinner,2 +44.3,2.5,Female,Yes,Sat,Dinner,3 +22.42,3.48,Female,Yes,Sat,Dinner,2 +20.92,4.08,Female,No,Sat,Dinner,2 +15.36,1.64,Male,Yes,Sat,Dinner,2 +20.49,4.06,Male,Yes,Sat,Dinner,2 +25.21,4.29,Male,Yes,Sat,Dinner,2 +18.24,3.76,Male,No,Sat,Dinner,2 +14.31,4.0,Female,Yes,Sat,Dinner,2 +14.0,3.0,Male,No,Sat,Dinner,2 +7.25,1.0,Female,No,Sat,Dinner,1 +38.07,4.0,Male,No,Sun,Dinner,3 +23.95,2.55,Male,No,Sun,Dinner,2 +25.71,4.0,Female,No,Sun,Dinner,3 +17.31,3.5,Female,No,Sun,Dinner,2 +29.93,5.07,Male,No,Sun,Dinner,4 +10.65,1.5,Female,No,Thur,Lunch,2 +12.43,1.8,Female,No,Thur,Lunch,2 +24.08,2.92,Female,No,Thur,Lunch,4 +11.69,2.31,Male,No,Thur,Lunch,2 +13.42,1.68,Female,No,Thur,Lunch,2 +14.26,2.5,Male,No,Thur,Lunch,2 +15.95,2.0,Male,No,Thur,Lunch,2 +12.48,2.52,Female,No,Thur,Lunch,2 +29.8,4.2,Female,No,Thur,Lunch,6 +8.52,1.48,Male,No,Thur,Lunch,2 +14.52,2.0,Female,No,Thur,Lunch,2 +11.38,2.0,Female,No,Thur,Lunch,2 +22.82,2.18,Male,No,Thur,Lunch,3 +19.08,1.5,Male,No,Thur,Lunch,2 +20.27,2.83,Female,No,Thur,Lunch,2 +11.17,1.5,Female,No,Thur,Lunch,2 +12.26,2.0,Female,No,Thur,Lunch,2 +18.26,3.25,Female,No,Thur,Lunch,2 +8.51,1.25,Female,No,Thur,Lunch,2 +10.33,2.0,Female,No,Thur,Lunch,2 +14.15,2.0,Female,No,Thur,Lunch,2 +16.0,2.0,Male,Yes,Thur,Lunch,2 +13.16,2.75,Female,No,Thur,Lunch,2 +17.47,3.5,Female,No,Thur,Lunch,2 +34.3,6.7,Male,No,Thur,Lunch,6 +41.19,5.0,Male,No,Thur,Lunch,5 +27.05,5.0,Female,No,Thur,Lunch,6 +16.43,2.3,Female,No,Thur,Lunch,2 +8.35,1.5,Female,No,Thur,Lunch,2 +18.64,1.36,Female,No,Thur,Lunch,3 +11.87,1.63,Female,No,Thur,Lunch,2 +9.78,1.73,Male,No,Thur,Lunch,2 +7.51,2.0,Male,No,Thur,Lunch,2 +14.07,2.5,Male,No,Sun,Dinner,2 +13.13,2.0,Male,No,Sun,Dinner,2 +17.26,2.74,Male,No,Sun,Dinner,3 +24.55,2.0,Male,No,Sun,Dinner,4 +19.77,2.0,Male,No,Sun,Dinner,4 +29.85,5.14,Female,No,Sun,Dinner,5 +48.17,5.0,Male,No,Sun,Dinner,6 +25.0,3.75,Female,No,Sun,Dinner,4 +13.39,2.61,Female,No,Sun,Dinner,2 +16.49,2.0,Male,No,Sun,Dinner,4 +21.5,3.5,Male,No,Sun,Dinner,4 +12.66,2.5,Male,No,Sun,Dinner,2 +16.21,2.0,Female,No,Sun,Dinner,3 +13.81,2.0,Male,No,Sun,Dinner,2 +17.51,3.0,Female,Yes,Sun,Dinner,2 +24.52,3.48,Male,No,Sun,Dinner,3 +20.76,2.24,Male,No,Sun,Dinner,2 +31.71,4.5,Male,No,Sun,Dinner,4 +10.59,1.61,Female,Yes,Sat,Dinner,2 +10.63,2.0,Female,Yes,Sat,Dinner,2 +50.81,10.0,Male,Yes,Sat,Dinner,3 +15.81,3.16,Male,Yes,Sat,Dinner,2 +7.25,5.15,Male,Yes,Sun,Dinner,2 +31.85,3.18,Male,Yes,Sun,Dinner,2 +16.82,4.0,Male,Yes,Sun,Dinner,2 +32.9,3.11,Male,Yes,Sun,Dinner,2 +17.89,2.0,Male,Yes,Sun,Dinner,2 
+14.48,2.0,Male,Yes,Sun,Dinner,2 +9.6,4.0,Female,Yes,Sun,Dinner,2 +34.63,3.55,Male,Yes,Sun,Dinner,2 +34.65,3.68,Male,Yes,Sun,Dinner,4 +23.33,5.65,Male,Yes,Sun,Dinner,2 +45.35,3.5,Male,Yes,Sun,Dinner,3 +23.17,6.5,Male,Yes,Sun,Dinner,4 +40.55,3.0,Male,Yes,Sun,Dinner,2 +20.69,5.0,Male,No,Sun,Dinner,5 +20.9,3.5,Female,Yes,Sun,Dinner,3 +30.46,2.0,Male,Yes,Sun,Dinner,5 +18.15,3.5,Female,Yes,Sun,Dinner,3 +23.1,4.0,Male,Yes,Sun,Dinner,3 +15.69,1.5,Male,Yes,Sun,Dinner,2 +19.81,4.19,Female,Yes,Thur,Lunch,2 +28.44,2.56,Male,Yes,Thur,Lunch,2 +15.48,2.02,Male,Yes,Thur,Lunch,2 +16.58,4.0,Male,Yes,Thur,Lunch,2 +7.56,1.44,Male,No,Thur,Lunch,2 +10.34,2.0,Male,Yes,Thur,Lunch,2 +43.11,5.0,Female,Yes,Thur,Lunch,4 +13.0,2.0,Female,Yes,Thur,Lunch,2 +13.51,2.0,Male,Yes,Thur,Lunch,2 +18.71,4.0,Male,Yes,Thur,Lunch,3 +12.74,2.01,Female,Yes,Thur,Lunch,2 +13.0,2.0,Female,Yes,Thur,Lunch,2 +16.4,2.5,Female,Yes,Thur,Lunch,2 +20.53,4.0,Male,Yes,Thur,Lunch,4 +16.47,3.23,Female,Yes,Thur,Lunch,3 +26.59,3.41,Male,Yes,Sat,Dinner,3 +38.73,3.0,Male,Yes,Sat,Dinner,4 +24.27,2.03,Male,Yes,Sat,Dinner,2 +12.76,2.23,Female,Yes,Sat,Dinner,2 +30.06,2.0,Male,Yes,Sat,Dinner,3 +25.89,5.16,Male,Yes,Sat,Dinner,4 +48.33,9.0,Male,No,Sat,Dinner,4 +13.27,2.5,Female,Yes,Sat,Dinner,2 +28.17,6.5,Female,Yes,Sat,Dinner,3 +12.9,1.1,Female,Yes,Sat,Dinner,2 +28.15,3.0,Male,Yes,Sat,Dinner,5 +11.59,1.5,Male,Yes,Sat,Dinner,2 +7.74,1.44,Male,Yes,Sat,Dinner,2 +30.14,3.09,Female,Yes,Sat,Dinner,4 +12.16,2.2,Male,Yes,Fri,Lunch,2 +13.42,3.48,Female,Yes,Fri,Lunch,2 +8.58,1.92,Male,Yes,Fri,Lunch,1 +15.98,3.0,Female,No,Fri,Lunch,3 +13.42,1.58,Male,Yes,Fri,Lunch,2 +16.27,2.5,Female,Yes,Fri,Lunch,2 +10.09,2.0,Female,Yes,Fri,Lunch,2 +20.45,3.0,Male,No,Sat,Dinner,4 +13.28,2.72,Male,No,Sat,Dinner,2 +22.12,2.88,Female,Yes,Sat,Dinner,2 +24.01,2.0,Male,Yes,Sat,Dinner,4 +15.69,3.0,Male,Yes,Sat,Dinner,3 +11.61,3.39,Male,No,Sat,Dinner,2 +10.77,1.47,Male,No,Sat,Dinner,2 +15.53,3.0,Male,Yes,Sat,Dinner,2 +10.07,1.25,Male,No,Sat,Dinner,2 +12.6,1.0,Male,Yes,Sat,Dinner,2 +32.83,1.17,Male,Yes,Sat,Dinner,2 +35.83,4.67,Female,No,Sat,Dinner,3 +29.03,5.92,Male,No,Sat,Dinner,3 +27.18,2.0,Female,Yes,Sat,Dinner,2 +22.67,2.0,Male,Yes,Sat,Dinner,2 +17.82,1.75,Male,No,Sat,Dinner,2 +18.78,3.0,Female,No,Thur,Dinner,2 diff --git a/pandas/tests/io/data/csv/tips.csv.bz2 b/pandas/tests/io/data/csv/tips.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..1452896b05e9d41f58ffd816a0459d86796718a6 GIT binary patch literal 1316 zcmV+<1>5>UT4*^jL0KkKS@WgHUjPpp-+%xR00n>G1qTcuzHi=eU zr8L5NfM@_34^ia=nn@4@ngc)pXm>4ki*@VR?6|SRoF#LZ+TkL$)Z)}c<#mBig_KMX zruJeOi&bv;V=*04xP@hDQp(ibF*2pqxW%nuMr@F6Gix?+fsH|aKayy7UwGa_-`dVs zYfM$)R7$k8wpC6gfmM#M!-v|)iP#1h4cPkh|rkJNTD3*02| zUew#%bX<$c*~vCvMH>_%oV^S&6a+#ukskADG3ECrBRBE^v4aChy? 
zvDazQUv(jtyOFJd%+RitVq;Fo?$ru4tx8y4RWLAw3OQ&r5YZ6QA(|s=%EqEnNvFyDucBxbJ63X0f6|L)lrAb?vZoDHd%^>qwTK z8M-E+R_N`PibFFSF!cCl2Z7}>xeJ`*<3&DX2?dNalnbN*vYZ7QTLis}+CyTbyv{>s zl!hm_!_I4KZE}>uSzBr=*www83fCT-SPZ&+p@dCkFG(R6{D)ETHdAf-8>fnW#-GXdM4pE5VK!{hIp z4{*7H7hK39V*E6-z)7yKmA;#^4 z#PVN7@@@mJL*EhAX#`mH2SAk2lkhNXJBL>BHS&`^r&JS)>z58UjoYiOCqY*zmz*K6 z1SFlk-!Cn`6liVaz=_bPhSWpu1LJ>%Cxlk3T;w2WIQ0LRX3%vrxUPW z8d$X$uIXc_sI{9kN=EXFie6i&h29y!AZcb)r??rFOLu%3R3P<2gpt$oRe1O6gk~8T zu3j+kM{M-PhPbG60sxBGP*RgE)NL!@Yr%+f=+n7l@JL0;84IYj5yo31-0M)BHp<)Q zzkK_6UA}%i|M3mU6cFV&C+q8L8zqA-)xv!>^z@7=Fgi9q_iLEzwg+!G2w0Ts9jf*M z64F>g8RrtB4m-(FnM=?v>|@tRdI1$7H2kMsssN5^GU(*!z`p{ft@Qr;@_OlzdPSq# z=N&m=z8R{dV?dV-Iwe>fL1(0h{JJ}+<6sZ(@ePlLCs;FVmX?rYPxs1DA(^whpU+gQLdb{bOK!0;_ zkQW*TzXUDj{aqJ}zCZT`AFw?MCRq$YLmUun3sPt|TJ|F1y1->qh6EwxZc5srUOK?6 zfIOA24Gq;xs91xZWkXI-kgFkpK@VM+dImzp9WY2eRlGn`2@#FO*RJOK&vl0mX5&x| zsC*~R>SEi53Wfn0JC1s5&DImTC?CmS%t%KJn8SnJ{vz7Tu;z{(oX1Uj?2r-D=FHLg z#Nx)*tqL1*0`$uskSzVPPI~Zw87JK{kHS;|mjvLPazsSBBGTEE(XeUKcA)Oa1!1&{ ziGd~d!Xgpq$A_L=)+{U2btCFAD_NiGHe#QuSj!mhzmK3jN5V2e#ai_;@D^ZS3^-kH z6guhK*S?INWvhtT8n-^y8%I8HZbrKc2koF=btc|VG&cU-G4a~h=kf7qrTv=Ut%I~S zEXzKRMTs`<+xJ_K%nb(}Ie8d~S$W#@BiccQnPiO(+O^Yd9ou<9tf*;o$=WeUAZqAG zyzyj!F_p;rzPQ?Y92;+@To35Y<=xOSTm>@DJ;}6?*Lzr=TgaG9BIbr{y}$`b72TY! zqYYtgpVJv*bV|eFpvy$Pm>HFtbh_Na_)b19LfLd-0+3QVd;u1iG1e^0tsmq27&c@f zqhD+!jOz~T@n@5$<6yJqL9iFfH0&B9mSe(Zd*O_H&`()&cv#qX>*83gV@pnS)Uxa6 zh&!W4Kw{zbuyG*bJ30s^kL%1hKc#3Y!TLa1|HGI+q2~|%8;0j+sEAdd#O2^p#_J5{ zqk&o!uGkw*Xq2S)W72nPTLSJR3mF;xQOdr}*By;^C3XK=k7;*$ zylq6O8Vck|96AOM^M;z(GGMh%)?T{?8o*P+jIR3%VPB~S`#)bVj@Hps@zV;k&aoL? zJT_x>_m~9QgT~p5h literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank.ods b/pandas/tests/io/data/excel/blank.ods new file mode 100644 index 0000000000000000000000000000000000000000..7ded3c3c1d688242b6af3b2bdd3afe617a23c33c GIT binary patch literal 2813 zcmZ{m2Q*yU8plUmJzCV!LR_6ON*G26!YCo5n-B~pj3^T$dW#x;)Cf09Mv2iv^pa$V z62Vn+wP8j=gdlioZW7+eU2o;RynFXKd!4iQS^saZb=LR$zCYZQ_7`TrUqcKK@2!yx zqApG+BX#VdZg+f8z5yUC#vg@r z$9ker0dQ(eZUFuF2x5T6=%h6iEdZcQJ?8WzY(R)N3JVJM@qTIh%9jM@=;mA1VLTg} zBZ1NtBKor_gl<%Q0~9=RSL2q!S$iL@L+6`*H7|9*-g}D>@gR4Jb%$vOX@!)VsL04# zd;90Wb=^mlButVnPL{tArc;qYT{9S#@gSB%=LEI(=Jw!&@|3Zsh(aH?4<4OrQ<9yFA%V5q zXrIP3y1Wl-X^w^AQ(8XmChpU2z0n&Q!w*z;EKBi95f4z6lW1%Q$>@)peUp<`D_zoO z36gG(I?|sWd1YCcZ^G}`dMC^GCznFu%`H6wf?w0me`6+GuViNYR&r^!rVqzD=AV)! zn)zVZZ(LipEwhA7&s8v$gXyMxCcsn%&*yA%JC@exKaF*L!d34B)YI%&%$6c;HDbt& zoSCr;G(EaSV-rzFbk~Ir#1m!tbd8Uulroi(o~Aa}pXLEsJ5x5sQ!p7osQ&X80)H--H80n&d7AsD!4i8-Y_Q~8_Fv*d zV`?+c;V6N03c~1`m187gh48Wd2|zkKt3H?8>$48(Pfn1R;}q8$waK2c=EHZ)g!M(; zl;Mml7Q>V#oZ~d#lDm{m!aBANc8wfNq62L@o^cupU@xeH$F^5wbA-d(Ts^molS(%c zBUU88j+6}huMNEFn~GG_PEKe6{|h6_u2UU|3ZpR#06@I~ZhpQ2r}R>3+^{vvn4{8Y z_c7{ndm+aG-t?D^Qk#tKrf)R;MbXXa!n165qbLo+PnqN`%bbGPJ6*NT`;ogIpuU9? z-fuAWGuNsil$_mF;8(Gw2ZbA3#l_1Kg$Ni=rKQ}AyT_B+^@H{M&2_TGlI4Qti_)-s zTeg!?aB*Wf9-2h1wJ5K!ov|Dj7#lWOh&IuE|DyeDGhS?^NPsL3d`5KF z0n5Y?{t?#A3fqHcM!Vpy39Hy^wpk|P>v!}YUT#c8n_E6^H4DVl$?SHmcZ3{*r*9VX zRsSK&UXRd`9tB03-E!Z4C(pnXNl;Tif&Ks~dd@3K$OK@^`y?SOP6KJ?-DRhY1vSbN~ zjC$3&-c=uDoT+>mIH!4IX@;MvN>QS>(n5nio}%OY{859sy+Y6Bp}h(Ck~+AJ^SNi? 
z27`4|E-2&(kxGtIP<{#Iih|ovstG6uxiXO90X!yCmEMj7p$W8I^Y@5;mMLsKCn`k2 z^z=LmlWKz=rR=uZgY5dSUqAPIvCE$qHSP077KDM*KGlIAp%}pxhyAqozR_eD?7VTD z@lxPefLK@=Cuom0;TF)o%APQt6}54zrHN=U)&+f?*Bvr-9YDCSA>1D+XlxP3>FQmt z((^V#ta$};27QmT;9po4D)-8%8and(eVYc>^js2cG^x%FZ76X|A{im)O8lH0m{)`| zVi~%SF0S|Zag&itc++K8C`W=Qksxv~+4>uQCMqp9LMj#g!N^9D+oYxHI^|Mu?A@$#P6pC_-^dZjAW1ux#6 z3}AuTfq+ZGSBodq?Hw1ln}fcv{~cC|L+m>8!zFJee4*6Zlk4vS=eT<^a$$rAqIhCT=!3uWgRZ3bZgPg&Om2MTA5_ z?$?+@u!QCP(Js$;Zm+9*Yx@IVNZSM+7I>T^Q@2y*J~BLVj;};jEQQ4Uhw;nSsdbd3 z-CZtdOw7iwZF4wRR#V+`hgiyug(YEVd8)p1pZf2$^bMf*Z>ONJm~D%Csmqw|{>kA~?FI+yh&)?mjf%_T2ljl#an z*Yf{;h50frigTg?01~Oc;=6@F1t3q$OKR4Lc`ukFZp)}ke_17IDX$_ew@i-_#CW?T z`sGMtYi+z6tYxyv?^;RJ{}G-=du-3yHonu_}})ZAjzk8~3JDD8jp_mxuW` zkLzHR>hkI@Y_%z*!eQ^$ELxs~X$jBsd5eg#0yHeqHr=bb3TduMKrp3;nw!yu>aCU+ z9N-g8JocSCCY>5%K6q|L7%0_wRbOxmvd5fd?obex;U&)_7f5kyH@Ej#D+e zO)($_e=xYBVtsWwXfmaSf@Bbo-|gqE&vIBAo_fB|iOjlk8w1i7){y=>$X(B~>+`W# z|41w;8#1z;JL>{!-pi`OIjq9Ewn_>4JV8Vjp)!%GTQ-uAhxZh#thX`e5QY8iko?1} zz)U!py-!{;1#GU@IFHkiP`ZKJ8b7Ijf-Sz+Bz|)(;n%%LW4`LV{0^W9mr!GY)1=hu ziP;Xdi2qgPY0fBow^!8c1N^^{JKg;;{$uOlrl+cZwrW2V*3{P@#_ea_&t2t*?k%;~ z{8x|pS@<(IKZFp5f1v2+5I^VJj}ZFQ^rPC>@7V`8rK3OnLPxzsso6Qebh`RCg-*K5 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank.xls b/pandas/tests/io/data/excel/blank.xls new file mode 100644 index 0000000000000000000000000000000000000000..952c76f045e8af5f100b8c2690ecf8d604770a64 GIT binary patch literal 23040 zcmeHPeQ;FO6+dsYn`{Ul2_NB8cmx7T2oZJxQ9hOrMI4ro5gc^H!6n&*3?X5bjSVw0 z#Mb^(snvwyRMJ|jPPJOU6+5*BoR0R7wRKt;D`hNpsAI=YJJNPq9d-LV_wC!g@4k=h zR_&Dbz07_4?s@nA&bjBFd)~e0-Iq6hS#$ouXQsSE%CVBl=+kT^l}mID*YnJLHQ|0X z%UScc3D+R`$0ekJ$}l>zJVYIl*DFr-YgCR7e}@9}bnq0;M88McLHlS7(bnC`eftur zwq|`p^0v-y{R=(T2pJL@%Wx8gw?Zo?4YV>E!nuYf3+cv+^Eh#SN|b$BoLA6TI53k*NO9JoHaTw>YNpng*2Vcp?sL`VRH}A*$QnfC8&!!X&0q% z{%`=nmmf)}@&Ot{i7b^y9tY%n^jUTqe@?lG?9HxXu zF-0G$q#7+6rUapcckgRdIV{GAVK4$TahMVstkBxGYFh|$xJf!aIE_N!L3Ok1W;M^6 zwP0uCvK=GQ^)oeisYf=dfE;ty5jdQG}qsY$ogh*2|LWo2fx zz{*dECb5jg3QU*anYU14YSz#5H7g6&g_`GSJVpi4-?O^-HBKjWrn74i^uL+6yy-l^ zoyxn>8P%!$|1w=8ottMI0Yjf5sjXKXjr0z=imc2{V5mv6E5^` zyU_1)p}WcBhX0(vnPAh;cUGTyE;-+b6?ecRTMlb%Gc!S?d>OKoR~quwN6uqCPWl;d z(obWGM=SGs7;Dm{cHJHxPq(vz^2bW**l;#;Fg%|QTIgILK%?O2%W%cMo*$BUyflr* z6FFe7z(q=`U`+T*;J|{E#35K}1$L<=ao|2FsRA2qNfo$LN~*wSUQz`fUnNywhf-1n z9vLN7;GtJi1s+Q!Rbb~=QU#6~lvJU))C%)UtUPb4;rh0^yuH;elz)hjrZ!f{FF;AlaBVNVFJ`NYo0VI>AK%DRjE6Id;((QLUA%bFCR8jI zvk4{BOre;TlZi~BWSU(ltmq`6K04^4ZYQB+t$a33siScTgfd1WYu_f6$t>Hi)oW8QJlTw~?`TvI*kuw|HV)0I^BOIxeGRyDXhulG zka22`>ipHwGpiU7!erR%S;mX0B6Q*ng80|)Aep!~2;yH72r|w(@dlY)3!Assv>ma4KlSD zkWAbgWTpiK8E2iyAcKLiRE@_MMHbZS^YzuOt;z26!h!Hc8erAs4lOu6I||YD1yoNu z%|}{It@uk)H>Ej>4Fza4k}879_H*xkkPY!QKW#7zsOQ)Ov#C+pMC@$pJlGg(5Wyyt zY`=K!x6W+Fs%)ZmHgi1K7^@V)raamH{s(V6vl*wdS!8F^&c;$d;Cg|K`HW&TJ;D zY#c0Yo(CJ*(lYVP$+OOErl@QjEbVd+HnOE<;>X{9&za3sm5qa?MLpQamX?WseCBy) zHq)5R5^qb>!DdMjmKKSA^SL(-Htf)FfZ_W~yu) zENzho8`;t#(T`4i!->sEyq?CqEC1YFtvoj`%T~BPD~UGW@e^mzSp`s7lnOdi0hO%? 
zuRgYHbOxPW0EPXip!Eu!N3^os=(WbwY!t8d-S!* zuC9UbAw{l<*%6RyHztFvvHDrx*LRQey7gN(b^aTKX$cqeZDvksWEPVdXV}F2@Xv13?Zz=i4AoAMAmHS7z^XXXnhT5^Y0x=GvYf80s=B zt~Mnbq-&wH9>r8SB5q=VnTa(T^L?x!)+ZBfo!uQe)(VQCW!2E^dI-d}-w8oGAfT~E z5GH^DGvm601{|AW1tc>Z;b34i78w#ae4h$l;{a~>KNs+9?E5Ep4$t6G85}-P1t04G zZumhLaARr^5a5esaQH&SA(V}C0B3(_^jL+gmchUXTFhar>$>;#!W4R~T^1OR_FM|I~#XGzAB-?W1e}f{-B>1Jc=;%G@^frqJD9c-8 z-WQqPheO=uP~1qm0`k+QWN$i^fO}CKvo;Nmc@u!Sr774HI~HR_Bm5~|mYD4w!W)&G z!Na8b3BxgJl~p6K+yqNc2?xPYIqhq>EG$+lu`dx2n|jj8f$%OXMqmWS3uB}q6vrA( z&&z>o)1`5cx!`Dxoaxexoaxex++d)JuH+cq=H9gE@h>S%Cjff0(1E?rWtYz$($eQl zC`e-DOi7HKDT$F|hj~?h2)dhX6(JNtM{GfxQ~8BItBg%`8Jp_1&U6>Fwa{9tYz(?* z8>}OZj$?ht(_}d1MU81#HA@Tun zwGnM+XV?q7i$IH=bepiCLJ1y09e9dzIAsalDz*-P6|H)aUn#>fuuD3bUdWW?{6^EH zM40jldsmYZy+1sO?w8~;OxxHn@({LVKP27bdWT`^8vdTtZpOgE`(;gAyCv1rkxJ~< z*L8L!_v;OPOP4g3ZQT?{zB|#~+L`F$Twm^dedn%SJ>4yRU5UN0U_(&zB4S=dZ{iV! z!}!&5zX7lf@^}tTMCg&l;Nz4QMj|@1G(aDVkfb2vtx^EF)^IJ7LE&?4?lM3}(Y&M5 zoi%^Z=oo0nBpP!#4BBCd#sIwM!q~N5ZP~S672uaBh2!UVMKlU>@URb{Q^$zw8k`Bw z@i2Y-*ayGbxNA#nr=T|p`aE&|vY_-!?WxCLBWl*}z@1aU@wlFV^c7M1PH~=t((U4S zH^!S`&Fh9d%6qX!>H0z5E(=7~qy~+%2BPBlb7eW8n+x1*uyvsmo9Cm@F3$F{{|@0?vQ+ zfr}q*-d+9pye=7Zp4PAFYfW|u8TwDW5Y9gOhPDcLbfor`vd7-g za-NJS{^4uD*MP49Ujx1dd=2;-@HOCTz}JAU0bc{Y2LA6g;I;k_oO%7sP|MWn!TYfO zZ~pK}UjOq4th_Ig*Z*6P_+zjek$9i=ok;wF+hHVL+aE#Vbw98FIrjT762}9cK;rno z86-I#FdO-8yOa72En8dktCRR&Bk8@^<*4_x>uHd;rILyLy{QAaDCKCu3sqcyr$Y#i z18}^;h;|n)BZbQ-jYL5fgo6^?fbBi0J^MM1zhBh%V+qiSfA|{kHQ;N&*MP49Ujx1d zd=2;-@HOCTz}LVftpQ%g^U{~sxOgqkyZd;1qgWmDJAGd3^NuTC7W0~&*YCW?h}ZqR z#;-x*cmBN2H}^&HTA$+q{GBm>SIhq&mcN_iPs{mJYF=i~K+=(BBJurM_~rF~J<=Sc zxkwF2jYxd|6D0oikIz>F*JFDl@7UC_Wu3nfMwj({vXQGqhOzVsv z>zVt}q-+QG6h24TCFIy+l8gSue}K*NXBmpExIVXYE<{_!W>5YOf!ofzwcCYEOeuW0 t;fC7){yc_w{1nTdzpEC52R|XY8r=9pk-gY@$~{n;JWKzL_SZ@L{{?{WA}{~| literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank.xlsb b/pandas/tests/io/data/excel/blank.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..d72fd68ab3dbf214b3b0c4b5ace56b34ce627b13 GIT binary patch literal 8908 zcmeHMWmJ?~+a4H77*aq$X^D|e36Vw|P(TDhKx$?j7)lzY8|e@!Q9??(L-+J2YrW@LYi9p=p8Hz+?t5QbRSAei4!{B60RR9-fVZI_MgRr? 
z5QPl@oCn}x>d0DJ*upGq_3t`a!EE%n9nH-clCdz^UjZ;t-~aRc7h9kr>Xv0KFUWT- za+f|!H=VUQ)g+Gnk}&I+!bb+fqNzAnx-UNkB2SM;HyJ(HJ88>`O(O^L=v(aFv}VNa zdo{JjdD}Vp$s0u75)e=s+?$bbg_Gi#+{w)G@P6iQCQ|lcSp|Yi6+UcGOUYPvfl-x< z;dyfoP?k+#*@k+dr^6@G6g=r0MuDnYP!MCzq_; zEFV1TcQXkAlYM`G+iT#Um!WdRy}0n;Li4n_)cqk{Y%+wLbZ-Z@#)Ri%AMP^N2okKJp}?(|CGV9p3@C`sIXKqKJyr9N4#R}P(!zBwvyrhzHhSV{VE5ws578gr`Q!Y#G?Lyfy1WRd; z*S0!dR*bdvWK@aBapZd1sd*ZuH)suLtanx9CQ3EPwRf_IaEMrY>$$kSb(s&pL-(|D zM-4`Cfnz~^v%_C)@0uECeNoFZ&00CIUy`iK<5wY%)T2YGPE4gLJMTVGmmZAARc-5< zZyP9iZJslCq3h-Oa$7j|X2Kw~Fb6%qj-s%B3JpGOI)2c?+k_Cs@e9-!*ROTW*(aao zH3XcehIrq{)SnZz)5W|$L1a6Y3~S00-}0Xz^Yv<0y_78;>lH|!;f7NE=RE1dP6*vW zy}1Dd0B8Vs7>;H9xB`ZyPt}PHq$y@67MyiDzCvO zTq7n*=fZ8kKDtVqfUJsAd9#~{x00@hSwyLXSbJpF+A(^9TORO|6Pm{x9^u&z_)j@; zJ=*t&-)q)b99r*Td1{s4-y2WzBt6&P_oZ!#;z`>gshx0`B6kS6ZL1-eC6);HRxI&C z_v?*Eoe5AgSSLJBCkmD*`DuRum~~#IAd{m0RwbZ-Ru*zHF>*CnXn&>c&^@F>u33eC zub0u9zpp@E(@Hlyfje-eO)PBQ@DoQ}lnrmgGsCB%F@8$5B%v-#LmQz)kGW4yA~q6+ z13xTAM)hzO-kpDG-5mM#-UQ0mKL^c=h6SA|)EjkFFc70wRM7kwE~aoO!a&eS3aBjQ ziY-e7WDXMsMy7$XrAisS&XLgYQSr7yXVd@?5eaOtp@AyQ7-}&05D~*|z(P3lm`{tZ zo;Qj=71)*m^?%rGpa@ljH^bR+o>BsaoIqfyLMnFk+A#c7rD}dt(3c?478R_96*GXK z9|U6J2Z2Z#=B1?NahYW$nd>eoP=o=RttlC}jn=W^5Y5#%P+Xw75_T;a1AP`Kl|HEz z?^Z0SGPS@{eq%vcBj`L8^&CG$ufzazBp&nb@x$3BrK+14y9^FczE(ICiZFzJi~OI> zkt`;V5r%rhf-(oynK^6_Fqo|k&-aD@TN;@{p)sxOt`R-~H69qC6Il#@0UtF0r>|Tw zKff*ir#3?cGR$RVX|kA_3{NKFC?mNDU%qlG>R~GCyF*meG5b_BY1X_2d>MS#U~?Fv z?q1Ru*)S&Ny2}c_i=4MfZZYtgDbxgccP*Go9hJ8#V}cxF2)nB%Vq#K(qVnM{n2+BQr})}0ax=Gmhvd`VE~bs}1oq}cny zf;VeS*72l>oky3;`3TgZN8mo~epNE~*U6cr#v1dtEb!f%e^>xj(m*(qhp*NQZ1oei zc^m!J2MxoJi*!;&&*}xEgJF=(-5v8UgB#EHC6i8yeKf|{14GSTA@8f!3yXCWd91Yz zEQ_;)n~4URp$2RoJywp39mPhOEZWXw8Wfa3CGeswvVZ_Pij{TheXJ3YmT7CzZA6|( zd6~$!#wQoQ zG&%1uC?w=g_xO0b#|rmX`FH@GzRQa8zYc2lBm6A}Y;2v(U^d^vH#MRK0+c3^?@+$_ zF}t{kWb_b|HhA4ayq_Rdq{o^$MwRPPU7$;c;E@v{N&JP=jZpO_}BIj zCZ+Edr8^dpCJM{=PvS-`VvJfg;n@!-2~=p))je|(BOEsK z8}tfKERDtOX>QnM4HjHP^e7bQ^iDfUjK6@}KNJ$Uy>!tuR8i62vy9=2Enqv>yy|pZqasO$j1+6TV6>W+=kApuE$#5@oW@SEY?pl< z)VZ0mC)Kbx&U=KqTh?pRlnf6;yiHb;sBTp6?%G0JJQd?dp@r`J0!dK_t~KisLXM7|n2(d9V!=^pYdXj5fnLo;FsZr{g1{EI71{k7 zE7Hq19PIgsyZAbyG|z1Qh|oQgKX+nn@9R#&OALbvT%{at`NX!G$CdFLobD@4{A&+# zG?4H2F;Dv<+P_%59WhORua$-al@wOm-xn_|lfKDl2 zis0}#Ht18A<|(?6Mo%MR7LhqJO4H!sSEcxFJC;4prluQ?UXz$F^fqB8H<6yHJcwkm zalh(54ygwcrzd?|=?i^ZY)!;3X(f*q_q$T2{Uq-{Ao@%c;>Gm2#ZvNcYdN%wUgd++ z!G54=1?QC8@#jc=*OLkwyq%K#(}w3Nu1|Jn8t%P3IT@SA-;b`IW^$HeX6oXy_TF{E zDy42Ji;FA}p>JTX?@g*^T+cNayO#v45NgWCA7IJdTE$0jX;d#49fsuAskV~mggG+@ zU7F;c&u)=_4YWXLV-ttMAE|QbwmvO$2_k+2-LqQpE|pYzl`ius+GToL-lpJz4du@nPiOINWG}48Yx!tEh~{f~roP^hP6gJAEUEZMX$Zc> z9@N46N(dUu=3gl{qkE^Av>;8<5g)U~WF1exEpv%QV~{bpj%gR@1OPwnqPwTDuD7b` z@9HZu>1a~6c4x}_a9hF34<`f?h}C5YPiBf9IOnkGAo5UMS^2q#@7nf;f1}$#FSSpu zYjWn>aUr{Mju&Q2I{6ry&-&W~G^;>+^k$PIj+ASp8L`&qT?Rf(>{zbltWC6-9!!<* zCacL352Pf&=xOVvCANu%C@rAzNSc^}4}!#)9G5^W_y;&x=wZ z?TrYEm@O+-goc2~oBTcwuzLO%JMX@dI}f8ZuV%%jEhx6!>gN`I_}I$Hf`)T$-1{AF zO)=Jk2VNXNE}Gl2G2sEb<-ux{l;?c{LE)ec?^M2S`{%}YuS$Qiehu|6tz-4N?vOZD z;#ph@Kg8*7CreE(nZ+TR`+H(40}#`jf6F-{NdsPtz4dBqX}9co`dYaN-?DlH}V%}8a{Xpt~TQO zxHN(A(Bb*y{?x)&m`ihjk@`f4t2)#I@zECItO2q&ls|00o|Y5UK~7p{W$oQ)#=6Vr zt)G5nP#}qHRaW^%C$UvYdZqb;cWt6v@27m?YKH7uBS_M}Xg)`1ex$UZxs&VVbR`3~ zRAb2^Nc;mhZHA{g%82H)L%HkloUUs2ZCfnoGHpt#MaZI~Zbq2w1&GWNF&2m@%|WWP zdWW~dH`Vjdd`mw}_IRQ{Vb<^yO?#~AOB_Yj!O*+xctc*L(84ux_>S#bA*c4IX6RjM zy;!Z1rkSw(@Kq1wW6M2NyiAMtH-~XRtI(5o9VXbpK#3Xh<+erW!8j5*`7+VjjL%EW zf?gPSYwlFH!TFljN>=oE*uy85T_ndwd~l(nEP$a;&Z`&E$H*+J_MCqE?1+Y7&9qB$ z1!jDlb3irB0cET5Yz8*xE6(r_oLv`}{MTeH<=4mj;5a5gZI_Qkbc$CM%SC)^K5`F3 
zyR);AJzY0Z2jpxX$L_Y-M61&N70!+8=|x+gTPfyyr{u-Nck-u^<)yuSBtwxb_)0x1 z491woRWJ@$(#EDe#?F!TXLUhflZbOwb7WNTg``X%yDLXmn8gW-l>fOyS9v8M$Wc9w z7WJe1t+#zUlm31PLk)h0)=oDqpfJbpN3re*0{|vAi3d&^KOgGjU*I_?*<@s+rXf=l zF+zeOMxd6)s@9fPHavz_R=@NA|Ii^Q%OylgBd2*ueAWmK>AhD=5};tEFpX&FbGlNq z>f28=XCHBzD#^sflzni8#S6AuJXtgsT-(@BrQRr49||`X zEYFFvV7vyyj0vH|eo|}o13b?2IlxdF%m7kWXt9DVve7du=^0}{VE_c?!of#(>@k$p zjbExstHawIZSuHf(%S&SZaJ#M@PcfB)>XK;*mRO0RDO*qe^N35SsX!S38pLF6duI? z*b(XJTlB7(S&wLGL1>q0*35P0T?C<-QGN#WynB(7*M!$4I&-!XVo!&%RjH(%%L@bh zW=fDmm^%hr%8Tv$G(lQk=3+s1)0mGppvOuX7`#@Lgz7e&^W#oA9|HOi{tYFM@M5Hr zt%9`>W(-G}_TAIs^>0Ksnt3;FiyTzHjoGRU59!`CHeq@(Vd|XGyhq&CSb96|0l83^ zO9UH~fV+_BjjWkGZz%yW|0S1>p`lhc zJ&&tmaM*o^OqX17Sk0ml@uf0}O1uiEL6_{3n19RgmASgE+ z8q-W7SeK2wkFeaXNw$?|74&||6?s)il3-2HI#8OBS3?DFV2G>HT*BNdwhyJSIT6iv z21}2wwP>5WTs9!7{B+C1Z5d-sNOWCVj`5;|peT54sBYTm=_fuN=4wg0KWszAcO)Ude5ItYG}6Vh<^4e%C7&53jY11^`9&F-|h(C ziI898VPJ9r{_0xzkMQ_o{lz7*s?whU{(LL-hv2uh7S(2cxT8WB{4-7equ>(iJone6 zJvt6Lb9;tFf9@w97hM?r5PT-gjsGvI|?7MM|MucE(Aei=y)_XQIR?y8U;y{dYtEpxhbz zf)4oCo+Bke2mFrv9SR)rbnjfV9=!EFvf31ieYX1DQgMLutmsb62tcHJw^?O!eqtgFd`z8e> NVF3UX-zHzc{{Z37J9_{C literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank.xlsm b/pandas/tests/io/data/excel/blank.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..c3c2074276d125fdd862bdfc423c3f40855bef1f GIT binary patch literal 8418 zcmeHMXH-+^)(*W%uZG^6K#(d;dhbO+dLYt!DAJ^NBowI;1cD$6g7j_x0Vz_HE?q#1 z1Ox%8U!1vfopHQt-Cy6IJLkuFSJuhiIeGWuX_QQvq-R_y7Qa6(A~-p6G@F z0AS((0F(fHOmigewo@v5)l-LpN#Ba$K8qJ56X05R(gQ|+09pe>FtRl^S++)M1R4*7aI31r44jGTq$nX~YxGhnt<= zJ0H^WX*Mi?*9xg0^NIsffr1fvrgD=ou*Gy+7bOB7f34$WLB4(AVr;yQg5{BEPVEL& z2tcqVd$iVE<3jFT?_$Am;{`jju!&UF+1a;L`y3$>z=|r?#TVy>qSC&nu1}Env}NfR zgHPIdTeiozS@8{!g#szm=i%8W=fR7y;Mg72RF;$~vv#Da2O40PmskM(zp%8<1jPOo z{X`p0LLxLvt-T?xK0<=u@Bgs#f0&Yg`s+z)!)SJ1=|QQtDt$cNbXUhOENRhJZ(%iN zdd0EBT$fzTA%Aojg%zP`H#>T`K77-BerL_en3(}Aq}$cZqfjjZ4w`fhs3^Ea`WVdS z;6&96e86CPdv4y&o_5BRMr-6eWDJX@BlIG_

    &d}~H*mKMQg-dtXj9MiBea6&+pSp9J05hpRb@OI+(+|I*5_C4~3M-J4M*IXP` z&S+8xGI6`&_w82@ezqVAh~r4!7kgl*j1IH#+1Ie;K_f;!;w61Ic}6wn^vfeZ-?@he z+xCx(bpwJK%D`Xu7Sqtl?>rljF2z7mwh*H(mgSH?4!wi1>R}eU>KcV2j3pH#)15qJeDNtK7UUGzO z@ub}_x%6ABF>r!gO<8m(vCIn<`9ztBnW==3MMO%~(frtJ@o?ze$_YNtX`pB(mmTb@ ztjK#P^?YRBjB$UNcRl^Vv`Y1@krPyklsXAL1mPxV_hQ|A%TGrH%d#PyPa2LUN`NayzP|A!RsJH;$>#lyM#rz$XkEAQ#4pny zSyWXsPlaedr|o@6SV%Nj&VNp^5zu)$JN?c7O$U0f{~nD~aEV(6E&xzU|6Pgx77eH) z#0?_!+gt8X@#E zD`6O2&9+$>EuTTI{3Dnx&;l;=*b1x@>O*zSmvjSKW4QDa zeva1U%zKz{U`sbJAj2v=sM8o%6>*!~xCfINV=m5QqI~==XQ$|)eiA!NP|)>4IRz!1 zvDyB(Bf8ce%}tDg_;4;AI5n64U&AbP7uSAoh%@+7^O>3d9xl~&pkCIyxcT#H$?CRFd`pAot&D>Z66U5w=DQTH3B>Y@Sw2Bu2yQd0FJf74sdjgXHP*$ z7&%eki~ReSo8OEMHov8>#121TzJQ*68r|z+77%b)5_}N3+?llydbmExTzoS>bpOj9 z?P`Eo_vJ|!cgNY;y&GiL9a%>cg$m9Nx8EebkUKwb>E+yY3s+M)hN&tSnSaD*+{K5A z#CUOXvwO8ucaa%6;E;s-5p3iWGA>r!xC?0{@k;i;Ex*TuWmPceZ_gM7e&R^5PH+1W za3tx#D5y#7skF~$8X%b%>AeQN)y-%VY}(V3XxCfUW`=1ZLZsPyI0usi?}#w)Gr+QW z5bMVoFpAGR2Afm9DJKzHat5E6R2a3jbgm=VxnVSkeook-IawQ5D*aq*{Aqn9`QHV` zGBm5J;}2>^F;lE%?@8tJGCWLWSdOD64yAlnrQL_LI99RHF>G zfrnx#wek6$GUcfW8uCa96yKxtu~OdIb&}{dFpOawk-P@J2{XbsNIGodi$r-zC>=;2 zWAyURg=A6m&Xv+eNQ-(Z-@%T{&xxn7w&MN}zkr16fP_tOHJgo|vI8$Ir|(j2*01hX zx;C`AO?CIDIUFfhTScFeW`1InNjxI3fBl8#Yo%hxvypjo>%!ZsUtUkHs_@3}RnSw+ zy5zl;zB3)5$76>j=J4@}Xj?-RvEzqWg|@d0QW~LTT8aVhbFN%ruwbb&i^RUoY(Pz- z2$x^5+80fge}ce@rx09fb&Sz+a*;W>=+76YKVbm#$96lXqD{y6PvfS zSBrgh7M~AJyg(=`G)&swdZ+j($9yR6ZbDI)z-_a{A~f~NUN%B*EKumQ>@Gaxb$=S$9a8!$C))uDULFR)fg&^duyA=A;IS;N0a&ae)8qB(gO#qp`c#R!$c^b#`@?gBsn$rf; zyb4Kw5k-7zls;|$tuwDeTUdg?+4I!)b33BS2#28JKhQj3dn`B_m75&Mkap_o8P@RK;Ys1Wd4L3KDRho6tjf5;Pu0yY|mLfdU{85n^mw6+vdefD)3KphMxA@(2huE&tQyV-fjgEE8zIQ zU+Z9rJ?R*_q1A;DgL8~y5W^l0`B3 zvc$>@^f=PTFLW>0xPyCT$NsDd<%FAzIroujf9R?Gw|N(z#e$t+*Xxmn?u_EZI(IKE zx?5#Ux6A$7%<@0Csi+)u7Y&s=m|+YOnk(?@oK8|L!ObfwOtpY7#I7&BN*n-o+02u1 zdZtDlFi%~U?dF^>7D~Kly%{!>uy`8Hg=>pt5b{QxNu5?*S~8;N4atnn7~;9py9o_* zf!XImjqirO<%8-ox368ul~(Vs4MNsGE~@~g>#FQrr0d7tFHWcP5-O!{v)D1%wP#S@ zyt_9V;(0(4ES@dQsRKfm@`V@1GG)tzeQaPD8mstq5er2otlFR@1r#kQ=zd5FA4doT z>Lc{qa{YTHOG-nch1ZoBRAgJ^UYplU#Vjx_LxFe87SvPUvcePZ2D|>L zM1nx0;QqOHvs_N*dBi@@t!Ld}@TzHaNxg4RM@M}P?b~2jeTKlDzT>Mh*6T<98{~8I z5`b`WRX zdPANyZqAWd-Iu(j$VuvU+`U5oshI#e#p#5)t=_+n+scN*rxeJ{Qj@r@UgY;O#(%@v zP1dCl*qndEhn?#I56RTmn&GHxAZ$^ii4ouMN;)-CxJjI?W?)^`W!fcr@Bc0>{)?!H z{1cV`@2ISql&v_T;}!Lvx$>LpOG+E~en?R{ioS>(k;_b%qg;+s#2+fsKbaZ$GDVXr zs+uc0wBWxDGwkD%P8RiPJbJJ;z>Mma=(ZQ8@`kS(%7aX&N2tE++sOuIu%(hb^rL$W z!&CQeYHZI%3A-B+$wv^dQorQfysOFiE@G0miv+XdJzX1@pk1*ct-|dtn7?z{`cc0N z{l`>LViu^XvLc*1_ff38nmTrA{MXV6zYXR6H46BoSId?vZb)z3n3Ps524A#9jSeMf zbF3Owuva0@eW(v#Y7--;oX8N?WsJ@b$}fKk9s^sM`Hl1nvb~QE25Qx1lP(3DO_knv zP-qUu!;5*m%&0NyV9n-!+Cyx;dR``#XGuEUp=zK|;s0RX^YTi)Q#`Bxc1eTiGZKFF zm>PR+@upMC>M=yl%m%I%)YVnJQ$ZFgt4^MhEwdtGJ2@-|da2BIP+Gp~hUFnP?2afS z-!MLKzPuP-ru`LXMh4Y0)X|aI#s&b0f6rt-(0i^BpYKFLWauQ$g2-Y}(HD4yE$qEI z^%_-eFneO6SwnTgHop4pxbfK3kMEr}?xXtr;8C)*RkSXmK-oYCQSY-U&2Zg8(@K+w z(sVNRC*GExCY;Boazr$AW@;u1(YLN49q#Vgo$M*O&Y4xsr3%La3SA>o-j4Gtx`x`P z&BRycL_9R4*I?AvQE_#n6L`MBffL`k8?N3@y+gpCb4jXfxIgn=p%S9Qc+(R(*JNkQ zu|MupQd@|JVK}>yv_|VEaW67UMq*}jOb4I_HTwc@Gil<`!bFG))_K#cHXD#Q<2c^$ z+%(xJ;M>=lpH?fWK9ANl4s%G*0IuJ0jASszjq+tu;oM>uH{Ok0l6#IwOY}iKXf5Vn zBjdh-SbNmHMTbk2*GN`e-bVyb11kA4i10-c0JqPZVpgav>{B+DFUT#g2Ii~o=Mf{M zLCK^uR3nm;ffg1{*GxC$%a+DRQ*dF$wtH`;UdySSpC|YLNolg z>y^t>bD+a25rsM-!^y5xDG6C-%IynWJBIQ zeZBDh09k;%a|AAIvr$f7BDgrOy=eU)2QjF^pG>qlj30_z4Q?Y|s~HMx8OpS!a=-W> zQ|m8Nv*Qoz*{!;D>y@3T(o3a;mS;jpiFB*b$zm4z%JVer>J<;ZbE02C?g1Dp+wxz< zlN{Z@vG=gk_xAAg5wi2}hJ1$|y4U$n+Xl_Ws5EQ6P7u}W1)VbztpY|%1>;;d4$S}q 
z3if@y;Afw4fDM&S{E;3W4M>L3?%qTA>DCR2{+r_0ly;5?GK&ZoiVwpuJtf zo4v1A)04ZWSC1=`RK#x)W*2cI#>J*gnoWuY2L(GB_(&}8dF2MxWB6-(`Q4PsaNg|# ztmH8tnz7GyC}`ZQ1JQiKqaX)}!}D@y{p4IaxtmCj#8`-tSVs9gHC{$fgpqq#(->la z*l{SS5r4;)-y|vMPFpYBtYMemT7#_N6xvg7h^@C8yWp~miR--uKAT-Ha0J$&!tjGt zXT+uWUKF>OiK}gZ*j7Nzgl~LRRWp>K+Vfj<-21+3heeW6-?AhLioMpn>=lJ#WtYie z;!S#&8Yk#1_jl<4R;`CUM`O|%-9aMzGbU|4J^z6u8jJtB;OPo3DGB4cXgTljg)I;f2@{+t&_puPr`PFUg$i+By)qNk9;${1t=F|v zKN}@WbXg$-g)1)st4F5fkBvSfdi~>W`z)v0EmF5vf4YQEY&f7te=Xy zS(NPdNVJW|ndOv2uaCyUp7B=fRDz8wEt9?ENq$XD&gH}!=eDJ71I3a|=@!?p%SK^R zvFsN5MN`fo>AsXq+AuTRjl`O~oKXT!gC?Eh>yi0%ab=l=cAetz!c{_u2$ z9&`WH*ZtY}=f>j?;{?KAwjqCZ@bmHFhXXzGfB*X5j)4EYmsN^idim|h^0SwpMfHz& z38MO?m)|7z&t85Oe?PojrTNXv4=MPw>0gWZ4?_ST34I{`TS@=f{IB`;&*m)XzVN@z a|CN39wQ$fl`92dR0`#NBDkjtSPyYuKH&N{X literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank.xlsx b/pandas/tests/io/data/excel/blank.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..73f6ba6d29af3c8764f2aa9bb3291a43d0650074 GIT binary patch literal 8379 zcmeHMXH-*bvko9#dX*|2K_C?AB2}6oy^ACy(n1eKs`M_Qw+I5^pdd(>ju8ZQtHom08jx4u+5d6o#0R>_QP-g5Hj>t9h5Od7lEXG5`PP_z(U9-N{|rtw3^> z1GUqT1p$qZw1jL!Rm3cs%Xxl}qIjYVJ8EQ|>@KpSPD~7PpXE`)z;CnbmX3u*vrEle zjmULm%(p>upDx2zl50HBGfP6_H*M$(3t6IvlEI~=V# z_-gm2m-TMD@fjI0>sBzc6?RjH#O-@4iPtW3!TD!!JRds4#T59hM6b*h!`_FRFz&3p znQN_62{1!-6cqE_3wB$mD4=>bfR7+zd^DCR8~2dEdg??DeHYC;2y84%a0*mEyu+Fw zI5)o^@2Bi@adq(Ez&H^SPz#7<#POb;a#FdQuyWQLysMVN^vUnI*`Q&)JBd8>*{seB z|M+uxl%R{;_OsQeYobI3Wag-5@goeYE-r8YI)5Q$wIPt>D+XzIFjt8&q`dD2g}Dm} zemnm|#{Xaj{`BgxiG3I{UT)t~YgGDtvg)KpkXz7jN3DU)faNvkCTn$EKBxS_eke|` zhV|sY{!-s9^Xbh+I|EiGu#k3ZJ+DH!DA;$@-n%5ni0mnt-Nuf(gW)j~$arcRcHbM7 zoCO{|c~%hfbWZ6 zBzG)(RPjno%h_9K(I1?q9V?$rCDY}(p!9(FdEex-OvNts5Sm@&2a5M6?AAEh+!Gde z_bzHAUD-9G`qe(|%FaZ)%df2k3Fg>7KWyriohuBT)q)&?&~M2M?qgp3?<{EuQl8kx zTv5dV02lxSSl%$9e-XvY+06m$?CkKZc>PHlEKCK%l)3-xqcO3^9+SaCx564j#(W!V zKBQ8zFX<80rS4&cHl4H8a8^ySw)lq;JK><4LM<-ktBA4|k8iYuTl(vK>NO=XO$=r? 
zuP>^M3x74=bxcSdQ=YQ&go}hjcs*u#YBR-$V~66^6C0X)*BxwCPH7XmQt?`&b|Ld9 zPY{q2YTKXn1;X$~R*O~m^y{O!ZhdAxl35)`d1h7Cq>BSjkExWs_0EH!7Z#DP@6u_1 zq*NEXk;^s!yf3Vsq(vEpCDi&^BK&=tRRv=!L=#9h8Vb5{GFEvMAF!u)dUj);t0pbr z15eZksh5Dh)%$)JMEo5ueZgNKU$gx9~x;6Ctq%_k= z@zF9ao9;wqlti3i$75JTRbXGct|kSO{; zJ^x*}L_r%q2-QC#uW|R&iv2_1O4}DQekQ(G)vWw9V#tf6tmXKhQa9W118d)x@TmYO@i;t4altr?7{@N2`&c7DLjo{^<xU!A7n7ao{4&$4IDDJpS*KWfV2hkZ2LHxIvW8xYoh9=sTshi`1VF0@Qf74w<* z8kKbGiwR1V%c|Ba&NfRk&?Aht{ChY~zy*%UcmO~l<2U8`Jsfabs3TP9`&sxKdDl(v zI!}sIw~&o_U4=V4^3*Wmdqj_wXO(C%wmC(x*kz=e=;z4~s{1oe%s;|vtp}xHHEaN3 z{QcMs@EmUQh!UJ*nte5m3i>Y1K|IEh;Rt2+K);Kbc@B@S6CshgO6;5qu9L<0N_`^R z@p0FG~ZR{KwCYruOCtMA71l^lC z9rRcU-huaQQ=@rp%+^NDjXm9t@m>#!l#Z@>_*Nscv8-nBwMwq5WPUxrU6M3#{n7fo z$c9t3Zb`!-_Ssji4D%=2g##MUa%GZtawBQmjKdH7eS6T1h_@1{+h2PLi2KeP?zMXH85 z>QT~sMBii9bNK1$t(S*n71 zywU>s52@TOl{dHTB-(WK!kPOeuY+$P^a*rh_v`pVwp=BY_GAvRI{2sj(h+&i6}^WtE?;J{xLRf!a>HeA!rL3Z!FGh3YTmI#RURd3amd=ITpFSa6o!2U0Y??KO>_UI8vj?V|Q%N&kIwLo!BkTz1Vd#}fPbjZx_^P`| zJ0BJUPq@63j*=Vn5juJH0Ga%@Gm+hljPdgEx_9(2(MPk)bw(GNn55|SRsJ#Pv@ewq zl%jE)Qb#7^@pVCNo|pmMNrxLF3@l$<2lu&M48l8jv0iFU-2v0S_D^~lN^+u~G!8j> zlT~s@Sc1^r z@`T^hxf09>fc@(CICGl`S3@ z+;X`QmhD+j6DyQx!G>$DBB>!S;qw`~e#!I+tO0D}Mj(j-uGi3F%hibEmL4h1R@4n7 z=P(ut;%OP{a~<*uGU$8dnQLwKE7MT*ieyC8v*22ha&Y`RHjvUioGc&b108}~ry_MT zyvv)A)4DA?yg2Ns&(ym;&ml+C4(^|FHvM2CA$m^C;v`xRE-cy_Z z?6u|f6xoM8D8EHnSCA8#n5UsP0r<#5W2@YcJXXG9UVLzhdecENQ`%sJ|Dn0Ce<%6-HCQF`9z@ho*41g+k%(Lf;pWZv}PGHFgRU!ly5-gt5&*wn?Hi3n4Cg8 zR`I2mWmL*lz3kLX@nuIF(xTXw-nw@m&`4Gi)eE5qO{2JT%|h#)u^s-!wX0L4mVuH+ zidAL;i9Dy3H?LmEbcG<>SlE)p*=-~Rs;714j9^8X!`MZ;(FwwQQ@ z{(G)`SADUGUEg*oDhFZbA^mcxNpe(kp^5}Oc{;}vU0=p%6U1(3iuKHRtt0f_am&Ps zxz`>%UhHDsYLjS#2vfTu7xd(TCgc6oUv{l;`XsX_kfwOjKSkiHxz*J+XKo2Q=@ZKb z6SC1%aIHSj;QA0edaacdyJd*JiCfS*Uyn|~xE0}LpSX0;>A?6o0T`18EGsPuqRD&` zA+M@-sW9qm;fUvo^6nxfa@4h9?KYl&N93TiW&{>rm_(%(6>xR1d@E0Y-#G*-yz686z0dES)EQc>t{Mv7-*wV?}v{c{&bF6eZb~E zyVFTK$^C`1XVO{s$i`c4>nfCZJ)U;CxLjiw#pbnM@Jg(hlwU2p5^_hp?u4p*5S1~p zf@cYb!PMR;+=Sm$qlizJofid-_Q?S&l-c(RixwPloNx3wp`KB!80^_!ocGPq{f;yJ z-D=5dn8>VO0su(QYsE|g$-}q8&hc{_I6Aay)XSO>5E9}^JvE{x zzMA#O;fRFKLv|~HTkkxPp*O3_=p4itZu;1Wxt)$_1Zj7hlo|#XCXsXe;dal(kn8Y7 zj+mC-RMk)+%;-AW=E08j@s1K~%Cu}MK{x`C3k#0#9p+br1waxfqDnJ@Q}h_sneS++ zz#QoXUd(XfM!ne%QtPDIB;?PyAXC=cofuLmg=#V1azRhkS%WxthusURa`CbBCRbt? 
z={zMKhNQ_#Oso!S0aW3pUyx0Pb)1^mC^5lmH`;}IT~d2o+rT%ghATOIyPDJEss-g| zVcG_dY@*c}mdtEJm<;ejJy=w@);PoswnJv+UZ4_V+_xS#=JPL-^GKl_e+lYSXkIy zH(8N?Hak2JuRXeb({|};r=5k1nVkx5Iw~R-50xa!=Gj~qknFWyqg<4b0q;`@&eig# z5ai|*Nx%6r$NOBM#HNKYX5vdWSsUZHS6;__Ld({rz^^YAxn?2jJW$ zE9JOZ!t=AL^Tv;IP~9?u(HN`!sGg7ozb2B!${wGFo>UOE)A>i)Dlgf}O)o_IcA1gU zYilv33Z>|VVj;9dl4ZbX{#C}(v&2i~^Ui!{#J_`_!je+oZA?ON#=wpO)4+i^TkE(v zySNKkJG()@!4A{x{HJe&;bLr}u4XHcI&22_Lg{^_V`iKq6x8+u4Dm3RyE!9_L1ubAx z<#DmK{5TEysAFmBU7dop##PIUsLKEN*>ilx26X>@ zAd?gv=7Hp4Tf`@k{{{T3@suC{CCzCWw@{ku0gFyAXO3aTWzC4eoG}d@eJ5e z6x@)i+i-{W;G*0Vwy26I4X8iAFw0rcc+ntHjS;gs@Lj6iaU7ZuovbrmJ1Q^DRVs4| zv$Y$3O~(>5tzv}^lpM9?5}wwRwOw#5qe;x+!6mVAPf)FA8X9m|SLqx-edo@_IM!yR z`<@$MlZMxVFC{ynZO?jX%VjD^aZSBHaK2LD+QU>VJY=lrpo1L{Oj5G~9El@dew*}F zC>QVbYg4g1J>^K>rRrvy;sNp)hk0^fkn$`;dHSRj(e*_D+U7bA;Y z>-RvfC}(90*SPx}R&rzWZ?0h;G@j#24|&D&0x9{2`;9c=B|gaJ$0i@SSNP(HMb}(C zk2vR8F|qoW3dX_~0Q|e*?w=9;bN$VzS4Z=&4*oib`qS{+nu4LtPh+WH4gcDk|Jkqs z)AIZGo%&z>{Mxhq;b{#szy76z`>XM<9mXHVK19FmH~#A2*WKL@2e&Bx=k9;o>;3P& z3{(Er%lEy>uU>vt&p)2Uf%>;zzH8}Uz5J^3et03I{qE(5=KIz3uhsg8Apqcq+4=sh ncK>Sr*L?f4`4PiU=Kr02bu@7?IQcgHBnEV1Fc@3LHj@k@dl|}3sLZt{yFu35SRygE z5JDsq5y~}%RPN|L-_!T?-FyDedCvL&&v}36dH&D)p7({@Fq~ik{N1DgDZ#qwAll1u zXQp*@AR2|mg`tpPVZnhuNNiwe2skpt7Zi%b1Y$s;VW<$_P@hX^R0tM?35!4>eK7$j z6c$da$p>KkUO@`*>gx4%6axUDPCMrKBn%c8jKYAT(7}UFk3;5Fxq1cO8Z)y+=gFZ= zL`e}GD$$$wz5#p zCOPQ!UnVObiUBngQFi8IivFZZ{W%c>>Kqk5qP+C96;XorS@ZAKnm*nA3Kv=bArRe? z2`pG!&vY+|oi;%GLVag^2Ci=Ej@{DOIY%g0yK)OvJsF7UnzwrHH#9kUeziw|t@gB! zyRpLdZ$UZswp;K^PQmHQkJ1VH@V54TVUaI4B)+oLx>mC=e=Re7Q8$#u{xkxgBbj|` zEcBVtnFrZri;TR*(|KXNO9!=<8Y2>UTYT<>#-i3F?A~Sb( z(o4F2lRHl*;`f2BqMu~a&Ip)T?@y~`t0Mz!99>%rgxI_Bo6qoJH-+>ghCv+r79s{= zKH3ZIvtCQ-^NV?Ka``aFWKv~ht`Y^hi#U&+Q*sGK2`(@!GG&NZsM;m8^KT0n2);=0 zItdj|IjiDcAmDIrWarJ<++973;YjAKdb(+@(g6T<-Y5 znBHGaEA}qN8=TvMEOoZBAqVAU6pU<*Uujk0cqS8bwutT}Kkl2H*~M}SD^-xIZm?I; zuz5~YDKGTT`Qc#|UXLS^P+EA$m70#I*kd%`bFAt3@D*KToGdc%qQ-;)eMn|$v)pF{R($0hQ{Q14EbSJf=7FFscY2R;EN5{8JgKuPmn+uN$2;J|-Rp!c#Q6F7 z&@TK*Z=@l+76%45bUvfX=>bb{^RDH9#@}p z^by18%M$j(@^0>@rgx(^E4Mn;Kb@%4Ye(se?7!D<_t8R>K`Wu}#YX9_8QwWqEeU

    F#(eioN{k% z0KPdnIl8h&|5edBrYk?-CBpIIM)LhrXu3i~J$p@TjkwT^TBNI0-FCcK+Tp#;SFGpVI(FC^3_Ek<~{5}!Ih;Y z>SgV@?a_^cgU*cZA-DqBHnOT!-8I}PF&XaY?4<2@3+*@)LL!@w+^hEg@<1T(xq)NZ zdOj7aF_RXORe)+1lvSTNXqojrsa@kf0cT8R$PHq6t&ZSGsa2}W}#>byI!ei zb%GU&X;6aQX&8rZdy-wAoXTdZO63MYTS=5Y=lc;+{ov&)yS*#P2g6A!@!(mu=)z|9 zM-Q{R6}O$$(;&{J3vq+6XguFj29#wE&}$A%T7<%y+@oWtjvqsIi&vr7V)7j3Zl>PT$##PW%Mu zpVnk_(~^OWEEw4$b0Y1n2>RCi@2}wEbqecjbuhJ*kK~?!6uzLd)b)?Uhw~q5PqD(2 z-B0#^lzlLl<;`wm!n222y6|#EOusV2 z)U(97O=y8gG0@B^pK)3m2KpqLOi%BcT+{_-#oaKp*YuAt1qUMU#_nZ*)#JiAk13K&1F{y$t~6ODL$6%lfz57{*GPV>$hUI$J}*2 z#gtBMlJo+x308dN08MSk20FoL`j2HEAX!7DNL`o(OP}WSW(yXgCGt3dWuSzG$f@)} zIbfVvZhHUM%;!_8@?y{H=q(Mas~8QXsOMmZ&rB;@KZ9Qef)ONE;%;A+;7q1tcC(wZ z4kPeo0Y~mNFU@V7!?~ldSlU7eG#dLAeKk^U}-2^_rekkyc?IM63{dz;?@TZ z$YSRese*(MBzF3ey8N6L zfZ(RW=!yo%&R1CJzKV0C0c#3ccYKCSx&t*bj=1jQBc&SE6yw0QtUA)|Mm!WKr@BEU4xEZ<$GMCu@CV7!SML&kM^IK z2)8*#{d@HLfpDNre+0pwaX-7%58N`%yZ+0weg^)mn;*byO#h;ypG*AAw;v^PY4oGT X;qUB&+W;AlSAeuflEzMf?-}|pl<4e` literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank_with_header.xls b/pandas/tests/io/data/excel/blank_with_header.xls new file mode 100644 index 0000000000000000000000000000000000000000..a2e6350808147046455deb333b1f74cfd19c9c58 GIT binary patch literal 23040 zcmeHPYiwM_6+U;pYp>%x>^z(&$&C{yw&R#&H!sLz^WdpC50RjN2ue2gIoow+-^bLT$1 z3q>l`z1G~lbMDM{&YW}R+?ly^{pzpl-hSwrDesVWtfC6~G*?ZP3f;ih3ka&||HTA!)D-r{H)ijY={=tDqsA>u9o&Zmc+u6X&Ny*%!rmC5@#$#(hS| z&dAbbKp@n;F0@*dyHlJS#CbH%2AvRf&I-yxnoj3XK1z49yNBq{RmM6>QV(_0E=uG4 zzf@w6VbC&qk(E@CbYEW1q>!}LrG&jL;bBXhgN}4Ab_~{7-LM)PKWvE=!l;)(EXIgoFhVqOm=XrO&~973Ed)8dB%L0fMiKC!hS?3XT4&8# zu%mhT_9mIx-aI`#6VZK|?fwndq&jc37iyp?x~N#FZGjyJd4`$`Q5*D98d|>SQfi?X z(GC`+#gvlev)h{lVSDrPJfUHJ1LWS(w0Z})v{5tuP*tthVycx|Ok0fvwbG?_W>yRA z{Df!<%S58cbP1k$3l*laeu1y7ELaz7zD?uND~SG?GsUlUI%P7QLyMsQL*DYIa|d@T z??q=+xAOnbbc0N8o^^yQeMXQIdU!rh4?igM@1+0GgZ={#`jZ~?;~w;Wa{_0ALqFg4pj+}d%Ukj|>5q8eJmW!s-h=*g5Bk?V=s)qGmn+YY1Wt`Z zKbD_hG-UbJy>)f>*W6!2;bWqFU4h=!b#lhZ85BMt=n=#a;fMc50dYwGZ1$kHdeB>3 z>EnRu*55}+dcprKf3A}B*J(2@XJCvaOrY?B-=v;Qm!s1r8`>Rp6dc zRt4^QWmVw5R8|EJeq~kQHG{G$w3b_8ez_Ie%B_GwWO;2^ZmYokPYnXb;piGIYG90s zcp>|=ULh3ChZF9Y)_B-7ogf%k4-b+{_=Cht0g1)!AbJp71dyW99nEoyzM#OBqZ{IJ zPE}YajP~=5m;UyvoOk=1Y#>8a=?;>6*$-r?9VF!;N$&k2MUs0ob1nu*l`tnuok-l| zAxZX|1(2$I6OSA@k~8xMo1FnvTD3yMEpd!+p&gFuLS2Hgs!$1cp)Opw;1DX2NH~O2 zX;LVrKFu>LhXNVeS{1nXZ42r}+E@due*3JBKXQOw&(A&_XnA713|`JC;lLfrGRks@zV)UeoN4aKgi@#K(g)rAecWYk|5)*6Mv97 zrGW4Y3O}9j#I^*T_=8}2K0HXa-51ti<< z4>HpRf{eRPRFI>gu~du47sVDdn)A&y9UZCO%)-Iw1{!45kFunOq!3h zhC1+D4>zoCYVi~&L-w$)8NC#nu7>7kyO`( zbH8(EGgfC4ce0t|!^WDW2sV|euJ=E9%bm?Qoy{UAn-(87JdepMtt!=Z{`Y@zXER=B zv)IX|&4&$77xHYPsjjns{kuDx2|Al4PBt+gHrB9KD6Klx^~s;!bZ1k~Y!>-@+6=H+ zRD!2v+h53@cV{zEXXD~&vwYa7o|bKY_k|PgY$oY!Ts&>A4;$6fvhBY;|Drpa$vPVs zPn+k%M)kC8d-l{>cQ#XWHZGoau@4*7)3WU+-g?iS%~YL@i>JkX*r=YCZU6Yp^X_b> zF`Fg+o@Rp0k`g>E7QgSgH!Lb;jp}K!_~0w&+}X_3*|>Px zA|E!Yr^VtQo&1Iyn~``ujd54uxw%GrZeEnD@_bejZMx-W?x3@ZpztUibfyNXdJ$fI zY~J7wI=cu8|ItAkHBi-Oh>rjCdUw#qA}G8?2W{3sRSzK===`ob=$s-bd_xDFuYs!m zK(zjem)t?;7D3?&I_N?TRJA_Qzm8qu1{$R%JY8vRvU4zciQV$X)LaFv#d2e3;Rzh! 
zMG>qJ^5}@Kc^NSuNoukG0BiW`l06;$J;_XWU+-Y_7CT_5K37B5mZ1*n$Ht)~hF;y& zhanixWcb<3puC(V8SwIwU{FbTLxh{Wo?)P{nv2?U)wbNx&=~Bhz}tv*dr}>H&2_1s zp26r5O|FT#5s+&S27?{67em8!xaz^5Ditq9i9w+nK&4c?{#03z%F0Qptelj}${od` z?Yx1C++ftSixQ~-lXXzZKKv!Yiipd$4pgFSlUBA8yAX?IO}YV3f*T8U<`0BXXO31U zoQtAPKlW>-#n`ul4i##&4OF2#f@D@8JQ^B}NrnQB*r$Wnxqw^o&jUP{_})n#!!vkX1xF0j!N+!`8$1o$Eq9I;Rf2o>X8z&RdT7OPU#ax^r87V|RJ)xG=s;R^kBlZD2kJy%1s z`{9dh^kVnVVclOEjb9FY&hB4}0TSX;dw1{NRA;{buh)c`gt*i$480dd@3cjLih?&5 zVv&qKyu@7z#f_w;ke@cD`ZMVy!iyG|jcEwX8v)EMO~a=+uvjx15l`{5M7DPbZ&dOQ z50@GsbjP?|R)fHDBRoAV0t7?lwQu0EI9aX6zC=K5?8~GEqr2=Fp%LgWjFEv*yw)fq zFE3Qfkj9J5#XxK2WJt4eGNf6#qoEqQgxBac^=E{|zoIdn0O(CZ2Tq%-A)h0p&E`xf zN@C@tBvwvJV&ynsUOEtg?q=IXh(utB&1iGFF!5)Vv8yg;SKZp3>4CNu+G|yefqAyV zJ2EgF>&xlw21Be65Pcc;0B!A|VzPi>g%m6Yr-yQs0NXG8#tuVuVmD63#xCiaVQ4Y8}5)hG$@xbSkruDOLPNQ&M6~ zxs#)-q{JVHPND}Cxh&VVHjF%sZP^bi_qg62aCHNJr;R8Q(V1l-`j{rcQfw`;n%>dZ zvtuD_%)hZ51_V+1CEkrgr_>RxN5Vz^sL%g~=ook%Q|_$&qd~_(JFd`JwmU$(L!mJM zZ-B72kk{I_kk^F9^KEf(-Rtqb<{_j}kd*sj2wEN^uIq&CwUG4V$3FPYhFzNzI|RK& z(C3NsR|RE2ta)0PjqrY=R^t}jIUQzR6Og_pO5ZBZb5OcV9PcK0cdEQ@DxkcVXjZNt z;{B^YWDOh8G;1L)j@J_MH$y`hY7A*QK()fLf}cX-xE4~NV2HO@ST?n+6bk1 z3~r2}PlO)DzAD_Yat8Fn1>S(a2uGq-SeyGXucy;$KIMNO(r%>~%)k@mip%_LXh|wLO@{Y=dF;mxly;N97|MS}`$-*OlNeh;WLFo7uz-gZ ztTL9R2^RR=R(KGDsK5$eq!kpi{{LToipON7c!=5C${zdnZ4X}faMPaJ$M1>Kyt&W4 z$zkYeBz}Lz_g5ov7`_ULL(5L2u}BAz_>prr5?1#xeA?|bAkaXdfj|R+1_BKP z8VEEHXduu)pn*UGfd&E%WB%Xz;gdZ7=MPwUUn0-{HzV=K zVAmn>KI>bN_ye~)ka%u?FA~rDdH&DK&5t1QdcYG%ygqOSNnH<^jr`UD49Hv4spNtF^dWwek7Kmp#aGOe5B+yKgy6EgUSTcE7cXPQ%Q%fh zK^BA;B^YE|UwZEWUdKNm>IZ8H&`EFv8VEEHXduu)pn*UGfd&E%1R4l55NIIKz?WJB zJdfw8FVAuDTAp|JVZ)I%XXbbMJlE$iGPFR zzpH_3u)UFYY?|1z&fkdgHuZ9jdWwjRWGof?ks0MP(J67JE36;;ne8Yk+rT}I&k=SB zIrd6&$-npyum%1sL#Y+V@&2#LE--&Gm#6N literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank_with_header.xlsb b/pandas/tests/io/data/excel/blank_with_header.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..3c241513d221abd0276082638a7e20f1a0c00a6a GIT binary patch literal 9129 zcmeHMWmr^e*B%OmYA=02crNFaWF)9F^nH0Dx^Q z0DugDi>@zkZwH6k!S89gIY6BZd0lN`^y!%B?5O~B;vro(kw{;_Ae%#IK5tN<S)>cXWq5WPkF;Ph>=|Nf$Lupg!Eck$nL48(TWv0vb zWlRPB%g`-Kih9+ur})5(x=A^93#OsC<^bOt%~wIsS6Nw0LX2K>x^Z!<7T%c>on>$}N~f3y+|MhTajqRekez-lVxs4OQwb=Hj60BQ$~&#-e3LL-9#J*pu3- zHCH8Jaq@&1I~ga1!Q`OvU**T$maFgK?xt{TWuxbpL`0zs#ez(TC*sR{U9ri^LIMV3uIrui0dujYTMj$NZA8jB*_Vg43p#GPDmiL)%+C_S)8Zut-kpX?r z5ejqS()QuVp>Q%;SD0#~8W0}UV3>?Yko?X{k&!WS}43SA%B!?{Ep{R}-G zzx9}px=kBWy{!Sgc!_I4bE7**V^>0hyRoGGnNGa|M_`)#t%s@M_cdc9C{L_qsy}Ky z(v%%a#8L0;neQAddudxRcc~|ZtP*aHwUIPLCCo*~udgh8FM}HIN;Y2T!rP>96R=R9Ax3^!vFxN0k~+cFuvbH#l_yy#?;>4hS$`}?vJoRL)Jg!xBuH$RZP1z zGXE>?`k(m5dGL%{$zO;n;qZrzZeeJ-9nmA01wRFz*5qBfPFVLj4u&bq z$;1+m{pd#UrsZ^|W7;51ySnP@PK`nKO+mx~yhRA4#7t|8rn82OKMqaPi}+8knOENCi&p(f*rB~OIG6e)}mlSPm(gJAHz0Hg*}f;u2G z8UPg$Nh}T%V|Az*#CXoaGLAQu4R7WlSQp#~Vg_fTceYxznzdq1!2?VK_*0chB5#_v z0zzg1{W;)L0saoNr&x*T%)Gh+prBR|NWm)!fOYXoD8X?kffJ>+3_3l&o>zHLT?_-( z0;4d!IHjc#ComSUuMVX@#)T?beh4-G5U25!DuYA7Jg&AC^ju>p4~uMQoIG_&noHVf z;|5+4P6g8w6JaJMd3+{hwEQBN(!~UFB9U)ckXE2Pw}O);6bg6ZJNx7R9#7T~NL&ZI zXS9ECoj0-s%cBJe_-g>T0~AX6`QiK@I!%;F(U;X^N#p8rd>DzJnkqyGfGa7fMkuMY zhAFAy_9&^d96`n49Iym*4o%d{R~9WF%E;8ftQ4@weG_`+4| zaET-At(}aQ3=LRtGa=3htDAfy8ER(=K`!wZS_JQj)Yc*2&>*#7`bmr5vctJ9XSo5^ zU8m9r0%A{APKx_gj4rOoDkSi_0v|rxMr($QU?;{$Vp>g%kHMR!r5jEO1q^n|G?4IuT@Gn zk-oV5jRkksjI{ev2|J$wkNXipw;{gQwAWSXu!&zE=N0me&EM}M_#gHWn}yOF z&c!6pmN9n49C9Cro(=1|F?I%?GHoV}pDdtRDeFIM<^x7KSqud>xY{{*asT zZTf^{96dWyrUp0#TaZKtNo!tlvB`mnBmIe?IV)O#cn7wX@mdG1l6+rPQD9HAvBNN} 
z(p4}&Qj1?~vG4}wV^-uF5b`ER-pFuuvNUmoLNwuyR(2Ln-<3<~Zmnyqtp(KAA0Y#k zGoB%ZA&}uk9ni2g$OP?g0$?99t;LEonm)4YBK#c}?goRN0cJ+Gt6<0i6}#20w&j@tbvkad03X-pSjv#dk&i9XmG@4pt&5t^V+iVTu!r!EuS7xzB(AF z)-;sN245Xes%_Mjcb#gWeB{Ifn#o8S*Dk(z!!3sY0g-%ZK1`l$;yD7Cr}BYl zMrSqmZtpn7=t!bT_^pP>{_T%yuGgEzcV->Dm-+KOm^^M+vJj}+9()k5fBYQJ(XXr!56SxOP>V94QIrR`FQv6+SJ_VO_l_DQ+{j zcsAw$7foKq{7DT=E;8>3FZ)Iv>avj$6+f%5X_VJ$cXr?^9zIG5FW<(4R|Q-{pY`EO zk;!)^h>&B8i{{w)CAHI%EdWcxaU5BXEEQ7LJ*-Y#=l1&A z!oO-%pl$PR5B;=1y6c1-QQ7qM0_Jd@gn&@b*-)ju_>e%{A?5zP3m8C%(5&drro~(KB zVgo+P1Y8V@N??P8c(hK@hBy0|60?ZRky2QPjie&*9&g)!a<{f#clDh_Pd40uT3xsC z$>g(47aI?(?dOs)k~}@>-^^a<-(+hcPPrm|w7A!kF&!v<*NEr~QMfPTmv(#U!_DQ0 z9=coa-S+pMSXXgRJwE;tbI!>AH!$;TUHDcoGav*gjPT`oBcFKkfn zATNk?X9}g6a%~>Ej*{5`50*|5^#7tx!%|kiFQQ7`Msjm`JxJN5i5$#1PiNxP##a;0WoV zy`#NmxS|u}86Yz0YE`~^d&=){OUWS+J6z=nW{nSDfTLzYg<<`UAXRO8mH>`UG~HA*P-;m9 z8;yt;IP{UeuK${re*=&WG&LyM$sEbCER*i18h6y`$YLKB=;Zr`tv^<(VW^9tcx0BW zvMTR+Nrp;Sv*pFOO$Sy>Z2^&2Mg3eHnnm9p`1P0Fwusfanirq7pxl0AfLGY!p@W+p zHTT@OUjsNkK-FbxU{GXEz8Q3UILnc!aM=Vn@0Wj{E+gajcP zSbe2jUQCtwlp@Rzv3t8nGtH%@p3ZK2o66E-!Uc zY@9rNcnz#VzjQkyAr8yo&McS7r;|QYVeOhAb8&$V9v`u>B;EJV37ijP7&Awj`Ac(6 zj9zlon)0+QO;~#C^L_9Nv4acq=nOJYoe1&NM%Y=l!ByO~37kz754)tY3Z8b8lQcLu z`ZdE?cffx4vN?wY(nwe2)vkRcb|}lPwl%8g6y{q`;j3@B z^**#hv}6tAb<0|2B8#F{ylo!Z@2cbG+P%9zf=#djIjQKj!V1HXoFQNCT!ie8+t^H| zyl{tseKqXpgfVW+ofx`ii}QU`ielp1MbkEwh`xT{ za10Bc>Zh;tX6R-$P%clB=9XQCkE3hP8bUd&qA%3Uky4s3q~&6;dvf(e+MWE8qA5(i zB3p;7Y5%d$6!>!nMd>r)7ZoxWeQ?t5=+POgW!VgGbF{ql*x_6p1~Lr{s2OR_Sc_ZEIbj2^=Eqnj+p6G!=#?X4wl*ayrU3qw_nQ zwoY@MCyW@)L|KVSuj04M_1Cp@ucg_beH9scmC|gF)br=+{$I=sXS)CYo)^+ISmXYy zeLU-#|Ckq$Gc*DA`x+KsWx}j8->NXOT1Ux2baa!Ia-MZTOr15-k%&?7rRSMb6NDuEU512&yl5XDZ+&w3<@8(Qn`HUDjlP1sX_5Eu z)9>$dAF`J=adf+G2Z6etO@F=7#sG9Ipf`3FKN$J(FZ?2I!(jI=vLA#Y+h`IbDFd-L zQ+Kp?aN;wwcZB|a7V$3^gEXCc^u1dR^yEzwH;#x1yO-IMlU3gFYLq;$Xe_4=v&UQ! 
zeiKVtd%VBX>-@@4P^w;Zw?*`7<|lbQOmUEoj<`!+LELbga$(G zhp8QC6wM5L%zQ)xY>acn8O0c|RwEDE$q8i~2(X@}CN)8n*CGbJIliS}g!MYb_FgGl zg@=q<;E#tS9~wg~##VP5j&p*)Y?2)nyBY~=%jYf`-j~ti6q&`2d!>dL^BU*~k_81O zYjLbtZPVhLf3AP+&LS&8Q{o0Spf?6qeXtFAyiEI9&}yF^JtYirqwB-r!a7Gv8h5xd zsnT@h*xXD&^Mx>_l_VY=%GmD6&G*?Umo=u`m7R*NMtS z)hQ<*>5$e)-UIlfLz*}^{Ogb3gZQ^ADMr?28U*xT#XqF;TPaI|aHvLV$3mXdB4D*Q zAL-2A=eAarONc9f?+Hy5?6P~bXe+q7zL!b0Ua2`6N#y=<00NK7eCnlv=$nh|@mOD; z6K6qt$pJMZgp>x#tTqnvxz871L{Os#ldwWcmEbnbK6z=+7=qsft3W-tz_iES!+E`U z2%RhXpw?KchwYPo#wzUgWBT;Tq=Q#Hg-c7VC-EZ`R~d^YrITz*qbcnh90-|LiK_2}FAc3NTN$yBOZkdS+BWmT;2um> zn?SuuCJ|F=!0qWhwG4I)f_aC0|lX`6ZibMTfzI_e#@YyvirDkGU*Os zx|T5dqg?0SY4OHaqU)`o^_wF5wQu7#tE0kuH_WUUlP9d*b6R(adzuk96O71(B0Zwn zAo#p*h+fIV6hR1lVtyKr?&0B%$2Oa!JB3C1!45AzJ&2hv+fr}td9jULXbZ`{Iqq;@ zFHNe%qmxSk1rL*I!ga@37dgZ51(xqS?=w&FA-C%1Qv6@1@Uw#apHn!=4fLAtuP00RKdR<8T=@?k7O$T{r~|L2bD8EN0PblXAS-12m)0YwRJid zR>J#J_$T5O6$O>*JVznGM@9K1@reqE${d~p5)q&Ro~`a|`%zI)>A7>1)!$HlBI{5Q zP>Hy6glkA%?mON0CoP96ib^@1i^dV1iJ}rvr~s&a{W$>qBB~93FzR`ij|%wrd+kht z3V3GJGbmIP)Hd~eSrV5}QGQs~dDDsth?+9a0i}@S|3CMDs91mP_|B04fN&(%PyHXN z_+RVukK%Ml;_y%L->WsM^xrFjhD=?5tH>{{jGFh(*P`)HSigq>7BYbV0Hny@G$sI0 J^gVF`{trvud>{Y- literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank_with_header.xlsm b/pandas/tests/io/data/excel/blank_with_header.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..5b2ed0055b243b198bbc43d7ea3e7ce5391eda46 GIT binary patch literal 8813 zcmeHMXH-+$)(us9lO~}lRS*b8snU^-bfrq@y@uXt_y7U`0B{*_Ie5n)5DNfE zzykm-0tm286x>`PHm;C+`u^@VUS|A$F3zkaxY*ne0N9xK|7rijEAUExR_g_bxfd48D|)wgT46lgEK(%-BoB3!x6$~L5@v*bLyf=C5&(d z!lA@k9^^1imkB9_QVIpcCGZqUy^K^b#niNfbyMz+>~!=T-Vmrw_V>3(^b5ZQoTveF4H8_proU!ZOHHF7!WfY zsh#?C(x@%%Ah;xDPG-mBR9PCk0P=3<@R%=NMEM4wOAV*czrktLYOrkkfq6u^hFDv8 z%G=l=)DBbNI<&5T#==r2=GN_(x<1tRI)QfXKC zaQx9#g`l{sO+%&avOe>3_Dz~vA zYK1KWryTt%iwwyhSYEMnpzdS1&j`LVJLi1Q51x^4Id%M~E-{gzHRFWMQiSO?HSx}d zXIUypXAo>Zl78ut&D)Hnn=zft+I&(INJoMTffp*IkE9OknnX0+`~;SREwi+9kC zMfQ&m5W`XndAs+n=Pt!W?jzcmKCrd-`j~A5IgAh#U#_NOQHxkU%8KRhxVTaOy0ex^HCdbGgZ+ zE40cE@E6lXvY!OYpvq6*mv42`;X4)>Ksl}wJS%%*Ji~n;hLDzuWSwjrEvSEFAseCK z5!w{mrR8;^`0xvRNdgC6%$qGZeAJ@)st27bNmlhDaTnB~K( z_cJ5hpWa~AicA?kb82gKRS)u5vP;RTzT59cH*8HIp70UH%9~@%-Z#J*-_2Ux^+7ad zAlsr2*k!E_nz;_{Uk#g(jLng6ed;)A9^r3H6BElH6GB9i{3Y#lF>jyoHle_#)zv;< zbS!Mva)*0x^!+HJs_-~ZUp{OV z@3IcMK3+80pzpqO_OnjA#foAAGMLYl%l%$ao4iK%r(l{zq8Kth9*HK46 zYp(8s)Kmm->E&`|cZiPZy&*{}UzXkt0xJ9LG4!!dZw{}MeKRnZR>YOZ3b^U<%w*Y^ z*tD79xoAR061~UjDS;e;Y%f0FoH<75HzU+bqUtO`hmP@K30s->iV?z+%QWR4urK}6 z2@O%h!J*?RA8JyKbNI_uV@8-?>oF9E@jtzxdmvHnkvB_b_nNh-LHV0Vo>oAB50Ztd zJ5ZiOVmfeLk7-fwi+4O(O_OkZvRQ)1>uqADdk<<^P&-AdAFz{8!^i|{W$&k{RjIwI z5Rg-p7+7$Zlsx0_p~Ecp-wAzeS>}>~4*-+{zf090+X1n+aj_Bj@f7?{=MAIVZZkKi zJISYgSs`vNT#djB-U-vS`IVZ$Zr505huo~YdL^=>s=+|?Vi;CeD>xgg?GwnE7lqvh zDdIGVt;9K^*;i4kqkpMBiVvKeh*i828gw?l$mab89T``waD{!zW9ErjRX|*~G|QNn zeV}j5sXCB3ji}G+m7fNB4yLk&o;=<%>&fR~rayS~ z&E~k8h&z<}z1S>RvvCHK6%)1`FGTjWlh{}T1BHsE9dG9(dH8;?SqpOL~|N+f(@1~V6*v?QSY^g z9mvo&HIm!Tczw#`j*sUN{_}C6s;PDFzy??$mc{%9&B|*^d0)QWs!Sib7Phe{^vShB zr?Tw;`}7M(u1SGb`G}fLts-f^^knunaKbh?a0m&6_K0R}e;M{^nt^QNTki~U1f8ip zG4kEPrxs$kosB{lpN6R2jMWgE;frBf7;T5{;c4p69D|ThO5%X(!qBsihkAP-57QT8 zU*Bi>202+B+38~8l_fsZ3$W?&lr4B|oF?v!gDq&XW^=aS9>( z%Oi;bMJM|k1Bunrr>AXw9NRA8%1Q^&TZ$zn9|)MX2_V8T9vob39vw7Y6nb`eWZ^!9 zYlTEi?<&P@ZJNnEl6~*U?r`In7tQ)wGeucGv?p8vf@|`qALn{sI&U%Fv4B{{RuvV3S87o#RLeO#5`|Y8 zGgBEC;%G=iEzs#Nu#RY9Avg*M9`9r3d}V%JZVA3p z|BGJciksUGqTM>WF-$12YnC!lJp!Gi{T80cEe}zJJ;?*CKHk~jY^uK5a=Hjf5qCvn z+_=Krc&dBmTyNv&kuXh=paH&GtKMTahBLD_w$vXRm$s{%n-DJ3-Gga%UlnW3qmRk6 
zR+*#{zY<#ae5U zXo@1We;X@@7-p1E4WUq%_Z!b8Az?IStuczky~CnILnaTCoj2bVNtJyF$BC!nf8TB& zqwe4&wfAkX@CNXR(ULc|+c72O({?NK@O_~8B{l<#;b)ghy*1xGeKlDPSCng-0vnFW z7v!1@#n~p5bn)FW%Abb4M1E*-1m&1qlmOzMQrvAJ>#HO|`rx=SXeCv1G=owk_^yeMEp*}hGs zEtz}&8b2pj;)u?S)AdOP=Fc9Z`<(9jF@4-v)#|evmbA}<)2pLMkM+{uSRcN~uhb9} zC3JK@27l^+*XUiKx;#OaRZL1MoEZa1yw1gYW{mSHK!+$pzqDSpwgU^fU9F1c12I$Q zytweBq@I`!?X!QKuPo?c9DURxn~0^bM;P){LSL>(Fw=bz%o7n3zAof_8hg|^B(B~CzYb%cz#@e{ zs9|}mO;JZ~GoUzswcBxR&PKH^0~-G*qFJccGNu1ASiuY@Kfvv)HbJp#g{m<=$)@#5 z%a#LvGWP5T>fNEo)`xRWUhj%FgPesTbzPZmkZRhVnRd6|yt`51gD@)mgiuo2>n<6p zurtDXMPwq!t9d*{{T^mgS$S8Tq58Ve`{#)-8M-XyC^+0xqxM*)&mL{(p1vy<9ltCS zhE8~Q9LCj0C&ykpQRf|GTJzJ%7vrZ#+8C-f- zbY5M$8(r4u-P_sOSWh<`1Z~XVGwwgQBz14)tM3}+?3^edoDzAIBzhSVPho!D0-jQX zqn=4rkQ%Oghj?Bbx&*Q|u8mvd0>c}F*x5H}@eMN#YvjB^7F`4$s6pAI;8IDMk)&2U zJNOJsYp(nlWeKX1Yz1pDS7)HAan*WcbgulcRR3b2O4ar)-dN@*6qH&?^3}fP@tLf; zg;{wwR$c7KDv~;fTl&Y4FcwnfGXYz~6y8FIz{X%wU+|OWkF#Xvp<;&e9ONzs-HX80 zRzk=$hr`w#mBC&7_A6Ti7x}WXl|_YAN_^^KeAgUZZaNh+v=)kcv2ot#CY%0J|2pa# z2v;Qj+w zeyBbSS6pYla|LIgEMl`^!`bXkm(K`4_heTX*u*#622FbDi(HCbeB z>rAzx)xR!=iB&PVYFR1E3ttOdKC8Xkcke4*k$9qNs}=)9d-tMZ$XI*m9VWqz1p+8N zrRmedoHT*xI7eJws~BcRg8&jkPPH!95^~nILoTOo&et-8ybX)Q#UyTEi+LwJ6a7G9 zem*GR7UyRcl5# zy~8Qrf~#ZDgWKYii*G@cWok!&d(U(7=3Q1JLTyQjRApf_g?&%Zr$IGa;ohj12vwQw z1DZV$XBq;IrxD2AHbz@gf&+DLY>}Fo8)VPtd6zmOIAF8q8Ni^V%4~jC|9E^nP<;qD zQKYAMfY*c7>EpfR$rndFx#8MJD&)jdFa3Q1JqwS&KBh1}U{zRY1a@6>bcp98xw5F^M22Jg+Tc=Z){^|+F)U5>NZXc*nD0^Uf>-ZYQNjDwq-O2WN~rv$t2^BoNciJv+quLl|3uI zfTcULmb6UgBkCWSEhUQnII0Oyh8TT@Aq-mB)v@6s{0*M8ORYL&j(GN=FFqQq74ht< z&%IGDt38d@(hsvsP-R##wvS}g$B*)6R^nJ^yP>}w`Cj@dJT1{{>wbGF?=l6KIDEOF zdz~JiIKP>qw4$FFpv<7)%_z(hNyxBq+7h!!V``nUw(yP8>{3ACt=)W5xFjf<98HZ9 zn+h;Bb+~qSP4?0IiIEhosqLHgD-Q-8Ox=wgl<;!ku}S#wbm7ZB9W5alz8j5-6{)$9 z*Gdt^n!%L(oa{n5H>->MzVTJsbpjL7p9{&mfp2_E`W90=w{SzhRLjY(kPtxZo83)k zaoYC6jh;@3JGmb1dyWX<{?w1tiY<%G~C)nH=fhq<}EC5HA0_a;-(`eZc<<} zU)F9`+MaIiUIJiu_wN+KB#HmYcVVaqOP6z61W`n95g)TeZ8pH**VLf~S=QOijn2=t z0u7fut~sj9=jHf?o(|q{fVl*&xrl7-9KbK_wCT-2$%0Um){s1x%w;)cv5f#Omm*eZ=)og-!1({F7-&Bke zH5xLRfcmo>j*^jt$PDed<|$bTjw;C$XM2Z<=XA`8b4nJ-P_Zd{4#7EHDf=at8k)2s zE<92@uTlclxsiy*P%SKQrJ;l7$p}TF(;@{ZT=6|a zEoxf!K<^X0&o}Ol*Fq|Ip}_rfxSQZgAO};~Qs1beMCZhTcIuW)NwQ0U2!h*@^_cy3 zKdq@XlVR+1f`v03v$fb!VSRn>+2k_Eh8eht>b+F?I_GPrwc@1Go9mc?rm%e;qNsaZ zCNAs0-%b-#8360JfA=lt8c#B@@VbZ3A^QT$Z-Z)RNz<4)#>2m>Xv$wb+{(?<<{uZw ztl7U>RGQiCE>`;JEzJ{h(kUO=mGRU@b;z4!e!;4cZpkQLU~L($JIk3l+QmDO)BJ__ zR+qS&igJEMlA%RGg?Eg7$^(LDkIF_9Np8}mAK)tud-3@jIrXDx85x#V>>h(O#%Y;d zT%HHu@e{Fs$^l(64sfII)w))^P({Zm5x0vrd>dP8X3^Edbq}CL(DH)DkH2N58LRgp zTggq`1Tu#X=Qr5!=i2gkXi1Gg>NCHa(}SntC?4fuwZlrnf_;yaH@X|$!w8QGVNB*~ zu64eb;P7flzVs_}MeXQN748G6%m_l_w^KKY_>lCjOG*%O-ZH%)`1QbRExThipIfl~ z+lKmr%B=VFL!%J41T2an(mkEQ@8cUsHkqJ0{rx`4WP?s;uBIB=g%lET0|pw^!~p|o zSZt?XIeQ0!(*1pPK6Ra_Gxl0*Yrmj+vAP}S2l(K!ITo50iV3WLob*`Od>Ezj`(pZ^ zd+=xfo6@?r`d+|Jx zVgSYsLm3xmu$42|RL95F%EOrRk&`1`A<7NbCx9D>_y1}C!z(ZVF;?#6CW+idy^`rG z=YPi>-P>Y(>h>IY9Y1<-6PV6S&yq9Oal(Ht33J9l_Z=TugPsJW@b9{|*{d{!2hY9> zJ@QGkGE*+8%O%}*I)g0M;1ty<0c{G*93%y3j7@M#9Pl$W8@od%dCo-XtI7(Jdvnp1 z`O>fx;tzq=uiv_J+R&l3FC?($CYV{!7~%>5#U+`@nXMDq&)VoQ&62E6a$ZlpFdOqL zQl^N}xl?|&mf}J!R^P+^+N^NAAouG#v$+##L*lwn$6GS~>A2?6X@s<#1BJ~vNUz``d^*x8KsAq8LR<3eP80rV35djQBcB3hY)T}aY_X>t0Y0qvBdGl9>_ZE51rE2H%0^bfgIf90-XG`e|uEblA=)qfjD*6-jL|!q~EI zzL|v{L$gzyzlC#gA$e>A-09LtJKLuHM5Ilwe|nS!5noqV$N-JMfV5tho8buYiwXjR z*a(oCx?4GVaB+Tr{sYecVMP8i_4t%wwN7rFH@gaLG9S-BIV)fk6t}7WG_AC@p&*jcmFp``+GsdcunN>%fL{HE<6mFMf>KLOpcu_NgL zKBh8%Fu&kv`UnQe0ZpGjtBH#Pwm>czK>XC|B-s1g?#0R6n*HXsqp1|ntlmRb??rad 
zXmE&4!tF88{m@Ir&&0mgH1eyuJmOjl0A;AfOXd-azV%v_vBrKrJohYH`mJIZ=>c?{ zu$?+r`}hLp8YDzUL%jRH-P%%z?SBMK zSbNxLfLn^Q6-1;xUaHdY1i-AhJklkQe$3(`kt)+7*~eKx9>#bPATd&Mxm2>-MUG~l ztDD4h8>6oHg~1%_sZhJPSQy<@!&q+ZGc(Ch8MmOupiVW93)!bfjD@jGsFCk>VX!f? z%G+)%BxsrWMrH;w9V{yC3do~I>58B869nbQjRIz{%QQ?l@S$srj zAR_43*ooJYLST%bjbR`Ct@kH>MVaT>I?}-#sPvk%wK0MTx;>8_GwM2QR!R#n@StAP zq7=jNe5O=DIdQm%9aycWD4J-bm{7~bR*sY@;UNE5X=qO-Oa5vZ^``-#IOwwr)*0 zy7ms`$TwM#lw0B#V-7?4I3Fg`GWEw8E_37yPjJ{xH<R_M!uK?<5;)h zJ#1>zC)Kng`}rFmZzNm>7Hr21<0#*qQ2_a#hMM=O`Mx ztn`Q4g_2{60hF^V!APAg<{3z>pSc~`M{cx&^O+5!%8}1Vj}??^ZoX9>L!+FUjFP<- zQ71IPJKQt)F+~r7|T=3RaKBz$2B}1*pRA?9}V{c~kt-Lsl7>;As6hWO0i?enxyq z)Xj7(=2?@;i?;r$Nvs7Ga-MpSlezKY#hB-2=stLGj|9$YW3V-C_`u8k4DHnfZ^iVM zr(ZoZ56Ns1UA6plvR0Q3@k}xab9*Pfms=w zI2B%wwcg~OLhR$dQB*Ao3K#=Q;j~0+nfoGnx2e*Ssg|QjaRZ58RH^jBjZWo^)YZ}&XcWpjCzr~G9zjfd*K_mFFJV>M=tT6Bc{kZJ1HPJA5S^l0#p;(x@V!b!BrmYBGBfe=i^5uLhFP!UyBx*-Por;!uQ2d~ z_}qfwmm)e+`8;W^E9PvWfgxMMPELHQWb6E!u^AvVn;srzX=u7(ZlON2Vbg7eW}LNh zI+_hVwpPuZi_%O;2c2%L`GthG+%AhNYPI+j&EAsNLo*m(@n^RJX*^w4%;$^bBSM+8 zNTW9%v~lA~p?XiObWi@VVvJ1qY80`8)oO zykT0-)B?Dw)>lSdZTB8-mwL79=Y4LMlRNAx94xibM;gE~lwwyspC(y?8kUzoR0dYw zurj;xDMdq$xiYS%@xiZo+zsj5cfN_2R3EGjSgn6tmII2`S6MiSHjGVtc$az` zOD1)j&VtI~C4^MMdVe&)^$^crAd`nll^b5d7E%yJlPMPbv5{(Uti0p5Nh}Z^yK08e z6kvp=xcO64c-UH5fjzi>biCg)S$qooyB3SwMQE}8cDLE`IbaGz>h_j7_nU^+71kI# zN7OmvxyJmIB;Ji;K}^N`piB1_@vQXA&;y{;n{~|riiZ)!4W4g0IvQ%phy0ToARGq0 zrxaqQ>nGlug!2o6fDl6XS-c=UIEKLFt{E(`3RyV~D>per>jCznaMCKbr9pM{3X3_c z!JmHr@+_c8RAMx~1=R*NN86GmJx*9SQjWKd)Sso>7J%?koTwRq`DZaX6F~SN@92 zx_ugT`6btOe|%5Ci>6QWcqTzYdeThzPN!Wflp8IW;29>n-3JQ&2WV}#b}@)K(lg`* zc@+x1Y9hTi?Vav96aZWDggqFTAG6}k9Muko-{D5#*Pa^j45_#&j}O(2Hdpek&$vpt zLahDYrNw`-D}4X#%K!JSxF@}~XhrN7H{t~MAF%R6^&zn0JoggrL_&0a26iZutY9guWWD_dZpJlo3{NjC|N%D!f%a79Ghqp({<(opI8;^;9e zb$KQ>XTd|TKjq|pF1ex@Ye6~uGqzUJV@5j~-k`xNtO zeJvnh&?W9m#fQS@YnK;OuJOa5bjx#MoM$N1xiPhNmmJH^_Wn=Qg$Y;Qa}yRTodHZ= zWo9orZG;9{<6+IiX@{Yz__4N*lNA;sC%HB> zi_I~805;^5pz$AX_t4^k3X#=}=-g$6X`CtGV^6>J@dk7+KRVB^@->=Ir5hMOPwTuiXKok&Uzl{O_;Qnh) zU>`@TACrMWRO9Bj2_knRzM&ViGW4i6C|0#6S>j^PX(?c}vo&l-PevtwoUq#r+U@m% zhTp5JB6r{i-t)ELcfXiX3Q-$)SfLwQl1jkv)ZN%sm+ACe9GmQ>zPzqfgx(#vjrG38 z*}jb9ynfYuGEWqsz%ev&Xp&vpG0-w)Hl`vgG);?Aky=Gn&e7>6N7*7HYRv1s5QToy z9ZdGDD|}h4gV_nG3M*A=30L@hvxPb1!K6oVT>(0h*4$?N8o8ICPgsVS;OwU{Re(HL z{|mHTx0zA-28^Gx-kofR#0&;r!}LQZBz-pqTI`H#MFl-qPr z#?5}o!|$Se=Uv#dlH9S25_D84CAp4+0k&;&HJV3mJq*#WnfesLRy*k1I+$)w;{5HsSe>_6?T&ZSo4qPMy;m0eGBq->tuMIXf~h8f z(?xWY6_+U})hjM+m%puBfbKUWl_7Z39>JRgh{~6xi-m@}i>n8hxvT4U2qB8Z|J1t> zP=uyRIjnFKMC@Xp(}wTVLt%H6lXTN9GiVwdU#a=&t-9T@SC-Dsd=zxqFJK3C^4oOc z-`zijQS7&B&n4mck4#yDlVOlx&4!7!q|t!=tu;|5q~hCF7PyvmQIfk|(_H@R<;aP^ z8!?2;mUZ{R4sSwB3)1LgYGRbE0@!(IE<@gxj$t+EQ5$mirrVt*z%k(vjfJLZNfD+B z(F;dgyUADNG;s@ZX7C`PX9!!)o zU1pkn%mAAVv<`G(u~{{HT9jSa`4H(X#gU+uT8`U3=}2hMc&{-BJ8lqe!zTAs0`^9C z>Je7~+N)Rk{3?UhP`~y1m!vO73E~`92)ILJmw?qHGm@v;pJ6@T(GNV9lg*cNUB865 z@T~hWQ5UcFjLC|0OrB~a?@AOVIOX!Uv)a?0GphHJ8Cg>6Ma{&TIg-;@3Z3QE)@EHz ztubvIn>P|IiIr?I4?AoY#24M$LJTyK;maUF>YmNuT33(#UdO=*!0s-!pHrC1CSy zxp{qMT&Ddksp1%9Hpn3&+yz0bguh0(g^Rn@KOv4-vwt;6il%xe9ZAHZ>IFXTa06s; zFrv=5(3-2I%aRYci1FU3OB(Y-%cY-P>ty!VWd^8+#85MR4y_KwXSj&|CRH+{9HFfV z=JQ5jUt_Liidd+PK~>FIQW1j?KUYO1uQO}qowk|&;3Fa<5`|e!XZN~;1&w&>#SjoD zCLVQ{suk(Ml5OJ*_qV==V0k-{ZY(k40lxPDi*e#n%-fF=`Ar)fkwPt4r9zJJQo}#c zR-(UnpO0M5>&mv7$P5|nBZg%HFmKo*TrI4xZYRe(6&VE6nuFL(gKs(IC29*<(Xx6SGb@0~`!AT)^Dq_*Bx9xZT)m$PWbPC|J#Z9zxQ%N^jj}KPH@+~Tt8#{ z97_U1f&Q!F|La(OoH(v~xh`CPdI==^;pJ~K_PU4bGV!MeEkrdOF%FM^i^l7we@*v4 u4FP~|3IO1G^1p8VS1SG0x`gT%>pzKALm3qjX#fDu_YWdi8NL%V;Qs(&aoDl| literal 0 HcmV?d00001 diff --git 
a/pandas/tests/io/data/excel/chartsheet.xls b/pandas/tests/io/data/excel/chartsheet.xls new file mode 100644 index 0000000000000000000000000000000000000000..7d027400fbd527d82e7988e3f20a45e23f0936eb GIT binary patch literal 43008 zcmeHw2Y6IP_x|jrT_7Q$g$|nn1V~TlgoMx`A)!kROR|ALNMIA15)z6Eigcw5QltqY zC{m@00#WIFh;%^_P(cMzA^-QxWRkr%HvvAs?|J-v|J;3Mcjvul&di)SbI!E8d;F_1 zSKj%c%2lShS};5IB&P&(w80Jd-c`ok81CofP*i?)!FNE#*Z(02lu*!+jt;C(%ok4k znUOgvj9ta&W_yY;b`7B)8^K%|YnhT@wTv;fH;=K7OCB}M^k2Oqc^S+GD#Jb~UYNli zh=DD^dl^=lm#r*+FUQ~C=ajqodl)Orax!x=BfGo$HAWhS@PaXd)4jppJ^6cSyc^iZ zJk8e}31T(aH=tM8EJ|i%uQ?f7F*8eMNo)|a;{BgPQI8El85`!6)J0QL6N^H52eTn8 zotN6zU;y=Q5@;dyvVSVYH;`A+JV<(F^2$`0^vXY#qWM!PT0Diqz`~2HE!8FTDHIG8 zMQehAm3S(Tl274bWEG!6VSq*!)p8{D5=HX(927-s^@pGcFOo+k=v(0wHS8-f2Ud7&zi1Vvq@5}14I+QSlXS|RwNLh!|f;Oh&) z-zx+!tUPZOLcgI9yfFTS(SJ|~pZ|7wcJO>Fpo!G&^+_T4M}^=Y7lLms1Q+!!n6D^L zsRHHM$oXKDCCWLkOquzm=9gmjqFu`5;faZRYwoSd>_vGTU_p4jeg^@+q<+>d1otfj zucwQ{nhRWGPfAKWPydA;I!XLAi^bQP7`yR$VNfaf2;5!bCGv3-V_9$zf<6z|te;~eAk$jvbo>xwdel96f#=Vp~Zl+cwA&5rc!VR?z4 z#^8~)$df}^iQCj`yi%TxCjsSME!eq3LrfWpAInhTL||ktu=95KMz)^&68MMG@Vv%? z35evm5{SX!bL2n;d9EDnpEm)S)aS~9+T^(skkUR^0&0}!N9#d67noef!#O!Bp)Ky?Zh9dm)) z`jd5oJ%B^B%W@ZjGFJ08^#K0CSVT0ioze?(08H%=0O+af~sBry4 z-M@doK%v6J!wVG37L$b{EVoQd7RnYYSg5+HLj93Zh`99%Wh<4xCn;@jY=}aM-bm6f zP$-#MPL?50QUz;PFRx~#|8(i9^y<~C^O#5-uobP$%oclgacetf7>Xe}Ce;jWrPhIh zwZPmtS81pg1%@-SW9H;!7pTRSEn5mGjV&f?K^S=RODc^mR&dCK`M6CjNDow^%}{!1 z`eSn>u+KBE1!_TZr|Q`nHIlo;L~qnHxt>UsZ?_RC*vOJe;dYA##`#PNl#XO<$1I7q zr7LJbFa$Hz2*41k<6Z1o8$>Q;{uoS+#L;^4{ zXXa#YNj!h`evSiuCvSsMKrLFEplr%$vk5Mkjb{C z$jwHvBwoDzP(Par+H4vY%qF;qY{amZ3$27D@!C&c>u2Lm*@PCYX*H2e=rd?qcGQQ5 zzthjAqBa{HP4g-u8(U4wj{5!BM*VCmX|vJMv^qs(W2;6p@Xsre#MRzVe}dHq|Jb zhDB?d3E4D!22BeNnX#^rwyCboMn}`Ui^#@S(}F{O-8@CVHZ`=_=xCZx5!u*kT5!mb zBS-YJsj1CIN7H()X&C4n~jd9g%**G zt)>NstUEqKFB@m(iAgNW^P4R-ezSUxQ=z_+vDVf@f+;ylT)Do|nR(?ig+^(adT5y1 zY7t|1GlKM)dgn8R{%D!j(lE8v8OCTWfqQjrNo(aZg|=v!*3~ez)euy9AJb=AJD(|Z zL(9}p!_-zE80)+Ej6Tyk`Ane+TBbo7rnarm*ocko^q4Bl2i}!MniJ+IP1G%KsG4KP zBC*_zWloMCm26H=N==!gOjI*8y62Q);xJM? 
z8-*iDW(>WOSSp5Kw2nf)mzdIWmdt>bmv{z_3_C=)N$VK~5ZlHSRjh<6HrMEi(-hc^ zXf?zVKg`t1lAJt8$<&mqVophvYX}B|@oX4|hOO`|886#X>8g(`6|Fxx=9NmsWT`|< zmP*9tV$rsKmR-&q#iTBZgB_~g3L+Vfml*|MjA(tc96{@&p>@RR#(Y{6>kCiNKQE16 zmOawc)}*n|QIKX7PJ&ta*f*Yy%S+*(<%AUeniNhs&IMAa^QyoCFal?7#*RofraAHEo0N?DDBj&j16+!SQ|DLhw-TGQ+WHQq9EfS^X@3lSm*;SUdh&{#?Do*5~v%N zXWFHvLQ$Kk`5LO^*t2#JTB=4>X+>P}LS~XzX`Cj*45GcooRE|<*o3))rl59ikZgMt zh;%;*1s#k6id_g|=Y$T^y8BSUj#SYJB@>2luCX*G88*ye`?So<=r9-duMp-r;VZUZ zMMa2th%Iy2KrQpKI?RO~EQGlj8W=hAP+R7(g&KokS5AjH*+bD{DzVHpmShdkkgP+> zh*405QR*f$mPdU$K(ZsCi`3}(-9HEG{<6_%bKq}we>V(}V3(qjQifR)a{GTLO<^j* zF40lL6!d-zn%vd`*yU+so-LBrhnBb8PRd}dW7DIOoQJ2-gkY#6Rx|JIUVwEd>1HQsK=k=Gy)L%NM zrd#GHgVa2XCDC7qM>-0{P(NvTX`x!0G+JcNH?$%qO`3>FlO|$wjip!F1_j&P*5s3gRJAz)`6UrCSrQSGB@r<)m`$@BAUkh$5gZ)Q5V5GUHE-fi zA|q98MylF9DLom|3R0)Cb4BxXhjyf+;YeNuwcS80nsXMZ>7)UgwFkS(oCOJ7w7xx8 zD%JV|$z9qJQCk@!6)VHSV0c(Bvo(cGmN2j3>Vg_e!BY1^4pu0#v|4&*m7F-2@C@Yz zHp6C-WEN^y2Cwd5GKkl<8;U%Re~U)l$rz4;-UUO7No+LNpnMA3!@3RGKz=CsWz^~$ zzC-K?4{Wxk2N7~Cp5*Sxx(%^d(t{Q$mDnPiQV2yB`zb4NxO=6|JxVtnifuqEY!U~m z?JMF`C8aUowcns1FxKFQ((ErdCECcL0B9`2xEu@x6jlqH+>j=VSKJZsCHHdA{WP+- zlwQtn+1zoNZe+2Y?Ci^<)C7C5TbEq-LXfN)*q6g+-&Uz9=~#J+>ppITC9S{z*x||V z%|6!oV4z!@hwcBGe8aEfk}VNRty_~moAKF-@%uhs=dTE{&Ts5FZDN&5&CRP_7yP>Olifaz zUd`$-ZBf_NFWnq5IBHASj9J#2vsRt_qp|T=gVD9K9%q&QVpNxJD&3woyYclStOrc(lO#+N=+EhBxf|UdL9-9O7B^Le<+BVm@;$xl5U{>Zgb6!k%n;{n6EV z-?ltyTE3Ay-F-?aiEYaY&{s5 z{dUa$%=dk7w6RZVG%qUY+d=QF4Y=NI&#bkU^`~nb|7iQ8vyV}gf)yP$?k)GpsNib| zv}Ke_9xVDlc0LDFE0l{CaTJG$gSlUu~qjCaP&I^QUMmrt8PTaIno^?i}BPAc>Bi_gzzU?bq$Aed?Qh^|J~$Z;h>V z1US5S`yHPu%MDe2Jy|o})N=8Ysik+E^sn3H zo?YA7?%PYNSpL(d}#msMh8bA8N z!zr!rDP7#J?+ib9VDGM9Zq*rce4F#gDPgl_e%f&TpYO~&^5F0*l@0`#{rH7R@xKqx z9y`Um-~EB{hfdV}{*CVsC%zMVeyZEFjDWUf)`nG^dBp9djMXPE_x4Hcwtf2Q*ix;I zzSOb7Ht*2Up>^v`Sum~kjY}m~jH>yd@&~6{OsJhP)6?VXt2^V4XFfUnMxFHVnDy^0;sPC1Z~L5P!9S&zZPB`^Pkm-n`=FbIZ%e4cI@w z>PKyloqfM$-A}emdf$C?-_`3^$4!pxcQSZ%9n3` z=QH&c-<_x3|>|pXBwGOFYj*h(vv^#JW3rL zFkneoj^&e&ds>d%eW!8YjY~Pd|91Iu=GDi)-FE#cZo=G@J<}`Jesb}vxKI9kJkxdE ztL46XckG{cH;j2dZqMT%H-u-tc}lsa$*$?|*A07@v<`25 zXikL3_oLUQ96N5Tk?l6>YL$OAyjrew@52te8-;D#{vfi&n}Ef zI6UK2Sgk`_hRt_wQZCtd&zeKSOxHp?EOUM@;^@l{+S^Z9xI>w=!S6!)vav30`>#p4 zTCQq|j~!N&zMm207jmgpw`rd9kG6PiZPL#f-mNMgxisB1x?kV+Kc{Ea{pD*f-zlAL z&N?{GWli`L=j)?in*XI|)g!eQZcqQw>u^-U>(PrmUMpL1Tf1w=4_&HQPU-S(^`Kjm z#w^Tub?va9?Or>#^Uo6d-`e$6jQvcH9gF7OXI=J2?Kn99Vc^zlA2(VVKc#=~#QE)d zj=p%~Q03}xBn^D|L5suXm#!#t_|o-1OFO!}^+Ln1I)p188}68U4GfDI{oeZ9l}Zhn z+dXdUBF~4@Hb-uI)o$A>F=MvP8=L4e!!2&jBepx`>)kFrzFgSR_11_%Mpxy-Ot*l} zE31_}Znl5eXJb2$Ekoz;bpLVe(vin9EAII0$|udY4e58~6Z^{JlPb5r^KQ1&@Kfzh ze{FJI&|;F~!8cP{9D5vi03eN=@@x?+s0Ezq?__t)YMAWc1qllYI+_v3)$=hr5+YkK2E$g&}KKOY?BbN5K)PydXJ$*^yJ zVtUifYrp+4h-%7>zi{TvU@lk=(Hef z+UxDv=_ z*n67n4>lh1RlB1x>o!!pbmq(5onzh)dAEY$_@Dq!)5G-Wk^Nt4K5A#^Y5yxpuTLqH z(y+wv$``LpbE`4syA$hr21H)p^2w$f{vUl`x7+uRO&?mToVieQ(8wNflfSX1EdO9( z-640Do_&0@XXVJmsdILQzcMXo{2OJbR)6R1oo+5yJ~|eA|JPUcc1-NBeU_j1fzwx- z-dc6u*meA!y7h!<=YdJ{Ks@$eSFiD^ZxCQ zyZx3PGiSAn-_Bp?xMo5B?Q&w)vTe9i72ch8S8ZLO7_@N2#*60;VxmhP2rNJ4;iNH2 zYU-(-ZnJA&oO*oYX}c-kHgIVe?%n0iU6)P1ivO$TPZlQan!of&x%l8qK||J8QJUU* zbL67^!?)j=5z*DX^R)BL5;hN`K{;5cUykG>v;QzqcWD?TWlErsrQ~iZZDl-mEVe8l(Y45<0{hz2h?=$|D;RH zwDT`J_^ddbeZ_0>`Y8LOeRu9WbUCl;2BRO2&=fYR1)0sIGscbWq-LNU=H;gjggk8q zecFWDVN-s>${0`DHiB0CT29(AOg^WxdYt%(M$1F11o=G9V{yl}G)1UHY$pdg zD(~VPK4TRSCi6+p6#iZt6P84N-3+f|^WBF}WI)Wm53g(U{T=KF&_&ZJ1Fa$83gOqO zaQO)Yyzwk6))$v(%FPqxzBQ+%Cw>{+AbVzXqvKrSIv)Ok@%x-#4IzM*BOhA4OK>A* zBoPA(cB8zFjP5(RIkSEH_90HsYrC_O;DgNu#07N;7R|aU&IpXQO!y_^PFxC%ty1O~ 
zR%yyQ?V`){7m;1_aExkAqb=4ntXqe92Kfhink*^tskA;A=GmirTffGhrnGc(N`g5# zHN_IR>@Y=XmfIy=b*ts zo&hbI2WY60Q%qyRJexEKZs2K(H;)MOOvK7Nz78Le5ayYdGT75(P9BW<#HU-Innn89 zAWoA!YIt;Ng2*OtL<&gy~SUPL!sFVaz>f{vSVM$E1#HUB4 zrRP=!f@{$Yt=0{#g7q_gg`&0X(AuPX7Q9FoNCUdK!CI3Ffs)n}aKpuodY8d44mZm& zzTAMBB~F)9N=i?#k*CRnDPM zDNflR?|6XLjXGXdZeYxWR{~;E1!=8;E*Yl>3nT2a7LVmgmp@EyPBl>q*G=1!U#>fmxO&Awz6+JpFy<7UYWDAdsp#Dky z5g!Sb#IBqM-8&buq$I?et!BU`Z1KP(T2lOabjxjFqy;6FFt0aDv61*m>~=+p{eyo? z?8Lh*cpF$tx3UoLG6XL)$qHyieFWP^q+_Z#5Zc3tti-+QJOu#NMJZ~!)l*?bMQR#q zxbefD2KdDSgeE*`&KRjnMdpEbwHLF85)q1}UX5>)>LeM;IC(>CJi%xT)fkPy%>&D{ zn9~(?|2r!7pYmfVspQ0jiv5@UQ?PVIYX8(o!v6vPv`UFnN!!4LwA=kdr2RiI1X!(% zHQL(CdGo)_NNbUqg(B=)zENM10#MYg&4gIW3(GBZiEgG<-~t z!CPW-Dn?~EXpl4_Eh$al7#OBnU-UGMvC=RtZR9Ak)q>GkN<3gOKFf`?N=KXz_Mp>6 zTWC|-ix~V-aR!BX;ysDKj|%fl!4Q>(tJb7p7^dK238Y zJ}uV+36I6y(7)-h$hgu)PKBI9u4@+{fB$DW2P!X>v7j^fGOO4b*iKgdyUsvJyVx0s zX=2fx{+R~<#m;~x>&4FCpW+P4^Qpa!AISYm?w~3bAQHG4_H^TuRDREuFDlU2$CwNf zUH+m1p$?cN(t6diEhT-fye2vnYt+APU3O80AbDwsuH1B!V zOHPt$Ub7iXGxXdy&1NnjxZxYsmTG1@Nxp`OG(DI|YvwfTsic}Di`6_ko{DBXbde;K zTuIo>!f`Yye>G3eB;_nft`n_NPQm$cHo0$KX6^6)p4~3Q{?5ShzcLG=S#{pD2uAYx zmc!~m%|t^kRhqMYkL6!LwwGEOzp{%*Qb$?^{zY5$`}*g>KH<`5f`1U@1! z9%}QoA^l}N{0?RyJ+V%gv}m%=laoacxGrx9;g zIKp?na#LOBh%nu9D~j?GrcCg;#c8m)hU<%Ly6$i!-?1|GGe_w37-J&UJ&p)>B1-rR zM}$NKpI8j1=FAaq z9ub@cQi~&kvkRxG%@M)5G)HJ)A zhje*Gkbg8VveN2{5aAzx2-*Mc{Fv?BwhRB*EI6P{hVe6w2>&=c@|~Ll+a2eK@Q)1PY>c;k58-)*e{|}2cZFZNK!krh{PBkPt^))j{9~X;`ow*e z1R`o5^ySXS8@dS2!aur>Idk!19g(~6k2gjydvE>XEq zhf^;xHW2=?QoU^*sw9iL3;$R)YGd@OA!m3h;UCu-&t_EZA;uWOKduk|HSUEbCpeAp zkCP@G*j=T`mmCrPanrvZ$4@MIlq14FE^9IAU$gIj$`RooQz!2^H~-K591;HU>$=y< zu>QL_BK+g@J{eiNYkkBK;UDL2$W$)wdXFQ*KQ3EctxLn2%Q+(aW291YUFmX@IAY=o zWU6=IJ3Ftbdr5zUe~c*ip~rfMWgHRyF{bf@>0jSm!x7;h%kQbq_BVK+Bf>wH-;h%I z?2auQ5&m&>*wDS%-9F-o@Q)J~7-voz{xL^{e;g5HjQrer2Sf#2@YM?br+`*{_%KOk2QC?2t@eD^j7DiYCjRFgnzt#sNc`c1`0&@ z$4~3*^{O^eKh_+#KIe_v zAM#YfKPE4%z3a7!LK?z9uKNA6$f)eioJRP^1sC3KZmPP8Bf>w1hn&r+G=Duugn#rL zHX-wLMIm|NAIGFQ2ToeElG6zP82{F;JN09hb42(@_qnf}O!iyE5#b-%VyC)COU&bl z@Q=nwC3s54bdCuBcy)H9YpD$rIU@Yy>N#HXFRx4Ci13e1E<}xVU(zm|v?YS;&*@HK zr{<1IM`<7jtKww>8#fE{}uWcM}&X0-|0Wx zyh0$tKXzvKc3ip#Q0h_AywfYuYG>&^MOOAa2nwsv)6ubt@$gW zErfsUbL!C(*P$7lM)=2N3objSJ{-dl;U7mlc+`FAJ}XCrf4n`d`h?|KNIXYwx}WU7dHPU}2>&?s%~Spzol^Faf3#yQ z@Q7PJE)VRzt<+PU)!|lqCZ3QB9F65YhJ#os__3WHOtitPvS^c?ktpYsD&p78S^zv#GU z>)vNSM%e=;!>9mlO&7_77Kic22u9(XjVL__4Xzxch!!JyEoP^Kku0=y)TUa@Q3s>3 zjFyf@Gg^$sGg^!^L5tCtMvKv?MvIXqX)%(O7NfC^79$C2F&f`!F&g1$F&g7&G14C` zM&leUMtw$$(O5@|(P&4D(RfFT(TGQj(U?bz(Wpm@(YQy8(a1-O(b&fpd(8VUqd(cf zC1M7|NdG3la?q!^=ZOEDpuQktM2*Y99=o&FAllc`ReYAKfq(L8S%EoFYSLKZB;k)r(g7 zU74ICC^+bWj>cM|8GoS0hHmC5;tg7dd3V(PY3!Jl~raT75)RZ(ED^N7iNQGa&L zTh}9r&@o_FCTB7V?0jAVY8dM5RPF)1D7cB3oaE30;woaY0w9hU$cbrY=gQ>#N1;2m z4MSz7N}4$_<>AWY)JVY&n2MO}1||5NR}wc7ld~oTo=8PZ28H+~aL%}in4C~4_}3>D zFj64AdfXG>)f`8!2OF%k81`@?#DV!KL5t9=}1%I=kA|}g86}9KY zxQUpYODfPu6)~Aqs^|z#jGKtbX{LfFe^ta}Ye9TP5aT9dat5lf>S|&#!XVBX$vNXD zVsdgye;}mdOx791CMze#O~mB9Rl&Ju6)~A@s-VJ&aT75)rB#?IpZFYW%^xyJHdbRT zSQm`pw6CB72vG##WlKrp5)(4C^%aD9=wO~Yn3oRbt%KFl!D{PZG)LB!#YYFDd9s$S zo(@KHWi6er4(6wW`RiZ-I#{3%M)PKEzBF^zVj((Ms1DXZ2WzN<(HvTv2hF0j7|o-# z7|o=$STh}rX46_anon!77CIQsscq@V1ek!KhYU>PN)IJROs?7}&_q=aA;knSOXtM6 ziI`l_QJANim^2o|%LOrRA|}^*6g;b-;!LUx;)qe4Gj1X#mw^=KttKWNr;h5(iE$G# zxk7~BuTT?{@`H2MXikiqM4Y>L{t!G=sG{XLF>WFzm+=(*9%>#jaVHxjD=HFz0Ek@WQ}E+ac?pO+87P!J zB8zj!O~m8^pn^xVRK#StKs-wj<0fKqZBW5sCKWN6G!S1A#JGu=TsBnj_joE|vUOBR zcU}zoYfe`tR}>Ze{*j89j378?P2|M5iI`k$RPYp=ikPe;h>r_m+(b;SLn?TxO+`#* z6U3%ToHK4BCYLA`{1TChnCvNtM+jovL`<$;D)?O?6)_oFs$eC~88;D=3!4gt0xDv% 
zz~JmQnG@qCVsgz>!7(uvF_~r%4-~|>iI`jtRaitmF^xS!F@Z_OlgR~NVthgG+HYD~ z8fQcxQ(J#PSW6u&QU`0LgSFPd+UQ_yb+C3iSbH6;gAUeF2kWGRVX9l8zR@~ZXB{j? z2aDCgy69kCb+B$aSa%()hYr?L2kWJSy`Y2j*1`JdV10G4emYox9c+LO7N^0;Y()Tr z(r*?@4<$!TuJ|joP$3-=8AVb|S0=VDEAuV|0w8ijK*6tZsuGaay5{aS0)c!Py!GDky}g( zelAayfNY#Aljkug0SJJ|9Vi9+A*uvq09~0pu0aVv07Pz3DXgbD0a-^#J@+&RB>({s zxqqeLX8=_>km;0__TcRR0w8jGOJOgl6Odh%l@q{`p%3|2N2lswYbOBk;MZWKA{%T&W<*}N?BJ$L=92tReLTsb ziWv*kiR5wRufzw)8)+wkS5wdh}RyU02bKfloJtmMIBV%NSF8^BC*6QKcMH4UB z{L2du&Ke@XkGU=A*Bjq+o0X8_t5^cX5-65Hu>^`GP%MFB2^349SOUcoD3(C61d1i_ z|Gfl?n*STmd~s%pe^s}+^D+PTeYA<@|KB2%K%n`5ECTIz(sE}-gam|A2+0UEw@*i) zdH;9>cZ4Yjv>q@Wfz}7sBiOD7U|E1)V(#w7c@aQ7>C%4IKqi!))+5B9q42NbpHi4J z?h;2@X`qz7Qmw<%=r2>!_?iz@fNoJ@o+2LxQhXIlpjZOM5-65Hu>^`GP%MFB2^349 zSOUcoD3(C61pcQaK+|BFL(`m_X3Gu;basNK#k8kSbA3AaKr?8Xv(x;&Gy={0X^vk8 zf%g1qo?jlJ0s_tTX+5A4LS=+12vrfPAyh}8X?INo6M_eVCxRD(H$p9h+6Z+Jd=Tm) z)I+F`;EUjgKo123AOs@Nsr+Ds5QI^ak0v(h|dAEmG7sM$8#b*a_n!5%Bi?7s5EG>!qh5F(2~cRCTlA}z7P3i2voq5D<0u0 zaKwJ_jAgbgWB3D?gCsM+Q)b1$5b z%7LElkI(-z0{@He{J*gt5j!E!O)7gR z2`;8U%8yO$k1f>YS(oi$PGz9BIGTW8r7mK#Q4=z0O0enX;K1M)z(gE0q((}tVPfN6 z@J#JFXTMeSeMZIp5U2s7yT|FI1){Nz;JBJ?JW#=X9lDMZ955)4SIV6f5KdU=17Q8X z?-L#iy68H& zSU%NGoAMxId4y$ogIi<6Y&(1R=HvJF&BOuQDC&s@t(Fs;)9&rj*}ig~L}RC`x8shC z9*3MZ46l*Tj4Z({=n*pFFPg2rP@NM|E} zWT|%0&#gR3+Qr4sugPnW!^uaY=|W}@Mhc$L)bcepqEA(6i`%hE@oCZ`l1aIG#-xBc zKAc5+_<|SoN-}>D4W>K45v=O8W%2RcN;jdd%fnVj)6p|GXlF2pQ9cCbW0#J1(nl0Q zK>}0t(V>g8o=6&!D7*ksd)=Yf#P6>VabwVir3fzpCO1C@f z58b9Bp}F|?p@z7EviM7%C^4wjbla`>#^KHjdU%=9zf=3>127E#Z8qL!qQUI|T9O2K zV*}m*r7|?rw|8_fGd6Z~p#O8j_=f?pFfuwY+1-}bv(xhpun`N^5u>9B_AHbjBo#&^ z?a@-DYlI2r9)&R1yBH-96DHGL>@hQBXtYAiNvyi41lkS5;=xwLg2Pr`!otP^!@|KK z1DiiLcqWKBKbZ2O)6TYEGWTH(rVs*p~4(E9%xWqcB} zva$-6?>R0Kgb=nUGMK))$R|NCEhr-~78x*PbmbKYOcoJ)!6I2u=4}*LJ3;wLra^Kv ztmdsfuv`b3Aa|a<@?wBALpwc>%#QWLRrV0 zn%D}cMYZ+l%?f0c2Nra%q!@x)s?cO|7tQ9Rr)JMN7z+-aktkhnN!we*?uXaLUj$TP zb3@VujPOHk;{k^|%BFWQoqiA{~Q~r5EmG`-L5?&%vUwS>>qu z15D7Cn9)KQwm(xivtRB*nY*kqm~PW!(qmWQ&Ii@z>?bW5S*vMQLHFvOZkX1tLi0ZA ze|kYikMRX*TT|6K*!+-!)XZ6G9e0+E+!qFGZjDZA@odaXVi;U1h6X_r26ecuK+S!f z9XeVpC7jpyo(HBAHk;pUDxx{mUy61#APVSWGdRIP<(e*M!&z}fmGoV%r11x3oxP^) z$A%8EuG2?5zXL z@vVfFKv#=~KmR&7lz@EJlh@wPMTI;FdpnWJdnO_VqYSP!+85aPPL3?5<6dcIVm~9G zhaZ-2TcZ%2QaNx7Fa+~C8w3&7@N|Wkzz@gNvYy&66Ow2vsKB=V z7sq~M_(M0_l&&sdS`Ant=^3SS_~)OX!0&}Rze)kMRx*jNsn=kF-|zK6$AG#X(oF;k zt{GD{#v)>}NCER`oHZ5%gad=$3wBCU_)>dhJWch}H{5|c*wtWH(1YkeY+57+@S&f6 zP_W|%(`thgA>iMccsZusrEKIUlnq7ob>W*zOT}{q0zdBxfo;CTS?2!vlL2u$-L?s*$9g1&$CG@WXwEYztvlSJ$(qe@eFL=OF*5 z^g8c!_Z4ewD@naDok=sn8(j?Zoca!Du&;~2P#i-L7o<52MRo0s1+2Y)0Mk^_2~@UF zmW;jMJdcIV*8xK+$e@})+k~Z)W57%|fX$XpZ8vZFC4l80fY%|2 zbX;F19V}j}6dphlZ{QNO?({#$e299be9!}Hh3;0#Qqx=wqn5uQ_T(Tg&;00#X|%6( z7KU5f^Jphn0C`cGC)*W#ghj#1n6-fn)i@1A;zFzCMb);hipGNi ztP8feqy5}DfgdJ+kp%#=A+rBi%VU(sMhG6Y)dSuK1N2r-^!+jK120TDK>SGUmN~kY zWxUvwjX1y+{udXxRpHv}xa1I`9Xov2=F3(ZyrTfl-OA$lDw_iGn|Msjgg2}6C*FcJFXN ze=Hoh+kLG#omD^d)&r}ay@*XGj?MHv2q8cTC3VZW0)%XGQ zObZ4-dWo2RP)e)^qM^(Awp+?Bk@idp9Mi1Kh70Fs9&#HVi5=K+2~Y$ykzzbJ_U!pg zi|Ag=7h3oo`3sdf(`r^!JK;UDc3<)(NV2BKlL=GSYqe|(h9r7D$zcEM7JiFJt=I0N z(4|cNpg(`w3GPVeC@@!GmG~)}+MR@%qN0s@5rc zDGP|?lNaEnEy4brh%m>m1MP~-cZ$FTJ0=r?FuI*$Ed0?s=*COHhfk7}T{M8vW5XC& z>}&?XD#`mV{W%b=`sY*0jalV*J%m3+PFtRsueMu|1$G1e8RM)M7RH{P6|8TO--|X# za*Z5n>u#2_lNb7edtB zx5X9d5QmK!gY?O4D#>LxS~#~EV=sm=cd^CyGuiWZHS(q@2SUl(6Em@9`MymIp?RR@ zO?T8O@a0}4OJqUh$47m&u#z-8IjJp1GN65m_b?50_WWkrKmP6^>4rBfFp!?hTlDR5 zSCf6@fCx3)KB|?2z+ui!6f$sylOP?A8+KYfGcP!y*=&y%&yezLL86 zaNqn=>1_aD(UWS2YcpZCpR6jsFJQ|-3!V?@FH8bqK`8MKO2#J+O9sAUN7n@44X 
zM!=c(!F~%(BkmhORgz7p-d+bUYX^3Fe!5LbUKy(jR`k zTwQ;A9m8BZ=l6adNPLqhSnG#HeLKtW@wy+WI9EcXrjx$BDKt6CTBp4n9S<@vw>b|YC?tX_?9RcfV zdON$t75VT1WnlC^dl7_yShv7m6ZJH%dP^@=-ap5Okw7(BQ?J|@or^q7_m}RFvyV%7aEq%bRIK;9JQz|ynm@lq5Z7)48)f|+|R7OjF%MO}GD!mT>z;SXLbSzNIs z6C*C7PR!j8TOTrU=}vuFs(?b;E_dw(aKo>Ol{rn{dkGX2P+ai&-63xEna4wdIAV3R zsU7z~eXNsegLP>wZ`$n$KVe)yc+LTVNDULfr zenu~%tUYHyU9hsURvUzy$yoiA_Kl3t%Deu1`bEdg9fxzzfVt~RSXsQ&kgBQf2hJ4> zySvRFkju%y8svzK%CW1dPOEcTF04P^ywqxJ>2>d!p1C5Ab^1dHwwn`&WL5cO>6L2f zCzlFmB5&fwGah?AqCEEkr>nG-Jjrf~qI`8(YLMaHb9$|Oz9#K728${l?E=HF>556X z+-ToF$rlJ8F33P{Zu0K$!C^z`yALpCWnt`Zfe;v`j7KO;sR=<81$ zd9hT@Q#7*Y_P5EWLK)RIP?Sp}(F_zdlC$_^$ti-yWM28OVh+)h)X$VO-w`lq^Srl< z@i7Ew?_pkc?lwZ)2M|G#fQdOG0`5HXU*enHx3y14qYe+f`fEvmG)`o|(9&(F%RLec zR1<^0gY#I&Fm#VmWYOl}@*sC1Zqjhg9f+D~%`cpG=V9;~!|)OYstV>Txf6z$JmKur zJAlfJN(-W%w{Ei^5eJT<(Q^O5la1J9)Db#?x-Eg70@rGhCYNJ>QDGX1h2<0=`2b%Y zCz}j!K*D%W`I)<2!}|;(`2a7wNyRc6=|}C(_h{80Ry&=C^>LS;kp${OqZ`qbG7@IG& zhD|fZN0K&2$g3)h8l`tTy`MNdXjp;h*Q_^TDAbQFMM|ep_GA5Gp)%8Ik&Abyx{3%( zHG(Z!QwR{fg33%1xe_9GONo}4Q)eCnqERWu;uSS)n* z>coq%MxxN7L>MeJ>}JXpQ@fw+=fOXSgNrIGb~I43QHHUdyp=A$)!b?9lWl$f4#vPZ zk3`_X>>vLo#Qgyp^P^!1jB#=Il##!g?~{Ub0`ADyI+AE=SO27YiDeonNB?`sLGZ?3dlzFoTt#d9I- z?(P$~2!Udd{z4=5&OyM*|BzNv3!i)k2bd&zz}5cB*BIIByZiw~|JTl+{#B~-vP}*H zl9#UeTh|pgIGzyH`yw_n<*&uXh%JO0tg-^()<)ds#l>&!hTF=kj(KgZovy*0>71s) z$0>1GlQEVv)E_r*-gkvc<$u>XqkcKHL`BEKmNJ%tBzFtSXzF4wnaC)YE2GO86(a?T z&JQ#U;&ej)Ff=fSNsiB zW>RPCYej@Ny9I~(57BDb!y)$f0kY`92pOx)>8&qD$e=DU`)*ZaSd5{2MuHgO4Yqg5 zk=heDGT`F=Ol9x!=wb(!UAhMB&^{}6!-DRQT0r#QD-uE*V%VwP4gdq^i#rVVy7qQ zN+hZ++7k~+K6?CP1`~-DSH?@Drb7k^;;c#g3aXgLW=+}{$i&ypk~O7ku{(1L7|%x& zw(FuNVqGpgj4*rT?`l2eHrovN;bMfoDc_>I%LrN& z>*F(Y2SY28CGoLVy9=@UDXiW;JJ4?gK@s5DJCxOrk$BqK0*JJ2DvCR zm6rh)G7xzGt&u+ylRxv5KfQBJO^4-vbnkQNw}9bG&5+qO`s6|E&(+LnT%X^`UQM!x z1{%#1b^6NFkjK5P*#f8T_N~#zQCJg}%HpG}1gyTcJ1hqTOz`<=shRR8oI20VCZh5g z#?@5ou#Bj9`EWj-Ehj3mb1plJw$T*l)6u_^+jYKryyky9tm*69vYv9bW$Vl4>5(}) z?c1`ysy}AK+}~gBTejD>G9Ak=yyD$#Q(X zX;MqI5^;RpSh5df{?U>AUOMenT{#hzjS~^GB)grEF8=nWg}tY*-mNfk*M6;*^d6CZkvhi)>jlA3>7C$nl*5%JBWd- zmi2%QMl8W%wVaDAO(zv>J{I|-V%P6I$oFflHW@yuTC-y#Dk}Q;eWEIaS{wz`yQ|tw z2Rha*l^U_Vr-UKgD{LbL3#T8K9sOo&PE9WD=WJTk`szKjYbOo)pjGF*zWAIK@4fEk zmcBojhxv|P4my|rYHokmPRo`UcZ!H8swq1j=I%G5muY!8(%R!fz|b>XznTJM{AP?= z9`XD!!DU^Kbvc7UnXz=gUU#ne?S9i}4AN*N-*GzIF8<5#?oP$AyM|NG>G0M;NHvzXx>Xe5-GkV84q6SsXOUS;X)wye0}nzD?`T!N@P7p2xUk}?EN z4J8-WDG$d`+c~cvDA}^pjH7Kwpi_di`uWz@K(<#hyNZL}k46>fTnx;T-EATc*_GN6 zdoqL8Y2G_#r&IP_?FYyEm$BvQlLy^&ZT@Y2%U6|yu2!HJ{oeePo>RvjFArAOg>i8vGf)mT`kG4+5>NFc0PkDIM z;^)MW29Px}Smt)Fphn9Dx7Gm<7@oGaV1n+Ss&FqJyny?3v}O^H5gA(CDd^I*c>_km zCiT?=ZJ;c!O+NCyWe1MX6?>}_9zB&1c8;I`)etp;x(*0bmS4c8+@5$@g@y`0sd!t3 zMhF{Okf4VY&LZK59Lyr2hJ2ev!VOuRMZyeeoJArGd6-2aXjSW^`;7O_@iC&u`1CbT z6v~=UDtipthEJ;d3lKM*O+s%9>V{_mSSsq4XM#v7>W*hZU@GdKXF_i(>H*+)ZKMN9 zM=(D+ruQN$exaFw>W6vUp=0V@b*pmf=|W z1oL?GtkhavQOXi*?n#LZ78TLV5|)E70X6eUbiZL5m-KD&fuAC2^5TV1GvOz2Wyt(CKRSjVHpmVXETq7&x&AthB@|RLcsx9 z9i43tC5-Ki%2NyGUx5MqYkZNNhV4612mEXN7U(TZgBL+Z{K_(J!^$ePOe_KMhkK<~b~ zGsokUDe|Fq<$&<|4~s8N@jH-X!iU zeinm7^@C-r5(35X1*Zxwah)-9U)qJLWELDG0#lQhqGqVsVcBG?RESSi<19^XJ<_L9 z^MNl;s+#3SO!OpmL814<_M5FHS|GK#@cmqfDdyw3bvh1Z@%7ylY8ce~f)Ge{6SQo7 zd67uad=UuxUFRU=USPeN;*OFS}H(h!D0p@I5}6C5m~Bi;o}(+|2Se z{iVJXg}Fn0SLk>t1~7f@bOEfcaeFKYHAFqwTSzsPoQ8oirXQ+WgU}FU12*66l!1DZ zQWwahe6MK^o)E2#{XoF*q!LQ3m++;nragp)f{P ziCM=`KcX4ZzQaN zM-b#jp}r6)Beb!~U~V2R?_Gf7|+ak2m6(gH#P{vuT&)t6%WqQ2Go%5-=e`14eKmXKq@OZ39L` z_G7;(2H1suT0mq>px!Jn=9@5dFiprZoLbm4lpVP6rtA>X3fTgaX;PIC6l0)kKgUa! 
zfE^1Rx8^Jy-a#}L(Km~|9*h9dclQ{uA(QSQxj@h;XfS6i+#hKuggy}20$)T@Nn0A* z(1lSjNLz2XWQk)K$TFm%hOdF%auw2wL|+7<3GPUftKt{ngwUJOXEWSE3TQjm7ZOvx z2#LbAGQgqg^BlJrTBFb*gx3&nar@c9p$?wX9zhYvby_kHfyWa-Slb&7!0e$-Sk%T4 z_<>Nd8#E7%V>p3D)n&@zM9zXrz~c_mFwJvr-jc&fgH80{cWbCh8eF2LBkQ1XiVJ|M zTC+~{ep5#TkCm9`6hg(rxS|IaiH)IBmB@;=0~@qKX53JVX=?jM;U!U;lxbPbQ{gls6bo{1R2ty zLxh53SI#t2YE~FjDu>Cp)V^Mn_XH8aoe0;x{m~j%sm{Ic-#fx!2Ubc5eHsp4BpXa7Duu{#@II5uDhXK=@gMpc=Z zyt6ExI1h11i>`bi+rl+99Q^&5Kdb;#q9#$1*dhi(3f>ndgs=%|452IhXrbKREtZu? z(dx96Fc#28ScIPL&!pd&OhzPQwt)sXAc;+bk6?PkO8NR=sBfrtYPvUjZ9ZuYxlgMb z1)T}4(kes8%!54OX;zJgL4Z^S!iDr;YWO+0n&?)|;*`SrO!A*lr$#tn^;=NmklLw- ze6-j>%*u?eDOa}(dhLnCzkvdtk#sFtfstetI`X@w13?k)hJQ=AW{p$^{i^Zc)`-{d{=&q}(3D)M7c;L$6}@lL*Usash=gNDLv3Gg~q zXyHY6C+AW)c!vQB->9*J4OrR8#5g_#_25=p2VH=l%7g^eKr-^4e5OjTX>&Y?@zsIVf7g5W$AQRRhKKvPM7yyORfmCGkxfz+ygFQ zhvy;j6ZCId58B_)8$MqxnqrrCP%TI4_-NN>`CSG)Yw@qFsqUuC$vro8!)#B7iGOF| z2%_q~3n`z>A+(!5N+6rm@$4sWI;`QyZD&<`M9kgg9bNE()fb~`Y zlHETdupWeE;eAU8Hk3{4GmBocY024Z!sD_;M)xVw12w7#`Gx21jXh<}lrue{=;!ft&ufFLBl3FQut{#K72 z{br`*anSm@O>`bkpnoSk_(=~-qr(SrP?-HPr0q^AUhui2UH|nhcb_1rc74|Vt!U0W z6RW~@<7@Ki+~V3=3;&h=|{;&fkjFnz{KBr;wMzSEy6HmyCrntEU66bXkOU4nCpT#c5n zNrd06d&6XO=vsG_moU~5T&M3^v^4cEtDe`Z<=}SEvjuqMDto!#?~76fb`wU(~!lZ#-JU~+_u zz$I?EkBXM{vkaV{iL0Rb%VSP95M&ECM>O1iREixIG(7~#^4dr-BVT);L@!=$F=IG> zpv`#*D;{}mkEWWB*4h2KL_?HA2l32k$16uG<{j?Z)v@5YuA%o2`hQFirMEie!GQw- zeW&<6y#F(y_cOfL*tGx7f#i)IML-0hI2{O`BpG)&Iu>o2e=6+*s|lfPJo4@{%GHhb zP<^G9p->lE`8{FCeAt3bTOxKWYKRR_gz;`RU>KZQM$u2&JhGS8?b@i3HijQv4wo&u< zvj`Hv513t)hn(5yyFCOAS12>}N!aMSC4_FKX#N^EDW5q;_H8FgkoxgEv4gc{ArrexN|FzL8SXwf9ZNcR~jWh9&0 zH{BdsU_3g$cU}K1vBAz!);1Z{;tio5gN<3@aq%M+Kk=|;5~`+GGjeLojzL7ncZM+x zOZ9?T*3U9&G&*Dn4hdR$9LkjC1!}ZCCS9;AKx*Q-TPq84@V$?~g(!*z!Hp0zS9K65 zJ^7JMa#G1BWBW}Vq?10LK%?%6%iVRYmg_@EWdC5W71nxa843N7tyeST^Z{*7Y1n!^ zyZ1wkX}2HQWsA8d{A8}_A@BSUnPX|{b;Ip631w^g{aiyPc-V2vpYIdgU`@mMmLk zh5nX)We0vMW|w!I`)+1~6kSoKQh7pM!1#<5FeV_p2AJDP1=#HAmBqsk7`I`qn+nK4UbDEx|6Q{wnEn zOqf8$-L3e?7%F>1FpF*Gf}!Ot1s|}qo9=tkMg4|xty zDfw3oRRMMq4MZOo(<^;dFYG$4fm>%D%CjuLw^_S40Z%8Na0(>ol?ZTlQ5VXlYxOB1 zlz9q>b_x`ed665k%+7wTxxhsyL*d|4Kfp+dZ4**Uj4pO%3Gk>Lbn=`2=F>NGy%vZ5 zA^KZC-o^ZwPeV(54oem7=}K}Q1>F5JhbjR{qOyj++C@dVnS zmdK8Nb*|^Yr2*t8tPh8wkvT6KZ)I6k&+7Ev5Ttkz@aL#M_5)WAxg@xXb(*V6KrLw9 zB{a{Ye3X)oaJCGRck?%8^w&t?k6p(P1K$+pgmV@dx*PU*C!MOuILNl5{O2)X*zK%8 zE`e&O3dW5}pb)4bjm~GXxYgM-t!W^=U-NJO8gjCXh_?8BApVn+^E+sbsqi4&{IKwm zff_Dks^^%A7Bb}cK?nY?%LM-D{52zQ zazqi%vPuEC`6}EHri`njIUR`dx##f;=)VGc@~K{gA9|!Y4(@aVRt_8 z*?&b$UCl8EO#}TFbPk5_&N|sqnFQBUJGY?L&=%Lsnn&}*`KgigT?^%R9FGUkG5=H# zFB-8PZQJ{k#_2H>?v{%#<-_-bYR)r4X zX@SOdMJ60(#35NEf6?C+QjMu& zMk)fzdh8-nqf&&Gb%fcOOK47xbo@Ix`y?x1 zB($ZEOYp_(W3a4I@WV9i<2^np`JqkwZWlL&YE*=UuI;CV=$G4rGY_x94?PeQGaE_$ z=fQVjGGenZ6C778-c}Rx+K<<5vbz8H$U(0t=4k^Qp#}(!?C(PRS?c<;mikx0>z|d> zfl*^H-3&;9=lHPgStW6VNP4d6<7bvVy zU2UWVD`F50V%tJ4m^ZQv%wd4NbHorom*|m(dvj( z05n!WY&>V4>pAMC-Z1osDlO_qTOJY!)O_t_N%&&?;p*qed@mceo+v8l7@$&Ahr*Usu;B3& z6kE+n1_5I zR+f>@cuvawy*S7%-(X|kWs#NDm%#n?X2uRs)$`xA&Ilqm>#YFJ$$v@U--}W|`TzUU z)c-X9ZK0DTvykf3r-=2EgJM;-wwajVT{D&3C+ys!x5@7=b31ctaO{)(h%B|Zzw7jr zHJwBfab#P6vQJ>4>6QPwEb}Q3+K_8Y|Aq#ouR54m%ttY7On|xCG2P0Z3)3XR9Ioxk z>})aayXlf(kQ_w3uCzh}j?n5JQF33w&YRVOQ%+V5b5xSm3v$+|FEIsCaC$Zb)dGA) z?8psGVYR&O?l3!m#s6mB7k9;P8gL{Ez&zp4>Ho`Q{AB&lFMryNKN&BL?)uQnfE4_P z;_unaoCu1rtp*UCs4PH29oE997TVHR)vd4G55TA@jLK%i?DvcfR!rS{b+d!OYsgLee{$G z`JLMGU=7{4?rOx-5X6Bn{>c$$pM!%tpLDodTX?Soy*AG- zkDDUDK|#DA_KVn-5mKykRL`zZ_j_SnZgprXVpsXb3!_78zRwJH3|AKMDX9(Q?p@Bs zZ`E(s<_el49^1qTNSo#&V4Nq%-vDJ6JOn96)vtj}8QWtKYG^qQ88g;^`ILgL%rIQ6 
z-AXFCgKY)EOi3)g^Ztxcb_cmVn&b+FSHwg?EMgF^SOV#z0K7R#_yM{7CjG3T1H=zi z2b`>QCH|L+Nl2SS*Z=P?XxL|qIV+lJ`r zwcbRR1erkI(QlFs9!n{#97{FxjIHnxzGbdq?w!I(XXrW~d-g$ABZRnt{imsrOz4wd z1x&#uARIvYOEdMY428|~&29cz*H<3uQxsMJ66wNq>2CFdcVB~lOLjFUuAdwGN*eNGbb-EVk_sP>AoL*|^M zrn{-BiLmwxbVJ5T3Dou~COgSmiMaMkHbd4;q%OPZD~XU-@&rSMZX|NMNj!;|SBg4A zmTsiByJ@Y7h*t`HL#B@;y1OZ!iG)md6BU%60KNNftrQ?rC=&;0r7hs8j|^z>F|sw3 zx3{%(pf|L&H~y>d`Tsh506G#8t83Xqgx0qX`HawYFCvrW7@K>7XpBe=H(yo7KDEro zf}_yFvhGcBSRHYuALTy&>NehI<))}|13mw>ZX8(#Doh|;Bf&i}XJPEUPUfInIeCQz zJf}rhmhmUAuh2ar)|*T75T*28qJFvfs1)KJ=kPs?pLr?IVmBrQSLRhe5k@+Pz+*zT zgQlV|4Z!dut~jg!8#1FzVN>;mvWRQm0#Pw2X9{?PmqH?8e z%PuLxZ5{A6*|V_8#Ie-yi4DqKK0h7iRp2BxFMkD6Hd18J_j~@1(V(GWmMy-vSkFbk z#52{*tGe>&r2beB=C+{oou1ZjEfG5%2s_Btit@Uwb?l6gjaoUEERQkklY32;gBV7{ zIDa!($6iZgPY~Ds1AC03*g@a}JSCIorD)hc$Kftn@E4xK7Bj8|5HAI z*8TM=Rg$s9WkhPjd?LhHH7uJ!3Y&V5RzMz-fOC>-s={p}Vmjh`Bv)Ego6v{tgy(W*oU=CX!!F{E@?%W|53mIx4|G}I zQX)Et7q!Xx<57BEYXmtV9 z`Qh$ZU*iHRkgf^vOQ82D^@+4027+^7fuw`@|n^zk)y7A3cCo9$pUBlbF1b(_)T zE-!G7$PL2|TIUL<_x};fzr>)}VWu7JTIYQDoVnt+n3i49&s#81d3g`B>DoV%tscie zIlUY8>LAE737#`XB+_S{#XH_=y(50F?46LhK7NGA+EySbb8$@=+X2wopSBbjgceY~ z^w0kz@vqtY*Y+R&RYG3sKLP%;_xxXke{A6abxlA0ra!%<-wA(j#{EUg3h+$-@-_a| zn)^HPfA)F)A_W4%1XRiZh<{%Azq&qu$N9ar?-!C4;5iJy`RV`t-stx`%{sOHRAn_@_ViQFBCICy%}IIzgOP>4)A;F-!A|hygvbc6$1WF`g?KPFH#5mpQOK+ zy8VvudpX-Lgr1)W|BfU69zpz$@_R|sFBEX1KgaQxL;lMZ{~h7?tm!X=9e_#y_muuD ziu}`={~h7?%)u`N64t*X82-v5{0{MZ9Q_MIjqPs`{~uBH?})#LroRwX0l^G_t^dD3 z^>@JEBfVdM>KuOp{(HXuhq&)|z~7^%Uw|r{f8+VDTl3ch{-bsHJ@)toX2kV7yT3;w zzYpwp=lK`bIrqy`^{!ad%4#VHcUHSh;{+Ih8F9rT* ScP~01Wt9Q&^~Hq#p!k0Ub}w)M literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/chartsheet.xlsm b/pandas/tests/io/data/excel/chartsheet.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..aadb48d6f4824dd3105f65a062d464a29c272a9a GIT binary patch literal 20093 zcmeIagL`Gmwmux&wmY_Mcg#-5wylnB8y(wb$F^-d=~!QS?|aVPr_cU<|G>TLd9vo3 zv*vg+t7?o{gC+MF7z70X3;+TE0Durc*;b~x0}uc}7#siq82|!EUC7$X!O+S@FCKxq1ZmkFdW66Wsw;dVZTanP-TJRAdp`zZM0{TX z{dg>MI2-mDCg0rqt(F6n33XT6cSln0U3u`L3eii)Q6?~o92?M#=Y&yA5@yOZXSeTr zq#uP)P`b?M$MtCD#kcjGt}UOc?!-aG)JsH=bKfaG6@zhQ54+ZJGdh3w8?4kN7sCPfgnCsgq{Z=LEg#hl_z4n2+1t*Hg`gx6R^qOY$L90R=f= z3@W%qn-w(NSi1}+XY*U&By;raL9wHawTPqlk92Du>y)!HJdWwNp&tB`YGq7w_&>bk zm=E4f4GQ@-iR~i3k1q7s#~6Znqbz_O4VrEQ!^OB>$!`q&XuUcFTQ`G$m;}^{1zqC9 z$-LJxQMww@$Pv@GGXI9dRXKyfdT}$xd*ZA_nHn==-w*l(9ysiy&#yCU*fX}j*sfpt zyd>Owu?l(+()#U3CH7*&EsCZaS~r6FD$AXxrM=iL^3EovK4%>BF^@&c#E&Ja4~~6* z2L_P)53#RTrYHXS(IWLB)zBYeuWe^&VNXZ<>-+!2{eQ8H|E=p~vC^_V^e}-J;x9pi z_j7B}2!fxT1;kqkmA-jPtisnvZE(=3K z;UaEuE)Pt4vULQfB(+NvwJqD~MRc0Gox4jBlXN9_YLB8UZ7TZyd1!-JbmmI98exJ~ z6$=tMA14rvC&lM$zqH!A-a`f8oS@=ad0M4Hz^V!;tCUoeN%>2wO}puL{) zQiaE$1>x-@ma?J=hiR2wrX44dtG0o~j~n5%PUKe)8kzJ#1tMmo2gV7pflt}DUSI2& zuSPOld)Xj+N(V28e8cdI)<0Ij{}z&_Q^sjGU;u!mkEIg+V`N+`=$x(X%=N6T&3~EfE|78qqyK`vU<`aCvGphz5l)aq9tiVo={0XAl zTTjr|=R2xK#UIq#^rm7@eAlLzZL^o^flp30`6Zh!7*#Yq$Yn`eSDpg6lGqV%-Rq3- zsj=8Chn^yV~IqM8Q0;qFO`j&uFE9K`= z@%nt)4J*yvRr@1$1{nM8Ut8q=4M6>+Jpk*RO*KXWf$6R-BgCcSc#%1L?{^IRZM|AF(8hlI=i zB}Erdgi9#JDEG&t5!1qaI!P@K{HL4QA+5pT%Mo}j%?{oU=_-uuaznAG6S>Hx@fNm3 z^`457N>0``8H06a>-X_1u$;=FnDM2vn8+Ktq_7cUPrB*8##RfIX}a7s6`{!7z@No| z#(e)MrePhCu?Qb6;~%1m1poo~A*O#WXMY#ge=Ti*ADZmPz5lyEWpNWSAFuZQ3-LCg zT`p-Z9YK>LID$M)U2gvP7U_D{V!fL}Ygwhbgq&iV!>i{qkGG*nwv~wn#0!j~I~jq* zJc3C~?b!;2%NDPe7GQxg&F|P3asp8xD=*i2H{lWc55X2|F~Jm(C|~x^wImtQ)TKi% zCi^6hgi1Mv<%VR^{3_%srX6T{CSV51QuI{GAzHVwdOI%k4mxW+)#kzQ+g;74u2Rrq ziP8J{aVZTz{a#pWNql{|C#t^2ZuJBKs|n+A$e;~4kvhGam)EKN}Qkf$zp*Q^@1LXKf#ES-9N+?J_UN7TR`WehH{= 
zupIK@R+S%5xtX4?5rh8Z1v-E~kIz@8L^jk4_oLRks$w_eUWm|N5f|e~59dc<8rYCH zzJgf*HiQ8W4`cg7JV^?@g8DZ?6bESFG*PMt-(!;>Ptg9^NxT>~8F;HwOvh;HF%Il- zF@j7Wy;}FnoicLc;gnA0BC*fB7<|ZFGixw>+U6`@5jib9aCEkbZia1J5J2eM((kyA z&m|8n%TDplXGQwt7e2jf(!^P`E-A^YtQ|}dY3Ipzc)nm6jxcL~TQ`VW5xEVjRGne- zAiF|)owc!h?>j}$aaFMUnOQ(#OqSP!cSVnOE|^>O)qb{) zU~Ba3)-&B`bq zs`mNuT33)My<&rL;CSDt`Ea8WE$iYrh(@~R{9<2yb7PejpKYRcjc%QB?bYM;`C#Bk z&+$E&f|vFpapl5sdT8i%`)5yK(45=b72L$DbnD~x?h2L{%-iz8%Irxl*BTxA$()g6 z?HT<|IDHg8-^8^=hf9j-%ZqiYXzNnTdkr;oQ`+zggZOgisG(Eb5+A#oOG;Uhx#yFs zt0(WH_1(pL>biDEmKJ70p^WQda+H-@z6Fdxl2^qWTQhsBmR`V!Vn*5XN0+%g-HH=Z zMrp@OKwsULyw=*D9u2tOAJn}T=o)y=bI8d0`1vYSI?=n}oAQQLjdL&FG+=)&+zvT< zF}-^ct=ImwLqW#Iux37nXE$XHQ z^{467dY83UI!)*)7leKdZ05~K0B4#fU|rwL;@%0vD>7Jl|0} z>I#zxb>0lss0EF(VwP}gR-19sKhJ~4B~*`VN{{KXa6o>&_jh7L|4y@TKZ(Tu1v#-| zhG75zX8icHeBqoPDS3M3c0Vh6(MLD_gd1rmM2uGt@z$ z;zqnoW+)Fs8iJA&bw-y&-j{@z@CrC1{sfj}%2&?Lj>0hQ@U|3>>{`GJnMT1RB{n$*0A)Pot%o7@ND}6p5TdALXPF zg3pg|qf~Ce>EA7L9pr|r5`{c`K_)zrDQ%yCSP9O)Cx<5~?4sxFj;tFCRE!JuWSOcw z5B#YP{xx(OFEQOSxPhLar?W2!x-13bi?a||KQj&R^)O|wrtd8|y!8)+EHqpCyi!0% zA*2dmSaYzw$8>-mQT}1Bj^#!pL|F4}-8ER(COGCF!m!IseN6e;4%-%@y}hYLA6S$h z6!f&j<8|`Ht3UCoFs2b>s!3Vr*PzJBfgJp#W0uMiEI6r8- zQar(K_QEwSEjM5C5vLgPhP;CuG{CWR5JrVd5l)It>PJQyG1cg$9JmcIrKZD)%Wmj( z;$RTw>aKn156k0x!0U22A7&T$OL z>jDpW-;^rf30LnO4_sUdUu=fE)L|B^1Zb>KV%{_!5Z3*JU<~FL!4LMm8ec^^Kv6hq z!qCMf1g4ba>fN+r)zO10oDE{p9yI?qoLh5>lx`)bi_Bs|pMN~)K@IzctKjRi6%KQaz zA|An)GA;WCi-pYgm~)^(c$%Ys&c)Z_AbwWYGvx}6Jpm}ppdflnMu^VQiDhI>l4)}j z9<>NbIU|>=0H(%Uq3|PtF0Q&+C)vSr5w@@hi8v!y`PHxhdI=*4vlkOla*EYJO=R%d z-$Kk^1N;m3cSUg%xrShQA_wIX1mXv0mqbX;g7Y+kTyAcS4V23%?UhkO^&wDrU~rGXlFB#{^@UQE zSg37geF{pbOZe2l)yWC4$NNP*&Vnm7gKFMRCIr*k6PiFFS8d*;zb3G|a=^+-xAUiyTh+j8cWQbYhqe7;eGN8QMUHru-_-oz z0D`YLZ6GA|5W8|9mb;5u!n@I=uP`aCAJc3RgN&C_n8Ks5rqdJA%_jh1;0d(&7NU?qWE({xU|M_vy{plr+&w>jfjNM~ z3=#W`E^6U*0kmof2+u{+x&+QE(y>i`5f&}j1>*c}H+yL=&^s4hVIry=!#QbVFo-MP zMjIIJ!ss@ZDB>HOK`8AKICsoBS!IMXXdZ7Gov{hi-<6y~DUA@rlm|Ip6{rl(&R*^3 z4v-xF6{JQ22U=4)#ig}jsI5K3uZhE9P}VssJ_uYnIYCw$VaJukCf^ed5|9MIH9dzjPwqs z@F`yKTFB$^ubMd(*}|GDmBkV@`@8( z9Wm$*gSa;1kxpcO+e{_p)U_grH$thCT^k*uY}8FE99quob#ml(SSXh|;yzL4IfL;0 zsgnszQ%Msp7fKv>nw0sf!anreI7DXaRKhW;-pt=u6me6x1Em$fW!bb<(3IiRgSWmQ zOFNoqY|ey{Un51+9&UdV=37Tqj-1KpYP6(H5SyJEnIh_>${95Nz&^UGD6-?`)Li8W zWyxoc)d$L@5Ai~2Mi57Cc_o%*Sw;o#c`Z!n^P$LD%CmX%p0Phg;QbnwV}d{Je;zDW zY}a-!m(uIgQg6aZ4zBKj$Ts{HbSSr19}lz^@6wrTje@%DrbKv+{a_niWtWnuIn-rT zS*ba3>AR0CRfN993p$qipVt?0ZBdmgeBkN@sY}H6k!+X8OK3y5LGy-=BV9r%7dDY| z5ih>i=k@kZ;zh~qQXsj8F5+IHC`aC-EFuI;2=t3TOsb7OSCd?$M3b)lYN^;77>=ow zI4q7Z2{tttkl3Xl(45Q{I6F|jhq$#?X%>Y6=)~FTmfH>|K*>Cd9aQ`nj6V>?4!Z2v zL8rtOas>vcteuHuV+Iwoxw3KjlsWKlqXE+>3aj(;Ypab#GKpde`FGbMvr2HH#uPuj{F!Z)d6Hlbfzi4$L>(Hw;hg7764|UssMee z2A46YYH*EOyh0p*^)cf(oJtDjV>Jc_Rs40J*N4I#L~)ga10Es?JComaK}$o4VI0;I zn6&5k$*xgTJV3F4w5Z@EZoK77G>92R;WapV_*LDHst%ON>`+rQMAn^A53R9%%|D@L zbA6#{qmC>Z`L325oFq~KG;KJt?^B8~rI}z}1Q#(ks zf?n^2pf;C(K9pabUOb%&hm7kXat}0$31(NTz+pMX>r>m_ z(7F3a=J}5RJDkciR?`yJyIY$obt%pNo0C>th}w zgnSV{F4PfxYM#yDydb%Ip+wBYAaEALGM+e;I>Z!buoHzaHM=`qdq+!v++7ax`glG~ z%>|pNdR|-q$|Pax%iUJB zNR`KzH%_--)>mmhB}bK>IXS++tk7tQUtIB49B=wrsg$1G(Cuk%RZdhkX}{qAlUde< zj?f|V5ohct_!VpX!=LJ4VrXed_s91iCiSV>SQs`NN+CmCy|ot>-e-PuyK%UhZcla-ONQ=_P1Np?yXHv`UrZ*MN%S&jCujDM6^{=`G=#Kz zgrpBEAI23ME<}L|{zQ4QCRRcY10Bg98o~&T$Es zuXQsSxbU{!r|-#;kiMBbetJDXZ=^$;QXv)I15*>S%6?~TmeTR~GG%{#)MYkJ0O9x5 zJzpERNMqpkdl;+mr)J6 z4VuJ;gs5o>Bj?!cUHLQv zfn*(sM5@!IL;)+Q{7}j3^Hx9{+Ca~km!PQ=@&#Mr&PA_xNUGt5DtnHNfG(5A{q6GT zCa9Cg>*wKPk9JLSJ>_RB@K1`0=W|io@2{`u>)M@fm&aFZN3&#C-MyVJk0VK)?^m3+ 
zRoknmbRC}8*GF;a8=kiZvQfCpD>%5xmxLkJmla$37o=gjK5)-hfP5Lf^n$&y{D_B6 zMdu@fKw9bq$K8%qXWc6}U;07YxI4|4TJW!$Ng`7!MXNzq^~0SooH;j3Qb#T?Z8D|S z;%=HlT0K&<=m9Q5CUi}(6ROlaj# z6*z<$Sa3VS40ajqyt>3lE9gjs-9jmDp7g2xKkst}-C_xJLc#37J>zNivc`V4JDhF~ z!(fWxaaxR!ZG~VgdA!KDM<};v@n>%{a*o?c77thU%Mj?SPKYIjeM(J0J^Z2yIG*)M)ks`r=(a&vBF)y%3v#4F&6+HKmbr0>ikOixZZ2y{Erte- zeRQGFh!9GWEI9nibFp4Nd-Cm2$yqH4jC^SZLs!ZOTiy{vUK>sjMa$Sr&dnE1LT5Mg-98F8wMe=Bf!yRfP=%?v@(YL6%#uA^^`ddZE@IQbH4OPo8GN5D z#R{9ZlD-YxGLj{n?soi$sCHn^12)9GFJI`Wz#~Jv!~E=y#~6Ils!(J09-X&!hD{dQ zLZ><6BZ)b~EVFA7-xkwX_rQ3<1S#pZLMkwhx@Xf7#41M&Pwit4XWO*Q60kUli{~qwIc8Y zNv@(s?APp%@SDEQ9)+ht2f)2qYs33L#z|bidt2mv_)8VQ002ImqrbwL_71KVhW5XF z*?G#A<}36FZ@i0M2v6t3PCL|)4I|UDa)h$~V${BGq127nrk3&nP13P(9yI_bIzN}M zK;JbPjuLvyvG%ARq=FJQtTIZRC0Hf<($%xZaxSLAR!8X8)x1Tckihj&#Kco^6HpsG z@)5+Q9yCBo(qJDyq(YzH@t}#;XslYtvpUdto1sgagtVBL>CL9-$F4sUMeXKKsVtmB z*@RS>YtZlOJpW~HSxm{2hTX+36*3OURCH?2z;(em88A%xkg>xAgF@T>7yJ#IKqUjj|A56TAo+o=0v%uAp_3DU9&jAim zXVq+@@#;gbqq5cv`d#(MW9b6hK|Cm@e06F=H%|ty(SwDT9;&sWN9a?x^YSeMChrQo zRD^5!mqUh6b`qpHp#IHTQr)X5sPB*|nW%F$9*Nu|mR@H>}x3x|L% zJ+~JdrQ88w^;JN4koM$va@#vgNn2A!*&7q&lX&)712dQUNC5)2xT0MKlEz&YZW$mV zNPVS82ChjcL&5IX!^T7;^32@qnI1JZ8jM|Y@};?g`gFk{E1f6;k&@9jSpsj^|D;X2 zoR2|geXw!&#~;f7u=7Wl#?U~~!Oqmm_z$iYjue3ErbiID_jQJ-Z7kLd7LTO`kVIi8l%`wqyW z=rZo|R?UQyj@XDnGkyr~(f5Sj;$dU`@wopTwNMS4B!l>nPdSi(M=cEObe(_oh3XB;CG<_v2%6(#2kqNi8{Uy;zK<{On$R z8;D~ueqsN1!Oqr5HkC`fP@q`zdywHFAjR7)y$#bisB0?C38?X30f?>L}5fAg3vDusseMXG_)Xi!o|5yBeV z8U@!cgur!B-ApPWO6#kT^o7bH2w|0|TKZI)O>|=+mZ&VTe^fdWIg{>Vj^@3#VBEO< zGebMak2AvTG@Jslq~HjH?-~AlK;rQATDhaFq>uaS_)IoQ7_<1W1#+2FDK&wCyozmX zJQq?s=ZK*8>*InmVvGDGZd9b8%NgVt9gTH6~#y(Jq)xWoCr+{QxgjF;mnM@PyQO$!Li)hXAc6Bx%^ z_21m%ti)KM^^v6l$MbJ-_%mur_s8K+>q=eIdSwvRlWy`o8_Jc6Wa2&u(Oq}sD#yeL z*`nrolI?4bB@0UNr{dmmy?6JC=+y6dE~JtTBI*RRsR>1x&^Bqk1wr3$_#E< z3u85u6a5a7{kd)Fs$qt%&a7{bXScoKV$x{|VP!?bFBUJ!Y&#d@?=OW1H#3i(Dvj3j zOWgQdq=xpKeApX}XHP$O^HMerO3PAIPTG2{8200LiRfLl4HX}DcfB#4meF@Zf_QX(LNDc%Yd2H6egmwev5u>cYTxgrl)B+EYgvz z>6$>PlIqbES+d`5Y^(O$}}2u3G)>;f3Rzr?t|1 z;IQ!JV;IKMYOm5m0fDaa0T4X(WEK-usS`tI3OuQ=zX`Ks%g@N0o@GBgxOtmhjhkn ztnc648P2pU>8WVSuD*Tm?2)c_@z9JqE^*9@Ouc<^Q)4?DDKphJW;soc%uubbyqvI< z_hiRD5CL5I+TF71nEiDQe;^m$v4Izxt<$Q86F;iPE^)0= z-ter#Vxe}>+N;|lHOhn%V&8sd%xvkkl>V|=|7Ij7`@}ij2vQnSj*5+{sd*`^Z$Xd@b?_`B^wc4v3&6Bw#(rQK4h0*QtT|?*S zt0BRv2)HvkzdOCReUIHNZ^eyzZ$j;rw(4G<$@9wIQWw1I{5!oh+_z-5j2Od09{M$U z^`dLLEeJas4muFJbzlZm9jFTT$Te{5&r1d6>n(Jut}%^|PWkG_TIkB|@&{@ci&1m^ zDKOMVne{fyDbTT88(cu{;;G8Y_4j-~xNchD0WWHwY*kR_GM{%;IHFUtXwl)_=QZZS zJU;h2QJx812;9m(mOneVzwPr*s6QncCruQoe|1K6q95VUGq1KXhTuAz=Xh2tTR{vw ze$2}1;PZ0e*>IjLt42A1-GX~fvTB0+U`v`=5f0xq;s=T3h1DfjO9e41cs2XBj*7Zo_SKE{zUyg0VDOy=^rw%e&1#;$OMOgEPp8|Mf-2r{ zV?YkC1+1lLkB|2pf=((9V=d(stxu3`dFlzkp7=U7@RyT6T$B7VrMT|E-~vghnU9Pf zm5}jLFBkcVm=UW<8JRzVUX;x8)uf!vQ`MxT%nTnfFy?CcLyezyiy|?(c$%E~5Ncdr zSG0JRoDn(FJgW{^0od55PIfco>$AWzh4Md&#nNSRgB9U9GDHar@$5Mx^kjGr9k4(% zTqko6>ESs)C)bIkNLMqRz4ck22#BG#-lJ>6-{4fSgOA$(*v6rs01UJL!3|D54*1ta zn+bv0r=|n%0glZ~V7x|$bAbcf&j)yhcw#B;@vb1M>TSUEGq7e5$-8 z52ILO8!9S=f^P}-qvbv%f@`_V4x^~)TI+nU-~lHZ_qoK$9;>Fhjt^%4)U8Og9SYtYzRXue`!6S5S5}0Q!-j5lZNr-7I?iOgUN|RS8+)ojciG=A4eZ&!M3zv7m z-_H@93H))sClIyVbe&!5aw(5zh$^P$8n29FxF;`~rs>DR+Ja08-;x^2gMAVAqi~vP zk3;U+O4q_6ODPkHMfo{b7+1+Pa8~KUImC!+7`dPzdE_$bO9$MG^gR!EhsPP@+_P5v zmyWxaV+tO-$K{!FcvcrA&5pa%lia0+Oixed=jGbUC?jq6yW6Q#^N9}LRL`@e1+iLd z?&l|yy9nbAoL)I3>9+8XA7dR?r)jxMY?+?+%+LL`)^|&6tf`Wi6CE6jNXR4i|Irp{ zwBdF)#`_$yr#{ialj?c0)cAaQ;v&_M>DfBb(J*CV%JY2Z{mM=^UuzA&NHzu1?RIzO zb1~o-dwaU#d8haR_pbx+-Ok7WqDvax67UoJ0(^#Xw7?Uc*87#&u)d&jvqK5gZqa*3 
z$#$@cAOkd7~xqw=KmM=lPpkeo7D&A9K(+$8cbo6cd1) z`+~BU*f)Ub&ni*edt9!R%4M{JG_C^}Ytvms{{<$dDz#~}hPcm1)D-rseMBZJ?G+3w zz$8D3y^#^h_x+n`#1tr93JWnsk>>bA+__n&GH+YrEyk}VknT*j#pcmBO$lfkL_2<4 zHmHpg1S~NWK?^1_n+3x5loqRe;9Q#;-b5Izz~4iw$*ly!yV?B_4fDO{$k{VO?1r5{ zR1GQOE@dP&23#x*xnjJjBw6@>oHBb5ndJ&(5+QUUSmqHuz*{7Qv4i>H$~~m`+9Ky* zQVaBcY89vxF$b*9v^#)R>EnR4Lvt{dtb#{`%~A|Kaw$OBrOC5_zyf&}%Lc*_&CfZv z*;jIr3ZTV+%Pg@dSs?I{-m(kil$aDJM3hI?2UUZiKweGl#@v*gPw5Gh(2%;hqxT6f zgZ02p3#kT(O`X+QCn5Ea#K^$`s)~-qB#mA3a7Yd=1`@i2AVO3RL6zWd)0IL&VmZ2l zD8JGPBsWux``N(Ig;)ZuMNisGSm0__3d6Yy(##h002<_PPnZ+8Yr?A_6^+&Hkb#JF z>J1h4S$aARq6N?^8uncwwz|#`6wn%om{f{h6oycNu;8S~4^+DHgru7K3)D|?a~Ezs z5ar4f|L0cS;OwDG9D@!^G6P^G;!Py5fk@sy$oZ{#e@%pOY9Dak0l#vHhAmFpmkEVF z-1EdfABaRB3wee`j76q|i8ifz0rs_er5LB$AQ3wQl1qjng!J+kt+!%Ig0M-$FbXjC0P;2jNfEc{InJCsda7`HKo!iPS$zkPgO~jk!E%FyN>P>sy3#k*B9vD3a`}fN zcr-tW{kq|66v9utYc_7Bp~!-5&+Ru1_gN@y*LPvLE|C=f@(~2dvdR1|JIUEB1Fi!0lJcOUIw9jZc_0g7y4sXrzSGAz}eV>GUB45DhdMxDPqVz2ILek%5xCH1l_28Gy$kM z;AlonDBr|T@RD(l>e?onZZqKBoq^_zgZxVqq3B(LW-vwyeCSZh%o;|Uk><}wnv6-y z3w}k%cn5mEf=K8<(WCjK?A*%vP(jk6-JHNZWR#Fwm+7ePoIu_Q5L{S8PG&`0T~v`K zHq`0t=~Vi4N|hM{TkPP3#(_tSr>GL{P*7B-LwmKOo4pmb8WN(@1U1_Xe50B@@l`Ws z09krehh^jh0G|hK$o0M(u?LaOqvoj`Xg>J8>S~@Fq<7O7m6M16(8aZ)h~Z-wAl%aw zun{?i;bW~Lwnz>n{+e&ZUSuUBX^V{{&>iuR7|LQ4ty#^}1QGQ*qELayoG4P`qlb&G|!zXNiifSQh= z$txM#?j%WVb8O!XH48)Lofn8jsZK6)RVBDvO^f~6N=qO#;CW|pLjnus$PWpFigxon z{R;sgrn8Gs15K4@m)p*opcd3kK*fO|cn*p$UC$#qoaZ11f=EhWLt%t33J@q9mwcN# zdv$gNFcML~AxM4qp5BlZ;Q_xb=(uo|r$N_wvW(&ySOA(SfXaRY9l87_!1D%KtyEE+ zR>s*0{JrG+HMr{aac0`ijEQ2d@6SfeSOvzkkdQW@z~l=HyGP&JA?UFRgbVuHrPKS2 z)FZlzWHhe6?qdf-v&=B` zJF&ig8ak=GSw^M*W|uWGHYOd@srGHP9n}beyIm-BqK1a;?U7rV5#TCOg-n@jI3S4 zYi&tqWfHV1*Q!vvDwx@)(Bdb`Np+OR?H#jr{ylrwAjyMmIK#n;!$)(d$fFt1f&C`m zogFhtpW8y{Ud8AWWiG5^{)zq-K{+KwRqW)h5yMB?0m??ltTmbsqv=9p?Bbg|<#{J{ zDd(Mjm<;_EfEy&Tm-_7>pW5s>rS-dgv~$itABg)-gVPPBF+evMqKWS+DcebO^oVqTe`ug~ucyXq4 z)*7=`5401B@J~3(=Kmy7AO`>4R_|h!mfBv z)?{8Ia)Og0Tgl2gg|ew?X$OX6bV|pmSk>)q#|XA?Co2_wzC_Cuq*~ zH>yN0eqe{A24%(K1+)CPL(BTcX;0P=;10*rbj!qPZ7=ZSk^B2$AiFJ;(MHf6MvycNf6o&zqwlwhn*;aiRZ;9Mr<5~(A*?Bj+oZuWOKKS0Tm zsbU>gT~H4n$7fxZ6{~x*=nDX2jb?Fl0k#^*7$X@!;|3^ohW@gmiWIvwrvvlcV@M+D zZ_4;Eyf3rA$!fV($>djr(GF1TeHKG54*Cpy&hy&pQ}OwJeR~8>9J@{xum(gOb{JAk zz(LRrRrXWfHJ~COc**o>z>`ZlE@tTaOCL%EN{GAt+KXIG+{rS+wWQO$cs<-^vvM}| zw~e8nbrS|0$Ph`%!0XR^`8XsYo}V(*SE%DZUtyaPW_o58zwd4;~78uQoM;7(?OZ1we#Xv&s^{2vzdT8zr zbMD*f6 zglL4Y3YeD#3fDE3P=ql(Pl?=RXH$KNhOVYdf(;_td~(^@-=c zncR7WOnF<+8F|_8PEowbELn`sTaR+7Ju*Plfse-iCF|;q!To>o?~fim&i=rE0?e$ZJQ6N&$+GplNzb7 zCXi6nOCfZOpQ*+n)54Au-6-4?rv2LFd@*j>cu62Y7A#&{O1=?GaBZI;wXbmZ-D1Hp z_j@fHFDZ}!)*W9d2j4hpXra5sE^KnBtrd7 zR{rezUk>>{I$jptCE5F{uKr`%_pN451%z4G{0L5!7r-Ho>R{6hx}^}F?+VbX!^EPP zdfq+x?72Pji+E!`$u;P-;Gr1F>JQciM2(#EAd?rJ!3>9Q(YOXr5)E^3xArtk4p7o(phAfbFKSva0PH&lW5eT zTez9xU$E>dnOZ>c5Mef&S@OEw)FDUmtG2z%=| zjtTJ@Utig5c!cR{ie0%_f~PytYSbHdwDx>kPFPQh-2w!)d5~t3MY%48hCj$kXw@ZxQvm~cRZytFk3R@&JSu6wX zkCW&sdlr>%t~v2;j<^es=~*boG}B6RY+M^*5LzuWBJy1(iMVLMV(PUgUXaqV9d7>ti3{*X(6s?e z-4?n|^jyifqi{yxwL9b9OI@crPW0}{JfnC<@U=lpJ*UlwI(HuzfJXuM0qngwe&Fc9 z(SutVe_TM^hp_kO=*H2Fq!&vj9*s%3mMyGOf@+ZLLVaCfdaO5b{uMp_@9Dy&Pi~R< zm@ey&P$c3)bF1ew_U|-GN3dHCzo$$B)Gv1qz}#2+(Yo zPf+y|Q|cWs2jo?e6kWrT0-;z{CNY{?BpQlZm@W(nU^`G%K4cW1n`NfvIiwjHJfLHj zLY&FzDyWE4Zp3Q4Qm=4V03vfu0za{nChIqW2D%aC5Gz~)7T*_~0{H5a3;5ch#X)xQ zX@QF_^MP|3?#2h0*IhV-a~VOehxtA0gmrycxXsISV(|Fp%cn9?sX13AcUHlkO^ujt z2Ry1w+N#*ikIP-X%)Iw4X)|X2yxL&C{AG9nH28Uc^_$hz&kCt`kl-H!xt=L;*SwxC zqerQYf6 zIeRr^QF=;s2XDr6@lieC(xd+-XEBy`%(wG}@bTZNDu6&VA89H7yv@?TX769ef7o;`ajU-(h~Q>-`I64({K5`L8P9-%)-~L;nlK0_P8u-;>gR2lzda?k|9o4_)^^ 
z(&~OE{XJ>vFVdC|KhZDJ-&2@=NBBKU=r4pYvOf_1t!MvT!~PxR_iT~BP=+Y}K>3$( z_p9*z58Li{gx~#Je<7%G{2k#hZ`bb#znheQA&~R_9YOyu1M_!?->s~_AinVZ4dVaD z=K3A+cf;i`L{GWTV85%{t5PX z#rk*Z|J3aMq6Gjz5(WVHFFo&f^8Z|u|4yDH`Zx0bS)t`VgML^8zY0B&0f0aO01PF5 Go&A3%_(a|S literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/chartsheet.xlsx b/pandas/tests/io/data/excel/chartsheet.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c8d5e7afb3d07d8fe30779134bcec5054c3891b5 GIT binary patch literal 20069 zcmeIagL`Gmwmux&wmY_McWhf78y(xWZQDsl9otsNPCC|?-us@j_vw9p-#>8gdY-Jg z=Bzp1%&Hn=)?mqf1_nU^00V#k001BakV>91tN;W6&;SPjKn8#S(iF0>b~Ltj)Kzk` zHFnUZb+xi2$O8c)&jA4Xxc~n?|BFYUA#qfuj~+qfUi?Mq2aV)f9X|xiS>87!N_pOI zOS7b9hMIWyxA#(O;1G?{x%%jII~pi1bvA#W0!43D|as3ldc-kvmLwh+ZP<2q2DkNhYpM&2mH zU*zaA$dcYGMlKo$+;Ufkf~89-CL#cyE{_P?qwsO&X*syD{9 z(18FsHGp7Yz3U{Ud|KuOv4?t0COfuUhm)rBHK>`bg6R%l zggy=6CT#%uhES<+_+uK^mHbNVhpP20A-CEJn#;4kJ-1D$Oyw8uBr^-C6;Js~ZfC6* zhb7dablh(FXgkxltoemqd<;ggg}^xwBn>!db5^VSsLXQ;UZ-!d&2UH5mzVCq?IyZn z#Zdv@f|2gd-sD!-kdJU$AEI-4OHoRIE%9W(zma|L=leS_fZTrwd80Bt@z0MIsSg>3 z{t$9qdt*xnI@({~|0mr4izWJRU9X6fmhGd53Azw}2_CwiUynf${Olqi-cG3GB7p1Wrk6pCoEm zvE7g8JbycXmntUdM(*4hO&rMu9kEzHue?2BoZJT^{6i?kR@4!2meve>@ zQ3ME9h0aQisdt4lLx74E049-N6qIi^@392aWW>rXP4oM|-;G~!+~cL+(6tw-WIVP!YDt zEk1V;!Jbd5iD{@&VA|C95PHu%Mg;JyCJTtk_Ujpm+b1!8oQhGA zBFLCi19e`Uxe3`C4>D zU>bd8UzUr?kI})oHf;+DKH-_y0ufJM+A?af(c@x5eh;w_a194luTk-Vzi;zebRfRV zGuX1-OM2mvliAYq?dGRyV28hI>+Z=@0MijWCh~Y$@P8SEvU&U*iLW?g19E~Foe|21 z=l{MXo$%^eBkYyj^N1aMRnIff0Hq7 zL*WrdS|&#knyWxg?pUO>k(2)wk;J zZ~4P-#U?t>goz{1r&)1U@B82!O3v^NG)t@sG=2B{6QkUsNuWkeyT`yZ!k|%tRe*%E zYq|lv%KVrdHPK~TT2rQVy%s64SW*(q3cB$ag#ue`8Zks5k2Qq&T{h+rk4fn6wc$Ii zBqjI^ICKhnoOhC3Y2Nbsv;bc-;)jDY!-DPMz0?UPuGpVsbOr{eFFit72WKYJf3!?& zn+?hI<^f=u5D*W^Xx$o|_9UY86Rd{t5ZAB}*W--e0W03*F0J&+jE`N_F@MHU*Lqxm zc#j@+;Ktd0;fZ!b$xpc{z=(hR(ya{x{ z?07?oUPhWE0{AM#-IwKvDt06sGQ6mBMA4cUv2< zK!tWL_Jy24G|1}9wZTn9bWlb>(?^*ShGZ2LXOE0OgKViwi{&7TG3 zG9c0DaCTs)AQ)>8^sLbWx4S~gT%PKf4-MTpT6AK zOyAzo!OYm$(Sh!d1H-QsAR?~EdiB>53490Vciwc$;>1lX6YIzn)8_+t=B#S8f@Tm3 zR>*u5)cI^wtONcd4x2(A|2=!>c-!*p9ncTvmAGQNUQD&X#wM#_Z*EojiPW2!g*q|l zPu`$|_zU=a6-s2o?Qq}feQK(HWZnxA1}NfU92wwz3rYtY7ROgG55$Hr;^ATJd`KWk zrB~4OB1Cb722K~Hdhk0o`}PDKkdw@dVVjA!CdG7&mJ#d74i_uP1k$f_ztSxuHxWVU zTqzRw%!|Q?%r(0X!>4P(qJhY1>4BrSLv%A@*M_ch~=>-b#q(5m7T-(pT=Kz{Ml zyEaX{W&5&{yvq8)G?8wBe3$17mhmXFuGfZ9^s2~haJA|zn+Mqy+UuOH{rkWvdY+qt z{m-l-3RALzKD;Y>v~$7y8VbWsem*R9edHXv;C!A z`pgZ7Cb5g@F?I9>klv7Zc6JMd*NgyCmSM#u8PNf=Warz$+h^XY_m4UH=Nkb?y3R%) z765>Z4*&q;&l&vnF8Id;W@rvu6AvSHVqfw>aF1i?G2Dy9Fq^fqr0_0bxLWq9vMQS+ z5{Ci}Z|FU2zA_3S4N!_IFbLBr7}K998~q6Xlt1D1K#ZWRtDgvzw;}20r~}_{{m) zl{4K6t@-uUW403S89ZXGV&VRV#!K5f%?fYv^`*`0=*`+B0jmD_@mgPyDWhtWaqxKG zr1fyK8ZGHxcyF_97N-R8|+}u2QA8qa~-qSX8yRvmK z6N_csmQtdvzZP1;2qb$~y|J~jx9b=Ljw)tWJb!eV%kyh#V(J*}L>cI-`?B|X$J3(` z*ZYH}_aa>r&v_mh*#JLZl}a~y4}43(h^lG+#hVuF&&AteCvT>AkLK6G;}g2q>)A83 zc-L>VCthD)Up>3kzirV=o4!DY+~1tN>{xa_ef1mY<6(=wsYCtwWqPB>Mkj+N?34?_ zunsosW;Bp9-4n23V0LNmgyEHuwa-fDJn(VTi_YPacJFw6zy|;At&i(wSGVfz<^+D$ z#l*ymJAM7sf2Ey4<@$`R$#K{E9^rPIZuB<&>81I`$(tt^wX2~xnNaV|Se;tXBs+E) zw{EQ!C*$)1XnbPrgtqj!J_`q=#(jV@8+tCy;{6m7zZ!B<)hxpx0L;YkY31TMJyOcd z>g|4Z%#yEu!U;FhZsu*zL51MVlpJ?Nf!V{j3079jX;zq{Le-6Uh0JgPhBO2vC+e&| ziM$^PFX0t%X2J<9$+VxGy*-6-`sIy2$-r=@I@uAV1O@@cq8o_}S$ux|cTm{b#)ttE z$hemPqkE+W&b_a|q0JmvHjUB`=v0nsWLUXAK9ePxo=P&u zVK6XTkICrUm$SYl_y(ESJIkjhbk3lqo0(d;^tT6!*|`_C_^~2Pwvfc(P1ap9lTa1lI_g!Ar{U3~8b# z=<6OxhOS7(P;(Ii`_4=Qd_6*$ukCkB4sY`fAsfw(zMve?NeHP57}f%8?=b_QPn3Uz zt81m%1QFI^M}HmGtp$$xn=tGO(*RRpj^mD{Xn%iN$p;qY2L(MX@kE0>@!C(k8jKml 
z*jiH7h4q-o6Mo=`+F|0bP};98r43B2uOYI+gF%NMG@Kta-l?8oH+vD1J-pNR7e#nC%c zC!_{9``yP_xe^n~lKM_OP3A@}1$DHRBSkMC=_`W4oxf`L`KM}OF z9I@02cd5rLSPjrzrNq2tIw-9F2f;YZFM@CEdv$(_bbz98)P!M6%LrWQh+K=lU>KDK zVDAKQ&^`TQHT9^qOa&55LcmqPI4N_OASY1Xu@m3o6!mU&sCG=nkuwPS76IX+F8o+Q zp-}?Z>3@LXAPQ+39O@q`(>dr_2A5DB^}-KxMgNRnjI0O{z=?bWU(T``7%CMq-(${$ z2H|Oq`8l6ZkAwJG)8GqNSlkIfaV7=PTM9xxhi04yj2Q+ z66n&Jn+=j(ELUMm%h1R(a+O~Vi=dYo%R<$_YWtrFGmsWbAH0snCnOSKqQvW&>Wb(+{YNxxQNFKvtG4ALvQ zlpep=`-O~(6j{C_NdL0IIRj;dOaY-pekxeE>R$1 zXl_}AJJ|K+*3?M3lF~sLHOvqKg$D-r2rRjR6VXs8O^JotcFwn`jJk|Z9bA)~ z0DI!Qh{st-wRUjb+sWk6cg=x=FX#L#J%SPwd(5h?yBQ zg%stTd-*JMc{bv+JFrLXaLkuEOJxb7f+liI(4mXb=+kTkknkCoX!lGui$f@&1R5bCa(db~Xsm`wTH&N?{6* z!G4*UjA=aq2nSE3#kUlN1R~ok2?f*P6X;)yA?5D-_6y7r9A=o0FyDJpQYoboVwnmcCu)L}!P(hs{oMglA~ZniByga$rBhwo zn}$0&L;YJg9EW6GvJ-;Al~WRBr4e@BNNfu|0b@xzEDXY@*6!J9FPDpaX^E|FK9?CFX{cO1gCorrQK^WR}A ztE8?MLA()4o9fx@5@n-qQQ^>WVQ-Klx5q-c)D!oOw#XZT=TDnTWSUN%biGjGz|*EI zR2BB6=f)v2-=GqXRrO)cT~)+Q+X<3Z0GDOcRY6mR&j{I8LzZ?j)7qL1C%;CDp*`I3 z66V`LRgRj?>}j^5O%$7(9-SuYrpg;K{lGrDt17YQ=G0!}31cZ_k23_yrw{c;X+;o6 zZhIw`Wm!Q5?|Usy9Pp*cTQ0DD^OnV z#GHo@xFvp25=gi2gp`-quZ`?1%h3sx`D#715ct{2sew$=*%`I$0o?rK$q-sXAQdkgCx&YUwI*!qvx& z<8Ue|Sd7;h8P)JNfLVuYt5yLocBr@sF^OIeprh0&40qIb| zOWgR#muV3*io$Dg^zo~@AJrTvli8!DYKd&PpdQ*_`&oQK&EZm`>7b4(8O>GC4@nlO z0-7-%-S;iWnAT3TD1nO{ZM@RX55UK997LI}8&dul^0UO!8bP7;My&x9R3c{GL@*Xd z9CnmP1W?<{KOf4!!61Q7g+s>e5V;Q;#SF8jUEr{i;`OOxZ+JtA$Tar;CJcBrPuVXm zPjN-K!yH|af0b87+U&@6 z(whVbIMnU;E6Q2-j-O9wyDy6G&5uvB*ZL0ji<@&UjKBYvL zpE*0dzpT>eh+kasRvmBoTdS0x-O%l6Z&y!Nx9Gm$|2MO&2OXhH=EH^ao#0n!@ehZp zqnWXlG2I{Ef0)##>f_3Rh9qe?!k;D^Um~vDBT2WE)NqCk7kle0F22w5`g-Gd zHPe~$RV)R%H!exHOYWK{BVs9qWH!m$f}Nb%LsmQ?0?`=K`VpG?<@0K`mp!7@FvQ>z z@T0J0ZW6u($0{Y-07Ou1%Qq8t-%ZkpLGfPVY_vffT>p&}G^#>{N(|z-2OU2HId@M+ zLgG`{L`iS=(hk{ehFC$^2=YzUvf59%wYzsxWlq5~Iml)P(#@L}4vM4+n$D0sE{ zf-_BuO-~kcP;@o>7C)cq$Tk18kQ0`$ZwaCeY&3UiT|qVKJ&Z!$fbkdTjH8&8kAW>e zRZx^tBVyarh&X5D1@HkRNx-QS_>!Wz|P0!l{ z*=XFARUF)uOTy6F%c^a|3(|0XU%2NhK)%d=dcpoUe#ArPlJn6aARSGD<6ftlv))x4 zweO%E+}#$-ZTMHMBvGl=qP3uFh7ryfE}UCsX``2ywpmi^@i(oZ?V(e#7U#5*6#(}K zOgXAM3d@l-#`~~gNTEgSJu`~kwL%fNl?`j=>bm$ED7ULx+PWChjGgz{~M9-wht znZ*KyeKG`<@%>eTRgI~Qou4Zjc_`v$!6TONx}-MyUWU@w%qTeNEf%0X{2$@76pNsT z^l|Oo5+{)m>ZZwG(at@zZK=F$>Ie07H+asvKrlVbF$wOP@5T}2E&RCkgCL+_(AWJ! zP~WtKGs`tB&lA#@(plkwQAAZbk>h_v4orsDk-fN#;JE-gfPqG9UOgvprjr{V2>aGU z-s4kSd^O8j`(_(g;jrMQ?4UFM@cvw8GONQ|Z9HL-dZY3E_U=}|Jhj8acZBrf`Y=nj zqjrypFyVX@bf8_(#S^4sIs%-&WL3V6tkoQFg6JWQC53t|ftY63j+Bp#aV2^RMnlMW z{aZ$u)UA>$6KRfNL9i1QYW7s+v&@ZabmXjzX=_DWS}8PW+@mXnR-{m}WYM7p&&5XN z+=uw!9ODye^y|ijHZ3&T^{Sw4>Uz+;MWQhN#^h6ulKHs_Wj} zmPqGbu)vBykO@x0fgx&fLuyAv71Ttk=+}6r(o{e>13D&xXh+56Haj<4Moz-`i4y=NM&M4O+47nT{jj5iLZB$Xa|!NfoF1xvU=>%^{j8 ziJ2`Pkq(9^x-S>lpG`GWfpRidD96WdobI6(q|zy`A`x z(Vf7Y2W*H1YHD;;;8CGI;r{l=;|#v(HK?(Bk1jiUBW8;oVKW>FQN*0#a?W`vMp?Zq zp;cqkvQ+pOvdd{bxhJ&TDP_QSowDm}`12zUU|;r4iv}o<=N#jYg1LUcmR}3a)ln8& zzsk_qS6R@u!XeGONp@_{d4>b`(9>j(F-og<$n=HP*UPUqFi}Im$j-4oSUHKZ^ITPt z@eO5;X*lsypGamtJdLb%%$tC__;P=>^=2i&u|?*zDjS zqj~qDumoS1T%t+9((gu4@>j@3Y$^|Q#!wl3t~2~)Y6|4asE<0 zxJY}xyGJt~h!`F&r8|b+xc#)#rM2u~cY#Qd`P?{?qi*GF9%$0YLk`j@lRtI3z!|7h zsd)kWb-iX9hx@XV7#412-dgU24XV+;Srfwild)I6dI@-;g>JFK-tP-l^)@ZcgY*! 
z>3qa_mm0EZbY@QO@OuvKX}Q5@Xo*e)iRr}mw`Un=9URW4X|GV3gIR~kbq9r+bQ!1y z!|(Q$j5ykX7!1nCVdc_6-MpwhFvKS%WhGN##;%jpUBjE*pDS0P?^=w<2z}&O`!o;I zKna`H7$wdUtyBEy8d>8wm(pNsBlR2V-eOQl;QAM^T!bf&phD?lVlZMQ|MU?dag;6q`mX!x{J~|6Uz88<$ghuHK<=}8v<=- zfa(skHKZ6b$&e4Vl;i)}U_;R9CxNZ^)Z#5{1G6q>$H>Y56yVmjP~V!(q0S?h%-D<1#>PxSuvfc`s ztNQJ+e39)S0hCj|Hm#|bCzIFY!P46R)yCK(?5WpfxcNJcz)a=-yW`aizgHBD} z!ZpZygNAQR)kOqntapz(MXR19g=|zgM#?(5T$urW_wsJ>5YV;n_F}V~J21Sl1_%$* zf&5NxXLmVyd)g#tbCP@t&mnto_R3l8TQbUUi2L=3 zDG`Z0GdFvdN1d$}W6!*NdA^__T?oi(H;O=%WXw&rz#H~|V`ta%ap>$1HtzlSL-`+e z{s_|;8!0;4f3Y_GgR6z31fY6v;rdp~gp+~Tj6pMT z2=6iQgx=<1YxD8A|CCSF@F_Bg5BZb>`FGU9$X?&&S6}G<=f@v9az?_mbvivt&;{8U zF7+nzH^P9NICH|>G9~HGCA@DRi<3U~vP@dpar?zm4CQC{+S?!;%ZUqzw+nW*X0qvg z;>9Axx^Gw1y&<2Aj%CiFUv9)ujW94j8%w)?a`Vk@?P)%#*0q*toX8$iAx4NH4PFFl zUVwAMz*?{u7?*<8>b1^q$U)P&<*i`x1o|=^OcFLT1(P$P%MMcDwi#>8w6NjS6kh$V7`b2&AEk-UmsTmlzTC+Dc3?(5^C3u2r6 zC2n+-pz9grI310L+-b9b!uk=Q=tQ{;(xRKrlVP}QB-(_4*=bl=^%N86TvXA)Vc~>? zA`7iIRP)?SY`-@=gM*PT%%~?bp?)^Tmi!l_hvH{ugLO)01g(`Tg)bY=NAy-~7!i)= z`*NF$`Lo_mryO0W+ca$;ELW#oQ_f%B9lX|cJH45&c#8{>Ck){I2^KWtZ zGipiq$Kg-wN>kfrbqLjyZt6V;%8iO-@;(^RU4Qf{&&(OwvhI0`O(V~W1*P;;Y5#=5 zyZdBJT5f?WsbrIgCIM|)VhQHN7{~oatAp#~OM9>~!`JM^@jA-M?~an+^E)zB!;Rfs zSl=GcZu=v|q|+0_D@sOQEMHRCb}uI0Uy2WIW*FBpep%mFnm>)_s!Fyt=)5r;RrL5AuT0xd_juqaajy8H%t9keE!g0>iUhOk@Sp4!a3=`?~R~cb|Kv#tT z2%ZKqOG&EKNnx`^p48V~0txiZEAjN!Y=I{grUJteYeMBL!M^I zRLc=xt83)Lq#2g_g7wdulvZiZyS?Js&myW=)D_HXWtXWNzyRJ3K+-g3M9 zq#Ip5w4;y9oC>1SZePBtvz?7re9<*!IZcVmRBf!joV1enWXC=b0bJGSZCi87(U`{{ z%!hYs;>Bj`wyxvEkFK+i8r93Gd`(slznIexN@Y321~P|%($SaL`SW4grjfJ`b!Iv8l9=;~(t!Q; z)^Qhm_k^j)dfy7~RF!nC`l}qxlZ7PGT2=Oi$?fr7Q}@`bF~OP$xC=VJJH3xXpZy$f z)s1FRy4_^XlGm54_vLJG~8@R|;EZtZ^|9{W`s7$+i79guN{X9SGe9FaxR{ zRF!+wI=Ic}<)X@sHab1NDog==txdFw`bljkYVP&~aRwTtM#P zY0Aot_k7>DZrb1hFY2G{R8Z%$o_AF^V$!l{(c#?}wC2M-KKDCQo(WwD+{!*yK0CU< z?ek7*J|&wbPnKwExS%@IkMb8-)LNTDaGfo1JgZl%A_g5lW@mTtc{}oKx=dBnq8z|( z!@VY3x4?a{CC!`&hu=E!gG9>W+OnHAEw6>*vPTraqWVj&og?DRlMtU$9e76z(>EoAHbx*(L0Ve0C(;ZAf6(6{9Ajj7t)^fDR$9oPzXBEfs zw#usZC&-Qh%|u{Ne7!pO%c*Z}$^KbVTz6n_L8R2oM<$O-$ara&OZ-I4h_$4Q%pXB7 zN@n?5QcmXST2fMGhL0E+bFKWL)=&E-k=T4ZZO%dnbuRBKT0ASx$UJGDHAk#KZ0u8K z`&sghIbfM$`ER9S88Z1HitrqnqJ+hG4xEt&GCYTlSfH71Q~8JV@SLAh8pKkiYnjg8 z25e3Q#L(OC(Y4`kaBA4W#~i-x;LuM3hC6)Y2B)3?{Oh9aq`=%$%Yn}z$5s|FUbEx5 zz=7T813Y6qu~hd2Hyq=0`F*>DRzs1e zOL;tFR55k81Z5oKJ$cb|ZGRTlHe^cpwzM!F?2Gts#WPfU9CFXr`j(E_N?AxO%Fp@2 zxJqt8b4nL3p(a!#$VEjdqnF8QU2reb_dMKP9%qpA&pHWeU3V|X6g>8iE3=jGtgcAf zU3aG^`OAx0o}SFlEA`dUCc5r-x6`Kc5y5rA&=hwM_ZK1=GVJ%-sjLg&B-pFG|!Xe z=I7HBSE;5f&-Tf#rfIV;JkNJNuk3US^)~QJWYZwMU+>O*F9!YNZckS|?-W1a{&fJp z+Z`Q5bWNvQ27ZEHgwHgM5qP50dA~9rF%(p8bu5GWQS#nZwiBWvh(U`|A7DqQBeg>| zCJ+Xw3Ka=jtYR;bFUo#MD+NW18fTIRGmtX<({M_koh1mfGF3qsIxm0Qamu)5UyI5j zz;_hOB{QO*r9q?jMf|J$ ze|$C@iAW5v_d!eM?^up2EeNo%`jjXrKJKV>j^W5MB_;qluZFUhG%$!6z$#JNe_W}Q z#$~dLG@%C=XWLssuLcualh!g;M?Byw`UO_QAu@}V_6mj-V2Yo_!Nde5cmHMvF%?Rm z!ct68q&49XcYe;f!pDwyn^D6I(w)h!)FS4lB@s=FXxD$+7PWbjfF+h9c+pH|t4P>^ z(sGRtoNG(VhX{ieI5(`8+*%-_mpuT{xX@>woINwte#99>)tDmwQbtm1(ACnIE7pff zl7;`jv5ke1wRRPfhyk%lIJD5MN+(W9L9daHfwLt%;c7X;F3&7ee`vX{& z0S;JuG{-NJHSmbA*@|IDu0<$6XbNm0ut46$a)2;I3-iux_my0w0%kk`_BF}EZaQv1Rsw4`qC=zSw9U_G$YLu&!z(&qFwNJu>- zG4gPLYGUFrN#oW%98*F{frKt0h!8bHQ6>00^rcXcSdQ)>DzEf{$juewe>O4nAeKSv z(3AEP7P*<1!*H&Fw6X<1fCdLR5az}IFymE_ioxo2%tS;w^??fiEIpGB(FW)p1N*KJ zSKD9=3TOjFOe#e$3PY$sSaee24=P=KLQ+eu2KAHN!j;%$EeGS z%m`SCcnb+^Fp75oa$$QRKpSC#+83O6(7zI*X`9pTWl~`P_dIF97b3~mQl4ReAxIBI z9c~`7DqxfU8!Ap9R?qtW_tGB}lQ2+(ZTjJ40YrF=mQf=lV$eyxz_>`jmPK)D&B6!M z(~Y3p2r4!)Ca)m3qUM4369=|HbIlx@V8oVM6Mp5d 
z_Fv9}M&eB-s!1pZnYSadd+@t8K>z*(P$Ch9Mh((SCnZb>)yGg9NoMFWMT)pX&vEAL z(N}}p2dZEJ&FVLZ9I_Ii2$ml#RF1MN(37#H9;viukk3C7$)o*A?AHw%HDsau=70qO z@Fb*|jxxfYaNvft*K+<=G+#&rUWAQ=QA8%Tr+ic`B#_;uri6S+;+(=NApxNulL3gP zGbzXm=*2;RjGC7i5L9DZJ5&0dzHcx4CP9Rr8v)!0(U6(4i<5~f#30KRF{3t*j9g)otA{?3_;xv1K*f-Uqa2SIY71n)nNrW0l?=WTXKV3 z6ZT-T1=IqS1MLU@SAFesql{kql1lQ3Z~C~_6tR5l0)%_o0=6Q@Fnp{v#Fi;R#2SSr z>?PJRl6Kfg0=wP;)S3 zJ_SKol$zu+S2cn^YH6`QTk8mf1wQXCZAxIF9Qh+*P|tWUtMs0!AVVJOpX%-7^@r zCOqJ`1Dz1A@igi=Pmxhv2Ma{|0-&a1@OE{Rxef3pp$ua0)H?0ehsdAeVmoP zJ8Pzx@AtDAGfsgqJv6ifC@AH^(*Dt}ei(YZ3gLqOcKP%^Gwq13Dg}*ep!e8Ok=A?t zhxh7&+S`8ioh*U4meesEUj%_Z5f?4F&9I(7ut=cYzI}`)78DGWOq(J;mK?w zv-6}Is`xtlE+s&(?p|`(5xNAL7mLulE7TU=V=AlHdsdmo{wFrqPs1nGH!G<0UiR6e z}GMgWL~*WR=!f)=uCoMI{a$hm!pH_k!>!_Zv>2 zGq*@Lrz0EB!&OOp@XLK@qnSbBRfkSC0?U0T=9+n4v$FPyuk~f!)ydGRTx&v|s$k}y z!b+bgr!-L>cXrJ?`S%>$f+Y`j;Eac=4j(O`qK;-k2lre2c6ZGseQ%4Q`xRqOl)12u z`6s`x3d$)ds$!?~j2b`E4pKHdWv|nG7)=*i;}>4?l;_>l<(zkh;WG5w0AC@Iy)|!# z_|)glDQ(^zVqEeDi&-|<>3dJoxQy!o1YUFcq$LSHY+@58FX~{ zs+y;TTe-QTzj-9Az9re@N0#@$i|N=%cb_3t{-wyddR%w-_7lBg2v+gU3m7wptMP(Q1A{_b@J|AXKASlW^Sxa<^GpOm69pRUzIVkHEizdN zA}P=fKdeNnYV5Ys$A1)QB25D3#v(lIIw>VZaDUMl}I!o1a-=^>;)M& zF73u2>tpJsYX4$g3B}$x&X5V4Il4fw*6bU62G(^t|4rMyp5Cl2HIxvUv2ZJW3;*J~ zc%Y3cX6X_nHj_fIW-uyC_;ly(S|CTp`F3>IvMVy%K~dj0E@5Y{jMeCRU}n<+iS0-! zj4Fan0yC=&Q6`9P%8ZF%gBuYP(G3Ez9+6=w84wZB&K`A-8SVUMZIexZIXGE*4a(8= zr)*Gp6ympb0B5dl4DYUQLoGd*_Kt3$v6OtNN!bX9-;^k(^Brw%^fFe|$XuX-g}*S` zOWulOw$1}rUP>_5rt+;s18}a8a)~sMT@G+Vn6?JETO6R|$<(lps4i+oOyIMw$ci<* zSq=n(vBt1CxdK}cW{#6goN)t`yFjb0sv^a0&+EZF_ZgE&27FFAb8o# zY2cG<1}zW=F{Y_Fh25J>Kl(5^b{4#i}`-)3zQ^1v%Al% zXt@y5lea3xbAxKJ5X-8xr7?swbu5kg;(m&~#KUO``kCEgXqPY(>PGX84y1B<@aq*5 z`L?aNEB^4NSiwYSk{)$A3m^?ileqhm;3#XU>ZI}59ns=^lt>j+FKCtnYwUaZp}xE& z#Wwm5Zk`zGXGy^_UZDEuXNa+mAks{E#5bbOoG7KtYA2}{X0kaKbEbm| zPaBsd4bOa^m9yEdiN^!~SM~n4Gkqzp{@gsQjFZZI2rH#GAAMdk*cmeBQ?)GY= zM*uq{f=Z&^qLVOCG7M=;gW)@Z)k0hM3^_9)LZlw9=acS7ZxXs`Rq?bnQk+I{IMLOI zEsKSqnsWC0B`9EC7ARb|I6@J|i~=Qc*WE46Wg5D=j;$p>Xr91Vy)FDe=icv#aT&f_ ziuS9}eTU-1lGgm{Nf?MLY1@d6dZFg#Nv4*K0cEnpybYi4axB0?dV?~qf;AZO#zh+* zLz*zAM`XgO*8LR`P?pmz(3QMXWz8pGN2s$V`rgxmLNq6z2WImZ6td*)JZI%)Bf3TL zqOxT%x>L5?+Bd!KppHZD7Ki%oqSG9IY+f{S{?}j|YIPZR=f^lveYBGNeZ2lGBKfmG z>910fKgyE=qQ`$lD*`V3E)edlXTBoD7U>9IQr-e489n=3A=OV&VIG`E10i!n6T1-alY)8v+FCNfY%n6b2Jb6U#J(_9 zjN&X=d-O#xi8p36d0`MzF{xG&a2%CO;UsUV#^)p zeF-eIxK?!fDqByZ2-x$i?Cg`6sr%&;SEcO=Aq_cq^nX$#4b%k@ih3)Ajq@|rIc8bf zbE2C>e1Yk_HalO6Uol-42$Tg&(3O&J#u8lLCrBG8{_$?P=#-yZ&lH_%`GS}`=0iwI z5R{b%MzIK69XI-ux};7{cW;F4zjodSd(C&|qb2&I^B;*&f0LCzyZ)C${*R7V#Pmq^ z(<21^n)ZF`*;4^w)^&e^Q{_c)h@%GBbfaD=gy*{=^xAN-Xr{h*Pd*23&%zSk*iUjz zdTn?pCbEV@^?}i&Cw<7|rL@ts3Zl?dP#Hnb?Yr2bs+!_Sa>6Wh8bmG^ou%h8#Zn=XQou4g1k1bz63vuQfY?cmv401J$yu#|H%u<&EN{Y zA45_35w}42pW)EAG!!z^|6=_s^lUl5D%r=661eT&;{xGe6VEXzKI`Wvn*)z9Q%mtf zE{@>oPP7*F#vOS`a>>Ji!QJZqLOZ>)(2f(#kh8Qd7iCXGYmqzRc2(N>l6_e4OncW?&J$ndiDd>lGE0LuP*o~MVFYaX}=Re za{T+N2yL8lUX0e*Vh@rQWHpGIAhkdeJyi0?@Z5?f;$Fh0$#s6 z;j`Rxs^?7ap29PRX98azyxe!%dZ>5zaRGP?a38?IhvOTL9vnTmwduzN#C-^d0FGW9 z{U~~|6ymYi#B15&8YQSE$sW|#Ri?*AGnZe{(|=Bve6nD-*vE9)e1swqKgubMYz*b> zZEPLr3~lU-WxsGuKr|`z`R4@y*wQSR? 
ztjt*Q?aZ5Aq(^lT=laoKCtlqq1}xnalz&1lBsNYUN<)P4hifE#P0n8&r_{+Aa;y4O zZ4S#}-ji!==a~rECv3I7yZ}}~*CXOvfQw8j=6(U&w-m}lejc|qC9t-jVuv5)91M#N z-UXb2#5f4WoxJ9-2588HG>u6y5W*~`c?&>6uav{@9{fe{Dtggh`Fzq>CzW0KNE4Ye zb5~|r0cPirx7D7RRXU!zo>z28_UifRsIVFmfUf=h51+7^_!z@p1W2uP~ zkCA)2jYnne(Mj#G3B+wtXN8W&cOw}y3jj0F)sp=#hKc5OkX$x!ygiiD)e6)4L%2|Z4X-bshPr*r8rv;|UTo&PGp9rPV+Pk~*mFv~X zri6~Hx?FEIuqD=T+%;hl;;*XoM@7adZC9GVpJ6+9Kigl$Dnlk(B;LYdIs5fn|9EyW zK=$+r0aFk`pk+Dr!BEK{^Yqav1Z~81;>33kO!&@ouWib__VsnnU&u>+aScjE?72#uwuA6$OYzgP;OMHzuyoD!>5zLb;#ObZr&ezY=U_fQj+%BvD2V`dO6CDvYy*%2RAt!(yQU;i$9n3-f&63ve9y@=JE;#hUQ_jviz~pI|Pk@S46&LdIdg<%vje(=d~K?>Bi% zaePxVd&Nw$pmt*D5v=|QsLjE5Zu?fmd~Lzk^*^q^%6pd_@r;}Mb-xfk{yRqn5Qyd@ zALYMqr}VGc``7WmY_25t`9A^vbE}qr5&k;veTc}P3dJ94#P5W^Z(s2jDb&Y8{M%+0 zzZ3swP5ob_004DRe-QuqkN;6&|2xj_wbXwh0ez&P{o9E6y|VgunBNO}|AOg<`*&ad zQQrGI%J2E-f1zmN{DJa&Hu~=Xzh}_>1+em=7K%Sm{$9-yZRQ_Nw^JCiorfcOCjKFa^Os!TzpM|4#j%D&1eS004l(0093`@O~%% l&o%k)@>28o%8kUkqI;Esr2}$Yh6iJCix^xwkR6s)N z?D~G^$m?}JXRm92`^U~a*K^Nr?zv~w6fvD!si9pn0_1N~SEz#JN zN>1D>Kz2owC~f^Hj1G1S|Kk3UOBZiGp|Sj@1EgSi4VW~SYrHDtaP))JaG|ez0tEld zfwJNx`i8}}C7QTogsZW(=eM|!W zc??373jB(}?PNsIaD49-sDI1I>R$3q8baVBWzsaJ_1!@#cvBqHMe@kk^mOvpmUUNW z^Fy4mcCt{QwY#q}i~l2~-4~P${HBm+2D#@W>*Q*RxOici0-3u=5Z+;+p{e~>m`ITS zu!LEtxx$>?_{^N0dA%GRauQTkkyyRi^NZ$MXi)enCs-svj^~nz6%pL5&E(Sb@>~<4 zvfKm+tlt?O8z28U|I|-R%#hwpiTxyXX)2mxO?zTs&v1W6Jk4H%HQ7Xw?tc9ztSE;! zL;(!ap`=8G%;bJ{L+BtUYQ;@774}TW7dcEZD(0z(+%PjBer|9ky_e+^Z9zgStSYIr z?}V%Y{bQt81n;FTmeN+*G)M3Eh&fL}uzKcSZDT+dX>dR8J~$qamgu*MO-yGY6*~~= z9of4%Gr?v&nW^(E=R15U;B{7~KBX>;|K&#lidtxcS9Z*M=-9kqT%DrxM>-T#S7un- zJ4hh>(f?8?g2F#?mk^IkT}qPVc5s zkZu{CSTK+U3HcET@;}#nBwtBu%)g~!Z2sm1a2Flm;umIL{rYjL8H~EDhm?Vb;DKIQ zY^7*OutmvB99trG(wt5xJMN=VwN#DT&N>T8AwIBv#-$yPN#32p%79?g4dU_T zlDvec+VpA%_RL#-1sS)cyOEJVm;c{%X8xqCZ6-B5|z(I0n$1L->tC{p*hZBsqyApEg; z%)B<{IiaB>uH=B4^||K=Cm!Ku=Q$R}?>Cp$y2a{Yp$tv7Q*X;PpsvDeTt83S1h`HA*j%!@8N^EjZ%sB{^T3Mf5m% z44WE_-bz?h@;v{@a0pg0$0~xxstoXvSyn?(*5PwTH^- z^jLm_g^B79yNyi3@VA>u(%p;iYiXF6hhyhY?2Fr{b{{jpqw*G=Kqmq)8NA?sv zB2Yd*JI779(nhVicJt=6AMJJ=bunq3+~|OPIVn8SwqL-U7L*m~a@_PaYs%3vh1Rot z8oXGzZj;q7ATf1yB=0lE+2m5To~-T9Ccqb&;wkM3*X>iUx3hZ^C*drz9ayvFXumq% z_;yjoW1+!Ur7@y%txcSOBJj->Y(6up@gP^f&cD)?S!dTfqBPH@b#hef| z8P^Sd9{UvMVK~JY!f=PyEPw~Sy>2zFP$hP{B}IS&qwcYh>rFfQV_ubt8M9bVXSzK? 
z0`XwB&n0rUt8y8BS$;28l)99jD#O?%g7`N?n+$Z~FM+3`r=SagizP1ob$U)Ac&M+3Cy)OeYld zO3YYcFC40KsH%zP_AX?T6umgEgk~4P9~`0=c6`~MUq*Z$Ye`+(xQiQD{(v8lv-0ova___`L*_wuTk0iab$%YQo!u1CkT-&?Q_?eK<02k> zCdP|RR)7x9i{8f0-rn=F7wtE6?0=>+_?cJOU^qQb#H5^5o#M2Mxy?h3_nau~yM!zeRY2dtPl9`8mEG93Y@om38P;f3#$ z`)kQCIf&43NPG=uoC)=a?c&|AgGn+268jkOdZ})`JDJQEtI|l!+zl8~h#r*$Fc!0| zGsH&D#~;IOGDnLh84V%|q^O+$A;bmWEfbvJ$?RzkiQYex+&`<`I#d+Bq$2WXJyzT0 ztzUE-ZQP*$wBWMOS#jn$eQrWt{)>|ZGUJWTkeuxN*9J=QA5OM0bA?)Jy1UvsS^uKu z*aQ$kFOVe6GeEZ47%m!_%$gs(R{6}(W?X2($c~rt;#f{>x5a4-vKW%GrI0RBDc9?q0OmaBQK60gq z-r}xzqz&CJNz&fuS`M8lLwUwcpvF`+1}L#Dw)x7cV2&g z(Xg$K-fCY>M1XsAwG0_B57jwp!3m>O@Y|W z$bM-;oU(kZu%0&_cpX6(yE`9fbjI>m)@<0~tM8DNx0BBW-csN*s5=1b^P2Z2h#40S zK~zd-hgw5+)cHpC72w(EW0aZ|hM4tEch%b5datT2W{GZ^7WlpZ4ieoMbG`kXe^Mjf zeC;mZJ={ab)6e3cC72oJSca=YUT6q;-gs|nb$ic;0Cr=sLhb4HIqSiGQ-*9s)ylFT z1_~L~Z>K&}$a}N>wM!#u2Gylen7G+Mq3&*czaAhIrF7^j!xq6L9eY(I?p2*T4Va6v zi0MV_sn!xW&jcNhIu&X3bYe2Y*{tUyNbN_yZKh>K8b2Dr7{iEJ;SAq;%hq zf?u@W>234Q7mw+az}mBf?F`sp>x;O;qEXO~3ZBcitG!F;-$`D%qgJfCsX(XC<~<9G7)8x5R_b>$7U>*xAe z?n~VQ!Dbs!ZFP(x^e>(I1Ax%oAWR75{6m|HElADsFJDdx# z9BL;`?{@!z;lsG|CfkD)Ad$JmyNg)x@hybj{w-=YVQRKn7GuUAF7DzK+fT@5JHAvI z3i6Gbg}^5xaYGM6EX&ecSzrk=h!0=JrHZ*Hz5`y1ND^2OClJ3}otffDHMD*3k*8=4 zc5>*5_L%ZRSMLjD$?&Q3nc%{bM1y_hiTc!pr_!U*t&IBN^Rjv&`uVq5>Jxznx@8?y z4XMRMNnV(mFPdayX#2k~fBYuQI6!3XhYmU#=xdTE46&IAjKfWXh#fsFFkd~I7Wj$I zDUgNN=@$Am0=sEv?^}Ib{v5%0q~4y6UFsH1OC2GTOKgz@+Eid~+s}cycfnCz@6T!D zeYZ$|-0z*eeJ~?kjIHlpDkH3MvNuSPFYB*}<(utd~bOiZzEyp(n zJV%$8jQB4n8F+Q1(hYR17+vwHL5~x~;#ji~U{|}Kni8=eMhTeK_>&?^`g(_*q(CmN z#9Y{kN8`_!{%|y)ptR!J*O7;FwTrmT5aswCY^&6;jF9?1Mnd_XmuD_H*=|p~NA|2* zT7$)Edd+2Tew(C`&hbS+R6@0UI8HUv?8%23+)NKr2(sSp(z-D)cyJOH#O%#!2F;Vz z;`HEQC}gR-ZR!|t1mP`p+DA`q5ObfYle~{jRAE%H^=PjqDY%U5Fi-9^x9|E2KYwbD z>TIoo&f}bkGD(ZHO$T)*gD!We+FQ}8 zQtZo3#}fsdJKdGX&qZ6)8pL9+yr7E&HUu7p#siqdA7%p#-*NzU_{2|fP-(g&X?9|T z?3vTps5GHtP@w%@qg~$|q1I^swrfkZKOL{{QBaGe-v&S)_2-)DPw(qH0Mz^O+cc0Z z?Ek!fZ(jZkaD6UCtqOkIG*T7)4)CAF!JqEer8jDL{cS-=kp4Bw{^@#MeWA4ZZ(GLu zzyA8aO8rmg>k+?h>%r?jPPHC&!2Iw*DgvJ{5Aj* nkpCtV{)}?{)%tfHbCFs3f2v+h5gP|}6h873h)lKYFR%Xr|F=qU literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/df_equals.xlsx b/pandas/tests/io/data/excel/df_equals.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d65a92b10e2932a10438c1580972698bff213420 GIT binary patch literal 5595 zcmaJ_by(DGv!+2p8cC%Wkl2-4Km?>@>28o%8kUkqI;Esr2}$Yh6iJCix^xwkR6s)N z?D~G^$m?}JXRm92`^U~a*K^Nr?zv~w6fvD!si9pn0_1N~SEz#JN zN>1D>Kz2owC~f^Hj1G1S|Kk3UOBZiGp|Sj@1EgSi4VW~SYrHDtaP))JaG|ez0tEld zfwJNx`i8}}C7QTogsZW(=eM|!W zc??373jB(}?PNsIaD49-sDI1I>R$3q8baVBWzsaJ_1!@#cvBqHMe@kk^mOvpmUUNW z^Fy4mcCt{QwY#q}i~l2~-4~P${HBm+2D#@W>*Q*RxOici0-3u=5Z+;+p{e~>m`ITS zu!LEtxx$>?_{^N0dA%GRauQTkkyyRi^NZ$MXi)enCs-svj^~nz6%pL5&E(Sb@>~<4 zvfKm+tlt?O8z28U|I|-R%#hwpiTxyXX)2mxO?zTs&v1W6Jk4H%HQ7Xw?tc9ztSE;! 
zL;(!ap`=8G%;bJ{L+BtUYQ;@774}TW7dcEZD(0z(+%PjBer|9ky_e+^Z9zgStSYIr z?}V%Y{bQt81n;FTmeN+*G)M3Eh&fL}uzKcSZDT+dX>dR8J~$qamgu*MO-yGY6*~~= z9of4%Gr?v&nW^(E=R15U;B{7~KBX>;|K&#lidtxcS9Z*M=-9kqT%DrxM>-T#S7un- zJ4hh>(f?8?g2F#?mk^IkT}qPVc5s zkZu{CSTK+U3HcET@;}#nBwtBu%)g~!Z2sm1a2Flm;umIL{rYjL8H~EDhm?Vb;DKIQ zY^7*OutmvB99trG(wt5xJMN=VwN#DT&N>T8AwIBv#-$yPN#32p%79?g4dU_T zlDvec+VpA%_RL#-1sS)cyOEJVm;c{%X8xqCZ6-B5|z(I0n$1L->tC{p*hZBsqyApEg; z%)B<{IiaB>uH=B4^||K=Cm!Ku=Q$R}?>Cp$y2a{Yp$tv7Q*X;PpsvDeTt83S1h`HA*j%!@8N^EjZ%sB{^T3Mf5m% z44WE_-bz?h@;v{@a0pg0$0~xxstoXvSyn?(*5PwTH^- z^jLm_g^B79yNyi3@VA>u(%p;iYiXF6hhyhY?2Fr{b{{jpqw*G=Kqmq)8NA?sv zB2Yd*JI779(nhVicJt=6AMJJ=bunq3+~|OPIVn8SwqL-U7L*m~a@_PaYs%3vh1Rot z8oXGzZj;q7ATf1yB=0lE+2m5To~-T9Ccqb&;wkM3*X>iUx3hZ^C*drz9ayvFXumq% z_;yjoW1+!Ur7@y%txcSOBJj->Y(6up@gP^f&cD)?S!dTfqBPH@b#hef| z8P^Sd9{UvMVK~JY!f=PyEPw~Sy>2zFP$hP{B}IS&qwcYh>rFfQV_ubt8M9bVXSzK? z0`XwB&n0rUt8y8BS$;28l)99jD#O?%g7`N?n+$Z~FM+3`r=SagizP1ob$U)Ac&M+3Cy)OeYld zO3YYcFC40KsH%zP_AX?T6umgEgk~4P9~`0=c6`~MUq*Z$Ye`+(xQiQD{(v8lv-0ova___`L*_wuTk0iab$%YQo!u1CkT-&?Q_?eK<02k> zCdP|RR)7x9i{8f0-rn=F7wtE6?0=>+_?cJOU^qQb#H5^5o#M2Mxy?h3_nau~yM!zeRY2dtPl9`8mEG93Y@om38P;f3#$ z`)kQCIf&43NPG=uoC)=a?c&|AgGn+268jkOdZ})`JDJQEtI|l!+zl8~h#r*$Fc!0| zGsH&D#~;IOGDnLh84V%|q^O+$A;bmWEfbvJ$?RzkiQYex+&`<`I#d+Bq$2WXJyzT0 ztzUE-ZQP*$wBWMOS#jn$eQrWt{)>|ZGUJWTkeuxN*9J=QA5OM0bA?)Jy1UvsS^uKu z*aQ$kFOVe6GeEZ47%m!_%$gs(R{6}(W?X2($c~rt;#f{>x5a4-vKW%GrI0RBDc9?q0OmaBQK60gq z-r}xzqz&CJNz&fuS`M8lLwUwcpvF`+1}L#Dw)x7cV2&g z(Xg$K-fCY>M1XsAwG0_B57jwp!3m>O@Y|W z$bM-;oU(kZu%0&_cpX6(yE`9fbjI>m)@<0~tM8DNx0BBW-csN*s5=1b^P2Z2h#40S zK~zd-hgw5+)cHpC72w(EW0aZ|hM4tEch%b5datT2W{GZ^7WlpZ4ieoMbG`kXe^Mjf zeC;mZJ={ab)6e3cC72oJSca=YUT6q;-gs|nb$ic;0Cr=sLhb4HIqSiGQ-*9s)ylFT z1_~L~Z>K&}$a}N>wM!#u2Gylen7G+Mq3&*czaAhIrF7^j!xq6L9eY(I?p2*T4Va6v zi0MV_sn!xW&jcNhIu&X3bYe2Y*{tUyNbN_yZKh>K8b2Dr7{iEJ;SAq;%hq zf?u@W>234Q7mw+az}mBf?F`sp>x;O;qEXO~3ZBcitG!F;-$`D%qgJfCsX(XC<~<9G7)8x5R_b>$7U>*xAe z?n~VQ!Dbs!ZFP(x^e>(I1Ax%oAWR75{6m|HElADsFJDdx# z9BL;`?{@!z;lsG|CfkD)Ad$JmyNg)x@hybj{w-=YVQRKn7GuUAF7DzK+fT@5JHAvI z3i6Gbg}^5xaYGM6EX&ecSzrk=h!0=JrHZ*Hz5`y1ND^2OClJ3}otffDHMD*3k*8=4 zc5>*5_L%ZRSMLjD$?&Q3nc%{bM1y_hiTc!pr_!U*t&IBN^Rjv&`uVq5>Jxznx@8?y z4XMRMNnV(mFPdayX#2k~fBYuQI6!3XhYmU#=xdTE46&IAjKfWXh#fsFFkd~I7Wj$I zDUgNN=@$Am0=sEv?^}Ib{v5%0q~4y6UFsH1OC2GTOKgz@+Eid~+s}cycfnCz@6T!D zeYZ$|-0z*eeJ~?kjIHlpDkH3MvNuSPFYB*}<(utd~bOiZzEyp(n zJV%$8jQB4n8F+Q1(hYR17+vwHL5~x~;#ji~U{|}Kni8=eMhTeK_>&?^`g(_*q(CmN z#9Y{kN8`_!{%|y)ptR!J*O7;FwTrmT5aswCY^&6;jF9?1Mnd_XmuD_H*=|p~NA|2* zT7$)Edd+2Tew(C`&hbS+R6@0UI8HUv?8%23+)NKr2(sSp(z-D)cyJOH#O%#!2F;Vz z;`HEQC}gR-ZR!|t1mP`p+DA`q5ObfYle~{jRAE%H^=PjqDY%U5Fi-9^x9|E2KYwbD z>TIoo&f}bkGD(ZHO$T)*gD!We+FQ}8 zQtZo3#}fsdJKdGX&qZ6)8pL9+yr7E&HUu7p#siqdA7%p#-*NzU_{2|fP-(g&X?9|T z?3vTps5GHtP@w%@qg~$|q1I^swrfkZKOL{{QBaGe-v&S)_2-)DPw(qH0Mz^O+cc0Z z?Ek!fZ(jZkaD6UCtqOkIG*T7)4)CAF!JqEer8jDL{cS-=kp4Bw{^@#MeWA4ZZ(GLu zzyA8aO8rmg>k+?h>%r?jPPHC&!2Iw*DgvJ{5Aj* nkpCtV{)}?{)%tfHbCFs3f2v+h5gP|}6h873h)lKYFR%Xr|F=qU literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/df_header_oob.xlsx b/pandas/tests/io/data/excel/df_header_oob.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1e26091cd2ace45a96e3ac00cb12d80252b304e8 GIT binary patch literal 5605 zcmaJ_1z1$;)&`{8A*30Q5(S1H5Tpd@ZjeTDXz8IrKtLs=k&=`iq(izD2Wf^zrMu$} z=iL9w_5Q~-&&2cWS!?g_UG=V|s&EUP6a^a_8)aN^SQF)j;30n-yI9)0a&caNmc)Ki zZU^CqZTUp9AMwwKV`t?xD9Sc4@X@t0IZ#(eUz)u~C^|eq zfLIiaBDC}((3)7xq4^#Cm(E^X0)yFpUAMtc)E%YRU0$ik?+?sN^yT`vy_U!Qv8SXk zjQZVt-2z2a{DsR?Ef1bci|~~Yz4a)7vz2!CxIcPhh!Vt(-$9O^#`_WPf(~OGgL{v> 
zVvcV!)sy~F5>%tCMA*7*R8lXneAf^ zHUdIGR&G8@ccHS1oA1bId5z^?>SvtyuaKxJU}1-*@TF`bK^Q_qK~eojn23=7@U-Xh zaCEUVb9A)h^n}<)YfQOJf{3A8I-dK4%})0~N%;JpvB?OQ+})s-GDel7}^}zT^SnOk=s%J@1A6YEk6}_dRIraPD6BrK~ z^qLGZU_uu9^djD6_5J zUzCHcg0uS7m&%7vDlkK7!%=(Rj8Dn)y)AkB{`K}a|I$w0$$(vUx>GW*|AG8j@eWT0 zeTy~?WfNi zB-ZaoO*}|@G9Z;PQxhCH!IWwB9z_21_KUW{IIB$W5@~`HM`4$TYbq*rf%S=|3JF6E zLF^CHfuR|pWXVij&luQDHY0l&4nZRh%6oUP!*)ranvWUmg(%s?eg*+YL)@cVIKSIE ziZimtwga)-$nOjt4`)9tQ;$vA^zW6691v#(<}uD@DX$FAP*G5vZv8242>y;6S2r(vOV{hD+0gs!00+@D?~%UUdSLU`om!qZI*XCh z+AKXJgxH0IwPI!b&7uQ`V6D^mUEmJk^jzyhm}3ZSjm_86;O8W^YTk0^Cr1~4#*eOS z7QMT3;)Cu#aB0)K^YPFg8-(X6t2-Tf=K=wj!gh(!4QI2e#p?#Y=)NT3N;HrBOcKSc zN~wzojfkJ%?e7al&u5!mq(qfp%STH++Pd=X7`BeO7!>@T~bc5hpc7>CHcgukFPC;Fr7gcmV!(9${e zVG!?R;`xX+99t)4k*1bwJnj`ICXiuccC$Ch2eQg8tyU;CXV({2XxiLcjsVKo@n_7R~TQQTMe+$msNxatV&n?+0tpbp=5t5^^Mw#t@9j{xSVH+?3^-n8I?iv{pw27=0 zd)$o{ZJ^vB$;Mf<8Sqk-qk5H3kQf^$Qb=l_ajgw|n5G@z{i$q%W?wY&ZXo~=UtIulZ3FtiIQGEyUYhUi*#dh zk=%T(dyKi5>8gM#QZ9@P98PE8OGthIlxXmlL>tCQf%+P1lvTgsmU=8Y70+k9z>d2o zVG0(NY)WHjmQakhEC6!yP%YyO>^9D`#wUFamKxf9DDhZ{;Ix;DvIPYRvUa2#LY7>< zwvbi0;`d55t=iQQU#j&Oiu~JEG6P+9#kb+%HXVFsRNSfI&PO#rQ@=tW2~-}%BjBmr z73j4#O5WCsWAG@a> zyH8g8D1UrWI@chIM;g$1?m$bCsh<>Js=!(W*=gT?lPyjyRP|MoptVoT#_(@kE1| zmkVTUW@VCnQ+?kd6k8PilpI;a0(rj+*XV1%x&)mFpKxFBT}-pS(DDJ>m-=iucf7J@o18Qn$sdc z8Th^_8RjPMon9ws+OJEZw=2hketBBzHx3q05?SITih#woT(}>_!6^NGISXq#B>_V$ zex&=pL{;%tArO|!&)uCbsAuDFEAZ8ZwmW9cfBL6gawD8~nDC+!uuv40&m{|21@JBP z=K#F{I!T}8homD&6+2nt6I$7L=@aHJy50$#NUrMuu`|LUZmozx`+uG`6MtcWx!KS}lA`kCKHdU_n1YUS?JNQ#(}OCWwbeKNSW;Cq%H3IK zp29=exvGgXB;a?)9>W_V!w(0-T5u4(L*sE6vm61&N^k7vqIRl)6Sqt$KFE&@9S3i% zr1=Ft4ydfhyE5s~H7B;vKxxNHD<)UZI>6OPV^4eDVlWyR6!)#!&~*~c0{nzuD+R4~ zF}uPsaHqpcIGq&e_5SGq_j_T*S%m{c3yOczF(Po!C4muFF4DVc=sfl0e543|5^&fk zVB=hT3i=@A|CJ?riZ|3`+{Z{u6+FJ|<0BZQf;|{;OP*{34fe>yu9TF^|HnvDT)x`@ zx|qqcNOPB*m)glQghheiF3&Ur$&B-{gcCA~?-eh38-jQn!p(b)P2vqr;~Qh#0!N5N zClVI#t?X=PiF3cWJvbgB$)g*s6=;VFyGBxs955;()NWo$|p9ki65d_=n!Ha0-DaCbOn9%K0UikXT& zqB--8VUFqM>ta45_vmen9IUYUZ;Q?p0k;kJQ;A2u=6Kb_h<}NF`5G zf=;VB;l-Doomm?COAS0e;Kuwy5Di!(d??GbJ=>KuYfY0$)EFCbXOAp`0JVDrhSelK z+uQc6M|p0T2dzdWl#)De@e4Jknj4%NCf1zjTFS{43K`SX**?|O{m_Ftbcp?R(gxp! zG571|tD1U4YNlCt-A$f|X&98lSEZdZ)HHsF*5?7pKypOYKmuqDuU0;f#A7B7^l(s8 zJ9X=>G{#Vaz*i(*P1?LxFBnWqtiMR8?>03lW;RNzwBs9FxoTzv0%+ypj&qoo$v#ED zg#hlb7#qp#(nPk$J3B3nBqiz{+W5rpx(!qf972J4Llj};!zokFW0Z2F$nlQRBIvEL}!{Y9}a$YhQ><*>}7z)evSWf#Y&U1|$o zIs{@ztFQbxsSR)~*T2Ml8QW`?Jy_+7UJdV%!pAJiMi2ee>5knLc(J)LPpLhajhM4$ zepOBnC~hPf54a=8rC+rN*5fp5d;RcfZm+y@;aG2d@P-;!|E?T74Rw%Aqr~7A%;COj zgKOJWx%n6Yp>dASJ5Ub+&Y%n38{T2{S7vkfxp=VlAt%eC%VLbQ<9GYYg5RkNc;I|9 zenz+Djpv9nRif&*e$KSFTazqZQihoEMY~2uW&6bYEArgTf27f*|E5FMrY@Ei8g4E& z4px7$BK{a9+%^z#=urma03xrR_?}HYu}B~tFxQ;gnuVK}!`mlkZgU|lA)mrQ)^5A&%EH@86X6IiU~gMx%M*Uo-pgZ57;^x2E+zRY6;U6l;FRxsV1 z7M!R4<3!iTOJRFck&b-$PUQQar-a+cs}_#tsxFRBu3YAhE|%A=2&&$X9I1#K_sHIC zMeMP?*HvOiU5${()(@8c+};I%qaC_7_>t&FIy<3ojnvvVwmWE3U}Dg}Ee!vO5WGK*~C z+cl#BMf($2qORE=YJW1NB@iwZAnYFDPJJYp%uSrUyn9|*@4x!wpw6k-ETHnF;&vOU z>a98}f}X7DRuR@3k2U7L1rXtd)541#;gr`)u^nS&uIFn_ydoXdebbVbHC*Yo=;=VL zLDIx71kYM8eZZgsdt?{qBaP^*+MmpKmz4F@A=-z>281U{R-!{DQdZw?pZUYaiSqs2 zs$&>q2YM{rc_vN1=xA^E&d?9U>KQ*Dcsf<@<%9K%kT*Ianm~}5W{%-}kGqQN+F=%> zCJHV{dcO3>cqG2*FjFU|Yk#G`R#8FbXXlo0B>QBwQlUJhMhZ>{SSd(KqCtznxhC^m zV?lYUhBW}TF);Y~^XY`2JrtG)7FV2X$e7mVzt<{P2Aeuqf_mOd%#~$J19Uz7(TfAn z+oL`thX#aGslUPDJg|60p&idshjm{)0g{;B`{X{Tzc$A&1r}n(Wras*qcKJsdN)WT zv(E`LR%~H4;3|}z%YRdWzWtsBoz<;z6Ld7IeRV>*P)Znv-_h$ct4=y8kUs9fo9*b! 
zsO{5+V0SHp8J$71oEcx5ntq4%$pu@J0Wsge$CINk^Yy5;1fq=EDVjRE{;p}keZE2K zNF02SX#I0&liWng{JE=}Bjg&aYPgbP5QqY6`|J_MPg(<2O?(qlzK84O9f>|Vk$k}) zZHlS&4rg@2ehgc{r{Ia_{u^-uDgL7guG#XupUme7h@IG28LiHVgJ;|~lml}JR)k8? z^SP}HF!z@3d230kTT*7av~Woy5#N}I`sD`U;lQ;|0< zPM^ii^aS#Fqfde6$L=%My|Es{*N1N2Es)$ng=|;PMbc_hFZ||8;9w;RMrjMK#+Vf1 zE5VET4rz^$^Y`yc+dEgO!Id-Xw_YqxB77N^o7du^MUwTH#b9a?f^?Y-lD8X`rOWSJ z*%0|m897Pys&T{Jz|_pNHZL2Tb25BBMr)8QZV7X6i+yR7nhhx!T6gpk1pk;%9G&hh z=_$`5lIa$$XQ<6CjP?eB>jIdyH&UB6KCKBV_#qL;!WM!RQcEDwFaF~wdF=rMDMtr4 zO9wY&O)n=)*Qb9mUClw2b|7iXR+)8eO54gO3b+H7SWJC~mjHGdN2xFR`1GYu);G)K zYFE9*h*qB#E+b$n#3KE}C*dru7#7bcfU2TvRv%5%1G=wve5o;&oudH?l4UMJMU8@j zAzET_@4>d*dM|aRJd%K-bgrQWH%tAkadQ{By(YEzEib zB?gm>w&w<)7?L&C=nL@)miSp1Jm*B7%BQCIS~vK@y(7-bnxvHOgqpPS8}y=l8q&?2 z)?Z*)Po|Kd!2ohY~#s zgl;~fM-{7?tlj3-1~ajM?5N44 z7<-@MTsvvuVGWT5(n%v2*J*-^MvC%lp>}hJbiGjfZ@IBv`(5$o65)Eq^h;QgxB6}2 z^tbP2SK{@-Xx&(P^5+sIw6qIiwcMo=?aJ-9qGMD7o|v5M34?r zq&F!MLAc@lmLK|F_1@o~znwK}&ScH=oPExj+560XbTqMXE&%`p0Ki0Pq27IG$q0S` z0C09-{sOo-xWG_82p9x`aCU$~P!6te0Z+IMzbgdkfaG^Yz~DBnP!AUv9L0}BxWgbe zNP8FzrSl6XhUULUk7+5p!fhSwJly|bLkbH(pir1I40Gw~F7Rh0({GRtE)Y8yQb5T8 z`J21H>k)~9pgjH`INCpW`uE;` zhq85cg`i;nQy&qo2oD5iJw5(1E5GaQ407%x0Rh23j}vAv|1}a!%LM{=u!SK}{O&fk z{R#c>U_oHWmdt(Fz^H-%F0MOwPrYOM%j==WxB_2ND5qPZ6AX283RlxV%$KT(%tTi| z-GI=)iLA_TGZ2OHaP+9#*R1eLHS*WzPDqW9*77o?Fzh%6=cZq6@Pc2Jv`oBzO`OE} zZB-Vxcgn7eb`xvcu!NgBNoRF50U=|QlEeOj{}suBIYL__UDR4etPh%5HgZa;Afu89 zQmfbepq4rAo1$IEz!8pq3OCN|-gYALimYGo76v4KSi(Ri;~Kl5T><3Rdn2JE%6@ip!$^Uah|GiEzZZHhX9 zzlPJjvvRU?-sdK#Zt=dm0J#Ex^>tHs^`z-EFnw!R)Ejr*)G6?S7kkvE+<_h;JN2~hJ3c){3Oc7>nJG@pT+iU|E;GTc)X=GK|D&}H3< zN8C0x7seAUd^6iTwVV%TLTxMBzq`TNPh4q@6aqqt8``q!ZqJx0 z8V%C<#@QWw$&eP$TTWfL_t88+=WhK<4X0qW}}aw>2Dr| z@@MPJ?5^upiCJgwzpgV8<~2d0e4Js(vz4B@?czEi20Ur8WZ`~Uj+MYM2BPx|4=yoB zkKic}Su+ve&VrRb;*PCO+xxhsu3I_HCLKQ1PZl5Mzv+#h+)>wPc+n75P}61xOf(xm zc~f;Q6^D-|l^qd2d==rLJr_O@8m9&|6G!xFC`H#Q*KqRzO{tew2245PC`x1*{idJvy7j#1 z8|*pE4CZm}Fr}(&JGqK5k?N!*r7|3?T))QAy;Ph0ZD{4ga5FB713FF$Zk9xKhmIq8 z^Q&wacA&kPj-!+dovM?;uli8^1!BS^W;ZJ+DN5zPM2bdv)7JXpvOIgi{D9qE498WQ zsEBqA6sa%XIK4Q_PX8=8phQ_R>pGs#DjqC_9kx`Y`=G(K zpsji${q}gP6h9q>iBQ~JEk62q^*$3RUXT&AaN$80IL0tO$zI66&>rKqNf&V+eiyd& zOtQ=D=UVX-Q?0xAC}o!K7XiVkYj4X6iWKW{VEF7|keZxKJ*`+XkDI$lGy`hYa%*~% zQ#$5>aAEo5S2T!tGXAHR8{+%;Hj^7$hzamrKZMj>n|E^%Vwl+VUo;aozMjPWQY#Gm z>Sr30zDQf%M#Dt&@iHI>v6eG&Vj4Z=Mc=wLwPx;^bN*Ud)k zO{s>7wp1kUy;F)0zebT2q#jPfoJiLmu9b3h6Uxh4^3gk(@=~TDSCq^3S>XI;Nc)a* zaieX#!_{J=JI4-FO{>+VWd5RSlcga-8)E~!Xx4Vk>O5QdL?^M(RmpXnWIgEz&%?6A zvQdjUI&wluIAU%lG#QG-nrX2F;gn#x=E2J#&xZx1s`^?JV{smuV%gl`Biun2smq~& z!q5e)=`Om)(pUG!!sAD{?X61rx&llVW!0RfcUBt;Os}0@nyIz50{h)Fi)%Q^T0=WA z6en``2~^^~EXIEPE}4CI{mL6*BO%b8mYgg6ip}t-O@m9aoyeVd)vDAdd%JvHLZ_THjNrB=o@zSI z)L~A7@|yI{-gAoAdN3S>6NFC+=kS8}j5f?JWuwO#`u>3V(Ogrjc|aVyEcTxD9uQZF zyi&8)&-#>3ZFSr8&U%$MI4garpOK>Qy{sJA|B}mCINQB_+ClSO>I8e-wVj|rglnr@ zRI_$qT#HK1r720OkVU2izk!LimZK%Vukdm5oxsVh?_ZxZG0A75nO>&gb5u{1(2ZR2 zkA=H@!0Py7LzvDys!E~CcTcH4BYGj?DUSFzb`(v32Jd{Pl(lzulHfdF8#iG|0#Z8w zpK3JxL^Nnw76P0R*K7QTLRyFwahuEwD8DN4ot+B3({(;j2*SU7Zq1t~+YN6%FI(Pt+s_nF+H zK~tyGXyqU@!J23WsFydCo$y6O$~Tc)3XhT!r=)08pYn_NTz95)w-k@%V`wX4;Oh== z&;e4tIO*Fiv&O-M>FFh$-0TUr7!vJG!?)oq*!133f;kNF#G&DAi?3RSpf^mrbYH*o z90ko|Rc}L}so z+In*sQI5Uxa-{MVx>59CgIZ_TqR4j@o44{}6>x#uYr$Ha&JYn&HgiSPMN`7JE!~m% zLv!cjowsyR&+RV8Pi00?#AL+G<*pWpxd)fk@+Uu9WeFGmlDG7HOcq)dgk>;~858wyoeqa^P^*bFz z^CYsZ4Gj&5wWvIfl8(&I+N{zBhsNrpu!tUPXvZy~ZmE%vNsPQ+f6?ab+2N*QoTzmapLYGl?pEa~dt5b1e`%{LQ15cZ`<#S2 zTh%a|O{ZNf&QuMe*eXR-38SVDbY^FOp2@Lbss3Bn?n5dfiPN?oQak|}*StGK*%R;c z9cOkI4F_JBQooBSS{iaNVRrIMG<&GuDo(*wqf(r-(?-%jzW>^#%o=`lRJMBu>ny!W 
zv>#pN!Uq84dH=SJBF3aws4E93|4Zx3ud+8_qh`1onS5I8O&J&ZJ;`rRoM zsCoG-zt7Lr#mL7Ubxdq{(LH8o4Gk-Lq0UmpZuq3570c0%v*$XkKXinS2Sv?e8xk&@ z7!>YIUqD}}CjQNg>q}tK{%CSdfEj&pKbuBxWfRXwagD8&F3W-h3Surzx;+adf7iRB zMfY3iMkzp?TSqeF7 zQGLJ%@oXZS9M)>9tnVS47?uUa1*E{Sxm+^BQ`Q!2*g+pubPa?*KGe{09i|q6JAA&M z_>4VM z@1S67#Uhyo+U0)Pgq_@ky_P8_O0Z4SIHy{lbWOe;M1sHwNy%UEx>}bo*5AS)BFC0C zerPjka^#p;1|5Q0x#A` zj}&C{2V+->ujC9kb}6|MLkx$lM)LioJ1mfXwy;C>oyf+v z4f$nHii0Yi`$DoR{LT+ukT$o!p7=+lNDzK8wOLLDB)1Vx{SbS1ciDMUzE+ z3YqW6n%iGQyWs@K6QhF#p#>jJAXH?9Vc~T{p7D1Mti!>hQbqMg3^*}1COKnSgX{e9 z^j?T}hIa`msWwE*wQcYvnC#!x!}&(G^$>M-e6E|DhOE2C(x|-p6(0EQ$Jo{{-owD* zlXF<@Cst{@+bsUU(`6$3B*4d}al)X6RYMxV^cS%C_NN_}-jcp!xAS9utd{a1I818L zVCfPKXVSVEvfx_%5YAQ>JD9#i{nk2MuAer~kz7wKKGgwzIA`ftxy1~*UrqYzju2{* zDKUxg%5K96e)Ico!hBrmj<-n+w5Z1`d|~M_re@HRhp_(CpT8cHsz`28d&MH_#Svt zAUa@_+e>6_y!^S*+bx0*uGlr;JA{Rqc!!@~^36KX@Eg^Uh8cBiUta^af$OwK`Uk^q zh)-g>+pk?GE9#BwvB7G8hOCAacM4w%`Yb2Ss%)ec?>ZJba_X}D?kIgJSkmOMmJa8* zMkH3f!`U)up;JT;G7%DTS{Smo!B*;ed~mYa|Bblvvmd%>#9FeD|gF(I@#UTP2;l=yv!l5VoTw}2y zvqp~2xjYf{qtV0`^D?=BMqcE8AVXyW6#*#DQhH-35qV*<>+YA1nq9$WmnP&iI<4RQDGo9km9VJ|YCv+Hao^xa{U(mPj$4YY zihS%5LK=mlN2jH69A$B*JnW$B3oJz6Nb6bqB5A>QAb`Vd0bTl+uE274>Z!sj*p-0n5_y%@e1gquLWj!lnw;MPF9SFC9))GjeV%P0TeZ|kI$%qL`s*o8S>^-KL68-R98G1qu+l&D5FB5BDC6=BsVx_L6iuJwL_={z2V9^^B*?ylQ$ZgBnn*?^@Gc8sgEjF0U>NuPDm3ezazX9qSm8_Za|h zGswHMrRJiftP-X`F$wk&7g?zmK;Q@WNQ_(cM;uM61hERb!Q;l&(n(aq{N$FS=JZ5c z9aVQr^Z7hlIk&BsiuAaGx5^6k#2PM9yjiJpPdpj-7^p20U}RR@xZ{{G6^1M1?AyE3 z--W=tef&DC?e<4WQN6l`&lSwbg3OkjkO^^-_YjGoCw>EBiPT!`{9__E_BItKfMauA`J3;?pY96E+p0W{Uw&x-<5l&s z!5xCp4@hLL8?E}(5gnLVUP@=!U@gwB$D&{0O2-; zDUb3jH_SvVPj-_}2#h@AEPJdUDmTPn+JJl7K6V$ubd^BIVSe~{6Wh0H{W)&)e|A+7 zRnHOPn1GYY{3GD}C>1}a6J@2n008`RU;>VTy@!i69OB@N6hQrG@+07O;W}D3NQthT z)ip>#>MDAe^8n^JCBT$H0MSdAPcSDxotp-#+#KvY9Nbc3*G1)Ti74FCloq+63AWHw zHUaBdTUpxL+Ckx7HgIoGH>nPVoSb!jdV1R3;qeFn zz%mA@C>nT=Z+rxi1t|a(bVZ|S@l)+OxY6=io459RRG}6wchpyL?Ue?PhcBIW*7JSt z66As=rcT+)5+H?byT_t?wHOv%OsGAIBwp(ek&A0}@<~>8i?(2OCogr^wlY{iW-Kob zDnrlWR#3R5?%KrYhEogaGW)dbYLi27O!F~&Ce)}nRBfI z!gQPlzFYEsby8WcA`|QS;>hxE)@?f`CvuN#WWpH4CYiRf`*#LZtugdEKX6L7Ky+gyzXuIe5eP{#UoSsFg38)8`~XHF+Jp z`;*o#=pMS1(a6ywPlJ0J*zK=;xjPjMoG2n|FSpbao12N5Ik|AQWk{Z$))B r-YC zv5qFzg-d`x%ROi7b8bJxpMRsy?*IVK6q0i@z%>6Z@BG^TIT8Rk6L`)o8q@qY)Bh#+ z{F&@bemS>ZO!MDlKg&OV=J|Da*qCYjH_xwf(4U!p^+@(NOh3y*f95$8U(U_rH#}z} z`;Qd!|I1SJ8=01m@Z?I%D-p%TweMQ%MXR*XYJ%{*L!Z- z7?%IW4vgIN<01Zm{J9!`rmdV?G$t`&RL_5umVcvuE;5{Dv~%;s{kd-cZ`{xO_OlYi zxea67hMpHB{tf&&|Nc6_28v%d`9F|<%FpLr@?t(7{*6>&pQFcFTs}7+%&Pr1 XO6zFi;+@?_f%(BOL1*Lg+1396FmTk4 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xls b/pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xls new file mode 100644 index 0000000000000000000000000000000000000000..472ad75901286410980004e4b4df34cf4924a970 GIT binary patch literal 6144 zcmeHLO>9(E6h7~DrhU^wJ1svdNPSeHe*;okh%wlaQiK>%A{aL$cG}KBVQ8CL4X&Ib z#Em4Fnz(5~h}{qqe`10QG`4Z$0wbCbqj6DO_!C_W24hHQ$L~A$P5avPWv19@AU*AO z@11jh&b{ZHd(XY^>etni7d~2dUFOk~QXahdKS z4NQ4+r42(cVi=&!%K!%;zue33qGc^lwMY#-C;j*v5?9)#5BZ?H3TqcnYvD0|QN{H1 zpBKfnFVtpP{;UQH>5BD!{qy|hIbH%RwEubDbN^QWD}a?iB~S$*1V}Yd1FQm818ab_ zzCMz*y8aX_43)3FPH8_ z0}%&SEDg)$l~;J=>)C%D9`RvlLjB?=@D9rWeWNXr?soqx+NEmT^M>~i!v%zj7|5 z+z&gVA<@M8h|rT~6qLB7Bf zbUwUtDUG`$HuhXba?GQd63weKrJC1dLYlA2lxe;?6V7$2U!TF=_yQfdlMfzECWl(c zIj`4vkMkHg8W+5RT>w}rn6%K=0Vowek5ImS~y+okg<&GIy62;kt176kp0pkFc1 z^R`PxmQmqz>?}uyEkqG#96M5+xYXWY+&~<0Fqnfl;rxwIavMYEN5M#m9HNdRZ$lWP zPPelLK*1wM8IsM=WR9@toFj~b^>&s!Zw zqwl@tNOS$i-}4|E1$a?qeH`Giy%*p+r4!(xkp}o)ISDX&oCf%AIR{YEZ&T(C!h;Mk 
za$q1a*b{S~O}^6Gr*9LsX#wJZyfMO{X38vp;y@L0VL&h7>%*WN$8DS6A&wj7BGMYHmg}V--WLnY&xhL?#>A)P071MQ2 z8_MT;D4R5=GT=u&`1`y%3v$zv3}{)HT@Lw1vPlc~5NSjsUT8>RIVdk<{U`A1;kTcC z_zIL#n~*bv7f?Sq!PMZ%=Yd!3`kT=9iZ^6fc0)q9&LOE^pcXfu9e1$)t&lzB$zO_W NvHJIafBC=V{{yzBM6>_^ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsb b/pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsb new file mode 100755 index 0000000000000000000000000000000000000000..5052102c6655ddd9327f8312fd99c9ad45377b73 GIT binary patch literal 9132 zcmeHMWmJ@1yB@kj8dMsj8M>7aL_q11keGp?5s(JyP#T3Hl?4tkxgaC9z z9Vr`Y2e7rnV>K6Bu)Q9Kvy~-vDl#HdG5`_w{lCwD@eWk0bv|qY;#bpc<3+w@a@3*s zQyQd9memDd;F}uTBxHwKx!}-0>t*x^O3lQ_t)Z7oj2jIc!M#hro`fpiWSlQ$YHD~!$$fIWxBs*mN zpX24Z1T(8)Vz=F(wnJ=J+EvM&GtsCePgbNa4yrjtz&@uf`a-KGGUsRcqP}{|S%zPP zC_dU6&0-3v>^2ca)IIX~8n%wC?2sb|!|@5p{+WQ#8YD}RvoW89Mo&^LzKtI9Y}XF_ zLX?{MS%*h-X$xd)47;3kD{Wi@^2>@cv;|B-2f~YirloPYRPuAnyOQ3HpKH=lmf5>a z(1c}hjI97Cr565zKy_#*;2SflT#jegthXNG;s%#sIFTyCxA%}}LLA}!yT~*oMGu~P z=uNPwwCmqVo7c3>`hae)L-`pKcCd=^>`VOFXD^H(gGLqE0yTiSDS zeEgn#m>{`LQtc5;Lm3@q|TC_&hxB9CK--x%|?df9*V->XX)AM-t-eVZvaUG^p!EJ9vc7tXK0({$*Zsd!^`M#jaeg;$hS3g;3 z%M?e0CB7jf0DuC3j^J#``G>1G+1Oba+1OZc7@1lB<`=(Hosg@^8D9N11kc^*XmSk)8)Td&*6Ml-V&4|Nc>6Usm+xq?>t1*a zr&c>BY1k089no-#jE1dw0)TRMNHUude~>oK;9AigG2cVdr=0_s_={(vuwfd_Y>5FDqH5!erHBWT2Z=PFuM?d znFk*14Kt%34anRC1To+>5=T-H_e7DxMWTz~M~cqG&KEDI@x{QW;35Ndffkhka)M$g z%!UR^U}KQMvMB`0LEVP6D9oe9)eIEme#uS5^Abq!vjz&lPKHuQiO@BP%4ipHP(K-u z_m)r!@TNfoF%YRa0J-;gxVsG15HP&h3=rmNQ56G-lCj3T)(w)ibHud}7o}c#@6o*^ zPhsZqVK$agHVMh>f*K;yHR;Oa{oRGkK_F-slV_BFaDz82>q;R6@%SqP*aGgB za&tRyf9y7tAwc}1AVC0a$o8SdduenxDuAnsjC_KOTy30;9C}1Xk!c4k=Fa941ur8A zdHG79<%4PIn&@Q$*4Q56KcMEalxYa@>t8h&KdtIgK*V-};tbT!#fgOkthyNDrG+-k zy%P(uHUj-#i{K-{`Ucq44VV^mzi9DCK)BN7G8|a;Hpn#t@lofh=0yA|C)YM4?#6RC z?$~vE|lj)A6+J8-y8FInhxI=vSl>vRpn4s6Kgo#s^-Tf51 zR}af;!Asyy=qyaXU(b>1dIn`3Y^M01&ynlv`E)F<2$*?klmU2g>pR>mOYA6!%NfnRdrE1B>6|5DI)o z%}dJ_D8sw4*d-#}oJ6#gL8Rfr>_1Hq*5J2WTc`QpTvLdifh`uO_4Ra&RJ+l2x)Lkb&2umSU0dx)VO7^Lc8XJ&0`|6Squ-u8y(`uckG=Eijc zmPkVcI#{Oo%?2KW7StGFo2LUSxN!c&a&WN(UxK7Zb;==0;7j)^2z2C^mf%mFATr#h zt<<(fqAnuVFn z7fiSKDQl=v=Z;Cf=`0Ra=3-jnjCwKg1D9y-r?^t3`Ib_Iv$5s)xpE(I7d19>9u3Tp zOim;ihAB2h4Da_TINxs-Iaspw{=%K_LFe%R!hkJr_3hKtlQm3zG5q+vT$I`P3mi|o zN;_S_%ET6T*tY#@k-l#~FXW*D01MZ@x1^Vc;{X9$fjKXKxxTlg>)H=&u1k~E(rr`8 zxY-zsI8xkunmbl}9Wp3WtTVLWEH?An#L<+G=i%q;=HarkK|U4KPZqqqDdb~14s6&dHdKY@PNjFw?$eC6yM>eQ?fT@(?Tsq}}L&4CH9v zfJkPaCmf}>A?0r^S59%fa<~g;HxIwxWsz^~EmEhj+79hj(wa;U(R?bGmlapSBQq=aD|{C)_Y!+E1M}cTRS{g!fY^*0~%X zy)dt4n}2rpDf+SJc{K(4ep%5)ORS=&+reUsX43ij^aAElO#K3_`&~NPes(**0~h3S z^0tcj=wd;t7N+K*lzN)2LW60|6r^gtwtUP{hQi$qObEMr{g;xHu)-##E~0`6ce;=p za~v!AozfXd)(~wJyl|5zO6P+rup@z)M5z>AP-nHA1bW#*!gN)3r}nGi z$;CctNeKgul+z$!Tj=Pv!l&%#OX&CVSGN+h{MF@fR~k9zzo|&1BQ*+sRt$Qn&ifu^ zOb7iN4)%0D_j;8jWumAkbk~*0ASjvBqiw5+Cg{KtbhAjJ~&>-prTz zl$M*Epbqp9q^DLa6Fkx0o1vG}M&(RJo_ZKV_u@rMhmph3_C$e9r_}9-!ZSo+<6B&E z)Sp$t^{O%iCT5B*pA=CY11cKxl3~l^)s03jo3Rr z&qBO)I6rzlw|3xX*BGTCKj&kw54VPNILNuHV>=p3pY+_$EO^;VbgjwO&ac&y@qo+k zaTd!MPYS_?l)}9}JlnFY8Y}(EZXxy$^ZxM-<4?Py@Uy;Z#6mPWNUSMT3VmHzveC-b zKU;^0)G=o+a<;!TqPXZ)=s&^GRmy+pfb3qOO+vONx8|&y9U(<3C;1r<85=j#NxZy% zA6Olb?sHxYs;tsjxEd4sxNkY=-6oOAzQg7_Htmn?AT)RG6pT3LN411%WZt-2 zY{|v8jHHY>s$g4{Pkqas#oeTit^fMxpiL8ulS%vr2Y9brG)&%}Wc5>=!LqRFw}6+l|WavfsPNah%ns zH5OvTEfv7(lpJno>)lGRKv)+X%S&pt`E9xMAM_2Ey8r*~8&Z@Rq5si2Ue?Zk^$oE0 z85?!(Bze=o;c?;~ur)|^%rL@YjZ(biRWol)C|GXiVBhNN*mmsEos{!ZfAwjm7vYx4 z7LSyg`GtE+yf-^Xzlbm~x8shsgA8tY5867f^_CjtFlf6Is1uVQ$uqA>SrlWTykumY 
z{}5+{t7YC*@(}V?F#2}+*My#MRBXjeir3zvA8?1jiu*fMBF+hE%{jdMs5i4ZCNI=# z3XSsec zm;2Tru=8aXI1pt3Kt#d!M$P2rf_?m(s_%k9$Pd5iCSjSPH-7|N9cz?Tr>B#YBwN67U4f2iONHrD_!DS9~U8Ujg5Mla3I7|7#tWzF0cd!D%RXy@GsP{z#`5;Z#m zY6L3lKqn`>_AzydWGA=`TOnU-49P@_1D;oT+OAS_Sm->V3UlbUOJI!`tdt3S_GmL1~Ft4_*M0EVc&rDa78>zSCDn-kkLW7wFV^r1iyFV zwW_J{An%z;Ukmy%^EtGJ<3n!oz}G_LWYK6J46Lb9{>S+ZWY%C#SMP!W^4h)$v6!i2 zZLMP!Z}+j4UOqPYq8dm~jk;0vr`b1Nn-e!?0ObDe>wU}V)p(Ri&zV0GN)AFpj9fMQ zXyjF7P?N0u@{aj8Kcxzz9>+rapy^x;(vsT3MpCX4q6j-2hx=3=b-(RIoaF7Z^Iy=fjKFp88v!{)roZ%3E+1Ns==a$EvMj=YkiiT# z#sQTEif=ZLa=Nb+Aca#P247~#l^D~fRvYFDMeoZiX)04ln)O-#rTzZqXXzos;MQIe74Krow4*wWtM{l%GKuKjg zogVJzRlWn-B}>o6$|xL5qoQmOp;w8#@2u|)N~>FCcs@=Q8{#SZ^sA#smhy6Mz+MQq z(q8X9q6pFQwGs|_x`61q4LXz0MgZE9;HcWOt<1O-)CCVif?CR+phLw|Z9}ynmei*? zjspuK&3QuG?ZEAag2(mmp}RGaVFNqHX0)$o&E2!x5Aph2%OA$;6Y)iOMBM^malFIL zld_ZsmSf>@-|*-iAMbi*u}g4JSfmqd`)2TI^h(*DQfvR4eb_?Vt{t18j}HypNEUTF zCk-fYO{@*mnqpjI3B}_1^2G6&ex4I{RKNDx|8W$*%*p?E6xZgenr4FilK^b|uUX+T zFaCi?9fQaf;T3?)6Cpsw(%4?#o{R+|McP(lgmD0Wtr-E49Y)mr{?hvUS3f?#xx!YG z{~6%tOR^t@`(dvB2km#6wBZGRzBTw!@EEq*f4)P2$APDfuaLws{?4U;-9o?%!Mm^n9tED7yFxkm1LYT@ z4juuXkh?;Vgzgh@}8uwBZo``}6fUobb0Bnfi_uz+3y{h)%0sr}| zpOC-1P^U?gUtgGS`9uPimTmcHfE(3lBgs%nRv3{=nu8;tL zU>Mdfbs)U>&q?}6@tZIL@o(`zQ#QQxKPz$pt8D*Kk>BnzeDJ$kiy|x*{v+0(Zh&$b S?g#(`fCXd#pyYe>1pE)`Pn19a literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsm b/pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..51edc7f94f9d8d26d210b436d900d52f37658bf6 GIT binary patch literal 7236 zcmeHMbyQSq*QY}|l?Ew68V2bunV|+j5QgrShM`008oC5TTDl}fLRv}~KtMtSr6fh_ zoAJHtmFsu!{l4}7{qD2Q#9C+XbI#tsy`SISk0uZugB%412M6V>vJ8xJP4E!Eja@Ar z+<1AApD&ZT)VulcpX@@TI8OizGC27~tss?FCL#LQEKW2Hk9ddhAtob@CuI<1oV-JD1)8lIbGv5R1b%jWd{qP z?g^?lHV)K)wf=-qlJ7FSo znp&DXm_2LwnIE{MhU3B80vuk1>v(jq!E~#2AF-;uB40)t;th@$Wu>y(j*<&^^1>@o zKqKvACvT0^G8pOJfG&uhZ~;@hRoBm`!!~8eUF@o%kQyaeZ<_@zy|^5qV`f!-UnAL| z+X}bQ++}5U%zCRo%*-96ItXAKSEcW?lxDiuAK|K=9YT#$bXiSwiY-SS(>vb`2-=8p!Fg&QLj^ZVCO^r{*X61;l*cPgam zOtI|*LmPD@c5hTTc$kfOm}FYgit7qKA@q0{#=TO$wZ|#$$1d(GDBdy{w>UumNLUh+ z)+4Uf8jHM--IX^XU~E)g_EeD)r$&M8=2bKw&+(1se3881-80!}JoW>98mN)QFzcf6 z{fs)0EW1&;`N|eW^gvbos!KMgFQ0}dw4Wg4sQ-eTK-sB9UHikMTGgUW=c#(doMqn4 zA2UkJ*uF%c)?-@4dM*41gLxe_=%098H3F(;R#%9SXtF2BD?v%wxj=+*$fN`*p+B|Mai#iCJQ3goiRD-&_4$?7ap0}0n446qLR+&>9 zq)$~Hh~Dm&{7i4`V9MV1NQYKZ>qQ1R38A`QOo@tRp90D9na}<=G-k($ew?%rrRIEU z<6Z_iM{4z5RaF8s%zS1;YVE*%iJa8>+dmYhAH0ScmPOsVRrY4Gp@3yF^}%E3l{3Mr z+n|pkgJCz5~#LZJE0hFM75OOQd* zOq6JqyzG_9hNJn{w<5b=?{yJIbZ6d@Pf;-mYPoDHU1r76qfjr3Jp;PutB`?4vBDaI z+we*S^uoI-J~AfrgH>d|`_nCFyQn=T@=ttcM$pGp{0h~yH)^6$X*!pJUrBdN-_60U z_=6Xtzp7|w9qpy@Hrx z;YORz#Mo55cy0J_6#lB2^&t}pl8Q)&o~AjlEE-RT#S~ROHuk6XyB;rS4HJIUj_#o-9+y!q?;iV(Vy75zQQZSICpH0_6Ll1)zje(< zxcp#4y*La!Dx8fFf~M3zh>EJ;6#dLRdZ>}DRT)jPdk$FS*5POi&2<$mXfEF^O6P;0 z!$_~NkocaPeIot>f$unyf5P{zU)9ph#?sQ=jrZplKN9Ngx)8TXF=GGE54{i79k3tG zsncT3W#I{i^A#Fed23gARFqawBxR3X`s(g0gP_He?E5S)b*nZ_0W37S(1Kk% z+YdpI{4bf*GXu+yXvpwcqU)5eB21& z#f59S8k81xM$^u^l5};#3zG&zFTz5;fFFMf8pU7g!aBc4_U?ipfT`Oho9Cu&pwBDf zBo=&24Xy{6&@|(=XV~!@d(Xl`vnt3Qu9_;IKk8jTxfy%)h@oc`GmXgVDm}#o;GC z%_d^~r8uN&IFE5v!-e$0SWEF?fj=@?Pa;N&)}j;!>l|vYXse{_u|%s*=^C|CFNK7< zDZdG)qd8GVk=NnuQ`1uyTX&JFPfNNxsC*mfXw7VXR>KGZOw?dZ9%*Lk`0+pZVuTA` zZYA&FfzNKOz4!4Hlcv%s$n*=!6G>$IDyd@zKbbRU-v?~TCVH_5HjwzH8T?S*%N?RP z@q1P6tx`fL`)S(8L9Cd+hqygeE{J|4XP3#aeu)zS3_O6xcqor}?M~Y4 z?FF)TwI|+J$;ztP$}0z*s(w=WFPLARqkb!v5L?+)@Hc;PY)@8f3f4Xw#}IG)x&tpG z;uaEkvR*nq2%X9;2U6FV#8-5Cy&2m7CObEhqw!&f1yi>sziu|GqxTHpA!C>rWpKzc 
zUtPYMjnfK~kRDR+V*qk0TwN(|ihO+}&u#VEX-)EIRuos*_Ot4v&+?N2PFao$W($T} zXDqu-=JwSruPYrbSoZRq?Z)f}v%lD}veI_~Fa>SeKP6H7mV5i0sYmZN9IlqAZ43!e z*|RpBunI3f|MbRU!or@eg1(l`cw$r}*@Ky`2=X897e0G6Nnup8`Ij{+>m4lKFFGW!;3D(#dkbF+kw2&_v zS$OolUzxM6?(HL1gKG7H(OLJT8VvVfxi4y?0Snwxxz#;Bm6#7pn5;EE!e`IB9iAR{ z>7Yn4p=`VA`Z#wKmfqzod6kuis+LIVI3k(h2@^99j`l9q-7Mx3SGVT@*pqa?PfSeo z2hn(HZ0yE5Fl+Lf*?&|f^&Q#7uRy%t?9-*p9Z+etIWVIy9_|qsapOf30;M^scS_n{ z?h8r!Ygt3up6ZQk%bQD)R_CuwgYRFFi`JfUg)Ge#CL>ZdIK-Xfm8%SY_q}SgtD&Ce z&Ta$O!)13VVC&F9M#9t^MyOf-@a(BjnE7E-SPo4N*~4Wg*){53NIXT(QU&=87~QAc zQS7tE2S{E^0neCTEP7zj+$Xruh9>oK-~1VnEQWowK=sQmxLxVyWkAReOHM z%sFlrJdKB1*EIG!u_5X`JknkPIX40#`^g#;T=recV~1l;DBy9RvTI=|)~Z|8$n8-) z+y#%278l;|DpyYdQ8AH`IB7Zp$Zf4k#`m9miq#hLVPoT= z;(pu^>q#~a&D{p$%;vcJip(;1>lrde0t}6yHdfFJ&iZ|-hRWQ#Hj*w-_Hx z+zZ_080%-W%6{J%84a&e=&xl7u8+q(&h|Y4M^qV%oq6Gf+Zf|#R*h%GahrXmA6>Pu zAiPpK2t=k-ij?Fi0V1WwvHpXU68vvU{Wqoln^ON#N>TrdT>filalIk`IlyR#{d<7X z@0Hm|gB)Nm7DC>72i@_=vU4PzMQv;{CdLu7EfWEW>o=}mT@ePB9huVhV3VFKa@?t< z#SmM~TJ*TF=*jC9UtM|gJta2XBw3@sNum?&-D>_P>>RUtv;b?VJYd%JWQn<5DNHu2 zeMd9Z=aSL34D~3Z2rhKPW|ExI^KH=8__5ty}2*<_7VBYeY19Db4;GQ2q9 zX$nGjH(kL!LHY^=atrG?&`gltSFaz z<`y+Bpe@l<2)ox%;U%fPp6dP3nmfB%B8#d&SW6?6k2v7uVS*Ii3r4$slgP~W4N_Q*jbHh8 ztWLg=J6{IpLxc+wscy?gNF-D&%cW91q7=NM^yvc+ z*uGckAEU?5T)#o{mLx#Ju&6>OM>$CBLD8vlm_sn3}>+XQF#$=dSh==n+X_ z`K%s;HhdXS*#%bT?#;=sQc`xWUvsE`Km$3#$~G;@!?Gn{L+x66gx4Z$&3iZ;Sxf0z z^|8X8QTb>zLJQvLIM&xbb)>QP_1tzm{tMo$Bn5i1MqF$m2)*I8q;&%QAi*%u?@Du6 zlRli~TetnQ59Di{u}MNPOKjg~E=Ko=?}-^#$a0jOzBs`#U=sho`!USWgZN z7dy10))U>MJRR1tk?sk}8acz}gWj?Q89Ic&{D?xd{OoAB6A>qDoD#7gL_Vk}C|~|@ zmPUL%Zl*3S$gt%kXaLdqiF3M_ixYW$xfcJoWE>xhZ?kEd<8BKsRIKzsXcAu|r6 zJ8*~E#sCo~Y09IrAUmGJybUZh@)AE{VF2CEl5Q33w`FJ1k)MSV2AE#I6HoZ=AZaWNR#XHM=aJKXDx`WaM|zWX%O@Ikc~@Yyuw0#PLid&t zdG<1Qzdgi?>0G5vX)OimrQF?-H07wp!4BfIOn#)-Gf+cwJd2I(GK*{;RKS;l-c;KC7+H(~uN zb=Kap#BE4aCca`u>Fym{i_&-DDX@}fp}0;(2UrWwPu28Owsd+it&}s5CT@G>ek);FndO@2fNVB#K<%3ACWP1x$o#F5)zFyNphuIq&qB(Xcm!8l?x-~zo zA2#&`tFb&?s~et^Hw%C8H#H`QKsoC)EgM9zS%p#3#OB@7{6v7b`bQ>A+(v#?CPxk$ zGGz%|*)z;W+*9&pXr}a=Ejh!C)NY>{nhUUwNt3D%hkd>z>#{-xoumL(xx&U!P3`p8 zsxld7s2{a^!v&c~idi*dffu2qKRj@~gGP-wVV}V=Ry}egEQ())tWz)*bvY-{1$0p? 
z-amJ7Cj zcY@TpNP-V)PqpEkq(=j#Ob)N`Yt0*a(b~1xhn#IXqMCdwen=B6q9Wrr59bOCM8t3O zCxu5vBS-nQM{<2z0J%rqElYW{2m|HfZh`8gc;|64IeywzVk%g=u3 zcPrOf4wCf!l3K(Pg0S*86!3RD*VzP;6aA8T1cg94^>>E!yM^oP8j^MVl2+2c;2^)N zU$5rK#qXE6A)e>o6y}em@OSO&RR&q{e@Pp~HSJ&Z|91n|Vg6?WW;an#{-QK#0x=Pg QK|#Ss9thmsrb7PvA4<$&V*mgE literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsx b/pandas/tests/io/data/excel/df_mangle_dup_col_dtypes.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ec4e49add4233bbffb00bd3d02a015ee32cf52b4 GIT binary patch literal 5507 zcmaJ_1z6Mj*B>3yor56_5=x^qNXH0)Il5u=rJI2hX^@g0qJRTYxU@(L5{h(6i%LmK zjd#2Eeg5+IeeeD3Sv|j<^V|32=Nv;FJbXF;F)=YfySfq#xPoh#&(^+<9)6-C*jH3~ zhklnBS^Q3Dg5a^_yb^Iqd5f-Q3rGU^mfMrH{vonEcJD}ymWC|pRadf4L?qho>4L7O z@PZhxj@3g`i-)*vy!PSGx(Coc!J<+_r4hZt@M8K8hZ zZCzy8GnrNza>sbm;04F^gNBPO1duf4p?ybl}?X#*M zHw!tE^|YtOg_`P=lFUvIc+Mzf!FTU#uhX-E@`+Xy)+mXSGPNB0rsaSH`I|dI?sDA_ zSob5dzBLhP%lA1YvQ|~pMsz3bTrB~Hl(%2E?Vod-lo*I3>7fgA2HU6Hq%T~eM@Ky7 z&ODVjg%YQQ^Y0F-7S1=sCQNe|J3kd;fKor`tjKUK4ne7tpLogn%6~U7Xo_ymw$*t& z+!7-wU%(kx7)PJW-RlJ6x7kkU109KtdFt;o636e+hMTB?JY<>q6~D%CPQU|_+C{#& zc`0%6rFU@>cQP;z{~RfmuK}m$ZAbQNB@8O@aX#Z&XHP;*ryP&EWV z#Zn@FIHY=xX7Xlxqz9j7_^%l<9_YxS8OX!Aj}5m)>Sp~0|NO$XI^(k^fI15tJGY{( zvc|@ZL|)4MGaLZG8}HxZhWzii@$(P%aP-4Q&6fEG&rvb9H~Vy1JHnA!0j%1$l1jKl zTyMUg0q7|TGOgGmPr}m?-Jn4476BctYpr`rotOPyd|#K?@U%Z|{6 z;_T{-71C^aqlqhX!f#Cl4?iNgtUHF>l*r@*(xRF-zk=T7Q>ITOLLWa**0V;LpVc&Q zP`%77%&(rASPh>i9~APK(k9U*+LpX+vB86cP@$*RU*<81Fmq$8(h6K9}cePL(W4D573CEZZfsg+9e_G7O`HQq;`{Dlk^z&0>0 zTO$keHX*R`)FFaW>%ww6bMhu&ZAmgYi_4fs!0q~{yk2+xX`!NgXn3W6HS0M1iT;i3 zjuq;a06QSn>-~jq-TroR;srVSgCcgVUE!xs5Axf4#^|Oyi%KY zkklcH%@q%mEW#g{`ud9Q-*ScNSB>>^adhCKwIZUyw+5Opueyaq+ z^H=e0+S0X@<7q2s^6E^l<$blPeG@+-{9ZbYtymZ6>jgyArGEf!0bK+4el*!EWNE4s z6rK7|odB0A-e!80qf_4XoTcpuB*|##dfz-eRy*Nv-rOSTUKyvO1V9xS&(Tz@R1nXQzK1v2~mQxi(d10vr0$VB)IbA^-F;`TqQ z45NI@!{({SJRUkQQluyRQ~Il8rYl!FFP;@6(}Ve4N*?)f=1@V#7J}U>CNO!vB>ILxL~(wU=|H7a1Y?VQ zhk`PvlEr*nB#GJ!an0ub=q0><|7#T^A?i$8k{$CqS&y9-9qL6VV(I}o4$tjo3;}^E zQb#w$3Qy@gb4jjS&!vec8|_-JJ! zC45-fMCJ}EOzdCOkfP2H`cOqfL(~%HWlfnutKzp91=ec-z(l4jU2Y`o@Jbx5yJp-G zpn%V=PEF;MJq`#YA6H`g5R@;kQg~|m?1WOR{+-D1IqC2@SMv49XroI~B)TX~B(+(u zT8ZcG!zcIs-zDWS9;J6|J@3KmJkhEheum~}ItX1^+UP1aW!U&hE6&Clrv)%r!a0~V zck3bL8zOqeLv{pFfU?SXClFizEYeeoe~)HSzG)(O{e(pVMaS08yRKGXoJi8XF>xyV z@io1ra??f4!2)8YZ?xkewxh563+&F3TT0s_;DgKPcV$O8_jhyfSC(c&6OmT!0dm0m zq+2owRDx=@@jazfjn@z9%oL!{TN+C*~u*ercJt-fn+!tJ#!)jOoiNA!smFZ@j_-Jcg4rkyQNnx;~sMkaer zqi1DKCW0{1FO!w#5bn>1yoY*;-(AnfP79-<+5^Oo*dqr?tUD_}9O<1Uuy*M~9Zq%Qn4iD&3POm$kV>%Oz8CtDfBT9fn;umM6{wM z*a)1V388>eF-lnh%Pk1|muBQ-eBB0h5-(?fAC+!fG_AZj(fC->=R|$5|wK% ze&X~REr0BmN}s5|j1XakkUDOD%=kFD|3>rhn?%w#M7Js#VPz?PTt`m;aa;7o_SO=! 
z*-+`iq6<&TOAeZf-s^E|VMp;K?_%m{UuV7OckB2F^|j zS*&g8<(-bwB>eTR;r93RkG@!nXaRoUY&6f?WFe7kO|z9C>bb9mqH#CZ){4#CWFX6W zl9O7wkC2@LI~MXhOAJ^hsD+yz!9vqJLU5OsHPlrp?XF+W?e0yoGxaPy=dfFJfbvX| z_qJYVs&Ww5j-Y*o7URI451p;1REYWaJ(%Y|d!8C|)xpc&(AUe`Pt@Ma*Ad%U!odTW ziHE9npFVA;qM%H@M;lCHXkem}O4>anGdqzgleML=cRb}vOKLI~;N1IK^x11wD1rP( z4T+noBZy)Ex0g0(lG&1B-#Y=O{vRW~e+zRDm=Rdnr1gf_0!;Ab|V_8X%P3 zFGosKbvqGsJ}P|h-c^sfh2p9gwtVLtgBh;({^$#zbvi@i%mJNQXc!^3LNcA3EQ;lt^4ghzN?xr~zTOiSUMnWVs5t(v;o!`1L0f$TC({(PEV z`Hg-O8jF3_BZlxO1PeHgMC8yRg~=?Fw~0sy{1}#9(0^A*Y@iVtkyi(I7InTx;c7g| z9(O&)xcI#{VY=e-x2VfFLD9%<9gZ#m2cR?Flnp+v^8uIw7{^>k5OJJ(=G+5R6|*EA zOxsODC+?oM#0Ho`=I;&JA?L%`8U{Q!XO`V;1{K4i)n+ChJ+ojnl}fbgVruK|{kx{c zz7LDp#MmJeW37LVY}%`qvj4-+-wTE{R{f}+SBw}F(e9Z7!B=*Op$VA{orL`6%kJ#Z zdkGS;f4Ww&S{%*ai{BV=7@Zx>K9AhWkjjgkc+~jw8YJ0(~!NGpH#zz zx39K*PKl#Lock_JZR@=b+s2*~-~MFZ{nR7>kak$vkWQ&z&Z7|uJSDZty00hQ)qW%KQGXd$9(LD9Ek4M?&_9!m=~s5-Zip*>Y*qaXe?GkF6)Y{Y zv6MYA*N^IZSwgAt?oKnPv9uy7L`j8+Wm#4p@r@ysHu(zWh^e^6OGGx%jN!PwpPe)S+}s^3MO<^EIt+hm z4jo!D?wDKeXR-3|ZD_lw73UL}L&1vDBSS`O-NxphqmrIcKnb}G<|D9Vd^<%z)XYiRi)8Q3ex7vaNl;4 zcPP&&4lzG+n5A%aysZT zd}yKGA#4Wefouvp|A2IWa*e-pRFILcO7en&7H>nfpX7zPw1ka@JUpvuQ`KXPvw15C z=?8QSn-|ZS0<0z)Eo#eD*hc~*AhE2LZJ-O)J;FuJ*SdsKwJVCtNcLki8`|}tx|HGP z>xQLpN`>g~Z3PY-#fF*2-CO1|^WA~l2z@l7O0Ke8;(siG9=(gma! zh0v==kt#(5UeNb`KY8DuyK?VZcddQy%szAW*)#Av_yn{705LJ3#!VJsT@d_k1^@s^ z;Q;_-xTh(~%F$g|=JJL9C*;W<+|BHyn-eT+fKmfoQ9{`}j5#f!C z>4I{waB*=E@^*5J*6ws3hk)z%uMkW*T{W?1evYpn6APFGzwAuHDAPMc(spb;&pMsD zV3o^{RiF~TNj6=V+Z=TPh2BqF+%uMeAB=`LrtG*( z%5Mw3s~$kKllROTMn`0IoYIX|hWIWdt*3sm$t3lUyjZ-vy%d1@Y@=nSM3xd`JwN3| zWT9`~Z9OY(!M44Y$*R-M3zWGM_GUR!f>tw?<-tk?{q=?SUpv-|WMvi!S4#$D+d@7J|*!YcM&zjcM-?=`ZA zJ-xe(E0iM!0D$868gcjVakO$jTOsQPlg=+7jP=vn1`Vbhc9=XG`2EyGPcdBw0gYtO z*&txyiG1bI!&}FW>cr(e1-XUKH5b|s%K8pts4Cr9Qi?Bv-WdpRdK0Ut+#*Q4%*i87 zHT#`2KsH>?u2owR^ns(OG}vaKR_<%$Tp=Ekf*fC;L$F*();mp&v^!RknrA7GhX%vp zR3N&q!!M%t3?#A=?H{-#q9_rC*I*$sM9OW^RuMPwFz8A{4zI)uxtV1lMf)7Tc9uJe znG7Hk1u!US#whKk#{)k2{tUgA!rb^b7Lm1;FBpl32FS8VB&b`{Z zv`acd=PwZ`D+`6O?7i-}%%o&CN-FZouL4H=7lNXiJ2N z=Aq==YICpsZ9A#L5B;F53`Ij^CsG^Ovy&yzGdN>iESX$0EmrUTs9?Yw3mgvXtfdvb zj#lt8t^uo(bb(ZMYsk$XzG^O|@Td0jqlvOFa#i?{qb&nNjm$a`P1@yw^~I%0sD&i_HW^G$gRq z);tY5%;t-*2C`I|ukdX~l%J#ck(R<(X2M+sRIajR?TQk+M zip1-hjViza6<$r{$O)9vE8dBbx(`8QO^n!Gb`5mUd@5kG<#a6ATBd&u+OMY8 z{j{JhWf$^v+T6E#NoJx`yXuZ0%?_nuhaJ_N^KPYea@QXA)(_C1k|=#FkKkXhfZ`DV z03NvJ!3bdaKiPD*wX*VX7yk7SIjas`9TEWk#~?G60ZZ8K<eOVzkYRrvz#V**vq-YCkx zluQYlz#q@VTww8e!F|WJ;2&(Wj~e4QmQLPbA^E z6nGv8`0Oq#rffp+KvNXMB?HE|Zsy!x>}W)+<6WhbAFYosGh=)>bV()th^INsm*SzS zc5DCk!c)Tt$efDw=4m z+W4@gyzdZ$nJ5UVth|Y32N>K&8Fjt#L9rYwtu=1MCm^gNeh5H@d6d*?HlgKQ%oYvM zFE^;>D9J0N3|i&CZ*#WOa+h1}(?Y+TSb1#uuwcOqTd9~3XBo64O*kD`ka6&On%GG( zUqfa}(8#MS(W%ERh*IGU0+!;+c~d;h2%(bKMR*-YzTG7XtTQp!BaW)$sTv5m_?Yyf zQ4Q=d2K60Z~ zqkZ4c_TTH|VOg$kbxe+~JBE3e^FC(A3RDXgHBgTUzqFV|6BnToZ>uA*tJ}J;Qx7_z zsWyacBB10>8A^rF-k2XK@H1o7vhfO&I`8b7Ad@CjYKDMm#0bko3(#+@4<3z0d=*y9 zbm>TgM0kZ_#YZ!{g}vo7=G>#n4O`V%q+f;U*7PecSi#G}CY>Z$LgfUr8EK}xyEdmG zD31(3a;OuPs{a~IZTi(>&++DuOhrdZj6~>+nw}r@1}o%X zg~I#ld_M%M|q+Vzg2zlIVmRTpv>~#I~ zGick$Ty8}>t^0m8mz+STU2#YrKLfSl18#eT^hdRM9);^UI(EnxAD!Y{pg!mtLX+t) zBX6r4{rj3ro{cj<#t-ZuHVwgZ#iIoQC*aribK%Myq|r7X;oIJwTTh~rt$8=i2<$7S z39$W$1<#tn$`Dj3U2;*)T1{@%b6QP|Movl70EQU)s5VjlF{1d{P)Ex(jZDo6iJx4# z*t+(EofGdVhBu8>ECTc!T0*iylhgaMWahVSHA-jK8!_sBQ;In--M1tvixfx|5@M6G`|phUcTpj(I5t>uwmJb z#9bka=2$7?oK7{({Lh^=A?sSgAGe`HDZm~&?P4SRa_1}XTKDGDQsid}O4Dp~I;4Yw zqzA2OhIQI?Z+l&kA?NIF8?i)6&qnr*|E%!OpU7#^_plY3|T zU6NAq>#0dUyuX={2YkL(f@44mXH0RP{Y+imZOu_umf9XDJ7=3;z9YJ?iL@C)PB>Ew 
z(?WgLyl}xzN@tT~7uZE_Y_4JR^ie~-F4$#m>$2%KjrKJ1wR7T4?zfP1-Hh1eVZs^% z30A71>wNUE;tiV2Fe1@mXNyFoXSx<-sU4*Q$uB6gzuaYt?OI4d?IJ81Y}rJAJeN{@ zRBxg)qNc)5tn*Zvn(PjoD{M952ecewHpsILgJMAmF+ky10!; z^Lf&=JoyB#E(>r}5V!V>jt_&Sp0A6qPtS8}7-kT0vwPaKm(`lsl+_mnvoc<>f5t`K z#s_G#7t%?}P8gA{Q+RM+$IWnr6y*VUY~7~Y?$KqB-lh9C8X03ZUNITqhv$B*ikw4Uz6X+#4h=j(Zc+T5ktTRe*9tBZzPPM_|CteUSJ$(I zq{hO|Dz0tgh_-1rS%Qosey*x0K9zKwgSi&er)NlY+M=iSWBrR$|6bjTw6p`LuVbx& zb_xgYx-pEcFqw8HM<2_I-WHcma6oX=YLd*e?k?axay-OGN8gfDq{#0eNbp77CTCWA zD*b>izpL_N??N_RqLnm4R>QD)>f>0j0(SVmin+Wism{d`xowq+l&$yh#l=4Vul`k` z-SdtB<|w(DsHv`|Y)D9Dn1>vK`|)8uGH z+hk*+=>guO@?!nkyUe=cQBy69^||fufq(PzWN%UYE{+p@5CA}f7u*9Vr-Ytxg?X;e5^K`7sErhJEmX_!*y==oXJiH2d4h!4q^Zzm6^me?&y* z+l__yJA=z9>dT4T8Dv~JC8+K4syo7+j*?yUMr5~|OxE2I;byNHca$h1 z(e{G2Me*(YY_6*U);@0*=t_6C4ua{FiTXJx;zHERnE4J1NIKUEpLo>}U+(-~BCNI; zYBb&vOe9!lV3jIC6uki~sgOV=1W05@IHLWo7Rw%)G_92)W=uZ0P8|M94mJ_?(h9$~Yr0YQgQFZVE;t)Q$YNX7yJ$okM>Oe{>zyK zE*<~){o@4VJn;M;{||};m)dhX{`27To4-Hct2iTjrp*7@3Z6mP@P7~Z|C?mSUr5{s z0p$A_z~V`NMgK3&^L(81&E`*>G@QBrx7Kq$!g)FW z6Cr^7U&Q@9{Ji%3frBak0smhmI*&eIp?}bPxbvKUsaN5?8|7+Shx=t~VDjkJi;A)$bT2qGaVAPk*@j5J8d z+#v-);2ZRNulMracg}Cl`D^X-oV}mD*4mGb2JRJV0DzDX(EC^hVqYHqZUF!QNa6qh zAnen^%g)tXP~hTTnL4bFD@-1P364vF`_Gt{ zw_=W?d~bBgurP)HkVjci^gV_4u#%pB0fSbvtDxU^xE3MLlxfXEtY>Zmv^F85?GLHO z7_VGesy9cv*NLP>3ZgQ+ku6ddT>XwL% z`hK0n>=|23;t<&#o-H{N)ugUDgTflux3{7qUskau-yL)?r$R^e=X`%&?&8PZ^z?DpN?wZ4M;^4SzmFv4-LcEX;x|a7rIZ5`*bAANY zhE@aii&EAsyIaqhp#xk%X}+kM^*C{Atz4$C&9}67R@ydt(G{}NtN5Gd(fs-n7*P3^ zPdK#%F`tN9!k~gq`)9hxid$BtvpHV`$w^kZj|*ZIroBXTLdNjqD~Qkxp=Lvqa}SlY zIAY|<>tB2HB-73Y=%-HjoNIs~w)hgK;=IBbOQnI3_xZ}>wql9+qY_M{o!h=5M?wtR z_#3dSv4vEyh<0dd1=D7Dh?>fL;dA#DAOam_0RNMt-Zw1>v3bTU+Gg0bN%zk^LUv;U zzJhI(4LSgT^w%Eo_6cyc^S;<2Xd_eiY$5vK`Rdm{S{24Pz}y9!62fS^@+oN6ZFC0^ z*C1HFemwf@_)`YCW476tz6STM^-1mU_p1ab@vB2q3E`%+d-J@q?QU+SoWH{kb)-28RIx4o)k0g3uw6_|QVl@5pr=TPkb%M{he_p2#+K{MwV1+`y z8HDjsKvi$-*%2;JSBIW*(8Z>faA?0!Bj$LT2~5Jgqe?cH@RZd{PratRAB0G!u%Pq3 zN~MmvClxI~j>kF1puv)j@3p{No~G;BShVL989S|`Uz@s^tTG$ekkmcs;Of0Ho*jLtyCspVU$S8vtt z-a*N~o9>sFY>A-0l7hm(jrjvwi*p$Gd>J_S*^f;prrsj_ACRXg2j z*|=cuYRX*g05?+^bKI0@Iqr-Msk-M%dm!eIZ_glKDz7OPH7!=@Wjk#Lb|nx9OLlFw z)Z+_BWXKQPuqUrg&gk1=4d&%8ihzjTEUPTu*tRkK@_ z=b#=6b2!d}J&~(GN+dr_c9b3oFO^`&%d^C&9Ii218~=LdZZBP`V(6+Q=$2iy=(%{m}ms^tu^_^ zMy2oBox1O2;1V<+@9g$o8=Vw$_@Oa+6zwJVN;kmiJ>AErmKHifc?I>$*V_?L{QHNz zB8_2P6DiSK_3bxA$lo2pygmr^ed9$D8Q^xMq~u)F0FD#seNetC*+sCjt_Pem+mM;|QkpbIe|Z zF6``a)L5jXMqw!GSPHDhE8;8j&dP4owDwO{I3;$wg;q7y`o`cko6zEdb|QMX?mHLB zixa6Vx-w0bboK-RO?`cm367XUdZMAv-ryCM9*{CUw0gfhQEwHkV7(nK*SD5M|0aje zbN@AS{dA)k8(;V)wHV4@ik__|tw(r!MlbR_`f*VNv#pXdT)7;V1SsC$1pOfj^5{wSmxn6dI=Z!v3P-Kf+ z|0r%?v!02|@a;NaAJU2k%^N+Krb2G?uf0~0-irOIl9Nw+7jENOo7bSG^0`7Kc!~m< z+^`-CGPxT8P0LR7iTI@9sjwgf<3hA%a+!g&sy9QlqY8(^89NQc(eihiww`p4lwt_u zD2@_KrMtTZ26Gk{wtn2YMW9>T^=d{spmWFgG~($zQACN%s@iOU3xJYU(}j6r`EI&6 zDMu-PFuosnV`@dMbgXA69VLa5_k=g;n$51QzG-a_9`1BY>t~K<4^<6we~sghL*0$j zur}P8u5j%>&rlvS`{*)Fa%l0y#Q6M&k6JD`D&mg}*lIC6uoxZ{(<-Z3J0z_8ZLxo9 zMM^nAA}3{PcymS#?fth?Rm$d?Pp+JFC4O+{(fS1z|O@c$w$XjxI}$!+uxmyRryi zxeXN-J^g;?IKMgM!hdRSe-9kPN+~B+RvCUt>MslWck}hjjy?vvd=Mgu#+=e9A9kw| z*d&OzB*<5v9szrFmqxn8Qm5XYhX^p565DN!JQ+dmwW=O$-cH<%?NTJcFE6|j)zR%s z*b;X7CwlRk!C>Lsf+J`0>+6)S-cwCKq*D_#sofJd7O?J2ku)prQ`ag(_BBPIbp`u( z)y8vxgEV@Trnq%(d^*kEo#(G@kfc{Fis1P|y`)5gUM!DAMtJ15o zY|PW_i0X)Zh?pPIq=h{TCJ^LKgsRjD+4{k;xBD*her5ICY|y1l$ODkKtFRf6#F|j7 zKfhp8Z$~RHJ6k;;FDEyLpB^J|xP!P;hy;J3SzFf&sYOBIBnjOn+6N+N&8)xYM!p%D4{!@Kx zP;T$*(X8yNMQg?kDTtLEuYHJht0RlZ@iR%KM=j>iNp+R$gwQ9-lpqZq_NcG1C)&Qz z5u%tlQnFUw!Bu>lR@Mfa6qZCw^&a`#qY+m`BSmq^LQ|u9R}$WPMB6H8_o$AeVfG 
z7DLZ3bAU|>2sp0$I`q^wn>*CDRD?6r^Ep3dr|jkdbUO<`GmFwErQR!s{Q>nfnI!h| z0mR#P>-YE|oN(^d1}3+H?0S%!B%JxPrlBmZU#B4SMZ(`axSPb z&07*Bd2RBzkt#|{6c%wh6qU?nm5VNrDB@A3)n+JN$*;SbP)8@|Gmr@(_8algl}ITH zbnoVI&Ej%S)!IDaK7#GkZ7GvRIp@a#Z@#f%=M|M>-fm|s4sHw!0JE?ddP6mrh=^<` zxpv|>%4Ct_ha~4s^9U<2Xa2TA*1krgzd9xB54;&ZYazeeG4GF^WK2^0@vf+M! zCoL5jdSipQovNHBB;F|2z?DX62+4c;JBb-Ndm4?m6jQU~;b06qi zN_yIW|IADBw^cD?Y)%aE001g%UQ89;-F)oad@P^=9(LYl7pZB1N@`>G>f8w*TQ!ew z|3h;1nXr$9PkBT|b~uF-ufLfcJA)7LLiLGwV`KZ?U|Q3)eZ^3vT*eW5W9vDZ>lqvc zAok)auid%^4Z%KF2?VVvNT$Oa?G1^stft?)PZ|ezzU5ev*2Bx<@%5&CK+Oux>%E<$ za2jQT5mwU25t_A(Jm1TS`q22veochjeFs&7>Z_5asNQgbTeU`Zx#9$g7+}>~aj*0c z@uFB)co2W3?1_2D=BBD#dB&|gKX!9|AH^9QYnJV%TekY$D+%_0*09?sArk>`pIipJ6Yvi>e-C|&~}9x z7jE|SUjB9ssZhs{0!K!m*vu>zEL!FPcF1IY|lxBvhE literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/dimension_small.xlsx b/pandas/tests/io/data/excel/dimension_small.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..78ce4723ebef46d117130df458affa93ed9d48bd GIT binary patch literal 4894 zcmZ`-2UJr{w@rXhL+@RB?}`X$K#(fENiU&yLzMs`U8I9FgD6F%qx2R80RibvNhBZw zQl(0f_JY3m`^o$M+?9LRx@+xoXU>_k_ny(!#Kofl00;>IO?M&sjwRvsvj6}<4hH}r z!9J~F4(>i;qUZPW)FBOA35uAFkho-cKpm~JeRXT>@DsI=u&n-5+pF2)jD8{T2tu>s zB7!9K(4B#+N;%V1jz8iMs&Oh_C1yhLiN_4^f;_VQRpwSVlL4+lV-0R{XkW>Ar~B@u zR8Qa=X7NuJdL4JHWan&(SQ=yOr|sd99+uX)XZA~bvBi5BXUtK8am;>XW>xX3N=(4K z6^_cakvWt6WRZP-6)~w6N}VBP1EXh5+Rg4_0f%sHLjFnf*R!#Aa~lk+>DM z^YU^N4fJqNFzEF}i&M1j3*lirM66tyGjL53lL6BdFMG37Rp{K}XnM9X^3O1o4n+be zC35L&Br`49_slQ|w`JD$JI{^!J8a*|>o$r;qPo`5P}wYrAx;xerkP^DsLj;C%Jg>O zz>Tq>jS;v5)R;q>WTyTe=t{lPbXdMxvDQG!?dly5Xc&DR&ResedK9-$a!9&lBcl%4 z!Il}EF$sg@SNPWyz-oz|7~}lc?zO^EkuNJalCBOoSy8V?en1?SnxL!`GPi>7JL~r) zn)VifTx&*OicJOdq{zf9jd)EH`d-E|4j0=}l%}vqz z#u5E)@W2`ZL? z9lyxiv2CwCXEp5S1zs15dc71UL!+I`@?fQw?#g`Ia?g4h^>zX41Jr|Mwc%L!Fr} zVH@R$9snTyy+(X|@3}kpoUf4eJJX&|#2H#=4DKMUIb2d-Q0w~TX8B7Qxe03J_p8U%X z&QM5!XaEE3JH9wLEN7#OU1MJf`>Yl!=prU z-%vn8<1t8LB_ZVgqJ*+cJPy)MVvLyEuB$d>Bsxq8S;#B%L}7MGRN1vKxSK^&`8hqv zN{IrLJ!}5-n(qUC-ThfQJ*7GHcNU4Ym2ass(O`+2Li8+X1^r9K{Yxxm+H|f@rCFf*Ctx_ExPSy`X%1f!;Kir*}O}=0@E3zB{exOTLS>(ms@( zTkRNhz3C!X`hFOc|4i8o+6y%R4(#L$4vfw^mdoZe%}BNSJS-UrL;=U6dYfq^ufUZ8 zESo6Qi26XPyG>x*=vN(8q#;xR!PN0MaZ({hU2E3H@SUZs9AZIDv#yTRaq3~vjY-R(=RO*+!Oe)oOnqm> zQczymJH0SzRz(i(#;fCdpv3{CeZ7$u6*KH>l)~5AQ{KxkVU@95<%JGBb7~@;)y@}M zw5gj?f0eSC%XGUvi|7ie>Cv<8p)rt6BH;EYHRlv`unSIGW-TU4+3gn!fG73?$ z9r}BYTW+xqYB%T(2>^o}jjRra5Yt3Y8naEgW$ttr&NgDOpE`vBUhp+kn#jluZ1p4g z92vM4{pe<8&^2NS5Rujtt9oh?oYBGppSJ@)_~ra7*`Y*@uDhhi#LH5-Y86LAh{0ZJ`OInw{^lKOWklbR^#Ye8wCZ%Gw5#v| zX?V1^nTe}%lWR`^@tLAmVUl4qjZ=2F7^Ga9wB7C60H?t#vO_oR(F_t40-Gm;KPQox zdZue$!)g3w!jC@&gXK8b5_)HUzEG1U;qprCk1=@d&5AdPD==jNn)W*^DF{V{TYx-T z!QT6gq64U>4E%mYc&|1i>~e7`Z7x4Kd{k{C@8%m`63naOr#AM$kT@S3RHmZKcAg1w9FTAp|GLVR{lwx|>}sRsTq z6>GHZoLZh=#J&tp{#=;(IanbCqv^?T7o$12`Wf6);aD6}3<*uQPN9z?d zOU3pL)qV;$sJMf;04W|;j)^0e*<@SZh=tyQ>DWRYnG+#q2oXmkde=;E9rQJ4X{*bS zXUE#k$v@NiYAq{3YLUaawI6rqIb)2IsT+%iI;{*!#{8)SA4?A`)b;omwp{l(tJv6a zowAs%fZALg{hrX30x692QUJ;TQ^NuNyk<>}$!46y2~O{Ij{|$RlG0ZkxxH<0R%&J# zP{SJbjt%-RB4Aatab-tqKZ_a$1rXe6YL*d`RF4o;aES=IA~Uzrln9qph1`a^1Cb(B zOZ&lw5*$;YYhyu1I|5H%U^m&@ILeC}W@ooS=naa*3+{6&9aqBN#F*a7&}OEOUvK*` z>}CtLQ(rveVA5ha3U;y;DjQy1B>LcJk%&1V_(Lh@mQ-i+FlZF3t~^+!W%{kMzg_7+ z{n&4Znxf_QP8=M)aYC!I-=$6fjhAVSSFAia0Cwxm4R=bXPS&1HBqm9)>v6bneF4JB`iZNrG9pi3I)G@x+L3J|h{THsU9KCtcFeuAU7vlc zyb?1VjPrLU6agPES70+BhxMgc%YII-KF+o<2YUlwn2V>=FWZqY)K1(X4#uBt*3pA~ z)TX3#ku%&R+6DH}S=wqjr9DKp8c}%7ZSh%eQya`cD?PKWalaAIGkTVsGmhVMM~0Pr z>4U7v|;dBL?+?Is9_n`X994iBq)f(57rW{Q5$k 
zhNl?s6`vqSEn(-tC-gXlT*kWe`pj2uEwg6?-0c2N-PO%jPSvet;j9cou8+B>y7&QI zuA+w71(_4_ElLmk4DXsv5W{={j~u&0dO!ZdjLYLiu$EUN`MQxN+w_{K47rJP84yiDKoBHtm^H1QQ z<+wx_bnSF#FpkfO8gvfIX92n{A9?>2x<&eTMcQ?n`c|-&2*Or{;-3|<_4Yn5NN%#u zdB=5a9MQDz=E#t6q|Y^!rRNf(IhdP4L&j$0XYY(Pey)G>7~X4n^7QF}dXRhb9Rmah z@4BT9TWJo>Y@rE?6}~MkpXsL0O`}cpm34O>=b`%{E?W95BTfDR zZE;`Shr#&*+AIfoeTbG>2j;_6xDsmIPt{h@o7j-CLSb9=Gh(YvcVS^DWI3cEvj3|) zfH_`aHXhU0ULYP(_jdI&l6a3Md_>GCBRJwrl!r zmh}P7!aP(>3GaLhSs9)HsIfRN!nYGHpS+|1Oxz3WAkFJ;OT2^c+cCx z$MQTit%hCsd8pn#zfkw9#0@0$%| zrjEZf`W3Hf#Lnmv;`UM&_cIc%!V1`SO@pRbue)p?ojJ*kcB^$C{TQ1{hMilaad1~* z=d$!}0XFYdLC1Tq=V_~UwhqE+RS1STNK+ziS2Ob;mJs!><0l0)5%Tr^s1Vawh%`s{ zgcAr?-*L#5AxPK&R@BPCGDBqwV%_1vBIS@HtM-)@HHDHa;imyyRwBMiC>%Ss&8A$M zv zE!J5&s`7YCUXDpUy>yRcv&@ng_sOT7yv^6l`(Id2U3QU+WSvm&10+G-TaJJ{^3I{$ z*KGIk3EHl1Wy|FoJW`$wUt>S9qAWf`FwL3MUqZSjNlu2Qj|J#z0&!^0`R~6RSzycY zkKaEIFfIZwZt?%1h_I!-u;IT5zPR`M0~W#h*mGw7&ra|h%7*)U!2jPPGyFngKM1IK zpn(6M@BWKp2K{!-7u@p2fnAi#KLbl6{uTYdi08#P7scjJoLsE8|2OHm7~vwF|A`O+ z{ugq;2*1cZf8gX~|A7B56J11K?9e}GKJ0PMzx1oFCJ38-0DuU4nP3Iz^*P@G{s-D? BQXv2U literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/empty_trailing_rows.xlsx b/pandas/tests/io/data/excel/empty_trailing_rows.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..920b03915a3c8f1800bc41e76dfdb75e783762d1 GIT binary patch literal 4900 zcmZ`-1yodByB=cbE`cGWOIiVG1c^bqL2~FAaA<~ZkrIP&Xhd3+PH6!dq*X>>NRg0~ z5P>`By}zH_@1L{goVCtc&wKWM-~B$%v$w7$4lWGuFW&Yue&Ro~eg~XAGU$N@j{M_=dov2+T{L z!3Q%mosYf6~JXbIA_ZL2Yg(NRDA+Fef==uxz5oegA42 z6zerekP%1)JwgO>y=!_V8#AOKY5S`_+T^(t}y&*^?yWxWnl5>e6-9IR7Op zY}K2i3nm4rf``1S!s4xzI`35sjPe+@Tik{HkKx(`yi*ph(DC=O8x3m`Gv4^hHpRiR z6{x>NSDks^Y$Q-kIcrVR`ufg!vSXCTSplH?S0Ww+h;JTZ0RT`O0Dv5mhyYAXPnesX zr>C1xfQNgM!2lE`LeX|8fII6UXypoeier+T3YejIF_4+2O6Qh9)4%hy;CzD~<#L^?zCJ-Z1o-CSu%$mSDrZDzL! 
zDL)>xH41lt7_&(b&ou-BZ!{>+gcqonY9SxqsoC>@gkP!0?lAwMM{(ygi?~-hI_8*_ z#PTV7T+%T44c-k!67}SsS>u9N?seB%DZZfpoc2?FRM1l9X7(4Se4>y9hRW65l~C9RCq0|n#E4|a#aVxRI}_U> z$9Y*hmfekfX2T(FEIEOgSE~t9G}_rr5o>jHH2LG2PHHpi%|lCS#c8@Z@Z`iZ*AQ z5=DKrS6>RbSi`65gmZ6KU7BpV*NIqYmCz zkE}6tnwFSOSXu?sT4bn(+FU`t=MvW7W{ffX8)v=WJQ|+a`qnh`}$>jC-m=Ki=zy?!(_5cDkHF?|&9(FES^7-%V zp$c(|E!)`Q5C5~>~=fRaUxE;-nxgQvEc&kMch))6w#|fDy~JreN37v`B#8e z$`rs%v_;NMp9o&vLo}V9@&f7`ljz3Umo%wZ646@%SC~+W`d3SbR+%ca7fKGU?vsiw9ts%fGhi3 zHdCk*4g%Hon@MbAOS-B_L#X_NsS~ji#69A<28b{++du&#>Di+@B{~--Z)+(ogTRYQe)7vjN-P=bs`SiASzu$DgR_QC`TI3^W8PWMsLyyzM`jT#qU%fN@N)MnFHiR`>|gsCbB29}(g$}WyS zzM62iG8<$?A&&Kg_vN@Dd3T{sXAN=7apH61vFc!4b&0*)i~|MaoYr9;i|=5@4u#@c zW-j9+Yut!rgQt`;Q3#EF&O9+YuhRO6VOov}k3p3Y^6q=7ltQOe^S9dYaMGgRZZIye zL&hFoO(3*OZPyO0a;vr8HwrL(C|7L0=c3mtBu8p>eFN+D896!Cdm)9{T6) z9J8N{b_{wcx6iO5yzfA}fs#7Og$lXS+cL{scj=4Uv3r|2Rh(nn5$>O|GCiC2p5Zqt zD{qpmjOR_u=ISR?O}~aGaEYIbH6O{6c*H+A8vbO}kRzAf6?z|+Sji{pOJbOSwr8Wg z^Nq5S#KUUq@S{jDdeT+q%z@X;LpS&tdoSo|2HsdIGK-K7q>4H*j-d?b|G|d}cLwlk zsE`&N>r$O zUC?YQ4=>`{lMn5rpl~&}*Nyp=Y45t$o>8By7+wob$@6^+mN3zSTSC9@>zGjP-vHKh zY<_9JuIcYkJ@tgDq>EfzE1|Vu)|p0oxZtd0l(2j^#tb<+vo-(hG;y*-@X47jQAoGn z9h+8;iTZg$htN)9Y}#BmyZ+!Zn=M+T=z z+$BtNgDb7Mv7+|eUFu}$B;9Jit;}El$TmqI_Rz4~eH-LB+QGkfP6d*(JEeP7!nVR* z)l7MsPWb@ZgZ6=SOKL+`<4KuB1I3!e>^>&Yvv7S#%0OsC_xlEtqz9NMmt80F<)y(@ zpnHDOO*fHMrflPB0kz^GS)|+o%{fA6rkD1cTAcR&~!A8n0R} zZ0u(qH%NrHi%(~*c^&I2mPn=V2no*ZWIlPu2-8!XJJ?z!Mha^h1B`Uox6OG5V`RZB+Lhq|8si8oB&|WE1x_tgK)1!a>%1EBft1X;5S}a2 zPfCb{u{;zX)lacQ3kZwh?|U3C%PlLgGk@e5d>NIiFX2x(Y3<0e6-4Ywh^o{L-T2A5 zx7(0;yYl7iOfdG}nNR|JxL$?HfDFc+VvPF*#d3t_rmk0?S zxs@MtImk2xZ$sPkj+#U`M38MmJp*%4>=SC3{rBP;x3#+W*|gn;eq+qFBhi2Zmf zqjJB^%5V~_#!6uLNR^6MQ3DJdldU*>&JYf#Mytn>4?QRi@);-#X zTlW`>WofudaZ7mk+3E;-kRMSK6f#dYB{t_4Ikn95@Htuiocd~7tek4vDk7Qb1zewS zQ1$WxdR>JKGYiuvWm}aad=2lJPZGg=01q5{jrx2BU2)zQPuU;C*b2fy@ur*#Nsj{4OK_Qn@;q%$7GI2DCGLY(V zO@<0?ZY^12?G<64p-g=u{}C@e>D0m?&t6{lEN<5{?X?r0BafZh4OP+@*W3gw{?`x= zK5<3n?G8{;NK-_h77L4sx8a=yLP7`?_f7(5sltcxLz466ImUx~9tlt7xa3!CGAXBz znGcr8JHF5wEl()~VDSf=`K3|1nwaUW(}jL@!}@uKmJ_m3R@;p~4J&d1L*J`1k~9zE z`4ybPfnO7nT~Kv1p~2YRr|OUe2+v)hoNRR18Dxj_?}~KlHV>|0DiMUK3dKJwV(aB~ zQIPCZor{j^-8!M^+|QCCW_!HQSn)WU2*n0!0lqgjCqM5t2LIUn;xTg2`Yb2sNF&I- z#nb@7#=U8&!&06_gDx_eW`^%d$fmpLbJA!NE;8>gVc&N@#(^ivE2@x|_T#1p!ETX& zz>j1jXiEp{-wiJn(q=fw>MLlOcg?>06sbHt;j3n=zT1L_%NTL5g}iE z{J`t2_H-RT2~-KUS=8RxD&o#+24@~IM^P1Qx3*DJc)(qHkj{cwzSC;cTR+a`CH>xQ z(ge8cHRp=QeS9ol>->&^ua;=5_jZmVX;tw@*hn8n-Khcb9+wdgY~rQ(Hxuv-e6JD) zFGpLT`XlkL)tEYDOW`MNVO7;h!O}ye3gg}3!Ge_vCsv(nYwC(+8P{_BIjjVIl&7)n zShkz9X;O!ph!5}D;=q$N=$WN-(#MKzJ^KW$6V1lX+_rSZ9r%bTicR^Vbv>EA=H0^c z7^kZY@0IGTo>Y5ukXO#8om~wi-mb9Z#(DN}FL(PD=y0Ct%w@lQg{%kSb<~cZ+rj3a zOWyN7`xQ$V9{wB2olKbmg9j?;$PLz0E6UPy1ml9mm8jxjC+FK_h!pa?Ogy|nGW48FVr`~w!mxY-M4{?C5!0?LB(d%*wSDbxQ# zVm=6{jZnn<&v*aDHv@nB=1ZRW^1v?3<)49N68(z)U&QlroXcYKCr%;8;s2ZTT#j&= z&i_P+BKa3`zYM?3J%8Y|WdDHwFB4rxU+&O9XhF=O&cF1lt|kzZeE@(EbD3ZSsNsU| G0RICPPjXKH literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/empty_with_blank_row.xlsx b/pandas/tests/io/data/excel/empty_with_blank_row.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..fe3bcfcc269d77e56e9bd99a4225e66a38bb6f6e GIT binary patch literal 4301 zcmZ`-by!qe`yCiM1ZC)uMnpnD8cFGy!I4Hf9lAuiJEc>}OCyMsbV+weH`3im{D$lA zd-U>s_nhaP{hUA6KKotgeb;{1QIrRwfB*mt48VEG3z%`fcjY_)01!k50I(6Gj;)co z9Xs3Yy(p?*4)_#5VAm};(#5fYP|~ohE^r`O#?3RK=UR_9k%QF1&BX^pBQG00Le~AT zmscWb=7I5XaEf%WlvTb4OKA8diA(0kd#9V^b!Mu)bbTsv^mqnc`6I37N7PZ4NT10$ z#$Uf|R?`+()Xk=-4ltZCbn&&&(gEHWuAKztoy?lf{tQzIIQ5M$$=j3)a9q|#mf{~= 
zRLzKFJ$)p_&Q*IK+AsM^IgJcbW6th)?gGJhG^SZPA83NT&zc{@L=5R?K^9$s5pcAiziBM+^qgTjwv`XL124)a_wO)W%9rB%W zPMq)@ZFz?aWTIx-AmfyCD(8?Rwc9qRweUoI2A&KB5W)rJpG=!R7;v`YO^JrvM1sge z1Qg&)XzM=qxRtO}Lm$^fdX}Af(nx4RVJ=ElhMZSyW}D9VU&Vrcf|-3lxQIbhXM`|} zTJC!q@!p8dJm6!c&h_v$d^$)&@rPovW-VB?;pA~Z)$mBb41P{$Vw4n-Sum($KQ7~D_P$Xz^P~hG z4?p%?ZK9LLY>L@BWs_REa!<2fqmW`Xo9|@X)|7!j!qa|QRn&M5u^u+viQbKwgRI`? z!_K>dE=C3_G(6aIl}@P7DkWw-Gi36>z3&yu4lN8ki7SvBH7;J_D_kXEw+Z%)8YXiF4FCcJf4C8N~5{E44$PBBdcZ?B0q!M1OA%6Wn6q)u4;zN?2RtSel- zGaD8DWhjSz!m;DMz#@$E(>%t45XmXChp1_m!efcPv3>e^LyvaNVVc=DblP9U*($3C zVc&3i7c{~GGdgYvCn|iLmlL;BS54Bf+=5Aq)((ES+kQ8Bsrv#uCER#v))8G_RjA}vsS+?G?(eF#;o5ObT%L#uCe)>9HfA0UQb zQDXfqf@sD`IcmiIMji|^L=(8;V}81;E7{{-`%!AxkS7dnP=ErrdG9Uf83!2@ZQUYi zXg&(e3H697q}cFwmzAE&NVi%>>e}{GaoM4-cpKx|wQ)_!)u7tCLHy4g35EKDd=Tnj za327`{goqj_D<$TcDDtwtp?{r z%k=E%CqHD^x0%r)fH03~ioN0V@?b0XZFETN{MvhXOH9hb+N0LME`=3^I4;+k1P`H> z4mDc4=GZK*X+i?dsjeBhr=2Z`;E)`iCbpfyKewZVXdM z<{qnco6gymq8mxPb}N#F+C%rwrsG1>t2siQfGi)QrY#>R;5!OeANkH z?{`L)IG}?fX<;z2tY30UhL+uiYdsDe120;`%IDO;*p}naKQXnT$S3hXr&dpE`_ChJ zQfZeJ##t8LXg6EC>yN!CElV{bJ$QZlgT?J<%B9<`YpV~78hp6i243)2Q6HB=_5m+; z^7&ilW>S;!qH1(FEfbG2b(5{LeW^Du)(j>2+@n3+{TrNz>?M=xNs<`znqx3tye<-{n!GHQ-|S04^>V9 z&r;-3S_soHA95@mF}xwLVXj!GA&hP_VNoH&`-LkW&vljSd&0N9GuEvvT`u69Hjm}!X2!Va+gT_Wn983CcV0LNV9D;+FG)<|fXuz8c)_oiLT{Un@JL3LwU z7pFuUF@__H2pmwznb7gh{FQX$VE6|*%|#pVGQ86~fH3r9*nGS=l6q>M<`OQI>Z1ll zLKziVnAhgWQ_aGTF;9HmUO+&{Z!@B(J}!QXYEv<8Y?DbZ z;N2>i%5g&w^m(s*vHkAjlt4U!$NQBMkVy4zgP%@%? zrF{f397F<-O8pqKqrpK^LTR=N#inJRq~7PpYm(O^@41&w!V>G!8aZX01o&NCc&v_v zDP)oK>iSGft^&GBC8W=9l5=nlxd5O*OMA%0%TLdO6nrLv(b)~624n{OIL4eNv`0t# zbf8PdtoCwmBI>voJ6bFi0(`BO%0tvLZTvB#jv${ecV6#5%(lMz zE6B7o98HkP>r=^r;MB#0l3@XB;lnbQ>0oSDqWvIf)@GA#=S%FlVGd?+`B=Ktec~5l zZZNiaycmH)|3heim~S@@zt!020Ykx2fbyC9d4bgFLR>{wSg|SQtZv�r~ha5qB6@ zZG2yIzf&9z2URJ};SIhUIAhP1QdhI^3F)eA`?b!y^F_Sh1$^6(?ktsTDI!{uK*W}V z*l_@cRtAc;R@Qdx23EF4x0P!F)3oyD0J+b7qIL@)>iHoKh_={&!ze#uKB4)Ys+2tD zFX7w7pcP^jLRgnP+dwO&jgrkT4ewl+^}^{rp-~IS|%@vVunv-xcvOy zbY+W-1dN1Xe#BEGCAp2L&t+m|XgXQuFG;^y%Y9(B3hR2gK11hroL)f*E!6Y@A z=m__vWIxEP~eWq}N`CMNRnBE7{?pPAZohwD4<(~eJS2@kM{q?9*jdKHY7@qNXE zgng^9$b37bd3E{JO;d>dXMw+*Bb31tH1Q@n$Pl6%RQ2OgPh{&(TX_W<28+d*k;9DJ5NLa|_hhv_i#IY>wAW*aHjB6Pi-_nnrDu zY?nj4Sj#PgCa)KSjPyQzjewQRK5Z`cn{spYZcM+ya1Fy^Az8CK-#rBVp`W+(Hz|JY z+Cb3mjQ9ljf6}jKZGFprY9!RGg#)K;_a~_7D2Wf7CT6j!FeVjiiiW%fwO>U8@1{jX z?qYk@V&J4U`NM}ZS!eSawO1)LjN4jJs)8iYe3t4YrON@2P`nw89t6Q$qC8qg4l+Ln zx`aI!mBh{KK#6y@6~`f$ix%=C%B4r1iwCv6M)Q zMGvH>Q}!DaS62Gn*4?UndzQ=r~>g38ip|dR{b=lteFz zZqZ~iUf?D>iNA$wP!AWgfrg=m#ZL0I0o}j)t84c<0k>@VYV~6Wk?cDpQpCQsQI5C{ z`fa)UO_8&8$-b>w56O+PHWO6e_cS6ZC{$fdJCyd7XFZu-(KG?Y=CxN2U4*ot6eJ6K zDm_)7-c_wtUyIK|hJV;|6lA7Owk4*jZbmzK5Y6HIL&z;lxzewWoY&)-F{JeTW24Ud zoIwSxZYiRNtRC&CSGlWM$LIwf;-J>Gt|Ks?> zvEBvV^>n{cn21mL?+)+oJa=us+L=tl^X@jt3dQ63c$GvdDm;v_`u4H@@tpZ*V(X?ZLF literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/gh-35802.ods b/pandas/tests/io/data/excel/gh-35802.ods new file mode 100755 index 0000000000000000000000000000000000000000..f3ad061f1d995488bf029f926dc3eba60fdded2f GIT binary patch literal 12692 zcmdseWmp|c)-DoULU4C?cMnc*cXx-4ySozzPH=a3cZZ+>g1ft4nEB4+%(-Xg`R?Dl z_S3zqclUa$YgKiv)vMkuD+&4m83+gx2q@G5MJ~{iBa9XZ2L-YWd9BG4d!2h z{#N>IV{L3^;%NU5GzUgHfQ^l%zMehZ|C^SvrHvlI=>Jvg?M@y4dtI1+bBDGzwvM*% zs{iF4|4z>Vpa*bt_+1|)B;-G={4aC770CXkK0N~iBTJ(<1KHTq893V8znj;^$`TYv zRuba9SAam?cW?5aO@{q;_OFJvwYM>`H*)yX4gJyW{{Z`+-QP9jy^C!1tc@)HivjTd zrbaU>Jrg4bIzcmlm7cA`|0w^R6=2_#t@NzTjEx)swDyL^WAS6w{`3d|r`&-?;|u(B zU|{y-!%gOyoh(7BV00HLh^y^U@yc?!#Rr+Qo8=-*Yf-hS$9mX<;njs*ip&NSB*S8+ 
zb^BDDEwoK}%bZIK^;CE%IOpd6d6|^W&eoJ1I*EZaY_OISHQD4YDVN+bt%O}a*zLq% z`)i{hp>QJw&91jRFgeCIpo~@I0EbyIuDAF+;VYa)S=G>b_3~}O_4u*A{E<*!h64cp z*v-J27x`oasAX zBTCRsV?2>LXyI~w-F=~*c*WSLsC5#u^^(7Fb7+$r72iC)-jP-cZ$2kp(?V1j@PAH2s;WS}92kX;!lb!>h{-nVYW&Lna{ zc~oYce_}WQXhXtrNK0;;9D%%vF!Cy+;H50`)=CWu&-Q6==Ew5tK6WqRE?$=74hWiVtNIdhWtK?kbFS-bbi)0iEqq;EyeLdmU9UXsiLtP!P-ss5qx zpdIGq<)tbesXjnO0O^P5X9s$8I!ZrVkjb-ri!??-3{ttcsa&xd?#xF`#^IovWC;X* z4t$qL0}=b6OGqR3oPZs1D6|D;CFUO5W-uum&x!-dv^E{23*Tv(1g~_mHbJepdz*40 z6%>wcI!Fg2zXm~=LzK0VGL+0VclyYUbpp0q`P+f9Tke%yJ6rIDqq2L7Mt_E8*Llew zK#U(c5T< z=(h*V0pMzBTUz2fA1weO(S zW31pHXH8cVo*B<|(UsBFGSzX7==8e4Lu2I%I9iVUh{-znym`4ot*(#u^kgy%*RadY znG(($06&_sIXDYvzUSqn=x+0eoAu1qh2^sE>yg$L1EBRDU6(piM^a)&Bg^cxH-o}k zHLOwPbycw@E?6))l?~^0k;l4J#myWQHG0u-PR=!S z9blr)C2hNysS7kG~LH@ubo$g`B6DY^Z9ffmzj79Hp10 zF@Yz)7^oqCO%BIdzRsCebX^Ues;00i8fv`G3yiP(xFFj4)ojsl1#PC%Yoey8R=dLF z$XXL{iZ{BEePyi(u(%w)+(_=8qpi$r+qUXojdQcNbf!U_;bzU0*q}PZX??@nj{h$LWZ6iucf|5d{%ln zVQ#m9%3VFXJsS7E1)3_Y)C%Wg^ul&MMAfsJ4Usu-($ec93nWPBhPs)Zd3J*R_Dfwx zm7pBlO!+!KL2)RmJizbCQ-Dop%!`C=@zv{jdvQ$0gwIYF z7;3p@j88(sAA|1Q_?~e&>TebDHEr!U#aX0AV7M$ljUeEiXTVKJezk?wA{auH0?!~c-(49#^?oLpar6_E0Kco zs=5v0bB)f733SSpu2M0AcJiAbTW$cYX?+~={=(Zql|ZY3@TFNzP6n<5g1{y>FsfnC zG|1O>)}EZ4NJgM1pCdAiYfzXAjl;c*U=!8J(_d(!i$*@BR-eny+J(1zuTl7$K@pC0 z%#`d&D%(bd-xs2f9Bjo|95e?XMsuW6Kd7%+H4xqpQBrJCxoZ4^Sip%va;BDK^9y}IVvN2E z#_L$S=UW+|4aXjUl_JR1bekKEiF0Yvm=*lBONWN=%cmVJ$ux-6rJ<4LjG!6JVSglu z8aI(xy!|Hyy7Grj^}SG|z0gQ4HfI3!54M~ndip;8pMugY!bUMWY&*!8;F_%Tu>4OZ ztCQXdiO&Vw=atRvvbl^~RU&44$On-eflHu8u#4Wa#xFz;0jF*B)@nYB{A!^kHM1#t zQ819nFd?C``>~~xJACvjmHZwn1Bu=OeC@dMO9ET;-*!!hTYo0>&MUfR8tsui96_G`-rwInx1j&&gx5XJ{OnTH$ zY=Z5l9*qS47U;^)QFGT1ybH>~2P}T%d{u>(9FNkPnNnXjf@v3jDvX<-jmARH6(9Jy zRrpA&H-`r9UbT{@%d7ijF8AJ1!_W;%bZuF5eiO+lWQj5zgGg;t226`l^okKzUZ&eU zMSwG>iiP=mnp!TSajZnLQATsP4)$`k+A$O+yn57WWq`u;chw)iR_B~pN;4_l`^U}O zZ8>%!PcWvv1!N6?Yy#l!dcTZhR_fJt*4x_s1wL&?22UtuN<|bOKRMop-+ZY)~exh&w}vsu?QI2^ZpU+#f5Dm7@~1cUF$? zL5fuF`Fv1;-UM|m?WURWNR!argon5LQ_^|lT3%~n=t@6t*x|mXW3*4Hh=oz(R>@cu zrY<$BoxPVfGco}VeZ=~+ThUrfdm7|C&``w!=S0WT^6q{QpAK=SwpjU8XVnRBo1@{G zUj))&K2&KKb5Lze&+E%aS>#eNoQ|l;kK733eVRF4wG!)M{>OMNhEI!=bvrz#n=F$U zy*ilrZIswGbp-kDKQfqlUW4IKwAvAW}|==ytjFgNLtabl4!sZvHe4ic)5o|nU%$)^4yhcwc31- z1{;e9G@!h_S%v-FVxVen4BJAj9ut=zvClblt(!}uhZ)T6fo9s{C}Y^L{G%DM&xNd? 
zhF!iQ2*o;W>c&^z48SJBm&9L9H_*K?=V=NL#FTU{Jpph*?K)yD-5hEPF8d{WB<(G}V;xW=fm#CSeH|3trp z-?@T7(hcHlkp_uMT3+44--9m<7KMz0FRw@&r@x+9Mm|*FWyb-`T_=zV?4D?Yi_><2 zLcLg57T?Ct9B*OjL~AXOv@sDvZki=92RUW6nrY2KqCIXiWN4c(leuZnn!5H7O?KU0(0Y75&MF&GmKtpK+<69^d4txzehb*zgvI ztdVQ=WUoy9!@<_(iRhAn?eEPSNr7%wBrG)iv-ZxLn|il*@)T0z{Ml{X0xjj9lyXvj za$}6RsS-_wO)DJufnS9ZQ))?FN+ME&Qz-&o?Lu{t!=M^)ZzleyD?CEYOa*M30 zy|V)6pA@N6UPG5<_o9TTjcI@;rZXsSreaNJMEiLN|9!KEUL{8bS4iA9t>Xqas5?+p z;v52j6E4~?<2@M`}ReHC0#@~$!?dI#y&Z) z)#Q{~^sFk@guR?C>r@y|WmB#W^Rtk+=4Azdnc_&^{)g4Sn#?56wr2f^&O2-0&p&VjKF2&xR z=Ho41P%RIT6D^Z}KKO?=A-v~|5o}kv=TV_0!Si(+w$or&n-XL}gxT`eO*ePijwPaFB0BS(V5L>xCI-GycQ|8&Sq}50ZC9lHy=zHzJ{#f9 zID@F0csRxN*1!N>l=32xRmyG>7Plva4=5ZqUf$wcv8}N(KSomLZnIZ`_~-=AiaRrV3prNm|dvV_S9O-A+)H1ZR&{{dIdAyW}wq(E1 zU&GlnocMfZ;ur(3Ve&wKl&r3yy>L`hSsO@c^YLdQnsUmF*7CHV2K@3-0O(S;t1M~NM3`|DEX)-lGDqJ@I0r+7X1#nXLAJikeQXnI_yvnufyyyR zJ%7^F)ywjA!4lW>ww&=M>A)>cl!(JwV@;KP)#P;t9Mh1>!VB}&#vypjh7tp=&Pm!^ z+3fFADlPen+SZ>?pP}QSuj(AZ45ssAUx80!Ujvo4m==wl@I9Cw%+kOtl zjX~iA9?@b!-2NiqT>c_Eoc{SgANiGEDxyB?eI}2g2`C|wGmU^4$&Oh!K5i%p63mH7 z#e}JpiWKgVf&(*D*gW8kg2Eu9z;`E7xy6vu4GEUWZmYoP4D6OzS;_it+X&L6kn3}d zOabQ{h8&ZgU9xFANiPMwXy43<7~Fq{72eQsk?IaPTxg1IkMsq2`GW$cwI2n(31I|4 z8V(b#Ty)?EQ2=^G@uT*-5+zRK6xwbX0f!A-zw4oKS^)?xUqtz5KX93YJ7tXi5W9&| z2z2mbgTZ4(0+S=kg$OMxb_10Xk8d@SOIXX@c)<3SrEG#Bk0foYyQ{X}^VN67zK(OK z_-<)}ML2v7N`v`EPtPgn1LxnLgi0?GVUy-a@Og!ym8D3K6unRv3fx@WPeAq*`|{Q$ zJSrcLrBxWBhj{N{FP6DXZ-&}Kn75Q))`SppkO?97y3GRXVT-A{bx71OC0gv!Gld$9 zn}G6}$KLX^AdRytvNeUvQg41T8?{wsVjx&iVBoCI6YvVv1g>Q4Mp|i%hmF+nQ--e@ zeI|I9C$dDASYESHwy;r%8nsTPo4qgbfh_{iZK`8*eyHMleZ8l5ThjW1$s@$ZJ?*Iz zsJK4#v34PvWDf3>eb8vvfsC8-0yc^WQRs##FX>x(#yNx;zc#zq2By`Pm-!;CUdqh2 ziRIDF%k$L~>$SS7*fx+@9e}R8nH6=q7zB0wn$vV}+jJ0A;Is9oNIIbu5qLf->&N)x zO?4}#@A|D9m>9F<=Tn1y(}6SlM9XFQ>7`*#^}-?tq6%kPVUd9oC-+-U9?|vYEu5ri zy)7jBu}BlFP>Ws-i2IrG)g&TmHJMCAVKkWMIaZITunT=|fe2@NMt6hE%s%B05?l0e zhquZoIiB#F65x`x;yfZwXrF!j> zW7$$pPt6%syxsks~7A-4Gf`FX`VX^w7f=|Rh^iMNcB2AYj60#lZB#e;-d{dCZ z5NbKw5eTPZ=@gG~$rhmVaS1H1jd9_0OAyv>pqE>b>s{w7_m6^gV%I8F+C64N;4TEu z=7^MTWN%LM(-3_WCaz~X4+cf|uX!InmBz8Nrq%SIlg@i1A|Uo0QFqlumx~nJL?#v5 zp>d)2N!CoUK^s&J)l7{GkMkPbDCFEK91Jbc91jdhmneU0{h{t)UYpemR%8+1*;Ppc ztj0z8qP~T8QPCeiX1WdI3G_U`wOm)3D#KWM6@ZeSJ0c)cGQR!5r8Uz|tz4}7DDTN! 
zS)`LlsZ{D&v`T#xr7_?f#4ULlFZ6Ct_jjc1BO7le9dpX)_bw>!vr{9jA7 z&y&>9^#oFe`x!xLjV;_QK{6sN>$-}rR^pb~^;CTS!NH{=!bIp6p0qs<;rja_#~8&AK$esF`$T6?k)U64u| zEF{K=0#p9LU`-cKC#lziiRGOO;)}>OygdhQ?w)U`g$bjORh}%}S3M{2EfP!bo~Yx* z&s3QLSb-c|oMqoZl!Un&>D})L?viBu>xHI%-QrCEiSsZxhF<4IAYVPI1zuV)hDXCd zQ4<6M801jF>_`TClZB>J_q?;wB?Mr2pUe=UJQ{y8Kl-UJ)YdMDjf{CXvL&Ix2rJKm zdG-c9qfw>r2Ae&*0mcUnYYf2>+pjCN$*4ZetK09W@p=m-Icjj>ZVb-2-+8~M^PWhx zlar8ufH-i0|0liwcL`sZw{)I?jWytZRtDNsHEfnyk-gR`=$UmFa1HyUxtO&#vL%(X zG^T+ixsGVE@`ysINT7+gCB2{FDPr>thm_{a=<0hN9^oefgPXmY=oTzt1m(3?KZM^4 z)B7{-K+)8s{(5eiUl0Ga{g7&rkhw?HZ+ni7$;nT~O9TBIVKqXE5 zwImK1UxiZp99n{^h4ZT#M$4ER^>$t}lzm>Y67sQA%l(P{xG-wlBhhrNY?rohA{UZ) zcTi&z8d{PZDv406t)$*<+1G{LV)O)6O)V|qpaWGB9J#iT7q>?52nTX8G07q^41WiF zGC`>ou%4q9x`stXHN!r3WEAW5Am0k&Pljwt;X8f0j~m&X;QW%zT@CGg(H|5w$w$gI z8Gd}*Id&<5hb4V1N%q_aK~APDb@g22$=MD*XP#6XNx3Kzxrqy;z=ML8DJWj27^cyp zH-M-q24ljHX9odS7ZM4-@BRw9`}AciLwM|wL5enh-^@PwZZ%gE`B8ZR!{(TpxJv7LY*!cM(0n!I|~Dlivos%h9=- z6Ha!N!!FVdCF2)mzjNo1F_`{3kP1mFwC;U>&~6zvC&fd*QLW@b~4(-P>e(BN$5i& zg(Pe0u2Ft z!7CZQ3tBCiTP`{?PmQ^8=)5$B!1%BEJHRhrQ0cqBY&cYgR(T?M%!39|@fHW=qB&%# zMePpvJuP06VDf!zOhmYlnDVQfOj6Dc+HXX`=(j?HNh9G8KpDcX)nX=LQQ#@y4oy^kPV?@Xy6}0~| z2Ut{J;kVY6Q)y=`oR9rZT{R`!G)G;RW5?*vAwj5UUpZIG5eG0gy3I@VboqY;!XGIo zwFZ2OV)YTpb3Hl3K>cb^TEndvJiLFz)2H^JRuNgbIj}`;4$UJ}#QNI>_g8cB#C- zy;}dymh}a%m#3w0WW@!t;ApFJaeGSYm>$<1jG~{S;c{ z*Yja;U2@*r&`C04<#1XBgwx6DU z=x^gD2{U)Ff9j=U7j3?Fi@zarH2HM+<~M~8Xq zWU!js$zM8MG+i1Y(qmoc74+SM=&aSVLV*JT5xy<9|8s%(?>xx+imQVW0AOZq^3Nq9 zbt~s=PJ|aPZ{cX2Q!}L{Hx-#f+}d`4N*PG|ISqe8DZx6iKyAkh`0Q#qRawbojr+-i z09c$!(2(IyvTw@-)khs_g3XY)N6qu%m=>Sje0ajk$Dfm?+Frw+9zpfRIDUS#AF{L6v=;us6d|KS3tH1dWt#CP*#Yl=o;kR52!eQrpTiMB|3^UKG)5YZ5*sYloI_ z{o|4FkPyu}+oab|TLnH`wxrbpxJ06j!C5GIiriRja8o~d3SFCQJ*gk^Af67*~ADRmafOC0P6#zESTiJ!0cP zvv^+X=CcbguIHCT&#~|sond*AUQ6(wgIAV>rZCya$7*#hWTE_UEw9NLFFz4OUPP<* zffIy%L3D*&o&5*6E_}|f2U53vA={Naz3}hPjaG7D1o%zGy1?9a4hR~UNSxrBp=3jp z#@~W)*dcB>#phskGw_c(hvu6o_U+SzqE-g%+{6cUmbvYnvT88;7C@+D>5y$k$RzGJ zr(RtF#95@FGuacmLOjxEjRw8nVy0BRGhS{&&)gLa!P3VvqGb<}6`Uru5~l02e&SPJcSEn8^NiXv^1CKuCOZSD*Wl zJsW|Mn^g>%18HSHLh5GF5yZpa+y|!dRM8A_0cga130Wgqd=fut{Hv&Il9*VQ_L%iJwF>0#a*d(Jx^0*N;*pXru7 z)Ueal@}^kOo^e)uO?A||920N&i8J^*9414Cc@ix(Z+>gL5`J`Wmsrr`6OoawEVltW zyyAxLgsZr!w8?>})8nL(38k zK%MX09!;JXIx8U#mQHXPDT62vo?Fj!Cwif}JF>)RYC+%e^hHoHZ#!IisIh99{W_Gz z;l5#2*l0GZo2P+dNo2})zp3J(Igzt5> zW_B*k=GV&-`Q=hc-5ve zxe%@gKUz%cAUa@+Ruk4!wAt(e3z&?zd$@T3kK@4f(rX5($Y8iew=em&6z^VTQ0ynr zAw(}KF#DPMXG}lV5^RoAU>?5I&Z|MZ`ik8BX;&uKkSu-b70M+)okAsu-XO9MMZN9j zFTreE7viVz%CBJWPQsfkulx`U2*{KC4=3>l7pR}4rcdVW%=`9E*HScfw9>cMGqZG{ z1N>2>wY4@0m6aBSgT{O(7Ql&%3CX{GzrJn1Am4ZhK#$7&^=~vKZP_o1!q6Xp;o(s~ zBI6;!!F+;8MnXVBM8L+v!a~C!#>2(OC8a|lWI`q3AVGyD!9u0NK_Vl>W5!3}AVuRQ z!{DVrB_YHoC#NK&;i8~p!lL5Gp%*5i6Chv_qhS-I;#FYc7Uty<;1d$(mDc5w)#p_( z6=fojWF-*eW0w?QQ~gY*EI_Xy#I7j8ryCc~g|E6zfUaYhkz2I6qrbC$m{t*PgZw~H202G_5P^p zTdW;g?iihI9^Y)A+3qg;Hdve4Szj_-_kE&+X&I-HX-3+l~2= zt%aeDg~_9}>62d*+rQ?H*QXA**Uq=5uMSt&_f9tt&vwpkk1sBdt{yH9H_lE^FV1(L zFLxd;4=*k+&u$*C?w_6?FJE3>-Ur<4tIoKD(c4HX6&DgvbXhu{Qb`5?`jEBO zndJU*Nk5d_phN0l%$@D*ayEb0roe|$)3yrAc$+J>7BXR9YMD>RrO3S9LPl8E^7Ak|N8huR#O=+sFsX%IA%2FuxPz(oIL<(4dAAyv9%nU89>MMZbyv^!r!Q|*A z^m0vF!-W(x*}3E{s^z)Qq|epyyW-a3nFCmHwQDq{??Ii0GIKa4~ zfSTLYL*=WYk{P324l5t8$;zC+Qu|e*F9VOKYy8ibS00J_<%>-Z&kD`*63KSE@?vMv ziYbnTt+dL#vh?S-^?PZrLmm4L??V0StK)R<%>4uje}NLK$Xe~0+*dk6LYd;j;}e%| z%C#6Jw8^m-D&S=`GmFhr9sPP%+F@=f9C?UJ>WQ$LFNVvaR!?+f!JlbXP`DUbhBIrp zfMZRQc%OAFdA*&7!dUQ{%FQLM?osITF8py{Y-k9hd!TBF(kdf7&0nb8BV_3l)W7}! zyn3#rj(o(bU3oo~`l`LW^w8vezBv$$$LIBQ_y29-wcrjEm;`a#+0g3bZ3F{}3rh=? 
z3+np)=SUsp?J6l@MFARdX%V`AktE+(TxAJT*4^|7fwyUd7xiH))y32xFu55%xSe$a zU2#nHPyWAd90@^+C=tvST{q5eS5te{tfJD(;1ROz7Gl^QBq;g!6T0dNGFze_(k~sD6rx{?a8u3v4tUG$%fJ)6s33I>CZEtJ!G2|kw7KwMKmA=VzJbD z{c1-Fg(z1{2-w{q^Vbj)xym9x@DjjE911%|W<>@z*9Oo0@EM<*)6Vt$ydb4BPvFCt zElu_j?$fFU5ya;spD=i)teHLL2Oz;d9)3d0sIO(8t(LIC-82~qNL0!5V7}}ixVeP7 z^(?;D;znRG-JqJXDoR6L_70dZlRx{|?z`wK!{oZ50kg{+6+~(DFAK z^{;w=&R@LexqnL<#=m8~|El=snD*ZrxcX+iKPNl>(ELv^@9!!>N&j;M{0GV(X8Q9) i@;+(*mJq7{FonxXg1u#GfPg-|eQe+6A9$MgqyGa7Jc#%J literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/gh-36122.ods b/pandas/tests/io/data/excel/gh-36122.ods new file mode 100755 index 0000000000000000000000000000000000000000..3dfdaf976da4589b48754606c8136196e68132c4 GIT binary patch literal 8974 zcmdUVby$>Z_wFE|ASu#FmvnbXhvd*8HFVdIBLdRW-Cfe%Fd*Hapn!CDOGy7*@3_v+}#v?jb+>h=73b*LJ!a%zq8}u4WCgF*h*=J2E;LnG8h_ z+4wM{`|k7m84phiF~P$-JnwI?NdLg$uK~|=l!P(Y5*Don%qiST|GZo(!8RXOoxBTr z)*D=z-=@NDNJH5#WmdCE$J@l%kUPygHCanXoJ4qN;gg$A+vs9L%cB?P$H0YbHBy!J z+%@SNzd|!v+ZS$oDdg_zFa$)RP!aQ!6?c4|;Uz>9b)e&RMugiLi9qlyZ$U=oV^FOU zG@zCw@~u!PqHMpfqtCNOd6_P@xNkt7w#?n79)0MTPZq!a0bSV0x`fO1J3w;+hD7WOZ za*mpX+7<)Z>8tS)L&=DxE?UA2KOu<_whI!HQ)m*6kv>R<8{CysvWd3Ytj~b!XQ98tQ5gWN4 z-KfMFwZ%WVLp+N53=#T@ZY;g_MZdwTl~3^cukD;acknd4-ieJ19NwEhrgc9-J}A*E zy@9=7>-RZV1GRT+UH-3Y{rK)t0Xw=`8H4Xvdsllie3}#cw#CzL5l;L;u|$ACbOqZo zNwSU0wDm*r=q?X>ZVbw&v0`ttdf2YF<8x4z-3VgVTiC{gC z0Ma3Lu6!I!A)9dioX(0Hv+}o>C)B%;3(!d&!A@hZzs#fQWPra)@MbjHGNK{svay){ zAU+HhD<_4v0eZG8o;RRd#)8Qnt|P=rb5%146z$9xW_s-0n8&WQb>Z9uog8LlK0GdK z-ELOMK8W1nIl)ISj@&}SJjm${K_4e&~hdi!j}ZbM_RYQyA37xLoDXI^B@;$^W~EbkBGu8jnnIGL8vR9+7>DR#8H* zDep2YrWqQ7jozvjXa)ltEYL95wJ-<_Z%b5`;$E0;$|uuJTV|Z|m5)k)$NwEy*xME7 z0DiiXBr^rGget***xW&nY{L|LCC90QHB$@S@_A0gf*FlH&YS-8FW0mZ(cfw6)fKRW z>A?ZX3q%;kp3L*=m7$3Z)+e`dB>rAvHg!>89IOz_@G?KfKvq;@7la5*RcVpT6hNbm z)Hlm(wr$MY^Q&#fuhJPo&)+j(3MOYrHCR!7_S5ec7TrtY&HlImtMGl3qDCW0wq%nw znMY^PL=I><*dJW!ZG_OO4^sQad+aQgd$M5A^yqHUYOOMbOzrT6MX#Px-qglf7h}<1OdVs4? 
zWkV@~wVGz|*hg?`78C@J8LiNIWqP!#ob7Df-c~)9EW%)|Fm9EIh3eSWt)T9ZxS|4# zyiP)S-V)GAj`;J`fRk^6G|CCnQO_Epja*!8-y+7yBaKx!fnDOuR5e&0%-gCePgDqAxjCV`{TWeR!Hgq!`bJe9+4;TA127s;i;5EOR}S+-)`%SeYH>> z-+t2ku2bGG3MD3#VJg?F$efex)%dWL^6X4Dgef3lt(qoV4DvmJfls%gw9UgkM}_LW z4z#ZaD}H3Kta8g%CT{1@jW285Rw6DRZPFAgkm;&3e*`5fjX5Ed*&VIE<3VXjuf~}7 zhPyYiP>cdjrcV?f4cdqK4_vmM2OF2u{4a$u&o=|LNQ1r@ogZHd0xyGAlKrZ#5h)ej zb%=>}hdHNGtdW;?qLS|#8)efh%tEgS3h1Fj&Vx)Abs zA2w9f#Th}3BvW4Xkcu^e%|mv64WuQU30GUDw=^pD3i%9U|P z1=7@HB2ST79yOiQEJ;d#PRm){kdbZ@mFshPJVJrNJaTd~tbaaps*x4g8LVI?!;z_n z7W-+uj}=nigBQo4DDeu)sw2@}fRTcf!5$r0{`U}8bv@YTh#=M%n&OFQ>cBNv)tK|SAQ2#7 z`)v%-7R#;m^AI+d$L-Sv;i6?boyKo4yCRC_D1^#3vNGmWiFD{wLn|RD7QHxrKk{^rei^m%GrGwc6N8FkJD2oyk+FAG--1{DjnX$M!Jd`*_|)SKnQ+ zQ$9Axpn+7l1UY}7QG1lrHeWaLOyla5)H=vV+@D z6M{$S=QM|#`QKKnPR)ZqS?8{9<-i}=IqSwYN}eo`QmJ>rP16p)pdwyaCB9aD@%D3a z;w=iy3z}>{p51qVzVM-sL9leqTl#_N-ybTu$KYnSCIBpnfCUV1NjfPO<`GPXUIjLD zKvU-o=OYFAW?)}hOc2K%O16}{C=OyLDQLYlFmSFE=qRb$T|0rPeZQ+yTsKuxCvODh z$U4zWA`*0Jz|4l@48cMm+SPT6|c3{_AD|^-=f!IUk z8!On~_)U-z>uEsxy1sU^As*Meitv=aB1@lJ^P5ghhb7r7T6uN}$0Y^aF+1Gy@$pmO0nDp8jzc}j(M)T;TF&ewOm4&K=2bcco~9*oZ8D02cpk0^ zQ9`?c_3TXe@-;-w4yjTN{_9GR5ZWR|H5xph`A2ozsN3wo3s!T7>)HoWpiYr!soW^+ zD|rup%op_PX(juUQ_rbKJHe1RkiW56w+P$X7}f}*+?8C-f^Si)0;^vlmVX7NCpdSz zJwJbb(|}G?J07pID&va*DI0|~c~_Qh`ZeSlxL9=i*Ez9KN=fJd5dg5z{F%6p zXlvtmAAa8*6rmb2;Au|umNRv`7;{@@N}j#R;u2ZvM4~C~fT`3?EV6itZjv?_A>dZo z{(}YB9I;sIv9*PS4<1ePg5^lx4?8Lr+F)`FvfQ<-ZN`fL{TdX-gK285&>OKmbz zULrI2c5r|`HLI&qf%hf(;B0+_$sQG+vREZ4j^!A2MP(}&Sl-@pxfQ|CmGyirGoRV_ zHEK(g*r-VXHhFO0>0%{5hCVvudFsAPrteFsS?OeVdJYpK$+@kk0bMuvAYvV_Jef9$ z@g5=`y7{$%4?E6T1sWDB`P|Z7vPxo&Dxp$O>UN{?&e)5z9AsGEi7x1q(9X^ix~8xk$j0jLMDBfVHS}U^bkH9tl-A4 z5;b_C8y(artDRh*G4~-W(tlI99zS}Bmz@==j_g`FNGJT5!>KQ$hlcC>s(IZwe{h@xD{Metvw{GS|jE*C9PiIcCF;_Qf;`SHTx+-3+(qtFtc7UD^V z*f7IQ&u6h=#{)#v>Kr_SwFWu3_y|54l&)6LnYbB9^oje`dHllaGJR9Qg1RBjaI997 zyd}dyU{!*Kz#EjQbaFj|ip|iP8SDKv(C~Jawv&`+q{`%_u*E}KDe>xfq4{2-1EWk4 z!^W}6ATD~AS(DdQ(57&tf%$B<5xHUe=&VHa4y^RpY1Uw#a{8XNs+NXnkMQoYr=?iv zKyx)kdH3o#jw`~s>9XF->)BJ!rIO86SC?RT)Rz%CS$OCH-{ELV8q&B|c~<BgY~+|s3pwbB7E4hZ*A9g&gRu+;E!Ju(NnFX-^)DyAhQ}ffm1CchYMyD6KdK6^ za#vbq&=L(Hwe@_PV@mq|#I=d60 zhJS&`M1exN)xn!xkDlb~@rEt9p{8o_zyLFCe%zWirUH*&3r z7!>UpU7S?ZIvo6_j~``A>Xyepje6A8-b#Sl;t{$@CC@5Ts`n|=`x+t;r!=~2dp@Oz zCp!ZxUf9`Jl~mV_<3)BcLrU;Mn58T&#d2Udcg+dNU-QB3^H=W_%W(~5zw4-&)pZBx zj%*VrmaZA>mObv$7S6}7^J%)Y9U1bk^2wQ<3|?W^eoMO}`>r46c<&RT8+f^-p@#xn*+YjX@QL$9nEb_{}ZZf>VT(s(ckQs z^?px1i1eR!#o+bj#L=2!PRN12d3yRFsaGqU!f>Ok%t+K`X$lHQpO@O8p#O@uNtZBn zHkN6=RK&Rwj+Ewz?!DWToxIm2uA*S8!yXk^4cGAWaFI;PP(GTCm&daQ>KuqKH%X7g z6xK&iB1iJm(2@4YB2^kwd`Waxn{Z*~<-zG;&dP>gQ?atI7W*}yV>O&WqA1??eWeL< zgSy$<0~OM30?3w@g?v)o;!tfk@XNNpo-A)1Td4PN+9vYWp5IciGrX?~?KnN*i%)jx z^RQ46HZuTMHdu$~@Jt!INRzsSBq_7+pQpU0-HZ`d2PEpRSP>UG-mvus zdj&8nJ4dvB6k-UHxGgg+Ou`#TZRRfH{$UTS>cH3@xJeC zh`*1|2XR;;@i8jLz}gpm*<;w^p!Ea!eV9k@cqdU1^XcJnS~+)k67Q7N4goCdqwJDa zG24B_JqG6US09_L6rSLr?>WW`tRTxq=di7&8hSiWjKXEO+RwUjq;W6lM?htW~e-0~1#=o;RaNkwjH z${KyOtt3YBettmn!gPWsxwOUA3$Es}K+c&QyR&?gvWIKGgU0L-jr<{<&iY5G*^Q(g z@4;OE#wZM%`9Z~iMFbNYo>%kI2p;CyB2Nl6D8r&$iwpg_+E<_Z`E^-*vKe9>-Hp{$ zQ?$RC)PG4)t*56f-#JxqOO-foQlQ_IyCDIjWm_Tyt{@Op_iHV>Zn_6j;BLPO(u!uF zci>W+Bp(dN1LjijGN(?yBq!2rH+mAFEdTs6`eHP+-+dgXYn_j;ci^F+7#TX(gAS3t zLXuu6{FBa!I9Uctp`=um^M>JwqkyL8A2I1)e0gyxK;R_We!25>m0_PHhBIdpvdj}f zEPPNMMA$W>&O-B@9q5;76^jC*&aed2WwAx(f_V!~%s z_|Iq<>Dbtq*m>Dl`8oJ`xdr(I#QCK(BzU-G1bJjc1;m9!#6(|8NlL0o2nzH zloe$)On`dUnmUF$7H$?AFYUEu?e!FG^pzoIY97|QHl~L9md@r5u3#%;4{Lo-7h7{k zupPwL-8;}TFx<;h&DQ}G_S!ty*FE0H&iAeNJ0JhBu(05;=$NR;gqY}r#Kge3tl*U5 
z#MI1)%<{PW`mFe%_i6FDnaM?2@kO~AWo2dWODalB%kwK5tBbR%%ZkctTbi32p=}+F zJtK|19~+04TSk}q+v+CYu3KfAKMI@z}}^J#H*>S%3pX?J^V=V<-(a`*Ur_x$E?XXET> z`|{iR)$!)R@yYSk^~LGo_0`GE&CUHh-`@6X6B6D{@{qKcu!`%{?iU#aWgw>4r^kWM zRovnHo_lA625J%H1Mv;vwN^;Yl%KFK1qZy&d8gC%fCam>7+uKfJ9y5TxI7hht{ z3;bmMx-&Z~hx}P+%-pjx&jQak>NJ%0Dk*s1*_}Ku4Age1amX$_PsNez9*)^MxEDdj zQD3LEDAaOV%t|}&QmZRnN)@?U@HeVm6SPh|Ow(iFfS^@gnA(gCW1ae&t~0qXLf^SE ztCa2En7_D4%Kq|g2uCNe#`cRYR`l?IX;Z}ouR*Cc<9r2n+qMiTWtRZEd>yh5ZA-0qH-4X+&#pT6HMfAP@EXZKp-6bckBFrEyFTwO*Ve2jo zD~Xn~=^#OuIOFl_s%5R#RDCXw9SQM127E?F#UMgQ20C0C(8+77MjFuYdHA0RJgoSd z`dU4eglfA2M0}O1>Qyp<-Q@p5pFmBKP%J5==D^4yf;?n&&G5N0ZC z^)J_tyLt0v+?r?LWt~e=gThdrqCxlM0M@E<@c^ywnh&T>5^&ZYVA`I`s#PBCF_ zPLbtK7@@y&jEXWa4=@3L7Nzgk=coOUsQ-!jc?SS+ujBqSmAmS{i_m{>|0favxR;@S zTG(CnpG^Ntl>Rf>y-57i2JWi=B>PpQ{xi?thX;E%jsN8NyJ-DqroZ<{;5SUair9bV zxqo~8G^gM2+>h)(g7Ne^`F# zxW6jA_aVbiQ@CUKukW}M#eZz#AIM+z!g~eyr-j|U>+k-X@s}R_Pt>p3qx(a`PlMcr zG(VNaf8u@(dhT;eKP?OGZ<(fl0)IWy{C$ArcW(062OJoL@=qhadrkk=ODoF2KfJ%~$=%2J&gn!G-Cz9=7`lA` literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/ints_spelled_with_decimals.xlsx b/pandas/tests/io/data/excel/ints_spelled_with_decimals.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a667be86283b8a721446657122100460f19f0d7f GIT binary patch literal 4734 zcmai12|U#87RF=?GZ@>D?K2U=*eUx~5m_TyGEBxcgffFn+D2NhO^9_lP1qd7 zYUN>vvUU5vR74V-GYX=b>xqtE40_FcMjwXa}NeO-wn$@^vX6W*!C zWQt zDtmI2GW)EX6P@~v-9wh06AT^1>{nXP)~I@Q2AA?$G-44N9xDM=NC104PiM8vx9N#e zfi9_vGlGt#7VnFC%m1pf)YT%VAcxywNQsGv$N@w|y8n+#4`){gHzWe#?j~~d@q@~k z2~;wS<$lSHul_z9)R(v@N%B!f6{$u;wd{56Cj{SVRBm@o^hdpQW8zNZ;KI$VO&jkk zT;wFX>dpQr>||=d0LcG>3=`>W2X*~{m&K=m9EmB?d{RD$449N--PulL=8{nmvY(>5 zTPf={Rw>s-okhP9Ng?g5$qgC2DezENGWsce^rd|@Fpxz|O}3hazUC!&cQkw*>e53o z!W;^f9OP}xM`kEEIaa?B4a?F^f2+W>Y%fL$g93OBp6+RL-H(UDFQ$&(M8rf~T2pq& z?>7)G_$W4@nQF!^$Ywo`P1wLj4n6ryi;ACZtm_5Hjc0PUEI4Gt{H?w{u@7Hag?pY2 zDhd^P^DfNnI`j(PY(cNV>%}D3bpiPyF}Lmc)l8Kqk?D5RC6lw3)uk)xg&;$ed|k?oYSjZ zW>(Yn6d_NdG&}{=SG!(gbI_;cSTkyS>&AT^@GoCJ<;bCG>f^Llfhn2)9ML8XY)ove zVwybn?GEtaSq@pWL~sYwLw32;Ru860*>5P|W9-KqmsO63{o;w#WQETyD8zh#(tqXzmJAO@2gAIv!}mfYK4}D@vN&#B%mz7fK-XT6B}b&=TV{Lv zeOfIgLE`0X_he)R(`HCvW!)dUfb@M6w;BLUVAF{5a;Q}rg?2-}j)Zl$^sUD}Hjq-i zmW1d@D!QSIWIP_Jjyhh<)Y)?PmC=UGr!G2%!3A4#h&SWAOoh0g*Pa*_O5HCtpS%(V z)G?>QeVM*DK)hOG!YfK019jOSZD@_QWl}Q_EeYEMA97HXyQE!CxI+N~5C2>|;2#{g zBN0vrk)!vcx_Mdth%QpcQ@S*Jc$FM67D6;Jt+JD zru%Ha@sYm7Mn#My<~&%i{(fjo+evP^GvZ9lp2`V3OtW&W`r4ikR3bSUoirX=Uh52e)uSmb-Xp3X7pPaI7 zMcYc(eJaF^JPpcT_VurHnw&&AxC266cV-djaFzDHyxqjdqZc6i*o_8~g{k~cUH(1- z7lQ!z>gM>fPs>}-$)P|m6WBLH%3~Vd2ndOa0fww>Cvsmb@#JMh>T815L!(PnwCH8E zTN}g{L+{u}n?dbOL`It1PA!SzYj#t*XQJ>bLe@)+a6N+#5{4SxU`id=rOZ?1&Yoz2 zrH|wMiRk?P}c4I3_>Wdc>m4ScBB}&ErLh1Z5rD%|yuKmtZ7_8DNP<2rH zNzHK2EIKbm53zcV_Hb z7?BR_x#Vk@4!ZP>VrAjcL6d;q{`G^=AtZO9*F?Qilr+Z5H-B+;vGqpu0O^PyU40Kg$Enzjy8K z>4-SmZiQj<-@=gERu`7fTgE;+l^eemRrFS{{h0=FJBd`q_f7T(6B9!k_6&In?|PFz zG(c{;Y{N`dEwVtYX6jYRf%G3wxR0_6>{w5Z`=mkDR?eZCy25nC27-4CAX7FoZXyV1 zCtgMUJ4>*4d*DLWtQ7#`zMzK}dWhTsP+{WhsqCZg65@$zW_qr&eoNKD5N=?xe zPp35e5l>^7){{O)PKXL$<@MIy&^Q4T|7Wy zgO4jkYEI1K#3Ft8gTXp7nzF*LZ~RSxY{_lZ@>$7L+=L} zHPgbuRI%-HIcwW_!YUWkU3*80(`AA~-Ob^MoTCz;d^m|t#=W?e)U-@_dvHZhV>po8Rys8a+mNHEy?2zw zW#`K3hX}2r_{5*`0rVpu4qGPN+1cU9tww7R+$!z;D+A}%-eauQ1Vb*qGmIrXM>(3_ zf#Dx6*d$AgH)5mI(jC)_cON?|;0{oetSJ(BjH?u%V`B7)LKl6UfllL#_e^ZjL7hc2 zqt`mS7)fxd!hG70r6=hAsTOt zk{WeeV^2$~%+15ht_cvA=$KKzLu1eba_M(-4cRr4>^NOCC|3gor}bsTqXv|8k)&T# zlFxJV%&|aL`s2Lso=Gyo7%G(##TziH$>Nz>ww5mL| zguM4TL$hK^6>}y1e8RU)%`QX5LE?Lzw2$CnW5GFX#q+^d%YzWngou-ixdG5PHZ$Kl 
zw$$Zg5#*t-c|-ddbas`CDfJgZ9_b2Dt+OwYtWXuJPu`Ez4nOj{+TH0z7YJv-`$xaa z`r{0Kc$MQPapYEtqLuAiVYKRm;|RfxlwtKXLmz9Ou^G!IryYDJR(hISdvs?jMGAk8 zR)yIj^q_6`U;tUZ$Lh5f!0*v4m>Wia!VBy;f1PwSIQ}~yZQ}E)S`Y{0g9NMVlTH?; zBQM2qw?(OdYu5sJ!us`ZPP+`$P|(RwN^sL8nPgwpaEh!|uMz0EkT?g?wX_LqFpAo$ zB)P@Hv(I;4Lfh7bxz|qVim?E6?X2rl9iiwzOHZkSz*Z#-TN9zKD7)ot8orS7Mxz%N z&)w zh4G$n&=)H%!KaOD8qU>N`xvrq;r_4VFW7YNCKSFV@yVdN@oDihXL2n{7yZ}m zC$HN%87)JhTF$VOCcOJl#rGlYXVaPrqW$Dd8VCYSPLb)H##KhRku(3-&s|q_M!6$U z?v{q0E(kaC!y>IP=+Pl$#V*M{_d25_N|q}qX{%|P;$jTQWLevKNW^nuWU~B{DKwue z;H^b{Wyg8(rHY)5l=3z*a3Wq%l)ocgsU%R!_gd(~wv7cHhL~oDXU*29w=TjJ>XL-F z=<@J3S7MQ_kze(r?FWcsxzlYkfnCaT2-9u~ds3|!>FKp1!)Dtro-x;fqR9$y8i(ZY zVBZ&6#V1}OBLaIHq1ezVyN(;q<>oz#GfwpiqT=UR_`#zq)RMe|n|VYTZw2btUqLh0 zEa}sVBUISmj^5I|B+8Anr!6m^mC7`dUX`5Wnq_vmEZ}j~7Xz(>@q=PhC&?wFbYINB zjL7B|Q3O7I6Rh(L(Z~G?pw`iKda2(#y!D<(r);daAJwVoMStIVWPxlP@WH{SS9k9} zJGa=gV%y(%#JW?+b6rUS=ZA5RbG`CsForh8qjUr~G>DWS z-SHca_a4E^ImdJF?|q(ko_GHF?6vn=d#$~{OA&;EiU~M-ka9HSfB*Q`4<3LVAZ7(K zHZ!rafxykIV76TBswx-&lnqLIt+UteDHZ?;bpZtc`29!m57H*H<^>Y@B_xP}ff3XQ z0k*RFPSpsq*J7vo2UN04sMc22cGi|gFuVUpCfv#jY7K!ILH|o8$t8B4nWzu>)tUe7 z8RAQbW|j~WBisMdK{x*#C~5|`gjoLz{EvNf+1$_8^VjG8b1Yp(G=^G1;6}f?nm-dS ziz8b&1a4>h-&=WE#9P}~nb;WF{t}fCfDAwfU?XguOMalG7C_xZ#gxC&io=6_zO9JvgyC2W1rxcsTNZ8i)8cRh61qBs{<9h8s&oNKY2%91#d}Y>|e9Vhi zQh?)H+@zOn`d;u8Z7mSlOa$RIRNR_5U(ja)2m_(ooflUMh(gm(9Ua}~oEe8nSx#@r z8k$ku?uq8CK@WX>B8j8&shpZ~|7hBOKm%uNCr`0Ir4i)3(2br%k6e+#C%J|n>gn|* z@&mMb{Ay4!uz7PUy^|aU%OFU!uv__FYE7TBfTyX%{ev{Dn$U4sJ+nZeh*NAeCPh0f) zZ<}I$Jj;ErE5{Em=D+RRX#}#7r?i$@ujb92h7sw6Us5fuYYM2&uEq_>&wdJt zE4KHUDN9HhF4w{Bt|a*s{ajOhKq7i4W}P~#AhMnh<8HfMDNmhSFsq(sipaOhvf=t0 zOh;(P$Hz0U#}yIJ#7$yiqi2lawj$UH`f4JdukKPgIRB>geqg3vuvV_TR2V*ISWoz} zBt8pr->NllbM-2g3(TFGXcP&9?3r-4`5c>vA2OzuF12OaaMo=9<4r*_-O|)vWL74z zu^m=bff15;y`1JnAeq00`TReC>&T}lHMEt*9BTB4)8`MIETHOyvID|nSsU6D z&j)}m>`D8p3Vo@Q)tnsyp5EK^)v2FU@Pom7&t!WKD_Z#PFGf4&(Xct+sh7-^l zm?sCG*Eb<=qwZm0b|BD(v7^E~p^nE#9mu{BgFjId7^+Ni*%;nLeTV<^_mJ<}4HD;;3W{JIg|ti0IJo_EQN zPh&QP6n>+rAK-f;jonflh>k%SAZoV#*@cY1dj`W8tOQ?940hb5y5~PGkd;`32`N>s z_ARA)?JgXEA=}{v_av{B2Q_m?Y%1~BC#}r1>Q(o9CJX7U-45&-o_K+sKBqHPU9hU< zIq4E^>a=R$WY^!kK6xulFOF+fVW~>c@pS%#t>=_eXY+VsONJ!>C$BEu&Qn6 z5%sp!$92s?4XIQ|^DEho>t6PQ5m?oyEzDeZaT^VlIm-0WQV4(Y7V`+L(snRyN#7OW zmC-SjkfTv{sfqvA7sbm@HFBQ?(?t|&l(^x`EQ zqp!j0GsVY|sB$360_W%Y&I9fwOa59rlmRO?z@;uZn5)kLLCLc>c%>GeZfjf zPE(+{Yt4!}siP=ak^c0ReA4+Q)o<1_y9lNu^UCHHgjG(q-mTqO|JG{t4C&&2NHCf7 z%|&=18L=PG&-Q~69P&pE{*%2B5DE&4yh5d^b48J<`+u>Ob3y!Bis}BbI zNd@feGJr_6Vg?S%ggUTNL5xpO8ijfMbg8*Yqnw?Gk8}MBX(x0V6m!*6axOW1zeFs5 zB7vV;d#G8POcP8HGtW<&&CT2#E?6diHH;AT&K3cgsDX@eslr}ul9N4YyM{Bx$612~ z2stPOhjfTrTaan{6B=@CNN~Xhk=vu*@g&nvCo^@V%=7T&3lB9F0X@%B`j>{cA z*<5%l@s_S4YA-+Vz7;hg_YrN!kD2v?M68g`vEb;&6%41CpgMX>|3Wsvf@8`jKqxE4Grv*$HB0(>Yt;AyrhXmV z2c;v_qvBXo*7l{}q)cs2vs5+o@1~uO=2P(!D-607ZCt(RZf_{R9}D6h#D;i(cefeB z;f~fuf0g=x_3jt|Ds?FB|u@)q< zeV|FN>gV)DgSXmAHkK7ilS1(X?dmEQ$G2YAN7QKN)@ZxlPKeJJ9aQ%SCUhLK3Di(S zDGx%L+sv~Qtbm@F#qXZ*HIBSGm_l1kO#eKgePd zKY}pE2MdOhT9j!p*q=bJeA82r5+YB!1q{ZJ9NbCMkUF57>sX2}4ea?E$zkPD;}gx?0Hb(y6o6)~ggOqZ zb$-TifVbeHw6NbUn8EDCO{)~T0Z_yHs^O9PG_;66Mn_+}SywvGgMuLqKL$ySeXem- zNKVAzC=QPzD1raL#jgyYC=W!&)u&c^rKp^?f9epx;uz?nD;;ti?8~@t%SIxKm^1sN z0bt))EBpjdE5f)GD>}LEb|X<>b2IA{ZSAlo8PwL>>eT~kcsKtAcAQ&bYzb*KBAOE( z3t4idS^_eALjvk1uQk4u%nJX&v_JvjB;;Qz-U&d87SqtwmH74*W0jR-pLPK@@`i?s zYHW$=TH~AcdRGcrbov5iUJbQQ68a$yhLg(9!B-rQ^*h~;O^b8|#j?LVv9Amx*WKr| zv5wyaa^0m%R$=9nZ{3$$5W|QkEAK5@gwRqEQ99%0`9*)D;@H$3Ot{B%0#->+>`Ha< z6A<=E4rW{FF1+3tPgXi0tmq?d-ZL8<#7T;b0l#6`B&{Y5Pvd;dsKJMV^3a~{o_sP!;7b)MY?3zj?U)U<_fg)Q(sFryEe&S1 
zRs$%{tdJ-CGmx~EA1}GdP3`h{=(dyB)F$_7S}@TB_eyVt_!^QJhGcd(G2UnpCqCVA=b1sI6K2ntKTb|~s@ zC+HLDAf?>AO^)}a^%Z00vy!2(2V;`%Eo`+o{3qVPHl7f@xAm{zl^}`ECshzu61a14 zy#uCu?>f0qnpg^p5t6@NIcZV}h)cU2?Lln1I zOT|~1HIvFGkqc?q+nC$!`^w(X+Py)uNLS$MG0MdZ@ujz+1&Rg$Y|sJ#n27RY3wMMX z{cV02*RZrgTmnyObg47z3XnpX-dmD67kcJu?hc}f_3Pinib*z14r5d-j^7&kCM{EB z_&9rJP@b1Tnr5S7cHGv%)1QOc8QaWP(tO97c&>vKL!IZAY$maC!)9}M{Cc)=>wSw`^Q*Vf`ZJzPR5UZu zGRcDI?m&**R^9x$U!Ruyz6;3fSIk}4k$tpA=EpEuBxFU!m5wgUeuE{t9p|IfC(uJP zU%8SzN|aSwiO``RK&NpdqJa(yH&S4vvGT>#<+-Pb8AKE1SJMEoXMdTrq0(kKG}0 z68W-~7Y_*^#4OZ5bjrzl0F!--iwb#S#LLoLlXfkdX5Pi&>Ssa)3{cn6HBYTJ{d5~f z7w%r9cq6S(C`bA$PpUSN?66Md3n%h-(FIl6RAU|T`K`enQG2#Xu@ww8`%z&Wh3z9! zig-fauSQEoa$PV-F*_Z)G&^R9I+H;4jT(gaMP#Oj4(pyhVAFoC(!*D?ONMUZ684SG zO6u@_jmz_I5{H}?5Z+UbP?g+{U2j=aE|UL{{s^STupA(zO`M zUNL+x>e|v|K_`HxElB~IW+m?*+RtJ>;-zj#ffE~3Jeu{2B9ro@aegv=Xu{fjqf*T# zl4hPJEo#pvm@EEGd|1Mhr*5L?q|)frSHRu=@fiW|#)L*NM{chqB)7NjS}r0)*Hy2=_vVJX6HSzlr1Tz9gg zWEQ*CcbA{nGHJhN0_c-TuGOI@@%8|K1_BRbRiA;}m#&Sl{SUplP;*4wXy)H?!Y3E!} zQkdI=HJ8r{E8e6_ce4uSu63%=2s?Be5-%r_JM@v^%H|e+~nQrtJgndK6udCT$RWfQmfS5Wa)_kkBguU7G z?x+qYd&}I@lF|r_v~0A+YWl?MK?%t_a_dFKz9WgvZ(T#~7&jR2h_#pEn6AekgE*U) z6%UVG{exRx1kgWv=&a_Y9$pi}Td=)-fMzd6)HbSDGdMJR@^oq6omi_JYf|dV=q}dn zJJ#!-kI1lf_D(uoz>+s*Qqo<6A~oq|GuiYes#wzdE1z2mDzPOu4g}$zzVDmSJ*!uj z_YuxiOBGW)OMMu`4Epy9^^aSx@I8y1b;JQY5^?W3lYP%Y;Q!{aS;I{JIFw(~lvu`I zvz?y{nbFtNo7#z5|KcHet&g*!M-=+QkQX@20IS9xVcg}0ZNxVShi0|PM;(ZIk! zNlA&FgM-2wA;<1GeozF=QMvhbyCHy0vb{yWeiM)t#O? z#NF0ts;gt1TAU$KezLrxDwSjmL;|-)00Z4S98U5Dnaa&pvjw(^Yb#A+7!{ta1Ri1;zdv|@6_K;A z|LpGozxQ{3(hts4f7Z{*FHmJJpXqkr_z=@(;j;S&1!UeZ73U))_n2$kUH*tytO`l}JXr`G@M zDt*ri2;Txy-9r!~T<9?&fM-3XOB2G6Tzye5J`cn|2mGWS|CNcCCoYT&QHbb4pBGUP zxIYc*9|m5OP|q6}c^-FBQ2i@Qf1odFgXhttSctj*p%nfa?V|K_o`#Ntp#334{Tl6} zv~-?kkDy%?ntqLV@hW$fbD!d_!Cy`uTztcMzCA?HM`{R3R_8cn`L9DO;sXU1F*K+#ps_z`zVi=S@Gja>58Q5|YY)brZqW74CLiZU|>v zGX%nx)7{=KM!nx*j+?OMlnr&!p2NuIUNVYyY&_BeVR3(2f()5$BuU?w~0}WCyx!|8CTcSAp3|4(?Z|+ZChuo(3LzNJAk&e?X7#wm& zgKL;f7Zx)@0J*;>hASJ}wWyU>Wmn4{5?ovXisK))G$P&)9(-|Dsy%BQlk&ye&q{3| zR=2+p&8FrzWuUG&jGw79&ve0{Wt3^Trca? 
zvDH4`FY9A);=>UKxEM>}S7YRby3G2EaVKSO1bWkJCKnVP8+#_uJs4%_i-_>Rpf@{NyWmY{sG8V{-i?wm4V79D)d(Z#{GJ7y@N*LnOW5%mQpXF$G(*j6I|C>!;SgF z?F@i~_ah-*zR@m3AIMWwx=Dxz*`G z5=o& zyB8NnDtE0>>7dQ!Dj|GjMv<~I9tL_zJx`3y8c~T>yp$kZ14)5%*-%b+mmkR%?CeMD zl65uAcA6F!ZmjVG*CuggtJ*xv9Q{uG7Fx4>PDbSn%&A z;MD4cCi51@MyVp?RulVptmre6-7TN!R0_4F9tNSIA_s1~0C^_DOv0@qO|g8|2+EK| zQdU)1djp5)FOr0vYKU1p%I=25;M)ryk^?M0Jq!Tj_tAw)w<+p$FQG0y9|5e<>|HGB zUk0^)R%$1x_^bdp4rO(uQTHqt`H4TZtMI5>Cp1Io-k_ij%4DhJB5&3azC7@4y7NAR zJ`UbzNQPPv=l!(mJECdsj167yhWLKqo)~DrP>IObRU0Q2)`gdJzBP&gjU_jHK=JM@ zeohh51VeUE)+MEC&yEDADdTCm3dUT)KFxs-}F3C^UaN8$Cl7}A8 zX!VEk4KCVnR9nvUB;b`>{3s|(lfFYt9~wrDH3tU?Lj9acJ?aTilX$KVA6ig<*1)St_# zfvVMO`AuZt2NOzYH|N*s~CmQ2J|PCPyReypA>=rsh*J?r`=WH{CC z0V7U8FdxK)ed^8KbqqK{E#&PbKBaIuJ^mW^CE(4Ycbz)W9NJA{H)bjDkude4W2k!= z88j>t4^9DmZJ{4Wj+-wnSye)DI}*wvxcDQGh2M2Cz< z+zHTLE+^kreKeLC`}{~~!%mi!`&@~*ZxFqZvePJ_&OM|%PGUa_!xKZSc3-hKOF)>4 z8TEdWwcDLV*t~+DE2$h7<7e`A42KK1@3GX$%f@MRMkVUYr1breesE6UbM)roFbc(1 zWFp7vy_i-ow-zAU&TV-GvKX#cD7H(gs;+5|Bm~;=dMQimU^su zLlx|ykis}%f@c$t94odQBEVwh4wnH2;BD{hJ(GF8-F*DZLSSC?$^^^nOYIWfSNBj# z_+UslqH=Ukms6G^@(xV4FjR+$Ek3a&zvz5A8qnbzV5bqjl5LY$`GvZewvzG<_;t?O zaV%wOY=B0^JH{3k)&^LP>fIEB(INLD^%{D)z~k!2&xdhE0#ZIJIDZ@Lu7*8h&@aUj zQZ)vCts_G*2Sd%lI|fn+M3p7`$%xLLTVVT~KINULm6iW7cZXtG;a+NOKqLC{ zfWH8Ka+27MxTev^1!dh0V9ft)6ELaE3 zL^hjfqRj6EPkdLYO?<3-8dpKDInM)U&JFu%8g{H7PR7YaPgr)hpUFWy%_LlGsYyxo zr+XT+sGHPB;s_L!)QOAU_(Y1~b-P|TO18OmiW6w=AcIHWe1R~Dl55UUi1awHZB86r z?Ng4pnI55mNC8A&q5D~4{4n}a5>8>OQ*|IB(V_}5;3ilkm$&(18zYG{`|-|sXJta9 zG6A=#(98@@N1I68IwPWIalZ_G}3mn1Iwt31?@_U@;cEddt? z`zsV$!+DF#R*Z44X@I58#MA!da$I`V$535PvyONn!-9T!rIM-sw?X@=T!Tk)<1YZi zr0V7RC^ZgjDorjOm#@vI@bHcEy|TIc@UVuRsh>RdC>GI0O1#jeQ&nSOB^ZamiZpObN1d@O4 zdRC^+a0_);XKM$`-z#6thxW)0ZbJ0s^M}%@O+}Gqbk<$i58JVK!8O%V2nG2rZV9TR49r8*54!1dJC%ocdfNdPS z!Mh9ca(eqy{2b%j8J=Hzqz12!+WuE(5M5nifiPEbMmV}~nIoLxzX6ll;1#wcY(6G^ zc36^FBW_YCzUl$Mil8IpH%F*TuIij5k5;aje+R!Pz(Vs}t`mBYweZd1>5#b8 z1jstNWhET3T~F4o5{DDLJ4_m?@887fS{cd!`8d>(u$=>p0tt?D8__ zmksz8b{A&=X+WJ17+bFjmAQ;(>onbMouF-#ajJWk3zR~bW$cfh#D0R94IWzP)@ z`}{s;!}vOK5{&1GURRLi@UM}(s{WhEnL0XN2QD*S5z%o?3Y?Q_cGb#R2Y`~$?bDx1 z%;HncmJa~Qc_1|MY8t;TkgeJ%viGJY&Cd&WRZnLC-YSzHts-%f0*JI&t4qhUkF&T` z$>{hKPAA1;VhKmnMaGm7RNr6HT11#g#GHWNl7htd#E<>e=L(c8TQcTS zxOZU;a8-89XrgG;v?ew}!|+l`ZxYsW=mL4&@E9X?KAzRoF=ljv{_J$$xs0{RG3)kJ zaZw1BYwuq+p$^XZgwEddbjpApExaEP#ZGH0P)uS|I6bnAmm*-!NiU61%W_iV zjUDMo+gk-B3TjNK&V&%3(RmA;ec6FrDBxQx9e!U#fV}MUeJ$t6ug|qIiR7Ph_0(Uk z-hV%rz9hoI74G0_tl{AZcQO3EEo%%bchZuy9#&d4q<3tz-x+tn5RHA?<-vzp$x-2r zIz9i(D|Z5(R_mg>7SZO_%4I~GVQ-OH)-IT<8O!7z4OD@->|0 zAW`Wo@T!@gKNzYNP=IfX!m5F7&-l&MR>ADP{s&;+CwFh@R4wWxWuJAi@jZO(x37Gh zTd{h}9sKjx>-MA61oC=52CC8wFlfm!k4Wr$`rCFB%oiGuIEwL!Q~-Mne^!`ic+k>X zz{uV=5gnqx1u23|vO1pVw}aBucWFv+@zxuKnanFeJ$`(mQ@(`0Y zh<-m(`-o1?ye5*-WP@&1fdti#r4g zWZ$dnS9}N_2KXF2qydQ5&o+F1paWg#aytlDQlv%RmA>3^iwyX&_mOZ(5X1KspT0ol zr*iKd`QYv?^3jU~!drAJMLZ1VAQ1x}ouIGgsF%-haw1Qtwa2+vLSH zv2`QNSGW3u=K53lHqyH0H*Uo63V&TG->@Bjs@`5PuA$hC?4$qtzW)c9{b}X)IKEy- zZiEZ>x0V00ocw9$_9VL=-Zvt5b*}4!{5i`1v~atlUQfIm(ZavgMgMpH{i%Mtv0S&^ z8zH><`|t7juO|Gb_U-b$Zaz06d-W{;*1l;)e;T-*TK{Q)7KnuO|J1yS0@_uWkdUyi NUjA1@r2Dtl{{RvvlC=N; literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/one_col_blank_line.ods b/pandas/tests/io/data/excel/one_col_blank_line.ods new file mode 100644 index 0000000000000000000000000000000000000000..df5fbcfaa0357a3b69606bc2b003dd36d6b0f389 GIT binary patch literal 2882 zcmZ`*2{_bSAD&FMhJ<0bT|^{=u~Wttp)rhYq!CSw?T(=_S;nQTMYhONq8VYbmNb;O zkuB@BjIpOFvSyMk5_j}H_v!n(-#zC!&pGFR&igyhbN=spelHRZ+q{IJB(04ydT0O#Y5A^P~^RfF&zD*l*2pFkD=04(0a-~Eao7Ee?O3?N`J 
z9)aFiED_0$2?lU|k01?rce$_u4+H>g*vIUh6i5ukVFOh{{BX^vcKjIhP(5VUkcW?S zTTV|QjmMVY-RExek-HbHph2;SEVx8nBvZwxaUa_`IJD1}N zA*_SYAYnvncH@!MCVxj_%aaxp%TY4Kx3}rrj7@NYnzi?YqWZeS==dwdT^>#cAW>iq!9 z%L{Z|b>hzG{O{T_UW)Q1Qjezr>*`(30h@I41KQ~eVCsNRaP7Flgvj8>kA6(r@q;CD z=@U?2X8CzLr11n2Vf(Q}W~8Ii5Lr)qC5qnWl3L*_8*M!R{YHkhRJS7eE{;4nVdr7I z0;{#*^E@!z38jR>#pXaK^Ypx%SUPy zg47`)PNRiSf}-9Ti(b5r6^T#2l+UQo^!F#v*G)^*a0Nl0)0I{ZX!Z#rv1+_aTlMDO>u4ZM-Aa}1%6?m(FcQ%%mOZV@`~y;FdD-fA2`Ae zWs(z%x30C@QwNWuv!5c3luIi$fs6*&5W)(eRLjSo*SDR&dbRj$O7KJfJp0+M?qMIo zvom4`Hjymtv%M>?IQlmm;&vljOMSDK3AVYqX5nsnv8%p11-)6j_wVAGj-yUktjuak zCSP-Yk~Eo>{mk7?L&Y>YQK-&rMAmZH)#btkR=Xqp;XYw}WR0V9M7+8?jT9y6zTbTL zNQX@wt&J<=#gp2)=hNz+3>62U>x&CJapewDUhT{?eda9~QCN+`qw#ymZz;yt`$#gT z&8Q>YVILx6!V?y>Zk@QM=eK(8)Lqba{+Sn}QIto=ydm1~kVqhmA%V+vM53O*Ac}SM zM?W!I3px9w)7eg?Jn~ z0P|rKO)z7ueHxXUlJ791+tnj$3 z3To)L-I!Y%P#%te1ryc|GI+~_20gza%Z=2!V^1!XyxiT&twSlz6u)!u<$N*EQL$CK zyGbp_qZ`T0yK551ZL(34hECo#&sztgO^N=F4~m5=Y@Y&MJd?D^9TZOvd_L=GDoz!a zcgLzHO!sRc6&N%>w%JMeAgB-PR+h*l9Xw~XvKmsFC24TV@_dpri*Jo19i|nSX%ar> zD9=LJ4>du{u2X5%ZKRd;byVAgA|#zeXX(}wAV}e~HLSiP zCCuDe*`Cvc6+;~>)F$5a2`xi8%LwNhqQ1&7MKj>%JFW#>?EhF3zxFI~%iPkGlU3qY z4CYpPtnelU5l1v}H3*vjR^796M!1FGO06G~$V|HBO(Kfl_goUgne1}zDkCB;{U*p| zmnm`DZ?=KwD5WoG(7(6ebt{U8mi=*e87cjz$Co&#@IOm4!>Q0Ka~?%Pkod~sQ3fcW z6s5xrT0N|1RI1$1yAPsrqLI7tn}XFnRezgw$O1ziY}U>dutUyXuc?zp7SLyz9?=>qd767bLzDD zhtDxlJv~hNfeQp+N7TjiSWUPFT@b|ekUQCA`}69S3sRqU*{ZqhYLiQL75~|Kruyn@ z`5;}r)<_GiK(iYksx9;;O)2j|3?Zgbe3S=LI3oWJkDl?`7FyaOzBkX;8CSlsso)#v z3j*HF*?K8hZD)`BOc0M54jydUns#$P(`iDIzb(8o zY?1No@WT6zBrO5S#ljJKXh=l(=Cmhe$trcXqGqn)%a?M^7w0?5Z>!w@?69N#mY%q) zwZ6+%;6F8by+dGmn7sn#002bTJ;KbvSlQD0jH(|7@8gLL{I1rrRAXPD8lTxl?3>0A z!=NgqV|k0DsCB?Ah9(zU`zYe|xPdh$Ey&h_a>`Pa6&`w3H>=EAH?U-Ct*6dA7VK-a zI=9yLb?j4#2nZSD!dvfpYYp>z#3+QUDSdP7z@TMw4Pya&%qdd%R8-WW?6c9-OZ;$S zp^d=F3lr}pP8BAecFS?1iH|>>($`FVFoc?H<8+TQW-wNaf79bkjYx*sYP#*~3Vy`Y zK5A+plTe*<*qM=tBh98Tu%$j(WJdI8I#Tr5mX(Bu1T%S!T6}B(eb-)B9c!E z>K|jRtQpj`Y@J(}1>JLE+&+%rNoe3v${5L*^;-c?QeJc8Wrr9>@z!tqYHR;cTnsAR z5A`v#kLJD1A8DSOy^Q-c<@RgI+Wxx$^!rAl}OSZ<4t z*#I@rw3cUld<+)TTvdQsr-L6T%-AYb88(eoBSvvY3Q7kL2-t%WVJeFG5AW8nCQ2{8 zehdUAb1#%Y&YV|(E5WJAI^ Rx%O5#+4TsUjC|jX^IxUg^cDaB literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/one_col_blank_line.xls b/pandas/tests/io/data/excel/one_col_blank_line.xls new file mode 100644 index 0000000000000000000000000000000000000000..dcf2ebecded612f49ecbefc9e1489b27824a1064 GIT binary patch literal 25600 zcmeHQ3tUZE+h6B&suQJ)L_((vN>^RoQVO9`avzseP8X#(io7&6WXLtSs~NY6$Tft> zeKHY};>Doch6#f~V`ezt^X%O@?X%CNdf)H;ec$){&g!@J-s{sf2BXYI95 z71s@GwjJtNOIV>l(I8*MIz)>>=fJhIByB+8zF3S&$=M06fus)qhcuwWhlbSBBwqzz z)jmhKM2k;IEu0VXFh$5Ch!e>SqEEb%!RyIxS5K)G>TA1dDp>S#$F+rW`S zj!|XqQk0&g3;7xH^GOz(I+q;P=J=6t5>H}D1QEgUe@~7cBnD*kkV>k@l2kwfLEdN* zLy{<|?KvFCeG1j7q+YxxbNmeYQjLMyGnC4tOnb&n=IGaCj{Z&L;1F+>wZ*!)HIair z4%L>xAv#TBv}yu_OWHS)g98?+s^w7YI!ZCR@H!Ap(v}cQYfCG8D=X)|6CEU%U3i^g zgzvz(Hz8CwbrLf}7orUvPFmQ}nOPoEU3gX?UKiOMEEFo6JL!Q>k1%B3LqJIq=0MBR zm~6Of>0k->!W?|U9J=uAi7n}l1sk(XWLzU%CchW4C(!Ykj6eVuFPhTG&fB|@p&PLy z@GO%ul$37ZFDXi~Z7f$qjG-L}`5+chXZmnfpr)fH{`>X7Ap%;(c;wRfns7MTCHV`K(3RmU%YR4-&VO2-6BOTea_x0m z3H_)N`Y|Q+!%FCMedYPm^0bzdXCH+Fvme^7)*2daXuY8|;nD44D5Xb5oiRUSPI$CD znlOS;cK8Qmpn}9cZI#gNmC$=Aq{C=V)89A1H}tPo61vp>X*<-G(5pxYT$)SzBTV*a z9GY$-q3bl#1%#x-1A>-cN@wxaljN7`hqfP>TT1Y!z-31Xj;@3*m6K(kxrT-&txZ7B zbiZpO$q)Fno+bT4N1=YuYoYYdlG9j1uYzeE=`YoTk%Z33Kc8<*=A(k7p+89&GjVMy z-+`pc&@ls-=)w4}0arM6q@E@4d}sqtUaour%+jI)^t7!798i!Jap1MI01l}w;(%?^ 
zq5^2NEh>PG(xL)r&Mhi{{i{U1^UVq!2ZV^1URYu0jyD@CuGr(jvDL&Tz_N~CKpL$?t~&CfgD=ofe;nOlJQgz*#^pi%@kZ%y*Tl-}ZzBVeOSBY& zh%c!E@stG_t3(oUT`ohC)vV@p2M}$lIZXo(Qc?>T3VVIn->E%I|49i zIt&`zqCzVLv{XJ@sNRs5DU`Qjq29fFCnuD*x3`>7OqxU}q&3JyBtkK1@Q!Fl*t3_d9p`6h$X_8tX4X~*o zF&ZXKe#!*?IHMM52QsQn<&S3d$9_M;j3)VqX6_eMoSt|gRe9Je7M z-^daW<>MAkj2l7Zq(dER5MdNkI{6V4rt3|Eln1H?>C_w$_TbO%JytRxkWj1>oHHud z2|82F(1~i0_RRq)4^$1(xj7&%uBt(-Wk4XISSL6wR<09tOq!t+)ga)$H4Rc8s2T)T z_?iZBaa9e{RR#nSigkiBf8{zs=eHR;Q4Io1noWb02dV}E|FLNh7gyCF-DE%@p;#yA z+$h%xI=0QwiE0pV&zlA*4^#~zXby;rt7?$$G9ZvptP}L#lM-^<)U1(s#7e?K$t5?jX9h;4ZJR27kZ0Kn%r8FI3)T7_-DrRGXY}`~^nmMp> zYlfwj2OcT?MKPQ9Y&Hrk%}NCurlpk!{&8iWVm2MvY!p~pcNJ`ymR25k@7_JdY)si~ z6j+*_3N}njD-RSu+oYIHM>ZP;mgb~_4b#%f150kdR?NnX%|?NxxvF5pw6yZT(wZZR z*>plS9;z)(0Bk&(VQDU|D|ahto6c-D3M|c91skTNxwyVRn4!2fUD#|CSelIrHcU%% zaV;w=Q_RMk%|?Nx*{fi~v@{o2@tr8eYy@mJ3M|c81skTNxwz(>f2Np?1)GfmOLJ4f zhG}UouDdH%Dq^EcEWwE-(&y#?)^oG7SX=3{l8}L-i!>-cC?PaFD|LxgBPduD8`OdY z%CsUvUZwU_3~Jp73iiVW?aBgW+6*DMmOwq#*^+i`1O;nhgW9q{nHB<7K1&pXc54I$ z+hBt_ut1siK*)rgYQ>=48$rPm*q}XGpv=}MWX3*!MWB3Q15a1t8$LCg-&?ljIc8!F z;tR`-Q>9N}O}_Y04YOpXsvicL(&=6@p#$TbnSCiFI; zNRkYjg~DO#6-yFf3Wn<_`0PbcT+Wg(z~v>1ffj)`L~s+=GdPf$nc~Z&bYxO%xcac0 z0^UaW#Rwzg1b)K!_-uX}ORo0fRv=dlOa>!K983-U;3^*em{KV;ABj}B{-h<9icU$S zqEiy7=+qimw6#mu5NGoRvZ82eK-K*~CDY+A90VW)^V+9tL0%hHUM<*l*qB#9Ccu+m zlC+FNIuFWpW0m2F`A{Yq_GpQyxo;$yD=p!et_>v|StYc^x^g9Cd9@=>FasxpQf9=5 zCxj;@islMN2~(1?`Tnv{Tz9bn8AMWGlN`2v0@ePBAjn+M`6v)41?&MAuOzLHjon~g zI#4%Qp7Bpi1VimBi_7UK<`I8TS|ZC-aYbB$1;95P!VDMm4=>)o?tE{XX0xXAvrHiNm0-+kT&w)!A zou&Py7xtIoiAln2euOLrw-xjk#7F|62-Ht9^5R0ZWJtqB=Ei}RPDzF|Iwcv>=+qi+ zYtoy*qE>Kn64l~!SWMdibTFj@`8H<``8Yz#v^f$QC81LiN$8YB5;}zgW}kFT(4Doc z2%4JE5Ft=!k<{@=m7%HjMN=IWn-mY)>M2`FLm!%F6j(iOhOA_ ztvzU%QV^(cy7hUKQOT??px!0p2whuA3Qf$Ac)`F!#)gX$aLA&ES8rKCxvgQO9t#{q zV91hDOVO~34{=JvEBWKdO2#5d7SzrV{zd^v1XbISAo5)5DhTS1gJCk*oeO9&mdvIs zs4)j~8?6VKLTwjb1hopmH^g8WBV&ZZq@J7k9mr+|Pugthwlt_SmUBr+fCkSPWJIzBdGSF13@C|og7-je;XJ`FF(C<7mBL2Nogthx$uVVm{NB}9 zhF@?t7*s#x?}bkthUFIe@VmZPaB1bG?eow6w8`3}?4O0ZM*meGSUJi!Utkh%f5)}{ z`@WAwCFa^kFKo=+HSEAUUpwnMGyh3>4?TnL>>HE$O~(%X!t?YuzArv~%EohX`q0eH zp?UM3&4>;x^jeuEGSB*<^3Pt}EAF$qrGHLub2WLy&mHPASND2c=6Cz4M$y5}MQTou3yfAo;;vIa34pQNEkoM$q|nktFBMG;HywIWR zY^S^|Kc^k3q}C=a#?tJbS>3(hOIoc?@-u$;t$w%Hm;K8=)vohh1Iy6H;b;yJOzHqhY5)ZtUoHwWnF$kRs=GM*2(c1fL4s?>qHInL*{S zLHFDutkZU_o*8!AeB<|~H6DRCKbVBx_MEbRReqWH@CE1c?}N{!9k72oh?n5GHZb-{ z#I}5=$0JL!@`ZbOuJ{^{0dsET|=KYE!q($-3hidD3Q3MB$bFCqELu+>Qe-ZLQpL@AlKj3q7wrG_KAG&>L@` ztF5R?Vi0z=}>lLMcao%f0{KS)v7vc(Y~Yi1-Fjc zRa^XKe}D1`>uFxg{Kr1OS#SGjP@P@3R@`2Sg&$ry{l;||pbwf8g1vzuHeH3DDE7{R{ALwjPHvedP=w|<~yQQwO zw5VNN99EI`rS#f&dPlt4Zx^lDYjOKV+N={N&c+n(u77e)aPP>d9vga`-1N2HR#Cg# z`-|*e4ci&4Q89zxJ?O+g`xN9{t?U$+UU{I+{e9Eo3Sw&YK2NaAopa^a$Xa)s>aefQ z&FK?#aQmX$TaCjepW9$|bnum12L{-lE?jWHWcGx-J$YdZeTO|NYgHVwKi<9luB(E` z!S%UszB*C0JK@^8Uu>3qYhPSt5@~WJ`H;!B$kM(f#Am5>rdRKPHy!jWmgX$U=5&A1 zU&oh!iuYsSSyA}m3fuT|X&-D~B;MT-V`k;|)mv`22;tDTZ!c!88F=fn_SR}oLE^(x zz2|-T^W~?+Xs5}!USi?tV`GG6ueSAae)>TC$A^ax(`r9|sMG%~?CUiNCCl4)`*Qz! z*y+DMuhQSWxa}`HQ~r8YFy}y6$>-Mv-f91MQE~dpjk6i`Lo*i5Ne?dZw_l*q=a0vn zGr0r32VTtfv3Nc^KjBIRw@bM}a&5L z3uo@)blFp)+q!<&%$f(e!9hP;WelG<&vQh4@8e!Uf)b+*m!p zhjdC;-t_8vu`q6fZtu47_9eS6#t9y|4c(%<*XQ!0k3)E0Z#uzWP~dPcX-kUU;7Pj@ zYTKIW9Mjz1=3Q!_gX;sok(rhoF85!WANwZN+OK`tgXQ``6DJILlay}z`&}#hjDTlZ z7v}2i^3KqGJbT`TYnEnZT{j(1dTmu2ICWXjW{agp?Th>$Ra|_~zAb;mlg>R~ESR$? 
zb#Z>&8;zy6i~rI&xBleyVBRW=6Pwq*BO}fPp181~-udvOW1c%AGbW9X+TcHC_Wh?9 zO*^lMowDd-|5D@cwi}i{c>GrzExq;QJgyJ*=35lJ3?Dni%V+l9J#`&g$E+C@c6hU8 zeda;mqQx3T-v-YqTALDOv(g}J*C%o+;qEED(bqN&(|<7|f~(Izl4jsEd`G8N72&-4 zulD&{6i(YvZ1OtgyO~$g+Ml>obGlzq%*2}0Jk$BHrbAxtEZ3fX)4%GjK!0QZ1zH!@ zB@DPIN^{))>mHMwK2J+O2IZXObhIjXm^oyA!taNxKCTsiS9EEn=fvO(kH^I4?F}gE zQx|&qLFmH6ZR*YA#}{yM{A!C*qENVdh+aF$E5UGQ5?T^c%Nm1 z+^Yla3Vd9`oYo#r-WXbDJZbl{nHK3`SS=fw|q1eQj#R%PeEE_fUb0M{%wPng-(=)8Zx zey!N2kyhHnkA1iNQe?liJMXP%Cw%eV$YuU!yWBI=-WDueXPvX);J+w_+!Rt@Hmqmjs%j0&g8$GHzDY|Fe;z6e4 z3ZGUlJ#}tcc*IY!QHz2{){8Fha}eDR-I6t0$2ld++rPTRXMl75)12{D#*eGr_KH5v zuioEzna1~iuPg_h|LMHZj7B4P&fA3vkB8ZdVTxk z#sbGV!}2QjXWVi0uQ2$K6r7!>=TQ7RZpD;;e(T-H+EN5}%;4#cZ%&zyKzhP4-qs^tzS7%o&v#rLetJ{v$qnC? zwT*On&@*OFM}D6d>t=2~H~sj_l|G>+!!z&noqBM}f|ZG$ui8bg4zLO8W*utd*mI+( zP4{7Iax0dO`jo!E*N#=bKIpC+06+`NiQ@6g&;mrThx+KD2UNtdYAF^3|_;at0 znbA(>CX>F57%=P3B2Am^rR6nNIeP+mmnRe#UwkO7I*023+hdf?Y9SjVD464h1jqug zr+JN~ok5?BNgul)d)m}k&=lsA%tm0VpN+(x!!#n@QiMd!XxJERBWT390}D4yX;hd* zNPs48-~c~PlT3ozSK)?dk^>G*4#Ty88)P}qmNRIW4>Qx>lN!i;PuiMmjJK%(4k?L( zy|P-cKT8iBAav5X;DYH>*M@K;u^$OtQ47*Geao5$zkMlig#N2S7rK0z8L*fdz4 zlcQn1nfX3bZ9wpAT?k);|BgtQs`17YVqoeGmBP|gPM_P*3QHi>4;)+k>kr!kiYQC) z0jeY{gEn6#Q(rFW3hVXH`pZJRR^^O)EZE0K&=6_2Gkl*Ye205wFcB0P-{S~0ksDH-)lgX|Ie+yTAk}? zX0T=h`2Y5w_M`uQ5hAQ7QvQDkoN@hV97O!?ZYo54QI6Yp(6`4OP`J@xK16&qn*kBm z1C~R?^?@xAndH0kbsyN*yk?51lK8WJ%ZkbL3DwHkhr{n*~cb|;%4Cv zjak$xm1Kzu?x=%W18NPZHK5jjS_5hgs5PM0fLa4;4X8Ds)___A|Bp0)9x*!1=yRk0 zjeApY;|KcG_)Z^vecacB&NTY$=)dDG7WDhk$HyP-!FT@X=Sz0pps$bX0rliJ;b5}+Lk_;@7bEOjJ<>67dc+e|_k z3*1HUfm#G5N1TM*>@)lq04aZzp}7S`u>TB$CK*Y6{Da@6V%_oUy(mg1q%?lWOXrq9 t+5ms-L)d?s%ikPfu=7CBBm;pPemFcGK1IS7Xqh~{{)PJE9#IMZ{{qyCyOIC^ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/one_col_blank_line.xlsb b/pandas/tests/io/data/excel/one_col_blank_line.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..9257d016c762a1cebffbbf28670eb311967b9dd9 GIT binary patch literal 7992 zcmeHMgrDKpTVE}1qq`NyuI(BFY>2B#zQo2hC=`QIG2?6OwK=_UB?$=%1 z-(RqM=X3A-na{oFnYrhj=RMDRUL{#Lcsu|i02u%PPy%KH?M!`P002{X000+&467?< zZ|4NIb23zOcK|!;KX$XV0cOL)(q{r-q0j$!{EIyhuQs62!G_gDb%YsPPVX`RLpf1{ zM2|yIAdYQ5y~L?ZHh0S_?#LYVslDUU12Vw<)w(vx|qpWb0*zkgx_9a6`&SSLpIyS6!t}FsG5WF z4pmCmaXP;l9RlO4v2El`JKDN)2zoc>I?Z&(H}z@4$Ew!dUCUzolTVl8Wu7sr*_3>K z_*RLDjTFtN7X{dnsHuX$T?;YxiIOtGHvbAo*!1XdS)#$U6=fFRS7Q4p%Emy+!sRjl z%$gwmm6DazbM{VG?N~)+lB2cuqT0Z0{h-KZjJlWZq;2xX(+DnY&Dsx~S&7Bii6LKimN3x)Y>%L??}WF4h${6G=9a5HpeWZ4~X`0PHn5x&o}DNYPm zXkZ}RXv0(wBz$hM$r`Fe@?VDlJ8`1C+#l!VJd#&?+4hRx_qo}VEsnYDZxRtNq5QnN zg99l2jk#ieYh4%6bd-Y<6Aj8-LkQT$k@fNS>wk>>7t{1FuO1OM0%a_E_kmQC*!uMs zJ1OL>+D2gRA{Ha&3XL5}ZDbC$@Wr_qT>)nu*|&p}kvC0?L*oS>c*L2?{G65jO!B+5 zrqvHeo624bcZm;wq>p0afduG#y{z}zj8!2IYd%v3KO|t#PT>vy9=im<{eJ z=RXNq#tHK8RU$3nPw@|@D0m5d^rYU0uz6k{t%c!OX;=wDYIpt6nTFP-{OXnsqu4k-~rQ-f_f}y$FAE_?TfSH^5Tywm$NBO@NM4 zoVOCGl+S|CA@7jWh*Kc<>d9r5ym1hxJFQlJZ>r7f3%r?_7Mdx))e?m%?Of56hh5cS zvZ4}NIL84`Rs7U0Qkj-$*hd1$-_FwJJzKpp32Ss0$tRXCtn_wYJY`rDsNW=vAxM%h z;-V8zaQ+$r;dcrh|Fn;K($z%n`6X+P^v1EmdYmAy?PS%W;KbpJY4X8Tm0|HDsfNZU zMl^f-v%_~>7m-wJgM4~hHK%CSO=H%k@axgiN!b@)2z$D@J>o9+P8iLduP%CqHa-qY zm0zF0-7QWo-lG&UZb2;qv%(k zn0NBET!kjpqb+ode$Hb)vaE@(Sz0cRSI<7@9CX4^)wVIwSCPWMz_r;SK`5UbAA&D*JbjjY$GVgFbvr0y{zmun zo;}C$?rOgxG~B=DjpnnCy-(0PkDz%&2p#{RzoP{h?BvM$V*q`h2dvD@k_YL%;{rq5 zd|}XU#b81>0+j*cl|IPy7GSe2v-rji^fiAAB%lHeDANcr2Q^GUDH)qKSlkuFUISt; zr~V85pE)bqJmX zK0tpx+hGxc!n$i*10vL`3mygDu_Lo_a!Mh~HCx(#rsV5!^;=Lp3T$fRDh6?a*uRtD 
zSGTkgV14jK0sy*!-*x{Fw>VjVZNaQRuAuL_e^+}boJ118g?0U<#nt1TVqFS0s~Umh!~r&h8B8NE5Ls)%{;9v(74$xI>zq)oa3A*NAZOYRnb(Qr*r zt1?HdHcKLE`>21_tl9G7VA4ZTo!ee0ywEl1J&<|(a+2iXaz|hMkW|&yWi(xm%D_c{ zcptiQ;!0FYcYBN1&a0LMaU}M3(7CW6Hd4Wwq{?H=elRG|nA)y_Mq#CqcMTMG*iW!Q zB)OT%{dPkqZ;ZYtg!)5LCgFr&@<<<^2X5Y2ORchC?qRA_7UJ9Z=maYpX^Y+6nj$O{ zmaBLV^AH!$Wb^J}y33T)r(ppBtbE=QcV9Pjxd*m@xY>~C5=!o|mnnL*Z2YC$ykA$T z-*6(gXO0e?SpyOp-x)j35OP7hh<)CwYFYMoP0f~^`J-F=j7bLXi#RHk>$r)%^a^(5 zR--^G7b(0oLJ}fAYYW3S@K^DC=a1>Jx70dSF2eAk(%)XA!9UY-dpa{DZruq5n+0}!qs*qTMSiusM-DNNt$N)QqzqlSG@+fk1X5Fxc}SH;_}_MA%dj?A@A#+q&unnrEWCbyZtvW zy)FjI4^%Ok9?Kn_7gH>KJMEY+zdN75FyyOo<>do?!o@NDwAiz`M1|i0OLlXF(vsdu zE!r6;jJ@SjK;GXAyJSf9rTwhJww;c&Dgw>T{A^;CFX~bTr)TgYdj^)Av7JRc50rBu*$LLdDV2jkXI9W*EAoCcP9w zz|*Ge8*&xzPX*}=FtPKcY1cbZb49meI#-fv!-4PSs)mkm%XGR~>+SoLQ>(vNVj3wZ*uB$mWz^$$&-A7GtfcnkL#xC&9uKrVuD z+f1d(X!9NkPjEBsA8wEhmRypRZ<88pCT<_k^q1)?q>qm3vXp!jY){U1F3Wf%SSP}^X3d=rWbInnW;EBPviBP;;k%%jBt;ft zf|(?sKPwAd=wiHkxh-vo4lW)HhsZ~xcVjFeTKlZxYWkBVO!*57OErNcnV_nxjAUvG z8}GWg*M}|R=R7VQJ(g|@VWsiTeH!M5GQ0~`4(Fe4k>*ojbZ9U^YH^F{&Wn?Ju1{|7 zyfmxrSq(4Pulb^O47!8K*BX-g6f^`CSXFCSKh746N1et?zWLhe5$*XoV60M4)sy

    ^n|Jv3?E%I*anYp*A|<2#PjM6Hc=#1O+ZdoQF%PG_}dqhO*FU; zCb_{G4G&+Jt(tPlD)JSpI~S#BX!x3e3{Xyf3ulJYf@g60qeY-8ElhiJ1{a!3TQ+b};d z^G$9^Sm;en@3rUQKLw*cr3lav$(emY5ng;nyk6%BCqF1Bf_u=k#{GpdU=WXm|CUrC z;uFX~bQk%o7;O|muThRhk^4rSeIO2vSNM_4gR)l&sSivZfiCDn`9J7*?<1yek!HVF zw~oQOty!Oo(YUo)Z{4nYb>tb9Kjzd3%4W{^AR46T7whL!sK?=CZ2aO`JyFbYgSM>&Jx5E2D{>$Qu^2l$H+;>Dsy+ z{-TyZNy`jRv*WbE8(x#pDMNp5=O`om+Rahc(X@Y+eVpI~EU#y{plN;0y8jebdec3J z%W8VQ`B|aC%dJ+bex=3<-<|RMqPil9W zrqTKqbcHh>Mua_&-k!47zVXz9o>Nm9?ZcD1=$l-~Hjk?cZ{#Jk$@!}kipXG*rh__^ z3VP!G;aCe}2-r;331Vqy{=>hiBfV|Ct*wCe_IC7LVaDjJc*-QoP|Ad?kgQ~57%DiZ z*{{G^%CyOC&^x5isC@_jyY&2h_3~4I{#d)%jI>!nH9O+Ub>@MQlVSQPTX%@+jB$+h zHo0(hLMLZpBv|J7{LFT5<|ye>DC@Q+P<@d;JlZDHT1Tmqi*Kab_h4Xpho7FI7h}5D z%!tl++`(;oq{gI>M%xod{UJV_EW@^#btM`?JT2{NLy8H8mepXjf<-ycE5W+GwBcw( zbj7#C8NLDy7!zQ{vttT=x3rwjLQXD3vi!dJJGJK0H?KhlRQz*Af$ncjRyJK{o;>j( zfAQ^f+ySa3fAzoyehnWp^v%VfC;2ZPfaa|+#9i9X4D9wpf{(=+0bmh;iLjs;pP(?# z7;Xy2_FjN*gRnOv)(FCCv9LfwDAbkz@H-ltEsfI9TlP?HU_%#&X7;8^5PJtlR#STj z_>Ug;e``Xh>muR|WjomLx|R`I#QHqaYMj|c=RZ*dVPe43h1pZSpYKoQq)yRqX|`;f z?Dsqb@owHihG}M3>5Es;B3P^wa(Eum1G=o0i)^EorUo3MV0V&nN{Sw?7a)Y=cb%zs zKiR1DB~Z0yD}yI-i>r#?A>B(K$1@#p1*SwciMju#O464ul*%#{aWISTAll24xRPuX z^$COUOKgaORGVb3U~&->a#R*9O20O#B7t}= zRQFrUY}!53%Nx+G*3*`A^$r#1##Ns%IfwoSY?^7S3Mg>$hn>XZ8o^#cbUO7tVcPY_ zOv~}W{dw8zysq(U_6}SiEJZX1c(~u=>hPUG_9IGZ(l~0l=Q{yP>^T9RE-2nZuD=DN zx%VF_n#kCH=9H)F>^mF`b z;?j#`e1@knqg_g&8`*dW@Fdm*=Lv3eUd>m`sMNStYox0fQe5V@E>7<^Lf z3LF+?9qG`zz~L3YjaC_4NAr;Rrld;C^uTM2Wjq?#ntjh zzBymaH?BRMtvX)Pef-uT_Ju9{X;j@oVZr_gW8o9ix4_6j;8I(0lSH9R5_*uDZ$@xPMGQd z0Wwv_#8Ddy{N;;u&7=?X^oWCfPyzyFJ2R*-FLP#-vsV2eRhsXvKzD9mYoB&U(`Zy^*!+X?L>(-EZ-O zdS1TverK@sxIZm!*_liA_$KMh9vX<>(-#Jo1zPR?c^mN0aQ!*{!=|8;?B4csegYIhdoABo8A{OKTY8t{%(3-(%eV5 zuU38{=>3lHz0vxI5B^cR+y}g0{C@%(;NN%ZeJOAs>+c2rCla)Q1;zTM+}}6<`~3V@ f^JheVG5=$>R+2@4rXB!*4*ifpvloH%`?voC>^oRm literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/one_col_blank_line.xlsm b/pandas/tests/io/data/excel/one_col_blank_line.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..c249901ecc10e1ebac33e4676be6a56e1d96a0ec GIT binary patch literal 8650 zcmeHsg;!fy_jYinXrT~_yL)kpJHe$m1Ph+vR@|Xb+@ZA4A_a=JI1~c3#fv)>FYXk6 z>AW-F%yj1a3*PtMm3vlJ?z7j)InT5A*?WUjQBX+$=m1Or06+_%t!nahMFIf)Pyqm9 z04DMiSyvYr*ac>;>+1%FnsWI#JJA%QBC`|%kP+|y@A@xZfr`{Yjcy(S`E#Wk*)`7C z`C4gA!9C;wA~r3_j(TQw-S#zi)vI&0(I`OL{c)G0+Kv1j_bQARz1dD^gphsguNxBS zAv$K0E@su|ThO2(KDAK?hnBF=N~CFfpFqLPDijNa349#=mn09vrS(G6;DG=|`j<)i zOnwPa!V+V+Nc61Pz6Q%aQB!KEei$BxCTQTL@^n+t5G6AqZtbo+e+`&aEz-h~kywP3 zy)+nf#2KOk7RD$SUBa8TSfMGRx}>VE^ndxa$5>Vl`P@q{P#K zH%cL7*$|1rIC-D2XkOJLwH!DK7nj1*;tLnGOK?r*It=!#CG9^jjbM~i$Xx`J zd0E<*A4|&+=-^#H0$xp@$qh_TJ4XriQ=A|*Y-%JfGb1Q=cZUK1{Y|I!dOY+eh+otY zGQ>gX)Z7E?1m))X`TUcTWs9CXaLl z_lo5nLa&L_iL)$46<-#w)+Dy_rm`Z{{zZC)u|v5Uf*~#g3M}H6z{vX&Ss})~>P8Dz z7gb0TGCF&ek=2dDh1<`Ea)Kw*Uv1$@MT@BIj%JaQ z4^cW_a~r3QW4WAm;vWH=8o6+7Isu{!z9Gdv#=_HJ<2r%E!93p{VXW?Q_(6YY9Cg_O zB4qtjB%xXXR(2==fHfkv-9xO5j}y1Is|Uo&)fMtHkX7hCaeeWKB*@(17G)(b6pJ8K zMREC6<#^&7p|LhOie`&g(vEoNY~6{QbCJkr3;N|wcF>xdz>AwaXq$CG(7a&%nNm|> z1)e9&YKy`Fsy}>2JKp}5uX|FE*8Ry+c0I#*LG*BYe&FQ}Z1Cb+eoX>8pc3;`Wq^xE zJ=vh7b0yrfBqA;45M7o02tx;!yCkC<4u2)&Jb3g8eN3O45IG1Jon1BidH8yutg%R_ zo3=I9iM+fRZ;{|7uN&UCoHl-suP}hSCWNg39b^hQTDmC$0~aTk&hVRO61#`Oq-qOm zO7kqXy7losix5|&G6S3wOQbH6IEagcX{Q3?JKdEM`}^tjM8vCC7MP8EF*x?oaE*fF zBc+YQ^pPaXP5GC~i46?X)c`fZu$rr@+a&ubV>ccQmu7>#inOFBkkf!>4eKFW_woIPa*oI3gT_P)R?1diRfd-r-bzi(J*6_h zC8;ufULdiKij8xE)+Q5o3|Nd~=U}hg@GR@=y!rq*pCny4Dy4W!Oed7$xf41rhkSX; z9E>U`!F`64{h|LF|49X$v8>_KDK!b(VpEVZM>|9 
ztr(?}yR32IVqB&ZUTQem@LwoY#DnbS!X*c(9#Fw0ZqfwIxUY8RmURucK_{6J;>i)hxEj#FImq&+9 z?sd*^6gK&b=2E620LeR0aKo_p@ha||*W4$!bTcOo=OIB8N)fM>QWt?JK-pA#u>i10 z+Ih3y>)DS4QGLSPi8lr#5q1yDBrbeI(8IUtfs-O|6VI?^WTP_~qi1<71Rk4@ElD-W|^?Ri!oHG&1 z{_nmjwAEF6cnDf?Zlid-bG%3ir(xW5yLvnKF#GFRXPIdEgH9GH+1{HPepTg0agFrd z|J>(wE{MN^LVViEUKUG)>PPIfFNWs&`S>#mrqy2M!%#&uVxryco$@^#{A?(3vt$aK zBcJ5aw+j}=2T?Cx(aD`Ok46eOe&O-tbA%344IupZVBOl&t8ks&%Fw#TMw)C^+x5F< z+1WGnf>2-Tk>dwg%HnvwfRW8{TK0=h`7_L;mK$4j-!G8P7gxCJz;e}xZv-U*B4IR9 za7F51))eS^an=XD{biWU^#E)8>=DP{$-eDcf8WF+VyFK}nINte85{&%RuDM}IpU4q z-8j@942D6ue;asz`t#gW^`smgg2?@}D{79#&P-{q#~o}A6rT6e-UQ84S$AOf!oAvb zPusl#jIXnSqiNMPltcXd`|)|LK)txLF2mO!fI3ngN-&d*+H&a4>h+9&FI@C%TE#>j z{7~8OM!O|AgQ{+NMgcSG>F_jA_{%9PZMv`w@6L`H?4Wd1ffqwnhj^BBE|r@#9pK_*bF=$~w9(38-A z9GiSU7U^sZzgD8}9U18$>&ncVkG~eqPhC@ZVk$xnw~4(J`Vr0$eu%RrbQi@VK&P;~ z4(ecM3wWX=&{o~W?sTRwb%RW~#> z5v!$G*K^}z8{g{1ID3@|hMd{SaBC~AwKVn8EH+9%P3Rz@| z?^EicFSy?aqzYTeywCnpyONwf<-#n~3gjQiM@zqpm!=Zz|GGtUnKxMmFP>%5;7tu< zQO#<1txR;pruKI#+LSBEJQ>>_Lq!5Hpm*e?~mv+$*rRs$+{HY@inVi z1Q?D$kG47;M` zS92J%wkeK9WT|JCaSW5lHIj(QqDhw(Pet$Itz^jUEBe?59=?(^Q;QfwO6@ZrtDb(f zh8y<631J%UdDJ)zm|@~qzLngts6qP2*(L#E$TA2u^2X1!IYL7TIV z_agTeU3!x}cyQpBuQvbuxH;xS%y2T~JC{lY;CzF>&|pP-I-wf;4KJ1`=GCLlQJwZ0 z*?6k*53 zTO#Jc6BPY=y?4-_OKVqKM2=UGsx8iDjn5;vQ<15Pn?cSmmi73`nj>8Gn`qQ2#p8Bt z+*SPRho(J3DXJcuqs?*T{K*ntQwbn(=!{wlyeEhWpH@EEJ4~4@AZc1mPC^z4s^y() z#A@jJVN2J9w&s|f${c+K{z6sL5}(ydO$H0Cjswq#H%!%kj^P&*J${`w^MPCk7T-(m z-zW(+9Jb^i+-M##ptDG$+EuAwhkwyK&9y)Rr@VoDW%bCwWCx!graD*Kxw?n?+-Hd) zTr9bykegRAeYBpd>z9{^uf8ORyiUCqH^w@Nw#k$7p0vsREoHd?(CeVU}id#lvV&js{heng$n#kpq8#{eZ&6&y(x`#rdpx;XT zd@bSQP)sfJjrYJqZzL!>TGErl>r~Mk7Vw5#$UCfaY7U5>eg0gBTS^7=tFIo^Y~teX z`i0`&NmVl4}rv0PMe9e}cb$;5^?}KO`Kgf6oqPC30WAK#C zYQ1M}c0~F@cabfa$uXBo&$;T(B*n}(lQfKLEDN6R$c)uyq;{zB#WPSB>Vw^0T<%k&tzrJ60JmM6KE zIlc^unO(Tqbe7==%y0y>ol#`0;J%K{<4j8GvwpTD-?|?4c;<1WEiiS%nzXbot0le) z`%9LB-*f)gSx6u&Zho31n9lTjcV9oj>i zP|kO`@U~2UKvo+imnv%VTw?&8H#yTkwTIK3GdK-TU<6p~%x>_MK+X^)QTdyAxv0eXvocukr?wdF3e;5 zGjB*vHEHttrT2=(fc<1k?5Id;BE4uF$g4QhCcjHCrs~s(COb8`=5%&v(GHh*<{Ol= zR?T@~>WKj;y2CfSS0CB7$6?R6qQutl%8z8nYuR49{LtX^sDf}c;}cEzs;!`(j(ddqS_fBiJj-LY)S94tG!r4>dk;GvYvlKk=;`Jiqzs5uo@%D?J|m%!CVQ zM;N=_qwxu*BO^HIg;5zsnu=IiBju06A2jUN7VE5mHf?}&#}5s6|C)yT7Cm(;L7d;T z(E$Lw|Ab#C%-0DF{ds6B(Q}5(@(|ogP6ZQOeIM{z<-lqf932O3_7+m@mRk+Rl$pgd z*nR1}z1HwD1By0`1jT466H%2}MVCh#ZPry!y3CmY#lv<-87%6n-pnW5bELMftC*?2VxmUXqa z&qx^XyIGS{*~Mu%Y%Zl}QnEK1uv9oHw=c4BzMo0lSEx0ab4?Y3aXxhfV?(IE02Y>8aE?$tWtu->(tj8@JJI< znJfR&B@Kl)q|CK*#b|5)%|Ss6G5t5tJ?wpLg zuFh8BItvp!OElY)&0QPz-)G40v#fE8#&NeI^+#TU>pUCceStky81|#im8LaRnbP8_ zDOk=GcV5w*PgMFU-1q(0SE2oq=c}6oI-QOfa(IhQKek~g?T|fSSwSgjDP?8Z!Ao2k zW|RTeEP|wb4R1j zwv#+gRC#t>@c5SU-_ns?lzZ{{2pTscJ|w@>+1}CvY@-A7aB#8vjnx8)kFmRc*2Y7< zG2b=nm_{pQbD?S$VnpxRqkJy0Wh!nI<}zu!yKHg2_RjqrR~z0#<&<+D_wZS7M-qNj z#_>0R!uav#M$dR>6sljNnGvC&=iq2($$KCnIFrTCVN}K>$A4f1^6^I6i0@L1i7tWv z`;1r82nITTNtbb}1$LBO(O5xV+ERrm*P;HRk*?h#nI-85Zp3W=nO^C}+9jwWgzJis zC?O&vw{f)wdAPbkxvgD2z`r+q{+Ea&mM%WU91_Mu@^KO48b8HbUDsFvGr`Pl0`PZ7<7%!tVL2eQ9pbg(^3Q_KRMiT9+%Vvm|0-OCKueI%d3>^q^WO z>=1p0T26^3vfWXv=N_2muI_8SG%Y|8U*3V9U&662s_4HV@Nw!$LdlkXA7nNsq`#20Hj# zYT8cSJ}gnp4o5Qf`L#`X|5StX_$yi50;%5poYIUUN5j`a20&0UmPRwuHwv9OHp@c1 zhenY1S3d9qwViWA{vDy9^;zs5Q*Vd%I-e_$Z2+>|(&|Xfr^ci71*uesE(1f1Y)W{4 zu^J{LjtX3{U}-Xd+Pmf12JnuSYmnikgWA}~_zOH1zJNpd*nelBr7{*l41$4#i0qBv zuMD(wbNe3w5zPDj%1wFfI?IC_v5#^^jXO^%G7rKHsjyNqIH|ToN@=iDe`Q@%%Me|W zygQlHFfu;vx1F%&?{g)mJP#B#KY2Ft5frmtOoE0=2_ZX6^O>4A4I)KL!Ky#xm1azQ zx}snnpq&FGw()q1rifZ0r<$s-UF=-M-((?DJ=!z(U`1#j>_lO=EHsmWkKA8g+mh}^ zgDE(ku|>l2sz*u4Ho9L 
zFJ5q^el;V^bX)SX81#&LWt%yRl(MxNAY`F|{-+-gsItnq3iS-PM z9Kv}HH6G1|xQar*2%BYTUNQSqeX!|Dj?g*nj<5?*`rre>G@JC5)SdlmQ+A1_u_K8$ zF5${|O=~lsT^)5baoo-$e!F_oeR|}l6~uLEYRAvGcw2jm^M?aPLgqv`|9|hA|FKe*k}uWe83DrMdSj@Yfc{pU}^U)7CEyk6*!muf_fe1pq9ue*^!2 zDzm@Z`L#gzr=?Wf|2K($lyD)^+0$+zB+n-Q8V+1t-DX8*dzfCb+u?0to~Q5Zs*xLh#@Y!QK7s%$@mW zCNtkJxVLI`)#}x?&#JE4`|R_aqpAo8j|V^mAOipZ3V`WBwzVM)01yWc0N?_UVfCaO z9b7FOT#YonoGicwte*CE2m?F(Ch!V{TDxh(xgGud|SRsoZ)#N&f_8LDhK=8ps?O1b+pJpNV}FH z2_IWe-yTgH;nM^cU7f>RJQ8t^r$$E6mYIMw=ejO+Zs`>Xg;HW`d;-ycwd~oxm)J|a z4QR!>MFNnyj%DSSi`e`FpT@D)MsX)>Z5VtEPuE!F;Ug)1<_X*lKd4e@YcQCqH@MuS zDz<{@>$xV4&0rVVL*iO67QQVZHns9Sd%;ay^(o@4ShqJ0#MI`i@9W)09}{_|deg{@ zZ*9b)mm?lX5i~XpPz6O+6O?f}48QIc!_QY5JnAH0xTRUa1$Hlym^bFc#+fS{!fno#Txv} zUoTB~q0+^U7IG+e7YezYT!_PxRP>OLYogHf3y`13tclL2CRu2s17c|s2g5!0Z}Gbu zm|GBz-hohDta6t|;@}BU)q0eLq~18YAu-arq{up#u5@F&Po7U+q(4{iqIYkJWh|*L z%2VuLqLLjymafDaW_?ABg8Km&LLibJsN4HOXVLVk9A;8dW4|nYpO%JFmj9hXVj?p;m?oeKMYQ zY#xp-AX7(2&`;kg)iQ7_;m3MyWb+8OnjM6KCH~alm>BW>CD*L~Y5$3t*tB4%2Mq?nQTX5+P#u0W8Jx*5uujEoRR9?vz06Z((LRxX$C zu7G245aT-}RRhq;(OsnlP;3J00;6FzrfZ{vOox4XJhT05TMKOG&v!!qIs+K0PrZ7Mm9i z&$n&scU#wFwTi#j5Tm9OXeLmOZC04;oj3$vE>sorXz!;BeQ`bABj02y$1mu9&GL$T z9p#x|*D36rRt|ZT3=4{5B{c_#X3L3e>d@AqJ%$PK$y9m&lE?-;G3psYWoOtamRJ}Q zGgH~7TS4E-O=!T;6#nALCh=!nO5Oyw-Jl6+*z0qW@YUh_BQog-Fn$9{UhG-#ZLAF- zML6q?Crtz5{b{!{@_UpHC$_Io4=idvHwMMHKHome-^MwlMXl=0A^6N~Sb9IKdKzj{b*~9i za=M?AQ06%qs%zUMG)rJdvW-|pQM7qExNqE?|F%@zIB+9iu>O)TFBYF&7OY|~Nk(rO z|6rQu^`kG{f|-;r7~Zq8vO;9a$$ou0{9WUq0JV7XGWv%VY6RP^(wg8K$BFHTWys6Z zICF!wJZ~g1wvhmKk(4z-q?!^NC$u;Vq=DeQ^|x_dXp;Hjm;&ET9S~wnK5$Jr-L>=j zT3M1*CQg46Qb}~(F1{&ZleqHF3JqTD+Q!Y-ewj9i-*=5Dsx(!gZweDc3GkMUzrbp}#aj?TZ+I3e;n+u`uxMqV4%e%RNw#ITdDdB#cp=27SxAO zc8^SVeC!!lHp)G%T}$!43-;@4N`L#i0^{D$VN|}qR0!Q zdjUqbCMcM$I%F=8Pnz#6UwC~3b>!cmZ2(IYpB3|n_=dQWheDo{`7 zvyxuKX0l_2946n8F)wwbiMva*GujZlVI~**Es&bEKk0?Iw`!iZc>riWWdO&LE6hoT zxwsCavRi;!kr$oXpPGOgV(oIS`l(eV;JdZkS)X2r;P>Rx$!y56y!M@XvwtdS^~|g+ za%jNF43O{JIRixspCrfbuA%Eu;g~E3lA;Fg97)H(s9X&-T9AO6lDj8m!4K?^a>o81 zh0|=D>`Zdm>aP-awec+?<9Pn?g=sSf-m#!bI8!RUZDlGvs-E$w#_EFnkkbU<1l=7N^Z{-u~!f-BPRJKw$6ZL1wCgt6uhO8!_oEx4QP5NCp}dNY~V^y*M+kYI7d$lyx@X>sh^ zsa#F}iV!li#$_4Q&k_ux`dy*fmW5WkQ@MCXdRkoT7jLSaoXo)&3qdl@a8s`bc_#C?yd(@+a8Yv z&ns5v@!48mpPXzb5-ztqangn9p9>a)bce6`&Cx~Nj zxfdM_LSPN`$agy3D)u|)fZDwX&BAS_|6#TkP~M@~6P z3yG%<;Z5Nq@t^}%g;K!fCRgsORrQ(Z3X31;5jf!=`8&om+A5`@NK2Ze!ZxNvI9HV_ z!p{z{J_m=5(@41()2OpnOPPiZc3P0XnLQa=wEx^y1ez4_HK#Ix-Uqv1*r$!&L2FjE zJ#_(T4j$6hNb_}07ypjuwB>Bn(%umar#EKIZxnwBhMp5kv^_yv!?=BB(9N5m=(07| z5J|)pFXBEOttt$jRZ4(#`_W=h$i#aD%M<#>&In11NC8!=IOgk6YCG>ODeDo|Z8MVS z!fq_SkybWGrMHj~x&~E5TFeU9PS*^Da|sEaeoCHgBGPb;>Lv2269sFJm~ahlHjKWa zG)^YnQz&JEeA7D5GKR58CvahgmbPn<)pby7hD!3+=UQjUL-^kMR z&W^!&vCIRzLAD+_&M<|r#h%dACchhvtYekxO>p3A;KbzT_@&R-Xi;Rp75=HO?NjoL zx{Dz!HBgYSNeJ>2G{S|TF#HF7>Fg3+yMv_6=}cZsI6T>xEx3v6(LKZARdjbA1J68Q zz=#O3ddIg(f|>Ldo6><_P%g=sLEhFm^X*OvrQq*gT42M;tH;}7%W*@|&lcZ6>8HBA z=NIP^w&~5jfdjOMCtEox&6OKm6iEkz2t7@b9N28F~i{{iC=^AWdzENIf@T4)% zCsDDiIn#>Kan8n$py^6kEVO4uD4lD1a?$3R6@wwrVJC%=m9XF5T%XcmR885q z$YWhKW*VKnY~5^%TVx7YsG?lIM6-{v2NA9?3k{{*>hw0IhfZ*+G?+G!xNz-6V`773 zDCz10X=yb^x`iCBYiyD07^%2H*SK3mT~nTj4OA0)gzq)#-Ji(u@GZ*uBYWQk-x`+b9in?LREz?Jk6NhEF=i(nRCoEF)VS2?A=}be_~q*q zAtfsa71fd4=iZYunHg^|yf$wIT-r9(KK|=s+$%4@t^iuUsYCl&=>M@_u&bAy1^8#t zR-k1Mnq$X$5S{kNx;YqdUt>n89UPlb-RjLH-77I23@PYhIGurB8EsNs^$>?gjnTyrpTs0g&TM`y zMxKzdS&O2;Lb7v}fqHNuLLghEKkt~t>&g;fOZdV%ywTd$bRyj>VeytScKyTX%c+AW zPT}Ps!>Ux*O><>yFIm|2DGC`Bl(ooNp;Ltd6b`$-7GHi9P+6AD2Zv-hj=&tLW9QueM$gaWVhyn4q}$ZbW20_#Pv&_#^lzEsxw 
zh|eK)PM=AoQy8kVDZUTvGDIV8m?QtmbZ+o{wIg{;kUWX;O})RgBih1qXHG%!n>SvK z-#-TRi(am64QO=OB1xk!+1>BB!nJ|+fkp2M@e4`HijF>@k<-BqFr;I}`rWim5>6XL zYcip3AC#bViJZLP&rpSQ0FOeV`^fUqryWCN3|@z0hEdHD7N~5;PYHFsyLZE97eGR; zc4%pHIQeB|00Xn*H<1gz^|Pf%xU^xsMF{Mwc?ZObgLjluYsIIQfq^^uw<}MA>k30jv>l;5Z!+9&=Oe))9u3*(+=ba5N zgVR&8ZSIwd*B|C{GGIkXt;Q|PpW|EQq|ezgsaBFNX~q@?Ki7sQ5u*@y`cC431=J{Vqvb~>SE(y^&6|XV=i4JoWSkKz8DEYqNWzBNX1d z&QJ%N$8B(|qo5IpZIMRrZ8IjRpXoC&3hKEN*WtWYqos`II+%5j9YsRqD(KX0F@6$i zl{fw_J9)WOkoEZGl8&a;F`)^56C3ny|5+VVY(0A=4;8K>RHE3>6}h>inW~GU6PV4+ z(Z%BTDbN2F;?PGIm0+aU#g5mrgwQ6{?~_@*AT2q!LKcFF0Z$j%fbAO zv9-YH7vs_|X8Vq=&F(6_{q{>jKJ2*nz~HAZCy^MJ^Y&!pAZ9Fw^L#ABxJ6T`c8JCQ z;;`|ml|f>@F7-k&vhdO(giah@^cm?uR`TW9igSy!jAQ~owS3ywDU>{OWt8hZoV$#} zRSd$Dq_ub5N#88FTe7i+11P%)jS8;*b)aD)kAf8rB=f%_&ag$UGCN+j#UJyR}rv z(97~kua2BOl+a2i>ynzkgj}WE6n+sfwJW;vt5{CXij3Hop z_Ikq8EFtbZ`&v;Q?fkT6$7VA4LmH~9zo-4H_gD58YDhqbe=^Am41Kh|plD=wO)hpGqEKhh@*EDJRC|NEf)kJt6b`CrE7s)~OP@b{74 zAHbjIRH!EYGT{3a`0JSCPiQk#YQGFUeg*%%6Zd@uX5Ft*e(llx8D#+DUpqFx2Kcqg{4;<&!EXWntT}&${_5y|LW!RK2l}hO{~F=% w9{ML90N8>?_=ltZ3jccw{43lV_!s!^2~kxM0s3|UfG5yB0BYKFWIwNQ595hUn2_^xn-dqPJm;9yJM)AVQ+|E*LdhF#0Hw=mgPY zv}g$letEzDUs+%N?>lGhb-c4=-- zZU`TLPXx@<)78li=HukyE{t-w7xI81osdEvo(Ok)4?ABsgu9Or($gCOvqw525I(w> zF@XS*e3B~v?KP2dR znOxTyoo2oi7XEt7IbZZ-OkjDYh$fsipfd1|{i?^;@tq@7J$|n3S?lmTdi%3;&@_I_ zm0-0_+8{8aQgsEos!YvO&(?1J_#K7N0_&KC^n{AnJ4>3B#}OZ`^jce5fYz}~+2*3N z&t5Xlm+}%bj8TsR$8-1-E7GII#sdXhtGneF?dHRbewq>#X4Mu=%`?Xk+)+zXi79ss zQ7-DTEqv5UMW5P`M1~BYHX4Cv%io<9YgBx1*D;cc)o$_g1+z3Z3Z^a{ns)^up8%x> zf)&((fC{XRX|@*T{exm}yQJ=-FkHH(rgZ5+vfmu*l+}v9$Dq=n6iHmeVnfTLXI$oCeSF@@;oRa5N<5h!nRK(r2wT6Lhy?H|+}1%cux?%A?m3WX}y7dSFgc zL*pQK+w%xf8FKtiItYw~WuHf{Mp7hzpMNBHKlizY386ArO13jKtI;T(odg?50PC@k`gR-TNKkX3q*(s-vlWTDyquKJ|6ni(9jKaFJ4a zSWbB)=t`X-RVb}R17T6(%G&&FMR(YQdE_Hk_2uIJ^0=4fp0{3xq)KaLx>A66tx;s# zuZuBzkZLuTQ>t3gw<$u0ehIls5wBZy4wg?ySa{K~7_-Exv;7}Gd~UqZ%Tt9mMr~IN z>^Ja<8J9seL{vRv*_BVC8tOfC{rSosuQLf@8J-M)<9L1s1UZ1l*Uh{e#}1p9?#6~$ zltSFK)%K>c5A0>>xZ$AqWS8Zz$g*oM2B8?hTF|8G+^FSZTI!YqWlv6Oj@5Me7h)PJ zQ$NqZ@^H)mHUev#kok%jzMw}ouoz{@o>_mK>UYNd0_a9M3(MOi|zanMV6B{Eh6#5pm*xHbgf|f>gR&*L8qBoX~!Iw)V;VM1pGU>$u05M zw8#N~Ze0N2lHUM34|kv6r@!2!cj;Zr{; zg{(5T((=7Bh)NLmp5*k0MBp{2Jb;I!+c$+lx0reF+(pProk+Js75{dVzu0;@VcC=u zgAMeym?>>dOj}W5U!|}6_HFeu!yYY?PU9l~p_-+U?foh6szYm=rBGRHP3}1CSstJH z!6>4v82oi{0Qm}83^se2ez#)6rKQt_+#+2^TK|pr>mIEj-mFvN!GS@3z+un+#=H+r z6KVAnPQ`^gCSw-&(5W-9%F>17R@VcK#K{qzaQu$Ql>BR=hQ;muNy!nE5sCIU@2OhO zqC25@wTTfOe6lqpD*atBdlo3*N*ym_RufOZJar#0k=TXZ;rHrntQGEodL}MG^zCa> z@4K;5GpG~Lbnkjg0xgs&;#+7Q@80d36MDpNv=ql_c3%1xa>Xi4!=+mH$}!=(hT<$G zOo2_)qx^{a&ulety-zZlOc%<(Uf7ezk%Af58BN;>;0Ps6DZ_-sE-SJ*q3>SSFGC8l zvwrAyuYS9(#Tc;SO70kMC`-cu!t_;?44+eOb~kSM%*xhte(1UL#D&x>zpuZHEt(hE zMcGfIMwTp{TI4M?@F@LY!V@AuYFk)2l>|Jk3k!Z)YV!CFWhTC}umX(&2jeN;HbuTF zI!cINJJE=bZb?l?@;!BgjpxIF_$an3f#n){gdD=H_}GrrE+g;SYqrCbN5JMp2PZ<2WBp;@mJHpv7O^pp z|4Pqns$3dTarHCnlV7+`O=r|0ux7(*lU44qsza~)7pHoWA!nBcQroE3Af~&@TguvQ z9c|8xP+)cIm>}J3K|KntvVJ}4nj&HNow1YAyN##z%f(?2wmzn!f3dDc)v=?!xBotN zO0GyGsb#Yd_0(AJiL3l!6+MyM)84iX;cL{qo}x{ElF*#cqI%k)~61-^;_Q(K@{8{4Rx zunE1XL?pM{lupua3Wmmoo_soGG9;70&>eejs0}(mMN^cY7K*Z54Z^9?5v}$mZ{ERNNl|#j zPgds6oZaV}OO~5uHlv@=Q_@#^Ba({Fc9QTp8^n|2CMj_cYsj6JeXbYx5-8h-k=W(4 ztQawB6I~DPKC>Ls80^i}Y$6V`zEHZ}PRBSq7`WnL4uh*>5af_hG7%ypy5XhD2wV$6 zG`HcG$r6ik+M!e!zAbL^^rsj;v@me!?Y4CJ9eb#(5Gg~tQxGMpRhKGSf(F#@2uTlsX2foj#78U+;-av^N-w|a z7A}#a#1v_j<9XsB;@1bjGKz3zI>QLhdJuXFs!K84XZ7`=w8<<-xY&szlI&f4^q1Uh z&La1>yAlCAs5Hx97lrLgv`@-h0s`Rg)gdSi>$TyrxlYTGqREsBQ|V?@Xl zBlHX>FZseJbr{-+QGGk_$)|d 
zG?#CAf0#pTdrUc5mF>PBwlZFo$Tc>$!ujlAI{QiA2SNPIc2n*>Sh(~zJ)px@6J`3I zI~Z2rJ!Fcn9VhSf?-05)mpH=MgzKN);ZFJLMy!n8wa=QF(L0!d$v5uqoNGe(<=O%t z=!Juov0LRueMgi zoIkRCMZV(vUPp!iYuK^#v<-_)ZG**o`#stL-L#^tF__0}hFct?fX>jvuSdN69(%HUxuym0u)yl;yZ%>R*7& zCgNNJHf_ejA68GU%PT1HepNdctIKYWP!Ie$SHUd&9|h^P5iieuCy; zFP7*vR*|>O05_@L(9k1F1#umbISQ%YMqe=FLYv7!+5#foyK zR}jre?JzS+UaI#{TzpVNP28nJZ=^)(k@bzkL@n&5lc@QbiLAG9UHIe%?k8dQj}KWr*Rfs_&qr5u#A%vVYc)r*RgF~MTNyVdG^I@8o&Ki1PQLC zdMI{fLbB$dGKU))bK}`Cwupx`isQ{v20X;^ESO*Zrc**PLz_t2U_r9ACSxXl3_I&2o~n+ zC+QA1M!$9EcajpUwcI{>_m;z#{7Z!kE89{GD5WLSSevdn5mwX}X%kB)@lv OFDK_^CUVpM_WlQNTI$OH literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test1.xls b/pandas/tests/io/data/excel/test1.xls new file mode 100644 index 0000000000000000000000000000000000000000..faf5dc84700c9342356499cb8de5751577fcecca GIT binary patch literal 28672 zcmeHw2UrwIw|32th9n?}Vq%D*vBwV7- zBcuwhdsR=tnUIGN#*hS}Mu@jCEH0k^Z#ujw8NxUyLopa$Zw`-efTluJNiJLshzTWM zOX_S$oeQYc$EkB~(vplIqS?tw$${M<7lGU#Ii6IyWz^Xm&K%N$Iv=L;+@k2tq$BwQ z(u-zipurRJMS~MWgd~wfkT4Plflk4J6vt6Kg($`cugmcPEyR)-G9I)NL-@oG&QTClkK!ATki;t16V1Tkl)VxftXdXO%}g^*Abr3+sonK(Meh9ZpV z+*3?2w>Jm5Lv1`mfr|s_41bW7nXe0Hm}I z9Po|}2dz&NAW(WZAbz5{h|<3-y+8q7QTm+<(r;Hl-=KiLO95SxJgXF>&rv{Egs(`R zeF|{?x8zx^0H2npjZA%=q|(dM4=SJ^RzTmcfKHcJ7N3@Xpm&i=HwH`!hvgq&h)F); z%z;JMm87q47dxnOX-Vj1WC+}v!ibQfYfIC0By_l;`IcehO9eI3>Fen%z8a8To;)xz zrg7@^N!y#DgkDC*!EIX!zMh0$FF$SnFm|SKat#cOwT+=(=yqx#p#wf$FLb+BgPvZV z{xv1(S@J_~Dk*OmA${ug(@H{Tr6SP8l=>;J9W;<8 zfGDpW^lD82QC>UPRW<=cdF?>MZ3>9;+G*D`5aqQ4DV+7 z<+TGIs40>tuN{8VK$O>xS<^t2*A7e}7{!5zJkLgZQ^gI?qG=$?Ysa!_Aj)gUs%ap~ zYsb22Aj)f}Q`10{*N#m+2+liliMmLp-@yl>3xA3lYG(zj^Xd#KG^oxC)Bck}AT2Ev_NaKsEm?kkzKl=;fj~wm zCQTv~(r9QxBtkK1DLi|YrW_$Tw_a0NcqyS|yL6bPidzqA3l@%~GvK%YADpjY)(?Ad z0aJ5E6GU}LaYel&QMrS3Z3>7;pd7@lDIjcD%~m3=l+#K?sbz{IYYHBatie77f+_Nq zC@NheV+#L!_Q|f4ZuP2=+h`<>L6pj=7fRMVx=VrN1}iWR(aT&0$TXHw!eR=HF(sxX z5|!4gN770Q3kyX=3?dPXqKVZbX%I}Bq(RivfQxL^^aLuiY6SJX>&LyQ9CIQ$7=0UsBsvC(m0SzJR^7+G2MUZjTi$P47N!s!aKF<`TC zk!549gbnSqC^l-s$ghuY$Y;}%&Bj%hjkOXsv|FRts0$-szkV&BjUk(jn=G3yO4!hj zk7A=Cj1;{umCweA&8Dj?8wVw9&^3zdh9``?`?5|xn^tT#?y_uLl(50~m|`}X!pMg; zx8$=iMmBEBEzK0zxHZAjO8pL;emS&-Z4b##}{rk}i*p4!jYAsa<`)>BZ_dJBTzC_LYS6O*~73Vn+#w zxUbY977d_aQEX5%7AVt-2zj02EFaXe0Tk?q4Qj;#W!elOI4h!hb+)8d4WM8xY|zdu zP^N`Ik&B6ZQ0oRzunjh-Jqwg+4}^?fTP`28Qv)bi0vpts1U3lqNV~x_W_kmg1;~jfDlY;m#PkFZCGj5 zVLMhsT0R*KJwZr)9{W^P$YagQqblM-p2@IHBas^WhLh>_IUG_oAcq4hhlWT;CWkby zR>ToT;H2NQgxIjSu;lo}>HHDGX~~&9Uuh_ASCKyHN2bBB0UWCGkL0yPF0O%#_= z6Gr+loA-^62XpT!4aI3IQYF3s63;StT!@fJ1R_f$Y5}teScn=R42y_~i{itKgH>D= zeW;%SAO*G{lp+cwqh}fjrvc5b?}!OB92&F+NJhKUmE0D@l?e_Wlnrhm2b}hz3cy8z zO}|$`K@i-H2@al=4c<}?IPFapfYSpEE(P9|2@W2Wb;=E@8JrHIw)I*bO-rKfN*khVi-6! 
zR+qqO4_!h40AooL!4A=-(UTm?lfx29-WR!d?y`~_)a>ulB-R6 z5?Ij=oSaNm@KhGlRsbDH6QD3qRg0qsVMfy+>@yS@> ztVIqL6AA(qPS=nsWkXU68K`%OtD#FPiJ?Uqz}7W*Egl?}7>8bs_TvI+LAlz{B?JS9 zM6g?lW0U8 zKp3On0>^1^U@7q!M{QA^2emmJS399a438EHlbw+=N5G)CAf>V|gW`&m^8O5pTMn-c z$zjwG2Oq+L*Uhjccr)V}(w9P`=fK}7*oJ!w68RtFlCR_-?tOrbwp2bZAbUgbfzTJW z>EeEX@c3Bt0qWqPB?p&&GF8ZapiVzXijJQg8=>G~OjKNaqD)R;*bhQ~Rf?fAeQ|;1 z#u#THhe4Qz5f``_4`I3p#O4uSJA$T9&cbyOTwlR;A6&zE#tY~I zxS+X;r>VP&LDOBzB|`?Qs2V|`k!(hu-_t9?Dr^tQ4dFW4Cq6D2)+|FuOivIdg*Z%` z6uWE1rGPU|`u*w#d`){|KPb1*lV|mO&V}>~n`RdOvBuK4nuzbDMACI0qZsV~a zb>O0$VfizjCPeua_D;`8G|kv@?VoPkOI@c}r+!Oqae4C4J8fPrTG8!MiBH87l|y?w zjOzX7$>-IU4>JxN6}SiN8sx)E+h3en_G;Vb{w2oV4i}Gfs&&|6yJ+L(A5M?>Y_hj{ ztg!3VuG{YM-)k>zd1kN9qR_vh_$KyGi+ZkK^y>DFA7YQ+O)pHJ={~kJ;FQQ{T&ns1p)@Q|_CwHhS+~E_ND2z>Vbini6<$p~$ za4EWKO7EiCb4uoI*=ofbJuPAW<{7sSh8`bsb#t%F&h7FC9CG@#rQX8Iz~jUAct>0< z(Z4pR-)*<`21 zC}AR6h)$`$))~IOXI09^sTq|X;m2(HjW4{k=h!FWliSwoLf*9vw=14JO7r;hzESzw z!MZ=#)x=9?cI^i6t+T&6tFN{E;`?gni=e5$jwp;$-ye}}a_UORKPQ5Vey%B>5jfH# zUa(~Q)cHsFcF+GVZ`bPnnUbu_0}8AEUfyZ*uuFAw`V2A#A`+u@#L@k^$b9L1D zKRf-s=40>0bLTqtnAK18`N?RHOVxbCO^M5QnR#5By=IKHQ_tPcyMKOYxxtNhI`8V` z+KJ@XW50(b-E8BYn9?@&x8b8=E}pwlxvMr!XJYC#-vw{Hl7{i`}V4$;@0p1i&DQ@a&k#@qJ~G%@9Payn;PzI*)hqG!=sL+|)z zHKC=I>J-!BS@D#)>-=wlsC+3{?>V@mf&RsAZ9M7egUi^c5FVy12>Ds-MPIqcwxKw*uRBhSA9_fD%YI&iw zZqrR}N7w!{miE+52)ryDHLY-v@tcy*7ti+gt}g95)zoTL+Jc={TY?`Bt6aXf-v_gP zi~4Hkj(fhn=4R*f>E$T_3xyToE>HaS9{Jd((^x%&tplwepVivpU7d0ELH5BV{NHaC zUVdj-aADiTa+R5p7Gb0LFZhB}WiF389LThpvOPC%b&k5g+a+;D`-0D9XLaYP20A(k z?)O1I-P;Z0|Nt zzu!F{>yWqMkv00YEoOO^jGZblyO~(G!0=S~fIZgv=ALC9wr}1(=}@}s?+yje+lK|O zdj9_A#XTVbxmT}_w%>K7&*{~hlHAVRobh#gn%8oB`?amN)b!67*JKD6MkcR$;{+cmVM#mT@ADQ}O)d@vi)J*rEJ=hm&(Z~Of6 zN$=sgm@Ex$a+^~pf?uBD{cv#b3GJ!J_Z)drwB(iZ&Fnz?wykew^V1ydRTkaNnqDx; zdf|_TMjThKE8~8@X1{RM8^^vQLdwV6ZoPBo-hp}T@|+9*S-0BKG)9x(Zoi({6C@OSN_d633K2?3!xShlK(Mr#GMCRSx$#=`gOzrwzloGr@-Z8Y! z z=61adPU%(I^(e3I7d_@i#p~1g#=V7Fy-nu^d2O!z*>uUyf7(7il%(Qc>gQ!xbNrFV z3$wYUM)p0ww+~oaN}S$GTXYQRUiMmw>uv`Q8IaSiuYKn;s-~* zwI5l>RXddzKc{q`Ge3IQj7+yl#m3XE_wVKe-dnsf zU39$Vl(bp)ud=)BP8g;2!YjW_>sakD51p6gVWW2I{Cu$WkWmpv!Dl`mouwMwCV5@R zt(WiCE%`DnX>d~b#fL7}TYnJxj|sgKJn@U;o8H`k&!%s7pHOl8ME~s}HR*#+n2$yBdFR@a4Rqm+F_}Cbmfq zjhLjftCzp&8vEkoITgEcz$rW8(7l?m;SfA&2jh`WtPux2%%Zw+#<)W~?DE$-?PN5! 
zZq8I*eEjvJ`YWvOF1)h4Ol9uluDb35%b~Ac>+Z4RIV=b}xhCdV_PP?oaF=_|(L37m zdOZL2r<_xhet(tjIm|dS{o`ArFZfgRwE#M$mw<1lG-&(kL;BA%oS_F{2v% zg+S@&<4B#Xg$dGH#$*T{kflId%AjF7;6vyip6W~e@Kl?NGkN;JEX!eU&2!k9q6=#y zI3wi3N`W4AZvbZktI$01b^E(({^N%TLMeI|if&7t=TMYXY8pmkTEPkqfqCmM@Z=Os z(Fkb;A&p8smpWV5r>+*@ugB=4T3}Ou|5nhMalaAXrg1mStr5@^MCv|f?X+<-b;W5AP0i{S84*}fM5jT-Dx0|3 z1JmCZAu4LYlZ+FiqCM%6H!V`G`J8=Ap>iRG9tp1)dnc%UK81llHr6Av^nNn;K6F-0 zLDW_Yvz}gYyCgtVl-ZM{icxW`Rgd^S$h%xreV;;Ap-^2CRZbrfJ`+T%FY;M;5;Qy3FoAB?s(!0?Jx)-);23hZKey zrGWtVlTrOkK8w>J>V~w(lU~d`N}-}PcoIIJt}S&6H7hMF{`$EW6oxv5+CR=BR)6VV zVhYMycQh&_xSZw#OFuwa_|Ef8C(b(x746BBc#2VR&0df6Ui9EYMF+YjG$_>Y#Lz8I zeyXKVH7HrnRejK}Hv1r^Agby5+^)gP-W63qSKtEGwG>tlgBX8(V51lIiO3I+2ok)E zkR{-?mPobOVaqxInhF{xE_dNlDp6enVpMC@Kp#UdaYgZ3lPE?uFf)8huF{u~KX?WZ zFtaC~@TE!993duL?Fql@0gz@#NIODGdEjvV-*<%QsIk_W4)b4ij+l>jhv*>a7&ckd z10BPLg$(q>x&=h0Q>ESCI4SqXbgH!bV>(q8 z8K+9SKc-Wq-CwGd`}1WuRoeYAovJE@%5U@rO@nhUo+hRz2a0{sjd3K|?8Q-c`FUIFeY z8m`ctvHD$#D5u}$!gn61eiz#>tp+tUCH-tu<`yXBOJ70?2K-;DdmKzi>RfXD*Xtg2 zfV%@4>RyHLX@g+QCD=5qZ6=MbT@LZ0Y(-)fUet>lj3J;GHJhPhXg{#eRop?L{a}ZV z&k)%Q;*uDq#5oTA3{&FU+zLp!3$8Zs#-s~vLukIl6HnqpcI+m$A@t5@LpWOFa2l~8 z3EF@7^*c;gY2`~76gBl=-iG&|1EckKuHX^dPWvmJfOY@^_D=ZR69V?t(2A(0J`S$< zGB^POzFEzJfR&vG0pEOL#bGr+hD`W7Q>@-#VG`dzd~i%$oRHd?hIDir)W@daigA27 z<{uND7@rg$nauYO3y+CQj!%l__nQ_jjD=^O5bhhNWLI=8<=}m4*|w-3)5vIcKR!`Vg7IT zc@NJ2A49-}Je>azfq)A(qafg?!6G2wJ^-AP+O0?zwqLcm?rb0Oe*z)}c05Q-r% z*8^Z#fI8@TyFP`51Zr=AoNGs5f_ZQ~g5C_@aO=`=>)LSZMsy$#^2Buo#0`#5oS1|g z>656nFo_88r1@ypK(hv#HPEbqW(_oJpjiXW8fex)vj&IAI3R1 zPJVHh4eqGGIW@kA$GJXkslmB2&e?JPj$1Zx-j8#9+?0Xu{BfQy*-(#jeOwQ~FMXRp zz#oa@XR`4m_x9DULFEMMlc7k97p)-Uo5NsjXLBQt@aCL;> z1i={seg_42IJrT<=ieaUwfSgV1H<5B3;6vAK76SMKaGiB=lSm(#zc+U;*6Zoj`%!0 z(`!3$=}$#iC#)L@DHymX!q-s7Q*um{kehsk|NpC=Kg!V5j3xu=lxk!+_2~)x#tD`k zKNS;6$%K@K?^P+nwE_OvhOqrKmA@5&VdH^Ajr0X>_(lq!Ezr5&`KBNdb`#i|!UdxaJt*sLUPeiSh4{qh~NM3`Y-lCePWAp8#gF)OTJzz zk=?oqH<&=L3mDpt&IDYbB62I0k7vPq({SBhfQjGEY}O=MTj}%h;d1Zm?SeAJ&*;FL zmb_!j*OUCUl#^Z4D#w$L_re*$k4J|EaVXM(Rp~>kJ=^e)Kw@?MeqF-6CNdN)V50P< zsP0scL6Ld>AUCODF}@HwbY-72#EC!fp_Q#^#n3jM(~*@aPnng+qaH7-TTUY4gWQ`KFJIz9Zg_h8H-QxL z%@*q-k;#}c(uViGVjSJ`f{;vGJQN{uJbC8yAQ)#9>Tr(*SB!i$hV=3J?(D!=O_!cP zohTRSokB78H0gTW-RXXPPmB-!09V%W+m(6OD`J3q&LBSfI+<_mGZrwH_<7OcdfYb+ z(oLx4YK4zbB*GZ4%FPw9aZp?7x(VDqW_~k+LAi(FFf=EYM}uJH*%>N8^(Sdd1%Bu_ zMBGtCC>IAIZ6hbRtuq(r_wWBn`oCDmKfHQm+~9XfcYc@j;_)YYd90k;I!OL30UQ22 zqy6RDhj|RJ!vk~XBEDzTr(3&&S@p96Z;KiQrP<2@@2Lfv7Ix^4Yi$qJmpy`YNWXr= z62&g)6l~z*_uOYWR{ctN!@e4vl$?D<^qc6{Ilt+n8U5FjI z@;oC-evX{t^0w&EN7vl5@{R|@6gPtPP>enb-MfvV_YTixHW%JiF0vc;jwm#!Ta~Ir zBsD04rpWKNIx%NvWu-wZfhK4G03`qm=x)pPhg94goNP=T9BepEt?d6L8z5pOMEv%@ z`>IdqL(qmm=1~45=$h980|$DJ@GKHhwM75rwzg3Y?kaWCs!?ookbFfSC)cy+i5qL2YO{Oo z8R2xm+S*g<<%IDGg>aYXUZop$!d3=WY;%1lEbNO`R1PnWo&YBm<2P`C7Dgrh)S+`i zXQicmRXsa~%jMK;UY&tWHqQB!L&9oSY&9s}5nlvv$#6YH@xAhqi2*b^Y`kOsaQd1O ztondRk&>vh+yrH(^sU7(W6ENdNgQK}l03LdYE5zrbytgP(&&E8^G(+Gi(*OMqVe4g zrSIZi;q}bNbKm_k#!9ue6|z0q72|^lw7-U7Y0hcwPYAwvB4~q;_>Z91kHoUJFu!BW zX9`1Ag!!UN;i1yU@T10m1eL&^(FR>2poD_C`^^{C05@SS#Z^!q?pXiBM<+R$V}Uq-4I-%o9ReG^#xt@`r^#&`JuuNdcAumvv<4@)&b zD$Jx7K>vn@BNFsoE5EuWe&!%W9^o)6bN~Q+;TGpRaJY*z*ZDu5@4~Y-H&5wj@r?_4 z&=LRyeUk!)@rI}Yr0@D;L5zV5+Ona_vM+`Dje#uh0r?s+XcpYX+1;p*!8ya-viG1o zHBg?S?{3?H@+mGEo0_Sbx%c*)_2X6zqm;nuBqdO#fk6!YOcmHEEs*DxyUp$p#&uCe z)kKJz>y?yvSnj$gWFG<`3K0U@qxOSE`C;oplg-$C=q9L_Q!uLosMB$|AAa5PbODV~1f2qidMJHCZ(>?%Ie{q%7C zxQ3?fJ~xN^x(=-JjM`{}ahaYbW?iN6iW{?{1?wCB6q9==Avct#qac%9Q_!VJ^!T?2 z;Da`*ur6OrlH*yLtAcfB9X<@l55rUNHxzyFNCGC`4OzOmrS?cM9CIn3xbhaQu@{_X 
zW1#{?516wj{5l$i1r`S67>>0Xy$z2w7MbHtJtjMEQgbb%$QaLkXlx;^_dL*0JeE*E zL))Hs=WlxUItt&<^WG-vDMrNEBe`GdZ}bTi7iiQK?KyE3x!_D)l*CRtywp*{rUJh!3dx2-$yMNEV2VVr6 zTM)OX5Td02g?)d7{|gSD2Y=hamK*Kd1Q_#m^CH17r&l*6q|-Rv2|N?`ft}`480o1o zXjbp$W?VWxh_{{L^4>mpk;?U&u}D|&-gPipp;))+`_r#qx6ukTiB!_D^#xxv!|R(a z&$;JK-GJjhNhy%8tQbpsL>Qqjq`t~mSu;=~q;3c0O6m3)Zgt;0mg{fA1v@D6df<3` zX67EJukG_&2j3pcb>7I39r3{16em60V8mK7BO3IoX5qTc;e7%cG{Euy;4dKl@Ezj) ze$^kLmb;%CLVvV>uRrKN=&v&92mOisq(A>f5C0S#wtlRowszQpK_tj75>#v(2~zY1 z;c5t_On^tgk9c;$&b-`?+`9fX+%-_%39JAx#1Vr9h++%c=m!xK>RpEDQ83UBg9sXo zPho^HPbWQ$MPrCuuP_myqn(S>@&|F^eHUlNnlV)j9#hO<^GE1yua#ytK>RN87cLUw z!?{OV0E-k#E(2@GPzS9e<$+z4mq2Uok*a0)19saOL9f!X|oCZw}F4FM2fNBD6s<2YUYO$ zeyX$n)R}(HTe`|cP!RAd_lZ`IVm6NgQ14JG2rlYJP;0KDBFs=oicJwDj~W6`o$!w| z4FzRKsb@bTu^mnYt%5vY=rSv>XgQriCbv2v{e8_Sd_dDCTDI{piIjNYVwfUEHb~lr zE>bNZ2CXi;IY>Q^`P_3B*Q%N!(cKf`TAu zluZlVMXfBq_gPU`Sth-s#}&|zVf#Xo4py9l*GI^qVno+UtpjepH+&OSQy5yCuwx&%Oynw9XD8kj zH9s0|@$TxJ-K|o-L!N4_*B;cQd=*=zGM+XWN1QLwjkF4$%hk#njiQ)co-ntito~8~ zCVWl3K`{8f{keCCUyc^`m|u2d@JcY4aT*U$h^M5ZdXVi<)=5@#?KJEz+cKVI6dN;8 z4B^;c!&VD9L1O?W0MH5fUavaOrd;m8?ciMJ-_Y;%s`Xm~5trr2nz$DI?tJB3d#X

    ghjW4ZOG zpS0Z)HLEN!s?C!rTHY9ZZP01*+#2;#(GqYFk9gu1`Vw+=^~>mG(syk=2?O#~D_^3S z^VItegQa^wYDx1^O`WYxK5Ow!Gt!tmtC+CPUr(P zZo8L!i9&8UTcBV`@9{88R~SQMayEEGEM>5V#Ebaxa8s?CSpIgJd=5rILUf|Ft-_u4 z^_pS=Q;x3*UY235-YJ%yubIE3?p+TL4(1Z}l{q_F(iiAkfe`09MVHVD4Ev?tX5tnp zT@^Z7V94UbYRw)R*tY>B*1a%so&fVZ`B3^7Xy{t?c8pDy><58t{D!us)Q?6E0_tO^AU5C&w8W7t{re6V~6 zTWv-zA;WQjI#vcP;JNfAV(!~dv}6;7Pjs?i;#SeQsF5J64xE&*OxlhvVh!b_st@nb zOB;b6$6xo;lf@f6j&{>^%IE4&boifZ!+WT6%}shwcV^$6oeq%CZHfCHcO{?67tVF! z5T9*k`S~37m2YX_vvVr$96Y6&JKbxWEa!C9pdt?BAZcEZwq=8 zeaI(>c*_XS2kM$Q7S|^k*(qrl;*va0u~g+pBki~$XewOX@HxlO5XTxELh7iciNii90n^|lT?Bn^udJNsf zX(yZx?-RF}$(J!brh|^miD zMD&d+yFS~mtafQNg)G^8V1{k|9A{9Hl>st7;Tx>ua4tsP(vHN3QYSMYHh>#L!w%3DtRX&aJR zRrXjclYKGVOw|e_)C_6q5^x5e0vuWAm^bXlELW%S$T%MReki6UgK->UpDZ5C-}mgo zM3~} zk#|{?um%79RpHSUnLD?~#y46YlU;{nU#AJymdKm*q=|U?m2&Z!GpbU*q6G0){Re?h zw88x(93tPSlq26m4JFsH_MhSmq2I1kWKKr z^WDTqTcOH*sbv#G@U3QXDn|R8?PBxlv-lnFsKQ~FI%w|I%tpyj73Yv$eGcDy6>;Y5 zj)#J2nO@8;9W=C9l@+(NFfs^%?1tt@Ei4v}f^gSo#^h(qaFM@$}&X+QL*)F{RmQ*5?g9_w;=v4H=u~U!GEF9ipJOWNRy0sA)vH zy{+h-oGdaB;m0`v)JBk8qF)o>w|>5!G>*0G!tMUJ7vE`G4M%EM$><43X?g#EzMbo5 zlDICws~OTn$vUFfu0nyKcS^kC`0Sd=JeaCWfhu$1@lj{>M6N-;9P7z!rH;DTpClfS zVAt)d3xL)}O>|2^p0(tvy5=N0ZF{eL(R9ErnFdp9JDCy3?#?%z&3YGj-jeUam2Ml& z=-3={ZC*!FSoX-{w;rEvkbPpX6?LN}dz`KI_@SZIn-u}fJPC2zS~)K7EybW9`vw}m z&{FlhsjcKdS(zb++M5H}+G`jC_BLP_BXc`YiKr4yDo1mD+IZ>NZt_}MtWp!FXg|M#8z~ET15o?ZX+=FS0 zq_~P_x@74?ehZQZjaxK!M|REb@HAFI77f9GmsBqezL^!SL=^Sc?#7k$JMW!6aeZe< zXC}miS0jMaCpp^EHMpH)16&uJc$(Ge@LLZQiM7VGBktrOGMc|Bb)3Jutx6qsD~uqc zq5XEd)bN9ciF!Cz>qIAHJ0iK+*_m85+xJWCYBf)A2JEZA9RYPOCsQwP76fzDzHXGj zZh49pK`q0>{#@_1b8DP1bGfjJGqem8HFEmdIf9L!=B2faE`cYog)eqPia0yEZg~NN zz?4O2EDNTM_GXMt;SQ8suaKS)k+!qmQgX5u&+j4YE#8bBv zzil;9j0p@%zwiutFdxf8jyotVM0T5KiQ+vX|NS*vn=D3m{Ez0I?{DW`=KuHHH_kfY zFCpfh7_mk8o4G%KcW>o7dlU%})fO$xF(yxvPcw${LFgvIyTH%X3`Fv8c8Ew$RIT+` zh?lqmyWGU+63iL4*88WbeMxN}pBiF=+qN@?B zp00I&wep>E!FaJ}8|km3F+eH>l$imdukc3TD*K;lMBLN!+Mn?8V^A0NOrL2slx97G zZqbWO6^D2fn9eV|Oa_MG)jLL42+tK7W>m|Ez%`cD^P~ z9t!s6aQE{{=y04d00kYAgaVE64}I$=ixGOHN+^rrQiPMRin$OCyN#uJL>PvX>d!|& z&VyijRhpOR;sGAn?yUJ!Oe-AvU!4~z`Vx#iohv4 zr92X|GiymZCbp?b`V}*zgzjp53cTeuqJJCOXsO?9LeTFBOuA%2VISaojbMn7^^1Rr z*u6O*T33t|u$NMi3GK|CW2N=PBS%B_wd7QRQ)blNF54EbyN)!GRFU@#R6Z3&y#Gvm z(&*{AQT$cCwr;_HSl6-l8n^x@7mnSTIy>n|xi;4TAzs%d*M?ZgX4+om$QZHXQbV^- zq~BW9q}ECcM3g-IfK<6x_?2IG5(_S>(+5mm@eE7z8->BLZ=(;6s*mlueO0NXPUINc z@ymI6@`f_vjHN|^!<>glUGhx`!u_g%a?ZrqB*Yz4M1&ytk%}gcj{jrgclrKtWyH7G zPk%pqfqH-!y7@4Jk0lhQ4b8voIZ**nsHDU@y;(;qKUe)#aY7FFUab zBd0k#w=gdmfI?CRonj>_DqT#B!U|`{(^crCcyjo}Fj(3NRcF?sX-1ej$cTiRBRNpW za+$1k)N@O7pn_&X73v)$;03-7cnO>3d)ldS43r+3NADB_34)1w(+_M}WV^KF&TRB# zr+44)Zcgx2XlG52iP(e{j=alMA~chkOb$XXU)0o8e2#vcMn!$EY>`h(-$t}#Qdg&w zp2dT;NRIn-n7@;8q&QCtGs(2~&FRNl4(WqF$ev)xilP$}f!V&D8Bcqo%#E*>id?#= z1Z+LHW_Pg)8{STE>^t3=pMO;;0G7~wAv+`V*i!cu~}G1x;c!){O@>vN_8 zrqxcP%W)$g9+cXBf;R6Ay)H`0rZqOid2b+u+}ql1r-dM6)7e0CdWwPND4+RMT{YS} zZP6?Ko?YveUX4Nh96zPO56MHjb+>%eccoHC66(G2PJbKCMOY{o*$~rp9r5|cXnwxn z{}j!sL}CPJ*jl>*fv;eOPy!mvg(r%C(>EhjLTUWov4H-&r<|vhKRtzj{*R|*nMnNv{U!eFCoTL0w?ng#IuCiYsTqpbk;jfxIvMF-4_`!c8m;wym`F~+UWkDBGh==j)m#|ZU-wP-{RQvt4`;(T zyXH81jahS5%~dkuz#zx~U;q#R008&^npd4jo`3)Vh+qH!NB|H(>Vno*4u)0^+6t~V zhW45?E|wN}IUqn}SpY!q&;R%Of4l;t@l#UWbjX3{lI}tI4Mvn%^!;>=GOY@Tmw;kl z`ca$f56K~{&s70)!T5nfRxOT~f4pf+FL$hH&vK;r4GgEI*VUkkcZ!x!Fb*xP9FVu^ zLxSnXPtNm=CwiA|w6q>XCc@EWoB6R*3fkPI=Tc>(CaZ!68bKYKBbXeJu0pb-AZ=zS z9Vd#L10_cX3j5tON67@Izzyl}cPJC#Fjarot*g8oPO*_vmIEQ!wa@iz%rg%Pr`W-u zq6|{@@48nCGTL+Yb6kV2>sN+~&A!-Do%QcROYKnS#a8DWa|&aMXD+CW&SInj6i|Xb 
zJnamw0!u|oSb{&Z2~8N>DR}`OIX{)QlvO~CPpC;v&CLFRr+{4IIDbOqi90yOjud&! zxYhVT8p+2B=i-jz#gsFe>F2Fm1Ss9Ql5oJYFXtjt(IwB4fwft3O9R&|ENG1_7wF2? z3Y`{hRxB?fuqg=omA;M|ga@1aEz*jU39^LG@^X)U>I?2W}Z?~CB!w``<3G1E90+SwX9l=K1-OSgIvoaS!kZc{|WT*;i;q9{rlzh#RLZ4e62TnbgePtYi1KqBR11)_4N_^9lrVVyAN96-#lO_%A2s8 zR_bNiaS*s_8(4Jy5K8MndU2%bg`iQSDrXq+nMWGTbuvf-TufK!1pcg{n`KD zS8IHi*)PucwfRkX*v2}op*y;f`)M_t0ZQcbW`|%?IjXOFl2khR#A*uAfPKDy<0V>J zS#w?qmcCZtEWr!WgEz5Po~@U=uyyxv0pl;V%*Hg9<&OesIeE2u3XIsZ36WTffuKl4 zM&CR8BsL6NpGvTh4Vy6ipkxo>LCl%))ggc(VkhF-iy05oG*mFV6E_qkSbqa_|E*3_ zb)Nl9)6;6|3Ksfd_c;WH09^Kz(f~9Gc__O&6o`Isg-+VOJ9-Ky%=tEZ!2uK_zffm7 zfHO|2iwL@hNS-cDb)eS+De<+Mj;UAvHp+h4-bKvC5m$gcJ&sz1mMiMUTJZaU{-EbO z1O85>?@&@(e!u_#;qO(`hxaGmsq|M8l`3jlr?Vk@Xq&zPZ)Ys-nF9nU^w60Q)+xpp z9PSJQN>>6U*GU@Xe0ytK80CwVsui1wMEBB)oM_{@vwPHCV43HR3k{>G8-lE-o=?cx zUb8gwYNJRWtdB3y{De7GHS4J}{AzQ1r(LLye{SX9SObtHU!-ZiMha0>ZA7g|nq!Q8 zV&QL56j;>ysW>TIsn|%O0oINlPFCF2_#6T80Trfd=^}PF2TN`KkUQ7_c8*Vbu2mUH zO}!noJpan-AVaQ}n{V|Z@Hhxzwtks`z2nqBj}^ADjnJnq?!y0q6dl8v1F}9c(#(}S z01?vzXwpRbanQ>b0iRZ&k~6YvCEA^6iJR4#ttNJhQ#F85=^jM*!a5frdj4Aii0_?L zt9g6ysK3}Xyi%r-QM6B&aVdI)~ z+C0T7SZoXrP=UK$!T@@R1tHEu>pk%Af25+z$45Cz% zX!lUm)JSf*G~de0_*se~+9_;sEL~@8mZyWD5=cZ)jwGAMGr%@bwt*BC_uGv0$ z@;B5nb@9PRaPPd_?+*glrbsNePEPw?y1pWu?L)T(tpG_X1R3xI9S6U9gol1=bqGs9 zGFo3AV&t{SAyeb{gbwbCwaXr|eZCn06DK#xzW@%3K015brd-c6wN@)=!*FqQ_$=an z&tC9WlycjSDJ@ynvS(3}S*uqte};KZQ3V=6C2+B)n;Kbj+ISbC+$?43jPR&arTr9i zxq<-^2ajyR))J^<#w)Cavo-i3BOuDY&?e&Tb=mm>Xy2zngL=)72T@=GJ>nBeazxu) zM94)SvKr9b$~8$NU1A8kV5V|!qk{n~vt@nYrNwEx8EkfDnJ$<5mkPC~iPHB!o^r!U zz{Qs1qkRi7$wfd~sw@+7D8b3`=aee-4S=tgq`VZ|PVn*BdL!>$nmXx0%uhihareTADs|-1S%9o^7|*Ipv9^fv(z}(`<4#W8QzvSE^BcO!4>H$Qu*@0R7K= zwKp*|bg-xWbz}Oe{^{{6k!f`Bf#;Mr*i;*&d+^noAf<@0ig!73iZ?c=$@nUO5|*S( zigV8A$xJ7EHV9=P-TghRZ!Vs3TwJWoOFk@eb>ue#IK-1sii1uW)3_VBB45eq6B|G) z6rssI00nWYqi|!}rZMFKReb&D^J=T>!#*gWoeL|uctm&uhClBTEdDiQFB@&l*W?=g`vaMpRAR0xn|e6O zUD&SP+5Xn_*VlYiInk5Kl28*ed5#}DTzORDZg0==m8{nNy-T+Fv;EXux)>iI!OY;t z(fxPy!P#dW#q7O&6JOT^*U)1ublS0TAEMd@Vgt%9N;hT6v%aq^d&)me z;4LKdHON!Vxf0W8q3VF7BKL*+@jv*gXA-NOLf)>35Mw(R4n(ctXg<-2`$K26EXM1p z!<$q#Ywmr4T*=nEi#l}?iR^&8ZLLI#yo`El2__56#J@wz*j9E1E) z#W7o@t*>yEmh0#<31Uk-y-Sw&&PH$lNWd;M_;x990DykHpMCYO4a332(9)3h*C+i? 
z?wzQPg<-NHcc4A;!q_`MGw(zZuWwFR#H|q-WW;0D)t|^JF*3xqVj=+%v){;(<>e;{ zSaZiu^8v%QT}MF@)gIzbk&Z1AmhBRyS%|4Q!G?*vwiOoMWqP{(aJZaqOLh}UhVG6{ z)NYr#;!Y1=OeUI1^fG57V{(@kjSojKgtU5qrg|1%`RZwhU^xUaun7DhWRabS` zNQ8F5R0XZF-WZxBblg>@?5_^H%%<@m{M5eWYXg7N7`QopQR}FmE(A1T@=YkHU*tGW zu!`1&m3f@YZc#p096?>5)YhzN!Y+;qSjbZFstm-1Y2N$MA`)2P7@eBl_bs!>%PNM=jteE0`1&_~b0byu89YbD% zrcQ_orrfQIUhj}Z{WE3O94j7eCij=ui^Cs59o$~m2M;~k)lGF2;#T0q^73bMQQB`W zFKFx99j_Nhm#l}gq?g^j9nTLVNgZ#O95a~acCPJH~Z32*vl(e*vS|8 zAypUUTl(iDVY)tWPnUqa8NGA@y|H`<2TtG4Mh1bj)bWnG9V<_}SFlw2L0h>x%$J&R zFPn%WQ!0e3Kv(s{ozR^*Hj7h7E-q{`CD!78G=;Q;Ovad>(TJ4--0d@FDR0XyMN}H@ z!GhGUns2nO#ia5HR6REAui!B_YN&kzgR=@84&)C%eak8~U2`7d9M zty@;~d^4Zpb~hl@eg7SFp20=C{UatcvL|xvLiEho?O_Jr8SK2eL`ceMiGmj)htl}TO3k^QDAWdfZ9>=Ut4%E58Pn{*vO!`Nc;d2+iX2uj1d z%XrI5V{2Pq7gVwkgpIrh452kKO}5>11-FbyILS?Bpk3VWVKd|lpa*oZEgheaBOp|b zliX0x+%;_|J*{g7bhOvGPuoF0x|@B(yKT4~gO@e;<ni>JbH+&txnTK~=M4Zm4RtGmxI$@$emrgUr7E+c;Y*$8M~ zi-5BSNb6KMINi4u*=EuvGr)0z`&8y+s?~Tx>K$7WUQ&kTs7V+#LBqAq^iYW#1s6t= zEd9J7M@p2hlVwj*KU|_BW~7XpN}E%Qph06FT*x&d1e3%H4%E2M*UM&)Jr5L|Rg=KT zmS)g(C5$j-9nodA;RKMijQzEiQdFiKRHkH(lCsr=ZFiyQEKyKgc5gR7we1G+FY^Z) zVa4z3qZHPrw1$^Mji(5^#W5D80LtjmGU7!!$S*b9x|%tf7Po!Mqb3Y@sEMGPrt?MH zqT(D&d{XUiP6?W2kZ#gz!n9*NiuizF{)vFJ(T9XYezKR->Y>gIqJe^t$@~H70KI3y z1+0m1Rvj%zzR~I9=S-`6XWhh)Xw^YN@f;d)Qo~^MG09)zd#Saly%J!Vrm%7>DV4q9 zdDM1N`6zMIY~dX^2VEGt2u%am&}B2FaDBGq%WYnZ`!=vkiI%Xs+i)YI+JHIsSrPJ7 zRA?!|BSXBy{Opd#=zY>EQDSxA)i-B+3q}Nz+=Z5XUO!tfn`Y4WO9pVmyIKRV|TnWxrQ{-E{NKxCBo6|JG zAIto>kzJ`}q=JBvo@KeWbQEUezAPu@9n2V2bL68s z7R$JQ9A0gmGXi(^;c~O_Vk?w8kP)y>A|!j9#vF`i(Ft3PQ;mSwXk{m*e)A-^0ACYZ zq>jhX?SNPClgmb^F9~o$R~mV#)Hrga_^_!HBdY;7bNmRXp*#7>$+Ti%f#z&ymwGG! zAuLQndlap1>v6kXW69n2904!msctw+)zZl!OVi7H)aYO6Hgqs?M%K9m0&*&@)%{8+d{k=eGvTm#h~&N`0MOau0)cfIG96`nP{< z{fn$CXt3Ywznb^f|3B1!jDOUBD|r8^|KRUf`q@wVtJ}m_s+!Rfii?BHeXAn$!lA$C z6`!wH&9H^!h)s#rgFIi*ztO>WU@1g6b*mJmfyncAiZW^JSCb<>zo_qV>lYp7ZU{^KkULv$31y5+Uhv%t2G@of2%IhmXYEfS{P-g~Mm00I6fD;6(lgI|&ofECbD*;cy@i|6a5!-8Bo~Wm>A2;N7bib@cWZ1QjE*aM_lny+Nl&jw z+5+5~)qNKrL<&sV+9`bQFA1)aM1y2$n7O&p{z~`hw z9qc2Xsk`Y{A64xdZ5TC1N-f71`Mq-A5kO}`G|-j#vyyc$#OhM4aH%eJWh_}w4XBZ9 z3SDPjECEf1f$7BqZeQR6n>4j~O*D)bYh8>VR!3NSK*PQ)M zsOx4U=9Ug6W#x7I>aYx6A%-j_AQ~A+F4arba_&Y2@e!Zu$Q~8AbPc4MpxjM3*PI~T zQgv$Q$4wp26ff8y=y2|3XaeTXR(%9Xd+g(0--4DpqNo1u*z67fUp4u@W zL*zfB9{I#oa~rO4CM`(IQ)(>o4R6B$k7|CF%>t#h9m~oIj}6^ILfQmnG*P#rqC#vB zF9%JPAFbPkl6WBl8^+qC7F{~>J2=wzf%;HxL&4VgGTn9cV09T*xF9^UBswy(Iy_SI z-H?)T^f_PBiNnWhO>#e)-8&nwJMwRG7ty|{kGM=u^g=c_HE1H;7qeu+K0RFh2BIii z)<}47YxkxLjZL{l*7FuN)Unz(fdjQMJprLt(50d&>fa)t!yZ z@<0nri`P3yHbF2s9#I2OPPWC_3** z8*7D-8hlS?W88;*)3O9h*&eUZ;g8?JpA#OGYkE+ z8fAMXIA6fNJW;63E*L)>=joy6AD4~~AXsA9KCTXNGxpiMM5w)x%|L8w49)J^mGV}-LE9BXn`usSI|#8yHt`Oft3|rKY)#o3xg%A& zl=8*PUHJO^c?FQ5C+R%OYB482tjq1GDge?)d-h}Ime+ZM^+L-N0i4p76AG1;Dq?Be zDUB$q^U7MeN|3F~o#n08bH6z~m-XcKl->-Ki~MNviRkN=-pGZ;*5QX@fK&E6$5xmP zQdxB523=&TLo@M6#{wH1&6%V@4ZEE8G37JEw25Imbrk-KACAb&tIhcE_M1MA9tsb`%5c+i>1hB0i&Fwb_NC&8 zOsjLrXGrQv<+}#g=M9fG>v@RAidYWIUka;5Ap{7>XIh~+13!Cw2wFS;FrHKwH@kqKtl;_5+WiDL~l>j|ZX30lki zp7yoj8qJlI zuvfH%#^AHVTwI|$EA*Qx4eJQF4;ligI0pGJPEKE-B|#p2AgP30!7J?kj3yl17e#){ z+nNhN>3Ir?y%`o>ZTlb7Fy+8WR&R6xL+L&kY%uOz4f8y1o5n8w-c_c6ne<~CuNH{~ zWJX&+f_rHlD+I3?Vnj(8J;&|q1rgWQj+K&_HMM=(598R`WRoYOJRTIQ=eN6Pak!Ci znNZPZHH=GbWfF1xCfO;;_Y5i2-bk-thG%HF1~eY0&Ajs4Dzd(0BwR4{1-5Cc@`3!f zvux_O0V|1|)%BN(N7E0r^|XLOP?KZiH_*t^-n&M$lm0<80MO5{5_3PWB;$-_zGe(0 z_Rza?*JDJ+99P}gglqL99-K`R2%WB@;&RQ0T~W(*g*4MN;A#G-QVNFXqfd}CxgU&4 zhZE876b+4^4gQjhLw>A-5L%=I0v*6Mbp>YQ0k(~9PhBJc)Va_|IaG;vmUvPnIFyTn 
zR+|8$&KidS-5reaP;&y05JsP^B8!h8+|FJXDeP1>E`zM^gF+%I%aq#e2<`(TNR?8c zVsZ)BBsbJ9m_GogFfIFvdSXE}6H1xa$l4ARZEkxoYnov%ed$Wkn?lTMTEZe$!aL3(S{pK+I%gb8!u)pPz;w zL*k?PDLlMWU|yaYRKUU%MC}TO+1fZK`80ht5z6ci|Ja;zw0 zG4tpRwLK@E3km&>x6h1kgwjb0^Z?LvVQQCuty6^3SaV@FM7Sc>(bn7Ns%N--98Lue zs2PDwd6+O6I$|Q!?fLea6I*{sl1i}SkFB&G7oKxFY*Y}!AUC@0DBE!0UqB305Ee5I z8d`78-JLT(es~9<62`1PYU|TU<%^XU>rb-{x)g_z1$iRzgS`%1!~qkR5i9DdnG&;5 zW@#n9@4*ME!a#&+>G*!H6x|}<&PgSG*!7$`AJl5*+kLXah|yZwB8b&zb9yrLb#)rI zZ@%ktWdnOFf4=h)GDryhq|EUp!Yi(^m1N@m;Q1ef=h<=u9_w9r(*LvYF#M{n--Tye z7~UhQ_D$zu&1r)cLIrkH_7-0bnRv-qAEOeiCFHb;e7K`Lri^?xr=J{+1rwV<;`n0_Wvo!pi=wPVC%Y~D>nkXV#MFaQgX_*+-kKwEP ziemlvJox5|uWBMVEwej>`(dTT-BU}#FhPZ-euE`UN?iNi-L+#*92Kvz+r(P|Q!;U# zD@D4ubfsz@${2ufj>p@ z+4alIRfi4F8gf>pKFX{SC-nIR(VQ;BSEtv97P^VC0}dM}4fUr@pbedmARjU8>6#dY zC!g(82-!(fq&Lcg{O>Z1y(;`1=acif-Dz1)aZ-WZ^!4kPcCuf3c;3uH*V+TLL2c~! zk}b593x>K9D%~W;^UJGQ(xlCwu)I6bn-7zloVt)+yKwhsR{7Dq0~BE4T~l*SuR!wm z3K#9LdXlQ<*o0~H57~q#sz8c4lOVb`GzfGih#4w+ZNtA1N9uP|+G52mh=XvhscmQU zG@oc6~)mS6Fo% z{IcPoO38SiVj3JxKlo&J`ZG#aO>P70W)oYX&g+U$6)g}zdm<*9_Myn(Vd`OnPZR=6 z$%O|X%TbIXw5~7bGIkaZ_*AEJyo*YGKOnb#A@?s)G1nWr{NNvOY0GFBge`w-pdK7o zi3iM|tWR7^IjQXIZJQ+?4JPYe&J8p=9v!bBo^6BSnpF#)ts*M5Sq53zrg7HYb%iAK z{tmN@YXVL}{LlqN)8S^YUpypNaUfDZkFx2I#jU5SW!P=0c)uzzE=EhyRG3fWqX^@b zZdc;GcJ`eOI_yMu*e}w{Pe?#J+R-y*>xhX`_0~1jVA=cll--oS5S;%85bY*m9|EG`06XzXILS+i?j zRe}e47D}WE2*I>{p>d#Xg%KMbcfjkyvik;6$;FAeP;!i{Smk1issPo9mPqrVssN=1 zT;j8fOS@Whv@ouzlwCauFMAKfsVbI>vd}55I9@k$4C%OWxv|GBf##3wS2)bQ<&z$} zQqjp2(z(5>GJ)rolH&LazpoY`?&FM!*SgO)#s0`$#$bc~vevIb{~ zRl>IBV8YT9zE#H96f@xUsRXAK0-y5T2U%R{?}2YgWIMve=aG*X%rjsJAHq-nvR|cn ze4``Um0v|m=S#GOc&W35?@{3d|A5d>wAa$cTV$eQSI{o96xzC3=saxX?<%u!~c@{oz4Gy zg~v*p_u!!io%?wNZF19_3YZdqUcz-#)2)n*Toc-;uQm}{n|6On$s1#6?umSSdbq#W zYv&lExChezR0b68whe(HC+hu^^k*Z`qec{nnaRZqvR!Ryjh;bSdgt=S}DA*z%+;Cl)2T* zcY{Tm_C;PWyIGZ<&v5*sF4(sc$tEO4l+`+}fw(a%Z0S<*{0AQBf%ZF5$Bh_SNP>O{EqT_yYMFpGr>Qi{AwP4NBQ>>;ZF?{-j literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test1.xlsx b/pandas/tests/io/data/excel/test1.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..862574e05a114240e403a653b7fe22d2feb458b3 GIT binary patch literal 12074 zcmeHNWmjC;w#D5&xVyWCAi>=wxVsb{+}+)R6A13^?(R--C%8V6`}%d?bl+d_?)gw> z)Tp!OsI$jhbFVe`l9L1lLj!^Uf&u~pA_9^COf6Ue0|8M&00E%_L4jxr*;qRmSv%+` zx!M{5wCG)|ED3YKKq#|-K;EAJ@Adz91xDhvEV>xc0?!~H5y|RvBbu1{D%>ighNY7q zfz<)#er724In!S4(|4U%%E$q((-XVtw;pa{YcIC)M}`Ra32aBuF9=U^K&Vp_rWFFO?(j(b%~*vvYIDYpP(_K2Ip36?@$bY;_0r^ju5wB}!8FURF)D(Iu1lU} z^?}QPB&q~~svX-(ofS-hzr}x53+JU@W$|Nu#SDo0zOMz%a_8^K;*L4oA{ElX>a8A-0*evx`hLfE$~)v-6S1TfJ5 z{QPfr{y$F1zr1=$th8)5BV6E_*i+EJ?d)nalAxrsfLJq;lBc)$3Sw`jWO zge#>}YZUdjhOgO@gX<(B(-*>(NaOUX*wCo?xPcgaDLxu~(&}sacV)n{f{G`lffe-} zS%-1sXf#H=WH%;Tc{WLei<8a1pJ!x^qU9MIk02F?e4!w9~vy=_>3PZA%^hC$f3 zU-ZC$fUtp}fL$yZ{*@=rHue_!HZ~SNH?u!-2Ka49dwcf(_tg^LY5t2deyx6!9(J)# zt5}Y1RDRm^r@&Gw1>r#p5v*8n_ z?o|L#9%MWjUmOCMBeo;1yx0hF%tD2-I|xG2gALZf_g-tY)aSTPwLGmSFX3VDcb-Dv zh#}=qXbi!V&<3-sLP3}YmKkLMUD1>HVa_+%^A6zH`GvYu0X%Usoushcq>7Ab>is<) zsEIFCjI2G1H&K8kfQy8SBY_}SdK{e^18>x|jnMafg8|Ps2K=2$P}%e|exN`=;crC} z;@cB%RQfB4N|dc^(zwxHJo6t3kk*T=82z&f6rlv_r`VOX<~>^MJE>HF0anv_#W(BD zQXp1Ihs8%2$nE&}{Cjs?LDPuM1x}}pQ-XD{N$I-tn^r`UlWX)V<*JRC&5`USbn0}P zZMFmrPW|_kx z^)#guz#3k%on`J;tzygS_S0$q)(HGrSzN9GF!*{DuobPA#c+9G?@NU{drj%gkzkEcay|H+;wbKOFL-7HjGg(K`Y>2Xp z?&U0q;mGCuVgDsiTWWr$d&3=zR#nm~1>S#xMMOv*+*s(WMu^zGG9dB8bzLhweYmpz zxZECDB|V;`cEy3}T`g<=RhbFP`2I-diP2^sz1TBEK)#s$NBbXdnW(2D6 z+=L}Og*5SfH)8gLp}PpB0_3aNK@vlBV6 zu$r&`TwYC8UD!L89N)mS%EPSQ1cie@&GbVymJA5(D*`q)SYDeUY7d|*l6;SY?08mUPmOwsAIcGq{Ew*g 
z1J!*r9Xz}R2kSb9=6MrCU#B8tMi5dNhTBbU!^3}F$*`kG#0eZQyFkV(vS<fq}VSm&rzSbk7q(5v%^FqA7CX#7Ilh*EM=;dtv3x3je zlr2OeH~>NjGQeUhC5%rJH=R85%U76jr3_k`vZKx|o4IZM@hf2yH^%RdNd+cuG8kuL zSGXm%rV9P@=K)8xmheT{$2hK{A@3cQaJCnU$7*lsa}4uKlt=AUHoqWLSgm1ACrB)A z_bgi7IUB$JBLO=xk=kS+fq?o5f40%THVg+-BP%0@U!P1rxp%BS8ivDx){gnW4+n65 zV%v@)TiY17j9Vo&%!tRWtvi-iVPTGK!9fKfwdKCptT5~`! zNin)WQo2K$W+|cJ1Ro~)(pp$}o9XHH!{K78HQ7xx8MZ4nQKwDrk}o}cA(?bK(aVC9 zlGR;SEIu6B2-^A{mi9?<`HQDLveh6|{{rZ}uw`~4fjHMPHAXK~U`#`YF{jTud3e8A z7s(fleq21iwPXyMe1%dhlGr6)1NIe8@raf#^)i`w@V7os-BO~Uc$(gmJ?~-GGhj@rkPGiXs0&$Vy)rjS>AI^; z0xl0a&8G;V{4_r2>wtd!)PH^StkGUKRS0a#>YGqdx4?arU>&UoFZVE;-K=`1JdCk6 zp`%sZh*umHFrTI5RS`%4*R=5wwp@Vf{ip7r4C6wRqxmc}9nJ1}$rEk4`YQ`LVR8G0 zK>C1MbLYk-bi=OwNYphrKY`X*s&V;f_;1Gwic)IC96K7}r_Aqxyg?L5#kU4~;bbSE zsv5#5xi)&1$)=%DYywfpbsLnZ;3bsr%HR9E6p)10F*4^RXz7Ni;V9g===Tgt)jiQ< z&2kVjWb%D}IY0Oj)XwL1wSV8OQ`J~YEolu&rl@#28>RF5{EWG#)BbXPc)@WnLvhj7 z)BbcnoYek$!F^q^wSvyj=5cv>5Qn+$alI!Sg}1bfil>F@xEwkF|Wmt)0A*D|hJA9xFIyTxJ?!9^o!WJVu! z%|gLD^L)%(QWYUrr%2^~!P8_y_PS&W^ff~I!NXlfg#JsHqia^>-Cr$c`P>aj^xpml zqi1l@PT#O8z5KBPmoO6>UR#*qcV>I9PEqnQ22x?SP^upf2DJW?yWBz7*aGb^aN7uv z_*y;ev6A-tQ;lI*EHQje3lVa>fF-HefgVqC1fq{I&VJ&U?oo?8!hAxwa#c(#_0V-) z&&I?J;NQ(t<0*n4jK=UPTf$RX2{0XeE5eMHcaJx=<7rdfudB1h zchUy-&fWYS;Z6O`D3ZK|FRxx86bu~Zs&6p*%O~NCZyJ`T@o5Wb?1-RfA}Xz@ao;0) z$3v)wpKcGfrT*2L26r(TZCN0ls;-h+BQ%cYbvuvY& zBaS`GVFV(w#YbX_1|M>A#fcss>-$=BsCsG=R*QSoeXQn&Uqbg!ONmXNCw3v}_=dhJL`ehS&hb>9M< zuvCVhD?S}MQX_8Y#>js{m_B*{{-ihY(aEg5f1dtydxvf`068p7N@oPKcJpDY?bD*W z-5D}r#$)YJmb#Ubd4O>(A0=3;Ozy<-9Cv_rspc8H+iJxmF7J6O2|U8mthL+`2TZMf zy(W}7nUQC%`d7#V37)U@IL}$ngfs@aJ5(NqZ9uo?&2_K;*!maQl+)wA)qmA*t^a?h z|5*O0|CR~=RsSK~viETv_f@q@uva!=CKMM3S@>2)=!e68&nrG#tDNQx$&r|pr~`XC zXL@BsYR6TIaOzSkN&{2m?+|1Cv{yxi`t+>1%V%JSXJM~$Btm*eN$V-j?-_i%WNAs} zFZ99G(wAm$DHv&(COi4skp^21#tIEE!A4eDicEWkS(NH8z(^K42=N6f#??PNOwhh^ z#{HY{whR)9`7WWS=HOhWSt!4+;)rEkX@9LbaAjhxLqC3K%+{plqG|UW+ui#KrtN** z4cJYBjQWHAGbe>u94p67cfvTuk=q*+LlG)+T_TbjpkE zl<8(f5`OdC{A!&KNrQ?7D`}vVw)%kIcaU1^Q!T=4L#DR(%aQs|YeqiuR^NnwI+Yb- zcwb^7`jv3qkdSt6hSL(gr485GiI5Y^QcBhoeI!w@yu4gumoNuYT>!Jog@$ZC0}sx| zv<6Ey@;fBz*1qOoZhgV#*b?Ja)j(A#ZnzKQZ&CxQFGDphSAv+8NPrrd(u7s<7&mgPcm@5874k&h6()t zu9kzVrO~fts!@@)S>{A)#awv(a9$>-Ks(P)itdOD48ElAW9ZX5g<^!d6ZAn|y)o?N z7B4imG|NmYZ$Xk4A;@NJB#kJp?I{0)>`2Y0Z*ZaiZbUewI%P^D@-Py2KP>ezYSf-a zqmd$Xlf}f-93!(=J)t9sk0zBtOVHt%-)F*F%YtHvRIRvkQd;e61I@nIm_WInoa*_? 
zp%~rypoKdqnX9R_jMG8ZROUL^R@h8#Avg?+LfDQ#B_W2zKitYuF06sno!JnFxMx>-s>j-XdlQ$eNk)#!Ma*pH)g<>X*iso0J&7j=J+9sd&&sKB} z8ks8T_%%6Q38Q|z{Ls^oeu;XY{RL7Sax!LfrFuG}_EkL~5kLdJm3;u{yUxy2{II7t zP#}~Z^mnlxeA4=I^nIzaJm%1l=p=}`+xE3#H|4!R{KfQqAH%28T~qwe5T75Z)n?{R zo=o!evGR{f#`=-1u5j6Lxc&`sFpdRgNb*kHGRfsV;q|L5<4OY3p zOFo%tIZvNu{4R9+)LrLU@PV}y_mor}Ii_XHjgWcMng*@dVC}-YI+RjYcV_i4JnsRh zMU1AOG)|_vRE#m&QplfJU+)VK!b%C!a+&CX0*g}uME7LkNX@Epsiw*6C=@#f*5-^3 zH|pM#jux>Wlsy+#i9rbxQ%$$P@C1r`AO@|T{TL~+yirOWawM&#Qh_>ILqmzEgA)!!n+p|^UtB|V$xsAhSZNtRH-@DQbIFo7g(~D(d0j2RKh|q3Y`!ewhwghPs zcK1=+T0z8>jbnv0PIXPM&ixo(Hs!?e$a@d!m9v{2%s7HbgiM&|(`uH*)>5fB0n_Z1 z0z-O_lO}$}Ej4*xN^)Mfx&phj6~#b-n;A~cn}Fp+o~pVF<-@7_ znmPtxVVH?gs%uy@S??WVh6(>5dLY;*c&XVRxYBVZa$hp~6T6w*`RcGEV~#4XZNs(u zQ1(x!h=otqFbH_(!Y=6)IzyW1>j|}fRH_6+^)e+WnBEPc zMGh^}1%nOXoV!D2!*&H@-&Y?a zA%`(#tH~1~i?ngoMv6F(}J_@I-E$+M<5IYRot3DKq$sF_|MG%5_X3FY@AC{4+~ zU>sZ0PKVOub+Gqnz>^q!PS3-FrOH!ECb?@dDIa|(?)IrpcS9pWE9e8gr|iH(>W_y8 zVXNG|!|CfnX86quK%0TVDk z2~~6Yff6GY^==YuMmwqb662<}Il%2$ZlBTtb`l1hAvsozrI>BxnhwB2=t9o4?d>z| z8=-QX0^1MrRG8Z7U*i;^GTKzw1r@H0d${>3UipNOkI$pT4Kpp6sR$P)$4Ewsu{GCL zeQf6sO4otQVq2iF7<^U=&6j@$5}5D`Izh`fYVTicg%Ano~E+wvQ_x8#$Cytii#BKbwfHj${)|EQl+oot>Ji-5-3)59n z#EUtFlLD};EQ=pDO8ey8sU(4vguIg~EZKKTOg8l>R^U%ve0uf#eA#aMeibdN!T^27 zmH z@k+2_o+PNQ^-sjQ<7CX`J$B)r$s!H9XzXy~<|V;+RyDRVx|=+lo5jE>4(%JkPi{Hp zV34yp3lw1B%j@%b*b@07`@Cb&sEUxC8Bdsw^!}6;7)Cx|yYR+DB_iMr$&~==W7K#& zC#l76cAeZqj7KRmYDgmSCfP<&!D`{qy`eB6jP|`nte3d8+ye69;3~<4A7knr%@BPu zJN#LsD<`%<^|HyVG3N9{X^ZBGVLee2&3e%l2yyiBAjby0g76e?3^jw}DhNRZk`2geXeLyhz3s9j zqaozo%lJS>#-ihuB(rU?U9)N+v(?08HcFu@T0fn3bzY(hzrDi@%c`K0umEfU=~TEm z{AUm86?~{v@exj4%DA<3jSTxuHSZTCmW5~;`f`gYB6JbLlC28-myX`k0f+7AcEAFY z;rTd`)=j%|yJ?Tajsky4)0(BByjgh&H29r7Zk z`3&8Mz+-$o`Wk>=(@XeEP@eQw3_gVsh3Z-}V`ZrhATXK=P~}66w#XcF<3ysLZXyAz z!y^P9FmSaQe}BoF;2)1#9D7QfZO$?G^;>ircek}#C4iBGn=j>Siu3jtsI9ZI7~Zr- zEYt;+b#W2%@0?q5YVetswtufW{`zK$|GkvqChDZgeCy;b-}DCQ?Wo?+#z4;A#umV! zXKVXYN#2H9{(C_GRs^QUby_d~9O?kQK?ppJNN1%C{Gh^=hvTMJ2_)XkLNQ$_r_L-q zCw6lcXEozkcU^_k`wDp-6F+lOWTze!V|gB+MsFXJ&5>R6q88lWJ^xLXm;^${7ZxAZ zP6Va?VH>hGEW39AgG!Q&7d^+=nnNMBs1jI%bde$-rV?1D-z7e~xTLdLR~!3^R@K#$ z7BIh9i(R;;+fjmRz@Ij*V&wG_xN7uSyo%xjvjJ~9sC>Oem zL>}c%NcRLiM_d;jjF*v|kC`@Hdud{jX4h>F&uy&iOg1jbdV;T@PB^I;Ph8N7SECOm zZkC9f2v|C|fE1-iY~n<-{OOkh|0daQA!Q6zrJV$o?$=LrS7(Q2PrPof)dS@}uH=ll z$ky9JUfw#~zjJ!h2whm<=lIE+VSxNsP8-?U=l4@P6} z^dP=|lWs&+MqjDr?T;I^_C~L@-Zp8)>^NH%KjZ$YvDI1im3ti;0q2tT+)H0%YRBINIG(HR0d0rlZ2|wqJ75sHH&g1L zXL$cu=0EQLFwrX~`R@S#K3@1o;Lm%(n?V0%*zotje;>K}YvAzPJn?@X!1^8M_j#K? 
zkuaeDHhuGZ@b8EJe+Ku#{a^6kMge|D`TYd5qvyTkBj=r_E7e1%_r!|y1+ zw+nxwkP`nR%CF|(ca(oG5&pyg0&*e;0{XpR_&xgH#rUt$5>$VQ{zIDOB*EYK`qR3E O2U2*Wd>75nZ~p^@q!D`n literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test2.ods b/pandas/tests/io/data/excel/test2.ods new file mode 100644 index 0000000000000000000000000000000000000000..2a90db839026b57eb35a1f5c2120b8a257ea416f GIT binary patch literal 2877 zcmZ{m2{@G7AIHZQh9p}=WmjQ_OiE*?EW;QVW2eE4#xjGMm>4_NWx^GbqAU?1#Gv8I zUdWQ&jFBax;i`zP;!gda`}F_!zxSN?Jm-Di^L&5jdCvR$yuU93#>EW;{HMtQ5(9LS zKdtVMj$8(fFYn|ZyJw?k!+iM4DhVh~ zlTor4p41p}csoAu$1;~?ZEHcR8_*?RYxM2s<&Pz1lT^xJ&2K)eQjJn=U-)It*#yx3 zN*&2m%M20!Vpwbh(Ce3;*rd01APVf(lmPi{#aez^VoSctlTrE0@xQ9l%0oi~7#h8S{EyJ=L4iJ-1)(I9d%&5 z-*cZclqBe*t3DZaQ2ShQntU?Vi{)QNMNV9dpP4P3iE*GiaTFh`RiRgC`Uq6H6)GZp zj6V8rh)FRlFXEH=t)(p+qQW;EY`pFpXwWqqj6F4d>%6K&@$OPq;avkylq16XjDJ62 zN|9Lr$tr$`$=YoP51yP%l__(RQkr!gFzrdmKjMIGKZk*^KsBX~i~1PW^9Sxq+ZSAQ zL%h#0s1sf@NpxlgqQ9_9XqXqas_ghYbFmroj) zTg9zZq2_Pp0aY>^dRElP?I)F(c(jUhZ4zh9=`@7Cv@W>l*tnDIaIPsb^5b2yZ)g|t zX4~mD#Pe(6G~2s*5)x_sJY%dg5D&3-%B>rIoti7?H)5|!SJTLzYtaEU?+cx(W%~>5 zNM`|DbmdX|#pH?cF<1gI@pL2*}V(QQ5Mf0Bu%pfNoHQ|N@ zgDq3bHLn5R(3%>qOlZx1K9&(IS>G91eeDJ4i++E4+l>NndaGJE=AlcY^O07Q#Fe45 z^4uLr*_RQEF$s~tcc*{ri`NO>dds0 zAO@?xUw9`jZdf9(fE?CBbu5SWeJ~{KVA{r7T~xRJ1d@Uj>Uf1?-A8@bE4hy1hLv!2e=-M7qSun~kN8006+g0N!{U z@f*i%E}M|8Df7pLn;%2=gg?V+xy!egsp)+vvj|3Se-z+qNZdXyP6m88NC0X~7^it`Zy&uoc9B1Wr z)b>UB_`t)N}>YVea25)1~T8R}ie!P=9znLU>}o%uzK9o(Ju?9o(*$nhBW-ZVR# z#~W-Y%r&Ej!`{g z=(SIAYJ;9du}IeAV8Fft*Rt~Tm#k>U~vX|bx96U@q_iN+YPL?DIf zn!Rz+NI(z;cDxl~Hbepb62;}R+_Xb~r#L2Uk*a99D{;eYCGZNl*n zyz^Q4L$KErQ}>49!Al%Fq5a6?aK%7FxVBRVx2Saf*|MpmGfBPai0KfV~~BW+6vas8TxQkdZ?{= zKGZ_ukH{<0XK$6m&l1WOHv5|{C5ri3Z7gqg@6gxF#03xuZv4$2zipz(QB#NRL*$a_ zz)_2i`muLtWye@iXj~jiv0*CBYd_3LWQ#E4K0SW~`ha5MdB?F%ic!0u3rQ;+N6vKf zc*hxyjeRuo*WpQvO3|~0cyjcGJ)YH8Fx8h&qNYkak5K}`S3F1Je5=Qo3=B<_md$px z>h3>_H;>q1R7isV@hE1hUu1i7000#BQG9nGXd>#{KZ%mw3vbsDKEGxDCH_tGd_$UH zxuW=7wTbz|n2!F5VXxV#%+-zeTMN#al}_P{c~C7!(9%wwk?Xn<)D@LgBpI6t$I{By z>2tp_yu=EsEG2#O+j#j%9-bZ!`3xLBMr|OL%XeX6Q+CJ~!07IV*$muayvUNHy#;XJQ*|yhe<6uMz}J4&=?SB` z*U(ltkYYTmG|3xbd8Ux|O=9GkX3)+N?-9Mrp&W%+@BXRzx& zE&e3kJf#z5493>WS(-ldJ<)5Pb1(}$LGkp}eQ98}*V5J>DGy9v^g*gQF%M2!!>p#$ zQ#6HQtsgI|Y}_l^<@vidaB!-Bw`c6H0{DL;`0eeF_O~rWz`m*e-Rk{N*t2hc7{8x& zKi8rkx_9gf^9(E6h8OOOnK9mcBZAMV9QtnEwmaN7h(){_^C<)LQ0K|F_Gyoh00(_852z; z4k|lIG%VbZn8t35#-EtLLKCMu7a9!-3tbg9hG1kc5<^1!{J!(vv(L#)nK2CpGiQ44 z&w1y*bMCq4=f3{G zU3rFK9AX5>GhYEDE$a1m(U$6})?k5QnZz$A2^o@?(ND>1IPvOXH9RILu9}s>d{GVm z?Re_)7cFo*Tk(0&{>`x{u_Z!z-Hh|U<mRp>|^S!z!(z$PCTUrhm}ag7}Cu6`9$;!JXfhlLnQ* zE|?_Rq;y@U4N(hh)C3#3-u@Hi;FfIU%X}Qk9M9o8$!-AT6adpPf`gi3%P6R!a%z_KfWtd{OCnP<+?`QyOvPB%aKM zH|vT%ZQoR0zi_>!w70e0i1pWgBm6Sh6Bzeme4*zA{;myf3XG?v8^!VC$k49g#x=mATD!ObEBE-r}K z+!$h4TX}LV`aKirsmbG$sAeB3P??wXog?7hd?2OfT?iHSb91Coy-M)pF7mjWEs4Jn`dVg4ahvx<3Xm&a%zH!GQHmX+mc zgnU=B+m>Z2A$DTUl1GThet8A^KZ;u~zx{XNzX0QUCZyzW8=eHEGW2@#neeOKKXcG0 o-YX-r6B5#@Lq`7tQJX$n{>Au8Z?H~O9{;%c#1!{I7VE_OC literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test2.xlsb b/pandas/tests/io/data/excel/test2.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e19a0f1e067c8b871dd21b6ba5ccee17adbee404 GIT binary patch literal 7579 zcmeHMg}=O(&%HgLbKg%LEmaImasV~}2LJ#t113V9tpYCq09Kd)02u%W zXe96AjI?$}n(6ttS|dz&y`A7p*_c4iOaKu5{r`@C@eU;Db!jy7gX&m!h+|)HdUjo4 z9xlJgNlKNkKw>*KBdksTezyj4>I+?!*Q$8#{@~HLP?^w7kDZP_`-Dux;6w!BQ2@mJ z(NM05&AQ`R^3_tzoN_}NTV^XG87O#mCSuu$I0-aq+4hk7G#-?Xm(S+D(m1HGDFtvY|Ei)Uz%)z84j(OK*9%07k#`+Vmp&Q&m^cQaqGd0 
zmMNZT@$l2(2ejidH7DxpV2M_^TLeTYTawrwVNlnxY|kpzcs1ZoPzEZl?nJ>&wd_23 zCB_%7avp0rDBB3Ocp1cMYSZp4G!;~YW}DoLtS74U_n~W4w;03nY^*mR^xy+GRb;xX z_dgg0F-@+-cySDRi#?cH@_8;JQBjg_;;o8MJ_!8OSlCO%rZX71K-1Rw9W*Z&mzFP7;)T#tzBLo1e`ZA-aMe(}pE zXJwqMidv?p(;#l*IfyfDMPv?}4C(;JkuO|Hf4a5Tmr*y}GnD^EOo68)$U{5GGOzX4 z*p2Ohx{`F6R)yYv&L|!+_Yf05|0=(gSRICk_50e^6jVGPb$9#1v{91Ums-lY9^GtG z;JTkBuX#H&%1t*hn)=K^roLa#S66mC<)T*4z)H_pMXpovGSxr^SuN5AYd3Wq4CP@I zG*X6|<3+n z*bAE_y%*fBMOP&8BsiQg-yiMzuR8g(zxVhMy;6hL2@L?}f;XJ+52<*$xI0?9xH$4! z+B^Rxn+xbfhgRFa`zX?B{6?GlZ-IVGctZv!E4NlnxmNBdAm+7t1@yUP*}z*4?jVoB z09C%k^viEiyYu_YIa@6kSSuQNI7{g;Zy6QyLJm9OX96T9Q(MP&nXQ$ zS4D|i;~1itH#?YjJKkxAFA8&=;Vn8}7g|Ird}$I$dw*iIyx}6WyFTBkiN4*x@}|3j zcd{G3f`#S-G5U(;%|99AU<0!=7q*naP?NcfB~OIG8UqcEE0U>X4#ox12vGBP!xpsx zD&?Vp34;9jr2>NddqTo$RQ+UF?)B6~)Rxq7)H;DSi$aS6FreENc>#H_)>9m14j~wz z)jhyrn0gWnUKZ#KQolf=O+Wy-O+Y{bLNqo}&_@uQMi3yVZ^#(Oq}&R49o87njP;QP z$|ON;XUYvq1RRl3gO>$WHi&J2g=!Z%&kT-s;#_nBq?Ge0o8d}I*w&PpR^XfX{J==K z1?(Hme|7u_wxGw3cG?In7{WgsM<9LR)`)LHc^cQHf*}i1>{Ao%C@L=lO??4EuClzo z*{uTs`8VX-$&kpAPpvY(9-Q6Y5w}^v5H{fAd?+}x850wkEyiC}csyFKYa*2=AUdkl z&~i)Od%l^v^lqqdFCLYF)qQRXho)FHg{gM~ZcUt3iH?pCCbQ2#p{>~AOE{lH)`?^I zmi11}9^YFk@hj*sm{6R?zDqT2ls>u3HRg-CGB>f?|DmQ5PZDKkUs=5>+qddyue65e zn>^*5WZAWAQ)D|!$oOnACw1n=D++a=ctH;$`SK#TJlX8yN>G7HC(+`~t>=b=GnDTq zQ!Ju1n`1`y`_;U!w@Dl}`{Q7uX$ogbjzW; zOvCPlSp3z>jg;PKYy!<^G--j7HAKVKn)|zq65c5}EzgCa*z|cF@4xER7iXjkY_UpA z7KHjdvz%M;8W#}>V7PtydC2wR?^N0!h@d2SuQl$UX1qCE~o9yP%$GrC`L{Eo8$Xf5+@Kk)m-!Xrhshzq1?4on!;1 zqz8XBvVQ!DudIup?HYxMt)Nsl$G+rjVFltVo=huyiT>8e?TV=kVCDeM&1ClhzvDEU zyfxcfgb56Wcf8i79VQwa&kAW%-@iEldt<4b`qCcWl@8#fLBHQQ!=Bmu^r%5-8!utr zk@oQR^XdIh*rtJUD)Tvn$c`5`f|nc+?I8nFJawzSr?(-9H*&HfizAS{q?fngg%vo{ zR@WH;q{^{OuUILCBxPCi=Ke2 z)}Qs?K~-g3@t69tITHV3-MeJL|Lbbm?A_BrqL9{-?CssPJby|5R;UHG7Ty=APg){mr+g$M7{f-m^0(k`jeDWZd@U?Ut!nu8{Y^d?Yl*ZclfQIn^ z{S_2JNihEte9I?ChScB&j5fRbkWqkfJi?xYp?BfA-!80V!I>EePmz#$6+1a|2fI@R z&KnG(6?V}f#}{rDpp2?(i{h^=+T>+cGA3^4v`AGCK|B_tS14iz$I0i!+*I;q3#z*W z$7CK+f)j0u(JMI>Zq8eg;fi8rQ|`*EMe>^}A_7@bx|kp4Gi>{@$5> zay{!_jTBgcrqJRR>8l=#fT(;Yi9fOk#y()W2F>^qCO|)7eN8qj`w5{0lUu%(^!zog z?Dp!85W>3+-4Y7v@(fyaBGXZj4fw zd>@5*pqpQjf31dEJBHa7*FHG(CF=Op^zHQ5`nxW* zk7wUFya=HYHg={Mny#6wy=GLkNhY@{bBS|i#R1iuB^CLFpL1i_il>)x$%dJ(Qv%G+ z?>dCZMay9^PsY+D8)oyem9KWNBf+*LZ|aEvk%YepeE;$el7arE+i{B_Z&%)so}?IQ z;Z9sebo03p9PQut96oe}HiE{=&P48j0qa<~(b_rD+ zYRYDw=F_cc1m-Up1fIrY)RrW!`I-ZjwzWbz8M-8MpPSm~L-w3(ZX5AO;sx}ylmc%) zaaJiYdd#8!IB`;3&?%#}tZauYp%5da<95~CME3F}3nodb5pPm<0AExnPe?pmUmC&EW`U%7s zNkTjnz%y`tgoy>nK?Kq6deqf7Bx9A!{LrWx7_{9R=NME}=eWqUY7@5u3%Vw!Sqo)h zO-m1I$}_a6FX-kYN=$?ktullxkgde@#v9?Nbia6P4R{yQjGyUF(<2Zlnp_=I{K=(Q zz`C^Xt&W~ZLKN#2eC<(fq=4oJl}^N!PnV(tJrj$;z=BXV$B_HF6XyGwG}FRcG+g(K zO;B)Fu(-OgzSNKw#e~QPw@vp;IELe^x?AyMw=T;QBtP>y)Tq3RV~JUql0?Z``gDEpK1v%I>vu$#H{@UdzbEBzNmC7pdz`8Ut%?j&aa-^YfpC;5$90$*|m~B-wIkZOqd3pWq+JU`E;xZ?KBFRV5mncD| zt&S*0bQ=4$c8WNtH)uw$yh5jX>R(d*k0g)Oti@}R0j1XC3n<1QEm%^^Mn5R>Gu~P# zH!RAQ@B<6RD65wmWRD9mlJA1$*vLE2U1OSy9xzjJ(;?M8?Y+F=d!N&bi92s;aRYnU$5-QL;ud+nIq*WTFF&;V#^sx#*f zVSbdAlocYIlo3e5iMK%0v`7YK0&!=w0D|$U!Dxiv>~1zAFQF-VXWh`1DG9ojgSl8~ zxx2U`_^e#qt^XWV{@1~wx9(1yiDoksb?BD*2{GYVWkN4B9%+)@<&1nVa7?lVnsI_% z77qAwh#K@vF%mXy65nbTcWHn3z6Nj9ylB`VM43DXKt3i{(27%^FytBrTuUM?8pKnw zyGR)s-W8rIn&5~Ltt4umib?AoSDGNoT8=EqqBeRHl04BakwCRFIoy4eV=#Abhr$S; zF)!9EBF3I%^&0D5Pi)wMa$|J+a+e;xj4J`faAs;N8_VIlwT9t?zCaCex6#23npt~B zRKjI%K89%{p6Iq78!{(s6M~G4In+cmLSH1IV4C8h-eQohbAY(hLtGAsFiy)7J)2C{5lus2)DtRviHUm~Zt zt~SW8As_dMEo+P`vMl=GsO$*p>aV>ef1+83i;Ejy&IYC@u#Zaz5eH;<`JG*V=<-`q z8VJ-*bD~#7(I*4?{IgH|v${vOOcw4w*PUV3-ruYH!8mgO5Q`}hC=e5H4^t>cl##os 
zy`eo5A4Y|-qsNY}k$*>g5ga#U1e*BdXzhXiNqh@e*Z+9`ZO{HVQsZPb7x+P;TgqP; zS!SQ>%HZ6HB5^Qq132YATb~e%Z)p*LycjQFjgv8RUS8 z^URoz<=IHpBTMAxkB*%TD4L=e31+5PX}fBUdA&*8(!L#dRxrjrC>^$obp{>(s|1EjSHDmP)kiuqMV%LL%6Su@s*0Ue@FD9jarLKfQEy z>TK`snxxbQjQwy`qDvZmEB;7~7l4=1x#0Itn*O?De_em`+(}FIXMmpv-hUZ>yVjs( z@sIKMdBdM4fd4d{N9*nXo(G=CIX@BlfkcD*x9}uM^>c{+fdl|NLu38@SN;sx XTB=yzLp=e206>psuN`_Y0s#CEceHL? literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test2.xlsm b/pandas/tests/io/data/excel/test2.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..31cfba7ede0823740e16aad889b6b6c2b025049d GIT binary patch literal 8086 zcmeHMgn8K~g%TyF^l?5u|Yl9Xcf>MEJ&i zci-K0{r-Y?&-|X}%sl70@86truDhEkH$P%L5=I-v77#53fLF>WF$DFM<5I(zP7?UJR^W z8dGQ=d6?z&cE6b@uMpaJa^; zgseS6Vsa{u3EG2?0^)}j_o->=J)g6+Kfc8uJlvlMG?>L|)CYmoHpv;3V9^GBOV2$f zSOb_A4G3Uo0oXaCckY%nA-z7b6D}wnd>)s#qFsbz^s!ri-$D}7!7_(Z`Dud8l*^9Xy|bB@xJiyk(R+=yijhDu2Xp ziZ5iMf_1S{6X;Tgp(qA-23-#?!NlWt2I;?k60D3RBoj!$xoXvdAP*m|{^Xg3GsAw(A2Mw(-Dt!3zu#RAYTc7d{Q89HSsYFJI;eeXKviU_d z(u}OmepO^`i*Vjf%0yQ1Tw2LCu2ht$>fTfa$)LNr%|dm+pacEcC8eGYMAWv{JlE|3 zgRhCDL-)zUtS+MO0r%9i2elal2`~636yGuDodp{<3Vs>M@$DDB(^oNgG!z<3SGJ4@ zS^qSW5Xsc^6U2(NBaDO+fQjVe!1EtF@pg8zH+Ocn{}Imquo)yoOhah--)@!KYM_2z zf==ukD6e;x7YT5|gNI>HZxFq=KfMaq*{cd|Y-zch$!@)V+a|a475!zXFWuw` z#T{jFTwlQC)-*lmMUVVf%;VSB)@r`v_C1AHIP26EAlg?#5&@AOccFudbiwS&?&}2^ z9eRgr9HuOW>ye8{ZGh90#UafAmEaL$baw=Zy+G^SFlv-syQ$5 z;&kHA2NQT`z$XOnn3JhxnIT2yZ`FUT9Q5FNm?p05q>c0KuyEgZcNPTQH`gXSQ+=ao z=jmHb;)}XlG)aM5io~23!CoY3d3tgHvs8~hF(Nafz@-R8R^&NpaR#$mB>Gz<&swkm6a3>x9l*8!8Ucy!Ul!|eY zvXM@ji|v3fP=9D39@6B4Yc#fB=g87KjVrrXoP1M`QCvhivM)n5a*j133Ehs0JHXyD z#Inz3|4y7gp9GKf>w6lprs?*Eqd4?ih*xjc24hV}7n=&PcaV+}zPy4>t03!zV_)-F z`*SRfKBj2o;p3Ou9}TsohaRtcv-M&ohS*ZeYmr6EKVioQyW`I(_`{DdzTFP%Ck8&D z(1m{NNe3Htmux{s^e@)Uht3buzQr_xaj!)k(#;LY*Gg0*dv(KXz8O4Av!X6tV~8Q6 zo)?(C1S#K!dUHQOCg?Hymgjh9#Z7zP&{ zg_8latCzLswDA06rC@5D2X!{{-^o{{{6HQ&x1~;kve_-;PGS-4b)e`37^&II8y__n zX3c}~(-7O#$UxNW?--jWB!-rT)~EI_Q1!`r7&1_^L~@u;e?16 zKRDmEv}ql!Ur2fwfInX4tbdBC8Kz0KKd4=RhVp9YnvZ72w;Uh-5@!7d2KUPmBgcq~ z7;zzB<_bt+vxta@;|N-7^|5ouMU;Egi>?e&0()LD^zNZmnen za>ys6z&CVXvN$y=29^u9q!y-!URL{#ZiyWLneqH|=EP_M?`k?`Dsef;s1ef=(FX6~ zVA%BMJbMdqRS3R3oORy@Z5&*Und}@d1pabrv7t_nQqTc_1j-*3#BWpcfLJ+N@%(n@ z`!TaUgNaxwVd6JrS5i3c-q-fl4m@Ab{T*zU_E_k5i{NEVR*ChQ&nSjE_~I`-bppU=s^7M4OQ+OpskcT(avDlFV1Y)#K(mPz@{LSv-I;3u{eBtyHO9nTFU8@0G_SNJx&10Zfj5>-D%i!2{h?K;Hco| zf~C`rDyNBwI!d;V#`yywzq3~%b9-}hBHjU)0!LV>3+J;(VKSJ;f{5>Hfw3BtP49m^ zUJDZnD4g2%#YL%+1~Lz=QMP}=ICBwQBC+h%fQ>?1)|OMhIg#^xw+yO+wir%6bvcr=C}kgwu~Jb zlYnmvFHM5?uD`&u$P(Ywer`*+J%eflU0%A25wJY=S-VV64EDdexYU1na|2_+|Jr!p|M|{D>Q#;9vn|_bb&wk{>b2h|g3 z5B*};-LEW0+o~p-h&L+GzGFqjO6>W20J8>lljO3-p7g&+*sJT~mHje0oWb-g;rmlM z*%*e+2SoQTXa!aXeDPhShX!0$80In07i3+CJoHL+nW6&tHSX2Dj?1`|(3Ms)kF`gFr955s9O_PRHQVx>g^L?;2X8+og3 zj4mCk)h9-Vg727PrqL#PRFmuzY8q_<7S!OIR|byJU)Y!IZg~^W=S7z4z!h%Znb8=E z1(2GH3a@#`C>$EWm?!hYuNUXkXPkVMrN4e>WoqnECG^CO9;gQv9C zkMYPg#eEtkk#-~@c_bxTyJOe!COrqWy97&1Jk50&4=ru-d5`FvlCvAji~xX?Xat9i z0hVketgbQ64QkSlYLt-$38#doMFJ^5i;JBVrkuoiR$!LPXtbMO-X|8M91{lGvon-7 zhOo42zwZ}y>X3m!Khyy;n_zeLuGjc;We!~04&*(s-q|odv*x+pWgFpIFM#@_iW)_$ zm!R_jMLgM}m#VXGIH({6f)8CQW~B+14c+b~J#UGpMup+<=c*nlZJEs(7L_(U*1S;V ztklpupTii*b--I&P8@hQj+9d>Oi=VkA=8lA4_M5e&SEpQf6Tp3k?&pWfYfzOMFG}%Trjb7zfL6vHF zHl7DRYai3*#Az=!Cb;hxX1pc*IP6=sdr`M>DXpJOmjTu*vq#6FF-0p5&nbEy(kpvB+BX&frb&;?r;ZO^dH4?#k==Sx^ zr%r)W^0z}<$lJ&p+u$rL^iA!=6m|`?3K{~TfR^*p54Zs>i=t(2x!~-B#)96`sjzG` zrBX}TkKfj=z+K&v(@IVCNe9itym93`y~^Pa(d*vrk|atg?AEQ|#NwTS0;H)YgJiC# zX)l9pE*e(Qkx~St^BhMaKJN&JBQ-oQQX#m)6OmeY;ySLCM@Uj(%lWbI(k7DUm=*j5 
zJYZA9bFJKg$IwT1IquDoc}2drdcYM&mR%+!-X|qIf`4*W5*239nvIrXpN4v3*@l-q zWY=a)cEXFN-=z@yHBx9dH%vU@t>Gi25lEyZl=zUR87DC{H%Pcp+J7{VR--RbnHc@8 zCplUQdk$K|wkIL6GI335$~p^sGVmOJ+p0((U2t>n1fCM{Ir&btdglXAh(iQR$Q6`n zu$%~fJi9UROT1!Hyzw1H6tCGR008-a@cN@_afeu0dAK7oOGNiu;uV+jTlo@si1L*V z6=pAb(9RvXU*M=&2}P1ScTKliBxQ)sEn*t$qI2X?cyBK4RG=)ub2-IvU|eP9eW9gi zsh7T5dAbC1DSnR^Ue+}EMMOrlfg|{U+RuP0^PA_H@+%^LtmU;TFfB?mAAR-pwdmv&b@6)b>o=aI8>ArHAr~m#5o$}X69P+@y ztk3A71!zew$uKTQXUkN*wJ+U}jA<)!_=SDt@m{cSg=~D&xvpxhw9Z~dBZ0r2+V>HO z>`gF8J!!N+{(7XT%x7|V8IrSEy#g5<7!$y&4|AUKUPQCb%ZY1UADvJyzLUdk#b%XFr|EybSxs88=XQ#tYGz`%EdBG69Jf;{eK#VQ-bhC8gu9Z&&@^0Cu#wjc;4?yh3 z`nvqMU2kKESfM?y`cqtSWrN-wCwCTA^K?cyWzgcpk%p>=9zqZ$9uHycjd! z!8_34M7Q`LsFmW$7jT2xRp~_6{wiN{znVOce>093W!hGUd&~G})wt_Y9!x>6WWP!lFYs zVZ1~xwXwANJwiP4l9i8fJKQn__yv{CJ>aXsFwZaDPz=DSGadm#VuTwa_$vs_TwMOe zAY#dWZ`sLzx?vm`l_(616H;lebRS-ukMya?My;M)R}&ZLghpIv!&Q zCT7~is<8)Akk_2^0cj05*AX!0B8!ZQE4?*qiSGKR(n&?Bogy(%baNSq za6OVECe1E%%F`18Ii^2oN~|hK9Y8tvbQG+79pu;p?3y^iz*9ulm+>Uyj&$%a@;EjA zFua?Kf4~!ZaS7T9Voomp~h+ws2mE&Uw+uP6#7CwHNeAZd?7J>(EqRt2$5* zCY3p|m#LIF11+%;X~iUv(m;;+8i;Yxg za?#Gy0srl#ypPJ&!#S&)53dCe(7f|cIptlJw;Z(`s7Ufv6k8pxwF!0`M?ce?tof@26i<^LzIf7bIehxr#xcmBa>{w(}6Gxevi4PwE5 z#Z&#P;b#KqPYtp-zodhHR`7H7{igzS1flffFZ?$b|5^0sMCeb^W|IF-{U2%3&szQ- zO#jpY0JM?;0RITBKa2m}4gOVJ0>O#>SNwOUs0l(t>@EO+h4?&0L~j7ik6-@>=LpN4 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test2.xlsx b/pandas/tests/io/data/excel/test2.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..94dd951e0bb84dd566dc9aa58c89067771f49a8d GIT binary patch literal 8067 zcmeHMg3ZDe3MGDM7lWVJHE~p;O`;_uYMW z*Y*1g-aYeso-^~D=e~b)&bjWoj+!C@B0c~afC>NrXaI)LbaOp803Z$#0KfyF!s|%e z**crpI_qnA*qb=LS z{lH#a)~8|}-3dL#MmiZ57WWz9-*Kd}2rLDva7P!tc#ek6t8H!S1H*b+)G;Ch`Z_fA z$OJe)_wF&Y6OqI_YHJ;2;*&{mlj!S5o2CNZIW%;s^F3abR464iCma z%hXHbl~udAFhUOGZbJJmrXX=RQi^VpaxF9(zW(xy*mco4^kk5BCUP7N(jbH(trQ`THjd9?Y(-v8*MH zg;|RI%XG3|jviOxjB-3DeSnuo9zrPkK0v!iS!>DgvK($o>e+r-NM(aS=1#(Bs^3gf z{x+s~7)WVv;ypp1lcCvMxp$v6&Dj;H<}-7UMWtbeqY$l!zOi-3$>Y>^yj$;wDrtS{ zw0yXiJflx~88XlOv}^f}2Gc#d1s-&j^d0sGMp6|n!6MW@jU+%UG5G|xBF!)(AqAkq zxm$Do$4=br9IXuP?5uu7uRm-C4i>^-TK>0Nsk*XaHy2JT`duiOTdFGoaL$>Nc29E` z8@0cdWr2}~*Y|XpjJ5gYi)lqp1iKKAgW+D+3x2FM1iZ6Ow!#QpL{B{H17Re);gewm zRKxu;%D^W`c({8zyCwVRSSe0;O=9tV>>2oio0rTCB%wL^w2x1lCPMfuN4Q*gES<(E z24I*OtX=oNe*J8(EU>nrfjWi7bmP8Bdf^=TL!bxM_zB4ac@azx!1&fA4cld>%sJ}u z#~V{+j}fcRoNJ5?@)AYLVt!HY5NGPpz9&?EEb&eo+3#C44_2L}z7MdpFC4QEo*tO4 z_xDaM!*=?gkP!nSZVAEQ@*bA%V8h;kLFTVuDb-N6Tj0WI#h&%UaaLU)<$GXAq?~F1 z7m~GAeO}t<%>Fn@MBY{%7i0C>+fanx79B&6HQg zf6-WrvPp{m(VoD+f*>;RRgWbHKLt+GIX`hed!9cT-8^Y6l>|F8l-uWde^0b|oja!P z@P3sIW7=(qN#S#hrGe2gc=)QK`!%@C|u?x3pUsZdq&o4{Sf8*{lN2BpO+*Wp^Kf#VC|0lE%U+Wmm7xt z7to~d5w&2<8<2Ifp*HbqzJk~%jUcn{T5pm}$O~3!BZ$and8e-wU`iR7gHQ@bKLHaJ`SBb;+7D?oGLJRe3?Vo^>qeM~)#> zWj1?Dydv(Y=*)RvV9O_!)-Yl#9L!Rs7#;&5H284mu58bs1|2yKut*H?K}@?v`F28} zZLDp2YV{8BIZ-F=d&E@1bovt+zV}QHd`&90*~*?(O>?DR?Cu+yo*r&oiaBe67q8Pd z*1{?VC=#upm2;tiT&fy|Ln+Y>#|OWJS+|zfqaxVIG3>&JT`-tAnHxBo7(a7%w6HaE z`r(*#V9ifC!$bJ08Tw%`nf=OuWH-WP4{N23F;-41-jyuW;0%glHO zqOYzjw^Wm}*@1^V)Ez09qQs9Z7{Lx7`$R@U+Mr8p`qV_xI*W(|Tia>D=u|fkSj5+m zn3EiMRqipgB@6{JV0k{95vK5=R7lirsm&wfZZX_i)hk;( z>uVol8lI&q+ptZsrGBW@#glV&db-^zu$;h0#0iNma(ewt<;8?+2@z{~Py&!F!Fb-) z4htnOf7d=Bo;#bp)5a~J$;;yV?FC2nL*Od0@R<0jPT{_+)KL9^iCCa%H)kb@%F5j9 zJZ@b3RF%Q6_!L2qcS%-n71UyY^14it-0xZ+y^jOBhD_wPwDK(#*r%d$kje5vlJl#^ zr44#RHN#iXDJLdDQnmY^1dUupy4`|z)w+>H%v%V~z9ltqCXIua!Xq$!>U5(jlMZlr$KK;ktA|}LwsMe7fxHsY)40{NSRw|XQBL_ zXT_H0z#-Fxg2$qgpC3BLKx^fc6VbpP7y-JSt^SZvgdj8?=4nl|iF(Dg)LX4h!b41g7Z}AoPkg`Xo 
zaSZl^-?R}ijp3;TaJK#(G_nt%_mbMO%cW}jqYevd6sEg}Z|_PoRi2Y1JDq;GAwpv> z@qYE>jz60t`*2s62TvV##)!)z=Ia=1qflmUsf7=t5?bNI#>56en=TQ!0r`TRpWckl z()1+FK^VIacPUBoqH4nJZSe{s!=@Od9beI_k6VOx<0J=1Q@@@+9A0`@Fj#0aE$VGd zyBaJpDdihhwAwRh&Y@<_3wq2wMbQ@d*qE-VUG9lc`6$w3&j=Q$Yonp2ve7#H%@U+r zw6I9gJuhcqYM(}&bn5WS?sqYJRb5EFcM`cp|o(0&ubdvr<3yNu(3?I6+L zW4}y0i+V99Wsm2qS)f55=FOw}uapXf@SEvUb{f+;J1qTV6~tSq7s)n*H{4# z2-E8cpQ=pvhP;@eND1) zmQiKZCSD_Q@LjQ%P52SZg5^C|?8U6$LKV2g(Jdt$@A#bt@>tW6jO1z*I=*@2Jq zGb&TI9`X|Bw@mc4ol3Ya=;1xp!0bae`pKb@V41;B*(}4H(sfa50|XK_1OzX{L6tj} zEp5r^h#mQ8Peqa(2C$Hl#@}{=X5{P~8K-ywgm{A(%(RerT>%x<5e~(==V7&yQi=iz zuaiQ6q+dmZ&vFt@qFhQ)izHQ>4X+;I^N|h<0IgVP3u*%xo7Km0TTqPfw8z|_)9KKMVcE?I)p3V2z zp=4SDXGz=fVcgq>XmUgd22Y04q1=|ijCO88jgH!-5?iUN=EV%kV1_l;>QZb^?+9Fa zfdEeKyX8U!l$Zyh2$`0f!`uOQ!*AmVuH1^bHmXagSc=I9=7H%l%oWVd@67BOve|@o zY&Q`0IX>_TIzxg{o@u?hcbO7x(NG+<$kv)vlGZeys*T)EWjiJ<$qpke<=F-=AnTdG zv?_9mE1YI^ke%`g4$OSYhRj%5p=JM+~vt+omC*v}AM>2I4KK|j! zPi%=~**SR60`f}3O0c=6<~Omqu;9jis>92k+iZ2gv0uWb5=G~IN0cX2eCo9jeL%u(Opi=bk^ z{g*LpiSGDIFf0qKLIMCVe-F>j9@Zwm#pPTL9lKO6eBTeX_ph$fNzMD>Q45pM)$?pq zCROH35E!d=(5sb`t>^s?xZ_?Ie|c2Kd70LAhgJt zqf4JE;#J5S7Y$7f^Uplq#)Ql32g`ZW6i6-0yqh=@%&DubPQ!sXI?(sI=L!N!ogcKx zQY&Le58?GOlua~<_Z%icQd;C zgcu7wxe><s}9}1xm#=HJth9!zXiVyzqt)gMMM6k z9-F|Tic~^@!|&a2QSb%RyI~$w=$HXcgVtt$DwqgLLy{{nmRkJ2dJS&x5Sx^%tB!-# z4{${laek72{TR8bcb6bmTz0o=86y(wOwn6{eB4*^nw;{=*X*)p85u5tS0dA9F!<|^ zz-zb~AzcNWYb-(Wxt9(jPcv}|N-WqGyROVaIFFgYN8lc_3eFq(7A)E>qN@=%)|6}F zy%i{X6j53szetz3z##U?S$jdYiE$HFe7|Lr9?=OG*7J7R$ny~X-HafS z;LqAG;0Db@j6?AcIO{QD6El1TawNQld?;1BV&(CXKf4ekC9tF;)oi=q;>+V#6eMgg zvcv-~*6$mi(1d5-Jv>=Y2>u%Xpj@R@$i>_`m@(iwl)kSBZ~b_BbM%*ZMI&kR7=jhA zX$SxS@qh68qiS(7H!*Q`f@PMl?zhA%D&e>CCFB6%oC*v zKW&yJbyhp3L-c*_bE8H+W+#n}Lju%g_MrM2hYr-h28SS`*e6yWUVrlSU-%qb<8 z0<}_n3Bi`VW>Qd2gI5cdNMstq=o}+B*s`)a;-{XdOevblE2XCk5c|9_tk;Eg<41y$ zpc+;`D7mK=S;}{pGx=gXFSMoAGB71VJvU9-J2D^f6YWx@H^tfGe4XaPCu2!BzHh~i zbu~V(kJ@2|+Pj)Qofhx#j~E+E5hUBM05%w{^7<%*6XyrMVb+}Vaz`{$xNFSEynD~L zB?oW#R8aiGWJZ3l&qZkf7^>vm+4X$vC6)Yn2nMlFU+P!nz-**A`*;Yujh%6#=IT+0 z`TI!|;`K|b(&IfpffA|cx(f}ZN{MHCCABzSmddw-qG{j2iYjqK*)lhSb%pNZ154)V z-^!QGhkJ&3v8sdYCfw$cOf%D?8aIYURq`IB(_!U^Hmqw$b7zanh_=FKW_?*tsUhUf z2y_4qwiMsN{L(+`*3aWe#7Zy(c)&)1Fsj4Y&PdJC&fbaB$j;H^car{Jh683AQSthU zU0nE|myz0~`@K^0Yow(X0%Al#IO%?e4;kHbDwP#*XZ9CT>vhVMI(ygCz7K+ujca@~ zk%hUymT&2BKQjY9*Tq>TGIxnKJrfx%veJs#~b2agzxgC&Zbj8gwyg% zsYXDJ$$E{JSo7{NqMsMVyMa5QF%~*A21wR<2Kp3Y59cgjoVnZlOd3B0&KY%^0y5`#g%Kkck7Ihmworcke(K zS8mPPw?@?PREg(xDg%S-u5H?xwBhokJr;8LZ1H0yi9KH$9`(dtJ#zPZO3`(b`i?PC zH#Uq(;>~K!sJizr*|rFPZEG}a1Rv&vaQ+HG1AF^_5eS3c?=3Cz!1Ck#_#p zAvAD*fXYo@DHwAhTW8vfx4pPp$ZzXg9#-Mtt3k&&lDG!@t#J+5K@>Q zb4#BvmF;Jkv=d0hKC4ub$suPb)qBJ!ZH=IwI4aim3PFO|9XIMTO`2sv)fM}5<)d}FC*qL2iv?IG{KCFHTMIB)r*NnjUB28*v?B(!p7wZcFJvytmk*?`e0zMu z2SswrI%SiwU)r*HYE4FvrSPQD`bHgRw|2iTCrKQowrYC3?O=>EiK{w(}6FZHJ|7`9-)VyS-C@H74Mrv`3}Uy?yT zEBHD0{!_sj?tlKme>3r)MSo6%{uIq4`0v#Jkre%`;C{!?aJ!_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test3.ods b/pandas/tests/io/data/excel/test3.ods new file mode 100644 index 0000000000000000000000000000000000000000..dc78781caa6e9a2e8d8d3ba8afae8815d155266a GIT binary patch literal 2889 zcmZ{m2Q*yU8pnrWh!$5TqJ;?&y+^blIzx~!O2(LIqXc7=h!PSlN^V4#8RhC+E&9C( zqDF6HWTN*Vqa-5S;jQ=bUhcd5ti8@T`<(x`*V*U$?e7mU08-EZ{?+&akw~Q&anj9M zrzUlG7k7kjfENPh<%M*C!+c#lJtX`*9K}6hJ}y4uo?ZwKM^8A)9pT|C?&IZ+fI0d& zBM`n2QcM;A=sW^HVE$o#s|OGOFd({iha6}(^Rd*eE*I&bOWFzl{FZ@;gYpkt(rooBB}ADH9tQ|uZ>pt-yvF;!|w zp4w7YdHfjDAxlS|B`~?Cz>Z^56~!1a1f7NR(}oxf`jymG+b$otBT)xk2HTbkqn1ui zw89dzX}I-A@&ZoRt`_tDz63as?zi&#I)=%u)wE?P7 
z(S+^+%~YQxHozcS@(_((*Q48`dJqIsSiQt{j8WR`@VTq=Yj$jk)g3&6Rf@nOpdIhc z%g98jzOnNQn{)D}(*=w2xT-H@E7B_?Bk}9aOPA|Fe(VinqC`d+)Z%D4)1v~qg!V>y zw3sxv_3lgt^EIuJdf3zZ4Ks_NuOs1;$gGQ86Hq7vnr{{Gu5Q?H`zO@vgtk zLGu-|Ew4XBY$qT2+WPb~tgFN|b=TW8OHn)6UMX_VRUvOE<@9wr@~yDtbzS1BEKhu> zRn>DsN@_jaOh#PmNgPwN_Ozhhl%0*O?RJmFSRom+M_8SO)#GSscx_-f51c}0m#^2P zx%M3>xv8qLxnW6qNA23E0#R8k{lPal$~j3OaPdI13U^zc2y_#DeN z5|(r=7pK%HK3Jhf{oLF-IG@rhf2!I7vP2t4^t|BfL47{V73{0cppg9it+%ejTJ$49 zUJ*}KUT8xg*+zsVs`8HpF-uJ9@~y3n`NN9s#iDykCaeW!qJ^={_4CAo?@iA=xY{uD z=_oN_lT=Hdkh{(%4ZWjTTE0#e`9;j-W;K*H50d3EpRumwL$5sp z6RORzI%2_6{}unhbSO?@pZoXJ<+a`05-N{;U&wM*f8biD1P8e;3r8N^VW-+QvUn9t zAnY9~;6C;36Nx^Y=vXWy^SR=v6{l%tG9SPBc=ARDDy_2E4ShdFdU0&D`naC`<+!v3 z-=4mIqm`?sLh7p~2RU8G9AcA0h7ca(t*6P!$Y9vG1+L^tF+CNIF8tW|ML&PaD$HoN zssf}dy10faznZDqu$KYvAeU}P zw(C0AG=2fmZ{-a}L1L#*5@yH)z;D@`*WK)DH?1R!46agLdzi!#Tjt&ZsB`tLtyVux zyex|o$YS(bZf9y{$Md##1k|@Q1=)q@zt))mIV?q3`S}@-+U2^SPM=;WH@pBQ!gc`; zA+4~=6Xz}@iXlqguEzX@NY@NQjA)w}cx!D5zr0s7C3`;_8^qx{oxY#p3@(ZnDy;Xq zZN>u2Np0UK+flUFEm3-RT!-!Qz7^Hmp9W_&8xMx&2=A8_k=($#snwTsrbSOVfjvS{+x zA{XPVKfSNJzL^v&ZwP)~{Ywn99-2>wj!mi|ngARggQ#it-)jhawAzim ziN#d=DAwLeu?el>n{wnI7$IAaq6Cfsb@(4Qh6HvCXX6yX4ikUE#QUc~`F38Mfs6FY zncNHNc~!u96Hv)Qb0-clXnIRCqKy+r&7MDfLcXG~v=bWAuKl|FTleSi3Q9794$Rmc zChK$Ol*{glTqnbzf(CW7_>TKkrB12w9Pf2<&$y$v37-u<)TciK9Q_%={j6_4&GK}AB7;8qru_&z_?hZrmG#)Rg9AboQ%a2$eTo1lOUROvvS3D2*CltW1IONkY~RdS?-e@Co>aj$tIDzkYesc7 z(GcTc6vhGW_}|(kN~o$jgu!byH6Ju06Bu_b+31hqI8*(z z?faoHCtdz9fj{ehE=)gk>!fn^UuEiN;m_Rs5Jph_14TcF_&MKxgh(KzAIS=zXCK6X S5_I;2l63HqvXlATKmP^ORQ{9z literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test3.xls b/pandas/tests/io/data/excel/test3.xls new file mode 100644 index 0000000000000000000000000000000000000000..f73943d6779517e5448aec76fe1b7cfd526f37ae GIT binary patch literal 23040 zcmeHPYiu0V6+ScGwa0NDcH%fs?6ni;;n*SBbwWZOo5VbY^C$!*70_g3uM;cAj+`}3 zB%&Bv{uD~Vra>icDW#|nDDM(bX%i#_t>m;7C8DGeg(^ir44tNpQ9^@{x^1gI3qcM) zNoymsDGDCca7q8-hB?i1<}6utZA=F$v4YuMCUAo!>&`g}5bD{`+}|2F+6KJ*{^&>!}pAN8R>;zNJPhyHCJ z`mH{6KY9G{pAa}TP>b+GTMysypuXs`+>9&0>e>NojW!i$%I*h`EoaJshs$2ANIeHAIlk({Af+~_#ej?6= zJhpx;|F4qtmuV+XXG)w2l5WfSU{-HgoRO-knu;3e`5r-s05-m*XG{O3=o7v4Tqesa za!!@>moW&WwYDCnNV-x!3c83h4m;jxRMU;DP&*~*rKfJGJ7l!bAs3ws=rjTGzXWGI z02O@<`i+U`t@Zp0))=*j3f5q-hzcx7Q6fc(t-vZPiUaqJMO9$q7FB_bQd9+Q7DZLy z_EJ;@Zm307;K)=|1#V(RRp2&UR0VF7ffd#(?SZ3ciGO>Dh=!8xVwaP{szZhf!Xc#q z5Oza<5RTggfUs-&gK*F&0EFGuAEdf45O!gI5DsevNWyOI55j%=m>{F=!PdbPq!5r` z>r5{UB-lDL3IhqY4mzwt9w*p3vkC(VwhrcQ4oPs~?n|t;6Hf$N2XnhZK!UAPTNp^N zbB`D70~@IanW$z(E5C?`z{#WV|=NDAeo z<&R?dr9$AiKpiJH7h9pF*b0k_t$^j9>@%>n*?=y^<_}AjpqSNras|vT+!dscz3_LZ zkEvsur8Wqgle3E}`wkO>!$`ak5HlGJk|+d3?QT@NwgM#Q2zif_@6O9mA=k!|$s4?t zXpVl%lnwRMZ)2)amfNcEZ1}Q4n}eDyRK9y$-~t&*`?!bs_J{*SZS5;;^{_7U=9ALL zOmox*k?MK;@yGL8yOSncJJWzIl5FixTE5LzqO8+qNpG8Z#l_@j7J@E>P&v=b(^P)U zTIH&sx~g`_xLWBBR-;11e=fs%SDUJ{`O5AfUy?PpBZJ_c6>)IU@D{KvlI}Y5<~wGT z&sj}i*T`!V%%)0Z6VGSU5WvO?M1oB;-F5EFAH3O2R@o%-*~|-IV}&ZgrZnC4_B*e6 zvzel@Y0YQT6u^dqSypLf>8`VX{Hr&cYL(5hd^U>%*l-BUvN6(Kr~md3Z#Gj^Hp}zb z!~@t^K}{&FJl*x-UtaZQQ^RapgFUSdY+4KOw9)pbN6&h*nWnPw@U%GrY@D7p+WzL# z$Gq80SJ`-Y+WY`EPEQ+c|NYr#z1hrA*?4%`q5w8dPaAC?J#pHb%}kYzho_w%z{crm zqwU9Dd&`^6ER~IirzHZ|I6ZB&{ex3adb63$Y?cRmS`2KK7vO2}#5bRK#bUz&4atsa z=c#NwJZ)|O8>gqm6Q_^d<;|v6W#i##3j)|UJuRMi{*|NNZ0b}t9-h`5z{crm@x)`_ zebJjuOl9NYX-fjwI6W<%7&>{zn@zpS#>3NE1K2n{EuMJ)_&2=RjK`~M%v>&PWu zpaw1AVcCXM=a8||ZFy~$Swb5yaqF~u;3$v2uqeQzBR=PqgKQ+J#Lfh);kTuFItF@D zneM*cA>$@Dpk8BEkhR&Ug9fn8CxxL`H}zo%#v^9#dl{4m#*zWADhLLpgf~RE$rDcv zg`HQ1>#E#!HLOpGfkGk{8|c-&rUOJ!Y2sjMq0m31|&M`+Pti8*A%+@eHFz@!Z-xgMVsSP^mA=D|{w 
zU7(aL#s0-y*%)1ep5QvWPRn2fb>=B`BBp^l1K4TRFUGzdbkMG`c(4pL7ArN%%<*|O z+`Oh@ZwTIGTzp_(PpUVS>FYlj+nPR*88R+)L+Q)RO1hX1V7ndLzE{}44}u(o&bLCG z1Mml)rbz3vv9s<~j%ULJ{nEZZIO-}lt~S$*(4|mXpW>>#wji;5&Kl| zDi3fg{`r8L$sZi&F+77OoZyIoD)?j%a4Qb_fLlWYU4XYb!4V6UfKW2U1DxZbWwA1+ zT88y;v;OYG$KrmER`x;JjlhtzU zSp>wUzD#<^*yYC1$DzG2Mg~F=o=-+zUMGn%L>z}fZi;0kZ*Hm$ma;@vN;oSl2}(#66;DzVqI~-yl^lI-OY825RJkRTky>N z_Qao6#;&@GU3F`BrU%+u>aJBX3Fg@f@5sP#tgn1;*BD~8fauGx2PkU~B{KvBE8Mbv zL^zdeeS!5ZHQfK;{b!A5C^KH4x(5v~jNW6^oHLRzfMV4}=Pn{em32=khX{1s99tKv8h zrMpCWGszoP<@o{|<*npG$N7D{r4@)8IB8HJYat=h?~?L6Ln9brjO4^X4R>12osURY zLkbi`AI6EF(K3#qW9%T#n^~zKP9mmR#>>$@6uK2#uTUte8=}NoYSLZ-?gr4R;Ye$- zO!pMO%cFIC$fjFisasuA>e$vhiAtmxx)5l$F^zURib2UdS1!3I$gY<3lGB&q$e$mZ z@&ig+%5RM7Kc-Ek4zQ1mtspMf<%qC=j}@#kmh}l1_{3H?i}6%wh0oFo4zvDGpMHeS zu2ghIZZX-}UccqubMI~5Q~BVX1}&QZ#H-vZJ&Me4@%jEbWR9wrA#+sNi98wk05bO= zcOYYp4zpmQDab!W=74e*m3U7GM+WDQif}n5ePee=f8YMTu1xIW10CreD~dI(cxLnY zC$)8`KX~2l#xLL;W9=}78VEHIY9Q1=sDV%ep$0+?gc=An5NaUQK&XLG16d6O&HwdN zCr^zmo>e(~H|GD%?>)@(|L2i;uFvy-p7-<5ceWt&&(pRe^PGMsGSBz-AoHs6K4e}8 zkm~~X;asi*So8FWqCUvJ-?aedvb^?SEtKa@KD$Vcoc^=O@l=!b)c&=ZC%`!}4ER@IO-U zA4>4lybd{rT#wB6=iqxTaw9VT55Ron1;`7L`TmE<{2iuGR|8jKlOpfCjA7$B|I(8; zn-`O6C@S`Vu~h6wF@ujpC&ZC1vA*0#wxgtM1NVOXs9=|nW3MC^{EdHrwfVCQg;w02 z-FFwItHhQ~{xtzVJMX#f5;8F*_wx%s+y&szZHU`Xq5SzjVnpM?SClRSH~zWD_1Hkl Q7AQ`h6(8gI8yx)q1OB=L6aWAK literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test3.xlsb b/pandas/tests/io/data/excel/test3.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..617d27630e8a06a5eca901b92f91e896fbb4ca43 GIT binary patch literal 7553 zcmeHsg2LK3AMttLf8hJfbQV;t<|=xfCv4OEyebB6Z&n=p>3{WY4DZ9^l`GHj;DzDLaQ6et;EkFgX!CK?53~-+FQ+t`~|=r zhMpB32#a$o35BXeXH1N0HgWGnJ%B zv_8Lm!vyI6qFkkzm2MQ8j@oEp5}=i9v% zT0QyXp^jUCXl8yq58@$Nf_Ts&l8e~oQD0!3rJ~Om&UOw)9<|I5jhDWZQ0A?Q@i&OE zFX=X&y1F~sQk5s)tvvjOD}`6WC(bH7vN3%9jv?c%)hN(cyRA4`b-mAC$s=uj3BtcA5sbQ^l`KI z^mOC5zwYsuY%tIZ9a?Sw+gpWU`*+&3eoyo}qFeH~1;x$U>dlHrQEAV05YRIF`q5XM zJTd-bQCb37d6!?I_LmPo6zz0jup!!cx$5Y#LdfA8blBCC<3pIZ_FI+PF>2UsWbpot z@Q${mtcmJ@n)L zoi~FB{@FqF8EiBkNYF<#Z~n;`S0|W@t*E^`rnYDP#&ibXksdWR7SML`zjjhL31ZS z$Q9P-*9HwCpvbmT7kN@eia8|H^RdOrR-Su3>fOQBnC0dMVYc}U6xq2m!H4@f zZj&TU;Dhm*&Al6IRpC$j%w|;PaU!VaE%Ijfxu-(0)|Y1X-@I>nhA)G1x&G`0Qegz? 
zc3pJ?KlH(z$9?;OeWwcNMIxprt3|mBS8FMCf-{8uiIo~E;7a6+_nv{CYW5SaUfU@% zA6uY$H=ARZqSu)=dH6;q=cE{=|VzJDg8A3uctPDSvMc2~3n zah6WO9x&RCvCd zxqcT^YNS4TXJBeWnv1)ac&Zm>%NafH6@(mVurGs{g^^vQq{7tVMk=}05@2U?a4a`v z+Y_6*4%X|sR7+%BdA5}^oQgxJ_k{Lgv`iE6grnZ!K9h7%PEl8xC=`dGr0?A~PBrrF5tcd8UGsgLDw=NZ1GpE}$WpRRl8UY`>;tvW*#)72SojKZi?3L5IOlbTmLAXfz8aQ-ro=@^I#Le{aC~6_>U|}4kI=04(V*5nD z5=TzeIJaM%zOfY4)x)0mZ0=oWM8SJ0%lx1uuG`)NR<_&LGiST811@TD5*@_Tufr}O z=JXS1bZdLn1e3l(pq)aNIW_ExK;4Y%-nseYyL6-W#l}x5*_7?4ZW>^DcxrRN;vN?G zA-DM-qPTKz7)E+{ zqkKrWV5-LO+R4r1PXhG=ggt7MV$Q;{gPeyli{%L4Z}{@v991SelOc$?N5K41+-nbf zM#GOEI+biVn-XO*nui5$%)8FCxt*8O<-U7)3J${7JPW1!8X*_OMT`EvcaF1g@ab-w z$S!{7vK!skkh1y1PdL`mikizsM1FnMS7X;)eS70ZWqDhWp=Tiw-QYT=`z5;=RU~!k+u?73-}f|;k{w;guvDJuTFzMsjFaZ z**-SYvTlp>B-FS<%mjgPm~X#cnw(^=wmJQJ+%2+xc)O$|HSf`tHH-oBHBEKQ)7bDKbh~v7e3U~K6N+M+Wa5VE$-VVg|@=46Z&$@2$hrx2`FMbZ*Sz`5-lBK7eXyB<+L|v zM72#vnXID-t6~MS3GB0v%xS=Fm_05DkhcKK4BzXdjKeEs;rp;tW-o7f|eP;K|t);v1#%KY33J_;y!HJQKaVzBOX~P*7xSE62_Z$JQ<2niQeQ1Y-qQxJ)NUQwh zSt;aLSN_V-SS&MzO^U$at%0AA-h0h{U#U-gsnG#hm0;l0cy_nA1fv<-!+hF#(H&au zgi0$EoDD3gBWfZ$u1`55w#DN#SOdp&t8FpOm@>VrMEKxI;8)|ODM;gsi6%t2;sS(u zDfpDn3Pn$(Q1tAuqC6TXmmC^wq-uI&uI6M7)M~@sNJL0 z)y}1E&g7|jxgOK;6kb3ONmmiUPV~C5*Imu8U(%STK|~XI?01Qtt8Cxid>h5)oR^k$ zlE2#6n!kK0-mr6rvgN4>kLYOt;?0*c*B9&ewB~i!SVXBvRUPHdHpdcB27Y(K!wgpF zm~0EzcpJ6Ys3^0K+YF7nCNJIhaXh(1lMwY$ru6FZkZZjWuW0#JMEgbnZM>8I21OGh ze_<;S^VR64oZmwS>@TyEZRLlw@#d1a_|}!|Nbv{pO(taxts%{$2eTyg?t~RGcT@SO zK-S%EC?@nW_IvGg<&v;yGx|(Ada0-RZK?mc$isA-@jK)}xkK2=AVsNKQUXK`*=ZEk zE0xm}E-sgKf-{KQJ5!$Xc&WffT0h%Bj{s*>0LQe(0X`uVMcyF*FBtn)Qk^CdfR*3+5jCLErfjXJaH^{ z3+@-h*4pcQ|cFUkPqHU*NQAL$fLs?cGk~^J}p1rEg zb@FsIx?tHZQC&gcAkJ(0KGc^Ih?hI2(@(q_v<$s#KWH&OWY^HZ%hTGHEQ>bqXI;iLyhUDj zT&IGW79)=VaoNF5=04gBsVP43%{Lg%E#5!j@ssM3b=Dfa#fcg&%kXZV?|J7z&bC^z z8h-qB&Nw-a z&N?#z^o`t;O|wWhQrGA$Lya{mDF|y-XL~>|Jrd2z-$_|{A2Gv)j=4B`XGHIRc7uP` z@aV?K&L>#I1Lhd?qlO<#w*>&PnX`aGX;C+@MAF2WcshF9dh-cjCBQvn4s<2_Z`TmR z_4Z3b6Q2To2cUlv-_FbHKi+?j*&kbOx`N({ASixE{e+2ivCK#w_i75MtC=^zz4*!I zj6_CHNA2Yi5aRUMpUgj}Bkv1TWCwY4FufeHI9m~$*wY*{-17N20)SKn)=%qv$pq}D=jV+OZsaHx<^tJbVef->r)XKt%9tM=ZyJ$tr zxl23gtNsHE;!vQ8f`g=;Q(ZnX`?i-7sOmAR$N{qp->6An3>zoQ&j zpioRTs?k*`qK8y0K%q+Rn1-3J!oyzUMCtFsXcCkp4|Uyq+16uODA#+g*kWmPNZMcT z1)e{h;u(`mTsK;dwp(TJhRQAmeaQ5D?jwcN`ZCt2@FLdou0?xlau9Tzp|RjIuZq{) z$0L0>EoFgbs@H@b=3ZtN*qx~kp7~C}|8p4iF)ETl?nUn6@cxF3>=umUD^hwu4*e+p zPzeU`GCK4B`}?H7p4eZaZzprtXB=Mkb~Jg2@XR>?c-Z^@w*TQ7C`}$#>*2zcJy*DqUVj(`t(HLL z+lL>*V|^^%S;wTR-LdYhe08oe84gGxnsPX<-pbx}E=S4lOSeM;+8(0MY>A}?X&KHs z7*?L|xCV^#s7zX0w}u3l!OdHF`SZOlM=+L~#>N0c2&i64=mez<_ygqWA@N{FpIBF5 zf!=^n#G>J$8uKAuV{#EV1Peu7V+f)+-z6sU3h zFi6W>0Hs8D6>Hx3Gfgh#C1quqFJz`yPn!P~cbkGTzl>>bczpDmbBq9k0(WcfaJith zr*I4okf?yY^$|eq;KDuy6|H*;bF=nV!-^= z$i>{wm6PMg^<3ccK?Z!%T5;~Y=O(C{G?VMJnYg7o@Sb(f!Cl*3I+ zY3-MVRW=CZd`%k92%JqV+`)PjA*8Z5`G#=7^||GIx!-^t?b#)nj+T{>b>;JH7hyVY zBQv|s6Pb(-{9C_=YMBF?bRfJ7o^kp92RUbfdbOZ$!&%5MWbf@7HX;b_0dIUn|C1pKou zw&Ex}L?3*+LlGp$(UVaG)aUzURKfB{_;`C?cT4s$u+v@fo5T|b*s}=)H!qkUkcQ_M z(#f1QO@@JN#<<*hY+OH54#6-pT)Y0_>ZR6RS#WJb15G-M#m0S;^x}8qx53`j6DOo- zilSKFfQjuXTDFTW+3%>wtv42`-eb01`B#`56eY@3@A<_1!rW-W2jr;(SrT10^4_%Q z9Im-ZT@SHzEFQBDpB`GQ5B5*5z%2bw$cO_Gw}oMFc>}9|aA0r1AoEwSlxnLwE^=YE z;mif%x~Z>^gV3H6t7e$Mh2?Hne=i+yW0y%4RdmqAygtm|_uidR4&Q&SiFc;bE^p)R zT~6qYxLYtmid6*nASaZiK-}!~7I|VoY(R=d>JKl^dD7qrW-^WUHH|}rs~xu5 z@UJl&iA1+KC@h4FWr9@koj2B^Y?5KLIuX98AdC(+?z7?Gqr^@9&PP(n4tYUNzd%+? 
zEy2!w!0rEJurJoC&I`+6bic};v3m+jY7dfl_YMV8Ks3BBNj`jzJ}e%-6A^QOv8{)0 zo5^xZkUN*K9_`cfB6Ll|rJWTLd>!P`ov}$@(bB=JMBo{qrGO(VXW0m1y0Gu5A8k%S zSMOnngdaM7tGT47DKV(O;mO>M8Xsg$A^Vs(QuY}O4%iiER?c_*2<7^I=t;c)Gg9sF zrLHuvUT5L9)$o&xjpu{s2dURlwP36pA-lBadL(OwO5)wxA(q#=uTsq^iq`0&h$-fH zXD*c$?}I%#gyBhexXgqu0;^wDG-tP`Ik|Lr?WeBa(72nZk$yn)_St-UwU45E!J0qe zN40ucc}}&Sd#n&hfhk;NId@C4`p8Gwjq|?90VI{#FyhwV4 zVC*!=IyuZAG4mE>>x59xOwZ!f_BG-Y;x4*3h#7)e3@5UnH%!i;CN+mVRiCP+`O;60 z_YF;tk2WsE-E_fAS6LgMBPxa{lWh+w=fi`!)U}_Fq{lWKAN~?%2GKGm8L%g{u#*6G z!eHiVW#VFPrsd{h?O^HpBQSwRCDFUNa6{MHRw=vp_O*%W5(_Z8@TU-7dejD4X!2@J zERqv*JfezU@i+u!5EDkeq1?+L!u*!dKY)YMaMvsn1jG^z6XF%RNRZfSAZN=f4A7-e zc|T10I4n{#fswPZ`x8Bt)L-kH%a2NNCa@H7b-0Ei>|v1a&fFm6`n_U4?wUAcdZWlwiJnp}XOo%rez zrmN?Tt%V)uH)LNs%f&rL+TDWn;yUyA>hxEngDu>&8i}U48tJKq=xt{j3&?oe4{mMi zRjsuKIzBRu&e4}`IHWt!Jk;&s$-g{3-Dwk8NdggbLSx^#zSL5CI_XhD%vv6j1SC%~ zTkvqiMu8OWIt3+i=dpL$dj>W6T3_FubL2e)u91j*e6*%tye}s;Qa@xa9&FLeSxKt4 zI{y;Fjpvl1HvENvG6ecM)iy;*BMzu&z$D52x{d!$0?;FDvY@36v|QwrfyzNH2ZE** zR!vBo^oQ$2u3}J4P69)+5YE!h zhecv=Y;8u|d@9MNPW(8xOZV6Sgu9Ia`P8Q&DPl+x`M?p-m%PQ(mNJL&@hURrmfE=k zKA*GqL9=_avx1&}m%K;l$@AwkM z@$@te=Ue~#<9Yn8jhcQYoa}7Ti=Fkgo+Ok<7WFZ4BFt~TQX2VP&%ZSa+`IXc@vRRot&-)rqMEk4lq>f-Xr+q*j`Jw|I%9s;E}1NY{bj^EyrG53h7 zcb8cl&Sc<>DGBZKQwiW){p*A10f6B%8k-*Xs-0GS7StFlFYkc<)n;mlt0X(UQRH)k z#(t9h>Zz|U*j(60dwRTi>Tt5h+?TLk#@U&MGjmHVz8#m)jT|*2F$vjpkHQNo6zuAL zH9kk%mpTt+>^<70B89}%L^?U(7ez-*Kah4YX3(6l4)4WH3yr0Dxo|YP{IF=a*nURL z&x~#@RANdhAmZIx->?;jh8?ev4EHo;d$f!heN%^mym0wAl8jFji|du?NK@H(9l>S^ z(k*&KwAh}n8!%%)J3%^Q^jYugxV@?#E~#%LLvI*f#ofN3mWra=62|AhpyFM@^~Q0Q z80>Rep_@ZJpOatsrK(*|q^Q6T|KJhxsu)PU+|zTD!*kwUScG z0!c4Z!+>O8L`BZ>lTKpXOHkiQsy9Es!4_RH)}lEk_S#r2eNMH{g(8vEI4m=nTOdo^1{86g$yFq3X!>ig6_yJM6XP_`d!z zxU3=p+=ACD#Y!k~XyFJsHk+f|LHMI7351uP@3}UrOQ>1iQw%Ktvt*eon44c)Iz7l^ z6aMP3fw0fVqIy_3OqOjeQ?K~~DM171Wn zw0dUy&OM=chSgba+CMZn=P?^HZ?R(EE)P4Oh$G*-tS`Y5Co+{?jwNI!XT7`NfNf7< zTl1i&X}ud0*QRrps4o7j%|=nRBTl53&9 zgdJZ(`5v+Bqz81YPq!((+wuGgi94Jv@$b;vako9st&CtUznZs9gUghkkiGjRTUX)# zA~H?hfmn{6gZC^5QW{Z$!!^CIiNl2rH_=2qg};#O3!1 zG29l=54kRPE$B2XqVkdK&_#c5^7J@3s$j(~h0L&Q<{Md-#mer|cp5j?iHf+3(FWmx~~ce0Kob^A-j3ong5oF3$*neGq?x>-qzk5UuKb64J4u#r($SA z9MY%M7D^BptG;4XtESm41RioHynO$Ozl`%Dv*(*K(WR@M(!ATyLmEi1g6xDw16Yi+Kh!MOSdZwkIE38-|v z)gxD=EVhuhGbJf2?O;HP>~T_J*g!u=#n8ROh$z^svJciA4ij$Q2yQVA4}HZ>+#mwM zf}-d6L@?Hhns^Sec?~M(mz6^aJAuz;Tg@tT#|AS2n%OgYLeWZ4R+Kt;D{7N(#iyP+ z7e>o-z8mjugR+YO$}bhOQ}wJuD729y+=*l#?23W4kIL4T%L)^tO&#Ofa@jUlnS=^9 zV|!0XurbmaaV^1BOn@-a(653P+HmM(>*1ydYlNWc@SU5x&uzS?Bp(L1;dkIScfc9w z$XlB6Ni6C}C6u^)ehueEpRoKI7KDmjvcZ`LwRzn|lOdT%3Pom8OV?{x;Eqo5DTTV~ zgoFAau9$b6-HI<|kgNK43F9Bh?N+T|Mq{5T`$Tkz5RGv~3#Ie3I6)@(~XmzH6i$4uaF;6BR=&Kt!RY`Pxe%P~*Z z^ed9R)dThz;>=<`(Vj;F!#F2rg%KgTjhRSEwyB6GW=+_MgEmcu#3x+XPdenHzlZVd zW`~G|e$aagH*6JV7EW-;S&tc?oE;#LFX21lPo>@yuSkIW!JPyti6slEX2%_mK#`!L zC~1R{B@uYOe%~lh8<}_a@MJwH^h+XIxmugByOmuiW6)JN!@xWI_2Ze%@n7P#wMqr6 z7q(orMgRav{)5*a>l9Zjb8|OWSS<^$fdLIwObjTHoTiqx=Dfrw4!Ecy#rsOXW`=)N% zz0}!mveHMm>u}Sk)W6iNJky(hGA_Km(bend)r=w*^NI|gHIlRC1Uop7P-E9FsTi#} z=`nvwKYqUZWTr{t$p*7h)V+W8yD5Vf2z)Toq_f?z5`{HL7AKdZb@cwI__bGpmt%Xx zzR|#k5c4HP>?7SP$(BycQ^ikwmI1&lO9J^!3&^aQL^OBavzk}sppQ4+-&1R-xXTCk zPBxgV?|x}{+4sZksxZrY!=e=sMrxQjnrgT>I=OP1I=Yzu&d>kLWx%E`CecW_hl}9D z3Q~vkpl?QDjkMHaP@I?$ZdTyYLq<>iDn)6$+5N?gdi^q$uKxAR>tUfZvzoTjU`i7G z8CH3QsKDuH;DmgHryw>CPM&g>3aq6Jy%6UIbDc@a5Q8nN z-ONG~{dS~5yLHN;=-mZ{&6^3kN{gEidjONO=m$Is{1NUYcxf@*2_VbGi>bGTUjf0U z#S+0TL`e~=jui;>`xOXTJT60%l(mO$F(MWi%h4gej>BDZAk`_AW)ks7T@}ME{pB`K z-+Mfv(0GNW)&#d{qct*LPS7QNdq7Mm8WJ?rj2Vx)&% zm_en_75LXHbKyQ0SgID9=2kOAl)l=VRj+^5yW{aSmYUO0jNt)QeA%dQgjFlsoL_1U 
zX6E-4=kVbDA!<(}-LL`!NU5hrB|eRBGWDYf)l)b+(oS|=LH4K81Vown4mmo4%Jyl( zb@63+$$bdt?)IS4)&To1V8{3g3bs7_6G?Ysjxakn1GiJdPeZ%eIOiKI48|<&p~kV` z_wq0FyrAl8lm`L)-RI@gP>Tx=#gk_fe(_WABC<}4|e4`r=x z$=rYrkUVow*<_uTx9uO>krU=B$v4{FXyWeHj#&97nddLTy|39^kqf-Py%^lM8D_sv zF8ZWq6KvE`bA#~*gyG;H!q}EyZ*~4*vOoG?Yp#BuLgiYA52&tbn{LK6OsR4xfOBU#7 z1wR+ve=4}Z`_Cu*w*>!L^ym8LPtjt+{|@~h)zHsc{+>(!)BynG5d#4K$gDq$|2+); hRh$tKGS zzPs!C`wQN^^Ld^-@tpISd(Zd8{T>Zv6jWjWIsg*@0MG+W_A;#ukpO^rQ~-bofQhUp z>*(NS?%-ymL7hRqGY+_0&esUwD7>jhkE9b^;O|aamv{D&`6^KOg`IpT3J_vBR zw+W|2w^*!yp>stIvIGHGP#v&BZ8Xk0c6 zgRG3W^|GXb=%Ev{01exyI+Ah$hcW-2`=C6Pp@S|u*cJ0C5mnDJrCD=UT)deY8G}97 zNHhx)E5m7hc5IK~>P{P`{MH{e$Dafb}SKnFdOf7tFy|CYG# z&PTXj@ZMp{R-nKQIskBcivrO2n_Jdv^D&+vu%?3WI&6en8o8L;x$^S-xc}>p|HD4` zm!+2_sH%7K;e_r#ybkL>n}Wp?$S8YCKWw4b3V5!#h+iL*$3zKhXQd+0q6kJ&2y6|w z9$bJ)#C+|i|MnSN7DY%b&e-5t9-4aT?2f^~;*#>fxoouu=rMIV_3gESk~f=2YaB=E zyW$+>fn~-ApAO}!3C4JyP~0IZpb90q_c}(72rd8HqfAp(xrdLS75gTP&okWv1e|l@W!TZ) z#>X6}V_#sQ7YsOArsQZgcsi}ji{cpSy+6|DaVCWK8HMPyle0LI5Y>msZeJYDapZUe z1=D1=oCd0ZMnt&tb-Q#I8!ye3s7W%RpDT-4bp4!-nLI47kU{RGX(AMCGs@>KVB`9c zdJut`p}Mu_moFagltb$p8|l*6E#S9JvJ2nP-$1=-$B)VHC`#aZ1I9Ne={e6k<-cJb zy}!0l^&YkD%)7*aQsO!+WKVE~=e};y-d}Z-xf*0|UpV3#I@z~a z8|a%_Mp*ivkdXwUZr(+}VH^R2AkjY}>?LeW7J=W0K1*L!<-V0ZM zWVhO$rE3ybW~U(G<}F4+KFQFoH0974){tb_R(SLt_NFeDZ3g>y;+)y|wJ4wN=OL>a zE^QnI(5oPiuJm=r%9eIEC1THj#}D!4A6UKvv!2^`H;gnVW2tvDMIa9zz0q9M)s!00 zgL|@dVa5enQ^`LfjgU8B$A`G$&phy5JHWWQ9efh!Z$PdUw%C~p(d{VQv>JMH4mTM% z+e^8Mtb^cQi`k``=#s4#DoJ)}1zTR}yi75tDq3ZTB&C`aoW4+2yoGx5+(jl6;4>3D z53G4v*__pu>g3YywVSebP3LZ`M*iWBx6k^U%UukebB?@mKbn<`sxz9koTG<$rmhp<;GN5sNNjNjaNsX8%h?2VoTSto`1qh@@^*f<91 zn(11c*uFx2LfXmj8Z})slj&F<{F>Dn+@$7^tLjtTG*|Y?@wTz)(Em9dMt*<&{sYX?i$ zAAt!xB8}C>M-Z~wxP_!lN$8LR0YZ_yQ#<&wZM>WsiMuIw>gP zHT6z93C>}BUq3!Z<4v=85D-@)R7_CpJYH&}k&-jFFhGY&rDTZwQD}r_JPU7+d(%b& z6q155sL2*B0JKg!{MewNLND-&zOt+y+X8;Bzjf5uNZ6{qoLDS>N)p(sdusH{#Y90J zo+cuylvs)%M}*%N<7Q=UZ_fMMo&U#Lc67#~C`E|dNH6c>xO!gOTG;U(qWjueF6^+- zZ|AQS*PF-Hq`f2`XyK>VNHERONJ}xqYCYAMMT4zyANcB8wSlKp%y*7Rpe?bb1w?`37yi_hQ1I}2g6>a z*d{A!!~zxdS*7`3wFlVUP; zyIrED9uhsCA=?@~XcAT}pwo?%M&6Wh$WlZku77<1%4zD^ka#q{tK~;}QUCGB*Q>!oet8pH-nb~0Qb6W`Rf^`%7^hBR3m~(u%~;UV zQLA)b3MH>bJZ|Yuf~?9@24T2v^j?)40B|S&VSEoTiaaYDXyzQwCiXC*R=E(!w1mSP zE$dnUA=mnPV00tlULXN&;G6JKHtcv&DWoWgM_XXVGQSu0Y-);*_l^JU(HzkRyta=O zKPyY(d~0pBI}zi)MMHF~INR&5)JA?+b8n0ScdifD(n;gms=hSE-kyf32V7jZiW9Kt zd97Zg#s&Ibo?kq9b8`b@#D1Tci$X2Q#J@hO?YFb|jDJ|wyVEQdeg}W7FXmUQ0?5>xl!%gL5^~CF?Xy38Iqwekax&hPswc=&d zM+|yi#qLyh^T`|z558u48TA0&je0sexXnWrkVIvpE?jA~)?K zEv9fk0d?-`_tCE}BzZPbjSB?mu0qrLn8Y5>t4B27s!i6Gxam%cvsbt2iWlYk5$)Y) zTe$~TuQ2tpoW`CN{~l1ZymxEZF48)*Q8^!?5gbZd>R#^=C&B|`eKFx*o$ZxcIwc-< z1g4S7n}-y)!fiN3qE-Z!*~s2mM{3csntx_w$o-Bv@+`tgn{u3eOjW%}(3A>tQ=(%Z zamc=4bITWZHY>VN4Jmc;OpCx!$hE3GFZGzU567VfM;goyzMr2_n{x10l=}9am8q^% zh0q;4qPGT^d*I15IXoI7Kh%}WKEf+oAN_d{BxMf*J-shh_0^`OEj1IhqY&$nM2hnu z9$LzHa;MnLLq`{uDM0`U(GU(B11v#bL`{8^N4fqT0QK}&h@H5mh_%e)1JH= zR-Yx~OAFrXZMGpUcy5?il9+ykS|K_=P}H3*Vxc1AhJ(^77cA#oIxR)8r0c>RpWGNj zg$lzF$Wl3YxM@72n_pC`r*W>rS*EUiHiI#gWrw%A6xZ7~ij-L-LXiJzxmXD!_D&c| zw$1toe-P0~ay;n5vxE;`Q%cKTLNz!K%#>%VWNUt9>BOANdH1UW9A%g1ji9I-ECl1R z&a+$hse3J2%462KI+H50+GbOAQCsPpM--*G;S^;8TaX2GLn{N@x9;)9(;UtZru;*o z*^fBU1&bAXw*|O_#2tm+W`2Q69m`d9I+juxpZ07+0yaJG+L{MFt>D!DT&hlcIdjLu z38Nb`0; z008&*gzV;RXZ~9z&ezg&Oy?sGcvE-#>>`uGsy_jS7B8J5#dqvUVo4 zh;An(COFm^CbrHEc6k1JwLMgGDD-X{9NJ4#V4NH=SEAjzVNr-gR+VN$}beN zQgp3?skG3;-AUw`x9>r;4$4=T$_o>sOdVreb2!&mSjFA?+QKlMm}_;`bT``J&(Qb}7D)L$B`J z2F2ZfuwA{36NPuG>?cJv9w2>5MRO5gd0xAWj+7`Um2E#1^5v_@3#8h+`bq?sc%t{` 
z44g+FWfOu*tvMIFFDyfOk60mxkY3A5-fP7cJce%4i&0OGv`ey`l|8O#(u`suiSGL% zL-@z1h2g&6@BM25g!PNssyPp0q!R`W7m*ofRw*@6<_-L33GSb{FYnEKxmtsPCTkNpy_@2gdCdl1J}YZL&0>_2$@aZYiy zGB9@iwDN)rigO4C|KlzfD>x>F#Ar6cg4l$4)$kCpvY~e?FYU^9P3mdt) z!h2AbTNrlo9E|eCa^1gAt1)i6it#*qeCmC_%V-DtP93Gq&2b-WN?}wcsUi`bdh!}4 zXj!<@NSpQKiVs=t0V=7DWUpwl45|<`nZElDmH*bb9y$AIFlHjgT!HL#xN24)0mP<{ zRA$SltD|iO%cB^NL|>ndCOR4%NOnpLNz>--)Y&=<+fvHO#2mND`ywVSR$JmnM&&a{ zG4cKB`$QY_qBZg*8#wh}*O_U<{PhH>H0odK zmko61o{Zhygm?BhdNpI*i+)K##1X;Ua*PMf1!-*CB^KXlPJASs+((q>K9OOZa13Wt zioEr&c{{1!Qh*Fan{c)}Qlhd3%j4(pydSweD1PM?@8#GQzH8L~A=rFT5${0fQo5xB z=S1<7kYxZc(~?*r!=hluOe%^$*P!-g1^DB&_qUW<8vcrbonttw_02CWpV)Y)s)De* zHzHaA5u}Ehqp60AqmwJIsiTYe?{xgXTn1w6q7#giyZMMeETgr{4)~@Q*2>E)1jXJH zBghOq;AZjEt5#GZoY`GS_s}a>>Fis}xEc~mUHg<-7R*hiXRS>~_<;@Zp}u&sSxSA# z+bafn`xRl?G!|AJMpPjjE7{vla7 zK`an#yU2IbIsH6QbhHY!57iaNv!jYUd^4QbPfGYr;T{syv{)MILXsH1;#i5oxLb+R zD&R6WxJ$XbMdmq7a!rS~msyT(y3p`Qe9QFw8nvBgkLIexxS-j)Q9%4th6t%kR2(ynj-U7Z-%-r{PoVu4%~9SPQ30 zR#chbQ#3PEEhr+*QMfZp#TBjGo;D0Cm?8!ZluQVa&c2`V#rD7;pcgY-GyQnk=Dr-muK z?B&jkdc(_}Esw7;w7iD*n3!qe%17>oTfOI;^-HP6$@rG+90uJTr1dn?3C-6pDD%{) zBBJw6qP-ukdV<72-od3K%Jo#1m?R_5AzNEi**^7deO!5NQZLGxyFIwIKNG8^L((AjmkiDo_F_^isA$JYlL-Wiz;gok;+O&UUM+wSN zQg~-~tx2$5H*DpbXr8x-R8qUX{2=i5`}qL;dWh>bspylM4b-T;_8R*So(~C`8$q`G zy0Q6($^Mvs(dN`p{=0&|w_yGd{4u8@{NgVSnV$v!PD}q)@I8V>`Tt4kpY{CARsN}I zAJM!0iMjk)_-6*{PhkjR!+ym_{jA|<+UHLVj5xm}fqqu-bItvyf(^p|{DuEk;6IE0 zT>ktinhN^w(*IEm{jBBhsq{}B0KiL90N@{q^=I+Fhrz#!6C;?g|BC+}6g8C55Y`0% Qun@23h}=Cx_2bk30jZ|Q(f|Me literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test4.ods b/pandas/tests/io/data/excel/test4.ods new file mode 100644 index 0000000000000000000000000000000000000000..c73a20d8b05621dfdd301f16a975576ad6116d21 GIT binary patch literal 2992 zcmZ{m2{c=28^>b_jcB!YHL8{>MWSRZliEgwN)eQ*JtBxogjkBA4IS%PTGTe^)KY6v zTNS0Wwo+>Ah$WOpNXnGd*6J6X?>jxy`R0Akd+vMhd;ZTo=icY{Jbx6Di<=Md*N_3+ z^3_Rzux}35A@;)K@YujmA{I>~`rEAcRB= zz@j}!-dJoPiXBrJ!1FbN48Z5v)BB@b0DvnP062IPDKOL*OM+a(`*zz65-8e7+r{35 z9JSw6LKDodUmhiQP?><_AW` znZY7&RFI+-0+=QxWE9>0c`sx?Mjc>Wlp!h7a|ddj6Nr(7+I zAfEY{>ca#b@mFSJf>%qf-ZbHJw;qU3ShhL+q5;VS>TSAgp-dP2mIoQE#zq`r@`a0+ zS6)~MpX=67j*bR8L2FAcwA^>0P?Bs9NRr2`6ivvBzsuvCl)dtMv&+NS)h@&Gdk^vp z3c#htXq40yM#&X_ukkc;fKP0B;tEP+#O@^qIX=q=_g%N#UcT%$u4i|H4xPI5c~-oR z*FIShEPTN`@;t4V>cSZwe$ji@>KSWivBIU>Aj+)~LyX?<&TB~Mv}ipNq$VK|<@n?C zol7y-k#XWWz0L-viyT#-^`9E#dVEPoExk|#&mj~76!@hc^usmP?xRD}%-haxoyC|y zmWfVseQ3;d1_zWMVV|G z>I?Do^8HcANsj;4Cu={ggi%L#4!V=uV7t-Vaes$D>#VA@5fQGBV*-8P@^g3hlFWl6 zg(XG1L;3kE#X3}`epype_?R`O`?l4vkeLNkbBqa1zePtkf3PU~^)_@kBQq~3f6OJ_ z^@x?NE@Pu?lPk6LFTE6#%PUiOdb{{iIy2q1~c4s55d1o=- z0d%bfcDODAEwa+(UQ@SH<|Ol?|9oWB_2mLHPSYpL>w=I3TIQt)=_jy>9!T>rC@XQa zP2sUlwOF(2{Fn-u7T@611QLMJw#|n;NykcR9lSH&6kUpr5U)K><&*%8st6qUWIJWX zh1^djkDkb1`mnwg}T~B33rHlWT2JR@9Dsd~H z9Pq!_91W)DEqvg<0zGxUeGQz`GHgHOoDCm{;yrYd*1}pF7f#qISlPr(3qMv>oUVuw zSe4w;=;Yi$T>C8d5zHhHBc_H4dF9wfWsR-|GadSfWBxCI5-Ibz$+Nl~4<*Mi)pRr9 z+{l9X@lRD4Z+KZKey{q;_Ck+qqdz5*4fdBWT!6o$U#K=G9mPhUb{GI)(+|M-5dsft z1)Kfp%bf@8w}~z5i??Vb!JuEluSUW1!2*Qyh>-4w9%Hl|y_bRF@-{rYbz;`7J42~s zQBUQB0YgAhbX<&7ZMSyA`>nCbsqCq16eEAR$IlCO+gvD4`A1@TRL%wpV4!fDwmwhm zl{e%Zu$R2CphBEdTPI)2?$19B)&gG(&N)qJ+OTflmb)xDXi2m3=b`k^aCEF`D#7mG zW(oFHiuhBF&NuZt+MDwB%Os1%3|rG{Zs0~a8W;xRWNu|edKeA#BO*U;(nH%Dl(wi- z8hEDO^XP32pT|$8>uO9?GCPjlY$2B38&P3g&96e`nNHm}M{yIf$=VL#&f!@x>6L(M zkp+Zw8c)LlgOAuMU+fdH5j~W9PF_{YT&mXXMi}*GoWQG;KN3pMCf~{$^c%jVGu1Js zEYp?LY8cz^FM9q=_{n{b8u*YS!#Vi}C77o;`CbaqDrd%$7-^F%ur24bNpDF}eAw-f zx^5)@ZcNNmBZLJl-ug@lzgV4-^ZWIstB^pE?9G(6%2`Y`ICNJ}^0C_E!aM8wg#2Q2 
zxqVEwgvjwAFNIfIDX$6&)>W0_$1f@PnKC(d!doFxHvSQxO9O&=X{+#=*E>p2UFTVX zuKmFWH2#y**nWXO->~@<<^%vl*jIDoONOc!ED%sUnt=1flD-mKnQG|6b(Y_F>(*@N zHT~d6$_n|6F!D#hvk4fFqo$mB@3fu;`gX9D={@)b(Y1)sDDAtI7TTopMb=QO_bp+c zi<@*-&mLvHTm+1YaRRluq_NP~Bd5d)VKRvnzVQp)O%tnFh08aE;AApGzIiIu{V>u{ zaEmnOJo8oxUYum)ntQoHV!D3u94z(zq}^N(5JNVcnAkA9ssl`oOxCr6xpKS=c{r~r zcV166p)p1B%0!ZH*ox~I*{gPvrf*<$if*#6(NNeCZ5qBeT_q0vza=Yc^=`K{2LN!L z{VBdWAZ#G|pk5_en_G6B6mQSnmOlPo*xuRs0_C~5Zk9MSQhxSO1Uz0?v@m#Y>mw-} z9=aIO?sB5HDHdU(Pi?I$F;FQ{afxM1SarBS z&B(xWDNuRHabX&7ceq6!1pVSTpFu;wsG+IT?>kilBZuB$HenK#`xPgeX;WsR%gQmj zr#t{a!#ta@s>ghANkEG<6sRlke?o`Pi_ zc@cN|!D@CGXenb6%rwGD%}7mro_-tu zbD}8Zf$HDg2{3>7o literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test4.xls b/pandas/tests/io/data/excel/test4.xls new file mode 100644 index 0000000000000000000000000000000000000000..10a6ab1cca6a4d8e7bbfef77d8bfa7d9caf7b2bc GIT binary patch literal 25600 zcmeHQcU%<7^6y>JN=6U`6qbyVqX7^V6$2t7Cn_ew5@ZDh%yOW@sb@m)OkkD+6Grq@ zR20K;tfzpeD40DDv)*|qC%3<2 zlT@9%6vdeslUk_3BAICFOyaG=@Fih{OJYelNrdx%q{No5a*IX}l+<<%29z#Fbux(*JY%Bx)e^O!FZmd#J)IVrWVK80t0la?TB83~N-&6z zu(b^7axjRSu!K~7U|Zb2%B9m+F;u^T!6aRUC5S9ZSwb!{N)XaXCWf&hnlIGQFS`Q6Z+{1s$B|ct9~$w1t6WC0SAm zDplN<^dkKUnaXbF*h?VkRJNxGsaaleqM5xJMHn!ZZLH`(dcq&Th}Q4L7OmKeEmmVB zu_Hqz8G}&bY?>h?=m6&9(<*=mv4s)WDuA$jiwvyNH8;2 zG$eVX3)~1!oXvqd%|AoKxJewit4Dq$T5xxCJlqqUqN0F4ndC#u7@@?19AMiM7cv<< zIh&XR(H#Dqhyx@iG8NS2NkT~w;gCqGj1A=E*O>%$9uhqmA%uM7gWa?oC2XXFYZB1^ z6yA_eN4G@b(8|lABdWCW|DWj$9G}92DbS}9Y7+W6NRQsE!LGd(?Au$xp}iIQwYP$&SOpw^ zqE3L3!Wu~P8ZajYe<2*x^a@N?vpd0oDV%;vg1EE=gzqCC#I-FTF0SGrbSLNrAgxv+ z(Hu$9om#k}cLRUS7gZQj5!#P8uDtz~FT4FVVj#IhSvm;+syvY1;vgIulJH;Vib%4K z)}8JEqC#~iO6}t_M}{Q4%`G4*%}q>AP2~$C5bSmYKpMOG=p;geThyT{g_gpi3)L6O ziVEc;U8oNqK1c}VHIA+1>^A{0uLCMgs|qas2zrpQpYG@<@|nxclw`|#wX}?~f5=HS>AbwW77(Eg{QJ~RADG@Cg5sZ9uXW zeV~}YXs{I$(;4Bw|5j>8-6mI}lTC3hNu% zc=Un_iY@L{q7|rjY@Vo76Lq=5@hIuH%%E1Q--e82C)0r?L2zQ+3L+sL>R5qrqC};W z1c7P!*Fkvx@NA+MpBpAQ0w!9fapEAH-M;1TxZfA|J%EEg?j`Q_;Z7oFOm6Zs(Up5W^sJb(EhreYwFk**U_kZfjWSo?5x=R(;TM4hN8a-!;9Ht?13ntIw0 zz9p_rAR8Sz8y877W^&jFVi1ar5+~}*(_7Nnbf&X$m1JWjhm9agq1Y&MqF%p#EuD=n zosF9$n_hC*U>wt|G!;%1{{v4t8$CK3cS$x5a@b(F(9DL#iK=<@K{}f*bT%H6Y+U59 z5lm|-rFG&&J$`#vIvagt<0jwIOn{AB8!V0IfA;np>1?{v*+{W8b2)58Esf{@sr;aH zHr?oKq*$7@95$ks#`C{-@1Arv26Q%3EX`I98&ON+`SX9&c=w&MvA4m%3&jFX*~Zc506V{(;eA($hR~$u<>YvrMbA~T>MSI21XNZGHLN? zJ?LztSek_#Hlmj1;`-;|bm?r2>1?D}nvEPbqL$|3T2@vjos9{djTB3>lfy>T(p+5m zHBr*pu<2~1Sela@Hlmj1;<{#U^SGh+jVYat6iaiH!$#E7TwM3wSR<`9>ckAEkuA^7 zfwbpl559`bXC+K3a;l|+nzw?2MbSY`X`rH3M9AwDXX&68t)O5(bWlqgsHn{lf@=xX zQ(auk>M$W_X+BsB9keG6RMbLXD3uWDpt#wgS*KtdbWnR5sHi;zwD$>_dC*T9D2v#@)0GSgi_Bv674Lb55nq7}g5}1@mM5?hE{ebkA?gvYaT&4M zlW4<>eOSZy4dX`4=7uH5Cd6m47K;Ni_4#TdIFI zxnf{47(wD-YUm4BT=)}}N~-w?rNT8S;$_ogmvuG6rqk4t&Sp}1$Ab_p0q@2 z?i)eox72V*Q-K-|v>GaWb%`3{yt)ua+CV?C zL&!YX6o`EvPxXHS2r?gZ9t`5l1AD;5D`D?rXE&QyCukci&-f)IfT4Pd<1!5Sio_3; zmOwLAToD&y0W%?%ChW3-2x1r~EHXAenhmi6O;81G(Cjb}2yH(W1dRp(1#ck`P6YpEk#ND}&LbcGP;UaUZqZQy9o2f?n;u_uP*~wIo&!sW#0??x=9Z2@M zsLMwWDc0vmXq7~e6G|e;2_+Ha&|&sVQv%&th>M`41RW6wZBA?n{843Ss-9@7!Li9) z(3Z1!Ed?#;o?x(!Wav25m!!2b5WL78gcsY8H6A{esJYe{pf_zG3k@7oFefP) zss{`NTG-5)Tn<=aVpw>1Y%(97{4BBz-f4sKNCZnC5?nMN`pv&gvVk=~eS^rI@5V7n z;7A47AgN3(Cz_J_`&#EgZ=^dg(uLnp--P&NSZJCWJbxx9X|lt-8QcTw$^%NBw1+ed z`?BzORlcDx@%F%&zmju%kCV_|K{%o|H|M&1#Eq;U5#tQHqR%CXH8U! 
zN;l=^k2v&Ukgdf_BfrUc4|4c{ycrI*rxZgwBakZ1m!LKZDzFp zQSUXGi6)tQDjWMS%iZT#r8T8#UYk9pw%f}U>-sz?^S%8{;l$w{X+=IEM59#eP$7x;S9KKlHwTg6KaO$CkcVWudq zue@9N&r3j09?)gDwPPx7`ge4|bNbrr(@UMym%Yt6yR;%@Ol4^|%Q=pDnZAy@Q^+Hm z)EG0PdqywsjlQa^dWMz0=WWAY?|%wcetxw6Ia`0rKIPKKzCPtKkLGxvPhC=$u_xb> z6*_O`ce@wfE1FsySh3sdnzK>fuoF%{bk&6;}K zWYac-haUb_AN7N7_nua`wxEoEtkj9OZS;lILw3)GD8~0*?;ra#d}o2GQ27Xo_S*o;5dq$N5PyZnv;kYqRtzoTFTZQ@F->H z+{~KZ5oc_MgdZ(0Jo6Xv%{3f&bw}m)d$*rGS=#&82YS^xBQ+-2<*I0W&$;HX{N%eu zcKJpHcZYSqY{z|cwaahy^SYHCU#=SQ5vr9pgS~i4<@u<4 z>pe2QsHM1OJu_HXTW8mIk7eBWJrxc27yWU+s$pp5GusC97u}zv>*}AZ{g9XS^5MKo zS1h)jsJ&PE?aoL2CheHF%4X%P^2yWQOsQ#lx3oOQFTKx2%{x~YsT9=)F0TnV^0S}Q zs!x_4rOuOb4qFUj24u(1%H5&+&Zr_hFL+-U^TFY_SBz1NPw=;jOy9aQx&DpA)&`s0 zD@)23>x@#aboa2+o3ki*1MA#F#;Dwg!J!U3*C=b>MN@y$95SgSV?$_AYW%E*M4i`> zLxZ_Ts$Vs}a6DG7e#Nhgd$fx;&-$a+gZ)KGeANM$tqMclRxc>3-Ok)m!5X(Eu_C2$ zYgGQr==iPiaKcLZeUL`lQ(y!u9)ql<)uX;)fE~ zcdF|j#~FQEu($5x3*{+`2aR~VTO+yb&^Jq;O;7qcy~5kJxPD>N0?VXxg9f@kt+veH zba*^>e-W!LVz>$EJZv5Nh`GM2*Ff`v$7D?6Chh3${=PQF2HzVjdlZy+)A5@}=fBK< zF>Hw9>uGx?jGR)Eo}fOwzov<0q}9YXS07eavky)E@~QE%>+_E*^Yhnk+upzN*`M#9 zRQdVjpHnaO)*C&$G~_|*`-YR-`=xsS{iZtm(5rccPg42y#~ys!RH>PMxp3FJ4=?jm zy;2)L zjfWqVc6m%v$+>VQJFVnSTFCPT^}?l2o7KWMf1cRQphV$H%reVcWKwogtd8-oPxhvK zh%9^(Z<@@_@f>hu-QD|(lz$n)by%ux`A=FEDq)I;b=m^{K1c1^HN6LY--rGZBE|oF$va} z7R0?iIQ+9mZE9f2nHb{-mu6n)dK_+OY?^Yuj(_&sgfkl-oSS|q-Oz1Pe3z=a(|6DK zg`Rw)JE(quoz4B{(;I%>>Am`IC!5zdBW!;)c@|~b{TELIE2o)Niam|2!yfCJT^zVP z>hNX9GnHlQqYLwEH#=*3rn+-K+P_aW?s0fl37}t$*82dNqIMSSo`k$ZE5Aiih|z%tiC)X zNn_5-y8|L7_l$X!d%SjH^|CC3ofFEpmSp>MAL?4r!=d5q@_*ud4SH%@<|`#kg4>`>MS-l2^gay7kFZAYTvHRr{>x17p6 z_#)w;Gw103^FhJx>(`j~QyX(@OtSOnMi5EK?czBb4jM4`Wmv?eFB@Ak?c)x^ zI=8omJ(nGv+`u(A=sU_S#Jei6OO5;MUQxWmT!pwJ#pAPYkGJ<#-}2+!Pr;{eTb?|1 z^K?XF}F>v>ZExa981_{3$zWIuK^Hq;>rELRWdyN`2w&`x=>xs1= zjKj8SDm&i%d?Ehq^stztwITb4?H_H@c)Qb_d0LlttG!+PJb300BR>6=$-bkladVr+ z4;nuWd9(7>tu*uKZJ8VX-tqM0*&UBnGJ;Cqb}8N$(#!43oT3|lHBCSDeB;QvfM=#| z`Z_~7K25JGmo$wW&D_;2|Z;ep;>uz<){*Q(k6Z^7MryR7X z@4Y4CbjaT0XBV8Q_~WkDu^H+Iyna|~r>{Nba$JK6Jjo;<)EX6;+fTq9BY2Ek)1GyL z*$MUZ6M0-W&sIjMUUbq+Z&)&ym5@+%T6>*U-SQg;Zz`leb=UClu^98}wML;G%VAmA zxy`X>Hf}A`jc~c|9J9}m)vx}CSz9j5`03S}0YUl!D{4F=4^LaNCZYH1F45~o+KjTY z2(oc--jt|mJt8~z#`nRW(+d0SUOVoo+3p_){(R=fu!h+wJKpCo7G1VD8?L>uniy;d z+`>QB)W>i|w4;gskq!LNSp; z%49Zhq&NV5KG#~?3G^v4yQ5d6&yQLw8oji43d zA}rd7N+ZW40!J#roCyPGB{CUWe-<}8lN<6o5{i;W zMK1zOO9+bzv12TTI~O3bA^2e!SxS{or_NSTI*K}e^RX7WwrN56-KVF>^-g$mfQSr` zj{$)gMoq5NQRO52)jFRGYmm7#9YD2~!wZ+EsN)-u0t&(^7ao*~0T(uOFmV$YZm_e% zEL><&G|bRxU#4{@qvqKsc;S1^MF0DA>b{?H@jmj?vz~jzw&(kJUHlErF;52_bQL0jq^1B*rD-j)f!&6-##<9UAD+K!*l8 zG|-`e4h?i@phE*48tBkKhXy({(4m3>d=C2FS%N`Q96X5m?|PXvC3Qkmvc+GLXjs~xHt!v8M<$sxc!5kA-o zr{steE#9VU_+J28_@fMMssx38ggse!D3P($$1wPvCbk{FW{aX^LQ3n0t%A~$XB*&; qV+hAjTlt$H3=SSRl*nM;Hp^IlmJ`+9smGf1B^#FTcIuh09M!l z00jUKXasX{_Oy2PG}G~Owe~O(@O5%z&A|q8X90ki|NnRV4{IP%r(3;6h_s%4ha~P9 zw|Dmiw&9l-xyfk?YAE!nspm(Aj zP2qOl0+M43*SL@1_R2QGtv&{E8k!6{3(ZB95jiHeA2fifLi`w;)GS7Eyqg*fh`j{q znk%zh*5BP7CS{#iiS^+c^cBB5x#U*}m8>i)F!5FOP(BHTL>Xa?ag{OUv|v#SBhR8x zHaRSNX0zZ6?pFx=*ohU8XJkyZ<+Ayik*uxc(LsI!dAXO=bV8aEBvCeq%O6E1a!wVZ zLNNUN`V|WR|B1P<(1o@`j5*%G5R(wYTr+oTM-M@PZ|DCQ`#((6e=R*Kz8}L_qV_H2 zdf4Les(tqX2>i%BJXUt43h9~p@ybHdLf`3xQ$Hm>j z(#6F=!19*!-?X`a33M2?{cj&7T20@~rs12T-xA$`;$`R6-B7NR`+|&prB*3cXjw7v zhKo1UYY?d_n4I~?o5S7t{pH-P)(h;FO?=$tjM)AZj%y4!&qjuNvG6Q6N;gB5aM;Nm zyVi>LMZD97$ZNb=q0-i#=8hD`ArA;Lm?BBD2F-cjJVH_(>QTGGbTYN2cvMHV3lpxad#1V#sc zf~U+Sf&jF+BkhN2C+O&wAzh(F5*J>UWg}5@_pq>F*saE~LNJAk>@XSj7Dh!XQj$eZ zYE}WuZG`fL+DPIK5N=uuK{yzChOKzJCQ9o#EVYqW8(AKr8(hDbPH9duSlA|%9v7D# 
z5%XzlThBTwSHRj_1975Z;n=`49!K{|O11Yf)K zO1DP~Od8TyU+O;UORlIj;gTTsAT>KMk*d?t+@u?tz*L$vt+)thChaS6uzQWW zv3N~JTi|blbhakv!zPR(88Nf$Ka9x3&f413<1at-{RiY9FXY(YWRGDV?(A{ambs@{ z#wVfnNbNC;SO+vSv?8fhH2HzG%I?9w)5h3N%4eDEvzHM!r?~IOIA%E*!du0z_E(^` zx<}U}xp_K3qa6ryF4T~#FS`GwWg*xgko+bUHI^z58s_kV5GRq7bG|m!5~Oe6Q=xA6 zOg!OQ)kb<>3@(wzQ~F0JsanvmwZ{G~i==OQZfl{a7%o$O=ZCL44W*AWAzSQ{6Gahz zPc3Ixe8w(cMl$=Kej0MU_&bxjAL^|C3Ksx~rvFx8e5aA8owbv-;P*4+TY<4|(0~7f zBIRqEBN@CYrmyiQR(G$Al|OQ-NDm*o-s$$9*(ibOR+LRW0|gK)K~0c%3!V4Qh$PR6 z6=gd3#^yt!Si}iokVyOzUBKiNzr+);gZ`6&!|A}i>7hf#!6m*!_EpA(a0=?`$=$s8 zwOQZRc8<)d$qy~T*>hKovwWkt@4I!Im~WbnpKgbB+bM;MH-pCB241Y3gh!66zvxgB ziuw!|Yk?T2Kj)AGswdoX%gCbGW*D%{(QdHhC6s!jGxjwP$wDh_3Q<;3@dpffgTYXE?A)1E0BV5V??r%R*19Pp*z_ILQ%)@ z#mM^6s$fMoQM(e=Wm{qC9Fn?m8YT6j? zLUjEahy{$`G~9eUJ2Jxh%{EDR~h$YW<^EbG9`WoxPL7!I#q_4S1Z3kFQg7vu zs4PqOR8>_1rz!6%{?}*Jd zqUZ?P6lYR&D&Cy8qQJj`okP6~yYWECRKWvvDZQKRUIFuV5QiK?XVWD*^ebkXSTxVm z9`pmqidZ@WeuU*B8+#<>Ghxi|`pZ*3cw&P=Riu-+CL=th1oEb-T*=0!Z9N4uO`++G0kT3!1c#hxD?otnOx`q~ieQkOFG z+WuKMy{NG>)zDP!M4ggR^(KWJ8p_W-y<&gZmo5F^ScrRL*ov=@Wyyw(u}hMUQ^<7) zn}>nRVxHnsvuvcz^ESb{!Ole6vi!+Z2uL#Ivg7_AJD$u;&)tq%4F$UMhjgUG$%}X5 zAIG!|-Buo`JX+=9_jxv^J~Ff$k9-+KK|X=tyjv6@A0)vY_9U~H&5oMf-r6NXVW>HW zZAwtPvI$tQWPmzNz^W@tUJEb>DsICgxS6}9@(N9D^uT*gHvUFJ4+xOGt>r-7RA-ek zqZBT^l;jBsVW-%4UA-LHmva4SNd{3No74R{;gLYU=F-wQ!qNvnx1U3DpiSeF;S z(bBn`7|ni_Q1iW}Cq!dTrOV^$Dt`>hJGqn&SQNqG5PnB{+JRtCvUgIQ<&PWVj@ z8Hr_ctI~S1IO$xvo)51(4zDwH<7CQiNU6zbpH2*J7*6goTRZ~c3J!m1a&EASB3l)=MgX*5YP0b2lygI8I_BS zl5?v&N>8Lmt{qvV=IHJgNLa`fi_dAUPA*937uR~F`?q506B z-^$ZmGtr45nMzf_#(i*}JK05D@j*U^6rG2$mw1~o6eNyH+{Zl&Yk4kykI-E&M*bMoBaABxshT%bUtz$qlI5} zbRdp@pn%cM4a}hYQ%4?l7Vg#vZBO@G&bHsv%(VWFrmn`uMqq1my*Y0~PtK z2`GH-)wi=9tUdhzt;7yEAY20KXCaF#mIWn)bK~QQpt@*;gfW=Exowu&lND1;2Ihu& zC^Ae5h;XrjySuo02wJ(gTmRMK{IBxEs4g(xOru$dvTa7~1f=?Ssy3AUT~r+RoWgaY zKAD};DjK`7iM&#x*9}v+cmtouGCe}1SDvB~delwSQAvgm$nIKFBIga2E9rAn0;xfo z=1kh5REABo8%;Uay@Qf=@3}k8c77y?N-)RL&`RF5k9yyClhUet6fVr*cV~BAD?MH! 
zM3m1M^4fy0u|b@9mYk8I2Qmq0S(TT+_+*v~v2T5knpONZPzBy+NltdM;AO6IyaFxG zkWfV*ByD*{2J}(aQ)p~HO5(CI8P%L}vHE<1{idJg@rR;po!&C>EPkpCcdVXU32NiI z_sp|pPIE%_gy8xq^CGZ{o=}4;hXH0Pq&~;%vH{PK>E>)#c+N@#f{pS6u;pj52Fv<1 z{nlnQ_UO9&A=jJ5@jF?$r2W(1`ECzLg+OMoHwC(2kzeoT#>PHRSwujOVQEo)kxJ0_ z%dby3#;^R=D7P5jw~JtAzEz*hnDJMA`B!OxskbcL{jNJBtbMbmv88;>a%rYe8;W-`n#|t74d9gbbbVYrp}qOVAN{12Cr4i7 zUg^!52Vx>y=r4O?^T9I{C1H{6b)mhs65Wlr%;>85#;Q*I{N4(4Eb&@5goQEz#v4xD z*xnCZj6?t`DCeG=6Km?bM%SWr=lDAXcq(H`bB5cQ4!xX2OX|<;n#kUC?HPdrfO@iC zbZ5Bb5VwJWueWtg4+B3NGRpyz=h?6wUgmhJezElY^wFV<`H;S3R$G&!miS|g-A7iB zru@<|J8O&7Q0r3u(Aj*HFK|TAsq}_C&s9Uutj@PYy|#uQsJf>qJ1~iTbq5djl`uA{mG*1q*w>pSEN^uH&x%HQ2W28xwC7P4 zi%f1}(lfrxi7v0)ucB2C2CHRj!i$?wXYJ27>c5TC!UPvlV{^j|;#*FoELtFRT9E zvA?gsd36O>{Tblrp84N`->$V7TKubneqQkBCgQ&Z=P|YI|GTAl9_M^h=?4-${y#3@ z@22E=;q%>@AHpDl@4`QNHRnao_aT0Wni2kU-~V4%;ymE_vi=941<`lF^F{u7fb;3n z4*(TRo8V_>_#=5bk8(bt_<`b#$uBS{zoZxE5q{1Me#ip=OBjS-GKKTv=e_t30DqeE t%Q)}T&tv`UqJJO(0B8)>KacX4!-lKkU`!7HAi{i@FxFd)X|MnQ{|D+kn<)SQ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test4.xlsm b/pandas/tests/io/data/excel/test4.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..a5a2ff8caadd65345e9ee5f8a40aa1b53a25609a GIT binary patch literal 8360 zcmeHMg2?6=4hiX@q;V)gK!$Ea`Nn(S z`+BeUet*Gx&-~`>nK@@Yzd3uawV!9Nt)YU9LIOYoU;qFBdH{V@v#&D}0N{rL01yK( z?ik8BJ9$_+d6;PXx>&hC;r4NKpesPR1Iz>5LEQiE@ju)H6{#cOUS0x)3#A*m4X#*N zEeJzs@6Iq0`y;7$4XkR~T^p_{*B7c&QGm3&(@w{=TRFR~Rp^ENuN+Vb?GLbKx9+Eh zXc^Br8CPHIxCf2%t4`V4wugmQA}!eX1PZ;ZLN-;L!Nb-qCZP!j>4c;W1_B;17ANYm z_&s+gEHW4ri(WE500R$*no~=4!*J2nHHM3o7n<|OsaOedYT>Q|HCCjmk*0Rc#A2iz zrNJ74{dLyLRVH1wWnz*fUs{K130@j(4qtop|)Dr#Hg3`(#V{obWC zk8#!jrbT@M*jWH}_9!M@Ndr>*p`A!U>0nxX-il@sj?u?=kM}I3Y}!~R(TghNFN4Xv z%xoYhAXx$}+^?e4*9+(J!wUbn=GD!#B&8-)z0tOxE&#owSv~0v|tIc!V z#2I`|EFIpR$Y*sCe+%FOzaG?N5G1wdOZAD|Kw!N2T6_OS6@xf8cYB@c4ukcfW11Ts@Z z6_zl_pSDaz3fhhFdh*-3Puv?uaAu@_HPvvn;UbB>%IJlz-?m{~!L^xr9iO8WY64MCSWGynh}aR-7je!yg>OE8yx={gW4@XI&wG(_nfogWae zPJk}>W?PRTKQ*UFElBJ2^NJnp4RvI_m|k!AYS`FVeF2PkR`~O%4hcDT)fpFwo0eKaey-{ zKVRWT4uMqbjP?N0`%iXIQ&(R}fn-2SL*MXjoJrs{@hvyYm{`V9pKt>!}GCu2a2BjrH0k}73Bxa;7agLJcADFGMe9bN-UlROdi zyRWBC!IRtPi&7RS%Zf{e1*R3UNXSC!N?{XD7J?SooAXur+ui|pAeo$c#U#<5cW9`% zs7LJG^VL3t70HLw3!Pbe z75EYUd0Ly!9dh`TW>I+A5WjzBtlIih+QVK*xtdkiR}<_{OOa*XS|VNzG&s+Ylw#clDrtH8W_zp8xIu(!id zjhSpDFUFpy%6ky8CneFr!zS!1lM??U7Sx`!H@EYNFh5i%~2o5adJ1+E7s95 zRfK#3*VGq12`HpSygRifKMt5yw$jx3w6>&x(=eIN84;sVu$K$9(NIJ=`rE%Q7 zZ|t8s@EoD}J6JElS?G6*Hp&{T5^G;2Q4F>5(Q71I7D* zHP|)53Y}-H+;n>7M8~Dy%U^hPdb-mgvXUZ5#shm^?jEiM)|>Kzkg->Vr4Ul3ST1@w zw-_}hN{cEMf1MYu*TF(JKXShlAqJK8vGB^CO#kEfafyt)ux z%tz#s1s?fCaxV<_GTlDyp+*9svJtBc-^&i+%ol`SkyAx&9fHfHE?F4dREmPIjFOs3 zIkSN%otRZ@nyG0q+4{X6Q4250KJSQKjXqRKn>Nz3t@I|I^hw?2m{{Dvh9Km#jEfP8 zczk=iQ4har%IT8;p6v=a2Y__9IjE5KG%W2tDyWcfRPa;5(rH_z)A)D|C9ti2eqYG% z>`llVd~QzEJK##-5G!@zV)igh2E$ko@p&yUPMxx0;>XWxVL}0gQ#-!6$ZtV}%tLFG zt?TG#E@DfhmOa}Eq|3)0GKJ|>JQ^=>A@F25RXqmbO@sLTY7YPavq%KWz+&jLc9oGB?0&tp0P@Qy`D8Y?S^a^@wh&|K?AF;w8ic+oQx(h=E%(h zSJxSy9 z^!@1zFqXc(4QjuT%Yr`Xdzk>Q>E)F@8XeALN=o?lj8-<5VN0A?_>x9og}@iz6*ScE zvcfQraj_ulLhPYas?8J~z^~3#(;lCBCB?msVphyg_cij>0F#*3qIyj0EqJ=_jfcUs z1Y1p~fkbIh0P(&waP_{RdX{?v^+4VqSEqMi=7d{VE3iL4nQNONiIJV>He^!PqDB!|jW6 z;29@hWzhLIR;K!HRU%L9nEqPAfCv$Wg^`{DHYkr=L;U(MDaetOR8Ly0`ios# zXGS*4yArHNlIgC)c&O=6cPm2ukw{b^M>9m|>exScGHV@gOtG-U<~AclkMmB%FoHvpnLP6+eY^M1 zMc6)`ay%}g2-`qdvgALPBm(Y_e)~SI)yCItRwAK+DlJHazs|Q<~ zcp=Y9j+1Vx-auP&Q)^g!&bz6Bn{REWdC*y5KmuKU9fjlTUE=-<8;~p2HjQ|ImTh5k zZ)JJr9`2#rrBVYYUN5tjk(UP((`TZ{Dt0#0!uMdAEyd`Nbctzas)ePPl0Ih9_fW67 
z6^u$hd6L?+eL)c0BL?3p2}hsF^_Ij*E;3^O`SxxOw)nCA>;OSl_?L4xaW==Yh z`l=%<+MD>QLd%XKPIdI)P8HF4>Dj)cb&h&E>Qqsdhwa}YW(`d);}d0>7W3Z`?~Tu? z3d^0GUijG^3XX2q(W5pU$}~w^?oh%*^?~{8ZL@)sQHgZB{c9g2Ji4=zQcH35LbTrP z%fO4VVGP6v^2)g^f=RRjlq+114l$mfLKLXE3&{D*t+REtMDR@+%{1E961K`G(L=jI zse3Jn`l+ke6#zI~W$u!OSzpjHH;Qe^pVL_BmFp3yXhpW`r0>UdtdHgsXZRC)q;wVE za=A}0&F8AU`rmIQ#%Q&ZM>PmI9g#Vyu8OYY%pJyL8F;1FYvAg)miEP)VIvz!z`lmV z3VUQFR7dOW51sFb9`U|I(l%(b{gfhjn|o~Xk9B-AVZ%Tb;>r=?P4YX1+-=O=T$PfH4#n${JKIxl<=q30A5 zXYYFGlOh)C??h;qvP?jGt~_2=$kh1Q1!Q1wwZAai?Pu)ew6lrfs%P*m6q@5w9RHA- z6uJ58;VHjqi|rU{U@uoeA9x&MS6qmX@&A4!TL#%G4!U(;_KWv6u8P5z zy9sTqT5ZOzk_?{M{(-7&oH654`7v z7ma5MfmDsbKy{?=tYb#fZejZ1p7|U$3c~Sc6(fOHGPTZ#ZvDHD}k`-xAo&d7)S9!GC7kWq+#;@HKDn9neA?u?EnH$tuS){7=i zZl0r+E^|CZVl*t`)E8x!@1@q&Ot!5?HYjRT$zB}56D>ZX3w&Z2M&-OL?!oDNasysZ zn$*#Qg~#Q;q<(z=_O0zE^xXrVO*SUeN*Y<4N+j^x>G4^4_0G?5ig-_W`4CManj!NR zf10Sb&2qtJ%2?X#QakIs_nwsb_BNPl0X$`_;bwt`)k*bVd}j*HXa#D!rtUZ^oD*Z9 zSIeFfaEXGSg*iOzZ^rF=bbEM|Mk&w2oMOtJux2EYxM$r#jmp-17Y~hC$9VA z;Os8LZJ3W3D-8w9GN!f2p_;%}z^9ZCMbhnao!F~jKdt>VN!Bk}o^R{%i&p@jd<*9k z8tm!J)1=mFk5pvt7hCmv@#>4L;^}1z1&c(g_+*Yf;W@Z_RVo>{S5dO@)ok~tBZ_o~ z3y~-Oc~AI{k^7_lhaF-KmA?k~>t@v-z#nZHLLPtGy80FPR|EggKqz8S|Nq%)fcOzk+`?!G41AaDD^-w8Vae{%Sw`gpT9=qZ#pQfM09Bp8--x|NF=PRtf$Z z<=6E8Gs-O4?@#b+G4N}Izb4q9cmSXgQIh^G-Tn&y>umUExDq0t{}=wBxluy}6*0O1 Q02bo)43W`^XnuVAKkIH+w*UYD literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test4.xlsx b/pandas/tests/io/data/excel/test4.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6fb62272caf091e4294d9b397023b81f8ae7d706 GIT binary patch literal 8344 zcmeHMgObGUxs>^PKbC`O#>fTP~uKEwW?Cq%XknS<%0`9@D|9ATzegYLKBPzX|xN_$TH!`2tA1~EO z0{M1>2k=;)h!#QG0SvHga#M8S5-8ZL4w1jA8F{(TaJ`AQ=PJyg(Z`Zav=z7;aFmhAWhGZkVQ zd$!fWfoxf@kN`_?K=I&ElVBHk*Oc7Pp=c8I6O!D;9l;ssYs+z$H11FrHJyLN?&{!I z5ntHErf$gIVcOR~?r$gnz}+1p0P;6kHfnIvp1`@L1XmpfT$Y9|=60?e55DjJmE-@g z2LE#F6-mnAUQW#LeTAEd!Sfkt0X2dU52c`IV@2t;V#cvgm|Upc#@vM{)$$vao9^?^KQPG`P2nStFEI(HH4cbwkUXp64SqWL!m}I*M(|8-2*>xSuW_9A<`mw9L z98{&@#k=4he>gy&dm5nvqOUcp$bsk>?!7-c;Bn4}y^e@~+Ra)LjfdoeZ?`Xk>^OQn ziU>5>t)vKkh>VX1+ukYP#lX&V#cvi*8f42R5d3_>L{ActUqmB&(mWN;Ycs~_&TZp5 z@!K$*Gb0V3o?V5i!776r-oB;EWVYD6YnE9&L&*vDrkp$`L3<>MkoG$h7Sr-R>1)MT}B};*ZzT*NXfqzcJ9qy%=J0_jvbc4+^ zD}}|-g+~9Ev8ItTbYphKNE)~Mt=ORf5M}+CF3R(Tf%9x%JC*S)8qdAvDd{v24I_n% z*N_>@uzP4#ApIUtXkmi8u$j#}^o?HraecL+ihwGVy|B#{kUI?;jxCjrl-bJVmok<-rdxFrX`l-iG7xIaK?PaxM=@;&Z2C{XL=fREuiF(JMD$20OVGG#1_pb;|IsPCGWl+(kb1y88+ zBlGBMjnm}&Y4hYlLNgX==)E6+SjH>@H(^pU8hSD0ZG9MRTs@Ykr|bkux#~pCYh?xD zU^8n;lItr!O=R{ny5wA;t)#osuyrc;65#?wc5`a|ZDhU-wQ^DE3musNFWMu$iC_n{ z_pr*zraGc86C?^?Q)=hOSntLgWaH(oDQ)uJbB6>y*Y9gRw6&meBJ_yXCf@Aw62HZ! 
zZ^}D|;7)k7bQ-uC`JG&MlYtou=FXS2xtpCf){H*)7i={LOmu!3*!bv_7E3{mK^Uqu zwP1Elqx&9+b>-L;Fi~`j%TXbB<@PP)?OoHGz!|rWhgqfm{tiehUl@hvOFt|{bEjtU zr`iipASKKfxi3Xen}N2U%P)7IJ&$omnM|Sr2_*|QHGu!W{R610{c;r{@8`|m7 z`&kG$A;&2VKq$Yw>l;Yi?m}3=IAcxbP}0ofAT7q#+R2{UHm;RMYu|*F$O#!(yh*jh zUUNt&-QKfUF@d;{pdD~>hXmb+q4!|6>oY=ZGT*`NkU96zMd-^+QZom_2H&0T zrUjBSyEcfmmr!FV%=6CH*yRJ>>1QG0l4sm*p{sj$pJ5!oltWJBj(r+j4oGl0VErKn zS2u4vbJy=PXMwt|?II^Z03}QU*xp(P6;uq~t>fh*XyLL#_0_Nw0?oWrc)Vo?>)anq z?<+JV+l_G_JDl|Fa8>hx-(B{LTD@}2heXu!N~taDHtB2CIC3>zgTiGjEDa_k;@mOS zN9X&hJ8{9#eG*q7Rm$kWaTCNs_$#EqE&F^^8#%3dfGQ!v6{1nNx^-_f{aNn zVZNcMYfbd!Kx$S!X2Bzt)>xSiEK~biWFi&(Q_B%d35slGqb#6Co&9>65Dte zwiomPOq1@Tif2hXNo;8B;|hRl;Q}UV<0Z!I;zrG}JXOk+kR!wv#&#d|t5%j94o6t~ z(eyV-u=+Fl9y+ihdxI2qG*+|6eL4Cav8vKl-QIRxWqP84WpZDcxtwmWOG%ml-ngkC zo8G$gORaa=0OBJ8^t~5PkeRF!DC4j~pK_9kDQ%tO?7w+Y;UPJF0A0>e5jcR_Y~`AJ>o~W3h(^zOjN^hw^(qO?d4KCv_ZG9YGEsO?x@D22 zm-Dg%m=BD?G>M;Mr3j3kHSHw1-+1ckC;4(BK>mt=;xfSUqTY}bA(=-qcQ*)QV1^Qi zXfczmhB?9Lf;9QK6$54_{MumVBcdLOT|2yv3-W|i_O8?$54-Td5 z9+S$7XQTWwb%>YO_3XX)g~cnQOcU8IhW*C}rg&je&Lr~Pb}B^OCIJ~pa(M5uF75o2 z35G&_kA|Pp&^_Vpz_U-OAWv6v;ybf}O-!RJ-#g`alu(QR>BeuX^Oq$+Ot6DiGMv0| zq~DjXzt87xR_6BR9KY|mzS9_{Jsv|QK+r*SC4uScd1Gr~$8m__YiGF#W1!wC_*Bwp z9$%N4NHWyQMGZ+ZeGSP>GeB=Yg)E@peWAa#eWPrxHrP4AI66;Tx#^JUK*g@z%bkCD za`LrZU?rKChy(h()HPI1MQh5VoQS0=BpF1OY_{Oxh>coUwBr<%#FfX^ZSNV>>}!2} zd;TDg9kfO)G9j_1Te2%J{r=6cxp=TeA4d&|%Ie3^LM}X~ER~TBg5N@*FVbvNA4B3m zkMtO&xL&mLXC#0;!lw#a+j*CZow9%r$mDsU=|#1ZGR6ZD8d0kl6jRee(ha*kf~Fp# zeV$=EkUnHlt5(9(t+clsX_K1EQPEibjRAE|ONu{gFi@7;WANT-keIKEWCSOJ7P zO#%6oCn2d~$ddV>_q-c4&&psq)e?1^Lu%wPrpl^1N4-S=d%YP zQa}S<_~$kM7%*w$#P`!{A$)%MQ(wKY5UV9Y^h0Z;E$gVKPC|=>W<6ix2$zrArSj9r zI3NjF<*+0fB`q5MO`X`i8aDs{tw1564-`X^oeMH^j${&2h^kjC0?{pF(#OiU7HX2H zZyy+b6Ll{X4SvHp<)diO6rNF1B8(2QYWFT!YMhKeJ{|L*7`{?=yw03%Lzw&=yz zPiwu&s1g=$V&g=ZGPZv+^t=9;V;Bg#IsBAG6yH&^(HwVo8UYTtymS@8Wzh9nyG)M{ z^u4;ce46v^8Xuq7j|_sZVgW^p)EfwQK>)FvmAp!qj1_F@JB z2KrPsz3#PN+jNCAFhQ&4`UdKD$Tb1r-T)_au(bQ}?HRgfjFU>`;&t#@0tUIp7z^ zM9$F5xERr?PFhFw;iiW@rwUy-7+q#B9x1V(74tKrSqqb#mJWz4UF#pQdH}KG5t8Ma z`K=>H){M5fQ{ka-)i|=OPc*aZmFfHD%JD{m&*jLs=#epEFkd%N)}VTVOxCD=--|d{ zZ7-+v;rrnXy2QBKXOz;>G+V;>{1+5FE4bb`&XPm@PAfF?!1IsNPWWyb#p-mCe%xU8 z+P2t?OYsL^kcX3QS=8iM| zd|q&|RMY+ekeI({@ywUpe!)*(5Dl-n=k0j4-8R;6jmGIm#qWbGV zc?X_!)9=T^rNAvF zm;400q@w~LTV|T#h9HI()rmeKhgK|K43|kXu)x_ z!!*LSnHS-eBBU3kQiQ?<5_D&ZTC95cjfKoAk5|^Yd{z>7S;vJvA@%KZawI4wcec`j z!WZK?or2&hva%gBcpK(FMOs+n3|SUS8p)k$z$1?ZpU&`*Zj|t2s0eC)B=t)U7U8 zmL_z1KVJn9+f`liCnf$>!e-VZJ{mnCc%23HCG6Y?Cqt4gugd!jDYeNuvwIJiHBqwP z?{r+@0xt0Cx}O8;j4wuyvlyoa@@=iqw{TWKf#Ov?JNa#i*h_dck+29&;hN#-Xs z-c=&@UX$z1@ssRC_`dUP-K@;9Z(3Xs*5{c{b~7mF5oJ`f)9}En_KYZHu~qqI?FDRL z)W8l!!Fh?2yYP3%=al$mj!({gYz}zef2pTN zetRJGR>JHnDJ)o<>GgW+tp8+0Jk?JB+6NK0?ySU=Vl1s7wXQuWSRn?K27h1nQ4Rxd zA|(&$3j31-pgwZB9QlJhVlETQSDI=9I7YO_kk+-htM{X)C6iAsDl4;2!9j&K_OQwV;6HXH8&S)2g~2fdA&F(^q%kb zZLlY>eop-f=n)M~lUpHbJ=BR|9~Sq8HV7=E%8$$AZ_^S{V&5oktz!Es0BBw=BP#l zGH8UreCJi)cBtyUc!D27Sydi7v8 z^cat)1dwd*ObBw{G>c(^nB|z-tufJNTX3*y7C}7bF1KRd=NenS^okqDauze$|_=&|selEXkeuVgP? 
zKkJlu$meO~CJhmZ@CNG+cG!Hnp<_p6EU_MPrKeUTep1sMr9_GS2?DKd0h_iUi)=5s zrfQOP4WdrL8%22E2wSl5kjh_QH-yY_S=f!u@%Tn%J#kV)3mO{p`UUw@vAb&P&C#xh z9GlE^MwJxOR+R`U)zjm%vfz%-Fp^kzSm_W&Jc=&;7I&(kr`2-aX7X6-%VHbL+;{Gz zuWhZ+QvEne7(H61S!&62EKNPL9mnbRQc9U%^I?HZ4uJVE^5F-T%!!nwsz=1N) zmfuG5hXRSVxekm~6(6;|R8hw5EcfaLoWd2rhIjs)T%#?ONvil--JznCSfP2p2dB2c zDz?@uzCeL+MX!u8eUAP6m&Ky~yA?&Bu8ntoSfU8`*x`BN*FE4rNAAz#FFL{y#lHvm zd$;OO;P+!XTpoYvUHuIFyMzB%U>kf<|Nr~>Kgapm*8LG_AKvEv$>jYR{Id!61N;r+ zckmBO>}TlD_QMZoC-yJRh@S)eT>Je95JLFhKmLzO@aHH$r~e;O`icIygP)6mpCkM| z!T!Jl07dYU^dIT=XZYV|!@t4>;raZ(@c+z>5JhD8=mG%f@YgeVM!!e#{oDTm69r*W literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test5.ods b/pandas/tests/io/data/excel/test5.ods new file mode 100644 index 0000000000000000000000000000000000000000..5872e2624d0330b308defb8fcf5af335e4cc92f7 GIT binary patch literal 2906 zcmZ{m2UJtp7KTHKNCJq`2LUO;f7hwUs8Xec1QMi#mPiS`gdkl8VWdfD3WAD2 zKt&~B5J4FfsiBA<(h(6s>I=?#%lCZq&Ru8S`tP&WzWY1-w?eS7g8+XINdU=DJw<_e zb1)AxC(av(A%q8FP=SGd-X17|cYwcQh`*;o01EGoR|p8i_002woG6yT+3E_Siyh0ex@45X;e+HEIiO?+UFc&cg zg3&`u!g=J0i&eXTlu}Pw{?l2uej5v#qYWw+6lZ+$HNBFLuSH%-SfVA@WARk+ZeSlq}LS%E|hE#)ty`u_{$VbE+b=>~qka z`bJ<*-g52eeB@g#90u+Qb8iPMV*cdw`p~G);gO`=o$+F7IjsJV>h6@?=$q>Dpah>z z&+}<1ns<^sb8Iwiiv%7^rBcz0zSUIwA@_ud$+C%9JBA}mr9{I?My--3_`XY-jFqR} zn(tSBkx{sNKq{xXD7-nE_|?u5T>^tLl$!OCN?wiV`+NbeB4)&nzv^Wph_CdYo;FN7 zKCN&<=1XkuUb|xNnc>Xi)h;5CN$0-|yAmm4cHZq8Sd~SE2UQL;I@ra|3RlRck+W8? z$N5p}5PBkoF`i@9Q`W)L$AMUqb?Cf4^8~v5Fmq z*&(%Oea+l7A;xc7%OA4)h>%F)_gR;R{q%w9|!K@zOz~XroH~#jtymTMB(mduP|J$mitVrZ?jNgORGRL|TJ!Ljw zY5?pT)kq{7ooPC(+#^>G@#2Qdnm+$Fe|LQFwQFxD3m-PN=|XV~r!p=lk|b`xjtEkO zXVxf@K_4HtG=~qJoBS@36C~V3kE$cTRM^q!$!<$8Rm`THC1P(oJ-j4FGa${sQIleS zZhzu+#7t~r6lmf1Yp)X2abIIK^MfOE%P!$)y+V8q-~vcXYn=EU9%_%tDY=O;M=q?l zJU8!le0-l^acN}eGpa+4=BqEd_-$<40L@8mV+#}(DBZ}x8<$XnJf)-|$x-uOT1%2R zew~fZJ(?)c7+-V!nAHB5Km(zqE}!xE%8i2RXR)k?H@u^(%B{NhPxoa8+D$zwZlwZ+ zuy>!FbZ~b)a?jN;y3~bt#6+%Z-9X7a1dC~|Md%PeSK0{QZ@_b|{#MEHoOk|KQe|>E6a#JWaz;`pUdrC>gF~>V!Fyv-BRGzMY$otlY#WX zF)~v15e}G;?>i7$qJ?q_-*KbBKKfXSULAO1*H~%M?DU2w@J9bgY!`X{YLJ-6Ez)$v z-0IPn(=FsOBvE6?!b3C0DMRZwOG`Y0TyZTyJ~y1$(vLVIQ6y8_Q5~8@jH4GnQioHX z2BaozR4R0%ga_K&ofuJG59(We-qxmi$*XzUf>#l4er}Ztg zpWM!jxZou30n8sav1~#_%?fr0BWsLO%)+9Pi^?QtPX+JOY;AF-O$?cLpD@#~i&DyA z&O1gGmup%nVPk`3rYEQ8aJNQrT}tD_7rtrT?HgaR_QN2|9O7B~@Ex)5gB}U9cIbN^eNJ#Zl0lV&}Hja-7!HLI&uGXoI z41fdg*bk-yw~ADHeNqGU+eE;KnS}Ryy!@Y5m-qG_ZWZbv9fe}A^LeTdLChk+mHq9S zY0b*OKS#-Qw&YzFdg+#DPyf6aFRo?wwv3HvK*TA3nI_?eTZ6s@-F=h2v5=%?)FQ^K zI&|0!F6L~7pG+$^X=7KaIPW-VjWoHXPSth#P|!Vst#-Q@Jnd8VeNO94kGu0jO~B@u zED3A-z)k8#qZChM@~nM07(Az|{(Los*XfLyAQY!jk}G!DAj=&Hab3}(bae8q=5uk# zOJ-|*%CR5~aOnpd$%p1S3w}$cA5)XE5?WSK$tOj1*9)ZZnTlq4iTa^IeES)?eY(2> z!5uy6@7oPCmN>bKiVXTeHo^29V4Lg_Q-raN@mb*#akXQsYwdj=)DPZi9+KEKQ35JM z6}k@Zgi5dUQi2PC*Sf@$VIqqeYl0DZ(#D{4ktFw7X_p-A26y2^&J^3;VWFr;%hFwP zb%HWGyjRuNNyD#Pca7)l7BXX=?reRL#ty;Ft=qr23OS_`wg!{zVd%Tym({nouGWKn zIAKB2xvJ@#H|f}S-nKVym`rHvL~X^dz)c@iqQeaWg_7S-T{&6^Ut~PXtuIhRewe%4 zWBFYktaxM7hq}%2Pq>o1j;szbG393k00fw4^9#0m@}?F>ia3O` zN3zh|2)$z)_zBmsDKX88Gy`OTL!-!O!?c!4R@tEa#0#KDyxzdTnx3yZFe^Gk+gb(5 z@+$P+q^h)`j#LUYQ}oh6njdi<`7Yk8ZtxRK*8nnSymzj#q&2}LVtce!Sn(fsE{*w( zJ_!)NY54Ts6Q}D(QrDJhwF;l^nH~A|3jZ zy7y$wVyO$VQ9P(kaS?Aiv_#`r@*`c!Mzh;%R4i4t-MeEPoM4^Yb_P{Q0`2Rzc5i(t zB`h@NwqF0^wVyK1+(&)-uOr1Ss{Rj4&DHn#|dW6WPTXxe*r(Q@e}|6 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test5.xls b/pandas/tests/io/data/excel/test5.xls new file mode 100644 index 0000000000000000000000000000000000000000..f484f54a60c6195fd9899e7ad9436d99c32a17b6 GIT binary patch literal 20480 
zcmeHP2V4``*1rh>0;qr@q9}%r0+A-Nq9RgkG!=V6N@yY=0^+VbScMgP!9_t-EU2Ku zuD#c_J+q1h6cOv%*0t+%t@+NGAz?BkF?{c9`~7zEyOWtS=l;*R=bn3RnVVEyFuc9? z=MHxW>*zx?$QOwY(PGd!aE*yN3<$A=3%vg#kw|DFA#nNk@fWhdXVBFeDo+Cv50WM% ztaEKhd`LQwbRp?MY5}Pwq*jphA%P1JLrAS5wSi;=sVyX9NbMk*Kxz-E10+*O9U*ms zWCp1-BmpFINEVRLhcMYNuKz-2{L8936zu))99-}iY2mp5B;ezjkfYR@_|sqXd9ji9 zq6NZgjuqTy(`;lUNg^p^5{ZECB=~Ob(VB2M=(kcG)X9&ct^3lPnq(MBBa`714th){ z14$x@rgTC|Dv5*ZL=~WRARl8Owjb0%1*_bn7Y%os#1|w)QaXr0Qff2x@c{YKva2f} z?e(TXz78K+v6d$J93$2~(^rFNLF2zexa23^F*p-)7t%PAjIm&3Ttb4tdwP`Mf8yvz z=@8CA9eTr=LyCc72=N3dQ0s7RL!B!ro`W2#8Rtk9*o zXhSv!cKq*fJpGDr>jhRG4p%1G!N zCNSd(Ng31&z%cW76)-c>^&-nLn3UsKfFXsl2`D8mrU9Y}9mIpRWpM6f3q-a?1*H5k z9U2L&oNdWg9JgAG7EQJAY&ZucS(x!UkZm}4wFpf~6Ty>)FaPj_sUmo4L@IE|YZ06X z&*)QhG|8`*JGfOH(GQ*>!pPX4CTCi>6W0rPUOeM%253ukA;174ZD{blq@ zCG9jsg>9jsg>9jsg z=|3yMSEi3L{#{CN_9&t6P(nYfgih;&GiQ1GY@_&b22IhI85%CvTdqfVv>k9pP1B>J zk90oLneb?RG$H0u^gn@tI;8$tri5OuguYoJ9cSbIQIUt3DQLPB6Z z2LlOOA6zw{_$fLr2GDd|Nob%8;1U)SSagAW%i-{2cowWXChp};sT9>s}u5i zvi!N&(9l@V80<{9J6y$(;?s7PwhJAa&ayL1jA%MbPa`S)3QQNs&oVpUstAS8;Ge-a zA~VoHvZD{F(q1df@Ti6&ibj`{4y-cyY!D2sYJi{{DhI)Ktp*6Xrg9JrwrYT&yDA64 z$fE`by0CH(>|WIXLAO>8f-AmX1rZ0Rt{q%5Rs%$J?Lhcd2Sj!4;G(q}ny9WFT=iB1 zM0M@pvbh={s%r<=-5WvRM!qkh+m4H?>e|7Ddo@5**AA}ys{x|AcJS4K8X&4`N1z^v z>e?|^4@7nCSf~R+znoFt0P$7aS8YPS-BAu=r5=dt+Obv-M0M@hs0X6Dc5KxHQC&NB zG7uau6%#Fq+&B!|4nB~aJ}+rB>=IVz)de`Tsm=>|KV%ZcAg^A%l7VP90B2=oNu(aOf~-fSnD=M3+I^y|siug-yuhvz6)!th7=%u%iy<2vb_J($dlc`X4Q0UE#tM{knWn=MCt#x+ zFJ^#QgnR~RO`Lkp^fC6>2A0DZ5rC>vW_6dxIELt`8B@=8~1M2Wbti97$-wJ~<3xXVnTu;EJ^v?QA&Q>v+Z^pFE77AbKL z$&+FR2)ng=$jpPbY%-se`j|u_mVrn^(Dv=yo3wVMv0FRR06QC!thFPpv9+_6`UY5; zbq3Lb>9bO$6vf5=z7W(Clxp<6CYEYC^Y)a>z;;zmCVx1qJLm-&c*^H8Xm@r~Wj0@F zchFRm-ZD+l-Jmk$?U~PQ4Qt;pxpQ%{7B~vLBO^tLsm@FJfn+K6qzwm$XC`S0E7?xO znh1y^ByV^>9SJMf_;#8OGaOjH(dLPw>z}-nXyP?xCIUN2U~K}$7_!B< zC^4F%=uf}ZC>GP2EylH}7#kI0=ukz8(Go>Jefm_fm^N%NZcW8>RUrn0nFcgEFqF0^ zTJlP)Sd0-{O!uZ@get^f2y76;7e&|Jd8JrPTecYYrea)Fh@pcTRT>>p^xfyz6^k)O zF>b0otuu&mQ^V850f(+WQ!J((TZ{rvvs595>1pDCKhJGbEXIT_MuDf1pDCou?iu7Gug5qrlT#Rfu7F znmFLp?VXCnbVM=ksy$5rV%*j6G#A&E#~;vQpf%x+8d*&1#1^B#)2vj8VS1X2>zki) z6pJxqi&5Zdb}Ga$J$vJk3po7^bJWxRzBdR8*Q4!~%vZMAmP5v--_W5^be@C9IyUtX2$a z*$4_A#RfHJfik^_kf)i>ib1U!LBW66pw=u6o198-w~=?4?HC|M7ziK8!A6hJ%THLe^qL=ppdu@7tbKH&+GX$j$}aY>0w z`QOU}a*ZW=gx-FJTLTjSx^XL_HZ_t!P#hRb1>mXzB|r=ISm8sQ)o{QJ%sal^osQgH zK35-h>A>3vpIA}kB!Q18Az>*$i=|gPi7x0B3xmN(*sLjlYY}Ld0L5<9N}>BmwZd_` zmP{-9PO24sC)J9+%jfcleYS>VDPJJ3ilzn>-3Rtg(t9^S1A@E`*;>GB$Kut3o$QUg z0$9m*f?XKWJdW8s$YaCG!;|nKPa5p~OQFWTk+3^dnnRea4LO9Y9NLl=O>)S~Y72Xd z;7x{KMsh-UVt8s&%5*`fC?j<#zrQ>bx4XoE_>m0Qc!_nNNYy`Xl${PX4+V8Hz#nj$ zBCUO_><0JJfwI8_y?;^?II5>SE~kTpNBV=&l31>aYYS2=5GKWH1>=2K9~&SFkBUo- z5x`u5rKpAhSatv?guWjKipGF~^c(@WLIAPDZt1cw;N25+qZoQ{J^!0Dj@mjZWVf5jX31|}w_fh(lRR~gp`%7eR-lff6U(iH2UBIhZY#F1?d6$lOR!AYDzeS7gk{A zbcDbh1i)C*6!0kwEcA?qil?x`AuVq)EQ2&SJh)Ugf#&EcuPcXu$RP0a6l!xfLN!>Q z1D6n;W$ncS+e=VVs%R-cLLP&w3+)9lQb8#K<-=JIr3$Q*NJAQ~aWn>6`c4|s=sRgh zqwn&$dgL2ilnqWxrD}W{OK4kw4yJ6-w9c6!A0wn(og<-96Z%f734JHkgucUo*)v-c zY-c5}f~F=^L1}#v;jLD4ijELIEU#D(x^(c{+6!2xZ4$h(9ZE0V~Fl zbjpJoIhfnC-N{neWRJ^pAPas!P|w#Qo%a~Q-4*ibwI+GR&mm;X?*t{1D13vxm@J;s z6LwRUg5PJ8qeqXzS2igul#oK#XuuE?G2leQM*?&b4fkRw2!Td|AH0N9HuXU=@Zeez z@L)bM=z3L!7xLZBh8(Gub+76To)F%?3J2KeYgQczGpfPuIc0s&&EE2Kgy z0nHWi0%4p%D;z$dEJ_7JCQ{q)bD=gj;KCKOh+(lJQL1wh--Hw~Y>m(G;6YjsKUY}K zdNv=PaNu){!-oVvU$Eek8sMv8@ZoycC+HZ#cc}zep`xj&a2uYSoFIb6m=Ycl5tjsKFE-Z9;zRGm zo5yuN<^9yz=3&{TEeES~&we;HWaQ%uzYb2X93D}kr4>Btv8BzSipt7={^~q&=(>%e zk5_-w%OpQxk>!RP+E!&d`z$<~vS5|Y#64AO_c_k>jO^{)I;Pk3NBd_SH%`~s@^DtK 
zH3NJrs_&I0%`JD!aq`e}D2XmF+Oa>+XTRXau-Eg1W6z`vIsc$+nt8p0&hza>k2-{Q zHhACC|BjP`&)o~J+Ya!b_g?hC_2S{lt4^M(tG9b{Cg$GBKZ3~?$(%3e-Cp0S2?J9| zV3aLytkRlI50AnVcd$2ZiwR2+B~0xm#Bz1m}9#iGq;B|A-^MM(PU9K#yJqilQE(cbdMI|K;07p zgT8k%*G^kp+avOTonOTEa}@{vMSO}oc%Lr0w5IOr{d==|oWEsMy>f8N(GJDhE$51U z73S@EF~eb#>E`PLI-Yb$xO2MggU1;rXLl}Wb8Fz4sY5a?s~6{P+f^^9*=1jC{@kH{ z!al3`-t+p8e02F;m%DyX>}_WWuq6VU3`1?Y2+pYXAEPfUGVN}3 z*z%><{0FNaToqf5?mpH!e}i8A@`ip{+WdPtKW&a% z-tET(z5bT&HWzos-7|WfIj_cdJFufE}8v1Ikxy6#nes;V)J#5;z9IxrS zcjvp2=MNZ}S+36e!EnLH-laEn9*VE={)zzk7C3@T7UN0<2bi_sd8J=gInJnj=wpP} z8PoKhd+eI~JT^mT(94^~heyPx6&L-myd-;R?CxCQz6sS~3*2W1nDll!Zd!49%W#*6 zlZ?ba%t)Ujv~jq5;K7)@XFu`fI{4;mYeY3j6P#n>Dh|ByrX#+NL+SQlzp1izLqb$+GzPv4W0FPCGEeEv|{p8q8F*GKoS21_!&yt&_F zjpY4_HW~-pu3xL$d9i_QP1=lz`Uvy+fm;4C3unJvIl2Yc*G%XMn`m=~u*9Qg&Ka)vrNkfgMxSce7P>^vt+Qig<+HwQOivCeXJ8}Z|rY#>e z*>}Eq`^9VBm(=)cTjd;d2;`Ypk4aegY}91`!1d2=M|RTD;||ey&)GP}q;j2R|FviD zlibjjf12O@eojxyT4?|<>UvG4P0gFzo> z415`GG3~aFQOOkj4!Ns(eS7zlhr9mmO{30MegDJCQvKfa!8;7Di zf3&^XmJ|Nb#%kI5)Yii_=3Fly!W+1{(mYnHbxKrHuHT8H>x4HdhweJb-F$!a_nnq% zek|YkwC6;we{w=UqeXtJFNIcax6gm??s~|6#q}0;$2KiU(Aw%+Z!m6JtKj*jdvm94^0%|s-0Aor$Rd4rUeKL3*YZw3o_Qp1yZBztw2;;tILp`N zmwONJSm-$D_(sPc)@=ITf2fbuo)k^qomJLX$>`}<&v;*YYjy7|Z_(bO^in_Ho|(Md z;9bZ698nb3(ZQjXd#PUklx@vo;oY(PjVs35&m#*5a@S=1GtTWa@9>}p{dTX;GR^Ih zKmP5Gl474Y&vmbfsI6by*G9t9UYjh}yfU#1u3ceXGHAf|ejdehxm&b`Z`nA#dcZe- zY~3GYI_FH6b^9s`i^}uL<3>1dn?1b!wY>W$=N>#B-!{T-d3373YOzpSTU9}##(9?XfmeyYxBUiODxFS;J` z-*xd{x(njBj7;wN^xL&}zI)p5^y^8hCY+x3WWi0Jx}F6SHw}N=sd8+$4AHG_9uo%6 zUw_l?O7Wo5`&Y)it+@Q{il;FGUfIGZ@9#YZ&-_S_o7p3fmKPVV*$sV~DRkSmtiFvN zjZnQEqkC=`odoJ0cQVR(H**?4De3ZlgC#cgc~#r4Xyp9Xy`{UC)sR1)wybdA3ulKP zE{r>{eAU@DkuEo#W6L}6dp`bgO3|^&d;eJ2cc^jDoLbMQpC-;+nAGEG+n6PT?E-DA zhS~|8SERJE9h6^OH8=Eqc7@0KMI(N*SijNx%z>%_@6s|$UasWKIB9h#!r;4V(!L<1 zNV4OzM~68v-8vhO|1!jTYHhBj-MUla+m*Q+6Kx(onLt2GnArFO#RzgYmi7kv=@2mgNsy%OCH7DA|d zh+otFQ~tihKnD8agixWQw{i8B`tu2kZ4}aHZmz?(o|ekol2o1xBS6rcrTg^ZyX{>y!=Bo?(m;jn5{yM zER?TAsKclxPjls~39B~SUs*l}qC1Wfu?+10Zn6RkG`2OWg$cgt0Xs)soSfVQGS{N| d2M_;&{cwy%aS17J6w)s>)HdVajQ&@9~uXnYhuZ4>T0uTTQ0RR95FcIltkHi50 z?C}5q8UP{CO4-xH59Z-#V;tlK^Sv$@=niK^;{myI06^^h|BnA*4I~@)>9z?|Hn43{ zB$jde_u)X^R-NOfrhBYHX259eQZ{N#3$ez^iKuga3a31V-hC`msv9lK~nkw!TQ?Ti5*Cs95oe$4K( z)H0;|`3eVHV^sczVaYso`?2i?YjPzOT-&~_r8AGNdttKO!E$)%I>()%ius^`(!}1h zo#|vvHC|(Q@gi-3KA$iXNmw@#Yg>w$A-+_#k6l=tI+V)k02kbFabrfcKHw$M#CfFZ z%4QtgTHo1UPws2Z?2| zN%XO5Y$%qWr>D38{hyet9R8tW4{MIvSYnc3nQP+%gZl~zemnom*#BXg{$uH}_Xe?y zCGA{SZ&02;{OF-hm|OjV^$~`WmtvN~gR%N<9=qb+7YDA#;x)|2>)V6b4VYKMkDH`b z_)5bO2I0`c4)ZaSjW-RYS&AJh1FyN`_@sSqUB4On{N_@kAxli-t^tgej&Ip$YY=6y zCuhLlUfFlwv{i-YcCNBsa88`JQA#}c#8t8JwebxjrSXh&`mf$By&9{|cQ09_dy`G` z!p{+ZD|4J)lJA13mAa%&9s@CRA#v>T^UOH)X*znw4Y@aq{^--Pj%XTs&0Cg0o5f3r zYe35xaz6|j)>S6Ejhdm3is;s7Dwcf^ap%J0Q0&rw<;ln0?bLnjOg)w-3;;r$K)BF% zsswoYxIsNV-2|a79)HsY2OH?HZ2R9H#fB~4%%<_1qhA+aQzXpIf1$1Z;>tl-Lak1< zkm~ zReUyTc<<`dT`~XkVd^@6)W&!fO<^lYRe!&;cj@c!Zy;NWY_l zgR`9jb}heI?l6f*-WROtb72!fvhU0F^$UW-e817-kx`#6trDetKz(Ds<|1c$tuIm& z-EuCe)+;~p*%f7=0$?sD`j-0Zc1+qnP5?qDG>lK`G7!Fgu7Bum3?K4V7cq4#-cV){ z10iEyuK<{e-=6fw@Q4B_uChSzsb?}Q-ehWIH}gG17yc0j{>px8UaBI= zs$0~y%l;T8OWqgl#mJJ;iD_VEs_fw0gg6HIhy}-3u{1xV^B=UEx!YUZ5t{jlUGG4o zQ#jsC=Elnrklqn~b;L)B-VrUe42Aa+SEj_XUi*_g_nzGtu|tkQEeL(cfc+QM&_0e; zCXY-m;cgcr6ov}C=u!tBe7^JYWPl#IwsV(Z#*?sWmDET;0$tL%6o41Cse2d3^Lou6 zU(+fw$3@v1CDN&xQ&B{c_D0SCY&L>$eO4b>$MbRApMjzGK0>3qVY;AamGu6?9TAiu zm@a~&q8FNf&Dwdn!7z^HVvZ#Q3?f#|4c9GXV}jcUy)Fq8(6INQj8m+Wd#=JxC*O2w zC+M;SykK0X%!h9h&;Bd;LiwddU6%+Oo*Khz=*eB+1<{aR0@a&Yr_xg}wJGhaoosZ- zz=nu@@^d2c-{u>+2ApXvSi@k#&a!@$E6AbsAczBMRrGC4l 
z%wQ#Qy>Dz)mYb)We5~8SmJ2!T6}U231%1L{5khT3dmdMdXGPhqoCH6alXI^A0hHX_ zwZBr=xlB6ga?M)CKs*7dUID{Hq+C7uTbSPN)&<$XjJ)i7=MnoMG-Tp+C&r(r|c1TzmU3oS=CVopDvUrOOSk!ZlnKY1t+QnsWF`u06AfMi{|I%eHDkVtcL z`h1hADsodrzWCHZlM0C5dUaFPX4(UiaNxTggq+`-%HWRwJ_dMG&RQ~YuNK#q< zzDmdG$*@!N@z;EM!d2&2%L`+Ar-3Is3sTvq2W>RC4YYZ=#1|4MxkeEoPz)3jt3#mX z6nA?w!-5p3B2EGd^Nd&(q!6Fo@V_a={K@)K+EzFE=PrUS))!GZXd|*lG`jV~F zIisT!aE<0B4;2eP`bph(VD6j;#1H;)>S|)j-a)mP64>Pk zW>gDEr4%T=>Hw;|rBjPYHFsBuh0rJDN+yn~onyb%z^{V`8L@=NeKvPj)5-fhYtKwX z0yMxqz$vf<7-my6IOT%`ai~b-%EpgvpKtgj(TKkyB>TuM)!q z5RMMrtf72Iuwcwn!i5T~M3L1s#5|A;J=Bs>GA|+}d{Hn4i5>v0SKZ#Lz*Jd@hzMpy zkd;qI9#UxfofAN3fK5C6ql3#{uhUMR%<%1$Rv&#q1POHH3_-=xKG!as$iAyP&Z5*j z4AI!MyBIJ_1j_NIV2FGu7d&?)ElDw3Xx{T08I{qgK4~uUA)AtA2Uq+y=ElT%-2#op z;B<4}l$TEoBpGj#$X&m66`q+T(+Bfz)$U+Z3c*gie^^woCSwa! z^MYRi#j9ow8JAV?B^^kLHS(oRl5)2c-h~qsi(x3Q1(55-3O;mQ>aJ3|^1wreh@Re} z?H=H+x1_AI8EQ%~!KVQS9P zRa{=dx3k|fE$UwMuR-a)rx{0et%NtDD*8X%3ecy1gp-O+DpFFHjuX&qEf9#xE<~ys zDziWL!N+_5kYfyB5j57VG_=e9p$9L%Hr+p8RYm3|UZh@i>%M8a4HG~A1aJsL(aANM zAUJ15SdeDLcLxxs0n85UbrJ8vYtmf2Qj-&u_402R`7rX8OpSBRn*f1XU4#0FK?6#V>*|2GYxrgjDaoq=%iYOx zhFSWz14itikv&rnskxLfLi>Q%fw{dD{u!0$(>b5s%#W>VXwvrtk8iM`uAXaJ5aiZ# z;C+9>s`d=>u}@xum+_g(=WV#lOCdL^q=Ms|`5LC?xv!Cin6>knMMk`DF{ah8eK4+d zt0`I!{TCph^DMVuQjIbzEp%M<{et=qhoc;twO=3myR^4nOiE=})wpJjPUd?dWWWzZ zZ@+xH!!V*Y#V1}=V%@S*wHg_;g}0WwrNhB#?)J9xPeL5Z!!|Un4^Of2>en2%NglE_g`GiQYa{S4G5tq0 z4>9N?$5Emz&bt0dsa@wpEbe@9ZE8R*FIER=7#%e@f zUY5G}kq;qR{j)%6VRNkq$sZPlsrn!J6N_^O*gPGKXwzjl${I!88Lq3d<#k@fFSzxd zQ#7c`izw!h5T##;e<$8HjZhz!^^EwbCKMF&*;a$3`6lnHjm_Y!!P=s%!AD;)_Jf6$ z!A*nmZ8pbg?4lt|9jk@SJZ)VDDdFzp_PAk)9Siu};Fl7+5OQt$!W{W$wVk~h(N@Lp z%B4Gd?OU)B{8xq05H19pVHIJIJwV?T;_GbZ19LF)^KtQT`ZF13in&e@rVmyb91+sk z63E?iCUR#*lf7Gp_~cLO2~_jglR6EoZQ<>^a+Etl-_N%eWe5AiXsXhUPmxZhqk|%% zxp65`iAd|}aCPBs61h0K-#ti8#|-c~Hq-LP?hF;SYIX3m*Z1-C@)ff8^nv}=mHoF^ z#qLnZJsZ7NVNl16&Jnp*Hm05&G8CK0{a#fgc0ggXq=wFUd@{d;w5bt8AdTKEmtQ&w z9?%(HDdi6r0P|I+vr>0(13Ib`TqTr*4&)k?kO*xeQ)hgds&jV=CKgnC`Fg97SM&IU zb;HTzG%R|b5?RWc8N>?MDEGkUnS1YOxoa*Sy(#wU*pDg!aCytMa!7Ng+1KJkjYOl! z)m!wsKlXtH6un6ClXEgV*e~vPt-ehDGKe%?xumu7<^H-KYRZXZI+|;5pf{%fu}NyJ5)Mg5XAWI)z+ondugO?f#7=vy41n)k2aU{ zf==K~MDMOA-=;jJ>4HG)MEiSC6UZob)}_dYTH&L$^)zjBLVRsOABX8BHz ze^db2GSSW_NW;Sc7WiicFm%rr0K{ia0g5DqMIq%Aq%QEbc87)4E0!CxW|6d;A=6S5 zku9{SLQxSc)^E5dLFnxli=_Yv7XYCACk5=hy#8gtH=TVSnfH|RJ_u7ruB#tjxH$8~ zNRiMaj>^@-8{nQ_@M%Iisk60QV34x<=qrL6k*>b$|GOKS(Vad2N%YI_GxPJ@9X?gBcgqL*a0c33&3Lc3dbpd(DfI3aD&WaeN{F2hy03eo%nEgxGXqsrr25GvrVNR!bHS>3jb&^ofSRXZulX}OZwmc z{r{Q|X93Sv?>_+fvHdQrLeAFmX93P8OFsZku(`xv?ZLmE=tt^w7UgV)@dJf|_Pa{X z<{M`beohU3$O8arScG2^g|p&kz4#9ReryW(^D@r*^s`t$yXYTC06+#7>%Wikm&4ZA W!pE8(06>cUFk`LvIyTAx0RI7F?$F=> literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test5.xlsm b/pandas/tests/io/data/excel/test5.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..7b8d489f243219ca0dbeb5f8212ccd08eab765ca GIT binary patch literal 8642 zcmeHsg;!k3_I2YB0yM6{-5Lq*65I(c!GlY13lKB}*PuZf3vR)k;1Z-ExTlfE0)Yhm zb>_YKW+pS=U+|`Sty{NO-E-EhTYJ~3vyX-{5;6e*6@U%^0B8U-Rn5LG2mpW|G5~-N zKu0u^adGyrcJ_Go)YsM8-GtM}$&tDM8Id^;fCzv7-}XQJ1u9ZT)%v)APc9X1WVSh? 
zSL!6u`3@0B@K`m)yBnEQpLT7#DPLWxOh*DziDsR@)l_uOAj@~BMP*|mqhszg|@^$FyATZLpM2f@bFEh3-{lhg@G9SQ`<(-p<* zGWx~3m~B$$Kl<^grqaJ?uHR6GKasmbL789HvOhBZO~xhWOJlI7 z9e1Q$$c8=wy>n6YOW5`pAa;21kb;uNGnKj3=pJY2_;51NVD3SKp0ctkl!R7c<+VZI zQmV%!Qvm&<0dVD80A|h@26b^GLi>rmKtah+YE0gm7Wkp@a<|^0rMPVy;}jaWLiT$Q zv6s1R`I)3NP#f!7km73LLUv?f!6}k|nCu+k!>(G~1``}&_xDHujlb};QHP7}96q7~ zry(Ysr_WwkJGz57e_sFP=l?J%|1|ZABvti3u7?rF3OA8MmyneNptQ2LltLTL)0e@D z%Q%g(h4kbrT}%|fr(~f>@U^5TX0vT^W&n<>raT%J?Ep z&aGmjAJ+@=4RVnoujI?@)e+BH-dvimJiJCHH*+dm3!LQCBg4QiqKF_8%Lp+XP&HV! z_+Etok=8z}jHqc6$lFhz%nF)ME8fGBh!RqPO=l1exm!G6s0tWzr1|!POh?;R$gajB z_k}R6?=vgM?lak}F8td74z=tdE!xL;-+3nG2O09d1sOIxJ{`^R?H9o4D<3);e)Wc` zbQK=5{!Wq*@s#v4_!nt~lY|U_j^N`6`VUXMU0yg?xVSj{3}?S{1_2(^;9mZ>TZNXY zaz7Wa1M@DD%R9@95O=`?L<`e7z(F5wU|C|M;eC0&M$X!5qCck$LUM`lJsuzQy5z&& zM8f~p!&VxNhwO*%cr1eAGJZCWgl=(IN%=}11s@N#e^7piiJj?=-y)tg#GXqa_~kn@ z18HPoF|F)*%XGwJ`w1>j9((ssk4E5_8Ex1Oz6#TZRlaIyYNF0$vDvwAky*MxefP?j zYU+#>Ls1mV7cjLuOT+fP=g9^7*Y+D5Ro@ARp2Dk#I~3*0l<)b(0wO%9BZuUvf>@H= zcM3AvbdEPXq_0O#P_VEde++OWiaX?n2 zM?}y7tT_eCZ-smG>$B6JQVnP7)zqcD2cs6-hkI5Ej(beBuwVwjd}%n#NGQcue43%fV~kqf@~x|*oU5a!h(&HW|K5B*br^Mj^IrEsb- z)@93Zb+i8K$7IZ2esu&WT?@kF`JT7QX}X^gg-{?iNf>?g1c9Mt#%II`kl8*tf*Pgv zPj7I=SK3^v%n*x&K@ar^`2y(KkS0Dup!J-)HS#UUlS1h?>U0EXXtNaa#$ZH+huW7uc@31##lq8Vx0z- zP93bN-BQovGRSwjefT?r@rmvwT~D>^!Zu@Z z2`YEe__8iENBFR^l&9sW)bXm83bEF+$(4;Kxnx%`x;+pkLR?N!$6$^w#kR$Jel+sR zq;<7Sc+GUdA*Or9)}M_aMu*lc&xUjlcr{PL@!44DLp@G!u8MAj=7Rv?S9w@HaqWD> ziymUlir`&y5?L7+AXVF4ii*j*{#+4?h)(jDlF)T>(s;(6*#|D5b#UYNQJmaabZ-}T zdotH%K5qeaN0F5$mpwcDHTaw1QpuyWMRoqy&v4XSgkx8i(LxC{LQH|n_mJ#6lz(CZ zNa8EPFcJXJ@DKpN`VSM_J$xOl-G6SHU_Gb!LVn<#Y1uu(Y0gJUGa5uS@;U`3lX9iD z)GG)=a^u8jT)`!<&i(bcIy%+%y@SezN;Lcnh z`l;h1T0eRH8cCq$u~{ePrJkr6A}bVm!|}q{mJsZ>gd9kY%^ujl3B| z#(a{it==~##f=}C(E7nG;rWbZniPBe!)YgqEGMs^IftYq7TaS>;B5J|nfXOZLSv$~ zo~LYa=mkvyjiNT@V&DF0511*7$La&M!PqkfZ8cuYmpm`1WaY%(ge9OALXcl2lTER{ ziXY}r;TpB#8p{~1V^)eW=pNHaRaDnm*r!Gaa?#4Gtozn5y^6mQHaKR#TnofEYt;yo(~3j zQ~l6ZMj^%**#*%x$eegxAc2;RtV;g4&t0`ezO;1=vwuy646V8V_A8J3Gd7D}5<%lO(;ldzu-|{gxX;G8zw$1; zfycMMM?V51E{Y)dh!)w;?Ekmu=V5E@WDWZ5%Kel1FoVf>az27ix>JdmFS%dl$LJ0C zj2M!^m@7`@&W5vv0&YA_4v;}VqJBTnWj z4y+0`q*OlD@CK1iLZOq#VnLG^#_b+>+EDb8>>BW)F6gN-8WSBq<8Z^_Dh0NZ>ih;G zgQcv&tED4E_si*v>thpHBideWue_k%CXJ=ou(wXBqXEd{QP0eHv%A|clr8DkxpUG6 zS%gvBxH;xoQ}Zb3lA6_CS#bF~@zBu09u_Hj``fjvZ*b!p^T*h>f>bRFBYWd;%JF)L zMZsn6VYW-*ZUZo%DD|)djeeAN+cv^)(6lB{+LZ3<>u9XN#+OLn(l19vVsISn$2|OM z$Y#$1K)V$%HUQy4^UFf2^Uzc=6v;x|vBz5lOXqEs&Xbe1WX#ugwJQ$i`#MO~3)R(b zN8i7$b+c*MtQqy;pwjqE!S5q3ImGQ|r6+&h;>?}IDwRO_9=0PsNuiH)*o~%|?b~tG zOTZ`;(e)Xg(uH$DA||be(hb@*eO581IyqL^h2+aX5rny({SSy!LE!uW;sCUG4bZ<7&NU z@0N&_Nr^bW#SA9%fH#aSh&<{$@&S9hSU1GlH zuyXaF`=fl6RmG_gY^o#@cPOj4mvartmtqP_o{$XMBtAp7AkS0}HZ(O|NI3^zSFp z2yx++?Ka3*M2+^!0i~V3+Z?COSkLkwP>S;;)=&c*6R0fgn@@Q%F$tDbur*aYBadlu z7Zi>_(}{5h?g)vw79-hrB<{`!?i}Gi?4`O8znbG*Q=0TM@(^x+vT-NoE~fK`am8qe z*1up>1Y1h1MLzbq|NJUVUv@hPdqt6TDux{jicm&GoE}vqWjVBwO`4nV}ixo zHMaBO#(=w4V+HO~kJ>HW>tmm-3x8?m6=KY2@0P_7cV@{ggvaj-LxicS;wEsb&zzMN8$KQEcE*U9DL zJzZBII3x^$U^Ag6=7-9;B^hSuJ)bEPKIOz1>bwsNG%5ziiVB_~nVfCR8Em#%VuE+Bk`|0Awimr)&usfR(}i|U zOLoE*N!6n$@3s4~S7ZR)$cwH*OnJv$>E9p|_G+iDs-Pe(I}gEIlAW z0fbLB^j_(D1_jo@hQ9!hZfyf#6rX(kkffr2#J=}ff9u-3S!T0Jf!~~JjA4)%^!$M~ zXWLa{-bjAMqbY;1uh9i>d{OmiTMG(v>lDjb(XbyIn~6iiPs~^`VO>?NAP{8QJ=2zm zZ$^XAfbY=a>PvL{z!Bl`D^O6}nNPHCx6rG%lRv-De@q-mKs zO_{S3ZZdhkw4c|TbYg4#Ha*!S&PB2s>m=DPQ|(DhCyB3!QN3Kz`U}y#TijWFy!`zU zn*?k2Mx8@>7lIkwX}=aBbY=T0Egr2zubuX0Ql+t~c0K#LZ(1~aF7i!l@bUoC`K!}oEIUKb|lq&t6Ykc)vu_+@&UC{!elXpmg&a*0e#O8&x%&N6+ge+DxPyM{ 
zehXSDDQobw4LG>QbQn}~WBQO@SzETWKn`-nWoOuQwD14*18hcX01}zSJZ% zu*mgI3)LF_kK1xxE1U^s2i?c7EtI;^jTC1pcB|jT3n(iEtQT*Nkm*m+`S?8xxJ#j` zSh^yj2GWys6*!@r47?@gusxz%3hBvO6dp%vtupU)?=Ig+)Mby|LYXlp6UO85#^{@C zB&eyiDCr5q-IP2H{&ubFbtgNsiI@Gwx$VF?4DyIpsmw|cTSKW~D3{Fg5s2{OHgl{+ zuoeya$W(cwqRBG~I5;4^b{Q~uM3NNUp7dEt!fP|nbjmZSk~oF%2iH4d47B?J#*p;M zJW1qI4MwG_)ATv#>ux5|=)~Q~;M}1n3I0BSBA}=|%6*K`mq0{*YafSqbz<1B0=>c8 zl#~@$9Oa)Zl#F8@*mr7gtZncZPE7lxfu9wRQeB)7TFsyayejDf*X=hWfHXiAyh%GHE%1F9SlE_;8qtJaKy>%|H-lBUQfvV4?E`#LX2TbTNF7|lc+ALdy7h(16C(m!*BWj!qN+Z}i^bPOL`&=Or2K}Qflzb}E)doik z27Hv@Kd5oHHGg4krS0*;&iVOo3652Kk3C%6mOT!+3;mq!AQX~@M<1@W%MDbJFtc>A z2x-)6Jea-J^G_7o+C`9-{?&DRqd z#HEuOS6cfo9B(3}8XYTAo%6z`x1~jyN{sRMW4<{SJ?%9`rBy97X*#S|B_UNwXVS${ zH)D)@hu^UxDEPX^PB5Xgo-F%zr@T)~t~H%7e(-yBt9{z_%E3|R*ig{OgkJAIrg83= zxQd4Gi8{e)f(zHctXwQLUbwirgDhQMSpT=2^RM&;M?pZ6rg|tBLHL^L4G#ENV}zm= zRe#o6os&Cy;=ArrTP5xTyMxbn9VqY^s&}L)2+-?^$(~N;pQ>QH7Z=Au9)WsAyL>0J!n?9MQ^aF1tSoZH0I3kfHzx> zdCF%{$&zeAWxLD91CJ>G5u$ctw9AR1p3jd}KgG0?a7y&JyorQXy$9*Ev0*bSBbOK> z)u&F_II@LbJ`HSs;#-}U&&A)v6ycSD!t1qh(V0Yjbg`gC6z<(djB!^f1PFun zPIZoG8pItv|3P7`sW9+T@Io;=Im$HFj-;ze{oQzz`SqKx!1bWbejH1Q%xxwa-az4xzDK&CdbmO^ zw4(7aq@qAU_`JpfVbkNeTCrvR2l}XrMA*Fghs4;{6YvV9b~T1j>_NQU4K z$~lXJ*n&l;)PW{Oq2w;B!XE4{XE$n_iBmfID`9W)-%{v_-B;V~jCad}c39}mDk-IH zD-qPHXD8=m)jPkyNMk%o>W0^+tq@d>IR&mHNci{;rx?E2WpE{@r}AuWht>D>j5t=1A%pH{T#j^fe2-v zj0qFaG0~3_(ZIus;_YkmgFm!l1Vj#a%=qVO`R^NU=-wuMmdinJz@Q0Ue_<8c@41PTr{OaMa@%0aT u0APv~0Qg(9{nh-hyWyYBEhzqE{@=Y(Lm36Wx&Xif_%9e9(AlYe{`)^MG3}cG literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test5.xlsx b/pandas/tests/io/data/excel/test5.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8660913c7481289e2a1fcbdbfb55744d1112d11d GIT binary patch literal 8626 zcmeHtgOiGaqLQHzO`@2YcEAWJH!c03!VQ|F!?(5vWWaRqN*lKDku5mHEo4 zv{WyNE^rt!gwOU=yk{V3upDfVYhwe;eSHIz&L^@JtizvN^5Z=jx2|_|coz~m*sh87 zCV03<`vrv{==~7%VK)g`qN~2%aULOsBoCRfag1daAl;?8Uz1;UT}r8v+?t43JZLlL z%a9J<>R>BYxqhke@M7|ISq0pgXu!1KLa%E-ZTo7~uupUfXGpH1ImO<#zN$Or8?>#C20D~a`H2y-%CT(tpGdR{%;H<-h)6&@0!rl$U_4D{Y zI{p`P@K3K^nW(DX&y5v+tZ*AKd^xuq50qB+l2T}=d-gI&aS696rjU_hxtp01_>4Rh zNj|XiwuG7&&e?eiYUX0A7=#X531^|m|ayP%t>n<8B=?XkT{hcJi;>l^J@DXW) zlY|_Aj^J$%`WH{UoLue9oSf`_Mz8;H1_2(z;9mZ_TcxI|@&Grm6Z1ZT+bheH2yek1 zL=V+Iz(xPi$hycx$M^DVm4dC!NcWR62+1kj=XiX`^HKn31Bu|gm%TI!AK90{{#X>n zY5a5?3Ek|liaJCdg#aJAe^7CViIeF@&?=rd%#lkdw0*_$kSwCGm|pg*bvm5ic7of3 z*VgSL)d(Cjqm5sKuEVsTRUwVd&9s@UmRqn^nZ*m#w;?_>Q>SDYielJ4fT`VCI`*sH zCl~17J8msieJ1RB3$L-ZC@Yky%LN|$hr82849n95vL?E16=bw)AFsPh-;A(!FMj73 zJv+Ai`eA5p6+Y8{hYZ3t3Sb!ymumQ`iwnO34w*lL1)^=_l*LUL@U{^abgPDqL5H03 zOqE3+;t=(vDIlA26S*QN!-2e@92PK-JRSE;YhLcPMD_Yru3KErOhfT-l^+ukiAC5F z0h(SVgnny_NqW8;UsN)68ls}er9_-hrL}8+eLlLUGu)m6^dbf%vq@)s(vGYvAY+dd z!6RZfwcs?D;#b*1tx)1kERW_f`s%$VB+i8nnxdvlKJEn`2$XOTH*)EgUD76Q$;w=_ zZKmu`J-DWB2^ns0t1n|=3(`HwJ}|n3po-46Au$o2!1`2eYI;cb{WBd)UNr0N}xo+2z%?*|$O3}ehgJO(vNRefH} zO3<%F4^sXzu@60qft^{&7-HDKFAYwQ;`GCd_YkP?!KtX;6eS2NLA&I9sFL?=r`58a zF?0M$SudGUfcQZOv0pC6w``9O_p^q)wIa``b|saq6?AMB-7V*MA zMp0$L0uP>V1E@r>&j_~>OJoUd$CST*ZiKBfNyqS#&VIJ)py}2@#ZNOmPA^I&jwU(r zV(BGnjR*;9o;mX7lss9x4Bf#-+Z=ei80t;A$gqkDRpd_pj0C$~I!W?@s&vSk(A~v9 z&^)MQukVeH7xV}3q1jv$&12Z*iXXz(+-VliYBFCQPH_t~PhwjcoSUQZHDwySC^lQv zTsQ|$qM0pvuBVoWy!l9ro-G5V*v!JJ_XwZ~ksGwyJxoDqZ|u~5*VV~8pN{EOxEG_r zok|xGYzT3D^RZh_Q7vtnH^&Ef0V#kqC9lm0Bdel&n#bH%yDx@jd52M37Mg5hF(WN1 zFAZEQy#`NOc3H)3m}<|b~7%y z;yLeFAKwVlqVR7>BIKR7 zY;$kx<%scjj4Z_Ow>mTtl6%faJDy5o$A+7P)(||I&K(zJW4p3kYpE)HOfr-~NpCtb zX=6M|r>}4}6+2^xOvg3wz8r`Rkj{|l5uh-~#@yi#6hD&e4EvI9S{wkY^Ar6D1`Y}$ 
z-a~4~1Z}PJiEB=Tl12<`)k5=;R=d^=DyVK9lRw4R^EwGHvnisq@rD{Qt*P~gdH4C= z75aq+>{7NKAG`>3#3W&(33bj5VJO*)76bud#14b^rn);HJKv)XP&BNP20T4B>B78x zE@pzrwu8KGe_?3Ng^h}2>DLk-WP-|1oysai!U=qx>fsJLJAI|sGXDcqxphO0GiNjJ zbgR9GQQ%y23Q8}dZB1rl*{Qcc`=q1WT1)vZ%Vxg^c_WaVr6h{Nrv;$?9gJ<9;yTn9R>tir*w&KRb^ddRFDM9PGhiq}^1ziE1q88?t{{53) z2y+&%`8!&@G2@3?YJA|Aysk8|a*yAH#iJF@A%`T9PqBr>eR!12J!;N9mN8n-q7<#y zGp3!QsII-RPm2&}Z~bUdqpuiEU}3)pTGr`1WqI?1=Rh$ZXUsPqEYsj;E1~yD{jg3h zR@iaZCY9_!q+w4A!+_YnDfD{Wx3+i;FQ(u@`1qqmlCpC9Y^>PwA2;Dm)*Fo698-$C z-ec#Mq7R=YTWm)$Ge(UO%W{uVip6-w>fshF)CY;34F!79{LoTHAt4am1u-5H3VBUuCb zJ|54!z`kaU#h9>khm=u&JY04YC8|-JX=a0B|~D1T8J69 zp93!)0~D)B(aX=KLw%hG&+t*SbsI<(To}<8i(7%;OCkm-a|^RxeC09-^^Q~zJJ1+F z>9B4mI^Ric2Bl8vth|oG4rqFbbe?uODjJPzXFKNZTT4EB>JQqjgt7yO4q9Fo(wv2+ zJVuc$#2e$^ELc2iuX3E6tRrW+v8h|OJKNVrs#&P1aXGsBzS_gCVYzD1kBds@Jq14x zzvL9RnU$XWd5Zh#TZ~f4jsR8%;*(^?2)o@Vn%Vv>`{(f(g`zs%qnD^L`i>ovQjaR@ zm2r4XUw=-v<7w-kMgTCz(@WvYb69$Z$i=^>EBH$LaDppCP8uil9ryH#l`2EmCq{X9 z_y&w`%2OhnPM74>9Eo~&;K^Ct>Be5+%#FI*G0gpXt#|K^hR;B5h$>m-Ja)4C4mw#Q z;d$?UJAN8r{Q1Oi2zK4i+?{plDh4% ziF-QMl5Q7Mf_aC~_9PB?qJccVdFb{ADwj{p0X1B}ruDYZ?kiP&{+H1!>kBUEYmcXcw!r1^)FR(UxCmb}rmQl}zM zUQ23ao2(CeL4n{=s3U>dl}B8(=HP;?Q1k1VCj6`ZFk9udd%cnb)k7#bw3W|L!{amSMmNOj)(zVWtjvs%B%HDup5iyCIWwXVXz2Ibu736{pEI60Ft0;!u}>i#a~voktUxEx>B*v zExgsb$LeJn-Cy=ppaRS(4mQX^*gm1Auqv6b)z0PSJ6Tg8JR}O7!(m2E$PblsNz~7H zZZ%UTa>9i%+yx5@dz7wN;7B*6`$(GbJ~k|Kp|#dpPnW;whec`XQ^Quo(y9VPQAV^2 zZHM{G`WZ3GEZ}=ESqapnJGvTcqE*f}iLWddfvDxnFkfDzyRz%D&|~3tj=2|f)2JAn z%PRQ#RrCMO%PA98V1M6W^!K5KTXved4hYw;?{znqk?Tl@4T;p-)tm&r~T-o ztWD&GPE{~%N7Gk*T6QN2Ve~dKbFqxIgpd8zO{E8YE80CbYY{4FsB-cFVLT7MRf>(i1o|yWWoK7%XQgK&?<| zo2v^Z(Vy41rD7~^QtE*0cS%cqEF}shn`PYeAWO~6Y0jLTaFNOLq5r(rtQ}M9yWznu zaVDD8R4>VKnPN*;I!SU(g6ipvHc*J>)#}FP?dj)-*eq1LH|iM5w-EHujsANPLU*>0 z(wC#0veFd2x`GOiUgbq40-lWiSNIZ)=fIees^Ogp_(!miOd@QzVyIQBT&S|m;PVPa|p z^>jwfUl{*mw)SwCjq0cFJO~%3Nq=Q)Zq^nS?rxwzx<7(ea`GxXZ37OiG9L!k-oC(M zRMwI$Es&eL=C;vqK3Yges=PvL$btMg_6q7N7f4A&gNWYTw$iK;{J5*oF~^-yw$tH% zZKl+NZlE|*xm)u#PEc9Nf30|9gj{!u!Q0o^|2~a4tER3Pf~qmpRrdREM@h^+I{Qve_dNI5a4|dg(uO zM4A}Yk@#6k!gC|<#gs>46-hGD5AL@l7-+CTrr@;6JW1qI4JM`QleABcH$BW^Q3<;d zLAk?E;{Ch94=L$~)bF<-fHXjrd`cxuf@0t3y)Gm*e4!t_I*cKVGB?nE zdXj)$@?*TMx~%}b9s9Td^xJs;^=WozQqgtG9hU}n=Z5_1uVSSno70bYJj?A~Qb#a! 
zkw(-J9EI)^KVRea>??w43s!$FbsD0$8Z=^vVScc0iss1bvY9x5_odhK&T`4@RS|aY ze$xB~3{m4mNE*S`uK&Z{y!SN{(LZN4NhhM+?QpbU!1Er$f1$?B+SJv;T+7|n#?k7J z1jjtC*A}j2%N_^chknks6An(rXMCl(%L7!9FabN61vhCn9ZcT`7)rB1hAu6)xgBqE z4Ob^; zHxSs#)#BgPoBv5)a1{6_8msql6TV+ny~R;pm7k}>lfi0x!t8q&U%W&H4;xS< zBCRebMjGmDGNt&SW>>e!-JI09?dAE;4y%Ro#E7z8k;dkj-quwS_G;a&lMr=;1W&gS zUSb{cDqAl19S1y=S~@%};uu}?1ZmzHoIy-T-oZnrTrlYAJ)H1Y@XsJ zy~%JyQp3ds;GYp`>g@bKK*ERY-z_^))oJmkevfoPgSAW{ysUv0Tv?zXa#m}Gu;Ff{ zRt(O6#~4|e0G(HVmoU5F`6Y%1^g_I#^eE0K>zwq1@AJqldJ-~`P4)wvSD(|2ow_2U zNexk|ymTW85?khJBuX955Eu*~9C|`*vi+1gnu#{GNcxa>%J7OBj!@AFZGe$MD23CC zh&zYV>8;vE!j!h|a@d>vbjs(CVKp{e<2~}AEmlU8Dr#x#Dg?Ef*~w3`>RsDVvS<%z z*$3)4RD*{*yeUFn)++^DNfRmACAL<1?>)%#?W{3U{6VG6VWt5FwF!;;K68bpG<@~l z(}+$_E(p-kYNgMBoI+|rq4r9HE!YE3Vc$+t$Yq%xCYi9st{aNR9a?o#qOi6QMcRQY!NH&baMm(1;drSGbW5c$HYHM!~za0i@)BO z9{duE5fC}yLF1pR}KGX8=2hw(4P@ORVSrG{Uohj85;e&64PhuJ^0^tg1>wDeJ}9K%M5%&34e^g?F@eR@YewQ%N_veAp-#Z7H)qx|Lbb_ gXLC);Kbik~ZPZXkfzK`g@Bsb^f=6^pnxEhP4^`sj;Q#;t literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_converters.ods b/pandas/tests/io/data/excel/test_converters.ods new file mode 100644 index 0000000000000000000000000000000000000000..0216fb16311d8817a64e1dcd2c648fa07c6cc0db GIT binary patch literal 3287 zcmZ`+2{_bS8=veT6$Tl)}j2XKOvdeY}MM9YAV&YcfB7+baCTSvM ztqDm+WQoZ(!Z-S!@9F!x_dDk~&pH49d4BIX&pGe=dw<8lmXV1A@b4lGNDtD_0?|K? z8XLU@`v)T;V!{xxu&^M1I4r_HG(%&oQU_l;v2@Y=Ls7T7lCQNDUP+#mj$Yw5{N_aE=_5`LWxEFiN8ay*irU_| zyD{44wDWXya>=~#KBdVkAb!~JA;R334C^VMbS?%WKQ>q@z}sXl4lYn>@hwHwHq`yZ zG6N-!8`XW~6PsCQYqKf~oXL>8lrM%i+Hj${5S1JC-7HBiQu|Cb9=#k63DQRHs?YPZ z3lOQU$uj(ijJ2epE5k7dkjM=nD z%T~-5eR3B%9Nj*1DZ2rSP=sW}D3j=!R2rt=NFA)p(OEyo*4ip6%O0<&#@rshsx*w(mma^$EIclv$D1Fdr+!nRIwVPvfSX~weAufu(A2gV}uZ?qynrv!cLAf z?w;5});uOCmEItNp(QpxLgMap$bQ`|FOy64#`imDYc^`6wDU>C&1LeO)=_@*gaIlI z8%rP1&FdFJyERLMpt{#xw)}LMp@G{OYcDO49^M~Jvt1a0!{^zQJ!cknLG4nKp0^wq z6gh{!yJ*`5J~8wC(z1<4KQA4z{J@y)%(Ca!8hHB}#z{7j)!Iiow@gVr*2>O}aCv9z z%awdPflKxMNFor~RB$aGqbVDYQ^&4^+*xZ`Z)J~#X$%Um=Sv|!wz65E4znL5>>;HG zdw>`2enov+oFE?037+NS-_sb<7Wb$Wfs#Q|G-C~ec!xi|wwlDpQHr&}kgi9?t3Hxo z2>h?b^ZaGQKEiz8FZJYY$x_r;Fv}mG%XtsY6&%om{qT?x@V_)rh86jGl?ec-76Jf( zbQ*w%hD03k#t{#=k@N5A@Fdu~91xzj=d1?@k+xhBw`)nZ-a5MgcQ%YivZBrLxo7r3 zX|W*n-#5LYB>j4a-Yl-7+r~C1uWU)@1|fZENKaK^`=`7EY`OW%^_vHK+hQIuryho# zfc0v&ZQ4O%FGoE)Y@WS>$YU6S5Yq;QBJX_EW=i9*g zMLABYaVZB;yOyV}#;n-DJ8-JiYXQMzV$ss*43+ex)qwBGa&}Fy^uWNp`da&rm4*Pi zFSZn3X!;mKep)ZP*RXjLJYTVP2cZu_!a-b#_1<;+OSPBy1#dM%z1dvdt%&z?0$i)4 zau{dc`B}By$JJ%4=d*$eu-qq8qJR|RDPsae;(fi^;S@%uF%*Mu% zeM`>3n?t4(Wxcr#goj`tFR3`;HoQW)Uc;hosCl|ftC!h%+#Ed|dpJmBG#TgxLHgb* zl)pA+jyy1a&^Z4{i2@~tyH<)k82R+fuRS(1{MeN9gEW$3e@{pG4(q#-VxW&#Ac@61 zC#J-5=>dio-s)wfa=uz(L#)!1CxapS4#PB|W7pRJ%KD-5AV&1#pPxy%5A!Y@vo@MZ z@<7_`E%}U7;z9%slL=dCrH`#(r+$Z#bJtU1C0f_+dZtf}dTu$PFpzR_DHL?TU((BJ z#KF8OPYvw*S`9Bfhhwi3xSqn#Qf0`Hu9iG{;L6bKKBZID=D89RVW4YUlDZ@lQwdqo zVJY=F$HB6agFNq4#XH~EnPLepwfQJI|ErCl0O6WPH#dOq>Vz1;|WzHHQkYXo?yCBa3 ztuNZ{{{?5E1S~|_|Ft-r);vyVt;(K+f$=^ATokR^i*kHg_uYos()3C|HIBZt=)w$F z#tBvn^4kg1isH3okY9FFlV}RZ{A{MP5XA-jq06AGr&v(np)*&09{8HineZyIL9K!1 zq{Sd|cXjPT!hF!&gw(8*5v)|dk@OmaOMr_f$xcgQ8@Z3KwezJ8uP!3>JzX@dvW0o9 zF?X!T_yT_eQL*E^0{zfUCT`#hPQy+}!2NI-0Q!{w>DdsYcjJhS==XJn3(NPD6JhFi zJrN2OBe0-Sx4NtCt5XtF!z%49l&SfT%arJ0&_Op*`iapK27hrGlunL8TyT*1r4aMp zM|&TR33qTC#v|uAo({9hb2Dx)Gcum~&ZOU}5qTEOdHTgVEpddc3nk1BvR-(Ni|E5E zK&fR{9>#_36Gh7Jv4}oDjONq}Oj7>?pX8TR@u5=}7_5C}kE`^qWMq!Tji|3MIH8NM zP)jzeg$Nsvw;$~WtbeL}F!%0FPhGV}C~}5!`_*HS43h?$Cw-8t1@SctzL0{w%jYVyni7}bwkZKqEYqD`V2 zmF53fj!oGDbdPJXcQ9@mpQ6$Q0xo=8 
z+vbV&1J@9@6035!pBCseK_olWK#vmv&Oe?iD!O6EVClCmuOa}r>iM|6_qD>R|9BLdJtV!zV*mh| zeiT1cAR+>G#M3!WQ2U;f0tve7CR$IOQD3U%f`#DU2t?G!QKBwG%k4fNrP1I+Z(38o z(QcoS)OJ|8*Q%}oEqPoi@N@xDz{)4BkW{1mtvR+!S26q#v6oHS4i$`g_!?DJFj)%> zxa}GtrY0HMMO7>od2#N6IW(|%-cH|dhee#4@>ZqTz1!|>keiWR?4!f#xYdWQSsV#5 z&k$)r?XEAHq5Mj0sL##@`9yXz=hq#sl)dvaD^cqpF(w;F0bf%`tto0HPGF3>vbPec z@9UPDTogwhw+G|L-}aht3*M5=^^XvrUSmduzi9fbwOz5f-4Kr*!k&=R*AyElbo9P* z!wT4wWsNGm9?MneR9~`Hfxj8T{rI?`c*easQ+9I0C&{G(d3UyJnr&dsB9L424)*&Y zU);94qfhA^-c3%wz0J>EQb5yS6(Hh-(LTXiK^Th&HxxUZCxHrt!^c#WX|eKpzsZ$& zBbJkl2c!abvNwNT%`pa*AHtb_SAf4u^rOE&%YOvBgY6OO|BLXS2nc=nQtp0oYKLTndNAPnO;xQownjg HTmb$B5394} literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_converters.xls b/pandas/tests/io/data/excel/test_converters.xls new file mode 100644 index 0000000000000000000000000000000000000000..c0aa9d903adadade636e3abc1420a2e563b23527 GIT binary patch literal 6144 zcmeHLU2IfE6h5=vEw?{(w}logP}eH7w1wE<4~9UuEnCG@F>Q&)kVsp$P+7WI*90Hb zMd5)&5(qKAkSIyn`0|VuI2Dwmf zArD-0#>zGHViA1+*L)FR0pu4u^({25g`wu@fi8*RFD^j|%Mj{ec^<2sf2l&mxJ~7w z={_&=k)KUw!Tz)dX7d%>-TvqK&2zi}$hQA^-gEz#0HwfUpbYQ>NC8p~Q~*nWr9dT6 z1uO%W1GfRyzzSd`unMRFZU=(E9RSC3r&E(&1G)~V2i7|LMyH;h=>l~#JWym|6$^dx z>Ef?>9ba!Wq2C-Sap6>PBY>#33*G7>n1R=$h1qZL}gI=a1IVi zGa~mZNTgm;2~a-Msx6e7Piu|0R7RL%9}GVl?rQC<@m9+j?w#76t{vgpP3fwmwX-{1 zJE1#kM1D^NF@|&*4U*5HEC09r`1EHs`CNSVnoa)y%O8PHgPfjI(td8V{lJjXlIE|? z!EegJZ^*$f%E}kSiH?Vdv&-%jpO>^5{4@WMKk0#TLQ<>)i2|+56NOq=B)nQLNi5QO zX~LJvl-azd^kvE07!uQ0zf}ohoolP3Op0 zly_L4Yo_!swbZ%j`YAfuverePmRgKvH|%gVJh0YeO3v=W|3pi=HnIaP#qxqqTg=g7 zW^5D>v$uWD+Zo^T(DS65;4c*87xMU$$>XYK^6Z<#OkKuFrW`}b&AX=e3i~D@V!s)9 zns)6skNQ3RR)9C!`W*KP%J?-``DGF>l}dY5&p@gQmPB5}s&d4BwPe3}99iQjL>nd9 zAN%#oA4sTv`3WkOs%Q6Cpi&$?d*IA44k)k(FXWb~Rj*3%mlLHZ&6vTx3jX$~&THq_ zTq;>_w%G0Iad4(S)n)ptUs+OKUuyxu-*sr0$_21AwCzdhKFdt7{M1>QW^5qHh}ioxx# z0Nt?1fJKjqjOtT5cxQZMuzxuAY-}JN3?J-|M1v39zpkgZFA|MLbjjNW2L|l4Z|ltY z3GdoQ=d5WfT!$aH2Qv#-ZQ{rqzh2$GH}K90OB(Aw`hlw#Km+g4Y(D@{^B}+oJPa^y zj{{t}Hvul(y8xr(6M*aRCBTLG9^eX00KA`H0eHy$3DAbuncUCU>^nDYvhe=xrfKWp z$iP6hHn*Ic&eY*NIKL1(aY`snpeCJoX7a?@@pDG$p(oxtdj6crnO;_LB3Z5^6w-dbO0LMlI0P1qix%g;aD2@ki=I?*?f5slp=4i&c1}v(_@%3R? 
z_Tk3O?-2X4iQkY{fz{@EbKj&-r5h?Zjcv|>W~3c<_hT<^#~7Z@M|Fk_?Q=e~%`&4g z$4f2v+nhE7yHSbcker>(3};)mpR;R-+yx;X?kqm(mgg}4{rKg>Z$I1c6=*=$gq%2j z1;xP0UFseCJP-1n|7ysFoWsN?TVbJJ#}LskP;(p4#(y#Y2H211p3v K%c3wJ`+oz*DjZS( literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_converters.xlsb b/pandas/tests/io/data/excel/test_converters.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..c39c33d8dd94f0a1928bd1658333625825d32d77 GIT binary patch literal 7810 zcmeHMhd*2G+YgGUh)wMTv8qOqnzcuViqV?2gA%l&w%SGQUA3vbsZx7{+G>y1s8O|5 zt5l6Q?K3>}^!*Ed&&lULC+BnL`sUo%eSNQW5I6A7Q2_`5KmY&$1xyCmnfc%V0A}X^ z07?K5S6|-V&Jk(nXr$$OABn!p=VFV3WS+z2N(bO#fB)a{KdgaRt)5$L0%Y}UpGc!h zxtw}%pd&8`xhQD!6v!>dXN1+6-tN@Gj@;oZ@`$Q@hv2vgp-Q2dJ}V7vj!Bv3p~+B^ zZ{9HDxZ&)(7V9?0&#qRS%X*6e+< z>qaODSHA2lR#h#pmKX^i3v-c=LQ|PX zilJUuex97*0T911SKfD_eGhAnH?hPd#xmE)0f|EM^Zhvg$Jqa2n*L+yq0s|a#)3Mw zRO;myzpvV<05huUAkU}Ccu419b_`WvS?n@?5ZQFX)k`%+!+W`-;-45 z#Z>mh-Dy?e4$P2Ob4w3@pcx-=;n-TH;f@7euD8ysw)!?Nvv`zX?A`C&EpIYFXyB)RYRwq?bdGk$$mA&{;XBaSgd`<6%f!QH3R4VJz&grdP* z1ETHyJV6rf#F_Mg&kv=Y^VY`Z;7|9MrHmpYF=h$O5FRE5b9^IFcN?kBo}EJ2l32Sh z=PprqK8$O_QqA>UI`9-^HLW_)&2|Fjr`@TS5R6RHkr!tv)iyw~Y!vLZn=Lh7sWvd%kM zA&u%r(dy)Ic(A+kN%#rIZoB^SG+JCbbvgNS{Z_Y*o{tv|ncVjL7hrZZGh4~u zh7z1Rfr#)l!$ByO7N(@-}RO&8@_DndAQ8nQA-%FWz-qStw7NY+OdaHGC%rn(Ln?F6`s`b^{pt0iS#+Hn0dM}OwT;sWF$JUp>g)eo^HAf=V-eYz9Z#&CS z=XlzJRXQVf1K3XO3~hx(I->t{RX=Y8|8`WUffj2lK?&x*r*9cM8YO=b=m^rBFp0ET zVV0?k!wAQRA+PUz-L>7A_>{0OopJmk;Py0ENCYa~Mh}63iw{)#Z1s$3UGyrl!HW!M!4HQ%;N@%fT^Un`jEwvF@$BWmPH~bM@s$Qhx*i zNG*px#YeIhJc3l)-C>b%Ny@_H3&RPRa=YH1Xf+h4r3!AbNlX<4xaOG7Ejv$$hMD+Jm8Sn zQz&|g&i&mquh?^#jn4DIy%~?sGsAmILrWL;*j5=A{3&Uw-|b{aug$q&I@nWRzI)sD zDC51jLApyQSIC2&yT+S_lSkWrJyy#8qOIVG*B*pb?+`(gx5_(}1w!{>@HRn%q%w9n z+*>jC9wetzZZiy;W@{BtvniO42dkg6v%kq^YEcm@8?a%174^R{1M?Kpo1| zB9@n>dO3F`XA7&)C+B6_IZCv*M%}93rQxOz0`EL?81y_yvB+Js)FFvw*7I;)o3@^8 zwmB|jNPgRRNauoo{m7kR|B*Vfkpj~KrHzYSVA~o&m!P1vc3&YCNxs9MUDJr#xZQ8S5pZ2QL#J!}xZ;r;4 zuDg^hc%7_N&OSOC0{gd@WNz=Q<$6ha9qc3xoE*Ln#-F+MCIie84MZ=CUwvR#4jjwL zG`{>guq#OyB(DG>rt-B9SxW_y%gO zbyx+$#sCH}=zHYM{R{b?JLV+|c2GxDl7vio)YOazLHBhOpC1{6u)PKqv2eQ}O?Z7r zxWLPzO+KiS0cj_fNwTUxIzg04p$JMBEuR&6N6DStx1JYu4mzrY0&j{kDcKfo&YMvZ zT{)LYvm<{qOu$eP?ZcYX1AUUmyzR*@$I#WnO1DzOOdGkvozuG#Cb$ewVjwEgzHuNz zM(aLB`jPc=R4_ytp7uRRkZBUBEE|-WNFo7wkY^@6uZ+m-eAVSo@~F92LLpV28Bs4X z?F@tudA=ljM{swPyZF3^;53=1WNnoVvM6ik zuN&%$wJgbfmnaC9@Df4oUi#$7%vAQ^0Hep(lRK;>DN0fJDLO5pZTPXuVAa7YH?MQ) z#I4ccooMeDo|F_*<{ZHV0ScaCTz=0}`=C}d6xK-l0L9_fOz1SfW>pJr-jc4*Q4C&P zN&K3-F|N`!B7lpzM>0F#&_Wyb+1A2MUm)zfcORw#_fDeS^%DIAPVI#FDY478k#D;D z*fUtO+%Av~Ua%bHRvwUFP(@E0WRIkC%4dA=8`2xhyK)@Xu81aaTAPwFyUT60p8VpS z-e_PnU50K?N^^`fqmob_6W?A#(5?5olPc^hBwpmmM3{jiIq9(X#ldT%5LR4HFigEC zu4iCa#w;89ME@18?{t@A5(V} zRC|BD8!f)d8{y*=UrdKv5WsHZAE-HLyqiuxExbj~9awyK55-0&rYfv0HH@I16xrah z=q*Fx*_7Ao#Ek1)k_SD@ao*Re9fwuF8mX=Fl$(J;=3EbX@9r^@$Y#BK)|<{jXWv~m zvW^;AXX?REmEDk3mD4<$8rsl%x5I3b0w&-e`PO3BU~OVm@lLxyrBIFKV4TRwg%p!T zyydg3bwBR<{?|yz1R2RlDtjDBjpAm=`j|JHWol&n_w>c;hV*&D0FAaS>iPn09^pf$ zsyAPctY0lB+?c+#1Qn(sS3*i3tq%q6sXIn_dZ;fjuo!19U97&rMnj!=(5#__86|w` zfc#E)A<+AyWZvzAKI=-&i^7E)k6PB8=mRVeYgDyW=`$P7cuj-r(vB%+_+O_+n+td8 z1N6jz=M9V5SFSt@sMXGY(co4$_<4%7(iT)C85hBe&Fk82_E@lC?BAiDuEEY3ferJq zcmTkKe}($fAn&2s@T1exSoxWgJ|hUfTtHR{)9I~(iE)$(dJR;eYUxnJR~t%M$9+2JiX|w#qP^o&-o0K_iLDy_GMO3vE#Ew2vaZrfggHWpGN|1~}3W_Z(9!sc3 zFukZiOvO0(QD4&u*5ro1&L4?f(5g~g6s2D4H0|dGzR^V3oPU^+j5}DW>oY00*2-f{ zsJcys=~cq#`JSBId?}qd&7)tbaj^m28`8sE_vPGr4N)rYRXo9T$cAYxj~EW#x+7Ci zZ}&pX8ob#uybdU2lJ@9B&mei)uKQ3vhTt>i3w^S_zga 
zC_TPW%a=LnBFw;Va6I-(YBj=?)9Sd1AxHYRyFjfFIerH_)t)+l3ARAHkb+Gy<2gfASk^K;?Tru)|tI3O4H|#}=mM z_GSnN`}=5qGkXW*pN-f5>QSufJfe-%`~;}lXH^fu&j%_rfFTsd!Ox`jIR))PI;wLT z$}A8nZ^Jg;>erI!3Sa#?gZ7eIez}4&(w+gw(uDPrC7$cV>!Rl~Ps|u~*qmimW8e`h zrYE8k_g1PrJm5jvw$kI<7u=Jj!2|O& zk|JCwrLkj^?}F7HBEDVut&u*4N~;TDXMWVk%-Hc~75ry)f-MhC99*y2nIm0(RwqNz z#sFM=NIb4!r1xVVsmN+dp4Lup?^>DiN04J6COi}b3h~(&B2z<}VzGY12_nKvkSAD9 z&|p0z**`g9a{vB+l=z{uKaS*RS^KvFpnxsBuPmTx2C-=b$fqdN>dKcg6P$S`%R4z{ z85QP1MN#YHY86rKlTK4%i|#H53fHEY-Lyk5bP*T@?-o$uyHV1w?ZvqiG#PkO;0x{( z7;NhVvqjg8Zq4S~g7B1Hswh6$)(Q@Holv=!)5dHSXPy=d$1salJ>ZvrtLI1BQos9A z*Dzrz413Z&LfJE!n2v=zsGoqJ$+!Q){-uZ?-AMH#cCkfPv zJW3pMz4U|@()%>-0#t81`F^FXUCtB5h{#bk=#7>1vN|IE%YJciFJa@z z?_a3>wO@Z-fAe|`apPBjU;Fxh3I4d&VoC9jF8^7dy-q*G!1^FM`88%t+J&o)9%MSZcl{?+mPp@!3p>1Y4;Ea2I`!YQCX z=qKRWF2h-Xv-#R7fGxI9@#_B*G%D59sqEs1_1t* zH=Gqe>%&h0AlNMQ*JYgb=x4Ehb+eVT(_JHO;A2e>003b>OjzqxWB76R Ef8hk^1^@s6 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_converters.xlsm b/pandas/tests/io/data/excel/test_converters.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..eaf0b1d0219c56b646556c52b931bf78c74fc8c0 GIT binary patch literal 8467 zcmeHMg68!<0VPF{9*Gf7u$kMgOF+uFVk4yr<#wR!3%m{E;tsyK;F8p;?Bx+%%cFaVg zB;e8jtwXLr9V=mAh1e4AtmzlJ7u4s}wN*aFQ@w`rLW#T`%Bn&N7QNx|(V3^%{*Q{> zZF$2L0}&5Vn4rZ`{@W|VfY^b#9V!}n_f)nvqZ|DGy`51%gDI@W2db*-Yh(<{^N|KU z@Km=^R$rz$1A_SzU+mmrOuCXLl-K)q!Ud)MsWJJB+C?~zKEHXeV`WnWV1S!-EsHr+gXSe>ypzKp>~;DXZL0uj~Kp z-e>%#*g_`C`7Tx}0$qw=RHc9p|I0!6yhQ9~KmE5a0+rE(#Nv$2o>gJ#7cTBV4i?um zMVCrMFUVu^Wb#|4l8P6bM@KwIMN8QW)qzDu#ZLzcbp)f_4=6B+imAfxNM;5azEd|? zFh8qCnUvGnsS2xoC7i#RJenOaomR4eD;*)KwmqIn(hoC#GF$E2??8WYPNAn`BWhb~ zp64pY;ALXz@a9M%yNl?`mrEn3UzkksZ!L&+mV{NXgOm_~Dc)1l1|bYj5Pe$EhIR z7gVB?Zg5!?A(}Ul!=CtUr;(!(R1EW-Dw-gr+eC!hn_Cq-*mzkmqE@M-e$G5%k*{ZL z%;e#PB@7D3t>a+=b{}}$`RrgHsRxmm8ERa4b`hepT@}>$>J?oUyY=c#t33Q0`tu+! z+OZ>Y%zF~JUVyRn33~8Z_uX$8hp#WK)xAF0cNbpZtWs5|(!3Ou^bK>P3-4E=4PZ}# ztrldq>+K=jh+&Dm8(~8M=jeJEjXA|XuZFT=CK?SiHE4}7qIny2`86zvciHE_L1?uv z3LeK?Mb*)X@km%!1EVviym}bh{DZ|s4`9yO02SOf>|4fAi3C-QOo@$B#yZj-o@0`aVA~Xi zcy>4O%CiZPc#SzpS$Ih`J*qJ(M%WhQ?swk2RrkKDQ{WXrTO36PZj+hbLBv0!y*LdX z&lstxi6*x?fw1pNCVo7WeXm4}K#M0`v${{b!6I~`p|qc-)B+P?2$!C{G3iv?ZvXH? 
zYz3tge-IV#Zb6mta(Xn0(_6`SYVy~4XV@a#e90OiY89mYoC;H%E3X}gT=O8&t^fE9dVDNlrK@0tRejn+u=czH&d>w2~ zJkotq!E+vU1;Xcq7Ky_6wrXI*w8A^v+r=*I`+WyY9#mm9w)b5>ce+877E;bq<$9bf zrG#J?>dstRmu^MT%UZRcf~KT@363&s%2LP8)fV~$_G3G7 z_y|`)`ja0pGOmMLv@O-@9Xkm?sR-Q2v&RR3N=s~#fl zik#N8GOnrU)N*qwONEJ~o$4(bhsek`#}dijHcHkid;ByvuFgq8pZ3+fSGFtBp9HKt z(9B%UqRlu``5rw*GSB^Lfg1tEJiRf6qrMgX<$R-qpbwML0f0D)AH~3LWN@>waXi8z?PSOi$9GK^&y|n}60Nuyo34EJ{eyeDutjQ|AJ}Z%Ypq-aqE6L)8 zR#uuZR>z6f96I4T^Ob$Gx~)!s*GJZo8OEwrXcm-?%bRw;OizuBDhb@`bEdZi4|NTP^;Dw8z1zlDB0FLBj&e z1@PncD(L8F9R*u^Fw!h5l`Q9{(Y>J+0&{0U|9@f0pxdtU$iDg)5jk_ zBZ38e3&%ISa8YYyK+FROincGn6K7F4iRIh%IFg0K4%xyqN{ChhZpC(zyxKzsp;g0} z-C8#Q0JBIrp%)ZQo|g}@bctXSRgP>>Edent;4sI?!;1CEbvO4-t|i=yC4!oH#=TXI z--Of%-jl+kEw*Ht+YL9JoTP(1_q#cqC0bi;c+ZNTmnU(yv4ZGH21;8u$Ha-VWo}ZN z_}%I8Toc@VPiUfAIYJdY%#cb#ehJ zwGv188;-(!6KZER}TJsRK_53 z#FES`_^W#qVPJ_!_uItL8Txl=v-2#y`&%^R#W4+$&QPM#=!i*Xc~?^=?J?W%UV`+{ zSh|q8{gDN((xEcPDM?>T21KaLgq(jwIpW=r4Y!s9zo-K5Bz0%Bf+b^Xm$H&r_2_K{ z?8wtqx&9~ z^`1#oXHGM+?M7pw;ia45ggAR$r=fUhkuTA%G~1G-fM&JD(9`VB5^Q&$!w&^$WD4i>%Q{x=z{1f>e2Z*kEw)j* zw5(QN7#Rw#Fh)!xP4pHuAW(uK&1kk znzIUz8OI15YJsT7GlQ?^rZpy^UiW0aU9mDXcB>J(V@JNL2NmplGEEGB2)#S>wt#&E zBHtAAWspS1k%Z)-v}o<7U3+JGF4~(CtossaE`xZt)5cP}MW>aWTv;ah0e6UoaM&2; zlO73cXnx>UZF&&VC@ZHboE(xC2BKJ&5I-qQK8kU#z$lm1Y%@O>A{L++5eC__Gn6(4 zvb1S`>=lK!%g%>?ssm*;&0}s~BKY!Tcb!{z@48{Vf5Mn(4Y}N68{%9o2!EO)`Y2MP z1f3To;?5QcugULRN7#qb*2Wc z)YLnj1`g#p;2{>`-@X5Ul3OZFP?WJ)rUHz^3`fnk`#QoKNHmg~KyvQ+l4rHPf|mUy z)!-Z`_byuvTU*8xXXXO1*d}xpb%*;ozlhs>C{V}1^u~QsvRzko)V9E2LQP)Ja}g01>u z_DMQQj?&oO>w~*ZHGa<`)0Lp4ik#g1CxOM45f%76lXGA3dGJuiT1h9+tGp{HQw&c& zcZxPQcKH!laP--30o-T)MiOy@|~ zr3oaCOKc)n=i-X!;uz^r?UWM1q?I3E-cJ=p5i!vt0k6Z-bOJU} zwbT0bSKbPgX;lemRRv?WW^NJYEQ1ueH&pd*#R@++wED0s_1Q#pYnwIh@{9E>q}xC3 z?8@3en_cuXk$d?tP0OnfzBM$cf}S2jK*n6;iP^ot5(hjFpUXz*?w^d6e~5I5!% z@JM9?L(oAktbGZ047Y4+UrYsx-^GXqiaCUGGUkYrRKrWt51D;G+xQ3QU{8q0^dpwd=Y(xL+9p+n!#)>mqIOX zt`QfT)dmD$su2pj^qk$~9i2xse(W?jR9Cw=w|3}HZ3XbSzwqoIexf^8wBS2nS^hLL zw9I1|qjXdu5)b^m+_g-D?oZYh5W-A5ibIsb-#Tqq^YJ4#L-#)Ey)z%?%VlZK?|8o| zZ?qxnZC6M(_#xRq_B$JXls7OND=Rk`_`f!pZN=MM8 zV#ln)rlH+4#w8{ymQ;EC+w>kleX}Jt<-!z&|FzcTENDU=*lw`C1H!tH2Creh8pexkAq%3&C4rvIk)?h~Tbl%VfoA&9?s zI^jE%X5=k3lft4ybTgM!6BoSsx}3`qj;>^caU4hW-2^e?xh;ls$EhO!QFvBgGsjGd zShluP@D1+uqF|;c1}B0R$eHheanszRNwF2_qlhR?uQvC@*${_eyI zrHf0|q@I=rb2e*s>yD0t52yz_1J8$M6L7!Zn+Thxz|?P1b@B;3%%yjY?e2d!zof-EWRjtB@{9mQFT5ai$N)whxh;gE}lOXbV}6oz3`+UY+E9DmjvW zEb5aGCCnfnxW))Xb0j0R3KQBY_KBt{jx;T@L$h4Rz#2K)sz_$1iT!!^nf0hj_g&B%@#lXDu@$^Xxx;ohy+NsPd>d5sFOE-mP zaRUQDAsI)RtW78?t6U_HtqC_$DCCkwo-%T;j%)!Rn-pzpY=kc$#o(q_X)zH1r z7VGPBzbXrn+*<^nR+&p=iC6U@%uVAA#K<}G#GS^j%sUjk)`5wXXV8_J9yZKnrYPwP z63AB{BKa3mTo=;39#=EC)?04~o(!mq(C48DrAI}$J~5UCBA~_&d=>*@ZI=(ve_gIO zq*+RWEB26w3dAMP) zXSc-Zx`A=}s!}9SanTeen1j=|-9mmIpq_2YM=Lq?o*uK>oa^EoKCXtF;W zgd6@a;VSpb>pOiAy5WpeGJ3=P9OniHXvrLI@BJH`gGez27UpDA8jV%$_R1Zfjy$yT z7GmpQawaZrko^$ehEjWI{|%inB$72O)AQY|Rpk}`*ZVX?(EMVOQ$ohZLW=1(*1L~T8#r9rtS?MM?_~Pt>IVQAeZ8I z@fGD37g9_6b^H0x1^aXSMKfAU^`8#@*%$lM@W)t&WX0dQWIr4JvsdtU!x5wc_WySd ze)jXTfc!U4nE#TKe>VPEVEoIN8v8fnf60wMd-z$h`^y6^&MyVLpB?;MFa70UjPUor z@Sp1GXVagP%fCz)N&YkSf2Eo~d--R){mTacKu8V%{44tYZ2r$p@bBiYDgS2v$Cjw2 VdKG?|nOSUC%Smbv@_IoZq?6xqrVKYyco*CIJG0Bp+ag+9W58hVb3i z+Y#m?B}u%NCHLq9WEi9I_hJ)+{cBmSI#i)zu$iEHVdaTt~?z*?X(q}r5mdQFR>JlPdH!?&d;XbS>!^wnRN+tL-Yk1pq(XOTVb2$l08T( zxugfJjnU?}RHy6;x!*-Qj5-A0cek+x96QW!#}sdmIgjl>f<*7!e^ODrs1xn~{njZR zmEI|{yhMo|5gjSH2BzyhS4~WFIE~)Jr2O}SjesHpR&OR^yq>+iSrwn&Zs|FcRKyfJ4FF za8Dnp+n%110q(G8Sar7+X-0|(R1-SpRqf4=bVi4$v@fDqdXFDY%s;YTn)8C@CJy#X 
zB;+&Hc<^hhV{4=KMN_mg9C%f4TbUbSCbzi!mp)n@TYG?#z^W3$_#6Q?kc05buG=Ol zKPO0=)q3G0l?+VOaDfc+LaaklI3EeI=b29W=_OD7yy2;Ya)AO=B8t4(1?#vAz?8pL zpAI!%?)Pun9JAHhCc~?D7AvumO>e5MMl%ZK3(iirjdAlg95`1OFO-xGl%l*gWbeX; zbKPSzqxk9$$)c)@GaO4hhOX6H`vfg|1sx{so+1KNY2E{xCZz97`Zs`>P67mldpi;l zib}S1iaaI_ev~r{dCvMzx%UECJ;&`t7pF zxHiu@W?)5!Rf;&;Axk-viVJ5;*gg%&Jqe8P60aw2f_ z=@Lh$x94vAxdCg_{Uake@FYU6vr(Y|(p6@)gSXTd;W0Wh4Iih3zZh%6OeeOg@Wpp? z6q;eA!o2FkBe@&Rb=q?qxl6NoMfZPI1JD$_b)8U&)W0rQPC_*TU{e3IvOnD0%^nVS zBUa>}woWo6EL_?L_(K+*oi;s6XuL8i{&M-@E@`{*6t-0{d7$PvM3T#j+Hnb+g8jD9 z2-;eZf3y(OdWDvvB#$hrxeWnCg&(dgPjW!I^2R2d&m${%PnR~F84P37ld`PZP_~e? zM<=S;6kzm?ioap%BbSY(KCSEZWs!EVn3P!osy)QP?|sn670_>V^tSoI+0q~BsJgs) zlH_`CAnhl_rDQ2L-S=kHXEgcG4hnRZM`qtp^rz~#b>hJX(ZH!j%D^E@2Z9^xbs5@a zybuZ`(A@B2lMTYqwWPF@&dpkOq_j-+B6oFKz>O8op6f&&{{q&;zyH!AngVh=Gb9wXtG7Xm@P+p9;2 z;foPWkK|iF-{*NP;t5nTx@xF)eoeZ>==y-Zrv1ap8LDChUTcG|tcydPH)ZH$`CT!H zaX*II)~j|w&-K)+dF2XwIFO+#%6iQ%0etG-2uNg==6Y$-XRog8j1B50SDW6^(;iPe z>+bJ|R=9_)(h=Kpvi8}%8bVSUe{E0pf6^u{1s`WeM_(VQUpEL`dYd`g)yq{)_P z4<$&7Mu^+hM0p3_@GnkSDf5L3A!1BBx`*gt`-^axFrT6FvU)gUWyf^JUgMiJrQ^-2 ziRL7+t^)I%0Os7V*qf(?=xH_&1ATnrPPtc{OE$O z8`}9^`m2i(%`4r?f>RLLVrrOO34N|4OIxeESb%auo`t!cP$JT^=;%6ahU0}ow+sk5 zOFN};DikoYnDv-bVotIpg<7_q&eZmNc%|HDg(!u{9dS${_1D`?mok4wqGuXfDeLoR zgMT2N-q|C2;3V)W+HY_sOZubx9yN0p>%7n1y5|-QbE|Cm!lN9N!a^|@)kR&LUAZwD z4mm!7!z9(Q(8KQC_x9hDsHh?NCBwT#7cWoRhFzLB^`C^7IJkkA60iI~%FDTf!W#0q zlTE&i)|Jsl3%>tmFBQlYhNU&b4W8z{{^$@0W(5rzc>+ZZGVQfh>UND2Eyp#_9l>HF zltklfVpS^GbdB#Eb-ClMBzlm$rjUg4pnjfmRI1c~!?aFqIepX&s&!jrz%+*>atm}% zfx*jvrHA9%H!c@|e}`d@;z~s`$IA?ou_o{*G3K-!C*A3qEP(fNhy*3~li^Bh zxokPD`FE4#M=rgO&ale{4=r^C)yP|Xh)>5!B9qrTEcX4#_qiq4IJp|K!~>~QX%ZpB zAJI>Ix)MPMaYr)8V zEc9Rus=k?C&BHh0(Tpso4#9~USVtg2o1yYXLAxlEgYnw~V^CbPCMrJE+w>rXwEosK zB*j)M^D7q3ll;Rp=^7=-LkI<-x0&j7?{QH$JF#pNz2%GT{O&ED;&$3I+xpVZn{h+D z{2SD|T{3C{-Gf0p@$r$Hj)fEzW_YQzUW0M`I7TQ~AkA@i(8ANbD zcW1mgcjw46WNM!kd#)=#)1YmvKsvqr(h|5o(DKq7jx>5USH2CFQb1%Yjw;-Fldrhu zvRrswU+$;@VDtU$mFg%z(A2J?{xAczEY-ry)E5h7)AJWUoi|n6);Sy*NFw1d86J#2 zvNBz=5&W(_fc!aBTC{8ucZv&9YOS4<-Rfi1#tLqk1k+xEmKW+5$D?VlUMl>NGLuzO z6Z)onaLhA{(iuPiMKgezwx$Ntko1%rekw*o}k^aI|1`H)jzzLyA5~ znba{kab9FD`3^1sNJ!QQT<^d09lHM=lfHp4$6xbtIlb3tM4I^leup_t3ZayoR{H@I zK`qcHc%PBk9}90(eNyam!29FNCr;FQ0c2&*{of?9r3cYB1vP75)v1#>y;;Oo+vM8)<)BDDRt_d z$5-ezR|5}D)qD3vQ8K|ZZj~WHW>=nI`f9p56UHLigZrxsN+M$;UTb-r)4$+(J}Zsy ztc$*0r*{KW!1=f6B(b_6QA(Z?z~y%Gz^~47;i69F^MML*3T=LYp}D9_lZExO)E2SZ zo9DnXefB0-(NnT-8inO9501u17rZO`bW>iDWTy9Qf?Ep5!!0bNpz4~U*v#ui`N3MF z$30zIH8Y691;FNzt zoUTUVx24BbJCL>N?NTX4-6J7K*q>HveLIf(bB_DL``?#M-EmU2Y>&H&Z~2!Y*}F>N zr>w-Jvm@^YxM;GQU-jP~JknJTX%(et}Z00 z&ff_Pf`RpK?ZNPSJN*~4dUV4OJ|azv!kYo>m1nksqgKyI6s4Q=Gv4D*^gd+1O zP$N5a)@2R=Nj@(3mesPOsHcNW%ED2k=!MFQ+4@Jhz}CmO2Px1g%v9ph6f~U!uT|X? 
zDeTFhN->vO@K)0gn(MH2oEYr|MuLk(_ts%3s>tMqyl?(He74zTxoKdEz4qT;QF~NwvP941--u&2(V36Q=wg0&RT|>z}aR{laATBKDw?vv55S{?_M6 zxE0$hWTgh-HcJZ+fX`-l<=uY&bPi4>@Xa>*(QYdo$3}(Z}W&LrQHnXpv@a z$M1{lzy9)K;~f7og7M8*poz8jZK?c zQo%(Vi5J?UvYx|Sivu+Q78>8{>@_lPesU-2d^=z)@sz#gS!w~-JPV6P*z~i<9k;ZP z+G=R^>s;y9>o33ywk0Ft0#|vW0#H$64PtIe7=y< zkB68?a!t;A38|@2It4{63mb4x2@`<2#0#qQN?I^N@AY@O+97wIpC_k%C2zO>AuX&I z?FCzAGK$9EF=j^&o`+ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_datetime_mi.ods b/pandas/tests/io/data/excel/test_datetime_mi.ods new file mode 100644 index 0000000000000000000000000000000000000000..c37c35060c6508a0958b0ae21bd0181bd63017e5 GIT binary patch literal 3585 zcmZ{n2Q-|`8pl@`YY{|?&Jtqvo<#H(HHg(MHtK4-dJBSRQIhCHi`9Dz)+!M`Eu^fl zdW&9yh={m+_ui9pbMKw^%z59LIsfN9bLN@f%-`T15iteepM?t$4Q@*)fux%~ukow`|$j`_F4NUO*!ayuuS7wAedN=;qYljPC z$>G9qhpw!2o;gdm$Z1w#_8Oj8|R5anu^7~I+2^Vi)11)s(Z!@F6Iq`cI)c7379{!Y)TXV%E zBF^0`-ZqWmT6e89JFvLP&&uhiJFi-CC;3pzI9W!YNU5lY$GxmfEBee+E;Gei^>e%Y zr`fT|CgMT<{+g8xCrZ}>Tqm}88y=|T(Wnm_n22W&r`itr9YqhgN(j}yt@%-uM_@8h zK&T_~oPW^O{t4}rxJF8hCiN<^k%C;9B){d2Tr4nEqRG6i(n73@BN=fur(hS{M=8Dd zS{tLJJ1I8stR*+BE5V(Y&5N3aJo&o={y@dt+PxuMXh!n>@j|ml@;X}JxvXg1&z7vO z8Q!{r+)EJ;jBi4N9iFmhv}YhFGx(Wrt*+Yp>4hgP&}2W1ek3VKH>DwsZ`PiQ+?vR* zfKlPYc0+lg)_YVwKH$f4lR{*O)0F4_{f%6AB#vm?Pt29$kv+qERd46z-Ohr?#*LWwwu`Fc36fKM8KZ;e#AJTS`{zq zsCRq&>^Am@Wqdxz1-dke`jMM10O_OqxWB=Q67AC z@4P`)K7@Xw_<6Y@UoJ$;E!B|Ep7W{>T(mGM@Ij@b zl&%ML$@C8Z41FC&EL<=VC!m6(-NChb(e&rIY+tfc<620Gt%geUPVhK|bc!My>~QBz zEmkqrQ`*vubylIfFYg6c-HDEORbg;EIGE{ic-e+8>Yk{>;uL$=2K*Hb%}ED(Yz%S* zk`dOCJr{%Rt8G8ekL2MAht5nTg4Y|ayDf)hv=U8nZo;BNIRc$P+s$Cg`LoY@3VX4y zBhBAri#Xa}0fwZQatPc;6zMx~oKdQ}f40qm%{94}?Y~%I-W;8?n=yo65A?0}MLfCL zL$I;O^t?Hmij#;w;gHVSv~$+e=+~USZ*a@Yg%`(Jd3QV}_WY@~##5UaS&FF*tYjdS zK^F6K;1G2`*A8=*9CYIO;|Y?a$~SuN+q+AM&(dg)q0p*{P57eDEm*Q5Q){%c6ZXY| zDjBUlPjpu>0f`C7QiWMi%&6F2zh!X(PLXu-;iU)``_uP__m99In=45l(v|dt7xo25 zs0q+A86J?xJvQ|0tNN^{srLAXO#2;D1*+J)J9_>NQl02KOsL~u%(jGGJs#VhXXhXQ z;QX8j@Sm{D1j(MGE@1cODgXeyfStXEJL-~-m$ZZ8FK9;-A~F^x+W0O(Sg*d$HpsAX z&HiOGhhhqk1yq|}bFlvOh%L2HXs+3%?s}tQWLK=L%2&_P%!PqX{eE*Hs(chUTlcvF zc$|=hFW}Rb^!V<}FtEqC^7ScCK?kDjHDdW6jc5BYO+rI)N~7RS7bi+sz1VlZ@J%Us znMRtmnKHg~V^et#Cy?y0_oOl%-wNHA>FsT@pHoOQwI<&Ih;?Lo66e>}B8@&7$UQ(w zn>M~|Z#e1`)kmj$?<^NF6hCJ=YI{}EW2>Jo7gv5#AN%cXV*u-M$oWT+m&H?k>n=@k zxl$}o7TpeAZ-H|d9mU9ND>r0Aw3p`bC?5hBVP!oKCN@bSzjC#V(29dqKMgsN42}lJ zGeoNL$-gT^BzwjVg28Y_SZ>(DZC!($O@Fy|r+@541!ysm2a!5Dm({Ot zvOgJluX_l5-blyi59Jow!h*;B$Aa4Jy0HD#L#E5aBEbrz&etB$!v)9U@2^G~v(k&z z&!~bJDxJiknvp4Z)rU4xK!{o&BM0sA1?~*&w!gO}mHVEO>9KQAfUXoqa4y`cta!rz zMpX%j5=zf5EmrQ8%hM#A5t!Pkp?WxE3{B*M<&PqB5VSE|-yh)e#`3Eb@72->jnFUL zX~ci!o|oP4V!8#}*q}Dn&T1~wdAm@%5wg@;EE97SxF-fFp64=lSMwluRS-|AkSEJl zQy|}wD1Cc}Qfx0AR?8}LTEZ-DQ*qq1I({y~gMT*t>9gi{tg`x7!-5xu71^8hE2K4F z+YJ=4$%NWvqBU-b)IAQfn3~o}Jk>TNC}6}IiZ}?%8~nuMul>#Rv!?^a@X#35PRvgo zn^ERDXU=*llf)?Z4SQo5`dPfn8EA~UIwg!S@I)^kgb{h%1y5wVYON2M={CsPl*Sa= z-XEBeN&hqK>X>$~lyh0vb!f=dtmr06tcx0Kp9Rq!01omjym_ZB-xyu(}9 zYTV|0V{s)Dxe$b%O+oLd`<8Fn+M^7k_$^!N7y-FjcaDK>a~Z=lhbvm*sZ|R6T05ac zozARGpQ9%+H=FQo*Z9rc9IKaLoRTuc6Y3b3#;bK6 zQMi&>$eZ|VrQP6Ap|w*=O(mM!k#RlqHWm}iN{7krD%86V7KE zPIoIJVvDzRD%lQnPl@&zIcGJq#CGddf{IPHX%q8wn6V3Ra615Yy6UE%I}KQ3_6{z zViLKS=znwC?DZUX2LS-!Mf9(WE*u5Bgmi+DZp{Z#nhu_Q4(>U+{&ljC+Fz;2d3k9x z5549eZXxoz>v-@1i1Ry7b5#-g(~C2iGn13LXQ*|PZxlGKqU)-0wDA1bSMU3Q0u@3B z=Zb~{N=S^}xC%_>@2H;*#;~sS7T$D9=dZ zk@by323`A2`b_OJk)WPgZxWBO_+z3zf2JzIgzegSCVCw~Ha?PWCtQHx{d22@F9)w> zJ-EYJsmNFxZe)Mys4G?87C-xlsVa)5l7y}%BCy)-i4y$h5c9`>b>3)@`o7jge*GevMlb((vhpg; zTKokPXt?iqniayFn?5srepR##+O^4RmL(G8Yg^Pxl0q3(A1Dspp7Yi-Zx{fqtQkdrHg|YbO 
z3KI}Q{e&M(-EZpC@ZE%l-^m2sc;^Dl={UOTB);|Eh14Ur~ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_datetime_mi.xls b/pandas/tests/io/data/excel/test_datetime_mi.xls new file mode 100644 index 0000000000000000000000000000000000000000..aeade05855919877556f4adc171aa1c4b5c22c8b GIT binary patch literal 24576 zcmeHP3tUZE+h6B&s?$wZ>4Hud>0Y{vbVI0=gfT8joi0joR5CO*$&hPuS2M;XBG(Y5 z+%gd%DJBMmj3x{QjhW$m&)U0F`|NY6_nY_q{l4$_o_&66@4cSA*8h3dvz~Q%)?WM6 z-PCN}dCZ^%(T)*F4t?e;Aq5JZ0rz%-xCVmfd_Il}uC{OwB-!!*NCQf2s7M8Q^kwjM z#S4gu6xay0!1X8##}Il9VLF1E1GgRLCgAvc12?-<@hx;4bvggEzsGXtF>^4udR zLy!Tw0_ph~x!Ba1j1%$ zoyicxg(f8umeLBvg7mIkQmWCUmdaLtWjFcVXZ4k z#2CoxkK};|GBGnTwJ|ky2)7(I-9iwVZaI);4E=w9q36>=xluEO6PX}IG@v_?or5qA z#4*%h5Y>d9iiXyi)D2l7M})$$C{`SSWS!CJ76f6s<*-hIiLD999d6+r4qWVzCHx>O zHJ=rSn#qbIl*0?zAP-??WC?`%5u#C8dUM<(jSw6^PiTdjNq#)QeS5!&`(OBpOQd7 zE`d&#SClU)kE)0~hX@>)XpwfcQB!k^$`%#GBI`w~iyj?)&gh&GVv+JFK(|8J;UAEI zI7s_cfF4df-9`f4N-Q1sGz8AOPWaCDr6Qnr**|HAFf%06H=q!>H4?N(C1EYRKomQRn30je7kF^E+A#o?rbMB^BjQjNLrZ7Vg&GQJsX}>)7wYZXw<1D$d3lKl zMWqRZ!n97AVCqB)Po;?p1@l3QP=BUNP`5ats9O1FnxeNxR}hM9jad64LJ64hGZ|f) z5-pikSIKbu$A?&xE-WnU0uk5%RkZ?UR9f%VO>dZrgl!<5okp`{?4-*h zLHpzLYr8TCge@s4;iJw{2!thQoe~R#rP2hp(M1C;qP?iSkg&MAg*2GXo!QU~fPiouq>d5(0sU zc%9%0on)Qh>02*!A{|7(Hz2iv(m`NG*|Q{$PSQclg+L%8UMF}?DOo2vLLd_R6X_r@ zVeh#(wSm$>IK2UJbdnA-SO^3n;&p-@8p%4r)Bj%BiF6Rd-hk8wN(V9S4Tz%?6=VZb z3u(acg&Z9Qacnt0k&)cQ*lg`Fbl#PiLHl(XTu#x9R|NZoics5#eHcp~! z%%!j)oe_eK0yp~Q%a`KWXw%s^i?Xqj!Unr5ok~;WM)Tj+if5xkXX7Hu#!d=ksf zVRNG&w0DSS)0fW1Rg{gR6gK4Sl~9@zH~R5!_r$Z&#cZ6VTbdEDaqfks)drrZ{zW{S zesnfsEX`C38>*$%2L5sFka#wFbT(ovZLkzJR7U7jHTI2VMDdF+Q7=Yuf(%4q_YuYX--nuP%W)Cu)6t#csBhp8&~O;#sN02 zy|6S#r?vYf)Xe}o8!?tOyGiVkW_1EpFKLNC*YhzB+6 z1_k?}gASsBQf&qi&NeZs()CH)Vv!MY=aJJO9Q3a147gC z8^wbT?gj-*po0#ffl_N9p?Qb=#euSs1&pr9HzF#JJyf{n8HRj0AXAsGg2?>^kthZBA4I^^D-I>W z6bvtC;jtHk;#DgF1H6hvFi=3S1`H4JdIoGD5Q?#dVoE}>4NP@-^$P3uezDxhc#a=8 zAt8^QL6fT=Um4_zg~?zfiifG8AKWFtk17>js}V6DfmC?WMxje8GA59Uj0vP7V;f*? z);d#;pU371iy|)vO#DD4bKw^O0w9Fb+GHv~S_@iQ1=!5gotA^9!ALNpE01j^3-XxL z^04@9$ddw_J9xz0Hxe!C%3+tO2s!L%ITZP-A~}S4^+onD14pCM<|RZVMkFWk7IDJ3 zX~}tPe_<%53tt0`LTRw~2G@NeQU6IG$Rf~r7>JVw_JCK~1htPVyVJasplq-_63tZc!nXhmmBT(mDh=Z86}uKO}2Rk*byrOl9N(6^X_s&PxGP zND;0wrVf;6ENFHf*dng zL6iusp9@t^?E&Pvtg)*v67)WJC0hX$r;6M_el(t&lFW;M?uFJdGx|fvJRX2?NqJyX zxMLwb8lpdicOruF7Ql;>PJ;)N%0$o{O@w8IZBz;XhFcBpATSi}B9xFjZXV;4lDT>8 z*}_?v%FsM;mSm6tuS*L$TD&eT=)v%sa(BN<#sobW858tiWNZUd1r0^8h83KWOjPhf z8q>Z29ZVEiw2G-cJnj&Msu&ZxB_U%1NywN$5;BJSM~_T-P?edm2=elv_Yf#EuglLT zm8Fb^Vap7QOHKf74H3>Irw+9d2F8#K%EkH;HEsqr?%@PPQZlY^+ERm@J^_IhPSy~M zupy!41gv*Kw?URx5W^Owi9BI2L{lPoiMWR%ds8oAL76JhF;4*wJg{3qCnc^I#bY+1 z;kE29&|1oxNG_C46MkU;GMgyvL=brqaTf$-$9-Q4n3*GJF%G2?7Sx@By5-Il&0^qJ zBg>#v*?8UomSO7T?B?vgovnwqQ+P7)LpbntH*D10O?igttHF#J@VmfZLqN#oS0?%! 
z_E`xIKGX*rMY44v2xtiDcHT14X7&`-&6Fo!9HwAFT8L5VkeP+QjJwU>7-Ie%rF znQqNbCHp4*)e%@9=3B(kO|WTl>iB-x6JDi};>oJ51^dPwdFyL!)^6xOqwtYiaMPj5 zIo}xQ4UZ^P-}K}bRlk0NJrRX8&t;%Yxs-vno;WuMA`nJ!`4f{Ul#r-=|6Dl68 zEm^z7b!P4O3w)hf@4cRD->%J=mH`^!!<am~Kwb+(a&vO}BHOuzw8^oTLHt*Zri|?NduL!xdd-(MshJ|BF z9X4sHuV@Oc2tDi@b*o0Be%z@0&a=%j_N|{Ee%EO0_xjDQfw$l5hTe6Xb$DG-4gYwR zL+$s$7c!36w2opWx@`=Mdpdh(k^Pg2mAOUS0}TV~PM-dF=Mxk~w4m{eHtQ~h0V?y^HCA8_uRv~D#1&E~<(GP60JtNf=tzujT^cvQQ!xiZsTyzt}e=ia&r z-SJT)vNce|CW|lxi~VA1K$4-G#@T+)Gro9Hc`T}3Q-8f&d*8vw5==uwpN<*ZU}yV^ z=M$QmY@IjJUS4_6D0}&@+ILzQ{J=1HS8o)_@yY+ZqEA`9o#li#a--MlmLk7^bf>@F zorXCLTl~wb<14-zyjA7mbj$Mjn&;Kg zJ42dQXyl~ZkJc>m?7yx?V{v+6{iCTCNfS@6E(}reySjLsOR1T2sF zQj9+6AGw|B>-)!N)*tEf;Lx1-;@B3oPt&Xm z7GC=`vc<)sG5pI53q694?pk*D2c7Vl7q%Fl9DVK15g*I5C0R#wQ>PUkC=6ffJMM9f za(T$%1ebpMu5%(scNDz-vaDf$;*CweSgiQgro2HnQukWQG2NY!)x%JT_e!%I&!GXY z_0)`4=C8>t*r6BjJ^wUKM*~eAQNW_S&uU*&SoEmo3Z; zuJpIblJofE3FBPBNUxEX^1O|orxqn%t78tV)ktYE_{p_JyU)~W`3g7B($gP&M{K%u z?Dr~%lFS*^u}kOgV+=e{sjAYkZ+`Q`g5aQ^O|!>OU+gv^Vd!bkAWo&$maEY#Y`6RC z{HP`GUR^)&+%cuMN!KQNGrFhO`=dn9f4|-o+_F)X`>uqK1tYX{y z$ve{2M$g!n*rIKybV`0#pSS6OwoVWICgzxIxjJHHQQYfvGrxW{4_B)PO`kUAb#kWV z@Aph?vICywRxMK7=asGcBz5tY8zzP|gSMSceq~x67_};ByYWh`ex?48>n=U)r_G-5 zbik0dtcBasmlwsqmRotZ{4b>oo6p<~X001J=aNTJ~W?wa$0DG^-yz`KynD+U74@Z;th18yCNbm@>=LJN4j!c0HBY z4PoKOx0`h29Q7?-E?4?(@WRrKY0(yIHNy9OL=}nmD%2+3*fvhRZQg9AI{QS1hW+^6 z{gvw?SRG#;@;5G-v!z`3RoZv+uVwTryWD(scxmkP=CdsQC2{&=UhJ(^oO|29;T}hQ z>xe9cs!fSLmv|X=yM8^OoA1$D{UIp-48y>*_)*T7C5gWuZ}_m0|6S?j`EJvLtDa0w zC_ETY>d_v0^ervVI(W0mCPHV@Q zo;S-ra-jBtg05Zjm}_YTH8*oq7aNb8U;Qht@9yZ>n z7$1DZX>VUf-E4akPDgUk{27afr<6N4*fqzk%GOMDRhp~+pgBilVD>M!_D{C=eNuAv zaI4+P=av(nf8w0r88qHEnmvC~_|l(wi9a0MW*Ph9yE~t*PS*F0UXfSs^=;0OC2O=+ z4A}X7xrSQv$!j5R-+g;-T=dw}xwd8(8=5`Z_B3%!v=*$(D|%J@BnAL(7;m38Ut8;< zf52gdxYkHh#qp=UTYWil_{P2W*YxGKz0-1BvfaAi+?+SXOE;P2XPsHx^1Q*Y&N3sL zbLnd1F308-?6mnIfro~gyf(K9h+6cwZQs3%?zx1IYSSBcS2M`h`G`fdV{nVdrkLb+ zlcMFG%=tRnp(rA6k#T?4f%Dr(pF9vZ*(J13eDSM~zE^pp>uclpZkiOe@Ce)%Z< zFG^Y)S5{n@6EXWrT=cTwi5^(u_tIq4#VdbqT6S;H8LrF4vH>&HC(JuyESD_K>L(u&!)|Fb^rG1 ztt`8R;|l8zXE)jT*J->@4$do7vn~G}Z+jFSztwK#?kI&PhA_J0*P_%Rkesv-6v`;x zIf)5UzUZKn-I2ABos@LDTw}fYgB5j$8sxH{x~RE&nN4`{Qthw}+irQp*==!WwtQEk z9qIURNbCUvwny8h`P(ndJ^f;>cc||8oTgz>M`vZNO>%qLH)ef+MUc5!sD<5-t-L;i z$89L6TN(B-^RWBwb(5c(?B45hVsy7c!d*mD}ruKoU;f@CdD-x1dcGY3-k>< zxd0B2Ijq4Wb3b#Fu(-M01)TBRg`Ci&xez`pNX@ctGc4zX&n5U!!%`{Ct=L`aA|KztVkXY71NK^}8h9dks+EMJy4%Yo&| za%YW(D+lNyXel9XHgPrwah-^x%?p1$Be=JK`!%*dW!e7p+RKu1zZ0;PfCKgj87K?c zFl-otJc28HLIawA^_bju*g)V-iTgM(NnM1{X9>1$QKHF!4TZ?CA^6Y$o1z zhAB34g;zVc&j5|GeId~ zV%|W3uVo-DLaV@-Rtc5E1HM91$LsU>ASFR)8;Fpn62tC?Vkn8hhVc6rxsjv+l3D&M z&!72N1!QGX6@c~EylltYkHN7T#p~I~dhqck2cU7_0p56zpU;DUbEZMSYR`s%TNPd^ z!S%HV0xb2RqY$w2Dg6be(Q}`JIZvXEfkVZygs{%M)Y|w)%#(xT+c%K-?#s~c# z=k5MDRrEPEpQN;SO&Q~AKnRM9)Jof)XAmDeuUqHYg$i+gyFTLkMzCEE;@v&=r?EedeSQ3n9y`+5XUG0K-T;C(gJ2&Y|GN%;myi8?ykn;?1nleM z^#DBx_zzL=AAsOLyufP(_`kxjV{Qb21Hl-A2?SFJW)SdmbGYI?X%-MHAy`4MhF}8$ zKmP;)UuDNX*8twEgulJvz_*6@&pq(>h5yQ-3;#YXkOJEghmFq&ohQy{GtdGGR*h7q^md sEr366L%99)mOuU4*RTAR+?*cmQ-nU2z9{ z7pT39p@xSe)LEb1-OiRW8yS%?6MzW+{=fAv)QGorYB z9y=S7rp2TO6=9^^cjDyBi?#rqI}##~N%>CaTLsb>YMLJO6Czq$a6vX`5?*MsPrAle z?XP>CzrPzXzA?@VB(?!e+j>6dc?==vq^j+ShzFk&QK`hM$gT~;@?LcY%1O-x#UH4Z ztz&erS{~EJ)!^DHm=1R2zv5wB>+Lo(nAo4EeLGXV_ROs!e&F@-f+Zad7IQshlOSJ( zg_BhN#k)I{;$197{wccOZC;einBWq9N4nQS<+wvVFR&D_7u2ri7ITmuV4SyV$XpLC>mdrp%oOG2<-J+25~w znL{gbc522@$X!o%wSP2{(K0_YQP{>S!BX!3Ox53{phss?^I)u{JYA$mVtAA>l7-hP zK;P$igU@D+8hKdji7J$Uh-FLt>qv;|*&|if?wY}PtxgH1;4E=v&&IqTzuB}B{ zM>V|EMW<5GREEYjhbHUt?8-KX#xn4lUCdFwrcMzFe0`ZQX^#+ 
zh=}hWJQ~|@&AzGV3B@Cp3wVNPxWV^K5An$&w#$4rw68+oC}i<&5bT`_X{kUGES#e7 zIsEAVj1v~uY9}zfpiROb9QYURwj6E_PBtbE4mRv2R`z<)BXVM@px)=E>(sG&avccW%<7>o=&s%lP#1GaOIYrGbjamsQ%B(oeKs(YFoMUNJiX#WacZ)w1?f zb0iF&D3v%^RXwbVWu_CPFDPxV=66;@%*u`w@kqk5wzbYKAt8qepn?ZqekZ#-Lp7Y6 zmVVb48foECvt0Xl3MLa&x$2p*ZEy@^pmFDqMO0)GjI)rRF+Zt8DNrL;^rZfNS^nMD$l3|Cy8! z;H&{|g@5-djp?xN;sifs z;YPKV3wxHWd`dZ<1>NlKSemN-)DGgi-WAJv9&KL8Jtp2Zy-TuP3YaP9r{=8%V=}(o zu<+9Aa0@J$jJKGXz^GXxdW?6v3zec&wSu6ODNa6vUi>;z{vx?bhDWh}3DxHrFO?xu zuJ6zHN1qLkL#7%+Tl7RI2On*qXqc)gXD`bWRSY8;^j4IICh7sxw!*vDOT*?S42g?v z%)g;;wvdZnDCKrFZ?2$0$K?Qru!~ifI9!UYGp%GG!6hm-H)-+n2zh8m=^MT6sjsPt4Ph&i zMf0q6i!5Qu+TQhJlu?5f3C`gd^V*7(MBhv(7~5$lsadNCD?`clOzxw-GmSqCc}nJ$ zT9V~Q4RPFR<)0-v^^o@4H0K<2joH2F;9f3fcZD`AZOlI~O@A|a>2dP$PTP|kMi8~E zl(<2ve)TupQswIK2(#{Pv2oz)l9D93^K|%t*(s5dV}mVNX2?zhLS$< z5{#Ncdm+77vZLh|uTgD*T4R<-+%c+u-m3l9bN{24vL=s%aCnhh&>KqD?Qb9N6D)T1 z#|_C;Z+(km$Wa?S3y|o?R83fleB0ak)@S$C+c^oe2c6(k5g}Z(!c}QCcANnyIMA5Z zzLHL9shMvT9D6W8v`#F&naPvC{v>alu`h(SEiv=n2cg#^{rFyZdE;;ERE2U6l4Y_` z^W&o8t!-s3_x5UwK_+bHab6Z7uHLULdWRXlB^?Wf1q5*L`%2z?U)SXs+@i$Gc8V&c z;u(LQq({#wP`1tYeT6oI8@)4gZ0N)W5Z~Nr>^yZ3;^af}B41s{YM|%Sbm<8U)8@sv zbnq8ZXO)U4?&8mVf}MFZD1a5BMb}1X!XzghVT1-SHSb0kj@Rdf_D>a}Ol?dySp zrq%XjW=QOs3!46&`zP1tb$n*STUmGu*f7eWS*vNi;di}RC|MB!GC??)-x6$>qw+K>htK+WOiks8fGeiDbH$HywGTvR&<@vtN z1scLGM6%1nJ8x6FX~nu@MR2!V3&{uGAubrw>~x+~+I2FJR!0EcElxhH@JD`=yW98v zEPD!(oJ-0pLIBHAc?hcGUM)xXrT_atx1iY8`-g56bP2HF61rUN6}9Q8O35QC$xsPk zs?m1Q{uEPhWnaGu1WE5%&w|-Uq@aNQK*;YDXQjs~4h_-eUPr6;ZWsFFI{8+h zTTA(y^|#@B;HgAqHWH$#L`oP38&c?eoJWNneaI6$d;~7hQKQCjwh)~!91otIyM3(^A;_*3YCLyq!4Zb?xf2a-Rz;i+k3uWnn1CH)rj5x_pf`n~d;; z4hO6eJD>V&{-d7T!|NL#?HUIT!?On${E@o`y}{(GO^N+VT0%-3>a`rB(}fd}$8pjb z-@Cn{yf*^ItMt^pX^)GdUKn!M;9@fK`OJkL(X>O6ql({mAQRem#3UT@zRJKQeo0ds zkdswh5yvW#?@84{hv#UL8=TfekY2uG3XxalFVTEfoTR1YX9hO7bM%xqFP5okj8$ph z=_L8HSVpxyI^*<2R1>PNc%jWy6 zGS-a6@r*Y)&5OaMhk^>ds`9ZWYAP-Q5!%NO$y|s$#varxD-I=`P^-z7uyJr)Bu!I< zik9kov-Hkq%p==d4Rffyy7lIqg~qX^pmbIZzlSfaH78oEb4gFN=5c^bgT%5mg>coY zYEGo}loCUrMuQ33EMHMeUWQT?BoPQH&d)M6wDv#K^$|0mYhQYEMXGs*h|-a%rC_eA z9^p1oIWRq4s4oBkUIOaE@$b;=@<6N`_7W!1H(fa0^9FF7W>taWM~a3;Y-JVihji^+ zzv9RC0h;Cr;>DWp4!iRC2U(VS#d6!V6ME3slp!cDguRNqX}RkP^JUvhWG-{m!PpVa z`+(7WqQ--{`_WjZ4AZlYNJYmCU%TtL$roAso=ckk%Z`k+d63u?|lUr%)XU^ zJE%-8XJ$XqUrKV!p-z4%Q)?G>$lm6ji=mkvX6YkEiX^u7mgLE@n_pJ!fxszEA2?f5 zg}41!zgKK$7Y|#g^AC!Yt0w=0BKdI5_jUwsvj!|dNILFG(=?_VB{2KBBbZ8{VTy(L zH@hQgd-{>Kk9QN?GEQp|72EKs`Rfr;PxSf+luCQJN;;S{$RDg zcR@e7?QcDh?Z^-AkBPc;k-2Z=Vwif&*&CuhWgKm@O)gRs-_4!y5-Rug^u+G-)M4T` z;jHW0K+So^@F?3%n(a>ci5Bf%#0ZPCLGsY_9QL?PyeI-=?Ib}Es6;nB%B+c(p6ZQwF^2t{U0r!-g z?jmjoDp^7Q?2SfiSw=c|pGIJ&IM5^CWNFiF>fyr|En0a*jP|Y$!tr$Ix4oodFx0uaS*$%B8sn z2Q`O|j2d{`*maE=+DT#&WwanB)Ah$eUg$Zk)uQ#9)5emih2dK9>!Q8D$G5f=$;sKK z%@cluUjOttA@Vw9jcVh=wt}>I@p^n1ulaMnhxGQ4lzw^~NXt!}&>|bTcuCdxRVi|G zb+mt0C&_^SHR)CVvkD4INAxHn4s;-?uI|Fjv)V1vVrGpmU4kn9cv{5A%_KWfh`uxr z-P|`5&+Eq{mH;%pu!=+S7H*fML+Nn1k?0QaREfiAn8){BWY zu)TKG#d+T}0ZVk8P234Kvn#-C^6L>pRj7~%Es_o`FqmLBq%+z11KBYKQK)OEmCqhQ z-or1A4!SwZpLP?;Ne_>zTh=NW9I?19mgl1tBX|9-Cu|-5WVM4; z9WS+dh~PU-|3#uhHe{RHY>m)MsLSz|E_2)dL!FMzTp8tQf8Wp`>}q`Pwy-iSLVGog zY36XsZ@3`0OsB5_zYh z`O6K)ZNb|^+@GRj@Vaqpq$1}G9Zu7&R0RHd+5BxYk+2U={+mz*}C^NzB6a13E+y=bO0Dc0p65a;9 z%?EB{{k_uvL;?V!;aLBu`?tmao@@Uqu1NeB@jo)Jiag41gaZh_JDdG6yC(f1@qcM{ BN0|Ts literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_datetime_mi.xlsm b/pandas/tests/io/data/excel/test_datetime_mi.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..55fb88912afb9cd6685123f34c65b612786d329c GIT binary patch literal 8700 
zcmeHsg;!kJ@^#|`3GNWwH8=#fU;%;!8YeXFPO#t}LesdrOYla6Lx6-vLU8Q>0fGe& z@b%1l@0*#-{QiRX&Ry%C)$87>b^4y#wd?HCQbR!{2A~5l0RR9!pz=jep&1eYaDoB= z5CJfejpSXNJweW%X1ac^AP*BBUnfVpLR4h7d;l`y{r`>s;xAC0Jf_jlhp%{{{8N65 zJ9fE222}6+Ooh zjA}qxr}J{5rOWB|NiIox@${502FW6zVf6CaUT_uleP{!Xezgj_1ZGGj5@*s4;0;4_ zqCN|7C<0TP8;(41VD8Ea@p)j(p;Om8RK!{i|4?kX06VNn#zZ$fZd?A+$iX7dR|Ag8 zj%}V^WrM$Y&n|G-TZ-!|FPtu_y2B`q;_X*AlljG-&6>ZI%x*MtC;sxt?dtn2IFEGK z1jtpOV=gKTh%u@t%oD$Ier?&{a2H~IndNuf&WbV#tZx;` z|CanID|jxobQf1TT1?M)rs{gCOB0|EH(J%=u@*z$U@tu^hhvkq7-%BS)j5akS+t2#WeUNTy>CydNNl zYYrU%z(af)Uq@aa7k39s7Z-=$h^tu7#3hT5I54m2{&lAf7K;p_E`LcejKfQ3*84!M z_yZEFfl&H93vK1~st+!`Q>LURx2og5!<$cC7sfX~Zc5ia$Epr0P%s8;FvO)+j2J-U zVTX$qk}|1N#p9S{%n7hmjvs+P&c=3uBg0VqL{b2RLkap>Z%dS8!cKN3rrh>HYQ$SK z7#~FKsU8@t4u^aTeUiHusUd3+0}NO0#*pVC&tyd|}Id$=# zL_z9vN=|Gca)vu=;k0?6dT0pS0_X5fQVHAwgmr=W{^PZ&pN9%3CiR92%B-Xk{&8pL!y=CFL0?VDBn$m3sRt zsxWM51e`Q4Z)(*xYJFj->Wh_PF5WmPq|^>eQx%ciNknQ4+KCvY`{C#k?=L*G9TNEY z6vh9QRL;XAz6_@R3Y8Qn9Rm+wt$bfBOKl-Hae@X()rC~4Uer#7jeDKHy)-V%sp1*S z#LqT3c9o0|jX+sPvQqFrq9!3~!F3IR7H$N!Py#TK5UBYVyZjkD|6v*=MCFI@^4~qG zlg8|SuY4lz!qNjJy|GFlBl8fsmcd&9eIo5ZYPdD{bFkTZ=(3Em@H}QM`mzU;laKUf zz6LS$LPI_Yo9moRyfS3*n%O_ za-^jQ=K2){Drgr#ols0}gfsL=U9D~Olq~-KNxsCM?hR0WY3@^gB|Fo}f%B?hSBZ~H zBBL1ewM?p=GMQFN40lXqPd4+@_hXk6lE>iO2>oknH`6$Whl7$d-GLIWPnoKQ8wN!+ zNhu_5wob_YDXSV788(d~VgrQ8s^o}`|94jPumyoUJ$Qeg9{-kJvr}YU7WjyV50lSn zxM;nBZiM;7sm1njTK)!lDb7!_2I%!SMH_G-8p)+J=aT2Rua-coAwflwPnbQ6l${t| z-v{@s`UeI1J-~BO2>GgR$1{d`!;QzN&%zIf3-;yJD*0oO+sThXNE@sOO!*V=Hd|f# zB6R1(ZN};7AmV+{$Fr~TbGFpAmcb;PvZKX>Gw;oWnA^4EOFt?TGaYpXGO+QwJtHsQ z=CevY4#x%Y6IBZeDZC4%j5gcuoN~xi&J82dHThK8r4WmSPh{%a=&2R2z%QDWstr>? zy6HZCA}A|@C505Q=pf=8G?u&;0|r``qFHGVcXr)M?Gj}Q79ZS7V{iIITY5O-xrSkx zl7Uy940_Q&2QfSLgv@(Z^v0JF+Fg)>RuiI-C z>h|la@p(Dv>ja#fc*9=8{`XOduo{SE_GDPna-Wb#N!_+$ z&*cgrAW!{tBEx|7iqVjb)^^Fd0}PW+QeOB5z?8hfT_* zt}o5aF|Lzs=l336Ms+508MOjhF!meeSx6cs373cAH<};fw@=A_JkqoI>>yb{VF*o~ z#eFq_#zJHlPpru<)WCATc=hMIdc-_j~Ac)f<@NuRNj*z6BMnYrzg zn?Aa_v>Hp%b!$kloow#O65^%+X>X^@O5!KWcuNOtOjI6fuU>aYH(h((Z{FQ}b3U^< zySdx`_E2mOJIWM8OcBai%T#_c| zA_^^X+ zS+~}SWOn?Qbf)RlUZ-Dq@Qq zq=T4|Np0_~MMG9u;)j*{81qPnscFJi(n&c>v}?#2!I!3?mRkOS{Pc|bxGAc^{uP~? 
z>yMMC(ABn+#i)=U1qa4mrrbF z)IXG@mSs(o!8bpr+wI57S@ zzZBp#vNn^-#^@l9<|JlATO6C%+Q1_7G$x@-Bq57=n=U)9s@`RtRLET^V{8i_e{qV5 zd5i&-_PMX+z^Ho5PtN>V5jx&nN&mZ}-P5QZ$>1M5SN&!U&}I%bXAEW?o$s@WX7{(Z zWGiMpcSqljMRsRckNXCCeqN2G_1qtepVewYn7Q}M@O!?>dVQe(3p;x$s~vGJgU`zi*14Y7aQ913H6|_ zxUqyWrJ}u4Iz8{?<7uioNNXk=dH9THwebCZ(Yt7Jxwq>6QVU4~9eWZEU8K2QbOXe9`9|u~HX!+7S5hL(K z{Inuvp@Uq=W%r0j}9tyFNeiu>lR>I(?mtp7YpUm76>$`PB|gqzN17{ELYU|QeO3V=#(a4bG25)xnd`I=%EJW6&!Hqx3J zvi*{ztSO^2z833ahSIBKfr<uJOT$r2G+I^78ae!fhW#a&GwkB9E^evS42DH{d2zP zP{tV}vO=9UZwi%M=Sv@pR0^_&Cob9ID7}2#dE@-g3_CUYV;dWvFE$BqVd7}c2wysRDTzuR*Ro2F7KSoUD2MdZXKTKEUmefG+2z`9r>;lec|@b+aTJYL`aQND&KWW_1#or``ygpm3K8IxU_S$syj!$VhcaS>It9h_29n zVNApSb6ba?n$MnFkAwvH4kynrZgs2Y4%~9i^<^{ZaU4LdnL-M<=!j`I#+cmH1;f>; z*KuI1K!Z^$FOn-&eWp;6-QY7ORDt$AvS1u@K}wnlkPwos0LgQ&dh{MZhx?RMvsy7q zaF?<37ZkC9g*?Daaxd%7;<+HN#H|uoP3-~h8llNtr}qCFNy6rMa+;m6Ar5>C);#eP6 z8uvbRxn7&^h{YynAno#wPj?BEG|i`H8nw+=o#@stgmy&IzWF})4ZZtf0mwEeQ6nm8 zs~9mhVf(l>{5)J^yk1g{Ro1w95*?O2%Om}9q!(|8H}^oySjQJOByek|V77&EOx=Ga z-;D!vp&wF@eoA(Rccxe8byoe!9NZ4};NDKvlI)*i{e;aU?ID;b8Rq46a$8?s#e4bU zv;3()5EeX%d~2I%I~{#`le{FL^CNjlFJ-Qso=;`5yPNe&<0uU5NG(6vV5;!c>5`St zYqKys5WR@v5#5+tS%}q{;^M5?o94DMR_Y< zV2=kQJFq>z{$sa;#hdrM;uN{-J+v)7H9mNX)YW|l90p4bOa`orCU3dEw|1Dkd$@UY zmp9*dgOccc&x?e`0CWT*X*N=1(~L-KwBvk2)$FNb&6nKnev`ItLKq?IMy1qisq&)N z-Y*MPk#MjBZh7;Tcr@Psc+jO?2!7~U0#Db zihnAFtIwXhMj}vpgg6ZnAd~_tmzP@ZF0LNDFJ0V0|85QaPar@fsd(_J8bTl#UPZf) z0$=dl_Lhd%hJKLI!e7=3h#NI5a-sz9eZBLa_N=11UNdrE?a=J@N!!?{swfgNC<98e z?lRF6*uqpxs>BSr$&SThsNK2!mhID%xkcTd=jIak@xg#?tu;t-SjJl_3y&?nZqdN_ zRC9czB0FL}O_~NcIQ7HKoD^}%@;Z?-nMdP=UmLq`HDL@{Gl6NeV+riUjWAvl6WiO$ z6cz0rr&)bB$%W{_GTZqF%t%JpcSe%8kMp^KLJMSSXu^@kvEnS~}nD?X-B5S874=E@U2P7=R`Z?aQUH zE)!;!)^p5OvXK3tl5)5DHzYuVs-QEF{$zhuFt@scD;odFZKDOyAI{#>Zpx#a!E8v# z+9*1aYWMMkU9BN)iWOfMFj1?M>crEd65r4{1EN>A+rA) zc2*@ya^f%syCnHI%6f1%g*}L%KsmI7U|%XwrJ@;BT()v3X7!y%-8(9lyxk`!yg5Zj`mp$iWk;@rS`(;>Qe-E zG}#62lavU5Z*ws!T3^{~eMEgc*3kL2p4AmskZj#{gk|_l=H6k!H|Of8LMj}UOA$A) zK2`=Rh{Hki!{<^AsOsiR#_n+Q1bqlYl63r=M}caCb6x11mjUckIxYFDg#}?;XnC!c zRVfi23iL)G0#s?-0hIFWBc+UJZJee@GRcW(nP%*pYWkgQ;p3+7WEG33OGK>ZXX2Z2 zm7h!9Tq2fVu%^O?JM3PYGiS}5Rzyd#-RH;`QamVT=`@0T=P1H4B^{mSv5{o4k>e_5 zQX^I8ymFkN`4L0x;?8-AQz~*{b=&`T^m_nXOHS-$aa-d2Ba99DK~g7S=pJNI_VmSR zP!jL0;B6qZ(-iUoB);Lua=Du}wu;!7e?>DAGB@D=>eT;d_x>~gL*HIY?XM30+EVY&kVN{sgFlMZUrqnYrhga$05p^Uz~8v_ lSM$H-vp<_JQ2)vN-`Pz|4Gpn30KfypC552xa@ya1{Xh9wA;I?87=5_I9J zE*0Av>cLPLuF6iZmv@k?|McYv;}{Eo*0yx2i|S|Y z@HJ9FeHjd}uHH?AmUJkss%QEFd2`#vK{~Yf)z9Xa1)@w7@yFV_^RzgKLna!OH#IAA zZ)F&+5L7Y+kc7yNW7jz)}yb9It0 zu1ey@7hDQYDuqy5^F;pJal0#1gV42T&sY!rDQ9PG(nFx$b!>w4JyKFvx&f5a>UBmV z((yca?Cl5W*J_`0I2%{c&-_55QPDPZY6K1&evECk>@{*8mqtpz1t+0sM5ciJy*-{rBDV8Ql=o7i1j=-Ov7nX$M_MnuP>)aJF zICw&!Mz6|<^jjAXWJX%oG+CF5wSH{R>5J*hOgTj#de4po#`5NpLZzV zTg_GZL+vRqZb-B=tOaap%=26YseDWMNC3cZH1f7*^Kx>vGk0>b`z=ou zX&E_Xv*QKi*FU~)w?d5&$I%olrnXLMEwydCq*32`jaxx7Vn0<>cDv#Qq`ZedZzZ7N zz2q6(UiPaB_!(f}x}T>wAdL_5-=vDs&V=eU#7`WRe|=7=WTUhEGmtE+I2>mZhEC1_h71v5pp+t~`?f`M2{pE3ijC6cr(C;W|` zDx{GvF0XgbD_(~m53+I_cj0DWG&1VZhT|{=AdIWX-F6545G3kC?YU$}I+7ychb(g? 
z_lYnO2PSGY52EJx)ZE!{qfgp4;7A;cMiTsJ@!8q*9^mgdEV4^#eUMR1W{782Ny*z% zo)VUoB8mnE@K9#o)|-)5;+Zk03kQT75T2{{QHS@!2LxA-Lztm+?X8e+Jc4H(*d+;NFUA(<`QHCp)c6lJ+|$tDG6?!bFaatsu=O;Y?pX* z8?q?XD2{tmm9d?O@nc0hGxqr46=xr41@nnt&ihw>?1~>QH{AsrmrmGG;dpr z7DV6>?$+6-J>rAbSU>yDlkNS`Jl{oZjd4F;T6&fJ@M3BqFShSHw9V4}T^`M=h;Nq> zS}n8v&?|Q85#OD`O=jF%1glFSVx$pnO5>q!}7Zr2P)#Ni?9kH#ytQFKi$6WWA9P z%>?Vld1PCstrVL?powsGW;@|S^yl_YApy@e^i1<_L%UV1Yy^8ZEV5!3G&B9E*5Lsd zW%+f*MN1rW#`qN1o@NL!$?;*6IEO`Ot*!ng^+QFO`R@K^_|R&r@8>#GLnbg@{GB;* zZ|0p>VRu+y>4gM<0taKxKQhdp`SY)A0|zViVEp^<9u-N$w!arU;SZr10V1BL#VOFa z6v@VcdjMqud4F1%CFEO>$y&&gxIFJ1N)^(Q8?}R%*hYa0Udxq=RMJ)fRL|t9YWi3! zK)55soTG|2)T_vhD#GaIy5_#9M_#bsh+MarV30&G={N$~_-?}a`;!)K8-LQcA|eCy zPtTQ=nupE^V;_YIg!eV?^raSO#tO<9s80_ZSGYThy_^z20xhq_(`@7k)l#Eaq9VF- zXym$wuSZ2rAbH{1x1=t{F?JXOB4nKb!p?Hk<%6{Y{Hg@R!grgeg#Qj7x(0gnLojTB zVUd*xw(|WgXJds93nh&UNl;vC^FU*=gz%3UGIwCqmp3e5j&u{tI?|j)B9; zt5Fbr%lC*D>Vxea_oBPFS=>bj_hM)pUXkW*j+oA&sK$hl6$jlOq;G*V_Fchq?xj7k zAFvM?y&0L46uXT>&yCgV!RhOM zlQ2=4VxBV|lDh!UZ2Bd1vQb7T>2y9TzejQmeu%(jGx}1pFa_+coq?<8zg9XFyx!C* zQhk7I)K0**wVrZf<0uZ)7r`=ofj+sFxQh^tw^+}cgjrhXQnCr(<0Dnj`u1}71Ub=p za27GLuRn`a@0lI(o3rLA(H%d^%syHO^1XMHO1Iqx$zsQBLqnAFc=P3>TZcil(M)C) zm)Zowo&0o4Itf0ro8BJTx!}DV;rVkdt8aE9g~WO-Y12U8QAAo?n^-(m27!8VBmsX& znu3jIlo}21G6z>3s#d+Z?TAw$(>Vtux{tW+0e4N2@f|%M(J1#bm*P>xjNGyQe#n}I zt8O5ZSpY|cci2KBec>GmM)CZ}>n6d;Bs2yLa8_RH_=cCJCb`ODWc4cC#7+*&crjkR zznG^(RvDhq66WFoJ@qp;CEwSbk@dG8j~fqndyW@Y7k3Zadl&-yXm5;>6&~U;x!EGG z*W z<<&9!&J&(e*YA=~(2dT5 zDz~V!?G^PsfiZFq9Uneic-_CRiRfF{*1J zD@zEUDbNh6i6BRBU6nzrmS7My?0b`I`N3*`rU1{xpl?R$z*w#6+)hne!L>PpHN3&r zt7RVFqIV}AcNslYq65M(_A(Jz{zF|uS(l1$OF#CIQxxzTUY%NDeP{q(bsVL(Ifh%sC8W}8vO7c6y3U zh~LBU?%A6zk)ZRPn?94;mIg*8M`Riejl<~#lgIme!ex`LhvU5yzTK~MC%ye$KW~Q9 zyB<#jFKX5o@!2{9PEU7}30DFxwpA0z<`+oGGL9*uJ|0)Cy*i?eF$>1HJAo6+?Pr(l zhlpeEc$ORvL*b1LDE4|iY7TlANOeCUwhMRJ&9#!Bv{1)qR?B`wTznPlN#rHCT9!3@ zeC(X3yp(*_64e$po@95(s#pQI+U6?ISy!Kns{wxpM&m@i=kJ-+==vxXOIF?{6}dGd z!nv+o6Lo%wRUH~RMI+^EN~6wNFJ&G%+zX}%n?D_0cBt+vv6~k0w*Z;J*1;YaS-SOU z*qZg_4|M@)4j!`37>i9#*PtFb+A20`Y2Rpuv)fn9VM^Zx-&~MLbfKbeVB9@7>gRk_Ch&0?|KN0ygiMr{In{f?q zw@m0znWmEcP^@5re$l$fF@*y|%Iv-|xTd2pfv-+d9BQnbTtd7F=%cV#if%}xmlTco zw{kRnbK@~ytn$Ebk#ELKG0Y(DutVCq6!zm#^sLf-2@d^@oS6ch>IO|smPHOa5s3Y5 zi776dE=RFm+Jy+4MWCRf6E1~B;y)Nl=a%c+AEso_X7ge`!IRbQz)jwa8yJhKrTgji z`MDRI8xmrI!O2~UU^YE?TROPz=@mJ%ov(G?Vz)D-!tI-nmYebP_2XTc<&?2#HTa8N z=9&Jdi^~fM+stloMOaZZMxX_Bxan2`Dhgxq*D7{*DZE56^+7dcw4{JI~wqv@rWjzm8~e0=Y%%T<|yRm80mZu4#j#JahY+qY3yr4T>3wR=eDjr4wZ**UnJd>` z945A%3>95-FfFadc)yV2O@l2;6C;S*?gn>yqh7~^y6%l~1ZUUaCvDw$ zt($(fo@tMs6iA5&3C5^-zRj@6>*a~68ktaKA}3Ov%jzlIXBEyUdve*Ky2M95{n-u4 z=DXGV0mi+r?#a7vgf@ZYr&3>Q8H*k7RajiB>{wecaHf3}+ty%Etb)6?3fs3lM%v(wcGVh!#RJKBW#M? zyN1sNdJdkp;g$^|^avd)@5X<86AU-5G&lnKF4ats3LkfXfEe@Bj%xdSs12@71}L^P z;J|l!CCI6YLM2Vl?hEcIm|mG~F$7q20WCO9N{(?P<%`hmrzGDKYQKMl8Dn*;#we)nAxLq7*E}vf|cr&@$L7pO)Q(Tnt=7E0~u-(|^mVpGD+h*Gb-ZDvxG9%jLD(=ToLqo7gB* zEXshk+g_5m;$sl(+R_cR+A`L``cOut4&m}=AJX77-L$EkM!*l9Va>}(dO<^IRUMoAR#<&X^2EFD!4h+;>&kf(p2R40S)=DF zB8Z%McL|QlFjDEYIZC3&Cc{?5f2jawgm^+sIT0 zFMj=8MwUVIT=}VH&3s5(1bO`7%pOwb)x19afN&+hh_y`k@TfIMQ`lvg%1DifB%Op| z!#L7J@-(a17ibUW4qM)VfT4!>#3!zM8)=hGvq82`9==%8_z}7nm1X)gBiW zW2TT+h#Sjxnwm)8B;6PqtC$;if=H-`$LW1dNjclK&NrzuKmCcIark@d1na5Dv%BO) zE{*f#MXl7CR!Vk-h0ad88)l|BSpT*xd; zn_S<&7+Fdo5q>Nkxyz9xuqt-dqEAl&@iB7!Tn^VBfgY(E>Q+4z%LD56-q-GLS9)L? 
zn7-zEf^&-A2ziaeZ<%}e#oDzyL*!0_{au_N~LeF52sp3 zoI?f`Q)xvXLs0FirYjKH?u<`gGr|d%aCs)%W3HgnW9yTRAcHf|wq<^oh&L4Lclvoy zfcoWTzK@*foGxqTs_n>hMqvv91Y!M=U*eN;;x3!+1M$Brg}o$S$wa_ddJH=aV#AaI z3#V6Vu1?NwY_FVL!T;)SDUvd*kFAo@N`~<y5+(($91lH!t_c9{@e1(lC||@9q~G9 zfpSPLrbso% z3{b?+nW)4??O*uTehCF0i50V5d8rmo>+S|>!^`W*_o$fx!4g6751Xw`m{|N_gE53* zi1C#yP6v#w`=n)znuon2Y9YAV#OG}!dzkQnARaI8qx2?C3N}9Ek`>!7Ni?SE>dq0= z)}V{(a2sHixNK(-ud>|-;ccCnvuEm-3TgdAdO4P7+*UUXO0_e~HxdD236$gZ2lt^F zTW9BC#oNp+v!FwOSwGK=*0z|sP205#HWF`yq}-dJ`kyeN;_v6U%H*<%3>bKsV95mQ z&)_w4cK#oBVW9oz$bpPHF0f;T9U5 zKSTo*N*lmMAC@nz*suAOT-uHtIK-tlj0VzTAo#2GPO%GGKR!Nqy7Zw^Z;^mILZit5 zbCo}@CG6i~VhsTZhI-8g1mD(tC-p$?=#|lLtG5aGfnUsf*z9ERZf$wL={f1iaBchd z8aihnH{qH!ly>k!{LyaSJNM?eR5T2MSq2zT6D^J!$Y`hf`P+Ns7R8NMpw2MUIPDaw zB(d1|=K)FsGaX1w*ZvHX8jS@jZwo`25%ZhO%TvSKq$v%+*a%|4{wF0l$FiA`>ge^) z#gh{dvrHH^l(gIF!$yohO31t;E#|YB`x@H-lz%CDcMV&9;i>{VaL~0XcgB(?{T&7V zcCS5qaM6H(xdRmb!JdzCQY;__x?5in^wJ=(qA3?wXOIs!{6pNFpB)C$@r_`udS9pOe}qLzxw%AmHfjK68iry;%^$|uU>u?cmD8#2x}t2R`aXW^Q(hj z%hNv`gc1Di;E!VUSJS^{(?1Mh;fVwQ_*-uM)%>sV?9b+-q<=F1XLM6jLWJ!N0Pqy{ N6NRPkZ1Ug!{U7#%BdP!Z literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_decimal.ods b/pandas/tests/io/data/excel/test_decimal.ods new file mode 100644 index 0000000000000000000000000000000000000000..308a851809dde96599af4aeadb70726bc5c38f04 GIT binary patch literal 4406 zcmZ`-2UJr_w+_977eNAoAks@{p$7q__a;b_B7uaG0HK6pkdAjg|GUrk{NF!k%{ph#%=hiH_L|vy_BPQcBc}!Yt6c>|dn(3*&V`?y z`n-F=y`X+U-cToRZ%_DLCqFn6ArXLZ0U@1Ga1;pX4Mn&h@A`W|5q=<)w=dMm1qFjb z{Y=g|Spby37_I_b(Q*q$Q~0Clm!jdwCApdLf@_T{yi{c1H479igW7 zp+m^)v>|=lHNTV8a_(#N2{$9H*>?EXs<$^^bjV4)v2KZdEVQ|a9mdPxa5z-K>sOx4 zTgv@=z!FsJq?A8HF$v?6pUULftAoS`RV&d2F_;K*@T7xjRoLP25>tUUOE*b&w8E5^ zi48DlH@na-a$zLY^7{+&s6cC_F3gox+!%oe%{91lskmskf)Dbnz*S=Eong!VgU3JE zJ{QA2=JaK>89ITt459~Gy9UD$AG7*8MKW8o8{w;&5Gh_yax{*Z0KC)!8PsPkF-D!# z3f1Ef2Zx7E`;gD`p0itqC8Hp(@us3qwSKQa>l?+Lg$c|WkFx_q^6psm{oOJ|3pI2D z{)i5HdHpcZ@s^C`eHa6TQS`{d&31&6)PTv zs**X<=&ET9b2S#|xp)-ZqbcXJ*Nj|~<4`-|sOC<%*mZn7%*fWZGb}0mO)oaQg)=TU z=ZSQfu^TP*lfPXz*>Hoj@koL8YHIj#wP7xYk4>R$f1>1_UQI@$a7;5VzK2C;%}taW z4a{cx)a7`*Bl%esxZKN2ecwc7(V)*g#^<%+u~p>v=%zXOuo3z#HNL$Mon{Wsf&#V7E_l_rCH=^6x__iflkcgp01EuLj+le$6?)^Dk+sQ6CwqSpGi9!c0R& zq+vT4U&K(3xy%il?V$pRhb-5RjKeKPI&P_iu%9zkiz1yR*eYLj{~Q~Cmdd+ss;Iwb zhgVyhUpLkLi&zR7<}xhFe3r^s^-Hn)n!?@5R?c)kZ(`~5xL}KU(ZS+z%JlIbswBh` zncx@akN7I1pR3DXNT2`Q9jp90upoD#=s$Fu!a*Rn!hg{Ds|MPY&fiAqAOAz&{PO4v zRpCF{XhWkF;R_$CF%@+cK7_wxhLhi@7+e)o}{?l+Hd>+50S;rBiL3@A~N_P}woa zg(sU5qs5m7nY2>)uo3(GrFA69q{Q`^%`w@u%TeU5-Xy7aUFnR%XpJ?`O8L-R+ANSq zud3|v1D2`H78;@rVPLQR0nSCPnFv@QSu(_bV`h$mfXC16PMo4G)-00(G?e^opx6 z{~OF(vWhv|qwTj^iZ01uc|i^kPxEr$*wHg+0Yo;q=~~mU+&Ej09<~qk+D5X(H>PSX0O-di^;cp^1j3iR@k5UKrz*zb~awyIt7dQoV)||P-mI6ku4Hso|dECx(efFW?HU$t`U14s%h-w-FXVHqHD)6x- zReE+)ncUoI2{;59E5UgMdQ~<|SF79vflFVj7_mZYBD0b`cO9kJ8NS;vx9;pVD&mQK z#=d?UQ9}5=8v$;TOs8Ou#GXm*#2&WNC`6kY)dr8J#c>;zQ`#8`0C09j2Kc{G_c+5R z^)Uqifa3xH=+2|=T_nQqXP7<@yJOb2$Z0ti40p{L4|VsIn0NJ|4?G2p-mhR;VIQ5g zYGR#$u(iHRHy()ScKd{#FzUWf&qJq2+Nk}Q*IK^XhAR2Kc1*n4^D5+m^vdBNladM{ z60+!%E#Z1vP=M&0I(S|bWWYY`^r>3?N3{gzcp+C%m!kvx$uvv7O6LtK`Yfc;0aOx? 
zXV=T*E$|gFR~&*bYt{#R`#CbbMMIx*_RO3U*+vCYIHCVeIy3m zsFs=4x{Tf)={8fT9WpM^ufmG}MWL~wGmWpi8nd0jG| zJv=(EF zy&Hyf859#%Vwr&T#hDx0k2U$b?)0E|)!`azO~PU#G))>RUreK;q-ij+!r}2SU0w@j z4Lw~;Jx!QtjjU;IMMiG;nh#!}rI>L@1nQ%qDaLI~bGr}}(M~2JLTq8C_3FUX$>{Ds zkyN6pf7HQa1KhV2c0Vd3Ej#T-Y0Jkwq-n0RR3CAgwXygA%gxIWJOcOd+0?avpvPMX zDCg-pUFLJ1?tLg)&(U}tSWi4reY4$47-#?}&&WGpn5PLxj$5v$C^!zIc{iJFw_K>N zEaX^S>DS|Nw|C6b;jfA3#0q~B8Ss1O!*n8IXJ?n~%9$JZ!tMBG+t74iaGIp{{f!1X zc^DgF3N%w{-#FSe&ygN`{a_s)+till-zye#I_ZqQ%0GKw@N?BiNJ4o0GD2-bI?@3b1W8^DxH&G`%iF zVLTndkslT5qd?Q!u`Jogc8j`__*iXHJp|UZ%Lk!hg7g$PkXlX3$MV13f7)N?PFEf% zP2Hv5CeO%3Ka(I5KJjB&fpf`J5+NF_j+~*R6zuBoaTxbNYP6kxs~2A)4~5f}CsTEN z_LHU$p;Folu~OFzpO<}#4yo$p^o0N+vILmjqOLU`mEL;2?;HVtAW+{ zx!t~+SX^@~zKVNC6iQzkdt`ZhI*4&7dyQ3yS&_MEL$6Y(8aZ^FJh@#Zp&FzbA|Dpo zNIb1V9vX}PnD&{mb83i{T2JDdT~wJZglf&E9a+YU}Xo= zJR&-UVm`7Jg0Ch_(~j%DX_(l63Rs0-RE~()@70&U}C(*v~F;DHkW+bjY!) zW1o4s3<0M+o3teKQrwMDpP1NJ_fVur!6d1ef*nW(1FIM0u4!E7jjvC>Y(0_a8NBW= z7U5PqxvHvmlYdS7RHm-5JyIv+Xr>Y<@&B5_;%uc-=RCq;&Q0+vBZc}o{S0D>W`@RH zJiymE-?=%yh({wvAJr=ZTiGQ>BjriA1s-*hj^V?Q`qRbu1ns%ijg@fUK!o2O1zyxc zL<7so?--Td&nfz~IZSWcbdgLx)x)Fk24Vhv)$3N_t%X8zNx0_peHu7fxL|A6u4k0U zm{OP6LBCAbCB9Z|V5{Otv9kyt4O8h^l&NX>`r)^m!S+2|NsjHij`k-NQMkD)o%$8j zX@(hsnF~wS_j{P^5-j)MHJr+26pV~#&!vCw_XJyMQFQx@aK53gY<{i%WNH#7OmWm@ zH>fq-_j%v^E!h~jgpZ$ZXf40uw)%&1biroWMXG&5Jn_0<;w8`4HC7p$#j?iQqC;@o zy<-rO>3^(ELMr|%FFzju{Cfr9=jiYDPd&gy|0n7{=lH)77U!S8GyXquf8J7l>DU(8a4k9c3KkWwZ?WyE}LJQf6aV0_85FohJABgOLN(#${YWqa{cR>zw`+C%rsgpv+(wMhW{sI}n~NN|ZOpB9!!dddHOZfzqR*RQTF40|0FyVlp1al*4&nqg8lA^U zSxNz|xu_rJb(pzjk(99-nX=~SZ7yQMZDS@hCpe9B0^M0Ile2)u9n(Tg46|H}>y6bI zR~oA^sS$+9Bc_4I$RrL-@rk4vp$rN#Aoax@6tm41q;mZhymDm`wV~!kab7G4KQw8e z4f3^L^Jh9`@t>E2O3=tchOLv}bHo+p|5|gF-IJpHTc^KlA$^&J^feaJ*IG#5WFh@U z3+a~VdBj5b%@)!v!M9ZYRSR(b+w|-Z@p2Vu+FpAtq`z(<{S6E0+bpC@{IHdv&+f-5l(scHyd6O3%;VH*DW92Rn%#d)N>${hufxUb23Uv5@X& zA$_dbbgsfh_}@2!Z|+}Q!HelF`Y-8WXF2_pkTh8{d6+|Ud9J9$@>05^oX(kJbGl}- zX8*ue=?c8`7Kr6rAN(6eE#Jd!bg&`2&5Mqp>|Q1=t=px|1rAmOB| z)9`ufal$;4bQZ9IC#9dbV&qhlgC~Zau{|+T+qAC0A|0wAP4hZ{flJp8Vc=S`LltOh zJ5+&f)u9Sh<_=Y0qjabO)xSd(*zY=2frh3-71(1sRDoTxLlxM^I#hv1u|pN;n=@3P zDjN3()jV~uq65+CXWP<&lof2$%{_}cDoq(Rn$|$*HEa_|Q;>BabUU^Q#7k!iL}?Ga z0mL%@um?*1>G+hevZG^#+GH#+8$0mNj3eK?*JPQ07ZV`sgihvxG`(#N2;Hm(O4ems zprq;gIwMLRP&6lfww=Z1B-KGdbrvXTe5?gX=Vlh?&!69nm z+B~(AC_0Xfi3&1L)QuZAS`if#6x50+<20ElrZv-qRiZ?^aawDlu;?)&>R{`rgD=sK^Hj=SXquk{*xj(kHTTZBDu$G9J7<$oe$v`aI zEzTVcKw42p8QTi_d}HcbQ^GB(Hi0z8SqB22N!vh_#$VZcMwkFW#Jo&6eX=YQt~c6b zN1tyU2)FFo1kxC19SHaL+63aIvkt`B1PCJLWx^SvWtnj9-X380%?r14rEw+fOzSw0~uuk1QGKx z;XL26Ot|)IlO27&bs*g0ZWBmjoOK|Y_5ktHSqC!O1PCJLWx}P5Wtq?q(k46leCt5C z<=-Zd#yIOhhPMZZm(CQ3avbM|zhQ#iYRG6kWb(8@hbr)BLWe5w97BgHaKE%e6?p2w zN`(h)`rrTuR)b#Jk(zOu;M`n&S&io^b-b{O3f6qt+uhpK18#aP4rXX@_(Frt>RkLu zTb0r}Oof9Xo$d7bXD{8rHZYzkV9b?9(wRXty_IOZTBC8Yf<{`YiD>Ng`M-Sjp?NfY zlxTFV(TuW!Mq0~>XgcZhzxwJc^Jw}i(RjB;Gu8?kuJ&x+_p#G@3`#Pl?8- zH5zv-Xt=0qMx)l}pTB<7JevMWG`_9Tcv(RsP5wo0y6E#i{pKU{XdH>g+q$L=0~+sk zXj)_3Ye&8?k7j@pjTucFVFit`rZvX>^Vmz~(F|0gF{5dtt)Ma1w8pq|=gyf&Gf0WX zjHbC-L1V0Gjd4w%KV}}yU?mzen&x2zjj^UR#_c)twRto{lxWOon$8LuV@+#}J960 z%}aOm=uz`%hAGjQ(KI(JXpA+@OV@NZ-#i+P5{(&6(^^4etZ81lwFf^pk7l?MjTuez zwt~i3)4X&q9A9A$O?T{bV|LmyZU!sH&0Cr}TO2C|A*|}K1e60MuI90_JI+D20EI>= zfeu#yHP#|Q_-dY~d7#b)pwJ&B(2)wD#yTSi^tgzlx{}h72B6RuCD1VnpvD>kmiG(h zfsQf&g>EQ;j#B_N)(1hDzV@_vprZ{yp$ST$o(iDGtuF`_FGZRIsuo-@x(XrLd8^a` zrY)};(qt=yz$u&8G6LJvbp$^u<;Z6`TboGYEk&|iGbob`;e$3kvB)q24%?>$-rEdt$^KaY z-W2rAo0q@{f$NNc!v-pW_b~%pvV#_YOH%_I5xBQ8aM(hHL9p#>1~~1Z)M6%LdC;aS zj(S)K7QnVe8(VCVY<;*9&=N{~HC5R#FABq~8VbXl 
z0Knw53OYr@BCTjddx|Y3nP2WZH7h(+YJq_6s57Ber4oQL0h(SVp7bTCX6dW&2XpO)IJ?fwVkLYT4WNLi40SJ8IBylvitQR}sWS5kz?nH9+Be zupK0VpunZpw-c4h#J@nf%XUQKR*q4{dgH`=cR|R^t}3I+lFVz6DWNvqV5u{Kp$dvD zTP<^Dm4mn_;T7sB!U`iN$x3k78-Hm4k|T1<_XuW-Px0WL#*k;sy&%ORp-$8wLkZ(^ zy1v3p6}Os}U=!mcw_!kijlQ}jD^H(WRGM9~MmOxAoK%x$Kn7A`TX5R z_(gD3(WA|P?i*TOQd?T)+4AJo@m4QgoJbbfw1FRj0y7iy4ve?Na9^2a>L zQ|qtw)VX=)WRfq>Y#JNGTbw9KY{y`xDUWS z+!fq&)s6^wXuceq?>0i6dKUuaJsJ4fDmL&D;Q`1@u6~FiB-iROXBek3)(~#UBZQWh zmFaUioz&EnYibJhn$gCkMr$g?x<$xFUVV90aZY)8F@20Jmn>N+K}Byaf-#igruy=T zIJld1LZ$;o5~>`_PMop=kTQxK9U}HE=s-rOmF;SI=;7C@p#M zfny1Wv<~4nqkdU*Wn9d374z(4D0;*p-0zkhB`r32Kgqu7!#^qv~B;YQ(tcUDdMPOi2FM`M*racoa?fu-<^6e z?WaMnT)kcIbHZoi1bgt4__=Rxz7Y7^mV1A? z^w1X?$2FTe9r`ph=vd*Uy1;$&7am>m+=h|r=?xWkJv;Z@>sh;#PCPsDUC$xwqqb`w z?$c}O*~Hx`TSD?q9CbJu6MoJ+$9evS2P(79414s6K^J}FKKQ{g<&59VE%$Fc+O+ME zw(*I?1M^>YyAp0!=J!xs(PueNZ}hmFx@YA^{iaj59)JDKpFaE*th8pdc=jWXi@@ML z2;5bLQkLpHjDF)WLoU!)QA4^n#&g1E>&Rn^?$nU*R`kh`I+heNRy3RfB(ZB1y^o?)taN96s<3~|L-*PLt^mhNxuQd!j z`r3VcKZ*Wpb@IFsr&lg{>GkuP4_|jZJ^UND^SAGCo)vg+WaiZmZjSjh{JQI?t~UPW ziBC9Ldg6VUmsf^KuRa=)EwSK9vcH@fTRz0kVef#e^QU~d=heLHy$3yDd%geYS4&2u zqE`y&^4gUV*uv|^>+TWJ;+;P%3;FoP0&r4`^DGWmd+XdXt#r(?#$Yq{%zpUUT2pc4n9Bs<8SV$J9l$& z*mvq=$ICl|4jtUL>mS!f*B{^By>fBj%KP8)-SqR*4;}sS$Xx>udi8l@%EH`#mNqsl zcE0V#%-q8##$3Jc>XH1Xlg=)6ST@fiqW8wYq4ytkm^*L%$xo)bl&8MAe0@^4(0As> z_-uFfuJaxj`@Jn~^%sM{*wkBtOJD@rDTH4svV|P7vXOE|<`k&dd-Sw-O=M!y@SExtF@A%vJ z&1>I1Ikb4e$(MVce`!|n=E6%oex2^RuKw8Hb1(V0oX(ndpniP(E6*%B^HjgA+YhWB z@_NLv4_^)*vv=#lmmTY-uivyjYf(tdr$@W)OxjZ7GhoBJn%sz+>%N_~+p5 z%hJ2tcAj#~bv#!4s^inSM<(D5>^;uQ0s~^d9oS>|J!_Y)QjNYasY{4@x7}ND`>V3I z9UoJ2VEzwdu9biEY~hd*q0_#z8I_}t{_eZOD<2H|@Yl{yo%Yj|f3iDZ?r;D4@~83w zkK5M;HtF}ik)c2O)zkjkD;Jvn`NJol%)j*O57&ErlXd5VWqXzn81>uv_p|o?{OkR_ zUbwsO7tc5R{MF|Am$UZ#`t|0Z`E#xv-+S!D{>3+=7cZ$_khmw(ZK3V>e_mGYTNf4- zc6im~;aBT6mK{58b8DkR?WMtg^S#u!=hP$iyZr*UzxiXxq=yf``j11}tqW!xDO^;! zL3Qh9$A+)#P(!n(xxk4G#{xNWXqa!J6Of$^F>eOAAdzjWN1$bN72vG+f6GIigp zU5>2yAaLa2t;MUm2lOp*+q27{;c zzJo`?vqQTc&$hcc?WM@!TW77_>G*ZSByn%tP) z`C#m$3zj`_d)?nN8phwZv*e+K@%M)NoQ`wdJlQMD2~q{mvbBP8z#I{)O*HzLWUE<^dN@zqdOf@nzlf{Z+?vJe)K)YvLf1it7B5H{;GMcYVhfE!*VLqvle|^ zRrb`YkBuq(^6?LUeJ5j3NdD4QJA>|8=6T0`eU{$x^b&5uFp>czpTWxf36H z{@i{2_1C`d<8{Xx*LC}5eYbhh!_I3L?wWh)>Zu{e$IM@>IsDG)XS^;hRX0>7#k~~Z z^zA6O*u2@ljO+jW?2ml1!mkaCInz5n#QSBJe6Pey;~y@l`93}0_VTPd^R*kZSIr(i z)Na%MH4(3GD$4Ll=~=w_>z_j2sfsw+Sp59M>1n5H3OtMN4j(jS>y^{@>^?9nJLj)O z`AZU0Z&tnY(zvSgDNn9U@1kwU4~jg!XL7K1ak4Em&`yAYHi`(rl+-~l}~O*&Jc|5qZQ+~G-(kBMWa!?c@h)f^`N%j z;+qTW)#c?M>~wfw)cK{yUpi&G_%oj#zCq5(Uw+kNi<{d0?(Dsf742I6_|d+(UKczI zHw{*gzxHtDnggY8ez{_DietjEvlH@OnYnO9x!+g)3m%Adi67;h;^OZ4XjRYAF%Pag zeoxv@3%2+_dw<4fPR~9c{MTK_qi)vDd+NKjsyp6t-jm}n_p~tRk)$61qKG|v>VkCYgB;_2&0O><>ki;2{5{6O<3^2}O(Pm7I6_rSe zwZ~M7Yan}J2Dm?kwU;2Q#oBhQ(b57L!g8c7H%enU7M9*3Y@rr@dEvX0@i|FBSz`KFG2K=CT_~n35EnHP%#rY& z;QJJ+zaZxT7Ch2-FcyiW7mL56P&!|{J`dtclJc`l3zVxtV~n1k##)jPxq4S&70$(! 
zPP{${lJC9tt%IX_ok9i@Mbhs?`Y^>S9aN}=dE&}cB{+BMMW2%x=X=$wjN-PyW}MHK zkn;t&Qh8B^(l-y(Haw5XXE!zq7OlMmrZJ*Sqhyt7T-qwrC`;qCU&VHnzBwWYQcQ*R zA`U5LBetF#vqg+>r93;t2vLgJBSz{OWBp0MB*#>Uak(w!*&s&IN-VP^e;$xgvmpmj8P(!#4of+F~a1*J}E|+Jkln`2$RcyDMpxFqe(Hs z*jq8fMm<8nyCLF5@v)nu zF%*ymTlG)`>Nx!`@PJWNEqGvK)ZGybX?E%o!47dfn6E0YE6USX1;!WUR+U$m=hwK$ zXIB^2R=6jY=)-c{Jtxq!rb#WYt;*F0I%hV!U7RPxmFJ3ke}TpXPatLZ;6k1Ta4Ymx zHAVXBKqse8YBzkPfqauNdvJ}d6}ne(iQ>kcTFi^3I;#gGh*2>vrpcZ;PvcyIU_foj zZ|7y4OD$w*$<-O>k`{)RTyNuCHZ4O-u8(mpsb*-&^({~j5DJ=UMh2)7KQdw)CJ2@4 zTM(EiU1%n5p!1Rv8q^Lp4PpUFKE#VlRuoxmWQA#lbU~Pm`1@HPM$NI_`UMxCabtBN zAWvgpa&=P1bduEkZ19$PHZ}Pz?6g2CE0tplC@(WG41QKGQy8pI=9HHi7zPim&J+gg zQ+mqF3=D&B*V`0^Bz+}>78w}J!{8P6F@?dllXcn{k{LRD4ccQ0gZ1mxz6iz;G`M=DP#zy-mrGY#p=aZ9#i`LM6iI3O1XTjz~0mBxwVtrp{LChAn9ZhEs_jY3?u zU#?hRb9{{v@pAl3!r0qjC7uTQ0#IgZvmKPial#1)-CB_kPvFucKLjaBqV24M-7f`2 z9ET%OlJsdUlsCnnj`9(r-3^2NO%vfKh;hyke3#}Ua-%cXL#YaM&!R9 za!W@%0QgA<=OIeIjqynGGo~*dbbFzmar?{?<(w|gTMB`v6mly8ZfTq#9-dw0!Omdd zr(8)#swlS-z)KVBTaKp#UY*E?#J|L^vVAfz?iIi%wU8%R!J0HxkrU64b zWDt+s6r|*$etN6+Peyr;tL0|oWK9lfB0Ui}1x3Be0FE5sOaVU5A4z94^xGf&DBHVr zdvH9L%Ng1`2QVoY>QfEw(?q!>i29Zy#wGon3a+M!av*=Ms8xV==O63GA&^HYrTxF9 zsFB=UOTH(7qj+$c2fji8JrC`Yf|iPc^n#%Y;W!2y3Y~C8YlO65oQ2d%$hZu0ib6jK zMcZY9;u@q>A)U6PRJ0$))P=Uxmr|2aL*^?On3!rP69(8J==qVLsTz7nO4Bf>j{yI9 zXaQeft^tMhC^HA8vhPIF*tthp6@M++HxDt)92cwoL{!l=w1y}ii zn*{3i3$@q+(#30vt8?^F>?LLn7?I3BXu~nsbn!hW1T?e6*mE-@5dL7D>DZ zV+O^!YY`+{1WgVGH-0D;hqQD^m)I&%vmVeY5&Y0Tw8hl^4CuWc?Gu9ML~s=aJhb^a zkV`aDs!`fcoc*%KG^CV(mLxz4K%ktnL2o+nRzjvcFdhpkeE@eh=+6MG3SjpD_G*MA z@Q{Yyp9ie9fKY(kI6z2*WYQ65NzMkVk(L5ZW&YS&-jH1_AS9#QER^>H{3(zG`xL2& z08Hw0Js|PqA?;WpASHn(9dJe?*B7-@fISVVd60e>V0i#aB`_y~@(@r|ho>q$d5KWd zPEzQ}zm0ClD35aRWYCs^cH(R<7PN#zlYB**t3@rG0?rD+H{T{(9by*%K~09NuJ3R-i(XEE-cfWQOX zC3toP7nLX-0qGQ=v@1%bf^!d)(V^B<=#xLn)`P-Cz~ss7Li{Fzb8qBxY$*jT*(f&$ zbvXY?0rjp(<(#Az&;vk+4tb@BX;H%im`Tx8P)<$q0u4z>X`lYpqkb$X$p$oo{?R7O z`WKHUN&j+?O1nmXi)81dd7WYYo(pMckuTe$XuysJWz>Cs4_fL6?bka)$w**M6#J7O zV3DQcVn9trzCTi8fF%#l<*3U(KSyli z2*l&TQ65sYC`(=Bng2r6nTirz!|9RkgPOD@8Dd%LP8d?_L02qNt06x>P%sUA=|Cs@ zt`4+v4#U0?j&kwn72c4DAJRMkGey*6Eox9QUZ{}_dUF6HMU?Oq)Z!>v21-HzI~-Uz zd!RP5HQ6GxXkm!^fmTmo;4F!6E+hj(y;v$3sg$}h&d_HdX7^$z>opRTnEI1 z67%biT9iu_H7`t618L+ef_>ftv3y8MT9wf9W=7VBwod z8Q>-g6sMp(`*1Kgn*#cCK?(bO2;h?90$}q&86EPtT1h|&f54^HtOI?NODw3-p;oZi zUcqST3gDsztORehz*mkKc_@WU(?xA87O`_gbOlY6SQW}70X|ze7U`UUv4^ID$}mvE zSvc1aq=0kyI`Euq>rKyRO!GOF}mU@A9=v_z7HDMcz*Yf1Rynydo#Jy6;o z{CWaU9wf~^9u8^8;h8fM>NtCywEE)ML;pk?q!b58x!|=@EXi_VfFk#aaBw6)x92X8 z57WR!3FxO^Nx2?Q2UdA}kn98dc@g;J>X%p~Dv8-JPio&d)fSEZB7ByLKnuw8Daj8- z=}!T@vfqLIgZmKje5#=Xa*aTnl%i%j+DOJ31`U+g=NysRqBRWdEtl`O-J^fToF9RB zIk%4&^+BSc261D0s56qa<~p+$+LeSl0nl7|Udfpl$IM*lk>r1z1?d$5YAQs>Xa(ZjfBWBA@6caG zUx=*FTn`!6U&Y{y>%RZa`YRe%fPPf^8FOGoImXkE#(9P0AESS<4Cz`?-Z7A9>-B=< z$6|lwe2DUv{cZmVxTKF3ivGHR$5H*x(192K{3H%N6OB{jHI(&RlPofPSuT zxV{Ys9ImfrKXoSbLH5JZ-^B8KVUzw{!#bUQXzCO95&e)8+RCo|x&Hr8tpEQP`h#I# zVGjC->HdH+{(JT(WIg1(!g!w|0{W%&8@JDYZoCik=RU&k*+)>yU9xlBe-FcWq})F; z_}jRSmB%lZYkPl28mA2VHvIYkSNev16^?J*CrN;1lJ<3Im$@=1X&x6do9!UiscGVP zNWZ1zKa}?$Bs)jn1=qLHuzvEqgg)75V3qd|==I<@YuMK)wu*=IPQyM&NAM(pZu(T@ zb2)}}zck*r=8OIS?t94lT`VW>Z%K4Z`!8Ipas%v~OzMF4p+% zsGJS@oG!LA_pT||WRVZ^{7QZ(%lnJJSKrvKDQGk9<8qHi+Ak#k8D`q04yp8u(fi5$ zp>mYtexmk2s1LXW5Ha0QqoVrvNP& zk5;7@vLp7v&_7MjNph7d+ijj-p!_q?9-JppU!?Jh`$Rm8=LyVFkZCFM=rfec%jZx! 
zs*i?qoTlgZEXj}KQ8jQ|J6Fharpj|OZJ+y_1-?z;(l@7sBk7eS7f-BXxOZWC{%tdcuRl;o#eLM?m%y)a}OMs4;(ntUP@6{Xm{Drh0jThmV@7I!?YQ%Af z=VWN-J2sw4<0tL5@p%}oZwtlsD9<0!AHn`^@B?vODW&s_1Nk&hpT1SQ(Vu2~u2`~P zvb|KcS0ei7R5SX)eY_0xPo=`_c}2M-zL%2>8q;ot)M_BPj{`8Mqid&&QJUWI;E#YjPrNW zRM%ve#-1*K;6arsPnu5>=p%8)Z+AKieyQ9ky z_`>3&2z*!PGgRW&w^*n7pE8?`^KH_WHxV`A4Y~RfVKUMd-2B&_KfZrP#b@Dx{^I{6 zH}}REv)j6?@o#hb-P7ybhd4aA8vMKcw1xb?hCsS0pC|;rkD7$Q_lw9sX`YEdekljO zy;^}lIW!rngOc_XY-gE{;%_I|b{G32 zn(IIhbK68T)ZkFx#SAa=Qasj7yWSEaD2= zB{%s{=9FKH4wEqGg2Df@^G7_Z_6Jza=J(Ytxck>maVrzmhoe{8aTY1Zp0m_A2icoz z<~G&A0)9Uh!GvW%a71ms!oG|QYl2^ekzv{=@O|oj524diM2Kf6xGSW$a_uI=E=!f4 zIg3bO$5(WfGb^_`gxo15#oMpf$47WtLyzvRPT|RF#dv5BQW(%q&xCQhY>pjn1d}Y# zX)j8tK*Ya&6gR0#=I*BK(7EaYTc_$NW&a~}w|bK@LcR{K2pyHKc!_l};%!8pAIaJq zX0fFakOx!vIXmOANk3LF=6bO3HuUH9h!oo zi1n>UM<*g_FJuGg5MJISnh{;hf@+Lx?!IV8(ww7?kFh2?%^Rf^er!tb_DPpXkj}4t zfeQQj3JxOoH>(%&-|D{w-uVbLYB-?P>pB=)IWjW*{QaM`{~tE-|LS@~+@wq|6Z`-` zyiI8P@z6#bCa1oYD1Q~125E!b=52js9vT1bjS*!LTLbqb)7Jt9+#1vH&Er&y(wzqGi_g<_5L3sPMyfFqA?Y`p0WT^Ztw>({6 z?P#J(k1%yej*zTJc9gwhQZ)991%LaLlBc4;Vmh?k`1IcRLj70k@?EUyFKDe!CXg2y zi#Y7Gge;ok?7Dflu=s_r5&Ml9QQ~V@IB(Cmr*~a)Un~2=&~PLJHNbRtIi0n@G}aNF zR&$L9E4eO1*HME5hUDVrtjsieCK>{+z&5F_0*1^)i*4C22 zz}!Ylo<|B?0NLBo=UrGc3ygo{bdR2c1!{dvid2_im0T4^D08)VC~h1$DM<#E8L=7V zrWj!ToRDg9aS86h1Wkl>aIB;GeJj`HjS_Nb06}d^qkM>+wCff~b%6R9bAdf=4m0Wx zDSF5OXh?4qi1#km!RCl?XH|ZfE>lKVZYmhiV>fd8S5y4M8*%!tro6NoX9$Y@yrr~d zHwOf%>?0A*n<>qh?EdjZw##}c`KEGxC^adjr8qu_u;N4$-;!e~qa5LrsYDWXm>|i* z6tv!rPS2Wr3{l3xGfXorxz&K*Gw`Td6hf{~M4Ih13paF2kavZVTi1X_Ec`ScWiXQG zP27a~T}qIZ{NX^l%x;rRu#ZmzJ<`Uwq)7_@unDtG&8_y#X6ZrF_hh~&@bS?xMJcP~ zKtkH0qII<*OJog|xN~u-A{qQ)DiepmNH~W;ImegRo4eAclHOdWHxp(8Vjalye~w-s z#E$dufl`?U2LZtafdK_d<<9}^zh(2E!3-3*bOAp5fBPtl7m(^>LJk7>zY=fT%v7|5 zTD4%I+?3F|MIO{4tdNnjmfoztg>BGOTaaS(a~E3ka@S{Hv0bQ<4K)i;<;KD>-H@g# zuf*_O;algn3(og0UfB>hCy25Ad{jCPSC=+JZeSYYjpYV)N*=hZsi-(w8#JpP zr6#^5QNOJ4xjs+wo`8y!zZ@(KMPN=sZ|diSMP2y&ICx!Rf-2DLFvA2n#LOoJq9bzs zF%;^i+pDW{-N2YU8hXE0?D6L~w#9vz9oqgnC=QhIZK>(+^sqwS=?ykxkRcDQZ5i|i z=Ro!Or*gT%i0PyN6^0T51OywXHGi4BqnWX>lOy9F4f9Xi_lj?o1Q$RLo;)UhJC42J zCea2Nh)qW)Oe-KRzGoSF2g2J;gFQWwQq-Opq({l?U*1rybVYsjWa{ueCNfSsvx)$ZMI0l7H>4GB;_nL^+Z}`VU5#Tv#e?{ zyS;|SwlnG`#(Po~v2O@XA8I}XAU((*&B4bWVazVvf)-y;6+Gp|Pi6%zqJ!pmd)bT@#NHY*4l{7@ zjg-YK?*-2BGUnev64xiE1|O${V;feU*?G8nQJ=aFswV}`bblUOTHj?pxqNj=w-I#545yZ$ zvH!7m(BUIz&C_{OVa?saJ$EoSMF^DDKc(4?h1}r>4FWPi^wYihLz+%z#@5D+fBZ84 zbZ?H;$3MRn!|Y_-_BDH8I4ad73^2}7j~iqEd}1RIO1mPWP1dL%V9H2VmY)R{6ax-{)f2 z;ASGagS+{+80)>m3FG25`*+clc?zSqfx^S^@<|&}odZ3c-beAB--V%BdYEtcc~PK? 
zw!{<|kVcG|gY?O4s>o$FS~<6v zNAvyYJ`q~3Lv$JO`x)O{T#G4!~pr)Bx*)EWCVAoV(MMG z*hd3;QCS>9P<~MAII{C?kFY&6So!Z_2^n^q_^~1oevQI!(3rG}l|>UjJgI)+=QEGa z1&@F??}tkX%_Qz0L{pSWs@YnHEbjnye|)%3Pv&cPzrRdZtz2t+Qe|&cF&@UxHPRn> zK3`pbeICbJ1MvAg4ko{f7p@J!p}n4d@%6qPtpq3{(K38IzbPeLd%o&jseHXzxz+tp z=gRqk`3D-R;g8k9y){zIUNHQp^EaIteZWaDjvr;;rHEi;2y9K4^swi;%DRUVuOG1tXACKVv*gIE3@}z)}Qu2J& zO@*cCDv?WKkuYJn484P5z#{cP)V#M$sPTd!%g)ED<||%G9VC=O35xAL*nGkLn7)nV z`p-m7%QfTYXqAISmh<3VCZzhBM2AFt-E%Bk_;qnqF2xzyDQQrAf-DZPc$N2aeF#Ai zvW!f|Yj(jwcEj8oRXV)~w?w^7t3DqWsw21KGFqGnhzCAyf6BZeMQ7ArWR@U7do1P% zLKayQqR>7wGyx!ABl_?U>$vg}FFM?YpmiIHS5SN<;h%d?dwRBmKUQ{!UwMG1ubOml zu{ctpEt4@ltw~=t#oLpT>&Vu-==oF}AP`MYpN^Fe5RFkrtfs9xCE-$;_w^Dz<26Mvh zZdX0(g;B4UYKL=et7zWo{Cv#3m@G?=i?x_c6u?Li9=4j`UTIAcs)2?M$|^9W*EB;P zs&>jKtfW0}KwJ3z`&wPl+id2Vhs+c*LMxw!<*c*L*&7a*-a&J>@8RVM&ciCEx{{pV zE$nW7yg;v{fohN=F)PKbW;n0TYq`FAdG%JUwPn=3WqJG%b)+*8La^16JS?NaE5oQ* z$2hfAG#ho5Aok_H&okOqM*^a8EDeF4WkXxx znOvxn9J~z4Z5_+hGft61pZAsIFDaU^PsfUK8Sp3Uqp!(lhp8%3?nW~#o(1*U7>DlA9mLC7Sti{+Gu{p zQ@#a*?9NvCXdm`dN(mbOs- z`mZxK?-1Sr7%&*=hz|mS@aK%}=;Ur??D%uhHK(p^g|3YLV&Zx_0R~npC#ha_71Hp8 zvCnX30_!gjk0C+yqnolzyXYbr3WgimVw3e;!f<0a5Sl=|>S7~+VgMW7-q1@vpgfG~ zn^`g0d?<;@)5PlPsf#I>tKP$=m5a;nDCauoZH6O&hxJhfo^Sbv1mws=)ZJ%xlNyTS z0?v&&WDlR?QyzAg0Q?1>4wGjfonnN&-d=|)k7Y-6H3H`a!2n7I$)m?;Pl%I1tGtTE zelWg@9Xc_NC22Nyiye_Lgzy;)>aLu5_cJlhfYh8F8hy(=n8V)`L=c&){mwuJF}T3d8p*;R6?hD z#0?u!%cK1gxA1xj?Yj6Bs1iP{VBX10qux>;XTo|{A}M?;nLA?xtt+iqvPM=XZ9Bvg zT}(!k5`t$2!ngp|CReo!J)9G59P#2vYmQg>4S@lHig4Fg~9?e2;b)w{SjOGxWuw#8zyYRZE&a#aS%d&moeIllrRiz7z; z$2ty%xCu#kzm4P*O@6Bn4T_7dA0v;pKxver4N}54(2qq@KNdpQGd?GLzg*;go~W{$w*q>~p~}fko=g6eEs@@c~SNg#z_W zZmz6AmXS0Wsz6Y^A6dFIS!SA%5YaFunnX0kFhrcyyI65`+Oj#a^e=2&3bU}8eq;m3 z;sUf+5OLVGvjr00Ac1YIWV{tT8MBFLJ;g8-@IR?k#HLnk`07LcQtY*ezA{l5GgnO$ zRT(1inL_ROGNhlaRJ;}JPnt-IB|LG15gCmJgnnaH_K89D@tGP~s=7yXX?5|@f$S## zZ<=f(y!2dtJ-Av-luGgf0Z^#?on7xI^OvFz_>xjY`DVTW;OA9L`AjYiolTJy&4@{L(NrJf8Q7pl+&z1LgAlG{ z5T-W8Me>;aF2|%^(72<9hi(u?ORlS;vK1oBS^?NJ{udGB;ZvJkC*(oFqIVLErD_SN zrt(}O>D||}B$4$7hBQe!Gl;=&q-!|}t73Gk6rjxxil$W@C28qtX~R5s<;vosw3#8} zBA%77jc{q^HBw1$t)B23jbUEKgGnmOxI zJ)oG?AU@Ry8zCTp=Q44yw|z8@~dCsymVUG&9JcIJ#p`u zqC5@vbn#gdhY33BJH;Roadu+rhtRAz)3LfThP$b6S)}8ov*;nQZ!B-MXRASBukyT$yJ%v&dTMv0)k*OC8f&~q!pDKT{`B|u{B(0=eNagPr&w+mMKUwI~*%|W#$hv$W8%h6U z;mPh&TqjHyeOGtk0hb-mM0C@p5+d|N(Ws3N{a3thdhx?kG+`-qoDCKCa>5jFtx^Am(zq}>J3?Aj&<9qa-6bf!8 z?(O2poS0k8tbO8nt_O4`>7FiRQHbrivrd{|LoezF3aDSwmcicj&0UiSG8Cy#VuVXL zE88bS=7c6)pc*L;3GMshsL zuges-&jDIwLh*81V&`1wi z&&(_anFFPAY`rXLvo3X8M>;Eh`4}0K zP%K7*gQN^(O91cag5xm$MAWGH=iVjH?9!X<;M>4E5lMFUg8!_tXq-!l(hkThPabUi6G^)Ckr;)D4V0fhsXD_}s> zKpl)x08v)7+sNBk$*}7AtSe;vW7(4eV z7z#L(H%$<|uU+KSQK$=Ldja3)bM z83JuuI?Y0y&s+QcXvmgz)KjXp3GmEI@8OVv#)y+?#IWv=*IBdGXvkyZ+`e> zAoIv49cl=*4*E3MFld`mY2GaMEn*!bp;LX>&`fvhh1NmQQml9TSbKSHJWSn>%=Dsx z-0=1BNp*wm{rdv_o7e{1a7@4yFIFX)H~wTD=XS{uqx6@ku(UuC52yn2XSg$97%<_o zuP!!)$$oz#<~#K3r`H6|O>u$tD97X@KTha)}!yvU_73ArMXXc{V+e=nLfbs$~cdd5UDF zxFMt?boTmYLF;IdRercfR)D{hni1@|LwXPDHGet{<4r!h;_no*jNF53qU=W5T1shU z;t>?b%yo>Rjmy<5n!c#M)0}>VA;YqR+XTR2Ki$zf?rc^HS)$*=S;_NrWnO-NU!@A? 
zPI@NVRkxwzlB!=y>a%Azlx<9SsGVz*+3;zbra>#m5Q?$z%q6PHo&N|zMe$=9-63z> ziWY`I-r^Ub#-1$s7_#gV_4J!A7SazjQyJNxH*KSM?d_uWI|muNtLr*!H|rU0Bn->J zct*~SvUgC*y{{OFZ9$@;-N>p73=G28;Ig*6Rf43TsU_Mt$uuf+d_I+Q%h!p8>x3*; z7*n_CdGPNNKYI?jWy{7M*-m91nCY_gUb+<=h{M2%1b|akDU)+Sf zQF*w;&3f)L9C64HjNS{BG~wB=R~x!_-}i~?bUU>4Tku9*FFq_flCbHi!;H4+;7=06Ms9s7F;tq51;RywaX!Mb_lxQI8igl>7Dj; zSSKCh0QZez8C~F5pprR5uom->ClOvpnFS16nPe9 zb5f^*+XJHd$R|Z7O(H-N*8$T&0m9ozAzE$LO@b7oYELtZv*eIyGuZ%b(kqAr^%%Mz zN^YWZpB%y_q_dK<&S@HGHZ)io3hHzWFvfKyj`0gk38}G&CP~h5(=8gpNQ*-=d%kJ% zm|?_QKQLMgM^`CCkc#;3>|j3^oWTpA1slT1hTU@Bda7My=^&7O>8XbU;K@>6DXFxG zUL<^TtiYWj--sC`q1`8l<@OUwWIw3WEWoH?s^ zc~I-|vO_epl%MTZxO$BGhAViMyhVootEeC*!%OH;POR^9Y%L#O-LwJ3i=o@sZttiz z&OP=$D2338FH~_n`+qnO)gSsHI$@ABM)HW+Sr{r# z7hlB74EE`LdBi=j=Tb0o-MMAm|MKk)DQueS)R5@t^}WxBKDE^8RYNCbHyt6LFy@N1 zNpLZYHRvJ0jBKaj{<{zYQq4>8^mWkAn)+ez6O^)w*w{T?=lZj;>K|`pPrKgb@Qk%@ z?gYP3?mu8={c!Df6vr%L9-$>VjS+b_2$y^aJN}sf^QvnFqViaK)5o(t`(Ckr0y{GC z4I3{~RRGPX?H;Yyh+OiSt$}ZdEfm9&th+wE&b+4jY46k<08Bf}UE!x^{8NUeK-qU; zhcf5#k$Lu-jV{REnV!>@&kJC$XnU;7Sa*u&h3+g;!m1k`sS8mX1@GcSLv|Cr%j`g+ zu&MTj#*rtdOwu-y~g3wf>@_4v{t8D;eB@u zmo!-qN4=J|!>%sO7C1&$?iA+>+~(2_%I_Xu@sLXL(XmjTPL4HeToyRi+ElfQaP784 zZX)7BzBj?%K^&#y@?LMrIR|ZO)K)yjG+v;G__a~Ei47(5rX6+LlRH9gz` zQaU^;?6HOrHU;gs_SBOp4cQ5v&bJ2Q2cmbi%X+s}Hcou#4`+5s+;%*P_^_nEJ;gTY_{ zqM0PRQ(Jq6s0T6jFxvOT!z^oDlecCzW-rZGH#x86GUlnaH3PS)zK~_J><(_AU-P1l zFuQGMuz3He$*HN{^=jy;yq>>naz|>%cS%CIeZU-%mjNkjrsAo@zN7S|by?k0B#m&z zMGN7)XXx8of)sMycYzm<{T1{t46V^;tpoP(tEPRgEw4MB+$`^O(cLTV6`Z5LIgJQ~ zQ;x&6-m70$4=Bd>o^Ulk<-3(=@+7Lh>xA-g9|f#qP9hxbp6mGu z?!5~mY7Uhi*4sN<>Mk@8Sk?Fd_}~75Ya1CD&xI{txbINXIj%_m{R!mR=43>6hOyzZ6sq` z|1Ieq?prTD|4M&+-ObsT7t<|KPeq&O+-tL?3wLiVF0S#Drl|+HXP5NJ?@tGZ7uiws z4=#EOd7Moae1co6_qy)V&Z(BROV}QkStr)_(npQgZg%&hlga7!a`l!`CwEO%*S7vC zvtFou!_*x0hF|ZifJ(%eouu1v-^`(k&n&>GT1f>M%Ch0fv3DPB`+ECcR?}WNNK*OP z#moALtL8F3ov?4RcfxjSHR2J53J&#Xv3>+a{K1Wz3|tM8CHR0k7_eCu?-+Jfm+ z7(Q_q0HD7u40`}}Iu{%O%D9v6%8$`L_=VoX$&V}Pk3H$Pg>#mzrf>kyWPC%xi~zJ!_p%6cC~?UEZz$G}6W_DcD$r4PD~JIf(5252YiB@C-`p4tZkr@oMBuJ$g)mdu~dv0*4#68y{VXy7V;d{g#Fw?49kW{2{@YQnZC@eod zW0pr=JAb`d`o`Q0VE4ytkG{T^B$ki`OQUO>RWg8N8ds})b3k1$NKm28{Rlbih+L8F z?oTsj8}WoQLn%B8>BChWdNm)>fkazrPNY8n?%=ev*7+WXgMR7|w&USNum3vZlIO;} z%RICWlVulAcgT;HZl;QZ(kK)%!SIa6*X(4~@Ey$uZ?3&4y7p`1^bUH-DHS8a;t(6@ zrv3C>);n`s_a60iMuIM8n>^h|Kc1&}Q4fsr@FDpLfL^kYhKU?teZd zPFyBqSLt&cHT!(>tXTF(B6Zyy_8(sXV}jxf?+xBLanyTiK3uSejbF`|b$o8|7n@PA zQ0C4X7gE&abEl0u!L%z9as_|pebQ7DG?b*O%PV!$={64&#eaa`P!cM{WM3yrO)d;aIHa zJ@vmt?&7i{5`}>L(**{jQGjKMMz)53FpkmC*1`C%jTHZ@Mg%HZM4Y@-FB5vuI^?U+ zuxENfCP~f-zp*SL1Z}u2ar275G;W+(qp$6w=F|tsqz8u55_=_Yy?r*e8 z#^$01f}3om!~VNC+S`QB+q%!*ZPnRPm#;!?#fpNpkkSB?KD2C_v;O2FA0Yew8KEZI zGq?{1Y90hoFOmOS^YrcP{-=1q!eAM3va(xD$o&9uuaF_vLv$4w<*4a!zRjvoNqRdi z_e=@HAB(Z3q07%#ToOuYV1@1DW1b!_?pDt2)?VKP1hIsOcWbqbkge#n7tuG}y?tHC zS;){Nw+ufC9)tAP^YX}#qHBt8gRn-ysD-8#X^V$71P^RP>~u+DC>n;Y)WuepWCX34 zt-pmKo4kBd=t&|nA<-t#Loj{meJZ=OR)TdhLaw30sN}2&Pwyp z3w9tuA#$d~1cX7^8auntiDrJjuAxUS?k713_K4n`43EP4$DF+Yzt4t-giQWbb`ec3 z+n`@czDisnoO?-fGexwKB7)8UP|Gag8PCvm?Cf~~vu`@J+Pw{k zh3Y<^cjwBeb7mct`w zmNxpbLp#2*U7$00fTkgN8#Hbv{Xgq_O!b9**qzS zAJoJba-im|tI1w%L+r1jc>O)g`tGA)9DU`MKOAGx&=iE#K$g22F3s1U@&uT_b-b&8 zhWt0nb>idn=mGYc3fOxBpymDw=>FOJ|2M4rzjAgUm;Q4EPIiZbF~dV`v~F-Ee9xS4 zLykRgQ^#xzYuc!HVqxt!S4^XNxWSaAr8{PRbN`aR%GXnNnzE$GHl_5ADk92*Pz;=J z4=|63{yDNelRDRrlI6H{Czt1k)mOUnZM4!{VRojl{LF916!1y3lGvk0*?46MEYTm- zUF%5?owaS9Uaa|V^-F|oi3lH{=I&D^FJ9RZ0$lGzPBEEA2vL?-)u^<3yv7omhS;O2 zpxX4@8q-R?!?jpm5Bjdc{YMKx!RUd>r+?pu`JaCNr~Q{rnQ~Hp2l)GruKy7JY;S;* z!k^0YhYtNt`1=-@U!(-UmEvF3@Bi8G@;mY0cVhe^1p%o8ZZ8EA|M}v7?92Ea=l7!f 
zUq~oGhW+&s|0u!#9p(38?_VhE@P7_0zm-VhmFRa)< zv3|=@|4#pV%Jmoh6)}|pY^}U z|5^k7PW}75`-|R>{l6vhr>gyu%fAjHf1k#FL4knyzW;B2{0a8=Nli}b=ic+5`Ajqr NIN&51#QF2z{{wFRtGxgK literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_decimal.xlsm b/pandas/tests/io/data/excel/test_decimal.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..7dd6b1e7da036706e76f0e89c931ae542da83e6e GIT binary patch literal 17971 zcmeHvb9i0d)^BXvcGB2(lO}0w+qP{rw(YdBZ6}S{*fww4_nfci^t|^z&;9#5>yN$G zUTcouv*%a?a}I1731AQu05AXu0000&0FcL{v~oZIfEX|U0Av6NAax;YD+fa>2WveResHmo%+K;AG?u1vmhB-(!TawPMlb8nHjPq7dK?Gwe`dcq)_c@Dl5r75##id zLx1C)_*l$H9OUs4Xy0yu{UTPIBATAy1Xt418v)lIPXX*IgK_W2HUM4SKvbKCLcBt1 zNtG5V?nTG4;2Iq~!o*97cxHn1kNppF*L-bpiJj})$>$zl$ft6_Q)swn%k@!xYZ|fa zA;-=iTV2XyNSmdq?6)u7ornY}&%s6!tyR4#!q?yB^X&~7K<3|+yiSpx`1<{c65PdbfE!4==BBM;;Cm-fl9Nha;hI5!X9c1SLJ%I)YP?*(HkFmjCEQ zbeg@Hy-oS}$(6#XEt;yVu{cL!Xq{Me=0f-@!UU}{79?^3P7oSTimzI~l#an)iHS;Q=gP2#4hHbPDRAy`J%6rN^KJ;mreNw zD#-|&P3Y#kf>geHLil&ixLD9RTicoISzDX`^n&?{>(*)X2wqvWZ@#LoQe8ATAb|>| z(VI#xul0&fm3mjzLVw`XiLw=?u<>ky}v3G3qy&@}nU40(fRz z3O1OiUg9-I6bFCXCu5`r&>I3}lV+i?`$m#lWnVHV$G?Dj%9Y^9T$K7GX1e97Oa1*S%vVI zR`}c-=M@iTtjb26hDZloIg}1@BWltw#38Kc@z|pJ4p0(Jy5R5%s_97@KC@>xFY?<_ zPIm1IaE8=!Q1-BbRzGr>{B$<%PSOY>zS2wX^j3$C!tUE(&l|HS>r2Z)LOH)ycaSQ> zU_oDle(e^E4WYRqWhjxSLd+AdZJa`Q6+DL>p;SkymvZF73{j<-jm_DFkba0OhVGD~nZjR(HFI*=|NM|BNQ^CjA^PuP=*IDdWc2oU3H>5@~I&pTk(h+qg~Ge7L)vCr69D8*pG@mi<3 zpStcjLHjYh;hz#~tyB8;eWq%4nf>$3gMmd)@e16eLCtQCw;k~pF4yG~&m;Ihen(5w zU#M3w;3?tgbA(-v7P#d${_cdxe^OEJgRc0&jRBNnRexC#5>eTSt)US?b(qrdmfTPD zX}rU+9~Z~bDC2cY#?_>XWALVh<=ILe$<-;wLFWHCqHCA=-sMv-OX_w8nXQai)!0wIF7ca_&bF6tYwa}H{#9$~G@R`WKoQPxV?MZ+j>TURS`p&J zsDH2ynu#MsL3cyyQs}zWa0FS12jSyKDG*=wGJFCNGDb6M+c*5&yXMt*6YVUHL~o3W zCCVjn)k*?8%c*nGMcD5{mP_q=`dK=>+h)E0d!&0#4sUdD8NsgiyFC1-U`^6r*t)+9 z6)hA10LJ@>f7n@j6GKA>d%9mIhMxvEJx@Nu%PC5~*^i6a$`o^& zkk%(LWg4iSWYZioH=_Jov>-|-QeE9`C7}~(1n7kG{q=-WTjSKkhK<_lG)rW$JZ#}| zg@Wg={&Km7kKZaIQOlsNc+X?LZ0lTvo#SO1Hs4T_R^+pcqj>okI9+;Vcy%L@8Avye zzmdV#Ef?#d|fI&(mx?9y$i)GHV>g(;n+KU^4B>Fq_m(Bf>r*>?!8K){m?R+Mi! 
zk-(6~dJdW-NYYH3O$v1|3)jxCXy|~cAN+R0Y!e1luI$3cSN5SL=+#IZE^tBVwPn#8 zw2qc%IErDcUV8eu5rF)wfUFCGV|_shg0FP0;!}~Xf2fe?*HQO`CnV*2pXhFkydkJ` z?b1ZC&=pSW>#T<4IqiwgQYRQ3x`+!Q_HBHhh&2KV_Xfmr9b=FIcx+uwU9Dg4_?N82 z$4goH)1w4b`FZ#taeZNp@kgO=2hSB#Es%}4w0~MM4Seah?tx3TX1ZE5an3AUgt?mB zU!KTo#0^9VY=J}H%Yx&Q9U#aarx#ggR6=Ka5BN(l>x*BBJ>EH!p})!NYteA$-b|kY#5Su$PuSo5odv%(Hyn zQ%I%Eih)N5k;#3bMY4f;Ed(-#xC$8^yK;O^;!z@{Y%I<^)18>_NZi{^dsSd>G$z=Z z(QfvzyUmX~m|D3KP@}A^f4ea1LPu!-3=RO$Pw+ED@JnGGObjgz>3)6w($!cDaPO#x0U)zd`?lQgHt{g6=+mhWrCPR0}CTh3KT=Jwx zEF_c6Bzl{(Q!sl-i^WGE8bVq^H_HNgWTBDf`QVF0*L@2!A#A0&U=8je(n^7qyQ1=^{WAX1|2O`UTFT1gkH)uriOc zIW5X(iX&)iliHd!O}M4ef%Dl4-qk_)FwGkx&{YDIyc#{h8Ae6MNAuYz+Uh;?5+_aOSqCAJ=`<=(!hTY`uj2K6EhG-Br)SJh(9{W4!Iryq z(d!+OtbeA?o@FDT%j9u?JwLb#?%?sh-hb%P{?=4SC1C|lDldOJ8?F8J@`Apm-SK*U zc)@n?o&2J^x8wO?B&p-=g7c<&a|M;I-ShJDAP#-q^JY&v8h2?K2RHegF!bwrv!ZpZ4A?qwX6e$ZC#4)euk{EH@%sFW(v zub?aX5l$G+oExR7Bj@KfnUbq+znpCVNGhs=-)+3AqV(bfv-g^Y9?5(F$=j;&c%^%R@W z@^}~!>%RXEdasa@o&FIMTG?Yc4q*ls-1cyTZALrqu8(AubR@!VVU$;o`ZNI&yPUx{ zSOOhTFk5g>c$&Seu@ZLs(@o(ROffu83z0J1_DhnngS}p4aD*Z;&i>-)9?^?j!aPDa zGT#_h8X)VtUyO(vLAT9Pk1ZpQE%(f}l3MyNI`}Hnz6) zb3vmBMO@E+z!F~lq{+UMuHcpt1t+z^0-j54uku+tML^6bYefoaBaf>Y-^v z?PXmvprgIUbJ7li>0yRRaNBS@h9GP1$E_O#0R@A;>KB6gsv(?Fre<*(pSF<33J;7T zs?>%Yw;kCx5&Dh%*?Abx8PFaKG+O=QDV{Tp!f;R6w+8YKpT^wHBy*+HCbryu&Q;M~ zYxe%_snlpji?_;f+&twb%}PA+hb>z&K61vT=t&qgA;ZftyO$K^4eE$R!sth7&~5G7ivMOi`I~P??fBOv+IcwcUZDw?svC*}2^iY1;`FSP}>_!inG0M=h#N zX^p6a8cz{*i(@KD0hH0BVPgNAD?@kzD6 zIVJczqjZyA6Sf`GVI(}Fxd;(?qc0hm{A4ee)kB>bL<1Euv-tz^K1R>H3s@8JcXjk! z`9>#9@l30GXWc|h^l!n!@thiQpNGL1Vv^nCdp~G@@J@hbp2Eqsq*nGp;8oj7<)_9^ zvqf;=8gyaoA~p?L#gNVXjPLtHzS8Ekv~L}^oMaKFyA3}wx(%3fj}0+jMTL$UJSx;D z+~4kSjKMdp8Z~C;!Ff|>*krynY?>oJikLH8#wj=1AhVk#v~qMxni?NNdNH*t=ZKa& zxfJ-eO?s6Le|Fd&%yiedu#f8SyF=VTFxNJ0*`?6;Z&U?VFP}fyRhrW_!6D7Mero;k z-7_4xi~d8_D5I2W>*t=Zni|>VS|%C@80qh<_m+;L>^v8hhtkEPpz zpx`f;gIHe{=!Bs(@=~pF=tu>>p%Wvk0XK8>2&kbuDdJ>WH84+my0!CRED$j~TvB@! 
zz3#{3X1m6shwT|6LB>4cIT)F9g*3Iy;n}2JHZMW-aw^f9rmaBm?B4 z--Gqnf&c)pf4N_K2UiP2`=5*CV-0odO;HrDs_$<)s)xi!TpJK7ph(urONG>yrMI&f zLM!)v$GQ?~HywBBUoS>MAcQvQ=@e_Rk6qgBpLWwz^wXvWEECPvx4wM}^C(xxm(&cu zDpnzRFe`s^!oLn`R*Iq54ls=w(0`$1CJ#Agbzr1SUM9?kUa< zW`{0=@8cZ*K{t{+DhL$3Gh94ph(k+-6JHQynd(Ul9j8%+Souv1j0AdQV5}J(b&-=~ zh+7=_QuwRGhww_?s>@uMJ<8(dx)u8AdZmuqI?EXkGeFN6Xpy-rGLFVxm+)F{L1LZm zHq!v^ZHWV;mdwf$7&jV6`y1aLPqmYjf!i&Wo=*TPjZNKVfb)olR$XTkgf9^ZA5}YQ z4lWss7JY+bsMZ)8a6{m?s+PRcS>VjqB_7R6Avn|PZtd!$u?Hf@a4<{h6p6x*q?XM_ zb~qSD2J%1Z8tYqGY^&CvV+p;NV*()8XwZ_pe{P(J6Y$|n+87FyQ2U6Ohg$20l$vj< zK7|V&&X%4Mp>ra%$q2u}Z_c~USCQkeS1l>jfOX?YUa6JvBhGNnN&K%`Pu1 zt$67D`UV~~fr3astpI+emoYZfBWKUHwYM=i8BLgUC8=6shGdLvLQNn5rj=J zgl*M(#vs9% z;iPS!Wpw@a7egewjkR;wvEPHAZ+ENed}G zAUOIMg10td!6*)jOWX?XM6a&xa7pLzkp_N;T_biEIyQWXucdM@=uXL|qo6kDaxmG4 zPV$%K`W)IRLXG**ob=KBSdB2$P3h2#iU!PEb?me{^>Qa*BB;rG zc`|<5pOG|a16)u8Vm$;SB52CpYVOq^65M`Z&awYMj zWsKESp&ys#%%j1T*MNiyLK@acKT@}@04E$F;cYHg}3B2g1GwXds$A^yt3L5Xa;mpty-XJ=irJ^qB0cjMarx7>ck!@`0FGr9lt2oE%0+uwhh{odwzdP;QwukHD`ixHRSXo^`q(=iU+wiXxQb_swgKy0S3O&-wcHaIp>bvon_g@duy1jz&|MNe zXPk=~v^H#;-K(C7fM=@9AE^;#T+hNtRMW{GXq8$ZE{qAqZ0`LK`)QHDo|?mk{yx%o z7*|KZ`M#!MeUJ8`{A1-}qHAYpAn#yjYGwRu0b>z6^|^-tCFo4bBRJ@YN@hR1@FaKU zz^XMBUPgH(6{R>Zb*w1KFjSSjZJTTF`T5P)&h@+JWuEA~syv~av`p^kczwpA^1gmc zdmM#rJA>tndLEU79NdSa<+N#ztN_(5Du;6~m(GO~xKC`SYjDE>!%4V~TRpZb`fL%U zrmF&!RqpAC-#VKQU_LoHF}4fRXmF*Ry33V$%Oq8nNoZGD@R@R{Em!mZV`8$wKBTgYVDK6v>-J#4-Bpy*Tw+lGk1IrA~x zlTncw-WACy%A#9QPCO`tUBB}jEY^5Yj~TQdax)zHnOIL8Mygv=aTK%e@JH^YP9k|6 za}0SXu7ZUKW?~8aLBpau+y3|g_F|viFyInLSqp_{1OAsj6w4yQdQHg9YZ#6PzU8|% zmfE{r);I}1B8Bb|VayQQ?0kXYyP$Qs;b)rSW;yMh!}r+O-%LI%SpLiG`^5L&(|1UJ zO>WcK(VL zVn%tc77jeIcp%ZoZn1A~zG7y>g>cDIhOZc7ePCkusz8MCx(es201_FZy#_jDPX&^xxX^@hdO_c4B zJ*waXZAo56{XxK916XEKMyz_Cde>C`5&7NRcdInc))JeP;Gst9Ag)!6po7Kv4yy6~ z`P2u`2aeAy(KQ)DeKG!fb0+EhY3;5iwb=O5n`sgUd^(>_za3fF!dvU!92mi_e`Z)0 zXU-6wV0?o-{+X}(8zEp5S9$7s7XX&`{Ov!ijf1Y<`#y>8*XggELcI2S5*MX!o&1E0 zprZxD;#EY7Izphd_C)t2lN>#L4r^nST{G8o-7{&Dg?UGJSt8zjlIPi@aXxAOsq!W> z9=G;jo}!UpP~OuLYn88jDyapkXb?O77<(~Qt71w3cK`A4rA31LOnpbd`TcR8{c^$? z{7v5BwvBFmX)rmqKG#8ZE&OYB-6z(bJh@8qmoHxy&hk&EwyO^hI~We~U+R}PmmJg& zuG_5YxNzFO%+Hpk1+Oi{m_^>sEvr8ccv!C|J2%%G(PKLU8nrK4I8?0BgN-&EibqeS zm+yPK_0ef{7GMuzai5s6y}pQ-ei2oS*`)tWX{0+!UqsTcAdFZM8((Pn2<_T-ykYYh z_S8JjY;_3DH4?ABg4;vQ#bDMb@=`}pWRT0NvZV*@vEwYBXC+d=*CO2_cw%?pB<}$b zn16D%V1+~eV2<)=4EGgrt|L`6pZ=us2)v55WvxOMhK~K4s@(&X_?6 zBghJyL?++Gd-X3gQuc7N$8NBD$)91xKMzS}%49~Vn9j1Y$*)8@$Q`rkqvp(ee2lVr zCZfYhH>QasP7h=6r5R)#?IG!`9rgc$&Y@4kwdM250Ui&HlReJ9fHc6~NQa}?m&Z6E zgaHefM_3B9mk!DfOPmTN02(^Zk?lPWxB~=U%mmlm+offD-iYMdZ6h8AQlr4nC_rYD ziToL=pQ$UnhWmd8M%jb{_us{T9JNQ1;in+h(n%bk9IyGJM^p;2S&WpKu?P5K4@ZH{ z&YximB7t*}o<~7Re$JX6-`X2706AW!moA@89T+0^C4J}jz%Ma9SszBvTX`E!Cri`3 z^fNz1KY5_=m5QD>n>V5Xtb>czGA< z|1|m=vWTu7?{@F*-NEAhW%q0y4eeYN{*wFO*S~~6B2GQ}y@DZdo%)84paZ#^)K^qO z3DWetXNl5m<)aD*y^o@Hv87UI>FXWbq!X}3e!H#FGj8;Fa?4ZA#yGS|PrPJkhYAw7 zK_wJfSv)r%?rH#gznrQ}B}x7^P|%9?dPmFClwUjd6f!vYhkS8yOUs%I?znk^V%1_W z5~oiEpmh|EJ1L^TA0%OQK=1r^>`owuNicC(iQe#$1#a%@ZS6_1%oam!jp*9YZH3!i zi}R;f6(~8(yOqznaszN5W^wWW%s1)Zsbq2;glO^{0hK#@)GE*Jmr z<2cn+o=O}o=&H!!T3KHWt!Y?T+If&Hm%;`#-r5g>DtnNMuiZHFj>Pas`${|RDgzsp zN-5Qqh3oC;+v`Qy)9RS!`pt^8)#1|VT;xh%ixco2{#{LYv$jUSdX@{5&4#KFcI17*!`n_k` z1#|~GcN6G*$OH$mH^ZoBKQH);@wru%hGJ4(OFVT;l@%DNYElik>C@@CCqVw!76--m z5V}+nn_Jas4oS+9Zs7{;+ogxxsA+Su1V(|QNh+f^_(GBl-j?$QmX2g2Z2xT6itj7! 
z`m$BDdD>K&gV(nAd`4rwtPLy=lZ{~)bKtN#8;Et_9g|6ue=M3^_|+@e`0%8XF6?9Z z6ke^tDxx`Q>c>%D`vIFdTV?(x;72$VJSJ}-eDQkRqHu^e&WLu~r_tdSBz>o*BR?K6MbP~r#&1KdTqY<5mR>d7bUtm7 zFl_ghO&|7)tF^R!n4DOu!`_%{S%3TpTcGjuX)vG$f}=pAx4c@FV-3$HVxQMtb= zN(O%uGbI=-X=bbRo6+dHf2@3V;NzX4$XnRm_PyD@!UoBNe%Xcq#MidLoF0GDFDps~?E{`sWI1R9F!ldqQ ztQdM> z{JcLJOuXp(UkVNkqn`>FFMG?SQycVUs$$gm2L>GJK`WGZ39WMM0DE)$&=;RMo%c4r z9}b~As+2)ZkDYr5psgm~B{SveaK_O;)?=V*H&u1_@y^5A znKKo3-*J>Be}iE0YXF@rDm3NEWW!hlT!V1qY8^|h;x*uLnv4;mpZl`xcLs{Iu3Iq8 z-@MZvZe-|_#e%)o4;Gcv5!h>ZMY$2S3Acb05vqU`y9M*vNE3-iGlG&$MlPpO|rqO2z$gmfwd?wUl>VCi>ar!t5>mRlPXITM6msyO04lIjQTqT9B^@FJ;uv=ZQ{tewe^FS$1j;wN8U3 z2XpfPy3Z0tZYtq7o#t$b_g4sL_TDFvU*Z)`%&@S3u${H6lkQA4@QOczaN#{0Bs%e| z;l#f)bmRq=Jq{&IA>M9)&qr&8JLB;JqwsY!7}Hv}rWWupQ4wVWaErf@y+!cfqW#{# z!5jg<#+Tpe44AcCwJm-YE^jT{>^Pwc=jP`@1*GZdpti;q_DW83I?NlTd|C1UzMbm7 z#cNYqF6+bO4`xMIP#$uPR)M*V8u8Y+UPgH5dF2$={)4m?^Gb6X+&l6?v*2UqlKYWV zxs{(&LUNqO5OqQ*rBS`gMCbajH)_m0Shdqbl}GkaBQSG=ugEn`m3nfP{4ROs?Nk?> zzQmwg4~>Xx;In(iML6oc^8P;Toh`-D${xCuMSh55!`DjSwWLn%YAQnbXG}LQKn258 z+oxl7)UNH0_Wk9%%FJ6}&0yToHguj%0aaD!k4QYq z>j{~ft^APQc0QTR7r9{`$AQh6%mWtBl{<+m1)AaMH0;u*%B=Nc1SxJoAMk{$MqI= zV(OWBqj@?v>kM|P^c_}THwLezZ5jPV@i)K|g3Gns`Q;-?j$||zg`w0Z5rc;rTxQTb zxt|d#uPxExZl^#Ws#guQXuOjQqyn@TOXf$4cgV@n0%y>eK8?z!%WCe;Gu-uR&(Q># z^6c;_a*4mi*NNjRGQCuTFB59jBj%g(KjH0an>5#9~lO+y<3uW6A?V0aBKD?*T$ zTYoAy%?=)>&Yur!Rtrx55r-FaZ+1_rXbk(D{=7V2pjnD0qoII1`(sJ_k#A8mgzYr#)2a}gjYxIGOhqefQ5n5G&vF{6a;xd&sq=~s6 z`L`CPE56l&7cT`k^ZiSE*VgBTO}}I1o85$ps^_-NfH|SMi_(?0EaVr=xTh>~W&9#R zSNLqv1h2;NF#p&)z)bPSDW;C&m#@*`H|~5N+9vM{`|uy4nbkILCg$f|w^KJ{xqRC#;`J{wczpzm`mAyWRfhWgh3=xj%ja1sy9f(? zzo1CHEsv{AQSk%Vd22`fn=-e&5*^nV?MZF;_wD&MJdb$qv!eBh4mXjoXxN%EYNb5m z2hl#W4~84g9<^(wAL8$xG40M&Tb0`LIF;S-w+j^2J8;-hv#wf|TNT#wQa^i%J?1x_ zGkD5zC-TfTdaMsUksrvX?3G%3JVVd#Jnh@h&Nb=0jOyEjNAEp&j?j51t)uW!jXV;B zfhTY$d}O&-!n?82EKGDSp|c%!6NhKk-g@Kqlv#;scYpF4!!x2AHWx0;X(f(evwmRM z**_$qIFrMpd#mI;U}=AL^Bj++e4|#;Xlt&*)|z*ovNwOF3q4Njf7-jhx{0WQ_P{M` zm|~UBUSgyf}enN6$KP!=_xqO;;xebiIjvL`gp7;>9+A?+LFYus#N_yc+Gw;H#+ zwfHJ+g=jSGizBVi`r$&Q@GNKg7o!8xSxLdJ<%i3avw5o7W?AC5c(=X^%8z>kg+g?F z8Mkqc@-OwJ@N3?nua{r(aMZ>w6!>bRm4)K)kA`9vXBIZCFNQe17%aoM^Q^TdP^-Ys z3)ZSHDe>pu=iAa+&&eh7dLBQXX?P=jb41zEX2@H3pWWQ%m!3e!Eq$in7A^Ski!zvF z@}b$oM|^8)2Q|;<8Ly(_kGGYre1%hyUNXZ} zp$*4xXZ&Zl9tPz^Zmm|*^G9d=RgYGOex-6x!s4qH)gNrUVQiL@QpytRTAF!T^+;LZ|DB& zY_zZMpUUm{H~!W;wxDIfvsqq4(Ff0+X88P1wO#Fr>&hjxFQt#s0(eF3+daHTcl%X* zr)9=TPqGttTXo(@w<*d`?T}{rJ1v{k`}*eIayMyb^QIkaMh^wI=J5wqZ^|7Sp8F(> zvTsVR2io)6uWyITPoDipJ6@%H9V&G%3$tED>(Mat9j(W}+I%$~7fY`qGTx7`CS#Ag z?Qb+Mldpmc@UEqM@OQG$?IrW!)~g-r{;@iIa!=k9C(9jsFrsf>>%0pc^WyRuiP&EV z54e@0)@iW3#o>3CoL*{ry6)S zsWe|5#P9P*BW$u}v%+8p416-aWjr(4lv$_t>rLdy8na3`R5`#I_s<#`I#n;>+8n~1 z(XK;UJi?iyiEy&SeXCR%^etxhs2(|*;0m~(;ZNsz#T z>7$*Yz4DER-?ZcL%#y3j2^?O;2v7B7O_;WY8^O~eT*8?5u9 z*=g}2NQB|h7K3N9^XQm>N4H!eaLI!!7%_QvGsYtGCr4M&@gHP?79$aSTT-Fc87VU< zW%?#nLOpoIoTEJ3k>lQwhluv*tx{dOzSbEm8GMt+ecu88jKpL7$4*hk&f3QQ|7*Kw5G^Iq`?D^$`!$&Uj`aZCQFDp5 zaF@Jm=O3jSt@kXAr+yW z)Z>$ujGXn$XRi(lTr1n~dbg{IKCA%Zq=|B>U3 z&^!OM-{Xsj@5S&2*80B~N2hOXXZVjE$^X{CzYAGJtgJ*2Jxbs@=$p`xds^YDu;8-q za3&6f63V)uT?1$uGs%?I+gj~va~Lm#>ktq5I?r9Ko}-M^HR#-sQc?&BWf=dJl2{w{ zltf#Uwd^+cj2}xy5E5=zj-sQdqK%NN`H!J`u=1phT>j}OJNewUyCA*edps0-ahrVt zEA_Huc;%L!pbPkV;Piq6Sfz~=oy;G27-^v)pT)Y!^B-!*01$A5fT3!glSx3dTk*>6 zym>L=Q{=_HnRN^0S2)zqWz$!{$;tKS9mSL283euj)>)$`dWye(f2m~8E=5SUgC1?D zH!}cN6@BN~`ZfFW%bSqK9A(ZTV>;#qOfl5+k%YqfJNy1K&O@@N_Ym|hc_8n) zjqtCMr)y*LU%~qw*h-I;kluP1yt|*xsqTj;q7aHvQz5}BFu8K-DpB0yQu;p@V$Mj- z_U0#~h(1A;8#&{D9LS2UsC15;;1t`}qv-WW42SWHEP!4Xw3xdyRwfc+%wgsQqv*wx 
z(Kwnxu9N&4C=5rhAMO)a9J&-3Z(?o&Gwy=xBH#BNW0Td=u$;>H2kTM;7|_racSn3R zF$8J6@ky6VL{xax>wIlQ{a(-BO}n8^g;#xr^Blsu93N*IQGeLCmJHCUZHJO%E702y z97{(UL#Aa)b1sd#`kI*0+1msBJU$0P>R3rKWl?CjDq)mDcS-tO=!^LNlUpdcOhYOB z5wj!xM#Y}t@z)ZPRVu&;R6~BY?D6s^wN8B39tbwJG{>-_2wFJAmAI~*MP{Z*olAx;^P3JG>5L(GyxE0ElNGLe2g}Q@%-8Rl+ z&!nu(kn3gI&D&`o@4WE%>N2&*FT>Nb>=@-Qx1J9FKL7jNhy2>{{)(928p=MhAqELm=%cKEd^ii81MSRN_OPI*;|IRK)msNX4!7?2Clv2<$9vb?-|^YM z=KeoX+W)pty>scGN5Di^7!U&t*hb3+SNuxGxC=tefr~0yYiQ#}odY9NpQ(H*#p5lS zG&RjJ1LVVN{wiO0`DyZ^JoBUi3q?ef8~#UNzP+x)Soa$46<>`;~AqlRh{dI4;?kF9bPT@?{rIrtO@WR!M;DF ze7byNh4pv77db_vAI3*qT2-OY==KkzZvjeh5o1Y1_b)> zo`3xN|5y0uJpXzAn}1u7k@$CjfA8J?C*jZY&bv$aOPGGi(4T~V_Ot#bg?ex1_@le^ zPvU>?{`*Y|0D$@4w)KDS1^g4|&t|XRNOmy)^CSMF_3KZRKRZx7PX`zezO+{v!Req~%Y9KT9)yBMcJ$$%DTw)c6zd&vJ*~ zfPF;20RJd{_>=z6O!;s6VUl0;|FF`(?em`qetD$Kv1?hrJ%dvJGmcL?qhJa}+fxVs0J;O?%EWS@JpPtJaCjQ9KA z{?T2lyXH5#s;5*9@IjjSDY zmE3HN9JJ|Ntt<(1-+@r%0D-*q|KH<(@C=kC4qGoVAd23CJs^NI1c??!5K_>X#MTd! z325bL2IFc5-cy-m%==^$WnzHz3+Y*jNNg>8h{vkd441o%@*37MI&PF@hJ*V@_XR7H zztDNh(uoA*`^(dT@AbgTX^pjer_wq@RsA^d&D;x{LZFntT}P{D6kN|T8qq=G-$|5~ ztel13y4}ChI>3YkktsF}y+0zSneCImv>2|uCB?!G-!bQ}a&;0a*eX-$$-m%BaiNPR z4@sgv#5}UuLsZ;VYh;h!6|^1ELGYm z(s~X6p)G_R(nAnft*f(Yt^Sev80v!;SQYq=D5Cy6P0Y!Z0Y9Kp-ksh^tgD~o|mD;3xJs}HDK!@MwH zHAd#5#1r;|&F9?L-OSo|FJ%3JMFD_Nblw5OAMXd@--Gl^uWyGD*hBQa67y58Bs{eMNqtjNXgq*Vi}<>DvuOrrJWoXQ3*Q$ z^n*{U_tVhQ3QyF*0MX3`OIZXmDmO{JOL=hegPjuu6`6gKm|fYA9wg`4>)D&s50Y*a z&aE+2rH#d3qz2bW#HPc&#j9`j?LURY;^5u<)|-G- zy;(klx0!LZq<68gxA^sk;@dlTLsqN?T2t#@2t;@M#qZ z*E&Ki?K7@7D=v9C_XYx zoN@nQfH_HAGBA!tG-f2{bhA=7dQNg}jmqg1*q4QJsGK&QkU@P6`V+j;AgP*)Bt}n{ z!WvlNOr%^qvoQ?PRW}1Xbk}=xB;;p&*gB2#NEu}nH0ecj5oSS}33g6;nGhAjQ$Om^ zwn|BUMNufk1})pid0*nnfw=ojrL<*e$d7n98UZB04 z_pKQG@Su?{3Ua5r>t5pB0yPjP=679^{RD<+?RveHdK^zI*x+WY8WMzV1R@bSjn|WR zuDxI_)&+;L)lV|es+4{_f|0y=%S>sx!fGX~OA8aqZW?yL-(MIG2y*qFh<6>1y8tUt08df8Ztnx$FN5&M8oagre& z8t_)`_R?zmD9prniJ}pQQr^aFSsx)Mx&hD`XjDP=a-NV+>zxY5T%E&3wwMUH)B9ie zu|#6|Bcs7z1{X{>Lb?@sLQU*YqtBS%!_ezo-Jfdtp`75 z`>-Elbo4SAORzywE}qzQ(j+Y18XR||Kdt2GB=k($Lc9z&2O16Ku&ZDWBX)!oYlaS& zUDj2`Mq$FraE><}mec4vIn7eIb@dffU1>pOsiQP%l8r9B19%Mo0nUOEFR2OFrTd0e z2?X+1b$TxNdO1Ok7JOjZ05V5Km_5huUY32YZ|we`0od$Sk*653nKIf_# ztNR;fKQh{cqqqn`*}MIao~09SE9y2zxy{Tt^*BktvAbNm#JKE`h7b(O&^l_zLp-Vl zl0j{6p9Zgf6^ADgr|H;Gy6A+m)D}3PRP6G?9+$mOMOU5X(MH8N(>uk~fy!wWL|X0x zPeY1FFz>tDToX1XT%KK_Xq?M}6_EoQgju0ubZHseiBym`fl= z!*oIEQ|Pj zbucwDa&(~obz%Ieax>y(Z1NZogHNfCeX2ak`$)fl1t|>{PTtfnj@K9j>wy`3cW$d) zd|mZp$ycB=BkkzJ&$`V#XK>@%3qR`MGCGL75EO@+nc6DRF}a!>H#!p!yUZOEH8LOA zL&Eh+^L}Ajz-h62?@KOTtSu&Wnx4Q&E;Z2wCk@Jag4$>P<9xOv)q*Az@F=cI1Kpis z_Ql+tIR6Gcm=gNEzW$bq@Ubi+Oyb$zT4ITvNm^3FdTmv@6^eKsj!2nO!PB<@gWV0|Qs_&*v)IpDdgtM1_*q8H*VOOJ^I6AGy?qUxFFZ57yO7BYWt+!d$zTZ>VYp}U z*TK+MOu}M3R2==_P6K;+%V?8j8mPwi+81o``N)6pgyvDeiy`^I?;u1_V3Hxl;G0Ca zF-g5l?~|&!c3G+P2*plf$tD{N6~GN!Ygfh7x( zHq+(2hu)usZxc{9a>OzSc|B&a4F@Sxb>-(TrELj*F_wT2npb&gS?~d?qvIWlW*n`T zoqB2nBL5~R?~3SDU+@*tPc~Qiu}D84OjzvOh)3chvg)00Oc!R}AasUqNs@TjGMCL& zc09^FJ4JVkg*P6Kn<+) z_w+^P{>&B|fXdJYkt?oVARl?*@VICe^xS9D4WDAee7Ruil2y0>dpWVUG@jRp7lazr z0*|qq4bLslW5gO6a+;jF%%Q!LH}6@j!Qu{kG}!$Te|dUqoHy)|H+;@-F)ck+CdDku z9B-_dc5S9IIZ~+JYFJQN=cs7W40gxcKV=I6=dgj|zSDzII~vC8I0gl!s0khx5f4^x zfey)_E#P04B-o+ePna{tAkK2fXT}@g3qMRa38DT)qejSAAMEoy6QtUay@;P^H=(VK zJ2)^&l3*@UwNR^Z5YRLls+uL%I~r$L5OlbJj`9Pncadt~VPY>T6r1g*7(dnVuj!DE z)P(L5fE8^(t_E5nCQhh-^@dy#I>=`hCZJ3WU2;JMe{R{eZCnw|I?d-hfl|pT?|*O< zo!ApzAnTvgK_p|0FPGD^FC*Y086j55!RE>{+m4+<=Gki6tptCiF~!l2ad&{*X@1zo z(#e&A9$^FgW}1An3>`LI4B_RIjG@l}b4AAf>&;>2l>c2jG)@ZMvDgriT@lPzQpXWMEwEnCQC-*S>rA75rc^G|l 
zLRY({39lq3Xf8*|rz)5Lws~C?rc#iSPpdm5)40gwa4rW`SF?Lg>R3mv;mSfzM8du? zm@cT!+@)y|&9G}P8f6tWK(IBAa$Np1T?}~ z&+K6k>WLFT+H)>G86E)9(Ih;?4I{jYQUhlmEU9Iq^(BUp4!obCg(N(L;?qZ8sUQa_3{kQjE z@DBaD-8XDXCx4{CDZas=obvL)fF@9Chb!b7Td*A(b`$;) zU%Q7bPRf37swo1KIhNOXK1zu9|VV0jj?1 z*_gNyY|A_?fjsoVXbkTgATqU;0K?J0D#B=yr+&U}G>n;>>+);jVhg6CV`LwuM+2XO z+Jrvy@NUzjDzRQ7-jQS(^}v|&b%q|WQCx||cS`$YNNS_o^8~9Z6B|2!S9FRnq_zBe zY>^d7ZI0~>CHKr|c$sxpkZppkh-u0>usw#jmUfB5C`fgaWOwuvPi6B_ z?q(6AUQ{-J0uS|XXH8kQ#s>GSDq2%xB9Dy=B-TT#8^ zVb$bME<^Y(zz*PGF`DO(30&zEM!O<@HBh$%G#2irS<4-^ab*tQ-IN`4X764fON^&= z_$rOYEK)Bu`mS%T1f%tQ$%!(hEFLM~qK0F#jr48=?G&E!qy zz+=RBX{;$U%Lyd3n|5UUMk^f|;nLSiuFPaP2KgaQ)M(ig<&PgPU1OrA zKbka^HK%=r0gJnLrPPWNPL?d()8IW_EuT5`-cxc>PX?!0n8wtXHpY>6!j#vA7eduB z3Dj9gRhx8Fo0L09{-Pmfw++o;g@)$3eX}mwx*a08C>U&vo3LwuR#ck`h^&AfOBHjE zXa1TBEcc0?nJ~staiQ7H&D_bXq*XMZmL$@#CW>K-!5`xX4fkl$qk3O+YRC+eY}2PE z9DC-2CJX6xF0J^FL*R_DDIN(uw7RrDiEu2FxVcu;s=kPP8k=bX)CB2vh>qL?u1uXI zX2C0%@>w4V{C+4_*uIqXuHluDF5q^x5=6zcf^zM$Bju~9(NjZ2hxtYX*dL5C`lVN) z#ctobZ0HS{&H=)wI1{2txFY17b5jhnx>&<1MkZyc2{2_B(mKB!(($B}fZnvquCNo# z4mp6E?U)qyQXR}V#_xx4Z^4yb2+veg6<9xiq_wZGplgCho^_K1{Fw2I0PSR;%^qQr zQ3rhN4zH<^U#exMfrOQvVY{<(664@KuOR0i$Q;pd5}-Mh%)EOTS_aG-L%8_yxZC=0 z6e;Y<3E3o*P&`cG3?#7WMJ&gwM?tOuILT>Wy(ujrRwUzNlh(t;5;s%`OoM%F61HElZFT*yVTl1Fs*MBs>hth!xF>mhqDS7_aRmsgPrttEe|B=3w#&!E` z#<%m4caXvx4D`yiI7hB+4v#w-sRrqj{Z>ilYn#=Q;htrh1k&0Om&IzN_vU4<&IDJ% z%_^}Bx`Ae~{RYpJ6<0Cw1U{I`Ta;1lygiJQK*W`O51>Sc)ZN9oAsjHJ2)$fmKj?>Z zM}$B^wug$p8{yJX;U*LWTcvrCz{G1+B2`q2gOkDx_m4JXpe=Bb4)RE#T!?&gq>ZTH ztGvjC-K8vUu3Ki9s#j^Rt+Q%E5gI=k9E>ZbDfuM92UH z&@I>urru^)mAFX^%SV>M^TDu|#$Lfp{@WnbI~1W0<*n``5$zX1uiz%>!(@2}Hpd<* zU071=WO>En5eg)T+_reU@geG3x{NT>hB{=f3rr(%S6}(8HleUjcA9B`cQ++K(Z0^g z6Ph%d!~~e!98I>7mO|JsmYj`)s*FzFWJ2&tgjHVU5JoH#iyT%tX%8%#h!s`CGgfJh z_PZkrSXW73>dkZI>ywORrxKoi>H>83(l~&SV>+6rc8Eq3L{ZDNg zFSJ$}d}j^2Q(y)n*J{v_zI$pMj~DdiPhKAklG6Bql!sR9kDQiora6fR5y7638mV_I zyupO9E?~j8#$W!$akolZxB>gxiM&E5@khMT^u>Gj9I7cpKaDbD$Ic1qNA*{>lof@h z4C87ITwpl=g>^guo1&L84;hoTHP&Lh4%z;kIs6w#Yren|=4>{U@5hpZ=&~eG6%nHd zpli1wDn1;z4;2PYbWRrleaATZ4J!cV;q%yXZKWRs_TAXT=~7PUKK$3>2=jGc5YTZP z_avK^vzWrY+qoQzZ1?7aVtZ_gaH=A|+~O&;!b!m$8)HdbHziIuRMEs~0eK||Jd<_5 zi@$X`5f5=dYnBc_{6+JV*xV#yoDB4FC~wFthoU3{+$Nf8MZ446S|{KiDb1pS=PI-)T(6PZC3^isBbGHY`Pb#)Mk&FrA5Uy7(*-Lnyt6 z``zM5qsK$Sv?d^;)Y@e|4Ro&N@w%^&WKta<&bH9zRDKk24$7WuBa1V3kJxfl2a}#j zZ()du*-qveM`BuRmj50Ch}=2hGwx(iRM}T@3lesrF)?f3{HW#7Xti1jHrvfTH{Sp~ z(=bt}(6*z|c!Qf~H~RtLm@M3BFq=ZQmkH(kt>tyGf%9M-!a*GCo>2SzNbWR12#|47f^La^obUJsj=mq197G68f#W83X)jhtE;?nWUR)zQ=a&c~A?`%KdakD;^L13X?jk?}%N`_O z?Z?Gd1uU5X3Ceu|K4Npm2I5sV1)|=}1Q(Di1TMY=p73D5?SzTs44k&U&oo<54)L{d7bRihc}dSoL<_SN=tOI&T>s+y_X!PVg!q< zvo4r&CKGXoH{q?T-^t+Cj4==E@8iEuSL8u9mm*GS_y(b_ky0)veXxqPo-Fj|)}DSa zy!7suQbSC~9_~Z#(ii07$ACe^rWp~yMK=s^_$J^PB?>iCm4oEXhOQI@4s!?^0@n1P z)v6~R@cv;n{;Ji3BHEFt_Z|j>CJ!T~cO%p(r5hxEy!Vq`56hUo-{9`2ZycY{*F6!0 z#(v%-9@Yn58S6FQ+}l_-Qp?IeeT3Ky9Zb4RCRKH#LH`h28vS1l)9T1}*SA}YvW;YqG{}N0Sb}HRxm4DoB6c zp=2y=j8iWV)8|yYs-WIqF>9PR(vD@Q&zD-S>obk-lFC0j*{X01ckJZN7)eOs?U~GD z3)olz3A+RyE{2^rcFqiT6A!w_A4Im42dJLTL>&Alw_>M!{YAq7717(smRw%v13nsnD zPAGmKTrKRBHh{=z{^C@?!?p2u~LOp`Xp%csSS*7+nA*TfcDP-Dr{DxVN#l?z^2>x;voU63;lV35gOJX^E{T<5mp)ncRLk+cAVJAKgfY&(y0gI9^+n&dU@Qq-WcZrAKl z`9uslT~+o#jU?xG8cwR7L3U54(gJyILL_c`=a1A!hYbGM96tEZp1yT()t zaZuW5QL<5(I!Egk_wLivtDn8wjMqh;*qpi|k-My1?#Nhu=7Q>;K}%aagWPO!sgGQ8MX8i-r6s?au+-5Za`q(a*bZZQzbF3gr7O~=3{L#5 zgii^QYm45xRcSQ-RTj}60MZv#FJ%Wy#%xSq5L(hyV+%Kwhp&7CGvN4Bk78#&MPR#Ky$Q=KY^aORm zz358<7^Hol%Xc{Jv7&Bs7=M&zc=A*6?s&{J_om`#7X6_QJc}K~intb-iqPBz^N}nh zQUn7=MYnc+3H=-+c?2=BLhb> 
z8|zj>GuoOos6a5ex)}_OQ1ss%wSkEBk^NUn{W`p%AW;R@YmRACT0}c3gQU5o-B+qJvcB2&ONeMQ z#k+0{?D*jUJ!-B19&2|d4#*SQOFSNr8OP4b*>&P*kJB1GPN^Ni*G~+vI}3XFBPKPX z?d+%I0UVDqIU;2yiKeaMx|PLO$uV$gyzka0nh9`IKi3j>Ofr!ijO`}SWbr^0by^{f z$?9fzcl!~|5Zx-KGtD)_LN$6FbHhSn(!jdE-Y54b>LL$NQnQP(A8VJCMC(-}9{1sD3(0vQzqV}$lA9EA7w_sYnh{{k$ z3YOF!>mO&4V`O~CULWDm&NW-}N}gb4+16i@O7NKAee!IaOP+hIxXwzztKFZYXe1m^ z^s>TU;V+v^Zh}KdpLM*ks?3U+!l0sdz|O6lz578op-Qht6yId zLXKm=y`NKy@KRML$=02xP+{@>`Sbi~{>kK4)xkkK<9_~g{nEywqvrlqt92bWZtLf{ z+0yio)%jTSsGILgnh*V+Hft#^&Gp6%I4;1(Z3~u;<*N+fBMk=H^g0~{ zI0M)`$L8!W&k`k{#gt<=7(P-O>yI!Lk@hKxAeF}@6dFCixV0Xw+kS*QvB)!D8ANxB z!mlsq@ziiNoHdTR&{Gy2;P$R)=|+EOKTY6WjuP~<%&-g@-|0Wjy9Wjpn3yeC=2YDO zPI)+r_k#4jJxwg1;ke=uqLQs;wOk&So}*g&ffDKgSM+lMK##a1b^y{Cs@yh-*>B-a z^D~W%1HAl^JKS!{M>vU(gVI@YSu$o&tkIAk*%h6z+LO z9t9=&8CymIpeM2)YOM5AhGGtNP^kFljP2hYKgV`w(~g`0_!>?oO47XzvS?!-JyH2f z#7>*vRamL*OD=WFSTNGv%0CjWiEo0fm#tDY4(ogMjeXr0&GgfHeq-wYWchV@B)9fA zy?6U&VDbObdv;Dn_HIgl@%``iFXoSo*Nl0~VF+5IzUC)vN9lU+Cnlu=Wj5pWRb{r~ zL5-8aS6R2%N+qo1&I<*{ac492uOK{~8m4H?3)0-CHefrlS& zC6J>}L0ztbG=B>uc-dyHz2$Mzzl~=S1p2g> zRSc9?8cq-F)_>dn7;5M(Pm7KCl>kNX`nKNA;XStbLYSQ~eJh5YNSj-6{?v*RC8veY z^IY>|Rw_)Ag^`jOhUw1NYD3Nu<0}K%ZkWT1{UcH|)8}(bL8&2xnJF7SJNx(Vx6sFV zP*aFk!*8uLf;Wl?9@U-IjI8UqJ#F(i!d6Hlgj|x3y1fwJdso5uAcfSc;60YP!vMnB z;MzT{d!eo-IwI|Oz1%7t&&BSG^D{HO^6S%`Ro3iOB!F!Yb=CC`Wjjh97Ogz$F6rgW z8R|%WShlzs*st(1tp{}~X_zTiXWeGdVMUsWGb;knkPD8{*^7xj*z7FEcczGN5`o$W z!1j|4L1C>D=P`!a3yC!h%>KNu+A!dAYBlg~J5b9d|Mr0zzB5MBhmEzw%IEFl{^zH6 zSxN7md~*l+)BOy}PyiV?IXK!_saaXln>iX;{rcX7xuif8IfQR~@&7)K(##a8B+!E| ziyW_14K&c3hD2mt2FP+LY{3$2{2{4w2B`SkOtNlCjefK(x8bcYvQw#*P+eNO-Hg1x zoR>bXjB2l4FUwjVES`LiS`KP)2E8S?t%+#X)e2n8c4fAu&B{J^Ugx@T-sn47evXjW z#k_2J9DP1uI!P!hsu(cYYPR06PB~_TbtQg_#74Tk*ffl>f>>kRd8MDjv}5ozfz5@E zbCUQlj(GL)K|GtBSyyT)C)c$kP`6ZCgTGf#t|2#jJUR0M%Kz5lsQl(amq})KuRQre zntG^TxJ>tY;i)iU)|?`RS>R-v#^eJrpDc&J>9UTkC*26wH`}@F_d>U}WF2FXK3VGM zz2!5P*_bbH3&+cBYt+dSG^EM?&L-%V*|aGj4&6TD@&$ZsXhKCF?xAcFzt(UC$>M$5 zhY>!9e%tSMssc@*5Adk?%swCl5_c5#aLYvYF`yRd=U(}2B09pWmR;KLDWW?%0lec> zyNx)+5s!Bx%p$%qQ}Ddo z0RuSS^95eQtrB$D5NLeFYS8-#lY&;PyJ;47i;_eBg=DOS&jrc2EI=JKQ=o~35*+S* z7C`pZcw{XgPBACw;nqm>HT?ErZp#ez3B~XmsLR8sovx@(|N5bz4Qd+MvJ@SOVO{_3 zJ4my}BbwuNW{pPB9pRcH8NIMV&Qhmg7e6%Yl#e5k2kN({|REjl6Mi9d<}3 ztI;*Su#Va32IetmiD6P};|A0WXP}6oHrk}qr!eQj8b>9b7)l4DP|!2)Bz;rnkoh9d zS+l}X`~7p8B?-*4XxS&wFv|-|ycD$QO-N?aR7}*wAH`6S;SzT#>Tr#i0us>k>Q#n! z1-cF}OYh(wGlVO?*@cn2o^=9aM zF4#AUc`RJG=qZy;YcP_gjvGBy@vb#QWFo@x#Rth~e zdgc>|zLIjA!knka70>WckBPSV4D7=D{oLQq>BL5QO2Rh3sj{n=Zw}7hg1NBkma{aa z8j{t&0c@hE(2O^W9diMA71Es>Fq&4$XUOY35i88_{qvIlDH!sae!-ML^LAT=v5{{! 
zE6!>kc=VU{pdO=3%JuLK_<7{WFil)>!B)W57q`)`_xr>==&so!R2r35Di7dV7rw%- z>At6F+~*S?3YDk(sJkmd8^LAb=4lNiJLaHI&v}eA0G$9>)yyK4y<5~H27Bmn*4nzN zeAQ+rAZ=4BN4a@6xG>_w)A+1MU^4_wW_M z9jpS>y*bAKL=1DCD8L(B)ODRLD3?SRa++sz#3$Q7Oc9!_I_(0{4 z!iZ8ywi*!f(E;$Myx!mxeols?I%_u6f}W;oV(dWf3D@#Bhyj~)GkxnUkqE2&`5i96 z*-MpM5~mT0HuB9*?}E5ycF7?cF)X+aBHTG{I)P+x*Jh60Zq$ z5(LW3&s7i$CVB`%`S}>~-`9p4^z18%Zu@ViV2nUlbx6lCy$6<*A<4^ZB+JZlLWZdG z=faycLNb2D;|JfF-@R8hfqTk$TAC}+ElZ#Ny% z;SJywjm*L)(Y&j*+Cyn)_7(=j?7MpEiV_r|R~DG~J@X_jQRzaNT6`j}ZehOUUnzL@ zR)V+KyKry=JT+|iAE{pNB$ih`wQdA{7p^-mS#HfndB%!=%qCYQC=zl*$Pr8QZX62_ zh`R;Ol6aV8Za;ed787yp!B5*daaY(&a38~>v2i^6 zE%+IrJ>gZAM^S~Idz9|DHezOLt`*-i!RNGSZM@xGG&}~brj%MG&*WaL*PPa9-NmzZ zwS+d|_6f`WR2`twmdB;)POw#=tl5ssftGyjg9fFJ2o(TkRa?U`n=Y-=dC7G>MF&c>z*I# zd^CsDH8>yXm(5NNaVtAms+i4F&oR%IxWT{iQ&N4{?JpFj@6Ei4cT#+=FF{!K z0eiXlhL5W;dalG@8>1>5PjEOGyD&Y!VRJsn<;`dn&XZ@OGmcgXepawrbwNom_cq=Z zKs^_?=*wBcSeDVX?DZjKd#e#&;ayI1tA9o!J&)|EL2HcAhtI0uPALcG_a6vss2w%D zo~FHvjy~K}0Qilbqo6O6qf5u)SEzI?Xf{<$iw-cLPS(|1=Dg*GsKOeKUQY#1@jMO7 zh}{9!vU7*00+kQe2mU1rkK_aFpRVwq4ECb1TN8a?TN(3ITXJKr1-)(?HuSol>JLbc zh54h$VE}3C{838=rE4X!PuJ!zXWsmG90-jd&6QF!|RIjS-T3&mkO!BWPZu`1(x-YK> zOOIZChuhvI{OxLW&-1h1MQbszbM1g5P+k6-_VdLTQ8}N77t_&)owiq+=ZP1gc?7qT zU4&ctr?#(i5jHFBngMZo{0fgg9On+an-)mxnB}x}deQ59Wo&P#uZi{FVcyw|T8~Z^PWH}tkzbO-DlbJf5T54cgDcp@u!ZpIX z6*cAqb%5l60g&m`_p`}l%@j)A&;eIZXwn58dRJC7F~6&IcWKh#}_lv{=c@1hA}cyJwNkuyIw*VZrS!BoU|9|Di*r%0iphLpUIX4 z;wdGl{O{Ie&THMVv#@raO=Gg1$q0rZ0xANCO#~9+Ij~)h*N>eBZUlr-tx=h|X8nXY zZdzKLVMF9akci!#!iM5%8O(HCA#|11;ov-|nEHktRI)?JJjxb?`@%hRR6U%rhjwtp ztDPmt92t5WaVg;J6;uLoF}A1V^BRcBu2jP{aK3U3%~D~Q4XO!mryU))WPVw@c=B$i zz_YfCsCT~{@5SC{%!zIik#aM?r0J@q6L?H|nOwRnwE5>b{9zR=C-^phx^MnPq_<>v zLmPu%6Gv}gV{i1Ys>uJ%fq!GN$T)eaZU)q#HLzFVL67u8JYk_Fzo9H#$d9ONLiTfD z=`5s^)~~CzE6qcmXNfak-EzHbFB^VLA|pE(4d_``79H+TbkL;76zuO8J!~lYqE7`Gli!8FZ&cKU-=Ed8vPJq- zW7{SbN|kjmck*=eGO(c_Wq$UT6~8n}2PWhQ21hkLWsrbubLCarezC+%NL7^ZVbL#A zT;|j~lh0U&ASXANbCO6#U=;H9Ut^c0?=JZ^^IXo6Q-YXb4>QtGZ*B;oA>e0H_u>ub z;EnRJHJO`m<5PXU8y86yu(Dreo5=M)$}Z{dr~BYHzI*rf6!GtTr*CWfzijv0t(6fc zCA;~?cDFz4*E|kT#UPcVCqqNjU~?5T)uMUEWDI`H$DY17-<=zmA(n(LGj<{P(4QSs zUf~io&LzJ0iK53dDFW6%ssLt5$nyKGi7K%$(-#&#aEcy$IjzHKlsf5eK_c)B1`)nN z#bJv<38ogNuw$-xu8O@gm>XhS?MMg(Nzs%J} z*6()jUbh+Pm3!BhyL?AnQ{d-HC+-WcZpj3z+;aSyVhwgf%ei={HE33<^4+yj-#{BH zCTFW(fY*0lSQ9&0t~44QPc58M_%_*q8)JbWU}6(Bmw7N%AaZuN&$!qtBH>C(x>5}k zk!sN2jw3aqsZJ0*-Mxw8yNwX^`gAAzkMQ>H~uAfyS~+V{3{;wS2Xf>ROVmN$kCtC z$iP$o)8Em^b>#!lvi%!~B~s(O`3ClWX#a)5$gUWe}%uR>?CDD zk!3=Ol_E0Qo!|o~|L*B5I%-5zYbIr`A34)e%XTjB56c3Y^DUIpToE>gu>8#LM`W-` z)RLIPhB>%p2~06u>MnI82Ts~HjxSaMxB4Z*HiQHZ;4}BBk{7RRZ~-oNq9^DKLj*{R zD{2&4U7jNejRR~k6yN|o*M_u`Wte7*>wcdVnEzUN0fW%KMIZn9{}lc<)L+ei_;&?4 zsecFf_uB2h1I)geguj^S7Z3eO_-8%qZ_2@BP1O!w<3G`1<&eS7nF zfEo5b0e)|G{zUor1^(YrFgbyM{$siSC;7jx)BaBWnfq_#|5~;EN&Tm#{7o;-_xCUI sk9PA z;tuf+@PdH6yr3@jAa55>4^dwa2N6#Y+yyS;=>_p{@U-`Fhj@64z`bA)kOSNq0`WG) z_M`z2{OrIESo$+&8-fP_NMN`5u@l@o01APN_`5?tntk$^6{BdO+fpVX4NT*Ps4^pA z{v=cA`L6t24VQkt75v$<1*_9SGkZ&rlAl6Ap|&KyfGHG~W~k~g zpl+3KtK3#(mF66wkbC0fc(Mu?!rn7~PgtuvyX%0!zT^!kbors&v;e_1U zF?Yra+gb1>`5^`xD1Ikoq5KS!!pY2uVXx5QaJ5B_yZxq9lQf#AX(_?%w(gPkF=D>-ddS& z|Fvt1k^Y{cj~O&ps5>@a-mn(Y!N~OO`Ryy>Qp+M@qN_Z`>9VdQvSCg5+@CWMm?hN}BQ< zQ+A87vkPg4g9Sb0V-bbEse)_T#}+2uGlJ%x5c)a11;ThHO)-No1noYZ0{wKD-3@I< z3s$iJmvm-zveXT#HxL@y#vbAtr=}Z(?XiRT?2?g-Yx|;R+ZSi2dhnx{(+pMy#C4QW z_moa%6IMv!09pL!xAmwPoUQ)2_{u5}`S%G;m+hD+M zePPT*^a$(8we})~A{YF+dKZKo2if8qI!m8+Nl^9m$@n1#J_RNb)C6@QUGU!PwFX#s z3!+(hgAA)eXhD^B@bn-%{N@s)7R4+!Rn|}wu=l;^=18DF_el*oFxJid>(jvS@?@%{ 
zMPGbj7MHS(?;wkf(w+Ao0DP$_@6%~qzbHd4DMehZ(bVJ8V;yg5&sT^`cb8P-P>Crd znsGHr){}Ir4!jSdcHtF3g9ZOte33nC+(!f6H?u1YBO2bOQUn9-!Oovw#pLf<44BM% zHpV5If2*XG*%QS=?fM!I@c+T6JLBnGL;wKDSOWkg*bIA55APr7#i9{ojWk}BrD*Y- z_^#FmmyStA0r*qo9NZFv@_dKV!2L094*lHxT;Hw`T!Ou_D)E(U@v?wQh|jyPK01N> zTN|57yb(btf6cf63Y++F91(D~LsuZ{xATL=5S3F%-`hd^KBX+>b2S{>^$ay={F1FoSkQAXM>+k zmoV~ZDD7NoNnW~IAMFGc5zUJ{~L6}Jeo|((uCM3aIrb%LY%hwGGx9^~nvE1#1 zv@5IJWA|qjqWokg6p>eNJ5@i4Ky?z09H4jxX!J0y_2frvJ*ScG~O?I>OC*XwxPVH3Y#U4f~x+a;P? zt9%_Ps`_>uoxw??)3H_R;kwW&a6A8}6l7iUrMN&C9n$+*W~mC^Z*fI!{87h*vr!@?+q{t%r&QN@b8p4%jh zm$?)&p#tuPMTw764h?U9R{10?3K(z9>1*CuNkvxBYqoXV(&xv^)8b=1wVw~pNH6$?HoYO$l~Z z%zP(Ognm_{xGZ($;s)f~q}3nP-K;e%9%&kA+T&<6*kw30a}kY3m>J#l?2*8Ep^S3D z6=4&idH2>`kIIE7P9Uz#`N&HP+#H$A8yrz-KIc3_!Eu74mIoZlKWBOv$W(Rb@^Tl zkdbxUXb>m5ge+N@yi4uI!diHkUi@=nSh_G+Xj8Fvji0VxUZT+|(IaZY=>k|Wb26hx zw*3t3$P@CezPG%-(&{mF-OAg&k1fwJ{etPyP-XyhSIqh8YF+0#f?Le?_T0ppzgDqU zIq15x8^gx&pfNcp7xq}*5+McC@k2vSnJ#@SuDU~P%-U3KGfxXSALTEyXmdnwC1!~9 ztdn1jjIvx7tWN^l5T?$H*o^{<3fJ(JoeqX(us<_m!+`Z+YM};6G=x8-w zs zzPNu!2C!4rs&r{%thN6PWRT#K=xUHKP10T0Ojiq!De7+Igkijb)=DbR)!VR9!J>(M z?cEdviJ6-d{2lsAc+xkQF+zaNLd?7PwBdBz{MN)l%Wg;*V^btJPHh6mvpuljQOTSJ?#pEZ8E1seTFF}VNr7Ian_-^#&-$lUO)`9stQ_n zwf9N}9#;C;et6F8F{!E8Ja7p`qD3D}5+OpnrjZ+Q8;yjs4Riw`2s=1JVnouaZ3Sa@ ztnP4bC-28`RE?ig?bAU1cWS`UKGJu;dDDpmkfddvdB*H^z4(0J&keL5G*nqSy%hs&YA(Xm+4aHZ-F2!qeM&EP3hTIh zmunR|^UQ+h_YEWT`>8X5xBLmL!n)~lSjj0%+H}uJ5&8n2z?hdq0pLW>s8{re;sF#E_21`ONfhlAmqV+}Xn=+I*!TO@{&);C%nC7*(VLucsnq}bzTi6`!VtV8cYcpipP%ZGp=yOFQ zDO((r-}_~0PIV=YSVSU|1v(xYxMMpW?o>IssjQ;Ly$$>>S)11$p%wIXuAKg#sr?0U zyQ>ur01$~iil5pY;tl%2>+vZD26*fgT?ag&gdH#O5eVN;`_eO81O6r1{+udS#&4c=xZ{8K_7rbRogJMa4$%V;wyxPhiwYYjX zUpZ;XPh6jP*ej-lUf>!$_&{UMFyYQ1rpFhxn#)_Tw;V2`mnv2LNTph+GlVYuaPZwT zV{`pUzp?)2MNm=Mk-}ZuR+nWPui1+QVB|Ij`;u|6+zrU*Y$hWn*4c68=+BhgBq{3` z-B7VfGvELj)uU5RbkKV|{|%+1PQC^W82?n{6BqmTF^dg%n#`4@InF&=t)t?;v|%|8 z299GgJqC7Y>o?Y`>+qLf-#8!QYgd!me24~S3|e-L4;<1jQjDKuhpJq=^x6Kew!*;` z{%LBk3xNN$Pd`?FjsG!IhB`m0{<}f?Rbh(#{AHJZulrrg{i<8WV&(rRyWb0c2hFd- tN~}TqM+p7i;`ht;t3@+bR$@)#&#PysgOAmk002JrVZ&ZcMY12!zW~+rm5TrX literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_index_name_pre17.xls b/pandas/tests/io/data/excel/test_index_name_pre17.xls new file mode 100644 index 0000000000000000000000000000000000000000..2ab13105e792518433c4c6770786827d091cbbca GIT binary patch literal 26624 zcmeHQdvI09dH>G6S65enk;Fszf#D($2q8eyl`#aPE07)A6Jax$49o>$;OpThn&hWbEAj ze!FMS`ySovxb`@?M{~}e-Lw1qzWw&w-`U->7cTr=<11fy{K}t7;OLR0{IXalX-~R^ z`z{l25Iipy88u-i?vYeI-bWUw%b+3CHL`E(^R?%nvm{+B@>5C3H&W+u75N982jrO4 zi1Zzv96g?WmyTUZhe3-vKrPu}WyC^SQc`jbVWV83^joB^i`Dh(D)%$$xa^8KN@HmmDGTrGK8m3djEbjk|3i2ND(G#x%6zh7(h$)JqMs2r3@ zT;I(c7W8k4g?eYP? 
zFkJ(`Jf)gPWIpc--69*%Ph6p9zgC$vQ zVqOy(pW_#0|EKInsYjIk!|7iwk^UDY(!W+B{cMT!ua`*wN{RG8DUp7EiS$zXl)`^f z;Vcc==W`{}wLaneTAy(GpO(ORyhQp_CDQ-8MEW0;NdH!e^h)*lONFx_WFPG(7&U3X zda$waq4^KZm(&@Rf02_uGV<)oXIDz0f8x2UK9`%6Uy z*ikDgKxV3_0J~U41=xoxD!@)zQGuRH3v8&gKzpSHHdb0-Q>6tk8m-Jq+kFMtVZE$j zoyzPC&q_kgl8`B*>cG0CC+aRGqNu z9gbAeA&9-`HCQ5r`t!ke|NU=^rR(47134$@XprLf;y|wTgA__=QvBIDk0zf9H0Lz{ zsa4HMt@HVhm(XPPvkpkDUB$cazPo5vY}o7s@TS#y((p(f^CHqtWr9j=Ltd{^`Dmps zUAh!fDxc4Xl=7w-rI=>xBt|K3T39Je4LwTz;#3K9M=9mC%CBarU~k+4rF2K4^+QS- zVZ~|7u~fKbZBEVT|2!gH`q4)pbwG>{cui}BdDF@_cd%pbfEv1EvSw&2zYm0K!Ie1e zG^|Bv{9`(1u{ay5#S>3F5po)DnyCfTfUOSWG~TrE7;7`~y0xGm_)L2$^Wi{$ybc9Y z4!wqILA$ehUQ(moO`7SCVcu#+DVK5)2{*C6mvJXB}nOBpw6{X>)_j_Q!)@A0h#TO2U+0*K_uEH@gObbfRKISY{FSz88(RrxuP79+5UKt zl|B$eqHPinvbG!$e(8v_$r2w(Nj`}O!90I%!_M}{gJgXmh(y~Y9%NlPAY4_Cvk6zb z%J7qTkgLi8neC4US>*#kB-$okkkg4p(ty{Obak|3H)eMZ4Hd?xI%hJwWrn7+4=c5z zco~-IJETRjvJvM_8Ny#d#$}4j;V^(D3#2wx7`gb`8^s#l(>pK7bC{& zX|w%5c=efRF;@h|M0nbU7%^T?o9&-{?&WAPR|dsIc-rO|FoT6c^Xucvk8e(~(5qQoq~cW#WkocHF=zw>~Jf}k4%pk99v`OsIs8x6Y71%)RBK|2GW-qsiSpEDnf0?kM(-mbE1 zaCj!O&EN9YRmG(2!gAxV^9HWrq6k(9IXdD!ml16yX~0GXEc*8ijt!j{8=M-Q7@x`9 z;|EMM73YiIdo&~`umfojL$6Vpzz~cx8NPc7%H=F0fXhotKw9ug2@koRVIi|Omhr{v ze6iDsdTh_Y=Zn6>g`p$azQWkpOy=%@UdxK}px0px28ZMbhK7B(8^fPhD=tN;L7|sG zjaFQLN;_KVn9)kdj8-~!8jH3Yrjx~)OxCYTO%g)-V3MQw8-yqk=53!&BX4UUZyFmK z-Mm@(DBc7gbINR-PNB@&K$%oAgEA+u@oQ3zeTU>ur^Ke|T9nunC{bHn5Gvu9wNy4^ z1THt6JT^8sJ~%Zod1rQC;pEgz=0-nM;+kTE+#n~h|Bh`xuG)VBirfjC?}Iug;SXHA zGOf?Xw%w}^b;I(^jS~}a)a`y;>&jwEZiLY$07w zP(Yc>!At$Bp^u=^hkXo?biC2wBEvph;!eYG3*=hpFSisU$1fRBVTbOHB*9W6rOuB@lTFJbh9T1fgu}Tex*6t997E2#8xI zrV2BegMN&}JoFc0OhGBGH5%sSLbahZE;74Bt7C@Jbj(nij-5`-mu+05yY;>yCG%-k-Y^emEm z8ntW0-#&mGRJFYgD&MK@cBAej!xQkkZLs2~d|Y{un}a>*5ZcG|;lRjY{HOX!KvI2t~Jux;lIxy}$SuBt6YXv&A z{e)N#XD*kAJ+;ssL4S|=683|C$@7ebA3(>j@OK`2u+Jm&q1P;VQ}$b*#WVd~O8N%I zItC7Ppi}F+PUo)6xyw0s-S}LzmznoE#q=GE={pwFcPyswy3s%PcJy^H=+r@%4str^ z)3c7SGr-N=CboBAFGz6)jO9wd}bnBp_PpQdvBfNm{j}d0@x#|MK zUW6Gm#=Qvp5e{XVaN>b|G}&Q7&Y(%pc%JO~Cri#E?W`w_F7N=-9`K|QfZJH~PXro# zKN0vXbZU52VhJ;;1C^CLUim-5%kwhzbOElza93`yZ&WD_sMni+{>JxrAKa54P$_Nd zxiGM-2?xf`VH9{SUj%?qY^rnM2uSK zSPC)9*RdMJD48*q=pk#X7)$g(l8ISpYwluV&IJDIImWePNyON8`dJDwei6{I8pKxG zG0N>xcH}d@^XpYF&@p22Q=X0ylU}Z4#N>*)juA6!$Ebae@&P_0mDO{ZK;=v+Jr_AK zV)8qm&Phyu#MCijuCimaK@SIi>KO+F^NGT^clvyT+JlW7;xi*%(z7nRO@KH?Q{}G#G-akyisv3qc zA?EXMlPOdKs1|>(n1a5-hJB}4fi)r~dDZ0sZk2U;z(S99d4Nvk@_>cD;qrjy7V;@E z`Cf2&Ky!=X0sbV+<$;94G(5n8r^^GHTj;&S)ZAitAnD_Qq{1{jz@MDCJizIV%LAHQ z3=e2-F+8BT#qdB%VHzG_cXD}vqg$5;G`AQY(A;8p0N>S+)9^rz!ZbXfxyA56jl$GC z&>#n4&|&a^j|E@}DUStIPBel(-Z2)K*e0?Wo<^@X#@i1}Im@I2xct9ph+#vUDts7^N`A67Fb#EYKrsoM-e}Gd9pH zV8#ZT1e8Jvq#tlhThgCL8m@j%{o%4(mT=t*@mpxbqmpv%K zj3QD>LbHt$W{)1MSZljz_Hem`5?n5!1eZ%F!Q~QK$#BV+(GcX4gZ7^qQ9#0b$0h7R z-`*keSnqExm(T|6+wUEhki$3%`2XY=dbeg9_Wt#DOf!QSfoNth+@qPnaF1pN!##Sm zV7Ny!gW;Zpk9#yT(7Ke)v4hJ!dhB4hM~@v0_h@D?+@r@1hI=$KWNl2%3}$q~k%Y@V zdL&`EM~@^7_vn#?;T}DbFx;b=!Elde1~XRSxWeTg%?yTn^ti%sj~-VT?$P54!#y=V z?qLh~XD}}Jq`}JG`HYu)(rPqhxF@YfE{1#3N`mH|Me+kQHRm(S-xc;jb$;ECUt(=D zonQy@AAFs^a^EADetzrWhQ~gWkqzsf{9y;uzKoOIfX{nz(rs_TNyohdC*ATGPIiY+ z;H1+&h?CCrd7PB=DhjcA*$wqSDzR&Da%%7hqAHzOD&3AV-|o?&$%*3=BU9NMP7W2u zlx+FdCw}ky|9rvf#q;UT+aFB7e!*f#tsYeiR4q`oK-B_O3sfyowLsMZRSQ%tP_;nS z0#yrCE#S96-26ZB;`1+_+jLdK>4z}?Z~yt%IREFwpG!!b|L?)c#pK&@asg@>Cl}hr zaB|K*g_HCCyKr&|>|UH)4|o74*9R`*^sWbRErExg!|M+Qj1!YbFkAs4EJIAQCyM)< zlyH+^Z-r8d`gcbu9X#8jWC8G~57#CrYyZULk>iI8g{k98C8HjmRFA3!surkPplX4t z1*#UPTA*ryss*YRs9KN!s)QLFQRwj8rq)*~E;9woy&zkl PGJUqbjrw!joRR;33Ir}f literal 0 HcmV?d00001 diff --git 
a/pandas/tests/io/data/excel/test_index_name_pre17.xlsb b/pandas/tests/io/data/excel/test_index_name_pre17.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..5251b8f3b31941aeeedd0a45df83d2018d75afe0 GIT binary patch literal 11097 zcmeHNby!s0)~7+bMH=ZCx<$G{8U*R?MnbxgW)P5;2Bk}okPeYKl0j9ewY; z|9;QC=XuV|Jaf+aoxRuot=MZF1sP}r1@DLBkLnBlqzJL=q{i7RJV4% zQFcEwj?P;&9$1$36UIOXPQM6qD}&O(eN>09<_4_A=5A@-?&cZ}j|9fmjb&fhN>h?k z(lr*Y;^ZpOGZP~DbV-r6hCfw;<*GiMfgp;(7PAT`X_^;+dxyQ@=M!801S)K3>4CnD zy_s`07K4+#$~OfINh;Ph2@QC+eZeZP#3{@9#ES zF2-qC@Ztw_&2y8cTA@Pm2%t(*C;JOBMnVsDn=nO7R=|l~Y#+_^j#YL((4QtoZ_N=`(~g?YLH1*5lG>$YV!jcAGk}Yx*xHf=;pXd_(mp0}Y}dp-U9= z(x8MwC>bPXbLjv?=ISn7S{KvjT z1Tt-12V*NoCdTid|1;_TVmtm}>fylt?x-K3W&^2&y8o zC z8+5TV$xD9@_eBZ-YzFTPX|_#@NR@ z_=b!Oae7E8Y-o|VtxUx@pS9N3Q2=wf5>ELjA}q3GX%8b)b<~9P7Ng?Kaa&60L&&PC zE26bm;}cS$PEntpNL%xoYn#)}_FU4?FPjtE)_g61oRp2-L4q{VE%YG@n&r7ED(Wfk zKG0b!A)<5d@cU@#n2kTkt7uMF3FQ%X&aENF6anQ$yh=rZJUXOzU=%UU@kFrV6jK%- zv!g^G>Y!-YWQg+ha=Jc{^7Ru*ymFB(;XUXhRi;VZ;L7*AvlTYG`~lbz9?AV>SF8`i{JqP+z1B^N{+G|)GQUOy7cDoQ{`SRcE?oBEwt zcYHjlU76yxv%T59DMTm<3^LaCP)on@7Dbx;qH6Sa4JE@a`3(&jYhREL#@yWI&Mw{i zj#4ZQjUZ_?75FGxj-LX#jHIq{Gvovl%T^HSwj11$uoEI=(*~C;%%3bkK4Lc;8Ve`B zpR(Z`Qmd3(aucAu^66xI`^oA}nCMfZvXR+nz1Fu^-mXNxyeXkHb znA5Bqj`X{q0Zu#{yc&r(-_geD_42gf?pT}FczVV&6_ZHS>~Hop#CZVo9&uYzlo`DBSDMMu z+?KSmu*k$%6;6qvJ$0(?M!A;vDfS#QNM&ky7o}t=C>6G1#l0c&OoU^>%WvEhq+IiD@QB{V8 z?F2b#ENs_bz0uAeKY4ZLjKEw z{NW?-mgUw%TJ<+ew=tu`&DYI83aFjl*cB2@Vsu4+5qARFVKfDooEQybJ~#WtseMVH z^#+Aa>$E13sgE*Wo%{J?Jls6~E`x=uZ{PM|a#b+plMyw!YnqJfKN8HkW=%;OqZGW( zl`JbAOG?8C)8tXwU@xoeE#y(MW?_2Wx+FPKx-58|tnA&e;wn!iphgzk3A@lNP|^61iU+$!E!jOCVzIx0DPfeMD>mn!U(B zQ^iL_hgC_O5n&i=uavsz5asC;<+4g zqL0n4r*6QJ1SzFX>!|mmx`i^pZ;ivArE49ueTz>*To7bgzCqP zEh^9~YBr>3UFiYM7Uj>`I=NXH-%c}IsYe=G2)!`X9Qe4sTWEh5 z=@lLcscYElENRkfAH_kRhXFLM!vg!+A2SX%(_bf}4J>EBp^+|3JBSGjTWlF|I#U@v z9&$9YqcwmCN9B4MjNxnk%;k;YP#X_}A(pC&0y!N^ORWX)rGEhemH(ALo=6R*RzR48 z*z;;wF{f=5pg)}UN1Cwh9nz@>XTY^(+6;-_YZzo*Q&m5O~PX62})>1AQvT!Hi0v&^-MGNN2d zhOfx(ttCxXGOMdD>A!dd4htWFqqrxIX|EhbkOTC%8+{Tp#d>!QJr^YvW zYG&#eLf^2nhzxx!rlC`~*}r7Jb@y~st$tIqQ6b~Nc6cd1xs!p-HY&#bTMVGsoW(iu;XhE@62B6eR*7L-FP#r zTAv||i2-w5_N9r4Nri4wT_c8{!O$&{nj*+Wnek!aydn%emT#1gZJQc?)zp-9fcbU? zosgty)pLkNv`$U#KHrMdD5hQ*rk=`V3Y`8}}`IZ4GJ;Ye+=oOLh4pf(BqQo?4 zEDSjH>#iI}OGQ>X+RX&C7|JqcNKVj4u|qj9vHn3Zw0`!CmHcMaC01*fC_V+O#dCXS z-%e7}A@%0qWr`TZ(t4X;G$bD|_-uEw?&%b%T7Ec(x)}xb?by7Zw#fUSo(pFrz$tTn zGWE>y-NHOd*PH=z{|HFkITY-zCt*Ai~`Hya2&%-S085X^-7Gib1uIup;PO3_8+7slx z+)tO=pQc}5&nzLH$2ToK^bjR~I0$eEICX=me$ZKy6kEnk(oWMhlF>xAU#vItGy}Sx zqq7ikoU-_67tsu$+O$=15n0@-@EN-(+JiitaGr6kutzKh+Qv)+7A?xeUIC!>Ikd(z z9PORanca3kwSdftN(9@0}UErd|Dlsr?H-=h&XI=ws~CM=|q0pF$*+$d zIwX@E3lmbRPLM^kK0JkXgP;u^Bz~&8uf3}l;uXX_?`mGNr?ePwaV%~Z3>PUK1~X`D zlKC)x{GQ993%9AVtZagB(BAPuNQd|M$b-OQugrq?vmDRs=u@pWHA^AY;>P-3sx=~? zkyy=7yW;L)~{d5(XaJF(p>+MDaK__|nknyDy)HlCH4I@~)#3?Q1ydsUW^#O9!! 
zRIA-`%Ko4!GnWls4M?&GMCad>hJ!?9M4uQSZ61|S32a5DW?meB+)EKnyCs+zAoKdN z*MZVDBG}QtoO(2#zjdOItZZrptFHcSLPeHzUxyja>mxfVGgUV3ccr8Bw92JB&jUuQ zluYB*xZWn_u1oej8Dr!$ePQQjgU_%!8&C`1TnVG2<4+F_z?Tzw9rN8^{W_D)HDyF^0@hpY31{_l+ z%on+M^02V>ZUd*0gQ+6%9ua;y-dXI($$b`S(oLnPq}Q-^Pgm&mhp$HrI8_@%Odhd) z2Ke!QNm2ipfb9HD@L~2;2Ww@4zD^EplL4T6bI#0Hlj)OBsErdRKy94t!8He!|?4pl9x9C+rHb$$RLqLZW)hSDWZXVqPTL$K?h^)i_nq5_1gSAd=FAw0u zVqjA9Il1T(d6TpN6)Vt+TfeE4eBO`GsXA8B)!nYvyX;NSGi4qn{TQYe&j zg7K))z{T5;-f-2*j~2NvExd^agN&N%B3RvY!dxGe?RQ~)q#G@AH8+;BqW=lMFVP|$ zPEuhaN`(et(!biMd=J~?#A&abLE}@Gk&2LZqIy;5a&&3TuCL__+cO1(0-Fy~Q*g+; zM%T6d=CBdakC(BxdN+)|&RSZ|r>A*XvG^<6kZ?jjS-sI}_uy0Cej7grn4XnqBqSHp2mb zUd7SEXPmW>MKRt-C#6f4b=4!I=##OOh%&?5q=rz2jmGp|7#*EwWCPRtajoIB=D>T6 ztC)Bu>lp>mG+y+B(Kgqxzl&|fsYF7}pqTb=b9%PFs~%uEy%UaT_J$vP##0RvJ=t9A z(!_g-q&w5e$;Gahrsp5^gO1WTkHo~XO8u3`{mD)-A2OlNGL^W<>s3C4{}RyMEQGjr zaBAY3gK=)EX>+PFtz{nfh3)i{Zk zn?{-2x0bM0``R#XJuLro)e*4pv*Ynif%BZsLqi@a)JiU-9^ujE&i?%jOUP~RiFfH8 zzu#s)1xRJtfHEn?f6nP|=k5WVW`tJ64jNA@kSkLq+A1{b$!UYGJYZPlkrr-ARj%0= zE}n6as)yaW$@{2AWkPwezf*OZRFcYrtlnY%w;IqsvpP+g z7&))8gxU~ZKI4jduYfsz?38!HxKxfqUFm(Z2%zBEIR6lWZ&H=(iirwy7m}6gnMOv3 zU5~+Hgwy!eigvzynQfFuPPi*WWE;;Yx{V_#5{!YfC%jUOfIPpjsaG{4 z6Jm_oKrItV3s!lcOe{qL3Q)^A1(ETC`XXyLqr)V1ca9ve2TBU?RMKG*1TbDOpE-n? zK%5K9yOnWeXL_lT8|gJwYv zN=;;?MVEGqH>DlGoglOKQTtvF@dK4ZT<_s(g>Y!9`_;gnyR4dqiihX4eZ`#qnrhyk z@}+i61`w0++H(!A#^p&3axDI3U@ZN(mz*_udeEkP2WOhrS${2YZJ8lsNvam_hOrzl zD4cjxipuk=Bs9!Ojv+0VSQ8Cg7u^xSFEFDwSk!d)90G$BnZ)8NA`8FlasVDlbOcSC z!oZ#i?~5*nX?0-b7eRcR<)8NAwth21uS`LMXhST4p$OT1HCq@bmE zKJ4_6;!q1K&a?Ve9HNu(38<;al6OdnY?<-KO1+FQOSP(`P1D;|}Cnc(2? zA%1anJZ$${0|v$bR9Qf}kq4c`p!3h7=byDa(9T@n!A;7>$k_F^qBjK8gMfl1jfY~1 z_6hQR9L+^S*V5(V^FgqzR*<2$tBDyaq&*Q2FISlPx9UEzzLr)6*@`)6Z;Syd;2YT* zDmd8MIWigAIvD@i!Tw*}9;7-CprTAGGj`w{?2Slwg>_P71fY`0s{B4th#dBuu*3Vn zDk-VVnd=7gx>3~j&gGPgl@ytwuVoB=Ow4K)ab%e7G!X4o(H03AgY+|`c>}Y;K!wpPpaY*<>5W?XsYyfH<*i4a z;} z7GUxcw!&uTtRapRH}6u2;66ePQd-X0_1NS0MT5;kF#LEs7Fw^~4<33|2)kj%+T-=N zxYm>1i!kI(^J4GU?d1}x3p*rS^j>73ppfOW7nZ=120|V{bxnU6Pba?tRS14HeLzhE zN*aiZUZ9#4`j1@Hx3l{n6+s;Q^GJ!Evz%r|@0^3bAPJu(sMV$+@1i50S#RxtlA7*x zi0v=SXLvBvPI0kC-APea$I>yo+nG?ppBFna(`c^|9AmW~Sd+H$P{dZ){Hhu~Dw*ji ziI_NYBg}n1+(jRX-Qx=v3mlQK7=Thd10PPNiOqc_U_lvVQckseDqR~KcaA;1h^mJF zS@SmjF@x>lrP^ZJV1l|%m8l2|HzV)0?l9}?w5T-BU`|NOlLG5hjwW;ItKK9$8kH2! 
zSpp~N$Yi>~qQULV75~0C!aNJ>rUljqTZn`izy!Y+g6mAyu$(|OA|_sTLId$r(+|}I zF;@^R#ChmMqDX02YD@$`!|i8_`RO7aIruSB-4uACopyypozBH;5<-Z%Ewg$MQW3O^ z9pxh5bc#q1NkY~P7s*?j9?dJ@k1m7-!%@F7osA{E6dT?qoW6g927KQs+wp>}iH&?8xn9Lw?%yUy0ag(eF@L|?S!+*}eZRm1#U3Y4DKlS)z zhv6nVPlEj|t=m`bZ}&l~^DC!Mn%#?eKvz^j=f8C3w;9P#&RpX!XO8^bH`CGgoCST2 zjg|QlZqbGjs^$l%D;^ux5@fX4+V=Lh233Er8pr(pa{|j zb)Z58vfJN1@SzZ`A{Xe&zn$f6(EjOx;IpJdhX>743&y~k3NENZkT32f$RqqYM@T3D zs9gQ~*RQ@0?8oCbFJmdl{0#8(OCCQ8PlG(r53hWH3;x{o{!tJb6w&{=2M&${-rl@J zGPw75-2bbK4lWGd^1BlbL;PF#mj)m>3V5ID4uudzuAftn-*&CQ0m0iocYws8+WXId zx1+nI1UL$KgW?Y51L`fxFYO9&1n{E&9YQ|Z9|(W90Ki4TYuI<9*ci8>;1z9f0PsS= z9l$=wOZ=>ZAC!7mHUI|%-*?{u9)h~PKLg$>^%e>o1$<|ChZ2tuj`G8>?)Hh`fZ!Xg zJHXeV^8as!bqfWK0-o{Tp{Nppqx>+eyZj#<5Iljt1AGc{!9NcRJe>x|`Z+(oLxO;4 z17ZD=ErW~yoYMR#u0ZyK_@4<5xHNe1xRa-#_!krTt(L)K!W~xPzry-0k-**k9Tp3ySoPu?(QzZ2`<6i-Q8`1O9<{BJU9V@L$C=6?h<$>+1)q0Z0=uh zZ-40iO`oc#XL_D`PMxZwEC&gN3HA&O77PrG4D97;4~8E&7#R99Ffa@-SO`5a2YVND zdly4>Pe*g-m&_h^wxn;MAZQD~AVBv2JN}E`z*x$(VjnAJ*pY%ycyaS9h61)h))wUs zb@UT(S?@vYwx%z1untE$;DygbAR6U8Y(SyFB^V*kr_NRnw8U7k1f|DT}jVA zvbYSS>okRXW}GrLFFcVJP_^FP0gOvSW(8OW^D>AzUgZ=r0tqFF%s$4q9o?1+cs{jIT_7#RMb+1BHwW?^lK@y z36G)Q!e?X%*9z&q)(n5O>mKa7iqJTy1)l^s-qiUR(uY-y)>hMgXw{^;ubYfHP8c%u3h!ZWgI&x#m!Bz+$~d^NY4fGR5I zE+XAdrtTLYvx3qXTS!T;+D$`YKW@f!e6(6l=+#= z^`j8w;_|Bda)i>kI$tGeqzS5}v)0EZ)#KMbz8=p+s_Xy+7Ahj~Q0?Cy?C-*2cO4_; zR}*3B(=c&&4<%$rk(#o|7XV19GdG&fu)b7$x%n=k&ttb@&;2<`39O>T06ip8SP`b{ z(0h-KFLma54`2G(PoE+o-0a*(B9gY^anhL>5Docj`{iKE>w0ShkB z_{F70GogIRioKKw?R37Y)h^?mmz+IhJzPmdd2^DPv|0G$FC4_S zfu_TLAO`%KN)vmTVnQIT9S2bf1!MuD(%(r`q^=OZ#EROfweMf!6P{p;jU}$#*r37N z&~AOTg>J=330ZqAWzC0he;1eh3P*&&ZVut{Q(k)J)f#L262nN|)C*&m!L7{*_K%@- zp=)OMhnHs?L>3dl%p&-)ctQnRT4&7;ulP)x8BCjEz#2Yy?Dz`Al544#^&lz4X4%gV z(jQ|kDQsIuE4RTcocjc-N0I=6LB{TA>jQCSlf{%Q<~&dRrC3l@8~NmItdt6& zJJY0v4cSRoVXnwae@Lt@a^B>P`%+a8Ms~rHsc*+Sq&s6=7vYl9ZLzZ)cqf(*j03Ue zRMmjQhct2E<`?56>JgrfqLMYac+KFU(CIv$`|*dz(>W6sda?bQqw9J!>=DesrkfZ) zKt|u_YQQ^fQ}aQb+4~8VyPG$4eLI~6%;b}d=Azunwm)CJ?VIM1^+5#3>*&;Z2AM!m62#@EGYE(k+R3PI z*rsLTRZ!a|J}H~5XO635en@gvHFUHwG+dfl;noUZ9DZwTMIbZt!J}{ax{}Tc9TWJ> z62C7RTA{SxjWpu3qtjde9*oZY;jfR8yWPL#nyM2~XFiB)h;U$FxWBmOY-w)p;>_~% z!tq17Qqm0L->_m1taC3O344&-lx|rjL{fa7cu$!+6 z-0mam!I7qrE}>2~8J5Q7K$R|f)d{E%JFug?tD@t1787HEvML!^Q-M*dpE(Ra$vJdo zE-7L*-P|()brAjZ=nt7pVW>JR5SBH{OlxEau9WP}IcSDdA58X08a^FReLH|kd)Jm> zVaSF>*AHVZ`LO-MN}e)ObYcSAHWcP=4Yjd$&ByBlUTC?A7Q8)^vZ}B{=`d0*1|#ct zrGDzoFa1lDKDpBJqVX<>O-oIPVbsKch*xFZk^wVfW3V^c>HdxkR1KIgW> zTY}17DSh7fq;k6Bk=%!IcR1{-Nt4tulB8tvM&HR% zFggO+z1Rw{xXwEV>S~*s@9FVwBhT%NTcd?#kZt#-PRH48S#Mmvl_2Z6imQbezPMBi zuYB>j+|+@x?Z+feZ3ueD0gMP$1SRis<0MlInqgPJ>tRGCZ^i!lHZ&1 z3d_0JILNf;V_UtNI1^s1gw#>x{~P281=Eew?^cGv#eu8c_5TJU1mXTTbw#>9s&$^V zm(2+Is;GUMPvT?Jw6;sbwgFnB89(+a%OSOV_7GdK&Ok%sbUpNkp4O?|8PK>{QG~9O z5cW$ZxMUxXQa;K%w@cIZ{3fP}5v8%yx=l%mI(y1@3B9+OcHG*JZwsX#*lw^X$`!98G2{EXa3dH8MfECp-|-vj|wxq z<~w-{lk8`2P17)4o1Q=3$K@Hwi%F}}6q#|p%*3G|TiCecegui{c8V+{{YMk?T~#i* z59qHI8OW)t0GzaRTUd!T+nH4OiZU}<6*EqD-u4^K}e; zRx*^|47VjHs^==M^PI3Y!%fdH>nj4$@7^$O7c*JoYjpTbg0!~_q`kRxk?$Ip2g-aj z`g7pYM&VNG+hXIq4YUyu=%M{og<0*N#^o@G2+L)Th)W-=@`koHm^e!HAXkVe1G$wlIU zhneUk+c(PHIn$cxiN|VJakL3cg7D&lZgR*q3O?~CC&Y+Tv>ghm#F)-Fb}`|mF5&JA z8nj<+MR94?tXW&t_di_DI7Y1l-^Pv$_Cf)PZL`?4#Dp|RY`HNz*UG6j1(JS9UY-iN zX`k5evjjmnmq7qE`JNkX+x5Ye`1T5t#5;Fu8Tmg%nT0lzZIjFlQaT^XD_u-MeBwaa zC6RsR|IK<7USBnLYl74kf{KlrKg1U7OER!~@zM?M$&*4xdaWlz5 zm?Q&;FAINsySLQj795xY+abNPLbG3USLLP^GVOUdC<{qKEF_p>l1dJs0>AXMqnRpg zG@Hd{F-)Q!<$=WZD}DH;lCkb5jB9+g!EILn_2R*i_%SFK{dcnZ=)$D)BZ2Vw&m3m^ 
z$ze@Bhb2}_zx;+r)Sb?4Nx5jz3Xmd(%govz4f29l%H%9EH$3_cO_0{=PC#YC>$J!NDnTDk|>C-2dAATH7;@riC9Xi!Z|s4?5z^>)#EO&daa>`7 z>7-~#V5#9iUO)}M$KqaE|&hOtilvCkj5+ThRg$YicU^04DP(n8=@lg5pz5`lfh$QyH4dKKba&&bs5no5;y)wTQSG)CLMb)PXUyYwTciOR)bfesi0!V+HGjpeDmu zm@kF_9G2(XId{Kn2-|iwZR}k6bI+T+;G!0AZ4Yz;Ez>Lvi8}S;;PC9o`V^vRDT=Ih z-=ZJ$`?4H&rhRZkF z2>oKz1d%#;CMq)h?ldFIcF4!kgxoZOcSUfPM)Qdk$;$O2CmZKt93C%{U1SQ`-W1Ah zjw!2KN-GHE0#}}ps*6=@y#GMt`}?V@<2c(*1~k2S*>_>19A22w4^*%;OA+pE-zSeF zY+F=2c;nozM~+@A1$cNl&EQTN}ZCa>3p_zoJ01|<+ke`LkZE}pjL z&Oeykpt0_d&5HVi$>2A+_?E*-uw_{#(y47$s(e=OkTd!#{7FsQie?|K^4KWV57NqK z4bazb?pNKfauu9vF;={b+KgpOX!#Kmq|5=b)ZBEpZuxiY+niy;Nsd%jQI`!OIEzCl zGNzX~Yj$*;#KObW5_+cl(1QxK#X%ev6(#6y^u^F@M*B$i)(SFjnrJGxX7wQv~i}07Q!;y_=I#0C3Bg{(ugFLYCQQ$n#V&lq`pX5-)d6W zB0zgDQCu14wFHlz&8zqH^r-hp)Q`rz)tUJ5X5e$#;khu2n z)-6*Oeg{y86WgMFKR*JDrM_d0l7eweN3BqA3n3G@^m84_dju*)Oy3c;BND6U)Rm01&dG_5q~M)(4rMl5i* z=hUokdvQ9^o;@0t=Z6PEzdfvR#tBFs12{>O}sQx*>0E4*I@^9xZ}q0 zMuqOZUhVq?7{H`rJOlr?5Cc=P-^O znya+dL6c^k48c-kCaPg&g@jAZMc==K%4Xmm^&(Y(epaLi*wWQ_MV_-n=we|{fcxtD zN_SlK{LJgds;g~+!XKN_;$b^y@PXf;`rgB-QUC3!Cn~g7x1ne zD-|x5=62>RKhHmP|C`RZJpngr7Y>jQ)mw>iG07A!5mGMgGuLaAPqI;jmW&3-k|jZRSKAu$?so{1S0mt5dl^FN|D(4rfG z4Z3O79~!6CsL(dlO=3ed^~}Vf47U(teqOavDV_EVf+TPyhKWgwzETda#52SxC-Bqq zd$L%#eXGRjL2-}p-UWRb0aW(dcM=#5VZF%675HRBb>mPk`m6AIUqJeOq(D&*!A*lF z<|vH1iA>|FX5PGTLRUlBeR0;dTbiS#h)c+r%_%XH&#ZeekFV8Ka*7mC?x|loYvo>{ z=J_mO3)jus^|kqYo6X$3cnM@3$)0&L!-eel5Z|fRu8Vzx?pv?Ij|1?Wq<6Jk;f9q} z^IYiy9pE}XzQ~3)c1_Eb#A^YJey+&toio~RZ#*Dn@y1bhrcMWMctBZphYsMVH= zbzSLiv#X}S62AcFwz~+>o^Wzh4pqR-8Ng}>i-w%e^Q+$<#iXoGS%k6%&p?@!P6j7hf_Y#&*=d`_3^ zX;`{$&##V%VK%ZMu3mb?zY>3=w+(x6f_V7~amz;W_OlPW7hTC3VouPv0AQS~z{Xp) zS7iFp*Msd00lK0+UkmT@s_)8i>F~90B(pqwh$eKRqYJ?8fsKR+SCXFS2Kq7jE5w80 zfW~E>EB$3Wte$ezy0a#S(tb7P^iBQEkZp0Y?=d8(=oxPDs*}}x9N7A&0MRR2Yh!#4 znfS|2;Vgxv61;13LRD1#gT+^S%+{~5moBh*&l3|+GoVwB&~`IVUJ%pego%Sep0s>;p=yDt{T-@&VLh=;AXPExmYOgC@Nr2{WGCA3Q50gmuh1u5 zcw#IW$oJqkv_9>h*NwAtI#7QZQx^E%!nLh@sXkQVF5!98yTO>1loI=NM}DL>R!e;N zSq;VP7V^+|^XQFD^uTdK%9h)R3KyXP=#*=X*&ymV%v@=8N4V5(Jg#w8Ub5weW=weR z+46Z;6w^#Glg9TjZXN>%8l@)kds!KN9(7Yu*pBl9n$;bvCK{D)lJHESO*DmtZCGHKg??rc7pDO$|)l-WCobTMnq&S8|28ZT2Z ztd^o3lwijf%|;@rhv;B-EsxZJaM!3s=lZ6k@Cu`v-GQsmSSK_a!{(|L$(vspw!EoE z-p(>Z${e<4MIRpbd-_+S`RJv9*mLeF6LX}XO@~>CZ@~66#BfT@>iS$XX9g26s?^g& zauXpULVzGaqyXaK0s#IFPZ{otEDY@&r)6n&o-5wRi8* zL?vXmssn5|-xHxSISL@gla+1wE#Kp8D?c{#T9{`vgbAu0OhduJ?op(u9XxaKKdW*9(Z!g|rzbCp|+vG`qi9|A4qp(>8olswv2o zIjKhs-0w-#c6-3HNz4+volu#A&GRiO$(rf4LB4b*ZcMA)b?eX;lx;BW{8N) zh{{+@r@9qBI1$*;ggO$m^$aDgU|H;gv>%`gOdSsSJy3neytpWcas~#YCt)el~PF=b3Aoy%54&BkTi@?C zfJ`1ctR}Kc>2N3;s%UM9cQX)4>n|4MSFaI%m*u7HR2O1{+Ax@)Iy>f{S)Qe_lew9i zi<6bT#m{8Vc2+^LpA-ywqchUo@)O^^bqruEs2@_^Wi)GW^sW8f{ze*VFFp{pcZENq~BHK!5BW694 zads2`8+Jtf5FX}@V8_lbvrLt}I6_4|VcCIp-IRzW3wCgMRD4kN@_YkrEj>J( z1v5IiqEM+uk*iJIN>mvti(7QxH`1W#t4AF9UM6;xJx2-<5t*rCIXj48lmJ1giOr}J z3I{sLEhQVBsDm5RbO@y&LrzpI+;KsBz(;#YfGdV%k=Rjkm`HOY(kyf*^*VSU7=%j;9MN@}7DcJf`uSd*0`9H9Qj+x=+nX2S84a{BV4OwKGvsg59- zA7sVa-L00o;crvWq?$MVc_8xnfvAc4ck&rKI{uG)pjY@+uaj z%M`=(6sy32K?s@l4{t_>YReemaxs#>Eoj9>nl_uJtmVe1YfVlk_K$bf>L8%Sx@Y%>`dY0t8C_)`;TUR%73r5(xhvN|lC~$32t{a|gOlU+FzWCyYAax3=M+{0RCwg< z(qE*;$G%A!WV+HY=UDOTwCO?X9zC-`_J+_B^+bCnHqXbb>-Em+i|bH0O17AEkE#z+ zFaBFv1v-CxB7xheItOng&EwZ{AmmDq$ts7hFRgHBu@`H7%*doWyA}fLW%yz}{ z;&i&Y4Fw}Zh~w)nc3O;gg{Uubf*R@ZcW8}W>jZfRxkx2$mWODnz+s`4g1S5qhn8O3 z?mGEl78%ihL-b1cm_crjsMBLOCo6FabK)cKV+Qo}CX_GK^>C>Ogxl%yM!$Zv%*v%Y z1Fn&JdXAY@;RG}KVx;gDd4>bSUn_Z#a6M?E<+yP(MY+BuPqA8kZyz4iB4-ob#I7?x zY6sxAGT?4Yznt=?@x1X&^B@+Z7O{7X>5A(%ju(C>H3{q-OIb^Xo0Ic2$j 
z0{nB&&R>o{uFar8`P&Yjr;h*J$MAQ@70^QOKksUIiu1II{R@c()M|Xv(thgvv@7|` zIT5i^nZ6(r~I4ypSrIs2MyxukMbQ7m@0_!v@}0H{U434%lQBR literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_index_name_pre17.xlsx b/pandas/tests/io/data/excel/test_index_name_pre17.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ce66c40cda1413be7fda819598f5185a24e4fcc0 GIT binary patch literal 10879 zcmeHN^!#=uYX7mhK!<8fgTio1wcKq-z9ekVZgKQW^xLK~lP;h41LS_v^h} z-@o8}&kyH(=IpheGjrxy&t7}2t)c)62Y`A6g$M-&MF}PEJn`8U8VZX35fl^-6e5hC zguR`Mxt)vQGmwM1^K(`YTN{eka4__FP%set{~iCuZ(u0-xpfyiAoLL727|USKc<Gh;v`{ZROfd*CxXmV^b>%zxVcInPRb=M4?>=ifv?hDcs ze}?jBFA{4KzSp>A?BiR3 z>#;~UbZxB^hK5h$x+5bH<2WK4;tqg{~QYex&RE^-EO-Q$PEt#3a^?As4A+1s%Gi>bwUOmPKwG1(T%XTJV&3mEk= z`833f?T<+?pAiMY%J{YV-hP~46pY#Fr#%0{RUU;65TI^wuLw=Oc63Exq;pDLlTN7lpdhl3)ERMxvYeq z5?9}^2(4=5&D%*D&Gef|Dg25i63(xS+a$LT2* z$k5ED<5V)U9p}3@i&9p<8Wk7z<&#mFKH9u*e!6vB$Aj6RZeFAhW&KA3fl=heOOOfc z-$4?f_ig|MGGYt|1w{mf2<>6R_D7t!+dEmC*xOtGn9Tl)8ED9i2C@C`KHevHzWm9V zpw^&qAIC(uMFLk(h9HB+18Bw9-QWmPW>>u>U%D!{fW+tGtdHa_??q^4=NH}Q!ZC#W zK%>r{sngf(gOhSs!_L*MejevM=jooZ5T3%kPVOw$2z*Zm0 zyc9sWy@^eFfiK2nJB4!oIVUy!Vu`(Fj%grg?5VL!@7iiO=cf>ckR`L*gY(lBQj3uw zRxzR&LXo^RtDqSw=0aDUaLov&EkK z!Q!@2@h7d^7O8nKa|#!Z?Hz4UYz*;y3zL>@j+x{lJhr@J?ujv{tpq3BrAThCel<>{ z8{m6iP`IQJry0~2GM>YCJA8M4GG)TXD6vy@cv*{uHvkA|xQg}#r*(Z?^na^uYTk=K zc{`$dbM?BWYoj%f^({u*6Hk>d>IMii<8qCNNnR#h8?+w%aozsTZ|=p=>0#KguqJVr z_m$-`Mgyk~;SUy27Aehc5WotxgC2Kf{iCG4CHas=>^A#Y^2rJt<$sc1J&2bj*CZY zSz~Kh?-E_r3>~Zu4d*5nc(weQ`zwsCh~*{*J-X&E%NVS%0o#u(iMpcTfkoYJ6yaYS zoGSb}a9Vfzckd%M+keY7H7C;6TnN`tk)WUmesRs&(%js|neFF=>xXhBrx?b)W(V{v z^UfZMdXQfgty#uLP=6WuK$AIq(56K%4<|oY%DL=oZ;b0njN?|R2;YVWQ!ewrUQITuC}AXBcFoO>1k^8Tq@8-btP*5XBdR^q z9@lk~;$w*j{ubpk9j z#v)?mqTdF?#yoF1CEvCyF=}mz5I)S5i@rIpiwKgT)2tFc=FIp^0~1^h)pQUUCLYI) z#Xlo4E;AyDli9?{=K7AY*8k`bPj2_`sG z+E?rd16>p~Jgds-Mv?FOiT5>0lw9>sODDR&ELkG9WD->%W~j8r*wEsd{%3M$sZw4Q zJrILk@HVx@YX~-BWM>Qta^=F!XmH>+8bO!)w)B1iN+!{3`Y|+o3@LK{8Hn|O3Tix# z!F%74Ja@OS*IrA_S|yYM*J78I#UUCK?|qs(hFThjNlV1bRWarO=yNG@1EqtO823-< z1=SA_58h;PfC!5ppIwX4T_gNXK+RI%s+=ICUWbhS2U0)zN$OV$ahdFZkc(H>lHR@@2ZLcMQhwG8 zlSdmvzWM5#^-Vlrwe2EW%zjf|f&rMi>Q;}4p|Qh}Ekik0?=zeJHDMLc5odO9nxaJI&~3KHPKG&c*somn3eojkCDp_7pPs9Sl|B7ZV(ROj2IW@d zoRc>%TPxrAY`F!#FMDCS!!5FyHotn?a-fy!<$wosvxHiwOKxZK3tZ{akDHGDHdA&ru#P#!6>&!uNGu$Bb%n#J2{NdFA7@51*ASD zj%&Nrts7v~n+f7AvK`PVX7+Ks(dntHAFqWU>}VR>oPdm*6@=?LiQqkFK}z!WC=#H# zaXU9{$!%a68BiWNsachls)m}EYxFiiot zHaxk%jmTwuoiJRP5LXlCVx_Z}j?>&f!4jGs)*H`RC)2C*HLn5gK=!QAu= zYq$wj>*=&aN^%n!r4vrJUUnJ<#lw|diOcFiZ>+E;<9qrdr)!wHtmJ6+4A-ToE2l~? zbDVHj!%R;BwWR^rH?Nu3->|$S(s=JZ3eny+i1ubPM7*t^?ycv|W zjyT*FRR`;UL>Ny2_y+rIqo89yDiWMnC7ZtBGMw?WBNr21xCJdKG&8nqE zeZPbGv?I(K=yklQ2DDwOy`@^VzMO}ixeAH@qJx%7hR zsJ7hbn=kjrB-ag9f2Tx`@h`6Gbqt{mM5}R3>CrGD)V3PNsKF|O0?JDS~ zEVuwd>%pCqB9pt>iwfhwbUQvSntbwL3n`Z9#KL{JfUg}bSfK*McsyxJagMqWFbI{}N&=7f+Z=wi9iwahRz93(nH z$|b4ffxtqAi;t$t2bdRigORn81d?_ed*E{g*ZUr_8bb|CW-yOW<`z|!Tr8`Nxz!JQKV1+#^;>Gtj|avyEx%>}%h+@B)!Gx_>YvC1 z$5^)?d*)nGdLWK5C@6#?oz-~3l!n{`hNIHS%}R7~mr>E3&h{B?J zJKVjF7`;>vkDb7*qXtLr8^gQC^ftZ=^1CK#Y!-DWWxo^FWEu+f!7)I>1yxMB`(DD> zw5V(2Whs%Z#W^CLNcd&RF&-7*Z>3QULN`*CnT! z=Z08S^&w68MNjcl*AQOuQ#TOqI!l{yMmqPNbtAHWW_a!`)%+OPFu8cQ%-z(hkz_j~ zATW$gSoAGAYeQEjFMp`dXpepdZ<{`fBMnvF`nYhW`;(c#=1@dLG-YA!788m~@)be! 
z;-pDHYz;xsKwh26PNw@;H1?TKv7p0l$rj2&X9q2a`zZ&3z{h&{ryosF>4GLABT{cp z(lTuNyd6xaOvCvXgeM z5ZIdcMV}Pm`k4Q_eg7SVy8k9MRjn?}cgRQ-B!NKtBP(`x0oj;4|6p>R#)Vp4MIJM`7ArFnM>8!$I?GKc!^S0DJlBcG3MJ8|YMx zv*1pbfC40Jg*bOpY6ktGd<5*vSr{V6YZyy z(lgzG?*&%B3FNXUEyQ+Xd;`y6w1Z~%5-2zOSt4+a9ByNP#76J?8ziF~6y|bo z3as9X*H0AC!&qh-A5%KT@;Q^K=r}V2 z;Viw}JKIcH5C}Vzc#}`+1&TA@wxn*pXL?o0;ka_I8_84T#}WaX9VcexOq=_<*RMr0 z0-by3s1c_vp6L6ytV#5r3pUYQJKmS zB)`6}*ZM~4pCJ?DGd?X5B@DfM_{1*FJO7Qp&Os~{JWo-zz2@U(N)$_t ziO9Nz1#%vB7k$4%TI-%$%x|xR7$?OFw%@hYUr=Suk+@jc5fi+)ywDw1J3IBfvTAD{ zq4vXLwzyl*>b(;*sJ!)Xs@JbL0b#;xwM#2+iwNc|N?=S9DB+=nS!iDLoYO9m`Y;d? z#`+c^kC8d&%1o|2L5CF)!4UU*Ld)sLBNX>t5Iuj&rRII?sYkbvFZl&~hw*lsT8j63 zc14wu^}MOPvCk+}0asyEM|p6$aU>J(eb+N zH=_1J09#lZQMRv>?||clq6T0IC)xx&-1&%ryPiij5jT!}S-EkX3uQB8W~ijtlnkSp zs^6Sw#cATM{5Rz|nDF>GuuOYm{mm z>Lzlanu5~tX~HZdSf7-yl*y(%f*}uBh-P8YVk}c2ECdBRWd(em|3Ddov}2Vp-Ye-5 z);XguCxppa{Z&g7Pw$Ys^tpC;03C_F*)F9GQnOT4@WcnsQ+$V6NwdrD4r(5Bf|EUL=o8qRLC(N*{e3~a!=smQKw-36Z zwQa+E8QGFQv#%@qa_fY4#gzxFJmE0L=GYu~(T>3fnb4_$H25vFcZFJ)1$}s^mCIM! zP(iII6P6DbP1`V{u!NdiGB5SxDPt)k=lOO~BB`IPWGN67Gix=!!o4i>v))t(vc=6H zxoysZwMWpbfbr4rEYCQvF$<*8*QZVo4Tbz74TSuhZjWY=k3JEXG zNz^d)_h(;hvA%S~oBM{xf0huBnFgPHh_#u1{FIC#D@5|#p;td>xtkGc-tGjkX&e&>L7^L%x+19JgK3S`Hm_i&OB;m z6+2*4}9{a#$+H}4Io?gnFG?Rc0!(Ec7DqFx<&x9*JR1GD~4&hgh~Ae z1UHYKeT^a$#jT7qUyquxNIZw>9?i=4izXUnQ{uH(dSygYs0^NZnRbNUeC^x>S-Lno zWN=g1+92FY)VhN)G-K&Ua|aD^x$mUde88-U3}o>WGM>w)!lguyfhOkI($M5z+fD)G z{AJvApBuIJnU$PU%UHDSzO*(TPZZ4NfaJFJBV5dyGP9UvWQL1X4J&2nd!;yu#52*z zYhm8Ax|T%fz_@EvWAp4O17F}&a@zBB8S8{(;#glap?L|aAeJ;#DcV}5$(SQnE$AZ? zd{5mqnvRySn8maVTwhKt6?8?4e-XJaX zSx?2iw&~Qqr{EUewg+wPepd4g1cMJ?g53`wuuFlLerlMBh0nOrAuc7qR_Sle{ecvd z#X$%)ju{3e;f`8c`HF4p0Y($f$mxfOOs`kl_~L6prR!I z&d09O#m8a6-?$JecWQ;i8G@UQlM)}H9+=%;Ef1pZJZ|p4D$*3@NgvfC+urF&(RRBd zv`)y7xE@g*L(K6hEXOR zq6v2>Yy%3RC}o@Ng0%{)JNq2gHGn>6H1XP|Q%$oG-$1$(4W#o`X(oi(CrSPyt2YPF z;?Bu{wXN-1Ft=I(#jC-MY0KWt+uggWW`aE)@x5 zMGdPN^=1Mlarx_`!*5ZRs14Ebr?B61NV{4rL71{`&wjERK>~O!I{m}-pTw&#ti8a|F7c(Ay{;xnDPe-g3v>{ zyRc9%lzlbVg8jtdy>)CH3$7(Z@A-aYT7S2jiwUK^GXvS_!H@nS1LxXF_7YaHGCuLWKMc;1M8=bZ!9pBro^lsscra=%dJDYv+RF@#&hwBEdB9P*+T8;X-?>}U^y+WU^c7*0V*A=JeDJNb+q9R9~W2$VI|6&Y-CXJD7F&n6-fbysZ36uqvN|ZR*okaWSuxds!|t z%()gkTdg~=+CQFJqkF+UzGl`s&&jhLI^D+o9%-)=5-B4`lEoA{Dxh ztg-h((>!uir)lQie$M3*-?Xy!th~OaO0l;_4NvUIS!^L-ht>9tq9lW^Ze89$AL{V3 zi>(&(O+Mz+tiXCkq78au*BW8|ULFdmtNFgi<E%ZzN=)9ob+5NOD(96yNJ z2Pave02H6!7eQDoq}~#1Eu*a>iL-8 zW6%{S#qZ-JHPIBDh#$!n>R+6OhGBtBLciY__t$^;*Y!8s;5n=Rn9z{-^W5dXx`Q z9=3sgp=hE17UgGC=po9(BKy{e|)YvT***FZ#K7eu(n0F7*o~ z4euw)uPW6;*N02>U#=a5KV2U#+#dow%$9xu_)`4w?1#D2Lxg|k3BUZIpn55xp#GIL mJaqr3(*NCEg641Tf9k%90z8DTKgxG#P-+m$<3IlK>Hh!~7seX^ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.ods b/pandas/tests/io/data/excel/test_multisheet.ods new file mode 100644 index 0000000000000000000000000000000000000000..39058e67b4d5bd7b32a66018a2c78dc9aa256328 GIT binary patch literal 3797 zcmZ`+2UHW=77e}kB1MW+ktTv5y(olE=3n*12F zqydA%Ajq%)2sj|X59$d{}c zh!m3(K>bIAa{xA{j_q6u0APi*&5xZB$S^+$LM{~M*J0O}TA;={<0kbz`s|E2lh-9< zKBhu_tW0gw9z}{mo#O8X^$#}igpy%rcgzk}Osv9SBh%0Bw&3wsb~ZR$*MjTo zql1Va`daYz{adb|ucdM*Gq#z(bynpVU{d2(9Mune<;wukHf+)bCRzi77=T zm}j)NQ10&L4mLlETOG-)MzK+x#7v~NGaaU=-r$NY4CN)dU%C|Obh&e-jP@X#RUs(- zb#!_+#cxMKZ*=m#W{pdOJnzSsWF!{QU8O1imdfq1qd3V zn84dQ=TcqLtY5?hr&$u_rnKJjcU~xi$%=Zc119QD<99&~d*j(cT#(@9 zZ12{R=?}Tos)Nl0-$Dy|=Cn(Ua`y>jXBb6umo5&H$q5zRJqymF?L|2@PB{WLq_f{X;wI1H8;pr9^^yVsX0tP6$D@MS?pW zPc^niqvUEkl9Y4Tw^M<5Ymt4pZ2eAz+eVaM&DNuFR!O`kC{&HYdq`QMBzIiVelKN< 
z8rtKSP@~WO;?~i);M9hK&q5rX;&V$|$5Vc<$@2${nzv~~4w}!ud5$mOw-M|m&dq1g zy?LO^oA9;u5F`MuNt|7E3~DUee(iNJIfJ)c8Eyys@*WeSoJvdfg2`}|DnJMKG@;YY z@N>H}yg!a64-t`BJK@)^WXDjKXhts|;NIaI9qul6-&RmihPt9Ii~7zjU^|pWzUSK) z@Qv#60N2Hs&$}>YCmI^yt4(sUhvXTP9CzAu>u04D)I_gWew1vU-fohyIiI7QK558uxN9F z=r@K%^q|2c2wqSQsgag`3o|=dZg(;Vw+-XPJ63axqDPpB3IwD$debM6k?C|up|0?K zbNz|Q&95{3ZxrUAo=^e+-@j7;{#U;;DiFqLY5)KW1OOOF`t|gOBYy~kX94Qr&bP-lJfLI_6u0=ID@+OVr9>J8Ay<~%S(O3ZlO6q+%{ z-nqBGKPP%{dA(wQg--Leq-vUD6S8?L17R>69Uq8lK%O4#o@qkj8{43Fq)d8-!^EZ! znh$q!q`CD2b-2$l)YBPJ(K}rDIICDHzsL|n;57E%LbhK_I}quli|K1eL&R)ZWb1Ir zxR9{o%BkJ2y&UUdx`J`*BQ=%q{A6JGKr-%d1e|)aJ{#glTYDPX;U^UNkyD=$Tjb@? z!RptNN5fa5osBoR<2TpzdS)X+MfLhY;cEX9qM>osMu3Yt_?2J&2-b$DF^+wrVtNO! zX`f}*(37`NFL>MGm{wt3-W+*ju$WYFnCDu^KAvKt98b!4fCE zzLtaKgp1Bb4M)bAsCN$Pkrms_^aRKO-7Oh=V(O8t_h|jgK;m9*N*mubgW3jaJ}FnQ z)@oIvw$Ailvi9Vik&xPUSh41c`6SXC)P4Td9XjpgagRcdV~Q%Hj-Wf}lv-ay5ch0n z@p$*Or2)xFi}cbUCv(=mYX$D~`$n%zQ3eZ6A~QE%K9|b*&7<2?VXatUWGGZ)@A%si zIiKgcH#{i}9}-TMAdRZ)JKvqm&przio`ea(zmREiS1n~eA^7qKf=oo^ z2-86HP`W=YTSdh%G8+2)Njj@T(iOI05pC;+XrQ?jbL4a^1KZPrE2C!S0fK%YfcZU* z-_K73#ROyz0mXPGVLamcSP<+&#eJ9BWGK%^jfDaK#RcYw9=0$5@Rjo!o#NXQB_RfZ zd?HDNxG@NZ!$h%jvv=r{)Qn$%`Ml&NbR#A0;XCA~qV{f!e3f_l^Cz2jKJ*Xo&&ZfHIo7m2$#v(~ zB$H86|MW7T+qIgY{Eez9XIZB3Yj-|&h%&6!|IrY3zk(H=s1P&Pf^>)09Um@EJ$%!H zdnoa+d1ABNltb~ARe5(_rMmB$%P5u>LC8bIGo24sii=B!BhfPTnGFIfik|)oDPiep*7i8ifnL zQbM*2Z3lfGufuZ?d%HXQK_>AB_4BFS9tFn{@$avSZZ0Mgjn3_8g(Xl~4a95Tk}aF6(TlxZ;f(c7*nO4>mpi!w7o!*+tchK*bEd$Bi#G0>3eon)<&3W zcacQ`TYt^cWwu!23Xjqfx0S5z>pAD;Rs3kA;poUF26d=hF8R)?W&JV{fjo59EG;@h zWMpwpS(d6Lo=~b?%Y~nLH+#+f!i*Q)=?RzYH9A#@Ee0XWxPd`sS8&$GJ!R7>E!qU`^OqI1s+Sgt&)JdB zV^CU>$K%Z5_r4Z7SZ?(9-BSb23|15$Dy)0kHNnbe00RS%2 z(@ft+SJuefKpqB$L%ku0KgwZcx~?yU0;B$3{D<~X?T}h&iJ~+2X zoIHb3I+r{}KWfy`Ft!Sju#e$TN2Axo8Yj~|m`rrp_Yku#GxGxKPg1VB<=emHovvHZ zP)dJ9u$%3o@7VOH|RDH5ZAeDxEoPIdT4ME*SPOZEr3n0^x()roc$6K9Y&g}*O5-(yW@j?g_Biu$-5+#NY{!Hzhaci=p=8cW&FT=)$WGQ9SL0;LVM=hy4m418xrINCN{l_ic3(6Lya#ja_20N+7T$^e z>;;DteN<)Bv`=%so%uTp$;f5@uuY_Y0r;mu`?31-`rEF7On$KbzXkirupzzvG-SW> zer^4J@;FJY;(s-QzY>3y%TFRNsfGMk@%$R%*EIVXLX44QO#ezckO>v3H3a~uNVhO4 KDIYTZnEeNEl)i8P literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.xls b/pandas/tests/io/data/excel/test_multisheet.xls new file mode 100644 index 0000000000000000000000000000000000000000..bb6cb35e34a578f62038993e982f83a3df7f361a GIT binary patch literal 24576 zcmeHP2V4}%((hdsSb_?Qh@!A$B&;NnQxuS30x@6?NLU0!P?G4$lT$b`2Rs!-#ee}$ z@XYxPXFB)Hm@&um)H6NLQ`xU-X2a}k0N#7w?|b+CZlGspYP$bbU0q#W-P610&zas` z^Fy0^gmrWyI;2iwK=c@N4qRiR4iiFL;R5gLBoYZtBm^%1KK?=)_yV#Dq49Je@gV6! 
z!Zz23#D`=6$q53=)_CF@@9|QVU3CkXk}Ahtvv^1*F!H+CZ{|)D}`Z zNLG-nAqgPaK(d8o2gx3i10+XCPLP}-wTI*a32h9MJLCE72S`Aju-*;V%Z{m`r++RFXi+gp^E@1lOrrK%Id<`atYID1(-)N{^mO?{rBB zaEPO15CNymM#|#{{AG1-D1X$K2?`B|0Uvs?o-X-zoLK)j5l-dof)R!DUl|fQhFJ8Sl zZh^kEW1#j-V!iEVX1X=2nhMk9&sXug5$NHf(Nv0n38;CTz5l%GJ)&-J6 zBd8^&&IF({SuVEjH0QJ>?x1x|Sb-Fx74K`RMYISJ4Go9P>k~mWpRxeT&t>cvffJv#dH?1G0K+1V!;t0rkjAw1f zr3-CG3hdnM>^!qa)b>okA(2(1VQ!6f2%9*@&Sr zlP%^{gGyx&CY!*TNwh#(z=f7HTF`|-lFF-0v~{ z!IcF{KksCQMcWmwNGR*Ag2KU735veN)O4BAG9$vH^@A%F6#X(81DCKMLF3?RhH`#d z&$!y5jK77#*9Bih(f@!9)FHF;%^K(x8t9wU(s6}FnXk;Q4AkbA`>|3}Q*$G8s4v|w zaD_%zuOF>{w3g`sS7|6ZOP?_5Q%l#E(JzyJgyg}34qabd8KQ6~IxhIobX+Zx(gkn{ z)nU{j*qKKwEPJPwEVPP;oO1pGdP9C1N_WXz8RT{0?Cg65|-jMJ(urJ za+T7>IN-8Dqc~v28&v>Jq)`RX78+Fm zjigZpuu&RS0L`jV1<*e)OIz5yWGTL;{7LqN2*4!#IzfF#;m2j3(#07QH1;H!uRfM{M@Pv1u5H_SUg& z2nhYcOLGUrS5o!rgnnbD8N|L}Alh5Up~>88B!9Se(N3HbW&^AW#J$5L>9Oa-lG=!$WP&%FD}DgbD}H4#VY3fpX+1nMsby^LT$&Fb!bT*M=q4Ad3Y&-l!`Q%hva!F1BBh%edOvvUE*X> z$vk8>CXtBcATk%UZQC}L-i|bOZ$}!tx6?FPZ)XeDQ7#m!2R(yYq15?B%*^(SNMj2{ zyBlS{NYi9Ou`SOVL{V6til$U&-kp>RuuYYd%OAuV4*a12PxD*`_0I09%ti2Z6gFC@xNvn(4WiA4wMD*eg9aJS$04 z;svkA%3x2707fMN34(nBaqy-McL|^e#dqSyM4u;0sD1uHqKnt4S`g6z*E5ilsR}1A zRZg~AIniB(;-o7|`26sSdQQz%IC-mba@5KRtwdTUJyF7o7cbOvYN5i(N0pO{R!-P+ zrJVFd36eKr^_mJRpsQRl@kt;Qceb%?WHDcR$JfUON>|YHZJ5D<`Jy zd3nFvny;Rdl?o>{w&$#s6Vvv*yicD#t)7#$3MVzT=cbht)Aqc)CD#+wa}uaiW_Po3|pP#R;PR1~?gXyt+unl4jw(TVP8i!Xx zMy#k&4{E0X1^Z%y+OR;G7DdR5Tu=3&_6kriB{rx73zTU*gy77Q3IvoDZ45Dly!v1@ zY|!>BP^O(g&pJ%hgE}fe!9duc?krHINe~i}{*!u8Cj}_j0vptm1~Dhg_n>T1dS+5ahG;xQTq@99BS{mmfouq{(y5J~86*R)$4dh)3}8{Sx)=h&ztq+6 zVRHn;dEL`e7xTYTHY!dVi4N&5-9Ji?%vczwoH8Jsmr8+*V9yb(0tLsU#AT%{=1)?_ z;+jkJU<(ZHMZ~?LDbT$7RP=|47|Y8LaUq04)PtUd4{;*Kfy~T1zS5n6(p?eP6n5Ld z>kZuGnUx|Eq$E8PE#^nC;p^I*l} z982&*xH~ulS^@V3XTUz;80s^Il2aCaQ3e7_S2784eM{QLJR;1~ku2srDl1c0N5Ur& z0ENm-0tKc6I37h@9_f&$2PK585_+J1b_oF)0mJ4fc}b5v9+Y%umE^&fIwB~edGIiB z4D6IeyG125fyfsoVgp3uW6~#PK~<8K0l0?HTDakn-ZDNBfUu~pygTB2U)K9@uCC~P z^qs8t(RZ@mN8e$;=!kk8myO2~n>ML&R76DU)TllGMa zkW=A4(IrXS*obt(+D0XT(Uhq)wN5jrZ4{U&Z8K$Z6n${HAyLT`3a5x`O#E=rpil%b zl@3-GP{#xY4F!xqI(bHuV*+u}Md`yj96j74>{kj{7~k2jrYu-`eAfUg3HV<8E0{UL zc2>Z`2+xLX&Vr>!cnz?049ulqyC`5`jPJ-&LJJlwj`7lt!_X$I8W<9Pux1jKmKg&B z5zF!Gw1zrFfdQiLz`n<$T)k4?L*tj}kv2Km9XkHDq$_?qXz;W2qy);M(ozNSB0(HI z+$`o7E6dGoC22(l!x(`3?-L-&Yzn`ZA%I=SXmPkIq8N#oeL`(OB3X0vdTgZc zn0-PIUIj3p&F2p#^BIdY3!(m|@D~NThy?*e&^0sROC)#|0Vy8D#V?n5fx}6H$O5&d z$idv8?@JcLS1oY88A1#A7D+8%k67 zzAj-b#ew^to_O-Y6TZ%kkI58C;B^q6SVP?~UmV{Y5?n|RbLl6StN`>Cj3>a9%mvz9 z1`RVW<+~d%WzqWe(czhaZ$z>US@Cp2IKS&!`ab9KYiHLk7-c=@osQn*lsVVzY$Ce9`OvP5a7lA+K`QT} z_}~Zerjgbsf?jw!KG}S+^1yk+(|?}mKjhiDU;1QM4~i|*(~BJX%+B#(Rdw~xzj%%r zuy#Y#vsGXDTNI@%uv>pi-+uFsZu1YP&s%9QX7~9ud);SuiR@&+d-xf-l_gY^Y~NQHyiagz;M+OE5vS7opMAV}l1;6f!K-a0 zPuoOUn|y2%cF)5t`2M-KEqjK|`6znqec{mfmB&xqtaW~VYV3pKzeSQt$*j7wK5y?_ zi3U|jAj(!&R{52UPY%NqU(h#h?1@enrDSvv;yHi8IZ%@E%vS{mXI#kbfAN@wLw1Mq zg~1)x<&t~O(-LhhZ(2UT8F^CAa6dnP{i_d~yVh0B`FL;16M=b2h2F9I!2xFy?`3!W zaoY6L1?x9D@JHlK_;%gbH+M!K=yzeA|1X}F+G$C6(!sugcyQq+QN?Ae^E>x^yz&LAqFT{$lb4#$fC|orgc}kv6Jcu8fpLzANsSpRN=NfB z7DpH!6PN_v%Uv^R;q^{&`<+8$x1Fil|4$NJ+9v2^*~Qg2uReM(qtn?tW;HANG#Tbr zs^4U`=oew(?%$`nZM58Uqi5UWZYlRpwtW07$Kv#kc`ferI+f8s*REz^!Iqu1f-5^+ zYiwS*)sEh4pWJm$k0DPleQ19_^tr2}A=gj6@HcYrr*DF>e*|pmz%IiOo34TfHw_QN`ka41@DR6D-X(f!v8y-Md=p7Sf2m~A=eyMAAXbnkBV zlkO}_eO^?ugYZ%HFz4yE@1yTt_b$FR>sacLu4b2O``1y}_0BNsW^cdQCknUmJH2q|`wipW{o*y5d5!zY z`Fe0fm%CFB4RLd{OAI^h;Cs^az>2MYA#JT5Bu#9)uEp<`7h}t#-psbKT{J$_nYX)> zV+*gNR|5C!_%P(`elsh(RfXT0&iiz<;}tjiY=2jykZs{p+l)9_=sW)DSaDyyKf3L9 
zpWVD^hm!1Gb=Qx7ITN~LS?m6Xjt&0d`RO0ULT`(6F5AxKbQX8XYJ2td@V(Ov%BEUg z-I8^%YT2hE;?w3x&w;{<$lbMxlWrMT=3QK^cW34Bjl-|}K3npu`IDutbsZ1z%Ij9T zPpy6uY=+l4lZ;;Z?VSB8F~^|y>)Yms1}A5gmVCRcEN^k*t^(oS(KXTYd}oGRboDrD zS#_y$kk^xOX5w$BX3r8jy4~OZczEH5mwZUNgwpvfBbpe`}zAh8#gXkwYuA%kKVn0 zaH&VY#zQ{Gbj%_K9UF3I+UpOyR}Y`o_0!9mqH6IUA;%?km&X41<DHZ z&3x{@cUbE<((?I{{VitXFV7p&>Vr?!7s2QWu}?n^H%UHhcOr4c@e^cZi`@36R&HxI z?|qxI=6xFB47hMU{x6eP|7QwTr8J;r#<~wG8v-TUrzs{eyczH_dPadNko?Q*>CQb@i zd!}EH6Q$ztZp}>NT(%uc_($H+y#_a(`j^E9^iJ>yU6fTcvBl@YXRaas!B+itc-Kzi zU%8mO%jA~cqp0>9H2Mw(-=I`Hd)n?XNvsXK2e%MyKV}z%t zaMg?^Kin?()U0==zWdr07K4vx&ze$w{#DBxb$49fRqg!z&#cMr$IC9pdM|&u=TTjk zTYJB(GU@$kYOmKZwv+A}n3YXz)}~;k|CIZm{d}9<-8l60`ER~maiZsqp`H&`n^?Wm z@tIls^>y>U8+tvSQZh7S>EX@&eJyj1{72>YNGV;_`HX1d(eUGTK~rl-r@zhi^ZecH zMa>nb&9@H5clplgc1upoCrA4wXEU1*(wTLmqCc3vu2y)Y&Kb{j|7= z7e>yg-Qat^=ODv2!<-^+R_^leWKJIIqy{oYAy%yIBPd=OeW8pUO zgZxSTny=?9TU%5S)YEUid+(zg+`nDD@td##!S=h;b$RzzI$R~gCSN@jbn(6YgVVeP zdrGp)Lqj^}@(Ln%9{po*NpxE`x9i-CwardAU0Eo+KZ3tu`3ToJWPUI1>YSgGd`|KX z^?uxa*Q#lj1?`JQz29C|8l2Q+?OP&h8QSu#nXue{quuH^7S55^m)n%}?zyeIU+HXa zrQV>*4U=nne)Ze;`^H+%I@Nyd-m2n~io%Md!Jb=Y4r+a^@X_(v2aYDUjCEd?To76Q z^}uvX*DW`%IXxW2<*qt;u3y%<{w8M0&(HcKPk+73*yxwD!{6Wdq0Qaam(LEgH>>$D zIJMLLrC0fJlg{U?`MFv4itXklj+MFPb9Wp#)Gy8A+V56{J@gMf5;{l5yWFc2gl)C? z^-R}ZE|yzEUcOC9I^1ELY0-u+We<0?{qg>v1p{hdn3q-zbh%tRc<1@_+T^{@m-bni z{m915+$vwx>0sw)M_M`U6ASyrf4bt(d(WSnvXg!~YFo6-KD5`!(QB8uUU!&%AV2BG ziJN9~^8WPy-Q{4|&I|uEoR?fVWJ2c`Q`X%3`bGDXZ^x}1eR9(CdAEaac3wJW7VF0njL{Oxv+5c zE*R4+Vc5nkQ_QiAjz%a7V|bQEClQ86Jb@Cr}Y~mYW z-~YE4O{(1Z!kICLij(#)TY0)goY!s7#ELfj&dw1^?tz)Lo zPwVue<=91ioFg3V2RI8om!~&%>RnWNesWy?m)%p zn1sW1hxhB6VpYh-IMhV0{rV^au;VvHX?M^kBbs9uWXD5_g60r6GaG@eel`+2I8h*7 zQinu^Dr^k45hyUKVSgM`8Z9Q#ua7RoNH_=5C8MDB6}YvX`b7@xmZe%IX?&haqO!fR7F<1cvd$>+_lFGj@n3y7xZE)?CBI!~u4dDIky z#&m!MN%&n4&NuMn2ux4t-}{(B<<6(hj*vTnIvxi&F|M8EC{F|0Gp^Ubiy00ea^TE? z3EIL|c~fAb2*;%W*=OaGL4a*Ya;fPS>Yy1I8;`mNm2hhKxb+<*o%m$m{{G`x6&b2%}7uX`3Ef9paxMB_JOc? 
z0N02|AC6FlLvHv}>rwt{Oq3o@Y(XUM`M96AeEo&~i+a}K09gY0w`!pI`42K;mj zpuobeG9ycnL?MfFARq2@U>ON!{#T+!H?B`BgDdfd{X+7`2(GKl8*b%~Jt*FmU!)C)^+sE#c^{^DgNI@A21(_mX~~s% zL*vUI)8KOJpurWqJt@)TI=>3P^>G&(JO`G}rjQk+h)jjQqa1%g1REy?lr^AU>1!;A zYD7BP2Ud&D!+|@vhSi)}{chamWEWP61ma10oZ_33Q`90&AVr}r+kxOZh=QhuK3Yeo z7`WB&$MRc|v5X}h)Ce3pA+X^Y-v5*`9!c@#AO5heL?7tu8{+2Yg%Pr{_g~4)WBp|A zUG_=H-)>&kjmV&ffs~tv_k8H)IRyS$ZvOWGwwp(L1+%Zy;^z4jEX&QKtCevU0B3Z2 zl=DEc%s~S(I%uXZ#ZD&kDFjM^8^msprLf~Ny*{6MEbrGc(0aVS2_=dKuOCQBD)ss@ zo=o?SJav>sf?K)7G`L(Olo-8&tU_i`8V5v%n^U;=F%)m9dzbNMx_1Nzzr$1q)4kK+ zO1zow-2zGjUnXybd(WeIgMVUzLvAI>!JjKXR@9pYSK`fd?`?rMYiL1VC>>g)yrtfs zZfB-@M{w}xOx{fQPJ=7)M!zf5xd2K-tRt&8y7y~x_g+CccXaO-kO{*Z107U755)yH z4wn+jb;AWAfz+8kX?ESn9QzDbg#2H)?w%0wjOq8c>t;sEf9(kU?pUeomPY7$jd9(| zfinj2iq3(qTWf?)5|J~yZfXFK#k|(gQxwhtJ4QVb`oHe0Sw0K<7VS3#AlknYp`$2T zBJ>8haQT?Tj?e=sk+ekUG&tqTup&4F}X=+(;L$ zAwtiicw;=u;H|-h%Xl}^h06yYE~z&{2hE}brrbG11E_8^S6PIPV=5h?WBXBVUKXKK zB?GB5ebVga4WJ})F@Qe~=RX{wGbH%yU7}ua5XF|p-&aK&1-qoyP;icdG@ORu=ikTw zKMmBu0=AAJT7mL`l)DA1-+YWrG^t#~C$3K0uX_M(4pJFeL5?32kL4q~?$^ zA>qb|<&f}aFV;iCO$%Egp{<#~lDHLoVwv8egoJd_Wa=K*69>+ZjmdzWZ?Yd51rqfP zUvEbw#igfZq$OkuB4Xl_QZv&s5(S|-uyF{U1wzW}lzjDK@OchiPY7;Rv$Nwx%DKX= z#zR~Exi!DklnO0Pik3nB-9A=f{qI7;Z#QH8A#9@}SFAVw&_X;U+;4#Lm_R~3V7;e8 z!Z+OckWe3UA)$V@Lt^R!b%_V|Mf{~3#KJ?iL&6AT;cayan}{K-Izk5xx({C9USls=&!M7uz&m44UnYF6ubnAR;1eQ z=dU`!x8S@yJbWl0;}_PeJFenV&x8XLMWRfhU!x@YJ9A?+z=2)dX|P3Y0*QnzY{`u& gO6A30;bnToh5%syX*?&*$c^Q%8R=hp@>k~nKkY6z=Kufz literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.xlsb b/pandas/tests/io/data/excel/test_multisheet.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..39b15568a7121f8f5f1ffd850aa958b19203f858 GIT binary patch literal 10707 zcmeHtbyQUC`u0e74oD;2jUyo_jRQy{B}fe2-69QwNK2P=HwZ|V#L(#gBHc)cAn+SK z?|FIU^_=gl_5FFythHxP?EBj5xu3hQtt5wt1O%V}&;S4c4FFjph9Tz`0Pq+E03ZaQ zA?Qfh+BiXMob=V)?I4a%Io+(S?qne$FlPV|;Mf1}^z*EODn~M8MlD)Y-0fDO2Gh8+4S#>nHlb#e{Ph&YeqDXmPXPKjw-Gc48q}&5EFXyN-3R!6vDJ3mrA;V;0 z;A|WWN26ZH#=98H#1(H0dwg5F5mY1T3heS;R{Jtm0N(;01fR)yEpX>7!@^ zT~v>(g$*ZZ=_{kR2?Ocj6jzZ^db};X(WfC$OaJ`9fr> zL=lChBSzb4kIknDDohLW1433sr5+df45YvCMB>-V(~ad?|{zrr&wl ztdFYnatM*oK%U5k)=cO8UKmB%1#*kGmDSix#!ObpyOy?PL;&FG3K5|6i+4--eeU`S zH(7bO^J2oiTi*d<<;cbPMm&nKg}f-oo%$ zaTghE$ZI09=^uVQF=oo=ucf@$JA|e;!3M|j-wQ~xmHIg=`x)kSX-=u{k2IC0J?xSk z8fJdQCg2e8)cbjz_ga(+Rao<}GKA#D3Pl#mo0ppBrvD(I|>puK?O1j~Z-lnEb(ED!|w_bO2o z3n%)&q|Sd1U;5wuq@V4!{2h1})ItIP$N*@!+^o3%;VQ1S4wi z21VI;7>aeh2JCo0hYGSpq`vJuF>8NYSsEi6Te{k;hN?ve9djCl>0>j$;-4u6F1oB> zO#7_+AG%xXs7JKR6wM3sUj`swRqs9|8$ngxusA|6e0eIM#b6t8%ZF^$k99l5s{3}oekS6;f(VDQ6+ls$1)O_WIcfHYU9SX^(CN$LM@_uiq?;MXg=!=z3tY~4R?Z}+c6>jF&*JE%d zYn^ysVfJYGpcm0KOcWaa7OUE&K8`r-5f5Ly%hFR_i&k#z>pvnwlSKl)KUl zTd|+Q)*m`Q^5I~~)b&JA%=u<%+t&IZ?mEgf4Sd@P zqHkA+UvW~nej~ zyHaZ|Nl)Y1qZ6Jj40O*mTwHUVzI)e~%H!f_%ntQ;8sL!6=YqmluMXD_$=@_^y>@tv z26sHf>uyo77&B8kpw$!4%nb=qYr=Til?}Z^)w!1M7dHPrm`Y?z?uaq1SwoTaz0^S6 zoYowFEvc-F=}B_p0=s9iT{f{k8x?d(}WpqObMNxmB>tLtoUI z0mt0X$H(5GJU#_>xv$lTnzWpDE%b;KyuOXdVdrNK0aLzBYcB?!@LNx@4teWdE_(Pg5X|l!Wd&4nOw(f)mJ*F@TP#}(y^}bR?v?6u*sh}5^6%@^g$(>8unXSg%2t$5CZ*xO)DcwK;IF72 zEEZI;=HYtN>pj};wtFr!(1J~3E6?YS>HdX@dy1~6|M@nF?qrtZPKxxnJI?L{qOUs) zXe&kpP|qr6E?o|&h4eRS+scS+-h>qZoQ0RQ0zj%Ef95Mxy*2Xh-! 
z#~`Pl1L0FVr%L(!#aeBER31$Mm@2(wQMuGdBnzFd zUNLaEJ*X#FO2}0nX;smB$@$}2UCP+ryOo1McV$<~laFr)guLGJ&o|BwS?dxwHxdMKUso1sL;obj6-=Frk;Tuuz#E=PtUaK^$Ordn;7@>q8k zkT*v=qgFM6d(JwB{dV)1HxI+=HH-Z-B_AFS*(QVryYN@brY zB+3cmw3w)uz;a@f^jFDbpBUacmh>mmkmNwNQi%=}`;J~n6v&WuI`rAU109- zk1YImUj*uKQ)I$}u?pFbX~@q!?PLbAhH(9S=lL-W+17$alHUcsr#pBMzM8f=H9)U* zSBD|yEhfymh>>G+Lgq-Mduxl*RZ>_>a7zc|B} zI33zy^F#zV4wLIXY+i%z%`8;)B$R!`EpQJ198jKbp1>7ng(*V^VjQV9U!Xd|aGnDT zgms5hqTSnJc)ZPaPVHkyx^n;so<=xlo{>-xUi5nZgq@MjOuCN&)PvuYTrMCXwIz~M@Ad2~6`bnl@= zS8kU_EBDf`zI6~HHW6iKVo<-vW14jKGK3k@ge#y(o4yJ@NY=takmSb%`r7($apLkX z9y4Sy>XL7@H4^#ea5MwX{O^?xGPTIwpcL;X z>{FVh7oo<_Nh8&C?|pi`!IC?$`N}@}jg?I&w;sdB)I-q$^9Nb|N;2Ik1M;G{_(rUi z9VR}$?G5O2Ov)LC55OkeQc=z^_JrRe)~{R&e{8r&!vmjtJvuO;bzMXUX4vDB1*YEj zSo$si!d=YM>>GX&l-w;ltI6{@9gpe=kv|ybIYaz7_tC0HlBQ#PYk@L|+#B=$(*PN( zlr*7!h<&?!7oC_FOwZxaMSy1?xHB=yRPeFcSU8Spp6HgrIU{}SkQ0GAV(4t4GSZ{8 zG8!`!0taK;K&g(_EY6Q*Dm7gQ`72s4D5H_j3v+jbo*_uvS&0K9rP4;#$|~7nzJd6f z+1|`zF}LMKSfLd2!SHllaTLQjQ!UndtEI&gZG_NCNwqqn0TK2fVW};5vnKOW@~v2l zBT42;O=fuT+pzrItG~*lfqolJ8p=y*?cvVokwu`CyLd_v@W$Fk3SJk zhxBatwS-g-d=79`B6xc%AuFa(Ojh6(2e>_#BPc!Zg|vzUeVqd`(x+60DS(#yRIk|R zG5zO0q{#XtmmDccA#bEW#hUh0jU;^vcJ>*BQ5bGF)8uQ;B^|WfH#%%D0k0k*q`Q4E z=kGy!4_+0oO?yrr$P)APxGEz>!dy7(5C9>PB02vp2KD_4C$pk4>!-^*(&aRp{UVQ8$;%~o z53S5wxh(NxaxXHLYbjcm&H`0nJH#o4s;uKMjhbNxm_@@@yj-fw3Js8$Dgbh?S!ko} zHq*NSP9@{>Nd|EJS-y*TXFF|70=?8DUA?SWwni>xc7&|M)}kY_3F$dD{=yQywvFno zz@WVvOiE~(vj^ou$vqET>n%;cRE-JOCY$EPIPaE4HwLkmdFJsHaL&rO_HpT$#~W*- zXUhd%#udK%V7>FJ-)6MZR7$b`sJi6)SJkBxk>uMAKh(@8TGY?vbFIr0(G&0N%q1t6 zx_vi2Z7~QsOamQCNoAM&t4{gToDk4*;mmWDxhg)ZrbYc2(AOr0v377`;+BnV$+kgO zm6Tg9q-i+_oh;O6$#jVhoimA#F0a)jP9Ax_B!2RKkJ|d1b*mdBiA8`}Rp3PfMZ?oe zqrCN(`2#hF(ZvIfM^^w<`(K)@S+ikUKMVp4o;2h#an*x$2{*iBp2BBRwwDsPqA%VD+&kD-@A40mS-7)X zw^3|OFo#9o$ag+CU>h%@RIhZXco!A@XO~rwq{T$4*Sj_&M6(QtiYMo!>G@r|dx<;uDH;XN<#tR8 zG=Z6Yag&k-s1C924&7!Vc4xuilZd1d$}g%Y=_`iY^bPO@eHPAT0U+#!&CFn>%&|JksFN9iyJ|oXS6qUw63U@^KX2Vm&6qR-BawgNasSQBc<2dPV_mr1^}qy$y$7-@w#2G6c)F7rtv62m^D6J6@JroK?`qrnhvvWRR}C(G zd#)#OHQ}7(X8*)B!n+1C=+H7(SxV|@)9;bLkNLZ*PKH5d;T_xpl>c>`{j;|Uj}ejU zcu3N7l`sw($&f-KdJ8m6MShZ5An@*;a7cPKhKAE2HYG*Mv0Qy zr|a;2de^TX4^JA`q9p?5YED%!U$@DM0$HCF_7x0?$>lE?1*%TG`&=0-Uv?sgU9VnL z%_skTeE5FSro2JMZ~+@q)w;fn7^USYM$8&o$S{b;A20bZ`pn>+=7c@L()S0|`T>So z6bi+$^x|~*s+pk?jS=)DWDRq&NQsRIPc&XX%oA_wrZ?=~HAN{m+sYnM8CEQlK}CAB z;lE(>F>1CO&A2sO&t~kA$Og9rp(|3M-Aieh-{@dw+%l*22TAc!P_^nu1IY*wN2CE* zy)=B@c&_hIFoT^eFIswznRM}Om;$2_2oaZiwNsq#IR8s;ZdzK@8iZ@~K#!kKku{Hy zfG?9Uv4!x2l+ufK*7V4)i}=UpHbkKxc}}cTa#J_o9>~=%l!U&_mCx9FlI`01W_I`I zZ+(Scf?vKVX0((^@A-aPZ+-^yo|Ac=X{S18IJC4^>oeH?ETR_o_$#;B5{K>s?k}*o z2DFQf4^0NkHEf4b?bN^Z_#u`@RYLH8)Zqse{P<(q_Qzle-b@-exIeNnhPeIQ_0i~O z00iVa@d!MxeS@?_Ue^(_w)gsCIlsudcxct%+qyW^+1mPHpw*5fgj|b+B$t;54*8qj z!8lprl^Wd2jNx1hK7593Y-^jBVl?iWv9-FrqE{H$1nv}<7=>fc ziAfC(2)-r6%we;DsoUBs%%s`e3KlDGCDQy*kW-I%b}TC?LvKn~c{vWjUBr?=Cz7?~ zPQ?HIxh8|{&{U#KJ|(W)ddEblJGD}fymQ2x+mIimz-O5y8q}}2f4+KlE6a6 zD8OPnvh(!4v=+(h2I>GGyVMwnyq`db=({!OS!1|Frclsndi@dlL5KTeV*6#BL_9^` zVyO2eSLN?h-bEWGPYm1?rQoC>-d|18z|QV}miWVc|M5zR5wo4+#tPg+JfX&dkv9w~ zVZA8MlDl_QZg4Bw#Z)2JD6^72q&RAOD)LoS=Z42@#ImQ`sr<;M&D6K7;Bd|$ur3s= z{II;j#Xu)CZ?LWin)Q%36C{d~-$|wPaU+s?oV5ojk>Xh!{w=l_^6~f+k0ByiS{Dm{2A+MH%*r zX)eJR9WxX>AT=+T);{>LRF{)wdD+Z}UIO^BPEBohN3_4Q(S5(@wHF>peC*Z`UFi}J z?W1Qbs|YY^y}1ZxAM$`(c?8XULr8|jZ6HwaEYsWK=Uw0ZH~^W^RwTi}4x6cyyD@3h zSh~7o!M^wbfpVP@bhxKgT8Z*|c+k8~)SxKWPOVx-&mM9C#I!=sbnU~^FUafS^%-MTh)JER?$~v&s)}$LVa-3j_lWI*2#3*Yphp-mD#z z4NuioIp}@ikT(IR?de}3|1PFtzJD{e4Bsn!_}mT;euhVKBLgcV2{QvY9S0BOy3slW 
z?RW5yc(R_S^|5V6#*@c4u{F3vn$)oilY$OlA@+Q29b{?T~?xz%F%c46esnMWJqZ7rA+D50N9AV{WX zmoE!Hlow(V#ew_8`s549jJDApF`-B6Iw&beN719ot;+OJsa3v8W$i2UBkC&7@?$_{ zU<=zMJ>J>*EoI;$M^N$~<+h(S?f)ycWqg3=wmx`nqX^OYce(99zUxn_=Ktrew-7ks zGvMDpo%Dy-{(1V%vr0;GzXJUF0K}h?@DKCw*!Y*nA#Mu($_W2e5Cy(3a69}L{P0bj zo6O%ek_q~smB~LD=9|Jd8M13(PmDi>{~cd;6YwVCbPebYubh9aldkEfnz@OrO)HpY#Z>ExK z`83*}3;89n+?2i<0I#LX;nel7%KVEOe!DUM8jF$s-(dX=tx9sp@Fw;LIY9`Zfv3zA IxC8+3e@VK02LJ#7 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.xlsm b/pandas/tests/io/data/excel/test_multisheet.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..eaee98a801cf09b128d3a732b818f49ed96e878b GIT binary patch literal 11313 zcmeHNgL_>2)}Gk58ry2)G`5Y#b~CZns6oTVW@FoKY&C4`G;H`yd++&rPS3f2!M*#L znZ4)v%~~6?-gi+^1_BZj01bcz005+b@9!)~L%;w4cxV6s0{{!IBVupoWNPQ6uj=k# z3Vg%lW@|%|0|`!(1po*A{(swl@e>${AC~W7K^41^d=S}Ul-3OoLbX~F^QRy}SDLt(RwDGusdWG+p{=pXnSLoc6YpWnq&W5r5t* zvTRuOz_-}Is@Ld&Pv5odYtJO%G;}_Tq2+cY#H#vZDemxTzesLW6xy0+Nq8}XSU}T~ zK31nXCSJ=L85z8HbK(FF#E8335oDGE$44J>;I2CV4CQ7o4HYA`ojSXwJCz{PpKwe_ zC#`@JLAs?LVzy--sJ4R9KoyT4XAXOgM%VwLF6NYJq zf5C5^8VmU)XhjwTPXrKt^&L%ZfXqxkKL3Z?|6;KI>DEhQ-F- z3d^_(Nw$!x`uItGL8*((r65@8pe96B#Ses#@NM&X_`JNr7kMy9dcDa}`W_vVm%QG! zEcoNyTW1(LD#t|ex1}4sXfCr?v)9QI((W`aZP9ckjYZiqLu=&X)90d9sN+l;_;48c zgu&SS$^P2?a$2iKx8-27!YapQ!IcfKvJT?LQ+?+X3-*x&L%C&-rjoG+fkx(w zfqWh4xBuN+Yy7ws$Y%Oa1KR>8z2C;#y}@(#Sirq(c1AI=r~-75G{i*zrnP*($9O#v0G&i!Utx^rOv$_^$XpjP|kSdQNdIOM+L(I+GV zyiH0rX9ARw+$EOY@wv#()xy|ECrz`33BLR{`$f0x)|a^;`{_! z%W!70j6z!)Cz9=uac}UbbOIL3;Y7qeW=^{X)Uqr^YcA18dOopa+i&hfRwM?y4L$kD z-J{;%QQwem^>B{`jtc~B;{20Iv2%_&K7u$G2?hW_0oe-3r2cG7d1|)yODw2Q0+!FD z-)?5!f`jD*hrgcu&bWN;!LR<#N15=$gguzp8cMSiosr28r0b+K_g zfr=RJA%{8Cta-H&rCXvh)=Z~zS!Fq9Zt@r&`@7Ee-FJ;yt)e|Odt77F_Pz? z=tr?dS%r}DDu8M}|A$6`aLiuui`XK>cFiFj=6F##m z^v05-XyMFj2&!d;ZZuo%DiFKA5}Z503Yh%1ryBRlfOh`qozSeBG7wB}^W7?7V?)Rr z2!&uVD~VemZMC482pwN_IBu>sVH6F7O97TIW~R_XD~FUr5V=ip~GXg(6x>i&{?4H;vr??C;*vf`|bxdCa8eL;j zj}@3pm^4nMF%gh?35E~8H0Cm34UKjB>~j<%#C0w*e$}G485J>Sk^=b}-65k~9AoqTFrh?~-A&F&*{+sZ)Q*OGzN8>VdSwbbUS)Hf6D zs=|$}JJ#MLmtmf%qJ@N|nDz=2hD3`dOy?RnVjguM%O`P1T}2xOzHIOorp4OV<#wsW z`v7+S0OqKyRZyqwfY`rY_Qp;M~3keDNYuqwx-NKKUsfJ z?~&GcGyx}OJNda_#7@S}{0M~>rw(OY9>R)kiJkUL?yI+KCm#Ki3yUj7pMsXi(b02A zm?Xb&kz^^;zoqpZ6-L7>eg+@Ukm_IMs*NvGRdi*>8{gd>Kjrrwzt(MaLRZ;^EljJ- zJ62~_)rFxZe@Qh|d%Q}BEG;*`0ZwTwsui$&0uH>Hy1qYsBdSBz&FYfn+ufkJ92t~k zn=tGJIU1^Oz@FCG0w-fkvCf*FI6%t--NMQ^PnVEINFLiLA7I4dY0E}ReuS7Wl6RdC#MQ@}VcrN7YpW3_k4pTR3oquq(j$5R)?uShF7ahx0zcI2T(l96uQ|(? zEoFA&<5l=H_m)*F)|Urr5EY9R6>m>&zpZuBE1Iq8^q@eKx=n(PgKroGEN6tre;lIB zokdC)?s6iwf{Vpdgjnx|6VLQ)+h}}%%jH#f8@_>#)V6DVE%dUqS_YZbFl-^-nzgxS z3JgH;flLTpg2Bu!K>R}=Y0ef29EF6_Ee$36q>Er0sV2_5 zSsaB9-}B3=i_QJq>3aqF(`To<^{)L#Vs;(z0it->tElmg$D{EIL6;}DhtZ1={e^Sg zf#W+tpCL-9wB7@7LVTlfY_sgRa1>fhBY^YVKNq=PGSX|&Poq&s7CGJiEp)uP4 z%)a0uYXmB*M;5G&j_i$?37~=rwQi$%!D+P&lcE>8m9N9tVI_L1i6k+mLbM9(i*baD zmX&iS7I)&##s**V5{$ww&k3}!M17S=JZ|>!xOrB3uNhlJrO*YBSlDZoG=q&n*CSNp z;UhZ?qg&_b2<84oQLcutX|djeSW<2l^y0l*5u?!IZgEs$+gr2K#A(x+mxnI|*kUS4 zQ0qPr8(B4;v!x(lF3Tb-$vTIelA$e1o-j|vL>&UbVls?Ar#;~U?GAyA@4j0Jb%1{} zM7e+Qrk9EhYiq&`h{IYy?s>!#p~zR`NF|kLEVv%>A!o2@EC{K6U<8XSlN6a|5%)v~ zl3>H93H1x)_6#i+9NF54iVOp@v03>5R60qiDs<4w#92lMc3X{(``~aLh_AgN6NkT@v#c%Xb!;8agIBQ2cGIcCv=~bTQ946Wn0? 
zb5PLBB&i%b(n-yi!kAA{K`Dz(l@?l>9Qof(ixQP|o4||8a=>{hV6N3{=WnW~`3O@{ z`;74lj|@5@$`Qw!#69BJi!;Em8kYvyvqGI@99Ovbxmy@!^+>0n;TTtB(Y5i#Eg~+C zW-0{7*yqZ~@}T+m!?&$yEI3l^$>x+r$1EV^obt26fL~k!n^9irl5<$MU~skdjJrWM z^=yw}Ptxl8M~bE}-p(r}YJC<%Ii~?OqNmFu8Id_0D55vK;eJ$mIU#Bpx~X2~Ptsbq zrArQ(jkehzkXHH(uL)r|#_QU3oOLgL?94;dMkf;qCz;6#hN( zlhb;VL*r(#*(xDMV}dT6R*bJXyb4pxU0v4a>{6mht)Oq=Iq%(}HOQKCaxO6Xva zIdlyKcHf;D&>s&xsmGZ#N;cUBp*=2K`r=q4PrpDvZ^bo)eKsbyXK z^W4e?6)GrgS!)bXvn&kmLSj?)_0mWc>W6NT@-?T-oN$ylsP0XEb;q%w}AaC?{ zf5n{|BC!tv0Kg%EqPu_jE1;9RjVbVlr^?f?jZWo4eezL$hS=nN@82nmscbt)rgor~ z;6i^@0zqXxyMV^!Ih20V;R-!Ce6rN0XPsv-?v~-snZdA1ti8ckQ@$&`W9J~E8!{w4 zt7`|m88+U}FQ2xiv!;tTY4S8_yZ8Bi5-nB{9l+1WT0rFY!TUtOvMu%AN1wkc@%ok3 z0EGkoY@KRP_43_gxpD1@QW2Jqq~vBpyo{@8Qp4=HUv2Gu{hE7%m8lRiYJGmMhc|i*3SyJOl-N=qUn9#tNR<67e-aQAJ9y#!*>Wa_0BiW)iTY3R0*l zh3?7o?HncL8~OZeWszGjqR0&2xC3S_GK8?N<=Z*W%kBl>H2#OoI0@*VfEtHgQF5p~8vy6nHa+SddOi@D^yG2d!ZZUeiwjdGjut1vsLMzbC^HiOm!WZm-4*lb!xyEP`G#LX?EmtQ?W!84@}mmazhKNQ9#C4I`I)U z1l#6X+_t&awO~S00*u@_gwc1^TJiad^p0zjv~pVS>Fo`$v%4~12x^VqFs98|&YM4# zUIJoTK7N;w_5F}`NE{E;s(~m$;6tuFs!ugM=!A^rLBez!r42FM$w~?nkv`g}0qe%qNR%UNc!r7^}) z1Rg1-{mo-G9*W(GP&&t1J~b~&X@R1-MSg}gqt{||@C4L>xmHi@hKwm(aELNav4}5# zZHw|!n5M~1n{Z)dHBe}X>@W(s>Be(`7WE7yD39m*D-ur=g~Llmdapf~HlYLc=#}OH2?aNW%dDxWBSJ zpoOWa6Oj4mj{OHc#H+utPi4XM&a8d*d#JT!)R&UNM=Jo7%!Noz&)AM5CmCR|)THM< z-n6_VRTQ4Sq^$CeM%mTX2cCBbB)qF4*Hu(!j4@ssBvI>D*0xRDyXjfEBE%Ekr0at! z8ARvD@Nb7E7QFTBCl-?r8%w;>Iu;A?y8QUqA(k(r%14otvMm_Wf}OM)MG*jlWuY zs@NSNMocH1zw)#iKDv4*#Kt^L@h1*R7N4o-`)l`tT{&9?rHZf*uBxnLra8O{(SzcMA=&LOkK5gHi^nsr2oJtkeS zVc!^^u$uWd=ZfACp<}5h!#^;DYM`tMG}W`WeJf8JZ%?3IQ89Pnoj`3~nl#{4F4015 z)+fnoaQWuMvk8sr?8ijSe5tUSt#7FFSQ4dG{DMy)tu>o@3gB(Ty+a{3njc1WFohd% zfPf|+bq!68pJ`)X|N4zE_J9IB{NARK@XW%?Kk|K1D?p9>?g$K8+}h$F1@}At!&Gb+VA1*$C8K)a+Qv{Y|OJ*J&_M1 zQ?FJH)eYBP@}gpSn2#V=PSRW()U!_{@rR^GrmD@l?OQAVT!p%N^1ws+^D@fFOSy`D z6wcKT#7XuRlo0_Y&Gi24tF?^FK~*Mo2WsrI-bq1G(_@aeW~39WHW;hsc*fP~Mg1NR z@bFI{y!_p}s%@{IkbqX~{xgz(riAh8a`vgOQQI^Qd|#(&b8ajXSFyw(MW%@4f0A3i zz{jqEuXJ)XrhaZ`H$d!<`l`#Y;C_GImaNtPbPSBKCZMDEx~h679(k_qY^4q zg^}%NX~o!?)bKA(4AwT-K?xo`EEpdh&Nxh2Bi$ZUCzLrU0(M0GeUhuB>utom;NA%J zepPWMy2t?!#~M!dD3)?A3pKwvj76ya+;vlP9Mia6t{=6BfQ`W9{s}RQogjN4!+wMS zBMDs4a2+sR4wo`^^^X5NRWshLv(FPtb3&WBT+A$U%CzPreN(5X} zq}Sr;^!3@KqZducr6~`tZoy506lX2F*PSiRW`aTWWU;|~&%@{mUBwdwaB@Q`<;{J? ztjsK+B4uKA4l zu1i^!D~eQI5%HWEePY9L%4qV~2+EgKMKR&Lx*G5=F|tg>DtV8SZ^AJr{3f6b{Pl%j zyz)OxCCFRpOMFan#znYiy=TF`G9D8!3utZ=-6cXtdtU%9h?LXUGp`V3>Zx&(z<+m? 
zN(+B;6vmHvI+J*mZKo?An12B!I`4U$eFu$pjauR6u(knKRTd&95!u})G`G@#8O)i( zq$3U~cez9Nj!BPFuKG@!VP(v)u%q1FskUPgg0T+H!<~-?_j0n2|J_3RB6$LWef^i> zpl~Lhgrx9E608>mIST&5wnyHmJPueHkUG3)S-d$l>*ZuHw0;QgZ{blr$)NZj_gwkT zczJeR4M_Rm>>iO*=y(uLrhL|2ACNmjy_Zi-)?K~EPCJF&(7)UfImtU=f60IBWUk-N z&y?$JKX&CgtejrSaPG(;kZc+>^3w!+z#{ z^o05tgp#^ls8(xixB4Q6p_%OFQ$Qp{vQ55PUZaD%3>VI#KtL?GhbXYypB0pWh3T+FT@#xTWOq)z#2| ztl?*Y9xs(OkqSGm>S$G=nC+|&DGMIWst!CSI0T9eQQGqo-S9646_>w{u)LRU=*4m( z>ruD1<3Wt7;UdZ3Askqz=sVAG$DOSELMpxIoV;sIlt!&6mnJ#2hCU z)IxQ+Tc=QCLplO{&Jby}g!)}h1~V*!{gWm6CvS7?<%J}!u|DXX?G<-3bAIFw0ge2s z0%0~i1-W@g*}ys4am}{0ln&z4ih%-BtYUWMZz=MlICP^GlLIIl!SAWH6h9$46m8Ei z#9L9-GfoGni)y!KbBa1L(dDeGGuSS|O0zvGt1V@Gg>(_qw5>O9B7wBb4>lSeZ0)RY|CuHs?HT1%9PwWo$8ytrfl+kGtla?gufrFXJbndA#=C zKqLJsRfi#lw%y)PY;0!IK76P2+ClpPb$22on-+T7NU+5BOoVBQM{k!~Pwc;5(-)vv92{T**tqoe9mHS}AH+1YEg7;QXcII=uoGiaBYa}$NB^j=XZNn+^e ze3^RutptcH?zwA$Bc5BrT^>!IDMcT(A`~txPQ3m7eClKrK9XBPy`|o>a*XeIj(SjP z8P%U}CJ8Xxsb6~>BIHfj*UNA|BvWC&Z;DA(qA$&TA?cMfD?xgF?zX?3l>zkC z4_Td~4wPwHa8^9oV0B+ga zU3l7P;f7c~_JaXW+y+FK~7AIuC%!2st6yll~afP61R1wj?G)L~$Wu*~Vtc$r?fpK=V zNoZ-z(R_52(!i?6QskDW+mo!+!Op~OXo~nKM*<*y!Ay5#-?5IaF2}$~k4(tc?cTmCAiHxSKDJO-t#_Hw2a%puvYq z_&O3DNMI@BagK3mf_v2@o(Z(YmoGLhP8QfI)f;RN(>Z&6i;2_a(2T_vbE}vXdwK3% zD%TUzmN0>KUG0<-66y5ZwzG5iDArFwxyBubDO`r@`C!Ry8owrUZN&;_IGE46Ldvl`Hxgc`m=vfpT zz4PLgK2MP|AIOXBB{tl6FRQO;kq}p%%jQ{Bsyth!DPE3NmO$s2>LXv#DTgXk#p5VQ6Xh!=O##-~5oQkD`A`39%Sbmcqi?wXz;?byJgQkx(JcB&oER) zdSdX#Lpfx~C7qB+O8$NdZ@BuX`*E=78^wZet ztOg}ug56fXcOSy^4C;CQ-Ay+xKXfR9{t^RF7YOao9Aj+nX!<{r0U7DPH?PlY`?+M;qm+U_o$qHM4$zG>Xq3_Jx331V=7OO&zWKJoM6N1ueF?F%gxbe@FB z>r{LV_ZeRnBHXtb+sX2YZ-P3`EP1)!#hzzheo+~5V?iLri{Ygl21kuv&v%uFdT)>Y z%!A!esR@91QDv5F|7B_F=ntm&ORx&p&EK)UER+#ACkD1!$Gy?RT(zYqf8Yf+uKE}R zT&AHA9E}gzxjcWcQ$$DSxslq7GnLkeKo>VVU75w%VNG2VZc#Q=C2C=I#czB;E#PL$bbH6 z!s>Uyzs{NbS#TTF0R7KXC%^0YeYoJ4CN5A6{M)F(@4~+oo7r=ih zKnL}2L1+I}4*y;B_d3}xQ7X_B#lJ87Z&kD3wftTq_@%`G)Iz57yV22a{_&pN)UHW%N_e;Kk;^#H| q#qIqr{Z~%@B@3!#QvK=Ne=xM73>3&tf3!W20MZ~+pQQQm?f(Gb0HLb@ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.xlsx b/pandas/tests/io/data/excel/test_multisheet.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c22771232961fc9c9a91f4aab55990461dab22cc GIT binary patch literal 11296 zcmeHNgG8SX{5VDK#&-^WvHRMLsD8=knZjl>F!XvB_+Snd(ZV8&$)lW zclR^1_ssK~wRX&U-<2xzFt9iPc)$|?06+ogcYpWR843U(hX(+#0Z*V`iQC&bnc6uS zXn-9|LHew2wl?HBu+a2b0BFeX|F``=egdBp^yRzRae|NG9>oVsYvyS~2b%O1TBrQZ z5PH5kvB<~2jUPT-^+@$1S9{)vd!XenLI6!6H3 z`RJ~YJo}T}z&d!xIw+Gb;}_Eob#HtTHHkm#5c+$pk(eUZc6{b%=T_4;h-=;`&LD_TF2;(jpJ}4N)q2+QqP_mm`6orZ`Fr1_Q#2ZklC2 z6Ai*`Lmj>r3Y5NAq^Ys1s4mXsH9T!j*>Y%6+Gg%1Z+b&&BMus5kn>r><@~E0 zbtchFb_AaU(Yz>h##=1GOPmLhoyNq+_7|*j$W*t{d(!r=y?0o8z?9CqMHlpD6N#e- zZ2;J9xD97#*~2p(S%VNR@C0cjw*jlQ$__Hwn|XP3lc%&KPBGsn1v1_D-(n4Se+S$S zSbbEN7F!dC3mLQWxI>VZ>yCJdqPW7S!m=W`b-RNM06adz095{h)>=(=s&j}5DnP)9 z3_+`bqp1yujrGU=e<=Nb49!2?dP$t3ayL6l@UhHY$iUU~@>>isc~?=HW(o~&U)d$} z+NfM=;^lTa5)2KZ02pbXR`0t{i_3yhdjk}g8{8!kSU3Vyb*`ns?{3~WBQVlBCP}?1 zS?|GgnZB65Op%rY)4R0BFcvoyX3Gz*Qb|poN>pNuvc4oj#Lg!P#uG~M)9q8#SuwsY zgPImoKP(NdsDGZd7eAWjGn-Vfiz*Vzr*JTlf;#{*HlHu^9I&Cd_)es$Zoy|+VVvp6 zPYE_KvFSLINNdNw_heB@A5f#@#=7PlmF}m>y71Ah;XWP80QWpc>@FTS8T5}JD_ns% zuD^ri`~pD$8?vGU3jh!SoA`suJ(@B#`gBsKb+bhn1O(4X3j-^G9tgL$RGq zUOLl?#DoSk+meKldd{A)mo4#o%uHXwsdkNz#ZbKp@ual?pDAL)4{Z@6kJimqCAe+8 z7xOu&_{`V!XXc!t6yb6PMi}e`q3DTGn!nvpj0026~Rv;$zXJg9Kw6$Mg$9NF7e5CkxHN}C*Ac2Ytl|$i- zy*G3D#RN;dg#V410G!4}`!UIDIe8vu7XP`nX2G`Yqsqnxy?0$;LphL6Z)iGrz(|u8 z^qK|ebjIZDF)$8!sJKu~C=Tvo*HrD6s^ZLw 
z)b|lC_Q&(kkQqw{4L@+}alGYsBHihTH^a=>C*%p5q{pQ4%=IIC{f2^ei@!ZxY^;!-{y~Zmxn%v zG3L@H@LIjl?fB{oJ1<@5*qOdadU|s_`N{G_kG#GQPNcvf_4yz#7#fc;l2evg{UFtb z1Bi2N-_SOBu_E45xc>d@A!g<1%fjh2@YthD;qVYNG%qJI)XAE0L0azEXjNo$c}5Ov zTs-rpAEe$>IJpl-SbX014$efr3nQ{FQhMhw5w5u=SG~506juXrWX-YWDy07Ln=OLKv({6-)|yZn0N`+YQV9Soh&#WGXz`TOol%{;E+Yb0Gr!SKVbJZ0BV9F)WG8i;qYO_KRYzjp)H@F9NRM zI&XcdIt?H|_&fXJykaNufg}@ZL_ZRWpHOkKFts&h`?=@%!Mz7MqcOxhIBis?B9U8} zTeF|3b$DLU#OEO|+ZNmDPUSv-!+GT1H$FGNT=+3?feH&Nhn!VriI+S}jp+@8&#)LK zPSGRuXr^r63U5t9sfLOx8{z2o_UN&Y&*-IIixZam_LC3k6?unRY#Mq9bX3o12Wt*j zNYLaIXV;-=fD$_Xi$~C)tBK3oV||HNlwBMySw3C$DvMEp$+n3@p0LBA28LYe9nFaH zKe0d6ib@f%*|iw zZ7)U6S8|#W))Xl*w17JHi4}82sXE3)H$Xk>8&plwEB7rP3AtpXAtXFtNO$Ua9T28Vmr6Ps5G zSCS@xa#oDm^`<$el~Nq)9{3i)cA&#@%tRx3Qf#?ICDaly(nZJ0xdWFkX={C5Fl7Nj zd7J+TUQD{KQaphm`*74eE2GDZGqOVTj9)VBg?hT-`he>J25{)W4%_(JIVMuAZ(f48 zK5SC5XD^O|j~%OMr$*d3bf`-TL(KNt>^N!Cbn4muQ(?~73UZ9vw`9gv4X2!`$T*7% zXsQa%A;*-M^D;+l6S2|zAgI_(<4@@i1R%S85KGW^E75l7Z${|1PxX6fIdQ*^nSt;* z3aC5|*dtX0s~u@&^ME32A#ZaA8b<OwERS>{``YVwRDhjXyd@HH}DF(6#17Br8ZJj

    * z57B|C^Nkf2I@;X%-%SgXRP`F63rlmL1*j1&HEn0FswM?V(lB~~gdYwJ+at?RMjEBu zP`(2Dq|9o#Y*td4>3znWtYr-|K>9|hdxA2?@F#Jo(rkvc&D%oO21T=0vL*eb96GKMigZ8o} zHn!;rP^twU&!h^j4$qQkpi-iT*`Dd*PeHKQurH=LP7&IQtHt6nf)C0Y>{j^f|o!@D>j^yDJA zHi8)4Ts%CW>v=17hwK8l=6z{El;859+S9>u3rEZ0tEXM5WJ!#Q?FQfWUguAp3tXu6 zU+qt~8I+zk)q1FCOW?e`KbRZ@*ro4KKKkY?%nIl949V9bQ{lZ!xsg%R>Y0{Ku(Caq z#tHG=Swgi6&}yAVl`C~OSNZ0?Y*vH|zpTdggt%3d?L4itQ)re?&9Gb@y)|o<8E%@L z87wL}VP7YQTCTP47Nt~u?0mGjNRSMsrEO&EXbqpe{AYi~mlh(u3j+WkqC&#EfB7qr z6WGQS^utr-y|j%<x+sRBwVs~CUPck|08tr@Ku6HFRC zOj>WfzaPa&mdE%D333#W`o8r#61HqjyY)5@s!Y0kZq-liKr~&e(OtE8b6*CmIZ`de z^_G#@s85i0l}N6i9`&uMxvg6TCtA5`oT1|Dks&b8S{8o~Db*lde!_l%@!4XN#6J&N znFxM@n1-dCf4W#|6;N278vOF0v^3@Y_nIbhsDp9}xJu=&@zc#5RkbUn{7W_QYbesF zOrQ8YHXTajurFnsIgg9r0%&@_eK!0=tdAgWSYl67`nWoqB~_Kq=cN6%sJ4x~(O)qT z#k~u>SVAl)#-#-cUZRWXg;r})5CH-=p`puk=;XC|C+|)hU@ow%fPsXU_x(q<(lMe0 z28h=IU7NX0h@Kley#f+dfdFMB6F7+%}QB?t-Zydsy zI%}*1{lt4lb;(N6msaq3o4!w$0Me<3 zDMl7Vt2n4jGurEbjpIkfaT{g`G1|&X4ilHrS>PS0-sKn~Ptt+keHK0ZtXX8AM|KdS zW|rg0r)id{WD5R5Q`@fzl7zAA=WSsYG80q&F7-|AT)Tt8C@Cy5W?w#xBusuzejm96 zAFDdbO!0NZg2!(yPpZa$4OoRr^O=(n1+-Pg9F7IVV+TnZO{nA4@ym3O5vLA6OQ_ft zRjlJ@_pmB|T{G|P{n=fu5T^R+*SWES%mwA7VRes^SYR}>uxtHUhs z>Zc_PAc@>UU!&^bOG!o~V!D7_tA{otmQ-Fu6nUpOl&7H9c_mpK)0D;y#4yTgI7}2S z1m)Ze;7ou;9Wy!l{i(sS^uySP&fXrh8z8z6UQ32oKj`ZH?X+!O8=E*AP4QHkB=cpJ zTkYkBPNi=7P-I}<_0>n``=~x=i|TAHM6(l3+q`ldejn>FLtu9-!5D- zE3AQtFwl?uJJM*lFD9tNTRG9XoH<`D_nUf}dfY{na-IgPO)--ByOn{5VBdXM*_cw* zQ_n8)-9%46QBFBwdo)T~{l^MI{Woh%90(`KAp!sdztTOBg{i3%i0$Wr>jym~XzAOh zvEz8XuX*&ntFdG;kd-CEEC3YGgvd@#*^Z(m8{)E8XXM>qH9wx7DXYQ#{x{SCT0t0HNs z%?9hJ6K!O887g4!DL>R&%zyfd)r57{Z|jv&Q3W5)!&2xulQ%#g-+NzROj3?~a{pD~ z&4lCrAflHn_6Y1y9-@;L)%ny}sk1ed`q~E^lENC|bQAd+N4!%M9l!Fkvyiyfoha1A zBOYK3qr-dFSX2~$9BS1*R1_Zptk@jwoH0KxoJZ~=BA9&VuLm)&9t6B3L>epm{3S}R z<<(QfSpbT1oGmOS^}Nrwv1c=RkA^v}BM#fKn+9oY{4_JtByUKtV>=N2)Fw5Fu(Ud0 z)@SL9K61;jdrv&xUV;m@6>XJO%fsHfYH(0e%_9wd(n_eCbg)$CNa8)=TO?Ky_Qm7T z>fK>3D3^gkMQVa(K0%iG!kTJ~iM}e_SjW}+tt@@CEsl4DSO=rYfIO?-<(m`#20VtdFDo6} zx$;W3fzhk`;%J?sr-H)iE!k`n054;{Eo#Z({4m_cr!*FZ9Lm z`jwH8b~cRF<^m?Mql$Ma1uX(|^_+<`p9UvB*97mV(`Q68-o!ITNmbsoT?z5@d!Tu6 z9g&r-_dejd^!O;Za4yp5Mj+XbB$FEEsvdOOm}hZ#pzTMcU91>s8Ld7Oz`%7k|BO~K zPJd}w$2FEL6p|5@raA4lYpwQk74G7}oe=%c%jln41(fV@F=S8|M&-x-+C3`GtRH}IXN5#c6BD`v(3MW?}-D4YOx!q}usqs+zZ%O!hr&P+-*+_aK>WlV#QFkUi%YhEZ9ZGR8l65W( zHNV=AL$3PNc~yNF+pt+?5WRzphs+B8h?2!coZX*k|Cty&8Ct|>4KP`Tm^yM1Bosm0 zM0oA&{lMOo*lMmAJI$6lsXY$+DbG6j0e3s*NlOR~GJz@T3n?t7x@?N!vqrR%)H_$V z;6@SZljiNqj^-vak-$32xM0D@A*{sCqA_Ab#X8wMx)B^3 zziED*>aR38oWXH^8r`DrIY!rCTu&nfmm(0-jYid93IYW$OE}cZ3pHF(2%TBHGL>=%N8^wa1cE^>xs^>vrHu`1P)WK!g0oY$KVY848)#3_uEe+&RgzHx=(f{K)&U; zWyiY!jtHCiH?>M^lVV{;6hMoh=Ja;YDo2}oygW)2x;aQ=K)N~z6T&&3N;=54(^Cq_ zKZBE)^*GGFfycbWD0g#MU5BbH4Uv?N>S`68S+2(k=E-4wB?YT^zQq{C`kF?u>PDA& zdBo^Ldl}fNrhOiUr54d0EJ#mqKHe)7G?y_?m56L#w^S4u&dQ&d96nBt`?MfO*-y;& zzzc)l0XGx&mB2}sKu+~q86^UPFEaQI5{3sQoY4J_tKf+M|F)|kg&?BcJz6RwKl0Ip z_nPY+T6?J1;<3q^tLMmZhnO4I(haGTk`vxi{(T2q-DduK#h$i9SN;PbBd|V% zrA_!Z*F5)5o1cx03@_PrrJN~A&{djrSMlQ;SKu7-HSc92F~BB~(d)#pTIIYp5H||d zhWA`i8_~S=@3wqYX1In_8~=`~)V(acVu)YbhrDtA9kW^(IhvZNJ2_g~ng0x3O-4kK zy4W$`##+}%yAEs@7%AflkVOT3pOV+)qH@PCq=c`mg!*RxQ4q&3rY`O=6H*9$-E=I@LQY9U^8Bi25G+g2uU8z?A%L5oY`mUIZ{m`&}g zVg5kdK=5s~@-1lAD7_z|E52h)MS%g{moT>4eMNJ_<>jRJIWB2E` zYiw=bmFBBdPLKqrds9r<5EB_}F!QXy1Bu_T@B)={jpwXXjJ}n$KX1M$At&I2(q${u zxI!USq|%q|E!J1FFs*-0#7Ge?k-bI?bAvEdMKKq3WO6RY_!dUaJPXbT(OFO0+oF8j zg*c6@Fa3+QCjlr1IB4{FvEOFE?tRWxG>&S7ifXd-;KAu4-RK)>2wB|n)qp;Gd==|- zZ}dnle5Vtsd}eXv<>%{NE3f>H$`bAk-Hw%GLi=O%ooe&2!E6(`zu8vZ>ir;zK;rJ} 
zOy_+{b++5an0YeYnsQO5lH8{iM#I-xhpNREBT4q|T5YUxCDiQo!jN)~T!hJN^@l1vp1*Y3!- zZRHwpX8E{HfAf$0kduacnFfh$l_4;~fONo2?13td_6{I6BL{~c(1G-d|JV0|@Me0v zsQn^4O3*RPB^k;xapSNGieE{N;`8$gW2iV6bIk%^c9ltJN$kOFOtfnMiu*#;R}Z%b z1=+o=vFnn6K-OR}Jyyq9(TYkZBZJTn%U2QQl;WJ(+`{nmJ(wy>JEQNXXnXOn_&Oq~ zpz(5OtzN3-lxA}@81hx9^-d9NJUcWkVKiSCUZ{tM9xN7YPjVoCLX&`hNI)OlqbdDJ ztShy6wtjXr$629OZ@Ztt)8kV_mZ69Vl$_74WL4?mzjditOH5zD0oir7(MU^YF!9+= z&k&(oKLnOVN~?SmR6#eqxv-6tFVvJSMq}p0$C~SxwjGKq-isORKS>Nbo|$+Ma#V&;wRp)XW0v!1PPv2 zUofL!E;<&?trnG$idJJT^4p3j>iGoK?e?5Cd2HG-s84c;ps=iS*Xji2XM6W;46 zhuobSttPh>=}z+1((}Go6&QVoUg|Zu(0M)VR%!Qrn_DR$<=y*9o!cRk;qm&J5%Ka# z>iJzG%q-8b(fDJPzO!g; zI`n*1Ce=YCh7(mORbm+Rq=7P%;iW`}I%T21^SHmSD<(^4vW2KhFcl8V6DwF#UXyYE zEGd;>Ym6UXNm?~`U_11YoOH8dpg)}4EPCuPMvAw7LZB^tEr|pi%%Map?tn#A_48HM z=k2BI!^dT6kPJkNdJ`eT+4czu${^vyk1fsK;JQP4Ajf!76HeF!ueEOHHiY#N()9eh zo6hm^8B~G%C5Di;DCVCz259eS`ahC^80o(^&p3VSfFEO12)F2Q9*Q}(Isv+uqHKLg z0et)}_;Em{NqX65`|SobrPVeEoZwYKu%k>1zefHQ+yYLm90ux>Z!f}h@vPou&DtEU zl8Wg+fErSwmuex8%44b8kkSgRK^hIq$jW2$1p14n#`eNruOHbYj{0bO$z+ zp&kBMq5oyYX(FR4pEnqIYZzWRS7$pW6%c&(;oA1>oKtX`O+$nUiasjePz^YQ1`CkbVs5R8GsT`D{$kA&cmlIgMEKm{lX5qR1c|4~fKj|0ltOf@Xn) zz5hJH_{UTKy z!7oj;kQn&4S%cq&fA6>c5>`d|$L{Oz3Vttu|55-2>EA-m{;M4RyXfzAvR|UOkTHsX zU-;juX1{Csy+-g$ivgsE{0~}wRt$dE@_SD7OG_>3zi#9A4C!|bzb78QG{imq$Lsi+ zhWxJOud&xJ9RR>I1px4SB=)=X?~d-5d;#^(Yxs-X`(664ocv1`0N|tj)4Bg(Xcc)l Uh@Jjud!Pd3Ag10y|Kr>L0c8%FX8-^I literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_newlines.ods b/pandas/tests/io/data/excel/test_newlines.ods new file mode 100644 index 0000000000000000000000000000000000000000..262529800351ca679217ff41f97373c8f2a3d7b4 GIT binary patch literal 2261 zcmZ`)2Ut_d77kr$;u_F}MUj#qB@`(e1;oYhgb)Z#kf56Y2}DRBBy%Ax*$cmQWPwqEFfZ7P_S(9J@x^g&i&3eckZ2k&dfP;{vQb!64nN+w>Up& z5`;fkvuyw%fPg1pDPaLv0f5ZYu zERg~s2Lxg4OnA>wgZG6fP! 
z@cW+A!t|rWMkN{8GBnOGBFlg!9_RIRSDBc(u5BvXJck4vGkf6R>(U8I4~a3Gf0#BA zT6^c&Xux=CTOFz*L9`9KZ^~nQhR29X;Ne4TGJZ?ZTAA$bk8XPCBJ|q)5K5Ze2J+Ij zojm`hRAE>8)Wa}SU7+5u`E}diUSwt>XD9O29j<$moWBN7 zmPG(IN5vtHB1_>%g~hjT(Z^CSKMF+Yz2eqRdv&>@K@tV_S2XD8ry>H!S2kAs~YW8#@w5^-~XF4^@&mSMspvrMia=~)mawx9a zM7|t~s*2+7W=_zTLo${^ z!4tj7KiYBOHH2NGu4MfDs;u+dQ`Lz=NrQ-h@%UJDz>WeJEkja&sORc3(^qWO=bWCc zGvUa*Z?zS;^!@>ctkd2$oQHu8O!$Q3mGagm-X}<{T>f%bc16UbML z7?e&H1(ZRhGEKtqQ~{ln@IZ9PjnU!^R4M3o8>Hle_L$Q=@7T3OV;uM@Q&c`-zcWnV zwvWXO+igEnV?6H~iKtM}IOdi-u~X$j!GEp{2RwHNoiA|TUlR?npj@!5+(VG|ZQUVq zTJV!15L9IJdD`vGaZ=G~_y_yp%Ze9i%x_ylA0+d zR<_hGYP3?yFSR{sBEx9i6-NDKoO}I=FZM&Pa!1C3Ou$bEDB<*3ZM~~dZ$-tP3!Vel z$8uN_AC}n5PO(i zE}M$bq6(|@ul5ozfxCs26eYJG$wzwn`%4%awZthjNb+7p(uui)C1QUcjd+Bq7!p(g zN8NdTxse;Gv7_4vHb?DEFX082yz%(?m5+MK_Fw^?N&-D(3f3){j#UJMEJ-}bcc z#tE)%`sd_3FKu-4omWgFITIHhvlA(t$lTArTwyk$m9-FRY$KHf^`dxdtCkMV8Q2qU zMa|0RgNOye&849#k*JWuCySeW9+Kc6)6<_wS!0+V(8Qc8oSdaAQ#@1M&}uOX(6h*owM{F)9$k<##BD@)QmYk7^_g%9n?;Nvx65=9z`G!a$!+-*QptU`?I;zH>UwuDrtA zqysh+C*usE=E{#3hKz&Bz>gZjdAW=zx--No&zW<5+)V-}IC+nIt0Pgxvp8+_g=iNN zf@_tq?$}1q_j2?u>B}UQ9hAx=sY$sNY`=RKIPE6*MX{I;><#t15CZvgMdH9B$bvVb z1GUiZQjH#24AV&l}I!Qr8lJv z^)M*9nK5lhrCf?KKu5!ZOa=;z2NuBeYoTAivX<Win|<^q!+~teRm=J8>Y?nLQg3nf!1}^TUj1^TkH(TepeG_7 zB+{SkrGN`%Er5OP4eHIDXf9{zMgT%}6|DQT%JOr^M)vLCYhvn0gNiT1> zHh>R+TxOh-t~ctoP%5H5+$`8xkAve4jvV$|88-aWEh;1w*WM&y^HN&z)Kr8SeZE<< z_9_oApZg$PoNYhfq_y~ntC;+i2ekTV1T(U=k2IdEj&>_3GQ0kDy)HG4v(8$3J$uVZ-$o%oLc%~os*h2X^Rwg$W;N`mvet)yAWYUa#$Y3BLwhS@Fq8>mV`r>y1Tix< zhRXc~vu4DT8XFS{>8220V=9^1TN!}$L6#6^=#MIs4cIhDPDUIXiv;WX5^PBc5rymH z8WIvR62|T8ng?lQG7{3Q5V_Y%qS$z(gt)lWDI)b6C?*1w z5`Qf%Edo?}Eu#Qblm{wnD2R(GN{hAMZZf)ii>&|pOBmolbDno_AVnP zB|9Z8BR)MRJ_DANk&~1KOOFlBObE|TipfY#&dEqF$WG45$GaS%grp#%P1-; zN`GITRajnFTvAw43@fR~FRv}GYc6@8US5z>T9{W=R#;aKE2}84sH&-`ZK$a%t*@=B zudgqvX)UjBt7&SlYJpd`bTzfZ>)U&py84^Dhg+(P+Uv^On(98bHukhsBiiZ*I-3T& zn%mo2+TmTDJ)gSa9iO^85C}xar{SKy;oiZKPs3Bgh_<2Lo{8R$k%8Xnk=~J!k-@Qv zk;&P~v7zaSk?HB_v6-dmxy6~q<(Z}Rg{h&%x#^{a+0UOpFRpH`Z0@YDu54|rZf z5)#@=NfALMr>UK(Ek%4)Vst%CmvYqU(kgi-WI^m9xri7%AqpJ4&kFJhLmF&m1k7(c zBE`@d6CY%MLP2N97OWVezf0Ibg~9-mkP?&hp4JD}wQrxksS82{b>2VHoVai>;i;!d zy5FD7wdU5ZKOT9)J=R#_DB;<+ER7iNFRsGqKc#9lqm8@Jm;0=yv&T)_pKBZoP)FwN z9?SXm5_mq^dk@U8cR0sH^Og&BGr4AiI%}xOXl|ofS)9ZgV^H0uB3FttbFAAR61q7> zpc|({DutgOozvY$LH=E|ZSZ41-V&wn+vo@jA9 zj$7|o>*^;3F--T@r%%eda-Z0QE9}b8Y99!UbEG-B?S!wLx4xz9ai_arC6 zfJa8i7yRrGS@%ZnYN+9o%O3Qz_}^;_UiY30uAStoS}Wy_;fwlI;IEHo?N4; zc5S4>Fy&TluyX|H+RNuhR5~Ca0j6auWV0OyP4JfrX*kImYvK%!m(D(+Q;!Mzt3Z#M zPJ*0XU95hT$=OZCfgk35U|ZalnsB)vm-oJHiHtJWAO0wr!MJTPlLq4N$9mtb%+6Fn zshV~LmbEz*{I&ZlURIvkXwJwAx8I@`N58LGro%xkyM$WXK?eY#JIsjde>mTHUvQfS zqpOzc<7|`J_kya!oeR5)&2s<~ir@NnQ4PErb!BWgjN$xK_F9xJi%7~Ct08aC^A9x~ zTC3BamJ(4A`R{coH=bop#m5|v^*>Q+9Ae;GQ>9D;fbzhX24~cUd9trx7vkB;$jMyA zo1Ril8WlOrP3}0icLGM6pa$mQmVTJslf~onR3Eb*X?zZ=)_DH1Tv3>u-FdwUR=gV$ zaG~BPju(plzGTQuYgG6!az}j&MvIND@o|D#>{gO)R#G+n5a@dzRdpf~2YS zGl*$*L~NNXRhT}1m zgYF!}Je`#9i&O*z$X$10Ls*#r(($Ocn9`Cb)( zFspN@55spHn~Z8mB1=tx@<&`81{1{EYm30Qf~T<0*&p_$iO7S`TvcA=R!2s8Zk)F* zr1)#`zC$F;Ne8>jo`1zK*0G)HydQ0)8!taH&9!?xU5Lg3)2y|AaShcsD+!M zxv(~U^?Y+F$p>Omx-xk7=84iI*{#5_&~t0WOdE~nB_+t`kZ{g7Y>DoRDeCM7QmmS< zgA9i@zBy&tBBo}HLOMIDKW#@L5Y-nvJVb6ivV;yt*9{41L>5kExL#arxqR1lCyqGeUEemo54GFc`UyZ)w{iNUpcIXq z?x1Ou$3hc(fypI?`FyN>JNI;KuP!vh<{#jY;LTfGaa;c|-d8l*u#~EcziOD*xFlq2uC>wLSh&!2Aa$-?;g-E0MuyNyL9v3tB4K| z&GwxRt{bK!lrvUmo2TOg+PK{CUeF!u-PX||5b4;X<;j|)t_q;NJ6IS$tU#ZEsfTUA z3)RU1-Ax>Zh*b!>zh6gC_F)Ow-&>fcCBW-Ip;k`(el;c$9p82JfV{cTpHED%nNro4 z2RUn#-6ewS3abMb1wG?~1=h>=ZYJrxWKdm?+b6=RgB(=rif!@Ks~ak}`@a_Qm>HK0 
zmqpl7fg4)^!i{1%1I4%}k1}5k(^j(aqPbBqe#*+_*BM1I!n8HJq;@K%s3~t-edUJOm#X>tGj;ddkB^%zK5?WLZgW??dk!iAdmyL(^G3-5K_T zbI!$Vs=kxHgFa20wZM&HaWAkyCmg@e%k92kJHwbWj4R{7Kqlk;P$ozzf<~F_mV%z! z`-)+lM=vva}yJl~i*R&S9=S)>c%9xdRtEdQ@{{uwKo`ZCKh|D=Y)nVQRg z0{H@lqRXE}g(Ib{qlxEVieAy}x@n+)NiJAqC3`e~h${@Dh$Sgtl2t}0?#XtHdG!+I zvAi6AWv*t~r_IFLFwYBB{UdzPNS|4iszisO}rL_O|wn%={LmO)wdz%~8f4Q?K zztIC(>6;oun1w)4D}5WtzbOB%Q3zBYYX9F1=kFebzM-M9rSbK1vHrKg`CW@Q55h(t zY;5`ed-_jd!b%?uGBJigne2>AdZT;6J}fxCdwhPzeG>xAsHk?dJ+zQhErg^6(su-Rm zlI@%O#y!0|G{VIjVfU2jFUIK41U-$SzeMswMq&wK-4&`}TGj^6b z^%{qLGWqnrFog9_%mv=fUC~)+EZNoanRksfbJ;a?u^;Z(pQnt`OJ>`Z-K+;VU#)#- z>c4upR9xX8Vkyv&8avx8$v}!Lh`wOd;U4f-nq6CLu)YfhS|6d zvZHE1>t#-y#$#0s`hiEiCzJx2Q***0FR29vExmL=QBgtEqF&jI=kt!VQfw=gWLBCL z7 zI&I*L=J@QPG)O0*Zapgmz5^Nf^-5*d^pf^MI}mdWA%p4wRpQcJDw_K0g^B6?tfbBD zQcB&jB!0-9u*P@fy-KFxe%bKd+SE(|O7I31;7^CSF zfz$Vki%`uifrNWCFwQbj*5KOD?rb4zx`^1xZEGtpE?~bf2f)pZ)XP!+P-YuV)1Bs z%uXsfu?yMf>KI6~(~f71@rtqpEemcEK!HBs8uT*7ddu0PGzpxe@q7&PKN|6@hQA2?bfRW4G3u%2bS}-oSl6 z+`@(|qf|>4m8UFzW|*>#U`P3?c#I2hqDI8LaKJ+Sj$!WHbqvop0}* z=}8*wMP(%pQb&z`G6Rv{UTi5g3BQwY&Obk8m(4&!5J~Ickh?kz0@$TEy!xwA4|^99MK0-})wV-HGjyxlb3cfq!4m^baMHg%zZf46<} zI?+zY4370WjmQ{jts<(#*4=DY;2GSt1I?=;XNnh0R-pg3{+OdFdC>r)>oVN(xb4{& zIsx%r9~&L&MdpagD{szHU+n4!1n%d8aV)aQr~~eCnxRHy>qnz0&4sm z5wFah>}Gdyg{V9(aa;5E3HVgFr!)JzwSXqNcvUXUM^-^ybh1Q!M%w_&D1*mCFLb!C zT-}#-xK$BpOwI)IuUE-ClYFy>dRLxPo?`e8yMlR%`oyv!VRniAiZKzcO#8{ERL}iZ zIbDhZ*q&=l%GqL9~>m;o$!@- zF~aXx36$dM4i0P`c^af4-;2_Xd}|lhK$_bXm$x#``8Zj7x+cJ+LCnA=6=d^LId}1k z(mVeuOYhHhG)+WvZ`Ma%%{5Q2#Y*^x<&<#1O429wxUi?<){<3o>5&z;r)FA?(_B_3 zh4${XIeJS`Fmm%_bOtU4+onPz(Q1OwG2$(id*aP9l6OZhKH*a>3#s^vw9^3eXg)~U zKgS=_g~6(Q;S_fP8G3yqK4pHHw$(A)sUxdp?HB!g*kxaczG?g zcK!ulT2GOj>-Y6Me$_&H7Hh&~wGhw(BO;c4sl;3i1D}g3VFe-0zO39!c~}st#l1)S zI&JE7kS0bbs;xpv2)_qR)YnhDe7c9741$VrjAHGtk?O8W z=d7UVpx`%c=|}4rkXD%W%)rP&I-hHTCemvqoXbs7=^9GCr2afzw@pV7M^;=kZ9ck* z=mN42Zw50AByuXNYJ2Ez5!**JH@tWhW00m05~#m6ueO6-SbXjc0Z1H%p)Kkt~$ z(-0khX*7EUKVK+0Zl!d7l00zUuh;8?cP7xN1(6IJDtrq&fl-kZ6ykg*xl^fYD}9Ud zM6V&7BG@@Nq28c~AUG+e;*1Ka>L$fNl))=}x=bWzo0ZH?5|*lUvYGFZTJ6m}nx=wt ziI29Ay~cLQZ*+XFEXVSZ87qk|xLFu9=S{Zy)J>>UsTbF-*i)UC{IcS0_Q&cQhilpXw{ zeWFsV``F~hoB#kF^WbE;$RKM3PAEWiRuS)j8E>N$(EW_=Rg7V=-@?)byY~@9qJZ+! z8Hf6KTJy?_)#;2|+&{YBe&M4YlK5j{tf8=OKkp#+l{Pnop{!=Vd{+8SO*H(rS$*gC zBY9BL<7_Jz@RZOc2(ZKf&I}f>+>?&To_G$B$}(8;=5nR9aBuF93K~W>pJ&Ot^F~c> z#y8+;r!Rx*xn9vC%2Wdh-4q2kPfKxK)@G5hw(q82)BI4O#K(bF36x&EbqP70ybr5|nerfBD8gJ-p!1{2CtaOQ z54^76`O`!k9~~WSZEcrccqX+)!~jM;mVsNV8aJ+i^UkRMQZC7*&q0opoJo?!%1SnA zzq!RbIT$R`eE_AnV_AE|olz(7u^y)xt3@hG5FJ;Ln{FA5K7tdGz;Xt&WA+qPk_O*48ReJ>N+5pXkZB9gWJ zYsc9wx6;Js7ohTlobcu?$ZnOH5VeMH*I)*lNC>(hMH%oS`t)*VLg;2WkP^9^%ELx>r~`>?mif?+iL?N#p- zzNi_ozm0^D)je7!1Rk)Sf&Szj;9G7AZ7XgC3on%Ro?%6qVOzC7$s|HTW2<~Hus?$a zKq^X?IL_e4B2-aLVq@v8C1j%hg+jmPurcQ{#LKW*=RE-aA)hW(X-H}eZ3d})Y9!75 zimm4yV?cTR8B08n!O>=cF9VJn=8b{QJqf118Yq?KU<47M6BsO)zY%Z5V3w%e}>?6ZjqG^ymBZGIgy97MVm(zPaQ+p`_mi&;Zdg01xOs5jAI^Ik$w9V2McgUPvLgt+Hrv&WxHv!n>3QNwc+%qnHL|j~^Yi z&a0zNxLB21OdK!fe>bY%F&}2AK*Q_akc)Lp02{a4iEJpBNoH=6OPc50_nfM(MW;F? 
zViwQQxGrJvF_XHRPZ?E6y?|sJ*an z$E8;Wv6q8dCOEBcJ-%g@Z(fIDmVnhd>F|O|=EG?Ma$0zwVqiLy_ZAMmbOC8t?UkUh zHr@*|=ayZs2h;iYJ`U(+M|plI0PIIi-8P{Y49;i_blMx7VCrN|t{{|smcVvT#i^3^X7;2eG>qHd_^pAvt)-v296 z{X_G|UW1=k{z+8*Ja5PTPx9%nwAEkl|EK1kI}CoFU;YN^Cyn(#MPa}9pZ)nsYyD4A zK)*rxNpt-($_)ebQ__Be@+4*H0P$4bFeiVE+vE*G>`K|IY#Xl@|MFoEu{7 zr@a0R&R=P=e@6OikNkgw^eb)lC(aKe{*@E@s}VO{781WBH!C$hq8_wvb zT;lwNLHfJmuc7oNnfoaz*RJztYWH`&U%l^5p#3R4*JF9TTj!6k`**Ehy~mAb{we&| c1oChFw45~Rb)-i^!oB_&Upt+OKkVoK0Yt*vY5)KL literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_spaces.xls b/pandas/tests/io/data/excel/test_spaces.xls new file mode 100644 index 0000000000000000000000000000000000000000..316db172360d0d7b849e640136da4a091fbab64e GIT binary patch literal 5632 zcmeHLZ)jUp6hALb+Sj#f(ypDlx%#Xw>((lE6P(Cwwr<)9V`ZxdGSD=AZL_`pA<1A0 zVm3EHa59q(bJh)l!*<8z`H(dPu~FGO@XAHk*S{T%jAt z0<+#+=|ex97y#4fRbXP(UVZ~DX=SQqSm1z+;+GOj_Q(kGgggb0mriT3WAfFNBAFjA zO6k8;pB4S{7FfzwY|pnp<2U1Y1-S71GuHF`uL9SAYr(6*b>JWvSAf)m8^CM8jo>Em zTJTNao58n$*MZlAZv}4vQ~zyV&T3FKjuf<_-*UA!`)g%_ z_v^#oTNq!h0vC>|`o7>Y!I@D4w+NH<+-aDcl(M zGFl|ZqA@#(T8EWNMB`3;WYEHmVs%)K9qmUA*TdFaH_IA6>ZIahj%`_~_^_SK;`T8m zg1PhdiJ47c6g=W>L-O&8azj~YK`5)mlKWu_9{ze=eh9-|f9uM3`}%r1pHMsvBu5+R z6xioDTBiJ4RH9AO6WX^C2NylUUE!Tgm=SCh?&^sS+E%#dIqh~*(=dkIrgXV;Q(n0h zB{0 z2Yn54K|gdO+N(#?H_+@y`v#7ycOw*4>4(uEj>6#lroj>n-p>@{Ml6R6yoM?uMEQ_O zTj0LaR374+Xa}RRBZj!<(vL`di?J7p|h`I{q5J@*$pgEfE@zH=u9Q#5SX%DTQMBX z%SOxj{ipLU*u#rWlqY?Xjs`u3ug`=Wkv{znJ>r68qz$kZ?ZthRJA$Mrs>xf;X=h85vsH@pcmzEH*(23t_q<|ch zqgekiUcLPG-;4hO45&|tr0^OZ1yY<^J^hTMrLO;W^!>>jG9bI5A*OS%^*<2h&1d`H XSpP=oPI>y5BP-Yb`QP9Fwf;W=%ZE3X literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_spaces.xlsb b/pandas/tests/io/data/excel/test_spaces.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e38b6c2d8f17061f1bf5486d18aaea6013639578 GIT binary patch literal 8036 zcmeHMg{B_*Vm?h+(KX^<}IuBGcPEue(JqU1`6fP|EEsz?Yd-6b8;NXLRm z{Z_sAs_5@8_}?)2Kt(?P-}XQ3fjF%$)n;CTT83Tx*b-LH zE)=@qN-S0)vOF0=o6#8o4eH6g8m2Q}!8K|1s(eUz{1|@)|4fgqrY`fi#QVYVh%3hd zOlI*z*~Zoz4(G{Y<>*JKok<5Xaf)*6 zT8@Rw*K*W3c&UH^?Kq(31YJ!Gp(==ZK$N@%q0KQGc`g0UtZa?fdz^97K-sn3C`S`@ zTTgD$u>~>KFY5O4);Hi@`mt&nl)DQ}uc|^ajYA{ru2w(vp=wkyAI0!&tkb{Z!9&qh zmF}|9A3jU~nplbPVj1)n4xd``$(InVD$Xpr%rp|0eBfSxD9ua7r zNxN@4$AkhA0`@XokqtN}q^>EK%uR!W;6zdWKHS+^^eS2p8=r~>T3g**<)6qrlZ|+Y zYMRQ5i{0Adom!*g=I*q-A|AD>s~@M`MJGBUB8y^J;|}>Sn53kD?OuC*-k~vWCJgV-?>|3?4^uR z2y)QgFq9WG&AN_Dor@c>`Z^^_euj*Ka_9EIvS;Q+NlQ2}h2kRvRMTY<4UmJt)ROY3`CBd z>jemY&*UMZx1OL_z1wWA-OX8XR#b<{<-F-?PjA6Z+=9wtg9Ux3(~Xvn>muB)vi1qL z^s+-meSw+R!(N_>d*!Wvm=jDoqPb%l69czQqye#0Q(9q|3i~?TY46%AU|JGs31UIu zbmo)lcdiNHx4>wk=(alO_Bz_so-7Kmo#QS#EATJ6%bYavrB0q2E^oTl43sO zG|XDQE0(Ma*Cv}=iRNofAzH*st%h%{=ZQ(89N-ySZx5<+P zp}crAgx7iWp3Jun zzVw1Tao`UP{-QVn2iVNVs9ipuY7m2o4b!A*zGGVU&7ugN=uT+ z>Tcn66uE0+uZ>_U`pJ~)DCZMF#m%$?(y{4qJ$La74}|>uqCCYUo=TCB$cFneGs2}b zsFM8q99()++@k4=3snG?4ERmbE^;w@rna(dA8&IwYGf#D3#TX(AT-3G9T2Ctr8?*k z3^m>g8oOsAfvP*_fVWZxBNy6lE2k<=Z;Elfm3bXFtN%KoP0+syA=W73T|VUjG1jLL z^K!APbTgN;`-F&SGF40Hh=~5I1FKQmuq~W4hpTiIl!={Kdke|BWP2pWb2(tF$K{pL z7RvfK7_=3?Iab$>d(Bd1pf(a@LFvdY*n&qJDp@c+0QHHwE4r6mI<1YP(~waQ7qAS4 z3!b^o3%mLB(O~yz4+pvQTrh~|r--ViYDF=KA^rEeXB)`~>I1O=fL72~Y5y&O-EF~6 zV4iPBzOT}LL%-(<&_trUOqSVJ@nnwW+~>{b|FL4AzY% zzC#HyLae-nyPDkionXEYb4KS?m9GdaxW2^s+CV-1l5ARgSw25IxbyhY zBc59iWG{}F428N@LByGm=t4T7!H3U`n0ZBu)D5sARGYk@7|cZjgbpN z{I3V(+|#q`Z22%=hzQ1W2pGH@wk7GNUSi-a{m?(mO-PFtSR$E!ZiXd(9ntuh)FfEb z?@chi@8-<=4<(Xp^<}{>IyJ%Rc59>QP}rh7mN5<`;@rAQ#Hx2SgLvj@utr4!8;?4{ z6(9F}wq?%P6ALamH40i32^2OMv9joauVrwEd5!Y5<>GIh 
z>b{T=w~Nk1i@>vM0VY9H=~~)|@2Mt~FU(>T)uZ^Fd_g=<6tD9+-hZxJGE;l1EBIOm z+(DgbW!`zVJ3V`G)b4sPY$!vflwvxDZz zl8eL1Bhy=zULv>nJ`xjIew=PwnPDJpMx{R8#c4=^GfKf>5`?Rsc{H8vs57PvpPCS* zPE9OS1!= zFQryv;B$#pPe5-b%6oQ+OWN`rM$r7M8O#kqpFrY`!#5VFD`Bjj`6=m1$(Z6&Hz2W8 zCC9_?Du z3iQM$`NAOtvNO`m#yghQ+XRTK4|I*W}}Qw9bMxeEKmnvycz1e(WuPxFH%qq(@AlfB%Qi_yMX#!+{C@aZ_85+`P;0Wk>* zzhuAJn?Ww9-WHFHmhp%MNAA?rOl1gVI$!yh)Feh)#|PCDX*&%=hr*uCZFb&MkBj0w zI&^GI6-B1(T@OE=V@)Qb7%<`UX~CvbJf=sCyl$Rf`0G}<@R%OmIJp(IZqgb?vrv=R zp{gUM%5$%hr+++eDC!{Y?u%o%Z?xa?qrozxdwz@u`OyKU0u_XK>>~bC;rk5l!RXPi zx*O3+of~5lc7>n5Af$+7sC<-_@v0<_Qzqv=T`d!_tHsN()S7E)#q*YeN@}+Xv^`!u z)6ogE;xob7|4TR<##S@Hsk#k8ByUQ+D0jwY9q)?aWv);dre;ijE(vDz&jHH0#q@KW zuyRgZCFRO~u<(kSRD$aW_jK`aF6@3AJ{kcEop40(p1>tKJAl&FV`2V-OyCJl_iO{YZrt*(LL%W;SxQkQjPe?XCk$MjVJ&PU>LnqW>OP6 z)xOaM1Byt{D_$vns+xSof}Zb)HC(jb;K3GV@+wv4TWyCJg7eD7i5Q)8$Hn(+)lYZ* zqH+h_-|=N~rq)Y^s=0;i8*)AHD1}*ZxE={7r~0zEx6smImzC;kW2O+IkYSAsC-b8U znfuUl%)1oug6&3Y9}n`yMKRPwsOxBSTctI5u=lO4zMjHtHLSMg%`=A;5~Oiz1>TIb z*B+|1e@S(yGmQ^q>!MJo$V0kbLwhK-rI11pY}S>a%dv-95E)LF`|O&S%Jd}L>$(mP zLw_j~ruTDiJIP%#?Qb(F0&?nQVFm3EGg=NXF%0#5-|Pe^bW)`bM^UvDIgVz0RK zdS`dyLnbwV()%i=`CLUM-93g*o_i#)HbBkPwRov_M7^FOL7}#VzAyo&_oP0|6-6j& zGmoD}UDOhJ20!51NMtW^)x-THnLUjA4xuT8w?1O7SA=)JicDS4ibS{hpymO(F1dTE z@x=OW>XQ?1HzhaA&iNZdWcy$hBhx8ehZCNyJE#gPK3RhHqmy-V`NrE(ijC=`>{Ta` zCU*U+LReXn;*M2!dHlAOgM*#xXaz!xG_xkQ6NBVr2VAO@deU{)F?*aH$lXn?obU>7 ztI$5cER`WoW-%P(H^PQ zG3QRwx6Ry5Qx14rq4&njV;t6KBr4+J0tt~|#l1s>)8^Q2;%D)U^U4tIY1Svvj_D2t z>TtnZeHDS*U8C!wtZeO9N87E;SOSM!z1R9GE%KT4{fM-$k)kQFtw}qS0Wsp3ndfVs zSzOh#@2*g_EfIbyR=xSWHyRU9E$4b_;O&~L!(cVU9<8YN^DKD2fFLGyZpY+>R$bAH zG`?*H(TP_fJ~Mw0PZFV9W2Dg%It zH7A0pSossR8oj60^A<|4j*Nq}M8_gWPcw#Q{naC8xj=3gwjIW}INSKBS6FoboHd9;A&#%DYSK66~YFpqA&QNQbZ9WE_+e;WDw-g}c@kRyg6bu9V!k^dCC zA5pWnvP$Y^eEuWXgiDS#7~qt5$LEn(-EbZLvVAD17N=m|3{; zX_Qcr455M-_dcy4sD?jqi*U1+R za{qgJ^>?l#wZA#UN5R<&?ENj*2VrIaR18o8Dql=MXyBa~AzJpP_JDvIi83uv8W64< zn3NbB*mw<^dxH;&^@qRPy7jz78A&S>WOxXXT>vW=OLd5gs~eA{3k3Xkm;1lTKhkxN zV1`P~yyT(V7#GqV{?BPrKJ!CVYWRiueFzig{a7J0IIKdW^~yL~KQ zJvADCgfRh?TV}fq$$_cvbxg(8ddJ(TKN9+%_SU{Hm9WGZgdROM>9)2~ZRNF9@+ zB9F)~KzUCQifbS9VU88Lqc$2;-17n})apZF$Yz}Hkfc}dvLjyZ;S4l?tlFucIXHAb zdS`NIp=(FWwEck&o_g$Ga!Hu>l*YFO(!rrNqGt!}_8U(r599z1n~$sV*tc^QD9%$w z#^%I`0)2Z!BevSCQm3X9Ike#BRA0)W#CuZe&14HDlU_}c%j>9 zhqQRplp@pWc!7nP@}dW&7AP=J8{2y^=)09cK!7oNac^ipP0 z%Eq;E^L;AwaxX!y50B-LFz{%g-h)8iBylP75t?LPM!n*8s0G9banO+CQHe%dzY!H4 zSXHCcA?g4%&3}>7Lue`8^0bUw9bWA%thb}s3w|u}+HAHwD~mfmCD+~|GVEq_f8;K< zVjA!hVRvbLvHnV;=9C|Kih5oK!f`WMjSYlZ+R zCVk#(ZLY&BCmFF+=};devBy_&|a`VDo9%zm~? 
zI|w_v<=OFwL1-q+g}Zj%ZKB0j+SOnMtl?z%G-H`o7}1A;Xn)@QfLH%D^G1f4-MKTK z&CZSK8j~5$PBo^B^}?LR<5vh5WH`Q8XcSZ~WHtWtlHos5`{(?J1w?hFUjcqyx%kKM z>$wI=ji1&rE*t*Z%Kh1J9`joT`F|R_mvJt)_I@B;$N9Gs{nHx0Y<#(4^}{$2_nYyL z_SI$6%N>~?rjx*LrkDFOmjNyd#2)~W$PUl1CG@*5eh80mE$+*Jm-W^Uz-XlWLIVD# zzAhtN);&KE6s}z!(vQJhM!Bp8exUe}UPigB4K8E-TIzow0RXE=tltX%W%FM@(?6R# eAf?Q|=6`>*)s--isRsbyA%E1!?B%2Sdi8&y2U)}b literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_spaces.xlsm b/pandas/tests/io/data/excel/test_spaces.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..a41ebe5bb0e655ed2c39c52a43baadeed592ae94 GIT binary patch literal 4848 zcmaJ_1yt1Ewx+u~q`Om6MWj2W5d?;Y!660d6b6xop-TZt5u`z8KoF2_q>+%6PT!3G z`z}1+z3$&y6KnnUIqU5G?frfGXse;3ks)DWVIg60hiD_+GF^ay(~0cM)q4c@madRq{hI(tG6zilXGul41U9z7tMG z0g+~6tpRzx$GJ3`P0j+|XMUPkoa3g|3sKLq>h){DX>FeO8X`eiGNfx^71!>P^;n8= z*UbqU8@={p?L!>)GDzBLm{_)@As-ju$CgGxg8%<-fgyyy;RD|T2y(Il0-gAMT%2R| zW@WkCig_R_C=b%pE9P%cjJ}P1znz~DRkZ@;}+BG50W+}4EKC1 zo%^_9wu={HmAT)&ifOgUxN1uz6C93PUGVlg43Lb}WN<=CEZp;qBC7;{h356@u2c=3 zRbzzHMIraZPtK@{rj(}?z22L=yR!f0tj{Sw%Pn0b@JRLI-M(-(!z&HdDIuPQ4-aDv z&JWox2qblt%v8UV-(kwcTz;@hh!vZp0_dKSqDRl%+Vydgf|zoTh@YhX`tnT9lGL0s8zGG2VP3Iq{A-TDhs+!aP)00MBZ+0D zn4M;B701S3MeJO{s96D-Non4q%(gOfS_KZm8H%CybgwX;)@R{80Cy$IHfre6n7vo| zzM++nEN4C_u(56`dKfz^*U&WRIFJP!Up&qXHIue|z~=qyRqi~l^${)tIm2%~UtA$0 zA$g+y4LSJ#AjjR~xii2Wftzh3Ev*^BJ1x6Ky?4FV2B^)yXZ6}vMw~DT(~7 zz8JAYf1aPcLZeQqiHq9-NltbuFGc0BDkm8)W2`T_WJZ8J(1y%nrk7L{)rfo z;S_5B1UpX(s;4$}Su~|ect&Qvx%=jeRy;>6!bB5b=!vy0Nbn#cJ*%xeH2r@c^FG+; zi$gVPCBIenIUeyue~|liPCpuSxk;vv@az6i#plz<)!Q%D*BHv%LPh)s6(s8!kE%8I zkrZ}HC86`*vN8x0nk+QGu(>87=N6v+sI1FS@CpX&q^;B4duk?90GO!j~0Qw#9DcQgB(~J)+L`)W>p=4OG;u!ez%F zH0$@OLcXXyJ~&3t>2b@{d@t%iX~#vG^r@P}fiq?QYGo&2=Uy|^0)@bKk3OA*FfObn zzV8n7`SPyG9!FkKnSpruzSg}m?Eq%6sMAH*d~UUfeKimEA}LuNuEUfUDhUYpMZN1c zBNY`f;1~sfGDz+djRPH;YUH{7Jn0gk>`r;YBdcxQvWKc4`z5|)?J|%B2GA<)MvbxC zB`BMKF`)thM=yus&tP*^`-OwgXhzDUxlCb(b|9V474iXuuVx_)dSg~mEQ;gzKNwL! 
zpBd4l2R@Rd@UJynSiGnfjJuxI&NHQt3I&GDB)DlP)?`fCh~<2hN-1W~X&2HQ86hqT zcx!TpkMDxYJ|N`2^3Ri?2(d3j$ElaN4^v2rx@71Y62`icEAb)XT~=evxai#IYq|Vy zjwE@Gv$TiXw6EtU#SIyiys$)y@O?ubS;c)1Hcly`A&ftcc{CSPtu_Yyg(kOmP59u& zdCj+>H|A2_W#E(C&K=rscFUqu(m{tT>U32b$-x=}K^pw1O%1|T86uV=z3r}*cw5JK zll=h88XnY)EaavZJm&y_<$;K04XI_%0&aP&wwJk|LX7G#R>IN~J+GGROcUjaMvM@X z=7RU?70T?ggwyyAE#zls@+7c*kr8RxmqmtI`|HppXjyW8fb#bAJh|D$Y(AT7I@^*vyu^c|oW!(eGCl0Vlbal=IMSk> zV}n2IdAu!@84aO{nCXt+o=dRiGdp5`71KUmu*SKR&sog;<^1%1VZrnMGvLLwzAYDs zeurpkpsX~|{A|r1a_F^2lK+vPz;neSr^P}#on4MqbgRO9iV#uIV>hEU^XC@q+2X1gNZvJ*P6kn` z2$<9yNEq{5b-b1~FYHm(ESczO4Bgfd=>4fOl8M|;u2*h~`oWb)yUD%dSC#byK7mDn zUyfimK6XEdK3-%mZK3tReKUtA#eOu2Tz z)+Ot~&yVQ}<&|^aUZ5Z%qqTSDI}ATJ+g~Lf*}sU|{wWAxqvru~aJBo**Rk>aj_{a# z-u1)DyVxuQ3r8>0WB7$^BZCXrFarB^zZ@Tr;Dl!>hVdr^BwQ;W|4b$|D{(IabnFW< zYGZr80;KOGh8FaOfv~dPQYl{8jJ8&rMW8&1^6Eh=X94hFL?v zdIm&j_5>*3Pr(n~3)eCKUH!r-0?7twtqlUYxeHhWL4e=-SGQLaDoE6PKpwyQ^1%MN zyaqRNDN#O&u4!;RwOs}c$>;aq-dqU<1TjEWXhw$V>$i8ncHUn1X@I>QQ$hM=OV~K~ z7$n;rl7+H4?1ggbC%SbYdtfs1NVM&TdCz=>@|oWxhuIlpzlH>Zz(j^ZEu?NK)1fIXkNZe|G#+mj7eUTYFQxn;wFc&08IN@yZrIBx ztFsZ|r(2^fRhY*$%X^<3V5`c>9Gc5`K<(*gVMC1c5kQv9HGh`L&SZ6lwl-F8dL(^l z&1ytKaDD=|OGSF!g3hNE^u@F3vIpop{I-0ZDUbpR`Hd3EUkS2aP125@(I%;@t;c~G zhWl=AmNk_2cCV|R_6Mp%>q;~-&c@EKP>Xl^reHA021+A{A^URZ5EGHGA0o0~5IkAC zf5kG$Mi5wmWw($UYrHBzhllc0QY;k|wW{6Z68YDE~lz9a)}qI5RMl zyZv1J{{8*L!%45|Hxjby(@oj4hIhHzRVqJ7AFlZOa7`2{b7qosNw4=%lT7`TKBn*w zilWht$L2p;iJ}rqVte4K^KnxiAx($kJQ8P z3|{7sl2c_)isBaU2*=eRI}4%&i}>y$Tc4` z&=@Q8n(psCZP@9m;B&?3y6m7-E_7+}h1BW*pzD0#At%R6T11>EK?BR1@F@8H71*Tk zH%#S#t{wna4-5V0ZUA@l-^xb6U#mlqtnI*%w+T|a@QDh#kn5vQvhVD8Qup{gxo+Wf z*Pl)m>uYelkIDE+fW>YRofa)y;#~;=@YfMy>NnW-t~hz`?)g8NwVT-R5T+6kqj(Y0 zGQH+N7t)e9dO$;V05cji;)>PdU8sCl40zGQotp|>Bwtm;9Av_cXUPKZ=GyR3Cm4r* zHZ>1>+p}FKpt{OcV}SD&CB>m^SThrK!!O9m{7!xEgDhhCy8EpZ;}(}?mY$xBjQZ*n z15Lz0ibf_EWT7|-R}LKI?|tEzZ>oY@VXR4+l3J;{EE`SFdJU~p3In%vG|up72og?d z7>PRTMMV0auvx}p`^)!4#-as=e3j+aO9_#CdaBA4=JO+nyc|8;z+{+IxDH8cW?;RK z-Rn~6g)?Q7%zmKB`^xmtB`%5P9=rMW2i`e^uz{><`gtVGHu3#r>3oqG`Gt^miz`jy zl3$rLOXd8zrxo|9Mvhf7U;LiHr@MY+Il|KgKDwl+r0Z#Sj~Cs?@i%{2F?>unZBi02z4#`yQX|4&N)xyo(E zLNNA?En)v&<-hRvpX=Oi?+AXou}ip0Kpf;xw)=C1+x-SX12;yA_a8{%Pxsqph-gYT t)(sErzd7Y!E$UD2+sHta>W%FY{Kvx8RzrtJ83_p+{>Z>PWXA73{|DbzC0_sl literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_spaces.xlsx b/pandas/tests/io/data/excel/test_spaces.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9071543c4739bd5c1d6a886f7f4853de777631d9 GIT binary patch literal 8622 zcmeHsg;!kJ@^#}*a0#x#-8D!^kf4De!D&Khym6-q?iQRtfCP65(!o7=2u`EH-Tl{@ znfJ|1X1>4Rz0+&m)4kTM>U+=GyQ#&u-*aIhE$B z9-|BHLV5_;G^AQP6S|Ac4YJ<6xz7r_#*@t@c;ElE*vg>+NR~Q9R})H#Uy4Ob4Vm)^}(M$S=t#l{~Z}CXw=A$(-rY z!H0D>U>E5Zi1yC5Evo7);EVJZj^nM25`MFO!|G*pyuzu796|3jN8)OfuTH0{#cHP6 z;B=L&`~}SNnSa8>9C3jsI2MXKKLkT<`XWsIT;O3Lw`U7Wbi|4J zRihBGl`*G5*5d#=$k-G>9TZVRQp#sH?9(YtoU1Z$&`vvl&9p>F*$JbtXv~U+#XZp({$}HGt1Mn$D;=Xx$nIkA zrnuM6$eKam-r>7VKfXIu0O0-}384NrTGndwFr2`#rUGXj7MzyGPCy$lH`mYW|LFK% z%)!6BdP)3qwGJNa;QePeA-(4l^Ralc%5E~xn&@8m`YX;osEx{Hq?m7cM2Yv}VIa~| zKdA3b|LnX()K)Lu*|I=M1ObsaL!Db`aPpO-3mO}<(>n#nlBG_3*NM}Kv(%?b9xSfV z7`Eci1v$!nFb0M3L-|U)VXl`CF$nW1gGnS)1N6I}>n)gFlp##WYVDQ=SJVq&X@ryH zApjl0-G=*jp19dNfz0gfK|izCe>j5xPhoH`|J|cR^SN>-4;~cjE`-M|-IW-B+L@bv zM|=ALdS4CeEHfRy?+J{8twA@YG)+#_v@zvBx+pieVJ zt!CcqzPPkvbIxN|uj3pdTk(S#D0G*TzD$0s>@~32aGqu0dOrDlcg`_C$VN-vRbus^ zE;irFUenpvby1;rG`N-CEtkGKNeQ*d(hdXSdF66Y0$LA4&<|3$ddSU7*r!@G+Ux>Y%4i? 
z%7emVS_M_+&oT{qKO%*KG_b(Z{`8+%<2%xI*jx<*XF&uaAQU(B2Zz~*J(;a1cEt)r zO4bbm;(#dOJlLh0gi3EQUJ0yc(fk<^yYC|#-1?Fi7m7MdB<$X(3=7Gl=+8hSN{DUO zjQSO8KXRe-`309<3XQnijm9OFsYP*G{I*A$WJVvYFy$JS%b8!mfY5?AB?%h$C1`0z zOp?|S(>4fCjV7_s46{8b0aMJ6CL);z*Eh{k4kYg9?s(Dff4DLED5Z& z3F=7i^uwup+q_lZWHpRRq&GOL@(W8HSlW@jmHEM46{)%sK;YZ!tuRMqHm+F{qKmMVcCgm4jz z@m6~u#u)G3qWl|ATuFuSmw)mD9x{o3=Ly)#)Cp*z5YA%U#tfgG@A&Zpu2L0f(v^YsaFa%35x>jVzjS4X- zC%RGv7X}+r<3{JmiKLs5#cc^aT&1DmsP-QsaTST+cxvvR!iu&q5fn~OQ@cj1U)Ogq z#u6z(sHkcQb3sdx<3N`)j1ZBaEn)I4JMk=su5FaM)4j`K26Y;MLAQ15z1gChM$?et z>^5bxP~nm1hlgm61@S2}K_ln1oSk{^mW;hO=nARU?78>%-4Kq_7`eg#j6YgrBYlVb zkp68=+DP%fsG_zC(GJPCXPKu!hxQ|U@+Ef(u_VDoMI?l zV@$VE(s#v)czOr@^yxgy*Ynu?)>yCntHruSn zXwd`pz4DEb3SRY`Zr|#+)Qx>Yuo8eIeXN@j+fK5JHN{cCn#gH-xvxDyvM^$7SlvKX z6caR={lc#zmAjP2(RPe%S$pe3Dc#}Jaaee>{F?`Vb7irrsd9( z^twCPJPv7*^gG$O>@cossAW^OLu1m?+M9?mzQ4UCT`+FBJJ>oD-u(9Hu)VY8=5ipp z<^E9Yv|@RVnEQ**(a~lCDa_||T`h)cW|opF<$x}%@}O+Ve4jbOGywPd5J5V#lSj5Q z{xSZBYr)#cum7rEWJ$vqTGMx{>CvfiXI@T%i(W&OFkU%uQ^uC~e^2 zz#&UzKH<0_tSM|H4z$OmR023(=g)q*tT`Q30sMgzP7wA{q-{*ArBW`Es<=rmbZt_S zZ&{@x>|_tGJTP>eNzTcHNt3Ha&Mb7G9Z36j=4f!iw!EVNG$HA2!C(r14j$jo!nN)J zD=v*4O;LGXL8>ni7OQ+ter->g%eWckJ;Pa#ugp2#D*q4*IejS8f{DF?drfWFDHN~l zv@zBYLB<~^={gmqE&-lViSO<7WyYm@66Y4ENa`IkEiNx9N2y-LJ6DfU*M4hB{~2Y~ zIxX>0=oN5?sCe1XFX*oxBlX=xkfpter`3Ke; zMqkpKBvI`sm9X~?X`g18AOPcwK;K!NlF`|L=ZA^56|e0bA?_qBVfe7T%ZKvwN`@k9 z8819Dqj8@v3L>u2tVWEpPNHn^#5c7lZbzZ(y-xNd+4DAZVE47J?lCc5klg)(Ozv$> zPJ32=Hi)MIf=HMKqhn%|&O<_p?_SAg7VF#WC8kfM3q3$0QqX}CCagwv4Tn`dx^e5L zc0&N8qQn>+UMGsBvjErS1FA93X*fWhR#|hc4)G=6?;hGmti>BdK!W5GeIn zzx(v;RK_~B**l=0dH-l5ORc$bjh`-YZvdsMN!HB=xp_1aji+E%qw!ILHNp`6MVep= z$6O)<=ZYh<^dr8Rm=SD!IpBP2M!3r93wM6zZ1cPj7k1+JBc<1>$L=vvy?Yr^9O)nXhWqO>hei1;;f^9$R8!_qDUdC(yMJnHia2OUlG`bK*}msm+mPm_w-wo(cSgsR-(3 z-2AjL&RWxrjvm%7Z;GH#@;DhHovWjG#&{s4OB~{Z@2>T_8&gBR@u@YKHBdP5Z$&-8 z2R)&G^f`c;S!<+I-0rg08oi#4K>&0~xIxx2>5kgK@XdhqgVtwPa>XpW3pdku)$9dE-*N5<5AKIsKAw1V$-8*s{O$$CJ`uNqfvrlkSM;EZ*zR*T(QBc%Don zOO+;INvq7^uBZz}Rl+z;*$$FRQ`qeX}LM+7=;4e|!0R}jaoUAirs z^jl3fhmVOSMl*;-sJnhhvB+u{3@iIOs>V)3rZ%0{mb1+zky3NcD$5#6$bYbhFAR>8K_DH%n^Q*qgV~%a5%!{l3x#kH=G`9c=PX|AdPnsL* zvi8XgELUR`y@^655pxN8QRuK{(MO#3zN|KA^GZ{ki4RR%@LmLJ=HS;A2f32z4BS?{ z+HzrguxJve@d|f*`wHQe>Btk;H|0H(Tze~PoP$C55fLiJUrB41u9m;Nn)Y_w$EVG_ zuI1;Sk`6fZWFhEo}3T2plwOT>uV!5WvyEQ`z3{|X40Rp!O)oy zk-nUBsc?7dkgj$*uFf#+9Sna4H5sX%NH&u|+_Uj70VkNuw*H$dclc0VXPU0Q4ZtSh zk4^n_Oy=vkm|S)>lwvA8LGS9*;&GgtvRZJqZm+ZD5Z23$U>RB@8}eP&GnnVjS1C)?w_)1NNr$XkSg<#fauydh z((d^m3WwXo&P`04O`eo4k>7dL-|!gE6Fc%?C0Cm`4aS>falDk_YcMR+>5_=-m^IEt zTf~`c@hd+f;`-i>>CX5yXG$(C=CyI~)ARse>D)<>T7rGLw@pzd8_O$5`b~v6h;RQI^?|A-s6a`xWfzDv=-=^Pc%8bO>=yV>u;C=Q}Dh?Q!c?aUJ zJtnYa^rFbKS;!K=?jN_C@#1t|oDtJNu)*YId%SJ;``0)3u2;@G`c-K8K|B0g(_Bm= zV7>4!LmM&s6x(qGwz|euI%lz?gjQFHEq=+=c{5w6t$2(lV!2Vga?2uV!Xv!40|=fs zT8b0AfpXg7p6adr#~;dEyeFeFqQkO`(8JO?Vzrq>N|<0SBk5T3G6UE@$|-m^*h^Qy z4pdsR^CphdIH4@^>|Zyr^Yd2v!1Kyo;UwEqo^&g#1qa8KD3i8B$HL++5cWbK(1uYw zIm3P+CUeTvn|}VgG}cN`Gz3b9;w^J&uiR&#!f}s6(u_~?yyEcliz-5(f?tp0V!7;n zZf$7p)DeWILT4b!o^yUMxBl>kc1GUMD(f90F?BILW>F*;)Ph{&k=pRxCF^4L{U%{Y zxcvyCu)mjkV&oY|-duFRu*h1pa%OsMdbMN=izW{ajm5Qc;zt>PQ6K?Z9cb{S#|82S zJM19^-R_ORJc&{numCN@*00}s5PLw2=>c+WC*w(Lclr|3R7(?&+9g?I=Y8my?x~cU z*6Wx;C!1$!4h20nl-;T#`%<{XsZkwM80)Z}pz>DJowrzfDv*#>wfY24< z_QO)PL}uhC*`GOjNdCZ=P>4b2r*;jc@(PLjf2k}S4SzMiB+3;i8`l7?OICyYX*p*J%FBLd$S zeR%AtJm0+pdaxh1Zh%U-SNhadqKmWcCu?vm7ju*6c`Z#!zjSVtAPkj%Q~RFxIA>`Y zBl488fb$DAwiweoUy^9O)k6GQE>}{mal32_Lj)*LiH0Bn*sV)rFhQ8F(=+sIi z53*C<+{&(TgWQf#LLkYAmBqhEn;y^W8Yi+zLkHE2=*_brQR(8%0IgR-M+Y=w1Hnrs 
zJOQiSqRT^7a#ssBw1?86$>8NU)8$NuIwY6?ct}`ERPCz6i>eX*E`{RLM*3;_38mW- zH5qKu!|25<3BmVEG}Ub>;&0{)7EXbEjVLWWSBA%K<|2W~o8M25Ts4f6FO6RdGD(-| z-^1(4-%*T!$O#v_{=GB)pI!UU{4afSb>+W1_ec($V|X@b7(%zZy2f zpY{LW<@nXluVVL~p7!C*|6io5#s1CsA9~=g9)8u!{`9~O4;=8n|5ZKv z)xob7|DO(u;F=`d0r>yb|G%34T3r2U>Iu)4f4urXO08eL{5^yI=>q_mB?kchkxzd$ k|9d3-tGOoSU(ElAi|WcK@ZALfFySA6c*-WD`T6Vr021!&tpET3 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_squeeze.ods b/pandas/tests/io/data/excel/test_squeeze.ods new file mode 100644 index 0000000000000000000000000000000000000000..10ccf0da2693e7b612ece6b9ae28b4c2fd811b05 GIT binary patch literal 3218 zcmZ{n2UHVX8iqp&NC`-9B2}bFlSomJrbv+v2BgLi3=jg6&>{pydQ}kVhy+BcH0dBk zdh?eblpbk;rFRht8{Fcq`|q7|&z-q5?>FbneE0d@Yhpk}O%M1-69R-HRib3b7pMIU zxx2f#!!iDzaHyvz(gg;^xOkvseNav^9#FIkTE@c@j&kyVdAq|=7#Xyu7aZz@M!?}1 z6LL&;0L{+`LICTG#L$b>0Dub<0C4&yG{zqZN6YxRBl{tJs0n5EDQ3A7$Fe1}>UmLw zu+{oKn#zokep(au3uvuplzLr8@0*ilBh50jQgRSY5nqpR8R)!o*Bu@<`@hmxkdU-M zqN`p7cZWrmYMqdFAo~%Do$U&Vwau=hhPrl6@M+P1)3q9v1qK_QrJMTEbe(hUFlGAJCyjos0`8-#XjsX!Sew;8 zf=w`?B_KYIrL}~5w>srx@vD%PxmJi;6b0!KGMqqv{8*)kgUYB81D9Lo3CaXF%t5)f zV}#)u^+i~k) zb(iR&Nvp64PMcMm2de{T7F1nVF9(+OU#`1RpM6p}Z>>$wTCzbZ=h;DB&|)UUAP86V*$gd!a0X3Ph{fsrFl4Jn!SiXyRyzEX2jchlmU9 zRG%!m_d_k;Fh>veMs)@7%HI*2J!-c*N?}W>ie@ryGu^UC%yEY$cYe1DX2@hjWA3^p zq|ad=Pp}#d=qfW8Z`Ffp@jP8`w~@|V(4}Y==SV~PD+KL$OIogO-7n6~-t8}Q-+?u@ z`RI*o{4NnEj2UP<>dkXDCn6v~=j_Rbh)189TN3d*0O8H2dqK~+n}jZMC;KP>&DHGU z*g8PAxxz}`9K88=g<`rgZ*0T4fYx49B}4`-H%i>(wU0z)4r5rmi& zSfm}D1j?Y+Y=6F(%W7R3^!nDhTg}i>8(w!DA}n=K5JYiy>V0^Qhh-3q-jO6Czncjs z$)4%>oLi$LdG76bdqI)Fu0Dwp%cY=<~YeR!{?dz)*4kJZ{_Y;qa2CWcTK}(rl?Xy!; zv{K@!x;xvLGtIcebXA~Ko=d8kjJ`58#H5utC-8D#xX+8`fuxn4MgbjLUgr?UI;7BQ zI0Wx4-DhQzR8*S_B*;bI1m-!gAH=|-*JviE(aAMgyQ6NkOLaxD?gBQ;D*;0#4}!cU zT@<*+z6cK-FQle4e9KYpp{}sEupsLC1f??!^=*U`ioH3)p~GO`fkmTkb8a80W z1Y1(YU}@onnmZBfRc+nD=W)yOb$eZq*-;g55dZhM4QH#c>;`Z0}V7!gj8?M4i| ztC6eY?ms~bWWPksu~^85O8VJ7*UhvUtG`F9ccJ`U-k@g5WtNMqOTGyjVxsjh%Kc`! 
z`_uCU*ZDg9C)Y=ri{(fmYT>G#DZVMD_Yb2IPg)w*jV3#Qp1dDn$}xfy!>P)w7cInM(9&Z-!PNzY)2 zP_iIeKkBZFR#B8gBxkM)1JfM|)jd>FLADrC0)N@kBVRP*BbOc_I8G8RuYspnjaRO0 zyiJ!+w>e1THqiRzVAM2Uzn*2r_NYjP67TLVkvhwFN#|GzJNINQ_42SSQ)i}rNr5n{ zWHztaFgQKGq((ne@U9|_K7!q6sbaa1MR$1UZKF15J&v>2P4N3&Xw>r;DH`!ex6;^; zkG`H?>^0?IzK(qVl?(0QRU@`CYxp<;uAzXio4B)fEj9xbNU~&AkP&yyQm+Fwg=ZTO zMGGe2<@+@rl}hw6UUNB_)b_Geq!v;2M!uEy3M*nlqd%8lCF3`$#Kwn8qf@6d0Xv-a zS9(ga@HoZ~KX6C8o66&1Y;CITZ`W#L_D1{!T}dCxA;^GpDCWv?i!{Do$F-rs3Yp@Q zK5Io0rSz@I+NAyCZk5gDJ7P*iVfZew2gvkY!pQdyC<(3K@)sTbzfc6p6Z9i`A8JNd;9`)+ zY}bN=S4C^bpWXu*XtExlXY8it_^;)~Xgj1s-tkOU&8vc+;y>S+=>o!nHAhExG~HBy zPjPW-reFt(e&4rW6-91b7mliq=d&J-L1O>r@MrM-^3Ozd4ebldw@3=_vRj_$K0KN% z;g(}}|LNp<>m^p);38}n z%u`G%I$uj1*XLZ43Q@c0G;@{TrbZ-5G#mQm6C0;O@S+;?-P8@Xrj`0y109OKikKMj zsKtj_mQ`8{UPnSr{ayS%jxCXnc8A5emD8hN;l&YU;Mae+eDi#o-P823VnleiFlreFRyK4aH{(M{`r@}f_(bRP5-X@y-@zr;mPImKg#Cs!r#}*FX0$D oU;c9C{2t=>9Qzewo{qe$_-DqM7yxNb?*Pe%AUPv#PwP40?~_=RNdN!< literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_squeeze.xls b/pandas/tests/io/data/excel/test_squeeze.xls new file mode 100644 index 0000000000000000000000000000000000000000..2524b975bdce6f7e11e9a88e6641efafe2694c6f GIT binary patch literal 26112 zcmeHQ2UHZv)~*@SkOUMFM06yBB*`ww5X_PcYghwI7{Y>}hzhF+iUGqz6cd7J)itc5 zqL>gdyRNVzqKFv4yoNQ)Vw(R}H$C+9^o;P{f8II&f2QVCPuHEQ@7}s~>vrAhZm7Df z*RXki*G9xhK1d0D6{#at8l3~@y{WP;g5O0VT%^98;2cPb^nXYL>O2@oRTVVSvr_HU zNe)s~L#PpP(LUuYHFMS6pj*59Ew26aKtrkwXepaZ(^|4kdTA5Ml8taNlDsLV^WKp z;e@1i91hetj+MwotQgAQtZ@r;Bp(CoLysYcw8r$mS;PCAHGIBNgM$QAZ#nE*=O9PzP+2o@8gCGqb5jJGDT|0q_vWRlOzt5F_aZd7*l2~23kv14`hp+5embi z*zzgKT8iVX3Bq_AuNH!-y(!2YX6+jWTpW-Me4r`4oh_f<$d)hBLV)bhcaqG=9!T;d zM5D132wIuCqh9!$CuT~u7xSeW6RcZXHXwblB!u3J_{2APILTMgu_pfa^}s=VQpNbl zCGoKfC-i@?pclxX@0CH{ErY&Z27QSPx-5BQ;U6GyI!ozi zgA6(;k92)f9%=ek894i8&=1O>pO8UcEQ5Yj2ED!Vln^+YQu-n7M2DiU)YDs|xkeKy zuO{m2iRsbNOOL7{NkgPM=9#v^e^I&`0uQ@xeWhg0Z2gU;ZqP1P6chqQlKX;SzuYi|Sl_BL>6Zv$9Fwy)D(5)JVDqdNgkI?s>cHSoTQlGtl7 z8*q6o?gURvay`uFOM}3|`r9BPfqW2`wtzUhNP;k(peum1T8UJ5q(yfUbH&~b{IQ6x zFs?H6AOCRv<)0$i{kN6?DMYI5AfgNMKs+Qtgfb)%Ju9S1vYauTZUCZ23@4UaAeb&g zl8SsWh+0b*Gcqzn)D8jK2f*9n)#65$tlC%NoUf>Nh(r9omq?R+( z#1x4tqAt0upLOKHl6o9V?KIA17)#$mFfl7 zJMJFcsbRfSG-^Id`z=#w)#|sQAU((wh-|-w7sjn1Qqo}^Dw=4B7%x5hx!7 zHiT%BK!L40Oy|#>J!WkIL9iH>cP+!gR2y_69|ZQlz70|lC?5p&zi1#(VAqL!5c9Tx zz)n0=lWiwBFlmEM75Gb(gL_UZ`TR^b)k*5IF=Vn~ zu{0|=Z0MF&5qRUladtLFOg1c*)=Lf>x}{YFR-C-a&ZaAq4U45Y%3(vdw2HvPcb>4b zF=n!1u{0MsZ0MF&5%~GqL3TFXFdKLImc|D*?rpF%XO~q6YDqTOp+OS+G!rHp7E3di z!-j5Y&Mr4suViP_oyms9(yZmMp<9}>%h}ojb~ZhjY*;MKP7WKor8&Fo-*kbU4WG$| z#nPPQu%TO;v&)LgTkLFlGTE?LnyVZ(bW3w~`B=V;jg1!Eo`!Xo_`2!GxNe$=)MQ>O z5ek`8#tv%M3JMm*1ntQHrCSl)`UnkT2Q_a61^Z!wS};KAHiJ+>=~#A9i&jvu7AB|- z1C(wd2xUh7!VYTL3JSKt1hr>?((M7Ep}WtsgIcwMf+a9Pdow`kqmNM2>H%z^JY)@5 zSL7cawSw12a^yM2A|>Px+l^7;E3gXQ6oDN=ygI_?cpI_BljyJp_qt`0m^0e2$;Vug|M`~YD>!U|pnL#{3&4Uj7q7K4!}9+rjyaFzfc zx>R^8idYoV9VjIg-hWaROGTC_smKx~6Z-ysh^=+`XdGM#CWzbEXDUM*OGX=I5f9o-gXgu9iM4MeN)xwm$W((C z4vZFRA}y&FlDs-2M_7TQ{;87_!V|+&l9JQ-BZa9cD|iDXp}1}$UDO|?!XtBd>=TLc zPXa;GK<6VtoK&y}ym>{9J|65A^HPVt!S>9+q$Ds@FG*ZZSCKLr2ue$0m@3{8r?7w- zh1CJp`w&445{5^`CC2a}R$vIKqzjrI1Oj2(j{`wtKtS>?1YrU&VR{TsC*iOus)1yr zBh2G=fFOeg4!(~GuEzpS`ac=qBEj}@yoSf%E_874flTm@Ea0RclmSjI4Y&ljD;*qs zA;Tdkbz%X>{*W9oHM&~zxEja}Z(|KkoIDLoVVdNSarL1;gF&;C!4~nLw@!Z%?E6!L zhHno1htppd7D(Wi0^<_ng;6c@e<(v3L-0$1#6ZWxphrn~041?CihU8~eef2yDkx41 zd4T*VSU4>uIUL*z!!dKZfnyE^VBFJWuqo_V$cTpUr*N}`>Te<3QE4%FFsV!g)6qpz zRt^G|!C>jhgoD6PE!yY6X=|{m4$mb5L~v4ya0M?y5`(J&^95t1fKYg^k@CEFqndJQ zc$2x+(UK*~rI97drIDpPt|sb(_vk{Vr4S=NgTb^jK!*@IkREfo%f}v4V$3n2RT8p9 
zNkWz=Nyrj*nBQfpfbPsCMNmUf5Je#-${Hwt7ppQqqCp841>r z0t1KjC2j2-4B<^cB&A>rVC+3884(az;pFHm6GkPmzku~l`4O_WR0*4y9_kBMM>IM- zIT5=o(!B~K1?6gjrydO)l3`v^UW?7G;)^(;;T+x=ltbGj$%WqO!Dl3ZL=e3l0V1an zXFCdxrd;$Hu0-I6V33G* z;XsZM0T&)R3Ae&Qg(#9|02xkNk^yysR`c?;R?~jLKZFidQq~8v#^j#puRA*Bpgq(L z;5)(hxPYX@6xePG8<{p)ICX+U>ZF9-%P$3=ankMIH0X2IL;E3x#lAd?zvf=Zx$w)Z zlfUPi8(h3my#4!6O@Y-T{de#U671?+nl^adPcG}BR&r)-;r1bW-}v`3e`Y*z!nV5} zA$5C3Et=ES&^vsa&f0g!P8_%PSeQ9@(YoQ=W}r(=9W< zWOk^WHtd$+vqj52?_Uh4eW-M3pUIfMe?M$qYkn{H;4y)F@a`c2ysQH!SJXV;(LCUy zfxp8aN3GsFZ0)sZbLH66Bb$x(J((zUyW+Ov4*!kj(vD~LX)Ox79K$!Ve^lthp+-_9w{ndLsQBKVX@|Ht=&C!MZVWQ@xIjfh~qD5&r&weaQqvaBoV!>Z31T1B^@Z(Qo+1zTp;mxLV;y|Tr- zvbXWJL5G}Hb=1kO3pqZ#$Uo}JMcwKl{p(#L%rmwxpAuHvW9fiq4O8$wQH?6rH?UpdiZWnkQch|N13?~f?U-67mn)4i(XNb~hC&=u*9#;2_| z$bt?ohK%nMrNjy+2_0$2AXpj22$QjeXjKGhoZ;*GHl}Z$kz3~xS!&%sqWDr#=?4^0 z*wydCrs|FLwGZ!SdHi`-|60LN?Xh-+YTAp1l@8g5U(K@HZoK2>pl;{v5*jabe)LzW z;l+dh>U4MT?^B1Rn_bJDzo+Cj|9VNUYdv4u-JW>V{HMN429ADmwaMmQ|7X1{HMpMa z!aM0?$Lqlb><-+`!LxxTHra*VSnTIxLz0a9>zA9oHW@H8Dbi|s*MZY^-Pk`a_UeHB z<7)1w_Wcm-QhH#Wk=1MMl7Z&t{rtw~9+~3nyKU6H;XXl|c#CWHo-D|}{XW<`v9P1B z@6_0`1LaMR%It0jA~;2fOT;Sfre7-m^z_)Ge*QKa{R<=bCp8Ua)NjbPf?mCx=Y`ujzFlelD)DUprJcPOzy2e% z!K}*oz^ZFQgy|ONUDZS9MuyIeas8#IU-};B8-J{tVd!l+^BhmN;_U(LX!>{QLN_P!XLl-o~1)3iV%Bk{McFH4QX?!1} z?6-s8nFX4tcbk*A>}k&8ZVfMNqJ73MIr89krz-DRC!gi1O})d4ccPqh}U z^vU<)ezsTnV!2@UL+wuv1?L@xTmRf}HBEEs!~uW5%9=j!e$B!cSIliA@8+rH{$luI z%EPRe$%p;lxt!%#X`V@6*3s%#jK|#4l;>+*hw6TbHuc_rJordnd6|uIc*HTSgDKlb zTyRaBP#>-T@zDClN%e`U_4~4$7e79~GrEk|fRW;qiW3qk}DBJBlvp+0E`>{#I@1buKBUdgw*ROG}`mjeO)67o? zLeXa<^AlKU1wOd#NQ&}`i8MfejOCk-Q)Av@8@4yqM~wt z$eRJ69U?oS>Eqg+il+p3ki z{&MudvawbVI`!}8*tPp+PC<6MR-;;Qi6FqX@$_l^gR?X1-6{{IJ^XNe_tljz4{H6H z6S;bT=d;xxr(5pz)b#kWb>AuPRTWBij~_CA{O8NXMgJ^G-d1>CHrZntSKWIuhY7(oEhVI_wrRZEf*_tW7t! zN19vPS=n}+_uOxsbKQ&u#XlW-H)fUHh$!b>QAL4!>XecPjR|WyIqi0qlD^-rg2WXi zoGh>P8Glb5<(IYp&X$$WD*uW)R~;AM@bb=)8xgA8gI`#re0}t}i>1MDpAz05cr(B| z%9s(C_#hUy>Uy{;Et`0f2I&+L20FF1{P9y9!in*YZh zV?;Z6`Cle(?o#7(cj2AI4R5m^>iCTctDd0kJji3vr+DbuCf*^Tzhh@&V+|a z!)Cog+%l!ZUpqyY2Vd0R5NNV>=IN-~Or7m@A(k)Z#vcgY81A~l;#1VTC$BsO8Tx@Y zIu?fwdShSg7vFn*?EHpxFRL0fqEqdE8hyz>^?gF4|LF=X`%+!^E2IF)bkKhGz^obKsX#5Hcau8FCg; zdmeGL6xV(tz;DHo>50HveEv#cLp$G$-#5VWbg{i=R2eA*2^*9}-80py2|OhU+tFWcbj}-eo(P!Mg*)j&q^knz8*XrX!@9ij-YStu z>yor4JhGHEaagR0Ny9_Hq~Q^#(kCvWN!tmRv=bYmDu;N@l*9Rkg-~e!$G^YARu8FA$&O%!H_YGora2^5_qXLd z)XVBXofFVDL546|tW@k$20_MCX$<6!kYgcZmy`(^?(d_eknv8^TF7|l9zY}f03mKO zL>L_nU*d8=Y;xSxmNIrxL2;4ENmG-eQ~3Q;BZUdb7igI)uc>TE`;&vu(U2X-lgC}* zU?Wf@g$5KFP-sA*0fhz>8c=9Jp#g;k6dF)yK%oJJ1{4}#*MPkEpL?zHTA_onZr&P* z|LvNKaQuG*GJZ1&?+=7R#@jGB?#FLhMnT5=xCxMP?2ey#!*TyC$oQW7Jjm330Nx)s z0GYlY0NVmYB4c?S0t*v}yPuw7_mTGv*0Yi;WN{*izD-@g$5KFP-sA*0fhz>8c=B9|FH&e9FJpY9CPFN8(tB@ zFE|nR`0@RD9P8r;VsPM$<7^zic{?{AZf*J%1eMQx8PpSRd~P7(&K>SqJ|m z8T^-1@Jj>uk163mya#0beq41Q6yJ;?y65D!ja?%0m_JbXmxBymIo$S?93 zYW?+xe>q&^;PIm9qk(%eypSA0$PrJG+x!jx147In%h1+_eFS7p6*Pi)-3z~ojQft? 
z*Ni4)!j#rmw`Jj41Ajb+@cd~je?A7olLwLt>IdBLtKyU3#TF1>yCYG4P_u22>=6t1pol(0p&TKRnaH_04F*C zKn%b_)t9libAj2p7;Aevz?==aJ#200GSN`k(gCQ*`~SKAi#<@U-KEybOIXjiO%VHv z&21bgoqwM|gNa)soT0a*!8TL0>a(lz@uAH?X!^=^GVX99Zwt4BkF+{DiaLa)4nk&z zQzqDYpIuCrC{9x9vk5qx*@_c7pq(R>F?g77gwNiO0a=2o63fKpFWAruuPKF3c$T{G zJ7>USC0Y$U3P_#sGl!Qaa6j9iQ4Q)jmr7kwSUBA`bso$p9UT?`P3(oX(wf43gIbSd%C;`@SmMTsZeblIo zqc;E^x2(EQGB#ZB4Ja*gFvf~f$?mT0?B*0lI`)a#30)_#%1dD0ziDsfR_vT9O+JiK zs#+@)25xUM!04mp>vMQxc8({`4B6;t_RLmS?iw(P)sgXT z4tx%2@D}*OIF>o;8c2&|;Kc%LL4~A0k&&kWq?@3}a&_n2j?J_qX4F!uP*8 zX8-o;QE>=l_<}k%73yUcPFCy`urey^=&~mXIS6K$?WigrXE8wz56oHf1*)jeHg^$e z^^-k=`ENyJua^Y4-VQL$Yu6jTv(;Z;k_u^;?d@ZWzAoYvWa#_ojqg&dCQU@c{%zP* zvg=>8b`W8=55;eDwpDaJ)oGDs56zHK_ezg;)Jk|le*OT`(5LOKB{h#glvSf=Zglir~?@=SWVOO{2q)&%ozncL9CYF=aK?VeRrD#XKo&QIHyqr5twe&hYf z`}EZp8yX3o4;kkXpnQuY(?Pbd2y!PaAp4blWNAmPR!^AIj36qXryanGsy)=D=q03zJ`kIbl8>CX+k8RlQ`U!$&ZcfeAFm-qorGmn-Qa8mgSj~Ke4qHg#gR`80!K=Uq@xkcXjh{sR9+@M%1(vrYM&~X3j7@L z;!)X_^oQOAW~di%vAbK!?&!f{vh(7JYc9nV9c>A75>vgK4b^&Dv8ua^g7n?_wQsm~ z8b&;((?;F2=TCB>L?%mjy$lf8*cqwJ`KTeu5LkcHozQi0!gRBK*(`m2sCbVkh7T%9 zE@dQvs0$m%24(cq{)z5jr*>|}=@i*@6?Tz=x7D}cb0n5g=T2c-DL!9}xx=0X+2$_E zZxy{YUaVa{6K>s?D`M!a6)dfln~Q#XYtFVvi%Ei|{*P|i95%EYMXucZ&u-!Sms@&p z0yGGN5e9O>Oa^ElbMI8zjrJ-d1_JdoH0sXtbyuiC6KGuCfwj)ADqza3=%wzOv?$yb z|5{yLsn$C1i!S8j))PB7rzKwJ%d^;Lo+Ho6g`m1 z^mmqoe-&wJM!Mq~GFTXYW68g=?oY0K&$%{;W~DY>LX4TZ8L^<6@#PIE+2`CIgkDeg zQ98`WFp`sE(C$x9O}MnrOSWF%fej9-lX!ZW^YuhrZ<3SdNpzado*f@=q2+25sVC#; zi&Qtk>Km!1JhH}=V0ZGh=-=BSLsru<;mh`J93fhPZz7 ze!`NEr~gQZUd1OD-`X`gbxx3hHqMuz8z&gSl>rY5wumNifm zgyTwwf~j6pC-Uw;xYYfzqY?|!o%%?I;r#{Z;%Ni>j+7lIrVNxK32vOEh%0u{#5#Bj z3E%8cfK0aSZcgm`B<=n;oOzZ~E8I8M z9J?zkukiBkv5D`PfS`}0!M!+3P?Ja;SYeC5itI#tvq=k`aDuWmGoA4kDtI+EH;LFP zc#R;2XF>bS_}TR?Td$&i_EEWYjE7{C&{WN%#MSx$=PjEWdq-P+^i=QCAuYT5Vdvw! 
z;4J5GYE?Til`<1g@TZE+5CW~e+iKoW8>p61p6uf@LM{&d!8Z4t7ua{?VBStezFLAk zc^QZm&wSQ;rdF;KO^>EK6gpzrxaH&bwA_g6Sp2&i=@amygO$qp#0(h*rq7Dte4`SyFeZB10rLsa z#zEaPAylTTY9Wc+K>~s7vCWmm?$b39!gpj%10WKy$<-9HN;%2buk#0)aPsWJnYQfO z_BQ7Z9!=@STeRt7!>U18FHGh9(~7;JORf@ATZ(xSEs6^WTrhtzXki)OnsVJXtby zU69Oppi#)uIqTztk5oxqIt3`BX_YrllU9#pCe-&QLuCNytbEk&Nc#k#Ea#eW)atoXNxm;F;_7 zqZQm&k{1c$w43*!WBWurqZ*)Wzox;q6tZWa5E77ENKv0=nje<*Ru|FeyMM~=wSW5l zeXAua!6OmRy)O&1hdR#~J%Ft9R{S8@-Ocf+dX-wk01O_Nz-iT)~t^ z@8R^?K8iO^z3I8#{;u)i?g((2)I%fW@V*LDt!i|i#FJ)WnJDWU6k)rHdyFd7*w_5+ zo~%SiT$@>Ew~#Q1tX`NT?i2TET8+nyE^bgzA=&Tt0Y^~Ja4)@V4}RvC>{UY4dZx&K zpw7(#n~>-TeOogw;wU|yl-(z|o9$92-$+Z-PpZUC#y?d_yJ@Gz*&flb{;{M(>D& z&C)(dL_?Fd^!4N<7`?TwBsfwR&<&Wih^&CZ;Sxz=sHa@$XDlHPKjWQnCwFjbvU$x6 zdg3}8$!}~0x&_S-g|7EAahOfVW5#5XuRDGY+;n;)y$5bXVa09reL}=KVJRF?=dzY4 zpiFpX!605%uT~x3*uYL@Vpcy<>xE;oLY1mQ3Qb&PB1J)sr zO~05&+}f4=h$H3|+t7#O8G#o*p+?$WWYfJR!xf%l{Gg|Rhy zGo{~xDzMg)o&XM2)4bL;fc0=iDFpl4gx)>IKr~%ijjQ_%DJ^dsD6N>yk_u>);ZE2~ za^SU(#oMKax(BbJ!%KwYH<)zle5^u;_Z%KhunmzHbu6kRtG~{@$|*KF_Ed%a>j6vr zoyyzF2pz%Pq}(svI@K@Pg_fMs>mBegaD_}P1Mf+qalUFTGgil2qVDe0*tIi*hqlYk zzQVq}Sn0~tB4aAE0D3m^^7+VTc7dfdH^gG+(Fms=aG+PHCA)p@6dJc+&>Uq>C~m0A z$v#Y~+;z+ly!kvPL_v~CIclo%oj9^sa7c|!!g^^Lh|~d-?(5;mFMS!PcH}Io3BB=(Pjzq zBC83Ci z&r!%Y=K6vM!a@vDR^KjD$8b5w;J)(rsWuh5z;(*Mcq3@(nphqZm_C+g#>B?S%0U#g zCZ*3m>K;=5plVO=DW{e>f1!DaO7SSrNkPx}u*r2Y5J)0bm~0nUi4rLG;?ah)j#OaT zR2F=-XW6`c;J)?D=2Z2FDMp^!IX$j3@LGs++vl9&#iikR6A z{7fLFV?1Dp$FCr)aAFNOp%Jr+S1c+YvJj)~x%*bncuT%A@sMP6tQP3 zA3@%zI?H;1b%k1*i9j6liFweMEfL8_Bq}3B<;Ds-T;)>Md@rnmXP$5N&EQyga6QoN zlJ?lQnU!wo265F93XwgKK;QD^w4OM&!t@w7FCMO~cFNV;v1HS4@sNZ~#Usk$qk(hQ zzmslD>J*#W-b;wpkeMP_ud^5-(A;p4|Hc1Eiiy`NChk7Jmq)zQ02&=8>G$Rv;jRL7n%0 z{6f2-I4zZLlTmD}FxWHKbY{tYL{QL=#_Mcv&;i*g{O*BWGnYC}qz7b>g9bVN+7$k^ z8$z~tCQhD;cIGgT?|Y(wxO)Ipbh-poz8Jp{9LbnB#2hW10RLLZLk>a&`dkr2{AoC{ zRr$knOy<;8;K(IGWVcO-lrGHe%`}|s9h`Yg931|V>HiTdke*A4m$Dz{1qE*c4`@M? 
zR5ha-Ape)93c`D(CMa=k@1*0G_3}9>80MKz_OsSm96!nUPb$0(T~dd?v^#u^5LNhM z%GS2eNk)N2rA$^8>@nigaLmNdM65J#<|lm(&|duTLakWShCUY9Ey8e*{z0Z2J@XhP zWgs{Kc>RvbV|u$mosL2$(v8eXK|+Pz-BbCNR64kZQDq*4kA++Gc)E{&ER}#(I5fe( z@_3?Z3M%8HbT&Ck!lwiG9o64bQj=?Uu`aD!_>%6-oL78Tf8OMMb1grO)cak?WZ?3% zDt_(^9cnJoQWC9@4T4v;+~vj?8SmI-E#tuco{K7;J zckp!g3}$ne7@sqx(4Jd-UZyVGJ!GbOrz_*`FpJdtH!ed6petfr?JWWH6N^oTxR(*` z8PN0E1*)$vxJz=u=P*7gZt1Y(0=ZH7Jz|n|1f$iEOg2HTUimwd&Fr0E|4nD zv@UE5y+f&(UpA8*J-wP#vlzTrdp8J7qW2DdFGnXP1E?o};Nj6U(kj6dr!Wk3B(j-N z{Ge%MX*F-4%2(wByjl@!&qhl+rrmv%1D$`q&7}{dY2QZEok_eGJ&Qs9>E;#c*nq>% z<3TtV!~w*tou1Op#PBt5wMqK`0kq|}(ixzBaFLs}5yRy{cCg;?)YtLNxLjTK$E|a; zMjl$@vAhsX!HPM*9c_mXc)WS44+-RA=bGig^>NTQd(PZUVaYP*>5p;I4A2oy$#c^+ zOO?meB(gKXR(X$*3B1!*BCDWG(pHQO{+=U)Aov8*XBJFI4|Q`@W2GS@-;j z#D)Fug7-r`zifP2E&ORLfb(zT-?YQaD3`^xpD4<>e?0S%A>&7TCH%Ltb>ou3HBe~x1RtI z(*MOBKU=xWfR}rrpMcV2mjN$#MwhXE?RS160RV+atjpccW$RyS%-^kDDF0^tqvBjP jznmR@0vuiYUtZ)d(xD@x>)QzdvSmg?mN}%j1OWUG0vzP8 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_squeeze.xlsm b/pandas/tests/io/data/excel/test_squeeze.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..fb19b5a40efb73d5511ca0631dd0c3922cdeeebe GIT binary patch literal 9122 zcmeHNg;!Mj+8)ZGgrTInyK86=q>=7!8FD~cQV@`kZt3n+VWhiDNqK3!~f@NG%3c%7GgF?8& zUWTlxD|AynT?|?puXac0xL=;%qNS?4)s zMo`TdjY^(^vkY@vmA+_XlVA#LOLS^+C=bf3a74y*x#>_6lJo(U*s=Klbl3DIeHt84Ty@ zml`(x)VuBkS0n??dRE35xMk>ddKjuvJ*KME>Y(wq<(O3yT3;QJpGHaHfnRxp28WZ z48Mv2cUDt3YY3E+ZE-uWl1NqzV{)?HVIFPiGr<8mPwbqM3 zh0oZH(fM?wOKpr~AT5#*q$dHe7q^3pOQO-c{nY1cd=*i+_#(7To|R!K*RCEw76!Lu zIoFESZk*>cXEWz%PZYhGp2K2U%HNgbDh;gA%1s?Tt_6*8=#rq}6_SNL5=#p*=vC2Q zwz#Z9n32}}UKv)?ESR&KFqRQ8n_RT>P~w%a^4??`K|j>OcD~B5A3}Y0MWU@~Cv0D1 zk?jVi@iw)Bw4XfAXv4enV^_`W*P!9Uz2q5t(np_j7GTi8cQlmc-7SdLRo;I%5FABW zvJB5#e|Hka(jxO~_!D`81OSi#P!YT!od5C@PiHp=3ukACpE>Q{K7#;HY;ctS?5#C^ zVLh;dfmpKlWrfGr4I7^O49uTbKuI^afQz~ zI`-oW((xl=jDZM@gq#@mx?uZTlSlf9DBjt*MhUB3eMtI`#a+pC>57%!BWvFB4qNav zJ9Ng);EZ7OX^Ddic?~(~q4g}7vHrW)wXA$266bC@_q!PX#2i=0Fn&@vYuJDQ05+Tw zaOV7(GX>g?&I??iAL91+)W;Xo3aHGFLnBl2s6Fs@Kb>1zlgY5*iqvNzmR+xSVxbj& z=ks6>zT9*hTN1mN!{m4_yD!RB_#PkacuaJIOUin%2P`GnX5)k{ya0L=Z-kzlBQ~{d zad|dm#yVreyxEHY$i+(}*6by6NUsgv=GEsxNKVp1;jYngs8H&WFXQc1ezV?wLSqEt z7)PMiIp~NhCn$!n*lTl{V^KwJc1vD^*9)B{(61@p1(&F3f6#hFFUjUTI`Z*>tg6)d zm4Uo$vT!2oq-9jKLooO)HrP3PKwKwo1}!7wf>F`u8a}D5nnc5M z3mNUrcWc6W{+!3xsMxW)hD=Xv-9!j8-hd{j!SbNfa4k92oXSXh*q3Q7uG$BOFGpkC zx)Q0vR0_DL3W~Dc5~xIr4baI?zqe^)H36c$kvu!3<6}^QQ87SSk{D4}>~fHty|bYj zasH8|Vpb$6=dI*GB#7NGy=nyYO{;Oqmd$i&*nHaoiG* zAlZ_7vK$CqvrL zwqF@s*{~jz4wp-}7iyET1&O<94V&kEEp;|mHI^ic9*TJ4vW7Dqv}l*Ofsgfy{Mq*S zaS#~`N@XJAyjuGt8F~}1?gA!|D_F=on4)iKdG6aOl3?Z)ui)uTwg|@Delt z{C;l~LizoS@&FPy13ZYJsDcrfs+Eedo`IXj^y_^;8<7i;;F@lWeav3CCOrs%3AI#4 zTsx3q;!9o_NW0uT5xj+a%iklg9|AvZGQ31VegFVG{Ffg=-Mt~!(4UdFKx@${lMCcu zJsnVUtBPqg&%sKyYZHRcT**S8biIpG@6bbNm67N;6L3IuE}d9b8BZw6`+6+%y>QEJ zp`_ZdUX^xCeiVqdh{YJ%IC3-_&ZB2$MXt?HU{FhTkk+HybK{xj;oGtA`C1qmH2BqP z%czB}TStVU6gJMTd00FlrvWUR$6X|mbNI%oAhpJoiKp6vq9 zh|$bU!}00qUiWoJRZI-(Ywf5CGfCw5P7XaOY%8Cq!^cs4YKvuR1d$r{%#AdW6ovGK84IqldMBh(U zr)S!xlGrmutu_;oOYJzITsD-sP|sB6h@&=pC_PfSrWZlIt63xfywvHL zpB0i!pg*#s!^+~vLffwp=+45>4SPH5Ji@#8QK~GV(7o9s#{AeHCx(X34fLc{{peD& z0q16V4&@9kYMT52W|@`AgOP}L7fBCc;gjDp8K+pnyNBcYLq zNhSI+BC8at5>UTniQ05e9x-qKRkekOAYt*L{Mq3?GgwPKmx)W>=posCtG>Vdte`a+ zTjvZ-s&O5ie8LP^-e$J^?1OTbQ0ou=`31qfy~9p?Vzy6{=yMRL{CV8k-F)(TgxeO- zQI8d|q4@KZ7sU(ZJT4~AE~eCx9U8r1&V(H0JTI}gl}_wZc^6PEL$(;S~Klfw{0>Iedp-StkG46#tP3-0iF#tvUbL zbN{6Ap1zVZsUYYh;dTJkdA431eV+ggFqw_sajm;1313*FWtzis-%U!R$%8fCG6+$2 
zDlY{QcJ{R4VH^Nf(AMi1DS&cJ2&U65FOi2!k%@kORzcKDv@>{RiLhz8XJh zPK&>m>hG?yQ6WPQi9X*Yq3)~eLSE>rAZQ2UNNgv_5Kx5`1`}51*u3cIyq-a6)d6wy zhc`*j^0oz6QN22UG?Gq3kr?&KH+qIOgFXPs?AfE)@6Sb+d{_!a?XXz z*@qQ@dz)+rs+IZ3NCzFy!VJ!!;eH0FHZV&`# zSp%IVAGL{w92n~xJH7KlQY0EVw;5l^g_(4~LNSF~R$o>F{0ep2i8PLTlAO9XG-$0jZ%&b9{uTHz^ad z`-$~TY~5VjJ0fjY5u$^ePy1hnT^&6WWo9yw3oUO{rOCLPiAV8c6y)$H4=yRF zDZcM(?#_8cg#{~#Yi*0mIx52$cBX>xmk#d9_;aVs21)dD- z_=A5`v?d`l{?f@&>r3Hk8o?_Q#t!@aq%<1^f}5bvA)p?+3|30Ns21Y9gAQz=jBoqC zi9-A0Fwp11TG~lI$uIaeixUrTgUDP(HDAXm?_wzF0kt(m zH=6i8y~+W*IZ~w0;W#FW4G$ZzRy1fRZ^YB3wms|tT9tSn%e;yUmcQ@P>o>J*dVRQ? zMxCv86d6IuWb2M{TIiK6X#yEOShYzq7L7f$H(sS+=VZw6DRD;H^SbeP8<$T#$yKe( z79OlKM)mK@W}@win80O8EFsvo+9BbE{JHf~Cg`BPnc*iuWkN^{i7an=6$>zj&z@VxiC9 ztQUr5(D4jfOPmARsx(7Q;4mr@7E;+Y0o4=5L`l;_rnA$d7#501s;~3Qu)+xi-q_>~ zDy$pHH%3@n_}iHfR9U2@4MpEZjf7z}3FZeroW_u^Z9t?xZdO2e?ry9UCWBj6R?n(7 zT>BCMKPfI2AE!mZJ@5g9?m5>xN=VsRL`f(TM#A{@1J&7v#u3)$rbix?K1J3p=fXDL z!BNjj)odAvZC4pq6p4qkl0Llxe(<&y9LzE$$U1UXOnOFiKc+RYh>%IeCcA({O}3Bz z+Wlh_B8OIIF8U_gLtn&EnDX~PyC6IRuZO+iv^FQEasGEJUo$t&CrKViHOg4hF+mtv za6%z6`aBb!==DCeXL`vMW>z$1G*!cub4Yb+dZx4O*2W=7iIOE5&omMdLPRJ6_Wj>T zh3H){-B0)SFr0XMdcxi>Vhv2btxdAiihN=WHDmbI+xjU zvaM6OFt*`>6?#fJcVB{UL zd4kBVr|i$AHb2A_654Bx+3(dwxTQedjy67i_3Pze+nf79@`OST6L#3_~m`pWh7 z)wuTEqaGl3`$UGzu7t(;x5VMq(G97O?8@#Bz4$md(i%vsqQPQaa}3tum^jf`j3<4y zQu&v2ngRlW0<^fu1`6*sNJ)&O$x~5Em)MoodZr?j(Cc{;_u}WsO0J07FKVT_($ zJa}yzKk4Er)$VyHKYy^Q|CKk*a}-=Yu$SYNm(O9w{c4k-cGlMJP|iONf27F7#383eE|Ba6$~89f z1N(p`6?NJOE=m@K#iJEZ`JSrPg1{D9Te+>#;X>nOaR=F^FRcwW z3XJ-yXhi0u_8;i)j_!_jY3apXrd+Havh7fa!YY^BSUo>g`eTDoabKHGiihT!4M$P% zU8Iq>n-w*xo&@U7a!Uw(QMs096&N`|nKB{)-I!qg zkeph>&HU;FBd!XaL85t9YBk5mL~nqvmQ@=wi@xPl~)m??OGZMAyr(9-=} z;KdKhOYct3(L>AA1LbwM-YqU4o#{q7&-7@oac;~Y3kI^jS3f3XE>XYOWgrRnZw?_~Q&Pw+TK z3cd3uQ-VEF>zhjn9@8lh1~ZN$bw6(iveDqxoLD5?>D=^v3tVY95Go_I!HQsml#}3V zq+Cm4W6TbDLjgT&r5Bbo4Tn-;pDK03^tJ3G*fD9Q)E)L8yv{D?qfXk)c01MJlHgM# z_8&va>sA-^fXz+-5oZe1x;wF^l~;w=?N=XUu6}tfXA5imZ(RBN`!~Y-es5hxb z3kB{OivK_Vd486NiHSxr?X?l83gpByh}XxeAA~W zWd3&IFG^y1_df%^py4)@?|iw_D=J+_V)}~98{8|HRU7GTv>@)0kMgNkMzKgXd8sfG zO@&d~zS=ys3Xe=6?0~>dLW)iPD|dk4tBDc5p6U)}8_%1gFj9UR3)X@|P8`LEwM}Ss zw6$S`$J=B2bzTvU_x$_%uZTcuo6-8d( zK5}udhOCWT5`gbfHS(OL6vhJ_hB>^%oB2mTJCcalilDGqB<1C|Dck!$dYL+UiFSnh z9elAa@qN#i;@P2xb{muv*9hTAw)rn(z8Bt`$CyfS50oRvp)_Gr*sx;dEkN_1%h2&+idslpcQGCF;Q95Q7XvQFpR3 z-#fUmBZ>7-)kS(y9|*QVdo)YDz9pNQ ze`1<*1=o=N-dH>{fx*<^q%wxn2M4ZAS~*)jb#r!sa#}jOS^wk=T*m*eBngiae(}Z* zAzb)j2f!Qbc;Dzx3yo6jH#vlsFFVM{TQCM{A5JkT2z+ZZo&KaR@I0;6Ia^ei&V+KP z^9r$0mL3n^OWaMeE)wIt+PFiLYfAq_n4)TwqI9sqxLuZsX-@@1g~nk;8A&2e4!Kf% zdJR|#X!$I-)&|H_dJd6U;HuVAh&C=0t75*%@onc_G5vyzxjFsxGs2P&T^?S1J&t}r zSsu_OEH@ChU_$5+@pg(oNAYeeNIZ>C$$Vkq<4%EyHB=zf@}{Im50c*N&}q_j4ZL)` zIh~Z4A*jnBKuYY*#3>3)S1gQ;+v3iKxOcIZ2pEcN83EcHrm~pOx0*J&e`M+FREq`W4<|x7rV3b^~gGVnj;v0kG+e>T>8y&y&enq z0};CG`+nI69let;{i*joO*zFQFb6^>FNtzMjFPUED{Yz?_=fTu0A44Y8W6vFOFXXQ z!{`o_p`av|qFzw3J*TC%Foxg+1qmu$iuVipRe6vi6X!x{E|EwT*vDatXCWJ#@_BJ` zP~-LIre%E@KPCSyNF)*K;QnimAev9}DfuYX8=s3(p%~|JWLfT28=~5V4x$xTsK&&x zIU7)^rQn^uW^RsL9r8+W4ZddBJ~p-sbDimfTp;UX+j!m}qMGLD0np7O<{e6IF^h+i zC3K{{YoJls$(;Z6P)B>m`&wtN;2M{hchC)lHeJ$$ZT__&^9^#>ZO>c#dRYlRr&be>eQ=yy?$|lW+;?|DQhnj`REU;TIAn+TUgozZ?H9 zx&AWN!u-SdmjL^_>F;9KFH-{e5Cp#d-=(kL0ex~vL4)-$zX$fg1bY|KyZRfa3?r}y9Kx49tgpLdxC`E5FA3V;7)LN-sIeO zU(Vs2_Y3abYfbMpYo@;5-Tl?Bs;~Nm90DRP02zP^001ZfV95NODI5R*K>`490I2XU zCG72-&Fq{F)I1!_pn5Fswl);+5aH=^0PwK?|9AWc-+|7=VY_xV%#8RpLc9 z1)SF%cO<;;t5AB0B&f$|BCvuQ-Vz(87hngA&g3f-Utjw=ngDx#cAGPXSvE6Dwr+-F z>aNu}9;-Afy*(CuLrGtlbc!uo1&6ybdOG3Ih~`gdA;=`DfV-F9`5{EgXbFLDKJ9Xf 
zZh4X~MNA!o8-;)K)Xr>;G$1ci2aBCZxLux^*-Xj$u9-I6P}vkX`{jEHE(4-9gQPrz zrsBO*pBpxkMFFH#;R|s$sCqni?sC>J<@{@YxReBs@*91%4v3TOT48J?aG#*d=nb~4 zNJbiZZACJcX3iIom{loUQI(`lq!eMphVbIsK;?pbKcl!aZ<&i{F>NA_pasFWEe~J% z_<=V;)V55E`F?)jr|Y^Us8`WcZkt_jzOTra7q11wg(=*eg2ziL?I(ByflP>RFui%L zC-d)JSLq#Gr*pG7iJU_l3rV?hbU|Tw?366!Si02t$aUg$;wAVR7p3P9j@eU~~n$ zBhL{403rY?oVyL{zx>3_-pSh7-ro9G4Ewjwz`+6;4COz2G{+CV>SV(V-hcf$Xtc#K z{_e$B0WbR3E%mb~Y*pY;1sjuM5s&Ni#66zM_bDTJG+f8*1LoKKC(ESNF7xzs)g*|G zGZ{m5==$eMn&%W@b)1do*mzOWm1QifJi}Agib|$UqEy9vDu!ttL5XTFNw~~}bL3LG zd&HnbCX7Mlz(z^!^~g1tB5RRklQ*@r!QvTrM>L}mtJ)2XJ6uxLqd$caEyeU z80P9gs~dfMZFnS)>|EW1rH&p1?PtP{B$_mZa-R@YZ#V~xIU20n<0c*rV)Up93*>XY zWTl1HFkr^|Y+qF|at(@{IceN)WBe0yhJyVRiD9f^LID7PFiOCf^JmVySGTpFVFTR> zTisJ0oqv--rGFM2(O@Y(r{-dPUBwYYi65Rdt(g1aYQYUVV2ndSFOtId(+{ZI@Kc_k zNLy__pu81}WZ(@akYD~qnq!sh$Y8~{UCQUgR2(YOZQzq$u11LOEyt8}oxb!ZSAggy z;##*Fdw2SJDb1CY4xX(i#`RA6jK0sZWZ;j=;4E^X7_|1#!RdAAuGAoy|(vtW{i7ls1LmF+o zfp38{-0D4*KH!2?H5ulVbxgxUmZgLlAK>*}$x%b$X)BPvGvcYkY^#I@!*0F z+;NLi9-K)>TRg#zYav&t+5}KV#~5Q3_gRK^H&#E`8C&j?Q3a2hUbMBo|HL^}MeM;r z&S%E8Ot!Btz%|m@dO(}l1w@R$JCL(Li=a%l;xN+yq@w1>l_UtOQfv5K< z==>RwcRq@%@LP70Iq9s$%pMs}N>g-lr>19YMPdEj_iF>zf(9&Qzv!R<_|5rjQ1g8r z@!db}z}|4&)XA_E1rZql!1|XTL7hEp%%Hz^-uG&=cA0D-pNemOl{bo*rqe8p6x-%O zxb$TVv`JUnNHx}71g05@wiAB)kTdbb4`uNLLYz^xL{ip2S!KkQ*)nltKDAS~t7zOKP!)UZ%;bz!=f~K8T(lE0G@>b4+tsj= z;Ds+b8;#N^id}y!ECrEg4V2a=v~^iw0VHk~Ea7?eansW0P7tCQ8ieA~(nR%iMwX6# z)mGb5req!8J8iCSUEBbC~= zPd@)r;#@gXo+Xab@QrP4u@V08DPMYod}TMBaz}%ppTJzZTV7UBGM@I}mIfn(Hv@IA zjK4F(l`xyF^4OIFpZuxdPc_rLw1I1R05x%zY$GAJ0ulzeU zI@S)l)WIz(tpV1A9Qk+dPv4f=#dD-NDCu9y2Gc~uR6HZ6mFuC!(IxeA*%|qUt^j|^ zDs!P0q8N94tak&W=-*QaHei_bF6@_i(qBdJAA!Kx(#+P3^^YU_FBut5>u&nVT?BkfR`M9mjd5%`l1#q4$$(x zwc9p=2kD4FfJU<9COagz z>lQgixOjAnN9ID*Wrp&m4z{-$u5xUUZAHv5>NTr@=$+Q>g~*7cAZ(&DegdOOR_6-dy7 zqR+O8D0`|q5NCQy@xBN=64^|Uz@rGs4nb?s**6SufCLt3Br^<4+2_w^AC&sa-%r^bQ1emt;%IjG?&iwmq-We-U-+&)HbwoWLd5&1fB5ZnK%AIr zxW4!0YJ-K6zW3?Y{=(C zA?l8Tkp1iLdy4~(wr+_M6PbwlCfADMBp-DU6w$w9O@t^@<4Low@3D)APDHM(l9%ncMYC(s(jFv8$3KjO)6nuic9U?2~N=>^C@iWgK8i#k>a#A-?GyqAI}sJRb1tLzz!wgd1LgQR168l#aFgO=6Y=2>9oaK>-$5g7TZSjplsp~ceU z1zEy@tfcQ@D2*OwykD~n@Ujl=Ws_b(?uXSzXW=p_m?UQqC`tCvqntn2!?UQh=c2En zVR^w1waD-KTL$3hxMOvPQkx$e#QEGVtY@wnjS(S>)k&Dp(Ahj;cob|Sq0KSshF;@Y zb*hzIYG_LJfvS9!LZWC<#w(RXxFA6?zdfHfVnJHxbobx7yB;5V zxVg1_n#JxLds~%csTSdD#;A4L!1;t1qWWm48m@oKsor?dvUw`A>3CD4Y-V`X0Xz7F zeCoakHSJbEm-n0SLcmPK;&BijnV6cvm^v1fG=a1ixBurJ;o7$M+S7Ou>nF^vVrz|Y z`2<#K!&W=h;Z7+~r^D6H>)zcgOgmGJM9;}ogDheTi|k?_L&F@SE=SaF@w-rfUq&+= zwndE3ekKkq4XuiOW|nuxa_3@UNvkC;j}~~^G4B5{=2~-^C)$c=5F=u% zM%vrM+rw>YT49HA2Qw_DEi$2&viVj>KoBZ8%3w@5IM;9>l8oy-jr5CQL7n2U zzvd*n2;ZW@l{6#I;4#v;E|CBj-3_gyz6`gr+BmN*_|dZHq5ti*KK7mH_%b+i-L8eO zJa__u>Q%An)Dt>d9bBcXs+dXi4d3j6Pkfc*Yk2x{OQj9Ay`Y|MDT86}4m(Kvhw3+c zV!DEkBP?5U??1j()kv}u5ux3==}SY@_d`IU5)I-}22UEiY>IiU1o`@AQ&o({>Cq}a znJ7Kt@JQQ2UN;kZWnTN%WzJbD=Jm4l;8P*3Q#qd8=Q7?9tXl89b{P<{2bynPlM+Rf zu;CW`Iqsx7*EODdfUuF@^aWc7*vsi3;)MmC*lbNZivy+vb#UC}nvlub-_Fqg1bVIFa zD8_q6BSR4QWCWqpwKl+9g;RBOmUydu&Fd}7LhU}^2Lf~Ka3-4)B3zY}D^VcEWWNU# z(6wBAZbDUiAQtkyOfy7V%_^K3lWJVqdJj1&yM&7}X)W97M1@U+ONr2D7~x%~GOtU^ z=^qjCLd>K5xCHBgBs24m~l< zh%YAZ6S2|+KB4V{E5(tT*OqK8-f9&TuOQH^gE<4cd9$h_JalJ-UGk8=cT30?NG8waN1!P@5x1%^N-f7B z;R)HtvlJ0yl3r)`;|&`f(&Bf%@k>O%s~da==L(zU!3#Wr zdsLNo_F^(4e%1pl?!pb+gP<)@c%UpOBo;w_{%y+U-d#6cTQ_8jzt`Fe`vTYNY%ZP| zdSJOqK6(WgieQoVI%YTj-YCXE^dzWfPEd_DOtqes*ny-eu#bivlNX%k{zNu-OI&%n zxS)$Un_qE6y;=u%nu^tqe#vViIz6vT=!y7%Ylo0EgLMod3X-y&snO06csCNwaCNb< z`#d#X!UtPHBOiq+ZRPcQP8L*W=JK$d8xG4u>=CNKi2j^4LD7Cxog0*NPoZL-YO>Gl 
zvlzQ!o$TnOSI@$;bRf1Ql^-L0rifyFQZ*5LYy1Vw(eNh;S2iS5^NtO2E@2wd-?z@- znP3tn7^(DN^mzo+CQa>4UO3r1Kv_-foy>l51}5YGSCWM765jE8)Z<-#pX1d&%EN6f{CNUPYQ`iAaBwd| z7bE2ai&(|I)$f>^916u1t|O`;5Yrz2eVmyQO?N zT=>j#^ETC$pP( zlrQs~!STI7`x}SkH~i7~T$wEmng*HnYgf@nGzTJ%TcJ?D>Ve*c~QMhLp&r(Q|q zw(halK9qZI2CTy2n0>)x7mys#6H&*qg;v!J+?Ud;08YC{l^|~Arg*R|R&*Oue?SsL zK{u$-iq%A&AA@(4j0lw`#lx3&Ng5TB!6ARgW+>XB8?o7r52a zz|NAZL0TqIiK`m22Lw9MR~sPbqA)(Qi02G|R5nESfv)lCx5(L_8e@qT(GYhpgN9m; zr+mKkw|!~*RAtW=Sm_Y+5xNT3s!5!%$-Nw4v`Xr@>2_n4C&IOa>D0cad938;7;3~M zf4p${bE@`nrdH}ZdlsQ(pQcFOkVUK24avGPg~0d*t};`GqLwy-Jiw7YqtW?=AEiFf<7ULIA1KiEh*g1?l7K;^2&Qb8+!A zAlPI9$bMS5476yf;}l2$pqK^#eyoJ_LwmR&uLO8{ben4@7s)WRY79JZL)RJk=7&Ab z-K4o2NFVEKy^``BaH82tc=5>d+atTDd(=AK-rbVOEXKpis#dYAuFQ>ge|R}0?O*CE ztv40LVmuyyMjT8%ZG%_2mC~|Feli;6TzIB#m!xv$lVa0HQ32B|38PALN8(I&cymTG zI%vspnz34r!+wIhH=#OW@VMO$_#JKPOon!81I;U z95jO?s6|S*bLZECCX_au)tAAJUBD(}C`0;-ZDHC~r;OFiL6-;UMeT{zmAN^q?OG9< za>1GfIniw_EY(_)gkNkju%ql*-P=J zW8^1qqNa#d%Fj5*vAbn#B8W^Y6Hp)B?fINKNAt?DE;pn(QS3A0;>b(LBqIKT^qiBiHsNe?i$W(?!tcT*}?vchs>+PyK;Nk`YAIY)50pcI1&x^Ma#bFZHMb6@7 zy&T8sFshAQS&Red@#zeU(wb^KjBUfi#77{*!AG}+E&##Np#<7H^eO)0ybK`pM=R4z zBleF%&ASU38BDp|N)V)o%G z3o|D5dD1;w{?Ym&;>PSK%XHG0_PnXacMF#l{fY(QlI;|w9q})z5yNHbJ*^_~`uaN9 zbjCCNuhO1RGPIJiz##KtiMV8O+8mVDu0^5iwHI>1ofVPCBahwjVlk%j?qsqW#%UcA zRN3#@UmHO=MRwPUgPLSxSk=R^IZm-}q%JM+;O}9De3aypW3u(P%Q8aAr&vdb4x&t- z(Z??i3`{wOafmmsCcVFu*E|&tj9pO((jSUzc$jX(-VB-S%hN1i2E$7+i7Vp^QFO`s zooL75ofyVr2=1$paLp?UoWTf0+ry~5Lz%|n(5Ejk(wz-Me#Yln6ejo}|3hs=a<=37 z$q4ktS-ME)*xU}d5gHoiYv)I-ycDYMm8-!J7VU9X(*2oRp2&LEfvP01>7sC)Rlpl8 zo8+GgqL=rA?MBCAdwsVD<}J=ZyrDURlPcw+Ec+@GYkiGkzBUGp=9HJ%T7$K#+g}s4 z7VsTOZ-tMl!`^yv45I6)rMv31npMA!Db}F*|f}Ql1g1*lkq9m}V)ydg}}z*9o7rcg`+3IhO4eiXtPvIA>dDn9jz5 zam{-ScU{7&+z|HIyB(E)esynm94Q2$4d5unC+?qq;aQMpF1y3Y)NB0eo}P)q)B3ja z`cH-|VrFCIgnS(EhDii{Mt; z6M9iY=iS~D9gEk@S5ubUcdQ->xRz!LZ=<$g!K;!3xm&@h;(0fxiA_9$%`B}a#z|Qa zXYQwrR|Bc;7KC0iQ|^x$rIF}P5t{8iQRPpSHr4uUF`jphleaIOY19haZA+Xy z88(HZxw>;!G^x;zDb}Ii@@kF%ni0e#~8F+5TN|Z!rV*%e!3`t_c7xA-1=rFou=IIDMBv zwVlMMS=&R~)^_g=6SA-CUDK66X*xLvK*e{PWy(RF;Wto zqhxeo0wDB)f_!rMKtd6pkVt>As=(khrD#$T`G{M`=P}_Lt>{l2HDF%8^vqZtTw7v30k-7oI6*C%jLJk6t;-iPPLfYeqqd(vuacJTnb+o80!700nB$nrOK^k zm$-2fc;b2@9#k;;6wmG;ueP2J4bc?j#cK_dQhFpnoU5br5HGjv%DxTF%+_V@Cg%+e z1)1fa8rLmI>n$3;f0|R{@3#YuJ)V%kAnxyH>YNa#U&K~Myno33%Esg!ol#rfeWxad zwOY*MJ&1OG|9xLja*0~a(%Po=6bw!2wB}p;jk`!iaBT*ONz9`mE>YoJwxUIUKRVrT zyH&}`f5UxlU7BYX5hrGvoNz=TA zr>ofj)lj$c`Uy;WWL2@P^WFE1M>tPm>6>du-43}o%XMKD2d~a?JJJ5QDK03T-JpWw z4R95g!+E$;mYSp~AhO_*NGH^dpM`mrA357^yWHSS1!C=S;ixCrh!EV{@| zyM28Csu{$?9~l)y-Lt6ZaK+G<|Xn=7q;rMh~qvRUn(j2B6~e(-^v zN-_F`^*M^pmZu;)ZEvsqJCuFIHQp|);hgyI=rVeM{>wChCRqrhtc0honh8`=Q%_yW z)4|K#)dl&JII$_vyP&HyYR6HF9RVt+8gZe#&A{*@pmP#JW+lU~`{%r}o~R!QVIO{Ox15#QAkvz$#rDymLr};pdp_iH{2bMU zW_QEnhKrDS7_(w{_@+SPY>FeTHk9cY`QBy$$E{eLaLX>+yzat$?XnUirEtpZeIKb) zICOGyABvDCO$kZ5VFe{*wmh|ic30YDJJ6+iP`7e9v|*G7Vou6_iVqc!{Mr8_&tEElyv z61|^0x&t0R2|&zd&REPwpAI+|etV3-$K-xJA@qJX7xd`Y%-le5(6$v3gy75r8gF zBO7M;bo3kEM7cyWk43NvoIR9bXnQYFjohI}8ZxZY5~(FRt%^5*X&h@<(y`_|OmoVo znR~I!vkobkVF2L$Lcb|rhE>+wSm#THCyVNz`Id^iiT#^|6Z+YJL`Fq>BxpVH!IcLr`XXcKa zI$6JJ<(6jG3FitXR^+|fj@anIdGL&dxts`bg$Jy^SF6<&NC-UsbNvTN;2rSl3(B*C zB7kBI1$AxKqM04osfOZokY+ddjIR}mFr6@%6V>3&DR8y!9-57NIDq_zjFAk z?bU$rd1$vR50>;t4*Q4Fmeyn-8AnFLf62hdMADCBl2nE#gk+OZ@SJG|sRz)fus|5sNS!$UF;dFTmu9w`P$eTXLr0jl% zwAn*j!kZQ3CpAe4_^2DS!1?JpFO+HbFXrBsxz~j|kL+OjTw)-;5S1Q7dm)*J>g1D? 
zw!Cn#fDEvJkzg64$iG_)yAN_&mu*3ekA+79$Q~3V6jLqiKG1>>kzOM)6F6{=FxSEp z{HL2)DuNHTxB}WVj{|iZY?cpf5%9qlJwDKaM?%bwCC$iKMr$lASz}{7utM{C!8zHK zOj`+BKvaSGx2;=SS5H@0pQH}mhkLM<;T>8D+Q1ACb1@5o_rt`}lo5uF+CmBdlQrAO z+s6Ec`6_6dqxzN!#PPlxeT! zbE!sd8hCO2`i&hLgPb=oa9qQ(_^F(!{H-@FgDbxgKcd)pi*i@0o}b; z9neo})d3y2Rvpl@Yt;c=zE&MD7SyT(1_QGldU5;#qlA@de-JQ^B27h;qdBKYlq=*S z!H{AB5;Q~8NHES=fCNp`G!hIx79c@$HH`!#l?6!9giRyCux0@gG;7mHa4Y14NXnwj zYX>(REkI&kJGf(M0TT1t!7WbL__H`~Xj^ydnr&=Vo8pL^dd1{g75*mTo>A{vRot%IA-ClcpJy=j`YF8hU3k-3Y z!f!H^DNO9}GCH!=t5?@xDkLPN22=VlmMMgBnL?P3DQ?=^3D_{37jQe^7o}Qtz|T*$ z>VV&>YSjThY|##=ZB(FvU~8r&KG2K?H0%JY4#N(tjU9QUZ)4ot1~$6%!U(F=fGXM! z0?h|`*xmau5_O1qB>t8l;Z8SPy#@dhoz!a#r=~e~fetz!3kex-T!`wjq8jG>Kj&jy zrF7Mb!l>i34z14TX_=~N9)Si(ilrvZLw&zk4++=X@mq?{+iR>RSsPQU%d|+?5L8l9 zQp4NzVXU_!jO*qq zfQa;h@23(%zn;=S#)Gp8wgPGDmD^9$*7%If!GPM~?gx^o&n4qmlT2GPWN4^k$XH8L zpI*6aoJ<2Q8ULDO+|7`o!H^+iBTc>kq|!KAkGBj*5qp_2wzWei{aWajOOn`Yy zYYk)qEU>h)sGViEjFV}?C1b?WT+NWtx3sdT+dFp{C*#5;W5m+hn<1lbX=PCtZtXEn zrYV<<5lic2hK#8g zdV6l0aWXBCOjq-kCIT{DEwD5{|Akx6Q!*IP;L^slmRvGMEX~af8GTFh^S`vH&^Vb^ zTrx&1&BF{CeM|H6KXiVXaWbvBWQke(^Yfp7 z;)-!HZMbBNSXzJ?GWwS0=l^=|Tw`SFz^8av8EfawUYvQerP|iyTuDgmWR-EGt~#V( zQCy^LI7s!a2tLNdMH@%zrb7z$!$sPTgH+#U2wAp!sBxt2bV$KkxJWy2km_3qA^9oa z8As}_LkhORMe4;ts&5Z)%IwD@#*wzyAq7j|BK74U)z>~D)r%sGAr-=TNyx*(l2hgj zgA6s#Yp%8;VX!nz(aylunBT#+17=6~jC&KBNa6%%E#NCbcyfkhTt;&CD0$|5;bcRE z{KjerLXSt`(X|Xftvf1b%VZLW6jN`O0Pb5b1Z?2^D89sX6A#3oUlSVC>tkQ5x<-h6vPLLo$T#6c% znJ!Jy_va#(H_@l_sRJ)_84jPr60n+j{>}6FC)H4XIBO4Tl@~Ua=JVeohK_SmZ z3*hWMST$u(punm21&mP{>@T3)**HQ+%hu4u>XV+JBQiKSD-%N&4X+`FjPe~IQV#|S zS>RV})G`)UaS~@FJV!W$%+cE^%siIEsvC zEXa6^05TsAf#U`s(1P>rSA{mD^$usKoh5IdS(B&2sf28}N01^(!EfaA$h@&1!O8#C zVE6fC-@bkDlTRe;-;=9eenkD^D-v^KFpB)bKx+GT@_# z6hU2GR7N1i5C}BHKUGcq%gO)_d}1krl1fNwY7tqqXc2)1#J>mt5JUZ;6fp&0{?Hcy z!=374`UDq_60c90nG-{A#LO1_8V^xt#deFblL z@ZWx(5DFnxZTX}E+EwVa!yUJ`FgfAL1}xxx{W7z|B14{xj}Vx`fMTLYLPDTm*1_+2Di zRA-SMQvf1y{jCJfzz%}jkY#tr#J+{67Qk;q_#GB5&&-BRx}-r9$4C_;yeEv#`0?xG zpB?mZ>Qg=KeZi^i7qWJFtiDkG-P{qaXFs>HnV2#Af@_=Tu&326yL%Tl;7`jGoGDXP zm#rP%`be*bzV3I{o!PMGl>M)-j|>?6=gAZO$8Y{*l?dIcbr=Jv)8QdlAgW|WIZO{-8JbzeJ1^b4BXfOnEgt6+yC@0qrW+PxFkjjhC*sId~&HYY0{p!)8F?v;oE$5-|apN8`PV5F?Nr5Q&`IBU!BfG z_PHD|(k*Yz*JG2;w_fsX)2gmfzrScKJ|Fn$rnzf>Rc}4$Q}%7_{=ChefAkS#1{Ow* zx;1j;+Rirzs^+bgu0PxARO!xF74KjuH3ynLaZ%#}7~m`@cv>JkO*&fIS??YV$)rr0 zh2~MGEZY8{$SL%C?#i5b7Xu}`J^G9+IlgK4OA=n(tk=;MXO>?+|HsXO!18O(=a%(% z9O_wY>o`k#!h7b9$CEtQG+%qEZ;Qj88P|_Cy8Y(_mtVL2tKqeN#}or{UC+&%wz2f8 zsG_vfxi*hHuO|NN_F2!_9}m9!dv%8!eeQR1x94{=&b&{(nd$YxJ1XIbLStjh`y$&9 zhQ`X9_i^6q`lMxqQZ8vfzS+m))?e5%EbaG*EyK>dWCS-+0FZw6*CuWf%yeth`b^4|I)A;~y|^pitx?SMB~PXX6m_z$!~F0uBV!BU3e~1n%nM3fL+`aNt{v^ z@Lii;xf}g1{I)R1rH6auy~*>w{5C$ig+qJQCKFN1k zWSikn z-^A=5==HoQsT=?Lu<(EFK#hW%XL;Mn`(^H(>W?bM}7(wV(oWs2|5eRi&SkyrdW z-o@pmL2o0Zs_m+KRgTAQ+*kaakiPqBV$NfmlqXR6YvGa4PaJL@cPK5(XGR6@;duXt z)+vWBI(D99Rn>f9o#60ZU7atkUgPqlx7Tl_E+JLlIS|K`A*TiFMny(+o9dZ)bArgD zqG46^l>YJuaUOT3KOf$A`jj8mH{G**z$Uz?*07L-~J(qi=Kq!RiBOx`Qn~!nDq(I&!4#hU-tQ?1_$5u z^LrWV?Jm6%d}PKiKVR-PHtcfIs#9H_t;*gh|M-xvV(#_c*M0lV413)&M0Dup)!fLI zi8-S@cHVNG_Se9(2hR5Ix4dp~@1=HESNI;1-h0+G?BSTL%6g^Uo=sGR*B#;JEJ$s2 z^Uc(tfW)H>r%9Io7CdKY(c@u5`UJZw2i9LSw&gm}8Oece_v(CE7+lny|JKX;o%{4J z{&0Nbz3hm$*yHo6-zPdK5+nY4Trhs>&9gHeo_6afxwgP|-ghn!$No|9C~HU9bN@rU z_6`Sg=Qe16MHV<^clLuN0sWocrMB&{Wlzk`1$$K;nkSFkRcBlFnt?|HCXTqA>il~9 zH`hmB&a}Dw)67@1?i^W{suEUr&N$tD!u$vI*Uoy??%wDbUtC^Upq?}%anSK5*A~k* zy6oCkIq!mM`^AYz9Re#|e;X1T-Sp>nCn|>>;0HW7uws$_j2Ll?LuZ6v+>4xU*W${M zPhATHgImWndeOG{z%Qz1Y3(FM>ss|M%(A=F$N#~(jZc269{h6K(DQekE(f{A@1B%4 
z$2g;e(Ap8lhOhGPsVroq3DOZW$`1q?(s6DF!bgVRsl(Ph)@uyz?0-wvJqLtj~|z_&v`nRa}(we`)yu^m@k z+V5B58?d<~ZcFczDG!r2p3g2B@s-uLsU^jY(wom~t6tK`>9fFXDFJO)cC_6c@ZHZ_ z=MHUut6`tsotw2<$y+uvx6XCjn9`8&j@J(yaNhPs{^c$wwom-yWyO!b7e3lnr+kiN zQAD@*UCfzY^4a$1Ll$}tOz~TvvMFlgMXRj7Lz1fZjk{W4<=kuivdsCV zyn^oE(huH zNv_A|+->h+3GIy1u2uWw-AH|gX}tL|wF`fVsVo7%ngiG*K< zBt4EOT~h6Q@luKOMs7^W?C8So-wd4Hq~G`>KOHP>IjnD&-Y>JiJaEb*WX6(xOX`jI zLq1^AW5QQi?ReiXb#KhC&fiA0T%|maaz4M_nv1dS52vJWjai-?Fu&cKl&N!8_B-z?-A|pCid|5U6NI|edL1TQ?mxW z%HP!O`?>MA+J65-uVcGU^{pP4yW+_*-lW5Bs*z60bEN5_xTWf?@47YnQr5Y3;}P!% z^io`$X6^Cak+Le+W$U8^zYW{9>(Djr(0P0>_{wf7DTZcxmyr}XuD>AyZlb8`?FI7G zOB=ZzxrvvqV`E6r^%5|ejOmG=i;0^B=`j6n3=@+RadB{sK!?Kxc5U^!F=G;O{jFhj zgIgKaWCV=9A!g3F>#O;f%1#FVQrUrz3sHJuw^TU8a47`;N&)`+-a2qkf#noF*M~d) z#R4IDyY9)E=#g`#n}!vAmHx~@Fv5X{jd(Af8U^hX+4#> zuY_+dJZOOnJRTA7bo^8SA9z)T{Ql)9(AFN#(ZS{n{CCbCUkP{wb^-BX2F%=reAsfEsL+YHHT-FJR$&CsDx~6Z>PtcpQTF=OH{|%@+a`vGJJpJirsoES(V-ND$tj zw!eUV7-$W+MA5fG5f_dZv5-)_^Ft{vK7)(W<}I6BmVmLxQAxF4{2)dhB%y5K~MO;pyIGQvKE`c5{0fP%x7Yr@| zg9{BBapBHU2-zpT6^b?F8gapXB3K1(xlJtz}_KqhQLWXwz26TAgKKwI^u4oZRl-2bB_fF6pTg?@p5O9kx^fB(7u|B%3Y zHR#@+PAs5$?Zw!|xfAD#y_-8NSS%!+JYQ|{fwzaCV64aYaZoU34S|CH{3ZnoMz;(o zIFDsR!Ck^hP;hHyDioXnfl?$)Nd;wf_z|n zrCYP44uB9PqQh>3s+VZEbV$+;9d=2_cSN7QFBcUaer5_W8VgUYm!nEgfM3Gg`HEtRi=y~%1`e&1mHZr) z#D!@IEx2{)$CoSF9_b0d;s`+gT2Tz$WI@mk4p>NBVifQTaJci+6U2oQm%jn7tOP>v zn^-MZzGU7*%}QCBP$6+)TF@T6_)>~Tu7pO(z{MN6(ke%lr%q2`UhrcSF$&;KkA%b} zzEqz?LIszEEXa^VjtW_2NXUT1Te(u8C`Os5Com0xFS$}#DGSn(Q51^{8M5q1_{vIg zVL}BRE3{+k@uj-4BU^OL3?Z*K=?U)>Dqw&Tm-`I&aA%NdYajxIG0i=y(-W``x>6Lw z9W=-M(wYW_Oq0zdNA(`MDT)&+-sPzsa#V7qT!|t}td<2OR49t^IiUiiUJsvQ6mn%^ zH9qC2l$EHD_|o(Q+U8ZB+7ZVkF3eLSate(vO@v|Ps1hn*oPa;E8g-_u#HWM`)GXow z{Elc|D1M|bSH>uERN_Ke5Yl1#!XSX*jll3b_!Sqz9nhsG#Fs{jV-#Udk>WfxiY>lW zT*$H;RGyv?qlgqoiqW*foFaTtQCJrjQg*|fg39Ad6Dnd9$T;3{kcg3G7i)55&qpx| z6hvY*Dvq+NDDL@4;*wC&^HGi}Mv(|Y&r?VEB5vqw$Zm{6$F3|0Nkxhk#VAdDX4#D| zRaQodkr|36p+Xjf5=OQtyJ*1Jv*#mp3d0 zd4BrmvWbd454AqP*n5)3-sMVJ5Smer>LGX^+F*|AX`Wh$?t^B8_KzKum2UtlaY0K* zksxb0yw}jH>qhSl>7PS?z=E#Fup@&CAO=rmPvXVY)?{xScI8~%4 z+|N;A{K-*4V1dX3HF~Eo8qxp+lSECVAXodgiYrFsN&^++O3)1zmspKffpS$8M~YEB z7>>|EG=sH^@Ws%EUP^-(N)}Yg@84ip*9c!q1m%k@v-k7g9Oe|}gmOmTjPQl&hrxp}QAY@UkRhZ|AVh3^Q>5V(M}&TY>cfzTHqVZTdJ6g) z&P?bJ%!oj3SZ7p7r2}K4p&9g+*!$)GFJM|5nhbSvChCj+vTjLWJ4LdLi zF;)czL4Bf?Eaxg6t$|Ox2fI~PhB;A`XlBYvovF%|VNMuFB78A=YfV)cDen0w!WW|~ z^@{jXB+|b@Qj?%^AOcFtQ_Gd`M6Hh%DI%eSfMWrMkqBQ_dlY$xNSqkZD5(3PPeh7? 
z%4I?NgwQJ(LX1zKpJ{zU4nl%4K~AGH6C{QK0w*@s1<}Xptc13zBPR>Oxg3e%j3se_ zaRCvW_E;yx=~s_nCyu|MbW!$N2Ly(+67Q%<02s%+$GRUs;)(kxh*)eLoD)15BNmPX zBdor`V@#$7FAG9-<6ChdN}P%e^+de}`Nok@0|*9ffkt37*h?0q6`7P0f-H`;FH|Aw ziV91vq*FMe!nsRB$d*Pg%p>p(GZXTY1}lnvzoKN&ki>;rGU=M}P;OylP-pE8Cq)eN zXaqP@q4c$}LP(7u%n5x9Ct4;3!#Ko+y3mL*I7Xo*#*PUmC~9#JKrc9z(O{3W2%^PU zf$0Gi9nLm%&cUe-$A_Xrfhmev(P2+58Ium26d2o3zKk=;u~P$%Ltmqq(&eaLz^qOq zM`p_CpKcrm*MP0<$%K0Goi5>-69Y&Vy$keE^}1R z^VIgJIpm)Dk6tvf!(^$;#-MT<0wR3T*C0eQmWrdMtfN8dkz^em9adkMkz%xC93jr8 zSWuWW(bUEjJDRLu06>YL!6j7CSpgGLIz~(h^mN?;X3oE+}bn0RQ(+AbX zju@?(buNoL;mlQQTf+IvLc{QYChbb*AQbt?0|&Wez( zfy9Ni?1qxaQ(N4Pl>_{hp0yY literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_types.xlsb b/pandas/tests/io/data/excel/test_types.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e7403aa288263456f3468a721206ad1ab786b586 GIT binary patch literal 8053 zcmeHMgFa2mq)+ z{rgUiZcs-zV@+>osH-8DmxDb`9uklx2LObB|KIC>*aOL$11cRnxJ~qjIEhs(9s>xp zlXWO8_{7CBcvjO({A!f*M~#eEz5?6#RqIP!BA?Ch)$%P3S*zM3q9dNG5`CTv{gvos@p zb6CNFdc_)k=atC;O>!-sy^?tctUI5$cYUtYLT_TzkTH6^X3g8PGO<7HWGPwUA)BUs z`No|>RdyZ<4W{CnHY#+wi8>w1* zaEZ>WinDxEwUx8v?eu(<2vH+HTx~C{56v?Si*3ekc;Za~Q#PFjdBB<9HOJT;owT)<_k-h@k1jkMsBDD#)i()_ekcDjH~Mad(SvF7N6=%o8|2 zZ*C9)s=qMze$Z;yIXoPd;Kam$GuPM!YVXR;_5JxTWB-R~`j@H4B#posi`l&|*K~jF z;*+BsYHs~InwN{XY&gq|j^y>R`3#chXBN!G{0)>>`^O{MO^ZVl#VtZI?3F?8YC&d2 zUAog+2V+f@FC@EUhDTZA*o9m|4E>+H_TNlYr;2JmRfFCkX5Z2{8i`Ol7gOWttQ~l! z-7do#o_inSlN0wJjg)wjYg@_YQB7YBshJEE)uFM?q3QYphl&m2v222OZkC{<%o$Pv zc4}UIIRWE*GIYu!^q8&JnQ?MU#H8d0Vq+T~c{f#Ekp!d)A$maL4Pkc!pxy^;x5YeY zPnGC##0UJa;9gaVa?$6((bUCH;8XuQPd=RnYo%Gj$gg46d0wrd(W39|= zLGI%LirgtLI9tz;R!%qa_d5~j>tXCHH55ob1ok`RpsJ~fAw*QOy^7C4vLJeV`~ID> zQ$COM3H-MnG>N)mtj>cS`N3;J*1tB#xMH)62z3p! zMTq)h#@mhK-Wk0%R=YYj>}A;HZpoUAQZV`4p|c zgy>QKwjfSd5>*`S=RVq_z8*;Q8b9kb`kJFW-kaLK?V zJcAZ%Y5+w^x6&FgFV8U_zY_5%0Vv7~tPAesVF14b4)t2~n)f2DxP_U8@#e_kC8${T z0;1LdQ^nvqKHfp|}Crdw6w5LwL}m}F|7yC zkM#suu zqc}2qc-+HWQF7*x1}2d*rq$^oS%!tNi(SDO_|Woh%3SD#;vMe_$O{)uwe9C&zzk*1 z9WL&agFb_%>B6GKA=!#{=_Rk#--{T7wU!@_M%4BA686Uhkz4@tx#GsrHHdsYfja5u)-|%Et=BDsT_!qG{2ab zR|oacTj$;ZLXIc*{ZB?uMj;K`z%x#42Zh;1Q`ZL#IThPgM^F0`y8;UohPufA(e1fv zOldIq2nF2r^uM|Nd%U??L!oZ2zo)DpAHaVmEc+3dEu5&x%NIHOCT_-=Cp_H|8Z)K| zcH2~vwa+^FQ(~bCM`x!FdozcrUnFv`>q4~_S)$|ZbL{j~I|W2WY6JHNrgubHSbMRj zdo4_u11FrlwnysBN*Evc;cML?LR4hkzHj#m1C-3fw9@$83|rTBuvW#oO6aM0!(PU4 zJTfMvkSr@utPy(>3OPNZ7WK-=?=0aLK&C9}o4?U)uE>4?-lrFxD-HE7G+W;EoVj~9 zfXe6UWWpKcp8^j?jN1|jAL*v~-i!QToSQY&0m}X33I5)T>^>TbCYL2@g7fE^qka6PKD+`YWC=lkBsa>IAH}NG=1k9OQB|OsSV($a=V|ML3<_^M_)Jn952bijR-Fr{9|{gNWpJ!!R9Su} zyb4Y_=qFw$mEFt{EL_(s7-#8;U}#CrA(<3U8|fqTB`6qgtydF&d5|ubi(Hr-|IF52 z-g#hHQS+_MEYyQ#-)3cW+ExXGjEG{K*0eHFRzIyWY>1p9W*v1&qsvf0lAp zt=YJ7WE8CK_a+#}cW?)6hk!f(`(CmVT<9dT!JO+V71y5R&G^XEN{0G)(wut3p37#5K?LcNd(p;hQl?V z$W3c>wFTMaOy?EK#0N-$uVv5(cnnIk9zGMf)XA2Vu!+w@jKQ?&!bpqAqV4J-&`?RK zS^WU2XhHD4_;#9+D$(ry^*BSPYN_c`N8q(Kw2v~+!nFVDaPh;<)e!O0zQp5;p41z; zqNQ#Of}79TPyEjZs`fQ-*twJr&&sHmu1-4Ut8UKb&y7XuJcUKT9|`cyKQ8ucF3}Tp z04XmI(ONS*8KgUtB=NR9imCd0flJ2ppW08W9om^GYGN?FtWGCaMB=_E;P(uk=gk1A z_#XPkh+;WIhM>A$H3~$Z`@Z&f-jm)Myz5ELm=YXb#`scaMSV8D`oS^ngGd>SOq1=B z{TbHoxG8^?2f}qSW|b3VWprspyD=KVM-F}1 z56ji?>02Pj{s)aV$F!QlR&4%~MMX>p;(=#4D_u6pC_pz7ShLPf2kN7G#byl8rpmYL ztD)Z&_&!)b@`X=3X4MPMisOboAJGEkso$yZ32Z87l}c*gBtmyhfQ%N6=%$0c$2L zX1Xs<8F=2kzVX+ob>cQY=e-b#+cD}6r&?`H?NiYfSK-#E;~t$Yo`^e1md*a!=^O92 z5i3h=I zHIAq(vjYh{Ottcplnm(^Qcwo}LX3y538U;6EF5##M4SbWS4%00BstH~FW1hN!+m;i z5OEP`g<^uw{0c9V8+}#{_Xgv)xBa{8=m2`V%1DGS9N8;$;RrypU9m|Xke9jUn+M@UP 
zGjjSCMc!L2y9C_py0y6k?Q8qBw(W+ehkkKI<8JT3c^p|S(qRzS&|`hh$L`fh7VOUF zLg`t)%x+!Ow5YGDA88?H;vo>DOiiZq0R>IH>DX7C%6OnQ(@jz1;N&>^#u!y?wLyy) z?e1(N+gq;}kh}F8EP0Aelge>laA*eJjkVR9XtI4tai+b9gTXpLDqmX+cfFd{L{?Wh zsQ}buAVr7$2)QgalD6pi9dYHwdDhp>eeU}H(ngGJ%WtnJw9bJbSdO-mrJ6>J=R|e? z>};{2r~t+#pgx)qjebW^!1ncS$~5Yx8;@5(KaR`18pi!&W#bagimJgOeFu*t!lWKR z`TXV+#k&axrD<=bUX#>C+WA@csDjrJ`)D>stqSy@Cx_ z4@v%3v;3@9{E}L~OGn-NnYcxhnzOhVW&v@iB^U&eGmpHGH4jNZXbdJFjX{Wr@#D`( zBTM8kLGBhNC{I)>J#3Pnu0TA+KkN@5qD@d%wRA?IyPWBhX8I1(DC(9=jihmT`BS`= ztS8it4{D=#GLvYa)-h%Ai-fgWh*-@jq-lmDcW5$B7L4?s%*n9sHRcP7o2oL6>S@&s z;sSgo9ya96^-^h)YdJ{QCnDs`<#3W{l25ji7S{>pE2>Rr3-xLuT#4%NiHt$0UWW`b zk~x}5#J{f$$q~e6>H_N7^5>PHj+Gv4+1hH#Qz{iUwA^FZ%ERWIo{CVBr5d8X0#GU~ zrf2wgJK8or*2~muw3o4hO)B-AT3=KKVf5@DX~bjOaZ%_1?PTUXllnwSTHK5F9G1$O zPwwdV-R#Gmid}!TRbSw}*Lku0Yy=XOlKAcP3HIf=9F#xK8=Fi%J#syPXaq?_j{AC+SA(G z!pU6K#mU)~+uX?o`nv}GZ)*-;y^thhNIMT<*9YZGY{l%wM(nh4d-`iMZUr+pC% zqa4$n^agZ4-**TEd6DtWnv2|T3F7z%>zM3f*AvXW!#;x{B+f^diaJ*P22 zu(0pKN4~29RPg)vzW?_=4Bi5px_HYwT0p&i^kL&kCIBFaCItvi2nY+jmmo;Z*4`Ts z&?xz8O0vAWx2+mC0-8obQiyn{$@E|PZBuzP+(>xv;~@e7xPJ|PQ)lOYV;{Z(zh9Y2 zQcm+cn4$ZKXVjRBGSQw zsK{I(PPS)HXPchCe4nUYgi}eYXa;?ESX6j0sA|nXQ|UG6B88IDy=1LPOW#hcTv%7I zo1Vp+s#un%V_cvcI$2tzg_B~&HF~vDZy3E&Bt_!PHkPrgMAJ=Xufg|ToECb8f6U$h z2vMjpC5u~M5UpBdZl=I8Fdz#LKnn>~?98Ia`I0}IrgTGNG#n`#7U|hOi|se@+_mMf z-*23Hc4vl>AdaI+=YxS)S5aN`dDwE@@IdZd1JnH>Zs+k4#M9o4!N;hNUNi0HihExZ zh4nmn@%W9=5?6ml(g$||jibxdQz!UB{KgjqASZk?`R6mlKbGr{_dlE{sw(~p@as{= zABNxGjc`u<6)ircoGw{_g*$0Nn<>Eus7bjKTZ?cw11p4RE`n`w1WpuO5EQg+J@N+bFk- zj-MzW@BHSH+a<_tgkLLypY{Mi7!d&QxBB3=`RyeB69Av|_B3v%^xIg!=I5VC06;Jt c>(9UPd$v|p1pUbKm;g)wB|LhiDZX3$AOEjL!THE~}zFkjMb=6grfqH=ffC0b(000sINkyZlJp=&Y^#TAu2f#t>IE2tAwp__i37d>NwR`ibxN-oXS96n z0q*c6y%TxrlUI!ih3dh`unLMp1#cG`bH?zf(2!~l9a*bQF=ayytSHdAG3g5Z70>Ad zRZKZyOL^9i7YsIta|rGTs>;0!W_z_oIpUbxq+~e6jC;dkqf&1W{dDtPEt$h40@pPm z$n6Ru{SLRj0b&LhkMIdeTob9Ab)H`ioE(k$YRw|lYRJgQ?ck6~fg-eemJ(gYsC>v5 zwNOD3H1};2$0e&bJe`=t1pPbf;(QO69jEKrn zd1zGwXZC*lSi1jQ(&s&7!EkQb!^u?40cRugg$kbm8^-+AJ0?__OcZ*Tq6o&CWX2(V8BOZo3^Wy*3gy-cWWh)-cm z?&)rrXbUckq=#w;FX0AjX_hESSp6-#M=#p|%V&!l3P zO_QN)R-;U=ELP6r1Vdn*8Lr(5xDQb|EDx$}Xdq6bG24D_5?#86`4r?yG;#47{w*J} zCtzZCnuPAQOZ*z{y!F9M&U4hdEAJj@8^2VBu!No8C)9;FY(SF8p9bi>otxUCcCzjw z@?(gmW9j_O@a2iw)?ok4DtM>=sWPlz+ktve;1O5=;3fDDu*&>ZS<2Lu?DM!#{RH=( zpf)lWDc1nB?$$nQ2&`6bsKtMl-dE$_;kL8Z#+XDMT=-+YYa z1rM8NTXv)o7j^`=yW0DINYCbWj69BM91x%J+iL26QNQN9A!@u9%y@iS+EXuS!J|3h z1MIpUbM_&<7HPKf=)a7>_js8Vqe{SPP@fHeR>B1757b4)fzm&hD2>QAD80lLCN7meR3qI7WMi= z?}~g^Y`0(+Q^?fYH7{3HDq4!Vam!;&v20bUYl;=Hh9k5UUqYCz4#QQemt@Da zX(6P7s%W_ zBAXIX+8zE-+`|Pt6%fBH_Xle{^{w6L^4jdooM$(YMXNu4uvXDg9JyBxipkep$Dy{hl0kq-P?U zCPioGfz)bh%a5yY!tv-OgB_lMEOl2stExrn_7mAxR^3Up*J9$DFV<;uyGXU-13q7v zL=1bNt#>nvj8Qk~U=}qk9K56Qso|s#JXjj#XgMuo@ebvwj3`~X)c(4NkYG>Mz)Zp= z>udfDQb-IUKh3-=ib)p^hMWh&F@EQYfz%z{AeNqFlFHS>DGTs@JvN-H+gGo9O&0e^ zq)}k~-sb*Za}rtQyL9zFO1oCF&leNikh<8Hw*pix(ImmF**N8cJmFH2!P`IyVcjzG zvkZ(<7CphZx`p}Hi(KiC-s!BYBIrv`u5M=iSB6)OjSGzUL)W>mUxvG}_D`>4IVvjO ziFbITIC)m(l@g}axL*_fD;0AY4ZP36mRl4|MdaV9=iCExBl>AU2NT>R}b497vFJal1hRQ$~A*-%KCQpmN*@4Z-PL}%w}lBs+nuN z)}<6Au73UGB{wx3$%$ z&*`z3k9n~qSoHlCX>51Z14?a~3b?I;Q{k1YQ6zZw(0PG75_8S9=A9JHZ#+k{np=nw zMJ~|c2N=nq(aw4#s~f(*&!=HN3~V`HXH$>)mS3(^foGbNA&IuHAb?4nA;-L5S~Cm1 z^%fX(xPI{jMQJGuBY~T&9i#N|?ZD2t-rfnVvn2m2NP6=g)+-y5JjZxT+ambQCBzyO zm6B@BQ(qcFJNq)c3~kf=&4zrXb>@23mg2x@&I;t-G6V%fFLNgp?e#9FlLYmbz*se4vB z53-)ka_gsT+c2q9OW(a_niOulvbtn;K9*bYn05Fk&uXuRd0mqn&KnJ9dmh8MhH!N} z#x`1MfP=hd3d-<9du^eqbK)jF*{q2n1sn5&^ zK8)$Nft;BWO`M-VwGrFx->F-NPSb#3m)lP&V>R?UbfHl1N;@+3w=16WXGG7ZkWVG~ 
zTXRvf`6x&wM5CF)+ub3)d5~H9nmJnTw|vhSKumT zbiY(L*(a=ie1hPW6fC*|<#HS{;H_Sek)pRR3klgFa-y`qcX6x2I&+)|ay0d-MoBeA zPW5I_jY#Q}G)Ebb^Ccd#26u_n|h#kOIzMD4PlS(;=G zd^9JV&-c2lCW+yKwY>}Up^9|-uMQryJL)G3AuQN}5Sp)z9kjmt5HwM2V zDU8F_G)7T#uk|ibOu@af4}C>x(5Om-EUk81DG>PdnJluNi6uYrok4^qp7OPaQSYE^ z{R3V03@0&T7Qgq?>E1w%-&sEjNGA3iY@A>&&Joc*Z)s8|e!Qv7=LCPsfMD=OKy738Rlwlys zk246t%w8suUZ4=#u3ORZ@BpNq4)K1sYt>Qr624|1bSqzn^+GezSrhq()JlnJ=w;(* zHyn4KwUV^q(^H2mSy22%Q$$O|c%1bygLE0-W`{LfV?%i%rpoj?a^$Os&s<%TDjn6L z(FCO}qT$2XQ5HDR>qm(~A|B@+{1)3Mc7a!nUld=lhIzRG zzP|g>jImv6r)ffi)F6@IP`Zq6U{viY!vMk*SYka(meH(ZaPDp?2d8HU0kAYp@o!ZDw8#WG8^ZH^Pt=hHb}LSaa#x1q;x z#e5x$sG)jrA0l*zaE5`7)j9i-z>`jGx+50&1^$NUjkT9W)^eu9lvlgd9C>k-c634ozEHpcMTDe-5w29}_lSR9HiD8;#3c_B0!#k4nL#wYjHEfzi zp~BcrUNrU7ZzN11%Yw^&R>yPS2bNOcU9(4tB0equcAZ zS$4PXhDoonYr@15cy#0ChhUiFQoIv->GkRT6Oq{_@N;eHGy+fsv^Ud)=!nuCQC)Zk zJXpHOEJHy!N?Gzmf$J(24o@XtR|(3<7x25=h+<;fpm=sT(egDl8R=j@LSm9q_%@+HM}(uWkk*xRz`&PM0(IS{ z0OBN?8FGfP{BHjFEA9TYOZ;9K?-p|Dx#)BaZGqjRJiSweHA551s~Jz}*7a%MD5x$b z`cL0jng#;e^x;yIFR{G+g0ezfpq5W{m^pjj2WzZ zAfK0mKUbmhuA+#2G8y&#B;G&*he6bGyjBd{YAZJt{j(p94J=4{o<0HBume>!SUCr+ zzVy8tj{5LpmF~VP?aMWTI3-<_se^k6UBhv4H_OWYIfmoSZTgY-Xi-tJ`rokY*6%mk zbr*abPtb@nf7A_SYuUP4y*I1lr+(2UpErIu%ky5ZT;~MY3sg0M&v)8JhK#Z}W2bn) ziBRX%paW+`Vd|HsRRo(T&0Exf_n7@iOlNGcP2+3Q4!E&usek_0%tAiOSup|ln8*qb z0O0)Yet#Yloh?jFU7Q(z>wfdW8A+?LxlE}2tBg(0ftT;Hsi`HFsio|Q7@C&Hb`aDC zA-5OzkH0%T|Bx%ron}9qErWOASxr?i=Q%iBFQ6@l+Rim_4XHXJjhcNu9OZ?VL$o^8 z^+HPo$U{X@Y8j$eEcEgsmdI=@S}=m3U=s4ZwwMK#DXS}f+>wQN$Ox`>uxXEnIC<-N zIT^0}+Z7(Qg3TtoHP!s0IQ5isZ?<_e6%#hYh4$RKY0``&F_9-D4YrMZb<>*xwGhFh zbm6`d81h+ht${gp3@tSd-{slpcvtvkMh`KTgZDk3U{}{^WLx|&i)mR#Uo1)CB@kp@ zu>0+XpbYX9SU~W6Ai3Om$cY3?mj*<<@Y4#4G{}Edw&{%8ovLg7(G29!nxW zu&>Qe%}0eIld0QDtEQ?uLrcL%!NcEX=5o@w|yWZ0e&;j z;MY;~mC|7o15xm`ZF7AxwK{;j4oU!&^eks`!%DI9E+!>&EH`{j=BTunVX*CKw{^9| zyRt1FY8h!C9yoF1>YRlTQ2-(D)>%m04{w$nWs=qIparvu>33`=6jh;3@?J(j!+;8G zZN@F_muX|G1O=A7ZN@j;zaZ-{Z{8;N&scMOJf4r%rlPg^WNTDqsg;*ie9fzp+2*C1 z_neJerwTYq-mb-h*$|6~tGTF?jE+|bv~rgVqgp}cay>g~vv05HaGbiN ztryKpVwPGpQ8D~sGjDN`=qA|xda?LgFp=SO^jdGvAZS?*yOmq=vc$gSam<08=_OyN z0jG{Ch0$r+lz0n~=g3*^V{B`dw=2X|>uh{2iOzlMsjU)XX@0uf;)l4eS6ptk9!X_N ztR6`Hjb_R<`L{u1NrPe*$^GuWqjQjo~H_qly;Ka&5GcRxFFi9uDRO$s24&!%9 zIa?SynVP7$I9b}6|90xLnNX5}Vn%{Jb?-V+yg&Bh<&2v+=p5`czk*6=vC*)I*NI1e zymo^_xFWoe5qUywd6rf375XkTzjswKflyUQveq4BJfJ2up{7V1+=1R zreuoUG?h0{%NqIv4Y-3T7f|4wo6(xU@(Eet^+P9FK)%J|XHTX<*y)nO*9FTUD|X|? 
zZw|N6=G12#3_UZEdnPd7#R)0WUA4V9wenarF7Y&@SG_f~Ubh3AFW;C{UGW}2O?SFYn zuyRBLbrgDtaY9ez9$q5f&=;hK#nnfJ9S%`Yj(+oyTw9WOlK@z}T9s)jpZ$i&xbExe zaVsLQT1tBld0F2!T6#82d{6%l7Frz89x2${Y zL4sZO!-dID#nscGGOlbE>NQ) zH@_$-`TeWjb?a%teFWuX<in7(>LUCy-lojFL&`C2JB~+3Tbf3E2i; zclAS>?}U&+Fe(Vg2bsa6tQYO=@?MmJRlG=VFIMpYRu=69OMha5iS}UtR@0GlA$&2^ zsO94DINR)+FOxliSCWRQC*4`+u5}sX=AfeNhtjO)xmpynwY+2yvzbA8d+%-;#AxNT z!fd`!&iV#?^c8*)eLEo%55*2kB6owu3UE7*Au*%S%DQu%Cw3f}ON2rYVIR#yRbRe@^9M>KZNt<%iK)wl<>{sRt>z(Sh ziD9LW!OrUjOo3Z{+#91cqW3E{#Akxx$<7;bh8vj<^-!y9&ZC?H+zR(S-UQ8XPbqkX zyXjYzm-z1MggC2no_eZFSg&U9VC4@4FsF@IOq_xTo1r`U@84ax8*>FG@6BGFyD8}< z-|3sPQwUaQKg0ZCJs}|J!QSHEzh3xb*Z%1L58u2Um*No|(;3%Zuz<=gXe}(>15$1W<$K!bM|06+l$0>D1ImH6ks{{vj7)*}D_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_types.xlsx b/pandas/tests/io/data/excel/test_types.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..561bb2c5d6714cac96aea28a2463a4795aea6427 GIT binary patch literal 9010 zcmeHN1y@|j*2NuyLxQ`zLvRaj2@oJOZjFT|5L^-*f~0XzaED-xO9yv%C%8K#@O5V1 zdoz=n?-#sRz1FSO>)w5C)vdE@SDjOuDu_sga42wSaBy%maAt?;HpcL9a4|@5a0GB@ z2!^swj;@xDu7Kwr&X%B8oNpcMsXrkhFlWIbz@GnK`7izgCE$MbPHtTJt7qS3H#n3R zs-(~a4g$OJS+ykEpdY)6%ndVaZ67ki?r>$YiR^?b@rM_@cuxjxsvI3^14FwSwXq@s zdfHwXlL>Ks?cQf-C3zg@V({`Pi;zr;=P>{fWt9q-^t!H7n_q5KTDjzj4H2<~|9bjd zw=Ujt7X+)wpg^Q&zI{bqcL`6puW%f9eUxCr&X&c?_-vg+2`PfsYk}C!I9HQKUx&p^ z8{%@CsL}#ruIHOHF-KV94vuNYSsYqMX>1XqRO5eARTzF=WY84@WNY;{^7d?H{t$7l zdD|dJWCP$Z%#aG82^^b&(*#CT6PNNj4*NhQi9W0LA9qkM-Z88akVBWrEE+PRV=UB3 zX&u;Ca=SC+G(?101AX&*dh3PTG!D$ky&dyLQ8$p}uOA36@pc-{+GKEh+i2*0LJn4X zcEr5)M>Y-L9iAlY`0?JOz`;E{Ai`<>g_bohxarPezNQLe9VU#H02fPp5Etjq>;LHZ zU(CTjb-g4`O{0?=EBNTy_mG~e$;B9485MWwXN@$^ef^ad9@RvBrYBo$Wg^FY{^T8^ zqF=M`_rCeXr%}5-G?#1qB@y_9Vsy3crNN1}uiv1uGP)!vye?UV;<-&;OkO4{Dtj=y zHAk}+*B4}~^e)pWjGxF=;tq4_JwYePB@ZSRPYy8XQhT{%c3lQPDWh{x8eCB)l(qYD zIMr`DA#Vp;GE_u$e=M1(2V`bFTjt$kPjhke;$9|JI)Ccb>RAxd6?aoPa;o>))J#he;UB%71q$(NH}8tsop9$&R2f6Cr} zeq^=L+da7q8|lBh%u4Mx)hk5U9aK2DN3bVgF7sz+DS4sglqQVpE4g!zxRyR+)sD%W zGZ!RJQs_>SYkT|Fr45PAUu51wDExNWX@D~|r?7~>*~XlY^6Vxx*=b-WKIQYe!n>QIW7_G_;0q$4<*?3G0Pw1F_NX=8F}I;H_H+o|-#FjVd?1E8 zr$5+THM1w+YNarDHwj*6kye~}h;`A~v4Qx8RA+)`w9)ImTr;xlR1LESXMg2-^Xxn3 zC0@kyH*_fCpWkd6u88IxcD1uv3c*36Y=S(Jq+aE-eLR~tW_25vnGwSm6+@Q9ctGl0 z8D$8akL9Si2bvG{V?V@H<|`Y^G1LO*GkAU*It zfwxsNP@?uYlEaK%@V?rwCgx~eQkgdGJSN$*&o!8~O%_a5CN<7I#8-uEauH7mmne<~ zCYpqslcZB@dS|DFPutrM5}|nnheUz9d~MtVxphldi>!mKtAw&TWsO>kAn0O1dgcYl z#={4h*EZ}UBv1@GHUhWLTuHWRS~Z0c5VvDJ7L0ULZGq6G zBOUb*rjHQ!QzJ1ZMh&W?eYx^Ag4sv`8m%XPO!P^UpM`-;4W#M+Rdvzigkn0=UdRhg zT|rAc{UP*`Uq*WCW2-ZVI2@mrcD zV-l}kc)a`ZV8g~miY@Mxgv82S+P>&}EKYwj&6p0%XOYbpDJaHCBoWR^r}hNI8Ws+M zyY9x%@luw)4MdJWYyFtSD$QT(i*ruh1J%?hC(o zG&n1E2P4|wnfQsz#Oo9`am&J(i2XYgL9QP5mY|=Lc22yZ(>OPw|8ng0ql9HXCR?4k zIExXt*qAPUTiLZqwX~rWuJ+do7;CMdv>b;#8Wck+gS(k6F}=*Lty046faDZl{L0Ee zcW9vziO~W6_f5gGZ={n6MS&Ql`hiy^U0XZzLa!aJ0v8u}tnjFxr>}?siy3I%_;%Bm z4!eFOOasq7voi&{<&kQ0TBYa(uGBU+4Rj|?IY<@`#fTMn z0lsrA4%@o^#SYxLB6jbR(Y1g$3Zgp%VsTpv(+$D(=Sa|jW8c$dGb)yDyO0~+!EVI)T@pd&Nh^W)lcy^1crjR$3;sWUB%{L5e-m#y-4CaCF83~X}wIfWN(<%9YU zq|NmwVptiWQC`Y`e#QM-@^~!D+=!(Y!yGg*rn6_aQR~sR;$H95rK;|!BIZte{Bp$4 zM?sDh7kH(tm92Ig3CSg77_v@H9Zct?qY1CbbWh!~-iY`YC>HvFXUxN6_Ii;3B6>(o zJ2a}*4)qmAKWGZu*ynVTy{8$b22;xX71=aD1;$t7U0%LTS{c9PV|z>2xn2sq&vFQP zc5Lgj)4-A>LRM0fDDaf`T0UwO{obR}vu?)7B3tN659B0j8e12tOU9K_rkAQ#do1hs z1IT}3%yZNLj~zvznpopsXcKG_g6pJSg%mB=e77iuP}kkU)sg(bgW=7UdL1B?J_-@a zRHNmJ2z2I4E#v&e{^W+T*_rc}9_>yt z=pSXOodmZ<9Y%~U>Yuyv-($fY0nB5WnOmBa$Me-~JfW>{Jo zw*R`X3oh?lyMRVk4Yh7H=jqN(^v0MjWP|+(3Nh| 
[... remainder of base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/testdateoverflow.ods b/pandas/tests/io/data/excel/testdateoverflow.ods
new file mode 100644
index 0000000000000000000000000000000000000000..bb05267865303511e1e472b4f0ee48ba5a0279c8
GIT binary patch
literal 3422
[... base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/testdateoverflow.xlsb b/pandas/tests/io/data/excel/testdateoverflow.xlsb
new file mode 100644
index 0000000000000000000000000000000000000000..3d279396924b998804dd307eaee93225ea08260f
GIT binary patch
literal 9856
[... base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/testdateoverflow.xlsx b/pandas/tests/io/data/excel/testdateoverflow.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..15f0e8825339e4531b1f25ea1b6dc1c70f893dad
GIT binary patch
literal 9351
[... base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/testdtype.xls b/pandas/tests/io/data/excel/testdtype.xls
new file mode 100644
index 0000000000000000000000000000000000000000..f63357524324ff6ff97f4b1a15b6a371957ec6e1
GIT binary patch
literal 22528
[... base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/testdtype.xlsx b/pandas/tests/io/data/excel/testdtype.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..7c65263c373a3a43ee6e7adde9ec0a3abba219a9
GIT binary patch
literal 8501
[... base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/testmultiindex.ods b/pandas/tests/io/data/excel/testmultiindex.ods
new file mode 100644
index 0000000000000000000000000000000000000000..dca8d70abdc24f87706d06dd52c47ef2f73b5f09
GIT binary patch
literal 6504
[... base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/testmultiindex.xls b/pandas/tests/io/data/excel/testmultiindex.xls
new file mode 100644
index 0000000000000000000000000000000000000000..c91698be29b137a5c5760a55ff18e60991602af0
GIT binary patch
literal 40448
[... base85-encoded binary data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/testmultiindex.xlsb b/pandas/tests/io/data/excel/testmultiindex.xlsb
new file mode 100644
index 0000000000000000000000000000000000000000..a693e0c66afc20f6aeb9bea017eafb5c834b25d4
GIT binary patch
literal 23620
[... base85-encoded binary data continues below ...]
zK?(G+Akn~R&C6i=P0mJ$GL6JyILqg&m+pKomHCECFSU{V zIu#zy1*xTyuhN9mP=S&1qm$nf7>xE*P41fGpwS~x#O}uc@iTx%{v8}8*e>R`0C2Sb zXK;M`n_x8m*lC~a`cJ<%d>eljVZH^evU7cao4z^2Oi%*{xM}M@9^n9P-c0mWxbrE9_p1k6`d3-1HRa}QJlbj0q@yCQPdeYN z9UU7AWqd-XT0We@7aAy%<>@by*^l+0vJ=7)=G$`UdvdD?eO%N2s4*_k);yB0xnWFxB{cW zn7P~#aFyn9aNjGvLt?@OL+|LkvxRXc^6w8Skb;Ctf+kZ$3Hh;m zEJi#*-Po9G1F+)9=Ce2Klt34Pz|-_87&(ENAipv|LXfnz_vCo(2a6{MHkneYjIt8* zaQ1dOrhAIeCYYVVF@JX0FKb$!aay)j%himC^Wm>IK=PhFj`ncnX1yUg<8jHkicO+0 zN7XetTHg1bVoZpYgC6Y-+-r5#16(xj*i-NBw!eY$SKZ0`Z=iI7n1}mO z!H+G5iqF-@0Hl}!IzwG%!9x9KrzNL9*gPGz#8Wd5?(3STw~*JPlImiX9lv@4;gn09 zaC!!M{W1x_j4v2P(-7G!ZiTaw@J|WlwB5b8J#|0j|3>*rt7j$@(5TQDw;SdqX+dMP z7!M&LF=$D%9cPO>;}8J*uAt4)c{-VaJqLjm5~kbNGa7$DV)*=vEQ*CBLrJaoWgxk6 zr(fl8X?h`*_hLnS^5hG}Vtw9?dcxQ8$^201K6V||oYxUpI~#8Mr{}BetfHCg87bBY z4%CXoZw#C!6Nb-Cla+zD(dHGORM-H&v8<@voi*ya8sA>Md*fMqj5A__mnMM`<5Dj+ zZTsjZwRKXOz-e!K;^gwiZ>}X*Qmp`R8)RnN!sUR{Q^o5cBq^l5i*2ck7V|?11il_N z1}0dk*e~q9Uc2Q4($X?8?>hmrW8HM6bf6EC)L*AF1vC<(hX&Yk+SUi*j2EJ zb9YRLx@UL>e8{faK5_vzP3WJ{d9B=eI~>GcLw$msmwl?f_MsFrnnJ=_T=tY{RJg=r zOz3zpM>z&-Q`iLnK_r3{T+F$jo$22{Bir+etm%ABHVpFoF+2!$iE^zUc}0(0&Sr1N0W=GBl&5=7YLuV(N@Zm_S<*Xv_2L;T5H8C1o1BeA z{(PFPD9DrJ*Vh?o*A&-%lG<3Fy>8+^u^gmuunI=ZbiVz>dU-Ksg^B9{w{g^pWJ z-aaC7|D@xQdl+vDf&#_F<`L8+S{bf&Yb0eoAhQ++%?hZ+hIKQ>iD-)4(y}h29&0$S zQQocPPbV^Xw42VOV2+=)QcVysLlL2(A@u2_)$}nauk$>z7|(J&r^t7PMLVsf8YurN zERrcK`!j^8{TJV3#P066oBoyK_O;9q#p{B&a~k8QIgylJqtp1(P4VJ!lr3^X^@AK;apDJ{QyjqRm2;mLA$ zL3U?e)9`s>9qQ1xAj(goDSV}&r*gE3Xi&S!ktx5h|G?tLl=Fbhv#4nolVSOon~^hb(<1iLB)A&&DH)VI!QA9NvNG?+hwnBb+2PatGbf5l~?x)Pbxa1lIn- zH}kF@t2Yc{R6Z8KJKT%BGn!F8Ppg$w=^v(~sm+WTHuu`F6#9eps>%uc4{XGzUmqpo z^+=D=E_Y`fbWZrMU}f=m#XQO?H7zjszKq<<>nD7%rCyB>4kVe-HtzP1DZ z9JX9!)hr(;5Y+%7Nr4tG&l4UihlbQm(7=t6V%{Fq6 z?H}<|z$O&&9e^KBK;!;D_VX8O^!IPhd0EXpeIiz5=~})73$Js_*~F!Fae+dHWJ5;F~QF9847z zN5UsL7I82P^}fQ(tKhJmd6{0l8$Bz^cm*_;3c!qrfj%63-%whGcrrMY8p}udlsQX5 z?xs%{9nu2+M{hm1_!VXOBFk&qMY*BhmzQ4W%w>a%ywfdJk7uFTzM6nJu$dKtjSqK0 zH@J}Cey3GyBP+XL!R(m`h=C@;DwK$PBlo%{lMo-8YHDBvq;9|Xz(q&)+bCuuf-b^E z6n$qR8G>4K!s<%a^M@pH8()R_Oe4M%>5r|{>t|Ry-p`i!Pb7V+V?<;Xf|_@7QKyLf zVgwp4jdcaT9=yyGnwPZ7kT6c+E+O__YG>YNRvNe)&<^NZ+`)}-0mQ}#LZCCP#I)z329XW~Km-#WQKv?SXv&Lu=z!xPt*Y&u#i z)#OBqL51V1lnZbtp$2h;&o}PT_?dRmp{o^Gj8Rol9Lmk=Gm}cd)HgCk3wBv&_kR-6SH=nT8-{#gON;HO^E4vd*lY54*~)rVopD7^VX` zI;%Z~V2#E*_UW zGoeAJZi2G<*%N?u{o-HB8fLdJ6HL^&r(#$ z>>wcw%t>|XO_?^AJa#fnc1iNH!N{O3)ltfo`i}E1rDvR>SH7AhDh_YuG_EUSTojfW zitT_4j(GJgkjpjYZ~Perd(Hhg5PMwv3x5ZyQvm!W&tfoM!Hr@5Z~RUD03#Vw`5S+w zO$Ax2+-$Hy4WV5O)}kn0<7Iv(ymQ*lezykE)|TIOQy0-#^L3A(QN@+}CQ@fghs5ay<{wKJQqWTR=z&JsPU0J)ho0|BdmjiUp*5{7);Lg+se?ES10 zLR9xa=@|(meF13~unTN_?K(Pa>=dR+;*N#VWG|wd7J(14AZ&%XbAYUZo6`2n*Q-)18IB6Rq%_7)G!yQh)w5Eh_bX`g47DE}&w zPjbdWRs1bJ*}X(wt4GlGyRP4>ahni+k~N)np;-LPaTJO8Z*hg_QZp>;g+9OgHl2S2WsoEiBacXzjZbZ-E~Y|x&zkZvSQ zrJm0PSYnBvgn%A1FULjJ#0p5xgdFG%(pFNXv)MvEd5Yw;U>Y_Hph%P-2-D1dY~OwW zJk-CJA=W0_fyDth1pa^E@b9+%|54~}57i{Z0IP^cTJbJXQtsdal<4aHN~LSt%3dz< zR{g=q+a`V`QZVrFWbJZ1kTiRiNu2vpQj6i8H0mE?|5A)czyCJx>m@6~tZhhJ#;g>N z9s0s-@}hlrEqEn%1iGrfX3;HIC%4&Pz5A0)(zRJ&oDlw(J_&if0B!qhMaZI*-&3Xd^ocuN-uV*x)WKVT$D)5$!NTdcDcp{gDoEOfl1ro1B zBd@!;30eVe+n!zvh#eD>XBe5kyWiU!v5|8SkTRluXl!#RpFn`Cw{bjMMq-966Vam% zklKL@Fm44@Diz2tJ{sX}^f2zAiqEO|_EDzT;Yvr#s4uv-1jqO6?e~_5M zDS}3!0&2+`n*k2d(q=o78JNpem1^o91jQDyrt<3w07W$sJi+T3jY-#jT#Q!#6K2fN zs8xy#P;t9%8+u4cSn+iox|ld-@GP|nc`dU1U08634{?775LC=d^o$HnIUpRbHV_1r z@I0%P8z_zh7UcNBa1tzMdXkZ6a`DkLsRAM+d7W4m?+6kE{4e{M9EuD{U*yq-OI&mL zCV464CUIHfQmjdF{M(bLrJC}L8E8p8eViSMALW;mR2mUeC@>{{ROM=FN6>5e5+zbT z4fN}VU6^DgM+HDwUtRWuS=OO{<&L;5x_IKhnh 
zy%Lfaoa?xCoqo0p|8sSNBZ@YY7l1$Z{|Em59yU-}1~i(`{?%yG6ca#V7!CRUr_nT4 z2TH~dnz};5oL~02VacV>ILE>)|D_q1U~|{=4CnKrbhv?8pDW+kk7iv6o6<1dXrG90 zvr8Bi!@PaaIKWXO{x99x3(%dSqxes8u^ONu@sLMqOG&1fTs-k0j+rV2LFAOi;J>z$ z@!)hA0}@Q+;&2+xC!;`2N#o)0+kyj&%)cbiM#+LxS^{V=+7dv6b>X_d!=Jvig4Bg7 zvi>la2Oi)Vt9xl764pTWd<>0eYM+-B~On(G&3MpqxFC^oMIts`>8 zAJfH_1O?Eb+Nbnix^q$pK!bmDXI=8x#c&aT2AhlwT2fs{CUM%E3qs{vpO)* zPYd>K6sV}=8eeby7;I2qeGLHxuhCQZ311U&Z@Adsbxhjm?9(Q%Cs_NulUF=wDukZw@}Ov0iGre}S{}<$zLfKyimU zZn6tLT*Dd=BLljFNTyKBjYw(%Auf-jIm>o(^8^6xY5g&&d4sW~{{c(-$=ItjOF&!! z@W;B*N7zGrc=I<0Clz3rYcrZ2uBpo&k!n;gq`k;G<`#9zpq#8o_aK35J)lj8blrMA z)Sc>*Vw+wP=HG4y<0Uv6nJiAj3ti%;YzEn<(UitUv>sp%`F3__QNWwy+|*YaBJa>C zTWr5foHZKoV`WC8iFeAx)p8KKA+#w_oh;hqh&iom#snq-Q41@6v8N?k3P1RUQXDRX zs*p6aq4O2vQmSz4u zcO4`bv+wXr1|BG(ouKaZ^2_7j6cHzN8FElI`4c>8JJUVO2dJNtPAiQNPmAE+VF-pz z#}U*=8Zru`H-V=Zms6#*pJG(ND^fLhBUU=!%FI*eHo-jPjccdAWHvaEviSc!uaZbQn2-~M5qZu17HCl)JE$eY0qyHq{fKUsQOqkfOY zAM*V&2yZolux7QzJ?i|B%(SB49ARl;x#n?uZT z2`N+q3b+{v8fD+g>4JGB6YB!bs58S)$fumKc9R;? zrve8oIEKh~G3?)lpMK5DNinPxil%NFOUVzbP8+-55`{;a#S1CmBA<1@mT4GJjlc&U zen@?_@M0UpIv9eAg%RK;`GvX+87uqQrqy{7&$GP~O}QLF_f3r8POvwlYLi{|&J*=r zwi(dIx^tFz_-b6RX*95WYMY^-|K@W{J}NpWu68`k4o!iB6$s2Z3*BPhDYN@lF&SrH z=4h00f{RuB{ng^cv5l1T;m0tZEn1n{40<;T@v>@|A-`tCVq$P4mZ4JblPz!vm8E;6HzeJ+<^z^uL{THN zXcG6#wsl5^HS-bPe=K8g>;eXB0D&z92>bsO*niJ0B>pwG@UOrQ1PmwecT+91oaiW( zd9=Dl0#p0No#Uz~Z*~}(4S!pfIF=yPk9mJ*{(vk-9BSnE{8b;T9pb&z7v?JUC9j<6 z?R|BnWk?KX1-(8FZ|aJ9>50hqla5=gVS)7!0yuXL>sQnEFMSx1

    Federal Deposit Insurance Corporation

    Each depositor insured to at least $250,000 per insured bank

    Failed Bank List

    The FDIC is often appointed as receiver for failed banks. This page contains useful information for the customers and vendors of these banks. This includes information on the acquiring bank (if applicable), how your accounts and loans are affected, and how vendors can file claims against the receivership. Failed Financial Institution Contact Search displays point of contact information related to failed banks.

    This list includes banks which have failed since October 1, 2000. To search for banks that failed prior to those on this page, visit this link: Failures and Assistance Transactions

    Failed Bank List - CSV file (Updated on Mondays. Also opens in Excel - Excel Help)

    Bank Name | City | ST | CERT | Acquiring Institution | Closing Date | Updated Date
    Banks of Wisconsin d/b/a Bank of KenoshaKenoshaWI35386North Shore Bank, FSBMay 31, 2013May 31, 2013
    Central Arizona BankScottsdaleAZ34527Western State BankMay 14, 2013May 20, 2013
    Sunrise BankValdostaGA58185Synovus BankMay 10, 2013May 21, 2013
    Pisgah Community BankAshevilleNC58701Capital Bank, N.A.May 10, 2013May 14, 2013
    Douglas County BankDouglasvilleGA21649Hamilton State BankApril 26, 2013May 16, 2013
    Parkway BankLenoirNC57158CertusBank, National AssociationApril 26, 2013May 17, 2013
    Chipola Community BankMariannaFL58034First Federal Bank of FloridaApril 19, 2013May 16, 2013
    Heritage Bank of North FloridaOrange ParkFL26680FirstAtlantic BankApril 19, 2013May 16, 2013
    First Federal BankLexingtonKY29594Your Community BankApril 19, 2013April 23, 2013
    Gold Canyon BankGold CanyonAZ58066First Scottsdale Bank, National AssociationApril 5, 2013April 9, 2013
    Frontier BankLaGrangeGA16431HeritageBank of the SouthMarch 8, 2013March 26, 2013
    Covenant BankChicagoIL22476Liberty Bank and Trust CompanyFebruary 15, 2013March 4, 2013
    1st Regents BankAndoverMN57157First Minnesota BankJanuary 18, 2013February 28, 2013
    Westside Community BankUniversity PlaceWA33997Sunwest BankJanuary 11, 2013January 24, 2013
    Community Bank of the OzarksSunrise BeachMO27331Bank of SullivanDecember 14, 2012January 24, 2013
    Hometown Community BankBraseltonGA57928CertusBank, National AssociationNovember 16, 2012January 24, 2013
    Citizens First National BankPrincetonIL3731Heartland Bank and Trust CompanyNovember 2, 2012January 24, 2013
    Heritage Bank of FloridaLutzFL35009Centennial BankNovember 2, 2012January 24, 2013
    NOVA BankBerwynPA27148No AcquirerOctober 26, 2012January 24, 2013
    Excel BankSedaliaMO19189Simmons First National BankOctober 19, 2012January 24, 2013
    First East Side Savings BankTamaracFL28144Stearns Bank N.A.October 19, 2012January 24, 2013
    GulfSouth Private BankDestinFL58073SmartBankOctober 19, 2012January 24, 2013
    First United BankCreteIL20685Old Plank Trail Community Bank, National AssociationSeptember 28, 2012November 15, 2012
    Truman BankSt. LouisMO27316Simmons First National BankSeptember 14, 2012December 17, 2012
    First Commercial BankBloomingtonMN35246Republic Bank & Trust CompanySeptember 7, 2012December 17, 2012
    Waukegan Savings BankWaukeganIL28243First Midwest BankAugust 3, 2012October 11, 2012
    Jasper Banking CompanyJasperGA16240Stearns Bank N.A.July 27, 2012December 17, 2012
    Second Federal Savings and Loan Association of ChicagoChicagoIL27986Hinsdale Bank & Trust CompanyJuly 20, 2012January 14, 2013
    Heartland BankLeawoodKS1361Metcalf BankJuly 20, 2012December 17, 2012
    First Cherokee State BankWoodstockGA32711Community & Southern BankJuly 20, 2012October 31, 2012
    Georgia Trust BankBufordGA57847Community & Southern BankJuly 20, 2012December 17, 2012
    The Royal Palm Bank of FloridaNaplesFL57096First National Bank of the Gulf CoastJuly 20, 2012January 7, 2013
    Glasgow Savings BankGlasgowMO1056Regional Missouri BankJuly 13, 2012October 11, 2012
    Montgomery Bank & TrustAileyGA19498Ameris BankJuly 6, 2012October 31, 2012
    The Farmers Bank of LynchburgLynchburgTN1690Clayton Bank and TrustJune 15, 2012October 31, 2012
    Security Exchange BankMariettaGA35299Fidelity BankJune 15, 2012October 10, 2012
    Putnam State BankPalatkaFL27405Harbor Community BankJune 15, 2012October 10, 2012
    Waccamaw BankWhitevilleNC34515First Community BankJune 8, 2012November 8, 2012
    Farmers' and Traders' State BankShabbonaIL9257First State BankJune 8, 2012October 10, 2012
    Carolina Federal Savings BankCharlestonSC35372Bank of North CarolinaJune 8, 2012October 31, 2012
    First Capital BankKingfisherOK416F & M BankJune 8, 2012October 10, 2012
    Alabama Trust Bank, National AssociationSylacaugaAL35224Southern States BankMay 18, 2012May 20, 2013
    Security Bank, National AssociationNorth LauderdaleFL23156Banesco USAMay 4, 2012October 31, 2012
    Palm Desert National BankPalm DesertCA23632Pacific Premier BankApril 27, 2012May 17, 2013
    Plantation Federal BankPawleys IslandSC32503First Federal BankApril 27, 2012May 17, 2013
    Inter Savings Bank, fsb D/B/A InterBank, fsbMaple GroveMN31495Great Southern BankApril 27, 2012May 17, 2013
    HarVest Bank of MarylandGaithersburgMD57766SonabankApril 27, 2012May 17, 2013
    Bank of the Eastern ShoreCambridgeMD26759No AcquirerApril 27, 2012October 17, 2012
    Fort Lee Federal Savings Bank, FSBFort LeeNJ35527Alma BankApril 20, 2012May 17, 2013
    Fidelity BankDearbornMI33883The Huntington National BankMarch 30, 2012May 16, 2013
    Premier BankWilmetteIL35419International Bank of ChicagoMarch 23, 2012October 17, 2012
    Covenant Bank & TrustRock SpringGA58068Stearns Bank, N.A.March 23, 2012October 31, 2012
    New City BankChicagoIL57597No AcquirerMarch 9, 2012October 29, 2012
    Global Commerce BankDoravilleGA34046Metro City BankMarch 2, 2012October 31, 2012
    Home Savings of AmericaLittle FallsMN29178No AcquirerFebruary 24, 2012December 17, 2012
    Central Bank of GeorgiaEllavilleGA5687Ameris BankFebruary 24, 2012August 9, 2012
    SCB BankShelbyvilleIN29761First Merchants Bank, National AssociationFebruary 10, 2012March 25, 2013
    Charter National Bank and TrustHoffman EstatesIL23187Barrington Bank & Trust Company, National AssociationFebruary 10, 2012March 25, 2013
    BankEastKnoxvilleTN19869U.S.Bank National AssociationJanuary 27, 2012March 8, 2013
    Patriot Bank MinnesotaForest LakeMN34823First Resource BankJanuary 27, 2012September 12, 2012
    Tennessee Commerce BankFranklinTN35296Republic Bank & Trust CompanyJanuary 27, 2012November 20, 2012
    First Guaranty Bank and Trust Company of JacksonvilleJacksonvilleFL16579CenterState Bank of Florida, N.A.January 27, 2012September 12, 2012
    American Eagle Savings BankBoothwynPA31581Capital Bank, N.A.January 20, 2012January 25, 2013
    The First State BankStockbridgeGA19252Hamilton State BankJanuary 20, 2012January 25, 2013
    Central Florida State BankBelleviewFL57186CenterState Bank of Florida, N.A.January 20, 2012January 25, 2013
    Western National BankPhoenixAZ57917Washington FederalDecember 16, 2011August 13, 2012
    Premier Community Bank of the Emerald CoastCrestviewFL58343Summit BankDecember 16, 2011September 12, 2012
    Central Progressive BankLacombeLA19657First NBC BankNovember 18, 2011August 13, 2012
    Polk County BankJohnstonIA14194Grinnell State BankNovember 18, 2011August 15, 2012
    Community Bank of RockmartRockmartGA57860Century Bank of GeorgiaNovember 10, 2011August 13, 2012
    SunFirst BankSaint GeorgeUT57087Cache Valley BankNovember 4, 2011November 16, 2012
    Mid City Bank, Inc.OmahaNE19397Premier BankNovember 4, 2011August 15, 2012
    All American BankDes PlainesIL57759International Bank of ChicagoOctober 28, 2011August 15, 2012
    Community Banks of ColoradoGreenwood VillageCO21132Bank Midwest, N.A.October 21, 2011January 2, 2013
    Community Capital BankJonesboroGA57036State Bank and Trust CompanyOctober 21, 2011November 8, 2012
    Decatur First BankDecaturGA34392Fidelity BankOctober 21, 2011November 8, 2012
    Old Harbor BankClearwaterFL575371st United BankOctober 21, 2011November 8, 2012
    Country BankAledoIL35395Blackhawk Bank & TrustOctober 14, 2011August 15, 2012
    First State BankCranfordNJ58046Northfield BankOctober 14, 2011November 8, 2012
    Blue Ridge Savings Bank, Inc.AshevilleNC32347Bank of North CarolinaOctober 14, 2011November 8, 2012
    Piedmont Community BankGrayGA57256State Bank and Trust CompanyOctober 14, 2011January 22, 2013
    Sun Security BankEllingtonMO20115Great Southern BankOctober 7, 2011November 7, 2012
    The RiverBankWyomingMN10216Central BankOctober 7, 2011November 7, 2012
    First International BankPlanoTX33513American First National BankSeptember 30, 2011October 9, 2012
    Citizens Bank of Northern CaliforniaNevada CityCA33983Tri Counties BankSeptember 23, 2011October 9, 2012
    Bank of the CommonwealthNorfolkVA20408Southern Bank and Trust CompanySeptember 23, 2011October 9, 2012
    The First National Bank of FloridaMiltonFL25155CharterBankSeptember 9, 2011September 6, 2012
    CreekSide BankWoodstockGA58226Georgia Commerce BankSeptember 2, 2011September 6, 2012
    Patriot Bank of GeorgiaCummingGA58273Georgia Commerce BankSeptember 2, 2011November 2, 2012
    First Choice BankGenevaIL57212Inland Bank & TrustAugust 19, 2011August 15, 2012
    First Southern National BankStatesboroGA57239Heritage Bank of the SouthAugust 19, 2011November 2, 2012
    Lydian Private BankPalm BeachFL35356Sabadell United Bank, N.A.August 19, 2011November 2, 2012
    Public Savings BankHuntingdon ValleyPA34130Capital Bank, N.A.August 18, 2011August 15, 2012
    The First National Bank of OlatheOlatheKS4744Enterprise Bank & TrustAugust 12, 2011August 23, 2012
    Bank of WhitmanColfaxWA22528Columbia State BankAugust 5, 2011August 16, 2012
    Bank of ShorewoodShorewoodIL22637Heartland Bank and Trust CompanyAugust 5, 2011August 16, 2012
    Integra Bank National AssociationEvansvilleIN4392Old National BankJuly 29, 2011August 16, 2012
    BankMeridian, N.A.ColumbiaSC58222SCBT National AssociationJuly 29, 2011November 2, 2012
    Virginia Business BankRichmondVA58283Xenith BankJuly 29, 2011October 9, 2012
    Bank of ChoiceGreeleyCO2994Bank Midwest, N.A.July 22, 2011September 12, 2012
    LandMark Bank of FloridaSarasotaFL35244American Momentum BankJuly 22, 2011November 2, 2012
    Southshore Community BankApollo BeachFL58056American Momentum BankJuly 22, 2011November 2, 2012
    Summit BankPrescottAZ57442The Foothills BankJuly 15, 2011August 16, 2012
    First Peoples BankPort St. LucieFL34870Premier American Bank, N.A.July 15, 2011November 2, 2012
    High Trust BankStockbridgeGA19554Ameris BankJuly 15, 2011November 2, 2012
    One Georgia BankAtlantaGA58238Ameris BankJuly 15, 2011November 2, 2012
    Signature BankWindsorCO57835Points West Community BankJuly 8, 2011October 26, 2012
    Colorado Capital BankCastle RockCO34522First-Citizens Bank & Trust CompanyJuly 8, 2011January 15, 2013
    First Chicago Bank & TrustChicagoIL27935Northbrook Bank & Trust CompanyJuly 8, 2011September 9, 2012
    Mountain Heritage BankClaytonGA57593First American Bank and Trust CompanyJune 24, 2011November 2, 2012
    First Commercial Bank of Tampa BayTampaFL27583Stonegate BankJune 17, 2011November 2, 2012
    McIntosh State BankJacksonGA19237Hamilton State BankJune 17, 2011November 2, 2012
    Atlantic Bank and TrustCharlestonSC58420First Citizens Bank and Trust Company, Inc.June 3, 2011October 31, 2012
    First Heritage BankSnohomishWA23626Columbia State BankMay 27, 2011January 28, 2013
    Summit BankBurlingtonWA513Columbia State BankMay 20, 2011January 22, 2013
    First Georgia Banking CompanyFranklinGA57647CertusBank, National AssociationMay 20, 2011November 13, 2012
    Atlantic Southern BankMaconGA57213CertusBank, National AssociationMay 20, 2011October 31, 2012
    Coastal BankCocoa BeachFL34898Florida Community Bank, a division of Premier American Bank, N.A.May 6, 2011November 30, 2012
    Community Central BankMount ClemensMI34234Talmer Bank & TrustApril 29, 2011August 16, 2012
    The Park Avenue BankValdostaGA19797Bank of the OzarksApril 29, 2011November 30, 2012
    First Choice Community BankDallasGA58539Bank of the OzarksApril 29, 2011January 22, 2013
    Cortez Community BankBrooksvilleFL57625Florida Community Bank, a division of Premier American Bank, N.A.April 29, 2011November 30, 2012
    First National Bank of Central FloridaWinter ParkFL26297Florida Community Bank, a division of Premier American Bank, N.A.April 29, 2011November 30, 2012
    Heritage Banking GroupCarthageMS14273Trustmark National BankApril 15, 2011November 30, 2012
    Rosemount National BankRosemountMN24099Central BankApril 15, 2011August 16, 2012
    Superior BankBirminghamAL17750Superior Bank, National AssociationApril 15, 2011November 30, 2012
    Nexity BankBirminghamAL19794AloStar Bank of CommerceApril 15, 2011September 4, 2012
    New Horizons BankEast EllijayGA57705Citizens South BankApril 15, 2011August 16, 2012
    Bartow County BankCartersvilleGA21495Hamilton State BankApril 15, 2011January 22, 2013
    Nevada Commerce BankLas VegasNV35418City National BankApril 8, 2011September 9, 2012
    Western Springs National Bank and TrustWestern SpringsIL10086Heartland Bank and Trust CompanyApril 8, 2011January 22, 2013
    The Bank of CommerceWood DaleIL34292Advantage National Bank GroupMarch 25, 2011January 22, 2013
    Legacy BankMilwaukeeWI34818Seaway Bank and Trust CompanyMarch 11, 2011September 12, 2012
    First National Bank of DavisDavisOK4077The Pauls Valley National BankMarch 11, 2011August 20, 2012
    Valley Community BankSt. CharlesIL34187First State BankFebruary 25, 2011September 12, 2012
    San Luis Trust Bank, FSBSan Luis ObispoCA34783First California BankFebruary 18, 2011August 20, 2012
    Charter Oak BankNapaCA57855Bank of MarinFebruary 18, 2011September 12, 2012
    Citizens Bank of EffinghamSpringfieldGA34601Heritage Bank of the SouthFebruary 18, 2011November 2, 2012
    Habersham BankClarkesvilleGA151SCBT National AssociationFebruary 18, 2011November 2, 2012
    Canyon National BankPalm SpringsCA34692Pacific Premier BankFebruary 11, 2011September 12, 2012
    Badger State BankCassvilleWI13272Royal BankFebruary 11, 2011September 12, 2012
    Peoples State BankHamtramckMI14939First Michigan BankFebruary 11, 2011January 22, 2013
    Sunshine State Community BankPort OrangeFL35478Premier American Bank, N.A.February 11, 2011November 2, 2012
    Community First Bank ChicagoChicagoIL57948Northbrook Bank & Trust CompanyFebruary 4, 2011August 20, 2012
    North Georgia BankWatkinsvilleGA35242BankSouthFebruary 4, 2011November 2, 2012
    American Trust BankRoswellGA57432Renasant BankFebruary 4, 2011October 31, 2012
    First Community BankTaosNM12261U.S. Bank, N.A.January 28, 2011September 12, 2012
    FirsTier BankLouisvilleCO57646No AcquirerJanuary 28, 2011September 12, 2012
    Evergreen State BankStoughtonWI5328McFarland State BankJanuary 28, 2011September 12, 2012
    The First State BankCamargoOK2303Bank 7January 28, 2011September 12, 2012
    United Western BankDenverCO31293First-Citizens Bank & Trust CompanyJanuary 21, 2011September 12, 2012
    The Bank of AshevilleAshevilleNC34516First BankJanuary 21, 2011November 2, 2012
    CommunitySouth Bank & TrustEasleySC57868CertusBank, National AssociationJanuary 21, 2011November 2, 2012
    Enterprise Banking CompanyMcDonoughGA19758No AcquirerJanuary 21, 2011November 2, 2012
    Oglethorpe BankBrunswickGA57440Bank of the OzarksJanuary 14, 2011November 2, 2012
    Legacy BankScottsdaleAZ57820Enterprise Bank & TrustJanuary 7, 2011September 12, 2012
    First Commercial Bank of FloridaOrlandoFL34965First Southern BankJanuary 7, 2011November 2, 2012
    Community National BankLino LakesMN23306Farmers & Merchants Savings BankDecember 17, 2010August 20, 2012
    First Southern BankBatesvilleAR58052Southern BankDecember 17, 2010August 20, 2012
    United Americas Bank, N.A.AtlantaGA35065State Bank and Trust CompanyDecember 17, 2010November 2, 2012
    Appalachian Community Bank, FSBMcCaysvilleGA58495Peoples Bank of East TennesseeDecember 17, 2010October 31, 2012
    Chestatee State BankDawsonvilleGA34578Bank of the OzarksDecember 17, 2010November 2, 2012
    The Bank of Miami,N.A.Coral GablesFL190401st United BankDecember 17, 2010November 2, 2012
    Earthstar BankSouthamptonPA35561Polonia BankDecember 10, 2010August 20, 2012
    Paramount BankFarmington HillsMI34673Level One BankDecember 10, 2010August 20, 2012
    First Banking CenterBurlingtonWI5287First Michigan BankNovember 19, 2010August 20, 2012
    Allegiance Bank of North AmericaBala CynwydPA35078VIST BankNovember 19, 2010August 20, 2012
    Gulf State Community BankCarrabelleFL20340Centennial BankNovember 19, 2010November 2, 2012
    Copper Star BankScottsdaleAZ35463Stearns Bank, N.A.November 12, 2010August 20, 2012
    Darby Bank & Trust Co.VidaliaGA14580Ameris BankNovember 12, 2010January 15, 2013
    Tifton Banking CompanyTiftonGA57831Ameris BankNovember 12, 2010November 2, 2012
    First Vietnamese American BankWestminsterCA57885Grandpoint BankNovember 5, 2010September 12, 2012
    Pierce Commercial BankTacomaWA34411Heritage BankNovember 5, 2010August 20, 2012
    Western Commercial BankWoodland HillsCA58087First California BankNovember 5, 2010September 12, 2012
    K BankRandallstownMD31263Manufacturers and Traders Trust Company (M&T Bank)November 5, 2010August 20, 2012
    First Arizona Savings, A FSBScottsdaleAZ32582No AcquirerOctober 22, 2010August 20, 2012
    Hillcrest BankOverland ParkKS22173Hillcrest Bank, N.A.October 22, 2010August 20, 2012
    First Suburban National BankMaywoodIL16089Seaway Bank and Trust CompanyOctober 22, 2010August 20, 2012
    The First National Bank of BarnesvilleBarnesvilleGA2119United BankOctober 22, 2010November 2, 2012
    The Gordon BankGordonGA33904Morris BankOctober 22, 2010November 2, 2012
    Progress Bank of FloridaTampaFL32251Bay Cities BankOctober 22, 2010November 2, 2012
    First Bank of JacksonvilleJacksonvilleFL27573Ameris BankOctober 22, 2010November 2, 2012
    Premier BankJefferson CityMO34016Providence BankOctober 15, 2010August 20, 2012
    WestBridge Bank and Trust CompanyChesterfieldMO58205Midland States BankOctober 15, 2010August 20, 2012
    Security Savings Bank, F.S.B.OlatheKS30898Simmons First National BankOctober 15, 2010August 20, 2012
    Shoreline BankShorelineWA35250GBC International BankOctober 1, 2010August 20, 2012
    Wakulla BankCrawfordvilleFL21777Centennial BankOctober 1, 2010November 2, 2012
    North County BankArlingtonWA35053Whidbey Island BankSeptember 24, 2010August 20, 2012
    Haven Trust Bank FloridaPonte Vedra BeachFL58308First Southern BankSeptember 24, 2010November 5, 2012
    Maritime Savings BankWest AllisWI28612North Shore Bank, FSBSeptember 17, 2010August 20, 2012
    Bramble Savings BankMilfordOH27808Foundation BankSeptember 17, 2010August 20, 2012
    The Peoples BankWinderGA182Community & Southern BankSeptember 17, 2010November 5, 2012
    First Commerce Community BankDouglasvilleGA57448Community & Southern BankSeptember 17, 2010January 15, 2013
    Bank of EllijayEllijayGA58197Community & Southern BankSeptember 17, 2010January 15, 2013
    ISN BankCherry HillNJ57107Customers BankSeptember 17, 2010August 22, 2012
    Horizon BankBradentonFL35061Bank of the OzarksSeptember 10, 2010November 5, 2012
    Sonoma Valley BankSonomaCA27259Westamerica BankAugust 20, 2010September 12, 2012
    Los Padres BankSolvangCA32165Pacific Western BankAugust 20, 2010September 12, 2012
    Butte Community BankChicoCA33219Rabobank, N.A.August 20, 2010September 12, 2012
    Pacific State BankStocktonCA27090Rabobank, N.A.August 20, 2010September 12, 2012
    ShoreBankChicagoIL15640Urban Partnership BankAugust 20, 2010May 16, 2013
    Imperial Savings and Loan AssociationMartinsvilleVA31623River Community Bank, N.A.August 20, 2010August 24, 2012
    Independent National BankOcalaFL27344CenterState Bank of Florida, N.A.August 20, 2010November 5, 2012
    Community National Bank at BartowBartowFL25266CenterState Bank of Florida, N.A.August 20, 2010November 5, 2012
    Palos Bank and Trust CompanyPalos HeightsIL17599First Midwest BankAugust 13, 2010August 22, 2012
    Ravenswood BankChicagoIL34231Northbrook Bank & Trust CompanyAugust 6, 2010August 22, 2012
    LibertyBankEugeneOR31964Home Federal BankJuly 30, 2010August 22, 2012
    The Cowlitz BankLongviewWA22643Heritage BankJuly 30, 2010August 22, 2012
    Coastal Community BankPanama City BeachFL9619Centennial BankJuly 30, 2010November 5, 2012
    Bayside Savings BankPort Saint JoeFL57669Centennial BankJuly 30, 2010November 5, 2012
    Northwest Bank & TrustAcworthGA57658State Bank and Trust CompanyJuly 30, 2010November 5, 2012
    Home Valley BankCave JunctionOR23181South Valley Bank & TrustJuly 23, 2010September 12, 2012
    SouthwestUSA BankLas VegasNV35434Plaza BankJuly 23, 2010August 22, 2012
    Community Security BankNew PragueMN34486RoundbankJuly 23, 2010September 12, 2012
    Thunder BankSylvan GroveKS10506The Bennington State BankJuly 23, 2010September 13, 2012
    Williamsburg First National BankKingstreeSC17837First Citizens Bank and Trust Company, Inc.July 23, 2010November 5, 2012
    Crescent Bank and Trust CompanyJasperGA27559Renasant BankJuly 23, 2010November 5, 2012
    Sterling BankLantanaFL32536IBERIABANKJuly 23, 2010November 5, 2012
    Mainstreet Savings Bank, FSBHastingsMI28136Commercial BankJuly 16, 2010September 13, 2012
    Olde Cypress Community BankClewistonFL28864CenterState Bank of Florida, N.A.July 16, 2010November 5, 2012
    Turnberry BankAventuraFL32280NAFH National BankJuly 16, 2010November 5, 2012
    Metro Bank of Dade CountyMiamiFL25172NAFH National BankJuly 16, 2010November 5, 2012
    First National Bank of the SouthSpartanburgSC35383NAFH National BankJuly 16, 2010November 5, 2012
    Woodlands BankBlufftonSC32571Bank of the OzarksJuly 16, 2010November 5, 2012
    Home National BankBlackwellOK11636RCB BankJuly 9, 2010December 10, 2012
    USA BankPort ChesterNY58072New Century BankJuly 9, 2010September 14, 2012
    Ideal Federal Savings BankBaltimoreMD32456No AcquirerJuly 9, 2010September 14, 2012
    Bay National BankBaltimoreMD35462Bay Bank, FSBJuly 9, 2010January 15, 2013
    High Desert State BankAlbuquerqueNM35279First American BankJune 25, 2010September 14, 2012
    First National BankSavannahGA34152The Savannah Bank, N.A.June 25, 2010November 5, 2012
    Peninsula BankEnglewoodFL26563Premier American Bank, N.A.June 25, 2010November 5, 2012
    Nevada Security BankRenoNV57110Umpqua BankJune 18, 2010August 23, 2012
    Washington First International BankSeattleWA32955East West BankJune 11, 2010September 14, 2012
    TierOne BankLincolnNE29341Great Western BankJune 4, 2010September 14, 2012
    Arcola Homestead Savings BankArcolaIL31813No AcquirerJune 4, 2010September 14, 2012
    First National BankRosedaleMS15814The Jefferson BankJune 4, 2010November 5, 2012
    Sun West BankLas VegasNV34785City National BankMay 28, 2010September 14, 2012
    Granite Community Bank, NAGranite BayCA57315Tri Counties BankMay 28, 2010September 14, 2012
    Bank of Florida - TampaTampaFL57814EverBankMay 28, 2010November 5, 2012
    Bank of Florida - SouthwestNaplesFL35106EverBankMay 28, 2010November 5, 2012
    Bank of Florida - SoutheastFort LauderdaleFL57360EverBankMay 28, 2010November 5, 2012
    Pinehurst BankSaint PaulMN57735Coulee BankMay 21, 2010October 26, 2012
    Midwest Bank and Trust CompanyElmwood ParkIL18117FirstMerit Bank, N.A.May 14, 2010August 23, 2012
    Southwest Community BankSpringfieldMO34255Simmons First National BankMay 14, 2010August 23, 2012
    New Liberty BankPlymouthMI35586Bank of Ann ArborMay 14, 2010August 23, 2012
    Satilla Community BankSaint MarysGA35114Ameris BankMay 14, 2010November 5, 2012
    1st Pacific Bank of CaliforniaSan DiegoCA35517City National BankMay 7, 2010December 13, 2012
    Towne Bank of ArizonaMesaAZ57697Commerce Bank of ArizonaMay 7, 2010August 23, 2012
    Access BankChamplinMN16476PrinsBankMay 7, 2010August 23, 2012
    The Bank of BonifayBonifayFL14246First Federal Bank of FloridaMay 7, 2010November 5, 2012
    Frontier BankEverettWA22710Union Bank, N.A.April 30, 2010January 15, 2013
    BC National BanksButlerMO17792Community First BankApril 30, 2010August 23, 2012
    Champion BankCreve CoeurMO58362BankLibertyApril 30, 2010August 23, 2012
    CF BancorpPort HuronMI30005First Michigan BankApril 30, 2010January 15, 2013
    Westernbank Puerto RicoMayaguezPR31027Banco Popular de Puerto RicoApril 30, 2010November 5, 2012
    R-G Premier Bank of Puerto RicoHato ReyPR32185Scotiabank de Puerto RicoApril 30, 2010November 5, 2012
    EurobankSan JuanPR27150Oriental Bank and TrustApril 30, 2010November 5, 2012
    Wheatland BankNapervilleIL58429Wheaton Bank & TrustApril 23, 2010August 23, 2012
    Peotone Bank and Trust CompanyPeotoneIL10888First Midwest BankApril 23, 2010August 23, 2012
    Lincoln Park Savings BankChicagoIL30600Northbrook Bank & Trust CompanyApril 23, 2010August 23, 2012
    New Century BankChicagoIL34821MB Financial Bank, N.A.April 23, 2010August 23, 2012
    Citizens Bank and Trust Company of ChicagoChicagoIL34658Republic Bank of ChicagoApril 23, 2010August 23, 2012
    Broadway BankChicagoIL22853MB Financial Bank, N.A.April 23, 2010August 23, 2012
    Amcore Bank, National AssociationRockfordIL3735Harris N.A.April 23, 2010August 23, 2012
    City BankLynnwoodWA21521Whidbey Island BankApril 16, 2010September 14, 2012
    Tamalpais BankSan RafaelCA33493Union Bank, N.A.April 16, 2010August 23, 2012
    Innovative BankOaklandCA23876Center BankApril 16, 2010August 23, 2012
    Butler BankLowellMA26619People's United BankApril 16, 2010August 23, 2012
    Riverside National Bank of FloridaFort PierceFL24067TD Bank, N.A.April 16, 2010November 5, 2012
    AmericanFirst BankClermontFL57724TD Bank, N.A.April 16, 2010October 31, 2012
    First Federal Bank of North FloridaPalatkaFL28886TD Bank, N.A.April 16, 2010January 15, 2013
    Lakeside Community BankSterling HeightsMI34878No AcquirerApril 16, 2010August 23, 2012
    Beach First National BankMyrtle BeachSC34242Bank of North CarolinaApril 9, 2010November 5, 2012
    Desert Hills BankPhoenixAZ57060New York Community BankMarch 26, 2010August 23, 2012
    Unity National BankCartersvilleGA34678Bank of the OzarksMarch 26, 2010September 14, 2012
    Key West BankKey WestFL34684Centennial BankMarch 26, 2010August 23, 2012
    McIntosh Commercial BankCarrolltonGA57399CharterBankMarch 26, 2010August 23, 2012
    State Bank of AuroraAuroraMN8221Northern State BankMarch 19, 2010August 23, 2012
    First Lowndes BankFort DepositAL24957First Citizens BankMarch 19, 2010August 23, 2012
    Bank of HiawasseeHiawasseeGA10054Citizens South BankMarch 19, 2010August 23, 2012
    Appalachian Community BankEllijayGA33989Community & Southern BankMarch 19, 2010October 31, 2012
    Advanta Bank Corp.DraperUT33535No AcquirerMarch 19, 2010September 14, 2012
    Century Security BankDuluthGA58104Bank of UpsonMarch 19, 2010August 23, 2012
    American National BankParmaOH18806The National Bank and Trust CompanyMarch 19, 2010August 23, 2012
    Statewide BankCovingtonLA29561Home BankMarch 12, 2010August 23, 2012
    Old Southern BankOrlandoFL58182Centennial BankMarch 12, 2010August 23, 2012
    The Park Avenue BankNew YorkNY27096Valley National BankMarch 12, 2010August 23, 2012
    LibertyPointe BankNew YorkNY58071Valley National BankMarch 11, 2010August 23, 2012
    Centennial BankOgdenUT34430No AcquirerMarch 5, 2010September 14, 2012
    Waterfield BankGermantownMD34976No AcquirerMarch 5, 2010August 23, 2012
    Bank of IllinoisNormalIL9268Heartland Bank and Trust CompanyMarch 5, 2010August 23, 2012
    Sun American BankBoca RatonFL27126First-Citizens Bank & Trust CompanyMarch 5, 2010August 23, 2012
    Rainier Pacific BankTacomaWA38129Umpqua BankFebruary 26, 2010August 23, 2012
    Carson River Community BankCarson CityNV58352Heritage Bank of NevadaFebruary 26, 2010January 15, 2013
    La Jolla Bank, FSBLa JollaCA32423OneWest Bank, FSBFebruary 19, 2010August 24, 2012
    George Washington Savings BankOrland ParkIL29952FirstMerit Bank, N.A.February 19, 2010August 24, 2012
    The La Coste National BankLa CosteTX3287Community National BankFebruary 19, 2010September 14, 2012
    Marco Community BankMarco IslandFL57586Mutual of Omaha BankFebruary 19, 2010August 24, 2012
    1st American State Bank of MinnesotaHancockMN15448Community Development Bank, FSBFebruary 5, 2010August 24, 2012
    American Marine BankBainbridge IslandWA16730Columbia State BankJanuary 29, 2010August 24, 2012
    First Regional BankLos AngelesCA23011First-Citizens Bank & Trust CompanyJanuary 29, 2010August 24, 2012
    Community Bank and TrustCorneliaGA5702SCBT National AssociationJanuary 29, 2010January 15, 2013
    Marshall Bank, N.A.HallockMN16133United Valley BankJanuary 29, 2010August 23, 2012
    Florida Community BankImmokaleeFL5672Premier American Bank, N.A.January 29, 2010January 15, 2013
    First National Bank of GeorgiaCarrolltonGA16480Community & Southern BankJanuary 29, 2010December 13, 2012
    Columbia River BankThe DallesOR22469Columbia State BankJanuary 22, 2010September 14, 2012
    Evergreen BankSeattleWA20501Umpqua BankJanuary 22, 2010January 15, 2013
    Charter BankSanta FeNM32498Charter BankJanuary 22, 2010August 23, 2012
    Bank of LeetonLeetonMO8265Sunflower Bank, N.A.January 22, 2010January 15, 2013
    Premier American BankMiamiFL57147Premier American Bank, N.A.January 22, 2010December 13, 2012
    Barnes Banking CompanyKaysvilleUT1252No AcquirerJanuary 15, 2010August 23, 2012
    St. Stephen State BankSt. StephenMN17522First State Bank of St. JosephJanuary 15, 2010August 23, 2012
    Town Community Bank & TrustAntiochIL34705First American BankJanuary 15, 2010August 23, 2012
    Horizon BankBellinghamWA22977Washington Federal Savings and Loan AssociationJanuary 8, 2010August 23, 2012
    First Federal Bank of California, F.S.B.Santa MonicaCA28536OneWest Bank, FSBDecember 18, 2009August 23, 2012
    Imperial Capital BankLa JollaCA26348City National BankDecember 18, 2009September 5, 2012
    Independent Bankers' BankSpringfieldIL26820The Independent BankersBank (TIB)December 18, 2009August 23, 2012
    New South Federal Savings BankIrondaleAL32276Beal BankDecember 18, 2009August 23, 2012
    Citizens State BankNew BaltimoreMI1006No AcquirerDecember 18, 2009November 5, 2012
    Peoples First Community BankPanama CityFL32167Hancock BankDecember 18, 2009November 5, 2012
    RockBridge Commercial BankAtlantaGA58315No AcquirerDecember 18, 2009November 5, 2012
    SolutionsBankOverland ParkKS4731Arvest BankDecember 11, 2009August 23, 2012
    Valley Capital Bank, N.A.MesaAZ58399Enterprise Bank & TrustDecember 11, 2009August 23, 2012
    Republic Federal Bank, N.A.MiamiFL228461st United BankDecember 11, 2009November 5, 2012
    Greater Atlantic BankRestonVA32583SonabankDecember 4, 2009November 5, 2012
    Benchmark BankAuroraIL10440MB Financial Bank, N.A.December 4, 2009August 23, 2012
    AmTrust BankClevelandOH29776New York Community BankDecember 4, 2009November 5, 2012
    The Tattnall BankReidsvilleGA12080Heritage Bank of the SouthDecember 4, 2009November 5, 2012
    First Security National BankNorcrossGA26290State Bank and Trust CompanyDecember 4, 2009November 5, 2012
    The Buckhead Community BankAtlantaGA34663State Bank and Trust CompanyDecember 4, 2009November 5, 2012
    Commerce Bank of Southwest FloridaFort MyersFL58016Central BankNovember 20, 2009November 5, 2012
    Pacific Coast National BankSan ClementeCA57914Sunwest BankNovember 13, 2009August 22, 2012
    Orion BankNaplesFL22427IBERIABANKNovember 13, 2009November 5, 2012
    Century Bank, F.S.B.SarasotaFL32267IBERIABANKNovember 13, 2009August 22, 2012
    United Commercial BankSan FranciscoCA32469East West BankNovember 6, 2009November 5, 2012
    Gateway Bank of St. LouisSt. LouisMO19450Central Bank of Kansas CityNovember 6, 2009August 22, 2012
    Prosperan BankOakdaleMN35074Alerus Financial, N.A.November 6, 2009August 22, 2012
    Home Federal Savings BankDetroitMI30329Liberty Bank and Trust CompanyNovember 6, 2009August 22, 2012
    United Security BankSpartaGA22286Ameris BankNovember 6, 2009January 15, 2013
    North Houston BankHoustonTX18776U.S. Bank N.A.October 30, 2009August 22, 2012
    Madisonville State BankMadisonvilleTX33782U.S. Bank N.A.October 30, 2009August 22, 2012
    Citizens National BankTeagueTX25222U.S. Bank N.A.October 30, 2009August 22, 2012
    Park National BankChicagoIL11677U.S. Bank N.A.October 30, 2009August 22, 2012
    Pacific National BankSan FranciscoCA30006U.S. Bank N.A.October 30, 2009August 22, 2012
    California National BankLos AngelesCA34659U.S. Bank N.A.October 30, 2009September 5, 2012
    San Diego National BankSan DiegoCA23594U.S. Bank N.A.October 30, 2009August 22, 2012
    Community Bank of LemontLemontIL35291U.S. Bank N.A.October 30, 2009January 15, 2013
    Bank USA, N.A.PhoenixAZ32218U.S. Bank N.A.October 30, 2009August 22, 2012
    First DuPage BankWestmontIL35038First Midwest BankOctober 23, 2009August 22, 2012
    Riverview Community BankOtsegoMN57525Central BankOctober 23, 2009August 22, 2012
    Bank of ElmwoodRacineWI18321Tri City National BankOctober 23, 2009August 22, 2012
    Flagship National BankBradentonFL35044First Federal Bank of FloridaOctober 23, 2009August 22, 2012
    Hillcrest Bank FloridaNaplesFL58336Stonegate BankOctober 23, 2009August 22, 2012
    American United BankLawrencevilleGA57794Ameris BankOctober 23, 2009September 5, 2012
    Partners BankNaplesFL57959Stonegate BankOctober 23, 2009January 15, 2013
    San Joaquin BankBakersfieldCA23266Citizens Business BankOctober 16, 2009August 22, 2012
    Southern Colorado National BankPuebloCO57263Legacy BankOctober 2, 2009September 5, 2012
    Jennings State BankSpring GroveMN11416Central BankOctober 2, 2009August 21, 2012
    Warren BankWarrenMI34824The Huntington National BankOctober 2, 2009August 21, 2012
    Georgian BankAtlantaGA57151First Citizens Bank and Trust Company, Inc.September 25, 2009August 21, 2012
    Irwin Union Bank, F.S.B.LouisvilleKY57068First Financial Bank, N.A.September 18, 2009September 5, 2012
    Irwin Union Bank and Trust CompanyColumbusIN10100First Financial Bank, N.A.September 18, 2009August 21, 2012
    Venture BankLaceyWA22868First-Citizens Bank & Trust CompanySeptember 11, 2009August 21, 2012
    Brickwell Community BankWoodburyMN57736CorTrust Bank N.A.September 11, 2009January 15, 2013
    Corus Bank, N.A.ChicagoIL13693MB Financial Bank, N.A.September 11, 2009August 21, 2012
    First State BankFlagstaffAZ34875Sunwest BankSeptember 4, 2009January 15, 2013
    Platinum Community BankRolling MeadowsIL35030No AcquirerSeptember 4, 2009August 21, 2012
    Vantus BankSioux CityIN27732Great Southern BankSeptember 4, 2009August 21, 2012
    InBankOak ForestIL20203MB Financial Bank, N.A.September 4, 2009August 21, 2012
    First Bank of Kansas CityKansas CityMO25231Great American BankSeptember 4, 2009August 21, 2012
    Affinity BankVenturaCA27197Pacific Western BankAugust 28, 2009August 21, 2012
    Mainstreet BankForest LakeMN1909Central BankAugust 28, 2009August 21, 2012
    Bradford BankBaltimoreMD28312Manufacturers and Traders Trust Company (M&T Bank)August 28, 2009January 15, 2013
    Guaranty BankAustinTX32618BBVA CompassAugust 21, 2009August 21, 2012
    CapitalSouth BankBirminghamAL22130IBERIABANKAugust 21, 2009January 15, 2013
    First Coweta BankNewnanGA57702United BankAugust 21, 2009January 15, 2013
    ebankAtlantaGA34682Stearns Bank, N.A.August 21, 2009August 21, 2012
    Community Bank of NevadaLas VegasNV34043No AcquirerAugust 14, 2009August 21, 2012
    Community Bank of ArizonaPhoenixAZ57645MidFirst BankAugust 14, 2009August 21, 2012
    Union Bank, National AssociationGilbertAZ34485MidFirst BankAugust 14, 2009August 21, 2012
    Colonial BankMontgomeryAL9609Branch Banking & Trust Company, (BB&T)August 14, 2009September 5, 2012
    Dwelling House Savings and Loan AssociationPittsburghPA31559PNC Bank, N.A.August 14, 2009January 15, 2013
    Community First BankPrinevilleOR23268Home Federal BankAugust 7, 2009January 15, 2013
    Community National Bank of Sarasota CountyVeniceFL27183Stearns Bank, N.A.August 7, 2009August 20, 2012
    First State BankSarasotaFL27364Stearns Bank, N.A.August 7, 2009August 20, 2012
    Mutual BankHarveyIL18659United Central BankJuly 31, 2009August 20, 2012
    First BankAmericanoElizabethNJ34270Crown BankJuly 31, 2009August 20, 2012
    Peoples Community BankWest ChesterOH32288First Financial Bank, N.A.July 31, 2009August 20, 2012
    Integrity BankJupiterFL57604Stonegate BankJuly 31, 2009August 20, 2012
    First State Bank of AltusAltusOK9873Herring BankJuly 31, 2009August 20, 2012
    Security Bank of Jones CountyGrayGA8486State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of Houston CountyPerryGA27048State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of Bibb CountyMaconGA27367State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of North MetroWoodstockGA57105State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of North FultonAlpharettaGA57430State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of Gwinnett CountySuwaneeGA57346State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Waterford Village BankWilliamsvilleNY58065Evans Bank, N.A.July 24, 2009August 20, 2012
    Temecula Valley BankTemeculaCA34341First-Citizens Bank & Trust CompanyJuly 17, 2009August 20, 2012
    Vineyard BankRancho CucamongaCA23556California Bank & TrustJuly 17, 2009August 20, 2012
    BankFirstSioux FallsSD34103Alerus Financial, N.A.July 17, 2009August 20, 2012
    First Piedmont BankWinderGA34594First American Bank and Trust CompanyJuly 17, 2009January 15, 2013
    Bank of WyomingThermopolisWY22754Central Bank & TrustJuly 10, 2009August 20, 2012
    Founders BankWorthIL18390The PrivateBank and Trust CompanyJuly 2, 2009August 20, 2012
    Millennium State Bank of TexasDallasTX57667State Bank of TexasJuly 2, 2009October 26, 2012
    First National Bank of DanvilleDanvilleIL3644First Financial Bank, N.A.July 2, 2009August 20, 2012
    Elizabeth State BankElizabethIL9262Galena State Bank and Trust CompanyJuly 2, 2009August 20, 2012
    Rock River BankOregonIL15302The Harvard State BankJuly 2, 2009August 20, 2012
    First State Bank of WinchesterWinchesterIL11710The First National Bank of BeardstownJuly 2, 2009August 20, 2012
    John Warner BankClintonIL12093State Bank of LincolnJuly 2, 2009August 20, 2012
    Mirae BankLos AngelesCA57332Wilshire State BankJune 26, 2009August 20, 2012
    MetroPacific BankIrvineCA57893Sunwest BankJune 26, 2009August 20, 2012
    Horizon BankPine CityMN9744Stearns Bank, N.A.June 26, 2009August 20, 2012
    Neighborhood Community BankNewnanGA35285CharterBankJune 26, 2009August 20, 2012
    Community Bank of West GeorgiaVilla RicaGA57436No AcquirerJune 26, 2009August 17, 2012
    First National Bank of AnthonyAnthonyKS4614Bank of KansasJune 19, 2009August 17, 2012
    Cooperative BankWilmingtonNC27837First BankJune 19, 2009August 17, 2012
    Southern Community BankFayettevilleGA35251United Community BankJune 19, 2009August 17, 2012
    Bank of LincolnwoodLincolnwoodIL17309Republic Bank of ChicagoJune 5, 2009August 17, 2012
    Citizens National BankMacombIL5757Morton Community BankMay 22, 2009September 4, 2012
    Strategic Capital BankChampaignIL35175Midland States BankMay 22, 2009September 4, 2012
    BankUnited, FSBCoral GablesFL32247BankUnitedMay 21, 2009August 17, 2012
    Westsound BankBremertonWA34843Kitsap BankMay 8, 2009September 4, 2012
    America West BankLaytonUT35461Cache Valley BankMay 1, 2009August 17, 2012
    Citizens Community BankRidgewoodNJ57563North Jersey Community BankMay 1, 2009September 4, 2012
    Silverton Bank, NAAtlantaGA26535No AcquirerMay 1, 2009August 17, 2012
    First Bank of IdahoKetchumID34396U.S. Bank, N.A.April 24, 2009August 17, 2012
    First Bank of Beverly HillsCalabasasCA32069No AcquirerApril 24, 2009September 4, 2012
    Michigan Heritage BankFarmington HillsMI34369Level One BankApril 24, 2009August 17, 2012
    American Southern BankKennesawGA57943Bank of North GeorgiaApril 24, 2009August 17, 2012
    Great Basin Bank of NevadaElkoNV33824Nevada State BankApril 17, 2009September 4, 2012
    American Sterling BankSugar CreekMO8266Metcalf BankApril 17, 2009August 31, 2012
    New Frontier BankGreeleyCO34881No AcquirerApril 10, 2009September 4, 2012
    Cape Fear BankWilmingtonNC34639First Federal Savings and Loan AssociationApril 10, 2009August 17, 2012
    Omni National BankAtlantaGA22238No AcquirerMarch 27, 2009August 17, 2012
    TeamBank, NAPaolaKS4754Great Southern BankMarch 20, 2009August 17, 2012
    Colorado National BankColorado SpringsCO18896Herring BankMarch 20, 2009August 17, 2012
    FirstCity BankStockbridgeGA18243No AcquirerMarch 20, 2009August 17, 2012
    Freedom Bank of GeorgiaCommerceGA57558Northeast Georgia BankMarch 6, 2009August 17, 2012
    Security Savings BankHendersonNV34820Bank of NevadaFebruary 27, 2009September 7, 2012
    Heritage Community BankGlenwoodIL20078MB Financial Bank, N.A.February 27, 2009August 17, 2012
    Silver Falls BankSilvertonOR35399Citizens BankFebruary 20, 2009August 17, 2012
    Pinnacle Bank of OregonBeavertonOR57342Washington Trust Bank of SpokaneFebruary 13, 2009August 17, 2012
    Corn Belt Bank & Trust Co.PittsfieldIL16500The Carlinville National BankFebruary 13, 2009August 17, 2012
    Riverside Bank of the Gulf CoastCape CoralFL34563TIB BankFebruary 13, 2009August 17, 2012
    Sherman County BankLoup CityNE5431Heritage BankFebruary 13, 2009August 17, 2012
    County BankMercedCA22574Westamerica BankFebruary 6, 2009September 4, 2012
    Alliance BankCulver CityCA23124California Bank & TrustFebruary 6, 2009August 16, 2012
    FirstBank Financial ServicesMcDonoughGA57017Regions BankFebruary 6, 2009August 16, 2012
    Ocala National BankOcalaFL26538CenterState Bank of Florida, N.A.January 30, 2009September 4, 2012
    Suburban FSBCroftonMD30763Bank of EssexJanuary 30, 2009August 16, 2012
    MagnetBankSalt Lake CityUT58001No AcquirerJanuary 30, 2009August 16, 2012
    1st Centennial BankRedlandsCA33025First California BankJanuary 23, 2009August 16, 2012
    Bank of Clark CountyVancouverWA34959Umpqua BankJanuary 16, 2009August 16, 2012
    National Bank of CommerceBerkeleyIL19733Republic Bank of ChicagoJanuary 16, 2009August 16, 2012
    Sanderson State Bank (En Espanol)SandersonTX11568The Pecos County State BankDecember 12, 2008September 4, 2012
    Haven Trust BankDuluthGA35379Branch Banking & Trust Company, (BB&T)December 12, 2008August 16, 2012
    First Georgia Community BankJacksonGA34301United BankDecember 5, 2008August 16, 2012
    PFF Bank & TrustPomonaCA28344U.S. Bank, N.A.November 21, 2008January 4, 2013
    Downey Savings & LoanNewport BeachCA30968U.S. Bank, N.A.November 21, 2008January 4, 2013
    Community BankLoganvilleGA16490Bank of EssexNovember 21, 2008September 4, 2012
    Security Pacific BankLos AngelesCA23595Pacific Western BankNovember 7, 2008August 28, 2012
    Franklin Bank, SSBHoustonTX26870Prosperity BankNovember 7, 2008August 16, 2012
    Freedom BankBradentonFL57930Fifth Third BankOctober 31, 2008August 16, 2012
    Alpha Bank & TrustAlpharettaGA58241Stearns Bank, N.A.October 24, 2008August 16, 2012
    Meridian BankEldredIL13789National BankOctober 10, 2008May 31, 2012
    Main Street BankNorthvilleMI57654Monroe Bank & TrustOctober 10, 2008August 16, 2012
    Washington Mutual Bank (Including its subsidiary Washington Mutual Bank FSB)HendersonNV32633JP Morgan Chase BankSeptember 25, 2008August 16, 2012
    AmeribankNorthforkWV6782The Citizens Savings Bank; Pioneer Community Bank, Inc.September 19, 2008August 16, 2012
    Silver State Bank (En Espanol)HendersonNV34194Nevada State BankSeptember 5, 2008August 16, 2012
    Integrity BankAlpharettaGA35469Regions BankAugust 29, 2008August 16, 2012
    Columbian Bank & TrustTopekaKS22728Citizens Bank & TrustAugust 22, 2008August 16, 2012
    First Priority BankBradentonFL57523SunTrust BankAugust 1, 2008August 16, 2012
    First Heritage Bank, NANewport BeachCA57961Mutual of Omaha BankJuly 25, 2008August 28, 2012
    First National Bank of NevadaRenoNV27011Mutual of Omaha BankJuly 25, 2008August 28, 2012
    IndyMac BankPasadenaCA29730OneWest Bank, FSBJuly 11, 2008August 28, 2012
    First Integrity Bank, NAStaplesMN12736First International Bank and TrustMay 30, 2008August 28, 2012
    ANB Financial, NABentonvilleAR33901Pulaski Bank and Trust CompanyMay 9, 2008August 28, 2012
    Hume BankHumeMO1971Security BankMarch 7, 2008August 28, 2012
    Douglass National BankKansas CityMO24660Liberty Bank and Trust CompanyJanuary 25, 2008October 26, 2012
    Miami Valley BankLakeviewOH16848The Citizens Banking CompanyOctober 4, 2007August 28, 2012
    NetBankAlpharettaGA32575ING DIRECTSeptember 28, 2007August 28, 2012
    Metropolitan Savings BankPittsburghPA35353Allegheny Valley Bank of PittsburghFebruary 2, 2007October 27, 2010
    Bank of EphraimEphraimUT1249Far West BankJune 25, 2004April 9, 2008
    Reliance BankWhite PlainsNY26778Union State BankMarch 19, 2004April 9, 2008
    Guaranty National Bank of TallahasseeTallahasseeFL26838Hancock Bank of FloridaMarch 12, 2004June 5, 2012
    Dollar Savings BankNewarkNJ31330No AcquirerFebruary 14, 2004April 9, 2008
    Pulaski Savings BankPhiladelphiaPA27203Earthstar BankNovember 14, 2003July 22, 2005
    First National Bank of BlanchardvilleBlanchardvilleWI11639The Park BankMay 9, 2003June 5, 2012
    Southern Pacific BankTorranceCA27094Beal BankFebruary 7, 2003October 20, 2008
    Farmers Bank of CheneyvilleCheneyvilleLA16445Sabine State Bank & TrustDecember 17, 2002October 20, 2004
    Bank of AlamoAlamoTN9961No AcquirerNovember 8, 2002March 18, 2005
    AmTrade International Bank (En Espanol)AtlantaGA33784No AcquirerSeptember 30, 2002September 11, 2006
    Universal Federal Savings BankChicagoIL29355Chicago Community BankJune 27, 2002April 9, 2008
    Connecticut Bank of CommerceStamfordCT19183Hudson United BankJune 26, 2002February 14, 2012
    New Century BankShelby TownshipMI34979No AcquirerMarch 28, 2002March 18, 2005
    Net 1st National BankBoca RatonFL26652Bank Leumi USAMarch 1, 2002April 9, 2008
    NextBank, NAPhoenixAZ22314No AcquirerFebruary 7, 2002August 27, 2010
    Oakwood Deposit Bank Co.OakwoodOH8966The State Bank & Trust CompanyFebruary 1, 2002October 25, 2012
    Bank of Sierra BlancaSierra BlancaTX22002The Security State Bank of PecosJanuary 18, 2002November 6, 2003
    Hamilton Bank, NA (En Espanol)MiamiFL24382Israel Discount Bank of New YorkJanuary 11, 2002June 5, 2012
    Sinclair National BankGravetteAR34248Delta Trust & BankSeptember 7, 2001February 10, 2004
    Superior Bank, FSBHinsdaleIL32646Superior Federal, FSBJuly 27, 2001June 5, 2012
    Malta National BankMaltaOH6629North Valley BankMay 3, 2001November 18, 2002
    First Alliance Bank & Trust Co.ManchesterNH34264Southern New Hampshire Bank & TrustFebruary 2, 2001February 18, 2003
    National State Bank of MetropolisMetropolisIL3815Banterra Bank of MarionDecember 14, 2000March 17, 2005
    Bank of HonoluluHonoluluHI21029Bank of the OrientOctober 13, 2000March 17, 2005
    diff --git a/pandas/tests/io/data/html/spam.html b/pandas/tests/io/data/html/spam.html
    new file mode 100644
    index 00000000..a8e445ff
    --- /dev/null
    +++ b/pandas/tests/io/data/html/spam.html
    @@ -0,0 +1,797 @@
    Show Foods
    National Nutrient Database
    National Nutrient Database for Standard Reference, Release 25
    Basic Report

    Nutrient data for 07908, Luncheon meat, pork with ham, minced, canned, includes SPAM (Hormel)


    Nutrient values and weights are for edible portion

    Nutrient                              Unit   Value per 100.0 g   Value per 1 oz, 1 NLEA serving (56 g)
    Proximates
    Water                                 g      51.70               28.95
    Energy                                kcal   315                 176
    Protein                               g      13.40               7.50
    Total lipid (fat)                     g      26.60               14.90
    Carbohydrate, by difference           g      4.60                2.58
    Fiber, total dietary                  g      0.0                 0.0
    Sugars, total                         g      0.00                0.00
    Minerals
    Calcium, Ca                           mg     0                   0
    Iron, Fe                              mg     0.64                0.36
    Magnesium, Mg                         mg     14                  8
    Phosphorus, P                         mg     151                 85
    Potassium, K                          mg     409                 229
    Sodium, Na                            mg     1411                790
    Zinc, Zn                              mg     1.59                0.89
    Vitamins
    Vitamin C, total ascorbic acid        mg     0.0                 0.0
    Thiamin                               mg     0.317               0.178
    Riboflavin                            mg     0.176               0.099
    Niacin                                mg     3.530               1.977
    Vitamin B-6                           mg     0.218               0.122
    Folate, DFE                           µg     3                   2
    Vitamin B-12                          µg     0.45                0.25
    Vitamin A, RAE                        µg     0                   0
    Vitamin A, IU                         IU     0                   0
    Vitamin E (alpha-tocopherol)          mg     0.42                0.24
    Vitamin D (D2 + D3)                   µg     0.6                 0.3
    Vitamin D                             IU     26                  15
    Vitamin K (phylloquinone)             µg     0.0                 0.0
    Lipids
    Fatty acids, total saturated          g      9.987               5.593
    Fatty acids, total monounsaturated    g      13.505              7.563
    Fatty acids, total polyunsaturated    g      2.019               1.131
    Cholesterol                           mg     71                  40
    Other
    Caffeine                              mg     0                   0
    \ No newline at end of file
    diff --git a/pandas/tests/io/data/html/valid_markup.html b/pandas/tests/io/data/html/valid_markup.html
    new file mode 100644
    index 00000000..0130e9ed
    --- /dev/null
    +++ b/pandas/tests/io/data/html/valid_markup.html
    @@ -0,0 +1,62 @@
    ab
    067
    140
    294
    370
    ab
    067
    140
    diff --git a/pandas/tests/io/data/html/wikipedia_states.html b/pandas/tests/io/data/html/wikipedia_states.html
    new file mode 100644
    index 00000000..f1a4c4d2
    --- /dev/null
    +++ b/pandas/tests/io/data/html/wikipedia_states.html
    @@ -0,0 +1,1756 @@
    List of U.S. states and territories by area - Wikipedia, the free encyclopedia

    List of U.S. states and territories by area

    From Wikipedia, the free encyclopedia

    Image shows the 50 states by area. Check the legend for more details.

    This is a complete list of the states of the United States and its major territories ordered by total area, land area, and water area. The water area figures include inland, coastal, Great Lakes, and territorial waters. Glaciers and intermittent water features are counted as land area.[1]


    Area by state/territory

    Total area[2] | Land area[2] | Water[2]
    State/territory | Rank | sq mi | km² | Rank | sq mi | km² | % land | sq mi | km² | % water
     Alaska!C 1665,384.041,723,337!C 1570,640.951,477,95385.76%94,743.10245,38414.24%
     Texas!B9993068528194 2268,596.46695,662!B9993068528194 2261,231.71676,58797.26%7,364.7519,0752.74%
     California!B9989013877113 3163,694.74423,967!B9989013877113 3155,779.22403,46695.16%7,915.5220,5014.84%
     Montana!B9986137056388 4147,039.71380,831!B9986137056388 4145,545.80376,96298.98%1,493.913,8691.02%
     New Mexico!B9983905620875 5121,590.30314,917!B9983905620875 5121,298.15314,16199.76%292.157570.24%
     Arizona!B9982082405307 6113,990.30295,234!B9982082405307 6113,594.08294,20799.65%396.221,0260.35%
     Nevada!B9980540898509 7110,571.82286,380!B9980540898509 7109,781.18284,33299.28%790.652,0480.72%
     Colorado!B9979205584583 8104,093.67269,601!B9979205584583 8103,641.89268,43199.57%451.781,1700.43%
     Oregon!B9978027754226 998,378.54254,799!B9976974149070 1095,988.01248,60897.57%2,390.536,1912.43%
     Wyoming!B9976974149070 1097,813.01253,335!B9978027754226 997,093.14251,47099.26%719.871,8640.74%
     Michigan!B9976021047272 1196,713.51250,487!B9969089575466 2256,538.90146,43558.46%40,174.61104,05241.54%
     Minnesota!B9975150933502 1286,935.83225,163!B9973609426703 1479,626.74206,23291.59%7,309.0918,9308.41%
     Utah!B9974350506425 1384,896.88219,882!B9975150933502 1282,169.62212,81896.79%2,727.267,0643.21%
     Idaho!B9973609426703 1483,568.95216,443!B9976021047272 1182,643.12214,04598.89%925.832,3981.11%
     Kansas!B9972919497988 1582,278.36213,100!B9974350506425 1381,758.72211,75499.37%519.641,3460.63%
     Nebraska!B9972274112777 1677,347.81200,330!B9972919497988 1576,824.17198,97499.32%523.641,3560.68%
     South Dakota!B9971667866559 1777,115.68199,729!B9972274112777 1675,811.00196,35098.31%1,304.683,3791.69%
     Washington!B9971096282421 1871,297.95184,661!B9970042677264 2066,455.52172,11993.21%4,842.4312,5426.79%
     North Dakota!B9970555610208 1970,698.32183,108!B9971667866559 1769,000.80178,71197.60%1,697.524,3972.40%
     Oklahoma!B9970042677264 2069,898.87181,037!B9970555610208 1968,594.92177,66098.13%1,303.953,3771.87%
     Missouri!B9969554775622 2169,706.99180,540!B9971096282421 1868,741.52178,04098.61%965.472,5011.39%
     Florida!B9969089575466 2265,757.70170,312!B9967419034619 2653,624.76138,88781.55%12,132.9431,42418.45%
     Wisconsin!B9968645057840 2365,496.38169,635!B9967811241751 2554,157.80140,26882.69%11,338.5729,36717.31%
     Georgia!B9968219461696 2459,425.15153,910!B9969554775622 2157,513.49148,95996.78%1,911.664,9513.22%
     Illinois!B9967811241751 2557,913.55149,995!B9968219461696 2455,518.93143,79395.87%2,394.626,2024.13%
     Iowa!B9967419034619 2656,272.81145,746!B9968645057840 2355,857.13144,66999.26%415.681,0770.74%
     New York!B9967041631339 2754,554.98141,297!B9965988026183 3047,126.40122,05786.38%7,428.5819,24013.62%
     North Carolina!B9966677954898 2853,819.16139,391!B9966327041700 2948,617.91125,92090.34%5,201.2513,4719.66%
     Arkansas!B9966327041700 2953,178.55137,732!B9967041631339 2752,035.48134,77197.85%1,143.072,9612.15%
     Alabama!B9965988026183 3052,420.07135,767!B9966677954898 2850,645.33131,17196.61%1,774.744,5973.39%
     Louisiana!B9965660127955 3152,378.13135,659!B9965034924385 3343,203.90111,89882.48%9,174.2323,76117.52%
     Mississippi!B9965342640972 3248,431.78125,438!B9965660127955 3146,923.27121,53196.89%1,508.513,9073.11%
     Pennsylvania!B9965034924385 3346,054.35119,280!B9965342640972 3244,742.70115,88397.15%1,311.643,3972.85%
     Ohio!B9964736394753 3444,825.58116,098!B9964446519385 3540,860.69105,82991.15%3,964.8910,2698.85%
     Virginia!B9964446519385 3542,774.93110,787!B9964164810615 3639,490.09102,27992.32%3,284.848,5087.68%
     Tennessee!B9964164810615 3642,144.25109,153!B9964736394753 3441,234.90106,79897.84%909.362,3552.16%
     Kentucky!B9963890820873 3740,407.80104,656!B9963890820873 3739,486.34102,26997.72%921.462,3872.28%
     Indiana!B9963624138402 3836,419.5594,326!B9963624138402 3835,826.1192,78998.37%593.441,5371.63%
     Maine!B9963364383538 3935,379.7491,633!B9963364383538 3930,842.9279,88387.18%4,536.8211,75012.82%
     South Carolina!B9963111205458 4032,020.4982,933!B9963111205458 4030,060.7077,85793.88%1,959.795,0766.12%
     West Virginia!B9962864279332 4124,230.0462,756!B9962864279332 4124,038.2162,25999.21%191.834970.79%
     Maryland!B9962623303817 4212,405.9332,131!B9962623303817 429,707.2425,14278.25%2,698.696,99021.75%
     Hawaii!B9962387998843 4310,931.7228,313!B9961498523982 476,422.6316,63558.75%4,509.0911,67841.25%
     Massachusetts!B9962158103660 4410,554.3927,336!B9961933375102 457,800.0620,20273.90%2,754.337,13426.10%
     Vermont!B9961933375102 459,616.3624,906!B9962387998843 439,216.6623,87195.84%399.711,0354.16%
     New Hampshire!B9961713586035 469,349.1624,214!B9962158103660 448,952.6523,18795.76%396.511,0274.24%
     New Jersey!B9961498523982 478,722.5822,591!B9961713586035 467,354.2219,04784.31%1,368.363,54415.69%
     Connecticut!B9961287989890 485,543.4114,357!B9961287989890 484,842.3612,54287.35%701.061,81612.65%
     Delaware!B9961081797018 492,488.726,446!B9961081797018 491,948.545,04778.29%540.181,39921.71%
     Rhode Island!B9960879769945 501,544.894,001!B9960879769945 501,033.812,67866.92%511.071,32433.08%
     District of Columbia68.3417761.0515889.33%7.291910.67%
     Puerto Rico5,324.8413,7913,423.788,86864.30%1,901.074,92435.70%
     Northern Mariana Islands1,975.575,117182.334729.23%1,793.244,64490.77%
     United States Virgin Islands732.931,898134.3234818.33%598.611,55081.67%
     American Samoa581.051,50576.4619813.16%504.601,30786.84%
     Guam570.621,478209.8054336.77%360.8293563.23%
    United States Minor Outlying Islands[3][a]16.04116.041————
    United States Contiguous United StatesTotal3,120,426.478,081,8672,954,841.427,653,00494.69%165,584.6428,8625.31%
    United States 50 states and D.C.Total3,796,742.239,833,5173,531,905.439,147,59393.02%264,836.79685,9246.98%
    United States All U.S. territoryTotal3,805,943.269,857,3483,535,948.129,158,06492.91%269,995.13699,2847.09%

    Area by division

    Total area[2] | Land area[2] | Water[2]
    Division | Rank | sq mi | km² | Rank | sq mi | km² | % land | Rank | sq mi | km² | % water
    East North Central!B9983905620875 5301,368.57780,541!B9982082405307 6242,902.44629,114!B9993068528194 258,466.13151,427
    East South Central!B9980540898509 7183,403.89475,014!B9980540898509 7178,289.83461,769!B9978027754226 95,114.6013,247
    Middle Atlantic!B9979205584583 8109,331.89283,168!B9979205584583 899,223.32256,987!B9982082405307 610,108.5726,181
    Mountain!B9993068528194 2863,564.632,236,622!B9993068528194 2855,766.982,216,426!B9979205584583 87,797.6520,196
    New England!B9978027754226 971,987.96186,448!B9978027754226 962,668.46162,311!B9980540898509 79,299.5024,086
    Pacific!C 11,009,687.002,615,077!C 1895,286.332,318,781!C 1114,400.67296,296
    South Atlantic!B9982082405307 6292,990.46758,842!B9983905620875 5265,061.97686,507!B9989013877113 327,928.4972,334
    West North Central!B9989013877113 3520,355.801,347,715!B9989013877113 3507,620.081,314,730!B9983905620875 512,735.7232,985
    West South Central!B9986137056388 4444,052.011,150,089!B9986137056388 4425,066.011,100,916!B9986137056388 418,986.0049,174

    Area by region

    Total area[2] | Land area[2] | Water[2]
    Region | Rank | sq mi | km² | Rank | sq mi | km² | % land | Rank | sq mi | km² | % water
    Midwest!B9989013877113 3821,724.382,128,256!B9989013877113 3750,522.521,943,844!B9993068528194 271,201.86184,412
    Northeast!B9986137056388 4181,319.85469,616!B9986137056388 4161,911.78419,350!B9986137056388 419,408.0750,267
    South!B9993068528194 2920,446.372,383,945!B9993068528194 2868,417.822,249,192!B9989013877113 352,028.55134,753
    West!C 11,873,251.634,851,699!C 11,751,053.314,535,207!C 1122,198.32316,492

    See also

    United States portal

    Notes

    1. ^ Areas were not published in the 2010 census, unlike previous years, as the U.S. Census Bureau no longer collects data on the Minor Outlying Islands.[2]

    References

    1. ^ Census 2000 Geographic Terms and Concepts, Census 2000 Geography Glossary, U.S. Census Bureau. Accessed 2007-07-10
    2. ^ a b c d e f g h i j "United States Summary: 2010, Population and Housing Unit Counts, 2010 Census of Population and Housing" (PDF). United States Census Bureau. September 2012. pp. V–2, 1 & 41 (Tables 1 & 18). Retrieved February 7, 2014.
    3. ^ "United States Summary: 2010, Population and Housing Unit Counts, 2000 Census of Population and Housing" (PDF). United States Census Bureau. April 2004. p. 1 (Table 1). Retrieved February 10, 2014.

    External links

    diff --git a/pandas/tests/io/data/html_encoding/chinese_utf-16.html b/pandas/tests/io/data/html_encoding/chinese_utf-16.html
    new file mode 100644
    index 0000000000000000000000000000000000000000..59fffc0d19c5731ad445d2b1c15b68ef5337e8ab
    GIT binary patch
    [binary literal 824 omitted]

    diff --git a/pandas/tests/io/data/html_encoding/chinese_utf-8.html b/pandas/tests/io/data/html_encoding/chinese_utf-8.html
    new file mode 100644
    index 00000000..ad1ca33a
    --- /dev/null
    +++ b/pandas/tests/io/data/html_encoding/chinese_utf-8.html
    @@ -0,0 +1,26 @@
    01
    0 漊煻獌 漊煻獌
    1 袟袘觕 袟袘觕
    2 埱娵徖 埱娵徖
    \ No newline at end of file
    diff --git a/pandas/tests/io/data/html_encoding/letz_latin1.html b/pandas/tests/io/data/html_encoding/letz_latin1.html
    new file mode 100644
    index 00000000..7b4b99cb
    --- /dev/null
    +++ b/pandas/tests/io/data/html_encoding/letz_latin1.html
    @@ -0,0 +1,26 @@
    01
    0 Gét Gét
    1
    2 iech iech
    \ No newline at end of file
    diff --git a/pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 b/pandas/tests/io/data/legacy_hdf/datetimetz_object.h5
    new file mode 100644
    index 0000000000000000000000000000000000000000..8cb4eda470398cf22278829cc91b531dbdca661a
    GIT binary patch
    [binary literal 106271 omitted]

    diff --git a/pandas/tests/io/data/legacy_hdf/incompatible_dataset.h5 b/pandas/tests/io/data/legacy_hdf/incompatible_dataset.h5
    new file mode 100644
    index 0000000000000000000000000000000000000000..50fbee0f5018b48c8df5a1f4c82c2fd7092a959b
    GIT binary patch
    [binary literal 4480 omitted]

    diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5
    new file mode 100644
    index 0000000000000000000000000000000000000000..540251d9fae86f0d691e0e7748e0249af55e1d54
    GIT binary patch
    [binary literal 1064200 omitted]

    diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5
    new file mode 100644
    index 0000000000000000000000000000000000000000..3863d714a315b79de100e8446db766ec7dc2e82c
    GIT binary patch
    [binary literal 72279 omitted]

    diff --git a/pandas/tests/io/data/legacy_hdf/pytables_native2.h5 b/pandas/tests/io/data/legacy_hdf/pytables_native2.h5
    new file mode 100644
    index 0000000000000000000000000000000000000000..4786eea077533929868e41b46764b039b2c10ed7
    GIT binary patch
    [binary literal 12336 omitted]

    diff --git a/pandas/tests/io/data/legacy_msgpack/0.20.3/0.20.3_x86_64_darwin_3.5.2.msgpack b/pandas/tests/io/data/legacy_msgpack/0.20.3/0.20.3_x86_64_darwin_3.5.2.msgpack
    new file mode 100644
    index 0000000000000000000000000000000000000000..7a546a82ae766c99af5056604eaa720c321b4abb
    GIT binary patch
    [binary literal 118654 omitted]
z+D>h+c2GO2(Q1s^N$sq5QDfCOwX51q?XLDvd#b(E-fADUui8)TuMSWLs)N+Q>JW9P zI!qm|j!;Lcqtwyr7Jl|xU8*iqm#Zt(mFg;WwYo-KtFBYms~gmf>Lzuwx<%cpZd13bJJg-(E_JuM zN8PLLQ}?R})Pw3F^{{$GJ*pm4kELvBEdPTjeUQ@5D zH`JTzE%ml~N4=}wQ}3$})Q9RL^|AUyeX2fFpQ|s_m+C9^wfaVVtG-j;s~^;l>L>NH z`bGV!epA1zKh&S(69jn@QC)Fe&T6iwAMP1g*KXr^Xqw&rNA z=4rkbXrY!sOQuS~;z}Rza(%RnjVJ zRkW&FHLbc z+G_2z_F4z6qZX~jXq~jqS{E%=i_^Ml-L&pn53Q%xOY5!m(fVrrwEo%vZJ;(t8>|h{ zhHAsK;o1moq&7+$t&P#fYU8x=+5~N)Hc6YTP0^-m)3oW@3~i=1OPj6D(dKINwE5Zs zZK1YETdXb7;1`= zK5JjJui7{5yY@r-sr}M^Yk#!A+CPobX`Rtoozr<;&_!L+WnIx#UDI{l(1~v9mTv2g z?&_ZI>wzBX3G{?|B0aI5L{F+G)068d^ptukJ++=jPphZX)9V@ZjCv+Lvz|rIs%O)) z>pAqCdM-V;o=4BC=hO4+1@wY?A-%9(L@%lr(~IjR^pbihy|i9NFRPc+%j*^Nih3oz zvR*~6s#nvi>oxS6dM&-SUPrI1*VF6k4fKY3BfYWSL~p7$)0^uNdJDa!9;rv^t@PG< z8@;XGPH(Sw&^zkUdW_yl@2q#xWA!+_tKLoTuJ_P;>b>;ddLO;7-cRqZ56}ndgY?1r z5PhgVOdqa~&`0W{^wIhleXKrCAFof)C+d^*$@&z1sya+CO`W$_(K2M*o zFVGk2i}c0%5nrq?`YL_3zD8fGuhZA-8}yC(CVjKMMc=A#)3@t8^qu-H zeYd_x->dJ__v;7rgZd%;uzo~8svpyj>nHS+`YHXienvm5pVQCl7xatzCH=B~MZco4?|`YZjl{ziYRzti9AAM}s< zC;hYjMgOXQ)4%IK^q=}K{kQ%{|EvGgDT6i`gEcsVHv~g8Btte7Lp3x*Hw=RqrePVj z;TW#r8NLx1p^?ByXe2Tc8%d0$MlvJ0k-|u6q%u+)X^gZ+IwQT2!N_Q2GBO)kjI2gB zBfF8q$Z6yO+Ml++i5n;42S{ji?l+ns) zZL~4k8tshsMhBy#5pBd6os7;#7bDh)GrAhxjP6Dcqo>i!=xy{d`WpR={>A`fpfSi8 zYz#4m8pDj?#t37iG0GTij4{R<RjOoSRvT-KwZ=MQy|KaAXlybz8(WO6#x`TSvBTJD z>@s#6dyKutK4ZUez&L0eG7cL@jHAXe)*al^Q2+%j$(cZ|EnJ>$Odz<6jpG9DXGjHkvk1SBCzL=uxEBq>Qol9Lo9B}qk6lQbkPNk`I?3?w7TL^6{sBrC~AvXdMn zC&@)}lRP9Z$w%^&0;C`*L<*B4q$nvyijxwgBq>EolQN_%DM!ka3Zx>bL@JXiq$;UK zs*@U|CaFbglRBg>sYmLQ2BaZrL>iMOq$z1env)39g0v)&B#N{mtw|fwmb4@7Ne9xA zM3WfOiF77iNGypXT}e06o%A3*NiWiy^dWsoKhmEJAOp!DGMEe@L&-2QoQxnN$tW_K zj3HymI5M70AQQc|;zQ zC*&!4MxK)wev?1sFZo9(lQtQX zH93a^n6BxWz8RRInZQhFCNdM7Nz9~XGBdfE!c1wV zGEn|aKo47BUN)Ma-gR zF|)W?!YpZ)GE19f%(7-Vv%FcstY}s;E1Ol!s%AB_x>>`lY1T4pn{~{(W<9gM*}!aQ zHZmKVP0XfdGqbrFVYV<^nvrId*~)BfwlUk9?acOO2eYFYZN`|L%+6*PGuDhVyPDn1 z?q(0Or`gNwZT2zyn*GfF<^Xe`ImjGr4l#$C!_4942y>)4${cNuF~^$Y%<<*~bD}xP zoNP`pr<&8u>E;Y`ra8-;ZO$?0n)A&0<^pq}xyW2>E-~ZHrRFkoxw*nzX|6I?n`_Lq z<~nn|xxw6MZZbEUTg_>P0f>t4`uvNq= zY8A7JTP3WLRw=8rRmLi7m9xrQ6|9O@C9ASk#j0vmv#MJ)teRFWtF~3gs%zD=>RS!0 zhE^l1vDL(CYBjT(TMa#TeCO23mux!PXFKs5Q(QZjG=;TBEGd));H7HO?AuO|T|f zldQ?s6l|8_jn*b>v$e(AYHhQ&TRW_s)-G$ewa40P?X&h<2dsnEA?vVp#5!snvyNLQ ztdrI$>$G*oI%}P?&RZ9(i`FIUvUSC}YF)FgTQ{tm)-CI{b;r7E-Lvjn53GmQBkQsC z#CmEyvz}Wote4g+>$Ua9dTYJ2-di86kJcyav-QRLYJIc5TR*Iy)-UU~^~d^a{j(^W zwi%nXIh(fyTeKxxwiR2oHCwk0o7kpp*|zQ2uI<^r9oV6rz)ol3mw01f>y`90%XlJrB+ga?ab~Zb^ox{#)=dyF#dF;G)K0Ci%z%FPPvJ2Zq z?4outySQD#E@_vtOWS4avUWMUyj{VrXjif;+g0qUb~U@YUBj+v*RpHdb?mx!J-fc$ zz;0+avK!k??51`zySW`~b%5H79vD@11?DlpCyQ3X#$Jm|h&UP0&){e8g z+THB#b`QI!-OKK6_p$rh{p|ks0DGW4$R2DDv4`5j?BVtZd!#+e9&L}Y$J*oU@%99J zqCLr;Y)`SL+SBam_6&QbJgGZ`*h5yY@Z%zWu;{Xg{(a z+fVGL_A~pr{lb1}zp`K3Z|t}BJNv!;!TxA}vOn8j?63AW`@8+a{%QZRf7^fTzxF?y za%hKfSch|XM{q<(a%4wwR7Z1k$8d;aI+kNQj^jF><2!*9ItiSFP9i6bJ9B*oQzH;C$p2q$?9ZtvO77PoK7w$x0A=o>*RCtI|ZDAP9dkT zQ^YCi6myC@C7hB@DW|kk#wqKRbILmvoQh5*r?OMUsp?d7syj8DnocdJwo}Kc>(q1V zI}MzMP9vwW)5K}&G;^9e5l#!Ir4#8yIjx-5P8+AK)6Qw{bZ|O4(N2uh$?5ELable~ zr>oP=>F)G!dOE$F-cBE?<{Z@I*Xje&JriyS?VlvmOCq)mCh<> zwX?=q>#TFuI~$yh&L(HGv&Gr!Y;(3dJDi=)E@!v1$Jy)bbM`w2oP*9G=dg3cIqDp9 zjyorulg=sUv~$Ke>zs4WI~Sab&L!uvbH%ypTyw5FH=LW!E$6m#$GPj=bM8A2oQKXM z=dts|dFniKo;xp`m(DBawe!Y#>%4Q`J0F~n&L`)y^Tqk$$!gxS^ZCP3R_a6T3;=q;4`d zxtqdG>85g1yJ_6CZaO!;o59WKW^yyTS=_8{HaEMQ!_DdDa&x6UU!yJg(6ZaKHSTfwd9R&p!5Rotp>HMhE3!>#Goa%;PF+`4W( zx4zrJZRj>~8@o;1rfxI0xf|iOa9g^OZj{@~ZSA&k+q&)C_HGBaqZ{qUxSib2ZWlM! 
zjdQ!Y-Q4bO54We=%kAyaE za#y=++_mmHcfGs8-RN#|H@jQht?o8=ySu~P>F#oOyL;Td?mlE3d0yLa5X?mhRu`@ntZ zK5`$sPu!>OGxxds!hPw!a$mb|+_&yK_r3eU{pfyjKf7PtukJVZyZgia>HczmyMNrj z?mw6EXpiw&kMnp>@I+7YWKZ!_PxExo@Q7!6mS=m8=X##!dw~~v3A}_}A}_I*#7pWW z^OAchyp&!lFSVD(OY5cc(t8=aj9w-$vzNuo>Sgn?dpW$EUM?@Um&eQN<@54;1-yb@ zA+NAk#4G9*^NM>Vypmoiue4XjE9;f>%6k>Oie4qJvRB2c>Q(cqdo{e8UM;V-SI4XC z)${6m4ZMb4Bd@X7#B1s`^O}1RUJI|K7wJWLt-RJ=8?UX`&TH>=@H%?YUX0ht>+E&$ zV!b%8tJlrz?)C6`dcC~fULUWo*U#(k4e$nfgS^4s5O1hA%p2~F@J4#0ywTnmZ>%@Y z8}Ci+$=(!isyEG>?#=LKdb7OQ-W+ePH_w~zE$|k4i@e3&5-;9c>Miq@dn>$^ z-YRdkx5iuRt@GA<8@!F)CU3L1#oOv_^R|0Cyq(@IZ@0I{+w1M~_In4sgWe(Uuy@2e z>K*frdnde;-YM_2cg8#Go%7Cn7rcw!CGWC##k=ZV^R9b0yqn%F@3wcxyX)Qa?t2fs zhu$OavG>G#>OJ$GdoR3~-Yf65_r`ncz4P9CAH0v=C-1ZO#rx`g^S*mOyr14L@3;5I z`|JJlD4+HjpY=JP_XS_{C13UxU-dO#_YI%;rf>PS@A$6o`Mw|cp`XA{=qK_M`$_zy zelkC~pTbY+r}9(#Y5cT)IzPRi!O!St@-zEc{H%U9Kf9m9&*|s#bNhMxyna4EzhA&F z=oj(}`$hbselfqeU&1fxm-0*dW&E;!IlsJL!LR67@+T`}+O-{{8@epg+hT><{sW`osL;{s@1hKgu8NkMYO) z@V@-{iXggf4RTH zU+J&%SNm)Hwf;JPy}!ZV=x_2j`&<02{x*NRzr)|@@A7y1d;GorK7YS|z(438@(=q* z{Gb&w`V8>9=;2N{BlL8c&ckR`|(WDBwfIf9%)t{``iC&(M*3-SjA zf`UPzpm0zmC>j(CiU%cvl0m7UbWkQJ8doM3J+FPI-J2o?s5g2lm-AU;?cEDM$g zD}t55s$g}nCRiJ+3)Tl4f{nqZU~{k~*cxmLwg)?cox!ePcd#ee8|(}A2M2QCO8|M3(f}@f{VeW;Bs&!xEfpwt_L@Qo58K%c5o-S8{7-- z2M>aW!K2`D@FaK|JPV!&FM^lBtKfC;CU_gX3*HAGf{($c;B)XL_!@i*z6U>opTV!- zckn0p8~h8XkPexU4Y`mHg-{HoP!5$)4Yg1YjgW+9XoYs@gl_1Cei(#dm>^6TCJGaW zNy4OIvM_m=B1{>k3R8z^!n9$!FnyRI%ot`0GlyBitYNk=dzd548RiOehk3%hVZJbb zSRgDI777c8MZ%(Cv9NepA}kq}3QLD&!m?qxuzXk{tQb}bD~DCWs$sRTdRQZ@8P*DG zhjqfbVZE?^*dS~eHVPYuO~R&Ov#@y>5w-|hhLK@Z*eYxtwh7yY?ZWn9hp=N99ma&6 z!p>orFgA<}yN2Du?qQFxXV@$39rg+PhW*0+;ec>pI4B$(4he^b!@}X=h;U>$DjXe- z3CD)x!tvpRaAG(qoE%OGr-swQ>EVoUW;iRH9nJ~ohV#Pt;ev2sxF}p4E(znqrQx!0 zdAK568LkRfhik&M;ks~rxFOsaZVEStTf(j3w*R$v&*73J>B7LBJx5c;Bx6QZR zx5Ky7x68NNx5u~Fx6ilVcffbhcgT0xcf@zpcg%O(cfxnlcglC#cgA$YVlndoXc~D-I59LP%P(f4(6-Gr+QB({SMSAQR14Kcbx>X8hw7pFr~&dv4N)W17&Spn zQ8UyWwLmRVE7TgbL2Xex6oA^J4yYsQggT=xs4MD*x}zSbC+dZIqdurF>WBKH0cao^ zga)G_Xeb(nhNBT^BpQWAqcLbK8i&TC31}jkgeIdYXeye9rlT2XCYpt2qd90Unuq42 z1!y5!gchSEXenBTmZKGDC0d16qcvzPT8Gx74QM0Ugf^osXe-)=wxb|1Cc1@g zqdVv>x`*zg2k0SsgdU?O=qY-Jo}(A&C3=Nkqc`X+dWXJ4-=p{F1Nw+Qq0i_G`ij1x zA5a(^7Kg*(aReL@N5YYD6dV;t!_jdJ923XFv2h$67stc#aRQtWC&G!b4TDh7x%;c@c=v!55j}-5IhtQ!^80iJQ9z>qwyF#7LUW@@dP{( zPr{S&6g(AA!_)B$JQL5tv+*1}7th1<@dCUMFT#uQ61)^I!^`msyb`a%tMMAV7O%tW z@dmsRZ^E1L7Q7X2!`tx=yc6%jyYU{p7w^OS@d11gAHs+65quOM!^iOnd=j6+r|}tl z7N5iC@dbPlU&5F16?_$6!`JZ*d=uZoxA7f(7vID8@dNx2Kf;gk6Z{lE!_V;x{1U&y zukjoF7Qe&a;qUQ#`~iQ&pYUh=1%Jig@DDf)2}{C}@FW6>NFtHQBnpX2qLJt%28l^x zk=P^-iA&;<_#^>IND`66#77Xpgb+#?;Y1Kg6w$;ZhFIcA5|Wf8Bgsh$l9Hq%sYx1= zmZT%;Nd}UUWFnbK7Lt`@BiTs~l9S{jxk(<9m*gY)NdZ!j6e5L55mJ;CBgIJxQj(M+ zrAZl5mXssqNd;1oR3ep06;hQ{Bh^U_Qj^powMiXPm-vx-q&{gt{7FO7h%_cmNK?{` zG$$=cOVWz8CT&Pt(vAd>_M`*pNIH?uqzmavx{>ar2kA+Ak=~>a=}Y>N{$v0dNCuI? 
zWC$5bhLPc91Q|(2kx8`D6iE zNEVUBWC>YHmXYOT1zAZ}k=0}kSxeTD^<)FtNH&qpWDD6!wvp{*2iZw>k=EwWEzD=rO{|~8iU59v1n`>hsLGxXndN0CZvgIV(O!aVoE5bjB+Zd zq>5_lQ9~_tGzm>glhNcf1x-m)(bO~zO-s|!^fUv_NHfvQGz-m2v(fA{2hB-y(cCl- z%}evq{ImcqNDI-zvCbTJSMw`I(ht8$*=zO|>E~Ja-V!DJbrOW7Yx`M8xtLSRFhOVXS=z6+= zZls&&X1axLrQ7Isx`XbdyXbDZhwi2O=ze;D9;AopVS0ofrN`)TdV-#$r|4;VhMuM8 z=y`g9UZj`kWqO5PrPt_ndV}7ix9Dwphu)?4=zaQtKBSN6WBPkTqhBSrgWjHDk?L z3)YggVy#&l)|Rzn0jxdiz&f%{tTXGvy0UJpJL|!EvR%;o8eyl$mzy`8GY%m+b zhO%L7I2*x6vQca_8^gx3acn%Bz$UUuY%-g|rm|^lI-9{}vRQ04o5SX^d2Bvgz!tJa zY%yEHma=7RIa|S2vQ=y~Tf^3}b!@YjRj33z%H^&>@vH;uCi@j=7p0a1`IeWogvRCXid&AzcckDa%J$uhSu#fB$`^>(uuk0K9fra5= zc{m=PN8k~8Bp#VZ;Zb=s9-YVFF?lQ=o5$gCc|0DUC*TQrBA%H0IO3QSPC4V83og0h zntR-E%ND!EPs7vlbUZ!Jz%%kpJTuS2v+`^_JI}#$@?1PO&%^Wb zd^|rdzzgz1yf81qi}GT;I4{9V@>0AsFT>07a=bjRz$@}fyfUxCtMY2RI;w$ zufywdKVFa5=MA_&Z^#?*#=Hq{%A4`#yajK`Tk+Ps4R6ca@c`bQci;c|YEt58wm&AU>E6;Y0Z_KAex>Bl##knvdaQ`8YnFPv8^zBtDr> z;ZylEKAq3tGx;n&o6q5M`8+kDup*oYFCvJDB9e$KqKK#>nuso9h?pXlh%MrX zxFVj2FA|7^B9TZed;$q9h@gTAE`*Rm2`xNfgcVLC5lKZdkzAw@DMc!gTBH$aMLLmQ zWDpreCXrcW5m`kxkzM2vIYlm!TjUXWMLv;V6c7bPAyHTq5k*BYQCyS|B}FMwT9grG zMLAJkR1g(KB~e*a5miMsQC-v!HAO8^ThtMCg`cP=>Wc=#Uo;erL}SrJG!@N6bJ0Sy z6s<&S(MGft?L>fRFFJ^hqLb(>x`?i#o9Hfjh@PUC=q>t)zM`M#F9wK#VvrathKQkJ zm>4cbh>>EH7%j$#v0|JUFD8hIVv?9FriiIxnwTzTh?!!Rm@VdrxniD}FBXV}Vv$%Z zmWZWdnOH7Xh?QcMSS{9wwPKxEFE)scVw2b`wur4_o7gUPh@E1W*e&*my<(r(FAj)< z;*dBjj) zFCK`8;*oePo`|R7nRqTWm&mQiF>8BIo)F=R{`OU9OQWLz0f#+M0XLYYV=mOhCjmPAs?B$q-;sic;k zG}1~ZlgOkpnM^KI$dodbOfA#Mv@)GcFEhxDGLy_Kv&gJ6o6IhA$ec2l%q{cCyfUB6 zFAKcZsE8_(imaljs4AL@u41T|Dwc|^;;6VP zo{Fy$sDvt!O00YeDXfU1iYcyyl1eG9JY|$sP9;%ERWg-arBEqVDwSHLQE62=m0o2~ z8C52gS!GdKRW_Afp@fNHNgsE(?W>a4n`uBw~ru6n4Rs+a1m`l!CDpX#p$sDWyb8mxw>p=y{K zu12VlYLptS#;CDsoEontsEKNlnyjX%scM>-~sD)~gTCA3+ zrD~a4u2!g(YL!~8)~K~=om#IpsEulq+N`#yt!kUvu6C%MYM0ur_NcvTpW3eusDtW| zI;@VUqw1JCu1=_v>XbUI&Zx8MoI0;AsEg{7x~#6KtLmD%u5PHC>Xy2#?x?%!p1Q9d zsE6v2daRzPr|Ow{u3o5@>Xmw}-l(_go%&9FuimQ<>ZAIkKC3V4tNNw_=Qq<~bvPYf zN6-;wTxlW-|>Qp+lPNUQ6bUMAxpfl=BIRdXv&ZG0{d^*1_ zpbP3ky09*yi|S&!xGtef>QcJ2E~Crpa=N^(peyQ1y0WgKtLkdHx~`#X>RP(CuA}Q} zKV47P*A2A4Zm1jS#=41as+;NNx`l43Tj|!ijc%*k=>XkcchDVmC*4_h(Oq>n-Cg(4 zJ#{bLTldj@bwAx-56}bkAU#+Q(L?nxJzS5_BlRdfT946V^*B9VPtX(fBt2PA(Npy_ zJzdYxGxaPzThGyR^*lXaFVGA1BE48I(M$C*ydC0>a@uwN%F~|b8P9soOX4NCO|O<$+pFW%_58egUVX2D=kGQ28hMSq zCSFsonb+KF;kEQyd9A%RUR$r77vQz`I(QwuPF`oPi`Uib=5_aacs;#dUT?3D*VpUk z_4fvN1HD1sU~h;w)Enjv_eOXly;0t1Z;UtA8|RJpCU_IQN#0~{iZ|7p=1uozcr(3O z-fVAZK6oF!Pu^$mi}%(0<^|4gV8WVkCcKGYBAQ4hvWa4%nrJ4viD6=zSSGfK zW8#{4Cca5v5}HINvGEyXupx#TX1EbX8fCQcj4{?Ylf)!7$xL#S!lX2*Olp(Hq&4YG zdXvFqG?`3hlf`5;*-Unm!{ju%Om36M0x@BUZ%I{WBQtYroS0r2AV-;uo+^8 znqg+R8DU16QD(FmW5$|sX1tkTCYniRvYBG0nrUXbnPFy{S!TAGW9FK9X1-Zq7Mew7 zu~}l4nq_9WSz%V1Rc5tWW7e8=X1&>9HkwUlv)N*{nr&vg*VTX1_UL z4w^&eusLFmnq%g;IblwkQ|7ceW6qj$=DfLJE}Bc`vbkcenrr5|xnXXaTjsX8WA2)J z=DvAg9-2qyv3X*inrG&@d0}3fSLU^OW8Ru~<~#Gfd2c?LkLHv4Y`&PU=9~G!gt1|5 zI2+zZun}z}8`(y&QEfCE-NvvnZ7dtx#<6j2JR9F8unBDW zW9wQ!ThG?F4XnRyXdBtawux7kD+19p=ZEM@v0NdVnupMnD+u3%pU2Qkp z-S)6OZ7Wp#?I=6ijuoLYhJK0XL zQ|&Z6-OjKx?JPUn&ardtJUibmunX-XyVx$VOYJhd+^(=I?JB$4uCZ(FI=kL(up8|r zyV-8BTkSTx-R`hE?Jm39?y-CAKD*x@um|lSd)OYaN9{3t+@7!}?J0ZOp0Q``IeXq- zuovwmd)Z#GSM4=>-QKV_?JaxT-m!P>J$v6iun+Ac``A9QPwg}N+`h0c?JN7*zOirZ zJNupe-oCdV>__{_ezsriSNqNWV8ghuE}RSRBDjbyl8fx3xTr3gi|%5$m@bx!?c%t& zE}o0;61apekxT4+4ms?IqmDW5gp*D=?L23kb+X8Ep01bc?fSUBuAl4g2DpK4kQ?lVxS?*C z8}3H9k#3Y5?Z&vVZk!wMCb)@ilAG+NxT$WMo9po?e4g{?w-5v z9=M0@k$dc(xTo%!d+uJim+qB&?cTVz?w$M2eed485ALJ;?A*3c`jY%c`X|>X%Y69mz!JIvVmXQ zwtk&Mu1dGGfM0-rqgHL3)c0%t%l9p+AM#4)po`e0%RF!iJZ}rL?8wU 
zNI?d2P=FHr)!f5@7ugHB{c28vz{~E1+~8V)ftTP5xg`UWgDJq2U@9;*mrUTQ1 z8NiHSCNMLY1q^}zaI1JECA2sQ#6gH6Du zU^B2e*aB<`wgOv&ZNRo*J1_uj4|V`Mf}Oz5U>C3}*bVFs_5gc=y};gJAFwaj59|*P z00)ADz`@`Ua40wo91e~EM}nik(cl;1qBwI1QW*&H!hEv%uNl z9B?i;51bD!02hLbz{TJaa4EP9Tn?@PSAwg+)!-U%Ew~O`4{iWAf}6n2;1+NzxDDJ6 z?f`d!yTIMx9&j(X58MwP01twPz{B7X@F;i;JPw`!PlBhw)8HBKEO-t)4_*K-f|tO{ z;1%#Hcn!P`-T-fcx4_%r9q=xA54;aP03U*nz{lVd@G1BVd=9<K_znC447?(3@GUGD4h#=Q03(8tz{p?}Fe(@gj1I;CV}h~3 z*kBwmE*KAt4<-N;f{DPypbtbK1_?+(269k<5>%iDJYzjm<~)2W&ksSnZV3o7BDNA4a^Sa0CR%5z}#RSFfW)7%nud-3xb8f!e9}wC|C?E z4we8*f~COHU>UG1SPm=?Rsbu4mB7ki6|gE;4Xh5<0BeG^z}jFPurBBa)&uK<4M2ae zA=n6P3^oCqg3Z9@U<j!8Bl6Fddj4%m8KtGl7}GEMQhJ8<-u;0p(O7D}j~4DqvNx8dx2y0oDX-fwjRpU|rA; ztOwQy8-V^`L$DFp7;FMI1)G7*!4_akuoc)EYy-9h+kpXKd$0r85$ptZ2D^Y=!ERu8 zum{)^>;?7)`+$AHeqeua05}jF1P%s=fJ4Dy;BasRI1(HMjt0koW5IFYcyIzZ5u5~0 z2B&~i!D--ha0WONoCVGX=YVs;dEk6-0k{xc1TF@bfJ?z;;Bs&UxDs3it_IhDYr%Ek zdT;}{5!?iB2DgA)!ENAna0j>(+y(9i_kerBec*oZ0C*5Q1Re&DfJeb&;BoK-coIAX zo(9i=XTfvedGG>w5xfLm2Cslu!E4}k@CJAjyanC{?|^r~d*FTW0r(Jn1U?3zfKS0^ z;B)W=_!4{tz6RfbZ^3uqci{Knd+-DJ5&Q&x2ETw`!EfLXVBl;tA=`g292g#q07e8O zfsw%|U{o*~7#)lO#sp)5vB5ZCTreIOA4~uy1QUUYK_7@f3=)un4CJ5yC8$6RdY}O< z=)fdkQZN~q983YG1XF>j!8Bl6Fddj4%m8KtGl7}GEMQhJ8<-u;0p(O7D}j~4DqvNx8dx2y0oDX- zfwjRpU|rA;tOwQy8-V^`L$DFp7;FMI1)G7*!4_akuoc)EYy-9h+kpXKd$0r85$ptZ z2D^Y=!ERu8um{)^>;?7)`+$AHeqeua05}jF1P%s=fJ4Dy;BasRI1(HMjt0koW5IFY zcyIzZ5u5~02B&~i!D--ha0WONoCVGX=YVs;dEk6-0k{xc1TF@bfJ?z;;Bs&UxDs3i zt_IhDYr%EkdT;}{5!?iB2DgA)!ENAna0j>(+y(9i_kerBec*oZ0C*5Q1Re&DfJeb& z;BoK-coIAXo(9i=XTfvedGG>w5xfLm2Cslu!E4}k@CJAjyanC{?|^r~d*FTW0r(L7 z>5%=uWU{#Mk3x>jM-DnJ5;jb@Fya3@S=@goiwhemY~=q=7Wd!D;{H2X+SKfA$f|Je;^Hwsz#f>!mK`qvK#Iel>c z-=3RIXEr>*t|^Goi2l`uRM+PX6u2{QO+bF|+f{&i|K6IrP)*0{mNto!2g) zRqKdB*}1f>U&}`Rb%G~0hS`tH2Q3M%E%O7*qisN#kmZXD22N{SI4H}5KDRtL!|Sy0 zYw6d>zwP`MO*;8E2rY$ip`{S^m!%LkxD-N9I{!xM9L%Pe0;jkc0Q?)CmKasAd@^O zaWrAvKS^_F75{5F`->iJX_?^e?YC1peqO^?tAxaAj3IHjyqAx$B_zz)7`OK8q3Lz{EO>c8Dg4hD8Ya#f2H{^#TUnScBPLc6fP z)}Gh?>IW1kRPnE*eAm!A=zaM3VgAHFPWj(!Ex%5ArHTdqO3K50-8KJfV9Ni*->dEg z{&TAPoEA+&>-d;I>GQ+D?XT65ztAuJv_%bBQ)s^v+OLH6Q*rYCNlO1-ZT{Qa{m?2H zx*?AM8TF6F|99fPz&7SLe$3o@er@NrYUv;HAfRKbpa)$-mvosMx@3Mp$bJ#-7E}7C|rkJgoe~@G&^F3#~>!Ztwmr zLHu8+PCo_me_WkD^!(%3LpOULdj4w)G^jXs1%~kdhCqH<=*H{k+pf@d{oidD{2woP zZ}8V!fPc@n>p#B6EDnx!>ijHT{qOEy%O7`B|G(e;`<*^Kbc+|drTc3k!9TwnI3;M8 z?>Bb?L;tZ9ChR|sH^uEY<=Bb9zpnjqylHOWR~^m!yPM|mp?i_D|ATf$0;99u^V<41 zNN2305m@3*oJ4_Wc z5E%OLpqS@32I4}i-LC_!h)}ALz`)_o;9C?hD){qi3K3A?IAG?CzYzfi27?X!JI!nV zzm=#3L2C*mfN{|WRSDUR=yGB%iB*+Q-5M)&Tyka5*jVuDem+39qGIUM-$-ys znJ;MV-?mAs1_R-*_rY0PDl}^&KoyJ#Mgk*)QNXBRG%z|C1B?lVWbNXRRn{m4`=Cq9 z1}EV+lDI77oe-G^$KAh|n}Hb;n1#PHbQ&jb@XKLBhiXEHenN+KB)qQkhNSS9VfV7& z(c3?0H$RQ?_YbV4K|S=6p9VL6acm&&yLI1Q9+SWBt6Ta7^bYDc<_11k(6UwA7Jkit zqo02_Z|0YDzwYOQzy7B;2M!B{9EFJc+wZQ|Dxh(20`s+N;+Ljkt7e^B1%JcrQa^rf zVA4Ws>;5W*f%{Qy1251JeE0&Y%1?(9LTbxTyR)JDqak_zj|Ue1v0qwHA^7VzYFKj0VJ^?$=JnA0HOx04?<4E9gbQ%25+UT5scE6Lc|9&v^i%Ru>Exh=7nFo%71a$eGj1255q~En{i37u+A4krA_*QRS g(DTxx-iHeegXDBItlR|%d;WBg{ujCSuj#7)ANK+`SO5S3 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle new file mode 100644 index 0000000000000000000000000000000000000000..9777319465de6a012f3d071e436bc07600ee66ae GIT binary patch literal 127923 zcmb^42bfgFx+v@w$T{bnbIv&^l`x=^R1_5xD2gIcQ52CN5=0Rt znRCQ^d*F4Sz4zVs-2XiPt$FCTS9kSVRkdo>S9I4*&q}j=^YAc8O*2v|Bu68Mrp!o5 zPBSiPaIKW&0%Q7(8rW}qwE?5YB~=?bYGBfor191I_Zy#-lANi}sHDk#I;1|9vMeRJ 
z;D5?Fa^i>yLlarMr22Q()T5*u(XW5fi18`C68H0tnmBUI)XRss^n!ll#`T+;lANW_ zxTFE2M~$B_ZsLFmsfQ@^pAIo-+~{dZqf(Nyw!3tvYnzsrUcD@(Q!qEBb=W^;eoFV0 z3Z24E8Rn)Wr@eHjgaao`9g~#0&uD1PM9xm()cI6b9n*RCD^N~B9Soq`D|{TH;( z5CmZuMRA;x*c_65NUqeq56SaS`yNs>HCK4~CpA}6a>nta$4wYLZeY^5l&*;yCrwi0 zA?b#sNlyFE&5>qQzmbV1O*?4lxbYJbjX$v8gnlVQvj3yJ=@Td6pQ`m=2TqeHMWN)V zTZ4bD#E@eDT7~ARRY+~=xPQNKDXF*961Rg!k51f+W%VFC~?e2m-jb%RHD@Jgvq1-YadC8LMJM7>C_IXKBR5Zr9F>N z&MUFOl?E^O5bPv#Qv8a z&2sr5N|Kzm&!v`5&ik)DYeLevQT-CVGdWAE5u*nT@6vBnzrl%tAQj0NZ$drHF|KCnorpw)=%RhIMG|6F;fA8ywJ+zKSjZaBjE{UHBDf3c0 z@`Bb^2En0X`&#~(GYE=p>a{r4kuASFo$B@@^Q!)oF!R8|(^I{e|K#AG6Sm(la!aZ! z8bxU@CcLnJMUPbL^s2n>V$RgvCPx#dC9aD!gT^I|P3d~M|1In~WKnWHjWc0<(zv0C z;b!!pLF1DqBw8hNtBK=>j!GIozD;6y_(yk0Gko&E@u{6E-Q|oa$r=7`$wV(mZP0;9 zBNCTW>WGl~lX}f2hocg8OwKrJ^th4zMhu;nl(JyrbHOQn@*Yj_lr2ZT+pF){$=3!j|EK=CrXrh#{TnOwk&9!CcPY~Z96aV zlbUtizm6KIEwf=k>;EUU9vCJB-c3Epsp(P!C(L+h>r&zW(I5Z+Ry9*%pQ(-UkE&j}9{&zY|8clYZO7E% zcF4#?MESQk_TRr?)c^DaWB#WvNDXqvmDB;MMzxxCs@1#{#M1RojVza}b|yYVSQ)=x~la9G{MSsd0N zag)dpUCuJBabk1G-hX>K?v&=ApFZ_(Pe+~Ne}4MAiAl+6`XwfNDMJQLOiW4oCujXf z)Ez!FF)>I^d#M5`6Z?-(jt3;FnOcjqLq|=hSuZ6y|38Pf#FI&bM~@pipx=nZ5Z?Nq zw*E1MXYTWNrqlzbOD_D+uTLE9(yTKvbxXv90g1t_!>9@M>ij*pC5}6!+J8FFT|S8u zJ>#bZty&bX>hr1nG&Nh8V zP1%h97F2_l9&g3gY{Rx}$M(E}9oUhb*qL3}mEHLFpnH|=?(D&72n^ zIFqwDn{#+8Z{u9f<9y!E1zgBQOy(WDlXr13mvAYUF@?*yf_L*C-piF-#noKHwY-n_ z^8v2ogM5hV`7j^hqujv9_&A^7lYELBxrtBn8E)pYe2!cAJYV2ezQ}Fd&K=yzUEIx= z_%iqK6~4;7e2uSjAK&19zR3eT$U{8Lw|Ina^C*w;I8X2$p5(iHkEi%PPxAwQ$TK|4 zb3D%r{D>d(6Mo9i_&LAem;8!f^BaE4@Ay4`;E()?Kl37g;jjFSzw;0N$-nq_;19#g z4@8WahH06O>6w8UnTeU1g;|-6*_nemnTxrZhk2Qg`B{JkS%`&Mghg45#aV(SS&F4u zhGkifSxr9r(j452s6}+4G@LsOuDz4@luH}8apAT>yALK(^ z&xiR4ALRx<#>e>tpX5{A$W45j&u}xJ<#XJ^=lKG+@ZuuH-7N<{GZ$eY~F!a2+4yLtM{?`3N8720q5e`2?TjQ{2c+ ze45X2GoR&i+`{Mi0=M!-ZsT_D;7;!1Zob5qxreXtRqo|$e4YFF2KVz#9^gS9;$gnU zBYc}jd5p(-g75Go-{pHe#rJudAMit-;aQ&Jd0ya0{FtBcQ+~$J`31k^SNxja@LPVz z@A(6NM_d`aEdFeCGM3%I+(=k0WFe5WDGqW%&voSk! zFeh^{H}fzr^D#dQupkSuFpID#i?KLMup~>dG|R9o%dtEwup%q5GOMsEtFbz3uqJD< zHtVo1>#;r?upt|=M#LAPjMqR@o7H8&3u;6aSNa43*5>V zxsBVogFCs4yZI7d<{rMnSGkw3@pbOw8{E$~d4LCbh==(WkMM0C z6yN7*e!vfThG%(>=Xrr2@nfd0$+)ycD>+@C0iy?|eoX)Gi_9*6-kw;DIJDOPbxl#L z!|7AwaTr8FoLFy?HnCnOePUfn#voJT6ZI@X)*xGuJ;)K{3~~jzgFHdrAYYI_C=e73 z3I&COB0AmCPCAnS8g8spPU|^6G35h7CI?f3sll}1=3sg-Be*4)8O#c12Xlg3gWH0+!MtF8 zaC@*ISQsn{l7l;fJA=D|#lezbX|OCv36=*dg1duzf_sCN!Kz?&uqId=+!x#*JP@o4 z9t<7|)&~yEM}QbMS2NT(Bj0K6oM68oU^63$_P4 zf}O#xV0Z9R@N%#xcqMo>*c-eSydLZe-U#*wZw3c~gTbNTaPU@eBzQYG8XOCb2PcAe zf|J3!!F$1};Qioq@Imlla3(k#oD0qe7lMz1kAqKwPlL~b&x0?5FN3dwuY+%bZ-eiG z?}HzLAA_HQpM#6RFTtQj!!lvnuv}O^tPoZV zD}|NADq+>IT39`-5!MWAg|)*vVcoD^SU+qKHVhkujl(8k)390CJZurR3|ob*!!}{t zuwB?bydvxnc1$d@=^ShPNI+VHyY`tXME#;|92Q`jr)9rg+P zhW*0+;ec>pm=q2Q2Zuw#q2aJ_csL>)8IB4^hhxIA;ka;oI3b)EP6{W7Q^Kj?wD9I| zdN?DzC7c<~3TKCN!dt`J!nxtRaDI4uxFB2@E((*wJHk7|yTZlcl5lCbEKCWPhbzLn z!+XMe!10i7;X!uid^Ow~z81b7?hD@t_lIwW z2f~Blq403{R(K?QJ3Ja53y+5Tb@O1b=_+fY^JR6=1&xaSnkHU|` zPr^^b&%)2cFTyXwufngxZ^CcG@51lHAHpBQpTeKRi{UTfuiujJ*pAajA})-qdHODs9sb*Y7jMy z8bytxCQ;LJW8|Iz^qME>YL0TXbb~Rn$G|5nUZ! z6I~l!7hNCS5ZxH{jBbj0MZKdwQQxRv)IS;!4UCeaLDAr7NHjDW77dR^L?fe7(dcMQ zG&UL+jgKZo6QfDd zb96EKCHgh`E&4tBBl%7H5xh#5v-_-;kZa#G%gkwk4wZQ<5F?yxJ+C&E*F=NE5sG!N^#}5N?bLr z7FUmJ#5LnuaqYNHTsN*4*N+>-4dX^}Jh%#W%;(;~DWS@yvKu zJUgBf-x}W*&yDBB^W)p&1@XdoQJfs#5#Jf#6)%pL#7pC4aZ0>AUJ>6N-xJ>(uZ&m4 ztK&8C+W5Zs{`i4-UHoADP`o~VIDRC4G~N(D7C#<85kDC}6>p3;#ZSl2#GB)1Nu@z(gocw4+Z-VyJNcg4Hom*SV>J@G5?tMT6WwfOaTU;IYAKYlYl5Fd;W#fRg! 
z;v@0f@zMBLd^|o8zZ0K~-;LjkPsQ)Yr{fRe592fO+4x+1KE4ot6n}i_S0j=$^ttp4 z4@m=4S5zmbNh}C#kTR@m;$8U5i}#0JmAL!gFP~3cC~WzBzr;fA{)vByMSo#p$zhoI zWrJ`~;@{x^T1r2xcj^xReJ;`|btj2m$VeZAQJf}iy42~*u%U^Zsf9{irC7Vxe<;5a8`pvHtFws|MoV+|9A~;Ci~0GEX>Mm%+4Il$z06MJj}~{%+CTW z$U-d4A}q>cEY1=v$xCvh^Ta4M(qW=`h}-olxj z#o3(0TX`GjavtaNb}ryTE@Cq8;GMjSi@AhLxr`}X&K10y_wZh>kJTYksy`2&CCPyCq|`3ryLZ~UEq@K655#9J4aQpkug z(=aX5F+DRdBQr5GvoI^OF*|cGCv!13^Dr;-F+U5iAPccDi?Aq*u{cYxBulY0%djlV zu{##2Cu|EHv!Zozrh>h8VP1%gi*@7+Eimlm(ZP||P zc?CPLBRjD(yD)VIotnDwN~TVyQ&V^LVCqaZHC@ANc^$9k4ZM*(c@ujvb(Wo)`miti zu|EfJAd@(VgE@q$bMn+QjKevCBRPtrIfi37j^jCj6FG^KIfYX>jW=^TXYdxz)0@8kV^fa~}m zAL4pG%t!brH}Ek&&L{XJpW;St;?sPFoB1rC;}$;87r2!#avQgE2X}H8ck?B_%sqUC zuW~P6Y}ipYk()&M){Szv9>YhTrl#e$OBHBY)!0yvSepD}Uqf{DXh;FD5>?xRgRhjG2aM znU3k1ff<>JnVE%InT^?*gE^UtxtWJ~nUDEdfCX8Ig;|6}S&YS5f+bmsrCEk$S&rpd zffZSam05*VS&h|MgEd);wONOCS&#MEfDPG*joE}v*^JHEf-TvKt=Wcc*^cdb1v{`K zJFzpnuq(UqN?yh8?7^#f4X@>Oyq-7kM)u@Q?8V;f!@lgt{v5!8OyVF8<`53$Fb?Mk zj^rqg<`|CUIF9E8PUIv`<`holG~UeVoWWZ-le0LRb9gIn<6O?;eBRClT*yUC<{i9~ zcX2V7a4DBDh0D2uck>?J%avTk)m+21ypQ+u0j}eNe2DA$FdyNg+`z~9IG^B?e2N>n ziBIzxZsxOmj$8OVU*J~0$Zg!t9o)%X+|8HxGWYNmzRJCPjjwYb-{5|}$pbvdLp;p4 zc!Y2BD39?tPw*X{_e5-iD5EX^`3%W^Ew3arRVtjsE`%4)368m!4$tj#*C z%X+NO25iViY|JKX%4TfN7Hr8@Y|S=o%XVzfE7*Y@*@>Omgyo-yugiE=MDO}DKyqov%UasUS zuI3u9<$b)L4{#kHggm$`?p@Kx^RYkZyi_y+g$O&;Ju9^zrX#Up&1M|q6Ld4lioB;VzGJjM5U znji2(p5a-Z<9S}-NBo$d@Kb)q&-n$v8sP;1APWo(DvXnTBbZj_H|!8JUThnT1)IjoF!lIhl*OnTL6qkNH`E1zCuNS%gJd zjKx`kC0UB4S%zg9=w{@@LFES>v;ojWKZ72UhK_2 z?8|=a&jB3BBo5+W4&hJ^<8Y4PNRHxYj^S92<9JTsL{8#lPT^Ee1 z_%T1>r~Hhc^9z2-ulO~;;kW#b-}49l$e;K#FY*`u%HQ}q|KOkei+}%d%rLF^&zNbL zmg$(D8JLlon3-9amD!k`Ihd2Vn45W+m-(2V1z3=USeQjvl*L$_C0LTBSej*6mgQKU z64&!i+ z;7E?*XpZ4nj^lVv;6zU1WKQ8!PUFp-&KbOgGdYX1Ifu9MHqPZd&gbo1z=d4IWZuC$ zc^4OR372viQ@ETfcsK9iy^963@i`>TT+`*mP#oc^~FLMuH;j7%s*Z4a3@eS_hn>@gSJjBC% zi%0l2kMbCg^90}FNxsYXc#7}yG(X^nJj1g*$Md|vkN7b^;ivqJpYscT$*=e|zu~w1 zj^FbK{>Y#BGcWQN{>tC@JOALH{EL_Vz<46`hv~$B#!SPsOvm)hz>Lhq%*?{9%*O1@ z!JN#++|0wg%*XsJz=ABq!Ysm~EXLw2!ICV+(k#QWEXVS!z>2KI%B;ewtj6lB!J4ea z+N{I6tjGFnz=mwZ#%#i-Y{uqn!Io^r)@;MJY{&Mzf*shAo!FUO*p=OQC9h(4_TbgL zhS%~sUe6nNBYW~D_F`}LVPE!Re-7Y4CUFo4a|nlW7>9ENM{*QLa}39F9LIA4Cvp-e za|)+&8gJ%w&fqPa$yuDuIlPs(aW3a^K5yp&F61I6^A6t0ySSK3xRlG7!sT4SyLk`q zHSN_J| z`3L{xU;O*aFv9fWKVx3{MwG-;X>F%tdS+loW@2V$VOC~icIIGC=3;K&1-lqujBQ+fj6=z zZ(=X@W*_!tKlbMU4rCGsaWIE)D2H)4M{p!ZaWuzpEXQ#?CvYMsaWbcHDyQ*gPUj5X z!kL`K*_^{$c^l_)9_RCRF5p5gVlwaGoxF>Sxr9r(j452s6}+4G@LsOuDz4@luH}8a zpAT>yALK(^&xiR4ALRx<#>e>tpX5{A$W45j&u}xJ<#XJ^=lKG+@?WG&Wa9oA(%)@K7YWFt0a6E;{FdMGd;Y*5`4fNUMgGEH`5S-dAN-Sl@$WCc3^R)VjG2aMnU3k1ff<>J znVE%InT^?*gE^UtxtWJ~nUDEdfCX8Ig;|6}S&YS5f+bmsrCEk$S&rpdffZSam05*V zS&h|MgEd);wONOCS&#MEfDPG*joE}v*^JHEf-TvKt=Wcc*^cdb1v{`KJFzpnuq(Uq zN?yh8?7^#f4X@>Oyq-7kM)u@Q?8V;f!@lgt{v5!8OyVF8<`53$Fb?Mkj^rqg<`|CU zIF9E8PUIv`<`holG~UeVoWWZ-le0LRb9gIn<6O?;eBRClT*yUC<{i9~cX2V7a4DBD zh0D2uck>?J%avTk)m+21ypQ+u0j}eNe2DA$FdyNg+`z~9IG^B?e2N>niBIzxZsxOm zj$8OVU*J~0$Zg!t9o)%X+|8HxGWYNmzRJCPjjwYb-{5|}$pbvdLp;p4c!Y2BD39?t zPw*X{bQGcY4FF*CC;E3+{>b1)}!F*oxtFY_@! 
z3$P#yu`r9UD2uT;ORywMu{6uDEX%PxE3hIfu`;W$Dyy+NYp^D3u{P_lF6*&A8?Yf8 zu`!#lDVwo5Td*Ztu{GPUE!(j@uV4puWG8lJ7j|VgUdgN2ojrIpui>@4j@R=B-pHQ3 ziM`mHeb|@%*q;M9kVzcG!5qS&9LC`s!I2!r(Hz6E9LMpTz=@p1$(+KeoW`3uoilg~ zXL1&2a}ICiZJf(_oX^|2fD5^Z$-IMi@-8ms5-#O3rf@k|@NVA2d%2RUxSDIYmiO^~ zKEQQ+kPmS^ALb)`lpFXMALkQ%l236XH}Ppc!_9n_&v6T%=L_7*7rBkwxq~~oi@W&} zU*;aZ!dJPMukm&6;~U)1H+g^ud5DMk7LV|49_29}=Lx>UlYE!&@f6?ZX@0;Dd4^|s zj^}xSAMs;;!cX}bKj#&1-lqujBQ+fj6=zZ(=X@W*_!tKlbMU4rCGsaWIE) zD2H)4M{p!ZaWuzpEXQ#?CvYMsaWbcHDyQ*gPUj5X!kL`K*_^{$c^l_)9_RCRF5p5g zVlwaGoxF>Sxr9r(j452s6}+4G@LsOuDz4@luH}8apAT>yALK(^&xiR4ALRx<#>e>t zpX5{A$W45j&u}xJ<#XJ^=lKG+@$3qHvJo4z37fJRo3jO5vK3pi4coFE+w%%`U`KXhXLey%cH@=2irv|R zSMwTP%jfCHJtK^)8>9LixF&Ji5RQ5?-N9LsSW&k3B! zNu10noXTmunbSFgw{RwBaW?1hR^GI<=2mj<>OngD{<@nEtG1D+D z(=k0WFe5WDGqW%&voSk!Feh^{H}fzr^D#dQupkSuFpID#i?KLMup~>dG|R9o%dtEw zup%q5GOMsEtFbz3uqJD#;r?upt|=M#LAPjMqR z@o7H8&3u;6aSNa43*5>VxsBVogFCs4yZI7d<{rMnSGkw3@pbOw8{E$~d4LCbh==(W zkMM0C6yN7*e!vfThG%(>=Xrr2@ne3%Px%=?=NJ5vU-4^x!*BT= zzvmD9kw5WgUgR(QmA~a4+1Y{k}W!?tY4_Pl}}*pZ#snO)eG-FPLhVt4l7)x3t+@;YA6 z8+apo@+S6TZ}wqd_G5nz;6NsE5C?MzhjJK)a|B0n6i0Im$8sFUa{?!F5+`#Cr*ax^ z=5)^BEu6_&oXt7BmA7#&=W#x7=K?O|A|~?=-pRYTm`k{n%b3FDT*13}5AWqluHtI0 z;ac9u`}qLZ@j*Vs^?aC*@KJ8yV|<)X@JT+!joiei`3yJnSw6=te4a0GD_`U`Zs!i} z!OMIDo_zGX;UcScHxsPvfKi}j59^@e&=36|%w|SJuc$_Er4o~u3zQ%qg78X}p=!IfJ)wCTDRr=kQkE#<`rw`MjMA zxR8sO%sY4|@8V)E;ZiPR3YT*Q@8&(cmn*r7tGR}2c^~iR16;=k`4HFhVLrk~xq*-I zaX!H(`4l&D6QAZY+{|bB9Jla!zQC<~k=wYPJGhg(xSKEWW$xiCe3g6o8eiu=zQO%` zlLvT^hj^H8@d)4MQ6A%Qp5Qw?$#?l4Pw{=8<_G+cXLy$9c%B#d5kKZ9{FI;ZbAG`u z`4zwBH~g00@q7NjANdo1=0*O(U-=t<=O6r&e=*1@{xf3CG)&8MOwSC=$V|-4EX>Mm z%+4Il$z06MJj}~{%+CTW$U-d4A}q>cEY1=v$x zCvh^Ta4M(qW=`h}-olxj#o3(0TX`GjavtaNb}ryTE@Cq8;GMjSi@AhLxr`}X&K10y z_wZh>kJTYksy`2&CCPyCq|`3ryL zZ~UEq@K655AeZ>hh%wVJEz>bQGcY4FF*CC;E3+{>b1)}!F*oxtFY_@!3$P#yu`r9U zD2uT;ORywMu{6uDEX%PxE3hIfu`;W$Dyy+NYp^D3u{P_lF6*&A8?Yf8u`!#lDVwo5 zTd*Ztu{GPUE!(j@uV4puWG8lJ7j|VgUdgN2ojrIpui>@4j@R=B-pHQ3iM`mHeb|@% z*q;M9kVzcG!5qS&9LC`s!I2!r(Hz6E9LMpTz=@p1$(+KeoW`3uoilg~XL1&2a}ICi zZJf(_oX^|2fD5^Z$-IMi@-8ms5-#O3rf@k|@NVA2d%2RUxSDIYmiO^~KEQQ+kPmS^ zALb)`lpFXMALkQ%l236XH}Ppc!_9n_&v6T%=L_7*7rBkwxq~~oi@W&}U*;aZ!dJPM zukm&6;~U)1H+g^ud5DMk7LV|49_29}=Lx>UlYE!&@f6?ZX@0;Dd4^|sj^}xSAMs;; z!cX}bKj#I<=2mj<>4DyKoj2JTw(=r{?GXpa+6EialvoagAGY4}r7jrWY^D-av zvj7XS5DT*ii?SGtvjj`B6ic%V%d#BHvjQu!5-YO`tFjuavj%Ij7HhK(>#`o}vjH2j z5gW4!o3a_3vjtnS6v%nH;En9b zo7juJ*@u1EkNr7-1DV7@9Lymc%3&PN5gf@;9L+Ht%W)jf37p7DoXjbl%4xir(>a5; za3*JQHs|nG-p09{$N9XS3%HPrn9MtPC-357F5yxxV+xmZ1@GoPyq7DvimSPXYk42< z=L1~F2l)`!^I<;1N4bHI@o_%EC;1dNauc8CGu+H)`5d?KdA`7{e39F@ojbUbySSS# z@n!DeD}0rE`5Is6KEA>Ie3J)wkcW7fZ}AA<=20Hwah~8iJjr+Y9#8Rop5_PqkY{+7 z=XjnM_z^$mC;XJ3@pFE`FZmU}<~RJ7-|>6?z#sV&f96I0!e99tf9D_klYcSDEB-TL z%rs2PbWG0-%*ag4%q+~xY|PFa%*kBL%{ZuuH-7N<{GZ$eY~F!a2+4yLtM{?`3N8720q5e`2?Tj zQ{2c+e45X2GoR&i+`{Mi0=M!-ZsT_D;7;!1Zob5qxreXtRqo|$e4YFF2KVz#9^gS9 z;$gnUBYc}jd5p(-g75Go-{pHe#rJudAMit-;aQ&Jd0ya0{FtBcQ+~$J`31k^SNxja z@LPVz@A(6NFe|e$J9986 zb1^sbFfa2lKMSxR3$ZYZuqcbMI7_f3OR+S|uq?~5JS(swE3q=GuqvyuI%}{dYq2)# zurBMdJ{zzh8?iB)uqm6dIa{#h|6-}0q6Fyy1PYg{Qb><&+qTtX+vu@v^z@iLwr$(C zZQIuVyLYR>I0y1*>8;td2FXCf35* zSO@E3J*iI08rFC>)Jra4e3)@i+k|;v}4mkvIjX;xwF& zGjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^ol zJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XE zH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf_y*tNJA98H@FRZ0&-ewu;y3(` zKkz61!r%A@|KdMXV$*-rP)7qzw9rNe1-j^=j{!;y!6+CNqhWN6fiW=_#>O}p7vo`k zOn?b75hlhYm=u#?a!i3KF%_o9G?*6CVS3Df88H)P#w?f>vtf43fjKc3=Egjj7xQ6$ 
zEPw^E5EjNFSQLw4aV&u)u@siZGFTSNVJMcz3Rn>Rk0dY#~N4@Yhi7ygLSbU z*2f0e5F24*Y=TX(88*ij*b-Y|7`DbX*cRJid+dN6u@iR2F4z^jVR!6-J+T+|#y;2= z`(b|^fCF(54#puk6o+9r4#x-_fg^Dgj>a)K7RTXuoPZN?5>CcQoPtwv8cxR3IVV;qc&@i0Cnz=W6x z6JrugipelJrofb#3R7bmOpEC-J!Zg+myhEV*_l6 zjj%B`!KT;@n_~-XiLEdUTVoq+i|w#IcEFC<2|HsK?26s6JNCey*b94OAMA_$us;sK zfj9^U;}9H*!!R6&V+4-CkvIxR;}{%^<8VAqz==2sCu1Z|!KpY6r{fHqiL-Dv&cV4j z59i|oT!@QsF)qQSxD1!$3S5b+a5b*MwYUz~;|AP_n{YF3!L7Irx8n}niMwz&?!mpd z5BK8%Jcx(zFdo69cnpu@2|S6X@HC#mvv>~A;|08km+&%P!K-);uj388iMQ}J-od+g z5AWjxe29&yZK`exYu?QB$ zVptqYU`Z^6rLhc_#c~*m<*@=*#7bBht6){EhSjkK*2G#^8|z?QtcUfn0XD=&*ch8& zQ*4IKu?4ooRv3n@u?@DxcGw;}U`OnPov{mc#ctRgdtguOg}t#4_QihK9|zz-9E5{$ z2oA+z7>>g+0!QFT9EGEC435QdI36e9M4W_^F%qZXRGfy>aR$!BSvVW#;9Q)C^Kk(# z#6`Fmm*7%dhRbmUuEbTi8rR@jT!-s%18&4kxEZ(LR@{c$aR=_iUAPR1D7VlAwVb+9hh!}{0&8)74Dj7_j9HpAxF z0$XA$48zvg2HRpgY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V?!ofHMhvF~{ z$Ke=(BXA^+!qGSe$Kp5~j}verPQu9;iBoVYPQ&Rq183qaoQ-pEF3!XGxBwU8B3z71 za49as<+uV@;woH?Yj7>D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$rcmNOLAv}yn z@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n18?Fjyp4D8F5biY_y8Z`BYccc z@F_mS=lB9&;wyZOZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#{EdI`FaAR%KK(}xbu`dK z3vF~zpo<>*7@))ujDk@y8b-$$7!zY*Y>b0(F&@Up1eg#LVPZ^zNii8F#}t?nQ(mq=6{}%&tbsML7S_f(SQqPIeQbaYu@N@LCfF34VRLMOEwL4b zVQXxIZLuA;#}3#LJ7H(+f?cs2cE=vr6MJEA?1O!=ANI!qI1mTnU>t%&aTtc88#yz+f_u+m#fCupq9>ybh6p!I? zJb@?i6rRR2coxs$dAxuZ@e*FfD|i*J;dQ)$H}MwU#yfZy@8NxXfDiEzKE@~b6rbU9 ze1R|V6~4wd_!i&cd;EYO@e_W=FZdO|;dlIjKk*m-#y|KM|DlqA{-cIE8fc=0HaaNK zMGt)pP+|y1!KfGwqhkz=iLo#?#=*E4594D3Oo)jvF($#Jm<*F+3QUQqFg2#Zw3rUl zV+PEKnJ_bE!K|1Kvttg-iMcR0=E1y}5A$OIEQp1$Fc!h0SPY9}2`q`Fur!vzvRDp7 zu{>75idYFNV->85)v!9&z?xVKYhxX(i}kQRHo%712peM)Y>LgWIkv!-*b2k2HMYUF z*bduc2keNQurqeSuGkH`V-M_!y|6d-!M@lJ`{Mu{h=Xu24#A-~48w6aM&Jk>iKB2d zj=`}w4#(pJoQRWfGDhMQoQl(MI?lkEI16Xv9Gr{ua6T@;g}4Y8;}Tqo%Wyfaz?HZP zSK}I7i|cSbZorMW2{+>w+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ;}JZH$M86wz>|0i zPvaRpi|6n>UcifZ2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB|;}d*}&+s|Ez?b+6 zU*j8mi|_C~e!!3T2|wc({EFZ3JO03*_zQpIAN-5|P)SJtQ9~UKG|@sE9Te!Ihdu@< zF$AMvRE&nvF$TuOSQs1QU|fuc@i74=#6*}FlVDOzhRHDnro>d38q;7}Oo!<)17^fb zm>IKRR?LRkF$dLkg}ZSN?!|q$9}nO`JcNhw2p+{_cpOjQNj!z8@eH2D zb9f#v;6=QIm+=Z-#cOySZ{SV5g}3nz-o<-(A0OaDe1wnj2|mSV_#9v0OMHc|@eRJk zclaJZ;79y~pYaQR#c%i>f8bC2g}?C+{>6W&B%=SQp^gTcXrYY`3Utv!9|M#af>AIk zM#JbB17l(=jE!+HF2=+7m;e)EB20`)FexU(!!pc|$ zt70{*jy13**23CY2kT-ztd9+_AvVIs*aVwmGi;76uqC#_Fl>!&ur0R3_SgYCVkhj3 zU9c;5!|vDvdtxu_jeW2$_QU=-00-hA9E?M7C=SDL9F7q<0!QK~9F1deERMtRH~}Z( zB%F+qI0dKTG@Onza3;>e**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-Y zBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRf zB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XS zBYwiq_yxb>H~fx2@F)Jl-}ndr;y+Xp(|^=ZM*~f?&_)LZy6B;g0ZI(PC>Rx^VRVdv zF)Wvqf#u^Lv# z8dwu+VQs8~b+I1S#|GFC8)0K?f=#g*Hpdp&5?f&yw#GKt7TaNa?0_Ay6L!Wf*cH2B zckF>Zu^0BnKG+xgVSgNe191=z#vwQqhhaDl#|RvOBXJat#xXb+$KiOKfD>^NPR2-_ zf>UuCPRAKI6KCOUoP%?59?r)FxDXfNVqAhtaTzYh6}S>t;c8riYjGW}#|^j-exUdJ1F6K~;dyn}b~9^S_X_z)lAV|;>7@fkkH7x)ri;cI+@Z}AfhJmLqk{rn^w7ruC5B)WjEd1PI>x}57z<-# z9E^+cFg_;0gqR2uV-ie?$uK#lz?7H@Q)3!Ti|H^uX26V?2{U6B%!=7CJLbTgm;O(V-YNh#jrS*z>-)BOJf-7)R4Xa}ftckU- zHrBzqSP$!C18j(murW5lrq~RdV+(AFtuPE*V;gLX?XW#|z>e4nJ7X8@irug~_Q0Ol z3wvW9?2G-dKMufwI0y&h5FCobFdTrgh>LJBF2SX^442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h z;||=3yKpz|!M(T-_u~OPh==en9>Jq{43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s z;|;utx9~RJ!Mk`5@8bh}h>!3wKEbE>44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnW z;}86azwkHy!N2$qm88o54HY%i(LfU|w9!FqLqPRxb5F%Ra& ze3%~#U_mT|g|P@0#bQ_-OJGSXg{83!mc?=yisi8aR>VqJ8LMDbtcKOG2G+z{SR3nL 
zU95-ou>m&3M%WmeU{h>{&9Mcx#8w!Ft+5TZ#dg>pJ77obgq^VqcExVk9eZF;?1jCt z5B9}=*dGVrKpcdFaR?5@VHl3XF#<>6NF0TuaSV>daX20);6$8+lQ9yf;8dK3({TpQ z#925S=ipqNhx2g(F2qH+7?_uyXKhx_pW9>ha<7?0plJch^d1fIlGcpA^(Sv-g5@d94NOL!Tt;8nba*YO74 z#9Me9@8Dg$hxhRTKEy}(7@y!%e1^~Q1-`^r_!{5fTYQJ_@dJLuPxu+X;8*;H-|+|j z#9#Ou|KMNzhe|T~j~eP|potdR=%7FsJ@hd^i6IyTqhd6SjxjJM#=_Vb2jgNqjE@O0 zAtu7am;{qzGE9ysFeRqK)R+d-VmeHZ889Pe!pxWjvtl;PjyW(V=EB^V2lHY+%#Q`I zAQr;HSOkk=F)WTHuq2kk(pUz|VmS=O@>l^YVkNAMRj?{n!|GTAYho>|jdidt*2DVP z02^W>Y>Z8?DK^9A*aBN(D-6Td*aq8TJ8X{~up@TD&e#RJVmIuLJ+LSC!rs^i`(i)r zj{|TZ4#L4W1c%}<49DRZfg^Avj>6G62FKz!9FG%lB2L1|7>QGGDo(@cI0I+mES!yV za4ycn`M3ZV;v!s(OK>SJ!{xXFSK=yMjcaf%uEX`X0XO0%+>BdrD{jN>xC3|MF5HcK za4+t|{dfQm;vqbYNAM^f!{c}YPvR*&jc4#Ip2PEa0Wabuyo^`yDqh3ucmr?ZExe6) z@GjoN`}hDK;v;;FPw*)|!{_({U*ao#jc@QRzQgzU0YBm={ET1lD}KZ8_yd39FZ_*v z@Gt&DB{}^^4RtioLsJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9EM_ftbi4<5?014SQV>bb*zCku@=_GI#?I$VSQ|X4Y3h6 z#wOSln_+Wofi1BWhGA=LgKe=Lw#N?G5j$aL?1Ejf8+OMY*b{qUZ|sA8u^;xw0XPr` z;b0tsLva{}<8X|?5jYY@;b@fE(tH~1Fc;d}gmAMq1@#xM94zu|ZMfj{vV{>DG}7yqG> zg8rk1IvQxAg*G}U&_xe@3{YYSM!~2U4WnZWjES)@HpaoY7!TuP0!)aBFfk^*1(!r3u|K?tc&%qJ~qIH*a#bA6Ksmj zusOECme>lzur;>9w%88aV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_? zI1IyaI7Z+I9EqcFG>*ZsI1b0-1e}PIa56^X6r76Fa5~PwnK%n);~boe^Kd>cz=gO7 z7vmCKipy|0uE3SJ3RmMAT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%Vz=L=Q z591L$ipTIcp1_lM3Qyx1Jd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVGz=!w< zALA2ziqG&lzQC9G3SZ+Je2ee!J$}HC_z6Gb7yOFf@H_s%pZE)Z;~)Hs|4>Ou|4~C7 z4K&e08yytrqK7^PC@}=1U{s8T(J=SbyT1i(0EQZCg1eU~7 zSQ^VU`?!rwXqJ?#d=sD8(>3hgpIKYHpOPx99v*Z zY=vRi8rxu7Y=`Z!19rqt*crQESL}w}u?P0VUf3J^U|;Nq{c!*e#6dV1hu}~ghT%9I zBX9(c#8EgJ$KY5ThvRVqPQ*z#86$BDPQ__B9cSQ7oQ1P-4$j4SI3E|_LR^H4aS1NP zWw;zy;7VMDt8opk#dWwIH{eFxgqv{-ZpCf59e3bP+=aVw5AMZ%xE~MTK|F+q@dzHp zV|W}-;7L4%r|}G)#dCNbFW^PIgqQIOUd3y89dF=GyoI;%4&KFkcpo3&Lwtmf@d-Y~ zXZRdn;7fdkukj7O#dr7~Kj26FgrD&Xe#LM29e?0Y{Dr^q5B|k}sHCF*sG*JqnrNYo z4hnS9LmvZ_7=lqSDn`TT7z1NsER2nDFfPW!_?Q3_Vj@h8NiZoU!{nF(Q(`JijcG6~ zro;4@0W)GI%#2wuD`vy&m;-ZSF3gR2FfZoA{8#`BVj(PyMX)Fq!{S&1OJXT3jb*Sb zmcvjij}@>YR>I0y1*>8;td2FXCf35*SO@E3J*iI08rF zC>)Jra4e3)@i+k|;v}4mkvIjX;xwF&GjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSn zC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*u zB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>h zCBDMf_y*tNJA98H@FRZ0&-ewu;y3(`Kkz61!r%A@|KdMXQqzCbP)7qzw9rNe1-j^= zj{!;y!N_irO}h_|tQtAxb;SSgq)kIrQNlvPvWF_+kzp|+{+mk^s#b`SIy|yXi~puZ z7E4m`e^;uCrs#^Hn2M#?ilYd{RXoL40!1nzN)#ok5>1JV@?v6R?K93`$2Pl>N2 zP!cMMl*CFBC8?53Nv@<&QYxwbJA^b!S|y#5UdfQblQd}valvGM7rIj*DS*4s3s+3nMC>51TN@b;rQdOy@ zR99*!HI-UQZKaM7sO1x+&e29!gK8m(pA5qx4n!DgBiJ%0Ok1GFTa+3{{3H;mU9&LK&fqR7NSI zl`+a#Wt=iznV?KmCMlDZNM(vLRhgztS7sY4LyjMObAC*taXXT6XRr#iTSAHlz zm0!wl<&W}L`S;)TtE#5zs-c>yrP`{a3e{CT)mH;msv&9=HL4m-jjqN}W2&*#*lHX# zt{P8`uO?6vs)^LZY7#Z6noLcurchI=snpbJ8a1t&PED_7P&2BT)XZuYHLIFU&93H9 zbE>)2+-e>*ubNNIuNF`Xs)f|TY7w=lT1+jjmQYKorPR`D8MUlhP7PJds}Hu}1I!GO?4pE1y!_;tfxEi63 zP)Dkx)Y0k~b*ws09j{JMC#sXw$!erJMV+cnQ>Uvl)S2omb+$T3ovY4M=c^0Uh3X=8 zvARTEsxDKPt1Hx%>MC`$x<*~Au2a{m8`O>JCUvvAMct}yQ@5)-)Sc=sb+@`l-K*|X z_p1lggX$smuzEy2svc91t0&Zx>M8ZKdPY5~o>R}O7u1XDCH1m;MZKzCQ?IKx)SK!p z^|pFPy{q0+@2d~ghw3BsvHC=PsyMQlN`bK@LzEj_;AJmWPC-t-XMg6LN zQ@^V})Sv1v^|$&*{j2`_@A0ders|h{hHAsK zaBa92p^eZ+YNNE#+8AxDHclI_P0%K4leEcNq&7vHs!h|TYcsT&+AM9hHbhsL_4Y-(~fH=w3FH??X-4AJFA`3&TAL6i`pgavUWwgs$J8r zYd5r;+AZz2c1OFb-P7)C544BcBki&FM0=_|)1GTDw3pf|?X~tsd#k6!H`dR9G~o?XwO=hSoQx%E7HUOk_lUoW5+)C=i_^&)yvy_jBHFQJ#zOX;QcGJ09P zoF1x|*DL51^-6kWy^3B{uclYmYv?ugT6%50j$T)FxCndPlvJ-dXRWch$S;-Sr-NPraAkTkoUy)%)rF^#S@oeULs_ 
zAEFP{hw0(^a6LjFp^wx@>7(^A`dEFOK3<=oPt+&rll4e_iau4Jrcc*r=ri?M`fPoU zK3AWo&({~|3-v|%Vtt9eR9~hq*H`E(^;P<6eT}|WU#G9vH|QJnP5Neii@sIgrf=7G z=sWdY`fh!XzE|I;@7E9L2lYexVf~1HR6nL4*H7pt^;7z3{fvHAKc}D9FX$KbOZsK~ zihfnUreD`@=r{FS`fdG=epkPz-`5}L5A{d-WBrN#RDY&F*I(!_^;i09{f+)sf2Y6K zKj_LBsWqRDUDP{Y9o!2)<|ch zH!>I*jZ8*nBa4yM$Yx|Wau_*{Tt;pqkCE5NXXG~u7zK?&Mq#6fQPe1A6gNs3C5=)> zX`_r$)+lF$8s&`&Mn$8NQQ4?sR5hv@)r}fPO{11k+o)sIHR>7ljRrl@VsNHrg0%jdn(Rql3}W=wx&@x)@!JZbo;bhtbpMW%M@s7=4X?Mt@^~ zG0+%f3^s-sLyciZxG~&_Fh&?7jZwyEV~jD@7-x((CKwZqNycO&(wJgQHKrNUjTy#F zW0o=7m}AT}<{9&i1;#>Sk+IlVVk|Y58Ox0o#!6$AvD#Q;tTomd>x~V@Mq`t)+1O%i zHMSYsjUC2LW0$ep*kkN9_8I$)1I9t)ka5^JVjMM&8OMzi#!2IpaoRXzoHfoF=Zy=- zMdOlj*|=g{HLe-gjT^>I#nZQhFCNdM7Nz9~XGBdfE!c1wVGEn|aKo47BUN)Ma-gRF|)W? z!YpZ)GE19f%(7-VGt?|^Rxm4?mCVX!6|<^Y&8%+LFl(B%%-Uuhv#wdstZz0j8=8&G z#%2?y%**B#^Qw8xyl&nwZ<@Ev+vXkfu6fVAZ$2;|nvcxK<`eU&`OJK7zA#^! zugurx8}qIC&U|lvFh81~%+KZ*^Q-yI{BHg*f11C{-{v3luldhZEY;F1-7+lGvMk$j zEMd8pXZcoONh`#PVnwy0S<$T+R!l3F72Aqq#kJyD@vQ__LMxG#*h*q0wUSxMtrS*D zE0vYnN@Jz9(pl-P3|2-fla<-ZVr8|mS=p@|R!%FImD|c=<+buz`Kj|(W+!swyIcFt!h?vtAR5HHdRBd_ zfz{AzWHq*$SWT^FR&%R`)zWHZg;}kwHdb4!oz>pzV0E-QS)HveR#&T=)!pi0^|X3f zy{$f0U#p+h-x^>Iv<6v&ts&M>YnT;o4Ywk!5!Og+lr`EKV~w@OS>vq<)chqcq%W$m{1SbME~)_&`Nbw^)_v=N_0W1`J+_`$PpxOx zbL)lm(t2gRw%%B8t#{UY>x1>t`ec2!zF1$aZ`OC~hxOC?W&O7PSbwd5mSU^6X6v?L zo3>@!wqpz1wLROn16$f5b`(3R9nFqz$FO7CvFzA(96PQZ&yH^=uoK#e?8J5wJE@(_ zPHv~LQ`)KQ)OH#>t)0$JZ)dPG+L`Rkb{0FUoz2c}=dg3yx$N9_9y_m{&(3cbunXFS z?80^tyQp2vE^e2wOWLLE(smiUtX<9ywaeQT?22|JyRu!yu4-4atJ^i~nszO_wq3`r zYuB^u+YRi7b|bs7-NbHcH?y1DE$o(dD?7|?ZMU)8+U@N2b_cto-O283cd@(L-R$mm z54)$`%kFLWvHRNn?Edxud!RkY9&8V>huXvJaC^8NVUMs!+N12z_85DtJx+YEQGL+cWH$_AGn0J;$DF&$H*-3+#pVB73pD#9nGIvzOZ|?3MN^d$qmB zUTd$j*V`NHjrJycv%SUMYHzc*+dJ%?_AYz3y~o~b@3Z&Y2ke9PA^Wg>#6D^tvya;+ z?34B>`?P(=K5L(|&)XO5i}oe^vVFzAYG1Rj+c)f+_AUFieaF6Q-?Q)A5A28bBm1%a z#C~c&v!B~9?3eZ{`?dYXervz8-`gMTkM<|~v;D>XYJao8+du4|_AmRl{m1@m|FacG zbu>qJ499dV$95b?IIiP4z7sgo32~x0QJrW`bSH)r(~0H8cH%g3op?@sCxMgDN#rDU zk~m47WKMD?g_F`r<)n7fIBA`9PI@PUlhMiKWOlMRS)FW7b|;6E)5+!JcJerRoqSGy zr+`z?DdZG(ia14`Voq_Vgj3Qf<&<{HIAxu3PN-Afso+#}Dmj&%Do$0Wnp54W;nZ|$ zIklZSPF<&-Q{QReG;|s{jh!Y=Q>U5J+-c#obXqxKPHU%))7EL{w0Al<9i2{2XQzwP z)#>JRcX~KIonB6Fr;pRu>F4x!1~>zqLC#=jh%?j~=7c-Lod{=yGtwF5jCRI2W1Vr% zcxQq$(V65-b|RfA&QxcbGu@ft%yecsvz_oh8mvXPL9yS>dd7 zRynJkHO^XRowMHA;B0g@Ih&m=&Q@oev)$R@>~wZHyPZAGUT2@P-#OqMbPhR(og>as z=a_TcIpLgiPC2KYGtOD(oO9l};9PVrIhUO)&Q<4{bKSY&+;na^x1BrAUFV*2-+ACX zbRId6ohQyy=b7`|dEvZtUOBIwH_ltq#Dz=I3Vu#o%c8T3$kJu~riT&b$I4BN@!{UfIDvpWc;)FOUPKndvj5sUKiSy!u zxF{}(%i@Z-Dz1s^;)b{>Zi(CCj<_rCiTmP#cqkr;$Kr{2DxQhw;)QrAUWwP@jd&~G ziTC1z_$WS!&*F>tD!z&D;)nPteu>}WkN7M83B^@i&DCAQHC@ZKUB?x!>w2#52Cj5N z+$e5TH<}yWjp4?0W4W>2IBr}wo*Umy;3jkvxryB*Zc;ayo7_#|rgT%esogYgS~s1W z-p$}XvsaxE0+>Ze_QMTh*=RR(ET-HQicnZMTkF*RAK)cN@43-9~O>w~5=- zZRR$2TevOVR&JQv+HK>ub=$e^-41R?x0Bo1?c#QIySd%n9&S&!m)qOz2L+)YshS>q%j{+GvU=IP>|PEprj4l@@ji^yt-aJufEs7Yv?ud z8hcH=rd~6zx!1yL>9z90yw+YDudUb4YwvaNI(nVF&R!R_vK0ys6$a zZ@M?bo9WH+W_xqIx!yc)zPG?z=q>UVdrQ2f-ZF2wx58WLt@2iTYrM7II&ZzV!Q1F< z@-};0ysh3gZ@ag{+v)A{c6)ogz1}`=zjwep=pFJ7dq=#Z-ZAgEcfvdAo$^k5XS}oC zIq$r8!Mo^P@-BNs!Taca@;-ZCyszFj@4NTI`|17ietUnszurGj@l{{*b>Hw!-|}tW z@rCdDp6~mCFZ~cdiXYXF=12Eq_%Z!her!LEAJ>oP$M+NX3H?NVVn2zW)KBIo_fz;O z{ZxKxKaHQ(Pv@uiGx!<(OnzoRi=Wlc=4bbF_&NPter`XHpV!al=l2Wv1^q&PVZVr9 z)Gy{2_e=OC{Zf8uzl>kjFXxB)<^2kNMZc0?*{|YP^{e^S{ThBvzm{LyujAMC>-qKl z27W`ok>A*F;y3l1`OW!Rv_J{aG{b7E%KirS-NBATCQT}Luj6c>N=a2U%_!Ip}{$xMW zpW;vTr}@+U8U9RvmOtB{%zv5r@uld*g8~#oImVev7}|C9gO|Kfl3zxm(&AO27Om;c-UK2m%>|1W|&hL9`%x5F>~g#0p{uae}x(ydZv%AV?S_3K9oNf}}yR 
zAbF4?NExIGQU_^*v_ZNceUKr@7-R}E2U&uwLAD@!kR!+$fLrN(W_vvO&2ZG$^bPKu% zJ%XM=ub_9(C+HjW3;G8Gf`P%HU~n)b7#a)2V;V*2 zObR9kk-?N;YA`LB9?S@42D5_M!JJ@jFfW)NEC?0`i-N_$l3;1DELa|_2v!EGg4MyA zU~RB2SRZT%HU^u5&B2ynYp^ZY9_$Eq2D^gY!Jc4murJsj90(2uhl0bwk>F@>EI1yV z2u=p4g44m7;B0U%I3HXHE(Vu^%fXf4YH%&M9^43S2DgIS!JXi4a4)zYJO~~JkAla+ zli+FaEO;Kg2wn!Sg4e;D;BD|OcprQSJ_etH&%u}AYw#`j9{dP?2ET&e!Jpu7@GnrL zDmAG~Lz>c(wsfSBuJoiY11V*Qj3T4TXfnEtA!Eu|GPaB(%*z04pp%1kn|%p$YOY%;sdA#=)HGPlel^U8cOzbqgN%0jZR zEFz1_VzRg_Axp|qva~EC%gS;xRF;<&WJOs?R+d#{Ras3|mo;QfSxeTIb!1&xPu7JIT(ni|i`9$?md;>?wQ6-m;JE zEBnd*a)2Bt2g$*5h#V@1$#6MbM#vFzq#Pwj%Q14S94E)i338&GBqz&AIYmyD)8uqH zL(Y`5l#k?N`9waI&*XFYLcWx*NO1=WV?Ky{&dP<^NY z)DUU}HHMl%O`%|@859CFhgv|PP#Dw_Y6aP#aL5j|h9aOyC<=;(VxTrqEEEUDLkUn@ zs2$WE>Hu|wIzgSGE>Ksf8`K@@0riA>LA{|qP+zDY)E^oE4TJ_k4rnkm1R4qrgN8#R zppj4_GzuCGje*8O!9_}252L+3EB*8fwn^1pzY8OXeYD_+70c2_Couh z{m=pEAan>i3>|@vLdT%v&fwRKd;OuY?I47J7&JE{*^TPSy{BQxdAY2G83>Sfm!o}d? za0$31Tna7?mx0T|<>2yg1-K$y39bwW!d2j^a1dM#t`66LYr?hQ+Hf7XE?f_;4>y1t z!j0g@a1*#G91J&uL*V9c3pf-GgImI_U>h6`+u_!51RM!R!O?IG+y;(?A9i!K2|Z@K|^pJRY6^PlPAIli?}wRCpRZ9i9QtglECC;W_YJcpf|-UH~tI7r~3+ zCGb*s8N3``0k4Et!K>jl@LG5sydK^FZ-h6&o8c|+R(Kn{9o_-&gm=Na;XUwPcptnU zJ^&wt55b4wBk)o97+04UxY8gm*Fe$Rrnfw9linIgm1yO z;XCkM_#S*8egHp&AHk2|C-77F8T=f60l$P_!LQ*r@LTvD{2u-Qe}q55pW!dC8*m36 zz!P`@Z{P#cfV99Dqyy=JAIJbQ0tmnW00f`_12`Z62`E4V2C#qwJP?2gBp?F?s6Ycc zFn~V@0GU8$kOgD~*+6!X1LOp`KyHu+Hb* z&;#@Yy+Ci!2lNH~Kz}d*3 z5`{z~F-RLE7KuaRkp!eI(hg~lbU->HosiB*7o;oF4e5^bKzbs*klsiiq%YDB>5mLR z1|owH2QnBLf(%85A;Xao$Vem+8HJ2S#vo&namaXN0x}VqgiJ=JAXAZP$aG`|G837F z%tq!QbCG$-d}IN#5LtvQMwTE;k!8qoWCgMkS%s`d)*x$T1F{j>gltB(AX|}b z$aZ80vJ=^b>_+w=dy##}e&hgh5IKY#Mvfpykz>el1M(61gnUN6Aa1BT>VbNqUZ^+fgQh{#qP}Q4G(GBvW4Eigl0ywpjpvuXm&IQniI{1=0@|NdC`1m zezX8u5G{ljMvI_D(PC(Ev;0*yqY z&}cLUZG*<5acDf6fVM^3q3zKQXh*aY+8OPFc163P-O(OsPqY`>8|{PkMf;)s(E;c{ zbP(!52ctvKq3AGlI649yi6)|>(9!4^bSyd!9gj{xC!&+k$>)+&FB_%E4mHc zj_yErqPx)D=pJ-0x)0rt9zYMGhtR|55%ef}3_XsXKu@Bl(9`G{^elP~J&#^MFQS*w z%jgyKDtZmQj^03TqPNi7=pFPfdJnyiK0qI$kI={H6Z9$i41JEiKwqM-(AVf2^ey@h zeUE-XKcb(|&*&G_4RgmlFi*@2^TvFzG+0{97fXkw$NaDiSVj!OU<_aghGH0oV+2NG z6h>nV#$p`CV*(~(5+-8`reYeVV+Q7r1z?%5%vcsIE0zt*j^)5|V!5!~SRO1dmJiF1 z6~GE&g|Na{5v(Xy3@eV6z)E7Ju+mr=tSnXzE00ycDq@we%2*&)1*?h$Vb!qeSPiTu zRtu|*)xqjw^|1O_1FRv|2y2Wr!J1;hSTigHYmT+RLa{KcCDsbFVd0n^YmG%2%eXzb*Kde7C02_!6 z!W`IOYzQ_K8-@+XMqnecL~Il`8XJR+#l~Udu?g5jY!WsZn}SWnreV{u8Q4s07B(B3 zgU!X}Ve_#C*g|X(wisK2Eyb2$%dr*MN^BLj8e4;{#nxf#u?^TpY!kK_+k$Pywqe_` z9oSB67q%PQgYCukVf(QI*g@=Jev zyMkTCu3^`)8`w?k7IquEgWbjMVfV2I*hB0Q_85DDJ;k13&#@QSOY9Z)8heAi#ol4> zu@Bfs>=X7G`+~XQ?zji;iF@JRxDTENPmBBF>G1TpAD#ivh(kDx102Co9K&&(z)76K zX`I1XoWprsz(ribWn95kT*GzT!2R(6JQJQ7&w^*gv*FqC9C%JV7oHo>gXhKb;ra0b zctN}nUKlTe7sZR=#qkn&NxT$Z8ZU#F#mnL4@d|iFyb@j+55%kBRq-Ia8eSc*f!D-q z;kEHPcwM|6ULS9OH^dv^jqxUUQ#=@NhKJzI@fLU}9)`EXTj4f59Jk}G@d!K;kHVwz z7`zQ0i^t*dcmm!QZ-=+XJK!DhPIza$3*Hs)hIhw%;63qPcyGK9-WTtO_s0j|1Mxw) z10ReJ!H43*@ZtCfd?cQTkHSaeWAL%~ID9-l0iTFZ!YAWX@TvGTd^$b@pNY@HXXA75 zx%fPMKE427h%dqy<4f?R_%eJsz5-u~ufkX3Yw)%BI($980pEyk!Z+hv@U8eZd^^4a z--++ScjJ5Tz4$(SKYjo|h#$fa<45qL_%ZxAegZ#SW{5pOEzlq<%Z{v6HyZAl)KK=lIh(E#~<4^FX_%r-D{sMoAzrtVRZ}7MHJN!NV z0sn}9!aw6*a5uu8@E|-1FT$JfA<__O311=|k)H4)G7uRFh=2(|AOuQa1WphHNl*k$ zFa%3*1WyQrNJxZCD1=IAgiaWQKM_D=A~F+Mh^$05B0G_T$VucPaua!oyhJ`CKT&`v zNE9Lp6Ge!kL@}Z`QGzH*lp;zKWr(swIifsKfv8ASA}SMsL=~bc5kyoYsuMMcnnW$4 zHc^MDOVlIk6Ag%lL?fax(S&G91QX4O5TZHJf(RwTh?Ybv!bXG>cA_;AK|~T!L^KgY zv>{@NI3k`%Alee`i1tJWq9f6X=uC7Wx)R-p?nDowC((=OP4pr968(t&!~kL-F^F&w zgNY%;P+}M{oESljBoc{H#Asp+F_sudj3*`#6NyR0WMT?2m6%3MCuR^ciCM&KVh%Bv 
zm`BVf77z=GMZ{ua39*z|Ml2^*5G#pQ#A;#4L&Ra?2yv7+MjR(j5GRRK#A)ITah5nooF^_27l}*6W#S5P zmAFP+CvFfoiCe^N;tp|_xJTS49uNW#B1UW@s@Z;yeB>o zABj)IXW|RtM!J(8q$lY`dXqk68Zs^EOQs{!lYV3dG9w9*FbPP6L`jUqNrEIvilj+~ zWJ!+XNr4neiIho&R7s80NrUt!1ISEdW-<$zmCQzFCv%WF$y{V^G7p)T%tz)Y3y=lL zLS$jG2w9XYMiwVakR{1dWNEStS(YqEmM1Ha70F6uWipVgLRKY%$ZBMDvIbd`tVPx) z>yUNHdSrdF0ojmjL^dXykWI;8vKbjdHYZz_PS~av`~hTud$@my*lK<>U%-CAo@RO|Bu=lIzIz;#3K$BvpzkO_ia_Qst=fR0XOcRf(!h1yWV0s#FkFjjB%7plVXJsM=H= zsxDQJs!uhb8d8m@##9rkDHTjLqe7_WR0}GU3Zq(5ttcB6PT8r}R0I`CMN!dI4Aq8; zrQ)b~DuHTCwWHco9jJ~}C#o~mh3ZOmqqPz*b`cng_fz%+%K@Fyc zP(!I<)NpD9HIhoCMp2`wG1ORU95tSrKux43QIn}D)KqF3HJzG4&7@{gv#B}MTxuRQ zpISgIq!v+&sU_4>Y8kbhT0yO(R#B^|HPl*a9krg?Ky9QpQJbkP)K+R6wVm2Q?WA^5 zyQw|YUTPn;pE^Jtqz+MssUy@;>KJvLIzgSJPEn_+Gt^n?9Ce<$KwYFRQJ1MJ)K%&l zb)C9F-K1_&x2ZeSUFsfnpL#$&q#jX^sVCG^>KXN%dO^LUUQw^9H`H6|9rd32Kz*b> zQJ<+VlpF0%d(fV=7wt{^&}rzjv@e~GPEY&M8R(2OM8h=cIGdx#>J~UOFG0pDsWbqzlo7 z=^}Jdx)@!YE~K|x)I%&ZbCPugXv~;2;H1+L5I>|bW6GwZKK0!JKdU&pd;xhI+~85+t9Ie z934+5&~523(#7dH_9;9z;9n!SoP% zC_RiGPLH5R(uwpadNe(T9!rm-$I}z&iS#6TGChT!N>8Jw(=+Iq^elQdJ%^r4&!gwl z3+RRPB6=~sgkDN7qnFbw=#}&;dNsXv&$^e%cg zy@%dQ@1ytA2k3+JA^I?Vgg#0iqmR=k=#%s*`ZRrpK1-jY&(jy^i}WS>GJS=Lgw^ey@}eTTkF-=pu-59o*VBl`ZfKAeoMcj-_sxHkMt+{ zGyR2jW84`J#*^`4ycr)R4U?AfWzsR}89ycilaYZKm;nsJpbW;~48f2L#n24HunfoW zjKGMD#K?@osEo$wjKTOb0Zb+)Gn0kM%4B1*GdY-?OfDujlZVO6 zgel4tV~R5+n37B>rZiKADa({&$}<(1icBS@G84#DVX87gOf{xDQ-i6=)M9Egb(p$L zJ*Ga>fN97yVj43|n5IlH(~JpWnlmk!P$rCN$+Ti@OgLj_S~C$$BooC%GcimXCYFg~ z;+X`dEz^!^&valqGM$*tOc$mr(~arQ^k8~2y_nuiAEqzUkLk}0UHapnYbk~zhkX3j8YnRCo}<^pq(xx`#%t}s`bYs_`# z26L0S#oT7@Fn5`I%zfqo^N@MOJZ7FSPnl=TbLIu}l6l3vX5KJwnRm>4<^%JQ`NVu? zzA$dAJL|!EvR%*pD)3Uy7IyOD)$7WzNvJeZifJIo8#aNsrSdyh!nq^p)Y@owgOv`t;AMl1KBEURW^vN##U!*ur=9QY;CpjvdcVU?;MZ*vae^b}BoKozBi+XR@={+3Xy4E<2B%&n{pW zvWwWo>=Je=E`TdyGBKo?uV1r`Xf%8TKrDjy=y_U@x+l*vsq{_9}agz0TfX zZ?d=8+w2|oE_;u?&pu!uvX9uu>=X7W`;2|gzF=Rnuh`e@8}=>xj(yL5U_Y{-*w5@2 z){S%LJUCCzi}U7uxHMc^&X-HarRV&(3|vMI;$RMN2#0bQhjRo+aui2%499XD$8!QF zauO$V3a4@!r*j79&joOqxXfG@E-ROf%g*KCa&o!2+*}?mFPD$Y&lTVba)r3UToJA) zSBxvpmEcNprMS{u8Lli>jw{bq;3{&JxXN50SB0y}1##85>Rb)3CRdBA&DG)Ra`m|S zTm!Bl*NAJ(HQ}0a!CW&gglo>V;6k}Dt|ixsvvJ{^oomfSaFJXT7tO_RZMax2j*I6K zxVBt7u07X*>&SKDI&)pPu3R^+JJ*Bj$@SuTbA7nJTtBWqH-H<+4dNW!U~ULElpDqk z=SFZNxkPRhH<}y6jpfF18bz z5x1CI!Y$>Nam%?C+)8d0x0+kSt>xBn>$wfwMs5?gncKo`<+gF#xgFe2ZWp(k+r#bU z_Hp~U1KdIG5OdpRbJzD-r)WD06r6+na{#!<+JhG`5b&sJ{O;x&%@{C^YQul0(?Qf5MP)t!WZR> z@x}QPd`Z3(Uz#t&m*vax<@pMHMZOYWnGfWv@KyOBz8YVhuff;kYw@-DI(%Ke9$%kt zz&GR@@s0T=d{aJ{Z^no4&G{C5C?CeR=cIDdja$)Dm+^Jn<8{5k$Se}TWqU*a$GSNN;^HU2t(gTKk& z;&1bJ_`Cc){yzVJf5<=LAM;Q6r~EViIsbxx$-m-X^KbaK{5$?V|AGI=f8sy$UwAjc zUGNY*1uwx{@Db7oX$4;)oseGe6EX-H1xSDeARq!NU;-`>0x3`eEieKrZ~`v~f+$FW zEGU91Xo4;ng1-I)5o zhC(BuvCu?lDg+D7gb<;*&_W0m!i1JWE5Rm&3wEKk5FtbgQ9`s3BeW4>g*YKzND$fz z?S%G12ce_TN$4ze5xNT9gziEQp{LMG=q>aS`U?Go{=xuZpfE^q2!n+o!cbwDFkBcR zj1&@uQNn0pj4)OhCyW;+2or@#!en8JFjbf)Oc!PdGlf~gY+;TtSC}Wv7ZwN$g+;<* zVTrI*SSBnNRtPJFRl;gvjj&c&C#)AX2pfe>!e(KMuvOS5Y!`M2JB3}sZefqGSJ)@) z7Y+yqg+sz&;fQclI3^qyP6#K3Q^INCjBr*sC!7~92p5G*!e!x#a8-P{7VC(0#d>0Wv4Pl7 zY$P@on}|)tV6mAPA~qLWh@oPb*ivjI+Qe|tF18jU#7Hqpj22_WHe#$8C&r5jVq3AD z*k0@)b`(2_oy9Iir2*J;tlbpcuTx3 z-VyJL_r&|+1M#8wNPH|l5ub|B#OLA*@um1md@a5a--_?V_u>cfqxebuEPfH)BzMU} z@|3(JZ^=hWBc+vmrF2qy$xq54Wt1QZmVksvsDw$lL`bAWNwmaBti(yYBuJtpNwTC! 
zs-#J}WJvx}fRstfEM<|hO4+3BQVuDnluODj<&pAA`K0_(0jZ!=NGdE9k%~&iq~cNu zsiag&DlL_f%1Y&=@=^t4ENDMd-qQjF9_ik0G|cqu_@E47o_ zOC6++QYWdi)J5tlb(6YFJ*1veFR8cGN9rr}lln^oq=C{P$srAvhDbxDVbXAEgfvo0 zltxLTr7_Z2X`D1(njlS-CP|Z}DbiGFnlxRSA6~<4x*%PYE=iZAE7Dcznsi;dA>EX2Nw=jt z(p~AEbYFTPJ(M0vkEJKlQ|X!XTzVnBlwL`%r8m-B>7Ddm`XGIjK1rV?^S@Wh?y`sM zDSOG@vX7ibPAmJ$>E!gXpPWI?C_^$V0~wJ~8Iy6DkV%=6X_=8(nUi^0kVRRNWm%C` zS(A0ykp1NVIg^}O&LU@(v&q@z9CA)Mmz-P9Bj=U#$@%31azVL}Tv#q57nO_2#pM!m zNx76CA1LZ-oLmn&-k%!8|^FR`FHRDd`nIC4-VtffQH)3ZkG2rr-*pkP4;H3Zt+Jr|^oPh>E1hilV5B zrs#^H_$vWQCMC0yMaimUQ?e^Ll$=T~CAX4C$*bg3@+$?Df=VH!uu?=RsuWX-Dsj1XbYAbb=x=KBzzS2Nxs5DX< zD@~N9O0d#Q2~nCWEtF6tOlhgKQfx}NVpm!#5lW;Ir9>++N*g6siBsa01f{LgPHC@n zP&z7|l+H>QrK{3S>8|updMdq?-bx>(uhLKHuMAKIDuWb)U~Q@N$wR_-Ww zm3zv4<$>~0d89m6o+wY1XUcQsh4NB)rMy<&C~uW_%6sL5@=^Jud{)f=UZc9J9;&D6 zrFyGAY8o}I>Z_(x)2n`J1~sD!sjv!EL`79h#Z^KjRZ68*MrBn_<5lIn`WhZZ(gZSIwv9R|}{G)k11vwTN0&Ev6P%OQ~@eR_myB)p}}uwSn4DZKO6< zo2X6IV6~YVqBd7ssG(|@+EQ($+SG8>uC`Vq)JQc-jaFmSHfpRIr^c%ZYFo9P+FtFT zc2qm5oz*UCSGAkkUG1UvRC}qt)jn!pwV&Ew9iR?W2dNHqusTE?st!|!t0UBrYN9$y z9j%U0$ExGh@#+M1qB=>PtWHsI`+JI!m3c&Qa&8^VIq30(GIfNL{QhQJ1RA z)aB|5b)~vWU9GNB*Q)E(_38$7qq<4mtZq@as@v4<>JD|Mx=Y=y?os!u`_%pF0rjAI zNIk3`QID#})Z^+2^`v@AJ*}Qm&#LFt^XdilqIyZatX@&Cs@K%(>J9a#dP}{n-cj$W z_tg991NEW$NPVn6QJ<>M)aU98^`-hseXYJx->UD__v#1rqxwnxteXG5Ky%kTG*8V- z^VWQ{G+J8CS4*d**Zi~$T1E}hU=3)9hH99GYlKE>ltyce#%i3#Yl0?fk|t}4rfQm| zYlh~p1!$SH%vu&LtCmg6uI12jYPq!BS{^O0mQTyC70?Q5g|xz25v{0JOe?OH&`N5h zw9;A`t*ll~E3Z}1Dr%Lq%37dSMXRa>Y1OpqS`DqHR!gg`)zRu|^|bn01FfOfNNcP$ z(VA+(S~D#~Yp%7>LbWihrPfNbY2lh(Ypq3Sky?}%t;J|KdrwuKpUtH(j3}gZHP8h8>S7{Mrb3oL~WEd zS{tK{)y8S#wF%lpZIU)wo1#tCrfJi)8QM&3mNr|Pqs`UkY4f!O+CpuSwpd%DE!CE3 z%e58SN^O<4T3e&7)z)e2wGG-vZIiZH+oEmNwrSh79okN9m$qBmqwUr9Y5TPU+ClA* zc33;29o3F$$F&pMN$r$&T05hi)y`?>wF}xs?UHs`yP{pyu4&h`8`@3nmUdgaqutf+ zY4^1U+C%M;_E>wOJ=LCR&$SoYOYN2RT6?3t)!u3EwGY}y?UVLd`=Yt&?z)HWse9?( zx{sblPpkXt>GbrvpPoU_s6#re10B&(9n*20&`F)rX`Rtoozr<;&_!L+WnIx#UDI{l z(EarQJ(HeU&!T75v+3FO9C}VYm!4bCqvzH0>G|~ndO^LAURW=p7uAdD#q|<;NxhU_ zS}&uQ)ywJS^$L1Ly^>y857evZRrMgfnqFP6q1V)F>9zGbdR@JqUSDsZH`E*HjrAsa zQ$1L3ribXw^%i=l9;Ua{Tj@4ET(|43^$0yukJ6*{7`=@itHP7{JLnzt zPI_m(i{4f5rgztS=sopbdT+gt-dFFZ_tyvL1NA|=Lm#XU(TD28^x^sleWaeKkJ3l$ zWAw55IDNc6L7%8k(kJUv^r`wZeY!qFpQ+E%XX|tHx%xbPzP>r3>d`Z9gF zzCvHAuhLiRYxK4HI(@yqLEorv(l_f{^sV|feY?Ix->L7?ck6rfz4|_VzkWbJs2|b~ z>qqpX`Z4{uenLN~pVCk3XY{lBIsLqTLBFV9(l6^*^sD+c{kncbzp3BSZ|isTyZSx- zzWzXes6Wyl>reEj`ZN8x{z8ALztUgpZ}hkNJN>=>LI0?K(m(58bT`A@@Gv|LFT>mL zG13@m4PPUjk>2n#G8h>R$bb!CAO>n+25t}rX;21jFa~RI25$(4Xh?=^D28fihHe;! zzY$<$GBO)kjI2gBBfF8q$Z6yW z7%Poc#%g1YvDR2;tT#3o8;woIW@C%7)!1fiH+C31ja|lWV~?@d*k|lF4j2cGL&jm_ zh;h_7W*j$87$=QW#%be>an?9zoHs5Q7mZ8CW#fu*)wpI{H*Od=ja$ZTfNescyKSP`jH++l zovD1|!z07u;zJ^1%pBh|AwJG*NutBI_SXa>69=YsbK8CQyw52lD9FlgcE(k;t261! zhQxB zB!tAqhsAzv?Dg-<$Y3f#Y>_3KpGozge8*ZaB<%M`jrL4xX6{t^|0ttR`3CuN=gVW& zT_{yvhi}`E*l=^`h>l8hctw~1P69wNGm*Wxb-H5;BruIr(=7Ysx^?&^4MxEc?V=;i zQXNngGqa_-)mpk#>l}iEEPK+A4xd`CR9|J-5GQD&{b5K8iLUc{@MU8)BCE{nvQ|z0FVc79P;V?8tbtBSH2C z-!jw5aKAVgVEw+bAh5mV(b#*NiN8CUo>g$TG5Hfa+lOY@mm_JZcg9HG1eoxYd7i1n@GWdf2NYd1H%V|1NUh?`qzdas!lT`_U&bBlUp&sOR*VkQ4? 
z1LO8oY~1iuHhM>fw+{>bAK;`@fh#z@3iD+8wDU}iTJw(~hYybL4P5gNTZsxKlalYNaNFf&sktRf>rICljEc)eh^OY72 z8urHHv`Ht)jv6lL*akYhVmg>Db$A5Fm_|0hk*-dy94-enDzWm^^)e<2GN5uj!kZC&-3;^J#bI;9=S< zZ#Or0cc<0*MG|*6cMnS+!)yUTwoF!WKK9xsOr27*1^&#AdYC5h^?)+4naX*Xop2sj zF;3onQ$&@Y6=7Lbvk1!Mh4b)AqGpqw$Je65Y}Ktc1=^~FeHF`32g`X*`;(O2NuHKS zx-03TX_F{i(kk6)fBt`E;%^70Z-)0usSqn$*WV9Jwz_|U*xwB5zZsSOT)uC+eUW?|%x{hR3{HOc>R*p6qED{f&w9qB#pAklvglwhZ2uXjPiGq#WE&LZ z8Un1#|LVB=vvym$znStusYd_l-+DW~rY~WR4i%GRn?(M3^miq|O1dbOL<5p&X3IOM(Cw}3 zSKE?6^GibC{p*}S%K=(Fpo|kuer7@oM*p130-ps`E0r}p?`!`3(ex*&&%c!?lvGU6y4$5JU;HH zqD!qXsjq)v8)UNWzdX5Y@8)(y*jMTmaB~xuHW_Bo$WqTQTXf^`zS&-zG<_)B*`jaB zbFJQ(w8Ff|)fOFH*u(p+NpBq(UCW|*n`D`8Q5oDD{LZ8?)4N=>=$1T4miH#jxNCL? zi#DlSWbb?66z+G1?GNI>{}19dwQy5k|4h*K@+O1^l=n}>6%nQ9)H)ErxmpUnVM+G7UeQn$zD zd)dnUEK^DZFVNu~Z;K5JOEf2jrh#*6Thd%Hhpn7zt{7%97lAUMDTm_2m>*9_b_ zW5vz@?hXsK-==U6^hljzpOqra72+M9401ChnW05Ue6YFdAKWq`+N|F0Fn>!PIg&tX zA8J1Pk8%2&shlg~`cD$~G$n3IigHQX42vXPBumPY%)b*$cK8Ih2x(!n6x!hx6&fDt z+$qr1tnf!9eivF!-d*t*5J{2vA3`JrQ@@4CcL4Zo3H@7$m=Vp?5b-dBH^~rroC+eo z&Twn6a|-w$!{b-euRn~}{S!$3&Mdg13_l9>pUuMe;7h*$g?afJ9d&y!_1!Sc^hpZm z9&*K^19m9cag&~{y{5B8{ci^zz+Jw|4;IhEY4M!K-{I9VIy#pLO_StykvuMv*G2Lr zrjA$NH5KH$`1D_(GX-pah|a%;bIezZ_v;!`2Ak+qL4)6_z;^(zU;+GF75Gj`s{WY5 zGteuwlGOS!g-4)Ipht==Zjefi3Ru^c18-VdKQlV-(xmu~W>nxm)}$1keCrVW#?ZRL zbWRlf(g*w|wx1l+|21_g!a#d^a|ZG?jPS>GY#kI#zwt!fenXS$NMqI!;P5O{u6ANo zIHR>%U|qUa3tRz={~4VBEd-bS2%qmDxH%PkCZvK-hGh8UHrIIm4g93|V$N{fS1;^u ztneR8_v@&QR?<->#R>EN`FI>Q0i@>)ZrCer*;{0l`E6AlJ!4{&0m|; zuU@C+_9gk>f9Z9;{w6LoYWQCmn zODO+gK>ph)$p0c=!__s{&XvZdJnT)ZQ&Z~{Y@M2!CyR;yGv@UVA^fLq#J_;~iWXjf z=|cqlnBqGhqV|s|zSD|^skEY?b?x#Y>ld~5kr7GeI}^8e^Id%9-j{jG>m z>f@hh<*roUhjF=*uIEw`Rg$QdM2#ex67F?#9$BrhZ_=XhFIIp4sp}y>n7(giY;IRe z?FG)ZWRkqqCq+KyI_iHcoS(aO_HL%+O`ZFZ@vg~%89)EJxZ+~SKQaDow_pDbmvsG} zY8CeH!&1Lz=QqKL1F6`#Ifb3}5&sI@|AC|9e$COJEmyfXlM;UT{ysSz@qKtA`Tma} z`t#=gB}bp8;^>VOjylp=;hyLy^M!?oe_;yp)t;uFDgI{u;o%DPHAzezugPGJDy9ZP zCb3@3_<6X+VvM;f#>3sY67vW91yeX#?1$Zg3BR`$Kk41?rSMHeg*2CrlFfv#b?q|a z3HI40B1{A0@M<1yzDJV0t*qIKasmBbg1_8lZg#Hz!(T2rFR zwOmsoYi{cBv?k~FIcCLja~On8gd5n-lp8sywuKQOr=(!nR6SH1*6uA=Q} zO--r1be(HgR=HG#nk&S~JK}PEoHEmF`rmETa=yRg@QI3!jSMmI_-$FqZ})gRH|RQd z;(ELG=>B$`O6SCwa|^M#+m}flluzF6oBVQ9de_TMsrLK+!{$?S>D{@6ZM|w>9o|XQ zCy83`BUne;Bx=2XU>)g_X!;~-y^>%Z8Iq`Tsxas;W$x^st**(4nwCKQ%3HzP{CWSZ z7k@rhB##BZAEd2bxR`F)Y_nZox1jxWCvP${U93nks7X9=Gs|!uW)#wSn0CQ=n3l|W zSWB|j@w5E^&Sp5g60A);$qY(gr&81(^T%A`Oh~rFYgscUnVw*dGQt8~u-V97Qi4hwk04t2<30q$z15y!0!C^`GiPe^al?$@_d i8BBV#_=OGTt4c`=(()~UF literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle new file mode 100644 index 0000000000000000000000000000000000000000..88bb6989f5b088e55cdfde2423eb9f0b6d393bb7 GIT binary patch literal 127244 zcmeF&1#}e4-YEKZcMa|kEVw(t1HoNFlu3vY0fM^@9^BpC-QC^Y-Q8}_t>T8W5N@Yf@9)SJ-b9iLU^ZC zw-z598y1}C7Ze>ClIWL|=#c7WoBR=UT$6o$e_f8FT`@#2as|> z!jpoXQVC9No9ZTFgPo!Yc9TKgBW@GQ*PCW!tmlG*qn+Z_8b`)2&`l&J#W_diC{eSV5&bF;w^Dsr|z%=Q{ANZ zG>&0ThI-1|jYX7pLI$9odf^*@yV3Hgfq{W_ema}?PXLmX8Zy+smbU@^@#j?~_*gkj z22?-;^Z|Xre3hL_ShM#h#Jjn5$jY>4n|3!&CVETgS0)C?uGW8CH|@0Y-)_&32IsZbO zqH+bg8af5*YV5SXCf?F?HT_kPJ)JsQ*sn-&zarjBPK9`?TB1`&JvD)e>5vfQ9B^tP z6OtV1lyR){k#zd1P)|uXh2XSsZ}X%&2S+D|C#JeWl7rnuuu~ILT>;67p3AN7rbD0= zIL#h&dM#JQwEoIf#Z}ei@2cji;i~N%@zm)|e^*m~^<~bvHE+eP2A$il40p|Ti+?@l zxnbPtnE0y6iIK74iHU#eDJkeEoi57h57Tfv8C39tBOjKb zhsRI$_3?<$j`4GPUh}xKn3n&GqqKiP zipRFi${qTmdMi<(K&MfA+p_B)A3b%YwX5BqYnRgqU6KC}H78fRr>*>}`s`HY|C3t( 
ztLR*dpt>#nm!<6#?yu1QvY$K$JVx1==p427oR}w$GNrXE$5*+gDMq0bm|6Oktkr3A{JT}!W*6@A^ ztoHlU?H`>slXi=dMSk!8qXg#?{>2jjMKKJ=)1ClPxtwr+Zl+FpQ3X6V;Ljjv+CNX& zaPNgDy6Im-M$i4HcV&Pk@n4S7Tnqowbl89DC|ygVsyZi4bb5BqI|8g4-XStJ)kQ~D za}NI+nEp3IK`L#G=7|;X@5iEBh6g8j2Aoc`M*mebHyPs@o2CW`f9c_C#U&?rVz>IQ zV*fb~*D%=`5{3UdZ}9IiouREW!gY(@QIj)n^X5lt0G>ABbuljX-^p}0=XsLV_YUwq z7Z?5eDFnCWJeInlDcGBhC$ns1<+PW$AIg*#!KNWyO+yvHeTzIr~oemF5=B4d-2oc_Q$50&;gH884$ zbNSBT(dEf%Crp>{@Q!Jzu|clj#31h!#fdP4crKjvL=0-;9Uswd!n5a$p3*9VGf{|)4NY~PO!TjD zNLozPxd;rVOK`-Kmi&*}5#$PTMio&p&Sm_nSWyXo>_jDdt{;i&>=2lii%3-0v|}mW zW8Knrx_fter0w+d;uL38ue1Zbz2ACEF{+Q}5aqp?z^Hzn9W<)HH`rfG%@@MD`PkrC zs8orm^>$gF+-E%v_m8Zf?e5m{&?=1s^@JUo+iE5IrJ}Y`7ig{I6NlOTjQapJMvpxx8U5D zGvyERw!_o_pWos}op1sM_;{*}^hX+hWcsP!Qb`NR{!?7m6PK#dvP97m#XMJi{=6z|8n4st`aWSb;W1Qe=)iSO2QooB&pg-x<%u#r&!V)jXw3N8S4E0$qQu28=5%I?~Ck#MHcQ z#?wW_1SfV(b<;LLEH)q~JfH{Y~&Y*NYM|IOftOB*>P z|7mdaPqPm19HW7^;yQnv>-BD)>7i%C(?AH21S!xnmGo@hnWtk~3xEmeuZcrBS76i| zCuO%#!?QH|F`qg z^DIsZPyJ&i^6!lo{=2CQ)RzCfIjU1s2OeFa&T9x_c8hE3F^nhXd0abf(i@MMU%@ID z92@T3J<8*(2F-4Ge0lpxfs2k66MJ{}ID#;aT-5&c+|x~_ElB9nQH zf7h1&SnPBb3#2B$Rz81kQFzq8KN|8sJ#d^RocWJ6%DoW! zUft?Ec>YxLsd%RsiF3xofqy?Sfv??s18=xL<;lM}A^4j$5GO;TY^Pgs+BE^a3kgyn z4Kg4Lav%>1pa}kccgfC(G&DHcGir4@)TpLTkiV?O{klCW?e?4zR^Y#PSM@%5*RWiu zcl{VD{I5k|Jk(k9^>pD*Z=O`7WUAZ#z1BHjhIfc_`tIOpr}y^%ZSU9f-Ax6B#)Wy7 z!~VL=QST8BQ^2Cmxc86c%)jl(tDf?9$f&cthdX^XiBJgb^vx{7A-uChA|etZBMPD- z8locxVj>n|BR)tvBt4P=$%te^G9y`#tVlK_JCXy*iR40ZBY6;CBrlQ=$&VC33L<_; zA*3);1SyIXLy99MkdjC#q%=|nDT|at$|Dt!iby4-GExPpiufbdkm^Vcq$W}esg2Ze zhEa8qdPpErA8CLzL>eKDktRq}q#4p2X@RsvS|P2GHb`5f9TJ4JM}ml8NkrBv9WE3(Q8H0>P#v!T5cw_=H5t)QcMy4QBk!i?uWCk)5 znT5~0CEsHgd9eWAV-m7$Z_NZauPX(oJP(dXOVNrdE^3e z5xIn1My?=Nk!#3x}U=&Cz=b*jpjjp(Y$CrG(TDZEr|M| zh0wxi5ws{;3@wh9Kue;f(9&obv@BWhoD2zVd!wwjgCM^qNC8!=ooY?Iu1=m$Da%dI!CW-b3%B5739`BlI!)1bvD=L!YBB(3j{d^fmeh zeT%+B-=iPUkLV}#Gx`Pnihe`Cqd(A}PUCl0fG`{*FcPCM8e=dP<1ii*FcFh58B;J7 z(=Z(~FcY&d8}q@^Vd=39SVk-pmKn=}WyP{#*|8j0PAnIe8_R?FVtKKASbnSkRuJ>U z3Sot@B3MzZ7*-rBftAEcVWqJ$SXrzbRvxQ>Rm3V`m9Z*VRm>l&hE>OEU^TH?SZ%Bh z7J${o>S2LceXIf25Nm`r#+qPFv1V9vtOeE*YlXGO+F)(5c32SB9t*}ouuv=v3&$d` z4wwsz#Gk zHUJxl4Z;RvL$IOPFl;#H#ztTxu~FD)Yz#IQ8;7N0#+^kMr;$d8QX$w z#kOJFu^re>Y!|j0+k@@J_F?<61K2_A5Ox?lf*r+!=dlae zMeGuG8M}gA#jau3u^ZS;>=t$#yMx`u?qT<_2iQaG5%w5+f<48aVb8G_*h}mc_8NPG zy~W;P@39ZqN9+^!8T*2L#lB(Ru^-q^r|U;?X9W-^a1y6*8fS18=Wreua1obq8CP%> z*Ki#-a1*z18~4G};py=Vct$)Eo*B=AXT`JO+3_5BPCOT$8_$FL;(76Wcz(P9UJ&=g z3*m+FB6v}}7+xGNftSQf;id61cv-w0ULLQ2SHvsfmGLTgRoox1hF8aH;5G4Dcx}86 z9)Q=y>*0ZTeY^qQ5O0Jx#+%?x@n(2)yanD8Z-uwU+u&{Sc6bop9uLMt@K8Jq562_$ z4!8@C#G~+zcr+e^$Kr8#Jl+XUz!UK#JQ?qdcfq^jDR?)$JKh8DiTA>L<9+bHct5;9 zJ^&wx55foIL-3*aFnl=f#z){I@lp6_d<;GoABU&n+ucvMtl>#8Q+3$ z#kb+x@g4Y1d>6hO--GYP_u>2T1NcGw5Pldxf*-|?;m7e4_(}W}ei}c6pT*DN=kW{p zMf?(e8NY&G#joMl@f-L}{1$#2zk}b!@8S3H2lzwpQm&f`@~)AGhdD3W`Ji9p3D0;T zFiLg?IiBT&e;##t#|~-3uBw%@IO9RsJVQ#)1_P-7?P(sDmW(sFNZUN`9zC09=;_(K zYaWiWcMoxf9%-BNj>)?@<9+X@g9d1V7HES$U^*~8m;uZPW&$&VzivG%>}LbBgE_#Q zU@kB>mil3=9V&zz(1bj0B^=j$kwx1IB`JU_96fOaK$XBrqB540ZvX z7mMC41?&cP2YY}$!Cqi*un*W5><9J-2Y>^?LEvC;2sjiR1`Y?^;0SOeI0_sMjseGl zA>`01~4O- z3Cs*;0keYH!0ccSFejJ`%njxNeZjn7J}^J%d9m)<3W9!MA+Ru51S|>`1B-(tz>;7o zuryc(EDM$c%YzlbieM$MGFSzy3i^Z9!0KQPuqIdwtPR!yJ+I?ETV1do7zow}8-NYL zMqp#G3D^{D1~vyh?-xB=ORyE#8f*i$1>1o^V0$na3;{#IFfbg906Ty#FcORcJA%=m z=UuC3iv{Drc(4;iTLQ^0PZ=iRJl^Sqt)Y(2qVU~jMw*ca>v_6G-m z1HnPyU~mXH6dVQ)2i@QZa3nYi91V^E$AaU)RB${v0h|a<0w;r0z^ULga5^{xoC(eX zXM=OVx!^o-KDYo}2rdE_gG<1r;4*MIxB^@Wt^!wsYrwVOI&eL>0o({~0yl$Oz^&jm za67mI+zIXicY}Mtz2H7@KX?E<2p$3tgGa!l;4$zxcmg~Lo&ryUXTY=IIq*Dq0lWxa 
z0xyGCz^mXj@H%({yb0a{Z-aNhyWlYxFd zpaptf9Cwp1Z zU9cV)2-XK1fDOS$U}LZe=y`4D*_wgP!4_akuoc)EYy-9h+krt~doUOb0YkwsFdU2k zJAf`Q5{v>ng3(|M7z@UM@n9z~0Zaswz+|v9*ahqgrhwhR?qCnFC)f+@4fX;1g8jh$ z-~ezSI0zgJ4grUP!@%L78yo?S1V@3R!7<=ia2%Kljt3`z6TwN~WN->N6`Tf62WNmY z!CByJa1J;ZoCnSa7k~@FMc`s^3Ahwo1}+CzfGfdO;A(IUxE5Rot_L@O8^KNBW^fC* z72F1H2X}xw!Cl~Pa1Xc_+z0Ll4}b^3L*QZX2zV4c1|A1bfG5FI;A!v-cosYdo(C_0 z7r{&5W$+4k6}$#s2XBBk!CT;M@D6wvya(O~AAk?RN8n@d3HTIz20jO0fG@#U;A`*= z_!fKzz6U>mAHh%HXYdR775oN%2Y-M+!CwpgD4y28VjvC@APG_+4Kg4Lav%>1pa@E! z3@V@sYM>4ppb1)_4f=rT!1Q1SFe8`=%nW7$vx3>c>|hQsCzuP&4dwxT!MtETFh5uT zEC~96g}}mK5wIv&3@i?o084_Uz|vqDuq;>(EDu%yD}t54%3u|+D(DYZ1FM7H-yAtr zr6%mx0&9bHzyPo=SPu*Y>w^uzhF~MGG1vrb3N{0qgDt?8U@NdS*amD1wgZE}_Fym= z0)~QNU^o~7b^u*qBp3yD1f#(iFcyphh!2#eva1b~c90Cpnhk?UEH#h0=l zDmV?C4$c5)g0sNc;2dx+I1ii;E&vyTi@?R;5^yQF3|tPb09S&mz}4Uya4onFTn}yl zH-ekM&EOVrE4U5Z4(nW#ckCH#qMM0KJDQIn`e)F$c>0YqJ*9uY{?CmIk9iAF?Yq6yKI zXht+AS`aOXRzz!}4bhfpM+6b=iC`jx2qnUZa3X@}K)8rVB8uopL=!PYED=Y<6P<_z zB9TZUl8Mel7osbXLUbd#6FrEYL@%N@(TC_u^dtHc1BijdAYw2vgcwQ;BZfQwTkZ&A zBr%E@O^hMN661(eVmvW{m`F?_CKFSLsl+s5Ix&NoNz5W<6LW~U#5`g?v4B`eEFu;Y zONgb!GGaNgf>=qcB32V?h_%EzVm+~e*hp+5HWOQjt;9BBJF$bB2E)$h_l2w;yiJIxJX1B3=`3h_}Q$;yv+!_(*&rJ`-Pvuf#Xv zJMn|~NgyOjVkAxyBuP>vO)?})awJa*q)1AnOe&;GYNSpYq)A$&P5O}O$n<0eG9#IZ z%uHq>vy$1!>|_oyCz*@PP39qe$-HDfGCx^>EJ*s1g~-BW5wa**j4V!;AWM>^$kJpP zvMgDSEKgP-E0UGS%48L?D(O#FBde1&$eLs=vNl0^~gZ7KG}e5NH!uHlTFB` zWHYik*@A3Iwjx`TZOFD{J2HrDPX?1AWGER%hLaIw2hv4Gl2K$wGMbDbW63x&p6o;> zkcngxnM`&jyO3SU6tWxHo$NvOBzuv)$v$LXvLD%>96$~v2a$uxA>>eU7&)ADlOxEH zwA)k`Z$miq>@+J9-d`-R~-;(dh_v8oiBl(H^OnxE1lHbVh zT(o-3zj8rBn zGnIwPN@b(6Q#q)dR4ytvm51`B@>2Pz{8Ry|Amv9Dq6$+*sG?LcsyJ1GDoK^1N>gR1 zvQ#;$JXL|JNL8XLQ&p&{ls{FCs!r9QYEreR+Eg7XfT~N?qXMb=R0FCZ)re|LHKCeP z&8X&73#uj6ifT=@q1saIs35966-Iq;MNu88Xex$^rQ)b~suPt! zB~nRLGS!*tLUpB5sBTnust47R>P7XY`cQqTepG*I05y;rL=C2fP(!I<)Nsm8ji5$S zqo~o;7-}pvj!LD*Qxm9()Ff&$HHDf=O{1n$GpL!=ENV72hnh>xqvlf!sD;!bYB9Bh zT1qXWmQyRJmDDO~HMNFXORb~UQyZv_)Fx^(wT0SBZKJkRJE)!1E^0TmhuTZ+qxMq= zsDsoY>M(VLI!Ya*j#DS7lhi5dGjJ`E$TLP zhq_DMqwZ4=sE5=e>M`|%dP+T`o>MQVm((ljHT8yiOTDAsQy-|0)F$E|ev_;#r51o!qPiLSr(wXSY zbQU@*osG^;=b&@ax#-+<9@>}AOXs8W(*@{)v>#oFE=(7pi_*pD;&chRBwdOwO_!m| z(&gy#bOpL1U5TztSD~xY{&Y3EI$eXVN!Oxl({<eej-%u0PILmD zNGH+BbZ5E?-IY$EyV2e09&}H-7u}ogL-(co(f#QG^gwzLJ(wOs52c6E!)Z4?f*wha zqDRwX=&|%TI+Y$zPoO8#ljzCx6nZK>jh;@=pl8yv=-KofdM-VWo=-2J7t)L9#q<(- zDZPwdPOqR>(yQpz^cs3Cy^dZ_Z=g5Qo9NB-7J4hajowc0pm)-{=-u=ldM~|?-cKK( z57LL|!}JmQD1D4RPM@Gp(x>Rt^cngreU3g)U!X72m*~s%75XZDjlNFbpl{N*=-c!i z`YwHszE3}(AJUKL$Mh5WDgBIoPQRdE(y!>(^c(su{f>T5f1p3opXkr@7y2vxjs8ym zpnuW`gEAO{GXz626hku%!!jJhGXf(r5+gGTqcR$!GX`Ta7GpC$Ogbh#lYz;|WMVQi zS(vO$HYPihgUQL{VsbNi7+)qYlaI;I6krN6eoP^zFjIsn$`oUYGbNakOev-`Q-&$a zlw-;>6_|=lC8jb{g{jK;Gu4>tObwY)wn8r*KrYX~m zY0k7@S~9Je)=V3wEz^z(V%jsoOb8RogfZbv1k-_WF_BCZ(~*g0VwhMaj)`YFF$qi} zlf)!5otZ97S0;t&#&lopn6Jz?<~#F) z`N<$G%3>_e5-iD5EX^`3%W^Ew3arRVtjsE`%4)368m!4$tj+qc>Dcsa1~wy`iOtMr zVY9N?*z9Z$HYb~l&CTXvec8NhJ~lsFfGx=Sv4z;eY!S97TZ}EvmS9V=rP$JJ8MZ82 zjxEntU@Nkf*vf1bwkqq-R%5HPHQ1VLEw(mWhYeusvh~d*`VPn}iHlFRoCa{TY z5}V9+X1lOm*%Y=L+nw#f_GEjpz1co&U$!6HpB=ysWCyW>*&*yub{IRHb+aSbk?bgT zG&_bJ%Z_7H+41ZIb|O28oy<;Qr?S)7>Ff-4COeCr&CX%xvh&#a>;iTnyNF%PE@79l z%h=`Y3U(#Cie1gFVb`+j*!AoNb|brq-OO%bx3b&V?d%SAC%cQ?&F*3MvisQm>;d*5 zdx$;E9$}BN$JpcS3HBs=iapJqVb8MX*z@cK_9A(J>+B8oCVPv$&E8?} zviI2g>;v{8`-pwaK4G7-&)Dbe3-%@Ziha$#Vc)Xv*!S!Q_9Od={mg!0zp~%h@9You zCyQ_>hjBPZa3n`@G{PpjjPVp;A(QUxY}GDE`Y1c)#C!W`dkC9A=ij&%r)Vfa?QBr zTnnxx*NSV+wc*-w?YJPWJr~S{aG_in7tTd+9XJ;k$whG;xo9qii{;|Dc&-zdz$J1? 
zTr$_0>%w*AQn+qhcdiH5lk3Ix=K64bxqe)KZU8rs8^jIfhHyi z+!$^wH;zl?#&Z+6iQFV^GB<^r%1z^@b2GS^+$?T3H;0?c&Ew{C3%G^cB5pCagj>oj z-DH&&%iI^YaDxg1jGJh%d|+;fwOc_~LvCz9e6YFU^3I3K}x;9YzqAH{d%qxl#k7!Vl$#@xysHKY}00kK#x3 zWB9TBI6jph&rjed@{{<<{1kpFKaHQx&){eBv-sKk9DXi8kDt#k;1}|X_{IDZeks3< zU(T=KSMsa))%+TMEx(Rm&u`#2@|*b0{1$#Izm4C{@8EaxyZGJw9)2&skKfN9;1BYL z_{01W{wRNpKhB@vPx7bu)BG9!EPswa&tKp#@|XC_{1yHxe~rJ+-{5cZxA@!q9sVwV zkH619;2-jj_{aPc{we>Af6l+)U-GZ`*Zdp)E&q;x&wt=Q@}Kz6{1^T!|Be67|KNY} zh=2;1fD42`3Y0(#jKB(Ou{nrcg_$Ez}VLgt|gKAyB9wP2ol;0!9s`-DufB)LWIyka0!t@l+aO#7Gi{0Ax?-FItd9vqL3sc z3!Q~7LRTS0=q7X*dI&v*UP5o7kI+}>C-fHv2m^&d!eC*DFjN>O3>VzO2w|i!N*FDS z5ylGRgj8X?FhQ6oOcEvwQ-rC)G-0|hLzpSd5@ri?gt@{zVZN|HSSTzK77I&+rNS~{ zxv)Z5DXbD!3u}b6!a8BSutC@;Y!WsLTZFB`HetK4L)agOUy0i5q-tHVm>jySU@Z&`iX_a!eSAzs8~!aE|w5WilxNTVi~clSWYZ2 zRuC(SmBh+o6|t)5FIE$)i#5cWVlA<@SVs&H>x%WnK(W5qKx`;B5*v$6#HM01vANhn zY$>)9TZ?VPwqiRmNNg_#iy>mD7$$~`5n>0?B}R%-Vn;Drj1gnSI5A%ABqoT7Vv?9F zb{4ycUBwi!o7i3KA@&q|iM_=>VqdYJ*k2qV4ipE8gT*1@P;rg#F64CakMx_ z94n3!Q^oP(1aYD`Nt`TB5vPjN#OdM;ai%y+oGs1~=Zf>h`Qid`p}0s~EG`k3ip#|1 z;tFx4xJq0tt`XOY>%{fq263afN!%=M5x0ul#O>k^ai_RT+%4`A_lo<({o(=fpm<0; zEFKY$ipRv`;tBDjcuG7io)OQA=fv~k1@WSINxUpx5wD8Z#OvY>@uql7ye-}l?~3=t z`{D!fq4-FAEItvRiqFL7;tTPm_)2^&z7gMw@5J}w2l1o$N&GB+5xTKk}6A8q^gp?R86Wb)sSjRwWQio9VtMnE7g+%rTS6>siD+JYAiL8no7;2=28o( zrPNAlEwz!_O6{Z|sl5~|g-D@Nm=rEWNF5}X6e&eX9i?a~Mv9f5lprNaNm8=Z zS?VHnl~SZ`Qg^9`)Klsu^_KcbeWiX0ibX|gm$nkr3`rb{!VnbIt2wlqhYE6tPUOADlh(jsZGv_x7eEt8f@ zE2NduDrvQ}Mp`Salh#Wcq>a)hX|uFN+A3|6wo5ytozgC8x3ovvEA5l^O9!Nb(jn=v zbVNET9g~hrC!~|oDe1IyMmj5*Qq>Iue>9TZ1x+-0hu1hzho6;@mwsc3jE8Uas zOAn-n(j)1y^hA0pJ(HeGFQk{!E9tfLMtUo~lio`oq>s`k>9h1j`YL^szDqx(pAsUY zGA83PA(JvC(=sEoGAHx0Ad9jj%d#S?vL@@YA)B%#+p>?GPEIdpkTc4e|3;l55L#&p$~hH@jhvD`#%DmRmx%Pr)V zax1yD+(vFIx08e9_HwWsB8SRha=08JcaU9jq#Px8l%wSsIaZF7&}~DUXsz%VXrR z@;EtF9xqRjC(4uL$?_C=syt1eF3*r>%CqFz@*H`tJWrl4FOV0?i{!=f5_ze-OkOUp zkXOp9tV%W| zyOKl6spL{}D|r-OC9jfC$*&Yp3Mzg|A*HZVL@BBiQ;I7kl#)s*rLOw%P6<-lE5S;L5~_qL;Yx(kL2)UON|e%3iB@8iSS3!0S2`&PN}`gaBrBbj zE=pG=Md_w=S9&Ntm0n73rH|5A>8JEp1}FoSLCRodh%!_erVLly$_QnoGD;b(j8Voa zitWx29K zS*fg2Rx4|iwaPkWy|O{ssBBU;D_fMU$~I-YvP0Ra>{50sdz8J(K4rghKsl%!QVuIe zl%vWq<+yS}IjNjdPAg}Wv&uQ;ymCRgs9aJmD_4}O$~EP>aznYP+){2Uca*!zJ>|ag zKzXPosQRge)WT{JwWwN5Ev}YOORA;R(rOvCtXfVjuU1ei zs+H8rY8ADr>aSK)tE)BCnrbbzwpvFGQ0uDo)Ihbq+CXinHc}g_P1L4pGqt(eLT#zG zQd_HS)V69nHAro*2CE@zs2Zk*s}X7k)ul$NQEEpuT8&X-)i^a??W88CiE5IXtaetr zs9n_*wVT>q?VQHrI8M7I!T?ZPEn_-)70te40WbDOP#IGQRk}j)cNWHb)mXQU92uqm#WLuIQYAx=G!vZc(?Y+tlsq4t1xxOWm#RQTM9*)cxuK^`Lr4J**y4 zkE+MiILIe0s`bqt)eo?=w-_-Bw5A~;tXsCv1 zxJGECMrpLhXspI*ye4R(CTX&!XsV`Zx@KsmW@)zOqovc*YZuS~;z}Rza(% zRnjVJRkW&_zgA7FuGP?LYPGc5S{*GwtE<)10=4>D1FfOfNNcP$(VA+_wB}k1t)+G_2zAg#R?tc7TyT9_8DMQ9x~mlml-X&tp_Ek=vg;-SK zb=6X|Zd!M(ht^Z;rS;bOXnnPQT7PYTHc%U+4c3NeL$zVraLuia&_-&bw9(oaZLBs< zOV!3}6SRriByF-bMVqQk)23@Pw3*s0ZMHT?o2$*!=4%VIh1w!*v9?58sx8x&Yb&&s z+A3|ewnkg4t<%17qpAoCGE0yMZ2n9)2?ebw42&3?Y4GDyQ|&P?rRUU zhuS0UvGzoJsy)-5YcI5y+AHm~_C|ZFz0=-nAGD9!C+)NLMf<9K)4ppzw4WNHqdKPJ zI-!#~rPDg2vpT2qx}b}?q|3UZtGcG^x}lr8rQ5oXo=#7%XV5e1ne@zh7Coz;P0z09 z&~xg!^xS$L-B-`6=hO4+1@wZtpI%5WtQXOX>c#ZpdI`OxUP>>mm(k1W<@EA;1-+tP zNw2I|(W~nIdNsYeUPG^`*V1e2b@Tweu3k?M)a&aF^oDvPy|LazZ>l%bo9iv~mU=6_ zwcbW=tGCmG^!9qN9-@cpVS2b8p?A<-dZZquchsZx7(G^x)8q9{dV-#)C+W$0XT6Kw zRZr2o>D~1ndQZKV-dpdZ_tpF9{q+I*Kz)!tSRbMf)raZBb+-`+4>xPu0BtnuP@LS>WlQn`VxJqzD!@Puh3WO ztMt|S8hx$4PG7HY&^PLv^v(JfeXG7r->&b_cj~+J-TEGVuf9*;uOHA4>WB2h`Vsx8 zeoQ~EpU_Y0r}WeM8U3t&PCu_-&@bwj^vn7c{i=RVzpmfVZ|b-7+xi{-u6|FyuRqWq z>W}ot`V;-B{!D+aztCUmuk_dY8~v^RPJgd|&_C**^w0Vi{j2^>|E~Ygf9i;V8km6_ 
zgh3jVK^u(08l1r!f*~4`AsdRJ8k(UShG80(VH-Y1IwQT2!N_Q2GBO)kjI2gBBfF8q z$Z6yTHwG93jX}m>V~8=-7-kGN+{OrFq%q1EZHzI-8sm&q zW4tlJm}pEgCL2?Xsm3&8x-r9;Y0NTa8*_}g#yn%bvA|epEHV}wON^z)GGn>1!dPjn zGFBUFjJ3u(W4*D#*l27rHXB=vt;RNEyRpOAY3wp~8+(ks#y(@ealkle95N0YM~tJ! zG2^&#!Z>N1GEN(3jI+i$jtHw3sx^cs}Y1}ey8+VMm#y#V{@xXX! zJTe{|PmHI=Gvm4O!gy)CGF}^RjJL)+cvzpn=>}C!# zr?g z6tkPz-RxoZG<%u7%|2#dv!B`D9AFMK2bqJ-A?8qXm^s{ZneGxy#&b?lJe8`^^330rQ}F$UJNwF^`(Z z%;V+>^Q3voJZ+va&zk4V^X3KfqIt=@Y+f<1n%B(h<_+_vdCR=<<{8j<0pyg*3vI<*8tfE#itGHFdDruFnN?T>DvQ{~(yj8)fXjQT* zTUD&8mcLcas&3V=YFf3d+EyJaz^ZH2vjVO9Rs*Y{)yQgWHL;pn&8+5D3#+Bo%4%)3 zvD#YgtRSns6>NoAp;nj`ZbeufESD8&MOhuKXe-8wwc@OJtCN*rC0a>Vvent@Vs*7r ztZr6!tB2Lo>Sgt|`dEFfepY{LfHlw>WDT~4SVOI0)^N*hjj%>qqpZ=^7;CIG&Puh$ zTNA8_)+B4PHN~20O|zz3Gpw1`ENiwk$C_)+v*ue1tcBJhYq7P&T52t`mRl>VmDVb2 zwYA1tYpt`^TN|v6)+TGSwZ+#%jiI%*xW zj$0?Jlh!Hgv~|WhYn`*sTNkX0)+Ot*b;Y`BU9+xRH>{i1E$g;*$GU6Xv+i3DtcTVk z>#_C3dTKqho?9=hm)0xmwe`k&YrV7HTOX{C)+g(;^~L&XeY3t>Kdhe?Vxu-@<2GTF zHf7T`W3x7A^R{4%wq(n;Vym`h>$YK=wq@J4kDbm=Z)dPG+L`Rkb{0FUoz2c}=dg3y zx$N9_9^2Q>Yv;4`+Xd``wx3b_KhlUCFL& zSFx+w{&qFHx?RJrY1guA+jZ;!yRKc&4z%mr4eW+?BfGKP#BORgvzyy3?3Q*byS3fM zZfm!*gY5QpupMHD+F^FM9btE{U3R1$Wp}ir?HD`Oj3C9(GT=m)+a$WB0ZD+5PPS_CR}(J=h*%54DHc!)>=c!X9alvPauv?6LMZJJlX< zPp~K2lkCa%6nm;Y&7N-0uxHw{?Ai7ld#*jto^LO(7ut*L#r6_=slCizZm+Oc+Nn zZlADE+NbQ(_8I%Eea=2_U$8IQm+Z^-75l1v&Ax8muy5M8?A!Jo`>uV@zHdLUAKH)X z$MzHZsr}4;ZojZ!+OO=__8a@H{my=Gf3QE=pX|@}7yGOI&Hirxuz%W!59)*Y;68*8 z=|lO@K8z3R!};(&f{*AU`N%$skLsiO=st#z>0|lWK0ZF_eA4@5@X6?t$tSZ<7N4v> z*?hA5itWx29K zS*fg2Rx4|iwaPkWy|O{ssBBU;D_fMU$~I-YvP0Ra>{50sdz8J(K4rghKsl%!QVuIe zl%vWq<+yS}IjNjdPAg}Wv&uQ;ymCRgs9aJmD_4}O$~EP>aznYP+){2Uca*!zJ>|ag zKzXPUky~LMykox6lzK}m6}>jqo!5Usp-`WYDP7anpw@FW>vGP z+0`6sPBoXBTg{{9Rr9I&)dFfkwUAm^Eut1xi>bxc5^71clv-LXqn1_6spZuQYDKk@ zT3M~4R#mI1)zunmO|_OO^&tI$52fPF1I=)72U3Om&tzTb-lMRp+Vm)dlK8b&PB^wx>?<#ZdJFb+tnTFPIZ^MTiv7XRrjg;)dT85 z^^kg4J)#~}kEzGi6Y5FzlzLh{qn=gIspr)T>P7XEdRe`qURAHD*VP;9P4$*~TfL*+ zRqv_y)d%WB^^y8meWE^9pQ+E)7wSv(mHJwJqrO$&sqfVf>PPjH`dR&=epSDz-_;-L zPxY7jTm7T{RsX3`w5VD%ExHy%i>bxZVry}Rd7v~Vp#i?1cn5^9OG#99(9 zsg_JrG*#0yq3N2TnVO~9nxnazr}omP0Oz3&~j?IwA@-AEw7eO%dZvC3TlP4!delns8&oXu9eVAYNfQ&S{bdZR!%Fg zRnRJGm9)xQ6|JgPO{=ce&}wS6wAxx7t*%y2tFJZC8fuNS##$4tsn$$uuC>rwYOS=^ zS{tpc)=q1$b#q&a25N(}!P*dQs5VR+ zu8q(}YNNE#+8AxDHclI_P0%K4leEd&6m6+8S-GwoY5GZO}Gqo3zc^7HzAxP1~;R&~|FOwB6bsZLhXZ z+pita4r+(A!`cz;sCG;{uAR_MYNxc*+8OPvc1}C5UC=ISm$b{;7452aO}nn$&~9qC zwA(0*#awBOnv?XUJviz1?mXd=3ZA!3SHBDRPl;))OvD&mPS5iTM`e33vT6p2J) zkwhdF$%G$VF*)L!WNEjg(rLw2q_{(a*;x$6sbgNkw&By=|p;wL1Yw}L}rmi zWEI&&c9BEm6uCrhkw@ee`9ywEKok^(L}5`x6cxopaZy5)6s1IIQAU&%Rg zL39+IL}$@ObQRr1chN)i6um@m(MR+Z{X~B;KnxUv#9%Q*3>Cw~a4|xR6r;pwF-D9P z%@AoL2MM8#AdNYY!%zYcCkb36uZQ3u}AC``^0{6KpYf@#9?tn92LjJadASN z6sN>#aYmdK=frt&L0lA<#AR_sTou>Eb#X)76t~1}aYx)0_r!hiKs*$W#AES9JQdHx zbMZpF6tBc<@kYEA@5FoYL3|XS#AoqEd==lsckx5~6u-o8@kjg>|3nl$svb>`uE)@0 z>aq0LdK^8j9-@cp@$@h~T#wM>>k0IPdLlisogOkJOXvDfEjm_JdLg~AUPLdd7t@RDCG?VdDZR8_MlY+E)644>^on{Vy|P|Kuc}wmtLruNntCn0 zwq8fCtJl-(>kagVdLzBD-b8PzH`ANzE%cUpE4{VeMsKUP)7$GE^p1Kby|dm$@2Ypx zyX!slo_a66x86tZtM}9U>jU(G`XGI)5q%*^ojZ; zeX>49pQ=yOr|UEHnffe!wmwIntIyNt>kIUS`XYU?zC>TDFVmOnEA*B6Dt)!SMqjJ1 z)7R@8^o{x^eY3tr->PrZx9dCfo%$|)x4uW;tMAkI>j(6M`XT+Wenda2AJdQPC-js0 zDgCs5Mn9{c)6eS{^o#l>{jz>Vzp7u;uj@DToBA#Nwth#ytKZY_>kssY`Xl|Z{zQMO zKhvMf>|Ehn}zw1BrpZYKTxBf@}tN+uZ7*UOA zMsy>F5z~le#5Up>ag7io)QD$<8R15R5#LB)Bs3BkiH#&iQX`q67^4m_6f_DMg^eOcQKOhq+$dp`G)ftzjWR}AqnuIRs9;nyDjAiHDn?bKno-@T zVbnBg8MTc%MqQ(xQQv4_G&C9+jg2NoQ=^&D+-PC6G+G(0jW$MGqn**-=wNg-IvJgf 
zE=E_Qo6+6qVe~Y58NH1@Mqi_!(cc(g3^WEAgN-4^P-B=e+!$euG)5VtjWNbpW1KPG zm|#paCK;2BDaKS|nlas&VazmU8MBQ!#$02bG2d8VEHoAwi;X45Qe&C1+*o0(G*%g_ zjWxzvW1X?y*kEilHW{0ZEyh-3o3Y*4VeB+^8M}==#$IEevEMjg95fCYhm9k~QRA3# z+&E#JG)@_(jWfns1gJTx8| zkBukBQ{$QO+<0NUG+r66jW@eGxy#&b?lJe8`^^330rQ}F$UJNw zF^`(Z%;V+>^Q3voJZ+va&zk4V^X3KfqIt=@Y+f<1n%B(h<_+_vdCR>R4bYl-HKtwv|?GYtvFU(E5r)5;#pxw0J*{3=Z>x{h*Xn2Ww+2`PtwGjcYlt<}8fFc*Mpz@Q zQPyZ{j5XF8XN|WeSQD*D)?{mnHPxDCO}A!PGp$+HY-^4+*P3U|w-#6ntwq*iYl*eg zT4pV`R#+>oRn}^2jkVTVXRWt3SR1WP)@Eyqwbj~YZMSwhb ztwYvf>xgyKI%XZWPFN?cQ`TwgjCIyJXPvh$SQo8J)@AF8b=A6NUAJynH?3RNZR?J8 z*Scrjw;osztw+{l>xuQ$dS*ShURW=!SJrFmjrG=gXT7&RSRbuV)@SRB_0{@jeYbvC zKdoQZZ|jfs*ZOBgv7_42?C5q3JEk4Wj%~-W(!9vTNIQ?7DV6yT0APZfG~M8{19nrgk&Cx!uBUX}7Xl z+imQ&c00Sh-NEi?cd|R%UF@!QH@myt!|rMKvU}To?7ntCyT3ia9%v7;2irsJq4qF) zxIMxiX^*l;+hgpp_BeaIJ;9!6PqHW5Q|zhsG<&)|!=7o+vS-_K?78+ld%nHEUT80} z7u!qhrS>v=xxK<(X|J+Z+iUE#_BwmLy}{mSZ?ZSrTkNg&Hha6h!`^A{vUl5i?7j9r zd%u0aK4>4Z58FrVqxLcTxP8JtX`ixB+h^>v_Bs2!eZjtHU$QUTSM00yHT$}K!@g75Kt zMkkY#*~#K$b+S3xog7Y1Czq4k$>Zd8@;UjP0!~4vkW<(x;uLj?ImMk4PD!VfQ`#xx zly%BE<(&#nMW>Qe*{R}Gb*eelof=L}rN)kD22Mk#k<-{|;xu)dInA9G zPD`hi)7oj{v~}7!?VS!zN2in1+3DhRb-Fp-ogPk4r)>ErZu`Z@ib0nR{YkTcjB z;tX|$Im4Y1&PZpJGuj#BjCIC2zxhGMrV_=+1cW3b+$R%ogL0jXP2|v z+2ibW_Bs2V1I|I`kaO5M;v993Imew7&PnH#bJ{uMoORAQ=ba1AMdy-p*}39eb*?$r zog2@ogdCm=a=)_`Q!X`{y9Be$nyK&sOZipM|#&g5ma5ut@ z?Q)zw_#>aO9MuI1XU;b?&Fuba=!?-p2B7x{ch%ZWFhu z+stk5ws2dzt=!gb8@H|7&Ta2@a67u4+|F(nx2xOD?e6w)d%C^c-fkbauiMYVga3{Kx+{x|~cd9$ho$k(XXS%c8+3p;7 zt~<}2?=El`x{KV!?h<#YyUbngu5eentK8M@8h5R`&Ry?ra5uV}+|BM5cdNV2-R|yi zce=aW-R>TDue;CP?;daux`*7u?h*H>d(1uVo^VgPr`*%-8TYJv&OPs5a4))-+{^A2 z_o{o%z3$#{Z@RbK+wL9ru6xhD?>=xJx{ut)?i2T^`^D#q~nGP%oYr z=7oC^UVJZsm(WY(CH9hdNxfvA;;EkI2~YP7&-5(M_8iakJkR$6PkNDFaxaCK(o5x~ z_R@H1y>woBFN2rS%j9MDvUpj&Y+iORhnLgK<>mJBczL~iUVg8DSI{fu750jFMZIEP zaj%3|(ktba_R4r=y>ec8uYy<6tK?Pos(4ksYF>4(hF8<8<<<7;cy+ycUVX2D*U)R^ zHTIf#O}%DbbFYQh(re|l_S$%Dy>?!EuY=do>*RIzx_Di^ZeDkZ@nZ-O_`o8(RQrg&4mY2I{ihBwoj z<<0iycyqmZ-h6L?x6oVUE%ugpOTA^@a&LvV(p%-N_SSf7y>;GtZ-ckd+vIKbws>2; zZQgcohqu$)s4q?}B&H zyX0N=u6S3yYuPrYZ}bMJ-s(tG8-_TG4J zy?5Sw?}PWz`{aH0zIb1~Z{BzBhxgO_<^A^lcz?ZrUKBs7AI*>M$M9qNvHaM696zof z;)nY2{4hVSyz_`#Joael9<^pU2PZ=kxRX1^j}3A-}L+ z#4qX>^Naf>{E~htzqDV*FYA}{%lj4lihd=(vR}oo>R0ot`!)QUel5SYU&pWO*YoT9 z4g7|FBfqiV#Bb_1^PBrE{FZ(zzqQ}SZ|k@7+xs2-j(#VUZQD2h z`!oER{w#mCKgXZz&-3T|3;c!tB7d>J#9!(!^OyT8{FVMHf3?5HU+b^)*ZUj%js7No zv%kgP>TmP6`#b!d{w{yFzsKL}@ALQj2mFKnA^)&{#6RjE^N;%{{FDAE|FnO`KkJ|K z&-)kri~c45vVXRV-}@i@kNzkBv;W2a>VNaU`#=1j{xAQx|HuF9|MQ~+QG;kf^dLqMGl&($ z4&nrHgODIJh!=zf;XygCarEpjc2mC=rwlN(H5ZGC|p(Tu?r!5L65*1(ky;LDisIP(7#-)C_6`wSziA z-Jo7jKWGp%3>pQEgC;@Kpjps7Xc4pwS_Q3xHbL8*gCW7tU|29b7!iyNMg^mTF~QhiTrfVE5KIgv1(Sm* z!PH<{Fg=(N%nW7)vx7Oo++bcXKUfef3>F28gC)VzU|Fy{SP`rYRt2krHNo0oU9djb z5Nr%K1)GB{!Pa0~uszrj>!PVeea6PyY+zf67w}U&u-QZquKX?#43?2oKgD1h$;92lI zcoDn|UInj%H^JNBUGP5m5PS?i1)qa2!Pnqh@ICku{0x2tzk@%)-{4;mMMjm;WONxr z#+0#SY#B$!l_4@z#*<+(Tt>+FGJ#Ae6UoFfiA*Y!NkytslS1m!kfyYxEgk7fPx>;D zQbx+;GKEYjQ_0jajZ7=k$@DUV%qTO-%rcA2DznM#GKb75bIIH?kIXCc$^5c_EGP@f z!m@}gDvQbDvV<%tOUcr*j4Uh5$?~#-tSBqV%Cd^ADyzxrvWBcFYsuQOj;t%|$@;Q^ zY$zMa#?k|Q&a#W_D!a+D8a)cZyN6FE0j2tV+$? 
zD!<9^@`wB>f63qSkNhkD$taOgBcnw|kBku+Gcs0W?8rEgaU(+_LnGrwhDC-)MnwLn zX0GLbx|T<@{7=*J|M!Zw|8~nI`){{g|Lp(Q^B(1Y8s`3Y{^yJT4h?qyzc+b@@O&u7 z!!QiT2#k*jFd-(w#Fzw=Vlq@vMGXb&XrPG}+UTH*9{L!d#7InzDKI6b!qk`s(_%VI zj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~!dL{0Vlga^C9oux!qQj<|66DH zKiB2BFOLyqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V?!ofHMhvG0Cjw5g+j>6G6 z2FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#0 z2G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|3 z2G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh| z2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y?Vq;Ex*he?N$Z(J=Eu8Vc0WKoc#r(LonI^f5q*k(eA)U`kAdsWAqxm>ct8Ud)I2u>cmtLRc7!U{NfF#jymI#8Oxq z%V1e7hvl&XR>VqJ8LMDbtcKOG2G+z{SR3nLU95-ou>m&3M%WmeU{h>{&9Mcx#8%iE z+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cY>oQBhJ2F}D;I2-5ST%3pVaRDyGMYtH3;8I+M%W(y+#8tQ& z*Wg-QhwE_zZp2Nv8Mok8+=kn62kyjOxEuH2UfhTK@cNB9_@;8T2t&+!Gm#8>zl z-{4z(hwt$Ne#B4s8Nc9H{D$B02mZug_#6M=U;KyvQ!M}AiHgxMI>x}57z<-#9E^)0 z7>e;Q48t)3<6{C$h>0*UCc&impId3 z8q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!~K0 z8rxu7Y=`Z!19rqt*crQESL}w}u?PNVx5R&@7xu>gESmVw^u>PI9|zz-9E5{$2oA+z zI2=ddNF0TuaSV>daX20);6$8+lW_`8#c4PlXW&eng|l%E&c%5+9~a<4T!f2p2`Lkg}ZSN?!|q$9}nO`JcNhw2p+{_ zcpOjQNj!z8@eH2Db9f#v;6=QIm+=Z-#cOySZ{SV5g}3nz-o<-(A0OaDe1wnj2|mSV z_#9v0OMHc|@eRJkclaJZ;79y~pYaQR#c%i>f8bC2g}?C+{>6XzKWkJeU{rVSX%t1+fqo#v)i0i(zpr zfhDmNmc}wz7RzCItbi4<5?014SQV>bb*zCku@=_GI#?I$VSQ|X4Y3h6#wOSln_+Wo zfi1BWw#GKt7TaNa?0_Ay6L!Wf*cH2BckF>Zu^0BnKG+xgVSgNe191=z#vwQqhv9G> zfg^Dgj>a)K7RTXuoPZN?5>Cb`I2EVibew@RaTdWJh5EEfyOoB-<87iovh5~go&_oMubkIc)eGE`yBqqlc zm=aTAYD|M^F&(DI444r!VP?#NSuq=C#~hdwb75}GgLyF@=Enk95DQ^pEP_R`7#7D8 zSQ1NNX)J?fu^g7i3Rn>Rk0dY#~N4@Yhi7ygLSbU*2f0e5F24*Y=TX(88*ij z*b-Y|YixsUu^qO@4%iVpVQ1`uU9lT>#~#=ddtq{5Fg=Ve1cE$89v7s z_!3{?YkY%m@g2U$5BL#3;b;7UU-27$#~=6;f8lTZgMaZKMv2M$KSsmo7z1NsER2nD zFfN8*D8|Du495tJj|ng#Cc?y+1e0PiR8U0?1?p&^i5A-Epo<>*7@)*ROpYlqC8omE zmta2uj}5RPHp0f(1e;q9kCAPxW z*aq8TJ8X{~up@TD&e#RJVmIuLJ+LSC!rs^i`(i)rj{|TZ4#L4W1c%}<9F8M!B#y$- zI0nb!I2?}?a3W5^$v6e4;xwF&GjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBN zxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4 zcm~hnIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf z_y*tNJA98H@FRZ0&-ewu;y3(`Kkz61!r%A@|KdN45{vxDXc!%1U`&jKu`v$D#Sjd| zco>G^7=iII0Vc#mm>82_QcQ*ls;Hqr9St8}ndZ%!m2002ahTSQv|7Q7neVu>_XHQdkv02a#7(#vx8PRXhTCxm?!;ZV8~5N|+=u(|03O6cco>i1Q9Opn@dTd4Q+OKB;8{F} z=kWqw#7lS?ui#a@hS%{1-o#sY8}Hy_!ytyQ+$Tc@ddubSNIy=;9Go$ z@9_hE#83Dczu;H=hTriA{={GS8~@;6{D)Culm8eEqhkz=iLo#?#=*E4f}t1>!!R5p zFg_;0gqR2uV-ie?$xuNRH590$fhJmLqk}Gb=wpBqBQZIqz?7H@Q)3!Ti|H^uX26V? 
z2{U6B%!=7CJLbTgm;O(V-YNh#jrS*z>-)BOJf-us$}xhS&%jV-swO&9FJPz?RqwTVoq+i|w#IcEFC< z2|HsK?26s6JNCey*b94OAMA_$us;sKfj9^U;}9H*!*Do`z>zo#N8=bAi{o%SPQZyc z2`A$eoQl(MI?lkEI16Xv9Gr{ua6T@;g}4Y8;}Tqo%Wyfaz?HZPSK}I7i|cSbZorMW z2{+>w+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ;}JZH$M86wz>|0iPvaRpi|6n>UcifZ z2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB|;}d*}&+s|Ez?b+6U*j8mi|_C~e!!3T z2|wc({EFZ3JO03*_zQpIAN-5|FiITqAERM(jDayR7RJUn7#Bk@6ysqShGPWA#{`%V z6JcUZf=MwMDyX7{0(CUdLsJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf# zu^Lv#8dwu+VQs8~b+I1S#|GFC8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jq zu^V>B9@rCmVQ=h%eX$?*#{oDH2jO5GfxDhwuX54~X zaT{*O9k>&B;cnc6dvPD`#{+l}58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{ z@fu#o8+a3M;cdKwckv$H#|QWjAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+ z@f&`}ANUi0;cxtdfAJqiiA(-tG>nchFeb*r*cb=nVhDy}JPgBdjKKJq025*&OpHk| zDJDY&Rn$F!wSOQC8DJ+d;uq>9t@>l^YVkNAMRj?{n!|GTA zYho>|jdidt*2DVP02^W>Y>Z8?DK^9A*aBN(D{PHzur0R3_SgYCVkhj3U9c;5!|vDv zdtxu_jeW2$_QU=-00-hA9E?M7C=SEnI08rFC>)Jra4e3)@i+k|;v}4mQ*bIy!|6B! zXW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdTTW~9G!|k{O zcj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{SMVxc!|Qkh zZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu!U+^n_!|(V5 zf8sCvjeqbj{=+CC!!pc|$t70{*jy13**23CY z2kT-ztd9+_AvVIs*aVwmGi;76uqC#_*4PHyVmoY)9k3&I!p_(QyJ9!&jyZzFARfZQcm$8)F+7eZ@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2> z2k+uNypIp?AwI&#_ynKgGklIO@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n* z2mj(fj1o%zV>FD8F)${^!q^xG<6;PgVmu7PaE!qCm;e)EB20`)FexTO1y$5gppFKb zXrYY`y6B;g0ZNR-SI818ZU}tc`WBF4n{P z*Z>=1BW#RKuqigf=GX#TVk>NoZLlr2!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR! zH~D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$r zcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n18?Fjyp4D8F5biY z_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#{EdI`FaE7|{8{=SH48c&0hhZ3w5f~p6U_wlUi7^Q##bl_UiW&;k(LfU|w9!Eq zJ@hd^iIJEbQ(#I=g{d(Orp0ua9y4G@%!HXS3ueV^m>qLqPRxb5F%Ra&e3%~#U_mT| zg|P@0#bQ_-OJGSXg{83!mc?>d9xGr)tb~=Z3RcBxSRHF%O{|5ru@2V7dRQMDU_)$# zjj;(f#b($XTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cmee%K!e;6NON zgK-EB#bG!cN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4 zi*X4q#bvl0SKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfu zhw%s=#bbCJPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?b zkMRjU#b@{&U*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elsRe;6f<{Kse* z9b;fjjD@i=4#ve048?dDhT#~2@i74=#6*}FlVDOzh6<{vp+FrCG|@sE9dyw{9|M#a ziODeqro>d38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFH zhRv}Bw!~K08rxu7Y=`Z!19rqt*crQESL}w}u?P0VUf3J^U|;Nq{c!*e#6dV1hu}~g zhQo0Lj>J(o8pq&R9Eam^0#3w9I2otlRGfy>aR$!BSvVW#;9Q)C^Kk(##6`Fmm*7%d zhRbmUuEbTi8rR@jT!-s%18&4kxEZ(LR@{c$aR=_iUAPx4=M$CknF$-qJY?vK$U{1`1xiJss#eA3_3t&MkgoUvP7R6#%97|wH zEQO`943@=mSRN~2MXZFCu?kkjYFHg>U`?!rwXqJ?#d=sD8(>3hgpIKYHpOPx99v*Z zY=y0{4YtL0*d9AzN9=^1u?u#^ZrB}rU{CCYy|EAW#eUcy2jD;)goAMi4#irsL98cg$ zJcXz644%bvcpfj{MZAQU@d{qWYj_=R;7z=RxA6|%#d~-kAK*iLgpctFKE-GF9ADr| ze1)&^4Zg*9_#QvtNBo4J@e6*%Z}=U5;7|O8zwr3~(f$=c`Cd5RT7?WU9Ooj@osG&d|4K&e08y$4fLmvZ_7>UU-1*XJQm>Sby zT1i(0EQZCg1eU~7SQ^V< zSuBU;u>w}aN>~}IU{$P!)v*TF#9CMz>tJ21hxM@mHpE8Q7@J^IY=+IT1-8Ui*c#hl zTWp8zu>*F*PS_c{U{~yh-LVJu#9r7N`(R(}hy8H?4#Yt?7>D3c9EQVj1dhZ}I2y;` zSR9AraRN@nNjMp&;8dK3({TpQ#925S=ipqNhx2g(F2qH+7?_uyXKhx_pW9>ha<7?0plJch^d1fIlGcpA^( zSv-g5@d94NOL!Tt;8nba*YO74#9Me9@8Dg$hxhRTKEy}(7@y!%e1^~Q1-`^r_!{5f zTYQJ_@dJLuPxu+X;8*;H-|+|j#9#Ou|KMNzhf(5_{}>IUV+@Rmu`o8q!MGTLp%@Rt zFdQQ=J|@6~mJs)Gh-IairFwb=D?ho3v**0%!~OjKNi4(SO^Pa5iE+uusD{$l2{5$V;L-q<*+*1(!r3u|K?tc&%qJ~qIH*a#bA6KsmjusOECme>kgV;gLX?XW#| zz>e4nJ7X8@irug~_Q0Ol3wvW9?2G-dKMufwI0y&h5FCoba5#>@kvIxR;}{%^<8VAq 
zz==2sC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8PmADF5;~HFx>u^18 zz>T;GH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2plXwbG;~6}Q=kPpU zz>9bZFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~m-q@_;~RX7@9;f- zz>oL|KjRntir?@%{=lF33xDGu{EPoEN&@m9qhWN6fiW=_#>O}p7eg=<<6#(vV+6*> z1eg#LVPZ`3|Jt<=Fe$2aQP?9n=bUrSL89cGBrB5hFhmK81OWvE9P)sGs3@W+A}9!g zC?GiqNY03gB8UWuBJg()F9LfD9{1Vj?El{9);!a1&s2AH)e7IY)~aR-rerFnW*Vkt zI;Lj^W@IL2W)@~;HfCoI=43ABW*+8cKIUfu7Gxn7W)T);F&5{eEWwg2#nLRpvV4r? zSe_MFk(F4PRalkPSe-RkleJizby%16_&Dpc0UNRr8?y49jL!s2$V5!cBuvU=OwJTc$y7|uG)&8MOwSC=$V|-4EX>Mm%+4Il$z06MJj}~{ z%+CTW$U-d4A}q>cEY3$+f+bmsrCEk$`54QwJS(swE3q=GuqvyuI%}{dYq2)#urBNI zan@%8He@372n=Ig_vPb-uw_e3Nf+Hs9tPzQcDpm+$d?&f^E1&kwnP3;7X0<{~cU zCtSj%{FKYMoS$(8Kj#-*$yHpojI73xtN=In3wsOp9NTug;?h12~X_IGE3K2#0bQ zU*L-z&Ji5RmpF=}IfgGYoMSnT<2iv7If;`Q!6}@|S2&H+IfJiqCST+0e1o(2Cg0+0 zzRfv&hwpMO-{bq7#}7E4A94W~@*{rCMO@5JxP(jjDVK3MKjR91&M&xzl$z9ydJ>1KE+|L6%$U{8LBRtAu{D#N*Ex+UUJi#CM zBTw=tp5kf#%rpFjXL*k2d4a$3A}{eWukb3b@i$)Q@4UgAyv2}Y@;{?6Dx)zvV=yLT zF*f5cF5@vi6EGnYF)@=cDU&fdQ!ph{F*VaLEz>bQGcY4FF*CC;E3+{>b1)}!F*oxt zFY_@!3$P#yu`r9UD2uT;A7u%aWGR+r8J6W^EXVS!z>2KI%B;ewtj6lB!J4ea+N{I6 ztjEV$pAFcMjo6q?*pyH3NjBqCY|a*J$yRL5Hf+mwY|jqt$WH9cP7g{rMaRa3BY9FrViT4&^Yuz!y23BRG;TaTG^$3}0qA$8sFUa{?!F z5+^f)Q#h5ca2ls`24Ce&zQ))2250e2zQx&mn{)UM-{oAs$M-ppA88m{F!uIC1B z%3*wgFLF3Xa3o*iD30bBzRYlrqEI#LK+GtGveFc%8rV25<5fLsH29jKZjl#^{W} zn2g2PjKjE$$M{UZgiOT5Ov0p0#^g-FluX6cOvAKH$Mnp=jLgK$%)+e9#_Y_&oXo}C z%)`9Q$NVh7f-JV$^KEWs1j8CySTd*Ztu{GPUE!(j@JFp`=u`@&2g<%m_~5RKCJ#oX#11l{5JoU*{W~#W(pDXY*~&;X8bnbNL?M=RAJE`TURzxR4+5 zV=m%ie!?YO%1^nB%lR2s@N<5_m0ZQu{E}<9mg~5l8@Q31xS3nHm0xiiw{r)-=1%V7 zZtme;?&E$Q;6WbZVIJX89^*GW&Tsi0zvl`5z#n;%Kk*b#^JkvnFFeb0JkJaKl^1!5 zmwAO(d5ypEI)CR4-sCNYq?G>|g;5!e(HVm=8H=$QhjAH?@tJ@LnTUy*gh`o<$(e#F znTn~IhH06O>6w8UnTeU1g;|-6*_nemnTxrZhk2Qg`B{JkS%`&Mghg45#rY^pup~>d zG|R9oA7eR|X9ZSdC01q?R%JC-XARb5E!Jio)@40D&iZV?hHS*fY{I5|f={v;pJH>i zU`w`QYqnuqwqtvCU`KXhXNIy1!`PMG*quGtlfC#fd$SMwvLBz}v+U33IDi8=h=ch& zhj1u|@dduf;T*w{e2Jqtnq&Af!#S4YIGz(Yk&`%?5uC!Qe1+3Eoiq3-|{8h_(;{>~e`$y*FbCI2%DqcR$! 
zGX`Ta7GpCG<1!xOGXWDa5fd{BlQJ2TGX+yJ6;m?}(=r{?GXpa+6EialvoagAGY4}r z7jrWY^D-avvj7XS5DT*ii?SGt^HG*yNtR-1mSI^w#&Rsr3arRVtjsE`%4)368m!4$ ztj#*C%X)mA_1S<8*@%tVgiZMbpJX#W#pZ0mmTbk=Y{Rx}$M)>Nj_kzF3}qLFu`9c= zJA1Gvd+}-ZW*_!tKR&}}*`LpG00(jq2lIIj;ZP3a3w)8oIf5hk5=U_~$M9u_b1cVk zJST7>Cvh?(IE7RB3a4>8XYf_dgLJ@LkU3dwies_yOnhLoVP# ze#DQth>Q6NmvAXRJP4oi})sw-}OI{$~_MWi&=-48~+E#%3JGWjw}b0w!c4CT0>QWilpb z3Z`T#re+$ZWjdy324-X?W@Z*MQ$}S9JS9W7}_Fzx;;?wNSKJ3eWe1^}mKcC|O4&)#X z=JOoFp&Z5+_#%gM1V{2Ej^b#J;mZu?SdQa(PT)jN;$%i}3a9cFPUCdW;H#X;*Z4Z$ z;4Hq$w>X<`a}M9(yPV7S_&(?H1J37%T)>6=h#zwi7xNP?;ZlCeWn9kBxPqVa3$ElU zuI86q!?j$;_1wUX+{De?!ma#@+qj)O_%(NO7k6_H_i`Wi^8gR>5D)VRkMbD5;cF(i%r&nS$_ zXpGJnjLBGx%{Yw9c#O{kOvpq`%p^?8WK7N!OvzMC%`{BQbWG0-%*ag4%q+~xY|PFa z%*kBL%{##2C@p0B?12$wMHf9qxPUS0{#_62FS2>ff@pZnzS$vakaW>!P9KOSMIhXJ8ea_Ntl$$ zn4Bq?lBt-QX_%Jjn4TG!k(rp8S(ugCn4LM8lew6id6<{^n4bk$kcC*7MOc)@Se%ct z1WU3MOS25i@-dcUc~)RWR$^sVVO3URb=F`_)?#heVO`eaIv9L^CO$(J~aqdA5zGn`{Nj^jCj6FG^K8Nn%>%2zmz(>a5$awcEn z>wJT=_$J@tY`)Doe24FHF5lz(oW~D1pC57o7xE*1%tc(xPq>6j`6-uiIX~kHe$Fqr zlB>9yUvdrCavj%m12=LLH**WO@+)rRcJAQU+{sl%p*L?WBi84 z`7OWW_dLNL_#;p9C!XSI{>(G{g=cw==Xrs@@**$sGOzF|ukklt=kL71o4m!4bn-u= zFe;-lI%6;Fe|e$ zJ9986b1^sbFfa2lKMSxR3$ZYZuqcbMI3HyRmSicGW*L^{V=Tw=tiXz_#LBF~s;tK9 ztihVB#oDaHx~#{?S)UEqkd4@wP1uxA@JTk~Q*6!_Y{^z^%{FYyc5KfM?8r{+%use= z7`w6?yR!#-vKOCbZ}wqd_Tw{rmi_r02XG(St^uW%Zta|U1KOuoj~`37h4O}@q1e4BIl4&UWmzQ^}Dj~{S8 zKjZ=~P5&+;74 z^8$b6MPA}%Ug1?<<8Qpq-+6;Kd5al ze2uU34bI}5e2cUBHs|mizRS6MkMDCHKj3_R$OT-;kN7bcaWOyP5-#PZT*l@6j4Sv# zzu-!);%a`$HC)SeT+a>M$W7eLE!@hlxQ*MngI{wecX2oOa4+|9KM(LA5AiUM@FLhq%*?{9 z%*O1@!JN#++|0wg%*XsJz=ABq!Ysm~EXLw|lqFb_rC6F}SeB2m9Luu;E3y(RvkI%S z8mqGgYqAz=vkvRB9v^3YHef?GVq-R8Q$E2b*^E!IIa{zLTd_6Uur1rMJv*=?JFzoE z*@a>3%5Ln=9_-0pe44%4hke?WG&Wa9oA(%KF<1Vz=mwZ#%#i- ze1cE18J}WvwqQ%PVr#ZxTef3+c3?+#VrPc33&Yrz-PoNy*pt2ZG<&lT`?4RO;j`?| z=Qw}^If#S#Jcn>7hw%lz$l)Bpk$j1xIGSVlGQ&BR<2arZIFXY$nGu}AseFagIGr>2 zDrfRFzRou|i*NER&gR>k!*}>D=kh(i&w2cS^Z6kca3Me9$6Un4{De!ml%H}Lm-92O z;OG2;E4hlR`6btIE!S~9H*h02aWl7YE5G74Zs!hu&7IuE-Q2^y+{gVqz=J%*!#u*H zJjQQ$oZs?0e$Ny9fj{yjf8r^g=FdFCUwD@1c%B#dD=+dAFY^ko@*02Rb^gv9yvbV( z$t3?X3ZpU_qca9$G8SVq4&yQ&<1+yhG7%Fq36nAzlQRWVG8I!Z4bw6m(=!7zG7~d1 z3$rpCvoi;CG8c0*5A!k~^Roa8vJeZi2#c~9i}O*IU`du@X_jGGKE`q^&kC%_O03K( ztjcPv&Kj)ATCB}Ftjl_Qob}m&4cUl|*@R8`1fOIxKE>v2!Io^r)@;MJY{&NOz>e(1 z&J1N2hOsNVu{(RPCwuW}_GTaUWj{W{XW5_6aR3K$5C`*l4&hJ^;|qL|!#RQ@`4UHQ zG{^8|hI1^(aXcq*A}4V&BRGXq`3k3TI%n`z&g5%+oo{d!-{f1I&9^y+@9-kJGqOyxrckXkNbIm2YHBxd4xxKjNkA$zvXxQo+tPNf8IV5s>gi#ok(HNaE7?ZIWn{gPI@fe>8 zn2?E>m`RwF$(Woen3AcOnrWDp>6o4wn30*7nOT^X*_fRER$*0EV|CVGP1a&<)?r=N13bt> zJj^3J%47V7$N4S4a4+4G3a(>1Y{G4BKC0B7ZzvLRO=C5VN^zAbjDyz#$s&7VO+*z zd?sK*CSqbHVNxbza;9KPrebQQVOpkRdS+loW@2V$VOC~icIIGC=3;K{)#nep0v`okJ z%)pGy#LUdXtjxyj%)y+@#oWxpyv)b^EWm;+#KJ7XqAbSZe3T_vlBHOhWmuMvu^h{@ z0xPl-E3*o#vKp(i25YhwYqJjPvK}92eKuf2HezEoVN*WAC)tcou{m3?C0nsI+psO$ zu{}GmBRjD(L)nF4?8A`4(sMZO-94 ze3x_i9^dCYe!%(skPEnwAMs-@;$nWnC0xo+xs1#C8CURge!-Po#nt?hYq*x{xSkuh zk(;=gTey{9aT~XD2fyY{?&5Cl;a=|JejeaK9^zph;ZYvrH$2X7`5nLK3I4zzd6GZz z6i@SKp5ZS%%X2)>3;dNAd5M>Kg;#lvzwtVM=MCQEErw*5{~3i*8I92ygE1M4u^ESP z8ISRqfC-t1iJ62+nT*Mqf+?AbshNgpnU3k1ff<>JnVE%InT^?*gE^UtxtWJ~nUDEd zfCX8Ig;|6}S&YT`C`+&;OR+S|uq+>AIhJPyR%9hsW))UtHCAU0)?_W#W*ydLJwDF* zY`}(W#KvsGrhI}=vKgOZbGBehwqk3xVOzFidv;()c4B9SvJ1o5mEG8#J=l}I_%wU7 z5BstopW(CY&*wOR138F;`8R?o_$p`eHNMU_IE!!cEzaiKoWpncF6Z(+zR!95fb;nw7jPjz;>TRX#r%Xz zxRjrA8JF`juHfhVf-AX-tNA6@a4pwyJvVS8H*qt!a4Wy!Hg4w*e$AcS#ogS)z1+wB zJivoI#KSzoqddlMc%0wzJATg-{DD96B!A*5p61Uy!(Vuo=XjnM_$x2+5-;-#ukspy z<8}Vd8@$O|49Ow?GYX?J8ly7?V=@+FGY;c29^*3s6EYDKGYOM28Iv;wQ!*7(GY!)+ 
z9n&)dGcpr1GYhja8?!S9b21lmGY|7JAM>*S3$hRkvj~f_7>n~!mS9PiVriCPSw6;c zEYAw8$V#ltDy+(Ctj-#&$y%(!fDPG*joE}v`2?S2Gd{)UY{8an#nx=Y zwrt1t?7)uf#Lf(57lyGbyRkcauqS)*Y4&Cx_GLdl!)Mu_&v5_;au5gec@E)F4&w`a zk;6HHBl!|XaWu#9WrlMs$8kI-a3UvhG9x&JQ~3&~aXM%4RnFvVe4TG_7T@GsoXxj6 zhwt!R&gFZ2pY!+u=kr4@;6i@HkGY78`3aYBDL>^hF6U=l!O!^xS8^3s^GmMbTCU@I zZs104;%08)R({28+|C{Rnmf6RySayZxsUsKfCqVqhk1lYd5quiIKSn0{GKQH1ApX6 z{=`!}&7XOOzwj*2@jNf^S6<{LUgi~E-?QJc$2pnl2iU?6h>tV-uO><=6vbitp$J#O&rEhBP;KhicL{E>En-}Zsu4uRi}f!|Jn-_8*o0|&i(cKyPm zck11?d&l5;g~#vEw{`a}{X2Fj(z@?}J`v%Gd$sM+p>3Z6?R)m_Sm5@{!eduy+qZ4C z$hS<6xP3jL6~kk92>kcmR!6<<_aML`@p<7yj{~;ZZABZ5&ZIQG(#d2A}?G9~*LG)tnoFkNpdMZ07K2t(!Ei z9IUmw*BDv24{d{w+^xN^oqAJ3QTaJXhYc3b&2LL`=q1Ovg;j!fedJT+G9K6@m?`Z6Go{wEf)M`yX=k zZSSepJCzr5PaTyD)=}F)9Tf;Zxm`!07ym8w6Xk9T&vo|<|DkHG1yAaF%fQ2p(3>4Y zZ`BJ62?XZg-w(a|`;MJ3@Q$!Vfrs124!sc>-yVK|`-(4J;Jx9|`-b-J*fAn3{oMoI zyd5aRG6((@Jf-Gg*#pn+Pes8Xc{>$F4+(kXkw+qBhjc}Qw_G8 zV2HXm5{(P)2nL*{cN=qf^vE>*?+H24{1tS>lk8(nZD{3p#pDmIBEG?r*$wP|%z|)ZUUj@xu=F$8aTDr=C3v zM}!9{a+{*JDR!IUk!AUxYJm?A9rxzn<$;FwuVwDPoqyLpsMT-2cSqFP(f-K4JMP_a zZ(`mX+!5?2i-J!i=6H8Tw2=ea|Gf5<4=Kw*I$`;DWjHPDr>rLWm5fVZomDH^*{;1pTMUFSN@&neTSk zdr%VnKP#w42@VnNmf(S4ksS;^911=h4n7T5p|8e>eXkdSfPw$2!yxL&>B z$UZD_AQx1x;3dHk)PJh6{!c~S`^JjFu=`KV>My%Vmj`u|p7-v!H}>|scSn@kF=|IK zjT#u-5$ssaf=>*%%l!W$-2QX-iELoO?oU& zR^)bP|3Cg69GU$ul==BPWq$l_nTN-3-?ne-z!F*OPThJ222B6S;*a{!NmtZBM7>@9 zsTAiQgXKHr87#+p)+obcv~J(FeQ0o9Fg#k14qduO)PAJT0~RiW)6Uzo#i&t2BBy}= zV&!4Y9u0tDMs~&t&4u)G5U1u z-KArn$ffTn9r{Lumb8}zO@ela8}03&i)K!{5Hw1G{1x{m`6G@!aBcg~FEaWcGoy)jE^^GBiww>N z+J)r_jFujDX%D-p@V5uE4=RqoF;gGL7atidP2Bz!w2F8GU> z_~0P?pm2X@%74$iG;Bzq5B~F1^uKpSHSSzd#rv)(JbGZItM7n+J%`=7X&M?^jl~O1 z7+8l6+!%-meGI`q!z z54!L4BPVwcxq>qfxq=7XB#{;Vm#1cPB;G3?>@u{q>$S;J_6J5|JtxL zB&5O0)#C#_He^=)rbmOGJ$@?RzJTTW3>g@7Y_w#X_eBn0LkrgXWBvK=(?9K6!83Wl zg20^t(1XS?fx?U2VluGw7rDjeod$fTB>qn>x<74EasC+bAHUP0w%*sG`h~~p)2sEJ zul0ofd)DC|a+ff2S^M58-rZ(9S2CFwNG5?)BAEn60{3Q;J2my7HXGV0JWAb?5y3BR z1ancY+R;M8>eZ`P>sDZke5f4~7++V8(xXpcks|ohH)3S46-2J^ZO^c&;-$cSybKFl zP6*n);)SC@SL_^_|8l^%9nua8dNt*N&Q}6ff3N$Jpi@gniGDTUi7itb1ufPx?^{=I z-v_+gw(n{2kqgYtLtEW{BR%k)u3!o&scqsxD~*XN+3moEq@G8Oz20r*<=NUMi$fOjE1Ap^8W85z7Lcv4a-Tu0? 
z?=9sAo?r0bfpUyoKnqURg6DT{nf^r?#{MJX)d;>jxcwUfC|W`p5F#z3YBu@7;0lO_DnI?zr~`b&KGR;DTV(;FH@6f_?fG G`2PT0b>sN} literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_pickle/1.1.0/1.1.0_x86_64_darwin_3.8.5.pickle b/pandas/tests/io/data/legacy_pickle/1.1.0/1.1.0_x86_64_darwin_3.8.5.pickle new file mode 100644 index 0000000000000000000000000000000000000000..f8df9afff65658d0f58a5b2afb30997cea53e4c8 GIT binary patch literal 127216 zcmdqq1z6n7qp0y@0u-oHcNc2Z-AkQ9-G!oEwpfw6JMGq~ySux)ySux)+nwybTd4Q@ z&i|Zy&z(NqAK7F>GM>qV?O`+On2%x(U89{%cHw?u0e<1p`p>rhfgxTI(Wb;V{aS~9N`xh>=hX9666)^)!HvCT<)!2 zKyd3ozlwn&K5g7B??#(<;@l$Il=9n8`}%e83wQDM3ipdPIeG^BwfC$ZEZ>vcn^OI3 z%P_Ct*7~M(b*-D~r(zTA733G~VX_U6@Cu8Fc2i6?;SnLB(QdZ-4}S6oHL_oH-Dod) z49S8cgF-u6&Ne7AFe1P!EX=E8v?-ovn4eEbaCk&mq)&u?2)5=8qfK#S?3gzhDR1YoRj~ZWOFu%Ad5z*dJ zuFjU^u)aPm$4})g+T+{q(Q-K_GD4#b2~2k0di;k+w=lnL*Ul?2QeTnXWG_?EQ?8RKW$zssAWuJ7{%9K>7~rFy zft%uH>!#`_5a|^eE`Ptt##5eLv_UmFcn0}Jc=>uoc*(sQl6#qKy&@uHig%R@9+BQA zP3DGZgPkS27#s}oV@GLDlL&|vk%qj#jYgIDv-17&Uqz## zs2;_|4eo{#<~TEyHqilM6Y#BGBDI{5ki=T5y=tW!6sUB1gh z<@7w%BHFpC^>p3TuZ7YxFu*(9B|;{qPC$^KEDk}TGJPHDTNh*+542a5A%_o+@8jg* zp~t#O^A8D$u1dC3{Qf4_S)(g3Cc47^GP*>B%!of^6H~gM6?mE3{LJSe>lJhTP>DCJ zh2JpJ6#x78%oTz;pPJIf-l@3|hr7hu=hu%`pnOx;*?O8k{$u8JlsQ#qPWPA;viTu% z%3tp)WTBGPgZvi}-A~VdQLeQWW%u3lcBc|6O0rHN4Rn3u+RW5lF>&m3UEz|q!f9!p zE!&3bPOaVWvhI^>_w$^QmJRRTS$D8a(*0)=>$^4Cdisa?wT(8V;Y7d50U`eW;eIl< zOKay0)CS34NgCA>dY;ZZ0g+g7Rs92^_*=N=-19KP_NmY zep&8pr86o_d|4IO^g z2FM(ys;YVKKii2>ktY@9YAaENzLlTJAv`23A|%Y$FHG+)=-md(F+4E5^80np*AFPq zU$@+e%Im+&(sEzYW9*7Kv8=Io$B^9c;JX-oCf|Eye$UVF_E&N8^>qH>NgEwyGrit@j! z{9h^BSV|V(%3gZHns}YsuP$ zH>~`&sxdbLvOUy0ud=z(x0k$JkguJh4LxEf zrzwuP_{knqlK7$%S3 zl+fCTtKamytY;G$93Zbz^$wL`mB}WwqdWnV@C=pR8E2DYgL?TYd&vvDu;Az#Q`dj_ zA}{>3n(p;wbjk}D@1N(FSmzcKr>nn-(+z(gr*@&TsSWw(y?;~cpY_<-nJKlWXK2R` zo{_PlgzNh6aLNE!|EZFCT-SykN9m zF&jV0hz^hJYi%t5BJ*GWCZQhxbwWkT;f^qUkU)?Pu<3$g73e;n!mXmaTrZK2dqg z`dDKL|AmA0iYYHCWuHH$!ONyESsFa?UHxiu&@ZJdF>I(T&sknJ%d3v+#>Db7bn_jI z$t??VxZuASt+js>tv|bTu|~4YN8beNvOF$x_|@{)iu*qkt>yKIv<^TR-SwrPN2{@< zzOJ&dl#V4z4gWGqZGOzDF`b@o^6an2p=7N8q#G~C$1J(|AB}kZ*b}hqN3W!;X=F`~ zUoD#}sUNNgEc>yv%MYf$8<49ov8L zF}+h9`(u7FHG@A{*jcY=OB>t83>-q{#0>08Dd@1{avZ=CS%LkUA|aKlWd6s!sOU0mw><9$Jg$( z;ep9E!Vu=?7cH+-&ACG#%9q2{zFbfL$F3TMBFvp_zi+P^jhAHX8}G^&%VqL^ro>yW zl>bSImj_@D-;XrtCG~&4l*P*T`}N!@t8ViBXMNF9cYmAjZRC-gSH$G|U*}?DF+J=2 zyQcDM-ihgf{BQ~To56V*nLnnBW~tE!xLSLOq{g56A4$twHcW|WF(YQite730F&E~E z8Cm(OV)L6*kJXvUsVB+TR=0#Tft0V+jOA9y&#D=derfC}hsbjyWDzi$^q0Z<%Mkr#sCZ}n=f{y+U~{=fZWHn-&eht?I} z7mVlD72kKQ-&$7)H#;|B?xp{x#Ke>1h?YVV(JB1j7MhsT{=3@syHfI3W0Kaf$lq(e z$Xo0o2eo#~xVN;Z#(jE~zhAx>RvC{;^wGq>8VKDw$TQU&Es636|M*J}$zSSh(p)Ro zk9JR#Kp)uDf3yy4{?F(ASoyH1yull*t6NE5{8?Am!@43SAO0p+#kc0Ff^v}hUuUbB zHt=heH0f&ahHTg_y`{B?#_K5d=#NoMFj_{R)=KxSw;o2Ws3S2;4A(=tB) zKc9{NvgZHI07tiPbHB-&$zwVjmRfkUbwy0>r+Z>u@i*fba@^W{Ti-lxU0vqfU)_S& zr>6XDgi|)6<{)c$bCAE>9RA7V4C|d(lZ_nAl|vB!&rZ}ZK9Z+r9#=75*6VJkueVvg znIQ4qy^?S4R2%Qf4;mlJmp|S*u|~NKacqlr7zgdq0Ua?e#>4n%E=ntnZ}c-*VzPG+ z363y)y`iGt5!Y|0Si9zm{E$(|)HL5mkttEQ$;lv*F>|i8ALd-i>(E%oE#u7` zh)(4{lJy<*d1$_VfpYqz%vRsrfj1XWIq&A{7V>U-0)57vdGl{_*SFK$xOMmQ&sp!_ zaf$zvTX)1vP)yAHli6rQQtjp`XXp_&b#4ACdPF@I z&JoA;30wL5&5w*L?Qwm3%J|ZwWiM<`+PnI0(flX-J%oEt04 z-)4&c{oAr0HrWLF`O91V|CcLN+pvJvhQB(D|5T=Slxe1i*oD$szek$lgv^DwsgqraMLW&d@Q zOP;l=3ZZG{`1)ZuEcS+Sl9~Qb-|!EE=izc1TBv8tRD6T>7jrTaek~SqW|=8N>>JqB zBjKSk<@{XCOL7iSJ@ewX)rcv+r)4Woa}kO*CHZC=f@PhYIB34*Z7oG^34b~cXr7{1 zIMx)spH9=(>kcKJHN*W7fiiiF3uPgVa<$i|hw4?f9)ae0$qB5-YMs9R!>1iAEmk%8 zsaWBkjz-6fGMf{5nunc=S|#$#Qv>@VlGqv1g*2;&_vxWKBEdC;91bI&BGMe9HTya&-Sl`h68% 
z$@`$`m$n$D8&u)>W!;U(`{jKtjq97Hi|*SL=UTs!R-G5LT6c6wVf$8k>%i!Gx`kWh zp01n8>E7UX($MK$FY0b7lsNZ$X@Xs|JLa`A_(_rE9ojaT)AQKO8vq(4d21hQXZNz|A! zb4qR+Q)V9MOMg*j9K0jKzm}RvlfA5!t+Rv-+P@qPj%m5R9f2@UBK>9<;%`z!@8ii5 zDf{IA*%E24=@$PfrVCkPTE9^Lq5RB{v23!JovW`6h2xJoKF-$)KCRxp)Vr4QV~XCj z)GK_wdC$WZ6idqEbo>2T>BpmY&h)11 zfB9L-+KGxUtM!cj3u&g;b#g?a+JCC&`m;1_Y_99v%%fR;E;wVt+ZB_Wcc!SYycOe))g@~JAOU-9B%ZL*}!~VD6c7t|5t9V>s6IyZkXyNA87EF|K+1Ns;~Sn zyDqA~{NI{u7|Zhwf4JkWSD<Gs}epeqV zZyZ&ze1@1D{GTeS){9Jipg<3L7fKKMfPo(5fPpzA#6&1Ng61F}ax(`x>TC{4F&QSu z6lgBT^5;}6r^Ymx7SmyRw4QoKmNQ{y%z{}l8)nBG=!`ir7v{!1m=|3zA6hGt0xTEA zLRc7!U{NfF)^@Q3%O$ZCmc}wz7QcJ+SgFAK710$dVP&j>*4wkz3$W_EUjyB+Cc2{s z)7wchtY=8~15jMsq*c6*#b8LYv(Gy#t7kZ-)`l28DV{0^E02;9k24WBf zV+e*~TMWZ+jKD~2hwZTgcEnED8M|Ot?1tU32lm8X*c*d2I0nb!I2?}?a3W5^$v6e4;xwF&GjJx(!r3?n=i)q^j|*@iF2cpQ z1efA6T#hSnC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD|9>T+T z1drk|JdP*uB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6KElWN z1fSwFe2y>hCBDMf_y*tNJA98H@FRZ0&-ew+7clZ?Rkge!=k}R{hBj!6b{Gfk(E;@b zPW7Naa;k@T7#|a0LbN`7?8I_nOoB-<879XRm=aTAYD|M^F&(DI444r!VP?#N=F1^@ z9@$vVjycd7b7C&cjd?IHx?n!cj|H$G7Q(_<1dF0RJy;LLu>_Vx{kc>!}C*0aZP;Mg5^j zJ%pn^yH^j9*bduc2keNQurqeSuBhKr(S!a_f*yKcPwa)gu@Cmee%K!eU=$9-L1@Ck zI0T2{FdU8}a3n_KC>)Jra4e3)@i+k|;v}4mQ*bIy!|6B!XW}fJjdO4=&cpe*02ksS zT#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdTTW~9G!|k{Ocj7MGjeBq}?!*0f01x6J zJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{SMVxc!|QkhZ{jVyjd$=a-oyL&03YHb ze2h=_xJ%n;wSu!U(h^wDeD1Mn5zRtpnUS)9RAZYToSO~ zgqR4OP=De@4@od7CPV#sQ9YzU{c%)1q{7sg2Ge3XOph5bBWA+Pm<6+<{-CQKvSSW( z#+;Z7b7LONi!PWC^J4)lh=s5)7Qv!e42xq4)SnL3gZ_A^9!g^wEQ{r^JXXMp=!%uF zGFHK=SPiRV4Rphrs6WxF2M?@;wXqJ?#d=sD8(>3hgpIKYHpOPx99v*Z^u$)^h2H3c zzUYVgLymfAjRp)rBeuan48mXx!BA|AVHl1P7>W8*m3nB89k3&I!p^8aY^jH?*bTd5 z5A2D(us8O>zSs}@;{c4pfj9_FI2ecEP#lKCaRiRUXdH#3aSV>daX20);6$8+lW_`8 z#c4PlXW&eng|l%E&c%5+9~a<4T!f2p2`Lkg}ZSN?!|q$9}nO`JcNhw2p+{_cpOjQNj!z8@eH2Db9f#v;6=QIm+=Z- z#cOySZ{SV5g}3nz-o<-(A0OaDe1wnj2|mSV_#9v0OMHc|@eRJkclaJZ;79y~pYaP? 
zr%0-rrJfL|q5jaS9&FJLzU{Xwm$uR|5|L|BUmQ!OI zG~arb$DfYn^q2uNVkXRtSuiVR!|a#?ol$?1R1di@H|D{-=z{q$KNi4(SO^Pa5iE+u zusG_^uIiyAmcr6l2FqeOERPkiBD!KFtc>QFqVo4tWw{zw#~SE{HPIbCuol+FI#?I= zhd}jE9~)ppY=n)m2{y%M*c@A6OZ3E6s6S?^2XFL2U-UzNY>fsCKqI!nKn%iQ48c%r zi(wd!5vV_hsfTvh9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cmee%K!eU=$9-L1@CkI0T2{ zFdU8}a3n_KC>)Jra4e3)@i+k|;v}4mQ*bIy!|6B!XW}fJjdO4=&cpe*02ksST#QR_ zDK5k1xB^$=DqM|ga4oLG^|%2y;wIdTTW~9G!|k{Ocj7MGjeBq}?!*0f01x6JJd8*1 zC?3P(cmhx2DLjp5@GPFg^LPO-;w8L{SMVxc!|QkhZ{jVyjd##IuY)HXet$dPa@$@1 zgFmtxz(KvchR~4aXa@v%woLm$~PM2~}9F@58hI)K?6Fs4lNO4jUD@l~3N-`z6 zl0r!-Z-J*)(kN+_bV_<9gOX9nq-0jIC|Q+kN_HiO;;iIUaw)l$JW5{0Maie+R|+Tv zl|o8krHE2gDW()xN+>0jQc7v1j8ax9r<7MJC>0e~rIJ!vsUn}ot)^60YRKu9H5GTo zL#d_IR_Z8qm3m5jrGe5=X{0n(nkY?`W=eCVh0;>-R9Y!sinrpU_$q#iztUPUC;^I5 zX`=)xK}xU^qJ%1Kl`th-iBKYyc1nAtgVIsyq;yugC|#9qN_VA)(o^ZB^j7*PeU*Mn ze`SCYr3_RCDJEsGGDI1w3{!?HBb1R!v@%K=t&CB|D&v&#$^>PiGD(@NOi`vP)0FAT z3}vP=OPQ_AQRXW1l=;d6WudZ2S*$EkmMY7X<;n_WrLsy{t*lYjD(jT>$_8blvPs#j zY*Dr<+m!9f4rQmZOWCdLQT8hPl>N#9<)Cs%IjkH}jw;8L5p9<)QLOd8|B9o+{6j=gJG^rSeL7 zt-Mj*D({r{$_M46@=5uu$lEjW&zlNWQ*BgR)lQA0+N%z#qZ(I@r^Z(ks0r0Xs*{>n zO`;}Mlc~wo6lzK}m6}>jqo!5Usp-`WYDP7anpw@FW>vGP+0`7Xvzk-QrRG-isCiWv zHJ_SaEua=u3#o6h)b?rzwWHce?W}fDyQHsxL9jFdcP3mBEh&ogqrVdv}s3X;Ab(A_<9ixs_$EoAh3F<_3k~&$P zqE1z(sngXN>P&T(I$NEi&Q<5B^VJ3FLUob4SY4tnRhOyD)fMVWb(OkWU8Am5*Qx8( z4eCa9le$^mqHa~UsoT{Z>P~f+x?A0&?p61x`_%*LLG_S&SUsX1RgbC1)f4JT^^|&A zJ)@pg&#C9t3+hGnl6qOaqFz<6sn^vT>P_{QdRx7t-c|3Z_tgjLL-mpRSbd^CRiCNP z)fehZ^_BWseWSir->L7_59&wtllobe!*fDDd>}MoBW#78h$HNUgK!jaMLZE-BoGNj zBH<(wizFhcNG6ht6e6WaB~pttBCSX#(u)itqsSyOi!36m$R@Ij9Ku=T6uCrhkw@ee zE+U`EFA9i)qL3&oiio13m?$nvh?1g|C@so}vZ9a%Ra6tzMGfI5 zY6^GZA!>=*qK>F5>WTWIfoLciiN>OdXeyeC=AwmYDLh3h;U&CVPd!#Ax4U5F-nXUW5if7PK*~5#6&SkOcqnbR549V7c<06F-y!A zbHrRRPs|q!#6q!1EEY?|Qn5@d7c0a{u}Z8KYs6ZyPOKLj#7416Y!+L@R5#6fXL92Q5!QE^Nh7bnC?aY~#PXT(`?PMjAP#6@vQTozZvRdG#R z7dOOBaZB75cf?(BPuv#|#6$5&JQh#HQ}IkZ7caz1@k+cFZ^T>iPP`W%#7FT-d=~QZ zOj9-a@QG%l*=lxL9L-*H&>Xe6T0AYjmOx9WCDNR<#99(9sg_JjuBFgYYN@o;S{f~_ zmQG8rWzaHenY7GW7A>omP0Oz3(44iLS}rZOmPgC0xoG*c{8|C6pjJpLtQFCUYQ?nT zS_!SBR!S?amC?#-<+Soz1+Ajys#Ve|YgM$WS~ab@Rzq{sYHIG9hgM6gt<}-$YW1}G zS_7@2)<|otHPM=C&9vrP3$3N*skPF)G;hsE^VR$`f33A<&;m50)n<2d$&lN$ae2(Yk8gwC-9Dt*6#Y>#gS7{Mrb3oXl;}>S{tK{)y8S#wF%lpZIU)wo1#tCrfJi)8QM&3mNr|Pqs`Uk zY4f!O+CpuSwpd%DE!CE3%e58SN^O<4T3e&7)z)e2wGG-vZIiZH+oEmNwrSh79okN9 zm$qBmqwUr9Y5TPU+ClA*c33;29o3F$$F&pMN$r$&T05hi)y`?>wF}xs?UHs`yP{py zu4&h`8`@3nmUdgaV}6Q=se6SA{_ClGiE>z`?)4~c9luv(Kfw~dyP`KwxYpO$vfd8O zoeTN1JWJh5EG#jCdMR~6q8|cOo1se6{f~Cm=@Dvddz?sF%xFSESMFu zVRp=c&X^N(VQ$QWdC>*)VSX%t1+fqo#v)i0i(zprfhDmNmc}wz7RzCItbi5K6)Rz7 ztb$ds8dk>|=!P}X9X+rX*2X$m7wchtY=8~15jMsq*c6*#b8LYv(Gy#t7kZ-)`l28D zV{0^E02;9k24WBfV+e*~TMWZ+jKD~2hwZTgcEnED8M|Ot?1tU32lm8X*c*d2I0nb!I2?}?a3W5^$v6e4;xwF&GjJx(!r3?n z=i)q^j|*@iF2cpQ1efA6T#hSnC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB z_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU z@8UhYj}P!6KElWN1fSwFe2y>hCBDMf_y*tNJA98H@FRZ0&-ewc4Zo^d-Vmsv4cej| z#zA{@Ku3&=@i0Cnz=W6xoiH&b!K9cBlVb`@iK#F(roptB4%1@>%!rvVGiJf8m<_XI z4s^zxm0#-yj ztPAsC8nF$}{o0wb{9Zm2oK{CJc`HgIG(_hcnVMB89a;U@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=j zi}&z8KEQ|g2p{7Ue2UNTIljP`_zGX+8+?oJ@I8LOkN62c;}?{-2h2f5fg0MNE!trm zv_}VY#JCs_<6{C$h>6e%6JrugipelJrofb#3R7bmOpEC-J!Zg+m;O(V-YNh#jrS*z>-)BOJf-7)R z4Xa}fbiLgWIkv!-=!vb+3%$_?ebEp7u{9bn z0FBrN12G7LF$6=gErww@Mqnhi!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~^z? 
zAPzzk4#puk6o=t(9DyS-8b{%19D`$V9FE5cI1wk|WSoLiaT-p?88{PX;cT3Pb8#Nd z#|5|$7vW-Df=h83F2@zP5?A4BT!U+I9j?a>xDhwuX54~XaT{*O9k>&B;cnc6dvPD` z#{+l}58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKwckv$H z#|QWjAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(V4a)71%M}ZpJpe@>A9JEIV zbi}wA594D3Oo)ll2@_)yOp3`cIi|prm{ z5Fg=Ve1cE$89v7s_!3{?YkY%m@g2U$5BL#3;b;7U)*Jk)jinzTP(vHEMLUdx_UM3) z7#HJVd`y4|F%ddpVoZWbF&QSu6qpiIVQNf+X)zt9#|)SeGht@Tf>|*eX2%@pj5#qE z=Egjj7hNzP=Enk95DQ^pEQ0!%C+MLV7RM4;5=&ueEQ4jS9G1rlSP@-O|8@pFRK_Y; z6{}%&tbuM=6Wvk&QU^WM!rE8|>ta2uj}5RPHp0f(1e>D%9R+%5jxDeydSWZ|LT~gz zU-UzNY>fsCKqI!nKn%iQ48c%ri(wd!5g3W>uswFbj@Su1V;Ag--LO0Mz@FF(dt)E$ zi~X=a4!|fJh=b6CgK-EB#bG!cN8m_|#!)yL$KY5ThvRVqPQ*z#8K>Y>oQBhJ2F}D; zI2-5ST%3pVaRDyGMYtH3;8I+M%W(y+#8tQ&*Wg-QhwE_zZp2Nv8Mok8+=kn62kyjO zxEuH2UfhTK@cNB9_@;8T2t&+!Gm#8>zl-{4z(hwt$Ne#B4s8NZ-)VxMYjsW0`f zg3yD8HfW1>7zgdq0Ua?e#>4oS025*&bi%}#1e0PiOpYlqC8omEm$c`z@!U_Q)`1yKL413eVN!dL{0Vlga^C9oux!qTXJZ-O4m zVmU026|f?@VkNAMRj?{n!|GTA-LNLQqX*W)+E@qcVm+*n4X`0L!p7JHn_@F;jxDey zdSWZ|LT~gzU-UzNY>fsCKqI!nKn%iQ48c%ri(wd!5g3W>uswFbj@Su1V;Ag--LO0M zz@FF(dt)E$i~X=a4!|fJh=b6CgK-EB#bG!cN8m_|#!)yL$KY5ThvRVqPQ*z#8K>Y> zoQBhJ2F}D;I2-5ST%3pVaRDyGMYtH3;8I+M%W(y+#8tQ&*Wg-QhwE_zZp2Nv8Mok8 z+=kn62kyjOxEuH2UfhTK@cNB9_@;8T2t&+!Gm#8>zl-{4z(hwt$Ne#B4s8NZ+I13F?{jEC`2|Kt05NQjBh2@_)yOp3`cIi|prm3lsD#v)i0^}lqj zhvHZQ^}jf;hf-J?%V1e7hvl&XRzz2f*q9c!Te2exDhwuX54~XaT{*O9k>&B;cnc6dvPD`#{+l}58+`vf=BTf z9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKwckv$H#|QWjAK_zsf=}@o zKF1gM5?|qKe1mWC9lpm8_z^$hXZ(WZZ&8u;kQ&Ea9Vr4ev_V_6!#HUE+AaA}2bLW% zF2=+7m;e)EB6PyUm;{qzGE9ysFeRqK)R+d-VmeHZ889Pe!pxWjvtl;Pjycd7b7C&c zjd?IHx?n!cj|H$G7Q(_<1dC!ZERH3xB$mR`SO&{tIV_JAup-KDi!g^uSQ)EeRjh{9 zu?D(fO>{>OtcA6)4%WqbSRWf;Lu`bNu?aTCX4o8CU`zDGR_KM^=!3rKhyK_a4H$q% zY=eOqguxhsq1YC~FdQQ=65C;W?0_Ay6L!Wf*cH2BckF>Zu^0BnKG+xgVSgNeQ8*9> zp$P}$5FCoba5#>@kr<7ma5Rp=u{aLL;{=?DlW;Ol!KpY6r{fHqiL-Dv&cV4j59i|o zT!@QsF)qQSxD1!$3S5b+a5b*MwYUz~;|AP_n{YF3!L7Irx8n}niMwz&?!mpd5BK8% zJcx(zFdo69cnpu@2|S6X@HC#mvv>~A;|08km+&%P!K-);uj388iMQ}J-od+g5AWjx ze29B2 z+?WURq6?b86j?@jewGVhK`exYu?QB$VptqYU`Z^6rLhc_#d264D_})*#Y$Kit6){E zhSjkKx?xRpM-QxpwXqJ?#d=sD8(>3hgpIKYHpOPx99v*Z^u$)^h2H3czUYVk*cuHO zfJSVCff$6r7=oeL7Q-+cBQO%%VSDU=9kCO3#xB?uyJ2_ifjzMo_QpQg7yDs<9Dq?c z5C@?N2jdVNioNOhq7M-6Sz7VR(& z+M@$HVqA=e@i74=#6;+Xi7^Q##blTqQ(#I=g{d(Orp0ua9y4G@%!HXS3ueV^m>qMV zGv>rxm>ct8UUb2Hm>&yZK`exYu?QB$VptqYU`Z^6rLhc_#d264D_})*#Y$Kit6){E zhSjkKx?xRpM-QxpwXqJ?#d=sD8(>3hgpIKYHpOPx99v*Z^u$)^h2H3czUYVk*cuHO zfJSVCff$6r7=oeL7Q-+cBQO%%VSDU=9kCO3#xB?uyJ2_ifjzMo_QpQg7yDs<9Dq?c z5C@?N2jdVNioA9JEIV zbi}wA594D3Oo)ll2@_)yOp3`cIi|prm{ z5Fg=Ve1cE$89v7s_!3{?YkY%m@g2U$5BL#3;b;7U^2-1$?LP|C&<1VM4&$IbI-n!Q z#dsJW6JSD2gie?klVDOzhRHDnro>d38q;7}Oo!<)17^fbm>IKRR?LRkF$X$hPRxb5 zF%RZN7tDwGu>cmtLRc7!U{NfF#jymI#8Oxq%V1e7hvl&XRzz2f*q9c!Q) z)tJ21hxM@mHpE8Q7@J^IY=+IT1-3*_Y=vIvjXvm$e&~;_(SQMH#5Nd+ zK^Tl77>aE%48t)3Be5N}#}3#LJ7H(+f?cs2cE=vr6MJEA?1O!=ANI!q7=;6I5Snl> z4#A-~42Rcz=gO7 z7vmCKipy|0uE3SJ3RmMAT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%Vz=L=Q z591L$ipTIcp1_lM3Qyx1Jd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVGz=!w< zALA2ziqG&lzQC9G3SZ+Je2ee!J$}HC_z6Gb7nENGWNH6VpoTVRi*^_X?a=`pF)qf# z_?Q3_Vj^_H#Fzw=VlqsQDKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JzG8FOMT%#C?4 zFS=kp%#Q`IAQr;HSOkk=F)WTHuq2kk(pUz|VmU026|f?@VkNAMRj?{n!|GTA-LNLQ zqX*W)+E@qcVm+*n4X`0L!p7JHn_@F;jxDeydSWZ|LT~gzU-UzNY>fsCKqI!nKn%iQ z48c%ri(wd!5g3W>uswFbj@Su1V;Ag--LO0Mz@FF(dt)E$i~X=a4!|fJh=b6CgK-EB z#bG!cN8m_|#!)yL$KY5ThvRVqPQ*z#8K>Y>oQBhJ2F}D;I2-5ST%3pVaRDyGMYtH3 z;8I+M%W(y+#8tQ&*Wg-QhwE_zZp2Nv8Mok8+=kn62kyjOxEuH2UfhTK@cNB9_@ z;8T2t&+!Gm#8>zl-{4z(hwt$Ne#B4s8NZ&K6 
zm=F`86DGzam=u#?a!i3KF%_o9G?*6CVS3Df88H)P#w?f>vtf43fzFr{b75}GgL%;f z^I?80fCaG-7RDl26pLYTEP*Al6qd#^SQg7+d8~jH(G@FUWvqf#u^Lv#8t8^K(H%Xo z7S_f(SQqPIeQbaYu@N@LCfF34VRLMOEzuKOp%;3i5Bj1X`eSP}U;rAi4F+Nm24e_@ zVp|NuaE!o6Y=`Z!19rqt*crQESL}w}u?P0VUf3J^U|;Nq{c!+B;XoXOCLD}Ia3~JL z;Wz?EVlZzFARfZQcm$8) zF+7eZ@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKg zGklIO@Fl*&*Z2nC;yZkgAMhi7!q4~x<<|gP+J6+Np$*!i9mYX>bU;Upi}5f%CcuQ4 z2%Ru7Cc&hb43lFDOo^#5HKxI|m=4op2F!?=Ff(Sste6e6V-9r2oR|x9V;;17pF*d=b*bJLv3v7v=*b2SS8-36h{m>s88#yz+f_u+m#fCupq9>ybh6p!I? zJb@?i6rRR2coxs$dAxuZ@e*FfD|i*J;dQ)$H}MwU#yfZy@8NxXfDiEzKE@~b6rbU9 ze1R|V6~4wd_!i&cd;EYO@e_W=FQ_D>{YQZs+Mq4kVH~ta2Xw@^7!TuP0!)aB&Js)Gh-IairFwb=0Io6iMcR0=E1z^g848%7Qlj7 z2n%BoEQ-ajIF`VYSPDyH87zzCusl}4is*`!urgM`s#p!HV-0k}n&^%mSPN@o9juG> zus$}xhS&%jV-swO&9FJPz?SHVtUuCPRAKI6KCOUoP%?59?r)FxDXfNVqAhtaTzYh6}S>t z;c8riYjGW}#|^j-exUdJ1F6K~;dyn}b~9^S_X_z)lAV|;>7@fkkH7x)ri z;cI+@Z}A%!rvVGiJf8m<_XI4s^zxm)<8F`iSFouwXinU!Ma!v>th3K zh>fr@Ho>OY44Y#MY>A%O3cb)9eb5*E&>vf)0Rzy8Z7>jnFc?EH6x(7LhGPUqVmoY) z9k3&I!p_(QyJ9!&jya)K z7RTXuoPZN?5>Cb`I2EVibew@RaTdx4=M$CknF$-qJY?vK$pfl#gT$mg4U|w{=e3%~#U_mT|g|P@0#bQ_- zOJGSXg{83!mc?>d9xGr)bj3gMk=?!5D&}*cQVu93wCi+hKd`fE}?D zcE&E)6}w?~?14S87xu%QFG>*ZsI1b0- z1e}PIa57H8sW=U%;|!dMvv4-f!MQjO=i>rgh>LJBF2SX^442~yT#2i2HLk(6xDMCj z2Hc37a5HYft+)-h;||=3yKpz|!M(T-_u~OPh==en9>Jq{43FapJc+09G@ik;cn;6w z1-yut@G@S(t9T8s;|;utx9~RJ!Mk`5@8bh}h>!3wKEbE>44>l*e2K5{HNL^O_zvIW z2mFYi@H2ivB{A(k3e?aBZP582_QcQ-)F$Jc?RG1pm zU|LLv=`jOl#7vkOvtU-thS@O(I%7`Eg}E^g=0z9GhxxGp7Q{kW7>i(0EQZCg1eU~7 zSQ^Vw{^SFD7Uu?kkjYFHg>pc~dicl5woSR3nLU95-ou>m&3M%WmeU{h>{ z&9McxL{DsmUg(WJ=!<^nkFC*w0cgZF7>Gd_j3F3`Z7~ePF#;p89k#~~*bzHnXY7Jq zu^V>B9@rCmVQ=h%eX$?*#{n3H191?Va4-(Rp*ReO;|Lsy(Krf6;}{%^<8VAqz==2s zC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8PmADF5;~HFx>u^18z>T;G zH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2plXwbG;~6}Q=kPpUz>9bZ zFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~m-q@_;~RX7@9;f-z>oL| zKjRlvlFd38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{ zn_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cd38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFH zhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cd38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}B zw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cd3 z8q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!{!@ zg`wCQ+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cd38q;7} zOo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!{!@g`wCQ z+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cd38q;7}Oo!<) z17^fbm>IKRR?LRkF$d z0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!{!@g`wCQ+hAL4 zhwZTgcEnED8M|Ot?1tU32lm8X*cd38q;7}Oo!<)17^fb zm>IKRR?LRkF$d0#?LI zSQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTg zcEnED8M|Ot?1tU32lm8X*cd38q;7}Oo!<)17^fbm>IKR zR?LRkF$d0#?LISQ)Ee zRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED z8M|Ot?1tU32lm8X*cd38q;7}Oo!<)17^fbm>IKRR?LRk zF$d0#?LISQ)EeRjh{9 zu?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot z?1tU32lm8X*cd38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(} zT38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot?1tU3 z2lm8X*cd38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$F zU|p<-^|1jq#75W{n_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot?1tU32lm8X z*cd38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<- z^|1jq#75W{n_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cd38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq 
z#75W{n_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cd38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{ zn_yFHhRv}Bw!{!@g`wCQ+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*c7)R4Xa}ftckU-HrBzqSP$!C18j(murW5l zrq~RdV+(AFA=nB-u{E~Aw%88aV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK( zFb=_?I1Gp52pox{a5Rp=u{aLL;{=?DlW;PI;S`*T({MV@z?nD;XX6~4i}P?kF2IGj z2p8iLT#CzZIj+E!xC&R}8eEI(a6N9ojkpOn;}+bC+i*MXz@4}YcjF%1i~Ddt9>9Zm z2oK{CJc`HgIG(_hcnVMB89a;U@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=ji}&z8KEQ|g z2p{7Ue2UNTIljP`_zGX+8+?oJ@I8LOkN62c;}`sj-|##Bz@PXFf8!tgi~rC{LjTc0 z7d;dxQK63-4F(tv!(#-Dh>7)R4Xa}ftckU-HrBzqSP$!C18j(murW5lrq~Rd zV+(AFA=nB-u{E~Aw%88aV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_? zI1Gp52pox{a5Rp=u{aLL;{=?DlW;PI;S`*T({MV@z?nD;XX6~4i}P?kF2IGj2p8iL zT#CzZIj+E!xC&R}8eEI(a6N9ojkpOn;}+bC+i*MXz@4}YcjF%1i~Ddt9>9Zm2oK{C zJc`HgIG(_hcnVMB89a;U@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=ji}&z8KEQ|g2p{7U ze2UNTIljP`_zGX+8+?oJ@I8LOkN62c;}`sj-|##Bz@PXFf8!tgi~rC{O8?P87d;dx zQK63-4F(tv!(#-Dh>7)R4Xa}ftckU-HrBzqSP$!C18j(murW5lrq~RdV+(AF zA=nB-u{E~Aw%88aV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_?I1Gp5 z2pox{a5Rp=u{aLL;{=?DlW;PI;S`*T({MV@z?nD;XX6~4i}P?kF2IGj2p8iLT#CzZ zIj+E!xC&R}8eEI(a6N9ojkpOn;}+bC+i*MXz@4}YcjF%1i~Ddt9>9Zm2oK{CJc`Hg zIG(_hcnVMB89a;U@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=ji}&z8KEQ|g2p{7Ue2UNT zIljP`_zGX+8+?oJ@I8LOkN62c;}`sj-|##Bz@PXFf8!tgi~rC{M*q=47d;dxQK63- z4F(tv!(#-Dh>7)R4Xa}ftckU-HrBzqSP$!C18j(murW5lrq~RdV+(AFA=nB- zu{E~Aw%88aV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_?I1Gp52pox{ za5Rp=u{aLL;{=?DlW;PI;S`*T({MV@z?nD;XX6~4i}P?kF2IGj2p8iLT#CzZIj+E! zxC&R}8eEI(a6N9ojkpOn;}+bC+i*MXz@4}YcjF%1i~Ddt9>9Zm2oK{CJc`HgIG(_h zcnVMB89a;U@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=ji}&z8KEQ|g2p{7Ue2UNTIljP` z_zGX+8+?oJ@I8LOkN62c;}`sj-|##Bz@PXFf8!tgi~rC{PXEzC7d;dxQK63-4F(tv z!(#-Dh>7)R4Xa}ftckU-HrBzqSP$!C18j(murW5lrq~RdV+(AFA=nB-u{E~A zw%88aV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_?I1Gp52pox{a5Rp= zu{aLL;{=?DlW;PI;S`*T({MV@z?nD;XX6~4i}P?kF2IGj2p8iLT#CzZIj+E!xC&R} z8eEI(a6N9ojkpOn;}+bC+i*MXz@4}YcjF%1i~Ddt9>9Zm2oK{CJc`HgIG(_hcnVMB z89a;U@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=ji}&z8KEQ|g2p{7Ue2UNTIljP`_zGX+ z8+?oJ@I8LOkN62c;}`sj-|##Bz@PXFf8!tgi~rC{LI2S~7d;dxQK63-4F(tv!(#-D zh>7)R4Xa}ftckU-HrBzqSP$!C18j(murW5lrq~RdV+(AFA=nB-u{E~Aw%88a zV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_?I1Gp52pox{a5Rp=u{aLL z;{=?DlW;PI;S`*T({MV@z?nD;XX6~4i}P?kF2IGj2p8iLT#CzZIj+E!xC&R}8eEI( za6N9ojkpOn;}+bC+i*MXz@4}YcjF%1i~Ddt9>9Zm2oK{CJc`HgIG(_hcnVMB89a;U z@H}3?i+Bky;}yJ$*YG;tz?*mrZ{r=ji}&z8KEQ|g2p{7Ue2UNTIljP`_zGX+8+?oJ z@I8LOkN62c;}`sj-|##Bz@PXFf8!tgi~rC{N&nG77d;dxQK63-4F(tv!(#-Dh*O4D z`~L-mbqQ+{5~D)bVPRFmLI>NHHL`1y&>{aBk}xDmS!-xmNV4Jo{Yq8VE*~l7(6G>F z^1mTP5>@!$*|z0auH{+6l9sZ3OIyYYtZ-I%D}oi#ieyE$qF7O_XjXJ9h85F_WyQAQ zSaGd*R(vagmC#CLCAN}SNv&j7ax2Au`;p2@ZKbi&TIsCxRt77hmC4F%WwEka*{tkV z4lAdX%gSx#vGQ8^to&91tDsfLDr^<8idx02;#LW(q*cl)ZI!XgTIHTeCO23mux!PXFKs5Q(QZjG=; zTBEGd));H7HO?AuO|T|fldQ>Bm^H|8_jn*b>v$e(AYHhQ&TRW_s)-G$ewa40P?X&h< z2dsnEA?vVp#5!snvyNLQtdrI$>$G*oI%}P?&RZ9(i`FIUvUSC}YF)FgTQ{tm)-CI{ zb;r7E-Lvjn53GmQBkQsC#CmEyvz}Wote4g+>$Ua9dTYJ2-di86kJcyav-QRLYJIc5 zTR*Iy)-UU~^~d^a{rm6uZQHS3+p~o&ZDsqmwv8Rw;q35s1UsS~$&PGCv7_42?C5q3 zJEk4Wj%~-WFo4&20Npj$su5H(`>)Q3~`gQ}mq20)CY&Wr++Rg0db_=_u9b&h# zL+#df8@sLD&TemaushnF?9O%=h)K58GckJ~5gllCe5w0*`tYoD{v+ZXJM_9gqWeZ{_NU$d{< zH|(4CE&H~8$G&Udv+vsv?1%Ov`?3AReri9npW83&m-Z|Bwf)9^YrnJK+aK(Y_9y$Z z{l)%jf3v^aKkT3OFZ;Ls$Np>o`|tAGj^ntF=LknS%JCiT7$cbkodixoCy|rbN#Z1Rk~ztp6i!Mfm6O^@+sWhPb@Dm+odQllr;t82{lvCO% znmNs# z7EVhi#A)S(I<1{HPFts))86UebaXm7ot-XDSErlP-Ra@GHaHudP0nU#i?h|)=4^L% zI6IwP&TeOqv)9?@>~{`02c1LCVdsc*)H&uHcTPAbom0+f=ZtgKIp>^rE;tvROU`BI 
zigVSu=3IAfI5(YJ&TZ$8bJw}&+;<*051mKOW9NzU)OqGScV0Lzomb9l=Z*8$dFQ-$ zJ~$tpPtIrOi}Tg_=6rX4I6s|V&Tr?B^Vj+J-_viqj_bOfD_rR+*LSsR+`tXzhIb>l z5#2~`WH*W%)s5yxcVoCQ-B@mHH;x@Nv8@LVKMs8!b ziQCj|<~Da*xGmifx0M^}wszaNZQXWmd$)tz(e31RcDuM;-EMAow};!)?dA4%`?!7G zer|tvfIH9~ox9%M;BIs`xtrZB z?pAl3yWQR4?sRv#yWKtRUU#3n-#y?SbPu_Q-6QT%_n3R!J>i~oPr0YvGwxaUoO|BA z;9hhuxtHB5?p61id)>X^-gIxdx7|DLUH6`Q-+kadbRW5o-6!r-_nG_Lec`@zU%9W{ zH||^ao%`PX;C^&Jxu4xH?pODl``!KF{&au2zuiCXU-#dC;kP}wEHAbf$BXO5^Wu96yo6pNFR_=zOX?-_l6xt< zlwK+?wU@?A>!tJ3dl|fpUM4TIm&MEKW%IIoIlP=+E-$y2$II*G^YVKIynVihCuzl3ppVv{%L}>y`7$dlkHjUL~)xSH-L9Rr9KQHN2W$Ew8p$$E)kr^XhvI zyoO#Qud&y}Yw9)gntLt0mR^Y0$_w>cdu_b7UOTV7*TL)Pb@DoUUA(SdH?O5cM6dt?#=LKdb7OQ-W+ePH_w~zE$|k4i@e3&5^t%u%v_h^Tkmb~ zHhP=9&E6JotGCVD?(Oh)db_;c-X3qSx6j+}9q`GML{HI6^cHq#Dz=I3Vu#o% zc8T3$kJu~riT&b$I4BN@!{UfIDvpWc;)FOUPKndvj5sUKiSy!uxF{}(%i@Z-Dz1s^ z;)b{>Zi(CCj<_rCiTmP#cqkr;$Kr{2DxQhw;)QrAUWwP@jd&~GiTC1z_$WS!&*F>t zD!z&D;)nPteu>}WkN7M82}|12k*@TlkWwn?OD&BIWH=dKMvxI@BpF#okx^wd8C}MZ zF=Z?nTgH)bWjq;QCXfkbBAHkwkx6ATnOvrjDP<~|TBea{WjdK&W{??WCYf1gky&Ln znO)|PIb|-HTjr5@Wj>i-7LWyHAz4@!kws-OSzMNoC1ojDT9%PzWjR@1R*)5CC0SWk zkyT|iSzXqUHDxVXTh@_vWj$G6HjoWvBiUFskxgYY*<7}eEoF#oB|~Lv*+#aN?PPn| zL3Wg#WM|n$c9q>^ciBVsl)Yqc*+=%3{bYYRKn|3Ho7^sU$enVR+%5OWy>g%2FAvCr@{l|%kI19)m^>~| z$dmGvJT1@2v+|rgFE7Z8@{+tPugI(Nn!GM=$eZ$(ye;p@yYimAFCWN<@{xQjpU9{3 znS3r^$d~e!d@bL|xAL8QFF(kS@{{~5zsRrhoBS?+$e;3;{4M{;zw)27l&u`)Do+U| zl~TUa%BVnvQ{hzv6;VY}kyR8GRYg1u|Wsb;C! zYL1$#=BfE=fm)~*sl{rETB??*Z})~WSsgW9Mzsm*GO+N!px?P`bG zsdlN|YLD8h_No2qfI6rSsl)1sI;xJTWn(8&Z+b2g1V?Ksmtn$x~i_J z>*|KOscxy;>W;dr?y39gfqJMOsmJPxda9nO=jw%esa~nq>WzA<-l_NMgZijGsn6<* z`l`OE@9Ky8seY;7>W})X{wd41eaClw&lkS*mGAr7H-6xU^TYcQ{D^)eKe8XikLpMB zqx&)Zn0_ojwjalj>&Nrs`w9Gnej-1ypTtk&xoPI7px1Yz)>*w?H`vv@hej&fGU&Jr!7xRnzCH#_pDZjK|#xLua z^UM1c{EB`hzp`J&uj*IxtNS(lntm<6wqM7u>(}$^`wjetej~rJ-^6d~H}jkOE&P^# zh~LT&^;`RG{I-5OzrEkV@91~(JNsSyu6{SayWhj_>G$$``+fYrem}pzKfoX85Ap~5 zL;RusFn_o|!XN36@<;n){IUKxf4o1zpXg8WC;MUk6o0Be&7bbi@Mrq7{Mr5-f382z zpYJd57y66*#r_h1slUu$?yvAy`m6la{u+O+zs_IpZ}2zzoBYlG7JsY1&EM|t@OS#V z{N4T@f3LsK-|rvr5Bi7v!~PNfsDI2q?w{~a`ltNU{u%$Qf6hPeU+^#bm;B5A75}P# z&A;y7@NfFJ{M-H=|E_<}zwbZrANr5{$Nm%lssGG>?!WL~`mg-g{u}?T|IUB!fABy0 zpZw4M7yqmO&HwKI@PGQh{NMf`|F8egx3sMt?P^a8Ew$3V*4pSmhtuJ81RYUF(vfu( z9aTru(RBw(`Q|HpTbsn8p=hOLh0bNiR(uH*qT~rs-#dQf?QkT-Dbs1e&m(%5S z1zk~B(v@`;T~$}p)pZSBQ`ge9bsb$-*VFZN1Km(J(v5W!-BdTz&2FIigo~dW)*?NwitLN$YdVyZ37wN@%iC(Ig z>E(KbUa42<)q0IytJmrEdV}7mH|foKi{7fY>Fs)l-l=!#-FlDStM}>s`hY&D59!1D zh(4;1>Err@KB-UX)B21)tIz54`hvcwFX_wrioU9^>FfH2zNv5N+xm{atMBRi`hk9^ zAL+;XiGHe|>F4@|eyLyS*ZPfqtKaGO`h)(cKk3iF@f7{;7ZI-};aKtN&@s z*v2ug@r*FiDB~M#j0sFQ6W&BH5lti$*+el>O*9kT#4s^UEEC(rF>y^i6W=5-2~8rC z*d#GYO)`_*q%bK>DwEozF=+UO*WI=9)G#$oEmPanF?CHn zQ{OZ&4NW7{*fcRsO*7Nnv@k7Ah-qa)O>5J}v^DKad(**mG@VRm)5UZ(-As4W!}K)0 zOmEZ2^fmoVe>1=gG=t1wGsFxv!_06q!i+Sd%xE*lj5XuTcr(FFG?UC^6K1BEsb-p) zZf2O7W|o<4=9sx=o|$hJn1yDMS!|Y=rDmB~ZdRC;W|diO)|jY&X}|2oH=hUn2Y9; zxoobOtLB=yZf=;H=9al_?wGsgp1E%xn1|+(d2F7Tr{==2XTV9LA)S-kRV7HBnlD-NrI$7vLJboB1jpe3Q`AYg0w-p zAbpS_$QWb_G6z|LtU(0SGzuCAO@gLD zv!HpK|8gvV~2R(wGL9d{9&?o2{^b7h2 z1A>9UpkQz?Bp4bD3x)?Hf|0?fV017h7#oZW#s?FEiNT~`au60w38n_qg6YAGU}i8Y zm>tXs<_7bE`N4uw@*chG1i`DcBrr3AP5? 
zg6+YMU}vx^*d6Q%_6GZc{lS6YU~nin92^Ob2FHTq!HM8xa4I+*oC(eb=YsRWh2Uav zDYzV539bg$g6qMJ;AU_uxEFmtT z%B;H-cXzkq?xjGXEzkmOad#Bp?F?s6YccFn|dxU;_uZ zzym%A0hvH%kOgD~*+6!X1LOp`KyHu+VUeS9;go*fQFzEXbhTwrl1*U4#GeS z5Dr>`R-iSA0Bt~95DB6{G-wCfgAO1D#Db2X6X*=$Ks-nQT|igR4Ri-RKu^#M^ag!E zU(gTq2Lr%BFbE6=L%>il3=9V&z(|k?MuE{_3>XW>f$?AhNCFeVBrq9F0aL*=FdfVQ zGr=q{8_WT7!8|Y@EC36^BCr@N0ZYL$upF!aE5RzT8ms|p!8))WYycamilQKeD4N0)L$MS` z@f4v1N~9!8rW8u0G)kuo%A_pHrX0$pJj$m+s7zF5DhrjB%0^|Ua!@&`TvTo<50#h7 zN9CsqPz9+%RAH(JRg@}56{kv2C8<(WX{roWmMTY;rz%htsY+C3stOfKRi&y?)u|d( zO{x}Eo2oPPjb22ca3LDXPs2sM-% zMh&M%P$Q{CY7{k^8bghx#!=&`2~-j_k(xwJrlwF+scF=7Y6dlvnnlf~=1_B~dDMJr z0kx1?L@lP4P)n(0)N*PCwUSyzt)|vcYpHeAdTIl;k=jIUrnXR9scqDDY6rEG+C}ZA z_E3ANebj#H0CkW$L>;D%P)Dg_)N$$rb&@(oouIQX_x<%cl?ofBBd(?gE0ril2L_MaSP*166)N|?u^^$r;y{6t!Z>e|Gd+G!Ak@`e^ zroK>Lsc+PG>Ie0c`bGVw0$?hb8m57PFf9y%>0o-80cM235I_n-h#(CyWFQMU$U_1J zC_)L!P=PAcpbiaaLJQi^fiCo*4?|!km>Fh)Sz$Jq9p->JVJ?^(=7D)(KA0aCfCXV8 zSQr+8MPV^m9F~A3VJTP|mVsqqIanT6fE8gSSQ%DRaf!S=8NjDfMRBkTk_!#EfZ z6JQtE6?TK&VGr07_JX}(AJ`Z6gZ<$EI1mnkgW(W36b^&K;RrYqCc;s0G#mrR!f|js zoB)&HL^ugfhEw2FI1NsRGvG`(3(kgf;9NKl&W8)&LbwPnhD+d5xC}0bE8t4F3a*B0 z;99s2u7?}oMz{%XhFjoPxD9THJK#>Z3+{${;9j^7?uQ59L3jurhDYF0cnltgC*VnV z3Z8~%;8}PMo`)CUMR*BbhF9QKcnw~MH{eZp3*Lrz;9YnR-iHt1L-+_jhEL#A_zXUW zFW^h~3ciMK;9K|(zK0*+NB9YThF{=U_zixCKj2UJ3;u=yC>2VL(x5<;76qYnC_T!6 zGNNDv5QQK@h(;JOh(#RY5kUeHk%VNVAQfpyM+P#Hg>2*?7kS7>At)2djIyAtC>zR- za-f_j7s`$Dpu8v_%8v@5f~XKGjEbP5s2D1aN}!Uc6e^9%pt7hODvv6lil`E*jH;ke zR25Z2)lm&p6V*bsQ5{ql)kF1B1Jn>TLXA-q)D$&C%~2R?fx=Ns)C#pm5vUDniy~1J zibm~Fd(;8NpjgxqbwZs{9EwK?s0-?fx}ol<2kMD>q28zu>WliJ{%8Oihz6m-Xb2jL zhN0nT1R9AF(I_+;jX`74I5ZwjKuKsKnuI2!DQGI1hNhz#XeOG4W}`W1E}DntqXlRo zT7(v(C1@#HhL)ohXeC;OR--j&En0`xqYY>y+JrWvEodv+hPI;}XeZi*cB4ILFWQIp zqXXz5I)o0RBj_kPhK{2X=p;IYPNOsEEINnIqYLOFx`ZyHE9fe^hOVO<=q9>_ZlgQs zF1m;AqX+0AdW0UMC+I19hMuDr=p}lEUZXeYEqaIEqYvmK`h-5CFX$`!hQ6a8=qLJx zexm?76`h(+LkH4n=^#2Cou1A>XQYE^KvOiN5lz#WW@wh?Xr3mtK#R0Q%d|qPv_|W+ zL7TKi+q6Twv`71N2%U+}OlP69(%I8f-!x;kBhu1VLTYtwb;x^z9dKHY$B zNH?Mz(@p56bThg+9Y(jH!|9fEE4nouLARmX(vfr&9Zk2R+tVHB7&?~jNOz(;({Xe> zoj`Y?yVBk0?sN~jC*6zgP4}Vu(*5ZE^Zuf z5&9^7j6P1Epik1L=+pEW`Ye5pK2KkuFVdIj%k&lcDt(QAKs zJJ`h@_HhW#gfrtTI4jPEv*R2%C(ea)<2*Po&WH2k0=OV9gbU*$xF{}$i{lcwBrb(Z z<1)A`E{DtG3b-P!ge&7JI22dK)o^uO1J}g0aBW-%*Twa4ecS*y#Eo!c+ypnp&2V!Z zhFjop+!D9Kt#JfygWKXr9EGECJKP?3z%e)$cf_4=XB>y)aRTmwyW(!RJMMvd;$FBn z?t}Z{ez-p#fCu71crYGY)wn8r*KrYX~mY0iW(EtqhoCDV#&%|tM5n6^wL6U9U` z?U?pV2PTGzWjZpQn9fWb6VD_tU6`&+H>Nw&gXziiVtO-un7&Lurav=)8ORJ`1~WsL zq0BI5I5UD7$s{tPn9EhW;Qd2naj*$ z<}(YJh0G#mF|&kO$}D4+Gb@;t%qnIzvxZsAtYg+Q8<>sECT26Uh1tq%W41Fpn4Qcn zW;e5k*~{!>_A>{VgUli3Fmr@C${b^kGbfmn%qiwHbA~y~oMX;27nqC8CFU}7g}KUH zW3Dqdn48Qk<~DPOxy#&R?lTXVhs-19G4q6Z$~SW4<##n4ioq<~I|-reagGY1lwEEgQt9W7D%4*o*&*yu zb{IRH9l?%d6WLMhXm$)cmL12AXD6^p>_m1FJDHurPGzUD)7cs9Om-GKo1MeXW#_T; z*#+!Eb`iUnUBWJ9m$A#)73@lO6}y^U!>(o5vFq6l>_&DIyP4g>Ze_Qz+u0rLPIec& zo880iW%sfB*#qoB_7HoRJ;EMkkFm$u6YNR$6nmOI!=7c&vFF(f>_zqxdzrn$US+Sb z*V!BFP4*Uho4v!{W$&@~*$3=H_7VG-eZoFvpRv!`7wk*+75kcf!@gzTvG3Ur>__$! 
z`A4JCMlP5G9K}HnaWscHhGRL7 z<2k|!oXAO>%qg78X`Id(oXJ_7%{iRQd7RIMaGAKwTox`XmyOHL<=}F1xwzb19xgAJ zkIT;$;0khuxWZf!t|(WGE6$bRN^+&R(p(v?ELV;z&sE?ma+SEsToo>qtIAd5s&h5C znp`ceHdlwM%hluRa}BtLTqCYA*Mw`zHRGCdVO$F?oNLLo;#zYNTpO+}7s*9&(Of&O zJ=cMY;bOUtTqmwG7sthO30xPhE7y(d&h_AWa=p0TTpzA4*N^MZ4d4cHgSf%m5N;?p zj2q64;6`$Z+$e4|H-;O_jpN316SyR9A~%Vf%uV5@a?`l!+zf6eH;bFi&Ee*9^SJrk z0&XF z%zfd$a^JY`+z;+2_lx_@1@Ni()O;E~kWb48@#*;Vd<*V`4`5Jsp zz7}7bufx~n>+$vZ27E)l5#N|^!Z+oc@y+=#z6Br7x8z&#t@#MP4d0fJ@KALftnNBLv?asC8`P7Goai`c{=F7b#@LP#c(nPeeZNj8$5r0A!SK9Ql3;G6-gyhnN%U6q$;UKs*@U|CaFbglRBg>sYmLQ z2BaZrL>iMOq$z1env*cnf`pTnq!npRB1jw3mPC>$5>48X_M`)eA+e+*=|nn{I1*11 zNEgzTbR*qK57LwLBE3l;(wFoj{mB3_kPIS&$q+J>3?swI2r`l+l2K$d8AHaBab!H1 zK$6HrGKowkQ^-^@jZ7yq$V@Ve%qDZlTr!W$Ckx0zvWP4uOUP2Pj4UTB$V#$`tR`#7 zTC$F;CmYB{vWaXaTgX?V82Ub2tuCkMzua)=xzN61lfj2tH?$VqaF zoF-?;S#pk?Cl|;?a*13fSIAXzja(-;$W3yK+$ML(U2>1yClAO&@`yYpPsmg9j65eV z$V>8yye4nRTk?*)Cm+a1@`-#VU&vSTjeI9R$WQW%{3ZcHDj~IyMhFzr3PD0TA-#}6 z$S4F0K%fLDAb}RJzzD3s3A{iAK@bH=kOf6h1x?TeLofwPumwkO1yAsW5FwM0S;!(} z6|xE0g&aaoA(xO_$Rp$x@(KBc0zyHdkWg4CA`}&h3B`pHLP?>NP+BM>loiSe<%J4D zMWK>VS*Rj}3RQ(_LUo~rP*bQS)E4Rpb%lCDeW8KSP-rAH7Mci6g=Ru?Axvl?gbOW& zRzhnbLTDqj6(WTwAzElBv==%EF+!}+QRpOe7UG0>AwlRObQQV@-Gv@PPobC4Tj(S7 z75WMNg#p4qVURFb7$OW6h6%%k5yD6zQ5YqR7RCr;g>k}oVS)v6lMvtg*n1pVV*EwSRgDE772@mCBjl+nXp_~A*>Wu39E%Q!dhXSuwK|8Y!o&L zn}sdHR$-g4UDzS)6m|)_g+0PvVV|&HI3OGp4he^aBf?SPm~dP;A)FLW38#fK!dc;* za9+3|Tof(|mxU|BRpFX&UAQ6K6mAK(g*(Ds;hu0`cpy9!9tn?yC&E+VnebeAA-oh` z39p4W!du~;@Lu>Jd=x$jpM@{NSK*uRUHBpV6n+W6g#a;?m|9FD28wCLATgboUd$k7 z6oW+|QX&+QNQ+oxL{{WPUL>L*ilQXSq9UrIChDRgnxZAzq9eMZC;DQDm`ThmW)ZWB z*~IK(4l$>gOUy0i5%Y@q#Qb6bv7lH;EG!lgi;Bg>;$jK0q*zKUEtV0>isi)eVg<3H zSV^obRuMzRs$w;!T3Db^Bei*>}hVm-0G*g$M3HWC|)O~j^RGqJfCCbkg6#g<|# zv9%Z>wh`Nkkz$k>Ew&Teiyg!mF;?s-b`m>_abmofAa)VEirvKSVh^#W*h}m!_7VGv z{lxy_0CAu=NE|E<5r>My#Npxyaio|ijuJd?`fABxRPeNLi(9 zQg$halvBzj<(BeDd8K?(eyM;|P%0!9mWoJ4rD9TXsf1KgDkYVc%1C9Ua#DGzf>cqe zBvqEGNTE_yshU(>sv*^sYDu-FI#OM!o>X6IAT^X4NsXl@Qd6m!)LaUaT1eqiOR1IA zT8fa`NNuG^DN2f#+DYxD4pNL1D|M7QNu8xQDPBsDx=3B6Zc=xthtyN*CH0p2NPVS# zQh#ZHG*B8O4VH#TL#1KTaA|}zQc9FYNu#AP(pYJnG+vq@B}o&dNz!C#iZoT4CQX-S zNHe8b(rjstG*_A@&6gHP3#CQUVrhxAR9Yr2msUtCrB%{uX^pg2S|_cSHb@(#P10s* zi?mhRCT*8?NIRun(r#&wv{%|E?UxQn2c<*OVd;o;R5~Udmrh70rBl*r>5Oz%Iwzf% zE=U)pOVVZOigZ=FCS8|qNH?Wh(rxLEbXU43-IpFn52Z)aW9fomtIIOrB~8x z>5cSOdMCY?K1d&>Pts@Ui}Y3cCViKFNI#`t(r+n1P9>+7)5w8xS~*BgC#RP)$Qk8e z8OW3jWhB!wmKm9qIhmJ&oANFBwtPpvE8mmv%Mav-@+0}N{6u~#Ka-!!FXWf5li$l9K(G*=V6jQMjTX7Uu@f2SPQ8Fo+l`KkDC7Y66 z$)V&_aw)l$JW5_ApORlGpcGUJDTS3HN>QblQd}valvGM7rIj*DS*4s(Ua6o|R4OTz zl`2Z8QdOy@R99*!HI-UQZKaMUCqr@s5l}<`$B~FP~5|l1VSEZZMUFo6pRC+1Bl|D*erJvGY z8K4YQ1}TG;A<9r?m@-@$p^Q`#l~KxQWsEXb8K;a_CMZeDL}ijPS(&0tRi-J^l^M!R zWtK8qnWM~A<|*@)1qAXRGDa(}=%1ULGvRYZAtX0-2>y-`4MrD(-S=pj& zRkkVHl^x1XWtXyB*`w@L_9^?71Ij_=kaAc#q8wF@DaVx)%1Pyva#}f~oK?;#=amb} zMdgxmS-GNIRjw)5l^e=U<(6_=xue`w?kV?`2g*a`k@8r1qC8ceDbJM`%1h;y@>+SL zyj9*Q@0AbAN9B|9S^1)TRlX_Tl^@Da<(KkX2~bn1sns-Upqf?G@N4b+BeBek*GL~W`zQ=6+{Y6~@7ZK<|WTdNUj z8?~((sYa>MYCE;P+Chy`W7UpoC$+O0r^c%ZY8SPu+D+}Q_E3AOz0}@nAGNRAPwlS` zPzS1m)WPZyb*MT_9j=a0N2-bHD0Q?tMjfk;Q^%_l)FgGHI!T?ZPEn_-)70te40WbD zOP#IGQRk}j)cNWHb)mXQU92uqm#WLuIQYAx=G!vZc(?Y z+tlsq4t1xxOWm#RQTM9*)cxuK^`Lr4J**y4kE+MiIL

 [remainder of GIT binary patch base85 data omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/legacy_pickle/1.4.2/1.4.2_x86_64_linux_3.9.7.pickle b/pandas/tests/io/data/legacy_pickle/1.4.2/1.4.2_x86_64_linux_3.9.7.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..403ccf99c06d1eba93edae53e6d5bb9a6f5b80e4
GIT binary patch
literal 126123
 [base85 binary data omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/orc/TestOrcFile.decimal.orc b/pandas/tests/io/data/orc/TestOrcFile.decimal.orc
new file mode 100644
index 0000000000000000000000000000000000000000..cb0f7b9d767a37159c5509da1a47877d1c8b411e
GIT binary patch
literal 16337
 [base85 binary data omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/orc/TestOrcFile.testDate1900.orc b/pandas/tests/io/data/orc/TestOrcFile.testDate1900.orc
new file mode 100644
index 0000000000000000000000000000000000000000..f51ffdbd03a43fadbedce302ffa8e5967a30ad59
GIT binary patch
literal 30941
 [base85 binary data omitted]
zy%C$?sH;CEj97+}IA8j7PcpjeU5E}yPodeGAq^H%6avX7Vw(s)A@O_! zqcBaSpj2H9wYBdmqvcH=%9~9G_H0_9y>DX8Y=f{>8ALB1QcF9881n3dvsi0*?pUa+{2VS9SEfzHNGcI&3da)4b9z&!+gB0VZbuZ zr&Z)^2DC!B%)tqBd^oV7{ZZUwe#y=LwEnaY#Z70poF2Tn?(jvt$X>bV#x2{K+5TGb z;*-ney(O$gw;E@WhJDRtuSu7^L|y(%GQ$zud9ET+D>PGyM;JUU$rP4*)r9`!xh)F2)7pId#S3yiqUc53G2L6 z;mUl+JFh6&*2j52Om|m$M3&B0!w$%b3AU}nLxYx)vn?Y18LD>_dY59H+PEzf6IvO| zQcdruW-LWDgpfU{59fiA$nh{1sF|3onI5UW%M~CDLpDDJ86+n|JEfxeoL3Ywx(aSO49I zZj=tNJAl2J^q|?(4t5{kYTXguRl|3#dM=`*_Ogr}Lzf>C#IDr70_X-alia8F6#Al- zjc@LQ10HW?wmJp><)xjaJ#ifn+*fwTFpu>*lmNez?Xm6wVF%`}-^^2Q4^At0GOkm; zxi*Zg7DlX(EX>IxCTyWZ_h+CiIX$~5J@9S)`NwVzW1dtkZt;vOb2IM_wP< zK=1?Okn<6^L8cp$78?y#ihG~y7QW=qDFr#SJEc4arWBc2)RbcM343=+>35Z)l21~3 z4kyKe!uh>&$aE}n+@&txxT<@V53)a{?D{nhO({6;hUhe`V*_7|O7CkX*=;=x9)J9o zDFx-enAm{Aqq1R#EfxYMo@y69P|~7Z@_NQ9%>X|V#YHD z?0)v6TbTzY62#gOo57P5pGqsr4_N@j?wtch5+~e4BS}fW0}joU=){PNt_k=)0u(8bn1NSUObhk0IJ_%4Um_SN@u54ODh?Q=1l(-JFt^rz z&r;G^OfFVe)wr`kTdPTGxo*>#Wi{h1YE?fy2{&-^v(M?Eca8`g@FYYdTyD=PCB@Pn zy7By!xeoerG5x+11eZVba-I=^15RIy80w1=9!FQvxgZeHuVUkfFio+em{(H0fN`oX z?oHf1G|;;MdKWzN*FM ztM$}3UIWkN2{EC}n^mO=--sqWa}zjM6`Q`FZ=RCIQVD}JNR6FaqwmT>foz6?BG);3D1#B4SJ}o_bgQDnQm#Al_mG77HO-r zlr}}yXGW!wzc~0l9Cq-X`NhGP`9}xe9Q!X0zVu%leCPjr4!*Ds3#_d6VBL)Pd_z%d z=;}r4f=oe6A)5{_wJGIzw8RO8I+13fJ1^~)Ww@7dOaBeA={)4%Ge7L$JN^G5vC+#r z6>`-);p2eVW1ri+b@x1C{(tLTSV#QgU1<6*dKZ`udlz{BXWoS`fA%i?!fYgn{;Q(G z|6cC`edr-#Bgyri zkXzy1a7WyhfuO$fY8yNi-TZ*OKD=@iQ>R$T2I9x_*9>tzl0dOkXyn_MuWhZtxiOq zl01_*!OWnWhyF-$h4UyLxNXAF*S+V3#TuE22?AS{eOawrtLITRpY@~Nr4B_GI_4Ei@j)d2aD zLmURBaD2^&!ILHcg@bKZO!yum!SpnF4P891Wl&FsFGb+(n>vTKh zqLq+aqW9KN(GpLuQv6gB%7lzG*xlY!-=&8sfq5*M-M3YS<0}P#JL)FfYc)!Rp#=B@ zoCt)He20{CRbkFGV+IxEuzE4J{K(@z%qwd4)WxoqykFg@oED^aBHEp;88%ZBqWpcq}SvYMJ*O8Y~tya zgKvTJQF}z$n4Ny_PJ}o?93XMHw9~mE+u7&{pnNTe%5kj1#v&ix(f&%2T^+EOE{I1)31rdUxv6M_k*J6z-uX&xPNWIDY#q zO35I^kjwrgq0xpC6_9{-F)#x)`*G)p3H?(#l>TAQ#;_ju&Fd-)N|yj$WXgu#f>j2Y zflgn(Cvc#Ar!cW?bNzc8DY zol_H}Gbfwq-AQ-rKV1vKJS=jc%%g#*+{%fG{0cL5`iKM+wQG9whGu_04Xy2W2o2q#E`n9^5=drb_#QVmF-3pAPH*Igu zqbJ^?I&{$^E}CRjFk`<60#&Nf+3CRUHE2S+cO%g7zyk&Rrlw-^LV?%6`%MJ~oogB) zpx+eqn}U8*&~FO*P5;waE9f`<-|9E*J2<7>WRZr$%ySz_E<8GZqK}YDy7>M3_X4ba zM^}c_=VOgHxqWu&3k)6iez3Vx&DS0rfB;D`BzanRY(e#yc1IvDPxR!OMWSgS9J6=h zg+(G7R@6KLBZHS^pWga)XgTOTtvl#Fee!R;r@u!tGt1tOngBF2H~gs|`CBwo?vDYV zEd!<&x;-@Gqd|tzw4Iuqj9xDskKy;y)W_D#6ph?j^Rseccoy@HF(w-W zBaWg%)n(>B$x2@JI#y>px}kScL?3|^E$~X}oI{vm`>==%L*olE3wErrsDil(M9lwu z$14b-3I?s`C@-4qd3btK;&IMO$uUCv#esU8qsgE-bLHKX+OW_wIgbW z$9I0){GpJulkr1J0-xraJh24w0p88w_W8^-CvnayR}r5hLD%y(C)$A(!6M7ug?WH4 zue1=M{DhfurqV)DSeGO?$~T2&=sr7+*U8Fzof&ulQUgo4z!EO7gbOU;0!z5S60U!0 z2^VN&4FHX-|H|tCQXY`<94O%ejjaF6OSnMFgQ7en-zkq293m~5gA>3@DhG#EHj!Ve zdiUkXbX8K&dB)C!V@4omjJb z$_fqc)Am9Mh97?jN~nVUDwrIgJVppqrO2L__3u$0hru}E1t6?Dw;A>=o15yll_Kw| zG2pLy0KMest~!y6v#XbAIKpo6`1#4)8Mi0Tdl+fV(kNnfT;YfUYbt>}4bzT*o#%(L zdqUG|X0z2{UUV`0!U7der%qtwvyXShv+b0H`I?~o-$jb!OhXGe?t>wVq}N_^98^&v z!sb_DbBQyXu|hN+K-Yrb;Rys*uP<=!)oZY<@tA5D@2^ zL9p>2#Y~wr7N8C!#{iUteUX7Fof%dL>r~Ta{RomEuz3V*9s!$2z~+(vxZo5dLH}Dx z(C`5gl=@Ca&GZDeH^U`_C7(Anjpb!iGe$0ZWo%;GppAJ}=TD8|YC$N(LxX$2Fji*v zI@kUPl@Hiq%z#F>yS?sx!DD zEynsjzDMp$-^Aas9DoR|B$V2InED`kcv!KIGc6PU@S^u?y7)aNs5lRlEHo70L?(PB zvx!L4i|&cY+?cAv6VZzL{GB1HFP#CWZj=4AnV`PNDc zLCTZ3^2${|kdTTZ_B9;a%~GsBlEC01$v^X;9S_?Fd>{`3m$HQZDLaMGZRX`@z0zu&v7i3Bfb(i{vbZri-_{~(>HJf!VM{* zpTsAhXbEV42MR}o0VAvJ20{bA|02t|M1iMKFz8^Zi64@OE1K%qG2PVnF$aBkh z$)@9x5}P=Nizyz*7(Ee+QcLQU{jK8xC?y3S_x{*xNexWY_W(*wXy>-2rckO1(4CU+ z@z+w5gCOYR0T9&q=Teh@5%+Onb)VX(+wGSB9Mzwtf`9^GKF8zCR$_N_G)tr9uG`&?+LY#Q3T 
zR}%+>tAUtw7xn%$WuuP!r%L*Y_XkN(*xw{U@r&8%_t4?N*;!A&M5F(%v8}5B6MlCR zm~O~-NzfAll+7i^+bkx<H+KRf6h z8dIooDwID~XL7V?3eKJVIj8JKafa4#zL|Qsc|Y6oJ&1<{NfHu~5(J&BUF{Y2SCX)< z?jaeWsCgohJ=4uD1kz|N_EsB$afOC)qNANT2h%F?4pJ`l3}t^&)1!p_)(*k4nNb4O z9}w+h^tTedZ-lkQ^t#ibpiA|PxQ)!O1jw0QeQJ(GY-#*Qzy}B?#RU0U;uwZR-^LN# z;QdR8Xl_=JyRuOK?ym&MR20!QQZM*35j4OO!c57r!>@*H5dSmT>9wY2mOfcoZ=Ov< z$s%8EEI=E0)u%bxct1A;@S?X5eGyT}_cM#kRA3?q?0o|X5ZL<$_P&9=Z(#2m*!u?d zzJa}Ol&4q!@0JmRy>DReo4}bOi(qM$x(_iAOahX;-dI_>^$E4o`0rHP6YdhATpUkSA}AL(GxsPzbuPLl947sd zE-yzG)H98f7|$moW+E{_rubF(4a&T;wmXPhQ>=wxx2c_ z>|U2&V&W4LhfxWzFhm_YChV048|HfSFwEsD-sph?c5HkA%aXp8x%Xv@q{Jm zLpKgJq+Dgzm#myRU#MKTy&2KPhk}!FNg^yfB;Np0N@+IIF+Kyr#j|y*- z_xO6G^fSeWsXgR_28>7<$~9jFQTu8g{|-X? zM}J)W#uD_`+2&vt=;J{Gl>0XcP}Ya+nPh+fRro#4&HF6@N)Nz-x0M6K&A}|tMW~X{ z4QO4G5cByqLaED~3T&Jcbo?5{Xk-`EAQP0^t)*zTv*Negqa?XH4~!(tD)Lp9LFowr zFM7UiFCCh0t}u&9RO)Q3xp`L~9(zzv%yvWpS)kkW2Z$1o1rin9gHk9AB!U$Wskw}g zfVIDXVEXuka>tfpl3843xM=kS+3L%(9#R7BE4_8&-1!VQk{!a==1aZRu7%s@Egf+V zw!hUV!~l7o@(|@51Fl1Kh?iuHW%Yh%F>IvlFJFM1?P#lDaTX{1(HB>xsIZCw#DF%g zdoH|-qEC5iL5Wny!b+rHb-w)!U-q*{EvdH3pXaZx^Jnb9=b5Rf91g zFb4E54bWf=2#f)NF(5Do1jc|A%tV@Z_a2kRPu>(s+-SpuU9gHQY_*yP_ZxR~A4P<8SQup0zrK`5aIMF%2YA~*X2zvk&`w3OoK$XxU)6Uw;H;cO!00Z7bj0ypG4YPFTxu+vT;EXqIwyS z3gO5+2P^YAfrd)U_>@*JqxgZ-qCO7S<;TCMG<0vg-qc%eFa^8j7=EV!nn`ji+Y(Srhw;vu%Uxv{% z#^*8eluo$`6-ti)O4uee(iGS3p4!`73>3U|bzv2H?{k=eEH~rE{kpb^wr;H8!2PE> z%G{8yKVF2t2kYe{`06tR7OEER`+DOneIA!C(Lm}{r3fYAVq!ULnAQNHB}vY$JrdL@ z{17f>%)*fFg1Z)=v%g8`@^TD)@sBK$OXHfCIjUhFsIa?SmNx$vx#S=UsymPd`g^r- zb{pi4-S3&3gm%`dNnK*lDSwj&difR$9@D#zf^zqoQ@WtzNret9Ryd+vF$)@w_dtsnc&uEHt4w4|i7n}3eR8_k<^{aT>LEr=UQ8!puE$1=@S%n?P z56UY*S;-b@>A?C9tFX~QMnuFpL;;{6JwS~ptn=4IC&vV&h++T>aqdbH8-1I9mLh~b zJ`r64Q4p8~0<%D176{A&fmxt`d=}`(h9EM`p;Z6YV%-jVUzH(jFRTlK7cMxFtV!G7 z#lAE`+XpjpD5-Nf{=NAv-BFowWmV<7TLzIoJp#4b|3h2wf>+W0q;*cb4y#<|yTwN3 zv(gaOe++{D?uMXG^L=^c00?UA&sc5vEeQIG#DXV3#Qq)y`rddOU)2!yB96tFLMP`P zbGk>N7rcpplMKBcmm_#c@)I@k#@x^;J&6jtL-R|ZZ}riN@sy9TCyida92|L2g|~&- z+ncb8<)c4)Z9{$Q6`WnY6D`FgD?K#*erAM9b9_Y)ZU+*}G%0PpLsi z>l}%z&B|Y%nSOKUqW8>Yy1&%HopYp#Lb?><{vb-*e!lbo1_BC#2|O9#enYJ}Sj=jZ zu=7p27kej2#(C~GwI&avb0G#x(F_ONN|I#b(WS9=M1><>^6EWG)O?u?qs@UfKyX9h zIAL&{H(@z$-nSQKH(;OKPe@};d-8jtGt0>^%T?JjUrRI=7i8rIw*!IOfxzuR;C3Kz zI}o@X2;2_TAFaQOds^#lqr{>N)}2VYaWaufI$V{?G&slP8nrvYl3^zkW-ujQDQ4Zti=-d*b@XCXKD zXA|m;w_4}Y2UT|ldAHJ%cfLXUR4X343zfO)IIRCaodv2SkdlH+X-*{{X}36&3}bmJ zgX0p9c^-+p!bSLsI<}eU zB`^NHUV9((WE%zgVtG_DhlhhwY$pj*9)ZW@dc{2uyA`bM(|E!l6YD{%Qp z)V8--8CVo#V2<<0JW%xk2$cP|@<1MmC-1m=DJI!%ZQt7x`aJ~Ne1r`j%K?-UR37L% z1X?_SM~X{s55={v&AkbcH-Vc@G6^sj!;X~UxnlpIK#r#kj2aIj9ZvZwlb|*60d51e zqA`ktnn;a8v4{88tDG;I% z8$LS;%r=yJ6>PhOa81UKiDA+KDnsWdeeqXS#jyYl%I$b6`^^*cwM|J)h1#%DU zT;k&X6nmuNR|>R^;~u@X3d2=&sZncz7;(`xfkwDT z6MXVKl(6b)>Ao<$oTD&aJsYT?iSzLkB)NM|PvFQcgzBn#30`sOtz!1)lgA$zVRf4K z5su%;*#Qgha~jvJ@8^{WT{vTNp7%a?Q!{ z$0DJ5Zstu$3j}gb1aU<~(eme7ptI^PG_L}MyjQ4NZvVNEH-bAJy$EJgBk^xK;i`MN zAA+55U>pdH1GRvia9}4K*a-)A!hxM|U?&{d2?ut(kx4kW$X2`8o59YIAV2ZvR{<#Q{ec>IV3Jz0qw>LVB^FCTR_Fu5#D@9uqTjrZQ2 zWBZGX>n@P<-b?@S$0=KS3#0E)hGE^`E$PjVIFMTp*7}di`&Ki0O7i{tUWhN}PL*^%5&k zPQKX!JMb<7^&2+6mrrAc7+NRv_}pKv50;G@LmsVLsG>o%$ zk!$~S+E(Ey7w@v5R!PKY$_vTw%Yjy0=$`YO-!}j1Ve)UZh(wA_9TRXl5Euvo13_RQ z2n+=M$2Obn|E|qORjjtt+23v<&eK;d3P-j~Rl}P!{93qJ_wh4Y$B!&?J&SN_VZN8D z8mt%{7oM=rOBJrncf9k8l5KsQ_rr8|rAK7xY&GnPLY4`(t;0iumXWh9BK;YvcNBV; zVw~EzEfW)38Ou^l@2F-hMKy$wJ*f}pfsx4ZFkX@UTDPJgt)vx3`>Z}eU{tE-A0G&s zIItWj^!+pw5UVcs)w=@Cy%8Q5it+Lj%+@|FGRTiOuhwmqv~;ZQ%e}pL4{suwfTr0%@zyHMvo7wbynCGaY?-zd#Zze`Y>6Qbzpbkx2!-7@Erv8!c 
z3W_s}>_y{$UHH>ERJ_SmX^=;FkOy@u#6%&7oMNMq?(IKU0Vyg}U1L^Jws&4yBzfVl zd=Ll4L4aIPue&FEh{*;U?@`Rv)?GW!fRLNU?taGe&KGC@T>G~2ZRwehu6J5i)}OQ6 zUE(%DB2aKg5V#`<+z|xwAdm-vJP71LAP)k05Xgf-9t83rkOwhP@P(gvL?_AY@Q>s{ zaNtw>?v9|{dbr%Y>m=iFX*gLW?=@N(*ohM-U+6P=z@p#LSE_w)hZ8Edd_;;46q9%Y zb#P`4;R~vn3j3RYMy%+c?)BOWlfeldyCt38(=>n{x!hi?o(Cx1f(G}#M21_^+>fW+ zy0PwhquSwK?@1^K1^`Xv`oAewXMpi}TMMfwm@#4B$n5H)#QpXNDY1!TxR@fzgjy3c zv4IhtUO|XLOF-l(_QN(pd|yYIc)9^q-z$d$qDfg;MSo#I2egvJgXM7lCKnW{rU5w% zf#5RYVnR^ANGPyF)s*1a@3KZ4=kGy*ZOYBD1u@XyZBve_!GYJV0%FO*E`8Ypa* z3BTi3;k;i)%Hzwzc>DMT!8R3$UtzvyVC8O$%Gf|e^w_cZibJAEkZ(5Q`nrQiYDqC& z#(|sJoW(4HnP*1*8~f~sXF{hy z`z&am1?{t-eU=N9OF+25E$Ar$y#s=d4d^OCA#TP4*j|aauHWSnphrc1D3mMUbYKvWKqd^cd|6Se z?T+1*s+6%?qf+@8X`S%7A5cZMVfyLeg+lauT`B0e#b5X(?O&qmXW<%%_VrCf0cyju zloVR0xYGJx%b&bj`J5>XofJ;f)9*dFuc#nRymfU9rtR+d?IB@?+-0#*B zaRMJ>AlvIr{kk3f4v(j&lJQ9#SyD<9V>gS5i)aeQqZxdw4PRk>eJ)?Wh)fA_+Iph4iKc(Fz_mNLBH6wd>q}+n zk3e^8!>6eu@7Nr0jXs~*DJ@2<1XfzfV9@+p*HmHKQ zC#X^TKPMPQoV>94RaoNG#wP4u)~~Kv;R73$wd&w1 z5EmZOV7QD71~pj?{;4cyILur8d1w~Xyk!f^YF!_i?SN>BBiYOw@^e_7T!ykmaW)cX z-X1$hdamx0o=@L$)=yS$*wX){GT-zzRW#Dn?Pp~^P_D9TrZsjf4^o~#J$IM^?XFb5 z<2i{x-l=(60DnHi$ET5~)Nots-jlDAt!wwqHt$xG;se=gE*xYqTe4lGXr%TvJe6tFx6EKdQ;Q~vSg zDaeE6DSXh^@^>;6Phfi^A)Qyy7>&)CXcUg;Jg~R!yC+#`?M`=na(^t!Ph=k@?o`@0 z%1oKjcj4~OPbZ7qj1zzK(XDi01^);j6$<(NWl7M1Fpy;3K?s!lZ;MlY9|r0l+T1D! zAkf_7<;uq2LZF;^T=+l=-uE!jcL;P>Qv~wSbCyGx?5MVvjLt3Z^*Zc}0ysE|t>@35 zdr!GlSBhpkD}MV^RMxo2L8cQCW`FT=N$}dU?pNbthlU$IlG#LLiU(~Xa>MZM&(+Dy zCc94>|%{KP;_={IqrJZGD=DIql2_;@^WN3vwaN$~HUR5*n$ zErVXc%vmW3WPuh3>TQx;Igc((AqC!UH#G#bzj1AN4HvP;8h~KzcC{$R1o>Lx34SvAP(>dZyy9ttw-$v+7tbp1k$rOR6eD=b=CvXi5a}QLFisQs7;*?VI-V zYhKSn$fA7VrgD}t5m$p4!(P;K@K0VN26KoA6iAP@wBAP59OAP53M z5D0=m5CnoC5CnlB=pPP(c7eohc?z(~SX1%Y2jK^qUB#Y}3~Hh5oEXG6u)SSiFALz* zcr}4TsM6QYkjF~!{=F}matXSDJI?I~e5ZNp9US(JsPpP#p!epM5Rrm2tkR;z&czE@dt zT?puueF2l3qUu{%d$m{S0`$v{$I@F+qXZO6GPvr>rW8F&X))H2x;ct;|J65pFc9?a z0129-^otRu^o+srO)Hl1S%#8C@VD|sppgQdDOe;J*!Nlg+MjZtb+Uf-n0pB<^Ix3+|2F~K+x z7zYC5KwumQj01sjpnrNCX!sxt64Vn8YK<}D^NNoQI+OGDH9oLo+4xrc3Ay4^v(<^n zQ<5vr7xv@Ty3u4eLUb7ScNLix5cQGn!=Np}4FvyQjXuy@SJ;a+;Ds8YfHt~@v$&Ff zm~12)55k}ae}qB*da{`**?ijrOg4$(Zx)7rJK1d4;K3*Q_F+&i4ys1~|Fn13e@!oZ z8y_%ogTY2BsK@}xfr!*01W`&UNs(@lM!KY>8XCS`n?R!=K z6@2Dlj0S5M(h<~8iv8L1is-LHK^5Z7C0?x$68(d}TIziz-7VU%tjm}~>A~+=mp#7? 
zesdh_f@589tP75H!Lcql*5zN%x{UtR)BNc==)ya9#m+&t=uV+=9Onggtnrq}F6v;5 z-MzP;OCsS%ND&XNH@5$Qy)f+ssD4LVEX6kn?0=-5#d3)Ew+096ieYm5ZM13X`Z?M( z{?BOh%Rmr?D*`VXW?^Apx09-qjXGIEFqQrP_-@M_h_VEOO*W^`IyaDhwYUuU3z*_* zk2s zJ6``FxzbRquH_SH^yJbZuzQ;8S1<Q%=UjulAt6O(EhDUauaO zH2mgt!M@nohD6>zWqYwN4(89J&wT&Y{oH5219+QyehxMPzYR9OvH2gNcGj_-O;h!9 z_0fNKHmB2vAtxZp%5w1Ee!9Oq8>DL+rwj>)+A^PGh|(N@1Kz9dK-8URwNdi$kqKIP zhp1-4%Y7qR%MAAOL8?0b*N3mo8wjUbz-y z?8M=gqvRrMjd1iox)wOxh##8ATC@naQW3jsgG9|8cnC7RRU#f$$#@2$Y>9%kR^dqJ=$vHLyaM2l89j-qub$$Q+ELcX9MCj^4%5yEu9mNAKe3T^zlO zqjz!iE{@*C(YrW$_g_x$o=zXuk@{mbE~+9_Z4i2$sP<8$AT!Yow#TV=gFEvyjjC2$ z9p;i|DU=8i4Cn8qe}{0F8P|H=PDFi&V|UXYqX^oiPOAzc!s}(MKH2L(zS@`vx$_M+ z0h5J=@c3jm{P@V{8N`L1c}qQj?iAkTCvC=p#Dh{J;9x0ciK09Zz6rrWK-L$cG$k;B~&FLx#ja)z3+D*-G(LNz3&I30v{jWY{u{Rq75H4`rjZf zzl_8$ELNWls z+Wcc{z?7>E*tVnu04P4}PxtD>{ARK55A>nbL7$sBIO!w}#9kuGhwTZFmZJ2%M7gp^ ztW5vAS3it{G=K+L9O~J_0N4d%D=4nv-FH}ZtDCv9K24svqtVrrgoDnb1>bUOMu*w=b2C7GvUozy{5>g|AwI3yEM?{S_9?Gt_R|tpE zIZ~I+!Kd0hz692UK^$qa=GgGUllPz%2ddK0_br@~l;!uZumWi>8WHAKeqYp>;U zU6S%9`-iwEpUZm674IfqW$4)5q)Ur?8fnT6!@!rEm{aNu7Ip0wG&dLYCAzW4m$#Gl z+wkcHoyP8n!&og0hrPd=rCAvt*`3&jG&$*sjP<#R3eVpu2$DosJyqw3y!Ma6By2Vi+i!weE$S}Q&5G~SjRkL3$ znR-u%UM-S%e~aMEiw(gWfoJg3Zd4{D+XQP`Cu!P5jXeDFN?*(-@WJ4XjiOs0Z#_X0 zf6NiD)bO%?LgAu2y#_|B+!P%N_Z@O>7*K2&ve}B8TFsbRv+x-}>=zh(URku?FPZ)< z>$hBd{7wi+NiJ~>{E&n>l0@_=BTWG?E9Z=0fh^PNAOrdG!y8)+j|a$mm!oNSrMNtE z$i3|b-c0@d2Va;$1l@8es;eOCjb&8da#RJsKO$?oSavgs$ICMADpdr6{~jZ-;`=ti{5;nG4VvK&dts7{H8*~lHIv2OM4Xmx!u@M zap`dVO&a|b4gC#4{WVv8WrnGld-a2Lo(XO{=BQ1cMAVC^;c>p^D^n&!GLZ#fn0Jc^ zuR}PO-aJwDyMvmy7Jtlu&V8a$Mu%9e64cKS>wkjTOKe&Bp#%^oyKyl-b6JNIh{K_> z!^is__D6lFqY6nho!@TR@pYf|{^Lo5!_cOEWj~OVlzJ9W2|z$S;}6Wg0C|-COoDth zoAhHgXn%$gjAW%R?0?8nL+I=PNuHr9K?;l(^vj4&gesB9P|w8%<@4n?l61c$TYE`5 z{t|Ss#YpikWtLvQm7_k`*&ULyMOFGvYnC=3EP9U$h9r>{zKA$^v|SL^Rq$xFApB!N zgy-rL%P`(S`g^$?uVtNGVrW;#Eu%6AX#-QEzwpCa=~buO;S%j0dTnNU?XDATJfD3g z-Y&U^AwJO?%yTrh3w)NTZ;Nq{%KDV-A9Q<G5wkc2HLUG8|FffoaP>yre>OEt|O-RN6e0jhu7~?DC`qP#4&KZ*i0!f!IQIg0Do9pfKj5wU@8{SM$>-Y9s zO24dnop$5iVh+A~xRZ=7002ZPtF^_u$m{$z)aiY3mph*O8?d)EfW zNEiZ98ZI3}gnA;zd)oG4Pw6@%SV$dGPQnYDZYdpIOp~YgI13FObEtBXQ96=y@`Lkj z#tzBxL67F7ovzTInP%+>RwNrqPrN!BhROYl0P3gOM17H{j#qwm;m=4-j`u{|RE~%- zL(W@wyaO)_f9bMJO3S%m8W%*cubWvwB6e9@UWF@aMrxlpery@vfSt3|ioyNH;yW2% z&rZR1m^Y-={v4$7C?!zG zkS({3jeyWAis8AGU0IxeOq(s&cKg1$Pyi5-nstFA`q&1!e8)|;{EBYnV7 zadr_#c%XaNMjBg@InRX_EH(`~F+e~uDP`zVq0+@;~t=@6(MK)DlqOH1~ z1L9xK&D4x^a&K89R0>;xk^qv|_>_3`v9~Yc1HhL` Q3Hk6I?h+6vtI5#+2fmTVNdN!< literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/orc/TestOrcFile.testSnappy.orc b/pandas/tests/io/data/orc/TestOrcFile.testSnappy.orc new file mode 100644 index 0000000000000000000000000000000000000000..aa6cc9c9ba1a7284bee1d70f186d212d8659b3ef GIT binary patch literal 126370 zcmWjKV{j!)6ae7Zb~bi4wr$%sHg-0)lNZ~zZQHhO8}IJFsj05|s%vKYpnK$%gi(Qj zU?5Szp~5H>fW!gUL@=O0KtL$J05?!jp^ma1eh`tq@1A}Tv9`9t6FR{afK&maC}F?> zqX6Rw0ON?UV4xTo7#K}W7@1I@%*@P8O<9fEutF}u6oAHnfZTo}q`h6DmbUJng*O8T z)<8tjg8tOc9_@jYWSzSnoQ!W~d_@qjR}X#!$#Sy$ObV}1z3Db-!_-Ok2|F*h&57^1 z;%X{_aQWoIAlH721+2yFgLtZBaOz=5KRzqkK-NYOyaAZcmxm`18i>4;k`{TLxD%~K z!qd5E?l>c|$cPZT-H;R_ujVE0AdD2MknNj^ab^b|Z_YH_eb7%$3`KZyqiLW zBz%`&q90b_q;VtSte5AUC}lGmP&V_a^Cd;l0WD+xyUGk5lqV9K$GDYzRC)ch3LDc_c|bQfpOlETngzg-thBzzbs z_Tm>C?4=MGgH$FDd%?^|?4Vooa+G7R17OOz-a?8^ae4e7{I|jfU7L7QbuSKE?=8td znyQ@0t$*2ANf<0?T#qtLA_vk3<}9~vBbQm&x-FFR2MscQ)#T4AM)qUy`@d7Nc`~vL zk-5orJCAeIf1aE>U@v<<+EvbIM9c!1rbiGwZ;ru_-$J6UE)t7LI06@F82WcWSH6#N zD~DL2rpczilRIP!kt?LN8XpXO%Cg-*Ad5v6oYjA0o|3N~)v(*&n<>b3#Mh_#w-7QH 
zpf`B@by*A@NJlPBd9kni?_l9CCxDR~$1mL6IeH66Q%-7RSgBs3m;~moNEL{%X=9_n zCL8uHj_ftqc4JL_6+0?YFZHE8NGxW)$X%$^9uJy4%mg>7jC_ z=vPih%x=RS#fRPT=i?s#<<1Mzzj{Gjrw`btY1QFm9+ZX>%bmBf;{gSM*!>bIUh6G67SUKI127a=%%W|iW$JgTU4*y&#QGz+1(gz(^+!MaPnn|Dp)4^ta`JtbxrUvoxf%)X zbhtZ;8Z!6zsgRGx#@YaeTy6W~P$Bt+rYW?zihFG(D}O239Yi}coRM5UyIR4kf}L3) z;YGFIu;^U}+eg3b64vEgP}!HrYu~_h5?iJgf2+A6Nnmt1CBNS18f$0d4rz<;e9y0( zV~>}j=QpQBt3-PmfYB+9Pb%3mxh*yl4LCS4Gt8900hZ|(;~I!rWE#j^zN(EztLf6( zw`8aVehRA%$xq70!6~+@Rx>K3ZFvC>{iRY9?hdU`rY}Pa_oepy*ynX6U9H6YlIOT` z-7-FZ&g35{nhjvSymc8B4yKSEV*Po<(OMg`*?wE3SK`KLfPYSm#8cm(J}xu<(hG)W z&n6rdNe3sKxEXA|5DHim`ECCChm6fHex=OB1NHOPjrZ`6-&NgC{odlkYoKyZw-oc# z@{c+QG2sLPfZe7RQ6f8(ula%#k}>?d(a*V&zU$d-1nu~#_%EIMMHjCKsCR;Ik9^QQ#d1g^}DO4SA@lr1NY;Ef6Pxp7i zRZ7(WOnq8u)RMIM%f`8WlRhQB9(fqy79luC;7XZ(p&ER1hk3y7mK1$^3NGeg&bqw< zCOY#(FM|@EJ_5%+)Xr7LKhhVzzYwqDmfm28*oJWBR*EEw*oPlVm_eL=%7V%bFSKJk z4gm}ctDaTnV=(PsDDVZ%mKBc}jJ6g+SJ(sjzeQ7j3AjZEpM(V9w;gH@doKS=W({lO z0ew>Z4yxV*z2Y$S_0fN&v-(XCCIapGxA+)%+`H;LLlBEtEG)ZL-nLbGMr-URrIO_m zz=eIf#JRG-s9)Kh8UQP_pr zHHZ$tk`6s4hq)JOjnMeTU_5d!gy$$HbPZ22u<(3^{E98vM}Q@$Bhnp}CIf~6%!MjA z37j~Bu(Z9b(sdJhTUdIR*VnL6#sOsTt4L8bc+Tu^>uj zf{BhIWk>#_Lfo_>QgG;%sX+&vziY~5d68MaxN{l`-FWu9>vJ8n&7ds&H37_m=)UVl z*DGv$XkCSocEqa~icq(o!1v*f5ank?+viYDboy}<;vQ^v1lf~y81)~&0k@ykbz1m8 zd5FU287{RG70I|Q@!Sc1sfceV@{C}pT2fGkxK&Tuvn&RbB}J=g(zF8rMr_6Z?bUrS z0$&Cvg!A={$LP%K01Tf3VG!f$zbc-7YQyAwo4I@IffOrUAg8vAqindyvGp4vngBA5 z^3_o>vvFQSgW2CZou^-In9y_KIsH!?*O|D7mNki-Lp=*!k80=q9RTK&NQ;K(J#it* z#a=D#V7TfLCAd*(|F42c1lSL+ICM-A7|bp7w80{BYjQn0yopFvG4Ic-@gQEWP~E-P>VF`m?+y-9n)GmC!JpfxC7DRs9v?CrqeH2!?++1|VlUtsd~ zzIlRV3_XrCf{e*s|Fc&HRRdnLAzOh2QZ<4DV9>L7d7$-%#Pm4ZphMo~9Sa_#er3B{ zyz{}9BE|V*qI_qG9EqWQqWB)0A$>3tMl9K3$x_#g2~*__TU&G!R2$3H84Am$DE=Bz z%D{LSFEPBh*=-eOB}ZmZ_#*hWvKAe!83i!o{GI2eKmusTv{SQUIw_~K0<9AF5U&#n zWjcW>{v!vD-UC<$FS7l4aU&+ST&phr1){Awc(KQuKt!>N=30iSxIPGZ)J*Y-Z~_p% z$l5Tptfe91*is%w$B64-@I%Zq;LRrhhH}SZ%T%YC93#+crI$HT#GWQ*zOX$UAt#GS z)h!9~L6ko>b(CX8wAnyw?V*C}W0ZNkF3wuKpvIA>efuxe0Gd0$mDBj>OuTsds5rrH znQmL%yb{uztNGmFw{(D&`*4&C2Y|WV5%h??oT|ego>fpcX6aspe2u}%v3h=oQ$lMO zXIoncaJ$1MuM6B8u?g^r9DgQko7c#Tb5Vb2%H1s@`V0;^AxFN~+f> zb`Wttrg^5BDqZ)IJVMJdrqYhtK&tFJAv)TZK>QL;;pC1waOmhpZN{*-JT#fi(484w zj-NP(>8sZb=i|nWh<2Z)07gISK)f_eE$@A^3R-;cZiQkr(b=nbQs_#IQ_dbArgXB6 zlm%u~boGccQ7yQ*r-RDVUF`xY+opsu{o!MF+e0NXi|7!ON~EIxKhU&ui|R#?mN z2P2fo9Zj>j2u?0K|EYHH;j%8hH`$pMwhD z9QVq%_?N2l(DGXk#Wd8Vn9lP`jeIVxed|3IkXpvBF7J|5x#yNYww5is2o=@p%k2Y8 z9ZiLoZlMBm$L{?H^y@)|WtV*9je355!##uUa07o-z(}`gt}Jc@Q7a@r_k1fJHy}D+U0Sh)Y{|xM47(H z(aF-rozTNnk)IyV@RPhE$oW1vW45yWPR(}AHj-KO5n9es&xFu3-`*^$Bl)r>Bv|HZ z{BJhb>9lY>-vyYB?p75)$$GZuz-|4F&K~{vtXQqBr+rTvOKMxm&{3R9Pc##EK5(=} zX_Vr`HKUNqi;)y&sD0O#9k)e#o8;L45mS2b7gfdA(WPw3QLC3ydxLhm#Zf|MGq6Ph z7`GqoRWA}G9P_vP5~u?7YZu^EV}#;lmV&Ttm|GNerg3Q#+UjP|w0)id*=RGI)eL$B z=08H%Bftbq$VHNTEyL@07e6miMg^ar+{Rfw^?k-l`1T6%6v{ZgpB9TwQX4CjRsco| z8+9dI*|QPf|CmjS;I6QD=OSB?&2h|`%O@n9=<4^)$|K0Q{aAQZ_FDb?Uy%e6 zNEwd^-aftJ(ajYUFm%c+3UBiy3-SO_Zibt@TVRLe$*I| z^%!@rnyugVTh)Mr+#MBVXcTuqOZ#_Oq;JI7N4j-o1kiXv(qM)00e9);d1WuQ?Wd0-lWiwRu%IjO>Lm~D2$t>@o<1ssCoe~O3+rwX&7^2 z*t~ylrD8mKJ4SqU^ht?G4CmT(bv@Nxd(cx(7)j-DViCPvP+M4w>Kn4u$rF!QQL6G; zZGYS~w=aLWAOLFUw$gyA3;uvK6+=O0vOUHC7f(wq3;!K zVF|ZqW|14JZoTA(-&2ja^=)*OjdazcZ18{Z$Fbp6(ev-(Cn^{p&vwp^Ye{wt=^>@j zl>&YaVITvSy!f=#{!ZzSeQt-ARJ$%(r~OW1@*SJ)Nk-F&pUVU=P+wct;>Oi|PJ}Vm zp&4I@E=dU=-Ml++3TdJ6LzU zm@j*TfhF9OH<`kwX-jvI-pVR&fG(7rF}CrVGL_xy05hE4CtRFiVJza@+Y1WC9mVZk zSz{oWD2yVSEXV27%uy~ftW=jW)s%vl^T}!#1$qH{(ZAXg4&ecaE!og0^wrMy-8SgR z4^QN@@o3wt&thFTtF1nIC@G@0dT~|$wUk(P 
zpVD>#D+33_&1V@$PHa2V4hooY29?>u(XIQCav)gn-)z=}nF3ZUhX55DINPf-j+QU? z3GIWgpW7rN4)`4FLUqc8TVh@IFNUpTl`x7eHF_sV=L&mq3OFQad)mIT(+vxBu?6v> zh!)91>r+g9ADpieb@(x{%j6DGY)?hg#O`z1e{WjrFzQqVHjMjzIqHAbEE%{KVEG+# zo42u#UBg;GzF;#t1#`-=8SgA^JB<`d@XK=^@9ES$qr1-)Lp(rNJb&K@$X*aK@-VJC zCozhLWBFCnMV&{j{e@JGG{V-06xW>UNkjVqjD5tdfM)d^Dhp8|Ol_JIYr`eyp7pL( z?HZ~q$AYm+b@quYY+yrD^Cg2tPonM@FF^?XE|Ar6O=`#p1N>HYa6@~&mZr8zUW{5> z_>t2LqIqBX+L7$K16!}vu8 zCR*x3mPBhBB${DP^3AZ@!HU3%^94D#Bs$Vr8*Y5Zn{{Hqx_L`p87Ohc41OX z(b*PpADeAC5g9i0J_@aOS~{rPhL#v(-pOkBc{%0ZGt3ZV}z6{}%ZL3zTC@ag&{R1nuHV^&}U zNv4;NIu&omltd81Y4Blz%~Oq!Ejk)pG^E^&(0eAt*Y*`R0qS6IKoK-xU>BkXp_=3G zna)8#L|y$R8;L^mYjbI#0A{|!C&(P(52`Nd{#sp9lLj#kg&onekmg;7$qHyVc$oujB#D1d*0k)Y?Rt@m)+I`ENz4Cv$WSjApY~Z1X=qzxFuUJVT}U zMkO!74xw^cky_Jg*?_#Ek~^g?re2t~tKLJoxVqBuhz_{Iwi_PznEkVvWN1xKQC_oY z?wovjMOHkKTItK00nFM6O~*!zM0^=}Dz+7#Qk$03V1wHU2awr0!&MPWgLRaxJ~=ul zf^F*roM69SCnhT{-9aRlNi88*O6d*|ReQ1_`EjaTVM3xR2kD-F?w}Rt!sv&myPvB# z^@Lvw)F&#RR?M(ky-{f!*wSdmil|eOQ<>0qAyS;CTAvq&>3~M2p{MW2c9F~>$#PL{&f!@2h`LIn$PI0ABmi4>CYWRsNfk&4 z2p3CmItDPpv1!18U0;o`@Kb4APK2*t8~fqp_Ap86)^hXJlpFh@mJKm@TPjbdf9KbZ zb#_}P8B@_tRt%J1((32EI{Q}jKWz*SP)F$ALCO^T3r_)UQ}{aUvEw06Qg%{-<5AXl zZ&(Qcllj@z726R?20J^}$@p@?(H<5VNP;)?!q1M;`IqVh4=c^7-7gAp#MVrXvBn)rdmy$ zqaiIE6i2|BBMJKELgb*0_@N;nP8Z|?emm#71n zm-e9qo#>wZboErwyK^%%_L1chOuA?&viy=cl^#3tmy*D~!fk)H8UX-fT+C?% zZXZ9rs+_sEROZW{HgSnRPEN6=m96V&og9GERgnJvYtfe12q;QDB_|Q_5FEK>H3{vm zB-E7NTh>%MfW4LwR;AseshLlzWUngdl%Ln2v2i_sx2d=FM>dGh$`hjzzz`wnYVPCh zzeMwh$VB+ty4mCVM&}Qd*cJ!-n6oODVQq@bhdzZ|o6Nd}rX%AZWw4^0R#5GU$ftGl zRw31AGhKoYX-l+^=Qy{vb_L$BUo8O@oq6xQBo`a$9M$(Z2Yd{Z%uQ}b%M3<}FrKLA5G{4s#38XSroNINg} z(|Lz8PJqwBBTs;FYE!&~>>EmUAYSJD-@e?zvYlk}&zr(sy1byfffzjQ2G^0SsGKs{ zzBTmU+a9Q_=TWYD?5>rq|CZA(ikay7nL_)&H3?8zs>t{NOlehZs?H;xrcZ1{v9l0F zI5hiewL)72;f!UYQ!(lAY(WdVtpx3tq%~)$4!c`C?RwDYz&94x#O^kQPOxInv>_+8 z{}QEdEm+~3&b98$CJ)|nrC4XU=W=6zZFF-cnNIF+E&wx8_+AmM7{rmF8SD&?sZzVt z-gD=uu3duXF?k|Dc4`&1({P+Ab@KC6$nYJ!v>Oc6E}>DptD>IJ2EL|0ix1qjB|zTq zYb_PXJ*NZBk<_h@T$Dq+&0s&-0l$QyC2e8)7gYnmY#j-bXMEx#lM|CyLL0uTY9^C_ zl^79lBmO7RW`1SGvjXS0Mv3Qf|(k8fqt&8GvFR8 zdPkBAQbWqSz`dY!< zj24{6$^wC{r)jTSpMp&F8&|A}2^dM5>{|dZH~7_bWhd-B7)~%(-DydrK8SV4qPx(_ z<_Y^SADWQp>dI4m3~PQa3xz>T;`PHeW7QBDx8voNn9OhAbg+K6p{TTOMz_9H|9NE^ z>UHpF_8HtBWAilFhT6qwq9iEcj_@1B9R{eI{$g1!F9ruWro|`vrQ?}-A zipS9DkGWMo13KJY;k?D5kb3kI9DFvH&(fi(NGPqJ5)3TpmLD=1#F+)1;+uQ`9%FLp zW_#b}ycgEWbk{b9c2o3_P9ET&7QMlTPTHvvyFt=a?H1JPp{SeT0 zm1_HWFB;R9op$Le36Os~h3wiQN-H2Wm41bMpVuuYpd)q*WNmM%s)_{TN`eZ%Hes!E z>Nwnlkv6AWff8c}Nih}z*>pwz!s;Q`}To(5* zoVw-&5k1ZwSDImY2`G1gPm%vYF03&$4iBD}1dHn1{4&R&;~TUj*U2M8P`C_H{PH}9 zxjIZDPH|O;!8e`{x@BM#&(n>}ZZ55+&8r>um+ihFd>7ON$h;^o0p_nYfS(D@UR zi-2J*Ua3dSrQ1*MF0v^T7#ecnr&=F0tw77SkFEOpe0{0c(BCDq7{;`FmSV{?`n_DA zrI-ZLe*#?r3~%jBA%bGe)a(;@_mS9_J+oR4@jw%1!KuYZ&iPBv1{e6BQw2On9atJN zY#J=~GEgU+wdN-fd~n@YRoI87>rhC2J9PbdGu_;VV@?St*g2l$QX&Dh<0cHywzq_>T(pdQcMltiV5;I}FkSEq}sq(&o@t^D^ zE%b92$sVM?2aOwEddhcAiK36@s(F+?+8Oh7yM)M0R5jJ)w;z_L(FouvG#d(Rb&-(B z+OPkL!aCEKkRYNk7FHhf1@om-AL4c&gx;J@xnE;b(FHS)h*6)zQL%@;rvpVK=#j(C>@;RL^ETn5aK~OqnVT1v4^?9Da4V*R zf&mQcO;zJE5La~U4=h%I_vX!bFPtZKT>cXmo`P~uhJ6;tyAT#c@{wB3x+JrP!YjO3 z8m2%sD3nJb?fNLPV!LX4WR7|8%9PX8&juFGpCnz-g1#_6`6phq-DzOjZ?az(J1xin z#(Ci!Qg&?h6T>WG{yZ!S^O#SBKM_H%>zWc6mW^0d3@z3ZJ6pIYH&2BeNL@m&?|~YD zQDECnL&9H@o$W|`*Opz$w5-e@W`KK(rrsyoKxK)_J+&&+hLxV$Ife4Ejc+hQ7QmQx z{M^?~zJaXRxrDJLP`Q+%C&CNbCtr-g4+62jPs+$h)%s6^juZFmTzl>+oDhUos)x|B zys9snnm?PCDBkrH*Hwh25yJ$FTxq<4DHqD0-FmfJO!R_v+oKjJX((h8CVc{!!*nDn z>xH4#DFKnq8ycDx(fxQepMcprdI}K<5Li=gZACG&rf_pctQnch6G#1k*vCW|Js0vC 
zV1rb>q|^Ss2`0`HGq7}`!j*()Pxsmp1Cb#B?xviKX9stAwQF2khn7fYkQPOX$jR(lhTY! zE9OC%w-GAU^!|9UL^iVc-xfICe}4|?IMa2`5nySekd0gyZfFnd0E`kWn}jP(CG^b4 zp{fbA4-@C)>y(6F5ChYEAUUUVaWnJf!Fq&&^@S`x2B z>wh{_Tbw!%eDHO{fgYY9_zZU)j5k6#tr4TLf6(4x1NK$V&&mNzzTk-sTGB8X+9{D_ zd~CDBc$o>BxLGzsQf-xa@a>Kn+HQs02j=)C1bJFd`Hi?~xusab0<>39`~sMSTHi~r zkq~6Gq|qp^Q>6;>9gN+A0Yh4Q(_ksOk*N|WUxlpAD~Cu0fVl}g2g$$Y*;f;wxagIf zl3|hcp+2pgn`Gg*bLdO#2PY~sD8t?^Nj<%j;~W{vVss-BG$L4kU#++9GxMo>VkV6g z>iO#oy0cLT3isP6?gN7AZaZoQ=zVcQ#Al+uijiy^!1VSm_cTz(O<4ErsGB>XP8!Hs zXXswT%S7k%1uB$lsFJha-=_S6<(J_fBzQ@k>PO5&5_$Q!5~K)6F=V1Q^*lKGW+-!0 zR;;(>HK_T$SA2Ea>Pd$nm*;q-&c4V}d5-vgEd*dvJRMt(6>qSH0`*|Xryapda~4E% zW>T~nTgutysRUwaa%FVrvo>W1hdeK)G3sZ%n6n6@$>5cXKWlB6PuF!|u3Zv2FE^3T zg)pc#_{!d-^KF=-Me)kdhM%U_-vz}JK_Y4ZOxePa3Zg{D;8N2(%TFUEQ`^0S<9?$4 zP`2)^S?)O~+g=EUUB4I2s)?{CFW0a{KL|FX`%lJ_HhMMM|8NY!Cp0fa$_7bmQ=lncHi zB-Bo1K9XI*x^pS1Pxi)%GR>_5_T-tw)@6mVk`In+vrI6!H-Do8(_IDY}o%Lq3d{C*d=Q2FYYYw>`vO zw9wAV0q7IP!8!EqhjMb#0`o(tqfW$bKdO3b$&YnVhXLG38Qi2+nn*=g>`lYlV=O5EV>T^ZGMHl#GH16$$`YA_`)bK5Qnrd)ZTx8vjd6-# zSa>p+Z#yeQeEyvwmm;}84j+2Jvj{itIkby6JS93q2}0SQzrAqfM`-y*#iAuTs(@g) zwEw_7?7d2Kgiu4*Q!YpS3}A?5fN#XMNrGDx36z;iX|LI~u{P3yKO)&#>7}cThzF0A zIM`WKDZDO@FFb)x&rnG{W9f^1+LRdo`2V&nNGiY3@jLUipod&mR>e*L6MYk_$qPh< zmTbLp^$1?5%4NJhFZmbN&J@9@2&kTR50jiTBTg4}nw7*Ufz~w<@dhkrYx2u`MG$%@ z;v*1WZ18&HiH!>g4R}imliP_N);kC1?NDI?$ZtJ`7GZU)CBsytMv|@H*Zq#FgYf+K zjE<>8drxfhRT6prHSdu_kMoBjg$na31~Xyc2_Mn%6dzXcogj%VQ z4nJmk>W2inhhYD!@4ZhenW-HL;BU(PIsE8~krmNceQx4>CG&JW_^M}c!udT|g9Fi- zWHeFSR|Bm8x%h8Qqnm*tbA6AkoH}NOn!%QVoP0fYgFrKClSKBmO>a%|pB2)wOnCZ8 z!$75j&9gmNc(jU86CMtUj&8oGjSAkO-P9*1xXUXdFxPnoCo$gSCW{-silBO23T|r* zR8DyyXmb6}9Ke7cCex=KGrqtjnYX?p^hnbt3)%%Tr0zdKi60PdFRj(wg?23!$@p<{!asx5gC%&!7+ zd&muy4`8m(8*(*~&Ht;mD_nn;U&qGZ+8?|f6nflEC=NTi0~2IqET>>Xhe`W|eg7=o zCs(6e9?XN)FN|#N8bEqoa`u9z*uD7J9OLQ8fsjYmrLW0xrL#@u}i$B8wc-Mn;A`GFk zhVm=GbER*@ZqrV^fzM`&2QtBbHEhC5cf(p&6-B>}-(M~6-@BeV77$a6-PTx6a!w@7 z|8>=W!oQ`V$~r)iX2m`kt?lMS!N{1Y&eA2k6GOrN()I6eV~Iy4(-~I7Z%{ibT5Vb2I9vK zN-4~#W7%w`o81iaj-w(cuV$6O2*UZC@N6LVRW_N1(X9M7mT<@xJ6px;{^!at3AbMAV$) zlv*A*y~{kkJrk9sLedC0jtb+7;HJarNe)9HO>wOlUrRmfP$#1S%<=h4DU=@@2gwL$ z?)GC$NVx+>;w=W4|BWKA9(FE$6LNy3R!o2nvCWQdp@!D@x@va=HYa`XakS@Cc9cxk zHkLNmu_;N1&3TuPV9Cu+&{KB$ymyf(IJ|`~?>gjBKoOZNI)G^p=6+8-j8sI%pu=d3 z78{8W6n2En0-er=!+1fe1B-$Q`%k8KnzK1cMqiuiM%T;dpd>i)2eYHN2Bg>PBsL)j zqT+SEnlc@KgH0GYMAMJg5tp6*0d%8NYRIROH&=B*u=n4gzDh6nbd!;fXr5&e?Vx!B zCrgmHN3uxV`1+6N%Mv`zN)+YhP&9@_jd&VlX6bA;cf${T=^t9FX{?joiVrfk6PZYa z&_FsaZ~RiSqQa$2!TGU+Ls<5ju-8>C-!1g_;yE3pA^_v|^W+HUvlH?)$AW%&6>tXT zrdVt2)Pz7}b^E2)1j|cg1b4QUgW0I5{1xk~UH9X(5p&ExHP4yELXIip>a^>jpl3BA zEWdf!T>q&_t^kuKd8)79*535vtIWZ4Fa)`sX7z6u7eSwxc&T!&kDR&C&X;$0U<03F zT;vzQFk1DZqD_jaxrRO(OO0bm@~uewWJc3Zjd+yLS}keAgevZbvXJGELkN8cD^7|7 zZV`8p`Ooy87TW%e)1E-i8R0s9E=40=hh29%B!CfiTv;CZ)`PiLskui-hCd*%!#9dp z7YK&|iJUa^6${3^7pI2W@!|bX=MDPlg_o*R{P%sElMa1!7q>cn;cvNS_Qg_4g4LZh zY!1mJP#QIvWuhSt>~eCW)*{*lf#sxtl9f#WQ*N7}{N)-1vqMa0`DBOq+gPo26@H_3 z41Rz5j5(;Zc-KEr)DKB1Zl)#2I`oYmB%VHJ%4&S0wl4Sc=B$#3!<~=5FK23it%u(( z$(RgBNM{gVKQ<{yr47nXn6q(lx7Z}*Uw2T3Ac%@uuR&gEZu#Mul1B8pO+FlwVcGKS z`&b}h$GH$aQzy{a5%FYMyZm-1Y10jSEAu5)^SMd)PwS2sIf^THTA$kb0ZB@EI z0h9D#pP#|Nmn`2gJ;rKPyyn^>D=?yjWKU?}DjT1u*pZYrRV*V{*9L%d(#O$Mai$4`rB(e?wssgCRfSIUH|7Gn^fKhV;Dj+Dz0;tQk_CG|;lAU%+T~yIs^NcYbM75SQ%&;6 zzn+IwAW0cY(hwZ_zIbZ~0nECWFy-tQ%ykB3P_JmxKqb+mbW{ktu~fHLg2KemjFE)N z51)o4e}Ci2A5S@y24iw8Qu8DSS^Trnc)3&(cHR-qU*UJUv9~ZALnFpc1hb!*dlTLxwov%#bu*VVa4V^_h5U^5P zV1@$0+kKnFk}yfc)&*K_WrbGS8nt^@k%vW6$9VrixUa{(rOUQF z=UVqt_e>g{FM96(s|b{f|HNjZL6lM^KyR6%;ek%Evjl88i4lxbKAK1o7fg+L#v2u! 
z=!w^r6W8Icdz}KkAxiIn*i~bl-XNJCZ}2BIsX$v~rsosy`AqivS78MsNN7m*$(zhE zlE*X}`H^I75?BHl|2k{>`cbkSon<}=YmnD)*Rkie0etTXICXQGNaH3;^b8lFSvuCF z<+-q`)W4Cw#U-e_d+ALHE=_p}qDdfAs&;ZE>~g{*u6cd=rQ+Kh7OB^&Prk?~8bup1 zv8qfg(LkvG4w2hd_HTFA=h|EHg(GO1*}o_ySAAur?9s#ZY3VEck+jZ}AuaI)c_d)v zKoXa0UJ(q$N2eooF-j#2;cDUm&I1)xF5J8@_VwPA_{n_)T~r*Vu#+jQaaoQ{3uedq zZ{#$6HoP0a#4CN~_u5I{p_3L}LqiCLJ5Cx9U-Y#sz+C%R+>J;WExE%_U)R?!GiuK6 zr&vp^AKLwgu?znLj5J4akA6;lC_xl~5JDf^lVuMI*DG}^PpOD*Ykv(0df=T9D3v#=jxh#-I#%vmt{h15K__-Ax=yB($66P)-dxb&y zS;%+8(3pvExD{oDyvUdvyl&>vLC3fJb5QUmwL@4myaUF)7|O$r^4R&-Z0g)D)8`%K zV*Zs;d56C=JBTHAbmBebL`LAHB4Lav_%;(!;L^T1f>+nTEtzrS6-6zO1Au@|V$$bZ z_EKDG`_qOFBBM0t_ml@RpO;#cs+VUmAf$WZh~K(QJu;lrIK`$sOzRJDdn~RZ_iW+* zmAHS0K}q?8+8|a*v|~q(Y`5XIz*siz&2ROSts+veZ@L}5=$I>R>kjP z^+WCY16ub=+a25?braz9R$Kkb!DE|gEu*V_=i-P1E5AkNNi#!kxh~3bU0zp+yZ-e$ z4VS{JZASJ7-_JofOh@hEk10beJIZOOFkdM~5GM2=%UGp!MDA|WS*$I-K%%|mtSt!= z2_VA-K;T735-D~jlrrH8Q-vEAmbmg#Hi1ZV$Q&sVaROf|$-h?@7Z|67vZglvF{^|G z1VAh?i~z`3st5u zJwQWLso~C**tBOaB3ranaj~8)L>z1-za@yj2k2rVqKTW!V`%!%N1ss_{~hXc?9OYK ztK^Nwx&<>&qpoBS(0SoSbV*klJWyK72kC5*N|=|j>85kfbfj@*y$e|npFl19Wo%dk* zKU?4G0b`jRI=5F$$LoMx+z)tW!B9$F9v1&{c4Cb8;mF%x{V{u|IJr0l6aE`2`)nvSD=g zpTxw9g)z{HWRCPspSYGk9&Pi41O(T3d%$PBRKP)a=Ve`PIpM#QBM%+Rg@H#U%1obd z53lB(xDu@nA5U3}qLK)Pb4l?qDeSVxQ#B99IplNR<4iDB1hfssVd0<~*Ojam8F#M} zmS}7jgDNR!-b@f#@3q!HVCVG(7;RDMcuNU3am~cJ^LvGx3BVv0_?}O9&~DRpRSuYr zn&czpkBf?@Q6r%nFBx6epGZC0{PAkGGzvQs8&ON@zFLaQ%uIJh!k9tzYll*cYL`o| zN*E`Pkd3CMq@G#{hhtx}>3Yc$S`$MS8BluNIOgth{GaK(#%{W!j23NuB&ynn$W*6Q#q^4stOG5uCy1K}!hn{)>lT3kTu( zy{o2`t(fU=7>ljO@da%L1)O!jyk?7rq|%}AaIvbE1YlUMgH&>2Ya{sLArU2TmuI5- zOW?;U1});YXDp&H*RW(Wzot;Sk6(`5bh~Q%V=NSUT21b+Fes4tpJV!0ORw4CmvTks z_gm})Ih0EuNxnv0+qj*)vgWO(UizYlPQ7JVaJ>M`nmH{fRQ|E-Y4;s~eK1nAXB0 zB;7YV>nQdWa9_7P-XCLidv(7zn#XMlb6N0?9( zBW@rN42;HtW(IH+H{gsE85{~$p9`}1-=_l4`Pai68s`Sfb>P$F zYu@ScIcb&Mscpf#EI7aQPC6%l*IX$87lOI8-)yXG2hYRAXqcwmH zc^9i(+6>m4ImYKIhKJ>p;)sxqwTfg!lD{#*crjFG`^U|plQKkqkIS)YLS~3iyK&yw zlvLKhjq}X%IN6P#w(Yr062QRw#DX|$vE6~}PaHLcjaN<)zHRBn@Q*gKk&onb^c$J8 zi#!D+odX>>vs^kcwR}kv{3(EI+ck1{mjV;xPyK4NpO?7$4gE*5PDzeO`+n~*xXzP{ z6-MJEKc(0W1Xjzw9qHeirh=rwki3+DSTMy7WUobS4ZLMB8oS?m@hiT(9EhWlt_c;i z@%{|@qhV@w&TW*>Vn9r$?BWzeI2qqcT7@w&ST96%wdMSFpj1Fdy-0Un0!nq7d^a;M zUZO3!^D>iC1RH+w-=-HjTlr+{D>U)~w@c1J8U{t0Zphml#C1Ro8}=TbPR7@Aa)k** z6%trafVXe8gpPv(g*1YN>7dkiR@pBYtgM>N@Q<+v8{LE(`8d#lf(`VNSzj!wFhuHk zgsgE`aTab$0t#3F!$r}yWc($p$xSDM@Nm(8LiyR0<&iVoQ+{5;OG#=(o0E~!e&|{| zy&YGO>5uN{%eT_e+B~F*Ak}TSE<1B%ybD{bb-5(v0xzb1C zpfTbDvRs{NI6M2a_Xql?SMly;d_S~~AY@$b708SI@$IbA6Fl+=GqXnnYDg{JX@~KS zd-n4=wEz86_1UV^fquLP{en>JWc&)-HR^CawuKvn%3xdFP(MRd4?{y$510ccrSZvh+} zN{6nIR=PbM^nlU~jR-27eS!gLeUyN7BMn10gGl3$N~cH&JW3=+KtKkN5K-d&*5>)1 z!rZy{?7hG5A8W1u`u4qU+XoI<^R2r#ob&MOFaPAJqd#_;Ie+=(54LZ)9vhLz=ickc81-u|00x4iz)b9=6G%76C1?$)i=dt|x({ySEd*m{W>*IE8o(@y*K z&%US~vcsV#7k6HG_cd$Hd&DotjO(57<@9fTHN)a-y?^Rk%ii(gxa*I)aOO?MUi5<{ z&OOv+u3hbzSHJn%_ML|(e{$2{mnXb&>7=)3-+KI$2WJ_X8f`D?{Vk(%hY<)AF^yM0ln{6yFPN-lV=`$wO!-B*B?1{i$A=0_9yjMufE~A|J~=8KOO(hTyS$38LBeZO3^|J0`^ z@AuJ+tK2ZlxQ8b#x8;|s>|VL~qnTf=tntN3$KSlc2A|d@eKhXOe_z56Df{LeKOyrDMNMPHn@&%HZ; zW2e)XoxIP)Z7-emkh$L;zwLClKl_hGKil-w1AjAl#W~+T@#%ZU-1y3gOTDz!!8hG^ z!d`POIs0CRZ}V04k_8`I^!k$@-@9?yUnXt3@$oLxU3ry1&T!Y{->v+>efN)lar%uv z+2%WMy*SxoLruHoMei zE?8mPSH>N-{O|rTIBUE8$4-9w8;`I2=EaXbd&MFPe0AiQ4|ciwjInD>eC3??mbv)g zofmrSfV0OAe*WR2jVDf@bn-s`dHVHRADy>Y^40$QV;=lw@8xfdAG^}^M~(LS?S%DC z82{P@KmYTD<3F!1a@?D@Om><7{QKbVzPb2^M;?F5y%(Oc$AWWz@ZB>nTIAESK3i$m zH|GBEfx%9Do2b_N3 zox3znymR}dSHEk_CZ|8}{ublE|A&RHpZdz*w)*J}vploFv^5skaqsJPJ9W-&=URF8 
zDSx?c$G^`y=9=lh@#y-q9`>DoPniE37mYo8!WnG5&Gp`1lfO9rslyjO zZLcFHY&GxU(bG0P;oklJb<^`F|MvC^elYRXZH`^;k~dd>rv1dxyWf5O)W7`n>IeUH z^tE>_f8HbCyW!fSzBu=-9j-WG_4!`8!;#5Z7k+*2DHn8qu=YcHF238Cqc$Es_tL4a zOuT3L=X)Ei`C0!Po6opU>z^xryy~r2-nq{aF7u}w|2FN~&b7yWXUz-Ws-3aYU4Q-j zyZelNWu^-!e|F>^yB#_9U;7;L@f{cLKK9lZR~S9_lV=W`>5=^xd4K13KDy+=Z{G9P zxE&9gzJJ*ovu(TBfd{o`So5`F?*0FFgIjmo=Dp{poHn&s{F;xa-nstw7uY>V7q@=x z($SR0Dc_mrnb}5DCM|z^ee0)JJ#qJ+Ub^gucTQXAurCh2_JL>4Ikfxe^M~DV_b>X} z-!|j2tKa>=YU5`(ZQ56>9lr5BE6-NBfBj>s{r5IJd*w;XuCmz0@4UJ0eWx9;=Q_{s z`Nerxo^|Qi{a$}+zFA!6$MZkm+54(xZ(FCj&2e`eblAB1@_!n>dUowcAD=VF4Udmq zaHd~=d(z9dpE!2PKX?D==d;}K_<4W(=%V|^EV|=AW?JLO2VcBz-P`W_;@JNAx3`vl ze%)Ee{pDYCE&07$+b>_f!|!`Lertng9=-46=YFx?!}qw%ezmP1`PMd{ZvF6Pv;6p~ zYi2m_wx!;Ae5QMzJ?bYj{qPr8_ZOOa{zt$0({Rx_H@)iG%SXRDdivjdbFGgSnA+cP zvvrqWclxt`z0z7c&3MOum%r?t3BP%D%0_>lFztqyR(>c!(1zk0{ne*Dri zPdvD=%Y1kA%=SN6<*7M0I{Nfg_Iq~oHH-6q^tUg!_{(}TUbW0d<9@u+-!?kyf>n>- zYl$mAfBWe>ezxoHzxkW`x0X5L-_LKpz}v50xa;qZ95c^~mt6bs@~6GN_#ZD^X!@~> z?Kbg@r%ylP52vm+(;JIyvCBIfU%C13UfarL_FZ7N?u#3&f54~3H`lKoyvvt2uD$iD z8^68ojX!yM(kjOsw!{ygpMB=T=R17q3ub!ZFJs3(ICb;${^u`?pF7ttCeKsZ_o$C2 z9rbzT$aA*4@1Ey&+UAjOAA0A$Up;>9)4Oi_;~6%;VAG|i%{p$MIeHiGw`~B_Y&*<& z>toaH^S^)C|Alkjxc%7qzp=`@-#zQznVx=b@r~|VZ>j1B@2$SuS7#jk_>r@IHF|cd zb^o>T7a!cW)O3#)^IfpusPp{CliKfm|CvR`{d1E88tZMh+^xm0Pu=2Mw>@yoikIzh z#JO9az51EQRJ$85`d62EdcB#>`s{->)_CNVZ!LGwTkCH4gQ?few)VkG&2h)Pr~GUE z1KvD&t%WB3{E*Wouf5>hkN1mH7FzJ|FCM)0x4Ry`>v!+^{bsX$xYaK6@4w%C*POD- z@Usm^=Nqi(wQ4?jM4rsgK&U%Km(y}vqYt)uSy!?IglINj-MU9$a$=dQcyLTg@d+X9E( z^7!X3-mu~Jf4zOBl`neXnic;v&yVV@r`G*=#j%gvJ?-;57CH03uTJZZJ>i0X&a~Qf zx4ini%PjxB|9)qQHTS)4@yRnSedfK-ZF=(uvtO~?k{hmh-<5mb`iHyrAU=J-&wkgK z$_~rje!-JZ>@zHWJ@=w-?{s$S&CL$k`oWj3?oYXNznxCl@bjsa1=xo2zqTH{Zh zxW+#7KiJytXPf@yt35V3ckyRkX3Kxva?z#h{CBg%PG5S%*Dswq{Z8W-|MTi6JU8jo z_iul#_uEa@n*Qcn{`=g1la_kpd#`SD{!CAv^yUNW{PyO{uN=Sba=oQq-u$~~9=b(i z;`Z~Nx$xgkyy0&X=iX(V1D|~It(|vU?C}{NJ^zUPe|5*iwKiVrSeNc<+gBgBb`$_cWk!6@%MlJ`P^Imb<&UboA|RM zW}j!9JD%A3=eUHoj+Wzm1TaG#C)Akn+ zjDK|1KmYEmPh93Fo3Atb2XkEX%J`FR?oa#IgRAa7;j&+Bzt2(qzklPn17F->bk+Ue zy!MtuH(zy?+oqj5{)ZrJN)P;{^@0gC$78s zm{*s7=jN#wU%Soxzue-hzx~g|BThS|`u#hPKj*pWpZj>`tzSQU{>|54e(j@EI$!_A z=!ki*oc8jjM}0Y4cgc^&y*umED;)5nTRz!q!p94J_nz0T{`2!Iy*$UCgo%@c1wpi;s%e{2lv$L)9(i#gMztTm=ukhrX z>!17n`HwGf#6hh`o;qo>>%RQfMjzkxNbi8pS6^b6-8NX~>`#e!zkm7uTW{&Vvd*n* zZ*$+v`)#`0&P(k2-W&f|>o4Q}^X#u&=A%Vl|I^eHXFKDJ12*Z)@Pe$9$P z=cPM7IsNq4zHsv~mmYP}a(}3P@9bk|op-mxmt1d_FYbE(vL%1{{WmUjnH49$y!84z z9lCyZ!i)3oecQVyoO#8357n2yw^;ayC1<()!s}~CKXv@~r&MkNPZ!E&Rg;p5N!*m*$=F(P}q7 zdcq15m)`D$g%`W&*x5h&=**>8o^8x|i|=#kzRN{SnYiqY+y3K-hwA@5_49?+UgMXq zKfcp^Yt=XJZgj{0RL`8_XGa&i9R9(ir+N#1a>b@s9Cdwj%=|ZQRv*9ncb@p>KknP* z_KCv@d%t_nzx!h^TXC254|w{atAAfxdf|1yIB(CzK6vBI$7cS=`8(ZHd3~lWZg!a+ z+w1?&`kTLd)>)IUSYgZ+m!0ukbDb~FICkvh^=^9bf(^%C{`<x#yH0?fRc-j~=k_Ay@xow-ped@hSmj6WZ}z9&hj-on$>{i#&pB`I)n?oO zzbia`$kw~>@$Ru3O`LU)!CRfZ*B*U)Om*reZ@jX^zkYsOb>>K+YbHjPv9qxZ@seO-n_JkQ8nrXhbk2~noOQ){! z*@Aa1_voBkTsO~B%kMj7pTj==(f%Lq@bM2nJ#(u&$8Es-K5dr|rn~>3$rFxR`TYaW zb(u&1@q?3|y#JWqb!YGPXQ zaOIUxT72S@v%YZCjFWzM#gC4T(Dl#f*IjYv)n5B>?qw!i`|ysJ&GPRjKW>eiX|-i0 zEc?rG^DVOZ)=$2D{^Zm#S4{cppI@!A=XD>gxZ|t;eCq0Dx196WADl2|hbx|V^rr2O+H%%! 
zF0uDQVfP3$)gE+wPjArnro;BocXw)) zR;$x$x10TDb1-N$+wD=WSM&${>d0jV&7$ZQ&0e!P>{W)1cCSC|7bD+UbO$wG?2d;0 zYSFJ%o3;L^+AXU6VbL138-qc!Qt35EZFktJ4r-&;sNbn{n(bQAZPW(cX0_2^l-VZGX^)Qe%K)*2SWQLk1XRI9a4d)V)H zI*rz_)~eR~jn2Sb^}D@#Z8#cq+r81K*DSijcDLJ=}f9CU}3K|OoZw2Zyhu-}cMWh&Kft7|WM z6=Bfp3xh_Z=(PN6x7}zqIz_w4n$`NXO0!mJ+j9HS8d#T6e^_f)dp)tx@tD#=$n=Kp z-iq`qy_%nGHG92bp2k8}hRvZ5t2X9mSb{-@b$bQ%J%UTbwL-F|1( z6Myw)qn{Y-wL7DJZ_pq4;a00P8p)rc-K&op!@;oK8Mb>KJx^2V3`d<-t1tcPRa@p- z!`7%e>bmP%!|L>H#%NG)6^%xWIXv3moL*z9+^jiMrcU8Ybu`l7!#>baBlz-`sL^_Dwrv_)k{8rDYL zT6?4*wTC{dTQsWHrP6DRDnh#6s0>Dxk*eW?I!%=%DP z)RZO9S{0MUP;$4rtxm7fmMeo{r`<2Q^`SLs3~QZcr`NIdwQ4TX6r=U}&~9|a?6B3W z3@f#&@9apJGH(jSaoB7XcB4N~`1*C_rBksQBf&ec9lctk(^ij$qfx8TtrVSpHM`j9 zRYx_S(yDok_Mjy(n?nd`UYE{`2TJ>hNRamrsbJR>kR;oTn;jqZf+Ne>fH~S-( z?pT*53T?CBZqz%4E$NEbL8VjJqC8u_A@i#qw>A{4!&bd6b=1i`O{3Cj z^@WRl?+q%s?Ye?CtaKH@s;cCM6tq^mHSCQnWT!6r-1DH`A9kw6sNIvtva8jz6YYUb z?{@n|f<}3&>kGqPD|^!@dYw_F(jBUJZOf$Uw1Mfz%6$8yyvnf4mBk#^_ zHrw4seHMz_#{-S|{7rX*Okjzw4`Qmr=!KM5WL>icyjK?NlvK$6}5~ zN^-ZZbX8?lr`HlFt$N!h_gn4SU{LE;(?gni(a2WI6k$;4A;XTlv!~ToLpQAVTt)?# zT>ZWpB)w|tY;`DO{73((w~L{oCP-^);BZu}c9qbZVpEEhDz0ITh6|q z*AqGwiQF40K2;$zEJL6n;D@7DuTkr_t0{Tf=&;}GR8-Qwjn^tgT~i|;cDtIj#?Y>- zzW%1Dxw}*rMO>N-XI-(=?(`boQ7rdbZB?u?%AyTZ_S-ef)@kYX62d3+J-DA43=7F_ z+51IHm-VqD#j7>y+4z2IB&Ulkqjqg$bcl9aUv!79#;`qVG>6K!t~AVL>gu-EuJb9? z)lRL^YinmJV!I{D`<7aD5FL$vyWXk_pN=-!ANA|%0E|&r0V_)KP;d(PTCb^t>pVJt z*G6^w&2Bf5W}|zO%YF?T{--)<*gLJEQPk@ibw|{-{Zw;MYYAkcgVyVk$>#SRLx> z1u!uxQhr9tmRJNBAcK^9pE|7TVD(Bd5`~^i>`8PPDivL((rhYM_Omh^$gSp}s$|+2 zUDQJLG$8>$s27z+y#QE;DGg+@h_9sH|lPAbPcyJTE+SQ5#s?YBwM1Qk7A)(`{Ek zwq{3Nki>-wR-2|ms?rq)or>qH)->d9E~8LOwn4QCZdlBs)`CfERgk7x zu{Sz<(b8cciy`PUESk_t$8I&n+AvYp9$3X{$9H!D+J-XJtkhbgiiadI}3N$XIT&89xl6Qp`2B>&u6$}Iiq#L@wc!tg@s%W*|ZNS)VTio!SMW;~+FaV@l z6(x@^qErW-v`}SoFGCYRg=Ja?;IT zHRPe(X)A_qyrv04KA3>6S*>{7R;^xbG&^c@rR8s7dzS%p+>lz70JLBw3vKN>P=Br0(-Ims5JIW>*>=BPmQJ~zq=&2| zWDC;mG@4>bm3Hx3QncRfb-^Eepx*9Fk~(xc=(Oq`A2yN>F4*b#w^0zK`p|L#KUo2E z@7cg_b?=oFzre7Hib^qb8(X-C&fQ+tX8jdO;9txqit5l7CO9&1d zzPCMWYBS0kUQ?3;BWW+3<$S~DbQH**a9p9V8E_OQFSPPEbV9*N|5S^ zJb_zj1j>?K8HEVp1EZobOcnE(Dz+w29TXOBkhIX`t6+zY6+D|rvnUma)y`_+!%-K+ zQId6#zTAdB6~z==3{UN?7Gkb7(8Y(~wE8w|Hkz%96{>?>ojyPZ;#G&XvE9|PyP#CB zyi9jA0wQZ@CCDQTG_4LsI1omOvQ}eM>&U2L*l2=_UBL+G0%k~JJB+mhVYj+HsaCa2 zs0R*OX;=D+u+Qje;JuVi*Hb@KpYn0lA_=A7_q)RpBo55N9ni9bRpAh`9j_Lw&az0ELIPYv5%? zAoiPLSPjtgbQuUlAn771NrJBf54H47Rj&p13(!Uz?h_3GTTrNCgFw&^RQozejXSG6 z7RqHRntH2SlVNV9IaJ8nf>zD8uS1Jf8EM)|mENpf(;kWv-b(q3=F%Dtt8TMvm7vX< zOz#5eqh8k!Sin&Jb}?|7kVW6wE~;*o=FqC9qo|ZhR~?#7eFDh=Y#`VYU@uVv@@K68 zk^(}jkv0a983wBgB+Re@ZDwj3tPdN*;!1a*)xe?;kiSq$Sw%W! zv27dqEH_PJ>UdQ8n1;}(0WDBN9qM(NPPKvw(LJO>(E(6;!!*NwQKh*l?h>7zWjW|6 zHAMD1c5fs2Qa4fUB$S8t4X)J{O_Gc>M>&L*%lQQ{LoV^fs? 
z+SX7(1a4X=2D0F0Au^1sNkzoIY7{!pCFN*+9ny=(kuCjxZ7A}A>z)uCq!A1RBjS`Y z4!a1Y=t)CQLq_mnaIBhVTZA>_hDHDjo*+fs4Y>|21V)5$Y*SNTj^5J)G6gp@1JRYJ zlkj1!l`N6i?H08*bWg6RRe?^>40;u5w!h!$)oRkZWbAan8r=%bP#iUZx?pXM#QuQF zlQ0QLw%Ji9R4GrB`&2Gr2E{E}OA|&7WCCs^u5#NHv?K6CEQJ#y@@ln*qE@<>c1>QP zfMRAu7DKkc8~6y()|!d}iU=d{*`exMP8E394s;%3Ut9muggT&Gub`G_QEquu!LtY1 z79a_}bw4m$g9;Fyqh29WQA>uPZXen2QAUbpbOEAc*G*z(fu!^s`qIcA%4=yrwE|F7 z%uY3;q1};^1w^h6MD!vD)v1>w(TIOYO=O7jgc0?1s21W1nx+a22oDj9g3v%=Yla3i zD4G(Q?4etRJ2eTg!c83_90h2)c>p>Fo)1pYlW?Kh*V&0 z`l1J-HKFKHgOsTxK8Y3aZ-P{S6$Ox*N!D(4Jw^}f)>BpIw(QNKg9|EJw~2C5WatY} zR|`bX(;%=iPve!^@CWP?u;Jr_uAlEXjG&aNrsyCpfOlVuk@KnXMLou112OQvYJ9)(>r?P zDMTn5O+`xf>m!ZHrrRX7JP23)vFM#)DmST+W*THu?g2ssXo$n+oGJED`j|45Z7Zw| z$->k^epIkGC`C*YY!mWUy&8RgR5Iz3FOqPo;IoxCq|jcXa*k|S!{o>Wg4{+D*NbFk zur!pfm8Q+QR>Mf7RU={3FfnXx${fb;;?k_~^*&kD({y2H#Z)m;^8>L04s46IGs#M5-vp( z+``wP709bA4s4c>h0zG)1Upp&_^h zL-gTm9U5oUo&`kOV0OZo$RDbf4yR$*Z?6;L=TtlkCO zlA|h#SHwwsk?~1S#ESwcvDLlwF|Zhh?JKsUs;-D^)!bW;G$La?G%yT75^#F7SFmuH zHdqJ*x;@;fmbzhzqv@rYi*>D4($>UIg-oQ{*j|AQ++&E4VAUYVD%HYylwws)RA|}~ z%48BeZD`tntmqUBwVvn1K*^FuqLd{PIh>`zSIWzf-L>loA&L>$tPwXPgsc{Qpi$@q ztmPt7M|zWoGgI;pWsOR%gev9szSU7fN#xcpS}4LrT-95KC5G1Os2Uy`f`onyF`58d z#k$s&A;p(yuZtp$8HSKFD|BB{Yi54#PI5*bHD?HUqnk?$x|_nT=#eJqc+IpmaEnsj zR1@GXjKF$ZfF7XQOuFG{)F~;igsWN3{)=#l}5wfRsife~?0 zs0>ai$T$y-6%x><(*dTYhqBBRHBAXD#9N~#r*v8?a*gOOjNTAFwLXIsanvw2P)Z4t zsXcXk0Q$lNv_$KsyBJOz;uc00Lbd1_%9>dn0C*ltcxLlhK?y zDjz_=?2WKe^Qb&9VpCNG?=xZ1N^61>3&*dFYN};PCpx2$c$gd$&-AGDCFB`mCituE z=3(Fr9M-lQ*`sDJ#irh*4YN65JN5{i$grQR6pcoh>c)mQ7NbX9)q724g5eq==aQUC zlo<`wA;T~l)LQ{@$0Ip|)5}D{w#(Q#?T^HdMJY8vAc4;Gw5rpXXz3(v-5-25jBvX~ z!TlHsdYFg717StH1Yb5OglW7ZimIkU%!?{Uln1z7#@JS=@GXjyl4ODCKUFo!K#^pf z$rq)R(12JNJ{o+8{tUnfy!y7Oo4Z7h%%j@+SSGGrWC#K=8KeYIU&B2WVCBf5tcW#3 zj0q5WT-T7poZgn#RDZE2%7rvCpN`yzk_^P8rbMuFhmTbl+9g(5G7SsI8*QHV`M#|YC7yDZgZ`A=DmH$lJW69GA-x^jaa zA*2)vftG+rKQ!9NX2H-VWy4f+Lz$|Tgl(a+Qdk>lBz8U1RcuE}>dD|iR|CZCN&bMA zJc=j z-b$B!^$E6I7s5R-Y%Nm7aKW-Mi%}Cz@V9Oj1chsAxMD?>9FcuJ4X)~MEWc7~3ZnvX z10V?v0=%t6O(W!bkZ9*VE z9yJ!IMEPmU;CiEhr$!924p3c|$N&Hrhs>2S*b)yjcTx-?JgemIiE-3{zHkGeNy)KK za3PtL^wQSg%7zjkP~bUIK(AgIM)|-z@poC{h75SMO#*Hok48vSWsQ6wSt6=^07)Xj z6cDxIB<%vG$fOEObdc-~@>j!?Ad`$83~{BXl&0k|AjQuVDMI1Vf>1RYmeC;WY}`ZA z7x@~Ff=({5oeHN+vv z2Y?6r2AzTamvJF(CQWcNyxG`L81^k|1U^+$p%f08u6r&s8g)V0%HDVo>#HJnh}(9d zqe>9x05br|@SRIYpr^JVs2a1@V4scg4C{3ZaXZvTNu}~#?N>7F4f2{Pc&1?lygUt* zOTj=F^&B^>#Zj+S#sYDKOK`_je1#NXv>VaVjME7c>V~|>EX;(-Y1kqKfO7!`!bx?` zs8nJOMRyt4$`99CoF@2{YOE4~XMhP?fy%ET=;OFrr=59@764W8(kapg_J(+xKk5t8 zVAqzAjPG9KL>yMMY*cEPCY`Ad{aaNtO=B+W8$?DFha&7Xj$)M+)q&nD)-7iWQ?1Nn z<2;?Xsc@QuiFlZp9v|BW2nv8@;6$Y>8-dAqhG?jeG5}5sX|^7sr-@`lI9UsLYHT)* z5k*c!{*@%rt*{U=t))OH7C?`sa}d3hXzQ&e8&i-x%0;uU9X^kNS%qqf1?ZT%rVd2R zP!?27DAS05enw%@D+xs!V;2z7*^~w{qO1~#T1i_|!gDAEbsf-O*VTZoLL^2*{$(uu@& zgbX?yZ6%9Fp2YkxRjZFOw3_)O;t6FE^g#jx-r*%48l5SwG!qp?6fCtjl8ePtOE1Ub zdK8c!lLR>3ux_dl-kpG^@k>vj1A;Lxg{{=v_#Fn(M0p)z0_4E7Bia%@So9Ke)oGSg z%#AnzFvF#>X8}uQTnGpsi#ib~>|V2If>X-G+NKy`oGdmvji@*FBND~oX?BPOLZJqB zYL042$jH{xsXZLgf>O<*VF*P>HCz?~w86IT;Fm+qT_#CCs?p?up6Hou2I?JDYi`x> z#JVLx4S{P76K~c>%_FnItXi3%pe7l$`oUB;ZPPzIRE96WER7JCav80djfYShMNy}< zlUB%dnlT0@Qnad(PC32wZ8J};kzPj^OA5CURRbyPHqw+r3$SDw#dZ)yPNNl&rPhhv zkQy@g)%qm`IO{%5K;d_*3A-E8m|D6RnhY))s1ooX5HKeb1CLyl2Xtz+U4fEyfgC^? 
zNEP5aD3x1m6+`4zl?EC*4;V26I+1pQh^q?r2kPK-jk=aha|^FnVFu+37rgSH%K!Aq%BZ% zh;vA2dtlat@np;{tNd_0$|@OU$KL3pv^7HW(5!&CFIQ7N_0qPwky0!^P}ahXAJ5mq z>A`s*wgu6y)QeD%Os(O@;bbw6Y8xR%?uiRA=+k9%O=Us=%S-A>4a_?f6A>14$!CnzG44u(t z{JE%vMj67!l^BxH(}Yv+DAMtQyL%8&cZ@A9KD8t{ilCnJKWO-pjpzlp>m--_?H1r@+uRB2l=wLmV1%Hb=-m7qoZVWy!D@j%=~On( zRcS_ybd+%WZ4^k)HYpL9qz}r8SzVQJjeiln<%I1UV(Ec6Dzu%aDb4PYECm~g7>9r~ zU_s+Z;5Fj*3d3j6^@jHH8s#>qMceM zdn}mc2|IL8{I~RLynxqTfznJ-j(`-jHtxE3@oMaFL?u)o*pKD5LI;bP5A%*07@5nF z2Ei6AAnaIwM1ko&1G()FO{{WLBQ46DMT<94xWy}o2;&vJX^gg1uM~5DrinCX>@(ty zOItBFQ3LT{xvXNB;Fc|4@j=r_3dZJ|gv~(lwIALmKnF}Eb}3G3I<0_3gOtj3@9r5A zhCJr{q!NJEqY6^Zn*=Zu!Ahno?z&!fPZ=L2nQA48wO%Be{1cr9?iRQ%z~}<{Q6TwN zqs(Gsuw5N}CF^MYkgwuxgn`(`9;957Lzqf!w#i00la|;X#4hXVRhYJ)Dkbb|BvO}V zTh^FjanO0xOPNXb)4n7a>mS{)6fuEfb8JX}zXqp{e-L;kCGxbSNyBM<^X`5nKb$72 z)PPel8Ji8nl`d>kOmHJ45pybQp+-y;_w1W;usY? zU@E1Bn|+kM;YLFQoiA~F2EMun8lp@>ed-)3ny>|_4>RZhUX=w&&qVDZlka3B_0SH2 zeo>Xso|ro5V$y*CZ>Ck`GC;S~(wW^0JBhXiC&K*L4;KRMMfzKJ9axYyttnKfqL?Zp zVhSAxaLNyA8{dN3=5I$)_T43~7-(2pMl_zXZV#Xub9dq}r!UkbvCIdpUIg;UHSzMs z5&-e|6lzyV2~3NIBE@u`_NxtL@&dQ8hG71l24?Rh1b}2AZc9uTnMl}_h(HnD1bkpx zoPBTt?qaOf=eIE2ca|Z-U6v7uFc1V`tX&M1dNCCfNVofBTz%ZAK8uz*Or2E$VMRQH zpgsZ%0hlwiv3pLTuDzx`f+U4y*T=|EsjW!^27$ATX0zQ!p z;4SoB<=ACXoopFtSBNS)>`QhvZZ$ud)>4ZyX-pl^0hy4m(1cix(tcI39EbuBJOVUL z>5XiX?@)ALeENN9=izAxX*7)UQP_4BHaAK4_!$W3cSg=CQJzdz7*!!;D!DWzWEw`b z6f%23MY6#01=R(G)`K#o582D29D30-GHwGGsB=2z!09S~>9#3T&E#^Q!b*H zBRy%B%6tY%nw&4#>{>FOs7t-akmL~KkhLbg!akj7VTxlyKzi~?Xw7D5pof8FYAdwQ zNdsy?w18OPbcxi<(-MLUr64=oWBn_q95)3p1f$zUq(Y<>P{FI>lQttM zX)GizsbxHxqj%I5%Vcz0jV(b{F{uOoIFKWqxFdso%1`J6Xkx($@oZfhw{RgZA-pP& zk-E92f@jhg)R{f-NL|u}_*0fm12h(Wb`&O^DH;ikVkVsGTp`$!q;Y~tnPH1{)fjnX z7jy)qGy8;wLK(DQU`4_j_W}NcOgbzH0f2@`N(+L6ZwZ(#gNW*c@Qci*{lek)25b}< z%Bc8ZW^>mbhfE_J1!WE-m^sGLL%^esMPqT|jRjr75CQgsl*ZnV11LA4Q7R+5pf>F} zL~Ssalt5)NJ)r}r5)#IU7}-xU*a;ayFq4we5BLmV6UZ0Tl(M8zfrEs=rty(;;+B!t zp=)5ch7nk$C7_Oy6VZy872|ReKR8J|h5lAcqvbmrDcRWzh~8vo9oNI#Udc&%j0B9V z=_;TVIk!wtQG;_d4G6nd(jV#~7ctO;#Pf}&CeWHz6yqan+k@WO&@VR6AGJcm3H zlOVDeg)cUCghTzXASjweiwo2Zqyno3u{C2GY!m_)AXlylGozfECDL?(f((=lR@wI` z;^NJOC;MchpdqoQUYHu&7C1-Dp(&*q76?+A;Y{ER-!jV)Q3SKp8}eM=gqi`~F%QKW z<3>I7e3i?l5jUJf(nUyKU`#-0R1L*V?SfK4%P2G5vI^&oC`L8*^*B8#S;Dd${h=H;ep zP&nkL#fJ+Gxq3q9AvKoJgQm-H33!zaS zS0@b7Fb7ZyDyCd&eXL5{3FT33JJ0&zkfNuN69#X9xswlYFvlZ6i<5HP2`LZ{`{6xL zE0;PnXYC-61NjClcpuOeh5%T?)D5>lXlVoP7z859h4jWIMboq$lycBl$kE)8Xe0NK z{6Vx&jn6}1qSPsY??%e`4T;e+#P4m7^B+c3(2$!cb0#W&P-0F9!V~xr!itIO>46l4 zFKJC%_gxG&YNKgcgo6YD{TG9UW=mbbELai6VeO~VDCTs2V+avRtd@TMUT|lq5!XPn zN3SLG7;V!CH6EqYG%OlpOsCF#MqiBw4E3_7^3_)=m1hAiih)dIhYQ@8Eut1boNMv zbC@M)54PgF;!(mp0kD`vlp>0&gBj=|mxpFc2URl_5&i@)m}8=D3U4KGi%<>i7QifV zjtL0N?rC&X^+Dsv!kL2$??jA;N;Cs9CTM(aKp35Y*ad|se$>zGBV37W%2fUx>Jy1W zVye_Rs2Vt@ec6M6TT8E@z~1_s1HsBeOeutiG<)bQ+#aAZh=OUUngT%8QyppN&;zpS z!ys;JPF%+AWv=Bj;?7fx>3Tdjaa)_5#20V(5)_#|Qg>Vi(i9-kFXSL#V^RV%rH00T z8R|{X2RA59ghB~21@JK-skILAInC)ZmWk3CkF+q- za59iL!HWE%BJq#tBOqFwLsS_v49+kxfbHGR$7TS^8wlAO@^31KFajn6ZVX4@HY$nB z0Jb!Fk`Erpvo0`vo06fuVmoeE+iJ&?KRCzk3^XWAyN2Zs3iR- zYGdYDN#|TfonX`>ce^3CO^yx;r8t*dF=lfjB>1oLD?8+1iE11~EkXSG)SwRq2Xv*d zkuJb8Akvgi1Beq12ZfE$1BGT~WKnk~Kp&q+c=)JJ+c6~2@BLes^C98ZeI zN@&j%3LP4&GicyERmANQ*+${Ois{fQxBj3J=$eT%@ouAZUHsoeN*U!1&~SoIVE|yG zf>SYLtk4P#M1(UPA=+p8wuzh_CR!_dIqa^T<|tzh6vzmvV;6IoINPBft{j_0yoyhj z|AnuZYA!_SH?UO977m*U1=*tbF>nR!LQ-O$=AfN(=V3O`SnkGkPzlJIX3$E7#=+x& ziu;FJK)jVQ^d3Fg00cC^3e;qX7^z0Il5_BY>{RIB-q84{);`?_nL<&kh)mL)%n(14 zS{h`d{MDaw?g@nE3>>^7wbV~_ z*OTOQS~Nzl5^y{g?o6fA69E!xw>DB0yE!P1KoXPG1>u-!rG4kR)G&;=RDboqn(hgQ z!F^c)_>dpzYh6pxCf1pGp}p~7<$Y8?hE$KyaLfMU}S 
z39eXUly<5BtED3y;r=p-#1uChJg)8x)BPO;WZeYRh}Y?mv=a*tUN}?+*<>Py*5vd7 zS|}rAjgIcv0!AT;Z`S!bs?_*I03Qw^c&Su&S*vw%-kN)4$u*8Y5#V|Sp0wkre50DMM%&}HWi6&`Waw3aFyQEpE4m#L} z1!hVZ!)hGR(t#^fjh5hyLS{tD$|R|7~We!Sj_B1mu?W8o&3vDM;;CI&<=xMPN5*O>=^Mef&O4}D8>WO;mcEzI*HEX zWAeEPaO3Vc?t}-ChoIm@m!1k<`l=%>E)&FMP)A_S8A07mmC}&oSW}BhkReWlS&D&s zk%G#Sh>aG+$t;Dbhpcr0JVGXJhs-W$;h-p_(&MO)4Ci)dh=Svb9&>M70ougME5YQ13N`rQj&|oxaIID~__{Lb<%tazAyrg0c z&v-sruD4s zIfC99U6e(*qHP%_n;J3#nF@I?h?xmeV#P(auJ5@FHgpu(M@-O2EU(ik781xT-#sLS zV@)aN=jEcs7f61{CkfV=%##S_RY8(wp+@a?Rx6@I>H%@qI7t4BFbZ#-ekt$!x>qU<-oquqI_yA(SMYqs4514 z!Y)gg!BE~Uq*jPm8DJP-E8TNSr}Z*Sk7qSvj+vy{g)_j&ip?@k!oan~oYgbwR~)!8 z^TsFv<}(B&j6i^)@etM`Ir$uOeWIdPWrqR9uNy%$M)~Qi4(T?%nJT8!@z#tfQ~NZr}-bk*O?U3Wz6*62bnTdwTnYDYvq(OKP}O z4)TiAoT<9v)c^%?%SjyGRc;4Mr7Re?c}}J;{1~XEsaecOF3PR#&~aT$aKAnd9MJIpV`) zl|REt(bMv(G7SRmUZ0A9R;;^zB6@R8GYS&DFx9Oc=?hs&Pel2*l5v*t1eIpjrlVb}IL* z6VPl$Cty<5k#tt55-iP2cgm?sppv~2gq|kvHL*OK6P=Wlw$_ThiaW{~Y2mpveD0|9RzKznP;U#@k<(14Puap+dOX&oW zm6FV06*b4TOkSeLVQykQ_z}I$&??Sudxo|p_O(2f)D57ldAW=Jp%S`G<~Y<)fx~Ad z^Q?|V$MBO$p$1Yf=Qhv<9RhekU+5h2C*vyONlwrKsFn`zaF3=9#=j2eBQP*S-aX|q zh5(APu$7@in%E}jz}${1J{YfI!N-cY7R}g__vSDd;*DlAOkZ(gvyLkXXafOFA6GY= zM2>AVQ{y;4Qw3Bj>OOC$S%S^reOGxz54QzGOOr!QwV|Bu*XzB{O%UfbadCX6lLOC2 z?s4QQyrQB<$;yjK@_rcNok3kWUNw}@!CdVado)m{1V)P1*E1?2&Bg3VHt`=LChS2? zG3C_?J-K@{bxkKEqVNkOPGvc0g(ic?FsrvCq!H*Sj`M4>LRVyNca zzn+mUuY@vxqF*4SCk)Sll~5K0KaPQ@x-lkOP{j!s03-=SSu_z7&4!&FL?-9tsGP}3 z+)SlKQ#%41fRP+YiSC3T49EnatPvkB%O9HaBzDuMva{$F4Cr~?8?26$^M>SU)HChZ z$_$KHJWi}IL$k`(n2(OiY-q&c?Rr|>Y>=0zt0#pH`& zH-qhHR3>nQ;-o%w=V@r7*k?RC<+&Y!65ojL5l!+Frl17klJ1w(Yb?jb#GFXe6|-?B z=rr+Tlf%b&l*<@s$nhwrd6y1_N*(plN@-3iC}T!f%+l1oE>T}#6i^kA2j9? z*VIBi)YItIQq5)Tk-m(U2D70S5XAcPSS0{|UqB01#dn*zGOKY1|U8i%g@3 zxe(4CIDrx5kVXo)V~^du=QV1-kWqfI($wCB048nJ3k=}(=s(XUET#pRl$tt|(%eFt zE97ErRK4^DB1d!_@>Jg9Hi(Cs9KIS3cbt3^g{Wk{DNO~9LL(KUt|JxASRU@AIa3$$W&K0SJN-3^vafYqmTM_CrG`2 zQXR-rVy+o9#jT39>${^=0Tc3=q+%R_>R3np1A9>tR77?O%Wu_mG#lt-v_fw}93zJK zH)`SgWp%?>oCP2jz^kwg<(76uQE|T++q;ZT=!39gR9Wrg5a6$*-{#bFIdmjUqn)va z!dY4$EP*Xvl_(Ohf9T?iyj4^Iqpum2fc)s=N&F9iM7yTog-_W^xZNnh_K^ z2z<}^I-%u7R>W77mUOHqo0XTtJH-qV4i+)0$r;qV?anwug3vZI4i51!d*Z?3?BpjR zr$R_VhTs$DCbAbH!)a0C9b71Fbqq5>C{$r{$jvKKyiU#REc7s;;4;G6K!}+H+yWh)G=jaeX!eZHk@teWpVNL$U-0I^Nio3!Qh7T|)Ked> zEg~EclCig=P|!I&DQ6jSTu}$0t5b&MU+I34wO|>Jl)W(~M*HP_o|6J#GGgMYMCJ&c z?vZn3qErk+4O9zVMF<(<0=cX{Hj&z|e`+K~TH!kcM$}ci=D?wM;|nfReiab{txHWS zhT@#&gZv!+Bdh_?s$2}64)xQToz*dtRY!7aKq%)8-?Uu01;_yy3OOZJ(gilM|B?u@ zH&7vF5ywAN#vh}^8%*na%Ab77QD!zX?tR=R{Fv0iF`3Hb*_H3qC(4SM0+$rIkF%8& zrbPiokUrX&NTa1{8MuO}h`EK)y)G&;RaZ;_zcGuB-^27KZy>Naq)lF&`2Xdh2lv`i z^$7LV6!k2WYj+35L7SkAJ1RXZ2 zgh;VXj@^dNLB}+qpof46gASv8^`P_za08WZyN>gWKJ3%w6bp!s17b!lGdd$7y|PR1(nci&zRtE7Oj3qvqZU zn8CgZOWIU0W7myHrH{sb$d{{x#RL~OgjZsKk2sxV*OR4OtZxyvAzPd(NAnT_dDE9~ zGQXC#nUbMI5-lo4PXGE=3lo16_yHs@Zl!ouIRFsF>Z* zHgkG`H0#&`vQ}D*To2nVcYpcRZ*%3Ue9- zc}|i-N;Fh%@p3dKV|DT_+Hbe{EX$#nVUT4r7c*Uih~WiI#ARW_SOpAE0XdUV+Gak- zTMr{SrvO86?gQ7bv~hX^lVAi0VWO>v`YDiCaFvTj^MI+DJ~U+37CWzU2~>HeCr75y ztf`Ig8`O_4lg?or_BvA+7`mgixF&GOs@x{6o0Nx{m?XJ~TE|yl(G2|rQ{EN2n;CKT?x+&17RwwFYHwuA)D!r)^}IxH_fNXT`tFR~GO0O;W> zYGgjM+#8SX>|`3eBF-U_cdiLLwNs7()tZ<$bLMC`j%5=KZJg;cT9qAKmkK9_l|~+( zk`W*+LrEe}2s`s?L6t@GmH>xzBSL3H5wDLr&^nV+eff@u^T z->?Qwju#$uqZ|puuiR($CAh_uP4g>XW59Po%=Qu*>W_SMW`JTUc*GL#pb;w!Aq67% zP|QlRPmWM&)3MRg3{B2~P;M+hD^<|T;-FVO2`_eF$B3b9($b1zYM~524a{_^*v$2K z#@U>Qi!okvgC2}PpkSj;${Fa9*?}0#+}Q-qf!YvGyOXzrGRvt|)C{e@bP)3glKnw$ zpRC#Q`1oel6enJsVhN#^X+l7(nN6h(V2xaHIdlvA(00p74*f_5;*adAh$2bMB!Rh$ 
z##T+gVQeHXVsoaSCXeJeEP@^l=L=z+r6o+({UL1+C}bCpjXKV&mT3^6Nf6&r;oCl-6Pw9E7~`N2fXO5usjc~&X6}1nj(9x zu@8@@1Fz9BWt?@&Ipr=Q#^@%Oa6uIdG)oEMOyJU>%F=T4k_aR&Y|TgzvJ40oFJx{Z zON2!meSqAvNU1FHuD5xFMmT2Vg?>|92h zH&e2s)RdGNPn$Otc)ZNe;bCg5J%zMw54@bSIYuHm)k`c0JFtO>LKtIaz?@ofO5p1e zz|>;7=rVa1h<=aqzU%lz%O!Q`7Z4O0B&DXCD z5}f$049;Ok2UaLY^g?Hx5NNX%STz!-%j88E(25T+kIvY__vM9LK8?7_)0!jh<#0P& zE6P0_5+>#SBd5LdT0gmDFX#l|3wB}0xbBfgV_^g=r86o=j>vjl4KY;175-6d?23$~ zvt;6=>cMfIlxfZ=0?Wu@K$sIKfE$>>(rBO*fW^6gN1e!nhW+XZDqS8ZOL53dSd5j{ zY|YD?xyV?sm<*%gBM-tbZNPw*|1AP$j^R^KA`1dUQA0y6tuS9Dvlx;~0w6}<#pR35 zGDMGjNrASy#&%>I$rJstESLkU2C(GST}Ij@kO*c-KJk&+0MllN zhya=QOq=dwdukE_tn{`M0_X;%&oG70(4!+JIZbPAbq6A;B(g3D5khN&h-=`rs-i$d zla}ui4-5t2%rzk>V@l1(@Hip{qmD6oz^!>K4+2?FoayhReX@&zw-^27@JPS~Vg)i8 zHR`i5T#I}znW(g)0`%t_!7&UKAPR6%18@lu>4uGRMS;tpjOjrXC4uaGaOjqf=_OQd z8c9m2HJ7DgnX)_EWELXxpcV~;;un7h=pcS#3378ewy6+vy_p+JHXe)go3fJvpS<_hQAAe`N+RqUE%^>Z zF=wabSYAki&__tlo9Hn|EzizCR4zlx75Y>kJyIPskOqg`~lqZ=8 zHslnLS(8ee7gqqY#32=nr;3%AaF@5KMJdT=dj482L+R6ylnMzK6$}AXTFHE-52hBf zUi*n91=7)zu`>e+Hcrb57V7BAju#;i5_1Sxo6s%jh0sz6f+hdj3F&}>ZYmQQY+`jq z(8vU6Wmqdnh-fG=j?K$}G}k!F4W0=n#<}>sFJRd+4&LHY#Oi0yi9@+?IKuMkLr=~FD zVyM!@T0?d@g#(!4O=K7%ip>VInCu-oL}Zbz615sXQOnA_0Y(8E<;-`OGsNkGHLH*G z7o8!}1-1lTnajw#WK$030S`p>yys5Kq|o8LlzKsChj|NO!kGQb-(gY>7647qd5l2y zEN$XBev(4e=51C=n@1sqNBj2z1*R;`)!-!c+(5^vF#xAj=D}6+FQAck5Aqd9 zig6jks;YCwf9B^}E^_ds@{d&j_Q zMy|V*$6OSbk*-N3hL~_rTa}=583BiVI0TnBPq-Drwpeu;UB&d( znQ}sA(h)}_MClzqn@Lrg8VrNb7G5#HondDng{Oy%pBH3-9~RF3dtO9GGl)GlE*f}= z=jZF12C`^KBt6W`Sq(T_TL&)O7}P-I($k2#q^SsKI*Aoj5W*{|L0AdD23cHmqCh5K zKE%FCe^aVB=#`}GO&Gbf(AVv@m|_X`XhtzPBtpPXc;upd!V|B4$bApMWt)DAQ8|GBJDNafZuDSq zmb2aeXTWcrX%Lnfi$rtiM)V>!1#6Wt9Z|)(eMn3(ne;eluVr;SdM1l^DK; ziE3gnnZm_@L3e}x2q;A6v{Ld!^nLtII+zBE&AghQst8guua^@K6j2}Y`hIp5BQyWZ zh|4UMr6FuFgT!om6yNM>&i63A(govGMSe-BEDUH<-shtb(9?24HAkBWqUN|B}q!~kG9Ux@8W%Z%t`xQHb)meyhlhG@!JnAjscwIZV{x(dO8 za$>{N9%9+H%1$fW_tI_0Ap&b4#ZG)W38Q84F{s#35|o}%Ug_$$28k@KLS|5RWTPG% zcYY53f;gEuJ1mwrNQLTO@suKh66@`Zjg9h(u*xR`6k$vTx;mffj1H~pnkTt(N?RJbo|zC3CGNxVS!avo zkCQ%e3d2jdj^Kpx8RmRF^JbwCERU%0g~iwj=D=i%x}qk?^-nP;jMRo00xX+mnnhEd zvQ1LVhvpUPkc?{p=n*(^6tyb{f`O7?(s=grI%30g`jR{$sIh#o6dEBJyFtDg2oVWj za(q0$HXy}@^Z)C4PhN|sO*%T1*FNZE3|i{A`G-K~vsJYyA~~Y%jue+DUipQi8PDcT zCz>+XfpP%K=}&$XG0?!YM3>2V5Uozk#5*m2Ff^UOqEplaU3nV@6b7#`AnG@ERr*AO z)?s+16vIGc_{V9w9PEI+VuZq_I5G0~c^dngQzQzk!6d_`niQ4^>k391S(Ns?4lclq z+7Zu1tUmz?;Nr&+CHKXUHS=WgnEIrg<%g@9g9<3>S2EpJ89! 
z4)?KvGJ#8Q8!15lIo$;BB@@Ejoc@ioI22St%?Q9mKht;zM{@Q&>p}4@=RjJaaRyIIhEdspXiI5XV?HC@>l&CC%4GA^aS4oN}xWW#Ho#A68@@u`O7#yECgLTl72BAoWsF0tjJeU|%2&2}>q5vaBqD?u z!k-MH0;9_C$Sj}NRfLj{%bFGo zoGldS>SG+B+*5RKg_EQuD^!>?9I;3`2YFI76it(F!H>n{?61|Y)1fWhb7lys0swR*CoJGqP&@BS)9i2*<8wKthiUQ| z@;NJpcA{0R8TM5MIuPiUa4uu|mdx_RerJNGXBTp~iHAoyj%F0SRSN*eB5<=F3LQ{A z(PboI9uyH&aGE3tObp#em~*hK#!zUxoTN&mS&f|2R6#X(gfVaEfMCVBL+fUzJj!`3 zTz)|sdD09BuYztgA_}W>XBp%Hltg%~1A-+j;bYO&MH~w-5SFJgT{PQKxA8U#uglVO zNOAOif*@TZqu-42kPn8=913Haay<^B6njtMO={E}6ZU95+CyxEL@%R9wHg@B)3_sW zE;64!VW}vh6iE|$ohJt!O;)s&6aug&9%nU1H}P-&6_w#8Kuj@a<=wo6+Dyl9{x5L|z zx~J985Xq=GbZG*M%jAs{4&F*bGw!SpsAi}~%ZhMEj&e+bH`Nbj;%vyJER-*<0V>gq zoEdV&FjQUX^GLJ~cMkX(*Te8Wv7AYpaS^Uro}k}&C@hbH7I#2CO_=0mHNb>?gg+?h znieh-<4{cbc@v7EZGJC?H(6L2qYcnM8&7q$_=Ma9v{>*}cWoenxY`}#anb0+lnEOr zAO+ObNd&qfgkezxg$^jI#G3iMKTx3}=n;?9Xc?Jf|F8=3Eqlr{>`IDIjrak|!n??s z{ebn|ty1C56OahL4kzU|4MVGe;wn&HiGbib!;2AfF+n$|j?r9>DYGV74y7Pw(m-_( zEWdyRsOI7U_w!yxAW4;Wu+Y(Bd90_Y{p4-v1i@+V9~%H*~9BvUGmgf%r}(xTzI2mMqlFDrD6&6u6|wDN$FL-O&b zqTjrcaIIJB>uC;B@W85sRto~9$s2ws#VP_F8jcIR;0A!Z1i5sRhbn)3T>mc(>4oX0 zbPU>pS-DJ&yGBLCRf#H?rXmc(gMzR@tWBm-w01b}mdJ~K@pfVcYz1#(|9KT_9>L(u z@GR9>KgQT1Y)AtCXG+KqB;x6@Y(?e!Y@GX0cj0gaOb#lSl7lCY#3c9}V#`T%s!;?j z7gO8yQv0Tr=+Go5zaJ(H+7iRstUqXpt<_UgoZWyJlb$5;oaa&U)d_Y7>O@>Edc9ZF zMb*USSdi$9_8JVr2jW#tz(qj6Hu(mMG*L4ec=Co2W;QGWQ7C_h zPOyZqDNwkh-f+Ys@Zr_5)dQpkMl`{^x?L}cZ6`OEBLKPpc%qh(r%ku85Z*MNH$k4# zX@FwhY1zPLKv{u;V7yH^=VT$I$a$K9=OpwQ@1^l#zr@6hR6tr)QmwPS@EYG-Y`~c@ zs#f%=ypZev&*E~(!!@D#<|_~`QK9qz#R+%K;R)}oAkSnS zgaiHsrGT$^f64rEE-}+GPeXpAZ6~P}4{<^jQ2mHn#*3yjN`wF8@LO89ZkMRi&ht(q zr)^{uZ7O(0jfc}?Uoz3~-ajg-unPS7+MgD8ys?)cYe`ttDM z=XpmL@j-LS5dh#&bmmQCjJ=33gA$mLu_IBW0a%>EEgADuO#;et5G9vUuxxqk|A%Y+?qmsZ96fW#>`?Rp)7>$!~FUv->|Bd;}Ez94KTliF<{)p16(t_ z#~O7|z|G#0B*7-G1@jzs8TMyp%6_Rs`wUpotJ|DSOLeijKr+Upe*i%8Q+C>3Pbc!# z8c>hTEY?zSkck8u2njgCUJ_cNoq1bBykdb;gZ1PU^O-^kK$9g$3?f&h3K%9tmeZy@cVuUE z>i=lE?s%-zwy_pZB?Lq)kE9X8>UUg2Ai68bfRWRR91!I6u&z3UofeKHv}l z_k)H72^zug0AYhh2L1d19iS8fyB4qq+5(U)!NCNmgnmv5k`FKlB|)gqqtl8CVGwR0 z#YSz!fS7?U0i`hnPJrd$QX%gF1P|~4u?v8*ppXm49hfSBkg&~x>4Fsh1eO6!R@0p%vBtN>FAupgimq%vsIK%oIxA9ez4GtkT;brvKz zKs%uOaNz_1`-d<-99dL>2Tuu9;E84h92}q)APZ5w0r7sQoCA&k_8D|4pgW{Nw*X=0 zMw>fr>fM?gY3Aq*Xyu1kPz1rP_= zXQbChZ7cwL&=0gh9E9bC;v48Bfdm4zA*AjAnjz9dTtbQv$X@}>K;CnrOhFzEij@F! zkeCiJJdl<^GzFR)RANv=1T+rvPPpn8d^n_`kTn4T0y`WUoDmX$a0?AwK$pYtz@x$U z;1lQ$4Y+3%AP$^|lZG-NePL%J!XHExux6-Y0gwy`3CN&96b1<>lpujTK&m(hQLrws z6(M;-i3OY#D8isR5g;DuE(9pf z^8_c~Rb&9;FzJXt1D+q` zP~e%MRE}<EE^4{#%}p&@7iwE`O&I3g(X zA_4++Sin?6Jr9z5Btb>@%>z9NWbBFZ3-&%-o(rwLC<%cG3o{HQNvLXp8Q8E;_5nWw zaT5|6bQ=j2!(k^v4HtX^@W`;Ha0-Bzg@Xng7fQ`g_J>#koC~V)!*~Ibfvbib2U0(X zNWcg{wgIf&31tkl4_X`4Ap&p@3P%v@K`4ScXaEg?(}jtKVi4q?FdRTHh(`k-4apMJ z$e_#$n--!U^b!N80c9%UevzsSDJI}b3wS3;O;GRI$&YP;`~#pXDz*X%1vi?Y*@6j# z(14_Upq+*pKu8PeN+{{UWWtAFuyB|_7Iji=gjyYNy{L~BQ5g;=B|M~Kg^V7`$FSZI za#7Pe3=oRcClzN1X|SUJegcmGZvv*zLh%pgAN3c)WWkRI0HWz{6;FKmi}Z8#o&v=3&J^ zI|^hEN`N2@gS!!+q=~KtLZV4%41^~DTZ23_x-JYcm9SAE3ZkA)XtaRc3{3|ROitQ- zPP*R_OAWLsaJ>-Y00u!F7yKaLS8!!WRSWJC*jb zg}ToH?jjW^z!5;z;DF$qKt&ct1GpUE2xt+|MKnZU?w0s7E1s21JZ-%L-sPSZWv!fK14eVGAKv4KW^2A34#d!5bhJ9NaeQ5=Xj0 zl#_#I0tO6z4saU82_R5`0f8+B7y;e|-Nu6&-GIS>;y3Wdh-?I=7v>#64!{UNt}v%i zECG%Nl&UZqIA-AJ0PaJw2YUl)9{_iQItgyC1eA`hp@!uGI0D^O0C2$hLpcCO4#5

    q;3M4>@!$Z)dq5F(MCJJ8y?JK%+0}1Mp69qc!8TjcPYc;Kzj#FIci6QSA>o{m|~Dv0BZo$J75IB!%!}U&q0qqn1P%h z;2{v+aMpo*h29-VZ-90}2NF)nNz*Wl3f;GgZZZZx4rDdL9w6{Tzt#)$2B;nwOQf0v z2nC=T1zq5&APt32LF7TBK|RgT1p}Ef>LUPujA|$-UITv(pasMhkmkaQ!=Isu0a*fS zpM~2iAOZs*I#I~O`9V#5pzsD_3n>ACf&m=>{0B}LWSl4v0Cfn69HJ$-K~RGrxd>ok zP!NHf2V58|5bEzo3Sm@DhLj721$B4$fhRy|ATNL;3_chUWiSxX2{R~7;G;@e;M7KYn3<-<@V1_VsQlEvnhUQ+#h9I^4uQ3Ky z2T&_KFpubd6VRYRKLI31u!)d#4CploERbJ=Yy_IUz(2rYgtq|t8bCKfBe2%sb-}%z zNU;ENLM{u51OOsnr_ddkDBgho1_TC38(n|`)D5T#AbErJgpWYZ0TT*X5#AD@9c(>F z-+@5^FABU7EZ#{69JB(%)`DsZ93#l=p%@F(g4&os-~k~M#I#@r&@?bQP=JMzzy}~) zzzaaXCK3gsdu+iboKVmJfvOxL9TZETOblBN)kUGE2x%7-BY^aS;EGPfe>5YyFAm-T zy$3{nv;`n7LAUdwd=8|SunJI)1|I-R12-_hhC~h4NTGqy8EVFc91ErXuzY|JA#}ma zLV!i0ZRn%|=#08jP{kIGA?T3+t3%ueGjM-3%IqQc0j3vfkT4Fw3BZp7wTyr&`hjz( zFT>~2!+{<|*Z7?%6@lAE`a@L81@-`4XNpQna0-x=6e=9IKL1`0cBS4l3 z6;7BPSZ!D$Ae{cI-vC2}Za@O%32>AEWFbdEKob@dRf!-zK*a#vP7T!uR2>Gg3aPdM z$s?!>xeOd12v9)OfoBEed2&qv5Qj*F3zLKR3j}VV(FiW4LOnI`Z|IlykXQ?HdDNu^ zs!-IB1;s$r!waGylpg|t3#xLcOu%5F1d3E8z(RmKgoFa34N?jL_XMB~AUFaJkh*{w zbfp#=2k-$PkcE^RT`UFE7Hn8354pI&v-+{{j!2!sIpiB+a7%T_qPGJ&I zO9|9BfTjYP7v>PW4IFql*ARQaZ^CMT4+fPfWJ%yDU}!MG;02Mq7;OQ_k721`z7VW} zy#(hEw$3cvMg9OS2 z0EmF?VBqMk6=@U*U={&k!45}^Db%|G82~+xWWcD+2uh@=`2@=0P&onM4PqEjD*}@M zxDSMHAYlcR10@3B$k1QTBT@&Z8-N^;Jy4iJEWv+GOW=G#vjZCyFaZ=VAd7?O0Uv`V z5IFyc=!2bwI&i@G!qEhwz)3d~lF&er6E*a~E`@ah0166Z(Ck1Z6)vYiT?A0P1W$4D zi!l)4fu)CY2RtIGse@1%^kEPyK-mRY2WnDq9MSJo1CEEdhpuRax(mP}@GwZu4{RTj z^FU6Dy6l1BLYh_3NWejcT?xeya1dzH5S#>-8&&y`(h*&=0Rx9h8F(`oKe`qc$|*2@ z&|tx6Aqs#qfP@|VHPqx`Z^D59Di75ZPz(eA4e$^?0*>HBj0-0Z%s?>=3NC+=HWqo**y-hylVT?0oc#bKp+kwc*`?tN?e7%G^*VgaeIuXV@JOX5fS(c^h~N zU^*cY23(460)SHi8yX3`VH*I0577j|A7C)lM+Rmf&w>mJU?KntK-uu?aNz!dxTl@Y?9gBh&~D^)n>sfwzM?EASFPen8O@-H-uE8EidBUV#}0Y5)j! z;7Duh}%>^c}S#AP_vh){+4FPsl>zrZ&GU`99HKy~y)Fb;z}xmpHo97t?n zQNTKYNXUiY6oSMQ@_1-tK!tiR11ArTCLAoF#=#UET2P%rg%{m|4%7;;`B2k65t5?b zd!Y28#S-Qjh6nH!g-zH%C~bkSz{(;*9XJuRH$grQumyO+{{%2VA|NUOK^UO1U}r*2 z8OqX-0f0gi>G^;ZhJq!~35d}_*Kb1h2T&XWDKzCkFofd+a1@Fep!WmD2GB8l1S)y( z?vO%&mIEOhU}eDVLxBTo-Jm=~m2WtA5aHleAOQjY1uzk{M8Q@>x10eq0v-)EA*=%2 zIfe)TpaFqC1Xl%q5XE#Po=4Sn#IAzSP};T-Z2`!jkggToA%Wf)RIgBb0?H74oV!sNLJSj@7!fTN4ITj&0Uiw&F&-&y3qSk?3;rUGAyu+!5rL0l z-}S%{!uPQV)zLGhu{`k6Ut5?@o{EJ>ibaXONCHo1f-hrZ7_qQ0c(^V4cvQ?dgve40 z1s)j|9^PpzYV<*AJOX|Kc!E_6FM2`|Jk!pjh3w>|BJoZhP4l1G7Ggpy3v`)U9hF zfk%iT#o{KS;YJ_Fy$k;zDG?vrqmUI6-5Z*#P2OcMk<^X02 zy&?vK$A0q8V2}#l1icMB0}B_AgN%`Z9KM0$8-;h46z^_S6do0rZDGd~I*W!4fBNsS z$?(XtXxRThp)fn_Fw7!fQ_L)v)oX$P_f_;p6Dp|9$J^ z%USR&ZnWI+XfT5zz#>Pp51T>)kCYiL3VZ_({>nr|gEkZfzU+aE$B1XHix|U1=;VP1+eDJ;WUL%`qzu^b)hjLZ z=-p4@5nyP9|9ixL&nbatYX3J0Fk9%ERww@lJ_b*^1?_N*FZ>I^&)*ed8fsVFeQl<>d!gcy8WIG#fMCrA9hsV##y zzKT{E9!rNF>kBi7>lEeDatcESub~Jp151J7z#_vVoEOavATX6VDiKt-asbKfRgj4vh(P|ot%8=p>SU;RSXfwu ziC9=V%*6PF_>@=-q6XNcj?lo*DD=NLTVtJn-B&Dg{QgxHkW3@?bO zu?ex@KWq%HFeC9bL(wbNw$2XDwlr8+#01oMgb(TpuzA@exLD2=K^OO!y}1sT_Peg&fB5) z*}zO+7JK*SqBK*MW?{1WtUvx;mj)|06Sneo@AG}w$cpWIn0*-c?c=jmrYn=_#Yb8< z30C6C)N^7bnp8yUuhy1l?XE8eBqR{}R;nx;=x zeD4P{+C=A^bB(?dYBb^faa&A#Z+TU9_|#>6)~l?w)*bTP0fa*xJp*X_H4*ZrLb9Q)bk_6*dXdA%DK6|po^g-esSvc}%qP>*k)glf8OcqfYce#IS;TaTfF3lzMvLr}+Y!d^L04IR97TCix4PxG}SA>mXLg zeJ~S((dpv}q_Z}EgmaHA6O-wz_E*Gut~vaGB;PUBrD$*UN(a_E9}6Gb^s)S<3@6Xh zy{WfFoDOR@l-P;YHz|3KFZW)*rRqLBsHKv&n8cw%rS&1UU^?qXm8T?;B0h_8eRb;u znCWP&)hD)kOX6mlcNj-KuRjuBQeSz)X^EOQ@A$0hN4N37mn||Wdh}->B&qoI_+Bg@ zWcgX8H{$$x)bvI6xYW6T8`vgs1^emZg0{f#!r?-abm|fDxpmySo(+k$ATJ~KuIofqE8bq z*IC4l$-Nyg9_kJPibK2=FV!rz9thI5=K4uRxQqs#e_bGJR_ol5QSM9BA}4I}a^Dop zXndNl&+e}I*x1zQUZu-LnEheBr@xRBcP#Ejn)}}6hKhQXPjsQazbI>nS3D=ZZz>F( 
zK6Txk#(Qw6yfv3gLV?9zaV_K>PDhSGtAE4(PGjfL&l<^#{bYss`aPp^p}Ai?YLCE7 znYqZ=#YhQ}#1_Yv>eQ^r_%EN5yO|&5rBXfG8K~Lv;c5@)&tVwTkn^Oa?f=N*>Gz#+ zVrQ6>V3E@pN33bqu#T(DltEQg&BI6d9#80-tL3uP4iUW$_dIPk3>>-a8X}Lykr!aI zs549@;(Jv+&N>`GDa6s6=yUDH=;C~XL^5OB+V6W<{ttX;*Zy+w-Hp(`?~~E}W*v8n zpQweXWo)w{z=9={&Ezt|K(7&PotS!cr{mPZ=Hs$6_=2F#3` zgh?BaK3O0AY{Yxk;ZK9!Ji|ut~?;|MpbFDfigh z<}s!57@Na09=0tAQTqDjQ^$`-24k+NJ!%VL9?POWqOMH?JH|#z*iJZqz4nD}ff>Ch zqZFJ(SEf?cuD1gA#D?=jKY|SsslHL9icDN0+w2rlKQf;g)mkf>*QabYYa&)<)lJfx zZFPO)7!n^6!w~b+_vW8fi6-r{uU_AzZ@=Q&GpT*+y>vFMAZwkYy!doTH9005%uq7L z3h*7h?K!U%&r=epl+(9fvV@yw^@OE$hw+ABWZ=fI%Eb55GuoufE?6d>x+aH<6J>kY z4_0T!I!`a_sS|UEY;)!~w~2pP9(-f@^MbZyQ3^lr4Y#!-%G_&7 zDqE!DpY5x}e^uhiiyp8H=7<8yPp_Bvo@rj`0k z$`apTP&vQpXFs!sT$NZmr_M|iR1r!MMNx71-6zFFbH`Yf5`)K!9 zzoJQe$9r_yk3~W~yuaI9`c`J@(+K;!c}-EaWZ7c|*bPsU9k)q>&urDEZiAUIDn+&z zk7rD`zludL7bx}asnhjG6m{b3jH^a}zyHP;myWc0M8f8c+Qr)1V)~@T#8-}lsehFV zyxV78#IJGbE^`aI^H4d>eD{45U_x(L8dzmW`G-)o;J~Q4?*2Pp!?ZvpLNGI}%_A!8 z9F9E{*f_TzW%+pSW08{ozGAq6_{*&|{$@Yr;c z_K8A?7}?`QpE59H5dSm_qHR^m9My|BYurQMlIPvx&yn29+(?rC@DT%;VYPH-k74*( zJtE0!Tttgu`Yc9DHY}nT%vmB)bEEdl@6B6(8v~UEN%gLBYGEaX&OV~fr>W9&&$wtI zJ$U&7b~x_ z4iIa5M#)W%Ii4JlPQYAt@HWPTr*_}}PFPVI9mRLSq<+-l>)GBLw%lNbIa-0(mnT$8 zcRYu96C-}^=ff3(6(2R@OV!R3sj1NyEkZ12R{oegOv;s;SBMCWXH+?F%!l{!dAwQy zP0}6Jz>to~)Yg=+*9opH?}y9Y8lE=OJO9VHzVg7(Ssy3O^KRkQeJU`sD*n6F>e{Cq z?rrx!&eiLOEW^Emt@H*v6CWl^O|rArKCWEx(qUr#se(^TxT#Tbw%AAHVJ#~o?MvJI zvRALoqhFkJ;r7X7xYF{WDW+wbcRx{KTzFi9l1p3eOW9A)U1EHlKR3V(%cr6SPq_{< zvK#Yn8f*0izZrWxeKkC`vs88F;*20a9>Z}}46CWj&(?ms?az->qgnjPd!L;df1zwU zNF74`NItzON;3Xl-Q7$9(aoYS?lJbn)?6)R&d()ctQU>lwVC=4?76^<>Rp?$KDK1@ z%Pmq%^109aAN^5otoCfW0~$Jc4OvQjRbyWwZHK-3?yPxh^; zVDkVwN9dc5>enw0y5ElQU8kC_N)%TSre-t@g~i>;8Y{d^|GE1UiES2`DaYJWON%p@ zl71lnT75W9)NXib+b8@+&p*t9C9Ho?$=>z8?(yW=m@BeFI~=o%`v*tTu-ueDy9X}=`kd+?{7!QEBZ_issN=?2Aq z1lyU0fBj1hN(X)xUt1QgrhU=Num3)C`-#QtykXqYNPnII+8zn7#W67RN_<~;a)!xI z_Clm7iHqR}zGyM8H_tmJCNB;ugvgJDUyZ4cHpR=2HEC{MG*sr{^PkELbG%-2*yop) z-pnVewm^L?waR`7-%g3JKhDs_odied{i4=jdu~4YOUYWZAj3GV5HQmd&5*xQl19z_ z3;Uy*$}N`96N)YkdP540y#KCyWcmb=Tr*wr-mG8KYIj;l>g*~BF=AALHg0&X0-Ky$O@Av zuT`&CdGkpfzcz7a+E^7|?L;JXM{vWuN6wKLqxQUMv!ER5yPw;0yDL*+r^yRQs6(B@ z()ce}FLB?PU6jM)(vjJHp3=bO>s`L-+ntBgIAx4c;zaZ zE#A6J8@l?-yo>+FV9x3zwa(-f>d>BkhW?@_^h!Ph&E#ia=hcE4wL!s$Nn;DeRL{?S zDf4~vQlUNm_1g|E&ybz#QGRdiKibUqNV(jkv>O{4H0jl2=^*^TXSeBbyhPY_D(mAJ z6DKW+&!0Ol80%|%{Lz+o%tUvawf{AV`OII#uKgz~OqH|R+~@qkjKvjs7uthsGvsw` zU5s*;L{Hdv$68$;U-5lTg!iVmI@r`Q1Fw>&^xy2xvdMen9xV0ap~-C{=8J~CqcleL zM$;F*i8bbBMseS9#*&&2!J7(Vd~D|Q(6XK)M?9q9?OG>?flDQriBdBg_z=sIW%l#S z2=6ZvT^d|l%haeYvMP~@$V2KgA^7Yo)d|6-PxsZnrJNT?2`s^_y7?gZ;qLHv)<=C$ zKWcL9#C>`c8#&1PMyvjEq(gCmpfWLmmE)m&U(~_y9i{7Z%?3kY#xXQnE>KD~g^4si zO7?>k1^XRo3#Qk`7aZl!sNhRa$*fS1rED7wXkjeu@uTM@Hp+{Qg{5MiekaI%&4tac zRrhS?{M#%#Dg8jawp>M)Q(UZ@#Lt$3;*@7~uyN;C$W58glcBaL+vmIv+&@oyEcD{4 z>wi!RAbdsa==JqC>r!}{goiziyj^A2=gnC@%IcsT#VU*B(uVxeO~T8wTN`(EeV)F1yIJ0tj z-FNfHlaNSzcfMJtYcUQv+;66B^B3QlSxHdc@z!(nR$;0gx~+HZT0oxeXt#*H;LW#5 zMmw?-zk64m2p+m_)$k2G(A@8QapCkO4bOSqHR+oD@GKaZf=7MrfR{vIGCn1gZ*Gpo& zmHH8U-=gb^jO8}pPLawP?Uxb`n_kx*)9O)9c=l<>ctGff#&X4+9uE$glVy71;NISE z+o-Sp0xCsQ%Re;mgf^KK{n$B1K)3F313L|aR|Z-Zr<7?q#g;n zE5-fntzUBWSEg<6J_wc^Ms;L2r82c6jXtdg)x6;BsV2uB zJPXle$=fSIXXg}5Jb0_}1xxx*xpgqklN-J&FLu)oH=#>BzpTcyHK6g5_>dCXk zX)#Cds~ozIgT}Ktrdl4KWq5TxROjBP^MZ29QK*uwc{`p;-+9>??aKM5j9lk9oj#FO zeO%T!3XMs(>c5EnoUCoKlSG(>r%OR&eUesyTY`1*517$NSZ{Axe8n71X5ifXIlu+) zhV(-x+&>N&sq2c_X)Z$c4p;xEa{l}6EdDgcEGSSzr=>>~^DY1K-n|$87YG>a`X9+1 znZ1fWh;j}(lZG>2=p|`WPjvlx!cgDi+O4%mY2Qm+Tfhuw)Ro7}x18?&&`9zL;k1;& 
zmb-r1;5m_}pXB7Gp0bFs+|K0Pk74Z=TYJ-{lbN-+@!!;xy_U>Z<=aMurKOhFug(N+ z8(OD#?XZi;={(z1RK4~^L;OtTe)yBGJoE08!Yze~sO;9FdDs)9dyyi7pd_APzw)-N zvz2?leSEOAD0K$eQwbp~!oTs;Xc-U`{bnR<}!h3AX?rFbf-@6tr0q%Kcvh7c5_(>Dr}8@GX(vTxUh6lQKLdmsExcY4#+M?1|h zBvPH8Z)-}!Tx!MZQkF~KbE~u&oB8I`&Ys-rgGqCGj~=;4p09o$XS_6hU++0@Rz!&J z8FMc7^sKqz)W`$NjeRzS0Zx{b;iik&Wh7=EM~7gBDgQSA&YyuP3?*UZU%B}e+@&C2 zyA-a!rw1d#M%=Q(&-_lUFUj4sb952B_tbc!D!B9XQvxB9RIeYV(ge)p*9vHCO4kNl zv3l-pNasF!mOj06SN$HJWLEAiE*Z(}IObHUjyN!*NPpP-GngRyuA|^hyyk(RBmP{m z&wqz)e@k>mlQK*;{W2zsB%(P*T0Cz(eeJg3w%eWreN@FfJ!m-SU%B*9b5Z9)>;Aq3HDfu5EU=Mxo8DJ=Vl+ zuQJ@3MRQh-1ZstQLb>}ks_SC}RRX<{X5Z+2(kdgboLx+R9uYB{t;FnQSS%T3*B3MP zD>~p>RaYF2WFDW#-b{@ub`TBo%6P4KjugSuS77EHZ%j3N=!;L@jWl&V7NJxIW(^cq zzw0Dv^&2+P3WutV_aE0Ye6boC9Z%qK>s>Vbqven^+RZ5Vhe{XEc6|J?sJ&O20?ee#taeJ4)Dpj1krvETpdn|VG*XBi z@}#;G_Chr>#87BfC8Cqhd(GW#=@>Gr!<3f37;QAu|7c;+0KR z#omLLVii_5#%Z<1v2*pD6&{vg-wr(3897wn{zdDKi>d#^7W`Viu+?XpjyZ~)?V<9~ zop~MaLTB01*K_7~7)0B=2qn)op1Z7YgEbJ$ypZ{{Aj+&vZ1UjV^(d(+bq?XQwIAIy z|DKRY#cZ_e{#7{RR9c&)`;9A7@kLkp`GS|#$Po#sK~=dP~j$rfyqnJtGrcFi$hM*b3eq<^eZ;M_!B3;nF~yuxm5bd1YW zmhYlbnjiYME6r*1xQa7nw?upReom1Jo z@o#R|*p}2Q>~EBA{nih!wRslpx}X2}D*#DwvfCV9@967H+gako?|nM=bqDXvMC!8dOnn_*(h9}uEG1#y{%Ys>88J#=ZOA2 zz5voXTg`*2b?)h-F?-$h&dIM4=QRD7>f1b(I15Bt1%q3@=qpH8tmz#}Y;F6@RQLqw zdg7>ch@I&c5lUPd^p)SVX|FsjXswgqj8s@ovYpa-T6l_DKP<;3LzJQo2K6?G+QYfp z2&;tpHJzP4wZ9jZ+V)C%yIrvL_xauDA3m5mwcIx}Kcb`Y7F<34!ml>!A@+-2Z921# zjNXo|P>H@!FPP(>v{2$s3+11H%T5)pc3_6D8uv*}pwjjIizDxh-|@w;u`)7cN99E@ z$Qh%#u_{;sNu@w8j zukLA>ISooO-9M*fUKgEy7}j4vzf+&b`_uHnEQ9IN z4?=BHtgiD3x_5nCc`7=S3)Rk?y06D@Wu+|>r=(mw;&1DYQ@W5}*lR}DKgI%!Uc+pW z)R7`sX7ZCGzp|$WU)FNZP<&^NF2^!>1ZLt!Ro%bTL^1S9El%G_TFvy^kond2O4{NY zZGTN7W;o)OsI&d0p1;>P_UT@4yY4(=8fytX{Zet+tK(&7j_1J|Eym}a1~)ESX8$#& zF?k=pagQ)5rSa?i%Tnv}#XC8z8>Zz&U}nMN3t!YlHix@IXVxNf%GM)0Ge4gH_B8qV zUADVI4fz!zH#gesZpOSi&p|4hfhQgl)%eLiLPBz#;Lf%TwpfhW%>mD;bLMg;XV-~K z+d4!`Hu+{%-DbH)0*yCc%}vt|4y%MA71m*BMEhfA^8DG_v$I2s|Mpq#ac-)SDGlL` zdNT66>wEe=yZeuBw!P2&NJpcV_A=YRrJk^wtA>g#bGI%Cv2gP^Pu>uhNW1yKlkr{p z$_-Nfe-S)ker>NF4n*r%Pih~u**v5`)KHp>xt?H5##K7^(Z(=J>G8RQ-Vb4|kB%wn zoYgtXSJiEeJ)UrwD_Bfy$NqX@ar>ub%5v4b#P@fH>`DVn!|r)^ZhBIPXr8kotfQhx z;T68-d;7-)qSw*fGOQ}0YkLtgq>Ox9V5Z?q3^#?(=y`ho+a9{*R0Q5SWV;7F8bFJssbp5pupaW|1H(iGyNuT!I;tFxs2m%{bx=Dz@w9-P2Q*`AvdgJ1sE zhprV%{d|Lc!4fu~?9dkv0*f->p=DtSzd!CgA-n11I$QhpN|sa#Qugu*~t@B=gE z{&2oDD)%lWmf4Rs#~q)=f1ESPX1CAY_9_|WD&aSN(;LzfxVLB)_d9ItNJk<(VVVX-MlGN zpIu6eu1Y@HG3@&x^Wlu#Rk{x}l<`+STbC>Kwz&jm4p&|-FI~XGU*L+3HLD9wDbTAtU}+Y0sE>)Y zE8eysnW@LEy!n7lE5y(HY!bORtJW`7o;9O;cZvS|4zq9-pqMiGBbbsu8{8pIu|Ca3 zcEMt0xyI$JR+Gco+V`EYJ3Z`8U%rExY58lqQKwhfnTq5Y!|P@yJRAsS0uLF?1EQ(* z(w=26)tnv^Qoin7dtelp&S8;WltG>PTVSA)*fz73Ael8tYg+T1=7@vnkioI$P2yHc z*<29@UMX5ov+@PL&!bjzSF6;nJfbQu*}g>@7TWmDSMO`KOn&QX(q)mOYs#t3424=B zyY#7~+o^06T_Y^mo|GDi%xVcBmc#eBp;Z}c9*3;t<%Q|>*k;_F8i}s$ZSvWCpYIG- zs@DEP@o(Jn+vQmTaVOMHg`Clf6EQiw*US^0T_dQtu6XWRr-SI5DF7GjTsZ`@SaZf< z{{1Ss1Zos%(Q9wjD`CirGtaV8z8m&w^$RT%kmi1C%y82g(*HyKhNc?Npj3g7dLJu`xxYXO$r z&Pwdd(z(QE{ff=Y=T%xo5kYLxKhg2l_<^BlreA%n@9D}GyMTvJ|9vu**vwI0wNw$9 zeY2Oj`YpZ3KulEoY2V|B%#eL^_3(wVj~VQ-^QH|lZDN$ddPK5D1E(#R7$zNPy{Lb8 zKRuva;bR;bOg%F1ZsK2l3uX+%C>(+V);B7Ab{8faW{5SOKf=k;xmZJyVJ{wd&~=9) zSyxfr*Yan;`+>cp*M$a$jLI095sU5V5B*)-w`A{6{62T3U@z$k2kFbn`{6isQ=>7iOqc8L70?D12 zBDpr&5Vz6lYRaltH)C^tU)J)ueCdoIJ!ACeCku{5U!QO;KB)6sn%FfDA10j2c+5+f ze*U9PKw03tF-w^Gj6@dI9GLlL>a`+Vw&|4Fq^?`8eC^x!OV;NI;yg03rWo#twYSaR z-x!{B;KHhD``h95xixIbqN+m`rUrz(rJ1D}MFZyHl*P$_kI)!}O3amw10q2t%Swg=PNlS_-vcG*dMoO=?# zrAWkLr5LW*&HOPY+r9J^%%mBPT|J|0%kbt4zMF=a%=SAxJ+n+JUlUGsXE}4lHJSqt 
z3xAed?kh4k1;+k3vgA;7{FP~qSW%Xdh16tC}CMyK4uQlVx})C=Vnu6-8#)#cAO z1fHk9Xd>ymVEM(k(z?W_^S!kKuQ>a-_WS3j$mvXp20~naS!vFGjI_NS|K#nd*9JL` ztA=yb_%(htrlxT?Ha=Bg=9YT*IUnmS9j81w`o&}HXtoVs;X9F_9V**JJ1^oy2r{s~p($&dam@R6eCm9hMorLTU-}Hv z2fi=6+NQ&Y2RgMrBz3$7GavqOJ}p#T+F$EfWl%mko?^Q&I7HCDSZ%->hmKy8xf*MM}%sKaqzugPR4&Pc772T_V^fY*|>Z|9e^U)*Ht`P#m!dJ+68@Xe+ zcf2FEI6qEiCWegLM1IdCY88lSC8qR|+xzxh7r$2gj&QHn!2-19Y-yp>a zu6GXVxA$C+8=}k&`4eofU%mK(mO8NDK&DOjK>cr>FkJ^sA^RXKrUwM0;nDo4x+Qh0CLc4s8WZA{ftIuC5u?U=WXgrvBjW zPQ|cUgXRAHCvDD^zpP;9DHXPd6-hxS@ss<#68-5sVgVl&@|0PBQWtH^{A6utZSWqw z{P$wE)Ghhw`F&^i%Ha=+_=hUC zb>lMc`MUVpgx*yQP(R|iU}kt_NokCOYhsLB)rDb+-!D}1Z>&PTgyMK+HsM}Qdt1ZM z9LX|$8N=Xov09kLt;=90-1ut-u`y&Ksx^VlQ&-nU*jgv zRc_5vEv=ntYUXReFWLk%7B=gLoWU1w<;$IoH)V1?AMjj5w2pTBro&)>T;ou>iZCl% zO^eC3jXQLNbFl_@qVFq`x*bIYJtz+e;1kkkVu`X#F}Tk7F`6+$Xy>mwx#?+1Q4^Nu zJTJQ26~c81lE+jW7}2ERKYO$Il>0i#fmQjfD3O93mPiLfs<+9Xwde#gt-dod-iW|m z&CX}=t(Bwn8_Xi`PVMqNX884Q>+;Us`I}CX6NZf)%cCFS5}h7dQf6vVl7+P08o?^< zDtbqI>#OjeXxVpC^vI_rC4X`>p3{BOx~lqfoWb1!H~f)^G2_+`#obrn`hgi3&LI;O#1PpRnA#Tq`j)zQ+rlxt<~d7^3`X!kBw!;xwK=f`^{dy`OOrqt;*6h-@Oz|A{H_H^gPoe2G#L1ja?;C{YAX@Vu*D{ zZ=?#&wUkR-*P0IlGewP8C)B7lzc0V-o|Q1Da%?4L3~Y*2s#kpfep`RLd2%eOON35j zQiG^OtVGj|>$rnlpY{RIC%rnk$m%gn({h&^<#Ytg&qX`-Ya-=@r^a~P>ck)EbXWb6 z|5P#>FJS8Z+ddx5OkzbO_l@Ildg;>j+*rNwF#3h~M97%qaV$sO!;l*eb!P_u7+!aB zxKu6>94JjanfLu-=?|T@Be`2ml{<#5IJM$FbyUE5W<-kPTr8_K+GzdGx! z;?LVh)a;nYc}W<|?DsyP=b-$Qc)ICSBKW&x`N+Ozhk#AGtMgMOU%+#a)&*-%l^ zG4Gnz@l_|kWMRNeJxx!%hL2o{LW#j+`p4PVRnP`KM+h zo8)huV6OG0d7kzuUv}laGlfUC*f;g~T;J62GI^|s)l0eRlX!0PG#q>=G#V>4Rou6#N@$1t3 zlkq5$$ld3NGr&wlSSQ_){dTe1_rkyjsYVvXu1aoa+l_oM9<;?aB%`*BjKL|~dicc6 z(&Khj3}AK}BIn{R+P(9h_t?-n{ib-g3GaC8c2j4;$F~cSlj==u zaU#y~y(wx(U`8KzvaqI#FXBz=e9hArjvFk-f}aW}QZ$St^_8A92t*ea1&|0`TB)J& z{3q+(_@DQ_%oBW;kdHA?zFl4z-RXAtu}gv`B6+Bd z>BX{`@>sT|(b~~JtAV?^?9BJ_S*>p!e~@Micr#K|PT4dpRQXYE-}2^&Gez-zU)_W! 
zD&o_FJ-7D2Oy|zmmw#=K1`J&ig)GXXHS@Jz1;xxIO&(qGZo#enwQs zUrNbSd7p2#%zh=L&b>jR%Xn{-$|oz?V$N3B3x z&R-ewOS1D|#=UFlTk#W))Yd>-%-a>q^Bb{Q0gUHa4z?qeSV$e;XT2XUYp~_ zds^;U*ecoY31)ErkWp33Hd23Q(BojLSf{A3t+G;*kIlWPsA$8{I-rvB%36 zrP1TBZi(1I(>>BhtnE>@rNT0npIOMub^1JPq&?s3a~4~=y{j43&&j#jZ$%{#F3Y%_ zGw{6cl)}GBxrPd%S=Y(!N7u?Ce^q3qytymb{d|L6^#$8_$3vDwwOX&u#L9EKr%A?l z4E65w%_5P-zVE;{yto=h!X+tv65<-s+%uM<0#6K})K;B(#vrJq{WsxX=8EZ^ThWdj zw`(R@gKp4vG~g(`%_xmjc~apt?fEG>xRj{OXffuoOvEvpM7G=k=iOUdc-qOzZ@P~i zc)Y)02ZNd2;Yi+pWr_uPze~77w8JG_zm&3?gw#Gv9OIr3Z0)xcap0NADtOv_7}*&d zcwuMpjg&&VOH*!GY|=}CSgysRDlPhtMKw41?qX6+2djB(kF4bJnbNuqeu{iO(xXal zj?c3~DQ=3!m)=id`wijR)DmGYkIPCD9_%?+6`wV*)Qjv_^?PRZQ7@9yjqQre_911* znd8KlI?ZHPJhR_@l2>{tp71rIcdRw%*9CG)@}MWQBVLwP%0%%BFD({hmE0fk8hW0) zQ+oCZg6h7<_t|Qlc|+53&6`}S|8V_&cC`4y@MBn7-R)mi*ppg=W^4C~z3XP3?ZaLN z{V|dbGR_z>>a6+sZSz7h#Tjv{A8tB-98|C6j6XAc1eZx%T6)EA`Rdsht<>{Q@g*Kd zq~ElckRE#Y*lL4bXg6x7St(63udiwqJLc#9@Ap(h=dTW|t?Aq!Dt&0qpSKsUV9Q0) zw-|^iNLgJ?B~Ku*EL|CJED-0iP+tx`*I)nO+O3i&g2h<3a8ukBniY)Ie;xI^V{3cs z-liKtLgjZaPOn_7>^i+RN9>hI3$T?$Xd!TWW-QtWyZ zO~y64s|y8SM*X2JjRb=LuiNHh_cCSekFw?798yvlA>HkwUKA@=<)^uM;wqfv+b+iZ zxIh>3Ah}HL_RkJK{YSrvLp6@hYOlS}qBZyYf{C167JK+F+k!@h@g9-v((P&!w<&Cw z>#nZsERtei=7a9Fur-M}#vaqK3l7e#|6;vpWFO1@^RIsAN*I|i!8Et8%HNK!RP}<# zgv6A5Y7B)58CSa_8o&Q~cJx~IMzy$uZ0IWg z`4#i@iH~4Lm~FYdiE2Hyp8F%ml*-HJhF!KM_pM&C{VwjYEcy^R#k!ajbJ1e;2y1M` z_tMulMq;0td~ZzZrc6KAq!4BI9B{{2%bJaV)im&@(Gu;d^OV-47Nh%4Aiuoq*<7M92 zh|NuW7t9=+uKSk=Na3{U?n)c>=!nJnw&)P*-1$(}xqqnc)%y5x&+S0F*ZKkhT3ami zDc!p<8#?;&pXjt!s9n4JV$R}z*2@~`c)|R{GfyngQzGY_W@daarTHH|icjk{pCxc# z_%s)zqSuztVsauO{eJPJ*KyX!Q_o*_{Dm_gfAy?jI7j5D?4tb)Ptwo-ig!R)WALTE zy9*0*uYWEn(bSCI)n0qT;yKwM_Eu9xq>sC`87ZMj- zG+bxAz>IdQ!~LlBy_eVmq4$WiZGBb>rRtXL3_f5F`Q?vf@$T+s#y%2*R4(1bf5LDs zLd8x);JfSkyG|;_TgI686u=N55au3$60;6qjoz|KZQeK62X4?t7>T zC+%l2;~B`A^spz7?5F=>$2d)&oLlk5wvQk8<~Ap;N3Y5r;^7tsjrZy|645RxrR_R3bGV~ZpShpZ`tNM-%KuW$eV>-^5| z&@!LT=UMLix~}_vp6jNIy6{$=J-1EZk zo?MYZ`}V(k^tiQVWYsqZ%zbRmfO;FxY1;CVJ1%Y1xz1Okzn%Tbg8xk%@>tO^{Rhr} zW6&`xp8fLk<6keS_ifqArFH+)vS`Ll{pu`#W%o;lEZ_37rO%&w_1vqLEZpj-!wxy8 z+4)nu)H!7EF(2A?;-m}T%b2mM+&x8jS?L4l|;nUw8cKXW` zPwKz(g0KJS{m|vlcHjEnXTJEYc-qv3YtCGE)AB>MY1RMC2lw0Y>kB@dd}HmQe;w0n z*a)!YH#OeVV}`AJYxHIJ52<_6k~bC( z{q)dHR=v>S@>MsD-lY4S6HmPT`Y8`~cPg}_ie@WuH%*;Fk;OuNA5mj;Z-9h?EB1%yAR%RR^Jor{yzWio4&hrb?5(e z-}b!ve_#IW!H0I}a>Pkzd(4eLOgV1!mYXkmdB8m#j+xY_{wc?|+GEOBuNJ?4$`6Ac z81wSF4+hT}Hf`96-}M{xz>J$8t9iK9qgV8~_KqK$HM{VjHYdMUI$*bpKK^>o2PThe zG_!91-)C>L_r7)JbXl?A%;rZwbo;Cq9z1`-C9l@bf_`n|5qsU*>cDr_{k`kR`7?K) z-0iidFP!s0(ehsp9ks>GvE}oYT($Dc7wg^g%$0Tad%DBA&#wF6%?tm1VR+psmHVId z*$IO;f3@jn#gF`WY5T4tUweDgX-5sXb>de~oOizLq?d|`r*?9HwcNWjw?){g4_;CMPrFEK*xO4O)SHAV~m!~!x(Ww17 zn?CZ`g4Mqqd--2my}sQEt@b{C#_P8~|8v8s_4eNKi+U}7Ja}oxy}thN!2i~a?smYk zgP%P7`*o)+n6}Ty=LB_L&}8gZ-45t{*7bi}a@Nw<=TF*w$6e0q*mKMom+XG}>_eI# zGN$Cl@i#r$Vb$|3ulQxeBZHT0^hD=LZ`bNQe$7iCt!_WI%i&iXv1!ZB5A62c^2UcA zwO!fB&G#9-#r|J^vdh`MpEz&m;^K~HJiJ@=ekXX$%$7s1yzBl?emJ>Lk43}xX!4(C z<8MEA&O=@D`)m&%SEj z4x3zCb61_IFLrtA-4=aoUGU#Mo-Dqn^H!f7HfBM`MRPj#_~!SuxAfls?DVRaG(2YH zw;z68)wo;JfzN)|r}EDUbDx`d=1zTEpZ)n;PxL#Z%SCIpyL;O;o8CP2{;?Yk-|6gT z+pIe5r2#WnzP;j^{+G4yas78w7rZg;)8BVkc*EOmS6y1WQ?2>Wmfbt;wH@y||J&XJ z9v^mE*VPwvOBAU7YFU;}?E8 zta;yBr>wf>yh|V0?&aA#-Lc0OMQ0yg`dIOu-wpY5*(>`VbmH?TAJg;mH##&zWw0Q5yg8<>#+SjodzE~tK0N% z1`S-g#q;%k{OQ`acE7(QcVx zPfu_3{KSJcz3KY*YEOP?*fIT94;j>KW}Pv&&hC2DIafB>=!zYt{Q8^6jD5TG^<&RG z@V~>REPeChliSxCKK!rpefJw!f3sshKYZ;D`^{T8b3{q68xI;fXhP*JCvRN;!>8MS zHtWX&4?KMLNsCTuzxI^tyWhF`*YSN{-|ed6_LEQC>&CzL|FY!kGya+M!;@Q|-|N>- zueEsQvCq<}44 
z;wKNBT<3zxt*8-;nSENxeGRG(*?ic}hqw80#o+HxKk9_~mksW6>#%dSX?oAB&zg>X?}Gd0w!dz_ zE4S*l=+bf5c3HXp@Ui#xJa_Nu&A+JiRD+wod2g$?1}*M3^XZj2j@{wUp)IF%X!+Ba z?iU}upiaFtwf=1P$*rTucD&)Uxt%7Dom%hmt~*_JQbqeukNW0@(vL@VKW@OWhs++l zcEKml9eZf2%MN>H%|8Q%?)5d@zkYhsro(<((X;cG zbEnq!n7LJ*>+G`qgO3l{b;=HtuerO!FP;7=x#PUM4%+ULj|QzAx#;+9pXsw;(AJ|? zE_i0l2d$<9d0_Oe3$h*_W5PpbK4)k+qs>7UNi2r z_ja0DzyHGrob}0~T6gVu{f?Ihs;_Ky)IVPiUfy@}#mkPZv+4E!Tz~VMPaia=){riB zpMGt`rvK~x+NZsT&3wF3-N~IksF}6XZN0MS<<}MyGuva9C6F3GjDsmrgh`; z<6bzS-5Em;SUL9e)m6K9Ui;g72OW9B!p)ZNwS4}~%l7W>F)u$h>cSr$f4lp3FZONx z%!gw-mN)t5vB7(eKcw#3euE#L^ZWP<7rr`RM)MJiDqkr(b*Eik`u4shPmjN7mo1+@ zVP(5hwtVU5=O0}6_tq!0_hrS9MN=ApWk<^d*!mz=MH+q zW7d8#t>o5^cRPCEMO*gi{l%OWPd?xO(GCZmbo{FWYMt8S$Q#-Y0DOUeeS?U zop+hnV{zSSojVS_dj9y9m3w#GZsNI5ZvXaHotodfQID=Woj$I`8&^NHdf3|456v2K z+>h;!xvFU0i$i|CV#_ds;+?O&_u{8p59)X64dd&Synbz?la9To{z0QEc5ia`**|=+ zwsm>K;Xke4e~W|WFWdO}i;Iq%`d8Ohzx`QxMZckc4*L3o$7;@f>DWh}Ieh=~T71#( zi4{9f#+f%R{jkMDJ;rw1cgePI{j}8!uN{5o&ri*HuGTZRJb(W5Q--W;QS(Q$ zfw%rM|IWS_?6hCE%NOms{kxa^@!{pSUAcGZ8SPKM`p(W9{dM-AAO16G{=W^Q3%u3# zub&6jySmNyzyI;}6Vq4DpRnxRcH3XF>+eN-EqP^Gzlk+NYwdJO$3tcv+U(>5e;oDx ziBAuFz4l`RYrXgQ-s^ka{rLPF_Z_i*UB^>z-L}Kgt)@LNWs}99uUv9bi!DE`{%r9- zy^kMvV#hWemK~BB!8fH79)029U4AS-{-T#(s41TN-|g-gvERn;J$mVW4X=Kq(Uduh z54-YzgZ?w;ovoWxo-nWXk~2%ctGf5S9e*0N#ZMj1UwhKHi{?yy=$SgVzPY&JEpvAp zG41GucYo1lm(gvv{Hx&upWQj?fF>6l_Q&E?9y3Uh;mmiw_Hh{<(dxcKP-zd!lftUbQ1F7H`Yy61z#|9E2hzEdBcyZYFKXEx&Dhq zJsN&<{VT&7uXttlzlWFpu*qJ{-+A++OV2#@m^cLc}3HKy?%b>r>)*ULM8z4LKynET%zn@xt-QS;&7~Juu z6?@;+?U~9Q7Vh1#MT-;LeZS53JG^+sfNQUQeo38USO49s!TqhjS=4vr`$s=Hx$c)u zXa6#7w}bB5XOogq|4wY(WBNJQMt0^ENE&H;=gw03xYqevuZw{*SV7&u&{&m81Wr+{2UGnp$wYzk$>3-XPAH8ME9s3Sn z@$oKmXI+2clRJG}T6*c;@10WdWitoHSHISNc)y8zMY;WP=pODMn7ypix7!_dbBlI$ z9&9vc_s1`o-lurr>C5KTZd>=OiaophaCrX?$6bAG{TVHH=rQS;5gVN~?3)Sow|Mvd zHLp)E>Q-5A%-Yp`FKfB(%bE^jW(_@LX}u2pht;02=>O1S)+~@W_9lrc#%IumKS~l$S z)feOT?=xuUFZz4To7Z<5+4`T6-`#oryyeY?Ogy6b$a6ke`s1rJ>Ye%g`}@^&ey-Q4 zqfc+w>-EDH{CAi3YX*I_{P|Yh&g$~$xI6l7+UM10J5O1>Zt?ajm+w)3TBqXgkGgeM zNyk;c3_kzE0bT2F)8~c{Zfo<;2^%kcHHG{OI!@p7;90dl{QCFbk2rJebyGUt`T8BF zY|>%=`S)%7P4{mOey3ioNB&s*?hYS3*6Hd$u@ z)#S^OU%okT<;wYUpE>`v!R=rC>W;3>Hag(cnLQ4C@ZZ-yT~cS;?tlMLcb9j5SlW1p z1_M9-bmo%t_Ncl3z+Yb3tr-yV1WT zPkqt2{p=c#`QM65YbJlSc7CgEZ~ow@V~?m+I-uw0Pu_WW=L4!1@AmS%VgKIO@2CFd z-5Tuu%#0qp?Dg-MfoorRWYt!)ADH{&xf?gEnswEzD<6MzMA`7I2OPOkgQ{)=Cav1& z+q<{lW#7BT-qy42C3ieE`HvezdE&XL7fpQO;%O(3eQLz;8`c%i-sPQ#Hg9mn zgx9Nw|91Bo%XZv*cGr)Zj~RACld=8&IOyBMp8j*<)pu+>tk=S^GoO3!k#(b**MIfF zzUTeew%^jH=3Fp!>RVf=R@M6Pt$+S^^o`ZE8`Zt-tG&)XCxf>CF-z}%aqv$UZoc_D z4Gw#9d?ETJpJ@(#cmrr}W z`rCp_Dw1B6!nswa5ult|>V##rhzut47w^zvEB&q2Aezhh22zuia06 z<<7cozj>n*E}3`xS#MR1+WFtrs(m%eXwe*M;;pPhN`U1jxen>%U0H^w%9rKrR4 zL+kE->Ov;~f4u630X=H>KIP9f{~3G8VefA@>GNxQd(4gBG`seO*ZPjRVe}a{-f-|{ zL!T~xr2EE$F1`BHYp$sO-}+C#aM85$FRpxe=&C~+_5S$N4}LrIjQ;oT(YWunv$sF) z^Tju{Z~oZlAB?|z>hc3`s2Kh5%ukMMwxCY=gTFpEyG^%CyFB;z;yY*m+-32?dLGmE z!xa^iI!);H(VgM(-X2=RMtf{R7Qj*kSIvs*4|- zvi5>`4Xa1DzN34$dEE!S+p_iZyKKGwhF#`gw(i+q7c_e0f?j_N8QFM5qvl8a_`;GS z228%W?+W+W4j`t7S?1|0a-S5aDw=bUZ>A=1-?>lFk@y`$3t@^AfJ3n6czxApg zd3i>~;BgIpn>cVn#o!|+|Ge$;y;_w$watz1zPDYsiFcR(dGnG>o;m-!ozMGh=`{=P zIibzmmNTy@y=U^Z4JUO!cb}_{e(=c`z8{ferEU6NKBHUn2RirOYS=98Rc%{?(z0dimd9Bxfn*W!_T;F_N>kIZg z=&VipUv=-ZN4@&Uh?XN>{Ap3!GwQbaVRq@8pPb!p`nFq~J*d;euXcQP%FFc{HLZE7 z!->b=_{zbD%=z#8zn^;LFBdfEcIm242leV-lbKd6fEL(leYt?_8*>cTk zJ^#7?)VrwHZ_i!2;Pn6WIpU4=T{dmC?%Ikz?_JVkM(tboyRUuM zb`}3?GxWKwri>YL^KJW2*njJr=C|;euV-Am)3}-xV(uiNv><^!5;zWL0%UO0Nc>Jx9Cu;puAFYdE;8M$fmO?K^Z 
zN~`S$E!^?7*B4wc_55jvzCG^VW3S!j;U^#Nc;BJBeRkAor)~1X0cY*ll?}FD^u!tU`#p1OuiY1H@pIEFM>MW!aZufn zU5~l>@e3Yle#O>zUUj;HPn)U)k*4 z6^&nA-KyK!{kk6V+NpzDj=FZc2E!W7|9OvHKDu$EQ-&<=+WGFQ=C^9O$?zMW8*swe zC;!%X_FIdVUi;Or^Y35t>ze)V9&paT&tKlHxO2rdzx3YQV-DT({)<1{rP)`P{c*r` zt6%tg-o>v}?~=?=ReatJ79@zoh?VL-uZc;DF8kKI_ecm+rDh*P}jL z{8yLOpU;|o#kkW~S6=Gq+L&*iIcbk2KfZC-Q_XsRe%qZ-G`O$2!E7Mo(yRFCkbY?}~`OeSRY`(?*tF9k; z(Xv7Rbegv3Gml@j$-K|^IsC8le?0x+X;qJId(Wa>H%(1?c*#kZben(wlbg+b<)?$* z_~HI#d%bO)e|qokVMEVu-}J(J))uWe|Cip+|JC5#-41)} z?WrTa+~Ua7H+$;d8=gC>$BDi7+W3sBdoEab`pdu1I_*D4Z}!WDJ;qlrs(J5^(PbSy z=JxSFAN4}B%^FX6@q;5qP3$%Pw_f!|)|z+05mycE{=^k?Hv0F~QyWbG&*Gyt`}^bj z`?Yy=<$aUSzy76pKaIGpqV9;lw*T*sr(E;UO{Z+~;>9Dk?pg2L)?4aDeV{xkl*cORHjtNxTLUthKF@cY-?)-7!bAFgTl#!k<^bl`Cp{b$CX11h!| zJ!Q)|M}Kqnfuoju+^g*Kvm}_U)7%>&*OOp9Z(T_1Fg=4(|QY|Nhhe<5k}*t24Z6>#gRlyX=jl zDt=$ue(0p$ix+v!#b4ZV?64NKs(v4F)<&}$tz7;@!^XYN>GaCPpVznh^qFy^I;yZfe$_U{DX6*cKYn*uRcEgz-HZs54`b+ z`9C}|diy=vHY}gp|DW*_8XfcKu!p~X;ueqj^OBkSbZEHvsN;A0=IEhE@6fzz|E7D+ z7}oTsk7|G1dgSEtE(dM+1hJ zYh8^3+d{@3nN>!_;rySJP=@2pKX{dm&RbN1*n;hsAiy!Pd+0nwq?pSka}FV4Kb zecSIw-G6+myH}08=E;4ZtUO`QrZ@L4d3BrjKI;C|ziVE6?BKh$E#GL+i^u(3w*N<$ zbzM01oGFbe4)}Aylfyf;I&_~7I}cvIa^=pS?%IA*gJ0G+t~+YGS~c~*xwg3Lkh+^~ z)9t}~HuIQ9GoLOyxpiraPUAlKyl#Wh-~F@0gBwqt^wiQjw!UlpZLNFU+qJm+T|b=u zV*AVY8h`Ke_rAZ~=vRv7UU9*w4y|^0;;zB}ENI!{lLL-B`l@#-KHYvq$B%dGd{^1+ zTmNs;nAzR;ob>f29Zwm)Ytil9ZuFQ2!@h4gdCy(nIIHKF>yA9^_X{@La&3#Rwr+Cn zphG)d{6MeDQG*5y-s-X$FD!hfd7qI@ZoKNIQD2uo|Nf6pTrlU>uj`yy-g5IJYu0~x zS%XHWt@-+vg`>KBaM;bK*7^1HZg*_5^x)C^mhDjMq`G}R9Og0G-tpYfXGhl`((=&X zT7Gxh6`kL{{<^Kp_N%k7Nvp43n$@6Thr9Mar_b7!+wL*z);3Faf2iyF`Kw-gd*0Xc zT6Q?M?~(Ukd&~M&#bwiOX@CF9x8{6s?dYFc4K6rQ%fr=<^+elqm5 zQ}^Czqg8z`o6z=^DbKb0^}yYl>~qPvjccZ-HHtfW4d)F$9c)|}d&GXuJ>Tx9W|itX z?3m5*yW%CopU4&dPHr#_^J3Kd?OP zim_V}M;jYT&vIY ziU%gY1EyHKr*iLijT>}XM7gB7#$~?|KUj%#X{u=*9}PFUGi`O{xJ%$&_}$ih>^1!h zwit}~+%sR~HhJC)d`>F)j__jQT;Qj;hdviO$2qXPIKC;dwsP0FJO9}!ahu4X#e;!U zmFs|+;l%radm-CZ4$%dZ1lRx8aM|Hw&6U7~)NWGuhCIdMsmXhQR|!`M*NXEoa%Xd8 z?t6DdI6s99x6AkWSa}Th2=*GW24HpMg8w$GsF{khp2%I#{ybNqHkD;1jBNOFa0~0t zx`$hi3-R6ioU8oZvCg!P1A=S6UEj@V#t86K;Uee$VPH%_sl&o0R+p~BXLiDrllNeMx2ae8yD<}S*S@=+ z*&OiL&7kovVe!O<&t=~(i+9O4=TlCf<*rp`^5_O@L++AlbHg%+*%rSG_mA_VVJ}wB z#3yBv$QwdhMJnr1j9M?Tg4KA&Y`y4w#UR?ggee~`acOQMuXS!{@ zt*hW`3_*W4n=_nry}j$wnMCnHa4WZ4++EGhyu~HlvG=NAF<8Z6g3}`VV?)g*!UNq) z9uvZRk9)ki^K$HQ;X2p1ifEIpDcCl&E{gRDe`9_FC2ow4!3cLLZg8$IXXRMw%JtlM z&L6-f=>C`?ce#GKoA2#GHg)WxU2V=VIKCZzUNuU9O1b zOBt75h7oR86;Mi9lxCK#Nb$x8FtPF&&T>p^SjKWyhzqtq$1{i@h+DgDDL3D9EafS{ zP$7m7wi3%a7uItjWVpqlp^O7@5ho5tH6=_(;ylA#$x5*3Sjq@HSpwpb8~yzyU(8a^ zweY-~Sen|+`Ejm+=ZeB=#2T{H;&s8G%gy$2x^&fkC7VWO!hEdaXULG25eVB$SLDYU zrJXRzA<$#u66n|1(l9zIaFAKwzl*C=d4fP2L?q)A7%G{W0(U4z;yWPD4FKXes)KoGpa0xI|Ri-NyoRRqH zSde_J_$jzfpLa%oInB!BV8W1t>y!j)U&T8z!_4o1*#*;=>>Iv(QW#GUX|9~9aAuXI zK(T;ps$<8_d%Kh)0v|LlBbGCNiOn@T46)Bfb=^DrsV%=m%7J3sf=-%yXmXI1EwW4rRH2{EJ>VWonzPBF z0FDSOAYwPX!45XIV)iSqMcy6uN5OefvTBpiS+IR!KgFxCTCS<#3d|6JEdVPS_OrHM z+=1AyGcY$hf6IG*bd2k)K%5IJnbB1>@M zd&H}(v=ka*cje!Nt#~uB_sL@f+IUYD%mCSz#3Y4Rjtnh;GW3g;c^jrgGPT*aC0KYd zYG4BgMDb;?Lt1-p##}y~t3ow*s2IevgW#LQ1i{FAOnfWiI4e~PZDtVw>t76y@Izql z!l9yq-6gcb!lQ(_T~S_%=>jmp)Sb_l*I{1ETt@Ko*$nNA+E|yojbdfVnXsnJV|a4$ zlN6rWidc;sZsO)grK z(pu-t!gPe&UXjN!Fk%U0y)p4%MI2zvYKwCv%)`#H zEkk*>9ndcCFBvi4l>7sO$2NvfQB~{A4d5#t3Vu)?1D9fvm0dck+bZFPK@Xo0Se^|X z2T^d3=Mt>Iz%-_oF)X#;;E#5(2IIF^1l>S_9A}sTF$-r~0uN`<7>BUG0!`TqbFQf)$!Sw+FW?hqD3G|F3Nmm>4{Fa^vr%}f`$?Q>6D_65cFCST zUkDO%#1Gayyf7HvG09-`X{8!Tc2||wuTW&Ya~}mhOm1smALk^Cr7Q-#G65Tsc)eLl 
zFqYlQT*g1}NlAoe`dY#xlS_+tjzJkCG#FQu=A*-OrI?E}n@;I;u;6QKq_HC8er&yImG0P2T5$rIf%o;9)mJi0P++<#=VedB{@4iGbaFADKQ@g_SNVbRKaFb=|4|0&hSpPfJNXd?y@auq;3jrJ=WrV5^E`&n zl~44Oxp#VBUQFCtge~r08$4Io+6yoS4BBHM#XODSn>mroc*(X#F0#>v93&kh#C*d$I~ zPwc@HV{A@Vd=gB21!YEQ88hU940HoMR#F>48TBnrl9q4S{n$I4$aw5n+HBYGB0m)X zjN6hpRd68m2YApi@NQGYL`dReNyxCKh~up2#@-FvuH*n}C4zqZ4Oz@o7Bf6f6p);a zQs%jc8CTrgSVUEE+F>`qe?D&D{L&%f3IUgAp3jI^8-Rmluz{6843XCpn;2pl3NOZi z8--!h&MVJK^cZ8DC5~^DSV<n}*>oGu69D{gbNdb6~*@lAI*|Ym%)&xn(QVfL=hPEgz zmej643r|nWAR(p@19M_|pp*%o6^9bqrg$2&Tsz4>2HfI-F{fl!n~j~A6pv9{MAnM= zAmna=3em=;%xX4r)v}4{^lO%uM=&RXAGw1Gc2Z8Hha_BJ7Vrc|lcYnPE4cGXrnZl< z1JHp&q+JfQi~ARGlC_d6O&xzULtjBCVGwWVfD2d^UPgn$e5#6N$WS`DJqF*H1#{(s zfuiML)L`f~SRoTwsH8voo3NdQZV$p9q6AmX?5wtJL{59WTviBK68IPgwh=h_C{b6S z0w{;WX#Dzkd9`i*{|(}+?KKvj;0>2H+mG2CCwe)WCle4Uxxu)8(N|X42VkfC&PJLG z7|<`Y^WB2hOAszhPF{+R01C*^W|rAe<_5H+rw}ZPFk7(L%NuF#T!H!=rTF4mJQ5pf z4f~Z!X_OW}tew$~GdgG3f^=cAAr!InGsb)vc_~WF^iE;J2>*HKt71U8fWCygxJ&g2cFfg61d+p2QE z#$uQSBRgQGYUBz2o1#q|t12y7_R`V=D26_{m`2^AtU-Qah&r`0E#WY2jSpeZi}H>jMHG^1fUSa{mE@2W!sc2I!_Zl`u?n!8N>wOhAKzStCXyHA#*cDB$%to z4fp}I1xFypcrixnXkGkB1!zIZW0I2MdPr3uY49k+(?L4kxz-E}L>;Nv@dOVE{O}l1&q|3jj5#8owm+f`XVYcOk?-H2 zaq)_Sq>w9O1teWuL5xPwrNQ_{bB+{0t9^B`=!3zDBV?jDF$(z}1O9M)6vo<=1Co4y z3u-2rL;w`T*&j+MaxGC2Pm_~2z%)_FtnnEozMwQfVo@@a1;fKrx=T2bk=i7-B?_>a zORiuv7aiQI1@oYFVzkW#Be(VXL`86sUJ;9AQy#^|8=ELlrV5PX#tf75Ko0e;gw8~- zHlP($R8}yzl=+JYDfX|F5dqyK@QWmKi0Ukl$!&n5-6vYd6&~e|&n&-l$ekaoJS188 zi6wHqjS_6+i!G8e0)pUS__GA~3E~J46UT5Qg@nrhHR`Wu9iho5qK9}kRZn{4jf;Jv zl>lhuTRDZmz|xL{iV;|>P=`DY@T%V#N(0tvU}Py+c>QkR9*4VZco^58I=)#fsk)tMWjBKqw6ECT7BJEjeGi@gj1}T;KQI!6SI4hFZFJh5Q6Cv$(Y} z<#_oPku)guDY8#F`PXBtTS~s%Ut;iyTQ!CCkEHd-q1c`Z>m1sVWm_9sGkOK1dz7Q# zwb{QIz>aOarH<12euWIZE&+yZ;WG_|DQ5I+?m#e$$t01}$CxH*$2339i@hvP{J zM+=Q+3!08MJJ(SQHrg`ZQt^%BJy1&tVDsqXT1%tlbTq^S#1nocNBcv<%@R<<;>;d* zWBbMbM7hPFSX+y&fC;?_`4_DcHLXz)pSI6C9!(C~lzFXbGBMS&OfZq5iD)tRgtUvP z7wC}3@NdNlQpc!~YPeOa9e7(ZH0`Xv_(=yw7KpzXx&36&zrESm;QbUs5Q)a04 zNZA@q!4%5rW#WQG(>6=M%PQP(ss`!!O2a_Nr$A*IY!4X^?SR}Fyz&_4PYO{($P$)` zl$9jIGH>s{L=Xie7SbX;ED0_%=cO0f7#^btP-KaUOTp6MTxtI>Fn|SIP-Kc7y^LTS zDiJ0|%M?f=6B1#un{4`EDw=Uz7PU>kKMD0vS6EWBS1+>tJq91(-mHixKPm}uU*>QM z*#ntA5FQ1#6boqCIDIOK@)NsJ%&-hM6LA3YP$HmOR1%W$?*Lpvj+s(Q1k5(fO?-@{ z6_KFY@&TQ3Q2Pa76aA!FS*5UVkSl}-46kkt36PV-3#-P+T9?sTNbN-iM%jU{Eq6KY zrGBU2b#{2tzWv=@D*PBifKdHndt%+~3isDzFa^MiR=*g+h=F zs3OKU++LMRh-YSp!)YNi_m52EQEErbVly|^RC$Svj(=e!<<_yER0+AL&81F8`1Ds| z;_V&i2EdvZh2kVoC`l!@4YX8@ob#|HG0`1GB^3gBRF(@i_ZX2Rd)ls4GqromqugTI z5fmW~mbEEqfGLnaR3t-VEb6Ja2XNtA+u^ptZ*?hNVs$}yNrnmq;D;TC9|S<9%*diF>`AHwW!74211P^72F4mWy9^f36H=qcc~ ziv&v>CCNf)ei_cG$P%D^S(skQ-mz!!y^eQjd9@Ak*i?Ke2@i@&nPR(Z7vgQ@6 zXliny27(HHB@I#JD{T>!RFw%&03dy(8nsAm9om6a(HRSlPs28FfvnonDguOM8BL1hLiGqmLgK0ztOJ_~ono*hkopns z55+hy%pcKS=$&lb}Uz7JmBHvqN_1N61?r5rdF7s|8GO}7OAo!0s=lg`` z{1_T1-jQNH;0NQNYStS|ZC`I@>HKzmzQ!`aWGoE15$j{R>=`hb-DvYb+R$>OZU*m2 zF@~b9Z%dXTzXnGMR2XIFZBrqif{wBxvW#En@1bY(bB{r^$_k@Feyj*?t&X@MfKae9 zQHCX&roDtd+1#E>C^WvRtR|uY%N6$Jv_Mo9jbJ@&UQhuI6?DSIJO;#xk$i%UWI-Cj zx&&$XN^UHUlxuD4{D@vhf2Lzgz4)3Mu&Q#LUq1YG()N6M#{JM1$_G>6)RN+a3MPcpHhgBS_%z20#@oi-qhz1IK@D)1~yJN zMv?7JGZRq#b!`x~s$~cddJKAD?-Dqv7lyCO%05SrIvsV7H9S1%uRn%8^i!6SWSX^dQlJ?IoPgN!_L|c zT`09g007`qQnB=`{u++(S_V?j7b~n3g+rOd)X-v!Bt2a=M7pAcPOqyrWDN>o)~=P{ zD@k!=_$o=tL!nstUU9*8@t@L~Xwv{k6;9Ju@D2nZa7a!ON)*>j3DixwpgxGF{X=n+ zRFrv8ObE}AQ)x>x1X-?PoA{KPJfYN9fwT;$NLyUU=GY0)32$LaB6^HXSL~;u)_S)vW@=qK>`Yk3`Q_3oFCWPdC6{$=V9-j%&*CVdlwnn1n|h)L zgG1K1_a}x6iD&yU#h0+8qw+u6@|!H zWaUDUKHVN-<1QSk?4}4wLk0tUe>iQSU%)aF7P5>e(uIB~e^ieF+(91BGHsnmso<^W 
z3-^Msg(p2IQaLCM0jCTMU_?XG<`i9C^~QSB+m7@GEJjVL41WY%U zs{n%t!LaHZ%1&wU1O3uT0lAU|0bG%g?5qsMuwIN(;fG4qyK79n-oE5Sg=%}IZE#|n zcBvx-K&hm8?KCn$Wf6iSIBS`qZ%O->ryPjHfkdhYW(>>0D3$QimZkPAqga-*jp#!+ z20saELbD;V&n#f$o3BxQ(6~jFYA4_VE9#1PAc&x72V}vu>Cv!6F#4nlwX!r37(fNi zHUfs_j#d#+((H$~k%*uvpjk2zU~o5&5qpJslVvIiz2Wo z&J;uCYpDsdOnKXs$#__molsUOq*JZ!cZ&zH$lN4@hn(6@ zMkOG!AL9+c5fH-aB7aKH%0J#!MG96AtO91#fM7=;7u(HyDIy0X3s;3Q7*1e!s}8^% zs8$9oxl;{AyueyzRiKqhKx|Qk6*I*gdnYxTR8#D3psLt!K#(ddwv@dNagf_85W{K} zIKbP~p)aVbj*ZgtgwD){a6%HSGS;+Z`BAnO_4H5CY~Upf6oO0r3ecvAnGq-nctxr$ zFG)wv_ScC%suqx)fR|b!-4NWbDoK*Y!Yn7jH!w_rHyvmYTk@#H4^*K`_yhe0)WLXE zAqox6p(t6>))VCj%&hXHph@h>`ln$^onV!%lE(mPLbqj(-rMT5cat3y1?6OQra~94 zZB9WZ&BJo9&j>UOW;?OG~W!PsL69F`j!J4dz z6pPZBU_ga4Q;~)}-;bLCZdNnV3f9Ft05h^Z#z$DCAwYcwBDHWCvsyxk_M?qT2a&WR zK$+eQn&nuLDMK`jmAxf*8w6PtfytgtE(Rzf=#!o7w#d(l#gr1uE8QAmD}H0B$tNkp zcsVVA{$guvl-yaE8vuaLDsDqyQMLh#NJkV6e^KDF5@AGOD###pz=RCqp^gg;6ac6i zmd;;u97JEJ^_ofzbr69dESIVz4hr0GSnq!oqdAwM=2 z6^|CC!wG(+;zz);YQQUrMt;yWmA-D2glGftc??*mT?5d8i*z%nhrx#00n$upvyxC# z$b$9#z9I}co)DHUcq^fQO0YUkb!TSo1TtSEe9JM^gEYCJ1_HasfJe%zzy)X@nr;X7 z50FYXN}7^vR|`zBM20cPUwdU<@V`iqo$H0v5|>5;H(sl-j3#7gm{!VNDWE(pUz~ zR)~WThj!426*|Zvze4vzYO&xk1}@x|r==?j8XY96s|`Zj(~wi6uPU`rN>P7N-;$JM z#WRFfPh=I<0?LVitC~Y*p)tzpnFRk% z$+7^jPz`_)y+IgML+YeXMu~*3b3TCv2{7oDf*-WP2`mQ9Um@8>>Bhyg)lpW$bSebI z+cYAI%~mortEy=jQ(o0T1DL50YmW?Kl2}sf0XN`9&4o04*@hyC8VhwEW5F77na(h# zZ&c-jxC(gMa6ZSMv+^s+L;$NZCYrf~x*RCMvwVkShe@F<)L#t}HkPM}YKpeZmAV3KIPIV}UVe}TKtKY7)s!-AtM+}~I3^xIkURa$s zE%a^a`ofnf)}@iJ0lt(xiFRtAlC9{7YgV-uetJb`AS< z2??^;xM;@^eMw@`29l2Gtokb@S&;{_v1CLkkF$jeLYES*4pm}7x($gDIFap1%D`Cj z?bsf`CMrZJ@;l|q0(c;07L-aoTEUBuYI%lakP-CmDib3`y4TPRtv3cO$4gY6Q6pm< zJ=TC%U?4`tKCxPTi0x*t0%r2KpqhGWjyDkAbzs<@qACp)SFpNNMdTVqeM?VNv@yz) z2($svth^ktK4oxAO5Q-5&6%HqBOvT)TbE`HrG6h#%8s@TQuU-PMkSZ8gd9R1ATQbr zgg}5=f?w7H;xTj)InL1QbK-SuQNdz zLSYeuO@Dt8+(0S|cTC3snbkjejICjS)5Vmv;CcR{W(a6WZ(sUL6wwgBtQ<;(u$CGQ z+Lk3E6_lb>x_U52^+<|vKp(EBLkgh^APL6y7)(*@HXu69j210cZi@5;lThZB_E2g8 zD7!u7I1-B?!!yakR;Z>25*OvwSE1Bsziny?EdhmB>Xv4w;ygyhRVq&FQ(-TPz!q_! zkmgj&QWA)Wo(ZH807WViFlxjhkPvK(P?;`Y2(MbXIkhEBv0>VR{@iXJ-jQ*A7aOcDmrg|<;OusJo{ z@`7v!G^CRreq&5lu031s#`=m>!-qP}tfKcxpvd zG>x{F2+*J7VDL)@O)&#W)}E+2!i>;ctQ*}TbQcsOX9iUu5VlN6p_CjjBk3f42bPK~ zDar=dsbW~i^l*etK?0bsN&|AfM)^pq6v`hJ|G?c;Q+K&NmHf6d3uqEr0?w)o@KPn5_R#!w(6?C}AUlZWbfA~V`r+mVP zIe8~%NN+^WbP?N}`XFUkD_lxVLMIjIh}9KG0G4^EQ__(_O7u4%$YUrXV2y?3KjcVZ z0BVh@mh%{1-CJvKQw~7kp7NR2%nj#$>?&drDJ~h4z^wa}E{L0uh{`vV zB5Y=5M% zl&*%zCngly63S9brtC>c##dsAEa%tkBcg`(Lx+nz!a*nC%EpF_0Yd8{Dq2xwtDHkp zoe>fRd=RE&Gy09zFI`ws zz^OcKAyVOlgl6+8BSWZ2sTvq!Al0e>l@w{!3$U;xRT#)GI^BFtDm=2JlM|FZ^-**_ z-ByMl)%2%57qY^nB93XbkPJZuXxU_0+Lj#Q3s4q#EDZcC?KT7z&>F)_iKH})fV!-$ z9@T?c=`TW*CF9v`#0zp|N>c@uOF7?m)K4rEN6XF1wTqOz36nxpn(G9S?A$^vQ{ZuC z$%@CqJVtO$p+_hXwspk(m*ikoDVVT zaPS=aQ)6_yBB~|HA^+&`R%>uU$}Y%)(h#O4y+`wNOd)flOrf_&Aw_J~HEIJ}w9dwe z*R*Kp!XnMnWgxweiW3xjWhx6Y{K@x0>}g|6aT4_?f{(E+I*{Mgh6fuY-^lJxtLbaS zRRmQWnZO4tTa+>wIl~f-xZ>!B?o~Vv6DPDOASog)doIY0da?xF~ z0D-Mf@hI&{^b+(h*vHqTvf&5kBrI~^gtVM%b4c|foV@0Ys+i-Sonj$!(okT0jfuLc zo#{kFaRngS;NiizJGO7-;4WrAYO91AFtx&)V9+Yj z$z5r`Q+$VI+kzu0O6HW5wUD-H2W|*JhDu(`u~54YsRAKsG02KpBXSj~KTDS$V_c{< zScj;H=PKvh;i#O|PdS5DH4qc{#UH|N1f^oM#WA6rj}ZcdOtO>0p2Lm4BeZ~CfIQ+! 
z=|PL0pSe+I!kG)FIyB*7Wfrye8k^k@(6FD50OL!umD&xzRBRiAFM4kTQveBA&;?B@ zw*16rSW_IKa{G21LZA$W#yN6O9*2^oP&vz68v=%?4rUhVCVu{gWP$*we9N$QlT#K> zYr|Ypwu2T5*=czMjQm1D2|?K80P734@|g57(qaT(rInORXuz~6f>}=FlPQ2fE!9x5 z4g|6tbz5{#KM}bsd>vwkP1BF+P=#u{JO)_gK)OKV-(*;i5#GH3!H(`>HxOVV{pZ+f zPKnBK3d!1M#4AlyG7L>4XBVYPJsCnWU?Lu9_ofu!afl#APam*C0MsvROY0sZZOZzN z-fH^D$q0l=aDls3MoIhxl}b?rIjHDQ(h?7d&gMKNEK5$L!~{10V>=aguyL#zOEp+m z%!(xBF*)F#qdBG(dayg>EO9a&@oAwIR2-U&N|k+!3m0T)4V8H&3S{>D@jxMomJpiL zMd~-+Oei7iv)W2Z<$i9d zEazvD64GIh=4h1;D3Hhj2}xHeMNezmbNnIE!s^PPX`PUy#f_Y%)=s1+L)<}^kSowt z!A@jEO)Mk9LX$7P=5QQJnpQw#FRlaPb}?|SchF;l2802j3UE=J@)#kEW|uA-1riir zSBJ=@k;W!tRwm>hRXlYgbh!*^+PCS zA}G}Y$p!^2allzE1~Em9VGleXmZG*$qA`fH%sVY(UHTf$7b;Ft6Uu8Im7$awKrTnq zlzb!(LxS`W0s<~<3wuZnCCzuzrnY1|fL==>CxNO-i%_t$E+p~WK{kz(rFo2+ZjNUP z9KlSgDLHa4!MJoGT#x7i?>P4cSSmb|71e{KUkW$EEm~Wk&MW|3`Y%+E!u1?S5erOd zfgZue9s_Qr&q{pI3M5nbe8C*mwSJ?irUX<_%&ipNSs~=mE|o#Le-1CI$5M{jbNz* zgXt*{a06^7<%%8FfQ%jxI%1K!m zPz)m=sIW^(>48(jK>slrSq?z6|FxmxHuzy$SM7CyHGR`+jKm~jYlwRWFTEv%t9*^k zWIzEIGGl=~i8CAOlTnqCTTKJRBJybzW^W|+z-mr>BI~fIpRNf6aImbXEVeQ#g{bCo z4ZUrCbV23F!5uj{)yhg_!UE7ASe}!0%Ku;}5DCdpI0?owQm(}SuH~AvA_&hBr735y z6gV$8Bkz98uC}jb7&XZ-0^was6!0A=H;EJ)7|afksnW57z=WuX`>>zx4SOr~Fav4w?_dp7iA!JOR-X^PNrv=sLxUiBsNAwk#xR<7W$zo%B;X1)_H;fQocb zNRU(_;85w+%%>Xw=L0ePhoUXK` zZ_DZeRmBCUO^AbCAvI}JssXOC5lcZGDPjXMmdXj;6>@_<{I8NCvZB+3rmj~(F4htf zQMf<~-A$C11ludEAUTa}?vM%F5pRj~piv30mX(vm4V!bU-d24@=2Kv6t?H5Jr|lfs z)NM?I^1~Ij5q$};&;UbrkO-Vaz(zEIrMp}oIxdbj(1T$QYP|U3P*$$R(0c*-0X#m) z;?TL}70aEcREG5cEYGfsG;I5j1gJquR0d662gfxUA_X#Z+g#)$1LFwf+w=tZ9Ou&0 zi$d?V{9xx)@3cZ|Bd4S)87Z?AP9}#PQUm-+%vu`-m?wZ2pgK9k3bg2vWC$XsIFvle zvG`#S4w&LCAq(B%YG1-$eg#0!p$6lQMNv337iz}_kbQ(N0xola7(z|6c4C^~L;}U7 z;U?2Wr|{nTwt(iOHbMcF2$_KP-vF4ak#>$yDL`(Y( z(V}f|nzNdw*oq)VXT6es1z*J?qL4=XP!*sCsD_T=E(HX&(ByjOZGa0G7$9p_zq7eX z%(-Vkgi%lX)lppz$Tnp+83q-8mN_6mJ=&!=XY!+_n7I$pmrt++Xd-- z1BL8sEp@qo!d+C-OU_IX9jtL^0^2rf1yV>P6JmAGskVx4M&WMFC??vXaFV7*aRkNuh!a!_8>v{WS*X@zQ)&R+7Z$ABNXB+x{` z8ww$*`D$TADy1vA#G);fu^he6!2n4dc1r%k>C;yW#nB|7dd-stNr}{vv?x)oUdVZE z(6>-qlZ2FoZ5^quN{loW0UVaQP3HYThcqn+x?r$inDV@?b7T#i@m*>`)&{@}34m(p zI}@Z61#{7ne&y;6T8cN`Q;s71`9A;{)gp`rWcft}qZD>janu`VLbw8HK*cLzn>r^G z_+{Zd9U(c)t2B}XZzQn~@lcxg7}YBfBt4a3Sg?yYK!sJeh0(1mM7ls(6Bt2Xd6#TT z@CIZ=EA6ttmOXD7M=z83Xt(J$kCcPLgS(ZP8*2;uOffTB8aAl}rt=#QG=j7W6?Mdj zG^y)o`;tbZ-r4Ba#o3F(itsl`WLX2?Wq+6DT~U1J}6WoA8>`R zm(o(&^Yb&K+`vnztF$JPdvG?XJ(&-lP#n;IdSJa)~yV7Utjvo-7V) zXoH~(M@a;p+`~n~0TP4@X+9c+*x?YLEovvK*aIW!@u#=B^aUS5KK+Z;;Fzp)AudI; z?9)O|fQFs}w#}d81$awopn94!Q1>*W7!G4Wwve~d(5I0Ig;jRK?#MY#prDlSIvIiP zL|fDQqjgoN6qbQH>=4i_i@jG&$XhMi7^f{qRa(L)X%m>JtORvx%I; zbp3-NKy;K`5LetVXYd5ZbQl9@fH6@42N6Nt$AJIB?}UGCK*pR9AI2b|LHrjK~g|*p29&uTT;9cE-ZK`F$FXybv9g{AshJ`fl-u6jm=p& z3`6pSLbB3OmY~~0{lGs4rQ}pVE-8$1lT~G%srfj_QD_Pf?v)qt9o*IQsx1*C3!M|7 zsSX!!mfKL!G4HD9K--VUq^KROK|J)^CPRLf9J5Mn)YNBmYv?hu)FN{QBbzX?DBVlH z7zU91hjL2W;uV~#4M0?M;5S$}$Bzm$O8iLsfvE2uLCG$6*mx72)$pP()exhI2L8eA zY48?9ohf$WD97BC0TS^PtnCp0vDb1EkOog9O1GRkZ%LMbii1!ld&7Xpwqb={K;@|U zdP1G3aoikBr-^%8o0^7_DolD(=t62A@=k()FdvneQe(A=IGThcEwrsB`$inmY?T9X zUJL=(rI1UV;KC4j5IpLKa;_>L_$oiA2Sue!Cn8J%6@p(j8R1#Ij+Uk*7AaC#hfwT& zgf9683`OFFB6H-3kdj%LbdH_mw3D@~Bdc)R65?xOmG-1c*KY~4WFqa~;=a!p4ed5X zt~C1s4X}cGpE9Y2|ICdBYm@>ZPg90iWOqrniki7fMgd*kc2FT2X71jzacp_y8bk-I z3|M3ycNdDbDky$CB|4?R%#sx&1PeGGqb>;3oPH~f41Pv>xwOyZ1|s?kg#xSU-TmXxs+Lwvvcc%7Tl=NL_2PLO9MQ*)f`i4zfkNp`ScjZToX1S1o6 z6_G19p}5A-jSw=rr}_a7VTplm&yCcGf~$ons)&@)UA;tf(45r_CRAk7$W&-c@L4&n zjH9MbBup00#ikSvr;1H#A!*_THbrW3FeqSfp)0nD?f{t0ghE!o+il^Z^ zhpEM)aAJk{k;C=ybrQeZ6|LC~>b(h~-sXNDkt0nzL^+=ef=Bzx_-V#T2b6|U>{_nR 
zWg00XNSI_PjXCy#7?yogDA6Z_=Sr%a6caAx#GJ(p?Z8qUaR7O9D+NJFI|H?vh$aF{ z8gdZeZr;+_Z!nx3&V@0WbrMq2sOB*R46W^j3J5e>aaN0#6TQ?>N@fcP^uwbQV+c7o zk-~HUy&?%(q#e$pkv9D%{nU~~>0toBAK5QzE9hPpJq?*D)PYqTt=*gnPxmwVg0@Ie zb5hPq&{YQ4N~-vqIxm(&MXp3NQxn6WbQ!6^T6!rV!_ap3w-BXf; za$l3Wxr~w<;v&nCv8W}L8&qVW1r-y zl4+z5^|h=wFlj2SASqWVp!!M^>4cd|Q!Y}1rc+0uoTbw|Wn6hY$9zdieka$|DR${p z&>KfWv11%dA_)sI1Pyc&sJ7fg*!60G8-S2|Uho)k%j(jTso<6#8+=~k(hTB{QLG_< zBSAFQ9gC!6hiVe|OhT2e8BvOG4+*A4l3Ha|Td{=&ZL?a8M#B!vv1*csi6JFLg#Lsax}w<#@F@@X%rR|5B|nOYfbh$s zR!bx&+fcR3u_VDs;oW3;cO z&VyqC(HK{>M%dfF@*)Z*-WuG|qeaQb|3o^cT&WYv+bX!x21!+lsVPiEgCKtBS ztx>$yF8`n0ZPrj$OvZW(!G_jEVMmoIrn%unDc{-)drxV?(Ks#QHlBkAV7U$`m3cA; z{He$cF&Z2DfZmLfV*a@c!q;T8!p?*Qd^XCPl4H(yfIncD&I@XpbYm4Ns%!A^c$>w3 zOmY_vt#<-xlxON9f&d9&D1Z~SLrl^AS=gS+Fcy%>1Qfv#lutF1m_jS0*pvUFwMt)d zs&8^`n^Kh~5iQR^kY9(c^qHu5N_`d?_JPE!v?cE<8%X{68evMn2$J!_&fDZPxJs!4 zfD$&al+$t4xDD6PnOz+?xgbI?5jns^Dg#}QI7|fq-7}?7RC>#za#_-FmdBunDTb?o z5r{SP=_df(-A?3<3fq`6s=Ps+)eUE%sjsIlrwVI9N~AO&e_y(Qe;_^BT8Bznms9LX zcMaj+W0WE=uUzA*V~TW(xj?Xm6EI#}bFMcg3M(E1rU-@`A!JFF2SiJ456>Z@tHD@n zfd!BkFs_?G!Xx7pj;W@$Ms7*=l2F5DQQh2;=%c|Jh2vacpcZWM(!yn?2`MHIkjL3T zivUZoPh^ios4THr(vc?}+siz30m zx{|J36cUp%h~lpN?o@_^neIv;gKnXdO)HhakbEuY_!^aL2-*U+1hRyExcY{gwz4(V zNP7w?O)I{E&%uG597xR>(zZo%VBJw>JJ3=#tV)JzYG5sXOt8)gB=9~0ao{xYBEu_L zco?-eKpQ%T?JXBYZX19&XX`=$POK;qWIJh~$W2bRvjeaxUZ(LR&2~j9DykHC8MV49 z9Zp#SNFLoeZ~>G9ptP7d=;}|w_9VshbXhkk)FyqZD%po}YGJf;B!Da@OsQ4KQgA(T zCHJIi4`$O^a(KjJG|}h{%RTn;rw&bBY{Fb{E?i|U^Hc3h91Zv`YA(#7af zBp@_aYvrqzqq0bhZK<%T_c14neHg7I2#5YH(&-N2p13?el^jm#Bn4-BN1RH>0tDy*e1%%{^c05B_@ z%8|BqsUnrNv_WbT3U-mA(l~`b7$FQ*e-s>+ZjFfj>DTeOkU*M`d<~8%_Uen1y>av? z44|22A;8|=QPL6-#e92wGuD!2(T=3W=B2&}57c~5Vfdx3MK zh{+_i%P|r5G%3@4J;GkXoeSL$w8b2y^On|c0n}=RF{WC=2w2W|1z{k!SurL6LzA&% zyO0pzPCg4-Z?5vHmlQW?n;Jj73vzB@l5OZMzDA&uA(L^VBo&HXF6eMPBmDv-PbG9B z0&zWOywlorIz6o)a3dh+!WY~#e@EjcCTQVNr$n3$Jy3F- zvRY(1=l=y#y%-_)V1F8XU^BA2S&~@`eG2%e;ZBE-n4NBEu+$HLSCIqRkNllmSaP+8 z)=iPkulgEQJ( arjJP^!_;@trCp$t9n5BUfxx2(gguD+}M^h!E~p#5;ATloEzRO zMv>f21Zo~S@gf~SA?YSf_N2Syem`tFdY(+;>Le|!!WAOzs-A#Y#RWel6%++>)oj2} z;habeqem4L!!Q8#%$(jylLBeVzVO%67hkvs%4rvIN39%HFNetQ-eO$Bx7%vn*}s8i2(pOn4AtOwFkUP4-%nPApx^WOiRNOcI(=VV2T`4 zRF5Xlr6o9Pm28zeBh@&~fpF{Guk9;j%cHr#g02Niip4e_i9phhvZ!1UDd_^jXw%=~ia4<-E%S&$uIDgsNZE7?+NDu;g5nL4!PeIADyJSuQEZNACxN#0#unJtgh8 zXbnmwo)&ce!YK|fAO_?PC8d9`!88??Jp3oagUeYk(K8ZkLtVhPLlpeahe7;yf7%4L#E9+T--tM;jy z;KEqpG8&BDY#D*TgEy-criqynmy1+VRkOi}kBJC++mfvuR>(28+)Ruhk2kQ6w_o8k$1^lW%DzQDC8r44lUxJYuP0gNQ9|Y3PIWF&M4F1RNSm_G+>Zwy&W} zDAm4*cI!Pe%6d>uQtI!nQt^lUEdf_Eh>uo}%>_B|gU6(ThKr!`8YW?JjZnGN(t~p> zhKgw*rEdQi^};r!;^1qx0~$>s3K7sUOd7TdQ?pUu_Gr?Kep1~OHhVBMz-qG)bip2R zg3=Ghq}LS;bdVMxLzL5EAhfHrlOq%@ML5Ex$(6<(DKF$oa{~@iF0>Tn;u75x(pthm zFmyUp#X)*JNR!46{8+CAWTKA>fk8uaJR~76e4Kzv^bWtUe{|tr!q;3j7#l8Xhd@iNQS3z7H5;G?ny{o|kI7x6 z<{Da7_mKF>7HN1l!O>?$8k#CDtDGfniIUKa$Ph6#SMzH3!p_nw25r&~?bFz@kTDYU zl({5_!liSjUlx=Nb=@Lu7;TQC8Cd}~cc$i`h2fytfKm_G?K@fV*FJMm(wCnVR0f>S{~d) z6j?~gjQ&zEmmn6TEG5&V6CIU^u^s$J#$b&6Zo8_hF!P0(e9b1l)7Msi`{(mCRt%YFeRAD>su;YY)1 z-}xGs1`(b>USuIx9@Fua5vio8oODIXmaO%c8p?B#N5KsXe#y}Ri5-2^2~2Q;Qg!5ugD8N|O-O^? zsLIqHl7z=V)@oXsvxA!fDRR329-o<<@(RUEhgQz<2;kO))n-Tt+RpXlz6gpyp*m$l zxx(1D>AbcZ6mLN4+%@3}-YjTRu6?QKrLv4}h5SSU(cqLllr~uVlU_p; z^;cDDwcdM-mFUx)uOpfHTszVUH|yF-PD>~4Hn}Z42swMjYK-g(fZ<`jcWg%Q! 
z58)L_VIT6T#HU=NNF{WJ;wHG}z8En@K%fz7P%KmrF?UdkByF{NH`lhIm||>dph6r9 zprJ>z4ImW`n7i`q0p(onf9eHFkyZ(vot0STMt^DqSidKQsf7d=LIlo^`+5sOh{|F& z^(0{VD?LO0Y^vf!DddD+h`VW54y7?CH0jhLO`oGgB2QV`66lZ4G+0|XW@h$skz^cO zv{6}H!m$`^)QN67NHmlePPcfspf?IOtqTs!{X~Tp07XraEyy1i6N@Y%Da2#KmHjC_ z0Xl}Pm)Mi|ufdXo#84y(bPRCO;sHtEza}y#adO6(`m_<{M;Ohburw{X^n0s@ zs&tv*@R1-z)wn{0wzr}s_{kI@Z734C6mI(iw#a4SD(Yw_L9c20w8Dfz$|DE|WlBRn zPc9cn3Pl*Teoag{+M0TqfF!%a634)1RtB`{w;RbY-U7tt`#LSYy(8pYC zmSbMwM7mw6+4M&6KE0imx>REaIAxbtV9(4!@)@Gog$pmA$ z9)r6<{t|X>p;k&!ycaGb+n_-{K==lXl`4ff30J*J4_X8a*+?eUIHlJt=bOmHDX&5~ zn#)LDlA8i3Dqflz0j#W)B6nN=RxP1V)(0F>GRz;p_`k01CRFqDtmF9qf6n>mJm<`F zGVhsZPEL|(k|sl)G?Oq1HbxC7LqLr24PEJIO4ERpQ45o`lujyQ1G=b+ML|T|3W8wu zaiMELP&Zk+F%c^2LKm%{?|o)AZJEw=<~;wG`@UX&*YEmW_d`o6rJ;`XB9g@@O&h-` z{Kx{Gx|C*oK@zWS}vnir5-{7>kw^2h|Cu7~Sf~Yc7bfNS&d{PY~hHOb!9- zgiPaJNCGGXzs9Kub=73r`LH5VQxb4k6hjFwb~LX&tw-!_YR^ibkh6w1uv!XHuv4FK z6-9gm2ctsgu-%4e`8cTOI-L+?FQjZwrl$gx>ZEB_0(TDRa=`>WfeyEcv`{#VV=l3= z%557bWW#e@Rgta~1K|xX3ZSm${6PvAoC(glZ?Zyg@zcbE5gm9D&>5k`be*A%CPI@9 z)G1I}8E+uGLjE{C%oDyrYLmGwir@{X;oBrrz+D$Yg$kB)1FsLk@txYJz6+q?K&?R~ zrL`oq0e1qT-iA5&lOU87R^A(w7E2YMOY{07kiR~Jihy~tJ(Yu)UNqKAfMCc|yrre| z4)S|(FD+HQ251|jGUnDq8l{r#*OW-=13Ry9g3d&8xxuZ>%w${oq|m1-u(-jR2vXwB z^^pElH%_y2_3y8t&wYL$!_!dw?EGSVl@r`Jdw5=F4R2hQTqZzS^HBwO> zWTNRRRPF`~(m+snbAGrLTu|=}4gz5W5vcdrlG~&*Fg~D-VTPN#17@}s-%uY2$O;uY z9RY$id?U;kd*oX@l4{OIaR==!yuY$1klO)-{M_m3gFm8H9pl&;A#%+?ko%zIu+W;z z0w#tu`an&r4NfGf(GV3c2}m%g6LDDap&5n~299i&tge7nJYd`#T+-MOOKq07(IvNz z;zcM`U1;khV7kPtsY9s(IU0U}(Q z2C7AiXwasj7FMb&mzh_ZE?kNbX#2=WfqSJ}ss*JJ4Q*w#>WdXqf+habjzH!FBjj#> zk9dr|Q!h(f0*>3HrSrh(XN_}BHeGZN5=%hB2L_3PA`n6d(5~Ms;JPN~R{0?f?10c> z+^GU0{HIHx_1~fOD0haA;Ti4Kn(>f|HBdbFr8D3nH*fxkkBY(sfM{*GnH4dAGFApb zDALqf6PqB5cnQatPxa+!`um7et()PWx|r|uHc%fyB|xce&Qn`Hd^1t0(h?g)WCDo` zPkAar!+c-AYD+UObAv>rYD%XfjQS2EL=uSa0(uCxJHGdGAsa3WA@#6wdKwwbd#Z87 zpJG#`F2QtEBSV)Aa9WbJwbu(ln;2sTQq)fRMKY${vCSAr!*vvWoTdSxK@CA3mb793l8rt&J(Cz5-PNDKq1F?M@0QWjEXDY(+H;cfs}c6&U3+oP-)a?p%Y) z!jLJ*HGZh$Qfr6zMdxs?7GhHT_%z~f8-gq4a@Zn#Vhv35B?1BxgYwPZ%iwZjIn_GJ zYM>ncUT{*bx;$ZRhZNdPZJW;1*M>#QP)M_9QO9kxpqRIssXDpNWx12GoB#T+g^FUd z?{FG>6F_*R!;b_MtymNt$!f!r_DHK*6LVzNC)M;d0odBlBQ%6LX*%#t9ATkiNPV>fRA~EYvAM zjGsd+gwQFRWLQQfA9zB?1@H|Fp8`arbbx}^20qJj)6I4=A7nWOcJ!Edd5cOVCPY(D zG}UY|Bv_EGw~;sk{Q`-4zR?EXg2>a+l_I)PG!Z?9mSZq`1K{YQbVP_TsZ1nxSOF(W z@v9n!t|hJJDTFB%GK$?>x=qX=rcf3h916$sWQ^SqPo0}OC5=N0yA}xDqNwla09jgk zqVj@B$S`yk+BA|K#Wbr^aMW#Mj2cK( zmVxoD>Rm5E(yesYUCo%mdv>Eq*YVweLE;vFNd%ZqALEKW0DD087LC1nKnsu4qfnyv zwM~`Bo)s!Ycqy|TPEkd|`4t7CI3xu*>Hhf`qXDRRHNrwxQx0nZgVwSpm~OGhDI;87 zwP(?Y=7ff(nXzhssKr#%AV=_>p2b2pmV)YzY=d*;suLubuF5FVQ$=&o1qXe=0G0O=tLwPHbj_1YBT5}`b<#?si)(}mr6nBt(S3(T{X3W4<7 z`h*~_trt{fUzA8E{V5DZ7A;b>q`m1FYuQ9LQe6oYR?2}JLU{dHBQ^F`n+U^D6)juy z20PuH5ir+uP?$6MXkANPDq#e}DB}>5@Ouw9&QAmK#Q^McB`*9X(3{`=zjq6SGx@p5|QaU z5J2?JzC1ZwD-tp{a3s?sF zr0l7Aiu%-)wySQ}bRwy?s9&QkQHY#xbjvW0*aHG3Q8U$GH+-kBL=B?PK91_mrPWh+s_e1N0vPMPCVM)s2y+V<7NR_?; zT`(u1v)##(`sVH=2uj^fk7zZ#jg$;#yayu^6J`jGEhf}h`554w*mcCUJ@DbjkyrX)kMn1 z6ky7DdH{jN)T(Hp;bVtI8~Wl(dw^1_eYBVL^tb_efSz(qQs7lvDTo6`#Bz9P#X_8F zYyip$Ua`=?KTVC=XPjZtyc9~oHjG+=sER{13Wa7*TqZ&)2tWcqkLgGhRLN*iKojru zG&tOe;v9_v)=AGM7y3p@Xi4ga{d6G}%rVy;%0V#S%#f%at?rl)OvFIBa8sj<+G_Mm zp-EoG2E{LN-P_dZS5zS$uz#cxT2YWRzZBIWJ_{*C=tv_$&_>O!_Vx9`r`^O9b!3Yk z7jwm(NVUAAVng#>hY*!c0P@Sx-87DBA}wU;s7X)#bz7&%)twMU6+(K2$RXnuY*)_& z8fy-u>J~>v<1SgM^66lx!T=`C&d!C^R1v;92x)Mg7|GFe)QEM=Cz*bt4fR438rokX zW0L^j0t9N5j4oC&2~*J-6;dpp6fY91K43?O!&EP6bHk0AQBk4e6h&dsG1MrMlz*@t 
zR$D;`HeiH1QKm{MK!(szUl@yY@JHKV2?eye2G|w7tcscRwz2^+0yAcfZrKkOd|*b5_s0W?1^Cm^Xi}uVnIuxj^#RfL)C|J2bx2;QX13yX!hx6 zK*%H>T}lLW0tmS@s}#M^p9G~6sm^(ehKR!fDQ_W5B2-h4m_n0jlxI4Jbqsky>ISt7 zVA!D_MxZgssegErqxf;~{z~$dDXD>!eTtwGc*BljTq1Iy(fUAxh$<=<+u)OF!A*+j zZsa4s>AW1rX{WF~}ZqQrDLDM!atg*?(428ZkRjXWq| zkXc0)AufW;{nD3a3mQ|2Wr1ilo$AaTbb%2pibey5h-%7Id^xcxrM5s(b6zO+H27lw z*V`~bu`c2xGE6E`{6b~uEkMSOV$3Xbr|ly+_>!H=%JG3>0KTyO?=GTIs@I8dR1_ z!fi%{aMzp&mI&q&4>$^=MRPQ*cLjIYFfzND$3hBXN1aYLTRkW?sN3W@%&$7a(`o`~ zvB>-OFkPF%Mn@*!tV%bMiEBsML7+Ik54OCcEJba zh{ClLL9hdE_q`QXOU7uVo#iNsMZ-ixO@-+vD40*r8bZU=d4*2VfMUCwL1==9Lx7Be z;D$PUrSGRpu0dym;t+nt0eQ1wVW_b)5VWb|U@ab;q1$*7 z5Jl`KgrhI657gJ9(?u&;0noH63419YFUGE)Cf*rkWzw2<96)1w1?h8R|sR3{3G(pq4ruY_R&4NWKT$FZqqaS?t4#{D!+ zfmkKO3tak=3LXeV{O58_EWH0-QD-i;IbVL;WOv zRGYD;C?}K6DzfCy-li6^CJ+Zhp^N5XgGHiTaaBcLGGN(~K9|6gFrgBg)wqpq5yEk{ z>C#YdHUKGOGm$A;BPvIDzluYkGOA!;h(*^IO~sJfDkUg*;v-y?ng|v%ozwRYyB|=b zVd6FnM$e5R2MX>}I#SKNY}E4~1) zs8p>sRjNVJ$rpPY+!YYmqUrc(q_9+ldi#P|5mq^gWTOrzcd4Afg#|K^Ii40x6z)Nf zaVf!D){|4~3Cl%Td^$fd$ZZJsEIA3QroH($x9B>+pC^VoZKNd-xI8_~q1_&(m0{H`j}7O)mipxqf#-Kw+Y%h~vP=Mn!bwzX@TG9U=ioghiN1IO(hi2?0dC_KOinSTlq5BSVm%p> zyrf&x4@H?FC(O~Q6r}Xdb(Q3Djwb%vv-J1E0snRNVG;ldbDEXpBNx zmKs7y@S`QA|7cL108RTFQOpH8pd*eD@CVSnw|DOAio`)i!uK@6JQHui(Wg*@2{BAV zoP-0#yKzSA51i+x9n!_m;Hf}0FHCPC?1Pzj49zMnBW(*0gY5+UC?NHMxU7WR$f$%bPTm5?{4+c*iejx?EZt1gICAd@ z_{nto>b=eq7_=q#{BqY#mzy&}jQQSvlt4ZO||HG>9MW2QCmK46ER) z4FsBs7p%qn0=~Tgo+cK{@wzams}7*X^?3&f+oA)@CS;QqnfyZ*7A}Q%%bQfyqV>gv)*?jG3xbtsj|z8*1| z3(*s|lamt8A}^}lc%809O&TcKX(Eg>ORFQND3UbMHxL`X%&?fYfwalkluA1AqjM_h zkdcRWRQWwsUv%1}lYP{1v|0S2z8hu)o)MCml9(6KMQ{Xifrf#fbb!sNDgRV@eHp6D zspZODNOOH3?kyO=HAWfXB*{Q%=Ie|Gp%-U`jM4G}T5XK&L1)z?z{xphuyb};0Ii?j zRQNQOg0`eVgQr?Ud7%ER9jsen0&q9h&0XX#Og&vQZv?!|^r%UdJiuaA7zz;vcN+p7 zfr--t#~>FhAWmpPaZ3#jPKKWsqqvh&nDL^H00IF+c%=`Lj5DD+QDqF61on2ZRkU0tLj-B13x)7_l`o{6hxYQ5H6!yrLgXlwlQW<0N>| zYU!I#l8O=l$r0Mt<6D05rN%Y@U@aN}S#XEAfDDImSpzy1LIyjDdcls+l1B1%7zuBp z!PH@Pb`~v>7LLHnGqlJ(0wbw|Y1|5vRFYIJZ7h#yfh`d3ZFT}~(U9|^RSOni3L<94kzk#9)meX z2#R7-q^c6#)S10%dpZYN8$3f?nNUhl0$M3K6e%Da4ogYGF`P`|6Ea;VC3a{l>Sm~h zY`5Y{FEm{$??}A`VHqLQ7(-|3Oqc zJW$!b=q8Y&(>XAC%0iZI9UYF-Y<`gjHdi@oDOn)gWi1kPk8q0+fS{g+w;5INBuV4-5xIcmv&bE^vJwS)4c3$&&C<%RjVTf6Vy-<}zt-u1!XchSw=sY@*La&7zk z>2*(*{pI4cJ8vcz-rJsC9$P+p;-_|h{=R!&Sgvoj7yRVT`v*q{7p`9)AMzZ#@Ar_u za({dA^fFKRt@|C1pS{}+4_xkW`PAOLnxC3><}1r`bYiAUeJw~)!L6n@w^iJzG79jsqpj~^Wi-77q~oLye>lQ-t8 ztLe_}ygui_=6lQIH(X=){jctgr|L;h9=!bV!T!yjc)oY~Z)fL=)18C$bZOam@y(|9 zxU=2NPuQOCTlNp0J!xAW`QrT#&Oi37f1K}af52aS!zHhK^mC`rb9vd{n^yCBI{#n+ zrpImVv>Z>HWqo3Wd)UL}Xm7hd*q?43>>s>&V|)Jevyb`hn=bdJOSjSe=;hOYI`qSz z_3Ow7E?>7m)7^PxfA1b$-P>C&N7JX5=cd_~S%O>J)_fY<$#j0p6*p~($DjA76IXfO zL+-7-?{humS$}ik!9!l`FV^1n;&iyIr=81BUGp%f57FY?+CKj9+W!9Bvc5EIbDo!T z+4-I2aE~oNzPEkRMbG%vgP&ja=NGKv@v^=(>~gw0oML+>bG-5Jf*$eJZvUBlVZOL& z1K(wKY|j;@oukd#ip=vFUbHYu&&SH%c<5{GjtyVk?xJ>N*v|Hc$4j4C_D`%=3;O7C zwB9nFZqnOwndUvbfE+r#na?ZsBi zFD5iuy`yJtwkmdGd}1$`Trby_^Y;Dxt1nF_S57~6`L|tUbNc7!eDs=MPudE;c5K1- zSq(csciP8?ap6+um5#M;8yi zU)WkNm)-2Oc6;5E-#m5uTc>aLr>=K$boytewej$_l5yo#4CmIeUJrXbo-Q9eI&3m4 zyteG0^FBN8&#%0>ckctc;Syuh#-)A%M7#CC7*7p|jrN#UUcfnG5?60NxND!B?jB6j z_RZDw;WYjD$>AfbN6t-~^|D&co9C9*&7I8)hyHwS-hBO!=jHHcb~a!C+nvMx)y}z{ z%^#e(u(O){+L`xm93TGdPrvx~FTL~L7v6j4FTL5fZy)~X;j;SR`M0OTW4~Rvxcaw0 z{IBnxJ@=je{Gach-F)=X*M8@(7ytH@e|vg!xJn+crSbBGmFiHR`khxdU;Sjy5B$*W z>E^fo<|{t=$v@ry&e_d3esY@r;^W`@zxjJUoVLCXLzoE3%|CwZ!~ftxKEK*M_aFbd ze|+TkU)+53@4MH2?D_}KAD>+Oglh=T-jx=ceh;zxls^x9gvu^3M<7=HUZ9 qc$2xc>&d$uv{ARslI7fYsPSGX0*rwBTYLB9r 
zRDybu_>iRHv9*|Kihe~i1?&W$bdK3sT9}>0!Y4z$-Jk{I%hxzuI+5IZP38YemH#!Bqj#5VJYl=f{FKQ5ww3>7E4Q|BL;7U)Lxp0}zIJ2S z3Y`L;H%TawqIdDkzoFVW$d^fHbJ@ZdJciG!BJ(Fa5uk(l6-8wWOxjf(UGbTpQxB}^ z=*HcZHnT$8@!9wLKRmEg(1FByiZeFY!anOLwgCz!v@BTpf#LTy}YO&e9JyQfmx?u5#GKA5v!gtm`-qfM9yH1V5NQnfRE>~033^=SpL5$ zi0WC2Pb-sTAxG>PN-*>iZ{oFyK}|!W)V((Yk0mnbFj8l}G12iTdTNib@^ocC z?cx2fhC!kbyMSO5j}Xix2+T{T5lDPA5-Zb@)B!ny{ro$@wnDivJd(=YTeUe1RHtK} znY?PuTb=^G!ellUoYswM>y}_fW5KT6sNyB|33c(#Z`Y8i9rkZDrw2L|E!MYMO*j@# zI=)bA-Rq5EgyVE1PRI69x$5g87fR(gw=Au4h9i-)u&?S2eYJ-3$*wHkO{MaNyp1}r z*YBmneno3wCS>Yj#fA7&c_852z{o|$uv#)Cp2QXGs;St+iC!c2K$iM95Ti620B_Sb zWwA0*%Y9e1H4<+)>^IsMWApkMyEX^^dI0CQ_={u3|ghvk5N>EZr>fm??s z`$4+I{Q~{h#Qg(f>4g1iEv(^5sJP$o8vIX4pJ|8$Av$PZJh1C2as7^B;LYHD42|GG lbKU*L>tUZOR*Kxgpo`82&9jSa0eHB6S$yBbGmQLtNt}fIg=aRS87F?ipZ(i@g<$l5*+4PY z2C=+~au2Db7!Q)X(QFlfjl3>nO-X#ggEYyc$OB0*QK*ROMZX6GlaA5BRMljd?qbHp ztmT+arGvSFPy-UodnI+z{S_7*!!f&9bg^VPWzSUAS1i|RwYyI>^yFMitgzk#RxQ0x zv8E%#x{d)hbPTb{CAOGmNN9s&yOw`o$HlHUqI^>ka=}CF^}Hpu^apGIQKSt&)KX{h>p5KTlH>M&A7M=p*BL{?Nl#FscJAWmJJDTwIL^9_k})~i+wvsA`~ Jgw1khe*xOWy9xjR literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz b/pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4ff0dbaa1ff92bce1b6be92058373060b67c571 GIT binary patch literal 625 zcmV-%0*?J3iwFoa=zv`U|8sC*a&u*7a$#*{Uob8*FfKDLaA{*}Y-IqQRNre8K@h%6 z@6!6Slu(F>AP5qb9v1rGL!q=Oiam_9KFhL~+jNg!E@5|14OrBNR)htGjDLy$oNqe2 zx6x~i2yP(z<7VdjzMYwUAAJ25cy{Ram=-COQGX;@B*#=p77YX)GSI6p>~^0j`q-XK z;ob*0g^pe(vP{%u4Ek#2LY&JrlCl9Ub45i zSuJV*E%bW5gC98TU%aCr`DC>CyP*9&V7S*oVXmuJo}(9E(>E5$6tp-tI*-g;*A_}e zg+f#vNHrQm9Po{vSunocLP_d?^h0dDT-Mm8sHdrq2Xq;+rm zRVjH5H!r!_m6!(#p1k|+>aFD{kvvPJb$w(&&_pp{NQiY5C4yr&EpzxVcPlnV!yjsT>L&YaW`fK z^>2p(Y!QYcJ#{dSP=Y(y}Oc>8Fp5I z%TVT7pV@LUU%ACcc}{HnTCoJgJ~Ed~GgmK6nM6MU LLm3IRz61aOFitCK literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz b/pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz new file mode 100644 index 0000000000000000000000000000000000000000..b299e7d85808e540bff2ea5a9d688a677677289f GIT binary patch literal 521 zcmV+k0`~nMiwFoU=zv`U|8sC*a&u*KWpZg{b6+qnGB7SPE^ujMYiwlzZBk8d)G!d; zQ17+^Dj}++%cVj>NZSJum0HAs9%v5;B68@SE6dKBbwt)1`2*NPRXDT=$+r&wl^cwm z1Z_(lDU+GJ@w|C6-^j0nwO%~_an00}nY37Fk(!!mgGT6581VNe(VsN#73_Y8GZ_1I zY89+EStX9DMe*qa zQ*0)5Q*3++esn!)D!vMna%Kcpqe^_C??Mdm*E{KunyFcUo&?)LLlb>tnYNJiJTZ1r zLqB2E*0@ZDw9Dye^?)soza>vuZ LQ#LY-Kmz~(w%YyQ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/pickle/test_mi_py27.pkl b/pandas/tests/io/data/pickle/test_mi_py27.pkl new file mode 100644 index 0000000000000000000000000000000000000000..89021dd82810861dbfeef234dea91197396e20b8 GIT binary patch literal 1395 zcmZ`(Z)_Ar6u-T@p6!;Z6PwD7TRsEv2h?Ktee@rxtj}jd-wJa zQspX|v;t~k1mZV8_@U9n1S9GfW459J8?Eu1HYO^XXdpEYX6 ziP3#UVrvyxC$Z{9@oMLn|V&Pu?$ zDXz0}rkyhr)C9?M*7-jiO=6^|I9^*H@MqGxslCR_HOb-z6JL`Q1PB3c=Yd~EeyMwgCEb}A=6M=wq{M$qYbwD z7k-*N?>3ybM|Pk1#a(;!LgKA6pSj!S-yR$2o^T%{Soc!b<JE34+1007t#D! 
zQ*UtYXSapDa_Z&QbMDUB*V)O>$rEL==} z>FkD;ofl}YP-_GLagA#d_WaAY6JINZ-D Ni#f!xY|YW4{{SjH`hoxe literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/pickle/test_py27.pkl b/pandas/tests/io/data/pickle/test_py27.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5308b864bc0c728db604bac7b5007c1c0c916e4a GIT binary patch literal 943 zcmZ`%Ye*DP6yBGnCWuvZ0(MF=b>%5pv9W@ zqaV>jg4zh`Ps&0>7A7M}Yy{F9N-fH==p9jrf2!;3Mr_pmak%%K@0|OcbG~Mq5Ja*U zsfkAaXy`D>WDBw$2!;|326VsXLyCez6sL$ny{u}AE@%|aNuVe)3p0vy zSxW?3`9n2$D$QD1dn5_)lD0((PlLAVwXB7;62NqtwL@!@+wHFUNseh)pz-YX@7L9L%U(PnRRE#`)2FkSS z)8{NTOM#_Hm#_4C_?z;2i8b)WB}vF?4e{N$0# zAhsbtTJre2F?_YEzqi`b2V+x*kH5HUoIllB%{={R+-XAbXnP;n7r*Z}P#g>oI(Odw zI%>IL9bm={EKPb)H<%F&8z}OXA&q(_iIJd^zxh+0mTs+v4 zUH;$=c$sbMJ^i>J#P_eTP?nfenyR1xagUM%Gmrt&2LT`KxkEBCixq$fq!tKRfD$ls*@0}3 zdXN|g5Q_mZ3|s&@0;Ue+o|k_a7(nV_YC-z`gTZ?U_5XeSKPVrhjvJ~Lqz(i?=7Y=w z$?-zjfw86p26; W0mF_VG{jBU;yKhUskuNAAOHZej8AF+ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/spss/labelled-num.sav b/pandas/tests/io/data/spss/labelled-num.sav new file mode 100755 index 0000000000000000000000000000000000000000..bfab052089d7e62d2e9747c51434cb3a42156278 GIT binary patch literal 507 zcmY#!^D%PJP}WrNbn;aQ4hRlb2o7-!@eB^}bPiVV4OR%x%uC5HFIF%z(lY=-1vJ3K zz`(!=#Xt(o2GI-*kq!oC3T~-M3WlbJCRWDAR>o#%dP(L2O$FJ90ytP17=Sp;F~|T2 zjF^FvAooDP2Yc?2jLc#MAj;261=+^}lz_RH9moc$2Z?b2u^14;zy+X1Fm)h1U;bra z0I7$m1?l?_2Ja!%|M&I(pnQ-zZm3$2IuHPv4>AuV#|vc(0%@4LZD9T$&(7B2Y!Z@L>oIanrSU4s}Z^Pyhq~H&;mv literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/spss/labelled-str.sav b/pandas/tests/io/data/spss/labelled-str.sav new file mode 100755 index 0000000000000000000000000000000000000000..b96a9c00fcec10b33cf35b2d3d87bef396fcd3ad GIT binary patch literal 525 zcmbVIT}uK%6dlV-j3CMfA1^4V55|6&#DcJCt}L_>&PMcDID}vt8Tja5^k0Ne{sOam z+PTnkXE@xsbI-kJW~$Mx7uG8cin|Hvd#y>Q*J-TNxTmSzYQBs=Dbe&eo{naVIeD!M z2a5!IN~xSB2ZcPtQ|S7n%{#em*AF}=xb&szzmW%vpSY+TyE6yv0&F zx95qW#OC<~8Bv~fa_=MFqYq~VW|=8iv7zYTz1}JXy=c+5`^6>;yUp_3=FlBmEp(WJ z`2cDsOq?NR_wT%#>BxMbc*=zM?}M=iP(Nd$`J9<`1=Vmko0xjdsTCWLl&s`{<3k!X RufA{##+Dxe$fw9>{Qxb9P$&QZ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/spss/umlauts.sav b/pandas/tests/io/data/spss/umlauts.sav new file mode 100755 index 0000000000000000000000000000000000000000..e99cf1267bebebd16bdfe881579e6e319aa10986 GIT binary patch literal 567 zcmY#!^D%PJP}WrNbn;aQ4hRlb2o7-!@eB^}bPiVV4OR%x%uC5HFIF%z(lY=-1vJ3K zz`(!=#XyRI1w?>Bq=S*Mf?H~mf}yFQk(G(Dm5CXeUXr;$b3yij0LV@-;9zB70OBym zAVZLOB0#~AjLc#MAWFj2vPtHTZYgOH(iV8Q2SHUQqzHA3;+QaUGV?_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/S4_EDUC1.dta b/pandas/tests/io/data/stata/S4_EDUC1.dta new file mode 100644 index 0000000000000000000000000000000000000000..2d5533b7e621ce311de87989c9b7368be4c86bb5 GIT binary patch literal 2997 zcmbW2XK++i7>3VXLPCH92%rQImJ*26kfkI*_M979kPvzgq&E%8nxbO8I+h<6u#E)U z+OXHCSSPWgu3blG)UkIRdl?IO?suP2e!w5{BhS9y_kQm==kC3G8$ymsQYDT$jK7A; z%G#>N_SUwh_N^+`(ooh~Ro|#$_J5gF{14R)tp_A$U{{Nl>Mi2Q>c2Lt2 zwo6vlv|tOVRT+Eip-MBH31L7WlWx2I^U zXsW6?xI;P0>0v{=L&^#HOj3@2LA!gZ&_1O+oopRS#Z3zJUQ$;k74{B%U3~(r`zmL@ zKc8mxONruw^q{Z%(eH1$t0PknATI+B4Ekn}WqIvxJy^7-kFfM}j>I=a>M=J{sPs@c z%rZC986A%ADCMO1`xqg8PmdIK>oEOMLA`4<=N=9O>);5ey&XvQ<2E!^u)pg&P` z+)P4GhEt@z)18XWfzwzo7e=U0hcn4?3Az})6kUQ|Ci@Dz%Y`v_g;2RGx!+caKklxUdfcp$xo)$T`E^n^ zX1y@%Zct8N|NGpioFt!{B*)XG_&2lu7V#GqmI+G>%azl|_gB!bg4NQGyR|~EtB!g- zY#{eo_In)lMz|F=!Dh)ZCWby9wvf|`ZiDUcgkXJrVz3``67_BD>ty<;z*8;#=ixN; z>ExdwT4|5x?@ZBga~A!xsh>msx$wLo$DGgn1@J=4y@^D@MS(WkEfb02=nnM7LSrr= 
z|5C}@;a*1W<$+IM0e9l-pufx3vrD%ZT#3F)e4f4C!v>){Q2KQ}UW6Wk4N zhPNpDwoqQzt$|=>HA>VQ!zS7dHDX{pZxi%lp)E{P(FbA)(Ses3*am^pnvku$L43 zex=#~fBiHkcweW}?+5$SAArt)1K}VicpnbNHw0$V9}0(&Kb-yu^hk6V-zYfR?%V%e z7-Q-GUW~Qe)zJ~nl6oRK4&QjImFn+ilXEn=6VMa!O@fo*6gU;;z-cfSM&NWf1I`qB zdKNkl=EK==4x9@MU=(U-pbHD(JUAaN5E`>k&K-6a1$9+Ke=+A?f-Z(jxh^H>WpFw5 z70j)KtKe$6Jv_Ze)>CFJTqlg1_2g`Dg6DQ4{Y|ix{LT2bpv%zZtW$xmv~!V2L|37! P@z>z14RUoIzIymCo1Z8n literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata-compat-105.dta b/pandas/tests/io/data/stata/stata-compat-105.dta new file mode 100644 index 0000000000000000000000000000000000000000..b7092a7947a4318ce94310103e6bcdf72a4e1874 GIT binary patch literal 771 zcmc~~Vr1Z8U}hi(Bqe3$q^0C^X6B`&R)7r1w166uX=nyvWf~hn7->)$h7>rX1j;Hl zG=Pbr)2bGF2I){ioNPlQxFS_cIFmsYSvK{2{LqkM`(fbswT-~a#D z|37=?%-&fuL13r7kR<~{Vp4KSYFc_mW)>6FjHE!OY0W!pP9T0F>Ql r30;3@? z8UlkO1Q`GS|NZ}e{r|IP&g`8v69jhJ3t2KSBqk-Nq^6~3WM(lz9eVbR&pwEXllH0;3@? z8UlkO1Q`GS|NZ}e{r|IP&g`8v69jhJ3t2KSBqk-Nq^6~3WM(lz9eVbR&pwEXllHF!tEB26#8UflRuum>Vt^wL7kkTPbNo4vQ0>x*{@XeOw2FC3t zbk*+0-MEeK-tX-IDtiItOPSH7nZUJUZ*R98ch7PN^fU^77CxEbP{HA@jLC4gKAm2#y3-*53Ie^ow`fKalMfpRr6ob!LdB|E2stn%HOCt z9@6J*ke!4B_oJu=1{@R!3T&qW_MF(v4dwMS%NX)M=f-x56?PpI2nqxRf&xK-pg>Sy z+Z3qO9sSJ^MpG*2NHJmR3w$IApnskurpQZdy;SSpTEEt(>-DFjL)$(+$P|3Den^L- zgpDUt{_H|2>gjs##07rI0uTNQwB_Db>ryK^h)mE)FKb`jjBl$m0?JkZQi;?q(A&zr L*FN2F%hL28jjHT} literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata-dta-partially-labeled.dta b/pandas/tests/io/data/stata/stata-dta-partially-labeled.dta new file mode 100644 index 0000000000000000000000000000000000000000..b9abdb8827432d59ab781a079f96cd466570d483 GIT binary patch literal 1390 zcmd5+y-yTD6yGx#E(Qw>%_SCB2+?4ey>0l&%XkTBb3j8wcg)@ma>*=rnHfl~($3D( z(u%^uKf*u2ih|A|9=A|LW2K9Byf9@Y7_81qnl3{M*0V?UIy-iUQ;H~s<~DfAA67CkGqTe?BdaA3DI zefP+?*}s3~^0!NDw}Ax{7^{_!M7*(HPOEaopSsBKo8e%dAX93bE(X;lp#OBmpBk=S k1P!K+{&b<_?;jP?`Lox16iMzMI>*#cXtIZ~CwgM90h9mgy8r+H literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata10_115.dta b/pandas/tests/io/data/stata/stata10_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..b917dde5ad47dfecfc872b5a5ea756ce603eda59 GIT binary patch literal 2298 zcmeHJy>1jS5O$8A1PUZ1puUEdy$cahqLH)WuL-&aD5zH6TZgQ)KPTH9L_#7GFF;Fo zqT*?I0-k{)4TvVpc)ZzF?v@&f>}hVspJ!*rW6yZOI^|T7IVIsxt4tdSMi*LHEyv?+ zh?)VUM^4v<6rMeQ3Q;?ReYq|m+LthqSA;-uBB4;JE+oniKYU zG~ODHc6PT$5Vf-qHBppmCefOIJ_mkljfF>n?+tjI1^(?4kH24L{$QQ?tH4hJU)x~( zN8rB$hlsqwT$x;@g-+bjOk4T$wEg`$h3MQgsaFb@Cw+Q!#Qz&WRkgR# zuW(S*%H`GTUJb91l)1G%T2<}0q}PDRD|derExhYniO&G97;J4^kz0pbbt7y7TZZUt z3Jz*hDre>0oqMDH$X7&k@xqFvEI|-83C5#XywQo@;DmQ5RW_e4z0v{x1%*yHtHr9W z{$5;kCeJUutz(}ME5+OLD%lU+Lp-_t@gd3a$&0wbX#Q)^3*F*O|M4BxpM7fdFAV+% zIqY=&aFAensEnFAZLsy_Q!61_LNdHpu`3?$39*Rrj3JTC6iypqNFor+0|Zz7RevT4&QmC-{2^3(A*;Cd)bEs)|rAL4!l!0h!$xfM`*EIaM!}IfBEIT zFO#M`$YHoR#Q`W9ZkK%;H=vgc+t)0&L2ctMtT4M literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata10_117.dta b/pandas/tests/io/data/stata/stata10_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..b917dde5ad47dfecfc872b5a5ea756ce603eda59 GIT binary patch literal 2298 zcmeHJy>1jS5O$8A1PUZ1puUEdy$cahqLH)WuL-&aD5zH6TZgQ)KPTH9L_#7GFF;Fo zqT*?I0-k{)4TvVpc)ZzF?v@&f>}hVspJ!*rW6yZOI^|T7IVIsxt4tdSMi*LHEyv?+ zh?)VUM^4v<6rMeQ3Q;?ReYq|m+LthqSA;-uBB4;JE+oniKYU zG~ODHc6PT$5Vf-qHBppmCefOIJ_mkljfF>n?+tjI1^(?4kH24L{$QQ?tH4hJU)x~( zN8rB$hlsqwT$x;@g-+bjOk4T$wEg`$h3MQgsaFb@Cw+Q!#Qz&WRkgR# zuW(S*%H`GTUJb91l)1G%T2<}0q}PDRD|derExhYniO&G97;J4^kz0pbbt7y7TZZUt 
z3Jz*hDre>0oqMDH$X7&k@xqFvEI|-83C5#XywQo@;DmQ5RW_e4z0v{x1%*yHtHr9W z{$5;kCeJUutz(}ME5+OLD%lU+Lp-_t@gd3a$&0wbX#Q)^3*F*O|M4BxpM7fdFAV+% zIqY=&aFAensEnFAZLsy_Q!61_LNdHpu`3?$39*Rrj3JTC6iypqNFor+0|Zz7RevT4&QmC-{2^3(A*;Cd)bEs)|rAL4!l!0h!$xfM`*EIaM!}IfBEIT zFO#M`$YHoR#Q`W9ZkK%;H=vgc+t)0&L2ctMtT4M literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata11_115.dta b/pandas/tests/io/data/stata/stata11_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..cfcd250f1cd9fd5b3c2a77f1414fea73d407d955 GIT binary patch literal 810 zcmbVK%}N7749?b{gC_+qg2;dr6l~ckUBn)AQT(|mLcxe3rmOGBFa+pinzxAmYO5Glt_&dPzw6_kJKMl=8vKP literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata11_117.dta b/pandas/tests/io/data/stata/stata11_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..79dfffd94483f30bcf6f155711caa392d849f970 GIT binary patch literal 1268 zcmbtUPiqrF6d$94coTZ@A_FC$V4ZA9(a2+55bL25gn-v&lDsy7Nj9CG26Jk`leb>I z_3p)Q(6isdqaQ#Jyu|lrXPdRgia0R){>*RQ|5+%!@g^I3qhW+*h)#n;i>5%GB#%K= z0`wOibH{*Jy`i^YNZa$aWt)wlJ zCr?`)P^(i=RTL95(_}5I50P$ME16EDr;&b)R5r@<_mOt5m-1nxpCUbv^y`iC{9B}; z*elG98=Jnx%r9nG=%IiYt^@?*r7e)Ld=2_*SGz8#>oIQ4Q%%UPbg z2XcqmL~I@k9)~ZV&$Hz*F=y`@ET8_n!GsO&{Lx@-CSuGm7#U{<9vyXXJSa4%-!wyd zZ`9nAlxH-h!Anz`#*=3^qbQD%xbWfAvy_4-2&ixo3hyl2jkH!s(%Qb&J-S`*0Nbsv zTTx*BX5{(VZqU9<`yOqs7l+5sgr5duUsMGbZ_q8|L7orAOaAPdu@kSU1N_=4fM24h b2IX&2uvhlZQE)W8{x6F9jFkneesn(pwkR(F literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata12_117.dta b/pandas/tests/io/data/stata/stata12_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..7d1d6181f53bf2b443798b2842cdb95f258b56a1 GIT binary patch literal 1285 zcmd5+%}yIJ5Oxdnz>Nwa^%hm)S|>;dZ5g?!2M_|mfd`1Zn+aKILl)bm4Qdg}od@W1 zaN{XBaYDQX;zT|6EJ;{6RO+ECt-qOX{_NTDf_2*IcE@Q2J=7gE3JeygEvi;)4P>Q2 zI&?TNF>LSZBOsykK6;8Ct zby{HC4NJ7@{_&j<=OCZa2?HX|=9}>aiNwfpJA{-BWu&K2gve zn7(#aEhnFvhxI(KGs}s6-T0E_uLD3t_N*3(934vinPJhKX9jp_LR zhM&OmFFMV*U-1@hY-Fhx*riUy`*`(L2sCa;480&jFj!{_tF{jIiKO7sykC624t5WF pxnK(=8NK(*@psA+mMB&p6hERnOc{Opn0Ru^=m-;EpVaZ6?+@SE-rE2G literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata13_dates.dta b/pandas/tests/io/data/stata/stata13_dates.dta new file mode 100644 index 0000000000000000000000000000000000000000..87b857559e501e555b74131bce5abc7b2b1d98ff GIT binary patch literal 3386 zcmeHKO=uHA6n?g1#Xl&Bco706;HBM75vy@nAq`TfjY{jGc-T&}lkVbfwwax@1`qAg zqZbd_i+F1f;=zMntXFS}XQ3bz5%Hi#4y9Q z!?1;1f*nFFM2SyuM666^2pM$)P1hI+sUTb(zk-a0iAJpxfQ(TK6|4gQyj!r4kEe-m z@xJOH<^_aH1+!M~;Q7?GB1;XLnW2<LIc&PED$j^@(8qakr zdq(5gL&`pLSmD!S3g2n$I0bzJIf=t2MXQ(g=cCi|< zFn~(6jRYsicxqG9mf&K)1em9{=dkZNn0ev9qYwm{!T~S@WyJmet%=uYueF|95%8_u4DozI6c{JqU1%w+Fm^;Oz_d!JdSLIR5@4 z{xc_@V>vc(JQd%O&!p;G3>|`~^Lx*OsBf(+-1s4s>mD`g-KXc>qD;JfvucZa2aoP| z342l7@5d^#jMVK*eaha(mrErsorIk^gwD2s&YRLGCQaYgy8fV|izzNLLiyL1SBxxV zZ1eEXL(B`+;^*AGYOap!x%J#8Y{C-}$`x&U|KYB&eW? zVL=HVV|18>1c%ILG-4zkZ^y2!;N*m0Jd_Ik1HITSRPlLIhp>B|;0t6o(h0j42=-}` z`Q!*19P=Uv{;A_+=K?RqhnX`BRvo2)U9E?OmuBEiIz#rgv^ys=q0t5*6RqG! zL5f}LXqsk-F^HB;p>3BgjF`O2xOCLyFKe~@`d*#W{W_mDdDj75e`fLr2X%eXh*)FSA1>Syjp?pN-64!i5bQazwBJOWq*qmk~iEYBWKc&wZb4Cxt= z(2gVqMCk7<5N87sp;#=wvsR0&_4=Gj7m8l(Y_S_c8SIx2fqpK7AXYvERD|k?#1RjL z??nQZlBY9rJ?X2_Id=0RJi@~?d^FPBQDW?rqv5gGXekq`xuevE@wU=hmTVEb_QSvu z>tAIFwU;FHmn~)Nl{OUwwiN-YX5_YZ`JZ(r*V^@=S>R6!o{03ZkU1275l=HmJ5vVvuEwteylyiM)UTpUsF}uj`jQX?~VOI zYXMBVJHEr;-kJQrvP zVGXPRxMcw4orJ~@RREMhYBsk`Zz4XN%PIh((s$van;%1Lu-r9hO0W=)m|iL(*!|~? 
zybL2fRKOSO=A&HDYNYpj`JC!Bd;eRJs78w4pwB>t&?JKgqz{vqd>Sxx z8AudD)$q4Lx&#IUALQc`929Dl#-AJ*>e6M@*oE$Vk?cl4^Ii1ce@M`fN^d){tGvc) znIdNfE=pULrA*gDMGYl;7a5s$7oq6}kPY3q8jNS3WWwySm0Mp`+rf~hqwJEE8OPAH zq)Dg!?81$eM=Q7C(otJ4KKlW{e$pq$LD5ieR(!u?cdeV#O^OfNI;g8;>sH=qc6(cY zaqf-Mvn-(N&e*yCg Br&0g_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata15.dta b/pandas/tests/io/data/stata/stata15.dta new file mode 100644 index 0000000000000000000000000000000000000000..d13e2fa337db39f73c2fc2a252126a1a73396180 GIT binary patch literal 3183 zcmc(iO^g&p6vy8JA;cJh9~VWS6GS1-FzgzUk4~#>R0w1f116e1tkpd=GsVxfv2M#2b3h;8*+tF2X?%qM{<|`mgTl*{^ujNq+shzFxh0 z?^RWgA15qfL)C-@euJ|rH-T@s!7bi8?~%0b^{we$>-9dcp|97kOt$)!!ju+k zLqYyK?&KXk`HS!5c_+Vj^1(TI`$Z@F=jG*^lW#luo|DnddAoMfuh?1rVP*siB_B#U zZ5{{LC;ZBcD%(XdEGghgLy0I>xuXB!nc0@x;>X$~Qp8E-IKR>@`ZaA@EQx~^>sGI+ z6Rqg&%jNn^Ott#bLwV^^{|0YIqub(F3KdwoO!-h2*P(pgM|W%<*wVj!pxgVuez887 zY1CvCGgFI5iKJ8VVy<|dNoVGduPn0p<2|ePP8=Y*_v!Bz+VbtiDprpldlU7iPEX)R zbfo=!*5BnE(b&1;wtdm@V^7%e{ZHlNd)&CO6M6l^u6^LO3z`0Rm)@G`DaO-`{^Z7e zboHC8|N6onCyV(Ddy07q`)?}xJ3ar_znK5ynfAMJgzxp=W0VXwhIEphC~G!l5o<$c;O* z^kfzwa`ko}>~kl~0=(Y_7K7#BKF|j?f=yr>coGbP5ZE1O&yob}1p0v!60B@9J~OIg45tDa1Go53+BLHungP-N?;?{ z0zyy+2J8l7;3e=fI14U=&2zB^upb-+C%{|aL+~Y-55Ft~OThiW2lkX61RSJR!uA@# zL6!MKK<)%hVE1ebSRZN72D^X(F|dA1!7vyByFmwxf<3_cbRXCcuvcjejDv&V5O@)s z1D}9T!Drxe@CEo5TmTorC2$2?1wVnG!7tz%u)hBd{0^>zKfn#}C-@8e4JH7=-#uUs zm<#3sn@IMSyAv!1cY!5fIk+3F0A;WmRKQwr7+isGLnf3?n~+t3ZK;|Zuac?6%2tRf z$T-I3Mj@|?s7aAlg4I(&k>Of$!nkH58;Cclt%j=5l2I*%9zi2o>KaL=Lr#*1I;9qq z!ggyh6|uU?UaF{Jwq_@*Ysl1y1f!XvHqGPPGdkA zR?W;wbQfzGj0%ntw=y(rbd(JXDfud@vk=YsR8#as92%J{^e|b+ zNrxYJ9_IL9BQdP8;z(n?SVJp>adiY3&r~SR%2+FFnody+1=1s(UaDAiUarnY zdrCk_2`o5&LUSy*G0c-(BM%ad*iSX9aI#iN{D{Oa z5V0cip@=7vwkVqPFyP6Q#H-O{yy#J{UT|h-X8S=rP)!<>ZkoP%Z{C~tn>TOv%`7x6 zWT9Z#LK^JXQCK%LXy{QLnY!ln`k^Wr*gI_L0|rw#8#o2kA`81T1wge+gWcL5z#ph~ zYcPs>^{57DA8$b`64R-_SVC)T^}5fYVYkoY^}Bto$B(r^wb%<)2_=RSbO8QL3wn45 zZ*&U!Wf$i+1yvQ!e-dC$9N>JPpy`90|J=ywFF|)7;yhHDS2%=>2byDotidi_4$F94^xx>D6%5)S^_|3C=vml+CgAT5Hu|#T9f!wr*;Ht ziNMZJo{!KRFsx|Aw46=U)5G?oBZhqgMhsb|)_l}wRu!>kpQlR0w(9JF4fSFbV4JB& zPc?>6m0_Y=Jvyid?e|%b$G8e0b(4Ao{%r)sP*U{wpig^sphv zYdOjF$Cp2LFrT8H18FbuN<5W(CBBB4a(r^Uy%qQ*IEiClMZYsi=~x;_0QCU!h|~jE zu|w(^D7H#{3;*x$z>?dKC@V13j|>#Dbb}r=BOz0R>J~|uUNW_ftA*vlN+DBN-bjJ` z3dz4F`8OoLO7d?>{+)c8GI{}zba&#B0Jx5C;G6gslOnIy@K zk<2J<1i&oG&XMdq$u5xWbCP{QvWq19l4O@~8cgBGIE|m+Y5bIXiMs&5eky{x?FaNn zE?wptfBYAkW&L3nuuChQET`_W?(H&kvb^XivY3u-cZgDb=ish@-CX`x!@1?y3gB0E zsM`4g*)`78P6y7j7|5cV+E1CXRH6!Oc8&sD*8~t|e31dL&B{_nm^lipfux!EmH<-h zKzgRnG^=aCeXx|x(o%A__fCK2%lwyr64dDy5+ XJ_5bq*^w)~t*~ubazndct7U>d{*0flEL9KZ}H!49QO7KDPu zGb=e97!)iO{F6%*j0_Bo6bucm42&55{r~;vZ(2@%Vo7{%W^pmH2}tae{L-YHR6Ht@ zDodc!IE={5!)pNpLr#8PIvxv<90O#kTIw03qYB~YsTvxPrbE?&H1!OmYcMn-(F*)t z93--J&H|w1s&JOo8RQ~#8tlpk^{G$mpZ@<3RL8)^0#*kW$3ieNj2cLJ7(xRU6y87p G4}Jh1&UZfm literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata1_117.dta b/pandas/tests/io/data/stata/stata1_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..18014cec77a9109d4fec7f0d3b90ec389f7c8ab9 GIT binary patch literal 1569 zcmeHIJx;?g6gHqDF(SdjfC^Gq+>}aC^kl@+rKk%Bh@93fk({QoT_jk_HMjr=VPjz6 z1}q)80JJQ@j-9p&YNd*W0T227e*Av?-g7`Is;J*ql)yEkZ6*i^#u=52c-{%%jDX-) zu~-;|%d0xz83_$iK!6(rnq&3VRuW9;j7?r`F8Z{6|_H!Nn~$AaBGJA8LIcDTD~h-pPZ& z(wTg}W$El(reFRy`1?DYdu3dipmj|JZ7C-7z=kbJ0RP~$^*e56d1xE(ZOBH$QIZS+ VrxK7#aC6Y%Tyq$1j-34!z5zE}`_upc literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata1_119.dta.gz b/pandas/tests/io/data/stata/stata1_119.dta.gz new file mode 100644 
index 0000000000000000000000000000000000000000..0f75d8b92db1481715ba9df65d1fb7382a2b7e25 GIT binary patch literal 269559 zcmeFY2UJu^(=dDoL>!5N3L*%*;9x+4WDpop3Bmv(f=CvWoFqw{aUC-Z0+J<35CK8S zK^O#O1W6Jk2?HWIXBe2dU&B6O_X+#$_x|ZU|NETN=ichBs;;V@zTMSTJ^WAh>>*6u zHKU{V@U-%@lCY4FkQcS_w1TGlP25t)%t#Ae$Az3<2x08@YVxRAWiXFi@A7MQo$N7w zZPffYawa6lshIEai;&2EOhC6?<)LEsx|#i-Ql6onMakH|9O}s6zIU3Lxlz2J_JZx; zUOz^oqq59ZKe=6sgE-9-0-hcOro?w(8EKTKG|#XJ`{)phyf zW8`HH7U^)R+Y&G$2F*rX1ZmO2s`#sAg;z0Zrtbo#bG4r*W}E9|FZ?(h>HDetQEGgb z4)4tVf~#c#In<|LWc$X}N*K#l0$;lZ4HFBRmO|kco24z+sZiM#UHJV}(~|$Kh^E8$ zS2rBVjtTJMn#mZkPZ?VosO(-SG5Mh&vNye9NbksGq_s+btK^qSHnP8u$-qQI zsCRt@$D{KA*v2utnB-HlB=x~yKXu(Qhc+hpWjgc?M zjwHs-n%U~(o_Kc{o0;Z1S3cJ$!_P9+goo)f?{uH@DcX z1yQ5i_0i+G9-~e{qP@fRdI{>@#`NxUi@H%-=PFN(HFzX(UoTpFRdES@b4xAC{gtVl zyI_CnC3Nb6X60*gzE9hgBgI@yR@2WiWZI=RZ%jr+2DaGx=8G#vpPWl?p16td8@}aU zenzn)V1QUab$aa)q_-BZc}F%i)ae`X60XN%(=QobH<{usBpmL_GjM34^DC~I)K`ee z6`8rA9FeP3(B%_y6CrwHdefJzyGeNool9u^Me}1taCl-#!O4C!Tc8mNaQd_;wj~ntoWfw*HRG<8mcs z&fHu*e$93N)1I}ZL|*s@%W8`n;;?^6dfJ`oG2f(9&l~5~ygH7dg-_awEyc!v3)(c5 z_i@SW`f@vz)_JVDFjplqewO&!Z!<(TwUNaIHprv(9IKw4@Fkj>1ZB;!oY3ORT2fzK ztOBhpD^)$xNfVBrjn?{A#7>!MSd>aP5ckt#MOCk?3E<}Cs)g|==%qU}=?GmDq#M_QM2)_DpCPv~&BCx~Y#TK$TMU2reQ zF`MCDFtvIj?u%BpG02rex?Jqzbk|F9%>777PvOSa1o3k|HUFYw;3@nErNdMB4~mqh z@E?>yPvJi(o}R*gP?kM~|DZ5?3jaa*$Km9 z|0#cV1lsC8{HOfM5%^N~;Xmb1jzD|ehyRp6Hv&Sfb^j@Ub_6m)G_?;$#K@o|oVBm_ z#(XH0cw}$Q;d3}5^uFLQ7loV-cW#zh7vR`K+&<(w#xZ4!ZIEmOtAeJomoY1_xK&N$+0{&2@`Zhu->N&VauJm`FDf3&+%_ zCFlc#>+KQ=HxY*%5CKw!d-VWEBQE10`XzMk6ix?m@ER1ND8uBo)BF-NLm zyb=BqC`M)ZCc4wxVv;i%T92dETepR8gVi-A`KRjeJ>fH-V|af=ImubydT5fbTxa8H zl24F$Uu@$qi(-6t^l(iK?`%{~w8Wu_s2qFkPp5j0gw(%xt@oBhJ*Ya`XQi!Hc~lZH z5I*xUhPNTgDOuvs$0(-?2}H4t;d_((l#qHW=X%QFh<(oW#oU}XwTi{8Z#^~19}2Gj z$)%pc9r4AvUZHMRUwHpRiCCh~v=L0Z_o{+aM;p!ot6Qh3dCh+sPl0B~4?iqTX8CTBx`dEvs{n%XR7sOjwA%#xSm2 zGIazdUxxKUT^<{T!Z$Zdu`j)Xwx-NT*w0!`pVKp+2Nrr}!RwtL4H25crhay73$XNx z&&qh07JYu~*~`ae>bv={uPNHukYdLIF~YCp2=(e|c*a+S zcFply^GHbh^wTBmkm~E+6V)t|G;9Vf`v`S?biIq%X1i7Pb-nDnjkl<*<<`c=a>K1` zNae%2SlZXAFHQAZ#NNcUW(sWDwS{%~G@NH@tZ7mC?S#OzGR;=swKLVExDg(s@WMS- zB#RuED&}D`66`SPxH3+yr?`gtZ{_upU@kKfKZ`sqJ@Q@?olJ-(alt~TQa@DrX)t-# zIux7Q{~)NsaqIFIw_%mGtR>Y7T%+hPG0gAnk^b@aKY*r#egi!`nCqI3E+lhT8o z*BCP24-*q9PD=9mO$)>?wp;2Cp2H3+6E^crhTmjR)H-;ogWQp7kWV9i4Wg-lI7pEr z4f|G_W<5e0-A%x{`)*v6u6$<@B5HJvA?EBbR(54c>wKO5{(}I{6A3U+Tq;i-qwpoYu zHf<%~%8eR^*WGJz^ct@FB2{_>6MT&vT7^Do-h>SdI&WiKc`S-)wzoukZ0Nb4<3@ zE+^%s>2#zfX_#YY+!X`o{gQ^sm9U7RV{~G7*-^*C5wi<<>B?rI%Fi0HV=Lqy_zYQ^ z)>ubLs=q)_Q0aM@mXvGmZ)c0h>RN9}b66QqJhcRGC@*1`v}*ZaVgKgK1yH~B7F$|k zmO8)AD4g9F3R6CVIX(4sc&$fRnn_%d#IKc*r>+tk47aTYjxC7IdTb4l$fA~GM9zWu z^1LI|j;Iazwepe$?=7m5$?Gek2C4TNibk?enz(j}Sdv^xKIyZ$>T4)q;_QMBys6r? 
zfVKNHOb&p{+NQDbC5{JMseKD1{gSV#oA>2tj1PFR_R5u8HWcl}pG_0z%FPOJxnu&r zcE3UBEcrAodN&bfCc@+H6*m!maDupwl0m3)cn)#Z-2$7g1ouzDZ@uC8=9#b97atw` zX!;|nuk*VDE7TBuA)2W4$uIQvOJv`EuT^r(S`ZvBGIMLI0^WpvyM(vb3H^3$KRqmT z+Wx@P6ec}SM4V`KE5p@tNS$kNg08r*<`SHDAscraFF3o4HFn`Al}|tPP%p2bZ{h`4 zMbKxNEK?ztQ@9X1=0Zqj@0(V-?qk&wOaf)r6%ZR}KD1}MMd1GSx-Cth9yhMQZ4=)7 z4Wst??PiE+yA$5+PUh#MoUr^hw{Rx;+tTKXZKzM{KNZ^?{)F!hpBcWZ&FZZs)5sXh zI~kQDeEQHxRL(u^>Qg=DfVR5CrsC&_TCgggM+pvuPjCZh0x(!>$#BO8TWWp1r>#aj zD#@!**J}@vP(3}w8c%Y%)D8;m~`ONc^TsnnJA%fF?^(%q{8GZ$X>p9}CjaLHpFeKZzPuJ%xSZNa1Ok!Dm@q|d@ZmvM#&>f5+PZTe(NW!jFAh~^R#07y5{gh?=7Wo zn5pQmFI0LmVVm5iNzKu{t3yu3F)fM5e)KX(DGbFbCN{VP2FG^+Ev_YP>LwgdI*w|2 zZyMyC5x6EdCE^sQcXk53CL=JjdX8Mz?u|va?MeCQJ%VHyD==?lulFYG|r?S+u z)1P#DzNUP* zb4B#vXoGM;x;H{`4bN{*v^_;Bu=BdLFvc$$&)N2q`f^^{n zm^`-A8|go?p9d%<23!fxTdFr2&dWcH<)pwL5&|ElcbSorkeFlA_oaIE$qgrFRnl|T z-odOJFC+SUY{sV>6%;Wqu*=?kP6HYdrOxMGzZOX^E_(Rv=M(zQ#ogDYGT&ASTzZ`O z@S#T7&xYq6*u$(OE;!o%!rhWE);!&qSmrTT=0CeJ{eyJ#=338~VoTucSmI_Q_1j!+ z>ZVB?sr}5CkY`1?{(c{5QmN66$`PYfd*!F2cT-8va=F%(pN|UKD=U0AjMJTFZ0#3r zhp{J`#L_lCN)exr8np!|H07oH$~4)wZ~4WwyxdTY6U*q)k>r4qw3gDPuun%r+qT@} z#@e^+ekw&54Y)wX34 zht*x;xA$%{)mlojuL>I_wgpHwm9zyYHhs3I$u_}WO3AL8OIcDS>AqfZU1?>3yq4!{ zD%!@#38O3a15Zc&+XJw1vKjIcySDS83zt>F??-Akv71E|dEP4FRpfoJ`V@iYF5Cyx^0c_bLh13dZI84zXl$?oc80~M z!qtq7P1scDM-xi1Tc&YIx&?!vCNR1dO2jPr*jMEbZjxM}LqFC+rF_!LJnSjd;RkJ7 z^KDH}^~Sq6_fq|8E*yw`x7^SX>JBM>CYyaC$>FAwsdIb?^7tO@GmPWXDXosGxu1SG z2IAz#DPv2*1C+OYj|AE6In?6tVL>;%(kzVmCTvMzLEgMOZTo_}bL>dxzYre1;N!fY zh7#9(7fF(F<~(`AcL!?iz!Ap z9jq>98O(=rouMR#d1rdO2-@y2Iv;A2K{qCe%d)ru>aJJL^qE^jDb7#=`Ghll%$Alj zy^WB8v!>l?j;vxKsYA#sCLD3W_^eY%jGAg^6dom*;-IA(AEsAwG^I%+voE~$rcehU zm*ki%7Aic1^fKY-4aS$BLOQFd_DA7m1XEmoPkuv4%2{*#G)G*qkmn&J#)P9h7@u(p zX{M&y9)%YbOtIHejSg!qJeu-Z!>%{H^*7|phmg)D9F$;u@hN1WnrdGZURf~3SxYr3 zthM-PN{faaP{vlM5cDp|(OWF^Tk>;is$Eg|p9E7JwQ8P*wH6(<`lK=58!m1m?iS|jK_ zs398h5X+GW3goR1bsodT??=n-9Ta8|523?rBAim8)5md+`vr9N4)*P8jnFt2qLC>a zrpFVer~7k?)Xyo2=WDtj;{7;~)`A?9nnJ-lXmOc;g|I_~Z6jiLIBxE6#36;4@2Uw& zdF$iA(`DxwwDYvud7eIw7UXDNoP3{GRkpbr3J(SLZ=`eikX5F7DwI9^i=cz6grX~+ z5Qb6Q6VhUI^MrI6_jp1n^wo3K$$S`9^@!FlLY`8{e*)WumZgwN7**Yf)^?#Ze{HFL z>lYJ8I5BH6cIBk=?Mf(xRv~67Bt51E9U=ZZaDN=$GxSl0zjk4(5P(%9^z1m=lu&WA z5?@p!#M}P4v*;@Q)X~k2RYVOhLc0k%tqz$;qMr-mvbn0kF8M2}gBPLN1o;sleMwxb zAUfwzb(E{Y6g6NIav?xjH*xYBnVwCM$5Xx7IK-$xO1?CG<5|@!5qJkyj;y$Ck<9(p z*T;`=o?DCIE+>6xRYKjLm5=~>ru+?+YR};ihu>KTGXDUj1;qRkTQZ)YwEVRCE z2l^sBO5x)?r--`mt<5|W!`&+(@!t~>hO$c23p18il``WZIPLntGyC${{gYeP5PnL-!|t;tuIrIo~3oq zZl)2pF7|^Jy{mHZR(ojls&XA7V6E+BBS{WDzO%Tegrf1GxI@+I)yF7%>?nJ1=o@iQAQel0X$cMd=YT-jn^MCt&Rz&yt-JUr=xm#haqE! 
z0ds}pqCE1SpUyk&|J!HXEsowSq2LjrLLlt3rd@WpcvM8|m7`W7!YSDoYp&|38b#vY zKF8;tKw8Tq&8~0&hVmT-Pif6e#t|XU6bIZb4uw}d$x4#xmak}L0ysNdFEXOl=xE9b;gr0Ks#kPs^dj+D&+#cIkXPi9 z=dN&M-Qu{PC1fxn#Jt16owdy%4#rMY*edg)s@V=h`3{4ZJTmZ24?ElLH%;`@fCcI$m0@KN5jni`i995(1mgommOw|8Nv1*m?1@68nHR0oJ9zSFS0bT+19MYAgbL zM~bwx#0G9D;XFfw#)dy@O)M?>EE9F9UivyLN9u`N3k$1Smt{;90(O0G^cQK8|7mjN zIklpXb#E2Pcumf_SsAC`7up^=)^s6oIW3re+{Kzt-3Xr5hmAz49czetIf!725&ce(U}s7Ylm0C&ImR{u+{(1E1GG15gLl9 zFmLUjXJXFxN<1pG=CCITvC{FO#~~@?0gUQ}2)!>t20I{_-fe_`!|^`{vCmt38S>WF zhb}O-dP3(I-*`gmjPss2G$XqL^tW^54Dv9q^yr`rkPFfA#A3VpLN!k4Yi#p;G?h>I3&aSPChKsnOW> z^nU}U{qvFh-=5X~gZN$e;tL30*n=Y%;ZX)3Cq@w!=dCR@6Jysa@#heg;v^e`{xJeJ z)_@;d(NBsb9Gw_P>p6;JguMme|M=g%(RRUhr~ z?RDd=KFAm|x45_uEbh5Bwcrfz6RJM2t7;oe1mNyH6^9S@@5-n_iub-@)4O#sX_~&9 zN4T1{@8MCf#G8h2T8dS(=q6@3bu-RO>)@zR*2(TSZ0rutp-qHm{ry1bl{?)uqxijX zy>`cZTP3+b$mAib*Llq9b6Iu%>H{t>)TDb3Z1YvI^JV3r{P!AN#_2s|l}<-h3=S?M z(%AVX>*A)%TnX;DMd=i8vwPbDMF{mjd1n3+H$7pBK^Xj`(BUrKa|85ex%N!Crzx1# zD-*Rj8f}vjfRQ7QjA&}*zanT>3B@lgE(UAmf0KwU7mD}v^{qN;JpWNEpZfXpl_;AF z1Wlq){P5zUgjW7AUxZ+^O@XO-VxXq?UF*}I!>2!e{_G)EZ=?54Xi7}1>solzY|OM+ z{Y5b`exi_KokVP*P`saSnVqJ0t3<53W|W0_RqWB&XDW)tcjL735Aj9Z)yjYI`Sayy zn+VnIzNg}JC%_>3Z|K&;-S^j$!N*6HI!9XscyqdmYj7OngqF|n3>A;mTc`5;a2 z#*q>ED4R-a{X!wd+rAsXvbS3bLjkTTaJY{^--zHY+`I*M0(q(3>bBb_Tl_ad7*BbjPLnk4O0mky7Si;KT8%7x>7 zrr-HjY+m;n8E6_=TASg+t%STD>Yb#XtfghYjP6pmI25Sf-!`z1JE^T?`b=>@6{Kw? zPda9g8J6frU7FvFmNkl=Gd4N2icovmw4r#!!d&4E+~+Oo?9<0ttl}SZJ4mvBHDz2RFBkJNqRHD&H@TYr`MUm|$wae-Je9H4*hZ-oc9)UPPjrF<&{_feJ zmlgRd`KC>yU{ty&CQn{YUsjIv)z&ZEMbE2Meu57(A{ejMi@X+ktO|$+r zCm*8!`uf}~wWeWlbYk;`#fkeQ1?t>Ma!32op+mU z{$k2o-JH=n@7zY(RixKjnT&!~7z>~Zm0bbtIwh6M{tt-orcqZ(RC}4^#artxR_B7K@jAe2NG|_jPluw zX8791`K0?EYpS(Bf?2v9C){=>c64G2;jKHd_HqLq$3{4Exy@8_X-JA+bLq=IkQ}(K zf}2o|rNNi&X-TENtO|P6Ai3)g{X^{J@^2t`q=%wZzEQ4wlkSjx`a*hzY!3=F zdU)G*yOuC&8=RQk$Jj5yWXTlppC0n~rX_}2_VUzIFx34wuf1 zmqP1*(lB%l)$}GMf1C1^X0n_l*z_p!?U&dwJ>%8=4N8Ovx}!(;uLy2tyPbJyMc5eX z>haCl#XUNun-96$s{Hh7SLQpK&22IvRfcRQ3iF<{UfR(@<8QqlMK>L8D!Tm_Jw(v0 zJ-Qkvc61WKP5iB2+?%DaM%HZ!xXiyoP~V2e%?H~ffSdX|Vd>w{^7zG zQLJpibz|-ZVt3F%Y~Y(kIby&>XQ!BG5G@q8o5#-cpFq^KWmwGky*2><;{exyy51%w zj`7bDHyO4#%3zYLxb*w3UG+CkoJ?c!P}a!*Czy z#%orhg7@yQsr|~9`aMou6!d&dR2&U(Kn=k5Y(IChIcG0WV^TVWWiPW{9`f}~^1@76 zI;j(iT)K5#btUllUaG}G^t(B^d_%tKSM(nyA#=X%>a81II5b~%COv);L4GAZ@tP#8*vF_?%Lyp&_0Sz zV`4X?yK08Z%NG)%2j=Z?zS1OltbVP!@NB%Ifv<~0o|^AD5DL3Cl`rHN79>d z#$DvxU5=8PjBU-dRZUGqhWP!aa&DzySdTg_XP~0U3areE7dMc#%5&K^*BhPtp zssSrIFx;==#r0*a(p+jZ`tf1qNWs4%>^d=jE1BNfIUqyfK>Coy$C>@#q~Z?LdBw;1 z(p&pEi01!`3K#e#^kl7)U24$%_&eoDdN0o1lPG3j^9~~AlI&j!U_U@gOTXf>LQ9WY zU6(4imc}_R5PA+4S}A?cn>vE%`p#D{a<10@LpkF6!MT{3E$#t9R!Vd}RMkP@W2~a{ z?tUrA^tA9c$#4x<;_h|=af@V=sd&>>q^}ZeTkHTaP35id0o#@KtB;C?18T4RxzI-h zrbjP1&k?|4@)6MTv^8aRLd5{g4KiZWIapnEo1`5!Gdb~z?!#;H7bz@*Qy-%+)1b*a zYZTsdud})pO}3k zl3~wBL1Ru*+Uk?9sDA!>3ibgX*k6|Sb`J&HNE0**!bE@sF7?w@wZ%c|w&mW4AA-h$ zq_j6-A~0)TgNEVSfj70h^}7{N=Ynl|PFhwdhA;3HuY@(0&6+{Jkl>N}{JA%;2i!jl zs@Tu%V;p$PliZZVDf4Y$rjKZSu-be6;R>D53|WF{@FHIzvPdqkm|bA@wuxK6Z&8Bc zO~jVC7_JJ+paL-5${c##lD$IDOkwR?Se@0GjbYexv9LlX1xP;EoPCZ#+^iJZ-JJazSRud*NUvsmuYSX8&>PZw3vXNOECgP!eUnRWA z(P>1`xPqXW8rE2`ItK=En~mY&_U|Du*{m@d_(0J3`nFy-oL2bjdv9q2%{9xm2jcrI zfLgb^2mZ~``g!$yow=Bhr4junDyL>@1ix;|n6q9C3A)xxRnIwG{cJHQYISSh)b2f6 zxm2GY6FMCtXak3lm+4CKa>kHJ)@FO#&pPS{t0VK1q7DXvkKnt01?cBmhX_uLWuv{# zobR~^T0e_wZ-4fs;AR;eW;|el3r*HI$S7r{2ncU>h(MU`hHgKCF78C;>D$4UH*Q9=R{xf#m+ogla<*gBeR*W1)1 z;1E~9BQUL&E=A#uA8VwO@;}hjO7d7I2kVp45{8ZE_L(%v{~Wn@H&)WYDpb=i@MdVY zOY@yej<>*%pdhcPkky%&>PiS6EvE3KRM*$u`pkEe?U 
zC=u*#?>$YjIsd@I@a|^8-6S>xk!Jc%#bS>$eUEy@Y{^(bcmu8IuiZ9P5UI}lrMZO> zc!euO{vs^GfJnHuwSn$f&9>tSB0EV=-f_UCm|u{5QR|9msv@qz8`XOL+PjzaBLL$> z3**n^>CKy;)xF%XGUdY$d!bX6k8h6qzADOOtc#?KG@ASC&s~ucyyjBo?eUcskUWt6 zeG03R75HJ_%>G*m&y8`-z?k{X(PTw}T`zs{5U?dCB3{jTi>k!LUgUGe@yOds-zsmR zDgP^5>>e7lhcnGKfj-lpjVGfenOX+`x$nT2-V*D>Ym>gD)+POe$?Zm83gZjDXhhMz z7OJ->UNp(_ae5}{zwcXX-V6INU-eDFSf-1Wegj{YH~p2v%S7Kj&S(eIyoQed?dH&>WR3o76-_w7ClnFR{|u6`4e!~?fN_?g+A*x z-;TJjoR3By*Jh1mLd*|XbmuuO8hy+*ztH~vNQBje*DO0-2<$V10ulM-xV^yU+3fEX z%Hb>*)ql7?U|Y0I_=cLl_G6yE!vta|0STf6*82y}hP(+WLcFEK=*W7-jmS z#?2S{9LGg*XK<&E8p?;WbV&@Ua9jo7qZs15%Px)k`>K6@LYp2@!=)WY{L)O%+$907 zk6G>7Q@&4tR}E?(Bwm}PQkq22-Qs8iZCs`mWTFlAiQ`V9aE{ypqA2t?ahw?_OQ7}?>^6$Q>1BMhx28XwhJ;@`}+f6YxKh@fjxjfuspw=Oo({Yo-70oInx zl0H$Ur^}0UpU8Tr9J=vEQ?eC-3MUXwWmdT4F5S!d&{?W&$S!ek*jj{Ey!&n-YRpx1 zo9LvTBQYIQ+v?nx{7Kiuf9s4ilir-!Rd7x2C76)OR-7`vy}b!ZSB&TZ>86VF%#g;- z!_D{vC1+=Up#ref=X`wmx|Xlrv=+sD_lOjFW}h32ZB6U`)8U;S+WBE3%>I600j^OK zH0E9b+Nq!JdV@UIH=~{dA1T=;Ja9T(zROOCX*kgw^2L>Y(KtvkH;2F(hDXLKW_f!c zfZWtuKM5L}x=4jzIIAm$JA-bu6MCY(1J&A+E_Jtq@2Axb^~2tApP;b{$);7~V9!o> z_c7dMM@*q2LE~?3sNnCvwnfs%+yF%1X zI|4Ltp}>B2zw#Zl2xh30>S3?VeNzORsv&JV=WP{m&QjsVUE3-Q@`TvP%#JWT1=HS& z^BVatY8BRtn+tWqSeV%M@MV%IE&hujEswZ)7Tmpj&_;8>r7atYox&c7B9>2nJb36Veb0m$_MoYRPtMW|i4DEa8N%KJa`W8c@yEcf)7JMEVe&CTV`3)XB ziGAy^k}dZ7brU?fXt6b;m2GaeQb071qH=ALv5i!C@!Lb#KaR*FaICU$(pHSR*lCe- zttP>cc723sO1pkA*@cr(0p~SGHb2t@Q5q~n z4R#m^9b#OAX{HnKrFsh%%Zn-|O+RVf&GU8dY%w=|x0P;G0NGz)34$MATC-3zQi#UV zw9K#33X;awEXo%q>jKrHSMvrIdzTaPsQs?qE|kp~Vq(>Z5$s1%v_` zKWY(XiRU?tOy^HLNhM}==}MywzJI}Pv^~kDIrL;_N5PAoN5yi3jx*^g=3vUzJd%E?h5l4dQ+^W)a6oP9T5^f(G#8!Il#wNK!< z>~rLBaty?R)ww!nKjnA(fJ}9srQjBy(=SaEU5B0SIltlaPU~l@XoHVU-&cAgOHI^g zd8*tPKX2G;;wH|<9>uz}uupF9Ty=#TGojYsjZUZLxEzb^pvd#POKu3!nxm!6MuR6z z?@k4(<7xtv`bq}r*V&aD=nUfKFPE|;y&rR9tip*6iGI7AH|V&IdVTFSM6U6fS2}}N zpXW}jzI;0>>6c8Y@Ngc6R4e9SzAy%jcsIHd;ToYB=F65RwC_%I{$jD3yxi9Y%Us89 zB+=ZR{5ET*gim1SDyZ*174#8ri?5ZW%fn7sX|#E%FBV?HHqtR?WvXP+0?$913pi=+ z=k<}ewQ;=)D{8iEx$LUNwy8uT@~y99FO?ZXtd^aK=i4y_aHhZi$7gzQ*l*4H!vYBYefhU> zl3x+3c`koY{!N_ZF9DQdWbLi899H&BdTuYA9-n|d*BC68tiq#X2t_~{H$nCMC~npo5A-E>4l zm;Dwut!SO!u~y0Lag2Mk%%qJ?D2`}cX2+H2l62zkU2n1k*+KZZbKf`V2gz{0-zaerm4%p?CtW2jCxNB0ziX5=jvJPiv ztB576!&|c0rvp{q*DLFWafva)Bt0YE>(htyLca-~g(={2kP*KT zd|2RE^zS5Rfo=OYcwT{He+hu0w;Cn~%dBgfO1sGdHxSb}$Z7%c4F_FXKzv6aoE8vU zI4E}=s>ea@Lx@C0R3ucj4!y@gnhOYR82U)teHHJ@PF=6aJD>GJ%-ZgH;S)LpTfi~M~$X2j9S)D`xIie0n(QNUpJ%)5Z zn&KwQS)U-{GlqwXtnA*xxH&m=M*^ftLO2=Gd9EJ*Wp)c}n(Usu0<$_$yE{ZqfUHP} z@Y!7t6-z0$N{3Ow743L;5{BF7B{J}jU*mwX zY8bWLZ?8&r=cLR^rOcHg1AXFJeZJwV6fxt9E~?}Nl(xbjzG~7GF)^Os@R~kx;uEFF z2%t-wJ0}Y0k~WOeE^o(ck^nv(i75kBQy;o*(r*b6pRCpQ3eIHE222_+LuMcu>*#;C!nPGWK$+|IO7QMtv~{Xgs}ti z?(#(}GcaY$YR$I4wC)d-#~c*MrZ;zpY^({B}{fR~WT5+)bbZq58*PSV8#69J;&{>wmKy>)(#}YgR(VrO+i1 zT{qo{uJ7mCj;^a!RYM>&e{m%wd9 zv{~s}ass-T{oVK~b57JhqEJAM<2zZ6?%5|GxZpMKKO+JX7*1;!+9M&W2isYU3bD9c z@SQR1y+qb}TS%E9Eftw9rx^Aq>={&G9-L$l=L6G|nZ1jN-J8AIhr1d8UhD!B;%I#_ zoU;m=G0z=C-$5KaIfv-j?A!}ZtX7bPHq;LA+PF8Da07XUpXkoB(C_-V>kSKi%yqib z`-qzBbRSsg&wt!C&qCjQolb~_zEqWN7jQR@Wgzc@4fgq5z>2x;p?4~`t2mRY1Pc6m>M5MR*qMc2Bsy%Z_m#l4dBvC{s;l{CXET-> zaRyC0{ONrtyk>2oY!N3uNZGtQwtaR_qpXYNQf}B!v>3?-Z8;M+D&Sv;RHM7OcdE!} z)}Sqm5VL@)<)mk;Yf7^(;Br4c1N!x|p(;N%;0M^1-ew|P5%<&SSG3)?=7-_zCp0MOaCq4EpNa0{r={VoNm5mpBZ2 zudaV zZ*@%svUR!sBne)cQ-RCdur1r9?_1smo%U|rXnS@)32OnFP~4w{5(C@8TuiJA7poyV z@hs0>p<|}psS7311_LJ4sYl@PWK!O4w{$}y4O`^V<*1m1Wt@iR(h<9<8;x4+35!6v8LhM9@Y-`>!C`z4s?8>R&mE?f}b+ zBS_>Uw7WnKpAK)v#O?#K2G<}mxIa(?Y1$%8S6*J1IT5hoc~GF4{(Kyy%w{Q#2tR|1 
z*p(@VGR9ry6!=7c8v{i!YDgh!s-WT=B>M=04gYL@k?h3b?8!+0p&L#5~(>(5A!S-XS?tkxyl< zI2^W{!7%x9q6`6D&3Ul$4&WbwMHY}?ICq2gI21D_t~F8`!+qWt0!=b($@VqXBM2!B z_lKPZs8_yzMs=^4;zT(T;x0C%y zG9Ija(jdw{)lvnksjEv@Dw~uIKfHs;H{vgHBT>kqy`k5r3f~Ex(AjKKL6N%CdSF2_ z$Z2L6>)evUe%QwsRC3Y7kUrD4Mo$SuPGZFkTJR~99`{V}pZBaJKJ`iP_jgcD1J!?d zNuqEP^#|o2Zkqf<_5aev6Fu;U(IV^XorFhM6j6r%T>0KXcyw72r33!wSY-X@E1Fzn z@v8P;qb+tFm_I0gyG^3{f71VdMW_GIc;14);&c);bG)^$7l8~gU&Pg!m=6vlAwxyf ze@m3f<@unVILPY#=8XdQ3b-=y@P0#2%~-cX$60w0r>sm>n5O1SzqGXETn!>RVQ{5` z)chXrdJ7e{L9LIo(XZv{>RZ)|vNvxqGP;fYvi5m(6NK$sM;CZUgL=k%crUS7g8pvO zGZ$GDkM5U>LHZY4tok-P;4(o!((2V9&8!$g2_QPbAg}sOPhP9PU(Cc~*LOc9as@Tt zxVi0#;38Btld6y$9rw-s1G`0NC#3nARM5m^>4yC}0IvMmk<#7^vMPzO9Fk^Yf3|zg zwjZR2vq!Fdj%5%(#n%ZwXNS4&$+c17myX61DqvX;6b-F4mHRZ(opTDjQ>P~S7|`!Z*y%MzEx(*l@0R4Vtn{57jHK0Ai{gF%5gWt{ zkUSpH_!@5AmU=gW!ENU%>)UaC%k+_I;l%c7kRL z$OCsv9QPAiI-3TF4ihwW)lQQew=a<{(erY?*BDjw)Gn6|FPe;5b`o+NkJ&FBuH>VA zUeH*WplPI@AH^GCPOw?wi=b1_&$9k`o$!q-d8v!@7FH$4hYeg)sx8Bw-htMK81~%f z6F6iJNq&QB-L%2wL@UNU_qhakydlYVkasttW{AP9mX)1F41H!A^1h7-F!?Aq4+83y zK{upBIizkgR*9fZ(YW1n)zJ*|7k2H5+okb%k0lS|c$u|6l3fPLzJH&914s4#eboRy zxUX6o$$ns;fD1=8fMt<-=Q$}TaKG>iUNZvqzu3DHc&PLDZ`_lRRLZ^4v~6;fD}+vD zq`Oo?Y~@zD&k3ciu&7KFDn~^~h03k6N|7reMv5qm+{c*z^Bt69sQvB!e!KtOZ?EZV z=6ip>$LI4r@8|uzpR=bA=R|7pZdeZxQY6-rM~5hDx@PY>UYrg99JDp=PP2$b_R10k z(={K@uouC`Th|eAI@fEA--asUn%Mus8t?c;UZHP}o5SuJd`QbgL8Z$Yw#qTf?6Zu(cK}hNDHoVmMkc z>=lk?3VUsYD+YV*z~Ms_uC#%n3MUB%rET`c`}zJ~O5gS9znNJCvyK)r$y(=#e{do+ z=n#ptHfmLLFu+lNaTbn%O_LijA%M{h|2RThI}$DeY()EGxn>)j9&5A{0Zw6-JaS`5rI63ljrp`31S0u7F+$CgfSvb@OC*BZf87$I)&0*VU z*d<(-5?B{jgeI+q>jH;_9Ug|W`MTN3;jQ6M5p12?pmP|uHiEsvbxDN1!bw%YUg6Ld zu-8W9NeH~&Llg(D&@aFhy87KecPLz;`>B-Cy7T~hd5Vwm7-}k=SWIU#Zko?mYAPMh z>fuo7pMsw?(>F4wB2a#{D}5her}+%WrDoD4q5gjjSb-(5Ss8b{QU+Vf}%mGqD5rDY85 zMVNvJhU8exU=b!~HD=!~Ch@gwr&w7|JzNmc7t5Q(V85B6wu~XW2vZ%wptXyMc!lW^ z6O*kj+E49egS$ID+4d4L5Yev2}011Q^oMGtoS z-|L4KFu%dZpM6s(2%HXYc(k`Ge#QjiFrGDRr}$Swm^S(p!`odi*THkK z6*LUMWrE%?(jLDM9+?}UYtTac&lfe{Gfq03bIM@I=c5#*{S*(iovo>bZ!qn1Lf-(S zneE{uX^ZXYr_J?uJ2REGaF;&U!;-6I zOgmR5JK9vfc+Dp||EEJAV~np;EpOD^1~4P*t{diVNRXPxl~M@^{zhZ(s~Qvt-@a9U zQDYh9K^ZVsujgKSu;`(x&yn}rnJTsdnsAN7PFeQyU)tJM8r9JD? 
zpc~P^KkCALe@^JH~$%v!?eO<~-KL~Nw2gx0h0CZ z?=ySv==Xulc->v749V;`{Qi2fbB)GbyITh_1%w0yO>F@*)jR5FXx<9w!WMKn5e|Q- zN3fjMLO$0UNR|gMf0@cOEc((?7T9Ua5tdrE3|QORZZCmU0XSP*=C|E`hhnzG8fY*9 zutZ+_-lB;|01^f4^C0A#*%+$!_7qnpi%Oy1+qUxz_l=8RY!NEb)`8CW#KN16#F;Wa zcLgC=fGY#?fM%}pk#&4@r7;RYdvmtFcb%!Y->yyZ>a7a`zuwud2Eeje`}K0FJM^qO zGZg@HNG+5xwHp%r$t<@m)DDhrV<|KO_DJV#XL|I!wW2((q|NUZG_e;a_Ifm503emq z#>)#Dh1M2ocZEA_XOwloE4kNfkuuYH0l3zkMxkY^6wdlCkPWnJbJFl?)9CB)16;cF zcUwgG$?vF($W^ak-9<4$@M#8X>!E7%kdi;2po+m4h>teJ}ORkyl8X zSgcgeI#(WhNDNS}1#;r*SO;buYL%*%|mI1n#vKc3OUSL2%h15sp`it=LW*~?*7cy`POOm zvAjq^BMY48TaD$1Ae9WzBkK)GI6o~JF+GpGb TK(sv~ZvIlH>5XRjZ!$|P#GeWA zmT8tBH!gk?5C;i%F=BdfIetUZlf1<)g0ZmF4=ciw+?#@jTsAk%X^&^68wsb416kN* zF`g=fm(0>mgw4F71xR#pi*S}1V}%N#gxLmUM^<}1-=z)@j_IWhx>#PK#jyzVpZEJWvEj%kSW zZ=uq~vPQ6%NC?td=V;Obd;zoopBOE`H+F-=a!APTF2wL?q0kUoC{%|Q3dJEo&^18{ zSIT6sZX`tTXK}WliV{xa zMo4^1!;NG8D-OKCU|2Jq@0{xSVZkiXC3d#ktDHmvLxPQZxQj?yeH}eNEa@V-X|gi2 zZ;TR7ry7~RQh%@IWL-l|#2p$M@?m29EhJG2;kWL<;N?bhXVn*S%^J@-Ov25K1p+L@ zyo&UA?bkoc2b#-5-0B{HXLU83+hpqeXKeO|7sJad$a|F<)H_B$1PAQ*lR1ODwC6@21slwaHQ(vlM0&4WzU@W9(&hm|0od_d0upEbN|Z zQ&G-~LL=cz8-a?&%i3k{Zr@`Ixg$U96S#xCq7j>nm@F@f?DtnT!N^!M0KiNhjDY7~d zD{weVUpPN7$$WkmAFO59P28@L)GbHav>{2{aq47E9y4)mMk(R^Fohu!h%j(EI*MHcuLW8V?`INShp*6O zYJIP{ljFZ*21A~kh!1SP*rX-#zz(~GqpEV>gAS0|u` zpxGYvK5sczj&!=q(%aWcX(aW-j!2q2-Rjw(nUe~_O2!-8U&rgm1J z$a0w__Fiib+=5AQxqw=yA^aVL{bs@{olm$=Sleh?Tzy0O_3qFUZmCWaSo#jG#B0*8 zogVdUk6{NZ@*-WH*$2%N3JP0Rr2`JvdP^J4qj~G(0=pdWL)ZIzc%oKhE?XsVpK4%5 zEqh{_pi|qG?M2C?3{cxnQTVzq&9CYTh%1+=6A3-8wz{=RbLU6moQ{tnD(Vf8E}Z>~ zY2Pg`AGN?TCGg~q7>ViK0&JbcA&_q-6y&Lqq9;<0vsS$pPeNZJFA zbQWYtKJH30E*XMlDuIvXG6Sv)9>3m*yh%4)W>}2wX$GW=ZEj>+9Q{0nOBnJ^Kg!o_ zwic>>jGWR9W=&WO2a;9aE@>g&VNV8s$TQ3Ul8zoguIDW;$S^A3$o<;^!fRL z4^|k}KAbaGI&e`u1Z;+2Lr7{pk`5Xl4ojNLH7}I9+3#!9KkY0F@5!N@r+eApjd^|4 zBbMB!S>6CmR1M^T+#0T~@@##9eD=)_xHzYer2W1o%-5B9D48;W?O_Ar+#>cDPMu4P71(LPMV-xjO_%^c1*N72K zLnZ4RlDUWki=y~$(iBkq)u9CJXNCa8`q=%PB`bXUsD8>Z$nn0|PJ5@cw2op1A4sXU zShJr|Mif|(WqOC#@u|BruW5&@LY3?z_L$B{3+;@42%doY`@a} zPRL|{jy%F@O{~?LnCU)r)EdS?;^_~t(|jhXHH?GA-{L+Q9@~*JrFO#v9f7Yf`0xVR zW>8_6@-rn2KC*zW76asMfT#NZ30eM&f!x0YJ^s@V`aefm|0ZDXfA2K@5F+`fL!95< zVTclr1}LQTEw-$N!C)T9X8Ehrw6rKk=4H`-5{5hEuVPqNIY=zwEFcWfN#YT>d<_!@5|!>|j3=N(WB0|UWg8@RS0>|~2SEs}3B>#)Z1sw7}_1Qf{I<4-Thw}7KA z4}9$>7zA1o$#Qv-4sa|O_2dIztLCkf54_igFfYurWtv(vcNWSlu@#S1#61m&)5(h* z5PJ>zNMK6y=S6-9C{S$IEcXNKARz9B@zzJ0@M_*rAw(R)1eP-r z_p=<%28)^91<<`^>ZQP7yvLAy2$NIJm>R}%_!h7|2UjIhoYRB0TC^rOkEV#^qgBWCLS zVKWt>NvSb{;d(tj#O_}0&!@pMWr(@YPz+Y^qZ789-oRhS=PX4%VcV^KHQxH6Nt5Z( z`=eT5x?()nC^f$Fy{RY}O6#n=D+45~8i<|!mzyDfHw&Kih&xUXmjW_(1mL?Kc1Of_ z)C6nSfUpVryY8`SsH}E~Zx|^U~MEQW^I~I3Mb)AfE3L{AoB)jd5|;Hd1Mf=+1CPsj$;k zpnKwV!;u{nL$=-KKrT+!shGF&Apb!jnP5sYmF=5rg&ul(IPBwBnXBB@=6AW@A#W4q zVSOs8Rw$TN_wjA`{jLA|DG!Yk`rq9C>N6$ZT~T{AOAY(>ZOxjM9He3fBPxT|ZOmKH5tLFtVtwTjD5o)0si|+aAc%94 z`axQlRr9GSA=QJBSi!KGKdXm()?9(CNKD`m#^N^U=e=+}vu0k_Tme!3UR}@B)RfIH z7%M^;9?RD;1j%r0w&3MLm~8mlI*Nw=TJs_*aDpVF31)`n8a3p%sD!*app`$#6K-G) z&v4EvXSR9FdfjvlFP{KJ6n&-S)pdA7A}bgGd%JLkb90He5u6r4h-WNwV0rF=Q0eQY zIf)gxnP#A;myGqAcG20>w5_fUZ*{!_CkwlsTjd=cjfHtdrLsobEqFr!>^lvNDggCv z>l}vN{*;W>(c#_jSpLclV8GE*k{38 z32i!Kx#lFff0x@*DGhuxMKLD*)m)d8>#BeI5WDrj%|Zh|;@!UbOG?R? 
z53xJ6)ZDJ+Ii;LhpT#hXe%UgLlcE74{5>~*?4`eW%X9uzT7D_Iv zZ*Qsv7%0X?zNJ^BB$JxaWx$ZRkPUg#Nl#&YN#u7CUko>Kpv)ea=!IZa~ zT5BK@r}9F|z}c?r3}h_TtMLzt4Ek&&B;IY>^>F{vvLP2SR?0`}#|quT4(^^>O5vTn zOqXaW^}RZZ1x%`k%{R^iBA>mo`aXdOuZuSGF|L+`odeU7+LB;@2&kUY_b^K4Ih&Zg?_tailO%&0w_O|ZR z2^2J-o{h*WLh|qXBqj9-F2hpH8aslfP}WT*MLWW7p{!V@>2?0VKVNfgy3bchL|~)Q z@z2FZB@yXsCElf%24GQlKAUO(?U2)o$&X{UtxYeL;~-T%gaDXrHwfQ~w;;%lefKG* zYX>ovbJFio ztFQ+rg8cbcprcfy*`UKFrq^wI6V&aj_d4k?J))hWW0`b>JYUgSh!CnWlcta!;DzUT=mTw4h5c=&F6_&zQG?X7}~w2vV(xsu1Eas3_j> z(u+TWpP>S11IkcyFqIZo8c5~5WKLCBW8dRQ6*+OMe~TxARf`e2XLY9g4NLg zgX5RTAV7#nY;cf$*nx@yFz}M?2#zO4g1{R1C!LxHpVG76@#5f2!T2l}C})o!*7XGu zGEiv@71J~K)8f`ZbbF78>D8XOY9gjL;HMP~qQZhfHGU>WG)O!dXy0oP+eK^^a)tyx z1j+-SVExZD?0k4YM^3#WG~km%rZst&eO~7^z~pN)_x8Kx4p3Ad*0;IWb{iCm94RsY zwaVJ?^UedOyoPQ<&f)>#+jVlYmhwk810npQ{wOWB;5e!Td@forA9Odz!|v)0XWo#V(dr+B1VV#Xh>JP4GU)&{J#u zlkmUOJ;YS?v^?+sPh{A?6*~A=GC2P%46alOR1`$bKG)~AN^V~cf}it5KRgyIU&R>R zEy8zybN+V0AdEwSpL)d^4ME2|cte82jXNOq#RA+31;SD?s9~PEr!1+X3`Vy7zL01? zEsV0{b9K-H6$-|2-8IkC0bIks+%FV>4H@_a7((<4gTZY;_9a~F({pli-TmeUYzeGO-2 z3z9EdIt;ZCJDq8!`ewEDwz6CUG05<#WRT0;V9|A6^72`e^9HjUVH^Y-Id|6sTO;um z_dp_Idu|8(@DN`SEw8@)SYF})46lad80B8S^J25?kl}b8P)a_r!&Ven@zBUtYhjmg zozKQ{lGtjF92#hgu6%;vI{B@;w{fn`ERF&l@ZOMsf>$%O?DjVm-wP-x1x<+?Cp*te zf;fCm<;BE)`5j1JP>s$#h#>(EqY&5^f)jlEX1kAE;E8)WAIwiiX2V{e2S~(S#=7aq zr2-&i)Pg5w=#+t%=i0Fth{lGdhc-h1I58RP_WV~_(wprMRbVHG2$djm zFzFi$^7ptYltB;}$T2qJ+GZh%bq{aY8;PGW3zfcc<76Woi4q`j^D>q#O=pL_MhOLR zg7?hN-{Y)M#-*`5%!wO<$|1Nos@2Um3-Yg4;_zlSuk>#W4m)eIr3K$RXn7N~c4hU{|g2LZ~JHzBq(ZA48b|z!E5%X-dskPML{+ z1aH(qBWb7mkHsH&`nY8cm`f1(W}k)2-2F@mhN)n6tqpkLXLh+(`kq?gy@z@4P8`oP zGIQerVMUAIR^fD1^JIZX7Z3+pp&vEmI1PQ20~aCi>^+cj6pPjn72Z$^QCs0Bd`Cf1 z`Iz{br@6grHZh5VI`q|~1?87`?`C2Tk~HU?uj*YREO_s8{hNpmb2*Np*L(wND4Cwx%10t{h+#w-hS;RH%G9WPkW;p@E}2 z8Seris#YvH;7)0d^`>jtF8G@U8o5KPWgB0BqCZVWSGAz_(uRx1#g*+AYR!cGHwQvX zuAf+iE6fzG%GWb%QHa^nOazMDCjaK+%|d&hUu3aaruO#2R-HnJIgo1H0UP`F5YK+F zDY{OjTz?4%IY~RnT2)}fu{~Mlv77VPbFAHmLN&VjUb!HwKU38_vuZUi-cZ*1w(kg=E9i$|hZYZchu-0splL=p-`c!alv$~qn8pT4B} zwG@~DoLJs2_)|0RjKDzmOSUH zM=u1ZtqAJ|Bn7wZ1+`cb-#VykJcleP_!1fa9O_P8-8}H*@RnyT;Ju%-AdaWUA+HRH zRwHCsIu}@_=M84v%gl>>=)Sr%2lQ?GS+=CF!mWotk7al=WcHLc2nV$SO4cIN)Tg<# zqm^sDzezbLKPN-cj057w02vzw+Bc=WTypfv7s0&Y9Qj8Am#?PMbmzc{q+15yLz&Jr zpAohHaq;6XWdL{-t}GkRRT@*8fSx4%xKJ{`9q%+B`mX9)ngk%?!_v)I|DyL#Kl|)U zO4>GsEARoue>XoZyI=MWBqWPOlMfFl;|9Iv?YFGeuY1|;fDl52kth<0l|}mT4FBY} zL=}(vVtE5gHP?~lS3MCwN8F3C0={H0P!jq#5HtsV#OQvue_$zcV6N_9NG{@tkWwF1AuOGnnf;`q5emNeX*tMO%pXX@`W(eS@USUn zfaJt-McjIeL>c&od%v|#z=HKGVr=PG85%==C73^*W?sJeVFAEyU4uy4L$2U2;?^A9 zpz^Vob|lClG)8dVi!Q3XxFc5($hlFcBEWe0rvM3e$im$Efo1b5#v_pBxo_uLliP6q zo6njwttbXI2R@rmeV(YZJ2P$9Vus;tr{;+1eF|esJRKnMLSy+Z$-&EemSP4w#5VUT z-`mM34z%l`O}9)L4BUl9U;3HELqG`pX%9$%tZOR;MLbpR;T2V^bJROF-GXyHg|l0_SsF^AJZdX# zKHGbF&t;y>ZLurfI+)pQ7~FL0#i`mGWeeJs?3z<13Mo5eyKT_25*Oe_KG7Rd+E zKWEjzGPp(lCR-$g^DB?wok5h}4u5(+Qo6u1pnw(p>fv%ga2FyFlj|ysrEq&!2Hpc# z-DVu21)%t6K$QcgtHxMDi)AYkzKs6M15dv|YQq7@|JV)rA4odLKl-PAGm?<>*Jv-K zLGf$IOpF!cO|ZxRbABk;;LYSS?><-?eHmQWxBFENEDfh}fd1HsGaI-`_;%@esH%nd zn+PnlnIy*bU~>|Jf7t+8-ROwFVlU{o(Ck_Zgh8gnO_JqP@8?cp04apu{D%Ri$kNLi z68AjvHZmAUnj*WQQ3;~?{3|NAfEI#&zo0f!ucN3KUlCdoz>)}WD2+TY92)a9E(Xu2 zb%|zWrg0uZwTDr^HMJAocm3KUP0%qrr`uP*QpB+&2=!=1>MOTa!8?E3zFNT!KV!FY zE1MGTF)aDzRlHEoG664#Fx(%l0wwG?|5QwZ!Q+p8cx zH!SOha2>J?C!9WZW*{4Ae-CgMZoClmBOLG#z}yD@1{5HJV**Yl$P4&cv~e*dz~aZe zNEn)EAmKcI@C=|YC8@B!zf#kD=e1^esf=lh3dA;`%z!OkNy(pCQ%NL$%`W~{rFIt2s9#JzM z8gYL*;g%>;Lr|z&rpUk+i|KE5WcJHZ28+(Co-BNc5M(oBQ|?9RlFw^5_VqIE^VYUm z5I?5C-A=h?AJ@Sx_RK#SYz)t_?47CVqfBOT#X8jO&n#>Y9h_t0mp@j&y1dQytl*ZP 
zd_7-Evp(_rjWN<-BX8;|9$dBwk#SqB!zI;)%F54#&$;jIuifxTidv%P*TV5@SjH#& zOsdYa*4{I0$2|6ibQE^?Q0fuQ>ixB9y^U1B3c3<5@kH$krdmAKPKfUfRx=nZzPT${ z(pkDcCCbMQs@lyu=pX*(NSC$;<;)JMUX$8_bBBuRrTQLgYr7}+mqmdRT;Ic2vHpr} z)*i_R$e9XP-@$=;3cINX^31DZK#*-z$vVBQ9U`oA-iK5B*{E+*qQXD-#e=?G>>2H) z`zi0WK~pdGXD<Q% zi?!Yy=mH(@W>>c~eYZ!soS7ha9pC*?j4HUdxBoh`W^Ho^#hoqYGfm^#7g;J>m)eVjR?Cz!_lE2U(BwHunSbDl#+PZDCa@#`PZuieDN+VH%RAUc z6`H51Up3+^YB71`Mg(B|XQkBXWamscy4sPOi0? z@0T@Y*-akF?4MH^ch&Ll^AWJ=@F{~ggspmLma3Cv(0uhnHveE zF@7{+vhdbPT#%f#vx2qCi0zZNep4g+moAqWm|?Jbo~UJxSGBtUSGJKNqeh*OBcEUP z5l8M#Sz3xLF?BO9oj7JAESa@Fl{vU>#=aBgHbPceI*uM2 zn@HDX#$EimdB#S9XMM2JBmJ%sKi+47H1EM|3nJ4oBjIz|>dag!1wJf^v*u+V*!}t1 zApfZn{4*n#IPV>a{N-7DcC#g9Iae?U81Y)1T$VUDIU7`el#O=v_hnmEFu53seDYBs zJr{{;+XemFp`4WaZtPsS-&GaChRG^tz~-Yy8AE}$$dLi#US`!3RpbS(Tn;3T-r21= zt9oZ-oU|sX4p&VT7S0iKoT*D%qN45JxBZ3MS??{|Vq>y`wT=56zrT5m+A-bwOsbm4 zZV}EW&e)mDznkMDY3^=r0YfpU`jE(yoFiYsrEIwP(?&Ig3QiY8sZU-MGHV*vT1Tfg z0ZLrH@b*b{`|hWcx5j5LIlI)1;b4yceG1lfMoQg^UcuHZKJudK6E5VruAH@5Zj*{W z2)C)#4d!ajXSqg=Y@4$#>WHg~FfMBykvYg{2L6Ppw9vsUJdx#&5m&(pr_HQPJc?v# zk$G8bb~B$b!b+U@*(1<^N&b2Ax1LRwFV7~;93U@l8YnZt6kjAur}%&8%SUS-#YbP2)mHa|if0~~IV|T+=WgBl;pQ0?+|AfV(ys4wWW;-GNiT@b z+D>FGF=8$7){_=!$l6C_`&oL{aVG8#BaDRi`o#GcvbOGKmC}8da9sR(%JesH@wl6p zjZsuw=9YOC$x1xxUG=@Mo`_X88~F!U*<1XCi{x!qwI?T*^kwR)!?<$PX7`Y131=HS zGV0dpIbWil>);kMnsfG~jC8kdY>cQ(6xLPcyWTxUZKDNgGoMlKw()D`$ktEWwJ?pp zeGF`MmWtas;rZ0KPvJ`A$RjDHu`MQvU-B%mVSSOaQjwdFv~Zv2GaHup9Jy331rjce zd$=5lr$6;B(mweVHLt=tE?0~}j(#eeLG~*$Ct`6db7iZ|UF*o~Ux!sWai~cq>_wkX zfb@(Fxk#rO-2)v=$ZvT;hRgBCZ%A`S=guTz?-sEW!gO#+AUZ|lD?=6OC&WWJCyGUWBu zZ#5u@K7TM}>xAjHt~U=}zn_uUPv1&pj4{4Ik)x5MvX5}a1}m96H+9A8%Y14b}_EFD%dGF>u$#x_%@KWR2=JXcNQ4O1RM9-bQd&V=;&%3#n z8ZL_SEc3Wvz@!z@M|Vu<$fe&pMh&WjTjo<*_j0MKEgP$djUmZiI)2B7>qRb0Dtj<# z`M%?^Hk|RfoT=Eh+-1ECs^0#jkpb^@_zvX$Ww(5(1VpdXB))hz(rL~-d*X;RZ9tH{ z<2-|LTgcPA=?s<$%}YFZuvTuCw`ptFB!etoRv9UC#Mhne?#NW*okC&|^iHXpC3sTI zW=_q;ix~!{^N?4tzWaAIZ-J}o9i)HJwyK?G$SdSiq)N};kbQVQ(^qTd9bf;Z<4bkw z>zkyR4(3P@Igc4i7I-b6e=6&^PhTR#yqtx*XKEWRk?;yOS{X+e8P5mSApJ`>XCe>c z|1$1160q?2!7X#o3$;%hk4<_1z!>W}(Md78!_b6x^sHK3H|NsH6*lu;WILrY@YT)U zcXFxC-1uy}RE$EMTPll4mJgZ7b!K>0jE#UQ^qMaKYbo${Fwm?`xX{5pMfY-a-uSl< zgUbG@Hd9#g6)Dl;ZOL`Hx4sgO>rWH zo|E07aYMscZ|F{kxu}+!|2T{DPR>-l%c!ehZQt)cbe>^u-aFZp!Yn;Sw#GWfNg&NU zR|b(B4YG_Y<2Z=s5N(ur-a^-)uGOXU`k4?Jw}G=j)#{q#9W8st{}3H_XkzceTArii z$WoqrGQ3lt$%yDK`PJWxmt8z{_j25!ZoZ&T59yI^aZhX^W8{ju1yz?&6eAtxns-Q`I5e)-e9m@clCD3=XLc{TV<1)m)IoLAAkHuO|=-L3LavYHF&*AWb4jRt>CuK z3MJ1IBzTeK1wC_Zn8zcC%~Hi~b7}8jUQ34!W=s{l36|M$-8y+!*6>`RUd9aXhc?_9 zo(a^e$Q{I_WKM9`4<1`Nd2|?gJZZk{^A=^6EvAwph2I(-#(I0lI?S`8!(Z7~V8ERX z;rvG5GFsnnmBO1{frzKuQJwJ}L&I2u`gt|t30mK$jJG>KN`0es*`G2$DYInA`ujhl zZ&OdcdszA03z-GrC{e11Z=UCO-Mkr_au+CuvH%jpm+Hnd7HApq_1HzS^T_laSxROc z(u4X{jrA99hvZH3;t^;d%O$6>ZH78G3i8I{EK_wXtQG8`hAS+N-|Fa(#|T7+*TR!| z@OLzhqwywUWiL#T%KkZ(an~@4^zFvEIZ7uW&!&r&+Lz*t!ZG1Tr+e>k!vNXP-L^TY zfpzs?FtRBJg)tV{fGJopO^vBbpVM>nn0@Nl1y4AT^c7k2>Q*UYzL+Z%**Ck+BM+x` zb;itHq@SSnhi`HrZMCv^CI2tnCDwfv-1lsT)PjmG%Z| z_4i8+R}fuHp@<*>7-84*LxXI?V5d$dHMeIqGFlj0sU1h~PWgRAHNqFSed72x{2C%y zr>NV@b^U8ULp}E>skqBDME851R<{#jM_#~uB#V~ZG$Bg*W5<2}vimj~D> z!qDR; zX(cuH(=n)?T^#Bws9-xtmuapR+P<&g;D%~GH=*Pchf?R(M^%rda=CyeWTTY^9L&~@6n ztDRL<2LoUYIdIW0_7NdDPMiVr!3bx~ffgWaWXd+vjtX6GeW*&VbqxQxv^x;L?K z5l0q2^-|SR=7D`s*R@k#M5I56wyJoOQ(vzxZT8$e6TuWXZW6hxHe7?8@)#c{n`6s9#vJP^pTeNS?NL}Zm>ds`AhXYq&d>O@eSSl*{Z=3 z>f4o`=Cuu`(pHsUr5jG!J<_tpM@CzAvdWxx;u)P>BALq9Xsc>D$E$|_fE*bH(V+XZ zbI`0U{H5w6t+mmek|H7lcW4*EyQ%L%$06EnXu>O=sG>FfV2)k2`}*Og<@NUV7SL9G z)UY}9BtK#&gwUp7mZx7lHrpz#8qdt_|neBKq)(KF*R+ZT|=#u`^b|5RkaF8 
zv@;NOQ`5Dwo<001YN@KqJ*2H_B0u86#h817-`l#hOZLhnO9_Ph)Y#l^SP;r&N z0d?KTg+27;!d|S}3s>tW%+;e!vLfLZx6#%WT@_qgz%z2cUXR`{tGe!;Bljye?-2Fs z$o&fMp{^O(ZKKWT{W@P&MY{e?RjxD6gY_Tr8KiY}Z1@_Wr)FjeW=^ zU76R3!R=-E;6XM((-zY%X-+yc@W zO~g(|@jH+vb6f1!A z;{0$}YHIs-Y%Ko+jXJQ^N@^T@JQg5GWcd;f_Ix#ptWp!(*i(!5y1Ft%KRellVz zehq7kg|4QE4H16HtuGLHR6mr zVMm@3=Q9@dry=6mV|~jG4YwH4pM>7ke~D_0!j8<&K|BPtoJtI9{Ox9zBDd6d=h3K zk7}e-5mI&k9;Vuu$c)+xfSyh?5?;*=T(9m`R5_nR9S4EG8H`R>;e3)_v=~Vzj_w(7$mF_=#=v)}oDe=c|4n9Ra~9msaneOu?J-#z8duMM3kDVbtRr^vN*~C2&bj z79IrpA5OEdD%ggvHG3CX&Xo?Aq`%hX_?XTl8{v`|&Bn(g_s+CBY%1<6J)~4>JAmr( zjos53U0*wsFBVruk;9Cu#V)R{jGJ}#8PoU;NU4(PSjg^ z<5*6^sQ%};&RwiX(C*TUmZ`a+jKlmiqp#FZg;>N^VMwkhT3{a%`dTH8a`9O+d)Q}ORSjHlosG=J*U z@S)wo{Fdq1lM|yA?)J$0hqJOi`IX!I?e^8BzjlksI=`vCeIzFHUAGT-ukSx0^x}?Q zy6>GcHGHZ-_TBJEHPxpGFF?YoaQd-syhEUN1A>y(1ueg;b*mKHgPNC|dKy*Y>dN{J z_T)GgiFWD$VsXwWKGsLibarc;s(rFV)x{K^HAu+k&@mS-`pn*mEEyW<{L(gLbU0Ck z5S=EwJYV^flZXfi_wP_|?_^h%ZqG6rrnlbY&RNmvS~HnrK^gL-(`k+xDeZhsCBGpA zz!!Dh`{)yUCq9}qZfGYTVwUZ_pUycTu10kYD! zDKa!))!C|R&hBkWd3nFDbu0#jEe}~O5F~p%hlEPWoXjsA=NohdHz{63Ld#Col4BA+ z=nZ<~@;!!oJ7ekFz5zuu~K2))V$>hZyv|+u+aAm>0yoyjPnkt zttykPmGOjgm;Fjx5&nB(fhE&KNr(KFm{?u)XnR~?%aG0h#^S;1QM47rI9 zQz_s3`i($E&tMQlf-b+0Ny1Qj0b&0Yc6@2zCA}s#>51&uvHYfN3nJ4oqy4|`-;c2W z#N}#=9bSe{2+zkPP`zjz*pSYi%;8t?2TGsFa9T@rhX{-gzzGn^r0?m0!$6Nv0xs+< zV2@dB4DRdw9SKVD(C(J^|PS@=~a9D?0Zm|TL@kBA6$(Jnf9M=|D*CzX?d#iW2X@K z8D4N@m%=n3Bl-eATNGdcQXd&D{rx8wLjSuK#QvMr=brv_{)$4~|GnBRK#A~%ZvHPS zKt9~AKU&oK+ZA8_yaL+)FOIddg}56E-;`hHP^!(2} z&R?S}bpVWqSfQZZEyPFAx9$9^1g`!Yr|}kE>RrmX{VhaCcsS7?|Is!0+c`1ssJdyF z=V;Al!pw@??xeMG=uz}5i2?UirF}WwBXNlkW_xd_dAc&GE)5qe?r75&o zDZ(!61K}D|JeQ-Adbo5ab}ju$7j|2?G@VIeCssS0?xYL5H$0Kf#HJIg5l(MH{=h5j zmUB(lu@mdBTK96XO24;8`0&hFUB@nLf@)pa;z`#2m}>95x*;gMZb<-hphrZrBB4~$!J;P(XYeT zVWU7$@sr5AGD|G+=-1)vuty-M{7K|J8B)iGr4RboR(KF*81TPv(9lai1kVULFT#Lac9o*^_tZ?BQz? 
zCXdq<2#WE|FM0CL)cEsV8K+93iX|SMJ$y|}{*D&qUuobzej=X1y)cN1xx%!S?Ht!( zA!#Mj2SO}U7}svWS^eHubm!BV5c|Dv=+c|)?)Q$;r8lYQ_r9e|Z=%@meNC6%gpL_I zYTHgWV@GY<$!6@RZ5yvz*UUS4@|O3cJno9G)?wq>l|*m!EA2W&%HwYMYMB`aRua*# z!`I=QJTA&t%hY&lB@z8Pd>!t}<8JwCnM2JJBKmdsI-Hfqf#G)8czGoe{W^RdP}6mK za~?Har#0tM({*}t-pn|uk{BnC11AXm3fJL`ua;{CagcEhTO`+yLPbi3wplMRyVz8O zBIp{j^AY2g_)oEBub}oT9XlR1WdkSL@u(@gb)p@QnzF?c?ReCbT|Uu{N0Hg-VQCbZ zofeixk=bcs=_J&Y9dE}UFb))v$7K?VFgKVsvfbsHFJxnzRff@J`@nlr)rkHiTGgnT zm)_)ps!!wDgK;O*xc!Fs&(v5)V*Rb|NHQK`iz0&6g6j zRc|H~V{S2RU=tLwvCAsK&|w(RfLWN(rx^fZOP^-2dn%ei#Z)u{#jh;cQF_5w>g*^> zc9dT5l{!1hk{zWNjG@lbqZu^wnxO=P$>?<8L|_udYPU`VCQ+=mcp@-~VztXB0+T3K zJFO{)vJgH zfY{OpW&y0F56l8sOCOj`o(h-+u=cC0QI zc(s#&NffJ{1Wck>?L=S_WzbFrCQ$}0YOtcT+35{dlr}rP!8$G10<~YqbB53O=A*RP z*{D@J#$rdI*uaVCGz!ITorq4OP;Bu;bQ*n5wYw%T(Ya-WRSGY|VcG6@sD$hknn+%obnxsvJ%5zQ9CPU@9CTWwQ@?4X&$-uxz znX^;VEl_Q)>4nKqZLVpB$^L$VuYaXH7ii$#mbs56fq73RkIw%0!nG)|U=r5?B^FHL zTA)c_6S)>>64+#}1!~GpZOX1hWoEynv5Ug5(=vz0huO!&l>bT+LIp96TOi0l{R5#C z6UDTL?JSp~5~-OG!!%{-gw82SCqOxcDM}}FPEk4m$|=wc*Kuf1|0+dq#6p{`$Ia)>F-&$A!t(!2hXvA>60JLtx*z17SO_*3TVz^!aS~p?L zbwDd7OkXbITh`paU5ys4m@sX*h`$&K{rlB!u)ufcK6%GCOG(sDy@^ncxyIzjm8wK) zVwj3%Fmo!J0e&i)fy7ia1E3zzrx^hCV6v3KXfoIs>vaO>V2t%Tfpb8QW&rc~WzVKH<b8&L1?WYK|X5E8C)#ZAUTbiOA$|ao2zKTojfa3(L|f{7ytB(IU5KIoru)ef`kN zw$oCFr?chmqf7+UVx6|EFIw3aKi+bN%C=}07#1~UM@?CvSD;wzBzPLdVkg1VC>A>j zo<_0QN$~WvrrcrUX-TF30#p|@SkWvnfoTm^Gz;wa@ELB)|$~73Z<0k@>;wV;&La~$3>50H3iq)dDS(G;W!Dc7NwirLls1b+P1#XX7NyP7Gv5C=6x(my=&pWGw__ka{9N+6J;uJ4H=-`w z+a9pB_&E628{rnWO($lJ zh-epPqew&-W`l@&uuao!N&TEC?NnQAuw-5FqJ0m%wMS;&(oVI*rbyP6FRFgvy*JqA z<7_Ma9NJ8O$-0+|R33P11d~6^7S_+XrtN5prOlkFpHsAm-UP3oQ@V)WL_$BOcoDsc zg?>)?B6<@+{hXJJ=uOfdc<%}(H_zU$pA)O?XopRZtSegtGb@P_miP0E({S13WNFSKI zRn?oB&Z(+@WIC%#m|)S4GjqpTwByX&aTe`3Gk5+2?MP|h~rjbRGlmS*OTvuHkX#s2$teqCA{f#PNl^Z}@7V z0Pc7IH_G?haOOAh&VSYlw|uow0Czlq3+i)+jh9zWF6aF3ox4jQ2(@shvTzR=A2k5Q zg_T*wY!&?s8X}^dn5`laotQl$hMkznl!UJUZuJC8!dG*5A|+vrxl5m#Fmozu0)8rL zg2YtR1fVO>rzQYhVH_plD}Z|)Wh8uqj)5`~rl({43lP{pB5FF`vO|qsplXb_>`-Gj zu#$LA9*0`ETgP38@s=HG>=vVDEgkcI-yhLu`==9k&3*HoTI9^}sw${~i=wzw(U4IC z7d3Ew|8pV*YT!=7uTcYc5`K*uxRdZ})WDsDU!Rr7(XsLV)pJn;7dJJ8ea1K6nNuLB zkwII;@FSC;8=;6z(~VHfcG!*ZlC5ZLHrZF`b{{Rp>}xjJSLk*)#q4V~*%;`SHggJc zg6$OK1RyL-K~Aupf}8+^1^VO!AS{e#lYND5X@uM>XkOX3lIqaBvS}@R4S}FPf^YuQ zlhXjNL@nI$7B1kGsD(S;!W|D?qQ)*5yMR}s7VdZpcR32-PBibQNv%R_J4?#r22k+? 
zR6Jo?Lt<8~c-Mg0&y7;8Xj4h?s8a_UJB z4sPr1efDgiu*RAC{(XzJlRKs2*|WS_C#Sx5RUUP_`nlcvVh!B-m|&B|HE!`P`}6X` zG6xzu9BQ3A-oGyicOL5Fm6A$OOZb=`-mp1*fZXrV5~DKIL2S!xH})7R-`kcs@G>Ry zy>CygOUE{k?4IUek5|F_he*%(Y6joM)WA9P)b{%MXSSCQ6uJ*4rKon74_K#nv{{*F6yKUjIg+wzqSjEp4Fx z*vpujzEW$~kJs8V`f9BEOF8T4LoQ0u>8?M4j>AG+Y5-XYYMNqOPY zmf6*ilHNY#pWaz7;x>>QoYvkS+LrE`t9yUwkXxTiX6I+?!Pgy`Lr?ZvyMKP(6L!bd zk!gH<)d$-oNK>tu)n)wOH4+4ibv<-48QP!_uLtspQFMX`gL8KDZ1$cPx5L9 zJ44$Bdwtr%dn>!thLqf22di~N1$%UuTT@@{&yT5f{OIeS*)!B}HIqWIrdHK>)VI7+ z>wBTwQRmS&*I?))?9j&`gIv?&N)9M=BkR{v@3#%r6b>c(yA~dDCy9kqh6+92DXaC8 z?uhgqFB~MX)l#0TJ@2aRe4p8^Ek!NYb*EB4T=VGn_0Je+himXAMU^u2QN*pyH@r2d z|NoeI53nY-t$mzCib&HVU5W};s)|%0Dn}6upi&JGR0NbRLJ&xTqM)LnSU^CEC`w0= z8X!Rgq=QN?p-7PyY6660{(Is%=broBd%WMh|L1{6o2Dj$JqTsG~` zs+ILet&Xcet(cWy0%He>NuiB&Q*jH+J)aOzIXf{n{ zj81pv%*0fuJAsC4(ql|w@f|G3cqn_Ii52HapyraGiqR*I7+;zYieV7PyeR84(TQ4~ zjQ67vm;mBqOQ~8^%zZY%+V%Rs3SC-3ypUYn3JUIG>RUB8potJ zv8o-RFS*ciCp0%1635JHVih5wQb=rx5ssvVP8EMzT|D!iqwMmze$t|X3MOEg zLK@)9VhTZ@RhDMxgl{|qD4ex2M1s7wK<_A|`KOrq%18{;t88XSg_TxDn~0yHtFU&p zGoO&?0xC`WDBbQQ5Y2t{TXo!MlksYB)@q4L+3F`_-&G)#C{}fRKA}#B%1VwTjXiFVx|H3?Ps`7@lGbO?PzP-iz!{BjQ#*EAn6UnHbA zzbvFTR~`>7p_XYWS*xExv+knkOIc-lW%x2Z{BSO2Q6Q^}x#-J4kygf8i#~)!tL5>D zZ>`Kfe!N!Gy3Q2RKd~e;+S)e zq`^+GhH4#2HOwDq8d8dQStxIKMqdAn@R83?hAf4zH&Ry%c%x5THB>%wbyvhIr{F5oU3tUD z*3nxf?!8ltPEd+=aEU%IQT^_6gx%$;OO{>x`P=ueZ3D;6{MD-bbso~-{Mlxyc29vX z9u2toc^znpa8R@9bXiEiFfMFvmX3>~c;QF2AUx017@0Tr*B!;p56+GqQd#kkWQvnF z2YYZkdJ0yEm+!1n4dacimQPk?>K36M$u9r+T{b23F*i)?5UyaoB_&SA9+>q9j^ z$|3F^JG}SI!+UD4iyPWjbzI$hxZ90u1*F#M*Z^IEw|lb4-5IVgnSvR=HT9QYoBJQmw46M zqkDoqPxc?!zUTI3Yw{8650|XV&Fbw$+dXuTwAswbq5k%pizV!(&HEyGJk~$|baR8G zVs`lr{ZB^)uRaP15KdTsWam-dB=Xn68@6+X}@4D~Mz=kdK`;Cui2f%3BlDv+Y@-eY9qKZUQlEanQ%?McbzuN{^B z(7)B-UieDK>s9k!2d8&j^yf7;yIXAnFU%pmAw4~0=lpT0PaX`!V6XSLTu6J8_(}&&SFUR?Fcy996 zK01v5W&r;lW?7e7AHMpisk(07W-fQR$=OZLeMXcz)8`$td>{IMKwtv47i=sFj|{ce zYplahqrNS#%=neDoI)LPcLq3QHkbvP4S3y{M!K9|eE&KY+P>N-CbFPxI}s^gsBowV zcdLE*t2Lb}^h=6R_>B0x@69*9!CZ+z+|sXZD&PR$3%%D3 zhBt2h*ecm>shrTaY|LLpR~fV>VdXO>=1V>Lc&UW}k&v$5yfOa^eZuH8wg;N4(iI%8 z#AIrPp)7YZ9waWIU!Yc!Q>J`RIG>x)xQtRzS@Np(8;X0kkV_HA&t){KoWL)5vn;lu zSlEmWm&pQUlUmo4Ef3;cU@-YK$+7O4}){R!Ghl9A(H9gjLembN)_b?nO{BmvO&O^Iu zO~TfhzFgP(wtRz>zS6i!IJeqM-Ug>b3SG@%g0W=7YcKhFHE+u-ycBxV5{}@%u-4H- zbX4{JhWi)zhoZ>WPqIuJr#6J4a{AVhs@W|xaFZRN2ej9YqQCu;FV?gyfHd-2|84nK z`K5vKAj;@c;QyKOl-BUrSfCoxT^K|x3}N^NhWOs9j;h{xpmN{u1GtPU3Ks$}Nq$7` zXELaIZ_oXU$k9D_cbyzUDi8jmJhU$J0(Wj<@I>K_n8MpyJyp^@mAXCEYYu#rJ5Y5X zu76kDc|@q?2d@(HiINY_CFSUn3V-pawD`YP68#Lcjts6o3rhD+S>|jcY%{o*jWT!> zK)5K5vQR-?RYn(m8osFI<2w3?l^?(LVSgC+a1K!nNU1IAQG5eInZ*?@>nUO3A+j&3 za=&%8K+i{|`T_One&y)_J(>P3G6Q=?W;u;#`S~nA{t`FDWjiQhJ0#FIDBV|KQa>f| zpDk$xpVIK#-JAZ};tAJH$x9KUA9rA!ZXb}9kZCfgJ)(T|QKpCgPyKz;njId$ycj#IVU);II#zLc;UY(n_y>vDN-h42oJryp zn%g~O9!GspQ|{lYJg_%2V4K&Ozx8pOsn~U_+Y<8=-SIYs?pOnNqpsT5PxTVb$Z%+PCcPUTFm+$Q3e_s|82IWdI|3UbUl!x;fp0|PrRTT{~Igw?~d0$ zU*Y&FCePOOcX$3O$Nlb({()Z}-jSD=<4QTEue~>0_BX`V)%0y~BUrX@djW15f~^y1 z>?Nu#H|ySz?mxk(5BLi2oG84nP zwZCp%*K+5O;^6vlPWi38HOCI^{`@eEf3g}O_3QQ=yIb6RE*|~sa~=g*f7>Fx-C7(sV?-F_^L%`t`4=7kyx_||5fixTEn{9>!%(=|1jM` zHtpN&UF&Z-%flQKPySC#O#f~N)llccuuX+`(NPty?aQ$q?Th%cfPTSC3faVmS6r+g z;*A@W5Y4``?Lc)X&N++NhTnd@H&lx1voyCSy`BDvITb7dMq3egNZP^$#Hbw|}ofu(4zP>SheCg&W(K)eUU~^)NPy z6sQ#4qFfrcsNeDT-uRyc^)CSBAC?MIU?wa}{3~Pd@2#d}qv2|84?lk|z{@7qhle!`Ui-D85f z+Ae8(s2z%Dig8!`bMW*(B)g~Frl$gTfUJMu!{3QCU&%*>lB!K5l{zJm*vjLip8)Is zEnPDpMSuNgYG~{5EPiMEnyU-XWS5tlsX)Hw=X`CIGedjgLnuT3T7E#EPIh2pYi!)t z2Em@$j_;%qmeBt(*!@Tgj%gp$*dO>a#!hqV! 
z9(b%yj*gF#{;BKBfX+b)-~X@L@!OoWYy?_;Lb>Qu@d^AlM@RfG#f79J|7EV~-|-^a zeP7s@nEZRJ@u(k5;4g^m|9mOf>^L~E=ffYf!+HUXXYmQ;oN2()c0P}1xlJ4I`Gbx7 zcY64jOh~)oM%iGz=r&mLXq}{dBu0`lt@v4xF zm-~L#b?HJpzRrp|ze7ige{iFUGp7cnr_trz*W4F&L5(DJas0bhZJR@nJ^kZ(&dKom zcro&eDp%Y!e1kBPjKDJmCZqY>A=;#lf;CCSdD;cGsc($*zw<;ES85knre+ycUEDD; zZa&^~JmrS>boY&zR?!K5zfgsV9=DYGri7b}{Ok2rNsX>g$BU$0C3^}QDu%ke-AYZN zDcn$hnX61qdxlr&Yh@i}*Y)70&Wi_dO$y=h6I>dlDpu5?oNfWP&xiE0Wt(k=1TQq$ zsbZfwb7^GlD?HO)CWn^hpEJUlslL??+Ueb^(<4&eFD6?TRNhu(m9j~R9~p!`C(p$R zdJz2DEhli6tDrb)p1Z(&bmcYo?94!go+_zTyS?**N4Sn&`qzxMqBhYWe9!ESH(NR` zyhzJ0e9EKPy{nZOSUG^%>|XFyNzd-lM$2O+H-CAOz`1SbA>QLZIi-L0d0%j18BjsbE+d$D6_4^Lr&l8l~DT z-btKVH=0W1I5m(_VH0RI!091ociMLh9L;>QuZr27_4U0bmM|AMsZ37vF?86g-Kt!3 za{G`BCgSwrzeV1i z+Ww4CWiM1#iuvNQ&M}P0LHtlOu1tOx>HaBmgX$)cXW*Y#9v(fQUD%M$bdQlj1M*9_ zYuUR?ybJlVC7$31&+O|Kko>@Nm+ z+tMEHpp|O*pGs-Zan-k>e^tScCpYozuX_f)d*NS9oN@~7avZUt>`Uo9TvRC{hDFd! z+&qU>iq?6&jpi6we{Bkxd-{CJH1gy+o@$%GWFM@PL+*#?LqMuN6dgxgahIOnki9FG zV;VWRPDpwhDHT6{S~)7gnN#uT?bom@N=ac6(c``6`zB884K5odRXAP{31ZXTg~RJYCOCse-PcfQu+75Cd1{ z&|P<8FZ~jwmekoLs=>?cZKW_~tE`>Bp@$~gzMCvjtHe#R?R~A}mTfE0Alfd8IMvFz z7GGNIAC%kYx8I&vQDAGf7tCsh>8ZfY?P;mpJJb&0N>j7HW@K7mOTM0JPun6Pb!wpA zGaRf}>)y|P-WuGFLyK3A`BdjDUKw)_2fbIYjp&pDf}#$0rx)&y2Ix*_9xQ8UAT5({ zcZhJiCk5d#@z`w}p>-cfcEu}L@(!NWwCLrE5>cs<5`DUn32p{lh zh(74(01&0$7TENebu_iX!e3i zpJ^LVCcj*u9YdJk4iTiU?GTnp@8RkYk&)@;dV63qkFnLsd9I0i z>zd&R9&gb4C%sl8k6v6gdK+mk*a~-~m2*b4AIv10wc2QF9ig?~Y~w-M8xP&y0Wth? z*`n|uSji@Z^3G>V*V=QQJeqX6^U1uh-5E7R08j<(-;UJ#4Kfxo?XKZ0KYwUTrN8)bTm zqe)rSnZ1B>Q+YT4Q>|Z^R-Ok5{DnQ3p0ew1*^i2sDn0{Go*+N z)wEmVypQqg(X}r*au0PT>Z_1ny0s<(BT>-E8}dbcFVg48MzTeqes9iM6>?EXWE+sd zsIN~`#S&6BNtJTs0)zAJZH(5+TFcf3vr>*2AeD@|nXfMlV~|qgg3>E%HG!<9ABET0 zIM8fo;n9V~FJ1J=#?3CzfbIjMtU%5Lg6zFBn&W(~e6B<*sNU`jSb~><+%}}Ujf~0J zoz}2eM5e=nA9H95EJX|KShE3aS@OqgdQE_|r$*ZQJbw8Xqx-uKv-=d(t?51A2^>$~LH`p*Mf$vZTS*fZgE^&|xH{x>N%w z_)5wd`@G$(XIs z``DE?gGFv$67DagT;pQSo*SUM6lZWf{$7R-Ub~|?er{K%Kkzo(7d&N?;)ihX4$4j2 zp~je^wy|cpZCr7j=?IeZQmll2U_OQNHU`CGd>rwY>vhze+z`1cN}NHuGh_C3=r>SqdCCc5=AZ zC47p5vbN(=(bj)h0;}>t6U-0Ehh$WB+E*+Wr(HcORSNk$g0F@uK(R>0s)uf#i^?;2 zU2~`W!a*@$tHEl*a(+FY_N1si<32Z?%1;8M_2@DK3 zOY4h;$pIg?1#j}Q|h_L166C*eiKSHniBI#*(#qzO`)*gSOU%V?T`U8|^0dv_n|E|;b>k;jSabW|M`B}njnZ8g+3eH#Q&m*kVVJRNUcx|gKDt= z(V>RXzW%0KJ`0dss>Mpf0Q4eRc3Jy<@z^Qdeo~lK z?A~AZvY;Tx9_Xnf>RMI@1l8v;ThZ40Sq4z?{;BqcylG_H(%cwmZR8%~WPTD=7V4QE zX41J7)He~RG=g_PM!A*yD#VD&JsZ4pgycvp>g%r*VQRP(<8+tm+Ny@i);xkH-nDhJ zNTCulZY@lztVEkTJs0w+r11&F^LQU*ohWU@_^xYi>nTGmp{f>)Bwntmf(Vw5EZit1 zy{OV`2%1Pz%BV(5LRHO!akpq97@5)>h;;XR^h-z9lu%YjnZ{D?W;!%G1jRmJ1o_kj z4?%>mqP>^yoM*)^PD0A=9+gWL6Wvf+1!3{aVty76X?Y|o{0;8QwVvjHg;Ah7-QF`H zVXyPMJxm=L?Gg|Y#{mAC`vFAR`y5xTo!$pM96QmSfdnYZ)8O7(l z@1~caFHxzx^kYd-E>v8z$=Y*Og;66m98yQxD@-VwrY&y9u3q-0yk8x6N+WOv7<_oz z4b?^Eq>xW9X6j$%v8X4JW+tI50V~w*7c;$j7D{M{^}xOK!JST`W?=F zCUVb_v%yP&tSk-aKsSb3I8Ic<&(9MY2`ryVDUbAAYc0vg8kd&z-3W10BYtgOxh&R1 zIce=x2wJhIM&p;guHUrznNzgn$=w~6?{u^wIrFPsAH6UwC<$2F-&OZCvc5_;V4@ko z8;exiK&#$@C*mz8iyGa8pczYw+Ch%-8jCw?qii4io1{(GbwAcjJ zuF26uITu$I6wt*jtfJr=!od6m&6}C+sU!}Eu7EQ;? 
zITts00J~X+j2##JG>wcs3e2k1&6TxIKfEF$xLf8oa>@?W4Mo)@gt2N5|K zjY>83N9UgfZBM%B=oapWc$UGn=ru#_iKs6LIP5BO?e@tkX_9vbc6DdL>zJq8qFxlE zVWat`c=#TX1NUY((IA-vSZRQ(PQgQKXI^023XDVyhaUnqW*SI!Zz0(QFjq3!h3iaK z)*H$q-hkpt2lu7=uf7GSfPN?M)Z#^Z9A`Zk#GgAJ=Vx}QKfb&y_IylbTS%&DDTk{W zB6&Uj-UH+_+`UtT<;_os74KI=PXfnHf4|qlhV0DUh^T*eae7rkiwn3F~@fYjD`+ZPBF+hfn{>%fa-fG?pX56??l*e#zE$0F?; z&72Q28MMRs^7Ei^METtA!+R0@6J?!!ZbGf1iyJKSw!haoTqOVHK6q8;*Ny?rx%ue1#E(vZ?~S02JBl+x)eoe-Z}_ zDmNjB{}c}PPjj~~KS(e-KBNX3TG`svqt(y|+|Wu{;yTMw1%IE*q0W;?ss-(qL#P5C za3`WUN^UFE(@mm%T}3)??sU#{Up(x2FKU<~lPhMtVdErP2m1!B+U-xVh)m>rU7W_K zBq9{%WGu~KP9~T2!yDGvypbX5;5vS`kV2=&%(dpVroBqv=d> z05wiHPN5g4_9W^#$DdjRw`STJd#xf|m%t^<2qlOXRaz*|N6UCY%$G;gGhQT~i&&;~ z6r|oz|%^B~qT(n@IZ@@Fx=7D#_dT{>0wb6!qcV2Zswz zedmfYl}Wee+PRv)tCRz{OiwB5uf-B(x|0`>!uW1eU&R_zeD@QeeB5I`#~+{!QPeMa zcd+48Cd`*;0tfjG@XGM_YJa1GV7{dJi4h0Tb_cG(G|G(KFmO_4X2z7s$uz!$8AiVm zLNk4}KnV{pFKPxL-ZnV>#-T(i09_RS@v%)eAUy7y!yaIEiVZmRsU%*kqYM;O0Bzcm z0=N^*`+ce4#zL0=S^A4f^~cFGF!ki+E0w;G+xq;ejLa_J#$bWlU{;_55bXGL2GA+@ zNUWx56L0?o;1y#1zczI`nHE^{hG#)uPHcsoTf8R6Ht{-(Wr_<@{3Z^XS0+(gUd^=);Go>v;d`y=@MS+}K2ycQ zycE*NJhz)z}g#lPo)Y4g|;4NmM8C3Uipo4WWeWV!Ept)kXMktY-XbKzCX^; z;R^um?g$5fw;>CBT9>prK=e9DbvCZI0k(Ml=Hnbk3q|!efK8M@5KqI7J?tNPmG-~7 z1_+R}XpX)O*?=p^2W3EQGzOs+txgVGe7g0^K@pj4_J#U(+t(FFSikYy$Q&Xw>=;bPkNtU)+Ynos~ z>QbIoLo$5GZ^y)Zak=-$p%xR9n*z7RWVI{S;Vk>W=&}xVuz*j0dH|zhGwbXX%|2Yr zBB{U868_`x1f1<1lR;EwHCz^CAM&yqB-BC%+eg&i{OzzE!?F7x9IHR^%&W(4x*DMC zMW13)$99v8BT1&GWf7-fx~-l~w;h?C&OQgYW0<*VGy`K439@OTvw$Yrk5o_k z5cHJD@%TeU5|QI6k3AOQz@DTd1Et_{+6P8;e$qYaBLE*P*uEEVRgrppWd^Wo{sB-I zF!S5jbW@>Q8qHi+APrV|2AiX1vj(~SyFs}SHWeBR3Z@z9Ndd}0A4qm0JgUfgaIwx< zuS5(GJTYuy8L$ST9&-b_#qwT~KX_b#!oKRYF`dp>rcOA&zNuB+(a(RWIA}OKc~w&z$XGG5%6kA)68L@&_n=Xg~@h=>0JOdd=Dmb9xDHn#?F@jjT>Ke=xgJQi@TpPNwyLxD|@L8P%sLHJtdQ(zH1ehfqksC+ls1+a4a zVt{niN3-WK9U$ub$;EBYJK2j69;4^dux8k^8?M+8=zy0~oj>PxR`1+I5KN${U=`Xd z<+CM|6vHVn@w9oUhT3f(%E-4(9S}o!OSYwYJ=V*_jINa9x}{qmRC)2e&P?^ z$Xx1N=j)y1L^qEG^PJv%yz|GbNZcWel3V!1Wydn3~KcDrv-fo}*M zktF%9$&S6vA=g-i?44Tfo4I522E${0qe98o`|j8j+AlpAAau81UJZfP4;Ab6st>ZV z=qt6%rNuKN?)*HY*6++OR9{xiFf)kC+msQs%0wSLdsQP70SJZU$|+RkZpWVFw&4TCewnWNJrq)2=0 zacT1tBSWVylU$CcS>I!vVnOXh93iF8@5(dQtUEvI73mUYM7D9?6DijBNX!%FTaRHH zvwX+^jn4$$mdAmqrSYk({i!S#k3VC^?VWc+cZhAE*Mn{Y%)u0N32y1;p&q>+uEMfd zrXRV#=W4GWNH@J%i1Q6<$`4@7#0K@AKQ}}>uljMT8zs6x2qSA#5Pr>mvU_9z*0a5X zhg8VvZbdJ}>^fg40gStexzt#0glId3$99HeLt|Fs&X+1FQ=aJ|)%Dk-dk#OC{;G|( zo=p<7({OncJ`pyqEk};e(BLrz+&$o?as)`RI18Rp*#lfr{3K>Q&YBTQ3hxZsv1-ru zjj}jGi>rZ$2KaPwD_I1w82EEi6*8$X|JP{7rFqyJFwWVld=Wr@ z>U;ZtgM=!sJ?nLLbhqg##>{E(L<^{tM%ZXj-CdxGnR51&Hv*?UU{XF2w zkP2UJdw#K%9vb3nShYqj??g@>33C9((e%(XbVSAyepVM09|FaR@?JPn;pc5u{q#_3 z!?w;^s+iDnME&_3Fo?@#80*&Qjxk0hs%Y1apV{J`P32~?`#vjHB{UwD{u=+CkkF~- z|L_nNku21Ap}%s`xJ%6d{pD= zZ9{V1KRpI$l8yq8q`YXKYuNPpxR6EYWbPFm4Mc!kKY!~JG-J#yS3HQn81El)0+0!= z14o_ng0Fb+JIfYkga4~1mj!X$KF;O9}YzenqBU7NA4*+mi{=3SjTrT?xfkCbc$}RWvu1L^BhPk?U z1}%=8-}FN?3IQ4z`96cEm#gExtR~{amcl+i?_5wQdLZb;VunS5IFMGtQoqZ^Qc07* zO9npG$rG)MH`QYnD;$lw)q?ZT;5Ec;A6qqRK$!8FOYg*$p zQ`RHe#k+oHLOn*moO7mR_M_WpepY1nVJ$=ek?dkQF`WN!Qm~W*xbKgPdF?c;1g>l+ ztCYx*A2qv~tHNcB5TaZl#KU25_!3NweMxXY`WhS@I#$?%6`33XA`-=4N6OlIi>R-1 zu-7K=4XCOdZ&9A9SfdtpOYrec5WWfApQwMfRQ~MysCEOg%m<~+B0gdKrTaj63ZbIC z!UH>n2hVs7(U}X7`us4KKE=Q+A`eh+rb9jzVwqWuN)Nmt?7ULu{H8usUxA@VygO%o zIqfv`VAxwSAAQ9*ogUusY>L*h{#RKhFyYrJQN1NC#LfOa%R+t9oBH@|;x|~EkE$Qd ztB#?_k42+{pJI!${IE|xP(%a41RJR~W2@q$N%gUd~ z+8SMhWv)gX(3A}UPrlGIB9E~hEiQlR<=nc~vP}@Al}wSRJSoK|_A>QnKq^Y^InULe zK{#e4&Fg=F^v?Y;oP&Zuj`PQ&0Uy?H(7GQL)HztTS;$-=(ta^5Y`&YQOZ&);_3wET 
z9_X5uRP6Htt&+FHT!ma_=t1$JBHPluD(BZAWOtvNr=WeTyJ%CVE;cBl<@ks9qdS! zAUl#ZDpOW)HT<9nCZg1z5`Gd9U0mJ*9H3t5?T?T2*0>Q{6IQrV*rmWP+HJIs#~T zKszpMfMd#+t3bs0iyH9R8UT7rwgSPfleut!3i#QP_N*q}$yNa8-ev*|@)!`Eqr_WZ z0d?r$z0W`v5emZ?JCG;gg)=(%9i)Z)5zKS&nFc7Z@)gXOHv3E$UTKjM&8s$RI4`h_ zx88%ye@Lws;p))$*S7`G`@82;#@BGY#Vy`IP9CSMKaBYOIm`O( zaNYH(k@gOc|9B+38iTXr1n)n{E*PlG8V{;WPk|q2U%ZFGsXBuy{!|8T;*Xb-XYa}k z{~Qhe$#MIC>y#+$qSN)#D&0Kq^|^F)Zu?~B18i{&Oa~cASV{}9;H6LXkb>TH09A~<>43tXtkNy9ZuyO%*!^ zcyYif7{Ri5_AHRRXxJ&qhMDsX*&t|IhI9dy84-(l6F^b`h{nqGuRc_oGoDe=$deb15_pYQCZ-*)EBuf0-fXREnTqa(p?LtI&UW$)*Q@r7ky` z=TL%e;Pnh*k(qG?wH7Fh>~t-jSfU=N+s?+hoQjjgW3ga`x@PnTgAyHJ6vAX>fM>f98_i1%fTj6 zUTrAhnEFlJ8a_({?17RQr^XKnXR z`48?%VEv)o`R)7;YGwL~VJ$2ta0mz_CV+5yRBNxy zBH+8jz5z7{uc{xh6btOZHa?F4q;udDcduN(S}NT7SFjJMmPf4krk2J|(||)j>^w3? z?p;7b01BX3KLo5m;8B>xpW>ed-aM?{kHS$Jg@b2_fXN@x1*-o3Z^;Hzpgq|oaYqMA z5pM#aJ`(wUbI=O!Bdi-zJbxyRhq@0PU0LP@OR1ucb}D)ufUrTJcyCt=z{)q7_754Y zoYQRjn5)7=O}E505`-wN-+ zAc1N0vIV+(@E%Y<-XSYv9WCXZ=RAlLW;o7CS=(`+_B1~LVP#{dGC@!@R_C40x%^Vw zJl9oSYa+m}#-&axGeyO{i$KC1mip1v?D8W*BwaWEQZU^)^C4kMy0^#tMtWRY4nu0+ z=aBV@glIJv5OcI-C+?ip(DVbTWW)5!Fr-UR@KK0fq);*&$#4~ekq zEk6#@jizVb0>t8NKW|FGV*vg8|RjjjNJ`4LT*vU|IM!LxVnxSY9_S^Je zgv$%+CxPCGdWLk-IQ?QX^zi5L$8zo<7SqnwzJNR6sMc72KCH*ufF8TlY!j&T1a0li zIk}~0FdC$roZ`S^R1S7>&fEvtM6m3u{;o(5LPpna!_zOy)?#Krb&1&Rd;1(DOMV6E zPcXcC1A@h3cKankyt{pL^UW2Fwyr@Q?!^twhYKtBV{d5PLR;r@gdX~Ca5{fUH&X{H zt8OT(_HL)p3&;Jf^_gV`Y2)E9U55RwvBS9VP55xTgXSWXJ@uWZHZEC_^ih zA<36?UG9MJ2^k#aq6cNDIF#ydJqjwIml;s$g{gqj1!bjKJCCmiH;J-GtR}$CmD9p> zdf{%H2-hi4|9Qjw&e6?2IFPgDwLTFT@QSfxMu)af9HmrmyqRV8nj@{)Kh0z5oY_(G z599q7ePE1BlRk~C(M9Gbl!ZIe8&{0}hEvs({tl=1YJ(upankp=-!sopElIIIpQ3+~ zD}{GzUIf~laW1~fqZF!j;+;nCZ)Y+Zf~Jw?@u4?)L+I5U=`V5w7H)SxV4rR0;(YZb%oEXKZZffQFl%=i z05spj%XaQsF7D}-YodoURN|FbS3l1XJX)y&(E^yK`|SBw2kxoVmaS~fTB@t?Lu6jG zzwA;Wb^|*4ZO{{wvVel>2D9*kbz)Csl$9>Zt}diyoj5?E0PdHsM;dzU2!odMVgbuZ za}}#C7#UU?Nptm*A4AvOt&3@dM zXrIZY+2r>my)K^;b>|mFlI}~aR|C-43l#9$*Px*vRN{<>)vU45DB*P3H0Lqv)xKBf zyPB7i-0{7Z6Mf0fUYAMQeSYt!BOg?MDJdsFLBmBW?h`{E9xbXxRMho+>Wf?I`paU@ zIMt6%d?WSdvc(5qaxna=Z{(qf#Gw;8z@>a8*55JJnq*7%!yIvcuZ0DoB-{@WC_2KG z)!8c~4d~%7enh(mP1Dme0Bkn5DIG*hr9c>QyQ6;5n2z-^*MP`*x#REEFi6&KftuHXZ((1I^Zh+^F4;b$`bgCOB9;7 zBs75fSeR*C@SqTyD4p)UkcwwcysoE+3*ythoDHPm;0*LRKJb{5P1Dr1H#1_x3@7_>u(#=!0njgF8nYiYCr0V7{G*9kp@!QAK1I!PuVDoPj&F5Z z8`alO#c%o+PS?-{`O5kGKl>q^nGMehmb!=0)@?fg4sc!o*}P1RxPEu6eJE?g4DB(p z&7a@pn&#mg5YhoMr4_ry3i$YoZox(uK`uzw_)`kV@SJ%uL{(wwIatKCv;qlRSIpP! 
z!_fM6DsG!sG~hg{c3?a*J)=t+f^I-JTg!(wPM8NFxH|GTSljLnH+BiexOheAWYAnW z*TgI4YWoIUpj=V4x+9s3`)of=a@j>tp9KePoLE^y^JGNJfMB zKeubc8PWl3){(yPYNfKeKT(eva7o`%PgMA7o)37%f#|kH4=d)`Zi440%Bmz($736=)u|V`#zM0BPv`yB;J^&?X{LMjH%ZzYoU>Kx*ar!N*9d^RNqw80;b&dlb^bbg14;9Dd^bY)Rd`;Eh zohSFN0600}2k@oQW01kd$uu_n>0T3YQx65>^VH+bi_^z{>`3}>&Ev4Bn#2PT%Oqha zKepI_gxi|8@pB&&?2%({+D~g*EasluS>inw?_i;cSZMM9F~~957iWC4KcfngK;Fpi zgWnO)kzf13&L_L1$Z{+WXGjhAlev*@jD(zo^zHV>w_~{-T6stc!UjNySj(@e=>nA`3zfhjXO4-jk z`IFXcTe{3uz?7N`({=9kxmKDmQD>rbbkq%H+~`p6kW`s%1h@k?)axuUI~#`k;n}S> z0rBtvh`_lqOul6qUp=w(2ykmAMD03TVK-Rfz5zHv4n2#!{mSIxKIP_$P+BO)k$YA zknjx3`tn)s0}j!p0xA+X2bn~^^a;1wo98J|$|^MXxDW>|Q7>AzQ&?_Evc?{ZltMnc z%&cV%;r`x1YMd;CzEo?km4s2JuU^(xucP%tH&%o{yY8Uy6rEV$+ z^~dmc_gygYB4cyUd2#MMxX%S=su8Jiy}#>;eo9)0|tTKqk-;*4(gdYJTAdPDCClX-GkHK00Vj1s2q%!*74-* za1vB)M`3&-&rbCYVwh?(;iTL-!8BHkJ8Q+aOXF0vK!X9H5jFf?g-GZr7;J~m*5BS$ zQ6TVQek0U~gZ6h*7K#A7v@Gp>D2o`ERaYyp$qkB0ZdpG>Y+GHv$8dksTgC!W)*i1@(>;?UZ>ELpjZ5?u z%pzXqYwD1%GgBikgkQ-ZW+FEy=Kz-`vVf$xWPC9-ncovJ1ZDLCHi9#gm!v$;P3rMj zMTg0{>c*MVPt|3G zaq1L+R)up#f^RAc8<(KH$xgNFsZ^4NW>$D0)bh@e0V1_uO=cd?wRY3FH<6Z?QKKjV z@q(b{NoRM8h9=Vw@`7GF%J2hn&ccQ8g?DBGBkjIC++?A&^@U(V0h8QM`FQQ5#4qAd zZrl`95C_rJiukg9c`|QJF}5FEUKzsBF5$aSD3(S@+b-`*5Om~Z-ID=-#C(=9rEFp) z=lRBEcRh?VZ|N0=*@|WcKC|L`cKSOe>dxh;z4DUUj*vf&*w;Tv0`YF-3i*h8KF)Y% zqOYm_`kjGmmH;SV?rV%m&OjAeeM+-F96$yM3#6U#2F?le^x# zkAFU1v;lvXI7kQllJ7%$P*hV1`fCBI&}9|J)gjTdXKo*dYv}o>!ATvwF8bx@!S}45x?m{G68~c_AQ7;3~ z8s;%zn^7JWu)3{)QFGXr3Nx1{K{e|CxIr%`5VdY8Mp_`#lQ+|pey{i4a+=Wejbuzik_%M`$&GjA>$7W!}0O&Ryj5J&Q`2Wjg0M)B84fMET%Cou`p8SsY5Q zU@X#W5vcWb$Lm>=M~avo?{}CL{i1em3=4@b2?k~$ABxZ1I2iUw5 zU~o8OAG?A|bl?~9&#+d-$(yPJx)LjY?aG!HfO(#g=6x{&D#9QHF^FgE6C+Hs?BKo8aE-RkD49Sg0I2ufo%=YT;NZ3AS< zXbOnY@UM)ffL!Ihw?U;xK~t@%UT{|Nm&r0@J$OD-mh$13;ncv{u`pbbyVS6S1SD@FvKb=NE%iWhotyh0NkC0-P|`zIN7K>;I$dJHVRC zn!m+@x+<)=YeR?~J9d?1VzOLSWrMpWKlv< z10qF0f+i6JF(eouBqZP5dlSgL;J*LweV@nGCJwGhBe&O(JPRbKl&Nd8cwhkQHw}|Ai`?=Me~p!9;b|stg8BT` zw_5lG$8U&|0LnfTdm`h90Pcl;t`plV^?GMqFuRE7Bv6VFbNK97MFK_22QLoH+0g(*< z>X_2|hkYO!-w3XR?ughJ>@lQ~<8=MOyQO6z-0>R7jq_dY&E#uiCseeS$y23)$HD_k znLfZcB(rc1stUF@f15wf6-_()il&{N`qu_rqJ*kxmz`bg%Vb00h;cUO+bBr)TL#4O{l)?<;`xkb4-9o#9gKtIt;Lo9cxtL-s<2+wXDrGLD8f z1*{sAo~_Ds8r-F5F-c;c`p6!BQoSyT;+6dton~e6&^*RCQ#ckM?CdzXi7ngxu=pvV z+;E$wa}-)DuDd)?`mmPvho>nnP;Yp*xTjL@fX+(G415jI^TFgW#bt8;q?twq3@i28 zDjGqVs^V6v5)`8A!q^#9@hTl;QpKME88dd^TubAbN!pS(u-mg%r1LxQ&y|l)p zyZGbf+U0we?0@^_SbMJ1=qh3z)XVa&9eHAW)#T3L$L|+=W14@=~l;!Fk|!p>c(K-E_CTTB)9ceLS_>h5fV_4ag>4ZbPe(?)OI~ z!cLc?6?O-7c7(MBREQ~#>hqYAqkAJpRhWi4p~Fis!DIX*CtXF&s2i>!15TX_oxq!{ zEt#=m(m$JJ^>T7#$vk`wYDH^su0b;tp)T7#My+&{~L-TSw311gx| zFXa|$RcTj6GU)}jRy_p>Ux{96+pE41wUK3MR@P2aiPvgV(ZG6Vin65j zMgIFiOWPNm*u4=GN(RR>>94_EN}@jP+3=pT;^;vsR)o^r zmyFg*(I8vAU9ihW-_uQ4b?*1Uv-IlI$)}+r@UeA<&!0@sd?iLJ zbZUMVYBm#!0>=lF$=ABN%Z%6EELb#n$EfU&oz=lz4V`}y%0j&eetaUq!%V$)vq#K` zT=9|H`bWmwI&Lt*SDhbd8oFy!2C;Z;ZQSS1>o#EuNZ%gpb@ajqlln!2E+OY6a5o_B zW!cJ~=ASxx<{n&JK|?B7UO?mYV7p-C_&79pHFEq?2l0{NMN#{&F(A8a!gSP#E3f!n z>334kr2g@LxblzzmhtQGuG8dIP)H|yV1`ZOuKMaBU+U5z7ok?qW;rL%V41abjQf;# zH=n@?{et`4=|u!K%KgXgYpq-Jc?AR@kEt4^OnZ241mT zSh~wTc~l`%?!|ifGMiiEu#7=a8PjncG!{yF(P5 z;lkPVoH}Xz{&lzh1ZNk(x*wCW{{6Sge6?!f?M89`3 zwdRl7k6BGiTgNGSq?;izSTVVoP)wGQzNz`f;3ZXr>iO5%vaLxMn+UB47?S#fQ=0Lm zaFc?`ZN=Y+<@0MNHWPahl(F|GoopJpB8>NOjEv40Cr?nYIP%0vM$NmUoBS1uCttkA zO+YgNxBYy?teRQieCTNEHXn^QF_qWv-~U(eaC;r~NVD^)tx%z7dvNLPmNvL1b)#j- z17Z4iehX5vVE)+<3y)t|EnY|=IJmX;Yh7ShG%eQfa7;4CJKHVrdAnS*S6lpCnmXU50VQWbhgrQKB0)NRF;zx^cTA#Is09m$8)(zvw~x< zxq8?i0z(?9;=TRJ(xm#4@DF>BtS)c;aF?%!?#SN;7>&5s9%Ym!moFR(IOcBG&A~>M 
z_L>8@viI5BrO*{5Lh8crNY2dah_(N4L>9%d3uGs|k>#`94CP&E?>myyvO11k4OJ9Y zpD{|4bQm(Td_z{ARfyo9V?6@at!&KR2nF%S#LY-+YtALrd}_`eQ`6X-t2?0@{;C`O zLDZbfVa=^22j(Q!k~4E&z@NNdNAuYO2G%ls+qrp7$z`cy+&#j@72{LKC~VDApGdvz zTSTJ;?5v#73j2HRhP8Fm(TA)&9*eE(nt%IzcY+4_&6W?HG2&B$cQZ1KP8>ONj}b6T zIC{j@7}}H<84ldXCr<70u!Re1OOd?PPcmJ`ev%P{dO!VMVWKd1(llt1>B2R$EnAm) z9WzzreH-z{zn(MX{kN7F+G@Cv`YY6LY8diarki2B%q5mKdQK%$Qw7F{ zWV|5xhzKlF`t-fT2tBNo>B3>H^Ih2SUI3vZ88ipOc?HMFiad+d#&Z?@|CRh2dA3V4 zx2Hnc*LDGab;@I0@#ME%@-JIfX5brecru&B8?4S9C6?9y)^3$gP143U;m~g2PHiUC znjc-vx|-sy4kU#`=_ce;iDnS3(UNBRy7=aCqR5FC+JqN^eg5mN+#{H|N~D zNwe=wJoZR)2$Os85_xe=>B`IZ4i7xHC1Ok0Sc~nTu%JkrXM&=(oXO;`PE9ky_;B#0 z^)$Y{431bSU5`2}{T8>69HiKG?AmbZGmCzT#jlLUiX0R^4^PUI<>WlEh2bW}hx(71 znjO#X%UYhX43{6d{S&0g>1-QQb7yGr(UjjlT&(^oo;!SBzM?bI{mj+O>;sqbP6Wv- zQZsX2+i|YsHh2Gh*Yi)=!4;|fJ3Gd^w+-s*n&E%$sHqNlm|VIgGw9WI*ENtI^=ZWD zl6mF@jw`8@e+(KrsTchh2af*U9^cXCU{qlhG+Ndk?;A2D#bUu|9`ZM9XExF*My)t4 zBmGS(PoW>#tKor0P~dAZ+A%_Kx4yl@Kv^rFSub!fZN^sz|BKVwzTnJLLWddOkd!ms z<-swRuPq>UVl%PDjOO=;SKA0JW;B8(TrN^cHMejD2@0ZIuMw9BNzNfjh~ z+k)R#knBsT=oOtL>Y4`%CH*5|l}b0MSMWr3sQWo0^=mq$J6?%(E?ytbzpcW44~Ni5 zP0mL64mo|1KQgH`Q6gQ2pMcK10O{HQVk?L&z8nmR&VhG;9H0cDhmq4m`_`Dq1zuibOX+} zi9M30bOkMbTaIhBF!3#iwXcd$TJ(HU>)gg1|0-f{pR?ZutgW^JeD9`tarF2Uma2CX zR`PN}F>72Ep%q8g+6u{Nvz1l7D&vETeXVK|)(2osQ}+E-nihN}?RXJWV9dFt zAAQYCt(-UNl6G<01uP3&T2GrLG;RYMc(oLYXlLEi-tv{UW}?oPzs-m5iIaM z8>pS=O6aUKmM?G^T49Or4+TFy6!!t&i)UOhYimjXz8BA#!!hn<92T(?%0H>}6h~r< zC{i)YB87zOtyFhl1&LEkg=RH^&Q{7EwGfBZd?N2x?w9{Xqa3tN#TBOgmjZ#M<45rw zYoITov~ooTzQ0wgwrzd2c%XATp~cll2sn8L5lZ>OvRFz9Z51eSbh;A--^rzj;t}Is z!C@iy1CwZmSGwhY&Zp}jYs4O5#N!Ev0(38IAP4Jpzbeq>$PpaV&TBUhMjHy2OQ+rIq5DQ}O~h%4k|$hO&_&~|9K+t%-*ey2>}C#KyieRX_`BrIKttE&APV2t2*2CY6^z#Nu7P^@H)-h z?qa=Y;p%-CD3?~5sD}%n)zeGe8$pCtPtF&Z?|w?M#`k)fE|FHQ%E0%hvsyv2D*cA< zPZ!DS%-&8T84x?vWd_{{>eXZkmTA3I*?mA&s(nO5{6}ez${Kmfcj`o;G%4vz&M#GJ zQ8cut9isbI+p_Up?U(|?C&!NBA~q}Frd80PYaj4Udg`S~0(AEi`tP8HqHQY>aTE1e%P z>A$AT#y1BH@2WR&C(=JSs{nRneP_3Ki}nW;29r0vjqUC-gcW{=IQcp$p&87w<#jS% z3WL~yYFGo;k@iAv0ljm2DxS;pIjn6}_$Fa|x-7_ajfw7ZTsiQ|Mr#Qk&B%>3-q5#h zgnHn$5K}1C$6v<=uX|k;p{HOo-IkM@IN1GdHnDw{j3jC2E=n>a^f=Q9lFd2MMaT7a zy4fMUwB7`%*xuujJ@+}hi@^Gsg}(zz;`w(UQJ}2^xBkPVC#F>6yPN^=W3&+%|4T)C zqExz!AUGZ$sx7K|br%EAZBdn4AOUHywR;7LB6Vn^;JUMYXC@+a!EN^MEu4?RKKlH_#eFyTRcF z1Dpi)rg%Jxuzf-3QMHCl&=;ZWG~6do@gg(?3kXTMrxqSYx)aC@0P7Jug2B$}*Gy;@ z07nnk&4@MGPa621Y3tQ~QvQNT8k3fdbZWuVzGl3Xgu**iS3>cDP2iCcNzeH>?2+O; zzLQykM7rap7~h2Sb4qXp$vG9@%d9WTmjQNDJHo;|OVILW#6EiA;FcHL5t_*j{=VST zV9@gF4cH`ullY+t+p6%}RFkDGlP);bgzxz&(T1IReRoe#9_9J~8mZceKzpvU`?)q^ zOIO%_iz^JVK5X5sF<>p?g9z=ibc$Jnst6tWq@;nSqb|LXTi}~pUidcj?IuDO#kXBS zudc-qxGuF!BvL+HaCur)FUr3hQBWuFItl$sfy`S$gnp$%8n_dhd-X3IQAsC|9#GLH zKhLC}SNz>Y$8~^|O`(xWogG!@A&7>7ylG8Hj^Wp;r@D*R&i^}Zf_3iKv}X~mW<~Mo^M7g_IrQX=jxYKe#{!sM z%aSyfnsOdbFy1z(qAOtN-J##!4fF^aT`Z&GJ2X{3aaw7M<7So`EV#VHdl=AOWJixPS%eL!gb4i zqYdkqu$$28q6#1rp)+X)I5$>=cDeK8=#I_|=uE2C3t{kgp-wYCk_B-Lvz&|+0kQky z@JhPsj1+AiyZ!2U1X7Ie3OajD58rKAw=2kWg%ml*Le~vDR_r8?&y!ogVei?VIEE|4 z0LNCi;5z-b1(RcMe84wV9b3$LoWdrw$ek2N@6{m}f*Mj{huoLJ#hL_MT-aSXT{^7<{X~#a8Etw3o?( z=EUvBHzsxgxr*OSXcd_|st?+)&?vFoPMpASSFIo!mS(GmK0#mxbo$cUi0zJU26uJz za9u38`W_7(y@Ht6VKaQ=n)y0(l))h3;BT&d+*eGzA(@g9J zZk7|0vIw_7C$k$l^b@v*_^zV&<>d0WN%*G6aMZspgV7v=REvf`4yWg(P2~`%`QJ84O4Rva3z+9sz72lAc9sxfVm{WWJO}8s0mV`hM(7|NY;b9v387U~3?xmx5aZk4|6R&q8CT(3d7Ta>A1(PtY&Z`J zoxt%l-o=U|FOxF(CD8-h-C9b``VUmd>pg4+9BPp}Ugk9(do=#1$90=F1~0wL z>*F3ITB}CcJf!$RdD9iqw0{+w2+jO2+w@Z znLCi5HlE5V+f%EUL;w8mg0v6ey1J{V#(b-5@2uK~)Cm-wQ2CO*1nvdWB3GYkj0@B@ zzcdK$WH;2)BG=9Pui8l$>uP`MfoT&H0H!VL1Ew`kO*^x4dD6)bo=ZAVi1y$ooeO&! 
zjxoNCe9mco{ic0_n@jMBH|=vyX?f>(Q&%6mdkpeaZ5e}7il+W&^_BG=?7{rqqd&IS zM%1-)Z%M6>nf!7weU?SPad1#bKe~;-RzELi&uHIH!69KF+vLH^tWg)a2RrnvL!d^e z>6Kx*b%r7>c-oNd@HcH->h$(()gF_TXso&*}<67np!j|GS2VIJVX?)s>Vt9QCZHzgs zVTU{gO1Z4V`s8`zZjtztgmq-5R)ku+NeHl?VO@ zA!!xIMDxzH1PIo*NEb8=os^?JywEmcO7PjcXB%0|?jD}1-M*(~SjN+4%@I1oC(mG& z%{NJM;|H=uPV>$l^Ivmp(oZM>Y$FQC%Qvxa@4+qVf9(uAZMB_o>%>5xsXaSGfH*LL zAvtperpzZ#TFnG6Mz^os9Cq)ckL?I~7Z9@PSb!1Yz{t!vUqLFKYF5^Du@@GU4LY#o z7i>NLmq1*M+nZRig#W?iE(cu@50WKZsp?*kE$D)nj+GUNZR^vj{{@+Q6=E|)8THLX zqrQ7Rstt^m9yjfnc@fIr~5paF2N`IiR8*84m?nq41L`6*Tv0MABP5wt$Xe(fC_^z@y^ zF`_EGIubR?L5EDx*rsXS6+5s@^FWfin(pI%UQW#L9)TDk|Kpg+_#Rc|iKwb?IdzKH zPZNRpnt_(s7WHmzr+`cQV7Qxvbgu5yK+1xHk2S!p+`=PmH=G3$SJ@!MHkx=t#o%mJti1Aa?Rw{+E z4PA{_$7r)4Ujrp#yjx&)eT&V2`vKVq5yK1H?T_#DNSMB&x@7ca9%<+1*ug`X78>Ba zzpe-2pWaLOZG3NMhy}VAEBYTBj)~n$3ZfOKrkx4_!!WU-o{{S|;(@#s@J(LxAa2r?)C?8q6sZm z1HTCz$Q8pjtGiEk+2Gr*l8)`J>j~N7C6MAWs!w;@QwxWQn*qZ0YKRViII7L8hNX^j zz*)f)ULiQ_-VR-PzYH}m4I{Kf*XMHRgVU(EK*6T7Ou8h6Ksp>Z(Tm9^cfC``#K^pL zVj6Ie7`%`oG2Uza5=^$c9z^IL3J%A-=72Oh9#}#!7EKEmYN7|$)g4O_fF^qihwX&! z@|~%@0|n9UQt2hDjD9C(s6)#q2ENDJh|N|xZ30%ftzgZVXP!s|nY0N>*#)0@XI1_D zv+5WPAn_XM!ldrN8rFRp!f*t(Yn*`tpd4{eIn|`9eTdV#|2~txAjLry&O|}+_Z>8n zb}{+y4f}J3^qf`$IPCw+^&u~3tn5B-dOk0gMV9&s6o8vJCdK~KuXajDglZUuIj*C6 z1(v2vdBY@WA2w6V@ycK67Tr5i(6|g_y(Edy{`KRCx8Ly6@?tZv-81O_5Gv>^KJx-R z7B#}wl9bOsh*P|ly~_unzUQ6 zvyx*8zR2O`%s9HCa}VV-Id;XN5rU&|4A(!bnTj+ zM!$?Y4*jw%`LMwhVS84uP`1 z2)8BW%*a#ALqb9{DFmlG2Cg(DboJAQG)=WUFw;1CPlU#uMps5TIB>a%NV&p{Mq1?T zsIC|DrLld6IR^HQ&G)UTiXjW%5y+>=FeIhd?fs{g-f*Q*h0I#Y;k*Wq+!b`le7 zyY5qVF(13z)3C;GqoY9&{Pc`xUaZ>$fK8eIpmLAGU+#Kt@BT%q357TU48Sdb+h>{p zvHTW~3Cl^WpVpy~1Mc$AFQkmzBO@M^qh6Zn*p4Ed;URTl8-o*RoVXfe@*Vb<{MLTe zUmX6o?@?n4FY)NZ_XwUC>#kM0KMjT$Nq~z>B_gQ?7dGkD?22B z+-Cfg)HPdY+h|A>t@D@foGxMgda+*PU%{5(3IG0U*n`@Hh|6*)#K!-21|LIh!<5c~ z61DlDsI3D~o9l=NIS6XQc`KA>ai-coQ|bMWap)pR=;q$=yW`X|8xzjs%+U{2AroFJ z;*$*zsrIKO2OzHDcf>Uep2(nSWKpm@2{@CKDhvtL0P?4-SKh@;17HDe4g=X?9p zh1$I3v$1fL@;1kY_|9+i9acH#_RSAGIYcnp*>Z$z1qCG`e{v8)BH2#-kbEB z1AmG8`L`M7%9PH9FPU%dTnie=+N^e{a7CwOoE0&p+Tl6m9^lE_un(?Vjd8aB_kgYP zM8xcXWX^eIux2|1Y_R{A#prJj`AAL4X(4E)>s>DCQ7OoUw>ro#G#k!%9BU4@tHkNF ztntKf{c|d0g5i%DctLPXN;JW(=}h{Ql%~yewe-j^hz9l!9b2_bXeMAbTUia0l^zb~Zi6cib{+eH|Gn+YdPg^=Z3QMc*9X-x&?oGR<1%L{oB?HpWG<{fh zLOm%J5<@lw;PFGB@p=!VI68slL)lqDH@dpx^-Er2kZqE2Ifu2ZO1M@%8Cwn*pJ>Qd zXB+HTn}I|4w{y<#P*Cr4{msS~tEHn(j-&5R$xw4mUi|z9@c+uxEe)obddXp-V`^_CMA;WdGI&vfXD@IL^nV0e^rr z^0+P>W*f=|xA31fr5_I2sOU-Nnn}iQyi*%u`hBJ0KpREtg+zMkrMKGgF_7;GXc_Jo z;1bE_!cY*m1|Lu>nD znk`(tCtmzW8&k|WWlKBGx}=^s3*PufDI}sn;%ukzlqv`F6_PDs)hls7YbAl@LWQHi zJvebzh0N~A9EZ}S8-D&d)b;OEOV?Mvuq>CwhA-9pxw650|CHBH=h~2$PJBD$yH2{> zYUngpxwv}l72mTPM_q9p=Mb>0snU)<$*$=7*#qN>MC1Q?bSRm>B=W$xe;%D?Ii$~} zOXc+yCV8EgT;4b)(NE1~D!$Q@AD$y|{W!}%FXBkTd@|XeiW=j>q6&+6e9=#SH#65~ zug>0@ZJBLjHzhe^jvIHno_l0!z744MblP&TIB4^Rz zP4n`n`6UI01jd_M*-bROIL4s7Xl|KspqBEk? 
zYjo{PgzF^RB}*jUk`a=PlDQIBiJs(Qpm9lFySMBw*)`dD*+bcB*&Wk{&KJhZDmz~k zj(>J$p8GTMg3Pe43WH5i$=R8b&KH`?_5@uwwRU?-eyE}Kh`eCeG>)fVxcKzxC4a$F zZTTkXPf{K#I$v1uL!ylA8I}D8-?zF5)QH8W*+qu=hJ^+?u7SU?`r^H_s%Qza05|K;RaZ!Su1^l!~0K#$>ac6#4SZuzGBW zi(v-iltaZVyieP7f3;oo!bF;ut8LVyM4DT0hIc0GfjnJ9zg#%7sIz{JuGNhVO^n=K zO{FudJL}DL*Z+QiVUGuBHsx{nRyczrJr zsV6#r8i>3WYB0SDd2^{TwE}rVY^iWeq?mnr$+Ph3mx0z5Fw%Y-MU4}8pPmi!ptrFd zr2wAP`c^KCT?a#a^fp~s@D%xJvC#6Qm_4E;C5#iZe}^L4SC4sVKTuJ%?91;!rsgsm zRt3Vr=P!-z_U%+e?{LYI|6bZz|DfETeG}tU3CbzcYuu=LOYYN>LK3`X-)^EWk$u9( zSjP&7DeBf8(zB<$e;{A}P|L4iKMYcwCk{uas;?)2pSlHG90_~}Oex8WntrvMa_ULB zVAE4zDVx%W@al$^o>lC=9SYmQj!hk%GtpPrE`0e^zT8xCn;Q!A_$aZ^6U4wH(K}8B z9{I<)%S$HX{sQx8k;R8`7TrgO>ac|6crpG%!PbpU441gDWX3@oaAaujka_sZx3F{* z#v?0kW?XAhhOW3p0`U7(J;_`42-X34M6eO)-IN(-ziCM#EsR0Se5IsfxsnQwl3Buq z7_$TifLRE6upsD2q66@xcgt=)rq5-d6Zqae#vSDt?8|>*GbvgOgLFt(&dMI~{)tZH&?Ab=V-V)FxBF@3w?MY`ly>dBnoQG5b_j>RBq3Z_`kwLR zJ;E2|M2YRqrg7#bpRXur;)jZ+`#sM5t=4#vy!PR0R*#~kXIj@9Q054twDakUSLl&0 zkitn(jC0yW7QA@T0tseWE}QFms~MJa_806U?ax~kCKo18tLnHF#<{?WrbFaEuGMn8?d{$~>$Ij(8^+nlW6sWT3jy)OFW zjpF0=vQ)!e&u+&|G;x|Tu`b!n$&X^5>q`j>()`;dd;UM8H+9-i|820lytHs$NlChe zqJC(iNkXWd{ zW@cFPv_a25hS=WQ-w#x3*OPDC7fSqo>I|~boSOX9+uS89M|j%b>#2W;XV!9utenm6 zZ+5tQ*j;yvyHmRd{(M-w#}@f${=2J-%5@LHpB1_r-i=sXu6wrW^|+=HC;h$h{6i|H zA8fqbIAVIE=J=)&dE(D!#hRYNH?4*1cb&yHTt;%6kC*^F8sk(dm8A8ECM@sCb+E{1S7RQW`SCp4&!_ z_l?pfVxPmGFr_5qqp^+1G?q5LzTAYPKX)2@xopvvt!x;l*Yo7_PVs#*C@^R~!k~A+ zEOiED=y(r6Cg4h79K)c>>8{bG+qzCbNd>~<;VAcX#h+jGG2gm7Kj6>5%FjQnz)!dm zrk#WzpM)&fW1S`94Sl`f;}00^Q24Jc@L?+(_!eC5A2OnNm%E3LQcOZ&M2!>>59FJA z9c0Nf|5do{F==A#TtCqFS_p?D?$>;urhnJ#Z z_9a>NlTCIszgB@w&~sk2EQ=Eg^vnQ%6nl5TvfYVE!F-N&USgC%Xj(t z9t^cEc8*onEv?3m-te`kqR1=hboHfD&z18Ey!g62UA~{Fzu&dY9d?sBhd0*b@r~^! zC112I@X7Zn^eONua^ZQ4{QcrH?`XM|2>F_p< zIsTi&puD1%?Jy9e2a* z!|kIu=QtM$dQ~?rtPjj6iV|4^EADZa_g=KnzjqdmDdO@hdBVf%W2ah0CdlK{LaiG+ zh;SX5uY5~QUUb;7!|P&ab0%_Tu*1jX597ZS=}1QVIS+UzB>E@F3iCHAZfiCA_QLCk z8HsY@YUoiWQ)ZDaY8TK8=tWlfr}$GuTO>QKhgvmt5Zx2ZirfiFZtNfe_FYtKsufj1 zdYR0%=G4 zwkC&G``!#ozK~F4U9^bj!gJwY7R~W{8kl8j@y#vRnCPAqS@LXnH!Jd7ZLe{x3-9qv zcqaS|UIu@O$k;C{b4%?J*P6UU+fn|y?IIOSL-9TMf zlIO^CU_5Ds|Zl!Lf zuC1LWh-}Xf;0K9vM{H)%ZLKfU-!>T%!9u|rT}Fw3UC83wNtR3eB!eWsNsh}d$R5eo zUk^?G(t-ENutlbnd5(FJd4ahpMVL{@T(lKvduX~TdyT$f z0&ksYzu#Tcxm!;0yCquxuHDA?&_Q}py!1uY_pD3%%@!a0qwwCJPj@EY()y#Z>`+v| zpHFuuPtOdaH?)Jp=30KwIhfthu~XuEReW&#o||^Bn?$;cs_$Ee-%ej`f#~&wJB6=d~foKYD8b*?6gop^y#>a*A zC23RLbKVbXmrR3{eAYL>{vOwY~87g@YxQ0B76wcU?A{?jJhvpQ`78MwGTnXz_ zlkm0Wo-$+VeGb9@M5Ikr0oug+bU>sXO|s7~I`Fiq-KK(YUV&(vU)6okC0~5;jz6MF zcy7x_W!r*$))yM(8x^+l28mYrJhlhdfuekPvcA?BGOhEuiCq}=Cq9nLL#;oidLyi4;lCx8OU7!bMww6}=4wGF?EavS+fBvRkr9*;83q;Ck|0 z=4_?`(~x$~iOVvUuaR$)Zy--$L{JEe+cCJ87U4TnZqNDk-d{hOZfi(~_na^7>3u9O z^=FuGSZ_}b!>(%TP>*zAd&*uu5q6a~ z66xr*ry`l_QWjiEtgz4Cp>JS6nI5%OG}Z5s>Gs&^_6ssVv-i2Si%i@c*0G3Iak61V z)a$V7wBK31wdPa^W4+61HpWMFRL%xNj~Pa9Z0{}17a8kQggfM7 z*@&t|CZd^=6j_oiR+bHcoxQ$wo06G7nr){+AFC_+RWa3#dqmHEF8f^a`Q!-4!pFS# zBC%*fy3HF|cv|Q*J$8iqIroe81;cnJMEm?s2cF5?W;fl?J=oZ;4@rKU$!EM` zgp(rML4n*3BSL@;)ZeMIz)X1ef&fLy`4Bg1s%tW)uBL9KT2gJO7UXG+NKOPNoD)Sk zN0Au_n>tQY6}_doh%>cn?C#{gR@XDAChv=J_O94T_EYR zO}|K%YmU7ht92(=`%ouGBk1}0XZ?;QHLn|Uh8k64W86-Z0xZ0@ZULD)u8Z0O6{gYV z(x%X6)2#2;oIA5xFM->GjS5S7vw5@m_jq4!riE5CbnNoGWoj8a*WQqQK7)TYIvjyU zx^9}kmGpA45o~n#msH`rE^3e2r#X(_0Y&I3Vq3vy?)HV71TA<#U**HBg)7v z0(}dt3atvPirD#JG)K7x+EG_hw~!|>r!wa-Co?YEN7y4k&?#jLOAYplq@wP??dhel zCBh!q8=(r^-VEZWg>sLm>+UOoCjxHQ(R4CnKu!_6-=bCaE+`$eOItNghl1oehHaYKx806|2R1k3&gUG_kuu|wTb z^+au9Lu68Eea&@kiqKV04HOlT@=1lH0#XrUYL%}!OBp7`%#_GTlq&N1^-s57cE9WA 
z{e!s*&C7-Ab{P$w@GhA8xYgwO8KVJe3;~K%5$Y?tu&jE-z{xE4r~saxcT3;vx{bWd zNNDRRLbUv*=e*glN0yh@_DN;5RW$U76)CPoD_Lh^d?E85`CE z@b6@9u$z~B4#gpWN6>gYPb@O`3(wqHJJ~%Vwa82qh8WaynU=M41?SrfrMy%8E|G?0 zq-2X^&-GAxQ%4U7!7puCw>N{SGS%@zQ(d(&L6xb3;2Sf;9L3Ul(Q?T!iKS$=L=^;w zYW!m6BscCf;~wz0F?&_^@7e2P=h+9XuE}%pHr*OKjUAaBRiFRiu=$&ULFrT3S=SC% z8Q*46}(3@chO*;1rJacEMXE?<-8%9 zdp&g*)tveVd0zYUs*WCb{~LKGbCwd{zlW2GUO_R;TOH+UdXI|+{M98n;%V2SEV%+s zf%=~9T(**Z+ zrcLb>K}36j`sq?}DA?2ER7&ryd)OUf%ncnXH|C$f->KWF8>zd&FfS^}^EsjhF(=Yy z(5BMn&?eJn(IS2I)F5UNCm#t$ylG{N!8up7f@T&6V9t4u0TPvo&eHT#J#U4n8)oH% z<$g7JgN*HF7)D0rr{-Jo4~P!?oyhzh1tRbJd3XD5-3ACbOVf3I*QYFtu7R6YEZi&k zJ8&C$I%Oth8fC6B_5vnoB@523cu~A4zBMmgbWh|W0^bM$UEVD}SlvR#G&>esEG2H; zz*Zy{!9G*H%#JnF?}F*h*jZNL$>)+UDqZu>FI}VWa8;hF$_Cj=PWnYyrYsopuF0^k z=`J^P6s;t*|7U7rsD3FUu%Yy)ljLSm;Gvo_QwqELYojE*5G+VAuK7Ld-7*`P*&WTQkQ6uBF}w z!>e)+do(k0RC;=8_H7OtXTob{eyWbE=}j&gR#V3p$iXmpgGpRCo5ZOqX2uHC9iBIJ zKkQ+Z2QkThru{UlsN_h9LoEI&t-%*}U;$RwI(ucd=gOMA6UOT26{c7V|Oq{-b4z@}v}n@zdQ8fAP4a3Z&9P10pt1 zYmY_FEed%1{*c8UmSg7T6{1E-ly$Ye;#KPM=flRx$nJUT{oURLvdXmLn<`ZiRfae64&3>a-pT{6sw!)*X;gDinFrqnh7DFGqcn>T$iJ zFBlYr^Y4i8hx31~s>$2g8H5GDddx*gAouu-E-k%2H`pXk=4w_i+o*?&sjJVe)Y&C@aI&oO(ACxFQj`Ruxu`Er_yE%trJ0}K zs;3GB^7_b#>lEBH5 zPm{08i9^l<^XY|y`Kv_^ry(amh=i^2$fuB9z&2_e)X?YZT2ce{$&6#m1!!zj9Ss!1 znIP~lfs022u2*iLZmZEuH;45sEZ)@PvmkEkqC1buykK5fXtT#)XiCieYHy^4o)Jrt z&^2Cv@-wEtbhS<35}>V?UxExkEMzJmkz94ChX&Y$YjnHAO`=lKBGF@!XSy|Y6Ez#+ zw|y#<0(XNd-(9T#>uhO)-x2I1S|+)WXAvCM*WNYHZhPC|9i7O*AzLgcv3q=~=fkkx zU|=T4+qotWk{)nAn(Aa?$qGFpkVnkL2C8XE-jvR+zSb8ewTcDD5=3*sO9c#@$W=S; z?a`kiIFVFFaFy0kCJjfR4{|Os-RP$nh!aAbo*>Slfz3P+Xm2!5udb@%P2CUI~q} z0U9e-s^(8ut}A0O3#Ykpr|Q8u3=ZlImyDNG&GgiTYz`z$Pmtw{g^!}adzmJhEBf8< za;7cr;K}Mv$PboxT2-0_3ysm^?BjGwtP^*n9wFMG4AGx3{?YpKeQv!-=4NzunMr~q z10<^@QzShHF-uGG43EG$TepS0X|`3h!ng8Vqi=ALa4~-SPO&$6@<{9t8yJhSsi|j3 zVaR!x779LXpEC}vPP1dd(Ua&;zcZQJ)sA9LFa1^`B*H!H=m}IR5#3@Zi{k+n|6qZ^ z9WN6G5b+Nr#vQsIYTwYI=9d<7(l>M;2XMwFHTYs7RCNG^9K;w}9*V!qQFBaHF~xas z!m@@%#PUeOM$pchJh5@@4EGC0MK-)d(b05^+DU?oq>H2rKF9_p_%2u`;F{~97GkL? zT|QDoVk6Wb()o05z+t@SP8E)76T;191 z=+UV{e?7I>nd(?^!w+XWH~CITlQ$f*Apv7T-s6?g%&-8lv?qGDjXXnn@1&(`EKOw2K?B3-rb)~R zPnWpuqU{6*0Bqu)?kqapnZL2o&;kv58w$44(lxcV(3GpR;hnu(%0uU_0}@2jd=;h1 z4-YtGhy1!be`d$vA5!>^4~}RomG+}0(W7l^tDi1%-`SYXS!R}0kPSZ!^8e(wf2&#hi`M+ojpCZz z)_kiCsbyX?H{^3mGoPgG3;SiQPkwvv`z-W7Q#kjy@eMJ;TVl3&(5o#+j%K=cc~uIw zupo19FVjMg9lZw<5-Z=VWeuBtshq-+nr(UjSy)oDM&Bq%Izyhc+Atr@Go-HQwWJ|wSol) zAYrU6;GJ+LBzisI#D`I=;0c5BNpuF1idePG>$-A;VI#0t**QQGpc6LcG^4N1xZ#swe;H+ z)f0P_MA)amG*g7{rlP5HovrI2?QJix2uYpm()TuP`W+t3Tk2_S=X(bjk;&mAnTW^} zf~86_T5e+RFtj^}%)_@4Nkl{@$cFiy+M5uz730)nWuDNfKGYY>J4M#FdSLqpMzk>D73~QSA7+wW$_oXWn87<~P zE+HiUkv)+tM97v@nzAMUk2q29T9c#loMc5D9yI-=Tx!rVTt)N7m=8REI=U_^di%#**%=&ewyYyYr`JP(3t1W

    }jb9@F@@ln|3iwWfl5f z&N`H;oA`laFQdQpTe&YZ30_+O^F8m%tS|J3m8*KhX&rSMef^{oa~L@sm=KB*1xjsv zTYx?T`|+RHk7weCu8nYt2Yy24IM8f54Rq8aY!%9P*|Y`7P0<-~N4Xr?C^aj3+!*JH zzP7~}rfU}t>-t{cXN@ibKNob&Npuxf>jI15P_+a|t3SQ;N?SnoeNn_LyLgw%9(G*9 zsM@$s?~yap(a%S3@i%n!?JR(WhSO0Y*oKYlb-JSD0lGvetvf;G!V-;{UfuvhM^;QL z-=&K!uS+7ln6wFf@fq~RkyaHeK@vf3K6-<|M6bVJ^5ko~>FkH*6)7+xtsGV(7ybSk zvg~l_0TDhA?7hHN;2$D4=)eq1WMHqxpZ@&ZS3;?V#Yj|u6%*bdV|>B{coFOYV4c<9 z?Ez=et$}C`ljfv7cP zb5x_7gB*^2J5$(-sS1Q232-+76y}H?6}7&bAMl*gg20P8AX1KV%7lmOY3TE8Hppu zpg(48M5n~>=0|Q5vP_7EL0AfEZX>)wpQ2ET0FrcHgC^ZORSG092J&@mH_<;zm~Fj> zB@}aIUTDIv?~DajZqc+b$W^XqU@6IHhI7lO7qYva^5w6R2?@*QSls{lNKy}PRQ)OzPFmp;U&VG6h1m18G3u*@4vjIdk|sXfD)Ulu z*k^280QqWV>wDOP$6rvs}@A zY70O{KmdkPeZy?2bWlWAP!t<%ODneht|;|j_HvZef3fK-!KQ-%3?<1JWEz9MH*gke zHB|6JMR9DzC3`T8ne#EK4lDJbW&1GA4s*ouV%t7y87QUu@OmJ5CV^y} zNlj3qyvyz=6oZR@$nbZtUjl<*B*f5XgB}1T*hOJPnV=*`NIn}o)u3yOVuS>2J}?9< z9()OdfId}m`6aeMO&}hyK(KC@Eq`T%nO`fA2w>z9bVl&4cQJzUvH`P$FgY5aOJRt~ zRMIOW`1c46_@L2&ZPC_ln=0?0N^BVspxt=QY6JhVVvc`$Qf$C zL25xz+z%jH!URz{)KhRN!vx!|0JCA^#ilLEjs@MqO}%b>VuKG5EN5=BB)a2TmKM*Yw${!MC8_o+IID=J$S)S zz8o|GrnXf2BMJ02s55G9p-K~A`UAOT(K9hb5C5lDOweiN^`)f8Oz&F_E&WEh0+&ZV z7bwY;)VCZ4J^lt(2H1dD1ynn!l&ER66QXqQKUs>|29=fz-J1Y+SwJK)C5KH-tm$I_ z1FK9As%|l57K@n-APLVLt~?$YCbkAYjMWc@nFmtWy;z_CRaB~Ubhs{Nky_c4ypc&^ zkFVBER5BeiJib#)%t6-zmcR^*%C70!dxs)I3)jvP5xMc34Nt-H9Rmj77kzr=XGA7O z!BenxKt8g9OM!1KnPmg3WnPzl+5|GA%eJ}ss+BE7MW-b*0tPX4j{u>;=BE05d*M<< z9@oP!FmpSfXXB(G9fVNCrIb})A_yc6UW9;PUF zcEWoQiY}Hj2m+{Ec<-y^Hw&;ASEJt)0kwS(#z%hB1lwMTO2B3J57}Fg0(uLvSnw)P zn6LrC8wOzo*8(nqwFMyP22>=7JbVJ|M1RG_&=3aNzP2Oq1D-6`4TSq;Fb*d4f}YP0 z!835_s(XAO7+hqrra)aGK(q?IIWpd&8vfK$Nd5=ni1s|5YOx zg6W4j_R>#*Gt{O9V6l5Ix07DOl`$2zL| z#A8vzsi>Yq90ZlR9BvJ!!14dAby2165%>dgc4KN~mkSUALJZ8ulxdrN%%7kKaQh9W zl{Q_epL)Hl&fNqIbiwqzN(Ui36m<~D;Lid%y-lr@{u!>H zqRj7BXt1G`-Xepz=HLJ$YEi`n)$k{O;D2O?|6p1xe=se)|A8?B!C&lA;E2GzTIH73 zgPnvyuPdMq;r{8q#3zVng`%_?0eDsbz;*%@u|qIUhu>`Y6U-#2JOryF&!aN{d<0hB z)cwGHqYeaP11GReih{0>_mVKcwhJ6abdv(`gBha$&lIsjaG^H^!3k>PTmsd0rJGPS z2nQA)t{7wfv>Fs_u>~s&<6zqb)xIZD9a%B$v-L*MG8pDVKf;u7XqP^seUg!l3*j30 z6V;s_o-il4p^H(fri4wg8@iuR-xK@6B|GE_GeJ44l)wJESK$b&s}SK)8z=fA=<#k! z)iy98QULrwx20^d(71;aYFC&$)t9XIPDOocwX4t`>(nQvb~V@@0-Z2nPs8W%O6&^r zDllZ3*;*(FMd087r$nR5eu-jDI)GAyo5;1};c-uD_Y6oW*Nu+8w0h|qO2rsMxbmC_ zcpg#Mh_oSC3s4L!6Jp^7^ASUfh@46@cWEIypR*SjbPI`h;54(EFo3A6zYNL&2l+tc zx>YmkNT9C5NmNvP)Rhk=JZS6!2wFpgRfuFOm6(#y9t(xIs|cRz55%ea3qMd|_Pv$Z zhHy|^O5l$9FaHUNvFO1p%u=A*S}pnk*P}zhltEMw7D_E@p>h{gz+GwWJa%Ks0hSs? 
zILKZ{%v9a~WA8n{n#jKX@gNE+0=53s(V3hwD8oD~A^fViePJ=vCE9y8fMuCn2 zAn@-l@X2^E>?u85VA_4uNv3Sa&n`La4*7JP&$a^-;I~irAXuWn*VjnihXBFNv-zWz^tBS!56|=82%P`ADV*~g z$ptU`|4bksgiU%FQ~}jP_@A(}DGM4PGO#XJ+A-BFcm}5|==VoDVO;U!C_mn~l}iK` z^f&VOzh|>fsf3>?%x8{GJt|XT@mnDQItQ}@ba(Ld!J^`I{I`|>7G6j`;+%p<6o7Q# zK^5rtT|l;g^Lnoht1+Kh0<<4l0(t_K`GM4Nur^Mq%=@U8Vd~;POw8q_fYv z<%6tEk%BVsJSf57-}vNAon`2k`}cvfio{Mrahq)9v?SlRRQ(@}Le7YikyX8s+vjE5y(?j`Y!a(Drg3JAeRUgQERMrWo zIEI2beW!hNRB(03iY|0ff{mcZsWbmQ*XOQax&bv~%1LLRD;Omph)m7zzMQY84ea`A zFdr{}JtyC0Do51x34O@2$`_pdU)uds*Y{JJsd`Jn7=b@>R4}GQC;O?9n7dg2Xj&qF zyBsJuBY&vUnRgW2%Np$?xOrL+a89UHs85+!3K|i$6sqB*760HRs%-#+@-6lbU=*e1q!5X0Oao z#sv6$k!a};cdyXU^B@_`f`ytvb^}+i(20ZI?*>u1Iu~V<(36zP-eTunH zca4ikFyavMj|2g^^veDZbq`4~8b3twvK=8h(1`d;@PDKlK9d&%eOt zH6h$`F?uTXi1YcF)jr$63!B!ZBDbBB(yuV@|5bGM_x`Gy^^aO4VqDC6KwNV5uZVE5 zui}y$7T=@^g9&mE0PYV}(jab4!DB>}^as$H$8)Q3W>9jZP*@K&zU__3#N4g`k*Kyg zrUA ziJ1j!!QB73KY!xcu_X<9XTLf1XJ#wEce4L(`*#NZoq>O6;NKbecLx5Qfq!S<-x>IK z2L7Fae`nzT@ENG3iB-KxIsK}`i_{~}B>6wVm0Bru6$C!-@sTadAyzPI#F?b74|`BL zo6{!x7`w8~aJCDJ;}}rA!UKqyD?5(G)UivE8ass{8#64HZ+j>skRFcKW1ly;|BR}r zOCz_ZxZGHUETcNFV6L;^zTej|usuiWJ8X=Mu zxlMy4ul@KibjSDuKSD-PJbrculB`_VQImRi4z;^eXbUCs1tBP?WT!YyPpIQNk%G<&NHX zB*?%5ac-4hj#QGd`GM_tH`)Z}a@!l?&|>QlIt|B(rZce3`Md+@49H{S3v`bT&o)D1 z(y(ty6ZCv_6W04(oBF$6Y?+Y?_eRPtUDb|U%BcXp?HvXq>%@Cjw1yT$m3N|YWi_~I z`-qH62zA$HB=TSdSE92LxAMNBC9}G}u;L-EwCq1XVS%M!PWsMeSmP zq{;L_GRBz`(|HP_;rHuyXG0G%t}yoTM*|_n(N6G=XcmJ|VBcCs_exJLC1^Iqt7Z(a zFeo2KcQvgR2-C5Fm-oPj|Ezt4d-w9~v{vj^d_)g9K$Ze|IPY>-+eOJw@oO3_-CrFL zO?QE!NAfXCn;!b`O*ZQBPQGAwhn4Cwp;S7HYSQ~s*2Jxkot=W=&$qr*p643E-$)2y zhqK?;yVdYr4v7V(KcD1hr}z<5+Van@rno0z#tF?{)cu4Se#ni$OwC*O`0iDZYzm*l zNy1n>;KvO^nka18JXQXgC-@!{2y<;@IC9heNoa)$8}kZJaDkFU_foN>-EQNNd8QDw zi^NFb(n2VTq(PQ@aT0cYX`UvVqYdFXQ5A#oWAC_i49IsRi#YTQ%wYCv9`<(As5?0< zxs>VpM_CT9yG+!#L=U>w)nLD%5fX=64 zc+j0>=(Kg)I%p@;?PejLSjwEtsm`afTJP_5pOmDtHHm(aZ>ao7DCo4MbgAkFgSpBV z7Vh|8^{?67x#O3XNGOo>coy(_`M>)@o_BJH&cOjd|U*o{IKv^y}{QH zEgHj+Q0kM{??8qo`XQ3z3sUykDh3CWp~97xM7p2Ukr0%|b>5Y4PAID9Vc8l=ur2cA zr#Fcyk@{L*zXcf@>xaOt4|+NL=`>3GR__Jl_@;OPPBFr7^^65L{TxtbrUJ}DBB(M` z0eY_jRc0tQfU%tOTRllcZlP5B{oaG7{#C*x{&|X8y`+_hUURRSxo2S!bARgn8}EsSHun8*sL+a2U9HP1q8?J4uQV#Jk3)u5`61>|5toQU*Q$iq zazsqeRAVj0l_>K1b;!^hKZF()F+mhs3ibtxnoda6BA48&R<-Myxj0#+YFCF`GGhUb z(gCW>Pz-{j;E+pZEI@N2s4_#b3>@VVa>($K5>(NxHxN$~MYT7dW9%N{QAHsr)@B}MaweVVhQPW9@)P8yWl*8LH zILZ6?cSm>2pKV8Iqf^tKs4|3IgznV8i#X5FKe+(E%?Z)Hd~EFCh)$a&LMuZ|X;Pm! zJZrAd(!@=n3-vt~;JYMq=9qd`Aa+PzOWG7_h!9qET3#8K4tgD)^&n%BejTW-8{-2i z2%681i4KBWJ`x-Rv{hJ?lKwN_^9HoxAsvRJQq^ILXqZ^XTugL#+C-9+LeSmz-Qd2Y*F)y zCCwRvqAF^F$xjunU(($%0S7)j!6;H5vN{IJbIl(Hw#aL4w5+hSsR(?^J%Kt_5;%PB zZadXGA_e1(p^xe>An`h9bOKK{d@xg~rmVSqG{?q+CK=bp zf4UOIf1xN(Wse$iY3?>hK2C(JcAsa)ttG>CT%9M{zj%0Ap zJmn7Tk&!PLE4!WzM&aBt(vw0|!&nULU$^m61^c~J(nF^IBsT1W)F#V7TCQgP;SJYo z&6;dIXxuq_q;m7-m^NN`zQ!#%g{xVY)O|)rn|Q&tDu_PuOQ85^EUWAU9gD@{H2IO) zxEx&to;#h<30)ybZM61oeb&xSSIp-(MjutNEPK~si8$}@I2+G>(s#hU#Bt!BxN4vV z*V_uuMwb&+?OS^|j;_6B_H`BFvSqjj7z$2&G$A<9M3+XWPlu}AAv1gk=QeBoY0IL( zr!NU+Dv4On0~6*a=?8xAW_2ki>`#WDU-OS(B_#K=6jIs0$)I)DG*WH3Y zj})99SlPgDuz5DxpYja6#pXIQVLCmjnJ2a?eIk15g$wIaeiOWLVSmVj*z$g1eJ|BvDIzQW-L;_aWn%j}efU(qpIxQ1gRgwbQD5OKba?BV!#us* zQda%TNq5eLs>@F;Oz?`4IOkY{g1?>_23T$3lpDB9F zMwn>-IF@;{8LUOvT+x^)XA;?wJG4JI5Qw%L@Dh$b;UoBP2!&oP3x1+vvt5X7c?5{I z{_;I!OA43H zQpYuvXAr1Guq_8Nua6eX7i2$iQ2#)_qT9yi zWUlIJx3h6$KRmTSJp?N$?(6em(dIUC<*GKlfKzip&HdTS`eMoqz_E-1q3VvlyA0d| z)Fn0^sqwv~y6*-y%ktib(2mB*2g+JDeGN6|(`X;J;mC`NtCX~M8DoN^-YyuI}! 
zp?6$iWaa6hBU7kMm_z7-#;G?uNyoNiFK8TUr)oa%a>F+t(J(}|?M9r!;|?HdRE+M5 z7A2eQTNRo}F14JegxQW&T(4_7xuA5heq*uMI-zSxwo%@G%$rFbuVZp7T~-CxSZ51w zJwntvbV3Xlzs}10g={-wH~h^PclhCH^4{xvkSDX85z_f$sV%}5^?Q$qE0J%?W()Pp zqq4Gq1_UIk{&7fhd%!hUM`paOg$)-2ui&hkO-ce|dSJ-V!2MSMr~yY)tOya#sqR9BsU=7|A9} z=92h|>re)TuTkCFcE=?bEMGEy%C`nR2QH^8MXm}RAbwB@k57uu&AB$mzi)LX- zHBR(rEU~vJ1yd~VtTUq9iPs}9d;2VaWN31bx=)rF<^ee!m6-&kJMycJ)(1aJtD%p? zDcsy_&K1UCeO%N-k6p!fLR?6fe$w6hK#3zUrGvn8cSezUl_3+{D5wtz!{1vTx9^eKU|sG1^tPg8J3m<* za~M5uzFR32ci%lQ{c<}*4@vR6<4dfgZZ9wGhM2Efi{B(WBcA&H1C+;5J@lX~87gjz zCpsmvA(c@c@d2MnAB9j(#XJ`(X-nT(%4oLGH88mS2Njuy*!UqR*rW%W)k`Cp1rwTb z_pS6T*MV&BNH7iKhS&hdAYyXLp^G7u=pG%Y8d>K0hvU}PTpUEcZ&2OhKFp=-lnpxR zO7cee5X$8SWcErMGcfW@^!;t8 zA-rPnJABuJTyjk*@~y($_yUN9NwjQ{H95c;CA&o!4C;2|^HB3CDPE)8gfKQ213*IQ z;*c^sH#`qDSLj*=m=j{##_EwsG5iFzc^b zl?O)y)3Nz9sEYc)i}*+ak8!*@dPhdzu9k&C`W$ihK5-j@j$Gy2whYqCy$tX@XpNm# z9eh`UFftaCFH;-pUVrG|A#^&agKmbRRvGfqCAOT5(r*3-8kz6RzYN)^hw@2Lm>3z7 ztaZB~2b0l6bGJs>6Y8X&(^tPLt09rHMVhIE_g`a4_Aywq>e3#+
    QC_(Un$OI4V z>%)=!G#jXnledLWM13HWOx9bKVCh`_?x1>y*95?8~_v(C>leP zP@>A~KSGAy_Cu_qA{O62WA3+6TU@EPcUU?cbN=o!{Xp1wk#wj7mJGNGQ+7>1uv(gh z1ArMb+w=@kAB~ZMDE+Ae4Tf?<3!1zX!wlyM6)N`Md z{#+$gtfO2!)rD0;vO6t)3W@zWNu#t|tFjJugs?cy3 zW-0OvNftWde6$rs4#s!oXc1;?I+-Q$+D-lw>k9 zL_vqj)A1*A0^U_IX|DsQiY#5%pUQ{R%&(l2_jnZc@=$uTheMa&`;b~K^Do-EdBljUstX(!i|FBLo4xnl?`mZa~DBEO3OT3s-Tk{B!~}dT396p~b13 z@~$l+9L=){q-E{OcEJR;a!*`G-7i@?GE#5rJxJ_pfn?s=K5!atDkoMQEId5RRvq)S zi3T*Pl8R;etCnm^DG3OfAT9Sh`Q-hO|B{gR6YbBGgy?sEEb}>##{228KRXu5t_e_TH*&vkle3^vA6A zQth*rZ(CjU_QgPbI{CJ)rW^YBbM}bV`iuk*QkcApCb9CwlZa@h980Pf!+oEOVIqSc znk;eQZAM;-^5dD182J?e-OpGRzU!_`2FnKKHw`mv5?U$M8W{w%+rDyUPvU86saEg# z+@^;=G2}a-Nbu(G>-ePsQyHj_HCj!aU8t`C^5B9D)DwLZOD4Ye#)7Gwt{>kP`Bm25 zTuVgzUjM(}0`_^1@uUAK{p@{_pYqrq-qS4A`0)jbS=sPq+t*J0^qL^}Ee(O^Y&|vD zyCyNdlwm#ryZcB8<}|r*1Ybw>_!Y_?i6-D|pvz7S_pwvl_je$d$ zv7NsRUv=-k-Tpj;+A)XCJ*Gn)TZR3jW^W>7%g}{Po>5R$ZcnL3arDa>By8oovXRi^ z9wjFT`(1>VrfZ=d26Wfb;zAZRu_ATL-8$FctF0VgdH7DKEaTh0XH}xv$6S8><#kr_?-eI!LmhQOScoZ$QK3^_bL$y-#)u28K6~ZwUe8y(Smy!nR#n%47m5fBT9alz&@; za9AvLo1t#=q|7^oC4p3^hf)rWk~)Nqjj&aj6PyS|-l=DIMXOZPnEd=+A_wb>hU-*A zF6EzXxCMGrhd1`1p-%5|zxy|KFO9Fk2OQ=*jqRQTFo^{~ODGdsDmvSAIRUX-c-Gn| zH^hLD(6u_XP`H}#+Gsb#@FQrC-GXPGZL=EG#8TA^g&Y#*m@1?rJWXaTKR=;fikN7c zWgL9~aq*;(L*3fY--{7N_Dl8eTOb~8nV}fG^*ohMMp5b6r`zSovlQCfJ0RZtpV=hx9>Xn zz{V*j-;R&X;UHSJG9(2+tvUwQ5-~@$ zvmdi@MGlltP2TYE#UGT$#qoY|_ve^is`FR5#n(0U9Y5<@?j`BaO4N;cKYljHPqM!X ztwik=of?cPm2;f2bk&|ggx6-ud zY1+H=!$NDxjf%=@VPoK-elPJI(+ynqQ{b`#m3pQ^8dUkl*7PQC6Qs;$K-4_D6O*)rFe{x3()P$GVYpj$oPE*y|$u}9%aFGI};-@ z>FK8HI~|e7&B7a<-{DuFGwI9&djPUp)ideUy4;x?q9_B<(iKX7N$(P#$oLNMnsC;d zk|D7t(Jj0JbFdACk(lorUzE5C$Jimyr8y~ejXQc_3VPo=W}7C2oQ|q>3}1!oaCGHA zj0c$Pf_9B$cfFM^TO{T=j%{T+yBT-OxgM@r6ZNx#> zH=NYc*RK^5ix^6z&5x+lYumi1?zu-u=%OTH5UkS>6M?fcu6zC{X2HUR&BIFqK4j@Q z^i><3>_$hwC}JwapP=e)eNIF>+^A+YY(P`5STKkyipM~Ohmb=2#>MtD=+!)P@l?6U zH_{wl+-`(0qd~4+RpO=au(w*6;}|m~8$bszAli-7%Q9No=8Vu1AftM~cPTT4v?6xZ zZwH7-Y+pf zA2gsoKLCi-D3WnDfcz&S&eQc9-&^^qhZglQxw{eFr|aXH&WKZj9_u`M?#k$Qc-+pA z(Z_iVR?FxUM`t+!KtdsQ6{BGim@p+qgT@k#J%klrOT!&Zz=>2UTi8eMEmw{Yj()L7 zA-1&8XmCiPQ3O=j8}87qyHz*bp-y+JQHXJebM%V?im~EdE4*^T9Ud0ZVqO#(4Q4KA z412Z0YeSerFtfpXdF((j04U0s4RX247BtmG08h5lP#IMj=>EaF!A{VRP zy~1lO%pp>DYj2pt6YzuQ?nhy(V9NZR?V+nfE%|HALXKHN)M?-t(~jUfj9xgayZQLu z2N~Tgz6L$SeE^#uz%!*YuR=u(YLZSfmBf2qdb(lIq-*n9ivBa+OZNzw3w*GZ_rL3}oyHrt$ka zEEW9IY&q!~cnmdaqwJ1f*WI&9g7AmJ1Jg+_S)&22O)Whfem{2HkH6{wnhZ#UIP{2+ zgJmL=e@HYi{r#VOz$5G=-9<(BXhXs-XkXW%?up{ejKD?9iVadAsr%K{&K5!ZbQ2te zc4N!{B3F~3B3p=b1PSdw3iUIT@z_&@GLGhpt}-8kl3>byaJ9mYUJuV3M!z$ zgBbD^qj0{w>FC+V3&~h5x~sd|Z!HiipNUy|SA@g0dgw|&$I|3wvRElSdTCI-M`0J2 zOFOimF@hS8H^Wy(j)o8m`De>GY-(4D8}C)7BZXn&HTdGbsHt-xpR~WXTh?UXB$RS- zG@oh)zPD)4h|ZksCy(k=jxBT26ilA2^$%VMjfS zRqo0eRvf8UWLApo9)5(!YLupGRsfbDJ3uzzox@c|IS$2i8+yph5H@;x_eClh@JG{# zPPf?D_k8yV41oJ-?xAU9mvYitk7wJqSb~W50ra8 z&rhOfe>=kSd?HkSFae#uz74{(l=Id=Rkob^@&NwozB|qc7Vw1b&K-^9*We&pO&=~m z6sZFxndDt{MqCskLzK27)h*Y-ZD9r|jk^*$NjPy`7i#xHHw#X&o8*`=zuFlgTqD2# zKPHsO5UWmGUKLG-MQsV8q;#C*^x;!D*(?=4x*fsBAo<&!gWSE30N!Sj1|I9fi*S>5 z+|ZeDhQJO1(6m0*y&ikDB2$ws`X>$5(a%M>G69N?Sunu_UNjDF$&Eu^==9+V{s=`c zAd?93@}ZPA8Y%a(gaRXYP zEu*;WqD&b!7S`YUYzh!HRalmSLoXv|&-ofpeufkB7iD%oTUP};O z8|7+QC~7LxZPvE$nvq&)MADE#3!;dDc)0>dDh2(MJB1qv!VOVQh&gD)!$sn$PLjuW z0-{s$Jk=Gq=7}-(-jr_HYhL2`_{AS5AK%tpa?((3$udg(X5{Vo0w?^1?nO-(cA;0l zh;mxJp-0uu7c^~2o>vT-UItC&r<#sUH#JgQa${_&sl!y$RiJ4kXxgo6cN#QpOrGZj zn(hHjWn1S9YQ$0w~K{fp?)A2H(F`R)33#-~1KE%ffXPRd{3G+bkZ(lmLcgWZFQ+n>dAy)V*b z>r<0Gr|X1dwTNY3j#G>&vpmA<4~;VEEG^u2T-VXWCv$xc0p@|$>j(;vrXkjC+hOj3 
zuc1p~scHH@)T2jlXQrDnJMm5t#T3&e9vb|}sK8Rs;4)!fpPM-%kWNv!B}TCnXt^)4 zO;DN}r2q?wj(e?-xEpm<$qQMj;$YaXpmkyH$=LY$jzjgJg;~k*kn*fo)F0<@? z5KY2dJPw^rw2P*7QqV@QN?lPu1Z&g}sVd7`v_eDO zaIq~~A;s#fuCLE8@9X4W+Cq6Vj3{=c+BKbVI`Fb}a{9#(;Q*aPMvt_n<40GOx=Fi~nZ zI6Ls4o8*;iB~s~2^OBVKy&;g;@vv?C zz&t^qz~4UM>9~d&Ll~x`9A*k(nBF&22*dQg8A2GQ!y9JQ8KEDW}PJKYqK@} z7WQLmfAIvezWo#-$K$B_=C%$G3Ft&Yz1F)3x2IYWrkd^B?GuG@U8fVR<-;r=>cP00 zWv2XJ-3J5|#Sme_+mi5UhT=Z^%;1TabMNc|{6y0)kh1L)K}Imt=3AR5akc$*iV?$f zl$w46pk@I;4By+*_JOCT26%)<`f}u2%#+2=faf?H{LVN6Brze4flv8cvdgc;OMZos zF&w>6e`93LF+H32DNICg(ldw2Cl`;6Y(7lzuL;Q9Sf?yBR>X7`_235vQaIn^EBYmfJVaQJcI&bVg?45&haN!hU%a>583oPT_!sNa@y^U&|cG zgqMtG_h-k@s*bJ~>Q@(N?!WF+FRH_`?d{q^)U!mFCJ8s~XuA!Qb*c$)XJlVsE z-v2y)p+!ZP^(7E$wO+{Ks?at0Sq-aZ8T%n;$Ltr0@tvC*vp}hDq5c+W{SM{OgIhxr z4G;qk2-X(K)Mo!Wm}TpEXyY{h2iR*472g8UUnnuKGV&9B1y)9WqOSmD0XC)c>J25h3*DgcMbKC?3utNDO9lT=(~kIN#sZ7d`P@I6+&gChqqrf`0a51I00`c| zA}-Mlef)<=3^0=A1cveMDZ}_}g?z`!ARv(j5!wkPg_R&wW{wz8CIA;rGnWd+Oh z@3pE*$msu8vFTEUiOP20m77A*GD7BC-}sawJXd%KsRbS%qHf|n0{X?32Y{D$3N%mM zycp(MGLba?ZXk1x>E>VOE9vf#eb_N; zt<0hDSq%|?%~?2Wtu*{KO7K+{{u=q$9Mk97@q1hJR=?;^ztY`U4gz*=YA@Bd10!3g zUk88n5PYRys@UX=^jRDF5(}}nhc6IV|9Nu*xIVgfFw!hQ3;! z>`@@)P(YQlr+Wfcz;ZnqqfKp@o^_TmT@eIeH3~?Ff@mu?PIu21SE-;fR))4TbmWV8=qu3xQ(+Gy#r<0EOeQV?ka0w82&m*rT)3 z#ol(XMLt%Xp;LQPqA|4v%4?2h`t&I z2gQ%y)C8Sg(?-i@3oW3X1l@W%=G)U-ox%2XLj8-~eOpJ(r)%9o{#|P?sHGliM;yzX z6GIvE$r0K7A_1I-XHs<(Q172W+i=|}F8N7?A3O7AXn!2TQ=Cn`GvY~A_W`lgw(bT0 zQ4bwyih2_)M~>i>g=riOZ-RG;r?z`hngUCw+Z+4+-98?75`-OihZL_^q+eFt`LT-z zAH9{&B?YGtL^demPMxIpr;We=Jz~?5AmbZ~f(aSGg*#k?PeY`+r6!lI++FbOJ>R!8 z&k_O1VjulU_)q?%4;)O9&*NH!<(m@;tz+Jpgg|Wh1gWzsWbF~o5*`}F7V%f0bWC;e zl*>o+6R}GCzC9f1yyRiw2E)O>+kON+z>!42YYzv5{?J_y_9dy> z-K+dAIbhm zChZO@@wtrL%K9lXRVgSo_STTTQJ5UU!WxGHz=$cjxfK~x32ET)z*IPjC+IkdGy+t-D(Zqzh+H-=oe!D zqDF0q&>f_1U#iOXmRVr=^$x7tF1(iRW{>a*Rz}^^$R2M9iz9`?vtNCt!~(ZnyK61MoLyL z_JsQeLe~sEjgOW@&qF4VP6PQqNI-Bzng_YQ2?j;KKLSg)2DZ05CdXV&vhgvO@yCM3 zo}jvZzslWGCOp8JwZ`}zm}pB_eL9(RH|6=7_7Goo#n==ovrpCqMq_gOlUIRiTH@4YS4Y3ad?w+8AU{n15{59E6`dALqMaFS5ID~lODvh>@d9$xS zAWr$MTB3(OAAB2n97sH|Ek%9>t!mF-v`tMYO_d0;HCw!KQ!Uy0R^6L-ReM@Z>qi2l zl8dqWSihZ%NJzui==zJubdhbp3UGzjwH8~oMgN8gwtlo1y(NGV3Gavj2NbELZAym( zeUP_oTaB+=puVy%Ag&M$3o*p168rx)14-TC&g+BTB-N1ZolT@;ldnlsc$PjG@Ls8b z(B@6NV=%^8jdA;gFG}=9=XhvL6Mn`4rANZIuI#OxVP?We>K2@kqpKzkgf!+JOHprz z<19bk$zO7PPAw?iZzD5x7iaQlju~m=`RD7l5%la=;WFKAM%K1HqjyVm-I3q;2zV35 z(m<5vMi41Ub8f|(*k``y1UmuhPjo1_@iwM08uu84TIUj@<^MW0uN1>~74a>N)6VfI zlOItE2r(_lrfhJGEUq>cg>+=*ma)2Z5UzgQ?saYVQWAKzjQWMa+n;O;XfS0;K3cQA z)qIUmPaUnvzztA+QhoL=s}Wq+55z~zD7z5q^6~4fAlUwJ-jQ0Gs9asi;^{=|(^vrv z8hr-l&58orB|z{z%N;YXBVHyar6YoD>vu~!00zr-glBT6^Qutp9G+p`H&|zHZ$x(+vj|BF*SMdo__n&Kob!4GhdEamGt&jbD_Gr+x)F^D8l z4bEH3hs<6;XPur0-zl@koZp8;z|1kr9UzDn=5pPYZ~9oe>NIT5ofBZ(Lmz7G)I(n`Pbxemc?|@_1|o(} z=Zh(6qZG9EWvJQ*)IG8}mm>(9CHn~NB-gK~ANGE<1+c?*)dN7?bRt;m<#18o^+UsF z_m=mS!bwU%WUT`n@HGaG*&Vats+U6Si3z}`+WQCueEk;HBAvj1Q(D~k&2|Vxfdy{B zZ)?~S(*)q~qHeJOcro|4J9{3?WFkz3`x-8Tr*jJcybb>r01E=&-NJ5%S|2cSr>fLI9U zdN9P>7djeM0}rP-C)n+Bs{+gvJek?CEfQWgR=f*|STJ=<31Ym*ik^ZXZ%va3Hu-MU z4zPw%LapUjXaB<`NIq(wRfIv0#6MYsK(3DQ55-3^fyb3|mJ+SZJ{RUO5tAlCXC==z zu}_-qB#YOSgyXzJJGNLLuw)W){0-VDn<@_?M4mGx?&%@o?+MGz+kD&WVE4{ncA*o4|uVCxNY|GUFjT8 zpr5m>Jpou8n?0#tQCUf##Ab`eoR)}Utnu6!8UZ305tYRtSaIm=tGP*q6Zt2$218{V z+w8zYRW}*IlK?cXI>2qLp3N%M={qhUO-f9R{1I+RIXmr9@)2sAb*2g-6An`y`HLM? 
z!H8Cbjx{8=sI5dwZVII;FW-erGpv#$9(O#iANUK9_wP8fr(Hb2ZJ_kp6cSN-eKRyY zxNq@0uIP%dbcI^lFRvb!WtcX=zA2^A7Onr1E%i-h@3buRK5zPCMegObJ!eL*J)_p& z;~&Y=jeUuN1s9f~q`paLY7Fl<3p6-l8y~?rcDi4J$tT$Y<3-`QDuYAYjd(b|MZ52~ zK@osC54iO;A6E~J0E^1d*{P*P)6mhCq48n>tUq;?shh#CiB0e%6eG&JFOIm6e7*SqUK?_RPo7$+VBGG(!{Mc%EkTC-s6>|RK9I3;Z^szorBus|cXk$d3HpLS>l`%nEiAguUnd+6ZHcgpBaq06O{BZ{chr(=zb* z4l6}NtMa45L#WkzlTqDTpNI4JpeIOeAvlg7uLnv%rI?D6A>y-DO}4e-%w8*TB}F%a zw%_YH%~&3^k?!n1$-J$*CM7ckde3ILpzP1U0d#Sj%qs%lU)qIIxMRItSC&6?c6*pZ z6MH(QKV3I6B*jcr7AxATkqU8=uo6T;`o=6>(}O| zqXa%8&$%6noC@=&NI4htq3JY^kRXkt`n!HHUXNm_4o1Anul|O(@}gfp#`&uk_|~W! zxllP*p$ZZ!Zs(Go-3_R`rvV^y319<8Kf>;lABwD71JVs79K*!%gPK((h9J=|*HKe&$QQrf!Vl2p4lTu%=w1!-jbtumL?NT#&;^YjRG3+h~+Ysl;3>hiM-aUJ+BEKk7IRP0uz8tYQq@;L$ZGCW&R_tQ)|=~LYQdtgC2 z_druY|8nI}e~Md@sW68O{ZfvzeHb{<)betd3EoyVcKAG|uFg%DncKUFso!vchll#w zE)kt}bG0?y_g43i#^4N* zAy=zzJwRYUa!3gCiYl}AuFG+Vk#DkbC~{LP;ByiK=Tr785)R` z5{csgY^j;@!n`P-^1@8Ja0*_3u(&+4wr+}KGdm426Oa{8<+eICH}N2=ngemn?(gF# zz3I4RLAW|zM)^~ze4v5{Z#67E2w#X83!~!IvLd6?)rIBy+au#0xEC zr?#UOUB8AvnUx6=E^_Ar_Qk_g)ZjYZ7#f`5g$O5jSsG*;l^oOp{Hy?mr&;FCHdSa; zwRCJ;s_zr7TF$gz6>h7xh-IKPga0fG+t94d{};KN9rNBIvasd9my?EdzRt{MWf(2Mt z&XMMrilq*1v&j`P^;A&yGmY};-;ulTfDce2A_3Pk(2x+5BcoqOn*~mU4hx+a8Z31B zjV2=og)C~0o#93u1H=%4;-R%q3ETq6eHMrBWgbIa$-YYe7~~)Va#;A@!GD0bn4Nz6 zs$A?RU&ZgBvu>!T^dbj>lGQGo$IhO+tw;$kg)sD%NVWFH&5?xJ1OV{x*8@HxBHqJE zFrWGB5G1iw_Rc7BOb%P;5_y*moO#DF6jso95x}*`eT=E9^?TI|p5g@BhAi`WN~BG)Hzdi6a<0DlK}N*ca)Sqhyk$&w}k1CLau6AVL*s_Xo-Db z4&dtSZ2(~;0{!TfQ&M4XoD%f|%U^HIIA!BfpeTUMJ!EBzw4gMwfS;;21<@euj=-ObzK%GCLW04;w+&!W2jz&ows_}}!F-+8 zBfJZaehGDa1pE=iIf{E=Wwly#_w?-o`vA?($i-gw1e?=EEhFa{p^Y`p*w{sYk;5l= zcmh7;HSoaj27lQETrXOE@D13YE!h~mt5)R&IHA{qI}&WzvFJ>GT>3Fa1;Sa^)G0&c zgOfz#{PP_M#V24!k$uRO>%AS}DV8jeSrRo7lhd4}$ZpLtXACEn&02nSf3}`&Z;)Q@ zui<`1#R~5}lsL=Ong?4x74lz|eY#w%_KbMMU`G3Ep$I?`*%(?3tz|k=P>ejOz+tTguH3YWT zyU)PUqxj{!05JENK}V4@NR+2L^McjJI2s^@*YKQ$SW>ub)?E~3+N3L)!hy!r+IS%O zmm|SZZYADtDM-f)tMppoiegYi; zURXwz@6^IjL4L1lfHIM2RQX2LiE8@`d5J^kh!xA5(emTWB_M!IOnxuScm zH-7st&rApZYVUts@-Ok9p3ro^q*u`&?S7vd;yPk`cjyZyr#986m;2PZPvJvuipls0 zyYg}t##Dt<{aF~`DCX3=z&D)HATzOo?DJjOum3U(a;xiJ37AcWPd423C@tam!Y-eQJY_vvo+de*jLOoJ9eCgLIPD_H!T(5a>@a%H09)%D#Af zzGqRZ&9_{Uf4#F2b-nYsZ2B|3tp`dhh`+jDFYko-5&;?9RBlYw9^yvo73MQefA|1z zLWs49b%2!`WE`?L--uW$>we{%10q zcP572$^TD#cOTQl9mjFJ1qIzu)I<>*yTqqdrqueiTd?93|7=6>b}e5V@;k?Z#l>RlrCY>fyNFhEWT>Gq_HFEiP5pv zV4QE5qf1h(sDEnsvetMr`=N|k*z?lR-JuhZ8t$mUl(61n?vF#fkJmh`H`nIf%vMr{ zafg2$+|Q@`2U_1USET#v?#(8Gsq^~FNufgvhQf!I4y_qlJhXBsG<^nJp5v8Ut;$oj z5;=ZB1q{{^iHfactJ$hLqL{EH=a6M&h~N%UmeP|TNN@7o(sn!|Bbbb!EU3kT0$Cu( z%_}!gS*{EwHj%pp`(@!AXRU+QUF)oUHS$tr8h1$+X`;o$*Y?I1%OX27`AT2Hk}yrl zAY=XZbL;CYc5uHcG+2TBI1~-SL^ip|gq)JzU zO*9tuCpQReZoRg&S6`e>xOIf@a;aQTgv-pZ~V6FG58R1{-x{@MK#?teALYP5W~UC@S*_x2r*blBDJys^z8p5{Ga zg2lv*yXq`U{1c0sLxlS@?Y@_&x%{d}rG^n%PC?jE00mG01yBG5Pyhu`00mG01yBG5 zPyhu`;I9R48+ly~6;p(EQ$}f8f6_g^UidM%hGJRG6Di9iC6`GhLmWdXIbBGjq(XaUK?yD~j8y7FsO?PrC5eJ#Q`)kA} z#JvdjA}D|YD1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch z0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2 zD1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4CZClts(jA|?(yEDpLOKf|-m?)7y zXl~!oQgf|)W=&dT#8z?Sy>_4ehDL=V&dy==sioXz}sT*d*HazV5L) O>ysPBW#8OhNdFz%v)ItW#FC*nJ~_WMuLPtC z6)-cX8d?;iic!i7C@L*3&P>cxa0b~~soZLfax(MM|>mV4!jV02;b2 Ap8x;= literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata1_encoding_118.dta b/pandas/tests/io/data/stata/stata1_encoding_118.dta new file mode 100644 index 0000000000000000000000000000000000000000..475f172feff8757337f408f75454add10ed276c9 GIT binary patch literal 5587 
zcmeHLQEOa96h5ngwyCnHEmWT_QlCn4vs=NW45Kz{tfa=8RR|QiOm-*Dr8m3Fo!O9m zN=pjOldk~-eQcpGHV=h9_Dvc@MSp-w#0o7`iltck&~xU_*I=j~y+Fu&WgzyRx8~xIx-3 zdwesO4ur>_Jl^&A-b<HM$36xR=?c+KOt9)I>ILON9}sw7ggv98#5Qycj@ zUZYP$CUu2ZlIIc~nZoG1V_+)PMyqbq6Q19%^DXs7m_}mb$XG|52s=|T*GLwWHS(Rp zsCg#WM95p<)#H|~xaV3)&wZvH|IbClLBCVor|4WY@>J3?CQ|iaoE}p)hHh09TIN_txDUs*!C+H5lPG za|B1wfEXgEgMt9QgkdOS51@oH0~Qj*Dq;ZRXxXAOY_b^BMLxlcP|qFTfa_Z~@^_Fd zma!gCPBLoukFNOIca0pd5OHuJyI?Xf-fcMu^mLagjT`Hr(< zdsz7xy`;{ptRpKdqZ`qfSaFC$V&zfKyq^`eBixpiwpIyNQ_so3Zs4TyMLD8K0laew zeW%WxOd}^OBTi^coH)cGaq@T1yq^=cBixpgLYI(3yM&OMjxm4n!ZYYNbtdHD=bIfF z5kfj5#32rekiEZm=lz7R9pScw94QQQ<~vN&zY{gPqP}3%G*w-@sNq_|Bwa<4SVknd zdVwT4^d!BQ^#xh6_+{i?KU4>(Gb^W&6_$1C3$jBpYX7M(sG|_!s>^nS96A)D|Ejt@ z^#zMe-@B=XpS!a3#26jd8p#|?90rZ=`Am|rkwrq51z8qkS&(HxmIYZBWLdCNSs+69 Ykt&Q$V*EEH^w{(77D@Z2-T!f)0%MyuzyJUM literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata2_113.dta b/pandas/tests/io/data/stata/stata2_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..09c90dca943d1cdf84bb15b884958ed941705b58 GIT binary patch literal 1490 zcmXS9Vr1Z8U}k`T316L{EFh&|sNkDeq+n!VX!4K|A}j=z-FrN=9K=j;Hgp0>9q{gU z0MPMPq+h|%*viO+;s5_XfByXb{U;@{B()?nH#I&PtQwPmD@e*r$EpaV z08<;1NO@{%c1~qHZgsi&c_kV6R1}sb7L}wH;Z~B7n3EP?nVN`G2|hU_ry?^|4Ndh7 z(jftg!cJj8h?kTzz}1xGBC!jR*cnLdN;n(Yt`W`dh~o$QzhU94H|?cPS?-!#5lmJo zx7b0VGeolwg6L(F%?^rvU;g*V|99V*KmtJUpdLm)tp~x>)TdyPQ3FTAkc?ylNlY3y M-vSdM162|u07r08-T(jq literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata2_114.dta b/pandas/tests/io/data/stata/stata2_114.dta new file mode 100644 index 0000000000000000000000000000000000000000..c60cf480ad5dd82db28475872f08a5280e7a80ed GIT binary patch literal 1786 zcmXS7Vr1Z8U}k`TuZEwM#2Zf-J2W&hG6IDfv<-ps3=9Vt6HsUdi{;E9@ed#O$N#Tq zVAxmxX)=h0=`ZV<+WFtmSiv{3NWsVe$S^dwGB#oO|NqaQKfizfNl7e8Ey>JHjZcPH zi$TH_BxR;!RRmIip$l1{JT)~tr!pS5y4?J{k_>z*3QH4Yn(E6GUANsF&cO~k1L zpB%DVk=Uvw$qWYRNCJeIswK{(YDi%q#R3M^l5$ctvBq-v-nO#`7|AC(^s zfzc44dkARUeA`~?#L%$t)f*t4<*vyU!DN+kiyh>U8KT(-LG-f8W(UQ-FaLYw|GRHY VAORqF0IZuJ^wW9}Oig`S4*($EbN>JU literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata2_115.dta b/pandas/tests/io/data/stata/stata2_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..ad7dda3fdc4b38fe4a34a2615414ec13bae8389e GIT binary patch literal 1786 zcmXSBVr1Z8U}iuDMnDb|5QAt4a5jd}6N*2xfLI_|WB}5SP|d)g29h+gP;g64QZOE@qDw7+1VLGCaI);Bq@QI(TE8G zf^kMOMm+BZ;6i~Qm#il=;qJXTz?BLeGK~Ot2rQ7}2q|%a0H*=tM9EeFnWRijmG;RQ zuidyc=r*plylV}wJ=4AkxUvE+R*}*^QKOmXxx=%^tZ>I+FlF_}4lkW>LGXp#f*U%=;`O!_jc`!@RqrU4?+h2-;zpstuBeN~SmChkq}m z)Jj5-boV7`Ldo7wSYvF7=l*0~+FO?KL5!6F6_Sa`l$i2GKWB6zjQuN@9CM%+UJ-LA3mu6 zq+-;+Tjd>^XfM?7`7eBD5AVVKIi}&=`~Uh2pFVkk_Tj(#PuSwg3l>fPhtDjSK6%cJ zX`z3IXBhPZQ|HfH__v=>KQMFNlD~al`-U0wmn{0*PpIE8Ep+DJzRyhkr#DsDT+hD; zYws+YGIP#?$&04{?Z+0+n?Cm+vcNE*^Z)T}hM74}n)A0`ufFkTqW(S&{|IK7?t}Vx z%=qgw|I2Id|N8g;2VVal!ua>?|GzTu=Co0DcupmGwjf?OZS_b{ZagA3Rrk$Yb`#l*fm3_Z0qTT>Dsk`9XIo9lb_g%>71Km-bYU>Yosfdk9EV=HD;;H;4byMr)RT zJLP{D2Q@AGSY5(&7pnYUlN=$pBjq`647fMl`VvA5rl0&@0s3Da0%+mcZ!My#7J89q0)eAviZ<4pU&#~zVoC^;Ll{%jlDv+<9&e%eL-qR)tcp` zCXmb!UXs%I3%dS-O;t<2c2uG%oz?sF@T5wITV!#|?E=Tu2;AwHM3QRF9A_(ZigMK; zB?tP$v1Yy;*A-R_6GJ_z?0u6k9@~U9ho0o)P>daD&t>9zRMsv({otL1Lx$zMW znFZ;rvD&3Cm7;Hw3%6Gb8wb`2T*XT=I=9X&J2piuEf9VwA3kh=;D1{-As*9yR_EQ{ zgGx(ok}57r2z>llVC-L$+sjpmo#2q!29WFgi$4ayN?AdspC`Yc)jO=>s9bu7eAOvl zXmELox84z#peJz5PP2lIm%o=aP^j;fZ*mFx7*-59(g4ccy-Vzzz7ZN8)(gte4;VO4 zV8tGFanP)Ej~;}t3NQu5p8~|_?=+Zl3m*`2;#(oh@=jnn>uK$%r^r}yL6UkPW9Rr6 zKHeHcl4?vDtgd>Y4^?V^BUxScpbH 
zpOCH*>B8>s8xdxc74@-sj!F0@L?ukt=pm>P3y!(MF{8fJOu4Sl@$I{X;I~#p-rtCJ zih6EV0vwR!ftr;uhM{nJxCxf{1FUY^C_<%kFUas42ZXHSdcvG%sZEt?rIjEjc_U+a ze8je)fU~CBQEdUy_bd^?u~#JQ^GxAkw2sKl8^g*=SZRY;%*fbLWXuN?_26$W-{yOO zH3UWtr*imPV%IcB=$U6tq@6aDVd}x;wnFWw@tR|6KY2Ylm8dXjXE4OZ?BZc%uZdyo0Wk)y z980+hD;n|Ql#rOl5J_x9!+hQ%mdnjNV0GP$ku#wm01r3Rr6*J9mL&A7I4={>P#&Yx#y4K_id@dR~ggx6j9Ox93-C6qGP9O%FakA! zfa=Hbhgn_KqeWC1)0*C6YJ@b`&P0mt4J$@i>4Q#Or&-DGFGBj-A;~qS!>s<^>qS(a z+?I~)cSDG(b|OlxE38;qWXFCWOqd1{kBWfLfWY?gJbSCm< zcZA=_A}4u)@EbMA&X4`TvFW^z%{W*wMqOG)x%Ta7@vQs8TRS**74t^2M#faROV*Nn z{h}YN?7*-X$EV%-4or%)l3mj`|VHgY4GEIZ{3?GJ6Y(~a0DF7C>Hir3EXW3h8}1VS)`n_;wa9 zTLi~WAeISy{HbA(j!lrh1E%j-ostqsrHkF^eOHEz9odmcM==Sw=79Kv@TWA$K$qdj zUL{1yq3}CapJ|Aq%8eehF5tc3^Fw2E^EAa1@nwF0{CJy&&hcAyTFf zGS&u!8HthfLW7LI{t-A<#%KENG9AGVV}z9^;MI~kp)%J2qT~y;YzN0iBIIj;MDyuV zfN=Wdd2PNNVfBF)Kq`9E-Cj?Gh4%J@d2NQ3_DFIdNK%PdlK8-;BVZ*GR-E}Gtf9q% zXey6#qt*e>g^f>|5-EWI;)Gbn!2dI_lExn|9RVHK!{>I4f@4NUE5vf$mA>Zg2`Rms z68U!lkWQ%nIDG6ISjpo9mX3rz?@~Edkj?6xv({rI^`%u2Iude-CCU{)(6Tce3xY(< z1jq9ERQr!X9tNzgFlDp)-AiJqRNz6|ue2hyc~*c72c!!e3qkma8f4)9k3pWDsEY+w z46*SWsMMn$P32iK-o=V=Cq9B>U6JIekcb3utcV{yJ`mv##p_$JVsv`80aMJ2hFKX% zy|oVH9|4YaLsv?u{!$H6{97Ozd9d1UrW{t+bKXYEtRG0f7-ma`$LWaF=VR2R2l{0; zXgO62zkJFlP5NxAx&orVbbb?+8xNtGHywz$6Z6G+6bRE3(l-yhT8bne$IPNH0_NP{PKA?42=3(~o%DCGb+wi@hZF#nTyIeZMr<7dw8 zxDi&2(s@{^r_^&(FCwF%+5dG@%Bq~27bX_ASlHU5IAUB^3E7T&T zMWZVlH1enX^eHSlnf-6SEj-E^TmllQoJXnN)q&_gsz)r7QT+gm)MGsqMvwWQ%`aX! z7IBTo`fw0djJZd4Q00n+CTwj**0n}mib7yzl!be~9<}%dj#cnu>&Bu(oy|TrenrPv zUC>Y4(f5RQ{ia^%JL07v{W1ww#(rLjo2>KYH7y zPIx%>xuA@h1;}`de0?K6b_B85ncKV>hwz_3`d$&n{ZbIiXj+-lmN=4E0(Tdy12YY!+=h>(BmC!lz4b(p|AKn3f)&G7`&}3oW9ayY zZHeucdVy;ahFE5xUv{ASyEH35r%u%3s_&Hyi1Gc9-LNu_hQDznoxEP5lncP*StzB9 zjBNqO-tZOfpFsx>!^-T!JXYt}F%^SvB0U)AOnRE%2+ZpRa4Zy7cB1#UB4bba_J==1 zzu1DwQAK&I-gfdnD$o3kHfr35)Q|r|Q1TZeV+&Er6b!pvh~+Nd*GHAX{g`Hp3-VY) z@T3eXul}6Aj~qzCl=p%%Cmb1Df+X*OW5H5 z_yOUU@<;wqAgHP4u5IPA?wli1lVE{3f6n~F?nMoaM z1Bh)u119oXSc$<7lMRm9qpkA!u%9O($wo7j-*kf2B{n-sIm2u$@Bt)dvVm|PB4K4C z>XM5VYpp??Lqjl<%;^2zMJI3;8*-E~w`NhFSpg(okNILB2`gK`v3v|Z2OQ&$@(y;B zQKL@*Sp$e6bHgzz^_fe*n(t4pUo;ZwM{FBXf<=-FK*gS#6+2}zWY8C}WEGxZjrC1( zapIdxhvyJte;$xKQLvH>9V^%7=V) z-c+=k6%?kbAfGinjm)9Us#$bh$8IERiw%)aM#1WSXmT00qwWa5o-dv?4gJzt)o0&) z)|gp+j4~;6sl&zoWVCe?qO4jENG3|Dz_B@qrIx?>WE!eJSk*E9Bw{&q0#j@rtvD-^ zjrW@p#RGc)cLWMkiGG=+L0o4{NBE7A@yv_d2_i z;!`b&axfN-6r+%6$SMdeNI?j%z~{vJn|OV`2CQsJIT0j^=xp zzd%_#KnHT*m{GqBjukASQ777yw2JnGJG~LHoCcHYK>qjWeOL4R(KF%LsKVM&w&p@s z=Tel1lSMeS@pC3Ok2w+f_SfkBvw+lVS;7Wxep)gUj?LoLjl7W6zopsmg;S zJ7DLpjR&LzwEP21ZUT}xn-fRQfUa z`F$JS7(FO+tvFqK=j28 zWt5u~O)I?Skt1F`2=`8gmCF{{n@2HB zzqH2%-mg&pTl|+ej|)9)&q#-n+nISBmB-J>QT7{v2PoS zSmKH}uCMtNVi|XX%28XW*NQnLq;d#hI;6p|Y74iuCm_X|W9ez3py&{k(jQg~^3q?Z z+-f^@ahOj0f430nZ5piHu*iAtfb7?-bQi_aTSppgd)7bgX-=a#(EZgANGC=eWZg;74Y6q^>=8(-N1;Y5JftGhcOK;TWdu_Z{C=1Xp%g`_P z05L4>b`N(-Ni@nZmn?oRK>7D0{2Gg5>jTHmY2(#>!9vW+Xei8kK#cD852=hTHh0BJ za`$JEDCq|Pd7wdRN9Dr*e7@_ug(&6c*!f%VXK+R)zfo>#G7WbOBTJW@a?hant)PeOSMQn-9XINw1 z;om8@CxsSNMG%)W$k?c{St$9bRK_})m6NBPNl)SC|9_K z#1xwdHyeYHtH(lO!Bw;k6ef+|n!5z#FX3^01Bl-F{7cF`-$fkqDsmhnl7y*vtCeKcjO?Jw-|f`8GDUV?&o_pU#1=3 zuv-FR+)7_ld2TA*S@I<@*2BtRY#Yo6Y~!d=*$dR=AfJ=I41~FYeQdSq9IK1{`7M=> z@1+rBB}sVXN0da!dJTi83d4xHimFz|jtoa#Vi8Lae~#6c-grkPF^$@cTS@Nq@+aIS zY#Xcef)qU*vD9mj3(m`-W8Z4zKZj=))$b_tAdSu%6HY2a;LKBX+prS2=K&-r{x_}q ziFOeP-xf!n) z`qJTGavn-)EifVDNT%O+h~+x(wc;xbi;p!Xm#~JBZivNtAFU3bMqF1S7Te>1G{&Li zGxF!n)eSyq{t8Vl)H~P`oRNDqV6>*uM&5IYXQG900v4128;sVe;8-LayUUjjSP2~q z!0Uyk^Q^9}_B~~;?5EG3%^(v~0clf!j5R^VW~2JcP|8|7vsj6pc{mn$FF^F}<%}p- 
zX3*flOUc<#3t{34Q5Rc;9|{={!8bqW=Py`=-uDN`a(&OUh6UaAqLO=%rk)Qc_tFVb zB8!mZrm(Ucz3;0*yy{lL(Fq{T?Sk{H(LT~3DpxY8<=QIZc!oeOPJ@yM6vGAa&{9_Sd$B>3V-C|XH`kJkJapx>B0!qK${Nhd!J5Fn&sh!W>jzP?@-1cc zCzFh#GUz)x>p>)OvKdOGDW?&Oy})c*hq{=xjHPy7gHpcMp7NBkhWPI*BlR3Vi7*CmGb9pYtYDM48E=>OIc&fW~?Zm%%SN&#*%f7h7cv|9O}{n z=a`Ln@Grnh1;6{{8pQI_gw8bm!0HZ7vKE!1qtvE)GnqfSKT&=?i|V(;g~ZqB{a_7p zHgGM{(E)^61BiZrkB!K5$fZG_$s{>|!*W%MQaT9C<#_Z8}uD7LYntSH;0zoyR^DzU zDo@W*ckw&2^ROF{T3<&gI|+=(I`qqbGzgv{VgENnU>i&d`o+49DF1$*9-sL=i3sXV zI2JoiO(#4ZjRYxlidk(@CK}O79=qW za*+aOQuzEue?rs^R$&cU{o0F+heCJHJwtLXyWyK}qAt#GEDEJmA3!AYc8}G|qCT4R zDLB7O?I0@aFVXx>CFK5AEQ$elkg+a;5)h4rM0G5IcMgmJWG5cjE#oh;y6m5wL^-LP z&TUgpoci@7a@c)jtc$>{jn=HF_ZJV-VxVJheNVsK1c<)FuuiCcC4HE1k(6A(23v&H zf$IuQj>g>o4;+i*bL}=@yKsUiIRj$oG1XaAtglgV%@yJm+?z0iusYNLQlut19^r4| zr>xolD?gdkN1hj1Uo* z{3TX*EVa8Rt@)XjnC_6JHK@zhr?Ao;mn$)l@i>$+hCl8d3y3#1r2^9>R^RVw50U$D zi*8`A-$Q4$YtN9!5#0D`w@Kzcla5Y z+)I#4W1wS+NOCma;1~xckHy-P11mc=Ad9As=2pHLBpi5!ZDDNjt7 z@$;yMeMM1yGNK3xPsxU>XzWZ+VLed_?Vh^ksMtFFN@*mG> z@a}tL<9`MaW{VMy;Rj?zn=vcaM$sOHjhs* zY{HFV34W{4%=bsukn8CsGAZxq`^$GoivyVZR~m^N=Y<_60kK@vtawk`gp+u0m2a?O z3>e}iDv=FTzwZvY`-dNqdSG@Y4G`oBNwAWnS-JcDCT!zWeK1s5!Rq{8d5O}Gf6%q5 zcSx))7R9|RtPF&}CWA1Wv4iFF^YxqI*gg|7SWv<0D`pK8xi*YAw*D?wxKV^T(ij;7 zq#*@^Z!sJ@$2(2jjEu#qzajv{aP6zXqI63qT87pVv1SyJzik4J4Z?ic4TT9csf%KG z-e%OL2gYk995Yhap(2y07wz7>AYSu6A@WRHBzdsF346dXNsA?b-J)qSe&SGch1IQC z;Uh{{4PpdePa?XFB{GZIDGh;+rD;~wy7-UU0xPXy<@>@btX{106_q%n=pW8dm!NTk znPVr)j`(fDrhS+%G91h1Gxu%56q|wDo_4Te=;F(Z++i#6NLM{gZTlIKCt(nl;qhkY z3|P60j2+|`TB|qM)4{hluwrz6XvU_*it~Ro((clDB7fc-5EFhElYyPz7EC_KUk~02 z!dR&*oKGdITXanjnW5HVX(~(YMohrdH`R(6j|lgp_ucT#`}w*1wql2g#1KDTSjpn4g)w2j#Ja3eY} zDHsb$b5!3Ca&Z9CHyOR3!pBe929C8b+f-#2RkFsietsfz$3|?kwh2wUJCP`ZTOi4P z0=M-b$iE7)B=ecaw!uo7FY3f!#oWI%3_4~j#>6(IIh`jH?v(=^^GA{oV(@JOdlMk4 z+qLTB0fgx)tG^yI96L-?@x>=i=@G9iCy zi;9h%=svLtb!H|J=51?mY#7#_14weR2DyJ>J7W2vP(7MnWsS#L1c=g-X5!*WEUiw0 zmFI26XInlJxaKQD3STLQ)^V^BGT}B~8eS~?DJIOn+D3)Ql8dr3U)h(%pmDVjp_da@D z-p$4@RUE}3A3lX6hcJ;#z~p4Uv}Om6qWwY3U4_?Jz5TbNuqU?=w+8-!(&5bF<|Mi# zPedbUX=#R{*pdSk*c zk=fQvJkshYeyO&SFiV_7`(qQ1}9OfoV9 z26r7fC9>-P4^NM-1Gu3By_u$W@Bho6$Qe@>nn8d0Ct}Hzg^OqW&1V+Qm@;?f^zgs@ z0>=D-X$$5r`pZw4KQL?l(!YFP{f3zfmM;FwPnf@9dibore4m;2cW;_u^F03=tiH2& z>a4j7r!1cFmmga)f5yDOi2}ofFZkQH8D`deY3^Tsz4?tl1NGNo_**c;^c>R9W9I+x zng90o{~qA|zh(Fb@ZTc-pa0SG|6-K>b144#-+v|J%#`OP5W5w<%8~2I(m(~nD>>i zF7v4%H9jF6_Yja4LO~bGbKD+2>WZh3bh0g<5$C| zLX^tij7+o*SQdk2Kku0}KyV4^&pX8?0HP;t`cY|6E!li%yO3r1Ops^&PENf9gs~Aq zk~V-PkH0*X6C!*z_{Kh3&1bQi{_i{~6ZA8gePgc>>2P0Q!e5a3;CiE++8p+W@sgCu zpVtf&tZUlv^}#8IELQ8&%abadZ;>T!whJ6rCvc};5=p8za-5Ce6zi%&$`1^LVJ&?* zt~*$C6T>~J?0u6k9@~U9ho0o)P8jMn%jo@S5UNy{hotL0gy75u=ImKD5zSgBb zmEvxa^S4(E8wWQCT-8f5x}d=*JG6i=t>JzJA30)>;D1{-z#qeYR^vU;gGx(pk{T{n z2zvZjVC-I#+bhfvTfsi3Js{Wl7k>-_OJ#A6pC`Yc)!IjMRIa!~zHmwsnp~dZt#`yF z}rHVVq{56J!#u-G9l_9{!y zxFPteKto8CzHzl8)Nv{BD$_*23t5erm*uZ+Z(gd+hj{Jx_hJS?%})17$8f zChueR3f`vysnXM=lfOeSt%cY}o;<_k^2g(cLT?tna;giT&Fad6j8rLlLb^v~3A?|u zB+MoY>SO&JviuWb7EJEwVaO2+jk&@wy|%(gx$e*L?Yo80w-!X+Z%LhEpBt4xdjxr~ z$}&bb97YEiz{DS5HPeF$mCn8(0XGf^xyQAHImc4#8Z%2n+$d`oOw zV;3ggf%~IqM7uSEWY-T;kYNt2*gmTSwH2H3kz$NI2ysHgu?DHCs&O(%Zs9G<{cMVX2WVpSe2OzrD z_dkVTEa=*>GGT2rQkl?}o_hHjf^mZTQ^1nW$M537ve&>cc7W)ER*j`xl?9D@aZ*Uh zWQZiTrx8AH;Y(K-7Oq0tdmB-)I^O^GPliLRMt^z|RVuA$(w3{j8Rk8jQU_%J9oj`V z6mk@=RHpH(zcZrt_ZZBAImBv1NKg7~|mcP)W0$^7P# zCe%(hvtMA*C6Q^A$+e-?Lw*qwLJ+ZsT`0%>0hV6>W=Z6SeK3J#8tTOa7QJ`r=TynC zqn!*lg<{r<$j;qh*axtr|Gafg)O8M+CXfaG2HJd%c*-Q##O518xYhwT?uo zcLj@uNp=_j!9=JK@u&#+Of=YjzK2=e;Kxg;?BYPZW9|s1u&zYj><;%WO>(Li1iw** 
z?EKgd8k@oUSdRmXKK9~r%5~~U%Vysf-rB;jtC%;E6(VMCcWG*nuV3&3%MJ{SA-=h+ z#_Q=9Xt0jdVR60Sd954a?qU+8HUrC02xhP9zQN01oi#}(s|~rxi_>bV4D3qB=o$t8 zcxS>~8wf~q6BF%=kL5ziVSG`(KU7`7yM!ba=d!x)+BK9J(w$aVHV7fTyAoxf7c|zw z#4sjw%S;vG6EFgy*o=tv0*ik97C>Hir^TzD3R(S}Fu?}FeOnWkCqiQ<;L8L)>EsBs zj!kHN2Mph`8YMl3N*8+4`>qTbJE{wjj$#sUtpM?d;7_WM!7c%aUNxGMeaW}1Hb)mr zl^eZiL*RSC=Z8*2`f>;$Z2%boDPB||L5BhmvBS{X8nEarYhx*Qpf|N{uO-9o12Sqj zAZ;O-0JwY$_Dg)w$d3^4gXsL10MWM}8cSu33oYy0D9DBFiIn4mh_#1cMqwnqP$A>5 ze*}$H@;QFH3`elT=)uw)x?0*GR2SHzDfyyXc7$Q0;PN#<;`l5nP&oDSoH}2Qu-YIK zAXRRK<1qjIntwF-PLY<$v! zNXY~cNBA-Z_MZkzCV#wQB>KP}zMxAi4AVPUz?bW;^fh-+NblQ%$iEAKI3fGv@UgGJ zQpg7`8-@P7%dD~DJXYhJyB;H{Kdp(Rm#U8ZNDhpCyXaPt7Al+bC7~D@$A%pjS4DswlUQA%oB_wU2Qm+9tgJ;Qj z7Yo9j_y~q|N06tXMI=LGrTpmeL2!RKUf%+X-tpN6OffGSVWA_9RvL(ZBsA6oRVg9+ z%T!3&Z$T*Jq2_Wk_`|&bbJEtPddi&~4k$VN5jZYLj ze1eGC8)F{@mwv};H}{aKbd}Q0FZ&ShnAZZgXDT2bCZ;qRv$76(N#pY$d;<43n+@Z? zV|Dt`sgz3)Y3RgmEoQaK2Qt%iCT%>PtgjvNE=_!$ejYy^v5ItP|UNvOBedXxBUv4bZCr0=;gOeE z81n`Z+r!Te`P96P8?pKTqFsGCmC6qZU1#Y?vf8~A<5~pUhC0ew3z>S{E|gu;n#Sq4+p`bFF3M; zDpyQ2d23s;t{w7H8U~hN6Zd>Qa`6ictK!Etj75by8-1+(N{_Lcke{}r?g{PwRin^< ze4r)QYkrr9(T|X?Hp*#WhUxn60-j#`q(*S-^wT^ zk4Ijr)Sdqr`dH#ulsgefduHm1P3yM;6Fm#QOh$=qf_k%%eZ6sXhY2X;OFSyGq=40o zYWoeEQV>m_+M48_dnGVW=OQm*CV9eExcoH?Yvf(mO@Lok@WqoaV6|KQ(&5W!TAkj3 zIFMHYcNePzGaaeihL2^z{pWn6)kKK@y!l`S7Ts36T^JT)==g^nh|QKpfomQCUuL3S zb|Cw^RFy!oji=wZ9b+d&+k@JPGw;10~0n7P4BKDf_5A zYZA3=){iuf|3grUmLOt_kjiunyIt_*F5lnB+y?h!nk^|VWObpFv#GrLGx|PeFo{s! z3(DL`L~JR7ya$FIN04vu^X`YJ`wY%zhCU2wdTIIHn_1W2E$)O5>>c=q{2;B?W5*eb|!Kib6)aKC~-@`oAnOikr+ zR=Y9dTPnSmsO@JaGH(aEWz=f8zZQM$5Smhr+AU9Q4~3Ckj83s1l^$nx($qW@@=V$g z;!lcfv_x6I2JXk9#B$ZaXYR>ibTW+WXsp@80HQB1`wsOoi#pT?5}N@!Oysp-iN_9; z2aVaGtcv)EpC=>8dLuf&;RLHmX?c`#x;a?j14;Z89pOI2fMp}{Qh*X`r$U^=!!VMJ zsQtdBCvX-Uc9b%=W>cTpfh0+b`C=CXmMzd&5eA<k{tfpvB0lsw}4fGR82-}Rv#yF&M7a%1ls}OX{3VvtH zX%NgTyq-`}gnjI%e7OHPZOrl{{*g9B2{XID7w(^dFHK10L%z0f8p_QA9j2zZh}Auf z$)`;8Y`U&X50bmZn#kYBf^|Q7awWE-o^ZdBFPl9b_0rYcpM8s1eNOE$%B0Vu_7?_{ z(N@ig61^Uf9Hdf#VRPY2J%97bbYy?1xsUPR!75af17XOGVj zv0iZh2SYKdYaD%ya-X0Pz35AF#*<j_mx7`9r1qxY%`bdKIPyUDoELlV@Z|O;t>0bl#qe-bZ zL%ORPt9>&UJ#8^>9!sUzEwe9C#Uq|>ELnhA=}MSiccNaZkjm!pMT>|%NIF=qqw}{X zjeUN_tgPc3Ixj%Pd~gzv1&eNsPSPdeQZw3sz4)?o~*hiODh+T(p_>x%4as7=a;mf!i zRF2(3y;ja8Vb#M3(>W7{)tb1iy#Xmx4a>?5heU@VmHuGS$;*DBa@+0H#eN3y|J_8S zx0ztMVUi2o0okvz^b{jdq1_=E4?y%cH{7J$yX~~of*Iu5ZwA8H?}IPDppXX-J(kAcA6bAmt1UVBwVK~u-rDuZ@h_9Y@Rw^*YXSD%QhIc z91yL&+g&Q1+<|Mgxn%SA0%80!AxQ9EXR2r+B zN0vMn(E0bn{W_Ck;{(Ias^ist;Udh+ICPlzfau-r9#R=wY{AM^l8jW;_Aj_5-iE`->eEH3UI~3e27a+(-`TB^( zm}qMun9GLK*kSx0s^%9SDbUuHE)l|KBPNku{C|9zE#FrTeHwS}|Ys5lg z!d0|2I!q?NwO}d4U(VzD1`w_DxtElCzKc2xS&chOf|Gb2e0hWU0`W*%6|!;MG7P>= zNag2%=z5-bMV0hDG)r1VX1}1g?#PEPZ!!1?BK8`o+|T!JwOl>EVYdWCzm>kG^1KYX zv;1E~-w2kW*fy9C*v65g$`{DXK|VigIRtYB``Bv3Syq$q^IIw%-%F#&Dw6!jk0>c< z>vasCDvTiJRn*Kfc2p$tk^o;q__M6G;>J5FiJ8=T+$wUfk3ZoqV%vz;3Q}AEd}&l6 z=bcxekA1C*|16$a)V`z4gG@SmOeCoegE3Fd+lGa}Jr5)yNx!MtPqB@H`!+c894|S` z>K|slp-kFddNXMjPOkn${uU!D&k`eO6qH;I@t@+8??%D><@Ht1XchWcAYLyroMSZ&_3tTj zWj}rPY$lnQ0Z98|M65X?HV4^Xj#Sp;nZ+vX%mG;7y#Ud=UuHzPI-7tvGw*bou)V{9@ z@oI>Mp%WmO+r{TtyRIcPu)3s>gaGIc9oPsQE!4geyg{~U*aO`Szis6EIcm=EZ zy-X*{@rUV{n`=pSA*ynEDIhJuvIetqsM=uP=dVWV8-S){;akCKzfaYR%8+mA><2N# z(Rw(Mrk;W?b^^0$9r9vSBbL#14O019each8>J|iAh;q+7y6932S!X+x$jT|SzE*gs zk^uL4HI+YaTZ2M2V(@kUzJk@aX~~N6_xUvI#{{y@au`u^&mu3aagN!Dr*HyTs`%Y6 z*T9#T22`fu2Uc@vvX!Wm9;Mc`o5_OF1BvqM8Dzf=E+oE0?T4z6GeK(+j?NIw8bGvz ze5^&La{&#RB$Kp24$D;qQfV(Rmy%E~ad7`EfAQE_Y_mb;vlt+{KF-ZWd3zyk+_r-> z4jDqYk{?FRq|3 zLzW%!5Hb<9zgP8TOq)0~SU|b^w_o|FK{F+W0FS#T8RjB>WP;MeIpZCqf 
zqW`=&ut<1hv(He;YQ36Ti%Or%2;Vt*Cg(os-s5+KXnDkQ8r4nw?++F>eL zU5sEaDr3&jyJg>!dHvl9cfSVN?}EITWw}djeW3yC01*IZV3@x8c3V+-dX~D2-;$k& z-H6ogI#TH*FdpkrFW;yTJVV0%Z$yJ_GAO7QtM;P&`#E}i)^{W-q%YxE>@;;wcsd#b zS?)o^GWm4t^$<)to>(pA6;?Y?bP(mT3OejVDUqkR6XxIzuyhq9F&6D29mb^dMT`H0 zm^WC3)#VQ8Br+Zf-97I#$-m@=Z@!7VIK!}5q|*EVB8|6wY`!e&r?x%?=a*@nMP>a( zTC}O0+~0~tG4Ku|)=f|X_%i3fJe`3tNj?`KC*PQ6U$wZBXp2lOU# z#C=4po4~D&Q(4US7Y{Sz(Z}BUo_e_n5N+oXPRM>WeVBZKl%K~2TZ+|z>yDlrhq?a^ z3`^t-Y&T%La70sb21M6unzN`_U8CZfE5s|bFJXpYb*KZR)Ewj_xW9>?8odE5KN-xA zJTI{Nk^Opz%J^DZKCYI0-VDCHXn@9g2(ouP*3m?j#p&J#2~64tUy2GCdv=%XwT74AQ)8T7mes+E_ei;hz^s98d=LX{L2g(1b93C z6031fbVIxNjh4NAM5;!*67B$odR<>+KLPINBVy5fa#bQGayTBQJTY9tZxtT)7p0x` zv>~X8Sh@@#oZ&ruaTAzZ@mP8b;Y%d%@3|3Y*3A0-xN|J}5ge0hWQJbvzB_I#f2dS4?t>zF| z>Yq_%+i&FNOfRB{W{9UCE&UR!!c;Z;?z_zwl6Dvtd%&U_>ES8Le>|t5yYG>W-wYzm z7Cj8ZuQ^ILV^;QqVX3^{G6_Mh^Ud*V#b0LiYcGN2(kp5sT_Y2(3?YoA4ypCT4@0(L zSp0%Vv}j77C7~zJ0i?|EBdduV$B9z*Yq~7^CP^CVgKH>DQ3>Rb%5BL0E!6u_zG;6F z1TzEIgO5sn#LoZGKv8OVLmS55A&#<<$RDvH!wkfg=XR`c*YSw9kWbca!i{1%eyh;Z z_eWM&;OQkY>F?2&;YXxin4PJE1bIR#SW;D%yWefXHcrhChDxefjo<QTp)@x;EnuNwC49xR(XX zU^Lh?2xc>Oup)keb~6myXFvptt5|K-?7%$MEhFyRLCq8M4Y8F}f2@mdYT z^wf2@$fRgR+cz(W*Zfb2Jj(__9x8Cc9%xKbeF_ zQ9Z^InZ@jshM|vTsx0Qb_y=zROFOW9S8|2biZ#BXlBgH`BN^%vGLA5FZAIAuzfIV* z5A#KaVR?MczAczyGjZF~5iGiHzP!jCwh)hW*V2p*lZZSSgRl~hH@jwo4xeqA0~#i~SE<(!r^rSV&qS`+jH_ z2hjSap!U=Gq^aAWvDQZGn!M6#R$n>5Ph{>`i|yAor1ldpEwjPA|qv1;$ zpL1**SSo#yC;lqt{>2gKV>V)ZLJOMTbqe8L*~2h@1ouC?giPm9aDIrycDgE;KN zr!eFYCUQBHoW@tw?Z8oVAY{3#u)0&Ma<@qD$ID6mqWGEzL@< z@<{>2%9!JK9V|L&K#-_huoK;{z9!`EGQt%)h{<=yW4;ua_hdBKQa)*S3i1+AYQ9*x zhWq4kBSr2|OR=}t0}^>TipZKyVuITk{3z@UATNE)k2z}Jq?nJQWg$tn#_Oym!w{AfyJ)QhR+S!uvotmHbY)hCY-nQbk_BW;i3 zmujmBv(!$O7+dB%^XGpy|N8fH|Mk2EuXA28TJxX&{r7*p&8)v^VWGZmGO7Q}YSv%<{J-D# zT@8F!&#ogUFX%eJ!*gI)5AUG^y_mOe-u*BCMNXf*;NO7%_;1AG$qN=u|C`S&m_B*V zjA`Nj5v=~v{DG18z zkJVjV>Q3cXKas}NO@jZf=Yo>(h$t1m8JSf!5Vl1Sw*9N%051|!GW2u?AsDx~bdKseUYm*cv_if&@KCzZW#5XNJhux{)_L4Ng|FifLSspAJJiKqwbC1pE0-$9txLd*kC9%nxOc-&C% z&B9ksapALAU1^Y!Dus_o_oz%^_ji_r*MWmh9lwq2{@&%Qqd8$2ve=(`x?|Fo2?#XwbPn?M7icVnsld7;No>e-qnUS z^?3UjTKKN$|O z8vW@{s8V4?6SrIu&M@z=2X#R8-#{+9p^>9_r6QGI{hblLzsF!E%pq1AQXC4ou%^Qj zuL!P7-w8}+M_RMzEy&*;+O-(5B=MU^n$SDl%yGerE|E;7OpXn$9`cJ2AA*e4ccC2j z2dwn^mz4y5*n1PKOhvz#U`6j;@)=dq?Pw>%4WWp&BC>NgIQAY^T;P8^>av!98z-pb z@xOK4kj-k&ESZP8w5G$Ho(Ru+HYZX>cbeZtV^S(y@y%Pou}Hq?y@2X}1A+a%D4W&3 zHO{9@pElGx@1amsV@tT^J&+DS0{fy9x53InJ|vN#E|z%RozG@p721p zn`Tdx&b_E4=}a8w3c1^-Lf#FefT!}Oo@e@Iv-;8J7C@BR(v`~_g!8xB5>D3#gwdI# zI#;ka4N0EE&nl!)n9itv0)Lp*)I3;3l`-w;U8YXRaP3N@*uJo$hn0Tl#C58b!hs^B zuOpINXE@Ai@4j3_<;flBr~%i7m|91oG`PZwg-Lc80K!D55b>x8_zVbaKi|WwZt%k; zRCaNo-qE)OQ&?9bZ+1ucmL@sH3xwaOLUw-W2aZkWeXPg9iazGzGRk%8NK0qk6JFcG zu`8H2k`*#$mb+9n$=5FU!O9K{iy^)_tj6o{7Z6xS>aeIm@VwfMaCa~XQkucaP!MLX z8ot5HU!66HC#wxPsEgBTstoK($LN{_|2SvDTpb8Va}%@57az+3lf(GJJb$n{pLYpK zEXrYZ-L-2dGo(8$w`>$bdUqwtKre8t1#YlRP|FMz;uA0eso0E+^@0`s_$`3E>`seT zJ{B_jIbnhgLin~OE>{G{P9T;EeB#Lwkd94|z5|ADS&fnwO{EJx={;A5j2+d5NJlXV zxK@DpgYYL+$Y7TMWUm^cWMBL(tIgKMQ000r+8Fpw@cE$=k-i)PNE<*#fQlDYNYJ4G zWb81wwgy&ombEdIJJ6e2x7U(k_W&6+9FVpkOaMZ@3I8QNXygY-_(3TDB|!A;hsID@ z<3daOHVJY;dm?4~AY<)8m{Ax>&s50xYaf7P6@0eeF2fP*FnU;N4qh#36sq&>Axgeb z%Z_kt6hgiVNGzWz1q!F0pHt_{5mp;y0;H-h-R<>ASZHTQn3qOa>4YQ)fh5(4C4~=a zF%njyVa1t0!s=Qth^2C{8?_32Dr|hzf=EdO5J$u^2L7Lhl??uP`AF!%9zMTI3>?!t zSRj^buJk2$S4ivIg2=xMfH2E=3WP|IS3pT@_`{s5?)V$qdvRr06(^f4?t8vSp*E=zBuDf7K-PANgF6ewhR-V?fId_~slybjHw$;}Cup8hNts zF;*M*J_(DWAH8MWDAbRADkx)S0W#hsU)zX}9YHL%#`dqqA^b;>zGub9SY6tN6e=YJ z(BlpoqMZ$nxy(Sne1htK2^~9!>RTDbr17XrmAdmEgN`M9MY$7!v}cB%*tC8vFso)F 
zmPu%_O<->(s;@VW?l1w3e2GVA7U#2?QEk70CFFAcGbrqyX3hy!^caCfjeFw;=VZTMIw!hgy)Sxp4_&zlcc zu%g>)w+q8!3>{zJf!J(m61e6Oh-C))We2LiOSSTI`b0IZraq~F=-&<74J+elA?hN(%bk-U|ueOW8tu} z6TQC`8GFokI{XRx#Rg1{DJfvJHk0>JdFCh7vROaUH2x1kDO`+(kH02+DW3dFJ;kbRK8vnt0NmwyV1-Zm4a_&BTac>qYCDb#e+i+J|= zL*R6)kg=7Z<$lP;4+y`UKk|ne@dCNg&i)G}%{!e0v=I|Nax zQPuL)_E0$4#po3CLCJAeCr!ykBhR3XA^xP$MoX0SYY=`cS}aE$d}d7!qm$rdM`O(% z1`vH&>38Utnbe^nkk|~+VIr@El{oA$x!{-`+NzL`_<0hNtT#gW4JTMla?7KX)6K>L zA4uXR>j?Kg8df%nuh8RHR~Av zJz_a@0#j@rtvVx;jrUp*#RGc)cLWMkjeeP=LR@D|NBGT<eH+59HA4y^NhGLS?{;-3r6=1E5kwHxSE*uwr9$+B*ZWIHO;B8H-qLvQ<9iqQY@W z@LH>8>eOKfB z(KF##aB)Mhjj@>3xRey&WD!ZN{hY~-V~#|=^(A`$3?NNvmau^vAD7L9W3zbkMqbQn zpEZ1slf?>pvBra>+GFQ$NCcz|wEP21ZVr+-8j_mx7`9rIT~4m`98H)SC}MG zMj3?nJ*-p_I)LixwHH=Pxdui@LM|d$xQr%owNj0DN0MVAtx{Tvn0$qD`Hi>igB=X)}uu_AJwIS)@ch%kxTx#Akd#j}^ zWp(`vYp`7;(lDQyB&N+^K+<648kE01Y3lPUW@R1U*m*uO=7W=X46NwKPr63AvP4?; z$pVt48BDkjF*_@2(J!5Ff%hwv|0X|X&-{PYh2c-K8rvncRC>LM23(IIjMX3_`KH6M zYY4w9iFr_|LhL$(BbI~`j_YqciCD&6r*g~|>a}7H39BAPn9dn+tk%SB?F~q&>R4t* zI4C*{rSyjtoxJoHD!1KEUF@e5|KCkSdYu6)*G+PPJ0Sa2D?P;sbZB=F#sd)jjSV*_ z_hvioG=DmI@|%G$_WKaaFKFaJ1Xr8L*fl=yTm%N+2DDg$;S_fMb2q6{vYlo{%pn(B z8VT1a6IO1SBLvTL)FXX$wS%^}8ik-hTe;Q|G@*Cx*rqW1< z2(omEktmlAA(r1vxI@9caz2uLly8Vwgo(BmgsC)~#t!565I4PF)B2_*B!7j0NDB_b z%0pCNz+LP`IF`eQSuDnU*@)dz4=cLNRgF|Ry_4E*Tt>26`Vyuh2T6Wp;;IDn%RLqH z*|&?;bD8-ldYaWY9{!zjd(vo8O%!oCjf_n@0>_@1lsN<(GanQ7^W(jjpkLy!j?UxH zu$ro8PpLF?7v+lAkhoF<;bvnHa!puBOt^}+hQegQ}FCdSkRUsS4Eydv5gi?M6h_2^}7gR~xLo=n7WY#l^>yA9c@*0DW zAY(64%Kd!rR?F1m8+J=T^jqmmD$h-)JIg*N`X*Qzifw~=k8K<^s(6OF9OUydmw_;s zv5&1boMkofKfk8Z@x3&PtRzVf{D_haS+8U8RAB@$ucBrvV@E}zF7b#Zgg?t_%dfwo zl9)lQ$E_rH`}h;?BDRfHT0x2pKrBrvl~`+I3bi!gkA1d&YnRCZti84WAP)qY7${~VBHgZUz_ z3_E|9*HoUKL2Vy~lKwOROfEnvtpp})9Le_k7O`C8y;gjIVez5LEw4 zDmWGm$L{dugH}Sv0`YpG;T)@JYMR;MbSXI#ZX!%VG3sK2@WUbFVff~!{QL#0(EI-2SibK$R=1$1 zR#fs2()4qYb>tgOMT9I6WJ zyS&wqz5x&=3*T~9`+bUDREB&@XWfq`j@HA8H02axu@jh0>rfY?nz8h*Yf#FU>QkO_ zRyRM;LX>;v(uJ3z$vWGiL{?5g`dZ9!Qj5 z&!GBka3S#}dOuW!oC#WsbaVz`)&Qa%*L&9l(!eqrfoY&(~u#AEB*nHws@qL2##%6HF;gHXb{E?uNnRa zoc9mfh|;VR)IMV;X>{WV6Rsd*?F6ME5zqR}hxc55cTBWOOY>un^Q@+oMGH~deS+RH zrIDX64?JtnfOG$sSV*!|Na%4h zq#Xp-7Z82N)h$J3UQLMEX1_o}gsX%h>9^}}l&U%_fhKerRP z7p2tU*EG^}$sN_NLhpA5a}$X9yl);B{pZHQiiAft`wSJV*6VF+QR#D+c5d!1Z@)xd$1`;M1(vgD`1$Vzq=9F@D zM4s$Un1k10rK=!`F_4QiIFrT~F8UK<-e470mouP~$apAp_uSJY?~)t7`3CCZ498+n zO7jDVRNnTX`Ld{=Dt!vhFH<{<%KD46a8nt%w-t+G;B92Ao1g^7Vj(d*mc%;;#R0Mt z5A2rl7g$a1&yJ#;Qc35wuOyBGdJ{R~9x~QV;MT^fR?PPo^%-%{vDdz*o^Jv~+j)c& zs$Wg(lP-|5^VncZusU$vp~|lPx?WSAMaAkW71vxQUZH&n zGX$$c9UvvBL`THOJC>iqe{&X_?_RSz3p>Y<&zXJ#o1b2N_R5DdYI#-tmBV zV^bG&T-|*{niN?TuFz#y-0@tmraZx`dfXRIXxjYU!mW(9F@^2jyz~r%5d-7mKuXk`0<@!3>bMzw+23`3@ z6Lic4kDwEvFd68PwS42}X2>AG+wqrJjeCL{8tF>70~qRceNp{*grA3u zt>Tla5-^d&@i66);SzoxRo`Efb~ez)ptr=*WdPv}?+}Zdz}$?((p!L7B6)w$jW{F6 z0&)=$-LjS*BKN3~CWpQxjd_SAk`bjkcQyP7pOdPU(a9S@eor|05ggOIck~n)o8PH* z)Jx)3=T4+7OtqB$f;1=&u^d&ca8Ea?8?1fyJwqj{IUMaFO4=vXaqI(fKE;zTyEURD zc?e2KyjmAkWW_H5xvxG*tz@;DL$K2Dgeu#9BR6Jv5k)jZJOydVmsl01sMUAhZN8AS z!?4%`E4q;$o}&E6QyRMaF4_3aAi`|X!!i7TtYkB0Wj{EU!s{&)k>onxY`<1~C97Y1 z305w>pf=J~GV$^d!dU81T2K5IZ5xKgFL*=?QTj9ynmikjQp1m|CUP7nN?9-I(ySXK zai|Zjp)5rukV7fAq53z`???H!`x8N!>9`(zQ2Zly{tpI6@B0^FHwnpOSSuMlRJO-5vdnuXUZT!o{$17DXNt_-)+J+PR$R7 zimOGaTDzKn9DdSZ&p;!6Mh5 z5yv*&!3q~lm?O=QF+kp?Vel=6V`q8CiJOtJc=N9a0MT9jVyGzH)QG0x4MeO9Ci2(K z!LcEjFT0^I;Rf@f7+J6xb?JriS`Eka)OEPXBx^<6SI>yoypM=H(*{W%DsaLca7% zYkWl|K`;78GSnqx9AW0zin0TKo3Lpg=8Fu+a{26iTQJ3D;I^kDtmwM=@*;QGLOjx4 zOVc}iLgYypgcW$a*)I8|>-e+bdYnJJ%brDY4@GAN91SG@i(xwgSX} 
z-^FBM=eGfq5AxSSw}LPh<`vGTn$;}2Du~Q*E3rJCrM4p{;OU#$iV=?p_oMgS@Xh=A zx%;+ahl$1zKVDqTYI|f8QI2mWelggBR<4;qm~9xhxhB<0%^n`jy9W>J&5#DP*fv8o ztNYqb6s1^evHxL9Iyfa13rQPT2;IwKiJUC_`H#$$kR2^&rT<3bCZ}*~hlQN`)`##9zVOzc>OqW+TSMx1f1lCll_4 zJsk5#k`H3=Z326fAgbHd>f-@~;R>t078igWriJ+IqZagt*CfKxc7Oy3Qm+G8NMsda zGi5t;Yz0y=0T5l*jsQ`ywiVqcHmAmaB0k zS6R)HT3Bh{8Gh19QOWWIC2ORxeQEB<;&}K;3zr}wA@vEmDSpP9gID> zwYW9t50nmP7B@%HC3PYiIY(7XGn1=)k^!+YX8T=(6`eF7NK`J^iSAck5^`rL;R+nY zq&wp=U-Hd+G6c4SPu!i1x&)M%FIKMNK6%_okvr5%-S<^|3cN>Erg`EN9 zxsUlVNA0U*^HH=kB+=G*jn$+-A0aBgwiGX3JV`=ZM-thglUR0Oj37IdnVF0qO=*mJ zF~vM94LFMxUt_iUq!A*st)+OR?NR(vZ6#rrIEr@5#-jQc5Ze&^{;iF%F*OC!Hx6|P xEV;(&K79pRrrC*w6$PYn%^F{$E9G|0B`c?H{+^ zLkxC|x7!ed_b|g*hQZ}9gLg#egs=z$uHRpH$A?FTK~!Y4p|MAw_NEz=+xK?&;M#li z8PLbe+wC8J^>+J1i^$Mu1Aa~YFXO|fgXaB3;D^sXy7%{!KYVtx{_x<3k6hdL7v zhkL%i5B~6(_~CQOhfi-ei&XEKp|Rni6C%RK$0tOG#Tj0`d;RXk|M1V-?VldDfTxGT zSVKZsXzX9r`KP;>8KJT9VgKQo*s#!u$&;f03HGO3%L5amrpNv1tG`}a9tfX4^RHk2 z`A!uPH8cLt-~Hv<^1!6%@V|Wbmz#5rBtBR&4|{J-ABPY92Q4UG@`*S~!DX2$fe z$p7F4hKY{)kAG&E@aa;-|M>HkKfHI;|Lg`Z`R{@Cj*E(oj|h*8|2seb{ZD(l{ry32 zDJnKHG(OJIxqmPB$^ZWGfB)_O9^m!AmGED{zjys#zU};f(M#TLfAAJ%En-I4c(t03 ze_!AJ;-3HB{D1BNOMUz22>+uIs&S7qFk-pZVyE37#?Jgq?Yv6;A?i2tKm2{dg!lK% zhrhR-`2PM6+#k=o|Mv%(KYa9uPyXw}zq{nWv|;{W#s9@j`O|Fuq5gPJoDv!vIuS!N zrmygcaRzU@GY^v^>xC0G@8*}>baz;KM z(%D1!qm*rDwdK2AsdVTvc^3Jluq6AjAk{r09Csg(hC*Ij%5&USKJlW5kanyYpB3$6 z+|KIG&vK>ms~<@1#+8EK=I4U4{1H*gelaogZ6PHdQg-kjIlTp!AUD2Uaw;Ht(zF+q z`c{)wd)5g#)=vaE{8w`PIUtO!5R|qIQV#It6F4EpXPF`S!2&*q)wuoUL79Lb$<(Xc zgapUC0u%j=)CAU;?=HQgq{@J*ez;jWF(Og(agN3i7Mxgkfq;(j8mD z$EJmPF#PjtvF%whG_E^bs>e2tvFu|QbSuU6nHo|6%IwI>amhl!!-oQ6|B~F8V}aNS4!JD=xxzpDy)UGc73TVS z@H<$o!+eg)r8mjv?b3udE|2l2--t`lBPeF4N@1Am{Ox`Hgt~48Mwg)BkfJ*%^`+dc zTg1-kOX1D^IzjP%hv+|o6nn(QL6y>Zaew@jzcDE77$AB-rva3UyGO{;uY`T(-vp+; zmevG5hQ}HUlGFtrJH8Yy$(A$gm$P4GGaNQIuJ9s3o8X(A*)@ZcF{AAcle z0QhEQkTYEPovf}Tz(kdTN2Ft7jF3L;8+hRrq`C5 zDA(~RetnA&{Mw4hJFIED+s)VNpxia^DtGUO+KNA=fulb+UGsLCA`B;vI6A7RR1H&HPF%YTLl7_sw48| zrnIixOF-HPi4MVPzNFtWf(erG#+*)o=oZ}l7=*E+i$Y3-Me`BL)MoVf^Oqn@JD48| zDVcoAW*$(jJOMtB@96CSPx~&<}Hd+OGN)Sl#32Xv})@ckiR2p*9_Q_&aWD3M(%X5*aa!NG%}Gg`)p}t|DS}^Ab9M4 zTgq|2LrRwqQkL@r-%|GghI9+k)1n0v3HQ-0{v4Fmqq-W z6hS4A-;E>2Tvl^(=5)lR2_4w(iSVp*10rR2qAy zn9FM4n4&1ttts_7_)sXUvLjrBPH+bx{@sy@Yau0$4@x75i#4u0^0}<8{9-g^E<4bE zV;=~&GaZQ1rVEuMotfi$pxkX&A#eLoz&G;8pXV5IS^bDJu_#K-=)5^K!r2?m2&d}? z!syJ>ogQFsCY&6>Pc5LRFl`Y1<@{b&Q}rO8DkGcI+sqvyyGMH>E$$8}dPwPoOkAQ$ zDd;1j^tFPM?-=*8+S@PVsT|sp4(okYNUnAwN=*+)u`QuN8^XH%|qD_S!3 zuJGCpid{m#k?O%?mU6dIb@G*SzL2sW&7!|yAFJ_v{5cA&6LpNQ5j-w;AlxlN;u{Ql@`LT54S@1a)msls=x|SVJ?zm{BdWRfx~vL2$(?c&rPg=*O%E zqct6gZV|69T5yVgYdqCX%_0T~1; zo>w6Odj`W}d%?AZkfO7$PNv+huGFT5mJGZL$S`j}nt?EbVe)n8FYy6Ghrr>xQTZv*Dc1)cYXQOxLrZ$5LdIMf0*;mOxxSl?`!U1lA*BI$HM3Tz z%yU3dGN4+vf?~s9@?}64^Er~gaQyihwZH6VwE<>8D!S7xo{xk$dwasXG(k#hI5_|$ zse~;Vd_cpYkdg!`&isB>*Cci^l>>Xydj3y^6^|McDV+e~1Y1Tz{}Ygs%^xWpiaM~B z&ug0u#q^F=u;oe*`jWdXWOi>zTVJS$R@Zw1I; zKsrFN5SYI~h4kAo9OT)AxR@bDmzuVWN?m%>ES@D}T&xIpbO;pd2q#ZOiAV>>iun;^ z0${#3uCGIi-s#CQbTLmFW2GZ?^)w*=P;jghvQk3yXQ_~qUjmTG11#xgJjiOgPG3Qp zrTyrXLAGS@at)Ds4M$wMAYZ0|mJ?O;%O?b?rO&3SBOu!IXI4_V{y>_0&4GxU&|jPb zL71*6ebd3KS#a_pKIC>F?COuoKL-$Z-HVvc$YUL;IVXg?3m#j zRx>hm17#jlS{?38$h0>CH!TE^Uf|egsFp=AKa)?M_7ObhU`l=vSo{sEUDZjZ(j`h) zJ?}=ml3ohj)(L>Po0;PI=#_U6myP_vdmq94RTjngZ&;mvL@bd zL@4`0u?1i+gZ`hv%LyYv9$!;l+ZB+am(D;+9i<*CyAiQ@tss3N!D9mv{Y5BB&s8RG zs2zzJri&>n+O_x)`hEnY3?ww~oD*4)jA>kNk-{S`$x!AMJhqje8uYPc8aH9|0Ytl? 
zJcG*j30-3CL~@!x7v#pFkV4H;_F`n^8{PA0z_uZs?`Iv4*9RL-YV+rxoq zht$EAP(#1_lOw-piBTJeiF2x~`5@_b0 zE=4SUf?^f?sM=A;P-l~mjc@T`RulBYI^;c}9lxv-+=f0Eq@Tt^%1F?18Gd;mAUad< zxY00wClWc-aG2Gmyi3QR=u2lfISq7ET2h$^t8ls&Bj=4-mzI=k{e}Ou72GOr)64S>ZE*0v`e;9Ra`InSC>Q6go z>xpfX*8(#?9JY){ime2Da}a&KX++DhNaPDVGP5X;)eLL)Hx#7+nmM5fIe6xUz&wsX zTtdw9*flWuD=1dS_gFF(cGZI|9(*3FUE`YxTSm~z%$CHFyb!os7#)~N2<2KlmIL#j z@^$sbf&6DJ3oA&`t+C&XW-*eEx!;o5uC5cf1~IT@GV*0TqQ6;{@?+LG)vmg38v)V3 z9k>NjM$?2>j-;LE3xqNjOrDBR%JA4~aO@Rd;ra>cz+On1R+P_b9NT80@r|RqmphZL zrdI;-p$8S26v#F%_z)gb;09zQhC9r^lef<5~I8ol!yd)Y$lw%6^b2!ldtlT zcZ1Y<25U28K6>S-ofvyUXz8flq+~D>Ic_d&nFGjnl)h6c$DHPT3X0ye5T@t|tMPdN zNVf^peBG0Hbo*W4bo1e{d7$MEl#B0Reks5IcMIf+8pyHCv|ay_bL z;sThz2z6`^ic*zYEsw7YhLUYf?UIKSA7ORUgab(A$+R}ej}+KyiL!Jd%wLQY+ov`@ zOHB@;h0KVX04d?{ zSdZcYR#UJw55F2o{e1-z#MURWX)!{%8IU5RRS>FWDZi=FL=Yw%*JFzcFpvFk5av&z zbvYiyFTs{5Ar|wu!Tgi3XC$ev~4UxZ1 zhSVLX$z_<1I>Y=rzGUhow4(Ixi5%n4nW&Tn?auG@eibcSd z8vfd&Nr?UcOC96Cg)MuIqKi$Z6(>cq;%-BtxML3B_M^g7B45Ub^dlgiDC0Vl zN4Lq~+fqKp=O$msYIfQn`kSKY?e?yu`sMZg=S^>HP|v7Qfy7_woQgD&d8T8 zrb1S`ptZ9+L~@(n1ZmjK^-^<#q`?QP^@q!O+MP1WLLB%-0>B# zSW zGtOZku_AsqEV=)NFTFGktX^l?9|J^N@}`V(;}_Ek&*@~pXBWc#CPT^vv+TtZADecN z@|bTiHUfR$vhP<7h_3MUIm%2~Lj8;4Ncrl{M49vzAm5vn8VjVO%CYKK5vXbLyrnG_ zW47FRfhz7PbVX4VdSws7{JaVIQh`u5fGt{h>^^^fRwUxm7?7iU39I3@d{31m@apnp ztSovGX8tA+<`Uv!hut{63X$$aLdr(ViVqNN$<&KjzAdMVE>9yV&K^YGwi!~Y;IXD8 zEBdyY`~I^mb7ogHlqIart)L3iMH&t92`9--`vH;(DOXVWTadbLKciRP;cMGO!DBvH zi6=vfe$4nQlslb9Pk$0ic53<&Za8{pSvB&dH8${mM&-ZGkK7vdAubGmjMdo9tftcI zm2~jc7{b)+OC&=U6uScR+mqx6s&ivZF!Kfe57^r0C>XKT)~aI_lyOM*My?6X|s}q+B)2`L2NMP^EMh zW00X8K^S*H^w*YMqug)nXzQpj^5hpIVH~!@mY;mMT3aJUqgZ>44$KT=*%1<%# zH{nlUjZA)_+{BGE!7+x+nrR}+g*~w47c=%yu&>+` z_bYn8ZYIf_Ya~+aUPyU}=nL43JrBk9@gY_-&|g+yw$ww4E@yr%RZeW8b}MF++(rh% zl@*+{v@lZc=nV^12$8xXdy`{F%oVX8X;GQfy9ihXd6_RY<^ANOpyOH zkL?>kw9aRqQ|{?z>ezn)_ACij;s;>MEA$tTN7Aa06{BaN@vTHCrvReseDnoXGPlwk zX&#ySjAFavAZ&S!#z)|>mk8w!zH8&z>hcYqzu5c!MwvX zju@3aLtJ+A2XkhFFc&e8Eij&9HK{+orqYpZG?C0B=?{E~vH@lN4ueyLLBukOTBM8` zmVmgV!j>TZ6ss-0`WuzRY-%%l9=YAkk8tNPZOqpS(&E9erA~#Mb)JJd_LWNhQ#iAz z{*5yCvgy>338XRv$~?AA8&(4M)Sm>U{h~&HgIyxbx5bj@NYN=)e?R*bWj1c3*V4kV za`hwf*Jx1(tkHsofytF1|8YM3RwB%w4UYBZPqUhUn3q`ZZ=*2{L-6<@BAN53?8pK# z0#c5s`LZ$VGe9;NEgN~KG4r>5P35p`YWFahxY5C2ay~*?PhdhulU(1gVapZXbMEJ8 z7QbOcx*5ePQZZTZ-dr45gbc`Vz>CxzVlGW{Ba#;Ji}^gYu-}k;tu-c$z(Dv3y>Ct z@K^(QY#O3J8=EYEO;)x~z!ib~#Ynsp|D+|D6HNh*eu8$!w)anc$3lYkf>M2httBdlt66MYZXxxP)vczrxk(J{peT{Lbk_z*AHIzTDU5G?B zq49P6wv^R3ZN!T5w+Cs?_o-xw^+2NRJB7G3!8&FIPT>Sdso=LfUkF>C8b!*p?y8itMSM(hq&2Xfa z29B*$Yx0sVNgzy5Tr>Q4Snuz)6{V?1sYCWAQrnXwOtb=zH5ZhcG@SKW7Vii6Ey+nL zEiK0!XIV{StA?Vq`jyt=Ma|`g1jRQ)$*`fEwip$qy(WRHwHwzdqhK# z>G>^9e)=WO#(ERE^c6>P z8g&7RIbz-a1qPCxDkS)b1=1V^)&PjU)q+N%(x;fFmTf{-cE=WUIcV7mhmgyW``c7o zMmAlH0_%%w9bd+3NP_%+$Iw-?-xw9-OT8AN{OcKdB>Wqa7}T9`EM}TJ?QlAp1X^x|$FliMo24L3 zCQhto@(Qc%BRYz5Nhuxpu9(Q7u7ufr6;j#@l9-HgkqKoo`GWZO5X%Isu)2M{TZ@dl zLbpVoAO|n>#4le%T%4g;GD2xNfY`{}J+y2V^-@cpg7wS9HlnihJS|vxn%rH3LDByv zJk~)_{1;;&u_%_#I|rlyvIz%vv-xwZ=D?3mqMT7qBU_Xcr`}zO9CH^Q>mYE87OPS$ z`xp1KQ&7iV8;(C;35d4MpmvCUCB2`1j+{P=3APxc1J@BXc`^F_-=NrXKF@9$rVA$& zC1*f%T_!q%faMP z7<&#vieB&7QQ`gYejVKD}Z&I&w4)Qyv*F;9Z6LZlbiQhSmnWA=WOv z31@r@TY3u2^%M-f`LHE{_w!hRHS%IW&I6*G-N;?!9@Wwf!7oYeLD-VOh|(Qb)%+Nr zW2%%98&-h)9#HZlD5iI9)&0;I0=!Uv`i1P1GY4DcYWX0e55@xj?is5aJ z;#KICy`WeIueVNvlkXUEeH-)TtbWl2NV)KW+DeznxQqP>W35AIJ@8`4S~QEFa72ru z^l2Ju@-#q7jNh}GgwdQR?R-gR?Yu_P2KZnb%34(XIfQa8qJJIveu#gwBMpQJ!}j2V zqVF;D59uRHwXbOHn482&HW7IUD>6(UYkiQCX>4NrJ3B~lZhquUV z(291io)ORK9}zj+7ET@@aKct_Oj2#}XIHB=8Lv1LUt~3N=lY1!C7qba*OA0dqlnC+ 
zcS-|M$Ffx^mbmx@u7;H6kn&B@MOG_T8AN5dUi3>~s7ugj!bI4KvLoJ2Sh*ejMTTMr z_}uNQ(ZwcXx2F}P=sFm9k=tt}?(e9jSuH;y@_01DG8}KV-w7!f;IZ9&Ts_MUb{P2f z3R3jW_f43TSTX8*J?$)wA@Zk<0Wspen4OsUZNcQ-{FUG}AdHn|g!8FnHSw1Pk@2o4 zmS(ZkZs=H?zFDN0a74HRx!)7Nyn~P2z6LW)5}NptqDofVDVK z%z!xYm(cgm4?-QY6;o0h(u3_o3HQPQiuu9GyV3Yog1zY|s_WF~;{d{ViPc_78H^dG zq4?~hhIGH@c*4=$kxyCTcd^b)H-+{9m$W;POMjAv}MBonW!VL43 zT4CzCrDIh1NUfGttmezTfSj-w3kO+I>#u{b7IPGn9}GoN+6`K6g2z_!cAuwXPF}4J z@Kvn#-tb`})2p!<*^#AN_Jk1SJ4bQJC>&|>yD-A7S0R(C(=jTztJ6gltBdXwAWA zX)t*sUwUUfmZE(?%gsfXS*`t7ftZtFFf8Y#fI~-ltYe3zI8+HULu3 zl?e;N zDsp=oiCsPKk%aO@B5PWUsXa&HrLdELJom92b5y_DU|EWm1f|)TuCSV{=YvG$=SJfB z^T$YVlLR6=wiZwC8Y#$*r!7pzOH&$?Ud*ucN+Z@{MORp@K7EkLtZgLjZ*~Z8s?8(J zOefKP_9#UE9Bk{4_urbDYBy$}^o>Sb{EM%!x=&w$mYMcqL0LX2U$_vP_HD$p31d+C z%T>kX3`f&Wjf|D}M9WIN*xPNA<-J4ixcJxz%WH`rKL7MiwdLD?y8Xxhd-eYSAwt?D literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata4_113.dta b/pandas/tests/io/data/stata/stata4_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..9d7d5abb1b92156dfa03b076bc88c20cd9afcf71 GIT binary patch literal 1528 zcmd5+y>1gh5S}Ci5|B_78j2Jvq)2J7Y|904le_eJ26HB3d!_YVxI5b*A>lS9;stmS zBo{nD9w0@U^eLzyi8J5cIxP4HnTnNWc6N8>o7tJ!eY?6s>-3n4G5Y%Xa7DyAe;AOe zsG}%V)pDgaUULd=y88CX>d|;S#ujWa&dzt9SOGYmy(;h5Yc=|Ldvkr;>KXICVWK7* zJ|;I=BgcMIs8XTlE9m%5(mgSpvj^lWIPk_m=bc7ZXPJ)MnXk|?Nt72cAc)0d_f@If zb}GC3o?NNkFXrjsE)CN(#FkGx`m4gP%g#Y?_nUJn9`9XDdoOlEWmB$P+EhPEb<{Lm z!f}AohhU%}ctCUkMxbG^PBa3e%a24~f`^ucaguj9T>!f*;8bjCAP!h%NtcVWOhvMG z$tckM<|{!ySxZCP+DPjN#Q2o!Jxbq%J(olz#oC73QtAyo9b%&$K(T$6QbAO z5A6(#ij|6P0sd2=XF|_~c7$FC?FqdU0tjw~Mnmn;R?<*}Fky>;fSJTh`5>XZlQL5) z>7~@teWpymqm26;6YF*-r&vQ#mUS`u;~mWm7*4l^Fd$C?_5+&^03MJM{~^kYpXuSh GNy#011gh5dIPfe+7!5p#W)x6p02azDOvf$z4N1!7Es27~3nY?;_vXh6=Z-sHovl z+#?#EAPr9eM9g=$C+^}ObOjPCeX~2e^S3j*lkM(c4|mWU-@bm?;m)JxHt@0BueO(7 zuHtX{#itK1f#8{WA$=1Zc8^T=_#`;OkIP>_F9%cS-t}DE7bkDvr^*25hCqN)t}bAc z*Xj7oNiH=*bpda@H@EqyH?~=3lVMgD7`Qaf%NQ8M_Hl6BIUH^mX`UWDZ>!J^+G+gz zkXVGC>pMt6n-?VaSAjoO+F-#KX2WUS#j5vmCo;ubF}~hr7Ef&4chbSDYhHpg5r)@+ z3lb2Ea1U6L;QM>vb9kd5QIh5(nQSx*L@f=j#AfE|Kung7r98__th`S~r-*J_gli1y zE3fo51#T)?2h~OkgT<%NpJL8Zo8+}*wlknbFn@P4FyZE?!xM%F~|10qlLTUi pqa2B)pt2{bwDuoe37AgrDLCo@*8^J(fF78VwO{#)hyNxezX9{t7M%b9 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata4_115.dta b/pandas/tests/io/data/stata/stata4_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..2c68cfb393b9ec5defdbf78959bb9ea2b174a942 GIT binary patch literal 1713 zcmd5+%}yIJ5FVhEpF@S(13hrCDj{(pN}?PJ7rYP9S0LKJB(db|qV*;LaUfR?eTPcC z2#=BrPtg)FpR))@DTx*d2}|FsXZ_81>=}<+U7>Z_rgCGopx>R|U1PdC_^nq07k;j4 z>Q|hpdbQS2)uZD^gZ@q)|4rI`Yp^S?%M~o^H4za6p!FxBF$4i4y-suwK@T@X zfAnWBq9o0`oGox$*5O=iYA749a%qpJYfi!Skwd<$OZg=5quMT7wids2o3~?0vr-2L!p6> zvXX)#gb7;&1fOZbln+zNyBRaJ(|$&6GhlM&k{k~>AvT?EPKg0g)MGJP`i_KyO}`2- W!hplT<^#YExRRw$SvVi&N}d1$bO#*( literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata4_117.dta b/pandas/tests/io/data/stata/stata4_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..412c2c0d7b3569148266f331338aab76ff4b46fc GIT binary patch literal 2185 zcmd5;y>1jS5H>#n0ul;81wsg|B1H-Xx7iR05?Rh}6Cnkq;Q>~@JKvop@9t^sONan* zZ5{#9Q1KjTDJZCD)9?%snq$WH=9asJ=n5n(jc3Ll&wQS-y-T&>hNo@A8SM$)7A2#l zP=ag0qUb3ND}^@qjVMYqc=2+JhGUg(vpGV;ZAN$4bwWVgVN~&^Pz+=zP$M$|p|U=^ z6|Duypc|~NL{EZfZDVbnhT~ZpRwy#wXW-4Y_nzOq;Dk-jk3GMC(Ty)&@`j#$nsdTu z&(#&DzxMp?HK!kVPD6}JcX=s!Q;F2<_l0J^Mn6VBN5|09aAFEga}HZ$oq-Z=X(qLnc~{pBI;!BtVVI2W%q)I~m+GQ4 zN@{bGWHh1S$uR8{WyXzWOV1iB-FlewN%8hPD2y!MN6Ec<|*tV-_NwgqV)*MzGeCz{e-so+Ztaz#hxMAFZG|-VL&Cy$rX$ 
z+N;%UI-NX!W3Sl5mec*cB46g6zLMmH>51}8x3fE(!WQg701@aS6K+li9CV+t200Jm zDGuR6RqG)IkTu;P)xZo6?frvG8T1R8n_4nwb69(2e3@vg$4lNaqJ#@8Tbj{uYNQSC zID_{vNbpfa8g4@E77{F=En;RV35BBnh zZ;wXcqB?drK*WZwBP%)Z2;?Tv9iV$a4}cbdmVq7vVKy)YxNqDn?h-!%xF1}L$kAt! z3u5*PVtSMt!f&1WyM!s!U4R=z8+;ba2$t*=p1VGA9ieH9hxU+ zK9c>lzH0b6`)kgb`!0nBv3?$8g+Z75mpMn0Hi3 z0{0)818yiwTU!kQ=#i*lcK5{MiDYW&Jh7+z!u)@^2ocp#1IWDOT+o(q5iU9|!rG)w zbdjmZI&)R{X8Ax9V4d%+IFw2=EQM}}%4@=QiyLrtq;tf#F# z$Vwt?s2G`PM1a`PQCuA`<9%@y(-To_pp>4Dn;7dF3Js!mtYX!J{_&*V$h198YowHO zA(0yScsp3mOJTRTn7RK*S`yT$_CKx#I?A~V|(tQD8?jgoP8Z*7z8e_i5`=r~&XUck5UV%saKf~8Y7@iI0d<4GkhnrvQW7lIB zPyGhlvB{!rc+a`mPPiavMZiyF2 zj6aE7{7F3cm*_=L#z;JCLc-Al2NHp+L`_5l)|uHESXx>%CPpZev~S*f-+Mdr?Rz`B zN(l!_!8Txyg_px%4*fi@hKV6*FI)0q1NNtXT;t{fV=1ezv(jn9zR~Az#?4F!3whWg zDA4WVz0l=5;H?Hf=NA@@#cy1oUz08X7tf$MG7{I2zW^Ybh=&Vi2t^YD%8`GFiBuqp zR*`pz0$QXd=Jf+rQzCd3c?SjL-zESRB`c#HKBp^8O}{m_fE*%Nj&*yz>QWvRusHyh zfR&fdr~QL~0wk}xDu-ovs{*o(4&O2X*}hHvj~UPf6v7n>GYkW}w}0)Z#kh&tAwQOn z?bw<-HWTJ@0ya5Zt855qe^EP4W^o=ICB-=iK5W4mIjXZ8FhO?f#^Q-& zb{yMNZey-rE&`}g@Bkq5j&gw|;v!gdTm;2Q9o0p~K4UC;v==pHSKb<+Rct_9h{aYH z;sBSKGz=YaanFHja#h+ep1P!<#+s7Z>t52^{M$$|zW3PeCnmS9XsM`EcM zj7ssaCc~tp#3Gs|i7?O)+cZHFobW$S>JClo)3`@UDHj~-k&ky8%V{g@Wd}3WpKw_Q zYHy~}Jf8tb&ijdFc2}H8b|~MpjgWI|>-23R>+jk7FrYRqllPHZpRbemgX9h~?qT2& z(OKSS-6lR$)vTg$i_*aunmjpK6!O=lr(n_-fk3J6toQ`*e)9|AWA_fhK>a5z^}#MOZL(aAv%K8Ye1A- zBMO^(8sYvm0Le@`u_i-2nUOJvcTq9}(Ij4lXHjIlNThi70}~lKS%qg|0Pk%AP{WmE zB-J6mnD8WArDIbJz$&kA(}7^nTty5|S9Qo--wd8irPkq_+E%<}OU6ETz867k+a|u9 z-@EC#Fq5AM6Z}*a*dlbq+S1U`q2TKsHU+Q(;~#`M61yWmb z6aY1z(DdHEWGbD>t{lhrRGgR_SBohAok#&7_m*ivYOF=9vlfnvT-AcTREu6U zRE)l{L5I?UwNOeeE|f7LH*4D#*5an4#b*m#FV>>pFwwx^(D29&mMt`vR=d15ZjPub z0CQLik{8Y{XDqPiVWcVCe7q&n+IHgPsnhLe&UTzT-`UlD;o_ysJ)2jHIGD4Fw0M1` zRSMJw>kic)4vC;-w2;&}77wntSa=-i(sUC#|6iacn-D*^VN8<^IfhijJP6GUM4F7C zZa@q~nhI0$u$~%7!K9i_APr_!Bc&sxDlj?%JCTfJKm5;01Mzulp5%y+X(8Yo`Sc)C z$x`6}znGc*Tv*{?{x(+gH$ZkVOg;I%X&ik_0!RIRaC@;rDHqZE-35utkt`EmI{B-#FKK8$G@WgJiYzr^S z0w0+c`>+=H?5JTprKCj<&JlKAu-!XaNd7=Aag>r4m4CJ1=LV6$lEhv)ErseuVz~;B NEtiWnH$#s2{{X{R*%$x- literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata5_117.dta b/pandas/tests/io/data/stata/stata5_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..afbd3b0e0afe31bef669b8e9398c623beff57d74 GIT binary patch literal 5366 zcmeHL%WoS+7@s9b^`SyS$^p1^qtL2IDBg7fA=PMUQ{t4SgqE~C%E64iWACQBUUzrg zCLpoyt&tE9i3=}@1Fr*r00#YQ=e2>$%Zx^`0GwDT;(W)%b50K7{G5a5 zoxKQgb>6`NDbv8=x>7LnL(01f8_xaqxOu!;Hw%_MVcOOqi+9qg#d$r7kTe_~O_ujS z4jW3D0a?CB*()F)z88QXZtYRZNKVObhLyYu@;8t-K;pdU6<;8+Ps#x;vE>F$oWHZb zvd!#fc7y-@i3~oe9)M_2CwzjAV5NjC8~WcW{5s?HI{<>HUfVH3HHZk0v3+r)rWpGU z0Kx~!-0cAgi+wSQZ3_UlpQfZpQG$~wW`UnDzM2PMopfNFZ&%`K6o-T*&i<(}tEK^W zBKx8NUyj{Px-c=3+zC4xvwtetT@N^~SA?f`(4-}~lS^t*b<{zZBF}rd=^P)DvBW_? 
z9F%zvhZbt0nQ5V0Kb<)V)>{k*8=eKxV{yu15#O+R{k`gX-mj1g6tSrlfcHi|gGSgQ#S7hdYIy$Rt=c=?X zqwnR-ZGerS-745a-_<1k4_y!8b$x4d8a)3o9zb3B^}LLi`zRU`E-m%-bg3&|J1{83 zFp!#a>q+I>5HclYglM(q`HSa*upTvf+vz#=6BlK^PNE>x`WCnll59<)$hPuImB$pIGz z-y=W2TO5I{@QA*c!T!wXae)6ANc|>h{4*^*n-A8d)R*#GLs#UzwP*buDUW_IdQkul zFM0Ak^7ij<$oD~GShf4O{E4)6+%FatzP0b1jH~e`^mF;m6^WOXUH>#5-&FDSJPv-Y z@WMWlW+@+3ihcwx9^zc^v*Q5786+;w@;w6Q1 qZcj~G!s;qIx4K+X?T0-whV$~X1oLw;5MM1IyFSK+x5w=K)Oj2C0b$Aj literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata6.csv b/pandas/tests/io/data/stata/stata6.csv new file mode 100644 index 00000000..27a1dc64 --- /dev/null +++ b/pandas/tests/io/data/stata/stata6.csv @@ -0,0 +1,6 @@ +byte_,int_,long_,float_,double_,date_td,string_,string_1 +0,0,0,0,0,1960-01-01,"a","a" +1,1,1,1,1,3014-12-31,"ab","b" +-1,-1,-1,-1,-1,2014-12-31,"abc","c" +100,32740,-2147483647,-1.7010000002777e+38,-2.000000000000e+307,1970-01-01,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted","d" +-127,-32767,2147483620,1.7010000002777e+38,8.000000000000e+307,1970-01-02,"abcdefghijklmnopqrstuvwxyz","e" diff --git a/pandas/tests/io/data/stata/stata6_113.dta b/pandas/tests/io/data/stata/stata6_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..2e4795b167f266cf86afd73033bc1a43ae9afc5e GIT binary patch literal 2752 zcmXS9Vr1Z8U}b=S1A8YsF#?%ZsWs&c3Wf^4iA4%V28Jez%uoeT>cf$VV08@t|NjRG zf%JgDf~Bh5UB>f5omR2Sf48Q;U{rCUh7sjN@lGJzx14FPW2~Zx7f}H%k zbi5{{<>V&<&A<&(@=KF)fUdv^QWAlVEJ?vB&rn=alnHVLP5@Pa+nWpwYD}oQRW0=l z(xE~mvr`xldW}p>is9xIGr-uWwvAxk*QG32e4hvugQaCG(hRWl&i6Ug0i^E1k4|tt zSgOo0Y9KVgM#B&@PP?<<#M!}c3Y^v%p?p{(egNe&pG*g< z{{)kVx&S8M@WTo2CcK#gs%r1!La=!pbMwK~28@Okb&W{o!3~Bo5*Zm8Aa*p^Lxeuq zJE-mPNK9g2NKH%6$jr*l$<50zK-UNJ6J`hx-LOakrWz1nU})G64U7E@ALh6vCMA!! 
zut-jMQV&cQ4ay(>|1&OGVj<5ELH$kdkV=JiA9OYC8KPJJT!HB#1v!rt78RG2mX%jjRzY(MI4vSl}3&VdPD7`tI;j1s#T!}4{8B2iT%}=H?lmoHk zWTVysf7Y_?{ERX=Kz$eaw=e_%_%szPpJ zMP_bku0l>~UV2G}LP2U#Ze~eIYKlTqr9yB?Vo9Q&!XP&^rIE4W$^ZZLz_6%;gvFhD ihJ+MXV7f>_&Lf3I#U-U>Zp6cc4dZ z9C!c_XOQ{^Jwz%G5S1T++QzKyH8vFvqNh@r%lwaLKh5r#bt%vXy%15qu9Zuw*$z)W|04R8&iJkx7(@COOUY1LLk4bdgCiAp4jA!y&v$g`wg| zPEXXkrT-#9pnShWj}{8-Zfuf>p(4BA7n0ORE}d=n+=2iRw%~opB;WS~-@wUy_>MF$ zHm8F4?Ll}dC_#I|Py&%+*<+Im`cwRSAZMcC9RC^)X9JGHSy!YEvYV#VyASm+{zvSW zjl_QMB3XtVFNe(A|Gpa5>-*w#2d0?EE1{mMCsVE&Q51;2;Y~!w*2LuS-Yii8v?bjz zm)~1<*;%15E_;s5L2eL^ri&tw8yebV&9o~vT3T7>2CitXZE~(r zRI5102`3}?j~AtN#XI)>n!w_7gvGZSEYfK)7c_lGR$ay`A3m;C*X1P%XX&=(^>FWq Fb_Muvm<#{_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata6_117.dta b/pandas/tests/io/data/stata/stata6_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..bf4f3838e8be7d375d83dffe1ab3c06493f791f2 GIT binary patch literal 3490 zcmeHK&1)1f6wg@2UK9lHM*4A*&Td} z(2L;BKcUi-pn_*F9t!pkC@NY#bj;`OvSs^mleU53_g?bLOY-val0l3UPKrJ!7zLF0 zG{q>T5haY`eEu48t%AHdr%5Uaw`Zr2+fvaq?ghx5#%Kng0YHc|7)8XR5f-u*5D#N2 zhKj5BLVg6^s#V}{Ejs}2d~Fc9a#XZP(vWx&E%JJmGW<9DlWk|evtQ!-2^_Jg4ub4ZA!318 zdZh?*c{BcQ2rFE)??H%?N_kHUb5TNsjqb%yYI5#-5Qvy0_q!7qPeVD2uEh|#FH^Zl zy9BW$oOk-Bl|Gcm7zXeARot7T1!|x$DdpUkyA!rfI`V_z znPeq0jV|2Ld=OjyiNO;en&3D@*tFeSCNfbeASo$vnu`1!mKa9vt{Pvw51JQob5Jc1 z0X9lInzWi_nH0|dxUV98V|!Ia{+qrVTF0N2{tSFlH@9PSVESzsz2{?D@xR-4Gnj2} zY)_h+edc3W@4hW4d%drc9$974UQa7+_L_!`KpY3?H8;j})z~ ziYrOAv!7klQSyZ+_DL218tcXv>tO5U6!A*O&KD(rb6Lbiqi-wQ8R2iyr6+3{xZ@Y@ z1tEj_!2|=spt!hU^N_JCkR^}@gu^g}g2)33W3m*+wHQXUQsx1y(lib^r#|%Ra8^Fo zhv0v1)L(HLn_0FjeDPHK;_EVa;1@((`0Zz8A!WR__-LvA*qjy>OGhhTP~@rs|+wxM4);;*k`~LLur`X7#uRuWF(+^I2^JV08VNl4*&oF literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata7_115.dta b/pandas/tests/io/data/stata/stata7_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..133713b201ba8d5f3c8ff716bfa4e61638979da5 GIT binary patch literal 722 zcmXSBVq{=uU}S)RQ(-XR+Sng9taQ!|FYjQ`3KiwqeI z3=DCa1XN&zQxYTrRA3A=!2pjUaVC(Ys->PmIv!ORQmU543?ynG(FTTiSyl{dsVk6^ zn3S3W3|?5&!A!q+Ef6JYS7bYnqd9}F;2j|uNRbGx1PjAMpk9I2lNNnx2v@fk zKywuutW1D57&KXl5EPpX65bLCLpc*b_fmnW(g9nKqRQnksYF4zhuhKq0npqF)K`)6 z0mEo^y*b=4@o^nH{BZc`aI}*7{l2-HNp*+U4(}X3JA8Er)MSMbmp$G}M690%Lb0E5 ziF}WNPMHnuk>jx#3Ly!C(5Ib2Bt$dDq5%mtj1UxU6SB>oWdqfhI_W8m?b)&i2Kr}$ z`@T%MR;*gz4IU;!%u%X!MHc*E!3;osaoD@7zglc%P4zqQv*Q|(gzJoF$@xkD+nmF>^=TSBnbwI#E>8{BuFF%{~1cz zqh)qH?tA$XsDOlE5PZq4-}~+^@7}!$yG_!^6+f z8Yt9HbR2B9+jLWFS2fmHD(}f|FskRb&G=kAUso+gy^2I*adXf=-Q-nn<8yvOUndw1 z-ULb<=!E(@uljn0{*kd>D$H+=(@O-yPm30O@rLwu!?bB^fxZ}tDk1hXluc%C3 zR$l=QLVo}O(}Z&Jt3J&;N zjNRgvsY@uGFn)(Srf#7O!o)r9nRXq0B20x4Vj369CQOGBW||kuAFo|!g;@(J@VcwyQSDj+Pp;+1JzsF1MuhBu}ip(4Uk1QDh^ mp<=>v6j7#qp%TJM3^Ardp;E$X9C4;2p)vwYm`uk)<%D+)Z-evz literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata8_115.dta b/pandas/tests/io/data/stata/stata8_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..bb78368b3462b142ccd0c8c399e3a18f335a88d3 GIT binary patch literal 1624 zcmeIzIV=QG7zgm#W!?9EG}{oB%q($LbV@qi5@FreeJy{bQmIrbl}a5YjYRh~?)zT$ z81EaO&Dw;BO2kXvzVH3=e{W{qPBLS5o956;wYTQq%CJjm4f_6KG8diED78NQ30dI9W61n3RJn=+UGlT z4-9u$k(h;%sTHWH{^}y-k%K6!uhcz}rQaz2!h&x+<}ZkJWm^A|E#_8s_gWjHZB25H znB4o6v(|gA{crewCOZKIfdd0eWy5Ost|Gz`pE>uA9 l1rcQ05Go|hhY(`g6e=Ptgb`-i7Ahvdg!%wL+Yu@uyaK&w28I9t literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata8_117.dta b/pandas/tests/io/data/stata/stata8_117.dta new file mode 100644 index 
0000000000000000000000000000000000000000..fcfa7abd7b0d995add24f8504e6ae6e7d36194a0 GIT binary patch literal 2063 zcmeI!J4_To7y#e}MDPhHDq=%SG*<2{5o{+Vz7-Ffcjm@gga*xl0MEH0dG;c?~( zS2(`lmes<{e1&xmN|Ev~;r^q$%q&z{J1;kw+0I!9zhD@;*uk0WbPAX2a#Ub4krcYC zWSZAlEe*Fay$y{qt4aUduo{_JXk})86iFw|_1(hHCyKQtQQ;=V9L4(5$S;;rB84yI zQQ;@W;fkm}O0ldms#j5DCicqu9WUW@y26%&Y2owrAQ!9!*FKS_BncLJkcjx zZ6jt;&wGPTu$t|?z9Tx}TBB|3e}^77<%B6amlPGkgyyFGI|;`hjhFc;Pr3P5ax z%ZGcn!>lLeB^~MWOV_W)dW(8Gu=+1tz|8#g^%lDsVTb3~j@ItL+5at#xYxx4FP!Xf zIzBf0KH@Q)gJ{tqcDn8NmQbscMQgms)wlSo&H$`w4Df+?gJ(P0JdG)yD45>+Tj&cF;(8&Rc#zFC+>x<_kRA{nRgj*Cd8Efg#}qtYfCZ!{M8_3)i?E0kCpw|PUxFp1G|@=~av7G928gN@ zWL97W=^atEg8o%lMS4$Equ@mrvPd6@PAM2zgEgd&M5h%D<{*dknW$F5%XL^s8X`KQ z;ME3fAblk|tKjt}Y$AOlI;Y^x7HlDXC#qBMb{n>leh{5k009I@zliD;Ff%*{%$J^v Q$AsdpR{?5KweTO{HzJm*I{*Lx literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata9_115.dta b/pandas/tests/io/data/stata/stata9_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..5ad6cd6a2c8ff90bd34341ce087b1e14822d00a6 GIT binary patch literal 2342 zcmeH}OK4L;6o#ihPICwgGxMK$oOvV=2r6jW z6_h}pQI~N7oOz4}j3kr00f!D`DuNZc5*#=*2)OCO5Ltr&4-pua29oA50gq;wN2F~G z5Cxyfq5KlrkV?k-%eh!8k=z+eruHOKfSav=!&&&WL}YKe+_c=-V~l;4dsi6WwVYXL zg%-Qid}JB1{Ib^gzbv<`Gk&inU^Oa?Q(mAMkEKPW!~*i|ck2gmTeFg%qH=k}r9#GJ zO`l7B8-cr2lc_U;iEaYFn_#S)Am2?;Sze$sU{(x4kqfU72)R|gdJ|U3IAs7{Y=VXZr7x}0Gm1qHg!vxEt=#^Po-?_BeAvB6t zcP93)t5IjNed*16tP>It^z@Rx==SwMWwd`l*@?FYov==2rqAl^tAWBc(PQ+;5B2{$ z{0DN_^5WZr1lc_0G%FaFPhTM$5WpQZ@+qf_KVXsWTkVd6Q1v4}&Ifedm4@_XacYL> zjHPRN*wDG_pbM{Ox4rwU;;KA1*e{7rU)-!Ye|16Y>bdut)!eM%N%3d zd%X3TBD%d$*KE8oT>r%Kz9AZM5PB(&j!x=%&VT(WeL3*M8BKI#+VH~-!@2W@)02|l zPkh=@)8k#+{-cg$sZKRC(fE|%z3YaxbB2Al9Hh(K*{B4pw*vyfJ+)hwEpLXZZF8G{ Gjs5^At5M(p literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata9_117.dta b/pandas/tests/io/data/stata/stata9_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..5ad6cd6a2c8ff90bd34341ce087b1e14822d00a6 GIT binary patch literal 2342 zcmeH}OK4L;6o#ihPICwgGxMK$oOvV=2r6jW z6_h}pQI~N7oOz4}j3kr00f!D`DuNZc5*#=*2)OCO5Ltr&4-pua29oA50gq;wN2F~G z5Cxyfq5KlrkV?k-%eh!8k=z+eruHOKfSav=!&&&WL}YKe+_c=-V~l;4dsi6WwVYXL zg%-Qid}JB1{Ib^gzbv<`Gk&inU^Oa?Q(mAMkEKPW!~*i|ck2gmTeFg%qH=k}r9#GJ zO`l7B8-cr2lc_U;iEaYFn_#S)Am2?;Sze$sU{(x4kqfU72)R|gdJ|U3IAs7{Y=VXZr7x}0Gm1qHg!vxEt=#^Po-?_BeAvB6t zcP93)t5IjNed*16tP>It^z@Rx==SwMWwd`l*@?FYov==2rqAl^tAWBc(PQ+;5B2{$ z{0DN_^5WZr1lc_0G%FaFPhTM$5WpQZ@+qf_KVXsWTkVd6Q1v4}&Ifedm4@_XacYL> zjHPRN*wDG_pbM{Ox4rwU;;KA1*e{7rU)-!Ye|16Y>bdut)!eM%N%3d zd%X3TBD%d$*KE8oT>r%Kz9AZM5PB(&j!x=%&VT(WeL3*M8BKI#+VH~-!@2W@)02|l zPkh=@)8k#+{-cg$sZKRC(fE|%z3YaxbB2Al9Hh(K*{B4pw*vyfJ+)hwEpLXZZF8G{ Gjs5^At5M(p literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/xml/baby_names.xml b/pandas/tests/io/data/xml/baby_names.xml new file mode 100644 index 00000000..b4797b79 --- /dev/null +++ b/pandas/tests/io/data/xml/baby_names.xml @@ -0,0 +1,53 @@ + + + + 1 + José + Sofía + + + 2 + Luis + Valentina + + + 3 + Carlos + Isabella + + + 4 + Juan + Camila + + + 5 + Jorge + Valeria + + + 6 + Pedro + Mariana + + + 7 + Jesús + Gabriela + + + 8 + Manuel + Sara + + + 9 + Santiago + Daniella + + + 10 + Sebastián + María José + + diff --git a/pandas/tests/io/data/xml/books.xml b/pandas/tests/io/data/xml/books.xml new file mode 100644 index 00000000..666ce60e --- /dev/null +++ b/pandas/tests/io/data/xml/books.xml @@ -0,0 +1,21 @@ + + + + Everyday Italian + Giada De Laurentiis + 2005 + 30.00 + + + Harry Potter + J K. Rowling + 2005 + 29.99 + + + Learning XML + Erik T. 
Ray + 2003 + 39.95 + + diff --git a/pandas/tests/io/data/xml/cta_rail_lines.kml b/pandas/tests/io/data/xml/cta_rail_lines.kml new file mode 100644 index 00000000..c031137e --- /dev/null +++ b/pandas/tests/io/data/xml/cta_rail_lines.kml @@ -0,0 +1,92 @@ + + + CTA_RailLines + + + CTA_RailLines + + + Blue Line (Forest Park) + +
    Blue Line (Forest Park)
    OBJECTID_1 1
    ASSET_ID 21100001
    LINES Blue Line (Forest Park)
    DESCRIPTIO Oak Park to Austin
    TYPE Elevated or at Grade
    LEGEND BL
    ALT_LEGEND BL
    BRANCH Blue Line Forest Park
    SHAPE.LEN 4060.368778
    ]]>
    + #LineStyle01 + + + 0 + clampedToGround + -87.77678526964958,41.8708863930319,0 -87.77826234150609,41.87097820122218,0 -87.78251583439344,41.87130129991005,0 -87.78418294588424,41.87145055520308,0 -87.7872369165933,41.8717239119163,0 -87.79160214925886,41.87210797280065,0 + + +
    + + Red, Purple Line + +
    Red, Purple Line
    OBJECTID_1 2
    ASSET_ID 21100002
    LINES Red, Purple Line
    DESCRIPTIO Lawrence to Wilson
    TYPE Elevated or at Grade
    LEGEND RD
    ALT_LEGEND RDPR
    BRANCH Red Line North Side
    SHAPE.LEN 1800.132896
    ]]>
    + #LineStyle01 + + + 0 + clampedToGround + -87.65758750947528,41.96427269188822,0 -87.65802133507393,41.96581929055245,0 -87.65819033925305,41.96621846093642,0 -87.6583189819129,41.96650362897086,0 -87.65835858701473,41.96669002089185,0 -87.65838428411853,41.96688150295095,0 -87.65842208882658,41.96745896091846,0 -87.65846556843937,41.9683761425439,0 -87.65849296214573,41.96913893870342,0 + + +
    + + Red, Purple Line + +
    Red, Purple Line
    OBJECTID_1 3
    ASSET_ID 21100003
    LINES Red, Purple Line
    DESCRIPTIO Wilson to Sheridan
    TYPE Elevated or at Grade
    LEGEND RD
    ALT_LEGEND RDPR
    BRANCH Red Line North Side
    SHAPE.LEN 4256.243677
    ]]>
    + #LineStyle01 + + + 0 + clampedToGround + -87.65492939166126,41.95377494531437,0 -87.65557043199591,41.95376544118533,0 -87.65606302030132,41.95376391658746,0 -87.65623502146268,41.95377379126367,0 -87.65634748981634,41.95380103566435,0 -87.65646537904269,41.95387703994676,0 -87.65656532461145,41.95396622645799,0 -87.65664760856414,41.95404201996044,0 -87.65671750555913,41.95416647054043,0 -87.65673983607117,41.95429949810849,0 -87.65673866475777,41.95441024240925,0 -87.6567690255541,41.95490657227902,0 -87.65683672482363,41.95692259283837,0 -87.6568900886376,41.95861070983142,0 -87.65699865558875,41.96181418669004,0 -87.65756347177603,41.96397045777844,0 -87.65758750947528,41.96427269188822,0 + + +
    + + Red, Purple Line + +
    Red, Purple Line
    OBJECTID_1 4
    ASSET_ID 21100004
    LINES Red, Purple Line
    DESCRIPTIO Sheridan to Addison
    TYPE Elevated or at Grade
    LEGEND RD
    ALT_LEGEND RDPR
    BRANCH Red Line North Side
    SHAPE.LEN 2581.713736
    ]]>
    + #LineStyle01 + + + 0 + clampedToGround + -87.65362593118043,41.94742799535678,0 -87.65363554415794,41.94819886386848,0 -87.6536456393239,41.95059994675451,0 -87.65365831235026,41.95108288489359,0 -87.6536604873874,41.9519954657554,0 -87.65362592053201,41.95245597302328,0 -87.65367158496069,41.95311153649393,0 -87.65368468595476,41.9533202828916,0 -87.65369271253692,41.95343095587119,0 -87.65373335834569,41.95351536301472,0 -87.65378605844126,41.95358212680591,0 -87.65385067928185,41.95364452823767,0 -87.6539390793817,41.95370263886964,0 -87.6540786298351,41.95373403675265,0 -87.65430648647626,41.9537535411832,0 -87.65492939166126,41.95377494531437,0 + + +
    + + Red, Purple Line + +
    Red, Purple Line
    OBJECTID_1 5
    ASSET_ID 21100005
    LINES Red, Purple Line
    DESCRIPTIO Addison to Clark Junction
    TYPE Elevated or at Grade
    LEGEND RD
    ALT_LEGEND RDPR
    BRANCH Red Line North Side
    SHAPE.LEN 1918.716686
    ]]>
    + #LineStyle01 + + + 0 + clampedToGround + -87.65345391792157,41.94217681262115,0 -87.65342448305786,41.94237224420864,0 -87.65339745703922,41.94268217746244,0 -87.65337753982941,41.94288140770284,0 -87.65336256753105,41.94317369618263,0 -87.65338799707138,41.94357253961736,0 -87.65340240886648,41.94389158188269,0 -87.65341837392448,41.94406444407721,0 -87.65342275247338,41.94421065714904,0 -87.65347469646018,41.94434829382345,0 -87.65351486483024,41.94447699917548,0 -87.65353483605053,41.9453896864472,0 -87.65361975532807,41.94689193720703,0 -87.65362593118043,41.94742799535678,0 + + +
    +
    + +
    +
    diff --git a/pandas/tests/io/data/xml/doc_ch_utf.xml b/pandas/tests/io/data/xml/doc_ch_utf.xml new file mode 100644 index 00000000..fde215b8 --- /dev/null +++ b/pandas/tests/io/data/xml/doc_ch_utf.xml @@ -0,0 +1,29 @@ + + + + + + + + + + +]> + + + + <問 speaker="Opponent">問 若箇是邪而言破邪 何者是正而道(Sorry, this is Big5 only)申正 + <答 speaker="吉藏">答 邪既無量 正亦多途 大略為言不出二種 謂有得與無得 有得是邪須破 無得是正須申 + 故大品經 善吉 致問 何等是菩薩道 何等非菩薩道 + 佛答云 有所得非菩薩道 無所得是菩薩道 + + + <問 speaker="Opponent">問 既破有得申無得 亦應但破性執申假名以不 +
    答 性執是有得 假名是無得 今破有得申無得 即是破性執申假名也 + + + <問 speaker="Opponent">問 既破性申假 亦應但破有申無 若有無兩洗 亦應性假雙破耶 + <答 speaker="吉藏">答 不例 有無皆是性 所以須雙破 既分性假異 故有破不破 + + diff --git a/pandas/tests/io/data/xml/flatten_doc.xsl b/pandas/tests/io/data/xml/flatten_doc.xsl new file mode 100644 index 00000000..a9d62d18 --- /dev/null +++ b/pandas/tests/io/data/xml/flatten_doc.xsl @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + diff --git a/pandas/tests/io/data/xml/row_field_output.xsl b/pandas/tests/io/data/xml/row_field_output.xsl new file mode 100644 index 00000000..5a0f0e65 --- /dev/null +++ b/pandas/tests/io/data/xml/row_field_output.xsl @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/pandas/tests/io/excel/__init__.py b/pandas/tests/io/excel/__init__.py new file mode 100644 index 00000000..e7a182ea --- /dev/null +++ b/pandas/tests/io/excel/__init__.py @@ -0,0 +1,20 @@ +import pytest + +pytestmark = [ + pytest.mark.filterwarnings( + # Looks like tree.getiterator is deprecated in favor of tree.iter + "ignore:This method will be removed in future versions:" + "PendingDeprecationWarning" + ), + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions:DeprecationWarning" + ), + # GH 26552 + pytest.mark.filterwarnings( + "ignore:As the xlwt package is no longer maintained:FutureWarning" + ), + # GH 38571 + pytest.mark.filterwarnings( + "ignore:.*In xlrd >= 2.0, only the xls format is supported:FutureWarning" + ), +] diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py new file mode 100644 index 00000000..4ce06c01 --- /dev/null +++ b/pandas/tests/io/excel/conftest.py @@ -0,0 +1,67 @@ +import pytest + +from pandas.compat import is_platform_windows +import pandas.util._test_decorators as td + +import pandas._testing as tm + +from pandas.io.parsers import read_csv + + +@pytest.fixture +def frame(float_frame): + """ + Returns the first ten items in fixture "float_frame". + """ + return float_frame[:10] + + +@pytest.fixture +def tsframe(): + return tm.makeTimeDataFrame()[:5] + + +@pytest.fixture(params=[True, False]) +def merge_cells(request): + return request.param + + +@pytest.fixture +def df_ref(datapath): + """ + Obtain the reference data from read_csv with the Python engine. + """ + filepath = datapath("io", "data", "csv", "test1.csv") + df_ref = read_csv(filepath, index_col=0, parse_dates=True, engine="python") + return df_ref + + +@pytest.fixture(params=[".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"]) +def read_ext(request): + """ + Valid extensions for reading Excel files. + """ + return request.param + + +# Checking for file leaks can hang on Windows CI +@pytest.fixture(autouse=not is_platform_windows()) +def check_for_file_leaks(): + """ + Fixture to run around every test to ensure that we are not leaking files. 
+ + See also + -------- + _test_decorators.check_file_leaks + """ + # GH#30162 + psutil = td.safe_import("psutil") + if not psutil: + yield + + else: + proc = psutil.Process() + flist = proc.open_files() + yield + flist2 = proc.open_files() + assert flist == flist2 diff --git a/pandas/tests/io/excel/test_odf.py b/pandas/tests/io/excel/test_odf.py new file mode 100644 index 00000000..25079b23 --- /dev/null +++ b/pandas/tests/io/excel/test_odf.py @@ -0,0 +1,50 @@ +import functools + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +pytest.importorskip("odf") + + +@pytest.fixture(autouse=True) +def cd_and_set_engine(monkeypatch, datapath): + func = functools.partial(pd.read_excel, engine="odf") + monkeypatch.setattr(pd, "read_excel", func) + monkeypatch.chdir(datapath("io", "data", "excel")) + + +def test_read_invalid_types_raises(): + # the invalid_value_type.ods required manually editing + # of the included content.xml file + with pytest.raises(ValueError, match="Unrecognized type awesome_new_type"): + pd.read_excel("invalid_value_type.ods") + + +def test_read_writer_table(): + # Also test reading tables from an text OpenDocument file + # (.odt) + index = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header") + expected = pd.DataFrame( + [[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]], + index=index, + columns=["Column 1", "Unnamed: 2", "Column 3"], + ) + + result = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0) + + tm.assert_frame_equal(result, expected) + + +def test_read_newlines_between_xml_elements_table(): + # GH#45598 + expected = pd.DataFrame( + [[1.0, 4.0, 7], [np.nan, np.nan, 8], [3.0, 6.0, 9]], + columns=["Column 1", "Column 2", "Column 3"], + ) + + result = pd.read_excel("test_newlines.ods") + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py new file mode 100644 index 00000000..e9dad0c7 --- /dev/null +++ b/pandas/tests/io/excel/test_odswriter.py @@ -0,0 +1,68 @@ +import re + +import pytest + +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter + +odf = pytest.importorskip("odf") + +pytestmark = pytest.mark.parametrize("ext", [".ods"]) + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with odf!" 
+ + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="odf", mode="a") + + +def test_kwargs(ext): + # GH 42286 + # GH 43445 + # test for error: OpenDocumentSpreadsheet does not accept any arguments + kwargs = {"kwarg": 1} + with tm.ensure_clean(ext) as f: + msg = re.escape("Use of **kwargs is deprecated") + error = re.escape( + "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'" + ) + with pytest.raises( + TypeError, + match=error, + ): + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="odf", **kwargs) as _: + pass + + +@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}]) +def test_engine_kwargs(ext, engine_kwargs): + # GH 42286 + # GH 43445 + # test for error: OpenDocumentSpreadsheet does not accept any arguments + with tm.ensure_clean(ext) as f: + if engine_kwargs is not None: + error = re.escape( + "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'" + ) + with pytest.raises( + TypeError, + match=error, + ): + ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) + else: + with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _: + pass + + +def test_book_and_sheets_consistent(ext): + # GH#45687 - Ensure sheets is updated if user modifies book + with tm.ensure_clean(ext) as f: + with ExcelWriter(f) as writer: + assert writer.sheets == {} + table = odf.table.Table(name="test_name") + writer.book.spreadsheet.addElement(table) + assert writer.sheets == {"test_name": table} diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py new file mode 100644 index 00000000..3b122c85 --- /dev/null +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -0,0 +1,412 @@ +import contextlib +from pathlib import Path +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.excel import ( + ExcelWriter, + _OpenpyxlWriter, +) + +openpyxl = pytest.importorskip("openpyxl") + +pytestmark = pytest.mark.parametrize("ext", [".xlsx"]) + + +def test_to_excel_styleconverter(ext): + from openpyxl import styles + + hstyle = { + "font": {"color": "00FF0000", "bold": True}, + "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"}, + "alignment": {"horizontal": "center", "vertical": "top"}, + "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}}, + "number_format": {"format_code": "0.00"}, + "protection": {"locked": True, "hidden": False}, + } + + font_color = styles.Color("00FF0000") + font = styles.Font(bold=True, color=font_color) + side = styles.Side(style=styles.borders.BORDER_THIN) + border = styles.Border(top=side, right=side, bottom=side, left=side) + alignment = styles.Alignment(horizontal="center", vertical="top") + fill_color = styles.Color(rgb="006666FF", tint=0.3) + fill = styles.PatternFill(patternType="solid", fgColor=fill_color) + + number_format = "0.00" + + protection = styles.Protection(locked=True, hidden=False) + + kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle) + assert kw["font"] == font + assert kw["border"] == border + assert kw["alignment"] == alignment + assert kw["fill"] == fill + assert kw["number_format"] == number_format + assert kw["protection"] == protection + + +def test_write_cells_merge_styled(ext): + from pandas.io.formats.excel import ExcelCell + + sheet_name = "merge_styled" + + sty_b1 = {"font": {"color": "00FF0000"}} + sty_a2 = {"font": {"color": "0000FF00"}} 
+ + initial_cells = [ + ExcelCell(col=1, row=0, val=42, style=sty_b1), + ExcelCell(col=0, row=1, val=99, style=sty_a2), + ] + + sty_merged = {"font": {"color": "000000FF", "bold": True}} + sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged) + openpyxl_sty_merged = sty_kwargs["font"] + merge_cells = [ + ExcelCell( + col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged + ) + ] + + with tm.ensure_clean(ext) as path: + with _OpenpyxlWriter(path) as writer: + writer._write_cells(initial_cells, sheet_name=sheet_name) + writer._write_cells(merge_cells, sheet_name=sheet_name) + + wks = writer.sheets[sheet_name] + xcell_b1 = wks["B1"] + xcell_a2 = wks["A2"] + assert xcell_b1.font == openpyxl_sty_merged + assert xcell_a2.font == openpyxl_sty_merged + + +@pytest.mark.parametrize("iso_dates", [True, False]) +def test_kwargs(ext, iso_dates): + # GH 42286 GH 43445 + kwargs = {"iso_dates": iso_dates} + with tm.ensure_clean(ext) as f: + msg = re.escape("Use of **kwargs is deprecated") + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="openpyxl", **kwargs) as writer: + assert writer.book.iso_dates == iso_dates + # ExcelWriter won't allow us to close without writing something + DataFrame().to_excel(writer) + + +@pytest.mark.parametrize("iso_dates", [True, False]) +def test_engine_kwargs_write(ext, iso_dates): + # GH 42286 GH 43445 + engine_kwargs = {"iso_dates": iso_dates} + with tm.ensure_clean(ext) as f: + with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer: + assert writer.book.iso_dates == iso_dates + # ExcelWriter won't allow us to close without writing something + DataFrame().to_excel(writer) + + +def test_engine_kwargs_append_invalid(ext): + # GH 43445 + # test whether an invalid engine kwargs actually raises + with tm.ensure_clean(ext) as f: + DataFrame(["hello", "world"]).to_excel(f) + with pytest.raises( + TypeError, + match=re.escape( + "load_workbook() got an unexpected keyword argument 'apple_banana'" + ), + ): + with ExcelWriter( + f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"} + ) as writer: + # ExcelWriter needs us to write something to close properly + DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2") + + +@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")]) +def test_engine_kwargs_append_data_only(ext, data_only, expected): + # GH 43445 + # tests whether the data_only engine_kwarg actually works well for + # openpyxl's load_workbook + with tm.ensure_clean(ext) as f: + DataFrame(["=1+1"]).to_excel(f) + with ExcelWriter( + f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only} + ) as writer: + assert writer.sheets["Sheet1"]["B2"].value == expected + # ExcelWriter needs us to writer something to close properly? 
+ DataFrame().to_excel(writer, sheet_name="Sheet2") + + +@pytest.mark.parametrize( + "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])] +) +def test_write_append_mode(ext, mode, expected): + df = DataFrame([1], columns=["baz"]) + + with tm.ensure_clean(ext) as f: + wb = openpyxl.Workbook() + wb.worksheets[0].title = "foo" + wb.worksheets[0]["A1"].value = "foo" + wb.create_sheet("bar") + wb.worksheets[1]["A1"].value = "bar" + wb.save(f) + + with ExcelWriter(f, engine="openpyxl", mode=mode) as writer: + df.to_excel(writer, sheet_name="baz", index=False) + + with contextlib.closing(openpyxl.load_workbook(f)) as wb2: + result = [sheet.title for sheet in wb2.worksheets] + assert result == expected + + for index, cell_value in enumerate(expected): + assert wb2.worksheets[index]["A1"].value == cell_value + + +@pytest.mark.parametrize( + "if_sheet_exists,num_sheets,expected", + [ + ("new", 2, ["apple", "banana"]), + ("replace", 1, ["pear"]), + ("overlay", 1, ["pear", "banana"]), + ], +) +def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected): + # GH 40230 + df1 = DataFrame({"fruit": ["apple", "banana"]}) + df2 = DataFrame({"fruit": ["pear"]}) + + with tm.ensure_clean(ext) as f: + df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False) + with ExcelWriter( + f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists + ) as writer: + df2.to_excel(writer, sheet_name="foo", index=False) + + with contextlib.closing(openpyxl.load_workbook(f)) as wb: + assert len(wb.sheetnames) == num_sheets + assert wb.sheetnames[0] == "foo" + result = pd.read_excel(wb, "foo", engine="openpyxl") + assert list(result["fruit"]) == expected + if len(wb.sheetnames) == 2: + result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl") + tm.assert_frame_equal(result, df2) + + +@pytest.mark.parametrize( + "startrow, startcol, greeting, goodbye", + [ + (0, 0, ["poop", "world"], ["goodbye", "people"]), + (0, 1, ["hello", "world"], ["poop", "people"]), + (1, 0, ["hello", "poop"], ["goodbye", "people"]), + (1, 1, ["hello", "world"], ["goodbye", "poop"]), + ], +) +def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye): + df1 = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]}) + df2 = DataFrame(["poop"]) + + with tm.ensure_clean(ext) as f: + df1.to_excel(f, engine="openpyxl", sheet_name="poo", index=False) + with ExcelWriter( + f, engine="openpyxl", mode="a", if_sheet_exists="overlay" + ) as writer: + # use startrow+1 because we don't have a header + df2.to_excel( + writer, + index=False, + header=False, + startrow=startrow + 1, + startcol=startcol, + sheet_name="poo", + ) + + result = pd.read_excel(f, sheet_name="poo", engine="openpyxl") + expected = DataFrame({"greeting": greeting, "goodbye": goodbye}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "if_sheet_exists,msg", + [ + ( + "invalid", + "'invalid' is not valid for if_sheet_exists. 
Valid options " + "are 'error', 'new', 'replace' and 'overlay'.", + ), + ( + "error", + "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.", + ), + ( + None, + "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.", + ), + ], +) +def test_if_sheet_exists_raises(ext, if_sheet_exists, msg): + # GH 40230 + df = DataFrame({"fruit": ["pear"]}) + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=re.escape(msg)): + df.to_excel(f, "foo", engine="openpyxl") + with ExcelWriter( + f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists + ) as writer: + df.to_excel(writer, sheet_name="foo") + + +def test_to_excel_with_openpyxl_engine(ext): + # GH 29854 + with tm.ensure_clean(ext) as filename: + + df1 = DataFrame({"A": np.linspace(1, 10, 10)}) + df2 = DataFrame({"B": np.linspace(1, 20, 10)}) + df = pd.concat([df1, df2], axis=1) + styled = df.style.applymap( + lambda val: "color: %s" % ("red" if val < 0 else "black") + ).highlight_max() + + styled.to_excel(filename, engine="openpyxl") + + +@pytest.mark.parametrize("read_only", [True, False]) +def test_read_workbook(datapath, ext, read_only): + # GH 39528 + filename = datapath("io", "data", "excel", "test1" + ext) + with contextlib.closing( + openpyxl.load_workbook(filename, read_only=read_only) + ) as wb: + result = pd.read_excel(wb, engine="openpyxl") + expected = pd.read_excel(filename) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "header, expected_data", + [ + ( + 0, + { + "Title": [np.nan, "A", 1, 2, 3], + "Unnamed: 1": [np.nan, "B", 4, 5, 6], + "Unnamed: 2": [np.nan, "C", 7, 8, 9], + }, + ), + (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}), + ], +) +@pytest.mark.parametrize( + "filename", ["dimension_missing", "dimension_small", "dimension_large"] +) +# When read_only is None, use read_excel instead of a workbook +@pytest.mark.parametrize("read_only", [True, False, None]) +def test_read_with_bad_dimension( + datapath, ext, header, expected_data, filename, read_only +): + # GH 38956, 39001 - no/incorrect dimension information + path = datapath("io", "data", "excel", f"{filename}{ext}") + if read_only is None: + result = pd.read_excel(path, header=header) + else: + with contextlib.closing( + openpyxl.load_workbook(path, read_only=read_only) + ) as wb: + result = pd.read_excel(wb, engine="openpyxl", header=header) + expected = DataFrame(expected_data) + tm.assert_frame_equal(result, expected) + + +def test_append_mode_file(ext): + # GH 39576 + df = DataFrame() + + with tm.ensure_clean(ext) as f: + df.to_excel(f, engine="openpyxl") + + with ExcelWriter( + f, mode="a", engine="openpyxl", if_sheet_exists="new" + ) as writer: + df.to_excel(writer) + + # make sure that zip files are not concatenated by making sure that + # "docProps/app.xml" only occurs twice in the file + data = Path(f).read_bytes() + first = data.find(b"docProps/app.xml") + second = data.find(b"docProps/app.xml", first + 1) + third = data.find(b"docProps/app.xml", second + 1) + assert second != -1 and third == -1 + + +# When read_only is None, use read_excel instead of a workbook +@pytest.mark.parametrize("read_only", [True, False, None]) +def test_read_with_empty_trailing_rows(datapath, ext, read_only): + # GH 39181 + path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}") + if read_only is None: + result = pd.read_excel(path) + else: + with contextlib.closing( + openpyxl.load_workbook(path, read_only=read_only) + ) as wb: + result = pd.read_excel(wb, engine="openpyxl") + expected 
= DataFrame( + { + "Title": [np.nan, "A", 1, 2, 3], + "Unnamed: 1": [np.nan, "B", 4, 5, 6], + "Unnamed: 2": [np.nan, "C", 7, 8, 9], + } + ) + tm.assert_frame_equal(result, expected) + + +# When read_only is None, use read_excel instead of a workbook +@pytest.mark.parametrize("read_only", [True, False, None]) +def test_read_empty_with_blank_row(datapath, ext, read_only): + # GH 39547 - empty excel file with a row that has no data + path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}") + if read_only is None: + result = pd.read_excel(path) + else: + with contextlib.closing( + openpyxl.load_workbook(path, read_only=read_only) + ) as wb: + result = pd.read_excel(wb, engine="openpyxl") + expected = DataFrame() + tm.assert_frame_equal(result, expected) + + +def test_book_and_sheets_consistent(ext): + # GH#45687 - Ensure sheets is updated if user modifies book + with tm.ensure_clean(ext) as f: + with ExcelWriter(f, engine="openpyxl") as writer: + assert writer.sheets == {} + sheet = writer.book.create_sheet("test_name", 0) + assert writer.sheets == {"test_name": sheet} + + +def test_ints_spelled_with_decimals(datapath, ext): + # GH 46988 - openpyxl returns this sheet with floats + path = datapath("io", "data", "excel", f"ints_spelled_with_decimals{ext}") + result = pd.read_excel(path) + expected = DataFrame(range(2, 12), columns=[1]) + tm.assert_frame_equal(result, expected) + + +def test_read_multiindex_header_no_index_names(datapath, ext): + # GH#47487 + path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}") + result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2]) + expected = DataFrame( + [[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]], + columns=pd.MultiIndex.from_tuples( + [("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")] + ), + index=pd.MultiIndex.from_tuples([("A", "AA", "AAA"), ("A", "BB", "BBB")]), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py new file mode 100644 index 00000000..fa1d6bbf --- /dev/null +++ b/pandas/tests/io/excel/test_readers.py @@ -0,0 +1,1636 @@ +from datetime import ( + datetime, + time, +) +from functools import partial +import os +from pathlib import Path +from urllib.error import URLError +from zipfile import BadZipFile + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + +read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"] +engine_params = [ + # Add any engines to test here + # When defusedxml is installed it triggers deprecation warnings for + # xlrd and openpyxl, so catch those here + pytest.param( + "xlrd", + marks=[ + td.skip_if_no("xlrd"), + ], + ), + pytest.param( + "openpyxl", + marks=[ + td.skip_if_no("openpyxl"), + pytest.mark.filterwarnings("ignore:.*html argument"), + ], + ), + pytest.param( + None, + marks=[ + td.skip_if_no("xlrd"), + ], + ), + pytest.param("pyxlsb", marks=td.skip_if_no("pyxlsb")), + pytest.param("odf", marks=td.skip_if_no("odf")), +] + + +def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool: + """ + Filter out invalid (engine, ext) pairs instead of skipping, as that + produces 500+ pytest.skips. 
+ """ + engine = engine.values[0] + if engine == "openpyxl" and read_ext == ".xls": + return False + if engine == "odf" and read_ext != ".ods": + return False + if read_ext == ".ods" and engine != "odf": + return False + if engine == "pyxlsb" and read_ext != ".xlsb": + return False + if read_ext == ".xlsb" and engine != "pyxlsb": + return False + if engine == "xlrd" and read_ext != ".xls": + return False + return True + + +def _transfer_marks(engine, read_ext): + """ + engine gives us a pytest.param object with some marks, read_ext is just + a string. We need to generate a new pytest.param inheriting the marks. + """ + values = engine.values + (read_ext,) + new_param = pytest.param(values, marks=engine.marks) + return new_param + + +@pytest.fixture( + params=[ + _transfer_marks(eng, ext) + for eng in engine_params + for ext in read_ext_params + if _is_valid_engine_ext_pair(eng, ext) + ], + ids=str, +) +def engine_and_read_ext(request): + """ + Fixture for Excel reader engine and read_ext, only including valid pairs. + """ + return request.param + + +@pytest.fixture +def engine(engine_and_read_ext): + engine, read_ext = engine_and_read_ext + return engine + + +@pytest.fixture +def read_ext(engine_and_read_ext): + engine, read_ext = engine_and_read_ext + return read_ext + + +class TestReaders: + @pytest.fixture(autouse=True) + def cd_and_set_engine(self, engine, datapath, monkeypatch): + """ + Change directory and set engine for read_excel calls. + """ + func = partial(pd.read_excel, engine=engine) + monkeypatch.chdir(datapath("io", "data", "excel")) + monkeypatch.setattr(pd, "read_excel", func) + + def test_engine_used(self, read_ext, engine, monkeypatch): + # GH 38884 + def parser(self, *args, **kwargs): + return self.engine + + monkeypatch.setattr(pd.ExcelFile, "parse", parser) + + expected_defaults = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlrd", + "ods": "odf", + } + + with open("test1" + read_ext, "rb") as f: + result = pd.read_excel(f) + + if engine is not None: + expected = engine + else: + expected = expected_defaults[read_ext[1:]] + assert result == expected + + def test_usecols_int(self, read_ext): + # usecols as int + msg = "Passing an integer for `usecols`" + with pytest.raises(ValueError, match=msg): + pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=3 + ) + + # usecols as int + with pytest.raises(ValueError, match=msg): + pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols=3, + ) + + def test_usecols_list(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + df_ref = df_ref.reindex(columns=["B", "C"]) + df1 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=[0, 2, 3] + ) + df2 = pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols=[0, 2, 3], + ) + + # TODO add index to xls file) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + def test_usecols_str(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + df1 = df_ref.reindex(columns=["A", "B", "C"]) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A:D" + ) + df3 = pd.read_excel( + 
"test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A:D", + ) + + # TODO add index to xls, read xls ignores index name ? + tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + df1 = df_ref.reindex(columns=["B", "C"]) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,C,D" + ) + df3 = pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A,C,D", + ) + # TODO add index to xls file + tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + df1 = df_ref.reindex(columns=["B", "C"]) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,C:D" + ) + df3 = pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A,C:D", + ) + tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + @pytest.mark.parametrize( + "usecols", [[0, 1, 3], [0, 3, 1], [1, 0, 3], [1, 3, 0], [3, 0, 1], [3, 1, 0]] + ) + def test_usecols_diff_positional_int_columns_order( + self, request, read_ext, usecols, df_ref + ): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = df_ref[["A", "C"]] + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=usecols + ) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.parametrize("usecols", [["B", "D"], ["D", "B"]]) + def test_usecols_diff_positional_str_columns_order(self, read_ext, usecols, df_ref): + expected = df_ref[["B", "D"]] + expected.index = range(len(expected)) + + result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", usecols=usecols) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_read_excel_without_slicing(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = df_ref + result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = df_ref[["C", "D"]] + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,D:E" + ) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str_invalid(self, read_ext): + msg = "Invalid column name: E1" + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, sheet_name="Sheet1", usecols="D:E1") + + def test_index_col_label_error(self, read_ext): + msg = "list indices must be integers.*, not str" + + with pytest.raises(TypeError, match=msg): + pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet1", + index_col=["A"], + usecols=["A", "C"], + ) + + def test_index_col_empty(self, read_ext): + # see gh-9208 + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet3", index_col=["A", "B", "C"] + ) + expected = DataFrame( + columns=["D", "E", "F"], + index=MultiIndex(levels=[[]] * 3, codes=[[]] * 3, names=["A", "B", "C"]), + ) + tm.assert_frame_equal(result, 
expected) + + @pytest.mark.parametrize("index_col", [None, 2]) + def test_index_col_with_unnamed(self, read_ext, index_col): + # see gh-18792 + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet4", index_col=index_col + ) + expected = DataFrame( + [["i1", "a", "x"], ["i2", "b", "y"]], columns=["Unnamed: 0", "col1", "col2"] + ) + if index_col: + expected = expected.set_index(expected.columns[index_col]) + + tm.assert_frame_equal(result, expected) + + def test_usecols_pass_non_existent_column(self, read_ext): + msg = ( + "Usecols do not match columns, " + "columns expected but not found: " + r"\['E'\]" + ) + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, usecols=["E"]) + + def test_usecols_wrong_type(self, read_ext): + msg = ( + "'usecols' must either be list-like of " + "all strings, all unicode, all integers or a callable." + ) + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, usecols=["E1", 0]) + + def test_excel_stop_iterator(self, read_ext): + + parsed = pd.read_excel("test2" + read_ext, sheet_name="Sheet1") + expected = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"]) + tm.assert_frame_equal(parsed, expected) + + def test_excel_cell_error_na(self, request, read_ext): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + parsed = pd.read_excel("test3" + read_ext, sheet_name="Sheet1") + expected = DataFrame([[np.nan]], columns=["Test"]) + tm.assert_frame_equal(parsed, expected) + + def test_excel_table(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + df1 = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet2", skiprows=[1], index_col=0 + ) + # TODO add index to file + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + df3 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, skipfooter=1 + ) + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + def test_reader_special_dtypes(self, request, read_ext): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = DataFrame.from_dict( + { + "IntCol": [1, 2, -3, 4, 0], + "FloatCol": [1.25, 2.25, 1.83, 1.92, 0.0000000005], + "BoolCol": [True, False, True, True, False], + "StrCol": [1, 2, 3, 4, 5], + # GH5394 - this is why convert_float isn't vectorized + "Str2Col": ["a", 3, "c", "d", "e"], + "DateCol": [ + datetime(2013, 10, 30), + datetime(2013, 10, 31), + datetime(1905, 1, 1), + datetime(2013, 12, 14), + datetime(2015, 3, 14), + ], + }, + ) + basename = "test_types" + + # should read in correctly and infer types + actual = pd.read_excel(basename + read_ext, sheet_name="Sheet1") + tm.assert_frame_equal(actual, expected) + + # if not coercing number, then int comes in as float + float_expected = expected.copy() + float_expected["IntCol"] = float_expected["IntCol"].astype(float) + float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 + with tm.assert_produces_warning( + FutureWarning, + match="convert_float is deprecated", + raise_on_extra_warnings=False, + ): + # raise_on_extra_warnings because xlrd raises a PendingDeprecationWarning + # on database job Linux_py37_IO 
(ci/deps/actions-37-db.yaml) + # See GH#41176 + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", convert_float=False + ) + tm.assert_frame_equal(actual, float_expected) + + # check setting Index (assuming xls and xlsx are the same here) + for icol, name in enumerate(expected.columns): + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", index_col=icol + ) + exp = expected.set_index(name) + tm.assert_frame_equal(actual, exp) + + # convert_float and converters should be different but both accepted + expected["StrCol"] = expected["StrCol"].apply(str) + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", converters={"StrCol": str} + ) + tm.assert_frame_equal(actual, expected) + + no_convert_float = float_expected.copy() + no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) + with tm.assert_produces_warning( + FutureWarning, + match="convert_float is deprecated", + raise_on_extra_warnings=False, + ): + # raise_on_extra_warnings because xlrd raises a PendingDeprecationWarning + # on database job Linux_py37_IO (ci/deps/actions-37-db.yaml) + # See GH#41176 + actual = pd.read_excel( + basename + read_ext, + sheet_name="Sheet1", + convert_float=False, + converters={"StrCol": str}, + ) + tm.assert_frame_equal(actual, no_convert_float) + + # GH8212 - support for converters and missing values + def test_reader_converters(self, read_ext): + + basename = "test_converters" + + expected = DataFrame.from_dict( + { + "IntCol": [1, 2, -3, -1000, 0], + "FloatCol": [12.5, np.nan, 18.3, 19.2, 0.000000005], + "BoolCol": ["Found", "Found", "Found", "Not found", "Found"], + "StrCol": ["1", np.nan, "3", "4", "5"], + } + ) + + converters = { + "IntCol": lambda x: int(x) if x != "" else -1000, + "FloatCol": lambda x: 10 * x if x else np.nan, + 2: lambda x: "Found" if x != "" else "Not found", + 3: lambda x: str(x) if x else "", + } + + # should read in correctly and set types of single cells (not array + # dtypes) + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", converters=converters + ) + tm.assert_frame_equal(actual, expected) + + def test_reader_dtype(self, read_ext): + # GH 8212 + basename = "testdtype" + actual = pd.read_excel(basename + read_ext) + + expected = DataFrame( + { + "a": [1, 2, 3, 4], + "b": [2.5, 3.5, 4.5, 5.5], + "c": [1, 2, 3, 4], + "d": [1.0, 2.0, np.nan, 4.0], + } + ).reindex(columns=["a", "b", "c", "d"]) + + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel( + basename + read_ext, dtype={"a": "float64", "b": "float32", "c": str} + ) + + expected["a"] = expected["a"].astype("float64") + expected["b"] = expected["b"].astype("float32") + expected["c"] = ["001", "002", "003", "004"] + tm.assert_frame_equal(actual, expected) + + msg = "Unable to convert column d to type int64" + with pytest.raises(ValueError, match=msg): + pd.read_excel(basename + read_ext, dtype={"d": "int64"}) + + @pytest.mark.parametrize( + "dtype,expected", + [ + ( + None, + DataFrame( + { + "a": [1, 2, 3, 4], + "b": [2.5, 3.5, 4.5, 5.5], + "c": [1, 2, 3, 4], + "d": [1.0, 2.0, np.nan, 4.0], + } + ), + ), + ( + {"a": "float64", "b": "float32", "c": str, "d": str}, + DataFrame( + { + "a": Series([1, 2, 3, 4], dtype="float64"), + "b": Series([2.5, 3.5, 4.5, 5.5], dtype="float32"), + "c": ["001", "002", "003", "004"], + "d": ["1", "2", np.nan, "4"], + } + ), + ), + ], + ) + def test_reader_dtype_str(self, read_ext, dtype, expected): + # see gh-20377 + basename = "testdtype" + + actual = pd.read_excel(basename + read_ext, 
dtype=dtype) + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)]) + def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value): + # GH#35211 + basename = "df_mangle_dup_col_dtypes" + dtype_dict = {"a": str, **dtypes} + dtype_dict_copy = dtype_dict.copy() + # GH#42462 + result = pd.read_excel(basename + read_ext, dtype=dtype_dict) + expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) + assert dtype_dict == dtype_dict_copy, "dtype dict changed" + tm.assert_frame_equal(result, expected) + + def test_reader_spaces(self, read_ext): + # see gh-32207 + basename = "test_spaces" + + actual = pd.read_excel(basename + read_ext) + expected = DataFrame( + { + "testcol": [ + "this is great", + "4 spaces", + "1 trailing ", + " 1 leading", + "2 spaces multiple times", + ] + } + ) + tm.assert_frame_equal(actual, expected) + + # gh-36122, gh-35802 + @pytest.mark.parametrize( + "basename,expected", + [ + ("gh-35802", DataFrame({"COLUMN": ["Test (1)"]})), + ("gh-36122", DataFrame(columns=["got 2nd sa"])), + ], + ) + def test_read_excel_ods_nested_xml(self, engine, read_ext, basename, expected): + # see gh-35802 + if engine != "odf": + pytest.skip(f"Skipped for engine: {engine}") + + actual = pd.read_excel(basename + read_ext) + tm.assert_frame_equal(actual, expected) + + def test_reading_all_sheets(self, read_ext): + # Test reading all sheet names by setting sheet_name to None, + # Ensure a dict is returned. + # See PR #9450 + basename = "test_multisheet" + dfs = pd.read_excel(basename + read_ext, sheet_name=None) + # ensure this is not alphabetical to test order preservation + expected_keys = ["Charlie", "Alpha", "Beta"] + tm.assert_contains_all(expected_keys, dfs.keys()) + # Issue 9930 + # Ensure sheet order is preserved + assert expected_keys == list(dfs.keys()) + + def test_reading_multiple_specific_sheets(self, read_ext): + # Test reading specific sheet names by specifying a mixed list + # of integers and strings, and confirm that duplicated sheet + # references (positions/names) are removed properly. + # Ensure a dict is returned + # See PR #9450 + basename = "test_multisheet" + # Explicitly request duplicates. Only the set should be returned. + expected_keys = [2, "Charlie", "Charlie"] + dfs = pd.read_excel(basename + read_ext, sheet_name=expected_keys) + expected_keys = list(set(expected_keys)) + tm.assert_contains_all(expected_keys, dfs.keys()) + assert len(expected_keys) == len(dfs.keys()) + + def test_reading_all_sheets_with_blank(self, read_ext): + # Test reading all sheet names by setting sheet_name to None, + # In the case where some sheets are blank. 
+ # Issue #11711 + basename = "blank_with_header" + dfs = pd.read_excel(basename + read_ext, sheet_name=None) + expected_keys = ["Sheet1", "Sheet2", "Sheet3"] + tm.assert_contains_all(expected_keys, dfs.keys()) + + # GH6403 + def test_read_excel_blank(self, read_ext): + actual = pd.read_excel("blank" + read_ext, sheet_name="Sheet1") + tm.assert_frame_equal(actual, DataFrame()) + + def test_read_excel_blank_with_header(self, read_ext): + expected = DataFrame(columns=["col_1", "col_2"]) + actual = pd.read_excel("blank_with_header" + read_ext, sheet_name="Sheet1") + tm.assert_frame_equal(actual, expected) + + @pytest.mark.filterwarnings("ignore:Cell A4 is marked:UserWarning:openpyxl") + def test_date_conversion_overflow(self, request, engine, read_ext): + # GH 10001 : pandas.ExcelFile ignore parse_dates=False + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + expected = DataFrame( + [ + [pd.Timestamp("2016-03-12"), "Marc Johnson"], + [pd.Timestamp("2016-03-16"), "Jack Black"], + [1e20, "Timothy Brown"], + ], + columns=["DateColWithBigInt", "StringCol"], + ) + + if engine == "openpyxl": + request.node.add_marker( + pytest.mark.xfail(reason="Maybe not supported by openpyxl") + ) + + if engine is None and read_ext in (".xlsx", ".xlsm"): + # GH 35029 + request.node.add_marker( + pytest.mark.xfail(reason="Defaults to openpyxl, maybe not supported") + ) + + result = pd.read_excel("testdateoverflow" + read_ext) + tm.assert_frame_equal(result, expected) + + def test_sheet_name(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + filename = "test1" + sheet_name = "Sheet1" + + df1 = pd.read_excel( + filename + read_ext, sheet_name=sheet_name, index_col=0 + ) # doc + df2 = pd.read_excel(filename + read_ext, index_col=0, sheet_name=sheet_name) + + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + def test_excel_read_buffer(self, read_ext): + + pth = "test1" + read_ext + expected = pd.read_excel(pth, sheet_name="Sheet1", index_col=0) + with open(pth, "rb") as f: + actual = pd.read_excel(f, sheet_name="Sheet1", index_col=0) + tm.assert_frame_equal(expected, actual) + + def test_bad_engine_raises(self): + bad_engine = "foo" + with pytest.raises(ValueError, match="Unknown engine: foo"): + pd.read_excel("", engine=bad_engine) + + @pytest.mark.parametrize( + "sheet_name", + [3, [0, 3], [3, 0], "Sheet4", ["Sheet1", "Sheet4"], ["Sheet4", "Sheet1"]], + ) + def test_bad_sheetname_raises(self, read_ext, sheet_name): + # GH 39250 + msg = "Worksheet index 3 is invalid|Worksheet named 'Sheet4' not found" + with pytest.raises(ValueError, match=msg): + pd.read_excel("blank" + read_ext, sheet_name=sheet_name) + + def test_missing_file_raises(self, read_ext): + bad_file = f"foo{read_ext}" + # CI tests with other languages, translates to "No such file or directory" + match = r"(No such file or directory|没有那个文件或目录|File o directory non esistente)" + with pytest.raises(FileNotFoundError, match=match): + pd.read_excel(bad_file) + + def test_corrupt_bytes_raises(self, engine): + bad_stream = b"foo" + if engine is None: + error = ValueError + msg = ( + "Excel file format cannot be determined, you must " + "specify an engine manually." 
+ ) + elif engine == "xlrd": + from xlrd import XLRDError + + error = XLRDError + msg = ( + "Unsupported format, or corrupt file: Expected BOF " + "record; found b'foo'" + ) + else: + error = BadZipFile + msg = "File is not a zip file" + with pytest.raises(error, match=msg): + pd.read_excel(bad_stream) + + @pytest.mark.network + @tm.network( + url=( + "https://raw.githubusercontent.com/pandas-dev/pandas/main/" + "pandas/tests/io/data/excel/test1.xlsx" + ), + check_before_test=True, + ) + def test_read_from_http_url(self, read_ext): + url = ( + "https://raw.githubusercontent.com/pandas-dev/pandas/main/" + "pandas/tests/io/data/excel/test1" + read_ext + ) + url_table = pd.read_excel(url) + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + + @td.skip_if_not_us_locale + @pytest.mark.single_cpu + def test_read_from_s3_url(self, read_ext, s3_resource, s3so): + # Bucket "pandas-test" created in tests/io/conftest.py + with open("test1" + read_ext, "rb") as f: + s3_resource.Bucket("pandas-test").put_object(Key="test1" + read_ext, Body=f) + + url = "s3://pandas-test/test1" + read_ext + + url_table = pd.read_excel(url, storage_options=s3so) + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + + @pytest.mark.single_cpu + def test_read_from_s3_object(self, read_ext, s3_resource, s3so): + # GH 38788 + # Bucket "pandas-test" created in tests/io/conftest.py + with open("test1" + read_ext, "rb") as f: + s3_resource.Bucket("pandas-test").put_object(Key="test1" + read_ext, Body=f) + + import s3fs + + s3 = s3fs.S3FileSystem(**s3so) + + with s3.open("s3://pandas-test/test1" + read_ext) as f: + url_table = pd.read_excel(f) + + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + + @pytest.mark.slow + def test_read_from_file_url(self, read_ext, datapath): + + # FILE + localtable = os.path.join(datapath("io", "data", "excel"), "test1" + read_ext) + local_table = pd.read_excel(localtable) + + try: + url_table = pd.read_excel("file://localhost/" + localtable) + except URLError: + # fails on some systems + import platform + + platform_info = " ".join(platform.uname()).strip() + pytest.skip(f"failing on {platform_info}") + + tm.assert_frame_equal(url_table, local_table) + + def test_read_from_pathlib_path(self, read_ext): + + # GH12655 + from pathlib import Path + + str_path = "test1" + read_ext + expected = pd.read_excel(str_path, sheet_name="Sheet1", index_col=0) + + path_obj = Path("test1" + read_ext) + actual = pd.read_excel(path_obj, sheet_name="Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + @td.skip_if_no("py.path") + @td.check_file_leaks + def test_read_from_py_localpath(self, read_ext): + + # GH12655 + from py.path import local as LocalPath + + str_path = os.path.join("test1" + read_ext) + expected = pd.read_excel(str_path, sheet_name="Sheet1", index_col=0) + + path_obj = LocalPath().join("test1" + read_ext) + actual = pd.read_excel(path_obj, sheet_name="Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + @td.check_file_leaks + def test_close_from_py_localpath(self, read_ext): + + # GH31467 + str_path = os.path.join("test1" + read_ext) + with open(str_path, "rb") as f: + x = pd.read_excel(f, sheet_name="Sheet1", index_col=0) + del x + # should not throw an exception because the passed file was closed + f.read() + + def test_reader_seconds(self, request, engine, read_ext): + if engine == "pyxlsb": + request.node.add_marker( + 
pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + # Test reading times with and without milliseconds. GH5945. + expected = DataFrame.from_dict( + { + "Time": [ + time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54), + ] + } + ) + + actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1") + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1") + tm.assert_frame_equal(actual, expected) + + def test_read_excel_multiindex(self, request, read_ext): + # see gh-4679 + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]]) + mi_file = "testmultiindex" + read_ext + + # "mi_column" sheet + expected = DataFrame( + [ + [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=mi, + ) + + actual = pd.read_excel( + mi_file, sheet_name="mi_column", header=[0, 1], index_col=0 + ) + tm.assert_frame_equal(actual, expected) + + # "mi_index" sheet + expected.index = mi + expected.columns = ["a", "b", "c", "d"] + + actual = pd.read_excel(mi_file, sheet_name="mi_index", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + # "both" sheet + expected.columns = mi + + actual = pd.read_excel( + mi_file, sheet_name="both", index_col=[0, 1], header=[0, 1] + ) + tm.assert_frame_equal(actual, expected, check_names=False) + + # "mi_index_name" sheet + expected.columns = ["a", "b", "c", "d"] + expected.index = mi.set_names(["ilvl1", "ilvl2"]) + + actual = pd.read_excel(mi_file, sheet_name="mi_index_name", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected) + + # "mi_column_name" sheet + expected.index = list(range(4)) + expected.columns = mi.set_names(["c1", "c2"]) + actual = pd.read_excel( + mi_file, sheet_name="mi_column_name", header=[0, 1], index_col=0 + ) + tm.assert_frame_equal(actual, expected) + + # see gh-11317 + # "name_with_int" sheet + expected.columns = mi.set_levels([1, 2], level=1).set_names(["c1", "c2"]) + + actual = pd.read_excel( + mi_file, sheet_name="name_with_int", index_col=0, header=[0, 1] + ) + tm.assert_frame_equal(actual, expected) + + # "both_name" sheet + expected.columns = mi.set_names(["c1", "c2"]) + expected.index = mi.set_names(["ilvl1", "ilvl2"]) + + actual = pd.read_excel( + mi_file, sheet_name="both_name", index_col=[0, 1], header=[0, 1] + ) + tm.assert_frame_equal(actual, expected) + + # "both_skiprows" sheet + actual = pd.read_excel( + mi_file, + sheet_name="both_name_skiprows", + index_col=[0, 1], + header=[0, 1], + skiprows=2, + ) + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "sheet_name,idx_lvl2", + [ + ("both_name_blank_after_mi_name", [np.nan, "b", "a", "b"]), + ("both_name_multiple_blanks", [np.nan] * 4), + ], + ) + def test_read_excel_multiindex_blank_after_name( + self, request, read_ext, sheet_name, idx_lvl2 + ): + # GH34673 + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb (GH4679" + ) + ) + + mi_file = 
"testmultiindex" + read_ext + mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]], names=["c1", "c2"]) + expected = DataFrame( + [ + [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=mi, + index=MultiIndex.from_arrays( + (["foo", "foo", "bar", "bar"], idx_lvl2), + names=["ilvl1", "ilvl2"], + ), + ) + result = pd.read_excel( + mi_file, + sheet_name=sheet_name, + index_col=[0, 1], + header=[0, 1], + ) + tm.assert_frame_equal(result, expected) + + def test_read_excel_multiindex_header_only(self, read_ext): + # see gh-11733. + # + # Don't try to parse a header name if there isn't one. + mi_file = "testmultiindex" + read_ext + result = pd.read_excel(mi_file, sheet_name="index_col_none", header=[0, 1]) + + exp_columns = MultiIndex.from_product([("A", "B"), ("key", "val")]) + expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns) + tm.assert_frame_equal(result, expected) + + def test_excel_old_index_format(self, read_ext): + # see gh-4679 + filename = "test_index_name_pre17" + read_ext + + # We detect headers to determine if index names exist, so + # that "index" name in the "names" version of the data will + # now be interpreted as rows that include null data. + data = np.array( + [ + [None, None, None, None, None], + ["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"], + ] + ) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex( + levels=[ + ["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], + ["R1", "R_l1_g0", "R_l1_g1", "R_l1_g2", "R_l1_g3", "R_l1_g4"], + ], + codes=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], + names=[None, None], + ) + si = Index( + ["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], name=None + ) + + expected = DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(filename, sheet_name="single_names", index_col=0) + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(filename, sheet_name="multi_names", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected) + + # The analogous versions of the "names" version data + # where there are explicitly no names for the indices. 
+ data = np.array( + [ + ["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"], + ] + ) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex( + levels=[ + ["R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], + ["R_l1_g0", "R_l1_g1", "R_l1_g2", "R_l1_g3", "R_l1_g4"], + ], + codes=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], + names=[None, None], + ) + si = Index(["R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], name=None) + + expected = DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(filename, sheet_name="single_no_names", index_col=0) + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(filename, sheet_name="multi_no_names", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + def test_read_excel_bool_header_arg(self, read_ext): + # GH 6114 + msg = "Passing a bool to header is invalid" + for arg in [True, False]: + with pytest.raises(TypeError, match=msg): + pd.read_excel("test1" + read_ext, header=arg) + + def test_read_excel_skiprows(self, request, read_ext): + # GH 4903 + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + actual = pd.read_excel( + "testskiprows" + read_ext, sheet_name="skiprows_list", skiprows=[0, 2] + ) + expected = DataFrame( + [ + [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel( + "testskiprows" + read_ext, + sheet_name="skiprows_list", + skiprows=np.array([0, 2]), + ) + tm.assert_frame_equal(actual, expected) + + # GH36435 + actual = pd.read_excel( + "testskiprows" + read_ext, + sheet_name="skiprows_list", + skiprows=lambda x: x in [0, 2], + ) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel( + "testskiprows" + read_ext, + sheet_name="skiprows_list", + skiprows=3, + names=["a", "b", "c", "d"], + ) + expected = DataFrame( + [ + # [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_skiprows_callable_not_in(self, request, read_ext): + # GH 4903 + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + actual = pd.read_excel( + "testskiprows" + read_ext, + sheet_name="skiprows_list", + skiprows=lambda x: x not in [1, 3, 5], + ) + expected = DataFrame( + [ + [1, 2.5, pd.Timestamp("2015-01-01"), True], + # [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + # [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows(self, read_ext): + # GH 16645 + num_rows_to_pull = 5 + actual = pd.read_excel("test1" + read_ext, nrows=num_rows_to_pull) + expected = pd.read_excel("test1" + read_ext) + expected = expected[:num_rows_to_pull] + tm.assert_frame_equal(actual, expected) + + def 
test_read_excel_nrows_greater_than_nrows_in_file(self, read_ext): + # GH 16645 + expected = pd.read_excel("test1" + read_ext) + num_records_in_file = len(expected) + num_rows_to_pull = num_records_in_file + 10 + actual = pd.read_excel("test1" + read_ext, nrows=num_rows_to_pull) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows_non_integer_parameter(self, read_ext): + # GH 16645 + msg = "'nrows' must be an integer >=0" + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, nrows="5") + + @pytest.mark.parametrize( + "filename,sheet_name,header,index_col,skiprows", + [ + ("testmultiindex", "mi_column", [0, 1], 0, None), + ("testmultiindex", "mi_index", None, [0, 1], None), + ("testmultiindex", "both", [0, 1], [0, 1], None), + ("testmultiindex", "mi_column_name", [0, 1], 0, None), + ("testskiprows", "skiprows_list", None, None, [0, 2]), + ("testskiprows", "skiprows_list", None, None, lambda x: x == 0 or x == 2), + ], + ) + def test_read_excel_nrows_params( + self, read_ext, filename, sheet_name, header, index_col, skiprows + ): + """ + For various parameters, we should get the same result whether we + limit the rows during load (nrows=3) or after (df.iloc[:3]). + """ + # GH 46894 + expected = pd.read_excel( + filename + read_ext, + sheet_name=sheet_name, + header=header, + index_col=index_col, + skiprows=skiprows, + ).iloc[:3] + actual = pd.read_excel( + filename + read_ext, + sheet_name=sheet_name, + header=header, + index_col=index_col, + skiprows=skiprows, + nrows=3, + ) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_squeeze(self, read_ext): + # GH 12157 + f = "test_squeeze" + read_ext + + with tm.assert_produces_warning( + FutureWarning, + match="The squeeze argument has been deprecated " + "and will be removed in a future version. 
" + 'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n', + ): + actual = pd.read_excel( + f, sheet_name="two_columns", index_col=0, squeeze=True + ) + expected = Series([2, 3, 4], [4, 5, 6], name="b") + expected.index.name = "a" + tm.assert_series_equal(actual, expected) + + actual = pd.read_excel(f, sheet_name="two_columns", squeeze=True) + expected = DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]}) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel(f, sheet_name="one_column", squeeze=True) + expected = Series([1, 2, 3], name="a") + tm.assert_series_equal(actual, expected) + + def test_deprecated_kwargs(self, read_ext): + with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False): + pd.read_excel("test1" + read_ext, "Sheet1", 0) + + pd.read_excel("test1" + read_ext) + + def test_no_header_with_list_index_col(self, read_ext): + # GH 31783 + file_name = "testmultiindex" + read_ext + data = [("B", "B"), ("key", "val"), (3, 4), (3, 4)] + idx = MultiIndex.from_tuples( + [("A", "A"), ("key", "val"), (1, 2), (1, 2)], names=(0, 1) + ) + expected = DataFrame(data, index=idx, columns=(2, 3)) + result = pd.read_excel( + file_name, sheet_name="index_col_none", index_col=[0, 1], header=None + ) + tm.assert_frame_equal(expected, result) + + def test_one_col_noskip_blank_line(self, read_ext): + # GH 39808 + file_name = "one_col_blank_line" + read_ext + data = [0.5, np.nan, 1, 2] + expected = DataFrame(data, columns=["numbers"]) + result = pd.read_excel(file_name) + tm.assert_frame_equal(result, expected) + + def test_multiheader_two_blank_lines(self, read_ext): + # GH 40442 + file_name = "testmultiindex" + read_ext + columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")]) + data = [[np.nan, np.nan], [np.nan, np.nan], [1, 3], [2, 4]] + expected = DataFrame(data, columns=columns) + result = pd.read_excel( + file_name, sheet_name="mi_column_empty_rows", header=[0, 1] + ) + tm.assert_frame_equal(result, expected) + + def test_trailing_blanks(self, read_ext): + """ + Sheets can contain blank cells with no data. 
Some of our readers + were including those cells, creating many empty rows and columns + """ + file_name = "trailing_blanks" + read_ext + result = pd.read_excel(file_name) + assert result.shape == (3, 3) + + def test_ignore_chartsheets_by_str(self, request, engine, read_ext): + # GH 41448 + if engine == "odf": + pytest.skip("chartsheets do not exist in the ODF format") + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="pyxlsb can't distinguish chartsheets from worksheets" + ) + ) + with pytest.raises(ValueError, match="Worksheet named 'Chart1' not found"): + pd.read_excel("chartsheet" + read_ext, sheet_name="Chart1") + + def test_ignore_chartsheets_by_int(self, request, engine, read_ext): + # GH 41448 + if engine == "odf": + pytest.skip("chartsheets do not exist in the ODF format") + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="pyxlsb can't distinguish chartsheets from worksheets" + ) + ) + with pytest.raises( + ValueError, match="Worksheet index 1 is invalid, 1 worksheets found" + ): + pd.read_excel("chartsheet" + read_ext, sheet_name=1) + + def test_euro_decimal_format(self, read_ext): + # copied from read_csv + result = pd.read_excel("test_decimal" + read_ext, decimal=",", skiprows=1) + expected = DataFrame( + [ + [1, 1521.1541, 187101.9543, "ABC", "poi", 4.738797819], + [2, 121.12, 14897.76, "DEF", "uyt", 0.377320872], + [3, 878.158, 108013.434, "GHI", "rez", 2.735694704], + ], + columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"], + ) + tm.assert_frame_equal(result, expected) + + +class TestExcelFileRead: + @pytest.fixture(autouse=True) + def cd_and_set_engine(self, engine, datapath, monkeypatch): + """ + Change directory and set engine for ExcelFile objects. + """ + func = partial(pd.ExcelFile, engine=engine) + monkeypatch.chdir(datapath("io", "data", "excel")) + monkeypatch.setattr(pd, "ExcelFile", func) + + def test_engine_used(self, read_ext, engine): + expected_defaults = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlrd", + "ods": "odf", + } + + with pd.ExcelFile("test1" + read_ext) as excel: + result = excel.engine + + if engine is not None: + expected = engine + else: + expected = expected_defaults[read_ext[1:]] + assert result == expected + + def test_excel_passes_na(self, read_ext): + with pd.ExcelFile("test4" + read_ext) as excel: + parsed = pd.read_excel( + excel, sheet_name="Sheet1", keep_default_na=False, na_values=["apple"] + ) + expected = DataFrame( + [["NA"], [1], ["NA"], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + with pd.ExcelFile("test4" + read_ext) as excel: + parsed = pd.read_excel( + excel, sheet_name="Sheet1", keep_default_na=True, na_values=["apple"] + ) + expected = DataFrame( + [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + # 13967 + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, sheet_name="Sheet1", keep_default_na=False, na_values=["apple"] + ) + expected = DataFrame( + [["1.#QNAN"], [1], ["nan"], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, sheet_name="Sheet1", keep_default_na=True, na_values=["apple"] + ) + expected = DataFrame( + [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + 
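+ # --- Illustrative note (editorial sketch, not part of the upstream test file): the
+ # keep_default_na / na_values interplay exercised above can be summarised briefly.
+ # With keep_default_na=False only the strings listed in na_values are converted to
+ # NaN, so cells holding "NA", "nan" or "1.#QNAN" survive as text; with
+ # keep_default_na=True those built-in sentinels are converted as well.  A minimal
+ # round-trip sketch (assumes openpyxl is installed and a throwaway .xlsx path):
+ #
+ #     import pandas as pd
+ #     import pandas._testing as tm
+ #
+ #     with tm.ensure_clean(".xlsx") as path:
+ #         pd.DataFrame({"Test": ["NA", "apple", "rabbit"]}).to_excel(path, index=False)
+ #         only_custom = pd.read_excel(path, keep_default_na=False, na_values=["apple"])
+ #         combined = pd.read_excel(path, keep_default_na=True, na_values=["apple"])
+ #     # only_custom keeps the literal "NA"; combined maps both "NA" and "apple" to NaN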
@pytest.mark.parametrize("na_filter", [None, True, False]) + def test_excel_passes_na_filter(self, read_ext, na_filter): + # gh-25453 + kwargs = {} + + if na_filter is not None: + kwargs["na_filter"] = na_filter + + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, + sheet_name="Sheet1", + keep_default_na=True, + na_values=["apple"], + **kwargs, + ) + + if na_filter is False: + expected = [["1.#QNAN"], [1], ["nan"], ["apple"], ["rabbit"]] + else: + expected = [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]] + + expected = DataFrame(expected, columns=["Test"]) + tm.assert_frame_equal(parsed, expected) + + def test_excel_table_sheet_by_index(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + with pd.ExcelFile("test1" + read_ext) as excel: + df1 = pd.read_excel(excel, sheet_name=0, index_col=0) + df2 = pd.read_excel(excel, sheet_name=1, skiprows=[1], index_col=0) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + with pd.ExcelFile("test1" + read_ext) as excel: + df1 = excel.parse(0, index_col=0) + df2 = excel.parse(1, skiprows=[1], index_col=0) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + with pd.ExcelFile("test1" + read_ext) as excel: + df3 = pd.read_excel(excel, sheet_name=0, index_col=0, skipfooter=1) + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + with pd.ExcelFile("test1" + read_ext) as excel: + df3 = excel.parse(0, index_col=0, skipfooter=1) + + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + def test_sheet_name(self, request, read_ext, df_ref): + if read_ext == ".xlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + filename = "test1" + sheet_name = "Sheet1" + + with pd.ExcelFile(filename + read_ext) as excel: + df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc + + with pd.ExcelFile(filename + read_ext) as excel: + df2_parse = excel.parse(index_col=0, sheet_name=sheet_name) + + tm.assert_frame_equal(df1_parse, df_ref, check_names=False) + tm.assert_frame_equal(df2_parse, df_ref, check_names=False) + + @pytest.mark.parametrize( + "sheet_name", + [3, [0, 3], [3, 0], "Sheet4", ["Sheet1", "Sheet4"], ["Sheet4", "Sheet1"]], + ) + def test_bad_sheetname_raises(self, read_ext, sheet_name): + # GH 39250 + msg = "Worksheet index 3 is invalid|Worksheet named 'Sheet4' not found" + with pytest.raises(ValueError, match=msg): + with pd.ExcelFile("blank" + read_ext) as excel: + excel.parse(sheet_name=sheet_name) + + def test_excel_read_buffer(self, engine, read_ext): + pth = "test1" + read_ext + expected = pd.read_excel(pth, sheet_name="Sheet1", index_col=0, engine=engine) + + with open(pth, "rb") as f: + with pd.ExcelFile(f) as xls: + actual = pd.read_excel(xls, sheet_name="Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + def test_reader_closes_file(self, engine, read_ext): + with open("test1" + read_ext, "rb") as f: + with pd.ExcelFile(f) as xlsx: + # parses okay + pd.read_excel(xlsx, sheet_name="Sheet1", index_col=0, engine=engine) + + assert f.closed + + def test_conflicting_excel_engines(self, read_ext): + # GH 26566 + msg = "Engine should not be specified when passing an ExcelFile" + + with pd.ExcelFile("test1" + read_ext) as xl: + with pytest.raises(ValueError, match=msg): + 
pd.read_excel(xl, engine="foo") + + def test_excel_read_binary(self, engine, read_ext): + # GH 15914 + expected = pd.read_excel("test1" + read_ext, engine=engine) + + with open("test1" + read_ext, "rb") as f: + data = f.read() + + actual = pd.read_excel(data, engine=engine) + tm.assert_frame_equal(expected, actual) + + def test_excel_read_binary_via_read_excel(self, read_ext, engine): + # GH 38424 + with open("test1" + read_ext, "rb") as f: + result = pd.read_excel(f) + expected = pd.read_excel("test1" + read_ext, engine=engine) + tm.assert_frame_equal(result, expected) + + def test_read_excel_header_index_out_of_range(self, engine): + # GH#43143 + with open("df_header_oob.xlsx", "rb") as f: + with pytest.raises(ValueError, match="exceeds maximum"): + pd.read_excel(f, header=[0, 1]) + + @pytest.mark.parametrize("filename", ["df_empty.xlsx", "df_equals.xlsx"]) + def test_header_with_index_col(self, filename): + # GH 33476 + idx = Index(["Z"], name="I2") + cols = MultiIndex.from_tuples([("A", "B"), ("A", "B.1")], names=["I11", "I12"]) + expected = DataFrame([[1, 3]], index=idx, columns=cols, dtype="int64") + result = pd.read_excel( + filename, sheet_name="Sheet1", index_col=0, header=[0, 1] + ) + tm.assert_frame_equal(expected, result) + + def test_read_datetime_multiindex(self, request, engine, read_ext): + # GH 34748 + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="Sheets containing datetimes not supported by pyxlsb" + ) + ) + + f = "test_datetime_mi" + read_ext + with pd.ExcelFile(f) as excel: + actual = pd.read_excel(excel, header=[0, 1], index_col=0, engine=engine) + expected_column_index = MultiIndex.from_tuples( + [(pd.to_datetime("02/29/2020"), pd.to_datetime("03/01/2020"))], + names=[ + pd.to_datetime("02/29/2020").to_pydatetime(), + pd.to_datetime("03/01/2020").to_pydatetime(), + ], + ) + expected = DataFrame([], columns=expected_column_index) + + tm.assert_frame_equal(expected, actual) + + def test_engine_invalid_option(self, read_ext): + # read_ext includes the '.' 
hence the weird formatting + with pytest.raises(ValueError, match="Value must be one of *"): + with pd.option_context(f"io.excel{read_ext}.reader", "abc"): + pass + + def test_ignore_chartsheets(self, request, engine, read_ext): + # GH 41448 + if engine == "odf": + pytest.skip("chartsheets do not exist in the ODF format") + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="pyxlsb can't distinguish chartsheets from worksheets" + ) + ) + with pd.ExcelFile("chartsheet" + read_ext) as excel: + assert excel.sheet_names == ["Sheet1"] + + def test_corrupt_files_closed(self, engine, read_ext): + # GH41778 + errors = (BadZipFile,) + if engine is None: + pytest.skip(f"Invalid test for engine={engine}") + elif engine == "xlrd": + import xlrd + + errors = (BadZipFile, xlrd.biffh.XLRDError) + + with tm.ensure_clean(f"corrupt{read_ext}") as file: + Path(file).write_text("corrupt") + with tm.assert_produces_warning(False): + try: + pd.ExcelFile(file, engine=engine) + except errors: + pass diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py new file mode 100644 index 00000000..f26df440 --- /dev/null +++ b/pandas/tests/io/excel/test_style.py @@ -0,0 +1,300 @@ +import contextlib +import time + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + read_excel, +) +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter +from pandas.io.formats.excel import ExcelFormatter + +pytest.importorskip("jinja2") +# jinja2 is currently required for Styler.__init__(). Technically Styler.to_excel +# could compute styles and render to excel without jinja2, since there is no +# 'template' file, but this needs the import error to be delayed until render time.
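+ # --- Illustrative note (editorial sketch, not part of the upstream module): the tests
+ # below all follow the same recipe -- style a DataFrame, write both the plain frame
+ # and the Styler through one ExcelWriter, then reload the workbook with openpyxl and
+ # compare cell-level style attributes.  Roughly (assumes openpyxl is installed and
+ # `path` is a temporary .xlsx file):
+ #
+ #     import openpyxl
+ #     df = DataFrame(np.random.randn(2, 2))
+ #     styler = df.style.applymap(lambda v: "font-weight: bold;")
+ #     with ExcelWriter(path, engine="openpyxl") as writer:
+ #         df.to_excel(writer, sheet_name="dataframe")
+ #         styler.to_excel(writer, sheet_name="styled")
+ #     with contextlib.closing(openpyxl.load_workbook(path)) as wb:
+ #         assert wb["styled"].cell(2, 2).font.b          # styled data cell is bold
+ #         assert not wb["dataframe"].cell(2, 2).font.b   # unstyled data cell is not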
+ + +def assert_equal_cell_styles(cell1, cell2): + # TODO: should find a better way to check equality + assert cell1.alignment.__dict__ == cell2.alignment.__dict__ + assert cell1.border.__dict__ == cell2.border.__dict__ + assert cell1.fill.__dict__ == cell2.fill.__dict__ + assert cell1.font.__dict__ == cell2.font.__dict__ + assert cell1.number_format == cell2.number_format + assert cell1.protection.__dict__ == cell2.protection.__dict__ + + +@pytest.mark.parametrize( + "engine", + ["xlsxwriter", "openpyxl"], +) +def test_styler_to_excel_unstyled(engine): + # compare DataFrame.to_excel and Styler.to_excel when no styles applied + pytest.importorskip(engine) + df = DataFrame(np.random.randn(2, 2)) + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine=engine) as writer: + df.to_excel(writer, sheet_name="dataframe") + df.style.to_excel(writer, sheet_name="unstyled") + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + with contextlib.closing(openpyxl.load_workbook(path)) as wb: + + for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + assert cell1.value == cell2.value + assert_equal_cell_styles(cell1, cell2) + + +shared_style_params = [ + ( + "background-color: #111222", + ["fill", "fgColor", "rgb"], + {"xlsxwriter": "FF111222", "openpyxl": "00111222"}, + ), + ( + "color: #111222", + ["font", "color", "value"], + {"xlsxwriter": "FF111222", "openpyxl": "00111222"}, + ), + ("font-family: Arial;", ["font", "name"], "arial"), + ("font-weight: bold;", ["font", "b"], True), + ("font-style: italic;", ["font", "i"], True), + ("text-decoration: underline;", ["font", "u"], "single"), + ("number-format: $??,???.00;", ["number_format"], "$??,???.00"), + ("text-align: left;", ["alignment", "horizontal"], "left"), + ( + "vertical-align: bottom;", + ["alignment", "vertical"], + {"xlsxwriter": None, "openpyxl": "bottom"}, # xlsxwriter Fails + ), + ("vertical-align: middle;", ["alignment", "vertical"], "center"), + # Border widths + ("border-left: 2pt solid red", ["border", "left", "style"], "medium"), + ("border-left: 1pt dotted red", ["border", "left", "style"], "dotted"), + ("border-left: 2pt dotted red", ["border", "left", "style"], "mediumDashDotDot"), + ("border-left: 1pt dashed red", ["border", "left", "style"], "dashed"), + ("border-left: 2pt dashed red", ["border", "left", "style"], "mediumDashed"), + ("border-left: 1pt solid red", ["border", "left", "style"], "thin"), + ("border-left: 3pt solid red", ["border", "left", "style"], "thick"), + # Border expansion + ( + "border-left: 2pt solid #111222", + ["border", "left", "color", "rgb"], + {"xlsxwriter": "FF111222", "openpyxl": "00111222"}, + ), + ("border: 1pt solid red", ["border", "top", "style"], "thin"), + ( + "border: 1pt solid #111222", + ["border", "top", "color", "rgb"], + {"xlsxwriter": "FF111222", "openpyxl": "00111222"}, + ), + ("border: 1pt solid red", ["border", "right", "style"], "thin"), + ( + "border: 1pt solid #111222", + ["border", "right", "color", "rgb"], + {"xlsxwriter": "FF111222", "openpyxl": "00111222"}, + ), + ("border: 1pt solid red", ["border", "bottom", "style"], "thin"), + ( + "border: 1pt solid #111222", + ["border", "bottom", "color", "rgb"], + {"xlsxwriter": "FF111222", "openpyxl": "00111222"}, + ), + ("border: 1pt solid red", ["border", "left", "style"], "thin"), + ( + "border: 1pt solid #111222", + ["border", "left", "color", "rgb"], + {"xlsxwriter": "FF111222", "openpyxl": 
"00111222"}, + ), + # Border styles + ( + "border-left-style: hair; border-left-color: black", + ["border", "left", "style"], + "hair", + ), +] + + +@pytest.mark.parametrize( + "engine", + ["xlsxwriter", "openpyxl"], +) +@pytest.mark.parametrize("css, attrs, expected", shared_style_params) +def test_styler_to_excel_basic(engine, css, attrs, expected): + pytest.importorskip(engine) + df = DataFrame(np.random.randn(1, 1)) + styler = df.style.applymap(lambda x: css) + + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine=engine) as writer: + df.to_excel(writer, sheet_name="dataframe") + styler.to_excel(writer, sheet_name="styled") + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + with contextlib.closing(openpyxl.load_workbook(path)) as wb: + + # test unstyled data cell does not have expected styles + # test styled cell has expected styles + u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2) + for attr in attrs: + u_cell, s_cell = getattr(u_cell, attr, None), getattr(s_cell, attr) + + if isinstance(expected, dict): + assert u_cell is None or u_cell != expected[engine] + assert s_cell == expected[engine] + else: + assert u_cell is None or u_cell != expected + assert s_cell == expected + + +@pytest.mark.parametrize( + "engine", + ["xlsxwriter", "openpyxl"], +) +@pytest.mark.parametrize("css, attrs, expected", shared_style_params) +def test_styler_to_excel_basic_indexes(engine, css, attrs, expected): + pytest.importorskip(engine) + df = DataFrame(np.random.randn(1, 1)) + + styler = df.style + styler.applymap_index(lambda x: css, axis=0) + styler.applymap_index(lambda x: css, axis=1) + + null_styler = df.style + null_styler.applymap(lambda x: "null: css;") + null_styler.applymap_index(lambda x: "null: css;", axis=0) + null_styler.applymap_index(lambda x: "null: css;", axis=1) + + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine=engine) as writer: + null_styler.to_excel(writer, sheet_name="null_styled") + styler.to_excel(writer, sheet_name="styled") + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + with contextlib.closing(openpyxl.load_workbook(path)) as wb: + + # test null styled index cells does not have expected styles + # test styled cell has expected styles + ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1) + uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2) + for attr in attrs: + ui_cell, si_cell = getattr(ui_cell, attr, None), getattr(si_cell, attr) + uc_cell, sc_cell = getattr(uc_cell, attr, None), getattr(sc_cell, attr) + + if isinstance(expected, dict): + assert ui_cell is None or ui_cell != expected[engine] + assert si_cell == expected[engine] + assert uc_cell is None or uc_cell != expected[engine] + assert sc_cell == expected[engine] + else: + assert ui_cell is None or ui_cell != expected + assert si_cell == expected + assert uc_cell is None or uc_cell != expected + assert sc_cell == expected + + +# From https://openpyxl.readthedocs.io/en/stable/api/openpyxl.styles.borders.html +# Note: Leaving behavior of "width"-type styles undefined; user should use border-width +# instead +excel_border_styles = [ + # "thin", + "dashed", + "mediumDashDot", + "dashDotDot", + "hair", + "dotted", + "mediumDashDotDot", + # "medium", + "double", + "dashDot", + "slantDashDot", + # "thick", + "mediumDashed", +] + + +@pytest.mark.parametrize( + "engine", + ["xlsxwriter", "openpyxl"], +) +@pytest.mark.parametrize("border_style", 
excel_border_styles) +def test_styler_to_excel_border_style(engine, border_style): + css = f"border-left: {border_style} black thin" + attrs = ["border", "left", "style"] + expected = border_style + + pytest.importorskip(engine) + df = DataFrame(np.random.randn(1, 1)) + styler = df.style.applymap(lambda x: css) + + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine=engine) as writer: + df.to_excel(writer, sheet_name="dataframe") + styler.to_excel(writer, sheet_name="styled") + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + with contextlib.closing(openpyxl.load_workbook(path)) as wb: + + # test unstyled data cell does not have expected styles + # test styled cell has expected styles + u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2) + for attr in attrs: + u_cell, s_cell = getattr(u_cell, attr, None), getattr(s_cell, attr) + + if isinstance(expected, dict): + assert u_cell is None or u_cell != expected[engine] + assert s_cell == expected[engine] + else: + assert u_cell is None or u_cell != expected + assert s_cell == expected + + +def test_styler_custom_converter(): + openpyxl = pytest.importorskip("openpyxl") + + def custom_converter(css): + return {"font": {"color": {"rgb": "111222"}}} + + df = DataFrame(np.random.randn(1, 1)) + styler = df.style.applymap(lambda x: "color: #888999") + with tm.ensure_clean(".xlsx") as path: + with ExcelWriter(path, engine="openpyxl") as writer: + ExcelFormatter(styler, style_converter=custom_converter).write( + writer, sheet_name="custom" + ) + + with contextlib.closing(openpyxl.load_workbook(path)) as wb: + assert wb["custom"].cell(2, 2).font.color.value == "00111222" + + +@pytest.mark.single_cpu +@td.skip_if_not_us_locale +def test_styler_to_s3(s3_resource, s3so): + # GH#46381 + + mock_bucket_name, target_file = "pandas-test", "test.xlsx" + df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]}) + styler = df.style.set_sticky(axis="index") + styler.to_excel(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so) + timeout = 5 + while True: + if target_file in ( + obj.key for obj in s3_resource.Bucket("pandas-test").objects.all() + ): + break + time.sleep(0.1) + timeout -= 0.1 + assert timeout > 0, "Timed out waiting for file to appear on moto" + result = read_excel( + f"s3://{mock_bucket_name}/{target_file}", index_col=0, storage_options=s3so + ) + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py new file mode 100644 index 00000000..f6a77d3a --- /dev/null +++ b/pandas/tests/io/excel/test_writers.py @@ -0,0 +1,1411 @@ +from datetime import ( + date, + datetime, + timedelta, +) +from functools import partial +from io import BytesIO +import os +import re + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + option_context, +) +import pandas._testing as tm + +from pandas.io.excel import ( + ExcelFile, + ExcelWriter, + _OpenpyxlWriter, + _XlsxWriter, + _XlwtWriter, + register_writer, +) + + +@pytest.fixture +def path(ext): + """ + Fixture to open file for use in each test case. + """ + with tm.ensure_clean(ext) as file_path: + yield file_path + + +@pytest.fixture +def set_engine(engine, ext): + """ + Fixture to set engine for use in each test case. 
+ + Rather than requiring `engine=...` to be provided explicitly as an + argument in each test, this fixture sets a global option to dictate + which engine should be used to write Excel files. After executing + the test it rolls back said change to the global option. + """ + option_name = f"io.excel.{ext.strip('.')}.writer" + with option_context(option_name, engine): + yield + + +@pytest.mark.parametrize( + "ext", + [ + pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), + pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), + pytest.param(".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]), + pytest.param( + ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")] + ), + pytest.param(".ods", marks=td.skip_if_no("odf")), + ], +) +class TestRoundTrip: + @pytest.mark.parametrize( + "header,expected", + [(None, DataFrame([np.nan] * 4)), (0, DataFrame({"Unnamed: 0": [np.nan] * 3}))], + ) + def test_read_one_empty_col_no_header(self, ext, header, expected): + # xref gh-12292 + filename = "no_header" + df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]]) + + with tm.ensure_clean(ext) as path: + df.to_excel(path, filename, index=False, header=False) + result = pd.read_excel( + path, sheet_name=filename, usecols=[0], header=header + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "header,expected", + [(None, DataFrame([0] + [np.nan] * 4)), (0, DataFrame([np.nan] * 4))], + ) + def test_read_one_empty_col_with_header(self, ext, header, expected): + filename = "with_header" + df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]]) + + with tm.ensure_clean(ext) as path: + df.to_excel(path, "with_header", index=False, header=True) + result = pd.read_excel( + path, sheet_name=filename, usecols=[0], header=header + ) + + tm.assert_frame_equal(result, expected) + + def test_set_column_names_in_parameter(self, ext): + # GH 12870 : pass down column names associated with + # keyword argument names + refdf = DataFrame([[1, "foo"], [2, "bar"], [3, "baz"]], columns=["a", "b"]) + + with tm.ensure_clean(ext) as pth: + with ExcelWriter(pth) as writer: + refdf.to_excel(writer, "Data_no_head", header=False, index=False) + refdf.to_excel(writer, "Data_with_head", index=False) + + refdf.columns = ["A", "B"] + + with ExcelFile(pth) as reader: + xlsdf_no_head = pd.read_excel( + reader, sheet_name="Data_no_head", header=None, names=["A", "B"] + ) + xlsdf_with_head = pd.read_excel( + reader, + sheet_name="Data_with_head", + index_col=None, + names=["A", "B"], + ) + + tm.assert_frame_equal(xlsdf_no_head, refdf) + tm.assert_frame_equal(xlsdf_with_head, refdf) + + def test_creating_and_reading_multiple_sheets(self, ext): + # see gh-9450 + # + # Test reading multiple sheets, from a runtime + # created Excel file with multiple sheets. 
+ def tdf(col_sheet_name): + d, i = [11, 22, 33], [1, 2, 3] + return DataFrame(d, i, columns=[col_sheet_name]) + + sheets = ["AAA", "BBB", "CCC"] + + dfs = [tdf(s) for s in sheets] + dfs = dict(zip(sheets, dfs)) + + with tm.ensure_clean(ext) as pth: + with ExcelWriter(pth) as ew: + for sheetname, df in dfs.items(): + df.to_excel(ew, sheetname) + + dfs_returned = pd.read_excel(pth, sheet_name=sheets, index_col=0) + + for s in sheets: + tm.assert_frame_equal(dfs[s], dfs_returned[s]) + + def test_read_excel_multiindex_empty_level(self, ext): + # see gh-12453 + with tm.ensure_clean(ext) as path: + df = DataFrame( + { + ("One", "x"): {0: 1}, + ("Two", "X"): {0: 3}, + ("Two", "Y"): {0: 7}, + ("Zero", ""): {0: 0}, + } + ) + + expected = DataFrame( + { + ("One", "x"): {0: 1}, + ("Two", "X"): {0: 3}, + ("Two", "Y"): {0: 7}, + ("Zero", "Unnamed: 4_level_1"): {0: 0}, + } + ) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + df = DataFrame( + { + ("Beg", ""): {0: 0}, + ("Middle", "x"): {0: 1}, + ("Tail", "X"): {0: 3}, + ("Tail", "Y"): {0: 7}, + } + ) + + expected = DataFrame( + { + ("Beg", "Unnamed: 1_level_1"): {0: 0}, + ("Middle", "x"): {0: 1}, + ("Tail", "X"): {0: 3}, + ("Tail", "Y"): {0: 7}, + } + ) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize("c_idx_names", [True, False]) + @pytest.mark.parametrize("r_idx_names", [True, False]) + @pytest.mark.parametrize("c_idx_levels", [1, 3]) + @pytest.mark.parametrize("r_idx_levels", [1, 3]) + def test_excel_multindex_roundtrip( + self, ext, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels, request + ): + # see gh-4679 + with tm.ensure_clean(ext) as pth: + if (c_idx_levels == 1 and c_idx_names) and not ( + r_idx_levels == 3 and not r_idx_names + ): + mark = pytest.mark.xfail( + reason="Column index name cannot be serialized unless " + "it's a MultiIndex" + ) + request.node.add_marker(mark) + + # Empty name case current read in as + # unnamed levels, not Nones. 
+ check_names = r_idx_names or r_idx_levels <= 1 + + df = tm.makeCustomDataframe( + 5, 5, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels + ) + df.to_excel(pth) + + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + df.iloc[0, :] = np.nan + df.to_excel(pth) + + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + df.iloc[-1, :] = np.nan + df.to_excel(pth) + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + def test_read_excel_parse_dates(self, ext): + # see gh-11544, gh-12051 + df = DataFrame( + {"col": [1, 2, 3], "date_strings": pd.date_range("2012-01-01", periods=3)} + ) + df2 = df.copy() + df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y") + + with tm.ensure_clean(ext) as pth: + df2.to_excel(pth) + + res = pd.read_excel(pth, index_col=0) + tm.assert_frame_equal(df2, res) + + res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0) + tm.assert_frame_equal(df, res) + + date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y") + res = pd.read_excel( + pth, parse_dates=["date_strings"], date_parser=date_parser, index_col=0 + ) + tm.assert_frame_equal(df, res) + + def test_multiindex_interval_datetimes(self, ext): + # GH 30986 + midx = MultiIndex.from_arrays( + [ + range(4), + pd.interval_range( + start=pd.Timestamp("2020-01-01"), periods=4, freq="6M" + ), + ] + ) + df = DataFrame(range(4), index=midx) + with tm.ensure_clean(ext) as pth: + df.to_excel(pth) + result = pd.read_excel(pth, index_col=[0, 1]) + expected = DataFrame( + range(4), + MultiIndex.from_arrays( + [ + range(4), + [ + "(2020-01-31, 2020-07-31]", + "(2020-07-31, 2021-01-31]", + "(2021-01-31, 2021-07-31]", + "(2021-07-31, 2022-01-31]", + ], + ] + ), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "engine,ext", + [ + pytest.param( + "openpyxl", + ".xlsx", + marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")], + ), + pytest.param( + "openpyxl", + ".xlsm", + marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")], + ), + pytest.param( + "xlwt", ".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")] + ), + pytest.param( + "xlsxwriter", + ".xlsx", + marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")], + ), + pytest.param("odf", ".ods", marks=td.skip_if_no("odf")), + ], +) +@pytest.mark.usefixtures("set_engine") +class TestExcelWriter: + def test_excel_sheet_size(self, path): + + # GH 26080 + breaking_row_count = 2**20 + 1 + breaking_col_count = 2**14 + 1 + # purposely using two arrays to prevent memory issues while testing + row_arr = np.zeros(shape=(breaking_row_count, 1)) + col_arr = np.zeros(shape=(1, breaking_col_count)) + row_df = DataFrame(row_arr) + col_df = DataFrame(col_arr) + + msg = "sheet is too large" + with pytest.raises(ValueError, match=msg): + row_df.to_excel(path) + + with pytest.raises(ValueError, match=msg): + col_df.to_excel(path) + + def test_excel_sheet_by_name_raise(self, path): + gt = DataFrame(np.random.randn(10, 2)) + gt.to_excel(path) + + with ExcelFile(path) as xl: + df = pd.read_excel(xl, sheet_name=0, index_col=0) + + tm.assert_frame_equal(gt, df) + + msg = "Worksheet named '0' not found" + with pytest.raises(ValueError, match=msg): + pd.read_excel(xl, "0") + + def 
test_excel_writer_context_manager(self, frame, path): + with ExcelWriter(path) as writer: + frame.to_excel(writer, "Data1") + frame2 = frame.copy() + frame2.columns = frame.columns[::-1] + frame2.to_excel(writer, "Data2") + + with ExcelFile(path) as reader: + found_df = pd.read_excel(reader, sheet_name="Data1", index_col=0) + found_df2 = pd.read_excel(reader, sheet_name="Data2", index_col=0) + + tm.assert_frame_equal(found_df, frame) + tm.assert_frame_equal(found_df2, frame2) + + def test_roundtrip(self, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # test roundtrip + frame.to_excel(path, "test1") + recons = pd.read_excel(path, sheet_name="test1", index_col=0) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", index=False) + recons = pd.read_excel(path, sheet_name="test1", index_col=None) + recons.index = frame.index + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", na_rep="NA") + recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["NA"]) + tm.assert_frame_equal(frame, recons) + + # GH 3611 + frame.to_excel(path, "test1", na_rep="88") + recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["88"]) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", na_rep="88") + recons = pd.read_excel( + path, sheet_name="test1", index_col=0, na_values=[88, 88.0] + ) + tm.assert_frame_equal(frame, recons) + + # GH 6573 + frame.to_excel(path, "Sheet1") + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "0") + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(frame, recons) + + # GH 8825 Pandas Series should provide to_excel method + s = frame["A"] + s.to_excel(path) + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(s.to_frame(), recons) + + def test_mixed(self, frame, path): + mixed_frame = frame.copy() + mixed_frame["foo"] = "bar" + + mixed_frame.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(mixed_frame, recons) + + def test_ts_frame(self, tsframe, path): + df = tsframe + + # freq doesn't round-trip + index = pd.DatetimeIndex(np.asarray(df.index), freq=None) + df.index = index + + df.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(df, recons) + + def test_basics_with_nan(self, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + @pytest.mark.parametrize("np_type", [np.int8, np.int16, np.int32, np.int64]) + def test_int_types(self, np_type, path): + # Test np.int values read come back as int + # (rather than float which is Excel's format). 
+ df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + + int_frame = df.astype(np.int64) + tm.assert_frame_equal(int_frame, recons) + + recons2 = pd.read_excel(path, sheet_name="test1", index_col=0) + tm.assert_frame_equal(int_frame, recons2) + + # Test with convert_float=False comes back as float. + float_frame = df.astype(float) + float_frame.columns = float_frame.columns.astype(float) + float_frame.index = float_frame.index.astype(float) + with tm.assert_produces_warning( + FutureWarning, match="convert_float is deprecated" + ): + recons = pd.read_excel( + path, sheet_name="test1", convert_float=False, index_col=0 + ) + tm.assert_frame_equal(recons, float_frame) + + @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) + def test_float_types(self, np_type, path): + # Test np.float values read come back as float. + df = DataFrame(np.random.random_sample(10), dtype=np_type) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np_type + ) + + tm.assert_frame_equal(df, recons) + + def test_bool_types(self, path): + # Test np.bool_ values read come back as float. + df = DataFrame([1, 0, True, False], dtype=np.bool_) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np.bool_ + ) + + tm.assert_frame_equal(df, recons) + + def test_inf_roundtrip(self, path): + df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + + tm.assert_frame_equal(df, recons) + + def test_sheets(self, frame, tsframe, path): + + # freq doesn't round-trip + index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None) + tsframe.index = index + + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # Test writing to separate sheets + with ExcelWriter(path) as writer: + frame.to_excel(writer, "test1") + tsframe.to_excel(writer, "test2") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(frame, recons) + recons = pd.read_excel(reader, sheet_name="test2", index_col=0) + tm.assert_frame_equal(tsframe, recons) + assert 2 == len(reader.sheet_names) + assert "test1" == reader.sheet_names[0] + assert "test2" == reader.sheet_names[1] + + def test_colaliases(self, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # column aliases + col_aliases = Index(["AA", "X", "Y", "Z"]) + frame.to_excel(path, "test1", header=col_aliases) + with ExcelFile(path) as reader: + rs = pd.read_excel(reader, sheet_name="test1", index_col=0) + xp = frame.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) + + def test_roundtrip_indexlabels(self, merge_cells, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", 
header=False) + frame.to_excel(path, "test1", index=False) + + # test index_label + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells) + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np.int64 + ) + df.index.names = ["test"] + assert df.index.names == recons.index.names + + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel( + path, + "test1", + index_label=["test", "dummy", "dummy2"], + merge_cells=merge_cells, + ) + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np.int64 + ) + df.index.names = ["test"] + assert df.index.names == recons.index.names + + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells) + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( + np.int64 + ) + df.index.names = ["test"] + tm.assert_frame_equal(df, recons.astype(bool)) + + frame.to_excel( + path, + "test1", + columns=["A", "B", "C", "D"], + index=False, + merge_cells=merge_cells, + ) + # take 'A' and 'B' as indexes (same row as cols 'C', 'D') + df = frame.copy() + df = df.set_index(["A", "B"]) + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) + tm.assert_frame_equal(df, recons) + + def test_excel_roundtrip_indexname(self, merge_cells, path): + df = DataFrame(np.random.randn(10, 4)) + df.index.name = "foo" + + df.to_excel(path, merge_cells=merge_cells) + + with ExcelFile(path) as xf: + result = pd.read_excel(xf, sheet_name=xf.sheet_names[0], index_col=0) + + tm.assert_frame_equal(result, df) + assert result.index.name == "foo" + + def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path): + # datetime.date, not sure what to test here exactly + + # freq does not round-trip + index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None) + tsframe.index = index + + tsf = tsframe.copy() + + tsf.index = [x.date() for x in tsframe.index] + tsf.to_excel(path, "test1", merge_cells=merge_cells) + + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + + tm.assert_frame_equal(tsframe, recons) + + def test_excel_date_datetime_format(self, ext, path): + # see gh-4133 + # + # Excel output format strings + df = DataFrame( + [ + [date(2014, 1, 31), date(1999, 9, 24)], + [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["DATE", "DATETIME"], + columns=["X", "Y"], + ) + df_expected = DataFrame( + [ + [datetime(2014, 1, 31), datetime(1999, 9, 24)], + [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["DATE", "DATETIME"], + columns=["X", "Y"], + ) + + with tm.ensure_clean(ext) as filename2: + with ExcelWriter(path) as writer1: + df.to_excel(writer1, "test1") + + with ExcelWriter( + filename2, + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS", + ) as writer2: + df.to_excel(writer2, "test1") + + with ExcelFile(path) as reader1: + rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0) + + with ExcelFile(filename2) as reader2: + rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0) + + tm.assert_frame_equal(rs1, rs2) + + # Since the reader returns a datetime object for dates, + # we need to use df_expected to check the result. 
+ tm.assert_frame_equal(rs2, df_expected) + + def test_to_excel_interval_no_labels(self, path): + # see gh-19242 + # + # Test writing Interval without labels. + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) + expected = df.copy() + + df["new"] = pd.cut(df[0], 10) + expected["new"] = pd.cut(expected[0], 10).astype(str) + + df.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_interval_labels(self, path): + # see gh-19242 + # + # Test writing Interval with labels. + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) + expected = df.copy() + intervals = pd.cut( + df[0], 10, labels=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] + ) + df["new"] = intervals + expected["new"] = pd.Series(list(intervals)) + + df.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_timedelta(self, path): + # see gh-19242, gh-9155 + # + # Test writing timedelta to xls. + df = DataFrame( + np.random.randint(-10, 10, size=(20, 1)), columns=["A"], dtype=np.int64 + ) + expected = df.copy() + + df["new"] = df["A"].apply(lambda x: timedelta(seconds=x)) + expected["new"] = expected["A"].apply( + lambda x: timedelta(seconds=x).total_seconds() / 86400 + ) + + df.to_excel(path, "test1") + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_periodindex(self, tsframe, path): + xp = tsframe.resample("M", kind="period").mean() + + xp.to_excel(path, "sht1") + + with ExcelFile(path) as reader: + rs = pd.read_excel(reader, sheet_name="sht1", index_col=0) + tm.assert_frame_equal(xp, rs.to_period("M")) + + def test_to_excel_multiindex(self, merge_cells, frame, path): + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", columns=["A", "B"]) + + # round trip + frame.to_excel(path, "test1", merge_cells=merge_cells) + with ExcelFile(path) as reader: + df = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) + tm.assert_frame_equal(frame, df) + + # GH13511 + def test_to_excel_multiindex_nan_label(self, merge_cells, path): + df = DataFrame({"A": [None, 2, 3], "B": [10, 20, 30], "C": np.random.sample(3)}) + df = df.set_index(["A", "B"]) + + df.to_excel(path, merge_cells=merge_cells) + df1 = pd.read_excel(path, index_col=[0, 1]) + tm.assert_frame_equal(df, df1) + + # Test for Issue 11328. 
If column indices are integers, make + # sure they are handled correctly for either setting of + # merge_cells + def test_to_excel_multiindex_cols(self, merge_cells, frame, path): + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)]) + frame.columns = new_cols_index + header = [0, 1] + if not merge_cells: + header = 0 + + # round trip + frame.to_excel(path, "test1", merge_cells=merge_cells) + with ExcelFile(path) as reader: + df = pd.read_excel( + reader, sheet_name="test1", header=header, index_col=[0, 1] + ) + if not merge_cells: + fm = frame.columns.format(sparsify=False, adjoin=False, names=False) + frame.columns = [".".join(map(str, q)) for q in zip(*fm)] + tm.assert_frame_equal(frame, df) + + def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path): + # try multiindex with dates + new_index = [tsframe.index, np.arange(len(tsframe.index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.index.names = ["time", "foo"] + tsframe.to_excel(path, "test1", merge_cells=merge_cells) + with ExcelFile(path) as reader: + recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) + + tm.assert_frame_equal(tsframe, recons) + assert recons.index.names == ("time", "foo") + + def test_to_excel_multiindex_no_write_index(self, path): + # Test writing and re-reading a MI without the index. GH 5616. + + # Initial non-MI frame. + frame1 = DataFrame({"a": [10, 20], "b": [30, 40], "c": [50, 60]}) + + # Add a MI. + frame2 = frame1.copy() + multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) + frame2.index = multi_index + + # Write out to Excel without the index. + frame2.to_excel(path, "test1", index=False) + + # Read it back in. + with ExcelFile(path) as reader: + frame3 = pd.read_excel(reader, sheet_name="test1") + + # Test that it is the same as the initial frame. + tm.assert_frame_equal(frame1, frame3) + + def test_to_excel_empty_multiindex(self, path): + # GH 19543. + expected = DataFrame([], columns=[0, 1, 2]) + + df = DataFrame([], index=MultiIndex.from_tuples([], names=[0, 1]), columns=[2]) + df.to_excel(path, "test1") + + with ExcelFile(path) as reader: + result = pd.read_excel(reader, sheet_name="test1") + tm.assert_frame_equal( + result, expected, check_index_type=False, check_dtype=False + ) + + def test_to_excel_float_format(self, path): + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + df.to_excel(path, "test1", float_format="%.2f") + + with ExcelFile(path) as reader: + result = pd.read_excel(reader, sheet_name="test1", index_col=0) + + expected = DataFrame( + [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + tm.assert_frame_equal(result, expected) + + def test_to_excel_output_encoding(self, ext): + # Avoid mixed inferred_type. + df = DataFrame( + [["\u0192", "\u0193", "\u0194"], ["\u0195", "\u0196", "\u0197"]], + index=["A\u0192", "B"], + columns=["X\u0193", "Y", "Z"], + ) + + with tm.ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: + df.to_excel(filename, sheet_name="TestSheet") + result = pd.read_excel(filename, sheet_name="TestSheet", index_col=0) + tm.assert_frame_equal(result, df) + + def test_to_excel_unicode_filename(self, ext): + with tm.ensure_clean("\u0192u." 
+ ext) as filename: + try: + f = open(filename, "wb") + except UnicodeEncodeError: + pytest.skip("No unicode file names on this system") + finally: + f.close() + + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + df.to_excel(filename, "test1", float_format="%.2f") + + with ExcelFile(filename) as reader: + result = pd.read_excel(reader, sheet_name="test1", index_col=0) + + expected = DataFrame( + [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("use_headers", [True, False]) + @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3]) + @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3]) + def test_excel_010_hemstring( + self, merge_cells, c_idx_nlevels, r_idx_nlevels, use_headers, path + ): + def roundtrip(data, header=True, parser_hdr=0, index=True): + data.to_excel(path, header=header, merge_cells=merge_cells, index=index) + + with ExcelFile(path) as xf: + return pd.read_excel( + xf, sheet_name=xf.sheet_names[0], header=parser_hdr + ) + + # Basic test. + parser_header = 0 if use_headers else None + res = roundtrip(DataFrame([0]), use_headers, parser_header) + + assert res.shape == (1, 2) + assert res.iloc[0, 0] is not np.nan + + # More complex tests with multi-index. + nrows = 5 + ncols = 3 + + # ensure limited functionality in 0.10 + # override of gh-2370 until sorted out in 0.11 + + df = tm.makeCustomDataframe( + nrows, ncols, r_idx_nlevels=r_idx_nlevels, c_idx_nlevels=c_idx_nlevels + ) + + # This if will be removed once multi-column Excel writing + # is implemented. For now fixing gh-9794. + if c_idx_nlevels > 1: + msg = ( + "Writing to Excel with MultiIndex columns and no index " + "\\('index'=False\\) is not yet implemented." + ) + with pytest.raises(NotImplementedError, match=msg): + roundtrip(df, use_headers, index=False) + else: + res = roundtrip(df, use_headers) + + if use_headers: + assert res.shape == (nrows, ncols + r_idx_nlevels) + else: + # First row taken as columns. + assert res.shape == (nrows - 1, ncols + r_idx_nlevels) + + # No NaNs. + for r in range(len(res.index)): + for c in range(len(res.columns)): + assert res.iloc[r, c] is not np.nan + + def test_duplicated_columns(self, path): + # see gh-5235 + df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B"]) + df.to_excel(path, "test1") + expected = DataFrame( + [[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B.1"] + ) + + # By default, we mangle. + result = pd.read_excel(path, sheet_name="test1", index_col=0) + tm.assert_frame_equal(result, expected) + + # Explicitly, we pass in the parameter. 
+ with tm.assert_produces_warning( + FutureWarning, match="the 'mangle_dupe_cols' keyword is deprecated" + ): + result = pd.read_excel( + path, sheet_name="test1", index_col=0, mangle_dupe_cols=True + ) + tm.assert_frame_equal(result, expected) + + # see gh-11007, gh-10970 + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A", "B"]) + df.to_excel(path, "test1") + + result = pd.read_excel(path, sheet_name="test1", index_col=0) + expected = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"] + ) + tm.assert_frame_equal(result, expected) + + # see gh-10982 + df.to_excel(path, "test1", index=False, header=False) + result = pd.read_excel(path, sheet_name="test1", header=None) + + expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + tm.assert_frame_equal(result, expected) + + msg = "Setting mangle_dupe_cols=False is not supported yet" + with tm.assert_produces_warning( + FutureWarning, match="the 'mangle_dupe_cols' keyword is deprecated" + ): + with pytest.raises(ValueError, match=msg): + pd.read_excel( + path, sheet_name="test1", header=None, mangle_dupe_cols=False + ) + + def test_swapped_columns(self, path): + # Test for issue #5427. + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) + write_frame.to_excel(path, "test1", columns=["B", "A"]) + + read_frame = pd.read_excel(path, sheet_name="test1", header=0) + + tm.assert_series_equal(write_frame["A"], read_frame["A"]) + tm.assert_series_equal(write_frame["B"], read_frame["B"]) + + def test_invalid_columns(self, path): + # see gh-10982 + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) + + with pytest.raises(KeyError, match="Not all names specified"): + write_frame.to_excel(path, "test1", columns=["B", "C"]) + + with pytest.raises( + KeyError, match="'passes columns are not ALL present dataframe'" + ): + write_frame.to_excel(path, "test1", columns=["C", "D"]) + + @pytest.mark.parametrize( + "to_excel_index,read_excel_index_col", + [ + (True, 0), # Include index in write to file + (False, None), # Dont include index in write to file + ], + ) + def test_write_subset_columns(self, path, to_excel_index, read_excel_index_col): + # GH 31677 + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2], "C": [3, 3, 3]}) + write_frame.to_excel( + path, "col_subset_bug", columns=["A", "B"], index=to_excel_index + ) + + expected = write_frame[["A", "B"]] + read_frame = pd.read_excel( + path, sheet_name="col_subset_bug", index_col=read_excel_index_col + ) + + tm.assert_frame_equal(expected, read_frame) + + def test_comment_arg(self, path): + # see gh-18735 + # + # Test the comment argument functionality to pd.read_excel. + + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Read file without comment arg. 
+ result1 = pd.read_excel(path, sheet_name="test_c", index_col=0) + + result1.iloc[1, 0] = None + result1.iloc[1, 1] = None + result1.iloc[2, 1] = None + + result2 = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0) + tm.assert_frame_equal(result1, result2) + + def test_comment_default(self, path): + # Re issue #18735 + # Test the comment argument default to pd.read_excel + + # Create file to read in + df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Read file with default and explicit comment=None + result1 = pd.read_excel(path, sheet_name="test_c") + result2 = pd.read_excel(path, sheet_name="test_c", comment=None) + tm.assert_frame_equal(result1, result2) + + def test_comment_used(self, path): + # see gh-18735 + # + # Test the comment argument is working as expected when used. + + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Test read_frame_comment against manually produced expected output. + expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]}) + result = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0) + tm.assert_frame_equal(result, expected) + + def test_comment_empty_line(self, path): + # Re issue #18735 + # Test that pd.read_excel ignores commented lines at the end of file + + df = DataFrame({"a": ["1", "#2"], "b": ["2", "3"]}) + df.to_excel(path, index=False) + + # Test that all-comment lines at EoF are ignored + expected = DataFrame({"a": [1], "b": [2]}) + result = pd.read_excel(path, comment="#") + tm.assert_frame_equal(result, expected) + + def test_datetimes(self, path): + # Test writing and reading datetimes. For issue #9139. (xref #9185) + datetimes = [ + datetime(2013, 1, 13, 1, 2, 3), + datetime(2013, 1, 13, 2, 45, 56), + datetime(2013, 1, 13, 4, 29, 49), + datetime(2013, 1, 13, 6, 13, 42), + datetime(2013, 1, 13, 7, 57, 35), + datetime(2013, 1, 13, 9, 41, 28), + datetime(2013, 1, 13, 11, 25, 21), + datetime(2013, 1, 13, 13, 9, 14), + datetime(2013, 1, 13, 14, 53, 7), + datetime(2013, 1, 13, 16, 37, 0), + datetime(2013, 1, 13, 18, 20, 52), + ] + + write_frame = DataFrame({"A": datetimes}) + write_frame.to_excel(path, "Sheet1") + read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0) + + tm.assert_series_equal(write_frame["A"], read_frame["A"]) + + def test_bytes_io(self, engine): + # see gh-7074 + with BytesIO() as bio: + df = DataFrame(np.random.randn(10, 2)) + + # Pass engine explicitly, as there is no file path to infer from. + with ExcelWriter(bio, engine=engine) as writer: + df.to_excel(writer) + + bio.seek(0) + reread_df = pd.read_excel(bio, index_col=0) + tm.assert_frame_equal(df, reread_df) + + def test_write_lists_dict(self, path): + # see gh-8188. 
+ df = DataFrame( + { + "mixed": ["a", ["b", "c"], {"d": "e", "f": 2}], + "numeric": [1, 2, 3.0], + "str": ["apple", "banana", "cherry"], + } + ) + df.to_excel(path, "Sheet1") + read = pd.read_excel(path, sheet_name="Sheet1", header=0, index_col=0) + + expected = df.copy() + expected.mixed = expected.mixed.apply(str) + expected.numeric = expected.numeric.astype("int64") + + tm.assert_frame_equal(read, expected) + + def test_render_as_column_name(self, path): + # see gh-34331 + df = DataFrame({"render": [1, 2], "data": [3, 4]}) + df.to_excel(path, "Sheet1") + read = pd.read_excel(path, "Sheet1", index_col=0) + expected = df + tm.assert_frame_equal(read, expected) + + def test_true_and_false_value_options(self, path): + # see gh-13347 + df = DataFrame([["foo", "bar"]], columns=["col1", "col2"]) + expected = df.replace({"foo": True, "bar": False}) + + df.to_excel(path) + read_frame = pd.read_excel( + path, true_values=["foo"], false_values=["bar"], index_col=0 + ) + tm.assert_frame_equal(read_frame, expected) + + def test_freeze_panes(self, path): + # see gh-15160 + expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) + expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) + + result = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(result, expected) + + def test_path_path_lib(self, engine, ext): + df = tm.makeDataFrame() + writer = partial(df.to_excel, engine=engine) + + reader = partial(pd.read_excel, index_col=0) + result = tm.round_trip_pathlib(writer, reader, path=f"foo{ext}") + tm.assert_frame_equal(result, df) + + def test_path_local_path(self, engine, ext): + df = tm.makeDataFrame() + writer = partial(df.to_excel, engine=engine) + + reader = partial(pd.read_excel, index_col=0) + result = tm.round_trip_localpath(writer, reader, path=f"foo{ext}") + tm.assert_frame_equal(result, df) + + def test_merged_cell_custom_objects(self, path): + # see GH-27006 + mi = MultiIndex.from_tuples( + [ + (pd.Period("2018"), pd.Period("2018Q1")), + (pd.Period("2018"), pd.Period("2018Q2")), + ] + ) + expected = DataFrame(np.ones((2, 2)), columns=mi) + expected.to_excel(path) + with tm.assert_produces_warning( + FutureWarning, match="convert_float is deprecated" + ): + result = pd.read_excel( + path, header=[0, 1], index_col=0, convert_float=False + ) + # need to convert PeriodIndexes to standard Indexes for assert equal + expected.columns = expected.columns.set_levels( + [[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]], + level=[0, 1], + ) + expected.index = expected.index.astype(np.float64) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path): + # GH 27008, GH 7056 + tz = tz_aware_fixture + data = pd.Timestamp("2019", tz=tz) + df = DataFrame([data], dtype=dtype) + with pytest.raises(ValueError, match="Excel does not support"): + df.to_excel(path) + + data = data.to_pydatetime() + df = DataFrame([data], dtype=dtype) + with pytest.raises(ValueError, match="Excel does not support"): + df.to_excel(path) + + def test_excel_duplicate_columns_with_names(self, path): + # GH#39695 + df = DataFrame({"A": [0, 1], "B": [10, 11]}) + df.to_excel(path, columns=["A", "B", "A"], index=False) + + result = pd.read_excel(path) + expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"]) + tm.assert_frame_equal(result, expected) + + def test_if_sheet_exists_raises(self, ext): + # GH 40230 + msg = "if_sheet_exists is only valid in append mode (mode='a')" + + 
with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=re.escape(msg)): + ExcelWriter(f, if_sheet_exists="replace") + + def test_excel_writer_empty_frame(self, engine, ext): + # GH#45793 + with tm.ensure_clean(ext) as path: + with ExcelWriter(path, engine=engine) as writer: + DataFrame().to_excel(writer) + result = pd.read_excel(path) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + + def test_to_excel_empty_frame(self, engine, ext): + # GH#45793 + with tm.ensure_clean(ext) as path: + DataFrame().to_excel(path, engine=engine) + result = pd.read_excel(path) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("attr", ["cur_sheet", "handles", "path"]) + def test_deprecated_attr(self, engine, ext, attr): + # GH#45572 + with tm.ensure_clean(ext) as path: + with ExcelWriter(path) as writer: + msg = f"{attr} is not part of the public API" + with tm.assert_produces_warning(FutureWarning, match=msg): + getattr(writer, attr) + # Some engines raise if nothing is written + DataFrame().to_excel(writer) + + @pytest.mark.filterwarnings("ignore:Calling close():UserWarning:xlsxwriter") + @pytest.mark.parametrize( + "attr, args", [("save", ()), ("write_cells", ([], "test"))] + ) + def test_deprecated_method(self, engine, ext, attr, args): + # GH#45572 + with tm.ensure_clean(ext) as path: + with ExcelWriter(path) as writer: + msg = f"{attr} is not part of the public API" + # Some engines raise if nothing is written + DataFrame().to_excel(writer) + with tm.assert_produces_warning(FutureWarning, match=msg): + getattr(writer, attr)(*args) + + def test_deprecated_book_setter(self, engine, ext): + # GH#48780 + with tm.ensure_clean(ext) as path: + with ExcelWriter(path) as writer: + msg = "Setting the `book` attribute is not part of the public API" + # Some engines raise if nothing is written + DataFrame().to_excel(writer) + book = writer.book + with tm.assert_produces_warning(FutureWarning, match=msg): + writer.book = book + + +class TestExcelWriterEngineTests: + @pytest.mark.parametrize( + "klass,ext", + [ + pytest.param(_XlsxWriter, ".xlsx", marks=td.skip_if_no("xlsxwriter")), + pytest.param(_OpenpyxlWriter, ".xlsx", marks=td.skip_if_no("openpyxl")), + pytest.param(_XlwtWriter, ".xls", marks=td.skip_if_no("xlwt")), + ], + ) + def test_ExcelWriter_dispatch(self, klass, ext): + with tm.ensure_clean(ext) as path: + with ExcelWriter(path) as writer: + if ext == ".xlsx" and td.safe_import("xlsxwriter"): + # xlsxwriter has preference over openpyxl if both installed + assert isinstance(writer, _XlsxWriter) + else: + assert isinstance(writer, klass) + + def test_ExcelWriter_dispatch_raises(self): + with pytest.raises(ValueError, match="No engine"): + ExcelWriter("nothing") + + def test_register_writer(self): + class DummyClass(ExcelWriter): + called_save = False + called_write_cells = False + called_sheets = False + _supported_extensions = ("xlsx", "xls") + _engine = "dummy" + + def book(self): + pass + + def _save(self): + type(self).called_save = True + + def _write_cells(self, *args, **kwargs): + type(self).called_write_cells = True + + @property + def sheets(self): + type(self).called_sheets = True + + @classmethod + def assert_called_and_reset(cls): + assert cls.called_save + assert cls.called_write_cells + assert not cls.called_sheets + cls.called_save = False + cls.called_write_cells = False + + register_writer(DummyClass) + + with option_context("io.excel.xlsx.writer", "dummy"): + path = "something.xlsx" + with tm.ensure_clean(path) 
as filepath: + with ExcelWriter(filepath) as writer: + assert isinstance(writer, DummyClass) + df = tm.makeCustomDataframe(1, 1) + df.to_excel(filepath) + DummyClass.assert_called_and_reset() + + with tm.ensure_clean("something.xls") as filepath: + df.to_excel(filepath, engine="dummy") + DummyClass.assert_called_and_reset() + + @pytest.mark.parametrize( + "ext", + [ + pytest.param(".xlsx", marks=td.skip_if_no("xlsxwriter")), + pytest.param(".xlsx", marks=td.skip_if_no("openpyxl")), + pytest.param(".ods", marks=td.skip_if_no("odf")), + ], + ) + def test_engine_kwargs_and_kwargs_raises(self, ext): + # GH 40430 + msg = re.escape("Cannot use both engine_kwargs and **kwargs") + with pytest.raises(ValueError, match=msg): + with ExcelWriter("", engine_kwargs={"a": 1}, b=2): + pass + + +@td.skip_if_no("xlrd") +@td.skip_if_no("openpyxl") +class TestFSPath: + def test_excelfile_fspath(self): + with tm.ensure_clean("foo.xlsx") as path: + df = DataFrame({"A": [1, 2]}) + df.to_excel(path) + with ExcelFile(path) as xl: + result = os.fspath(xl) + assert result == path + + def test_excelwriter_fspath(self): + with tm.ensure_clean("foo.xlsx") as path: + with ExcelWriter(path) as writer: + assert os.fspath(writer) == str(path) diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py new file mode 100644 index 00000000..86141f08 --- /dev/null +++ b/pandas/tests/io/excel/test_xlrd.py @@ -0,0 +1,96 @@ +import io + +import pytest + +from pandas.compat._optional import import_optional_dependency + +import pandas as pd +import pandas._testing as tm + +from pandas.io.excel import ExcelFile +from pandas.io.excel._base import inspect_excel_format + +xlrd = pytest.importorskip("xlrd") +xlwt = pytest.importorskip("xlwt") + +pytestmark = pytest.mark.filterwarnings( + "ignore:As the xlwt package is no longer maintained:FutureWarning" +) + + +exts = [".xls"] + + +@pytest.fixture(params=exts) +def read_ext_xlrd(request): + """ + Valid extensions for reading Excel files with xlrd. 
+ + Similar to read_ext, but excludes .ods, .xlsb, and for xlrd>2 .xlsx, .xlsm + """ + return request.param + + +def test_read_xlrd_book(read_ext_xlrd, frame): + df = frame + + engine = "xlrd" + sheet_name = "SheetA" + + with tm.ensure_clean(read_ext_xlrd) as pth: + df.to_excel(pth, sheet_name) + with xlrd.open_workbook(pth) as book: + with ExcelFile(book, engine=engine) as xl: + result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) + tm.assert_frame_equal(df, result) + + result = pd.read_excel( + book, sheet_name=sheet_name, engine=engine, index_col=0 + ) + tm.assert_frame_equal(df, result) + + +def test_excel_file_warning_with_xlsx_file(datapath): + # GH 29375 + path = datapath("io", "data", "excel", "test1.xlsx") + has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None + if not has_openpyxl: + with tm.assert_produces_warning( + FutureWarning, + raise_on_extra_warnings=False, + match="The xlrd engine is no longer maintained", + ): + ExcelFile(path, engine=None) + else: + with tm.assert_produces_warning(None): + pd.read_excel(path, "Sheet1", engine=None) + + +def test_read_excel_warning_with_xlsx_file(datapath): + # GH 29375 + path = datapath("io", "data", "excel", "test1.xlsx") + has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None + if not has_openpyxl: + with pytest.raises( + ValueError, + match="Your version of xlrd is ", + ): + pd.read_excel(path, "Sheet1", engine=None) + else: + with tm.assert_produces_warning(None): + pd.read_excel(path, "Sheet1", engine=None) + + +@pytest.mark.parametrize( + "file_header", + [ + b"\x09\x00\x04\x00\x07\x00\x10\x00", + b"\x09\x02\x06\x00\x00\x00\x10\x00", + b"\x09\x04\x06\x00\x00\x00\x10\x00", + b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", + ], +) +def test_read_old_xls_files(file_header): + # GH 41226 + f = io.BytesIO(file_header) + assert inspect_excel_format(f) == "xls" diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py new file mode 100644 index 00000000..82d47a13 --- /dev/null +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -0,0 +1,94 @@ +import contextlib +import re +import warnings + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter + +xlsxwriter = pytest.importorskip("xlsxwriter") + +pytestmark = pytest.mark.parametrize("ext", [".xlsx"]) + + +def test_column_format(ext): + # Test that column formats are applied to cells. Test for issue #9167. + # Applicable to xlsxwriter only. + with warnings.catch_warnings(): + # Ignore the openpyxl lxml warning. + warnings.simplefilter("ignore") + openpyxl = pytest.importorskip("openpyxl") + + with tm.ensure_clean(ext) as path: + frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]}) + + with ExcelWriter(path) as writer: + frame.to_excel(writer) + + # Add a number format to col B and ensure it is applied to cells. + num_format = "#,##0" + write_workbook = writer.book + write_worksheet = write_workbook.worksheets()[0] + col_format = write_workbook.add_format({"num_format": num_format}) + write_worksheet.set_column("B:B", None, col_format) + + with contextlib.closing(openpyxl.load_workbook(path)) as read_workbook: + try: + read_worksheet = read_workbook["Sheet1"] + except TypeError: + # compat + read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1") + + # Get the number format from the cell. 
+ try: + cell = read_worksheet["B2"] + except TypeError: + # compat + cell = read_worksheet.cell("B2") + + try: + read_num_format = cell.number_format + except AttributeError: + read_num_format = cell.style.number_format._format_code + + assert read_num_format == num_format + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with xlsxwriter!" + + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="xlsxwriter", mode="a") + + +@pytest.mark.parametrize("nan_inf_to_errors", [True, False]) +def test_kwargs(ext, nan_inf_to_errors): + # GH 42286 + kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}} + with tm.ensure_clean(ext) as f: + msg = re.escape("Use of **kwargs is deprecated") + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="xlsxwriter", **kwargs) as writer: + assert writer.book.nan_inf_to_errors == nan_inf_to_errors + + +@pytest.mark.parametrize("nan_inf_to_errors", [True, False]) +def test_engine_kwargs(ext, nan_inf_to_errors): + # GH 42286 + engine_kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}} + with tm.ensure_clean(ext) as f: + with ExcelWriter(f, engine="xlsxwriter", engine_kwargs=engine_kwargs) as writer: + assert writer.book.nan_inf_to_errors == nan_inf_to_errors + + +def test_book_and_sheets_consistent(ext): + # GH#45687 - Ensure sheets is updated if user modifies book + with tm.ensure_clean(ext) as f: + with ExcelWriter(f, engine="xlsxwriter") as writer: + assert writer.sheets == {} + sheet = writer.book.add_worksheet("test_name") + assert writer.sheets == {"test_name": sheet} diff --git a/pandas/tests/io/excel/test_xlwt.py b/pandas/tests/io/excel/test_xlwt.py new file mode 100644 index 00000000..3aa405eb --- /dev/null +++ b/pandas/tests/io/excel/test_xlwt.py @@ -0,0 +1,146 @@ +import re + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + options, +) +import pandas._testing as tm + +from pandas.io.excel import ( + ExcelWriter, + _XlwtWriter, +) + +xlwt = pytest.importorskip("xlwt") + +pytestmark = pytest.mark.parametrize("ext,", [".xls"]) + + +def test_excel_raise_error_on_multiindex_columns_and_no_index(ext): + # MultiIndex as columns is not yet implemented 9794 + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = DataFrame(np.random.randn(10, 3), columns=cols) + + msg = ( + "Writing to Excel with MultiIndex columns and no index " + "\\('index'=False\\) is not yet implemented." 
+ ) + with pytest.raises(NotImplementedError, match=msg): + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=False) + + +def test_excel_multiindex_columns_and_index_true(ext): + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = DataFrame(np.random.randn(10, 3), columns=cols) + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=True) + + +def test_excel_multiindex_index(ext): + # MultiIndex as index works so assert no error #9794 + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = DataFrame(np.random.randn(3, 10), index=cols) + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=False) + + +def test_to_excel_styleconverter(ext): + hstyle = { + "font": {"bold": True}, + "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"}, + "alignment": {"horizontal": "center", "vertical": "top"}, + } + + xls_style = _XlwtWriter._convert_to_style(hstyle) + assert xls_style.font.bold + assert xlwt.Borders.THIN == xls_style.borders.top + assert xlwt.Borders.THIN == xls_style.borders.right + assert xlwt.Borders.THIN == xls_style.borders.bottom + assert xlwt.Borders.THIN == xls_style.borders.left + assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz + assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with xlwt!" + + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="xlwt", mode="a") + + +def test_to_excel_xlwt_warning(ext): + # GH 26552 + df = DataFrame(np.random.randn(3, 10)) + with tm.ensure_clean(ext) as path: + with tm.assert_produces_warning( + FutureWarning, + match="As the xlwt package is no longer maintained", + ): + df.to_excel(path) + + +def test_option_xls_writer_deprecated(ext): + # GH 26552 + with tm.assert_produces_warning( + FutureWarning, + match="As the xlwt package is no longer maintained", + check_stacklevel=False, + ): + options.io.excel.xls.writer = "xlwt" + + +@pytest.mark.parametrize("style_compression", [0, 2]) +def test_kwargs(ext, style_compression): + # GH 42286 + kwargs = {"style_compression": style_compression} + with tm.ensure_clean(ext) as f: + msg = re.escape("Use of **kwargs is deprecated") + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="xlwt", **kwargs) as writer: + assert ( + writer.book._Workbook__styles.style_compression == style_compression + ) + # xlwt won't allow us to close without writing something + DataFrame().to_excel(writer) + + +@pytest.mark.parametrize("style_compression", [0, 2]) +def test_engine_kwargs(ext, style_compression): + # GH 42286 + engine_kwargs = {"style_compression": style_compression} + with tm.ensure_clean(ext) as f: + with ExcelWriter(f, engine="xlwt", engine_kwargs=engine_kwargs) as writer: + assert writer.book._Workbook__styles.style_compression == style_compression + # xlwt won't allow us to close without writing something + DataFrame().to_excel(writer) + + +def test_book_and_sheets_consistent(ext): + # GH#45687 - Ensure sheets is updated if user modifies book + with tm.ensure_clean(ext) as f: + with ExcelWriter(f) as writer: + assert writer.sheets == {} + sheet = writer.book.add_sheet("test_name") + assert writer.sheets == {"test_name": sheet} + + +@pytest.mark.parametrize("attr", ["fm_date", "fm_datetime"]) +def test_deprecated_attr(ext, attr): + # GH#45572 + with tm.ensure_clean(ext) as path: + 
with ExcelWriter(path, engine="xlwt") as writer:
+            msg = f"{attr} is not part of the public API"
+            with tm.assert_produces_warning(FutureWarning, match=msg):
+                getattr(writer, attr)
diff --git a/pandas/tests/io/formats/__init__.py b/pandas/tests/io/formats/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/pandas/tests/io/formats/data/html/datetime64_hourformatter.html b/pandas/tests/io/formats/data/html/datetime64_hourformatter.html
new file mode 100644
index 00000000..c92b7218
--- /dev/null
+++ b/pandas/tests/io/formats/data/html/datetime64_hourformatter.html
@@ -0,0 +1,18 @@
    [From this hunk onward the patch adds expected-output HTML fixtures under pandas/tests/io/formats/data/html/. The table markup of these files did not survive extraction (only bare cell text and "+" markers remain), so the new files are listed here rather than reproduced:]

    datetime64_hourformatter.html, datetime64_monthformatter.html: hour/month formatter output (column "hod": 10:10, 12:12; column "months": 2016-01, 2016-02)
    escape_disabled.html, escaped.html: cell and label text with HTML escaping disabled and enabled
    gh6131_expected_output.html, gh8452_expected_output.html, gh12031_expected_output.html, gh13828_expected_output.html,
    gh14882_expected_output_1.html, gh14882_expected_output_2.html, gh14998_expected_output.html, gh15019_expected_output.html,
    gh21625_expected_output.html, gh22270_expected_output.html, gh22579_expected_output.html, gh22783_expected_output.html,
    gh22783_named_columns_index.html, gh40024_expected_output.html: regression fixtures for the referenced GitHub issues
    html_repr_max_rows_10_min_rows_12.html, html_repr_max_rows_10_min_rows_4.html, html_repr_max_rows_12_min_rows_None.html,
    html_repr_max_rows_None_min_rows_12.html, html_repr_min_rows_default_no_truncation.html, html_repr_min_rows_default_truncated.html: _repr_html_ output for min_rows/max_rows combinations (the truncated variants end with "61 rows × 1 columns")
    index_1.html, index_2.html, index_3.html, index_4.html, index_5.html, index_formatter.html: index display variants
    index_<index>_columns_<columns>.html for every combination of <index> and <columns> in {named_multi, named_standard, none, unnamed_multi, unnamed_standard}: 25 files covering index/column naming
    justify.html: column justification
    multiindex_1.html, multiindex_2.html, multiindex_sparsify_1.html, multiindex_sparsify_2.html,
    multiindex_sparsify_false_multi_sparse_1.html, multiindex_sparsify_false_multi_sparse_2.html: MultiIndex column headers with sparsify on and off
    render_links_false.html, render_links_true.html: URL cells with and without rendered links
    trunc_df_index_<index>_columns_<columns>.html for <index> in {named_multi, named_standard} and <columns> in {named_multi, named_standard, none, unnamed_multi, unnamed_standard}: 10 truncated-frame fixtures
    01...67
    index.name
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_multi.html new file mode 100644 index 00000000..6640db4c --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_multi.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooa...b
    c...d
    bazef...ef
    01...67
    89...1415
    ..................
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_standard.html new file mode 100644 index 00000000..364a0b98 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_standard.html @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...67
    01...67
    89...1415
    ..................
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_none.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_none.html new file mode 100644 index 00000000..e2af1ba4 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_none.html @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    89...1415
    ...............
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_multi.html new file mode 100644 index 00000000..8c9a9e24 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_multi.html @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a...b
    c...d
    ef...ef
    01...67
    89...1415
    ...............
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_standard.html new file mode 100644 index 00000000..b9dcf526 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_standard.html @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    01...67
    89...1415
    ...............
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_multi.html new file mode 100644 index 00000000..0590d0de --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_multi.html @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooa...b
    c...d
    bazef...ef
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_standard.html new file mode 100644 index 00000000..28a2d964 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_standard.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...67
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_none.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_none.html new file mode 100644 index 00000000..387ac51b --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_none.html @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_multi.html new file mode 100644 index 00000000..30cd8590 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_multi.html @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a...b
    c...d
    ef...ef
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_standard.html new file mode 100644 index 00000000..81edece2 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_standard.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_multi.html new file mode 100644 index 00000000..2acacfed --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_multi.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooa...b
    c...d
    bazef...ef
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_standard.html new file mode 100644 index 00000000..c9bacdbd --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_standard.html @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...67
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_none.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_none.html new file mode 100644 index 00000000..f2696f7d --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_none.html @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_multi.html new file mode 100644 index 00000000..37e73152 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_multi.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a...b
    c...d
    ef...ef
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_standard.html new file mode 100644 index 00000000..3241ff41 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_standard.html @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/truncate.html b/pandas/tests/io/formats/data/html/truncate.html new file mode 100644 index 00000000..a5eb8c5c --- /dev/null +++ b/pandas/tests/io/formats/data/html/truncate.html @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...1819
    2001-01-01NaNNaN...NaNNaN
    2001-01-02NaNNaN...NaNNaN
    2001-01-03NaNNaN...NaNNaN
    2001-01-04NaNNaN...NaNNaN
    ..................
    2001-01-17NaNNaN...NaNNaN
    2001-01-18NaNNaN...NaNNaN
    2001-01-19NaNNaN...NaNNaN
    2001-01-20NaNNaN...NaNNaN
    diff --git a/pandas/tests/io/formats/data/html/truncate_formatter.html b/pandas/tests/io/formats/data/html/truncate_formatter.html new file mode 100644 index 00000000..7615ef89 --- /dev/null +++ b/pandas/tests/io/formats/data/html/truncate_formatter.html @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    A...D
    01_mod...4
    15_mod...8
    29_mod...12
    313_mod...16
    diff --git a/pandas/tests/io/formats/data/html/truncate_multi_index.html b/pandas/tests/io/formats/data/html/truncate_multi_index.html new file mode 100644 index 00000000..8a295d66 --- /dev/null +++ b/pandas/tests/io/formats/data/html/truncate_multi_index.html @@ -0,0 +1,101 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    barbaz...fooqux
    onetwoone...twoonetwo
    baroneNaNNaNNaN...NaNNaNNaN
    twoNaNNaNNaN...NaNNaNNaN
    bazoneNaNNaNNaN...NaNNaNNaN
    ...........................
    footwoNaNNaNNaN...NaNNaNNaN
    quxoneNaNNaNNaN...NaNNaNNaN
    twoNaNNaNNaN...NaNNaNNaN
    diff --git a/pandas/tests/io/formats/data/html/truncate_multi_index_sparse_off.html b/pandas/tests/io/formats/data/html/truncate_multi_index_sparse_off.html new file mode 100644 index 00000000..6a7e1b5a --- /dev/null +++ b/pandas/tests/io/formats/data/html/truncate_multi_index_sparse_off.html @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    barbarbaz...fooquxqux
    onetwoone...twoonetwo
    baroneNaNNaNNaN...NaNNaNNaN
    bartwoNaNNaNNaN...NaNNaNNaN
    bazoneNaNNaNNaN...NaNNaNNaN
    ...........................
    footwoNaNNaNNaN...NaNNaNNaN
    quxoneNaNNaNNaN...NaNNaNNaN
    quxtwoNaNNaNNaN...NaNNaNNaN
    diff --git a/pandas/tests/io/formats/data/html/unicode_1.html b/pandas/tests/io/formats/data/html/unicode_1.html new file mode 100644 index 00000000..72b81018 --- /dev/null +++ b/pandas/tests/io/formats/data/html/unicode_1.html @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    σ
    00.0
    11.0
    22.0
    33.0
    44.0
    55.0
    66.0
    77.0
    88.0
    99.0
    diff --git a/pandas/tests/io/formats/data/html/unicode_2.html b/pandas/tests/io/formats/data/html/unicode_2.html new file mode 100644 index 00000000..79c08809 --- /dev/null +++ b/pandas/tests/io/formats/data/html/unicode_2.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + + +
    A
    0σ
    diff --git a/pandas/tests/io/formats/data/html/various_dtypes_formatted.html b/pandas/tests/io/formats/data/html/various_dtypes_formatted.html new file mode 100644 index 00000000..7d2ede33 --- /dev/null +++ b/pandas/tests/io/formats/data/html/various_dtypes_formatted.html @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ifIsbco
    0formattedformattedformattedformattedformattedformattedformatted
    1formattedformattedformattedformattedformattedformattedformatted
    diff --git a/pandas/tests/io/formats/data/html/with_classes.html b/pandas/tests/io/formats/data/html/with_classes.html new file mode 100644 index 00000000..8cee3f0c --- /dev/null +++ b/pandas/tests/io/formats/data/html/with_classes.html @@ -0,0 +1,9 @@ + + + + + + + + +
    diff --git a/pandas/tests/io/formats/style/__init__.py b/pandas/tests/io/formats/style/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py new file mode 100644 index 00000000..19884aaa --- /dev/null +++ b/pandas/tests/io/formats/style/test_bar.py @@ -0,0 +1,307 @@ +import numpy as np +import pytest + +from pandas import DataFrame + +pytest.importorskip("jinja2") + + +def bar_grad(a=None, b=None, c=None, d=None): + """Used in multiple tests to simplify formatting of expected result""" + ret = [("width", "10em")] + if all(x is None for x in [a, b, c, d]): + return ret + return ret + [ + ( + "background", + f"linear-gradient(90deg,{','.join([x for x in [a, b, c, d] if x])})", + ) + ] + + +def no_bar(): + return bar_grad() + + +def bar_to(x, color="#d65f5f"): + return bar_grad(f" {color} {x:.1f}%", f" transparent {x:.1f}%") + + +def bar_from_to(x, y, color="#d65f5f"): + return bar_grad( + f" transparent {x:.1f}%", + f" {color} {x:.1f}%", + f" {color} {y:.1f}%", + f" transparent {y:.1f}%", + ) + + +@pytest.fixture +def df_pos(): + return DataFrame([[1], [2], [3]]) + + +@pytest.fixture +def df_neg(): + return DataFrame([[-1], [-2], [-3]]) + + +@pytest.fixture +def df_mix(): + return DataFrame([[-3], [1], [2]]) + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(50), bar_to(100)]), + ("right", [bar_to(100), bar_from_to(50, 100), no_bar()]), + ("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]), + ("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]), + ("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]), + (2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]), + (np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]), + ], +) +def test_align_positive_cases(df_pos, align, exp): + # test different align cases for all positive values + result = df_pos.style.bar(align=align)._compute().ctx + expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [bar_to(100), bar_to(50), no_bar()]), + ("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]), + ("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]), + ("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]), + ("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]), + (-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]), + (np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]), + ], +) +def test_align_negative_cases(df_neg, align, exp): + # test different align cases for all negative values + result = df_neg.style.bar(align=align)._compute().ctx + expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(80), bar_to(100)]), + ("right", [bar_to(100), bar_from_to(80, 100), no_bar()]), + ("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]), + ("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + ("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + (-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + (np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]), + ], +) +@pytest.mark.parametrize("nans", [True, False]) +def test_align_mixed_cases(df_mix, align, exp, nans): + # test different align cases for mixed positive and negative values + # also test no impact of NaNs and no_bar + 
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + if nans: + df_mix.loc[3, :] = np.nan + expected.update({(3, 0): no_bar()}) + result = df_mix.style.bar(align=align)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ( + "left", + { + "index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]], + "columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]], + "none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]], + }, + ), + ( + "mid", + { + "index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]], + "columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]], + "none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]], + }, + ), + ( + "zero", + { + "index": [ + [bar_from_to(50, 66.66), bar_from_to(50, 75)], + [bar_from_to(50, 100), bar_from_to(50, 100)], + ], + "columns": [ + [bar_from_to(50, 75), bar_from_to(50, 100)], + [bar_from_to(50, 87.5), bar_from_to(50, 100)], + ], + "none": [ + [bar_from_to(50, 62.5), bar_from_to(50, 75)], + [bar_from_to(50, 87.5), bar_from_to(50, 100)], + ], + }, + ), + ( + 2, + { + "index": [ + [bar_to(50), no_bar()], + [bar_from_to(50, 100), bar_from_to(50, 100)], + ], + "columns": [ + [bar_to(50), no_bar()], + [bar_from_to(50, 75), bar_from_to(50, 100)], + ], + "none": [ + [bar_from_to(25, 50), no_bar()], + [bar_from_to(50, 75), bar_from_to(50, 100)], + ], + }, + ), + ], +) +@pytest.mark.parametrize("axis", ["index", "columns", "none"]) +def test_align_axis(align, exp, axis): + # test all axis combinations with positive values and different aligns + data = DataFrame([[1, 2], [3, 4]]) + result = ( + data.style.bar(align=align, axis=None if axis == "none" else axis) + ._compute() + .ctx + ) + expected = { + (0, 0): exp[axis][0][0], + (0, 1): exp[axis][0][1], + (1, 0): exp[axis][1][0], + (1, 1): exp[axis][1][1], + } + assert result == expected + + +@pytest.mark.parametrize( + "values, vmin, vmax", + [ + ("positive", 1.5, 2.5), + ("negative", -2.5, -1.5), + ("mixed", -2.5, 1.5), + ], +) +@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately +@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"]) +def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align): + # test that clipping occurs if any vmin > data_values or vmax < data_values + if align == "mid": # mid acts as left or right in each case + if values == "positive": + align = "left" + elif values == "negative": + align = "right" + df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values] + vmin = None if nullify == "vmin" else vmin + vmax = None if nullify == "vmax" else vmax + + clip_df = df.where(df <= (vmax if vmax else 999), other=vmax) + clip_df = clip_df.where(clip_df >= (vmin if vmin else -999), other=vmin) + + result = ( + df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"]) + ._compute() + .ctx + ) + expected = clip_df.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "values, vmin, vmax", + [ + ("positive", 0.5, 4.5), + ("negative", -4.5, -0.5), + ("mixed", -4.5, 4.5), + ], +) +@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately +@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"]) +def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align): + # test that widening occurs if any vmax > data_values or vmin < data_values + if align == "mid": # mid acts as left or 
right in each case + if values == "positive": + align = "left" + elif values == "negative": + align = "right" + df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values] + vmin = None if nullify == "vmin" else vmin + vmax = None if nullify == "vmax" else vmax + + expand_df = df.copy() + expand_df.loc[3, :], expand_df.loc[4, :] = vmin, vmax + + result = ( + df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"]) + ._compute() + .ctx + ) + expected = expand_df.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result.items() <= expected.items() + + +def test_numerics(): + # test data is pre-selected for numeric values + data = DataFrame([[1, "a"], [2, "b"]]) + result = data.style.bar()._compute().ctx + assert (0, 1) not in result + assert (1, 1) not in result + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(100, "green")]), + ("right", [bar_to(100, "red"), no_bar()]), + ("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]), + ("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]), + ], +) +def test_colors_mixed(align, exp): + data = DataFrame([[-1], [3]]) + result = data.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result == {(0, 0): exp[0], (1, 0): exp[1]} + + +def test_bar_align_height(): + # test when keyword height is used 'no-repeat center' and 'background-size' present + data = DataFrame([[1], [2]]) + result = data.style.bar(align="left", height=50)._compute().ctx + bg_s = "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center" + expected = { + (0, 0): [("width", "10em")], + (1, 0): [ + ("width", "10em"), + ("background", bg_s), + ("background-size", "100% 50.0%"), + ], + } + assert result == expected + + +def test_bar_value_error_raises(): + df = DataFrame({"A": [-100, -60, -30, -20]}) + + msg = "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or" + with pytest.raises(ValueError, match=msg): + df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]).to_html() + + msg = r"`width` must be a value in \[0, 100\]" + with pytest.raises(ValueError, match=msg): + df.style.bar(width=200).to_html() + + msg = r"`height` must be a value in \[0, 100\]" + with pytest.raises(ValueError, match=msg): + df.style.bar(height=200).to_html() diff --git a/pandas/tests/io/formats/style/test_deprecated.py b/pandas/tests/io/formats/style/test_deprecated.py new file mode 100644 index 00000000..863c31ed --- /dev/null +++ b/pandas/tests/io/formats/style/test_deprecated.py @@ -0,0 +1,170 @@ +""" +modules collects tests for Styler methods which have been deprecated +""" +import numpy as np +import pytest + +jinja2 = pytest.importorskip("jinja2") + +from pandas import ( + DataFrame, + IndexSlice, + NaT, + Timestamp, +) +import pandas._testing as tm + + +@pytest.fixture +def df(): + return DataFrame({"A": [0, 1], "B": np.random.randn(2)}) + + +@pytest.mark.parametrize("axis", ["index", "columns"]) +def test_hide_index_columns(df, axis): + with tm.assert_produces_warning(FutureWarning): + getattr(df.style, "hide_" + axis)() + + +def test_set_non_numeric_na(): + # GH 21527 28358 + df = DataFrame( + { + "object": [None, np.nan, "foo"], + "datetime": [None, NaT, Timestamp("20120101")], + } + ) + + with tm.assert_produces_warning(FutureWarning): + ctx = df.style.set_na_rep("NA")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + assert ctx["body"][1][1]["display_value"] == "NA" + 
assert ctx["body"][1][2]["display_value"] == "NA" + + +def test_where_with_one_style(df): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + + with tm.assert_produces_warning(FutureWarning): + result = df.style.where(f, style1)._compute().ctx + expected = { + (r, c): [("foo", "bar")] + for r, row in enumerate(df.index) + for c, col in enumerate(df.columns) + if f(df.loc[row, col]) + } + assert result == expected + + +@pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:], + IndexSlice[:, ["A"]], + IndexSlice[[1], :], + IndexSlice[[1], ["A"]], + IndexSlice[:2, ["A", "B"]], + ], +) +def test_where_subset(df, slice_): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + style2 = "baz: foo" + + with tm.assert_produces_warning(FutureWarning): + res = df.style.where(f, style1, style2, subset=slice_)._compute().ctx + expected = { + (r, c): [("foo", "bar") if f(df.loc[row, col]) else ("baz", "foo")] + for r, row in enumerate(df.index) + for c, col in enumerate(df.columns) + if row in df.loc[slice_].index and col in df.loc[slice_].columns + } + assert res == expected + + +def test_where_subset_compare_with_applymap(df): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + style2 = "baz: foo" + + def g(x): + return style1 if f(x) else style2 + + slices = [ + IndexSlice[:], + IndexSlice[:, ["A"]], + IndexSlice[[1], :], + IndexSlice[[1], ["A"]], + IndexSlice[:2, ["A", "B"]], + ] + + for slice_ in slices: + with tm.assert_produces_warning(FutureWarning): + result = df.style.where(f, style1, style2, subset=slice_)._compute().ctx + expected = df.style.applymap(g, subset=slice_)._compute().ctx + assert result == expected + + +def test_where_kwargs(): + df = DataFrame([[1, 2], [3, 4]]) + + def f(x, val): + return x > val + + with tm.assert_produces_warning(FutureWarning): + res = df.style.where(f, "color:green;", "color:red;", val=2)._compute().ctx + expected = { + (0, 0): [("color", "red")], + (0, 1): [("color", "red")], + (1, 0): [("color", "green")], + (1, 1): [("color", "green")], + } + assert res == expected + + +def test_set_na_rep(): + # GH 21527 28358 + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + with tm.assert_produces_warning(FutureWarning): + ctx = df.style.set_na_rep("NA")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + + with tm.assert_produces_warning(FutureWarning): + ctx = ( + df.style.set_na_rep("NA") + .format(None, na_rep="-", subset=["B"]) + ._translate(True, True) + ) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "-" + + +def test_precision(df): + styler = df.style + with tm.assert_produces_warning(FutureWarning): + s2 = styler.set_precision(1) + assert styler is s2 + assert styler.precision == 1 + + +def test_render(df): + with tm.assert_produces_warning(FutureWarning): + df.style.render() + + +def test_null_color(df): + with tm.assert_produces_warning(FutureWarning): + df.style.highlight_null(null_color="blue") diff --git a/pandas/tests/io/formats/style/test_exceptions.py b/pandas/tests/io/formats/style/test_exceptions.py new file mode 100644 index 00000000..d52e3a37 --- /dev/null +++ b/pandas/tests/io/formats/style/test_exceptions.py @@ -0,0 +1,44 @@ +import pytest + +jinja2 = pytest.importorskip("jinja2") + +from pandas import ( + DataFrame, + MultiIndex, +) + +from pandas.io.formats.style import Styler + + +@pytest.fixture +def df(): + return DataFrame( + data=[[0, -0.609], [1, 
-1.228]], + columns=["A", "B"], + index=["x", "y"], + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_concat_bad_columns(styler): + msg = "`other.data` must have same columns as `Styler.data" + with pytest.raises(ValueError, match=msg): + styler.concat(DataFrame([[1, 2]]).style) + + +def test_concat_bad_type(styler): + msg = "`other` must be of type `Styler`" + with pytest.raises(TypeError, match=msg): + styler.concat(DataFrame([[1, 2]])) + + +def test_concat_bad_index_levels(styler, df): + df = df.copy() + df.index = MultiIndex.from_tuples([(0, 0), (1, 1)]) + msg = "number of index levels must be same in `other`" + with pytest.raises(ValueError, match=msg): + styler.concat(df.style) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py new file mode 100644 index 00000000..0b114ea1 --- /dev/null +++ b/pandas/tests/io/formats/style/test_format.py @@ -0,0 +1,501 @@ +import numpy as np +import pytest + +from pandas import ( + NA, + DataFrame, + IndexSlice, + MultiIndex, + NaT, + Timestamp, + option_context, +) + +pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler +from pandas.io.formats.style_render import _str_escape + + +@pytest.fixture +def df(): + return DataFrame( + data=[[0, -0.609], [1, -1.228]], + columns=["A", "B"], + index=["x", "y"], + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +@pytest.fixture +def df_multi(): + return DataFrame( + data=np.arange(16).reshape(4, 4), + columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]), + index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]), + ) + + +@pytest.fixture +def styler_multi(df_multi): + return Styler(df_multi, uuid_len=0) + + +def test_display_format(styler): + ctx = styler.format("{:0.1f}")._translate(True, True) + assert all(["display_value" in c for c in row] for row in ctx["body"]) + assert all([len(c["display_value"]) <= 3 for c in row[1:]] for row in ctx["body"]) + assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3 + + +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("columns", [True, False]) +def test_display_format_index(styler, index, columns): + exp_index = ["x", "y"] + if index: + styler.format_index(lambda v: v.upper(), axis=0) # test callable + exp_index = ["X", "Y"] + + exp_columns = ["A", "B"] + if columns: + styler.format_index("*{}*", axis=1) # test string + exp_columns = ["*A*", "*B*"] + + ctx = styler._translate(True, True) + + for r, row in enumerate(ctx["body"]): + assert row[0]["display_value"] == exp_index[r] + + for c, col in enumerate(ctx["head"][1:]): + assert col["display_value"] == exp_columns[c] + + +def test_format_dict(styler): + ctx = styler.format({"A": "{:0.1f}", "B": "{0:.2%}"})._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "0.0" + assert ctx["body"][0][2]["display_value"] == "-60.90%" + + +def test_format_index_dict(styler): + ctx = styler.format_index({0: lambda v: v.upper()})._translate(True, True) + for i, val in enumerate(["X", "Y"]): + assert ctx["body"][i][0]["display_value"] == val + + +def test_format_string(styler): + ctx = styler.format("{:.2f}")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "0.00" + assert ctx["body"][0][2]["display_value"] == "-0.61" + assert ctx["body"][1][1]["display_value"] == "1.00" + assert ctx["body"][1][2]["display_value"] == "-1.23" + + +def test_format_callable(styler): + ctx = styler.format(lambda v: "neg" if v < 0 else 
"pos")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "pos" + assert ctx["body"][0][2]["display_value"] == "neg" + assert ctx["body"][1][1]["display_value"] == "pos" + assert ctx["body"][1][2]["display_value"] == "neg" + + +def test_format_with_na_rep(): + # GH 21527 28358 + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = df.style.format(None, na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + + ctx = df.style.format("{:.2%}", na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "110.00%" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate(True, True) + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + +def test_format_index_with_na_rep(): + df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA]) + ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "A" + for i in [2, 3, 4, 5]: + assert ctx["head"][0][i]["display_value"] == "--" + + +def test_format_non_numeric_na(): + # GH 21527 28358 + df = DataFrame( + { + "object": [None, np.nan, "foo"], + "datetime": [None, NaT, Timestamp("20120101")], + } + ) + ctx = df.style.format(None, na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "-" + + +@pytest.mark.parametrize( + "func, attr, kwargs", + [ + ("format", "_display_funcs", {}), + ("format_index", "_display_funcs_index", {"axis": 0}), + ("format_index", "_display_funcs_columns", {"axis": 1}), + ], +) +def test_format_clear(styler, func, attr, kwargs): + assert (0, 0) not in getattr(styler, attr) # using default + getattr(styler, func)("{:.2f}", **kwargs) + assert (0, 0) in getattr(styler, attr) # formatter is specified + getattr(styler, func)(**kwargs) + assert (0, 0) not in getattr(styler, attr) # formatter cleared to default + + +@pytest.mark.parametrize( + "escape, exp", + [ + ("html", "<>&"%$#_{}~^\\~ ^ \\ "), + ( + "latex", + '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum ' + "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space " + "\\textbackslash \\space ", + ), + ], +) +def test_format_escape_html(escape, exp): + chars = '<>&"%$#_{}~^\\~ ^ \\ ' + df = DataFrame([[chars]]) + + s = Styler(df, uuid_len=0).format("&{0}&", escape=None) + expected = f'

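For context on the Styler tests added above (test_bar.py and test_format.py), here is a minimal usage sketch of the same public APIs they exercise. This is illustrative only, not part of the patch: it assumes pandas 1.5 with jinja2 installed, and the example data is made up rather than taken from the tests.

import pandas as pd

df = pd.DataFrame({"A": [-3, 1, 2], "B": [0.5, -0.609, -1.228]})

# Data bars, as covered by test_bar.py: align relative to the midpoint,
# clip to an explicit value range, and use separate colors for
# negative and positive values.
styler = df.style.bar(
    align="mid", vmin=-3, vmax=3, color=["#d65f5f", "#5fba7d"]
)

# Display formatting, as covered by test_format.py: per-column format
# strings, a representation for missing values, and index formatting.
styler = styler.format({"A": "{:d}", "B": "{:.2%}"}, na_rep="-")
styler = styler.format_index(str.upper, axis=1)

html = styler.to_html()  # render the styled table to HTML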
[GIT binary patch data omitted]

diff --git a/pandas/tests/io/data/excel/testmultiindex.xlsx b/pandas/tests/io/data/excel/testmultiindex.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..a6174445bb83aa870381fa90feedfb91c6292c60
GIT binary patch
literal 22743
[binary data omitted]

diff --git a/pandas/tests/io/data/excel/testskiprows.xls b/pandas/tests/io/data/excel/testskiprows.xls
new file mode 100644
index 0000000000000000000000000000000000000000..21ccd30ec62daa28898220a6fdbbd8e061f51f78
GIT binary patch
literal 22528
[binary data omitted]

diff --git a/pandas/tests/io/data/excel/testskiprows.xlsb b/pandas/tests/io/data/excel/testskiprows.xlsb
new file mode 100644
index 0000000000000000000000000000000000000000..a5ff4ed22e70c9d19b22488024e79dda8144a598
GIT binary patch
literal 7699
[binary data omitted]
zN2kz(LZ5Zakw>)onq$)PFX7CVnp~+&vrq?&iB_N2PjlOd!C-W zw5?;AdvQ4%Yx4Z6ju_>z>fLo~qU!UfM+>dG=~q{i^Jpgt4fE7q(lpcqtj_*No(R=M z?H^KN%lIi;8Cr%j8>n_lOeb|Sfpxs?MQEe6CHq@wU{>vh^@{U|l4iATyy7S?nlREC zw&kKOnOvX)SRWBH(#lDV^-*`|hi74!Z$Zb78~)W|iWzy5oe9sT?}TJ8kvEq$!G(lK zo{@=B=J;-q=SG;g7qEX=&t$?xI#6EYXA&~=Y;$Kq@j??COk2q$C za&{MevWRP=)}H*MeCCS8y}s0>eQM`aiUUbfTJ15a@Mh{GpeKMabbwq}d&g)?C-`|F z|BQ#thi#2H|MLS`#~`E#g_j5e5UVWegi%!YJ$HUrpHgA>3D{BPflFf(et+jR9GsnW1c(0Ndp{ANYIOZh7spY;<*PX}U_r78c9$c>c< z#>RfkKvrTEsif$TquOwFe0TopV9K)y;H%qVBKfs(dS} zt?)JX*Ee8G1h)+q7wWXeab)?RZ6 z6Z*{fW`VsU&YbwVPif#B^^sbU3KYTXgFZgts=}(rqx>i-0tLx6Oayd{H}2xq4Tl_c zfjPeCkM~WYq%NnwzFIWvB<@YNO-GVf8;jIpKpFR|29^c3^9VqyK z%1hUeTD^qzHVPA_qO6}n2Cy#8Ijwjr3IS#T#TjV{mv)7Yy~SVMieg&Bb>5Ilm)Uc& zECJQwN0l6Fizw;6n7ph$vkZ`|1#e41WOgQltdMMg`p>>#kjp=iujLPH{K7T?8Z0WZ z8ya}{l=WK?yT@S9;%mq1uNy48qInhjv)%WWhv#Gjg%01%+tpSNk6?|*(xNF2ZBSak zTYR!)evaMNeoWQ>Z704tjL{|r_0tj#q18%e0g&N2^FWlt72>bCj$Zi zg#XyxZ}xTnn9m9=;#h5;$HO1`%SfWNF1=H;Yt)h7A@c(63T&we5A?nOuV#-Ke0y!0 zX}(Ib@;p_EjZ^M|^3*LUZMxfE3E@HcE-6tQ%2`3{j1Jc>eh`9beL5-|h-bM>eO0wx z89}&%Z=YpRYW3r0qHnS(J$@2z*tQ0)1p&oZCvQB%OsJJp6ba^xOB`;iXD@3Be(roB zUg`wvux*1n4guP_?i`PtD(N_$h*J%*QIMS=xO}v+QqYUw%_=}T7nN*m-RaC&_AOd8 zx#*sIzJ#!_8Uf(Zo$KMo<2;jsmFAO5zdpm0_b+zN($Q7YU3b=5g;j(omJJ*!$aHXB zCR;lX@fA0=$=XW4f|9<~$7b|N)-zo*4Ild%qa7B^5d6xqwTt}w_hAm@f}ME3JJtnk z=4=VlggV;Zg?Srj;)+71Jkc9KEQpesetHUI7A0-+&R9CX_aj5Zr zyqL`EG9^?UEi?@%yx-FJrgD>AC-Ys^nnDRf)HavUmr>MspYo*LP9kxs z&qs%dU(rUjY8@IO1iCX7QhyExL4-*$8mSyCK3pJtE~PM;DEy?=Q^KFYg8j<*Znuu0 z;Z`Njl&?+K5h^)K56&H{jfu^tG!iT-<5IcB^fSFr6MD}L-M?4MA|ua!G_8E=q&_vb z`0SIUK3+^2h00~p0!Vfq^=>;w&%W-ya{pGAaL`FQ|32z&wv!4S6mJmBLum}qPdz_# z0Zsoh8fPtIH8^1-4`C-c?EIr9{i8I1)n;bSo(~;BmL9*A27@uCAX8H{1an48StMeE zYVj?Vd@LFu`Wn2Fs);T%itBLZFh3c42Y6UJpV0|rkxoF5K8z8$1e4gja?P8P zziKH}y+1PfX;&W^)d)Qxsn0oD;$0ck%rH2wr+Lvjnkre1m;#752VmGbXr-@GC<~Cr z;IMe<9grU~5fBHq1EcrqVH9a`@&@uN8xp#6Vvuc@ zk)z;c&E7oS@pJ(b>Ahutyq?C}@_BeVO&QxlW_+|Bv2;O8OX)q*c?u~Rv}A!t+s;my zWn53UjfMdx59By5`*qrAhYPc1kQ2;1`!8q9j6ybADG&GqR+OCSuq+NATX3~}l2JIb zRuU?LLy2DqEaUednzGq&IPe0`%+!@VAe7MKm;27Y=Pgzk|L}lf?ljKBl+aU-^t-^+ zDCA<1Y*46scKKpPOqIoAD8+WVv3tWbm^`Ob zApO*L0efTn#w(B+PhqIY=d{I9fl_x3%{}Pg89Qt_e(z+!!L!14fM37i`g76#y#L~T zmzv_w06#a{|1|t@uYob+Z*BM6hCg>6|86)3D{lXHAM!TN?M~DU5;5vuIpEJu=56EK zt(+TUe6-(;ZyGzdO>Z|MZcJnE{Pny4Ut8if;O)Bp1~49$gJ2H1UFF{fxGkP;0Gwcb zf}gYDjg-2La$70fpmf570}SOKy5Tm$&#K_Y9sr=c2LSv-BiuH>oyKnfN?_9F=V{zd p>bJ3e&eAtX000S0UjFr0{>a&Ciioh#0|3xrpC1ZzhWN+R{{cg%x<3E_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testskiprows.xlsm b/pandas/tests/io/data/excel/testskiprows.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..f5889ded4637aa8d336643b8d42ed34798c5cb5b GIT binary patch literal 8281 zcmeHMg5wkz2I-QLuAv2_!J$Dw8kM18Na?x=NJ)cqH&TP7^l!ZJ zU9aBXU+{f5;u$D?-LKZkg(rKia4o&g9<}gZ z67A7G;wOTKPsBgeGO22}t-C5;o~w+71CmKTJ0HE@db{gdj+)z@;fR9o0K=Hv5=#lz zGMREVsXYJU9x%+KGG=Gj5*kv5Fk|cO|0t~-$xLnn2U9nnh&oJCCpfv+A0YoAKVFy7 zH`X0L&#+f0V$KAn#tb89cwML)ijAtS@hM+%rU5cc!Gw?Xe$SP+%8FP8Y-Z0uC`8O! 
z7^rc?9;{_0fC?2}#-1@-rG-#kP*s+_$e;XZD9xY1-KwC>FZ1kUczksFIc9)yo~Ipm zxLojxJ_5aSepJBT`T!udZ+4%O8t9qK++=iz+Y8$t_BWWssL@kaR^1|{Q&@;J=$uRT z7-sULpEbZ+IQ7GPGk{K8P>axVXfKdc*qa=ay`-6kWjy~uZ~vLNZ8PI2YF??#x4`>e z=C;sdNhv%n>?=Xa%b7EoPct)4;rxB%CkS=hYH=$}aERUAAptc0!P8nDt_LUZUsT{U z#Dw$I)XmD#os;AH^?&^QFDB&=LoZEKRqy1&0>c!p!+Xys7GB{=DL;R#&Yz?^nG`AocyVDCiTjeW_CLj`dQ1`qHoOS-QNE53^Tm{6lC%5k$Ff>4Dt%SD7ljVGccVbi#Z}Fo{@tuwl2V!J@^t za)b#ft^G1^WxYW5PSS8@;8aS%7i@_LA(g$cbmCrj3!9m8zg|b+=>@rtmaULorNvt} zVLBgEOUDn#GMR0JH-7ADS-qNcd<5TkhUI%0vQGmIYxutQzw!AffZhr1J?smKrYTy4 zhpb;o5-k2Y^%y=QO>mNs1JDq>9XbEt$#amKg9QlW@I9RU#Tf*6OoMy*?{1}&a^ z@LDl%!?~VkdJ*H#cyQ9~>Fna7_0_Pr5PL?PhHa*p!ROUnifqh_uJznRJa8{8B zPup3Gq6ms5vosc%=Fi+2VI6~?UjYp)YsEyuvl;0HA>H&-Fp|}Lo<3z zhOQ`z?E@Iy{tRUO)-HR7cGPlht?D!6(4KpVwLuA0rY?CT<_Gqm4eynw31mrh-^fXC z)`6{fNL_tmX`4G@>py{6ulMy#EWua$SIFR_z5`Oj;lc^ee{kVXz#;P^Sn{+~@vn7}yu$k4@;3LCX4Ab!2^TpB% zb^{!yRNF;*46t&a z6`rzsYz9qHtW;j$e#sQePe2K_rkNh%Dst*iOQMVDy|MWDG zMIe}z@x#ioQ@@BvZr{!$7lHu@j3haQN@}lR`9L&8lMd?GtT+2X&1|l4+g`4FAEjnJ zc~n=xIZ~tjq&GSsmn$Ntc*A1BrTu_u7Tse(;MFb3<4C}+_{4mba3KrW*K9a4j9~mZ zjW5R`pHiY=-s7+IU9rGN4%zn)NMzGis<_lv1y7TwKfhQ>d$K$&#~My~Ykn7aL0%pzvfF#HiNJ&h zR4}LVr5PO!LPL*UlDZV1Nc18kZ!t((6!fTHwJAQFT--U&!ahrPrb!$}(yOI*BWDs6 z-V!?UBI`~~Lti{gz-}I7?na z$_N&OcJs=0KHZ5bHg=#*kM`k7sA=~n!Nw?EkC?U{@>a&jr$GV7#1toOX1N=c=WU!p&t*{^m! zBipkZ4;4;O6GazxpGS^#V3IwoHzu`yGM+Lm#l%Bdv)R*{YImG$<{% zwH}aLKrWg*m2uo(s*_Zw@wsf)9o$*oU^&{Z@p-eQ^e9PEPV)ApP60 zLMwLGV($R}YZTurgx?wBVQb}N#rgY{`}?T&3lzb%6ILY-|U@fleHl5)x_ zzpKTxQWRw|aczHEv+=9fR0~n-`q`4BHQV`;ATc5*Bzj5CFXIiWqXm*C5NS7t1QO!8 z$l%+qdybJ;u7X+b;(6zxQ`DANR0Kel;mjweZVXcN@fSgxio8wZyC#Owue1ee%(~sj zV!CDIruY{FVGm98So+XHE*gz{2AOqA^i2&Dxv`%4q~E6xw~^-HDP4vtq})TK37(H( zXV-fORVFL&33JN|o}TLj#-i`oC5(2 z$=ZdH0zN;$)ea*~!Jy{Ljk<_T;VN`pNjM|ztc&NYT* zzPX>fFdorfRq=CD_|TjYEEz$ zGwOl%a+We_5u099GVwQPpu?t?Drfu@y4U6mM1Kg|PW4p`3cEJ%C0?E!&iceL%v3Be z%0w*C?fP)$Yv;Q-8ZqI3Z@I}Es`K4Gt7?81`!<0AH{6$NUj#U50`a?*@89j>zPmm; z-ZH0MpK`q8{LSxZu9}JSx66}r7&Yp0CgS;1?|5sO9HVu#{jXSO)>vx}YL}~iyuK_2 zi&$A9dqLZAN@B}p+}1$j=x^Q4tU-oSA9i!E-j-h#ld_QOU&v+ne4rXKh>p%iaNe$^ z#5|YtAuutHF`lRH4u{mv37#9zkr96=#(RHU2P*ui<(|4?yb`u91H6f$!6QiXiq{-2 z7vUo@K7vS{GuxY!^C`xkwMu5F&K8h;v!c|*Gv1%E-r}%#C7C@X5jshDg_nl%8iv1_ z{#BToCF`ZknM=2E=+eiB*es0_BvsM73Mg;Y(AJ80X*I!{^CXp~JzoU4Dh)>VMI5s( z6d>D=6*g4|8l&e|QTK^#WHoFu3OUY@d;q|xQ4T%eH=J``nLt#DWj{zWOmnsI%7d;! znovF8AZVNE0si5JXUB~nWi^lyY!!4UMm=nME39clq<@24Jhhb2B$lj}c*#f^1o=EG z{&*wWMFWp z=3f^$r@Dw@H*wjLV~p#lWj4^gjJJwjULgn`q@-`Sj41Jt=t6n@O=moW@jcNS1#K7f zCzUpov2>kUHdwP7AjjAX#w8vN1{kvCcAXYTNW&I zpc7Kn7e-L>2`m<0qfBQqKf$&~q*;c9JJiS%1o}S2sMa{5C! 
z5?QEUjmi5i9gCNcGpSlw5y&CXeEIE|h(-wdsAjref1gdwEfvbH@4VVt|8*>H=Bnc^8Y}4ee&?@JRF-UJxcg-fFXM%8Baf(PWtb-{C#ub?c`Z*Ace{#QJc6^dQ-p`FAX{c3$ zS5PF*AYnDp*)vLwXQo4yTHTXtskBZ`Y}!cUd3ED4s4MRp#i?QTZ*#0T*{5i8Pu zFsXA2ikz@%CjR}08y}=(mDb9G93FL2;jz1jVZ{TBmILOl@2_jz)eG5JrPYANwf0Am z(J{6Xlu&@plFE#iYWTYB!yK%M$ssB-LOY*2N*0*H!%q27%Xg~9q*%h5%##xDVI5dr zUo7=w?JzaS6rE1fw z#aQC_iP48gg!3G%5s_(8X)~z|my-Km#g=uEVd9QHFKJ6SXF8DVLTXN?!#nzLX3fTO z9NRT@>+hS$wPX7jd{LWv$>yJ=be`4ymc-`P&4lB(-!4VpRVib>sPkf72r35JsTUM+ z#52amxW@SOZ~lh1c5}77&F;OyWuY{iijKRiiQj~EHHMhDBs`_E@D20dq8A4*oy;IS zT${lo81|n**~7=t>i1}zr_<*=2hZ;AZ%g2{uo_x4*3q&Q?W3WM+(Su36kC^4eyti7 zEvZ?&kaM>Pq3_4g@f$D)Rc}-80<)E#-An3$@N^+2czpB(1d8s;*uY%xgU!cxhi0mSkx6n9cEYNFcT# zkB39SMFJzFkrOScL0&=Tf{8X{yQl7}5(Bx#Op2>@JQaytj*7FFrrrAu$y2-`1)q)s ze$HhS)>@vDy8D1FXp=RAf1leIPJhWyS~F2n`k_h_N|^SsjGy1lYnx>abL~W%KtHkE zdxxqXfm%^QJreK^ErB%jM)VZ5MYJqAt0O1Q>4FLnEGchlE&syJ)9m*Ks(=+(SJU511#(j)*Ml zwfhnIZS|LEC=@u&rF_$dV8pODj6GaYOb7v~TW(Ezl(qE0i9Q3yn*EBsL8yIvjEY7L!X-j-BRNI(Otq9nsuSQ z@cZ^tiL7SI6Eg|Y29m>Q!$F?w8{bps#zqc05Pm7i;$C9+B9Dnvi+7!g6Mq*^M967; zU-KbNh19w4V`!&=g~#Ml z2#|gA{E2|KvAjSM*R0yXskWsmEnBpb6cgIB-In8l7^bAcT(&;y8vK%sF%!Heh{Ss7 zMf_TqyIVWvDv?#CpY)3#ri~GjUh-XC+qr4@I$!{lM<6^}hWbW@=C9_HMfTI_pvBE=QgQG$ zU#2A@uZtBE`4NkM-1tTWxH__87=(a2?#q}`7AT26F9mkW`IsD7_IZ-Zue}V7Dpq2d zlC0hW7n8Z4<>`qr=G`Uj0hx4ID(U`~drLBCMnILPcc#2%X<`L4Le-qxC4|8xS)qEI z0IP?@Y@1dsi&c-oaG5g!5vEC_+#7d$N6TZhJhw(m&D3fcvDHT&^Avqc5q*gay8doT z>un|Xah9_6jAPD3@G^EJcWOrC)ub{itey6y$W=@@cToG-p3Nxz{Ij}ioy_{$#o9Vs zhp)ADjt)1`By@`}@f#0pD7~9-+FNE~L-V9JOP}@`%OPcD$NeTl#cT3hv|_vR?0lSI zu~;kAr+Tgp;y&rrWM1f1WV*e0=_)I5jkYi{-s;)@+<$Qo$9X%;Gt>DqMEWMsBR75| zHiN_S;AwT{{D{yl|3ZUw_+|4Or_ILnqztEJ)^Um_WSUY3TZBs!g8=#9<0rM5vk93} zR<1`wP(+F0pin{-!InnNvlf@7n10m2Gk9_RYsoZ1!Dy%m7hx>mNX3T>HkP1g8g3vL zcg|-ZH>*F-9{#7(!08*7_}HP7mJ|#Ny$N3ydqOem1XP4jiIs8b_jEhwm>Svf|Fdv_-_(Ia21a2pg6RGU<@K#7?&#Z?HAw5Pd7f<%5tg;}QH^wI+VL(I ztPHtI@N|Ll4hOOF7*!imQiPb z^c&D!yn>>N4F%7ai02PhQ8c_3r`mF^;rrmP_&ej4){BSZj}%_{;{Av}a~GHY!4HnR zKU-F!DroMzw2X8{gS9{*w15hh)aA~j@SUy%C^oNU_j$0%q#!4pCf;0*@@EL|sx^OI z>w?PDPYhNa%i@wf#Y)!aPgz2RWV6 ztepgbb(A1K`L(UlHWo(%Rm1qld9qbLn3W^B%?kf)4-R7A?t4GZ6$L!|qKdH@4V5_P z+uNy{IIrc-@C6p`N?jVQqapHD)18W;&4?<<>iB4gp4+0fNq64g6(9NA^&Y zJRLfycbEqIyIqiQ$055L&9WB->!(BZO5*QoZvP|DARw~CHKu<*#Qkg0{_6kbNLNGo zpAPr-}bF^k4F} zpS}DulK$-j00@Amf4{`lpUwZd5&md?Oa6oTpWC8_G75Zk0RRm6CkP(8JE^|k{U2pW BIAH() literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testskiprows.xlsx b/pandas/tests/io/data/excel/testskiprows.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2d7ce943a7214fa4ede1cf0b37d371fccc3ef2ec GIT binary patch literal 8258 zcmeHM1y@|zvTod6f#TLE&e^rU+Ew+Hk{ld7J^&Gb3;+Ot0OS4l7Wyy%KrB1}fCoT^)fETX zIh)xz8>qS4n>oE=b+fgh%7TZb{|JDEKL5Y*Uu=Qmcl`<-?AVeQGPmNZEVA>TMUkKF z`S;*5Dhs!ECUh5?=w@14K4ga7V2gnXtavJLhvz*xjs`70+u7Fmhjurrphx)iwyNus z^00OF?9#M6B93>|(LVTyPb&J9*uWs_bvhu$p|(SXOJe!CY%z%i0im$(%KO|^^Vuc za$lV2>o_M2O<)(S$a9f=E(kv@)aj13W@_=$^YUn+kB&H1x^Cbm zurOfJ%@p+m`j1Tml&mAF2unEZhTnFI5M;mXKWwL3xS?6bBkNouHEqa@i8WOqqPAs* zs~e|HVG| zx1|@y%PV%UqX!?z+=ldCOf1A=i^;utF4G89^Y(o)k5wI&{e*O(g^mncjU)h0+Nat3 z_Uqh&VAOUm@ccVhaRe^Dz~h=XCBezp4lYQHw2nzq4#mrzIIa_C6X&VYvhMV*%`uEc zb@^bqzNN=f<3|z|*u$)vBq(^fWWj_&seU@$^4g2Wm!&WhVyb&3!R56)AGZ^R(|x9r z^0qKULit|qj-?XxIvJbKlzR2r0MD*S)Kx9`EX$2E9r>x<4NPrXPbAV?@b0`=6f$~M zsJU=2IfkWsXg;3#=u~kX^}l!TOJi9kD$z7ghr@eoy1T0UGfRkA`MU{Aps!6 zxY@A%?k8_Rj@HH?koAw~^{3CkKtmW*%YXMMR*{$MWXEnsyANT1lkQ4@Gvmxgy{o>1 zh1^%gFh>jI^gdl8Wo&q*H7Umi2MTsS80>Mqc!v2M4)3guDL)(+-UH9(KmY+Wcrplw zY`j-O;V+GVhr7GIQ?!SMndXF7FC5>?oQco7eo0S59FmPl!=o2x7PcMmN6!nJ(KT&ykOt 
zZePp04_UWmU!$*)70FQ)J`?f^cBTsHm8SGzh<94cN^Mj>Saue>`O45Tcg);>dhmL+ zuV-Qjy3@Zx1_$W_kOB%9HfXwo1$_bvnZJW2M^!#%9BQA(N0iJ2fV;WCJ*gSn)67y;mnK0;xa{**vFhn)`{a5#47D-xz?rU ze9et_O>WX*E+4J1bt*D4Q*)=f=6H6Vr*|mqz&!_;InDA%T_k_>G06W4H^A-G`7=k?vhrtrM7mSmO+ZP!a=c#s-@#Aos)fA2-biDi_1*^*$jWJ#8_}UMolWa8>Hvj#HGkF<7ZD2nEcqP&yN? z6v%}_XchX${~bzB7DkR{rmD`4mUiY&KMo(gq35XW?ASre%@FeTUE9LY&?j4BhwvW0 z)6upcQ@;(Uot|q*1hPJW+}QIxy7h(Fon%anUcLJmyZsH`ibZee(*y;6#Ha3yu#t8& zV#ZoMqSwmfNz-C<9As7N4IP{HGH_$g^|@rtFH>~~#if?)L+C4}(m&5{ZNm8#j>{Mu zp;_hFQyyJfoAdHM9wKEK+Enyf<`40}9Hcxw&Gm%)w`B#CZLdT@bCMO(9|gg09^!0a zW^2aw+x6*>RqbjI+L7{Le|fYifbFTsJQHt190wiv#i#`X8`kD-V$o^OL zdOd^m>V;YcI`L1@P25u-Jqa-vXXPk{6v-qZ!czLpN3yVJG8V}Z=eY+uX829dbpWGK zwk_jEyCvL0+NShg2w`(plu6=&g4-}L3rT=Ip9kUjJBx|i_~E?2J;qWCBuYZ1;K~la z3QOWHWnDgV#8pLG=Rav!|CFJrKt#rz{!DW01FO#7B#CBC-Z6%6fxF(PamzP_s_uxs zD@3kVE)Hgs4b~Hr5_xc+DL2e&na*SfdjyU(*PZREbV@w3zjdKLqq?c!4lyVyp5#sz zYJ$=BdW&gbZCf)}M6u}0?CpZN)I6qDa^(gmOFW3RJ~9hluw!_OO6*ue9#{tBRie^v zP9GX<>AXb~ET|b_%+hgDzX>Y}N36jmZKW3rj0T3!@$R67KURZe$Pwi;Yc?bjTo(CQ zudB+l#Y~~NuFrtAhA=GUV&yPuj9E&I<_y@ zrxynlNRV{ci&t(juO+f{SCRLQ(9d6^uUIQwfA`|_V8~lU&+y;%-HetMf|NXc4b+Rc z>~3W8)e&pm$-c=fy(u7KAkn&#N^@@|AJUG9_y}XSSxts^A?1#%uNSE|PthF$uAbw) z(3>MBXf440d{P7Y)T!!}ysWnzxGDj>i=@QHO>vD;87>vzBGfwui(SxL8If?kh&gW- zO_QI^Bfc~vQ^D5Tn|Zy#YUMyUdq&828W)S5g7EGDXFc_ZpMoJHNaEbSTQ6X#lM#cV zUWBkBVn+rcQvrD;pOZ=@czvF*+@NQRhrL{TWKX~*<4Oj+`CMjQexN>L{yWkhzPY55 zdDbYq<{mrJ-Q9GDz|!mZPSuJ97SAo?WuNAVveb1qtf7xgSSlqKY^>|PEd+owLgMkB!xJ;Ula48PLk~{m^Ge(B6dvLW9dY=kc3X&*bG++ zhH~r62|guX(X*=QTCF{11;^wf7A?E)&9z5kIWw8=FE;kVoK5S~Gnl2N2lJH;N~P($ zB{@mN(lID2VVhW83c|Eu->6sMa_=h2zs4)&1aY?;X$Pg_Szpv+cnT^Z7u1x$w6#c) zHbX9-*Fz<`OWrY@jF|O}I^`KLHpB2=293k+ZZ;;Nhdj|Jt$dH;#AFPhNj#3DHWnc# z0}GRe38C*#fl+UWl~68#!8oUn>So6$xnsN>Srdkul{k3?qP63`$J@DtD{#!T%Tp-2 zurC$YNC{2rD!r+!KLK^++#=X^VXQpNXq>vkKEh{$+w&u`OG1$1H%P~M#JJWfCMmm8 z>TCV1gB+X1UI;w~m=E!uyM45(^ia!ZZkAFNEUdaW3Xg)i5hsfPY!sEFy;i`{U>Rm* zicj=^DZ#h>wY_kGE+p`b3#oLwQb>#;u)!!H9ua-t^!940AAOsyq4(;OhA?;Pur9^s zc59NB>pih`T$=dxkn#v}=G(lyw6Qj8%yTD->ZFQw*CsuY6J_1jXL$3hOrc>Z;VCo8 zG}oegM?w${_yb`Z_aLf5wwZP~OaH3V>5%q5%vr;+ESokJjoL^0Qf(OS+FJ@!LCkLx zWG^zhGYQS_9ranie7P3wvI;9CH+RLsj~jiHT-bcl5CU;l!&TMlw#1oa5!@(+JFM{e{&xV{(CoLm!MMveo z*;8{QOG9EjljQI^hMZ6;>!qEmisk1u(KGBJ8Ta;mZZ-%4Q#D6n%_G1@QNwGRw|$OV zY;Q%ss?LOqX$8nm$f8+A(SG|duV$Dr%)T9=U-Xp$@dJ`#C{kG=#fbL@$JHa!g zCc%=#jP|T(+hh6(P3^jD;pCIAswWsiIG9NzM(@IzqL^hc3-}|R_@7f)x6f#BZfM}O zT~_c&4~@839^#>M?I52K-$rA?KOO2ZeoQ|}e2i~etJV+8ZK1V9Nh-r;B<7JabU<+M zp0;#pK2biQWU)vNHL{e8`+P$MHz z#;;fJ(`VVvICsT`EGY+CiWXhQSirGHGb?ALAtI7H6?Eka#VsL{As(GRghB(~tgE`+ zB+`N@LAn*W4V(K*?4;z@5I&rhRD2WVUU}tU&@1*30WNIuxg>}FRW?(=z_vXhxBc7d zeQMYE7;VvS;gdOQ=iY(?hwgWA8pQDm4MU(EGMCk&H&NQ$rD=I>N&Uok5P< zJC8HF`g&Gs5Kb}S;%Oc*@^A9>9^GF-8*^EcXsUH1ExKnaxV}rEDQ;ZIt+NvD0jj;Gl~;P=#Wu8IMuRL zp=yC#n50j2zSDFv5J{KtDVw>Eq6()lZA>3q8J0jZHJ@AcdS_$X=w(<%@mH+^#*{HU zqHC_3TT4eJ4{KC_+z6OMRYyzrlaYlROnj>7gc3NOi6ylwcQG(;vX$c@p1FfJJQ!@6 zl@%pkcyOfT&dSS7u=FC|#v}`DN_Zs?v~qGyxTO&2J9X6*v8S=F(uz=-PTug8?_Nc!>BwD02jEs=2WCRe&CET_F09mz4n+vxGN(BR6j zvSAPm(zpk0Qi+c!%Dh;xZB}QZPsz7Qa<8@$Wb!Bx2pMs$F%}wqKf|k2J+y~ws(oVd zz+|Gm4X36=&ieg$yQ58XX8*9PEJ# zrCe|H{R0(;E@WvNePfs%BUeDNMP541D!k#9l z)!$6YAsw_)1qs|lhCKCc8{0J>eKK!SbE}?STfJCaV_|(%U1MW?7ePq97=%;5Z%*dc zfZ5hG6BUpnzFz#QPfrRi<74y}335*TH-crG<>wdUG>ZkQ0q&J^Enuff+XkaguK5O= zi`Nd4Jh#XTBje33ZExN#?qb?)X1Ju=eFKZ%`8a3Cj6|iey6nHIOrIa&+u>fQ6A!s= zd~ds6pPGt9Qz*#1fyaVQC+0Yxefl&~=cnJ77e?48(5K#pdAv>N`$(?H!fFkV@% zgB?F;3E@G!&okZB%p|D>j?UAON=w-bt#5(QW-L~s@y>JHwI~rqcrnAN#dDwLYCC^> z6RkKQiWOTPIb8HaN~W1@M6Nn4ncgwevTa*R+)n{AQrOhoSKsGmvfRgwSey*W5*=e* 
z$rD97kPog{K~fr^X#)q;Sr;1%zjKYjSfRARa+B(XWjcJxK5UN^IVdAxo&$M2<~7@Z zgd7bjv+9?)(fK{+K=v1xv|;&(3jiSDri$~ATUpeVnKE>pXjIH^&hWm1 zpyvA(dFM~XHDjR2BZAhv*ndZ!k-h!@fCn|%A0s1P9yIrZRE9gJL|-7~TR;jH)p(ji z>M>moc+t4>vCo-VA_+e3EdK6#lsk=oN1^d(rK?E#$;4pAi6j>BEA&JyR#|1y@(NcY zgRq;0jUucpQ4TP-Fk*fuiqib%RM|J$9{5x;!!|+~rcvD7#CI0Do9L`{%DkVdigtx`ge)*TE!Ut_Jsu+YZm?Z8 zv>sXk{yskHuj#m`_Oce?CWZXHbHS>hx~{cg@r)Kv_hb4iJ*78>!C>3S{(N=CA`0d% zy^lA8CXIw$iUI5pwJe^QRME(1y?LgQZ^gb|dk!Gkz#u1@wyBYI)VoqN)_U-v^48&F zBH|(Qu;RRl7KGHkHsQDnaO0z+Af{bJLBfi@I79Ps3ps^#y5$qOoWk3gromH~=an|l z;K(jgf{S%K#Wvkwf44mx);M@)tx@tSZ}n`* zN>=zo)%|~{85md=D7*CU-P%7l?a%pNdbdh)|5WhLzS5t9KjviUEC1GI`dRRwT>IYz zo1iJj|IfVttmkI}_AgBb(1!F+bnMTPz1p^4uw5%p*Be;$N? f7vCWHoA@8cqLLf}baw#&ROrta8nlbaf86~ax1}@# literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1900.ods b/pandas/tests/io/data/excel/times_1900.ods new file mode 100644 index 0000000000000000000000000000000000000000..79e031c721ea34fc6097556d7a8d427f72f236aa GIT binary patch literal 3181 zcmZ`+2UHVX8Vx0ibYqaB2m%610I3Es6d_8D^w1On2Bd`&AcjtmAVmb}Rl!2cvrK zpmrAr7qlnd4UKYhb9S&rc{;daB)u_rC0tQh2dspv8ya)h)z-@ejq#Mgx_O{ccd_@- zXiq(AOfcZsuL$P>nw}JnLOKB8A`<{`^d_t)-WiRRxbNcp#-s}~ewBTav*wV$bjctQ zT>VH;eLWkVbeqJd5*OcnCIYN+$@}{*Z{v=}V?|TF-rVC!WkaK9BpdSD zl8SnJPuGz>U588-3r^w-JhH#jSg`ioX|V(*-D0_Fp*RS4ueJ0}IzTKJ=bk%vjewBq)-ZI_2hZm4j&NRLGWm1Gt2iVpt1w2;?~4a-;2dL^f0)+U7#(E zALJ45tHJ>0F%NrMs#0J*t<~Xyj(?kz*_E!{b@dgtQxU^Lk?_buexkv)l#=bMTPVuC z?!AtK!l7H?@(l8?u`?&fi@M7)uYYTQ5%T5+lRbaxPaL52;gcO<71~g#t#grn?3ZOl z{a!=-iltck7fV-GIfv);O>rM4?pmA`UV3bTNM_fZho0q0tfw31SB{;Lvserv7Q7M< zs|#U$Xr~Yi4i@K4H@;>*)S3y3C*>=lJ`Xy`X))4sGRk*4LVAMa3eyWN91D_{%xacN zeJy@q)!9D)`FLeO#^_V-nykWE6m$5#{swEf^++i2!FChz@Igy~WAcv$y$op9lnEl{ z&RPxk%I8DFSlK9>x1F+&5%aUmFzu=>{OBp1+G-+GAOCau9MLqPylU6!>adk~S;wXa z7N#TlkZHJ+Ey;(#BS;vh@OMh4k6}YPZ{u-+fiu#D{zk7*6gIvkni8XuYzX_TvGRHJaBzVk72k)yJ}*xWKC|Px8j=gs6R`SO>=Yq&-&{vIqEhW00B3>oIvg} zVL|)CS&_!GJ!DpU;zi4P4&UrTW5yUOB=ACpS?8yNFt60K^4EPpm6Q}ev6snRhjDgX zvzOYvlww_&B)TnsWLU;}!N~1~iz1!yEH!ieV!nKbav8DtZWL@jL%aSk>Cm(yz zah@|Fy=^H?VaUpUCv|!c*>3tpV6m2h9D0eveZQfb_JzvY!$Ug2|8m+aoIH-A2LO0Z z0|3+q0Jg3e&m$3_l6t~)vUpCOy|H8vr&5apEwb6r`1a+`2^+tJ`2*sh>_w+;R?&r3 zZ$J^yS-^PVSU$)SP}aV^-E+lOaaD3d&MH0GDB+$wtGB;>bE+lS82bSl*myuR>AmhS z29A#;x}-B1_9m^pS1y8^nJ5T`cGRcJ@hoKbZPo+)9|@$cD`?=#&lB>sVu{1dM67`M za8zW6dh$-yTF?*-_ig6g-3Gf2pLIt;MN-iYx^b~#)6?txGQJos$aMKuQj*R39PX|D zMZj*|%R1W99LFjAkl#4;W|74lesOyg7FHZ{oGr&;Ww)X?CRgBfCn@7&gHfz+luVrQ z0*cf3x%0q~ukfOesh;k}roPCtp=%~VOL3d(3dk@~n#3kEW^l^pHMc6kP;G08OAfty z%^t;7POH0KTzhQsOWb7lfGqM@7h}YnCx3{>?X9l%sOsCkAq@o)`rDTgisFtM7STRK z$hd=nN44oF!@#xx&7zFPO1)kvpS}4dUEmj+4)9ppw}gbsd7t>pwKXY+lenf^1$(i^ zO_bNAJi^m*Vwpg(`^YOXQmX?&D#{PdPw>K5BJ$uDJMKt-XIV45DD=$Q?Ht{^qP^Nn zJMQ!J5{Ul$9MMn7^3iycz(W|%DjxmB$UjSpfa4BQ3@~L159_o8#5E#VCbYwKtiyPxy-@W znOv7ER=V9Wwaxf}65>wsgQ>!hOiaYL;CmPbCpmDH|L*yz9q|D+*Mq%GrCh^;_v@A+ zx!0eNq8v>CWpQpC(FV#k9Lb0L^oH1{*`Z4QlL4X|T>VL>%Zn=MC8832wOAjfu&ahC zBvy`B=i52C-Vaj#WQt}(!sOYUdl)|q&Kq&6V+&TE5|6LZ)La9H<5fgX?YDiKXW1Y? 
z_P;|tD#X~DzbEg}Y^jZWJ>XR1{{VMCEh2N7K?58Ib>Hejv9A$iBm23z)P*!OmrZVI1tx*k77d7^CV)C&di^8amZ{U)j4tOgNL`L-+z{8J0a}Aup)gF`4&UQ?9y7>`*<#g6sB#I&(@wV|Z}3O}NC%36$0`%)?E)X0nv<)k{)TvT25uoFmU(YIBxG-T=Op{fKCL$mx4oX7_YHeGb!_p+m;Cbhw<6 z8#f|4R{DinGr?I}$KxS3xv0wQ23BW#cZ7171g(>d0L{_bEhK_vt@@wN-h~ z7Anbp`;eaBD^Ei!{>#o%e+A&L#`);(@Ae-Xt*3p2`v2DWH^PWI{B4x~#Qj-*f8#`{ zMftyq@t?py+437$PR*FV<<6fW{>-x9AsS9lHw^#IIX!LQv7;Fv_28%G(Nj89x1c(R{R^E>ULjuS{c6o@88pKf??0|zI*(E6F2v-jTYjx$bm zJehO%-tV6C`yS^z-+A1Vr=O}ga?{--Ulk)orAR(bl}U+>9>6`bO64M>aKq=1Q>m0D zis1H(^ck|iN04Ar$!N^09;Q~^DT!}msc^L9= z2oXU1=5MXq(o$?bl|s3wxWgdPgU?3%Q!()i-dU< zIjF93rvA~_OK-S6dDfbz0w?3KM;DWo;NB(cWv!B9y}fu|_mvV1&~Ggn@~K4D@6FI9 zi{(P;mJRr8Qo5f5DculLgY%fjfzCB<^4iED*Cn9uzpLN-fxL$KP3VZGwfkX8jzX&ln|8g zZ~XwqM5onFE*vraF@hK${YwPkh5d0U^TenQjX+$+m^}98vEypSjjNl#y4Jd_4voeL zkFnVz%wKguyyX&mfocr@7`Uwh)9dW@@l+ie2k|hJ+e;WIs@N=`j$b4!;LMQ8D+v+ui=_o9=VTlvp3Js=UPr&CbdlapL!hkMiX z(2dIex%4|c=s)+M-|0c$>p{QEgMPaQ{RR*E4iCDQJYM+sC>&1Wo%Z>W2VKjPTVBhP zOTWbf=WY-By&m*md(gk*L4VMLUZ_03R5)A=IqjqUgo`3Y|9(Zqo`HJ?N@%w#&qb1^ zClZg0d1Q=)?pO3;1RX{H4+^Mc`R5c5dW{Etsw&n{pGz(JGjcv| z$6)HE{5Dh+U>vxFioEfZGF zT6jAxSEwltp+4B=VQx2}>{j_?mNI+eYzU=$Bdwn!l*KI76UbO9SF@=Z&FKG3t}eZQ z|Nab!z=0g6 zvC}LqNCUP?mebg2xl<-A?QL4n4;-f57QV>nkEf$TC=*_Dw4mLoo;|41?iNk=Mz@eV z7PYc*8xOgSY=L;kEl!xTAUV>}#zhiO*rm&jp!`hJFGydbKM42ovmkhIQ^yScX73r7 z4~YIYKCN1(H}&APqU`mKyKgf88Bpx98Tb;K$T(I=3 zPCUW6wKvF!d_ejd{XzKMFDnTi+->3yG9e!j{q53Q9WIKql6d+Fmk-_`{H)n8NMEBr z2-cN05Infs#2;j0J|OyAuD3c|@b;@tJi!%`HwZuT_6yS2=noQcfZ)O1CjKCk@&VD` zw!PKi=2gGy#1mWudV}zYM86vRf)s#7OOJ@)h`NJMIp7vbAwv*<6EdPx$%j(2G&p7_(z zcT>f@CofDJCU7%@Y${AP^|@@u`moW97R9DGo;d!}VRtr_CYy#_HWPi==w*#!QxZ?S z_3lgVY=)X_X6CY)>cfT$pCU|?V5rh~;^>jTxw9E&vYD03X1Wg>t}oJT!f2a!{^`zU zxXEUAE}MEEHhLbcv?+@xK6>*7cQ#dIGt=MG#(>StJUp$h@qxbA-PxRIvT^aWaXxJ9 zp4QiR?7;`z*^DsRxOmzmA2xPR>uWsp`a|w)PBPiJc-k}{Hg-?zYwSDtqC1t#*-93ji z8wNC_OiUYXvT^aW@jh(qo>t%R!k+8g*;JcsTs&>E4;#Cu)i?a+x$E88j4|1`cv_7Q z8@s2~HypU>33oOTlZ}g~)%mcods=-%?~~~WZMMxRCL0$|o9V;G?rHT69~`{KO`Ab7 z7OP&#jGK#$adUL4)N`y9S$_G$?x5qcpztUY^b`Zs?nU^9a{e-R(D7MN_>T!X!2q@U zjL6VuDUFKW&!cT!6#ZdjFMq|#58RQk!1NrEQEFE>;uNL02uzmXT%A7G z2VTOmH9aLLJJ~2(f>X}fvJqK@k>Cf=kl3b?u zl%mFTqef|JP)-deuixI(wH$%4b64^OOagfFtuvmGsk4+U1E@oT~- z?EdALAR#U_wsowJx1{_3xrQ(!5SJQNL$61pw>Tm|QN|lHvB-)(+!HT>;ReZRkYCP= zcPG1=5MGSH95@+)`8)tq(=PZF1B+hKsCbI6E0(_d@ChdE@NlUfL3eC$${G-`JP)4U zr4IfQDsBA$ZnKNkG8`%c#Ce^`_^$9;Cq{4p`U^3V5Q>|VT=ghXu(NH2H10}g1Fe3t zLYjWELYjW+4GxsKf?crX-AUErUo)5v2lR4fgWNW^hkQmzN1Kz7l|(;TlISN(68*$b zHm|1`wj1vhp|}_gaXxhJ$}Ie8GP>$~y6TFyWIL=i!&$3n2%2XFyd#N*qrGyyJwS-l z6+~x}9$@S}6rH3XXmH*7A>~w#{RP_HiX&QE>xnK_Aq&t(va+eGgCR?Y*O*h#;6Oy` zmEh2Yeq}{1x3J1VTv_<)@WpbqZIk30Xjg&16#!YQw7n1_Z&i28pgV&hkDSki729O9 z@}O)9_MmkbMUx1Xv*l8CJo!p!wVB_b;TadU#^cEu*M~>Q^){Z`ZT07tn{lN3X4^Z~ z+X*)g;Ed#>fiSYjx?=(P5C?Wxj~PzBqHRMwg)u%XrCWQ@y%unhKm#6D(qT87;cEV zufUc6eG--rZ+-U<%hsMBTchYx6@8kzep69;P{v9V;Lsv9fy?pc(GYnJNB)*7eTBMC zMCpV|$6}Lg_meXyZ^Wk9?r*}JpNIj-7k~+Ag$9-W7_)wpvJbO|eOWP3!|?0gZ%!(`x+Vn(lg3jocG^eEXY~IZA?Cv#)HEvbt8V?>+e3aDBZYcKrE`Dx zE3a*TSW&2DRCX`f(f*hJKA|YoF)DLjyyMLj9sL{@ice z{raI76ouMF<=Sr@yX%$imlcJ&MJ08~q?K=f`&C7uW>J~_^;OTk_57a|g?dG$efRe+ zdSlWtMWI$vx%t3vpEU6Hgmpm&E)MAkH3(Sf*IQFMmxec2bf3T;OO{(lxNLblYJ&(w^+U!>2c1wO_w zR;&gYG_)Gx0c^_ZCIvFy61&D(0+`p`@8l`_NlXx>1`JyS0c9}Gg|c^ zWA_b%kN*F_AK;h1_Btx@dm!uZpNjC~`phV|tgX4Lb5my`8CkleIo_^&!}GH%m*W4( z7P8{=*Qoy~OZNa$3QB(Z2cLcN*?rSTmG|y}{x$F4N&O!~)^SWQeCwD*z4_s|1)06P z9htIoMm`kzGGvaOS0G#VpnYycw%Y?{QAz7ENMVscOeXWU8cf(uY!5vxkU=03lICUc zm7QJdH*v@A002?| z4!VIn+|k9#(Z%GhC&J3vh~LA(9-M=L&Y1;3N8SJ5@h|p3!rfl=Rzac$mL0;lGETQ% 
zG^VjCY)%r&LIq-*$+>G#`uDqaY-e5~pX4FcMNZ*~Q^J+PbNw()UG{02rjhAL{BM42 zrir6@M%J5l=c!T^7`avYG&W2Y1~MYl^K+4_281a@Gv*zSsm`7d72*}LI;}O2sBg)! zu{4I{ZEKb+leC?gZi5pliS5-aTAMp^DZ5r?+AQ@)*NoUAMk1tx$sCF{*i&ioxw)qC6YGB@;SFCey!kwn@SNyaSZEp;L zx$#R(ElF`6Lu{3-ueQ1C#c4okca~aSR0ro6g+w(H)cAVRHQzCt#B^(J)Wdfbpl+$o zf^QCmj}d`q)?(c`Mm$8rKdg8b$w*X}78-e|IxC$92t^tIr#MRKb6SBESN+Z-{jGBt z_e>Vp(41~u8DPOz^gAb}ud9$Pcoqh3BZ+?G$)A_YeCMuj^AibwYs>5F!ZSH%ijlr3 zeqLMv0g#`VD<81beuN4~H54(gpqOjoWM%Ix!2kXHA7lT;H2vG7N5&7L7z^q^DmBP2 zpKLfP;bd3WgP+e5aT6}EInq`~<+92g9a?e}UaO%$L+%Y`Hq7>q7QPi#;3*Aog$9@x zwBMW5-X3l!eJ0bcFfha!%_Hg*Wc0}Q^`o^oO@_yf2T&_=N}hGC-N7*E(M>3CTV-#e zPKyFpXtun@!>s5BT1hcf=e9D9LwCKjWT(=wA^pQ^{gc&s4kfFU!MWDk3!5hi3G9N<6-zt~j9@JEMLqgId9rb^m;4nqQ-|US4FCtt!(QNbs<^|Q?9AbC zJAU)~j(^ey4Ykps*!J%}iZz?RhfU-6M321oMFuB3uU<{5UhbP;Z0((Dks|ZT;ddO| z0j?u{ssc&R_}(4uE*`ArBHPeds+)N@E9fvDlG=Z!#Vi{i?FZtRe<|4txQ)p|V&C)m z<$L|$@H*nm&n_P&#*4zY3y2I28!7Pm@;%&yx0p)RlB3`puhAZD1Bb^C22SwWl zxWgp8uH?{!K0lRqFZ?{QAd+&#AZZdC+h&o>0OqErwZt?L^|F)f>fL?Gwj$OZz;S@v zQ$(TH^XUd*dptul(^farZg;0f#PT(+bG&6oW#MHPg_9PcjQ6Jot6$*4yPJzW8mQ&| zXWY!|*M3|;&G4Y&fe>{?#m(O%#xS~ERSrtl;akwh)g?wmH^QX!Fb0O-rpO#=$V_+5 z`zCqjV_Y#jtm;qfW;G|sed+bNxdnGd_=e91=eJ0SwK-?&jxq1^4B#hRNwsg{;kOqV zbOQ$2GTqX(va4MAVl`+(In*O zy_NHK@;ukhR=9`qr4f!NkK_%4^K| zYBB6zF9dJZ``wwdnx3`O+BR3HEW?7cml+4lGgm6kop9q7nooM}Me2wKI+zz2YCjt$ zn111)?fk)Dsp%zEE9)HoaS%|RrAbThRM`ZcbB7XP|G>7BeD+H! zPdTQ;9mWm~UKotX zgUz-m=3*_JWFbvmOw#uSJHoW4%wp|6G00RVwp~k#vQpVSJaG6jwUhi+I{UmTSbLT; zBE~+;P9M@HB0gB@kL;cNEWyduMKIZAY0BY0itzX}SY=+srsqwfO-=z+<@zLVSAGRE zft`J^?x{J!J=?xYby%6`6RDanX#+7>AdP&Q4F8*T1Y=ej2fK_C9%;F4Mb|{I=nJ~v zU)*gh$$Tb+WRaM85$u_7zOd##b^W>@!^5-vQ3Up{C>U6_vGfuP09c~^-kkhKK^K^n zgO$KVrJS68btley`3#)I? z+Quj;g$2}tA1wL#sg$(kou@n<;@+WKCK*O0 z&MK$L;NEb&k{YVCxrZAS80~Izt+r~5sR(hiIkNNlQh)e-N%2~EKILEyao6H!E?$6Q@R0h)_`%zBXsTysRD$krW>ej6Iy`hyc@%If zHShIPA*8TJWa8KrDN_0VR8cZ439&-Ep!8k_(siQBXly6;Q6vJz^Vq5- z{EE-|n;$l&?eO|-c3wpqmuTD7eMP?*k01Er`1-wVLEXK4EK6w-)vpIL`pz#uF5q`9 z8nAg+){lQK5%U06e61$&LF6?~xa?q&U8P}67!uD4lyC)Z5l} z$0Kk7J;P|%>b|mFz;gjuj`&RZevE;@1_+G*K_#Q_mVJ5xw>8G#5xl5ta`o;tw z38Dj;KlZ|Q@)-$r?Uhhs*7{ndj%T zOL$+CnwFTo64f=&h%R{CuSIGu2A%BRLWE*Pt3+T7!>x0elO}Q9cily8 zwA9p+{X>xZJ7FEZlVjAur9L@Dujj72*7Ic8Zy3Bn(@C5d2-kTFI%2e6c<(`hBrbgF zMEYp*-TVO@S%RGJwf(kOf0zT6C!Ea9$r$Y)W8kBoDM%>KP8D|2N_}J0a(IopDJNeT zTPL1zF`h_jLj?D6+JA09E=a@Nh}c&?cL@s zV>>e?w|eEJ)Wzq>qNlfoRKOdWF{vq-2;CJPv(fW0a}n*gVb&7DeS87pAJXr=O9r_e z%P>Em>9|@|WM-Vl)nv}sy)h5-HxTIYd+g{U!lyIIM16jpuPMe6*6pI=tqpQBQ#=`v z$}dS6BqMG`I0bdsvmXitnG|qO31^dR$*bKSB1BXdG&&f+=@;XxTMSNZo_6hvCo1@& zlMK`8rf{UuEDiMJF2t_U-f)bRc*m8$EYOu;PIED+Hhh9>2q{)~!SJrtqoDev^2x)n zFjk&YMQ(!-14NMTE>qKVAlMM{JmAD(&m>mstJl+Ic~{ox>AmXC<=9toTmE(r;YSdhBF8%A87$D2<@uYz`$ld*8gb`66j9*2W7yOmt>0WPQEz1;8x0Br7B7 z%*RzX&oIX6m zv5x1MzuMczS-JAfP&@k-l<-FNkKO&pzHV!2nbODU6CWJj?2iUImq!Z|4u%309t7fW z!mKRUEfeZnK}&%!Y_K|-P?jlHE}jyD9xY2k4DUpH*HA}SR!>J)Q(NbdU}9i+lx);T zE@8qhG&&F-m=EO82NID3W8oAp1`nx$zVv}eiS=p0`qZpUBOszi(=!OM8$$|NSco3( zB(@e_4QU5jktI1G4jeajm{>5sD^S@v)Qh2shT*ZlD!|uxQRpL(Z+wq)aaf}Njl|kF zcbT3?I_kq-m^I$qF2Nk3f66N2f zk5BTB1P`2uCQhVvBu^dI_Mo zgqGdp^D>hTk24dGAZ_D^Jd{@thESS9)@@}@>-*HuLGCH|BTWlWB_l&%K4yfuQOye# zl97()mMEY9qUslOpCDpF&CsI;8ET-aerK4Ola-~Gi_?8ao8Ri))WNRiuBIkHOG^WY zSB5)?=?POZQ=%Cf3-Fha7=>j%5Y$SWp=xJhR0C=Ww}3do5zYb@a3`xj1lIrhPL%7s z03BiZRh)uYCm^Ue_ruu{8(~5^^k8 zI%s*BWZ@_0$Uuph2{95QBeeaEU%ru6d{(X?sHuvEIpZ%|xqHf6z@4kv`XmXjYQ^;C zZ4&m}7czEs+9BoHcCav9L-|?+m#ciS!3McbkYg7ff-Z#ggCASuy#R@-*AIICKg46cmf%Vgggqj91&A~@~H^j~} zB#g_XW&AzWIcfI^yA7PYbo-&yq0o)cB1R5vI-C0`d808~$4~Y$7G4c+vYm4npFo#8 
zleMpX3hN#x-T`oWAxAZ!KBzeSXFoe@szk4d^0G5(FrdaC{p=s@1gecQbMjPnw6ya0 zt(_Q&HwB<$f|JmNV*PG)`TAgmWI%{-VQoSvIWzIvuK0iDCsMt7RWJyBM*%E#r7c|z z1S*yJfYavX@4-#_YaW;>;}V6VctDJrCi*)M%n*qGXnc2vVsqqlsSmAW=5U z(MX4HXh;+4hZ_{E+LpmW_%(MrMePD?JP>|6B;37an!tPXsq@>N9`6yx>CaPaq|v+$ zy7Pt}?FCg4M%KGZJo>vv^;F4v#eU%WYksQwJ_vt<9L;rDADiW7em@Gl$w ztYQA$a1qrW|KEz{Wt_{J)(<2a+`lrypPJ}pR*55k8BN5#YBZ3 S002UL=uy$jjnZfUfd2ts?A??A literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1900.xlsm b/pandas/tests/io/data/excel/times_1900.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..1ffdbe223453b103b4a67c138c3eb0109b9b4863 GIT binary patch literal 8282 zcmeHMgqa|L^)A{sLvmgQ{KJI5H6V8>tPBsKsh=)Q5-g zU+`Ep9(U9+sc3)Na8-grl*hvXDMXV_C)L|o`>quzdA)BPkZ|q652m-pQUkS2W}Hka zA-nE=BfQGvc6P17LFI6>w%)HErdJ@C$xdOR>lP4Dg^23}ru4r8$k7$V>oWSpy5r^> z_6tSKn}AiB!FY|yMY_S5C~E3o3KV7=b4SRTa51V6T=}Z32$jRko-^PJ5waEetDkTL zYFRx(DHUG9oHbjc$)&uatSo<7Fx_n^C6LI|Ca)wQZP^_jAN>Y`?q{6uX~z>T8@Q?u zNAFY+<#(|06%acxcSu16^h{xHHoC{|2Oo~SGMIi)tEZ%-vQ0uOzZhxIHJ{=!!sJUo zXMnSK?u-8JD;iB<9bD`8=a1eO^{2$-ENkXt7%z0_9a=uNZDAZk$uE<>^e6T*w=F#t zm&DP+ycVQ@&R$4=nVoeE7Z@NrgR9?Bjay}cLG1n>0igajp4RDb)1ARalwmYPhw;?Z z&C0=@i}T0*KYsoXlkzW9FH2HU>*B@;1Iyop_d})@6L2JzUP#Ed0JZ%B6c(`SV)N+9 z7e6sk;AoQtBgpx;`Q3b(Ulfhq>jz$}@s~y85s1*$zbFq&gSvX6urj)(%DR@VcH??Y zoljl7ky8XQd$q;0mNpjWDh({t$xa+gSK*9s>XD)07f^%|iM{T&Xvbd~(o08Nz zEDx(}c$BmEdL+YtCbe)E^GSq|^1=8U!hUxPo7oEAeh1+B6`78ft&m-%MV6Z|Ey(nl zL&vFf#wYw+Uk=sGeob0_yi48@xju%RbAQ8H{^P-SpzcR#U8Vis2ZExhiz^bE ze4Lzi3i~3>Fp`h~P~p5CxPIly3uiZb3ukBhAK~nG&cMN98qCZ8_En~-qSVce(}sQ* z&ix|8ix79#gNyb+XCE7Npq6Ex5ya?*JA-avnMzLO``RET;;RL&C>9*xN5XM8|sTj^Fe+sh>TIKyd4lnSnGsuaH*y ztZ6)q|M@VtC+~CjQOYkc%na6U1VBTy4$6aS8yaZdvRH54H%ZN3AZG`GsK-u8(G)~6 zL4dKHNg&(hXPFDslhzw+70|H#=R7FJCPk?d)rW^-zF{6T;r(*d{wzuEoA2MW=zv!} zB(J})e40ODA3OtFZw&NJEyGs&pO6{ot$cb2gTWF40Duj90tT7Cf+hd8iu3#rP9a=S zBQDy{#^JNe9$4%Pu&3qjG+&g-d$3DWXVE_z_(bhE^0dKN_`o6x?^fPd!Kp$`_BGRY z2_d5~b_EtVwoGj=(=jk4`{R-Z9&JOCy-n_W8`QIAziVrEkGo3uBXJxvDVceamZT~g zLvB&}3WR8;*wZjMkK3%!VT80?qzKs>Q5C(fE>^qJ%(wO=6?Vx1@v3LY4w*Dm{PCAJ zqaQfG^3$0F^DR(Aj}!$bfEGoyQQNqRk+_>)&hNHGV(qf4H;3JpSq|hKM2Dhi)mTci zm0b5DL&6}!OP09SqR3k^Ss%mXmG|1>Ryg8a)*YlV{Y&mY`NSS%Oq+Z5Q{L>Ppg2TH z)cXXRaw@i?H;C5fLwv@$%v$=Tg>mu9vuLVtW}O()RBH6uYdqWa-OaePx6>GIQ5Ks> z5Y37^&c4{)HjtfL@9d%=^?f&*`OHuxXH_7g%g^N^J6w~-QOOgxK9-kMB~z!j3k8AE zi(iqmHFJ({df%hp2%>op*+x=|tju#$lEy8VHAp4092azkE`M>L|N0570!i1G+tSP= z$JVl2^zFIYzapbrG~6-+Hc|`w6a0z{cUyBet7lpsZgx&K?mzYt{vnA6o!mH~t8FWk zod<{7#I#BI=%4W?5kkCb1Fbdrw8rMii8-H8#V>n-`7?+KBi~RSWDsE-C-n7Wqcq$# ziv;3giiQdC30)?LZ#R&$y)X1LpiurWNU9MQshPmY)$Q4|ofM>-ity@oJ0dtQ)!1i~EJ+g)lv39OTrIvSMp2Xy)(xh&7$+oC zE=FzW=ZKHh?i7eSixD^?(2H~JPBf~F6^fhWO1Uv45)sZv2Hy1`J49Z)3T9@<^DUIl zP(8z-!~?R7WN4hbF-X$KUj=L_@HJ2Dn;1qXXbV!C^}3J8^h(Rl2rT)7Sxxj<22g{p znvDB~m~}qrn;ItZU|53Q5Yva-NOAI(t(3~ABEwS$F2rzf=&_b6krskN+%f~d&36G~ z(e~^T$9ko`!#~d$D~RFnRehAfcMkiEh4q09*jGJ-DBN8}`dJvkZ;}pMJA@?l0Tq8< z)Kz4vKn3UOxf`Aq`j+r%^Hx!&o+=3id&WbV@oY}R!)Y?T`oa^;fD({#(S+TL4_YAP zfOQhD=bo-s)6Mo%)6#_q)zsTo4Q%I%LwzDAdRrb&wT9(Bd6PX49@1P_351)LmQ4$! 
ziM7HR_`bq2wRfzaFQr-vVE6OHT5cQHFTe6eP$V6~-Wr|DU36l3^?=l^o-*VkoNu{i zrwwyNn4QNiLzt*uv;{{Oq-h6U9D!7qU+%ea0x%93H7~f29!00UlBq;e%&ym*LU>u~ zZ@;Cb!WBP*=Cw7Ot3Qlsr;-qZ#G%ax#mSe&+L$^8n~M2InTYwj-F%-t?#hm%78CZn z%uCr+S?C3=srp_W+W7n3@<7*jA8}FpolJ7#G$U>-MV9HD5j-mck{B%%FpSoj66Ym2w_ypmFqN zZwp(1p=8H?-gQ>RbqNUznf{gRTTln(ut9Wm4xH0Y9R)f>7KCSF9Amsd)f=8$H!lb= zo+l;jD8Z>dt#>Z!)^bl7~m3;1wg#4Du*QV%&HIzdF8yR(* zw?&*62p|A3YK&74c!>q!llDh>vf`WD0@hq>y7r)Ll)_glFbLRTqQm{Z>DhVfOHl)4 z46_ROE=DzCdnc@EL!f_)STeJm*esT!ngnH}2*{nBdn~aT?Q$;(zucMUn9sPH8Tpn9ba_WDh2ER5KcB zL*uQYS61-?hbZV9p%EWEp7bCkU+PQ*F;)}2lh<}ZOZh}9FvKkcoUd&lc<^$fWZDzk z@{N>5T^EX%ch8|tk%jX1+jKwg>d`17m+2neiq=I7ozf}Enk%DHvMCG}AEOK>QeVMO z4@oi&@%N|@r||SagcTUIN28eLvdD8s_3_z_GF-26Ig?nZ62|3xmQNplAY)Rou)>pt zquy;lC7>26eOUXgLI03l)h(@*L*Hqwt>Jh)e>MxGuzeWm@vJE$lU+`3s93|aLXNpt zhL21#1CzQ2zLnFnB+>x>g-#Woz=4{IHGT!3vp}c0L1+fPJ){ZqrKmb;NqwcVqiwpJ z6>8;z@dJ|Ew0*Pb=(&K{vqz&AR+vGn&J*wlJ1wag;q$|(IA0I9sq+bn#2F;6B{_LUsq)Tts!*wU zaxa(F%Zhz7Qh!m?bn4ube}m-MgSmb`t9u55CnjJ+I1D6lN=1?tHqF2#X5H+Nlu=x- z2(W+HMTx`VB8CwUEMD=OzaidGzi$w-u}ZHEd#rUhhKTlHJ5dn{*dngP2vx<^Zfhc!k~vtAVzz8(K;k9FZrRhL?|<<)BZ|1!}0M?w@_v8y`W#Jj+#Y^;mOVWxqJJP#hIlV3{f?@s^iakIMR=>pHe4TkG zSc>+Fy3!x#=IC-szVCV5Is}427d4%xj27^?Kt@<MLBeaF25Cuj}td# zIRj1E;1KYTBYnsez6mrbXm|-ET;j>`iP>rtwiH=M5-mzeG8C{zb_sMKGfYfl*Za&0 z$fw1p2ba`z}qCm!!=4Ox#wm1BAGhAOFov_C?fOC9?Y$)i#lOteFg8 zoR<0*+&wi^3QGC+F|iE2>hC=XNFy90@EAyJ^`B%K3Yv5npK)^@rR=nI7BQf#Ln1xHa&et|+>iv}m$39(aYGXbJXk~=^VDqsm(&KC^?AK>x^p%?)M)Lfm-aE~saLIZtT;*)Rr z8!flY7rwR)Lg`BE`t&R(nV|Hs!Tqp)@S%TgWsOwaR&ba-oarRX+n3AE(_r2C4Bm^h zRiE9FkMe6Dlg_8rwldDWj4C<>7jDv9k?)U*8Xut>QVwYit@Nl%RY^rch)s`FVzSyZ zd~?J$+oOnAujwD$p0LK;T)}hQZmzK&57I|9L|;J}Sze<-HkKRZ%yDFHgY5^HN^&!~ zS%=$%U)b)2G6r5{&<43P1r1V59>a36f7T>>RyP%8Fu13n001Pv!u>~0;%;kY<>Ait zOY=*{mYg#0l*f$|2G*N{^AUg+LwC6EJaPtHHI1((Ik{ z^*q{Few3Q!V$0!Bza7d#reUZd@y;0K^poZuA!YU|6MT}ji@1V`VH3Wo^h6GCFxj1m zI&z4=1;JJA2`0nDQMB z6N;_M2~u|g$ZTvafjTP;$2~nh8ob`ji4$dhcZRO@{)|a_~F-n1cmgmT6ma5rqe~- zCpNXjK58N9xV^k&@u>-HHaVw;ByA%kr094%kyZQSY(?o)lU>r8s0tLa9(xVgMI>?? 
zg0FqriI$s828RV>i^GpmAhPkNiQbj zM!FAAf(YEsjgETdSYoe<*ZQp(DIe9FWhB#A%=nPXR)ujFtV)z3*AkkE$|Z$WGRl$L zlLu9qlNXnuWlA=0D(=2DgiP8ms9z||1gvvsu92~CPHHZ|-CEPXM@Xu{o%P-QfZ`IC z8$_1>Q1AimOjVyhI*o_Y&!Du6waebS?kMZqbYhCmT48+DFDA3t}PF?Mo95v<2Os_w#6|+-<{vB zU+^Fcx{Q$0NyV&hJ#2cHhOXUho(whL>r)`@qsLV6IbWI;Yyrn$1w+lY{MWj%nf&-U z@`=saw~3f?+=V7Sufi8~<&jn&rd-Zj9 z7`{>(5V4PNE&*o_;aW=dK!TAzZWwDTHy~XnQ&M`CY_#ea*^8D5=|AeFW4_{5Z!ilh zaEuu&)=?{^V(&4|xgNA{A?TqF=3dc$|7iA&c+A6Zh1O-SB)>pke2xDGkHb0;W+#~H z4sP|zK6m4Hado1AXT%{I#(fbdy7JN5lDAE&H4{G`10NhxLwQ-qx9z*0a+R+)yZ?;= zKaM@~LpG=A+N-%9a;fjF!>74(^^8(OTrP$XyBsIb>sf5z{&a}g372)Z83}# ze>rUW+3@d@`(F)*V9EObFTnro=Vu-EFP_l;q00Vj{IgW|r!g6^;cukc5~^0VpB>E55F$%KDQ{U6ES&tCo>QvdV;0C>T2 r$bSUbpUwZi2maL@5hfY_VgB2WsIG(rTU`L)0qhq5i{agrKYITMgTg*> literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1900.xlsx b/pandas/tests/io/data/excel/times_1900.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3702289b256fd0768b7edb235c423b0a9f14b15c GIT binary patch literal 8266 zcmeHMg;yNO*Bu;!4el}o65NABaCf($!8N!{upq%55}XiRf(3%R1$TG%;11tp<+r=+ zet*Gk&6%##r~19q)pc*Z_iic5z`$Yy-~osL000CqI!HCwg8~4eU;zLu03x)Gh@GvI ziLH~qs=K|3<10ot8*B1Uu+Y>Q0BFeb|FwVd7buGBm2YDPid{(FimWk6&y@=zzS#Hc z!lYFaXl{?~EHKtdv#@wb3%&sgXX03LmSGOgd9oh$o0r?#RQbK{tW!n__3ds}(<9gd{|YQiIkanRN}%)llRWFgSk4>wH)e0Wpcrpzw7EF@h-XpVy`;Io=K)1{8F*ja;8 zpqxvqL-}*(o~@_Qm%2hPLyeM zr2fV>ZeR?(z#JIWfHps{2w&I8Nh-%qSe_qpTAv zO;mxKlh)5Gx4Y*XSF^&tF)`RSXBc4(R_yWt`y6At_M}-D*wYN6d>gdC)V&!n?ui_WSHS zU-(Wp=zN8}C=?T$_i2@DabV*08)pRCXATMCZ;F=NF_Q@n(TsuG3~@ev8T3+j@VO4 zGy4q0=5@MAL|gkxb3%p*#xBtPu)oXY;sx3Y4Axl-U0w(#tOu6$AupU=|4Ba#qS1aa zshh3IL`NFclA0JAfzrQ;CEcL-*B zE7yFk{Z;mg{VJ=g$&+YI*B@#`X3yc%{oKihPY95t_)y&e!&_q@y2}=^bHwBNTT?mr z0jrj*Ym{~3LK)J67yRHrC-R_f2{Lb*7{~QbAM4Z(mz{)fzSA_#9@F=p9-6N8bd4`U zR{HOd>FF$ewGV;80t^6v4tW9snLmT&bF7@*>`zX?oRh)MTTMq`(TndH?Q+m3W^UD< z7fCwNi;|^La`rTl*$lp_*5%nV3cY_hyQ@^OG6kX|30dsiV_s@d`dUjXrymZBgnwEBq3RIG;%QQ+VqXa#Hrl%iEy>#y<9^dhgig$gU5hxkf=o zxs@TC7}CKQ>n?Vwo4nx`>E-LPo$GKj)XT5z<0S>Uhpg!W0>;6Z-}LR z36zxGX^2>2h_qj|7De^We`xXu-%Fk_bnYg(-HJ!B4iT#Ic&E=O-H2SxSM~YAV}!|| zu3MA`1G6}dybL&POO+^Bp-Eri+^Fejz@)mFNOgxWUxN>8kk@?r-QupA@XT~;8v(BC zht6aRRj!0dHm^E6lfC#rMIu8fOT_AMR!o`LSB>p!_e12ow?uTcj3euA56HJ%NN)Y+ z!6dv(v&oV8nvhE1)WOWg26`Fh#_fDbXA1v_jB>sp<7CK4 zCFGC&D>59-4INBgt2jAW*qS;1+)LQ|g`TuB0|S;DmPlIn_EqsHVm>3cV2#1}yHxs` zDzm8!&l2G=z9fxYbU9>C#={BzNV1oVi*gjz)s2o&eP7G#i-F1)$j!!m86~(`O+@!8 z$4iS?wxE|lDKJ<$>KRkJbIoRqpLznqcV+4@Rt$@zqoFDZSrF?7(3hgG$foO;y4%Oq z)tC)xi(R6)<2ttlR^#6PxHG={va=cvNi0^0erEN*z}(5)#Kwf_mn+N9x$J56+Y)gC z8}YVyfu0KV(=o;b(J(RzD;#f(rldoN^KriReyG!piX)j1S<}oA9Io8T7PRBXwt=A( zWZEA6CO4cTsFx|?KoyOPGaKxC-vMtOeDj7YB|VaDu5gm{H3|tPh-NTZ>CAyjm@@Lp zXG4muc63)yJ2Xm_i_DJB9_t*!?3O zWssQ&BWux8p=1I)G@0*Q7z2X_ZJ`W7j=R4@itp5H8z>xU$0B;TQ`9Y}Wl~p)AIMhr zMGVU>umugRfDqJG-Ve*uUPREs1LHOJ6kXLHKj8@}dsfI*Z~{jO2dSHzpIcR^t<*IWXm9 zn9Zz16-;{9#=@%AFa3u2ALurBEqUl@NRLvHWF8I)U zIioE$jA#~Lxxq*i^rLSK&1BBo(!6~_;7~>4{{;$MtlVlw{XWpbX`3pLPb1Wbq3xn( z3tA9{K%HH}Qa1_|0ScMr+C>g|s=AaSgP%vQQ5%nQS?F!Gp(4i=If>-5F`cP7fNCKZ z6$Zzk%61L>ERMD|esZYK?;WDY@9l8=WBRBqJ%Ws%$LlgHeqC;^(|tuAe6?@p?RCd; zy}HfGMCOgrDTDX0i=KXae6ndszBXz7z|;sno~?M!)OdY*aY%}=lnj0G$}Q4V^pnmS z;{FlJxhcx3mHhPzn9YMGX8|R}Z_j5dLYjZ6n8g&N8+zGUN9Ut0+`OB0lU8z*k55CW zc_p6Y-b^x}6&jiWWxMs282Lin9aB#?Om~j7Gbr=xEZ2qZECEh)KCt|x$}YEE#W8VN zciDeU6m%Cx2E_c}5~(~`!pn}Udz>kJL2YSB$SxIm-XNGHHSw$ql)_n$Wowgneu?F-5baT*h-4cK|44m{9|CiFUyz z>W%Pn37lDXsJv2h<3#aI1WO@X%V+EPQ;Z+$&aHP~;tJ5SKog%I{G@~C_dLpG*qV2+ z`IC#$wfyn&G1t$CeKN;p1ccT@?H`1p7hAKyiGl-tv0J?NUR{gB-f2yE>&G1x=V5ui 
zZD!83`7;#(VmDE&RbM*8=HhZ}6Ycp>YnjanpNy)>Cx4>|h%^aZTE_J4C#L*%{l38I zWd~f`rP`?9vvTZINmYBK_$C65er9gaY-Kg}6VK8731@WUk0M52+YnsbI`-9ajbt~I zK6ts64~5{`Pjsl4)Xy8K6^;v6T!CKI+ zYGs%ldkS)nSj*mZoq zg|q2EU458VKXr%3!={7T_rajkH*yoja0#O(@%Nsn>mZqNLOO0&9+u?tSowkPp*|RM&$0NBm3bu!jQH?g8Wl zOw+9}7JikdQ$ejgXfp;QpR8My)vNLJ#9L6^wYKFa1L)tzN?)XOrs101JLoYsHeU0k zm&qV|R=Lp4`{aWx6ms$yBcFwby$N&g-DpHyy}Mc3VsKk$HjA#y3dxhP3B62FIh0wJfxaF?L&}g# z7bZ0k7ven9Hg1}MsR1`QCe2~$Vp=vRE`2o|x-F{&I`4HD<%DkawN`W0Jas}vEBj2_ z%_`W6G#y1?nu5X4S}R8LEi63(|N1{rZr$5NMgd7|36$fQ$* zgD0Y?$g48L`&tC=u8&mPjxaYqy?gVbnXV>d*Qv zZmEVxUFIa`h!Y`a7o}U8sFzITH&Py<6H$?puTDna(z#Uj%op#PwUxoAJ!R38WWf&D z!_K~3c;@(xo1dl9ihxu0UKqZaJ`&Gx-iOHZE@iEahTKw$Zp7h)%R1+>{9aOKB4iwR z7R*n#O0EpnTAaQQLgwW?H9uh2MZWx0Wk_@HRne$zV|ScijWQu)Cy*6g9WtTAlw>zC zYt#57r)8sS>&0bcb#sH((2hN+6&jQHDBf)1rTtO1+iR!O`8Ak*mWy#G4DkEb=3X(i{*rQ^nUyJc3GVIqRtb)8s;l0( z2ovSkX3Oxe@O+|EqpCX_xx3rlW^^VocGox(cQ)E?L|$FMOMkiUnXTV1j=1Y1Y?g4) zIpei_lQZ`|fk@%((>rg=Y#m#}!pX+%uS?@U6lX8PqNeO>_U4ofGwp^Iof%|~^1j>9 z@$iP258ERjC5P|zD-|=?MO)Q)&7ZqwTY5%m_K4412#q<PNUzPtYb%(?MUmWf_Vz8@TO5$fo#0nF1I+Fjr!lN2qnn=sODqOAe9IoNrj~Yc^if z5GrXa38m^HoHQx#;E<#*KZlMnwHK7q)2_kN7ah%DeMflDs|fG!ZG?SQDR7(?m~=NJ zPi5I?vllw!jpfi#28bX>_z)s?c26yf^a4=XMS)VUiYnP$Jt|!<8zpkj0Z&I)=c~3v z757>Qns$9cbNrtF);IEI^+d!xa_*R7((M92PEK+sKYgzSIGG_T|NQ_Kp7_l zAh`_qwb!StPz6q&X+!Q5rPQJ3nC(vN4gtVH$hasXG@*o6E1FAqRNpVRQc;-VBL<~E z2%!+x`m2#r2N9*>0-lFL5|iYt$bncd>>SE)6--oI{fS)WW3x&;4+Z}Z7@e$ykqJ?B zW*H~i_zi>j1jv|M!DYK6bh#hK$J#_w5T%I4op!3xbMYnCxMDpTaTja!dIz||^MZ~L zF2o~GqTNcX3c51itJr4K%hs&g!hCrNB@*H`bo5pE{9DVmt4Aj8!#~y|7uPso4akTE z1mzeIrRKGrv7&>Wy(5#corB45XAJ)dG!W{8W9DVrn6Wz+;U0owE;#Op31`rp_JoMv zPoRTCd*;=xV`JK#Zw}>Vi(RM%HWnPcQye9E7ANB?V6tUsSh3v%90ZwrWXgJm9Lt=( zL}j7kq^K(k2BKvQ^EK5=YKeSM{gLq+=6hTQi40~^=`#@>V?w_&Q=Pp0^CID?b?t&A z!W&hhImL4+P9KrQ^kqVn^)bacsCiS$PfueiFs8xz1qk+mnSO-+!(2b0Z?j%xSl(?4 zHGQPs9U&%6-ZpprzG1ByvD3gPVK`f*M|8$f^Z;loK1jdLv!$sm!O!4Z4`YTKlZ;>N zDV98XJru8@WTRfS>5O!{5yiAsvNn))TWZ3?l#wy=tfR>}VYUl@shPwo424BLd2-7| ziC&5HT4?XOj7VGzKW6Ir(YUAC>Ruxo56Vu~$A{qH-RHK&?l4c?QUa3A(|HgnnR<^y{b7XiG8-jx1zD&{JgygqcxHUj$y~bHo3-D{^zd4}x640F z>Ble@>9}|Y_hn62Rkj-LCXR%}Ie)EQO~;C_h+C7h^C|Tet75f5V75(IZ=RY$At`-_ zZpKZoRULK***oSX)lZz$9|gl+Oy!ssy%qj`?b;VP1r4O_3AEr!a6GJcNk4O7w|`?x z4E+pr68S=Z{_Vw z2a(8+`hk`S7Zw?|?y=M8pxp_7{;|Qd>ee5Z=%eV9y@g^|`D?DJ{c+%{ z-fJ?*O8kmnC};+VEcMT)w!g2}@BKfV;VR1f)xlrSmwq?=*(X9s@u$f zNR@np@7?8l-uw1hd!2puKHq=V+UNhi|J!=nc=*JCe>Zjj!u?jf2=?Ks6JooEiwD#< z&l_;VCU-s^Az>>bQFQv!Cl}YFfXX5Bh10i1M2B30{8NP+Bw4SL!rKU*qGD+ zf}auC0h+$MG&Oht06z%;aP=j)Z=gFAF7m*`z29Wmb6$>efwt|Et$x!WgSz!GC-`%T zdgeV8tICtK5yp7xI|BYEN6aq{?=1Wg;MWve9GDlFPrtP{fufz2oQM{jEpjz2Nz^Ri z%e)u)DEKmmRnJK%gPMffJkmN#wJ_j20un^a&d+Vj?`1e&H zT1!f8Bif5|O6H8!8`B*^?StH+sV&beLq{a*T^R*?4kZI`rO`9RZNmKO}?X%EYl!H48M$#zA2icQyX;OP1;=T`Kt@|gFS>m(0i zFHVL~rW?LYHD5c@Q1Hi(!P3Zn4H9RzoQnWJUu^0jj|whAe2*m|m{LrNFSuJM_?bA_ z_(uKq4(;>@eY1e~7)MJcp3P(vNEW5$sw@*jMkn4hn=*1)+HxZ@y1Gm_rX!L(*6CI_ zb+|Bdp0R?(R9^uo4OOLNw?FA3twoGaODr?wDl{4|CQhAEU5OCoiiBiD3_-rejx%#^ z<91)LYH;1ML4Tk49^au`hcVt~Gm)jR)ogSZEZ+;s6FPiu_`F#^_c9lz-O16>Zb{{rCf(kl<8Z{Xo{u36!I z!8F=M?i|f;)k*WPq{f&y#l{rKoo_z0ejekOliS$+4ycly9W3}~*6`&MC%P4ZK|dv= z2Z_jt)oH#J(oeB#&~SryNWECiLjM_Sl}n?f;OY;neQ_yRhkrw+rwCQD4<0Rx3F;-f zKAW((zq?_Hl{@g|HX&^RvN$Zr?woYZ@o`@7O6H-l!1s1gGuhPZGeX zxXC;v9j0h}FtBqxKP~UFfMk;`d~V=3Ds#k+#x}Zh z<&1D(U)e?CdAyu_OS4|DEUUAefG*HN+>zL0=X^+D4z8$`WrZ2JWC>a>Hw=CwV0&z1 z_BeA%T95@O_+Y6yMSNo-EQ}ePAC<)6H#%7YT4SR>k0!4Hf%L>=lf$VYXI7af%|{%p zqD{Wc$%Q52NkB;VIm_eOnEAZe+nZ-Qp7zQ_Px<<^znP!M=I&GF800Cd5OEqZF%vs| 
z?cZJ{vfzhw3T6hqOxg5Z&`T=D$^Dg%t?;r|T=D$0^NRwdia_2i|6Wxi_SbR*fHcZe3G2b9lgVZXfsX4Dl}i+h zs&BeIp#-o}QRc8`&G^=jGlZXC>(_}b>SOZmSkPXm*A-Lfp@4(o*Q66hnJt@eWl)M! zz;~FclkyeS>hKOa;xUCp$J14ANS|Lz^8WXo^UYBaWqc-kkOo)cxfp371wshJ>rx=% zyiAGY;@%n`yc?(1oZP7J+G#l@g^yEn{Wf-@anQQ}Ea_ziVtT^0zgphUzEEDR0C8Tf z=h%wx=g(^^Xi%;>CK`KsM9NgeCl)vT=p;?H#}m0ub@7l%EG#9J@N}5BA1CHP;YZnQ z95o$u;&Q)@A~)$frN;+Tq;#NET##Vsd72p4>4WqbNn5kj2kPjzZwf%sAC-s?d?Cg) zn$)RMKQ1Yr^m*Wqy}xOS6f-X$URx~w-ixvy&{xj3+Nx=5zgS|qSh5LOz#sMykS^5DifeJ5LuUDEy~v)ugDp z;)#>0e?u(xK2Y{=667h^4T?Ahpr@q>EM+)#UtyFWcB%gQcZ%gTZ+r;}43*8SfylzE z){e%y?jxvObq=?V22ba|R?(2?MOhI&w?-b>1wWdjEs|nSm?xgn>~Edkf%2G!Uzd-F z*yU=QOR*=@Rt0^7FWp;MWtRUlQOzdT6h)6|UQ>`tc{*dVGzfHvP@SGWP<6WnOnH=~ zs4r!MGZIj@BEtz*;)riaW;CBpbPw9LnT&95nAuQPQRCjaeIbb|>WRGb@Dx)^C;E?D zab|=TGsFP^g0Wli)89dT?XE;S(LncZ9~<3^-0!RmpQ$%FgDd>g@M8Z2 z;P2M@YW7$A%V_IqU!nfLdH#hk!VZ5~>ECg`7wBI&K5W_kuX6o6@OQHO0#0EQ<{zo^ kdx+nY>{p0oBJ7dkKPjiD4J5c)0b(yUY)YD6`3S(j0PgsQ=>Px# literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1904.xls b/pandas/tests/io/data/excel/times_1904.xls new file mode 100644 index 0000000000000000000000000000000000000000..ac70787c358a56f90ec2fe073e58258c5d063a7b GIT binary patch literal 16384 zcmeHOdvH|M8UJ>(Nj89zgb)!Wth^ryh6Ip@>?){e4d5sacEG`q?3$F2(6FJ``O`7my0%O5`EPRmek; ztC3Ga9)^4}@^EDO2y>0?{xf8z&#kBzOBeo<5|MMI6TjWE0WFk&s)ol{#tEWYB+RSG z0d;knMF@;pLJg!63^a?8AO43@F5>8!0{gyZa8<||37_?YLzZR?Nx5SzKmWcLKA|Nrt9;T!LnbuDUK?(oX z4`57mTHWNr5z`+di1E?CL;zmcAD1#ujGE9e#AS@hqjrxPT|0Vo{hZ}>)@@B_1V(s_ z%@$$KvWw%bm)i@}VED(tZ55bOZ?BK1n$T#7hoRhF!az~QrpqL$7g<5(lOjrz6?Iyc z>|^@vsJc;jx1#Q>6?HYCTA3*0SutB|x$T~GNBOx@3paHzf&d0DP06y0#j*^i$z(a* z#?Y#r=5JM1Z0+@i%Wa(VEE`idvwqg)t*y%=tC5yRHXzl6Dr665O>J$nSFfJEb}dlq zUs2N}k$QJ(0}bg|BeU!ArE??^88Zaa53bxkMFctH|PdmE*@3+jD58KV+v~FT!CAx7c~wSoK89KOxrC@ z-&IjjJ+K;f-l6DRm|FPS&f5OP7_t@Jurn9Bif+hRWznC(>`$T@J8+@vR$dV1Rh;3> znh9~{s&JKDMZ;xMOlC}9cJIb;oosZ_Ss;job(LB~a12nx5{8-+F^C6qE3imG74#gj z01jNu6vTmw7gT|vwx9}ZtAZ-fnG34GMk%NQ-M^p;>~{rKV4x|e0((qB71$*Us=z*0 zPz46Xf+|cdv_fs673vDDFs0B6vkR>-$5Da(&mIH<8^dQCQG@#zMVY8!RtSdE;e;KN zQ|bs(E=WT@AgP!?NMk-A4UJ9^GYC!tNY+bo)X5bc%ZjxAjBbe2DZ9afA?VK+kN^Gm zDR2EJI6!tui91N@2|tiCoFH)zNm75^Ws~GaqdBJoq*OI0rH;kcc}UXt!wg7iTE$J9 zHl^6SOl)=n*lA@p8eXY1z=gIkYzj3CW$i-6+=V)R{CJK~u~;leC_Bv(inO#$ST$?m z?X+B>COU-rV55h*-Gs7R<&#;;?2R)Zlxf1U>Tlv8fuXx#ooPpbDYLb zv$P-$*eY30W2fa#nXt6CX+b}5n090MQlmefi3*`ic+Js*cBgvwphmk}G~FBBLhdNk z%EoOx1*-_;dj5RBzSPQi9g8Ld_eTKOK)|!D9%da=_g!1c!Th>X1^ePP5vNQ zSK2`E;BFItka78d=x@2+>Tto^uR8GrS4iF<{LI@gNMDmbNW=kx2X~wJgN)AyM1R}% zR)?Ec{i+jBa1rPY!Xpy>g7h`{gPi67!GpU^oFL{n9!0h61Utv=w*@QlWeuyrhWfFQ zy2#lrE%DCe)UDx#vX!<@r$DGCb%HctPk*dLqz?IPe7kHB{pC_GNdnmME)B&Ke>(bZ zs+jlWg=xcBZf1~8g~_HNm(3_2HhR&b*c8VT$6q|+&Zg32)0oR?TlT3^%sFTUZ<<`k2Si>EdEu(5kuU(<(&_Pet=m277Cds+l+X5`^%4UISLdrq@q zKtsyJv=Js77f&1G!^ZAu4UI3{e3Lty8k3ESr%mu-WB0U%#@{@9FZ#Z94kc@U;T(X=;$mcJjw(;%>cD~5x${Zw9p-NOcoUWV}gz~K4=Mb3_R&wa;qBq_(a z2W1a;2cXe(JU+n}8R;LDvK9){V2tt1gD<( zk{cian8E%Oc083ip0))C<0J<@U(9cdx2%fHk9Txz4R12!Iw>^(an9RqyI(@DW zyo6E{vr<}88BeD!5!R4%?>eTgyP-mP`CzJ}KP7lt6b*s5=OZwC#xlHLT zMU5#&jndShoElDEL*;ays6BW6nvUkq=44m*hRBlm`sCK|0w+{(da4|UHP_=%9^1ZC zwLcG0Zh*~~K%DjP2hR1a)@Ng<-Kz|`VLxXe=$#fvx z7CaG)3>!FNp9x;!0^8+tI5rQ|`!4U&Z@JbhO9S1$Y_0%A!z^B>45etog zP&C8^obgb%SgGAC+kyjRIvP?g=v>nSSLkszS+ENF@WkpG_#zuU+tE_^P+&D0zb1Ub z?q7}x65>)*d*`ZnYr6kmXb3Y5aj8i)^eQxZt0MvwWxO#Hi>&CwJ@FD4ZjhV-`DIbO zC)wSM@L~k!z^Mq#ivUbbyWvv|EP6$w;wiqaSo-e5Cz!Ou!=-u!-LcUrYe2wq5j?$H z9sDI!+WG<9W*4hvI8+9RMP14G*6>OvMsNW73o()qikp*M^(axWvu%Ym?n-9^t$wmX zntrlEnts|A94NB{yI_lZlB&hOW-uKJ=*7wgxovI_`HYZ`HYXt~iGH#q(NC5n`iY@z 
zc5g9kH^wPKaWNX=BIw+mS@_drbk#X@)g|r84p?idvsTeyG|v)vM-mN3d*yn2fDmUY zh^{0(z}R~zI$1%`;JWog%BdXt3$(ixN3^!q6J4xA=Aw;cX>)fcLzWJ&F{hxxfr!*g z!J!-d%8FWUVU>fpvhelc%jA07Cdm!Zt^$8c0J2hPdkI9|pzanzcLqZqIiCe9w#z!@ zLD>@QL04iFO(ImzkSo;jNbqBY7|oMi#m9SU^6+fgRRkhLbO8UmZ_jj1NmGSiwxv60SzZO*&Ple}l3eEVkR; z%sU#8ouKWs(FnT*v@JFo0eA>PpE@shoH`#Etm12x$bAdG*{sKKaROY0T`LH~4OaIR zxbnYG!t&wm@BU%o%8Ozv6n&DSPgd7&DoQWPSZORATBJ5`HQpQyk=IbiGPsOvbC zPN;M&Hr{qWA%pTpY@+S{R?PW{7=U~Mn2=UzROyc~>o+O8FnidQ6$3S#=}ZnRDqRXG z!m?4>UgZ80s4Wu4D<;}RH&!^v??1e+rIB$nSV+^2>7)_CEQb*$heq3&<fZ`}9# zb1x_gwTsG)-#T{h%RMhC3U!N0>hkeR-~RTiibBnzGUMyZo_*{2KPw9Lib}`!?_K)F z_+yGft)jAf?{A+uy6%nGb8@OY1~rbLYgfAbMX^zOKZTLActJY4PGmFs(`JV+y|!)y zdIfbw(%yjH9X<(J(|AP(;%-IJIlA{{U+5~d9TE8dS+oe*Qfod_GyZ;&KA#r&7{gex z8f4JWwnSHY<=R`0zrVPx{LUN0GI{*I7wSR#39|0*+B4aw&PS%VU4~qV+=k3()r*YX zHw-@d{{w%3U;5hXsKoDqtiyjQ!jJ1Squj#wmhP^#U5RAm-1RN-4&57`pHX=({*UY| zyzd=*N1XptmhJ(d6qfw-4<35*(5@+`lyAEk`q#dH5A}ZxS;sNK@U3GK_2!4;R%G__ z4rI#C8Tk<8tB^T%UW07egZ8-v*=`S*MJ27vAcaK&F`3NY8Zcoyu|4#(Kn8(CNSc?$ zmv(ipTFV*3TGhj?0D(8@6iBaq*I-t;Qodr5^Zw<3VcB~|J{8&S-T4&EvrM7<^MuX2 z*k_r40MWe&gB5i!|95j7ERgMgixY|VmbmVQk%s#E#z@Aw)QB;!J&~A+(TrOd=&4hb WM^2e3@6Fo#)x1slMZr(m0{;U(y*23o literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1904.xlsb b/pandas/tests/io/data/excel/times_1904.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e426dc959da496e4d70b4f7e798c73e141a40dc8 GIT binary patch literal 7734 zcmeHMg;$i@*B%(U8>G8yC=n1rKxu}Q&LN}*X_OwiOF%$6m5>w(rMo*MBn6Q!$#1;n zmFw^O2kw1mo%da{&g^IAoU@-D&wv$xNJIb>02%-QpaVtDPB2^zi1t-N>*412h-Wvs5f2y|mr zsH_B}h0^$zlk)uG0V1M&vGJZaq zXJD~qbCn`qfs|9FOKwSLrY9kEYhgZOO%FF2Z^pDEjO;QVuMne<(Q&u&7C;4zbLH+@|hHdd0 zaXy%XmlD&r3!Sz#@sS$x?P^C8-zZsAe9Lb@vIhFy1(`axCiH0%KbeiaC|g6Yl`FUC z)Ur718Q5CZg0Ia@Cssw3VsE+SRdrytK~Q8PPK}QTWwVmWB(iIBqYkzU&#ji~Oz757 z=olXD%zBI)%ZR&h=*Lx$A_>vz(n14w1t-}He^7)TaEhgrD!Ua(!smMx;b)Oeb6~W{ zgy47^bASO`#`g-Js;)w^AU%Y(jUdX)gF7#WUP;5JIbPJy0wVaCeO9&cR?t!(9Jp;d^0ej#7UYRe#fhcI`>c z-QkAPbcuH9fg#o?4q?Xt123PqUhA=H)M1TBs^-L`9G}(q2SZd(#8f%kDtn)6wMerC zXGy6%&5U}Yo)}GbWi8P-r14B$aw-)S+&{eDKUtk;SF%ProIzOcY=OL=Hg!vggN9E} zR>&xa9D}L=BjWShv?$qm(pwa}V#8~$+1F+5p@g^O19TCM*6z6IBkC^TIL~F9cb17B zge(vQ!Mebd#iB1@;WULl@TLEgC!0qHNvH6cIyg_r0cZ&BwmiR6#SQ9cV+w`ZaGO51 z|C2Td@I(h^+rRrNR%`z5HjUo{{f@v_3AC)ddPUj#``>(HYL%*micBkqKd`X-yNviM z@Fb>leK^@)I$F!wX+vPBZsuUEphS8~X#0f%xomv2ABbl9wPf4>Auzp2Vr z@M(t6e1i{M_<;M(-#y0K0%Bz>U@8Grl<-ED!T~bG2*KitC2Htk=y>EHGTuJOiYk)4 z@Lgm!6JxNs8N_(a(khudlLcetDM%aC#>)uGMeOgg>@w>@T5=9D4dTm`#g9?8=mLbT z0>%qLRs4K?W(mlti1gWHWb%h(Wb_iszW!l24}!cO1l@^Xr$vYfS zVAHWxRqGOdN31XXoI%QEf{L{Zb;;UqC|Z5c6rF-KD+J?6q7SwZ+IR_o)e@zJDlBg{R}#l9G*ClQn0t+Ko4U)_|m$SvZKGCVnw zwmqh+H}SNVRDeETF`D`Gn)gAyuhP8v^qh_QuBmio848rS%+P; zLQ5ECXIh}InLdm&{mM?$>7(9q(`&L;#(C_s0H72@lREE(oRI^A0kg>}#b+m93t#VS zB~l05C)SRGE{%dl)5r-x+@2^Y0$!WMyvd z?DR*F`t1$quMlNB*lZ1_Cd%S@=8mzmQQ9GIM~M2ANsP?~wM6CfHi5)QbNT(_BfGCt zdr7DFvaYHEHRo8vqir*7bir*xB7>EFJH3-%L|NIoa3;GT#w>oL4(=O+Ri;HuI-UfY z#3Vokwhbwpa!lj|X6B{37p6Gc)_s-AR%OER;x%7W2cl81RPxDR`ia%yjG3z(?bC?5 zr{=U32?(K36?D&DYc!T*q=R-CL}y+FdgPlfuDeYM3i?t%y*wOsK>ZcRU}c-juTcPi zWs2|J$8Q{SwlcRf=lSgp`rdtP=_o-dWXRq#?9n`U1~n6PC4cDhYO0*VYV<+5-q^DH z-T9Xmu9kxQ%9|9{(Xz53{F^(4-&JGtE!0 zUtT4J7nC0KUr06h{GJ>fH3ji>gQ*gnrS!&^9gd>X{k&fX7=| zWDlE;kDcYc{2!#`y?p`N;dhUCK87hyDAk`LOoSqOr_d%Kt(ThcGhSsBsw4A-l{7Ug zE{K!3;j_Eo!E%YMp+PeR8@{@N6{a)rAbundDIOLS%jW-tyGq=urp$Kz0^7HUuXJJO zD!kuRkqfcP;5Lc~;hse<``+4y-t>Ch&I1Yn*W2Lm}cLr%4zR$%IMv+znQ|Gg}l; z2J~!9Y$0Xs3KFZY3So;Luh0+Q_2NbW3`qnLN5-bGK*GGbmZ+mT{7rG0gx3g2tRrVK zYcPC{prfAF)_d--MZ}i|`{s}3Z2WV1tg=PsgPX=VBn%hw>BP;Tx_7a0 
zI-Ii{d{5U9=Mzp8jQ&$E^xpZwm*`;{l@A`@PQ$GmIG21bKgAn)Up0`U9aX=%ZckG2 zes;Xl{wU+>YHk_hG_h%!-cy=^ewfQK;MfDH=2mBIN_@F6O*?DbSXL9=ZmIFyqby*9 zU}rJLBva|ZHii|KX499-^Qh8Ra34`gj3+|`UXkPpCyS0QcG4YsvHyn}=n-5=NPs4WGYA3kshMv0OSM;eA& zWYZ^3qPy+830tZwDkk{_?Hulfbofk;-3l!A&Mta8FX&Rw@zQov?+t?1^NE2_t@l_b zG`5Sg?j$?-g)bZly(T{_96@(Vb{2ZAzb(}tXRAu#PUd949PJ;Y=AxP@NGQ)r;dfL| zd1ugaEO4tSJD(p_D~@I<4o`ej9t8o18*gfewt4)aMqn!*3-8k8-G0Uxwl5Od0Sd_% z{fF>oEq$rlzpBRK9A#vHAUC^qjo^vS;XI?f4hl~; z(nsY)hVbzAZd2#6y_u2+y;9=JBJ)I1)4L#f+D*0Slw@RwN2?qrqgP|5LYnVGEJXSH zxco&vrfGjj!g4*6pwFG@;HxS!G0bCYGUe*tT(I)fzMd<+UODqx@D&m!2CQhYds>rh?LXlMAY{|;B( zQeaB+v`b$cUcpzbBrC0M5_@vBQkVyOA!?20rhSCy2e$kbo~{H_^6Npx;d69-aIvy8 zl4q?B3E77HhP!@Yj1;N-{Y_jXES!8diJGP(-iDxD|8u(oqZp~ng^9FPlOFPc7uFdl zvf!y`4OX;iSPi6Nhsff{d8dL?r>6^|A!+bJySj5F=1uIjpUqR~2^g)&zD{li1#25} z^=|MnawPEX3elJT4am278=J+9bWd9lOx2!72&lAnt>5k`roHtlaX!ZK8FU!$(iCJN zSosQI5?GS?GV#*pmBT>EgllnJd!$xArF6M1h;t37ia4q0P+v^T;>qr5@qu%gVl!k% z3M#WZ7i@v@7^vgbjYBPaTee;}sJWYa1~O7yY%@A6_l#?{n8$sje>L5qW@3wTPrRV= z;8P#*`q+|;i0ILqWt;k%v2ncVcqR;m(JfjtM6>ti9NzdHohNidv%Ak*BiJ6tp}$`v zAhYSXWXA zA^d@=0O=<%G*&Bf$Y)4GeJj>7%nFrO83B}OjFN*9CI(K%XAYKj1MyXX_zEjPe7t)g z0mW5gQ!}ucdru$G5r7Iq)K;M2hehHk4ze4fg@P#RBt`QYAbVFeH?2+G^L{`e{<$qY8qFdJS3I1BKq#$xo-zzG&{1&YrHEDJeE?1so~c zde>ZeZ`@UEqzpK^iq^#Snf->R&ckBTXrVbgX*lN|QIr(^WOIcCDVu^fT5s1}9+c8E{X3pw9UIF*v*WMY-U!+1c$sa7j9 z-dbDSoK&(~@V)*;Ie*7(0*?+pLj@m1@BuI9ovch8%^~W}j*smvf2(s-2D_TOnwkJD zEe%+l66^tV@pMUa&rJ~iN6oxGDMAQ_pRftMX2yqimJp~J*b(aB#A60^H2*_6{jZ^f z@3m)~qza6eENF-M3U#Cs+%v_8{zkeVXOkd@Eb?vsWlgN)Y?Wq>57ZA^M}T_vRn=NdWE3@lDJe~Y0hp6&lVf#x^TsmMQTBj`Cc>?P+C4>iwa91IwAl=C zl?X-RF5?Kotzb#)7IH;If1f%MXS*x0A*LJ)gBh43RAe)1I_>Dw(0r;RtB?|*K)iD6 zH^PhePdzu%H@~? zx%+Qz#7LYm01=rs5fK#Q`=HCm8ztx^82}pv2Z#S?06&pGuCzEw=qxW*;12MZ z25XMu&Q~y&U$LpI=;0d^gt!_@rF^rj3dWG)*saOvn%ItM*O|zbXYOaxa&uH7Mu#!O z?cmVWY$9YN3LDbn=k7CJjc1$~jD*(9X1?;22<)qFC@uM9B8|9ryfS5|3o-GWC%=qDExOKKQ z>3Yd^i`s-hi#~{^GIRum1;dcbR`fL#-y)wUQc<}Stu$!r*@zY2(Z1cm!0JI=_>i}C zM5x1j>{Wp#ZlWpo(B)FKLHJUEB$)&IaO##KZ3nrnI{$8eHIG&>~oZj#Cc#hCaf0<$; zjN)u~w4m?aUQiW&60}$~(3>?=!z|U$<1jJ^JnBmA^G4Ho%e<8(?r}vD)ajG%U28bc z-IE%(;3A~Hf01+qg&)N4{6RqEf@gtW{|WZzasB!H#b07zg`WX_*5dy({Qj(ibK-Bx z{7u83#mT=LF2UR4|1DMC#JMR}{Xil||0^Q=DR$m8zN!8EFh;}pJyHIp1l=^fDPjCD z^~C(^d;dQ{<0jxuMc@aZH`Z@}H}!#=05{979{{>=h2iJe_@fBBiE^`A_<^zkFCKnX zJKRM0xj6V?4*-Y}0|5Uh6>gf}4C+4sT1js%<7SAziS=`Q{(%GlNW%-$zkbRe(Hg9P V40k;M01N)2f_pCzuFU`d{|ED|w^{%I literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1904.xlsm b/pandas/tests/io/data/excel/times_1904.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..e884eca1e7c743198dace47a50baf24f57fc7c54 GIT binary patch literal 8260 zcmeHMgWmDJdZx0+NpO zFw}3n@m;Up-(T>3`#k6D=R9Yx^_;!;yY_nbyS3G^ut5M^06qW!fB+z6^>9}V0Kf+u z0H6fmW11?sx_H>Qcv$Gc-E3gL33@v_vAo8{uc4q@Q!6P0s? 
zRe`AaDmnaXyO{kHymxQ3)^KU+yScaF=o$iD~?ua0q z`~po4(lwuQF|Rn;g!vDPYK%EJG=&70Vock6`(1lhhILPAf|SVM4T$-PoPJPBuOC2} z?M=J^r%x=5JlCXGDq_|gp~a1$s87x}2qD3{quu{Tb-F%#n1PF&xN7I2Sfve>M%cXv z9F$U2eE9*|`vO6_Hj;S7(n}=M_f}Z4nNFE1N*}+O>@-o3NEB&SQI}A(?hKEQettsa zZbM;`6^aD1YdBNs{KB4q@Ujgvxn%@5#h`0#IK4(JhsC z5Iz4u)j6`7*c^6-`S)CmKt1{pEaiY^|MULYdD+;lUdZu^cu6z`NQSNUQE6Bj>Y*nd zFQ(4p33=V!(y)p;$yZxXhcg1EQu8)RZbnFH?2J99>V;X^O_%xhIzf(3>GgH( zr5q}(vfQOv;THFuS`QR6-cw%q3TS2a>adDaAVr6jdpKSl1(;Ne?+v_wcS;g;6!(7X z3yuaCETB`?uR#)YBRTB=JtB?hAfX4~V|Y6W{ShaRT-_b5TwNW%r?bCe1_Pbb&|dz# zONoxAdZ#e3ndmZH_)&%z75TJ>5bKWqHW_|jHP0+3M9lwik%71IH={{)AuQK0IAXBJ z>*N~g3Kr#28(%>b1-1{R6G8^Zb?{&i3*TzDlsQ-#hmvAvYrA-th%_BW*>EGNmp==1 zeI3coK^LBr$EtYPFcv2MU`W_g^Z{&ysUMA*f$G&j)DzvE(%|a4I+k=E+qJ6(h1p}= zm%(uG=m8ypsw@c{FuE}g;X}6FKE~f~I=9t?4>`8wporHPi`AKnu0egnJXpeemB9f# zNwBro&)?}ImObRp`gz{Z?(+{EB5YUtdL|apGyN-M7A9M}5702!#sUDy&@IrA`4KF+ zx|*)D!oSq_#IKTRyV%f>8ZoBi!a5Gl98=3_xkAzHtA%WT^3rV8B1k$N}xb& z;g-DL@Gqn_p$Iv#m!Fu4l}xt9iR>HwwjSeYAW2#5Q>cw=)vi4a)53Q=p(mcLP3lk{ zz$4+*U?Bw&A^jHn7KZK9-{WeDJ)%YNxp>9{WeV?KSJqY&w{Sh_maf$`I_9w$>h*34 zoJ^A0c4nY>qLzQhd_2R>A?pr@WQGB+_cMF062yB^sO{Zk?)45X2v$UT*+{iAvST^y zvV(<6>7a^VG+km?XHJ^cD6pwtEk=j+l}!lYNkMNW(Zy|JQ1Kp}0n!}D4R>+Qh%#Zs zLg!ovPJWj6mYb>}1RUZK$Rz6i$;r*echKuHH8hS`OY^?Z4pSojfZyv?N_JfNM$<@} zSwZT7CSGc)ki$^y9i7DhV9;J-Ac*dnztoE>+nnhbl1rB5jew`J>8pp_u@yb()EuNA z;I;cWUVS2A%y00Ss+#+Y>=tPf&IHG+dEeT6xWc>g{HlsQl!V&MqqF{4=!>HAfP)>j&7(Ihq0gijOMUaO(^-E~?Qt=ns6r>@D4G?(2HE zJGj`vzON|agSQCVg@K{V%}Y$}JG*)`tVy{LDyBij%lfWs| z>DlllDcB$tuV05dMwHwkeQ%^zSpy>a3{qZFLu9*#G~3*-tD|VvU+ht&o-jS9bDRkH z*Yz=^eCu;8E&%YE;XB#?9qb%yUE)h#T zbw%Q#)hD$m#zLx^foJc`5|WwbqgIVx$&FTTypeN-f}F9~<%Bjr)@zRD$(d&>xN{^@ zQ_V&OU3TF*MV>vpp7}CfY_52U`93id1%ziflLYqp+ES?i}8ks6lCb)d$PN7S*E`89ro}X!`Ma#&aaqaflOF@&} zNqWQDynT|uLbzG}M~6p6x^UdU&$M0-JRjOjHabpBD&}ETfxp<)@g1oR_Q>oTu6wvt zo0R(GjCT=UV>zpo2)8IMnUqL_Hene1`jJ{VI@it?GcN@4`+JfuHjf#Vo_b@c(G8NV zkIZDxyYToC(z(|%Jt@cVE!Ana#dvZguL4PVjnJ>9hn@V^j2ecqH50tb+HtJ7R_Ost;TH_N4K&g9V{Z5VZcX1mk2Uj!dKq@FpVmO4yK06xP>Mv$*R z-;-wM$qZFIcI!3^S?uH`;c2)@T^YTtf|I3%|G7YnMJH^1j=I94XH!zR!gyp?#wqhu zC41wR%DU!M6$mjyQaB4BJ5t^cvgyLij%pRY) zaceEw?MfbVu|3B(O>XGJ(+!zR|DMIsfUNg4Yx`Yqk7EjgI_x|f3uoSj-abnZtu614 znM=-dNpq7WX%x1nC;X_dl~K|DlU)X7P4iaz z#S`*xPfd&ICy04`Ofy{Qe6PR1Mw@9uxy6hPx8}Cwy z!`ThW-o7A*@mT>lQuY&Bt!^?gX5V0lgc5(9$|gLc(5`P*ur%K{y*bua-1+@Qqqkb))u*>jpz?W;N|JDX?KVK;Pl zN3jVAza*;RK;FrzbE34!4FrY-`I1tCH58?``rC`#SQoU1?Uf+lKy;EDj?`B&uFb#>y|uB36fpiMdtgR4@qWD;r^El7GH{AzMT{$j|PWTLmie)12PtV1u|yq0uf8aZRR9c+;*r#Q99`*Zn|y|h<#I}&PmeXy)S2fy%mnV zo*Vz^p3zG)MZtd5eAi4xSh+LmvqMlqwT-YG zt)$y~X+!q(kiJv|yj*LO|1J&@!8>2dVB1lNn?)&-r7A)lx&vBUc%fWa4t zaqA&rju;l>Jn4@YT_+I+vl*htY0EyF!{sU~Q_` z9u0R(JOF_954eBdnZWFAY&>8>zgNFAwq*U;s99m4pU@#VX3<)GAIffwxjE*?o$Mks z9L3*bB`~1Uzl)HmlB7|=+zSi=fnVg%ImFos;Z1wXl*MZA&h=pILl;E#qrAjUuFm~m zthU(kJFCwWqz+%ONnQ@DY^P0|Z<6W4T%h;t2j|L5ck?XmjRrZc1#Ni~x$?f@s1csX z81YE|SY^SUk-(B1mJ~G!rp-gqk?QLQ@1Bx64=SI}qszr;cFh)WAmGmXvLrB1VouS^Qb@O(qwCz#SNl=$s# zU+F&PDBq~9d_R6ELz6Lssfu}8U!Ax1u!|(b2UAmyqNh_2YyaD}>arm2cnCAD<=q*2 zrMn51dWv)v6{VGGmuJ4CuJ?@=UF~iQ{Pv-G=CND5+jIxC}43{BW zvG%c3--_yP)=T<)QwFtuDjItJ8rD!#wd9^}O!88clrt1_1F6RIIQ=@qjQ~cPtAE{b zm`baV8==2Apn*(|R%GtGT5G$zy1|63UEOW|JY4vn5QC0x-=ul<4q;H&BF-Kq3?uI1I^)ONP^>c%rGpoAu+_(QvnQ?k9;t;@pbEt;y`D6D?n zHlC^0lCjelGku_VTxU3knAe)m0*J*W?lgFpXc<~WW|((^Nv~$Ddv5xr{qNg7b#zIk&koxhB4*Euu6w`fSqvW`UONC6|`3=w8RBbc}-r_w$$t zJGq6^mUG5aN%WZhx~4ANqMnXI!;u`V_5go`qoteM|ImmYvOh~^(yi~ZS11DO7)(6R zAT=*d98_YZB6(P0g^}cCrlw8deMfEjXIpOWF{uh!qp~yQ%BhnDI>{lHtthV?YetmZ=7QW z3iR(3GxK+uy*e9kd+(TDg?m_XjXFo)}@FhrBr z%OiJjw}&o_n4CcBA@WCYvP*PH%tWpp4_s 
zEP5#@yW%&~#h~!5Y3Lw(rj}D-P{_^X#Nm|-{OL3qWP9=nbbL6guKgR5>?7IKK=JL< z!qrDF29teH2T)-2O#Fdg3`_yE^7QZLwSUdmU-iG7-fFA=)4@LvlKwLMUKgN);)i3U zpAG*huK#E_h$iI!ztsM-pPv=iA3PEKqQCxZ{IeYQw=oNP&w&1kU&OheJ^ZYJ{q14* z4-Y>pVm~|hc^mS#gJOz5e&IixlAld~rgVRsrc?c=>%S1ZpS}Dur~d5&J$q=L_)CWU l+5Det;E(2C>3=Z)b4Aov$3f3706>Vo0?|pljOqK^{{f-?9eV%( literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1904.xlsx b/pandas/tests/io/data/excel/times_1904.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1a13468e59d1c51d7414a2a33e35994207be491f GIT binary patch literal 8244 zcmeHM2RmF_7aqMc7(_RM=)I2-y%RlJ2%`5I6P+Mhln99$Bs$ST5WPqAUPq4_(MSI# zw|w{B-2454@9y)=+0S`qul1aL_WSO&_PaDy(a=EvOaL|j0009lzGc{&q5uH#=l}pI z02|d<*2US=%GuLQ7v^f^@qpXM$&n!k9hLPB02Mj@-}Zm}1xk|oG}?KA@~29dva4Jw zbCpuqLc1Z|BrtdVt20CmO z+I8;dDXL8#tRDr&A6lX=@P@}X63!36F&mmh=+p(NDhs2Iij2DA9XOi(O#GfSvp$bL z(mbyhCbKo;GR~9=28WDH0W=+=tI13God^9p?vmxI^?h$+n7?3NBBkkoQ$MQDjEjGy zaf{K3bMakwrrbR-5%!RP{GQ&AqAmA!EouCm^M z#yIxkj|JJPz3|%#qx*6dz(MZ&R5+yXXu`>%uY--c)D0Ib5M?OiGCI4Z;pMd=Z?;|x zrUgzW=WP<)eI};1GxD0O$HT&Ay3DV~5qx|`rKe*nW>;>J=`PL)GkfINawwP9OnT+V zrIFsF%_vBM;2%`%W`1)VXjCn@-FrmtW<`NFu>+kkH6(U?lBRy{AD2O6Ke?scG2f=XZ zKkP@tw%9GD3sJ-%CE3~9F5bl>O!XkGlT7U4%mj(9BUqU!BXaW?<&Nq`!UgRIc)j@T zJ%(sMBQeugy&81>RA;9&q`J12A(hQ~?WRt4_5?F41V%r6NQt8?K>z~`Z;XLC5Uuhj z*ax33t<_-z4z0Q8_-i!9s&qv{P`_|bhKL?T`arfskF}iF4SIV^o-!Ao*_vk$IQx$F ztXF%x$KlA8{yStACR(}&u`VB(!+L5qEmk5B3!MJSMqP2pg0b*`QH-32ECe^jC4xQzpg)@A0d&xN^Lm zo;3A-IEf?nxey3FxnQx zv#s~v^2Jg^5Hi@lBsZ*45qc4>f$eZgg+E)9*sj=zMZluQKnTJ^_``b_25nQDF*O7p z-#X`T_KE{S3Ll9oYN`mDxtw;1*XS6Wu$c|?_8n zO`qN8mF=C9y=H!>wN?`AdixzP+OyQMp=w1$`%?III|G@*VI?Pjs_>xpoH(OF(5KHT zG1`o8tU_^53wqM=uH+3t#ru@{2vZDKO!%AuY5b6x_9YLD_#&^0m8>q5KGZXamfwBK z(bdYY-}^c_>^Z)M`lBa1vz$xM{ zxW}s5tE|4hRX7irse zc6BKj6W`*sl8&K0^{x)K))vqio~5SXzDpMe_udmsqacfXO}mpuj=vw@-9v;`dtDC+ zCMJ*w7ZVUe#7ljxrRK=V3oxWnE9#@v3Xjx|XW{Aas{5K4qMwZQS(`P6pV%&Sf2c-L z4b1-v{J!J^p7k2SWb>f5mZVV+-YrKyZhT4UFdq1i`(x<)t(910da*+NBe(wr?4Gt( zPF6g>-1&aYWyi4JnOX$cM6m$@`rhN5PPC*T|!s=*{aiFQ6} z)!>cPaP`JJDHkZn35`jLXLIzU`f#3ay}8QlP1bgpg_fYc~Y0~RzkueDsXpYKe~8F31zD|TEN(CBHgFAl5yz-bED28 zlZDH;myX>@bnw>259`Jj zilLuBp_zkI&1^B=mUwnR4Hz@iLx#a4!@c+tWG-K@HF`y%vRmuX*xeT z-J`==OhY|=;1g#pmt(w&y}OTpVvWDzpmDzJC-8(VZvj6&WG86jxe9c#l+PM$5{>9; z;0Q93Y1z)b$Sk`kyv0UkaHfz7YoQ%5jE;VT;=J*J2Jch>Mq+9bV=_nA6_Nd6R`k?l zmXfTc5LkIw$>mX_QJ; zpUI;{SkY($O?IcPzjE8Vkx%QTnNql)R}4H`T?@kX9W9*jBC>of zYw@8S%iE`OS1;c}?Q80*0H4aCD00_{PW`e^^A>u=<1$re#>G_Q_-s#%)0`>&M4N?f zr5llM(V>r%7{JKN@TQp{!Gyxds3Kk{}u6Q=xiH`4P_U0OP)qGEr6mRXr1YnQwL zl}s7|eHH2_Zm+^fL)6E56(qtt_tdRP%LH76+sqBa(nuXn>j-=$G_eb7%GI1~UnyE) zm(Q8t-nvTJemD_56BK(SGGt*z5VGVlin_DWkc=O}q+eE*LF~a{0bqW4kickhmzE}5 zk~$KK|7|iG_kvOr=M)@Hd_?haW^_V0&d;6u#XzH`fGFg-VZw5vvsaV`|8$!=-90Z} zcu9={bjn!saaG-+OXu5545v1q0k5rOI-yL9EX;$jy=X zGDJ;IZ0mDd(fpl=r^kX=Wm{EH8MdeO<}VU4@xMK~JcIY)Z{4ZyIm_3V6n;HuOt-Ps zl5F68P3e%3DtkVlHH4k%pO=?9(&|8X;z9QzxnkY>lgZsfE#np;(m8I9XOXX>UQMSk zpG)oTLl^bY_aq%*VGKn)(`{&WA=O7y5pBJMGY>~{99y;ZYbi_>S_xo=n;Mg0oc=FV zPSd+G$!)IPO}U$z&Ly%cRPlUkygBBB3c+^w@(Q@)SYl(`VqksiP1q|}7mFKQK5M)d zDl;iKM2p(Q_4pSfsEKg#Nws+d-am!ne7M4HCo;3wM@BG$-$Jn`%+czX82nbx$t8^! 
zcr9sn(|Yh2l?kj4)Z|i5VD}n->pjvW{*a_NKzhovH}C2+g8;0XG12IU({wn$@--6z z={?qRJyN6YPf~Le#o^7V)MS2f0SXNG(#WhZ12iNlV^ZIbwD-$s3{0{0SUy^kDwGC* zy`qS^#wzxP@BKD=;%}^O-8~fx>n`?+^6Ewj?2R1c(lnyQPtCz>stTd%cO`FUF=82n=UB+CGQTMiK=820cnx| z?Av&qtR;^i&KEdnDGRb>b`7k|LZbQasps<@p7S^)-p#Q9fLp)f{m0S7!`8~m(}U-i z?w6D;NpCi4mKW&Hb3`8lw^TiVG8v+7j`*`CIr9ugadum9^(lSc-Gfw$P$;492Ze&@ zGjb{Ip4;$XP5VH~Vl{W?x={|G3;cRf-U6pLmjM~8O*WiPsxt-2gIBjjuKSj^Q>INf ziF7=ip$~2Q=iZm@=9$|X^fLLM`dR0;q@5rOE-LdlXyKl+u>Wke%BU9_ zjNVmkIon;o^7!?v>`Gq#e8&fTm`0sAt==d_f->9i*9=nE6q|Klzqn>@N z&RcWqBnW+isxC#+-Jy$i@O@i(i5qh)ln&EeYlcceE8bjJjWpqa6)Ql_JbV>P`8E(x;*R5L$!^STzbk z(|e`E+VvgsJ?w6zM^e6+%DK|sp=J3V@$T9XMOYGp3~oAMLN=Dzq8r!F(0wY5NdAq; z?Ck?A!x(#e-hPKL&R9~v&Du0h%xD^k!d&X>(5+b8*vaojwRb;?Jo&B&YMCk;c%9=> z_n~spJ>QTZi=U7s40Qvc!seSQN-Y^kLvizuM-BtUQ+@;FKXynU6C)*=M=q9{?k=t# zJeDr*R=*V!{#9a-?Dk8XS8eA7b;2=jA`(x9uV!enccr@NI)ZJlT54a-IkgJ2DbLLX zy39C#klp7y&AeDim#sXz{{RLVQeb_Z%Zi3CR?4g8WVt6@=qOk*YNQJp^K_O1MO#PbCW~$NY81Ni#S4SE25D8jup)d zL8i@}(hqLlaH?IbMpkr*lW z0RId}b63}Y(TIfMZ!JAh`iJfnwug2?k3UZ>HZP7JTwl4WLf?P~Jt|9FghQ zQZYhPdDbQVGM~>Me~Fty1+w>s+leN7?0xdiI(8Ryb#tacWdV{85I=q{vH0HFlD|>* zQ#xrJKIz`mDq2LIz*oPOmuGM9Pj62#lRoE6FUsPUSoWXkq?Y~uY2Yw>riMkfpU2hc z)b5Ql?8P(@czfb0bZjuQw(UEDXq0HOuUP)9aP@IUf0Ex>-#I;UC4S8bP*Ay$($hcJ zXMbO>-}`^4)@rK$)xlpYNxvKZ=u?oS_)}f!XT!g$>wh-1}kUt!xlKlD#|2>ub zZ2EIb_lIc|*?*`0w*>EJFMo}xfA|0Z?jiHUzeU)e%@_W%2maZ7f$C4@zwL;csu;-C R1psi7R}eCYXVU(7`VUT^90~vc literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/trailing_blanks.ods b/pandas/tests/io/data/excel/trailing_blanks.ods new file mode 100644 index 0000000000000000000000000000000000000000..a56fbfe452387400896ab49e0bead05f44d0e779 GIT binary patch literal 3060 zcmZ`*2{@Ep8y<-mOVVIk(1I*klT5N_9mCi|mT3$|X2{4E(MYl;DO)sT&yp?M=(l8z z>`RzLV@nv{Fv8%Q`v33h|NFlGyw|zj_dUCIF$Jy5b*DI8W8RQiIQRb z{OD|qjzMEkJ^@}RgqN2G8j0{hdtzmMvCcA{2pk$G(30Q0W-I98nMg(fC*!nA1mQ}0z6PS8Gnq&JF8CYgd9(Uz=Ap(XJDcz zL?VIB)Z48eX|jB%40D80rXB9#^vC(0w9HkZ4-Fj|1xnXDSuN{% zQ5L+z-XAY6Y?cadadpMCmbYpdj1vUh`&xGAP5h$djonC+3N(xH$uR*<80&O)fwUT% zoKRH}pVH(I?9w`+asKI(Zg8ph)`tqrWtFx)C~pw$S2rm^;u~IG#!Od2l$jk6mTz@P?{pemgp=FqN}tybUO~OHgfn(+1~s`uOs^nX@KU zwa$do1vvIzE;9fcDYxmD`PG1P{gBHY)=LW~_~``5w|6*ey(bybNjBz_Q+)DMpffsg z-oiY*EE-FcBm8bpzqztomc*2QFP>MJ9~g-HN}Ls}X7v@Q9g?E*T)0ghEa45x<9yUy z$K@xjaME^dBJHT;)qxts1E<;vGV9g=e$*rV@bM8lJ8r*I3xHei+4&)jBTd0gFD+Gh zo)KaSwnID3^M=Lkk}KgF(yz)CnMWE`M`1<)sX9)sjDC97dPsrcBfosl4D*HVo-wp{ zjV>6t1z%M%?4vdx7}#ojuz#=Zm3#73;nX6$jyhNRkA_ij)*X`|BJ1vLunT}%bRa@HJjZ{EZ1>%K}2lduY zb(s*W+F760m)8+%XB8;wl7n)zZ%ca*iYylh5NmXKF_D2cEh5 z&gxXpowq;k-+bnD0QnWbWG0!mQ-d(5gMf*$FXaI4ySA8_|T zSh>bGKM-EQKdOtgG44QJQGS26hm^){cj#vE?vm7W6KqIG;~@7}ihkt9V~`tM!fWjE z)RL1D*lDq%d_I(s#6x(a@LuFSfwZZYWWYSlJTM@9lXFNYt2UP=O&&bk1MFFiyc7L4xq z^^?IT=y^zwJEqi0+N+fTj-&ljBv$r7;liQcBoV#;`+ z8fKr4lrBH-%9Dp5W=TwyGj*q{Ylq}SU4$vyj+V!;Sk^{CZ2>YB*DT=Muv@*#Z^p1q zTN!l?vDwv*Et)1IjK|KHp>UoaHg>1p+#s!>qm3WE;@SIdsOM0tfmr-TR@P;`@*OPq z6;f+nsmf`*OpumZ88QysF zvqlBE0`z?A#9>8C!eYnTRi)vGj`(L0P9uiddDIA}CDG@Z-UhnI;z3~RiK^&2JKf!v zB1QO1O;d^Uk=X(E#)E&QdpY1Qhb%8{(bO8Fc1e(|(H_LE^9Lob?o z^IOVtbiiIKFDxulft4=rK;vTH4=LEg5=wX&D<|fN(bFe$(pOtc)Y)HY=1=>hIn*t6&D!cY1#pF_|nsJlf2UK$~~5? 
z5>_P~{eq>BYuj>-fRAqgYeX*Bl1D~2QG(Sh84uUTHUuoEJI$*O^-WZTxg=JkA`kJ1 zni9COQn&Q z!~_~?nb1Eg!t_MEV!zQqt;Gmh+R7gqewTKw=%& z_o&?NW%cnA&l*+REyDTjidLsRU9tP@_tf52a7LKXLvA`l0$J#C(fzw${y@u*IZma& z->an1P;_U`&7;Hp{lJwDzK9KSs^?dsT_(I=nW z7K6Z|T~N56^`$6Y)16tKQ)e^$QX$QE6G-OX0 z+(%lY{ZoX9;bhE%oKAhS`rXYpXq7oQC7HN7y5m2)J(j<~QS?OD`k{zKlm_l>_9x2) z3>8!gCtn%FOT5g~I_KqPKlLfmrT*#3d^sXUW2c6xH_O3D<#L?lkS&Z0H(=Ux+-nHu z&P{LyHe4NaQr+V;+eaN9gO+^e+g{((kXNg;5^Y;~{TU46ln@kSZ*uDjurOQgrIais zV2`k6ivv&Qil@?u`Qq9e#NE#hNvB)56|c?37CVYmx1|(^aPc9^jRqFzTmEDMh1r~F z*}Yl(uSc`(A8$NO;B}0QoiT&TrbN|-JKZV`50?paNmniKRWB&eWsI$@JzLC^~}{6JR@2&{0;tb#Y2d`Nj5$zu3og zK>DYvWSjxu@Ba12=&$ydn}tJvp#Hx@{e`e#eE#yQzvF(tLx17EGW_m;-Kf6q literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/trailing_blanks.xls b/pandas/tests/io/data/excel/trailing_blanks.xls new file mode 100644 index 0000000000000000000000000000000000000000..32aeb3fe36b056985c979bccece3f0d9667d55fd GIT binary patch literal 25600 zcmeHQ3tUav_uu#Sy3xBtLbr#Mo_czv6!It~@9{`-dnmPOVb7m+s6WY9Tq?kX-D5V$T9VNv|+0_Q+drT;}5(B{EFYHEUfB%cZNFp5F;W=6QyNMu5{@MtqHUn$MvJ?t)h-r)KL$P9CCtc zbC;s@AYI5WP@hNEqN#I9q82BBM38tAOQsSb9RKIk=uTolMlXq^x-3cgBnae5Of!b$KoeKJ;-fWFuKwru+PQ2B9)CCYF%3)Mi=!CM1vR;Vr64x?PzW7+GnDZ__T|9XYlYH z7}q9*38qbAR_H>s;D(blc6Md9hf)`HYY?xCbPW~?Rn3!h$EQa)GVji(Bnfw-Woarl zUbS+vf@|SUe&J4C)E$XE>4puPs!e2^BVDS#4{;=L<1-n704!cKrHP%dZxcfg;y~b8 zCSfRU-N;|ulw#Y|u9ld3EyUtFMTTsPgLnU#4>i zAG=2_B78;l zk0`+TPs?+P;@eJUyv{11A6Gy>p@4o=0iEuzEMHolwleY@pm1RJL)+B`L&ME&H@79~ z^tc#G=uuJUEY4XFby^+`@F0{O{s|SRB(_g`1$0LR^zL%$;LT||%b&Cn(e#S65tLq959RU~VbKp8}^H#W*@* zxL>X8vP6wDF5R}lZmD-e*w>gEI@@yk{{vUUNRz;=R{Ja=vaYEbm6(6250zepq@SO zJkf(EHCMI)R%ulOdYab?4k$>gIH|5i1?`>|CG z&;hlo0p3ZiYJj(Ds~X_F*Qy5SEL+vUp|uShTid{?wGEtG+n{%A8}yNCfcGEM32;(* z{aIe42UJm$cnx*~F0av@;Ejp%W>JYu#Gr3X>=}%ev<=Ai z0%Gk(qrqOtOlQP5rbtvF(~E+F0vV%W(!{+$8emgNY&1-o?34-OcSbMJ4y06@${WqP zANxWBb%}eG=>_T?yT^2DsCO}qz8mGaTq|hR3#Dc6+y~DI<*9Z?fltykF^vC6y)m!=ZuPVg27h{bfO%jeM>+pf|P@FZV8B+ zyK)d4DG(^g*9lIG73%~8nHK0oIS7P%&4W|~DF=ZS!RA5S+?9iLl>&i+e4XITU$IUw zSZ;w%l!L$$X!9TyLCQfO-fSMk&0RT&trQ3p7$YBGM&pu=?MX z@QD-T{v;Cq2}lCB5@ltwl@LDc>F2Kj8-vDtXZvT;+whMr7QO4AlZJ^KBwd^V=Y#zVQK zSpXZ47Fb$E(6O>#<+EweW+TVatd+1~T3SWWpH~jZXVZbrMvkR*Q^JO6X%#{D?%k8m z#*EEIj-@#$VZ*eviXhRmt@7D)WV4ZDX)a3GFfFYjsQAun`E1PDY~)y)yAn1`ORESf zt34*4O($gIrQFi^z{aZumgeTZdar`9>C9#$$I@(+uwh!7oBR928S;D6h0R8erP(QA z!?ZLv_ww>``D`rMY~)y)qY^euOLKD<)kMi>!)LRRV`;8R*f1^4%{}|#Gx=;R*=*!k znuiiLOiOcf-&?s_9vdBE1)(#MJU9EZo|~OTS_+?)gbWat(xCXDgw*(~)FIYQpkPsK zP)iml(~1armD)o-s7(_n*bf`DD+`oqGlbv@2K7{DOWL&w6s&~}YR>{?S_pLcJdqD- z+XM=>!3K3=fimrZkO|q<@bfr2HlL3^-3nWIn0%mab)KzYOtp030{VpMNuIzOxD*5HlBh>K$HmLW5jTJx67c3AAR<08IX)sOHZdWK_pLM_*HqM& z(3_beNiu9{ih!wCEJ=hZ7_Q#nvll^e5lzeh7qut`ngrfU!9`p};Xq|(i6>RkmMX31 z>cgH7cn1;?BZ!RS2MFThvv_GNx!Q}`fLt*!8H^-xFf|N-vv~N&luEAoh^4|+FHMP5 zbV)1~T@p)0m)65lu|v9=D2vCJ7DYo1x*h;3nF0SIKmbCpu4B3;)U{*P)r7r~O?CNX z0z3&ON!mE2t3w-GRvUE@585QdPBI}i_l+d;BrTlNwV;JFtA&;XM8*K}u2nSoI14#;O<-@U0pvFHD1epgq9|hv1fIVQqARc`j z>_+p_hQ7g~P+(#r7-}DBTuw)kItc`&C9+Hv*Wtxjz)Xy#2fNWAZW%0ym=>E5&4*Zl zC8(MKXm&6Ngti|Gf<}XY^t%s)(}Ejjz^Lgo9GapQNJcxt^;|s&G8o|C``F-ya=>Z- zrvO~!yYplnbOgcOnc(09+2BTUz-d3I0Gys0a4B#PCOG&)mP1f8mIIFdkRCB9wXEm1 zA)YXhWJtoyWH5zf=^^8qK!1jSW@mye;-EL({vvqWARaV)CGl_G{su5X0>2a#n-C|M z)_DI9V+qp%{1R?eOn|#z07K670BRCzl=vdC_rZmAO;DT;@do+Ha6xjCFaq2Q%Q160 zfny#Hz}QnE*c3VzI-;TcDZHW)_csUL<1`vPm{dA}+tFQGRt^D{!@<&pl!HL1M(uOp z6uh(Ke(8z#%kab`K^AYSGzPZ~+%Jfc1VRz$pV;%_^0?Tg;nH@Kqoqq?mqwSwE{!g& z=e8xi2rQ+ABqvcLK9|L`9YBXrI*=W6rpreUDK+LuXp)33i6x;+VoB%{I?Ueb8lXEH 
zX%RFuU?4)F&q7Jyk19h`?SrN|DmEz|wADkpm6|>b&nU2tBp5i7yGov6-;q>UMQ${7VzJPic`w_ag;u4yeA@KpnL&io36VPSR?$uXXP;OiB)MJ5z z5DZ!DwdA=~e27yTUdabNi=kJ4;(KYZjFB+{K~j%xybfd=gC}ja^gEkfun~M0;~KVG31-Xz zonPSaAQR&Gn;!YQ@udg8vj9>A_R4 zaYc6}mxK;eQ#S!Ak!(p`JkUP}Y9gIrwaFNMCj=xWB*D5;_^5d^1+ynPr_6}ozwXNL z5?6zP4TJw)^weo+PJth<>x+e#R$tn=;QY^9ZA{DmQLtz9UkyPwNBQURP2(MF+#9~{ z^H^AHp>@1uOU|C5hu--+*wmW`PRf1g9a3{(Oy;*8JM@jn)!*{I=(P8Or)TH-e4qL9-tF=6XYQ{qSiQh&a>eiqB9kc}e4iWNsz{rV z1{x8;#86saxwrTHdLH!T1;c>*N5X}I_}MPbc+6OHeR_$j&eGo(7caVzI^t$Yhpuy6 za@Pj9>`Endc4;wI=J(9&?}c2_Y;&5I@#F6edwsqfT=A)H!>@ePZTmD!9tHSbiK(08 zb1rRR`QjhO2!8g|Ef%E2{x<)Wh{t`*;hTu3|Q_;jFpg7=1?*e6qW zejqDNwXPh!&z# z5!|MPZ{Sy#x?}Fz8t=%{b_1stTse69BMHdq*#FWGH@Dxr^Yrl|?`scDsb&g4n+1-0%=7LJ?(~ymeBGsX&t9Z-C_lE!_~DSBXOBp=u3o$N!14S1+s7TM zEq`~sKlzl+be|Q0W1rt@uzxhL-odsFx2JsJhu@rW<2tzL!xn_#-M|o=?m`a~`{npy ziRRu0XWKtd8~3vK$h3Mxvvq3q?YbR_w+;(?GI&Upv(sx~|FAhp4p}2zG}`<)&_(0x z`W<#1f8ca{f72q8-#`1yGQCqbo$W`wRU5R{Z*+eUw!3q% z(Bak4-63j~GkM*DPyM5Je)iRyo#N7O9@4viV0v7BOr7rM2@W}PulyES=V@0R{?&!K zy@L<$TzuyTlkmwGHk%(GbmjJ;{`O}J79KL4Ga+|hZulbqp^wVj6onp)_iVrCDnD{i zL(ZG8PF3wqxVG_EyJbrpi>gc`O|K*$G2Iba)`x`pEw{<^={4+42VKkM*~_vx-5&JQ z_UD~Z|0(FaFyd&Xef)*A5B4t-@9v5*w+{H~E!TFcV949IrEAv@xcyn{hiY$r;=?n& z=70I;%TI~XE|YV7M1r#?#t6z^?da+H^nvKl4-X%v)qVa@um5}aH|rCMSGKqPa{qex z*}p!o(cin&_}ASjf4$0|dnmm4^Xq)yw69-Op1pG8d`82NjKy=)Ly7|(7pnFC^D*aK z&HrCENK&*$VNT&d)CsW3>c>-Z0^I%B=@Wg2I^eF{&0^zXN^^vEA2t_A6n%3>DH z+QaFxuUMyT!=72S4{}0+f3eOOK5@SHi1=P7eS-PLMw>53EpyryX!4VhM$fXFBhMYt zE?a%er)z0J+-99##_^8DdrIT@k35EK*V*rPdGW`=>fdZV#ao!~bT4UpiteCEdlKr5 z&9zTx?9_Xg8sy~uAYf#s)#l6nmgmL3Nwo=RU;bdFe(=NzgWn{j+y8Ob+A(9;v$ZAj zbocmX=scb?fAcjf^YX4+PbR&#E(@BrB6yqSa-;TzfsZOnAG9~-jd;?z$BTt?x27)5 zi+iKC{7%td+7~vRz8<2!#`4s*4e!W^b3vy{HaEB)eRRTmS7gSd@ll%t$IQ9^wA8Hg zs@N%uKlUp#`EI9S*@MS_>1pb28s~L=h%e7F|7FD3DL#I4_V26j&^BiMsPLoPtQs;8 z`xh=%D_jyXw{SyBl-+8B@I9Z%nS{G%bVpy?I#mC~%&A;`-mx?Tm*KlQwW*9yZ}{p! zpk=}I%|)iKQ@)#ZC9VCbOSNbF7RF4hJ*#fEAl7W~%iR@PGj0V|-R0|V>9|{3ljb~TJ>>*=)1y8v%DvUlsq02pSyopVek5| z%MZd99o1{Fh>!a!rlUnfo=twQ`rY}bUQGX6lsfk4>&a(d2yQ2L@-AMf?^71+;=XA=oRgBiUIBZLL=DNvqejAh0dsR{VhT*+e4D_rH za>(~{3wPOYGT73)ha7U> z-HuZ^)y0b6kQ6*?()_;3MIKeowXrKQ3=_PxXPDiu%{1te@#~GfV_f_n7o0u#)cN>x z`;pH-^N$HTR^PLjI%{{28h+xtm6sy>ZrFWqRXf3p_eO3DwmIaSoBlR`(MFr>g{SA&J+Csav`@?6mtL;k z=~lapmoh6f=s+*4H@1$$rp^1?som~*cRj-gzUVOYj$yFB$055Yw~)Ht8>5rnkB(A% zJpG#}*Sv_Vd6u2j_nqH1==i?aF`i+1arv)5`Ck?ex>*sod*kR))k)Dk;+76H8&~kO zdij|P(<7$-92>PbWMqTz@&PB|{jlw8M{B#LMEM3*7yI>h&3l?XzRKirwa0$p#|6~~ zJFigtKH!zrz>7csbK~N>T~7-W{efFRWuM==H`Z=1ge&IobjLTg%ts(SX`v{Lr+DKe zCb-Q-SCfo}g>!j{iMNUj*4f@)R(YUGE#rx&u9vUPh?lQ)4?6Okmqwi38hd*4cjd;B zZV!6I?CZ$u{bJ*+Z5L*oe7V{$%yf8WO`mCprz~8Z=>4i)^txeo!L~MGcFsMv2=%%R zU7u6AeAK7(gFSbx8S}(y*Y5s5pROF-kevF%+icE)pKOY!8qBXIW}8B{iH?5m*)cQP z#lm#bml6GE*DThs+gVmoYn{C>Nd5AJqN37=lCE>OPB2$iG^>Sbj3j4{8#+uHfIZD? 
zYV8X8WK89#y1YDUAxU>iXbMhz_8Fr`sq5~0I1 zaN`I>avEe3^u7u=T9a%DVX_&a1zaF2fwq!C!+Kbm{_fR4>buvrTob%ZB@ANpU{6>l z*bb))ArOY?TnNJSsdGa(5?FWRk-zu8y%{_;)Hj@>cc%D^vt&#sSx?;Q#35FX{Z@C z4TD8C4MPFLLPL9OKG2wV)$YasaW%kFIC?Uk32svbqyuJvVkA& zfiR)2b%1Let|s(P1HKGJ1e6JDb)v5GsiRe6EtJid+<^f4Bb`*=?g^KZ9qJB zLjBe*-u~`WNQ^=LIv#Q8cC^M7Xon%gdK;yGKlnw99t;`#H3hN}xSmXFKteTReR|8IW;3U?pT+ zAJ`6=xgG$^0#u@7cLNFw35bb_{o4hY;5r4aN6;G_hz^ht5|NM z!P=^%(tt_>Dh;SKpwfU!11b%uG@#OeN&_kls5GF`!2drDU_^`oGsfH)f8&M)+~9yQ zHNMlwSRePxU@(m_JI3$0JA~R_0l)Z1c<`M+#`)qMEg0+LdI0_%8T`8`_;*6^?_%Jd zZTwR?7@Xt(8Ni2MOUPD`ts&b$?h4r!GG4cXUwg>iAv-{JgzN+vzA>P#e}+0(PnG_4 z4TQmmB=~I%A3hGmzs7+d1hyub5)En>3rc`?#OL9Wkn_}$45ELekK+F?0;ywxyAVG8 zno7wLCta}xCHP+eB>Yi^7Br!%EQftvm^8>p>a!aB;uHIh-^E2yG9jhu6Ir^p?9mSR r<2{7;PfPh*APg>$LDC=tfE#|2I|DxQ!4YVkJU#z~{^QS9B0raVTAucC7sfS{PHgQLU5 zE0Mr>Hof#%m1O>;iTc)yYe!tAzSJq=)eL7X=hBd+nl{a>Nc;F~G|H7f_0v}qg@zXU zHg{=4HPHEWdW6p?%=AUL2$q+kH}%m{(C17+k@&YS(Mu3ZsGPPs#+44ls3}`Q3y;() z)-k(ppB|AX)?(Nyn00l4^6@@w%ypaVO>7xbM~&C4d%9P~_otjKCCW)LXxNr-;uNVc zvJoTse?TDbO43$?;jVWw@sE))#dvlFh2Q+>Xj!t!y&Yi|H$ZadIL6jU#nO$1e`ZaH z?yHKmj0H!xyKcO)D$(&;=j-~AJj2izEvOBCp2QuBCetu(9WA;juB-%|^*IjvqY;zn zs(9ZqFvY-?z`zR<2>6K^aV>%EHw%t^)!0T&|~hrSG# zaXuj;Nikydc4J12%n}{}(UHK|rd!^9RZj#Kfn2a2r12)Ns{y3mGOEjB9;bF__?%-r&>fm6*Vrpd{9Y4U> zt%}_%>>l0QY1S;=lD+pyrWv+}an!R0;y2Tgm`2D(yh;MnY!&$T#k@R*>mY^0aQrBQ*DW!TnE z$hwl|!-g$Wff#A2PgAJ=z&ycXLN`-CPSP>!Bx#bZQHXTq3dZVWw@4jNwUF|a)NNFZ zjvBPr?e~?b)WX&J)UYTc6B}X`ho&N3Ja=B&$0t_DxTif?sSwU3iEFPxtU)94GOYwJ zvN}jRBk9)29yNL~NL30XnaqBUUjk59YMq>>ig!jtk3$Nz{UxLFo6a zUU-xY^+2YpAVD!<9|_>SZLxO#3+wHCm*dskG@mZo--dAa{51Is_)QZygoFTi2yh60 ztag77ptxk^oI$J%Ha z0$s=bpK|fYKWnqW7+Nz!BkC63_-!0ey%tOl!l?iwC&$3 zJLhyuo4|bMMjo#tK<_x%m7i!%e34eemZSWVuiMBu@J)OWm)ByB!Nyffv@yPR05ZCQxIoDkzt~=Q?=Wp~kKRa-q?63AKgTwtR z^}Vt2=a|5`Fbd8aeDI7vsqbtF1iCo0ejnJstpVQgf^yKJ=pn}E`+O*<;{huyCE?pr zuz>-ekc*5&x>cG3uINfE$-jh!F15F}bK<90T10+Y61jF&e(`>|2CXUTx%#c9;${4b z)TWU>EKHy;1N%FiUbdm3vd_~ID+ajNvTUy_j{flb;1P%8%k6s%PwmUD9eWqDqEXF9 z&BjT0SZd#E5V8;``BM>sQ2B@VRRH8__f8u896A*EB6?s5mBu{y^vDy7~6s1Tf zYm0a~@cJWeEl;hHfeYyH-X^7u7L=P^P6q9>K}h+DfBG%PS(42>|c@>161b`Xv#bq{?<&b0GoiU?=9 zt1od#re^y~EN#Bpz-6#RAF^uFN=zH5v(0DkW!r)T97iYng@_OaT*;cW8Vg!KkUhkN z%D$RfY2`id8hgS~Ki&p`^i~dc(S}~(I9+czRcmq%{-jXKNFTNrR^fPCy{b^bQJPFH zY*AwDD{EU>%l-Yj*XX9qUlYBag}ZsDJOd5Wen~wOj0_HD<@1%izuM5}9@r+w%5#b> zr{EsstK9`dZp?{IOj#Yop$Jz4pAY>c= zap|xFQD>DZJr8j|pD<@`4Kk!E(bBu8aKeP=9g(<3L2BM_gV4M_FSUQH5~Xi_6Xc-P z6qIANGo2Hju( z-mN7{+%8Cxn`4BwjBYBi?gSBxZMPEA{tu8##+19A=hb$dw8S;hNFLA5Cs+AmzQ|$r z4qoQXK$3Dwc}4T1I4TbTbv$b1a1Z*f`nv_iwg;cMlTjxHg_ThkXs@cx##T$7QbvZO3)5B-clgzn!ZYI>xH% zEwPz`^0uHf(I?*}7w(wkSR<)RpmQtD$Vo|q6&B-giYKnRn(RgifmUW^11>p+hB)>K ztW+Cy?Oc*~yAAlsxChT{B)cr9q;x%*^R{1&2~`ny7CWs}COWYfX6t!qmyWu9{VhmzL0>;0GZ! 
zgpC|OXFM)f#iVRiKJ__jvO1;E;C;s6BT`&Ub0idSfwtOXl?Vsvg5S}iz1Ib=TQA>& z0P^Ke@a2(XMr1HNNDy=8M@g0)>&TH6`~cX$dK@s@JiO&X!EXoFz(U zUv+!MdT$1gzcEnvraCK)^*4T8i-F9*>$4DXO4$yCj(t7Y0gY?l5ubF-^D-NQ;00w} zaDMLVszfGvA zbUIJ_ES6nk4^KBc5XVMSD?3R-m6k3Fr1B|3l5&n4WxS?innT59F7#b{O@b@Je2I9o zez6kvycZ1$9fE=Wv zOS%aDRz2SX*tBipymy*5ap-q->vM5hced;8I}I<7y<>{UUEZ_jF=e)jg(^FToa!_C zx>hHcGdf=Kq-A>1y7Z7yz`v>1)r8H!fWU*BnoQ$_ACbY&RX*=}5b1?e|fu$7o3l%r=cX#_lZVXZmQY+XQnp|yyHL_a_vTU!El zy{hI!W=}Z*7x3vok~ZTBY}tzlisDopA;rac`o@+%SA8EbBkJ~*celivmyj?WIa&%9 zs_N116V?5*vn7W7Tu3*7`Y3D!$~|r_>&E@0Y4|M{Hjl!7G^cr0B=J*4<5K2|s=*lYX=*DVl$Ro3#@sh!x&`?%?1K#?q!xxG1Uy5jz~nn;oE@J0%( ziB`dy2<69)?d;-d3v~V_gbLIKJ6!k7xn z8-QRhRHRW*RwY0|!qMAQBBYIhHW*z6V!B9!d5=tMC31Ld0Cb7s7WdgblmqDZ!>FsI z^8mZIlMO2@sY8PTByy=Amg~Ga)YXjYc0_fM1dvD*-r!iBOPVbURJ4Y9U_3Pp(ijvmyKB-?sUM56++wRjiSh+G3F13szq##)z=a4x@nB4M$A&tjvj-i5? zNlnYdJ?OZfq z3pcr;U@E`z9iRT_$1_4dvv)ObtIc^&k70f%k*-A}ZYaQxrU&2He>&XK#0hAw?&4%+ z|LnWp(?&jY47RreIy?7~qeU1nKr(^s2?V%o9N_uC(Y3j%G$|e6H^ksLQvQJaRtf#7 zYk!nRKh^E`!pL@{!x~%=z^+{996oh1&NyQOg{#k)#M$hSiqyX9ew_3ID0gykZufcS zIQffk?pP$376!-XOsw($K6nrJADZSP#}JSyu796l(aKJ{${+tXxEx86ePvItf%hKOy7y zNYC#sear<*QrtIxuhCMGoyC4g$v^ix#IwkBWy^i$$rFFl=eK7Qj$n29tFp&_f`$9d zIkMoJ^cN3+Gugz+Q`X)b=a|Q!kf0{d(sW>?}Iad}j21V9l+RtImzMg}%Sz*{SJL5FL z+c!@02nH0XheH=(itTUu1kYMMr*Z00vuJ>;#vgi9h=`MclBXvpkDjGf&$UODKEbDL zhs`QrGHDXg-+J6JQ^PD{O?jwdWH2qIwiRm;^aRM?1d#zcev-68R~)m7ioY8l9wRd> zRj`!)8qP5^7ZRagH$wv9?wF5l{El~&5vv%xa;kNL{UC8@mOSPl2q*uIRa!~kS9~GA zK;fj3DxWEp5_@fAl~iY`6o_4G^(A`y9E2=OvpMUwfn$li3mw(HB;O4ih?`cy#%z6o z7FJFH2!W&H4=S4l&Ck$c%U~ELcoaK5aEqqP%%voe>joMMjWE|qBqXk->&=?_HUj0mI@};iI#1FPX|}F$ zE)a0?b+IN|k}1pR?P|SY)M~LPz9Yj}`o01=h|pG@^P><2@D}rwu>lY)S7SmLv$4Qm zwMg4SjACFw80L=<93tPHNs0C)e>O$op4@0SLOL|Uy>k}Tdm`1j^|;@AoNRV)h8in| zsabp3z@w+QF6uILrEYj2cdmg(e2CR?d<5$JL;9dEylx}Sey)(`9d2l^UzYD%qa~L9 z^n_(sF7=a}KtOhZ^RmsVUA6(}#-YA;Lq+@)M!{2g3g+U>*WK zQ~*B#8*v{x^`Rbki1p9?{U;ItK#d0g{Ic6WH2>%N{CD#Lg1?#nxmv5p!+e(+$N*#j M2{?Prh`(9<59YgwBme*a literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/trailing_blanks.xlsm b/pandas/tests/io/data/excel/trailing_blanks.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..9f8ca5370ef81da3c0e63710ff6ffe57c8bed136 GIT binary patch literal 8824 zcmeHM1y@|j)@>{}!68U+cMHLT26qn*jeFw`2@ouV#@$^)(BMG=jZ1I|9^Bo(o_RBG zW|;YY!JE3PZ>?VU?z5`uo_%)JIm&Wy@VI~{03-kaKmnkr_~PjR0|0oz0{}PxB-l6N z4)!iUdly4>Pe&j~pVh<8mOKX@mOcvr3w{6J_CLG=WpVwAo$Q#B7t*)lYb@alRj-hE z_F((48C8V8*3v1cx2-wJU0=wLg#zO7$L)`+HZpdcDiCwKQ*05iYz|N+H-r)b)eNTW z4Jt3TKz_rV@?%z3&2NLsVP-5ne0h>8;EbduFi^BB-p?5vDAFHbe~By9e2b2#q}-P;GxH^Tn3xU=t!mea>oX8fKE%kH28R!isnB2f zm?cmR$ctFYzl1(xv_hUudPQ1U?wvo`r7Qj-hNDGV?uCSDS7>xZ@&&4&UY@HJN2pZb zvNjC0eSWy#?%Dt#s&{se~h%!M-_)bs&l@`75J=0j`VoI*%^WY(f;9-7|#SFJr$Vaq1kQN+A5iA#S1H)G4v zlUHJxYUnpFNv>zkCHiJ&>_T7k5}m@-Z7P0Prh~fJ!vh>Z`7b=J)nKPOg+7so(hwEO zQ$uHM+-TSz72(3Oe}oF6q9oom2RR?_Y07je_9)r zOHI7cMn{6FPV^S;wSSA>ZQtC2VAM8*;(Ud>ECL%>fU3^DJS6ej$rX{2);U4Sscg9m z%WdLp;yn4atS7x&OEhEYm!fRB-bE^@??)1!F^5^Th>&seNkZ_2k^^TvotL zh^g(Bhg8<{W^Km~r}|GN6l|f3yyuhO9ZSZ8fJ`iADtsWe6lYgN8fun&R+T0h&is^~ zhGw>3Pb5;?aPE9q6w@H8l-$^toWrksXtK`yb!)hf`qMqTc#%6xA&0#|5oAR_ppx}> zl5AokPFceN0A^5adkS3{4_h{O2WJ}-2M3#9gse=%#v%75?p^EHJz|%8xPq;i@)rHm z5l!pY8mU%o5+!=<-%yMulV~+~9(*fgqzy|o>Z&!YR}p(Q=DjQU?&`{Acfv|FA^cTG z$W<%`g`7BNre+V?w2DS=%FSK7Wpz#)v^eq?ORm>G2 zeA4!&BsS=jW!3n`jW&@x_aeNyjOFE&#+Db-mHjsfy8}!ySG14;P|R_#d{g+87h;Y_ zFZYN8>Szs|Zv1mNEsolKBD#1DILxOxxYrGcwdIWH!(=wa)8fcg^+~#K5JbrCIfA;0 z6@&7XUTaWlRK+(4!&th%zK->a)QemUHoD<^(nEozbwp?E>|vCD%T{&#RQsNZP^v8| 
zq+Pc~OAP4;!RTgSszqk{&z!Y-fgRm4tJdm1vyz%}a4rAPyPrB{(Z=+=w+rvKQ*`KL zwa=trI_kXLDo^68I5Vm%#^utuFsc2DvCVg02Ik{S1*OupT9jl`HX@_=1^itmLG;yU1WTe8_pNH^%7GeM8HY z<*%t87>X-h6!4}N;gE5eqUeI>$vrTlBBH^^o$6JJ1UK)92OzZFIb?~fGj7`L^t%%0 zUiYZ84}1j=BWN}vQL3QY1(a4R!*b)0$#cc=PStm-#L5o0qwb`mlkOGtfR~K;k?bR8Rm>e#2Mu5z1Dqo(Z9YPVbpxmRsG`$K&HV`sG?lyrZmWuS0e;tBLgBUHPH07x)UTK-4A{!Gr_r3?nT)r6A! zf4h~bD#&%QW455)hqAk;y5V8XxUf;~YV15k>aAgzqov^TJ6$AZY}D7Dlw*T)2=P1^ z>~Xu`!B~OAIcsMs3de@`!m&LNKyVm58H7VJ*(-k@^cn#Ndv|-MbPp9H1%%Te42Cdg z;J*BMNl)`EG`E0K;6t4Bw|J;LXB8l=hfLD8lb#`>kw&WBE~&;7{1+V>w%suRuWD$UBWXOwZiQlS^|d8 z;HAXF6M!z+xQ^zR z%8`utBIP#4ny_{Uj<%{eYr$eqe%km;uk!HRJ0y}xjF;8dLY*z zn)OM1VNnR2^d1T`HBf5zDlHKho|vWh;7nZ!4?D(@_9@y{Zex=TWxYA8Y0M`+R7tOl z_lc$W=652RAwzsUE=PLIi?@}rEm-dLS-}~qQ+?p z1F>`RsJs5bA<#Y0-Y)~Skpu=g9plUPjVrujJ6?ZTRK{WYPfudf<{|4sKlcl`9{%R- zIk9Pu+2nUxzV&oci7M_B3~nzP3EK}@OhP`eH)4O6>e1qy-CR4v3dmes0le|0Xyviq zDGsT)vM8C%nJiih<}!E}W1YTrW^89>xpb)>Ypx_KszAYulHYFb600Df|wV#jo?}Y4di~yvl>nF zV`Ti~~!vC!OJU(fhM(7o%h6a%lGp1b)DCchVXN}7P+H`467}~4f18)|+&)Uu zagkU+Wcw>r=3BYBOh0EVyI!Q8X}Bv1+w24~8NNA2{BKAhF<-u#zYJWYjqH)`pvuJW zAt8PHBMF}=U$vZ&3Vdl0WTNct%Sl1CiyklQ?_J!iw9FABh8{`3s8#ZrCj0YhXO-Cd z;7!$iQi|DQ;|vag=PTZ07xdWJewJ4>cN+xtZ1|&^v=Zx2bj0nm9ylB1jeN96LC0I| z)??&IZ*{%%4dIIPdd_z4bld923Slg{gW?P7rulc`?LQi$D_>7!H)$Pc^yB{+G1RYa zAT5awp2|}9uM8nWYh02<{UpjDtk)TuZeD1yGnIvF_@;A8ZqGou{?tZULdN+^2y1Yy zwR^)Xo>|A{2b>>hZ$(?c48tlP(Mxw#)#SA)dDnF#_c?_D0kEpnGAq5`(3D1ys=h?f z^DTAF(R@e3b_l_yHTt5?fG(?X`AInNzJMycj-4|vUf(E8n^^VS!=$TMzV4POYl4@Y zEknrr{&4Fgv`xtWbn~jyu&SY!QO+KbMon#hBHHlb?vCJxVcY%T_7U&aINec4SKIAX ze`4Fi5&v1`$~+!htMBpgRxH7y@7ab@H0kUd32D+HMcC)Vie=LS+6d!7jGH4Ek@PNh zu`ci{tWCF~{eB3n!5i}J4%f=PjyV$TZiHsRHk+A7vZDs-56PdTJ|oPVM!FHY^DmX8 z^dBBNX2>tZo-~9tg^k46?6b<20WLPUvb0uIXQC>BKheXn!wO!ukEykN7LOz?Z4!UK zJ|)DtB3~JHx{vwk?fdUE;?71is;o8QChz+@faLFHj|YF)ed;W-nGo_ZqcVn$gWd0a z@p^Z^C9BG=DxU-g4{2+J*&3&_fBS3N3N~s9uW*KwYg6WTazFV)&xl0ZP|#K}Zl3FR zy#UKOZ;mxY5OT!`xlKnY3xa0l!H_OLS_}%w827g_1U}I-0un;vB+6AB^YzGe9e3uG zUl7);Q{w2}Ujv6oKQ~7vw~!II1XV@=X9eq~YX`%)1o%&i<7b-))m$RG3BBuuLE0n6 zT>Tpjqgs?k@ua)5WlWGEjk7c(7$CUBW|F}<5s3+SaTI4)Y2n}$^H@J$l5ANI6`d6gF(--JFvTmZNp6m;Kp9Gw@`~ju zPKHr7VdEl?c2S#Ybn>)zwJK?m%wwU7bg7PFA7c+9SY{R&Ot{hMZcGjx=TvGiX&`py z+KzgPWg|&R_a%^)R&At9!2YV%8mXR)J#g!myWAmXj__8>=KY9(IG z`jz{NQJ>{hbNCov+y^TD2xYgNB(sbTp0J96Q6(laLZz9M_Us*2!K4zn^A@EAUb2Zk z&=ael76soJx5r&#w?YNh&`XcS$Ez6g?e7#>oGWZt8!)gZJY}1g$9*H<+Syq$2RIdU zniV_4tE*M!YPgt@(3HksT-v!xy%ajCpc8?l59qqSV&0KWzr607Z=NtmbPp8ta`b;$ zsJf{v=8#B5e?3MFiF;ubF(0cFg|yhrN5}Hu$6$-NAUn+xM`YZFss2_q8>_A~*o{!L z|E^MR+m-R@l2MF`9>({bYZyJ_5lJ_zPd!tt`>X3LgTYu45%Puu1hvc8E3JAnK28T% zWS~N?#P);jn)}_%~}B$)=hN}|9H@MRP1%!hi(tGp%RS#ANd8j zc-jI%zw8O~;@>!=vg7tF((MUo`xWZJ>R`i6fN_gec{Qx*_9kG&%dR^jw;%<+7o>19 zMf`U9sRVO~HyaNPm(;Dh@UNrQdF6ZIZwQ`CssAi7s**iSycy6j8YiSg?TF7q79M$G zSa7iTEsD?4tG;lY{1t?@-P?3Ke&@wi2FjRTQHt!PDkIDL!2F8&g2_1CC?V=E#2!E9 z@!rV@Vro+QBTHc3jZ7+5NGPDqaSRP)TNqOxPq@;=(+Xgy3W!kiZQnLvC>KU2o=#d^ zASKvL6piTtrZj-P&ewYTWBQHMBC!Ft{9v*MgTt;1#WpZ*V~Q|ms&HC&KDa=%!8$+C zEJ&F+XyUEZ}Y6p5dtWpD11Y#liOjVPHx1}r{s!iV7?#R^z6&V zz9o}j6B(f&LMhFeJyqPXPFyN`@XYwO+x=7?zb02QXv0`36RB?KY~>opXP!a-D-(au zy``+Fq8yz1U`Y#URLLG`A7N1qH!ERr9QAyNT|gRbz#zkh`+2@EOJ-#gcCV&chu&L3 zkLnjoJq(*fg>Vkq+V^oUD(SRSlOB3` z)knZJ^YfX)w3mVp5~!0Xj+XPT{mrt&rdXP*OS|B8*{4(j)RKQ|!=&Xwdq3Gp5J!~KmImd4INGc^}y zD|?Gy`Jwkg6s6;rJbsWnQgwYX3MDuXwH;>@#zbKD6*MRpZ|+6CM`B&P5xgw$>=Iqz z07;t}>r7;QoE=AK)O|_%G=ha5Un!joJ3E+;liOaHYamBf&NiFinY%7%&h%6-AFG%- zF)lYT=&HnXU{hcpiw46>V~Uv6mt5H{MVh%xQ~}o>XTNP14bh^@GxyuGyO8LY4-w~d zE_~|-&=LQAPCK64gfh@D-~c5&7Bqce=3uJq?BEDuGj(tV{#Ky+H){Z0x=66TTqiqW 
z@BzYuc&~42U8=bF+%jS(Dq;|ExI2YseLN2tb+Z0Li&fJ^U-Ka+--fUAkX+T3ih4T% z;Ty)}T`u{IjViYDc!1=|$~B{D3OjIVf*!p)-UvKz2vcrutsLjjSXJKSm*`;h(4j)R}TxhH8B>1hTVqc1btlDZkytE4ZVLg^s zV_FAvJTnD_%khh(SGkc_t&)Z`d*kE}r1`IGac)x_R?i7m;#L>CQj_wkv5wH_Iem^K z!yW$7|BPjjF~gw#hXu_nWB%Fyj2#{SbwB8m{npaJq7HNHXu$_?*JNl5#C!|NXn|!W z(psmL#xUSI3xxvH>?-Q_Wih)`(RHKaGhW+KYu+B$5;6-U{D!9=N53hDt>@q(z!TdL zoX2}iFX;Q>A%KxUA_m(BIXh_7<2YyR2F zivyr7k;U?h*+dN3-qNb(1V?ftp7F#jT>6484J9)4@?6r(Lnn86w6cl%xaR7R%Y^H~ zYCaR&qD#NZrTDan@U1!{!>Zg|*7&q~Yn!N09>kq)4|tlzC)>{lDr=&gp4Dz!O}x!d zLUr-?u+z%9w8MOoCVNUNfb;(IlezK8S|w>6k#Dz=EP!mVZBmFS-^Gy4g(TZ+ID(CC z3L{wKWfG?i2;9B(3)=4jUHyDV5cVU*mvey*{GcITgJh*^I&ab@vyPZxwX@D(3m@qw z4;v(dDNkYbxk`L)zi+iV=omy)+>DrE}F&r zs{8+@Dqvt)pyB_YYy7`|uHXBAsQ4?({nfx$ocL2I@-gsnLFNzWAT+V} zs66u+{MWScA5dt02jy4r|C2<1Z0B(<@efOhX#X9=-!hAjtvt^7{9%O`T55t$<#GP! zv4O{X);|o?;s0vjj~(n|=wns?13LWdKhVb-|JcG`HS`ZW0FX-rUC+mg`WXJ#5cp>} d73rVgzXe2PIRxm}1prW>R{&JBSIB<(_kYP!Nw@$2 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/trailing_blanks.xlsx b/pandas/tests/io/data/excel/trailing_blanks.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8f1a739be9078b49c6588643c8327dbadfcb2834 GIT binary patch literal 8806 zcmeHM1y@|j)@|Gg?gV$2;F91LoS?zA8+X^>78(l`M7MeaA01f&6-}XPe0;LH>vc$y`@&A!A-}eS{rMCgK zSf`M`f1zVl@y#+O-(b-s#*cCADWDaDm;UJw7FpOxO0PvcSN%L?3N1ATW7P)7n`HS` zC;ED>X+sm}W%iKwE$B-lEAUONyw6{A6IB*PoE7Wzz6UY2dFy$5w$aB#o+;ln^59w; zu;^yK44?>{m<1?EH=eeOVGOiWQ2T`Lul8>XdhLyE z=ms4eCvE$2KEMM2kB=|_<-gFfMuVN|4B~4F5Z0kUXldYRX6?kr`pf%YI{puH@J~}O z1;1A6Vn+=*l(`G-znorrk0B=S_DZISLfzM2cJWzFbS^dVQX3r!hB{FYjI>{i@7>_S zl2G(cKgGp&?$Sss96_pDx3ZAr8wVEzMq0-tDTmUvZcNwd^XZEeX*myi*OpkulKR3N z`GFNGsmWuBFBqe&nnXz0c_blt!YKhdy|1;Gjjzg~rp46u%R(yZc(ZrnN7MXflJd9F zM8o(M_9jwr`<;v}=E}YMttrm0i8RzK`K>C9GaUseJq%2(+fOCZ+OY4vSrpUzRVlf# zt~f`f`)IPy{dB6ikB2fmx_ObhO8Som0wc)^mmv!Ecaqd+YOw%e002{nls$v2jJq|P zo1G)b*v=00i*A)_*x02B;oLvS+&&9C#7V0I00q#XJsE4%rp@=0WfD>HwV=L6S*dE= z9yW!%Z+)%ytxV45uJf4xG7}UdYPT!rmHpAAIc4_>ahu47GqdWaZ@Hgte#mD~2K8%2 z#^tew2N}Wp(}9A=7+$^7Lp}o#V9eR6C7}}Sb=8oZPW(YwU3EZWU}UNcL4;Qo2#&@> zQa}K0@y4K6;cyG5=f;3*IGRmk`V@+nXBs8~hEoRBjB4aQ>L*Nk@ zI(XFF?l`T;-kU6^I*uW0vMh77rZ}_WY1nsDw#%!2VdeIBk<6qOxOlq#jRf5@nE#8 zy^F_X`!u$C?d$-*T~GP!JxRsO){-T)uvWiY(*^xiS{3UnBt^77SqmxupjnxZaJ}JV zl6)?H{>W>=gmf*;Zb+IRoR(DZe43mUYbTMVj7Cp3PqW4jMwRc=%yn*a5{gUqQ^#Ll zTBE&~s^u*mxHd0e$V3A^FUScstS0-l_I*ZRuq)*UN##S4j;Un^^E;pJEwPFZPcu` zR=A)%o-Fq#X&;zQKL>1d#b*Vvk;d9s6v=_6peyRP6O@#s1Xeh(-Ui@kJy9_pWoOk5 z8WM7<2%$cE{G$DlBTB_Zbq_1*l68Et=1^}Z_cW|I@#x|clHMHP|nQho^?q z+J;z<&*jA-kAA>S#B<0XMeI%P&b>W+SC^|yJ-p)4>)P3-Q^k4I1>O789oW-5IQzOw2KIU2nC_#f8^`W&a4CWh;4dM$I@L7Q#WaFoVNU}m`9)R(!DGH{m z4#^9|ljb|~*B&FFj@%p64U!W1=fy9Cy+fSIL;I!4{20Ja8y{1eG!ECC#cl@~+7?cj zht3Yo*9ZEhS0Fq6_edV;h_eZVxC;y9Fhl_P;-kNj!T4}x4}g(ftC1fdQ#<22H!OzQByKeYJ{R%&#s17{>r;J zc2QQwV%!Ty5j(Bc?r14u#Suia#V^LAzjM2V7K33kc`0#mU)W+AyF3eFM((&0t9;u7 znhd1ZSB1Gbm*Tz&d-KI>m9eK6P|b$trHs46Zzl z1RX~##vw86jaZXXeVUx}TkGeT{#h&E0lGdEtuKJPMIq(a7RBE_ek)uL=F)!`2h7+$ zH?lD`*(V5)*PI~GCUqmeVsTx;S+D{_nSPyX3~kI|}#008uo|8h(I z)jypr&1}rretrMyntR&Ak;J?>ZTL5$s7`KoFMh<*EU%7Q$1hQvW+af*)b6Rg;o<~0 z6JtZu@Sm&Ff67Z1vlC93cnO2nau$n3UA;{U=!W^1)O314<`2$)HvFL^wTa_5<}~k{^hVp4(eVCLs#b49WHyh51f?AQoG)O7HmXmilPU|ZkAyU6`2!wPo@yBZ75GX&&{)~a zhm(S84=qv7&#S0eX^kUJ3@wU&MYH$|P0p7eU6o>C!CR^aq!jZfMwuLf&%b+3T+(A< z`C4An+;8I7vEhxsp_SNx*A}$KHQ z6hK*W2PWp%&I;@%+P*hJQ@)wbY0^B@7{Xf~Gk9CoKw2CdJd>^NR}n&n+PEr-^7$2m zh+bD{hIxU-?o2k0fo|7~{Jy?&-5E$(Le{Z9gf+MZ=+-cgYudRLgT0Iz^r{8SFskw% ztz=JCO+ky2cS9%YfKvqE53Ncq`+cAXRcQ>dvObcYf30K2=05JLjjww}#juBX+;S~E80v@C*CpTSbg9_y 
zTp-cvg=-dS1I;y(9XC+Nq@&Hi57tsp3m1Yq1N_AJc_iWNjz+0Mws)vLPhx50mkQ`ut^$mM?)G_ z)@pI%u%S*f@^|wm!^<|Gy9zJRkW=Wn}t!19h;6Ah6BTyes#v(d^zPV)-j z{%&7dbPCBhw;)-3@7Os(31M*(Yf=f=&x5_KyQ$(M@}-# zz-_UEo7!Y|qYB zfnm50dJ-8WI@SjXX|riO&tPz*-n3xHuSfTchF8+vxeY#dgK~n0i`6~8O%O<;3LN;_ok1^JJXtP(OGo%{ zxom3JcT3$(I~ksSZj!)A!JJsdg+RpsT?|Lx;+_6niq^QZ*0}Oca*mFa!v-?D4KA+x z#>J{+%SPy{`Bx$4Bng`)xCPZI%~9pZBPmjz@mxhIP|C(^T;#FNYIBVa9zYkX;ugtI zEL2g>RnhDd?1A`e%!0#7x7xjpDWOxGN)5&h#Ex7$(a$hJl9Y7y0kpJgW8H$b*EK-I zIz}pP&^7iJLD!5sd;`^#F8)WgdRIc(Oxr6rqa;H7%20^}0lj#|L3ob14_*n~ECwuo ziD=y8Bp+>>#G8O&J1aW@?k749uymmC= zN;5wl%cC!YHNukIEK35BQ5%MOkZKNQZAq{z!JDD`3cVc{#%HUBaVmP~le;%idPZZC zu2!G>W>^n?Y_JRmV@5_Q7<|RAS-bh(syFBDaEM8saa%K(t!?893^uP3riX1&%pKeN zCJ?M&rhACyxl}PuB6QS3g@!&qZL7S?i(KPar;7-rG4svUE<{L_6D+JFzR$iVXEHI| zVDK?*1zZA~Y9IgcqHnJ_V1EEP9%?}(80|mu>*VZVZRYgLnebDhu3Z{CPTvaMzMz(G zfgZFr7SuEtr%07o14y?&4JBTB(;2nhpYL-?3L{%6VDmN&e*xik^ReNIx^)j$I#!)m zVF30P|GAX<&tk($xufLUuiA!F1e7S9iJy=}#^4R|4_A7k`RzUH3Z}?k_S1HFnd~I) z@?2*kPv{k<%3Y~4vV;ZXl`rOhOTdX1rmiP;UtYw0Co6>UhSCp70^@${n_{`dYt#jf zk*_%xM${+ME;Na>g6OJ(qSX95cMa&u1+mFz-z+W>lR(pj6MBFd4YL8q8@+>xw~f@I z@&0!LV6q1NqwY(^HZV?Ost9MANP2G`IR8}xFfYI~P?mPa!d2s<#uhu;ysRPjkdEQlJn7iuA!u|;DAkh-f+IPq@#P9++m&G_(u44SS z`vf$H^QVZW@CgB?bU;U+-io8m$QM>aO&;F%9mvH8kx8(Lj?s@GmwcQ*Q``k6ua-V~ zWcKX!K2yM}&Xo+@G*Ze!tQ|T3egoyb$ndtENuck+QqDwC9!7n*xP>&jc%O8Tps<>o zm7pkrdNIVtKb_Wpm|@fHBF~2TIP>4`kFzjjYwt zS}O;4(Of6X8BW^|cjEK`+RmHnXK<$0Fgug3jc|_YW8mt=#VirpE1^dTlyAuPmWwV! z&l9&XkwixAtkJHWbfc@ec+rG72~NM49U}wO5&&&pVMUxE+(mJ$EJ6%v+Ys z<=KvS+GuT#&|X&@&MUL})&u#f<%9(UBiaNPu!OWxPj2gAD+}h6I~H#Vt~29@g;tO@ zP1l%zR`m~1iq4L7ooggWV(*VLrR8njJdC-PU0Ira$W9+HDUTsc)uE8BxQoO-;J`k( zX5+Yr`2FASO>jaB#UCMrn1uXrej|pZk)xTZnzN&ot;MhW(2IG6-1$q|KF|%Zs;&r` z68s6J1A7a~SaAL&Bq$ec?MH_pvaZ|;UFCapzgpsOk});Xp3Z(hKLyvQ!$qj|x>l}PSHyx)1lQX?M%p&IGgxuu7>tc_uTY?9eH0YihGsL7m zU z_l<)_OPirz83u3obhjh{G{^+=Ie)@X4~plA+Kgd_aGt+_p*QL9B`2{*=*jndPB6b8 zw{9unN)^j0b&Q2pph+Nqh^}EF0;K>29NGID!$EX%z7Q;GdJB$l#D`-6_K64nF$Ssv zX1!dW0<}dB-6MUHMQL5s-p2A=&xxL+YorIQ)@B!tF0q=X@BSz1_Je5bnkjesNH8u( zUxAYep*|7PI!%3G?Yc9j6&&*Csq(jZJ(EqG_t@mnWU&`!BZIf=$5u5jJ+SDgqY8{T zb>kQ^md8y`=4h{8tn}`)tuyqsh4XUIm`Vu2oZuLE$K4VCllvLTB4LC>+z%6ySjPCX z`x)8W|Lc7axBIQ7gJ0P#u%iYa!rYLdE)nxDDWe9I8p~*&RTx2mYb{>qo8(kdhn2?d z&BWG@PtAGmM6Y|f-$=+Vkq8)^y&vyU4&V5Q0|!eC!oNs#pIv(Eiwg%vsyXI(NfYD$ zUCO{)HH`$@)X^VK8n#qIKH-h(N1Gh3dPDw-iS9+h?>vWQ)k^u)Lavf-3aa>xtqZSwD^?TJBO|tJ4Gk)D zb6FG9>wuu>&=&~1z3#9yEATtdzgAR7I}q0FSWO4zeL!*cbGOm_cx8hDpDuSsD~KKT z1%6>FszymhTeRmciUp7Zw*C-e!hbmex|C#-hQZ(Lr7(mxS|f4T?1y;(^3YBSb`S9% z_p=`>@+}0|2{?`L>Zd5((0P$InRdnntDScRTX@Sfxq}e(XFNpI7rvPmUc)Kwil9vi z+i~7fm@cJOgx*Y_wy_I6{5b16Kk-!YWxalD!9}z3Q2FrROa&A)3ncpgb4CC6?fSj{ zhx)#<{9g_Hwf6Ws@RvRr!ihgsBA)`E)?@yFHbe4yPpUId!GFyf{{aO6)R2D#|35k8 zr*@vE692Fig8JV<{4KHg)XLL@&mUH3A(bY`RGy}Po*H<1Wc|ZH9^S78{y4xsg+3Ma zKcKCI|A9W0_@@^BDxrVi0RS)&q+Iq?P@ls88Up_e$0Yp|{I`IpEDs0ST>tBa;1FB`!{9o&TW}}1ySuxF5G28aySuv|*x$`b_CCqJ z_x!sx&swWT>Rqd9y5FkqYB@nI{%#F?UZJj{+ z01(K`7znU6wzOchx6os-1lSncFj#{0E%YpbcINsPwhXrVj<#}t1Cziiiw}hc1A97f z9>Eli?96p70LErEjJ7|T3?K``KslMW$OyOyk3T^c7Za9$ynX=#0|$eHd~7XvUs*q1 z+U2AaMJOpLsi~=1Sy{QbxR8*<5D?TbF_m#}6v4oBz`%@gag9kxKte)7Qc_Y13JMw; z8oIi=rlzJgHa4!Vu3%uE2nast=z;k7p`@e{)YQ=|ED1b3DFOm%!onGnk~wm6d5Ve! 
z8XCpAy1u%)LAtsh&CO#$pg@|DJeTUyR@`4C#SZm zs;afM)!)A}DCqN}eEs@$baZrjdU|DLWp{V?uFsJ}=;Wvto z(|glf7Bid}Fhk?1ZgfGUd#nY|I``>7qO*R2;z7~#4>h6^Xs5Dd=mFe;BL^m4W@uR9 z3{Lcn2iF|W4%)#Wk(mB@x_sUnc5`@H9up3mvWfK>&Ub{gnMIJjc?&A9GAd=som?-{ z&XnySwu8!UNoC8e%0wj*w^1Cg%|d$E;$;Zmy6awa-E&d+RceS`g=9 z7dD|$Xi9S|q$j9=u0wBB&DS7aR7jRex(5t3ZDyTx*u^jFcpe?3IUQ~JPTvjdzq+4Z z?_y;9zCRJ&T|ZCcFf(`FxmTXVy|-`QHb+?3G&kwY>BtWBuxXhgs9o6!&1-X}Sh*eC zs^(vqpPO?_=HA=$-?IwnbP5>74>>vC25}OEIcyAWEY+lat>+`K9N1X7;4%m)5rVl4AvBnboM+hYhan`UE+&s5 zTS6w(|81y0d5Spule8kckH>27VOG9#gp|0NDjuB3|YgIGWx!& z2_cm<2aFsAxv)yu284>^1KeGv9W+htg<0tsywq*n{Yf28xX?QX+Wt0{9a!dH=dF5 zqVv6`?fiuKCCY`;oe_mr<#1Q2FAC9a0oz|eZ0CQG#*dK6C4KT0h^5P zPSbfVL{Wam?ymfNJmPd{Vd)*N{R);bZWz!~9ye9$0$H0RYi=`EF-)+{+;iU?JzUd&)Gi|LL} z)aLcMqQ|5y5->TjRk2l*93ikd@jJjSJc{IZsTFb)&B`^UZe5v1FM;qe2q6a_hFd}( za7E{Xh8oeLPI?(vSh^?X?|s(Zt8HbmmN3o9R}I&OB6j2quu;>ojuwDyRDsree~^OCTyRaiTy*fMQ%=HRk4xF>ASo0w+DT97Ux~2SRyamM&1<8IY7t!_n=y!p&30*|HeLQ2hVbi~BORQ62MK)8o(&%89o+%)h& zUmKXCQ6=JVXE@$hYuEE`enNE%BEQz@^EK`Q;3Vf&Ry+!E29|-F$QiE>$Nt>jQg|wJwHU=EuOfMk~f&3 z$TOKyqnY!N3K~Uy9b83!9$cgWx0AV%0f)kkWU+7bXF7S#o#F#K_*HGpM3VDao)+|yj^gH;Fhn)avDSMxobi1 za=vf-y<&VOKx~8&D^0(1ecv8+;)9$X-^+~^ zGOOkU!?fdnSmXsa0P)@jV%L{5UH5#tcx|j4*jiLyHb=zug@X!HXu?asNl^k$%TljY z;UwMXeTYI2w(|3j=p>pR7j%~JX8er-1IY0r^igiU%oMHWG>OA0;Go#_>fDddUbO4B z2__rG5}_sc+IyI#Pmr;C_dm-}!*o=&8W&t%jfZmwMsC@rTrswBbJ%Pk%k*eGXVjyV znWJzc-odO`5INJurzbBLjBFouFC-jlGrgLliR(0JC4E-K9jknu6!(S*$N-P!Yb_&a zm+NT9{z3~)C!H&dR6^Hi80skAtdIAAFu@AR`uk6UNG=lXR3ZtU=p|peB^?6n*BfC@?zKg5A zS_NfuucI>MNT=%<;pN6$!?>iWlnW~?p+FM>OKZ8HpL;L1;RruqWrMMtN4)mp|-!nPJ zFh3{obg+OmbmUuFA)Cx)UD@JbIvd2@Nl(sH-ubZv8&R72TGT%2Mw)R^^X1ev9oi0bnwEW#-u8e(o-zdnF}Offn9FTz zW-EGewuFhHEjPfGyXnJDjOQmQeI}0ZJK>lvip=Y=vVl_KxCpViGjCbLn9i*K5l;A4b$AGqNnNZ0D4G zda5-eJl?#<*B`;9-B^Low1Ael8wkOK1!39{NYRgsXoY>FCp z@1&NZpv^6tVzZr0xszNn+E-xn-e4oS-h=6aF>Lu>7N@w!;PAHyg@4402Z@Wb+eJH1bNl5Z++>y ze0@{qR{;;4jE6@?l$PJ{GI8%MzGHp)oSRHqNN83Mdw-WKG4M%0y_}|*0pYD$Fj@H} zBoDYmiYL8-nyQ{ohBo+6Tkr(KDi$XyAZfhebqk$ zbheO6+Vuij6OZnGv>4=;bI5(*JVUQ~x6VnUxD=&q%2S#2?9A^bGSgJ$t}MDB)>VNo z%UdWr4xN2x zeatM=^BgDfBS%yjbN#AXg_xt%I^yTiXx>nWo}y$A*S3&Q@bcN8or*aMLQw4k=a5-j z?;E`+mK^wBuj?MSN3U1b2QLS!u>wR$72cceXKJ_I3R`3mEj(52FR?Zn*%71p(iz)4lv-tU%e}2_l93^M zT54s@nyY4HdMSIH)-Syauo& zK0{3cH;SZJ%4z__%q(eTuXHwv3C$!Y?g>a)b3VOH4A(KArj=d~M66BCB^1a4TMiXYY49 zoj-_3@R@s+1y4pT>Er$|pj3N(^!ZMd@*pz}19(wAWX0-5x*lWAfD%fVSS>0*GF1+N z1?!Y^p}Aw^>>L}8A5`J%XpL&1HmK*3k@MYSkESt?l0c3*Ped|nmBMS}LIAT4-}Qry zMa_mAWXrQsF0=QG>S$gY&>QKUjHU1d^bfNu2g3oWY5;p0j*HS&}5?>rUV)rvF zU$U=@l-+{pI6!j5pi*7oI94wOf+>lvHt}LM8JpltVsoVSMO9UmmH`z4iBGt^<^2JYqslUOY7700f9NtioPi9z0rb~zo;-YTYf z$aF=lf{;GCk^Wku2c#1(W2hU2QN`3SGOw<$TSR-R0S4N}#0@TqfzAPmb^^`rw)4hI z5Nk$PLbavT5VGa-L59*k^5laom0?m^TsA=IT?OD}2;gBK&GU`Q~NZe85BlHI{ z8o}=O_-r4|Vgg>-Wuds)Ms}hh6nN(Pwg#`4(tzOH?@V2&>Wa%rC_dy;L-lZ*Ao?wG zl})fSqxNZij8DNsLqrh(OBZM#qzt0u6z5V5fuKy#(q!0Bk;bjFDS*mP!gnXT_26F; z>U*WV*vP9$m)A@@8RE=L7*#X7W&U;zdTQ0#H+=*FOTM?|TvgKd%4^n}04Ct2QNhP4 z#*}!fh65=SS#IOon2Jc827p2E0VWgrpftYB=A|2|Q;d9!sQ1`sSXnt_5eS>Y-tv{B z`vD`|)$>KrVY<7%=(QF#lbet) z68lV5@YLR?K23)hu(hQH0?fITdK7OyF^qHKH(jZOrwuf$a=K=jR|qFJMCVOiyOper zC2wQUV}^qV7u?e`6zbzRBykHvTTFSh)@f8kshvDb*WQGih62)(MuOgRh`UiF+&qV> z9grUqiV%#FyN!f$v)@O~ojW#o&Azu=0QedqTo2iQ&r6sa4oY+!xR(7+7x~=TY9aMt4JXjQ_$-QnH%-nOm6cwt9p*Z+ME&dK=LS$H~e^|bC8Do z7fAGr7)Wl^wg*|>`*KCud|wsYkZM&uwdszFV)*la4R9h}FayFnq+-6s9TpM?Av={hbp1XXk zy1bCVOC^9i$70$doDN$ks@y9gu%j25Ud}{!96${;4N~{b>)PV#aj4D5n%`)`KvJ6> z43q#q=kG2;i4kREZq<(B0Rkx|A=zWB42?bHvRLfjYPNIj^2Vu;jV6XrsvFwimb+7+ zLcaedd!l;}`*_$Ocp?yE_R 
z!%j5eG@>kSo*`!oeI!!COn;qmMe&xyCkNm=UP?NN;d`}e}qXoIf|3S4zXf}MR4N+dVdy-}Cuvl)dF8z0f^RGNZwp2< z)p}F&9O-Y)KefFqm)&8cJz88Y90upzl)>!Q$We;VVGgoJC%8!FsN;welHRC5^4gB0 z+R!4i9!PFhTe}pqzE&+mX%l9vL#Bo9m*7wKhPhsvlJ@j&3l$tkS}ccAHm+#idzblQ z{%Ui~SpJw{MEhz`nG_lTqiWDKWv5=!}r1s;cH3>@}x{MaW|Qgl8Qo&yh1@!Ks$l|I1J*7 z)%AH-o81UqGHja9h;xwg+yO-1jDnAJYa$_kgQYXreU6Y z7k)wP0E}F~=_#p_3*X(Ox$Mkm4e-3_&9wHCrV<2xHzR%O4^!(0VOgEWw|F~(?8?@b zL-agxXrMWoozm|_B5D=7&yHSh+Z@=HjnhWbB?5c_XZqCH3OS>q-*l^!gx#%p>vXX2 z-ztbVeBEOh5maE=cir#hJ9?Lzk@Plz(CzXT^{HEMsH8P4frEiDK>b^{Kzd4o^=)m9 zEe!ukgIm-WY^FKU9#cosP8SKmC`~}5EfO_7Jv>rwzvhBZizkZO&c{`>`su8#aV* zD8Eoi3M2xNzYLLG2fUAKj9O?l(UY)`FfQeZBv0p|*w#U?zER(}8YRf1YdfZ@*VJN* z{gRzxVAJxxO|8w%+hmy6ylzI?!QsZp$9e#}Oeu!{P7{;#HFYK9YOqzGIwhrH|EqyKJoE8@XjP3ZzFls1 zQr;pP=bEKg=YA(XO{LosU$wh0{~|sm4zuBtz;t8UD`Xs2g4~ydh-i z*>c>MX(Hf0fuR3%^T4XNqhY}`l|`s|XM@sQOwN*nz4Lr2LoE{B-brvZ$+uDAFLs0Ah41&7ai5}EcZ=7D|R`|O2kJHDOD=>^XE+L5&A zj$cx@1lM!plxk1s43c^m>g`ctnzIa%zy%rGT+PKQuwf|Uo&M$y@e8$Gs z7C~*0FZdvCMWFDU1?bC74(SL=jIy-(67N(%eswKwA16}e`$=kleM^XXi-Zf^w~P-; zKGSM{1~Scg(&3n&CYHfz)%1#?|N7J_jc>jM%J7{}QW;7#hN3p$`G;QnqW+FgZV2H9 zbgEF27_W8>iiGmvM-$|l&G z^smx}`s+D?r&+B-dt=$~#csdK52(kE$qGg!Di6WU;|<)SL=x&f9rN`esDB@uloQnceXsRprlEv=l!VcSei2e`41|J$$zq!bgtAyx6B;v<0tP#@mizBSuI_ zir(+=g6FC?3{sQJw6(cvqy4G2XAm&;U`~qN9ig32j?}`><97~cHtEGhgpJGn#U_2hz>cy&Lq)Sy%)R2t+qL99lVV zGs4t`0hNGe>-tb^m?{AWBKU6APl`I|n0~A(jl_VaQ05YX1hfYdHn$7!g_GEju>%G9 z2~PS|$)>9&ZikO?k=m;TZ2TPR<^EAv;B$bYW;z&7;UZ+A+#oh8QK7c6xH}Ar;&8Z$ZyvKNIplUAak5Qx2 z`M$!RDwtN1TU-PYDhbcy8y81rNIWzTxgKG67)+&6mU;Y@f4sGNxx@$q2IfxvBhdUP zqW>tEJY8`g4%Yq9Hq4A5OOPGtv1V?^`1gAGuX;qk(K9v&80yo zllB1kJ;9av9g3_#YVtlYqna&R?k0wYoEh%v$y!>XM1ms|@0>K6Mh6QTF6~%fdJZJB z!KzFu$HWs}*=Ev?F-|Knq%YNBaPWkog2rd7uDDzS%kTy&?`(I{Bb+XX`GV)T3(_kQ z0JZWh{>$Hq|XY%SC(uNIA&IcgnuDxGu zDc{%hUT}>ua^43z+l_V|Es#a)Bs1^JZa48bJ#5@F3_iSAE~~Z|HWL`gGQ5u1s6HlA zdRz9CqE%lQjX*!9Xx@+eJN~>*iTUV0X%WRY^x`t2jQ{P&$27hqO46j03C-_<%Xhd| z)uf8%lMI$uDlwlYO*nxUEAu$j=ejbp$jKZW-9wAi2UlFtuA_A`dtU~eaPphQ13Jj> zt<00ryEnHn9+fKq6;9t=KY?R_UIA+7D81UQoHM3WL_R*0xcW6#`7t-QK%~dc8t2wl zKiK(h`PW>hnu>_L`_i<+9V7F3n#O1;x)Q1{^?ptLpXPl(r3>qEDE^;W;6Jr~`%`8g z+2Yqp!Vk?q@%&Fskw^OeCmsIE`hR!#PdzZOC*J=lZok3#A5q}XU{Cz|QyPB*_G?u5 zGtN_7_$kM~!TEcX_%qTI|Nay@;{WNUpHbt_D1Wz0_YWvPqR5|d{%%*|Z*cw|RsM|h zce}p-2I*&%`F}-0Ao-u(`5ATogYsjn{Yt5S@5BO+DF4l#k5TAHC;p-ND|`se) literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/feather/feather-0_3_1.feather b/pandas/tests/io/data/feather/feather-0_3_1.feather new file mode 100644 index 0000000000000000000000000000000000000000..5a2c7b3dcc684b3676a94287702ea01aefa057af GIT binary patch literal 672 zcmZvaF;2r!42GRH6%j;Hh9YHPs1O4KL)q8}5_IMO2&n~CAXRkf$jr*b0gw=fKs^*E zAbkIOF+@;D_Otzd|8|^AF3ye(Nn|8HPIP-QYtvcl*z0)x_%bv*zkYri?8fZpX?t6H zxz_sISSiy5(TliUEtb!#> zF9YhAU;SJ@TFyNZ>fxdp=8NTY#aTB^!?9gsBtEC}yvO@q#oZA)Lc3hT-$In}#Q8{= z_3vU`Iu7A~UhS%)Ca0=oYemE*>a##cXt$5{aKpsvqrCR3eU%_L>}5wh5`Q0$OWWUs wPyBryFa9V6J>vJ&m(`r=t6g;GlRsN7H9B~0T=zM4NF3)??l1odi3$JaHx?%;)c^nh literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/fixed_width/fixed_width_format.txt b/pandas/tests/io/data/fixed_width/fixed_width_format.txt new file mode 100644 index 00000000..bb487d8d --- /dev/null +++ b/pandas/tests/io/data/fixed_width/fixed_width_format.txt @@ -0,0 +1,3 @@ +A B C +1 2 3 +4 5 6 diff --git a/pandas/tests/io/data/gbq_fake_job.txt b/pandas/tests/io/data/gbq_fake_job.txt new file mode 100644 index 00000000..b0995222 --- /dev/null +++ b/pandas/tests/io/data/gbq_fake_job.txt @@ -0,0 +1 @@ +{'status': {'state': 'DONE'}, 'kind': 'bigquery#job', 'statistics': {'query': {'cacheHit': 
True, 'totalBytesProcessed': '0'}, 'endTime': '1377668744674', 'totalBytesProcessed': '0', 'startTime': '1377668744466'}, 'jobReference': {'projectId': '57288129629', 'jobId': 'bqjob_r5f956972f0190bdf_00000140c374bf42_2'}, 'etag': '"4PTsVxg68bQkQs1RJ1Ndewqkgg4/oO4VmgFrAku4N6FWci9s7iFIftc"', 'configuration': {'query': {'createDisposition': 'CREATE_IF_NEEDED', 'query': 'SELECT * FROM [publicdata:samples.shakespeare]', 'writeDisposition': 'WRITE_TRUNCATE', 'destinationTable': {'projectId': '57288129629', 'tableId': 'anonb5ec450da88eeeb78a27784ea482ee75a146d442', 'datasetId': '_d0b4f5f0d50dc68a3eb0fa6cba66a9a8687d9253'}}}, 'id': '57288129629:bqjob_r5f956972f0190bdf_00000140c374bf42_2', 'selfLink': 'https://www.googleapis.com/bigquery/v2/projects/57288129629/jobs/bqjob_r5f956972f0190bdf_00000140c374bf42_2'} \ No newline at end of file diff --git a/pandas/tests/io/data/html/banklist.html b/pandas/tests/io/data/html/banklist.html new file mode 100644 index 00000000..a0562989 --- /dev/null +++ b/pandas/tests/io/data/html/banklist.html @@ -0,0 +1,4886 @@ + + + + +FDIC: Failed Bank List + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip Header +

    &{chars}&&{exp}&X&<>&">X&
    + + + + + + + + + + + + + + + + +
     A
    a2.610000
    b2.690000
    + + + """ + ) + assert result == expected + + +def test_w3_html_format(styler): + styler.set_uuid("").set_table_styles( + [{"selector": "th", "props": "att2:v2;"}] + ).applymap(lambda x: "att1:v1;").set_table_attributes( + 'class="my-cls1" style="attr3:v3;"' + ).set_td_classes( + DataFrame(["my-cls2"], index=["a"], columns=["A"]) + ).format( + "{:.1f}" + ).set_caption( + "A comprehensive test" + ) + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + +
    A comprehensive test
     A
    a2.6
    b2.7
    + """ + ) + assert expected == styler.to_html() + + +def test_colspan_w3(): + # GH 36223 + df = DataFrame(data=[[1, 2]], columns=[["l0", "l0"], ["l1a", "l1b"]]) + styler = Styler(df, uuid="_", cell_ids=False) + assert '
    l0l0
    + + + + + + + + + + + + + + + + +
     A
    a2.610000
    b2.690000
    + + + """ + ) + assert result == expected + + +def test_doctype(styler): + result = styler.to_html(doctype_html=False) + assert "" not in result + assert "" not in result + assert "" not in result + assert "" not in result + + +def test_doctype_encoding(styler): + with option_context("styler.render.encoding", "ASCII"): + result = styler.to_html(doctype_html=True) + assert '' in result + result = styler.to_html(doctype_html=True, encoding="ANSI") + assert '' in result + + +def test_bold_headers_arg(styler): + result = styler.to_html(bold_headers=True) + assert "th {\n font-weight: bold;\n}" in result + result = styler.to_html() + assert "th {\n font-weight: bold;\n}" not in result + + +def test_caption_arg(styler): + result = styler.to_html(caption="foo bar") + assert "
    foo barfoo bar
    2.6100002.690000abA
    + + + + + + + + + + + + + + + + + + + + + + + + +
     n1a
     n2c
    n1n2 
    ac0
    + """ + ) + result = styler_mi.to_html() + assert result == expected + + +def test_include_css_style_rules_only_for_visible_cells(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .applymap(lambda v: "color: blue;") + .hide(styler_mi.data.columns[1:], axis="columns") + .hide(styler_mi.data.index[1:], axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_include_css_style_rules_only_for_visible_index_labels(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .applymap_index(lambda v: "color: blue;", axis="index") + .hide(styler_mi.data.columns, axis="columns") + .hide(styler_mi.data.index[1:], axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_include_css_style_rules_only_for_visible_column_labels(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .applymap_index(lambda v: "color: blue;", axis="columns") + .hide(styler_mi.data.columns[1:], axis="columns") + .hide(styler_mi.data.index, axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_hiding_index_columns_multiindex_alignment(): + # gh 43644 + midx = MultiIndex.from_product( + [["i0", "j0"], ["i1"], ["i2", "j2"]], names=["i-0", "i-1", "i-2"] + ) + cidx = MultiIndex.from_product( + [["c0"], ["c1", "d1"], ["c2", "d2"]], names=["c-0", "c-1", "c-2"] + ) + df = DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=cidx) + styler = Styler(df, uuid_len=0) + styler.hide(level=1, axis=0).hide(level=0, axis=1) + styler.hide([("j0", "i1", "j2")], axis=0) + styler.hide([("c0", "d1", "d2")], axis=1) + result = styler.to_html() + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
     c-1c1d1
     c-2c2d2c2
    i-0i-2   
    i0i2012
    j2456
    j0i28910
    + """ + ) + assert result == expected + + +def test_hiding_index_columns_multiindex_trimming(): + # gh 44272 + df = DataFrame(np.arange(64).reshape(8, 8)) + df.columns = MultiIndex.from_product([[0, 1, 2, 3], [0, 1]]) + df.index = MultiIndex.from_product([[0, 1, 2, 3], [0, 1]]) + df.index.names, df.columns.names = ["a", "b"], ["c", "d"] + styler = Styler(df, cell_ids=False, uuid_len=0) + styler.hide([(0, 0), (0, 1), (1, 0)], axis=1).hide([(0, 0), (0, 1), (1, 0)], axis=0) + with option_context("styler.render.max_rows", 4, "styler.render.max_columns", 4): + result = styler.to_html() + + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
     c123
     d1010...
    ab     
    1127282930...
    2035363738...
    143444546...
    3051525354...
    .....................
    + """ + ) + + assert result == expected + + +@pytest.mark.parametrize("type", ["data", "index"]) +@pytest.mark.parametrize( + "text, exp, found", + [ + ("no link, just text", False, ""), + ("subdomain not www: sub.web.com", False, ""), + ("www subdomain: www.web.com other", True, "www.web.com"), + ("scheme full structure: http://www.web.com", True, "http://www.web.com"), + ("scheme no top-level: http://www.web", True, "http://www.web"), + ("no scheme, no top-level: www.web", False, "www.web"), + ("https scheme: https://www.web.com", True, "https://www.web.com"), + ("ftp scheme: ftp://www.web", True, "ftp://www.web"), + ("ftps scheme: ftps://www.web", True, "ftps://www.web"), + ("subdirectories: www.web.com/directory", True, "www.web.com/directory"), + ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"), + ("with port: http://web.com:80", True, "http://web.com:80"), + ( + "full net_loc scheme: http://user:pass@web.com", + True, + "http://user:pass@web.com", + ), + ( + "with valid special chars: http://web.com/,.':;~!@#$*()[]", + True, + "http://web.com/,.':;~!@#$*()[]", + ), + ], +) +def test_rendered_links(type, text, exp, found): + if type == "data": + df = DataFrame([text]) + styler = df.style.format(hyperlinks="html") + else: + df = DataFrame([0], index=[text]) + styler = df.style.format_index(hyperlinks="html") + + rendered = '{0}'.format(found) + result = styler.to_html() + assert (rendered in result) is exp + assert (text in result) is not exp # test conversion done when expected and not + + +def test_multiple_rendered_links(): + links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e") + df = DataFrame(["text {} {} text {} {}".format(*links)]) + result = df.style.format(hyperlinks="html").to_html() + href = '{0}' + for link in links: + assert href.format(link) in result + assert href.format("text") not in result + + +def test_concat(styler): + other = styler.data.agg(["mean"]).style + styler.concat(other).set_uuid("X") + result = styler.to_html() + fp = "foot0_" + expected = dedent( + f"""\ +
    b2.690000
    mean2.650000
    + """ + ) + assert expected in result + + +def test_concat_recursion(styler): + df = styler.data + styler1 = styler + styler2 = Styler(df.agg(["mean"]), precision=3) + styler3 = Styler(df.agg(["mean"]), precision=4) + styler1.concat(styler2.concat(styler3)).set_uuid("X") + result = styler.to_html() + # notice that the second concat (last
    b2.690000
    mean2.650
    mean2.6500
    + """ + ) + assert expected in result + + +def test_concat_chain(styler): + df = styler.data + styler1 = styler + styler2 = Styler(df.agg(["mean"]), precision=3) + styler3 = Styler(df.agg(["mean"]), precision=4) + styler1.concat(styler2).concat(styler3).set_uuid("X") + result = styler.to_html() + fp1 = "foot0_" + fp2 = "foot1_" + expected = dedent( + f"""\ + + b + 2.690000 + + + mean + 2.650 + + + mean + 2.6500 + + + + """ + ) + assert expected in result + + +def test_concat_combined(): + def html_lines(foot_prefix: str): + assert foot_prefix.endswith("_") or foot_prefix == "" + fp = foot_prefix + return indent( + dedent( + f"""\ + + a + 2.610000 + + + b + 2.690000 + + """ + ), + prefix=" " * 4, + ) + + df = DataFrame([[2.61], [2.69]], index=["a", "b"], columns=["A"]) + s1 = df.style.highlight_max(color="red") + s2 = df.style.highlight_max(color="green") + s3 = df.style.highlight_max(color="blue") + s4 = df.style.highlight_max(color="yellow") + + result = s1.concat(s2).concat(s3.concat(s4)).set_uuid("X").to_html() + expected_css = dedent( + """\ + + """ + ) + expected_table = ( + dedent( + """\ + + + + + + + + + """ + ) + + html_lines("") + + html_lines("foot0_") + + html_lines("foot1_") + + html_lines("foot1_foot0_") + + dedent( + """\ + +
     A
    + """ + ) + ) + assert expected_css + expected_table == result diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py new file mode 100644 index 00000000..52fd5355 --- /dev/null +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -0,0 +1,300 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + IndexSlice, + Series, +) + +pytest.importorskip("matplotlib") +pytest.importorskip("jinja2") + +import matplotlib as mpl + +from pandas.io.formats.style import Styler + + +@pytest.fixture +def df(): + return DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +@pytest.fixture +def df_blank(): + return DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + + +@pytest.fixture +def styler_blank(df_blank): + return Styler(df_blank, uuid_len=0) + + +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_function_gradient(styler, f): + for c_map in [None, "YlOrRd"]: + result = getattr(styler, f)(cmap=c_map)._compute().ctx + assert all("#" in x[0][1] for x in result.values()) + assert result[(0, 0)] == result[(0, 1)] + assert result[(1, 0)] == result[(1, 1)] + + +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_background_gradient_color(styler, f): + result = getattr(styler, f)(subset=IndexSlice[1, "A"])._compute().ctx + if f == "background_gradient": + assert result[(1, 0)] == [("background-color", "#fff7fb"), ("color", "#000000")] + elif f == "text_gradient": + assert result[(1, 0)] == [("color", "#fff7fb")] + + +@pytest.mark.parametrize( + "axis, expected", + [ + (0, ["low", "low", "high", "high"]), + (1, ["low", "high", "low", "high"]), + (None, ["low", "mid", "mid", "high"]), + ], +) +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_background_gradient_axis(styler, axis, expected, f): + if f == "background_gradient": + colors = { + "low": [("background-color", "#f7fbff"), ("color", "#000000")], + "mid": [("background-color", "#abd0e6"), ("color", "#000000")], + "high": [("background-color", "#08306b"), ("color", "#f1f1f1")], + } + elif f == "text_gradient": + colors = { + "low": [("color", "#f7fbff")], + "mid": [("color", "#abd0e6")], + "high": [("color", "#08306b")], + } + result = getattr(styler, f)(cmap="Blues", axis=axis)._compute().ctx + for i, cell in enumerate([(0, 0), (0, 1), (1, 0), (1, 1)]): + assert result[cell] == colors[expected[i]] + + +@pytest.mark.parametrize( + "cmap, expected", + [ + ( + "PuBu", + { + (4, 5): [("background-color", "#86b0d3"), ("color", "#000000")], + (4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")], + }, + ), + ( + "YlOrRd", + { + (4, 8): [("background-color", "#fd913e"), ("color", "#000000")], + (4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")], + }, + ), + ( + None, + { + (7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")], + (7, 1): [("background-color", "#4cc26c"), ("color", "#000000")], + }, + ), + ], +) +def test_text_color_threshold(cmap, expected): + # GH 39888 + df = DataFrame(np.arange(100).reshape(10, 10)) + result = df.style.background_gradient(cmap=cmap, axis=None)._compute().ctx + for k in expected.keys(): + assert result[k] == expected[k] + + +def test_background_gradient_vmin_vmax(): + # GH 12145 + df = DataFrame(range(5)) + ctx = df.style.background_gradient(vmin=1, vmax=3)._compute().ctx + assert ctx[(0, 0)] == ctx[(1, 0)] + assert ctx[(4, 0)] == 
ctx[(3, 0)] + + +def test_background_gradient_int64(): + # GH 28869 + df1 = Series(range(3)).to_frame() + df2 = Series(range(3), dtype="Int64").to_frame() + ctx1 = df1.style.background_gradient()._compute().ctx + ctx2 = df2.style.background_gradient()._compute().ctx + assert ctx2[(0, 0)] == ctx1[(0, 0)] + assert ctx2[(1, 0)] == ctx1[(1, 0)] + assert ctx2[(2, 0)] == ctx1[(2, 0)] + + +@pytest.mark.parametrize( + "axis, gmap, expected", + [ + ( + 0, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + 1, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + None, + np.array([[2, 1], [1, 2]]), + { + (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ], +) +def test_background_gradient_gmap_array(styler_blank, axis, gmap, expected): + # tests when gmap is given as a sequence and converted to ndarray + result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)] +) +def test_background_gradient_gmap_array_raises(gmap, axis): + # test when gmap as converted ndarray is bad shape + df = DataFrame([[0, 0, 0], [0, 0, 0]]) + msg = "supplied 'gmap' is not correct shape" + with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=axis)._compute() + + +@pytest.mark.parametrize( + "gmap", + [ + DataFrame( # reverse the columns + [[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"] + ), + DataFrame( # reverse the index + [[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"] + ), + DataFrame( # reverse the index and columns + [[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"] + ), + DataFrame( # add unnecessary columns + [[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"] + ), + DataFrame( # add unnecessary index + [[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"] + ), + ], +) +@pytest.mark.parametrize( + "subset, exp_gmap", # exp_gmap is underlying map DataFrame should conform to + [ + (None, [[1, 2], [2, 1]]), + (["A"], [[1], [2]]), # slice only column "A" in data and gmap + (["B", "A"], [[2, 1], [1, 2]]), # reverse the columns in data + (IndexSlice["X", :], [[1, 2]]), # slice only index "X" in data and gmap + (IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]), # reverse the index in data + ], +) +def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap): + # test gmap given as DataFrame that it aligns to the data including subset + expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap, subset=subset) + result = styler_blank.background_gradient(axis=None, gmap=gmap, subset=subset) + assert expected._compute().ctx == result._compute().ctx + + +@pytest.mark.parametrize( + "gmap, axis, exp_gmap", + [ + (Series([2, 1], index=["Y", "X"]), 0, 
 [[1, 1], [2, 2]]), # reverse the index + (Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # reverse the cols + (Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx + (Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col + ], +) +def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap): + # test gmap given as Series that it aligns to the data including subset + expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap)._compute() + result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute() + assert expected.ctx == result.ctx + + +@pytest.mark.parametrize( + "gmap, axis", + [ + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1), + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0), + ], +) +def test_background_gradient_gmap_wrong_dataframe(styler_blank, gmap, axis): + # test giving a gmap in DataFrame but with wrong axis + msg = "'gmap' is a DataFrame but underlying data for operations is a Series" + with pytest.raises(ValueError, match=msg): + styler_blank.background_gradient(gmap=gmap, axis=axis)._compute() + + +def test_background_gradient_gmap_wrong_series(styler_blank): + # test giving a gmap in Series form but with wrong axis + msg = "'gmap' is a Series but underlying data for operations is a DataFrame" + gmap = Series([1, 2], index=["X", "Y"]) + with pytest.raises(ValueError, match=msg): + styler_blank.background_gradient(gmap=gmap, axis=None)._compute() + + +@pytest.mark.parametrize("cmap", ["PuBu", mpl.cm.get_cmap("PuBu")]) +def test_bar_colormap(cmap): + data = DataFrame([[1, 2], [3, 4]]) + ctx = data.style.bar(cmap=cmap, axis=None)._compute().ctx + pubu_colors = { + (0, 0): "#d0d1e6", + (1, 0): "#056faf", + (0, 1): "#73a9cf", + (1, 1): "#023858", + } + for k, v in pubu_colors.items(): + assert v in ctx[k][1][1] + + +def test_bar_color_raises(df): + msg = "`color` must be string or list or tuple of 2 strings" + with pytest.raises(ValueError, match=msg): + df.style.bar(color={"a", "b"}).to_html() + with pytest.raises(ValueError, match=msg): + df.style.bar(color=["a", "b", "c"]).to_html() + + msg = "`color` and `cmap` cannot both be given" + with pytest.raises(ValueError, match=msg): + df.style.bar(color="something", cmap="something else").to_html() + + +@pytest.mark.parametrize( + "plot_method", + ["scatter", "hexbin"], +) +def test_pass_colormap_instance(df, plot_method): + # https://github.com/pandas-dev/pandas/issues/49374 + cmap = mpl.colors.ListedColormap([[1, 1, 1], [0, 0, 0]]) + df["c"] = df.A + df.B + kwargs = dict(x="A", y="B", c="c", colormap=cmap) + if plot_method == "hexbin": + kwargs["C"] = kwargs.pop("c") + getattr(df.plot, plot_method)(**kwargs) diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py new file mode 100644 index 00000000..b719bf33 --- /dev/null +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -0,0 +1,140 @@ +from textwrap import dedent + +import pytest + +from pandas import ( + DataFrame, + IndexSlice, +) + +pytest.importorskip("jinja2") + +from pandas.io.formats.style import Styler + + +@pytest.fixture +def df(): + return DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["i", "j", "j"], + columns=["c", "d", "d"], + dtype=float, + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_format_non_unique(df): + # GH 41269 + + # test dict + html = df.style.format({"d": "{:.1f}"}).to_html() + for val in 
["1.000000<", "4.000000<", "7.000000<"]: + assert val in html + for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + # test subset + html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).to_html() + for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]: + assert val in html + for val in ["5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + +@pytest.mark.parametrize("func", ["apply", "applymap"]) +def test_apply_applymap_non_unique_raises(df, func): + # GH 41269 + if func == "apply": + op = lambda s: ["color: red;"] * len(s) + else: + op = lambda v: "color: red;" + + with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): + getattr(df.style, func)(op)._compute() + + +def test_table_styles_dict_non_unique_index(styler): + styles = styler.set_table_styles( + {"j": [{"selector": "td", "props": "a: v;"}]}, axis=1 + ).table_styles + assert styles == [ + {"selector": "td.row1", "props": [("a", "v")]}, + {"selector": "td.row2", "props": [("a", "v")]}, + ] + + +def test_table_styles_dict_non_unique_columns(styler): + styles = styler.set_table_styles( + {"d": [{"selector": "td", "props": "a: v;"}]}, axis=0 + ).table_styles + assert styles == [ + {"selector": "td.col1", "props": [("a", "v")]}, + {"selector": "td.col2", "props": [("a", "v")]}, + ] + + +def test_tooltips_non_unique_raises(styler): + # ttips has unique keys + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) + styler.set_tooltips(ttips=ttips) # OK + + # ttips has non-unique columns + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) + with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): + styler.set_tooltips(ttips=ttips) + + # ttips has non-unique index + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) + with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): + styler.set_tooltips(ttips=ttips) + + +def test_set_td_classes_non_unique_raises(styler): + # classes has unique keys + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) + styler.set_td_classes(classes=classes) # OK + + # classes has non-unique columns + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) + with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): + styler.set_td_classes(classes=classes) + + # classes has non-unique index + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) + with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): + styler.set_td_classes(classes=classes) + + +def test_hide_columns_non_unique(styler): + ctx = styler.hide(["d"], axis="columns")._translate(True, True) + + assert ctx["head"][0][1]["display_value"] == "c" + assert ctx["head"][0][1]["is_visible"] is True + + assert ctx["head"][0][2]["display_value"] == "d" + assert ctx["head"][0][2]["is_visible"] is False + + assert ctx["head"][0][3]["display_value"] == "d" + assert ctx["head"][0][3]["is_visible"] is False + + assert ctx["body"][0][1]["is_visible"] is True + assert ctx["body"][0][2]["is_visible"] is False + assert ctx["body"][0][3]["is_visible"] is False + + +def test_latex_non_unique(styler): + result = styler.to_latex() + assert result == dedent( + """\ + \\begin{tabular}{lrrr} + & c & d & d \\\\ + i & 1.000000 & 2.000000 & 3.000000 \\\\ + j & 4.000000 & 5.000000 & 6.000000 \\\\ + j & 7.000000 
& 8.000000 & 9.000000 \\\\ + \\end{tabular} + """ + ) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py new file mode 100644 index 00000000..192fec04 --- /dev/null +++ b/pandas/tests/io/formats/style/test_style.py @@ -0,0 +1,1582 @@ +import copy +import re +from textwrap import dedent + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + IndexSlice, + MultiIndex, + Series, + option_context, +) +import pandas._testing as tm + +jinja2 = pytest.importorskip("jinja2") +from pandas.io.formats.style import ( # isort:skip + Styler, +) +from pandas.io.formats.style_render import ( + _get_level_lengths, + _get_trimming_maximums, + maybe_convert_css_to_tuples, + non_reducing_slice, +) + + +@pytest.fixture +def mi_df(): + return DataFrame( + [[1, 2], [3, 4]], + index=MultiIndex.from_product([["i0"], ["i1_a", "i1_b"]]), + columns=MultiIndex.from_product([["c0"], ["c1_a", "c1_b"]]), + dtype=int, + ) + + +@pytest.fixture +def mi_styler(mi_df): + return Styler(mi_df, uuid_len=0) + + +@pytest.fixture +def mi_styler_comp(mi_styler): + # comprehensively add features to mi_styler + mi_styler = mi_styler._copy(deepcopy=True) + mi_styler.css = {**mi_styler.css, **{"row": "ROW", "col": "COL"}} + mi_styler.uuid_len = 5 + mi_styler.uuid = "abcde" + mi_styler.set_caption("capt") + mi_styler.set_table_styles([{"selector": "a", "props": "a:v;"}]) + mi_styler.hide(axis="columns") + mi_styler.hide([("c0", "c1_a")], axis="columns", names=True) + mi_styler.hide(axis="index") + mi_styler.hide([("i0", "i1_a")], axis="index", names=True) + mi_styler.set_table_attributes('class="box"') + other = mi_styler.data.agg(["mean"]) + other.index = MultiIndex.from_product([[""], other.index]) + mi_styler.concat(other.style) + mi_styler.format(na_rep="MISSING", precision=3) + mi_styler.format_index(precision=2, axis=0) + mi_styler.format_index(precision=4, axis=1) + mi_styler.highlight_max(axis=None) + mi_styler.applymap_index(lambda x: "color: white;", axis=0) + mi_styler.applymap_index(lambda x: "color: black;", axis=1) + mi_styler.set_td_classes( + DataFrame( + [["a", "b"], ["a", "c"]], index=mi_styler.index, columns=mi_styler.columns + ) + ) + mi_styler.set_tooltips( + DataFrame( + [["a2", "b2"], ["a2", "c2"]], + index=mi_styler.index, + columns=mi_styler.columns, + ) + ) + return mi_styler + + +@pytest.fixture +def blank_value(): + return " " + + +@pytest.fixture +def df(): + np.random.seed(24) + df = DataFrame({"A": [0, 1], "B": np.random.randn(2)}) + return df + + +@pytest.fixture +def styler(df): + np.random.seed(24) + df = DataFrame({"A": [0, 1], "B": np.random.randn(2)}) + return Styler(df) + + +@pytest.mark.parametrize( + "sparse_columns, exp_cols", + [ + ( + True, + [ + {"is_visible": True, "attributes": 'colspan="2"', "value": "c0"}, + {"is_visible": False, "attributes": "", "value": "c0"}, + ], + ), + ( + False, + [ + {"is_visible": True, "attributes": "", "value": "c0"}, + {"is_visible": True, "attributes": "", "value": "c0"}, + ], + ), + ], +) +def test_mi_styler_sparsify_columns(mi_styler, sparse_columns, exp_cols): + exp_l1_c0 = {"is_visible": True, "attributes": "", "display_value": "c1_a"} + exp_l1_c1 = {"is_visible": True, "attributes": "", "display_value": "c1_b"} + + ctx = mi_styler._translate(True, sparse_columns) + + assert exp_cols[0].items() <= ctx["head"][0][2].items() + assert exp_cols[1].items() <= ctx["head"][0][3].items() + assert exp_l1_c0.items() <= ctx["head"][1][2].items() + assert exp_l1_c1.items() <= 
ctx["head"][1][3].items() + + +@pytest.mark.parametrize( + "sparse_index, exp_rows", + [ + ( + True, + [ + {"is_visible": True, "attributes": 'rowspan="2"', "value": "i0"}, + {"is_visible": False, "attributes": "", "value": "i0"}, + ], + ), + ( + False, + [ + {"is_visible": True, "attributes": "", "value": "i0"}, + {"is_visible": True, "attributes": "", "value": "i0"}, + ], + ), + ], +) +def test_mi_styler_sparsify_index(mi_styler, sparse_index, exp_rows): + exp_l1_r0 = {"is_visible": True, "attributes": "", "display_value": "i1_a"} + exp_l1_r1 = {"is_visible": True, "attributes": "", "display_value": "i1_b"} + + ctx = mi_styler._translate(sparse_index, True) + + assert exp_rows[0].items() <= ctx["body"][0][0].items() + assert exp_rows[1].items() <= ctx["body"][1][0].items() + assert exp_l1_r0.items() <= ctx["body"][0][1].items() + assert exp_l1_r1.items() <= ctx["body"][1][1].items() + + +def test_mi_styler_sparsify_options(mi_styler): + with option_context("styler.sparse.index", False): + html1 = mi_styler.to_html() + with option_context("styler.sparse.index", True): + html2 = mi_styler.to_html() + + assert html1 != html2 + + with option_context("styler.sparse.columns", False): + html1 = mi_styler.to_html() + with option_context("styler.sparse.columns", True): + html2 = mi_styler.to_html() + + assert html1 != html2 + + +@pytest.mark.parametrize( + "rn, cn, max_els, max_rows, max_cols, exp_rn, exp_cn", + [ + (100, 100, 100, None, None, 12, 6), # reduce to (12, 6) < 100 elements + (1000, 3, 750, None, None, 250, 3), # dynamically reduce rows to 250, keep cols + (4, 1000, 500, None, None, 4, 125), # dynamically reduce cols to 125, keep rows + (1000, 3, 750, 10, None, 10, 3), # overwrite above dynamics with max_row + (4, 1000, 500, None, 5, 4, 5), # overwrite above dynamics with max_col + (100, 100, 700, 50, 50, 25, 25), # rows cols below given maxes so < 700 elmts + ], +) +def test_trimming_maximum(rn, cn, max_els, max_rows, max_cols, exp_rn, exp_cn): + rn, cn = _get_trimming_maximums( + rn, cn, max_els, max_rows, max_cols, scaling_factor=0.5 + ) + assert (rn, cn) == (exp_rn, exp_cn) + + +@pytest.mark.parametrize( + "option, val", + [ + ("styler.render.max_elements", 6), + ("styler.render.max_rows", 3), + ], +) +def test_render_trimming_rows(option, val): + # test auto and specific trimming of rows + df = DataFrame(np.arange(120).reshape(60, 2)) + with option_context(option, val): + ctx = df.style._translate(True, True) + assert len(ctx["head"][0]) == 3 # index + 2 data cols + assert len(ctx["body"]) == 4 # 3 data rows + trimming row + assert len(ctx["body"][0]) == 3 # index + 2 data cols + + +@pytest.mark.parametrize( + "option, val", + [ + ("styler.render.max_elements", 6), + ("styler.render.max_columns", 2), + ], +) +def test_render_trimming_cols(option, val): + # test auto and specific trimming of cols + df = DataFrame(np.arange(30).reshape(3, 10)) + with option_context(option, val): + ctx = df.style._translate(True, True) + assert len(ctx["head"][0]) == 4 # index + 2 data cols + trimming col + assert len(ctx["body"]) == 3 # 3 data rows + assert len(ctx["body"][0]) == 4 # index + 2 data cols + trimming col + + +def test_render_trimming_mi(): + midx = MultiIndex.from_product([[1, 2], [1, 2, 3]]) + df = DataFrame(np.arange(36).reshape(6, 6), columns=midx, index=midx) + with option_context("styler.render.max_elements", 4): + ctx = df.style._translate(True, True) + + assert len(ctx["body"][0]) == 5 # 2 indexes + 2 data cols + trimming row + assert {"attributes": 'rowspan="2"'}.items() <= 
ctx["body"][0][0].items() + assert {"class": "data row0 col_trim"}.items() <= ctx["body"][0][4].items() + assert {"class": "data row_trim col_trim"}.items() <= ctx["body"][2][4].items() + assert len(ctx["body"]) == 3 # 2 data rows + trimming row + + +def test_render_empty_mi(): + # GH 43305 + df = DataFrame(index=MultiIndex.from_product([["A"], [0, 1]], names=[None, "one"])) + expected = dedent( + """\ + > + + +   + one + + + """ + ) + assert expected in df.style.to_html() + + +@pytest.mark.parametrize("comprehensive", [True, False]) +@pytest.mark.parametrize("render", [True, False]) +@pytest.mark.parametrize("deepcopy", [True, False]) +def test_copy(comprehensive, render, deepcopy, mi_styler, mi_styler_comp): + styler = mi_styler_comp if comprehensive else mi_styler + styler.uuid_len = 5 + + s2 = copy.deepcopy(styler) if deepcopy else copy.copy(styler) # make copy and check + assert s2 is not styler + + if render: + styler.to_html() + + excl = [ + "na_rep", # deprecated + "precision", # deprecated + "cellstyle_map", # render time vars.. + "cellstyle_map_columns", + "cellstyle_map_index", + "template_latex", # render templates are class level + "template_html", + "template_html_style", + "template_html_table", + ] + if not deepcopy: # check memory locations are equal for all included attributes + for attr in [a for a in styler.__dict__ if (not callable(a) and a not in excl)]: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + else: # check memory locations are different for nested or mutable vars + shallow = [ + "data", + "columns", + "index", + "uuid_len", + "uuid", + "caption", + "cell_ids", + "hide_index_", + "hide_columns_", + "hide_index_names", + "hide_column_names", + "table_attributes", + ] + for attr in shallow: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + + for attr in [ + a + for a in styler.__dict__ + if (not callable(a) and a not in excl and a not in shallow) + ]: + if getattr(s2, attr) is None: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + else: + assert id(getattr(s2, attr)) != id(getattr(styler, attr)) + + +def test_clear(mi_styler_comp): + # NOTE: if this test fails for new features then 'mi_styler_comp' should be updated + # to ensure proper testing of the 'copy', 'clear', 'export' methods with new feature + # GH 40675 + styler = mi_styler_comp + styler._compute() # execute applied methods + + clean_copy = Styler(styler.data, uuid=styler.uuid) + + excl = [ + "data", + "index", + "columns", + "uuid", + "uuid_len", # uuid is set to be the same on styler and clean_copy + "cell_ids", + "cellstyle_map", # execution time only + "cellstyle_map_columns", # execution time only + "cellstyle_map_index", # execution time only + "precision", # deprecated + "na_rep", # deprecated + "template_latex", # render templates are class level + "template_html", + "template_html_style", + "template_html_table", + ] + # tests vars are not same vals on obj and clean copy before clear (except for excl) + for attr in [a for a in styler.__dict__ if not (callable(a) or a in excl)]: + res = getattr(styler, attr) == getattr(clean_copy, attr) + if hasattr(res, "__iter__") and len(res) > 0: + assert not all(res) # some element in iterable differs + elif hasattr(res, "__iter__") and len(res) == 0: + pass # empty array + else: + assert not res # explicit var differs + + # test vars have same vales on obj and clean copy after clearing + styler.clear() + for attr in [a for a in styler.__dict__ if not (callable(a))]: + res = getattr(styler, attr) == 
getattr(clean_copy, attr) + assert all(res) if hasattr(res, "__iter__") else res + + +def test_export(mi_styler_comp, mi_styler): + exp_attrs = [ + "_todo", + "hide_index_", + "hide_index_names", + "hide_columns_", + "hide_column_names", + "table_attributes", + "table_styles", + "css", + ] + for attr in exp_attrs: + check = getattr(mi_styler, attr) == getattr(mi_styler_comp, attr) + assert not ( + all(check) if (hasattr(check, "__iter__") and len(check) > 0) else check + ) + + export = mi_styler_comp.export() + used = mi_styler.use(export) + for attr in exp_attrs: + check = getattr(used, attr) == getattr(mi_styler_comp, attr) + assert all(check) if (hasattr(check, "__iter__") and len(check) > 0) else check + + used.to_html() + + +def test_hide_raises(mi_styler): + msg = "`subset` and `level` cannot be passed simultaneously" + with pytest.raises(ValueError, match=msg): + mi_styler.hide(axis="index", subset="something", level="something else") + + msg = "`level` must be of type `int`, `str` or list of such" + with pytest.raises(ValueError, match=msg): + mi_styler.hide(axis="index", level={"bad": 1, "type": 2}) + + +@pytest.mark.parametrize("level", [1, "one", [1], ["one"]]) +def test_hide_index_level(mi_styler, level): + mi_styler.index.names, mi_styler.columns.names = ["zero", "one"], ["zero", "one"] + ctx = mi_styler.hide(axis="index", level=level)._translate(False, True) + assert len(ctx["head"][0]) == 3 + assert len(ctx["head"][1]) == 3 + assert len(ctx["head"][2]) == 4 + assert ctx["head"][2][0]["is_visible"] + assert not ctx["head"][2][1]["is_visible"] + + assert ctx["body"][0][0]["is_visible"] + assert not ctx["body"][0][1]["is_visible"] + assert ctx["body"][1][0]["is_visible"] + assert not ctx["body"][1][1]["is_visible"] + + +@pytest.mark.parametrize("level", [1, "one", [1], ["one"]]) +@pytest.mark.parametrize("names", [True, False]) +def test_hide_columns_level(mi_styler, level, names): + mi_styler.columns.names = ["zero", "one"] + if names: + mi_styler.index.names = ["zero", "one"] + ctx = mi_styler.hide(axis="columns", level=level)._translate(True, False) + assert len(ctx["head"]) == (2 if names else 1) + + +@pytest.mark.parametrize("method", ["applymap", "apply"]) +@pytest.mark.parametrize("axis", ["index", "columns"]) +def test_apply_map_header(method, axis): + # GH 41893 + df = DataFrame({"A": [0, 0], "B": [1, 1]}, index=["C", "D"]) + func = { + "apply": lambda s: ["attr: val" if ("A" in v or "C" in v) else "" for v in s], + "applymap": lambda v: "attr: val" if ("A" in v or "C" in v) else "", + } + + # test execution added to todo + result = getattr(df.style, f"{method}_index")(func[method], axis=axis) + assert len(result._todo) == 1 + assert len(getattr(result, f"ctx_{axis}")) == 0 + + # test ctx object on compute + result._compute() + expected = { + (0, 0): [("attr", "val")], + } + assert getattr(result, f"ctx_{axis}") == expected + + +@pytest.mark.parametrize("method", ["apply", "applymap"]) +@pytest.mark.parametrize("axis", ["index", "columns"]) +def test_apply_map_header_mi(mi_styler, method, axis): + # GH 41893 + func = { + "apply": lambda s: ["attr: val;" if "b" in v else "" for v in s], + "applymap": lambda v: "attr: val" if "b" in v else "", + } + result = getattr(mi_styler, f"{method}_index")(func[method], axis=axis)._compute() + expected = {(1, 1): [("attr", "val")]} + assert getattr(result, f"ctx_{axis}") == expected + + +def test_apply_map_header_raises(mi_styler): + # GH 41893 + with pytest.raises(ValueError, match="No axis named bad for object type DataFrame"): + 
mi_styler.applymap_index(lambda v: "attr: val;", axis="bad")._compute() + + +class TestStyler: + def test_init_non_pandas(self): + msg = "``data`` must be a Series or DataFrame" + with pytest.raises(TypeError, match=msg): + Styler([1, 2, 3]) + + def test_init_series(self): + result = Styler(Series([1, 2])) + assert result.data.ndim == 2 + + def test_repr_html_ok(self, styler): + styler._repr_html_() + + def test_repr_html_mathjax(self, styler): + # gh-19824 / 41395 + assert "tex2jax_ignore" not in styler._repr_html_() + + with option_context("styler.html.mathjax", False): + assert "tex2jax_ignore" in styler._repr_html_() + + def test_update_ctx(self, styler): + styler._update_ctx(DataFrame({"A": ["color: red", "color: blue"]})) + expected = {(0, 0): [("color", "red")], (1, 0): [("color", "blue")]} + assert styler.ctx == expected + + def test_update_ctx_flatten_multi_and_trailing_semi(self, styler): + attrs = DataFrame({"A": ["color: red; foo: bar", "color:blue ; foo: baz;"]}) + styler._update_ctx(attrs) + expected = { + (0, 0): [("color", "red"), ("foo", "bar")], + (1, 0): [("color", "blue"), ("foo", "baz")], + } + assert styler.ctx == expected + + def test_render(self): + df = DataFrame({"A": [0, 1]}) + style = lambda x: Series(["color: red", "color: blue"], name=x.name) + s = Styler(df, uuid="AB").apply(style) + s.to_html() + # it worked? + + def test_multiple_render(self, df): + # GH 39396 + s = Styler(df, uuid_len=0).applymap(lambda x: "color: red;", subset=["A"]) + s.to_html() # do 2 renders to ensure css styles not duplicated + assert ( + '" in s.to_html() + ) + + def test_render_empty_dfs(self): + empty_df = DataFrame() + es = Styler(empty_df) + es.to_html() + # An index but no columns + DataFrame(columns=["a"]).style.to_html() + # A column but no index + DataFrame(index=["a"]).style.to_html() + # No IndexError raised? + + def test_render_double(self): + df = DataFrame({"A": [0, 1]}) + style = lambda x: Series( + ["color: red; border: 1px", "color: blue; border: 2px"], name=x.name + ) + s = Styler(df, uuid="AB").apply(style) + s.to_html() + # it worked? 
+ + def test_set_properties(self): + df = DataFrame({"A": [0, 1]}) + result = df.style.set_properties(color="white", size="10px")._compute().ctx + # order is deterministic + v = [("color", "white"), ("size", "10px")] + expected = {(0, 0): v, (1, 0): v} + assert result.keys() == expected.keys() + for v1, v2 in zip(result.values(), expected.values()): + assert sorted(v1) == sorted(v2) + + def test_set_properties_subset(self): + df = DataFrame({"A": [0, 1]}) + result = ( + df.style.set_properties(subset=IndexSlice[0, "A"], color="white") + ._compute() + .ctx + ) + expected = {(0, 0): [("color", "white")]} + assert result == expected + + def test_empty_index_name_doesnt_display(self, blank_value): + # https://github.com/pandas-dev/pandas/pull/12090#issuecomment-180695902 + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.style._translate(True, True) + assert len(result["head"]) == 1 + expected = { + "class": "blank level0", + "type": "th", + "value": blank_value, + "is_visible": True, + "display_value": blank_value, + } + assert expected.items() <= result["head"][0][0].items() + + def test_index_name(self): + # https://github.com/pandas-dev/pandas/issues/11655 + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.set_index("A").style._translate(True, True) + expected = { + "class": "index_name level0", + "type": "th", + "value": "A", + "is_visible": True, + "display_value": "A", + } + assert expected.items() <= result["head"][1][0].items() + + def test_numeric_columns(self): + # https://github.com/pandas-dev/pandas/issues/12125 + # smoke test for _translate + df = DataFrame({0: [1, 2, 3]}) + df.style._translate(True, True) + + def test_apply_axis(self): + df = DataFrame({"A": [0, 0], "B": [1, 1]}) + f = lambda x: [f"val: {x.max()}" for v in x] + result = df.style.apply(f, axis=1) + assert len(result._todo) == 1 + assert len(result.ctx) == 0 + result._compute() + expected = { + (0, 0): [("val", "1")], + (0, 1): [("val", "1")], + (1, 0): [("val", "1")], + (1, 1): [("val", "1")], + } + assert result.ctx == expected + + result = df.style.apply(f, axis=0) + expected = { + (0, 0): [("val", "0")], + (0, 1): [("val", "1")], + (1, 0): [("val", "0")], + (1, 1): [("val", "1")], + } + result._compute() + assert result.ctx == expected + result = df.style.apply(f) # default + result._compute() + assert result.ctx == expected + + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_series_return(self, axis): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + + # test Series return where len(Series) < df.index or df.columns but labels OK + func = lambda s: Series(["color: red;"], index=["Y"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + + # test Series return where labels align but different order + func = lambda s: Series(["color: red;", "color: blue;"], index=["Y", "X"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(0, 0)] == [("color", "blue")] + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + assert result[(axis, 1 - axis)] == [("color", "blue")] + + @pytest.mark.parametrize("index", [False, True]) + @pytest.mark.parametrize("columns", [False, True]) + def test_apply_dataframe_return(self, index, columns): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + idxs = ["X", "Y"] if index else ["Y"] 
+ cols = ["X", "Y"] if columns else ["Y"] + df_styles = DataFrame("color: red;", index=idxs, columns=cols) + result = df.style.apply(lambda x: df_styles, axis=None)._compute().ctx + + assert result[(1, 1)] == [("color", "red")] # (Y,Y) styles always present + assert (result[(0, 1)] == [("color", "red")]) is index # (X,Y) only if index + assert (result[(1, 0)] == [("color", "red")]) is columns # (Y,X) only if cols + assert (result[(0, 0)] == [("color", "red")]) is (index and columns) # (X,X) + + @pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:], + IndexSlice[:, ["A"]], + IndexSlice[[1], :], + IndexSlice[[1], ["A"]], + IndexSlice[:2, ["A", "B"]], + ], + ) + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_subset(self, slice_, axis, df): + def h(x, foo="bar"): + return Series(f"color: {foo}", index=x.index, name=x.name) + + result = df.style.apply(h, axis=axis, subset=slice_, foo="baz")._compute().ctx + expected = { + (r, c): [("color", "baz")] + for r, row in enumerate(df.index) + for c, col in enumerate(df.columns) + if row in df.loc[slice_].index and col in df.loc[slice_].columns + } + assert result == expected + + @pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:], + IndexSlice[:, ["A"]], + IndexSlice[[1], :], + IndexSlice[[1], ["A"]], + IndexSlice[:2, ["A", "B"]], + ], + ) + def test_applymap_subset(self, slice_, df): + result = df.style.applymap(lambda x: "color:baz;", subset=slice_)._compute().ctx + expected = { + (r, c): [("color", "baz")] + for r, row in enumerate(df.index) + for c, col in enumerate(df.columns) + if row in df.loc[slice_].index and col in df.loc[slice_].columns + } + assert result == expected + + @pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:, IndexSlice["x", "A"]], + IndexSlice[:, IndexSlice[:, "A"]], + IndexSlice[:, IndexSlice[:, ["A", "C"]]], # missing col element + IndexSlice[IndexSlice["a", 1], :], + IndexSlice[IndexSlice[:, 1], :], + IndexSlice[IndexSlice[:, [1, 3]], :], # missing row element + IndexSlice[:, ("x", "A")], + IndexSlice[("a", 1), :], + ], + ) + def test_applymap_subset_multiindex(self, slice_): + # GH 19861 + # edited for GH 33562 + warn = None + msg = "indexing on a MultiIndex with a nested sequence of labels" + if ( + isinstance(slice_[-1], tuple) + and isinstance(slice_[-1][-1], list) + and "C" in slice_[-1][-1] + ): + warn = FutureWarning + elif ( + isinstance(slice_[0], tuple) + and isinstance(slice_[0][1], list) + and 3 in slice_[0][1] + ): + warn = FutureWarning + + idx = MultiIndex.from_product([["a", "b"], [1, 2]]) + col = MultiIndex.from_product([["x", "y"], ["A", "B"]]) + df = DataFrame(np.random.rand(4, 4), columns=col, index=idx) + + with tm.assert_produces_warning(warn, match=msg): + df.style.applymap(lambda x: "color: red;", subset=slice_).to_html() + + def test_applymap_subset_multiindex_code(self): + # https://github.com/pandas-dev/pandas/issues/25858 + # Checks styler.applymap works with multindex when codes are provided + codes = np.array([[0, 0, 1, 1], [0, 1, 0, 1]]) + columns = MultiIndex( + levels=[["a", "b"], ["%", "#"]], codes=codes, names=["", ""] + ) + df = DataFrame( + [[1, -1, 1, 1], [-1, 1, 1, 1]], index=["hello", "world"], columns=columns + ) + pct_subset = IndexSlice[:, IndexSlice[:, "%":"%"]] + + def color_negative_red(val): + color = "red" if val < 0 else "black" + return f"color: {color}" + + df.loc[pct_subset] + df.style.applymap(color_negative_red, subset=pct_subset) + + @pytest.mark.parametrize( + "stylefunc", ["background_gradient", "bar", "text_gradient"] + ) + def 
test_subset_for_boolean_cols(self, stylefunc): + # GH47838 + df = DataFrame( + [ + [1, 2], + [3, 4], + ], + columns=[False, True], + ) + styled = getattr(df.style, stylefunc)() + styled._compute() + assert set(styled.ctx) == {(0, 0), (0, 1), (1, 0), (1, 1)} + + def test_empty(self): + df = DataFrame({"A": [1, 0]}) + s = df.style + s.ctx = {(0, 0): [("color", "red")], (1, 0): [("", "")]} + + result = s._translate(True, True)["cellstyle"] + expected = [ + {"props": [("color", "red")], "selectors": ["row0_col0"]}, + {"props": [("", "")], "selectors": ["row1_col0"]}, + ] + assert result == expected + + def test_duplicate(self): + df = DataFrame({"A": [1, 0]}) + s = df.style + s.ctx = {(0, 0): [("color", "red")], (1, 0): [("color", "red")]} + + result = s._translate(True, True)["cellstyle"] + expected = [ + {"props": [("color", "red")], "selectors": ["row0_col0", "row1_col0"]} + ] + assert result == expected + + def test_init_with_na_rep(self): + # GH 21527 28358 + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = Styler(df, na_rep="NA")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + + def test_caption(self, df): + styler = Styler(df, caption="foo") + result = styler.to_html() + assert all(["caption" in result, "foo" in result]) + + styler = df.style + result = styler.set_caption("baz") + assert styler is result + assert styler.caption == "baz" + + def test_uuid(self, df): + styler = Styler(df, uuid="abc123") + result = styler.to_html() + assert "abc123" in result + + styler = df.style + result = styler.set_uuid("aaa") + assert result is styler + assert result.uuid == "aaa" + + def test_unique_id(self): + # See https://github.com/pandas-dev/pandas/issues/16780 + df = DataFrame({"a": [1, 3, 5, 6], "b": [2, 4, 12, 21]}) + result = df.style.to_html(uuid="test") + assert "test" in result + ids = re.findall('id="(.*?)"', result) + assert np.unique(ids).size == len(ids) + + def test_table_styles(self, df): + style = [{"selector": "th", "props": [("foo", "bar")]}] # default format + styler = Styler(df, table_styles=style) + result = " ".join(styler.to_html().split()) + assert "th { foo: bar; }" in result + + styler = df.style + result = styler.set_table_styles(style) + assert styler is result + assert styler.table_styles == style + + # GH 39563 + style = [{"selector": "th", "props": "foo:bar;"}] # css string format + styler = df.style.set_table_styles(style) + result = " ".join(styler.to_html().split()) + assert "th { foo: bar; }" in result + + def test_table_styles_multiple(self, df): + ctx = df.style.set_table_styles( + [ + {"selector": "th,td", "props": "color:red;"}, + {"selector": "tr", "props": "color:green;"}, + ] + )._translate(True, True)["table_styles"] + assert ctx == [ + {"selector": "th", "props": [("color", "red")]}, + {"selector": "td", "props": [("color", "red")]}, + {"selector": "tr", "props": [("color", "green")]}, + ] + + def test_table_styles_dict_multiple_selectors(self, df): + # GH 44011 + result = df.style.set_table_styles( + { + "B": [ + {"selector": "th,td", "props": [("border-left", "2px solid black")]} + ] + } + )._translate(True, True)["table_styles"] + + expected = [ + {"selector": "th.col1", "props": [("border-left", "2px solid black")]}, + {"selector": "td.col1", "props": [("border-left", "2px solid black")]}, + ] + + assert result == expected + + def test_maybe_convert_css_to_tuples(self): + expected = [("a", "b"), ("c", "d e")] + assert 
maybe_convert_css_to_tuples("a:b;c:d e;") == expected + assert maybe_convert_css_to_tuples("a: b ;c: d e ") == expected + expected = [] + assert maybe_convert_css_to_tuples("") == expected + + def test_maybe_convert_css_to_tuples_err(self): + msg = "Styles supplied as string must follow CSS rule formats" + with pytest.raises(ValueError, match=msg): + maybe_convert_css_to_tuples("err") + + def test_table_attributes(self, df): + attributes = 'class="foo" data-bar' + styler = Styler(df, table_attributes=attributes) + result = styler.to_html() + assert 'class="foo" data-bar' in result + + result = df.style.set_table_attributes(attributes).to_html() + assert 'class="foo" data-bar' in result + + def test_apply_none(self): + def f(x): + return DataFrame( + np.where(x == x.max(), "color: red", ""), + index=x.index, + columns=x.columns, + ) + + result = DataFrame([[1, 2], [3, 4]]).style.apply(f, axis=None)._compute().ctx + assert result[(1, 1)] == [("color", "red")] + + def test_trim(self, df): + result = df.style.to_html() # trim=True + assert result.count("#") == 0 + + result = df.style.highlight_max().to_html() + assert result.count("#") == len(df.columns) + + def test_export(self, df, styler): + f = lambda x: "color: red" if x > 0 else "color: blue" + g = lambda x, z: f"color: {z}" if x > 0 else f"color: {z}" + style1 = styler + style1.applymap(f).applymap(g, z="b").highlight_max()._compute() # = render + result = style1.export() + style2 = df.style + style2.use(result) + assert style1._todo == style2._todo + style2.to_html() + + def test_bad_apply_shape(self): + df = DataFrame([[1, 2], [3, 4]], index=["A", "B"], columns=["X", "Y"]) + + msg = "resulted in the apply method collapsing to a Series." + with pytest.raises(ValueError, match=msg): + df.style._apply(lambda x: "x") + + msg = "created invalid {} labels" + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: [""]) + + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: ["", "", "", ""]) + + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: Series(["a:v;", ""], index=["A", "C"]), axis=0) + + with pytest.raises(ValueError, match=msg.format("columns")): + df.style._apply(lambda x: ["", "", ""], axis=1) + + with pytest.raises(ValueError, match=msg.format("columns")): + df.style._apply(lambda x: Series(["a:v;", ""], index=["X", "Z"]), axis=1) + + msg = "returned ndarray with wrong shape" + with pytest.raises(ValueError, match=msg): + df.style._apply(lambda x: np.array([[""], [""]]), axis=None) + + def test_apply_bad_return(self): + def f(x): + return "" + + df = DataFrame([[1, 2], [3, 4]]) + msg = ( + "must return a DataFrame or ndarray when passed to `Styler.apply` " + "with axis=None" + ) + with pytest.raises(TypeError, match=msg): + df.style._apply(f, axis=None) + + @pytest.mark.parametrize("axis", ["index", "columns"]) + def test_apply_bad_labels(self, axis): + def f(x): + return DataFrame(**{axis: ["bad", "labels"]}) + + df = DataFrame([[1, 2], [3, 4]]) + msg = f"created invalid {axis} labels." 
+ with pytest.raises(ValueError, match=msg): + df.style._apply(f, axis=None) + + def test_get_level_lengths(self): + index = MultiIndex.from_product([["a", "b"], [0, 1, 2]]) + expected = { + (0, 0): 3, + (0, 3): 3, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths(index, sparsify=True, max_index=100) + tm.assert_dict_equal(result, expected) + + expected = { + (0, 0): 1, + (0, 1): 1, + (0, 2): 1, + (0, 3): 1, + (0, 4): 1, + (0, 5): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths(index, sparsify=False, max_index=100) + tm.assert_dict_equal(result, expected) + + def test_get_level_lengths_un_sorted(self): + index = MultiIndex.from_arrays([[1, 1, 2, 1], ["a", "b", "b", "d"]]) + expected = { + (0, 0): 2, + (0, 2): 1, + (0, 3): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + } + result = _get_level_lengths(index, sparsify=True, max_index=100) + tm.assert_dict_equal(result, expected) + + expected = { + (0, 0): 1, + (0, 1): 1, + (0, 2): 1, + (0, 3): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + } + result = _get_level_lengths(index, sparsify=False, max_index=100) + tm.assert_dict_equal(result, expected) + + def test_mi_sparse_index_names(self, blank_value): + # Test the class names and displayed value are correct on rendering MI names + df = DataFrame( + {"A": [1, 2]}, + index=MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ), + ) + result = df.style._translate(True, True) + head = result["head"][1] + expected = [ + { + "class": "index_name level0", + "display_value": "idx_level_0", + "is_visible": True, + }, + { + "class": "index_name level1", + "display_value": "idx_level_1", + "is_visible": True, + }, + { + "class": "blank col0", + "display_value": blank_value, + "is_visible": True, + }, + ] + for i, expected_dict in enumerate(expected): + assert expected_dict.items() <= head[i].items() + + def test_mi_sparse_column_names(self, blank_value): + df = DataFrame( + np.arange(16).reshape(4, 4), + index=MultiIndex.from_arrays( + [["a", "a", "b", "a"], [0, 1, 1, 2]], + names=["idx_level_0", "idx_level_1"], + ), + columns=MultiIndex.from_arrays( + [["C1", "C1", "C2", "C2"], [1, 0, 1, 0]], names=["colnam_0", "colnam_1"] + ), + ) + result = Styler(df, cell_ids=False)._translate(True, True) + + for level in [0, 1]: + head = result["head"][level] + expected = [ + { + "class": "blank", + "display_value": blank_value, + "is_visible": True, + }, + { + "class": f"index_name level{level}", + "display_value": f"colnam_{level}", + "is_visible": True, + }, + ] + for i, expected_dict in enumerate(expected): + assert expected_dict.items() <= head[i].items() + + def test_hide_column_headers(self, df, styler): + ctx = styler.hide(axis="columns")._translate(True, True) + assert len(ctx["head"]) == 0 # no header entries with an unnamed index + + df.index.name = "some_name" + ctx = df.style.hide(axis="columns")._translate(True, True) + assert len(ctx["head"]) == 1 + # index names still visible, changed in #42101, reverted in 43404 + + def test_hide_single_index(self, df): + # GH 14194 + # single unnamed index + ctx = df.style._translate(True, True) + assert ctx["body"][0][0]["is_visible"] + assert ctx["head"][0][0]["is_visible"] + ctx2 = df.style.hide(axis="index")._translate(True, True) + assert not ctx2["body"][0][0]["is_visible"] + assert not ctx2["head"][0][0]["is_visible"] + + # single named index + ctx3 = 
df.set_index("A").style._translate(True, True) + assert ctx3["body"][0][0]["is_visible"] + assert len(ctx3["head"]) == 2 # 2 header levels + assert ctx3["head"][0][0]["is_visible"] + + ctx4 = df.set_index("A").style.hide(axis="index")._translate(True, True) + assert not ctx4["body"][0][0]["is_visible"] + assert len(ctx4["head"]) == 1 # only 1 header levels + assert not ctx4["head"][0][0]["is_visible"] + + def test_hide_multiindex(self): + # GH 14194 + df = DataFrame( + {"A": [1, 2], "B": [1, 2]}, + index=MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ), + ) + ctx1 = df.style._translate(True, True) + # tests for 'a' and '0' + assert ctx1["body"][0][0]["is_visible"] + assert ctx1["body"][0][1]["is_visible"] + # check for blank header rows + assert len(ctx1["head"][0]) == 4 # two visible indexes and two data columns + + ctx2 = df.style.hide(axis="index")._translate(True, True) + # tests for 'a' and '0' + assert not ctx2["body"][0][0]["is_visible"] + assert not ctx2["body"][0][1]["is_visible"] + # check for blank header rows + assert len(ctx2["head"][0]) == 3 # one hidden (col name) and two data columns + assert not ctx2["head"][0][0]["is_visible"] + + def test_hide_columns_single_level(self, df): + # GH 14194 + # test hiding single column + ctx = df.style._translate(True, True) + assert ctx["head"][0][1]["is_visible"] + assert ctx["head"][0][1]["display_value"] == "A" + assert ctx["head"][0][2]["is_visible"] + assert ctx["head"][0][2]["display_value"] == "B" + assert ctx["body"][0][1]["is_visible"] # col A, row 1 + assert ctx["body"][1][2]["is_visible"] # col B, row 1 + + ctx = df.style.hide("A", axis="columns")._translate(True, True) + assert not ctx["head"][0][1]["is_visible"] + assert not ctx["body"][0][1]["is_visible"] # col A, row 1 + assert ctx["body"][1][2]["is_visible"] # col B, row 1 + + # test hiding mulitiple columns + ctx = df.style.hide(["A", "B"], axis="columns")._translate(True, True) + assert not ctx["head"][0][1]["is_visible"] + assert not ctx["head"][0][2]["is_visible"] + assert not ctx["body"][0][1]["is_visible"] # col A, row 1 + assert not ctx["body"][1][2]["is_visible"] # col B, row 1 + + def test_hide_columns_index_mult_levels(self): + # GH 14194 + # setup dataframe with multiple column levels and indices + i1 = MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ) + i2 = MultiIndex.from_arrays( + [["b", "b"], [0, 1]], names=["col_level_0", "col_level_1"] + ) + df = DataFrame([[1, 2], [3, 4]], index=i1, columns=i2) + ctx = df.style._translate(True, True) + # column headers + assert ctx["head"][0][2]["is_visible"] + assert ctx["head"][1][2]["is_visible"] + assert ctx["head"][1][3]["display_value"] == "1" + # indices + assert ctx["body"][0][0]["is_visible"] + # data + assert ctx["body"][1][2]["is_visible"] + assert ctx["body"][1][2]["display_value"] == "3" + assert ctx["body"][1][3]["is_visible"] + assert ctx["body"][1][3]["display_value"] == "4" + + # hide top column level, which hides both columns + ctx = df.style.hide("b", axis="columns")._translate(True, True) + assert not ctx["head"][0][2]["is_visible"] # b + assert not ctx["head"][1][2]["is_visible"] # 0 + assert not ctx["body"][1][2]["is_visible"] # 3 + assert ctx["body"][0][0]["is_visible"] # index + + # hide first column only + ctx = df.style.hide([("b", 0)], axis="columns")._translate(True, True) + assert not ctx["head"][0][2]["is_visible"] # b + assert ctx["head"][0][3]["is_visible"] # b + assert not ctx["head"][1][2]["is_visible"] # 0 + 
assert not ctx["body"][1][2]["is_visible"] # 3 + assert ctx["body"][1][3]["is_visible"] + assert ctx["body"][1][3]["display_value"] == "4" + + # hide second column and index + ctx = df.style.hide([("b", 1)], axis=1).hide(axis=0)._translate(True, True) + assert not ctx["body"][0][0]["is_visible"] # index + assert len(ctx["head"][0]) == 3 + assert ctx["head"][0][1]["is_visible"] # b + assert ctx["head"][1][1]["is_visible"] # 0 + assert not ctx["head"][1][2]["is_visible"] # 1 + assert not ctx["body"][1][3]["is_visible"] # 4 + assert ctx["body"][1][2]["is_visible"] + assert ctx["body"][1][2]["display_value"] == "3" + + # hide top row level, which hides both rows so body empty + ctx = df.style.hide("a", axis="index")._translate(True, True) + assert ctx["body"] == [] + + # hide first row only + ctx = df.style.hide(("a", 0), axis="index")._translate(True, True) + for i in [0, 1, 2, 3]: + assert "row1" in ctx["body"][0][i]["class"] # row0 not included in body + assert ctx["body"][0][i]["is_visible"] + + def test_pipe(self, df): + def set_caption_from_template(styler, a, b): + return styler.set_caption(f"Dataframe with a = {a} and b = {b}") + + styler = df.style.pipe(set_caption_from_template, "A", b="B") + assert "Dataframe with a = A and b = B" in styler.to_html() + + # Test with an argument that is a (callable, keyword_name) pair. + def f(a, b, styler): + return (a, b, styler) + + styler = df.style + result = styler.pipe((f, "styler"), a=1, b=2) + assert result == (1, 2, styler) + + def test_no_cell_ids(self): + # GH 35588 + # GH 35663 + df = DataFrame(data=[[0]]) + styler = Styler(df, uuid="_", cell_ids=False) + styler.to_html() + s = styler.to_html() # render twice to ensure ctx is not updated + assert s.find('') != -1 + + @pytest.mark.parametrize( + "classes", + [ + DataFrame( + data=[["", "test-class"], [np.nan, None]], + columns=["A", "B"], + index=["a", "b"], + ), + DataFrame(data=[["test-class"]], columns=["B"], index=["a"]), + DataFrame(data=[["test-class", "unused"]], columns=["B", "C"], index=["a"]), + ], + ) + def test_set_data_classes(self, classes): + # GH 36159 + df = DataFrame(data=[[0, 1], [2, 3]], columns=["A", "B"], index=["a", "b"]) + s = Styler(df, uuid_len=0, cell_ids=False).set_td_classes(classes).to_html() + assert '0' in s + assert '1' in s + assert '2' in s + assert '3' in s + # GH 39317 + s = Styler(df, uuid_len=0, cell_ids=True).set_td_classes(classes).to_html() + assert '0' in s + assert '1' in s + assert '2' in s + assert '3' in s + + def test_set_data_classes_reindex(self): + # GH 39317 + df = DataFrame( + data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]], columns=[0, 1, 2], index=[0, 1, 2] + ) + classes = DataFrame( + data=[["mi", "ma"], ["mu", "mo"]], + columns=[0, 2], + index=[0, 2], + ) + s = Styler(df, uuid_len=0).set_td_classes(classes).to_html() + assert '0' in s + assert '2' in s + assert '4' in s + assert '6' in s + assert '8' in s + + def test_chaining_table_styles(self): + # GH 35607 + df = DataFrame(data=[[0, 1], [1, 2]], columns=["A", "B"]) + styler = df.style.set_table_styles( + [{"selector": "", "props": [("background-color", "yellow")]}] + ).set_table_styles( + [{"selector": ".col0", "props": [("background-color", "blue")]}], + overwrite=False, + ) + assert len(styler.table_styles) == 2 + + def test_column_and_row_styling(self): + # GH 35607 + df = DataFrame(data=[[0, 1], [1, 2]], columns=["A", "B"]) + s = Styler(df, uuid_len=0) + s = s.set_table_styles({"A": [{"selector": "", "props": [("color", "blue")]}]}) + assert "#T_ .col0 {\n color: blue;\n}" in 
s.to_html() + s = s.set_table_styles( + {0: [{"selector": "", "props": [("color", "blue")]}]}, axis=1 + ) + assert "#T_ .row0 {\n color: blue;\n}" in s.to_html() + + @pytest.mark.parametrize("len_", [1, 5, 32, 33, 100]) + def test_uuid_len(self, len_): + # GH 36345 + df = DataFrame(data=[["A"]]) + s = Styler(df, uuid_len=len_, cell_ids=False).to_html() + strt = s.find('id="T_') + end = s[strt + 6 :].find('"') + if len_ > 32: + assert end == 32 + else: + assert end == len_ + + @pytest.mark.parametrize("len_", [-2, "bad", None]) + def test_uuid_len_raises(self, len_): + # GH 36345 + df = DataFrame(data=[["A"]]) + msg = "``uuid_len`` must be an integer in range \\[0, 32\\]." + with pytest.raises(TypeError, match=msg): + Styler(df, uuid_len=len_, cell_ids=False).to_html() + + @pytest.mark.parametrize( + "slc", + [ + IndexSlice[:, :], + IndexSlice[:, 1], + IndexSlice[1, :], + IndexSlice[[1], [1]], + IndexSlice[1, [1]], + IndexSlice[[1], 1], + IndexSlice[1], + IndexSlice[1, 1], + slice(None, None, None), + [0, 1], + np.array([0, 1]), + Series([0, 1]), + ], + ) + def test_non_reducing_slice(self, slc): + df = DataFrame([[0, 1], [2, 3]]) + + tslice_ = non_reducing_slice(slc) + assert isinstance(df.loc[tslice_], DataFrame) + + @pytest.mark.parametrize("box", [list, Series, np.array]) + def test_list_slice(self, box): + # like dataframe getitem + subset = box(["A"]) + + df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"]) + expected = IndexSlice[:, ["A"]] + + result = non_reducing_slice(subset) + tm.assert_frame_equal(df.loc[result], df.loc[expected]) + + def test_non_reducing_slice_on_multiindex(self): + # GH 19861 + dic = { + ("a", "d"): [1, 4], + ("a", "c"): [2, 3], + ("b", "c"): [3, 2], + ("b", "d"): [4, 1], + } + df = DataFrame(dic, index=[0, 1]) + idx = IndexSlice + slice_ = idx[:, idx["b", "d"]] + tslice_ = non_reducing_slice(slice_) + + result = df.loc[tslice_] + expected = DataFrame({("b", "d"): [4, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:, :], + # check cols + IndexSlice[:, IndexSlice[["a"]]], # inferred deeper need list + IndexSlice[:, IndexSlice[["a"], ["c"]]], # inferred deeper need list + IndexSlice[:, IndexSlice["a", "c", :]], + IndexSlice[:, IndexSlice["a", :, "e"]], + IndexSlice[:, IndexSlice[:, "c", "e"]], + IndexSlice[:, IndexSlice["a", ["c", "d"], :]], # check list + IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # allow missing + IndexSlice[:, IndexSlice["a", ["c", "d", "-"], "e"]], # no slice + # check rows + IndexSlice[IndexSlice[["U"]], :], # inferred deeper need list + IndexSlice[IndexSlice[["U"], ["W"]], :], # inferred deeper need list + IndexSlice[IndexSlice["U", "W", :], :], + IndexSlice[IndexSlice["U", :, "Y"], :], + IndexSlice[IndexSlice[:, "W", "Y"], :], + IndexSlice[IndexSlice[:, "W", ["Y", "Z"]], :], # check list + IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # allow missing + IndexSlice[IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice + # check simultaneous + IndexSlice[IndexSlice[:, "W", "Y"], IndexSlice["a", "c", :]], + ], + ) + def test_non_reducing_multi_slice_on_multiindex(self, slice_): + # GH 33562 + cols = MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]) + idxs = MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]]) + df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs) + + msg = "indexing on a MultiIndex with a nested sequence of labels" + warn = None + for lvl in [0, 1]: + key = slice_[lvl] + if isinstance(key, tuple): + for 
subkey in key: + if isinstance(subkey, list) and "-" in subkey: + # not present in the index level, ignored, will raise in future + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + expected = df.loc[slice_] + + with tm.assert_produces_warning(warn, match=msg): + result = df.loc[non_reducing_slice(slice_)] + tm.assert_frame_equal(result, expected) + + +def test_hidden_index_names(mi_df): + mi_df.index.names = ["Lev0", "Lev1"] + mi_styler = mi_df.style + ctx = mi_styler._translate(True, True) + assert len(ctx["head"]) == 3 # 2 column index levels + 1 index names row + + mi_styler.hide(axis="index", names=True) + ctx = mi_styler._translate(True, True) + assert len(ctx["head"]) == 2 # index names row is unparsed + for i in range(4): + assert ctx["body"][0][i]["is_visible"] # 2 index levels + 2 data values visible + + mi_styler.hide(axis="index", level=1) + ctx = mi_styler._translate(True, True) + assert len(ctx["head"]) == 2 # index names row is still hidden + assert ctx["body"][0][0]["is_visible"] is True + assert ctx["body"][0][1]["is_visible"] is False + + +def test_hidden_column_names(mi_df): + mi_df.columns.names = ["Lev0", "Lev1"] + mi_styler = mi_df.style + ctx = mi_styler._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "Lev0" + assert ctx["head"][1][1]["display_value"] == "Lev1" + + mi_styler.hide(names=True, axis="columns") + ctx = mi_styler._translate(True, True) + assert ctx["head"][0][1]["display_value"] == " " + assert ctx["head"][1][1]["display_value"] == " " + + mi_styler.hide(level=0, axis="columns") + ctx = mi_styler._translate(True, True) + assert len(ctx["head"]) == 1 # no index names and only one visible column headers + assert ctx["head"][0][1]["display_value"] == " " + + +@pytest.mark.parametrize("caption", [1, ("a", "b", "c"), (1, "s")]) +def test_caption_raises(mi_styler, caption): + msg = "`caption` must be either a string or 2-tuple of strings." 
+ with pytest.raises(ValueError, match=msg): + mi_styler.set_caption(caption) + + +def test_hiding_headers_over_index_no_sparsify(): + # GH 43464 + midx = MultiIndex.from_product([[1, 2], ["a", "a", "b"]]) + df = DataFrame(9, index=midx, columns=[0]) + ctx = df.style._translate(False, False) + assert len(ctx["body"]) == 6 + ctx = df.style.hide((1, "a"), axis=0)._translate(False, False) + assert len(ctx["body"]) == 4 + assert "row2" in ctx["body"][0][0]["class"] + + +def test_hiding_headers_over_columns_no_sparsify(): + # GH 43464 + midx = MultiIndex.from_product([[1, 2], ["a", "a", "b"]]) + df = DataFrame(9, columns=midx, index=[0]) + ctx = df.style._translate(False, False) + for ix in [(0, 1), (0, 2), (1, 1), (1, 2)]: + assert ctx["head"][ix[0]][ix[1]]["is_visible"] is True + ctx = df.style.hide((1, "a"), axis="columns")._translate(False, False) + for ix in [(0, 1), (0, 2), (1, 1), (1, 2)]: + assert ctx["head"][ix[0]][ix[1]]["is_visible"] is False + + +def test_get_level_lengths_mi_hidden(): + # GH 43464 + index = MultiIndex.from_arrays([[1, 1, 1, 2, 2, 2], ["a", "a", "b", "a", "a", "b"]]) + expected = { + (0, 2): 1, + (0, 3): 1, + (0, 4): 1, + (0, 5): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths( + index, + sparsify=False, + max_index=100, + hidden_elements=[0, 1, 0, 1], # hidden element can repeat if duplicated index + ) + tm.assert_dict_equal(result, expected) + + +def test_row_trimming_hide_index(): + # gh 43703 + df = DataFrame([[1], [2], [3], [4], [5]]) + with option_context("styler.render.max_rows", 2): + ctx = df.style.hide([0, 1], axis="index")._translate(True, True) + assert len(ctx["body"]) == 3 + for r, val in enumerate(["3", "4", "..."]): + assert ctx["body"][r][1]["display_value"] == val + + +def test_row_trimming_hide_index_mi(): + # gh 44247 + df = DataFrame([[1], [2], [3], [4], [5]]) + df.index = MultiIndex.from_product([[0], [0, 1, 2, 3, 4]]) + with option_context("styler.render.max_rows", 2): + ctx = df.style.hide([(0, 0), (0, 1)], axis="index")._translate(True, True) + assert len(ctx["body"]) == 3 + + # level 0 index headers (sparsified) + assert {"value": 0, "attributes": 'rowspan="2"', "is_visible": True}.items() <= ctx[ + "body" + ][0][0].items() + assert {"value": 0, "attributes": "", "is_visible": False}.items() <= ctx["body"][ + 1 + ][0].items() + assert {"value": "...", "is_visible": True}.items() <= ctx["body"][2][0].items() + + for r, val in enumerate(["2", "3", "..."]): + assert ctx["body"][r][1]["display_value"] == val # level 1 index headers + for r, val in enumerate(["3", "4", "..."]): + assert ctx["body"][r][2]["display_value"] == val # data values + + +def test_col_trimming_hide_columns(): + # gh 44272 + df = DataFrame([[1, 2, 3, 4, 5]]) + with option_context("styler.render.max_columns", 2): + ctx = df.style.hide([0, 1], axis="columns")._translate(True, True) + + assert len(ctx["head"][0]) == 6 # blank, [0, 1 (hidden)], [2 ,3 (visible)], + trim + for c, vals in enumerate([(1, False), (2, True), (3, True), ("...", True)]): + assert ctx["head"][0][c + 2]["value"] == vals[0] + assert ctx["head"][0][c + 2]["is_visible"] == vals[1] + + assert len(ctx["body"][0]) == 6 # index + 2 hidden + 2 visible + trimming col + + +def test_no_empty_apply(mi_styler): + # 45313 + mi_styler.apply(lambda s: ["a:v;"] * 2, subset=[False, False]) + mi_styler._compute() + + +@pytest.mark.parametrize("format", ["html", "latex", "string"]) +def test_output_buffer(mi_styler, format): + # gh 47053 + with 
tm.ensure_clean(f"delete_me.{format}") as f: + getattr(mi_styler, f"to_{format}")(f) diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py new file mode 100644 index 00000000..1c67d125 --- /dev/null +++ b/pandas/tests/io/formats/style/test_to_latex.py @@ -0,0 +1,1087 @@ +from textwrap import dedent + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + option_context, +) + +pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler +from pandas.io.formats.style_render import ( + _parse_latex_cell_styles, + _parse_latex_css_conversion, + _parse_latex_header_span, + _parse_latex_table_styles, + _parse_latex_table_wrapping, +) + + +@pytest.fixture +def df(): + return DataFrame({"A": [0, 1], "B": [-0.61, -1.22], "C": ["ab", "cd"]}) + + +@pytest.fixture +def df_ext(): + return DataFrame( + {"A": [0, 1, 2], "B": [-0.61, -1.22, -2.22], "C": ["ab", "cd", "de"]} + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0, precision=2) + + +def test_minimal_latex_tabular(styler): + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & B & C \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + assert styler.to_latex() == expected + + +def test_tabular_hrules(styler): + expected = dedent( + """\ + \\begin{tabular}{lrrl} + \\toprule + & A & B & C \\\\ + \\midrule + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\bottomrule + \\end{tabular} + """ + ) + assert styler.to_latex(hrules=True) == expected + + +def test_tabular_custom_hrules(styler): + styler.set_table_styles( + [ + {"selector": "toprule", "props": ":hline"}, + {"selector": "bottomrule", "props": ":otherline"}, + ] + ) # no midrule + expected = dedent( + """\ + \\begin{tabular}{lrrl} + \\hline + & A & B & C \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\otherline + \\end{tabular} + """ + ) + assert styler.to_latex() == expected + + +def test_column_format(styler): + # default setting is already tested in `test_latex_minimal_tabular` + styler.set_table_styles([{"selector": "column_format", "props": ":cccc"}]) + + assert "\\begin{tabular}{rrrr}" in styler.to_latex(column_format="rrrr") + styler.set_table_styles([{"selector": "column_format", "props": ":r|r|cc"}]) + assert "\\begin{tabular}{r|r|cc}" in styler.to_latex() + + +def test_siunitx_cols(styler): + expected = dedent( + """\ + \\begin{tabular}{lSSl} + {} & {A} & {B} & {C} \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + assert styler.to_latex(siunitx=True) == expected + + +def test_position(styler): + assert "\\begin{table}[h!]" in styler.to_latex(position="h!") + assert "\\end{table}" in styler.to_latex(position="h!") + styler.set_table_styles([{"selector": "position", "props": ":b!"}]) + assert "\\begin{table}[b!]" in styler.to_latex() + assert "\\end{table}" in styler.to_latex() + + +@pytest.mark.parametrize("env", [None, "longtable"]) +def test_label(styler, env): + assert "\n\\label{text}" in styler.to_latex(label="text", environment=env) + styler.set_table_styles([{"selector": "label", "props": ":{more §text}"}]) + assert "\n\\label{more :text}" in styler.to_latex(environment=env) + + +def test_position_float_raises(styler): + msg = "`position_float` should be one of 'raggedright', 'raggedleft', 'centering'," + with pytest.raises(ValueError, match=msg): + styler.to_latex(position_float="bad_string") + + msg = "`position_float` cannot be used in 'longtable' `environment`" + 
with pytest.raises(ValueError, match=msg): + styler.to_latex(position_float="centering", environment="longtable") + + +@pytest.mark.parametrize("label", [(None, ""), ("text", "\\label{text}")]) +@pytest.mark.parametrize("position", [(None, ""), ("h!", "{table}[h!]")]) +@pytest.mark.parametrize("caption", [(None, ""), ("text", "\\caption{text}")]) +@pytest.mark.parametrize("column_format", [(None, ""), ("rcrl", "{tabular}{rcrl}")]) +@pytest.mark.parametrize("position_float", [(None, ""), ("centering", "\\centering")]) +def test_kwargs_combinations( + styler, label, position, caption, column_format, position_float +): + result = styler.to_latex( + label=label[0], + position=position[0], + caption=caption[0], + column_format=column_format[0], + position_float=position_float[0], + ) + assert label[1] in result + assert position[1] in result + assert caption[1] in result + assert column_format[1] in result + assert position_float[1] in result + + +def test_custom_table_styles(styler): + styler.set_table_styles( + [ + {"selector": "mycommand", "props": ":{myoptions}"}, + {"selector": "mycommand2", "props": ":{myoptions2}"}, + ] + ) + expected = dedent( + """\ + \\begin{table} + \\mycommand{myoptions} + \\mycommand2{myoptions2} + """ + ) + assert expected in styler.to_latex() + + +def test_cell_styling(styler): + styler.highlight_max(props="itshape:;Huge:--wrap;") + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & B & C \\\\ + 0 & 0 & \\itshape {\\Huge -0.61} & ab \\\\ + 1 & \\itshape {\\Huge 1} & -1.22 & \\itshape {\\Huge cd} \\\\ + \\end{tabular} + """ + ) + assert expected == styler.to_latex() + + +def test_multiindex_columns(df): + cidx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df.columns = cidx + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & \\multicolumn{2}{r}{A} & B \\\\ + & a & b & c \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + s = df.style.format(precision=2) + assert expected == s.to_latex() + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & A & B \\\\ + & a & b & c \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + s = df.style.format(precision=2) + assert expected == s.to_latex(sparse_columns=False) + + +def test_multiindex_row(df_ext): + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index = ridx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + \\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex() + assert expected == result + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + A & a & 0 & -0.61 & ab \\\\ + A & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + result = styler.to_latex(sparse_index=False) + assert expected == result + + +def test_multirow_naive(df_ext): + ridx = MultiIndex.from_tuples([("X", "x"), ("X", "y"), ("Y", "z")]) + df_ext.index = ridx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + X & x & 0 & -0.61 & ab \\\\ + & y & 1 & -1.22 & cd \\\\ + Y & z & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex(multirow_align="naive") + assert expected == result + + +def test_multiindex_row_and_col(df_ext): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", 
"b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & \\multicolumn{2}{l}{Z} & Y \\\\ + & & a & b & c \\\\ + \\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex(multirow_align="b", multicol_align="l") + assert result == expected + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & Z & Z & Y \\\\ + & & a & b & c \\\\ + A & a & 0 & -0.61 & ab \\\\ + A & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + result = styler.to_latex(sparse_index=False, sparse_columns=False) + assert result == expected + + +@pytest.mark.parametrize( + "multicol_align, siunitx, header", + [ + ("naive-l", False, " & A & &"), + ("naive-r", False, " & & & A"), + ("naive-l", True, "{} & {A} & {} & {}"), + ("naive-r", True, "{} & {} & {} & {A}"), + ], +) +def test_multicol_naive(df, multicol_align, siunitx, header): + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]) + df.columns = ridx + level1 = " & a & b & c" if not siunitx else "{} & {a} & {b} & {c}" + col_format = "lrrl" if not siunitx else "lSSl" + expected = dedent( + f"""\ + \\begin{{tabular}}{{{col_format}}} + {header} \\\\ + {level1} \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{{tabular}} + """ + ) + styler = df.style.format(precision=2) + result = styler.to_latex(multicol_align=multicol_align, siunitx=siunitx) + assert expected == result + + +def test_multi_options(df_ext): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style.format(precision=2) + + expected = dedent( + """\ + & & \\multicolumn{2}{r}{Z} & Y \\\\ + & & a & b & c \\\\ + \\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + """ + ) + result = styler.to_latex() + assert expected in result + + with option_context("styler.latex.multicol_align", "l"): + assert " & & \\multicolumn{2}{l}{Z} & Y \\\\" in styler.to_latex() + + with option_context("styler.latex.multirow_align", "b"): + assert "\\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\" in styler.to_latex() + + +def test_multiindex_columns_hidden(): + df = DataFrame([[1, 2, 3, 4]]) + df.columns = MultiIndex.from_tuples([("A", 1), ("A", 2), ("A", 3), ("B", 1)]) + s = df.style + assert "{tabular}{lrrrr}" in s.to_latex() + s.set_table_styles([]) # reset the position command + s.hide([("A", 2)], axis="columns") + assert "{tabular}{lrrr}" in s.to_latex() + + +@pytest.mark.parametrize( + "option, value", + [ + ("styler.sparse.index", True), + ("styler.sparse.index", False), + ("styler.sparse.columns", True), + ("styler.sparse.columns", False), + ], +) +def test_sparse_options(df_ext, option, value): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style + + latex1 = styler.to_latex() + with option_context(option, value): + latex2 = styler.to_latex() + assert (latex1 == latex2) is value + + +def test_hidden_index(styler): + styler.hide(axis="index") + expected = dedent( + """\ + \\begin{tabular}{rrl} + A & B & C \\\\ + 0 & -0.61 & ab \\\\ + 1 & -1.22 & cd \\\\ + \\end{tabular} 
+ """ + ) + assert styler.to_latex() == expected + + +@pytest.mark.parametrize("environment", ["table", "figure*", None]) +def test_comprehensive(df_ext, environment): + # test as many low level features simultaneously as possible + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + stlr = df_ext.style + stlr.set_caption("mycap") + stlr.set_table_styles( + [ + {"selector": "label", "props": ":{fig§item}"}, + {"selector": "position", "props": ":h!"}, + {"selector": "position_float", "props": ":centering"}, + {"selector": "column_format", "props": ":rlrlr"}, + {"selector": "toprule", "props": ":toprule"}, + {"selector": "midrule", "props": ":midrule"}, + {"selector": "bottomrule", "props": ":bottomrule"}, + {"selector": "rowcolors", "props": ":{3}{pink}{}"}, # custom command + ] + ) + stlr.highlight_max(axis=0, props="textbf:--rwrap;cellcolor:[rgb]{1,1,0.6}--rwrap") + stlr.highlight_max(axis=None, props="Huge:--wrap;", subset=[("Z", "a"), ("Z", "b")]) + + expected = ( + """\ +\\begin{table}[h!] +\\centering +\\caption{mycap} +\\label{fig:item} +\\rowcolors{3}{pink}{} +\\begin{tabular}{rlrlr} +\\toprule + & & \\multicolumn{2}{r}{Z} & Y \\\\ + & & a & b & c \\\\ +\\midrule +\\multirow[c]{2}{*}{A} & a & 0 & \\textbf{\\cellcolor[rgb]{1,1,0.6}{-0.61}} & ab \\\\ + & b & 1 & -1.22 & cd \\\\ +B & c & \\textbf{\\cellcolor[rgb]{1,1,0.6}{{\\Huge 2}}} & -2.22 & """ + """\ +\\textbf{\\cellcolor[rgb]{1,1,0.6}{de}} \\\\ +\\bottomrule +\\end{tabular} +\\end{table} +""" + ).replace("table", environment if environment else "table") + result = stlr.format(precision=2).to_latex(environment=environment) + assert result == expected + + +def test_environment_option(styler): + with option_context("styler.latex.environment", "bar-env"): + assert "\\begin{bar-env}" in styler.to_latex() + assert "\\begin{foo-env}" in styler.to_latex(environment="foo-env") + + +def test_parse_latex_table_styles(styler): + styler.set_table_styles( + [ + {"selector": "foo", "props": [("attr", "value")]}, + {"selector": "bar", "props": [("attr", "overwritten")]}, + {"selector": "bar", "props": [("attr", "baz"), ("attr2", "ignored")]}, + {"selector": "label", "props": [("", "{fig§item}")]}, + ] + ) + assert _parse_latex_table_styles(styler.table_styles, "bar") == "baz" + + # test '§' replaced by ':' [for CSS compatibility] + assert _parse_latex_table_styles(styler.table_styles, "label") == "{fig:item}" + + +def test_parse_latex_cell_styles_basic(): # test nesting + cell_style = [("itshape", "--rwrap"), ("cellcolor", "[rgb]{0,1,1}--rwrap")] + expected = "\\itshape{\\cellcolor[rgb]{0,1,1}{text}}" + assert _parse_latex_cell_styles(cell_style, "text") == expected + + +@pytest.mark.parametrize( + "wrap_arg, expected", + [ # test wrapping + ("", "\\ "), + ("--wrap", "{\\ }"), + ("--nowrap", "\\ "), + ("--lwrap", "{\\} "), + ("--dwrap", "{\\}{}"), + ("--rwrap", "\\{}"), + ], +) +def test_parse_latex_cell_styles_braces(wrap_arg, expected): + cell_style = [("", f"{wrap_arg}")] + assert _parse_latex_cell_styles(cell_style, "") == expected + + +def test_parse_latex_header_span(): + cell = {"attributes": 'colspan="3"', "display_value": "text", "cellstyle": []} + expected = "\\multicolumn{3}{Y}{text}" + assert _parse_latex_header_span(cell, "X", "Y") == expected + + cell = {"attributes": 'rowspan="5"', "display_value": "text", "cellstyle": []} + expected = "\\multirow[X]{5}{*}{text}" + assert 
_parse_latex_header_span(cell, "X", "Y") == expected + + cell = {"display_value": "text", "cellstyle": []} + assert _parse_latex_header_span(cell, "X", "Y") == "text" + + cell = {"display_value": "text", "cellstyle": [("bfseries", "--rwrap")]} + assert _parse_latex_header_span(cell, "X", "Y") == "\\bfseries{text}" + + +def test_parse_latex_table_wrapping(styler): + styler.set_table_styles( + [ + {"selector": "toprule", "props": ":value"}, + {"selector": "bottomrule", "props": ":value"}, + {"selector": "midrule", "props": ":value"}, + {"selector": "column_format", "props": ":value"}, + ] + ) + assert _parse_latex_table_wrapping(styler.table_styles, styler.caption) is False + assert _parse_latex_table_wrapping(styler.table_styles, "some caption") is True + styler.set_table_styles( + [ + {"selector": "not-ignored", "props": ":value"}, + ], + overwrite=False, + ) + assert _parse_latex_table_wrapping(styler.table_styles, None) is True + + +def test_short_caption(styler): + result = styler.to_latex(caption=("full cap", "short cap")) + assert "\\caption[short cap]{full cap}" in result + + +@pytest.mark.parametrize( + "css, expected", + [ + ([("color", "red")], [("color", "{red}")]), # test color and input format types + ( + [("color", "rgb(128, 128, 128 )")], + [("color", "[rgb]{0.502, 0.502, 0.502}")], + ), + ( + [("color", "rgb(128, 50%, 25% )")], + [("color", "[rgb]{0.502, 0.500, 0.250}")], + ), + ( + [("color", "rgba(128,128,128,1)")], + [("color", "[rgb]{0.502, 0.502, 0.502}")], + ), + ([("color", "#FF00FF")], [("color", "[HTML]{FF00FF}")]), + ([("color", "#F0F")], [("color", "[HTML]{FF00FF}")]), + ([("font-weight", "bold")], [("bfseries", "")]), # test font-weight and types + ([("font-weight", "bolder")], [("bfseries", "")]), + ([("font-weight", "normal")], []), + ([("background-color", "red")], [("cellcolor", "{red}--lwrap")]), + ( + [("background-color", "#FF00FF")], # test background-color command and wrap + [("cellcolor", "[HTML]{FF00FF}--lwrap")], + ), + ([("font-style", "italic")], [("itshape", "")]), # test font-style and types + ([("font-style", "oblique")], [("slshape", "")]), + ([("font-style", "normal")], []), + ([("color", "red /*--dwrap*/")], [("color", "{red}--dwrap")]), # css comments + ([("background-color", "red /* --dwrap */")], [("cellcolor", "{red}--dwrap")]), + ], +) +def test_parse_latex_css_conversion(css, expected): + result = _parse_latex_css_conversion(css) + assert result == expected + + +@pytest.mark.parametrize( + "env, inner_env", + [ + (None, "tabular"), + ("table", "tabular"), + ("longtable", "longtable"), + ], +) +@pytest.mark.parametrize( + "convert, exp", [(True, "bfseries"), (False, "font-weightbold")] +) +def test_parse_latex_css_convert_minimal(styler, env, inner_env, convert, exp): + # parameters ensure longtable template is also tested + styler.highlight_max(props="font-weight:bold;") + result = styler.to_latex(convert_css=convert, environment=env) + expected = dedent( + f"""\ + 0 & 0 & \\{exp} -0.61 & ab \\\\ + 1 & \\{exp} 1 & -1.22 & \\{exp} cd \\\\ + \\end{{{inner_env}}} + """ + ) + assert expected in result + + +def test_parse_latex_css_conversion_option(): + css = [("command", "option--latex--wrap")] + expected = [("command", "option--wrap")] + result = _parse_latex_css_conversion(css) + assert result == expected + + +def test_styler_object_after_render(styler): + # GH 42320 + pre_render = styler._copy(deepcopy=True) + styler.to_latex( + column_format="rllr", + position="h", + position_float="centering", + hrules=True, + label="my lab", + 
caption="my cap", + ) + + assert pre_render.table_styles == styler.table_styles + assert pre_render.caption == styler.caption + + +def test_longtable_comprehensive(styler): + result = styler.to_latex( + environment="longtable", hrules=True, label="fig:A", caption=("full", "short") + ) + expected = dedent( + """\ + \\begin{longtable}{lrrl} + \\caption[short]{full} \\label{fig:A} \\\\ + \\toprule + & A & B & C \\\\ + \\midrule + \\endfirsthead + \\caption[]{full} \\\\ + \\toprule + & A & B & C \\\\ + \\midrule + \\endhead + \\midrule + \\multicolumn{4}{r}{Continued on next page} \\\\ + \\midrule + \\endfoot + \\bottomrule + \\endlastfoot + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{longtable} + """ + ) + assert result == expected + + +def test_longtable_minimal(styler): + result = styler.to_latex(environment="longtable") + expected = dedent( + """\ + \\begin{longtable}{lrrl} + & A & B & C \\\\ + \\endfirsthead + & A & B & C \\\\ + \\endhead + \\multicolumn{4}{r}{Continued on next page} \\\\ + \\endfoot + \\endlastfoot + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{longtable} + """ + ) + assert result == expected + + +@pytest.mark.parametrize( + "sparse, exp, siunitx", + [ + (True, "{} & \\multicolumn{2}{r}{A} & {B}", True), + (False, "{} & {A} & {A} & {B}", True), + (True, " & \\multicolumn{2}{r}{A} & B", False), + (False, " & A & A & B", False), + ], +) +def test_longtable_multiindex_columns(df, sparse, exp, siunitx): + cidx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df.columns = cidx + with_si = "{} & {a} & {b} & {c} \\\\" + without_si = " & a & b & c \\\\" + expected = dedent( + f"""\ + \\begin{{longtable}}{{l{"SS" if siunitx else "rr"}l}} + {exp} \\\\ + {with_si if siunitx else without_si} + \\endfirsthead + {exp} \\\\ + {with_si if siunitx else without_si} + \\endhead + """ + ) + result = df.style.to_latex( + environment="longtable", sparse_columns=sparse, siunitx=siunitx + ) + assert expected in result + + +@pytest.mark.parametrize( + "caption, cap_exp", + [ + ("full", ("{full}", "")), + (("full", "short"), ("{full}", "[short]")), + ], +) +@pytest.mark.parametrize("label, lab_exp", [(None, ""), ("tab:A", " \\label{tab:A}")]) +def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp): + cap_exp1 = f"\\caption{cap_exp[1]}{cap_exp[0]}" + cap_exp2 = f"\\caption[]{cap_exp[0]}" + + expected = dedent( + f"""\ + {cap_exp1}{lab_exp} \\\\ + & A & B & C \\\\ + \\endfirsthead + {cap_exp2} \\\\ + """ + ) + assert expected in styler.to_latex( + environment="longtable", caption=caption, label=label + ) + + +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize( + "columns, siunitx", + [ + (True, True), + (True, False), + (False, False), + ], +) +def test_apply_map_header_render_mi(df_ext, index, columns, siunitx): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style + + func = lambda v: "bfseries: --rwrap" if "A" in v or "Z" in v or "c" in v else None + + if index: + styler.applymap_index(func, axis="index") + if columns: + styler.applymap_index(func, axis="columns") + + result = styler.to_latex(siunitx=siunitx) + + expected_index = dedent( + """\ + \\multirow[c]{2}{*}{\\bfseries{A}} & a & 0 & -0.610000 & ab \\\\ + \\bfseries{} & b & 1 & -1.220000 & cd \\\\ + B & \\bfseries{c} & 2 & -2.220000 & de \\\\ + """ + ) + assert (expected_index in result) is index + + 
exp_cols_si = dedent( + """\ + {} & {} & \\multicolumn{2}{r}{\\bfseries{Z}} & {Y} \\\\ + {} & {} & {a} & {b} & {\\bfseries{c}} \\\\ + """ + ) + exp_cols_no_si = """\ + & & \\multicolumn{2}{r}{\\bfseries{Z}} & Y \\\\ + & & a & b & \\bfseries{c} \\\\ +""" + assert ((exp_cols_si if siunitx else exp_cols_no_si) in result) is columns + + +def test_repr_option(styler): + assert "

zAf5E4rtQH-=cYO_iRY(U`8at2Fbt{~?Jyoh$}{i2+we((b9;Tc+f3@#tkoYX6ps!; zcN~g}YhJD%5eU_0z$Ps~&GWm>Xe=@rjst=K^5@{8hpR&Zz?b6w{)!=>$;~Krp66U%O~u47xhH)nQWO)E!2L(HZEq>q9*0M26#tDDa4jii#qs5!c-o^^7Hj zL^gJmz*G$Oq8*E;OX?{~|A%jinSoCJd`KBnghA1oDYW zb-EmkUyOydGz_dFrOuyGpd7Spc+;)B&dRenZ1DZ~*2w8DuZ&SlrSOx!OjI;dk==pToqFSrpS$vAZHV_>Z z!wn6)9`N=ufRD?Za34XA$Gv4XXb|us9*#~@s!^?@Y-?M!q{&RI$bW;)ZnF@>6s|@P zoEQxEInU_wTMr0zhrfN%Gnx|~_nTuNp6EPu4BLUrHv_NzR3OZEFl32g{ppR`Tp2P7 z8`PVnsSTBKnClZ4g-uKTY&;k>s>`guy@xN^NFX7<7JPvh;(|e?6NyDx-znWMCQ9s^ z5_7IZ6I76j%iUU~)AWNgw(V>Rb1U*v84YEnsv42>EE2Cn+1dwq=GM0 z?Ig)TN#tuL^fG$h@kn4`DzyIAIZpam8a>@^wD=t);gBzUj`Tph>=O+%V1gv3J)*){ zYRGha=D>;0*Pf@`_@@+(Mr3NtcQwws@e#m1S8S#mYKiDc5Hyd34?~5wLD(vOaKev&%H20`M$(wSNADRSgl2 zjgBr_wQrXfLbevvaPP>3H5ydX2_O3~M$w2@bqkKFllh4%-OL4`D4#BoO~*;-Q^Xw) zkw#{*;rwO4+FadH5C6w_HHbbq|RQ%{-U^jn`qJy%^0b3CHRnCEI!T`Kc5oxJFv zgUv{bWA1IAu!29wq`DWQWSW}@?m_RilEa}N@s1&}7X&ESAr~{u8{mONrR{I|zq{|C z>M6L8DRFO|5{)$`;p6JZ>}lxlZE1hWAyvCF3my1tsu!6pb~URg82Mwo*1FTWS#?F} z>Stm47xgb&)k!NlM!X=Fs!WhIbz{PIfB1)Pz0W0SbUw*4!FIszlA==itS=fPaokPx z2{M!Xci+4oK=b@5d*++LRu=Z}?Fu1_WN4`D7P9^87$E~Dm{$uK*dSU$F6bOWb;Ws=R;MH7S|llK@eYEDhTeg?oZ3ochn~=wRJsZE8kSv z!a$WKF#iq+u@Ev~vNuYWfmr@brP2_0Az9LH%KR4F2YM&HymxR|1j9zk9VM~(Zam@u zL6c(+k;jjXFpV&Tl=)Dj%50Be@3H-5A&Amn_S<3^Wj^H=O<*1TXD*W6HzxNJP*(xu z(fMW~=+x%T@eeTw;Wcz=xq}V9D`1NqW3nlQF)Ny(jb^K~4|@CU^-j=O@_DBZ8+MBm z^Zba#d>rq>XIHUcB%b5TrsV711*bf=xQeY<%JZUt4}X~&MKDXutUe%GVraw-|EhYR zH?dqwj7_}WfDmJ5>jR>{DO-S~RKT)DamqHw=1P}g?H!41>Ar59D`5Uii3CoFP z^*?QaLCVi%vku4Ts?OGMgA$S|$8Z*^89U5sO1^nRp?}C{BJwT!cSSzAQ1Fu{#~^IJ z#Y<+=+$zwUOjxmyvqq|py1AOaUO^vu3*lo)u3~JCJ1Rnvbe}BoCbR)%kk12GwSFxP zoD~Y(Z$V$Upu~rsYoo#|^Wu&WcDQq#jVC)wNQ!x~b@A^XVGcLH zy{aO#j?Tm9r)0LEs%E%8B4jEfr3a_{f|o~rP>uA$?1(~I%=&-fd{(SCf#wU zuUre-`3Iph*(?+d2S|#D#S?~1!2FVig^)(fj9kK+3vFHGkY~8hd>w@U!nXK`YqUSo zY#X@;Zw)1z7)J&>|JoB%Q;Zv=#H8K&9MgXn!K)1z&KC|Iz-24s*uqal$8dw5+}cJb z;Ghw!QSvX{h9TDF6k7+Bvs=xGJu4K&J0>Fu36pHsp#XxP;L!;4TQ>c53;9U@24w(v z9B1O#B3K?zTQa&0Fv0e^2dUjdFDA^b^SU?S57j@oaR$O@c{d{vd~I&uz~I=MT>;EH!)!9 z;<{f+>DlnfHXXev3qO$|S^>p|cWK2!r`hH^omC>?XR_${>a6jougae05yNsuHT`y7 ztu@(`(9LADilB%n0E;BoNJXL0S+647@Ftc#ia;uf*B)lj0!()oyY(EYElI&V=^i>; z%Hz$k?8n-U%P6r&kfKo|X53K$D%8N%3GoAl1{_ITKV?@zBBrizh;yI@bihRF7j-NG z%)U=X?Fa-oeLJLM(4J8MopYK|6|7=T2#sX(bEX&(9wf+g`a63 z5RCRi%wd@NC9S^9pb;>+WRXa%p3a!pm8!NIBl48_5~kAN%j@3e6uR-otG-xMR&Y5? zAQ3>>*3lS!3*^NUY_b%_0pK=N)8@2z5UYvkoK?gxtsOk6rS%UU?X!l9xwioQMoz{? zB8LN?KAUT;x4G?;Ntz;Cl0}3W>5tB*DWsOHS?a8Ymt#6ApoZ)KYj>cY#v->DwTXPopxb>jL01|+-f$=#)Hu6msHQs`m;0_6gagI{k zQ>QG(rh^yciLa{U5YNQvxcA%S?c5K!If~kguAxO#!35R#+$2dj~ z0mLq;YzKy~BdszOSE!UAb>1xK*3So>BCN7l(w)DwLpJn?h#}7qR^TH6<|HOJR3c$r zlV}1z=;O#^PhU7&vqzooHgU{RL^AB@gdCH-?p5X)ymK5PoKVu~CmZfMTH5v`Rqkg6 z`OEU^Z(qC3peszMpb1uRv&@z>d;z&xzI+%3`L1xef}BJJ@;>~Gm>Ld(Ln`FV-_GI` z3s2#(%JB#~ObW@Z=xr48-P+@R_v_SrmPDlm2(Yy(eLyF(5zFPYq~T@xt%XXP=ieZ4 z8AhMH*(J<+2}qL+1~M7dfC?M*Q#?1iTZcJq>{Xl?4E$xQ%_lPF+T3gqu>lERpbf*lp0_>p?gPAZSS)ECJb%2}I7qLUV>QrZwl2hkWk4Max79S|KB+K@h|Q zX~6VzG-uqCs?au@5YmdxeDB8ihOUFvgSHjwEyR>xByVj7HH9BBRi#&P&MtL`E-p-D z7<5X_L%xtjIeiY}bV=0?7n+k9x>^ql!Dzzb&Jzi|!-# z=1QE>H_=bZ5Tq9(Y1mx2q}iN`TdyK z1~H@>0%}4}!GFT65U~Xim%45leT~78SP(n}k7$WXODG}SBK(eqAXrLT5r!U$KCmtk z61VOX(mDpD1PSYn_Kw3sV&5Z8DE$e0FIfsXe}_Kz1OcGO1Y1N|CjJw!0AisXf(QsA zv3-ViYIWWp$U*eAI@|-H3Zy~+pgAq={cnolB+G})&PLaLd!36reI6$kaU_?W)%lc! 
z?=k^3E27jN_`E(O%@lu4iL8&$lS{L;zdBtxKtG%3-kiR?DJxnbj1ZJ)LHQ&r&Tu62 zGAv)TTZYx}aX00_)EI01?U^8{0NR@05NU`!jaLuhCA2pjLQUWL7mGXo+6OKq_-m20xTR9 zV8U$=BB&IOp0|r8tKWzV$WN}yl=q2#~f}9UHlG;R?t8 zO)JxK_iW}q?ppN?c(hfxnp((VJ0Co{KL5?yp>Pf!ZVyu|Mbm67?ngFP3T=>?J+~ss zYLGU_nL+U};Yejr&b|$}U}*QNBbLufZScIsFgVj3VlE zsaTRWksUvlFXqWDYD}(fJ)L|AKJ>q)GjpoW($eR2e#z}OOQR8Svq`16&lm9K8&!X; zfU#gqKe)}Dims>Va<85&^Wpg>e{}K@q!m((zT$!Tna*U30W3^a1xx*-h6gfnz&(BgK!Z>KjerXIAe;X%rLKzNc38bP` zZSVMRncSMX3qRadWBzA#rA8}TxasA9OSEa;OjMoJO5&<-JCDn5Nxg&Tjo6YZa&7#daCMgQsK3^+t9{kN~#WU(^YGen1};ZUbU0 zH`%0%F)`nvPD_A2)+EozAat9uaWfg%RH2zNWqa?(?6a_m)?`Cb!_1<^A=aY_mvKNtXNoVz#=)TJa0%MMkU5 z`R8VEFjCvAvEW9OX)z4vr(B2LSTMHs4tofr{w z_%mYuMx8v8&s3dJ|K3EV_1^W-Y=y_!hBG4c-MYsn{UM-EJQ`;jLg8DTZFI6h*jt@C zLntz9283oBu@&(;*#N7z0#y{eVc(?wA=MIR+LfkZXqvxN_Y;{KWF54@(~litsdMz) zJJirOOCVHH_vdM`<;jXu?VBGZ70#haHCKN4*gbztakpTFQXa^0}Ec61i8RELBkun1X^66g!ps*)O+uIY`_dq=liec9y0x*^dR` zrDW`xjQjP?aUT&1QXQx8ou&6t)&Eec>(j)8o9A%A-)dHUUG%)T73CBPeifF*&bXU# zmq<&=Y`3^AU$srN*qG#cEp)n*!oDZ=Gf9GA;Ag?HdFgisp%Dg{CX~j2>nM=(pKL2j zx(Csyu$>eC0-GR}K@jqy*pRa9%IZDSeuLz15e5~#{P!$h_ik6U=PK}?niJl+hY|4~ z$nT5?f(B5#oBwVC?a(yyzuH(!0?l@-S)q>7M;<%B#|6GiULf&O*7uzKixQH|?WHx+ zDtYif6I;F*<~M`nVk=K#2J&Tj{F|Zs*_j0Xj@$IA6&APdojI=7ZV1D| z!Wuxox!nJ4HCL`IaJ1HeV1e5KnCLRmFGD6+C_T-SFThWo!ck&*@+ZDnAOu=2Pmjs- zmaEUUT@NMS%{)=Sf!XP^Wa_7952MP&QkGMQvY@h+N+F9T2V6fL*3rZ0H)r$Ad$MRs zcVh>#GB()37Bpl>%>s^HAa`mlM_sX=g^7V7@x~1*gzw4Qv z+_^$wJ05qIK$$ucnwH^}if197f1IR7mwl>xPiNhV6q0e+Oo6OEA!wKBTd_{#-@ zOSMn_l0vKMdyk+7vZO_D31JyO;8cr6PIE799F1;MP>7)+W2UQFEHDZkGoGx+DLdrV$-rUY{Oi)e6Lb2`h}u_tQvN=GnSeXTX;!?3(S z?aN7*9SfK6J{N6@wW(tdYOo5ep0BCsl`;e>$T*u%9dm51am){O5HrV})rF5MzqFnE zj-)s$87TTDy}ac4$2j@hH_51_7qz79JY21;ogD=$Loct|l~$UZvZVuapv!!cq8rbT zPd5YJ^w!$@)Q{&XR0oF}y)j%y8CrhQi(!VB23|v?wGxOz zIt|h0XfTB^1W~6|Fc2=cdV}=^`1pXfm|X~NJ$m$OS3puIq4Ne*4D03k60QgMZi}hh z9UFpzWYT!yV7Q;N&vI5}dYuA9liNUIRLx1k;gy(S>MLG@7dcHwDN;E$OL4_+j_BM! 
z%ff%_af03H_1eYNZUPy*McH{#@*koV!mXTn_u^F_7Tn={VqXL2rfUe@6p7UF-Kygr z1~qR&E|-d*iddT2d9KSo9=CAg@ZYXaSz4a0nqsO%W)F?nb^Q=*DS|L(@>`IBV8Gvd zp6AmBRxyt|S7O{%Cj&vC?-q{Hs}N-fq7>(d-2?DQLokTF0Xq;Dze24-CCU79Z*+#3 z?-^`tKK`BTwGv8biws=|PEgrl{>Ml4A8OKn)qSreFQ%!k;Tfzp{qFQcEKg=iWMy*z zoy7d!j#>qHf9EPbq|&NN2-!j8)axn1Hw04qrMf`>mm5teUG=Ue{kd(%!6C6(VYs~8 z;Jh;NN)B*?ACL41iS535Eb&01MUl8XXKf4(!H=i@b$blA*V&)(;J%6}Fh%WnF^u~Y zQvI0T_lF+Mo4uPqiiAS%-|du5-s$BO;X+;PQviO2<&tufA!qi)09+|w?#uWRm zs?e!w$2@3%Xe@77iyHufB@zGx5pb0zGTHrjvS(pM5PK!rPbAIzf>DR>0aSF{FH^{4 zKqg>$CWqaZ3oux~cE65DY@~23mCh$xlnICuMnrayZi{L5k5;sk@78;V5Whq7=)ajsIM z3{X8Jk1;ld-6A^+WPBIX)LRlm8d^ZvwP_}+!0*HbJY)QA$2CozZC5L;@{?O$KgN5E-G@CE;*!|STplWz|| zwUh}UUJ{W1&J@VdaW3-1pt}nsnZ;oTgJ;;UEc~@WzOQOX=iwd^_;@w&K@nj|07w#Sf@Yw_B(G@O9QJfuPVR4FD z(1%B!`7blgUddJy;k$!NeyUjq*Ms-N;zXMrHr*izB72GE{CW9i$1O7IOEqL6N}=_X zl-|RK-QFiQ$Xr42A}Z|@nmvCMyIo|GM@<4RA|_aDhb25w*6OImucCLpN6`HIxm+@8 zs4FXcKHaRO`fXavTB+#E#3<5HqwA@;wq;b^P}KqG#}%i3hkZw5YujEA!cU7K!mZvh z30q0#syxV8@S3dWgHlla?@`d;O_^x$o=GiaZ6<6w&7YD}g6q#eF&l^Ua*)yd9yQ8w&9*%mqP zvEq6C{&h9UyYD-iL(|2i~xE3+j0s^ALg6 zgbV5GALb(Mc(S3tRm>V@p|gY|a6H;5tK}32c8G$uB4A6%5sXrXPRRjEQNAZ3M5B=W z457+8``;&h2xMBjMezbMu>_pWsKOx}k}l8gw);Vpc)WdwhBCjZY@Us2t=5&T?m1H*a- zBx+YG;k=uF9Ix~tUf0yycZ85D=mzBotDL?k2>F1bh72W6A>iGKzvSMWu*;{`oc^v7 zsnl?(raE~en=p)(2#Lxb1gq*(wFl%!OWXjt6`K#~Uj^?Ak|ga7L%g$Ru(1Wz{5aEh z*}4wc6b==<)(?1_hR5%8MOiGqM7Wgv6!IB$0OIXXBLc`f_GjO5Spp|wIqas+*e7VI zg|K6rcksrl2~+@KgLRu(Tb|nX-w#Mx1VNBq4q{R9YB{Bn)VpMsfYSJ{G_5Ct6oLmW z`e=?gtP!!X5SQ?qkfk@>INxz&pBzv}6sZAn5^x%eW*mY*)xJv_-08`Zjzj!u$Kk{{ sC1vxO-odC5vERA}eIQeE&l}WgiPDtB#}~+HJ^(;kTtTcx#4zap0TbVtDgXcg literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/tg_axNone.png b/doc/source/_static/style/tg_axNone.png new file mode 100644 index 0000000000000000000000000000000000000000..00357f7eb016b15cc61505311b014c0598c5ce54 GIT binary patch literal 13502 zcma*NWmFu&xAu!OxCRLB4uiY9ySux4aCg@b2m}r8?(Po3U4sO7y~F>U_q=DVyY5W{a0<||2>=H4jU_*b4F(2ZWGyPH zBrPgRtmNWoVQps)1|}7imJX|?CXE$-+Bp@qFk@!rGL=j*OeqVaY~7Y;Ei%u>Mr&d2 z1SN@yhNdK876~6OgRdrr2^kUye+d3nZNZs|n3nb>u_$wezuo6P$93yv%CFgBqfFC% z3I*&5D`i?ShyqONz#KlfQ|~xQdDd+mDi92JgczAs`>SupFJi&K<=vNydw-l!iL>Ha z-^zfGcSHGPg4|vRFnAHJ_@BKp;*W^CKk2)OF~Bg1qHHTB_orRa`sN@Cg+pzRGV6PO z-rzU)KyJB+glwR`6M*q$j$%525zCV}dt=3fERc|uLl^ULWJJ`k_Qh9-QR3}X0`Q89 zizjcKL*4h@RU&She>ms=kfW3wK(KwX@bYwk5e#d6|5%UzurHS9DZ*T^ z_}v~Wp9oeajn)s@@A!wLKR9zp$})fA;MR_4uU`g=Pe~zP>5n;M3_|xADNn;eD9+-l zvcA_bGK!{7n=sy`Wa6B3E?5?pzw}A>Wkk!(Fo_iK4Jou`N-(R*f=n_ z?PN_u#9mScZsByydZ6C};j}WZPFardJY@H(v7NZ1GTpHEhX&@q@}<|3Y^*P=BL+Ub zG++8`&2??iMS9rNq;En8+7qJh(+_?h;kF8Z0K0ork=sLZVY`QL@(85BG-DW;xj{iB zhHxfE+7E(v@plVCV}jM_A>@KY*@lD=rjCRt2S*?eRwD+p?UkrO+7I@v!NvzG`UQax z*|-hG1+61Y)dI#Ca%u#&4)f4U{T&S0gXx80K?3L##*Bm?6)j7|G6TyLwXa3&@s59JWy{pDMNN*fk0;yyF2#Ons$29G4-na6B}?hWY|tjzQ^Jv`0Wei^DM zNZ-hi3)2AFb=!>#Zz|}ahr)@%7vCWW*t4{=c1isTks|?)2`4UEnis3YLCvW~OpBWq zi2jSC;96<0L`IFQ4DwG*TNJK%r8u(XfqiI`q)$Qr480qVH>)>i8>W7YgYd&I z$Nb?L)xXs^@DgyQfwlciOi4J%77UcwsLj$67DWp&v03M-25Lq=jN3rnbARHMvQU|hhRJ^E+ zNR&uNY4T*A(e#Gk0SbGHv_ysEs>C5FqEQtyzdAhbaC-6!lBLA<5zkRa8q8>p!Ax}d z(ZaL51!YDRLS=zS<-yoN922`*1`~4=1`|<}>H)Z2on6M=^1=GSm_fyuEz|%S;l!DA zrxYU@yL9Rls|=i^@9BjyKcrz~ie=8zD>6E0pXA`FiWm+^2GhrqcqrYKZL*{DZIm`j z=uePszTBa^i{R2#l1+jxY$`*5WCO@gK}Jz8N2zD=Q!aBtJ5w1qx>(+HM#T!cLAAkn zl}g)O8+*lTMZYetPPC5ICE8`m<;vxoi@Zzofxzhe0snr=!NX|tn4p>v-Cw#2C9QHN zv7O{TS#fJ|AMtdPoL!nhADNouE|r=&m?BddKbiS4+A)JM-viMx>@>r&8MSQnWVL9u zsZzNz6?Fpj)-v(3BlX;Q^S_L@nBH)m5duZUg@=`tYP@BO)g0wj1%$<2`km5mc`(ZT z5@f;!QU$uEGOFtHhOPw`A&2}7!Rc`t`HRAp29;m!3H|~8{ameFop#%I!{J%sLE=Gi 
zqiIuUJ8nyBYrd`V=I|c%j<|!lHTIKz6MJKM!-J^_N&{couIc?nGBmT8XsKn*Wg^Dm z!aT-e!-UIP{PpFlBc}tqzj?kjx_#Yr;nIG2X_0lbb=t8XD-A1A{XYu=3&)AON$o@P z!)AJFtQ;&|`c--ZAgdNu_t{7tZ@QXn!1&Mfjda6`ib^PrIE|FWAB(k%2a9AJa~#4P z?e`Zu6dHA2?A0_X*tRJM6mzMW5uDB0PPT8mEX7G+}jPp--Zi$YO zO%yEbCH-!r8m{kk9UGss4bjZt99moQ$$bEbX7-vaFc;YdA4|Y$$9BN%7yxf84p+$O79nsoGQaBua`7g>A88ix!>N(B&5M= zC=6%~?GGVV&uKS0{Q2_-zFbu!SD&may!FtHo|1#KN#LH9HBoysJKl64ZCJi`T?TCQu4;2yG7ix(QJLNxRmal5!yenIU*PQyoGs;^^YE8+_)Jpk{k%F{{ z=1VSa*6LKsF2#-aoZ8%C5z;l%W9#dzTl+2Ku_}b2uscj@Eo}jVDlQt2pNRgBXJ0M- za7A4Oif#W=jsaC`d~=m!=liq%X|9Ej#YY2pXHa9FPr>StJ>D=!s~u0<*H!%HR-c%m z^lSzU-Oi>(2QmlQ&HGc2`F~++TmK@LXOjxD>)9<|d8;pQT?SpCTgiC;d20A3yyv|5 zeK5x8M|k*6njYzob4~)MK02}*Ush2p1XEv`nyHKx1A^3hlRB3>J39McvhV+3$zWMv z$6q{bZK;R>$LPF0jV6Q22gjr7kkL`#QR34d(;5io_!=H&j^+mD*37HSI+Q%%B z>rqe$oCW5z^7N7nuZ|_p6FlR+3G7Q3s^|s6`P1Fcq}9FDFBCZh|ESZc9cc!X?)-^e zgju>@&|doUhwcwHTMtrS_~;(?MfC3dc+vc{>987#lph2kVRTnhUGYMQ)tr%*;%}~ ze*SGSTX)wGM}1xoKh9sU$$*#%o3f2R_qH5MI@P-R zj>q+9w(ozgmk#~Q-_!9HIaguqV0mCNP;&5GaNC^b-wqFSV|AAtVb=-qk@31%(OFr% zBm9n5Hd5QGFU&WxI=k<{4}~{Hqap?qQVG@Hiyw67OaA;_VScFX8YW4&NT?q&e9gIf zIf}}rN~Ic9RL@=HXAM~7KIZ1W^gXzV!hXfB2fBVFz81WuotLu)@O-Rre&J@n(z*0L zpFD5AcC_7O@LA#~V0-)Pck;55*@1FRWFxrhd-jHTb$oobI^8l&VR+l+=6m<{Y{cm9 z5I>E{)GZB;x$7hX0p@4}_DhUp6?}GbAPwR9F-sWX$2(ro#{dHEbCx!^(jKDFvIf{I zYMcv{GehX;h>rnG7!TxfpUD14Lo5`+O_+|E7`utU#AK&B4A>HQ4|ebR`q@rvD^Ojk z-V^+XqIAZ3mrY_b_Zq|+t#Gi4EO&s0b_(@4W8~UHme_f}! zm-YW`$-(u%y9MeX@UsTU%*X`%pV%N*zR$BfO4eTHc3NWA_T~<*pf&_pxLNuBWB>oE z`M)jxFHi0N>&eaaza9Tq#eX~U0Y9(*fA#0T2J1g(L8B!A&j1BrV%4YmE$*Z9A)wmvpJ^p3 zEQ0kczzT{D!V&__S?|1Y!| zK(;c#N`$*ii(t#m6Hx< z32SR>Tk-)~s!B^sg$a&USUmR%AqPoVS?BlMkuhQAWrpZXYf;dDuvw#n{FF^hNX5m( zW(41_P-tmr^6b}|l%=EqlbNiNBPX2?n+1)wOF9kcdg>k?9F119BF_>ZL)}Pt98k@y zD3zU$yD_)F1>Z)=q^G|#hpaZ)%Z7!85$e=v9333YnrG-Aa!^eu8awF6tXGBoX0hO} z`WMH!DFy7f41Q8;jfedCGL}YXzG>)t6?WV0|K`m-^ajoCafb7!R5hYlE>k1~7A5@+ zgGyn9+wlF{WiM>0<#dkA1@1U(G-OVY!$#ZedaG;PWa~ZTAh;tBg8?cIs|BLJ-bu^D zrVqi#({up7ew$m8X%{MTQ#O7K7K4l3X2(iWR@SN2LP?V8;Y6l^4#E_TzY_rlrJNJr z%US&#)lh`9z}xM7JOQr<)Nc%6rO_s+{~k*@~K8vm(EJ3q<|NN0A~>e@JQ>uD1@@_)QE{yN>_yjOU!WZ)wo@7Q{P z1&a}3`ln3Y<@NY?F3;n^-s5FI3c=IUlX-lKA)-pS^k`7i@|b%$1~qcalGDZ8`^$yH zztRvP=b6Gn)Q08rjrJvX)e?lPoE*jhHk$?N;UqFie81;gG?TuNP=ruHJukinV)~JE z23@g_w}&m=TNQyXLqT23-pS$qFaJ0tZw@Dmy0aYGMx8IWg-PW63*ekLocfVm&YzBQ zJ>_x*^zVhieD2SZx^Jdoy2cy5l)2f$o-Ct}0f2RWt&uIgWetN<) z_MD!SS9>6SWf|yoRCC23O!z{x;l89_#gfHgpX}WAcE2{axKOHkfV0<+?|Hs{yj@hN zhrm~o>w6tlAfLq%PnGMdGG<(%u^h6T#b&)rO-=0-2!KJx|LgU*6H$bGSnISSqWLy0 z__1z@5BZaK1OjRTs|3=19n_gQ!7AW+yQZO^x|4!}f=9>goGh{t6wCQibuQ#X5a|VI zWMur-F?xu&^gTD+uOa$7CJOg~$R~YrSsat%E<-+G4;bhD|248r%VnIsXDjs#Q#|;( z%?@b@$fuF=j;+Q{qJg28Lc523NIe!6+5I6HcFKjJSt*k~U#ANa1WvT-E#e@>LL=gr z-WcWamu_2hrn=m zy`1yThWxs+7+$W}oN$QCM8>2sMkMmPO^Pk>yx0^8CRX^%G-N)W&Ml-t9RaJRkDynM zBJkM5PMI14ZA%CniEgUljmo?V%^g%zixTV)`oE!hw?_ofbDaiKAu@SSc7!FnYeFww zdfEH5{2zBoVQ0wi}|w1{Zh%zKytg8`e2%{VOb+PM_y}cQOmQseyaXTOaR| zP7_^si@LjZ!(0oEOdgjrqA1dS6NB_6C>=jI>}6W~OA=ZI-XHgFGrC{Jqi-M;=QnJ;*Smqy#ewPG>H8GKpa}j`v)5hDE|< zlNy>b6MZ#FvYj#$4tP5?jA8(qusDa1qT*kI?`0VJ=j{zAs74?YP_wY8yjHdlnhwVo zEmrDcoRhFi1b`m$eUGShlRV}x7eTss%l)-n~H`eYB`ABsD4GZ<2KKr(|dIC>Y@8%_NE0Y=Y~5CPD^Lhrv+-fJ8gUK zNRhoN5_4@+Vb)trlJa4oR^U4kovk$|EGI}p!XPHTKpLu$yzhb*ccYtE6n|tU=Up*v zkw7z}yY0!igYW2i%gf7JZU36qLR)H}dhiHy?Mb{`sZT}!z#U>OB5pp{#9ua<*?;&@%rTs9sJY-W9inKjKO-g4}ITNDpI(fPinX^(RUnyo158EWYJ z=Wx^W{%Ga(b8CeWzt%Yo^yBbWZF+=?eY@OIEg}OXWdt*iRjeiio{89pv zRCo0d@2QUJo{ySaeJg`7=82R+lob}0f{rXlr>vl`2Me|h8PSGLiJHl~3g2YppEGo* zXlT>r1P`lvmhD5lH;}>W_z>L9wOvlQ$`;ec_$rdD8nnCZtL~kzROv|)$VsuOa>$un 
z9%y}ZTHTFRpDDQ4Cg-4vwuXFVp(rJF>9bOpuC0ap3#nfG7Yw4U+3VxgfFXf&=HIc8 z!ZD-K9>a?g^>PiFFAbH=wNLY-;@JA+MC6z0D8qLw+i!4bT`s@2R z3Dvj!oYEAY0_i2kym#?^;sB+{e6AAs9&xhC8S^}?Co#;rA)49Mab~I0xtG}b10NeJ zW2QxXOL}97%xI*Td$zTX4Dh3EIIh+nreve`8+U`% zmRlGl`0MlPZ#MU1#eGC0-L{9X<4ZHeZ^Kr@6+3JP=e8wVw?%3LVt0?h)ulRmZ1$PV zsf>zcqL7vWa)bDpa|--J`1GTb47)Y7Bx(f`n38O6i1hpGlEJ{#JUX3|DiI|0kM*0?qS4shH1g+%Yg zP%!cDiVbm1YID+eF=_)}g44nPvD6`T7hNw^XL~qaPh_o6;;FtYSrV_UNa~j8t<|UI z4If%<>wL3GI}4rerYDPSXkXG&{kL5aqOIOJ@vK*Ss_fu+{)Wo}3#+Nt#h=2&Z@VX7 zj!W07^Kq|cZ?&fH8{V97DRM`e)B{MzzheE#ee$6lzuMq}H!GQ-RJ0|a?rxxA#3m=*lovN5_%h9(s{(=4qsqd@~*Zkzki8 z1~~8d%bXFyvDPl~d1#0@XB%QTd4y{me(#2XbO{asK3M@o-8bSMzFpgXs<2obS_gKB zanr96c7()DAZnMvx1B&E(G;frP8OUKWajk#cedd6Fw0Tq9$@Cm1Rn_-w51HQA}9EA z#=7zjx>+fvs5C6}%uRKjXVDtWpZ(O>Ib-JzF{)Kt+Wfe32)%?c)G_#bs1ivMzCpNgCf1ag{i~yKO`r*S-}i=WA?%nj?;!j@M(`r==HvCava!_7?V9QU3m>fh^(@ zTuV1;-mFsbKdH25ybvHkL%Pv+N6z1ANBT<`JG$qkB zLz_Lk?m93lfcFOYoELw^b&m_EIEXzMK)5G zM-S0ZmF`(a^(A?pPr$4|sK*Du!qlUTgbvaBi$4(=B9BhF_s-eQ4x!K-3(83Io%)Lv zp?T~c+tQUshleLiqn|G!e~)a7dddq%1(hKbmbim4^PQDX5~kxKagvGGXvXw-8^k@` z*Ej&aX_V#f%?#!j9m?P++;PsxYbxU|3!1prdZoeBNiw<*W+`f5OjJ0KJl&WW4~LIJ zA)u=~q^|=#_ye&2ML*0>5m9%fwA>ILHsYqLM*ad{x2W`)P!JZu}fc|Yq1vPJ%!xskm8_5M>=$-$q* z{D}J|>6?w^c(0mH^9x{5B1~1K66(jWKSk`(OC@Zm8+y6eyg8EXtWSB94957GZUaJT!TzFmh?9q!iFPp|ns2sEjJ9GG?0HGzikT0^K8ytN|Q_>#<& zX?dKwPQ5+g_C1oSsTU1_sT5FtB&R|Wm%LvNO|^&pIl0<08@5>?n1eE+AG1@B%n8U5 zFL?3poW@>g{efG_$;l-k+)ws~mBrIy!8Y*9(*kyRoG5Z@F-vKik%J`yTugguzY6%a z?1P8QXQ@(*Yt0DE5g3=k#=u0~B!0tmIY`rtc89X$OwWE4l~ALfh7UWbeV(0gB_se#lQ26)`2=x=yCi?%aoz$G~1aT2$u zZi-}vM@Tr9ZU+Tifj>a8SF6OOF$9|Y*v&^J^f{&{x0afO%3#acBCPHCBj_3( zH=zu7h0H8U={vvS_i}k?3xcy9mR4TE{nH5N60if}|I;B+dq zZ@5>G*z!{TfkiQ&j3z*$#Heyz3ac*#qNW+ZV^AO^g<#K|I0)>(Up`q*aHtIy#A!V! zP;4|$;OUHIAgkpBkkt$GBLAoK=TE$SM+NTRQ1G94xIw>&e=!vO>M6nd6cB?yRYD_@ z(zX8-fXLiKx2)4`(Z?5JU1m z*ra!T$l_DD*CQanm^1yKkEv|dnkrS&Y<>2{C^rOOMB6M+eB3U*c--w46Z;dz5~(`X zEi<{F+$K${=Lmi>7X6iq7f;PnMCtB|eHM5?w&zb2S!%GEAdx#;(v6$M-jJnhQlX?$ zZYk0DLes@ckun5alOn1T6xzBFkf_8}`|X9?Yl5k09&dgy&#L$E=KgYA{!g-0yR%G< zfin4Q5pNi);w`v?R7P~XzL0(+B`X>3!$4S_8=ZW$&>muHptQ`MmYiYi~nUv9k4BHo=Ws}Qd9 zA=kF=X8gRpwEtN&iN;@*f7gzp3jf=sPBk~@UqEybj{i+_iD8<}fb+5`@*Di<;Z;8l zj<32Y$(QWuZ6o&~*e%t}*mV(^Nyc9!nw-Tje@v_g*s)a!brF5y%~#75r>Nzz-q&F@ z%#H5?)pyG!Tz3PNn%uZZ!0KzC2193>xg!2%5ZeMyNK+m3L>ja-)yoE@Iw6`Q{w-j$ zEOte26^2HU>7xSRg5^ka(pkHZcjf3JVkxBt)!IGmoAXH8C`onuyxTykF@++2zP8C| z6G*Jn)HYkTpD#utDS+SgF#J>xf13oh!+Wm?|6Pr%7LSM9D*rKDR zU`-&@e?{=CrO0e+wiDV3Ppr=w%N}DBOZrW!T-I7vy9~OopiE=Tb>eugvIh%8=a$jO zEc@bi%}buRW8>q<6BTfVjX7gzZQ-R(Z2B2N6PU*Exo4d^TnciEFug6*KS>#~Lb~Sy zIFV}($Ia|2ka{#q#p!w|rx{IY6m%=kg@ORLqMXQ!JkArqdMpthF*bZwc4)ronRP$cXlhXKsGFPrM7N~UuFOiqz$#gnk3*(Q zCJR63VQQOU5Le!@wxwEc{XAxWh7Dhk^*wdy&Psd*pX-BKS3!m^O?t{Homm$wjL$Pt z-fV~42DMFgRDq{YN{5(Q^=&&RwbyOmv;r?*~Mq7fR` zCJgmW8-$PP#ABkF;c4W&wJHa3sn*oVcXCDVhT6St=G7KpCSzHy6zpl;}%3 z^5k(!lxT@T9PL8aF?7Q|tfhAa(0@f(oeR?OpN(_V$*d$3dG?Y~R?7TwyZ&%tYY`su z7?yRu|GH~-*CEg*XKdl{R>e7c$cH)LTL}I8K_IF!hJv{H?=Hp%JU3&OzKpu;Ghb_- zi0*vhn&?sU?yp@V-_W^+l@r$6F&R+6YFjZPE2?nY!g zOGLtueBoBzgGixhTtd#3ZnINg^#|MK^n%5Htjcqk=|yqa)~if}D(l}X0sZwUK_(wK zdBR&uX9pq~`QHY6N3$I7+(MOQ&bLqdc?{t zJt;GdVVGQoIxf5N{ZVR6YH#}Eb+B4za(+}L55e@5g|YX^W#lLUQkzOqH>0M>np;Jsr?(dABU4?6qR_sN+m(k(om0#Drju5Y z>Es)yUiiozua8y#&1xhIiM8>Xj^eWT2zO- zbo2=sae>_cD2dwl`~30GRUQ|d2t*zGWJ+z{KSd{Ikj_+wlkE=tG)w>$uF`yq6{Dc!gDq}GSPSg+~&V#Jv{78HLTVOK6hS2zUlhKv&$g5oY)Tkgn z1uLPNW0}vO`kzV`g7!ugs5w+rP$Cl|SYGo}i8=#WCF_rzczkMHfv6>JDt#Y)l8;)U zX@BxT%~(Ovrlq1@LxIntCQvgoD+ga|iutpP9g9*MChla?OU}<(Uk+%NKHojem1G>M 
z=t!imIkCE4m(15VyRnqtDstWvaVu~N@`XL~asO;v7Yy3`aA{na9GfhFHO!&2&;PgJ zW|`%2x&dPl4~eWS!JLPNiH9rAOoE0P{`ZmcO0Br=ws;5n0Cg1IN z)`>{GNb9#FxQA{6>(ohOS66PGqjY_$Lc%dK=V;2v4R1p#gXidHC02I!v~Uz61!rgH zg;rO~=3h?&Y8j*eDqfE>t>=HuThzR~y1P2n`t9kI3fX^cF%B3#DI;#6oslzNarI{_ z(Ehzyaj;itsz#3DM11S(#=?jkmsz}6mL$;8m#Y?JPTkC8O0IHTl8Ki0J{6UTj7j6r zW$zLKz=P)1W?WO^wi2Zy#nWpUE=qPzROzCST_0Li?PsAwZH=FuFRUdsBf{A1a=)OE z&tX(pEZl_&n|Y#`)qYzb{;4LAO^SRSN&08P_0)F(%SzENU2s#bB<2-~zeiZ3Yq*%b z@U2@KE<|!*)N}J&M2_FRHVG!wh@=jwA+$09m0}J(O>AN~AXur^vFnMF#1!znTw@H` zXt$ygSq;iuRe7xdB-hF{fwLi)>UMSwPkji$NJ3+qr3!7bfBCPVG~32405+PQUMeq? zBlf`aY?)+~JkN2i5N-!2*bEGmYst6<1R3YS9_wV@c5{R3~RjR z!xaYOCdyN&)pk0h(C{*&tU0l-81R@@=5kV$6VTSjV)S~yg`zr~E>E4YR@!HsZbLrc zO1&ZS>@rD-yHTChz}@k1cY8YJmu*5bui@w2^rE{i36S|Bis-ChBMu#JLTfBT`->Fr zA3TB-=rJ@_sdv+FzZ2W4{OAd}Y?%={)# zW<{jF*%45%M^cfS^bnm44N55W!>B*zkdAHRJA0hCy2SYAG=ZSR@LRP`qm0NP>Dg&E zw=20tqc@J<@mE3TeL9pp14NDD#aL3H#Pl zC!?T%gt z^Kn|`lR05ePWSWVfmRc1nP2T$W8*k+d$nu7Gblb?qM1O+Z_-Hn!`-NEzEHcWk!o;< zkzMKf4x3zW#i=Xz$m>kI*rfgfD$+1g>Pboc)O-!A>3LK&X*Rgqvp)q1N?Pin;x#6N z@K@kFJK=*+%tuo7Wwwi%j);j6063_*25Z%}vD*-V+eVelaAHjg)~h8_AAHd;N9DvZ95epcjXna&B_2o_@=s2#UpO zLqU=Pi^(6eF~BbBQ7Zj3B;KJ9>_|*mt&^a2Aqy>Bokqu!^Z9ef_WOAXPXW1phkr7; zask0x9?A_zO7NAdH9sA2xTUlIAPBt0^K3peWYy!LY<_FR2uqwn^OgLwyGNZcPkm66 zMvHP-iniVIeexUcoq+segow?0&IpFQ7d<4YDUJG)L+35-dO*+cW4SJ!25yyd+Fsnv zZ6Z?-eJRz=lQ^!-Z1yO0{>gPtdC6ZcUW_J>WBs2`H5jNgB(|{03zDhqlhs zOh!4N0;#Y~f&;Z++5$9%)phh`aA^t9o}MLB$QCjZx+P-xDiWj87Zoe!Dro9CL+gWN zNfF|oJnrZ&fr^q8WAyH1Ik-{U3At280;WT;XnkSgku1cS5^=b*b>`#yUbFe)bJ*xG zJ=cBT%z6hk#U{r0LUu%coe?|+g!6yEF!&uapKKh+ytAnE7%E(sUVU@7noicfU29hz z)ZQpi#e2S37#@^txamP#ZFm9e{u)zy7s%XTaq4gkkmaQRW21S5Da*iET(rgWW=|Ke zIWq9Yw4HEKhY5;%#EG9&k-MEtG+P7wRdxqc293mc*R`Hj^=w-O%l+S?saPi=!9kN0Z>3QAXD*%?4H|AdfRosyeE6Tpx&{!707c9s%B zA#T{_)ZtV+jpwKHL?NTco2Ib6# z=N7}_^!33wTo<`irA{NcxiZ&_$mJknX?)@?fW`mS>q^j={_-NQg_xMGaFZkE_qy#& zh+bm6F89{U!v#=QFb%F5v;hce7Ybb1$h|iMdiZ6t(?=YZ6!Q4TplbJ_%gLjX3~sOX z#;K!fTJ-z+mCIAIMyE`+Y{c7ujl#U|Wco&`?<-WJov(l&k172I*I27w`RI{g))jex zHQaCM{+Bs>#SS?X`7Fhm02V*`BN>Exbw>k(HW;EFl+^15bTwa~kAV|#K0B9x6 zS)(kk@N@EegGhB7Usxz*j@yOgj*SiC2p)u+Q)zUiM|o&mlBhF2DfY};lB9ox2T+jctU2|ISuNyoNrqhob!>pgky{qDVEeCNj*Rkhbz z<-KRkS&@qJ5(u!kupl5H2vU-w${--16u`6*G$impVYRsx1O&F&N<>6aN<@TM(b>V= z%GL}7L^3iZ6-rfA3iIb_=TzjvjH#vbR1(E7r3|E!Ra?H5@H`tEt+|;agakSYilVq_ z1ZAQ&0K+uUIZ7L`Cr(IC`=D>=CLT!#R>U*+p@tb?Vx15DT zHjqCEKzK7o(H%jE>w&gR+N)59K8AguM{^KaazT{%tPAQu}l6%}kp$k9>eR`Wia0m|(z z7#LQ@xc)z@9JH`aqv(k-$7p?1_W2DBHg<|0heWiN9C)Q{S@D0QaC5<8_j|0o!k8H} zn!jYy@efIVapaKv&dA2btsT+&dle`)C5d>gH|B&k_^r=SX&MGhVHQ_~^`nlFQ6zcV znDHKvfql}sU{O>C<=)XAPCW&Oc5ut8CwBi#BUJGAgwCYIOGYQk#*W5iD`OHO`U)7h zgV8qa`SuYAqnUAi%5sF~F0)sS<;WG8;fl3CG%%0Dn_5e<@ps`beBkqI^OeumT-OF& zgu5M0>gKmVJ3`cb`oZ)OF3SKgko$LK**yejwg)gr_dp7CQ-*<=TO@d5FehS!{UBIp zf7c+C?@;PJgq+|=+u&eA)Dd76pm5~Ds>C2Rz2Y?p`@z07Sok2td0_b9joT2M-?W9O zT0j^>P7Oi+LO%9Vr-Lx{pnD>jlR);8 znY1d|Z}7$Fwn$vDPEkKmlUY?Z0=v*A37^9L8G2W4Z&q)PHgvscd!ffXhl1f5m4DUP zu;MT#fwleLzb9fNnln&hsl=2Iv6(O$*_%*)zpT}yN2myC9P&0PtdUwlSlL+VU!kAbOCgEe0PP+s2$orsqbOKL>WXZSObJ~tDpi0u8-y(_OKMM+ii{VT7J(GuAVr?U zJ(}7OJV0Sbk&+;vRFyC!Ni?c#>Q{%y^^>0bl4LodeZ*tbfd)N_eJ}%6ZnWque?f^+ znNW#8LTNB&5Zln)e&GQZJSCB zuuQ{FOiwM6{v`z|T_SyvTA9{C`z#AfRm^ZeGMGA+$W7^{WStdNV6C`OMt_26jd73a zCX7q>hinqqu&FeDk_{$43Nng%SxQ~=Y}t$ntqdjHs1iAo8D-0F4Jr*rYgAfhT3D-| zt9o^@bs}}F&QZ=&&ezUf&T`Jp2mGV+2Yma0gU8Y4F#%OUx_@+)ikcOUqB}`_GGbO@ zK4PiHIlDB2KGHQwUCK3cki{m_e$w+}v}5{Xz6T;>SSbd-XH>J)l2oHqr^;o2E2|Nx zwf+|SeWaE|Y1^N>S6hTzm#^@1g#Kl*=g>0pVBeU4p-z1^;A#n@KQ zu6f0M&A*|(ZoGlV?6I^n(XY<0^S%CndwefVgMr9H(Zi4%-woTXh-ZlV;~(cq(cysW 
zZ&&ZL%AE=KDtG^rf}^B;=f4Lj73CE@jjL`$lT&s8oiv`Yjd8x|&MlEKvWdcly~NTs zs^R)xm$C6Vn-GmOj-mBspWH`ik&IsB1?CdpNqX44M z_YWkn1~5<1KS9I^C@lh4MhWtY-ZM;eHFUx1$$vaiZ81BTTR1{->nMx(_x;LjEQS8P zf8Y3Sr0LP(=_%`}_aqzYBWg5YStPYzGIFW}tMs?5$x_$V)79u2ksD?KGUX%qhgD{F$*XjYubK+3RO?O#nOEvA*JGtvjs z4iR6Yw|R*0(7sR$e5fh^DboTKW2ar2D!k_8SME`sa#AZwuJ5gsUW^o^#WWbXxS4BH zfL)4P?>W`ErDB8|gs0ZGS=aVE@M9G)10graht7`|z0+KCAM?)!(9WR7e4oO#Av?Tb_EuZ&Hk>v5=2oBRq0}q}G@Z_- zC3`Y^nazh&_xba%^{w-WmD$9?tok1oZ#>nPxXy#l-&)Cd&OOw9<3Dm<{XQ9^^}^l# zCQXj?#yKXLran6|8(-Iu%mtF)zBf}DDFg(m^(J<%bar(1zh*rwVoGD0W5r!QZfz-x zGL6xBdl*gzRSb?t(IKKD!6L<_KBY7e%<(on&K%7R%&nVMnaLeUAGD8IAl4%x5jgSB zY3A!D8eAVsT*Q0Cc@x-`FI3U<|Kv+`JCjoLRJ&AQ7g$uIQ$5lMDBoF(S%O@CSkPKt zT%=pXV(UTZ`#HMDJPak@l51(hX3e`MYBMGPBM+;<|H<`mw4>5h+NQ9cWzSpaR??BN zmbDDlSm}^qEoVz$7j2Da`)GA}QFCeeG5D1BOTKZcuAapLZVF}kAS;t6*U!H#dh7lM z?5NN4@z=#G78%p#iPr3V!06*#%6Lv1(?i=`PWP^3@$K)8#RnVqW$kJmJ%{7^Gn}E^JRX}?V6W$8nQZ6c31Gqm|IWV|>Tx(x>Urb&!-#FN8 zGWaa>5wN}g^E-K6&FDb7A+i=&^F4b-|D+9O z4sL&n-Mvap@ZOi(LVj9*g79m9yCL8~`VK^}Syrl=u9|YP+$Iioj7Fvo#%7G3c8)+m z3j)IH$qh`}nYkJfd)nFByKsB*k^aYm8<_qoW+Em2kBO@dAE~CCBC&{rvl;OZMmEOp zr2MeN#KgSLrsmwrqT>JU4$Se9TDrPAax*b`cz7^+urfM0TQD(mad9zyXJKMtVE|e% zxOmyS8hJ9?IreYOAp^yj|@>p!Ky(c*{YW%{3y=ZEc`=?2SckanjoTdRRgCCQJWuOyjO{?Fu;R>F;0?4O&S#( z^-WAcs0R&QsVFk|PM#P-vT368Q>Sb4tPXHy?=H#N^=MMh>d^I)oAKFkIeC*~RF{Am z8z?SFrYMi>m;!M6XJiS%28IhNBnU|ALI$RSoWlYsi=ExL?+m}n<)mM5XP_-1#xTHs zCGT<)ygr;4X7hL?ZC%C}-hBon3K9$IG}}|OdyxA)oaJ}=Jf`{(JL-F!tt8Gl5z@Fc z%X4et-`w2TPPyQ@hGhm=cAB`E2^tws#U1RcIY#8KJ7-M!$Vp<-yE1?L&oB; zijSO}H*b}M!C{FvCcJFCz4=;icC_Bbe*DQDjK(VM>c?h?!~GF8#dFE4ljQ%fYEkZX zy0m}$Y`oBUAfmpOD(K43ldXt@Mg*pQ+moxc>V?RP&r_C}W7v^+s5IB_ba+Th}T4SmL zCe&YWECXL=x%BV7Yr391TKGkvZ{&h9wh$NHpZ;&UeP`><)z6zgml@}4jQ~G>6iOZW z!(N(Wc;>%oW&blC1>qx!wmTuqz0>yWTb~>jUEZI$D}4k4V+q zYhAYjK09Na2BR=)5vl&jxL@FL*e74}-i(mVJ&rSV)4%GqxiV*PPPDoljeV5n`di=h z5Bq+;KTZY*2cr&;B0YYk2RGRz9JE2ODZ;WrG6*3 zuTmLWyU(LNNeRzIOaI>vuX?XVH4PZ%9q~SxJ}4?e4hCqrI@e?6*?`ycW@c}iY}h?P z16w{R5rNOQ`qfG;*5hiGQu)J|`_r)R&ykb^i?t>aTn__i3hC3lw=uRMARkYANYD5# z6U>Kp%^Q?<#*D;Ume0G7(bRVgB@<`d9q-7hB<->|mbrnrvM-(T)opCfTM zLeINMi!V0X!^i2h>O_aQoOVU6PRmPklKf(jT;71IN>*QbthFchWt1v6{f~Ad%jd^q zw+vUz56lPR2THkL>0x1Elo5Fzmz#%r9vc*H-{VICRN<%7SzN64_<(Gl5ip1e%(6)K zs+L{YW1=Ih zuE9n_5f#GC{55&$`<2TeRaCm+qq~6*KWJ!@3%eEp+hJJx5!|S2dQG+&S&o_#w%+64 zLg2v2L-A0tuoywuO%AZ_vs9x=DRi@JH^)68kqB)WHEVhxP)+AAqY6gAQ)J1;#P49PTBY$Ug%60ned>#BAn>sr!xe{`KzIWGaJI;*Qvoca^zG zQ{gaTsK6ba#t44CR3XjD^>j&25_tfIVCZR9f+&NSF1a7!_tdwk#rDfCJVLaw-r!h5 zs0dUOb#G+mwQpQOHMI!A{zus!5Iox>{7Eg21IbAlJSRKi65TbSSB|}0eVT`pnN)jf zp~Zywp8xXjBd|>T?q(&5d8Y){gg0tUhJSUurJP$TOG82>a89AU41x#xg$FaMqji6N zwCi&8{8+UC;k3*}&21G8%~fqM>W6f0-%y@Ba8?3t+;Z>xlU*dP<03!=6Yk4#X>Np^ z<}O}jRqG+{Z>2(U(hYLm)JezkehwzT#|@8xan!NNNeWIfK0>7HFyT(`d(>_)*c<0~ z@4J6OvZT92CSWNq9kzKgjIE(7q$FL(k!f-T8UZ!A+1YY9I5-0eqBP({^T1t9_b#8! 
z{UZi}$u2St`?vMqEyse-bBBOJ;G#$_k;@c!J6XsFuJiGhuIEF%U=#PZr|VtYVa|ot z8u#y(GsNZbt)g%z;MG7=)G4A9lPX}_h=A+PO*zZK(O5YTk$@XI?&^Rd*o4K!VZH=` zc{YN`Wuwi_IqnHm;`3#>I|3#!JAU~iwZvMNA5XT|VMcOlD&>cIRy>Kw&l%trk#709fDPys0w@ny z(XIul;Pu%&)rep@pnu$?bw`A#ZVXVHYz%#T!_p--4UB#{t?s5|Z_IaBbqo^PhBzwy zlQ1Umag8Lcpn&BU28*$8ha+%oO0zR)XS6pAMT zlU4mz1K0r0UD$Zp-nDOt;4J@I5Fqo9I*Hd`%cI8Brag`)FHQ}Lvg~y zloXspOUY{p0d&eT=ujOCI|qyh`}-r$gCUQ5@dA`h)R7u+H>jd0hAf1~!v>TK6XPi2dY{mWRvHM%JZX zIW!~h4elL`gwxO?r-(J_2?V_%H_b)sJ&7XZ*=*afiez`{?fu=GiK#ypX*Sm#@Ogy* z8ZDiuC-tr!uOoHS2gj5w(z0U+8tiD_d9O~QZ<8(BWTaPnDKZr2ee#B}3^_*rwH71@ zG;tHAvU$csgiufpV>1br=RA$xcP2@h430yP_!FS$x;dc*9E(ss0Pu)UFoU<#0tziu z#F|{&+?w^~2(?7<-VbLq%TSv|!b!jhPZGp>Me;}d?)&7ZB^+pKc)vXwdytNrwK6dw z+xB)d9@bI=(St@)-BvzVF5#fhB`YPgEJ42`(BMjlu!{N*}QWF~)-IXZQ>W*2@BiS;(^Y$Bw-2 zPfdRksDi~ELcuVnP*=UhG$J668*BEwg1z!(FODa!rlF?}UdBfAXYCLyL0m5_#7}_eK(V55$Vnn8a?T8ypa6LfOsV8LlC2w%< z*Sl6|IL-z51J{@v)C6Dw#!HR`JIw#FePYlJ_sFfcmi0`(dMjr&Ji-b-&=EStUZRB0?AzY~=?ciuD6{~zl zE8T^*BygoIie%0fOu-2x>?>M1Lh~`6TdJOZ1LdW;PLA>?UG2Ti{d|*{F!zteV{bhl zE76Xp@;MkwLDOe1H^WSYv?N<2Ron~xakF9)kJ9Z%ZdO3|37`$30PpTBOl8oaRU7My z#?v9q#t(SAz7Ymfcvg)9xG0!sQRu-%?t<7r+lfbWGmYIaA|OIqGPHqrR-^m}$-#o! z(WGG9v>5<#ATcEn)*1H|q~L(|BhV>IV>Q_QwJ<{jn_<3cwiDm_cO0$;dr1pBB!wX#ZvK zi3DLAfT~DX0>vWiONwa7)Gu^KEJ`FA$cO=V6vH|t=~)(G`m`N_gbMYT=2&J4(w!kF zn?6hwpu2lGFi(AA5<}~*%5dK}}1(HX><2_saj7 zZ{)5-#b?Jb&46w;cT^$IAE36$WccoF&)Q?xhS-#G+`_-fm8LqC{mJU(;Wq^u@}y-A zNuBNvkFf+FR;}iy7p61PZU>9g>t~94ykD^y8;pZcUU_G&TmHuG{vnlvL`b=az#F&q);hQm}OU)H(o8-rLXlN2klq$6D%wKF)Ndt1?`yvB9;>hKu5TH|> zc;U!Ou70705gJ$EDg!<8o$0cKnV!D-+=_|@k~EX;Iql4&r>jyJEFXrXFk%nUiyq!< z+?0d@%XU2jYKh><&cJ7FrfKSUwADL%QV|SjM)-!6Z6MaWrNP@Y&V6esK;%+l)%`W4d)uQ`K!r-?$&gT9SqpGlSG zF85E2Y7v&i41|uM4ykkKyic_?A@ZakWyIPQ&9og|*e#2~7$GVfA`ijje1*p;2)Rd} z6Kk@?=C4ig{rII!h^Z_Dd}YTOl3=`8JHN};Cc*dbuSoeEq^o$R(XnXy?OUR)vx4D& z@?;;Oj#%`K<{NtVM?k`V5^^1m9}f(l0CNA3_unP@Y&n<}cwhvcsnGrG4wPD!>2+}+ zXJyR{j$K1Ousf0|{5Qk_`;_&9q?mU;X{!EPDyZH&Nhv`=oFu?`8PMHNO!|*!lVK^f z7C>Z-ctYc#BTNh&#+D$i6hsnYzP=7x<35D*P-1fGqrXXcN6?p%XZcyDGAw$;GuWk& zNiz+^@jRCuD#kKocn}YXN}#K}$cBX1jXMBgad?bo+@OQn7!h_PWSwYC2J*NchY<-C z#26DOLeqjQoLy$)n7ADFS*G!V>`)bfp2$xGW(kvb_0#--OC*7}1kT*_1``H@zwY1D z#b1%w%x^{#jP<7xYf}vmp5D1+S=^p9sYwF$cr;(kimCx-Im3FQ10^(PL+3j@4YBN5 zOY5<$_Iz8eKLz4-pJhhdZ&{_+Um%NnUm|JrXMaIbRA1<0v3!BM$KtpLJFHvRAzg&UH ztVoWP7Lme|u7eHy;>r*GK;@2Una|pip=TMe?0UUuhgm|~g&^`ds|_tCuf!`t3odrv zjii!(EtH5)g(r$ z$>|Ap=7q-yOydLPVItrXX%aD^)d`F}$qI-H?t1Hl*}6nOnMhJKYK&`QLhz|M)!jU< z){Uzq2zu>_69)GOS4DZxM6T?~G>DTY`|R=MPfg&NNidF1@&wggKC=3{in#Yj-=lcF zwgBVJVZoc)iFR6EBi==ocT3DgmOAu#^VJl~3iG3#jKk9=3kyDa}$cuH;OjG@bws1{p1m;xklwVcOY746Pdx6pzCbQiqY28$9Wdx zVMlTJ_)QB1dwif-zV|30pn~=0@g*RF<_&yh8%g~%%W)2TeS>C?Z%6q+km4WPl2*ht?*@1;YFKr9 zt)Bd02n|gIOG)euW7;BSvOt7QwN8ioz<_ypH!Ebq6+LNM5jxMgXS0BpEENRYjxO63 zaaJ2~X$>}>=-kNTh?>{O?VoZ)8uBS6I3aHV!m?|pnGo<7b)YbZ-Ia?VPQQWWCIN!+ za2-YyWZ;9z6EyXAO2UcYWLT2Rb($GFfv3;9vtT9|88%IZ2#OBhXJ;vn&=-@T7?fO; zro=>09MYNz5mEtUO!8l%$N*d!zNcD8ctrR8onQ|8`H3ET4RiIXx!DAFN-QCfJ;lI=}O$!EgEB_}=3`dubgBv@^W|R)$>- zkzV(nbS(}Oe0ojG_g=2~hK9w=)%-Jd^KuF{;fHcG6WQ|*@KQ-FvfWaPHhbqFQ)LUq zV+XuxtXbT2YK*S{AatOCq6v6#>Z6k!Tl8K%+jobmqb!(rajp`YC%pwiyS%1^VcSRG zHF4W#LrG{vrlNy$F{}xaIHyG9S0RlMec(a)M9^6$*nmavDESAA#yVANe4F(_JCjvN z(reF$5h~TCK};2s&6s!mYft3G-f2wbACz^!Y|ZTQCt&G{e{bbZ4geVC?JG8q`ln&8 zTtY@}$`^+TtKsQ`1WHO_ztJ|CgJ}~Jsot9zP*=ZiK&f9Z;MCI|p=ABFYByILYD=7J z6KLmCU@o-1t9q(jhcJZZDw{qL%gHDdUQBm=&%VGaZGMe^KD#?Su!Uq=1n0l0>5jZI zVx}_%R4;4Z7~Fw)l_HQljyZ(Zyg!u(@=R*+`D(yK*zJO(IQ?j2;0$+~uh#F>-i#sP zp{yI?rnw^PO$rJtN;R*25#d5csKl%2ISS!0txeTia;C7`vw#_~RQ`)CZN2A6819Bq 
zmqmZbjYM1|aQD+mJNI{M`nrja}CS4s+QbHSvhtalHpvgYHe=o3$o3I2NKXw*y-2^bH6Z>@|gfZoFK84 zGBZ88=Iyem9 zRS)>xS^e}+iHLM*zgOp;$}&Zy-yp^;K?5(3m({ApFYLFeNv&So5^(#K6c_X?C;DRI z<|cLX!)Iqbkg!#LDZ2L#+m%=doFx~OuvNJuweFk`NzwhqOPxPsD)+LMhw*hs$9g2l zM%?~tnX;V{hfSVW&|FRViN10Pfk08Z7>qIlXw~8^N19|mJ6dy!P@8=;;s0@mr{`OI7g0E3h4p#7f%VoF8)~#ec~2{V@B!hvTR|y(iUr z<=gylp`fM5qw-FHe^n*!w)Q+TBZ=QK-vJy~4U z7Zd3HFF&088bo-n&$tf?6N8@4<|b4u8WY9Xeb4h~Dx0ETO8A!XZ!eZspl%}*8r*Q% zQrx4T=r{a6_EdgZUa`(}yb?zcCiYN1XpixM?LGK@{OsAX#jthWR9#z&pnYnG*7}$C z(}QZB|Y#@nU_xk-mf?o3@Zm$I?E_40&Ei_b%bxD)Vp=e-;vSjq<9ki|e(p%xmF z5pv_^tVA%0Z6Z@PA=;8(KN30qNFwW@B{CTC8R3LCC==&IGqy$eKkbA&3Jst+EdIv$ zqUaW?^ms_F9?sY2cL$^Q;x#k$0ci}6u=!Vd5{D1frZ5s1CqqlOlhznoyXSgSW|#Tf zzp(y7}UFxWBH6d;JFO7n8eC{O8 zU!jsP2xXkBwAqT4;_;>g5vD-0F9~Om8_ATp_pcK^3=5F+rgO3-%P@25qHHk2dYE7H zziVoJm$@dx$%q|s1A7%&@eW=&R`YXtBxT$`a;~Rt*;R6~hWf-GA5Rfb%C!13!kv`K z91*=F)QPJ})4h|2=-U@(C(i&e-2BSz57R4e?c$)MthmnN)e(bg)63#@f*E7bN=O;=DB>5IH5a zKT%eBF?SbR_sV3u!m?-yP5j9XP5quC@2sBNo1vvblBGSO`#=J4=Ws3Nqt&$mbOL$P>P zSKGa8HcRxv?)uQaDkJ}*Lw{XIwVt*kTWQkOAVDKKxU3(kGoVT1zQ$ebaaeXF8%Z=( z%nFV>`R_GZtB#asuO<-mcVA7zZ<>h1a)rm@X0K%U#_f5J2coXQ`}Zy>o|o)fdnGQw zF?xmv*AIuS8EOQ#pOQg@dtBXGcp+vUP=UFdO)zvT8M3_Q)sTyB*prP|y5e&EQLyZD z!rbJzg19v+PJwEl9b*LBC@xJ zn1AqpWNV$ViBU9S|IJjW$trMi)HaxFapbj9jw*H16aNoYMIIIS0F+|V8NVno&Zt02 z3W=S?SQ{X$i6jLA5v2iFFCfX)qXfZ5!v$MyuM2dNL*hSobLk|hn_$c3zIH!LiZUulsKHtP=)+GyoDvmusmx}m} zO)+$Mu=N*a5ydMojANJ`*Wd!?YFdzce$3uT9d1qX#Gd9@GtKJLGQ<#^@G=b zex#X(i;!bMdqRwh4T;vz8^Sv$@2PTtXp9{~~EPNZ)5yOiscsFPLB5q>WNaLr8!C>WS13zuQq*aN-pVz$=8e!3p zrRfqli46&&yM5vSUMvd6_{_CIV(rj1L z)#c{nly|EH{m*mX36?8r2XC{HX^gHSIG=&tpQ|zI4^>1aCbEj$#SnG#eY+l}2t%n0 z5=5qT|J&je8bk6AJVQXc!IETP`HxPsV~1^MVdq)hxG0jcYHjW8AwYqj#yhlJuf{N_ zA^L`?<4QP`a1<#a>>Ew;%+JQRu@t&5UA(|yt_a46DA*LlZmj`@Pk1{x*~P^L1|s&F zCRlu5O0v@bKrkx+ULMBu2}mU|9PwAE0>CVs^4@~qzSFc~IU;gS{WUUQHtC?J`H9%% zVEdXKjG!HU;)t#JTR9wfR%H1vwh1z8{mhdCCyQZVrFBY)@4yLr3q4{~<}JAOwtRQP z!x44Vb`0N<^_dIa>3dp869IZVOAv#O$&#D413@OPiBIIAgTiww)Nhq=dv1HgxA3678SV^OnD>UM( zI&P%bI(?c=p}bDYOUVsz=g{?huLmJ^g#>CU+3nWIF}1t>-z`_`%Fdd8g!jEIEarw`Rev<8KFRto=-!__9DQJ0twyY24z=Un>3l zTf#OuNl5|^f`Q3Fn$!*qmgHaaCt+8v(nJ?!p|L<*g4qka_dp_a@8r^N94vYyNtds{ z6_dF}OZ;tLRU~2qC}Ap>%4d_iKtqEbM>_6>pi9KzOQq0hq_O}vbVm%-0Jx2LUnmSJ z5*|4O!AYRymCFO`G%vthsa^6v6rx@%7;`;vIY{(@j zlb+KcM)D+%xR5PS{x7DGNrk3iH0a7QpUzC7frp(~}$LHCYTLvi$h#n}Z9%B#~*=+6&=dDmKJrp_+Gdi0-;D%!iwCZCd0tmyl-(ul0% zPi9-eWqLTkGhBVuy!ZHwawE<@b3e^S4S22wCjC6iiUfpT!yCT~Vq?pN@-@W{_Sn}=W#FKQ3wIx>1os{F)ARnje zhzwcGubYz3WS9HBCl1Bc`v?Knt9+7Sk&{0!&l4-pdo41f=%7Y)WMvJ1OXC+dAH?0)ZjaFIo~9sE zq(-z|u2ZC?m<}+SWKlX;pi0IHK)2Bqa8^f?XEjxKQ&+(7g82o;%)*7Ccnc^qTnnGm z?ww%b!jph({jEy*8MQUi{Df6Uw^yY?XRcRzkFE22Ff&7FF_XQ-Oho`t!tv%&iyX_d zA;S}(BchWQ`j*Gp;yxwJy));oj2NKUte*+68idNX*^6g7gfTK(ns8FhRi~-lhdk4a zFc5@ze|=1Ym6YRi*yMG3a6g`-=p&^m7vl`y;o-htQ0W&7Mq!oCGt$#XT-oXhb85Le9lvw)+>3m;0HVI^-9xbu35xhwHjcvZYr# zSj+dLlnlw<=#_5VA3b_8;aA$kydm;Hd|uxa&Z!ZqRiO$!UAVg!uXORF)9O+{LdHAqtl8k`y1o3W8;+Ci%KRp0j>R3P z%iS3G^_Vu1Y242Dn7u`s-=E@`od_Z;dcI&Cj&!ixd+b)+Ecv5aFNPK@;yWDmLp5L9 z-4nXZr#GVgU$#AXk-d5B?}hjI+I@5UtOcFytDRo-dZWzAfi_X4d!tp&NYUfONBDt+ zGGZ|ldh|eyPSNO;0%#bL%hn@8;C(c&i92y@))BaY)XE+dq#F?)OC-A;gv~__13BPV z7X!)*I0FM|MUu!<9x(=5Qlk&qC*Czx6L-?Wf=f-~(hdY7R!tO*$U5@Gx;(XXD(c?t zPO0Js>D>mExu#V-3v%Pe@Pz|bosI4PeDn!q{xL&t!Pxhx!c<0SrhZ$4V(vk)!vLz~ zJk zhFB00XiFiBN-lt^e*8kUWLTg)%F<8g>xpndgsnguM{;5y^(VxLAi~>gJ`P2~(`O=bs z{2C8NH12MJ;1Jy1-Q9z`1$PTB4FrM{+$|8?-QC^Yov(Rk?#!%p`$w;m zN2+R_Q?<|D6{(~kiHv}c00stzEG;Fb0tN<70V*58!GQiXGB9<)zz_!2t;S;CO1YPRyiqbT9FR=}Y|oyzjGIHeSYko9x$0HQmNh!Je>_ 
zCKLmyz@+xf5rW$EjuMooTvwq2!0-o2QP{Ncd{T2r1p^khUoP(baEByL|4jK*_Q$Dsk;eysZ4N)K*VsnsQ*ZKQ~u&EPr z!&xMF4e(9`#+N>X=?F$DPv+!>{WEx$jG_$s4>IpzJ5qn7MNw0W}d^st8!3~he@SdIO#E1~V~<7NlY&>*x19~@+CH0`n* zqTOsmK;Wd07#vzA!LDYX-GfF9XEKrBJT)hn+C`pu3HfC*UCzr|?zT-fI~d zMUy8?81ItOagW<(zZVq4y0`ud|1yqePNAXpRodbj0R@O9F z>?Nu97GB4!6Z$;>UMv0Tg!O>HU3RAm$B{cS-4$oIzjp?Y4_HICwmQ3t6!7%Ybm_e@ z-M;oU!rhJ*xDFj)M~uEp-) z(*W9K)0K;0Jn*5D%8|;4&_0lNN6L3*17^rbnqf7ePO&N{tO{tkLYP9H)%YqyFy-f3~r5BMG*A{yg zscmuG0lcxl`?nmgxgawk836;m=G)Fr;f@1N2Tr1Y;a6e#;H#oI2fVlQulQcweOTIw z{ZRx%{G+m|qyY7B?jb@D8P!=zLdE2+0DAy6T%DM79?Dc8u8bVHJp~Xz5Sbc*8sQ*K znaDE)tPkp?vZG3gS4gak@0TJOQZe(bCEyODr@SCri2pa}G2}pt8O70;jxIk`aGE=- z%&0=FED)jG7u|HNxG@L!)qh_6Kxx!!xW?WvpWez zEi?mlT`t2T(>B4Hshs^Q3*OleaH8p5CbKMme9C<}o^Br#eMNm_eMw%9Uf^GOUXA@p zKHlC@A?hJK!7Bnu)_ViEfMC%JP>p1izNLKw#Im*v3-$ z@Qrzm*;vctucxP+r~ac{h_{$=uVsPs?w`REWd!Bb;zmn7S5H^B+gq8q6j%*~UakJ! zex#~t?FRd~xjBR~RgG+YiuSPPeOG#F4)R8Udp5Rs?V-$A)83Q;`I=QNSZ$`o@l*19 zvsO_b`~J#Xb%F2P=M3FU!F%SR;S=|s_2T=%_)|aJ z-FM9NK!1dDjA{I%HKXBW8P!5C`IWhe##qrmP`xXmZLzJjt>-24ehym(+X5%{;$dS$ zMT}|qtCxq-SYTP-NYqypbW{Y?Sm0wyJ<&8@{lnzJbnoiP{|v#wqaA~R`{ zY5Ek^v*-EQ{O7rkxVbNV_HH6^UUBM}Tt4Do^IucW%h>#RK9)GKxW8TLTzZ|4oi|-O z*sL>nFYptwza9D>zbvJ&xqEvyVsx{Qoxo)7 zkOs%xb`*gCbFc=>5hGg$pBn2;L41D95JvpA`fn4ks+5M=GhC;jv)iD$MW+E^;)}{^vwgBFV)3v&Ky4Bpwq%KwG0e&cB zIJtKdCVu-OJ<4}iVhi(O{Q)MR1AR@zi^>c_u$fkBTCQ61ay+IEc8tbm4kqS|o_3BP zKnn)O=g9*q+L^l=lX}|O+Pm<0@{|9^f(KOoY-S=S{f~*O4L`Y-yb`IXgR?p5H%4|w zW^w@pQc_YrXEO^P6)}nbb_dn?$t_)79eJ3TJUl!YJ=ho>oWC=%aC37rF|#tUvNC`y z7+ikXyBd2k*t<~tSCIcNj+nWNsk4=%tCfR2>1SMH69+d}esc29ME~>quW_1tTK#V( zdzb&77HEP@pFK=0jLb~`6C32p_u0y$WaVjYt0iV-XKwET$|1nY&BFH|`~P3h|7QF@ zp4$KG$<6V99sf_qe>?IqeUAVC%;&!r>p!ia)e=D9WBQ+!7eHXLj=2K^`%)k+CamTO zex?s^gf%xO5D%zj3W0(n3#q1r>TWNIfYb2foNO5(kBXRYQLB%N+SZwEERJZqBT@EKV72zaXOzgn}?53Yj@O193AYszt|Nme>_E)zy{V-8r~l;B?j0#HgsKVu+De7g}5S zs_t(VSlxGW`1^AI{#`usy~l)=m+0p;U4(t&dsubB#KaU!NJyB?6G!ygiQzoFJ(@}W z^-E^#*SCCbmwh;C-`A^wdcQZXp1Ju`AP}e$N6cSfSMTt;RBQh8TMW{;T;9J?x-UN@Z3$1ynN7XTyf}FyI=aAy%dH@ z!Z_dqX=_Wngw3dDxl&^qUQu2?3XG38@;+-=kh8w3j3UumnNbw9^WIPYULb#q`sTA8 zNn>keWwmZMTEG#_%fqwWziZ>%sMFwaVbEnYSE{ZslJ*-cw$1CV7MYN1^8WqhVkEA+ zwXr4op}X9GZEd;1rZDcG#{~!PS=imXy5m!t4nP4?QUbDDjk@j*zM@m7Ue7A8=cQ51^K4)dmnEwT z9Dd``(b4@-j=iO^aV}uG?P1-9*Xy?U?D?b$N2D7vgdmt?)se59lx}eRH?R1|&4eI< z^}{&hb7145P9g&6BiCA1K>>I#5rc2e>v|Xupz7!fsrI|c*cfaUd^=KX)ULA_5+Htk zdwCd9w6!fi^XPc_JsVjQ;7YXj@1ngfG$dq^m6dgOcDhjZY|r8E7EBkkr@mtsbfa_< z_0l8qu~z7esklVbs`Jmb(fhNN!t9&Yb{F(S$tNI;HzFI6^(+gTd2;+{Z~_xBuiBv`xy>;{ZlhF z!IVO+woc1R0}TrXU(a^lP1fJe9Q-jd;|RG;UT(5KY#}8`w=(QohQfa2NWDCCe6;Ds zaf81G&P1S5?DmB^aVIAH<0>Y&S?GA5Q?T#n4tNUraY&2uD;l#qtit4d!O(v#=gr(> zp;F&D-x#?jk^uu61AI3a7A*;ZGiywpI~=xuC2tTeDRIp6XkxZR)$U}zqAoDtvihoJ zT|vp^V7x(52|!EiC^sKY;-^(kcWQcV>Rb}pYTOfszm_HF-<~X-vC?RlbOL&hEnhDD zKfZba*w%LsGnO*`N|>X-MnzACgWeI5XQ#1m3vf??%(uMJV3QK>F*aq$<;jLTxej1^}h!TNfyJL~N|SS(CTKxTA%dwVZN2(?@q zsUIZft~pT*5ic-}ulJ*`mv-UiI-v6JHi8La$fGfh0db_kTep*^C)d-<3q}WcA(D@j zXgErWyWrccDlbQC2$g|Mv8?`jH@Dy2oaRi5rh)WN9Hu|+XUodrC`1K@ATx}vt>^Q9 z7f~2QZ%Pl{i=B|De54De^szxFfto5R=v(hwUeR}P{;w2kJ~v}3nIyI*tT_o3k_fSv zbIrl|G^Qkcp3E#=5!tM4Y^vvI9gBxd{!fTk!HROD)3DzRY^P|VuUbwj^CK~6WH!>x zA`HJcPoz^GR{lFDZ>*1yE`Y(z+6+L&AmFqwSY6e_XElR+dU}c!#CkAYuD6kJ~>(p_tP@K+icKV^&F* zE=>!-)%0{mXLj9<$fPs^WDq9-07wlx9>=H$7V|b7jkXC1`v9k+%tJxJ!F%}1nNOk8 zVK$ff%-xW=PzvgPLnFp7_h)D3vp+N&Nfl<}yf#}czN!xrp)QK!=u?s)RJ&1=0{{Ny z`d+jrES$a6Qnob-P2AcVe}Wmwv#zV@bZd6%nNUL|X}dNwf@lVx_}#w{Z~@xT{O8jG zForkZOWN}RaMJMOHFpIlXS(7R=i32s-&)HHBO>6MJ3P1EPYgfE8%ZNIVXvJ;(X#)6 zHo4l3EJOG0ot;Rc9thTqre0Y>d^U?{xu~O=qLp+K&0_eGcK8~U%Q;8~S}l|`y+;wV 
zhD`?OE~qX6CLDNdL~1JP@?V5*cRN|$=Lwu7FUfEkK+}yYEMy1C*~mzkuP}xRoThmfVBqb_#=8a_;va$a!^c;It_%@vf@6FKd*ilE22>k32bz1lp;{O*s2Og0 zdO0`*|12X+IjxAP(ylAe>$z9Q2ZtkzMUN336b=CuA{_x6>WkiMcyHDm&QUD~>14zW zji{V{($BNHq`7Ep70|@84AYgLO}e%>lDaaaxQ00JtWG51docbRUKWnNqdFvv%5q-w z&=Ez0G@_l6@Cx9?;||~i{sBazcDLg#iNtxsE5G##oM+eiIUx!2liI!;k=>y8?#r$A zfivmtAY#i}-fN;6n0TTr7Ja!vN%IbY6y(#f76!{!TKX{Q`R=9(W$4x)LqMY->fp~) zS?25KJ+gqqMYD#sSN+{Em5XF6jU8jJm7V845S4IGCXLJNw0LAC=!*L!gV(O)s(a}~ zzQ*}9>lVXs+d2ag^qf_>7SAZp`flMQWL{&2A8Qt{9nX6GVB4IKR)cG8^`&>(z1AEgrXs{8y(DzO9!kpyIR0vG)?#wDRAJ2?+E0EmlQE zC3)CpW%+qufkMBtyobVsbTe%J6-=C1>B(q0fj@!S;uBa0J3LGZTN$%9RmwVNc^YU)2A@&FyYO^!F!@}%(-Iu^lMvN|Kg z2;_c7mi>$@Tkod(8Cexn;SQC^{){qC1xhncwcNr0%F{^;5t9`S;EMhzteycs!~hw0 ze!xKd$-XD(nG_OuiYlw7g++Ym!grf(Ne(?EHRt% zS#S_Po!k+6Cw_W9Nbk!%7najeqmaVVc7AIh<0$)1rY1i z25S(FriM?KpK1Cl6jAhH-Oe|(S-LtC{OapfAHsNex_F59|GCe8@GA?GU|&&@mBncr zU-s^>u8HzrPNt6Yhc}Iw(nDB>OOzG6@zAv;$Cn>F9$9}!JVLM+va_+WjXfWEHN-&BiauWBWgnPOH;9CMxLCnbcW8m82xA2~wj-x|l#L%iHkXaxJ6 z7Xn2_?#?$iS@FOlB0(?x)Lr~*>`bpzj)Uo`K6C_UW5i5!u{+&7t4KWgG&wo``j3O( z=$f!w>|_zy5JA1Dd6=cR{Zi$)6J%z&k{iPIGD0$>sv$Qag`tP84sCb9Wbs2SlvXNn zG;544c4gROl_C5&!+7#a)Syd>ZL;B<(;#R>Gm?SM5h7Y{ZE-ByH92BJN#)mh{1qmk zjP`(!2snd}rK28HUv-1pkU3#t`8B&8kDi$75?C9{VJUBDHU!oj1~Vg*fR zWY)<9MN&jl*V;K((hm^^f*EF-NQ!g@v7dK*_(iTopCWKcgh7}M2yMFeR)tN#<58>W zSwh(kI3q8y2w8%_bu!l!E`>-&kQ}oF6ai%O=4*}Vz~OI?^#og_!B`_q018BDK2oi} z-`m|n?c4PTQ{;`)9A49+mZqhwq1Q+MdTa7^66gBrUAmkA!(L4GG)IX2DUoQKsYqyM z^n&q*`#ur(*+z-$!@@ z>cpJ{r);2HfrPJueyrvjQf=<7h3~Bztj%|+xGy})2I4mEyGrhU+zl~E@y)r%P|B!M z_6)L|i-oXpDTB_HcpEN*>GF5;0daF)R;{sy_Ak+DP=aPm0k~{vvr|CbBD96{8`DVW zFx){@fk)CV9TCFySR!6@V05)bJP;NrZDNu)`*B=WB_n~-3noE=CPu6QfI|vAaw1ac z*bKihlcxW!7qLe&UD5eV*k`ZNZY4?)jyAknEMx+q#3nuloQH~(UMxt=i%82UA|VFO zD$;`+1;O*mfR4u#YT0o1_vUcYM5yhEMxeCI8_Z|?-Q zAYmRjIpNta@JgI1vstLT4xoU9E=M)1P?Dvb%>_D@3Sd1?*Z2>f2MApQ%-91U@pmCj zgndZ~pl~(wg64M?wQvT9l>v)-PZ?@p9?2~CGgrTsV~xk;^lOtO++*^ZIq%4{WGe@0 zIVNf(NY?TzFfY~^la0`l*^Gn2qi>K0w1%}yJPIc&lZ87(A$>z5l-gBSjPOQidsNhY zFmRFlk}um=gJCAi7(HEdm7EhBYlGJpn(+GSCXoCn=*4Bzl4BB`>eYOt9d%I8mvn#) zdgb^8I~n0_8YeMtr8XOOtU(PBffIu=U6kFLOuO4bq(8`k%ASi`Mcvbg<|Za4ha?}5 z;q+CTU%OS1G!RVgK!T~vGO?cE_i@52cw-Vglz1ob5pK#(NM&KdAxM_&nu6eLgo8~k zN#d_sb`pUUD_1%@KRoWY9@6WsN#zeoWHDgXUVcU`cBp39j?O5DJwfkM+5UbVV3jc6IwI}Mo0u_1H-nxuk_G?E?Dth9dc2-YB)A-sLNX>ol_(nCzDEZ6qQR)Ao9%SYq z_2&hp)`dIE(DHO=C-R2gTv-KmFa!`iI~wH6-8DpGEa2cxcdPhT=kL)~WPCW|)jU3W z@?5muP=+D{nXC19+>R;dqkhcY&2(FL75VN2qh1Ty(`u2a{*g=I({Z$Lkug3()Dc@I zr9U$2BHbcnbrZS#4ptqu+1o#jE;vXUMTPtQHlDTLP|64P1WpWgTZ9+DH8Mq$->{oT zIbo?@g2~q3VzaW5p-DGV{eu7n&5zXhz5SNjpQxum)nu`7?$wRqUqR6L39j&rq&UgJ=V7-HHXux<0hIFu=`-w2!!22 z?UL7=STXnQZCPb|!~$&jh%rcSm#V3 zc~VrB4E}$&U{NsrfyPeI2}*AWcP7QvVv`6|gf(m( z1#0#?yPfS!AjXpwGz!eB4>a?k+R&vjP&w`5dE9E!BL%Z@14VD-Lv z5C-IGd^g9JaX9?7;NG`voOmy$(5bR*LqrBLJpJlZ8oP~+SMu}9X#QbK!|kX-LWpAW z7QjqF5oaqcFDCDBeTZfQRW8vbh7vMwed#miVm!f&k075td3fmzRUUa^>Q*j-m74@+i?cSV)+QcB7PoeRNor$pv$IxBO4~e zHQD1v9fx^NP1hU$bwz!y{f8g=c?F)-rq@%YuBDqf*SUmhcB|0vm_tL*(c9&60yHpb zFB`sUdIr*ge(kO!Prr-xJ;AgqIPb@FUzP?GC-2+16YqNCFxYRUqxadxzBwt`RV)F; zD?Ha-_b5@~P5O;Zk69wULp){MTEvwpvzv6Po>dp-DMsLg$3et`KqQHpCS*tB4cu*9 zL}GOmpbgE~J|P4wa5)n`s05EKj_d0|6!Dg-Fus0Xkk$L6Fy`3PG_hW5sDe{DHCvlD z9j@39AKM{!1!cA0!eX(h_DQGY#cZ)Nr1E<_Xzuqch<6sgWr)Esgw7uKO(m|RO})ep zuwC8Q%xHZQFjH-*)K0@xBxKalvXOPHLYqA)FYr#eMbU>6#)N{;!B{Wmry9rnc>nN! 
z)-mkwa8Ty~P-;0SsU(8_*h!C6qFLcAoD1p+jc}T&B#`kURxj7Tq=~r^b{a>bt@_L2 zC*dzx6|NHzu98!P{DMX!(;Wzegl)yPp2K@Q(1|-GAH)ty7Dq#ybHEU*%j9$*=D+I2 zK%XQbvqalN6RgfLlEzCyw_UD}1Yi&xFm=2-58-Z)b477`YT-$q{=3LyG?|japO&6R zz;ezsWozqYcDFP-Uxu^=e^k=*%<1J4aJt4cza+RzkWDNYw`yfD`?^JXQa=0L<(I^58iTny1Gg7KQ<>XG&s|q0 z`b5J!a(&pxF8sLvSrL(8@?yQX@yxeic-kNag91F(?Eo9JF1E~xE@u@prE%dq0DP@? zkyLY~W|Jk)7)!5JVIcbUiOAIUK7Cu4q-%wdOk_o=E)k7apFLeks5h zdIF^}gK=}%ty~0gv&9(xa7sHs6s? zw+cesa>gt6MW<|(2H(@@?I4mhCjFkkQkpbdl?>UysI}jBkpObNvMai>yZ-mNhU`^= z%*w-=ES`FV?Nog?EBXrJ_PlWVG7I0fB#k;Z=HyhG{u?~M#nKiDQr z@Dj%hobOh?T9Pnn8W66@4)&xQtie*+^s3lDe^9yHtCQ5|sVlH2JEf%_vyR-|hpjGl z)y^vCE!N_73lzXn?|*x7mRIHA1}fu;Wm3t3hQbBoK-eQWn^hr7-%wjC%ayRRq?Jo# zj)k{kJL=MD&e|QIMrfS^9N-%K2=xU>Sy=or!)J+w)fQ%U&y+M7BJq0^CY+ao(9U3j zcS{RhV4t^Z{J`jVDxzN%N~LNl?DNP}tOk;f-Z@At05 zqoT$bV(N)D?1c55qHniers`RJ;|y(_>|txz_3EWFm;Y(IQmtPWxRV(o1bY6OGuI{i zBksoVx>$e#8prCo>#O@%z^UakNiKd(7blq>2em#ivs+|tn<@lZo%atM%)gW*x)mPt zBy4TEW@Z{)ABia1g7GjfvApN;RCxU`u=gSG7>CUVSxI7XTK!~icb{CQ;p*n8287@Y zi1$A$slj2;p)4Y-3opL5SiyLWZ@Yc7|J7~y^SmYJ2G3uaihCwM)(vmK>ZzUZctkd{ zc?V|dSv0a(b*x(JFxAjCwvuk+trtHK4UPxXv`Rc)r)h{P510z@ z>C7MmI)}K)tYFKoI?-=86Udi6iaV_kg)@y|(s}M#4dT~26P+wEy6~(RX2V(EE2AVo ztVs-~H#;TV0j4^{ngn*Edz%|Sj!TMm5q}*YfFudorZx8r!tQk6lZvKcfk8yt4$TT3 zacymFcDG|Sokm+cvJ3{Ob{exzadH_=a}32|$@W#9d`~Ehp&kl!5Yu%9bxWdvD_56# zk!zY+`+ffq^To>VXsQ7EBqVzI`q~2Z1&8?2r;gt4l3Bn*ToE64Vc5B?P1OF}r*zDB4$B6fUir{$iPiJxS6xV^vN`;Yvo5ibdD6s~5ye)>X$8%&s z<@2)N+g@#W54w4~Wj&~V^MZ~(UYP_Pyk3Hrmo)2Z81Ymw^xBt^s97?w# zItZA1Zh1aJ!<$nt5vckP*|aJy^b)-)E|>Z#SJDfEPJF8BW0`Ltn#lug(rlN}q@@)E z4~*bJWDF-aFrs;t;v-fEw3qlcQ@Z~KkAh^(}4<)yQN5&C{{fQkYjIbaq zKICL{B{Xs?PUb-2S4BYKbsdkoKVgJAh{dW!(AKX4S#Dy3!mkF&;>?4r%0I2t2amlz z!#4penOVC#q9oHaLLBDu{xf7b!82WCM3FesfOPcU`78n7mXH%Ap0AB?EM61*50fBG z%mMYUgE;S9#f_seH~+gEojG>0s)-tapN%fT)Z_!bpfo;sG)})dVSjBlH8_6`si-C! zvRts$6{lg>7B}MA(@uuB#Z8DF>*~F}N`*G1C)pkDp(wTTP+^Mjb=2C|w?=6pRZ~s* zIhQ=v)-nkR$BCs2^OV{B6Zcg;pN}z!j8?9u$c7fCpl8rh31sp}z%FG71-zSQ#4X36 ziJ{*3S;kq?#@UQ;2awbjB1v1z8w)eQNh6leuJEc3#OeZ-4Gf6o(^*4VDr9iPG}^Dp6c!eub9%i!-`$?pO)QUSwaujU?33GHK^mUtWD%sgt&CmGAGfVQln)wJG zFY`nYn{ja$bxh+7Y4!o_oGxF zuHK?f@8TF&pPIrIAB|Y>aTC%DB+-UQ0X&J1eD$!zlta{m)P$rCAcdvoJ=TAO#|J{% z*)G+FdflJWhIoLKv&9`{iqn%c7E>`)R<)WU<%6G0?O(h?g7us1f6xBBuIhOALgr6I zAT5JmmL%%G21&lN9sd4P9w+lLj!44sZ(YH#KZ$cKg!`ePp`lD9J}_A*rYU;M^|`mU zt1wcExMyv#|F4F=bCCEEU z`O3D+DcPkaKZ}#!nWHgCoLvWBnB;P7W08H6MbmUjCJsmq)&T|6f#QH3Y$1TIs1pkr zpL-rgX1g@#2odK~sI#eN#%w@kg_uX?Nu7$J$zRFRxba5NasE!{-4}(0 zo`|)jelVI&!IC>CdRPvgPt4sP&tN*4BP{W)!C@l{Bx3Q<(bFf}5`e&4?JfaTcBX2h`^=|n}iV=B`;tcMB5GiyGWV?f|fEueuo1fjija~VI9tM z)&Jw|HH?ShjoqfHKfs1N;O=^_7hHsL6{)R`BdMi^3etsOq^ar2^LeoHnwdu<_?kBk zYo>hf2Y~RNV zCMT7i()qm`Z)v`LQ?K;BXnP3ZQ-U1?qYT_5eLwXXj3=9IwA1CTy2Xu~&S%0!-S(RJ z&8s=BzTth*mIw|N!s-9+Gd?&ORM~c~1ECx=2~y-OK_Z^Wufvz?y^&B$^|#nF`~lqs zf%zM4Ilhh$lYf}MJzNZjzWm%<`I$;3ohU#bsK&^wP6Fa!y0GU079Gjz=3sA*qz$t4 zXSr3PxH>Bt>vZ+q`_hRVQas8Rc1*t`nUzm!r-w}wyzCeH(oa0Q$L5GO(^%fL`y8(_ zrU4u`zBChJyg|JLg;9cr=~lU)R8&7-RP$U&b>PjYD^Uy)xR(HlgbhLbt|<7H3{v-g zFPuBV9QgrM@!!ewH*+Nb-tmd~X>SDB$-?^8VnQmkw6uv;NN+vY$g)BOSk?=bzuS1? 
z38W_FP)APwWZ0vlAFsFZw%tz4Ce}~+)ff*vUfjo*!pZW)<4YmQ3e`O$r~a$|_pU@1 z^Wn*+zrh{ZS7{ zi-^TDcXD>nsG*xS_Z%iQ-;DE`<3X1p0%M1#TRg8!nlVwiq~P1B0I)#0I*YM9a7Y*>5Ss}Y zn9LOo#oVk@hNmWCV4g#jy`VwMk44#KHNJT~#2&C60A3(%mOl|~3kN~mA zDQxI#LyYw3dMZudO7%4xrHqkb@A7dxax-3f9jy;$)Vj?Q2L_QJHv?IvW}?meYFdwbb0uAd&Gg>oTAH8ClgGTin+FBI$yGJz#I^s>efS4qZ^f&S<>-kepe`&Br~E>}GSe(8at_9QR zvUxpEi?zJm*lmjj`OT{jU7PpG@GoD58AY>91-hn@G@iYFgP&%kQz9PSo>t9tXphQU zva->DF^t0(xu~cixPnB1{3jkYVL9}x$C-|WBRSrR>+3~`R_B#n5L~Q1WeL*lLycjq z%}d>km%KIcmdJy2dB8K$bocd`YG5!?LAaL^$#FMMGlBizKoAStd&Td( zD|LlMdmY?E@TRGeP-N%*!lR`s%qPtPA2Y4wWlC~fXV|qPqI0m^`?;Yk%xfx6Y~8eX z7`nvm6ICK_rl#j>Rd6;QJdW;UjsVeoC_FD)sjA6G>EUNlMFN7CzE~1K1-d*sVb*nV z14~YD-|4AJ1+=s;#k=T4bZpEb)x|<;W8c&kIB&5H_eKjo1&1p!O&qjbh5z7^A+D2_ z{%E=57Kz^;?iF)hbM18%Bfx|@gz*#uZMtfm@rD4nntCYsEt{-Ql$zrQSDe~b12ja& z!{e=Z071?`Lrw?_v)%cHdc%iHw~Nn~{%8=<*6-uty1@=14+kRJ58*+8%r z|7LB^tN4?2qYxI-m5AAuIJCEf`o#k>`}K@8xw8Prjt-J&zaw$PJcfG}B=1dhUY>KQ`@B6WEv_I|DPk1#{{X!G|B3(r literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/tg_axNone_vminvmax.png b/doc/source/_static/style/tg_axNone_vminvmax.png new file mode 100644 index 0000000000000000000000000000000000000000..42579c2840fb9a60a592490256572f9ec1603391 GIT binary patch literal 12808 zcmZvCWmH_vwk;MkxCN(i_uvxToshCSD~J6mqVmxVVa} zxH!2A*w)O_+7t>(CL|#dK|@0pFYvHsG-P_*!~#4TOWj8!53g$3_{&mkii?ZE%+wA} z8V3_oMam=?IZ}>TLjniZ|2y(7G=avnJsUX#!(()A%7Rdn$4#cg+T*BK9cbmx7pGBl zsC&G)F=bzBD488oWWN^O{TS5=$7Q(hP=x*DXq;LEo=HE+MZV8(KAzrqe;AND%A4>k z@p*kQP>dzX>VSbl7UPY~=#Z1Vd%u~%)Jl#Gg+mr%RXnmi=78Bb36molV6~T0)t+%l zT-Oe}1{U*Q!FVBo5=lp# zT-paXZM~=kUDl=AXQwLANOz-H-J7|(fZ#=bG`zenN50w=(s%aoabeKXA~*Z*?WL{N zZF3o5UT(s`;HM1f|Fz7-{;_HpHaut-rfE=4L&Cg4v>#s|WRey!!R?~t{8EEmSbq4UC=hcWN9H=I-%J^!Ma2 z_T;GBzQ|y2M_)`f1kH9*URd;XSQt^dV3;Ck6e>Rraww|~sdCh9KhJV}VyN7oFvPGm z>u|j9v_)y_p;-J6zd|j;-*(U?L$S8wxT2c@5IaS2f{_Qr|3u@NK&6OtM^kP?--~vr z;NYOxlao>faEl52^en(&_z@}QG(M&x;0WD_j4I~xi`@dt9oEZFm5m@VFu~Af9_lhnywwMoSX3RAB>fwdGT*fShAY&S~(+VIHYLS0Uue9QAW~TBCave{@8z8~ZiC_t2n7(^jS z!RW!ZvQ)AB1Bums-PAVJ3DHWirO~}IWCLm@UX?_AflO4VfVt?VewP7TdYn-1o)j#_ zft;gX)2b|Lq^iQfsy*R7AB?Okn2k)0n2p4Z%DRy@wKrKdi+ZYh!g`d$)-ZhNMWe?P z?c%=5StruPStNakNlwg>OO=I}%ac1vEKX`>xK}`?&1K#J^dt_(@Y6V{eoGI{{-&~0 zz;uB24fh($NsN%OgmMJZvC$-9zzVAnH6?YG0*$U&hC<4)R*EWNXr7|+xSGYgYV~Tv zMOrOWE&K)71-;6MO7Tiga42{bd=7R4D}w8Gga@W}gtp^$ZU^cHMKnG!{$(sy0T$Uw zY{YiTOIk{LNG2L(ZqoO7$d$*os+CW|=Nij-$xRJ14C)Vh?uZZKCm8%0*GT^ys}ZU( zTBz_x?K8>ehCh;j_C9A#nf_(D!f{7x2@=lD%h@fV(Gd7ETgF{f`kOSbRj)<%`4_xu zmlUPwZ<*gZ#&YVPrwkl^oB8hwG5aM(XlBofmgtud*pM6}9%n3;ERH#DJAU9_;78?0 zccgDrYTR#3Xso*`cjtB=a1XkMzcTcaf0lUWcqW1`_f3GFTQBeU3FsZ4jW!2b@)}8S zgV_f;zOfN<<`Fy+*z$lrdYfijV%b!V<;-mt73NxoS|;p!anf^=RUMm=nAr|rk7(_h z?$$BU;br3KFfB6avvLCQ+K&1w1rjyneTFg;R}u}1i%Z}%BQ)b?Q)eq?cV;QMC%Hwr zn`}zwjjZ)->gJsny{oG#hpGikZ}VGXyehp~o~w5FhqjWwFq65cxP0X&cKYCyBhbtL z@|X7@XSdt&kE8oh@y4)osk8S%_Fn8ZczGwGsIaKLX2Gd#NY%6SrK$m(~R|QwSJB0ub3BzuS9NF!>{zFw{)#ZX(3tdN7N2jYRxu^sL zO{H#N?{@F|vPrEP(9FyXa*?`bmL6qmV8gB>6Ad>-t?&&eXSCKpdZcl8LZ4#AG7v$F zb$;}S;>M&|+|%%B3N9$1Gr*h}VbpunBwO9cep9}bs4o7Ie?Xv+!jguMt%1gkg_eU)rao;$WL>Fk%gck_VV0SP*=sd)i*L;@kKc>EHbi~g4c7dP1dGIV4IW{= ziRsMPIxV%cAWD$@>dm3^)bWp{wd3IViJ0H%RUgft1jC zuPkADLC#(y#(R20JR_{5ugz&SkBjJLBJodbb+m@cKE9tjVp`@~np?Ua({E<*WhH%Z zWaOmo!q#e)*<((KgzNdQ*TLgLN;CQe**B3z&!cCY^Zos!#j*M^YJ;m*N6+i$hp#M7 zpvW;Cwl-O4oJ~717%1CsP(LLAi_jAz-3cfUcWI(1sV_vnuiYqw4{2J^DqHVA&1*tE zVMKu8?3n`w`aSgFfAGW3cZzMlR)@o(T>j8Dk@#pNJUr6!86Ij5x*fk`dHHCgp@H?Y zOqC1tUpa&Eoy$PUt4G;k!Rtb6_}6c*P{P{pE=UB>*&qlu-BJVS2vk(yH@3B5F*LC? 
zGG%eKv4a3wC@4W!e#oPZsiPsetBp0tf!|e#;-4D)kmt8-RtoZesyJE+Q2-TH$i;2J zrsN-4xLDXIgptX~$pyhCX8dXrQvWuGq=YCe93AcWSy^3NTv%K|Ao90%4UKG_9EB(--W>h!?_d2i zb+!DDCy>LxhXolR>stvcI}01@|F#V=6@1I(SFv<8wFXL9+L(eIAU=dS_&5dsssDdU z{^Rk#EVce)$;bEKhW}OYZ$m-WxBmavc>Xn6|KvhuOBh*@^?zqx7&+zyHy8?v+F4dY zRKpeeI0GRa=krBhn5l~_OD8mg==!Hma7qKxR%O*xI<_R>-bwzT zOn%qlm`NO(+!K&r5R6E{hlEAVV2^a^v!=Z|FCfq&z&Tvr^y<}PUYlQJ@Z@w`cK8|n z^A8xQ?hx^Iz&b`hB_h>MjD5W!FB~xp8nk44Ol%Vd ztiizMwBd{q`_JC8K6rroV%;A-kd*(|GjLDX?3oo<{^_=%H9E`ZMN3KK z*^xlVz1s0~Rj=)Iu|_^h@*>}$%^Tc<`{i%VAu|~x+A^%oQf&ggdY)?qd>Dn`|H!) zxfW4KRqt}W??zV;!b#Jq@NwKZ`{*z6Ab8DUAm-j`C3j!5vcPfmcfdw5LX;K6D3^C6 zLtw5>p&oYqE=c4FFkWxJ)$TARlus7s=4|6OCRy*?wcq;kU`4WZu`|>~5~+oMxXfd- zn6uRYnUU&K+Duom+oc*Se70`PyQj-Br`HUZRixw~<*&Wr1is=yXgj=0mtV&@0^?z- zO^cTmoQ!&Wts<;W`^bETr!!YVa1nIKIG1G+VXWp#QT1_dx=$9Wq(cgA*IIpC zTW++eX+Ts;7-ULpSzEZy zrj$kK4O3}TvF3X44T}4uA_#Fm2sjse2SLpiYyHlWOJI2S@^af|l{WnD0yP0$_>Md} zF%b{g=G{W-emTP9zXN&Q3hKYlWxZVGe;Ugag53=vTnspErOqV+)dPTL=bJtJqn1;7 zmO940;S*lHeZ*f%bqUzyWrgsU3vAn-oqvk@!}~U!w(1PA&)Ut4AAPi%eV>z`k3jC1 z25O|YTe7LMB-uT`7-B^@Y`b0cz})(9xC}r+oCp4dUaWsOD5~)pWu05uiV`X1wOwuI zJA*7ULOUYI$#kLm_rvyBmn>ngYT-G!JAd^e&5|qV-exgbdTrvP5VxxdX@9D{7Wn$i zdNkvnj@nb10g8=#iCA>Q3l)tCml=%tCd^+x7!3L(luMNanzeV=D166qi` zl6W|Pl4H0%WHF%oL&3{GZ*=WS^}ViV)|2Ui-VyPm1;?`M2uUb3MPmnyxF9}=yxvC{ zAw>2{V_`-r&?h#iM-gRsR|L4$+eLmD!Z+~zwN(>fN=7W`NQZFFF>BZrXz4a8VxxMy zpPz-6yi?|lPMr(1``*o`d^H5yeusu@tchLprEK;PUe~wF2ghVL@(B#ol3{qbmlD$)cgd+dwx!QXS;9iW5n79(T1NAKFm3I#4cvAT zE|S`wB=tRxKhhhtHmAGortp_4i#%)C4J%~|YGoXUcV(j3BEZ7GAAcC8)+j*NH_!5_ zwR-mzRBCv6IIO8}=;(>|s|KxT+jl47qefN?w?N)wqhP7$WFl_saR(DpyNiCx zs9*pTJ6wrz`K9Sla`M*4IbF(?MyII?qDsumuM-ZSelbwP4jy}A(g%(KI7uenGnlO< z+qQ(`rMjW}y)4$k!omaF9nbd!91;g}Wvsg5&fyj7?c^l%ru?4?1bAyOs{+ya9o>H0 zt-9?M`cJQR$Dezfzh;NAsBx@kzih`TcPB3vU^D1`MO3o8tp2R#w(g6NBNIdMhgGvU zxs{nynSaH6sNUWY{62`I(6?g_wbjD#5cCg{ zcS_?~^t8uV(O2tM-Q~k2>CI|>vWhZ6_xi!YI_BUADzU?R1}igMC)lWG|_Ys4rfcSxBB8r^_mW@ zJJK|$@Q4fcWRP$=5k(L_up0%i0LTf7Ni&xrCn`ImFnm^)FM3W1Sorc#nP<(llCEIj zVb^TP9-dWTzVIxjGUJx|t|tN1}0Wi_j0`F(R3v4VhA{M=!{8YpKTCdXVSvl4f?Pa0h(}N!4%MM zb$BWMI4mqCfKlgG@oSD6ZjW|iv)zC0@u#V!&68GVSNLQ59r1M|>?P2CydpKNUzQv3 z>|o~39Hu!kO2QRoI#(XwiKX{3!9nRcEx)0)q$7kN0@&Aj{-ZDs3&D)HLP_Zq1?sHh z7pMm#cr>GSlX-Z|)_UT~bT3SuH#4@sp^rmpb=lN~St1P9_S%9|2(ZqSyDYYPeTZR)7yto-YCvH6h`h(v;e ziXn%GpR6r~JluxM(nzGD5dAmVhd6_iqHl6AIEPn5N zzc~J_(Wm=K`8CKOJ=f__S41VT_*wSa1D!bDJ4!)S$mVoDZo(}4+7N-jpQAw~7DrhQ z)jJet@WCpPik_+`R;^m0LX2K*ep(bPPu9idP^>mT+`HMXo1#;o5_qp3$#IVwR3I~8 zay||m3#cVGD@Vd34V7NLLGqD9)b4b;XexxnK-GbO{-Ex5nqUnkvG1I55tJWlXO`dr zA1H9WzskKoaz%k#xO}0_Q9b@vd2=12mg}iSO>>1Y{PUc(Uk}^h3RU@HcsNR-A)@jn zqHp_`aK=vFM$~{Rwl8>ln?S8n!rrDz|9VhPr8>pJtNCn0iZZ?DS39OGda_LPg--vM zCFz))_u2feN9Bnhe=S|9fcB5;Hycb@`i75^ez z`GH=1AxYm|wyg?Xf1>}zf4|asihF;t_D4K_ZvC>Itb6QvMGDoc&eFnm)jR1 zF^zFVT9abvK!W(k$d3+G+YC9K#7OS&19qN}o%;d;*1p{!R`R*N+XS2mnD(1JKu!(^ zHxWFm7;(x>ef5K@wQ@L&6exK@7R9OI%{cdGS%+6Q#)fLqj4e`Izvn% zoH7$SbKYehUF3y}tx+15K1IU@jlF-UsrkBM>2&9rpd)E<*S}_8cR;7Nj_%N;f{F-PE?{DJY6N3TX-EdmxwVW^y zr56`sbbfL6sJyDplBw0g-{qUG#C$hU``y)xlTRb!fXzN%(VMlVKX}6P@wg6U7Teyh zNTZN~iMj$LLmQB*R;0;n^J?(=kYt7dkYYEcnitZi*&3JL-O0{n3#4&6qE<=IHxv0A7=dWU5n-)u(SM{*hjW;u?J7AF;tEBF^ zyYZa}sFkv>gdD^rEDLS?(GA4A_@y{Xr(*R5EPqNO-rQRB&g86pEHCkAN1q280D!p6 za*Re;G$7~Q4lt4*P#Od|ft_igb>fIs9Aak*RK;x_In2IBM4NVbz#LtSqICprzb7GE zphI+xiuQ$;{zwJaF;1EzPVUIaT`&B&T1#G{0)T3|mR|UD8GjzhS*_&!7^Yd0q)lQQ zr~jokv91pduQ*?3`|pJD#o-I=?WvBb# zN6+l!EM~%w^oA|uckx*Cx)QHy$hqqx3i$MYSxsd${!VH1(en5D%_$MsgW|#9mqvBN_j~)nInr`6KUsMwY@DarIBbx0~?Zf{sQnNo5c!1@SQ;SQ_o;u zpha&SH}TPD9LnqcrAt+5zoew~vPEMlVxdsZ>z#uic&pkL`&Y@g-sP@{q?jO% 
zM8<(1YBjRN4ep#`VtMU`e1DWco{G)L(&aQ5>1z$Y=0>;ff{4J{1c_D9Oj&PyPBQ!x=RQ8jU)%#rPo z6BZMdtzT}SYSo)2)#)m&-D|-RC{_}QE7_zEZvnpmmGm!ie6BVU{X=H^tWItUv-CW? zO~LY6D?nqRU7jSF#mk~JixwB_JKbk02DQovJO9kTil8O14g^O0Tn9vx(D#@a2#7Jb z;>V;_h_9OfFtF(}iaQ#--&ildAP5@qa~WL;CxLMzgwS4Y4_Eaad~nF55XghP%ICuA zjJR$222A#+w&3?L=v)y4o@GCE_;-xVzriF%L|%FNvUDUKG>ErPn07haEO2j#Ng1#% zL$92H#`EC~%s@Cc`2QcrCOiifgC(Q=b<67!$Kl(X-6(`uLx#q|qA2Bl-LF?j;UGl* zF~8h>a*l1pA@qDTzHV%!R(ke7dSQnu!`R8*4s9D|l zxltJ%JLV!$Rry<5b;}AB>L;v{IVGdJ%Y)^bi;;shqhGem4F)u=O%kMD4i$pW8{mZ+ z9;-EsVz zvgMz&4(1CUK9I zsD4HYZ48-RdZAqLZu3qOyqo|plpAM-yh~6G+v^A~o0I)9*^_Htl&VJ6P`>aQGY=9y zXY69;dmeI%HHxKPcHPiYs|5ZzA^V7nRY*H&EVf+^AcT3!#qyf8UO>kUrXx07sGNSM zD}y3iE&Mwv_*F_$E};EKbU#ZaXr`@yQ7vAa>wlE6E{(dy+z&KFYqn<0*pstW?5GU9#yswDj zZF)mgv_1zLv@>?iD7#Rd4VL^pxa%#%kQT9%AoD!RmZ+N-(pzcl#?rG~i-GM@HsiL$ z8Fa;~<3Sv|?GXx~qORGhYYmyoRB72Gb_+^gxrKn5e1;)HL?l3*jD<$m5&s~1U%!PI zL*A5<0P~bxNU5xed~rUhkh?`DZ?=~A^~@IzpO>evS{z5*7UQ4#3&fUBAgtwnUoR9o z-taDpDj=zae--$VAm%X1mHG5FW!RufFLj#Eyc5=8&NSG7K!+;VPD<&a$%LSl1 z<=acA#Mg6-*^MI-@%Chs#0EyUhHL?4Or!xW#6yi1ocoY9EdnHkfe2`_hF?XcR>3V| z$N6=8zHkXl=Vsvz>U8pEx~!J>%-0ZzuoY(J!8MFstDuc(2`nkrmR~DfFw`Vz(<-Pv z9_NsbOgh(5q}I0W!<}PRKsj&cGh_?4GH4hFcIR}@Tc=y6DbeBgrj4@zVo`~>EzEFi zrA^okPl#znccsa^a`-l6%u+JSLbxkY9`}#C9-o_V4|BjxI@_~WuS{e@-nMLcwOOmP zdZv}B!}rc1)%EvT2jiI>hsK3#rqTBCr-~K~)NLAy&bqXXE8mExUZ1~ZNF4aXhV9`86N+G%n3S`9H>oBKY~NZq9sn^i zsPQlO3oJax4dY= zs29%3qOPsHqFH2vv{0oso~!7Kycw}_%hl@g=`gswKK-RB7HMXQscmDIAR>}_E=uGS zH+{Vyf2i&NFIKzJzK%(^PCW^{F}Uu}@U6HPvJY8gj3wI4?CA8YCCC)CI)?OvulIju z-@`&L@O)d%$@yC>tm~1OV3X3-SEA7LBp<(8n$;CCAdRmc54GC7@s}nogeN0fne1LJ z;?;b<-0-1y1kLkedtkD~!)2vs-)l;dXWvC8Tw~W^Z!|$MjeDTo7uNLVXhFF_TZdutT2iRaqn{9c$}liuHqA{`-?zfVz-34BH=N zd?!#`_iYPb@fBelRF%g|X(3Mzq8sCn{ZN22ZONYkm^P{lO?hevPnR<-%R8 z^Iw;fGVW^zxt76P(>(gRIo?VE0~Nie!Q!>nFJ_+CTjx39*=Uyz3<C0Leeym+aj1G5odv}u?01?xhTa<}Kan|I4^*A4k{E#zuP?h`g_mgsj&NQ=nI zsI*Uy89{6Ifm(;Oi&D=AG}>evyv7u{h)d2rZ>m(#r)fmqEVb$A2IRK{1ti|a%Q)&S z{kudhl#t^~2P7=ZP(h+MR!FJU5+o7)7S>h8cNyN;hY$$}$3ZdSWFr7cs8ZTJxg4Y% zVy+MWOB|EIfmwLIWi+R!Ms}Jjg>ViS*pP;d{6;>5A?g>_n?fc5I9zmsB#gu$4Jnj2 z$i4Z_#(;E$&P1>2Eunz8O-O&kSJ7ZEyjxDiM%;8 z&qL|PRl-YFY22GD0w|-kYUzJim0sj3&F_&?%KSA!WBDIWqMp4sn=v;2Jg(8d56z0H zN@Kv2b285$WqGoXV=VoE4S!SAw`-!kf+?rF*ZBh_7ZTIuJW2Q+ z5qpPdh;1Hkjz24B%McNzag%wTiOyi>IYZQwMV1{}rSxRku>k-I3VpmqmyUN&DDIoA zI5StQr9Lk$13zAEsXl`ZgRcgew7O5@;Ze@y78J@98xJ$Em@j~y$wkGb9IGWfwLufE zhK<ZwK2MdV#wY8ABrMP zRwhN}nKTZAleAMY_+2yzRBgP=EuTc0DjP)GhTF#RqsD9&f{;A=%=%ojb=ZEcbfu1V z!^Lp95_5EK;hq5EJmsQdyu+q7Fp?X9I3X(UFDcM4#n;*_A$N<505#Ki(#i{H zN>m|Ao9X^!?gc~vV6d;CdW5K6JBwCjGe>~<*Nq^uZHUU0_{QXLhDB#ZFfmc3E!W!U zs3_;iASO-AT#5eG5qk#v0RWrM;t~)gXSN&qZ9NkcT>w{qe%gTe$L%N2|nzUI8 zv7P3V8aX7!K8j2*Uy2`b>w4TIuPkynuYr^kS8uShY$&2G&%rWv|DZnQ&^1waYOPR){JNhF~d_0Hlf%gEoCUR{i4b zf%(1Rl-~hXOX$ZuIQ~aBvQgC|7~_h!LU}!TQH^>sS>2Tw!*v7u&dORFg7zMG^%jGAiie*!JO6My4Iq?rd+dIUOcc)Cbzl))D zxMntH6Rn(9tk(C4I3*L1-cp%jaE5FS! 
z=xN{kVkt${i2kvv^mX;(ZgIb!+`+ZoEcdjYU6|ANP)ys93Q9i{5+!HK?TsX1j3E3N zt6lSr-j~aERd)b7D^f8&We1L!Orsz(i7-E}2|p~FBU+*6+l&cBNo{-`WGu5V zpnBhavB|4t=N8uA?5XU@%jW5L=i?sXE@vj}86?}ZLX~V(2;uBZN35_-CghE$p-B&ks<+^Eu0fYu79(b&fT)qmw1&vcDX(_qi;!^8<&|<2G#5AoNFI zeJHhVR1F?_2T$j{jL^xE5_7!ax-khAkpQ$+N7;K#$$H3M5~R&?-s$TM`2z4RDtPC; zkyCJ&RQ*#D{)g-kg@b5SRiiP}(*~aBKlnW$*9ejZC@RSBZ0ymhmJ1wjB5vG2OY4v%pnS@aPoW?dXf6 zH$+D+@vk*G?q|!3q#@mgGL~WlW5bA*H=dbpxEeG$mmH9D$na=9J)kn1-^#iLR5d>T zWY?tj0Xb*3$}(nXQWR`?q^Maoir5Z(TWI!5nlACqYvu$6>rV0*H)zfh`?xHx_>EbZ zoE~of>4@rw@FK_$5TY!`#NU$QdiAKe!?*A59fA#}xK#75RB=MIH4}`3GT|PpZCYm% zB%+3^kJK&#F3nI4VtO_ZbF99(+P4EnVK0D?55&L@F>R!~NumPLaKSaC`(VHWo?U`5 zhptl$x_O*hyB#)VF5ZQyVn%qYRlb#Bs%>r6xdPVcdT4p`m4m zS)Ug&$oh)z?4gioi>WdN(7thsxsub5En2OOgA3epiiW4>J_OFI|vA2bwb}*9rxXpW(cc;^Tz62b=Mn0SRDqKSA@Do WPVzg(Nr$)laI%t05~X5a{r(>xrLs%_ literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/tg_gmap.png b/doc/source/_static/style/tg_gmap.png new file mode 100644 index 0000000000000000000000000000000000000000..fb7352954418013bc37e70d996723e7ec4831342 GIT binary patch literal 12591 zcma*NWmH_x@-B>9f|KCxI=H*LTX2F0_rcxW9fAc77J|FGy9O8r2`<6mPJZXS=YQ6F z*Sh!4hgscST~*z+dv`rmwWCy(Wl&KFP@tfoP~~JL)uEuEDIsk$Bm~I+47%kl6ckFS zjf8}%oP-3Ks*9tgjhzJ)lx$R5I-;hg+{f_Wol{W@Gv?MVQ_0lBGzti6Hf@DA;`5xG z43-v7a56ZUn5xp|ktp%#je$5V> zl{)TI=ui(IQ>ImdsG(%{El`3x4UUr3X5BX60-*>-$j~@G;rnKMB@+o;*?l_y3jHEwihlaC#?Wg0Uv<`iRO&ON`W!^utEEvHcvizWmX;@LH)?5jf+63DOt2j!!c*?0vfOY%Lj&{pg6Z|-n;Q!o z?*kv6nlF5|=ejl-BR%Zt)3@LQ?MbjeOoN#teAWRlP`5AYihHOooPS`PJOZh4%$Wyf zuF>C4G8w#qpXCa$4mN%?luo^3VdU%?t z{R&)DkfDh&FOCts>n}H6!l|IU9%?6QUm}MfK+p2d`UTxHOujTU4ziR)MPaNeHyw{A z83RFDAl6sz;w#m`a(PY4O4!AiwkQIrPDwvWvsq0}V*Ai08K2_*874P=Zw_ysHXOqk z2eG@ajzz;W8i%!bDALGgf%W~YtVwuimdrG`8nG2aoMtSh4rVm0=k>Zws8u09hrG>- z>*Q8YS2kDrS7_{TJun2~^M-buu6SW{VOcPS2P}47oFkluoe!KPT97vo1(9o`d4_#< zi!TMAJ$%`^NCMDALIa`;sAVx4kvu|0VRGv7RYl7w+%Oz4XpnwL$`zr_2I0voQaDhi zV-QAVM50GJ%26frkES;U4^Z1vrzI*U*CYo|lP@Q>k9dwc(&I#P z4`yL0jh37gE~v4nlc)(tstv{t;(fNOXZ~#QnfbHC=h^|}UAvmU)U<6Q8ow2%^6!na!wM z!#8R)ny%4)viO9%3S2d8h-;8&;Bbj{nR2;w@p4geY2Fteo!=J%rR?90HjjyDiZUKD zR;%h(IZ5s$_bEu(Ncl*mf6m{fAM}y0OYTyyn?ooyllPOKA7dCZ8uQ(k7{g67uAI@# z)k@Zk)|{$PtW?(`)@rSksyxstn724&xxw*9?u-yF{Z<10PNOMMxm3$tRZ~pzt;?`e z?xhextzVi_tXQ^K-%MUZYu?zk*fInx#2lO+r(Lup_TA_^zCH0N(rMn>_qAy^kQ*NV zDnBYex*L6)a@$c`T3ho?oj14lsCUFI!i}k)!i(e!`wJmLT~HeI@~^tyujE5BONmyx zHoTuDxn0=C*uSt6aD2mm!gu6x;PSU9vca-%m@ZicRaKPQMBAht`Ek&5kp4KeB(`*% zxSjk2wg5LX(S6MSsL!;9TwVPgPCHIJWhr~9erbP+ zl6#I@jJw^wX63V;p?&j;$C`iRkB0F^0gJn`&LqDEzs{E*`~2g389L0Qo~oWE{6y|} z?j-_4{I7?+$0gtaw@Nqflj@xbj~WmECSD5G0KVJg}tP*HrnAIy{=>9bG9Km89YPl%RU8nND^7SpBLD^`BwXJKiu3W`$_mJ z_>n#xJt05yKbr=SzP-Gn!!*JGp}z-_C1SRST$m;*t9s7>80#2=wNt+XvFtu}u(j}n z5;V}12!s5}ZLP%)UsyL;O?5q6fIvl{;k{z0kEH2Nj)|KLJ*zPALAEcS!h}KF`BNg!YA65g|_bPnj2Ke0JVds3B}lec~S#sGzW+;bU#3 z@nWH-D5d{UK#;RGm9k5H?LDVCw^WLHg?iumJnPne1AC+aV=U&5kXp}B%&b9x$?qp_ zxZ~MZ&oo@!P>pWeznpJG+Zx|oJf~Y|kYs#$By*PF8ShPOU$IcbBpfc3?tUVt1=KoM;TBocV$?j) z38>gvj9o%l{s#V;@pIZ)6-NO7+bYio zKCVl>3-7bZv*s&D+bw3FWg%kDmqWkfr`4Sc%a?m)tPU0|7j$fd@N|LWZ&rS}cy?eaR5qp>YN*MGu@Q&ay=M%K* z-h0s%ZK!9AI2Sl)=Frg*A0vb?e%O^hanM_1EZn>6Fg=cp3sN##xwiZ;ZiqGaub5L6?O=3U*4dE_293F1<+X`2sYP7Q`b#bNs-^o z(VoTB-0`yo3((#P0%)P21cCgJroDxmDH+h-&cT%*C`9qk41P%a?=XOZ?4K!awn7xT zN~&ZMjxH8tTr8X{tQ5j1WMpK5F6Nf}>XOp`W{32ID6HMwocI9%Pft%4PYxDG7b^f8 zA0Hopl^wv&&J3Bs?CRyaHag0kpEAPq=l=Qi;a_;jiUqEU%95A9o^l8C@B6a z`p@rQ>$Cvc{EsFF*MDydvO&OK3V@A;74RR~5LUszqx`BiKnpuvNgI0$2Umy=VRk+? z!GGreZ)xZ7=&! 
zP*izxl46=b=u<;vUmU4F{v0VWWOvjNco=G`Ak|kOg%KSR#a1&NFmSA|fuW57rKY)o zgf5zqf`KG*H;Ce_h+-ES1&Pk8MQloRBv0{e`z|~CMzM`{KIMo?cJVyJ&C~t1>aMHi z@wl_HWX;NnB99Y>06I?e1Ld#U-ADIZNIOtk6l2D`gfLiIlv0%$($r)&3egs){E7(~ zU58NrkE2tzBa@S*hV33|&d$z!)adc+)YR0C_Uqg(k1>gzK6i6neovJ`c)(f#&ojmH ze8lKe?c%*bash$$OQGSI5E{zgIrX_Az9jzlrj?(5&~RDL1SY4X91Y?cmzc2120ULP zKKKd~@HpfqE(n}=NL(LGHw&Il+9)fd5xY*Xsq-8+zcT4Hj^xNwmIz-CV7sl^l<_5B zcfY+j^1t5iRg{<0?T#L1JUl!s)SC^n=9m=pZ5-MBZg*n9&PwoH|6!%#{d_r0>8N$c zZ?xY1c+6P;G!E!aeGGWH)oGP}5?|}`6X3F$3nlS6VQbq7Cj|N4oo%`R(v2k|&`RI2 zw1}?N{PgYM{fd5byck2HoNN67mp=g)G1iHHmCWgG({nFDERN8zWA^@62zo4ukoO2z zH&loa_1>oEHdi`};TO>UKK^(8X6}5SGd@E4sL5S4Tfv(diJ`;U;!@>9M4|Kcb78Z= z=nqlfC!`5Z95!0&dqK9Y%M*G(zoh-*bleggX7}I;+;bi#wsJdNTLiw`F7L~jk|?F$ z8m^4~9Dh10E07%CAJ16dj28eHt@Ai;F+B=i^uVg$-CyqQw>fTiAz+8Id7c@xzCK@1 zqmsDA+RT5$hlhvXKOTh*PBmp`CyVJ^tFzi2gtkYucie1qH@#Yr>fp6zS4Yx|&&tXw z9QtZ>m@niLnt0Z-hi3MB>HAP=h7m((C|W3P2x*6tn>NOWxdw}Is<-y@t{<{06ZAKt zh9}HXJ%cg0gs1Dx8IMy!cLrvALviA%g2Bk9d9G7@F6Xl{q+CPs#Eb`r_qI#bNd+I7 z_3$LZ-(zOg!IuzbSx#nKEL3WZ>vsEhZam)1t4Qnv+E3Vf;emNBWAsBkL9nPKVw6(6 zMvUxj$CXv$7U@i%D7?EahX^TtkJ1*T+8!bj>eLw6IE~Pq!w|a-#o|k6uo%YroOkMs ziRlQOh#lImH*ul7dxwhAbGvNp_?}eQAU}v$R z{&NNCIqvBhWZzi1&33Dw1#cHL5LjW5jhzQSAgl#ZoUS`|8|KU8K)v=HO>j)72EFr; zN>0)zT8QxiXIY7ZEbU>fRjeZ}cfXFxlH2hyfCF~IaG6?5%evoQ+dHg*5+|P(B*Y}3 z9==i*sqGB@XlJ;aF~!Qm!7o&6AOt+d;<5GJKO7XGVqowS4&RaoXLH)9mj?yQ&i2mg zo_Aa;UsgKB4`I93kV$uU;HkHeljQ)8@7H#uHdY9D{ z6E`~cYrhPsAC>pb`L^*0dT5m3;XTA~cx3dc62qG3Fz3fFciZSkp)MD?l($e~;#O(s zFyRJ?Sdjz=WZ=6kAC)@TctJ!g?08mslJ!d>e%|A-gAg(JbEG!kx0k!3S_%d(6O%eI zJ`5H&Vy-W#sl!hZOo9TYeQ+WD27C!r#Xu)0uRWncBMgygRFt7e675>3t zT+HsDy4VSdjW+W%C~z=Wk=y5GXPfOB{o$w znc(yebY8&g!vG1AvlT5`0+sPgpF8CI(=#*20pU{FGbQ*TkGXWKLZd9aZU-ZsH&r!Z z$aesAABJlr>-Tt5h%WM+n9Xf3A3^Mqd3Za?vyDMxVC-|sl_eQLa%`x}h{tW0DTuC5 zft`CB*z03ps>> zgs6`75^}rE#1jh~4q&U4q^9CzsSG}~9OV0yyBFA%1@l2Y0; z0oRw2W84HgmpICsAvl^d(;__N16gbH2qKqAK}!6mfWo95{Aaq@8FJ0O5(e}um(>oh zYgK%Ak#x#P>??A_l;x9Bl-A;qJpad|2Gzh~4Vd~yyVbd)CO4eJi7msGP?n2d7vlLE zR0IzWW`k@b>uibjv@$1~qrc!A;6|@~OEJzNr`2GH=2s{?FhJfD2Zp>N)aOK~Vths| zDNCQIPPgBbd9g}IWeb3?F)jQQyAnbbk7n*HfmxtW#EBnfq;e!DEsbzFP!~9H#_Sv^ z@$z_^BJB)0x)GVAD&+{{uGqW8CX2AeID&|rEY%oZd93o}g+ae^`=x@1gQcSost9Df zyf-}7?b)v`bFq5IqZf#)tnrA+u~AWt-fJuKnq#YgzU4TexWfTeqd{X;%ii1a@m2+X zO&j7eA;XA8_=3PXjG9|~o`Z{TKnDr8Vb|jqsgxjg2dCI!4?);zm>s)okwb+6GVi^sru73|HPF-fX6Lap(+IV2yyA~OhO zK*Fv@&Vzvqm5z+>bHy59^uanSL<_YcLkrcoz({L%NZSD$m`2P%6|)^9^h&5Co;7d# z6HZBNIFC@{JeOJu!n2~A_)uWu}&m*Tk-iQ zNYAn4j$&LpMD|~H36NbxS2|3{Pq7BTsT;e2(98Ti6rtb;hCZ@z{~p)qxwAko174?t zS$Y&0UGe<~T-fpBdaCMU1J_9|cALz@(!HzWgIRrTV^+Q+nhIo}{NKr=GKz|8)LdRR zc`Jy*nRGf)Q4)3$)snO;2PHW}jwJK=jisey<&!8Au{nQOO@F-0%rKLv`Khrgd0<9WYuMiO`$9HM0|}apat>D2 zyYC5j(TfPE&Wv+&Lv!w%65%K7p7Q(=Q0eLA;U^<~ zrx@4{(veuj<4!si0V5*cuMM(0k9vPW9FitC%o<0f-GIB_U$n;zlO1bkI-B(u1zJ#k z9W8IbT)wq|KA(88FvLI&~)MKGX7MBaFG=*q@q5P9)i=$XDV7+ zNIrbzDgu=lCMUQy)AQNzoU8muak!aFfp|;gY6;Z{IiaH$f-(kaG%pj)tGddH?d-cD)*Z7moe>h)26uK zK5$2whF+#(*`K|VvgHTUF{xJ7PB!_ZIsIp?_p1u{`idqhMq8onHu6sW}+mPCoHU*C*$yfTA%O zxxQTMH9NP_@M_{&3o~zJ7?e(ZH#^F<+efJ8qMqCw zb0SKXVNrv!FY%4*!B@&Rfa|R~jh*hDI9*!Etl%Heof)a61it~Rt^|KxR9J7TutVg^ zG=C3wdK$7}pn{sSSrT*tQXcLWhCfOS!S(f#uVj8N@7Xn{NYW0MQ_yK0C7vR%E^(jbdu|54p9PhD!>DL%5jGdW_bHqWi*(#sj*FFP zCt)$C0`+QsMHc`u+kd46M7-4p?BYzHU2{YebuWwJO?vWR8@dr2`$rL&+}>Rg4b zNfIslv)s7MmZj@XW?#b$ty|~vj6tJ*k5(222eN@s+#KUXF}a{T7eigmf`ZivfN<1r zhp~tD`>ag^6>}LRmwwrO?`(M?C!gzgdd?5TOjfmr>I7KQG7NZ%n2s1*@6=u9lJ=eJJ>j8i^SGe=sy>sL1E1dX3!;c`C)DU8HH;Pw zrc9^>1Lr#@*^fx(Kbq@x2)?eWvE3#0<%>f9PVAEm%n(7xKDP-9{TJ;k^Xm*#D|R61Py6+Dymchs6N`>|;9hNpl^h5Gx20*{r&&e` 
zd>(@$5k?RKKH-&M(0?V?PceL|;PhZvVT$G&J>};6zJWi{>>aD8z~;-d6HauE*7=KHm zlo=~Cx_|dB-!~qSnAOFG6qID#Ly8EBM=T01)YTidE|f-uGE!YhJ}-^*400M)q=6>a zl%SoQ6o3GPDh=JK;p;Cp+togu6Ptl>Y&P-AaiLDoZv zWT1=y7Y$FUW#&C6gyeT6|Zg*J0?><*P2N=ZUFX#X<-R-i{1m^ z*CcvN_P&t&Hrg)C^sFrUZO6W4s?N-gCI)0j#4V&<6O1?wb{zsm75a|%8!Mm5%PxF9 zb;!!E+?;4<*XL%IZ&Ld@g*dwHRu0_#i>lq{<{7pt6t(vC-4zyETicLSR{Sc_cr`O) zQ&(5cM~+;%D`SQKdT#<~TDX&uk<~?~`S6Z{sZfzyp zoG$n$sLH+M?xATctpMNPfoOfgN102=7$xTP_x8>t{R6h5T!(^H6}03Lb3YtkI$w_+ zItRmU-|SGNOH1rPU0ZWdOJC#V-thcPp@dg=?p!U_iitCn0{qSP@S2X?m}ZPpwZ*XT5iIKL$;Fw!t0MQE1bHL6_ZU zfu*jgr>Y7c-@MUf;jIrTr$wMO^J>44^J`(1d~ns{_?N>}v*!n66c(RMw&8H+r@PPW zK#{6jjmR&Y)B)?J9m@s`>%))d+oHWL46B3}OiQ zYr7H|Bw;pX!(*b72xczT7%J>Mp)%5;Z4}r0ko!pbj4ofHY|)mc;qBVa#QCBsQ<-fu z6a^D2;y9{1%^HHI&l6m{PK0&W@M6Dn(6<0fyxy9hMpoJ zm!C}oeYXmySqhM;+3{^@T}3R*cq%@2Q=Z|3RhzLzEE9tZ05{{Y|08 z_(ou)|G2aJ5;~&Up-F1y!iS9&V2PY>#CC2)(ztS_NmFnssA>m5C#EI*&MO*D9F8bT z7?~9-D%c-^j!O~T$@S$Iq4tvui}~{hEFQh92R>guh6w`Wkd#cMOedG6_YR9ZH&P_|I0~v zOe_ z4wF$#Jy^ulV%$JiFp>R^h#lj&x3~|krn?`Hrin&x*pbtpLgb1T zE5r@-FwJL@Z3yib0bHkj{vb%O%BU#7nl+8R_SzJHO~DTQtVeyoE_eqjjZ?;B7{mg) zHPz!F4j;2#8c#YqNlC|tze+jWAG@>DdU`Dmtu*UZvwOUX-X|?q_Yvb5jDZ3Zk%^0> zUD+{Wss>E65?7Z1yb%)BsOLGJ>gZb3V46x(FD=Gu_?eTD7hQ#thaM&UlPdBFzEm9pE`$;}=L$k@m4MQP@2dRJ?&z zK!@)x%DU1{l`ZD7y={xJ!;u>{qm7w(b;l#}u0^9;OO7`Xb{}i~s!L2^2#pahpWZ(E z4i%!MOIA4P9JN=kW((XgyAT_^paf7YQ89!n3oCM3G4Jw$?eb{}rmBR`$8y8HSC%_S zryX;PSjL421i;DlHr_x!w4c5a`4U6m=`qk#LTf>=?h)gf%xm$RY3p zcAgBbUsp-k=fDG_(u`=w_3U`s*W4I9_%D#WB&Wq#YBvdf1J7v0@qK~(C}DuZi0 zkQY)l_#(8(zRwS+XcPC~!IcAw$gn8OSbk^{9ZdREm)AWqTI|~h(j2lS%mc~Hb8Y!3 za}QBW%c;~}kIyo*urtVf>-zprY-&Jxfg4kAuwG00H-fB0hWMJL)mvrf6XR?#KGAeNZ-Syf0@VxzWr>s8=Ktk zE}WJ7$5R4BMZLWTw$Ge});O+^?3lG7qfQYJ59bLyn)@45%RuB_A?g0#2%>38YI3HS z`{!E9rO%gQb2dA7oVb}a3;+-vkyqiW8HwF!@2klKPcM>L+LNApr z2K&9zKbxXf-3&2UB@&>Pm8QvK=DyqE^D(!BgJPq;6C5lodyp!xz8WBh=C4?w$<=c8Jt#a#J#Ws0mm+ zfWFt0JT^Cnv#BX55|AR+w>ZK}zp521UB9zpfW~kW z(}UwjY4Il4Jd-9F|Wbwbh}&*hlg{OYmx{si{<-IAc9=V#pQ^ach}A5q#x z*Z2Nh*QuR1jqxBmYB7LoZUuEe_S>mz)Z*^4@*OA~Z zxz;L}qL;f6MMnG6L#a}&J3h28%h*q4Gu#ls-oD{YZEtulu+QFP1japC#Vs`S%|O~e zy=fftWZ3%|rwp;^pq2Y-naN&J)O`6c-j-VGy2SVy?TnUjhrIuI{ta*KHrS~$z&yM$ z?hz->*d@3^0Pvany}T>ku~H7kgz&~IZk#^vhJk+7>U zNJObr#j&fv*M<1;IHmWi=QTSXz=pQub z_0Z^Zi$Kuy(5@;hou1WCQkZXc{=xD4-tCLc!_6BCpT96jjQ{NMoy|Zz>8k#bsIBE? zQ=KBr{OZf8(~#i`OcNuwW(U^0~biT5mx7e!sH8L(um;+L6H-e}YfLL0<(qzm;W0 zGPFY9UGl~>wYs#LCG!$QzBPBOTcHaGao(?u4)p}A`AL{Vsozy`ps}T3ivRfZ!*cek zSO`d=;UY1SfM;l_l0{=65{yB95R0MllO+Vdinp$NMiJ^~rYIPmw}dCp&zFB{j&~a=L=BVF6z1<7 zi}>?GBcv>6L*IBGchm?vRk6?v@`Uaa_%kMqBdc6g4B(HL<6WSiV*J|qyP0+Mdxm}3^4m`wVqmM|2Ce$;MW+52fDsPT3E!ELR4JuDHk%kbt*x6dzxE#^HB zDowdVK)0CqwnAB_=<<|*NSC=I8WjsY%w3|g(Ln@5<*%reeC>K=i%N#=O8wv|0I2ta z1`%vPKEn<-!uCjN=aI_eOGwtjN0LS^EiKu8<J$5a2QdT-P zJ?#ImV?I)diI7h}i}#K`?ntjdpUa=r`zq5XJ=Cx18}UKT$uwVDMWt_*Lll1mGhQag z6u+D#1yTQ#l)t&2kZb(=qD)3{eheoF=(z?@7XLY2*BDP{wppyyTBy>gm3?1i)Zfng zP$PD88Fv>xIXcnej+5uKzL;v%P2z)})wrKwL1FbE^Oi;l1y9fHYo857(v^C=T$K~# z^JL<(UXK=WHI`&nKBCA+1Ka&-m5p}B>A7y#t}RPBBcj3{&MYti@A7L@I8=RXyqd2K z8%giH3tNCkN6G&dlG()Xg*p^l!ikXJb!U-o>V`uxud zF-O}*h zdtB&PcP8@or+%=t^aJEudGC4WgUc2eo~9Gk;yhcB0rScMBRWk2X_sr#v=31Yn<&V3 zc0f1VM?VJPFe$8LvkRmWng}P={pBBJ{^kWF$LbVt0`K!~R5ZH?iR^el%}=#2)zwUY zD}MAG94XTAMD!)0i$5XXHmJ{kQ<9J#c4@eeOBm?Xv}kw5o4fsEOZBXKcdK&a(B138 z*s12%_5~bJF6=pw74U>>`YKs^hQ2}L$~7o>93c41K~Lyfpf&76s4^^|Fsnmv!EAz6 zzDg%YqD;^z$vbkY;DU8>x0Ss8`J6O|OyL_wu-qQCV}Lj;Sh+Aej$p>;#W5xE{lE|V zQF#K+0D_3LPdEN=>-4tY`GOLn2Eeo)4d0umtKLv@%r&`Jh{d97J&s5{s+4Na+;;R; z!bW=v=x%_(mc-fQX5qpU=JmspfcGVFB21|lXfW0Hz4j5rWIm&BzQI6u$UBIy9`p`! 
z03mj)sC-7r{C!Vnmo@hUp7R)fDTZ$rU_`^4p(*eZy7k#}j{2BdcN`kB8E`tRD9xx{ z{dFo&fU#YCn9mig3MsaWFW2X%UeKW>z%d$__}canVaX!wM9GtGvnDHp9umV(*CP=V zQ`0LQMlr7ibeAOQ@ErT#1hgs15UmMt5`90h;>fq7PLJ&Ci%M^e`MoG^$2FdpWhFI=Z4h^i3S3Q4E@1)Jw} zi^+fUEVzI3EYRsJf2%V8Kf;Q#NjuO8ck!R<46>N0k2G?S+PhgummxZkrZ}3SykHs9 z!GdIGibH$K!~RxeK1e~*KvI|s)3%TfgeOH!++;!gZyG3?;^Hl8$N%k3VlU|LXJc|w L%91tWCc*y;|?hrz7cXyYj(MB41{QueKo^$s( z`@Z|$cz29fqefSES5SAv0;A-XQ272Ek z2mp`(WW+_(y)w_%Err#z2mo7m6viNhuh5v(-{EzsX_%Zq3o2MuzB}crcg@u&hm{rX zljT^-$z(J%U(ANlMTEKPwLDyUh>b&j2#ZcNXCJ1xB=~M*`kwoPKHq&3M~D7`8)l44 zg*3B>3g}}&?tAffdi?XYWl7tu|62ls|#io%beUm)Id=Jkgc`D!-B9-Kzc${LJ{((Sq4>V04?XCeOL#t z+ZgfjZy!R`?|gd3>s zqogm(gDg@}dejgA!qW}sd^YC;0zyi)nqpP=bV9pkIk9}cp=O323sxFBL1PV@I!tj6 z7u>u3H8Yba$|)By;6Ayo+Q*=otM8XooA>_6nm5}`hJ;+ypO5G4U4xRaomQiM{LI!F zgKa4DZEnX;uuVazP=#__yNQ}G4{A^AE7fU`@^Glls54Req>!`2Gn^j)howou-+fA) z@H5$9;QeW+RN#wZL$~*(pwyH5t+ACXt&vPm!)#)kS`WKsxsLcTi|=cr|3@xIr;l}EHi1kc4$BvND(v_21Pe9eO#ETYDl@jKjIRXuAN>0jC?lngKyx!a;c zxQiB1?^>e!dk4qfHDYfX{QAPs#;d4s*%PxhU|`BGL*`Df;Xgv7cH8*`=j(vBk*LN9 zj`&R-Ae8NR+JAy$$#FQ5akLmrk-6F4clIK#*>$HmMX`Jv%Uk@2{_Y_4xahs8E%(7Cm zV)8m58wwBY1pT@v{nrFbKctR{tF<=!eU_$Dg2Cz@+e_8Zt$OH{ELg?VHK~rvpXHv5 z0UdY+f`0qWSs4}{j5h-QbR;z|CJAO7F6-($6V;IApn=mwS0p<3RQI#VXH)stJZc3a zA^xQEYXQ4mR6X$ts8t|4uKHOHF~W11>Iqhks_06)Ek;}=Xk%bhIVIxZWyBYS=59P zG`9OOGFW1^tA@|vN=AXC7v~&TvqsI~iSGfz?q|a}VautdhQGz3K3zA<;NJCQyJ2SE6UO2R=Ci$`Ev&9!P3TU=e4fb5$%jw#$}hNw+jg zQ5#05=Z@<5#c6jUnbn{XLsYsYjyjl<71nqo=Q;5vOW%GDW7%e<*Yw&>9V3G4cJd~D zY;yc^owI-Adl0&6F*wUJU4_XIN5gSqlfE{KeMFzZ`mcZ(;bSjQ?}l!W5gl*5nS@qv zi;97zG(bbXI)6&-#yz8|v2e~7FnOJi&5g}{J2Em2KF~D_iLa07YwX&Q`cxXgGx^o5 z=M|)O4N*zL5T#3=b+AAxoXbwU0B93eGXfn&MWmNa?jMZ;3l}yC$+T%b1BvDpa7aVu z7{e<&U$SXEa493Rb1u#Aat`fC0_MMRbSPyC_-D%H#Kuh2JiydEZ+>i^wM0EOu~;x4 z8aw9tyKN%xde}YcKWwACyi>}&nP3N57RW&XNKWt49(Nn3C%UX+9N5su z&|Zuw!^jU}J!=bfd82dKf9HPY*b3JEN|#ll*dYCKeE`wieA(?>wNa?QX}vqN!I5(h zLGO3_Q+KBiK-c`Eug_Z;OeG3$?TPmsrDXxcFwlN{Y)Oly8i6M*bXYN6t6s!1(V$>J z65<_~gd;7SH=Q6RUShPt(l4@F29Z zEnji^R?Dz($Cw~!-JDSc(`5Q;EdF7FxX>LuNw(5@v*MF9Y~XsC`^)TrOQY;F&A zjSiIeGHXzAwMRP74Ah%%_puibvmI@$7?$+K#;O+}=lgFrWMy%=2D!bnZ)nMbr@hj+vLH;l6>Jq@|w>+! 
z;zCN8^JWx1c9VrF{dZpna-%|_d*y_T<8IcC)G?q%&~qlE&Rvj38b*(2(bfaA5|ban zCvlmZJ;dEfy*B$Gx;sP9QnLdf+I(gtGAQQ#jr31b03g8nc5BmUJ*~eJdQX8Z@Wl7QmG0gOK{L?>QM zdC}Jo$8n)0ICg%9hCMWwSCBe1K@Aq9-~hT6p@4&?%Hl~M{H+uV7as=R{l5G7><=C; zUXSOjyDy6u)(9A$W-*Q_U3sB)n%k@FLT<|Tb>MWg8eWsA)irvn>EP74*3(_JxOr|0 zkqskPxTpJHUyiFv6B|RQGse2s!fQ~<0!MVRE<7m)eL}h%Y#kQf_m0RsA{RGn;==;~ z*4no$-zH_+?4SU&$DRai3*%vTWsSTYLrzoLErQr5jAw;U`Frx!HgE*w)2M}JYhA%~ z)YoD^wEN55J}u}8053K?{Tj5-W(osh7T_kBFF+o!xej1r@PaKm(c5C7wO z$b;xtY_95%^^d939pNae55xNx4Izz*+=qkv=Y;GJcaMAmPn|eQGxLPu?skb?fbVPr zo6h2QDqK~*L!=B!N)_S3xg*chLh_YtI&6=FQZmLa z(aLm=_KszAXEZzo;@r_z7)}F+L=5Qbm~d}bh584u(XCCvsnTLnGJm98%)>0>@laLr zs!$s5{#}P$dEj$^np0VPf}hXA&qLHfkmGUHx7Op90K=K+V2q?cLL{B-^)yueS!MmC z0mS>K_`)122d*(t)E<-}p;E?G9qxT(8-lE4RT6-g`=1YaAuWCXMzh@4Eb4-L^* zwm7nv|iHCnTT$$}le;eQy(2G)&NvO&sf2>}4a= z5k#t5c;$pHM_~~vstMU*{I$Y}k@a2Cblhl$pK5BLJwP>Q&qxKwGx%ni-{KyT?&QUm zE#>W^Uyq~33aYF;&kBw_(VxZ|VgWZO?msw61Yt$aiT#rp7TC0Z|6WxzTVL)?5|`Lg zCw$hKq(=xi2z`eMv{sU}Ic?y-fU_^^2*2PM>2Ah8({8m@j1g=vAMw^l%^Tn~8~ zbM5-Z{{&J+=pTG`Jl@HAfoYom!wkRF=5_g_^h&?SNUMq$H(u)ne6iEt0VRY4PfGuM z`RYO3{ldn2wv^2`?btPW#^ZFW5OE*(F|m8C#p|+nXrx_pLiWjje#hwCGO-(CI_oky zdRc@Q=*?v4FBriHX7pI&vSranr!y9^3nFYOA{U-4phfYxI;rg+_@Xw8jPTYhh=3w& zcV2xI@cH*I${#*wPi%4P;Jmd?9xHh;MicpYxLETSmREE3aRa+u*$shEO2b{w#LSw# zIZecCkA&NX-DaKOjkWlLyGAOk(ec&>>$<`IyM1fV7Dgja zx=xnY8%%!p&m}nqreXh2%PbLhFBC12$hb;&)o%hMHpC=LT#Yk6s8`p7JZyZh~NQ1^UEp6K*-^|;lL1avl*dehI&St`Ezr(+k156r8$X^MliNoTdx z`Bw0wYyj3CjhuUr{Rhq^)(7GxNb-_*Y#-KtYSrP-DIl1kzzUx=%4!q2dmmPBc9E+h z((yo%hD*SQtGBpe4-fmexSeiiOm3Pdv3NrXMk#0~v%mnF;#%!JAB~8#TpQvmWm?cT zf7*0Ox;fm?sLLW#U@4e~B+ez$(xs13kkFQ?d0~Bm(k2d0xW}p)JmPMBJWf{m{od3Z zKyp7n@29M(<=_KrJ=fpbTxC@ORgtL?7-qk%?8=2!&f&{Wc43bVS6Rp*me2dJ@nZkl zK@1jr8)V>IY1`%bBgm0#oR0f=5kE_6S@(~fRasbhlQm;sUwb4FAY9;;g{7)`bYpX z5+*Rn2;?b@UTyF8{a%<~RZ~+^wX#+~@iQ`v&53TA?KY0f9p8*&$+dTG8<0ZJoIYt|L*QOxnH7Cb2m)gcaWut#35l*}xlHFS|ilV_> z?oe_@oR#}knvnP5E6Lr0+aU~y`WfW8$=XbXpNP0wwvK}?dVD+R)8Rg}#BPznv@STi zsj3y4QakBg=ot*Ma&PoJTlWjpDy&>SQr=KSh6kXFmL3obqc^R#K)&78WgBDwlf%?+ zkmt8dotttm#Q+pVRq)Dj<>=o506){SVWBK&wE~xZ5(Wc8Bqme8 za@@?J15~G{VF0h3G=t}q;(-hGu|fy0)SuM#DAdOyI}A9U^FD= zbImI$<(OlNZxI2xT+__2jx7USCV-)VUFPoBGmJCh8@9dSr2Rq9%;k?b-Az*CWgp0X z5L8A2th{%*dVHY(%32|T{PQ&r6E2vH9~r;XI2=4B=9}lk%#Sl#W9t|g5iXg}582J7 zuNMHk(gHPt%ge$t;m8UL%PJ79DL;+pxi+u7N7ry1<;l&fA?;S6aHRHb&MlNqw||X6 z^%d5%QBF(*Gt)%+{P^>p1|ChpGBwYAkW?^WF3QOWiNNum1T%TL{fO}yH>`TJ6w;(|Rn>RPz}UZ=_40Z7t=(~=}@ z6>&q8+RUx-G&HILyZii!!7^@>ddiqM&uKHg8}fLG5LY%h$(^WzZ&!s!e8pzP9619|ie42Y#zhNfLcz98tXaQF ziuKI6pij|Hm&s`#$jX!=VtIIJhZ%9MynF_Ik`#L~bT zOX#}LvbC^c@ulo(>{(jwW7g@pGv=AitRp8;1Xg-44G;ULh`k{jlI|p!ei;ybqn)o> zMrMudT9B}loNGh9i(H$1M{XYOipQ);aZio68B?Xa97ClJMc1>d>cm9)>k|0BLJCP1 ztoZOyR;pL{^P#j!kDI&Bz1yH7b{|NXy>wOCqBAvJcARjvfW9dbaZ)*{;&^SMy5zUD zPqEkKpOK@wg6i1E-oUMZ5U~XnFUveGxViio(sUbfUS0FybMC?_)pg<#D@@EBixJ^kLYR zScRPlV(XFt9GWLL3J_xCM(%_<%|{nTK?Jvg2bliy(J{#FcdI6g#4kH2n?1hYQ30i? 
z%d@2Y)D#&=Bs2l;Zu7&@u$yG3?&pOD7(UcAU77tBQ8P3WJ+?UGzgb@I5uxB020I7> zQSD~s;Qnfl^$(3*ND5+F^4iZ*P9{FOPZfhWEp3y0YMb7M5$9ZGV*M3glcM6Nb?&l-B(q?k+wyV^QHH+++HF%r)nY zqE(Q@AFw*#@*Ys)MxjeMxUXkRDktYfOP{Ib;DuT7wpNm)Tm2#CvWA8uqpO}Dl(}18 zs2C0A(a-YF#+1^Q`|SYJLavcK&;V5sn@xOx$`&UmweqPF7i9ZQ0#?z%V7?z!ZT@m& zI;8LB+hYux!P-rSZ?oO9@2grxddj1xkfSS;pn&>7?3QTo zU_aT=qQ?8I;7MZAxsy)i%@~fSXfwRwC`&*@N}*9%zh7&NJ*Kq?k}}&nq=4P7#C`|8u5k($Sek^JKidxIYLt%>{*Jh z^}M88v)bVroYjZZn0rxI@QokM=UTD=q?dz!MQhtim8@39eBpZdDc|_$+p716P;e__ z^u376(6#}y&7CMMFsIa~!Enw6xna#vhSH)9jBH+a@Q(=q9t(!>KuIt*-`E17I-6J+Q=RLaZMbraOmNXynt zPyTCt6s*f9l(l)MN_-h5cXQIcupV}P|M`i#$JG>B1|ZCa@3{f0ty&akXC%DtrB?3- zZH|;shAtN09nU-N<*P*)2oMi=GuLUt0+i324<$Zfa0z@TwSR7*S&#|d%{<*eC(3Ls#RkPr9! z+|Ng6HIO?{SNb1Vz|Y=a{X_}82mGJwT@QbRw=JmgW20C--89pBjEwt3z{H;)3IQaC zAk()K_-@O6>ZhqQ0j<)PmFvgPx2U&vkFI^>fFJwLuu@@yO@*-JhxA7B2!T<6YS$ z0g-iQ%Zn(}iQ}F(wzsTcJok9jZ-nobt!h#fbHxk|Cl-|RL&1TNtZ$nrfy2^cRoEoE z9Ml>40UOX1rQ#2@MFl*LL>E2Td}a=pzB((9Mi-5>WG~uGnf>I%vX!rQBWi1JZwfaF z)xrwfBuCK{Xzcu}oz_=qvem~LI|fSH_E$9o!Y5$Ja_v?>2Vr^5y;zdrcHFs7_5>x*gO27u==cr|SeTdD_-gv1CuC(9R!l2`g_ z%(#KDV>0|-YK#Lf{0Y#AIZszsu@b{ye$dKoIuExFhx=Hs5KG(Or#CWcKl_v+94X#0 z{d}|lEPOJK05O;r;0d}c8bE{~6r}kqHt_>Y!D+|#z}5j~zGX}9v`0^95u5&|k6@U1 z-Z0uJ(A1pa}XWmZ0XeE0*x>YL@ck3i7}Mz^H}@j9u9vj zK>XV97xmwAQHwa%#n(AJWvC4)^ZLtH+eF3?4RXKThPrgJ!JeGUQ`O5m-SgkuY_Q) zM1m)m9G*~QMNlLXFYv=(a_8QZ7uoNzt^{U+_*fFRHJ6Lo3Yib#MYi`%R|#FQxMQQq z9EYXT`8xA|PWbw4ea+aTCYXGu22TEl0 z4IyDc#I1Kq+3Qe1#@ppBwTSWiUyUd#b+$0-2X=rmk5U|FQ$cItumbcNere(w+fAFZ zKZhRsO5BL%qkiIK)m!cW6);R{tPs+a7Wf5o3hwh+-_%%nj6XBfe5B{v(B3!4KPuMc zeR$kAU!`(TVGw#A*+T&glJPLdGuaf=rLy8LNdy!zQs0sQ^E^l~u_Q%E6{dN6BlZez z(0SwoQLaUC-QV4;N;VmppFf`tKSy*s9%P@R0I=fxM{SvfBc9+U4-9vgP?IkO9N&Rl zJf7kV=lUZsN{DfNsH>s71?+!8LZn<|0m;*3LG6dOTeNQIaut6=tKD~I;a!}rUV z4vnsjbs#++Qog@c=n6r9#L;>l0Nq?W0t^kEx;@RP$tY)>75(K$C5WRp+*B7RZy28f zu$A=6BlIhL?qP;fZ`31o>u)U^bDO@HCI}~RWYk&#d$fMA>#K-A=R4t4k7>_+wW2gM zDMfbjq@h;9$RaV0?J1%kCU3V&TkQ5Yq0lxD>enzmPLP!$i6C_il$&+A0y_g~NOSCs zl+z|rz-Kcx?27Q+gc7C#1@V;5py>EX{jH63zXQ#6>fqOfrW1na04L_Q1NQ#6CRXdA z9C(d=ysW1!s`G_GDiNROIL)y_S|I$m94cO2LtR8!hrO)5_QJHUTZEvZm+{}d0Pre9 zzYgK`GSouPYP0TO9Y)A9vqX(htH!ecU)|bO34t+Zf3J397yy9IXkJ{ir0s^%zN;(y z2>X@OBI#RusqBg{&@Bfa9Z#p!Iqc6Sr2B_kB^zFUDa=^PbXB8%S2{K3Yugjx#2(nt z6>9Ss{){vFF8Z`bMiaOu(?-U@L$hv1ktCn5tX{bB(ITuTwVp2yDK``B-~sZ?*U)Cp z+Ins<1{rMjtZQ7?{m$Ty2^@nt+@5?rnM+p`f65H0Bh2o{3ekGzZD3Mg(i7Rm`_o92I%;!ebPWV2GF z;@`!shQApg`~|$)?f%UHkzM=O_Fr2g*fRzH?wuB&{R1#66A9?yW;HNGZIZ<_H)WH! z5cKsf*c@tdcI-^^qtRzF!HxA#W*EWUj5a={?N7 zSG1Xix7V|Pg6GLrR0X;XFi2;@$;Wdi%du;EUql)ehDRHKgR!@sCSa2wOAHD5cXJ!d({VlDabo1Ol6F6Keu_58*#f+>54`pll+oT5JWU7}v zOadKRq&8omO4#(LQbg@@q}d?o2pV7QJ^e}zyz|6iQ2(e$8@(7Q1DA6ZO?@R)`l!u*3T z<}EvR3*n^Ln;;#!Q5O1YK#uwxO?P!C-b4DqqR($!3=+rD0sd>+w?~~i%ecYRFAf2I`5?Ve&dABbwFl064Pj&u zdQqnF6folh*&sxjiK4e6gg-@9DYk@7}P&e<3Y@>uV0@_NyC ziwZlr2FIQw>c@yFEA}e?L1tD8u)=_i^~}*^`Q#7i1l>`w;Vkc%dFF&7WtF4dsK z+c8E)WOX`nrj6lg528hlOC=rFGrgN82$TQ&5Ujk4quiOrYh!iQIkCxZx-V_KmG%B3 zJJ{4x?6@@`29Edpc_l_HxGD&3^8I&2`xg3H7RV>T&EBDl-N(}>LE5a@ngC4hocaMd zFT4^Tk!u65(HD<*UlsXwmLuVf*o?RPBJhQcCYAlTmwe7zf$ISI5~Axk1(}FjA+CAM zUmjW=PaF;h!Ceq$Zcq-*bVnw?{x<9p!Fp4=w(CiN@F4V5(0OdL-L+@QN zU&73XWt>M2UZH%)?^;)F*OLmfX-&=7$q#j?ps3pZ+1yTzE7(b7T% zmPVk9+BAMcnjF;Jx$9(=XMICx%!kL%pV#X$0s?RNZ#V36B?O%h&mcKum`t!Sim=pIXTy@_S7}GhK>fccU4lG__k^?r1;rw@H@? 
zNPMgx@hVol_9RSom|;4t>6f|nNi^p1Kcw7@PYJNJh=0}PHd`A`1QW+M3s|@7f47*6 z@T;FkO$=kew98hsRZ<5Nc_G=4hg}@x3Y2lU~ zM|AgSKQiMr4|S9ZD8jRx5l~ovCKK71|JGDch!E99RVoHIi94NZS81h0Kb5+tMn>`d zZ#j;IyEOA}oi@t}z;U4(EV5WGih~=}Hkh%1tUfr|{u%IXcbMZZrB&s{u39ja!QX)K z<;NM4KLB=}&Tbq~frzTaq-s;sRlDoy1{G8G4GX9h^4a04W<7)07GxBa;nIjRlYL^a zMx+;kSN9VU_S#52;zbW@ry9+of*flTD($}}lC5tL7WsD+*}6!}=JF(I9#d*iTmO+z zY?S{;LV>&Y#;6fWI2*W|dvkswx=*p-V-Tk@fU=Y|U4;2Xh`GMgrPIq+9BT8aU(k^m zO+5Hx5oYUCEN%pbe$^R)LTPVR_O7&Mi)s3IueBuZ!~_sHUnFvND-)uGBD zS;~Q`flHdDRyfYT_g02~rv$*9U%|7$kkCk?koJ4IjA{-9WtS6)D>gVkRaC2c8m`gLD26LaEApHEz{vhSVDHhPT=HY}q4ms7E?J58#GG(# zlncE`Ty>hSd1hYuxMbL`_;N}*874&38Wa%@J>OT9wK%N+!KC@?Lm*ST5g#TDscdkZ z{>VwEnFI*jRu$)_9~-XkjGAgyCP`MNqk#I!kLP{~ov`21rm1>2v+>H2uoKIOl%%yy zMCuOOIhViXtY_wi<*dSn$sbY9(2n4_pk|l0r;n-K1j3PTl|IF2O{FWV;Ne!^j!irGim@;Bdui29avAc@h&z#ekE+m!))vzX!KiC0k%DO?|fi-2axWs4L%ycQsv~B z4Z5h5?bE=?r-DM4xVxg0plkx*MR#P!D>Fg^NUUdU&fA8Gv!P_Fy8`(P!NKNY7LVXI zTpw>Lg=1T(0-80n6tqxfeUM^7%PjVXJ;4=k!Gb$tR z)(z0HK4SG-e2_s=oA-PBDKCiAg-c!2i53^EVB8Yeue%?8!R)sF<1>sACq|9xMlkm8%RvVZrY9eNvqJk(l%P=#71{z0$Oggi&ZS zQ`Qml*P`HQYdW7^Ki3}HD3K;oV?*Sj*)J4JEe{Lkwtoo9=QG%%~c2vHgcV(YpyDCrb{zGbZU|DFs zm)I0|xM4FetS0lQw%CE+vM7-rZ=ehFReAkIf-C`$x_<`tDT03EKJ|2QOVuUyI}-|k z{M8jZIrSz>Ev4N!7($ztWt{Q8m7Q3+tG=)H`jSOoN#Knoj_7zxZ*Ki?lQ3_m8fp-= zt5GW`;Dbn(E`5E;j_PlG#8oq2MY!#!GPX5nPL`?7_9-Pr^kCgX}vzcsIlytWp8Kb7kM)Nke}vrE+HKADc`N$w>u8oWCL3;3(yWMlqYEK}NU z?H`^-9ysMg8W;9HI>tekR2OAZyX=x`!_z9k3x3SCH{5QrFPevWPAD0r3JFD{wgLDH z6s+@_e1z_eUEM(W<#xjQx|Eox{c5VF@XpS&X;f*~LU5_$^{1Bd5>k$fb<9KxIE{i&gB(1%MnH4%akEw z{7?A`^VDs?|L;Hw{q*Ph%n&hCs!#Wy&*Z&KD&xY>pNslZ_^Z<<42KxVDVx@5UHmh4p}0fAk7%ME>R|VsqY6Bc z?77p?0X=i2KiYrdJj{Ob)^%B}`0oLX^rc$ImnCkQ87J|e&WiW8QZgU0eHE0$QL|a| zMV7Uvvo&*BhfBnz(dUh$VtT(gDD%9blU_N#ZGhEQ5o~a^Mko^tv}+u96<%p>rW?uex4T&)L zZmuX2O1LmHHlUt-0b9`Nm;P)4B2zZob>(MpYNkfh>72|>eJ;Ty+cI?T3#OMqX{MmH zEKdJquqeD2RBq|Q1WOc6c{zp1&UM_?c{XlX#E<_o_@)Xu4stm%SyJ!w(z!07)$O8- zn!mgPakSc8f#83i*&^1SOOd+;+M5f0>WTQKA6135obB-X3+2fSz= z*U4EXx4bOW&_WYI<_UKVC1HxS^|x_<-Z1~%@10yD$!GybHHt8-2`aY})9B51QD z+t0*^5CA|!gnCpsu2>3#v22V{Qh@*chA&iY0as#>hi7nd9ACKrw~&(lZ%P254A%pF zy;Zb4%t9AETf@5bM08FZkbh)oSX5ewO%m+9`0+z|C;&jApgS(mg59@kE~L$6R_yo> zMnPA_$R%Fi_tnEFAKcA<9zGf_N>lAwGQDbzgy2F9 zJpw*9a|V=UZcfKVh8Qm#3U0Fr31EhL(e>VTPNVk zRVchF=keV7z+X#oM^l}Rf{u-W)JpzXt`;X6U&Rjb;76+Wkc?(O5^`HTkB?Oo#RLWw z2Q(A{{0;--5AQ8uQQNOhR@P#HZh7zCI*XCQ`3iY!(em+e4x+ZgX6}~OfyEoX(ctut z5E~kslpY`FT@P?aAp-6U9N=(TE7hYk!Usr+UXSx(vLPMNnN}Z)s#v zE{ksUqLy0q*4OG6GViZ-RPkpf3+SFywI#XJ_NH%azI$11jh1~Ug8n;lO2bY0`1FB= zj`k$a(g>?opO7lOG@Y=gg*u2yu11QY|AE+T7Pbq|Ff>2gUsTi31Tz@J$GH<5SH1^- z;EQ+<^PV(lgyR0ms-4KHgYYI!F0!?|5s3uyjKHZzfixmC&}l9%xIBaP%VBet+kIPx zl=ZC!=B)>E1+6l)ynr<^vP0(0c?Cs(F5V_J_!X)%w0!1C@AdMx%)_HW6L`+A5|UL; zdN^kc#_+3T{yKjWzKtyDCX3f&-*nehGMcO_@%wl9N6xR^*?Hd)#wq8C_#Aw>0kdU; zPo`?BCeLvSoG`WP^T|0>bwFo!;1lr=jBI9S?^=mN0Z?pPj5-@mA0a6Zttu3u)M+6XBb{N~ZSA>lEKF1NN;)~? zKk_}jkGqJoP&u93H{*g^l$VOaw=yNomdw%Z`~w@+hiJK(k(YtrEVjp8_W4;DFo3Tn zbFMSsK^=QK6_eFn^G$;-t9&RxuJL~7g}fO;fHh1XCNu^#Q7H7^lyxxQ_FuF6{;NLr z?`+J!n(oJ5cSFk%!+*%S@vT@v5}r1L*RN-gxg;Q(Lvti1DfqY?o|JQxw8F8d{xtc* z#g8lQq%ss;l?}-EsxQCp20klnkQ)Gl?1V+(bt;V;U6LvC$sQ{qo>a2%q(G*E7XIoe z5#_BO(*qV?q_@c+F6{<$BX=5=1-%tLWW^dks za!aDo$q1IB*S(lE?9_SRP!r#OCDKbh68Zj_=gGqbZ29}N+Jd*3ByHZvb9(`|hoU7! 
z<0#gr0vYSQ{kX7vs?6$PfvBdvH?JFJ*r)XAjPf9KQMx%ncC;QVv0E8iUvc-D`6Mwu z8?umieEY56*L6wv0*=UPZ%t=-)Z0f$(etc$lUM-F-@_h{(+gsiVz0sg0OYgT&3<3T z(PnpK`+&!7!7@3Hzx^P~5z9cmJ(Y?8Cy2`TF&p=EH_tcl_<}OdwzvU@Z4|!Z2R-T; z(e$HtPKbnHR5){BX~zIH*X6Bs%G~qrAfXH<3O4;-BD?!1OFN$W-|e?ON-kcaqKnQ8 z6GB9AJ&3*@?l8c9%@&pL4{cROt_7`fzd_v;nOd))jfSbM9<^wuoC zHw^P4{-oj;u$`;p=>o&}=OT`(=!fk;F-R036#Tb4^e<4@nzyd({~HvRH~#+)g^5A_ zB?|jkbu)`6_4P&T8jGV)CkrQ`-W_r4W#p}>5HHmhEl@qYqj`Tx)?%Dx+WM8Mj|;dKWgh6#05 z(n@Q4IktXSK6XJRYDMeA6y7OoiZ>X5{%H*-KnYOvapF+x=&5M3Du$g`B!^gQNWql2 ze$BzTd}JSX@KbtI2y>O;Pzo?EzYSk0T29KMey2X_gH@3r_p-VDWE+lXD6u!n)8lyh zdsnOQF{|qabved)N@J|yOOT#QKg?eoJ4qE7Q{)$e`8U{geX(gZ1I}K{TZ2xM5>J;g8u++{t^lkbcVj=1|^F5DKP6^$-`9e;F4dbBKtQoYV9-@M3|S&QvVXY zF+u+yV5b;EwXoP00i={!6T6D>i*SOgYht7c-w{Sd!9K7y{ymn4)Rgg>zV?I9Ui*h? zIs$z`O%darXL<&sU| z{{r$!n4N^#D^;C-rmlL@gLCZHf`@l-n3@(4`9w3xs3t!9CPEsz9;<0znXDcW`x7cX z5PD2wt(IU&%!W9~uurSn)E6Yo#~f)UTeOc<2}0MB30O5fvme=Ra_|#NjYOP~J%x5;F;BqXZpb>a^WACQ#d?`j)ld>ul&W-1v$YRMx%fk%fXm{QpZ%N z2QTK&$cN6)iy8v&emHshBCCBL@N|WdnB)Sh&J%hwG?$;(p+(ij!mhBNQo&XDaTyRC zoo?2{8=g3N2)ndtsbA|Xtln}=WxvS#;nyAH_#^5(&_*Eqy+oA3RHlQp1fNBaj)WE` zh$$I5`JhE6O^N$0coHto5{$BcQ0N+n&Xh!^hdWgv)KCr8niqd*WNB<4{-6U-j|&%W zRp#GvTeQ@v{L{9Eu~p{|$4#9JvOO|rQGB6#)S#q!)jvEiVvy{8$eC@cXoLJ}HqLp) zoB5|}UBrHF9V(Qc)UFtOuY&`5cLON^V-%7=c02*k>E0B~Q5LI)<7A359+H|Ea=-j` zjZx>Ywh?_sB8@kj!;TYjUd=Q-v4ZO?Ka}^Q(Z6Y*mnT!ku9ZhL!?EjY@8+m|7Ci~m z@ATbr4~lYJ>`WK)ke)Ga1e2I}7u5MZxs`<%Nz1n9VcY&dbCfhu{_b+b`*$zEHoUA6 z5I;AM)k(=qCwMm{aU}tV`gtIZ!~8K*4{v-@u)6k!wlcuerLnslh~E=5;J%=W{Bv=b zc34(!bjZ=d#okat@p15GZKQZrN;xa#g;k(K{oS>btkqsq`S0YkpqH~>#R7$9RC(Z5 z+Hm`KkFRNZ8G^2_XK-Kc^laqRwq>cxfSkxBXAd9Lre1nm)K#AISk5xuq@0HbLAk1L zYo_~EQ!5*gCf5ZddQq)uk)oQzzS6?d@N9pLUoP&cM>diFEub{e9vYF0HenHEj$MQb zKGy2PjXt^yW&C6dlS8bq_};!H_{CWUV==|!omr1i-oc8b@(b$$Bl*>Eay7sMlHFZ~ zq=YE5ec1qFfH(uEZR4QCjKUucRyiYJzg+C@l6lvB4ly^@BTV`#2>E@@-PDAWr8+=e zxR}k~`k&Y|N(sun1qyNpXBUp(%f|^(c4Z=;$IyGSW!Bv|lv(KDI;kh}(|`mkB-xle z7$f&~JKC7PWD9g`!OYTkH@DHvM!7Q80v>B%2SBk}+{0jT;h#t13>(BR9dhS6R8{{i z?A+-&(|i~9tQPSP{>>4%Fs7q~{?7{D|C)cB)#dHSPV5m77@YnN4ZvS;=<%{#G;m7% zH`u9jy^RoQboO8@OfAL<4al{#<965u?pf4G>W05fj519i*bRQAE7M_U1Dk0baCQ#& z(LcgYKRZkpc{fCv4Nmf(F70tgH0L(r>RxHgmfT1xUi?EcYy)bwsoKPmd`{a~rCPyL z0RU(u1*q)$!pA?Hw1AFBJRs}6E)0+0PESP4cq%8 zt*r6;O+{HO86Aq0k*(!+Kp2u}1wQkSPMCdi?_Jqgk;p*N_X(;JuNNx`$e?Uou=#dI z%indtE{=}a?jG_NHs3_{yZzsL1jm1&HUG?%)N)*94tl|*bK#K|+MQfH6ZK?1s&s49 za%-X%SCECwF@#rkCyi9={T{ts<}Yu+7L^`u1UXg{d(Lemhh8<3QqlP^8Np8fKh!h0emHd$c0l;rqx86y}LllCNwy+ zk((EXCv{%|+?nRtmv%CjK21+i2_f(pjN-@|P@uiB95Aay|J48!FiFsdSr`By19$k7 z8(28~R!W&4y?F$ls*gxRiaf4N0<>G2(r(1UJz~WQZ zm-82Yw0oY(n}h?4J@ek^|DaNfGowbo(>hmQNvp4Ig9H|~k|G~4ecmEc6$a$l4xt(@>z-!tpedW71T@tg|!v90x?%{@$2F!Y04@}qOh!642su*NU&;@zj9cwOJCEzIt?YWj%F#|>3{tJjOdH_8>%iPE#5N?-x6s4p`Z51R4)NJEk}0(&wv-A!pU$o6dMLCbpa)2B zH3OPN37-2l2U>!2$cTiH-J8>zB;h=mRS?pkv}KN`bW0PdzEO!yU6JvW>dNOG$IOy8 zK0DX5>`@ScmH+H zI&0RPv-duGzdrB%{_G>-)mx30D{*z|iz#fBPgqGvWx+FM=G>{YTB>Ms&u_S@5P-(3 z^czp{pLo`OoLO>j6@V4ZJnn_gIh94$wV+ID5#WYPg<39zEf|$Z$G~gGL_x6GSwihx z&-TLzIHaS{$kx{+8|w=RE{i`c))0e?+Xf4`rG1=i_D34^+1Rl3c52*71v%S^5Bg zR4yTn{L@v;1OxBK-l_!v9*-e)wY+FhTW?9zaeY|1) z?ekU`0#D>DbH(P2PVu=JTvi@NjSK3~1NH=^*sYsA<0Q;! 
z*v{95A({W}R$BLnmZ^F7ce@g>zxp*pl--|)lj$M@$Z3~uDm;!TV^fU1yv}cLL+{>W z_N+inx?_5Ckq2Sv`&>g2&<-Ec7|pj$xoREP;s`ilpDx;wMjMxm>tzRr0)Z_Z>1AWV zd){e$;w3*>lX>~kX&4JiqxtrmAZqIpySg|=?gG0{O7W42zhk2Zk7L<~G00v_zJA7E z-v1$p3kOJ>mmiqV#Jf54NhtrzUMO`A>Tw#n-ALDcZhSgx@8(*J()w*5^t^$3u!O(d z+)|`e;eB?omXxh#d3NpH7U?9ako#8F_IKPHz08!a+ZBx6Pg?jYp-{Yy~p`3ufjx zG2nO4>V$Q4cCVQkODvLf)v!0 z{e_ALkebFYg5HdlEn6zo-T5JJPU)T_;Lh%U$5Gyw?Zy6&NjQ!sMu%^ zHON+B_Zrpsvh9Yh`$=q^bO#EnHWaQ>F$YBpkX@Y6XNNutG!HOvv2t6!?G{VUVB;wBZ8b|9O*{c!hb@ktN16(TJaQy5JZ->aLm?0G>We`(`fzwiy#<{wdsPGt_=HT%47uXHxOgIu|SH z{_Jc^;dNs!nea9*OwFc*&InKjajror|T`2B0@1KA_z zufC+R+9N}GF&K5v;ddI0gWxnHHAL-`;3W$UQ^l1Bv^kx1yE3T`#D4BFs!`$bDo~T? z-%!c~8aJ3H78%D>eQL8=+p2?woy@2c7v(nWVQlngs?$hKGocK0qWhf53IxR(MwYHK zHQWW;l&x~|+QS&M_k1g3w?0eJD1|-nyz=dn!@*`7)rkWY4?wi&$8150Dooc%-R$Z@ z$-sV&&Ya87pD!PG>OBWwHZp;$sI^ZmT6nf0UR7msgUvzPtQ9;LGd45?3n^oEwc$BW z#FM-6I_nDrOQIzasokFxIak5<2pb5zsWE5Qzp=!-;J$FNHSLGc)6GC!J)@nIkV}`2 zWB@@`SD65iZENMRO#rU*Bv>%^A=Z(A%WMO6TG#onddCoSb+I1E|EaEz?UJ3#-cKGQ zcGkMg_c9s+dF-4AIuF$`WbTw{(-_hDQ+s6Vqp?*Vv-pWa0PsnJ2 z=~G#>c<+N5xH10P7Aw*h( zl;VspN?Iqz%XZ_vb+;+B>FX`tc z)a%OUEj9CM0nXy1Se?sd`TSkVv!*u4{z1tj+J>_aMc0?v^ z`&yTg`v~h|otg2;KH$iXgW@yf9juh&;RQsb(LJSr5c&rh?kxZR0mROq-`SAjGJKcmR{jsmK!U4KW3HNS>5nIY8AJPt zIANRT7?2}c%jVz2GUBQa?>@jee6|y4quVsB0U1ljf zDOv}QLQ*A3b}?Mh6Zo07Sk9MsJEZ_V3(3zLHS*qbuRZME?(Oktv~g~ldSK#4KJ7Lh zqbnA*MJIT5Pk4V6PI8wtnmyP@?p(yPoMTyQ_OCY1scO|s zDqdGj{jB9;B-C}5ce&{X-(zg_eVU}Ul7vky zjIXU-e9s{Ndlp|`+>eabI34yPET$38rJvK;C%Fmo)nTf!MB=$rvXLuk^Iv`GG-PCRKiKS&LZ8P`3^*s%L(( zHbZk_*nGz_KW@>^`kq`M5qI?&95bHJ5Xd74Srt#fgOgX%#F2SbUlTGrPfM>}i-rk$3vHd!HzHhSIgS zO{+A`HaUMEeQ9NoN^4(Uk5p2f=C&}M&;iF(kNFf1uia97qmQ;hYB?Osvy3^Zxy<#Jo2QEZorzB!r4`PY=I7??!`f zaHQe)>r3ury=&A?Yz$Xaa55`3MehJe}F=d|? zcF|^Lj%<9jRnqKbf>^_< zE=59LxJ;Oirdnk`^Q!c?C)54w9QQ1I3aO>VQ0o$#RvBjO=Qy6?Dq#&3uHNK#B-Hs2 z6zx4&b~9C+Y8)RIpYgX6uQ!m=(fQmfTA;c!@xYIXvrQ~IFkyMn>XpxWy+p`DhF1Sh zyrx@u;qr+nS5}}wM!K#(JGsR^J%EPy=zZH|Lw-YR@tcR=ypX(lT#k-28c(_P&%ajY zZ?np!9!Zj#tSjo_B$spr2zC9Wyn%_2qa0o@O%Y8LFMoBot(&WtnjJ}ee79RXRR-P4 z@IhA1%G920YVlVuW<9arhZ3fiVK*)p^4VYW7x+07?Iv)hcvOYG^+&Y2;!uo9FD4KR zwsZdM=h}+&8ktV}urm-I(C>CH_;yy`pM)Z5NpDMJBY0L#t*QzPcd%pPsaU|a)l$H+ zR;1B9~Hcol$1u^VOWF(4R6h|EN^4 zs?ceRCvZJPBEiFB>iMEl-sis0FHPOwKgSap%H=2{l+2^9q(vS!6E#KO7FOihC~UBM!k4E%AjLm+XGg zL-3eP$)b$eELv3SuJBR)B&dLGmFE=6Se4ZnDA97x;C}fd1?KgMaF*E#{7u|td!ACB zX+H(-Z)PfypV%AOhvkc&V;!8EeOOum?ELiN=@eoL?Z!LJu6&5SRGjT9RAXzIi8u}i zE&iUuOrAc>37+^_$L{cfAW_9IS9T%AWh|p)K7AiR=ZPih%m%_)r&80pHMwYfs(TbB zjwjO4KS>K+Fbg=QN|YTb9YG7gwjL?o*JD|lDDh@;dh z=yMhNm6D9Wo;(g?a1-L`mlLy;)IQ7KC;~vJfB}ga$S0^?0Z+%b)V>_Wad}w(0P9P; z9ZJnc{#6_67oV{2y=8#xUP9x!BUqyNMlVyPg4`va2qN|)78{Tz0J>|_ z8|72WV;hZRtnxbPXV1C>Z~IugsQ>_0N54)z!jf4ecM|egl(Z-y?xp301;{c>VUO9W znuVeTPD!5uo1bkh-l6%7n1DdjZ9ccb3P0P}XKIZGCeGpNCp2A7p! 
zocYfww{kWb?PI@lL!O|C`9U`Vo#gUQ7t;#WTr4MQ8pd(b>$Zc5wG+Mp9Xbszt)lq+ zdt0>4XYGQC!I3WZoEB4p^tK!X*FGAOjxym9{o!(bt7(mORchk3e!C_9*nBdtv%wjh zhw-x7eGGa+3-3aRzt?8A@91b!%N9x-Y^gk&Q3xWIG2~44wC!N)iJGY!bt|EWB$io1 z^3);oobyil+N?!)$8mFaME$F`6y@d{R>>b%Mu=SYzvkta!5wRKxmz`>BL)+TTk@@g z?v@j8eS(Se%*4D7mjs!dhg!BzJIaG<*cfH)J>IJ{abikM7G{C?m-NwpF#+r2K=Olp z+(HT&uadn9xhK&T^6+#F|K|HZ8wIp8W?TE`FcCI&DY1-=58-&zLb5jZVbpQK!xUa7 zrK)vKK_5U~t%C0+A-^T54DPu5Ogl8DjY0z5AVDu52e5SS~fZ>=j zQgvo7;O89aj)_IYzYr}Jaahwz&oU{`za5|)p2wi^jB zv9;a7;QaH1x2?9sU4c+)=P3`{p|k89>|P+9Pii9K_b5(p&Zhqp-JOsy*}{O1zI(}A z!}EAt#=Xn8rtu8bx?$RptYzKXtoahA<>G+{wVJ=UYR9uP^-a}ufdOI)8Yig4r{$8T zi*D-*&TCmSoLB#n#-IE<@Si)(zZn@%h5V4aue?R)7aO11kL>nL6j`(lEBJ_4{?bfX zCC@2f4VP`rU)LZ1J#vhUGV@dmbFox1q-bNSIOLX3n+mNAvFLb+=Me5@UH_@=H^ctu zcZN+uT)t5$t@+7{(ZlMKs05CI-k7rnEZCP1RXya|7w%t39&z9%?Q6kP!6w|XJTESy z-Jjw61zZgQ!cbdCs?-Ge@Q0k?j2;@Q}RXni)T`%25F zKLO8UxtOKxzzjU946dH#m}jizxk{1FIlDbj`crMMV$XxbOH^AyjBT#le&W@4j1}I8 zyI<&ay-3(qa;I0B8{&aImbwet+nk+LjmSOr*UOg6xB9+!eCV(xA0w1uFeNz=c$LJ2 z_NqH*3f`x_!X}yenoglq^>INO;T2b|{LN&{d?$BK0sk!bB8|mT*EP}jqSX(FL?$9( z{(`l#Y83AJ{=5}hBbttx*7b3lLWiMNg7(kb*bs)VLE>_gTT7C(1C}Fx%Q~}=x9ihT z&SmM~*wys{HFd?V>E)gp$$5$HMC{RbUv%+&HaQM6LpGHodpuh>+{86n&cJ+Eh-(Bbei44NIR*R3T<~geJ=j6C^~g6O~?KupVU}`-)Z;6-Oi|d zfS$YU!?HI7Zo#hDsWnm~hhO#3W8O6m=%+fD;S zs9!d+A+e-7vh+9SuGF~fN^`Rt<>?a#MJ+ZdhBFYxQhnoJ<~RTViO-*J1jyn=$XqxQ zIRc#2waY)^gR)R3CLQA`1`F=KtytpGQ}z>lzTpddRQk_yWGDN#nt(9C3T6GZverg7 zP;LFfpu@GK_G(%>Z0xCs(-kn&6)M;*lPRy+o1ixB27z1hdJMP_TB}HFF@aVaiZIBIu^) zG!yFFsGv}8{4&r7_l`8jgP?u z2CF6AIXo~4#ovYp6J|JX0O;(juP26?VmHbw)0m>m>t_d=4Hqr0?oMm|M7+>i;o1i1X5t;j2YfW{zx?0jeFWq|i*RZDI8*VaPSfJ=GVSH!%gLS^tPKC;gY2nG z_y}5(PodaTcYO7OeQ=QVs{^rHki=;6`RjogP!n!*Pm6b9gN$_V5=IWsDPhxWP1ep; zBI!R8dJ!$_|D~qSH^!1rC2ww$yU@xpZ5i5fZ)%;Iiaumro)1D53>J#RU(Ki@f|0jR zG32un!O{^AuEQ`ti;cl5*Uopd7XFN`6&qCQ>N}a>N=?<&8w3KS&kQT7T32RolBSs? 
z`KA|dUHdOt|MdC)ko7Yg^*=@D`$~GbFt^5|H{REkUxr}ZEG(OXt4I_yk1UJN;@!yQcr@?5JJe4aCKnrUMxhHFjQxYGb)43?p+Ob3(VEtXQa;N^5>HV!;PQ%r=X zjuCD`)KZM<&r%Zg+ES_^l8$LJ4f9H5w8_VwG)gjE2i=S)Wfk1HgPwcP77TdYv6;V% z5xo$7@qR*-p;NnT^_TCn}*X0Upz$IiB$X9x;u4O)ae5}lUCUGJpj2t6z)_j}3 zQg!` zeIqJajm53!RX(ds_?LQ*kit}PLhOf>VCu(Au z#mWC!EMUn(B8VF8dqrRi$gy2J>rB6|GLrL$1WqEDbscAlq52eN`@kyn6R?=hc4r<5 zZ=?4&Axv%9Q`c)^$l`AoI{5~`#0C)48^nOE%s;~ExzRumlCq!T@d1Kd@N z_*F0Y=5i|H1F?*=Tw8@&-b*B93aR+XpHNfNeWrl6?_@SzN@OUulY3IR%NfAP|6 z0rV<@#g9C57yi4AjQLczu`P%$j=8o>_|*PN+GgJ+^_EVQWjTq=#EagFmRP;JX>w`b zRbT_Tc6Re@sQc||XYADFX_A)F<%6{*1A<^;ANJPE-#QCj8#@GF+IISq1S+EWo*jKT zIV9p@rbBZ*kzu(nIOOWYU}KemYfCaD)p*iowbo{yXsP!F(OZ7C&NQJrp@1cCbV#sZ zxKu;LjFKpah=!ooVGt?#`XnjFu5tAe^Nbuq5(HgHnJ@3o(_4!NKX;i(m#lC6FCE)0 zPikhV&7eeW#zJMro-^A}Ji3Aq;BJ1AtwGBYVaGu@C(tb&0|-oOIXxl!#BWx{XxT=; zh)NGeB^QU^%|$E)DA-}jeF90&3wl<^Jpcr@9tV~#ORPoAcUC5DgSXk2$L5e1(r#8Od(m&C)ybJE zRWBh?VHKPY0TaGN=}la}J=t*l9t|GQjp&htW?R6JT?Lbp9!`Y(#q=4nmocP1!{LtA- z2%&*#q7S`#TCA_*;G**VSWV#qo#gtKzdG}`oQcbzfdN&=_f#Pi)L~y^hFE4>GC%!Q zSa46=uPU49@yvP*bkM#l_ANuHHGB9-e|K24<-DM`RTO!R8kle!q-(&%g~u@*)70O9;fZx8-ZbW!mjp5%;Ktlufk)7X`DJ5r;UvJ1OKkum!R+F@8mAJm*y}e zB4WPIytNI?{OlE%Nqu|ua!~hvgBf5&e0lx$+lPS*|Cr&Sm5fw%f0M?mq7spf2WYr_ zQh9lUuND(MD2(hmpsv>Of8A9x89W`;{3W+h|4hqeWj|b4w7DjVKLnz8@+^)Nv1Zmj5@3<^$M( zH*ThA{pTgniPZpvTlP3n4&&{_FPkZaZ&#GjW*9T5ThwAw2kk}3rMF7z%T$*G5RLN( z=^i=Ya(InzKt@G6P2#|#Ys&y|;6NVYrAbV-SeY?U1i^D8XkkWGSs^@g5t9vks4wQM zS@3)1=7YbBG0~V-D{WQH%PLaDklunQ{(-dwqWO5=%}u;HQ+KvMmIj1~Sr#AFCwFx@ z#iG9lNEW~pN4xCLkzCAmFQ$wZCG4wd0$~?@PwyUitV&HZS@(IeRvx^q`LfDyLA+tB z67cJg|B}9r^~R>dwhGGx7m)MPe@~a8s0<6`8z5jdx!`LnmhCSB_^7+XWFlr?8wqWp zNxi6Xp($VQA(7ecEDC#GIrIVRdwW>Z7NGoI(%(bQSUY53iSzG*zB?;%Uc_YyX>ShoD|daUp;l6shGG-ACZ5kq zda<_w2xVhBXn4JoH{c+c-g~9_Mo!CsJqI2B zh50oOu{!q=@8xPrfS_k@r8JUGrWJ5=?-HGg)F{Lo879cJV?F(j3SFXAc}x1MuymX; ztx}d$QuT*)?&p-8lL5OYW??Ln&jj{%8p^2Wi`r4VV(iZgy1cF*kzoTyql&JIp-Q+j zhX+4i3!k;5R18KJR1R$uyJ-UZv@s`sh%0V**taJ{Fc=WAowlS^`et?DUVYOGN;kgm zv+1+wu;@v|)xtoB%s6TCNOt8COdnCSgt zpxM*hlZld?88GT*n_p(dL*rDHMK8mA&+J>{%X0I#t{diKa9o`HbLc&Vo2$o5!}Idk z2ca?ckkiK#x~IBFg(;!|3OM(hTxko`=}9t}`se-*(IU4D(t?kKzRNzMA~slLV;%!L z9)RVzzSkBe81L47e<>?+esi`{t4mA)rm!igbdE#V+HvsPl*HZE)^L=|Al06Ak2biS zf?764qe{Z;uPlr^vyRWL(w+p5N-F4%7dMNrOFS2O&RC3{gFX@wdCt?MLy*Jc4x=fa z?&Mnb;$(Yqyrjn75j}Q#4w+6|LQ>BcT6Mnf$tiPV7s!Yn`l;GlXFt1%1)Vik=6(#A zQyoX?8ZMts*bP@pJl)h6pM~Vt-f1bI{WWDzyiVsbw`9yf>r@tEP(PGn!|mffP$u6s z4V(yOwIILa!}E8N?7cfMMGoU86FuOt-`t9fBQvas5o%D1#-qJ>bneg zuIel50o{4*bT~vZ(JmrHyhF=*CamiWV2VdXo=f4wLxZaR?wf5d1S4bt`rWz^v$(c^ zo#gTwJ-wO$2N23FtB1Kp}~Jv}AmCanD|PqxRp7v3c=kRUho@Xl%TExr-jXKi&vf=?)`;~|D2XGqr4|DkAO zC;LS#h=1$HGe=aQiaI}i3g{)c_zy?+hpiN5BNuv-^IEoz92Dm}N-xWw7l9#12y%Y<_Qi{WM&p{|80B<;dL`!l9eA6!J~JY!FH3gueA*s?1hX)tFO<=bz-*c z5XRk+mqR{7n#)6};3?;Zbo2+VZmk>{mPjAF>#t>&iUbS)dQj;dxn2gjtYI9ZgVZPH zt<^%%LyqTM{of2L6VwaU1pmUS^522h^MGZVgn)d#i6Qjo907`QsPkx@z@WyZ7qdZ+E@xS-<_f9V#y?j)qKt3;+PoB!2!-1OQ;8005Ze_pomx zA}BZIZw;J-u!Qpa_wQFW=0A89?HPkg{v~$(oB6_TV$(1&BzT+a2q0_k*&03CA}~DT4)d!~QS=DNANCLm}oV zd=F8>D(1#T9CnU!Wt6a=akL?`{@cqC2-lC@bnrHI9w%2{%3OeJDOw}ReT{NmhsUj?=^+V(&8w(T|lkx6jY`x}47ne1LE z`0cuO9P-n#_BIhLlbYk63`sYPG`C^P9G`D?mP#d5L4_aphQvx13`N*#O}< z*mC?ehKTOsrYnve_)=U4=Ed9nu@qQhmiw{A+$KqjpuX84s@tCe?}?MYU;fAaO; z%-i27>jT{d`UyqvL{gMb~bS&u# z82p{A=T@iRWV74xa_eX}j}avl@%l0sTjt-9bfxiOy9(c4;j~$h{;%U zW2@K&U*_t8_h%Sz2*$s2(iCz}9JnVyfpGw7tPG!laEvS{dB6wu!s#JyI}@#nz&wXO zRvXadBJbHwPWe3)Pr&slNVegT@9vDkC~&1qQ{Se^r7En#V$t)E)jU94ZNbN!j#*%4 zZn=49Y46jY+Qi17t@Ie(%pGN|3(~b0?&*Ax9`APSY99DJ!@VQ|F3IER*Y)~>EPpH) zo6`jgb!)@6W4J8m_SXcugv>OzOH`2c!xMP?US7yyzca2W(^U?vFuk5j(&KEJ) 
zNJ2fc{2bwjXw}ctNdB+#SNLF;-U3(vwN&54b@eOkmjQiwi?2F=ht5Ju9FHAM;3Hc9 zf&eLa%*JqEec4F7p2)vp@mW-dC%Zn(rEz0I7SU?x5TbgLy*EhoHFPbd0RS`Dr@Q{8 z!3$EqL#)YVShdQ$BVN?+seh`UT|4b&o?r|KF`gv_x9>Vi)+Tgl-}&}PnY7BbcY>#H{(9<026mN>;k-)+MlPIpT-`(s8H{ z0xw9ed9Q4Bl>JSNDPOguIY|II9i6;Commj(QLk>jHJ-`bUSin&8;6F847!6a@v(U+!kNIY|(EB zV8~Spqw?{F*`j~g+S7iB|C}x0EXe4?%KEp{QJwHJFG|0Wt=vHV3YRBhTsryicVqya zbaHqmx}I+{{_MhK~31V}5 zPB-=O=eVZF-&N(GXdr9Sol_6RZbY2!@#7w-!{Gekh3U!3IoNu(QK%xN8~X?rk6S9! z_t6UfPrkWSEpEB(Wp;pwHYZjrK>KnOTn9{ifOzPyD%!b=QwtN-^&Dxbgd7K)00`42 z48p+%GC+<*1bs);AcCqsa$_T+Rfmfl)_3f;TEPjycPysrx1Sje_V)p8K=ljqnfpeH zp?m=%#q~t{Ym@GH8>k8+)T;ib zv-3Njnu2?=0YwqAqXma8gCZiv9!?g<>iBWZhG#HY-uEoZnoeZ?S9(M1?#h)FM2V~i zlX!bbfm;2M-VFjVWN=IowDKxc1X~huL)cJ z7u9*>yX@m*l~+(HQ)UnXU`UGM+&=pHf$8HY8K^H4A7JYYL)X2w4`6cVH;(S*)D^Dm zffYO!1H?GPaocC3s^#if$813i0k%-g(ayn10j3g1-JS$;T|_SvJ*e9+<84AiG8nv> zQuE?+vd?ev2FxJD5LxVw2>t&=pkx;(ddcY z2&VZcdY51TXi4g&jBFWQU^`$@Nno@x@jCA?4z76cZpWTHWgigBs+!{*`cmv5eX=TJ zO?4_AtC)+NGnLiWI^2Ph8_+V~eYt)C^ zJ8q#l&I?XFBQ_cF9%ga2aaxhwJA`?RxGQDLn+1SM01JZw@tbT3dw4uZfyt0+u8`z^ zi|LYLA*zPsBswWr#;PzMPBiVw>hc!tlY4GGWhd+xuS^d{W=7@$PX=~Q3P$Ej4bRLi zL%GsX-TZTP!WuKB*(q@%z*m9wRvLqu*5Wd5+LHv2GABdSto|e+(8+NDoZ6EGE7P^C zb``1lR7s`rRYiQUr+=XcuMYoOyN(lG1>if_`ESqw{VBUcFNx(6c8#%%u^;U<>{%Vt z0PZFHn}#T$+OT=|zIuZi!IjL3s@1u-W!bbs*NZ>3lB*dpa{TguX~OpX(ThEt$TPe8 zB%1p@Q#sb}v(MR^Wr_sG2U8@VD#KTn@?{KSmoe^;N)?8%8Md?s6HvGDljxvu*O{Xy zhFX)ysTyGe(Xr=guYsI+)zj_MULD09eLE-dje}aj{Uqw~)211V-kWw{8bkq{LeB*r~3HmRJ`D)I>}b;;+>{Fr{J0k4V7DG@lLH z5JVgD=ajNTabt>xrag4K$Q|IGkLIo&G-w_m_X2|c@KA|e=r;R!0e{8t_dy9!oqHuBHi0~ogEtlVQ>L^O)gKQI3_)M>8k=eoiBaD9sJBI{_{{DlWj zC|nkbfJZ79aR)_8xh04^u6KD4CPZRXKL154!*nxm#y~N!T=83#BS!uwhJWm3lh3&s z$qdrl4~~X8l#t;tZcwnV&;ox)3vTUwb#V#IayOlB*Y=`gIG;`_??t%}#)}HmHp{!8 zT2b73u1p`PD9;%|dYdY>NP18jG|{s%I5Hy|j3GnlLPts}g@;7~h&PsQM6-w7J`@Qo{#V+@aAVq`LfSuB?{yLDGk1*7FJ42Ta>&xgX4;tghZ*4W ztAwLs^iO|Fmj6*g^H}l~N{r0f_e1`dfZvsCs zdF|@98=dr~-V1;JQCX^T$qdYqFXJ&gvPhkQ4)PWo5Civ1Uh>D+#6l5)iq$cC_mj1wb|62BU z>Xj0(ZBZ&%qlAd=GOL{>Q+=Yt+c82&$C;0-h5*3$t$CK4vq}HOsSk7 zpa5}zLgE|h*|GNnUar0teJ1$-QI+BT)z0*2 z!${uIV(<_*!+Jo1R*jaQ^Lp!3=34fj%#PGU>?B)0!!kd^j-dOxDMUcVJY0uZ#03Fo z_w-MZz>8}+^5svjghH_aCEI^;deBCexY%mMdPvn%k$t4{iMkmIPhWyJt{NNA( zTZd=HX=II;aqLO^;ZXNU%U0<2gi_N!ii?W^dfJbu?gHBt9ONo23GhA zn-(mOC3;MtoGjnvLxqZ!m=hj!{ib^V+@ip`%j$A{PEYEms&ymw-ADzGlXJk`p#?ed zH}y65x&ytR1EuqYh_vK~F;@Ur zXu6r{d!J>p_@baeqMy+L{SR|%&nYED0v3L~D|((-`#`dN%-B)AJfI9_7rZ)Yv^(o2 zlr>Sj*9x749}ac2X+pqX4}aGt> zTsr{(IfG9IKYMrnFY5QHxK%!d^*JuUz*=hx5#ya?t21vvHo^oq<&FT&0spP-YA)^%(YOxB0V2{p|0Py~lyV za7#WHG?XYQm10k<`ALM0oJ4ZKPpM0D7wxHTfZl~E3gDB=rCle%_qTNsBY~kHq9c)5 zkVyegG{DKq`APauze|yr&6(jtPVaSez_!k5qBP|;;JZ%$&8grjqv?T}Pe5Bp(ydzB z5zZXxd(nR0ofvyS_wdEho-o`Rr3gSvBa(dukPUXlSLr~RK4!%j6S&J&*5hYs9=%)R z->ct&lOOsEqsbrx;wK?sSt$du_Vy(A5j$W!+_V*qjPV0ZUsh76|Iy|LE6xEi&??Aa zm+HW7p!U&*3*@5-si)P$mF)RV-q*0)h?l;dEd!jYSJI8k-ndZvp>W}QJ~+J%OY>Zl z=~4#Ym+e6ZDHu`btM#9N)=r(dbC_*2Q-E)~^3`9dTsC-sd^#L=OjZc&oK&JdScRI4 zk1I?XFkd|n>#KP>x$;iyop2_=8GldZe}LZFhh-dx)tb)BhqtWoK zewLpzq`$=C-TWhav_)Z?aMe7g7VRIHLz&|Lv6(8v>!-=J^{F)Nt_4MRyNQp(qwmNF+U*P<8 z5IT(kMT+O7+4;Q7W^AXRXrN>8&tpArv4_A^0`l2wa5+RKEjLPF3qW>xIWL2mHZQ6@ zu8>22uX;RyFA&b?pSiRkg;;sBV63;7(W)ajo2#Tfz5}FDctns1c5ZiXb>mkNbA4(= z;b5~it4blf(;F=eVQ*taS@+QCwkAt-g4g}+<&!;ye^90~{hdi$q}_XDxNykrD%K;u`kUXk<#<2l4fR2IE9G@GMo z-O$o0VRXsPKSl^TF}eWDQ~TUMSSU-TKmu-R+9REQh=CQ_F%(;IZ)rAKMf&|XF1ay?Vw zD)guQe)eKR+-2*>XZYlt8eI@&cp}GZkMI&Y_<7;Hd1F7EpaSKw+IcRF>nYg%jqnck zA#l!mTjUicK(l@Mawe!kq;N=S`25aC*sKfqtdx|VQ%ETouJm^vF(x2(JLhdBfh>iF zd&?ZQo*08oa!&9#=4;C 
zAHdX)351Q;TERuX`>rz^2}rwYKz>or4>49*JRv?sdz*jm|0eViKRDg83x!vS-`TPJ zrlu-WP9q7x%$<)XRVXwu**rDeJk7@N5TL|vQn-faQk?8nFr5NY6^|% zy~b3ukTS84-)nG4DDu&7{!P9ZLb@~|z=xZjZTC!}HR4uq);CWr{!ytY6}t#uIhH$E zzv=Bf(5lIkC)Rn_*C=Dvuw`sakNqbH-$^cHzti=!FV=XojwcUEx z*lRZLxM#yVAScg*M@Qtd3-%}Pp1Twa0KUKDh}l13xxS|MP%xP1_u?ClISG}&1H|~` z%RHttc{l{Md^_B@L3cuZJ5oK7G=o8rkZ|99{YvkqsryZbuWwK@nz_?yC5u5oO*z2D zS;vJ_iNr9%L~nn_X4n5HI*#vISU!ozJ%V{#OoT)5TM4$P$|ePbyP=~F#xTN+fVTm^ zgHEEQEwG5kg85!zrgfppC_B&SVA*OD3@Q^;&rN3kvfMg1Zxq3--L?l)X5B6i@TIma zr@)#BqbW1JJF_IO!*?oZ4jVz1bD5gCJ=w7sNfv!>IqtGB7)o|qDY%z}p|-Dj;j1nP zZ{aYE_{8<>({`%!h>0yqe=*B(+#x7}4$zt!Pq}?G_QLq3O!&4HV`9x51c@)8+n5*h z8_diLG_dF~g#~aE&I++y2Ie%mc*Oax?@%K+UDFiIQhy)(b99>kBJiGW&vU4lg~OvYhK|)cC-=`Dz?|XWmRT&EJg9 z@m%oGp1CDa0b1)Rc3!xvHVt>q!kUsNficXPu%b>L6jDpeU864^<7$EZ`~uLtsWVZ9 z1P+LW>(%U<2x8^LX&WykH$BbMY?vQ|r&~dyL6hK-Jy${X*Zr-KeL)w(YPyEh6(-r( zxRL9$Rth#sflK`Z*!$|?Q^d2;7&!M%a1=JB6_`i`qE-0y>Hbs#y?20fs>--Z^7@#e z-O;!d5*R4-C>WrvdxGES>E(!&{lSQj^@z&_lz@m7dLQj=;%*uMZJzW*_sv`w^QYqn zMG~~r2;~l;Wz4&nnQi#R2+Y%t-`KkNq8 zKDeAV72fYVowR*KEzO16yn0}aSW4W)4L|xy0A4Xc9#k6OI#nGqwXS-#( zNWsjri05%IY5eJIzavJS}vVy^%2vY zk`mVo?h}X{RkpJhFxp3bMZ0ypQ{v6vpK?pUKF!(I%kLlC*V@67@Z< z;?osv8J0No;9Z>1K1y@a96M~+njaDTU}%rn!f$=(qT(nk zE(2r-1?$SzBS*eoN)di9n#mzDS_#ApP8BY@F8N*ByDgdLZTq8Q6HnKJbkL4S&9*j< z(y}NZNnGc{GC0Pq12u7*OT#<_gY}bp%LCX@@z<&Ogri3$y)wcB@Jr@l3<~^P&;orh zSWV8tNmaa#aMF*yF`amHhR~s&=7H`7Uw+fITxV3srin1FFF%WS&6=oUStqokGHin& z4fXt#V~6IYOQt$MY9y&NE)xJsrj08ZkD66<_UO)&0fQp}SNtGB&Tf2k)?CT{DM6k~ zepM>TmfV$a1T+mI@~wO%I>O7qI)lD*hj%yy+$A)_Hj)xvI+~(4S0|H?etw2j6U&Ye zZ+mltof;!7I4Y}mkB+?8Y78>3C=catSn zYDD>QjG=ucm?a+??dRw)2BrsRek9^&kkf#zPxYzfAu^-$GYBP{G)7Q$xrUubyBarw zO8ShEBzvTEV2kO_pn&_OJ%{Ryq-m*^ez$gM)PMH^VBsHS*Jmrk_}a`kDPs7udSx8@ zOqtw7Q1Dh2CaAc?jAV-O1^VlHi5U4DwKRS#{g^G=@fY04ur#l_SlsJtV?r z%WaX$9nt=6Y_u{R!aEF*M%#t-T7l2RmA3;!4)VmQr;P41zC^9T#{y%W^@fCCq$ejJ zH#z%f(QOcf0}{gcGD-t@|2>Nps|B!)b4fGJTn&?d zkOpAdf`LCfzxH>2xa@kvdVVXpYdg$LPbTC^kw%5}Zdn|f=UM3H9s~h26<8 z*hy0m)0vug^TlT%9sh>d}eGz+O$%_ zTreu??rZ)FO1IrcezcmCIWN65m)%NWvG^sDo!aj;S^`)=CEMl5?YHl^F6@$l;wM$9 zGxKU@3b=^hK^;wjKRuN);DMqdCjLBfU-;F=6WV>;Holc+uXk(@d{L9OYp6K%({Q~%S*zY5e!GY;% z+;Eba0TeyX0bc!h>G;uAe}_kYmF`-j87@2xT$L@v_N{d|0z$B=`bQ%*=UL~9??^%q z7ayF7&sLlsW=nJ%Y6xnNW&cu(*Ci3+Y`wG_aD%)14vY+@8ZAzISzTuOZ0Fv|C*md` zB)>|nRc#;gK6()!+&gXD*bsWOFf6)s<%_Rei4P&EZHs;>M*~zco$VS zr&7~>RGUDeztai?J4Qx2mGOUR{(X@5CzV+4g(Wu<6M)^{Zv+!b+q&N$c1A;fu}6N~X(AzeFL?}?KAG7v*} zNIdt&(>{0hWGH0pz_kW-`UWut6I0{Tjomhhei+QG=O5@?U2;m$0? 
zBuBV35$E)zc4Wy63~|wz#btJRDGkTKI?vMN^VayiHhJ0kw2{)3H7y0b`i--oGej=I zi`jni7pPB3tj^`(e364K(J#ew3pe1tcm9djYro!@@!N#sRKYk4Gsh zCA=%xd-t${XVG=JpsjY`OZrGKmrNf9Cn%NQVkyvs*3;kjklR%~EKhEAuKvEj>q2MVQh!(81E5NI$}KMDad%0q-vlgpLlxOitGnzFC?I^y5A)k zKLK1DTCI8NW?$7YEa1MR&6hPibId4i(iW)L91X`g9BXIHj;SnG?+3+tljCtZF<0S- zHp>!K;7pG3>p*g|-MhGX71jMi6E2S|Hpz;!Z zwBtlr*kc}WJ&nQvOjybe!O@ft^PT@G6HKaFd1U#O{b zId0M6bA29m%ba_$M*{0VkPdcos)I+gzCX zVIRwrM6Rj1p}wBVLyQuy{icl$XSR1FdE#vVXMb3)(qsQpBmzdh8;k9(=Ou#Fd1v#c zbVB|*{l~;w@#D@CI&ZuYxH_u7*QtspKg{u#h@g7br$kRNNZbnYhM+= zfSD`Ke%lutjuNtNe<|XDLLtuPn?wS*GdH|q;D_> znomh8e~F)jwIs|qWcgiDMG$xy_V3_`%XHG8^)pt-kh|wz(l9TwrW}TC0iVqKN1w92 z#DvRqs_0$HE+x#)g{{!lErPIMPh&~LJ=cfIpR6}TEmqGT+Mq>(d~e2@;$b9PVF8<1 zs1&gKIByZ-t|xD2ywx%ZFY?EmI;bf=X5e{X>NU)v-9KRjC2qFnJ)z0>c_ zFZVm*i3+Dg`3EkyVM1-l>y5Nsr0891?zPvAn@p1tEkykERpi*)qqaqE+TCdNAFQyf zEsLk>efcF&jX+>2ktPK}3)4&8D68(Db4GcExt^_;iqL~UT&29B9J9sZf)OH(%c4hy zsbDHb)lpt)wyF&bDu2!<+fUC>CS#!p`cL*6S68C{v(NJA2hpql7tmvo2h>%6zW$07 zds3tLx(i)W^uNKRsJMclhwA8$^CqL&uYV1qu)t~CFIh+LWz%sRZTi#C2O>VP8o%6Z zoHp6-X7PL#h5#Q%pzi<)q|xh$43d!=EI#YUw?ER_ z?@@Q;d?q+m(b)JEx`d(~37yS&yAzT5zvtkbEHN`%-}A*XNj74wbD- zRxM?K*US=5G86Q?^y%3AP@My9^^=-rr;LKFhY>VrAcC1cDYI*BM9u2yd!m!IketgK zzme7h+n>xo_m7xu#vc3Oj?w68`Y_Uo_$6kFteit76JP)u#N1!Wwatt_P# zUP7jUDoCF66cS$)6%jgEhATXr(=XKht1A zQhhXl0uKo38PH_eEKd#_4ykRc3mh|;P1vHPrQ=%@m0PHbtD-T)J>9Q_R)&%r?pY(VGtqpG)&W@xD7qzRIwC4Cuo z4Y*Mgn4ork5ySK5xZqlWovDJCsnp&$hC|e)r55qiH(v?eDr64XqpiaOoO7FQNF|N; zgAWg9u&T<(U{S5kCKNzz9~BKTmn`m0zpjz9{^lUqbRb-U%r3d5>U_#~?gtJ#wnK8k zfHyz?v}C-Rf`=m^;AWF7KGgdX`x27n$a#O*`SRzTuR;>5;+t+s-RusiI}hWI+HM|n zSgQvB26T0|xGI_Io7M@x#KiK}t=wv7+~3}XP+e{U-T8Faiq=k6a6m5HhY4uWZKTrQ z7^Ie-``1K!y`w(D0g_ydwX)#vffXGYF^L~N$bUr?eOFl18CGn(J75strs1;}a-?XJXA^S4ep4H4mIZLa-^wz?KD}iXB&FhSso|ew=B-eN`A=Ds zqLY=%A3SRJQ`SjPVlk0FJsW0zCGg$Lo>gK!+En^a4j}~ z%&j>WeDKP%9X%)@<7K~p@w<|vJe5`W?&PMO8L9_aGEv!a)tK<-`V-hDw3C=^hBY9Q zOj3VV)#f(0p8P?09ZgLF5~}T4j_yw8ajPitd>^m55r5dVWqqb#mMh4fD zI8`z#k8XEIc_rqZ zvA^sZ%pY2wOFz8h0~VuRG>P%ekmDi0(N|hi!ZM6bMo^-R}hSHMhTT*MhH<<*fcjP}* zXd_FaLr|XLrplBxnS=wJsj>8PnDD}PBCop>^iR)l~PX(oSji7znTqEeh+LpNFI;W{%KUcS3V)n6F{8G)x)oXc+N4m6l0bB+PIWcJ7LBr-?psrUznN8AXj z8;aRGPsaA)TX|)SyY^b^b56l1sWJreMazc$Gt&zO8)k>*7RQICzC|Tv!wCsL zxxObl7~L*L>Bu%j-rhP!@_Bm>CA1g2rswLGzLFAM*55YZztJ)&5uk4PARD}9Ob9J{wHtAG5-CWH>fKHIAUubA z>k#l@#%-w2?Ko-;RAv2?c6m#0d9rR`m;ZQj)4SWP&Wy~kIz4JU;+#~Yx_%m$ zKJ3CH4``=L_(*A3<)sYvd20K5zXeBjsA{oIR@kr?tPa>(v+};wiqmCfHAX%xYtd+t zhOs5;2AG&A+R6 zK5eCFNtN~bhjx{vT+sg~xXXPRsw%F$hH(|)^$LvS%N2AzJ>;#qL_$(XOM}yTl&%PJ zYNFA#H&M$P_H@=irO{}k;kQzG|Hm(yDnpn6}05`IR z^&Q!0Sj5COuba9VOaUOqep{b>-+00|mP$Xgnpj`=5}s;c+rqY%CB=Nf{69xt<{(E) z&o=b?Qy$Nw*{WrAv?Zq^y?Dd^5&l3f!7B9$PA2AQhA!6sz`TkZr=yF`ckZ3C!3%yf zT~;T%t$N_sSQ4vIQgSM-NW92u4nT8HbDEsK;JR$vX8{>M%8{_X1hvZqz8AXk`r%YC zrDOdT%m)*}iFYS-lF3bJR}DIY34|i`4CXG-GLeb}n^G=`&2Zt8pvsyW>jsl|Y9GAV z6zjG_b;atLVSRI(o4;i%h~Bq1UwBn*v`X!)3N8g#o?IbdlwKsz`91jZyX-;5BW zMRA<(IXzEr=Kd7}qszcdg_ViN>72*hIK)4SzemT2+55Vu+9Wl%O}Jn+(`B6o*)GCJ z(`p*t%)9Jryn);^uA3z{5Z%WCm(s$&xh)qOtE`S)jUTIDGwvAU4zD?}0fvz4!gDMN zi;hgkSj5*rLRiYXKviU=7B1C{xK-!VICxmZ~`-Ygy*)J$!{b-BQ0 z&vrRqF%OC8VT(JBeZujrhC_Bh&4frsIZ@QQht| zkSMku>+dK@yM|<}qu%Dp(%)c3or-tUC8SLAtUjQ{LtdMHsx`q5F53z1I02>Q)Q)zs ze?`XnDHQ4&+xVe}ha#La;gbCZq3x*`w}f)|doi{4H>)BFp`mXaEkfsZR)4YY2ve^a_b4W0=mEP^yc9wHGX4&}}>*PY39dK*$+%gPhmJbKDsF_A8t8s)O=39P8@ z>dTr5cRq1mxqNG>HXpq0yBRBAeark-78o9VSBpI!^7fp+xaVqo@;!azL z({|@Jkf6&>gvz_wZT>`Klz~r4UpDA}1B`{2am%*~%&D#>dVtO*~ht>@8o~ki|y6%?MgRx zw^)gcKes_5aO}di+JLwuPuIyzKr^Iyo#NFYYb zRY8w(hw2VG?KMcPE4nIjD!)(aO^G1+#u6HuvkoNd2a9|@8`xOp8^EtxtAPc$%lL42 
z`Ls{{1=yIFoa+Zibgj8~+p%`|yf0zNY$U(ed)}#BcRD#)bFak1SogNiG-2=3>T$t` z4Az{Z%{Q?uaM~Ss_qo}2gvqD`3}WHOuWPZkdcoyX_zzC@8plET*ea*7ew}N)W-;DW zZ7*BCu;054-or1dr8Z4ja~Xm_^rX@jo0b+YW8AQ>`{)M0gBO z4mwJ32%WDR6>W%n@9tA^Q-QZs-)G!bYk(`AG%MWYHR1k~W#=~csNZf~0soq+Jx2Hn zl~J^Hx^Q#O4h|12x1P5M12!~nb-D^|E@3g?MA8Ok;yQ`Gy;Pr5UOV3E1VH$j8`APB zs^73&5K8pW+`;jLBIM4>&R#;3-|6YEpeJ_ote# zKVaM9L^;}W*%2OfFiCi0w6l#1+E*MJ@sy&(gcdycggz+BJ^E*2&bqmSC;?=;| z!%)ctw5v+)%?gs{Lcg5hP>{7G{sJG7EA=JgwMs55^0CA_!BF@O!GTJLvuHQ|NBKgj zcjFe0#_Slow8xs9-DGx_JQ>SWc=@1-cS?I+_U#QoZDi)nIoV`xt?}tUlrs#O6tWpd z1)qnpiGcpBG-A}`sYt%PxXaucHa!NEwljR@Hd@vB5Ntzimh8oB1^I45aDXKY;iOji zr~bh_G85^)wub>1JZry+D$N62`wxc`kfnz57Hq~@xjEqp#CslnS04B`R`YzzJ@-}`b&KgC_s7A~ zH{mr|3jW;T36s5$y1Ah_t&ZUh{HzRb7G&{cf~3qIegQLAVh5-1>7QvBthF{1?9k`^ zW%;0NkKa>D3-t36X&rv`XYIB9ASJ;p+a;zzH2DLQ3p)Y~Ih z&Yh_GT;7q){BaVy6)^8VgFtolN9RmBhGo`QRl_9toQw0u-mDjG%uFqNqumanjH+fN zk-#3_!Rz&9V2PD-WqhucGpW;@N zX`aLWDP8%@cQK7j*iClPJ#=xw%-e=+UQIe45g?3}Gx=jEY4YOV{}+I|T%{>7C~TqT zK6hI{;Ma=WIGROmBq&)n7;Ga3hIkA{?14DZ?#%0`TX(SF4JIA$8fCBuJum1JgvxF= zF*lp!#w$@~pATs9`L>hut0~*vIl!#)I%& z!`j4-RsX@4>b@V)9eS07+S#RR*>aQ&u1QEN@Hj_3apGFoo zM~Jw4Ij^CeXfX$q)5>Y!mxbpaFsQVNr<}HWrC+Hn@8S`Uw(s7vgzxnhf?(L(` zStO>jnJe43+pqT1gh@W0tp(HtdG>kIy5Xjw52BwEs;8vl6CRm*wx#Q8Jsre z7-dw!YF4e6TPn4CEBBcY(fI+K^#90R0=Ih#>vfIFxk;an5p3LaMoNSszn0up zkJXclufoI1GrHiefut|@JR|nMk0+Ya5-;a@vHL;DnT5FFzo5!Tq&virtzB6T@T>G? zY3AE@KiT+L*Zb3nRRM~r`Xi__=8MlKCrAa(D*-;tWW|i3wJk4BXi-OLF1~+L7 zaee@k?IG87RXocZ-K%}LwYqfzRz+bsys@Byn$gtb1MiwWyOc2fPB^!x!~yA|-FTZ7 z?vw*_vlNaOV-;7S$0(W~H9&Fp%6>RDs}&h-w(i)sZB7MC&~%8I^W?clr~G4=s6Liu z_ME;=p;n99%AkwouL=-cw%O*A$L10$&6_9c0n*N6lPR2_`YP!iKL}U##A0Bc(lZX1_z0!j;h@9S?Yoy1(~1rdi(=%|$gF73XKhI9 z&0Au$?c3mTrAyMIM&D^ET=Iq~N%)KziMLHmy5YtMq6On=cS}$CMTfTq3w=};-UHs< zRScfHQ!CmMQH?&-WwjXlbGwA$`~$;^c-sgr3CGbGE4ikymwdcSD&`>H@5+>-x4C4~ zZS^F1a|8evkwmXzr_%At3!?o~Wx>x2O&lz9#!c-PO zYvmW=>#)j5HlMmy%{*Nw6QnJ->&tPQxGp`U=``%5w<#mNvcmJ*J{QBeXMEbs97WA; zRxF}aB~OdE?%00m`c3SO!S~{sq~T%4=yy~!I%{`YYrf_C{{-ssx;)1T)Bps2E7AHH zfA4?avWJ39TZvgaR=?iku;v_3&4@0#aPG#f{X|oHRUrJQ1zH$4b87fzoFbICJZj9* zwZ@hJFJH;EW!6>orx9CRuVutuab;Rx7>jwioM+7q_X7YCp5F4-&q>h;gLbv)U3kt8 zQ;2%AwijDuq!(R0f1r#NU9ymG9VQv0*M>S7sI~F&QL_6_nK9I$mfFEk;`9lpB}KP| zUzl|abB7rzWv+FNK3|auwNau(iteuK*(RrE8}75bZ_N60ijP>;Jt?~5*)<)1jaO49 zrUL3uJkBK&?6ju9aFHWz6_Bmwg{UKFjZ3D!423p8ATHo*{ABn|>HCoP27_}rI3DN6 zN6XbNY8so3k&aSrKlJh6j`b3!y@^E9IC^qhJvc2 z2ihOOJ^7e&Q7zOI_lc4uOvlD?h8@>-9huI?tL==}s<%Q{PA3*oZ7fFx`L&o-nqAM| z**m$bWSF~_i$)2Rg7?s5HrWIZ#qjTCn4x(#MQZl!m3QGA*Q*l`gaCljbby-;!&)-VP!aZfg0E(a6 zov+Ty5I~m_G!nYM!Z&^1!ax5%(g1*nCUw(nd!C>{sJWDg#E;oV6au@i zF{r|IAX##_+%p>?G5b55Cfat1KGlNM5UZ~bL}Zl(L&su;%@AIEtG-YGVODK?P0-t% zB9(~dyaG*tt$bPMuAS_|<*yIm+>1vg6$lVg>xX=ONaR<$Bvw;s-h&b_@<|1?ST(OQ>tdf2N5o=HrL_SpgM>kk+vIvK3zm$9Mhc{6ph zU})@7m}7YDnqM0!>r)-L%V^xvs-A%Pr=E zSpkR5yzOF?&fA>*DClqb*x&B5t_L8ESglqSIusvGC)&z#N+<)XGS_K7?CV88DsDCOyL3MWyR??h3lb#rje-Ki$}k_JBM zz71`8^6OXu%RKm(Ptuny8sVWy6SYc(8{dK$d3PO`Pd$^D;<$RQcI#lt=BU+XC%0eVYi z)XjFQh7g?Et zPY*f$>tLC=yvGDNREnv)k}3^9eZ_q6v5*vSo2F_qg|SB6y78i;S-*y2A(eboj^ry8 zS-<<*5))DMb}e_qZw=L7Z#2*YP#^f5W#o;HImZ#p?Njn1mc8TY?kv{BV&-B!IYsg$-BYZU*jhJIio{CaQ)kOON(}SH>0U z{=NRj@(Rzdnj=`~P(7$WZpYTBuD|I{pXe&Qyy~XoW=sw8>q|uopA<~ea~%Oev$N}C zS+du=)R{;PbsiqnDW`l-p-<7b5v{14C%_~@S4$gWd)A8Q`?JG^0F1ma>BMEBKLXxfZDRR^mLP{#^P!=k{4q9Ty1L7 z4gclu>tVc@rq&ttCc9ZCO0oQX6h1d0tD(ez->`8;`YN6=3}Uvhxd?xBDSeL@sI~AH=+su^fMn2WAYPI$sy0rmMoZ3Ev6); zqg8su{_$yvj%5cLo2-xHRI|*`osPt@0NDiKD-XNAlKV=x3f+%Up?-HQju8{Lr1Esx z8hSfcfdLT!mA!XlXW7+tK7*+R%1lqySF}Bl`&+RE5RXw4k~k(RVtGD3T7keZfP`&v zJYDcvt1`@RN)0c-WTjwsx8ZprJSYJtIV-i!Ifka 
zV4-K>`+C|pDo!^Wqf*pD?b!{_)$k-s79y#PxF)M<@T6uB14 z;8QzetM=-^U@5TpRoBPfZPx7chYp)bhjHsu_aZ0yoy`Z^dfMOap5_Cu;_o_0+rKnm za2btD0un1HQrSvJhY=<}g?AEJgi#mhEHdN!FUGXAhn~ci(fFp0j%Xg_;b?2?IGt|x zwLDCf%))5W15Q4;YjXRWP(w78PyybJ{RmS540AuG{3IefFL*LxS>o~Op#+Yf>=0SKGu zOQKX+1_x~;AYaP2)fha457-Uk)IC(4Ps_$JV896>v%x=+1N_bMzeZzsw%6<=QD=vbndQoAV$Ip=Bv5a*Hwp^x18KPv}5}6bg$(CFZ*|G;0XMGMS z6pgFcM#FlvegWxv=WX5%TCeXFF3OgmCnQ|F+`T!C-*e03Y0rxRV&{;swm5?e^0)2U zPywpJMnHhL9ySE&a1y1c$&R!q%$>3tS1Myj4p0sktb1L-?`zLP`I^W3L0d=pokjGvHOi@pC-*P9# zdu4b46xlG=Oeu|~%ieO-?@>K|6xX+#=>l;dYr#&Y=kWpgV(|qEItyBgV&+|Ql;o}! zRNvx9O-@o{{R3`6Ke7@V&jUfck*F#r%&X~<1<-v`T@U?UfZiJ4%)H&79R zZ@)iYto`jEa&wy9D7-GLUS0VMVXeKPt#=;}Uu>>CCg=ot%L{2_3*}T65)DV9izPpq z4^NSfqb)MxBrvMqzXu7yHzWaHeQt{ty{NrhS<=9)SgKecLFt2^F6SP9Qxn=ZZs(;v zzc1{~^82bHgm83XnwC!wJGlxPSur;@?YbBM5F$o~`KfZNERP18=|wZnM$j|kqpiV? z0Sh{;QS6jX`=QPs@*^nml})D8SK1g@Xo|$2Kjw^$aA(7sWDQ^w@8FFE zh^2^(NsE71FvI{zwoob=!{jx6PIuMpNiQi575-c9ngC*k0^sm4w zOLD=UNyeK|s5HI#Ipdq;-qPN7Wn!R;o_b5>Hk;@U1Dv)slwt;9Ji_-nxuXglSU;1Q zc|7?J`SMiTas+_?Yse5=SWAE)Q0ors+k6cP|GM|p?S9#y0vplujSTReNdP)t6!lJ0b*1NixI18 zucz}Wv+l3%3zqJ~_jZ<QX`U3D1kY=Yk~4n7V{It*o+!uVGnW zs_M?1WscQhn`FyO64=l3VC^<9+*y1!K7B{9K;ogU^x>b)FCfVKI^VI&5QHieds%OH zne0pnD9!9>5iR_v!{UO-4LhnE6`(k!k>EVD5*Bk42w|u=_|?2aPu4HSiTc@*D=^CC zLL5h}mWNPxbQE{_SMPnx`6t{cE$4UfA6gc&SABJ9ChyMtez1Vtu({%ptB@l{8~_6o zFF!vyZ$-=gPHct5`B&Rhjhpr3-Nj8fDM^_~&%4VSPL*}reRznF5tr4$iU`7AHzzx~ z6sQ|od9Lm^YiT@A3kUQo5d`@O`y|PFE*;F=?|Y>}kwS2xa-+i$_Z?RX7%krv;wBBh z$fbmsZ0UcJ5@e}!1Axn7LCQqsk6O|GxmPR2_Tz?}Nwz`>W*`Ni{hqEY$z6;TiCK7r z3lA5dY44I|yZwEJv^Tt1 zZ~Nb{WIQ$eH~<$7Ss^)fWhxbvq9XofrBu7@Rl8bZysGm$`!@EBBS{hEMJxIbU3)8W zSx+E5YO^@djuqRk#8VKUscyRc>vyu(wW*$Zq1gWUMzpq#AzPpP*|(4pSv6UtQ$w)Q zh{pcV1euhe=$PC7{Mp5J%McJksg4{rePj$N6{TJ!ND>==^L}4gkc8MD|Lgoi7YSe=HoFND?^pB?o(GF zQxU-AXT$Cwl4M^6ZOI%=UC{q{;k=1e!D_G%U;?YZPf9nAvo{&KljIn6wQ>I7!Zod; zb>SUc1)opRU&GqVBO=Z`69JE7?PC!U<^IwT0L5E(^V9!M>@+LNNvz{(yj~LN?F|}_F zy^$Yr?Yau4D;XoIz}I2y1q>a|Mg_eyx~8oVG@FNC0UG9&h9iJDyOPtGf1q zVv5m(D$519kYvBIis*qQ%VoBTw@30op2JqM0>9M-RPG{3-_-T|fSiU?3NfJF;lbC_ znPVimyo=c+l-W`eHEKrh&hhniuffw=g>AI6tNDS3L?teJ{I{0`+JmC-^f>~VXFS>& zpwmhU??)|Y>nrlyQ`OvL)!)gMuG*>`SLM=+pB06&x0<1`N!mdgb7SA<|9Y~LAX{<} zm9Xv_7kqi}bV=PWL?z5!TTR%o5s}1Jda@5n9HAAeVYErSYz)=8GY0@TDp1}`&b%}! 
zN#SzW1}j9`#H!r#dwtq(4%hYuY@5Er$ZV={n+hm7?SQM`Et$tb0OD`YU?zp$J4GoO z@6$}7T4yhD=dkqdgV)!K!@NG8HE3c>p4X)@p$EHSPvfC!KfkBU~adhKQh zi#5yluUzoE{6Z>-@LNa+cXyISbSLtJBpg*C0onMs>0T#Z;u-x~a_S1`>_(@_m4zn= z;8PWF!TyfIWGT22p0(RC-65!*wNSo@U5i1SvDCh{4xUQC52d=q)H?Pa>4J$$sLSxh zki7}ApR~0k?g(m=_zB^3Zu@^2+_Vf zPj^x^J?B>an%4{Xa_GqdbISEx#5{pbfYkNU4r)5mTGp3E>pr-rUXFHPHuCZ~?T)rIcl;dpw>ge(svcm} z4dmQ(U~=sP7jje0$J*LO^keBd-2dVuid18|GsWwHFl~W84y1=fP6@MNu!5hv1`_y_ znD%t|zB^*}Zi55^%20BDp*b&+Mh$xl;%;-lOH(8|vpK-niNUHFxhZQp9Q4Fq0vdk7 zLxPzteDc9t>Cp#2Kj4FDvp|9g=+R%{`0sanD)GTDd`ad)cAf_x9?(;7UR^fu&*6n~ z0Xm%j9R0-_|E!etpEu2C_0|9U^I5WK|Ia^a@32&y4pxD*9y*7~B5qcLO@KT^!$adW zWH^;Sa*d-5_4F7DTR95RE!URvlJA@9o&a#u=pB*zTF>%^gktC`x8msiWU(!f**FOE zd|Ar$>bGo<4`minV{eF22UF$)vN1;a7ItE*p$#raWy9K1hP;+nWWh~W4sWfnYP*)1 z#Z|b1oneA@dO&#*rmAE|2ZFXDcb9nO+u{lQjQs8VMJr$A3ukEkXIcQD8#VPMX|&~} z71v!?7xT94M%~>$e-|rRu(nCLg9Q4?+^53B`)sq^5r@{yysaX`)v|(_2;jR@@4{2l z;VN-xSkx(RbE*P9#}%Pf5CGrO^_t1z7J^152TsN4eI@z5)=2rLB8h(MrlMi{t}h@l z?pg86)3?G?;&kWIq}V{3J-C;*&V-GFQ$wA(sV3ZwAkvYnj~24t{YNK{=c25o5(OLA zV5$P+#9!U|uZ@2KoQAV5a1$kjnmrjv8>3~EIZt`)pZ7WkZh3LA657B1f*1#{$$}h^ zF%#R{JWj=}?6rtxBptMBTsp^Tl)71vI=r4XyYt!n{7pQP?AQlfX5Uork1}v<9iBX< zvsean@rvI+Vq{+T*heR584-v|7OVX*4mLD9n)G`EKI3GaQIv*wJx=Zp<K|2+OlZI@yx4q zwW*^mY@_ZDGSn`j9M{kDHCXy_4zvB=C%eWN>$16h4t;ukDv0|%pC{n?zE`yHfqGQE z>_HDr)_=t_W{g2&S6?1*gOMQcE4hNo^KNGLi#_;#n$Zc+bz8L-`6n(OTCNY-vTyPH%ig8c zAF1$#@gKtO1ft1KqabQln>+ms-UQL1ecB^Fq|7g4&D-bxYl9MzFF2YCpsTfdBj2Ba zi#cT^^zZ;ujhc-ny~66h(7|Z6%DQVek;do%08|u{YQu?uMR}dY+Z>$yNXEBANVoOF z%7-69MFf|3%U$nsC9^7|$4FY7nawTx@{NR}hT6F&=YC$(&ZDMIR=3BaFRsp~D(5^h zt?Ysy-{F?{#W4>JFrQC3n%j8#xR86wV64;L4JDR2&=gd?o9(Z(@?%e9ok8klPM1Y{f+OfQfIYckrV<@?VyflUGoF0-C}AJe~u}{?sXAFrN;_I zE8Vu^!%tLSBsU`Kfa=Om@zn%)ASd%3g!&1f1k{L0&;c@On;-OBpZhvWRAtVuYUqSY z+b;IK*?($NLp)gRqFybd2#jBiS|-tAK3!_c^$wPg>&9JSTaSlCpBClfR0zM7IC4w2S9Pd}K^ zO$$O3MHtWN^@{M#8Y9*)tUq`qiM$^?Y0@G2cSTciOpZW zuhx7q(a}chpNx93cYj|{N1T5;4?+xGu?6qOZo{Xg5M#xwX3aK{t=gIXLSL)h=pO$E zB|si{{>6S3v~$2NlY0jMlSmj`yN-3fGX1K1)aZUfn z3t%jwAb=pMO0hWK<}+Qz-b-B?!rzvi)q)@jSK^HK9PDDUKEgC~Blz^)e2PV$-pN~x zo-40Nfp~gzS+t=?f{BHKlvKOq@D^TR{O-@$T45AxB?2h?XBZ$l3>bA^?u!{**~!ER z*WrF7CJYSDRNK)d7XK)sn;k&8uwJb6<_;{D_asy;75FtEDWT$}G8of^GDi>Z8LOdF z)S+(oYFw>66bidsi;^QvD-IxKL;>$=ME%Aq8j?Tn)?0Zf-k?c4uah1cx^>z^l1`rRB~#ic0F^-=@&Kkg+s=LifE)!su-9uYyUr?Kd{_^4}- z`PG$5RPjE#H}L8~_FWXAJE(Z_uRR-KBX3dEoQF><5$^T(yEJ%IKbr5aUr3;mN)FTA zc|=TC+sPvbR~z;$5lJ?NZI9cjdf>Z~P=#xJX}Qmt;H>BEo*%@p6bkpCuyNN>Qtt&+Q+u7$L2rmhalQ$_%ds{ZoP(J0(|Uz z$0QW$9sy8QwBd6e^E@BL`E#xx70x0IF!F7g3{ZfEZEsJ1{VqMrFh(vh2RMJykiSCK zB>;ebt*lLvn}k;{#%!#AWOJc45!cSI+=xCzv4f}m!vwWQlNui5Yu$@`dN?s)3$(?z z_gi=Dv{-xx+m?fjWg(VZ7Tm6+XgYisfg_`?TM)zLT_?JmrSwyQP85Qea6EMRk-!a< z?auL5R0%xG18TRzYc<7@C>tK0%#`1LVoJuKH(K`mL zM#z|LtPkxSl~Kf@&eKW0#$)C^GrAeJy^A>7+s}>j9NjjPtqM&_W9}O~6A*Abe>-+FuPUWjFI|RchIHGBZ*)#g$Zx;N=B7xDB>y7Fqoc2NLyks;}fb;j)*< ztZUt+r$vxxmLH7G`I3mw@!c#$Hp(Q-rR5=w2<;R4tMvd~q|8ZEXJ!S|HY(=d?}LiT zIV*A}23eF(`G>+V-B}u2r)Tw&15OO}(4dsIg>+l&wqq3E83GnjGQEHX?{7zYLx?ul zFIA(8?MRivdhF(tFBVGsZReN^RFTcUtPMQqS+q(4 zba&gQua`c5LyNz)OkyLvc2KEZJr$j#?+0b#cNNxvXBeEfMKR&2! 
zUvcS#ZTrr8VhGsOS~elDmV3UCIZ)=Xj6AcOL~_0Z|La+{Q4;1D zirZPngY=f(V9m_UCEr zB*;a66YcJ{65u2}N6L(|=g;bjNY1REksyn?4^3@~1f{krnBk2(o_jQsf@E33 zP68zg8t#pr6_kOG!)4j`AO2%1I(}#FRplEm*gBVxQ~Q#s8f5O`XLdOv6PEqrAbK?90h zwCcC<l--?fx1ldGqYwq>@pYfaoi0Wvy^RXr z>&&N1^K8RzvTU4OHgVOY(8$%!gkBnnJKqihE;9wZNjBD=El)dF?aEuJQvXTTypD&P z751};Ty{N^ok<1{znnG`C&4y(}-WeO4n+-;*g@TilSX znwRrT!5Z$9^D6oh*~DuTw@S-m*#Ke;bq z|70qdG6Wcv;~Ym5IuFT<*w@p*um1a^$MYajFM_TxJTnmi-k(%#Z6*jtg2hO!C&oUA>86Y^*kIg}I6 zRE~TL$u~KOD=~+}9|x3;I7unWkjV-nv-ZzbALKr7zk2l2Qle6zG^&H=987Kbgdz7k zOfnE7K~^OvHrk&Y+ZFi3KjVg6R}q~}wr*ocdhk|R@JJZ~Ahw2pqKIYwM?x3tmWiVu zAqES0+-*`T4*2PT&$Hl2?PqhQFZ>v6YS$yHwZUa8Afs+N4#O*1Ppld1?!S}VjL<5_ zf?4BibUqQ;dZldD+8fgJ8|~0Q?yi`^-$r)~R`Jyct`y@A@o-I2HK;H#v6UPexbPkr z&j{1%QCt=)qt{MO7GPsDO2{xGE?r9*A;|%hlS2l&$6d2hrm|r-GfpIx8=9$xHGdW% zDSbRNB-AL3LgJq!VLm&Nan;|egb5PVHba%Aivd0pq6CoG{d*s4}>5@H!q=QY# zj5hV-fJ;+AhVnEU01$>NErpUh{62ak)oRd1SR0#Km>42rZQmXHZ&EjBCR|6wL+e*K z>ho`y+We~GNR&}8LI|#T_(Ws=+I|>0$X+zAyFk!;kAkKUL0;<4_pO22DDPm$)H+b}^Pi?yN5eLWUU z8jfnc{OWS)p9k^HkEs2xxv2em@V~?^0d(_1v8;;ume5mZa4^c%R%=#k^@k~g9B+)5 zw`5d@Q|vc@FJS)F&zID~e%fSlkQVi{FhF-0NdUOTmcpNV5eyH=hJntt2}MgIgaE)j zapENTBQ03IHjZm3_`A;#{OX_fetK9_O>6&daKxHjn!h|*g+C;nD&0+2B?|LR&5Gz# z_hh>c^sVy*{SIBH`C)>B;`6-Me|m@-IkwicKa%owvxQU7AVjP( zNp*ZohCOA~=q|o6K*Ji@T3nK@1cU3w87qhwQnZUPE0w)`j1(H%i8nvd9_an$@0EsA*>NXEJzxr?kjaqtcu>=mJ{i4&^n*D@4N?{2)TK z*O8f>omzT7N^XiVuQP0D>Tj^Aw8BoWZ=}vHlW#IXU$s6H99jOt0G(_`NMf;(Z7s+| z{4Y%Ba;pEcxa!-4T!Qtn+br7HkOw-#xBIiy^{%*<7{5+;ps05pDxKDS9W|9yNH4ayQsD)O`07gXh*WqL-LQXd)G=YH9qf6l-g0m3V-z-0TjevdP%E6S zxRjk#i&afBN8HAJm>Yz`ER`5CS!)g`MAP#Ahc~fz1IyuZf{X0&F=tYX9B}f$5sA)=_(4pCdrHGRwly0CvxFE>%|Ogm z*9b!b@S48Ves%Sl&*paPNSATo>>}FT>e1J^+Mju^&Wr>3)hS~^1V)`nUx(OxY3B9$ z4-32n;)lcaw(sLyvP2s$i6m=g&5|ax5Y1ouYmTn(MOQD(YV@&#0G7c?hh^}t)7#-7%*IJ;+-BFg zS`f?5jIyCWN#;5SsA<<}fs^(>tm^~XkBdq@sk{q4GZxF;Zd_Uqui7!<`Y=%;`oncI z2$_Jz`<3wbLit^4uK0rCtp@8VqjA>DeQ-?*od}J(4f2cQ|CTlOn?6kbO5e#%zS`2! 
z?8ue)HEPe!&Q^<1CZ_m8qYVX6orQpmC=ICEfR~$`36hWcBx!`h#)L z*qXiWc$TJ{tRRGBNnf;JYX7@Bv6#ueskmVCj$Zlxj){)TTskeHM|AccTS2&V9|zC1 z>4L3>&z7biqh<&Ptb1kSOSAdjSr>^!L4JRim%YDTHs9o5Ol)K$D($fstuH z?2k?i@#Onz<-|GbMRP-YNG9RMA&Hb_({k zKuFSe_M$);U!m@IJeShyWQ+a-xUY$qXVQbJP@H{Bm_aZ4}nvdelo> z*UMt7Al^r5c$Raw5&OgaZmyoVEAuW@x7KSdwTOybSQQ!4aP~XiY2z>1+=TfTS>RZ} zL*VY-Myj==NZMHlGDFCkeMuSU_?^K4pixAV;ovOD*%!=&swdZQ@pN@Pxy#FdEpF64 zp1U0JdORJVOH?v0w1g4;K8Kx%TDYpc67_W;)A{|nGLs%4JoDLZ1>Cvt!3;7ebz8w_ zmij*4$)3UZd|trDvLnu zMR6C3ENSGsReouH@sha&KdP@4$N9{DORtU&=e!d#-z5B5gI)T-Vmwv9&FTfXM673Q zOle_e%c;oE%wR7NY77SqG5q!VoS@?_8Xuy}3k3T^2TYs1mXlzbDtPV80L$UsJLs%) z#r$n=$683+>8R!Flnp50lwB@gp@ze0qt{@AahTs~toXP3opxvI4fnrYwWIaG*>hm(tm0pHADfqn437z@!~oLm z=l2Z0rjIucJ%H~~;;#Cbb51)?ofqhyJqN+-upG}+(;efUY)!`} zbmGDIW?byJD~D^%IGaQ2DiMRsmy>rLSB)D(j5Nn9kwknhL~g2pXot(Gi44B`_p}Mf z&q$+Hw8oG}E>k6|^Vm2Y{GDEh6CFu;Jc`)P2A2~Vs|vx1;Q+KeP!&Uwv%%9xhZBgo z%Jn=aJP~D~O8XT$XDE;(P*b8{>o} z$f5+wRXgWT66x=wgZLi+f^S1i`V}Q`zciz?(DI!@`@eoZB%lpI|yVgYjD!> zERFEK`j8EE@+*#8iA&DPaCfrr@u@#Vp%uKd8)P&8$&D8l|xk<--)tUKhG_T&S+bgB_D|6E?(*TI|8&H*QmdN)bE71{Lwy zv!2!P(}!e#p1}%*hI+WJ-1CW_MoYI0zo2z`k%ARE?htQe9DgLS&UD*0{g<&O2z3Rd zgYZX#f0HmFm}V8f|1AKE$tpWYBB|eun(_{O&wQ0_+!f`4wjA$|O4q`i?gd%@!{Y*; zYKon}hR`waE)0v9dy3nnV+944L6C767v`5J$K|R!ZRdo*Q(jOZ5Pj69bwTGh=?JPT z_w0TI{Yuo7h;^FJgLm=Wh+{{Gxn=V;^MW;-FW$cZ-p8kFYUbA)!#rE-5wnk^%PH@X z0(G^=@mEbVv`en)mN+OaVNGFSmc=1Q($7wu#|6|wPa2QR3Q%lk6~F)aNn;;WSO9>$ z)oqUA%@kiiVhbJlR2`4rYgJ#{dejR_a_(PbM6*o+Ywt7LG^-(i*n5E`Ot>oXVHC00 zNQ$NN+YeR%Kx|#H1qzVOZmacd4oO9~12x9bB~ci#Q_t6>gI~n>E^l^#RtheW_S=n+ zIH9tt>}owL*8iU`k0_4@R}K#!8v)#jpl}XO&Wy>mmWJd)jucprk|lnA8}G5BZ9 zL6D+gUf{-s>s$D7d_;mEr-waK_qBp}nTpmJ2M%J%ZSkUx>7|jKa_O>4kf?@Pg3I(@ zlymHZY}2&{M(vU!NV26eRO@(k!4|o*`dfasLBK-=n}R zLux$)rhD9Q;lhh$>j3E6%Gz<=V-N_}X+ol0n;OOo0OSQD|AFeRX61kcsBt1jng182 z<)8!z&S==z8eb1#?1Mgk%Vxp?+vm^hutq zFjE7%-3cbKAyldNz}DHwph{5(g-^b|NofmZk>HU{{-BWr0B+?ol3Z|~ncdv? zq(oG&29X_lXKt+)Tj*i{W&=SwK!yw5Q5IYX8Y;WP<>$kIxa0GRVr)Nn2=gCY65ZC$ z_Gy&@E}_JqHr82j+wMTMB!ntRTYEOMehX^>0RQx*?E}*>iS25(fB5mM9$99Y+LVU7 zPj8}4f7*Jb55C<_3g{kK5WLs z#l`%Z^y&92!OmkV0#0(P3Gbo-hy8!i?AI=`F1TM%lx$cTErk)0^2l4Gw{S&Md2imS za{8Y{dIv-PLESKrfSa@b;^hJ6(7!IR|Id&(lj)L_u8cQZ4n-iA+zV4%CgZvObd_J0 zx8YrLXQR7nVDv~}5hG71gh)>vwEfCzkZMb=H?3-Wz&9Y!*mbdjbfDh6b+>V02 z(947fo(`MyV@aKCsFh6`n`~~iOi>V#f*!K4IJZ^9=CwN^obsPJ?dS42h6akkY`y!~ z)NxT4^<|u6cgG*>5RR2Kd_Sp}dCnC(v-jpp($pRZ^r*ZBYY3qpx%c@-0y=QG{La-UY^*bhR?mIO_ko$Jp8 zZ0Am78oC&Nmu&9aIFt#%n^5ABR(Cs|!Hxh2G`tov`fK6>r~~2{EAV=)|Jz0%QvBd! 
zeApgMEN0y@Lj2&1`jCQ{jw^hdH6VS`2yu@o6tiLD(^1}QvVEK)_V)6;>uo`_p$>$* z4pX!Obxo$6Nj5PqD9~Pt2t2Q{w;+D;^vZc$cWN~p>dS!tNmCUtclxU0?UFBc;9}MD z##V}4tg4BppLd|TC=WJwxmC;sg0+37sT=f2{hut};@NRv735iCUxYEyTZdx((KI_c zPG`bNG+YtM!A1$fCEzH!x2DNwGrTf3FH`(kS<@F*;2+Lx#=7!l$e~V5SGpu`q15@O z2ceg;Q5mr(0d)j&rH1pOjoPJ`=l37%Enk(Dru&R%ZTU!qcq=o0bnyUapKVT2+}H1s z-7tUkeEw@-8m5L4%L|2e!>vKz4qKLMd6klYhfa7)w}DV;>dUlmEn}hmW`{_qU)wV+Yu$)QpT_RX zhv1Isrp^}Ibq;8`J6z?@{;{zn5=b5;QUKg<*X?vCWTxzNlJZJR2ZpBS#m#P_-+XJfqd-{ShS;)via9I7c{));Hf@RIggy9T@_^zzZy#u~(G{-TpH@_|Gi+{i zQrCnn9r-qH{!yc=2hZ}hQof<`_VNRvldW$jtpkw^Bh*)C!sgAU%6>9n+pdOFOlZ{# za?0(Y4E5n4dAOyFf!31*_^EY)w^;@pp7U?sT&`BWNalUC@=!Cw9=JL4@yv4U&iF%` z7lNwp7o$hNe(mC5<@(bvHCzJYZT{1!IVdbwM1sJpUkztqviU|P>T}eJZEH=l*LCpK zPXOt#INHPRwRl@ydQkIHs+rQrPcM`TuWl|q&i=wX*wvU=5f6fZ)DJiyay+2;`vTX2 zRz;>l?$6EnvaA%;r{xV*O#1+q_ZBXH)}0e0hkqP15svStXtyL8CxHy%KaffQ!t5t) ze;I1D2xJotS(&ig;p$4CT*au1YfpSN5|x0XOTE{27DxddBM1$ezD3_71xAauUDU~o zd-G04Vfy@hzsfvskc1=4!#1#5b|j;V{Si-EF>tE!sTq}{=QkZ$!60Jqc08LA3znF zNGZlc`YsP%d3fYexD+cx)pNOd2X&jO|bNGG9J0*YM+{dGuwrclLk1t1xxGyL}zEYg*ch# zhF@_Vmqlsv_xy)1Hw~NDbbG61&wswgQbvKO;n1ZfjW>v^23Z0i-y-GmHgpzzUVqEF z5#tTZ@e3M$E9|Hw6bAr&&rP~q41SV(Ce{~?XcL2_=7o8?U&@zDds{%+I4hw)T7qko z@+XHV$?Jibvj<|6Z2o~FRk43@plnVGU#Wi_ru;fA`~UF*sC+m>&At3+%$Ao<@N=l| z`Srx7$u)~*lyUyM`&g1b3moJ11=Dii-4R7gPc~w`20mv`-jQoI{7MtHENjC(#iAUW!9L=_w9#jTwLz(KVAK8}>Fbg#q*Y+ZUuS)IVP_ zwRls<2B zy1@OuX53Ml)MDe{4$AISCNv27dHN+UXqM1&!Wrq{V&+%dm^&W*eskaV>wg)a9s(kQ zIu>pJaA-DL*9*e?>LqHz;186_Zl(-y%r1M&0u7OAM;S3gs=grAL)ogk#5`ztKr`)gr@lES>k*d{BGTTo! zu8!khdCMMDtIkLY_`yQQO=%MtF0E&c{~TU5tJLN7Hh)S85kgDvPF*Qfa%ILNGysW3e~B^v-#zFD}UiLE8ZBYScZI9G38d2U!TJ1 znY){d(s@ZY0zNhV6=itInv1O>IY1SLkPa5v5IO7-fuRY?>BjYD8qycDrVvThy54L& zEZ;@HA+JPgq2wr z#Imq$W85o3j845Suj($D&$#CopRBg}O9pQEtyBJfx!eP6Z7t=sIk#UA-I?-#w{EoD z|H>%u{{Y${T=EPDY4u}BWPs}>(-er%4aIU&EDG%vp<70lS;5Qer@~rLR9T^u>D+BSnQ*kUcM( z5cp2C>3KDC56Vuwt0+DSmO?pJI2pxK=QELvxA$0M;eBol#jx1}-i-4vwocC1gFd9tr}Y$45;FgFv^OhO194OmXYE z8zB*iyORj4Fbba`59eYsUCg$DEs1qCO!emV?K;g}{?xxit&=Mi{AFLSsLcc?;AC=8 z_xEq{zu@9--gxXRZ#PgniYIl$FLKtJF;S%QuQki;0+G58@Nfuj`wq?(Wz@b;rQ8V> z%*BHKF#HQ#?mc`umS_{6oxWFQPS`v}Z5ZVN zs~{Id@OY?#H;yh({VtP-FTSRmhgZjG!Bd9tHJNh>Ro^*mkZWhOjq1S}0SGUg^X|Gr z)it{9{m%rP+~4w(MRJ z?nH^f`YX(?sK5T}+5fdy7<{!AGqzP=A~`lI zmj?UgR(a&t4w67_G@{P2i?LaCG@)S+G$zioD#HI*(OS2+X@~-$qqr4_w)FGCsS`7InDGyI zW)Xq^Ar>JGp_w3B+F zCl$L#=Y`$>L1XAqNWoyj*H%U-i^JXB)4!&k!%%9W{&#A2wVNIq)}y}%1rPAe0^Q2j zKVLb~TneFjIveM_hYJPYZ({YpfE}G8r!ewDeVOh|jLc(}(sw21kOKbwFBT4l4=4l; z494Bp+;&QWqw|-HzcN0mHP#ZtN=M=AM8aLHiHaHPsB?dkxGai)lvZ7Zh9UAS2)P{& zp}*|@tw;_TEmw0$tu^@gDYLvOo9n)2bop{z7$-3K)aiNd;)(EdU+PT}N$J&3Jsdk9 z_p<{hEY?f|y)Aun!gwK3C+@D3{7nJvRT^j9bzXyzj8uX0q@U*M=nTPsold<;|p3iV#1p@JJ1#-F4 z0Vhtikr&^+_W#xPmQithTl#+!0)*fW!9oOrCj^Ja3BlbhxVzI>f;&NiyL)hVcXxMd z92$pzc|X}g3U|q)Y-QHb-^wxnt@b{dF45#=`7lgi)jrG6Q+m6CC?T!pbymR< zJ%G?~w;GI*2DD#@CV_$mum8lA3CmHZ;wD8YxCic}k!S;fbLI&B8xOS2N zf!YUVx8>Z$%H&d$t|)2YXL=sRuLCIyf=-~O+_!SQWz!2Pm1UQI$B-fOZ{dL0;9#O# zqSi@WiYUirhlT*MoDaU5DgYy?2Za}8{Wln*PXe`k&b20rs>75P-t1gq^<`%bmiL;S zxy85XL;MtuQfwLjT&IlcHhj5(Ncp7)?xso82anf`>g-5Z10(wY7=FdX>ZAj_LE0c2T1!H$?PzzlJDszZp zeDmi}lbni&xPtCiGQU(QF|~CFjggm2j=s(SDxh?i3OtRinA)!23n?206lMtpTgK4Q zoP`oJ9Mgl(osnujA^WFt3=ba6h3ChfFlDRwr0p2vaaUf%vnnzWD0;;%`!%-Wg2z66 za8v2=2uoD(TRc3l2J1uh`Dy0HeBWgvyjR9?^-kJh(j3T!uX?_=rv=~*wU&=U1V68!IB^r`Cc>k+%6Y?WiK z?FAL-sY`QxeW8FA1dN^l-fqU`L|UX8%D;uupZ!I4``!7UFBwkz#Z34;PGZ){cnxu9;0x5#PpeCgaMAwvxm9r%umZo*lhOOz2kT?+}7d(f<^4cZ4SP+^gT~ z?N*vj5kHo;&epFqDKcpuSCI9a>FDnTq4>qBKQD$4QhSDWus~kM^V{ofIzlt7-&zyt 
ze!<2+V`ahxtgE}wm1P<7sl6@f?p6u}fxgmoC+t?Nk56s@QTnV^LFvCm>3I6pGX%iy zzrwZB?h4pWbmN?CLDtsLF-@*z=gr&CWSOiBRkkGff5q(RW^uz^BQ26Mnb0L1^6F}D zZf&XbAV9R436{CBp86FV@zOZ1^f(^KQD}?=#QV-=O1X9H`FC&i^%3_Q%cisnsyb1>@n`SfVCkK4Iv+ zXj%p@upCeK7UYn2_02gl>r7sBXBYN}x3B`7`ot6aqjd1M@#98D_SfoVry;g-E%+Bd zzdncB!jy|q&9JvpkUBPW%>q2C=MdA*kCe%ehCW4AJmKOthtt4mnvuqDzEG(HXv8H( zl92E0y{vtp`CqCI&Xt*`JDV3ycT_dM9j5KbtdqPAG*_Komo390(KY=1}(KzRKX{+ts z)G}Nni)RF~2!ClO5GHo7XKgCL1NsJ&hTvAAmY6~eUaU;7!TT8GG#XmUH%X%M6~I{& z9Cz9S>UU(?m0SZR0l~$A_}O&RJEQYYRZc5#lA0^e;s{&bo! ztHK}Cy0T_A>cv572c`!_)k+q{^Yzy12(q$pcyoRfOhQ;P`gyN)5PO|lc(Eg5FrzMJ zDpN+Ze{NX9R@ja}v}iim6u0 z79!&7%0^o53El`Cvjk=2&Bv)OEj%S3k!!wf6m8_W>U_qA;s6@m-E01^dAdO5lMvMY zqshV^#MkBgt@Zn4!8Ve{D<-zts)dvR>5QJ=og2#8(5Jzv07MXHHIG?Iqp?f3FX>Sx z4FmUSr%w<1`M%70ymqeb@>cgv^*diG9!NQ6s>)i2RM4GFRhgY@<-&3Riq#`|MUOjH zz3KXETKBaPF8W}8c{XiY)95B*YWYrD&O9&ws!o0B)-YsP z@+5d_?b|_!n!cDBmYm=Ae+8`9e8#d(@u7RwHu=uSST#(;ma{ZH+gF{W8TJ+e76?e# zTW$5;qd&M|*}T8qx=x4rF$;VhF`%O+=*}vb@JPP=9{7sFXW0x|Ur7JyM0k!Gj!C{0y7=W%}0Qb9>0uxlj%9Ol3m(Ha~jZ@H}*T_}^K z+D=@$aV+Y&RRE3!DB{$(UJME*?d7V>6=6QzdVl_c)Pu4!WjQ$i$#Q)*JaStAH zJjhP_yoV}RE}AxBR}Sc%p)`J|i%z=lFAw5yjTQ*wx>rlT@77y)PnS1EY|qdW&GzwY zO$t*32NygGdG4T9{T_DJ$&D_@M=^()v>vHkZryX(9t59y`2=7e;D$O6gAXa#j=Fok zKvUBnXRyPiAKe(v`_-@=&kRGuF`gGej?C%I2!=Y1qRMPG76fD+Ex+M7WYLf<01Hnos>Sql=9(X6wmBX?A>&Y|x1h zvAbg{-nZRTre<8?m}hslPxfg&NwaEVx-i@{YNtd!|G9U>z~cTsOjT2WH}3n@ef`a6 znR8?#aRP4=HKWpEKO{2?K095Z7+p@&rl{b_Y)g{fID0ysmS>ds7R8b<(~SFz-iXZX zm-7pl0>}PRC5kAoW-s|Z11~SUFftn7eZ(&t)!T240IcRm3ijZON z*uY=l*-fRG7L-^#d)q*^|C$;S)LL4)>4iQaAhMs`WYAS;#KwmK$qdW&d>NK52~&HB zSO4j<*E;7Gn!Z6=i8n5rm`nnk4h&K~@a#3bvU&vLH3^ycLuEx#%oUMk$7-(B+%9y` zq@cl=`(pmjx)@Q^8#t0VJO|!=eg&#Ib9qnraaVE1=o2%Yk%+<=5u(gZV@d(Xg0P<0a>!i|NP>dmFviNxJvFTg+)UY{0l8@O{VkqH26Cn zR>hO!6xf_R%eiV3*Jk^rPv`FEHu%&-6cSeOrtcdVlFz88>Vh1-GfssEG1N7oXIb+?R7nmci1a8t};b{aZr`+MJ)& zll~I>Q`Am)W7`T}l3bQx?}Rm+=e<1w+#`PBx|jw_&y|>@k`G7Ss9i4bnNE1#Iw+&Dk&NdHSJEgN-LrK+-&NZ)Lay_=&u27j3pVnGkcS}2WMw+p5XIi zX6FmG_1t;oRDZl~iQ;*|OFN&$fqoYy`g&s;N#~5$8{3IL#ujJm(dmx`WQOK>?gjynG*qoQTA$Cz>0L9AHF+LA*;lOv;x zok1k}tkqz&8R?*u(^be)IzJxrd}%WqO!-uGWilJD=LE=2YMr(fRbODlyUP^2Y{pqv_h=U1EDSaMyqTcf9> z^kouJ1QcrlL@Qh6p4`ZDX1<9nGk9so;nG>RqOHXY49;pYaua=sU2LZZVkjy2jd{@r zB1i(esff73vW?{@uhnJt8g}iAc8Rc+#J=dDrDlBKcYx|r8l$2&M<+zNQak@fRAC6j zNqNtz@3AA0raao0l@vm};J*89l^mw#{2R|&tU_M3Xto3AqILAxnIt#{w(CHE#Pe7v z!wa8wXWsJjXOult9zlc$uYTb0+_j?gguF`M%rpmx-ILs<xfvH<=Zf~eZvhhiPAg{Yj|W5Sd|%%EL9hEzGKG! 
zJ?m9;f}$KF9hW3toe7;%`&`|28jq=`#6gaea%x?VJOOIYasAK5vv{ACv`)h1%6jevRmsDchXVVSP!qlC=GSCs*QdJ-L%kBvX?pHYV9%%f4ZFq>Ou0Rz6=iw z4(Dm1ZT*Sp+Dw(*;GUvhTgq$iQPbo@2{XVjcFN@v*j+&aK0c;;h7MM|k0MuAWHrSj z;Ih^s0a6gNYdzyjmwQ-cPW}hF*@Z7iAcYVlM3S$?raX--`j)@VYQV9nXI)|NK_`WB zFwbpMA$Sm}dMb;uR%O!5-yQA>UA|OWJ8P%3y44bQqb2_MKt&HWN0_DqWsMXJ-cGQa zGZ8&A_bo#s7EAERa)X|~i$iT(|Gk>?5Fh*IsL4db2KZ^Js>2A|!#h$p;`=q!M%$+( zfxTBu+j*n%S65noVkusq%_~M1D$l#IEyk(!3aW2C&mr>;1X|UUx9H0QVMfzm=dga6 z`Yh}DWSr<=0W9>@WsPJNXgwGq*C7`Qz$7&b-PHf!5{H3KA>Pe;&j${*{w0Obl6~tW zJyw^MGUh5bvMTeBe9;$UeJ_Dy*HNwr-FxQ zzggQ|DDGmV{V$oO-}zuBG(YTiiI7h>ABwucmPwx!I}6Gl;+GU&PzX)9 zrLA1Ci6_`~(sNg=>p4;3I0ytBMSpeS!30^ONCQ=(^-2VR`(|vNR~eD5o&(47Aw-uO z+{ZkI^*}B7p=;~ulv&$m5^&&1$~B?Bd{m{u84~LfFi}E7dB;911m|3CM2#6ZID0-3 zIuxawah7}nn^j|v_TveMY#4ZdQ6z5&BB+%Hz+OL_g!9py5ql=dkE&&($=CUAp80J z(!Aevu)R1@dhu%FGY(+ktj+eJbcx@14$%;}Y{VS&2~<#g(Bc&!rOI>Typd zp(Bjve8|R>mfGeRbvA4~th!IAmo2}X z-OhIG$Uw~7zP|Wsw-(&C4@XS_@Q*YEzrrsbwgO4S1p|lvOyz-pMkPcV{gIxThIMWD zrV5_SB9Y0~KtS^#mYIhK^Y>9*MH!U*%cLT~PcDl9;}l^Q7Pp`7JqE2<0(r$@IB}=q z;TUEq!7?&ftg^&K-V?q!BDUi{iusIeuNSsBpJfyRYx#ra<1(Br!;HIU&;Ty<|bEXkZL7@mv>|SZYb^u-@9&7*pRXDJy|uac&#hxYjIk$ zYU}MfBORg_-`|R%?|!7W&(hM;5Rg7Br?NmjmXcAnzZ!jf=ONALWH&}luZUMM{S+~D zv~mwA5c4?a+GDn*G5KChLqT&3(*pKbsI@90h0pwfb=*#@Z}C#na8ipwBoUgI`b{G? zD&a-JK?UKfUlnf)c2VK;+_d^|s(0gw&mxsCrkN9*%N1gEKvP|KOur}!(i^i7QRC|0 z_qaifIg52qA#r=ikjTKSliHCmB*dks&0Qr7UqueBwa4#1qm>d!p79^gWbVfTzc2$N z$uYT%_VJxiabnZ4Rt#QE#x!h7X4Gic%Y9BfW^@w5F3oFIv7xoAQ3w+*+S@COxeL-z z@;FFmR2E4#lCCFQmK3~wr7oagA5Uf$l&c-Ea7cOop32$Xp>Ghy8y$K#s{q%+;lmlO zF*Rs(c@%*%gM*zp@*Kmhn$ddpB7(s75aUJq72kr|`+veqDyt_c1qaEvU#K`>mhq#9 zw7=u!@#q_LDi`X^o19nM%2x|1emD;9pL6ggzN+sFQeN&yHv}2m(YzgQw1h;Ce`?I> zxih3=yuQEwwPv3$cobRCbzz4wp<1SlPRHC({jHzPNo@u6o&v%&N_HU#^{&k7l~;G` zca*%lx7Lb!#yU@3KeKjmG3gV)>y4CEa-+Yd{oX?CWht}y&#Uz!WUBTV*hFOMTjaSY zk`Atk-6p8(7qzqGVBTS$Ot?p`iTw{Ct<(6r@3LC{9xxWARS@@gp}xO+0fcvcnsFkQ z+?jUIVhQ?%B)oE4?@oEFQm#p+j^2>t1>XG=N4g|!vZ{iXoX0pYhN*hVgpJAo5nt+Z zxxzQeXmZjxS~+%HHmjP?quj%>!CC{Ns%@ZpZG{721g;h(`qgM~-*Eq5V;Ith)zm_1 zn?UwanYA?MgR{HkJGfY|w0dz8^|l&IX~9R4+Pb17qp6JGob`@E_Ty#ISCb-1bn?AZ zqL0CE1>q3MY1T#iQv@cvhu9D@>&l85p^x-41qfCp660`pq4D$tj)_4G!)3B;sgBTm zT>_v)KvRisgtQ(EA8C}39QMWep>~6e-)=5tVe-PAJCBilDnK*_BQpb=cbefOy+qeGw>-i z_5;i9LmZce`+>vVUH-XGV}0BGDKRmQd*UcE#10?#+D?(dxVDj!vbYE`+EcHACDN7l z9M^i5#3dyrvij}!RSRiJ-`r>Zk}WLvD))gP;x6=-n&FgALfp|9uOB)4oMZ@`f=;>m zY{ytcvcx2C8r5CgjodwAlKkMnz@#ghLw*E8E<({MjMAK}k1f9|tD~L=SYc1b;%yV) zSDZGIrhBph+p(6aa|C_fg{D2H(gi9q?fcB%e$ukhX`GIqaF`L1w8e%*n{c^R;OSyJ zkw>tJnOf{9Mrn8T<)pep4|(iAo?t-e8!HrBsfVNt%5*WLN8trqHf-pW<}+tBlh9AK zeQ$|COGbC0AxT+L{7qgLcNF6G!`-E>2`TMj&DFzjtv7eQcl2wbuPGXp7+q_%mbqFF zj;wC&kGfYzv|DhC@KKV$9Ps>u6rPclYhhNe|g%QFr7bzKOEGCYJbBwYR=wK_b z3RjrrABgvu*4F+V1q53+0-5WUw&h0s>H6S{gn-889BC@YIN!kCuAOB=>}e2sKLn}v zy7#8ElpaOp^gJKn-Rs@+hG!%uX*&?P`h}f3-@9AE9j66xV_j{S*LDvFrofXLpDeB- z@#jLw9k$fJqF6`vhW{7EE|B9b{1?rW$W!0{E-wg0n!9YRDcehKDbC6Mdw8&K%11uU z!nF!mXQKzC7dN%Zsk<~_j_FXaflLiBNi5CIIZeeca*p|9Ympjz-#8S62p%pHq!&`6 zmyZ)HlNB3$GTz@d6K+pcgMKVUC$Kj-{we@_*ZDf?774;byr*9v2$cIGx;Rs_W{+7q z-AQ>SR(0Kr0FC=5-Eh<%Wt?%m+5~kr42>q$$^Wto{_^1^IX*wYu+ap=aAE&*r2RYp zz5;orG2N_=Bf}Hg;|T=R?|J2`zX@6y7V34uE;oKIABCA=z+v0B*XZ{P-_quKJ{)=p zv=7ShhR4XMDcYHdI6sKp@Si6he?b9`Ez8m%j4F`2bC^b=H1)xBEiF4JsqSp`bkXau zAMpTJAyaf7O4LP5{|H7YWt~rzQmc5_PJcF?{ha^il4IKkGxsn&yDx?Nm7ODoJeAO$ zC!3j&=Nf*B>SR8`i0glKz0p)>*c4VNUDaa);gRU zljgt0iuNBlSF;xNRK50w`aF=i@JgI4#n5vHA-^izZ>YO1U8ovWx|OD+;0A$m(Xg%( zsk%|M^-CEZ{JK{0Qqk1njC+h9*nlq-{J-<;^lkV*D4LB6R#js`>a_5~`&`utyA7Wh z=_*IZtu}rhI2{XATVj0|QnFC=6k2T(jYTPeM<#ueoNU)(wd*)4IxrVg5H2a|qUGQX 
z-D}*2U5=o&naj!yJ7>AL6$7v<%W3n z{3VqfY^qhIeb>ZbmN$cE@#_A;Wpa6uzFeIEc<4;n3v8yCScoJu?!2wg5XL!|J%| zdbi^hhX@x35torURuQ;S^k zxqjbseHlYCs-w31wW-mWs#E`%KIk76&aK^me-Jkp7-uQdMJXf8b7$Q#C@#o$tsDQ3 zI8}Ps}LL@s!oKR9_1$U0lrey9+bWwp@__;I(jIb3q#>378FJDk%w`4iy(bLFiBpCK=I7LniWIn219 za)B?@N%aE?N{}Zabux0N*J_LZ4x3bXa*lnXTJvVT%LG8QtMV(UVaHp^Oyl2}bL4N# zIr*YOS`PszsdzPE;PeoxhA^k<{6}0Q@qHp;>Ks!1JabeC=!y@`)lmj6x^0Q-wD;lj zP_a7==zceL`IS@x*>Pf7nCNB6EnR4BzbPf5BA@>Sb^(>b}9X5tXtOcM3qP(xlWT>%1o zXpP?5{aK#k1IuL()J0~&HaV1muex#<7U8x7ySK=-P?verEs0Mv1ulxd z9PL8}Tik{Cn#?hj+_Di@&(PYOwcIrQ@$;?alss^bpXUtT{*3!LaG)J}yF#4^w%PfSv>Y~Xq5`>?SxzR5Ut#YThEMy%OH-!? z0TPLbc?oDYqbk;?`yKPtWUipba-Rj^tkN3687dW_iO%7&4vT&@biLaMyRlP)MmVJc zP*uG0XX>bYLCFRlU%7O3n4{^FNPfpglNdw>t{eReQ_8gQg{nIj#4&UChu+X3haf== zJqm^LVTHU2y_@W0Z%MGZ;u2kfsG{Ar@jw`CBRB1W zxJc}DyG`kY1k^FiX_v1MdA@wkj_*0UBGllKsreopG+Tx#+@KJ3=b^WF4p&#+2wMv>Rc(0u9fyj3?=kfL})tZ+sa;wp|j_6J-5fb-aUs?+dW2UAFzA^s| zTWFUUJJ$@!q{v~oPMKVV*#1^iXnclHt_S_qN49@L$)No_3)m}L*hmUq}Y2*6mBTo?zPWjl~=}x z0#-w%E7<#_qZMvT^;C%N_e5V#knx4RHtyN=;i$g6tjh12P;^ipAqFaYgxZvwvsv%r zw;4+a1MimIg3~g33yr~(QOOr(&}iAEs@<0G!1Y_MMG-{}=XD3N6faU7sL70`a~S^x zX=5zuNRjNQO`Usw&N~BXou;8i+saB@@J+bKWWtv*_H5aL7;ch zg>UYm8oqaqWwX+|ja?_mlS2M4qZI($Mj;_GxIA<}{?Hy{5DkFOk7nTQ&~j$6Xp9Oos_L`5GZyJ9Vx0|4Zsa!|+lkIbBjv{){H@qByZ? z=k`u_Ix7N^@rjCHJfVM=R*}9KP;tHIjXhlxOUzqbKB(1r|EIk78Ibq3VhAywz#_U< z$07%LT`}$VxGcL5+5>g%5>MBPz-3!!bT$+`ki~ZSbMRf<0WeErRU~BB;_bxWNx&>w z9z*66QENL_^PY$QAY#T_@|nZlxqH8$co#+k0@bje>H0lhPqkR=ndLt)+0JBG^|(%E z>zmt6J^Pz@8KRtQ=x%ufH6Fbn$~5^E&x#8X)`4ACh>#2!&xl!D)puSW-nhlXwv>@)l-K))lqELfAWUYZDJQ#2 zZMLq59^bp{2mSSq%-Nb)G;)ZLCu}-GQ*bbC-9#!TtT7Yimi!5;d0H3;ja*|VO%2cP3j499Z6VR^oW#VgWvq_*wUQv+GQM#_^Y&a>O_x_&zb}qM& za&uO2*w042Y&+^A1{dw_zq*~awDXY6k2KQ91Lei~H(nr9sBOdNx_ed#G21foK+f!A zDqEFeF!9_WrGL@@_wEzza-nPT5-%wBzqq@5GxBWNPvf*SN5XSTppfRr5*%0NN>$7h zB>xjd?!Cxcq=&B(-q?fS&@6{7$}4^{d%&23J`T0ya#I}w9eYxno@%TxDK50f(iP4J z+NUk0jD}a3`a%#}i`}%45m8;IE>%&{2IjC7?fu{scuF^s)h`CoT>5BT&0a}zs}>A= z^x9v4Ni={tC8H$=k0`Mesn-lMwd-ODIHN`QL4W+*n81fW4`D@gsL=d`D zI17T+Kt4Lw}Y>SSJ@B&0y0@M&yYYS||-lG2Ws2r;ro+SK~g@AFXnEt@~6o0SU}5P-1}*0(?VZRn}N*-=#d}VzYGmNO`cPh zS-B}&l#2jKg44ot@|;{sp?e~zL0)^DOLJ}E)?8L6BN|aA95{BVH21?%a6}-Oj;95d zAWiLIK>ukj^MlwRTL7j2X(gv9mV!_z5%|$5v)JiLZS_O_%9kN86A)W)f&HRWCm7Ld zeph?;TkA&|yz0qlg^1=hHgW`b<}43QWP1_8PzFyDajA64_+=~W8N(jv%fW<^c?LTqCh zzu_=AZs8Jwog%&)_tS$T#YoIt6wODI>Ro| zFeH4g_-wTASv)r{4!qHO$dZR;G6?pWVnV{BSQelqq~M;y3^<9?wV#&mA}WHahCg=;I7_)pqeTwJDiQ@=eo(w(Z^pbRBCKZ?os)mQTv&;O z_YIonMNX(`{2^duV`9*}0Y!dN!I=ig0H}9_ccs4gEjwt0WfIi{vv~F$#BLtOE$!Fz z{+rPfBcdLX9t$Wry+OXbLWn7YmHk<*d9aJKOIVBm(*naEux0Blq^2HZq$@sr2e+R1 z{L;Yp#=ZiGMorcmFRH70LR)s;E}Yg=+swLtQ^=#9M25dHESsKLR5r z;t-|sc7@x!5>)ceE8aJ8eMx&W&xr)uIevRw<3sfwSYBkuk9-J4Mh|3B5AQa*!)y@K z*!J9d)DeB6+^(^kIqvLPLY?ET zqw+FY4SmTYEGy~TlS}9gJGs;e9J&i4Ar5`3!4iKTlr}cuF5A0NqnwiXpIxLKKvN{d z2FC@v8|s$Afsp%x=!ew0S7RW)o@t|@7y|m(li4+%g7tPBX>(`Xnv3onQFqiApu7as zq#HZkY$(PE^ETglaaPMCoa_!Bk02N9#{?8Rrg`Xk)!XLbN$YC5D)d^$P(>)wsBI2g2)2#7fd%s9e@3BEd1(^?t)v(lE(u_`bvwL@qev3|7f8e_DuO3 z?a%KUjaPp!2vi5VZc_|j$QPL_1$aKR5oLOBm_5b62Hxa{D|^*~skjNp$7-{oM$ref zdDh8X=u+Ysc=akd70t%}RMq9=4$f-W9F2C%Y0hAy!^+&`m-xt2U1x_}B4&mur|U>2 zC1^EfX_325+UXUAQ1Bl9g@_?Nf+WYvrBapV)9*uLE4^+`V#OYEG8VWOG&qjZPAsxi zKA090&CQ9}1@BWAP-v_etCjQ+g`0um{V)1UM_851G+y6IpE8qp!^mdqnNBj7T`eA^ z6qo00HtR0WIud!vcG*uhB>jJ>gSi^dEe&SgqACn^=!rLxq5|%hd3^H0*<_9PZt{9< zzNJ00d%%M(4c{FEBWQ*@%m_)1Wux6xC|pw{kh!lU8kTMHASKvVv*#tagjXw4CAe?j z-*@PX<7FJ32+xZ1fY_{*wbjD&xu&I$^s9O|C*sbiJ_Ek8cJcI=wS-y?mmq~4e zdD6bIU|aIgxVX20_Qf3CDiFMnHT-D0pnZV$l*IE?D#ufLJxGw6!mQ?LlB?`QBtNEN 
zO;w!2Gr6FI11Ot^-Kb-Q;!P6@TiCIS1_)%_Klk}C_;UhEsXi}bf5X#^p*G5!Yewy5z84hlb7&5T z>taS|`A6m=VjDBAdN-QTsg89C`5bJ7-+GlWAJ^R6j?hOPPJ&vO%@V+T8{T?Ia=WQt z9@))4kftf094C$}6yJU_4G-U`g)4})=Ms>gGoAg>sUmOqu96ecA>AVfM*A$|oJ$WtuuOS)Li z#j`$;BxvCn{k!MC$M6*dt+%8#z|^&t^uc7TuVEb>l=9?#-! zPE2-P^KHWvr+m%YwrTOxs7$onbJn9Kd!MI#Q_kNlaE!c7J_{DD?055$GRg+@36-rzI{Zf1l1rA%*%$zPvXbNKbTekW0SHU+}P5|E)X^(Hp+gGLD5PLpXHy1XzwU=gaU|HHe z>~Zl0wq}&zSHhW|Z&p}nyjw64knoF>d5R_IEX&HGgdUY&QA9aZTYW(Y4zyc4oVs&% z4^dA1zQLp3sN<rDaBX z;aG`T(0*fY0T5_^WC#))2yf_2sWe$EY{5R0&(w+wd1YAe5=7IDgU;(3gH_!%BFu6( z`z%$YpO95*VBhKjQ3RT|}kmQ;+!ReL_sN3^u^Jh2{kk;RSK;*P4qpg(a*giE6U z)(8Ftb8L`+7YGyd`uT^KK8HRL)h&2JR&OuX7UZ>fgXTX&K0MrXDMlq}wgf#$udIe+ z3iOL_%N3ilZVKwPbrfRwUU8oHV_OpZY+goJW3rU?=7@m1xgrZ@=s)Y1o1j%JzKaK% zpEixWzJ#LWO5?*&l2{hrA7AeUbG#zIo~^mavfPA%?e=~NvZ*NN>}RJ2(;c13YJ2qE zFg~E~dDwArs_ZzmFb+FBpd)`*2ZZtxsgOZ}M*-u(aIunUvQ-`W8@AveotCzo#*5#T zFt)b*-7DM49o{f{Oz_E61o@rM%21h;Q_Qw9CJQ}_agn9*IlOm{=!{SIt1V*4FN zRnI_&-^G8`Hgeo<6+~=`U|1cYt0kJQ5^#@6WDlS@N=?`($#^<`9i8ckO2O6FT4A+v z_iKwTQOv2Nq4 zpyA5tXsip_Lhs1qsg<^=I{B_!Z_*EC!BsOE{J>{9V-Rq+ZR08?rlw@$e$HDV&Vh>w zA3kLlhrsJmc4|AHSvf%5a$VMHdqjib&CuOeg;b()EPL+15<)H@f0ja^zXUG3Ss2CU z@j5~Z=BaUmO%SYTNeqlgH~fAKPEA$!Xm&nXK$+U#CP&}XvejAXnrx38kBw>nX-=;$ zZB#Xgvr}>Kf#0J0Cq2Oy-PC1KaWLCG*3YE#qFOJ9n~{SqH7V~wJnLi-h0kb1rrL}C zk$U7@J`-=!qluD>v4>1Ez}uIXlkxkT6)`DD<`WsA_3J7zKGAdvy=1g9jhJqel~Dz^ zL-m#AB@k$G$bXn8PKkbAtHZmBmCue#sXg~Po9fGGD=aYtQD;yQIgtvZauR4?H~NvB zBx9KqzvMV4RYJ+t+7C+EQeP3CarwOphPp4d7mWn|h*blm=LZUF{(1lG4%T3l zB>TXB>CHhl11F;IQmz6czf;xW#k2$<4omweB|M;qH` z+j*yr$S;nI;e}=@ui^}V6R4QyU454dda!M+SMV74r_Lju4pY|G47opb{asDt7`_%| z{!umrLw34mklAD$mXy6wQC!{wA_SpRV@7dv;K8M_5+HoT=%Aa=M6 zfekJ4l~7~nlpc=OpDI;OJfGiil0U*C<=XOvfxE0`_KW_A>;nBr1Z{w{3BWL5G6u}W h`G6zizrUP+f^*VuTSt(*o&f(diJ}%@BQan3yZmL_gvR`o#i->^O!JA4J9IcI(!HOLZqS$*MdOMCcyVg zTx{@)Ryl1W1ajX{1um=Ym9qQuwaL?$%~A)O7Pe#Tk7p_BM+1JV%|zHKz!?&ujp<85 z?dj3KYL{w%j;6UPSt;&M(l))>_u8L#rlPhaC52j;%xjvcW*{lvPio=k1dG~_yY-Fb zJ|5bSQ4NvPQhnI>Q02?8@q5>}qup1ZIrhC5j+z5F!(m7y@>fOP8s9r4l9Nh6GYpAj zZIxkz!Qvno2}qk>1a9Mu!z`HE3GTB9n6y#w z^_9{{c>taM`2PL7?rz1#qoGIB&2JkhLhr(0S`2;qc>#AY-%7BtVVy4eNi)6q#S$MM zuk8K+27@WYuF(}}d=2bae)rQsNks*3W8hQZ?p%%eN@rMJadGe++=I(&f#gBiG~*X9 z2wE2T6}SYutnasPxqUYbfw6ILWYyI@-xH$fGb>LoFGqmi!uFF#;^N|#T&eC4PKJkS?%i^#MksO7> zmqdqy`JirBa)#Snxz)Nlp8&F4rlS1(Wqu*t4}C!&KT_58YqD0i%c(tntROERIRBC} zZpKdW>C+G3H!|sle%l?TU-M*7 z3yI{5u-M5UJ61H!AupxWL4)Ybi&7}^wyZ)CZYy7@q0m^&X*ss%gS~<66-6@D4itwb# zqlhC@Jaf?&genkVa|g4Y;x4k6BEL%Ix6(J*9SA=UQ(k`XJ+Yo`6L)bEXB%O8nm(}C zW@tYCzCgEM6AOwM7SPBKEn!>cOjpW%B9!aaeB|WIQ>!B@Ly_`yxd`|=l#p;mB z$;%8u*4-`Yt@`DB8st{j&jQ0YNsnW_f*rV5(TnPymD=;snRmUYiHX+pbga~cFLQDD zFuYQS|6Ic2jO&c%rZzfPVuLL46NJK^Q!nh>=s&hlyb6bWEO`|2gIId+QmYf6XKU5jk!2&kTg3y z{c>)DocC-o^(z{LdHx~8?DTngSROS^9xGk~WX+2GyP={U>kcNx8V!H*U1*mGHaCLb z&^s7U(U4d%n-;hY47}o)A2d|r0E4{?jN^=UPSu6QkzpjPj5SpuqJ~ADfBqNg4tU%nTYxAUnnt?1dZF}&Gn`8#sDqIb@+GD&-%Ju8uj&p zCvkR7B|oZgEQLU6N6P$w#o-k(*$F(lnd#{e1qFq?l9IiO;&0z5=tb>nmz=eT5qs!J zWM&GhVY5*wHTwrVokGg<^YaJJE0{vU!e|f?5s~QzPkzh6&um>8oP?j7U`F%$Vd@`< z``J%R);S<&`wub7Te^e$W(>9?kt;D4Q<_*TXOLJ*9-TWJ95@Z0`yq;1vPMQsWMpLZ zzq6)xSi})YUO(1)F`vAv(g=)403B+d9QMN^A4* zy9ugYuH@*8Vi~Wh^uHwEk$d?<03`PRAt8GmwBaltotN28@%=h}^UJNC;LGCdEPl(a z(EeDAEL{{fQw+!No==bNVJ&JuiZvZ$;gv=93i9j9$;oL?aNz{DwYriAXnb9*!;>x6 zIhx1J&^m7H*QlV{&Qyrd)YC)eKm7_57AAlB@+IV5XXg-A|44>>#;7$P*DjY(oIAQv zGhd=G4isvL-G?qBDam-WF-UOb*Bwb~SiQNiq3X^V_ud%uWM;ni{`Py*2~Iw31*dQs zSU7Rx;hiDMG>_#(NJ*!ipc*_J6{?wm;xEmiJ>z^OuvywhL{ID6wEmW{VwOfBmAPp0 zoHf`W(jp%&^3ynKVUVR4JZOV`?%^*W)o1|;iXp;&$!s%|D 
zEAXTl(wOegU%@oBRuwf|(8oU9%*0VZu1<3OryX~$El5BP4W}a1uFnh3(ke62R_%3~ zL#(&++~#3I?imYEYy|lkeHPmSLGHA{3JZVB5D!NiIvK%0PHVA(l=~tNe}&rL(MD2P zZ2~q&F=HHsyA;-v_HJ;i>k9Fz*4scoChbwNF`2LLW4;SE4!2jSWY8{h-wUqZFE(Od z77hGE#o{xDezJHzl-#hRF26{AQzWH{!@N(;mieHIszCh;=9K;e!!{FphQ0W=VFCKP zsb)W{lVh{0BodN2g#Nzbt0wxI7!7$vvPB&@h2L2p$A>sNF{86T6p*EuBb+S9Ol)#t z^20*g%Z|@^3Td6a*ET~M`&JVk((gDY?!T*v{I>FbFv{1`uyfmC|GiZvjx=Uo?VEd0 zERYqj5O}TAZWtGzU}9e#Ds?-@(^A`a9@&nJ4%#UW8bPJ6e^cDpS`A|TtnU*?YpIi> zmi`on=8lPvssQtq7xday@ah*=yG_SlZciWDIg~`g;nZg7-!0@6t!)4LIWkd<{`hq+ z%3$9Lg3gZD5gt`2x#qhQJ&n-c?Ed&iaoT_yduE2=I1Ig6d{wckpZcRm3JLm+M`tvs zl8sNz2s)s0%$}>GL#xeIU)jxi*fM{CMSn`MkNUp5v7G-vb5$-xB{VX7WE_8pd32^Y z`Q;c(Q?hOL98aoOvA~&RzLutZ=R8#BcY$5oPx3Lgx4RLHo=l0AJO&c`T8C|(v)L{bhv}zw^cc3IUQ8AhT>ZN$+%oQ>qTOoO z6lXzNFD6h#!{XDiwVgQ$Hk6AVLxlK@#pHT<2u;Lq#_H+SV`eGbJlC4lBXgoWJDZL+ zI*xlO{$WUp=rk>787%0LqJFIHsQF>-5>Gd07#-|_)ke_U3EzeH&Af1ittUS;mh6zu zCZ*OLj_*JVebYsgQ|E5+-II4qD$%gg7!#aS`;xZg7@`0I4{7nw0@SI+!54?hv)pQ364-t16^GAI<4rM$z$|gn7lm*iTf@gKB zy?C^V=ezTfrL0>;%nJD64IOpviLjjOkZxDuB|TzaLn*KUW1qRG<1{qq&hs7}Zm3EG z?*z@%JQMSvAhb1*IbnISoxAAK_OFV^Y>+9UilsSmF-ww)*1QCM_2kCXh+5ZAk20q_ zsB8$o539DPEbF)-TfHxB4u3y;j_|zxCia((vLcQ=;HKy!CR*vwY2zP9chGCC?74h= zp=zZAohdAqw1q6N{Bp41`vJJNnN*?neD?;_A|eDRAqFBN=ua9*KDfro@5#d{`0O&e z^Ij1+=cdH)7=Ob&I|4@^TK<& z)bPwY#8bn_IVM0iP;f__+Iche5xhK|?Pg`(*7rA6)9FGx$D&x`jj&CQ9TI>VVdz`i26c2FIdgL zH=x$^>xRUhQ{FY1#4}2*$39D|U2BS(Qd&OT_PvDC{KdR9Lh%yaL}Ulnbf6KM-adEfe* zfa@HvOT({*G#@Qx3Ys@@4kKg^UvVh@Re`GpNG$3;efo5CWaLhNdBTvLXjXo{3aWkk zUoG8Ao;J6I#b3L&s9M|!YszRDN|tM%8eb@9zVlsxvtMEUFBVD zgUL%BOVJ^Xo^t}MuO1cF=-sonvum6mGbh6kxsPpQO*pZ{$EG>6PH$eHeEqeHXXVqd z#`1A$qFDOom7~d{0t2t*{-?Em*Mj$t%><%xmttEOQ4XW$nj|X&ALZi1X6uS@SE4&q zsKe;bwJ=Y5=Gk^av?#h{ol38?Gp{G`Jv?P?5622?{5R!c`wvwMj-4_)7i_5q^N<#r z=2$V^b-LIGJUF^~fBk=Ofn+17X&%Ik{)4eZ>8UNwO~r5swdJP4h)&aMJ?`dw50;K> z`P{>?4hqN*#`3IhLs4VmVpP$4fic8$f>Qm+hVGoz0<=DX)0O%R$dca%F}{J4WTGmh zWI%QMSuq?Q+bI(T z6+P0%iBiQSq}wL?P;%s}3(K_bl6gi7D&`xB6`640rqjOZQiXg;+WT^O>TmHxhX=w zH8j9T!<`oz3(t5JmL%93Ano_;=Z?QhTd(Am!QLu4!p$_D`yDg7l!P{_WxR4XF3yzh z9X68x>Cy|Yb9Dqo@yqp*6x?ee*!@{^EX5M{>&tJ#k`1Q+ZcMuMae^0^w(pio^AW=z{Iku3kRJ*RE9Btmi+lyM5O9`x$55+KBj2yYjAZrO4A08fG@YfE2>aS;J*F%TtW^Er>q*|T)xFRC1;aRKevfRF zUYgS{EV#ZdRd8!wjrKrs4z|P(*Z6`dBK^5D9?B1%?LNj3s4r#WMxz;@dY8gvB#i~J zYCu|v>8?y>+NkK;X+3xC^;Z&0SoJc$R5&&Co{k=SL((jJT7eB zkLt=K^-rESwjltbTGjz{MXi8CpsSzVG@#I%gg_K$Nhy$>4)sMXIajB&oXDDAaW5>zI z*PSjK_&za_+G~Ex&8#Py@!(BUlGIyqEiJ8Vaf^XEXJ!18FV1Z?BX_2JmTd%GG-W#* zp|8$*`q=L=B}T{Dh*f-WXTf>yuJLz-S)&xmn4qD)WfgP+GoA~0i+3p78j|Jlt@`qC z2=pUT!VOBt6}vLa0-OpFiwm8I#R*Bk=<(>5z-498!o$Ng4GkH+=2s6*rpruwhlhK@ zDS5KRQ(($JkIg6W&rWS!57zz;M1*M8rd~2${XnD^D<_Xf7{7guPg2A$zp9W+ebtxy zPkVhzGVAT`BLbhlGNj)7xfNA=&SM*o2Ub%;!w)LYFLFCwDo#Za7{ z7K4U|FYeiz-rXCc*>{&5FU{1An=DF;gQRC&Ib3RC&tiuA8N7V-TL?~ewyYNxq&7Xivb)!I!OciPgJ}M3nF;GjibMC`n~yTK4b?f6Emte7E`x6*oS*Gs4cAfF zROx?oB`4D6rW}5Z>$4b?J8ARUA>bF^+z^LK`nH?+0|w&hT@S#cnhd2p1Z3+Yc8wI| z?5uv?qzy6?w?*1Z%+%J_w%1-2beC13+nW0}N_|^Yk%K~JH_AM-`Y$?MB%%~CVtU9w zPVEg1c4!?ut|EWV&R7xW4ydccj>D~k>em9Z1!yDwo+ndT-xi|A-iSFcr-lq1mE_w% z)Q6O1l#+3*LMfX3(usSvB_HVQx22ko@!4Ka1O;I}A9{RWfQC>)jOCY`_?Ki-z`_nc zCgpxM+;XP&L(cGYBhmhYwyNv!C@O4o82}jyFt_ZGKezNcD`412mQmE@jw6~ zmHgq_=VN6NCsv1|?E{QQg()=2v8;V^c<@mDR59aOzZAVuO6qt+*qVY#+F+D=7BQ{j<0X+?3y$S;TbI~`qQK`NIpafd z1tMy(B92K|w2DOC*&K(bO3uN&6Gm}IfpVZHYDpIJI;{O`G^OP;8*J?Bj4yqYM;KLn zYtZ=uMM;nY+s5v0+WjX(((;>{?U$Cdwv23a?9>yky|Ul}8Ob_PamQM$TOosHtiFz) zZ+vb|B)|V=il2Fu=ux@dF|K9FlZ4D}mwA>H15m-{OJa%p4+%az&UH9#n!l42mm0Kb zwFw)~3tT5dF9f+im^T5OA;3-!`(pL#CO@N8dZQY$zHWB z53$XDXSg-U9Gv0HJcvIE$XNr3$V^WkW=T}>_l#*`0rzyc#oEr8Po+V$vW?F(R 
zv%lj~lk(SZ_oj%Sq1=?xm{$jZTHn5%3y6**jg7`1I__UHGb$BVGS;jex5btJNEi98 zP;jZae)F(hcJER%A@VdMr2A4c@WXE@Z3KmB<%~(?3@caU8}>2g0|4yAgm?wIOrAtD z^RQ6xNaH~J_$~Yl)zk=Xj-+qNsUom4IWzO~@t#N?Q$#A7v+iW!J{J(es)<&|!;FM1 z9q*I;tF1Zm)PTDQvS7H+RonkF)Oj_=3Xwq|pdgt$T^~;=6+PId1uaX>>yfEEg^%T@ zr_o<-C9-s6WQjr3p2s2Qan8=l+Pdt7d9ye@jSLE!PR5m@`YpBE&EK5YD$ktIws&ZF zg$H`Iy7#;jg{94OerFxb2kVK(V6s9;BLF%5>1oVrn?}@T`YpbejE=L7-yi0OPi*68njt2mYYZ>mj8E zX0NI|x?zWspvu%1ayGnKx$CQ=VdsM-S&;LS1}!12qghJM{k%2sZx8TGEmGelmiDjL z7~uDfv#gApr&jV^zcl9k_*f&c4)%Ixo^e`h0XMxe)M}K*_VfvEAiyUSUw)_TZhbH( zzDM(k?vReNHr-#`>%asQ6$>k?4zqF@QcMHy-74iLG5gZFdYu9}do=gaOPJ%rwHoLn zdX5ts7-n)fYy@o6v8?nPYhTw&$bPi_v0Q*=IW5UpYpNW6Pejw{=g_)0D9e1~#3wsb zB%eNgilc6;^9hWYFlZiCE~xV{M?Bd06P2*zt~Iz`7<9zqPL$Fw_CRIhkEG9;fKHK~ zzliOR^nwEXLfukDd;4Mw1v13n_m3pv74bIZ@kBIre`0S3U=f&9=?nVZ_{_Uc8J5lM z#ui0^X`4+Dp?20N5=`m7)zp|zl^8y7T7pd2M31q;VEF87>MFukLqUuCN)8Tu&1WkF z7B63#f`x+r&6H7VK)#*iLxG`>HA!R$27+_w^w?NHbTn~xPR{bloMWg?;riRWiD2BN z^2h65Rb}{v#D82M(5{+v-U-jex6}y<38a{4ca@^yN=i5dg@uPpfh3CcQZQI%9_f8e z9&YY~dDmeyh|$~QXTa3vGZimW-W*8y;%dsC3nv8GB3mF!Ee>?ZYlEN^B&S!RqnsRi zqY*#&x<^HTkB>jHut1y^V{8AVYD-^P4ADq1D`#(S-=)UiL~(Ze!m$Ypk|gOZ3Z4Xe`YGU?WvN5{IM1`nyz53k^5%=n z*Zs@68lngKJKdxDVpNe8FtLX|t|x%vtxWg704}IhADx#OfPA5tv;08|2Y`9qa#MVW zYiQ0&G9C3;l(+>gZ7v+X>>``KH(y8I;IWHee+0&_g0IH1%cv=d9O%rtPeYR^8FL{f zV|ZR%?Y!1(>ivpvidWQ_+ew1v0$vA8>5n+r*n*qVt;u*Zpnc+?gEX3U!M`4OBO&yV zaohSKm{BKg1;{*BRmI2L{37XHuO2OJH!VO&cc0T`5;5`oUzlEXN3zyDl5#7IWs{f% z7S6@eW9bW08wxoOr9O&HNr?n-BdfTWH?9gnQSY8jDL+nFYpIDpMu_6inOUR{HNHrM z?Uc4K+5RY!1wePeQn;X?K)_|KSNVq$g8ezilGxN~%;!BP7%V@?F5~l{B?uk>zU<~} zp)acK{JYG;OkPnG)8xV_V&tkdt%ko4At(fGCO9dmsm;NVo92;+9jonKm{yR7w5Gv& z=h*-&P1V(R<+fnEj1uD(rf1KdDXXh@icA?`fEi8SVHhW0lV@wREvu9|K3z$cR{$RP*_BJjis7BQuzXkdLl_VQ!|vh>Uxk0hpgVHT<-8aKMHbyk1WL z2tCHy84g!H2prWRRDIl98_60CO@AMNy&cc>RN&=ziiyd|<>_*B8fjk%;J>2_3(-MA zLCRCga8{mTe9qqGaMg*X_knTWzke^Y9Hc`<*ix%MCqIAapk>EI{?oh9nE}D1jr)-E z)$Pr7lC+t@Ixt$%Q-7=irh7i$1Ai|adt!sTB?mndj`=P+2%L8>)JA3C2Y&$`} zhJ-T8J}K!FF6CE27_BpwH7s0z$U_=TX|W+*E{f6y3H>vX%2F(j$t*C~E9acg)7^k~ zQ{c_g36>sWCj0g0Spl*1!Q2Ynw1Lsp+yFz8+wbIP1v}KVXD2zYJNw>CV`<4@Q6xeUxi?3f&8|8J!q2?@dT^9Eop2wEX7AQ7Q>CVtgw`^e^CVDsz9%FCi|?!I~- zESA5M{vjdQMb9g-PZzu$M^N2WX0O=H`F%Q~mY|??ox;}+a+>+tpmpdyZsw|NDBp$O zg_w+&E6#sNqdcVJQ!JeP^OhX$IpZOvg8uC5c-yj5t3ypzb~dK##z5Q2?wp|W3LLnw z>Gj2N?M_L9T!sP)l6giy~|8zWe0UVEi{~k4qY%^H`v&gUGeJ&1T$8t;6pOQoL zD9$etg_Ge$Y7U@ZdE~-oo58354L3MAI7!@{8;orcIy%_tM|3tiI#ktmGw2Y#G82E4 z3B&Ol?b)SpGtn>fZEb9(<%rrXgq)pENkQJGqo-fzlaJLx5udFyUIBv1gVr`SaSenD zLUd71;;Al)84tJ<@M&pjCFjHjsz^Z;U||EAHWrg64>&KG$$zE4|K|%_A!+1C2nzFb zvRIgQmD|$jy%;3Owt^kf2iuU zj>cD_@dvCDq3*TWfM}XiPs>f~44%!}*k=j#D!fv>{QfuIA)r;_}nv&mKs4bq?ca&?wR zT}T(6O$ed}H5}m8hSjs;*`PW86sG_UX|2#Bl|2_EMIaa&Bn}klw#~+vs=D!cod0w4 z|C%H~gAJQ{LRZF@>jraw>~`!4?@jIm_y#B zsqcr{V$-mB+gmj-t4+B#pL`I$SS~QwGJ4@~?7+-4tBDOcn<+9d_>{ous^BL0pNqri z2~=!NbEFMeB`QveXWQ*H`ByRs2=5kEABZfbGSJVYYiW2busy#E*sD994<|5yE$|!e z2hzPVt+%Rj%%pouZqB3bKa>N8KXew!XM&8ps7U{3CIaP>3uWe%ScgZPrpfE?%uD&W zv;Y0IWHS8F*qh{pB@X?P45>! 
zf{7**r_-9YLDCl1t+Qr@%{=nW$%5NupuW{Lpw3>K`{IT8>8Q01|`)*ePfdF{bzeFoa+DFo{KQSjdTdtXjd6axWd zfXOqJsq&O!n7V@S$loU?M{I0t@Xh5uNsC5gDXO@0qMj1#W`-mC>*vJqKUwut&G6Sz zYas3eLb%v}C>UJsgz!yv%w|Wqahoa$Fg)i`t6Rah_-Z`2 z%|yTi0D)Sd31kU3OP{Rs+&2Pm18PuKPL4u>JcviGkZi+dP;POr9tjFU^Yc@A#y2-N z?@CJ>f0u`X%lP~9|1LrNcN71Qw*P-|VdWjCCz>;A!bs6Ed)>;TFnKn!q55Esws0Ah zox)8y=;q3%5li6XRR$7 z!$G(*?;Db6diacCnGae8h@}6o()$1RLh%3JfApaPz_FYaBh}s(h{Hdv17tmqlSSKh zJ=Fd~?|JXPt4qBA^8osrKY#v|y*ak|Kd4JR{{BKlGD|aEU!E!frR9I9OQW>;S#jk` ztI=d!T!i3=_=5PPjPb2>twSMC0r6B~%Sx0yR3&Q^=s8gw^CkxAK;HtT${<@f3Onn}7Dz9f&FiBW#m`1+wC zD32XUCPHK{O78&z8EpP8KR*e84FGV5wwcJ7nzDROP8PKNaW8}e)OXfCbfC)R{BnrS zh)SlgHyJt2%}o1Er%V6{l{EfQUEAEmsV+8ZqGO7|AtEBut9Rp4XMf77_TlFzVXHES zpW+}E4g9W;X~f;E6L$fUo^E_C{KfC)^(XH5CjbQlL~Z`9s%km<(VonkH*ZYlu1^u`ySs_Zr)SozN7Q)+qWfu93~ksJusR3n&un+`7<4)HI7YfjT<4# z#>U3U#pOS~)rmnOLF(lQ@`K%U8Jf(dAufN@-?)^Q-^~Dz#L2@G2yTh-yE12GWraY% zUJpUH++iUkTR3TUxY8x0OZS>M6l1^nlJD;uNpRSjnwq53)i#sdV3oi(U)_(HH*<~Y9H@_)$nn=)2p;3+G{-0xc8!(FzxNh z^F({4Xa@hD)vyMBTC1k>jpW+Py@taNX;+xczGur=bAy(`xsQ{nJVCEo+V}_mBcEY- z50f_`FgH*^rwxa8gpvT#!(=R5MM#G;4G8*HnlJY{fGVxbd9}NBm5Yl@ufhV4iqBwr zoj|#fQ~`Q>)+2q8#e9t`w%~QQ+J25|+Ol$Hq{h)0H20FA&q*v-J^u|lU?5TJK-t6x z^q&4b$)7(jxrvus^pUBjicqk$2JgsJoQOyxTFV~@eXLI33|(d6R(YHA}dG*qARJ%I<5i8anAI-AY9 z8sOn)o}jBzjG_w$-eLxzu#h%8KiDY9;gHm+5}p&qWh9 z?&i<_VtY+KqyRa{&QcGWsx=ZbOJ zcrt72wGzf*QKZF0NlAHjxp*55xTAXpwWQ$BgL9uGQG2=tuT>&eHnt3~5w%3VsD>~B zRW-G8R2)`UuPk`)cj+3x4F&@)R0=9>xj_x}fSUcF_ijwDp-t!ROeGG8f&(v!S`bVJ zeH>{fzo*N=Mo4cCLZwYWs5!s>1c6jL{0s+_q?ZKKJ;QnmRAMP<*b@h_X#!3``SIgm z#jLP7+r0h8BVsaga#ZdAyT8Z`gsZK0V4;xPNXBau3TfH(Mca)Q&c$Xr`9 zwSQ{%DF? zax}5zlO+{!n19e`b0=KP;C`$!JIFyDAD&3zfdLJRd zVkYhSx0M%&UxcAy6iAE`fT}p_;`jAOv3;nhEO=I!K52sh>1;~a9ePZGk)X^TBk>2< z_vR=K6+JW(UIGry=a#=`s|Z{-hq}O|yp}03=o0 zN>>Di@ZVI=mbO3Z{mZA9&Hz6O1DQAgHmo21{&j0S?4~c9u{#5)x!i6>h>%g7 zLPbU8>`y;G8surNIvNB78zhyyAu%yAD0htf;7HcgIJX;iJrfI-G$|!m(ezM1w_K z-JG`c0EQ{{9*ZOsJvEf$M$q-kUe`%^#EROU}+YXYbX1>$lh1slVD=i|&)% z2LgdaVP~O^AP`R&Fy;yg0KdlO(D2}|sJdHP2a zH$b5MAu#AkXGG!LXi}x`lH26s%7+u^tL^8moy)mAY`U*?;{~Eh*><|#^T~eePTtOH z)w2>B!O3N>-?`R|UlFMp$w^Il?fPB(MV6|h`c?j|1#4B8L%$P-M6C`TIHiAC9hD68 zKOmuU>!e=j!=ixgsmOr!L0-3n)#HO)@CCEEESdp@^!*M z0DMJT+S|_3cqIHo{*a!ij7`V+Qz?$ubM8IPkyrCQtY$4KbDBfHA+QOQ8kc#PxM~8Jo$3DG_uuMU5&4Js(6L6D?^J8YNg&Q&B zMb`>)`fN4-%Su2I6O$Y7<7jNf9`hBC9y9hi&!sZ>ViqR8IBV%<8GwkP!npamk!X5? 
z8JhwDq996K%n17yho*(UUaxkx%*b(rculHZ|`NYnxh`;@^qA9;1tO=`Pow>jn zaA42CN%dJIGczhQyY<_b0GyigFfj(wmZ>mSOAYm5(Hb*eQ6J%pt#Sw0iWTRTJ}eD+ z^_E|1I5wW+gkTrYDJ7Phfi%u@j_)-R;K`0jrBk;jPWkItmaZl^3O zc7qSNrNHERyMv5PjJ-i3;QQ=iMXGwg&`ht~{1$G`t}3MZxYMXnmg>@tf40CrQ-YxV z8Dqac4CaV24-4G0OtcQ@jfVZDfMAWTbbl+45mE!QFKU3O}^5cUNE&bW3(ZfT|d-Ve$$Qgp<0w3 zRHz#`ieR7w3CmxDVt#aJ)Xi3R4c7!om*5iDccRqr)Mi);fb8|_IpmP@d6aJ-l(vqp zj6{lit@l4zWlEFU8)!@AR-Z?AAb3dvJUdc%U)#O55(pM;F*7=xJBK%}p10r6s;jGm zSs%9AOML?PY1i1!H&Nd|kKubwCgsJHEJoU6O|{WPnE(TJ@Jz%HHZ zjiIp(g}WWB&$CA+BuAv4YW`uy(z(X@st&H=A?zI%BT5jIl|A(;hwT}UDTVwKNP4Oe zW=bva?o2FA8ietiKvCkw2z-Pa-dGJ3g@?8EIZE$PP7hG-V|8CQVP@2EWs^(<;25~ZNKn$ z3Z^%YLLhwP)(kmpQIUC~=L{2kdX)3z{DZLAbwbBO3gCDt?-fDLyIgw z`7w;Xxv}w3=kmPyL|qe=V~1zQ-|(xSqhkWAD=W#5%;P|rLtWz{2_N*8b@og^BI67< zZ!%oB3-Z8=dW=!*8h((|NRGm;G064tAF6%g@6Q7^OvQh=9+khPpCX82D_>Dp_5^u| zjhIwY5*a0BCp`s-@~c0NR1a0-qW?5`Z)p0G&sC}_1Y^}~&j^hz**D)qNdQOK8E#84 z@JM$R=H*yjfo*dL=B^1|Rr_PBA<>q(M8s)O)ot@0rnQ-*Nz|T!m#bN()^-@MV`)yU z4wmKcsNU)Ik-|zg%Zo@5&l!oi+Q_Q<}hYbbkTH0Si2;7^Gc!2 zaR6m7b09O-IF-(df8Kuo2(KEL3ZPUX*7XR6dppnRed6U__w2XOlClH}y&FvTps< zldpb6k(qU;Q(;cyz!C=nQm&+}w9|jnBX!1P8=Tn}RN&i2()!S`WTI=Xh9RF1y%ep?&xHu9CZUzGuGf(R_?A{Q($> z*mcm(mQGcZPY9-0_13gF<-|_aS&!ouMOG zEO$Nci_nP3#tnbPG<3I*ttO0j21C>ZDRxo#uxs++g5lF-eUi{dpuZw-J0WMl`wFpjJsYWNK{qciiVmU0Eb=E|6O(3H%h5rW?Yl%z?XPn6~+)@u#}lB z2>WAA%6+TzKOO$iwYR_%7Z*h5T!^)%Z;J75=V0l)P5Z&yf~$uZ#mq*ZHZgyHf6y=? zLIv2X`NTtc9)54=)*r~-$tKZrx;mFO>#7M`&z}di(T!+}4}vS&;EG9$+Xl;sphuVp zOB&1pj>*nlafr8Af3C5~{Wv~-`!@gb>=(XpqNk3AKHP$kiFQKi_L-Jhh?s#q^cGUa z#rxx&*d?d|6c^P6VP~PD{JLE~DNR|=S>L;p@i7^lFY@f$)q|%uBPD#23ZG8trj6-$ z);`=|f9%qmo-EEti2Pf~taOMkT1cN=oIWOE{-cNT6kH`qkQ)Zg{*_gElo2JclAwjK zJRi-{m7oUeE^9pM;!oorF$||`^9`{0Y1CkYK!f`h^^RBow5`~84z4ykb=+W#1_`T? zTLmEG7EmM{Z~q4*?85!>;)Th?rQk{ieTrZVX>HN`3hsb_es1cRZ;;wM?y++tG zo@D^JEma8He47CpyY^>^Xs5P=U#2Tj?SB~|LL^^z9G1?^=isqH3Hi&9@Gb5^5K`+} z_Jx+()Z$WKAUB=m)A7Zn26N_N>3G$sdoAw$&ecGA9?9Gp?M#`Bk-qvZ?A(L5p)ubs zy?V8lWpEcrF2@Lb_LkP|14FVIskB&9s`P2r@+w_qRN#x=qv_pt-`E7)U|mQ zDxZPI1sOe24eaX!;4gQatROKnG{g6TsnmdAUnHG|UABW6b6_bz7{b)ADNR&0$L9c?7sSeoo)t{3E&DerAuDw!4D)w6lIvx_L?g#jrEEuk*X$rA}IS?z2x8AoAmMEZnsl|D7|e44!T-e zl>xnq$g!pCCVI04jVjDj-WCu%d>~9P({Capp(`FM$sY$<2qWtTOce2b@64}yk@On< zhu2k8%dM~@m=_&=Co&yt&m@T9&$!9NnzU}3{#vpe?VGqkDyihOTzAZG{{`$j0TXPA zvZjgiPM&EN2|qxPCp<|)Q8t>IPKNSZ9Nz_`>_sAkeHA*pXI{Vv)BuU}f_gZ0lb+qa zo6RBGt-pvHk-1heS%i(|i54Bf2Y>9L;0i;e4A~cz5{50ih(J+1Ye1;?SFXp&E(tzy|evC?0!&V z0cgEd|Bx1$5aFdwRsY!u9o)qyu%7JGdSgR4H|6<`s6c>puMGpG{=aLU{+#k>>H!KH zI4)+Q=fG8#%8D82?|IlHznxYJmw_f_poa{UT(aN3s6|*e3oz8ZO&4q!X=p?X`HGjo zR-}KEz^WZ1!A>w&wdiPP)(St_2ODz`g5#S9>CAZ@gly|TNN#pC@xsyIb1+BJ`Lq=& zhBuw_c%~_LLz>|ev{wx#8=52Rlo8+Y5={^y>|3oGpI{AF-NIsJKt`b>?X$D9WW1&> zC^7->=3oB-~G)xaC=%W1hMXB%>av z+(iqc)+i&Mb|i&kd01mHtrb~DFLDmMHmv`>=T2m&A)={-K#r#TYl zQ66rao$o?5>iF-(D#b_w6(BDS-fqKnQG{Ei-LpUAvzqQ6g0Oh!WeKdjCTbWz?Q5_B ztlwrM`PnWv?QrN%dLXu=^Hg-3`l?7!qF z?Mi%LIJK=~$bG@l%D6kDgOTwXds&&DdHqtjKGdJH#)wvU50?E6SxEpXy-uEkTL_Bd|C z(Y|o>x)1>ZS+Ua@^mV2(wHRBJjiDwkK47BFy>kJFRKc#Dg;IDb_!T=Tanu7OXYk?} z$G&va!=E<5BuMje_1OjB2!Vy~WN|*6V4^03^yO5F7tfX%MEtXk8BM-~FAVgkokB`O zYoe6+6?tgzqKOf!OqO6`Chg13Hl!{{sfQBKs^^)dgsHITh>0_dNm`2B7aq6td>VQN z8ua?{jPy4Zat(P>g<0M7g*57zLKmP3#~)VS(pXSrm?GtrnL}UOkE&8bv@t6u=yL#9 zfL3*4sZY8S*ta-eb3i1)LKrJ%O)5OdRwhvraZq_a9lb3JrjPa$ZiVve2+*X!NqaWT z7FIZ(&X#>vNHjuj;SPCvkWj)m30kpkKI0=}07>WaDDvC8Pi#r|ar^p;*$TlCuNukG zF8F|{#?+0MgPhaaeu!k6S6~2Wo0mwa-!?_|))KTWY*?Z{Fw2`}4TvOL8O51~&4UZ0 zgLAAs%fQ3srO_=8nRti#Rf;24=IOv{`6fqhLwWIfeqz?HdMvR@$(Fn&7Y3@1RooVd zLx~Q~L6D@vGE-|(m~;K>0&yl 
zFfIFNL+kOT5DYI+1K7H4!X9DiX=op9KO_zJ6w_y#f82_-NXnym@rQi*1*}`zO1p=p zgW4$pD$J|tCKgIyxZi{z>W&K2TA%F%UgRU=sr$09ZOAxeoHCQ2+}mfWG8c{P)^0iW z>{?ObNfwgXLUtkx2UaxPBL{M3285>@AKu4$WuOOI^}I`4xNsDytjF7Jwj_kV=V|oC zD{`JTtb82@T?p&GwjXG<4gIj2(hKsPAXzo{@;0D`-sb$VZPhG;T?^x^mw}+oRcjW z$LG8^Aiy|*E#B31^lFdPoh=`4r3Lu~I?@5!tbr0P)fxTxn>YxUhUTJ_1Vi_r(GU9L z4h4Et1r#_-x6~tPU<9uNNc@5|GUV>|NycZ&MzcOUw3qUZJ=&J*qq*SWpBTc}0wroe zSV4zA2t(NP2X#r1g+{E&Y(-AQHxs#4FU}UUNfm;xHQBmeW-RKJhsEH{=F?Fj9I<9G z(RmFYMeaCu?RIUe-1Bv v documentation". +# html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +html_logo = "../../web/pandas/static/img/pandas.svg" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +html_css_files = [ + "css/getting_started.css", + "css/pandas.css", +] + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = "../../web/pandas/static/img/favicon.ico" + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. 
+ +# Add redirect for previously existing API pages +# each item is like `(from_old, to_new)` +# To redirect a class and all its methods, see below +# https://github.com/pandas-dev/pandas/issues/16186 + +moved_api_pages = [ + ("pandas.core.common.isnull", "pandas.isna"), + ("pandas.core.common.notnull", "pandas.notna"), + ("pandas.core.reshape.get_dummies", "pandas.get_dummies"), + ("pandas.tools.merge.concat", "pandas.concat"), + ("pandas.tools.merge.merge", "pandas.merge"), + ("pandas.tools.pivot.pivot_table", "pandas.pivot_table"), + ("pandas.tseries.tools.to_datetime", "pandas.to_datetime"), + ("pandas.io.clipboard.read_clipboard", "pandas.read_clipboard"), + ("pandas.io.excel.ExcelFile.parse", "pandas.ExcelFile.parse"), + ("pandas.io.excel.read_excel", "pandas.read_excel"), + ("pandas.io.gbq.read_gbq", "pandas.read_gbq"), + ("pandas.io.html.read_html", "pandas.read_html"), + ("pandas.io.json.read_json", "pandas.read_json"), + ("pandas.io.parsers.read_csv", "pandas.read_csv"), + ("pandas.io.parsers.read_fwf", "pandas.read_fwf"), + ("pandas.io.parsers.read_table", "pandas.read_table"), + ("pandas.io.pickle.read_pickle", "pandas.read_pickle"), + ("pandas.io.pytables.HDFStore.append", "pandas.HDFStore.append"), + ("pandas.io.pytables.HDFStore.get", "pandas.HDFStore.get"), + ("pandas.io.pytables.HDFStore.put", "pandas.HDFStore.put"), + ("pandas.io.pytables.HDFStore.select", "pandas.HDFStore.select"), + ("pandas.io.pytables.read_hdf", "pandas.read_hdf"), + ("pandas.io.sql.read_sql", "pandas.read_sql"), + ("pandas.io.sql.read_frame", "pandas.read_frame"), + ("pandas.io.sql.write_frame", "pandas.write_frame"), + ("pandas.io.stata.read_stata", "pandas.read_stata"), +] + +# Again, tuples of (from_old, to_new) +moved_classes = [ + ("pandas.tseries.resample.Resampler", "pandas.core.resample.Resampler"), + ("pandas.formats.style.Styler", "pandas.io.formats.style.Styler"), +] + +for old, new in moved_classes: + # the class itself... + moved_api_pages.append((old, new)) + + mod, classname = new.rsplit(".", 1) + klass = getattr(importlib.import_module(mod), classname) + methods = [ + x for x in dir(klass) if not x.startswith("_") or x in ("__iter__", "__array__") + ] + + for method in methods: + # ... and each of its public methods + moved_api_pages.append((f"{old}.{method}", f"{new}.{method}")) + +if include_api: + html_additional_pages = { + "generated/" + page[0]: "api_redirect.html" for page in moved_api_pages + } + + +header = f"""\ +.. currentmodule:: pandas + +.. ipython:: python + :suppress: + + import numpy as np + import pandas as pd + + np.random.seed(123456) + np.set_printoptions(precision=4, suppress=True) + pd.options.display.max_rows = 15 + + import os + os.chdir(r'{os.path.dirname(os.path.dirname(__file__))}') +""" + + +html_context = { + "redirects": {old: new for old, new in moved_api_pages}, + "header": header, +} + +# If false, no module index is generated. +html_use_modindex = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 
+# html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = "pandas" + +# -- Options for nbsphinx ------------------------------------------------ + +nbsphinx_allow_errors = True + +# -- Options for LaTeX output -------------------------------------------- + +latex_elements = {} + +# The paper size ('letter' or 'a4'). +# latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +# latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples (source start +# file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ( + "index", + "pandas.tex", + "pandas: powerful Python data analysis toolkit", + "Wes McKinney and the Pandas Development Team", + "manual", + ) +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +# latex_preamble = '' + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_use_modindex = True + + +if include_api: + intersphinx_mapping = { + "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "pandas-gbq": ("https://pandas-gbq.readthedocs.io/en/latest/", None), + "py": ("https://pylib.readthedocs.io/en/latest/", None), + "python": ("https://docs.python.org/3/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", None), + "pyarrow": ("https://arrow.apache.org/docs/", None), + } + +# extlinks alias +extlinks = { + "issue": ("https://github.com/pandas-dev/pandas/issues/%s", "GH"), +} + + +ipython_warning_is_error = False +ipython_execlines = [ + "import numpy as np", + "import pandas as pd", + # This ensures correct rendering on system with console encoding != utf8 + # (windows). It forces pandas to encode its output reprs using utf8 + # wherever the docs are built. The docs' target is the browser, not + # the console, so this is fine. + 'pd.options.display.encoding="utf8"', +] + + +# Add custom Documenter to handle attributes/methods of an AccessorProperty +# eg pandas.Series.str and pandas.Series.dt (see GH9322) + +import sphinx # isort:skip +from sphinx.ext.autodoc import ( # isort:skip + AttributeDocumenter, + Documenter, + MethodDocumenter, +) +from sphinx.ext.autosummary import Autosummary # isort:skip + + +class AccessorDocumenter(MethodDocumenter): + """ + Specialized Documenter subclass for accessors. + """ + + objtype = "accessor" + directivetype = "method" + + # lower than MethodDocumenter so this is not chosen for normal methods + priority = 0.6 + + def format_signature(self): + # this method gives an error/warning for the accessors, therefore + # overriding it (accessor has no arguments) + return "" + + +class AccessorLevelDocumenter(Documenter): + """ + Specialized Documenter subclass for objects on accessor level (methods, + attributes). 
+ """ + + # This is the simple straightforward version + # modname is None, base the last elements (eg 'hour') + # and path the part before (eg 'Series.dt') + # def resolve_name(self, modname, parents, path, base): + # modname = 'pandas' + # mod_cls = path.rstrip('.') + # mod_cls = mod_cls.split('.') + # + # return modname, mod_cls + [base] + def resolve_name(self, modname, parents, path, base): + if modname is None: + if path: + mod_cls = path.rstrip(".") + else: + mod_cls = None + # if documenting a class-level object without path, + # there must be a current class, either from a parent + # auto directive ... + mod_cls = self.env.temp_data.get("autodoc:class") + # ... or from a class directive + if mod_cls is None: + mod_cls = self.env.temp_data.get("py:class") + # ... if still None, there's no way to know + if mod_cls is None: + return None, [] + # HACK: this is added in comparison to ClassLevelDocumenter + # mod_cls still exists of class.accessor, so an extra + # rpartition is needed + modname, _, accessor = mod_cls.rpartition(".") + modname, _, cls = modname.rpartition(".") + parents = [cls, accessor] + # if the module name is still missing, get it like above + if not modname: + modname = self.env.temp_data.get("autodoc:module") + if not modname: + if sphinx.__version__ > "1.3": + modname = self.env.ref_context.get("py:module") + else: + modname = self.env.temp_data.get("py:module") + # ... else, it stays None, which means invalid + return modname, parents + [base] + + +class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter): + objtype = "accessorattribute" + directivetype = "attribute" + + # lower than AttributeDocumenter so this is not chosen for normal + # attributes + priority = 0.6 + + +class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter): + objtype = "accessormethod" + directivetype = "method" + + # lower than MethodDocumenter so this is not chosen for normal methods + priority = 0.6 + + +class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter): + """ + This documenter lets us removes .__call__ from the method signature for + callable accessors like Series.plot + """ + + objtype = "accessorcallable" + directivetype = "method" + + # lower than MethodDocumenter; otherwise the doc build prints warnings + priority = 0.5 + + def format_name(self): + return MethodDocumenter.format_name(self).rstrip(".__call__") + + +class PandasAutosummary(Autosummary): + """ + This alternative autosummary class lets us override the table summary for + Series.plot and DataFrame.plot in the API docs. + """ + + def _replace_pandas_items(self, display_name, sig, summary, real_name): + # this a hack: ideally we should extract the signature from the + # .__call__ method instead of hard coding this + if display_name == "DataFrame.plot": + sig = "([x, y, kind, ax, ....])" + summary = "DataFrame plotting accessor and method" + elif display_name == "Series.plot": + sig = "([kind, ax, figsize, ....])" + summary = "Series plotting accessor and method" + return (display_name, sig, summary, real_name) + + @staticmethod + def _is_deprecated(real_name): + try: + obj, parent, modname = _import_by_name(real_name) + except ImportError: + return False + doc = NumpyDocString(obj.__doc__ or "") + summary = "".join(doc["Summary"] + doc["Extended Summary"]) + return ".. 
deprecated::" in summary + + def _add_deprecation_prefixes(self, items): + for item in items: + display_name, sig, summary, real_name = item + if self._is_deprecated(real_name): + summary = f"(DEPRECATED) {summary}" + yield display_name, sig, summary, real_name + + def get_items(self, names): + items = Autosummary.get_items(self, names) + items = [self._replace_pandas_items(*item) for item in items] + items = list(self._add_deprecation_prefixes(items)) + return items + + +# based on numpy doc/source/conf.py +def linkcode_resolve(domain, info): + """ + Determine the URL corresponding to Python object + """ + if domain != "py": + return None + + modname = info["module"] + fullname = info["fullname"] + + submod = sys.modules.get(modname) + if submod is None: + return None + + obj = submod + for part in fullname.split("."): + try: + with warnings.catch_warnings(): + # Accessing deprecated objects will generate noisy warnings + warnings.simplefilter("ignore", FutureWarning) + obj = getattr(obj, part) + except AttributeError: + return None + + try: + fn = inspect.getsourcefile(inspect.unwrap(obj)) + except TypeError: + try: # property + fn = inspect.getsourcefile(inspect.unwrap(obj.fget)) + except (AttributeError, TypeError): + fn = None + if not fn: + return None + + try: + source, lineno = inspect.getsourcelines(obj) + except TypeError: + try: # property + source, lineno = inspect.getsourcelines(obj.fget) + except (AttributeError, TypeError): + lineno = None + except OSError: + lineno = None + + if lineno: + linespec = f"#L{lineno}-L{lineno + len(source) - 1}" + else: + linespec = "" + + fn = os.path.relpath(fn, start=os.path.dirname(pandas.__file__)) + + if "+" in pandas.__version__: + return f"https://github.com/pandas-dev/pandas/blob/main/pandas/{fn}{linespec}" + else: + return ( + f"https://github.com/pandas-dev/pandas/blob/" + f"v{pandas.__version__}/pandas/{fn}{linespec}" + ) + + +# remove the docstring of the flags attribute (inherited from numpy ndarray) +# because these give doc build errors (see GH issue 5331) +def remove_flags_docstring(app, what, name, obj, options, lines): + if what == "attribute" and name.endswith(".flags"): + del lines[:] + + +def process_class_docstrings(app, what, name, obj, options, lines): + """ + For those classes for which we use :: + + :template: autosummary/class_without_autosummary.rst + + the documented attributes/methods have to be listed in the class + docstring. However, if one of those lists is empty, we use 'None', + which then generates warnings in sphinx / ugly html output. + This "autodoc-process-docstring" event connector removes that part + from the processed docstring. + + """ + if what == "class": + joined = "\n".join(lines) + + templates = [ + """.. rubric:: Attributes + +.. autosummary:: + :toctree: + + None +""", + """.. rubric:: Methods + +.. autosummary:: + :toctree: + + None +""", + ] + + for template in templates: + if template in joined: + joined = joined.replace(template, "") + lines[:] = joined.split("\n") + + +_BUSINED_ALIASES = [ + "pandas.tseries.offsets." + name + for name in [ + "BDay", + "CDay", + "BMonthEnd", + "BMonthBegin", + "CBMonthEnd", + "CBMonthBegin", + ] +] + + +def process_business_alias_docstrings(app, what, name, obj, options, lines): + """ + Starting with sphinx 3.4, the "autodoc-process-docstring" event also + gets called for alias classes. This results in numpydoc adding the + methods/attributes to the docstring, which we don't want (+ this + causes warnings with sphinx). 
+ """ + if name in _BUSINED_ALIASES: + lines[:] = [] + + +suppress_warnings = [ + # We "overwrite" autosummary with our PandasAutosummary, but + # still want the regular autosummary setup to run. So we just + # suppress this warning. + "app.add_directive" +] +if pattern: + # When building a single document we don't want to warn because references + # to other documents are unknown, as it's expected + suppress_warnings.append("ref.ref") + + +def rstjinja(app, docname, source): + """ + Render our pages as a jinja template for fancy templating goodness. + """ + # https://www.ericholscher.com/blog/2016/jul/25/integrating-jinja-rst-sphinx/ + # Make sure we're outputting HTML + if app.builder.format != "html": + return + src = source[0] + rendered = app.builder.templates.render_string(src, app.config.html_context) + source[0] = rendered + + +def setup(app): + app.connect("source-read", rstjinja) + app.connect("autodoc-process-docstring", remove_flags_docstring) + app.connect("autodoc-process-docstring", process_class_docstrings) + app.connect("autodoc-process-docstring", process_business_alias_docstrings) + app.add_autodocumenter(AccessorDocumenter) + app.add_autodocumenter(AccessorAttributeDocumenter) + app.add_autodocumenter(AccessorMethodDocumenter) + app.add_autodocumenter(AccessorCallableDocumenter) + app.add_directive("autosummary", PandasAutosummary) diff --git a/doc/source/development/community.rst b/doc/source/development/community.rst new file mode 100644 index 00000000..59689a2c --- /dev/null +++ b/doc/source/development/community.rst @@ -0,0 +1,119 @@ +.. _community: + +===================== +Contributor community +===================== + +pandas is a community-driven open source project developed by a large group +of `contributors `_ +and a smaller group of `maintainers `_. +The pandas leadership has made a strong commitment to creating an open, +inclusive, and positive community. Please read the pandas `Code of Conduct +`_ for guidance on how to +interact with others in a way that makes the community thrive. + +We offer several meetings and communication channels to share knowledge and +connect with others within the pandas community. + +Community meeting +----------------- + +The pandas Community Meeting is a regular sync meeting for the project's +maintainers which is open to the community. Everyone is welcome to attend and +contribute to conversations. + +The meetings take place on the second Wednesday of each month at 18:00 UTC. + +The minutes of past meetings are available in `this Google Document `__. + + +New contributor meeting +----------------------- + +On the third Wednesday of the month, we hold meetings to welcome and support +new contributors in our community. + +| 👋 you all are invited +| 💬 everyone can present (add yourself to the hackMD agenda) +| 👀 anyone can sit in and listen + +Attendees are new and experienced contributors, as well as a few maintainers. +We aim to answer questions about getting started, or help with work in +progress when possible, as well as get to know each other and share our +learnings and experiences. + +The agenda for the next meeting and minutes of past meetings are available in +`this HackMD `__. + +Calendar +-------- + +This calendar shows all the community meetings. Our community meetings are +ideal for anyone wanting to contribute to pandas, or just curious to know how +current development is going. + +.. 
raw:: html
+
+
+
+You can subscribe to this calendar with the following links:
+
+* `iCal `__
+* `Google calendar `__
+
+Additionally, we'll sometimes have one-off meetings on specific topics.
+These will be published on the same calendar.
+
+`GitHub issue tracker `_
+----------------------------------------------------------------------
+
+The pandas contributor community conducts conversations mainly via this channel.
+Any community member can open issues to:
+
+- Report bugs, e.g. "I noticed the behavior of a certain function is
+  incorrect"
+- Request features, e.g. "I would like this error message to be more readable"
+- Request documentation improvements, e.g. "I found this section unclear"
+- Ask questions, e.g. "I noticed the behavior of a certain function
+  changed between versions. Is this expected?".
+
+  Ideally your questions should be related to how pandas works rather
+  than how you use pandas. `StackOverflow `_ is
+  better suited for answering usage questions, and we ask that all usage
+  questions are first asked on StackOverflow. Thank you for respecting our
+  time and wishes. 🙇
+
+Maintainers and frequent contributors might also open issues to discuss the
+ongoing development of the project. For example:
+
+- Report issues with the CI, GitHub Actions, or the performance of pandas
+- Open issues relating to the internals
+- Start roadmap discussions, aligning on proposals for what to do in future
+  releases or changes to the API.
+- Open issues relating to the project's website, logo, or governance
+
+The developer mailing list
+--------------------------
+
+The pandas mailing list `pandas-dev@python.org `_ is used for long form
+conversations and to engage people in the wider community who might not
+be active on the issue tracker but whom we would like to include in discussions.
+
+.. _community.slack:
+
+Community slack
+---------------
+
+We have a chat platform for contributors, maintainers and potential
+contributors. This is not a space for user questions, but rather for questions about
+contributing to pandas. The slack is a private space, specifically meant for
+people who are hesitant to bring up their questions or ideas on a large public
+mailing list or GitHub.
+
+If this sounds like the right place for you, you are welcome to join! Email us
+at `slack@pandas.pydata.org `_ and let us
+know that you read and agree to our `Code of Conduct `_
+😉 to get an invite. And please remember the slack is not meant to replace the
+mailing list or issue tracker - all important announcements and conversations
+should still happen there.
diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst
new file mode 100644
index 00000000..faa3d29a
--- /dev/null
+++ b/doc/source/development/contributing.rst
@@ -0,0 +1,376 @@
+.. _contributing:
+
+{{ header }}
+
+**********************
+Contributing to pandas
+**********************
+
+.. contents:: Table of contents:
+   :local:
+
+Where to start?
+===============
+
+All contributions, bug reports, bug fixes, documentation improvements,
+enhancements, and ideas are welcome.
+
+If you are brand new to pandas or open-source development, we recommend going
+through the `GitHub "issues" tab `_
+to find issues that interest you. There are a number of issues listed under `Docs
+`_
+and `good first issue
+`_
+where you could start out. Once you've found an interesting issue, you can
+return here to get your development environment set up.
+
+When you start working on an issue, it's a good idea to assign the issue to yourself,
+so nobody else duplicates the work on it. GitHub restricts assigning issues to maintainers
+of the project only. In most projects, and until recently in pandas, contributors added a
+comment letting others know they are working on an issue. While this is ok, you need to
+check each issue individually, and it's not possible to find the unassigned ones.
+
+For this reason, we implemented a workaround consisting of adding a comment with the exact
+text ``take``. When you do it, a GitHub action will automatically assign you the issue
+(this will take seconds, and may require refreshing the page to see it).
+By doing this, it's possible to filter the list of issues and find only the unassigned ones.
+
+So, a good way to find an issue to start contributing to pandas is to check the list of
+`unassigned good first issues `_
+and assign yourself one you like by writing a comment with the exact text ``take``.
+
+If for whatever reason you are not able to continue working on the issue, please try to
+unassign it, so other people know it's available again. You can check the list of
+assigned issues, since people may not be working on them anymore. If you want to work on one
+that is assigned, feel free to kindly ask the current assignee if you can take it
+(please allow at least a week of inactivity before considering work on the issue discontinued).
+
+We have several :ref:`contributor community ` communication channels, which you are
+welcome to join, and ask questions as you figure things out. Among them are regular meetings for
+new contributors, dev meetings, a dev mailing list, and a slack for the contributor community.
+All pandas contributors are welcome to these spaces, where they can connect with each other. Even
+maintainers who have been with us for a long time felt just like you when they started out, and
+are happy to welcome you and support you as you get to know how we work, and where things are.
+Take a look at the next sections to learn more.
+
+.. _contributing.bug_reports:
+
+Bug reports and enhancement requests
+====================================
+
+Bug reports are an important part of making pandas more stable. Having a complete bug report
+will allow others to reproduce the bug and provide insight into fixing it. See
+`this stackoverflow article `_ and
+`this blogpost `_
+for tips on writing a good bug report.
+
+Trying the bug-producing code out on the *main* branch is often a worthwhile exercise
+to confirm the bug still exists. It is also worth searching existing bug reports and pull requests
+to see if the issue has already been reported and/or fixed.
+
+Bug reports must:
+
+#. Include a short, self-contained Python snippet reproducing the problem.
+   You can format the code nicely by using `GitHub Flavored Markdown
+   `_::
+
+      ```python
+      >>> from pandas import DataFrame
+      >>> df = DataFrame(...)
+      ...
+      ```
+
+#. Include the full version string of pandas and its dependencies. You can use the built-in function::
+
+      >>> import pandas as pd
+      >>> pd.show_versions()
+
+#. Explain why the current behavior is wrong/not desired and what you expect instead.
+
+The issue will then show up to the pandas community and be open to comments/ideas from others.
+
+.. _contributing.github:
+
+Working with the code
+=====================
+
+Now that you have an issue you want to fix, enhancement to add, or documentation to improve,
+you need to learn how to work with GitHub and the pandas code base.
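+
+For illustration only, the kind of self-contained snippet asked for in the bug report
+guidelines above might look like the following (the data and the behaviour being
+reported are made up for the example)::
+
+    ```python
+    >>> import pandas as pd
+    >>> pd.show_versions()  # paste the full output into the report
+    >>> df = pd.DataFrame({"a": [1, 2, None]})
+    >>> df["a"].sum()  # say what you expected (3.0) and what you actually got
+    ```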
+
+.. _contributing.version_control:
+
+Version control, Git, and GitHub
+--------------------------------
+
+To the new user, working with Git is one of the more daunting aspects of contributing to pandas.
+It can very quickly become overwhelming, but sticking to the guidelines below will help keep the process
+straightforward and mostly trouble-free. As always, if you are having difficulties please
+feel free to ask for help.
+
+The code is hosted on `GitHub `_. To
+contribute you will need to sign up for a `free GitHub account
+`_. We use `Git `_ for
+version control to allow many people to work together on the project.
+
+Some great resources for learning Git:
+
+* the `GitHub help pages `_.
+* the `NumPy documentation `_.
+* Matthew Brett's `Pydagogue `_.
+
+Getting started with Git
+------------------------
+
+`GitHub has instructions `__ for installing git,
+setting up your SSH key, and configuring git. All these steps need to be completed before
+you can work seamlessly between your local repository and GitHub.
+
+.. _contributing.forking:
+
+Forking
+-------
+
+You will need your own fork to work on the code. Go to the `pandas project
+page `_ and hit the ``Fork`` button. You will
+want to clone your fork to your machine::
+
+    git clone https://github.com/your-user-name/pandas.git pandas-yourname
+    cd pandas-yourname
+    git remote add upstream https://github.com/pandas-dev/pandas.git
+
+This creates the directory ``pandas-yourname`` and connects your repository to
+the upstream (main project) *pandas* repository.
+
+Note that performing a shallow clone (with ``--depth=N``, for some ``N`` greater
+than or equal to 1) might break some tests and features such as ``pd.show_versions()``,
+as the version number cannot be computed anymore.
+
+Creating a branch
+-----------------
+
+You want your main branch to reflect only production-ready code, so create a
+feature branch for making your changes. For example::
+
+    git branch shiny-new-feature
+    git checkout shiny-new-feature
+
+The above can be simplified to::
+
+    git checkout -b shiny-new-feature
+
+This changes your working directory to the shiny-new-feature branch. Keep any
+changes in this branch specific to one bug or feature so it is clear
+what the branch brings to pandas. You can have many shiny-new-features
+and switch in between them using the git checkout command.
+
+When creating this branch, make sure your main branch is up to date with
+the latest upstream main version. To update your local main branch, you
+can do::
+
+    git checkout main
+    git pull upstream main --ff-only
+
+When you want to update the feature branch with changes in main after
+you created the branch, check the section on
+:ref:`updating a PR `.
+
+Contributing your changes to pandas
+=====================================
+
+.. _contributing.commit-code:
+
+Committing your code
+--------------------
+
+Keep style fixes to a separate commit to make your pull request more readable.
+
+Once you've made changes, you can see them by typing::
+
+    git status
+
+If you have created a new file, it is not being tracked by git. Add it by typing::
+
+    git add path/to/file-to-be-added.py
+
+Doing 'git status' again should give something like::
+
+    # On branch shiny-new-feature
+    #
+    #       modified:   /relative/path/to/file-you-added.py
+    #
+
+Finally, commit your changes to your local repository with an explanatory message. pandas
+uses a convention for commit message prefixes and layout. 
Here are +some common prefixes along with general guidelines for when to use them: + +* ENH: Enhancement, new functionality +* BUG: Bug fix +* DOC: Additions/updates to documentation +* TST: Additions/updates to tests +* BLD: Updates to the build process/scripts +* PERF: Performance improvement +* TYP: Type annotations +* CLN: Code cleanup + +The following defines how a commit message should be structured. Please reference the +relevant GitHub issues in your commit message using GH1234 or #1234. Either style +is fine, but the former is generally preferred: + +* a subject line with ``< 80`` chars. +* One blank line. +* Optionally, a commit message body. + +Now you can commit your changes in your local repository:: + + git commit -m + +.. _contributing.push-code: + +Pushing your changes +-------------------- + +When you want your changes to appear publicly on your GitHub page, push your +forked feature branch's commits:: + + git push origin shiny-new-feature + +Here ``origin`` is the default name given to your remote repository on GitHub. +You can see the remote repositories:: + + git remote -v + +If you added the upstream repository as described above you will see something +like:: + + origin git@github.com:yourname/pandas.git (fetch) + origin git@github.com:yourname/pandas.git (push) + upstream git://github.com/pandas-dev/pandas.git (fetch) + upstream git://github.com/pandas-dev/pandas.git (push) + +Now your code is on GitHub, but it is not yet a part of the pandas project. For that to +happen, a pull request needs to be submitted on GitHub. + +Review your code +---------------- + +When you're ready to ask for a code review, file a pull request. Before you do, once +again make sure that you have followed all the guidelines outlined in this document +regarding code style, tests, performance tests, and documentation. You should also +double check your branch changes against the branch it was based on: + +#. Navigate to your repository on GitHub -- https://github.com/your-user-name/pandas +#. Click on ``Branches`` +#. Click on the ``Compare`` button for your feature branch +#. Select the ``base`` and ``compare`` branches, if necessary. This will be ``main`` and + ``shiny-new-feature``, respectively. + +Finally, make the pull request +------------------------------ + +If everything looks good, you are ready to make a pull request. A pull request is how +code from a local repository becomes available to the GitHub community and can be looked +at and eventually merged into the main version. This pull request and its associated +changes will eventually be committed to the main branch and available in the next +release. To submit a pull request: + +#. Navigate to your repository on GitHub +#. Click on the ``Pull Request`` button +#. You can then click on ``Commits`` and ``Files Changed`` to make sure everything looks + okay one last time +#. Write a description of your changes in the ``Preview Discussion`` tab +#. Click ``Send Pull Request``. + +This request then goes to the repository maintainers, and they will review +the code. + +.. _contributing.update-pr: + +Updating your pull request +-------------------------- + +Based on the review you get on your pull request, you will probably need to make +some changes to the code. In that case, you can make them in your branch, +add a new commit to that branch, push it to GitHub, and the pull request will be +automatically updated. 
Pushing them to GitHub again is done by::
+
+    git push origin shiny-new-feature
+
+This will automatically update your pull request with the latest code and restart the
+:any:`Continuous Integration ` tests.
+
+Another reason you might need to update your pull request is to solve conflicts
+with changes that have been merged into the main branch since you opened your
+pull request.
+
+To do this, you need to "merge upstream main" in your branch::
+
+    git checkout shiny-new-feature
+    git fetch upstream
+    git merge upstream/main
+
+If there are no conflicts (or they can be fixed automatically), a file with a
+default commit message will open, and you can simply save and quit this file.
+
+If there are merge conflicts, you need to solve those conflicts. See, for
+example, https://help.github.com/articles/resolving-a-merge-conflict-using-the-command-line/
+for an explanation of how to do this.
+Once the conflicts are merged and the files where the conflicts were solved are
+added, you can run ``git commit`` to save those fixes.
+
+If you have uncommitted changes at the moment you want to update the branch with
+main, you will need to ``stash`` them prior to updating (see the
+`stash docs `__).
+This will effectively store your changes and they can be reapplied after updating.
+
+After the feature branch has been updated locally, you can now update your pull
+request by pushing to the branch on GitHub::
+
+    git push origin shiny-new-feature
+
+Autofixing formatting errors
+----------------------------
+
+We use several styling checks (e.g. ``black``, ``flake8``, ``isort``) which are run after
+you make a pull request.
+
+To automatically fix formatting errors on each commit you make, you can
+set up pre-commit yourself. First, create a Python :ref:`environment
+` and then set up :ref:`pre-commit `.
+
+Delete your merged branch (optional)
+------------------------------------
+
+Once your feature branch is accepted into upstream, you'll probably want to get rid of
+the branch. First, merge upstream main into your branch so git knows it is safe to
+delete your branch::
+
+    git fetch upstream
+    git checkout main
+    git merge upstream/main
+
+Then you can do::
+
+    git branch -d shiny-new-feature
+
+Make sure you use a lower-case ``-d``, or else git won't warn you if your feature
+branch has not actually been merged.
+
+The branch will still exist on GitHub, so to delete it there do::
+
+    git push origin --delete shiny-new-feature
+
+
+Tips for a successful pull request
+==================================
+
+If you have made it to the `Review your code`_ phase, one of the core contributors may
+take a look. Please note however that a handful of people are responsible for reviewing
+all of the contributions, which can often lead to bottlenecks.
+
+To improve the chances of your pull request being reviewed, you should:
+
+- **Reference an open issue** for non-trivial changes to clarify the PR's purpose
+- **Ensure you have appropriate tests**. These should be the first part of any PR
+- **Keep your pull requests as simple as possible**. Larger PRs take longer to review
+- **Ensure that CI is in a green state**. Reviewers may not even look otherwise
+- **Keep** `Updating your pull request`_, either by request or every few days
diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst
new file mode 100644
index 00000000..26692057
--- /dev/null
+++ b/doc/source/development/contributing_codebase.rst
@@ -0,0 +1,928 @@
+.. 
_contributing_codebase: + +{{ header }} + +============================= +Contributing to the code base +============================= + +.. contents:: Table of Contents: + :local: + +Code standards +-------------- + +Writing good code is not just about what you write. It is also about *how* you +write it. During :ref:`Continuous Integration ` testing, several +tools will be run to check your code for stylistic errors. +Generating any warnings will cause the test to fail. +Thus, good style is a requirement for submitting code to pandas. + +There is a tool in pandas to help contributors verify their changes before +contributing them to the project:: + + ./ci/code_checks.sh + +The script validates the doctests, formatting in docstrings, and +imported modules. It is possible to run the checks independently by using the +parameters ``docstring``, ``code``, and ``doctests`` +(e.g. ``./ci/code_checks.sh doctests``). + +In addition, because a lot of people use our library, it is important that we +do not make sudden changes to the code that could have the potential to break +a lot of user code as a result, that is, we need it to be as *backwards compatible* +as possible to avoid mass breakages. + +In addition to ``./ci/code_checks.sh``, some extra checks (including static type +checking) are run by ``pre-commit`` - see :ref:`here ` +for how to run them. + +.. _contributing.pre-commit: + +Pre-commit +---------- + +Additionally, :ref:`Continuous Integration ` will run code formatting checks +like ``black``, ``flake8`` (including a `pandas-dev-flaker `_ plugin), +``isort``, and ``cpplint`` and more using `pre-commit hooks `_ +Any warnings from these checks will cause the :ref:`Continuous Integration ` to fail; therefore, +it is helpful to run the check yourself before submitting code. This +can be done by installing ``pre-commit``:: + + pip install pre-commit + +and then running:: + + pre-commit install + +from the root of the pandas repository. Now all of the styling checks will be +run each time you commit changes without your needing to run each one manually. +In addition, using ``pre-commit`` will also allow you to more easily +remain up-to-date with our code checks as they change. + +Note that if needed, you can skip these checks with ``git commit --no-verify``. + +If you don't want to use ``pre-commit`` as part of your workflow, you can still use it +to run its checks with:: + + pre-commit run --files + +without needing to have done ``pre-commit install`` beforehand. + +If you want to run checks on all recently committed files on upstream/main you can use:: + + pre-commit run --from-ref=upstream/main --to-ref=HEAD --all-files + +without needing to have done ``pre-commit install`` beforehand. + +.. note:: + + You may want to periodically run ``pre-commit gc``, to clean up repos + which are no longer used. + +.. note:: + + If you have conflicting installations of ``virtualenv``, then you may get an + error - see `here `_. + + Also, due to a `bug in virtualenv `_, + you may run into issues if you're using conda. To solve this, you can downgrade + ``virtualenv`` to version ``20.0.33``. + +Optional dependencies +--------------------- + +Optional dependencies (e.g. matplotlib) should be imported with the private helper +``pandas.compat._optional.import_optional_dependency``. This ensures a +consistent error message when the dependency is not met. + +All methods using an optional dependency should include a test asserting that an +``ImportError`` is raised when the optional dependency is not found. 
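+
+For example, a minimal sketch of such a test might look like the following
+(``tabulate`` and ``DataFrame.to_markdown`` are used here purely to illustrate
+the pattern, not as a prescription of where such a test should live):
+
+.. code-block:: python
+
+    import pytest
+
+    import pandas as pd
+    import pandas.util._test_decorators as td
+
+
+    @td.skip_if_installed("tabulate")
+    def test_to_markdown_missing_tabulate():
+        # DataFrame.to_markdown relies on the optional dependency tabulate,
+        # so a helpful ImportError should be raised when it is not installed
+        df = pd.DataFrame({"a": [1, 2, 3]})
+        with pytest.raises(ImportError, match="tabulate"):
+            df.to_markdown()
+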
This test +should be skipped if the library is present. + +All optional dependencies should be documented in +:ref:`install.optional_dependencies` and the minimum required version should be +set in the ``pandas.compat._optional.VERSIONS`` dict. + +Backwards compatibility +----------------------- + +Please try to maintain backward compatibility. pandas has lots of users with lots of +existing code, so don't break it if at all possible. If you think breakage is required, +clearly state why as part of the pull request. Also, be careful when changing method +signatures and add deprecation warnings where needed. Also, add the deprecated sphinx +directive to the deprecated functions or methods. + +If a function with the same arguments as the one being deprecated exist, you can use +the ``pandas.util._decorators.deprecate``: + +.. code-block:: python + + from pandas.util._decorators import deprecate + + deprecate('old_func', 'new_func', '1.1.0') + +Otherwise, you need to do it manually: + +.. code-block:: python + + import warnings + from pandas.util._exceptions import find_stack_level + + + def old_func(): + """Summary of the function. + + .. deprecated:: 1.1.0 + Use new_func instead. + """ + warnings.warn( + 'Use new_func instead.', + FutureWarning, + stacklevel=find_stack_level(), + ) + new_func() + + + def new_func(): + pass + +You'll also need to + +1. Write a new test that asserts a warning is issued when calling with the deprecated argument +2. Update all of pandas existing tests and code to use the new argument + +See :ref:`contributing.warnings` for more. + +.. _contributing.type_hints: + +Type hints +---------- + +pandas strongly encourages the use of :pep:`484` style type hints. New development should contain type hints and pull requests to annotate existing code are accepted as well! + +Style guidelines +~~~~~~~~~~~~~~~~ + +Type imports should follow the ``from typing import ...`` convention. Some types do not need to be imported since :pep:`585` some builtin constructs, such as ``list`` and ``tuple``, can directly be used for type annotations. So rather than + +.. code-block:: python + + import typing + + primes: typing.List[int] = [] + +You should write + +.. code-block:: python + + primes: list[int] = [] + +``Optional`` should be avoided in favor of the shorter ``| None``, so instead of + +.. code-block:: python + + from typing import Union + + maybe_primes: list[Union[int, None]] = [] + +or + +.. code-block:: python + + from typing import Optional + + maybe_primes: list[Optional[int]] = [] + +You should write + +.. code-block:: python + + from __future__ import annotations # noqa: F404 + + maybe_primes: list[int | None] = [] + +In some cases in the code base classes may define class variables that shadow builtins. This causes an issue as described in `Mypy 1775 `_. The defensive solution here is to create an unambiguous alias of the builtin and use that without your annotation. For example, if you come across a definition like + +.. code-block:: python + + class SomeClass1: + str = None + +The appropriate way to annotate this would be as follows + +.. code-block:: python + + str_type = str + + class SomeClass2: + str: str_type = None + +In some cases you may be tempted to use ``cast`` from the typing module when you know better than the analyzer. This occurs particularly when using custom inference functions. For example + +.. 
code-block:: python + + from typing import cast + + from pandas.core.dtypes.common import is_number + + def cannot_infer_bad(obj: Union[str, int, float]): + + if is_number(obj): + ... + else: # Reasonably only str objects would reach this but... + obj = cast(str, obj) # Mypy complains without this! + return obj.upper() + +The limitation here is that while a human can reasonably understand that ``is_number`` would catch the ``int`` and ``float`` types mypy cannot make that same inference just yet (see `mypy #5206 `_. While the above works, the use of ``cast`` is **strongly discouraged**. Where applicable a refactor of the code to appease static analysis is preferable + +.. code-block:: python + + def cannot_infer_good(obj: Union[str, int, float]): + + if isinstance(obj, str): + return obj.upper() + else: + ... + +With custom types and inference this is not always possible so exceptions are made, but every effort should be exhausted to avoid ``cast`` before going down such paths. + +pandas-specific types +~~~~~~~~~~~~~~~~~~~~~ + +Commonly used types specific to pandas will appear in `pandas._typing `_ and you should use these where applicable. This module is private for now but ultimately this should be exposed to third party libraries who want to implement type checking against pandas. + +For example, quite a few functions in pandas accept a ``dtype`` argument. This can be expressed as a string like ``"object"``, a ``numpy.dtype`` like ``np.int64`` or even a pandas ``ExtensionDtype`` like ``pd.CategoricalDtype``. Rather than burden the user with having to constantly annotate all of those options, this can simply be imported and reused from the pandas._typing module + +.. code-block:: python + + from pandas._typing import Dtype + + def as_type(dtype: Dtype) -> ...: + ... + +This module will ultimately house types for repeatedly used concepts like "path-like", "array-like", "numeric", etc... and can also hold aliases for commonly appearing parameters like ``axis``. Development of this module is active so be sure to refer to the source for the most up to date list of available types. + +Validating type hints +~~~~~~~~~~~~~~~~~~~~~ + +pandas uses `mypy `_ and `pyright `_ to statically analyze the code base and type hints. After making any change you can ensure your type hints are correct by running + +.. code-block:: shell + + # the following might fail if the installed pandas version does not correspond to your local git version + pre-commit run --hook-stage manual --all-files + + # if the above fails due to stubtest + SKIP=stubtest pre-commit run --hook-stage manual --all-files + +in your activated python environment. A recent version of ``numpy`` (>=1.22.0) is required for type validation. + +.. _contributing.ci: + +Testing type hints in code using pandas +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + * Pandas is not yet a py.typed library (:pep:`561`)! + The primary purpose of locally declaring pandas as a py.typed library is to test and + improve the pandas-builtin type annotations. + +Until pandas becomes a py.typed library, it is possible to easily experiment with the type +annotations shipped with pandas by creating an empty file named "py.typed" in the pandas +installation folder: + +.. code-block:: none + + python -c "import pandas; import pathlib; (pathlib.Path(pandas.__path__[0]) / 'py.typed').touch()" + +The existence of the py.typed file signals to type checkers that pandas is already a py.typed +library. 
This makes type checkers aware of the type annotations shipped with pandas. + +Testing with continuous integration +----------------------------------- + +The pandas test suite will run automatically on `GitHub Actions `__ +continuous integration services, once your pull request is submitted. +However, if you wish to run the test suite on a branch prior to submitting the pull request, +then the continuous integration services need to be hooked to your GitHub repository. Instructions are here +for `GitHub Actions `__. + +A pull-request will be considered for merging when you have an all 'green' build. If any tests are failing, +then you will get a red 'X', where you can click through to see the individual failed tests. +This is an example of a green build. + +.. image:: ../_static/ci.png + +.. _contributing.tdd: + + +Test-driven development +----------------------- + +pandas is serious about testing and strongly encourages contributors to embrace +`test-driven development (TDD) `_. +This development process "relies on the repetition of a very short development cycle: +first the developer writes an (initially failing) automated test case that defines a desired +improvement or new function, then produces the minimum amount of code to pass that test." +So, before actually writing any code, you should write your tests. Often the test can be +taken from the original GitHub issue. However, it is always worth considering additional +use cases and writing corresponding tests. + +Adding tests is one of the most common requests after code is pushed to pandas. Therefore, +it is worth getting in the habit of writing tests ahead of time so this is never an issue. + +Writing tests +~~~~~~~~~~~~~ + +All tests should go into the ``tests`` subdirectory of the specific package. +This folder contains many current examples of tests, and we suggest looking to these for +inspiration. Ideally, there should be one, and only one, obvious place for a test to reside. +Until we reach that ideal, these are some rules of thumb for where a test should +be located. + +1. Does your test depend only on code in ``pd._libs.tslibs``? + This test likely belongs in one of: + + - tests.tslibs + + .. note:: + + No file in ``tests.tslibs`` should import from any pandas modules + outside of ``pd._libs.tslibs`` + + - tests.scalar + - tests.tseries.offsets + +2. Does your test depend only on code in pd._libs? + This test likely belongs in one of: + + - tests.libs + - tests.groupby.test_libgroupby + +3. Is your test for an arithmetic or comparison method? + This test likely belongs in one of: + + - tests.arithmetic + + .. note:: + + These are intended for tests that can be shared to test the behavior + of DataFrame/Series/Index/ExtensionArray using the ``box_with_array`` + fixture. + + - tests.frame.test_arithmetic + - tests.series.test_arithmetic + +4. Is your test for a reduction method (min, max, sum, prod, ...)? + This test likely belongs in one of: + + - tests.reductions + + .. note:: + + These are intended for tests that can be shared to test the behavior + of DataFrame/Series/Index/ExtensionArray. + + - tests.frame.test_reductions + - tests.series.test_reductions + - tests.test_nanops + +5. Is your test for an indexing method? + This is the most difficult case for deciding where a test belongs, because + there are many of these tests, and many of them test more than one method + (e.g. both ``Series.__getitem__`` and ``Series.loc.__getitem__``) + + A) Is the test specifically testing an Index method (e.g. 
``Index.get_loc``,
+      ``Index.get_indexer``)?
+      This test likely belongs in one of:
+
+      - tests.indexes.test_indexing
+      - tests.indexes.fooindex.test_indexing
+
+      Within those files there should be a method-specific test class, e.g.
+      ``TestGetLoc``.
+
+      In most cases, neither ``Series`` nor ``DataFrame`` objects should be
+      needed in these tests.
+
+   B) Is the test for a Series or DataFrame indexing method *other* than
+      ``__getitem__`` or ``__setitem__``, e.g. ``xs``, ``where``, ``take``,
+      ``mask``, ``lookup``, or ``insert``?
+      This test likely belongs in one of:
+
+      - tests.frame.indexing.test_methodname
+      - tests.series.indexing.test_methodname
+
+   C) Is the test for any of ``loc``, ``iloc``, ``at``, or ``iat``?
+      This test likely belongs in one of:
+
+      - tests.indexing.test_loc
+      - tests.indexing.test_iloc
+      - tests.indexing.test_at
+      - tests.indexing.test_iat
+
+      Within the appropriate file, test classes correspond to either types of
+      indexers (e.g. ``TestLocBooleanMask``) or major use cases
+      (e.g. ``TestLocSetitemWithExpansion``).
+
+      See the note in section D) about tests that test multiple indexing methods.
+
+   D) Is the test for ``Series.__getitem__``, ``Series.__setitem__``,
+      ``DataFrame.__getitem__``, or ``DataFrame.__setitem__``?
+      This test likely belongs in one of:
+
+      - tests.series.test_getitem
+      - tests.series.test_setitem
+      - tests.frame.test_getitem
+      - tests.frame.test_setitem
+
+   In many cases such a test may test multiple similar methods, e.g.
+
+   .. code-block:: python
+
+      import pandas as pd
+      import pandas._testing as tm
+
+      def test_getitem_listlike_of_ints():
+          ser = pd.Series(range(5))
+
+          result = ser[[3, 4]]
+          expected = pd.Series([3, 4], index=[3, 4])
+          tm.assert_series_equal(result, expected)
+
+          result = ser.loc[[3, 4]]
+          tm.assert_series_equal(result, expected)
+
+   In cases like this, the test location should be based on the *underlying*
+   method being tested. Or in the case of a test for a bugfix, the location
+   of the actual bug. So in this example, we know that ``Series.__getitem__``
+   calls ``Series.loc.__getitem__``, so this is *really* a test for
+   ``loc.__getitem__``. So this test belongs in ``tests.indexing.test_loc``.
+
+6. Is your test for a DataFrame or Series method?
+
+   A) Is the method a plotting method?
+      This test likely belongs in one of:
+
+      - tests.plotting
+
+   B) Is the method an IO method?
+      This test likely belongs in one of:
+
+      - tests.io
+
+   C) Otherwise
+      This test likely belongs in one of:
+
+      - tests.series.methods.test_mymethod
+      - tests.frame.methods.test_mymethod
+
+      .. note::
+
+            If a test can be shared between DataFrame/Series using the
+            ``frame_or_series`` fixture, by convention it goes in the
+            ``tests.frame`` file.
+
+7. Is your test for an Index method, not depending on Series/DataFrame?
+   This test likely belongs in one of:
+
+   - tests.indexes
+
+8. Is your test for one of the pandas-provided ExtensionArrays (``Categorical``,
+   ``DatetimeArray``, ``TimedeltaArray``, ``PeriodArray``, ``IntervalArray``,
+   ``PandasArray``, ``FloatArray``, ``BoolArray``, ``StringArray``)?
+   This test likely belongs in one of:
+
+   - tests.arrays
+
+9. Is your test for *all* ExtensionArray subclasses (the "EA Interface")?
+   This test likely belongs in one of:
+
+   - tests.extension
+
+Using ``pytest``
+~~~~~~~~~~~~~~~~
+
+Test structure
+^^^^^^^^^^^^^^
+
+pandas existing test structure is *mostly* class-based, meaning that you will
+typically find tests wrapped in a class.
+
+.. code-block:: python
+
+    class TestReallyCoolFeature:
+        def test_cool_feature_aspect(self):
+            pass
+
+We prefer a more *functional* style using the `pytest `__ framework, which
+offers a richer testing framework that will facilitate testing and developing.
+Thus, instead of writing test classes, we will write test functions like this:
+
+.. code-block:: python
+
+    def test_really_cool_feature():
+        pass
+
+Preferred ``pytest`` idioms
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* Functional tests should be named ``def test_*`` and *only* take arguments that are either fixtures or parameters.
+* Use a bare ``assert`` for testing scalars and truth-testing.
+* Use ``tm.assert_series_equal(result, expected)`` and ``tm.assert_frame_equal(result, expected)`` for comparing :class:`Series` and :class:`DataFrame` results respectively.
+* Use `@pytest.mark.parametrize `__ when testing multiple cases.
+* Use `pytest.mark.xfail `__ when a test case is expected to fail.
+* Use `pytest.mark.skip `__ when a test case is never expected to pass.
+* Use `pytest.param `__ when a test case needs a particular mark.
+* Use `@pytest.fixture `__ if multiple tests can share a setup object.
+
+.. warning::
+
+   Do not use ``pytest.xfail`` (which is different from ``pytest.mark.xfail``) since it immediately stops the
+   test and does not check if the test will fail. If this is the behavior you desire, use ``pytest.skip`` instead.
+
+If a test is known to fail but the manner in which it fails
+is not meant to be captured, use ``pytest.mark.xfail``. It is common to use this method for a test that
+exhibits buggy behavior or a non-implemented feature. If
+the failing test has flaky behavior, use the argument ``strict=False``. This
+will make it so pytest does not fail if the test happens to pass.
+
+Prefer the decorator ``@pytest.mark.xfail`` and the argument ``pytest.param``
+over usage within a test so that the test is appropriately marked during the
+collection phase of pytest. For xfailing a test that involves multiple
+parameters, a fixture, or a combination of these, it is only possible to
+xfail during the testing phase. To do so, use the ``request`` fixture:
+
+.. code-block:: python
+
+    def test_xfail(request):
+        mark = pytest.mark.xfail(raises=TypeError, reason="Indicate why here")
+        request.node.add_marker(mark)
+
+xfail is not to be used for tests involving failure due to invalid user arguments.
+For these tests, we need to verify the correct exception type and error message
+are being raised, using ``pytest.raises`` instead.
+
+.. _contributing.warnings:
+
+Testing a warning
+^^^^^^^^^^^^^^^^^
+
+Use ``tm.assert_produces_warning`` as a context manager to check that a block of code raises a warning.
+
+.. code-block:: python
+
+    with tm.assert_produces_warning(DeprecationWarning):
+        pd.deprecated_function()
+
+If a warning should specifically not happen in a block of code, pass ``False`` into the context manager.
+
+.. code-block:: python
+
+    with tm.assert_produces_warning(False):
+        pd.no_warning_function()
+
+If you have a test that would emit a warning, but you aren't actually testing the
+warning itself (say because it's going to be removed in the future, or because we're
+matching a 3rd-party library's behavior), then use ``pytest.mark.filterwarnings`` to
+ignore the warning.
+
+..
code-block:: python + + @pytest.mark.filterwarnings("ignore:msg:category") + def test_thing(self): + pass + +If you need finer-grained control, you can use Python's +`warnings module `__ +to control whether a warning is ignored or raised at different places within +a single test. + +.. code-block:: python + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + +Testing an exception +^^^^^^^^^^^^^^^^^^^^ + +Use `pytest.raises `_ as a context manager +with the specific exception subclass (i.e. never use :py:class:`Exception`) and the exception message in ``match``. + +.. code-block:: python + + with pytest.raises(ValueError, match="an error"): + raise ValueError("an error") + +Testing involving files +^^^^^^^^^^^^^^^^^^^^^^^ + +The ``tm.ensure_clean`` context manager creates a temporary file for testing, +with a generated filename (or your filename if provided), that is automatically +deleted when the context block is exited. + +.. code-block:: python + + with tm.ensure_clean('my_file_path') as path: + # do something with the path + +Testing involving network connectivity +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is highly discouraged to add a test that connects to the internet due to flakiness of network connections and +lack of ownership of the server that is being connected to. If network connectivity is absolutely required, use the +``tm.network`` decorator. + +.. code-block:: python + + @tm.network # noqa + def test_network(): + result = package.call_to_internet() + +If the test requires data from a specific website, specify ``check_before_test=True`` and the site in the decorator. + +.. code-block:: python + + @tm.network("https://www.somespecificsite.com", check_before_test=True) + def test_network(): + result = pd.read_html("https://www.somespecificsite.com") + +Example +^^^^^^^ + +Here is an example of a self-contained set of tests in a file ``pandas/tests/test_cool_feature.py`` +that illustrate multiple features that we like to use. Please remember to add the Github Issue Number +as a comment to a new test. + +.. code-block:: python + + import pytest + import numpy as np + import pandas as pd + + + @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + def test_dtypes(dtype): + assert str(np.dtype(dtype)) == dtype + + + @pytest.mark.parametrize( + 'dtype', ['float32', pytest.param('int16', marks=pytest.mark.skip), + pytest.param('int32', marks=pytest.mark.xfail( + reason='to show how it works'))]) + def test_mark(dtype): + assert str(np.dtype(dtype)) == 'float32' + + + @pytest.fixture + def series(): + return pd.Series([1, 2, 3]) + + + @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + def dtype(request): + return request.param + + + def test_series(series, dtype): + # GH + result = series.astype(dtype) + assert result.dtype == dtype + + expected = pd.Series([1, 2, 3], dtype=dtype) + tm.assert_series_equal(result, expected) + + +A test run of this yields + +.. 
code-block:: shell + + ((pandas) bash-3.2$ pytest test_cool_feature.py -v + =========================== test session starts =========================== + platform darwin -- Python 3.6.2, pytest-3.6.0, py-1.4.31, pluggy-0.4.0 + collected 11 items + + tester.py::test_dtypes[int8] PASSED + tester.py::test_dtypes[int16] PASSED + tester.py::test_dtypes[int32] PASSED + tester.py::test_dtypes[int64] PASSED + tester.py::test_mark[float32] PASSED + tester.py::test_mark[int16] SKIPPED + tester.py::test_mark[int32] xfail + tester.py::test_series[int8] PASSED + tester.py::test_series[int16] PASSED + tester.py::test_series[int32] PASSED + tester.py::test_series[int64] PASSED + +Tests that we have ``parametrized`` are now accessible via the test name, for example we could run these with ``-k int8`` to sub-select *only* those tests which match ``int8``. + + +.. code-block:: shell + + ((pandas) bash-3.2$ pytest test_cool_feature.py -v -k int8 + =========================== test session starts =========================== + platform darwin -- Python 3.6.2, pytest-3.6.0, py-1.4.31, pluggy-0.4.0 + collected 11 items + + test_cool_feature.py::test_dtypes[int8] PASSED + test_cool_feature.py::test_series[int8] PASSED + + +.. _using-hypothesis: + +Using ``hypothesis`` +~~~~~~~~~~~~~~~~~~~~ + +Hypothesis is a library for property-based testing. Instead of explicitly +parametrizing a test, you can describe *all* valid inputs and let Hypothesis +try to find a failing input. Even better, no matter how many random examples +it tries, Hypothesis always reports a single minimal counterexample to your +assertions - often an example that you would never have thought to test. + +See `Getting Started with Hypothesis `_ +for more of an introduction, then `refer to the Hypothesis documentation +for details `_. + +.. code-block:: python + + import json + from hypothesis import given, strategies as st + + any_json_value = st.deferred(lambda: st.one_of( + st.none(), st.booleans(), st.floats(allow_nan=False), st.text(), + st.lists(any_json_value), st.dictionaries(st.text(), any_json_value) + )) + + + @given(value=any_json_value) + def test_json_roundtrip(value): + result = json.loads(json.dumps(value)) + assert value == result + +This test shows off several useful features of Hypothesis, as well as +demonstrating a good use-case: checking properties that should hold over +a large or complicated domain of inputs. + +To keep the pandas test suite running quickly, parametrized tests are +preferred if the inputs or logic are simple, with Hypothesis tests reserved +for cases with complex logic or where there are too many combinations of +options or subtle interactions to test (or think of!) all of them. + + +Running the test suite +---------------------- + +The tests can then be run directly inside your Git clone (without having to +install pandas) by typing:: + + pytest pandas + +Often it is worth running only a subset of tests first around your changes before running the +entire suite. + +The easiest way to do this is with:: + + pytest pandas/path/to/test.py -k regex_matching_test_name + +Or with one of the following constructs:: + + pytest pandas/tests/[test-module].py + pytest pandas/tests/[test-module].py::[TestClass] + pytest pandas/tests/[test-module].py::[TestClass]::[test_method] + +Using `pytest-xdist `_, one can +speed up local testing on multicore machines. To use this feature, you will +need to install ``pytest-xdist`` via:: + + pip install pytest-xdist + +Two scripts are provided to assist with this. 
These scripts distribute +testing across 4 threads. + +On Unix variants, one can type:: + + test_fast.sh + +On Windows, one can type:: + + test_fast.bat + +This can significantly reduce the time it takes to locally run tests before +submitting a pull request. + +For more, see the `pytest `_ documentation. + +Furthermore one can run + +.. code-block:: python + + pd.test() + +with an imported pandas to run tests similarly. + +Running the performance test suite +---------------------------------- + +Performance matters and it is worth considering whether your code has introduced +performance regressions. pandas is in the process of migrating to +`asv benchmarks `__ +to enable easy monitoring of the performance of critical pandas operations. +These benchmarks are all found in the ``pandas/asv_bench`` directory, and the +test results can be found `here `__. + +To use all features of asv, you will need either ``conda`` or +``virtualenv``. For more details please check the `asv installation +webpage `_. + +To install asv:: + + pip install git+https://github.com/airspeed-velocity/asv + +If you need to run a benchmark, change your directory to ``asv_bench/`` and run:: + + asv continuous -f 1.1 upstream/main HEAD + +You can replace ``HEAD`` with the name of the branch you are working on, +and report benchmarks that changed by more than 10%. +The command uses ``conda`` by default for creating the benchmark +environments. If you want to use virtualenv instead, write:: + + asv continuous -f 1.1 -E virtualenv upstream/main HEAD + +The ``-E virtualenv`` option should be added to all ``asv`` commands +that run benchmarks. The default value is defined in ``asv.conf.json``. + +Running the full benchmark suite can be an all-day process, depending on your +hardware and its resource utilization. However, usually it is sufficient to paste +only a subset of the results into the pull request to show that the committed changes +do not cause unexpected performance regressions. You can run specific benchmarks +using the ``-b`` flag, which takes a regular expression. For example, this will +only run benchmarks from a ``pandas/asv_bench/benchmarks/groupby.py`` file:: + + asv continuous -f 1.1 upstream/main HEAD -b ^groupby + +If you want to only run a specific group of benchmarks from a file, you can do it +using ``.`` as a separator. For example:: + + asv continuous -f 1.1 upstream/main HEAD -b groupby.GroupByMethods + +will only run the ``GroupByMethods`` benchmark defined in ``groupby.py``. + +You can also run the benchmark suite using the version of ``pandas`` +already installed in your current Python environment. This can be +useful if you do not have virtualenv or conda, or are using the +``setup.py develop`` approach discussed above; for the in-place build +you need to set ``PYTHONPATH``, e.g. +``PYTHONPATH="$PWD/.." asv [remaining arguments]``. +You can run benchmarks using an existing Python +environment by:: + + asv run -e -E existing + +or, to use a specific Python interpreter,:: + + asv run -e -E existing:python3.6 + +This will display stderr from the benchmarks, and use your local +``python`` that comes from your ``$PATH``. + +Information on how to write a benchmark and how to use asv can be found in the +`asv documentation `_. + +Documenting your code +--------------------- + +Changes should be reflected in the release notes located in ``doc/source/whatsnew/vx.y.z.rst``. +This file contains an ongoing change log for each release. 
Add an entry to this file to +document your fix, enhancement or (unavoidable) breaking change. Make sure to include the +GitHub issue number when adding your entry (using ``:issue:`1234``` where ``1234`` is the +issue/pull request number). Your entry should be written using full sentences and proper +grammar. + +When mentioning parts of the API, use a Sphinx ``:func:``, ``:meth:``, or ``:class:`` +directive as appropriate. Not all public API functions and methods have a +documentation page; ideally links would only be added if they resolve. You can +usually find similar examples by checking the release notes for one of the previous +versions. + +If your code is a bugfix, add your entry to the relevant bugfix section. Avoid +adding to the ``Other`` section; only in rare cases should entries go there. +Being as concise as possible, the description of the bug should include how the +user may encounter it and an indication of the bug itself, e.g. +"produces incorrect results" or "incorrectly raises". It may be necessary to also +indicate the new behavior. + +If your code is an enhancement, it is most likely necessary to add usage +examples to the existing documentation. This can be done following the section +regarding :ref:`documentation `. +Further, to let users know when this feature was added, the ``versionadded`` +directive is used. The sphinx syntax for that is: + +.. code-block:: rst + + .. versionadded:: 1.1.0 + +This will put the text *New in version 1.1.0* wherever you put the sphinx +directive. This should also be put in the docstring when adding a new function +or method (`example `__) +or a new keyword argument (`example `__). diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst new file mode 100644 index 00000000..a87d8d5a --- /dev/null +++ b/doc/source/development/contributing_docstring.rst @@ -0,0 +1,1001 @@ +.. _docstring: + +{{ header }} + +====================== +pandas docstring guide +====================== + +About docstrings and standards +------------------------------ + +A Python docstring is a string used to document a Python module, class, +function or method, so programmers can understand what it does without having +to read the details of the implementation. + +Also, it is a common practice to generate online (html) documentation +automatically from docstrings. `Sphinx `_ serves +this purpose. + +The next example gives an idea of what a docstring looks like: + +.. code-block:: python + + def add(num1, num2): + """ + Add up two integer numbers. + + This function simply wraps the ``+`` operator, and does not + do anything interesting, except for illustrating what + the docstring of a very simple function looks like. + + Parameters + ---------- + num1 : int + First number to add. + num2 : int + Second number to add. + + Returns + ------- + int + The sum of ``num1`` and ``num2``. + + See Also + -------- + subtract : Subtract one integer from another. + + Examples + -------- + >>> add(2, 2) + 4 + >>> add(25, 0) + 25 + >>> add(10, -10) + 0 + """ + return num1 + num2 + +Some standards regarding docstrings exist, which make them easier to read, and allow them +be easily exported to other formats such as html or pdf. + +The first conventions every Python docstring should follow are defined in +`PEP-257 `_. + +As PEP-257 is quite broad, other more specific standards also exist. In the +case of pandas, the NumPy docstring convention is followed. 
These conventions are +explained in this document: + +* `numpydoc docstring guide `_ + (which is based in the original `Guide to NumPy/SciPy documentation + `_) + +numpydoc is a Sphinx extension to support the NumPy docstring convention. + +The standard uses reStructuredText (reST). reStructuredText is a markup +language that allows encoding styles in plain text files. Documentation +about reStructuredText can be found in: + +* `Sphinx reStructuredText primer `_ +* `Quick reStructuredText reference `_ +* `Full reStructuredText specification `_ + +pandas has some helpers for sharing docstrings between related classes, see +:ref:`docstring.sharing`. + +The rest of this document will summarize all the above guidelines, and will +provide additional conventions specific to the pandas project. + +.. _docstring.tutorial: + +Writing a docstring +------------------- + +.. _docstring.general: + +General rules +~~~~~~~~~~~~~ + +Docstrings must be defined with three double-quotes. No blank lines should be +left before or after the docstring. The text starts in the next line after the +opening quotes. The closing quotes have their own line +(meaning that they are not at the end of the last sentence). + +On rare occasions reST styles like bold text or italics will be used in +docstrings, but is it common to have inline code, which is presented between +backticks. The following are considered inline code: + +* The name of a parameter +* Python code, a module, function, built-in, type, literal... (e.g. ``os``, + ``list``, ``numpy.abs``, ``datetime.date``, ``True``) +* A pandas class (in the form ``:class:`pandas.Series```) +* A pandas method (in the form ``:meth:`pandas.Series.sum```) +* A pandas function (in the form ``:func:`pandas.to_datetime```) + +.. note:: + To display only the last component of the linked class, method or + function, prefix it with ``~``. For example, ``:class:`~pandas.Series``` + will link to ``pandas.Series`` but only display the last part, ``Series`` + as the link text. See `Sphinx cross-referencing syntax + `_ + for details. + +**Good:** + +.. code-block:: python + + def add_values(arr): + """ + Add the values in ``arr``. + + This is equivalent to Python ``sum`` of :meth:`pandas.Series.sum`. + + Some sections are omitted here for simplicity. + """ + return sum(arr) + +**Bad:** + +.. code-block:: python + + def func(): + + """Some function. + + With several mistakes in the docstring. + + It has a blank like after the signature ``def func():``. + + The text 'Some function' should go in the line after the + opening quotes of the docstring, not in the same line. + + There is a blank line between the docstring and the first line + of code ``foo = 1``. + + The closing quotes should be in the next line, not in this one.""" + + foo = 1 + bar = 2 + return foo + bar + +.. _docstring.short_summary: + +Section 1: short summary +~~~~~~~~~~~~~~~~~~~~~~~~ + +The short summary is a single sentence that expresses what the function does in +a concise way. + +The short summary must start with a capital letter, end with a dot, and fit in +a single line. It needs to express what the object does without providing +details. For functions and methods, the short summary must start with an +infinitive verb. + +**Good:** + +.. code-block:: python + + def astype(dtype): + """ + Cast Series type. + + This section will provide further details. + """ + pass + +**Bad:** + +.. code-block:: python + + def astype(dtype): + """ + Casts Series type. 
+ + Verb in third-person of the present simple, should be infinitive. + """ + pass + +.. code-block:: python + + def astype(dtype): + """ + Method to cast Series type. + + Does not start with verb. + """ + pass + +.. code-block:: python + + def astype(dtype): + """ + Cast Series type + + Missing dot at the end. + """ + pass + +.. code-block:: python + + def astype(dtype): + """ + Cast Series type from its current type to the new type defined in + the parameter dtype. + + Summary is too verbose and doesn't fit in a single line. + """ + pass + +.. _docstring.extended_summary: + +Section 2: extended summary +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The extended summary provides details on what the function does. It should not +go into the details of the parameters, or discuss implementation notes, which +go in other sections. + +A blank line is left between the short summary and the extended summary. +Every paragraph in the extended summary ends with a dot. + +The extended summary should provide details on why the function is useful and +their use cases, if it is not too generic. + +.. code-block:: python + + def unstack(): + """ + Pivot a row index to columns. + + When using a MultiIndex, a level can be pivoted so each value in + the index becomes a column. This is especially useful when a subindex + is repeated for the main index, and data is easier to visualize as a + pivot table. + + The index level will be automatically removed from the index when added + as columns. + """ + pass + +.. _docstring.parameters: + +Section 3: parameters +~~~~~~~~~~~~~~~~~~~~~ + +The details of the parameters will be added in this section. This section has +the title "Parameters", followed by a line with a hyphen under each letter of +the word "Parameters". A blank line is left before the section title, but not +after, and not between the line with the word "Parameters" and the one with +the hyphens. + +After the title, each parameter in the signature must be documented, including +``*args`` and ``**kwargs``, but not ``self``. + +The parameters are defined by their name, followed by a space, a colon, another +space, and the type (or types). Note that the space between the name and the +colon is important. Types are not defined for ``*args`` and ``**kwargs``, but must +be defined for all other parameters. After the parameter definition, it is +required to have a line with the parameter description, which is indented, and +can have multiple lines. The description must start with a capital letter, and +finish with a dot. + +For keyword arguments with a default value, the default will be listed after a +comma at the end of the type. The exact form of the type in this case will be +"int, default 0". In some cases it may be useful to explain what the default +argument means, which can be added after a comma "int, default -1, meaning all +cpus". + +In cases where the default value is ``None``, meaning that the value will not be +used. Instead of ``"str, default None"``, it is preferred to write ``"str, optional"``. +When ``None`` is a value being used, we will keep the form "str, default None". +For example, in ``df.to_csv(compression=None)``, ``None`` is not a value being used, +but means that compression is optional, and no compression is being used if not +provided. In this case we will use ``"str, optional"``. Only in cases like +``func(value=None)`` and ``None`` is being used in the same way as ``0`` or ``foo`` +would be used, then we will specify "str, int or None, default None". + +**Good:** + +.. 
code-block:: python + + class Series: + def plot(self, kind, color='blue', **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Parameters + ---------- + kind : str + Kind of matplotlib plot. + color : str, default 'blue' + Color name or rgb code. + **kwargs + These parameters will be passed to the matplotlib plotting + function. + """ + pass + +**Bad:** + +.. code-block:: python + + class Series: + def plot(self, kind, **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Note the blank line between the parameters title and the first + parameter. Also, note that after the name of the parameter ``kind`` + and before the colon, a space is missing. + + Also, note that the parameter descriptions do not start with a + capital letter, and do not finish with a dot. + + Finally, the ``**kwargs`` parameter is missing. + + Parameters + ---------- + + kind: str + kind of matplotlib plot + """ + pass + +.. _docstring.parameter_types: + +Parameter types +^^^^^^^^^^^^^^^ + +When specifying the parameter types, Python built-in data types can be used +directly (the Python type is preferred to the more verbose string, integer, +boolean, etc): + +* int +* float +* str +* bool + +For complex types, define the subtypes. For ``dict`` and ``tuple``, as more than +one type is present, we use the brackets to help read the type (curly brackets +for ``dict`` and normal brackets for ``tuple``): + +* list of int +* dict of {str : int} +* tuple of (str, int, int) +* tuple of (str,) +* set of str + +In case where there are just a set of values allowed, list them in curly +brackets and separated by commas (followed by a space). If the values are +ordinal and they have an order, list them in this order. Otherwise, list +the default value first, if there is one: + +* {0, 10, 25} +* {'simple', 'advanced'} +* {'low', 'medium', 'high'} +* {'cat', 'dog', 'bird'} + +If the type is defined in a Python module, the module must be specified: + +* datetime.date +* datetime.datetime +* decimal.Decimal + +If the type is in a package, the module must be also specified: + +* numpy.ndarray +* scipy.sparse.coo_matrix + +If the type is a pandas type, also specify pandas except for Series and +DataFrame: + +* Series +* DataFrame +* pandas.Index +* pandas.Categorical +* pandas.arrays.SparseArray + +If the exact type is not relevant, but must be compatible with a NumPy +array, array-like can be specified. If Any type that can be iterated is +accepted, iterable can be used: + +* array-like +* iterable + +If more than one type is accepted, separate them by commas, except the +last two types, that need to be separated by the word 'or': + +* int or float +* float, decimal.Decimal or None +* str or list of str + +If ``None`` is one of the accepted values, it always needs to be the last in +the list. + +For axis, the convention is to use something like: + +* axis : {0 or 'index', 1 or 'columns', None}, default None + +.. _docstring.returns: + +Section 4: returns or yields +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the method returns a value, it will be documented in this section. Also +if the method yields its output. + +The title of the section will be defined in the same way as the "Parameters". +With the names "Returns" or "Yields" followed by a line with as many hyphens +as the letters in the preceding word. + +The documentation of the return is also similar to the parameters. 
But in this +case, no name will be provided, unless the method returns or yields more than +one value (a tuple of values). + +The types for "Returns" and "Yields" are the same as the ones for the +"Parameters". Also, the description must finish with a dot. + +For example, with a single value: + +.. code-block:: python + + def sample(): + """ + Generate and return a random number. + + The value is sampled from a continuous uniform distribution between + 0 and 1. + + Returns + ------- + float + Random number generated. + """ + return np.random.random() + +With more than one value: + +.. code-block:: python + + import string + + def random_letters(): + """ + Generate and return a sequence of random letters. + + The length of the returned string is also random, and is also + returned. + + Returns + ------- + length : int + Length of the returned string. + letters : str + String of random letters. + """ + length = np.random.randint(1, 10) + letters = ''.join(np.random.choice(string.ascii_lowercase) + for i in range(length)) + return length, letters + +If the method yields its value: + +.. code-block:: python + + def sample_values(): + """ + Generate an infinite sequence of random numbers. + + The values are sampled from a continuous uniform distribution between + 0 and 1. + + Yields + ------ + float + Random number generated. + """ + while True: + yield np.random.random() + +.. _docstring.see_also: + +Section 5: see also +~~~~~~~~~~~~~~~~~~~ + +This section is used to let users know about pandas functionality +related to the one being documented. In rare cases, if no related methods +or functions can be found at all, this section can be skipped. + +An obvious example would be the ``head()`` and ``tail()`` methods. As ``tail()`` does +the equivalent as ``head()`` but at the end of the ``Series`` or ``DataFrame`` +instead of at the beginning, it is good to let the users know about it. + +To give an intuition on what can be considered related, here there are some +examples: + +* ``loc`` and ``iloc``, as they do the same, but in one case providing indices + and in the other positions +* ``max`` and ``min``, as they do the opposite +* ``iterrows``, ``itertuples`` and ``items``, as it is easy that a user + looking for the method to iterate over columns ends up in the method to + iterate over rows, and vice-versa +* ``fillna`` and ``dropna``, as both methods are used to handle missing values +* ``read_csv`` and ``to_csv``, as they are complementary +* ``merge`` and ``join``, as one is a generalization of the other +* ``astype`` and ``pandas.to_datetime``, as users may be reading the + documentation of ``astype`` to know how to cast as a date, and the way to do + it is with ``pandas.to_datetime`` +* ``where`` is related to ``numpy.where``, as its functionality is based on it + +When deciding what is related, you should mainly use your common sense and +think about what can be useful for the users reading the documentation, +especially the less experienced ones. + +When relating to other libraries (mainly ``numpy``), use the name of the module +first (not an alias like ``np``). If the function is in a module which is not +the main one, like ``scipy.sparse``, list the full module (e.g. +``scipy.sparse.coo_matrix``). + +This section has a header, "See Also" (note the capital +S and A), followed by the line with hyphens and preceded by a blank line. 
+ +After the header, we will add a line for each related method or function, +followed by a space, a colon, another space, and a short description that +illustrates what this method or function does, why is it relevant in this +context, and what the key differences are between the documented function and +the one being referenced. The description must also end with a dot. + +Note that in "Returns" and "Yields", the description is located on the line +after the type. In this section, however, it is located on the same +line, with a colon in between. If the description does not fit on the same +line, it can continue onto other lines which must be further indented. + +For example: + +.. code-block:: python + + class Series: + def head(self): + """ + Return the first 5 elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Returns + ------- + Series + Subset of the original series with the 5 first values. + + See Also + -------- + Series.tail : Return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n. + """ + return self.iloc[:5] + +.. _docstring.notes: + +Section 6: notes +~~~~~~~~~~~~~~~~ + +This is an optional section used for notes about the implementation of the +algorithm, or to document technical aspects of the function behavior. + +Feel free to skip it, unless you are familiar with the implementation of the +algorithm, or you discover some counter-intuitive behavior while writing the +examples for the function. + +This section follows the same format as the extended summary section. + +.. _docstring.examples: + +Section 7: examples +~~~~~~~~~~~~~~~~~~~ + +This is one of the most important sections of a docstring, despite being +placed in the last position, as often people understand concepts better +by example than through accurate explanations. + +Examples in docstrings, besides illustrating the usage of the function or +method, must be valid Python code, that returns the given output in a +deterministic way, and that can be copied and run by users. + +Examples are presented as a session in the Python terminal. ``>>>`` is used to +present code. ``...`` is used for code continuing from the previous line. +Output is presented immediately after the last line of code generating the +output (no blank lines in between). Comments describing the examples can +be added with blank lines before and after them. + +The way to present examples is as follows: + +1. Import required libraries (except ``numpy`` and ``pandas``) + +2. Create the data required for the example + +3. Show a very basic example that gives an idea of the most common use case + +4. Add examples with explanations that illustrate how the parameters can be + used for extended functionality + +A simple example could be: + +.. code-block:: python + + class Series: + + def head(self, n=5): + """ + Return the first elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying all of it. + + Parameters + ---------- + n : int + Number of values to return. + + Return + ------ + pandas.Series + Subset of the original series with the n first values. + + See Also + -------- + tail : Return the last n elements of the Series. + + Examples + -------- + >>> s = pd.Series(['Ant', 'Bear', 'Cow', 'Dog', 'Falcon', + ... 
'Lion', 'Monkey', 'Rabbit', 'Zebra']) + >>> s.head() + 0 Ant + 1 Bear + 2 Cow + 3 Dog + 4 Falcon + dtype: object + + With the ``n`` parameter, we can change the number of returned rows: + + >>> s.head(n=3) + 0 Ant + 1 Bear + 2 Cow + dtype: object + """ + return self.iloc[:n] + +The examples should be as concise as possible. In cases where the complexity of +the function requires long examples, is recommended to use blocks with headers +in bold. Use double star ``**`` to make a text bold, like in ``**this example**``. + +.. _docstring.example_conventions: + +Conventions for the examples +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Code in examples is assumed to always start with these two lines which are not +shown: + +.. code-block:: python + + import numpy as np + import pandas as pd + +Any other module used in the examples must be explicitly imported, one per line (as +recommended in :pep:`8#imports`) +and avoiding aliases. Avoid excessive imports, but if needed, imports from +the standard library go first, followed by third-party libraries (like +matplotlib). + +When illustrating examples with a single ``Series`` use the name ``s``, and if +illustrating with a single ``DataFrame`` use the name ``df``. For indices, +``idx`` is the preferred name. If a set of homogeneous ``Series`` or +``DataFrame`` is used, name them ``s1``, ``s2``, ``s3``... or ``df1``, +``df2``, ``df3``... If the data is not homogeneous, and more than one structure +is needed, name them with something meaningful, for example ``df_main`` and +``df_to_join``. + +Data used in the example should be as compact as possible. The number of rows +is recommended to be around 4, but make it a number that makes sense for the +specific example. For example in the ``head`` method, it requires to be higher +than 5, to show the example with the default values. If doing the ``mean``, we +could use something like ``[1, 2, 3]``, so it is easy to see that the value +returned is the mean. + +For more complex examples (grouping for example), avoid using data without +interpretation, like a matrix of random numbers with columns A, B, C, D... +And instead use a meaningful example, which makes it easier to understand the +concept. Unless required by the example, use names of animals, to keep examples +consistent. And numerical properties of them. + +When calling the method, keywords arguments ``head(n=3)`` are preferred to +positional arguments ``head(3)``. + +**Good:** + +.. code-block:: python + + class Series: + + def mean(self): + """ + Compute the mean of the input. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.mean() + 2 + """ + pass + + + def fillna(self, value): + """ + Replace missing values by ``value``. + + Examples + -------- + >>> s = pd.Series([1, np.nan, 3]) + >>> s.fillna(0) + [1, 0, 3] + """ + pass + + def groupby_mean(self): + """ + Group by index and return mean. + + Examples + -------- + >>> s = pd.Series([380., 370., 24., 26], + ... name='max_speed', + ... index=['falcon', 'falcon', 'parrot', 'parrot']) + >>> s.groupby_mean() + index + falcon 375.0 + parrot 25.0 + Name: max_speed, dtype: float64 + """ + pass + + def contains(self, pattern, case_sensitive=True, na=numpy.nan): + """ + Return whether each value contains ``pattern``. + + In this case, we are illustrating how to use sections, even + if the example is simple enough and does not require them. 
+
+            Examples
+            --------
+            >>> s = pd.Series(['Antelope', 'Lion', 'Zebra', np.nan])
+            >>> s.contains(pattern='a')
+            0    False
+            1    False
+            2     True
+            3      NaN
+            dtype: bool
+
+            **Case sensitivity**
+
+            With ``case_sensitive`` set to ``False`` we can match ``a`` with both
+            ``a`` and ``A``:
+
+            >>> s.contains(pattern='a', case_sensitive=False)
+            0     True
+            1    False
+            2     True
+            3      NaN
+            dtype: bool
+
+            **Missing values**
+
+            We can fill missing values in the output using the ``na`` parameter:
+
+            >>> s.contains(pattern='a', na=False)
+            0    False
+            1    False
+            2     True
+            3    False
+            dtype: bool
+            """
+            pass
+
+**Bad:**
+
+.. code-block:: python
+
+    def method(foo=None, bar=None):
+        """
+        A sample DataFrame method.
+
+        Do not import NumPy and pandas.
+
+        Try to use meaningful data, when it makes the example easier
+        to understand.
+
+        Try to avoid positional arguments like in ``df.method(1)``. They
+        can be all right if previously defined with a meaningful name,
+        like in ``present_value(interest_rate)``, but avoid them otherwise.
+
+        When presenting the behavior with different parameters, do not place
+        all the calls one next to the other. Instead, add a short sentence
+        explaining what the example shows.
+
+        Examples
+        --------
+        >>> import numpy as np
+        >>> import pandas as pd
+        >>> df = pd.DataFrame(np.random.randn(3, 3),
+        ...                   columns=('a', 'b', 'c'))
+        >>> df.method(1)
+        21
+        >>> df.method(bar=14)
+        123
+        """
+        pass
+
+
+.. _docstring.doctest_tips:
+
+Tips for getting your examples to pass the doctests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Getting the examples to pass the doctests in the validation script can sometimes
+be tricky. Here are some attention points:
+
+* Import all needed libraries (except for pandas and NumPy, those are already
+  imported as ``import pandas as pd`` and ``import numpy as np``) and define
+  all variables you use in the example.
+
+* Try to avoid using random data. However random data might be OK in some
+  cases, like if the function you are documenting deals with probability
+  distributions, or if the amount of data needed to make the function result
+  meaningful is too much, such that creating it manually is very cumbersome.
+  In those cases, always use a fixed random seed to make the generated examples
+  predictable. Example::
+
+    >>> np.random.seed(42)
+    >>> df = pd.DataFrame({'normal': np.random.normal(100, 5, 20)})
+
+* If you have a code snippet that wraps multiple lines, you need to use '...'
+  on the continued lines: ::
+
+    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], index=['a', 'b'],
+    ...                   columns=['A', 'B', 'C'])
+
+* If you want to show a case where an exception is raised, you can do::
+
+    >>> pd.to_datetime(["712-01-01"])
+    Traceback (most recent call last):
+    OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 712-01-01 00:00:00
+
+  It is essential to include the "Traceback (most recent call last):", but for
+  the actual error only the error name is sufficient.
+
+* If there is a small part of the result that can vary (e.g. a hash in an object
+  representation), you can use ``...`` to represent this part.
+
+  If you want to show that ``s.plot()`` returns a matplotlib AxesSubplot object,
+  this will fail the doctest ::
+
+    >>> s.plot()
+    <matplotlib.axes._subplots.AxesSubplot at 0x7efd0c0b0690>
+
+  However, you can do (notice the comment that needs to be added) ::
+
+    >>> s.plot()  # doctest: +ELLIPSIS
+    <matplotlib.axes._subplots.AxesSubplot at ...>
+
+
+.. _docstring.example_plots:
+
+Plots in examples
+^^^^^^^^^^^^^^^^^
+
+There are some methods in pandas returning plots. To render the plots generated
+by the examples in the documentation, the ``..
plot::`` directive exists. + +To use it, place the following code after the "Examples" header as shown below. The +plot will be generated automatically when building the documentation. + +.. code-block:: python + + class Series: + def plot(self): + """ + Generate a plot with the ``Series`` data. + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 2, 3]) + >>> s.plot() + """ + pass + +.. _docstring.sharing: + +Sharing docstrings +------------------ + +pandas has a system for sharing docstrings, with slight variations, between +classes. This helps us keep docstrings consistent, while keeping things clear +for the user reading them. It comes at the cost of some complexity when writing. + +Each shared docstring will have a base template with variables, like +``{klass}``. The variables are filled in later using the ``doc`` decorator. +Finally, docstrings can also be appended to with the ``doc`` decorator. + +In this example, we'll create a parent docstring normally (this is like +``pandas.core.generic.NDFrame``). Then we'll have two children (like +``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll +substitute the class names in this docstring. + +.. code-block:: python + + class Parent: + @doc(klass="Parent") + def my_function(self): + """Apply my function to {klass}.""" + ... + + + class ChildA(Parent): + @doc(Parent.my_function, klass="ChildA") + def my_function(self): + ... + + + class ChildB(Parent): + @doc(Parent.my_function, klass="ChildB") + def my_function(self): + ... + +The resulting docstrings are: + +.. code-block:: python + + >>> print(Parent.my_function.__doc__) + Apply my function to Parent. + >>> print(ChildA.my_function.__doc__) + Apply my function to ChildA. + >>> print(ChildB.my_function.__doc__) + Apply my function to ChildB. + +Notice: + +1. We "append" the parent docstring to the children docstrings, which are + initially empty. + +Our files will often contain a module-level ``_shared_doc_kwargs`` with some +common substitution values (things like ``klass``, ``axes``, etc.). + +You can substitute and append in one shot with something like + +.. code-block:: python + + @doc(template, **_shared_doc_kwargs) + def my_function(self): + ... + +where ``template`` may come from a module-level ``_shared_docs`` dictionary +mapping function names to docstrings. Wherever possible, we prefer using +``doc``, since the docstring-writing process is slightly closer to normal. + +See ``pandas.core.generic.NDFrame.fillna`` for an example template, and +``pandas.core.series.Series.fillna`` and ``pandas.core.frame.DataFrame.fillna`` +for the filled versions. diff --git a/doc/source/development/contributing_documentation.rst b/doc/source/development/contributing_documentation.rst new file mode 100644 index 00000000..fac6a91c --- /dev/null +++ b/doc/source/development/contributing_documentation.rst @@ -0,0 +1,216 @@ +.. _contributing_documentation: + +{{ header }} + +================================= +Contributing to the documentation +================================= + +Contributing to the documentation benefits everyone who uses pandas. +We encourage you to help us improve the documentation, and +you don't have to be an expert on pandas to do so! In fact, +there are sections of the docs that are worse off after being written by +experts. If something in the docs doesn't make sense to you, updating the +relevant section after you figure it out is a great way to ensure it will help +the next person. 
Please visit the `issues page `__ +for a full list of issues that are currently open regarding the +Pandas documentation. + + + +.. contents:: Documentation: + :local: + + +About the pandas documentation +-------------------------------- + +The documentation is written in **reStructuredText**, which is almost like writing +in plain English, and built using `Sphinx `__. The +Sphinx Documentation has an excellent `introduction to reST +`__. Review the Sphinx docs to perform more +complex changes to the documentation as well. + +Some other important things to know about the docs: + +* The pandas documentation consists of two parts: the docstrings in the code + itself and the docs in this folder ``doc/``. + + The docstrings provide a clear explanation of the usage of the individual + functions, while the documentation in this folder consists of tutorial-like + overviews per topic together with some other information (what's new, + installation, etc). + +* The docstrings follow a pandas convention, based on the **Numpy Docstring + Standard**. Follow the :ref:`pandas docstring guide ` for detailed + instructions on how to write a correct docstring. + + .. toctree:: + :maxdepth: 2 + + contributing_docstring.rst + +* The tutorials make heavy use of the `IPython directive + `_ sphinx extension. + This directive lets you put code in the documentation which will be run + during the doc build. For example:: + + .. ipython:: python + + x = 2 + x**3 + + will be rendered as:: + + In [1]: x = 2 + + In [2]: x**3 + Out[2]: 8 + + Almost all code examples in the docs are run (and the output saved) during the + doc build. This approach means that code examples will always be up to date, + but it does make the doc building a bit more complex. + +* Our API documentation files in ``doc/source/reference`` house the auto-generated + documentation from the docstrings. For classes, there are a few subtleties + around controlling which methods and attributes have pages auto-generated. + + We have two autosummary templates for classes. + + 1. ``_templates/autosummary/class.rst``. Use this when you want to + automatically generate a page for every public method and attribute on the + class. The ``Attributes`` and ``Methods`` sections will be automatically + added to the class' rendered documentation by numpydoc. See ``DataFrame`` + for an example. + + 2. ``_templates/autosummary/class_without_autosummary``. Use this when you + want to pick a subset of methods / attributes to auto-generate pages for. + When using this template, you should include an ``Attributes`` and + ``Methods`` section in the class docstring. See ``CategoricalIndex`` for an + example. + + Every method should be included in a ``toctree`` in one of the documentation files in + ``doc/source/reference``, else Sphinx + will emit a warning. + +The utility script ``scripts/validate_docstrings.py`` can be used to get a csv +summary of the API documentation. And also validate common errors in the docstring +of a specific class, function or method. The summary also compares the list of +methods documented in the files in ``doc/source/reference`` (which is used to generate +the `API Reference `_ page) +and the actual public methods. +This will identify methods documented in ``doc/source/reference`` that are not actually +class methods, and existing methods that are not documented in ``doc/source/reference``. 
+ + +Updating a pandas docstring +----------------------------- + +When improving a single function or method's docstring, it is not necessarily +needed to build the full documentation (see next section). +However, there is a script that checks a docstring (for example for the ``DataFrame.mean`` method):: + + python scripts/validate_docstrings.py pandas.DataFrame.mean + +This script will indicate some formatting errors if present, and will also +run and test the examples included in the docstring. +Check the :ref:`pandas docstring guide ` for a detailed guide +on how to format the docstring. + +The examples in the docstring ('doctests') must be valid Python code, +that in a deterministic way returns the presented output, and that can be +copied and run by users. This can be checked with the script above, and is +also tested on Travis. A failing doctest will be a blocker for merging a PR. +Check the :ref:`examples ` section in the docstring guide +for some tips and tricks to get the doctests passing. + +When doing a PR with a docstring update, it is good to post the +output of the validation script in a comment on github. + + +How to build the pandas documentation +--------------------------------------- + +Requirements +~~~~~~~~~~~~ + +First, you need to have a development environment to be able to build pandas +(see the docs on :ref:`creating a development environment `). + +Building the documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +So how do you build the docs? Navigate to your local +``doc/`` directory in the console and run:: + + python make.py html + +Then you can find the HTML output in the folder ``doc/build/html/``. + +The first time you build the docs, it will take quite a while because it has to run +all the code examples and build all the generated docstring pages. In subsequent +evocations, sphinx will try to only build the pages that have been modified. + +If you want to do a full clean build, do:: + + python make.py clean + python make.py html + +You can tell ``make.py`` to compile only a single section of the docs, greatly +reducing the turn-around time for checking your changes. + +:: + + # omit autosummary and API section + python make.py clean + python make.py --no-api + + # compile the docs with only a single section, relative to the "source" folder. + # For example, compiling only this guide (doc/source/development/contributing.rst) + python make.py clean + python make.py --single development/contributing.rst + + # compile the reference docs for a single function + python make.py clean + python make.py --single pandas.DataFrame.join + + # compile whatsnew and API section (to resolve links in the whatsnew) + python make.py clean + python make.py --whatsnew + +For comparison, a full documentation build may take 15 minutes, but a single +section may take 15 seconds. Subsequent builds, which only process portions +you have changed, will be faster. + +The build will automatically use the number of cores available on your machine +to speed up the documentation build. You can override this:: + + python make.py html --num-jobs 4 + +Open the following file in a web browser to see the full documentation you +just built:: + + doc/build/html/index.html + +And you'll have the satisfaction of seeing your new and improved documentation! + +.. _contributing.dev_docs: + +Building main branch documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When pull requests are merged into the pandas ``main`` branch, the main parts of +the documentation are also built by Travis-CI. 
These docs are then hosted `here +`__, see also +the :any:`Continuous Integration ` section. + +Previewing changes +------------------ + +Once, the pull request is submitted, GitHub Actions will automatically build the +documentation. To view the built site: + +#. Wait for the ``CI / Web and docs`` check to complete. +#. Click ``Details`` next to it. +#. From the ``Artifacts`` drop-down, click ``docs`` or ``website`` to download + the site as a ZIP file. diff --git a/doc/source/development/contributing_environment.rst b/doc/source/development/contributing_environment.rst new file mode 100644 index 00000000..942edd86 --- /dev/null +++ b/doc/source/development/contributing_environment.rst @@ -0,0 +1,212 @@ +.. _contributing_environment: + +{{ header }} + +================================== +Creating a development environment +================================== + +To test out code changes, you'll need to build pandas from source, which +requires a C/C++ compiler and Python environment. If you're making documentation +changes, you can skip to :ref:`contributing to the documentation ` but if you skip +creating the development environment you won't be able to build the documentation +locally before pushing your changes. It's recommended to also install the :ref:`pre-commit hooks `. + +.. contents:: Table of contents: + :local: + +Step 1: install a C compiler +---------------------------- + +How to do this will depend on your platform. If you choose to user ``Docker`` +in the next step, then you can skip this step. + +**Windows** + +You will need `Build Tools for Visual Studio 2022 +`_. + +.. note:: + You DO NOT need to install Visual Studio 2022. + You only need "Build Tools for Visual Studio 2022" found by + scrolling down to "All downloads" -> "Tools for Visual Studio". + In the installer, select the "Desktop development with C++" Workloads. + +Alternatively, you can install the necessary components on the commandline using +`vs_BuildTools.exe `_ + +Alternatively, you could use the `WSL `_ +and consult the ``Linux`` instructions below. + +**macOS** + +To use the :ref:`mamba `-based compilers, you will need to install the +Developer Tools using ``xcode-select --install``. Otherwise +information about compiler installation can be found here: +https://devguide.python.org/setup/#macos + +**Linux** + +For Linux-based :ref:`mamba ` installations, you won't have to install any +additional components outside of the mamba environment. The instructions +below are only needed if your setup isn't based on mamba environments. + +Some Linux distributions will come with a pre-installed C compiler. To find out +which compilers (and versions) are installed on your system:: + + # for Debian/Ubuntu: + dpkg --list | grep compiler + # for Red Hat/RHEL/CentOS/Fedora: + yum list installed | grep -i --color compiler + +`GCC (GNU Compiler Collection) `_, is a widely used +compiler, which supports C and a number of other languages. If GCC is listed +as an installed compiler nothing more is required. + +If no C compiler is installed, or you wish to upgrade, or you're using a different +Linux distribution, consult your favorite search engine for compiler installation/update +instructions. + +Let us know if you have any difficulties by opening an issue or reaching out on our contributor +community :ref:`Slack `. 
+ +Step 2: create an isolated environment +---------------------------------------- + +Before we begin, please: + +* Make sure that you have :any:`cloned the repository ` +* ``cd`` to the pandas source directory + +.. _contributing.mamba: + +Option 1: using mamba (recommended) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Install `mamba `_ +* Make sure your mamba is up to date (``mamba update mamba``) + +.. code-block:: none + + # Create and activate the build environment + mamba env create --file environment.yml + mamba activate pandas-dev + +Option 2: using pip +~~~~~~~~~~~~~~~~~~~ + +You'll need to have at least the :ref:`minimum Python version ` that pandas supports. +You also need to have ``setuptools`` 51.0.0 or later to build pandas. + +**Unix**/**macOS with virtualenv** + +.. code-block:: bash + + # Create a virtual environment + # Use an ENV_DIR of your choice. We'll use ~/virtualenvs/pandas-dev + # Any parent directories should already exist + python3 -m venv ~/virtualenvs/pandas-dev + + # Activate the virtualenv + . ~/virtualenvs/pandas-dev/bin/activate + + # Install the build dependencies + python -m pip install -r requirements-dev.txt + +**Unix**/**macOS with pyenv** + +Consult the docs for setting up pyenv `here `__. + +.. code-block:: bash + + # Create a virtual environment + # Use an ENV_DIR of your choice. We'll use ~/Users//.pyenv/versions/pandas-dev + pyenv virtualenv + + # For instance: + pyenv virtualenv 3.9.10 pandas-dev + + # Activate the virtualenv + pyenv activate pandas-dev + + # Now install the build dependencies in the cloned pandas repo + python -m pip install -r requirements-dev.txt + +**Windows** + +Below is a brief overview on how to set-up a virtual environment with Powershell +under Windows. For details please refer to the +`official virtualenv user guide `__. + +Use an ENV_DIR of your choice. We'll use ``~\\virtualenvs\\pandas-dev`` where +``~`` is the folder pointed to by either ``$env:USERPROFILE`` (Powershell) or +``%USERPROFILE%`` (cmd.exe) environment variable. Any parent directories +should already exist. + +.. code-block:: powershell + + # Create a virtual environment + python -m venv $env:USERPROFILE\virtualenvs\pandas-dev + + # Activate the virtualenv. Use activate.bat for cmd.exe + ~\virtualenvs\pandas-dev\Scripts\Activate.ps1 + + # Install the build dependencies + python -m pip install -r requirements-dev.txt + +Option 3: using Docker +~~~~~~~~~~~~~~~~~~~~~~ + +pandas provides a ``DockerFile`` in the root directory to build a Docker image +with a full pandas development environment. + +**Docker Commands** + +Build the Docker image:: + + # Build the image + docker build -t pandas-dev . + +Run Container:: + + # Run a container and bind your local repo to the container + # This command assumes you are running from your local repo + # but if not alter ${PWD} to match your local repo path + docker run -it --rm -v ${PWD}:/home/pandas pandas-dev + +*Even easier, you can integrate Docker with the following IDEs:* + +**Visual Studio Code** + +You can use the DockerFile to launch a remote session with Visual Studio Code, +a popular free IDE, using the ``.devcontainer.json`` file. +See https://code.visualstudio.com/docs/remote/containers for details. + +**PyCharm (Professional)** + +Enable Docker support and use the Services tool window to build and manage images as well as +run and interact with containers. +See https://www.jetbrains.com/help/pycharm/docker.html for details. 
+ +Step 3: build and install pandas +-------------------------------- + +You can now run:: + + # Build and install pandas + python setup.py build_ext -j 4 + python -m pip install -e . --no-build-isolation --no-use-pep517 + +At this point you should be able to import pandas from your locally built version:: + + $ python + >>> import pandas + >>> print(pandas.__version__) # note: the exact output may differ + 2.0.0.dev0+880.g2b9e661fbb.dirty + +This will create the new environment, and not touch any of your existing environments, +nor any existing Python installation. + +.. note:: + You will need to repeat this step each time the C extensions change, for example + if you modified any file in ``pandas/_libs`` or if you did a fetch and merge from ``upstream/main``. diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst new file mode 100644 index 00000000..7ba2091e --- /dev/null +++ b/doc/source/development/debugging_extensions.rst @@ -0,0 +1,93 @@ +.. _debugging_c_extensions: + +{{ header }} + +====================== +Debugging C extensions +====================== + +Pandas uses select C extensions for high performance IO operations. In case you need to debug segfaults or general issues with those extensions, the following steps may be helpful. + +First, be sure to compile the extensions with the appropriate flags to generate debug symbols and remove optimizations. This can be achieved as follows: + +.. code-block:: sh + + python setup.py build_ext --inplace -j4 --with-debugging-symbols + +Using a debugger +================ + +Assuming you are on a Unix-like operating system, you can use either lldb or gdb to debug. The choice between either is largely dependent on your compilation toolchain - typically you would use lldb if using clang and gdb if using gcc. For macOS users, please note that ``gcc`` is on modern systems an alias for ``clang``, so if using Xcode you usually opt for lldb. Regardless of which debugger you choose, please refer to your operating systems instructions on how to install. + +After installing a debugger you can create a script that hits the extension module you are looking to debug. For demonstration purposes, let's assume you have a script called ``debug_testing.py`` with the following contents: + +.. code-block:: python + + import pandas as pd + + pd.DataFrame([[1, 2]]).to_json() + +Place the ``debug_testing.py`` script in the project root and launch a Python process under your debugger. If using lldb: + +.. code-block:: sh + + lldb python + +If using gdb: + +.. code-block:: sh + + gdb python + +Before executing our script, let's set a breakpoint in our JSON serializer in its entry function called ``objToJSON``. The lldb syntax would look as follows: + +.. code-block:: sh + + breakpoint set --name objToJSON + +Similarly for gdb: + +.. code-block:: sh + + break objToJSON + +.. note:: + + You may get a warning that this breakpoint cannot be resolved in lldb. gdb may give a similar warning and prompt you to make the breakpoint on a future library load, which you should say yes to. This should only happen on the very first invocation as the module you wish to debug has not yet been loaded into memory. + +Now go ahead and execute your script: + +.. code-block:: sh + + run .py + +Code execution will halt at the breakpoint defined or at the occurrence of any segfault. LLDB's `GDB to LLDB command map `_ provides a listing of debugger command that you can execute using either debugger. 
+ +Another option to execute the entire test suite under lldb would be to run the following: + +.. code-block:: sh + + lldb -- python -m pytest + +Or for gdb: + +.. code-block:: sh + + gdb --args python -m pytest + +Once the process launches, simply type ``run`` and the test suite will begin, stopping at any segmentation fault that may occur. + +Checking memory leaks with valgrind +=================================== + +You can use `Valgrind `_ to check for and log memory leaks in extensions. For instance, to check for a memory leak in a test from the suite you can run: + +.. code-block:: sh + + PYTHONMALLOC=malloc valgrind --leak-check=yes --track-origins=yes --log-file=valgrind-log.txt python -m pytest + +Note that code execution under valgrind will take much longer than usual. While you can run valgrind against extensions compiled with any optimization level, it is suggested to have optimizations turned off from compiled extensions to reduce the number of false positives. The ``--with-debugging-symbols`` flag passed during package setup will do this for you automatically. + +.. note:: + + For best results, you should use a Python installation configured with Valgrind support (``--with-valgrind``). diff --git a/doc/source/development/developer.rst b/doc/source/development/developer.rst new file mode 100644 index 00000000..6de237b7 --- /dev/null +++ b/doc/source/development/developer.rst @@ -0,0 +1,187 @@ +.. _developer: + +{{ header }} + +.. currentmodule:: pandas + +********* +Developer +********* + +This section will focus on downstream applications of pandas. + +.. _apache.parquet: + +Storing pandas DataFrame objects in Apache Parquet format +--------------------------------------------------------- + +The `Apache Parquet `__ format +provides key-value metadata at the file and column level, stored in the footer +of the Parquet file: + +.. code-block:: shell + + 5: optional list key_value_metadata + +where ``KeyValue`` is + +.. code-block:: shell + + struct KeyValue { + 1: required string key + 2: optional string value + } + +So that a ``pandas.DataFrame`` can be faithfully reconstructed, we store a +``pandas`` metadata key in the ``FileMetaData`` with the value stored as: + +.. code-block:: text + + {'index_columns': [, , ...], + 'column_indexes': [, , ..., ], + 'columns': [, , ...], + 'pandas_version': $VERSION, + 'creator': { + 'library': $LIBRARY, + 'version': $LIBRARY_VERSION + }} + +The "descriptor" values ```` in the ``'index_columns'`` field are +strings (referring to a column) or dictionaries with values as described below. + +The ````/```` and so forth are dictionaries containing the metadata +for each column, *including the index columns*. This has JSON form: + +.. code-block:: text + + {'name': column_name, + 'field_name': parquet_column_name, + 'pandas_type': pandas_type, + 'numpy_type': numpy_type, + 'metadata': metadata} + +See below for the detailed specification for these. + +Index metadata descriptors +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``RangeIndex`` can be stored as metadata only, not requiring serialization. The +descriptor format for these is as follows: + +.. code-block:: python + + index = pd.RangeIndex(0, 10, 2) + { + "kind": "range", + "name": index.name, + "start": index.start, + "stop": index.stop, + "step": index.step, + } + +Other index types must be serialized as data columns along with the other +DataFrame columns. The metadata for these is a string indicating the name of +the field in the data columns, for example ``'__index_level_0__'``. 
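As a concrete illustration of the metadata described above, the following minimal sketch (an editorial example, assuming ``pyarrow`` is installed) converts a ``DataFrame`` with a plain integer index to an Arrow table and decodes the ``pandas`` key from the schema metadata; the exact fields present vary with the pyarrow version:

.. code-block:: python

    import json

    import pandas as pd
    import pyarrow as pa

    # A plain integer index (not a RangeIndex) has to be serialized as a data column
    df = pd.DataFrame({"a": [1, 2, 3]}, index=[10, 20, 30])

    table = pa.Table.from_pandas(df)
    meta = json.loads(table.schema.metadata[b"pandas"])

    meta["index_columns"]                         # e.g. ['__index_level_0__']
    [c["field_name"] for c in meta["columns"]]    # e.g. ['a', '__index_level_0__']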
+ +If an index has a non-None ``name`` attribute, and there is no other column +with a name matching that value, then the ``index.name`` value can be used as +the descriptor. Otherwise (for unnamed indexes and ones with names colliding +with other column names) a disambiguating name with pattern matching +``__index_level_\d+__`` should be used. In cases of named indexes as data +columns, ``name`` attribute is always stored in the column descriptors as +above. + +Column metadata +~~~~~~~~~~~~~~~ + +``pandas_type`` is the logical type of the column, and is one of: + +* Boolean: ``'bool'`` +* Integers: ``'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'`` +* Floats: ``'float16', 'float32', 'float64'`` +* Date and Time Types: ``'datetime', 'datetimetz'``, ``'timedelta'`` +* String: ``'unicode', 'bytes'`` +* Categorical: ``'categorical'`` +* Other Python objects: ``'object'`` + +The ``numpy_type`` is the physical storage type of the column, which is the +result of ``str(dtype)`` for the underlying NumPy array that holds the data. So +for ``datetimetz`` this is ``datetime64[ns]`` and for categorical, it may be +any of the supported integer categorical types. + +The ``metadata`` field is ``None`` except for: + +* ``datetimetz``: ``{'timezone': zone, 'unit': 'ns'}``, e.g. ``{'timezone', + 'America/New_York', 'unit': 'ns'}``. The ``'unit'`` is optional, and if + omitted it is assumed to be nanoseconds. +* ``categorical``: ``{'num_categories': K, 'ordered': is_ordered, 'type': $TYPE}`` + + * Here ``'type'`` is optional, and can be a nested pandas type specification + here (but not categorical) + +* ``unicode``: ``{'encoding': encoding}`` + + * The encoding is optional, and if not present is UTF-8 + +* ``object``: ``{'encoding': encoding}``. Objects can be serialized and stored + in ``BYTE_ARRAY`` Parquet columns. The encoding can be one of: + + * ``'pickle'`` + * ``'bson'`` + * ``'json'`` + +* ``timedelta``: ``{'unit': 'ns'}``. The ``'unit'`` is optional, and if omitted + it is assumed to be nanoseconds. This metadata is optional altogether + +For types other than these, the ``'metadata'`` key can be +omitted. Implementations can assume ``None`` if the key is not present. + +As an example of fully-formed metadata: + +.. 
code-block:: text + + {'index_columns': ['__index_level_0__'], + 'column_indexes': [ + {'name': None, + 'field_name': 'None', + 'pandas_type': 'unicode', + 'numpy_type': 'object', + 'metadata': {'encoding': 'UTF-8'}} + ], + 'columns': [ + {'name': 'c0', + 'field_name': 'c0', + 'pandas_type': 'int8', + 'numpy_type': 'int8', + 'metadata': None}, + {'name': 'c1', + 'field_name': 'c1', + 'pandas_type': 'bytes', + 'numpy_type': 'object', + 'metadata': None}, + {'name': 'c2', + 'field_name': 'c2', + 'pandas_type': 'categorical', + 'numpy_type': 'int16', + 'metadata': {'num_categories': 1000, 'ordered': False}}, + {'name': 'c3', + 'field_name': 'c3', + 'pandas_type': 'datetimetz', + 'numpy_type': 'datetime64[ns]', + 'metadata': {'timezone': 'America/Los_Angeles'}}, + {'name': 'c4', + 'field_name': 'c4', + 'pandas_type': 'object', + 'numpy_type': 'object', + 'metadata': {'encoding': 'pickle'}}, + {'name': None, + 'field_name': '__index_level_0__', + 'pandas_type': 'int64', + 'numpy_type': 'int64', + 'metadata': None} + ], + 'pandas_version': '1.4.0', + 'creator': { + 'library': 'pyarrow', + 'version': '0.13.0' + }} diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst new file mode 100644 index 00000000..c7286616 --- /dev/null +++ b/doc/source/development/extending.rst @@ -0,0 +1,490 @@ +.. _extending: + +{{ header }} + +**************** +Extending pandas +**************** + +While pandas provides a rich set of methods, containers, and data types, your +needs may not be fully satisfied. pandas offers a few options for extending +pandas. + +.. _extending.register-accessors: + +Registering custom accessors +---------------------------- + +Libraries can use the decorators +:func:`pandas.api.extensions.register_dataframe_accessor`, +:func:`pandas.api.extensions.register_series_accessor`, and +:func:`pandas.api.extensions.register_index_accessor`, to add additional +"namespaces" to pandas objects. All of these follow a similar convention: you +decorate a class, providing the name of attribute to add. The class's +``__init__`` method gets the object being decorated. For example: + +.. code-block:: python + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor: + def __init__(self, pandas_obj): + self._validate(pandas_obj) + self._obj = pandas_obj + + @staticmethod + def _validate(obj): + # verify there is a column latitude and a column longitude + if "latitude" not in obj.columns or "longitude" not in obj.columns: + raise AttributeError("Must have 'latitude' and 'longitude'.") + + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Now users can access your methods using the ``geo`` namespace: + + >>> ds = pd.DataFrame( + ... {"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)} + ... ) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This can be a convenient way to extend pandas objects without subclassing them. +If you write a custom accessor, make a pull request adding it to our +:ref:`ecosystem` page. + +We highly recommend validating the data in your accessor's ``__init__``. +In our ``GeoAccessor``, we validate that the data contains the expected columns, +raising an ``AttributeError`` when the validation fails. 
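For instance, with the ``GeoAccessor`` sketched above, merely accessing the namespace on a ``DataFrame`` that lacks one of the required columns raises right away (a minimal illustration; the message is whatever your ``_validate`` raises):

.. code-block:: python

    >>> df = pd.DataFrame({"longitude": np.linspace(0, 10)})  # no 'latitude' column
    >>> df.geo
    Traceback (most recent call last):
    AttributeError: Must have 'latitude' and 'longitude'.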
+For a ``Series`` accessor, you should validate the ``dtype`` if the accessor +applies only to certain dtypes. + + +.. _extending.extension-types: + +Extension types +--------------- + +.. note:: + + The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs were + experimental prior to pandas 1.5. Starting with version 1.5, future changes will follow + the :ref:`pandas deprecation policy `. + +pandas defines an interface for implementing data types and arrays that *extend* +NumPy's type system. pandas itself uses the extension system for some types +that aren't built into NumPy (categorical, period, interval, datetime with +timezone). + +Libraries can define a custom array and data type. When pandas encounters these +objects, they will be handled properly (i.e. not converted to an ndarray of +objects). Many methods like :func:`pandas.isna` will dispatch to the extension +type's implementation. + +If you're building a library that implements the interface, please publicize it +on :ref:`ecosystem.extensions`. + +The interface consists of two classes. + +:class:`~pandas.api.extensions.ExtensionDtype` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A :class:`pandas.api.extensions.ExtensionDtype` is similar to a ``numpy.dtype`` object. It describes the +data type. Implementors are responsible for a few unique items like the name. + +One particularly important item is the ``type`` property. This should be the +class that is the scalar type for your data. For example, if you were writing an +extension array for IP Address data, this might be ``ipaddress.IPv4Address``. + +See the `extension dtype source`_ for interface definition. + +:class:`pandas.api.extensions.ExtensionDtype` can be registered to pandas to allow creation via a string dtype name. +This allows one to instantiate ``Series`` and ``.astype()`` with a registered string name, for +example ``'category'`` is a registered string accessor for the ``CategoricalDtype``. + +See the `extension dtype dtypes`_ for more on how to register dtypes. + +:class:`~pandas.api.extensions.ExtensionArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This class provides all the array-like functionality. ExtensionArrays are +limited to 1 dimension. An ExtensionArray is linked to an ExtensionDtype via the +``dtype`` attribute. + +pandas makes no restrictions on how an extension array is created via its +``__new__`` or ``__init__``, and puts no restrictions on how you store your +data. We do require that your array be convertible to a NumPy array, even if +this is relatively expensive (as it is for ``Categorical``). + +They may be backed by none, one, or many NumPy arrays. For example, +:class:`pandas.Categorical` is an extension array backed by two arrays, +one for codes and one for categories. An array of IPv6 addresses may +be backed by a NumPy structured array with two fields, one for the +lower 64 bits and one for the upper 64 bits. Or they may be backed +by some other storage type, like Python lists. + +See the `extension array source`_ for the interface definition. The docstrings +and comments contain guidance for properly implementing the interface. + +.. _extending.extension.operator: + +:class:`~pandas.api.extensions.ExtensionArray` operator support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, there are no operators defined for the class :class:`~pandas.api.extensions.ExtensionArray`. 
+There are two approaches for providing operator support for your ExtensionArray: + +1. Define each of the operators on your ``ExtensionArray`` subclass. +2. Use an operator implementation from pandas that depends on operators that are already defined + on the underlying elements (scalars) of the ExtensionArray. + +.. note:: + + Regardless of the approach, you may want to set ``__array_priority__`` + if you want your implementation to be called when involved in binary operations + with NumPy arrays. + +For the first approach, you define selected operators, e.g., ``__add__``, ``__le__``, etc. that +you want your ``ExtensionArray`` subclass to support. + +The second approach assumes that the underlying elements (i.e., scalar type) of the ``ExtensionArray`` +have the individual operators already defined. In other words, if your ``ExtensionArray`` +named ``MyExtensionArray`` is implemented so that each element is an instance +of the class ``MyExtensionElement``, then if the operators are defined +for ``MyExtensionElement``, the second approach will automatically +define the operators for ``MyExtensionArray``. + +A mixin class, :class:`~pandas.api.extensions.ExtensionScalarOpsMixin` supports this second +approach. If developing an ``ExtensionArray`` subclass, for example ``MyExtensionArray``, +can simply include ``ExtensionScalarOpsMixin`` as a parent class of ``MyExtensionArray``, +and then call the methods :meth:`~MyExtensionArray._add_arithmetic_ops` and/or +:meth:`~MyExtensionArray._add_comparison_ops` to hook the operators into +your ``MyExtensionArray`` class, as follows: + +.. code-block:: python + + from pandas.api.extensions import ExtensionArray, ExtensionScalarOpsMixin + + + class MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin): + pass + + + MyExtensionArray._add_arithmetic_ops() + MyExtensionArray._add_comparison_ops() + + +.. note:: + + Since ``pandas`` automatically calls the underlying operator on each + element one-by-one, this might not be as performant as implementing your own + version of the associated operators directly on the ``ExtensionArray``. + +For arithmetic operations, this implementation will try to reconstruct a new +``ExtensionArray`` with the result of the element-wise operation. Whether +or not that succeeds depends on whether the operation returns a result +that's valid for the ``ExtensionArray``. If an ``ExtensionArray`` cannot +be reconstructed, an ndarray containing the scalars returned instead. + +For ease of implementation and consistency with operations between pandas +and NumPy ndarrays, we recommend *not* handling Series and Indexes in your binary ops. +Instead, you should detect these cases and return ``NotImplemented``. +When pandas encounters an operation like ``op(Series, ExtensionArray)``, pandas +will + +1. unbox the array from the ``Series`` (``Series.array``) +2. call ``result = op(values, ExtensionArray)`` +3. re-box the result in a ``Series`` + +.. _extending.extension.ufunc: + +NumPy universal functions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Series` implements ``__array_ufunc__``. As part of the implementation, +pandas unboxes the ``ExtensionArray`` from the :class:`Series`, applies the ufunc, +and re-boxes it if necessary. + +If applicable, we highly recommend that you implement ``__array_ufunc__`` in your +extension array to avoid coercion to an ndarray. See +`the NumPy documentation `__ +for an example. 
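The sketch below shows one shape such an implementation can take. It is an editorial illustration, not pandas' prescribed pattern: ``MyExtensionArray`` and its ``_data`` ndarray attribute are assumed names, while ``_from_sequence`` is part of the regular ``ExtensionArray`` interface. The early ``NotImplemented`` return implements the deferral rule described in the next paragraph.

.. code-block:: python

    import numpy as np
    import pandas as pd
    from pandas.api.extensions import ExtensionArray


    class MyExtensionArray(ExtensionArray):
        # ... rest of the interface, with the values assumed to live in
        # a single ndarray stored on ``self._data`` ...

        def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
            # Defer to pandas containers: pandas will unbox the array and
            # re-call the ufunc with the unwrapped inputs.
            if any(isinstance(x, (pd.Series, pd.DataFrame, pd.Index)) for x in inputs):
                return NotImplemented

            # Operate on the underlying ndarrays and re-wrap the result.
            raw = [x._data if isinstance(x, MyExtensionArray) else x for x in inputs]
            result = getattr(ufunc, method)(*raw, **kwargs)

            if method == "__call__" and isinstance(result, np.ndarray):
                return type(self)._from_sequence(result)
            return result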
+ +As part of your implementation, we require that you defer to pandas when a pandas +container (:class:`Series`, :class:`DataFrame`, :class:`Index`) is detected in ``inputs``. +If any of those is present, you should return ``NotImplemented``. pandas will take care of +unboxing the array from the container and re-calling the ufunc with the unwrapped input. + +.. _extending.extension.testing: + +Testing extension arrays +^^^^^^^^^^^^^^^^^^^^^^^^ + +We provide a test suite for ensuring that your extension arrays satisfy the expected +behavior. To use the test suite, you must provide several pytest fixtures and inherit +from the base test class. The required fixtures are found in +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py. + +To use a test, subclass it: + +.. code-block:: python + + from pandas.tests.extension import base + + + class TestConstructors(base.BaseConstructorsTests): + pass + + +See https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/base/__init__.py +for a list of all the tests available. + +.. _extending.extension.arrow: + +Compatibility with Apache Arrow +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An ``ExtensionArray`` can support conversion to / from ``pyarrow`` arrays +(and thus support for example serialization to the Parquet file format) +by implementing two methods: ``ExtensionArray.__arrow_array__`` and +``ExtensionDtype.__from_arrow__``. + +The ``ExtensionArray.__arrow_array__`` ensures that ``pyarrow`` knowns how +to convert the specific extension array into a ``pyarrow.Array`` (also when +included as a column in a pandas DataFrame): + +.. code-block:: python + + class MyExtensionArray(ExtensionArray): + ... + + def __arrow_array__(self, type=None): + # convert the underlying array values to a pyarrow Array + import pyarrow + + return pyarrow.array(..., type=type) + +The ``ExtensionDtype.__from_arrow__`` method then controls the conversion +back from pyarrow to a pandas ExtensionArray. This method receives a pyarrow +``Array`` or ``ChunkedArray`` as only argument and is expected to return the +appropriate pandas ``ExtensionArray`` for this dtype and the passed values: + +.. code-block:: none + + class ExtensionDtype: + ... + + def __from_arrow__(self, array: pyarrow.Array/ChunkedArray) -> ExtensionArray: + ... + +See more in the `Arrow documentation `__. + +Those methods have been implemented for the nullable integer and string extension +dtypes included in pandas, and ensure roundtrip to pyarrow and the Parquet file format. + +.. _extension dtype dtypes: https://github.com/pandas-dev/pandas/blob/main/pandas/core/dtypes/dtypes.py +.. _extension dtype source: https://github.com/pandas-dev/pandas/blob/main/pandas/core/dtypes/base.py +.. _extension array source: https://github.com/pandas-dev/pandas/blob/main/pandas/core/arrays/base.py + +.. _extending.subclassing-pandas: + +Subclassing pandas data structures +---------------------------------- + +.. warning:: There are some easier alternatives before considering subclassing ``pandas`` data structures. + + 1. Extensible method chains with :ref:`pipe ` + + 2. Use *composition*. See `here `_. + + 3. Extending by :ref:`registering an accessor ` + + 4. Extending by :ref:`extension type ` + +This section describes how to subclass ``pandas`` data structures to meet more specific needs. There are two points that need attention: + +1. Override constructor properties. +2. Define original properties + +.. note:: + + You can find a nice example in `geopandas `_ project. 
+ +Override constructor properties +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each data structure has several *constructor properties* for returning a new +data structure as the result of an operation. By overriding these properties, +you can retain subclasses through ``pandas`` data manipulations. + +There are 3 possible constructor properties to be defined on a subclass: + +* ``DataFrame/Series._constructor``: Used when a manipulation result has the same dimension as the original. +* ``DataFrame._constructor_sliced``: Used when a ``DataFrame`` (sub-)class manipulation result should be a ``Series`` (sub-)class. +* ``Series._constructor_expanddim``: Used when a ``Series`` (sub-)class manipulation result should be a ``DataFrame`` (sub-)class, e.g. ``Series.to_frame()``. + +Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties. + +.. code-block:: python + + class SubclassedSeries(pd.Series): + @property + def _constructor(self): + return SubclassedSeries + + @property + def _constructor_expanddim(self): + return SubclassedDataFrame + + + class SubclassedDataFrame(pd.DataFrame): + @property + def _constructor(self): + return SubclassedDataFrame + + @property + def _constructor_sliced(self): + return SubclassedSeries + +.. code-block:: python + + >>> s = SubclassedSeries([1, 2, 3]) + >>> type(s) + + + >>> to_framed = s.to_frame() + >>> type(to_framed) + + + >>> df = SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> type(df) + + + >>> sliced1 = df[["A", "B"]] + >>> sliced1 + A B + 0 1 4 + 1 2 5 + 2 3 6 + + >>> type(sliced1) + + + >>> sliced2 = df["A"] + >>> sliced2 + 0 1 + 1 2 + 2 3 + Name: A, dtype: int64 + + >>> type(sliced2) + + +Define original properties +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To let original data structures have additional properties, you should let ``pandas`` know what properties are added. ``pandas`` maps unknown properties to data names overriding ``__getattribute__``. Defining original properties can be done in one of 2 ways: + +1. Define ``_internal_names`` and ``_internal_names_set`` for temporary properties which WILL NOT be passed to manipulation results. +2. Define ``_metadata`` for normal properties which will be passed to manipulation results. + +Below is an example to define two original properties, "internal_cache" as a temporary property and "added_property" as a normal property + +.. code-block:: python + + class SubclassedDataFrame2(pd.DataFrame): + + # temporary properties + _internal_names = pd.DataFrame._internal_names + ["internal_cache"] + _internal_names_set = set(_internal_names) + + # normal properties + _metadata = ["added_property"] + + @property + def _constructor(self): + return SubclassedDataFrame2 + +.. code-block:: python + + >>> df = SubclassedDataFrame2({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.internal_cache = "cached" + >>> df.added_property = "property" + + >>> df.internal_cache + cached + >>> df.added_property + property + + # properties defined in _internal_names is reset after manipulation + >>> df[["A", "B"]].internal_cache + AttributeError: 'SubclassedDataFrame2' object has no attribute 'internal_cache' + + # properties defined in _metadata are retained + >>> df[["A", "B"]].added_property + property + +.. 
_extending.plotting-backends: + +Plotting backends +----------------- + +Starting in 0.25 pandas can be extended with third-party plotting backends. The +main idea is letting users select a plotting backend different than the provided +one based on Matplotlib. For example: + +.. code-block:: python + + >>> pd.set_option("plotting.backend", "backend.module") + >>> pd.Series([1, 2, 3]).plot() + +This would be more or less equivalent to: + +.. code-block:: python + + >>> import backend.module + >>> backend.module.plot(pd.Series([1, 2, 3])) + +The backend module can then use other visualization tools (Bokeh, Altair,...) +to generate the plots. + +Libraries implementing the plotting backend should use `entry points `__ +to make their backend discoverable to pandas. The key is ``"pandas_plotting_backends"``. For example, pandas +registers the default "matplotlib" backend as follows. + +.. code-block:: python + + # in setup.py + setup( # noqa: F821 + ..., + entry_points={ + "pandas_plotting_backends": [ + "matplotlib = pandas:plotting._matplotlib", + ], + }, + ) + + +More information on how to implement a third-party plotting backend can be found at +https://github.com/pandas-dev/pandas/blob/main/pandas/plotting/__init__.py#L1. diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst new file mode 100644 index 00000000..c741441c --- /dev/null +++ b/doc/source/development/index.rst @@ -0,0 +1,26 @@ +{{ header }} + +.. _development: + +=========== +Development +=========== + +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + +.. toctree:: + :maxdepth: 2 + + contributing + contributing_environment + contributing_documentation + contributing_codebase + maintaining + internals + debugging_extensions + extending + developer + policies + roadmap + community diff --git a/doc/source/development/internals.rst b/doc/source/development/internals.rst new file mode 100644 index 00000000..cec385dd --- /dev/null +++ b/doc/source/development/internals.rst @@ -0,0 +1,103 @@ +.. _internals: + +{{ header }} + +********* +Internals +********* + +This section will provide a look into some of pandas internals. It's primarily +intended for developers of pandas itself. + +Indexing +-------- + +In pandas there are a few objects implemented which can serve as valid +containers for the axis labels: + +* ``Index``: the generic "ordered set" object, an ndarray of object dtype + assuming nothing about its contents. The labels must be hashable (and + likely immutable) and unique. Populates a dict of label to location in + Cython to do ``O(1)`` lookups. +* ``Int64Index``: a version of ``Index`` highly optimized for 64-bit integer + data, such as time stamps +* ``Float64Index``: a version of ``Index`` highly optimized for 64-bit float data +* ``MultiIndex``: the standard hierarchical index object +* ``DatetimeIndex``: An Index object with ``Timestamp`` boxed elements (impl are the int64 values) +* ``TimedeltaIndex``: An Index object with ``Timedelta`` boxed elements (impl are the in64 values) +* ``PeriodIndex``: An Index object with Period elements + +There are functions that make the creation of a regular index easy: + +* ``date_range``: fixed frequency date range generated from a time rule or + DateOffset. An ndarray of Python datetime objects +* ``period_range``: fixed frequency date range generated from a time rule or + DateOffset. 
An ndarray of ``Period`` objects, representing timespans + +The motivation for having an ``Index`` class in the first place was to enable +different implementations of indexing. This means that it's possible for you, +the user, to implement a custom ``Index`` subclass that may be better suited to +a particular application than the ones provided in pandas. + +From an internal implementation point of view, the relevant methods that an +``Index`` must define are one or more of the following (depending on how +incompatible the new object internals are with the ``Index`` functions): + +* ``get_loc``: returns an "indexer" (an integer, or in some cases a + slice object) for a label +* ``slice_locs``: returns the "range" to slice between two labels +* ``get_indexer``: Computes the indexing vector for reindexing / data + alignment purposes. See the source / docstrings for more on this +* ``get_indexer_non_unique``: Computes the indexing vector for reindexing / data + alignment purposes when the index is non-unique. See the source / docstrings + for more on this +* ``reindex``: Does any pre-conversion of the input index then calls + ``get_indexer`` +* ``union``, ``intersection``: computes the union or intersection of two + Index objects +* ``insert``: Inserts a new label into an Index, yielding a new object +* ``delete``: Delete a label, yielding a new object +* ``drop``: Deletes a set of labels +* ``take``: Analogous to ndarray.take + +MultiIndex +~~~~~~~~~~ + +Internally, the ``MultiIndex`` consists of a few things: the **levels**, the +integer **codes** (until version 0.24 named *labels*), and the level **names**: + +.. ipython:: python + + index = pd.MultiIndex.from_product( + [range(3), ["one", "two"]], names=["first", "second"] + ) + index + index.levels + index.codes + index.names + +You can probably guess that the codes determine which unique element is +identified with that location at each layer of the index. It's important to +note that sortedness is determined **solely** from the integer codes and does +not check (or care) whether the levels themselves are sorted. Fortunately, the +constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but +if you compute the levels and codes yourself, please be careful. + +Values +~~~~~~ + +pandas extends NumPy's type system with custom types, like ``Categorical`` or +datetimes with a timezone, so we have multiple notions of "values". For 1-D +containers (``Index`` classes and ``Series``) we have the following convention: + +* ``cls._values`` refers is the "best possible" array. This could be an + ``ndarray`` or ``ExtensionArray``. + +So, for example, ``Series[category]._values`` is a ``Categorical``. + +.. _ref-subclassing-pandas: + +Subclassing pandas data structures +---------------------------------- + +This section has been moved to :ref:`extending.subclassing-pandas`. diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst new file mode 100644 index 00000000..1bff2ecc --- /dev/null +++ b/doc/source/development/maintaining.rst @@ -0,0 +1,308 @@ +.. _maintaining: + +****************** +pandas maintenance +****************** + +This guide is for pandas' maintainers. It may also be interesting to contributors +looking to understand the pandas development process and what steps are necessary +to become a maintainer. + +The main contributing guide is available at :ref:`contributing`. + +Roles +----- + +pandas uses two levels of permissions: **triage** and **core** team members. 
+ +Triage members can label and close issues and pull requests. + +Core team members can label and close issues and pull request, and can merge +pull requests. + +GitHub publishes the full `list of permissions`_. + +Tasks +----- + +pandas is largely a volunteer project, so these tasks shouldn't be read as +"expectations" of triage and maintainers. Rather, they're general descriptions +of what it means to be a maintainer. + +* Triage newly filed issues (see :ref:`maintaining.triage`) +* Review newly opened pull requests +* Respond to updates on existing issues and pull requests +* Drive discussion and decisions on stalled issues and pull requests +* Provide experience / wisdom on API design questions to ensure consistency and maintainability +* Project organization (run / attend developer meetings, represent pandas) + +https://matthewrocklin.com/blog/2019/05/18/maintainer may be interesting background +reading. + +.. _maintaining.triage: + +Issue triage +------------ + + +Here's a typical workflow for triaging a newly opened issue. + +1. **Thank the reporter for opening an issue** + + The issue tracker is many people's first interaction with the pandas project itself, + beyond just using the library. As such, we want it to be a welcoming, pleasant + experience. + +2. **Is the necessary information provided?** + + Ideally reporters would fill out the issue template, but many don't. + If crucial information (like the version of pandas they used), is missing + feel free to ask for that and label the issue with "Needs info". The + report should follow the guidelines in :ref:`contributing.bug_reports`. + You may want to link to that if they didn't follow the template. + + Make sure that the title accurately reflects the issue. Edit it yourself + if it's not clear. + +3. **Is this a duplicate issue?** + + We have many open issues. If a new issue is clearly a duplicate, label the + new issue as "Duplicate" assign the milestone "No Action", and close the issue + with a link to the original issue. Make sure to still thank the reporter, and + encourage them to chime in on the original issue, and perhaps try to fix it. + + If the new issue provides relevant information, such as a better or slightly + different example, add it to the original issue as a comment or an edit to + the original post. + +4. **Is the issue minimal and reproducible**? + + For bug reports, we ask that the reporter provide a minimal reproducible + example. See https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports + for a good explanation. If the example is not reproducible, or if it's + *clearly* not minimal, feel free to ask the reporter if they can provide + and example or simplify the provided one. Do acknowledge that writing + minimal reproducible examples is hard work. If the reporter is struggling, + you can try to write one yourself and we'll edit the original post to include it. + + If a reproducible example can't be provided, add the "Needs info" label. + + If a reproducible example is provided, but you see a simplification, + edit the original post with your simpler reproducible example. + +5. **Is this a clearly defined feature request?** + + Generally, pandas prefers to discuss and design new features in issues, before + a pull request is made. Encourage the submitter to include a proposed API + for the new feature. Having them write a full docstring is a good way to + pin down specifics. 
+ + We'll need a discussion from several pandas maintainers before deciding whether + the proposal is in scope for pandas. + +6. **Is this a usage question?** + + We prefer that usage questions are asked on StackOverflow with the pandas + tag. https://stackoverflow.com/questions/tagged/pandas + + If it's easy to answer, feel free to link to the relevant documentation section, + let them know that in the future this kind of question should be on + StackOverflow, and close the issue. + +7. **What labels and milestones should I add?** + + Apply the relevant labels. This is a bit of an art, and comes with experience. + Look at similar issues to get a feel for how things are labeled. + + If the issue is clearly defined and the fix seems relatively straightforward, + label the issue as "Good first issue". + + Typically, new issues will be assigned the "Contributions welcome" milestone, + unless it's know that this issue should be addressed in a specific release (say + because it's a large regression). + +.. _maintaining.closing: + +Closing issues +-------------- + +Be delicate here: many people interpret closing an issue as us saying that the +conversation is over. It's typically best to give the reporter some time to +respond or self-close their issue if it's determined that the behavior is not a bug, +or the feature is out of scope. Sometimes reporters just go away though, and +we'll close the issue after the conversation has died. + +.. _maintaining.reviewing: + +Reviewing pull requests +----------------------- + +Anybody can review a pull request: regular contributors, triagers, or core-team +members. But only core-team members can merge pull requests when they're ready. + +Here are some things to check when reviewing a pull request. + +* Tests should be in a sensible location: in the same file as closely related tests. +* New public APIs should be included somewhere in ``doc/source/reference/``. +* New / changed API should use the ``versionadded`` or ``versionchanged`` directives in the docstring. +* User-facing changes should have a whatsnew in the appropriate file. +* Regression tests should reference the original GitHub issue number like ``# GH-1234``. +* The pull request should be labeled and assigned the appropriate milestone (the next patch release + for regression fixes and small bug fixes, the next minor milestone otherwise) +* Changes should comply with our :ref:`policies.version`. + + +.. _maintaining.backporting: + +Backporting +----------- + +pandas supports point releases (e.g. ``1.4.3``) that aim to: + +1. Fix bugs in new features introduced in the first minor version release. + + * e.g. If a new feature was added in ``1.4`` and contains a bug, a fix can be applied in ``1.4.3`` + +2. Fix bugs that used to work in a few minor releases prior. There should be agreement between core team members that a backport is appropriate. + + * e.g. If a feature worked in ``1.2`` and stopped working since ``1.3``, a fix can be applied in ``1.4.3``. + +Since pandas minor releases are based on Github branches (e.g. point release of ``1.4`` are based off the ``1.4.x`` branch), +"backporting" means merging a pull request fix to the ``main`` branch and correct minor branch associated with the next point release. + +By default, if a pull request is assigned to the next point release milestone within the Github interface, +the backporting process should happen automatically by the ``@meeseeksdev`` bot once the pull request is merged. 
+A new pull request will be made backporting the pull request to the correct version branch.
+Sometimes, due to merge conflicts, a manual pull request will need to be made to resolve the conflict.
+
+If the bot does not automatically start the backporting process, you can also write a GitHub comment in the merged pull request
+to trigger the backport::
+
+    @meeseeksdev backport version-branch
+
+This will trigger a workflow which will backport a given change to a branch
+(e.g. ``@meeseeksdev backport 1.4.x``).
+
+Cleaning up old issues
+----------------------
+
+Every open issue in pandas has a cost. Open issues make finding duplicates harder,
+and can make it harder to know what needs to be done in pandas. That said, closing
+issues isn't a goal on its own. Our goal is to make pandas the best it can be,
+and that's best done by ensuring that the quality of our open issues is high.
+
+Occasionally, bugs are fixed but the issue isn't linked to in the pull request.
+In these cases, comment that "This has been fixed, but could use a test." and
+label the issue as "Good First Issue" and "Needs Test".
+
+If an older issue doesn't follow our issue template, edit the original post to
+include a minimal example, the actual output, and the expected output. Uniformity
+in issue reports is valuable.
+
+If an older issue lacks a reproducible example, label it as "Needs Info" and
+ask the reporter to provide one (or write one yourself if possible). If one isn't
+provided reasonably soon, close it according to the policies in :ref:`maintaining.closing`.
+
+Cleaning up old pull requests
+-----------------------------
+
+Occasionally, contributors are unable to finish a pull request.
+If some time has passed (two weeks, say) since the last review requesting changes,
+gently ask if they're still interested in working on it. If another two weeks or
+so passes with no response, thank them for their work and close the pull request.
+Comment on the original issue that "There's a stalled PR at #1234 that may be
+helpful.", and perhaps label the issue as "Good first issue" if the PR was relatively
+close to being accepted.
+
+Additionally, core-team members can push to contributors' branches. This can be
+helpful for pushing an important PR across the line, or for fixing a small
+merge conflict.
+
+Becoming a pandas maintainer
+----------------------------
+
+The full process is outlined in our `governance documents`_. In summary,
+we're happy to give triage permissions to anyone who shows interest by
+being helpful on the issue tracker.
+
+The required steps for adding a maintainer are:
+
+1. Contact the contributor and ask whether they are interested in joining.
+2. Add the contributor to the appropriate `Github Team `_ if they accept the invitation.
+
+   * ``pandas-core`` is for core team members
+   * ``pandas-triage`` is for pandas triage members
+
+3. Add the contributor to the pandas Google group.
+4. Create a pull request to add the contributor's GitHub handle to ``pandas-dev/pandas/web/pandas/config.yml``.
+5. Create a pull request to add the contributor's name/GitHub handle to the `governance document `_.
+
+The current list of core-team members is at
+https://github.com/pandas-dev/pandas-governance/blob/master/people.md
+
+
+.. _maintaining.merging:
+
+Merging pull requests
+---------------------
+
+Only core team members can merge pull requests. We have a few guidelines.
+
+1. You should typically not self-merge your own pull requests. Exceptions include
+   things like small changes to fix CI (e.g. pinning a package version).
+2. You should not merge pull requests that have an active discussion, or pull
+   requests that have any ``-1`` votes from a core maintainer. pandas operates
+   by consensus.
+3. For larger changes, it's good to have a +1 from at least two core team members.
+
+In addition to the items listed in :ref:`maintaining.closing`, you should verify
+that the pull request is assigned the correct milestone.
+
+Pull requests merged with a patch-release milestone will typically be backported
+by our bot. Verify that the bot noticed the merge (it will typically leave a comment
+within a minute). If a manual backport is needed, please do that, and remove
+the "Needs backport" label once you've done it manually. If you forget to assign
+a milestone before tagging, you can request the bot to backport it with:
+
+.. code-block:: console
+
+    @Meeseeksdev backport
+
+
+.. _maintaining.asv-machine:
+
+Benchmark machine
+-----------------
+
+The team currently owns dedicated hardware for running pandas' ASV performance benchmarks
+and hosting a website with the results. The results
+are published to http://pandas.pydata.org/speed/pandas/
+
+Configuration
+`````````````
+
+The machine can be configured with the `Ansible `_ playbook in https://github.com/tomaugspurger/asv-runner.
+
+Publishing
+``````````
+
+The results are published to another GitHub repository, https://github.com/tomaugspurger/asv-collection.
+Finally, we have a cron job on our docs server that pulls from https://github.com/tomaugspurger/asv-collection and serves the results from ``/speed``.
+Ask Tom or Joris for access to the webserver.
+
+Debugging
+`````````
+
+The benchmarks are scheduled by Airflow, which has a dashboard for viewing and debugging the results.
+You'll need to set up an SSH tunnel to view it:
+
+.. code-block:: console
+
+    ssh -L 8080:localhost:8080 pandas@panda.likescandy.com
+
+
+.. _maintaining.release:
+
+Release process
+---------------
+
+The process for releasing a new version of pandas can be found at https://github.com/pandas-dev/pandas-release
+
+.. _governance documents: https://github.com/pandas-dev/pandas-governance
+.. _list of permissions: https://docs.github.com/en/organizations/managing-access-to-your-organizations-repositories/repository-roles-for-an-organization
diff --git a/doc/source/development/policies.rst b/doc/source/development/policies.rst
new file mode 100644
index 00000000..d75262c0
--- /dev/null
+++ b/doc/source/development/policies.rst
@@ -0,0 +1,57 @@
+.. _develop.policies:
+
+********
+Policies
+********
+
+.. _policies.version:
+
+Version policy
+~~~~~~~~~~~~~~
+
+.. versionchanged:: 1.0.0
+
+pandas uses a loose variant of semantic versioning (`SemVer`_) to govern
+deprecations, API compatibility, and version numbering.
+
+A pandas release number is made up of ``MAJOR.MINOR.PATCH``.
+
+API-breaking changes should only occur in **major** releases. These changes
+will be documented, with clear guidance on what is changing, why it's changing,
+and how to migrate existing code to the new behavior.
+
+Whenever possible, a deprecation path will be provided rather than an outright
+breaking change.
+
+pandas will introduce deprecations in **minor** releases. These deprecations
+will preserve the existing behavior while emitting a warning that provides
+guidance on:
+
+* How to achieve similar behavior if an alternative is available
+* The pandas version in which the deprecation will be enforced.
+
+We will not introduce new deprecations in patch releases.
+
+Deprecations will only be enforced in **major** releases.
For example, if a +behavior is deprecated in pandas 1.2.0, it will continue to work, with a +warning, for all releases in the 1.x series. The behavior will change and the +deprecation removed in the next major release (2.0.0). + +.. note:: + + pandas will sometimes make *behavior changing* bug fixes, as part of + minor or patch releases. Whether or not a change is a bug fix or an + API-breaking change is a judgement call. We'll do our best, and we + invite you to participate in development discussion on the issue + tracker or mailing list. + +These policies do not apply to features marked as **experimental** in the documentation. +pandas may change the behavior of experimental features at any time. + +Python support +~~~~~~~~~~~~~~ + +pandas mirrors the `NumPy guidelines for Python support `__. + + +.. _SemVer: https://semver.org diff --git a/doc/source/development/roadmap.rst b/doc/source/development/roadmap.rst new file mode 100644 index 00000000..f935c27d --- /dev/null +++ b/doc/source/development/roadmap.rst @@ -0,0 +1,250 @@ +.. _roadmap: + +======= +Roadmap +======= + +This page provides an overview of the major themes in pandas' development. Each of +these items requires a relatively large amount of effort to implement. These may +be achieved more quickly with dedicated funding or interest from contributors. + +An item being on the roadmap does not mean that it will *necessarily* happen, even +with unlimited funding. During the implementation period we may discover issues +preventing the adoption of the feature. + +Additionally, an item *not* being on the roadmap does not exclude it from inclusion +in pandas. The roadmap is intended for larger, fundamental changes to the project that +are likely to take months or years of developer time. Smaller-scoped items will continue +to be tracked on our `issue tracker `__. + +See :ref:`roadmap.evolution` for proposing changes to this document. + +Extensibility +------------- + +pandas :ref:`extending.extension-types` allow for extending NumPy types with custom +data types and array storage. pandas uses extension types internally, and provides +an interface for 3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy array. +These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the library, +making their behavior more consistent with the handling of NumPy arrays. We'll do this +by cleaning up pandas' internals and adding new methods to the extension array interface. + +String data type +---------------- + +Currently, pandas stores text data in an ``object`` -dtype NumPy array. +The current implementation has two primary drawbacks: First, ``object`` -dtype +is not specific to strings: any Python object can be stored in an ``object`` -dtype +array, not just strings. Second: this is not efficient. The NumPy memory model +isn't especially well-suited to variable width text data. + +To solve the first issue, we propose a new extension type for string data. This +will initially be opt-in, with users explicitly requesting ``dtype="string"``. +The array backing this string dtype may initially be the current implementation: +an ``object`` -dtype NumPy array of Python strings. + +To solve the second issue (performance), we'll explore alternative in-memory +array libraries (for example, Apache Arrow). 
As part of the work, we may +need to implement certain operations expected by pandas users (for example +the algorithm used in, ``Series.str.upper``). That work may be done outside of +pandas. + +Consistent missing value handling +--------------------------------- + +Currently, pandas handles missing data differently for different data types. We +use different types to indicate that a value is missing (``np.nan`` for +floating-point data, ``np.nan`` or ``None`` for object-dtype data -- typically +strings or booleans -- with missing values, and ``pd.NaT`` for datetimelike +data). Integer data cannot store missing data or are cast to float. In addition, +pandas 1.0 introduced a new missing value sentinel, ``pd.NA``, which is being +used for the experimental nullable integer, boolean, and string data types. + +These different missing values have different behaviors in user-facing +operations. Specifically, we introduced different semantics for the nullable +data types for certain operations (e.g. propagating in comparison operations +instead of comparing as False). + +Long term, we want to introduce consistent missing data handling for all data +types. This includes consistent behavior in all operations (indexing, arithmetic +operations, comparisons, etc.). There has been discussion of eventually making +the new semantics the default. + +This has been discussed at :issue:`28095` (and +linked issues), and described in more detail in this +`design doc `__. + +Apache Arrow interoperability +----------------------------- + +`Apache Arrow `__ is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned with +typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and data types +within pandas. This will let us take advantage of its I/O capabilities and +provide for better interoperability with other languages and libraries +using Arrow. + +Block manager rewrite +--------------------- + +We'd like to replace pandas current internal data structures (a collection of +1 or 2-D arrays) with a simpler collection of 1-D arrays. + +pandas internal data model is quite complex. A DataFrame is made up of +one or more 2-dimensional "blocks", with one or more blocks per dtype. This +collection of 2-D arrays is managed by the BlockManager. + +The primary benefit of the BlockManager is improved performance on certain +operations (construction from a 2D array, binary operations, reductions across the columns), +especially for wide DataFrames. However, the BlockManager substantially increases the +complexity and maintenance burden of pandas. + +By replacing the BlockManager we hope to achieve + +* Substantially simpler code +* Easier extensibility with new logical types +* Better user control over memory use and layout +* Improved micro-performance +* Option to provide a C / Cython API to pandas' internals + +See `these design documents `__ +for more. + +Decoupling of indexing and internals +------------------------------------ + +The code for getting and setting values in pandas' data structures needs refactoring. +In particular, we must clearly separate code that converts keys (e.g., the argument +to ``DataFrame.loc``) to positions from code that uses these positions to get +or set values. This is related to the proposed BlockManager rewrite. Currently, the +BlockManager sometimes uses label-based, rather than position-based, indexing. 
+We propose that it should only work with positional indexing, and the translation of keys +to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. The following principles should inspire refactoring of indexing code and +should result on cleaner, simpler, and more performant code. + +1. **Label indexing must never involve looking in an axis twice for the same label(s).** +This implies that any validation step must either: + + * limit validation to general features (e.g. dtype/structure of the key/index), or + * reuse the result for the actual indexing. + +2. **Indexers must never rely on an explicit call to other indexers.** +For instance, it is OK to have some internal method of ``.loc`` call some +internal method of ``__getitem__`` (or of their common base class), +but never in the code flow of ``.loc`` should ``the_obj[something]`` appear. + +3. **Execution of positional indexing must never involve labels** (as currently, sadly, happens). +That is, the code flow of a getter call (or a setter call in which the right hand side is non-indexed) +to ``.iloc`` should never involve the axes of the object in any way. + +4. **Indexing must never involve accessing/modifying values** (i.e., act on ``._data`` or ``.values``) **more than once.** +The following steps must hence be clearly decoupled: + + * find positions we need to access/modify on each axis + * (if we are accessing) derive the type of object we need to return (dimensionality) + * actually access/modify the values + * (if we are accessing) construct the return object + +5. As a corollary to the decoupling between 4.i and 4.iii, **any code which deals on how data is stored** +(including any combination of handling multiple dtypes, and sparse storage, categoricals, third-party types) +**must be independent from code that deals with identifying affected rows/columns**, +and take place only once step 4.i is completed. + + * In particular, such code should most probably not live in ``pandas/core/indexing.py`` + * ... and must not depend in any way on the type(s) of axes (e.g. no ``MultiIndex`` special cases) + +6. As a corollary to point 1.i, **``Index`` (sub)classes must provide separate methods for any desired validity check of label(s) which does not involve actual lookup**, +on the one side, and for any required conversion/adaptation/lookup of label(s), on the other. + +7. **Use of trial and error should be limited**, and anyway restricted to catch only exceptions +which are actually expected (typically ``KeyError``). + + * In particular, code should never (intentionally) raise new exceptions in the ``except`` portion of a ``try... exception`` + +8. **Any code portion which is not specific to setters and getters must be shared**, +and when small differences in behavior are expected (e.g. getting with ``.loc`` raises for +missing labels, setting still doesn't), they can be managed with a specific parameter. + +Numba-accelerated operations +---------------------------- + +`Numba `__ is a JIT compiler for Python code. We'd like to provide +ways for users to apply their own Numba-jitted functions where pandas accepts user-defined functions +(for example, :meth:`Series.apply`, :meth:`DataFrame.apply`, :meth:`DataFrame.applymap`, +and in groupby and window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled code. 
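+
+A flavor of this is already available for window and groupby operations through the
+``engine="numba"`` keyword (it requires the optional Numba dependency). The snippet
+below is only an illustrative sketch; the data and the ``mean_abs`` function are
+invented for the example.
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+
+    s = pd.Series(np.random.randn(1_000_000))
+
+    def mean_abs(values):
+        # receives a raw NumPy array because raw=True below
+        return np.abs(values).mean()
+
+    # the function is JIT-compiled by Numba on the first call and reused afterwards
+    result = s.rolling(100).apply(mean_abs, raw=True, engine="numba")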
+ +Performance monitoring +---------------------- + +pandas uses `airspeed velocity `__ to +monitor for performance regressions. ASV itself is a fabulous tool, but requires +some additional work to be integrated into an open source project's workflow. + +The `asv-runner `__ organization, currently made up +of pandas maintainers, provides tools built on top of ASV. We have a physical +machine for running a number of project's benchmarks, and tools managing the +benchmark runs and reporting on results. + +We'd like to fund improvements and maintenance of these tools to + +* Be more stable. Currently, they're maintained on the nights and weekends when + a maintainer has free time. +* Tune the system for benchmarks to improve stability, following + https://pyperf.readthedocs.io/en/latest/system.html +* Build a GitHub bot to request ASV runs *before* a PR is merged. Currently, the + benchmarks are only run nightly. + +.. _roadmap.evolution: + +Roadmap evolution +----------------- + +pandas continues to evolve. The direction is primarily determined by community +interest. Everyone is welcome to review existing items on the roadmap and +to propose a new item. + +Each item on the roadmap should be a short summary of a larger design proposal. +The proposal should include + +1. Short summary of the changes, which would be appropriate for inclusion in + the roadmap if accepted. +2. Motivation for the changes. +3. An explanation of why the change is in scope for pandas. +4. Detailed design: Preferably with example-usage (even if not implemented yet) + and API documentation +5. API Change: Any API changes that may result from the proposal. + +That proposal may then be submitted as a GitHub issue, where the pandas maintainers +can review and comment on the design. The `pandas mailing list `__ +should be notified of the proposal. + +When there's agreement that an implementation +would be welcome, the roadmap should be updated to include the summary and a +link to the discussion issue. + +Completed items +--------------- + +This section records now completed items from the pandas roadmap. + +Documentation improvements +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We improved the pandas documentation + +* The pandas community worked with others to build the `pydata-sphinx-theme`_, + which is now used for https://pandas.pydata.org/docs/ (:issue:`15556`). +* :ref:`getting_started` contains a number of resources intended for new + pandas users coming from a variety of backgrounds (:issue:`26831`). + +.. _pydata-sphinx-theme: https://github.com/pydata/pydata-sphinx-theme diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst new file mode 100644 index 00000000..166162a4 --- /dev/null +++ b/doc/source/ecosystem.rst @@ -0,0 +1,602 @@ +:orphan: + +.. _ecosystem: + +{{ header }} + +**************** +pandas ecosystem +**************** + +Increasingly, packages are being built on top of pandas to address specific needs +in data preparation, analysis and visualization. +This is encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for developers to +build powerful and more focused data tools. +The creation of libraries that complement pandas' functionality also allows pandas +development to remain focused around it's original requirements. + +This is an inexhaustive list of projects that build on pandas in order to provide +tools in the PyData space. 
For a list of projects that depend on pandas, +see the +`Github network dependents for pandas `_ +or `search pypi for pandas `_. + +We'd like to make it easier for users to find these projects, if you know of other +substantial projects that you feel should be on this list, please let us know. + +.. _ecosystem.data_cleaning_and_validation: + +Data cleaning and validation +---------------------------- + +`Pyjanitor `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pyjanitor provides a clean API for cleaning data, using method chaining. + +`Pandera `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pandera provides a flexible and expressive API for performing data validation on dataframes +to make data processing pipelines more readable and robust. +Dataframes contain information that pandera explicitly validates at runtime. This is useful in +production-critical data pipelines or reproducible research settings. + +`pandas-path `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since Python 3.4, `pathlib `_ has been +included in the Python standard library. Path objects provide a simple +and delightful way to interact with the file system. The pandas-path package enables the +Path API for pandas through a custom accessor ``.path``. Getting just the filenames from +a series of full file paths is as simple as ``my_files.path.name``. Other convenient operations like +joining paths, replacing file extensions, and checking if files exist are also available. + +.. _ecosystem.stats: + +Statistics and machine learning +------------------------------- + +`pandas-tfrecords `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Easy saving pandas dataframe to tensorflow tfrecords format and reading tfrecords to pandas. + +`Statsmodels `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Statsmodels is the prominent Python "statistics and econometrics library" and it has +a long-standing special relationship with pandas. Statsmodels provides powerful statistics, +econometrics, analysis and modeling functionality that is out of pandas' scope. +Statsmodels leverages pandas objects as the underlying data container for computation. + +`sklearn-pandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use pandas DataFrames in your `scikit-learn `__ +ML pipeline. + +`Featuretools `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Featuretools is a Python library for automated feature engineering built on top of pandas. It excels at transforming temporal and relational datasets into feature matrices for machine learning using reusable feature engineering "primitives". Users can contribute their own primitives in Python and share them with the rest of the community. + +`Compose `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Compose is a machine learning tool for labeling data and prediction engineering. It allows you to structure the labeling process by parameterizing prediction problems and transforming time-driven relational data into target values with cutoff times that can be used for supervised learning. + +`STUMPY `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +STUMPY is a powerful and scalable Python library for modern time series analysis. +At its core, STUMPY efficiently computes something called a +`matrix profile `__, +which can be used for a wide variety of time series data mining tasks. + +.. 
_ecosystem.visualization: + +Visualization +------------- + +`Pandas has its own Styler class for table visualization `_, and while +:ref:`pandas also has built-in support for data visualization through charts with matplotlib `, +there are a number of other pandas-compatible libraries. + +`Altair `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Altair is a declarative statistical visualization library for Python. +With Altair, you can spend more time understanding your data and its +meaning. Altair's API is simple, friendly and consistent and built on +top of the powerful Vega-Lite JSON specification. This elegant +simplicity produces beautiful and effective visualizations with a +minimal amount of code. Altair works with pandas DataFrames. + + +`Bokeh `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Bokeh is a Python interactive visualization library for large datasets that natively uses +the latest web technologies. Its goal is to provide elegant, concise construction of novel +graphics in the style of Protovis/D3, while delivering high-performance interactivity over +large data to thin clients. + +`Pandas-Bokeh `__ provides a high level API +for Bokeh that can be loaded as a native pandas plotting backend via + +.. code:: python + + pd.set_option("plotting.backend", "pandas_bokeh") + +It is very similar to the matplotlib plotting backend, but provides interactive +web-based charts and maps. + + +`Seaborn `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Seaborn is a Python visualization library based on +`matplotlib `__. It provides a high-level, dataset-oriented +interface for creating attractive statistical graphics. The plotting functions +in seaborn understand pandas objects and leverage pandas grouping operations +internally to support concise specification of complex visualizations. Seaborn +also goes beyond matplotlib and pandas with the option to perform statistical +estimation while plotting, aggregating across observations and visualizing the +fit of statistical models to emphasize patterns in a dataset. + +`plotnine `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Hadley Wickham's `ggplot2 `__ is a foundational exploratory visualization package for the R language. +Based on `"The Grammar of Graphics" `__ it +provides a powerful, declarative and extremely general way to generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is `has2k1/plotnine `__. + +`IPython vega `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`IPython Vega `__ leverages `Vega +`__ to create plots within Jupyter Notebook. + +`Plotly `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`Plotly’s `__ `Python API `__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `offline `__, or `on-premise `__ accounts for private use. + +`Lux `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`Lux `__ is a Python library that facilitates fast and easy experimentation with data by automating the visual data exploration process. 
To use Lux, simply add an extra import alongside pandas: + +.. code:: python + + import lux + import pandas as pd + + df = pd.read_csv("data.csv") + df # discover interesting insights! + +By printing out a dataframe, Lux automatically `recommends a set of visualizations `__ that highlights interesting trends and patterns in the dataframe. Users can leverage any existing pandas commands without modifying their code, while being able to visualize their pandas data structures (e.g., DataFrame, Series, Index) at the same time. Lux also offers a `powerful, intuitive language `__ that allow users to create `Altair `__, `matplotlib `__, or `Vega-Lite `__ visualizations without having to think at the level of code. + +`Qtpandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Spun off from the main pandas library, the `qtpandas `__ +library enables DataFrame visualization and manipulation in PyQt4 and PySide applications. + +`D-Tale `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +D-Tale is a lightweight web client for visualizing pandas data structures. It +provides a rich spreadsheet-style grid which acts as a wrapper for a lot of +pandas functionality (query, sort, describe, corr...) so users can quickly +manipulate their data. There is also an interactive chart-builder using Plotly +Dash allowing users to build nice portable visualizations. D-Tale can be +invoked with the following command + +.. code:: python + + import dtale + + dtale.show(df) + +D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle +& Google Colab. Here are some demos of the `grid `__. + +`hvplot `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +hvPlot is a high-level plotting API for the PyData ecosystem built on `HoloViews `__. +It can be loaded as a native pandas plotting backend via + +.. code:: python + + pd.set_option("plotting.backend", "hvplot") + +.. _ecosystem.ide: + +IDE +--- + +`IPython `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +IPython is an interactive command shell and distributed computing +environment. IPython tab completion works with pandas methods and also +attributes like DataFrame columns. + +`Jupyter Notebook / Jupyter Lab `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Jupyter Notebook is a web application for creating Jupyter notebooks. +A Jupyter notebook is a JSON document containing an ordered list +of input/output cells which can contain code, text, mathematics, plots +and rich media. +Jupyter notebooks can be converted to a number of open standard output formats +(HTML, HTML presentation slides, LaTeX, PDF, ReStructuredText, Markdown, +Python) through 'Download As' in the web interface and ``jupyter convert`` +in a shell. + +pandas DataFrames implement ``_repr_html_`` and ``_repr_latex`` methods +which are utilized by Jupyter Notebook for displaying +(abbreviated) HTML or LaTeX tables. LaTeX output is properly escaped. +(Note: HTML tables may or may not be +compatible with non-HTML Jupyter output formats.) + +See :ref:`Options and Settings ` and +:ref:`Available Options ` +for pandas ``display.`` settings. + +`Quantopian/qgrid `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +qgrid is "an interactive grid for sorting and filtering +DataFrames in IPython Notebook" built with SlickGrid. 
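+
+A typical notebook invocation is sketched below (``show_grid`` is qgrid's documented
+entry point; the ``show_toolbar`` option is assumed from the project's README, so
+check the version you install):
+
+.. code:: python
+
+    import qgrid
+
+    widget = qgrid.show_grid(df, show_toolbar=True)
+    widget  # renders an editable, sortable, filterable grid in the notebook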
+ +`Spyder `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Spyder is a cross-platform PyQt-based IDE combining the editing, analysis, +debugging and profiling functionality of a software development tool with the +data exploration, interactive execution, deep inspection and rich visualization +capabilities of a scientific environment like MATLAB or Rstudio. + +Its `Variable Explorer `__ +allows users to view, manipulate and edit pandas ``Index``, ``Series``, +and ``DataFrame`` objects like a "spreadsheet", including copying and modifying +values, sorting, displaying a "heatmap", converting data types and more. +pandas objects can also be renamed, duplicated, new columns added, +copied/pasted to/from the clipboard (as TSV), and saved/loaded to/from a file. +Spyder can also import data from a variety of plain text and binary files +or the clipboard into a new pandas DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's `Editor `__ and +`IPython Console `__, +and Spyder's `Help pane `__ can retrieve +and render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. + + +.. _ecosystem.api: + +API +--- + +`pandas-datareader `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``pandas-datareader`` is a remote data access library for pandas (PyPI:``pandas-datareader``). +It is based on functionality that was located in ``pandas.io.data`` and ``pandas.io.wb`` but was +split off in v0.19. +See more in the `pandas-datareader docs `_: + +The following data feeds are available: + + * Google Finance + * Tiingo + * Morningstar + * IEX + * Robinhood + * Enigma + * Quandl + * FRED + * Fama/French + * World Bank + * OECD + * Eurostat + * TSP Fund Data + * Nasdaq Trader Symbol Definitions + * Stooq Index Data + * MOEX Data + +`Quandl/Python `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Quandl API for Python wraps the Quandl REST API to return +pandas DataFrames with timeseries indexes. + +`Pydatastream `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +PyDatastream is a Python interface to the +`Refinitiv Datastream (DWS) `__ +REST API to return indexed pandas DataFrames with financial data. +This package requires valid credentials for this API (non free). + +`pandaSDMX `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +pandaSDMX is a library to retrieve and acquire statistical data +and metadata disseminated in +`SDMX `_ 2.1, an ISO-standard +widely used by institutions such as statistics offices, central banks, +and international organisations. pandaSDMX can expose datasets and related +structural metadata including data flows, code-lists, +and data structure definitions as pandas Series +or MultiIndexed DataFrames. + +`fredapi `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +fredapi is a Python interface to the `Federal Reserve Economic Data (FRED) `__ +provided by the Federal Reserve Bank of St. Louis. It works with both the FRED database and ALFRED database that +contains point-in-time data (i.e. historic data revisions). fredapi provides a wrapper in Python to the FRED +HTTP API, and also provides several convenient methods for parsing and analyzing point-in-time data from ALFRED. +fredapi makes use of pandas and returns data in a Series or DataFrame. This module requires a FRED API key that +you can obtain for free on the FRED website. 
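+
+A minimal sketch of typical usage (it assumes you have already obtained an API key;
+the ``"GDP"`` series code is only an example):
+
+.. code:: python
+
+    from fredapi import Fred
+
+    fred = Fred(api_key="insert-your-api-key-here")
+    gdp = fred.get_series("GDP")  # returns a pandas Series indexed by observation date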
+ +`dataframe_sql `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``dataframe_sql`` is a Python package that translates SQL syntax directly into +operations on pandas DataFrames. This is useful when migrating from a database to +using pandas or for users more comfortable with SQL looking for a way to interface +with pandas. + + +.. _ecosystem.domain: + +Domain specific +--------------- + +`Geopandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Geopandas extends pandas data objects to include geographic information which support +geometric operations. If your work entails maps and geographical coordinates, and +you love pandas, you should take a close look at Geopandas. + +`staircase `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +staircase is a data analysis package, built upon pandas and numpy, for modelling and +manipulation of mathematical step functions. It provides a rich variety of arithmetic +operations, relational operations, logical operations, statistical operations and +aggregations for step functions defined over real numbers, datetime and timedelta domains. + + +`xarray `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +xarray brings the labeled data power of pandas to the physical sciences by +providing N-dimensional variants of the core pandas data structures. It aims to +provide a pandas-like and pandas-compatible toolkit for analytics on multi- +dimensional arrays, rather than the tabular data for which pandas excels. + + +.. _ecosystem.io: + +IO +-- + +`BCPandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +BCPandas provides high performance writes from pandas to Microsoft SQL Server, +far exceeding the performance of the native ``df.to_sql`` method. Internally, it uses +Microsoft's BCP utility, but the complexity is fully abstracted away from the end user. +Rigorously tested, it is a complete replacement for ``df.to_sql``. + +`Deltalake `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Deltalake python package lets you access tables stored in +`Delta Lake `__ natively in Python without the need to use Spark or +JVM. It provides the ``delta_table.to_pyarrow_table().to_pandas()`` method to convert +any Delta table into Pandas dataframe. + + +.. _ecosystem.out-of-core: + +Out-of-core +----------- + +`Blaze `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Blaze provides a standard API for doing computations with various +in-memory and on-disk backends: NumPy, pandas, SQLAlchemy, MongoDB, PyTables, +PySpark. + +`Cylon `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Cylon is a fast, scalable, distributed memory parallel runtime with a pandas +like Python DataFrame API. ”Core Cylon” is implemented with C++ using Apache +Arrow format to represent the data in-memory. Cylon DataFrame API implements +most of the core operators of pandas such as merge, filter, join, concat, +group-by, drop_duplicates, etc. These operators are designed to work across +thousands of cores to scale applications. It can interoperate with pandas +DataFrame by reading data from pandas or converting data to pandas so users +can selectively scale parts of their pandas DataFrame applications. + +.. 
code:: python + + from pycylon import read_csv, DataFrame, CylonEnv + from pycylon.net import MPIConfig + + # Initialize Cylon distributed environment + config: MPIConfig = MPIConfig() + env: CylonEnv = CylonEnv(config=config, distributed=True) + + df1: DataFrame = read_csv('/tmp/csv1.csv') + df2: DataFrame = read_csv('/tmp/csv2.csv') + + # Using 1000s of cores across the cluster to compute the join + df3: Table = df1.join(other=df2, on=[0], algorithm="hash", env=env) + + print(df3) + +`Dask `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dask is a flexible parallel computing library for analytics. Dask +provides a familiar ``DataFrame`` interface for out-of-core, parallel and distributed computing. + +`Dask-ML `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dask-ML enables parallel and distributed machine learning using Dask alongside existing machine learning libraries like Scikit-Learn, XGBoost, and TensorFlow. + +`Ibis `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Ibis offers a standard way to write analytics code, that can be run in multiple engines. It helps in bridging the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.). + + +`Koalas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Koalas provides a familiar pandas DataFrame interface on top of Apache Spark. It enables users to leverage multi-cores on one machine or a cluster of machines to speed up or scale their DataFrame code. + +`Modin `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``modin.pandas`` DataFrame is a parallel and distributed drop-in replacement +for pandas. This means that you can use Modin with existing pandas code or write +new code with the existing pandas API. Modin can leverage your entire machine or +cluster to speed up and scale your pandas workloads, including traditionally +time-consuming tasks like ingesting data (``read_csv``, ``read_excel``, +``read_parquet``, etc.). + +.. code:: python + + # import pandas as pd + import modin.pandas as pd + + df = pd.read_csv("big.csv") # use all your cores! + +`Odo `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Odo provides a uniform API for moving data between different formats. It uses +pandas own ``read_csv`` for CSV IO and leverages many existing packages such as +PyTables, h5py, and pymongo to move data between non pandas formats. Its graph +based approach is also extensible by end users for custom formats that may be +too specific for the core of odo. + +`Pandarallel `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code. +If also displays progress bars. + +.. code:: python + + from pandarallel import pandarallel + + pandarallel.initialize(progress_bar=True) + + # df.apply(func) + df.parallel_apply(func) + + +`Vaex `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Increasingly, packages are being built on top of pandas to address specific needs in data preparation, analysis and visualization. Vaex is a Python library for Out-of-Core DataFrames (similar to pandas), to visualize and explore big tabular datasets. It can calculate statistics such as mean, sum, count, standard deviation etc, on an N-dimensional grid up to a billion (10\ :sup:`9`) objects/rows per second. 
Visualization is done using histograms, density plots and 3d volume rendering, allowing interactive exploration of big data. Vaex uses memory mapping, zero memory copy policy and lazy computations for best performance (no memory wasted). + + * vaex.from_pandas + * vaex.to_pandas_df + +.. _ecosystem.extensions: + +Extension data types +-------------------- + +pandas provides an interface for defining +:ref:`extension types ` to extend NumPy's type +system. The following libraries implement that interface to provide types not +found in NumPy or pandas, which work well with pandas' data containers. + +`Cyberpandas`_ +~~~~~~~~~~~~~~ + +Cyberpandas provides an extension type for storing arrays of IP Addresses. These +arrays can be stored inside pandas' Series and DataFrame. + +`Pandas-Genomics`_ +~~~~~~~~~~~~~~~~~~ + +Pandas-Genomics provides extension types, extension arrays, and extension accessors for working with genomics data + +`Pint-Pandas`_ +~~~~~~~~~~~~~~ + +`Pint-Pandas `_ provides an extension type for +storing numeric arrays with units. These arrays can be stored inside pandas' +Series and DataFrame. Operations between Series and DataFrame columns which +use pint's extension array are then units aware. + +`Text Extensions for Pandas`_ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`Text Extensions for Pandas `_ +provides extension types to cover common data structures for representing natural language +data, plus library integrations that convert the outputs of popular natural language +processing libraries into Pandas DataFrames. + +.. _ecosystem.accessors: + +Accessors +--------- + +A directory of projects providing +:ref:`extension accessors `. This is for users to +discover new accessors and for library authors to coordinate on the namespace. + +================== ============ ==================================== =============================================================================== +Library Accessor Classes Description +================== ============ ==================================== =============================================================================== +`cyberpandas`_ ``ip`` ``Series`` Provides common operations for working with IP addresses. +`pdvega`_ ``vgplot`` ``Series``, ``DataFrame`` Provides plotting functions from the Altair_ library. +`pandas-genomics`_ ``genomics`` ``Series``, ``DataFrame`` Provides common operations for quality control and analysis of genomics data. +`pandas_path`_ ``path`` ``Index``, ``Series`` Provides `pathlib.Path`_ functions for Series. +`pint-pandas`_ ``pint`` ``Series``, ``DataFrame`` Provides units support for numeric Series and DataFrames. +`composeml`_ ``slice`` ``DataFrame`` Provides a generator for enhanced data slicing. +`datatest`_ ``validate`` ``Series``, ``DataFrame``, ``Index`` Provides validation, differences, and acceptance managers. +`woodwork`_ ``ww`` ``Series``, ``DataFrame`` Provides physical, logical, and semantic data typing information for Series and DataFrames. +`staircase`_ ``sc`` ``Series`` Provides methods for querying, aggregating and plotting step functions +================== ============ ==================================== =============================================================================== + +.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest +.. _pdvega: https://altair-viz.github.io/pdvega/ +.. _Altair: https://altair-viz.github.io/ +.. _pandas-genomics: https://pandas-genomics.readthedocs.io/en/latest/ +.. _pandas_path: https://github.com/drivendataorg/pandas-path/ +.. 
_pathlib.Path: https://docs.python.org/3/library/pathlib.html +.. _pint-pandas: https://github.com/hgrecco/pint-pandas +.. _composeml: https://github.com/alteryx/compose +.. _datatest: https://datatest.readthedocs.io/en/stable/ +.. _woodwork: https://github.com/alteryx/woodwork +.. _staircase: https://www.staircase.dev/ + +Development tools +----------------- + +`pandas-stubs `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +While pandas repository is partially typed, the package itself doesn't expose this information for external use. +Install pandas-stubs to enable basic type coverage of pandas API. + +Learn more by reading through :issue:`14468`, :issue:`26766`, :issue:`28142`. + +See installation and usage instructions on the `github page `__. diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst new file mode 100644 index 00000000..f91f4218 --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -0,0 +1,581 @@ +.. _compare_with_r: + +{{ header }} + +Comparison with R / R libraries +******************************* + +Since pandas aims to provide a lot of the data manipulation and analysis +functionality that people use `R `__ for, this page +was started to provide a more detailed look at the `R language +`__ and its many third +party libraries as they relate to pandas. In comparisons with R and CRAN +libraries, we care about the following things: + +* **Functionality / flexibility**: what can/cannot be done with each tool +* **Performance**: how fast are operations. Hard numbers/benchmarks are + preferable +* **Ease-of-use**: Is one tool easier/harder to use (you may have to be + the judge of this, given side-by-side code comparisons) + +This page is also here to offer a bit of a translation guide for users of these +R packages. + +For transfer of ``DataFrame`` objects from pandas to R, one option is to +use HDF5 files, see :ref:`io.external_compatibility` for an +example. + + +Quick reference +--------------- + +We'll start off with a quick reference guide pairing some common R +operations using `dplyr +`__ with +pandas equivalents. + + +Querying, filtering, sampling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +=========================================== =========================================== +R pandas +=========================================== =========================================== +``dim(df)`` ``df.shape`` +``head(df)`` ``df.head()`` +``slice(df, 1:10)`` ``df.iloc[:9]`` +``filter(df, col1 == 1, col2 == 1)`` ``df.query('col1 == 1 & col2 == 1')`` +``df[df$col1 == 1 & df$col2 == 1,]`` ``df[(df.col1 == 1) & (df.col2 == 1)]`` +``select(df, col1, col2)`` ``df[['col1', 'col2']]`` +``select(df, col1:col3)`` ``df.loc[:, 'col1':'col3']`` +``select(df, -(col1:col3))`` ``df.drop(cols_to_drop, axis=1)`` but see [#select_range]_ +``distinct(select(df, col1))`` ``df[['col1']].drop_duplicates()`` +``distinct(select(df, col1, col2))`` ``df[['col1', 'col2']].drop_duplicates()`` +``sample_n(df, 10)`` ``df.sample(n=10)`` +``sample_frac(df, 0.01)`` ``df.sample(frac=0.01)`` +=========================================== =========================================== + +.. [#select_range] R's shorthand for a subrange of columns + (``select(df, col1:col3)``) can be approached + cleanly in pandas, if you have the list of columns, + for example ``df[cols[1:3]]`` or + ``df.drop(cols[1:3])``, but doing this by column + name is a bit messy. 
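+
+For concreteness, the workaround described in the footnote looks roughly like this
+(a sketch; the frame and column names are only illustrative):
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+
+    df = pd.DataFrame(np.random.randn(4, 5), columns=["col1", "col2", "col3", "col4", "col5"])
+    cols = list(df.columns)
+
+    # keep a contiguous range of columns, similar to R's select(df, col1:col3)
+    subset = df[cols[0:3]]
+
+    # or drop that same range, similar to select(df, -(col1:col3))
+    remainder = df.drop(columns=cols[0:3])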
+ + +Sorting +~~~~~~~ + +=========================================== =========================================== +R pandas +=========================================== =========================================== +``arrange(df, col1, col2)`` ``df.sort_values(['col1', 'col2'])`` +``arrange(df, desc(col1))`` ``df.sort_values('col1', ascending=False)`` +=========================================== =========================================== + +Transforming +~~~~~~~~~~~~ + +=========================================== =========================================== +R pandas +=========================================== =========================================== +``select(df, col_one = col1)`` ``df.rename(columns={'col1': 'col_one'})['col_one']`` +``rename(df, col_one = col1)`` ``df.rename(columns={'col1': 'col_one'})`` +``mutate(df, c=a-b)`` ``df.assign(c=df['a']-df['b'])`` +=========================================== =========================================== + + +Grouping and summarizing +~~~~~~~~~~~~~~~~~~~~~~~~ + +============================================== =========================================== +R pandas +============================================== =========================================== +``summary(df)`` ``df.describe()`` +``gdf <- group_by(df, col1)`` ``gdf = df.groupby('col1')`` +``summarise(gdf, avg=mean(col1, na.rm=TRUE))`` ``df.groupby('col1').agg({'col1': 'mean'})`` +``summarise(gdf, total=sum(col1))`` ``df.groupby('col1').sum()`` +============================================== =========================================== + + +Base R +------ + +Slicing with R's |c|_ +~~~~~~~~~~~~~~~~~~~~~ + +R makes it easy to access ``data.frame`` columns by name + +.. code-block:: r + + df <- data.frame(a=rnorm(5), b=rnorm(5), c=rnorm(5), d=rnorm(5), e=rnorm(5)) + df[, c("a", "c", "e")] + +or by integer location + +.. code-block:: r + + df <- data.frame(matrix(rnorm(1000), ncol=100)) + df[, c(1:10, 25:30, 40, 50:100)] + +Selecting multiple columns by name in pandas is straightforward + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 3), columns=list("abc")) + df[["a", "c"]] + df.loc[:, ["a", "c"]] + +Selecting multiple noncontiguous columns by integer location can be achieved +with a combination of the ``iloc`` indexer attribute and ``numpy.r_``. + +.. ipython:: python + + named = list("abcdefg") + n = 30 + columns = named + np.arange(len(named), n).tolist() + df = pd.DataFrame(np.random.randn(n, n), columns=columns) + + df.iloc[:, np.r_[:10, 24:30]] + +|aggregate|_ +~~~~~~~~~~~~ + +In R you may want to split data into subsets and compute the mean for each. +Using a data.frame called ``df`` and splitting it into groups ``by1`` and +``by2``: + +.. code-block:: r + + df <- data.frame( + v1 = c(1,3,5,7,8,3,5,NA,4,5,7,9), + v2 = c(11,33,55,77,88,33,55,NA,44,55,77,99), + by1 = c("red", "blue", 1, 2, NA, "big", 1, 2, "red", 1, NA, 12), + by2 = c("wet", "dry", 99, 95, NA, "damp", 95, 99, "red", 99, NA, NA)) + aggregate(x=df[, c("v1", "v2")], by=list(mydf2$by1, mydf2$by2), FUN = mean) + +The :meth:`~pandas.DataFrame.groupby` method is similar to base R ``aggregate`` +function. + +.. 
ipython:: python + + df = pd.DataFrame( + { + "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + "by2": [ + "wet", + "dry", + 99, + 95, + np.nan, + "damp", + 95, + 99, + "red", + 99, + np.nan, + np.nan, + ], + } + ) + + g = df.groupby(["by1", "by2"]) + g[["v1", "v2"]].mean() + +For more details and examples see :ref:`the groupby documentation +`. + +|match|_ +~~~~~~~~~~~~ + +A common way to select data in R is using ``%in%`` which is defined using the +function ``match``. The operator ``%in%`` is used to return a logical vector +indicating if there is a match or not: + +.. code-block:: r + + s <- 0:4 + s %in% c(2,4) + +The :meth:`~pandas.DataFrame.isin` method is similar to R ``%in%`` operator: + +.. ipython:: python + + s = pd.Series(np.arange(5), dtype=np.float32) + s.isin([2, 4]) + +The ``match`` function returns a vector of the positions of matches +of its first argument in its second: + +.. code-block:: r + + s <- 0:4 + match(s, c(2,4)) + +For more details and examples see :ref:`the reshaping documentation +`. + +|tapply|_ +~~~~~~~~~ + +``tapply`` is similar to ``aggregate``, but data can be in a ragged array, +since the subclass sizes are possibly irregular. Using a data.frame called +``baseball``, and retrieving information based on the array ``team``: + +.. code-block:: r + + baseball <- + data.frame(team = gl(5, 5, + labels = paste("Team", LETTERS[1:5])), + player = sample(letters, 25), + batting.average = runif(25, .200, .400)) + + tapply(baseball$batting.average, baseball.example$team, + max) + +In pandas we may use :meth:`~pandas.pivot_table` method to handle this: + +.. ipython:: python + + import random + import string + + baseball = pd.DataFrame( + { + "team": ["team %d" % (x + 1) for x in range(5)] * 5, + "player": random.sample(list(string.ascii_lowercase), 25), + "batting avg": np.random.uniform(0.200, 0.400, 25), + } + ) + + baseball.pivot_table(values="batting avg", columns="team", aggfunc=np.max) + +For more details and examples see :ref:`the reshaping documentation +`. + +|subset|_ +~~~~~~~~~~ + +The :meth:`~pandas.DataFrame.query` method is similar to the base R ``subset`` +function. In R you might want to get the rows of a ``data.frame`` where one +column's values are less than another column's values: + +.. code-block:: r + + df <- data.frame(a=rnorm(10), b=rnorm(10)) + subset(df, a <= b) + df[df$a <= df$b,] # note the comma + +In pandas, there are a few ways to perform subsetting. You can use +:meth:`~pandas.DataFrame.query` or pass an expression as if it were an +index/slice as well as standard boolean indexing: + +.. ipython:: python + + df = pd.DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.query("a <= b") + df[df["a"] <= df["b"]] + df.loc[df["a"] <= df["b"]] + +For more details and examples see :ref:`the query documentation +`. + + +|with|_ +~~~~~~~~ + +An expression using a data.frame called ``df`` in R with the columns ``a`` and +``b`` would be evaluated using ``with`` like so: + +.. code-block:: r + + df <- data.frame(a=rnorm(10), b=rnorm(10)) + with(df, a + b) + df$a + df$b # same as the previous expression + +In pandas the equivalent expression, using the +:meth:`~pandas.DataFrame.eval` method, would be: + +.. 
ipython:: python + + df = pd.DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval("a + b") + df["a"] + df["b"] # same as the previous expression + +In certain cases :meth:`~pandas.DataFrame.eval` will be much faster than +evaluation in pure Python. For more details and examples see :ref:`the eval +documentation `. + +plyr +---- + +``plyr`` is an R library for the split-apply-combine strategy for data +analysis. The functions revolve around three data structures in R, ``a`` +for ``arrays``, ``l`` for ``lists``, and ``d`` for ``data.frame``. The +table below shows how these data structures could be mapped in Python. + ++------------+-------------------------------+ +| R | Python | ++============+===============================+ +| array | list | ++------------+-------------------------------+ +| lists | dictionary or list of objects | ++------------+-------------------------------+ +| data.frame | dataframe | ++------------+-------------------------------+ + +ddply +~~~~~ + +An expression using a data.frame called ``df`` in R where you want to +summarize ``x`` by ``month``: + +.. code-block:: r + + require(plyr) + df <- data.frame( + x = runif(120, 1, 168), + y = runif(120, 7, 334), + z = runif(120, 1.7, 20.7), + month = rep(c(5,6,7,8),30), + week = sample(1:4, 120, TRUE) + ) + + ddply(df, .(month, week), summarize, + mean = round(mean(x), 2), + sd = round(sd(x), 2)) + +In pandas the equivalent expression, using the +:meth:`~pandas.DataFrame.groupby` method, would be: + +.. ipython:: python + + df = pd.DataFrame( + { + "x": np.random.uniform(1.0, 168.0, 120), + "y": np.random.uniform(7.0, 334.0, 120), + "z": np.random.uniform(1.7, 20.7, 120), + "month": [5, 6, 7, 8] * 30, + "week": np.random.randint(1, 4, 120), + } + ) + + grouped = df.groupby(["month", "week"]) + grouped["x"].agg([np.mean, np.std]) + + +For more details and examples see :ref:`the groupby documentation +`. + +reshape / reshape2 +------------------ + +meltarray +~~~~~~~~~ + +An expression using a 3 dimensional array called ``a`` in R where you want to +melt it into a data.frame: + +.. code-block:: r + + a <- array(c(1:23, NA), c(2,3,4)) + data.frame(melt(a)) + +In Python, since ``a`` is a list, you can simply use list comprehension. + +.. ipython:: python + + a = np.array(list(range(1, 24)) + [np.NAN]).reshape(2, 3, 4) + pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)]) + +meltlist +~~~~~~~~ + +An expression using a list called ``a`` in R where you want to melt it +into a data.frame: + +.. code-block:: r + + a <- as.list(c(1:4, NA)) + data.frame(melt(a)) + +In Python, this list would be a list of tuples, so +:meth:`~pandas.DataFrame` method would convert it to a dataframe as required. + +.. ipython:: python + + a = list(enumerate(list(range(1, 5)) + [np.NAN])) + pd.DataFrame(a) + +For more details and examples see :ref:`the Into to Data Structures +documentation `. + +meltdf +~~~~~~ + +An expression using a data.frame called ``cheese`` in R where you want to +reshape the data.frame: + +.. code-block:: r + + cheese <- data.frame( + first = c('John', 'Mary'), + last = c('Doe', 'Bo'), + height = c(5.5, 6.0), + weight = c(130, 150) + ) + melt(cheese, id=c("first", "last")) + +In Python, the :meth:`~pandas.melt` method is the R equivalent: + +.. 
ipython:: python + + cheese = pd.DataFrame( + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) + + pd.melt(cheese, id_vars=["first", "last"]) + cheese.set_index(["first", "last"]).stack() # alternative way + +For more details and examples see :ref:`the reshaping documentation +`. + +cast +~~~~ + +In R ``acast`` is an expression using a data.frame called ``df`` in R to cast +into a higher dimensional array: + +.. code-block:: r + + df <- data.frame( + x = runif(12, 1, 168), + y = runif(12, 7, 334), + z = runif(12, 1.7, 20.7), + month = rep(c(5,6,7),4), + week = rep(c(1,2), 6) + ) + + mdf <- melt(df, id=c("month", "week")) + acast(mdf, week ~ month ~ variable, mean) + +In Python the best way is to make use of :meth:`~pandas.pivot_table`: + +.. ipython:: python + + df = pd.DataFrame( + { + "x": np.random.uniform(1.0, 168.0, 12), + "y": np.random.uniform(7.0, 334.0, 12), + "z": np.random.uniform(1.7, 20.7, 12), + "month": [5, 6, 7] * 4, + "week": [1, 2] * 6, + } + ) + + mdf = pd.melt(df, id_vars=["month", "week"]) + pd.pivot_table( + mdf, + values="value", + index=["variable", "week"], + columns=["month"], + aggfunc=np.mean, + ) + +Similarly for ``dcast`` which uses a data.frame called ``df`` in R to +aggregate information based on ``Animal`` and ``FeedType``: + +.. code-block:: r + + df <- data.frame( + Animal = c('Animal1', 'Animal2', 'Animal3', 'Animal2', 'Animal1', + 'Animal2', 'Animal3'), + FeedType = c('A', 'B', 'A', 'A', 'B', 'B', 'A'), + Amount = c(10, 7, 4, 2, 5, 6, 2) + ) + + dcast(df, Animal ~ FeedType, sum, fill=NaN) + # Alternative method using base R + with(df, tapply(Amount, list(Animal, FeedType), sum)) + +Python can approach this in two different ways. Firstly, similar to above +using :meth:`~pandas.pivot_table`: + +.. ipython:: python + + df = pd.DataFrame( + { + "Animal": [ + "Animal1", + "Animal2", + "Animal3", + "Animal2", + "Animal1", + "Animal2", + "Animal3", + ], + "FeedType": ["A", "B", "A", "A", "B", "B", "A"], + "Amount": [10, 7, 4, 2, 5, 6, 2], + } + ) + + df.pivot_table(values="Amount", index="Animal", columns="FeedType", aggfunc="sum") + +The second approach is to use the :meth:`~pandas.DataFrame.groupby` method: + +.. ipython:: python + + df.groupby(["Animal", "FeedType"])["Amount"].sum() + +For more details and examples see :ref:`the reshaping documentation +` or :ref:`the groupby documentation`. + +|factor|_ +~~~~~~~~~ + +pandas has a data type for categorical data. + +.. code-block:: r + + cut(c(1,2,3,4,5,6), 3) + factor(c(1,2,3,2,2,3)) + +In pandas this is accomplished with ``pd.cut`` and ``astype("category")``: + +.. ipython:: python + + pd.cut(pd.Series([1, 2, 3, 4, 5, 6]), 3) + pd.Series([1, 2, 3, 2, 2, 3]).astype("category") + +For more details and examples see :ref:`categorical introduction ` and the +:ref:`API documentation `. There is also a documentation regarding the +:ref:`differences to R's factor `. + + +.. |c| replace:: ``c`` +.. _c: https://stat.ethz.ch/R-manual/R-patched/library/base/html/c.html + +.. |aggregate| replace:: ``aggregate`` +.. _aggregate: https://stat.ethz.ch/R-manual/R-patched/library/stats/html/aggregate.html + +.. |match| replace:: ``match`` / ``%in%`` +.. _match: https://stat.ethz.ch/R-manual/R-patched/library/base/html/match.html + +.. |tapply| replace:: ``tapply`` +.. _tapply: https://stat.ethz.ch/R-manual/R-patched/library/base/html/tapply.html + +.. |with| replace:: ``with`` +.. _with: https://stat.ethz.ch/R-manual/R-patched/library/base/html/with.html + +.. 
|subset| replace:: ``subset`` +.. _subset: https://stat.ethz.ch/R-manual/R-patched/library/base/html/subset.html + +.. |factor| replace:: ``factor`` +.. _factor: https://stat.ethz.ch/R-manual/R-devel/library/base/html/factor.html diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst new file mode 100644 index 00000000..595f3c85 --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -0,0 +1,584 @@ +.. _compare_with_sas: + +{{ header }} + +Comparison with SAS +******************** + +For potential users coming from `SAS `__ +this page is meant to demonstrate how different SAS operations would be +performed in pandas. + +.. include:: includes/introduction.rst + + +Data structures +--------------- + +General terminology translation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. csv-table:: + :header: "pandas", "SAS" + :widths: 20, 20 + + ``DataFrame``, data set + column, variable + row, observation + groupby, BY-group + ``NaN``, ``.`` + + +``DataFrame`` +~~~~~~~~~~~~~ + +A ``DataFrame`` in pandas is analogous to a SAS data set - a two-dimensional +data source with labeled columns that can be of different types. As will be +shown in this document, almost any operation that can be applied to a data set +using SAS's ``DATA`` step, can also be accomplished in pandas. + +``Series`` +~~~~~~~~~~ + +A ``Series`` is the data structure that represents one column of a +``DataFrame``. SAS doesn't have a separate data structure for a single column, +but in general, working with a ``Series`` is analogous to referencing a column +in the ``DATA`` step. + +``Index`` +~~~~~~~~~ + +Every ``DataFrame`` and ``Series`` has an ``Index`` - which are labels on the +*rows* of the data. SAS does not have an exactly analogous concept. A data set's +rows are essentially unlabeled, other than an implicit integer index that can be +accessed during the ``DATA`` step (``_N_``). + +In pandas, if no index is specified, an integer index is also used by default +(first row = 0, second row = 1, and so on). While using a labeled ``Index`` or +``MultiIndex`` can enable sophisticated analyses and is ultimately an important +part of pandas to understand, for this comparison we will essentially ignore the +``Index`` and just treat the ``DataFrame`` as a collection of columns. Please +see the :ref:`indexing documentation` for much more on how to use an +``Index`` effectively. + + +Copies vs. in place operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. include:: includes/copies.rst + + +Data input / output +------------------- + +Constructing a DataFrame from values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A SAS data set can be built from specified values by +placing the data after a ``datalines`` statement and +specifying the column names. + +.. code-block:: sas + + data df; + input x y; + datalines; + 1 2 + 3 4 + 5 6 + ; + run; + +.. include:: includes/construct_dataframe.rst + +Reading external data +~~~~~~~~~~~~~~~~~~~~~ + +Like SAS, pandas provides utilities for reading in data from +many formats. The ``tips`` dataset, found within the pandas +tests (`csv `_) +will be used in many of the following examples. + +SAS provides ``PROC IMPORT`` to read csv data into a data set. + +.. code-block:: sas + + proc import datafile='tips.csv' dbms=csv out=tips replace; + getnames=yes; + run; + +The pandas method is :func:`read_csv`, which works similarly. + +.. 
ipython:: python + + url = ( + "https://raw.githubusercontent.com/pandas-dev/" + "pandas/main/pandas/tests/io/data/csv/tips.csv" + ) + tips = pd.read_csv(url) + tips + + +Like ``PROC IMPORT``, ``read_csv`` can take a number of parameters to specify +how the data should be parsed. For example, if the data was instead tab delimited, +and did not have column names, the pandas command would be: + +.. code-block:: python + + tips = pd.read_csv("tips.csv", sep="\t", header=None) + + # alternatively, read_table is an alias to read_csv with tab delimiter + tips = pd.read_table("tips.csv", header=None) + +In addition to text/csv, pandas supports a variety of other data formats +such as Excel, HDF5, and SQL databases. These are all read via a ``pd.read_*`` +function. See the :ref:`IO documentation` for more details. + +Limiting output +~~~~~~~~~~~~~~~ + +.. include:: includes/limit.rst + +The equivalent in SAS would be: + +.. code-block:: sas + + proc print data=df(obs=5); + run; + + +Exporting data +~~~~~~~~~~~~~~ + +The inverse of ``PROC IMPORT`` in SAS is ``PROC EXPORT`` + +.. code-block:: sas + + proc export data=tips outfile='tips2.csv' dbms=csv; + run; + +Similarly in pandas, the opposite of ``read_csv`` is :meth:`~DataFrame.to_csv`, +and other data formats follow a similar api. + +.. code-block:: python + + tips.to_csv("tips2.csv") + + +Data operations +--------------- + +Operations on columns +~~~~~~~~~~~~~~~~~~~~~ + +In the ``DATA`` step, arbitrary math expressions can +be used on new or existing columns. + +.. code-block:: sas + + data tips; + set tips; + total_bill = total_bill - 2; + new_bill = total_bill / 2; + run; + +.. include:: includes/column_operations.rst + + +Filtering +~~~~~~~~~ + +Filtering in SAS is done with an ``if`` or ``where`` statement, on one +or more columns. + +.. code-block:: sas + + data tips; + set tips; + if total_bill > 10; + run; + + data tips; + set tips; + where total_bill > 10; + /* equivalent in this case - where happens before the + DATA step begins and can also be used in PROC statements */ + run; + +.. include:: includes/filtering.rst + +If/then logic +~~~~~~~~~~~~~ + +In SAS, if/then logic can be used to create new columns. + +.. code-block:: sas + + data tips; + set tips; + format bucket $4.; + + if total_bill < 10 then bucket = 'low'; + else bucket = 'high'; + run; + +.. include:: includes/if_then.rst + +Date functionality +~~~~~~~~~~~~~~~~~~ + +SAS provides a variety of functions to do operations on +date/datetime columns. + +.. code-block:: sas + + data tips; + set tips; + format date1 date2 date1_plusmonth mmddyy10.; + date1 = mdy(1, 15, 2013); + date2 = mdy(2, 15, 2015); + date1_year = year(date1); + date2_month = month(date2); + * shift date to beginning of next interval; + date1_next = intnx('MONTH', date1, 1); + * count intervals between dates; + months_between = intck('MONTH', date1, date2); + run; + +The equivalent pandas operations are shown below. In addition to these +functions pandas supports other Time Series features +not available in Base SAS (such as resampling and custom offsets) - +see the :ref:`timeseries documentation` for more details. + +.. include:: includes/time_date.rst + +Selection of columns +~~~~~~~~~~~~~~~~~~~~ + +SAS provides keywords in the ``DATA`` step to select, +drop, and rename columns. + +.. code-block:: sas + + data tips; + set tips; + keep sex total_bill tip; + run; + + data tips; + set tips; + drop sex; + run; + + data tips; + set tips; + rename total_bill=total_bill_2; + run; + +.. 
include:: includes/column_selection.rst + + +Sorting by values +~~~~~~~~~~~~~~~~~ + +Sorting in SAS is accomplished via ``PROC SORT`` + +.. code-block:: sas + + proc sort data=tips; + by sex total_bill; + run; + +.. include:: includes/sorting.rst + +String processing +----------------- + +Finding length of string +~~~~~~~~~~~~~~~~~~~~~~~~ + +SAS determines the length of a character string with the +`LENGTHN `__ +and `LENGTHC `__ +functions. ``LENGTHN`` excludes trailing blanks and ``LENGTHC`` includes trailing blanks. + +.. code-block:: sas + + data _null_; + set tips; + put(LENGTHN(time)); + put(LENGTHC(time)); + run; + +.. include:: includes/length.rst + + +Finding position of substring +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SAS determines the position of a character in a string with the +`FINDW `__ function. +``FINDW`` takes the string defined by the first argument and searches for the first position of the substring +you supply as the second argument. + +.. code-block:: sas + + data _null_; + set tips; + put(FINDW(sex,'ale')); + run; + +.. include:: includes/find_substring.rst + + +Extracting substring by position +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SAS extracts a substring from a string based on its position with the +`SUBSTR `__ function. + +.. code-block:: sas + + data _null_; + set tips; + put(substr(sex,1,1)); + run; + +.. include:: includes/extract_substring.rst + + +Extracting nth word +~~~~~~~~~~~~~~~~~~~ + +The SAS `SCAN `__ +function returns the nth word from a string. The first argument is the string you want to parse and the +second argument specifies which word you want to extract. + +.. code-block:: sas + + data firstlast; + input String $60.; + First_Name = scan(string, 1); + Last_Name = scan(string, -1); + datalines2; + John Smith; + Jane Cook; + ;;; + run; + +.. include:: includes/nth_word.rst + + +Changing case +~~~~~~~~~~~~~ + +The SAS `UPCASE `__ +`LOWCASE `__ and +`PROPCASE `__ +functions change the case of the argument. + +.. code-block:: sas + + data firstlast; + input String $60.; + string_up = UPCASE(string); + string_low = LOWCASE(string); + string_prop = PROPCASE(string); + datalines2; + John Smith; + Jane Cook; + ;;; + run; + +.. include:: includes/case.rst + + +Merging +------- + +.. include:: includes/merge_setup.rst + +In SAS, data must be explicitly sorted before merging. Different +types of joins are accomplished using the ``in=`` dummy +variables to track whether a match was found in one or both +input frames. + +.. code-block:: sas + + proc sort data=df1; + by key; + run; + + proc sort data=df2; + by key; + run; + + data left_join inner_join right_join outer_join; + merge df1(in=a) df2(in=b); + + if a and b then output inner_join; + if a then output left_join; + if b then output right_join; + if a or b then output outer_join; + run; + +.. include:: includes/merge.rst + + +Missing data +------------ + +Both pandas and SAS have a representation for missing data. + +.. include:: includes/missing_intro.rst + +One difference is that missing data cannot be compared to its sentinel value. +For example, in SAS you could do this to filter missing values. + +.. code-block:: sas + + data outer_join_nulls; + set outer_join; + if value_x = .; + run; + + data outer_join_no_nulls; + set outer_join; + if value_x ^= .; + run; + +.. include:: includes/missing.rst + + +GroupBy +------- + +Aggregation +~~~~~~~~~~~ + +SAS's ``PROC SUMMARY`` can be used to group by one or +more key variables and compute aggregations on +numeric columns. + +.. 
code-block:: sas + + proc summary data=tips nway; + class sex smoker; + var total_bill tip; + output out=tips_summed sum=; + run; + +.. include:: includes/groupby.rst + + +Transformation +~~~~~~~~~~~~~~ + +In SAS, if the group aggregations need to be used with +the original frame, it must be merged back together. For +example, to subtract the mean for each observation by smoker group. + +.. code-block:: sas + + proc summary data=tips missing nway; + class smoker; + var total_bill; + output out=smoker_means mean(total_bill)=group_bill; + run; + + proc sort data=tips; + by smoker; + run; + + data tips; + merge tips(in=a) smoker_means(in=b); + by smoker; + adj_total_bill = total_bill - group_bill; + if a and b; + run; + +.. include:: includes/transform.rst + + +By group processing +~~~~~~~~~~~~~~~~~~~ + +In addition to aggregation, pandas ``groupby`` can be used to +replicate most other by group processing from SAS. For example, +this ``DATA`` step reads the data by sex/smoker group and filters to +the first entry for each. + +.. code-block:: sas + + proc sort data=tips; + by sex smoker; + run; + + data tips_first; + set tips; + by sex smoker; + if FIRST.sex or FIRST.smoker then output; + run; + +In pandas this would be written as: + +.. ipython:: python + + tips.groupby(["sex", "smoker"]).first() + + +Other considerations +-------------------- + +Disk vs memory +~~~~~~~~~~~~~~ + +pandas operates exclusively in memory, where a SAS data set exists on disk. +This means that the size of data able to be loaded in pandas is limited by your +machine's memory, but also that the operations on that data may be faster. + +If out of core processing is needed, one possibility is the +`dask.dataframe `_ +library (currently in development) which +provides a subset of pandas functionality for an on-disk ``DataFrame`` + +Data interop +~~~~~~~~~~~~ + +pandas provides a :func:`read_sas` method that can read SAS data saved in +the XPORT or SAS7BDAT binary format. + +.. code-block:: sas + + libname xportout xport 'transport-file.xpt'; + data xportout.tips; + set tips(rename=(total_bill=tbill)); + * xport variable names limited to 6 characters; + run; + +.. code-block:: python + + df = pd.read_sas("transport-file.xpt") + df = pd.read_sas("binary-file.sas7bdat") + +You can also specify the file format directly. By default, pandas will try +to infer the file format based on its extension. + +.. code-block:: python + + df = pd.read_sas("transport-file.xpt", format="xport") + df = pd.read_sas("binary-file.sas7bdat", format="sas7bdat") + +XPORT is a relatively limited format and the parsing of it is not as +optimized as some of the other pandas readers. An alternative way +to interop data between SAS and pandas is to serialize to csv. + +.. code-block:: ipython + + # version 0.17, 10M rows + + In [8]: %time df = pd.read_sas('big.xpt') + Wall time: 14.6 s + + In [9]: %time df = pd.read_csv('big.csv') + Wall time: 4.86 s diff --git a/doc/source/getting_started/comparison/comparison_with_spreadsheets.rst b/doc/source/getting_started/comparison/comparison_with_spreadsheets.rst new file mode 100644 index 00000000..d55b669d --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_spreadsheets.rst @@ -0,0 +1,465 @@ +.. 
_compare_with_spreadsheets: + +{{ header }} + +Comparison with spreadsheets +**************************** + +Since many potential pandas users have some familiarity with spreadsheet programs like +`Excel `_, this page is meant to provide some examples +of how various spreadsheet operations would be performed using pandas. This page will use +terminology and link to documentation for Excel, but much will be the same/similar in +`Google Sheets `_, +`LibreOffice Calc `_, +`Apple Numbers `_, and other +Excel-compatible spreadsheet software. + +.. include:: includes/introduction.rst + +Data structures +--------------- + +General terminology translation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. csv-table:: + :header: "pandas", "Excel" + :widths: 20, 20 + + ``DataFrame``, worksheet + ``Series``, column + ``Index``, row headings + row, row + ``NaN``, empty cell + +``DataFrame`` +~~~~~~~~~~~~~ + +A ``DataFrame`` in pandas is analogous to an Excel worksheet. While an Excel workbook can contain +multiple worksheets, pandas ``DataFrame``\s exist independently. + +``Series`` +~~~~~~~~~~ + +A ``Series`` is the data structure that represents one column of a ``DataFrame``. Working with a +``Series`` is analogous to referencing a column of a spreadsheet. + +``Index`` +~~~~~~~~~ + +Every ``DataFrame`` and ``Series`` has an ``Index``, which are labels on the *rows* of the data. In +pandas, if no index is specified, a :class:`~pandas.RangeIndex` is used by default (first row = 0, +second row = 1, and so on), analogous to row headings/numbers in spreadsheets. + +In pandas, indexes can be set to one (or multiple) unique values, which is like having a column that +is used as the row identifier in a worksheet. Unlike most spreadsheets, these ``Index`` values can +actually be used to reference the rows. (Note that `this can be done in Excel with structured +references +`_.) +For example, in spreadsheets, you would reference the first row as ``A1:Z1``, while in pandas you +could use ``populations.loc['Chicago']``. + +Index values are also persistent, so if you re-order the rows in a ``DataFrame``, the label for a +particular row doesn't change. + +See the :ref:`indexing documentation` for much more on how to use an ``Index`` +effectively. + + +Copies vs. in place operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. include:: includes/copies.rst + + +Data input / output +------------------- + +Constructing a DataFrame from values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In a spreadsheet, `values can be typed directly into cells `_. + +.. include:: includes/construct_dataframe.rst + +Reading external data +~~~~~~~~~~~~~~~~~~~~~ + +Both `Excel `__ +and :ref:`pandas <10min_tut_02_read_write>` can import data from various sources in various +formats. + +CSV +''' + +Let's load and display the `tips `_ +dataset from the pandas tests, which is a CSV file. In Excel, you would download and then +`open the CSV `_. +In pandas, you pass the URL or local path of the CSV file to :func:`~pandas.read_csv`: + +.. ipython:: python + + url = ( + "https://raw.githubusercontent.com/pandas-dev" + "/pandas/main/pandas/tests/io/data/csv/tips.csv" + ) + tips = pd.read_csv(url) + tips + +Like `Excel's Text Import Wizard `_, +``read_csv`` can take a number of parameters to specify how the data should be parsed. For +example, if the data was instead tab delimited, and did not have column names, the pandas command +would be: + +.. 
code-block:: python + + tips = pd.read_csv("tips.csv", sep="\t", header=None) + + # alternatively, read_table is an alias to read_csv with tab delimiter + tips = pd.read_table("tips.csv", header=None) + +Excel files +''''''''''' + +Excel opens `various Excel file formats `_ +by double-clicking them, or using `the Open menu `_. +In pandas, you use :ref:`special methods for reading and writing from/to Excel files `. + +Let's first :ref:`create a new Excel file ` based on the ``tips`` dataframe in the above example: + +.. code-block:: python + + tips.to_excel("./tips.xlsx") + +Should you wish to subsequently access the data in the ``tips.xlsx`` file, you can read it into your module using + +.. code-block:: python + + tips_df = pd.read_excel("./tips.xlsx", index_col=0) + +You have just read in an Excel file using pandas! + + +Limiting output +~~~~~~~~~~~~~~~ + +Spreadsheet programs will only show one screenful of data at a time and then allow you to scroll, so +there isn't really a need to limit output. In pandas, you'll need to put a little more thought into +controlling how your ``DataFrame``\s are displayed. + +.. include:: includes/limit.rst + + +Exporting data +~~~~~~~~~~~~~~ + +By default, desktop spreadsheet software will save to its respective file format (``.xlsx``, ``.ods``, etc). You can, however, `save to other file formats `_. + +:ref:`pandas can create Excel files `, :ref:`CSV `, or :ref:`a number of other formats `. + +Data operations +--------------- + +Operations on columns +~~~~~~~~~~~~~~~~~~~~~ + +In spreadsheets, `formulas +`_ +are often created in individual cells and then `dragged +`_ +into other cells to compute them for other columns. In pandas, you're able to do operations on whole +columns directly. + +.. include:: includes/column_operations.rst + +Note that we aren't having to tell it to do that subtraction cell-by-cell — pandas handles that for +us. See :ref:`how to create new columns derived from existing columns <10min_tut_05_columns>`. + + +Filtering +~~~~~~~~~ + +`In Excel, filtering is done through a graphical menu. `_ + +.. image:: ../../_static/spreadsheets/filter.png + :alt: Screenshot showing filtering of the total_bill column to values greater than 10 + :align: center + +.. include:: includes/filtering.rst + +If/then logic +~~~~~~~~~~~~~ + +Let's say we want to make a ``bucket`` column with values of ``low`` and ``high``, based on whether +the ``total_bill`` is less or more than $10. + +In spreadsheets, logical comparison can be done with `conditional formulas +`_. +We'd use a formula of ``=IF(A2 < 10, "low", "high")``, dragged to all cells in a new ``bucket`` +column. + +.. image:: ../../_static/spreadsheets/conditional.png + :alt: Screenshot showing the formula from above in a bucket column of the tips spreadsheet + :align: center + +.. include:: includes/if_then.rst + +Date functionality +~~~~~~~~~~~~~~~~~~ + +*This section will refer to "dates", but timestamps are handled similarly.* + +We can think of date functionality in two parts: parsing, and output. In spreadsheets, date values +are generally parsed automatically, though there is a `DATEVALUE +`_ +function if you need it. In pandas, you need to explicitly convert plain text to datetime objects, +either :ref:`while reading from a CSV ` or :ref:`once in a DataFrame +<10min_tut_09_timeseries.properties>`. + +Once parsed, spreadsheets display the dates in a default format, though `the format can be changed +`_. 
+In pandas, you'll generally want to keep dates as ``datetime`` objects while you're doing +calculations with them. Outputting *parts* of dates (such as the year) is done through `date +functions +`_ +in spreadsheets, and :ref:`datetime properties <10min_tut_09_timeseries.properties>` in pandas. + +Given ``date1`` and ``date2`` in columns ``A`` and ``B`` of a spreadsheet, you might have these +formulas: + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - column + - formula + * - ``date1_year`` + - ``=YEAR(A2)`` + * - ``date2_month`` + - ``=MONTH(B2)`` + * - ``date1_next`` + - ``=DATE(YEAR(A2),MONTH(A2)+1,1)`` + * - ``months_between`` + - ``=DATEDIF(A2,B2,"M")`` + +The equivalent pandas operations are shown below. + +.. include:: includes/time_date.rst + +See :ref:`timeseries` for more details. + + +Selection of columns +~~~~~~~~~~~~~~~~~~~~ + +In spreadsheets, you can select columns you want by: + +- `Hiding columns `_ +- `Deleting columns `_ +- `Referencing a range `_ from one worksheet into another + +Since spreadsheet columns are typically `named in a header row +`_, +renaming a column is simply a matter of changing the text in that first cell. + +.. include:: includes/column_selection.rst + + +Sorting by values +~~~~~~~~~~~~~~~~~ + +Sorting in spreadsheets is accomplished via `the sort dialog `_. + +.. image:: ../../_static/spreadsheets/sort.png + :alt: Screenshot of dialog from Excel showing sorting by the sex then total_bill columns + :align: center + +.. include:: includes/sorting.rst + +String processing +----------------- + +Finding length of string +~~~~~~~~~~~~~~~~~~~~~~~~ + +In spreadsheets, the number of characters in text can be found with the `LEN +`_ +function. This can be used with the `TRIM +`_ +function to remove extra whitespace. + +:: + + =LEN(TRIM(A2)) + +.. include:: includes/length.rst + +Note this will still include multiple spaces within the string, so isn't 100% equivalent. + + +Finding position of substring +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `FIND +`_ +spreadsheet function returns the position of a substring, with the first character being ``1``. + +.. image:: ../../_static/spreadsheets/sort.png + :alt: Screenshot of FIND formula being used in Excel + :align: center + +.. include:: includes/find_substring.rst + + +Extracting substring by position +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Spreadsheets have a `MID +`_ +formula for extracting a substring from a given position. To get the first character:: + + =MID(A2,1,1) + +.. include:: includes/extract_substring.rst + + +Extracting nth word +~~~~~~~~~~~~~~~~~~~ + +In Excel, you might use the `Text to Columns Wizard +`_ +for splitting text and retrieving a specific column. (Note `it's possible to do so through a formula +as well `_.) + +.. include:: includes/nth_word.rst + + +Changing case +~~~~~~~~~~~~~ + +Spreadsheets provide `UPPER, LOWER, and PROPER functions +`_ +for converting text to upper, lower, and title case, respectively. + +.. include:: includes/case.rst + + +Merging +------- + +.. include:: includes/merge_setup.rst + +In Excel, `merging of tables can be done through a VLOOKUP +`_. + +.. image:: ../../_static/spreadsheets/vlookup.png + :alt: Screenshot showing a VLOOKUP formula between two tables in Excel, with some values being filled in and others with "#N/A" + :align: center + +.. 
include:: includes/merge.rst + +``merge`` has a number of advantages over ``VLOOKUP``: + +* The lookup value doesn't need to be the first column of the lookup table +* If multiple rows are matched, there will be one row for each match, instead of just the first +* It will include all columns from the lookup table, instead of just a single specified column +* It supports :ref:`more complex join operations ` + + +Other considerations +-------------------- + +Fill Handle +~~~~~~~~~~~ + +Create a series of numbers following a set pattern in a certain set of cells. In +a spreadsheet, this would be done by shift+drag after entering the first number or by +entering the first two or three values and then dragging. + +This can be achieved by creating a series and assigning it to the desired cells. + +.. ipython:: python + + df = pd.DataFrame({"AAA": [1] * 8, "BBB": list(range(0, 8))}) + df + + series = list(range(1, 5)) + series + + df.loc[2:5, "AAA"] = series + + df + +Drop Duplicates +~~~~~~~~~~~~~~~ + +Excel has built-in functionality for `removing duplicate values `_. +This is supported in pandas via :meth:`~DataFrame.drop_duplicates`. + +.. ipython:: python + + df = pd.DataFrame( + { + "class": ["A", "A", "A", "B", "C", "D"], + "student_count": [42, 35, 42, 50, 47, 45], + "all_pass": ["Yes", "Yes", "Yes", "No", "No", "Yes"], + } + ) + + df.drop_duplicates() + + df.drop_duplicates(["class", "student_count"]) + +Pivot Tables +~~~~~~~~~~~~ + +`PivotTables `_ +from spreadsheets can be replicated in pandas through :ref:`reshaping`. Using the ``tips`` dataset again, +let's find the average gratuity by size of the party and sex of the server. + +In Excel, we use the following configuration for the PivotTable: + +.. image:: ../../_static/spreadsheets/pivot.png + :alt: Screenshot showing a PivotTable in Excel, using sex as the column, size as the rows, then average tip as the values + :align: center + +The equivalent in pandas: + +.. ipython:: python + + pd.pivot_table( + tips, values="tip", index=["size"], columns=["sex"], aggfunc=np.average + ) + + +Adding a row +~~~~~~~~~~~~ + +Assuming we are using a :class:`~pandas.RangeIndex` (numbered ``0``, ``1``, etc.), we can use :func:`concat` to add a row to the bottom of a ``DataFrame``. + +.. ipython:: python + + df + new_row = pd.DataFrame([["E", 51, True]], + columns=["class", "student_count", "all_pass"]) + pd.concat([df, new_row]) + + +Find and Replace +~~~~~~~~~~~~~~~~ + +`Excel's Find dialog `_ +takes you to cells that match, one by one. In pandas, this operation is generally done for an +entire column or ``DataFrame`` at once through :ref:`conditional expressions <10min_tut_03_subset.rows_and_columns>`. + +.. ipython:: python + + tips + tips == "Sun" + tips["day"].str.contains("S") + +pandas' :meth:`~DataFrame.replace` is comparable to Excel's ``Replace All``. + +.. ipython:: python + + tips.replace("Thu", "Thursday") diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst new file mode 100644 index 00000000..a6d9d65e --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -0,0 +1,512 @@ +.. _compare_with_sql: + +{{ header }} + +Comparison with SQL +******************** +Since many potential pandas users have some familiarity with +`SQL `_, this page is meant to provide some examples of how +various SQL operations would be performed using pandas. + +.. 
include:: includes/introduction.rst + +Most of the examples will utilize the ``tips`` dataset found within pandas tests. We'll read +the data into a DataFrame called ``tips`` and assume we have a database table of the same name and +structure. + +.. ipython:: python + + url = ( + "https://raw.githubusercontent.com/pandas-dev" + "/pandas/main/pandas/tests/io/data/csv/tips.csv" + ) + tips = pd.read_csv(url) + tips + + +Copies vs. in place operations +------------------------------ + +.. include:: includes/copies.rst + + +SELECT +------ +In SQL, selection is done using a comma-separated list of columns you'd like to select (or a ``*`` +to select all columns): + +.. code-block:: sql + + SELECT total_bill, tip, smoker, time + FROM tips; + +With pandas, column selection is done by passing a list of column names to your DataFrame: + +.. ipython:: python + + tips[["total_bill", "tip", "smoker", "time"]] + +Calling the DataFrame without the list of column names would display all columns (akin to SQL's +``*``). + +In SQL, you can add a calculated column: + +.. code-block:: sql + + SELECT *, tip/total_bill as tip_rate + FROM tips; + +With pandas, you can use the :meth:`DataFrame.assign` method of a DataFrame to append a new column: + +.. ipython:: python + + tips.assign(tip_rate=tips["tip"] / tips["total_bill"]) + +WHERE +----- +Filtering in SQL is done via a WHERE clause. + +.. code-block:: sql + + SELECT * + FROM tips + WHERE time = 'Dinner'; + +.. include:: includes/filtering.rst + +Just like SQL's ``OR`` and ``AND``, multiple conditions can be passed to a DataFrame using ``|`` +(``OR``) and ``&`` (``AND``). + +Tips of more than $5 at Dinner meals: + +.. code-block:: sql + + SELECT * + FROM tips + WHERE time = 'Dinner' AND tip > 5.00; + +.. ipython:: python + + tips[(tips["time"] == "Dinner") & (tips["tip"] > 5.00)] + +Tips by parties of at least 5 diners OR bill total was more than $45: + +.. code-block:: sql + + SELECT * + FROM tips + WHERE size >= 5 OR total_bill > 45; + +.. ipython:: python + + tips[(tips["size"] >= 5) | (tips["total_bill"] > 45)] + +NULL checking is done using the :meth:`~pandas.Series.notna` and :meth:`~pandas.Series.isna` +methods. + +.. ipython:: python + + frame = pd.DataFrame( + {"col1": ["A", "B", np.NaN, "C", "D"], "col2": ["F", np.NaN, "G", "H", "I"]} + ) + frame + +Assume we have a table of the same structure as our DataFrame above. We can see only the records +where ``col2`` IS NULL with the following query: + +.. code-block:: sql + + SELECT * + FROM frame + WHERE col2 IS NULL; + +.. ipython:: python + + frame[frame["col2"].isna()] + +Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notna`. + +.. code-block:: sql + + SELECT * + FROM frame + WHERE col1 IS NOT NULL; + +.. ipython:: python + + frame[frame["col1"].notna()] + + +GROUP BY +-------- +In pandas, SQL's ``GROUP BY`` operations are performed using the similarly named +:meth:`~pandas.DataFrame.groupby` method. :meth:`~pandas.DataFrame.groupby` typically refers to a +process where we'd like to split a dataset into groups, apply some function (typically aggregation) +, and then combine the groups together. + +A common SQL operation would be getting the count of records in each group throughout a dataset. +For instance, a query getting us the number of tips left by sex: + +.. code-block:: sql + + SELECT sex, count(*) + FROM tips + GROUP BY sex; + /* + Female 87 + Male 157 + */ + + +The pandas equivalent would be: + +.. 
ipython:: python + + tips.groupby("sex").size() + +Notice that in the pandas code we used :meth:`~pandas.core.groupby.DataFrameGroupBy.size` and not +:meth:`~pandas.core.groupby.DataFrameGroupBy.count`. This is because +:meth:`~pandas.core.groupby.DataFrameGroupBy.count` applies the function to each column, returning +the number of ``NOT NULL`` records within each. + +.. ipython:: python + + tips.groupby("sex").count() + +Alternatively, we could have applied the :meth:`~pandas.core.groupby.DataFrameGroupBy.count` method +to an individual column: + +.. ipython:: python + + tips.groupby("sex")["total_bill"].count() + +Multiple functions can also be applied at once. For instance, say we'd like to see how tip amount +differs by day of the week - :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` allows you to pass a dictionary +to your grouped DataFrame, indicating which functions to apply to specific columns. + +.. code-block:: sql + + SELECT day, AVG(tip), COUNT(*) + FROM tips + GROUP BY day; + /* + Fri 2.734737 19 + Sat 2.993103 87 + Sun 3.255132 76 + Thu 2.771452 62 + */ + +.. ipython:: python + + tips.groupby("day").agg({"tip": np.mean, "day": np.size}) + +Grouping by more than one column is done by passing a list of columns to the +:meth:`~pandas.DataFrame.groupby` method. + +.. code-block:: sql + + SELECT smoker, day, COUNT(*), AVG(tip) + FROM tips + GROUP BY smoker, day; + /* + smoker day + No Fri 4 2.812500 + Sat 45 3.102889 + Sun 57 3.167895 + Thu 45 2.673778 + Yes Fri 15 2.714000 + Sat 42 2.875476 + Sun 19 3.516842 + Thu 17 3.030000 + */ + +.. ipython:: python + + tips.groupby(["smoker", "day"]).agg({"tip": [np.size, np.mean]}) + +.. _compare_with_sql.join: + +JOIN +---- +``JOIN``\s can be performed with :meth:`~pandas.DataFrame.join` or :meth:`~pandas.merge`. By +default, :meth:`~pandas.DataFrame.join` will join the DataFrames on their indices. Each method has +parameters allowing you to specify the type of join to perform (``LEFT``, ``RIGHT``, ``INNER``, +``FULL``) or the columns to join on (column names or indices). + +.. warning:: + + If both key columns contain rows where the key is a null value, those + rows will be matched against each other. This is different from usual SQL + join behaviour and can lead to unexpected results. + +.. ipython:: python + + df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)}) + df2 = pd.DataFrame({"key": ["B", "D", "D", "E"], "value": np.random.randn(4)}) + +Assume we have two database tables of the same name and structure as our DataFrames. + +Now let's go over the various types of ``JOIN``\s. + +INNER JOIN +~~~~~~~~~~ +.. code-block:: sql + + SELECT * + FROM df1 + INNER JOIN df2 + ON df1.key = df2.key; + +.. ipython:: python + + # merge performs an INNER JOIN by default + pd.merge(df1, df2, on="key") + +:meth:`~pandas.merge` also offers parameters for cases when you'd like to join one DataFrame's +column with another DataFrame's index. + +.. ipython:: python + + indexed_df2 = df2.set_index("key") + pd.merge(df1, indexed_df2, left_on="key", right_index=True) + +LEFT OUTER JOIN +~~~~~~~~~~~~~~~ + +Show all records from ``df1``. + +.. code-block:: sql + + SELECT * + FROM df1 + LEFT OUTER JOIN df2 + ON df1.key = df2.key; + +.. ipython:: python + + pd.merge(df1, df2, on="key", how="left") + +RIGHT JOIN +~~~~~~~~~~ + +Show all records from ``df2``. + +.. code-block:: sql + + SELECT * + FROM df1 + RIGHT OUTER JOIN df2 + ON df1.key = df2.key; + +.. 
ipython:: python + + pd.merge(df1, df2, on="key", how="right") + +FULL JOIN +~~~~~~~~~ +pandas also allows for ``FULL JOIN``\s, which display both sides of the dataset, whether or not the +joined columns find a match. As of writing, ``FULL JOIN``\s are not supported in all RDBMS (MySQL). + +Show all records from both tables. + +.. code-block:: sql + + SELECT * + FROM df1 + FULL OUTER JOIN df2 + ON df1.key = df2.key; + +.. ipython:: python + + pd.merge(df1, df2, on="key", how="outer") + + +UNION +----- + +``UNION ALL`` can be performed using :meth:`~pandas.concat`. + +.. ipython:: python + + df1 = pd.DataFrame( + {"city": ["Chicago", "San Francisco", "New York City"], "rank": range(1, 4)} + ) + df2 = pd.DataFrame( + {"city": ["Chicago", "Boston", "Los Angeles"], "rank": [1, 4, 5]} + ) + +.. code-block:: sql + + SELECT city, rank + FROM df1 + UNION ALL + SELECT city, rank + FROM df2; + /* + city rank + Chicago 1 + San Francisco 2 + New York City 3 + Chicago 1 + Boston 4 + Los Angeles 5 + */ + +.. ipython:: python + + pd.concat([df1, df2]) + +SQL's ``UNION`` is similar to ``UNION ALL``, however ``UNION`` will remove duplicate rows. + +.. code-block:: sql + + SELECT city, rank + FROM df1 + UNION + SELECT city, rank + FROM df2; + -- notice that there is only one Chicago record this time + /* + city rank + Chicago 1 + San Francisco 2 + New York City 3 + Boston 4 + Los Angeles 5 + */ + +In pandas, you can use :meth:`~pandas.concat` in conjunction with +:meth:`~pandas.DataFrame.drop_duplicates`. + +.. ipython:: python + + pd.concat([df1, df2]).drop_duplicates() + + +LIMIT +----- + +.. code-block:: sql + + SELECT * FROM tips + LIMIT 10; + +.. ipython:: python + + tips.head(10) + + +pandas equivalents for some SQL analytic and aggregate functions +---------------------------------------------------------------- + +Top n rows with offset +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sql + + -- MySQL + SELECT * FROM tips + ORDER BY tip DESC + LIMIT 10 OFFSET 5; + +.. ipython:: python + + tips.nlargest(10 + 5, columns="tip").tail(10) + +Top n rows per group +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sql + + -- Oracle's ROW_NUMBER() analytic function + SELECT * FROM ( + SELECT + t.*, + ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESC) AS rn + FROM tips t + ) + WHERE rn < 3 + ORDER BY day, rn; + + +.. ipython:: python + + ( + tips.assign( + rn=tips.sort_values(["total_bill"], ascending=False) + .groupby(["day"]) + .cumcount() + + 1 + ) + .query("rn < 3") + .sort_values(["day", "rn"]) + ) + +the same using ``rank(method='first')`` function + +.. ipython:: python + + ( + tips.assign( + rnk=tips.groupby(["day"])["total_bill"].rank( + method="first", ascending=False + ) + ) + .query("rnk < 3") + .sort_values(["day", "rnk"]) + ) + +.. code-block:: sql + + -- Oracle's RANK() analytic function + SELECT * FROM ( + SELECT + t.*, + RANK() OVER(PARTITION BY sex ORDER BY tip) AS rnk + FROM tips t + WHERE tip < 2 + ) + WHERE rnk < 3 + ORDER BY sex, rnk; + +Let's find tips with (rank < 3) per gender group for (tips < 2). +Notice that when using ``rank(method='min')`` function +``rnk_min`` remains the same for the same ``tip`` +(as Oracle's ``RANK()`` function) + +.. ipython:: python + + ( + tips[tips["tip"] < 2] + .assign(rnk_min=tips.groupby(["sex"])["tip"].rank(method="min")) + .query("rnk_min < 3") + .sort_values(["sex", "rnk_min"]) + ) + + +UPDATE +------ + +.. code-block:: sql + + UPDATE tips + SET tip = tip*2 + WHERE tip < 2; + +.. 
ipython:: python + + tips.loc[tips["tip"] < 2, "tip"] *= 2 + +DELETE +------ + +.. code-block:: sql + + DELETE FROM tips + WHERE tip > 9; + +In pandas we select the rows that should remain instead of deleting them: + +.. ipython:: python + + tips = tips.loc[tips["tip"] <= 9] diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst new file mode 100644 index 00000000..b4b0c42d --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -0,0 +1,501 @@ +.. _compare_with_stata: + +{{ header }} + +Comparison with Stata +********************* +For potential users coming from `Stata `__ +this page is meant to demonstrate how different Stata operations would be +performed in pandas. + +.. include:: includes/introduction.rst + + +Data structures +--------------- + +General terminology translation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. csv-table:: + :header: "pandas", "Stata" + :widths: 20, 20 + + ``DataFrame``, data set + column, variable + row, observation + groupby, bysort + ``NaN``, ``.`` + + +``DataFrame`` +~~~~~~~~~~~~~ + +A ``DataFrame`` in pandas is analogous to a Stata data set -- a two-dimensional +data source with labeled columns that can be of different types. As will be +shown in this document, almost any operation that can be applied to a data set +in Stata can also be accomplished in pandas. + +``Series`` +~~~~~~~~~~ + +A ``Series`` is the data structure that represents one column of a +``DataFrame``. Stata doesn't have a separate data structure for a single column, +but in general, working with a ``Series`` is analogous to referencing a column +of a data set in Stata. + +``Index`` +~~~~~~~~~ + +Every ``DataFrame`` and ``Series`` has an ``Index`` -- labels on the +*rows* of the data. Stata does not have an exactly analogous concept. In Stata, a data set's +rows are essentially unlabeled, other than an implicit integer index that can be +accessed with ``_n``. + +In pandas, if no index is specified, an integer index is also used by default +(first row = 0, second row = 1, and so on). While using a labeled ``Index`` or +``MultiIndex`` can enable sophisticated analyses and is ultimately an important +part of pandas to understand, for this comparison we will essentially ignore the +``Index`` and just treat the ``DataFrame`` as a collection of columns. Please +see the :ref:`indexing documentation` for much more on how to use an +``Index`` effectively. + + +Copies vs. in place operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. include:: includes/copies.rst + + +Data input / output +------------------- + +Constructing a DataFrame from values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A Stata data set can be built from specified values by +placing the data after an ``input`` statement and +specifying the column names. + +.. code-block:: stata + + input x y + 1 2 + 3 4 + 5 6 + end + +.. include:: includes/construct_dataframe.rst + +Reading external data +~~~~~~~~~~~~~~~~~~~~~ + +Like Stata, pandas provides utilities for reading in data from +many formats. The ``tips`` data set, found within the pandas +tests (`csv `_) +will be used in many of the following examples. + +Stata provides ``import delimited`` to read csv data into a data set in memory. +If the ``tips.csv`` file is in the current working directory, we can import it as follows. + +.. code-block:: stata + + import delimited tips.csv + +The pandas method is :func:`read_csv`, which works similarly. 
Additionally, it will automatically download +the data set if presented with a url. + +.. ipython:: python + + url = ( + "https://raw.githubusercontent.com/pandas-dev" + "/pandas/main/pandas/tests/io/data/csv/tips.csv" + ) + tips = pd.read_csv(url) + tips + +Like ``import delimited``, :func:`read_csv` can take a number of parameters to specify +how the data should be parsed. For example, if the data were instead tab delimited, +did not have column names, and existed in the current working directory, +the pandas command would be: + +.. code-block:: python + + tips = pd.read_csv("tips.csv", sep="\t", header=None) + + # alternatively, read_table is an alias to read_csv with tab delimiter + tips = pd.read_table("tips.csv", header=None) + +pandas can also read Stata data sets in ``.dta`` format with the :func:`read_stata` function. + +.. code-block:: python + + df = pd.read_stata("data.dta") + +In addition to text/csv and Stata files, pandas supports a variety of other data formats +such as Excel, SAS, HDF5, Parquet, and SQL databases. These are all read via a ``pd.read_*`` +function. See the :ref:`IO documentation` for more details. + + +Limiting output +~~~~~~~~~~~~~~~ + +.. include:: includes/limit.rst + +The equivalent in Stata would be: + +.. code-block:: stata + + list in 1/5 + + +Exporting data +~~~~~~~~~~~~~~ + +The inverse of ``import delimited`` in Stata is ``export delimited`` + +.. code-block:: stata + + export delimited tips2.csv + +Similarly in pandas, the opposite of ``read_csv`` is :meth:`DataFrame.to_csv`. + +.. code-block:: python + + tips.to_csv("tips2.csv") + +pandas can also export to Stata file format with the :meth:`DataFrame.to_stata` method. + +.. code-block:: python + + tips.to_stata("tips2.dta") + + +Data operations +--------------- + +Operations on columns +~~~~~~~~~~~~~~~~~~~~~ + +In Stata, arbitrary math expressions can be used with the ``generate`` and +``replace`` commands on new or existing columns. The ``drop`` command drops +the column from the data set. + +.. code-block:: stata + + replace total_bill = total_bill - 2 + generate new_bill = total_bill / 2 + drop new_bill + +.. include:: includes/column_operations.rst + + +Filtering +~~~~~~~~~ + +Filtering in Stata is done with an ``if`` clause on one or more columns. + +.. code-block:: stata + + list if total_bill > 10 + +.. include:: includes/filtering.rst + +If/then logic +~~~~~~~~~~~~~ + +In Stata, an ``if`` clause can also be used to create new columns. + +.. code-block:: stata + + generate bucket = "low" if total_bill < 10 + replace bucket = "high" if total_bill >= 10 + +.. include:: includes/if_then.rst + +Date functionality +~~~~~~~~~~~~~~~~~~ + +Stata provides a variety of functions to do operations on +date/datetime columns. + +.. code-block:: stata + + generate date1 = mdy(1, 15, 2013) + generate date2 = date("Feb152015", "MDY") + + generate date1_year = year(date1) + generate date2_month = month(date2) + + * shift date to beginning of next month + generate date1_next = mdy(month(date1) + 1, 1, year(date1)) if month(date1) != 12 + replace date1_next = mdy(1, 1, year(date1) + 1) if month(date1) == 12 + generate months_between = mofd(date2) - mofd(date1) + + list date1 date2 date1_year date2_month date1_next months_between + +The equivalent pandas operations are shown below. In addition to these +functions, pandas supports other Time Series features +not available in Stata (such as time zone handling and custom offsets) -- +see the :ref:`timeseries documentation` for more details. + +.. 
include:: includes/time_date.rst + +Selection of columns +~~~~~~~~~~~~~~~~~~~~ + +Stata provides keywords to select, drop, and rename columns. + +.. code-block:: stata + + keep sex total_bill tip + + drop sex + + rename total_bill total_bill_2 + +.. include:: includes/column_selection.rst + + +Sorting by values +~~~~~~~~~~~~~~~~~ + +Sorting in Stata is accomplished via ``sort`` + +.. code-block:: stata + + sort sex total_bill + +.. include:: includes/sorting.rst + +String processing +----------------- + +Finding length of string +~~~~~~~~~~~~~~~~~~~~~~~~ + +Stata determines the length of a character string with the :func:`strlen` and +:func:`ustrlen` functions for ASCII and Unicode strings, respectively. + +.. code-block:: stata + + generate strlen_time = strlen(time) + generate ustrlen_time = ustrlen(time) + +.. include:: includes/length.rst + + +Finding position of substring +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Stata determines the position of a character in a string with the :func:`strpos` function. +This takes the string defined by the first argument and searches for the +first position of the substring you supply as the second argument. + +.. code-block:: stata + + generate str_position = strpos(sex, "ale") + +.. include:: includes/find_substring.rst + + +Extracting substring by position +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Stata extracts a substring from a string based on its position with the :func:`substr` function. + +.. code-block:: stata + + generate short_sex = substr(sex, 1, 1) + +.. include:: includes/extract_substring.rst + + +Extracting nth word +~~~~~~~~~~~~~~~~~~~ + +The Stata :func:`word` function returns the nth word from a string. +The first argument is the string you want to parse and the +second argument specifies which word you want to extract. + +.. code-block:: stata + + clear + input str20 string + "John Smith" + "Jane Cook" + end + + generate first_name = word(name, 1) + generate last_name = word(name, -1) + +.. include:: includes/nth_word.rst + + +Changing case +~~~~~~~~~~~~~ + +The Stata :func:`strupper`, :func:`strlower`, :func:`strproper`, +:func:`ustrupper`, :func:`ustrlower`, and :func:`ustrtitle` functions +change the case of ASCII and Unicode strings, respectively. + +.. code-block:: stata + + clear + input str20 string + "John Smith" + "Jane Cook" + end + + generate upper = strupper(string) + generate lower = strlower(string) + generate title = strproper(string) + list + +.. include:: includes/case.rst + + +Merging +------- + +.. include:: includes/merge_setup.rst + +In Stata, to perform a merge, one data set must be in memory +and the other must be referenced as a file name on disk. In +contrast, Python must have both ``DataFrames`` already in memory. + +By default, Stata performs an outer join, where all observations +from both data sets are left in memory after the merge. One can +keep only observations from the initial data set, the merged data set, +or the intersection of the two by using the values created in the +``_merge`` variable. + +.. 
code-block:: stata + + * First create df2 and save to disk + clear + input str1 key + B + D + D + E + end + generate value = rnormal() + save df2.dta + + * Now create df1 in memory + clear + input str1 key + A + B + C + D + end + generate value = rnormal() + + preserve + + * Left join + merge 1:n key using df2.dta + keep if _merge == 1 + + * Right join + restore, preserve + merge 1:n key using df2.dta + keep if _merge == 2 + + * Inner join + restore, preserve + merge 1:n key using df2.dta + keep if _merge == 3 + + * Outer join + restore + merge 1:n key using df2.dta + +.. include:: includes/merge.rst + + +Missing data +------------ + +Both pandas and Stata have a representation for missing data. + +.. include:: includes/missing_intro.rst + +One difference is that missing data cannot be compared to its sentinel value. +For example, in Stata you could do this to filter missing values. + +.. code-block:: stata + + * Keep missing values + list if value_x == . + * Keep non-missing values + list if value_x != . + +.. include:: includes/missing.rst + + +GroupBy +------- + +Aggregation +~~~~~~~~~~~ + +Stata's ``collapse`` can be used to group by one or +more key variables and compute aggregations on +numeric columns. + +.. code-block:: stata + + collapse (sum) total_bill tip, by(sex smoker) + +.. include:: includes/groupby.rst + + +Transformation +~~~~~~~~~~~~~~ + +In Stata, if the group aggregations need to be used with the +original data set, one would usually use ``bysort`` with :func:`egen`. +For example, to subtract the mean for each observation by smoker group. + +.. code-block:: stata + + bysort sex smoker: egen group_bill = mean(total_bill) + generate adj_total_bill = total_bill - group_bill + +.. include:: includes/transform.rst + + +By group processing +~~~~~~~~~~~~~~~~~~~ + +In addition to aggregation, pandas ``groupby`` can be used to +replicate most other ``bysort`` processing from Stata. For example, +the following example lists the first observation in the current +sort order by sex/smoker group. + +.. code-block:: stata + + bysort sex smoker: list if _n == 1 + +In pandas this would be written as: + +.. ipython:: python + + tips.groupby(["sex", "smoker"]).first() + + +Other considerations +-------------------- + +Disk vs memory +~~~~~~~~~~~~~~ + +pandas and Stata both operate exclusively in memory. This means that the size of +data able to be loaded in pandas is limited by your machine's memory. +If out of core processing is needed, one possibility is the +`dask.dataframe `_ +library, which provides a subset of pandas functionality for an +on-disk ``DataFrame``. diff --git a/doc/source/getting_started/comparison/includes/case.rst b/doc/source/getting_started/comparison/includes/case.rst new file mode 100644 index 00000000..c00a830b --- /dev/null +++ b/doc/source/getting_started/comparison/includes/case.rst @@ -0,0 +1,10 @@ +The equivalent pandas methods are :meth:`Series.str.upper`, :meth:`Series.str.lower`, and +:meth:`Series.str.title`. + +.. 
ipython:: python + + firstlast = pd.DataFrame({"string": ["John Smith", "Jane Cook"]}) + firstlast["upper"] = firstlast["string"].str.upper() + firstlast["lower"] = firstlast["string"].str.lower() + firstlast["title"] = firstlast["string"].str.title() + firstlast diff --git a/doc/source/getting_started/comparison/includes/column_operations.rst b/doc/source/getting_started/comparison/includes/column_operations.rst new file mode 100644 index 00000000..b23b931e --- /dev/null +++ b/doc/source/getting_started/comparison/includes/column_operations.rst @@ -0,0 +1,11 @@ +pandas provides vectorized operations by specifying the individual ``Series`` in the +``DataFrame``. New columns can be assigned in the same way. The :meth:`DataFrame.drop` method drops +a column from the ``DataFrame``. + +.. ipython:: python + + tips["total_bill"] = tips["total_bill"] - 2 + tips["new_bill"] = tips["total_bill"] / 2 + tips + + tips = tips.drop("new_bill", axis=1) diff --git a/doc/source/getting_started/comparison/includes/column_selection.rst b/doc/source/getting_started/comparison/includes/column_selection.rst new file mode 100644 index 00000000..071645c9 --- /dev/null +++ b/doc/source/getting_started/comparison/includes/column_selection.rst @@ -0,0 +1,22 @@ +The same operations are expressed in pandas below. + +Keep certain columns +'''''''''''''''''''' + +.. ipython:: python + + tips[["sex", "total_bill", "tip"]] + +Drop a column +''''''''''''' + +.. ipython:: python + + tips.drop("sex", axis=1) + +Rename a column +''''''''''''''' + +.. ipython:: python + + tips.rename(columns={"total_bill": "total_bill_2"}) diff --git a/doc/source/getting_started/comparison/includes/construct_dataframe.rst b/doc/source/getting_started/comparison/includes/construct_dataframe.rst new file mode 100644 index 00000000..4d066c79 --- /dev/null +++ b/doc/source/getting_started/comparison/includes/construct_dataframe.rst @@ -0,0 +1,9 @@ +A pandas ``DataFrame`` can be constructed in many different ways, +but for a small number of values, it is often convenient to specify it as +a Python dictionary, where the keys are the column names +and the values are the data. + +.. ipython:: python + + df = pd.DataFrame({"x": [1, 3, 5], "y": [2, 4, 6]}) + df diff --git a/doc/source/getting_started/comparison/includes/copies.rst b/doc/source/getting_started/comparison/includes/copies.rst new file mode 100644 index 00000000..08ccd476 --- /dev/null +++ b/doc/source/getting_started/comparison/includes/copies.rst @@ -0,0 +1,23 @@ +Most pandas operations return copies of the ``Series``/``DataFrame``. To make the changes "stick", +you'll need to either assign to a new variable: + + .. code-block:: python + + sorted_df = df.sort_values("col1") + + +or overwrite the original one: + + .. code-block:: python + + df = df.sort_values("col1") + +.. note:: + + You will see an ``inplace=True`` keyword argument available for some methods: + + .. code-block:: python + + df.sort_values("col1", inplace=True) + + Its use is discouraged. :ref:`More information. ` diff --git a/doc/source/getting_started/comparison/includes/extract_substring.rst b/doc/source/getting_started/comparison/includes/extract_substring.rst new file mode 100644 index 00000000..1ba0dfac --- /dev/null +++ b/doc/source/getting_started/comparison/includes/extract_substring.rst @@ -0,0 +1,7 @@ +With pandas you can use ``[]`` notation to extract a substring +from a string by position locations. Keep in mind that Python +indexes are zero-based. + +.. 
diff --git a/doc/source/getting_started/comparison/includes/extract_substring.rst b/doc/source/getting_started/comparison/includes/extract_substring.rst
new file mode 100644
index 00000000..1ba0dfac
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/extract_substring.rst
@@ -0,0 +1,7 @@
+With pandas you can use ``[]`` notation to extract a substring
+from a string by position locations. Keep in mind that Python
+indexes are zero-based.
+
+.. ipython:: python
+
+    tips["sex"].str[0:1]
diff --git a/doc/source/getting_started/comparison/includes/filtering.rst b/doc/source/getting_started/comparison/includes/filtering.rst
new file mode 100644
index 00000000..8ddf7c0d
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/filtering.rst
@@ -0,0 +1,16 @@
+DataFrames can be filtered in multiple ways; the most intuitive of which is using
+:ref:`boolean indexing `.
+
+.. ipython:: python
+
+    tips[tips["total_bill"] > 10]
+
+The above statement is simply passing a ``Series`` of ``True``/``False`` objects to the DataFrame,
+returning all rows with ``True``.
+
+.. ipython:: python
+
+    is_dinner = tips["time"] == "Dinner"
+    is_dinner
+    is_dinner.value_counts()
+    tips[is_dinner]
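+
+As an additional illustration (not part of the original example set), boolean masks can be
+combined with ``&`` (and) and ``|`` (or); each condition needs its own parentheses:
+
+.. code-block:: python
+
+    # dinner-time rows with a tip above 5 dollars
+    tips[(tips["time"] == "Dinner") & (tips["tip"] > 5.00)]
+    # parties of at least 5, or a total bill above 45
+    tips[(tips["size"] >= 5) | (tips["total_bill"] > 45)]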
diff --git a/doc/source/getting_started/comparison/includes/find_substring.rst b/doc/source/getting_started/comparison/includes/find_substring.rst
new file mode 100644
index 00000000..42543d05
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/find_substring.rst
@@ -0,0 +1,8 @@
+You can find the position of a character in a column of strings with the :meth:`Series.str.find`
+method. ``find`` searches for the first position of the substring. If the substring is found, the
+method returns its position. If not found, it returns ``-1``. Keep in mind that Python indexes are
+zero-based.
+
+.. ipython:: python
+
+    tips["sex"].str.find("ale")
diff --git a/doc/source/getting_started/comparison/includes/groupby.rst b/doc/source/getting_started/comparison/includes/groupby.rst
new file mode 100644
index 00000000..93d5d51e
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/groupby.rst
@@ -0,0 +1,7 @@
+pandas provides a flexible ``groupby`` mechanism that allows similar aggregations. See the
+:ref:`groupby documentation` for more details and examples.
+
+.. ipython:: python
+
+    tips_summed = tips.groupby(["sex", "smoker"])[["total_bill", "tip"]].sum()
+    tips_summed
diff --git a/doc/source/getting_started/comparison/includes/if_then.rst b/doc/source/getting_started/comparison/includes/if_then.rst
new file mode 100644
index 00000000..f94e7588
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/if_then.rst
@@ -0,0 +1,12 @@
+The same operation in pandas can be accomplished using
+the ``where`` method from ``numpy``.
+
+.. ipython:: python
+
+    tips["bucket"] = np.where(tips["total_bill"] < 10, "low", "high")
+    tips
+
+.. ipython:: python
+    :suppress:
+
+    tips = tips.drop("bucket", axis=1)
diff --git a/doc/source/getting_started/comparison/includes/introduction.rst b/doc/source/getting_started/comparison/includes/introduction.rst
new file mode 100644
index 00000000..aedf2875
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/introduction.rst
@@ -0,0 +1,9 @@
+If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>`
+to familiarize yourself with the library.
+
+As is customary, we import pandas and NumPy as follows:
+
+.. ipython:: python
+
+    import pandas as pd
+    import numpy as np
diff --git a/doc/source/getting_started/comparison/includes/length.rst b/doc/source/getting_started/comparison/includes/length.rst
new file mode 100644
index 00000000..9141fd4e
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/length.rst
@@ -0,0 +1,8 @@
+You can find the length of a character string with :meth:`Series.str.len`.
+In Python 3, all strings are Unicode strings. ``len`` includes trailing blanks.
+Use ``len`` and ``rstrip`` to exclude trailing blanks.
+
+.. ipython:: python
+
+    tips["time"].str.len()
+    tips["time"].str.rstrip().str.len()
diff --git a/doc/source/getting_started/comparison/includes/limit.rst b/doc/source/getting_started/comparison/includes/limit.rst
new file mode 100644
index 00000000..4efeb4e4
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/limit.rst
@@ -0,0 +1,7 @@
+By default, pandas will truncate output of large ``DataFrame``\s to show the first and last rows.
+This can be overridden by :ref:`changing the pandas options `, or using
+:meth:`DataFrame.head` or :meth:`DataFrame.tail`.
+
+.. ipython:: python
+
+    tips.head(5)
diff --git a/doc/source/getting_started/comparison/includes/merge.rst b/doc/source/getting_started/comparison/includes/merge.rst
new file mode 100644
index 00000000..b8e3f54f
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/merge.rst
@@ -0,0 +1,17 @@
+pandas DataFrames have a :meth:`~DataFrame.merge` method, which provides similar functionality. The
+data does not have to be sorted ahead of time, and different join types are accomplished via the
+``how`` keyword.
+
+.. ipython:: python
+
+    inner_join = df1.merge(df2, on=["key"], how="inner")
+    inner_join
+
+    left_join = df1.merge(df2, on=["key"], how="left")
+    left_join
+
+    right_join = df1.merge(df2, on=["key"], how="right")
+    right_join
+
+    outer_join = df1.merge(df2, on=["key"], how="outer")
+    outer_join
diff --git a/doc/source/getting_started/comparison/includes/merge_setup.rst b/doc/source/getting_started/comparison/includes/merge_setup.rst
new file mode 100644
index 00000000..f115cd58
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/merge_setup.rst
@@ -0,0 +1,8 @@
+The following tables will be used in the merge examples:
+
+.. ipython:: python
+
+    df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)})
+    df1
+    df2 = pd.DataFrame({"key": ["B", "D", "D", "E"], "value": np.random.randn(4)})
+    df2
diff --git a/doc/source/getting_started/comparison/includes/missing.rst b/doc/source/getting_started/comparison/includes/missing.rst
new file mode 100644
index 00000000..341c7d54
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/missing.rst
@@ -0,0 +1,31 @@
+In pandas, :meth:`Series.isna` and :meth:`Series.notna` can be used to filter the rows.
+
+.. ipython:: python
+
+    outer_join[outer_join["value_x"].isna()]
+    outer_join[outer_join["value_x"].notna()]
+
+pandas provides :ref:`a variety of methods to work with missing data `. Here are some examples:
+
+Drop rows with missing values
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. ipython:: python
+
+    outer_join.dropna()
+
+Forward fill from previous rows
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. ipython:: python
+
+    outer_join.fillna(method="ffill")
+
+Replace missing values with a specified value
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Using the mean:
+
+.. ipython:: python
+
+    outer_join["value_x"].fillna(outer_join["value_x"].mean())
diff --git a/doc/source/getting_started/comparison/includes/missing_intro.rst b/doc/source/getting_started/comparison/includes/missing_intro.rst
new file mode 100644
index 00000000..366aa43d
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/missing_intro.rst
@@ -0,0 +1,9 @@
+pandas represents missing data with the special float value ``NaN`` (not a number). Many of the
+semantics are the same; for example missing data propagates through numeric operations, and is
+ignored by default for aggregations.
+
+.. ipython:: python
+
+    outer_join
+    outer_join["value_x"] + outer_join["value_y"]
+    outer_join["value_x"].sum()
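+
+A tiny, self-contained sketch of the same semantics (the series here is made up for the
+example; note also that ``NaN`` never compares equal to anything, so :meth:`Series.isna` is
+used to detect it):
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+
+    s = pd.Series([1.0, np.nan, 3.0])
+    s + 1        # NaN propagates: [2.0, NaN, 4.0]
+    s.sum()      # NaN is skipped by default: 4.0
+    s == np.nan  # always False -- comparisons with NaN never match
+    s.isna()     # [False, True, False]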
diff --git a/doc/source/getting_started/comparison/includes/nth_word.rst b/doc/source/getting_started/comparison/includes/nth_word.rst
new file mode 100644
index 00000000..20e2ec47
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/nth_word.rst
@@ -0,0 +1,9 @@
+The simplest way to extract words in pandas is to split the strings by spaces, then reference the
+word by index. Note there are more powerful approaches should you need them.
+
+.. ipython:: python
+
+    firstlast = pd.DataFrame({"String": ["John Smith", "Jane Cook"]})
+    firstlast["First_Name"] = firstlast["String"].str.split(" ", expand=True)[0]
+    firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[1]
+    firstlast
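+
+One of the "more powerful approaches" alluded to above is a regular expression with
+:meth:`Series.str.extract`, sketched here as an alternative (it assumes the two-word
+``String`` column built above):
+
+.. code-block:: python
+
+    # named capture groups become column names in the result
+    firstlast["String"].str.extract(r"(?P<First_Name>\w+) (?P<Last_Name>\w+)")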
diff --git a/doc/source/getting_started/comparison/includes/sorting.rst b/doc/source/getting_started/comparison/includes/sorting.rst
new file mode 100644
index 00000000..4e2e40a1
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/sorting.rst
@@ -0,0 +1,6 @@
+pandas has a :meth:`DataFrame.sort_values` method, which takes a list of columns to sort by.
+
+.. ipython:: python
+
+    tips = tips.sort_values(["sex", "total_bill"])
+    tips
diff --git a/doc/source/getting_started/comparison/includes/time_date.rst b/doc/source/getting_started/comparison/includes/time_date.rst
new file mode 100644
index 00000000..fb9ee2e2
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/time_date.rst
@@ -0,0 +1,22 @@
+.. ipython:: python
+
+    tips["date1"] = pd.Timestamp("2013-01-15")
+    tips["date2"] = pd.Timestamp("2015-02-15")
+    tips["date1_year"] = tips["date1"].dt.year
+    tips["date2_month"] = tips["date2"].dt.month
+    tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin()
+    tips["months_between"] = tips["date2"].dt.to_period("M") - tips[
+        "date1"
+    ].dt.to_period("M")
+
+    tips[
+        ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"]
+    ]
+
+.. ipython:: python
+    :suppress:
+
+    tips = tips.drop(
+        ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"],
+        axis=1,
+    )
diff --git a/doc/source/getting_started/comparison/includes/transform.rst b/doc/source/getting_started/comparison/includes/transform.rst
new file mode 100644
index 00000000..b7599471
--- /dev/null
+++ b/doc/source/getting_started/comparison/includes/transform.rst
@@ -0,0 +1,8 @@
+pandas provides a :ref:`groupby.transform` mechanism that allows these types of operations to be
+succinctly expressed in one operation.
+
+.. ipython:: python
+
+    gb = tips.groupby("smoker")["total_bill"]
+    tips["adj_total_bill"] = tips["total_bill"] - gb.transform("mean")
+    tips
diff --git a/doc/source/getting_started/comparison/index.rst b/doc/source/getting_started/comparison/index.rst
new file mode 100644
index 00000000..c3f58ce1
--- /dev/null
+++ b/doc/source/getting_started/comparison/index.rst
@@ -0,0 +1,16 @@
+{{ header }}
+
+.. _comparison:
+
+===========================
+Comparison with other tools
+===========================
+
+.. toctree::
+    :maxdepth: 2
+
+    comparison_with_r
+    comparison_with_sql
+    comparison_with_spreadsheets
+    comparison_with_sas
+    comparison_with_stata
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
new file mode 100644
index 00000000..4792d26d
--- /dev/null
+++ b/doc/source/getting_started/index.rst
@@ -0,0 +1,647 @@
+{{ header }}
+
+.. _getting_started:
+
+===============
+Getting started
+===============
+
+Installation
+------------
+
+.. panels::
+    :card: + install-card
+    :column: col-lg-6 col-md-6 col-sm-12 col-xs-12 p-3
+
+    Working with conda?
+    ^^^^^^^^^^^^^^^^^^^
+
+    pandas is part of the `Anaconda `__
+    distribution and can be installed with Anaconda or Miniconda:
+
+    ++++++++++++++++++++++
+
+    .. code-block:: bash
+
+        conda install pandas
+
+    ---
+
+    Prefer pip?
+    ^^^^^^^^^^^
+
+    pandas can be installed via pip from `PyPI `__.
+
+    ++++
+
+    .. code-block:: bash
+
+        pip install pandas
+
+    ---
+    :column: col-12 p-3
+
+    In-depth instructions?
+    ^^^^^^^^^^^^^^^^^^^^^^
+
+    Installing a specific version? Installing from source? Check the advanced
+    installation page.
+
+    .. link-button:: ./install.html
+        :type: url
+        :text: Learn more
+        :classes: btn-secondary stretched-link
+
+
+.. _gentle_intro:
+
+Intro to pandas
+---------------
+
+.. raw:: html
+

zsiv

  • |4C z>ul^!!8r!zo+?O`NTlwQLAbk8_Sx?&v2@|t>uf%c1_9aa-N}<{*px*d4s`5P0HunE zYbhhPY7x)6TJR={4t+c)*Y`<#iLzli#KrKc6h~)c{ols{u3}kfiu{fT4$|{2lV%_ zwFSBwl;yd)pwQ-x1JMc^1 zc8|#GQcC>ZMyNf#$igxnPRj2NB5Tgf9lzoGh%96^krEv(20qRVAZ0@7vZ1kW!rksQ z3@IgEcwMxfMKZ~J~E76 z1b!p=ILfzqhmP(~IGmAtQ%qd^FF2B*E!<+trO*Cv<5=6-H?MO|HyHJ)gunf-_o*<6 z{>z_AQDxdpK`8$*KK;wk6>iJh`mB=%lkcCuNS^)eqe_zgd;G{EQ~&>tCy@)E`H$Q6 ze@%T~%6>@7fETPO=)CyvhkyCm+68qY{r!`0tK!ghwUPp#psY{g|4X#IGj2*y3 zaY^@4921oM(azmsA&@g9DPe3(si@t>?WzeRBl+&Hy(uXNQ3gh!6tt?Nc3hJ4>MV9V(K{zH;30 zfaXMfODrU@;#Y9Yefi2|LjUHLI1Q?f5%K|008BjTN)GMMC(3_=Xf-9KL6uC$#Q%})G8U1%zM;<_c% zE+h`X-JvAnP)`;j=)M50NfS)9-3{8sYj_gNsubY-=em$Qqm+O@TYYwSt+8i9ieyed-Y`?1!Ec)+zvZ75v0)jLbx|WKrdcV7d z;n+Ew32eis{n>Q51 z%ZlYy0^(P~_9z*@&Lh+9*jAezb4-z|B);K~UaPJ$>DS?vdi_+1@-_PH8`Q>MQ8E6a z-sZzESwqr(7&|mt97VugK`= zf^`dz(t8^tfMsi4+!Au*;ic;i-c91UvqJlhg5B_?v0u@xJk^Ppn8&sS9eO?rpGXrr z;rsVg!g{DrKhapbv0LJ1l4sQ|u92g!lfL}-)ozUz{c55`{=abq9(Q7X$DR4hjQoCH9%*Z~$~L{Gh;{3WYo;X2xFQ zWSEniOBb(AKHg&ABe{sJHM|t#eQX=A85XlxRvD-Irk;Sxm4FZ%9+V2-_2pX|8XJ>N zs)XpKgf)ma2p03zDk1w^y*zN=u&$=3T{=<4?D+cck)IymHLfuw2mkEG_>DGTY;8t zob+YW+j6qvvX$9FleOOJUP>nGJM)b$A(6q^A`RKP=}KVnF`K!N3rR3>2(?X2;ubYP zJI?6W9lYOSI7*`?hji;QaD8R)rsSLU4pmCI*TB`N>)q7eYo0&u=StA~wAjHK;@SAI z@L2Fv_?5s~oMo4pW0G9B8uM1~fFUH!m7|M2sOfYdfOu6k?IPs!a#5lS+w0Z20O&m9GwcN8~+@*bW<(^29*=V)LsCK}jiay>+?I$RBB z=pxN@Yb~(Ko25>xDiG0*8ZkLo{&17+5t<$h*mbB%9=$DcMrX$-iczVMn^YNJ;Br3i zX=*P5X<(pLQP;1pk69?&X-yrnG~6R())vp2o}tB8KUWt5l|$yut{WOc!ouV-A^oqM z4%GO~2|UoqMvN?(zOLr8Uh*xs*mUxD?6A~0cx`%w4Ew%U(XC`!DJuQ3w||gz+Ylm1 zXCZSb(WK|P92Owmn=X>8&~$K9_09+ws6 ztdN^c&0Izl$g&Cea&TUaJHNv_6lwKoS>o;f{F`)1|9=C^2_vTKdMH6ZlV5P#ju$lTwvZwl-gC;I zvTY{-I(AEfta}k%onNQIzG5gCg1^ELp>Jn&^}rwRULVMp>85+39y*v3e+SKRt!6IT zjBZEq+ph?}1U$_RTl<8yQjQ!$x_e1EqLa-Hoc`7;IzayOnLG@AQr)+QD5Sqg=tM%) z#{t9?HLoaZHG5X=IT?fKr~zYo$ea7pPv6R=kyFYe$Jh8dUdcZ}1X&>nA*r^2xqaIX zhln0*#h7ilvh{=RtI3k+FYir^lv{Zf;@WfCG3`l-L*<6trSB$9@Cu+?4{Flmog@ky zTI3-kl*!lwzH~=DMuT#6$NN{H4BRD~*e|N4PiVN7g1Q{8m(p5mDHbL0G~#Vd4E5T+ zSsJQqwAFe=sfA8T@9Id$2ZNW@(T~S_CI`FgeXaWo12??fIvON(PA(n;WU0Sh1u71r z->7)i4zJ$*dOGB6=M_I~_+Ef6crP&+l}>t7eog-K<=rcZWqUBli0NYh`s_94e%bT{ z#{0tM*j;a`Y1iXKb2i`B`J5w)n+pL^pmMzlJ~rSp_&D2}E9F6L^7vzRX|{5|<(P}_ z7k(AuS7nF9xJpmosuIwfZ zoR(S=DZL7g;;GDuwnE>d2j^|Ie;P85u@ao@;j%hum7sue7+kJh3K-H|+=rYa21_H) zE7K$9XJ4Cb2PPX`tKO8JeqQA-qx&ETdc}CfFr%|7=50<>P{2uM0@I)1EPj9W;ssiA zI{0Sf(yE9a)GyQj!TRW3go7LpL+lqz0!3g94Wm?mT?IPwOc&o15FZ{JYpIxf1&R=F zn^mr@@3Krmk4dS#8i=+5NMLs#a)X+77CRCs==7f15sH?^@q$r*tJ$3Q=>?B%O~Bc3 ze$&b8#;;DsUK++;9trV6KWu;YPZ?}pT3^0MYZg5J7Wl|yZU5-{)H)6_r^W2Jj!?wUHqcKz($R2eZk#eP0?`fGh%D4n01dhTLWim4I#d;8sd?;Yx7>#+tM4h`eYBJT_Lq&b{soCnv|g7aRA>oTQRJ3;coA~?oHKHi7MIjo?sgFxre<|;S~V)w zfif=p;{T0=Z~)5p)EyVLH0g>rv9UItq*)8M;$!pdR?Mm)%2kD-S=-CE*fFZTf_(ua zSTtixZ8ppe?hWV zuWRl8k&UBoC+A-vboch)^#iWYsnGYXBdoy`*(M(Hp^p!Q&s*KkS<0ibEbpcBFbW?s z5t_%XKg_4^hgZHQ)aW0pJQac{>2xSukV8b~4~Z4r;-gH4!>|lqPWI>@kvc1;h~KS< zTo)GR8cfk^dDS|2nM@d%sTR<(r3=M0$9W~nb6$JU4g;&AsAJuTQ!#3xpCh-#FZkGC zNJ*Z%?)2`4loh6wR%MslGmde$%}aY%obD%_+a5EX$GN{E@i#O2z2@j+^}P_|ZX16I z8{6s#loZbELm#!k9x)4jm`P23#K@d(Vs|lCGzA^XN13L@_LP6@yRX7N2oGarZ+eOk zr9>TQi^z!xd0)?TeCYae={jpT+S9+#Y=qbc#Ejm3Bdto z7~@k}-6ea-etyI;U;hJs*9JXSghs~4{X_zGW=@|rf82l@>n8VRe{>o{^>!)7AIiN! 
zi)PjY5ofP+#C;@l)z;nBQQA5x9(MLO<*@@l(l6z-ooY2VI})`%7a*acd5bpKHIc5o zt7l(cPIN)bn3BY_261NOS`@dznoNICueDd#3~Lru0T>AG0*(ZJtQRk`cf1uZdt}$S z*ATF~`QF9*{ip4s`E`6M=^Q$CSJg_lgGq0S6k58`LNS631c2af-xvC`c2@4f8cdYxLu5n z4KWdH#D0!}Lk)8Tb0r)9(sv~&_Lko~Q!W`Ot+TDqEw-OSM%nkKGN}XC2HY;XlY^uh zfPUO3oqwSx9)3QYF7&Z`f|zl>+8Z8tq1_w2eFK4vS*|U(iTUFep!KilLFe;!cSf(@sn;eq6B?vYA1*avPi@rdd5}X?jQVn9~*ZGr; zh0b_NC7c;%7j<~M(SGM5Up1IMuVmlX?W(JSVb+eV4t}e zI@Z7(z3w6&Vai15zxpCFYWEjQI7Vc#{3$Bg57l^woeu>5w}|YCt5Ra(ng=45?0)RH zqOEnW@T?_h%5bqX9T;0);dRY(a-G)Nu zPDgh=C(NMhbxEYObRYkn$6tvh@@MeujQEkUGwuHq#0p2au?=?}A5qcK$LJ1YU0#f# zIKW^{hPCy2t&K~XYwb+CvPsf$bF=cn?_`P$TXU8hOXihnAQ~f4}=eQ3qQfKh$bz&W` zkA%V1ia)i%nl{QrPm9$>oU(Ir$=Jrx)buGHF3?LI5JIROg7j{iA*7(XET!c2Enz4B zMuMNRtkrC9ck}yZ+}aOvN9R6Cd(y#-MK3(wIbR+EooCs84w%By-mo7SCerv92RlxE z2c6c^)fIl+H^~9$XfPJZ@aMocQU2IfS>(c(7P16S+$%T!N~Hk7X!p4nHV{5&2r?R#}=+hb%Scq zwa^(9KAcSd4=A)#k55zr8chB3Fz&b}^e3lM3-K~rXSsf<)&r=#yiV@$6?AH`b(gI6 zh>p0wZtlCR<{JnwKh@4_r(8G}$n~OyNv6CCH#XE~I6bJEmCoPVeU+IUcX%Gr?C2EE z;Jmh!m^>&xDaQK8s$d)cT2dyLdXtiW{c?2;1P&&};2{V{SlRHPaNVOWQ25$I1Pn4< ze)*0>;SzjJX38|c0A-UjyFXmK2wAe3cDw*ME z)(Mt0QCpVE3U9&sg^llWT}p^iEs8*i(Fy(4aTB3~nQl^H>CLkT>qg{a8?9lSK46(1 zf^`!M0lhAu8oVWsPwJRgiR;GxP>t3ME1UTPv16LqL45s=k@ z{$d5_&Xu7r7LSdWJbpBlQ#MTAqO%+-_$w`!Ry@D_%XHJf_5B~ds1)dK>0kz8#pYFo(jDo z^>Z~}FR|hOCUMmKJebR^{85?^T93&s4?)^#lwA?0_>c3-YtCWl?tddtc>ZvLiK z3K5ZeNp-@rv0Q!et!i7U#l50sB=#g$CgH?N6iq`d&VrA&x7ti}TrMG=#G{aVGga6@bWG>7gn#6Jt8Y+&@tv z=?n54CBtKUe1^H6ZfMcVWO*{0dKyQ!x+urCvhwHIT_R zI#R}{$}JZi_NHMwn_Is)A0kx#!L`78=@@BwyaKwTKQne|(8_H~2)RP&z96rSIFP=g0-3XYV3wAiuJJvFgV=}AoW>-(c9#*4&I&=&%qY@XyY^}tf!LFeNmXoZtxi7HUO}OCVDrJ=zvfPku z)@8-Z=kh`z3If6JIp#`2ey!XW1|Iq;E{}eGRD7lhc=IJ+pdiD>xfSK1Gl@Ue_gEoc z8uUX(pXWX1tBz+z-{N?Y06F7K$65+gnh2@kpoytvrAe2y9QMU;Pa5Q$Mh2B>t<_!0 zNc%3_rs((G>a%!y!Oc%DDkAYi_H%dC+a2*+dn`;IM^i^W*FLiva1-xCg|-P*zxt?F z`h#@c>&d#`1O7MRT_rHPDIX&_~1)YeubC`#2E1jmq{JZ!U6 zrt_WGSpDkg%j{MYg37q5L{xZOMiqGlL)GpvR#giveL5<$Jn}_cT4px;Lv~_)b|&?2 z%L(|Cq+c0BxO=8Bpt62TrqSB zPu?@~+YRtr_Adwc&Us|;M^>CNr3)7Ej(uh8<4QY(f=Eo(1Ad9+*qz--MZi0+c_w7UBvL+PH}SjA^Z0_oKmYiW z7~C&-`0*`%6@4YGgq8ap>b^+~N{X3SM$78rROfZYmG}p{G)~-!!WWZIAHCb#P`eo0xHtrc02rT0hZ0EO0h>KKx zKCiW!e@jK5<=dP>h;or|i#C?2U9{|TU!l31i@ZD7bu1KI`x0=~xP`Z8)nRvpOxpuH zS%cIO+p7q<67h%VG(c;?*#4cwlbsYh5S#f@J}Lu&$xvKyPKj@B!fn|Q zIn5ACKZ1A_yaPvN3Ym78Co1?f{m+AI2$lqsEpUf8x`sWm#OCneK+Vl5Io%Xt-;x~W zFV{U00N3se=dHchO{O^a6y53y29Rw>^6c_Jm6egVzKk#9nzQuAXWc6z14?>avM4pt zpLe&^P{+9*Zm^12mET4aFgc;y#k}gMkK;#N5~yiNf~}1BXf)v$|+nUB5|qv#>An-j3uIptlQrd^Y0j)|cc-Giwt_Hrl%e{;=&+e$c*~CuH4|==i)Y zwde5wxlE|l8{rQ5fT|4K>g@-8Ka;8l7QVxs+a3{oL0oOqF44H00iki*Xa$5~uW1{| z2fO@5SWag`E=QM4y{v`K>R}E=?Mb=g2NHrNzosK#huP~MfTvY`6fo}NoXLi|+HP!D zM933}ayDtlkk_L%aspcV(C5CD$&9DR&Q?+_Yip#8X6xJS&sFM~6_J+AQIxO(#Z}$!DBuq1vU#cYn3Ro{dl8BZz!8 z?f*o@dA0q=DnHqvA|YoaM0JjSA>tp500*|=ekC5(yo zsH#s42FX!t-JP=jb-yCHPj%#l&G%mDVjm+{zH}+V`bP#q^6Y!-&NJ5>vHvF$7V;`3 ztgP7X+dHd$1Kn3jD*(tR>ubec_f*60u&O9FhHcMJPA`y6Z}!`y=_0f2_>j#;f6g$^ zwxFH2BadO6z2xFvMnCS4Z_gpEaIHLZAWiss>{`jpH&oc z@al0q)rnH#bHjd3Xps=Wb+I$tNCU<#*Jv`A?2wDm3V9o&?&BaU8WXciyCB9#XZIpg zAI^~GnqbDtkOP#{{iXr&Uv_Ma7}+MIT>83HcJPlnI*UJ+x_{ZT<&yig$ICFW_`xzl zg>MlKX#3*deJdVrVJH$U6c=|Z=XWt@&hl6k z+Re^haae#W*o4PcA& zS!LHl=3nZXP|eNr*&PoJ6N>oddVl955=;vkiJN>g4BK2TzhC8L8#;q5aFvQnNFa+v z?4Exi*xXu*&*fY|O)cp-yKMYCKZ9bB_g1Auwr<;_hme)(VHJ+2E|yf1_!T zJS9fEhL94nNO66ySt}mD8_fPb%@czc`RNYN?DzT7b)$VAL;b^2f|1$3z;fb#ZHDqy zv7OH8!ttRZKOb*HnXy6w&o#za^}f&7!vNl02V$rd_ty+7FtbTW?`$3k zE4|j(bg#l6J4>~vzTX{##_Yl0=6X~OR+CWcw&}ZsCb3%Y&hqO(GNH9^5p%r*sD7hd z^`NZj#I_Pim@i!K{Zl9-X(F*~df)H#0-Vt+@|LiJGPdGZcy!=J5&eRzf%D+m5Ij>r 
zBQlikN8R?s^^4SF1IEnxlDH(F3{7R3Ufa{4*|^ACoRnODE7%=Hftzowmt#@)UUpTg zsySy%l|Eq;_vgQ{Id{TB+u20wmYjA6aSpW%~IX<{QQX2_ptK^mEihv7Bo4!}Y=f)KM>0eSN zj@b#S^xA-3N^l|IIkz-=*O`&S?i{M;7kz}2!9?@--rt9sAfCKhn~V2$T1)Vb%oLC+ zz|f{>r;e%ayn@tdm~nk!pEEeal5419ajL_IW&RTd+8iQ-su0djj(uZI94BP6#klgu z0ddh$EOgJs;1)P}Tab#3UXf_$lw$I0{lOL? zd7n%war{hP|E}Yb`f=|tat2I6yW2jJc=4W?xaDN|nwN)`fOf#_!UwCAS3G=lpUmVF zUhmK*nztYOLGGee^?5tsyW3PAQEIcGwt)MVy}-Bbr5%~EzxhuT*F0MySM`Cmq)!Ng zO}bicjP@~uo_EV|6pU!|k>SC;#0w)q?k|w&j$mUeb&T)>E7quvapH$W{6LLUko2}X zFb_>Cz=>&PHJa<|A_rl+$o5VZy4!A__` zF*|W6qjQ`jg)WdvXswqYlv-{qxKOL-@dK{$S_m#E82SgZBB813c<&{wx-}@g<@d*q zg)_X@97aq83?94uCR|@xzDFFYh98#D-Bs0>GYqjd*mAdcJ1eO2Se@sPSwZ>E2{LA> zT_N!>Iq2?bnHN%&7jKPZW#2@Nqe)Fld8JW*{vQk*qr=mehgf*xTyPMlciiCRy2hCD zK2CV0JdNr(IKKg7GCDL5m@NOYbUoRwMJ>z1`I^y;=)Njfe&_-BJu$rJdZ5vSs`n6; ztGf`6P*T{|9v{Q@OUat<%(ihzVN(v35VD`~2W}YxPE~=>#eIylwM3VXLcg|uGm~3R zznMu|{H*P$)23l9+#o@gY(v={c}z1=;9tb6H5nttKpwXPo_5ewY5uK)0*roHmThkF zONAM{KF2872Q%4AO0I`W$OE8?RV>D+GJYQfDWYeEBBR5*t74}kh%0g)V0~K;$)7fQzX5&g5)IwP9l7D|GzoAXd*r2#(UqEO>kRKH6 zJR&D(vscUch?D4@ZHn)TM64zjTyCh~&zpGl(z)t^@bDqPwh;4|uNDf59o?&NpS-uz zCHE99*7Th+2E4{Y8Bkl2lz}7aC(VP|uKthr{G5|EUynBfT1A8Adiw@%)I85yJUz1x z0j^Q!=rFde5gDm+2AgyWxxS=UV!)O2zG_r;Me~M1L+s(ETm^uE{O5EW%KSk4a~7y@ zO=3??BgTWakI8`adyl|UVy$p}M}>N~8OM3=>qGeZ?cIVQhBk3LUbkGD>z$f^n;dgp zhHt2ce-rSL%pBESwF10KwV@czZS|?}{cj2MKNJuU;Tfhw4)usHEs?UZYNkWaa;um$ z^VJ<(Y%@(wxTjMkJVX+8r0s_>9(*h0d7!U%{^k6Xj&a-O<7@`ME6$>yP*>CEw>g{9 zum?eV)SauFB_dvoJc(F12wruy;Cgyt?*Zo%z3&$EBjO7U-KU7f<>fKF@8&yp4qbhh z{7=(Lyr}p$7B>Yym!9dk_YDqY-$eKk@`VHJM*afF_O(7}D47O8b3@!kZSM{2g&!)l zSNyp)t+=P5PW^b7b}!O-{Z%>c4k5VfFMplmvqX| z1rBq(k@;Grc}F}rxR&4qC}HbB&tL#jb7(-Gg3CHm|8KM{;sChsq-&du~*;lT`dtO>bD1>f}`)%-lUlEx4kZPijbc3rlRM$OtSL(8ZTXQD4 zMNUYH`HekT`sM7vyM*Xj!hiu-XIJF8SZ?z(dAfNo_Clm&`FjS`n#GUf&K=EGIo=nz z+G$`!riYM=`TLjSNcjtIYk!G2qD7M>s%`cT4}O3>%st`%QD_UeyaTCOcfHaA2bkg6 z)W{3nNZ5d93CiUw>1W4WYFaB{sbn=+$sa2^2mYj!h@U*YSF*D-G?D5l+yP1ueR0^> zMGs^5%AowcF8-%4qn*Oewwk=KHm$&-_@m5^b|H^I>*?$F8%CqVhe|AN7ZGG4qBmRM;YqDMc4 z8?+8u;0I-mM7knJFEq%xa}U0`7q|KtC2id$NT%ZyM!J4I;xO2kJyhb;YB92II*~G_ z4_1|ht@5LY|iTv zdzSgpw@yAXF=XC;!q@Z;-e#p8z(egQWYR?`2(e|m(9l`-dnQrR?!rjXQ zrn2QUuNU5FL9kbc2BxVZcO;$@EiZ`;yZ{gU=D6oiX$g<`8OFoVsck4( z>by`7?Vua)koSfIk{d((mJV;rzV99jfak@Cf7wMxbE2xy`ci^@+K?WyAA)-=1vZ{_ zO|T2!NDsqM z(8OQXs%cH!NBT*%Xb^AB`G#!=`aSK}y z^?mZN0XE_N#e}4)Dh^n6Q6!w0qh7Bo*e)z7hfKF;n_lMsM=rdSjaMs?MR5|yNiwJM z7H_m1IuS^oJ<=-Z>?-I~P9`lWEcGrxZ)3kFK2jKHX)fv-TIqUVRM4cI(~(X>s3>mx zDKM8W%pre1QWPd3zuJYC1h6ZAciGkKIU%Y|tT^@#D9*s1GMJ@2X0wL0LrTTnC2Pm; z)cap-|Mm6t*EcKiIRDiq|F=wm0$!%j?%Yhx_-}mvTj2TxUP>W(@Lw(0->@5A*6{0> zT)4L#y(ClU0!1{Nnw|!fQ&0tl z$Il*~H}49W3~WUHvCmqPefn}2t9}^vGW#OK)7+hxm2J@$TQwuvGcz9@`kS3)azkp~ zITE+G!`30_>E(q3?I~NWybyC~I|{kk`zl)(#Qt51>%l>pI;F_+^Uk#TenJf~OC4DU z$IR5%(ONdq(rTL-ZZ@!8DmZ;intZYC?5uHU2!kzBm07GW7KCY|Zel)*L}7Z;3UprK z&}_5f*Z*9dJ!39i1%pL{3M#jx>30;>dR3Ef6;uOh?>Wu=Sola?3Y<0|bV>Qy|4s?7{`WnLPl9p%(+l9A zr8oBFZ>Z*~aZ%00o>pn|1W|xt1_;K^Zj{eM*xb0wMu~Q#h|?Ya+4>bA=9Fmrcty~I z1#mq_18U#U&v|;`o^WI|S<|)1Sh0vOwK)6(X8lLzr$XRhcCwo%iRFCiP#nOJz9{16 z;u#lfy={vmuUN0WpRpNYtMlNMykyZ$h=tqf-^6bOXdXPd$EqqZNgIl&GfVmMkSS zJkUHofw5C<*h_d(dxD>|-=(J-JYsW%~$rE0bb6W8V@8nfP+b>H&l z-6BTL-ORopoG~gu=;y_amYrMiNGU5bv%s3{g4u;6m3f5bufN9Ej%sVFy0GAac z+ar7Q-p~r)vv->d35n8tx51g`zqJX+*a)*t(c&3o-=8JZkm9d09^7`5&D2q11QTC= zS&p?PLKwY{=n%0P`%$~f8*yhktZ6w@gU)@==Qu~yhW`n3@rAMK3092|5Oh|$R00jI zf6Xnw>vWPuMaJR4ITopXef2nW`T5+5=#U}v{4XHNHbD46=azl2w@EcRc+2j_==$Wj z7T>-Z{0)eY@?8KfS#tlZ&HohM>=-rC2dhu3+n{6Ah0t{P_uHpE?92f37>Gc#F$e{7 zq*!L-e@KX%`D&Djj$$dAgY 
z=;IMoB27r*F^*W>NOzuccnKY;BPP}A*I5ipCQ5jQJk!+))noKU}Xpz*lwbc-Ydx|FeUBqNGt`+}5|6z!5&68R4w-L=2SuN?bkP!{}VxlKxV!FGuxY z`l|nV%BP~g=xzm>zvynAAdRX#qW@cv`0c|Quh^$ub;aviYU*2B)9xo!I;XOsT#!&V z#S?fli&H}*GFr85rEeG_>$qeIh>O+->Ir_$`%wKJZ3h*tDzdW1Y@$wrOA#IRA+T&PPC)us$`Xc8*^jRj8uOC+r0 zax-V8i_j>~m{C_l>Sc?h*ZP|Q!OX=sFH@HOqGNaaAmJ{!8T^2V%W>Zrzxw&L_Q5Gk zw(WU^zXY_jEq1YUdC@nG0%Ccsk#TBeZ&X0B2BVtZ^rgfI5|8o4b&kNC^)A|>uBup< z?o*xcz9K(I?yo^M5$FpVs?2wgG0@pqk)zj-tuJ!}mHjbQnO#ufN|7BBEzQP_Tm`;N zkIl%`W|SXrnzlhLLDy|DDq*Wl-b3b&O<3@N#tU;`NXkVBi`nS9raqu-Z-`tb80(QV}(rAk8lx=TIDU~A~^86Ow& z$OC$`Vp!WrlUkJ8|7q{r3qwsTK{`<9a2+`ph8Mrm`aikuE>#YEt zMvLeJokIaH6&tj45XI}AfCp4V+`lU*5_o<1DZ=TA(B7gqU7Fq}4 z7_7%DP>1_Ka=c3WxVyG$z?k3cTm$&hHLUMe#bISY37$`Xd8+`_H@n z?-VQ%SFDSAnoo?&inTP^dg`yf?T1g+DFzn(r{=aW>2{0L^)Dz1?0*Vv6<(@_?!FQx znDV^3*%&jsV$-!PmB=sig=XA(`e@yPafb@$s;0o8<`lUqU?u1p4;|3I+_C@9&xZ{J zzT(o)@Z`_@=DVVui6y`nx*wGaK0Wkh|1pzkZuduyd*#}-623vYNK|vYDR^y8sc8`H zTPTMH);t2!3HDIFL~_}&-@M%ly8dlh=zqiDTiT{Z z-M9qh45ZtsE{%a{A{b_4Ur4*ZgQSJhVHg z=zciezZ#!byn%31xrt-^HulnoUZpMFqjX79#`;Xij< zE*wl(wYHkF85-Y_ENfiM(789%DI2zBfBMk>)`Jy~T(h{Y4t=pX#|SeNE9;k8@nmRk zG(lFN-v3v?dJBX(#~}LfL2S7TM395oCfWiFi-wj6H`i94hTdgu`UK2YaJu=S@+Bch{h<{#$4pl3CsD$t0&MyGwo{%@^d5 zF_2nF{eGX^tQ$Himm{Zn=TFnJnSWe*u<&NuqtDVcM_V!Y^{%Zc*$TSY6FKmmxEppZ zPgiv!CpXJ?LYfYoF3EnDtQYSEw|UIkDzooJMA7SPi;T7Uo$QwAd#2tmqBj{X@{TKG z%fNySbBCAEj?;lvvu9J2r;!;diH0tqo&87BTNf!o1F@{zn3D+gw^4h+2azA5<(Y~g zYs;EAibK-*5@gqyILa|S7c7WBSxqr^cE0YseKMl-XnL;V(}pOS^WC=}Z~ocITD%G9 zWTg!bQpCLgGM0F7XGiatj~|r;D4v&j{_Gdx{DHcFGHD|V)sUkmJAM^RFaPPkuebW% zInNEZb^0y;@!O@`uA$Qn)rQ*{%DcT=qi%ByM!~1jJc249(^4H&qg`HC7`5q%2-j$r3 z?4NtUO^tivt=+;G-S+nOcqgYF@nc||UUA?f$nn}M6-~6i4PqFJ1IzaT|qDZi_N%BWH?cRN*uWo>9*(d;RtE-^ry< zG5HmLX#?OJ7E7%zuY@+*PaCHt60%`d)(Y5!<$tn8XVpAJRw-20o#Ce#)vPz$I@83z_swiHGxj3{`De-1-a3+zs-CNE&i13XHMAkG0#7P^+{28RU zmi_+1%}zw9_HQV$nteahb!WlJ^<&p@2a?@wlM>B@L z;7F(+7O0lR?h-UTk`WVWvrEB6&vN^-)rCB;0hU z;`GgPqxUPiZw4}6L^8bCo`EqMy^9}Z?##^Td|}qH#G2~(ig(tf<>i(>t_qEhyYKJU z^Y=o#3x?;%>(m^;}~c1zGfcn=A;C4YfT=?qUDY!9-{xU1!3XfVi2zqPoUA{-e1 zs{}p&7F{-A1y8Z`CJwJ(lEV`N(feN}QkL3&`O)3g&(k(+06>yi_Ep%MID5M<#wVV@jnX+bZO4XX-uO6Rfn zM%D!8x5V0?jC@GtsILFhEkrBk?JfHGtkU%p8^%c|YnI4o(~h|fg@{M;X3@wSJLd^saz>cnk!z8pyNdaQr$ z7NV}i_^Vc3yBqCdLA_?@O^>T8b*x5@B`zP3s_jWFkDEa6A3i6;G{Ti^`cGkM+FBL# z+uJ+es5mI>qBekDy)rxq+x4Rom;LMpP>K7pW_i(R%r%swk!daZVCM(s`TCe<@A!;~ zkLdGhw$z6YXOqxdU5>c>7HHV%nz%v!@QUP_Ov!Z&!FswjUAm_H_RzI>%(ZABNoYU) z{(G0EZONI+bd@e{Z#~yE+U{(QRZCjO_qNF?H&-p{>7zp0txH2*N5V557|{P@E>Txp z3bthZwLascFp6Wl&`IxP;Rn}ov|NdDGFt5EULfdQvT6%)Zq1i)n6ih$}{+r)%iQSVd;{tY_V$4GHZwR6vZ&v3Z^qo|@?f}0(@bJUKp-X49y8WpFc zMh%)k4}XVLqO5Sbj%iK!uKlx9tvk&hC^ztg!aVYj@qfVD%NKHIGJ%5Vrw%=jp!tI5 z&953Hdc7U(-Nc1uhiby)_in9R4Y53IHu$WHmyiSoD`em4Y|F80zFY7Vqy(fyDLx*L zJI}uu6ApL~Z<)kA*;-}w_Ir)*@QHJO9+B@Fe5`AF*7))@<7(!YKWieyW)E~LZShCf z!ka{H0CwD=`pH$A`d0Tcx#65ZHfu#^l3j(&mElvYfCfsg*31bv9R)~GXk z+vc*!EIWrc(f(F!nipTAFBuGqv0Q7ZRJQdY&v9)cRR@@X_(rHkz}*~?zi?stO8CH5 zPhVu;fn*z#h6Cdl__1%SSJ{Azd}@H?=}p0(f$KK!^G@9=9x$sH7T=$6*ZS+s$2M?v zrrgE=c=RK7!T}07%QW4Ixk4a-O^f*#}!Lf45*g zfes;nx*2h)*{7-r8xm+}4;iB&28Cq9MbQ91* z`u7)I_R#mt=#0u{BR%e#(!K?mVW0VJx*Ar7=@;ZLdghFpHYe)`A9ccBZq88Zj2z=0 ze#V>Fb^vszAs2D|R;9Ud`nh)^?vLbY4zc(3=6i2Q!EaakxvgQv6GycS>6p89hwWT- zuI^p@l3x)#ay+8X-m}K$G;|Cs-rjz|d^`Se0>$oPoV>TdFMu}xzJ0d)FvYF?MsZ?z zJ0$uUF+!eEF}X|QT;o+!RNT>*YRRF>x?#6cM$g=me`_juce}@YHbm-Ni&eXK>htfD zAEbMqJkhhrnE6pD%2;?2$JPE)#8vLVPu^G4-IE>~0;CADZr%J#i2c*@wF)@GXXOQ) zk?w`M>l;7I*;<2KYB5k$1_2zbK%91o61(-i{rbX9yA8Kfa9cJ_?l7$B)A{77EZ&dO zSr3iUHB!^}HYqx&o0qf3j@rqdZo;iGZvEJKl!g9WP4!Z1a>}IbGKX!5?e|T&vR?HI zxK$@*B(?c_~@j 
z0y?Z&a?>yG>kZM>ThQ@ilN(=Ec`pk)Y&VE?e9C7&cYHT=cZW>w-g8Zs5~)XH&mQ(G z(PrbVLn}bn2Kz4=_0wXHso%f8cza&x5v5vLC9h={=~zj{-g97_3LE7B4FOi`o9+-T zgswvFIenAUyB6enF?zj_6~sbMqqf!H8C#<-v8tqk9BTTleDVed1LB2;l|R5JX0O4A zv5d7_v&zfudLsOyT3C&(Pm40F?Jj2}Pdu0lQ$T-ZJDj-Rczd))(BdE4gf zBuoC)Xe)KC>`;Ze=dyMQ2}i=;LEib5$?e}M9cqKY6}-cS$T1lkvRW2u7M>)^-5IC` z5M##qy^)r0vqnZAEJUhhZCqyL?rU&J0#5(as`1v(eQaCTc4ww+lDr~jrPNuyOep3eNWc6Ka%tJ=lcYI z-lvsAG$L$DFO_@o<=h3|>W;G;?2prHkCRtyG^QFqZIPe)C?k`K#wR*ul^v%y{cY>G zZ6w*72?9)f$03!aQpdZZ-`+K}TIQe8;g|TAGaoX#n+fMv-`k_~a z@4G{kFoR@czXDai`d#Z1690lrk8H9Wh;<&bj0}0`h}U~6zvauC;^X0%eLHsXWM{UZ z5t9c4TUMRlD?TjsJjm}BAJ}tPc=6se9CZNwuxXR(89lk>Uwv*}`#d{a;@^Kpf7`Jn z2AwN5>@;y_F6hNwcx3{B@+aZ z-$Q);PR49~5^RALmZZYzO`rz9@3~gyXG%Z6Yb+Njl@DnHABkMVh+mC4l32Q`dh&F;CT(+hWIDHV z(Ny{#1r|+}=czx%YAs$|1h1s+Cz?t-km8y#%mRs_A2~`{V~x-X85A*<&Px2yF}-yLH_4&>&L_>PDG@fR|3Bv9z@;liyp?>xjF z8B<%F6xopG9*S%h2b7S~!RF5Jqo~(DD(URbG2}vh(K$@y96OhSoU@(2;Kx&OG%9U1 z{k5Rvfd5R(d`+Je>l;?RE-X^q%OkB9*Z3`{$|I%Rt&168J?q68yWGSa@boD4wMA|u z=rS^W4y-Lr7{g2+dgq5Em)rPZ%p>@AV~{ZJ+}I42G?b}AcV4&`fPYbGL@8+xnJ1vn zA;tLI+;UPFzt-*mXRhen&ZQ&Xw#VOM03v5G$LKaCN^IH1I}d8uSi69TGlPDfeqloI z5ac~>$)!cR?BX|fv=N67)wTH5e8Zs@<3`mmiyPZ5l{2Ooq$#s@dB)WC_O&VtZX1CP zn?EnTaOTYpd{|KN3H9IA_UCVh?X*HvtR6m|N6kruAi}~2vnf197YrnHF5B7S+x2i^ z3)-mIs8QI~Y%F$?@0ncqj5L)FIFgQ-Njn^KC^g@Ta#Us~kZCn@JtT0X$Wgc`b{eh@ z5E0b7T>VEvZ;KPb3x(~Z(2?|Zh6DvRkbdA?J{_PVA$5khd#wC<(!dGi{JGfO!wu<^ zBG*e>kW#yxMMOY$kqvT=F+V#U#F+imj-JOcNT`YYs3y`?dD4VwWG%z7%6_qDtd2rD zLY!}==d7*rV)qyl;o+2%&l7V)tlO<@tS1pu8Dj*E%Qf?~X%bkFD!GRdfJvFJ8H*yZ<;9}nR7w2g9IlDr zK^iPN50*M7e%g_qo6DG+-q;?sz{RB1NYIN>V^xf>nZ^9k5BDb1%)f1Bgt=o}B-tc= zDc)sa&Y3@>mQ!vjv~ihONX_(<7T6m$kPcL6Im-RUhX?(^sLXhZI9X(kWLMqV zfK3)G@chJ4?Ka5v=Hc}>7P~*L4}I~+PsPxyaF>iotS}7WonHSNE4|x3W!0uM?>6;e zD`IL(|8vK^O9y}C^m}a!Hz)32|IsWz-ud(@Sqp`*3t9AGk zxD;!uo_O}RTX!zEXxnL5wX~sad)~eKCDpB; z-G96F$mwIYN9#MWziB?7Q^#yem!QMP3Fm&hwSJv?bbSTw`ETzY3tTez_R{IVZ@0PuH2RlpzsUP9 z+5Y9WpIYc&A^lgkEve7<|C)qlT2byojAz(Z3#Vy*^?=fWeqA@XqHK2 zbV;g;IXT%A#bOI1Gx>!(Ebc~Zn${uMOdmG_Exp*?RN2{%TyuS*kmfC)8!?`js}-HO zvsyf!FHcg|rv^5|XK<4fEqT6(3Vh}$>p@!luWcyFJ|gMLleW~wL;G1M?V6-Mcw4_P zbF}+V$<=pwq_%QS>Ms!a?&_Y6uV$aWF&ips4~i2boF(ZUhhH_A9a#K4l)AH*l{vbz z!Y$D6V<+fW+^pB@u10h*cZcO+-+?62lv|)(nmFQMIZpi=YDbtijG}f+?H3XZIkqSI zrN;ftmZ8y<^5EvGxk7jp$$D{L$bjhry^n7*()Fhba^uxsFni}={}VEIup%yu3)Ag@ z&SraR*1xJY#z=i^9u_bEMWHJ7FOui#$m54Zsgs>O^_RJc9ZkGfbLX5=6X!hqFn4TA z<*py}^Yzt21fNp)1&6!e9$Rp|yzy0xzUZh3VJ43rv4RCj@L@qMJz|?m^~%=RRo#j- zr+H&G?)UhGT*HYs3)i0ERvMa%`iaw6#PudM!$?O|=KkS~t1U%R2zKYv)|Rg@xjS#6 zPCu3>-St{r<1Epac85B{nNs>y*_OeX>wC*tBC&oK{bbCqLnF#IxZI}&uTf>>m&BHq zxsdhfP7l$IM5m)~Vf5%sq4`SAHv4o8r(Cjo`;zM4^KSbimxtpn7{_DDa4MGH|1Amp ztlcrh4VQW-ddr{FEV{i3_6+ceUVi0O%x`OLe?!`aN4oPi`-K*qOM(|=prSlXsBv%* z7dBdW8+Kk999*t8IXk2DTLnU?6VFNoF@}G+?_$8_w|Eo%SH?YP6tKP{fU67ztMl|9XQ8&Wgzhwn?r> zMi^_zKwy`((^NX$eyMqcKNTs)_fiJtKcu7%d1<=du!f(xSR*3(MH5WTrDHZ^ z$#h~cCX|!`7qw2i(S%Zxl1d5nEWrj-Di))d4>8y5A#87O;`Dnj4Ke=P46cej6XTvD)epq8XX=f+tqNoHcDIrx`g}RFxGt~tgiHK(JXN`>&eu-!? 
zb6ku)96Y{5TfeM>Lhux9&uvDSfDNhj9Pubw8=@Z!S{Stii#d#Mnv8GFRSYRlRDWR0 zFL4st4!|&y-syo4_fRemif3;t(Pqi!LY&It`?cm%gU<2OU$qPonx9ovGFxoqtU$hW zBkS5BPdY>crX_dO#R_IUE*`X&w+L{@hu$W8v%Mf(7rB#LDccuo2fV@JEU=hSv%n!O z%#{MUS?d{1=7s7Pj+-r!e!wYRIuqDhpGVjKu{J@}?moz+<|u zPc0gAZCNKm_Am_7*D>NW&3sd5jipm2NSuh+tz(wQ14c(jI=U}{*M4Rw3$}AgRG{;28~Bf1?hh3;5vwH;gEtmCol^<% z2~r<{ife-bv#m>T6I^7*hV_EEen)9AZ1Z3 zJsYc^X4+q{g1@EmPN^WMmt(2H?4cO6w#&&81L`x)rXPPi5dG2z1~p^*iof+$L+BSx zS*=0CAWAWVfXck2b;wu4MOeZp<1T2Pj^U_%b5q=^tL!5z`SRFc!)qhIvLLM?P#glW z_l2~%=K;1hoEg(u#w8n}yXnW5mN#5A)VX-|hS&Oi{s219hX+C@6r?fa3Za{#Wi5*b?qWL;8IQ>EIH@O)meToLO_=*_ zC!ya~5JlzTSBP`Fn-(rAz+jkA!xjRKVqXg5r<8Q~O{xWQ;=NV7QoUjDFntd!gJ+gl zz7uR|TN?ULa%07;#u~5GF)M*iB1S_wu)3p*$o{rBw{^sbHhXXT>Pw1kXqdQ&qZ%5LXSeWVxU!*1)eRI^)=I4~|7kMbb7e<~B+a@&Y#B@%Y8acPd50EI;36S( z`@18w%j+{;0L~2OH|K*sNmNmp6|W**VK`KU0+7sPEZbGaMLdjvf#%j8M5Q&L>|;t| z%`#O)&nzy)l{i;2y7gFDv8pg{l3^kaWl^&v#0J`5Y8agXMHrkX9Z~P4FquOP(C)7v zuN)<})wAj)6Veb?DK0ypbtKHqluGF>DFs&MI9>0(qbcW#zTXfUBS^qCI2!9Bv9WlN zJo3oUo7J|p#oM9@JIMr9IZkn`Jz{SL)y}PI%EFYzd_k4<+p+6fT7^D{WXeHOM`u=erbDqcbZ$h`&O} zcyQE{V-6a_5SbzlEW$?txsIlaN=fpT>_P#2YaYMdvc9;5)8yScA45)gRA+>4jtvlU zB3PXU0vMJze%fH!NaY>?W>)YQ;HS zBDeW5L5#Ba#Q(^2Kb|gQ(bij6TPK3Bw$=%9V9*$twUAoMf5L3Y%yr%_03}Lk^>cJr zbyu!hk>*ejBS?GFY-r{UlU4+RBB79~)})_C`Fi!S>Xld!ppD5iMGi(YpJp)#xAUxKu9l<_Wp>O(V% zpg03bJcdayQ1+BFC8hzsQ(3Iwlm?dlB^TnzI;{~G4sN4_n#eN=Sv;LY)$>0U!_>`H z8Wo>ZDtzIVk}9j#X9OYJ^H`M%gRwDq=RH z4R~s(*Q0DT*MQ;57!L(l8#=2#FhYkvby%R=6{iQG>x1-Nez6B@)i<-Cu^4FUS0ly^ zH&SzxeHloe8EOgWNc|U5&k_u0yrSH|%?e6<(O@F9uet^8{~*AWsJ~T1ZH$o@`EZ=* zMTYG)6Ktng4-HYA0XVEx(Ne)uiF~isRedN(QEF;e3O#-q^(v`|0K(#&KR-`1jzcIW zlT^4QIL^4p=I7D8bG{p4Ss3!A9?&!OP6*ec2@krQ5vWyo0n#d>2_uLBwL=XvlD4eL z7)$tQLz>Ul5fy##TXiq(y;|mw>5|yF>Q~p6-PB+)8wfETsqnfAARG&9MY(`(_{*b)GRVRpCw(%&zR)#M7XmFfA+2@&nl zh?=kkLGRkDE+HDY;cA3%W_VmUk48y*R2R?m5w%G}6i@-cI4R3}HGXf^D|sV!_tm$! z_Le1Y2c%8eNA3*qyY^v!&fF(~R#_1)%8rEQ6~fvms+8xLpOWG`^3j2iAp3nPHl{X~ofQEg)EO2*xQ}*>!wf4D?6`B8K_M z9^SEc5A`Obs1Qd(N73a@$)Hq`GM^17yP$}@+7c+eb)WE(^s zLY`MOm0}OgVkmf+(yon`GSg6HLRchl8fCF?RB!lnSJ&JqRlkQ))?!AV?X|FV@~`;Mt9CNn{?FQYhCdM>PCuj z2a8Yv(us$5^*Zw8S7j_G4R!Cp-lja5_%~;+A*4B69$FArLi)kOF|#QgIL%FGe4N?ty)S4^$~1WC`34bx)w)Sql# zky{qeNJYUsAk%_tnX&URowH6pi3s<|7-j<=y6F0C(5@wqUq&)0a`rg)Q}m8N_At(F zO~0*GR1cTNSkQ%ut~yjXlb|%Ik2eA^G6D@eS8C}C9~Q@WoTszqp9BMpXZS>$izf-! 
z{IN&afN*vcwT80X75Ee6drkkS(Tuz_giUj!qWWEEJis#b#V&LjTAzzE=)&qP#X!1q zfCKN!cZu#KOi%$AE>ug7U-mAK2_e-7P7(|z^9)sByC~IoXhBn!P%rpmRgQXef)LV+ zHVfb-KnwgNGMx1MM0tQRh-Pv;becKnF7u54$O57Hj~8I&<^k8uFi0nnX`syp4iu$I z++Ollf*0Qswj$h`5jh=&rK~2e4&~y|6F|@lF z11)OE!_g@EZLM{1Hb8Y8He|R4WG4Xr!g4A%tDvN0s-MqLn1R$MvI4oil`&-H9^k)O zGO)0W7U1|T1-Z2eAb_~yWqB!!W!;J2-&3+JdPXNMi0Bd(9u}XtSio;agxDjY5c_5_UHsil!VCn#{f_5@8^{L}u%5>6k6P5~Or{y|pzt(&IS<2ws2FSuabe@ID zc!P1+`f($P6|}id23~5ZeHFH)h3XW!bR_hHUuv?-?0QoD+&~a7j?*+?lBs2O>+0s6 zs;U-G=Y+AguZO|j+DT4nvK zWFi94A0l%q!^2#R4MS&!FG*pD?e#IU@>ZpBas+Ey%ynWlGVas*AB$M#%m z8e}o4w_n`T4FX)#2126PI1z9-P|fzjtQIpjU#`s7w{qs(%99T+r0Ici29hvMP%8oA z=mrJX2Mi-5wu^A<^T=%VC6x=6u@`93oAQ9Ob;Cm?fN8@DKxKRWS~Rs&Dwl?*bDiS6 zQ|I)7FIWHzo|NAvPn`Y-BM5`Vr%Jo4u!a02juP>s2lm2e7#T#fnQ#VZ)ZUdgO17PJ zQwf_U9u=uup?^AF16u1ei0x%CK$`WT!yVO%R09o;k%TOFjDR&yjj>R$j4MIPq|JTS z*K22QL~BJ4oetL)@4|R9w*cN*8!H}ux>d)r=t49xkOY*4r}91ZBeKS z?%bAGk%JwHu5afNyJR%VH2g=2`nq4h!#1O-gKAfUs3n%U5e z=pf~!HO(8hB6x|+l5(<~Mc8pGZEielA_9AxUNN{E6~I&C>sw+eK|$6gc4WY_wNKyx zPXm;e8}PyzD2s>|TMGVw%a$b;YFS@vG8}~N|858Y*4FL!rq2-q>N(2#T_|>>2P8(X zNT~M$ip>WB+b8Oio2k911)N2jn?k{++6N-YFq|JV(o$a;Q#VEn1w5;2spp)DsLA^+ zlSYYzP10Dq0?|G&>sC=c5JJiLh2#2mG=%i_EB)XnAGb)McU2jKRTm1?prp@%S^-#d z+7u1ZV6lb>JPq!fOm(dqx%B}!{VopGo=AuRJVUtylD2Ol*p>oCHohg++f;(H#5M(M zU1b7}Dz?W`c@dHzm^~!3#7vqSYT>L`S^uFhoa+YfMF5;DO5NTJONd>ZU3qSee~|;D3D{9D$HT{(-ZWOX;^2@KY<&-Fm==ui zjZn`J5Dh4OsoR%Q3EdCh2SK{oU8Ub;F3}Yv4@jMt5*5)p;|eNYdOieQow3`o6Fl<( z&J{x-3V=85pVC@uz*04=_2CJtfYJXlgV$J*IWSvKt$hw*H#&4&+Bk? z2!p*WmFSU@#vl-u<5fVNEaZ-u$(fLwL@8#hK(A^c^8_r7OS5NLI!cH$+z4%t9}J}` zqN9ZjqJ4Ui)Gd%>{L8#>*DDJCU^GG9V9ebVMA@qsQhn~Lw0;B(x7$ym)2d2IW*jok_d9k;~QRXsh zZWR&I3~G*IVF0%DHInpX?9S(W_hR3$X_i6;5CXa+A+xCX-yG6EjD+q8Es}f@j<)rc zqKAp@RS|&JW~zyNi;HU7OgvQd7M3qzN33_LqB=noTy|3l5R&}HXE7d*Zr+l3dCNXG zl`(Rludj9buXxy=Cjdoe;esgFpoI&I9di}vN^%OH1_v8x3#y=#bv3CKx)s4BLN&10 zjMRKSY=$Z1KpNXnEhDUy>|kF~wm$2Z%02i+Zj<+()2|@TDO?Q+ZUVa?oPT?IlNNouaD$n`NUd z<+yA^||fD_5xoYuMpA+{j*8- z!@@XW8c|VaNnc4c{hO_j+9B2jEqP2DTFsUWlNuBOI$X2P4Qn>8UkbBCTM}QYluSDL z0&j4Bmr9jE06i#b2n43Kat#x;mx2}kqE%SQQ*gO2w7MmqN!!Z5L?)VjJ6i_#sHQ3> z&>a`;eTFyA>A9S%F=I<%?nlpB0(Pq8`9e>a;n#gq%xqQZ?a_XqH;h^I| zKBi&pu90~VJ=K&7DxyRF#{s)5*>hxKgm^Dx)Cnf=W)iFMxQbqgZ-4z)Csv2Bd%iLl zhup4BC1)@%Ec8Zb*BAhRx;H!YLAs+O3`-?32?)LK%?>Lw88384d;ErrQ0Gs~0@C>c|A;EhYN*8So= z$Ypjb7k?B;UZ6y-d5AuO6Qj&Zf+tXSGlrXHgIa?;TfWZs_Bxp^OJsF5Yn$Ca3D+*v zxBLmK{K2cwA6&Pk&^ZN@nu;Q~JES=3G2kDggcF4zS9q_A)0FEP)*f*R(c-z^=6)qYLi)aD^byo%Rv^;_SO=IoyJIdo)Uzlh ze<_S|Y~aAExP$w)jAR(Ah~S>+jK-j@X`zD>{HeB(Y9>kvVt-!Wpbsxxqwe8e6Y~rZFEbjLs$;?Svb2jO9OF{t(o|8X{94)2F?=aRp9+@ z-_VU*Tt;P0IeWYt8a`(}a}8UvVC;Q}m&f5iekr9@=VgrW?Rjy(99B=ZKS@~a4)r@E7geXpqysCq`=mtsrzq# z@Yy$mTpIe9$H{H0%$yX=h{j5+6%6r}jlZRSRfM$oSUfGF|R3b*!jQSlH(9 z3$;64-}sKB@F`7-E}9w?>qOL zbL*e$qYrNOX05r#9Py5KypwPx1t~0aQgjG{uwF~QRDmG4Sn%%!Dl+)aZv9#p_=4yp z{`xH{_~VUg90p#a*-O83f*>4&hktN|A_W!@L=C-uDfZSqZD-y~X z-ouG2pYl>JB@B)8!$+EMI(CK?6Xf@BrpN~Rlh^&G+>sn}D4$7uGdB)jpQY4J>5@6$ zo0c=Kra8gHQE6_UL^>m((geHHp;HIL#F2yH!HZb9YzVw(EXv>yK0^Zg|9oq+x~hsD zHE`)fQCpk5s;X*qbd+^$Vy7}g=iYju#Ur;^#`9xx^0V6vOk@Oc2?=FYRj#hu$J}kS zI&doQ-ePkC4=WQEf zo(o%L&3mM8FOAI;5)usV?`~O%F(LohSeW4P4-RQX#s2Sc^imD3h*^+0GBp<$f#3D) zBKa7P-9n^Jv#Y`3N^jxUuj2Oh2ma-q#AI>=aL#&)D#0F^fnvxpDHUOZJJXd2soXY!o%$VTyR#^&gf{VfO<)DfC!6Ene*AF0us+?I z2)es|r5yy1>QH2s;eiZQ_}%+9JA)@XIdSgk?G=L|28VI_QI<+k1u#aD9s>u* zo97#M*Zcc*3(c;(;wmbioLfsuN`x14O52_%$b7YW_l{(2d>ogGie*h!RyK&5_wJa7 z7!!sF_wL<0gsWU_lQLsW3=@2$~Fu%(dzFFiUm(}FSXS=hV8EqKiW=czdK5kLIk;o+gm!mnR%n-@RI`}nk;98*cRHeGv=Vj>R{ zuWoK`D(qiQHU$R<^Zv?a>?2;RwNj4}!C^mP@Q)SV-VWm9;z|IEQB@__(DrUX5A2Ac 
z5HL-$n<~=*YsR4zWXNj;UYW*5C)8te^(Le=<9Wh|a^2S7o0)TS+QW?(9o0zT;{}R1 z7b$g+{?Vrgi}gJe0&d**b){NmP*QQ_Vsco)$cVXM#i{6QAKO9?Cp_ZeO75W zt1P%&k<+cs4GRo$otZ%AsqL9D+gpKARS;o{va;J}PV9n(c-3)4dOInw6Q&~>Oi_aH{+~Y+ztC+V?vJLBFfz)~ zDX5B4EAFi{>W|IHhz_Ol)sVr##_qejz5W;<|HocI4=Y!9*f%3a@Ca2QQ#i_~Kl&l` zRD>Z{9|{u_6R4+MYCf6EiV!TGd&%o@nHXGfLz-GK(hnae8iWR!~-lh@Vgdw2aJ_lT;z&z~As{lzSz;YQ|X!4ko6dWt+U>J<64O zw;8ZSA!9I`>0E3u#kR)BF>gjf^eV?gF;&kjn8kB_690jeUuJ}-*HAJ zHN@L^-)&74!#N|zgCw*w-#k@T6nO4X778Aye7Vh3S{gMlC`kG3+dn5x2{I-;nCEJB z{yryvl|x5>>91OPB1@nBP(5u**xm0XvugSoeZp9cYnVR2|;dd zB+)FLWM=r0!i51cAg}vDR=0LUKb7CyvL+1;%~IN|%S*S``hdw|9yT_%&q7|@j!sTL zM@RklLAt{BcyP^mBe#cz{wjYNG94Cm|Cq9%dAYJ~Ie8vyv1W(_)PoHHx4l0gpDuy$ zT=cuY9rF(R^5yYxBQH@;Vq$u_P!{`@0M4Ju>^y|Vd>dlBDT79#JiFgD-)#%4qim+_ zz&6c~xPok>l93}7!%s13bv-j9tOXJn4jS4r@F0+GX0TR;z|!M+I5@`l7J!d+lSj7w zDz48gh_WF*DeeB;tWxw=kb|RomYvw><~@<_-$Q=mzseAekEs_cM&Qx`Jtt?pnTZLX z4>d6+at5fC@|LN<(K-7>I5^@k0`<>3s)QOqnnZak=?5?E>De@X3A=60-@TZ;-e(pP z5J&|Vfr(mYx}-N(dK{ZJkJtpPOosAotjtB$)NoRgk`6rN*c2|S2DRb@I(FBD)Ko^K zkkL`4P5xncX}zspj}Z|?vRT0xt&P4p&FJdz#56Z#B{LBL_7>uen!hlb1?FeZ(4wNE zF3x7{q?(?88yb40rbdWODQM1PmRAh294&YsR6}gqZ=mcrEO#M!-(7BWE9;CQ)MHcg zr(@^kK>e;={c5G9FLk2;7 zcz`@=7RhIG*OX9NRJNUB1f0hCx3iRF$ZT|1lM7;qHnWt8$Oe56cnN*QWY@HteIO`k@d_s)n{cAzn3kycXMq14L0=rz_jO* zZ~$h1^H+kv3wCOJQ2@6w!)CEpPwAc~SoTcOkYb9#a$74~6BzeDI_*`&(a(N!WKw05;)-7Z+)2VF6-_Rt ztDB*dB4FJrgStufKcPKIEXcPnB_+Rn`2r7cY*&rNIIGQXPEZ{YpgNn`kZ<3lLysvv z{@}iNSA{5>_(Duf433jD>TNXE2Dyia$7c~g;m5SJmB!y6HIKkiv#{Xc2wzZx?>L;# zyBX~-w7O=FWQtIEo$vL2q?O9`9v%SCO^yU(XGl&?E-WhQ#9(RhFUasjzr4KEs4_-V zE7L}Ucztgg;_I`s;d*;}FFJ92{o8MM?KJAFNgmVKdA9%pK0~jfQ~%4Zx^DQDi)Zp!Q?0 zfzBt?^h=x=8aB41m{@=_IR!=3zpBwg(FR8A3`D|!pimqtWJvI2R00(f^QFB#>(}C9 zTmphMB>MFlVd0;FL+@Kc9t zNqRCSAejUE<`fm71q1|?-HO53(Ul8%|)(7wy>QOib$>i+npbm+=sHw9P^3TtwOXG9u+jId)G{R=KP7Fj0 z+~%ZX8q#y|5Og~`l;cB^O-syY=+t|;*_5nv#G9HYveMFF5}MA=oM3I0w)6adHa2pA zBPc5?GcYqt+S=O2tg-d6T8!n|ij5kIpMETF!+QasL8yFBbH2iXsU+I16sNLd@+aE*K|ABy!d>9zC?{KyM=kzqH=*`(= z+|rW1;9(EmbhQ}{K$;GJzTsNW)?tl^+zWz-%=L|47@#p`I2fp2^4qn}Hxcnhf5C2w zXvJAG$L()ktJ!SSs2x2pAoKV1N_cI;vQ}jwN!Rl=DIK~qx82DGX-^}7_B8zbqz~vF zgfO_|j-=8Ie-()l_K-S`&T=>e0p|Od{^1Rz( zZz7Xcg;76t!DTaJC`^4N>a?P*3c=hC0jl_w09#++=&B{ve5fE3mf3Hg?~Vb>oOrhW zy7@KLqH~-SA1^J>eA|fZz|6pa3c@vBQj-r4L|aeK%g_@f48oev%mG{>oHiJt;yZ4oh$Zl&5aqkFz3#&dW_lm zazQaE*!1~cJhlCh84eEYf20f4FHTV-l3UDSJ?Dbsku0~KcGgn|Dc#xuU@%YNap;s7*|pBaN&QrY3_P-a4_P|VmlQ9 z0l{sPq>PMAJx*s|pQOf%MBDw|)cc>V1t39!-R#eO9amwu*!Hn(xS~q4w6M6CY}1k# z6${H{(6ezsdXB2RD9~lZb@Q`O6`Z^gJtU!olPnkFu-ezyGS_K83xS+!G!<{mo(#jq zr4gF5xh1l!ZR&4XTU#47=NvNw7!87vkzq7yFRXG&pFcDOZ{CcrZs;W8()7&Q$0=h{ zmX<=0tK0MGE)>ZIweAQ99?E?URaK-YNrYO>DBzO0d3mpK&ePJ!d;0rjB_zBCW>1-N zT3X0)L~dB=6f@?wJ$zKPv}U)-!g&0SY0u_P&v(m!*_K;GY-_u&j`rb(Ub7jf2nO#^ z=(D%eH0KMn1vI!ohxquS(&~Fx*RE6GIqerRYb&6e1FMw)cGRxKd3^(kYkRkTh@Icd z4UNXw{~~!rAP5DI+WO)&76r%c)k8JD3fnI4xGET)cs|`N0yP`;p-!3`1N0pnE4;+S z)2nd29v#3_y%8jIaPUpdGWef&bnS;b?{6O2Uw-+_DvaJ`L^khe+h_mffi`Phdi8!O z)6IHBkPhm?8}W}KNmgy1M?(Ts+9sSx0i1}tC_ka^4Ms)SH{6ns*MNsYrI}xHnd*J@ zs8kJtw3?8u)s9f727La(2=ooTkC{gnp6t1XTU2+b3G-JaIC9{Ne)IqFpXKxb^mmWh z*&jWW5r>o8uALObbp@fCE<2-4;HMG3&O3fy$4MkDmp+?BmCeg@uJ3`SPg`(4g7M ztyIs~R~Y#GKQCJq!rWHN*Wgtb!%z$?QAIpRb%dy`ad%82%h^Huci1?7V zoAn>kdP&I(xeP&)#>U2nxCB+4R!0N?OpqwL!NPLqu>m3WkcMlmrexNPO-;Y#=OeSU z9KdIIZao8K&FK55r>>_0j(=vU#DLupLeo#Cu=@)_S|M(n=uG71wFa}B1sn~wA zA$mg|<$D=}!OB^Wb?G7w@BOjb_$X^&DJ`d|>9=j?%b-qw(9&{panZ1{R*b9xqG%~W zT^EkJ%rh-Bb3gVB?84HIWR5Y=JlzX|n$5iSuF3fsoqS3#__4Qd-|7SSGFJ8;6bA`$ zaro!YpC15BGovE{5C;+x(wGA%#{kGE1B@IWk8eNx@E=8y+qb`&liYc9Pnq1sHqgg* 
zP!t1SHT!zF+KIyTL=mUV+8YtWhZRrP71xq{Lr7#>S5g3%G5t}IIX}xq4Yrn(o11?} zx&hSq2k6n#qW!ze9dI`8C(4!co&YgmyR^9E6q^ht(tu@v8kLl|uLlqpQjBsS4_C+4 zfNPe#_G~nhCbGf87u>Rh>o3rXq&R%Y#?|gsWTcr`3AK zBL$uZv+mt@JZGNA9JOr>uL>Xg;NouQuf5)hTmt-FlCC;sNd7|v9gG`Jr=KHHm* z0Cn<(PE*au`D;P4c5STf`#WWRQ>#0yojL)j3dxM11&|t?&k28W`2b+6QK1j_z*ha> z7@?-4Ll$t|sTf=>N!uH1%D+D;5b166xmE&U2SN%&I5jP;1gUZY2ru07;{)}% z2kca(&$Vk&$$tWeq=CQuQ${r`i5t5OGl<&@yP|{g5G1RMSZAIrJ0R>a6XuCSiq8S7 zq0Pn^jSkLus8Hki^4(VDt#na!js$MEKFwCG+Ij8g{iSMb?%vAJ>0b7Vep^6V-xvAY z6tBmzEF#hrJPPUEm@4Cc2}04M+XXxnjcwrls=nDlEF;a$2}u%S%vK2D&cz@0-zEMF zze2tohlnL@md!Qcyet^cuBpKXe1T^xWl>R4>|i=e_S_rYD9NC&U!OxApeX{03=V)v z=%FtF6eR?K4};L+1XW1zghvYWNi8e*7unv2{%oi*_vqHzJz0aLcDCJxDIW%K}ePdTzbOU$I$@qGMa#`=Ru{Bw*pmnOD#I zt5x-9c+0fzb|NXW+y)l^4dD5%m6nv$XnlPQ@}FnxLK%6ZwnC>-{6iBXFCR5{EfVIn z%EYu%S7VymVF{kVYX5hi%C!Xs#BXzFxZ96xz1%cQ31#xe znB|M!x{`@D4W@qnY|i6RQdRW`%B6cD>dpAmoD<|`H%(!ho@0mM&f_zP%g~9ti(cG) z=iVPJ$P3ePxbi+1J5!JFTt@vH;?4CJhG6XRfk(98|0@UKH8@+Vm{U;!d!Rj{jSLNA z)6yaV%6X`0e0+S&g+eqv^9L~ta4eV7f(KNzWfhF-m`!u_ZDrjeZ3F@XK1lkj9k>ViY%`iU1p6=O z4b7~Jx_uco@)#9fTw>miuv(7J34kUU#dEXED@@Gt``7%~S2IornZiz^Q#S{*+ZV^1 znwz{{4uX5>{G93Qd%X1E0i^-=4iMYUSfpf*&iEU@BTG(FL}X;cy}3q}6u^Z~mTKiG zIGc5Tr+fM|Z}#k{udh#cBOvx?H^5nI-Vs0fae~QgJFdD2vsx*j4oY|8{AV2cq<=!$J&x)u3^?wH9Pi(b*t(e;&C3>dT1Jbo;Wj;>j%wpIRDRu@t^V0ivK zaK>8l6Ir2a%MYa40PWwQdUqI7EIW(ukinLWHWd zhjbK1U&^LI5cAY{!>QZ}u$eh0Y2k$BTQ#-EOic3V=wN%oVqzZp7SA8WcR252hdih^y5@>8z&N)G5EwMb)=P4?DiQ2Zbx{-9_5hdS-g8C(eMb05skLCgT zg%pMCcyRRl3KJd;{e!GBqlTG~opY9z6y+Np5vLaT_C}sZZ4!doQxuD$bNv499~R)l z3XacEa754lV%fHeY!^@kQe6bOFEIwiwjx*`*#tF7%BB)%efWAfgyqn%_Xp-@-7Z4v zI5WDRuon&2W0P6|xRWe*mpoqMdIYVc_npW%4rToW&)kENE>?IV9)~`4Z(w|&MasLP zqE?K>WEmlUCU2Oy%`eacLty@av=ghjtp5IwiL#3r> z{fTM^YS%^@{71F5zbTIEqVMaSpLLIU44=AUxXVVItOs=HqrOY+G!F8Tz1UH~yL0~P z^!@|=hNS-FMW>?Z3GxGx#B3&{jCgldoNlFnh0y#{G!UY3!qfOWEtcT1=gHxyp#5<` zmqQ4%$bly4`FQFBx@^D~ae==XVR>CVk4-|QCrXE4?CbvnN7$$@OXmQt57B?V8Q)q) zeBb0`$?^@O@BIjAOu+71*5OS*SHswo1*x|cRNO3SsL3&A^ZoW&${NA3VyiB- z6?ap9@Iw@`=<0DB*lziSv z$aAG?(Oy~kV+~sz2cR4Q6;P)olMY*%aks)KVY#Pl_+9zD)Du29>K=Q2-^1z2!figK zoe$GlHY(%&4%mOi#*sd<|meWZsr9q3A{{Nu`c%JZYYT=hh9{FvY4lgaQA}ogw(UJ-x%b4MBcuogO4dja6 z=;wP0<<8G!NgD708hO}Bbcg5}1A{1Ne=~=l1~-UoAL4Z2tjYFU-;Q+drEvvvkrErp z3izHYV`1|2Hd4ApV|0vcM1?LH(DqAy8Vos9Q&nBA?5FI`mI#Hyd}smJ+z-Znm7wIt zCnh#+sRYO3c8z%3w#!{@^;17t6mTRUrIu|bm=!4!lh0|eoeUNg5p+^~a^r1oD&g0# zQ&4NSC>$IbYP2?x0QrNdjv?TEczMEzCnUM=ZJD7 zWUN&QNBYdE(!^wB5fY);VnuIPMzbZPrD5t_zqU_#o&PFjhF-en{dc16m#tatdAxeg z&#bRs(INZ08xO$JOLvxvOi>%xESttPybQMq%Kt`;swgP%SBP6ue1?Gb>~1^7`Nr*)$Od3pJ7-@gX{x=vCO8KM>tAOr9WbZy&i$<1#2eW0Y|qJ04M z<#yPLJvnQ_veK{@y+orD{&-`gACyiMXDo3^Jlo2SCN}{kTHj0Y&!k-=et<9X(5t9% z;i){nAkYvnZ585FeIJ@15^bH=;y<_`3Y^kmsRQn>Gk-Hp6JVO9<>YqG>#ww~zgMwnkM~z8s!25) zOKJfbW4h6)AYS*~J2X-4Ka~$gfLYCDv=XnBNRj^VkbxbR%aMDuh0zqd-E_QGY%n!lnQLTJP)4{<3AfYN*4$>%v zYr3mXz>TR{Sz%5a!xW=jD*rJn0z=E1&tF1wybv$c3k-ZNJvKj=YDcWk9RJ**5lJdL zKTlcbzdJm2f&At4-Ca8o89X?)wo;_3J_N0}L_C9EslNaM0T30U#S`KnPO7R@G+!t) zCmG${T!KA7Ff=l{*0zASJ`%a_3?_VuTFV45YohD41N^I*M9k@l%$R1&6( zUKx$^9q4y&8@%gCkp5ts2w+k_W9pxjlx&JNyfOhC6deOVzadRV;72;!NvB9f170Kc}9%r{f+=Jsv9PCM!f|aP0!PdAybX}i{{}aHo%^}(VAm-uK{&__cXO|G*vm9 zXVuz!hx)+ms2qDiqh^lEN?6tyh@R}80C39nZx2?0Mx+ppBa-6&IMIdV74k~yOB2}e1MB%>zH=)(Z^;P>x6 zd^mEO9Xz0-uU`W}jDuaZw6v`6HbkSL4QfwEx!Ytr?3g0ziW>=#Pvb0cVb%J5SgY&Z zT{xPo{LT$es79Jb`&l<eCJx)k z@ET44rRF4V?PjW??a=HXuGbLW2bo?)>7zg4qd3Qw3(Q1+kFWod=`mQ zgM`Tk9+ypF*`xSA8X43KFj8Y+V(MLu5s3z~6Yu-$72+enjv0csm4Shw<9Dr61?TUt2v0cYIM}}_ViGSq z%Dq9MmQ#T1#%p-tj}LmThxv%Se66gaoLtXJZv>Eyyo3vmHR!VA4$9<3_AP-Gwwm5& 
zv!hz~YNAFYHr+IZtaG=Vm%`m&)KP|{?#{2L;_t**EW;U!glI;`LR-1wIB*Zbd(sKc%j3jy%!|P)p13US}m)A{FWAPQGrK~#IHj1N)Sb^ z>ZaR&`VR8k3z&&ZYoVzQ82%!|WNtQXPFU4TisJRI!`9o{o%;NUIF_6s%*!Z{)h3Nx z#&0BfLg8xI1kQNktLk*C7mw@y0uz_f=Ba0VeEbq%2QGS9e3!q<;;y(h^ulWwA@_X^ zwP8l8*9~;oCl}s) zn|Rmr>Tt}X?xSbVUwpvo`;7bBu6y=A!}C^!4$vwOLJi>lKh1+xigu>ZTY}vm-_1y+ zcOpP1>K&)*{-0ucK1#8R=&J~7?Cctfh9|=l^4Hx@vCTpE2NVY}iwHl?uBWLDA8TyM zRQ!3Re^<7>xReIcKL>VZX?GDLUFgTR)%}bIqi+dJqt!*TJVR__o8KvpvXkz%D#5VL=d!^on zzQLo(Ek(?yZ!8H%hb29_1P}|{Ou|K?c;4(z81uXr*=PozfQf{CppnxbpO8?-E84A> z55*|*N-S(@ITYPLANi2lLfE^CJK~mxOLghD9kk1L@YN|=!UD@hbjEDSBr{y>70mzJ zcSF|jba+Azq{7zcXLe~!^&(UKL_1|jD`ZkNR_!G^JX!}ac9`z>EpYNoXX%GVYi%0b zuijs;iWj~~gX|5U${}&};tAJgEcU)!NIZ;Z8QhGjm67t|QbOa{4ua_7;MnRTI`t17 z+3emRWZfdq5h{IcgR^xdX*8~dpU^z8WI8zX8m6EBo}hou45w9t|DvbXOZy>3cAIS< zSh&P>e|qc|iP0hPMpZf*GdM={KKeC%5>o8gm6>WTYaUXNSUc;HTdw7W zY&mDoix97`mjY(REB?3=h+DYwZoVJ93jvcqBB>b4-jhv<6arxk}MDl zf@@$@2@lQx%BsUu;43p`2kZrA9%G1IT2h8sHF!S%K{`5=mV%KQ5I^KCddSMiWB~-P ztPEE@V@;&-`0-==CI1VXs^+DaVOW4I_AN2e;Yp?n-jATzpl2aF1YH6h@}N;Bxo*OT;TIFN*MU`*+agPW6cP@9?q-%iZX;|B0Qs3%8i; zK*fTxva&ihYZ>fg+m_P0ucmxf!k$6a-z_UDmm6&B^rsF7%{SdO?r)3vFD_ZeVMrZSHi*H@NEHuFB67n0H@JS=K5&Pr#n~ z%j|!`Rn7tQPf85<3@j`U^3&Zc>9iI4hyW+jLw#%BX@p`3^5fHCM8Zj(bd6>yOF8IO z#k-5t1kRx^84SSJrb4PmAwH7<*nfckQu4eU* z$(zi;zWF$KhYeoOacYS{6S!cu5Fma_a{0oDk4_wQwtf5gpINlnTC3_*6VPG3RaNCX ziGy`5dQh;=!Ehd}`wul>s68;R3Io;L4lnj1u=;!|J3%&eG-7OPFzrgVGBqzt{yAp? z*EJYb+Auf$!L5x=RKFv>5>CZw%Eky`Y4&hwUv?CgWHrbGpYt0kusomV2?Lni^KJk; z@81UN0)2XRVc|FCA#+ZC8{I9%g|Zq=CZOed?(*dv-~K~Hgu_Y?T5}DUwwLI&Qv!96 z{qmPSTPeCY8ct5<++wy|&!+ym!HGf^{wI_d1F)1!#*5&PmDXxc?^!lD7H+v{sl5*g zls6EJ5c!F#bhCFAf%M8DK5Y>G8_?tZ`T7M4lb6*=%>tS> z5E{8z`rGF~blAGStMG5Glmf^Tfb{I==B$*A3_9o$(h|p~r=a-wc|nL5sLhkvHNf{3i>X3`&rsD!n~<~R_WbYe`G$)fwO#8e;mYbN zo7-3TOI}w_U=IR_$E$Xnr9TK>B z<|R=U-!sbK-~c8q1n5F(7P9fdfE-SL_f~e&s=gARgD;(xlO~!QL*KT#&NBj2OMN4F zeGt+2=|u@+exL9aXuo_blmhsd`NGQ&rOb`PO@3Mtx;s7s*h2U z21CrK)DI_1z+i-NOZ;1yg^&)sxAJ33#NrN0PkczpcPMy_)FCSjbWX(uKPz;^tqXC2 zBR%H~0}d7aVxwoa`a9tNP;|SVs$AiiMN9Zc-hGD|u1;DR;IflnpxAYK)=COC>?%|= zomG%IkIv z!e_Qihnl@EoGv9{&Gss=Jz}YDSF}a~E`yKCp;HrGSBoPE6A3CQH$O9%*1v>85EmAc z`*|9#^%S7iLaWWEnQ4Xr6P|-Ea!*j=RfiytWf^L7hnaN5UCNI=M>aztIj~6A|&mP#ZCrzTRIxGVv&0hi&B-frHy1Ya~HoTgu~=JeMGCYw#vNd>ZfnB%a>$=9JSrvYl@tXulk? 
zL-xj6dFMWaq@i~wWS_HI+BHS=VigV5t$_cJjd|S?m;6S>eE&va=Yu7L9JkMp|5u%x zBXWE_kk|rov^Y?-{V+ZDFBcO;VY$Yiq)6otl2Sqq))U(2bcZ^M^OE$fYvCcnUc}|P z3jgJ=&uJ(hw8v`2wE~HQYlS5x!Mf!@*!|~^afx=LqtX<0Mu1jn1>qgRWUGLGVyfXv zt~=_;nt!jni>E?uHd5&GGB~L3g8fFH4r}&lF3VgUnk*n$ir)UtdzJw7ZJwbMe%EnR z1RJ>LJ$BsPo=Z3Yf~%0Gdf`mtWWpaG*N+VHF_UU4X&Uu}{zkT#-rKS;K#dy*PJPt{ zmi3>K=zm?K+eKY}zL}!ITUkyYe)`<^$uRT&-$C8;%lcBec0`eeERIX zp${BWkq?(SCZ#3iPo!_0pjehMej4>YGiJ7K>(PoUQ1Jb?GOM|zsH>hP6|#U;jiiRn zaD#|=fsRf4#a$M|{U^=A`2wOXx0^4F4R;-DWaX_EOh&jda$G&CVWy8_fQXn?NIUw^ zH_SY?v82rWa8b!{0vUV~2&~9@&~D;!FueKe20ar<5c~X2_eSg&B_+kJWMCrgwJFI zVMPEx(VpB54b4&s`=2l7fLbnq46C>4dfXl46m2Dm(Klccdx?3^c;r%z#7AGYB7ld4eKRZVppWZS1Gb$o>RnZB>{*KceiD*?GuHH{MT`p~G(T$o?I z_dI>itP(`BhVAQt;9*uqtF{)iF;0iIodM7T@uA*R^}MK5MC1~A@`%tJ#ESYmlu4ju z{oNdyf}V+*8S01HTVl-cOcchx!r&iiXrmS_p%+np^ZSbMPew~f=1A)3zGF$+xmB~e ztU$kB+i1>#wVBkVMk7mxf{MXbkz40EVPJioa|R3_{%kZy|Cxn;+lhK2%uVc`RH_mf z9HiiTM2O0g>?3hm$Cv3P;dHw-a+HA+z?e|3;4JmKO$RynN_t%$IMeFuC*8_ zl3CWPxE?H!W@c^EU*|WuR2KKNhAI{V+7hu;nA8pqs-`m*1 zKwKj1-f)C4S1f>3J9xhgAtIXWDwPh5kdb1Q|C*)}WKM+*sR!H%OGm#bG-9$l2Nc!_yxYecjR2Q+v?jI@$FHi+sY0r}*1Z{b3PkZaREX z&1Gq>HwSeBG$mIw(>88R7kj8Q-S6SQF`Rfr;WAO&5*+8iFVC9fpN-8**srIc<#9=J z)@puqT0!?s|Jt*T^Hq7Ovgg}08@#WVCl%eK3u4T>?=as3e5C>u%kqFJUA%iQ9h6_X z2wRqLR{iGXe|b@heIdEjS8=zfhxWj1*}!bi;^23MjUt!LzOO1@+7gtd&K{`RaQGh8 zJJx9R`s0!Ojqsal`B2$p*NbOp8+u@;7<74!5Fpl(>pn+H=&nVkvE1btt)5UNvp6HQ%bbZF^xg6f@0K7BUt+ilTt{( ztxbRRaYvt&@&R>E1n_`sASj)Ez?YWxbQNvm!=3sM#NM$Ho!t)q9n@A@DAccd1bM?Qwz@4({>ept$U}6*#aCBRWuq(|C91h0q(4K zTT~s2ghAH>)HfT0`df!W>-DLEynKZlRoiy{_s3LAR1~;Uif}8I4YK1XdJf`Y>R`-} z4V1c_ii8m;HiQ5HBVPX;X&Ta-nhQZsCxKXLltMHTR=+(P=F1X7@!er{pD8%=a+u@8 zu%1`6z$c)2DGt|zaF{w&LD*uLGr?~sJ|RRZ-QH*^q{u{P38w>xu|#pIXyGC z%xrJI9C{T7V$0Cuw(sNu@Y)xXcZR~k2YRu{gWcN}EZE{<6rPXWseKC}(Da}j%v{V| zm}xw2Lv5w7(M!n_R8M));8;evpH>_yr+;v~i%E<@7e1suXNe?o2Y0dD4v%$uMx=}! 
zFn>%vEhG!WcS}{@=aNSyA^z{+Z6QG29D?v|9)2&tzfQsJ@gA1ew+Q+Ci$IEogTQH} ze-lU^8hdus7rnNyF-`Q|&taH{9{mwX`Y9y*1>Z??Ue37{6EUv9s z-$$z1*inrB0H@Ug512^(s*H0)n&3!Va8e0S)ln1zt`PLYlmm<&DuDRYcVl<6`X8m5 zJp?sjDRh4aBgyL8p)d@xpjfuEcFKhX>}@?=Ns|D1dGhdCx?p!!upj@aeoK3ppp9-iwEjKCy_Xp-MWKSj^ z1B2G_lZ>O3p>eD<968t)T48;k^w3`ZM^N$pUH>E zZeI`Fv#=xNJagux+#0VqO^!7MIrF5fL=@^ z6^)zOP-E{SY$Y9FP;jO&be;YmMEi52MJDeN*oS^c>rMoiz|Z2ZU-h|DL-XEkgBt!I zDGHe?OQsCjr;mJJ-eG_B70pYgT+FU2D=&T3B#2aRu99%5?XKZ_kKQc8Ax%S$5tj}j zKD_JxqXV1;Kqf>-W=;>)z%)12jbkc z#RdAKv|+pxfAo3ue!|L_odo;>1~7c5&-Oa;UhwtXyT}^>+P{Yh?|A&bCcOBf*0+>E z)Hl^K@00)bd8OsV^UZ2-R`UrR9ly`D^y}ARGBO{=o4C2T0cg#-Fr?LGXoDA_3b|d3 zaw2{;F)`)I(KZ=jCy57da?X>ywYh4{7VFVK6u>cX* z&GHY2i-npB+mY{l4oFmPFG|}g&uHh)$EgJ7KZolH;%x8xp|$#<-mw}94G+-k>%OI~ zB9i-iMuui*0cTtV*J{JNx_i$G*Cb`Pw0jQp-m-ic)QgI!-M?RMwD0sKy53yuFLoIs z#S?Oz`JH)1CZ}VbU6hkkUID>u;QhAIA@wSnwqg^G(OQ))ParA!!W|A?me&dL!>$mE zf_8B*Zs$2aj!%S-rdL0?5K#OqE;NLT`cX05cta4C8h2LCHp02C0Fs|oY6Gj`%O}$V zwJIbnWrR0x`vb!Vs8NpzY35=~LxGwJ0SQLb@OvxBsrHPv6_aOn)Vgxz{Jtd^-N&hZ zW5KWX?UkSJA1)21Qhl6zSc4<(q`K>C#+53sq`=G5mUCWk){!2aS|Ycw3hq?`me6{# zY2nM<@YDb{BWZ^ncB$6Y=8pO8h z!>>!Z?Lp?Z2dEe7ND#!WO8$!Og(k8vGyJ4avwx~#98t5|v*{G7@vjZRu=Xs)D}(m0$w7Iht|YeJ&$S1nTjyTyzS0vvV}IULp4wEB0KxJIHbY~pe{caR(RCZ} zB~zyhdVQdxec+r#r%0kH3MRjPGSM74@L?s0bH--HeOk+eI%|4Oz4ogShZSy$q`l(6 zO8U!{T0MhYNt;cydhn@FGWHdsK!?%zdiKJ=wWnL+RRaRoUziqEt#tLRST2_t|Ge?S z(^kGN|F2i;(vGq=qGo{~f7qz{=IO@q7;MqQBo0o_8=a&so^08K>S?m1u&tM#9A3sA z?w8in3{y`SS&pI0i@90eX1coh-4IWPii` zhn$9?$5&qEdTb^`;y$Jepi$_92`0l(BEvH0(b6UOnmwmq_$#3Xar$>9uSA}~-@+Ld zB92zqPG+tc;HcT1ZT8KOHk&;$7EXA;A|edNm3oC;TvvDD`knr8ftYaZ7Z5#D+Z?Hv zgVGL`8e2qn8V*PG$EY(}VzK&aq6U)OT1Y`XnGDZ5!UPHqM`0-}DV|M}RkCD7kO*!L zsSW+pdRQ&zEG{1XKMKiEJCLpCW1$_M1wD$=8b4aIs)%eNgxEER?xmvib+$F1&$ql;!O(i|zkVU#A0N=zJOUf`=Mv-z2QXBUTg!3FHx0<%CRx7n04C_Y z0%eAz^-5Nc$@1y}JY6UKa93~Y!v_|YGiSJxdu~}%+@_q>*tRak*jaB&cneSX)1R~k zqj$HJF(PQja0vK7#JS^;UoWNi-maw8C*g2Ol)8p09PUlC!at5{J6TA-rP1^wvT+p~ zvi}{g5ta0pP-_Rx?N`8I*y_V#hl%>&;WJvgx)e?iJFfrnG%FL7i>s;A$UFI-wRIO@ zKns0hC$$6IVgfM01Tu4aB{)v-QPeDNe9~n#a zcRgGe=1b56OmCp!PB^E=MNp~NQ6F%BFbG`qVo zhpnu*=@t*hD9U3ht7Vllo_rTp3~L+!<2JCQm`F|$%N;N;{(vTns&kWpR0E_)n$;V# zd$Z^E$>DyGXq+x%SObb;{|*OuCncU>4A7`i$VntzAH^JAa z!NKxF)cHzYHp@wE9t|z zbc;WxqD zC5}IqlE|MxfhjvUEOedv#{EJ}xc$4)Ds~8+PET6(fO+ z7v_=)s+`6{UHwGD{7S<#pO+@qxtMazhU#D$V zkjSrt!THgSmi~L|5<5FP3=@96lt%sNdn zHF3@I*FrDP&y|%~pfE&TThn|Td|5f3R4wc3;c+qFgb5M}#R#y#h@NSZVAZ9n_4c*^ zD%xQ-hj{xebZ(nsqd80@kx0fyIddB-?$hrRwO_PDwE1Hgq6m3z6YGa`T(>=_2U{}f zDm*+qz#KYiqWq3IB@+22^Z3g5jF8WtKa2Avbkw^&$ccGa7e6~|H`(mR>Ehx7-YCF& zntdeD>rlPyZ*k|&yP6uN83cb1z@zDu=JhXie?(_Gp6;-AppkRYvmRd_Usm?-jUW5P zNMoI=S)^~@L_v%NSBq|5?rRL~K!&W}C$PeNvCa3eAU{7^(r6DT!Y~~9CfqW87Lz*H z)fKhZ@H;yb9*>vKA_I^pl$r5)UY={9#`3|OB+BPsMrp8Qu=e8(51(KG=S!=}`oqG4 zBuI+&_4Nn^h49Eorkf@vaM9%AKcvNTzbGAHWimQDIv7eyN??ilrqa9!Rn;p$em5p< zZYmxe9HiWo;!4y{{c=AyNfqc^2*4;KBPk}O3Mwi%2gNu;Nk*_Gh(uP1|N0CAxR|+w zS{Y(aepts!uI}nVu)nRScwvE$EAq_=o^E`svyjOULRR<4DKSiaD8K?RGT6YC-y!Xj zlfoPv9P2=8Vs3S+84?DNa0ULKz11HxYhx~D-?Vpk=NNOXw~Lz9_wcARWJMy8%*I1G zpEmbfr~)Fv!NGfgNC**@dMhX(&<mnK_FJ|$&k1r#bCFadFH+dDhj!2hSr1zx?U!X~-Rc0iiS@rhzKF9Q{M=sPks zHPt;Z0Q1Heh{MpY{|qP4aJq2#vbl;dU&pJE+ViZ@;3ff7A6v2hFaz1&$u3!N^;&=anStgBhqe%_SU~`l1-r5+fT3U>SQ_sGiwbY z?`Hl>&-BsmDu#tKc@7#yG=Zmvl{g!|{&Vg)u(bA5>EiC8Rpx~liN7PR%E{GDVuQyA zEGEj9m_LHV6XSXL4bTIqZwt+{MT{b+2dAf#v$HRpZZE;af`I7`TNsEOKrs!NkrcSr z8Vhv1;D!B6D(o`b&Q@aT0a0~zzrQJu_eM=2E49CMLbNge2A*?vKATgbmG&xAB)Ih; z>9A#IcQ{wzUtoGT3@{mYJg?i;JML zrAGG)Nyjsi5Ph{Ch5eVM`e=nSo}YdgoChfyus%W6VvlipHpeWo$#)tJd+4v5WXT{r zbb%utonXamU6~B--O1wS{*q7S41kLh{#ObA-@*>^ 
d|JHoO78AQ)9l>NV2~N`sUr-MO#%z5D(4 z{`P+NfBQM$@R?!ezOPuFP zgO%~oVm^JL_aiAYGd4)JFl1UVAgb3X*75Be&n{cYNLXE55w$2*KU@--E94XE+>6s~ z>wIH_vEpf0rnNXuSY9FD&nbkktOi?BDwhgIA{yGcRU8UQs%EreI^5Y@mz{X;E`fT~ zCbUz_GcJru7Y^HCOs>L|LZf5kei00lL32e%Mg#ArY5xEAg6!kFcNo!f^ie_Y-@mU8 zy!R!=LNO(&U3I&-t9>*5#FQiji&jwZ)s@7vXV0W_HNO-U4Gb32i$~nFKp3lsw%kf) z?acMdX7lp$5Rj3PSy))QNKE}h-b@!Yzx|7diet9*J;nQUOz?P!A+f1Gt9?1(a#>bH z;o}|ohB_o56re0T)ORefWmWM(D7Gzwx}09 zA0Gi2MbfYpXHFQ8z(;z7-PSBd?sDT+Vjmx$=(sq-x%&;0PAL-;>J{Jp3cANo>7_6e zbG*idjsQ^sr={qkq9+cE4US5KdCz&n4CUpK$FqdPmX?fK9Y@l*Xig?bTxl?18v+wz zr&tiP)s6sx{dxlq-EzHOA$!b4Q{l>x7*M*v!o0CSiz>i+DTcIY`$IMJNN}Gqf>wr|ho$>5+ z9?M9Vj`RI_+#B+Wt(1{18GSZZ)@qA>3`S;V%-h$mUrRH7d#qZWnL$cc3J0HS)voO5 z_$=ym5U#}7W9Zzzb<{+kjON^V-%#xYpPsU^jK6=|@&mYSNlD3<7FbX>I$1bZ$5klb zz}5E@w%cXro3lM5Mtt-U^7u%;fhHx!jnkcpo&EW$9esWMES`ZaH(_TxyNxV&69a?q z?(XiwmT^_4ou3aE8z=TGg&Gdw)(xla}FkJQqCdo)8}$1?8t-Me>zRa>!K z9cwRMyy)3^RcSN~?r#$Z|K`!}-&W?P^#+co;@|8X90)reDnFK0RoT3JNdP`T#HK6M zw!QK?=H0t*Y_b}0JRR;k z^x&Tv@ZtS?%E}xbOB9kAxg|4hwEc3yWn9U-N$XW$&~!*Qme>)WOqx0@yPwkX!A7(=EfPd0X^@+a-;qZ1PcZ_f9R zyAW~BR@!_7ZuJfAo(Txxc&oljn3|cfIyFG5o~hs-;IBSq}9fEUTnYyJDM&L&@y>~2EoV2Ps+%M$yZFwYi|Bor1ev| zY!-fRxEWIiUYVP>t3K)N2)O089KdRwz)EG?ayvWsKbul?-5w>Q6!#^}$jImkggE>W zVP$27=mjR+@H+nD2Y;B^vZp_gg~}{}3nJ`xKvgpZ)SC7Lo;_PXosd+Nmyd8cCB=G` zP-D^mPP(z@ldyY{;rP#awTY=?KR!|{mZwicj5xiTvU#itgv9loTfmTUsw`QIo~GsR z8-v4ia&l7bb!b8-g+nO>X*O~q5SYu!0j4c6QlQD=&=YrsgN)`{78!}cJrIsD2^RnN z*4Dan&~2t>t{9lgC^%u0M7ak-3PF_1SKtOXmJ z#@5@<|H=kNh>_)<`EK{~{I*>)%KN0G;JxYM{4ZZ1C}B{^o2kUV7a71VUE%{uOG-$! z+wqSPa_S%Hp@mi3HFMh;0nb+JF38}Pxmsh1Y6`3X?rwn7lnIIka~*FkcQ$F~ce{LJSbEC?EUcCun)_oHOloX=zC! zv72-g_)&|_9l8~1s0>QKuO)GeoM#Xr2C`{qV0$lx3~)>nV`D@j^66E>M|?MRSqM3)dNUyUH5L)Q53upi(=AK4H@5~kX=#JtGseMsjpu(`Ye3M={5SVy zH{f)8>4zW#8(Sy{CT@Lp(ZYSJQw$>hSuH(8`mUUH7#KKdHy-};;# zP)y$nC5RZftx$bQ=^H{es(?ygU-{?<;ky)fd1bmG9W)uVvxmMztmshXFq(3NE;zHAQi z@ewvKG^9&WhC<%Fd9%XGN*6eigTVOoDf-Stj-ri?P3J9G0s^~w^2J8yMtv^)m>X=G z4dX!Lm=61SPAeN51`ZBv@Yn0>>wp780|VA@RSS!}msGQ8WC#BDet906-#>jKcUWq2 z?Yj5+yFjNsW2le%w&3W9CXj-*-OxO`-@=xkeD~Zp>Xw9vh)z_L0`f(h^|)B_et7dW zBO{D^V2O9|yXMTy41796V`Ee^E`Pyc1;IJ{2V=2TerJv%`Re=8$p{zbV*-de`@2g1 z6V5qOaSHj%A~A9sF73Ky5yMuY%7rq@${48}hLIeG4c__b;V}HVI{_&Wa>1fG&XL6{ zC^6!<2Bau6^;y&$Spu%-((_bCp~o$O=nt}lWj!GJ*$5q{r zoU&I{dF}1X|K`^x2r=X}b=7}(pr)C%9aL9y$5DySsMGT+yw0X~zG6@FXWfT6MEHWMh>CGvc^z zmSO^Bca|{7mk!%baUf>L8~xGI(cq)9#>P}n z)U#~8mM&NPCaiywAv}76njrKF^FjaQz_wNOEE~6$G z7)*};2|6){K{$hAf*C-g5cZU9aQDMtL1+6lwEmP~2!w-!L&)!iW9++^k^WjJgi4?+hC z2ICdGSg_Pi_*&iz+NEEF(TBHAokJh4XBa2@>4ycS$cGKn!vX zxN#(Z_+U95`rROTjI`D-px*n~LQY=ZeD8pZlkYj}uf@gNB@pr=m&3Jy1$|)F7}&4s zOo)q%3%pbV2g}Zm)gPT%nf3{0+Uq8*b7m@P(_lv0wS1?0$Xkub;dn}XcfP#uD{*{RjGUNt}Y(p%pJJ@lt zfsT_C2jKW~4_7ogV{b*&tK-hF&Qp=eUrX)M>`=od{5(|Z3vV(IIRMHU`=-YpsgNIH zQWd3D`u2(p17~O0ir1*kyJq#DcoZJk9pW8IA$N@J#s`dDE-z;^Y~;*Vn8Ug<_j4q@ zk{##6Kq^|T(i!V)ZPtH+Qr|~L=#rKG&)_C;$q=3-)%R?*%YTh%ZE0R$|EIBgSE@LU zfWWV^t>Z-q4|%-jJ|lii?>QtXDT(&2#0TX9k z%8dkJrMi@9eBWw>(kClT{`zHCJ8sJ_L!T_P)M+#Ex=yl-94g|VPYdE9&hzuoi+FEUQ&F7{4k!suz05gF;|y0<-2rflWjzO94N&WYfI`Rwl6vKuz2E;P2aQG&>+U8;_T&a&iy zB6-hql{+6CNz)=2M~W^G^mGAnq)Xv?B4ErJ5ef1+@a7F62?-ooSsBg~NgdJ0~g4u+8ozhSW3)g5BY zqmq9wh%%%69)9L16xbQH`3yT!(p0ch8}l^eRmdMM2SL>g=+wzGGlHAXuZkt5?=fOw zGLd{MZ?W%=0*?q3p-u-Y+&s`cqDKsYH(w7i1eFMB0sAC>*MfdWgW4@AgtXK))h^A0 z;;^@AR(pVmnM9oPpHCpS)e^LoBei);PA-YR=IiMWG<4{Q5!TckLNu`$ zzRNmvl0a$FGA42AVrXgQW;nX&VEm7d9_LN7%l9+Z3dDdp};#K z&%>2?co+hd!9tB?RJGH@XL0Y17-ooGsO@x-nqHkPo`AzbaL~hP&O*DNAlyBJXj@QF zV0QjzcA?3Y0Yr4dzI5v7U$vHZ;3oW=fJMM-&i-|b;1 z*T=)5WCr+)jsDm--;Fki+P3{vM!p_?brc*`irQTdg1$_+RO?;}N9aTR9QY{q>6Zw~ 
zifeRUk8E-$ZJ^TNT2?*?V%A-K|CKT478WAo;o*fzsB3F~Cxpnw9XnZI-Gw+F58 z_<7V!V&V-2VFdreXE!$rMnV$R)+VO?>Xqg9k57BbBjAx0fL^fbu775pW}c2%@%~-y zG}TLpkC%6H;{5*ods1>TbX*wxkITvL=jJU&GGY;?6{wO|*!MtkJU)gVc$4=$3_R0bl9$J*fo z_T56OfNY|aFm;~d19G~6pPt&FDs%(-VtwWMTg#?c91n zB|ZoTK`;u6Sa313sXTelThW$CfN1g>8gzwBjI!+i?V*#QIXk0DdLrE|3qoHe^4t|9 zo5j)~w_ZwDVO1SC_8z{w>(Vr3Lk|5c==BQFUXU&j=$yRV8$0l8McjU)fyhPsVYnJV zHvr{&tsfOoEfK)ES?{<#pl}mL-@_El?3TPmx?8jFdKPdQS&e7*$DtaP21;-#E@1j4 zUsumc)Aw?tUbjuOY{#aY=qFgTbRpNgqvE0>6u2oPH-k_QIH&)B#ieEfd3kw(iavo zIVo<}iAb3Z8xN$UvU0#OQ9LpNlLRa$F#wU$AnH^5?&eCHGSjuYe!y#TgSI{7Z5@t~ z1mDDV;fj2@oS32VGT7cuPhZRIxq%=nB~gqTA%O4-w?052!3|bEmU#VmkY@0!PT7sP zy6L`eWe_-@@I91#I8Fq}ITWB+b#EE71+5sQIZmKnSXHthn*DMx9E(}q9UV&=0Vh=(h$H6Ia_37v*YLP zU}MR%>>ALhLsASM&EKNIfu};ykjF2zSPI_9ZH0jwU&lR3&mN-Xx@?)aC5YKY2Jm5< ztq2TDLDpB8{DicOU;Yb@OwISxQuhT-hIC#&xf;%wvaxv-jS5HjdTd;k-C`mhpM%!_ z@8#rz-QUOBzBlj~(%Dxv7uFfnPmS!`^cCXLObUl+gl}UNTS&>Ek-1qQvJux2PU<;EtK`LT2=Y4_uSFkb1zb zkkHIeGYbryAUUWE+HuT77{ekyH6dl-*#Ct4P@UXfX=(;%Wo1eK+s7IgCd7y^Ox&{v z^t$yaZKezk1IR(i3%dDOgd%Y}agvN%J<&JGGE|@Z-r2D{-5$FSEc*5hzgRuH=SQK+ zGBs^NBo^-5Zm~LfZj(l?7cN+mfV(iLn6uVrCzj2hk^*!}skX>n+^`jbC6?CHdl>!( z*VU}=2zx-W1d^}Bd4x*Ww^R^J^VZNb0eEWe_#AFfaiMUGMu+%ijr0K^ zpE!kEbxHh~zIER&-ls2oeOroS<H9WwlF#Dt`?PAB^$&^XDua9QFG1zkdBWI@bYi70?y{gFqm@ z?bKUaTT0KLOUuX{jp8UobDdF8>Z^2Zb5LKrY(-(~#7+tiN6MVn0e6|2nvx0D+gAb{ z!jB(693)zuS0xGx3x%;VzkTD)EY*Zc6>6D}eR>8eH&`u2ndau^y_IiH%qBf>jcx9k z(pPRUD}&B(yOx%}zI$GPW_ricLt}uGoO4ErJ8;-2xXpgNPlj%9XQ!#7gIuVE9R47^!*C4UmKV4{(4agoknhk!5>po^8i!??ii;L_+om%g#m8rK!k~|PvvUL;%H)o9dq3x`m=XrPcN?P zc&=kxh@}A{C&ifebxrp?wAEq=s%2yp7lw)}!V6Ljn`N`o-(!r82B|wq@ zJL@$#WT^sDT&9bveOD*Rk2chjJL)=_H zPw!1432760@^{Rl3JVLts>2zOk56Bs+_8g+9yLB9_&=t+mX<)ttA9j)f^qDa8>*e% ztyOh=d~I#S=ds}U>Ul4>ARYcC^AIstfg-|wz=;X9w2$Ak`!sq;y+&b+hyWvy-GByUqCs7;SDO3Kx} zghY1mF4;mZdPorY-^Lts65!rtO0_XKssLvP(ff3C0BUx|#q{CcXq`@>K9>S(kz0Z$ zsRAT_ACM(DXO9Mi2L)d(x_}YC_88+{smrkwe$IpMPGFTzKoe8);$ix6(UA!+w!epA z25<$m#If1rQSV?IhQPRAnIin*GSKbo3){MzVXjgLl@1tD^PibLYv`B_L@S(H&u!wO zgI_8-o_sG1A7|7N`aV59x;F|}mNjJqEGY0IP_t5PNnlXeW$Mv6n}z*rrf=i5sW?M+ zW7NzT?oS7ZN(`JmH>`{?6W(B7{jtkjzDTCSs_|Xvzi_HYRC~K+85qk zn8?O)LrSMCM%9w&94(M~di@Qn*0CCqqk)OCStpge{ckNmRU1OKy3Mqlpr9ZLfI9rt z#B_n*f9N0en`9%Gc8_SDQEL6Zy0#{Ddi(1i0f8{WDgJBt5?J<`OR#KBHn$5Dc|9hq zd=STzxkEkN4ff&kAby)h(-KK|T{4(NFD{>e)!lxa>Ko74KNMVLO0E?QiZ!&%%!7Ny zn81^P+@%~~Y?fl?a4fu_eXcIyKkPz;h}ci@SuN6u82azglhY+k-f2o!&ASXFAu#gr z;7Q*8oJjig=^eb-wX)p~hs=Pu1^?)^e+nF->q{HWNeM{YA7k7rLlB#Mr zV4Ah<$k4xmBm%?;P^Jev0jLzc&Oq_m7}$pFog3Aasqb52|CvWoOt*2@((5OmoZot` zE{vs8_+5@IFKre%xBtxSmZ{9G|K~d|P^$(*+xKN}6-iU5voW*rCiX9zN01#FS3quRn2WFZEhIBU5YDr$i9 zQpNpp%#=~41TBy(Dxn!Mk?Ut!B}zJr0pBxM-(_D|veM^y!@BTZM}p%+hP$?rz{bC- z%5|0Ye;>L_G$XHvUkr5^ht$09`kPC|zC=w6_8Anexyj?fR@*u{7QXO7Kc;(_E9+{m zf!Kk5Ws_7o9DdT^h%Q^sDke83|g@M64oidci6`iu#U+l5} ztHh;2%+-GOe$3bIvtlTjGhcUG@!)7^?M~?O<(K*KppIh^_~L@d<5=|iHN;x&qs7mH zsipvjrupx(V!e(~3*L;RtgOE-hHh@WD?U3^#RKfRVrlzRQTaGDRFJYJ#I<3C80t=%LMpsx~b)~GF7M$#j|+7XG1VoGdIjfZAi zCw%Zo%~jyXZjg;vQj9*DtZ-v*(@7|Cu{#r9YWi2W`R(?zxDIr_a*K42n%34jx30*=3a-^s)9*RC38*PI{a^%~w1 zQ|=_Y8y|9SE;~~k_lX+ZUG%-qJknH_IdmI$wEus{GQk#!=y6O6wcSD_0yz>n!y;Sqd zGjj=z<0jpUI_I_>PK$YeQfu(bKS@9@nG-L{e7N8&H|!nS?9Z()Emr^z0k~o~Yi`Rw^!+*>oE~$Hyt3C(1kz^E*YSFSg^!c< z1fGX@HwWQ9<2CA79YpM$|~!Y`l^(<(AJa7Nh2OV z3F7_0<6ly$b~g-$yQrT(f6hFH!f@BlN#JB@;2+Y-sr{!u-?5^p{YjfV-Yo{bD06O( zu~FF%r~Zw8l6>_n0(Ebr0rG-1C0)ehYb%Z-8nkO70QpoB2f;=X*|)YfIL}EMk%!9lYc+x1W8NVZwn6D6fM_Eb zaZTfl_{rHpFaZMt{buiYsM&gTEJX!qDtc!tkJ4EvLU#C=>EHK+ovk%Mz^-*C z2Hjc0iO#1_pWbT*hV+ zOsOiuD($IH(32UenfQob> 
zH?S)NMUPEsOr+HEnZyX!a~HlQ$iGDz`zejG-2egG5TebByh8~h-hl`^9W>VTYuL35 zolDV@XYYb-^|ea3wTrfOgg2o(x1l^7P^9YWWQ#|NrakQz)qUlDaCsU6KPDjD41kljq(d!pM|^0D`uH31$m)K~QNt46W!zi?@IL1 zUi6m6GL={-mIjf6;$JJmMSkz-@ghOm(NCtuqjY{BisE9~IxC`uE4@`hCz!|>W>i)ozYW#iz)HF0T6r)v|N{J?oQK~wTb~$sK*I7ll zvQW>`LNWzWB&1HLU>tswf9Hm&x%M?ZJ_$c)1|kpIXF?&l^pU13f>5u7(;Q-RD{oS_ zV1OBb0>|rP1gvq5iYnD)ZKvZ;;3r$2g};1eR&iLUdCbA`8W5FMnshO7+U+_X_XNr8 z%0b0vb4L(?v5ero%|Z!IGO~n7N?IT#92y;ctEi7g#``c!mTvk;v-UkA#0v)s#{^@~lbn4lp=c zgeR&fMDle$tmaSex!jz+0h0qFBj8-3`x7#0bU|7MVkyK0ur|Mc|DG(Xr|qh01V!a8 z-b8Q3MD;J|jWTtAU1=YPt6gq$GBhI)97wgF%s*R}(l_Q)pJ_n*ARR+`q1VyK(MSsEDC^Z&&9t*C2p#hK2SzP5+NQ1*(CP19}`mdodNJ| z;a)4!i=9>OD6Mbo-kF9RY=| zjXv~D51^E?pDUNPvSJK%blCukD4--aCCS$ioCu7ZlY9)~HWlM4w*-9Os|V6`%+#=m zW>3WP;{D1u-0S&=RqG4QG@8?s+b%gXu6XHwBcw}d1zmcj`|e&>(;j{=0ktgnDfbp= zz?hhF>*{8#jqUC2JF5!6e*Fr=!<#zX?II&18yXsNX{u;$&WLosBv@L&I4ujRwoe^_ z7mzQwnm9jf`l`f^3|^jlm~t<5#k3G-l{hM7hcRP+*+17lWRA~N;O7?##Gt`4HEpay z+Vr@+cRBd{@&5@spHIAo$aurE8gcRV3G2?jAmC}XAFnwyji=gqj|HNsjZsrwVh=F{ zk@q z-m<>8=`L1%FKm;#Wclzfr1CpP!?oeudoKl~zY zJ5@uhi@(sIXiVI55`U(<(LQ<9c-YtorjB~f#*XiQs{0ck{k<8U$_Nl0~4mmjvGk}3F_DR){BfRnN#Jwq>TJ1jp!+P-3MMbXjQKun{?JCq6Q zi3k?QgQNugRX2h`J1kp={XSb)+xZ;sIN~UaI3dD~YFJu=A4oq}t{dXGiyap~ z2>n!IZ!u1|-C*G1{b=$sc})r`8r9z=@z2BeZOhu?CPTnL)OTRJJ%{vsR@-=){rX>)R)po7@nu zex;f#0bwm|3tu()P@n)mCVSW-I|^!Z)JH>)uwCZLqJDn)sg#NduzW)E;r2xdA(D_U z6}@r41_Wj$Z;jC_g8qZeqfC%E|*u>qlXwh}e!)SKTuS_C+{G z`vx5=tkbQdM0HyL`WcLk8QpD484k;ef9gAndbC75X*xOFL2`}#&lH3O%} zgm-NRkYXFMjsy=60R)wG3oA?VSn0yj?@&MFb33*nRUGa>el|7`6A8JG-0ff4Zpzy_HZ(?Oe1RV~?9MqZ3On#Wa%sHf{$o0&AfjxG;B zx#U$M|7)if{GBh8ltLK+m>uamTLT{1-i2Qc_;I;t|!1mI?h zuHzw}4jrOc_3x3=-LjTDR*f8cD?hvn-a5Bg+g4mGRN z&${tRf#e1N!Yk4&%gf?dn#<3WaviMD0GdSHQzCx?v|HeeW%Vqo0>T6NF8}hMpu$ks zK?e!@zrc~WP{g)S-`X2;i5USz2w*YA{DQEwwf?M1D*-J&UD&{*K92j8$(KEY%kOv$ zT?bi?=<%WlNSI_x)4hYwPzZG%XxShAZiJQup0wp?z+~>Qk*qEi`aS0P%>R~o|=BakjDUOGTwWC}WaHaP=M?5zkDP_vlxW}skw zGV1|^?6nKt(Q?}Ie#=7#u{d=5xWSc`KF7n zr67}Vq%rN=s`L@tuwtm;v20UR{J|9Now|w=ty@;FPaNA1xD&YOkoBKwHmHq4As2_M zhbbq>2$k0Z_>`tO|J-TZ@0q?l;CuH!bx{Aj9aQA6hhA`||DC`xW4AOBkN;a84B zuPhLpttWQ<;Omy53M$iALeqPT1%6y!MUL!L0zSp^8H#_2W8{jhWY+LiG#=pbuzoyI zX!6IAi4Fo#@A-d|c5`!cbe$cnY;3(ipW`rBs?7>gH4fHFAgSVn9RBXpTYE)I;H3(;>zO<>$r(i!y+dYUg)UdpjkH~(}Cvm z`>*x0nw6qSQy!?e&UZnl;I>Li)jr0r`N{F9s5p?K>l6BZN(~PUNhw5buGVE{W=^df zm~l%<4jN){AKp7&zSC*8ZM~JKtFODb&*j93K)}Yv4wHBVBA!D#9!-w(Kxqr{(rz8L z5sJ#`l1~ip=(}XkG1`TMfq?<(oL-8o_Jj4%N>T6dhzN6_5~3E5T17#Fh~1w}vjD;j zP=N~h-7hu7nH)sl?!rL1Bsn`98&dN1tBjgjd^HCX6Ea{LB%U@l9^d-y-5^pU@#0 zIde~y`|?IzQKdI2^-CAkJI#WUK3LRG-D)@zJw6>13hW>|uedNJd7h5u4j={Bi%3Jo z#FMgD+KA`_t4h$812gDPEfw*V0vb9JAuY80 zvaF12)7oOu96L^M%=7kZf%#B$yp&xX(oUuEU6&@#LT5zS*Cd1c_Vk@G4J6!%6cx%#h^J}t$- zeh6YDL3*kh^|xubI2wFB&JBhfuWr5;X8hH2WjQ&BmoC7Z?n#Ix<=IWu2mK|`jU8~} zO|HqVNSH&o*n%%ImnjPISaL2c34_eh6E*RwUB{8qdct5bS@O^g03cDXM61GrjszyB z%sXYnv0hO>BAbXT(wE;1V-YzOzA;CqGXWQk0!o+k%X{LI>v2z6E5e1dpJvn3uHwfy zf7zR(8*o%!E`n*-tfjKhd`yMmm$OLuhYQ|8W=xR=G>~G+Im!j$MIRTzN$YRrK)MMy z$pvFFsT0sAjV83?ef^tghMavJkBuTEXFMzL`UtMr^Xu?==nYAVYXSX_wxvPic<>|^U_h!jl@P;$FOJQMa=RjngZJ%7ine9(> zx;}toxYh=A(}N!EyCyB=exZ>q)css*QAusV*)I=fPz(QFS2G_H>m3$UAn6Yy;uyrFv+J?k0-X=0~N#g{NZ}gt{@BDFbAyWkp}2H%t{;F zjJ|nF+38O}MZyuH^_53TfEOv_Iq#C!qJw{_bFZX&HVNxcg$yY-vjo11(<|B+>;Dx0zp|1h_?_(1Ul}2a13K`^vl;8E_EpG8o0DM%X z8SIUAxC;ToPt73o*WXK1k9O(F=z0)`U!<24ZUW5+yh=G`4_SV0*?TPIM)<>$p~d=~ z0SHBa-B{nDQe7z?hUDDAYD@XkaW`YaND}6qHQ#6^tZgQR z4aKMV|3vQOBcd$8nbb>|@*HFa;%k9E!#41M0Cf0s0py{)NOymWxk=gAW@>_cMS%1TD_Gnc&lqOGc>Ohlo1#rC6n7qT68U}z$u(pq&A8zeCB5(#JdpU< 
zH`e`ZMV$QxP=f+1qM!fpqS5tYAV+_=iXcDwgOM&NnvljU5&i!74J0^*3WYyh*-VaQ zGJkOZP9=8OP@WS5Er2L!hFW{J+IzRbCv~fK7jWl5Ck9!QDN(&@jE3?Js3hT$#I#x# zz@nz^D8rN1qY$hhZDUA*7yM=p5qtNtEIMO7!!=K@BzScfn58A~pT!H(T6BR@pROu( zyzQr-i7?*L7r|#??g<)~?V*ASOOdcsG$U0X{36Xo^vqN>Ewiz=Om8a$@rda|>9aZt z!jsn4=DkG!-%W$KKC9h!64yL|Tq=#2*pTQL%8*+E`r{+2)S^b#r{b=l?=998bqGix zs{?L5A;H5|GJ$~-0?sR9zXNVx-l-uz803a!nMP4h37F=-pil7Ic&+C=eEF-_+5}O9v-BXmnOWAmk@2 zF@luRDdo5uq+$LVO z|LMy3-{p4I>b)5O|BE@ok3mWlJ3$Mq^~R|7en+89qc@O1wZBh)g=FmIfK#B!g zmJttk={o~TbKryw#(&oVkt=lBW?ZrAoHgM5&&Wva_37^q8XFajDc?|<1>`J7z^-XbAD0IkOlx(;$d2jP_M(acK?^hF#hk#Nw754tvSn-0ye z3Og8+Kxy8Tq?>sc%%s|+vG~v)2o%xL(Up{xAP@j#e>(U9xfE(U7ii17JY9UH2Lj0* z#eY4#BK+=VzVo5J3Ui$cv^1fvydllo$Kiat+};0SeoSb`e{C85j0;21y#tepR)MQI z;T?J40`M0@)dkagnxGbZa^ePhCwi@TvH!S(Cr^Z}q``u!Uyr0hFI~?cd!_DL7;;;$ zeIsQ}UAGMY7pD30B_c$~>wpqy2C_cA>^E)V?bZVl0`+W>h(eV#l>SR{2|G@p7uO3ZDJX8*N#1NAHV+e%)Uvzc>k17AwT(* zuI>rw!uqy)f0xZC*$DcR?|>{S%YEEKgvzwmI(~3u=jphSm$`ZF*vVnwrD$Tx2nBfV z#BjWVWzM4U^&_{Q^eIMJr#@!Xx2jI}Z3ED$3DO5H_Tqv9pe;pR@@VDlUKswsoc#X% z#$KM!$6Cooz}WZbxq)hV(9Lv?n2Cah%SFq`$cSUZImkAh#Dv#PG?AkvM_qOu$Bk6} z?~E&2(<{Akb#A~_M}T$?4U&<~ta~z4_IVTV1}K)d3VR?#)IOSmCmS4{<~&k@`vCMg z0A#T3FI01m`@;QnrC*|M?33XH^#8PvNlYYpFb13Gu|MbY)s}j56v=Y#d!JOr>F#jG zVf}yi(1vQ2%4uog1MT+F4;3z-{1T_FzZ2fp)=&QI@4qlH!GS;@e+$FB)S7mbCdERo zEFxut*8bE30@4DslmOcQ%<5Q-{^`?UCu38BI2~PGnc``{kP`lvWRQi6i-a5>3E!q; z9xR7Njo#HK&{GU(`7Ftx6$*I|Hk(SYtrmwX?eOP$nE3870kd8Q;y{`?j>D|$$F=e= z=rEg(=+Aedewl3ZT3{XEU65F~UCqNo|D_s8VOoN(%N*9D(gFfVHF~cLY2G}`}at=B1^6u|r zvH=|!5YQn)aA+hblg^FD{i}3XiL_Z$svUsvQ>)a$#2f{~rmEFT5A@7N(fvMzTfUJI zR@Vt1+?XBq@A+0%AER>7zIGG-mvokC)F3@2h3PJ7YiDP1)Ma6{BgDFTzG8Z>+PoJf zb3&OB|L%Gs=t90G1~coLY<~Y7{s%NQdDhbdWElu6xtxS_27 zIj_YZi#-RHw|Kj%EpxzG7M z&$+G(cMS)y*RZ!g{EuO#mn{m5m#_k(2nUnhU+fl#7KM!A@QX5^ z-`gR{tOfJeVz9}4ey{Z}?T}V6Q|5fY|J+34WCeG*`u%K$O&LEPKYmDDxD&NPX6l@^ z9`*~*;};=R9|PbtE;gkP&VLpb7M7lt=7we3l#ya$4rht|%rMUt9`Dm+^S!n-7mWo9 zp!CEb^?uEk2C9 zRHM5j)n1svV1Uo%?IJ9pqdUy=Sy2&=8wT+)mdau^HZ`5#AT#Z0d{g`0wwz09S@}!5 z5pw+-aXl&;7n~t5@oq9SvdF5IduA^-Dv>$ot!NrsIHBlJY(ia{ibR{_M8Bxg0F4lg zhJ-SN*53;}El-x$_Ntpj1ebq=ThL8n_ID^o#=>G}{Yw&=FhMg@-Te5Q1t8ZVMt|=O zq>oKZ_!__D*XIbd)_ew3Pe?4TB-URyd5$E~*FcJnPK`vTacjMqIuftkHUGMYdFzX~ zQGgmL4r)wM#>PNiSGwj6F0xGiXd==x&D3S%O4a=y);g-!-4p>~4K9vv?PS6Gy57AL zHKIy^ICS&? z9(2%@I04{TBr+L@ln7fm4TYdG`vb{gVF|#*d^%_}QP(zRBcK zjd$I>PR~8RIDXO2yxz^fS^xI5sjNkIUjXXP(=y&=)i_7>_;7~g?2L0=sd|GSFT7B5 zg@X0hlMu)a=nV5*L#I=z)CBMO+|K!eB=r=K-&e;85%|;hE2`p-gi=j=ovmq{I=J}4 zo&5d|tZawf_xnK!;&Bibc})M!%C`5HmzV3s+qawd2eNHzHML0}Cc;%RkEs~v08>wL|J&bS zfGfZYKLZkM1=q8QzGAcXexacJos48}aZ-6*?{Ko^3(zTt!W7d!+m@|w?H$zb(P*@X z6YE!`i=H{x5y6!ZaMRllG8?KZHI)O2eg;r`#nr#>85kJE!u$RRXBe26B&u46xID3^T`y1rE~r{} z4HjeBA*lNHb`$Qk2L{l@sw%Jz$uqBQbbTEmYq85)J^^+lKWmwgN@9l0-A6v^Ge#Lz zWvGgRhycr%x}55)M4?D-M5@PTB((DKzLYouu^g!!n}8Lv9mi2CZyZWND}lQ7TLj}j z9jN_@n4?1s5L5z3sFMtpwYYx165`gnzP&mF{5U0*lxeF*KylE+OGr>k+q|sk + + + + + + + + + image/svg+xml + + + + + + + ? 
+ + diff --git a/doc/source/_static/reshaping_melt.png b/doc/source/_static/reshaping_melt.png new file mode 100644 index 0000000000000000000000000000000000000000..d0c4e77655e60a0062b1d5d01f2c7d9d561c427b GIT binary patch literal 52900 zcmeFYby$>Z`zT5yAfc3igme!r9TEc~pmd|sjdX{IfJhGA-Q6&>Al)U6bTd`doiIA0t5q1MaJ~Zp4 z2CoAt#UycFBPqWYd)+GfnEhD*J!P&peE3_6yi{*W3a1B`p0LKdi~Tx}fj&Vk9i1C{ zH3!%j7TnXq%&xB2cr0-5ezF^AQR(O!6RPRXB8ehCScIbsqq&d(Z>eh~AZA{>+``HQ zKW$L1)~#e0-D%w2vDi0x4hzGHPCPg}3qC^=x1oYFaZHi6eFzupDZz?Ej$0Q<_^3}> z>H&gx){l1hnOHkqH*5Vk7E(7YA*5|YQ89|}E_gVc=ex(>FOVo41Hz?AJ!-9A^QOlK zzLiT6jR3*zBn1oGN@e_q=ILjP5%EQDl<_abkUL`a$fC>#BL~ZawMJ!V~DB!Dwun#2Iy2o4-L>itF z>037UV+NzFiDGXc>f!`x&r={E`KC72Mf0spItD+ZfaiDs=P7CnKFaYMRh8qltwEGo z@h#4(DeV zwvLIWFdCAfgMgAFkaQLl{L=hY*gr?yl?89NgO(8k z={acmQjx}dc-Y{wThM^Mx#O@nW!g_bw8f!0#@S zIm40pda^y3rzksw@B8Rd`cU=RgEp}-9K?2%L(vyuNYto#qVZwy23S)sSi=bM;QYkP zhIHu>dwkK;Ag^hZF?9T_L}969CUCCE!Q!Js&o>Csr5?ThO3xa}_L4a3l*|Gd+IGuEG>^UB zMq`7oja~Ds$rCHI)$RbDILJu!IGrZ^oJ&d|)nn+*#?6NK24n+c!^ocQlF#+2Q0Q%N z+IO4x)NE!?aKCcW!r_K?w?)3qXP1j*HQ04;Vb(8GH!6IfN~Dp%`s1wy#~DsF!i;xg z5NV72`xqPErzG+SIX)F_;Wq5G%-RflkN2?li1rZo=#cS|vHhO4|-mqM?~V9MY}KTcmn@4imimvR)d%G1!(UuN-(Irwco8|NKI_erLZAEK(oeUAoy!2JpON$u1A*w|Rx*z{QO z82cDF*F0}uDOB-}ngb^*fL229J$l*KAFte{u3w1DP)THnFN;AWlwQ~+tugdJWuYlz zD>-x?W(N*vA)n&yAY?BVKOYkT{ zDCD0Xeztq4^N{>W*;mT1v|n4lx_pfxx_Yu6w-JXI`!w!xY)`BedocUAzP3KvzO=rJ zzR6hsSg$ghvW?R7vX#=Y3PJsg)}Ge<{;kgV-su+puFIg{n0{}TPetBEe!DCJ@j8if zpM{cd@Z1Qmo}yCu^ZR3iSv$EyCKKu9Kgeqh7z|V-HYUny&uU9)hihkP!|K**@J=s~)18UoENf}sZ zxl@G$rBW_}UQ-V}V*_4lo_)qwdW7+)0B4&J9LV$q^-D~kNR+4yy$q6!vh>59wjISe z{H6l4$x>=DMKFi_QTj?PUPWY`a=miU7@?_{soEyOrq$*G#}|$?j&+0iu94pOp6uT0 zZchkq%yBpq#O;{^i#2@;DhBW1hjU$!ispT=Y zbw{P>8DFY;o<;fRKT**c#~XJX$LFT0dQxR>rZ-MAE`0T{F)a{e&>`~?+wTmvS&6&?-+@&bIP+BFE#u|6u zcHf0uC}ml=qG`HGvT4+l*wfE5<(m5%<0c1z3n2vg6{;C>G|~X_CUVk4%?CVK!lc(! z?TneU22^7VmCwFX_A%{9@p?E*I~yZ8NxWzm&6F@uT~UwJRQ@m~IQ3PV6jO3P&?@+y zgsy|DzN9{Odq}70_o$AWuTEc^lSN~2l7|xLlVc(04Bc#-?i~D6bancmh55?H(Dl`d9Z(D$Y6mmadO)oc@O&pX|yxjP8^7x z`sP#&Pl-)=@E!4G@ip6d*paVxuSBgBksk4#Y?SW&#Pxq3a!$V4tJWjpfak__z46*d zwbk!MeOqQ*w?08|b8yqEIio#dfH!j5g%66SetH-|^6AU<@cz4pDt zJ$$`(JzmP^s)7Pp9{IOilba2r%>zhDE*hrFQ`$7zDz*e}JbOExb|LHag9V8tMMfn9 z1-d0@B}CA-ZfO;&b=B1&6|8rZTkm%|Hf8r>EL4oOiPJd-K%NN(gj28+AP({2z(BJ!G${4I=F-w~~H9KeK0*X)RwaH{v?tTGM*a z8ZUsG^{&D$!Y}o^>5>(ylx&rDM?a6=a9VMa8&Mgnt(OardH{5}%sMPMo1Ax9zre;= z@e8*0Z>sPp(NI?3Q7_je*1^)!y$Qd9?5_LaM@8-F$u|oZeJ}YrAzv6&RCB{~B~<>X zy6lk*C8Syx`R&S!iz~!~A{00?LbxYkf>;W(1I>=;4=uAv;do!5(37eo_kU^CXRzVG zV~r#_7eHd>4*c;VCqCO{KqA4ByXr~|B=b@+A-#XMPwB}?N8zoZ4z@G z_8oQYgw$j^j8PB6N5ZnBJ?Ta%5*UrC2I=lJetuqpry((vU&UrT%||oi-*P@(hxT&2 zb32)Oj+zb67i1L34teA_X*Ca|ww1P+G_!Y0^hS=`w-!z|%%``NkL1s}ju7Z~8V&A= zZa)wx<}0d)Q+xS5UEq}LB$*}@ZP2p>nz=nc4zQ}uGw>E3{>-Od483*fnR}YWJFcHL z=kMDk47|qVj_Nk3*cr++KFFbMNHk|c-CBiz-k34a=2@ARZGt(za6+)@pT_p}h?pd? 
zM2bgY9KH76zzQN8?yC{pQeM>DaP6z>D*MsYzE-=6R-u>ysW*&Y&Bs5=p;ib=3D4sy z=;0M`oX@;e?6+XiNg9%zlHH0q-DV+9EOyq#Y#?wEYWm=%Tywc7Uuf{jFSFWf0K3S736cMmQe~c0EB)c9jFK#X?TlUg;f~k#7T5ad z8Ee|~JIbG9($rtUAyO<-#~se!qrcAudBw5C@y1k^$C#1BJcse-l`XcQmDwlP4{ftM z;r*G{eZ{7T`0(puj^nYo$*lx!x$lmKr#~M18HYsS;}fpOtUOf^30Einz>&(FD_FHO zU|PmWm@hFvknHB^nf2M(r%^Rk{%IiMYH$4MB_@HIcJXK3-g?jKL&s6kW0PLxZ5Yov ztt_;i<#cA+DRTd4PJAw5W20$dOk-5#Y|#Yf<1Zt$0%i}jbDg=0P1)P_y83!kH-T{_ zc9%Bx^B9!Ur0Fg$LU}Ra&UG|*SeCFnNGnB0H$UjvPBE{LKo*;o0J$hUmpo_cpG-L> zC;Z$jbm^&IX3JnWb?Dy}x|*`fB-1>mhGu@bJt}Y;^U>%HekrQvL*H-AZ*x9y;^9VO zC_8_|slpnLq76QKJO_7!l7Rz(w`1#tpBS|W-qCrA%|~uCw^F#&Yags`G${tMTd6WKWRmECNy0d1?7 z!s0$!X)432ALKW^IYMnQjwU#=;?utuI=;~X-%MGh%;|AOaDRw5)I`@5pO)dDL@LZppgacGvT9H|{Q`S?eTBBQOT8deQTjpl1 zw^`a5-hb$))KpP+{8<)ao|}+5qXml)igt?@`XcBjNYZHEpbL52aM7qoc_p2i65ed< zZI&v7!@$e@2`?XCiiGE}r!Xb6BD*cimN4J*Y5va)YjQiAybI1h&qL929%DWRN4$U# zK~nY`>-NuX2O_WXo{yM~%5rdUP+`>?5kW+}@$Mr5)?` z@`)<~RcQD3hHv@rWXlSqKc5D?h)e$*;ad);;5V4orbJ3piffjOpF0ys+|EOcAD>Y9 zz0!K){f6l#h;xJUA=@-37MljUkp7v$ZMR}iek*@Zb!d`rWAHa$NJK>vQ;3$&mB1jk zE(Z`q6MNN&2EI)lvU-<(5ZmQ2Y{72*ffVhhPP+`BETJD@*yRANZEwQoeQXX%q+&iHc@ zcr7|8)!AH#37y9}bBykDBd2{K2eSRfQfhwdLRiEg>cXSuIJkJxiqxe=g*525FKx{d z@lxZGxbM^7>vfXQR?^DQs_ie&=|%S>s)qRtz7i<4XXAC_dA|SpM|d_uz5b45imaJ} zbFu;Jb_uWh^hTee$}QjbZB?PcO8pf@DbutwgnOm4qtN}~ z)p5(oj8pZ?z#mFkJ_qX}{=+2vrEJ$E{xmeeOLc_xBRkjJrI2 zcRH@Qd^x-@Pr{*f)tP|r}r zw}p$yBgT)#q8>xWc$hEAR=@O!>WF)IyS!C6e=6v&`ocIJmHmAzSKouBD~_iv$V(^; z59u`-vKY1x-k-}VqboZ-nD`{Kg+xhH7W+AL4Lzc{DJ>|+INSQ2jlp?eE#Ud>KWao= zJd#CX!r0cSrf`%xgFbzf_%V@6J8O18uu!nXnd#j9!Vt;31*0|SMO@n+CAu{0bE*j@ zPg1^)BO1=kvzMJ+=UryKk5(>M?C|^WH1G%`bs3xrxC&JBNEH>PNpr$8rt(k4!Mc-b zAcf}X)Tx8)VYy{3TA}RaHF}H>vfu*52^JnZ$B$>pSok3s1J|sr#r=D=Lh@vELeK^j zY0~*B;i)Du&+6On@T@+YVou_@;*H;?seVvF(&weNzmI(%ZU@r}$}h==%*-_Fv9D?q zgI(Xy-iq2zX!9@_Q-U^2UkPt*%(N9qexO0`Y>v`p2`YLmaJv1H&i(N{-q!|K8 zJ5Rq(-BTmu_Bd+9Jb&^kUb?@aNX3$8cTshh^cadknj_Fio-4HC_Q~^lMP^pX!fTJS z?Kod-@LiBvnzh-yC-HJzsb|XB;Tc!&&M>I$SfPoKgh>1a7IHW2{dkD7 zAO%|*jwThg&2N1Sxp&*%QeLA^69iE7OEtb10+Cef<6=GGJ0B_r0=6AJSE8 z%~0Zo^2x*R$j3+p{*8X4Epr)cInJ^~!8xsZX|~iYp9r?-)0q$HlK^z575-6c`U`peMG0$s+LTO5nQ;?hMy!}#(!g6dXyWS1+m-#P^LUU=NvaYif zdLDL{^kL&7j) zXv{ul2-VB4^@^KL(7q<|1`^G|qPZXEtPkOe zalB7I1_zJl2M3>4NSCuCw9F0dS_^0yW3s@lCM-C&EKhI`czNkxfclp^Bfa6Ww}Jja zlBu$~y}H~hetm08)^`Th?+sa&oUGQi#vpb+K0Xi|2Z)1%1z5pi=VE35&Y8u^j_UU&fA1q^%bEhOP);JA;o zj#X1e`T>PE9Kt_7YM+J81PdC7!NDWpi2nDZ% zss4F<;5K5^&4~ZF(Ea_aFmNR5 zK&^KqOIC@QOCeeCs2x`!vo;a=Hwg@keY6RSB9olLJjCyrdLZXhTFS=fa%AwaCx%%= z5uJcJ_O0#`c57>^?d|miQblhR6P?TPnj9O5NyPnR7<_cOU6NB9*ye}PTW7bfs6Fr( z-tr1Kmhh>IjZ$j>kvBHRt7Jh9GS`hXB!p>K*qqY>O>E4$_+YY-Ve3a^G$W8kxx{p( zDdcT(jTf)YqW8`mbW%A(I-GJOQwG<%6^vQy$vT zMRV+wYVAG$F_yAD3#R;h=l7R}JQJD?@_6D|~RCs=VxZGd9)}N3Q>;D&_DZ<#{KX!e+ zDC4=u--3_R6~z>DvNhgoA+sO#H8@zVT%{3g)A0r0$a81Prv91pyJ90m>!#iolj5{)Xe zD2pGZVvhUsmU|_I+I8hzR!WWq)1shTF`^|Sv7PGbNtwuGLGi%okhP?#F zvPE{;z3l;ys3gEPG|zr1Ko}9A^Kog?M89aKCLyXtC31VNAID+%MFI?#O%`4%^dn&JzI~>N>wNfI>PMJFapH9OCVt6u?Gl0`4$mR|dU&-8M4x(XNX(bK2Q{F+7C?iWWgW>*v?8Zg;ZL%I4FtNW>!ohd}g&^ zu|@^03|yX}!|t>S0joB>kB?8b$u58XU;*`S7A^(>ENpTf%i{#3aIpiyF*P?MXCRjS z+r#pV@!>0~{n}-$8gm}IwO6Aws%k@vHakDc9^>IfE?ricjYp{Ix^&cCXM_sd%5(bs z5=(Co8ji%}$Z&S?y-;J}-H-bfYi-`un42ts^O&@q<{T7D zbQ`%Qs}R-ubBm&>e{CL@x2!IB6G!(aG8W%pHAwnu|L%$orD`q+S;A1 z8;z6GqVT%CFh15u;Wo`_A0=ef#vyG0@23gct$ih`QXz+b@#2?9Yl#62yvL@Ak{Ylf zh5v&eVworo@mvpfCW;ckRd$$^4vSAcT;lClR6j~9jCcJuO1lX5#;0dgUAI(#v`9hnW``~l(nl9E{OQ8n%*UV3EDIVCtlur4nB!g zue)0g!0;+1(51MKWN6ZOB6Sh!x&L(64xmGhV%3?3?5s%sIwl1eaMx=A-=t~o*}Z^U z3!8V7)Z3JaVpQRvD*xWz&Z}0US(R@WkM^5RIMN#4dxZ=$rAYTamck$X{Nh69bZb1R 
zO5nrNr++Nd@uuK#Admj#$yQ#&H#OjQoXhW?MS~3ofUAWdLIdb`y=}#(jCm&4F-}1NJ>fdxa~KtzoEg!9)L{r z(7cQcd*O%9S0R_kJIt4qj5Y}lGLg*>;g$W(!Nr%rHo9nr)xYE{Y!~SyoFc{ZGS9Br z^ZF3%ayDg5!_3^r2Ri(O67tO-I|c=lxGVQf#$%BD@v$$q@J1h}K^KljlY8B9MGh{v z`2-$Rwg3y~WJDGei1O_(u`Ac4n3QBPJe8s+{~fa^Am758@4CgYg$z`gw5kiib8_52 z>?}{U$_)YmwWFQV*_f@X=t~lqtb3((Uhl9?>##k+?YnQHHcTqu7(1EBXV-4!^OWB{ z*Tg)VP6qX07~Z4p+qZr*z`{oZmM6f9Q>|FrNH z7fy)~uwnjaf{9FQztu~WABlA#DkUUJ1Nrv}tqEh<^*^JPqhOOUmz9;-F1Gj}`DhqT zD5Nmu&8b&ODne7X8p9pE&8$1C67q#WSuinQlm)RJ5xW2=-z^g&qC`3*u1(oZx)j`3uqSxX&8| z#HE)1k)$0847cMg0TU7|3l>8{^Wm%{I}xZ9Q=bEuN|1o2&ii^a~)br6mOEMuM8J6?+zvFHwLV;>g1d#0ZwD=&>P*AkYjd#TaB9t`W*0*1VS0KxkfhHeML4cq?Lig35Q12DXKG@|gj~BECUxlWnN+!a zvXlJp-4jI)`UZ0Ke4|@JkOjl|8&G@WK^ulGP%cPl0Eax42~z>Jv*m7Hu+>3xLX4FZc=%md+6MhAt%}O|!jNNE&uiXa`wU0fP+dLXtaBk2v zC~`NO2&=J}qP#lWl^tH3q-h<||3Ma4b?E-ZMVs)ImX~fash#v5S(BQp7gVh+160fE zD4+5$5CZm`266J#sukOH6tJ<|{xsD;7>q7BzT;u0mC#K-40xCB^LB!g8j%8&D*(k8MCuniY2k@TRti@oc;ucurVqOJd_qT!V`}L2jMeN;K}a;dC|H(aS3sO^H{S82REbnAveOoKwOMD5zJ%AgiNBP~qk^W=N-w(Bou%}W}MZh5vP zYw41m+mNI*tLICShYc1cjbb3@6+#Q1p~1lpP^&ZkxYH)bUCrU*=1`H_S}nk>Mks?5_bA@WdswknR|v2x#5_L0JuOlsp!QASt;I*i1Bu~4(Nw0!YHsCXJtf9%uMS)( zkaMdusfDFI+7(W<3YbfR-Q)tV<0=}P^Mjw8^@`+VWUMS&)m$Ch!X_hD29C0Y)zY+1 z3QIjvOme!r{SlfYR}{~!W;goM*AszKL&wEb^3dYTZ6Tl>Htbn0PiY3@Dn~P>HBz~t*z0M_v$^JwZs`J=lOJ{J-Ii4!RT;v;wgFj$=?lkYU;7?JlwFsO>IK#U}nL!@FYG-`s*JVnb z%Q&IVMqIs5WwXg#6%~rz!coF6M>6#&D_0d0?v!L*mqruz9m0??IZmlB7k#m_tI>ZI zp-UbeWMh1ozRZJBnk}XgKI{x8F-F-NrjXb-pB#Re;&G<2-oniv7928E#FFM3^zlrT zh|}n6B~bdSwwg2RIam=zMnT~R39@R}4$b=J&e`r>Z2~3iM2ipgEOPwBCT5%2304t6 z@gW{gXV4O;=XV=Tz9FVlNVeF@*U@zt3I-wyk*S8gKIYe{-g(~ubv5}YUX+tWz@lk@ zAUt_gW$18UojxGPWCgkiruA^!b#HR?JTDC09FB{9nu&Acu#9rnH?*u9#~&i3w`L|) zk`YU9dpk~NIYC;{YsBT{*3JRPG^N~DahCO$A zVHTasWK5~h8*y`NV10mPjH5wq&AvG|^hjc0Mo3(hRF~3e0y;RARAujDJHUafFvc?1 zg}y~KSAWBJSfA%Hf#GnrkT7~!j`cPkBJA= zWI)6lCN;&njjl9L9?C%@w>l(t>(WCVj;J?N1Y3abOq$C@N3pgiTDFLfzsCF zM1^tn?Oj*wg7|IY#>Q{*(;WG*`h3$Rijjc~vHXycnL-Utw`d~Lx!bp)qIq*kyDi!= zrb8R?_gx5?0+N#IWFzVT_p|Sc=_Qd7_sYa&bM%HOppQyHYff3|WzF;IrWo~-Il6Q? 
zoH#|{$&2MX0VWdRTu5rvYiQB5>GY+YVw!sD^+ZYjxhZF6)SkJ{t3s3>q&z2CFsMLI z)jXS2BD=cetf$1Dh2^qdGtXw8@~9bVY}m7gvChKiB~m|6v}l?JFiH)j&;8Zyl#mWq z(hMDbkTN$gLg-j?acy&KyxOgbm^iaPo?2q)Hl^2F>D%>f9Xpny9EvYN&ZO`&aHqQr|E!&;;3fnDc z{1T12I^!VhP~pp5m+xxz7zUER_==Y4_>7g)k+TBSZn_;w>wyHJy{f2wWybGy+u$-e z!Gw8IDX;-3d!H<#Y?4}mD$8iPDTl5Pxoh%>l~G)HC5= zhfL@&$0~bw`kycCoJ<%_2Z0f#8%y(xz7?pI@Q6XH54O6>%@Vao2&5I=x!#uLIOFcv;ZmVJ>aAL%64o@J~XQf7GOMN@ao!HUbLhcdsR6Vi^!NZ#{Akf`s z>-C(dyKR1@kWRxPWLCIbg>cDCItdBWd33WB5tj!v3Xj$jtiR~W+yMQzewCd*=9KdL zI?8yePj)5eozNz1Gej1R6s-qlFibW`8v%tLW@9Zm0UYSy=fKE|Ik3CUrL>}*&W z3D;hi9z*+j%4IwRBj>I2g)}*)A(Ejn+3htJ1}ANKC}B?XE;%kMQH~p1SzQ*BpcB`EJ* z#~Ap+Jzrnu!c8fF%hj!5jES6jQiG~V*waJCRF^viWSgb2zVQ5-S)_=JG3T2Cb$7v> z&TgbgY3l6U#-oe=tu5E(uFG^7Pj&8)l&gl08b;l4k4cOj_x$dmXg<)UD(*Ml$Klga zc~66+NHBz(r+e`#F@#Jg-dXjt$TlItE@SH8&vI0iE}JyG`_iJeBzz-I%&cV1%9^3U z^IeP&hCPw&gi9k@(dIbWqi4ofb7VTIs42rP(?Jqi#gCg$bT65rPK3djeH_aTnVqsq zOUsaRJJ+r3*7WMVskxU`+-lIDTX0gvGW!DKtXV*&!pTzax}RMQH{-kYqqD_iIsVVf;`@ ziFf;;WX-#h9z%bUanGoAk^?+{SiSU+ZQ;r#~-y9TuOOg z1;+$pz7W-S|vNi+*=C^3L*~fb+X!@p($17D?nSQ zKMa;UOOh!Dp~bvZlyw}6SDciabl?sc%hWsA*)uBc6^g6YqBvd3Gbfzto4w#G_7l(KkgD_1}+>c~9pG4daY(#Mc%Qw+-2snJ?qj#s{<)$P83we*9jX>>&@gw3h^Z(y`Sh9i&T z$hapCHQ`BH<)ECK_^%sl!2A6}t!Nx0in()smF8i>%dhsa+m}9D=n1SBk_y*NGd~yj25RuDh26VFlx=Penc%( z!L5~v_pU3vXiY)rVJURP&|YNclsk2QIK%%U7m;>KrY3`>JFjtP*}7yIW8bha+(2Lw zM9n_INdrp>kyl(*j3HYt+k3l3C>>E4`~6|#MyjvIrfjpnPH?90!5G8%YaTMagq^i- z`Y-zR1&>hqSs$BbV-PMEJyIiuJ)wg}^!AxERYM|>d%Aoo+_cwplbH!tObi#^&6&GY zjQ|ywzB@G1GQP&Z=&qBVvSc;%4HGBVh0h@^lXZ5+N42z56>_Aes#=v?qBSX-J6rjk-x5Z;!+59O3+22S4X+3RyX^?!v)ox>a38-U zIElH$S8To;Hh6WCvAak*QY7>4xGUuS#f{-+G24abt!bV~@Q-|$G_Cw)Ziwc@TS2AC zBtj3XEMdp?K{MAa6QR9t4lT*;+b+H9XXAx;`gb1BtOs3GHP!0piLbjA>c@7h9GfNm zs>?4AI4(?Dx3h!tg}>{MZN1BO;P`M;IsRd%o_w{Mzp=kX;1T;f4)b*bzDKUg^%M5m zW6}-s5ZjQJ-GZ$B?j(nOh^-&v%kb3q1D!t911b0^UclIb9XxYZGy9$egkpr0=4gc; zUMS5}4c#NQ62<#$XtCQvtdyw<7|L+t=!PcqI^ZCYriv--Oy}p(fravD_Lkxe42|x` zo^+|hAb_2!phCrZYLfhj&-vi(`kP?_TugPvmzNeYyHa_+4I3_^h&AnEnYt!p6a1LA zCl`_NC9bDi1EOpnrmP!JqfJ-GhE;G;jpEf%Ek=VP5^+~)Ogy@PokkSM!+&X~W7o#j)g$s)9Jup#v@Jt#!{X(Ah-8zyZ% zVpk&^-BHD4hi~qUQzf%al(0Fs7Nu@|TaB}MJlBMjKy2#3Hy)Es*8io8d{9-PbT$e5 za<;pss58i+b&V@SRTVIf{g9lDRiT* zv)r5eUv3xbH8Q&&^u?uRL z-4k+gh+e)6Z(NY{{gPm-n{x9>sJHU{S?-vu39MJ| zp=nKuzj)EdY2)D58_mKw*Vh?Bny|EX`Y}30q6j9jbr*L+`|;kKk^SBE43k8d7E(g> za627WlZ5&i8IJ5y!_I@-XU49us;+WNs4c^*_x;CJO}!LFU6I*le^UQ#x)Jh8)D^jsslc(3di5bN5SIAiX&&-_?#K#n zO%_0_gKtEGgws{(&8-eX`=f0E90>+YSdr80?i%;a!WF{xK;xb@LMO^k{{(9Ttg+*q_{aPAMnx5X8eJWJ0L zn?()>`c3Kk)JWx;i|^eN+dIDpG(lzwYoB-x+XmIlKE1P^(}N@eBa4;@i5ZQ%0cFjb z_yIvRO}^jN7sQI3BKGf_$v?};cwm{4&aRCscgsb3?$6yYalR!VFvDoK@ z{R^cg%WKH>sTb#{%4H@h@I!dg?_D2ZF`0FqD^v7*HFph(0ye_^4F*7QqYroSX_8Cy zUi8ZLx{Fg%Ipe~qkfU^(=JrZ8s-TF%Q2JLDvLj=-yATFX)OebL1C8d}ouuv{j)y7dGv`5pl;vT(S&*~iC9 zN+p5_Z$3PFOomGvUiL_%`VN;TB*HqzgSyUkHKJ;ee6V~sGiR|YOzH85v)cAgDYAoG z8s+jUbr7SJ^tOR)joZ)NDZQ70oB1sDq{!k0tUy?Y(kp&QD-IxkJZ3JcGa$D+KC%XlpMRo_VILMX0rhSF9odE zcIKiOXap&xUc8{9VV;E&TRfmF>DvcqTN(}zuo=T$T#b%rIrXMz?z$TJMydw zy(jn-U8eCU1XL^1CqfhC_$j{Oa6(9%{1qSV?}u7_^%t^hkO@#_n7(;P@1a~y=G!TRk&~aFfzwJH>{XQS#RO+V^P?v> zZZFfXGEA4xi)<}+(KtO?>tzgAVdh!x$K`8=Mn`~i;L!25v(JVY)r#r~4l+%b;QAVV ziEP^pN@tl`xml7Yv^K{X{7FKzUne}j4n-%z|1YnZ?i?;WUhU<&k zfD?ishzOz!LG(oLEf~Gm(GtCj(L0fZM2|9h@4fdLVK5lIcZR5g(d#!!e($^1yVm#T z_xWqCd+)jX?6dbi_n!Ui=P0gz@&`G@-i#g0p^cS(5UWYQyZAWcEJ#Jklr~2ampq9a z_$}Y{%qgDxViHts>d;6j^o*Xjcr#`*f}>(xrD+&XkEn!Swy#kf))y(NuF>HhEpj9w zU!?JNgjcb8{F!@CU% zF+U>_3(ixSuh22?Y-Ww)@*`LVg8j;Qy%H)GnARD~5~x~KmKRKCcE#fjWrz!6f{_>Z8KG$eh!sKubZouG+U 
zc?++TuH9UzCb}X=CVCVMf;-#e%QRKf3P`OW<<52(+eW)B5StECCP0s@|C%UqcrPRX z4DmO5#&)d=P%NB0b?d!Cf>?NT4(vBCo{6Bq8D2MAx5MP6)t^TCbpn&;cOnJ1XLrWh z^a;r<-7FSCzm5REKM_;5iC%`bdfUxX5=!4SUGiVPUt$HVW}lKCY9u%KeBF)|`rXS3 zkM#B%TqrVtKr;nl^ZMq{Z>L-V=SooK247bw0Z22#@%Z{{hQ(G9`>;qiIDn0S$A@bk zBgU%i0ir^8PQOdFa&~&%0Nh`WVFs`7a>=!CtG8V0nyfKDh;Cuv;~Lb@2rjYgukQ#z zElXwCoX%D_Z6bgI#-Gm1!8po}Kdu@?EjH6;?lJ}`XSB1sIXqG(A5_0DQplbkd(Z3q z)a$I1Bx$Up&4p^H!Dm%q`|JpfDAo2O8OXPS*#zrY|FWYY@6s>0`c>$}15me93WM%} z;n+qN=Cf#ViL>E(r_F8r=PV|jk77n&j#lr4xMh<*Dfg+$n{7Z3Bj_Il-^I*?>LK+5 z_Do#W>hG%F9b6odF#AWW-!Bd)wZJ!~#x<(6y=-O*%KJ-#cH;bNq@i`=F_Vzeed4$r zyC}t?W`-Y+V#p;_yx$k2BrLG-xWD9M9Jrt2(1nB0?^_< z5&&2|w8i{>(t~Ur$}@5Jdhb2vYrn|Wanii#jDH2gIpdibI~m{79&Eu(Rc-TEtF7u( z2^bc#YbWsI&;4=I*3y`CJHNqQZ50rg+5n{7;=qZCEii*8M1C2nmLG==BuA`{Ww}Xu z9lwNNXm~*D8k&5S6K_QBYNhof=#%gCDNR4EZg&{R(k9j+=Me#%k7KUWXRX{Y+I&-{ z_X1#0&JHZ7mffN^FL%RMlt0bgCIrJ0Ic;R1?Cs%U=xSVLo8fzV+hKWExJr=hfhTa? z_GYTuZ9?8kCB1og=I+gjvDe1k6r%}9TsA_V1EP`&Hum6Ceky%;w?(esa+kR<(w@K_ zntBVZjb~jMZ(;3wz#3XJUI-~3 z&^_0#70+>ut(r1WK7>6F4P>}bg( zG9{(MudsSye(ng;4ZAWG9U2&KC|h^=PJ#S^VK(LOgx1%bN6`;yxOttiAS9(g^4bgy zy{$t%785&zliWPtsd!eIQM^&Qz1=Kai(O~D^t$^yY^YjS_(_6+-8t9zm04kYflr%H zT1E}@Gp7BQ0j>DmjBEAcx+dtJj;~Uj{gqu|Qe?#U=r1Pc+z-vOxF&NK$CnjK0OD1^ zO$Eiq$sK|79I+xDi|un*(t9-oZrS|leXetbE(3?q&qvYBWiMKta-MUTXbQEy(I~Fn zv8?Wq0VF4=-G019(&qvVAnjKenDWz-nt8Gz6X*1K4tcyC8WaG(Vx>EgY`M@&5rX= z%toUv7++{LT(#}UAtB_7hFYoEw3@)JlgRhXz`ROV?}|39wyf&@{Oc^q%KXUS(u8A< z$-!Ea%!~!G*0728wy&3i1gNik@`uBt`s>L{8h%G2P6nrqd4(H8>lfd}AvhO%xn1%3 znXOIm$P_AeEt4n_8??UO)8l@pWv$q%Wt~`%1=^6;!S!e)GQF+6{hm)y?-Q(7eldB< zV*nVbBQOm=t1W<+&r`m{+j-P~eG64QFOSUBMa28U{QxByM&-Lc<`XhR59}W3muXO6 zUfn8iZA9Q`6iWwE3R2S(h_t|Xee#u-C(FKB>{_qh%;PNW@r>il@xXC$;;0+=`NmD~ zGf>unnh*LLGic(Ea;W2vEy7;RuhNqyiICkA5|Xj1w!RsD6g^pz5a%S`0uV#d2E}n` z%i+_Aviecq3g)E9Ir{zOq%g70cgj>KZJ~^W8Y56e5%w8 zd36)Mp4}A`Kdw|+c)fU5^g*M@OIH&^{9Z!;sv9?3@^_JuU9NeArF=0-t4DC^Les#Y z)5k4b(BgXJgO;T-)jlV`62^Wf*dcBoy@&5jg>|4_3zGhLx6MKtYZ043%(Obr$u*b~ zC=5~Bsz2*Eu4|YrvApv6l3-DGdl-9>=qXsWnyb}XpK8N9!BIM=;AI-SzO#>A0$0-v z;<{8mA2q5uPkgVw<>r|aq2;*MSQ#r!+h85&*=x%y(gd%KD+Zt3spDbRF=%M6V^PSp zJS-b@y=~d;LX1-!M|lHE5Yb;pIopZ8}2E`P*WmD5_fsN;q<~$+E_Z` zDm@)k0+h^5pop73VyN}#2;*`a7SwMS8r%b%vUnNtf5sr7Tm1x54hcnkq|5>US0 zjqz5$16vNfop^2|qY%FiiAYLeCZ97#ypi!5pK&WEuCDBI;gfaaF?ertexgXw-SJO5 zB8%NKvugM5m*YKrqVR%xd`WX#AwhHfTO1vlJ;jH)Iz`t0;@kF%OoBe+UY8yQp|ze? 
zCC>+TnfMKYHUbR>VMZMX0;+P zZU=4|S}VsD&G$ZvK5PAHQSnwXw@vlzhV`gCCo#beD2rbvKnLMVjgIcl%KvhhJgm)N z9bi1lksqUznGfaBw%K|-I1D`Jp0hdaJFT{yz(l^~y8%t>;7P>d(!mZ|S{>nv>{-pA zED%PX-f1?-5~kns`w;@39LM)CS5(tLf;Tk021t)eP^}oDKHs_TJui2>mb9x?&rP!> zH;y!`HjTS}^xRmC^Fc1}jO~*0QRTbPOmG<>kR3OEG$1ubo8qSh@=T5!t=LY#eB90U zQ69NOe!~JEJB|Rl9D5EF?~C%&qH!O5X+@Cah9j8vyau7#M=CBf3|n2*dF9JowiMkY znM>lcyvUBaa8a4xQIL+|J887?R7?JKnMLyP7H-`6Qls*nQLCmuRg08PnJ8^aJvDDm zW%84x6SD@zH&^{5O~W&Ld4Ox>yqn8y@h0A= z?0>ib1iw`uB#i;SDj9n9wx;hI0>3toDo+`_G+Q@=6YnO)kW<>S#5IKS)BuUznMs*1 z#@s*C+O4Ay`$5b4+pyB3UK%1J|M|2qx~U~UNf+^z0J=*ht`IFQ9MGp*s`PWQCMA{A zZ73Yad4aX4tVrlV6e_V@mJHHA|HKG3rQ`4b_iJ{J;`I!ZYBlD*qQTKOvi&YJP=_4I z)A{IP0~16JC~G+yj6RB)o3r+?nDlvqBymR_#7{bYt2H>!9+pX>;&Hs@SSBduu2U>A z%UWkYf+bc$qjny%C06Q6skHj5to^pfuq%W|J-#e$a@B|pJCgN66=Tx~)!m9`kp=HK` z+a3zjaG|txP}pG{6JS&MtgWbT#;RXBv#2CSoerb{(#L6&wf zR#g6+HQ8i16_0d(4A`2Oqg`)~Z5KFzAK7(gS{CH)EzYBnKQw2}S8zFBU+#zr)vxyT zX!dxy#4^o`k>PCkn;)}e5$SC{wFRhC-QPNY%8 z-P+RiILGegCya%1g{(i;67y)FCW)LGZU!DMc+MRq`Xj3%-{p?)Ezc!=iZ={ zGp6%&TfpWZwaOrS7?9#vK@5!NhK6amFX(1JJbYJ!r~>*>F>!e801MaP39Fh@fx_gK zwqrVj>tLYBB6Elc%0Juw`sa*uIw|&(K?YGLb{2YnVFq&PImsma>h%C-V}n|{2{H+M z->THsD_63knuKi$US=y;qpl*mIgk|&TsMIAqv4pSW2c*}QdFQlF+!^ii_HB&QB3GI|2Uwp{_xI@m zJP5aBTg|i0-2mqcVN-a7SD^&L4(VrQO1}9;ytYotvee!n0+N-V5-L%Xkw&*%l+?`u@i$_RmB+BMXW(N4mJ z*`Pczj(05`wk+BWjb76ay-o;e+{YvxdzVf_!fT8_KsY_H>%qG|MM6Ccuso7J;^oqW zU?{#p)a7G7^wZyk#Yr)!J4lUW7op)?%&0|;YL(7u*l5yAsMk`~%+|1}r4&Fctz+VC zZ?L;d-FW>X*Z{%PIPu`^g?+11#%|*zUEA{2nC8LPvY6@%aR6;m6y~K}eU4kL;kHDP zg{a1>risS%oQ5bdoo;G39i-p+E_2h_)c)8`{XXbo25zAEgO$Dwd9XkEv$Xl9qm658 z*}-z|V4#?$eqX?GbDIEjw?slwo>1LDVo%95)l)yE(EONt1IDd) zbVIebIr8ry0#8Uw5svRTBk)V0?Nn8A$#wT~$sQMa_%!k}iUY>{IS9d;BIXnU2$gon6)a z-`>6kdswe@sV{DFryS(nDoiU(K<*U!|m_xlJ9E#%zrTpnC-=!U`q;zTpAc z={!}kev^`()hssPDfR$+{va@B)SR{!x+1FBy(HOHOGns7(SqG>W1MQ-z5lpE7n&e1 z{0hu_*m)WJl2I`tr6ftxbMK}c;e`W_Nl@jB@5v~Q?yFHho?hop3k&{^k>v9xjawx3 zwc6b1kJ7H+zvr3cY5?Sd88+J=)KL|*L9DM=cDH^HW^Rfg)FvQ4TIJCIe9Fj#H@lxq zhitv_nxHi_yM``;0xiq7eiMX1D}DOLUSnIHP~|KN8zAJhDBSkJd=rcVX%Fd!a4hc} z9TVj#m)L^l?I(FU7UtU;!*99umM(7!!>g%xKSVGv`49*J5#D+^tvauF{SlcP^)X{2 zeyxCbg&r9LOtZKXVA|&-QiT6doIQy{Kfy*gHT9hOa>^)+2& zAOtiCwXq?aVDk(`rw~P|d4bG_*XevV^g_Swx;^ovfuOJGW?&Z3^}9mS(zm zHS`H)TsmmM2;x5xvN`==RK?vc*S~yB?8BQy=l2G?vp?{9VB|5p8`g!~ffE4t1%bw3 zBk5{H^OCX-wFAa!&E26Lcq$T63;t0@#h$v|wtSRKQ6iL~>UGfR#r@O1TDVgQ^(Cx> z*6trU93I?-3ND3eqTG8gHwXUa&6|cNzD21bi~Ofw!zz|5Uce`Ft23Z?vzXSAyIt760&=Z<8D_kgue_`br@qgqwb3?erySEH zRdjVNCDwA}MST&yIJ{Aw;(lG{Q%Y=PAP?}lUD z4%+DNZef{G7mfL@$NWkeQK4YuRF1^=&2#XlDdZO}?g;3#$qw6obdpT7c7i)#(H#`5 z#c6Ed`f4if#~rDluYU(6uBd%=9J8J=3WJwme>U>_>24M6OO2Ukum%7$^u4xh2u(k( zLs#edJ{$KG`D><&PD^ByC;SRq*3HdQ!9uF#*XF?fh3F{2*IEKtB%|8|UnDCkjlatN zUFnjIezC!>H<)wHJ0GGNN9?whXY9$y9#M5*@np%ZH%_Nj_U$;;RBU#gVks5R4!sx; z?W7LOaoO!CH}R2I&bw@xb4A3i(ysrIj)Cxzhgc9bjUh($_4xxQ0YtNRK5v@Q@Tkpx zLEDXO8gzf3>HMH<>vChq6gG}{AetOtA21l_LG{?`wD)6?T|{ef6jpyJ$}r!R#b}?7 z(z)4g#EVS%r5)stRN_{9h2)f2`R!iDW8gNLMD>@L#4n|dEt|2_`As}@*D;{4nJP2V zIXR+DXY|9*t#{Wc*BcDKNS?cTnWbKBG5!6!GMalIz%Q`=EM;KG~2vgd4?67>c~W~Z}p$pePXG~8v?8o+WR%Tt*@@6yi> zPhDIsDcOgS2h3Lnqk-;tLwhLw-~wS4wFtKp^8g;}iG!9|b__if_6IzElJ7fpf$PS( zNRy;PM$ThtZeSCi;VCF-Zb~Rd9~cNh^D3R{BB#3Qn@SSz8&$@Id+aXBOlwukau<2b zYnIm})<3FstDJJ+H6KS947t39EQq}gQY`Xj$m-jpVw%)01Vz z#L_$e%YyXSGdS?tBhCAr!wlg6QPK3#CtB{-2C~p9TuF z!v@d$_*3+Q z^f9y_^H4~dia4J6zg%;NI1lltA*wf;UwwtNesz0_)Wl*b_9jaAOOV@KMd(icB9C>r$F=tg41JI;#s>s z{RaX9KLY9N8ZPcsr(d_ip7Kd9pE)VlLs>(NsA=uj^RwVaHWryNe$}T#7 zbIjfm6TLDK7BXsFC$75qqQW5_-L4d+; z7_V&S$34vyil-!dMm&%j%vqja2qAu%wV!E|nuC4Z05M_3t0 
z`sghGh&TdK%pS)+{|#pfT8ld(UbhqyXg}9FImFxd^{)aXhEWKkK`%%g`QHIG&ytX{ zE^IwtwK|@z)z@z%{{9c9M0<^D%W~PQJN@5fnnAcziLN;im1x!W3t2ikx|(g{YJ-1p zB=ZQA6HGN~u>6&c;PFj^ZA9zUln?0lAt6m6yY@e3z)n=@eZLdhKW`^{D&W1>i2Vx8 zcF&2PSNZqO2WU(5C^Hp>d>8#oVAPk7HahzD7(TrWEgW6{!{C;3z|ivLUoSRMkPOR6 z`n-R9e6-_M!!eSG06HsJ8sH!1CW!J6xtAFMJMlkuFLJbb_q|$bbS}Aihh;BbJ{aFW zs7xQ|kE=3S{$mSlXZ$#9`;GGA4#MR(MQKtZboy76!-pu~4vj#c)?Y%xl>uDC#*dP+ zw@#_PN2eaoh>88+9)axt3>t+%d=#VqNU`dlZVEfc;H>bvhGinEp7?)^PAEV+Y6P$W zF+zWZH@6FV7ZNoP$iEzauT$PXb=^Pw&(&5?ly14g3LF1e7-@siVq(;f`@ze7zpIEC zmUea&{sp*B1B{zCH^{8hUq4xX+K94E!Q$)yR!Oaj2DTt2;>ThxI)5x~ zFkM(&6PowtpIcU=U~F6^Uzq=K90fgpw6?Zx@e7s%-(97DCB(@1#c=RHgYv=lgp0;p zpCiD8^s;@$+9j`OX(fQQ)-V4M#l>+L3HfYW9xb_gSvI=b7y*nM5tvc+uG6gad0P?6 zxvF4G>awkfByqCa^zcL>k*;#Ai%Z3-pA5M+WhBR*4F!2!yb(lfTkoWT2r~%c`VJ32 zbC;=7HJ?R)WuJSMgb<>q-)&AwJ_;%_dJ0mfxFKl0f~$0`;dpNI!pN@o5%OFyI*LFL@P2j8qWZq` zCE4K^w?@8eQL5%!+rO%drpIXI2wkW$3vu0BPNdZ&k@XW>jOi{q#*GLJXV2i&yRTp* zNTJi-)s?S?W?8+1_fTNT#W%<2IW+7J9usrl$9A-*Fq#bBN`D8lTWMrT3}n;D4fVZ*gP#!+zC!9&yTxg!o;xqzd~P zi=~~$Y+!>P9T?FZ&{ZtoB`A>#=^s#iY;f^o1f`=GEOLD>wsps9uJ=J6FkuFdgH(^{ zZ=5HL*##r<*B$avIVT3N%Sy18!=jPx!bhhoTxWfk{&Sc?K+VlY^ZoTti*ldXnq^dx zT-0^0cZ=~s*$P7w*BCu3{jM=Kbp;L`EJK_VMUTbLfnrnyxBFcuZ==e#`wmyTEq;E+ zKAzMy`PR{)tIetzh3pBVyDy((E~%${1FtXC)pxX3*95^8nv;+p)Vbz&I2ydp)r*Yk z!cQ8rVjjWU76z((*Tss|nhT!~WbjY=$}66w3$_U_G7)XvGUiy`ymk^fDR+Tz0;3iA zIewhBOvxP>Hsy`-DsMD@-Dij7MpL+##m5_f<)yVg^AwM2l{<|w&-xDkY_m4KKiJpE zF}`ZPwlp{5q~mI7_C6pV|57J4gaZ;2cs0AY-0B-9B6czQ3c6EQY}Q;hhnt{c#BtuS z1R;~y@pbUqJ6$v4IxbUx6vL;5ACPgf#WUMxmC<@=FV3t-`@zG?sT{Rgz&{PM=Rwr3 z%GR^#;ZnZ#SGaPumfvi*5t4*fr_WkEU+yotT_@Z!`;aI{QKVY zyS8c^WN%J5nT<*_Z_YlLFl)T&>#uU2CL|L&Gij>Qb~tJ%ckMV|XS>{K2(XyqSC#)p zcd6Wd4j4@Eo`fc6^3?ebOSQSKCF~wVW9!f|gFp3+gg*7!i4B2YB$Zu<9$|f#V%jeHakX@cQbh8m&t=Bq5wyy`FF=a_VkQ!FBAVX!75LLTzyZ5QUvNdXPMdex+) zXO7i(mip%+sBT}xQHp<*BCoewVS}X?1{jHAi;=Tgsiel6PW4b}43p}0IM;YR30-2+ zFSCuORR7>j1P|>_+-H%D%Gq<4v-R+Q1@(9(#s|!n3a;@LPO1tDKj|CjYfh>{9K@|^ zOK$YXGNrk(Xzzf|rKfF%)=MNe=DQ0F&Q*<~<8!VR(y=5>DyjS z^Vu~YIa37Q*GbFDepYPQgex~RhgPaFYt<%--1ah!9o0iSRQt729?7Vi-T-rP=)tc7 zvZ~VHCVYt{fyRge@(`I=%BQXp>6vJf8ik%?SR*uuS~e zaX>ul=*%VkiCSo9ht#vhpt5Sr&dQoaGo^~3!g8$e?Gp^AIr3dZUy>Q7nzO9?)T?+@jLn%W8nS zh6T;>7CUW>4mh&$LO{^z_T?=KO?P}oPlHOFl|^>&X7BD2;8!O^R5zD>O(Dm>y(RM6 zT|G~Gn45-x`z*ha%Iz2oIej@f{qI5qNkafViRn@eqUUL;xDiT~31`oD zCJ=V|R*P0;K?P^eZSQAvL075k3pq)s+osn^a+5eEyTooJeJI(D?<+JVd?-aP996x} z3#NwzMMYSYY9XWV5a-pAFu1`p)bDxmGjM^J9|~)Mm0~P7xBfe(%Wv=;(<;bVgs?Rr zRw-beQLxP_O*<^C+WqGJWl4Y!QQ_?5m?Sn>{~QUU0!2xuy7{Uaif&RRoeu9mqad@i zicPz}QFR}@&Op|b7o5uvd+u6d3)TbBkfaG^+#L;K9x)Cs{s~kr072x==Y#GOeiUJB zOV?a{She?F3m1G6{~48c?}o71ehFst=u4}OzALY1Ak&Vd45NU?vpylOy~@w2E)(5p zw?lo+2XPr>UaiFpT(5s0YsbvYK>Zqx3XG8En9&V!Rmc_j{tSLK@`=}AbMUSfJG=VGkWD+b==%+NoDa()%=*DZ8-@M+oO;#*;|oTfWuB0H zaV8S&2HpI=s!gL@8bi?gsxq~qJaOpKN%m7~m9sNwta&cK%d)){%<2BBZ-as6t19r- zW$AU*O(`ufwm#qOw>LOd0HlW6I)UBBXMLY}>J|NNzMeI{US`zxg;#eJs7t8nK(9@+ z-T%%2MUAx90Fw_qz2{lNd7+`;Ty^x!`c|CjGF_toSx?@(7CvJmXOjbEY1OQf>k`GN zoVZ($!CM(`d;@w6MQy$$px8waD(~{mK1s|ld6~a&aQ-bqSFn23qzO6zhTuHn<=wtb z#%AGHCSy@Xc#RJ#-k2fA#LZzpkH(NGr#hGMlwm$+;L`wKM!Vj2CO1cawl0>l_Kk*i zGaK8>5v=2DqEm}7Vo$!dYrN5e>N~IFes|Uf4%b>|G;-YZqSaj0j%X8XH!IrgG3ARo z@1!_Pm~rs%+@8UIS;_1W58gW=#8!$~mIVUuQj6l_bE<1k z5W^D*d6u(p*vAJAlLWcWIY^VyQIzse)uI`?@2{zw!9Go-@UK6qI>ti;&Qs4*k?qUD zA2Y7ox4huf!6cX17W!`&$}(ld2*^0S1+~JIUv8za+6zv4DIZ-69u?`x+%K#?{kAsF zp=;HEmZC0ubFB0{Bd~{YNe!*Ijp_bZ`TBfa``I^>+F;?RH({#GMuv+M4|5q6kY})! 
z<1aORzMUJtWdwDQz|Cfk8b=)`4ZI3YvJlbTv(?DHzMZv$PjR`e0R_npU|8OOAIBP* z6-NStT7$dtUDN>inzInvvyg|=2Ok;K0zZ^@qgo*rk*zZ#)r z&K3M5=VDY7-n5_{`JClaxStS!v5f9$on-f?n0y(K?O?$FF*4x*MoSN?54@W$wZI-Ni-wGxZh;rmGS>*l@g`Z zQvI5IA)8?odl7Mt{gK+i&SXjaDT-UBbA0>;E$`U4yb)yNL+Pe4CyIKO);H%*P@mVc zy8QcsWyqQu$1UuOP-!YqBMYfR5RFG^XVPJ8_SPE@Ihjn==och=;Z%SIThiwh^^`NT zT#HvfRiD&ElL1Q}X?0J%YldMs3N{iNEn72v8Vo5Yo@xpubdYO~4b{D>>uHJwxY@ed zFRMJ#F6YQk@AI1x4G7AnOu0AQJOrLm-`y>`=N9ZlzJ4rDoONP*jd}T6-p$-{DyM^H z+~$n1s;qp5DR%ec+kl4v09Na5ubpRkfK$z5S~wLqWK^1x*_`~IL~yNe(YkH4&cvm za)i$!?7d3-697p&YkE+t>pq(Q-8$grX3N%>%fIMxZ*Uy4Pab4Zomym~BCU4$REtD^ zV1w@dK|j`E0e(h~|2rn`dX-z#?Bg5rTbBE??Eu-lqF){$SV4b zWdtK25Hqo!z;7_RO7DZh?0aVm4n~};)%dOem;ZvEh6ke;@;e9LvRXzQtD98|sdjBa6 zO%#gR>m`a=y{fvdPTQD(QlL^`|NA~r8y&iOTeUc-HjiOxv?1 z$Gy99!kIw7Murm`tzVD(o_J~-4aezj`n5eoGPZpV+PCc#FZ}WQEo;QD3%yp6ZNJ)s z3C+b?;cwV_=ZmH%0UuJINe@tc5!LZ~#%)4&K5M6Uyr~iHa4qC3W6GK&&BVuozi6`e z-GAMu#K5G(K{Py-8_gwzGcYZfs`|i!Mf1E>B!Fk9zBq>iLX3QFJ-LS6dhzq=%e)1zr`CS87TlfMnY!ZIJNW#yf2-6Bg$zn z{YfHfMJQCHk$O@%VuIymNW*}Axv2OIYO`j>m2oz;b`=W4xi*}K5p`+fju4h{&xzk6 zz?w$=vJzSI{=KuwU0Z@}_C4$Jdyu^|U9#a*W@k%SdIXBBWaZa4qRRSu*)$oUlif#{Bwt^Op?j!^nz3vMX=0gFI)m$3) z<5fLg(J$$%mE3s1@|ZElA)QN0)LB_sJ(ozewP_G3nU3)19kB;#9j2feTi&;2Sm12z zKGm13w<&d}l2$*uETWzIg1;x;h>_}g^i|wfKZnoUgG9~5Wi8*wn|#rKAwnYCe*%qj z^XR5n@6EK0SIA#Z45{s0kR@`?9eL;I`F4LZGzWr9HP*^sjmR^D);gNXl@xHbo;ogQ zPwd}f?tEt!#sM0(29KV$rOgN;1Su~ai+|*uIvPGF$4kzCbx>6gOWA76blF%oZdb8G zVKp98FxG!9)!jd+fODGiRNxBJLA3xb@z2&%>sknKGEM5-@i0%ebvun6hdDDnwZowu zNfoD$OL5Mn?5+z=_f{GG&|1bDXkW+Olbp`}o+t6L#-lVy*1~`5H}pEep~%N2{P^4# z@$kyWBMbxn)+E>RwAj;c5BkxK9hq36buunKaQ<+6dppXPM_vAIQCUgF=eOKTFD#IB z{RvjYbCxS3mL~4RwPjF}#q9_B$lOBT5_>lK$PJ%BQ58SMJz1v3P&Kal1ZEQqKFmi! z%tzB7RbS0Hm#7($&DME=0FU?)-;#|^q9T3N&@CiVKEj_Y+(=VNXLo0Flxg^Tmm*52 z`I2`fPdNvw2!txy{f=*cysS#^Ka<|E6KPNnM0=k307FS8(*UgUf~S(R^j9I4qQT%_ zyGq(reFLB*^Ismgc<#S3nN_hYWy4CrL&k-xLhU~QZZ%z-tZDRj7oStQ7&ycUr7yML zU)7#<1^GSy`|ftD+K5T{i+1b*8%f>+jMowmah_{SV5(KL3ZSX-@TT!g$~bUG6+FOT zQ_V#AsouS;qC{@oWBcQ9t_Ih=OZp3+q3Z$UkKuG;HiDjT;2`Z-xFchV)I%J#PD%_E zVrR(ALB|;%e0t8xnUC*cB98$bF%!{_^KS>7V10{0>7nSS( z2=M?TTq+Z(x&PKxEkMrz2`g1!c&hX1?+NOn2lvgQ8Y=1Ge-GL8JBNo!I3p;(CJen} zPYQBCsTTD_Zc@|}H~ABXa{e3!tq~rc9_i#G9v7~dh>&h*|0g@ct4`YL&nsT~e>(E~ z4NVYZ#LoI8SoY`WXqPW}Pr`qq;XHl+RR6&fe$9V6qh?fV)?z+EOyjmSdq^@@4gZt) z^?PizCv@zq95oStyM5S)f!d?u?P?uZtw1LN;P03o{a51G?*~!WMO+?KI{v-xeHAurMQH{~ZmdthoBk@lbMKK*ty3`JyNqDG0>qmZA1LNuGnQC= z^g^0s_}jm`I)7r=JJ`Rm{q?@qVe#irh8f?#U8{6`82qVE+iqH~87#SPLVRGM=B3Ed zD(n}=z`wjw51?M_ka;(valZLNBh`XAlU{8Ys;RVm#PTT^&GpO~{f|ab1~~TU2@W0B z$+ne^*nLwQ*9x`T+XHbLCwZ)a_bqj|;|UWRbamR83zAtk4SIUgWFLj=*^Fk^NQ@%8 z>hI>4GmWXjskm{5hgGg{KeGPU-k$=`Rs3#$IezAF>yJ8dxtJ~@r>b=p!rJ?T+X!w54OQXz8?L=0H zaNaQxA+3on>B~%w!hZM_a||`C8#IqoAGZ%gV`O;~Fw7Ijyl$FZXI1yt7y{9HI6M2tzb~ii=Kj?P z{j>KEY^3>vwf`w77G<$E&lx>zO2U(@^NvVVRmNuj(?et}Wv z$3GkWpEH5Zk5Ta=^12iFpD+Apo6p~1Kg5h-bWYKe|KG;{-P8LL6%Pas(CsAub>^ST zqpar@%1)DiIhZp1*Mt9&;6(^(XxAGFN-_S~#6KrPJ?H<+PUukpc<&6@-47PNq4qM# z*2q)Pt2XZsK)qj5^19yvecSG6H(TfMD$bmY-baWVv|+`PIClOPW3PPg3qynIUqyPc zTCPAN!s=e6?xn4ma6E9JaPKUnuITKk3ZL><3~*++p`n~Rr6@NPiW;d;8>w{z%*+al z#WB!};sMV0J58|eq-B9D#nX1-!+x0E0|U7Mry`Y$0DtlC^UfY4&LgoxaR0;xaa zq4{?ij-Pgvi;Ehpau>Uc+6q2ycIxN(=3Nnw);rnh=^GS5KmHeTP%4j$3q;Loj7&^s zs2!VTs?4zPX~YaQ{Chx);XeA6H8tvJ{a8kaDG?!YpzUlC@Fxte>VucHmM)YR$8pDX z=g5wuaUX+2h2`n0^9_zCsdMQ1vpibZ<@f%~y(bRAzxzH;*gyTg9B@f(L=gv>B?4&4 zocP09JqVoe{V!R5Aq?~_Ktdc8X-6SD%8yh}vI@&D5J>F}kJDEpSc38~>U9 z9~)6n=-g;VPhdC5e-uH%XD?xBXei%AeC;OuRi_()1zX+tVpnL;qq=qVfsp^H8$IDZ zh3@Dq1g{y*z5eVF#9FYlif*EHR`DH`_RQ`Ld~oA-_d0pPQ>txQLsx}rF-oR}aV-oF 
zi_kFd>(7zAWwk*svqxfg%K}xm5e5|&S>k*>MKyTu zz_CYZA8(Wv;`TGQ9AUc8wEO1cT1|3Klgj^k%Xg#B4R1QsK0DZCazKp-%mFO+4yB{_ zMPK#W8K^=#njH~&bOJ*6u`-jfP1mC zjD-jpJ8`Y{jphZ*1L#$};;7H>7j6%VY!Q9@mMcZZFwv3sTlu^%_|A;n9xzAM)zv@_ zoi!^hhCq`=gKO7S`w-{53ycfhBf$I92W<|`?#!57yul|2;|q-e>yjrF(*{%$1Ag_U zucVfb+KwPQ;=Ey?wEvAcaiM|z7%|7Qkar>Y%qFN!16T=be>DG&wn|> z2Jq-k&wWB$L|ko)+>l&PsZ+uG!xOQ_SP;NyctNMSCwl2tnEz$%siu%J^7!Z-;-#5d zu@-ZhAE-ORPWPz`FB)D--T17DcrBi31>mLFS@W=%=HiHxFHLwdx720vVr#;ZrL;-& z*UaP~g%zp*(TDwO!HGADhOU&3hE3mu*zLBKzW|49>BJbRvH6MI{!GJ4>Jpp0zelpr zjlH)SG)vo?hq;`B6p_`8@WYWqA?((%>T$&xLsAJ3}HxUG3^xTi;^%@Lbb z*%sqxTR46Ffq0k|5Z5W14HL7Fz3u=jN=Hkfuv^|Ma9Q|GeVY)Sn z>)Gse@KmM=dF#|*w%vk0(&n~AF*7`5`?_mwLOpE~Rqn2R=F5>Hq}Dbxq2tKJv@+AQ zK&yB5dZ|4nG@$(RYKQ~$=nSbzXxN_JERWO~etvIVbF>>0@N>9VWmc^He*T5%xz`}+ zwPl2HCWoC0YlCAi?d7n;XRVS5&P~;>;UT-|bP(x7;*#uT-1U*_cc=yCl&2m(RcE2u`q36la`2|M(uos9vT(JzpZ1p$?mCk zEWFA78fAh@vJOY@p!J23i(HOtz4R zoe)MXF+{6z_89-5cpjITLVa|ZD2*3QrfF!Z7iVbq%gS23o`jjnZSfuOVJ@3+wiS0c zQAV{2*{uGFP(OR%6QIS)WV`(20v_YVJvU*&8=HPlw;}F>y!EMly+_|xs?<-_PTenU z;NO?qd!17YMCfT(It*^Pmx5uP$ zr^m80CLxt8X>S!&O{2Ds#ZpJ?&lhv&KRUAuj5vdsgKOiS54{`ozfJ!AElzU%rp#q^ z)j;Qx-_;sQ$VS=nxc-dKdZCI?;~=+lwrnMX0YHk?HfuCQ9fik3@z7<+uCo=R_ndxU z0jnoKwb+gFZiX{CEh*^4eEedie*vk0dY{`9KZrU*3zmK==rXJGc7X@)U=7dWdm_&J z+l+!YTg10*Qs~0`8Z5LKP9iUV9DfG%tWNa2xM@Cr0{m`+&I7h|d>Kf?Bzz5@jX2 zR{2~4}^BNH+Jp5!O)jAc7gu%+Cz>6}`;_kC^OG1e_0|0onDb}2$vZuBW9aUk1U zO}VW4_>0mnGtOrx(#8aUGCJR@d>2#t+32+RFL5pXp;0YCc^!MY)5ZGN94~z7$rYD; zD2%S7Mud$*e9ztrm#nDrb8{nXL|Kc(n#d{!jm6RNvzgR+l|v|TZluBap)KyXtjh*@ zD2&D0EM#x+Q%Et2uQ4o;n}0TvOtZ?yl8qKgzPEI{@*yLC#&@U{gQk5+u>iZc6@v>Q zaVN9MK2v+mqY}Ddc{<1kb zzbUfxqOaB3O7Oys%m|ZW5=G5bY^T@3AR0=6IxHp*YF+CcZO2ecHz4uF%vZrni%@_| zcBGSx)CO7Ow{t#Xga4esitGoMT>N%rWQqo#%CpW1pxZFG=(jI{fyNQWszu z?4h3plRHux87=iWiiv{s)*5BZb@-Xn5{33Kflz)A>ylgsJJsvWPct&>0`vwfT%N|p z>f|fCJG1qEUq0+&s+_)g!l{r-Zj6%9?M&eo)p)0gy{Wc;}}?{`mIxUWz{G}NA?zsuf9Ek zJUVr|p^@0%XiI7&r$4~l$<2>V8hK1!Hu(=;O;FQ{K5N&QQ}4r^s%Mle77-VvuxO#& z+sFKB)w9-Xc4He)1sRFRx4tirTWwZ7EF$xYOfEXc%qk_&fdp98WK(GkfskTrT3@?T#a;1T@pK9& zcYW`DVL^RuUbez>duhos(K0OxKPFGQx4!B}`GrjD%IPMY>QJa>ZK}`k+*CzLH$d%a z{u)r(#lm&nK0ij=!3cTPI-;fd>RChw>~8fk_t$CKl)%X(3$vL?ww4<_+xKn>X@8JkSuQgHJDJ92rF2YbZ{d^3O~N@3YgnP9a^5(vr$#xLxJW zlw|Z6(hjFs&jMcS--xHYI>))u$=8FKDAwUs@@$>fsSglf2XXu2ohDAyi5iYOj5|B| z1iF`%i3}b6P`xQt)HO4eF7iBtH)z)WcvS9bPbjGo(IW0axPYqGVk@ziBWUG(IrwP{ z?iijWq1u?GqrD%NrMlX}?#qq)8=igx<8}Ar*EcA>PFK(Y$vre`p#h)Fy$J`uE+2Zo zI=JNNyEr#CWnIVe2edoBvg%|MbJ0K)qtytKTP;WbrS_#Nl#&J)K`C z%RS9$p_*TTAgqUeO;RT-y|b|14Fu~oZh<^0_`YIwd~5DxUiwQpN}D7mN7`TD4|w%v zE|U5!B?dC)RKXSK9;s@ZXHKf{R8r>WP)L_887BRAEq*t=B+h4{8%NXdmbY$9F=rrj za#s!a72W~9lN^l^4gm!$^5NMtSbz|H-o!E{5{F7gN=`y#SDW}Y^z_c}QDtX;is1`m z6M7n=5-2C);GLXe)RP}`&G?@`m#l=6U|A@O8_idksy7J>28f5SE=TT9j35I~4e=mr zdBkAKZCTRN=bQ3W^p1p56vI04I`o;@g0T)7XrdUEdesJiAWrzEPc11Yvtkxc~H z?aj{%Tp2dOuXd2K1nuTO?$j@Kgg0>@)meNu*9}+~q5ibWu|`jrl4Ua0_~O)TCd@5n zs#<2-nBK>3^VE6k8A~LRz4<~Xc>b%!J4A%xmc)5`nA@luupyVO?2EX#la0OZL#+0{ z3LCJz4wB=Gs^P1!L+MvdjjNT!&jJ3oA^^Qc8z$%GKb$%i$i7M|&6*8Ioj(i9c-OW% zOs@5Pa!;#wn1DJX@OX{K!G4OSsSZWa>;}f3*>NU7nlyu}(#SV$^O3AVTPvA*7==$l zesqiCpwLF?8K7X#YUEd9;G)F)08}qO;D>XJM=m1EIK*lT>E`Kk6j-Gl6w$Tf%_I$X zDtGxL@rgCNG2Z+bvLkh-l8%$y{xA?}^Nj10$3jgZq1lZvaTYnLo$`mkV~b`38TY`B zoB;wv|5OCo(VpGW?~!ctL=9BH>Wh%&i@Y1R^(MvM^EM+_e3ho$%n>}i*vQG6T`2^=2P zxcQaO%{FH)itj`8;CuO-|G+ z$^F*J^vaWF4lFmVZiGX+l?Wud>*PQXU;Nvh_UF>iUTLjD*|Li%=MjDnEu6@uYL368 z)&8tACc)!&O{_R@0r8lOr(k}m@;r7`WgpR1-{#|OZ4=_6#ze&)}~1PE-O9nSs66j z2%TU4J*)B|iS5-l=**vL_K!pXlUmwriKG1Vs|xzZ(ibMW zx1ADF>WP9VhISK8*Mh;fRUb9{?`YNX25NRTcOghiItn7ejcCvklOL!+q-=@f8*Ydv 
zO42+|Y0mYOw^|sLJyJ|9DNN7WbKL05L!uS%=bK3Jt5b-w&vWHEQRFbqF++JDU+_YY z+c!5$O>`UY-F=#0Zm@>nFKAv-%m`h3^yszyJHW&dWII#c-a>UGY&bF8K8~(HCEvbo zPAlA2Aimvaii9GB1_I-p4gM6Gw|*LzYzHqAX}>9xT13?4jiL5y$y7G6cE{*{Uh> z4c8v(t!u|>9cq*0qK>1?j_qyhKN8L`E8vL!+-WD<`V#u}5oV9DZ$4S{fnb zDq}ThBG+WmdC%HUx=E+JH0B=S(K&sp7R%l5IJ&L9Hn=0<2VYwe^guaACtmU0ii4}G zL5rb60S+d@bPU>T1&_ukRhy0|R&}!jcj`l%ds@F%yKkSI7B7E6492$sE&GlOt}hEU zE`0!zt~Q1;mV9z6-SwlB>!toM%D)Qi+7xu&z5YdE=g9tYE3r^LlB)JDX4hdqs>0VG zD$1J2?%EdS*1r*+HGGhHihd77#ZFUQ7;7&}XS^Od!a;Y35C63Vi$QXdVG@-TuPg{O*YKFXsaZ|PqQhsex^o21mbh8$4U zx<2!(LwVvBYOdZ@v8umR@l{01M0u z)z7oAp}Y7!d04Er z!jG$I%oZlS{8-)xqH_uD3rF4_+)#ce~kH`kRBQ7B`9!5jCl=LX&zxX*bD^ zu)OpRX&v~S48ARE?8GF+W$Qd)dl08=C$wb?Z{D#I6EgjG8Y1YW!O?06oY_1&b3>!9 zg{tmf1@U=5wzKo?W0kDyW90>~AyPv)g8%2aCUs`gifZW&pzxKKZ(L~NJ@uc;&9{j>NpqLdh$)526I z)SB2JfU`%|DHKxthq}-Vp25a5u^X_|=F)O5*(skLAHgh2GWZ%|d(` zLtRQACVB_~;16S2_Ui9nze4r^zzLtLWtw~0li!*l%fx1xh9Ig|wLkIVf%F4qAeszN z21a+;X2kxo&R=RmDjXn61iE*>eIN`d#{&%(YgjR%|9i?`VuA-Tph?g`;-mc0SMQYx zEkocqv^yacP508mpKk&HiLxBdfeZXMj=#Qe&w>Xen@Y~1(_eDTFCFDqlPNqOdG-8n zxd8VnA|#C}yI`VD+UD(i3;IA-3Yym4W58+AXZ42g5cQ50_s}b>p0zt{ndU#Dvj52A z4Q%-1%VVcj!`i@qw;E86`Q6*!|D!YBOEr(6fdaW+J;G4EU+*LG=c`gEpzES7Vy1;2 z^hl^k>X=IzSGx7V>-&(!<~@qsiu)0)SMjrNuBo*|kQ3jKPo&Y^_dztRRu5j#a&!d$ zu6uxGr^|bdJxz$X@Mb^&Ja!m>WK6h8!{sD6QlQy7PSc-hFC&H3$Bpkz){Z{e`&9Ix z$E05Z4HH~mX}drBpN7+j0cw=qoOyNhzptGy{HNCG9}4?NiTyQv0KKS`=H~_4woI!$ zxIjTPK)HR%gD?K?5!YG{97kp1=A-e0A@>Xrp>@O09sU{QzvZib;5dfw&l3LM1^mxF zVBBv+r+Pn#`QANAM?L|%e+`g zHZ~psIkl7_x+E8dLL}kh+9Z-t>rY18Wd^pm2;9>8t|y-5gFwBM`AOKq8`EmW+l0V$@Je7 zgK}vrkeTwOSe^6b%a^$rEM~KQf`OPHfSr2{rbf6tbL0w~@3-}UzhDB4MW z2&_m%?dhra9f==20HAI__f6vS%#WGNo53O`^LGfzpbT2fmJ6i%*L(aW+Nk@5oAA^# zcPt|eOxy_U-scxB=T1<>c0$W}pq+p;LBWPkCd2hotb~BRG~^$F9?5Xq>kpUz)DLqm zp7ILCls)J~jAf~P=V%eYo72&@^avFxZYYM7KMdvyvd~P-RLdS88c)!F{a~!0VZFON z1x$%-U=Y&%4fD!sRfN?2_~4)oNWgiDf#D6fj-UJc`(txCy!!n4bFKh{7xXsZbXilB z+^PkTM`iv3a&W%y@;Hiyv?Hkt_f2KrerE<&?s(0&3|_-#rjHB#LXNUR%|%@&jLQRh z0!g7=Lkez-DLU&XFya0Bg#uA9Q49zD@SA-#KW3g-`wX@Fkm4~ocS-AAJrK)GABbg} zD}mPD{JNG#M=F1&4Fn7k35iZ%N8F;yTfmX3k=3IXK zkubTBMe5W~c}`y};PubnWOsP>+)XSL8ez2jorXQ$z^OGkVgMW43IR97K?k~2Zs`8$ zsba}Ot>xP$Gf8SjifxWv>!7ukV&0)g3lXj8E}<)j^>LIfyuUO8uN$0cO` zt*-=ULmOf*b{eo5D@tm4LFVVyR^<$9fvg9I*B|^^(CK%(T?SOy(cAS=_*8sOU4X9W zQA$S^S~*uDuQI}MImZR6<{^b&rB_xHTyA3*zi22nX-KgqaGK|IFEZa1P|>b0-&y>u zGj+YmLYgZ7!ohBi_gw!-a9pfs1;CYfLU%ry8!xBWR943FUzs=?QV$h!`&w@e7xec< z)mGH(_xou+bz|Vi`Q~_Dv2qUD_}J4bOLssl65ImL9pkSiy|`4@Ybd)ZSvcpxX4e07 zsL%kSZ5mg#+LwE6qN%B`+3}-4mnInm{tt0*3tTW>NVcrA9B#oRqpxXcq9vtkNpXaG ze@kHHDi-0|ZBM0T5iWrHmkO=I-`N4%@ugQSsA-75Z+y?y4fZ0Tp8u3&x~p1}m%)*- z*HJ=v=5nd1X)D(oMo7?fe~Tij#LF*G|9u+l_ zD(h3_=xp`J=3}na^^OT@>{tDuKeQT=4#P>?O!z+9(Otxxifyv#CU&refZ)#&_>)C`I zjEaX5Sgx)6#*%*9pT^8mBlP~2Mfc-J?Xg^$T8kymbcT-u*tPYz3#FCm^Z75oAk-bN zyTr0q`*OI0*DI^3g4s)K+Of7yshcA6w-EO*f2Nhp)K?aITZgkM&`0e7#$4Ml?`Df~ zdqqd&it-8z@-^hpsRC1+c8G;zMwc}9zF9z~1w$i!Z7A#`1(|10WIOg4I6qws4mA9Efh7FT~r2DTP{&Tb_z%>(RnG?0=qbCsbmf~cWZi;F<+q&iltrjf%u?$xwJm~2{a z!;E}Va6x?q7!faG2*a}uoSO;DOVRK(OqH=}6F&T!G#i&rEfaeX>Uhx)XESf)5T zhw-^QGJ6r~rf*>o&1@5qCnf>8!1YtgWWj-*DDjraG;rhs{a$_gczI&s^URJcVkTxe zM1?`8WLX6YhC^g$pjZfZ#Fg%&P3q*}ftDV9!XtXRwWM%IYUaC-6(HK2ALo~=SUpeU zyoU)I+`Vj7w zwY6-@s;WAd$3H}~73EgLhB^8=$KTfo3y$Mo(CEBp*e*=%lsJCb5jYPst}WLCT6m%= zX{#Z2>q>WsNOif693?3(iFzi+DlKyXwzLw|mi0ul=v(q}lDeft+uI+P5FI(d@2+c=|fcSf$4Zo=D z{2r_X?**It?7A!q{kw?ox7|&^{>hdIN7wf&1PVxU$wXs*Ac`N4Sz}QMiW8n=&s$tBt>+atkQkdnejIj z%4$?=A)pqYAme547iThwU2wa)4!SwFc-s$ZAc6WmBNUv|$bIKI$BeV?ae z7ODdso}}>6@Fag1!Z`dOYsDw9c)OLi$gGAWYL 
zdx#}YdXmc?TRn<`!Wy^8!)04UVml3)>~ZO;&Y@1lxN;I_{Y<0COQ-XYeq|<$CN$lA zR-?jm=7Y`)+L+hXtHs&#qFw{VMtOnT+}Ae^J0enpF9uV8pt@4MdFQx_x0>;CJUs4w z=&h@Ggk5|Sb!Jd4#E(k)lIR5Yz} z3n*#lR3^L9zJ^a(vf0ObHW0gCTdWZ*UvL^%8cDx16gB*k@ybn2DoAyoX-H^Rnsx72 zTGH%fePA7c2T@6Y!kYrDa6?18 zodc-Xi}Mq#PMn|dfKHf>9rCCP?*>CTM4U2rk3GV)&NPDlv7n-AZY1Z?_bH?VnjOClqx#%|p-hF(U>-Y9k) z@naohhJ}ohjiv##E!Ixys?dcEFOMYa9O+k_iP|P%;?8n)C83uA5AuqLY{#!wMO#jX z{VA2ozibud2n=V0))Kt;ldAW|Az4pzD&W~HpHn&QY8pU9jGsWj*W(Je<`qWd4e?jE15YK=An9GY{D9wLE?i)D%}}EQ%#= z(Hp9+C=pt;$gdVO+fSYgM0coQxI`@#TvA1lBvUt7kShKF71B5oAM%pq?P1GJSN~$H z*63xiqDgB^p+E6|n%6JD38uQ#lAtUNHb%XW8Ma!o4_@dOFClO zi1di?xM+B#r-a|IQK9BBhK1(E>d2G5Y~%_jk!FJO)u|}0c{dv-ysCTMS(aoWXT4^h zh|eCSt}vf@C!3;lN0YL$F|jk?95BH5!Ab*)Dq%$xW@V2)$y&tSMUQ76UOBg|c?f^Y z3KGg4QXW$OKM0Nq%K~ed%MPySbVRZ5L&11QCB9nFTfUF&g~#R-6R_hVWQ*{JNFeE_ zfz#x)YL9Kmo9W@S_8Q-crcyfLuUWh&WRU2P&VbseL~0|o+OBtJKaDv!P6qcE-bzEs0T!9o*dG7phy12 z!N!FxirV2afml5jme&rTPN7M$Soap^U#9LgcyQ51$U4h$XK$3gg~NfRZs3Pp{HXr) z_StGU6QHtD)>>Yya3GFAe7B1(^*zD*c?c~VNJ)(l1D0n0FXMq2RPlVh+=42Vjze)38I_&nQjUD^D>Uy%L;pim>AhhFV$DuBq!?>*m=_%|O* zg#!3k^dk=X-;VeDXRmatJT&0p-b~VIdce)V?f`j2<6D2|?T*a{5UyO4bs_8hBkT9E zJkWloR6uStzWn#+5ONp*Xv?<`Im!M9$Nr3sG$g=w#U3TaJT$cD1D{Kr43*9QZRBtN zxF{b$^59dCA|8A$t^lCLym|JCg!}jO-?8F@hXdEsN2Ar!{k>>+c=$qa-}M*8DAVO`+VMdj!T_-PwO^q3gUBma z0}<|TzBA!|5J@kf3voL-Ki{|W9<8eWM5_yK)ye<9es_K#kVwngA0C`Gnid!$<$JuU z_wVX|`la>1$N_&yDJIZ?(`P9~zw>bZ9!)C@Xs1`La_ocC>H!%MmD^Xt6A$9>N(5*p zzZ%VdA-BK?R5`uT-+B-SY#^0ql#mYd!MFjo%>p!mtJ{i`2XR3BNp$~@hQAPrYVe=v z10qL`WYXt!!9a_HWkEJh&WsQ3J`OuolZ(5%8lNhnXVY}nOFAdFP_%~%=Jt*5CZH8p zE5@B(87{#D?S6$s=sKe*eQy6uU28r7=iF*Vy9k|qnzOA%?a?=uZFd=N4Ua$_oT3}& z0|Thr#441D(+PtEq1-m|+2cdSu?+=)!?8p z#H5`xDOUayvhlGQrqSJ{v3WT*>%~IZX4#tngP9uU!os34y#S}5%q{T9@lPBPFs7Pa zI63ThgSNL#s#>!p5=wP@(VlbKA>Q8JZti54M_AG$*x9j&5?-Vd9{x=~=%xm0hoPh~ zC#R@a{cC^Illm7Ox=v+I%fa5BA>j_+c6&vzh@6>t3u7_f8uAM~C z*Hq7p@~&@ZfgFR?$wJL+U{!TX=`YyOMJn_7{>_mOq5`X@x3_I1?QIk*pY7&Y8<1FS zu{+D&cz1hM3pyerlZgL>L9G~@`L=t+==^4pJ+aXO$+qD9MGEBPJ%XF6&Dvqn# zr31LB>2K}*MTwWIJo|!rclibiCA3C}aIKo_q3Q2}$;Ynh4h^7;P6oHQonGsYaf_Wc znPigxIGz6yO;ZQ#YeGk|>HONR`10}HbIuZ0zea#Sl=JLvu;lkYrmi=-HJWXQuME0Kxr zfbNfr)8>il6pS!K;W*$RDT8ONRB^q#b-A~6R!&770;wWp=GolekSMzy}?SY@zs;L?Relqd z#*x+9*R;>NBY-2bGl$4o_$3!rLp$w=f;z9xd-m#^TEnh~89(h?~tp@PPzt?!}y z;@(fkHrpVmAm5={#TFXH)u(1^ti~VNN^obGpKResZZN!UO(Rh?LDe)u0uEU|It_N zg%JATs9*-{E^E6uZF{$vGKi%>PZSzC{LT#f1`inSx-wKdMsAg5lF~bONC2Ee*3l-@onN??3sBppApz{UJxxJ&(4@>8I5Yox%S<75V)*dq7*#yX(jFYt> z4JuGh*j6znlD}a4LXE{X)iHszcve!fwamKCqtZFj^~#L+whqmQpqjROW>`wl;v&E( z8bqy6)m{iNSgEP$Dax7dGdH}S*z`hsq3{dBn4W|SX{~7(+DKL0sM?&D!$~W1mJ-9I z1tJwMM*8am*HagP-MWAgB|OTK;bP7bDDdNnQ=grdrl?@yiSFow8Oy^fCWGTQ+)b~s zToJmnOk)maFL^9Zhn$A?{xVa=2J)M2`t>h&IU1YggI&yR_EIHXaVbM-OMZ^I(>b_> z+l3allVS!;S>E%;@qJwzfs+Cei8evGthiaCF>j2laQpn8!g?*d`1F0widQ}o9`rm> zaJUTHCCtt8CpbNyL;DF%=f5@8UG(#WOacjyzEs2a@0)xckXFiDoO_KK^9#8$B&It6H-!AU_(D`psZw#`zxjB}{g#%FVNJK_|NftFgZz0hXS*mZ)$?aB5D(;}fGn!;C` zo^|t-jnQY8lV%^*oC?5%qe5I)u}N{PmE3L_6^s>vt#Jt^8H{WdBOgEobWUS?xQ)5j%u$kNmF0H02I<|79&PE9jbndd6BOqs} zRMn75DEGRPg95Yus8U(1ay8dJYCh?!7GT&OpUX1wc%zr779I(NM#K{t)~>Gi8E>$p0Jg-RD33XeG<$ud zW>#!yM06tKeb@Y;#!*(eh6+gf}D>pl{&pU3>D!+gKvn-3S4e6Wf~QTM635k)a>t7{FRtp)poZ|jD!lCL-D00DU9TKs$WmBQINHIXgY4=e zW*^QmKWYEA5&#jtbO7os0eeJ>C-mYJk{9dH& z*ODJb3vPp*!=GC#{jde|>t==2afe_C*eS+kjTh9z779W>uSuOn;Z5wQ2r92BpRVmH zrH;JXt1f*+*GvpoAD15Kp@5%aLyNQ9cFC(nxqTr~rKY`YN-AAgwEj3_bh(|VZq<+Q zix?s(rYV@Ur0X*$UxTocBoQ%|lqSlWljU6~cX%i5(AJ;o6ExDPz!IR(IqVSBQ)N6! 
z1(}zZw^>I77%F8dvdvHu^JGrUG*Lb*O&)ZIBd|G>p>fYljL&=$30FAq5xKeh$2Bkh zLuM*VREvEqPhR7gFIhUdTbF*deeH9{QAE@=H8u3(MlV7qyc&Y(bnC_F$9OVBb9lFF zKwf{`Aq+eCb*>)tr@h9Y4U);Iw8C=kTZz8FNd~_uN=Ks3O5FVaUu;`Xf+%aA#s5*M zYHV$Hry+LK375`4ZQ~;5>(x+eOL+=ou&;PN2~!3B&5iP{HXCvEIL4K(RZ@g902n@T zl>C|SOJ70Y5KiQVCz#swgb@LCcp&;>=tD__LnzSD$;A2M80&H1B!YY2iNnl`K}C4Y ztB2qdyMs$kXg!F-eKH)P4eu5}=EQrm65W9nZqMMgcB$UVoxI2~PY^F9si?u{b;7ve zU>Q;BSgzI!)aV#Hes=QTnANq3Q`(XkovRs}y1fhTa+8Jxef&+$KOBzWkliE}*YjT} zdd|QspHGr65huG<+7IrigQw++UUWQr&g)|cWUY>5)3dgEgoaR9bLUG&p(gvOGo4X# z$1yhKHfJsQ)r$fH%~qkR=Lg5CJW&XKNWKJ&OU$swZ$A5P6T9{LaMTX_f7sZ~G-_nj z_g6fbYi$_!>;6UepJ>3aJ;@DzPP2)pmlof3LOOp`Y}mc{-hL^OtBef%n|* zk~kBkk23ey7IKO`;UJsHlFAoslgr;Rg*gCWJ(LO)k6YLr(@3TRu>ENUc(A zn}7bHZ+Y;L;kAL}g3L(WCC__;lBPR!$H2WIXs9CS7skaiH7{;4qOL9tm>>GVuYUT$ z2eyIz$H(C3^<2WDqP%N4%VHU<@ExmnbLjLXsXb_^w5~GMb4S_D+3HME{y)I%q{{)~ zZg7La^&6Mh*eW@ExHXTIj<53r(O%=9pTqTm?PVg4d-C3{CWbK@FxwoMZx@W+tP>>= zZYk*V?fT03Bt6(e)?P*Mr@H7_kWFHL{83dVn%jrhi3y>a2izrieda@4>x zxzW|dwTldy8ilsg&<2;Rj<<2k-R=l~0Skq)*7!GO?2+{^jcLu+>&UZAZ$$>MR4jg*Qb*?%~;gRf3h-%(J1PpYhNIZrUTaL!tg+gn_Rrq8XY#I(0o z`i+b_U8aQ{n-UYB|17%%c^P&XHFiY7GaC5pK=%(x66ET4<5lxPdMDG{Bi{e101nj@ z5e{-MK%ZnUYZiT&zn-#s%g0n`oGuDnM}Slmj%a{R=btIUZw5`aOS#SdcDUMCr#}KO z5u_x(LbUD(etUV-F=FG=YbS3S{-^Fz{*B;8GT=%~T{u9w8Arq$Hy^> zRI2)F#u+s^{4N$ZmHG!m@+cR#-k+y6;dQ(wp2w#n#pE-Zm;q0&jZBE%Du~y?hc@k6 z33wRuExECr4RK~d#h0(kAjd$jVxhWGK4q1h?~vQ8QP9A<^KQRNJUJsKLXn5 zv^bz@*jygYQw5f9h#GWcLlZ*J>ED&H)t;wjCT-X&l>N=_0WGCb;UimQcipApNt(h^ zSDC0ZKwz0?gYHwb>9)qZTwd*CVrx8SEAM9MZiWQRl9fZHXA^BNZ=OwG>_@5u%J3qCa{N$obbY&qAxL1$>z}3QM z;S~QYh3?&>?vy_ZeRwx?a6E*{XaJBDvLaFbYZd#`w(OsUf1d54ZHfFF9{=BY5gxK< zo!T#LKm3jCE7_mw3bx^(GVQNUb9nZf?ivCP)A#`B2Y68$u-slS-3RM0oAw51=X(Px z%m-AO4+z;c&Vw%D^LPWWPMX&k2oEBh3iRzQ;(gEEpMx(AAf5khOZZ>9!8{lr(y1dr zepp-BUeWFVC(?} zZ&ru@hT{Gl&r|^Lo|dQlr`XZ`9voi;9vfI@+09SewAWXavQ1JW;`wt9Bh3M$(9<-cAK=pvO2369!OkR~RdX zP>P6SDwyy4-v%1U0 zBqq#@(%jx&MO-Er!=JB>wW)OV%m~!=77>Kukyl}8qo}XMB=Y@gC8^>9+;fle8&V~gr||u&%@7AMD3|y%w0329MNGCeZ`ou$#5D&2%Zi~Ng%`d z=goeHT}X7o@v<{YVj}U<7C?Z&3yY9P_rk(pa~+&?Um=jYeuPx~@lR#EXqjz7}i{oz9e&e4js=R>VDCF1Rm1*187+-s4s=&aoD59`jiht!N5 z=L%%Ez2e9(i5F}m2$Sm3D4v?8B>JzO+uO-7AWTqQ8b%$)-_!V#Jh$4f$PwDc0S6Bf z8X&#N`|*N!EwegsQ)&ZT_eHRu?N#@8-zQ!mDr>j>oV9PFOhaz_#!s%3 zZaWKZvoZGXcP^h0CX+u8;ZQMB%50r@qo87@6O}{#F$5VJiCM|MV8NG+TWEGA8T`Vk zxla?jGxXkAbisxsk(2Ryq``oWY+Oyh6nQ!VrG{(S7LD&loG<0!=PIPYu3`xG{0z-) z_9qh6Se{k$#2W^-Nl!^K!A$~7<7R=8qqrUJW+9u5ALOug0$4|4tbLQ7DMBqT2#svo zhcLpCwuG^E;PtRWb(YED5CSvX8sm93W?aK5$YI%#VSI%hC65b&Ce-A(9qZwxSJ^+_ z3ZJgHHfh)Tl4C}fKg8LtPzOAy*(|DsVjn&2G&Fauq0u6-iCCxm)BR~=*Q(}$=e3I!o!=Mc5VFwXRvDrU+Ain6l zVPfFZN*CGP@O!<7FRmyRs{O3K%^kTk z5Vt+K=s2)PTMatw0Nb;8Tj4x}f4b0)J}Tf^V6vt7l9nmN%SK?5Z!&!^d6-Lrw87*T z_`Lm7;t*mM!P6uuOjP8spHm{P#zJ}^d$JM&#>;ofu-ngIxrCYU$N2f zWFkxkykUjO??9b}Cs+*_^dv_X)#wRqCC4xh;J3%c4Fs)naHBE%53CZI!yc@Xy2FqL z`m!P~lUM$N9sKA~fv!e@{7qyM8~!`eFX6XQ2rr)$2`5Lv8e`7AWsV}ig$WX^9MyXT z-yevY1yZC|LDLPg6~4$MHHYy;3>Tdk<=P=Ym3XQc_KG={^&L^($8nLQto`?1im-*R zRG!h}xFEQ|sRlKDvXod76_CKl?pCZWq<-F^!pc7QMCKcP1*tV+^S66e!ey-OZ`Ag9 zI#~4-ZN8Y1olZxnM4_g_C)w1|mmCs&nLeZMckXsXcR)L6JEqRG*F2st1S0Rlv%2j+ zykxa}juXaC1A`OU_bt|-__bUjv+;p<2cuD?hG{9X8lh$i^Q?h2+c|a}+=72=C`pI> zhXi}>7sT>#1p(FHqU~QdFzV1jpFuI9giv@WEg~KwR?v$eiXaAYZgF056mgOtHebzf zGhV6DSu8V-gl;1j~ZP5+AV4?=If=QV| zT!m~14>0Bp=c##-cr|$$npm4eon4#-oRv?xJqjQ1dAkqMD`i| zvlF^5I@$BeFv>8Ru+A`#umr-J=i5m;Nw|qGlAa~@C)&Oaf890sZIEU#Yw&7tCNVhC zuhPD9r{c15qhhj}-{`8dzq5E~uP1q6zJs^-Iy5|C$e-yG*dH8pz{HoVo4WK_ApH*4 zi{R$P6RKd|U@S@I9?poF)K~Hj^4i13!_}#+sWLi?Iubh3I(a%5jav;#jkS$|jiC*8 
zP6{WiN9M-}zi@vo{!IF5vNrV-V^g4Wp^tB*r_XGVU_iZZrl-E2<=bIL#_&qVgBlD= zCgm!N;>%GAR+e5WbBYG_qwG~Higy%vS;ATCZna_6ZBM9$6uz;%nlUmmxa1 zgirJEr=EpB13&Yj_>8Cg6zypVHiSSjgyHLxuL&VSal+ECq!FZ5q|o=j?JF(ewUt=T zRJ;^Xl4O%V&faLit&VL}X;KNDB(M;%P~U~ywcTA|`^uKZwr#xJJ3f%ypFdF7=L^C~ zIEiKec~K}Z*}cl3T%m8zB+ptcuz&Y6k1{YjXiek?<_{NY6Y46Z*72IE%&G*(#^Vaq zoUgV06mh}D&sBA&aHm|S@HnY!pVwMh>Q7NmiB3W$3tU6@&lh}bFLIIEk)(5!jATLT z6EY`ZTZ#A?_|hfstlnx1FQ_+b-GCwzyhp}~5rJz0d4igHYDtb&EhHu- zK)Z6`iJSNZ<3*MLrPs6bQ}+t5Po9g%xxZd|=v{ns=kb{G07sWfspeAK;T$?1dXNcZ ztP57R&9{lSP52V|2Ki>(a^9ld6~J-8MIg#Pu|$kV7)IPhOheZ~euF7Ua!d7{K9|Or zYLc#oB8+m7VPAsV$6dUY7f&N` zBdqTcJr><@KkmZZ!rIe?6R^`qQ(mPff-dR$Sa-eIc;{#vjaXKeYg*TDoX=IE!RKCw zhf+_a`ed)7EMq+>Ff#CC&R^=%w6glV?xM?K0x__2Ivx9u1%zsTXLOuC5;+Sjs2-aW zne*W}=E>t}ck*!}+w9wj+W?as^PKKf?El0G=8Cu^+Z|Bv7jnV%;<(*W3{dL~dfW6Z z_gkM4eou0!d#Gb5eyBjG*E`&I@?T06TPE6v5z;&~EmY=osC85w@x9(a_j{Zowwp#uQp>@n<-;X<dW}831&a2d3zg};}amBHt@u4wWk!;t$ ziMoouHtM5ISEyBXR6dyaJaNZv%T8uWWv0GeB{<;&?8|lDuafhbWsmJEEVPZFaJ%5P zYM*jV6^(t3Dm@}yOl`fp=o`?%b`V}%98_PvT@c(|{&QNsG!$Ha_vS{R>Sz6~uv%k%qQ*sC%Ory)9f%^k}f={R(3ezJ9ffxL>S8l>|{RP3AB(huh!jo%4g zlJ3*Tp+}EL<;VNdPLQY2n^BF>K4|{@yar26Y$CsjMSoU|V#&Mbezx5_!0FBDX5l+w zIksGqQzA3!Q{blEKAicjqQkuXb)VQk?38n7=}gOV_P46>;w8^fF=>t&NT|}ENerKpZ1p@zKa?A zP0AfN?ohp^tJHLmL)j5)&u#YV6#RK-Nk>y;Yf-r?$@Y~Ug!Sr0;$Xj!c^Xr!XdK$H zV(<=TDCyW>J^!A{s@9I@U}JCPY}@y(hE0@er5sR`N%Cef-f_W8h0u)XB94-NZa&xL z+-s#FYbM>aQSmvMy@WFe6H#iJyB4p{#~H7QdGF0;eP=6pED@m5nAE*-_bzstLAY1d08! zE|qSkb|4$Kb3vc%NLRr4Oz|HR_e@@D=XLiG@oNAyYFO%+<5k&@PvRPSG&<|k$#ZLeuAaT(I9`MxSKpF8Lh zEo7qkBQ+kipNv41Am#eY0jeA6GCe_g5z(hg`%3+)bxMYcgi1`RPRiR#WFpxex*$0x z4M;;>N<_tQF*I{o^-dJY_j7ajPM9E3eF|<$chXgomo|cyghr62*Mi1e@j}Rg6E`-= z`-lPgjMQKqX!CS)T;n!W`WX4x`FxyK+N=6nSdhIf&^PT~4t6SlS|q9SDFt)ZydmY! zNvW4%k`WS25+^_0yW_i;Lj96hleiOVsuC>8E_}yumsPCynrre;Z_yp|dtieZw*y7y z2zl@t6HbybImvAK9XTJ4$L2qt1ery|;o%W%Cv3b>5Q^3ya$w73Eab0U8@8xqCny#h z#!vV1_09Y29?+_mDgPn_esduC>>2}KU8n4`-awOY-7nV(;S=)#70AV#OB$Kx?@VV4 z^KP++$4jD1DLXrDE0dZNs^_cb7XiW20vnRABb_`KZW1$~5WkzSyT)m>8Ezi_xIKfgKko7EQ(dBX%w3 zSR76G`LiXMJERrZ8pG&Twg89gWnVP$m01Lo#hyWhuiMu; z&%E$btMAu(umWzpT6&A#dFHLjkHMlT_Q|*w)nl{pk;aV3!exDs(@#YkOOoB}W9qd$ zy9A+`avcht%{CghrKZ=7MU_`(C41q$b>k`O^V)VxElW;jyXo(H!sVOG@}M!KSCTBA zSQj2=Df|;2S$w$HU%A=%#<;3$8me|54jz8KK~qEVH?olWoQ&unh!2C9CY8?fWh2cU zrnCsQLLKpyp`SnHDV&te?co(HMF9rBA}ntQs=GgO6s)C`tu>-ffKUuhL4dbC>SW-; zD&6xZp;BADD1C1wqG)lk@PrdGgGVKC@u|PENs8ytNAcUi_d(WHiI41Ro?buqgrkb= z>u%{PHk7F>k@|f0N&@=BTY<_EBQ5%b4|RI|mY!pSs_5OJekMmu0Z}Ir?Y@`ufBB%@|(Md#hTj(yPd; z{8Hsq2fa6&lbehhXPk&1n!CP!YZptFm=eeN zko`fwhnS{@Mw&+baD7QXzCTqhDquvGufmy?+v^S2q2g?GK3tR0zIcX=rGk69F*Br` z+k1XzP)YTkr~71XYJlLaWxFoF=Uiq0wEtUeWVrHBH8^9QW`O`&u{hCuIJP-uGm~?s zaUC+NoELDkEfhRfPA4hfZuGqUE`Yn%O>Nt$Yi7#sUFhe5OR``;L9L|gGsCkft@Z1% zm1SZ!?VFzb2M2$c9a!R3_%&gmh&M98Y7u@eg~XGIJNc1#^|d?N76tlu%sy)mp{Mj8 z%Y=PK&2TZ^k#2tN7uFT^@%J!LxO~AMtoGI{`^oDMi5!E-Yd35!IuO^8=+Iwj(dE%W zjy_zyxR=glvkrhZJN(#cyK<}c+hcW1ctzA{1Z??CGeeVgK-tR@knYLuvG0;J{PJru5ep0L`mZ;2h$KDV)7%R? 
zPV2m(GoxhLt&kPm+gbQlBJMzq+S4AV#}o=y`SzsRN~l?)fFhd;($dH7_px7QqzgZ&2n{0Kbk>ii?x>ui zcO#x4lmxd1O>`{fuok$>5QZ0Y>SsB=?D&Mg_bQw57p)wD4V^AyD_S5*4jl-inl6;B zZuX5mq zp5s*+y>QG?2s|u;?{ht{Dc7aiXRnAJ9Q2jeZ@&h9@TlVQ`E$C3#MKI!4w6o z1T7G1H%TmB7J54ceXjNk*u4M`5i(J0QLip0%5?F@=zkaFYIAkwZmeixyqOKWQ z*(P3xZm)U|gIh#{?@q!|XHm%rt_WDjMqcIdX{wf~{MxATqgGM%mEXKIkh5lzT^ib3Saxy?9|TPiK#KhAg>#*I^E2RTTR9NE3$a{G zPrc>pZrXt3VrktW86~)n{jfl&-xoEBm;KJ()D-h1=_GVe{m!ERahz-Aa60rhR7NH^ zfhPgtTX@fXWwco^F|viXg?Ie!q<@d)KtHM7Q!S*#wWLS=X6Ds&&CIjuvNE~y6L51W zL79-2{sYs^!o|g}l}Nvb<*22j1@6kR=G_N#KaWQ6Iy~hLoIl~x>?JcgOc}QS*~jqk zsp9bPvudfT*Ki$6qX%|;+GZF`FqpijlF!>NbKS;As(Qwv~ zljSwCvtc$gw)~3QZND448{O-KKp^b^NA-TJawXGAcy8z|yGkAgH$6*#q^53U8 zTM1BV$SIPG+Buq#b24)Tc^Lv0v5>f_zlZzW>%K}+#9%*|8bO8(Zb!tT0_jj#>CbM zm_v}~H7Eb?^Z#Gp{CCEGU8(usD>=Ac|NF{+ee?H9ewN1t{A)pfcI)>juw8Vj?Q;u=`6-+;PQ;JAQFtzaz&+ZoSY`O%9@Wtz?kH#lu6(eq|sk z#DJ=%(y@$)heodPp6;Xn>9W@$ML^W`P;-whG)AT|%F*p=jTg6#jp9?*DG+|Hn3zd~%!WuQ3I{;A*sa zH@@I=9W3@Zn6J%~2q%JdCr}~$V=fO*k)_}C`4x-G{q_;mj~(X+3nnvVy1UDkMM`&L zdXv+gu_V3Hw?U|CigP9EER7e-UU?doACgE#VHE-p(eR~YWy9j;3g1%|Y1I~+!3t9R zc9bYN*lt-7f!Z@2iLnWpeDy6f10gFdUGm#BgJkK*;ce~Hi4 z*=RSJlr?Sri$ZK)k#>A-+zsWh+nX6W|DjA z77V;!1=GRA==?BK4x9POoMAn*=#W2AD+P#z#7HT>U(r2-V!N*oZQ^2!NA*i;Y7Y{# zBDYK<)(2o_XS;kgu$Cq&O(+2iGl)j6YPi~TD4FpIh*xkzuuLxH4IxsD)7Icq-RqZU zO(`fSC~Bjn0VnGEJ|->~QwEANtEcg6Yip@io(tt3M5TKJ+Ix$Q)%DiLe|BF=9eAa> zM6SPM{3XcWt3nm90%v$>aP{z|R^efh^uyq^MTN<6gcj|EfOA8$x& z*4w5z*&M_qy}t%5etaJ~o)?8rN zy8Xq*3Hlt^^g3Qo8KRe#Tv~iljM{ajvg!PqysrB@xC*kev4A1ZCfQ4w)st{q0swfL z&e@ZSlJafM8HbDITvZ@eV$P))yJjy~Jr-~}_cO99u%`b1iZ&I!UqD8iq+?MKi zyto&JEJ;bDLFY$j7BDz$#rFnWy&{0^!Z!4Ic#_xi)SOm^_QGN;JLrVxtUD)C&{D29 z=ZaQ7H3~2ePM4>GRDWX2Hk`B5kdq_xm)~<~2?ypxMPbwO(bM-QTbn@_8>@_ZFlqI< zI2s167qfd{%`USnr;5jGt<=r%Rct&aUxzP`llk3J@UoZWS5&YzLYp8hcsK`T#RGn` zyJgMLDz4TxD@cA>?bY4IiuTRJ)izoAX0O<9?Z8$g zsMU>0qfC|o71{rTL&ymbceav48&ZiLH+6LovF3-8_GUbbK68^^PCYqvusr4$VNKBc z1eCEFuM1nNn6ybr^O-4NF=Ku5<)J0`_E<WbC?p_VcDYXOb)?_*kxf znb-6& zK4v6EOi@KMCaHoxP39Lma?#Pzj=OmYS_{j*4NpK&k?i^^SG_oT)qOduaJhqf?Rr~X z+DME)UU94ixNG^3M$+FC62aNzV(%Yag({-S}#45}rx%D+~og9}7U<~2vl-?4M`X~XUDK%Q(8+cd`ROTPAp z`(`Jno=CC;i-~-m4EDXX;vm4McVm~D-O8)e#w7L}q1(MX+~`Cu%v_=X1V8>6N}XlE``?fhB% z9|SDyi;69~RUkMc@n|E+uV63mCDW`WX_v+}mVeZ`9WIlu3$-p8fVFD*tGI0DH-616 z1cwuI@y-=Ry1BZ~A!Eo9j%7m(V*aKVI@phWsWBV;zu%Y$%V?1@ZINDUlS>BgT!eg+ z)6OWx)j|DAs+SuUciNx|@!8(g$#bgfIkSvB`kzESl)nSOYIR42VV5z0OokmoLPF#XHjPb< z)&YdUW;*!1T+&i~{9vJ8`=-hDfPOo}&o|kTw`hnel0Sw<$VCFk047t#s-$xi6ckl1 z5Vgo8Edf;btI=|Op{un}mQ(?vOI@cS&hau`zA>+>ixofc-TAC+YxiIhyALXj%ZJSF zaALlTG6JimrdkfmNvZxghV8IJ>(R^rbW;A=BDBAcY)m7}nv{fn^V_x@b;#>K_HkNG zOHJg76xovkzi z_qi0S6n!xBYJpgn0$z{+1O`)vs7a6oOq0in=~!7Ufv$7(6g?q6{+0uY;GeQ-wFHI< z)b{l~>7Tc%^K7=>E`w3OO-HR%gTiDWKJGEO8$G=_h$7~bza_+#5ZnDfH{(qujDTc7`o{PtxSZ1P0(|9keIJn$Dt{@*?M|F4}O zWBBtbiT}x=zdhxn{OUKBn2*r(ed1&Xb{n-ian|;e*ubk@N4tN!oxdD0rnZ2|#b6|x01YkLiv{kHCg`=olg$ka9(8|Uxk$6{ zIZc0eA<7F5c!W5+_B*u*G9l<0rO^Fl4v=8v+OGE)Lc+u2zJC{!mX|-*dQ;CfQ?Abf zfTzm+m3E-Ma?NRA>(pmaG=@3e34?gz& zB2hk*hmX>Z_PeL(kpAISd;^Q9Ro}KnlE3@9k=X&D`d5aAhH9*;E$+Ww@HweciB^_)!Lk< z=C*@D2DY<9H2dp+w-cz)lp5>}fnR;6l=GA`K#J}Q7gp-QHJ!#&VVKR8#(~6HnD{W- z?@iLtsg)*1O6CDI>)MZO-H)6{L+^?KUSQG7ZW=xZ08eyUHRhiSTnuyt z7*&pP&4bH+ZBes<3%RmgXt|1Gtb7X)uYD2q0sCFvZTF=NUS(}IPN|`H{g!i7B@fR| z$igQZ@BK7+m`pyZ_}i9=TpKhp{WaiT4FkwDc)ZoijZU>VR;5r;gw%Z%c0!reM!hpq z@VXb3ob$`1gAY(zI|^DY118AcrpW76-mr$cJ5dlhbyoF0M?%fNcsqs7BaAPTDa&qz@${9L|yWuR#q9u1PZxCXI@YemekW&D^=>M?5&Um+wwYxTZ|{E83)c z?$HgD0GKUa9l(mldy_@yMX48oPq6wqXLP8ZK7IO(kdTgR#k<-(bBDa<9 z1|xiJ0|2AM4;SR;^EB_ylJe4}cBa(zYxnY(JVstxxT(rzQB##(ObW%U8JXPYFqO`1 
z9j4d0mU|YIg9^aOQ0am4hke+q?SSOt3mfWV7USc z3t#bEEIMs&^ns2SoklcagX=O6W-GhEu~sLg)x(ZjgEb5Jb?=($Gvk=F-weIh>>EXK zcD`K(9GIpn7%Sm;b3hyHKBMOazP*N&t7SrbNiibiQ@JTlwnq}AK{V3N&NVl-R@#u_ z&di8A>&1r3>W8DBUB>_(mTvdG%T=UM*ZvrB(X#pxO{~WNlZ00#`Hc;|(8FytKTsgY zcL9Wj9Z5OZ@BD5!m1hEV%Q+@{Z>mK5hST6AUaefu#smlwD+{)?huNXbRb^n(^`6KC z(O^uTn?(_MsjFamArXx4DiJ|71*(%ta`z z7VO90aP$pLoOXN6&1KXfi`P||c5lqvrgGY>jD6hoU|#h!HAgbCE@Gzd<7tcAl1l_Z1ihhix>a>HXw^>Ty8-eL)h4*_PCL zq9;^*JsU(27V7Or>5^R*EL!o3?eRDhT?;Xq_9$^~p&k-sLrwI*lEFgxn@_M`DZ;U| z-&Vb8e3^pcl6vz7?A!;S%erGZv|P|(n;~48F?;@q0K>vUKIeE-JW52W)2s`t5qet7 zBr#2Ibp$(;VUZhk3ihoQ8cBal@-wAs2 zO}}412;`R&^nEQhTY=(SKHVZtwoJJoBJR-EKzr|^Q(*22x;=Hr(sJ~+3WXUYH=0&D z(vMKwN8_*XhACnAXFei#qj6shVViC9!J88A`VkyZ^@-)=0|`WLUXX7E^2Na_QCa-_}r_j-^y`DQa*i}u4*o*FX$_-Lfa}hjr2+rTn_(DLJ@6r zsMi1Pm;b^)DLQ(^??G2~^_9AwM_yMJll894Qy0|DBZ)n$J+p&%MFW1hGfeC`^yY4w z2W-5C-w+}{Tx<5cbln~Q(Kmz7EmyznSpqWio1PXRIdrFyHdC0g+A=KRSHLd~!1HZfOgzd<|g8AK;83dEl?}#U)pD(LRetkpiYJZc}BQ zp1wA}?zU3w_`8!WL(A%j%udJgrxs;{)tl07;v&*V>W+8g#k|_5Me5Ol@Z;WXpC1>- z#80UWQ35%o9qE@d9*QN%`HX`$*_ zCD3)^+GSqMI-vfCivL)sR;9(5cd47J9uZvdOmiCM8Lu>wWbo_yj6u3R%3Pk|ci*_l zZ8Oh7$Yq;qJRLr>j^J@haUKPk!(-83SpRB>rxVa1e`-XVHdtVuL~lW}L#Nm2QtG97 z^L+Vf1>$pd3z?&czt{;;LRfpbWvG{kD7n6!$fG$ut)M#2ly+C6MUvFxyJgzs>)3h6 zd?WZJ?Y+|$Pv09Y=uG8_k^;xN9u30+FOsr03P1|jAVM{HXsX$Y%dIen9(eRQ|wWqnn$!rKmA@&m3OAC`rGz@jiV zsyQS>z9NR#ViklVZALQH=g5IU{6>~ut#s$CX1T0@DJ0o)vhe3w;vidzN)hE!lM4tS z^t1*wgc%AaH^n$AoE7Gk3P4vovU$*evohW0(htki(Ub8!a?7TzwXO|!qXtS7K)LM1 znCQ&s)0CoRhk-fK%yI$mVk46l&gH_8??rfFlXj=pJYdN+JHXPucs$kTTVqv~=FFsI z(>nOdg2BNS3G`#2S!-5-WX?pc&}c?n$ckQXnA0M%)o0MzWQtHr^sN#vQ#$Y3wOwax zsY^wq&^dGoB3qnWjo9or5gVDaZ?Mdtqk@MLvs?y^Sz5B^N%&!N`HANTAeriT?^PUL_eQi($p}P%-xjqoBm-jSQf{tT| zc|{KUo~O(;|00Ols0u)8N!$+EJA-Y!rpi63O(Sr()7o~L+mk2x7vs>%#-w*ME&|Q) zXaX7F;ql@k=hj%Wl7$cl+KnSeg}}Rqq{D!#b&=gB8{jc>B*J3=JfqE`s}+o^16L!9 zG;^|H`?ty_M}uq1-ypVYF=UE+4G_ClimI+j{Z&P8W!rb9$#$7HRvtz7ysNpz}@N(Q8@#;T;QV=sp4e4G-xJ)S4*Oo(km zH%YDPT$=0ppbKbXaSyA3SYOMEigt&%R&X&|&U{bXICl{Qo3`>?nWmv&PXUtP;__Qr zym?&~>f$%?Ln{v>rP`WmlthVnBGY$N+P?1c@6@blHb)xgi_(*RPHC|j4?Tg;9Ru6R zXi!&k^nN=uS!oGTss!^W#7W3&zQLAa0=ONmPa}Wr3Huao9#2I($F}8{1jFhW@w9C) zk&bz&vL5w;XK_G^Yf??mb|BgmJu$xsR-HDfWEKy%Pt4AHbym$Q?GXRki}Y?2FkzxSPM-en9ES()r)1Eip3 zCAe2w2=F=ztqXf!OSYiW@D_ZD)0z5#-1;#{^?vtP1v)-wQ`4{(MsJ;b4xep(vU;LXRK%=zV>N68Ri4?-(_`{4G$KZ@=ib) z%3Id<2SzXs$~>o!^i8H_Q%nwm-FQ^#7IH&5TDRFFp{3&A2Kwd{?x{kKZ8djQM?+h#7y;0-Re0IGnDTA(z>@R?VcZbQBGoTJ3L{|Q8}!M6KRt(XmN{U`ju zz?qR}>Ut=Ty!1jUSRaLXmiSm;3R@DavL`9h``b2lsK&+j!Cmw=8vdpedY=Fdt|5fzf?C%MWtPvY_gSX)}`Z>~Y*{+QllDr9;ft)Ut5!!0F)4+eH z{(Uq@0BALBB$9vF)PE>DL^XdDchvoy?e2dFyMR?3jath%{^MS%;cz@UC9!$&(`jk}<`2znv=Swqw_Pa8j<{S2@)868 zLxrp$d@wSqb{5u2W!ULe`uT!6HY#e>p?fkzz%~6oYU;ZC`*$hFstbHEyX_%H1#Rk! 
z5y6*1a&BYe;a%(uwg(FGz4K={3MS>7taQwG>zFLOmQz{x{>jgjxL?YgVJh}SOnCX4nW<`1Hge&tEUSJ8X9OkPex`WT|m6Ku&IEA&s8dtR3O)St}0tsGlj!~ z@)0$AJT5?uSUT<=N&|ECJWq^bXI$$0vS{={bJT(#^Z+j+jTE(k?wWf*> zt;$=sVy8Td;{8ih`+?;eR*W1axT*V@?a6c$5Vrh3*@~BqaI)QP%i;e{R~P{RYbCalKn$GMXup4Z1UD*Z>N| zO@P~~`?yRY`nmV^>I7`WVZT&k0m5ZYWvdKB8VBU<=wMRMUR27Q?CcFd&WIHc#qYyY z*D+Kz1O!SpmpzqcUcHrp1Qr%7q1%d{wj(V=B|l-NF$1<7+*Te5weaOvXSQGp@pr-f zb<)iyeMhR}TSNFtZ9baYqiM?L??Q-vh=u@6XTD5P*`OhKZ{k04XCfaes97itn-zD16 z+y#Gmmt*4nmwvrFAFMmAxBA{)AK#vg3;=ls+@T0BfU!)(Wy`2l08m-dfDDr+ zW0mO%h*=D;NVx4Yr?Po$7rCzv+aGd$ZqC&jfiiml&|JXrvj8E((_<-9h&1b}1@tep zoK0(=OO_vMwNG{$_svu1cIuY-FG55L*w=HOZKTd5%lts21gE~!9T-><*c#}@s`Eeq z9rVE05lC zXS>w~xTX6$@4|l=I&$n-I3g*z3_)E$CFYG3RT~95T$c7o1#hQ{lx9C!Zco&WAmj)KBCkSsLX1X&m{X|sPnXH zEvvk*9C^GiD?sJML8y7R$z`2YNK1sR9ONUmj^^^}AvwQ1GElRH;Rh2h7x|Xd|Sw96_QiaEt^$X4W@8&#Oo}>e!PQb-r;p=}Q zCH4^gk{G5M#S{GsH`_ZL7TN^QH%LfvDzWz~|8l;jcjRh`kaDc)0$skEvD ztr#hUH{$&S1-r=bJQtVD;XLIml}}mLdL&kM0uBRP< zz&wEHw^@+k8xu_>`Bi@BsVUX z)L3!RL5=aR;(0$n7(=t&$aHFLg`8Kzr4RzGfyi9JVBcxA;mLsFddCgKeBt>AdiWE; zLe78o(O(z_WrwjK`Hk&jq6N5C)e&gBvgn`l0OUT|Y6W&@R~6kR(Lgynk&-(4J?ko{ zbaU2TLvM>LFT+0j2)|KlO>=Rbd;}qTHwul}U?K&PvuP|5hk4Y2!SQ53O?72AslAIf z2H#TnSw~A(j62(Vq9tufXW6{8^P#XRu9JP;X559PpjyOwS;7Z>QT7r0am7N^}9qZVWH+zl(AlLK;T zU!`hKT30RS@oEc|Qn8-2RrtH5XzcamvpT6yW-`b3nidJz4)U=w#7)*;T_dLxhVy0m zg#c;cwtBn&AABMlfImw3P5~hIczME+5Ur+5qk$fgA+3oqpR^wUwXMk%+Qj@AP8oTd zi-vWIO%5B@F^hB>tJEbiXlk1+9`5hJ@og+^mv5)bbRn?^%dO3|;O$W-3;SYSS(ny< z7AY+Po1ojv^+*%I<)$59jR@Y9nj5(HQ73S#j=FF%0F?1h8QNF;c9%ozX!wU0@$1A!#&->1$4MsSA?O(ovpvwjLDVo3 zk-VMV0WhAMKvi3VPh6%%eZVo}x!Zn!EmN}ys3u}PB0Dvk9y}U?1a=w9f7@DU;Q+O| ztyXV6&25?Pu@%BePSs=OQnxy7i?vNFnAJ`hOp_`X8L>`%xV`+v z;}F7_MwM+rN9QL0>6Xhq?IAXe~fr#!4N%Xe$_-n|Fru2Uu+7V)b&UUf7==KLqUFZ2z*p28#(A$ch>U=|PB%5?Pzi zjjNTc5zs>BSc>QX(E4gYqyzjJ$NKG?X{uXFXAgN3s<^fshv{h7(1kAJ*Oo~D$*P|}7%xl0>v=u2DP;#iEPII;;RsSj1FA;&RAKTj1^W6!_owekLk(BtpTR)^p z@(jcptI*+?bHE9t02>C2JS=xh`<{+T9s|%ut9HtkyJ@rnDb{uTH35)kYlV*g;jDnJ ztS&g-1sfHWWfsYpuKK4O1yqUX$&!8H#*)GhX1PYB3hDfGBLWwxQ$IEU3ycQfW-d>f zyuL4hGPSON`C6vvE&Qejt4s#+c|N26tMvG&3RDgs&-xtoHJ?2P`q9wI1a+L>l8_97 zr3t=GRX!&&&*y@TtdrV9w|=<4v5PMgrTF*NazIOxfmiloqypSy3r{Swu2xyg@w@GY zFJF*%$8v&eIlh4Ukp79ct54u6twe7#@y+yt*1|Ov*9P%U9?)ewKo2ASL>oE&@?Bsn z(LF6yXc5q-041CI-IMbh)%1$P>c#`G&3E7jgZ+3fJUbS@txE*JD6N z_wzqh{i+`vpb^CkFgFi+>}q}LOccD5H{OvtbUWYbp>z{~`sn*^EWeS>Jrt$1dXwjO z1`wA0x^7FoFe-?7tE*$cVx)i{d zzVHVDxM5&j$%qhiKk1Fp_lF}!&=1i?e(oy?WQ`y{f~$Li=)mBK_UlZMz)}=!K)X0O zMXT&N1o}+xpwWt1Ex0?QS%EM3-A9cmNp$TT=W8rHG%?ElNvAw76#*ad*o;#zO&I=8 z8n$~DDVCir-c)n((9Gwmv!RkIrQs0$*=4-EoVh&TNiFOJk5bL{b2?Vw{_yKO?+0|A zxvvI1DWw(!a1AXl;fUYE9WAil_^x2~%Lz^9si29!m89?%+&qTT!%<3xbYNQPv`7Nk zK;NoVV0C2y@gcK0h%@Mt(J}4I(EOD*9j8@*lu}G@TB{g++{k4wAaN=K8T1v1R$;h- z-0MyPXwSDJMa1jFdhCeRSkecyTxJ4&@5&mRg=xzo0Cbr9ssvm8nOBh8i&YsO`CnxkK4PHDHYUZkiEbzh zU^Y**xDMD#T9chUSbfp};sofHjp69&>F)0D!V+S{V^G6Aa4m7M1=J;=omWs!Q=c7^ z`beW?6Vvx6K-+9Oi z;BB7TwZUTeM8S%X?Dmtve6XSo6dFWw+?g82X_!aCFTc}a-?g7u<1W*8WvymT~ z7@GcBbKw1^jAH_$6Ps!WbS8X!PRoNC^W6JE946~4<9g@=8Tjc-0&1R>YYch5H#5I- zPXz%m&-m`lp&TlW* z)6U~T0_7pyE9dn=7L&s#QPr%v&f(qOsieNw!R}V?4yG{9=DcKjKR`UOr8o;5`Am9M zOGwKdtGdHYLJX-e4DUJG;KM={dv%A$k1O^rjkCK$>WUq*qGhK?Ru+oW-UGrooGH;4%b-=* z`rcVgLPADeJzhcqdWjdYH(N7=%qS`{Qrwc;whp)!)r{f>!vnIQ%A5yN2nMKCQ5yn9 znUpDd!X?nQL11BgKpWSa0dkjyOo*9KUc2#*a)XnV1>yEuU)-$5M8ngVvobc*!OFeG zOz${Ui7)~*l;+*&7!W1+CHgRS*ScQ-pMAZ!g8!T97S&a6+@1<+KgmObVm9-d=>WyE zX`TIJ>|n=jI@Z$g?rk!@^ExlhvcZtq9g2~ zb7hA9^qQ5D*o@krP~?Zd(c+D*nT(_#Z477HKakGbvYf1R8ePghwT{*~m_C#jesm;g zn8@Y$q2TvU6loQCPYsSb^G1wW@yq$H_UrEXWD%{)Y|WEFD;e`V+KoxCQ3DGXBBE>% 
zqh>%(^CpqvbYemA&B4%iai?7wnm9KZ)9CT^Dox|aaEWVd$!vkRYg3(jtu&ThojuyS zhWA`*_a2;MFaF)rGffzs(K^$i)QouvR6-tdfNg@Y`?Ma2AzXLLy7>0Ay41}l6$u@3 z+YGm|+Kdudv+qqAp+HB0mZ|6RR~#|Dg{X-JAZMK@ekhExb!?jw&gL1?RRtF^szs^M zjKPPi(7EasnQ%<56N1GCt@maos)#`ek&EjtvE%YH=9>(gUwCTxb~LvPe@~ieaW)Y` z#`#dlPf?K;G!rY_UiPk~09=~|4F`(>phcop`DwYiZ<Pt6=b09%Dn(MAqDuEfw7?JwvoxNS4gJ(&GQEMo*az@fvWw9CGWiQ zycg2k_7l9SsYJ{d7y36uk)Hu|P`F<#AEf^I7Rg(LtCLN#`kSv|Y-6!M3X1D#2ST^% z!vV~zXNwO|X|&}SoIuDqeDb9E_peHr93rg-w=FXF>l>h& zCk;%a(G$^~7Xx}M+j_$CZ&!RMCq&r5nx=qa8CBxu0E%x3h#AK4$aOu%G-lY|gyFjp zTNvx5*Zp-`jA<;|pDt zt)X%^Ifj+p(@v#SW3~drtt35wTwk8xNHtw~m%Y$oDu}J0B6X|>_a9n@P-_c9|NiKN zJ$}8IfAxVYK3u{)?@z`G9-?v}6-*c;%|HEFTvp!OtS!roB^KBWg2Ut~5W;{3aEXu3Jm_DcheV+j z;WI}({ZcrGzD?S+Hr(0p2fKeR{D;3ta{sW#45iXNCdvQ63K%*eu1RkXM`eP4%l~7D zF9^H`|JSpbaZAI)g8uhKE~x+@&+AP4Zx`L)ai6IJck_R}^#3ka{J96AR-lIebSE?O z*S{LZGD0t7*XDyJ-jG|NR8?AJC@%7Z2wDBc>){*SC=SXAY7{1WMT}%E^*8gh!LB zrJtj485h6;ZEBbPS$)s0nNfX1 zRfbq2r4DtYoZ2DbV8SN_*k2A|qoH3QAId#u=(2Z63oHYrPoa*!qsF4aZqX=ys1j<# zar1)vkoY1hibhYQJdmgg%?F15m5G;3{FGqNc_tR1oE53~O)*>CERc^%gcL8%zSqjd zreJ(Vqghmg<^!3m({1I@%;ZKhk6eP#;)TuEZunykts3zxSGY)@@GvxzAQfZDB0UU*TV|==ed?v`-#=^ za(mnqNL1)+t;y}^&g{ChPMj^5h*DS1dEF9=LCHZ5^FKqp#00K|jevM@o!xoO!}4tL zyz-lU#nm#`Wb?T$=B9H|cVZ~%O0z8Snx#Xr&#Ote`h2?hIeYcha@6-i zdsoOBh=rV8ayn#3MVImy>%3yLX?OS?*`mee`27tjl{=V9PEklZRhAvLVATD8I|4t9y7P0b@b_91{QiJ z)zI?IS+qgBZ9(E4WO7mD2YRl$aUbWt^{!wusLb+iPz+f(I%DFUOy1Mwk%NU9#XJkw z?LkNSwXI3T&zlT-iNDD(2x6?!8H@BR4Vr>t9IAd=r&aQX`c+GIig(BWbydaQ#?KTC zEKRvuoY=0vg(EE(?dM~{pafS2y@bt&WpojIvy~#AhxOYkmnVxv3%9*0#i3>(cWcSs z?J}?!GL9U##CQ`lGv=Gi6vY<$8p)m ztxy!5>)1Us*I#kb4MdijJ{%EaI8+2$(%743Z>mdX*H0rU$Qt@0$Gv(|HHc+y zb(y5*$~^2{>bAM2X;ZFbATay9#yEnD;$g)}4B)#vp{;WQnr{@Og&sb4WQFE-W`e~~ zOvb$+6KoV6ifdD%sLbpRR+BC%8f*qKsp;69On-I?ST@myio2sg$-q&5Z=3qX%Itwx38u|XN!1D1tnH35+xss^ zwxuIYD21eDF>wvUropYzEb~GfRi9SfHl0L1Bjxrl3RNa&=McTxbV{}0{prbBVp)He z7?&-^Rx(!OW2GgWdT7?+nozSZD=##r1(#Q{S z!32D7SgndI?u*vrRhRLVdY!;(t0e=Z#Eh7()6onLR|&P!2mdj5w;)VS3v^wNqSk)! znb5Eubb&?qEKB|U}>+IK?Aa($({`ke(vebxL?1rASULkgCiC)2o}rNnhW!R*<#~IIW>>&}0-vw?#aC z?*!5s2jl)!UG>V*vRB)ascQ(9t9duWjWgkFMUkLtyW3m;y;5;zGh?(U^31fM@Up@0 z)43-4zx`^7382(n^+CFQ+{lyPZY4w2v&=N@W%kmlv_)i}0yS}hZBMzHQH0fOx$KqH zAh9fWIcdpQ^3lplmnn1ZgQ1TF@q0`bus^@7AUz=V*WU)601!WASAT{YlK-vBG9kJl zI2b38&5#D17~7%O|ARG=bl~o}ED5Nec|d2U*Wvzh#08DeFfY&lQmX@>OncVn?s%NvUvGpEi5CKZaOm9=G`R)# z(!9r$n7G-s+sm|&yPvSJmr*t+Au@h{u6FL|Sfr&<<4_-m^kcVOCd^g&x_c!02}_|g z3wPx~E`}uMYjrKgn>B5-V@(Gd&FX+IyFFqNWdB;P7m2&v_pS@Q+M|S{a#i_(H~fMW z5$M|V)OI`HrJlj|>w#05M$9OjIqds=6%dL1u4cGgGFRg_kN z)}ZFOyYQLTal;wfgGtIE@b&DjgQyZ?dwF(nK}se4f3h9mU9ZvexPeNKku|_V%~Ke} zr5jAxr@3rCi&;5|!{ezFY4Z&5SjB5xiN+z*sMHi;B01mwE-?muCw%X(c1zQNY-g+;7^nsBM6PxHNQG9ZUuK}|HhdAYVS$^J%gK*ZeuaVwDqOvr z_SdP~8;zYo@ICR!!`pYpOKxM?KlSi5f16&*9{$kEOyPi@#3in7=9##oEWdCKHOqUxwd zzvq@H{U#b>Wvf?x={!ID14{bbxwhIgap@q3qu5I4N)%2iS$RNR@oK`K%DJ{Uz2Ud+PQ5?iTB3jygTNl6_RJLf38viXvD&!+7j zEP2{X%6pzYx6$q#gvmfVlI1C_n}#@G#BL(HR8pZxIXPThny7pGljAVV+Gdtwq_)8t zn)Svxpz%PmcU@?D1Vpxiu2VJdG*(E+-COZnlqV6~GSY7V4(v`bkT)jqsUD@!67p1Vp7| zVwcg(ezHrkHohRC{Uy0*I~lY?R8aCO=j7YC0_#{DGrlD_8s&a0v1#L~j`Fp3rK}r` z-!;o5(i|lvqyRf3ddjz0NYT{5%2tgbZ@!N`AwjuKwp%I3286@`6cn)>CQm%jUFE*8 zyXK}=32rJ!5nP-*(m`V|8U^=u%qZtK>C3?_+eE?fsm)|h6F?N#bpm>ppP`60Y;3`? 
[base85-encoded git binary patch data omitted: unreadable binary payload for one of the binary files added by this import (e.g. the PDF/PNG/PPTX assets listed in the diffstat); no text content is recoverable from this span]